From 88a309465b3f05a100c3b81966982c0f9f5d23a6 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Thu, 20 Jan 2022 05:20:06 -0800 Subject: [PATCH 0001/4122] lib: zstd: clean up double word in comment. Remove the second 'a' and 'into'. Signed-off-by: Tom Rix Signed-off-by: Nick Terrell --- include/linux/zstd_lib.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/zstd_lib.h b/include/linux/zstd_lib.h index b8c7dbf98390..6b91758b61af 100644 --- a/include/linux/zstd_lib.h +++ b/include/linux/zstd_lib.h @@ -1330,7 +1330,7 @@ ZSTDLIB_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, /*! ZSTD_mergeBlockDelimiters() : * Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals - * by merging them into into the literals of the next sequence. + * by merging them into the literals of the next sequence. * * As such, the final generated result has no explicit representation of block boundaries, * and the final last literals segment is not represented in the sequences. @@ -1377,7 +1377,7 @@ ZSTDLIB_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size /*! ZSTD_writeSkippableFrame() : * Generates a zstd skippable frame containing data given by src, and writes it to dst buffer. * - * Skippable frames begin with a a 4-byte magic number. There are 16 possible choices of magic number, + * Skippable frames begin with a 4-byte magic number. There are 16 possible choices of magic number, * ranging from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15. * As such, the parameter magicVariant controls the exact skippable frame magic number variant used, so * the magic number used will be ZSTD_MAGIC_SKIPPABLE_START + magicVariant. 
From c73275cf6834787ca090317f1d20dbfa3b7f05aa Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Tue, 23 Aug 2022 09:15:03 +0800 Subject: [PATCH 0002/4122] apparmor: fix a memleak in multi_transaction_new() In multi_transaction_new(), the variable t is not freed or passed out on the failure of copy_from_user(t->data, buf, size), which could lead to a memleak. Fix this bug by adding a put_multi_transaction(t) in the error path. Fixes: 1dea3b41e84c5 ("apparmor: speed up transactional queries") Signed-off-by: Gaosheng Cui Signed-off-by: John Johansen --- security/apparmor/apparmorfs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index d066ccc219e2..7160e7aa58b9 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -868,8 +868,10 @@ static struct multi_transaction *multi_transaction_new(struct file *file, if (!t) return ERR_PTR(-ENOMEM); kref_init(&t->count); - if (copy_from_user(t->data, buf, size)) + if (copy_from_user(t->data, buf, size)) { + put_multi_transaction(t); return ERR_PTR(-EFAULT); + } return t; } From 9c4557efc558a68e4cd973490fd936d6e3414db8 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Tue, 6 Sep 2022 03:39:55 -0700 Subject: [PATCH 0003/4122] apparmor: fix lockdep warning when removing a namespace Fix the following lockdep warning [ 1119.158984] ============================================ [ 1119.158988] WARNING: possible recursive locking detected [ 1119.158996] 6.0.0-rc1+ #257 Tainted: G E N [ 1119.158999] -------------------------------------------- [ 1119.159001] bash/80100 is trying to acquire lock: [ 1119.159007] ffff88803e79b4a0 (&ns->lock/1){+.+.}-{4:4}, at: destroy_ns.part.0+0x43/0x140 [ 1119.159028] but task is already holding lock: [ 1119.159030] ffff8881009764a0 (&ns->lock/1){+.+.}-{4:4}, at: aa_remove_profiles+0x3f0/0x640 [ 1119.159040] other info that might help us debug this: [ 1119.159042] Possible unsafe locking 
scenario: [ 1119.159043] CPU0 [ 1119.159045] ---- [ 1119.159047] lock(&ns->lock/1); [ 1119.159051] lock(&ns->lock/1); [ 1119.159055] *** DEADLOCK *** Which is caused by an incorrect lockdep nesting notation Fixes: feb3c766a3ab ("apparmor: fix possible recursive lock warning in __aa_create_ns") Signed-off-by: John Johansen --- security/apparmor/policy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index 499c0209b6a4..fbdfcef91c61 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -1170,7 +1170,7 @@ ssize_t aa_remove_profiles(struct aa_ns *policy_ns, struct aa_label *subj, if (!name) { /* remove namespace - can only happen if fqname[0] == ':' */ - mutex_lock_nested(&ns->parent->lock, ns->level); + mutex_lock_nested(&ns->parent->lock, ns->parent->level); __aa_bump_ns_revision(ns); __aa_remove_ns(ns); mutex_unlock(&ns->parent->lock); From f47acc4b7c43d566bf42816335830c4c17f9c200 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Tue, 6 Sep 2022 14:03:44 -0700 Subject: [PATCH 0004/4122] apparmor: reserve mediation classes Reserve mediation classes that exist in out of tree development branches or are used by userspace mediation helpers. 
Signed-off-by: John Johansen --- security/apparmor/include/apparmor.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/security/apparmor/include/apparmor.h b/security/apparmor/include/apparmor.h index 9c3fc36a0702..dd2c131ed170 100644 --- a/security/apparmor/include/apparmor.h +++ b/security/apparmor/include/apparmor.h @@ -28,8 +28,15 @@ #define AA_CLASS_SIGNAL 10 #define AA_CLASS_NET 14 #define AA_CLASS_LABEL 16 +#define AA_CLASS_POSIX_MQUEUE 17 +#define AA_CLASS_IO_URING 18 +#define AA_CLASS_MODULE 19 +#define AA_CLASS_DISPLAY_LSM 20 -#define AA_CLASS_LAST AA_CLASS_LABEL +#define AA_CLASS_X 31 +#define AA_CLASS_DBUS 32 + +#define AA_CLASS_LAST AA_CLASS_DBUS /* Control parameters settable through module/boot flags */ extern enum audit_mode aa_g_audit; From f4d6b94b40c966ddd9eeb0d451e8a02c595ec7e3 Mon Sep 17 00:00:00 2001 From: Jon Tourville Date: Mon, 11 Jul 2022 11:36:08 -0500 Subject: [PATCH 0005/4122] apparmor: use zstd compression for profile data Change the algorithm used by apparmor to compress profile data from zlib to zstd, using the new zstd API introduced in 5.16. Zstd provides a larger range of compression levels than zlib and significantly better performance at the default level (for a relatively small increase in compressed size). The apparmor module parameter raw_data_compression_level is now clamped to the minimum and maximum compression levels reported by the zstd library. A compression level of 0 retains the previous behavior of disabling policy compression instead of using zstd's behavior, which is to use the default compression level. 
Signed-off-by: Jon Tourville Signed-off-by: John Johansen --- security/apparmor/Kconfig | 4 +- security/apparmor/apparmorfs.c | 58 +++++++--------- security/apparmor/lsm.c | 10 +-- security/apparmor/policy_unpack.c | 111 ++++++++++++++---------------- 4 files changed, 81 insertions(+), 102 deletions(-) diff --git a/security/apparmor/Kconfig b/security/apparmor/Kconfig index cb3496e00d8a..acac3bb3eef2 100644 --- a/security/apparmor/Kconfig +++ b/security/apparmor/Kconfig @@ -85,8 +85,8 @@ config SECURITY_APPARMOR_HASH_DEFAULT config SECURITY_APPARMOR_EXPORT_BINARY bool "Allow exporting the raw binary policy" depends on SECURITY_APPARMOR_INTROSPECT_POLICY - select ZLIB_INFLATE - select ZLIB_DEFLATE + select ZSTD_COMPRESS + select ZSTD_DECOMPRESS default y help This option allows reading back binary policy as it was loaded. diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index 7160e7aa58b9..d98bbf267fc7 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include @@ -1297,42 +1297,30 @@ SEQ_RAWDATA_FOPS(revision); SEQ_RAWDATA_FOPS(hash); SEQ_RAWDATA_FOPS(compressed_size); -static int deflate_decompress(char *src, size_t slen, char *dst, size_t dlen) +static int decompress_zstd(char *src, size_t slen, char *dst, size_t dlen) { #ifdef CONFIG_SECURITY_APPARMOR_EXPORT_BINARY - if (aa_g_rawdata_compression_level != 0) { - int error = 0; - struct z_stream_s strm; + if (aa_g_rawdata_compression_level == 0) { + const size_t wksp_len = zstd_dctx_workspace_bound(); + zstd_dctx *ctx; + void *wksp; + size_t out_len; + int ret = 0; - memset(&strm, 0, sizeof(strm)); - - strm.workspace = kvzalloc(zlib_inflate_workspacesize(), GFP_KERNEL); - if (!strm.workspace) - return -ENOMEM; - - strm.next_in = src; - strm.avail_in = slen; - - error = zlib_inflateInit(&strm); - if (error != Z_OK) { - error = -ENOMEM; - goto fail_inflate_init; + wksp = 
kvzalloc(wksp_len, GFP_KERNEL); + if (!wksp) { + ret = -ENOMEM; + goto cleanup; } - strm.next_out = dst; - strm.avail_out = dlen; - - error = zlib_inflate(&strm, Z_FINISH); - if (error != Z_STREAM_END) - error = -EINVAL; - else - error = 0; - - zlib_inflateEnd(&strm); -fail_inflate_init: - kvfree(strm.workspace); - - return error; + out_len = zstd_decompress_dctx(ctx, dst, dlen, src, slen); + if (zstd_is_error(out_len)) { + ret = -EINVAL; + goto cleanup; + } +cleanup: + kvfree(wksp); + return ret; } #endif @@ -1381,9 +1369,9 @@ static int rawdata_open(struct inode *inode, struct file *file) private->loaddata = loaddata; - error = deflate_decompress(loaddata->data, loaddata->compressed_size, - RAWDATA_F_DATA_BUF(private), - loaddata->size); + error = decompress_zstd(loaddata->data, loaddata->compressed_size, + RAWDATA_F_DATA_BUF(private), + loaddata->size); if (error) goto fail_decompress; diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index e29cade7b662..ec873ff0a4bb 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include @@ -1361,7 +1361,7 @@ module_param_named(export_binary, aa_g_export_binary, aabool, 0600); #endif /* policy loaddata compression level */ -int aa_g_rawdata_compression_level = Z_DEFAULT_COMPRESSION; +int aa_g_rawdata_compression_level = ZSTD_CLEVEL_DEFAULT; module_param_named(rawdata_compression_level, aa_g_rawdata_compression_level, aacompressionlevel, 0400); @@ -1543,9 +1543,9 @@ static int param_set_aacompressionlevel(const char *val, error = param_set_int(val, kp); aa_g_rawdata_compression_level = clamp(aa_g_rawdata_compression_level, - Z_NO_COMPRESSION, - Z_BEST_COMPRESSION); - pr_info("AppArmor: policy rawdata compression level set to %u\n", + zstd_min_clevel(), + zstd_max_clevel()); + pr_info("AppArmor: policy rawdata compression level set to %d\n", aa_g_rawdata_compression_level); return error; diff --git 
a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 55d31bac4f35..10e462d00321 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include "include/apparmor.h" #include "include/audit.h" @@ -1059,81 +1059,73 @@ struct aa_load_ent *aa_load_ent_alloc(void) return ent; } -static int deflate_compress(const char *src, size_t slen, char **dst, - size_t *dlen) +static int compress_zstd(const char *src, size_t slen, char **dst, size_t *dlen) { #ifdef CONFIG_SECURITY_APPARMOR_EXPORT_BINARY - int error; - struct z_stream_s strm; - void *stgbuf, *dstbuf; - size_t stglen = deflateBound(slen); + const zstd_parameters params = + zstd_get_params(aa_g_rawdata_compression_level, slen); + const size_t wksp_len = zstd_cctx_workspace_bound(¶ms.cParams); + void *wksp = NULL; + zstd_cctx *ctx = NULL; + size_t out_len = zstd_compress_bound(slen); + void *out = NULL; + int ret = 0; - memset(&strm, 0, sizeof(strm)); - - if (stglen < slen) - return -EFBIG; - - strm.workspace = kvzalloc(zlib_deflate_workspacesize(MAX_WBITS, - MAX_MEM_LEVEL), - GFP_KERNEL); - if (!strm.workspace) - return -ENOMEM; - - error = zlib_deflateInit(&strm, aa_g_rawdata_compression_level); - if (error != Z_OK) { - error = -ENOMEM; - goto fail_deflate_init; + out = kvzalloc(out_len, GFP_KERNEL); + if (!out) { + ret = -ENOMEM; + goto cleanup; } - stgbuf = kvzalloc(stglen, GFP_KERNEL); - if (!stgbuf) { - error = -ENOMEM; - goto fail_stg_alloc; + wksp = kvzalloc(wksp_len, GFP_KERNEL); + if (!wksp) { + ret = -ENOMEM; + goto cleanup; } - strm.next_in = src; - strm.avail_in = slen; - strm.next_out = stgbuf; - strm.avail_out = stglen; - - error = zlib_deflate(&strm, Z_FINISH); - if (error != Z_STREAM_END) { - error = -EINVAL; - goto fail_deflate; + ctx = zstd_init_cctx(wksp, wksp_len); + if (!ctx) { + ret = -EINVAL; + goto cleanup; } - error = 0; - if (is_vmalloc_addr(stgbuf)) { - dstbuf = 
kvzalloc(strm.total_out, GFP_KERNEL); - if (dstbuf) { - memcpy(dstbuf, stgbuf, strm.total_out); - kvfree(stgbuf); + out_len = zstd_compress_cctx(ctx, out, out_len, src, slen, ¶ms); + if (zstd_is_error(out_len)) { + ret = -EINVAL; + goto cleanup; + } + + if (is_vmalloc_addr(out)) { + *dst = kvzalloc(out_len, GFP_KERNEL); + if (*dst) { + memcpy(*dst, out, out_len); + kvfree(out); + out = NULL; } - } else + } else { /* * If the staging buffer was kmalloc'd, then using krealloc is * probably going to be faster. The destination buffer will * always be smaller, so it's just shrunk, avoiding a memcpy */ - dstbuf = krealloc(stgbuf, strm.total_out, GFP_KERNEL); - - if (!dstbuf) { - error = -ENOMEM; - goto fail_deflate; + *dst = krealloc(out, out_len, GFP_KERNEL); } - *dst = dstbuf; - *dlen = strm.total_out; + if (!*dst) { + ret = -ENOMEM; + goto cleanup; + } -fail_stg_alloc: - zlib_deflateEnd(&strm); -fail_deflate_init: - kvfree(strm.workspace); - return error; + *dlen = out_len; -fail_deflate: - kvfree(stgbuf); - goto fail_stg_alloc; +cleanup: + if (ret) { + kvfree(out); + *dst = NULL; + } + + kvfree(wksp); + return ret; #else *dlen = slen; return 0; @@ -1142,7 +1134,6 @@ fail_deflate: static int compress_loaddata(struct aa_loaddata *data) { - AA_BUG(data->compressed_size > 0); /* @@ -1151,8 +1142,8 @@ static int compress_loaddata(struct aa_loaddata *data) */ if (aa_g_rawdata_compression_level != 0) { void *udata = data->data; - int error = deflate_compress(udata, data->size, &data->data, - &data->compressed_size); + int error = compress_zstd(udata, data->size, &data->data, + &data->compressed_size); if (error) return error; From 2218d08123362c63bab257caf5ec3bc1a6e87ae9 Mon Sep 17 00:00:00 2001 From: Jon Tourville Date: Mon, 11 Jul 2022 11:36:09 -0500 Subject: [PATCH 0006/4122] apparmor: expose compression level limits in sysfs Create two new files in apparmor's sysfs: /sys/kernel/security/apparmor/raw_data_compression_level_min 
/sys/kernel/security/apparmor/raw_data_compression_level_max These correspond to the minimum and maximum zstd compression levels that can be assigned to the apparmor module parameter raw_data_compression_level. Signed-off-by: Jon Tourville Signed-off-by: John Johansen --- security/apparmor/apparmorfs.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index d98bbf267fc7..044affb1ce83 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -1199,10 +1199,24 @@ static int seq_ns_name_show(struct seq_file *seq, void *v) return 0; } +static int seq_ns_compress_min_show(struct seq_file *seq, void *v) +{ + seq_printf(seq, "%d\n", zstd_min_clevel()); + return 0; +} + +static int seq_ns_compress_max_show(struct seq_file *seq, void *v) +{ + seq_printf(seq, "%d\n", zstd_max_clevel()); + return 0; +} + SEQ_NS_FOPS(stacked); SEQ_NS_FOPS(nsstacked); SEQ_NS_FOPS(level); SEQ_NS_FOPS(name); +SEQ_NS_FOPS(compress_min); +SEQ_NS_FOPS(compress_max); /* policy/raw_data/ * file ops */ @@ -2382,6 +2396,8 @@ static struct aa_sfs_entry aa_sfs_entry_apparmor[] = { AA_SFS_FILE_FOPS(".ns_level", 0444, &seq_ns_level_fops), AA_SFS_FILE_FOPS(".ns_name", 0444, &seq_ns_name_fops), AA_SFS_FILE_FOPS("profiles", 0444, &aa_sfs_profiles_fops), + AA_SFS_FILE_FOPS("raw_data_compression_level_min", 0444, &seq_ns_compress_min_fops), + AA_SFS_FILE_FOPS("raw_data_compression_level_max", 0444, &seq_ns_compress_max_fops), AA_SFS_DIR("features", aa_sfs_entry_features), { } }; From 408d53e923bd852d5d80243a642004163db53a87 Mon Sep 17 00:00:00 2001 From: Mike Salvatore Date: Mon, 30 Mar 2020 16:43:29 -0400 Subject: [PATCH 0007/4122] apparmor: compute file permissions on profile load Rather than computing file permissions for each file access, file permissions can be computed once on profile load and stored for lookup. 
Signed-off-by: Mike Salvatore Signed-off-by: John Johansen --- security/apparmor/apparmorfs.c | 2 +- security/apparmor/domain.c | 10 +-- security/apparmor/file.c | 132 ++++++++++++++++++++---------- security/apparmor/include/file.h | 15 +++- security/apparmor/policy_unpack.c | 3 + 5 files changed, 112 insertions(+), 50 deletions(-) diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index 044affb1ce83..825b3093dcdd 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -624,7 +624,7 @@ static void profile_query_cb(struct aa_profile *profile, struct aa_perms *perms, if (state) { struct path_cond cond = { }; - tmp = aa_compute_fperms(dfa, state, &cond); + tmp = *(aa_lookup_fperms(&(profile->file), state, &cond)); } } else if (profile->policy.dfa) { if (!PROFILE_MEDIATES(profile, *match_str)) diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index 91689d34d281..2c99edd8953a 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -162,7 +162,7 @@ next: if (!state) goto fail; } - *perms = aa_compute_fperms(profile->file.dfa, state, &cond); + *perms = *(aa_lookup_fperms(&(profile->file), state, &cond)); aa_apply_modes_to_perms(profile, perms); if ((perms->allow & request) != request) return -EACCES; @@ -215,7 +215,7 @@ static int label_components_match(struct aa_profile *profile, return 0; next: - tmp = aa_compute_fperms(profile->file.dfa, state, &cond); + tmp = *(aa_lookup_fperms(&(profile->file), state, &cond)); aa_apply_modes_to_perms(profile, &tmp); aa_perms_accum(perms, &tmp); label_for_each_cont(i, label, tp) { @@ -224,7 +224,7 @@ next: state = match_component(profile, tp, stack, start); if (!state) goto fail; - tmp = aa_compute_fperms(profile->file.dfa, state, &cond); + tmp = *(aa_lookup_fperms(&(profile->file), state, &cond)); aa_apply_modes_to_perms(profile, &tmp); aa_perms_accum(perms, &tmp); } @@ -661,7 +661,7 @@ static struct aa_label *profile_transition(struct 
aa_profile *profile, } /* find exec permissions for name */ - state = aa_str_perms(profile->file.dfa, state, name, cond, &perms); + state = aa_str_perms(&(profile->file), state, name, cond, &perms); if (perms.allow & MAY_EXEC) { /* exec permission determine how to transition */ new = x_to_label(profile, bprm, name, perms.xindex, &target, @@ -756,7 +756,7 @@ static int profile_onexec(struct aa_profile *profile, struct aa_label *onexec, } /* find exec permissions for name */ - state = aa_str_perms(profile->file.dfa, state, xname, cond, &perms); + state = aa_str_perms(&(profile->file), state, xname, cond, &perms); if (!(perms.allow & AA_MAY_ONEXEC)) { info = "no change_onexec valid for executable"; goto audit; diff --git a/security/apparmor/file.c b/security/apparmor/file.c index e1b7e93602e4..710b7d7517eb 100644 --- a/security/apparmor/file.c +++ b/security/apparmor/file.c @@ -201,49 +201,99 @@ static u32 map_old_perms(u32 old) return new; } -/** - * aa_compute_fperms - convert dfa compressed perms to internal perms - * @dfa: dfa to compute perms for (NOT NULL) - * @state: state in dfa - * @cond: conditions to consider (NOT NULL) - * - * TODO: convert from dfa + state to permission entry, do computation conversion - * at load time. - * - * Returns: computed permission set - */ -struct aa_perms aa_compute_fperms(struct aa_dfa *dfa, unsigned int state, - struct path_cond *cond) +static void __aa_compute_fperms_allow(struct aa_perms *perms, + struct aa_dfa *dfa, + unsigned int state) { - /* FIXME: change over to new dfa format - * currently file perms are encoded in the dfa, new format - * splits the permissions from the dfa. 
This mapping can be - * done at profile load - */ - struct aa_perms perms = { }; - - if (uid_eq(current_fsuid(), cond->uid)) { - perms.allow = map_old_perms(dfa_user_allow(dfa, state)); - perms.audit = map_old_perms(dfa_user_audit(dfa, state)); - perms.quiet = map_old_perms(dfa_user_quiet(dfa, state)); - perms.xindex = dfa_user_xindex(dfa, state); - } else { - perms.allow = map_old_perms(dfa_other_allow(dfa, state)); - perms.audit = map_old_perms(dfa_other_audit(dfa, state)); - perms.quiet = map_old_perms(dfa_other_quiet(dfa, state)); - perms.xindex = dfa_other_xindex(dfa, state); - } - perms.allow |= AA_MAY_GETATTR; + perms->allow |= AA_MAY_GETATTR; /* change_profile wasn't determined by ownership in old mapping */ if (ACCEPT_TABLE(dfa)[state] & 0x80000000) - perms.allow |= AA_MAY_CHANGE_PROFILE; + perms->allow |= AA_MAY_CHANGE_PROFILE; if (ACCEPT_TABLE(dfa)[state] & 0x40000000) - perms.allow |= AA_MAY_ONEXEC; + perms->allow |= AA_MAY_ONEXEC; +} + +static struct aa_perms __aa_compute_fperms_user(struct aa_dfa *dfa, + unsigned int state) +{ + struct aa_perms perms = { }; + + perms.allow = map_old_perms(dfa_user_allow(dfa, state)); + perms.audit = map_old_perms(dfa_user_audit(dfa, state)); + perms.quiet = map_old_perms(dfa_user_quiet(dfa, state)); + perms.xindex = dfa_user_xindex(dfa, state); + + __aa_compute_fperms_allow(&perms, dfa, state); return perms; } +static struct aa_perms __aa_compute_fperms_other(struct aa_dfa *dfa, + unsigned int state) +{ + struct aa_perms perms = { }; + + perms.allow = map_old_perms(dfa_other_allow(dfa, state)); + perms.audit = map_old_perms(dfa_other_audit(dfa, state)); + perms.quiet = map_old_perms(dfa_other_quiet(dfa, state)); + perms.xindex = dfa_other_xindex(dfa, state); + + __aa_compute_fperms_allow(&perms, dfa, state); + + return perms; +} + +/** + * aa_compute_fperms - convert dfa compressed perms to internal perms and store + * them so they can be retrieved later. 
+ * @file_rules: a file_rules structure containing a dfa (NOT NULL) for which + * permissions will be computed (NOT NULL) + * + * TODO: convert from dfa + state to permission entry + */ +void aa_compute_fperms(struct aa_file_rules *file_rules) +{ + int state; + int state_count = file_rules->dfa->tables[YYTD_ID_BASE]->td_lolen; + + // DFAs are restricted from having a state_count of less than 2 + file_rules->fperms_table = kvzalloc( + state_count * 2 * sizeof(struct aa_perms), GFP_KERNEL); + + // Since fperms_table is initialized with zeroes via kvzalloc(), we can + // skip the trap state (state == 0) + for (state = 1; state < state_count; state++) { + file_rules->fperms_table[state * 2] = + __aa_compute_fperms_user(file_rules->dfa, state); + file_rules->fperms_table[state * 2 + 1] = + __aa_compute_fperms_other(file_rules->dfa, state); + } +} + +/** + * aa_lookup_fperms - convert dfa compressed perms to internal perms + * @dfa: dfa to lookup perms for (NOT NULL) + * @state: state in dfa + * @cond: conditions to consider (NOT NULL) + * + * TODO: convert from dfa + state to permission entry + * + * Returns: a pointer to a file permission set + */ +struct aa_perms default_perms = {}; +struct aa_perms *aa_lookup_fperms(struct aa_file_rules *file_rules, + unsigned int state, struct path_cond *cond) +{ + if (!(file_rules->fperms_table)) + return &default_perms; + + if (uid_eq(current_fsuid(), cond->uid)) + return &(file_rules->fperms_table[state * 2]); + + return &(file_rules->fperms_table[state * 2 + 1]); +} + /** * aa_str_perms - find permission that match @name * @dfa: to match against (MAYBE NULL) @@ -254,13 +304,13 @@ struct aa_perms aa_compute_fperms(struct aa_dfa *dfa, unsigned int state, * * Returns: the final state in @dfa when beginning @start and walking @name */ -unsigned int aa_str_perms(struct aa_dfa *dfa, unsigned int start, +unsigned int aa_str_perms(struct aa_file_rules *file_rules, unsigned int start, const char *name, struct path_cond *cond, struct 
aa_perms *perms) { unsigned int state; - state = aa_dfa_match(dfa, start, name); - *perms = aa_compute_fperms(dfa, state, cond); + state = aa_dfa_match(file_rules->dfa, start, name); + *perms = *(aa_lookup_fperms(file_rules, state, cond)); return state; } @@ -273,7 +323,7 @@ int __aa_path_perm(const char *op, struct aa_profile *profile, const char *name, if (profile_unconfined(profile)) return 0; - aa_str_perms(profile->file.dfa, profile->file.start, name, cond, perms); + aa_str_perms(&(profile->file), profile->file.start, name, cond, perms); if (request & ~perms->allow) e = -EACCES; return aa_audit_file(profile, perms, op, request, name, NULL, NULL, @@ -380,7 +430,7 @@ static int profile_path_link(struct aa_profile *profile, error = -EACCES; /* aa_str_perms - handles the case of the dfa being NULL */ - state = aa_str_perms(profile->file.dfa, profile->file.start, lname, + state = aa_str_perms(&(profile->file), profile->file.start, lname, cond, &lperms); if (!(lperms.allow & AA_MAY_LINK)) @@ -388,7 +438,7 @@ static int profile_path_link(struct aa_profile *profile, /* test to see if target can be paired with link */ state = aa_dfa_null_transition(profile->file.dfa, state); - aa_str_perms(profile->file.dfa, state, tname, cond, &perms); + aa_str_perms(&(profile->file), state, tname, cond, &perms); /* force audit/quiet masks for link are stored in the second entry * in the link pair. @@ -410,7 +460,7 @@ static int profile_path_link(struct aa_profile *profile, /* Do link perm subset test requiring allowed permission on link are * a subset of the allowed permissions on target. 
*/ - aa_str_perms(profile->file.dfa, profile->file.start, tname, cond, + aa_str_perms(&(profile->file), profile->file.start, tname, cond, &perms); /* AA_MAY_LINK is not considered in the subset test */ diff --git a/security/apparmor/include/file.h b/security/apparmor/include/file.h index 029cb20e322d..ab201d625a34 100644 --- a/security/apparmor/include/file.h +++ b/security/apparmor/include/file.h @@ -181,11 +181,13 @@ struct aa_file_rules { /* struct perms perms; */ struct aa_domain trans; /* TODO: add delegate table */ + struct aa_perms *fperms_table; }; -struct aa_perms aa_compute_fperms(struct aa_dfa *dfa, unsigned int state, - struct path_cond *cond); -unsigned int aa_str_perms(struct aa_dfa *dfa, unsigned int start, +void aa_compute_fperms(struct aa_file_rules *file_rules); +struct aa_perms *aa_lookup_fperms(struct aa_file_rules *file_rules, + unsigned int state, struct path_cond *cond); +unsigned int aa_str_perms(struct aa_file_rules *file_rules, unsigned int start, const char *name, struct path_cond *cond, struct aa_perms *perms); @@ -204,10 +206,17 @@ int aa_file_perm(const char *op, struct aa_label *label, struct file *file, void aa_inherit_files(const struct cred *cred, struct files_struct *files); +static inline void aa_free_fperms_table(struct aa_perms *fperms_table) +{ + if (fperms_table) + kvfree(fperms_table); +} + static inline void aa_free_file_rules(struct aa_file_rules *rules) { aa_put_dfa(rules->dfa); aa_free_domain_entries(&rules->trans); + aa_free_fperms_table(rules->fperms_table); } /** diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 10e462d00321..54175bca4256 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -22,6 +22,7 @@ #include "include/audit.h" #include "include/cred.h" #include "include/crypto.h" +#include "include/file.h" #include "include/match.h" #include "include/path.h" #include "include/policy.h" @@ -878,6 +879,8 @@ static struct aa_profile 
*unpack_profile(struct aa_ext *e, char **ns_name) } else profile->file.dfa = aa_get_dfa(nulldfa); + aa_compute_fperms(&(profile->file)); + if (!unpack_trans_table(e, profile)) { info = "failed to unpack profile transition table"; goto fail; From b5b57993504f91785fa70e002e5e494fb549726e Mon Sep 17 00:00:00 2001 From: Mike Salvatore Date: Sun, 31 May 2020 10:52:06 -0400 Subject: [PATCH 0008/4122] apparmor: compute xmatch permissions on profile load Rather than computing xmatch permissions each time access is requested, these permissions can be computed once on profile load and stored for lookup. Signed-off-by: Mike Salvatore Signed-off-by: John Johansen --- security/apparmor/domain.c | 4 ++-- security/apparmor/include/policy.h | 2 ++ security/apparmor/policy.c | 1 + security/apparmor/policy_unpack.c | 22 +++++++++++++++++++++- 4 files changed, 26 insertions(+), 3 deletions(-) diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index 2c99edd8953a..22351b6d71e6 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -339,7 +339,7 @@ static int aa_xattrs_match(const struct linux_binprm *bprm, /* Check xattr value */ state = aa_dfa_match_len(profile->xmatch, state, value, size); - perm = dfa_user_allow(profile->xmatch, state); + perm = profile->xmatch_perms[state]; if (!(perm & MAY_EXEC)) { ret = -EINVAL; goto out; @@ -419,7 +419,7 @@ restart: state = aa_dfa_leftmatch(profile->xmatch, DFA_START, name, &count); - perm = dfa_user_allow(profile->xmatch, state); + perm = profile->xmatch_perms[state]; /* any accepting state means a valid match. 
*/ if (perm & MAY_EXEC) { int ret = 0; diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h index 639b5b248e63..128c6a9430d4 100644 --- a/security/apparmor/include/policy.h +++ b/security/apparmor/include/policy.h @@ -104,6 +104,7 @@ struct aa_data { * @attach: human readable attachment string * @xmatch: optional extended matching for unconfined executables names * @xmatch_len: xmatch prefix len, used to determine xmatch priority + * @xmatch_perms: precomputed permissions for the xmatch DFA indexed by state * @audit: the auditing mode of the profile * @mode: the enforcement mode of the profile * @path_flags: flags controlling path generation behavior @@ -140,6 +141,7 @@ struct aa_profile { const char *attach; struct aa_dfa *xmatch; unsigned int xmatch_len; + u32 *xmatch_perms; enum audit_mode audit; long mode; u32 path_flags; diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index fbdfcef91c61..e2d23cd85cd2 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -231,6 +231,7 @@ void aa_free_profile(struct aa_profile *profile) kfree_sensitive(profile->secmark); kfree_sensitive(profile->dirname); aa_put_dfa(profile->xmatch); + kvfree(profile->xmatch_perms); aa_put_dfa(profile->policy.dfa); if (profile->data) { diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 54175bca4256..70b7a35b5b96 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -669,6 +669,23 @@ static int datacmp(struct rhashtable_compare_arg *arg, const void *obj) return strcmp(data->key, *key); } +static u32 *aa_compute_xmatch_perms(struct aa_dfa *xmatch) +{ + u32 *perms_table; + int state; + int state_count = xmatch->tables[YYTD_ID_BASE]->td_lolen; + + // DFAs are restricted from having a state_count of less than 2 + perms_table = kvcalloc(state_count, sizeof(u32), GFP_KERNEL); + + // Since perms_table is initialized with zeroes via kvcalloc(), we can + // 
skip the trap state (state == 0) + for (state = 1; state < state_count; state++) + perms_table[state] = dfa_user_allow(xmatch, state); + + return perms_table; +} + /** * unpack_profile - unpack a serialized profile * @e: serialized data extent information (NOT NULL) @@ -727,13 +744,16 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) info = "bad xmatch"; goto fail; } - /* xmatch_len is not optional if xmatch is set */ + /* neither xmatch_len not xmatch_perms are optional if xmatch is set */ if (profile->xmatch) { if (!unpack_u32(e, &tmp, NULL)) { info = "missing xmatch len"; goto fail; } profile->xmatch_len = tmp; + + profile->xmatch_perms = aa_compute_xmatch_perms( + profile->xmatch); } /* disconnected attachment string is optional */ From 754f209b811ac462e00ed0f79b48047c446f5c43 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Thu, 12 Nov 2020 10:07:25 -0800 Subject: [PATCH 0009/4122] apparmor: move fperm computation into policy_unpack fperm computation is only needed during policy_unpack so move the code there to isolate it from the run time code. 
Signed-off-by: John Johansen --- security/apparmor/file.c | 97 ------------------------------ security/apparmor/include/file.h | 1 - security/apparmor/policy_unpack.c | 98 +++++++++++++++++++++++++++++++ 3 files changed, 98 insertions(+), 98 deletions(-) diff --git a/security/apparmor/file.c b/security/apparmor/file.c index 710b7d7517eb..1227ae839154 100644 --- a/security/apparmor/file.c +++ b/security/apparmor/file.c @@ -174,103 +174,6 @@ static int path_name(const char *op, struct aa_label *label, return 0; } -/** - * map_old_perms - map old file perms layout to the new layout - * @old: permission set in old mapping - * - * Returns: new permission mapping - */ -static u32 map_old_perms(u32 old) -{ - u32 new = old & 0xf; - if (old & MAY_READ) - new |= AA_MAY_GETATTR | AA_MAY_OPEN; - if (old & MAY_WRITE) - new |= AA_MAY_SETATTR | AA_MAY_CREATE | AA_MAY_DELETE | - AA_MAY_CHMOD | AA_MAY_CHOWN | AA_MAY_OPEN; - if (old & 0x10) - new |= AA_MAY_LINK; - /* the old mapping lock and link_subset flags where overlaid - * and use was determined by part of a pair that they were in - */ - if (old & 0x20) - new |= AA_MAY_LOCK | AA_LINK_SUBSET; - if (old & 0x40) /* AA_EXEC_MMAP */ - new |= AA_EXEC_MMAP; - - return new; -} - -static void __aa_compute_fperms_allow(struct aa_perms *perms, - struct aa_dfa *dfa, - unsigned int state) -{ - perms->allow |= AA_MAY_GETATTR; - - /* change_profile wasn't determined by ownership in old mapping */ - if (ACCEPT_TABLE(dfa)[state] & 0x80000000) - perms->allow |= AA_MAY_CHANGE_PROFILE; - if (ACCEPT_TABLE(dfa)[state] & 0x40000000) - perms->allow |= AA_MAY_ONEXEC; -} - -static struct aa_perms __aa_compute_fperms_user(struct aa_dfa *dfa, - unsigned int state) -{ - struct aa_perms perms = { }; - - perms.allow = map_old_perms(dfa_user_allow(dfa, state)); - perms.audit = map_old_perms(dfa_user_audit(dfa, state)); - perms.quiet = map_old_perms(dfa_user_quiet(dfa, state)); - perms.xindex = dfa_user_xindex(dfa, state); - - __aa_compute_fperms_allow(&perms, 
dfa, state); - - return perms; -} - -static struct aa_perms __aa_compute_fperms_other(struct aa_dfa *dfa, - unsigned int state) -{ - struct aa_perms perms = { }; - - perms.allow = map_old_perms(dfa_other_allow(dfa, state)); - perms.audit = map_old_perms(dfa_other_audit(dfa, state)); - perms.quiet = map_old_perms(dfa_other_quiet(dfa, state)); - perms.xindex = dfa_other_xindex(dfa, state); - - __aa_compute_fperms_allow(&perms, dfa, state); - - return perms; -} - -/** - * aa_compute_fperms - convert dfa compressed perms to internal perms and store - * them so they can be retrieved later. - * @file_rules: a file_rules structure containing a dfa (NOT NULL) for which - * permissions will be computed (NOT NULL) - * - * TODO: convert from dfa + state to permission entry - */ -void aa_compute_fperms(struct aa_file_rules *file_rules) -{ - int state; - int state_count = file_rules->dfa->tables[YYTD_ID_BASE]->td_lolen; - - // DFAs are restricted from having a state_count of less than 2 - file_rules->fperms_table = kvzalloc( - state_count * 2 * sizeof(struct aa_perms), GFP_KERNEL); - - // Since fperms_table is initialized with zeroes via kvzalloc(), we can - // skip the trap state (state == 0) - for (state = 1; state < state_count; state++) { - file_rules->fperms_table[state * 2] = - __aa_compute_fperms_user(file_rules->dfa, state); - file_rules->fperms_table[state * 2 + 1] = - __aa_compute_fperms_other(file_rules->dfa, state); - } -} - /** * aa_lookup_fperms - convert dfa compressed perms to internal perms * @dfa: dfa to lookup perms for (NOT NULL) diff --git a/security/apparmor/include/file.h b/security/apparmor/include/file.h index ab201d625a34..1f9e54aa1adf 100644 --- a/security/apparmor/include/file.h +++ b/security/apparmor/include/file.h @@ -184,7 +184,6 @@ struct aa_file_rules { struct aa_perms *fperms_table; }; -void aa_compute_fperms(struct aa_file_rules *file_rules); struct aa_perms *aa_lookup_fperms(struct aa_file_rules *file_rules, unsigned int state, struct 
path_cond *cond); unsigned int aa_str_perms(struct aa_file_rules *file_rules, unsigned int start, diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 70b7a35b5b96..c22c6815ff4b 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -669,6 +669,104 @@ static int datacmp(struct rhashtable_compare_arg *arg, const void *obj) return strcmp(data->key, *key); } +/** + * map_old_perms - map old file perms layout to the new layout + * @old: permission set in old mapping + * + * Returns: new permission mapping + */ +static u32 map_old_perms(u32 old) +{ + u32 new = old & 0xf; + + if (old & MAY_READ) + new |= AA_MAY_GETATTR | AA_MAY_OPEN; + if (old & MAY_WRITE) + new |= AA_MAY_SETATTR | AA_MAY_CREATE | AA_MAY_DELETE | + AA_MAY_CHMOD | AA_MAY_CHOWN | AA_MAY_OPEN; + if (old & 0x10) + new |= AA_MAY_LINK; + /* the old mapping lock and link_subset flags where overlaid + * and use was determined by part of a pair that they were in + */ + if (old & 0x20) + new |= AA_MAY_LOCK | AA_LINK_SUBSET; + if (old & 0x40) /* AA_EXEC_MMAP */ + new |= AA_EXEC_MMAP; + + return new; +} + +static void __aa_compute_fperms_allow(struct aa_perms *perms, + struct aa_dfa *dfa, + unsigned int state) +{ + perms->allow |= AA_MAY_GETATTR; + + /* change_profile wasn't determined by ownership in old mapping */ + if (ACCEPT_TABLE(dfa)[state] & 0x80000000) + perms->allow |= AA_MAY_CHANGE_PROFILE; + if (ACCEPT_TABLE(dfa)[state] & 0x40000000) + perms->allow |= AA_MAY_ONEXEC; +} + +static struct aa_perms __aa_compute_fperms_user(struct aa_dfa *dfa, + unsigned int state) +{ + struct aa_perms perms = { }; + + perms.allow = map_old_perms(dfa_user_allow(dfa, state)); + perms.audit = map_old_perms(dfa_user_audit(dfa, state)); + perms.quiet = map_old_perms(dfa_user_quiet(dfa, state)); + perms.xindex = dfa_user_xindex(dfa, state); + + __aa_compute_fperms_allow(&perms, dfa, state); + + return perms; +} + +static struct aa_perms 
__aa_compute_fperms_other(struct aa_dfa *dfa, + unsigned int state) +{ + struct aa_perms perms = { }; + + perms.allow = map_old_perms(dfa_other_allow(dfa, state)); + perms.audit = map_old_perms(dfa_other_audit(dfa, state)); + perms.quiet = map_old_perms(dfa_other_quiet(dfa, state)); + perms.xindex = dfa_other_xindex(dfa, state); + + __aa_compute_fperms_allow(&perms, dfa, state); + + return perms; +} + +/** + * aa_compute_fperms - convert dfa compressed perms to internal perms and store + * them so they can be retrieved later. + * @file_rules: a file_rules structure containing a dfa (NOT NULL) for which + * permissions will be computed (NOT NULL) + * + * TODO: convert from dfa + state to permission entry + */ +static void aa_compute_fperms(struct aa_file_rules *file_rules) +{ + int state; + int state_count = file_rules->dfa->tables[YYTD_ID_BASE]->td_lolen; + + // DFAs are restricted from having a state_count of less than 2 + file_rules->fperms_table = kvzalloc( + state_count * 2 * sizeof(struct aa_perms), GFP_KERNEL); + + // Since fperms_table is initialized with zeroes via kvzalloc(), we can + // skip the trap state (state == 0) + for (state = 1; state < state_count; state++) { + file_rules->fperms_table[state * 2] = + __aa_compute_fperms_user(file_rules->dfa, state); + file_rules->fperms_table[state * 2 + 1] = + __aa_compute_fperms_other(file_rules->dfa, state); + } +} + static u32 *aa_compute_xmatch_perms(struct aa_dfa *xmatch) { u32 *perms_table; From 0310f093ba95e7640c886298de36560c123df5bd Mon Sep 17 00:00:00 2001 From: John Johansen Date: Thu, 12 Nov 2020 10:26:26 -0800 Subject: [PATCH 0010/4122] apparmor: rework and cleanup fperm computation shorten the name of some of the mapping functions which shortens line lengths. change the mapping so it returns the perm table instead of operating directly on the file struct. Handle potential memory allocation failure. 
Signed-off-by: John Johansen --- security/apparmor/policy_unpack.c | 70 +++++++++++++++++-------------- 1 file changed, 38 insertions(+), 32 deletions(-) diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index c22c6815ff4b..0f9a88354d63 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -697,9 +697,8 @@ static u32 map_old_perms(u32 old) return new; } -static void __aa_compute_fperms_allow(struct aa_perms *perms, - struct aa_dfa *dfa, - unsigned int state) +static void compute_fperms_allow(struct aa_perms *perms, struct aa_dfa *dfa, + unsigned int state) { perms->allow |= AA_MAY_GETATTR; @@ -710,8 +709,8 @@ static void __aa_compute_fperms_allow(struct aa_perms *perms, perms->allow |= AA_MAY_ONEXEC; } -static struct aa_perms __aa_compute_fperms_user(struct aa_dfa *dfa, - unsigned int state) +static struct aa_perms compute_fperms_user(struct aa_dfa *dfa, + unsigned int state) { struct aa_perms perms = { }; @@ -720,13 +719,13 @@ static struct aa_perms __aa_compute_fperms_user(struct aa_dfa *dfa, perms.quiet = map_old_perms(dfa_user_quiet(dfa, state)); perms.xindex = dfa_user_xindex(dfa, state); - __aa_compute_fperms_allow(&perms, dfa, state); + compute_fperms_allow(&perms, dfa, state); return perms; } -static struct aa_perms __aa_compute_fperms_other(struct aa_dfa *dfa, - unsigned int state) +static struct aa_perms compute_fperms_other(struct aa_dfa *dfa, + unsigned int state) { struct aa_perms perms = { }; @@ -735,7 +734,7 @@ static struct aa_perms __aa_compute_fperms_other(struct aa_dfa *dfa, perms.quiet = map_old_perms(dfa_other_quiet(dfa, state)); perms.xindex = dfa_other_xindex(dfa, state); - __aa_compute_fperms_allow(&perms, dfa, state); + compute_fperms_allow(&perms, dfa, state); return perms; } @@ -743,41 +742,46 @@ static struct aa_perms __aa_compute_fperms_other(struct aa_dfa *dfa, /** * aa_compute_fperms - convert dfa compressed perms to internal perms and store * them so they can be 
retrieved later. - * @file_rules: a file_rules structure containing a dfa (NOT NULL) for which - * permissions will be computed (NOT NULL) + * @dfa: a dfa using fperms to remap to internal permissions * - * TODO: convert from dfa + state to permission entry + * Returns: remapped perm table */ -static void aa_compute_fperms(struct aa_file_rules *file_rules) +static struct aa_perms *compute_fperms(struct aa_dfa *dfa) { int state; - int state_count = file_rules->dfa->tables[YYTD_ID_BASE]->td_lolen; + int state_count; + struct aa_perms *table; - // DFAs are restricted from having a state_count of less than 2 - file_rules->fperms_table = kvzalloc( - state_count * 2 * sizeof(struct aa_perms), GFP_KERNEL); + AA_BUG(!dfa); - // Since fperms_table is initialized with zeroes via kvzalloc(), we can - // skip the trap state (state == 0) + state_count = dfa->tables[YYTD_ID_BASE]->td_lolen; + /* DFAs are restricted from having a state_count of less than 2 */ + table = kvzalloc(state_count * 2 * sizeof(struct aa_perms), GFP_KERNEL); + if (!table) + return NULL; + + /* zero init so skip the trap state (state == 0) */ for (state = 1; state < state_count; state++) { - file_rules->fperms_table[state * 2] = - __aa_compute_fperms_user(file_rules->dfa, state); - file_rules->fperms_table[state * 2 + 1] = - __aa_compute_fperms_other(file_rules->dfa, state); + table[state * 2] = compute_fperms_user(dfa, state); + table[state * 2 + 1] = compute_fperms_other(dfa, state); } + + return table; } -static u32 *aa_compute_xmatch_perms(struct aa_dfa *xmatch) +static u32 *compute_xmatch_perms(struct aa_dfa *xmatch) { u32 *perms_table; int state; - int state_count = xmatch->tables[YYTD_ID_BASE]->td_lolen; + int state_count; - // DFAs are restricted from having a state_count of less than 2 + AA_BUG(!xmatch); + + state_count = xmatch->tables[YYTD_ID_BASE]->td_lolen; + /* DFAs are restricted from having a state_count of less than 2 */ perms_table = kvcalloc(state_count, sizeof(u32), GFP_KERNEL); - // 
Since perms_table is initialized with zeroes via kvcalloc(), we can - // skip the trap state (state == 0) + /* zero init so skip the trap state (state == 0) */ for (state = 1; state < state_count; state++) perms_table[state] = dfa_user_allow(xmatch, state); @@ -850,8 +854,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) } profile->xmatch_len = tmp; - profile->xmatch_perms = aa_compute_xmatch_perms( - profile->xmatch); + profile->xmatch_perms = compute_xmatch_perms(profile->xmatch); } /* disconnected attachment string is optional */ @@ -997,8 +1000,11 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) } else profile->file.dfa = aa_get_dfa(nulldfa); - aa_compute_fperms(&(profile->file)); - + profile->file.fperms_table = compute_fperms(profile->file.dfa); + if (!profile->file.fperms_table) { + info = "failed to remap file permission table"; + goto fail; + } if (!unpack_trans_table(e, profile)) { info = "failed to unpack profile transition table"; goto fail; From e48ffd24c1d87dba227225615790cd059a707adb Mon Sep 17 00:00:00 2001 From: John Johansen Date: Fri, 13 Nov 2020 16:30:47 -0800 Subject: [PATCH 0011/4122] apparmor: convert xmatch to use aa_perms structure Convert xmatch from using perms encoded in the accept entry of the dfa to the common external aa_perms in a table. 
Signed-off-by: John Johansen --- security/apparmor/domain.c | 4 ++-- security/apparmor/include/policy.h | 3 ++- security/apparmor/policy_unpack.c | 13 +++++++++---- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index 22351b6d71e6..4fcdcc0de48c 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -339,7 +339,7 @@ static int aa_xattrs_match(const struct linux_binprm *bprm, /* Check xattr value */ state = aa_dfa_match_len(profile->xmatch, state, value, size); - perm = profile->xmatch_perms[state]; + perm = profile->xmatch_perms[state].allow; if (!(perm & MAY_EXEC)) { ret = -EINVAL; goto out; @@ -419,7 +419,7 @@ restart: state = aa_dfa_leftmatch(profile->xmatch, DFA_START, name, &count); - perm = profile->xmatch_perms[state]; + perm = profile->xmatch_perms[state].allow; /* any accepting state means a valid match. */ if (perm & MAY_EXEC) { int ret = 0; diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h index 128c6a9430d4..7882d5e5096b 100644 --- a/security/apparmor/include/policy.h +++ b/security/apparmor/include/policy.h @@ -141,7 +141,8 @@ struct aa_profile { const char *attach; struct aa_dfa *xmatch; unsigned int xmatch_len; - u32 *xmatch_perms; + struct aa_perms *xmatch_perms; + enum audit_mode audit; long mode; u32 path_flags; diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 0f9a88354d63..44910c201c49 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -769,9 +769,9 @@ static struct aa_perms *compute_fperms(struct aa_dfa *dfa) return table; } -static u32 *compute_xmatch_perms(struct aa_dfa *xmatch) +static struct aa_perms *compute_xmatch_perms(struct aa_dfa *xmatch) { - u32 *perms_table; + struct aa_perms *perms_table; int state; int state_count; @@ -779,11 +779,12 @@ static u32 *compute_xmatch_perms(struct aa_dfa *xmatch) state_count = 
xmatch->tables[YYTD_ID_BASE]->td_lolen; /* DFAs are restricted from having a state_count of less than 2 */ - perms_table = kvcalloc(state_count, sizeof(u32), GFP_KERNEL); + perms_table = kvcalloc(state_count, sizeof(struct aa_perms), + GFP_KERNEL); /* zero init so skip the trap state (state == 0) */ for (state = 1; state < state_count; state++) - perms_table[state] = dfa_user_allow(xmatch, state); + perms_table[state].allow = dfa_user_allow(xmatch, state); return perms_table; } @@ -855,6 +856,10 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) profile->xmatch_len = tmp; profile->xmatch_perms = compute_xmatch_perms(profile->xmatch); + if (!profile->xmatch_perms) { + info = "failed to convert xmatch permission table"; + goto fail; + } } /* disconnected attachment string is optional */ From e2967ede22978f132cd52929edff96c701bde0eb Mon Sep 17 00:00:00 2001 From: John Johansen Date: Tue, 17 Nov 2020 01:38:16 -0800 Subject: [PATCH 0012/4122] apparmor: compute policydb permission on profile load Rather than computing policydb permissions for each access permissions can be computed once on profile load and stored for lookup. 
Signed-off-by: John Johansen --- security/apparmor/apparmorfs.c | 2 +- security/apparmor/include/perms.h | 13 +++++-- security/apparmor/include/policy.h | 1 + security/apparmor/label.c | 6 ++-- security/apparmor/lib.c | 42 ----------------------- security/apparmor/mount.c | 53 ++++++++++------------------ security/apparmor/net.c | 2 +- security/apparmor/policy.c | 2 +- security/apparmor/policy_unpack.c | 55 +++++++++++++++++++++++++++++- 9 files changed, 90 insertions(+), 86 deletions(-) diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index 825b3093dcdd..117783779337 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -633,7 +633,7 @@ static void profile_query_cb(struct aa_profile *profile, struct aa_perms *perms, state = aa_dfa_match_len(dfa, profile->policy.start[0], match_str, match_len); if (state) - aa_compute_perms(dfa, state, &tmp); + tmp = *aa_lookup_perms(profile->policy.perms, state); } aa_apply_modes_to_perms(profile, &tmp); aa_perms_accum_raw(perms, &tmp); diff --git a/security/apparmor/include/perms.h b/security/apparmor/include/perms.h index 13f20c598448..de9631edb1ff 100644 --- a/security/apparmor/include/perms.h +++ b/security/apparmor/include/perms.h @@ -133,6 +133,17 @@ extern struct aa_perms allperms; xcheck(fn_for_each((L1), (P), (FN1)), fn_for_each((L2), (P), (FN2))) +extern struct aa_perms default_perms; + +static inline struct aa_perms *aa_lookup_perms(struct aa_perms *perms, + unsigned int state) +{ + if (!(perms)) + return &default_perms; + + return &(perms[state]); +} + void aa_perm_mask_to_str(char *str, size_t str_size, const char *chrs, u32 mask); void aa_audit_perm_names(struct audit_buffer *ab, const char * const *names, @@ -141,8 +152,6 @@ void aa_audit_perm_mask(struct audit_buffer *ab, u32 mask, const char *chrs, u32 chrsmask, const char * const *names, u32 namesmask); void aa_apply_modes_to_perms(struct aa_profile *profile, struct aa_perms *perms); -void 
aa_compute_perms(struct aa_dfa *dfa, unsigned int state, - struct aa_perms *perms); void aa_perms_accum(struct aa_perms *accum, struct aa_perms *addend); void aa_perms_accum_raw(struct aa_perms *accum, struct aa_perms *addend); void aa_profile_match_label(struct aa_profile *profile, struct aa_label *label, diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h index 7882d5e5096b..0dec18cd95e5 100644 --- a/security/apparmor/include/policy.h +++ b/security/apparmor/include/policy.h @@ -77,6 +77,7 @@ enum profile_mode { struct aa_policydb { /* Generic policy DFA specific rule types will be subsections of it */ struct aa_dfa *dfa; + struct aa_perms *perms; unsigned int start[AA_CLASS_LAST + 1]; }; diff --git a/security/apparmor/label.c b/security/apparmor/label.c index 0f36ee907438..ddb04417bdab 100644 --- a/security/apparmor/label.c +++ b/security/apparmor/label.c @@ -1328,7 +1328,7 @@ next: if (!state) goto fail; } - aa_compute_perms(profile->policy.dfa, state, perms); + *perms = *aa_lookup_perms(profile->policy.perms, state); aa_apply_modes_to_perms(profile, perms); if ((perms->allow & request) != request) return -EACCES; @@ -1379,7 +1379,7 @@ static int label_components_match(struct aa_profile *profile, return 0; next: - aa_compute_perms(profile->policy.dfa, state, &tmp); + tmp = *aa_lookup_perms(profile->policy.perms, state); aa_apply_modes_to_perms(profile, &tmp); aa_perms_accum(perms, &tmp); label_for_each_cont(i, label, tp) { @@ -1388,7 +1388,7 @@ next: state = match_component(profile, tp, start); if (!state) goto fail; - aa_compute_perms(profile->policy.dfa, state, &tmp); + tmp = *aa_lookup_perms(profile->policy.perms, state); aa_apply_modes_to_perms(profile, &tmp); aa_perms_accum(perms, &tmp); } diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c index 1c72a61108d3..505ef5848f7c 100644 --- a/security/apparmor/lib.c +++ b/security/apparmor/lib.c @@ -315,48 +315,6 @@ void aa_apply_modes_to_perms(struct aa_profile 
*profile, struct aa_perms *perms) */ } -static u32 map_other(u32 x) -{ - return ((x & 0x3) << 8) | /* SETATTR/GETATTR */ - ((x & 0x1c) << 18) | /* ACCEPT/BIND/LISTEN */ - ((x & 0x60) << 19); /* SETOPT/GETOPT */ -} - -static u32 map_xbits(u32 x) -{ - return ((x & 0x1) << 7) | - ((x & 0x7e) << 9); -} - -void aa_compute_perms(struct aa_dfa *dfa, unsigned int state, - struct aa_perms *perms) -{ - /* This mapping is convulated due to history. - * v1-v4: only file perms - * v5: added policydb which dropped in perm user conditional to - * gain new perm bits, but had to map around the xbits because - * the userspace compiler was still munging them. - * v9: adds using the xbits in policydb because the compiler now - * supports treating policydb permission bits different. - * Unfortunately there is not way to force auditing on the - * perms represented by the xbits - */ - *perms = (struct aa_perms) { - .allow = dfa_user_allow(dfa, state) | - map_xbits(dfa_user_xbits(dfa, state)), - .audit = dfa_user_audit(dfa, state), - .quiet = dfa_user_quiet(dfa, state) | - map_xbits(dfa_other_xbits(dfa, state)), - }; - - /* for v5-v9 perm mapping in the policydb, the other set is used - * to extend the general perm set - */ - perms->allow |= map_other(dfa_other_allow(dfa, state)); - perms->audit |= map_other(dfa_other_audit(dfa, state)); - perms->quiet |= map_other(dfa_other_quiet(dfa, state)); -} - /** * aa_perms_accum_raw - accumulate perms with out masking off overlapping perms * @accum - perms struct to accumulate into diff --git a/security/apparmor/mount.c b/security/apparmor/mount.c index f61247241803..1e978c2b1ee4 100644 --- a/security/apparmor/mount.c +++ b/security/apparmor/mount.c @@ -203,25 +203,6 @@ static unsigned int match_mnt_flags(struct aa_dfa *dfa, unsigned int state, return state; } -/** - * compute_mnt_perms - compute mount permission associated with @state - * @dfa: dfa to match against (NOT NULL) - * @state: state match finished in - * - * Returns: mount permissions 
- */ -static struct aa_perms compute_mnt_perms(struct aa_dfa *dfa, - unsigned int state) -{ - struct aa_perms perms = { - .allow = dfa_user_allow(dfa, state), - .audit = dfa_user_audit(dfa, state), - .quiet = dfa_user_quiet(dfa, state), - }; - - return perms; -} - static const char * const mnt_info_table[] = { "match succeeded", "failed mntpnt match", @@ -236,50 +217,52 @@ static const char * const mnt_info_table[] = { * Returns 0 on success else element that match failed in, this is the * index into the mnt_info_table above */ -static int do_match_mnt(struct aa_dfa *dfa, unsigned int start, +static int do_match_mnt(struct aa_policydb *policy, unsigned int start, const char *mntpnt, const char *devname, const char *type, unsigned long flags, void *data, bool binary, struct aa_perms *perms) { unsigned int state; - AA_BUG(!dfa); + AA_BUG(!policy); + AA_BUG(!policy->dfa); + AA_BUG(!policy->perms); AA_BUG(!perms); - state = aa_dfa_match(dfa, start, mntpnt); - state = aa_dfa_null_transition(dfa, state); + state = aa_dfa_match(policy->dfa, start, mntpnt); + state = aa_dfa_null_transition(policy->dfa, state); if (!state) return 1; if (devname) - state = aa_dfa_match(dfa, state, devname); - state = aa_dfa_null_transition(dfa, state); + state = aa_dfa_match(policy->dfa, state, devname); + state = aa_dfa_null_transition(policy->dfa, state); if (!state) return 2; if (type) - state = aa_dfa_match(dfa, state, type); - state = aa_dfa_null_transition(dfa, state); + state = aa_dfa_match(policy->dfa, state, type); + state = aa_dfa_null_transition(policy->dfa, state); if (!state) return 3; - state = match_mnt_flags(dfa, state, flags); + state = match_mnt_flags(policy->dfa, state, flags); if (!state) return 4; - *perms = compute_mnt_perms(dfa, state); + *perms = *aa_lookup_perms(policy->perms, state); if (perms->allow & AA_MAY_MOUNT) return 0; /* only match data if not binary and the DFA flags data is expected */ if (data && !binary && (perms->allow & AA_MNT_CONT_MATCH)) { - state = 
aa_dfa_null_transition(dfa, state); + state = aa_dfa_null_transition(policy->dfa, state); if (!state) return 4; - state = aa_dfa_match(dfa, state, data); + state = aa_dfa_match(policy->dfa, state, data); if (!state) return 5; - *perms = compute_mnt_perms(dfa, state); + *perms = *aa_lookup_perms(policy->perms, state); if (perms->allow & AA_MAY_MOUNT) return 0; } @@ -341,7 +324,7 @@ static int match_mnt_path_str(struct aa_profile *profile, } error = -EACCES; - pos = do_match_mnt(profile->policy.dfa, + pos = do_match_mnt(&profile->policy, profile->policy.start[AA_CLASS_MOUNT], mntpnt, devname, type, flags, data, binary, &perms); if (pos) { @@ -601,7 +584,7 @@ static int profile_umount(struct aa_profile *profile, const struct path *path, state = aa_dfa_match(profile->policy.dfa, profile->policy.start[AA_CLASS_MOUNT], name); - perms = compute_mnt_perms(profile->policy.dfa, state); + perms = *aa_lookup_perms(profile->policy.perms, state); if (AA_MAY_UMOUNT & ~perms.allow) error = -EACCES; @@ -672,7 +655,7 @@ static struct aa_label *build_pivotroot(struct aa_profile *profile, new_name); state = aa_dfa_null_transition(profile->policy.dfa, state); state = aa_dfa_match(profile->policy.dfa, state, old_name); - perms = compute_mnt_perms(profile->policy.dfa, state); + perms = *aa_lookup_perms(profile->policy.perms, state); if (AA_MAY_PIVOTROOT & perms.allow) error = 0; diff --git a/security/apparmor/net.c b/security/apparmor/net.c index 7efe4d17273d..88e8a7ea54c0 100644 --- a/security/apparmor/net.c +++ b/security/apparmor/net.c @@ -125,7 +125,7 @@ int aa_profile_af_perm(struct aa_profile *profile, struct common_audit_data *sa, buffer[1] = cpu_to_be16((u16) type); state = aa_dfa_match_len(profile->policy.dfa, state, (char *) &buffer, 4); - aa_compute_perms(profile->policy.dfa, state, &perms); + perms = *aa_lookup_perms(profile->policy.perms, state); aa_apply_modes_to_perms(profile, &perms); return aa_check_perms(profile, &perms, request, sa, audit_net_cb); diff --git 
a/security/apparmor/policy.c b/security/apparmor/policy.c index e2d23cd85cd2..6c3086e2c820 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -233,7 +233,7 @@ void aa_free_profile(struct aa_profile *profile) aa_put_dfa(profile->xmatch); kvfree(profile->xmatch_perms); aa_put_dfa(profile->policy.dfa); - + kvfree(profile->policy.perms); if (profile->data) { rht = profile->data; profile->data = NULL; diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 44910c201c49..ed063385a83b 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -756,7 +756,7 @@ static struct aa_perms *compute_fperms(struct aa_dfa *dfa) state_count = dfa->tables[YYTD_ID_BASE]->td_lolen; /* DFAs are restricted from having a state_count of less than 2 */ - table = kvzalloc(state_count * 2 * sizeof(struct aa_perms), GFP_KERNEL); + table = kvcalloc(state_count * 2, sizeof(struct aa_perms), GFP_KERNEL); if (!table) return NULL; @@ -789,6 +789,54 @@ static struct aa_perms *compute_xmatch_perms(struct aa_dfa *xmatch) return perms_table; } +static u32 map_other(u32 x) +{ + return ((x & 0x3) << 8) | /* SETATTR/GETATTR */ + ((x & 0x1c) << 18) | /* ACCEPT/BIND/LISTEN */ + ((x & 0x60) << 19); /* SETOPT/GETOPT */ +} + +static struct aa_perms compute_perms_entry(struct aa_dfa *dfa, + unsigned int state) +{ + struct aa_perms perms = { }; + + perms.allow = dfa_user_allow(dfa, state); + perms.audit = dfa_user_audit(dfa, state); + perms.quiet = dfa_user_quiet(dfa, state); + + /* for v5 perm mapping in the policydb, the other set is used + * to extend the general perm set + */ + + perms.allow |= map_other(dfa_other_allow(dfa, state)); + perms.audit |= map_other(dfa_other_audit(dfa, state)); + perms.quiet |= map_other(dfa_other_quiet(dfa, state)); + + return perms; +} + +static struct aa_perms *compute_perms(struct aa_dfa *dfa) +{ + int state; + int state_count; + struct aa_perms *table; + + AA_BUG(!dfa); + + state_count = 
dfa->tables[YYTD_ID_BASE]->td_lolen; + /* DFAs are restricted from having a state_count of less than 2 */ + table = kvcalloc(state_count, sizeof(struct aa_perms), GFP_KERNEL); + if (!table) + return NULL; + + /* zero init so skip the trap state (state == 0) */ + for (state = 1; state < state_count; state++) + table[state] = compute_perms_entry(dfa, state); + + return table; +} + /** * unpack_profile - unpack a serialized profile * @e: serialized data extent information (NOT NULL) @@ -986,6 +1034,11 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) goto fail; } else profile->policy.dfa = aa_get_dfa(nulldfa); + profile->policy.perms = compute_perms(profile->policy.dfa); + if (!profile->policy.perms) { + info = "failed to remap policydb permission table"; + goto fail; + } /* get file rules */ profile->file.dfa = unpack_dfa(e); From 53bdc46f4bdd20d477afb374767cabe627fd04ae Mon Sep 17 00:00:00 2001 From: John Johansen Date: Thu, 19 Nov 2020 10:37:48 -0800 Subject: [PATCH 0013/4122] apparmor: combine file_rules and aa_policydb into a single shared struct file_rules and policydb are almost the same and will need the same features in the future so combine them. 
Signed-off-by: John Johansen --- security/apparmor/apparmorfs.c | 3 ++- security/apparmor/domain.c | 7 +++--- security/apparmor/file.c | 20 ++++++++------- security/apparmor/include/file.h | 39 +++--------------------------- security/apparmor/include/policy.h | 14 ++++++++--- security/apparmor/policy.c | 5 ++-- security/apparmor/policy_unpack.c | 11 +++++---- 7 files changed, 40 insertions(+), 59 deletions(-) diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index 117783779337..1625fee17fc7 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -619,7 +619,8 @@ static void profile_query_cb(struct aa_profile *profile, struct aa_perms *perms, return; if (profile->file.dfa && *match_str == AA_CLASS_FILE) { dfa = profile->file.dfa; - state = aa_dfa_match_len(dfa, profile->file.start, + state = aa_dfa_match_len(dfa, + profile->file.start[AA_CLASS_FILE], match_str + 1, match_len - 1); if (state) { struct path_cond cond = { }; diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index 4fcdcc0de48c..819b7828cbc4 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -627,7 +627,7 @@ static struct aa_label *profile_transition(struct aa_profile *profile, { struct aa_label *new = NULL; const char *info = NULL, *name = NULL, *target = NULL; - unsigned int state = profile->file.start; + unsigned int state = profile->file.start[AA_CLASS_FILE]; struct aa_perms perms = {}; bool nonewprivs = false; int error = 0; @@ -723,7 +723,7 @@ static int profile_onexec(struct aa_profile *profile, struct aa_label *onexec, char *buffer, struct path_cond *cond, bool *secure_exec) { - unsigned int state = profile->file.start; + unsigned int state = profile->file.start[AA_CLASS_FILE]; struct aa_perms perms = {}; const char *xname = NULL, *info = "change_profile onexec"; int error = -EACCES; @@ -1267,7 +1267,8 @@ static int change_profile_perms_wrapper(const char *op, const char *name, if (!error) error = 
change_profile_perms(profile, target, stack, request, - profile->file.start, perms); + profile->file.start[AA_CLASS_FILE], + perms); if (error) error = aa_audit_file(profile, perms, op, request, name, NULL, target, GLOBAL_ROOT_UID, info, diff --git a/security/apparmor/file.c b/security/apparmor/file.c index 1227ae839154..d2be851be412 100644 --- a/security/apparmor/file.c +++ b/security/apparmor/file.c @@ -185,16 +185,16 @@ static int path_name(const char *op, struct aa_label *label, * Returns: a pointer to a file permission set */ struct aa_perms default_perms = {}; -struct aa_perms *aa_lookup_fperms(struct aa_file_rules *file_rules, +struct aa_perms *aa_lookup_fperms(struct aa_policydb *file_rules, unsigned int state, struct path_cond *cond) { - if (!(file_rules->fperms_table)) + if (!(file_rules->perms)) return &default_perms; if (uid_eq(current_fsuid(), cond->uid)) - return &(file_rules->fperms_table[state * 2]); + return &(file_rules->perms[state * 2]); - return &(file_rules->fperms_table[state * 2 + 1]); + return &(file_rules->perms[state * 2 + 1]); } /** @@ -207,7 +207,7 @@ struct aa_perms *aa_lookup_fperms(struct aa_file_rules *file_rules, * * Returns: the final state in @dfa when beginning @start and walking @name */ -unsigned int aa_str_perms(struct aa_file_rules *file_rules, unsigned int start, +unsigned int aa_str_perms(struct aa_policydb *file_rules, unsigned int start, const char *name, struct path_cond *cond, struct aa_perms *perms) { @@ -226,7 +226,8 @@ int __aa_path_perm(const char *op, struct aa_profile *profile, const char *name, if (profile_unconfined(profile)) return 0; - aa_str_perms(&(profile->file), profile->file.start, name, cond, perms); + aa_str_perms(&(profile->file), profile->file.start[AA_CLASS_FILE], + name, cond, perms); if (request & ~perms->allow) e = -EACCES; return aa_audit_file(profile, perms, op, request, name, NULL, NULL, @@ -333,7 +334,8 @@ static int profile_path_link(struct aa_profile *profile, error = -EACCES; /* 
aa_str_perms - handles the case of the dfa being NULL */ - state = aa_str_perms(&(profile->file), profile->file.start, lname, + state = aa_str_perms(&(profile->file), + profile->file.start[AA_CLASS_FILE], lname, cond, &lperms); if (!(lperms.allow & AA_MAY_LINK)) @@ -363,8 +365,8 @@ static int profile_path_link(struct aa_profile *profile, /* Do link perm subset test requiring allowed permission on link are * a subset of the allowed permissions on target. */ - aa_str_perms(&(profile->file), profile->file.start, tname, cond, - &perms); + aa_str_perms(&(profile->file), profile->file.start[AA_CLASS_FILE], + tname, cond, &perms); /* AA_MAY_LINK is not considered in the subset test */ request = lperms.allow & ~AA_MAY_LINK; diff --git a/security/apparmor/include/file.h b/security/apparmor/include/file.h index 1f9e54aa1adf..736b8f655404 100644 --- a/security/apparmor/include/file.h +++ b/security/apparmor/include/file.h @@ -17,6 +17,7 @@ #include "match.h" #include "perms.h" +struct aa_policydb; struct aa_profile; struct path; @@ -164,29 +165,9 @@ int aa_audit_file(struct aa_profile *profile, struct aa_perms *perms, const char *target, struct aa_label *tlabel, kuid_t ouid, const char *info, int error); -/** - * struct aa_file_rules - components used for file rule permissions - * @dfa: dfa to match path names and conditionals against - * @perms: permission table indexed by the matched state accept entry of @dfa - * @trans: transition table for indexed by named x transitions - * - * File permission are determined by matching a path against @dfa and - * then using the value of the accept entry for the matching state as - * an index into @perms. If a named exec transition is required it is - * looked up in the transition table. 
- */ -struct aa_file_rules { - unsigned int start; - struct aa_dfa *dfa; - /* struct perms perms; */ - struct aa_domain trans; - /* TODO: add delegate table */ - struct aa_perms *fperms_table; -}; - -struct aa_perms *aa_lookup_fperms(struct aa_file_rules *file_rules, - unsigned int state, struct path_cond *cond); -unsigned int aa_str_perms(struct aa_file_rules *file_rules, unsigned int start, +struct aa_perms *aa_lookup_fperms(struct aa_policydb *file_rules, + unsigned int state, struct path_cond *cond); +unsigned int aa_str_perms(struct aa_policydb *file_rules, unsigned int start, const char *name, struct path_cond *cond, struct aa_perms *perms); @@ -205,18 +186,6 @@ int aa_file_perm(const char *op, struct aa_label *label, struct file *file, void aa_inherit_files(const struct cred *cred, struct files_struct *files); -static inline void aa_free_fperms_table(struct aa_perms *fperms_table) -{ - if (fperms_table) - kvfree(fperms_table); -} - -static inline void aa_free_file_rules(struct aa_file_rules *rules) -{ - aa_put_dfa(rules->dfa); - aa_free_domain_entries(&rules->trans); - aa_free_fperms_table(rules->fperms_table); -} /** * aa_map_file_perms - map file flags to AppArmor permissions diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h index 0dec18cd95e5..9bafeb3847d5 100644 --- a/security/apparmor/include/policy.h +++ b/security/apparmor/include/policy.h @@ -75,13 +75,21 @@ enum profile_mode { * start: set of start states for the different classes of data */ struct aa_policydb { - /* Generic policy DFA specific rule types will be subsections of it */ struct aa_dfa *dfa; struct aa_perms *perms; + struct aa_domain trans; unsigned int start[AA_CLASS_LAST + 1]; - }; +static inline void aa_destroy_policydb(struct aa_policydb *policy) +{ + aa_put_dfa(policy->dfa); + if (policy->perms) + kvfree(policy->perms); + aa_free_domain_entries(&policy->trans); + +} + /* struct aa_data - generic data structure * key: name for retrieving this data * 
size: size of data in bytes @@ -151,7 +159,7 @@ struct aa_profile { int size; struct aa_policydb policy; - struct aa_file_rules file; + struct aa_policydb file; struct aa_caps caps; int xattr_count; diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index 6c3086e2c820..0814ee57a06b 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -219,7 +219,7 @@ void aa_free_profile(struct aa_profile *profile) aa_put_ns(profile->ns); kfree_sensitive(profile->rename); - aa_free_file_rules(&profile->file); + aa_destroy_policydb(&profile->file); aa_free_cap_rules(&profile->caps); aa_free_rlimit_rules(&profile->rlimits); @@ -232,8 +232,7 @@ void aa_free_profile(struct aa_profile *profile) kfree_sensitive(profile->dirname); aa_put_dfa(profile->xmatch); kvfree(profile->xmatch_perms); - aa_put_dfa(profile->policy.dfa); - kvfree(profile->policy.perms); + aa_destroy_policydb(&profile->policy); if (profile->data) { rht = profile->data; profile->data = NULL; diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index ed063385a83b..726fa02026b5 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -1048,18 +1048,19 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) info = "failed to unpack profile file rules"; goto fail; } else if (profile->file.dfa) { - if (!unpack_u32(e, &profile->file.start, "dfa_start")) + if (!unpack_u32(e, &profile->file.start[AA_CLASS_FILE], + "dfa_start")) /* default start state */ - profile->file.start = DFA_START; + profile->file.start[AA_CLASS_FILE] = DFA_START; } else if (profile->policy.dfa && profile->policy.start[AA_CLASS_FILE]) { profile->file.dfa = aa_get_dfa(profile->policy.dfa); - profile->file.start = profile->policy.start[AA_CLASS_FILE]; + profile->file.start[AA_CLASS_FILE] = profile->policy.start[AA_CLASS_FILE]; } else profile->file.dfa = aa_get_dfa(nulldfa); - profile->file.fperms_table = 
compute_fperms(profile->file.dfa); - if (!profile->file.fperms_table) { + profile->file.perms = compute_fperms(profile->file.dfa); + if (!profile->file.perms) { info = "failed to remap file permission table"; goto fail; } From 048d49544455b3e3a535c4ec89057ea5ca8676f0 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Sat, 21 Nov 2020 01:42:40 -0800 Subject: [PATCH 0014/4122] apparmor: convert xmatch to using the new shared policydb struct continue permission unification by converting xmatch to use the policydb struct that is used by the other profile dfas. Signed-off-by: John Johansen --- security/apparmor/apparmorfs.c | 2 +- security/apparmor/domain.c | 22 ++++++++++++---------- security/apparmor/include/apparmor.h | 1 + security/apparmor/include/policy.h | 4 +--- security/apparmor/policy.c | 3 +-- security/apparmor/policy_unpack.c | 25 ++++++++++++------------- 6 files changed, 28 insertions(+), 29 deletions(-) diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index 1625fee17fc7..a2d12b80592b 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -1095,7 +1095,7 @@ static int seq_profile_attach_show(struct seq_file *seq, void *v) struct aa_profile *profile = labels_profile(label); if (profile->attach) seq_printf(seq, "%s\n", profile->attach); - else if (profile->xmatch) + else if (profile->xmatch.dfa) seq_puts(seq, "\n"); else seq_printf(seq, "%s\n", profile->base.name); diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index 819b7828cbc4..0df17fb236c7 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -321,7 +321,7 @@ static int aa_xattrs_match(const struct linux_binprm *bprm, might_sleep(); /* transition from exec match to xattr set */ - state = aa_dfa_outofband_transition(profile->xmatch, state); + state = aa_dfa_outofband_transition(profile->xmatch.dfa, state); d = bprm->file->f_path.dentry; for (i = 0; i < profile->xattr_count; i++) { @@ -335,18 +335,19 @@ 
static int aa_xattrs_match(const struct linux_binprm *bprm, * that not present xattr can be distinguished from a 0 * length value or rule that matches any value */ - state = aa_dfa_null_transition(profile->xmatch, state); + state = aa_dfa_null_transition(profile->xmatch.dfa, + state); /* Check xattr value */ - state = aa_dfa_match_len(profile->xmatch, state, value, - size); - perm = profile->xmatch_perms[state].allow; + state = aa_dfa_match_len(profile->xmatch.dfa, state, + value, size); + perm = profile->xmatch.perms[state].allow; if (!(perm & MAY_EXEC)) { ret = -EINVAL; goto out; } } /* transition to next element */ - state = aa_dfa_outofband_transition(profile->xmatch, state); + state = aa_dfa_outofband_transition(profile->xmatch.dfa, state); if (size < 0) { /* * No xattr match, so verify if transition to @@ -413,13 +414,14 @@ restart: * as another profile, signal a conflict and refuse to * match. */ - if (profile->xmatch) { + if (profile->xmatch.dfa) { unsigned int state, count; u32 perm; - state = aa_dfa_leftmatch(profile->xmatch, DFA_START, - name, &count); - perm = profile->xmatch_perms[state].allow; + state = aa_dfa_leftmatch(profile->xmatch.dfa, + profile->xmatch.start[AA_CLASS_XMATCH], + name, &count); + perm = profile->xmatch.perms[state].allow; /* any accepting state means a valid match. 
*/ if (perm & MAY_EXEC) { int ret = 0; diff --git a/security/apparmor/include/apparmor.h b/security/apparmor/include/apparmor.h index dd2c131ed170..8fd66a4ca0b8 100644 --- a/security/apparmor/include/apparmor.h +++ b/security/apparmor/include/apparmor.h @@ -26,6 +26,7 @@ #define AA_CLASS_MOUNT 7 #define AA_CLASS_PTRACE 9 #define AA_CLASS_SIGNAL 10 +#define AA_CLASS_XMATCH 11 #define AA_CLASS_NET 14 #define AA_CLASS_LABEL 16 #define AA_CLASS_POSIX_MQUEUE 17 diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h index 9bafeb3847d5..44d8cbb1c368 100644 --- a/security/apparmor/include/policy.h +++ b/security/apparmor/include/policy.h @@ -113,7 +113,6 @@ struct aa_data { * @attach: human readable attachment string * @xmatch: optional extended matching for unconfined executables names * @xmatch_len: xmatch prefix len, used to determine xmatch priority - * @xmatch_perms: precomputed permissions for the xmatch DFA indexed by state * @audit: the auditing mode of the profile * @mode: the enforcement mode of the profile * @path_flags: flags controlling path generation behavior @@ -148,9 +147,8 @@ struct aa_profile { const char *rename; const char *attach; - struct aa_dfa *xmatch; + struct aa_policydb xmatch; unsigned int xmatch_len; - struct aa_perms *xmatch_perms; enum audit_mode audit; long mode; diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index 0814ee57a06b..cdcf26c9bed5 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -230,8 +230,7 @@ void aa_free_profile(struct aa_profile *profile) kfree_sensitive(profile->secmark[i].label); kfree_sensitive(profile->secmark); kfree_sensitive(profile->dirname); - aa_put_dfa(profile->xmatch); - kvfree(profile->xmatch_perms); + aa_destroy_policydb(&profile->xmatch); aa_destroy_policydb(&profile->policy); if (profile->data) { rht = profile->data; diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 726fa02026b5..f2a075986e49 
100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -771,7 +771,7 @@ static struct aa_perms *compute_fperms(struct aa_dfa *dfa) static struct aa_perms *compute_xmatch_perms(struct aa_dfa *xmatch) { - struct aa_perms *perms_table; + struct aa_perms *perms; int state; int state_count; @@ -779,14 +779,13 @@ static struct aa_perms *compute_xmatch_perms(struct aa_dfa *xmatch) state_count = xmatch->tables[YYTD_ID_BASE]->td_lolen; /* DFAs are restricted from having a state_count of less than 2 */ - perms_table = kvcalloc(state_count, sizeof(struct aa_perms), - GFP_KERNEL); + perms = kvcalloc(state_count, sizeof(struct aa_perms), GFP_KERNEL); /* zero init so skip the trap state (state == 0) */ for (state = 1; state < state_count; state++) - perms_table[state].allow = dfa_user_allow(xmatch, state); + perms[state].allow = dfa_user_allow(xmatch, state); - return perms_table; + return perms; } static u32 map_other(u32 x) @@ -888,23 +887,23 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) (void) unpack_str(e, &profile->attach, "attach"); /* xmatch is optional and may be NULL */ - profile->xmatch = unpack_dfa(e); - if (IS_ERR(profile->xmatch)) { - error = PTR_ERR(profile->xmatch); - profile->xmatch = NULL; + profile->xmatch.dfa = unpack_dfa(e); + if (IS_ERR(profile->xmatch.dfa)) { + error = PTR_ERR(profile->xmatch.dfa); + profile->xmatch.dfa = NULL; info = "bad xmatch"; goto fail; } /* neither xmatch_len not xmatch_perms are optional if xmatch is set */ - if (profile->xmatch) { + if (profile->xmatch.dfa) { if (!unpack_u32(e, &tmp, NULL)) { info = "missing xmatch len"; goto fail; } profile->xmatch_len = tmp; - - profile->xmatch_perms = compute_xmatch_perms(profile->xmatch); - if (!profile->xmatch_perms) { + profile->xmatch.start[AA_CLASS_XMATCH] = DFA_START; + profile->xmatch.perms = compute_xmatch_perms(profile->xmatch.dfa); + if (!profile->xmatch.perms) { info = "failed to convert xmatch permission table"; goto 
fail; } From 7572fea31e3e5c4c19154ccc064eb1f83dfe1333 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Fri, 13 Nov 2020 01:46:23 -0800 Subject: [PATCH 0015/4122] apparmor: convert fperm lookup to use accept as an index Remap file dfa accept table from embedded perms to index and then move fperm lookup to use the accept entry as an index into the fperm table. This is a step toward unifying permission lookup. Signed-off-by: John Johansen --- security/apparmor/file.c | 6 ++-- security/apparmor/policy_unpack.c | 57 ++++++++++++++++++++++--------- 2 files changed, 44 insertions(+), 19 deletions(-) diff --git a/security/apparmor/file.c b/security/apparmor/file.c index d2be851be412..7bddec3df75f 100644 --- a/security/apparmor/file.c +++ b/security/apparmor/file.c @@ -188,13 +188,15 @@ struct aa_perms default_perms = {}; struct aa_perms *aa_lookup_fperms(struct aa_policydb *file_rules, unsigned int state, struct path_cond *cond) { + unsigned int index = ACCEPT_TABLE(file_rules->dfa)[state]; + if (!(file_rules->perms)) return &default_perms; if (uid_eq(current_fsuid(), cond->uid)) - return &(file_rules->perms[state * 2]); + return &(file_rules->perms[index]); - return &(file_rules->perms[state * 2 + 1]); + return &(file_rules->perms[index + 1]); } /** diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index f2a075986e49..4cf62c1be388 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -836,6 +836,29 @@ static struct aa_perms *compute_perms(struct aa_dfa *dfa) return table; } +/** + * remap_dfa_accept - remap old dfa accept table to be an index + * @dfa: dfa to do the remapping on + * @factor: scaling factor for the index conversion. + * + * Used in conjunction with compute_Xperms, it converts old style perms + * that are encoded in the dfa accept tables to the new style where + * there is a permission table and the accept table is an index into + * the permission table. 
+ */ +static void remap_dfa_accept(struct aa_dfa *dfa, unsigned int factor) +{ + unsigned int state; + unsigned int state_count = dfa->tables[YYTD_ID_BASE]->td_lolen; + + AA_BUG(!dfa); + + for (state = 0; state < state_count; state++) + ACCEPT_TABLE(dfa)[state] = state * factor; + kvfree(dfa->tables[YYTD_ID_ACCEPT2]); + dfa->tables[YYTD_ID_ACCEPT2] = NULL; +} + /** * unpack_profile - unpack a serialized profile * @e: serialized data extent information (NOT NULL) @@ -1051,6 +1074,16 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) "dfa_start")) /* default start state */ profile->file.start[AA_CLASS_FILE] = DFA_START; + profile->file.perms = compute_fperms(profile->file.dfa); + if (!profile->file.perms) { + info = "failed to remap file permission table"; + goto fail; + } + remap_dfa_accept(profile->file.dfa, 2); + if (!unpack_trans_table(e, profile)) { + info = "failed to unpack profile transition table"; + goto fail; + } } else if (profile->policy.dfa && profile->policy.start[AA_CLASS_FILE]) { profile->file.dfa = aa_get_dfa(profile->policy.dfa); @@ -1058,16 +1091,6 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) } else profile->file.dfa = aa_get_dfa(nulldfa); - profile->file.perms = compute_fperms(profile->file.dfa); - if (!profile->file.perms) { - info = "failed to remap file permission table"; - goto fail; - } - if (!unpack_trans_table(e, profile)) { - info = "failed to unpack profile transition table"; - goto fail; - } - if (unpack_nameX(e, AA_STRUCT, "data")) { info = "out of memory"; profile->data = kzalloc(sizeof(*profile->data), GFP_KERNEL); @@ -1198,9 +1221,7 @@ static bool verify_dfa_xindex(struct aa_dfa *dfa, int table_size) { int i; for (i = 0; i < dfa->tables[YYTD_ID_ACCEPT]->td_lolen; i++) { - if (!verify_xindex(dfa_user_xindex(dfa, i), table_size)) - return false; - if (!verify_xindex(dfa_other_xindex(dfa, i), table_size)) + if (!verify_xindex(ACCEPT_TABLE(dfa)[i], table_size)) return false; } 
return true; @@ -1211,14 +1232,16 @@ static bool verify_dfa_xindex(struct aa_dfa *dfa, int table_size) * @profile: profile to verify (NOT NULL) * * Returns: 0 if passes verification else error + * + * This verification is post any unpack mapping or changes */ static int verify_profile(struct aa_profile *profile) { if (profile->file.dfa && - !verify_dfa_xindex(profile->file.dfa, - profile->file.trans.size)) { - audit_iface(profile, NULL, NULL, "Invalid named transition", - NULL, -EPROTO); + !verify_dfa_xindex(profile->file.dfa, + profile->file.trans.size)) { + audit_iface(profile, NULL, NULL, + "Unpack: Invalid named transition", NULL, -EPROTO); return -EPROTO; } From 2d63dd43ae334ec6f5374d37bb06c4cc57621b3c Mon Sep 17 00:00:00 2001 From: John Johansen Date: Fri, 13 Nov 2020 23:36:09 -0800 Subject: [PATCH 0016/4122] apparmor: convert xmatch lookup to use accept as an index Remap xmatch dfa accept table from embedded perms to an index and then move xmatch lookup to use the accept entry to index into the xmatch table. This is a step towards unifying permission lookup and reducing the size of permissions tables. Signed-off-by: John Johansen --- security/apparmor/domain.c | 10 ++++++---- security/apparmor/policy_unpack.c | 1 + 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index 0df17fb236c7..45a8887021f1 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -328,7 +328,7 @@ static int aa_xattrs_match(const struct linux_binprm *bprm, size = vfs_getxattr_alloc(&init_user_ns, d, profile->xattrs[i], &value, value_size, GFP_KERNEL); if (size >= 0) { - u32 perm; + u32 index, perm; /* * Check the xattr presence before value. 
This ensure @@ -340,7 +340,8 @@ static int aa_xattrs_match(const struct linux_binprm *bprm, /* Check xattr value */ state = aa_dfa_match_len(profile->xmatch.dfa, state, value, size); - perm = profile->xmatch.perms[state].allow; + index = ACCEPT_TABLE(profile->xmatch.dfa)[state]; + perm = profile->xmatch.perms[index].allow; if (!(perm & MAY_EXEC)) { ret = -EINVAL; goto out; @@ -416,12 +417,13 @@ restart: */ if (profile->xmatch.dfa) { unsigned int state, count; - u32 perm; + u32 index, perm; state = aa_dfa_leftmatch(profile->xmatch.dfa, profile->xmatch.start[AA_CLASS_XMATCH], name, &count); - perm = profile->xmatch.perms[state].allow; + index = ACCEPT_TABLE(profile->xmatch.dfa)[state]; + perm = profile->xmatch.perms[index].allow; /* any accepting state means a valid match. */ if (perm & MAY_EXEC) { int ret = 0; diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 4cf62c1be388..4cdc96988783 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -930,6 +930,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) info = "failed to convert xmatch permission table"; goto fail; } + remap_dfa_accept(profile->xmatch.dfa, 1); } /* disconnected attachment string is optional */ From bf690f59d0429c62de4db1234f16557eedcb39bf Mon Sep 17 00:00:00 2001 From: John Johansen Date: Sat, 10 Apr 2021 02:09:44 -0700 Subject: [PATCH 0017/4122] apparmor: cleanup shared permission struct The shared permissions struct has the stop field, which is unneeded, and has the "reserved" subtree field, which is needed, commented out. Also reorganize so that the entries are logically grouped. 
Signed-off-by: John Johansen --- security/apparmor/include/perms.h | 17 +++++++---------- security/apparmor/lib.c | 4 ++-- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/security/apparmor/include/perms.h b/security/apparmor/include/perms.h index de9631edb1ff..1f3e7680e809 100644 --- a/security/apparmor/include/perms.h +++ b/security/apparmor/include/perms.h @@ -65,22 +65,19 @@ extern const char *aa_file_perm_names[]; struct aa_perms { u32 allow; - u32 audit; /* set only when allow is set */ - u32 deny; /* explicit deny, or conflict if allow also set */ - u32 quiet; /* set only when ~allow | deny */ - u32 kill; /* set only when ~allow | deny */ - u32 stop; /* set only when ~allow | deny */ - u32 complain; /* accumulates only used when ~allow & ~deny */ + u32 subtree; /* allow perm on full subtree only when allow is set */ u32 cond; /* set only when ~allow and ~deny */ - u32 hide; /* set only when ~allow | deny */ + u32 kill; /* set only when ~allow | deny */ + u32 complain; /* accumulates only used when ~allow & ~deny */ u32 prompt; /* accumulates only used when ~allow & ~deny */ - /* Reserved: - * u32 subtree; / * set only when allow is set * / - */ + u32 audit; /* set only when allow is set */ + u32 quiet; /* set only when ~allow | deny */ + u32 hide; /* set only when ~allow | deny */ + u16 xindex; }; diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c index 505ef5848f7c..974a217218a6 100644 --- a/security/apparmor/lib.c +++ b/security/apparmor/lib.c @@ -327,11 +327,11 @@ void aa_perms_accum_raw(struct aa_perms *accum, struct aa_perms *addend) accum->audit |= addend->audit & addend->allow; accum->quiet &= addend->quiet & ~addend->allow; accum->kill |= addend->kill & ~addend->allow; - accum->stop |= addend->stop & ~addend->allow; accum->complain |= addend->complain & ~addend->allow & ~addend->deny; accum->cond |= addend->cond & ~addend->allow & ~addend->deny; accum->hide &= addend->hide & ~addend->allow; accum->prompt |= addend->prompt & 
~addend->allow & ~addend->deny; + accum->subtree |= addend->subtree & ~addend->deny; } /** @@ -346,11 +346,11 @@ void aa_perms_accum(struct aa_perms *accum, struct aa_perms *addend) accum->audit |= addend->audit & accum->allow; accum->quiet &= addend->quiet & ~accum->allow; accum->kill |= addend->kill & ~accum->allow; - accum->stop |= addend->stop & ~accum->allow; accum->complain |= addend->complain & ~accum->allow & ~accum->deny; accum->cond |= addend->cond & ~accum->allow & ~accum->deny; accum->hide &= addend->hide & ~accum->allow; accum->prompt |= addend->prompt & ~accum->allow & ~accum->deny; + accum->subtree &= addend->subtree & ~accum->deny; } void aa_profile_match_label(struct aa_profile *profile, struct aa_label *label, From e844fe9b51c984472ea98be3b2d1201ba9ee3213 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Sat, 16 Jul 2022 01:53:46 -0700 Subject: [PATCH 0018/4122] apparmor: convert policy lookup to use accept as an index Remap policydb dfa accept table from embedded perms to an index, and then move the perm lookup to use the accept entry as an index into the perm table. This is done so that the perm table can be separated from the dfa, allowing dfa accept entries to index into shared expanded permission sets. 
Signed-off-by: John Johansen --- security/apparmor/apparmorfs.c | 2 +- security/apparmor/include/perms.h | 8 -------- security/apparmor/include/policy.h | 12 ++++++++++++ security/apparmor/label.c | 6 +++--- security/apparmor/mount.c | 8 ++++---- security/apparmor/net.c | 2 +- security/apparmor/policy_unpack.c | 19 ++++++++++++------- 7 files changed, 33 insertions(+), 24 deletions(-) diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index a2d12b80592b..f2b78108bae8 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -634,7 +634,7 @@ static void profile_query_cb(struct aa_profile *profile, struct aa_perms *perms, state = aa_dfa_match_len(dfa, profile->policy.start[0], match_str, match_len); if (state) - tmp = *aa_lookup_perms(profile->policy.perms, state); + tmp = *aa_lookup_perms(&profile->policy, state); } aa_apply_modes_to_perms(profile, &tmp); aa_perms_accum_raw(perms, &tmp); diff --git a/security/apparmor/include/perms.h b/security/apparmor/include/perms.h index 1f3e7680e809..1014a7bbc027 100644 --- a/security/apparmor/include/perms.h +++ b/security/apparmor/include/perms.h @@ -132,14 +132,6 @@ extern struct aa_perms allperms; extern struct aa_perms default_perms; -static inline struct aa_perms *aa_lookup_perms(struct aa_perms *perms, - unsigned int state) -{ - if (!(perms)) - return &default_perms; - - return &(perms[state]); -} void aa_perm_mask_to_str(char *str, size_t str_size, const char *chrs, u32 mask); diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h index 44d8cbb1c368..31c0af876250 100644 --- a/security/apparmor/include/policy.h +++ b/security/apparmor/include/policy.h @@ -90,6 +90,18 @@ static inline void aa_destroy_policydb(struct aa_policydb *policy) } +static inline struct aa_perms *aa_lookup_perms(struct aa_policydb *policy, + unsigned int state) +{ + unsigned int index = ACCEPT_TABLE(policy->dfa)[state]; + + if (!(policy->perms)) + return 
&default_perms; + + return &(policy->perms[index]); +} + + /* struct aa_data - generic data structure * key: name for retrieving this data * size: size of data in bytes diff --git a/security/apparmor/label.c b/security/apparmor/label.c index ddb04417bdab..30cb68641c0f 100644 --- a/security/apparmor/label.c +++ b/security/apparmor/label.c @@ -1328,7 +1328,7 @@ next: if (!state) goto fail; } - *perms = *aa_lookup_perms(profile->policy.perms, state); + *perms = *aa_lookup_perms(&profile->policy, state); aa_apply_modes_to_perms(profile, perms); if ((perms->allow & request) != request) return -EACCES; @@ -1379,7 +1379,7 @@ static int label_components_match(struct aa_profile *profile, return 0; next: - tmp = *aa_lookup_perms(profile->policy.perms, state); + tmp = *aa_lookup_perms(&profile->policy, state); aa_apply_modes_to_perms(profile, &tmp); aa_perms_accum(perms, &tmp); label_for_each_cont(i, label, tp) { @@ -1388,7 +1388,7 @@ next: state = match_component(profile, tp, start); if (!state) goto fail; - tmp = *aa_lookup_perms(profile->policy.perms, state); + tmp = *aa_lookup_perms(&profile->policy, state); aa_apply_modes_to_perms(profile, &tmp); aa_perms_accum(perms, &tmp); } diff --git a/security/apparmor/mount.c b/security/apparmor/mount.c index 1e978c2b1ee4..7594f3a3441e 100644 --- a/security/apparmor/mount.c +++ b/security/apparmor/mount.c @@ -249,7 +249,7 @@ static int do_match_mnt(struct aa_policydb *policy, unsigned int start, state = match_mnt_flags(policy->dfa, state, flags); if (!state) return 4; - *perms = *aa_lookup_perms(policy->perms, state); + *perms = *aa_lookup_perms(policy, state); if (perms->allow & AA_MAY_MOUNT) return 0; @@ -262,7 +262,7 @@ static int do_match_mnt(struct aa_policydb *policy, unsigned int start, state = aa_dfa_match(policy->dfa, state, data); if (!state) return 5; - *perms = *aa_lookup_perms(policy->perms, state); + *perms = *aa_lookup_perms(policy, state); if (perms->allow & AA_MAY_MOUNT) return 0; } @@ -584,7 +584,7 @@ static int 
profile_umount(struct aa_profile *profile, const struct path *path, state = aa_dfa_match(profile->policy.dfa, profile->policy.start[AA_CLASS_MOUNT], name); - perms = *aa_lookup_perms(profile->policy.perms, state); + perms = *aa_lookup_perms(&profile->policy, state); if (AA_MAY_UMOUNT & ~perms.allow) error = -EACCES; @@ -655,7 +655,7 @@ static struct aa_label *build_pivotroot(struct aa_profile *profile, new_name); state = aa_dfa_null_transition(profile->policy.dfa, state); state = aa_dfa_match(profile->policy.dfa, state, old_name); - perms = *aa_lookup_perms(profile->policy.perms, state); + perms = *aa_lookup_perms(&profile->policy, state); if (AA_MAY_PIVOTROOT & perms.allow) error = 0; diff --git a/security/apparmor/net.c b/security/apparmor/net.c index 88e8a7ea54c0..fcfb97079e1b 100644 --- a/security/apparmor/net.c +++ b/security/apparmor/net.c @@ -125,7 +125,7 @@ int aa_profile_af_perm(struct aa_profile *profile, struct common_audit_data *sa, buffer[1] = cpu_to_be16((u16) type); state = aa_dfa_match_len(profile->policy.dfa, state, (char *) &buffer, 4); - perms = *aa_lookup_perms(profile->policy.perms, state); + perms = *aa_lookup_perms(&profile->policy, state); aa_apply_modes_to_perms(profile, &perms); return aa_check_perms(profile, &perms, request, sa, audit_net_cb); diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 4cdc96988783..0917412ba48f 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -1055,13 +1055,15 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) } if (!unpack_nameX(e, AA_STRUCTEND, NULL)) goto fail; + profile->policy.perms = compute_perms(profile->policy.dfa); + if (!profile->policy.perms) { + info = "failed to remap policydb permission table"; + goto fail; + } + /* Do not remap internal dfas */ + remap_dfa_accept(profile->policy.dfa, 1); } else profile->policy.dfa = aa_get_dfa(nulldfa); - profile->policy.perms = 
compute_perms(profile->policy.dfa); - if (!profile->policy.perms) { - info = "failed to remap policydb permission table"; - goto fail; - } /* get file rules */ profile->file.dfa = unpack_dfa(e); @@ -1238,9 +1240,12 @@ static bool verify_dfa_xindex(struct aa_dfa *dfa, int table_size) */ static int verify_profile(struct aa_profile *profile) { - if (profile->file.dfa && + if ((profile->file.dfa && !verify_dfa_xindex(profile->file.dfa, - profile->file.trans.size)) { + profile->file.trans.size)) || + (profile->policy.dfa && + !verify_dfa_xindex(profile->policy.dfa, + profile->policy.trans.size))) { audit_iface(profile, NULL, NULL, "Unpack: Invalid named transition", NULL, -EPROTO); return -EPROTO; From 33fc95d8293cfca352ac875668857293e22d7d51 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Mon, 17 Jan 2022 13:43:49 -0800 Subject: [PATCH 0019/4122] apparmor: preparse for state being more than just an integer Convert from an unsigned int to an aa_state_t for state position. This is a step in preparing for the state position to carry some additional flags, and for a limited form of backtracking to support variables. 
Signed-off-by: John Johansen --- security/apparmor/apparmorfs.c | 2 +- security/apparmor/domain.c | 25 ++++++------- security/apparmor/file.c | 12 +++---- security/apparmor/include/file.h | 8 ++--- security/apparmor/include/label.h | 6 ++-- security/apparmor/include/lib.h | 4 +-- security/apparmor/include/match.h | 28 +++++++-------- security/apparmor/include/policy.h | 14 ++++---- security/apparmor/ipc.c | 2 +- security/apparmor/label.c | 14 ++++---- security/apparmor/lib.c | 2 +- security/apparmor/match.c | 58 +++++++++++++++--------------- security/apparmor/mount.c | 10 +++--- security/apparmor/net.c | 2 +- security/apparmor/policy_unpack.c | 16 ++++----- 15 files changed, 101 insertions(+), 102 deletions(-) diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index f2b78108bae8..fb9d2ccb34d6 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -613,7 +613,7 @@ static void profile_query_cb(struct aa_profile *profile, struct aa_perms *perms, { struct aa_perms tmp = { }; struct aa_dfa *dfa; - unsigned int state = 0; + aa_state_t state = DFA_NOMATCH; if (profile_unconfined(profile)) return; diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index 45a8887021f1..5883f0fc02d3 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -95,9 +95,9 @@ out: * If a subns profile is not to be matched should be prescreened with * visibility test. 
*/ -static inline unsigned int match_component(struct aa_profile *profile, - struct aa_profile *tp, - bool stack, unsigned int state) +static inline aa_state_t match_component(struct aa_profile *profile, + struct aa_profile *tp, + bool stack, aa_state_t state) { const char *ns_name; @@ -132,7 +132,7 @@ static inline unsigned int match_component(struct aa_profile *profile, */ static int label_compound_match(struct aa_profile *profile, struct aa_label *label, bool stack, - unsigned int state, bool subns, u32 request, + aa_state_t state, bool subns, u32 request, struct aa_perms *perms) { struct aa_profile *tp; @@ -192,14 +192,14 @@ fail: */ static int label_components_match(struct aa_profile *profile, struct aa_label *label, bool stack, - unsigned int start, bool subns, u32 request, + aa_state_t start, bool subns, u32 request, struct aa_perms *perms) { struct aa_profile *tp; struct label_it i; struct aa_perms tmp; struct path_cond cond = { }; - unsigned int state = 0; + aa_state_t state = 0; /* find first subcomponent to test */ label_for_each(i, label, tp) { @@ -252,7 +252,7 @@ fail: * Returns: the state the match finished in, may be the none matching state */ static int label_match(struct aa_profile *profile, struct aa_label *label, - bool stack, unsigned int state, bool subns, u32 request, + bool stack, aa_state_t state, bool subns, u32 request, struct aa_perms *perms) { int error; @@ -286,7 +286,7 @@ static int label_match(struct aa_profile *profile, struct aa_label *label, */ static int change_profile_perms(struct aa_profile *profile, struct aa_label *target, bool stack, - u32 request, unsigned int start, + u32 request, aa_state_t start, struct aa_perms *perms) { if (profile_unconfined(profile)) { @@ -308,7 +308,7 @@ static int change_profile_perms(struct aa_profile *profile, * Returns: number of extended attributes that matched, or < 0 on error */ static int aa_xattrs_match(const struct linux_binprm *bprm, - struct aa_profile *profile, unsigned int state) + 
struct aa_profile *profile, aa_state_t state) { int i; ssize_t size; @@ -416,7 +416,8 @@ restart: * match. */ if (profile->xmatch.dfa) { - unsigned int state, count; + unsigned int count; + aa_state_t state; u32 index, perm; state = aa_dfa_leftmatch(profile->xmatch.dfa, @@ -631,7 +632,7 @@ static struct aa_label *profile_transition(struct aa_profile *profile, { struct aa_label *new = NULL; const char *info = NULL, *name = NULL, *target = NULL; - unsigned int state = profile->file.start[AA_CLASS_FILE]; + aa_state_t state = profile->file.start[AA_CLASS_FILE]; struct aa_perms perms = {}; bool nonewprivs = false; int error = 0; @@ -727,7 +728,7 @@ static int profile_onexec(struct aa_profile *profile, struct aa_label *onexec, char *buffer, struct path_cond *cond, bool *secure_exec) { - unsigned int state = profile->file.start[AA_CLASS_FILE]; + aa_state_t state = profile->file.start[AA_CLASS_FILE]; struct aa_perms perms = {}; const char *xname = NULL, *info = "change_profile onexec"; int error = -EACCES; diff --git a/security/apparmor/file.c b/security/apparmor/file.c index 7bddec3df75f..636efcade3f5 100644 --- a/security/apparmor/file.c +++ b/security/apparmor/file.c @@ -186,7 +186,7 @@ static int path_name(const char *op, struct aa_label *label, */ struct aa_perms default_perms = {}; struct aa_perms *aa_lookup_fperms(struct aa_policydb *file_rules, - unsigned int state, struct path_cond *cond) + aa_state_t state, struct path_cond *cond) { unsigned int index = ACCEPT_TABLE(file_rules->dfa)[state]; @@ -209,11 +209,11 @@ struct aa_perms *aa_lookup_fperms(struct aa_policydb *file_rules, * * Returns: the final state in @dfa when beginning @start and walking @name */ -unsigned int aa_str_perms(struct aa_policydb *file_rules, unsigned int start, - const char *name, struct path_cond *cond, - struct aa_perms *perms) +aa_state_t aa_str_perms(struct aa_policydb *file_rules, aa_state_t start, + const char *name, struct path_cond *cond, + struct aa_perms *perms) { - unsigned int 
state; + aa_state_t state; state = aa_dfa_match(file_rules->dfa, start, name); *perms = *(aa_lookup_fperms(file_rules, state, cond)); @@ -320,7 +320,7 @@ static int profile_path_link(struct aa_profile *profile, struct aa_perms lperms = {}, perms; const char *info = NULL; u32 request = AA_MAY_LINK; - unsigned int state; + aa_state_t state; int error; error = path_name(OP_LINK, &profile->label, link, profile->path_flags, diff --git a/security/apparmor/include/file.h b/security/apparmor/include/file.h index 736b8f655404..8c82cf279dc2 100644 --- a/security/apparmor/include/file.h +++ b/security/apparmor/include/file.h @@ -166,10 +166,10 @@ int aa_audit_file(struct aa_profile *profile, struct aa_perms *perms, const char *info, int error); struct aa_perms *aa_lookup_fperms(struct aa_policydb *file_rules, - unsigned int state, struct path_cond *cond); -unsigned int aa_str_perms(struct aa_policydb *file_rules, unsigned int start, - const char *name, struct path_cond *cond, - struct aa_perms *perms); + aa_state_t state, struct path_cond *cond); +aa_state_t aa_str_perms(struct aa_policydb *file_rules, aa_state_t start, + const char *name, struct path_cond *cond, + struct aa_perms *perms); int __aa_path_perm(const char *op, struct aa_profile *profile, const char *name, u32 request, struct path_cond *cond, diff --git a/security/apparmor/include/label.h b/security/apparmor/include/label.h index 860484c6f99a..1130ba10a152 100644 --- a/security/apparmor/include/label.h +++ b/security/apparmor/include/label.h @@ -333,7 +333,7 @@ struct aa_label *aa_label_parse(struct aa_label *base, const char *str, static inline const char *aa_label_strn_split(const char *str, int n) { const char *pos; - unsigned int state; + aa_state_t state; state = aa_dfa_matchn_until(stacksplitdfa, DFA_START, str, n, &pos); if (!ACCEPT_TABLE(stacksplitdfa)[state]) @@ -345,7 +345,7 @@ static inline const char *aa_label_strn_split(const char *str, int n) static inline const char *aa_label_str_split(const char 
*str) { const char *pos; - unsigned int state; + aa_state_t state; state = aa_dfa_match_until(stacksplitdfa, DFA_START, str, &pos); if (!ACCEPT_TABLE(stacksplitdfa)[state]) @@ -358,7 +358,7 @@ static inline const char *aa_label_str_split(const char *str) struct aa_perms; int aa_label_match(struct aa_profile *profile, struct aa_label *label, - unsigned int state, bool subns, u32 request, + aa_state_t state, bool subns, u32 request, struct aa_perms *perms); diff --git a/security/apparmor/include/lib.h b/security/apparmor/include/lib.h index f42359f58eb5..f176f3ced2a3 100644 --- a/security/apparmor/include/lib.h +++ b/security/apparmor/include/lib.h @@ -87,8 +87,8 @@ static inline bool aa_strneq(const char *str, const char *sub, int len) * character which is not used in standard matching and is only * used to separate pairs. */ -static inline unsigned int aa_dfa_null_transition(struct aa_dfa *dfa, - unsigned int start) +static inline aa_state_t aa_dfa_null_transition(struct aa_dfa *dfa, + aa_state_t start) { /* the null transition only needs the string's null terminator byte */ return aa_dfa_next(dfa, start, 0); diff --git a/security/apparmor/include/match.h b/security/apparmor/include/match.h index 884489590588..58fbf67139b9 100644 --- a/security/apparmor/include/match.h +++ b/security/apparmor/include/match.h @@ -125,19 +125,19 @@ static inline size_t table_size(size_t len, size_t el_size) int aa_setup_dfa_engine(void); void aa_teardown_dfa_engine(void); +#define aa_state_t unsigned int + struct aa_dfa *aa_dfa_unpack(void *blob, size_t size, int flags); -unsigned int aa_dfa_match_len(struct aa_dfa *dfa, unsigned int start, - const char *str, int len); -unsigned int aa_dfa_match(struct aa_dfa *dfa, unsigned int start, - const char *str); -unsigned int aa_dfa_next(struct aa_dfa *dfa, unsigned int state, - const char c); -unsigned int aa_dfa_outofband_transition(struct aa_dfa *dfa, - unsigned int state); -unsigned int aa_dfa_match_until(struct aa_dfa *dfa, unsigned int 
start, - const char *str, const char **retpos); -unsigned int aa_dfa_matchn_until(struct aa_dfa *dfa, unsigned int start, - const char *str, int n, const char **retpos); +aa_state_t aa_dfa_match_len(struct aa_dfa *dfa, aa_state_t start, + const char *str, int len); +aa_state_t aa_dfa_match(struct aa_dfa *dfa, aa_state_t start, + const char *str); +aa_state_t aa_dfa_next(struct aa_dfa *dfa, aa_state_t state, const char c); +aa_state_t aa_dfa_outofband_transition(struct aa_dfa *dfa, aa_state_t state); +aa_state_t aa_dfa_match_until(struct aa_dfa *dfa, aa_state_t start, + const char *str, const char **retpos); +aa_state_t aa_dfa_matchn_until(struct aa_dfa *dfa, aa_state_t start, + const char *str, int n, const char **retpos); void aa_dfa_free_kref(struct kref *kref); @@ -156,8 +156,8 @@ struct match_workbuf N = { \ .len = 0, \ } -unsigned int aa_dfa_leftmatch(struct aa_dfa *dfa, unsigned int start, - const char *str, unsigned int *count); +aa_state_t aa_dfa_leftmatch(struct aa_dfa *dfa, aa_state_t start, + const char *str, unsigned int *count); /** * aa_get_dfa - increment refcount on dfa @p diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h index 31c0af876250..3a7d165e8fcc 100644 --- a/security/apparmor/include/policy.h +++ b/security/apparmor/include/policy.h @@ -78,7 +78,7 @@ struct aa_policydb { struct aa_dfa *dfa; struct aa_perms *perms; struct aa_domain trans; - unsigned int start[AA_CLASS_LAST + 1]; + aa_state_t start[AA_CLASS_LAST + 1]; }; static inline void aa_destroy_policydb(struct aa_policydb *policy) @@ -91,7 +91,7 @@ static inline void aa_destroy_policydb(struct aa_policydb *policy) } static inline struct aa_perms *aa_lookup_perms(struct aa_policydb *policy, - unsigned int state) + aa_state_t state) { unsigned int index = ACCEPT_TABLE(policy->dfa)[state]; @@ -239,7 +239,7 @@ static inline struct aa_profile *aa_get_newest_profile(struct aa_profile *p) return labels_profile(aa_get_newest_label(&p->label)); } -static inline 
unsigned int PROFILE_MEDIATES(struct aa_profile *profile, +static inline aa_state_t PROFILE_MEDIATES(struct aa_profile *profile, unsigned char class) { if (class <= AA_CLASS_LAST) @@ -249,13 +249,13 @@ static inline unsigned int PROFILE_MEDIATES(struct aa_profile *profile, profile->policy.start[0], &class, 1); } -static inline unsigned int PROFILE_MEDIATES_AF(struct aa_profile *profile, - u16 AF) { - unsigned int state = PROFILE_MEDIATES(profile, AA_CLASS_NET); +static inline aa_state_t PROFILE_MEDIATES_AF(struct aa_profile *profile, + u16 AF) { + aa_state_t state = PROFILE_MEDIATES(profile, AA_CLASS_NET); __be16 be_af = cpu_to_be16(AF); if (!state) - return 0; + return DFA_NOMATCH; return aa_dfa_match_len(profile->policy.dfa, state, (char *) &be_af, 2); } diff --git a/security/apparmor/ipc.c b/security/apparmor/ipc.c index 3dbbc59d440d..7255a9d52372 100644 --- a/security/apparmor/ipc.c +++ b/security/apparmor/ipc.c @@ -79,7 +79,7 @@ static int profile_signal_perm(struct aa_profile *profile, struct common_audit_data *sa) { struct aa_perms perms; - unsigned int state; + aa_state_t state; if (profile_unconfined(profile) || !PROFILE_MEDIATES(profile, AA_CLASS_SIGNAL)) diff --git a/security/apparmor/label.c b/security/apparmor/label.c index 30cb68641c0f..3a967003fa7c 100644 --- a/security/apparmor/label.c +++ b/security/apparmor/label.c @@ -1265,9 +1265,9 @@ static inline bool label_is_visible(struct aa_profile *profile, * If a subns profile is not to be matched should be prescreened with * visibility test. 
*/ -static inline unsigned int match_component(struct aa_profile *profile, - struct aa_profile *tp, - unsigned int state) +static inline aa_state_t match_component(struct aa_profile *profile, + struct aa_profile *tp, + aa_state_t state) { const char *ns_name; @@ -1299,7 +1299,7 @@ static inline unsigned int match_component(struct aa_profile *profile, */ static int label_compound_match(struct aa_profile *profile, struct aa_label *label, - unsigned int state, bool subns, u32 request, + aa_state_t state, bool subns, u32 request, struct aa_perms *perms) { struct aa_profile *tp; @@ -1356,14 +1356,14 @@ fail: * check to be stacked. */ static int label_components_match(struct aa_profile *profile, - struct aa_label *label, unsigned int start, + struct aa_label *label, aa_state_t start, bool subns, u32 request, struct aa_perms *perms) { struct aa_profile *tp; struct label_it i; struct aa_perms tmp; - unsigned int state = 0; + aa_state_t state = 0; /* find first subcomponent to test */ label_for_each(i, label, tp) { @@ -1415,7 +1415,7 @@ fail: * Returns: the state the match finished in, may be the none matching state */ int aa_label_match(struct aa_profile *profile, struct aa_label *label, - unsigned int state, bool subns, u32 request, + aa_state_t state, bool subns, u32 request, struct aa_perms *perms) { int error = label_compound_match(profile, label, state, subns, request, diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c index 974a217218a6..60deb4dc30c7 100644 --- a/security/apparmor/lib.c +++ b/security/apparmor/lib.c @@ -357,7 +357,7 @@ void aa_profile_match_label(struct aa_profile *profile, struct aa_label *label, int type, u32 request, struct aa_perms *perms) { /* TODO: doesn't yet handle extended types */ - unsigned int state; + aa_state_t state; state = aa_dfa_next(profile->policy.dfa, profile->policy.start[AA_CLASS_LABEL], diff --git a/security/apparmor/match.c b/security/apparmor/match.c index 3e9e1eaf990e..5095c26ca683 100644 --- 
a/security/apparmor/match.c +++ b/security/apparmor/match.c @@ -436,17 +436,17 @@ do { \ * * Returns: final state reached after input is consumed */ -unsigned int aa_dfa_match_len(struct aa_dfa *dfa, unsigned int start, - const char *str, int len) +aa_state_t aa_dfa_match_len(struct aa_dfa *dfa, aa_state_t start, + const char *str, int len) { u16 *def = DEFAULT_TABLE(dfa); u32 *base = BASE_TABLE(dfa); u16 *next = NEXT_TABLE(dfa); u16 *check = CHECK_TABLE(dfa); - unsigned int state = start; + aa_state_t state = start; - if (state == 0) - return 0; + if (state == DFA_NOMATCH) + return DFA_NOMATCH; /* current state is , matching character *str */ if (dfa->tables[YYTD_ID_EC]) { @@ -476,17 +476,16 @@ unsigned int aa_dfa_match_len(struct aa_dfa *dfa, unsigned int start, * * Returns: final state reached after input is consumed */ -unsigned int aa_dfa_match(struct aa_dfa *dfa, unsigned int start, - const char *str) +aa_state_t aa_dfa_match(struct aa_dfa *dfa, aa_state_t start, const char *str) { u16 *def = DEFAULT_TABLE(dfa); u32 *base = BASE_TABLE(dfa); u16 *next = NEXT_TABLE(dfa); u16 *check = CHECK_TABLE(dfa); - unsigned int state = start; + aa_state_t state = start; - if (state == 0) - return 0; + if (state == DFA_NOMATCH) + return DFA_NOMATCH; /* current state is , matching character *str */ if (dfa->tables[YYTD_ID_EC]) { @@ -515,8 +514,7 @@ unsigned int aa_dfa_match(struct aa_dfa *dfa, unsigned int start, * * Returns: state reach after input @c */ -unsigned int aa_dfa_next(struct aa_dfa *dfa, unsigned int state, - const char c) +aa_state_t aa_dfa_next(struct aa_dfa *dfa, aa_state_t state, const char c) { u16 *def = DEFAULT_TABLE(dfa); u32 *base = BASE_TABLE(dfa); @@ -534,7 +532,7 @@ unsigned int aa_dfa_next(struct aa_dfa *dfa, unsigned int state, return state; } -unsigned int aa_dfa_outofband_transition(struct aa_dfa *dfa, unsigned int state) +aa_state_t aa_dfa_outofband_transition(struct aa_dfa *dfa, aa_state_t state) { u16 *def = DEFAULT_TABLE(dfa); u32 *base = 
BASE_TABLE(dfa); @@ -564,7 +562,7 @@ unsigned int aa_dfa_outofband_transition(struct aa_dfa *dfa, unsigned int state) * * Returns: final state reached after input is consumed */ -unsigned int aa_dfa_match_until(struct aa_dfa *dfa, unsigned int start, +aa_state_t aa_dfa_match_until(struct aa_dfa *dfa, aa_state_t start, const char *str, const char **retpos) { u16 *def = DEFAULT_TABLE(dfa); @@ -572,10 +570,10 @@ unsigned int aa_dfa_match_until(struct aa_dfa *dfa, unsigned int start, u16 *next = NEXT_TABLE(dfa); u16 *check = CHECK_TABLE(dfa); u32 *accept = ACCEPT_TABLE(dfa); - unsigned int state = start, pos; + aa_state_t state = start, pos; - if (state == 0) - return 0; + if (state == DFA_NOMATCH) + return DFA_NOMATCH; /* current state is , matching character *str */ if (dfa->tables[YYTD_ID_EC]) { @@ -625,7 +623,7 @@ unsigned int aa_dfa_match_until(struct aa_dfa *dfa, unsigned int start, * * Returns: final state reached after input is consumed */ -unsigned int aa_dfa_matchn_until(struct aa_dfa *dfa, unsigned int start, +aa_state_t aa_dfa_matchn_until(struct aa_dfa *dfa, aa_state_t start, const char *str, int n, const char **retpos) { u16 *def = DEFAULT_TABLE(dfa); @@ -633,11 +631,11 @@ unsigned int aa_dfa_matchn_until(struct aa_dfa *dfa, unsigned int start, u16 *next = NEXT_TABLE(dfa); u16 *check = CHECK_TABLE(dfa); u32 *accept = ACCEPT_TABLE(dfa); - unsigned int state = start, pos; + aa_state_t state = start, pos; *retpos = NULL; - if (state == 0) - return 0; + if (state == DFA_NOMATCH) + return DFA_NOMATCH; /* current state is , matching character *str */ if (dfa->tables[YYTD_ID_EC]) { @@ -677,11 +675,11 @@ do { \ } while (0) /* For DFAs that don't support extended tagging of states */ -static bool is_loop(struct match_workbuf *wb, unsigned int state, +static bool is_loop(struct match_workbuf *wb, aa_state_t state, unsigned int *adjust) { - unsigned int pos = wb->pos; - unsigned int i; + aa_state_t pos = wb->pos; + aa_state_t i; if (wb->history[pos] < state) return 
false; @@ -700,7 +698,7 @@ static bool is_loop(struct match_workbuf *wb, unsigned int state, return true; } -static unsigned int leftmatch_fb(struct aa_dfa *dfa, unsigned int start, +static aa_state_t leftmatch_fb(struct aa_dfa *dfa, aa_state_t start, const char *str, struct match_workbuf *wb, unsigned int *count) { @@ -708,7 +706,7 @@ static unsigned int leftmatch_fb(struct aa_dfa *dfa, unsigned int start, u32 *base = BASE_TABLE(dfa); u16 *next = NEXT_TABLE(dfa); u16 *check = CHECK_TABLE(dfa); - unsigned int state = start, pos; + aa_state_t state = start, pos; AA_BUG(!dfa); AA_BUG(!str); @@ -716,8 +714,8 @@ static unsigned int leftmatch_fb(struct aa_dfa *dfa, unsigned int start, AA_BUG(!count); *count = 0; - if (state == 0) - return 0; + if (state == DFA_NOMATCH) + return DFA_NOMATCH; /* current state is , matching character *str */ if (dfa->tables[YYTD_ID_EC]) { @@ -781,8 +779,8 @@ out: * * Returns: final state reached after input is consumed */ -unsigned int aa_dfa_leftmatch(struct aa_dfa *dfa, unsigned int start, - const char *str, unsigned int *count) +aa_state_t aa_dfa_leftmatch(struct aa_dfa *dfa, aa_state_t start, + const char *str, unsigned int *count) { DEFINE_MATCH_WB(wb); diff --git a/security/apparmor/mount.c b/security/apparmor/mount.c index 7594f3a3441e..84aaf25e5dee 100644 --- a/security/apparmor/mount.c +++ b/security/apparmor/mount.c @@ -190,7 +190,7 @@ static int audit_mount(struct aa_profile *profile, const char *op, * * Returns: next state after flags match */ -static unsigned int match_mnt_flags(struct aa_dfa *dfa, unsigned int state, +static aa_state_t match_mnt_flags(struct aa_dfa *dfa, aa_state_t state, unsigned long flags) { unsigned int i; @@ -217,12 +217,12 @@ static const char * const mnt_info_table[] = { * Returns 0 on success else element that match failed in, this is the * index into the mnt_info_table above */ -static int do_match_mnt(struct aa_policydb *policy, unsigned int start, +static int do_match_mnt(struct aa_policydb 
*policy, aa_state_t start, const char *mntpnt, const char *devname, const char *type, unsigned long flags, void *data, bool binary, struct aa_perms *perms) { - unsigned int state; + aa_state_t state; AA_BUG(!policy); AA_BUG(!policy->dfa); @@ -567,7 +567,7 @@ static int profile_umount(struct aa_profile *profile, const struct path *path, { struct aa_perms perms = { }; const char *name = NULL, *info = NULL; - unsigned int state; + aa_state_t state; int error; AA_BUG(!profile); @@ -627,7 +627,7 @@ static struct aa_label *build_pivotroot(struct aa_profile *profile, const char *old_name, *new_name = NULL, *info = NULL; const char *trans_name = NULL; struct aa_perms perms = { }; - unsigned int state; + aa_state_t state; int error; AA_BUG(!profile); diff --git a/security/apparmor/net.c b/security/apparmor/net.c index fcfb97079e1b..d420d3aec3b8 100644 --- a/security/apparmor/net.c +++ b/security/apparmor/net.c @@ -109,7 +109,7 @@ int aa_profile_af_perm(struct aa_profile *profile, struct common_audit_data *sa, u32 request, u16 family, int type) { struct aa_perms perms = { }; - unsigned int state; + aa_state_t state; __be16 buffer[2]; AA_BUG(family >= AF_MAX); diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 0917412ba48f..3ea591d31be7 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -698,7 +698,7 @@ static u32 map_old_perms(u32 old) } static void compute_fperms_allow(struct aa_perms *perms, struct aa_dfa *dfa, - unsigned int state) + aa_state_t state) { perms->allow |= AA_MAY_GETATTR; @@ -710,7 +710,7 @@ static void compute_fperms_allow(struct aa_perms *perms, struct aa_dfa *dfa, } static struct aa_perms compute_fperms_user(struct aa_dfa *dfa, - unsigned int state) + aa_state_t state) { struct aa_perms perms = { }; @@ -725,7 +725,7 @@ static struct aa_perms compute_fperms_user(struct aa_dfa *dfa, } static struct aa_perms compute_fperms_other(struct aa_dfa *dfa, - unsigned int state) + aa_state_t 
state) { struct aa_perms perms = { }; @@ -748,8 +748,8 @@ static struct aa_perms compute_fperms_other(struct aa_dfa *dfa, */ static struct aa_perms *compute_fperms(struct aa_dfa *dfa) { - int state; - int state_count; + aa_state_t state; + unsigned int state_count; struct aa_perms *table; AA_BUG(!dfa); @@ -796,7 +796,7 @@ static u32 map_other(u32 x) } static struct aa_perms compute_perms_entry(struct aa_dfa *dfa, - unsigned int state) + aa_state_t state) { struct aa_perms perms = { }; @@ -817,8 +817,8 @@ static struct aa_perms compute_perms_entry(struct aa_dfa *dfa, static struct aa_perms *compute_perms(struct aa_dfa *dfa) { - int state; - int state_count; + unsigned int state; + unsigned int state_count; struct aa_perms *table; AA_BUG(!dfa); From 1b5a6198f5a9d0aa5497da0dc4bcd4fc166ee516 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Fri, 6 May 2022 18:57:12 -0700 Subject: [PATCH 0020/4122] apparmor: Fix abi check to include v8 abi The v8 abi is supported by the kernel but the userspace supported version check does not allow for it. This was missed when v8 was added due to a bug in the userspace compiler which was setting an older abi version for v8 encoding (which is forward compatible except on the network encoding). However it is possible to detect the network encoding by checking the policydb network support which the code does. The end result was that missing the abi flag worked until userspace was fixed and began correctly checking for the v8 abi version. 
Fixes: 56974a6fcfef ("apparmor: add base infastructure for socket mediation") Signed-off-by: John Johansen --- security/apparmor/policy_unpack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 3ea591d31be7..0203e43460b6 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -1183,7 +1183,7 @@ static int verify_header(struct aa_ext *e, int required, const char **ns) * if not specified use previous version * Mask off everything that is not kernel abi version */ - if (VERSION_LT(e->version, v5) || VERSION_GT(e->version, v7)) { + if (VERSION_LT(e->version, v5) || VERSION_GT(e->version, v8)) { audit_iface(NULL, NULL, NULL, "unsupported interface version", e, error); return error; From 1cf26c3d2c4c2098e39a9905174d7842b531e693 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Sat, 7 May 2022 01:58:36 -0700 Subject: [PATCH 0021/4122] apparmor: fix apparmor mediating locking non-fs unix sockets The v8 and earlier policy does not encode the locking permission for non-fs unix sockets. However the kernel is enforcing mediation. Add the AA_MAY_LOCK perm to the v8 and earlier computed perm mask, which will grant permission for all current abi profiles, but still allow specifying auditing of the operation if needed.
Link: http://bugs.launchpad.net/bugs/1780227 Signed-off-by: John Johansen --- security/apparmor/policy_unpack.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 0203e43460b6..2406c5c4caaf 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -31,6 +31,7 @@ #define K_ABI_MASK 0x3ff #define FORCE_COMPLAIN_FLAG 0x800 #define VERSION_LT(X, Y) (((X) & K_ABI_MASK) < ((Y) & K_ABI_MASK)) +#define VERSION_LE(X, Y) (((X) & K_ABI_MASK) <= ((Y) & K_ABI_MASK)) #define VERSION_GT(X, Y) (((X) & K_ABI_MASK) > ((Y) & K_ABI_MASK)) #define v5 5 /* base version */ @@ -796,7 +797,8 @@ static u32 map_other(u32 x) } static struct aa_perms compute_perms_entry(struct aa_dfa *dfa, - aa_state_t state) + aa_state_t state, + u32 version) { struct aa_perms perms = { }; @@ -809,13 +811,15 @@ static struct aa_perms compute_perms_entry(struct aa_dfa *dfa, */ perms.allow |= map_other(dfa_other_allow(dfa, state)); + if (VERSION_LE(version, v8)) + perms.allow |= AA_MAY_LOCK; perms.audit |= map_other(dfa_other_audit(dfa, state)); perms.quiet |= map_other(dfa_other_quiet(dfa, state)); return perms; } -static struct aa_perms *compute_perms(struct aa_dfa *dfa) +static struct aa_perms *compute_perms(struct aa_dfa *dfa, u32 version) { unsigned int state; unsigned int state_count; @@ -831,7 +835,7 @@ static struct aa_perms *compute_perms(struct aa_dfa *dfa) /* zero init so skip the trap state (state == 0) */ for (state = 1; state < state_count; state++) - table[state] = compute_perms_entry(dfa, state); + table[state] = compute_perms_entry(dfa, state, version); return table; } @@ -1055,7 +1059,8 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) } if (!unpack_nameX(e, AA_STRUCTEND, NULL)) goto fail; - profile->policy.perms = compute_perms(profile->policy.dfa); + profile->policy.perms = compute_perms(profile->policy.dfa, + e->version); if 
(!profile->policy.perms) { info = "failed to remap policydb permission table"; goto fail; From 3c076531c5529c94cee330dffc4615ad02bb6edb Mon Sep 17 00:00:00 2001 From: John Johansen Date: Tue, 10 May 2022 02:21:22 -0700 Subject: [PATCH 0022/4122] apparmor: extend policydb permission set by making use of the xbits The policydb permission set has left the xbits unused. Make them available for mediation. Note that this does not bring full auditing control of the permissions as there are not enough bits. The quieting of denials is provided as that is used more than forced auditing of allowed permissions. Signed-off-by: John Johansen --- security/apparmor/policy_unpack.c | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 2406c5c4caaf..e91883116663 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -38,6 +38,7 @@ #define v6 6 /* per entry policydb mediation check */ #define v7 7 #define v8 8 /* full network masking */ +#define v9 9 /* xbits are used as permission bits in policydb */ /* * The AppArmor interface treats data as a type byte followed by the @@ -796,6 +797,12 @@ static u32 map_other(u32 x) ((x & 0x60) << 19); /* SETOPT/GETOPT */ } +static u32 map_xbits(u32 x) +{ + return ((x & 0x1) << 7) | + ((x & 0x7e) << 9); +} + static struct aa_perms compute_perms_entry(struct aa_dfa *dfa, aa_state_t state, u32 version) @@ -806,15 +813,31 @@ static struct aa_perms compute_perms_entry(struct aa_dfa *dfa, perms.audit = dfa_user_audit(dfa, state); perms.quiet = dfa_user_quiet(dfa, state); - /* for v5 perm mapping in the policydb, the other set is used - * to extend the general perm set + /* + * This mapping is convulated due to history.
+ * v1-v4: only file perms, which are handled by compute_fperms + * v5: added policydb which dropped user conditional to gain new + * perm bits, but had to map around the xbits because the + * userspace compiler was still munging them. + * v9: adds using the xbits in policydb because the compiler now + * supports treating policydb permission bits different. + * Unfortunately there is no way to force auditing on the + * perms represented by the xbits */ - perms.allow |= map_other(dfa_other_allow(dfa, state)); if (VERSION_LE(version, v8)) perms.allow |= AA_MAY_LOCK; + else + perms.allow |= map_xbits(dfa_user_xbits(dfa, state)); + + /* + * for v5-v9 perm mapping in the policydb, the other set is used + * to extend the general perm set + */ perms.audit |= map_other(dfa_other_audit(dfa, state)); perms.quiet |= map_other(dfa_other_quiet(dfa, state)); + if (VERSION_GT(version, v8)) + perms.quiet |= map_xbits(dfa_other_xbits(dfa, state)); return perms; } @@ -1188,7 +1211,7 @@ static int verify_header(struct aa_ext *e, int required, const char **ns) * if not specified use previous version * Mask off everything that is not kernel abi version */ - if (VERSION_LT(e->version, v5) || VERSION_GT(e->version, v8)) { + if (VERSION_LT(e->version, v5) || VERSION_GT(e->version, v9)) { audit_iface(NULL, NULL, NULL, "unsupported interface version", e, error); return error; From b06a62ebf5a3f041b22def1608f1a8ab9bbfa951 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Mon, 16 May 2022 04:37:08 -0700 Subject: [PATCH 0023/4122] apparmor: move dfa perm macros into policy_unpack Now that the permission remapping macros aren't needed anywhere except during profile unpack, move them. 
Signed-off-by: John Johansen --- security/apparmor/include/file.h | 51 ------------------------------- security/apparmor/policy_unpack.c | 49 +++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 51 deletions(-) diff --git a/security/apparmor/include/file.h b/security/apparmor/include/file.h index 8c82cf279dc2..4212426020cb 100644 --- a/security/apparmor/include/file.h +++ b/security/apparmor/include/file.h @@ -109,57 +109,6 @@ struct path_cond { #define COMBINED_PERM_MASK(X) ((X).allow | (X).audit | (X).quiet | (X).kill) -/* FIXME: split perms from dfa and match this to description - * also add delegation info. - */ -static inline u16 dfa_map_xindex(u16 mask) -{ - u16 old_index = (mask >> 10) & 0xf; - u16 index = 0; - - if (mask & 0x100) - index |= AA_X_UNSAFE; - if (mask & 0x200) - index |= AA_X_INHERIT; - if (mask & 0x80) - index |= AA_X_UNCONFINED; - - if (old_index == 1) { - index |= AA_X_UNCONFINED; - } else if (old_index == 2) { - index |= AA_X_NAME; - } else if (old_index == 3) { - index |= AA_X_NAME | AA_X_CHILD; - } else if (old_index) { - index |= AA_X_TABLE; - index |= old_index - 4; - } - - return index; -} - -/* - * map old dfa inline permissions to new format - */ -#define dfa_user_allow(dfa, state) (((ACCEPT_TABLE(dfa)[state]) & 0x7f) | \ - ((ACCEPT_TABLE(dfa)[state]) & 0x80000000)) -#define dfa_user_xbits(dfa, state) (((ACCEPT_TABLE(dfa)[state]) >> 7) & 0x7f) -#define dfa_user_audit(dfa, state) ((ACCEPT_TABLE2(dfa)[state]) & 0x7f) -#define dfa_user_quiet(dfa, state) (((ACCEPT_TABLE2(dfa)[state]) >> 7) & 0x7f) -#define dfa_user_xindex(dfa, state) \ - (dfa_map_xindex(ACCEPT_TABLE(dfa)[state] & 0x3fff)) - -#define dfa_other_allow(dfa, state) ((((ACCEPT_TABLE(dfa)[state]) >> 14) & \ - 0x7f) | \ - ((ACCEPT_TABLE(dfa)[state]) & 0x80000000)) -#define dfa_other_xbits(dfa, state) \ - ((((ACCEPT_TABLE(dfa)[state]) >> 7) >> 14) & 0x7f) -#define dfa_other_audit(dfa, state) (((ACCEPT_TABLE2(dfa)[state]) >> 14) & 0x7f) -#define dfa_other_quiet(dfa, 
state) \ - ((((ACCEPT_TABLE2(dfa)[state]) >> 7) >> 14) & 0x7f) -#define dfa_other_xindex(dfa, state) \ - dfa_map_xindex((ACCEPT_TABLE(dfa)[state] >> 14) & 0x3fff) - int aa_audit_file(struct aa_profile *profile, struct aa_perms *perms, const char *op, u32 request, const char *name, const char *target, struct aa_label *tlabel, kuid_t ouid, diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index e91883116663..32cca5f27b8f 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -671,6 +671,55 @@ static int datacmp(struct rhashtable_compare_arg *arg, const void *obj) return strcmp(data->key, *key); } +/* remap old accept table embedded permissions to separate permission table */ +static u16 dfa_map_xindex(u16 mask) +{ + u16 old_index = (mask >> 10) & 0xf; + u16 index = 0; + + if (mask & 0x100) + index |= AA_X_UNSAFE; + if (mask & 0x200) + index |= AA_X_INHERIT; + if (mask & 0x80) + index |= AA_X_UNCONFINED; + + if (old_index == 1) { + index |= AA_X_UNCONFINED; + } else if (old_index == 2) { + index |= AA_X_NAME; + } else if (old_index == 3) { + index |= AA_X_NAME | AA_X_CHILD; + } else if (old_index) { + index |= AA_X_TABLE; + index |= old_index - 4; + } + + return index; +} + +/* + * map old dfa inline permissions to new format + */ +#define dfa_user_allow(dfa, state) (((ACCEPT_TABLE(dfa)[state]) & 0x7f) | \ + ((ACCEPT_TABLE(dfa)[state]) & 0x80000000)) +#define dfa_user_xbits(dfa, state) (((ACCEPT_TABLE(dfa)[state]) >> 7) & 0x7f) +#define dfa_user_audit(dfa, state) ((ACCEPT_TABLE2(dfa)[state]) & 0x7f) +#define dfa_user_quiet(dfa, state) (((ACCEPT_TABLE2(dfa)[state]) >> 7) & 0x7f) +#define dfa_user_xindex(dfa, state) \ + (dfa_map_xindex(ACCEPT_TABLE(dfa)[state] & 0x3fff)) + +#define dfa_other_allow(dfa, state) ((((ACCEPT_TABLE(dfa)[state]) >> 14) & \ + 0x7f) | \ + ((ACCEPT_TABLE(dfa)[state]) & 0x80000000)) +#define dfa_other_xbits(dfa, state) \ + ((((ACCEPT_TABLE(dfa)[state]) >> 7) >> 14) & 0x7f) 
+#define dfa_other_audit(dfa, state) (((ACCEPT_TABLE2(dfa)[state]) >> 14) & 0x7f) +#define dfa_other_quiet(dfa, state) \ + ((((ACCEPT_TABLE2(dfa)[state]) >> 7) >> 14) & 0x7f) +#define dfa_other_xindex(dfa, state) \ + dfa_map_xindex((ACCEPT_TABLE(dfa)[state] >> 14) & 0x3fff) + /** * map_old_perms - map old file perms layout to the new layout * @old: permission set in old mapping From ae6d35ed0a481824a8730c39d5b319c8a76ea00e Mon Sep 17 00:00:00 2001 From: John Johansen Date: Sat, 16 Jul 2022 03:29:19 -0700 Subject: [PATCH 0024/4122] apparmor: extend xindex size Allow the xindex to have 2^24 entries. Signed-off-by: John Johansen --- security/apparmor/include/file.h | 19 +++++++++---------- security/apparmor/include/perms.h | 2 +- security/apparmor/policy_unpack.c | 8 ++++---- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/security/apparmor/include/file.h b/security/apparmor/include/file.h index 4212426020cb..521c8568f6d4 100644 --- a/security/apparmor/include/file.h +++ b/security/apparmor/include/file.h @@ -88,18 +88,17 @@ static inline struct aa_label *aa_get_file_label(struct aa_file_ctx *ctx) * - exec type - which determines how the executable name and index are used * - flags - which modify how the destination name is applied */ -#define AA_X_INDEX_MASK 0x03ff +#define AA_X_INDEX_MASK 0x00ffffff -#define AA_X_TYPE_MASK 0x0c00 -#define AA_X_TYPE_SHIFT 10 -#define AA_X_NONE 0x0000 -#define AA_X_NAME 0x0400 /* use executable name px */ -#define AA_X_TABLE 0x0800 /* use a specified name ->n# */ +#define AA_X_TYPE_MASK 0x0c000000 +#define AA_X_NONE 0x00000000 +#define AA_X_NAME 0x04000000 /* use executable name px */ +#define AA_X_TABLE 0x08000000 /* use a specified name ->n# */ -#define AA_X_UNSAFE 0x1000 -#define AA_X_CHILD 0x2000 /* make >AA_X_NONE apply to children */ -#define AA_X_INHERIT 0x4000 -#define AA_X_UNCONFINED 0x8000 +#define AA_X_UNSAFE 0x10000000 +#define AA_X_CHILD 0x20000000 +#define AA_X_INHERIT 0x40000000 +#define AA_X_UNCONFINED 
0x80000000 /* need to make conditional which ones are being set */ struct path_cond { diff --git a/security/apparmor/include/perms.h b/security/apparmor/include/perms.h index 1014a7bbc027..8739cef73549 100644 --- a/security/apparmor/include/perms.h +++ b/security/apparmor/include/perms.h @@ -78,7 +78,7 @@ struct aa_perms { u32 quiet; /* set only when ~allow | deny */ u32 hide; /* set only when ~allow | deny */ - u16 xindex; + u32 xindex; }; #define ALL_PERMS_MASK 0xffffffff diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 32cca5f27b8f..c578d9af785e 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -489,8 +489,8 @@ static bool unpack_trans_table(struct aa_ext *e, struct aa_profile *profile) int i, size; size = unpack_array(e, NULL); - /* currently 4 exec bits and entries 0-3 are reserved iupcx */ - if (size > 16 - 4) + /* currently 2^24 bits entries 0-3 */ + if (size > (1 << 24)) goto fail; profile->file.trans.table = kcalloc(size, sizeof(char *), GFP_KERNEL); @@ -672,10 +672,10 @@ static int datacmp(struct rhashtable_compare_arg *arg, const void *obj) } /* remap old accept table embedded permissions to separate permission table */ -static u16 dfa_map_xindex(u16 mask) +static u32 dfa_map_xindex(u16 mask) { u16 old_index = (mask >> 10) & 0xf; - u16 index = 0; + u32 index = 0; if (mask & 0x100) index |= AA_X_UNSAFE; From caa9f579ca7255e9d6c25f072447d895c5928c97 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Sun, 21 Aug 2022 22:48:32 -0700 Subject: [PATCH 0025/4122] apparmor: isolate policy backwards compatibility to its own file The details of mapping old policy into newer policy formats clutter up the unpack code and make it possible to accidentally use old mappings in code, so isolate the mapping code into its own file. This will become more important when the dfa remapping code lands, as it will greatly expand the compat code base. 
Signed-off-by: John Johansen --- security/apparmor/Makefile | 3 +- security/apparmor/include/policy_compat.h | 33 +++ security/apparmor/include/policy_unpack.h | 1 + security/apparmor/policy_compat.c | 319 ++++++++++++++++++++++ security/apparmor/policy_unpack.c | 290 +------------------- 5 files changed, 359 insertions(+), 287 deletions(-) create mode 100644 security/apparmor/include/policy_compat.h create mode 100644 security/apparmor/policy_compat.c diff --git a/security/apparmor/Makefile b/security/apparmor/Makefile index ff23fcfefe19..4377123c2b98 100644 --- a/security/apparmor/Makefile +++ b/security/apparmor/Makefile @@ -5,7 +5,8 @@ obj-$(CONFIG_SECURITY_APPARMOR) += apparmor.o apparmor-y := apparmorfs.o audit.o capability.o task.o ipc.o lib.o match.o \ path.o domain.o policy.o policy_unpack.o procattr.o lsm.o \ - resource.o secid.o file.o policy_ns.o label.o mount.o net.o + resource.o secid.o file.o policy_ns.o label.o mount.o net.o \ + policy_compat.o apparmor-$(CONFIG_SECURITY_APPARMOR_HASH) += crypto.o clean-files := capability_names.h rlim_names.h net_names.h diff --git a/security/apparmor/include/policy_compat.h b/security/apparmor/include/policy_compat.h new file mode 100644 index 000000000000..af0e174332df --- /dev/null +++ b/security/apparmor/include/policy_compat.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * AppArmor security module + * + * Code to provide backwards compatibility with older policy versions, + * by converting/mapping older policy formats into the newer internal + * formats. + * + * Copyright 2022 Canonical Ltd. 
+ */ + +#ifndef __POLICY_COMPAT_H +#define __POLICY_COMPAT_H + +#include "policy.h" + +#define K_ABI_MASK 0x3ff +#define FORCE_COMPLAIN_FLAG 0x800 +#define VERSION_LT(X, Y) (((X) & K_ABI_MASK) < ((Y) & K_ABI_MASK)) +#define VERSION_LE(X, Y) (((X) & K_ABI_MASK) <= ((Y) & K_ABI_MASK)) +#define VERSION_GT(X, Y) (((X) & K_ABI_MASK) > ((Y) & K_ABI_MASK)) + +#define v5 5 /* base version */ +#define v6 6 /* per entry policydb mediation check */ +#define v7 7 +#define v8 8 /* full network masking */ +#define v9 9 /* xbits are used as permission bits in policydb */ + +int aa_compat_map_xmatch(struct aa_policydb *policy); +int aa_compat_map_policy(struct aa_policydb *policy, u32 version); +int aa_compat_map_file(struct aa_policydb *policy); + +#endif /* __POLICY_COMPAT_H */ diff --git a/security/apparmor/include/policy_unpack.h b/security/apparmor/include/policy_unpack.h index eb5f7d7f132b..cdfbc8a54a9d 100644 --- a/security/apparmor/include/policy_unpack.h +++ b/security/apparmor/include/policy_unpack.h @@ -16,6 +16,7 @@ #include #include + struct aa_load_ent { struct list_head list; struct aa_profile *new; diff --git a/security/apparmor/policy_compat.c b/security/apparmor/policy_compat.c new file mode 100644 index 000000000000..1aa5cced935e --- /dev/null +++ b/security/apparmor/policy_compat.c @@ -0,0 +1,319 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * AppArmor security module + * + * This file contains AppArmor functions for unpacking policy loaded + * from userspace. + * + * Copyright (C) 1998-2008 Novell/SUSE + * Copyright 2009-2022 Canonical Ltd. + * + * Code to provide backwards compatibility with older policy versions, + * by converting/mapping older policy formats into the newer internal + * formats. 
+ */ + +#include +#include + +#include "include/lib.h" +#include "include/policy_unpack.h" +#include "include/policy_compat.h" + +/* remap old accept table embedded permissions to separate permission table */ +static u32 dfa_map_xindex(u16 mask) +{ + u16 old_index = (mask >> 10) & 0xf; + u32 index = 0; + + if (mask & 0x100) + index |= AA_X_UNSAFE; + if (mask & 0x200) + index |= AA_X_INHERIT; + if (mask & 0x80) + index |= AA_X_UNCONFINED; + + if (old_index == 1) { + index |= AA_X_UNCONFINED; + } else if (old_index == 2) { + index |= AA_X_NAME; + } else if (old_index == 3) { + index |= AA_X_NAME | AA_X_CHILD; + } else if (old_index) { + index |= AA_X_TABLE; + index |= old_index - 4; + } + + return index; +} + +/* + * map old dfa inline permissions to new format + */ +#define dfa_user_allow(dfa, state) (((ACCEPT_TABLE(dfa)[state]) & 0x7f) | \ + ((ACCEPT_TABLE(dfa)[state]) & 0x80000000)) +#define dfa_user_xbits(dfa, state) (((ACCEPT_TABLE(dfa)[state]) >> 7) & 0x7f) +#define dfa_user_audit(dfa, state) ((ACCEPT_TABLE2(dfa)[state]) & 0x7f) +#define dfa_user_quiet(dfa, state) (((ACCEPT_TABLE2(dfa)[state]) >> 7) & 0x7f) +#define dfa_user_xindex(dfa, state) \ + (dfa_map_xindex(ACCEPT_TABLE(dfa)[state] & 0x3fff)) + +#define dfa_other_allow(dfa, state) ((((ACCEPT_TABLE(dfa)[state]) >> 14) & \ + 0x7f) | \ + ((ACCEPT_TABLE(dfa)[state]) & 0x80000000)) +#define dfa_other_xbits(dfa, state) \ + ((((ACCEPT_TABLE(dfa)[state]) >> 7) >> 14) & 0x7f) +#define dfa_other_audit(dfa, state) (((ACCEPT_TABLE2(dfa)[state]) >> 14) & 0x7f) +#define dfa_other_quiet(dfa, state) \ + ((((ACCEPT_TABLE2(dfa)[state]) >> 7) >> 14) & 0x7f) +#define dfa_other_xindex(dfa, state) \ + dfa_map_xindex((ACCEPT_TABLE(dfa)[state] >> 14) & 0x3fff) + +/** + * map_old_perms - map old file perms layout to the new layout + * @old: permission set in old mapping + * + * Returns: new permission mapping + */ +static u32 map_old_perms(u32 old) +{ + u32 new = old & 0xf; + + if (old & MAY_READ) + new |= AA_MAY_GETATTR | 
AA_MAY_OPEN; + if (old & MAY_WRITE) + new |= AA_MAY_SETATTR | AA_MAY_CREATE | AA_MAY_DELETE | + AA_MAY_CHMOD | AA_MAY_CHOWN | AA_MAY_OPEN; + if (old & 0x10) + new |= AA_MAY_LINK; + /* the old mapping lock and link_subset flags where overlaid + * and use was determined by part of a pair that they were in + */ + if (old & 0x20) + new |= AA_MAY_LOCK | AA_LINK_SUBSET; + if (old & 0x40) /* AA_EXEC_MMAP */ + new |= AA_EXEC_MMAP; + + return new; +} + +static void compute_fperms_allow(struct aa_perms *perms, struct aa_dfa *dfa, + aa_state_t state) +{ + perms->allow |= AA_MAY_GETATTR; + + /* change_profile wasn't determined by ownership in old mapping */ + if (ACCEPT_TABLE(dfa)[state] & 0x80000000) + perms->allow |= AA_MAY_CHANGE_PROFILE; + if (ACCEPT_TABLE(dfa)[state] & 0x40000000) + perms->allow |= AA_MAY_ONEXEC; +} + +static struct aa_perms compute_fperms_user(struct aa_dfa *dfa, + aa_state_t state) +{ + struct aa_perms perms = { }; + + perms.allow = map_old_perms(dfa_user_allow(dfa, state)); + perms.audit = map_old_perms(dfa_user_audit(dfa, state)); + perms.quiet = map_old_perms(dfa_user_quiet(dfa, state)); + perms.xindex = dfa_user_xindex(dfa, state); + + compute_fperms_allow(&perms, dfa, state); + + return perms; +} + +static struct aa_perms compute_fperms_other(struct aa_dfa *dfa, + aa_state_t state) +{ + struct aa_perms perms = { }; + + perms.allow = map_old_perms(dfa_other_allow(dfa, state)); + perms.audit = map_old_perms(dfa_other_audit(dfa, state)); + perms.quiet = map_old_perms(dfa_other_quiet(dfa, state)); + perms.xindex = dfa_other_xindex(dfa, state); + + compute_fperms_allow(&perms, dfa, state); + + return perms; +} + +/** + * aa_compute_fperms - convert dfa compressed perms to internal perms and store + * them so they can be retrieved later. 
+ * @dfa: a dfa using fperms to remap to internal permissions + * + * Returns: remapped perm table + */ +static struct aa_perms *compute_fperms(struct aa_dfa *dfa) +{ + aa_state_t state; + unsigned int state_count; + struct aa_perms *table; + + AA_BUG(!dfa); + + state_count = dfa->tables[YYTD_ID_BASE]->td_lolen; + /* DFAs are restricted from having a state_count of less than 2 */ + table = kvcalloc(state_count * 2, sizeof(struct aa_perms), GFP_KERNEL); + if (!table) + return NULL; + + /* zero init so skip the trap state (state == 0) */ + for (state = 1; state < state_count; state++) { + table[state * 2] = compute_fperms_user(dfa, state); + table[state * 2 + 1] = compute_fperms_other(dfa, state); + } + + return table; +} + +static struct aa_perms *compute_xmatch_perms(struct aa_dfa *xmatch) +{ + struct aa_perms *perms; + int state; + int state_count; + + AA_BUG(!xmatch); + + state_count = xmatch->tables[YYTD_ID_BASE]->td_lolen; + /* DFAs are restricted from having a state_count of less than 2 */ + perms = kvcalloc(state_count, sizeof(struct aa_perms), GFP_KERNEL); + + /* zero init so skip the trap state (state == 0) */ + for (state = 1; state < state_count; state++) + perms[state].allow = dfa_user_allow(xmatch, state); + + return perms; +} + +static u32 map_other(u32 x) +{ + return ((x & 0x3) << 8) | /* SETATTR/GETATTR */ + ((x & 0x1c) << 18) | /* ACCEPT/BIND/LISTEN */ + ((x & 0x60) << 19); /* SETOPT/GETOPT */ +} + +static u32 map_xbits(u32 x) +{ + return ((x & 0x1) << 7) | + ((x & 0x7e) << 9); +} + +static struct aa_perms compute_perms_entry(struct aa_dfa *dfa, + aa_state_t state, + u32 version) +{ + struct aa_perms perms = { }; + + perms.allow = dfa_user_allow(dfa, state); + perms.audit = dfa_user_audit(dfa, state); + perms.quiet = dfa_user_quiet(dfa, state); + + /* + * This mapping is convulated due to history. 
+ * v1-v4: only file perms, which are handled by compute_fperms + * v5: added policydb which dropped user conditional to gain new + * perm bits, but had to map around the xbits because the + * userspace compiler was still munging them. + * v9: adds using the xbits in policydb because the compiler now + * supports treating policydb permission bits different. + * Unfortunately there is no way to force auditing on the + * perms represented by the xbits + */ + perms.allow |= map_other(dfa_other_allow(dfa, state)); + if (VERSION_LE(version, v8)) + perms.allow |= AA_MAY_LOCK; + else + perms.allow |= map_xbits(dfa_user_xbits(dfa, state)); + + /* + * for v5-v9 perm mapping in the policydb, the other set is used + * to extend the general perm set + */ + perms.audit |= map_other(dfa_other_audit(dfa, state)); + perms.quiet |= map_other(dfa_other_quiet(dfa, state)); + if (VERSION_GT(version, v8)) + perms.quiet |= map_xbits(dfa_other_xbits(dfa, state)); + + return perms; +} + +static struct aa_perms *compute_perms(struct aa_dfa *dfa, u32 version) +{ + unsigned int state; + unsigned int state_count; + struct aa_perms *table; + + AA_BUG(!dfa); + + state_count = dfa->tables[YYTD_ID_BASE]->td_lolen; + /* DFAs are restricted from having a state_count of less than 2 */ + table = kvcalloc(state_count, sizeof(struct aa_perms), GFP_KERNEL); + if (!table) + return NULL; + + /* zero init so skip the trap state (state == 0) */ + for (state = 1; state < state_count; state++) + table[state] = compute_perms_entry(dfa, state, version); + + return table; +} + +/** + * remap_dfa_accept - remap old dfa accept table to be an index + * @dfa: dfa to do the remapping on + * @factor: scaling factor for the index conversion. + * + * Used in conjunction with compute_Xperms, it converts old style perms + * that are encoded in the dfa accept tables to the new style where + * there is a permission table and the accept table is an index into + * the permission table. 
+ */ +static void remap_dfa_accept(struct aa_dfa *dfa, unsigned int factor) +{ + unsigned int state; + unsigned int state_count = dfa->tables[YYTD_ID_BASE]->td_lolen; + + AA_BUG(!dfa); + + for (state = 0; state < state_count; state++) + ACCEPT_TABLE(dfa)[state] = state * factor; + kvfree(dfa->tables[YYTD_ID_ACCEPT2]); + dfa->tables[YYTD_ID_ACCEPT2] = NULL; +} + +/* TODO: merge different dfa mappings into single map_policy fn */ +int aa_compat_map_xmatch(struct aa_policydb *policy) +{ + policy->perms = compute_xmatch_perms(policy->dfa); + if (!policy->perms) + return -ENOMEM; + + remap_dfa_accept(policy->dfa, 1); + + return 0; +} + +int aa_compat_map_policy(struct aa_policydb *policy, u32 version) +{ + policy->perms = compute_perms(policy->dfa, version); + if (!policy->perms) + return -ENOMEM; + + remap_dfa_accept(policy->dfa, 1); + + return 0; +} + +int aa_compat_map_file(struct aa_policydb *policy) +{ + policy->perms = compute_fperms(policy->dfa); + if (!policy->perms) + return -ENOMEM; + + remap_dfa_accept(policy->dfa, 2); + + return 0; +} diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index c578d9af785e..63196df2841b 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -27,18 +27,8 @@ #include "include/path.h" #include "include/policy.h" #include "include/policy_unpack.h" +#include "include/policy_compat.h" -#define K_ABI_MASK 0x3ff -#define FORCE_COMPLAIN_FLAG 0x800 -#define VERSION_LT(X, Y) (((X) & K_ABI_MASK) < ((Y) & K_ABI_MASK)) -#define VERSION_LE(X, Y) (((X) & K_ABI_MASK) <= ((Y) & K_ABI_MASK)) -#define VERSION_GT(X, Y) (((X) & K_ABI_MASK) > ((Y) & K_ABI_MASK)) - -#define v5 5 /* base version */ -#define v6 6 /* per entry policydb mediation check */ -#define v7 7 -#define v8 8 /* full network masking */ -#define v9 9 /* xbits are used as permission bits in policydb */ /* * The AppArmor interface treats data as a type byte followed by the @@ -671,270 +661,6 @@ static int 
datacmp(struct rhashtable_compare_arg *arg, const void *obj) return strcmp(data->key, *key); } -/* remap old accept table embedded permissions to separate permission table */ -static u32 dfa_map_xindex(u16 mask) -{ - u16 old_index = (mask >> 10) & 0xf; - u32 index = 0; - - if (mask & 0x100) - index |= AA_X_UNSAFE; - if (mask & 0x200) - index |= AA_X_INHERIT; - if (mask & 0x80) - index |= AA_X_UNCONFINED; - - if (old_index == 1) { - index |= AA_X_UNCONFINED; - } else if (old_index == 2) { - index |= AA_X_NAME; - } else if (old_index == 3) { - index |= AA_X_NAME | AA_X_CHILD; - } else if (old_index) { - index |= AA_X_TABLE; - index |= old_index - 4; - } - - return index; -} - -/* - * map old dfa inline permissions to new format - */ -#define dfa_user_allow(dfa, state) (((ACCEPT_TABLE(dfa)[state]) & 0x7f) | \ - ((ACCEPT_TABLE(dfa)[state]) & 0x80000000)) -#define dfa_user_xbits(dfa, state) (((ACCEPT_TABLE(dfa)[state]) >> 7) & 0x7f) -#define dfa_user_audit(dfa, state) ((ACCEPT_TABLE2(dfa)[state]) & 0x7f) -#define dfa_user_quiet(dfa, state) (((ACCEPT_TABLE2(dfa)[state]) >> 7) & 0x7f) -#define dfa_user_xindex(dfa, state) \ - (dfa_map_xindex(ACCEPT_TABLE(dfa)[state] & 0x3fff)) - -#define dfa_other_allow(dfa, state) ((((ACCEPT_TABLE(dfa)[state]) >> 14) & \ - 0x7f) | \ - ((ACCEPT_TABLE(dfa)[state]) & 0x80000000)) -#define dfa_other_xbits(dfa, state) \ - ((((ACCEPT_TABLE(dfa)[state]) >> 7) >> 14) & 0x7f) -#define dfa_other_audit(dfa, state) (((ACCEPT_TABLE2(dfa)[state]) >> 14) & 0x7f) -#define dfa_other_quiet(dfa, state) \ - ((((ACCEPT_TABLE2(dfa)[state]) >> 7) >> 14) & 0x7f) -#define dfa_other_xindex(dfa, state) \ - dfa_map_xindex((ACCEPT_TABLE(dfa)[state] >> 14) & 0x3fff) - -/** - * map_old_perms - map old file perms layout to the new layout - * @old: permission set in old mapping - * - * Returns: new permission mapping - */ -static u32 map_old_perms(u32 old) -{ - u32 new = old & 0xf; - - if (old & MAY_READ) - new |= AA_MAY_GETATTR | AA_MAY_OPEN; - if (old & MAY_WRITE) - 
new |= AA_MAY_SETATTR | AA_MAY_CREATE | AA_MAY_DELETE | - AA_MAY_CHMOD | AA_MAY_CHOWN | AA_MAY_OPEN; - if (old & 0x10) - new |= AA_MAY_LINK; - /* the old mapping lock and link_subset flags where overlaid - * and use was determined by part of a pair that they were in - */ - if (old & 0x20) - new |= AA_MAY_LOCK | AA_LINK_SUBSET; - if (old & 0x40) /* AA_EXEC_MMAP */ - new |= AA_EXEC_MMAP; - - return new; -} - -static void compute_fperms_allow(struct aa_perms *perms, struct aa_dfa *dfa, - aa_state_t state) -{ - perms->allow |= AA_MAY_GETATTR; - - /* change_profile wasn't determined by ownership in old mapping */ - if (ACCEPT_TABLE(dfa)[state] & 0x80000000) - perms->allow |= AA_MAY_CHANGE_PROFILE; - if (ACCEPT_TABLE(dfa)[state] & 0x40000000) - perms->allow |= AA_MAY_ONEXEC; -} - -static struct aa_perms compute_fperms_user(struct aa_dfa *dfa, - aa_state_t state) -{ - struct aa_perms perms = { }; - - perms.allow = map_old_perms(dfa_user_allow(dfa, state)); - perms.audit = map_old_perms(dfa_user_audit(dfa, state)); - perms.quiet = map_old_perms(dfa_user_quiet(dfa, state)); - perms.xindex = dfa_user_xindex(dfa, state); - - compute_fperms_allow(&perms, dfa, state); - - return perms; -} - -static struct aa_perms compute_fperms_other(struct aa_dfa *dfa, - aa_state_t state) -{ - struct aa_perms perms = { }; - - perms.allow = map_old_perms(dfa_other_allow(dfa, state)); - perms.audit = map_old_perms(dfa_other_audit(dfa, state)); - perms.quiet = map_old_perms(dfa_other_quiet(dfa, state)); - perms.xindex = dfa_other_xindex(dfa, state); - - compute_fperms_allow(&perms, dfa, state); - - return perms; -} - -/** - * aa_compute_fperms - convert dfa compressed perms to internal perms and store - * them so they can be retrieved later. 
- * @dfa: a dfa using fperms to remap to internal permissions - * - * Returns: remapped perm table - */ -static struct aa_perms *compute_fperms(struct aa_dfa *dfa) -{ - aa_state_t state; - unsigned int state_count; - struct aa_perms *table; - - AA_BUG(!dfa); - - state_count = dfa->tables[YYTD_ID_BASE]->td_lolen; - /* DFAs are restricted from having a state_count of less than 2 */ - table = kvcalloc(state_count * 2, sizeof(struct aa_perms), GFP_KERNEL); - if (!table) - return NULL; - - /* zero init so skip the trap state (state == 0) */ - for (state = 1; state < state_count; state++) { - table[state * 2] = compute_fperms_user(dfa, state); - table[state * 2 + 1] = compute_fperms_other(dfa, state); - } - - return table; -} - -static struct aa_perms *compute_xmatch_perms(struct aa_dfa *xmatch) -{ - struct aa_perms *perms; - int state; - int state_count; - - AA_BUG(!xmatch); - - state_count = xmatch->tables[YYTD_ID_BASE]->td_lolen; - /* DFAs are restricted from having a state_count of less than 2 */ - perms = kvcalloc(state_count, sizeof(struct aa_perms), GFP_KERNEL); - - /* zero init so skip the trap state (state == 0) */ - for (state = 1; state < state_count; state++) - perms[state].allow = dfa_user_allow(xmatch, state); - - return perms; -} - -static u32 map_other(u32 x) -{ - return ((x & 0x3) << 8) | /* SETATTR/GETATTR */ - ((x & 0x1c) << 18) | /* ACCEPT/BIND/LISTEN */ - ((x & 0x60) << 19); /* SETOPT/GETOPT */ -} - -static u32 map_xbits(u32 x) -{ - return ((x & 0x1) << 7) | - ((x & 0x7e) << 9); -} - -static struct aa_perms compute_perms_entry(struct aa_dfa *dfa, - aa_state_t state, - u32 version) -{ - struct aa_perms perms = { }; - - perms.allow = dfa_user_allow(dfa, state); - perms.audit = dfa_user_audit(dfa, state); - perms.quiet = dfa_user_quiet(dfa, state); - - /* - * This mapping is convulated due to history. 
- * v1-v4: only file perms, which are handled by compute_fperms - * v5: added policydb which dropped user conditional to gain new - * perm bits, but had to map around the xbits because the - * userspace compiler was still munging them. - * v9: adds using the xbits in policydb because the compiler now - * supports treating policydb permission bits different. - * Unfortunately there is no way to force auditing on the - * perms represented by the xbits - */ - perms.allow |= map_other(dfa_other_allow(dfa, state)); - if (VERSION_LE(version, v8)) - perms.allow |= AA_MAY_LOCK; - else - perms.allow |= map_xbits(dfa_user_xbits(dfa, state)); - - /* - * for v5-v9 perm mapping in the policydb, the other set is used - * to extend the general perm set - */ - perms.audit |= map_other(dfa_other_audit(dfa, state)); - perms.quiet |= map_other(dfa_other_quiet(dfa, state)); - if (VERSION_GT(version, v8)) - perms.quiet |= map_xbits(dfa_other_xbits(dfa, state)); - - return perms; -} - -static struct aa_perms *compute_perms(struct aa_dfa *dfa, u32 version) -{ - unsigned int state; - unsigned int state_count; - struct aa_perms *table; - - AA_BUG(!dfa); - - state_count = dfa->tables[YYTD_ID_BASE]->td_lolen; - /* DFAs are restricted from having a state_count of less than 2 */ - table = kvcalloc(state_count, sizeof(struct aa_perms), GFP_KERNEL); - if (!table) - return NULL; - - /* zero init so skip the trap state (state == 0) */ - for (state = 1; state < state_count; state++) - table[state] = compute_perms_entry(dfa, state, version); - - return table; -} - -/** - * remap_dfa_accept - remap old dfa accept table to be an index - * @dfa: dfa to do the remapping on - * @factor: scaling factor for the index conversion. - * - * Used in conjunction with compute_Xperms, it converts old style perms - * that are encoded in the dfa accept tables to the new style where - * there is a permission table and the accept table is an index into - * the permission table. 
- */ -static void remap_dfa_accept(struct aa_dfa *dfa, unsigned int factor) -{ - unsigned int state; - unsigned int state_count = dfa->tables[YYTD_ID_BASE]->td_lolen; - - AA_BUG(!dfa); - - for (state = 0; state < state_count; state++) - ACCEPT_TABLE(dfa)[state] = state * factor; - kvfree(dfa->tables[YYTD_ID_ACCEPT2]); - dfa->tables[YYTD_ID_ACCEPT2] = NULL; -} - /** * unpack_profile - unpack a serialized profile * @e: serialized data extent information (NOT NULL) @@ -1001,12 +727,10 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) } profile->xmatch_len = tmp; profile->xmatch.start[AA_CLASS_XMATCH] = DFA_START; - profile->xmatch.perms = compute_xmatch_perms(profile->xmatch.dfa); - if (!profile->xmatch.perms) { + if (aa_compat_map_xmatch(&profile->xmatch)) { info = "failed to convert xmatch permission table"; goto fail; } - remap_dfa_accept(profile->xmatch.dfa, 1); } /* disconnected attachment string is optional */ @@ -1131,14 +855,10 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) } if (!unpack_nameX(e, AA_STRUCTEND, NULL)) goto fail; - profile->policy.perms = compute_perms(profile->policy.dfa, - e->version); - if (!profile->policy.perms) { + if (aa_compat_map_policy(&profile->policy, e->version)) { info = "failed to remap policydb permission table"; goto fail; } - /* Do not remap internal dfas */ - remap_dfa_accept(profile->policy.dfa, 1); } else profile->policy.dfa = aa_get_dfa(nulldfa); @@ -1154,12 +874,10 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) "dfa_start")) /* default start state */ profile->file.start[AA_CLASS_FILE] = DFA_START; - profile->file.perms = compute_fperms(profile->file.dfa); - if (!profile->file.perms) { + if (aa_compat_map_file(&profile->file)) { info = "failed to remap file permission table"; goto fail; } - remap_dfa_accept(profile->file.dfa, 2); if (!unpack_trans_table(e, profile)) { info = "failed to unpack profile transition table"; goto fail; From 
90917d5b6866df79d892087ba51b46c983d2fcfe Mon Sep 17 00:00:00 2001 From: John Johansen Date: Sat, 16 Jul 2022 03:33:43 -0700 Subject: [PATCH 0026/4122] apparmor: extend permissions to support a label and tag string Add indexes for label and tag entries. Rename the domain table to the str_table as it's a shared string table with label and tags. Signed-off-by: John Johansen --- security/apparmor/domain.c | 18 ------------------ security/apparmor/include/domain.h | 6 ------ security/apparmor/include/lib.h | 6 ++++++ security/apparmor/include/perms.h | 2 ++ security/apparmor/include/policy.h | 6 ++++-- security/apparmor/lib.c | 19 +++++++++++++++++++ security/apparmor/policy_unpack.c | 2 +- 7 files changed, 32 insertions(+), 27 deletions(-) diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index 5883f0fc02d3..4cb046cf3a14 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -29,24 +29,6 @@ #include "include/policy.h" #include "include/policy_ns.h" -/** - * aa_free_domain_entries - free entries in a domain table - * @domain: the domain table to free (MAYBE NULL) - */ -void aa_free_domain_entries(struct aa_domain *domain) -{ - int i; - if (domain) { - if (!domain->table) - return; - - for (i = 0; i < domain->size; i++) - kfree_sensitive(domain->table[i]); - kfree_sensitive(domain->table); - domain->table = NULL; - } -} - /** * may_change_ptraced_domain - check if can change profile on ptraced task * @to_label: profile to change to (NOT NULL) diff --git a/security/apparmor/include/domain.h b/security/apparmor/include/domain.h index d14928fe1c6f..77f9a0ed0f04 100644 --- a/security/apparmor/include/domain.h +++ b/security/apparmor/include/domain.h @@ -16,11 +16,6 @@ #ifndef __AA_DOMAIN_H #define __AA_DOMAIN_H -struct aa_domain { - int size; - char **table; -}; - #define AA_CHANGE_NOFLAGS 0 #define AA_CHANGE_TEST 1 #define AA_CHANGE_CHILD 2 @@ -32,7 +27,6 @@ struct aa_label *x_table_lookup(struct aa_profile *profile, u32 xindex, int 
apparmor_bprm_creds_for_exec(struct linux_binprm *bprm); -void aa_free_domain_entries(struct aa_domain *domain); int aa_change_hat(const char *hats[], int count, u64 token, int flags); int aa_change_profile(const char *fqname, int flags); diff --git a/security/apparmor/include/lib.h b/security/apparmor/include/lib.h index f176f3ced2a3..f1a29ab7ea1b 100644 --- a/security/apparmor/include/lib.h +++ b/security/apparmor/include/lib.h @@ -99,6 +99,12 @@ static inline bool path_mediated_fs(struct dentry *dentry) return !(dentry->d_sb->s_flags & SB_NOUSER); } +struct aa_str_table { + int size; + char **table; +}; + +void aa_free_str_table(struct aa_str_table *table); struct counted_str { struct kref count; diff --git a/security/apparmor/include/perms.h b/security/apparmor/include/perms.h index 8739cef73549..d66059fcebb4 100644 --- a/security/apparmor/include/perms.h +++ b/security/apparmor/include/perms.h @@ -79,6 +79,8 @@ struct aa_perms { u32 hide; /* set only when ~allow | deny */ u32 xindex; + u32 tag; /* tag string index, if present */ + u32 label; /* label string index, if present */ }; #define ALL_PERMS_MASK 0xffffffff diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h index 3a7d165e8fcc..a28a662a0622 100644 --- a/security/apparmor/include/policy.h +++ b/security/apparmor/include/policy.h @@ -72,12 +72,14 @@ enum profile_mode { /* struct aa_policydb - match engine for a policy * dfa: dfa pattern match + * perms: table of permissions + * strs: table of strings, index by x * start: set of start states for the different classes of data */ struct aa_policydb { struct aa_dfa *dfa; struct aa_perms *perms; - struct aa_domain trans; + struct aa_str_table trans; aa_state_t start[AA_CLASS_LAST + 1]; }; @@ -86,7 +88,7 @@ static inline void aa_destroy_policydb(struct aa_policydb *policy) aa_put_dfa(policy->dfa); if (policy->perms) kvfree(policy->perms); - aa_free_domain_entries(&policy->trans); + aa_free_str_table(&policy->trans); } diff --git 
a/security/apparmor/lib.c b/security/apparmor/lib.c index 60deb4dc30c7..69aeb2dbd6d6 100644 --- a/security/apparmor/lib.c +++ b/security/apparmor/lib.c @@ -25,6 +25,25 @@ struct aa_perms allperms = { .allow = ALL_PERMS_MASK, .quiet = ALL_PERMS_MASK, .hide = ALL_PERMS_MASK }; +/** + * aa_free_str_table - free entries str table + * @str: the string table to free (MAYBE NULL) + */ +void aa_free_str_table(struct aa_str_table *t) +{ + int i; + + if (t) { + if (!t->table) + return; + + for (i = 0; i < t->size; i++) + kfree_sensitive(t->table[i]); + kfree_sensitive(t->table); + t->table = NULL; + } +} + /** * aa_split_fqname - split a fqname into a profile and namespace name * @fqname: a full qualified name in namespace profile format (NOT NULL) diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 63196df2841b..df39ee8f4e03 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -534,7 +534,7 @@ static bool unpack_trans_table(struct aa_ext *e, struct aa_profile *profile) return true; fail: - aa_free_domain_entries(&profile->file.trans); + aa_free_str_table(&profile->file.trans); e->pos = saved_pos; return false; } From 8c4b785a86be1219f7d50f7b38266c454d6a9bbc Mon Sep 17 00:00:00 2001 From: John Johansen Date: Tue, 19 Apr 2022 16:25:55 -0700 Subject: [PATCH 0027/4122] apparmor: add mediation class information to auditing Audit messages currently don't contain the mediation class which can make them less clear than they should be in some circumstances. With newer mediation classes coming this potential confusion will become worse. Fix this by adding the mediation class to the messages. 
Signed-off-by: John Johansen --- security/apparmor/audit.c | 28 ++++++++++++++++++++++++++++ security/apparmor/capability.c | 2 +- security/apparmor/file.c | 2 +- security/apparmor/include/apparmor.h | 2 +- security/apparmor/include/audit.h | 8 ++++++-- security/apparmor/include/net.h | 1 + security/apparmor/ipc.c | 2 +- security/apparmor/lib.c | 2 +- security/apparmor/lsm.c | 3 ++- security/apparmor/mount.c | 2 +- security/apparmor/policy.c | 2 +- security/apparmor/policy_unpack.c | 2 +- security/apparmor/resource.c | 3 ++- security/apparmor/task.c | 2 +- 14 files changed, 48 insertions(+), 13 deletions(-) diff --git a/security/apparmor/audit.c b/security/apparmor/audit.c index 704b0c895605..e638f7bc9f52 100644 --- a/security/apparmor/audit.c +++ b/security/apparmor/audit.c @@ -36,6 +36,28 @@ static const char *const aa_audit_type[] = { "AUTO" }; +static const char *const aa_class_names[] = { + "none", + "unknown", + "file", + "cap", + "net", + "rlimits", + "domain", + "mount", + "unknown", + "ptrace", + "signal", + "unknown", + "unknown", + "unknown", + "net", + "unknown", + "label", + "lsm", +}; + + /* * Currently AppArmor auditing is fed straight into the audit framework. * @@ -65,6 +87,12 @@ static void audit_pre(struct audit_buffer *ab, void *ca) audit_log_format(ab, " operation=\"%s\"", aad(sa)->op); } + if (aad(sa)->class) + audit_log_format(ab, " class=\"%s\"", + aad(sa)->class <= AA_CLASS_LAST ? 
+ aa_class_names[aad(sa)->class] : + "unknown"); + if (aad(sa)->info) { audit_log_format(ab, " info=\"%s\"", aad(sa)->info); if (aad(sa)->error) diff --git a/security/apparmor/capability.c b/security/apparmor/capability.c index deccea8654ad..6cabd6109f12 100644 --- a/security/apparmor/capability.c +++ b/security/apparmor/capability.c @@ -148,7 +148,7 @@ int aa_capable(struct aa_label *label, int cap, unsigned int opts) { struct aa_profile *profile; int error = 0; - DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_CAP, OP_CAPABLE); + DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_CAP, AA_CLASS_CAP, OP_CAPABLE); sa.u.cap = cap; error = fn_for_each_confined(label, profile, diff --git a/security/apparmor/file.c b/security/apparmor/file.c index 636efcade3f5..69d936d04f94 100644 --- a/security/apparmor/file.c +++ b/security/apparmor/file.c @@ -95,7 +95,7 @@ int aa_audit_file(struct aa_profile *profile, struct aa_perms *perms, kuid_t ouid, const char *info, int error) { int type = AUDIT_APPARMOR_AUTO; - DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_TASK, op); + DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_TASK, AA_CLASS_FILE, op); sa.u.tsk = NULL; aad(&sa)->request = request; diff --git a/security/apparmor/include/apparmor.h b/security/apparmor/include/apparmor.h index 8fd66a4ca0b8..6d9ca075fcb9 100644 --- a/security/apparmor/include/apparmor.h +++ b/security/apparmor/include/apparmor.h @@ -16,7 +16,7 @@ /* * Class of mediation types in the AppArmor policy db */ -#define AA_CLASS_ENTRY 0 +#define AA_CLASS_NONE 0 #define AA_CLASS_UNKNOWN 1 #define AA_CLASS_FILE 2 #define AA_CLASS_CAP 3 diff --git a/security/apparmor/include/audit.h b/security/apparmor/include/audit.h index 18519a4eb67e..c328f07f11cd 100644 --- a/security/apparmor/include/audit.h +++ b/security/apparmor/include/audit.h @@ -107,6 +107,7 @@ enum audit_type { struct apparmor_audit_data { int error; int type; + u16 class; const char *op; struct aa_label *label; const char *name; @@ -155,9 +156,12 @@ struct apparmor_audit_data { /* macros for dealing 
with apparmor_audit_data structure */ #define aad(SA) ((SA)->apparmor_audit_data) -#define DEFINE_AUDIT_DATA(NAME, T, X) \ +#define DEFINE_AUDIT_DATA(NAME, T, C, X) \ /* TODO: cleanup audit init so we don't need _aad = {0,} */ \ - struct apparmor_audit_data NAME ## _aad = { .op = (X), }; \ + struct apparmor_audit_data NAME ## _aad = { \ + .class = (C), \ + .op = (X), \ + }; \ struct common_audit_data NAME = \ { \ .type = (T), \ diff --git a/security/apparmor/include/net.h b/security/apparmor/include/net.h index aadb4b29fb66..6fa440b5daed 100644 --- a/security/apparmor/include/net.h +++ b/security/apparmor/include/net.h @@ -59,6 +59,7 @@ struct aa_sk_ctx { DEFINE_AUDIT_DATA(NAME, \ ((SK) && (F) != AF_UNIX) ? LSM_AUDIT_DATA_NET : \ LSM_AUDIT_DATA_NONE, \ + AA_CLASS_NET, \ OP); \ NAME.u.net = &(NAME ## _net); \ aad(&NAME)->net.type = (T); \ diff --git a/security/apparmor/ipc.c b/security/apparmor/ipc.c index 7255a9d52372..4ecaf2ba26c5 100644 --- a/security/apparmor/ipc.c +++ b/security/apparmor/ipc.c @@ -98,7 +98,7 @@ static int profile_signal_perm(struct aa_profile *profile, int aa_may_signal(struct aa_label *sender, struct aa_label *target, int sig) { struct aa_profile *profile; - DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, OP_SIGNAL); + DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, AA_CLASS_SIGNAL, OP_SIGNAL); aad(&sa)->signal = map_signal_num(sig); aad(&sa)->unmappedsig = sig; diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c index 69aeb2dbd6d6..768cc182e9ca 100644 --- a/security/apparmor/lib.c +++ b/security/apparmor/lib.c @@ -143,7 +143,7 @@ const char *aa_splitn_fqname(const char *fqname, size_t n, const char **ns_name, void aa_info_message(const char *str) { if (audit_enabled) { - DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, NULL); + DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, AA_CLASS_NONE, NULL); aad(&sa)->info = str; aa_audit_msg(AUDIT_APPARMOR_STATUS, &sa, NULL); diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 
ec873ff0a4bb..784709286a62 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -647,7 +647,8 @@ static int apparmor_setprocattr(const char *name, void *value, char *command, *largs = NULL, *args = value; size_t arg_size; int error; - DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, OP_SETPROCATTR); + DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, AA_CLASS_NONE, + OP_SETPROCATTR); if (size == 0) return -EINVAL; diff --git a/security/apparmor/mount.c b/security/apparmor/mount.c index 84aaf25e5dee..02d8215cb9fd 100644 --- a/security/apparmor/mount.c +++ b/security/apparmor/mount.c @@ -134,7 +134,7 @@ static int audit_mount(struct aa_profile *profile, const char *op, struct aa_perms *perms, const char *info, int error) { int audit_type = AUDIT_APPARMOR_AUTO; - DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, op); + DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, AA_CLASS_MOUNT, op); if (likely(!error)) { u32 mask = perms->audit; diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index cdcf26c9bed5..6222236de021 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -617,7 +617,7 @@ static int audit_policy(struct aa_label *label, const char *op, const char *ns_name, const char *name, const char *info, int error) { - DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, op); + DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, AA_CLASS_NONE, op); aad(&sa)->iface.ns = ns_name; aad(&sa)->name = name; diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index df39ee8f4e03..4bf33bd0ca69 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -100,7 +100,7 @@ static int audit_iface(struct aa_profile *new, const char *ns_name, int error) { struct aa_profile *profile = labels_profile(aa_current_raw_label()); - DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, NULL); + DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, AA_CLASS_NONE, NULL); if (e) aad(&sa)->iface.pos = e->pos - e->start; aad(&sa)->iface.ns = 
ns_name; diff --git a/security/apparmor/resource.c b/security/apparmor/resource.c index 1ae4874251a9..cc018469e22d 100644 --- a/security/apparmor/resource.c +++ b/security/apparmor/resource.c @@ -53,7 +53,8 @@ static int audit_resource(struct aa_profile *profile, unsigned int resource, unsigned long value, struct aa_label *peer, const char *info, int error) { - DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, OP_SETRLIMIT); + DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, AA_CLASS_RLIMITS, + OP_SETRLIMIT); aad(&sa)->rlim.rlim = resource; aad(&sa)->rlim.max = value; diff --git a/security/apparmor/task.c b/security/apparmor/task.c index 503dc0877fb1..b19900f85c14 100644 --- a/security/apparmor/task.c +++ b/security/apparmor/task.c @@ -285,7 +285,7 @@ int aa_may_ptrace(struct aa_label *tracer, struct aa_label *tracee, { struct aa_profile *profile; u32 xrequest = request << PTRACE_PERM_SHIFT; - DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, OP_PTRACE); + DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, AA_CLASS_PTRACE, OP_PTRACE); return xcheck_labels(tracer, tracee, profile, profile_tracer_perm(profile, tracee, request, &sa), From 22fac8a051191113becc0da62bf88b0ba8ce6c08 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Tue, 17 Dec 2019 15:40:41 -0800 Subject: [PATCH 0028/4122] apparmor: add user mode flag Allow the profile to contain a user mode prompt flag. This works similarly to complain mode but will try to send messages to a userspace daemon. If the daemon is not present or times out, regular enforcement will occur. 
Signed-off-by: John Johansen --- security/apparmor/include/policy.h | 3 +++ security/apparmor/include/policy_unpack.h | 1 + security/apparmor/lib.c | 7 ++----- security/apparmor/policy.c | 1 + security/apparmor/policy_unpack.c | 2 ++ 5 files changed, 9 insertions(+), 5 deletions(-) diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h index a28a662a0622..9fc5d7fa36e8 100644 --- a/security/apparmor/include/policy.h +++ b/security/apparmor/include/policy.h @@ -44,6 +44,8 @@ extern const char *const aa_profile_mode_names[]; #define COMPLAIN_MODE(_profile) PROFILE_MODE((_profile), APPARMOR_COMPLAIN) +#define USER_MODE(_profile) PROFILE_MODE((_profile), APPARMOR_USER) + #define KILL_MODE(_profile) PROFILE_MODE((_profile), APPARMOR_KILL) #define PROFILE_IS_HAT(_profile) ((_profile)->label.flags & FLAG_HAT) @@ -67,6 +69,7 @@ enum profile_mode { APPARMOR_COMPLAIN, /* allow and log access violations */ APPARMOR_KILL, /* kill task on access violation */ APPARMOR_UNCONFINED, /* profile set to unconfined */ + APPARMOR_USER, /* modified complain mode to userspace */ }; diff --git a/security/apparmor/include/policy_unpack.h b/security/apparmor/include/policy_unpack.h index cdfbc8a54a9d..1e10e360a0ec 100644 --- a/security/apparmor/include/policy_unpack.h +++ b/security/apparmor/include/policy_unpack.h @@ -36,6 +36,7 @@ struct aa_load_ent *aa_load_ent_alloc(void); #define PACKED_MODE_COMPLAIN 1 #define PACKED_MODE_KILL 2 #define PACKED_MODE_UNCONFINED 3 +#define PACKED_MODE_USER 4 struct aa_ns; diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c index 768cc182e9ca..b0fcec893274 100644 --- a/security/apparmor/lib.c +++ b/security/apparmor/lib.c @@ -327,11 +327,8 @@ void aa_apply_modes_to_perms(struct aa_profile *profile, struct aa_perms *perms) perms->kill = ALL_PERMS_MASK; else if (COMPLAIN_MODE(profile)) perms->complain = ALL_PERMS_MASK; -/* - * TODO: - * else if (PROMPT_MODE(profile)) - * perms->prompt = ALL_PERMS_MASK; - */ + else if 
(USER_MODE(profile)) + perms->prompt = ALL_PERMS_MASK; } /** diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index 6222236de021..3c3a5263695d 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -94,6 +94,7 @@ const char *const aa_profile_mode_names[] = { "complain", "kill", "unconfined", + "user", }; diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 4bf33bd0ca69..04e9fca250df 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -761,6 +761,8 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) } else if (tmp == PACKED_MODE_UNCONFINED) { profile->mode = APPARMOR_UNCONFINED; profile->label.flags |= FLAG_UNCONFINED; + } else if (tmp == PACKED_MODE_USER) { + profile->mode = APPARMOR_USER; } else { goto fail; } From a0792e2ceddc1bff8bda34a82b5ef7f00cbe7a9f Mon Sep 17 00:00:00 2001 From: John Johansen Date: Tue, 23 Aug 2022 01:06:15 -0700 Subject: [PATCH 0029/4122] apparmor: make transition table unpack generic so it can be reused Currently the transition table is tied to the file dfa. Make it so we can unpack a transition table against any dfa. 
Signed-off-by: John Johansen --- security/apparmor/policy_unpack.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 04e9fca250df..052e3b914c18 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -466,13 +466,14 @@ static struct aa_dfa *unpack_dfa(struct aa_ext *e) /** * unpack_trans_table - unpack a profile transition table * @e: serialized data extent information (NOT NULL) - * @profile: profile to add the accept table to (NOT NULL) + * @table: str table to unpack to (NOT NULL) * - * Returns: true if table successfully unpacked + * Returns: true if table successfully unpacked or not present */ -static bool unpack_trans_table(struct aa_ext *e, struct aa_profile *profile) +static bool unpack_trans_table(struct aa_ext *e, struct aa_str_table *strs) { void *saved_pos = e->pos; + char **table; /* exec table is optional */ if (unpack_nameX(e, AA_STRUCT, "xtable")) { @@ -482,12 +483,10 @@ static bool unpack_trans_table(struct aa_ext *e, struct aa_profile *profile) /* currently 2^24 bits entries 0-3 */ if (size > (1 << 24)) goto fail; - profile->file.trans.table = kcalloc(size, sizeof(char *), - GFP_KERNEL); - if (!profile->file.trans.table) + table = kcalloc(size, sizeof(char *), GFP_KERNEL); + if (!table) goto fail; - profile->file.trans.size = size; for (i = 0; i < size; i++) { char *str; int c, j, pos, size2 = unpack_strdup(e, &str, NULL); @@ -496,7 +495,7 @@ static bool unpack_trans_table(struct aa_ext *e, struct aa_profile *profile) */ if (!size2) goto fail; - profile->file.trans.table[i] = str; + table[i] = str; /* verify that name doesn't start with space */ if (isspace(*str)) goto fail; @@ -530,11 +529,14 @@ static bool unpack_trans_table(struct aa_ext *e, struct aa_profile *profile) goto fail; if (!unpack_nameX(e, AA_STRUCTEND, NULL)) goto fail; + + strs->table = table; + strs->size = size; } return true; 
fail: - aa_free_str_table(&profile->file.trans); + kfree_sensitive(table); e->pos = saved_pos; return false; } @@ -880,7 +882,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) info = "failed to remap file permission table"; goto fail; } - if (!unpack_trans_table(e, profile)) { + if (!unpack_trans_table(e, &profile->file.trans)) { info = "failed to unpack profile transition table"; goto fail; } From ad596ea74e746d60bb7e13f3adde097a08b2089b Mon Sep 17 00:00:00 2001 From: John Johansen Date: Mon, 18 Jul 2022 16:53:17 -0700 Subject: [PATCH 0030/4122] apparmor: group dfa policydb unpacking There are currently three policydb rule groupings (xmatch, file, policydb) that each do their own slightly different thing. Group them into a single routine and unify. This extends/unifies dfa features by - all dfas are allowed having an optional start field - all dfas are allowed having a string/transition table Signed-off-by: John Johansen --- security/apparmor/policy_unpack.c | 101 +++++++++++++++++++----------- 1 file changed, 63 insertions(+), 38 deletions(-) diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 052e3b914c18..a1fe0a5e8e57 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -648,6 +648,54 @@ fail: return false; } +static int unpack_pdb(struct aa_ext *e, struct aa_policydb *policy, + bool required_dfa, bool required_trans, + const char **info) +{ + int i; + + policy->dfa = unpack_dfa(e); + if (IS_ERR(policy->dfa)) { + int error = PTR_ERR(policy->dfa); + + policy->dfa = NULL; + *info = "failed to unpack - dfa"; + return error; + } else if (!policy->dfa) { + if (required_dfa) { + *info = "missing required dfa"; + return -EPROTO; + } + goto out; + } + + /* + * only unpack the following if a dfa is present + * + * sadly start was given different names for file and policydb + * but since it is optional we can try both + */ + if (!unpack_u32(e, &policy->start[0], 
"start")) + /* default start state */ + policy->start[0] = DFA_START; + if (!unpack_u32(e, &policy->start[AA_CLASS_FILE], "dfa_start")) { + /* default start state for xmatch and file dfa */ + policy->start[AA_CLASS_FILE] = DFA_START; + } /* setup class index */ + for (i = AA_CLASS_FILE + 1; i <= AA_CLASS_LAST; i++) { + policy->start[i] = aa_dfa_next(policy->dfa, policy->start[0], + i); + } + if (!unpack_trans_table(e, &policy->trans) && required_trans) { + *info = "failed to unpack profile transition table"; + return -EPROTO; + } + /* TODO: move compat mapping here, requires dfa merging first */ + +out: + return 0; +} + static u32 strhash(const void *data, u32 len, u32 seed) { const char * const *key = data; @@ -679,7 +727,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) struct rhashtable_params params = { 0 }; char *key = NULL; struct aa_data *data; - int i, error = -EPROTO; + int error = -EPROTO; kernel_cap_t tmpcap; u32 tmp; @@ -714,13 +762,10 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) (void) unpack_str(e, &profile->attach, "attach"); /* xmatch is optional and may be NULL */ - profile->xmatch.dfa = unpack_dfa(e); - if (IS_ERR(profile->xmatch.dfa)) { - error = PTR_ERR(profile->xmatch.dfa); - profile->xmatch.dfa = NULL; - info = "bad xmatch"; + error = unpack_pdb(e, &profile->xmatch, false, false, &info); + if (error) goto fail; - } + /* neither xmatch_len not xmatch_perms are optional if xmatch is set */ if (profile->xmatch.dfa) { if (!unpack_u32(e, &tmp, NULL)) { @@ -838,25 +883,16 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) if (unpack_nameX(e, AA_STRUCT, "policydb")) { /* generic policy dfa - optional and may be NULL */ info = "failed to unpack policydb"; - profile->policy.dfa = unpack_dfa(e); - if (IS_ERR(profile->policy.dfa)) { - error = PTR_ERR(profile->policy.dfa); - profile->policy.dfa = NULL; + error = unpack_pdb(e, &profile->policy, true, false, &info); + 
if (error) goto fail; - } else if (!profile->policy.dfa) { - error = -EPROTO; - goto fail; - } - if (!unpack_u32(e, &profile->policy.start[0], "start")) - /* default start state */ - profile->policy.start[0] = DFA_START; - /* setup class index */ - for (i = AA_CLASS_FILE; i <= AA_CLASS_LAST; i++) { - profile->policy.start[i] = - aa_dfa_next(profile->policy.dfa, - profile->policy.start[0], - i); - } + /* Fixup: drop when we get rid of start array */ + if (aa_dfa_next(profile->policy.dfa, profile->policy.start[0], + AA_CLASS_FILE)) + profile->policy.start[AA_CLASS_FILE] = + aa_dfa_next(profile->policy.dfa, + profile->policy.start[0], + AA_CLASS_FILE); if (!unpack_nameX(e, AA_STRUCTEND, NULL)) goto fail; if (aa_compat_map_policy(&profile->policy, e->version)) { @@ -867,25 +903,14 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) profile->policy.dfa = aa_get_dfa(nulldfa); /* get file rules */ - profile->file.dfa = unpack_dfa(e); - if (IS_ERR(profile->file.dfa)) { - error = PTR_ERR(profile->file.dfa); - profile->file.dfa = NULL; - info = "failed to unpack profile file rules"; + error = unpack_pdb(e, &profile->file, false, true, &info); + if (error) { goto fail; } else if (profile->file.dfa) { - if (!unpack_u32(e, &profile->file.start[AA_CLASS_FILE], - "dfa_start")) - /* default start state */ - profile->file.start[AA_CLASS_FILE] = DFA_START; if (aa_compat_map_file(&profile->file)) { info = "failed to remap file permission table"; goto fail; } - if (!unpack_trans_table(e, &profile->file.trans)) { - info = "failed to unpack profile transition table"; - goto fail; - } } else if (profile->policy.dfa && profile->policy.start[AA_CLASS_FILE]) { profile->file.dfa = aa_get_dfa(profile->policy.dfa); From 371e50a0b19f9765bfb9e4f172e72f4e9a4625bc Mon Sep 17 00:00:00 2001 From: John Johansen Date: Fri, 26 Aug 2022 09:26:57 -0700 Subject: [PATCH 0031/4122] apparmor: make unpack_array return a trinary value currently unpack_array() does not return an error
nor whether the array is not present. The ability to detect an error or the array not being present is needed so rework the unpack_array() to return the needed information. Signed-off-by: John Johansen --- security/apparmor/policy_unpack.c | 43 ++++++++++++++++---------- security/apparmor/policy_unpack_test.c | 12 +++---- 2 files changed, 33 insertions(+), 22 deletions(-) diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index a1fe0a5e8e57..7d3b3e664c1c 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -67,6 +67,11 @@ struct aa_ext { u32 version; }; +#define tri int +#define TRI_TRUE 1 +#define TRI_NONE 0 +#define TRI_FALSE -1 + /* audit callback for unpack fields */ static void audit_cb(struct audit_buffer *ab, void *va) { @@ -344,22 +349,22 @@ fail: return false; } -static size_t unpack_array(struct aa_ext *e, const char *name) +static tri unpack_array(struct aa_ext *e, const char *name, u16 *size) { void *pos = e->pos; if (unpack_nameX(e, AA_ARRAY, name)) { - int size; if (!inbounds(e, sizeof(u16))) goto fail; - size = (int)le16_to_cpu(get_unaligned((__le16 *) e->pos)); + *size = le16_to_cpu(get_unaligned((__le16 *) e->pos)); e->pos += sizeof(u16); - return size; + return TRI_TRUE; } + return TRI_NONE; fail: e->pos = pos; - return 0; + return TRI_FALSE; } static size_t unpack_blob(struct aa_ext *e, char **blob, const char *name) @@ -477,11 +482,12 @@ static bool unpack_trans_table(struct aa_ext *e, struct aa_str_table *strs) /* exec table is optional */ if (unpack_nameX(e, AA_STRUCT, "xtable")) { - int i, size; + u16 size; + int i; - size = unpack_array(e, NULL); - /* currently 2^24 bits entries 0-3 */ - if (size > (1 << 24)) + if (unpack_array(e, NULL, &size) != TRI_TRUE || + size > (1 << 24)) + /* currently 2^24 bits entries 0-3 */ goto fail; table = kcalloc(size, sizeof(char *), GFP_KERNEL); if (!table) @@ -546,9 +552,11 @@ static bool unpack_xattrs(struct aa_ext *e, struct aa_profile 
*profile) void *pos = e->pos; if (unpack_nameX(e, AA_STRUCT, "xattrs")) { - int i, size; + u16 size; + int i; - size = unpack_array(e, NULL); + if (unpack_array(e, NULL, &size) != TRI_TRUE) + goto fail; profile->xattr_count = size; profile->xattrs = kcalloc(size, sizeof(char *), GFP_KERNEL); if (!profile->xattrs) @@ -573,10 +581,12 @@ fail: static bool unpack_secmark(struct aa_ext *e, struct aa_profile *profile) { void *pos = e->pos; - int i, size; + u16 size; + int i; if (unpack_nameX(e, AA_STRUCT, "secmark")) { - size = unpack_array(e, NULL); + if (unpack_array(e, NULL, &size) != TRI_TRUE) + goto fail; profile->secmark = kcalloc(size, sizeof(struct aa_secmark), GFP_KERNEL); @@ -620,14 +630,15 @@ static bool unpack_rlimits(struct aa_ext *e, struct aa_profile *profile) /* rlimits are optional */ if (unpack_nameX(e, AA_STRUCT, "rlimits")) { - int i, size; + u16 size; + int i; u32 tmp = 0; if (!unpack_u32(e, &tmp, NULL)) goto fail; profile->rlimits.mask = tmp; - size = unpack_array(e, NULL); - if (size > RLIM_NLIMITS) + if (unpack_array(e, NULL, &size) != TRI_TRUE || + size > RLIM_NLIMITS) goto fail; for (i = 0; i < size; i++) { u64 tmp2 = 0; diff --git a/security/apparmor/policy_unpack_test.c b/security/apparmor/policy_unpack_test.c index 0a969b2e03db..1a43d538c4c0 100644 --- a/security/apparmor/policy_unpack_test.c +++ b/security/apparmor/policy_unpack_test.c @@ -144,8 +144,8 @@ static void policy_unpack_test_unpack_array_with_null_name(struct kunit *test) puf->e->pos += TEST_ARRAY_BUF_OFFSET; - array_size = unpack_array(puf->e, NULL); - + KUNIT_EXPECT_EQ(test, unpack_array(puf->e, NULL, &array_size), + TRI_TRUE); KUNIT_EXPECT_EQ(test, array_size, (u16)TEST_ARRAY_SIZE); KUNIT_EXPECT_PTR_EQ(test, puf->e->pos, puf->e->start + TEST_ARRAY_BUF_OFFSET + sizeof(u16) + 1); @@ -159,8 +159,8 @@ static void policy_unpack_test_unpack_array_with_name(struct kunit *test) puf->e->pos += TEST_NAMED_ARRAY_BUF_OFFSET; - array_size = unpack_array(puf->e, name); - + 
KUNIT_EXPECT_EQ(test, unpack_array(puf->e, name, &array_size), + TRI_TRUE); KUNIT_EXPECT_EQ(test, array_size, (u16)TEST_ARRAY_SIZE); KUNIT_EXPECT_PTR_EQ(test, puf->e->pos, puf->e->start + TEST_ARRAY_BUF_OFFSET + sizeof(u16) + 1); @@ -175,8 +175,8 @@ static void policy_unpack_test_unpack_array_out_of_bounds(struct kunit *test) puf->e->pos += TEST_NAMED_ARRAY_BUF_OFFSET; puf->e->end = puf->e->start + TEST_ARRAY_BUF_OFFSET + sizeof(u16); - array_size = unpack_array(puf->e, name); - + KUNIT_EXPECT_EQ(test, unpack_array(puf->e, name, &array_size), + TRI_TRUE); KUNIT_EXPECT_EQ(test, array_size, 0); KUNIT_EXPECT_PTR_EQ(test, puf->e->pos, puf->e->start + TEST_NAMED_ARRAY_BUF_OFFSET); From fd1b2b95a21177eaa9e26989637e477be4d93b2f Mon Sep 17 00:00:00 2001 From: John Johansen Date: Fri, 26 Aug 2022 08:53:42 -0700 Subject: [PATCH 0032/4122] apparmor: add the ability for policy to specify a permission table Currently permissions are encoded in the dfa accept entries that are then mapped to an internal permission structure. This limits the permissions that userspace can specify, so allow userspace to directly specify the permission table. 
Signed-off-by: John Johansen --- security/apparmor/include/policy.h | 5 +- security/apparmor/policy_unpack.c | 104 ++++++++++++++++++++++++++--- 2 files changed, 98 insertions(+), 11 deletions(-) diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h index 9fc5d7fa36e8..2c39bd389f87 100644 --- a/security/apparmor/include/policy.h +++ b/security/apparmor/include/policy.h @@ -81,7 +81,10 @@ enum profile_mode { */ struct aa_policydb { struct aa_dfa *dfa; - struct aa_perms *perms; + struct { + struct aa_perms *perms; + u32 size; + }; struct aa_str_table trans; aa_state_t start[AA_CLASS_LAST + 1]; }; diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 7d3b3e664c1c..b85dbdde8939 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -435,10 +435,11 @@ static int unpack_strdup(struct aa_ext *e, char **string, const char *name) /** * unpack_dfa - unpack a file rule dfa * @e: serialized data extent information (NOT NULL) + * @flags: dfa flags to check * * returns dfa or ERR_PTR or NULL if no dfa */ -static struct aa_dfa *unpack_dfa(struct aa_ext *e) +static struct aa_dfa *unpack_dfa(struct aa_ext *e, int flags) { char *blob = NULL; size_t size; @@ -454,8 +455,6 @@ static struct aa_dfa *unpack_dfa(struct aa_ext *e) size_t sz = blob - (char *) e->start - ((e->pos - e->start) & 7); size_t pad = ALIGN(sz, 8) - sz; - int flags = TO_ACCEPT1_FLAG(YYTD_DATA32) | - TO_ACCEPT2_FLAG(YYTD_DATA32); if (aa_g_paranoid_load) flags |= DFA_FLAG_VERIFY_STATES; dfa = aa_dfa_unpack(blob + pad, size - pad, flags); @@ -659,23 +658,104 @@ fail: return false; } +static bool unpack_perm(struct aa_ext *e, u32 version, struct aa_perms *perm) +{ + bool res; + + if (version != 1) + return false; + + res = unpack_u32(e, &perm->allow, NULL); + res = res && unpack_u32(e, &perm->allow, NULL); + res = res && unpack_u32(e, &perm->deny, NULL); + res = res && unpack_u32(e, &perm->subtree, NULL); + res = res && 
unpack_u32(e, &perm->cond, NULL); + res = res && unpack_u32(e, &perm->kill, NULL); + res = res && unpack_u32(e, &perm->complain, NULL); + res = res && unpack_u32(e, &perm->prompt, NULL); + res = res && unpack_u32(e, &perm->audit, NULL); + res = res && unpack_u32(e, &perm->quiet, NULL); + res = res && unpack_u32(e, &perm->hide, NULL); + res = res && unpack_u32(e, &perm->xindex, NULL); + res = res && unpack_u32(e, &perm->tag, NULL); + res = res && unpack_u32(e, &perm->label, NULL); + + return res; +} + +static ssize_t unpack_perms_table(struct aa_ext *e, struct aa_perms **perms) +{ + void *pos = e->pos; + u16 size = 0; + + AA_BUG(!perms); + /* + * policy perms are optional, in which case perms are embedded + * in the dfa accept table + */ + if (unpack_nameX(e, AA_STRUCT, "perms")) { + int i; + u32 version; + + if (!unpack_u32(e, &version, "version")) + goto fail_reset; + if (unpack_array(e, NULL, &size) != TRI_TRUE) + goto fail_reset; + *perms = kcalloc(size, sizeof(struct aa_perms), GFP_KERNEL); + if (!*perms) + goto fail_reset; + for (i = 0; i < size; i++) { + if (!unpack_perm(e, version, &(*perms)[i])) + goto fail; + } + if (!unpack_nameX(e, AA_ARRAYEND, NULL)) + goto fail; + if (!unpack_nameX(e, AA_STRUCTEND, NULL)) + goto fail; + } else + *perms = NULL; + + return size; + +fail: + kfree(*perms); +fail_reset: + e->pos = pos; + return -EPROTO; +} + static int unpack_pdb(struct aa_ext *e, struct aa_policydb *policy, bool required_dfa, bool required_trans, const char **info) { - int i; + void *pos = e->pos; + int i, flags, error = -EPROTO; - policy->dfa = unpack_dfa(e); + policy->size = unpack_perms_table(e, &policy->perms); + if (policy->size < 0) { + error = policy->size; + policy->perms = NULL; + *info = "failed to unpack - perms"; + goto fail; + } else if (policy->perms) { + /* perms table present accept is index */ + flags = TO_ACCEPT1_FLAG(YYTD_DATA32); + } else { + /* packed perms in accept1 and accept2 */ + flags = TO_ACCEPT1_FLAG(YYTD_DATA32) | + 
TO_ACCEPT2_FLAG(YYTD_DATA32); + } + + policy->dfa = unpack_dfa(e, flags); if (IS_ERR(policy->dfa)) { - int error = PTR_ERR(policy->dfa); - + error = PTR_ERR(policy->dfa); policy->dfa = NULL; *info = "failed to unpack - dfa"; - return error; + goto fail; } else if (!policy->dfa) { if (required_dfa) { *info = "missing required dfa"; - return -EPROTO; + goto fail; } goto out; } @@ -699,12 +779,16 @@ static int unpack_pdb(struct aa_ext *e, struct aa_policydb *policy, } if (!unpack_trans_table(e, &policy->trans) && required_trans) { *info = "failed to unpack profile transition table"; - return -EPROTO; + goto fail; } /* TODO: move compat mapping here, requires dfa merging first */ out: return 0; + +fail: + e->pos = pos; + return error; } static u32 strhash(const void *data, u32 len, u32 seed) From 670f31774ab6bf8e2d756f27444b035b9be8a0c9 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Fri, 26 Aug 2022 13:32:34 -0700 Subject: [PATCH 0033/4122] apparmor: verify permission table indexes While the dfa xindex's are verified, the indexes in the permission table are not currently verified. Fix this. 
Signed-off-by: John Johansen --- security/apparmor/policy_unpack.c | 35 ++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index b85dbdde8939..312bd632a472 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -781,8 +781,9 @@ static int unpack_pdb(struct aa_ext *e, struct aa_policydb *policy, *info = "failed to unpack profile transition table"; goto fail; } - /* TODO: move compat mapping here, requires dfa merging first */ + /* TODO: move compat mapping here, requires dfa merging first */ + /* TODO: move verify here, it has to be done after compat mappings */ out: return 0; @@ -1149,6 +1150,22 @@ static bool verify_dfa_xindex(struct aa_dfa *dfa, int table_size) return true; } +static bool verify_perm_indexes(struct aa_policydb *pdb) +{ + int i; + + for (i = 0; i < pdb->size; i++) { + if (pdb->perms[i].xindex >= pdb->trans.size) + return false; + if (pdb->perms[i].tag >= pdb->trans.size) + return false; + if (pdb->perms[i].label >= pdb->trans.size) + return false; + } + + return true; +} + /** * verify_profile - Do post unpack analysis to verify profile consistency * @profile: profile to verify (NOT NULL) @@ -1170,6 +1187,22 @@ static int verify_profile(struct aa_profile *profile) return -EPROTO; } + if (!verify_perm_indexes(&profile->file)) { + audit_iface(profile, NULL, NULL, + "Unpack: Invalid perm index", NULL, -EPROTO); + return -EPROTO; + } + if (!verify_perm_indexes(&profile->policy)) { + audit_iface(profile, NULL, NULL, + "Unpack: Invalid perm index", NULL, -EPROTO); + return -EPROTO; + } + if (!verify_perm_indexes(&profile->xmatch)) { + audit_iface(profile, NULL, NULL, + "Unpack: Invalid perm index", NULL, -EPROTO); + return -EPROTO; + } + return 0; } From 0bece4fa97a2bd397da66d4fced78f76eb214a3e Mon Sep 17 00:00:00 2001 From: John Johansen Date: Mon, 5 Sep 2022 23:53:29 -0700 Subject: [PATCH 0034/4122] 
apparmor: make sure perm indexes are accumulated accumulate permission indexes on a first encountered basis. This favors original rulesets so that new ones can not override without profile replacement. Signed-off-by: John Johansen --- security/apparmor/include/file.h | 4 ++-- security/apparmor/include/perms.h | 9 +++++++++ security/apparmor/lib.c | 14 ++++++++++++++ 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/security/apparmor/include/file.h b/security/apparmor/include/file.h index 521c8568f6d4..1a1c0f0c5071 100644 --- a/security/apparmor/include/file.h +++ b/security/apparmor/include/file.h @@ -88,10 +88,10 @@ static inline struct aa_label *aa_get_file_label(struct aa_file_ctx *ctx) * - exec type - which determines how the executable name and index are used * - flags - which modify how the destination name is applied */ -#define AA_X_INDEX_MASK 0x00ffffff +#define AA_X_INDEX_MASK AA_INDEX_MASK #define AA_X_TYPE_MASK 0x0c000000 -#define AA_X_NONE 0x00000000 +#define AA_X_NONE AA_INDEX_NONE #define AA_X_NAME 0x04000000 /* use executable name px */ #define AA_X_TABLE 0x08000000 /* use a specified name ->n# */ diff --git a/security/apparmor/include/perms.h b/security/apparmor/include/perms.h index d66059fcebb4..0de8c3fb090d 100644 --- a/security/apparmor/include/perms.h +++ b/security/apparmor/include/perms.h @@ -78,11 +78,20 @@ struct aa_perms { u32 quiet; /* set only when ~allow | deny */ u32 hide; /* set only when ~allow | deny */ + u32 xindex; u32 tag; /* tag string index, if present */ u32 label; /* label string index, if present */ }; +/* + * Indexes are broken into a 24 bit index and 8 bit flag. 
+ * For the index to be valid there must be a value in the flag + */ +#define AA_INDEX_MASK 0x00ffffff +#define AA_INDEX_FLAG_MASK 0xff000000 +#define AA_INDEX_NONE 0 + #define ALL_PERMS_MASK 0xffffffff extern struct aa_perms nullperms; extern struct aa_perms allperms; diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c index b0fcec893274..d6a8c361025b 100644 --- a/security/apparmor/lib.c +++ b/security/apparmor/lib.c @@ -348,6 +348,13 @@ void aa_perms_accum_raw(struct aa_perms *accum, struct aa_perms *addend) accum->hide &= addend->hide & ~addend->allow; accum->prompt |= addend->prompt & ~addend->allow & ~addend->deny; accum->subtree |= addend->subtree & ~addend->deny; + + if (!accum->xindex) + accum->xindex = addend->xindex; + if (!accum->tag) + accum->tag = addend->tag; + if (!accum->label) + accum->label = addend->label; } /** @@ -367,6 +374,13 @@ void aa_perms_accum(struct aa_perms *accum, struct aa_perms *addend) accum->hide &= addend->hide & ~accum->allow; accum->prompt |= addend->prompt & ~accum->allow & ~accum->deny; accum->subtree &= addend->subtree & ~accum->deny; + + if (!accum->xindex) + accum->xindex = addend->xindex; + if (!accum->tag) + accum->tag = addend->tag; + if (!accum->label) + accum->label = addend->label; } void aa_profile_match_label(struct aa_profile *profile, struct aa_label *label, From 3dfd16ab697ff23973b6fbb89808372bcd008dd1 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Mon, 5 Sep 2022 23:57:51 -0700 Subject: [PATCH 0035/4122] apparmor: cleanup: move perm accumulation into perms.h Perm accumulation is going to be used much more frequently so let the compiler figure out if it can be optimized when used. 
Signed-off-by: John Johansen --- security/apparmor/include/perms.h | 53 +++++++++++++++++++++++++++++++ security/apparmor/lib.c | 52 ------------------------------ 2 files changed, 53 insertions(+), 52 deletions(-) diff --git a/security/apparmor/include/perms.h b/security/apparmor/include/perms.h index 0de8c3fb090d..9fa71957ac3a 100644 --- a/security/apparmor/include/perms.h +++ b/security/apparmor/include/perms.h @@ -96,6 +96,59 @@ struct aa_perms { extern struct aa_perms nullperms; extern struct aa_perms allperms; +/** + * aa_perms_accum_raw - accumulate perms with out masking off overlapping perms + * @accum - perms struct to accumulate into + * @addend - perms struct to add to @accum + */ +static inline void aa_perms_accum_raw(struct aa_perms *accum, + struct aa_perms *addend) +{ + accum->deny |= addend->deny; + accum->allow &= addend->allow & ~addend->deny; + accum->audit |= addend->audit & addend->allow; + accum->quiet &= addend->quiet & ~addend->allow; + accum->kill |= addend->kill & ~addend->allow; + accum->complain |= addend->complain & ~addend->allow & ~addend->deny; + accum->cond |= addend->cond & ~addend->allow & ~addend->deny; + accum->hide &= addend->hide & ~addend->allow; + accum->prompt |= addend->prompt & ~addend->allow & ~addend->deny; + accum->subtree |= addend->subtree & ~addend->deny; + + if (!accum->xindex) + accum->xindex = addend->xindex; + if (!accum->tag) + accum->tag = addend->tag; + if (!accum->label) + accum->label = addend->label; +} + +/** + * aa_perms_accum - accumulate perms, masking off overlapping perms + * @accum - perms struct to accumulate into + * @addend - perms struct to add to @accum + */ +static inline void aa_perms_accum(struct aa_perms *accum, + struct aa_perms *addend) +{ + accum->deny |= addend->deny; + accum->allow &= addend->allow & ~accum->deny; + accum->audit |= addend->audit & accum->allow; + accum->quiet &= addend->quiet & ~accum->allow; + accum->kill |= addend->kill & ~accum->allow; + accum->complain |= 
addend->complain & ~accum->allow & ~accum->deny; + accum->cond |= addend->cond & ~accum->allow & ~accum->deny; + accum->hide &= addend->hide & ~accum->allow; + accum->prompt |= addend->prompt & ~accum->allow & ~accum->deny; + accum->subtree &= addend->subtree & ~accum->deny; + + if (!accum->xindex) + accum->xindex = addend->xindex; + if (!accum->tag) + accum->tag = addend->tag; + if (!accum->label) + accum->label = addend->label; +} #define xcheck(FN1, FN2) \ ({ \ diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c index d6a8c361025b..10e3b11e02ad 100644 --- a/security/apparmor/lib.c +++ b/security/apparmor/lib.c @@ -331,58 +331,6 @@ void aa_apply_modes_to_perms(struct aa_profile *profile, struct aa_perms *perms) perms->prompt = ALL_PERMS_MASK; } -/** - * aa_perms_accum_raw - accumulate perms with out masking off overlapping perms - * @accum - perms struct to accumulate into - * @addend - perms struct to add to @accum - */ -void aa_perms_accum_raw(struct aa_perms *accum, struct aa_perms *addend) -{ - accum->deny |= addend->deny; - accum->allow &= addend->allow & ~addend->deny; - accum->audit |= addend->audit & addend->allow; - accum->quiet &= addend->quiet & ~addend->allow; - accum->kill |= addend->kill & ~addend->allow; - accum->complain |= addend->complain & ~addend->allow & ~addend->deny; - accum->cond |= addend->cond & ~addend->allow & ~addend->deny; - accum->hide &= addend->hide & ~addend->allow; - accum->prompt |= addend->prompt & ~addend->allow & ~addend->deny; - accum->subtree |= addend->subtree & ~addend->deny; - - if (!accum->xindex) - accum->xindex = addend->xindex; - if (!accum->tag) - accum->tag = addend->tag; - if (!accum->label) - accum->label = addend->label; -} - -/** - * aa_perms_accum - accumulate perms, masking off overlapping perms - * @accum - perms struct to accumulate into - * @addend - perms struct to add to @accum - */ -void aa_perms_accum(struct aa_perms *accum, struct aa_perms *addend) -{ - accum->deny |= addend->deny; - 
accum->allow &= addend->allow & ~accum->deny; - accum->audit |= addend->audit & accum->allow; - accum->quiet &= addend->quiet & ~accum->allow; - accum->kill |= addend->kill & ~accum->allow; - accum->complain |= addend->complain & ~accum->allow & ~accum->deny; - accum->cond |= addend->cond & ~accum->allow & ~accum->deny; - accum->hide &= addend->hide & ~accum->allow; - accum->prompt |= addend->prompt & ~accum->allow & ~accum->deny; - accum->subtree &= addend->subtree & ~accum->deny; - - if (!accum->xindex) - accum->xindex = addend->xindex; - if (!accum->tag) - accum->tag = addend->tag; - if (!accum->label) - accum->label = addend->label; -} - void aa_profile_match_label(struct aa_profile *profile, struct aa_label *label, int type, u32 request, struct aa_perms *perms) { From 3bf3d728a58d7dcf2bbf179e3263fb8651f6097b Mon Sep 17 00:00:00 2001 From: John Johansen Date: Tue, 6 Sep 2022 00:38:20 -0700 Subject: [PATCH 0036/4122] apparmor: verify loaded permission bits masks don't overlap Add an additional verification that loaded permission sets don't overlap in ways that are not intended. This will help ensure that permission accumulation can't result in an invalid permission set. 
Signed-off-by: John Johansen --- security/apparmor/policy_unpack.c | 34 +++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 312bd632a472..5a78aaa0eea4 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -1150,11 +1150,37 @@ static bool verify_dfa_xindex(struct aa_dfa *dfa, int table_size) return true; } -static bool verify_perm_indexes(struct aa_policydb *pdb) +static bool verify_perm(struct aa_perms *perm) +{ + /* TODO: allow option to just force the perms into a valid state */ + if (perm->allow & perm->deny) + return false; + if (perm->subtree & ~perm->allow) + return false; + if (perm->cond & (perm->allow | perm->deny)) + return false; + if (perm->kill & perm->allow) + return false; + if (perm->complain & (perm->allow | perm->deny)) + return false; + if (perm->prompt & (perm->allow | perm->deny)) + return false; + if (perm->complain & perm->prompt) + return false; + if (perm->hide & perm->allow) + return false; + + return true; +} + +static bool verify_perms(struct aa_policydb *pdb) { int i; for (i = 0; i < pdb->size; i++) { + if (!verify_perm(&pdb->perms[i])) + return false; + /* verify indexes into str table */ if (pdb->perms[i].xindex >= pdb->trans.size) return false; if (pdb->perms[i].tag >= pdb->trans.size) @@ -1187,17 +1213,17 @@ static int verify_profile(struct aa_profile *profile) return -EPROTO; } - if (!verify_perm_indexes(&profile->file)) { + if (!verify_perms(&profile->file)) { audit_iface(profile, NULL, NULL, "Unpack: Invalid perm index", NULL, -EPROTO); return -EPROTO; } - if (!verify_perm_indexes(&profile->policy)) { + if (!verify_perms(&profile->policy)) { audit_iface(profile, NULL, NULL, "Unpack: Invalid perm index", NULL, -EPROTO); return -EPROTO; } - if (!verify_perm_indexes(&profile->xmatch)) { + if (!verify_perms(&profile->xmatch)) { audit_iface(profile, NULL, NULL, "Unpack: Invalid perm 
index", NULL, -EPROTO); return -EPROTO; From 217af7e2f4deb629aaa49622685ccfee923898ca Mon Sep 17 00:00:00 2001 From: John Johansen Date: Fri, 29 Jul 2022 17:17:31 -0700 Subject: [PATCH 0037/4122] apparmor: refactor profile rules and attachments In preparation for moving from a single set of rules and a single attachment to multiple rulesets and attachments separate from the profile, refactor attachment information and ruleset info into their own structures. Signed-off-by: John Johansen --- security/apparmor/apparmorfs.c | 27 ++++--- security/apparmor/capability.c | 12 +-- security/apparmor/domain.c | 81 +++++++++++--------- security/apparmor/file.c | 14 ++-- security/apparmor/include/label.h | 9 ++- security/apparmor/include/perms.h | 3 +- security/apparmor/include/policy.h | 84 ++++++++++++-------- security/apparmor/ipc.c | 9 ++- security/apparmor/label.c | 45 ++++++----- security/apparmor/lib.c | 13 ++-- security/apparmor/lsm.c | 4 +- security/apparmor/mount.c | 31 ++++---- security/apparmor/net.c | 24 +++--- security/apparmor/policy.c | 44 +++++++---- security/apparmor/policy_ns.c | 4 +- security/apparmor/policy_unpack.c | 118 +++++++++++++++-------------- security/apparmor/resource.c | 15 ++-- security/apparmor/task.c | 10 +-- 18 files changed, 308 insertions(+), 239 deletions(-) diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index fb9d2ccb34d6..84ef8b400b40 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -611,30 +611,29 @@ static const struct file_operations aa_fs_ns_revision_fops = { static void profile_query_cb(struct aa_profile *profile, struct aa_perms *perms, const char *match_str, size_t match_len) { + struct aa_ruleset *rules = &profile->rules; struct aa_perms tmp = { }; - struct aa_dfa *dfa; aa_state_t state = DFA_NOMATCH; if (profile_unconfined(profile)) return; - if (profile->file.dfa && *match_str == AA_CLASS_FILE) { - dfa = profile->file.dfa; - state = aa_dfa_match_len(dfa, -
profile->file.start[AA_CLASS_FILE], + if (rules->file.dfa && *match_str == AA_CLASS_FILE) { + state = aa_dfa_match_len(rules->file.dfa, + rules->file.start[AA_CLASS_FILE], match_str + 1, match_len - 1); if (state) { struct path_cond cond = { }; - tmp = *(aa_lookup_fperms(&(profile->file), state, &cond)); + tmp = *(aa_lookup_fperms(&(rules->file), state, &cond)); } - } else if (profile->policy.dfa) { - if (!PROFILE_MEDIATES(profile, *match_str)) + } else if (rules->policy.dfa) { + if (!RULE_MEDIATES(rules, *match_str)) return; /* no change to current perms */ - dfa = profile->policy.dfa; - state = aa_dfa_match_len(dfa, profile->policy.start[0], + state = aa_dfa_match_len(rules->policy.dfa, + rules->policy.start[0], match_str, match_len); if (state) - tmp = *aa_lookup_perms(&profile->policy, state); + tmp = *aa_lookup_perms(&rules->policy, state); } aa_apply_modes_to_perms(profile, &tmp); aa_perms_accum_raw(perms, &tmp); @@ -1093,9 +1092,9 @@ static int seq_profile_attach_show(struct seq_file *seq, void *v) struct aa_proxy *proxy = seq->private; struct aa_label *label = aa_get_label_rcu(&proxy->label); struct aa_profile *profile = labels_profile(label); - if (profile->attach) - seq_printf(seq, "%s\n", profile->attach); - else if (profile->xmatch.dfa) + if (profile->attach.xmatch_str) + seq_printf(seq, "%s\n", profile->attach.xmatch_str); + else if (profile->attach.xmatch.dfa) seq_puts(seq, "\n"); else seq_printf(seq, "%s\n", profile->base.name); diff --git a/security/apparmor/capability.c b/security/apparmor/capability.c index 6cabd6109f12..b66ec63e2a48 100644 --- a/security/apparmor/capability.c +++ b/security/apparmor/capability.c @@ -64,6 +64,7 @@ static void audit_cb(struct audit_buffer *ab, void *va) static int audit_caps(struct common_audit_data *sa, struct aa_profile *profile, int cap, int error) { + struct aa_ruleset *rules = &profile->rules; struct audit_cache *ent; int type = AUDIT_APPARMOR_AUTO; @@ -72,13 +73,13 @@ static int audit_caps(struct 
common_audit_data *sa, struct aa_profile *profile, if (likely(!error)) { /* test if auditing is being forced */ if (likely((AUDIT_MODE(profile) != AUDIT_ALL) && - !cap_raised(profile->caps.audit, cap))) + !cap_raised(rules->caps.audit, cap))) return 0; type = AUDIT_APPARMOR_AUDIT; } else if (KILL_MODE(profile) || - cap_raised(profile->caps.kill, cap)) { + cap_raised(rules->caps.kill, cap)) { type = AUDIT_APPARMOR_KILL; - } else if (cap_raised(profile->caps.quiet, cap) && + } else if (cap_raised(rules->caps.quiet, cap) && AUDIT_MODE(profile) != AUDIT_NOQUIET && AUDIT_MODE(profile) != AUDIT_ALL) { /* quiet auditing */ @@ -114,10 +115,11 @@ static int audit_caps(struct common_audit_data *sa, struct aa_profile *profile, static int profile_capable(struct aa_profile *profile, int cap, unsigned int opts, struct common_audit_data *sa) { + struct aa_ruleset *rules = &profile->rules; int error; - if (cap_raised(profile->caps.allow, cap) && - !cap_raised(profile->caps.denied, cap)) + if (cap_raised(rules->caps.allow, cap) && + !cap_raised(rules->caps.denied, cap)) error = 0; else error = -EPERM; diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index 4cb046cf3a14..ad035d14cfc5 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -81,19 +81,20 @@ static inline aa_state_t match_component(struct aa_profile *profile, struct aa_profile *tp, bool stack, aa_state_t state) { + struct aa_ruleset *rules = &profile->rules; const char *ns_name; if (stack) - state = aa_dfa_match(profile->file.dfa, state, "&"); + state = aa_dfa_match(rules->file.dfa, state, "&"); if (profile->ns == tp->ns) - return aa_dfa_match(profile->file.dfa, state, tp->base.hname); + return aa_dfa_match(rules->file.dfa, state, tp->base.hname); /* try matching with namespace name and then profile */ ns_name = aa_ns_name(profile->ns, tp->ns, true); - state = aa_dfa_match_len(profile->file.dfa, state, ":", 1); - state = aa_dfa_match(profile->file.dfa, state, ns_name); - state = 
aa_dfa_match_len(profile->file.dfa, state, ":", 1); - return aa_dfa_match(profile->file.dfa, state, tp->base.hname); + state = aa_dfa_match_len(rules->file.dfa, state, ":", 1); + state = aa_dfa_match(rules->file.dfa, state, ns_name); + state = aa_dfa_match_len(rules->file.dfa, state, ":", 1); + return aa_dfa_match(rules->file.dfa, state, tp->base.hname); } /** @@ -117,6 +118,7 @@ static int label_compound_match(struct aa_profile *profile, aa_state_t state, bool subns, u32 request, struct aa_perms *perms) { + struct aa_ruleset *rules = &profile->rules; struct aa_profile *tp; struct label_it i; struct path_cond cond = { }; @@ -139,12 +141,12 @@ next: label_for_each_cont(i, label, tp) { if (!aa_ns_visible(profile->ns, tp->ns, subns)) continue; - state = aa_dfa_match(profile->file.dfa, state, "//&"); + state = aa_dfa_match(rules->file.dfa, state, "//&"); state = match_component(profile, tp, false, state); if (!state) goto fail; } - *perms = *(aa_lookup_fperms(&(profile->file), state, &cond)); + *perms = *(aa_lookup_fperms(&(rules->file), state, &cond)); aa_apply_modes_to_perms(profile, perms); if ((perms->allow & request) != request) return -EACCES; @@ -177,6 +179,7 @@ static int label_components_match(struct aa_profile *profile, aa_state_t start, bool subns, u32 request, struct aa_perms *perms) { + struct aa_ruleset *rules = &profile->rules; struct aa_profile *tp; struct label_it i; struct aa_perms tmp; @@ -197,7 +200,7 @@ static int label_components_match(struct aa_profile *profile, return 0; next: - tmp = *(aa_lookup_fperms(&(profile->file), state, &cond)); + tmp = *(aa_lookup_fperms(&(rules->file), state, &cond)); aa_apply_modes_to_perms(profile, &tmp); aa_perms_accum(perms, &tmp); label_for_each_cont(i, label, tp) { @@ -206,7 +209,7 @@ next: state = match_component(profile, tp, stack, start); if (!state) goto fail; - tmp = *(aa_lookup_fperms(&(profile->file), state, &cond)); + tmp = *(aa_lookup_fperms(&(rules->file), state, &cond)); 
aa_apply_modes_to_perms(profile, &tmp); aa_perms_accum(perms, &tmp); } @@ -296,18 +299,19 @@ static int aa_xattrs_match(const struct linux_binprm *bprm, ssize_t size; struct dentry *d; char *value = NULL; - int value_size = 0, ret = profile->xattr_count; + struct aa_attachment *attach = &profile->attach; + int value_size = 0, ret = attach->xattr_count; - if (!bprm || !profile->xattr_count) + if (!bprm || !attach->xattr_count) return 0; might_sleep(); /* transition from exec match to xattr set */ - state = aa_dfa_outofband_transition(profile->xmatch.dfa, state); + state = aa_dfa_outofband_transition(attach->xmatch.dfa, state); d = bprm->file->f_path.dentry; - for (i = 0; i < profile->xattr_count; i++) { - size = vfs_getxattr_alloc(&init_user_ns, d, profile->xattrs[i], + for (i = 0; i < attach->xattr_count; i++) { + size = vfs_getxattr_alloc(&init_user_ns, d, attach->xattrs[i], &value, value_size, GFP_KERNEL); if (size >= 0) { u32 index, perm; @@ -317,20 +321,20 @@ static int aa_xattrs_match(const struct linux_binprm *bprm, * that not present xattr can be distinguished from a 0 * length value or rule that matches any value */ - state = aa_dfa_null_transition(profile->xmatch.dfa, + state = aa_dfa_null_transition(attach->xmatch.dfa, state); /* Check xattr value */ - state = aa_dfa_match_len(profile->xmatch.dfa, state, + state = aa_dfa_match_len(attach->xmatch.dfa, state, value, size); - index = ACCEPT_TABLE(profile->xmatch.dfa)[state]; - perm = profile->xmatch.perms[index].allow; + index = ACCEPT_TABLE(attach->xmatch.dfa)[state]; + perm = attach->xmatch.perms[index].allow; if (!(perm & MAY_EXEC)) { ret = -EINVAL; goto out; } } /* transition to next element */ - state = aa_dfa_outofband_transition(profile->xmatch.dfa, state); + state = aa_dfa_outofband_transition(attach->xmatch.dfa, state); if (size < 0) { /* * No xattr match, so verify if transition to @@ -382,6 +386,8 @@ static struct aa_label *find_attach(const struct linux_binprm *bprm, rcu_read_lock(); restart: 
list_for_each_entry_rcu(profile, head, base.list) { + struct aa_attachment *attach = &profile->attach; + if (profile->label.flags & FLAG_NULL && &profile->label == ns_unconfined(profile->ns)) continue; @@ -397,16 +403,16 @@ restart: * as another profile, signal a conflict and refuse to * match. */ - if (profile->xmatch.dfa) { + if (attach->xmatch.dfa) { unsigned int count; aa_state_t state; u32 index, perm; - state = aa_dfa_leftmatch(profile->xmatch.dfa, - profile->xmatch.start[AA_CLASS_XMATCH], + state = aa_dfa_leftmatch(attach->xmatch.dfa, + attach->xmatch.start[AA_CLASS_XMATCH], name, &count); - index = ACCEPT_TABLE(profile->xmatch.dfa)[state]; - perm = profile->xmatch.perms[index].allow; + index = ACCEPT_TABLE(attach->xmatch.dfa)[state]; + perm = attach->xmatch.perms[index].allow; /* any accepting state means a valid match. */ if (perm & MAY_EXEC) { int ret = 0; @@ -414,7 +420,7 @@ restart: if (count < candidate_len) continue; - if (bprm && profile->xattr_count) { + if (bprm && attach->xattr_count) { long rev = READ_ONCE(ns->revision); if (!aa_get_profile_not0(profile)) @@ -453,7 +459,7 @@ restart: * xattrs, or a longer match */ candidate = profile; - candidate_len = max(count, profile->xmatch_len); + candidate_len = max(count, attach->xmatch_len); candidate_xattrs = ret; conflict = false; } @@ -497,6 +503,7 @@ static const char *next_name(int xtype, const char *name) struct aa_label *x_table_lookup(struct aa_profile *profile, u32 xindex, const char **name) { + struct aa_ruleset *rules = &profile->rules; struct aa_label *label = NULL; u32 xtype = xindex & AA_X_TYPE_MASK; int index = xindex & AA_X_INDEX_MASK; @@ -507,7 +514,7 @@ struct aa_label *x_table_lookup(struct aa_profile *profile, u32 xindex, /* TODO: move lookup parsing to unpack time so this is a straight * index into the resultant label */ - for (*name = profile->file.trans.table[index]; !label && *name; + for (*name = rules->file.trans.table[index]; !label && *name; *name = next_name(xtype, *name)) { 
if (xindex & AA_X_CHILD) { struct aa_profile *new_profile; @@ -546,6 +553,7 @@ static struct aa_label *x_to_label(struct aa_profile *profile, const char **lookupname, const char **info) { + struct aa_ruleset *rules = &profile->rules; struct aa_label *new = NULL; struct aa_ns *ns = profile->ns; u32 xtype = xindex & AA_X_TYPE_MASK; @@ -558,7 +566,7 @@ static struct aa_label *x_to_label(struct aa_profile *profile, break; case AA_X_TABLE: /* TODO: fix when perm mapping done at unload */ - stack = profile->file.trans.table[xindex & AA_X_INDEX_MASK]; + stack = rules->file.trans.table[xindex & AA_X_INDEX_MASK]; if (*stack != '&') { /* released by caller */ new = x_table_lookup(profile, xindex, lookupname); @@ -612,9 +620,10 @@ static struct aa_label *profile_transition(struct aa_profile *profile, char *buffer, struct path_cond *cond, bool *secure_exec) { + struct aa_ruleset *rules = &profile->rules; struct aa_label *new = NULL; const char *info = NULL, *name = NULL, *target = NULL; - aa_state_t state = profile->file.start[AA_CLASS_FILE]; + aa_state_t state = rules->file.start[AA_CLASS_FILE]; struct aa_perms perms = {}; bool nonewprivs = false; int error = 0; @@ -648,7 +657,7 @@ static struct aa_label *profile_transition(struct aa_profile *profile, } /* find exec permissions for name */ - state = aa_str_perms(&(profile->file), state, name, cond, &perms); + state = aa_str_perms(&(rules->file), state, name, cond, &perms); if (perms.allow & MAY_EXEC) { /* exec permission determine how to transition */ new = x_to_label(profile, bprm, name, perms.xindex, &target, @@ -710,7 +719,8 @@ static int profile_onexec(struct aa_profile *profile, struct aa_label *onexec, char *buffer, struct path_cond *cond, bool *secure_exec) { - aa_state_t state = profile->file.start[AA_CLASS_FILE]; + struct aa_ruleset *rules = &profile->rules; + aa_state_t state = rules->file.start[AA_CLASS_FILE]; struct aa_perms perms = {}; const char *xname = NULL, *info = "change_profile onexec"; int error = 
-EACCES; @@ -743,7 +753,7 @@ static int profile_onexec(struct aa_profile *profile, struct aa_label *onexec, } /* find exec permissions for name */ - state = aa_str_perms(&(profile->file), state, xname, cond, &perms); + state = aa_str_perms(&(rules->file), state, xname, cond, &perms); if (!(perms.allow & AA_MAY_ONEXEC)) { info = "no change_onexec valid for executable"; goto audit; @@ -752,7 +762,7 @@ static int profile_onexec(struct aa_profile *profile, struct aa_label *onexec, * onexec permission is linked to exec with a standard pairing * exec\0change_profile */ - state = aa_dfa_null_transition(profile->file.dfa, state); + state = aa_dfa_null_transition(rules->file.dfa, state); error = change_profile_perms(profile, onexec, stack, AA_MAY_ONEXEC, state, &perms); if (error) { @@ -1249,12 +1259,13 @@ static int change_profile_perms_wrapper(const char *op, const char *name, struct aa_label *target, bool stack, u32 request, struct aa_perms *perms) { + struct aa_ruleset *rules = &profile->rules; const char *info = NULL; int error = 0; if (!error) error = change_profile_perms(profile, target, stack, request, - profile->file.start[AA_CLASS_FILE], + rules->file.start[AA_CLASS_FILE], perms); if (error) error = aa_audit_file(profile, perms, op, request, name, diff --git a/security/apparmor/file.c b/security/apparmor/file.c index 69d936d04f94..ef5d98f81a2b 100644 --- a/security/apparmor/file.c +++ b/security/apparmor/file.c @@ -224,11 +224,12 @@ int __aa_path_perm(const char *op, struct aa_profile *profile, const char *name, u32 request, struct path_cond *cond, int flags, struct aa_perms *perms) { + struct aa_ruleset *rules = &profile->rules; int e = 0; if (profile_unconfined(profile)) return 0; - aa_str_perms(&(profile->file), profile->file.start[AA_CLASS_FILE], + aa_str_perms(&(rules->file), rules->file.start[AA_CLASS_FILE], name, cond, perms); if (request & ~perms->allow) e = -EACCES; @@ -316,6 +317,7 @@ static int profile_path_link(struct aa_profile *profile, const struct 
path *target, char *buffer2, struct path_cond *cond) { + struct aa_ruleset *rules = &profile->rules; const char *lname, *tname = NULL; struct aa_perms lperms = {}, perms; const char *info = NULL; @@ -336,16 +338,16 @@ static int profile_path_link(struct aa_profile *profile, error = -EACCES; /* aa_str_perms - handles the case of the dfa being NULL */ - state = aa_str_perms(&(profile->file), - profile->file.start[AA_CLASS_FILE], lname, + state = aa_str_perms(&(rules->file), + rules->file.start[AA_CLASS_FILE], lname, cond, &lperms); if (!(lperms.allow & AA_MAY_LINK)) goto audit; /* test to see if target can be paired with link */ - state = aa_dfa_null_transition(profile->file.dfa, state); - aa_str_perms(&(profile->file), state, tname, cond, &perms); + state = aa_dfa_null_transition(rules->file.dfa, state); + aa_str_perms(&(rules->file), state, tname, cond, &perms); /* force audit/quiet masks for link are stored in the second entry * in the link pair. @@ -367,7 +369,7 @@ static int profile_path_link(struct aa_profile *profile, /* Do link perm subset test requiring allowed permission on link are * a subset of the allowed permissions on target. 
*/ - aa_str_perms(&(profile->file), profile->file.start[AA_CLASS_FILE], + aa_str_perms(&(rules->file), rules->file.start[AA_CLASS_FILE], tname, cond, &perms); /* AA_MAY_LINK is not considered in the subset test */ diff --git a/security/apparmor/include/label.h b/security/apparmor/include/label.h index 1130ba10a152..2a72e6b17d68 100644 --- a/security/apparmor/include/label.h +++ b/security/apparmor/include/label.h @@ -261,7 +261,7 @@ for ((I).i = (I).j = 0; \ struct label_it i; \ int ret = 0; \ label_for_each(i, (L), profile) { \ - if (PROFILE_MEDIATES(profile, (C))) { \ + if (RULE_MEDIATES(&profile->rules, (C))) { \ ret = 1; \ break; \ } \ @@ -357,9 +357,10 @@ static inline const char *aa_label_str_split(const char *str) struct aa_perms; -int aa_label_match(struct aa_profile *profile, struct aa_label *label, - aa_state_t state, bool subns, u32 request, - struct aa_perms *perms); +struct aa_ruleset; +int aa_label_match(struct aa_profile *profile, struct aa_ruleset *rules, + struct aa_label *label, aa_state_t state, bool subns, + u32 request, struct aa_perms *perms); /** diff --git a/security/apparmor/include/perms.h b/security/apparmor/include/perms.h index 9fa71957ac3a..797a7a00644d 100644 --- a/security/apparmor/include/perms.h +++ b/security/apparmor/include/perms.h @@ -207,7 +207,8 @@ void aa_apply_modes_to_perms(struct aa_profile *profile, struct aa_perms *perms); void aa_perms_accum(struct aa_perms *accum, struct aa_perms *addend); void aa_perms_accum_raw(struct aa_perms *accum, struct aa_perms *addend); -void aa_profile_match_label(struct aa_profile *profile, struct aa_label *label, +void aa_profile_match_label(struct aa_profile *profile, + struct aa_ruleset *rules, struct aa_label *label, int type, u32 request, struct aa_perms *perms); int aa_profile_label_perm(struct aa_profile *profile, struct aa_profile *target, u32 request, int type, u32 *deny, diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h index 
2c39bd389f87..9ee2c05e2895 100644 --- a/security/apparmor/include/policy.h +++ b/security/apparmor/include/policy.h @@ -123,6 +123,43 @@ struct aa_data { struct rhash_head head; }; +/* struct aa_ruleset - data covering mediation rules + * @size: the memory consumed by this ruleset + * @policy: general match rules governing policy + * @file: The set of rules governing basic file access and domain transitions + * @caps: capabilities for the profile + * @rlimits: rlimits for the profile + * @secmark_count: number of secmark entries + * @secmark: secmark label match info + */ +struct aa_ruleset { + int size; + + /* TODO: merge policy and file */ + struct aa_policydb policy; + struct aa_policydb file; + struct aa_caps caps; + + struct aa_rlimit rlimits; + + int secmark_count; + struct aa_secmark *secmark; +}; + +/* struct aa_attachment - data and rules for a profiles attachment + * @xmatch_str: human readable attachment string + * @xmatch: optional extended matching for unconfined executables names + * @xmatch_len: xmatch prefix len, used to determine xmatch priority + * @xattr_count: number of xattrs in table + * @xattrs: table of xattrs + */ +struct aa_attachment { + const char *xmatch_str; + struct aa_policydb xmatch; + unsigned int xmatch_len; + int xattr_count; + char **xattrs; +}; /* struct aa_profile - basic confinement data * @base - base components of the profile (name, refcount, lists, lock ...) 
@@ -130,18 +167,13 @@ struct aa_data { * @parent: parent of profile * @ns: namespace the profile is in * @rename: optional profile name that this profile renamed - * @attach: human readable attachment string - * @xmatch: optional extended matching for unconfined executables names - * @xmatch_len: xmatch prefix len, used to determine xmatch priority + * * @audit: the auditing mode of the profile * @mode: the enforcement mode of the profile * @path_flags: flags controlling path generation behavior * @disconnected: what to prepend if attach_disconnected is specified - * @size: the memory consumed by this profiles rules - * @policy: general match rules governing policy - * @file: The set of rules governing basic file access and domain transitions - * @caps: capabilities for the profile - * @rlimits: rlimits for the profile + * @attach: attachment rules for the profile + * @rules: rules to be enforced * * @dents: dentries for the profiles file entries in apparmorfs * @dirname: name of the profile dir in apparmorfs @@ -166,27 +198,13 @@ struct aa_profile { struct aa_ns *ns; const char *rename; - const char *attach; - struct aa_policydb xmatch; - unsigned int xmatch_len; - enum audit_mode audit; long mode; u32 path_flags; const char *disconnected; - int size; - struct aa_policydb policy; - struct aa_policydb file; - struct aa_caps caps; - - int xattr_count; - char **xattrs; - - struct aa_rlimit rlimits; - - int secmark_count; - struct aa_secmark *secmark; + struct aa_attachment attach; + struct aa_ruleset rules; struct aa_loaddata *rawdata; unsigned char *hash; @@ -247,24 +265,24 @@ static inline struct aa_profile *aa_get_newest_profile(struct aa_profile *p) return labels_profile(aa_get_newest_label(&p->label)); } -static inline aa_state_t PROFILE_MEDIATES(struct aa_profile *profile, - unsigned char class) +static inline aa_state_t RULE_MEDIATES(struct aa_ruleset *rules, + unsigned char class) { if (class <= AA_CLASS_LAST) - return profile->policy.start[class]; + return 
rules->policy.start[class]; else - return aa_dfa_match_len(profile->policy.dfa, - profile->policy.start[0], &class, 1); + return aa_dfa_match_len(rules->policy.dfa, + rules->policy.start[0], &class, 1); } -static inline aa_state_t PROFILE_MEDIATES_AF(struct aa_profile *profile, - u16 AF) { - aa_state_t state = PROFILE_MEDIATES(profile, AA_CLASS_NET); +static inline aa_state_t RULE_MEDIATES_AF(struct aa_ruleset *rules, u16 AF) +{ + aa_state_t state = RULE_MEDIATES(rules, AA_CLASS_NET); __be16 be_af = cpu_to_be16(AF); if (!state) return DFA_NOMATCH; - return aa_dfa_match_len(profile->policy.dfa, state, (char *) &be_af, 2); + return aa_dfa_match_len(rules->policy.dfa, state, (char *) &be_af, 2); } /** diff --git a/security/apparmor/ipc.c b/security/apparmor/ipc.c index 4ecaf2ba26c5..dc2fa548312d 100644 --- a/security/apparmor/ipc.c +++ b/security/apparmor/ipc.c @@ -78,19 +78,20 @@ static int profile_signal_perm(struct aa_profile *profile, struct aa_label *peer, u32 request, struct common_audit_data *sa) { + struct aa_ruleset *rules = &profile->rules; struct aa_perms perms; aa_state_t state; if (profile_unconfined(profile) || - !PROFILE_MEDIATES(profile, AA_CLASS_SIGNAL)) + !RULE_MEDIATES(rules, AA_CLASS_SIGNAL)) return 0; aad(sa)->peer = peer; /* TODO: secondary cache check */ - state = aa_dfa_next(profile->policy.dfa, - profile->policy.start[AA_CLASS_SIGNAL], + state = aa_dfa_next(rules->policy.dfa, + rules->policy.start[AA_CLASS_SIGNAL], aad(sa)->signal); - aa_label_match(profile, peer, state, false, request, &perms); + aa_label_match(profile, rules, peer, state, false, request, &perms); aa_apply_modes_to_perms(profile, &perms); return aa_check_perms(profile, &perms, request, sa, audit_signal_cb); } diff --git a/security/apparmor/label.c b/security/apparmor/label.c index 3a967003fa7c..98dadd960977 100644 --- a/security/apparmor/label.c +++ b/security/apparmor/label.c @@ -1266,20 +1266,21 @@ static inline bool label_is_visible(struct aa_profile *profile, * visibility 
test. */ static inline aa_state_t match_component(struct aa_profile *profile, + struct aa_ruleset *rules, struct aa_profile *tp, aa_state_t state) { const char *ns_name; if (profile->ns == tp->ns) - return aa_dfa_match(profile->policy.dfa, state, tp->base.hname); + return aa_dfa_match(rules->policy.dfa, state, tp->base.hname); /* try matching with namespace name and then profile */ ns_name = aa_ns_name(profile->ns, tp->ns, true); - state = aa_dfa_match_len(profile->policy.dfa, state, ":", 1); - state = aa_dfa_match(profile->policy.dfa, state, ns_name); - state = aa_dfa_match_len(profile->policy.dfa, state, ":", 1); - return aa_dfa_match(profile->policy.dfa, state, tp->base.hname); + state = aa_dfa_match_len(rules->policy.dfa, state, ":", 1); + state = aa_dfa_match(rules->policy.dfa, state, ns_name); + state = aa_dfa_match_len(rules->policy.dfa, state, ":", 1); + return aa_dfa_match(rules->policy.dfa, state, tp->base.hname); } /** @@ -1298,6 +1299,7 @@ static inline aa_state_t match_component(struct aa_profile *profile, * check to be stacked. 
*/ static int label_compound_match(struct aa_profile *profile, + struct aa_ruleset *rules, struct aa_label *label, aa_state_t state, bool subns, u32 request, struct aa_perms *perms) @@ -1309,7 +1311,7 @@ static int label_compound_match(struct aa_profile *profile, label_for_each(i, label, tp) { if (!aa_ns_visible(profile->ns, tp->ns, subns)) continue; - state = match_component(profile, tp, state); + state = match_component(profile, rules, tp, state); if (!state) goto fail; goto next; @@ -1323,12 +1325,12 @@ next: label_for_each_cont(i, label, tp) { if (!aa_ns_visible(profile->ns, tp->ns, subns)) continue; - state = aa_dfa_match(profile->policy.dfa, state, "//&"); - state = match_component(profile, tp, state); + state = aa_dfa_match(rules->policy.dfa, state, "//&"); + state = match_component(profile, rules, tp, state); if (!state) goto fail; } - *perms = *aa_lookup_perms(&profile->policy, state); + *perms = *aa_lookup_perms(&rules->policy, state); aa_apply_modes_to_perms(profile, perms); if ((perms->allow & request) != request) return -EACCES; @@ -1343,6 +1345,7 @@ fail: /** * label_components_match - find perms for all subcomponents of a label * @profile: profile to find perms for + * @rules: ruleset to search * @label: label to check access permissions for * @start: state to start match in * @subns: whether to do permission checks on components in a subns @@ -1356,6 +1359,7 @@ fail: * check to be stacked. 
*/ static int label_components_match(struct aa_profile *profile, + struct aa_ruleset *rules, struct aa_label *label, aa_state_t start, bool subns, u32 request, struct aa_perms *perms) @@ -1369,7 +1373,7 @@ static int label_components_match(struct aa_profile *profile, label_for_each(i, label, tp) { if (!aa_ns_visible(profile->ns, tp->ns, subns)) continue; - state = match_component(profile, tp, start); + state = match_component(profile, rules, tp, start); if (!state) goto fail; goto next; @@ -1379,16 +1383,16 @@ static int label_components_match(struct aa_profile *profile, return 0; next: - tmp = *aa_lookup_perms(&profile->policy, state); + tmp = *aa_lookup_perms(&rules->policy, state); aa_apply_modes_to_perms(profile, &tmp); aa_perms_accum(perms, &tmp); label_for_each_cont(i, label, tp) { if (!aa_ns_visible(profile->ns, tp->ns, subns)) continue; - state = match_component(profile, tp, start); + state = match_component(profile, rules, tp, start); if (!state) goto fail; - tmp = *aa_lookup_perms(&profile->policy, state); + tmp = *aa_lookup_perms(&rules->policy, state); aa_apply_modes_to_perms(profile, &tmp); aa_perms_accum(perms, &tmp); } @@ -1406,6 +1410,7 @@ fail: /** * aa_label_match - do a multi-component label match * @profile: profile to match against (NOT NULL) + * @rules: ruleset to search * @label: label to match (NOT NULL) * @state: state to start in * @subns: whether to match subns components @@ -1414,18 +1419,18 @@ fail: * * Returns: the state the match finished in, may be the none matching state */ -int aa_label_match(struct aa_profile *profile, struct aa_label *label, - aa_state_t state, bool subns, u32 request, - struct aa_perms *perms) +int aa_label_match(struct aa_profile *profile, struct aa_ruleset *rules, + struct aa_label *label, aa_state_t state, bool subns, + u32 request, struct aa_perms *perms) { - int error = label_compound_match(profile, label, state, subns, request, - perms); + int error = label_compound_match(profile, rules, label, state, 
subns, + request, perms); if (!error) return error; *perms = allperms; - return label_components_match(profile, label, state, subns, request, - perms); + return label_components_match(profile, rules, label, state, subns, + request, perms); } diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c index 10e3b11e02ad..ec73e51ca7e3 100644 --- a/security/apparmor/lib.c +++ b/security/apparmor/lib.c @@ -331,16 +331,18 @@ void aa_apply_modes_to_perms(struct aa_profile *profile, struct aa_perms *perms) perms->prompt = ALL_PERMS_MASK; } -void aa_profile_match_label(struct aa_profile *profile, struct aa_label *label, +void aa_profile_match_label(struct aa_profile *profile, + struct aa_ruleset *rules, + struct aa_label *label, int type, u32 request, struct aa_perms *perms) { /* TODO: doesn't yet handle extended types */ aa_state_t state; - state = aa_dfa_next(profile->policy.dfa, - profile->policy.start[AA_CLASS_LABEL], + state = aa_dfa_next(rules->policy.dfa, + rules->policy.start[AA_CLASS_LABEL], type); - aa_label_match(profile, label, state, false, request, perms); + aa_label_match(profile, rules, label, state, false, request, perms); } @@ -355,7 +357,8 @@ int aa_profile_label_perm(struct aa_profile *profile, struct aa_profile *target, aad(sa)->peer = &target->label; aad(sa)->request = request; - aa_profile_match_label(profile, &target->label, type, request, &perms); + aa_profile_match_label(profile, &profile->rules, &target->label, type, + request, &perms); aa_apply_modes_to_perms(profile, &perms); *deny |= request & perms.deny; return aa_check_perms(profile, &perms, request, sa, aa_audit_perms_cb); diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 784709286a62..62f2ca32b959 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -166,9 +166,9 @@ static int apparmor_capget(struct task_struct *target, kernel_cap_t *effective, if (COMPLAIN_MODE(profile)) continue; *effective = cap_intersect(*effective, - profile->caps.allow); + 
profile->rules.caps.allow); *permitted = cap_intersect(*permitted, - profile->caps.allow); + profile->rules.caps.allow); } } rcu_read_unlock(); diff --git a/security/apparmor/mount.c b/security/apparmor/mount.c index 02d8215cb9fd..d4724bdcb07f 100644 --- a/security/apparmor/mount.c +++ b/security/apparmor/mount.c @@ -303,13 +303,14 @@ static int match_mnt_path_str(struct aa_profile *profile, { struct aa_perms perms = { }; const char *mntpnt = NULL, *info = NULL; + struct aa_ruleset *rules = &profile->rules; int pos, error; AA_BUG(!profile); AA_BUG(!mntpath); AA_BUG(!buffer); - if (!PROFILE_MEDIATES(profile, AA_CLASS_MOUNT)) + if (!RULE_MEDIATES(rules, AA_CLASS_MOUNT)) return 0; error = aa_path_name(mntpath, path_flags(profile, mntpath), buffer, @@ -324,8 +325,8 @@ static int match_mnt_path_str(struct aa_profile *profile, } error = -EACCES; - pos = do_match_mnt(&profile->policy, - profile->policy.start[AA_CLASS_MOUNT], + pos = do_match_mnt(&rules->policy, + rules->policy.start[AA_CLASS_MOUNT], mntpnt, devname, type, flags, data, binary, &perms); if (pos) { info = mnt_info_table[pos]; @@ -363,7 +364,7 @@ static int match_mnt(struct aa_profile *profile, const struct path *path, AA_BUG(!profile); AA_BUG(devpath && !devbuffer); - if (!PROFILE_MEDIATES(profile, AA_CLASS_MOUNT)) + if (!RULE_MEDIATES(&profile->rules, AA_CLASS_MOUNT)) return 0; if (devpath) { @@ -565,6 +566,7 @@ out: static int profile_umount(struct aa_profile *profile, const struct path *path, char *buffer) { + struct aa_ruleset *rules = &profile->rules; struct aa_perms perms = { }; const char *name = NULL, *info = NULL; aa_state_t state; @@ -573,7 +575,7 @@ static int profile_umount(struct aa_profile *profile, const struct path *path, AA_BUG(!profile); AA_BUG(!path); - if (!PROFILE_MEDIATES(profile, AA_CLASS_MOUNT)) + if (!RULE_MEDIATES(rules, AA_CLASS_MOUNT)) return 0; error = aa_path_name(path, path_flags(profile, path), buffer, &name, @@ -581,10 +583,10 @@ static int profile_umount(struct aa_profile 
*profile, const struct path *path, if (error) goto audit; - state = aa_dfa_match(profile->policy.dfa, - profile->policy.start[AA_CLASS_MOUNT], + state = aa_dfa_match(rules->policy.dfa, + rules->policy.start[AA_CLASS_MOUNT], name); - perms = *aa_lookup_perms(&profile->policy, state); + perms = *aa_lookup_perms(&rules->policy, state); if (AA_MAY_UMOUNT & ~perms.allow) error = -EACCES; @@ -624,6 +626,7 @@ static struct aa_label *build_pivotroot(struct aa_profile *profile, const struct path *old_path, char *old_buffer) { + struct aa_ruleset *rules = &profile->rules; const char *old_name, *new_name = NULL, *info = NULL; const char *trans_name = NULL; struct aa_perms perms = { }; @@ -635,7 +638,7 @@ static struct aa_label *build_pivotroot(struct aa_profile *profile, AA_BUG(!old_path); if (profile_unconfined(profile) || - !PROFILE_MEDIATES(profile, AA_CLASS_MOUNT)) + !RULE_MEDIATES(rules, AA_CLASS_MOUNT)) return aa_get_newest_label(&profile->label); error = aa_path_name(old_path, path_flags(profile, old_path), @@ -650,12 +653,12 @@ static struct aa_label *build_pivotroot(struct aa_profile *profile, goto audit; error = -EACCES; - state = aa_dfa_match(profile->policy.dfa, - profile->policy.start[AA_CLASS_MOUNT], + state = aa_dfa_match(rules->policy.dfa, + rules->policy.start[AA_CLASS_MOUNT], new_name); - state = aa_dfa_null_transition(profile->policy.dfa, state); - state = aa_dfa_match(profile->policy.dfa, state, old_name); - perms = *aa_lookup_perms(&profile->policy, state); + state = aa_dfa_null_transition(rules->policy.dfa, state); + state = aa_dfa_match(rules->policy.dfa, state, old_name); + perms = *aa_lookup_perms(&rules->policy, state); if (AA_MAY_PIVOTROOT & perms.allow) error = 0; diff --git a/security/apparmor/net.c b/security/apparmor/net.c index d420d3aec3b8..ae789ee834ad 100644 --- a/security/apparmor/net.c +++ b/security/apparmor/net.c @@ -108,6 +108,7 @@ void audit_net_cb(struct audit_buffer *ab, void *va) int aa_profile_af_perm(struct aa_profile *profile, 
struct common_audit_data *sa, u32 request, u16 family, int type) { + struct aa_ruleset *rules = &profile->rules; struct aa_perms perms = { }; aa_state_t state; __be16 buffer[2]; @@ -117,15 +118,15 @@ int aa_profile_af_perm(struct aa_profile *profile, struct common_audit_data *sa, if (profile_unconfined(profile)) return 0; - state = PROFILE_MEDIATES(profile, AA_CLASS_NET); + state = RULE_MEDIATES(rules, AA_CLASS_NET); if (!state) return 0; buffer[0] = cpu_to_be16(family); buffer[1] = cpu_to_be16((u16) type); - state = aa_dfa_match_len(profile->policy.dfa, state, (char *) &buffer, + state = aa_dfa_match_len(rules->policy.dfa, state, (char *) &buffer, 4); - perms = *aa_lookup_perms(&profile->policy, state); + perms = *aa_lookup_perms(&rules->policy, state); aa_apply_modes_to_perms(profile, &perms); return aa_check_perms(profile, &perms, request, sa, audit_net_cb); @@ -216,25 +217,26 @@ static int aa_secmark_perm(struct aa_profile *profile, u32 request, u32 secid, { int i, ret; struct aa_perms perms = { }; + struct aa_ruleset *rules = &profile->rules; - if (profile->secmark_count == 0) + if (rules->secmark_count == 0) return 0; - for (i = 0; i < profile->secmark_count; i++) { - if (!profile->secmark[i].secid) { - ret = apparmor_secmark_init(&profile->secmark[i]); + for (i = 0; i < rules->secmark_count; i++) { + if (!rules->secmark[i].secid) { + ret = apparmor_secmark_init(&rules->secmark[i]); if (ret) return ret; } - if (profile->secmark[i].secid == secid || - profile->secmark[i].secid == AA_SECID_WILDCARD) { - if (profile->secmark[i].deny) + if (rules->secmark[i].secid == secid || + rules->secmark[i].secid == AA_SECID_WILDCARD) { + if (rules->secmark[i].deny) perms.deny = ALL_PERMS_MASK; else perms.allow = ALL_PERMS_MASK; - if (profile->secmark[i].audit) + if (rules->secmark[i].audit) perms.audit = ALL_PERMS_MASK; } } diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index 3c3a5263695d..74c0a3b34e9b 100644 --- a/security/apparmor/policy.c +++ 
b/security/apparmor/policy.c @@ -193,6 +193,30 @@ static void aa_free_data(void *ptr, void *arg) kfree_sensitive(data); } +static void free_attachment(struct aa_attachment *attach) +{ + int i; + + for (i = 0; i < attach->xattr_count; i++) + kfree_sensitive(attach->xattrs[i]); + kfree_sensitive(attach->xattrs); + aa_destroy_policydb(&attach->xmatch); +} + +static void free_ruleset(struct aa_ruleset *rules) +{ + int i; + + aa_destroy_policydb(&rules->file); + aa_destroy_policydb(&rules->policy); + aa_free_cap_rules(&rules->caps); + aa_free_rlimit_rules(&rules->rlimits); + + for (i = 0; i < rules->secmark_count; i++) + kfree_sensitive(rules->secmark[i].label); + kfree_sensitive(rules->secmark); +} + /** * aa_free_profile - free a profile * @profile: the profile to free (MAYBE NULL) @@ -206,7 +230,6 @@ static void aa_free_data(void *ptr, void *arg) void aa_free_profile(struct aa_profile *profile) { struct rhashtable *rht; - int i; AA_DEBUG("%s(%p)\n", __func__, profile); @@ -220,19 +243,10 @@ void aa_free_profile(struct aa_profile *profile) aa_put_ns(profile->ns); kfree_sensitive(profile->rename); - aa_destroy_policydb(&profile->file); - aa_free_cap_rules(&profile->caps); - aa_free_rlimit_rules(&profile->rlimits); - - for (i = 0; i < profile->xattr_count; i++) - kfree_sensitive(profile->xattrs[i]); - kfree_sensitive(profile->xattrs); - for (i = 0; i < profile->secmark_count; i++) - kfree_sensitive(profile->secmark[i].label); - kfree_sensitive(profile->secmark); + free_attachment(&profile->attach); + free_ruleset(&profile->rules); kfree_sensitive(profile->dirname); - aa_destroy_policydb(&profile->xmatch); - aa_destroy_policydb(&profile->policy); + if (profile->data) { rht = profile->data; profile->data = NULL; @@ -544,8 +558,8 @@ name: /* released on free_profile */ rcu_assign_pointer(profile->parent, aa_get_profile(parent)); profile->ns = aa_get_ns(parent->ns); - profile->file.dfa = aa_get_dfa(nulldfa); - profile->policy.dfa = aa_get_dfa(nulldfa); + 
profile->rules.file.dfa = aa_get_dfa(nulldfa); + profile->rules.policy.dfa = aa_get_dfa(nulldfa); mutex_lock_nested(&profile->ns->lock, profile->ns->level); p = __find_child(&parent->base.profiles, bname); diff --git a/security/apparmor/policy_ns.c b/security/apparmor/policy_ns.c index 43beaad083fe..cb10994cd3b6 100644 --- a/security/apparmor/policy_ns.c +++ b/security/apparmor/policy_ns.c @@ -91,8 +91,8 @@ static struct aa_profile *alloc_unconfined(const char *name) profile->label.flags |= FLAG_IX_ON_NAME_ERROR | FLAG_IMMUTIBLE | FLAG_NS_COUNT | FLAG_UNCONFINED; profile->mode = APPARMOR_UNCONFINED; - profile->file.dfa = aa_get_dfa(nulldfa); - profile->policy.dfa = aa_get_dfa(nulldfa); + profile->rules.file.dfa = aa_get_dfa(nulldfa); + profile->rules.policy.dfa = aa_get_dfa(nulldfa); return profile; } diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 5a78aaa0eea4..bbca7772dfa2 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -556,12 +556,12 @@ static bool unpack_xattrs(struct aa_ext *e, struct aa_profile *profile) if (unpack_array(e, NULL, &size) != TRI_TRUE) goto fail; - profile->xattr_count = size; - profile->xattrs = kcalloc(size, sizeof(char *), GFP_KERNEL); - if (!profile->xattrs) + profile->attach.xattr_count = size; + profile->attach.xattrs = kcalloc(size, sizeof(char *), GFP_KERNEL); + if (!profile->attach.xattrs) goto fail; for (i = 0; i < size; i++) { - if (!unpack_strdup(e, &profile->xattrs[i], NULL)) + if (!unpack_strdup(e, &profile->attach.xattrs[i], NULL)) goto fail; } if (!unpack_nameX(e, AA_ARRAYEND, NULL)) @@ -579,6 +579,7 @@ fail: static bool unpack_secmark(struct aa_ext *e, struct aa_profile *profile) { + struct aa_ruleset *rules = &profile->rules; void *pos = e->pos; u16 size; int i; @@ -587,19 +588,19 @@ static bool unpack_secmark(struct aa_ext *e, struct aa_profile *profile) if (unpack_array(e, NULL, &size) != TRI_TRUE) goto fail; - profile->secmark = kcalloc(size, 
sizeof(struct aa_secmark), + rules->secmark = kcalloc(size, sizeof(struct aa_secmark), GFP_KERNEL); - if (!profile->secmark) + if (!rules->secmark) goto fail; - profile->secmark_count = size; + rules->secmark_count = size; for (i = 0; i < size; i++) { - if (!unpack_u8(e, &profile->secmark[i].audit, NULL)) + if (!unpack_u8(e, &rules->secmark[i].audit, NULL)) goto fail; - if (!unpack_u8(e, &profile->secmark[i].deny, NULL)) + if (!unpack_u8(e, &rules->secmark[i].deny, NULL)) goto fail; - if (!unpack_strdup(e, &profile->secmark[i].label, NULL)) + if (!unpack_strdup(e, &rules->secmark[i].label, NULL)) goto fail; } if (!unpack_nameX(e, AA_ARRAYEND, NULL)) @@ -611,12 +612,12 @@ static bool unpack_secmark(struct aa_ext *e, struct aa_profile *profile) return true; fail: - if (profile->secmark) { + if (rules->secmark) { for (i = 0; i < size; i++) - kfree(profile->secmark[i].label); - kfree(profile->secmark); - profile->secmark_count = 0; - profile->secmark = NULL; + kfree(rules->secmark[i].label); + kfree(rules->secmark); + rules->secmark_count = 0; + rules->secmark = NULL; } e->pos = pos; @@ -634,7 +635,7 @@ static bool unpack_rlimits(struct aa_ext *e, struct aa_profile *profile) u32 tmp = 0; if (!unpack_u32(e, &tmp, NULL)) goto fail; - profile->rlimits.mask = tmp; + profile->rules.rlimits.mask = tmp; if (unpack_array(e, NULL, &size) != TRI_TRUE || size > RLIM_NLIMITS) @@ -644,7 +645,7 @@ static bool unpack_rlimits(struct aa_ext *e, struct aa_profile *profile) int a = aa_map_resource(i); if (!unpack_u64(e, &tmp2, NULL)) goto fail; - profile->rlimits.limits[a].rlim_max = tmp2; + profile->rules.rlimits.limits[a].rlim_max = tmp2; } if (!unpack_nameX(e, AA_ARRAYEND, NULL)) goto fail; @@ -816,6 +817,7 @@ static int datacmp(struct rhashtable_compare_arg *arg, const void *obj) */ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) { + struct aa_ruleset *rules; struct aa_profile *profile = NULL; const char *tmpname, *tmpns = NULL, *name = NULL; const char 
*info = "failed to unpack profile"; @@ -850,27 +852,30 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) profile = aa_alloc_profile(name, NULL, GFP_KERNEL); if (!profile) return ERR_PTR(-ENOMEM); + rules = &profile->rules; /* profile renaming is optional */ (void) unpack_str(e, &profile->rename, "rename"); /* attachment string is optional */ - (void) unpack_str(e, &profile->attach, "attach"); + (void) unpack_str(e, &profile->attach.xmatch_str, "attach"); /* xmatch is optional and may be NULL */ - error = unpack_pdb(e, &profile->xmatch, false, false, &info); - if (error) + error = unpack_pdb(e, &profile->attach.xmatch, false, false, &info); + if (error) { + info = "bad xmatch"; goto fail; + } /* neither xmatch_len not xmatch_perms are optional if xmatch is set */ - if (profile->xmatch.dfa) { + if (profile->attach.xmatch.dfa) { if (!unpack_u32(e, &tmp, NULL)) { info = "missing xmatch len"; goto fail; } - profile->xmatch_len = tmp; - profile->xmatch.start[AA_CLASS_XMATCH] = DFA_START; - if (aa_compat_map_xmatch(&profile->xmatch)) { + profile->attach.xmatch_len = tmp; + profile->attach.xmatch.start[AA_CLASS_XMATCH] = DFA_START; + if (aa_compat_map_xmatch(&profile->attach.xmatch)) { info = "failed to convert xmatch permission table"; goto fail; } @@ -926,11 +931,11 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) profile->path_flags = PATH_MEDIATE_DELETED; info = "failed to unpack profile capabilities"; - if (!unpack_u32(e, &(profile->caps.allow.cap[0]), NULL)) + if (!unpack_u32(e, &(rules->caps.allow.cap[0]), NULL)) goto fail; - if (!unpack_u32(e, &(profile->caps.audit.cap[0]), NULL)) + if (!unpack_u32(e, &(rules->caps.audit.cap[0]), NULL)) goto fail; - if (!unpack_u32(e, &(profile->caps.quiet.cap[0]), NULL)) + if (!unpack_u32(e, &(rules->caps.quiet.cap[0]), NULL)) goto fail; if (!unpack_u32(e, &tmpcap.cap[0], NULL)) goto fail; @@ -938,11 +943,11 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char 
**ns_name) info = "failed to unpack upper profile capabilities"; if (unpack_nameX(e, AA_STRUCT, "caps64")) { /* optional upper half of 64 bit caps */ - if (!unpack_u32(e, &(profile->caps.allow.cap[1]), NULL)) + if (!unpack_u32(e, &(rules->caps.allow.cap[1]), NULL)) goto fail; - if (!unpack_u32(e, &(profile->caps.audit.cap[1]), NULL)) + if (!unpack_u32(e, &(rules->caps.audit.cap[1]), NULL)) goto fail; - if (!unpack_u32(e, &(profile->caps.quiet.cap[1]), NULL)) + if (!unpack_u32(e, &(rules->caps.quiet.cap[1]), NULL)) goto fail; if (!unpack_u32(e, &(tmpcap.cap[1]), NULL)) goto fail; @@ -953,9 +958,9 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) info = "failed to unpack extended profile capabilities"; if (unpack_nameX(e, AA_STRUCT, "capsx")) { /* optional extended caps mediation mask */ - if (!unpack_u32(e, &(profile->caps.extended.cap[0]), NULL)) + if (!unpack_u32(e, &(rules->caps.extended.cap[0]), NULL)) goto fail; - if (!unpack_u32(e, &(profile->caps.extended.cap[1]), NULL)) + if (!unpack_u32(e, &(rules->caps.extended.cap[1]), NULL)) goto fail; if (!unpack_nameX(e, AA_STRUCTEND, NULL)) goto fail; @@ -979,40 +984,41 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) if (unpack_nameX(e, AA_STRUCT, "policydb")) { /* generic policy dfa - optional and may be NULL */ info = "failed to unpack policydb"; - error = unpack_pdb(e, &profile->policy, true, false, &info); + error = unpack_pdb(e, &rules->policy, true, false, + &info); if (error) goto fail; /* Fixup: drop when we get rid of start array */ - if (aa_dfa_next(profile->policy.dfa, profile->policy.start[0], + if (aa_dfa_next(rules->policy.dfa, rules->policy.start[0], AA_CLASS_FILE)) - profile->policy.start[AA_CLASS_FILE] = - aa_dfa_next(profile->policy.dfa, - profile->policy.start[0], + rules->policy.start[AA_CLASS_FILE] = + aa_dfa_next(rules->policy.dfa, + rules->policy.start[0], AA_CLASS_FILE); if (!unpack_nameX(e, AA_STRUCTEND, NULL)) goto fail; - if 
(aa_compat_map_policy(&profile->policy, e->version)) { + if (aa_compat_map_policy(&rules->policy, e->version)) { info = "failed to remap policydb permission table"; goto fail; } } else - profile->policy.dfa = aa_get_dfa(nulldfa); + rules->policy.dfa = aa_get_dfa(nulldfa); /* get file rules */ - error = unpack_pdb(e, &profile->file, false, true, &info); + error = unpack_pdb(e, &rules->file, false, true, &info); if (error) { goto fail; - } else if (profile->file.dfa) { - if (aa_compat_map_file(&profile->file)) { + } else if (rules->file.dfa) { + if (aa_compat_map_file(&rules->file)) { info = "failed to remap file permission table"; goto fail; } - } else if (profile->policy.dfa && - profile->policy.start[AA_CLASS_FILE]) { - profile->file.dfa = aa_get_dfa(profile->policy.dfa); - profile->file.start[AA_CLASS_FILE] = profile->policy.start[AA_CLASS_FILE]; + } else if (rules->policy.dfa && + rules->policy.start[AA_CLASS_FILE]) { + rules->file.dfa = aa_get_dfa(rules->policy.dfa); + rules->file.start[AA_CLASS_FILE] = rules->policy.start[AA_CLASS_FILE]; } else - profile->file.dfa = aa_get_dfa(nulldfa); + rules->file.dfa = aa_get_dfa(nulldfa); if (unpack_nameX(e, AA_STRUCT, "data")) { info = "out of memory"; @@ -1202,28 +1208,28 @@ static bool verify_perms(struct aa_policydb *pdb) */ static int verify_profile(struct aa_profile *profile) { - if ((profile->file.dfa && - !verify_dfa_xindex(profile->file.dfa, - profile->file.trans.size)) || - (profile->policy.dfa && - !verify_dfa_xindex(profile->policy.dfa, - profile->policy.trans.size))) { + if ((profile->rules.file.dfa && + !verify_dfa_xindex(profile->rules.file.dfa, + profile->rules.file.trans.size)) || + (profile->rules.policy.dfa && + !verify_dfa_xindex(profile->rules.policy.dfa, + profile->rules.policy.trans.size))) { audit_iface(profile, NULL, NULL, "Unpack: Invalid named transition", NULL, -EPROTO); return -EPROTO; } - if (!verify_perms(&profile->file)) { + if (!verify_perms(&profile->rules.file)) { audit_iface(profile, 
NULL, NULL, "Unpack: Invalid perm index", NULL, -EPROTO); return -EPROTO; } - if (!verify_perms(&profile->policy)) { + if (!verify_perms(&profile->rules.policy)) { audit_iface(profile, NULL, NULL, "Unpack: Invalid perm index", NULL, -EPROTO); return -EPROTO; } - if (!verify_perms(&profile->xmatch)) { + if (!verify_perms(&profile->attach.xmatch)) { audit_iface(profile, NULL, NULL, "Unpack: Invalid perm index", NULL, -EPROTO); return -EPROTO; diff --git a/security/apparmor/resource.c b/security/apparmor/resource.c index cc018469e22d..f28026804d13 100644 --- a/security/apparmor/resource.c +++ b/security/apparmor/resource.c @@ -82,10 +82,11 @@ int aa_map_resource(int resource) static int profile_setrlimit(struct aa_profile *profile, unsigned int resource, struct rlimit *new_rlim) { + struct aa_ruleset *rules = &profile->rules; int e = 0; - if (profile->rlimits.mask & (1 << resource) && new_rlim->rlim_max > - profile->rlimits.limits[resource].rlim_max) + if (rules->rlimits.mask & (1 << resource) && new_rlim->rlim_max > + rules->rlimits.limits[resource].rlim_max) e = -EACCES; return audit_resource(profile, resource, new_rlim->rlim_max, NULL, NULL, e); @@ -153,12 +154,12 @@ void __aa_transition_rlimits(struct aa_label *old_l, struct aa_label *new_l) * to the lesser of the tasks hard limit and the init tasks soft limit */ label_for_each_confined(i, old_l, old) { - if (old->rlimits.mask) { + if (old->rules.rlimits.mask) { int j; for (j = 0, mask = 1; j < RLIM_NLIMITS; j++, mask <<= 1) { - if (old->rlimits.mask & mask) { + if (old->rules.rlimits.mask & mask) { rlim = current->signal->rlim + j; initrlim = init_task.signal->rlim + j; rlim->rlim_cur = min(rlim->rlim_max, @@ -172,15 +173,15 @@ void __aa_transition_rlimits(struct aa_label *old_l, struct aa_label *new_l) label_for_each_confined(i, new_l, new) { int j; - if (!new->rlimits.mask) + if (!new->rules.rlimits.mask) continue; for (j = 0, mask = 1; j < RLIM_NLIMITS; j++, mask <<= 1) { - if (!(new->rlimits.mask & mask)) + 
if (!(new->rules.rlimits.mask & mask)) continue; rlim = current->signal->rlim + j; rlim->rlim_max = min(rlim->rlim_max, - new->rlimits.limits[j].rlim_max); + new->rules.rlimits.limits[j].rlim_max); /* soft limit should not exceed hard limit */ rlim->rlim_cur = min(rlim->rlim_cur, rlim->rlim_max); } diff --git a/security/apparmor/task.c b/security/apparmor/task.c index b19900f85c14..7e64fba42ca3 100644 --- a/security/apparmor/task.c +++ b/security/apparmor/task.c @@ -223,7 +223,7 @@ static void audit_ptrace_cb(struct audit_buffer *ab, void *va) FLAGS_NONE, GFP_ATOMIC); } -/* assumes check for PROFILE_MEDIATES is already done */ +/* assumes check for RULE_MEDIATES is already done */ /* TODO: conditionals */ static int profile_ptrace_perm(struct aa_profile *profile, struct aa_label *peer, u32 request, @@ -232,8 +232,8 @@ static int profile_ptrace_perm(struct aa_profile *profile, struct aa_perms perms = { }; aad(sa)->peer = peer; - aa_profile_match_label(profile, peer, AA_CLASS_PTRACE, request, - &perms); + aa_profile_match_label(profile, &profile->rules, peer, + AA_CLASS_PTRACE, request, &perms); aa_apply_modes_to_perms(profile, &perms); return aa_check_perms(profile, &perms, request, sa, audit_ptrace_cb); } @@ -243,7 +243,7 @@ static int profile_tracee_perm(struct aa_profile *tracee, struct common_audit_data *sa) { if (profile_unconfined(tracee) || unconfined(tracer) || - !PROFILE_MEDIATES(tracee, AA_CLASS_PTRACE)) + !RULE_MEDIATES(&tracee->rules, AA_CLASS_PTRACE)) return 0; return profile_ptrace_perm(tracee, tracer, request, sa); @@ -256,7 +256,7 @@ static int profile_tracer_perm(struct aa_profile *tracer, if (profile_unconfined(tracer)) return 0; - if (PROFILE_MEDIATES(tracer, AA_CLASS_PTRACE)) + if (RULE_MEDIATES(&tracer->rules, AA_CLASS_PTRACE)) return profile_ptrace_perm(tracer, tracee, request, sa); /* profile uses the old style capability check for ptrace */ From 1ad22fcc4d0d2fb2e0f35aed555a86d016d5e590 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Mon, 5 
Sep 2022 20:47:36 -0700 Subject: [PATCH 0038/4122] apparmor: rework profile->rules to be a list Convert profile->rules to a list as the next step towards supporting multiple rulesets in a profile. For this step only support a single list entry item. The logic for iterating the list will come as a separate step. Signed-off-by: John Johansen --- security/apparmor/apparmorfs.c | 3 ++- security/apparmor/capability.c | 6 +++-- security/apparmor/domain.c | 24 ++++++++++++------- security/apparmor/file.c | 6 +++-- security/apparmor/include/policy.h | 17 +++++++++++++- security/apparmor/ipc.c | 5 ++-- security/apparmor/lib.c | 6 +++-- security/apparmor/lsm.c | 7 ++++-- security/apparmor/mount.c | 13 +++++++---- security/apparmor/net.c | 6 +++-- security/apparmor/policy.c | 37 +++++++++++++++++++++++++++--- security/apparmor/policy_ns.c | 6 +++-- security/apparmor/policy_unpack.c | 34 ++++++++++++++------------- security/apparmor/resource.c | 19 ++++++++++----- security/apparmor/task.c | 10 ++++---- 15 files changed, 142 insertions(+), 57 deletions(-) diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index 84ef8b400b40..f6d83ffde3c4 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -611,7 +611,8 @@ static const struct file_operations aa_fs_ns_revision_fops = { static void profile_query_cb(struct aa_profile *profile, struct aa_perms *perms, const char *match_str, size_t match_len) { - struct aa_ruleset *rules = &profile->rules; + struct aa_ruleset *rules = list_first_entry(&profile->rules, + typeof(*rules), list); struct aa_perms tmp = { }; aa_state_t state = DFA_NOMATCH; diff --git a/security/apparmor/capability.c b/security/apparmor/capability.c index b66ec63e2a48..326a51838ef2 100644 --- a/security/apparmor/capability.c +++ b/security/apparmor/capability.c @@ -64,7 +64,8 @@ static void audit_cb(struct audit_buffer *ab, void *va) static int audit_caps(struct common_audit_data *sa, struct aa_profile *profile, int 
cap, int error) { - struct aa_ruleset *rules = &profile->rules; + struct aa_ruleset *rules = list_first_entry(&profile->rules, + typeof(*rules), list); struct audit_cache *ent; int type = AUDIT_APPARMOR_AUTO; @@ -115,7 +116,8 @@ static int audit_caps(struct common_audit_data *sa, struct aa_profile *profile, static int profile_capable(struct aa_profile *profile, int cap, unsigned int opts, struct common_audit_data *sa) { - struct aa_ruleset *rules = &profile->rules; + struct aa_ruleset *rules = list_first_entry(&profile->rules, + typeof(*rules), list); int error; if (cap_raised(rules->caps.allow, cap) && diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index ad035d14cfc5..d4b09f061aee 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -81,7 +81,8 @@ static inline aa_state_t match_component(struct aa_profile *profile, struct aa_profile *tp, bool stack, aa_state_t state) { - struct aa_ruleset *rules = &profile->rules; + struct aa_ruleset *rules = list_first_entry(&profile->rules, + typeof(*rules), list); const char *ns_name; if (stack) @@ -118,7 +119,8 @@ static int label_compound_match(struct aa_profile *profile, aa_state_t state, bool subns, u32 request, struct aa_perms *perms) { - struct aa_ruleset *rules = &profile->rules; + struct aa_ruleset *rules = list_first_entry(&profile->rules, + typeof(*rules), list); struct aa_profile *tp; struct label_it i; struct path_cond cond = { }; @@ -179,7 +181,8 @@ static int label_components_match(struct aa_profile *profile, aa_state_t start, bool subns, u32 request, struct aa_perms *perms) { - struct aa_ruleset *rules = &profile->rules; + struct aa_ruleset *rules = list_first_entry(&profile->rules, + typeof(*rules), list); struct aa_profile *tp; struct label_it i; struct aa_perms tmp; @@ -503,7 +506,8 @@ static const char *next_name(int xtype, const char *name) struct aa_label *x_table_lookup(struct aa_profile *profile, u32 xindex, const char **name) { - struct aa_ruleset *rules = 
&profile->rules; + struct aa_ruleset *rules = list_first_entry(&profile->rules, + typeof(*rules), list); struct aa_label *label = NULL; u32 xtype = xindex & AA_X_TYPE_MASK; int index = xindex & AA_X_INDEX_MASK; @@ -553,7 +557,8 @@ static struct aa_label *x_to_label(struct aa_profile *profile, const char **lookupname, const char **info) { - struct aa_ruleset *rules = &profile->rules; + struct aa_ruleset *rules = list_first_entry(&profile->rules, + typeof(*rules), list); struct aa_label *new = NULL; struct aa_ns *ns = profile->ns; u32 xtype = xindex & AA_X_TYPE_MASK; @@ -620,7 +625,8 @@ static struct aa_label *profile_transition(struct aa_profile *profile, char *buffer, struct path_cond *cond, bool *secure_exec) { - struct aa_ruleset *rules = &profile->rules; + struct aa_ruleset *rules = list_first_entry(&profile->rules, + typeof(*rules), list); struct aa_label *new = NULL; const char *info = NULL, *name = NULL, *target = NULL; aa_state_t state = rules->file.start[AA_CLASS_FILE]; @@ -719,7 +725,8 @@ static int profile_onexec(struct aa_profile *profile, struct aa_label *onexec, char *buffer, struct path_cond *cond, bool *secure_exec) { - struct aa_ruleset *rules = &profile->rules; + struct aa_ruleset *rules = list_first_entry(&profile->rules, + typeof(*rules), list); aa_state_t state = rules->file.start[AA_CLASS_FILE]; struct aa_perms perms = {}; const char *xname = NULL, *info = "change_profile onexec"; @@ -1259,7 +1266,8 @@ static int change_profile_perms_wrapper(const char *op, const char *name, struct aa_label *target, bool stack, u32 request, struct aa_perms *perms) { - struct aa_ruleset *rules = &profile->rules; + struct aa_ruleset *rules = list_first_entry(&profile->rules, + typeof(*rules), list); const char *info = NULL; int error = 0; diff --git a/security/apparmor/file.c b/security/apparmor/file.c index ef5d98f81a2b..d7f27848e7cc 100644 --- a/security/apparmor/file.c +++ b/security/apparmor/file.c @@ -224,7 +224,8 @@ int __aa_path_perm(const char *op, struct 
aa_profile *profile, const char *name, u32 request, struct path_cond *cond, int flags, struct aa_perms *perms) { - struct aa_ruleset *rules = &profile->rules; + struct aa_ruleset *rules = list_first_entry(&profile->rules, + typeof(*rules), list); int e = 0; if (profile_unconfined(profile)) @@ -317,7 +318,8 @@ static int profile_path_link(struct aa_profile *profile, const struct path *target, char *buffer2, struct path_cond *cond) { - struct aa_ruleset *rules = &profile->rules; + struct aa_ruleset *rules = list_first_entry(&profile->rules, + typeof(*rules), list); const char *lname, *tname = NULL; struct aa_perms lperms = {}, perms; const char *info = NULL; diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h index 9ee2c05e2895..5cadfb20df29 100644 --- a/security/apparmor/include/policy.h +++ b/security/apparmor/include/policy.h @@ -124,6 +124,7 @@ struct aa_data { }; /* struct aa_ruleset - data covering mediation rules + * @list: list the rule is on * @size: the memory consumed by this ruleset * @policy: general match rules governing policy * @file: The set of rules governing basic file access and domain transitions @@ -133,6 +134,8 @@ struct aa_data { * @secmark: secmark label match info */ struct aa_ruleset { + struct list_head list; + int size; /* TODO: merge policy and file */ @@ -147,6 +150,7 @@ struct aa_ruleset { }; /* struct aa_attachment - data and rules for a profiles attachment + * @list: * @xmatch_str: human readable attachment string * @xmatch: optional extended matching for unconfined executables names * @xmatch_len: xmatch prefix len, used to determine xmatch priority @@ -204,7 +208,7 @@ struct aa_profile { const char *disconnected; struct aa_attachment attach; - struct aa_ruleset rules; + struct list_head rules; struct aa_loaddata *rawdata; unsigned char *hash; @@ -227,6 +231,7 @@ void aa_add_profile(struct aa_policy *common, struct aa_profile *profile); void aa_free_proxy_kref(struct kref *kref); +struct aa_ruleset 
*aa_alloc_ruleset(gfp_t gfp); struct aa_profile *aa_alloc_profile(const char *name, struct aa_proxy *proxy, gfp_t gfp); struct aa_profile *aa_new_null_profile(struct aa_profile *parent, bool hat, @@ -285,6 +290,16 @@ static inline aa_state_t RULE_MEDIATES_AF(struct aa_ruleset *rules, u16 AF) return aa_dfa_match_len(rules->policy.dfa, state, (char *) &be_af, 2); } +static inline aa_state_t ANY_RULE_MEDIATES(struct list_head *head, + unsigned char class) +{ + struct aa_ruleset *rule; + + /* TODO: change to list walk */ + rule = list_first_entry(head, typeof(*rule), list); + return RULE_MEDIATES(rule, class); +} + /** * aa_get_profile - increment refcount on profile @p * @p: profile (MAYBE NULL) diff --git a/security/apparmor/ipc.c b/security/apparmor/ipc.c index dc2fa548312d..1d4099385bdf 100644 --- a/security/apparmor/ipc.c +++ b/security/apparmor/ipc.c @@ -78,12 +78,13 @@ static int profile_signal_perm(struct aa_profile *profile, struct aa_label *peer, u32 request, struct common_audit_data *sa) { - struct aa_ruleset *rules = &profile->rules; + struct aa_ruleset *rules = list_first_entry(&profile->rules, + typeof(*rules), list); struct aa_perms perms; aa_state_t state; if (profile_unconfined(profile) || - !RULE_MEDIATES(rules, AA_CLASS_SIGNAL)) + !ANY_RULE_MEDIATES(&profile->rules, AA_CLASS_SIGNAL)) return 0; aad(sa)->peer = peer; diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c index ec73e51ca7e3..a630c951bb3b 100644 --- a/security/apparmor/lib.c +++ b/security/apparmor/lib.c @@ -351,14 +351,16 @@ int aa_profile_label_perm(struct aa_profile *profile, struct aa_profile *target, u32 request, int type, u32 *deny, struct common_audit_data *sa) { + struct aa_ruleset *rules = list_first_entry(&profile->rules, + typeof(*rules), list); struct aa_perms perms; aad(sa)->label = &profile->label; aad(sa)->peer = &target->label; aad(sa)->request = request; - aa_profile_match_label(profile, &profile->rules, &target->label, type, - request, &perms); + 
aa_profile_match_label(profile, rules, &target->label, type, request, + &perms); aa_apply_modes_to_perms(profile, &perms); *deny |= request & perms.deny; return aa_check_perms(profile, &perms, request, sa, aa_audit_perms_cb); diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 62f2ca32b959..a22e53e44123 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -163,12 +163,15 @@ static int apparmor_capget(struct task_struct *target, kernel_cap_t *effective, struct label_it i; label_for_each_confined(i, label, profile) { + struct aa_ruleset *rules; if (COMPLAIN_MODE(profile)) continue; + rules = list_first_entry(&profile->rules, + typeof(*rules), list); *effective = cap_intersect(*effective, - profile->rules.caps.allow); + rules->caps.allow); *permitted = cap_intersect(*permitted, - profile->rules.caps.allow); + rules->caps.allow); } } rcu_read_unlock(); diff --git a/security/apparmor/mount.c b/security/apparmor/mount.c index d4724bdcb07f..cdfa430ae216 100644 --- a/security/apparmor/mount.c +++ b/security/apparmor/mount.c @@ -303,7 +303,8 @@ static int match_mnt_path_str(struct aa_profile *profile, { struct aa_perms perms = { }; const char *mntpnt = NULL, *info = NULL; - struct aa_ruleset *rules = &profile->rules; + struct aa_ruleset *rules = list_first_entry(&profile->rules, + typeof(*rules), list); int pos, error; AA_BUG(!profile); @@ -359,12 +360,14 @@ static int match_mnt(struct aa_profile *profile, const struct path *path, bool binary) { const char *devname = NULL, *info = NULL; + struct aa_ruleset *rules = list_first_entry(&profile->rules, + typeof(*rules), list); int error = -EACCES; AA_BUG(!profile); AA_BUG(devpath && !devbuffer); - if (!RULE_MEDIATES(&profile->rules, AA_CLASS_MOUNT)) + if (!RULE_MEDIATES(rules, AA_CLASS_MOUNT)) return 0; if (devpath) { @@ -566,7 +569,8 @@ out: static int profile_umount(struct aa_profile *profile, const struct path *path, char *buffer) { - struct aa_ruleset *rules = &profile->rules; + struct 
aa_ruleset *rules = list_first_entry(&profile->rules, + typeof(*rules), list); struct aa_perms perms = { }; const char *name = NULL, *info = NULL; aa_state_t state; @@ -626,7 +630,8 @@ static struct aa_label *build_pivotroot(struct aa_profile *profile, const struct path *old_path, char *old_buffer) { - struct aa_ruleset *rules = &profile->rules; + struct aa_ruleset *rules = list_first_entry(&profile->rules, + typeof(*rules), list); const char *old_name, *new_name = NULL, *info = NULL; const char *trans_name = NULL; struct aa_perms perms = { }; diff --git a/security/apparmor/net.c b/security/apparmor/net.c index ae789ee834ad..788be1609a86 100644 --- a/security/apparmor/net.c +++ b/security/apparmor/net.c @@ -108,7 +108,8 @@ void audit_net_cb(struct audit_buffer *ab, void *va) int aa_profile_af_perm(struct aa_profile *profile, struct common_audit_data *sa, u32 request, u16 family, int type) { - struct aa_ruleset *rules = &profile->rules; + struct aa_ruleset *rules = list_first_entry(&profile->rules, + typeof(*rules), list); struct aa_perms perms = { }; aa_state_t state; __be16 buffer[2]; @@ -217,7 +218,8 @@ static int aa_secmark_perm(struct aa_profile *profile, u32 request, u32 secid, { int i, ret; struct aa_perms perms = { }; - struct aa_ruleset *rules = &profile->rules; + struct aa_ruleset *rules = list_first_entry(&profile->rules, + typeof(*rules), list); if (rules->secmark_count == 0) return 0; diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index 74c0a3b34e9b..6f4cc8bfe03d 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -217,6 +217,17 @@ static void free_ruleset(struct aa_ruleset *rules) kfree_sensitive(rules->secmark); } +struct aa_ruleset *aa_alloc_ruleset(gfp_t gfp) +{ + struct aa_ruleset *rules; + + rules = kzalloc(sizeof(*rules), gfp); + if (rules) + INIT_LIST_HEAD(&rules->list); + + return rules; +} + /** * aa_free_profile - free a profile * @profile: the profile to free (MAYBE NULL) @@ -229,6 +240,7 @@ 
static void free_ruleset(struct aa_ruleset *rules) */ void aa_free_profile(struct aa_profile *profile) { + struct aa_ruleset *rule, *tmp; struct rhashtable *rht; AA_DEBUG("%s(%p)\n", __func__, profile); @@ -244,7 +256,15 @@ void aa_free_profile(struct aa_profile *profile) kfree_sensitive(profile->rename); free_attachment(&profile->attach); - free_ruleset(&profile->rules); + + /* + * at this point there are no tasks that can have a reference + * to rules + */ + list_for_each_entry_safe(rule, tmp, &profile->rules, list) { + list_del_init(&rule->list); + free_ruleset(rule); + } kfree_sensitive(profile->dirname); if (profile->data) { @@ -272,6 +292,7 @@ struct aa_profile *aa_alloc_profile(const char *hname, struct aa_proxy *proxy, gfp_t gfp) { struct aa_profile *profile; + struct aa_ruleset *rules; /* freed by free_profile - usually through aa_put_profile */ profile = kzalloc(struct_size(profile, label.vec, 2), gfp); @@ -283,6 +304,14 @@ struct aa_profile *aa_alloc_profile(const char *hname, struct aa_proxy *proxy, if (!aa_label_init(&profile->label, 1, gfp)) goto fail; + INIT_LIST_HEAD(&profile->rules); + + /* allocate the first ruleset, but leave it empty */ + rules = aa_alloc_ruleset(gfp); + if (!rules) + goto fail; + list_add(&rules->list, &profile->rules); + /* update being set needed by fs interface */ if (!proxy) { proxy = aa_alloc_proxy(&profile->label, gfp); @@ -516,6 +545,7 @@ struct aa_profile *aa_fqlookupn_profile(struct aa_label *base, struct aa_profile *aa_new_null_profile(struct aa_profile *parent, bool hat, const char *base, gfp_t gfp) { + struct aa_ruleset *rules; struct aa_profile *p, *profile; const char *bname; char *name = NULL; @@ -558,8 +588,9 @@ name: /* released on free_profile */ rcu_assign_pointer(profile->parent, aa_get_profile(parent)); profile->ns = aa_get_ns(parent->ns); - profile->rules.file.dfa = aa_get_dfa(nulldfa); - profile->rules.policy.dfa = aa_get_dfa(nulldfa); + rules = list_first_entry(&profile->rules, typeof(*rules), list); + 
rules->file.dfa = aa_get_dfa(nulldfa); + rules->policy.dfa = aa_get_dfa(nulldfa); mutex_lock_nested(&profile->ns->lock, profile->ns->level); p = __find_child(&parent->base.profiles, bname); diff --git a/security/apparmor/policy_ns.c b/security/apparmor/policy_ns.c index cb10994cd3b6..121aa79bccaa 100644 --- a/security/apparmor/policy_ns.c +++ b/security/apparmor/policy_ns.c @@ -83,6 +83,7 @@ const char *aa_ns_name(struct aa_ns *curr, struct aa_ns *view, bool subns) static struct aa_profile *alloc_unconfined(const char *name) { struct aa_profile *profile; + struct aa_ruleset *rules; profile = aa_alloc_profile(name, NULL, GFP_KERNEL); if (!profile) @@ -91,8 +92,9 @@ static struct aa_profile *alloc_unconfined(const char *name) profile->label.flags |= FLAG_IX_ON_NAME_ERROR | FLAG_IMMUTIBLE | FLAG_NS_COUNT | FLAG_UNCONFINED; profile->mode = APPARMOR_UNCONFINED; - profile->rules.file.dfa = aa_get_dfa(nulldfa); - profile->rules.policy.dfa = aa_get_dfa(nulldfa); + rules = list_first_entry(&profile->rules, typeof(*rules), list); + rules->file.dfa = aa_get_dfa(nulldfa); + rules->policy.dfa = aa_get_dfa(nulldfa); return profile; } diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index bbca7772dfa2..ac9955ef5d4a 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -577,9 +577,8 @@ fail: return false; } -static bool unpack_secmark(struct aa_ext *e, struct aa_profile *profile) +static bool unpack_secmark(struct aa_ext *e, struct aa_ruleset *rules) { - struct aa_ruleset *rules = &profile->rules; void *pos = e->pos; u16 size; int i; @@ -624,7 +623,7 @@ fail: return false; } -static bool unpack_rlimits(struct aa_ext *e, struct aa_profile *profile) +static bool unpack_rlimits(struct aa_ext *e, struct aa_ruleset *rules) { void *pos = e->pos; @@ -635,7 +634,7 @@ static bool unpack_rlimits(struct aa_ext *e, struct aa_profile *profile) u32 tmp = 0; if (!unpack_u32(e, &tmp, NULL)) goto fail; - 
profile->rules.rlimits.mask = tmp; + rules->rlimits.mask = tmp; if (unpack_array(e, NULL, &size) != TRI_TRUE || size > RLIM_NLIMITS) @@ -645,7 +644,7 @@ static bool unpack_rlimits(struct aa_ext *e, struct aa_profile *profile) int a = aa_map_resource(i); if (!unpack_u64(e, &tmp2, NULL)) goto fail; - profile->rules.rlimits.limits[a].rlim_max = tmp2; + rules->rlimits.limits[a].rlim_max = tmp2; } if (!unpack_nameX(e, AA_ARRAYEND, NULL)) goto fail; @@ -852,7 +851,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) profile = aa_alloc_profile(name, NULL, GFP_KERNEL); if (!profile) return ERR_PTR(-ENOMEM); - rules = &profile->rules; + rules = list_first_entry(&profile->rules, typeof(*rules), list); /* profile renaming is optional */ (void) unpack_str(e, &profile->rename, "rename"); @@ -971,12 +970,12 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) goto fail; } - if (!unpack_rlimits(e, profile)) { + if (!unpack_rlimits(e, rules)) { info = "failed to unpack profile rlimits"; goto fail; } - if (!unpack_secmark(e, profile)) { + if (!unpack_secmark(e, rules)) { info = "failed to unpack profile secmark rules"; goto fail; } @@ -1208,23 +1207,26 @@ static bool verify_perms(struct aa_policydb *pdb) */ static int verify_profile(struct aa_profile *profile) { - if ((profile->rules.file.dfa && - !verify_dfa_xindex(profile->rules.file.dfa, - profile->rules.file.trans.size)) || - (profile->rules.policy.dfa && - !verify_dfa_xindex(profile->rules.policy.dfa, - profile->rules.policy.trans.size))) { + struct aa_ruleset *rules = list_first_entry(&profile->rules, + typeof(*rules), list); + if (!rules) + return 0; + + if ((rules->file.dfa && !verify_dfa_xindex(rules->file.dfa, + rules->file.trans.size)) || + (rules->policy.dfa && + !verify_dfa_xindex(rules->policy.dfa, rules->policy.trans.size))) { audit_iface(profile, NULL, NULL, "Unpack: Invalid named transition", NULL, -EPROTO); return -EPROTO; } - if 
(!verify_perms(&profile->rules.file)) { + if (!verify_perms(&rules->file)) { audit_iface(profile, NULL, NULL, "Unpack: Invalid perm index", NULL, -EPROTO); return -EPROTO; } - if (!verify_perms(&profile->rules.policy)) { + if (!verify_perms(&rules->policy)) { audit_iface(profile, NULL, NULL, "Unpack: Invalid perm index", NULL, -EPROTO); return -EPROTO; diff --git a/security/apparmor/resource.c b/security/apparmor/resource.c index f28026804d13..ed543f4edfd9 100644 --- a/security/apparmor/resource.c +++ b/security/apparmor/resource.c @@ -82,7 +82,8 @@ int aa_map_resource(int resource) static int profile_setrlimit(struct aa_profile *profile, unsigned int resource, struct rlimit *new_rlim) { - struct aa_ruleset *rules = &profile->rules; + struct aa_ruleset *rules = list_first_entry(&profile->rules, + typeof(*rules), list); int e = 0; if (rules->rlimits.mask & (1 << resource) && new_rlim->rlim_max > @@ -154,12 +155,15 @@ void __aa_transition_rlimits(struct aa_label *old_l, struct aa_label *new_l) * to the lesser of the tasks hard limit and the init tasks soft limit */ label_for_each_confined(i, old_l, old) { - if (old->rules.rlimits.mask) { + struct aa_ruleset *rules = list_first_entry(&old->rules, + typeof(*rules), + list); + if (rules->rlimits.mask) { int j; for (j = 0, mask = 1; j < RLIM_NLIMITS; j++, mask <<= 1) { - if (old->rules.rlimits.mask & mask) { + if (rules->rlimits.mask & mask) { rlim = current->signal->rlim + j; initrlim = init_task.signal->rlim + j; rlim->rlim_cur = min(rlim->rlim_max, @@ -171,17 +175,20 @@ void __aa_transition_rlimits(struct aa_label *old_l, struct aa_label *new_l) /* set any new hard limits as dictated by the new profile */ label_for_each_confined(i, new_l, new) { + struct aa_ruleset *rules = list_first_entry(&new->rules, + typeof(*rules), + list); int j; - if (!new->rules.rlimits.mask) + if (!rules->rlimits.mask) continue; for (j = 0, mask = 1; j < RLIM_NLIMITS; j++, mask <<= 1) { - if (!(new->rules.rlimits.mask & mask)) + if 
(!(rules->rlimits.mask & mask)) continue; rlim = current->signal->rlim + j; rlim->rlim_max = min(rlim->rlim_max, - new->rules.rlimits.limits[j].rlim_max); + rules->rlimits.limits[j].rlim_max); /* soft limit should not exceed hard limit */ rlim->rlim_cur = min(rlim->rlim_cur, rlim->rlim_max); } diff --git a/security/apparmor/task.c b/security/apparmor/task.c index 7e64fba42ca3..5000cbd055b6 100644 --- a/security/apparmor/task.c +++ b/security/apparmor/task.c @@ -229,11 +229,13 @@ static int profile_ptrace_perm(struct aa_profile *profile, struct aa_label *peer, u32 request, struct common_audit_data *sa) { + struct aa_ruleset *rules = list_first_entry(&profile->rules, + typeof(*rules), list); struct aa_perms perms = { }; aad(sa)->peer = peer; - aa_profile_match_label(profile, &profile->rules, peer, - AA_CLASS_PTRACE, request, &perms); + aa_profile_match_label(profile, rules, peer, AA_CLASS_PTRACE, request, + &perms); aa_apply_modes_to_perms(profile, &perms); return aa_check_perms(profile, &perms, request, sa, audit_ptrace_cb); } @@ -243,7 +245,7 @@ static int profile_tracee_perm(struct aa_profile *tracee, struct common_audit_data *sa) { if (profile_unconfined(tracee) || unconfined(tracer) || - !RULE_MEDIATES(&tracee->rules, AA_CLASS_PTRACE)) + !ANY_RULE_MEDIATES(&tracee->rules, AA_CLASS_PTRACE)) return 0; return profile_ptrace_perm(tracee, tracer, request, sa); @@ -256,7 +258,7 @@ static int profile_tracer_perm(struct aa_profile *tracer, if (profile_unconfined(tracer)) return 0; - if (RULE_MEDIATES(&tracer->rules, AA_CLASS_PTRACE)) + if (ANY_RULE_MEDIATES(&tracer->rules, AA_CLASS_PTRACE)) return profile_ptrace_perm(tracer, tracee, request, sa); /* profile uses the old style capability check for ptrace */ From 961f3e3de14467f3babe252f7b6cc44a36ebba64 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Sun, 11 Sep 2022 22:05:26 -0700 Subject: [PATCH 0039/4122] apparmor: fix aa_class_names[] to match reserved classes The class name map did not have the reserved names 
added. Fix this. Signed-off-by: John Johansen --- security/apparmor/audit.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/security/apparmor/audit.c b/security/apparmor/audit.c index e638f7bc9f52..8dfdda98fbf1 100644 --- a/security/apparmor/audit.c +++ b/security/apparmor/audit.c @@ -48,13 +48,28 @@ static const char *const aa_class_names[] = { "unknown", "ptrace", "signal", - "unknown", + "xmatch", "unknown", "unknown", "net", "unknown", "label", + "posix_mqueue", + "io_uring", + "module", "lsm", + "unknown", + "unknown", + "unknown", + "unknown", + "unknown", + "unknown", + "unknown", + "unknown", + "unknown", + "unknown", + "X", + "dbus", }; From 1f939c6bd1512d0b39b470396740added3cb403f Mon Sep 17 00:00:00 2001 From: John Johansen Date: Tue, 20 Sep 2022 04:01:28 -0700 Subject: [PATCH 0040/4122] apparmor: Fix regression in stacking due to label flags The unconfined label flag is not being computed correctly. It should only be set if all the profiles in the vector have it set, which is different from what is required for the debug and stale flags, which are set if any of the profile flags are set. 
Fixes: c1ed5da19765 ("apparmor: allow label to carry debug flags") Signed-off-by: John Johansen --- security/apparmor/label.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/security/apparmor/label.c b/security/apparmor/label.c index 98dadd960977..aa4031628af5 100644 --- a/security/apparmor/label.c +++ b/security/apparmor/label.c @@ -197,15 +197,18 @@ static bool vec_is_stale(struct aa_profile **vec, int n) return false; } -static long union_vec_flags(struct aa_profile **vec, int n, long mask) +static long accum_vec_flags(struct aa_profile **vec, int n) { - long u = 0; + long u = FLAG_UNCONFINED; int i; AA_BUG(!vec); for (i = 0; i < n; i++) { - u |= vec[i]->label.flags & mask; + u |= vec[i]->label.flags & (FLAG_DEBUG1 | FLAG_DEBUG2 | + FLAG_STALE); + if (!(u & vec[i]->label.flags & FLAG_UNCONFINED)) + u &= ~FLAG_UNCONFINED; } return u; @@ -1097,8 +1100,7 @@ static struct aa_label *label_merge_insert(struct aa_label *new, else if (k == b->size) return aa_get_label(b); } - new->flags |= union_vec_flags(new->vec, new->size, FLAG_UNCONFINED | - FLAG_DEBUG1 | FLAG_DEBUG2); + new->flags |= accum_vec_flags(new->vec, new->size); ls = labels_set(new); write_lock_irqsave(&ls->lock, flags); label = __label_insert(labels_set(new), new, false); From adaa9a3f72e6f98538bfac54f6dc4afc0537f410 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Fri, 23 Sep 2022 17:21:18 +0800 Subject: [PATCH 0041/4122] apparmor: Simplify obtaining the newest label on a cred In aa_get_task_label(), aa_get_newest_cred_label(__task_cred(task)) does the same thing as aa_get_newest_label(__aa_task_raw_label(task)), so we can replace it and remove __aa_task_raw_label() to simplify the code. 
Signed-off-by: Gaosheng Cui Signed-off-by: John Johansen --- security/apparmor/include/cred.h | 13 ------------- security/apparmor/task.c | 2 +- 2 files changed, 1 insertion(+), 14 deletions(-) diff --git a/security/apparmor/include/cred.h b/security/apparmor/include/cred.h index 0b9ae4804ef7..58fdc72af664 100644 --- a/security/apparmor/include/cred.h +++ b/security/apparmor/include/cred.h @@ -63,19 +63,6 @@ static inline struct aa_label *aa_get_newest_cred_label(const struct cred *cred) return aa_get_newest_label(aa_cred_raw_label(cred)); } -/** - * __aa_task_raw_label - retrieve another task's label - * @task: task to query (NOT NULL) - * - * Returns: @task's label without incrementing its ref count - * - * If @task != current needs to be called in RCU safe critical section - */ -static inline struct aa_label *__aa_task_raw_label(struct task_struct *task) -{ - return aa_cred_raw_label(__task_cred(task)); -} - /** * aa_current_raw_label - find the current tasks confining label * diff --git a/security/apparmor/task.c b/security/apparmor/task.c index 5000cbd055b6..84d16a29bfcb 100644 --- a/security/apparmor/task.c +++ b/security/apparmor/task.c @@ -31,7 +31,7 @@ struct aa_label *aa_get_task_label(struct task_struct *task) struct aa_label *p; rcu_read_lock(); - p = aa_get_newest_label(__aa_task_raw_label(task)); + p = aa_get_newest_cred_label(__task_cred(task)); rcu_read_unlock(); return p; From 65f7f666f21ce374628d58b3cc48515070f31e72 Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Wed, 14 Sep 2022 15:46:07 +0800 Subject: [PATCH 0042/4122] apparmor: make __aa_path_perm() static Make __aa_path_perm() static as it's only used inside apparmor/file.c. 
Signed-off-by: Xiu Jianfeng Signed-off-by: John Johansen --- security/apparmor/file.c | 7 ++++--- security/apparmor/include/file.h | 3 --- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/security/apparmor/file.c b/security/apparmor/file.c index d7f27848e7cc..e7dc5ea38997 100644 --- a/security/apparmor/file.c +++ b/security/apparmor/file.c @@ -220,9 +220,10 @@ aa_state_t aa_str_perms(struct aa_policydb *file_rules, aa_state_t start, return state; } -int __aa_path_perm(const char *op, struct aa_profile *profile, const char *name, - u32 request, struct path_cond *cond, int flags, - struct aa_perms *perms) +static int __aa_path_perm(const char *op, struct aa_profile *profile, + const char *name, u32 request, + struct path_cond *cond, int flags, + struct aa_perms *perms) { struct aa_ruleset *rules = list_first_entry(&profile->rules, typeof(*rules), list); diff --git a/security/apparmor/include/file.h b/security/apparmor/include/file.h index 1a1c0f0c5071..5be620af33ba 100644 --- a/security/apparmor/include/file.h +++ b/security/apparmor/include/file.h @@ -119,9 +119,6 @@ aa_state_t aa_str_perms(struct aa_policydb *file_rules, aa_state_t start, const char *name, struct path_cond *cond, struct aa_perms *perms); -int __aa_path_perm(const char *op, struct aa_profile *profile, - const char *name, u32 request, struct path_cond *cond, - int flags, struct aa_perms *perms); int aa_path_perm(const char *op, struct aa_label *label, const struct path *path, int flags, u32 request, struct path_cond *cond); From 1ddece8cd0f43582085497eacff2e3cd37f93d1f Mon Sep 17 00:00:00 2001 From: John Johansen Date: Sat, 24 Sep 2022 22:25:25 -0700 Subject: [PATCH 0043/4122] apparmor: Fix doc comment for compute_fperms When compute_fperms was moved to policy_compat and made static it was renamed from aa_compute_fperms to just compute_fperms to help indicate it is only available statically. Unfortunately the doc comment did not also get updated to reflect the change. 
Reported-by: kernel test robot Signed-off-by: John Johansen --- security/apparmor/policy_compat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/security/apparmor/policy_compat.c b/security/apparmor/policy_compat.c index 1aa5cced935e..9e52e218bf30 100644 --- a/security/apparmor/policy_compat.c +++ b/security/apparmor/policy_compat.c @@ -140,8 +140,8 @@ static struct aa_perms compute_fperms_other(struct aa_dfa *dfa, } /** - * aa_compute_fperms - convert dfa compressed perms to internal perms and store - * them so they can be retrieved later. + * compute_fperms - convert dfa compressed perms to internal perms and store + * them so they can be retrieved later. * @dfa: a dfa using fperms to remap to internal permissions * * Returns: remapped perm table From 73c7e91c8bc98a5da94be62a9a4ba2793f86a97b Mon Sep 17 00:00:00 2001 From: John Johansen Date: Sat, 24 Sep 2022 22:34:07 -0700 Subject: [PATCH 0044/4122] apparmor: Remove unnecessary size check when unpacking trans_table The index into the trans_table has a max size of 2^24 bits which the code was testing but this is unnecessary as unpack_array can only unpack a table of 2^16 bits in size so the table unpacked will never be larger than what can be indexed, and any test here is redundant. 
Reported-by: kernel test robot Signed-off-by: John Johansen --- security/apparmor/policy_unpack.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index ac9955ef5d4a..6deaeecb76fe 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -484,9 +484,13 @@ static bool unpack_trans_table(struct aa_ext *e, struct aa_str_table *strs) u16 size; int i; - if (unpack_array(e, NULL, &size) != TRI_TRUE || - size > (1 << 24)) - /* currently 2^24 bits entries 0-3 */ + if (unpack_array(e, NULL, &size) != TRI_TRUE) + /* + * Note: index into trans table array is a max + * of 2^24, but unpack array can only unpack + * an array of 2^16 in size atm so no need + * for size check here + */ goto fail; table = kcalloc(size, sizeof(char *), GFP_KERNEL); if (!table) From 14d37a7f14569adbf7a019710762271fa2a9e739 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Sun, 25 Sep 2022 15:36:45 -0700 Subject: [PATCH 0045/4122] apparmor: make sure the decompression ctx is properly initialized The decompress ctx was not properly initialized when reading raw profile data back to userspace. 
Reported-by: kernel test robot Fixes: 52ccc20c652b ("apparmor: use zstd compression for profile data") Signed-off-by: John Johansen --- security/apparmor/apparmorfs.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index f6d83ffde3c4..ddd64b8ebf05 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -1327,7 +1327,11 @@ static int decompress_zstd(char *src, size_t slen, char *dst, size_t dlen) ret = -ENOMEM; goto cleanup; } - + ctx = zstd_init_dctx(wksp, wksp_len); + if (ctx == NULL) { + ret = -ENOMEM; + goto cleanup; + } out_len = zstd_decompress_dctx(ctx, dst, dlen, src, slen); if (zstd_is_error(out_len)) { ret = -EINVAL; From 70f24a9f9084b7fffd95daa707cce8e339b189dd Mon Sep 17 00:00:00 2001 From: John Johansen Date: Thu, 29 Sep 2022 06:24:29 -0700 Subject: [PATCH 0046/4122] apparmor: Fix undefined references to zstd_ symbols Unfortunately the switch to using zstd compression did not properly ifdef all the code that uses zstd_ symbols. 
So that if exporting of binary policy is disabled in the config the compile will fail with the following errors security/apparmor/lsm.c:1545: undefined reference to `zstd_min_clevel' aarch64-linux-ld: security/apparmor/lsm.c:1545: undefined reference to `zstd_max_clevel' Reported-by: kernel test robot Fixes: 52ccc20c652b ("apparmor: use zstd compression for profile data") Signed-off-by: John Johansen Acked-by: Jon Tourville --- security/apparmor/apparmorfs.c | 4 ++-- security/apparmor/include/apparmor.h | 11 +++++++++++ security/apparmor/lsm.c | 5 ++--- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index ddd64b8ebf05..2c138309ad66 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -1202,13 +1202,13 @@ static int seq_ns_name_show(struct seq_file *seq, void *v) static int seq_ns_compress_min_show(struct seq_file *seq, void *v) { - seq_printf(seq, "%d\n", zstd_min_clevel()); + seq_printf(seq, "%d\n", AA_MIN_CLEVEL); return 0; } static int seq_ns_compress_max_show(struct seq_file *seq, void *v) { - seq_printf(seq, "%d\n", zstd_max_clevel()); + seq_printf(seq, "%d\n", AA_MAX_CLEVEL); return 0; } diff --git a/security/apparmor/include/apparmor.h b/security/apparmor/include/apparmor.h index 6d9ca075fcb9..8a81557c9d59 100644 --- a/security/apparmor/include/apparmor.h +++ b/security/apparmor/include/apparmor.h @@ -51,4 +51,15 @@ extern bool aa_g_logsyscall; extern bool aa_g_paranoid_load; extern unsigned int aa_g_path_max; +#ifdef CONFIG_SECURITY_APPARMOR_EXPORT_BINARY +#define AA_MIN_CLEVEL zstd_min_clevel() +#define AA_MAX_CLEVEL zstd_max_clevel() +#define AA_DEFAULT_CLEVEL ZSTD_CLEVEL_DEFAULT +#else +#define AA_MIN_CLEVEL 0 +#define AA_MAX_CLEVEL 0 +#define AA_DEFAULT_CLEVEL 0 +#endif /* CONFIG_SECURITY_APPARMOR_EXPORT_BINARY */ + + #endif /* __APPARMOR_H */ diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index a22e53e44123..8e2b951c4988 100644 
--- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -1365,7 +1365,7 @@ module_param_named(export_binary, aa_g_export_binary, aabool, 0600); #endif /* policy loaddata compression level */ -int aa_g_rawdata_compression_level = ZSTD_CLEVEL_DEFAULT; +int aa_g_rawdata_compression_level = AA_DEFAULT_CLEVEL; module_param_named(rawdata_compression_level, aa_g_rawdata_compression_level, aacompressionlevel, 0400); @@ -1547,8 +1547,7 @@ static int param_set_aacompressionlevel(const char *val, error = param_set_int(val, kp); aa_g_rawdata_compression_level = clamp(aa_g_rawdata_compression_level, - zstd_min_clevel(), - zstd_max_clevel()); + AA_MIN_CLEVEL, AA_MAX_CLEVEL); pr_info("AppArmor: policy rawdata compression level set to %d\n", aa_g_rawdata_compression_level); From a2f31df06b7aa1769f12ec6f9ae7f18e78582cad Mon Sep 17 00:00:00 2001 From: John Johansen Date: Thu, 29 Sep 2022 06:48:10 -0700 Subject: [PATCH 0047/4122] apparmor: Fix decompression of rawdata for read back to userspace The rawdata readback has a few problems. First if compression is enabled when the data is read then the compressed data is read out instead of decompressing the data. Second if compression of the data fails, the code does not handle holding onto the raw_data in uncompressed form. Third if the compression is enabled/disabled after the rawdata was loaded, the check against the global control of whether to use compression does not reflect what was already done to the data. Fix these by always storing the compressed size, along with the original data size even if compression fails or is not used. And use this to detect whether the rawdata is actually compressed.
Fixes: 52ccc20c652b ("apparmor: use zstd compression for profile data") Signed-off-by: John Johansen Acked-by: Jon Tourville --- security/apparmor/apparmorfs.c | 2 +- security/apparmor/policy_unpack.c | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index 2c138309ad66..424b2c1e586d 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -1315,7 +1315,7 @@ SEQ_RAWDATA_FOPS(compressed_size); static int decompress_zstd(char *src, size_t slen, char *dst, size_t dlen) { #ifdef CONFIG_SECURITY_APPARMOR_EXPORT_BINARY - if (aa_g_rawdata_compression_level == 0) { + if (slen < dlen) { const size_t wksp_len = zstd_dctx_workspace_bound(); zstd_dctx *ctx; void *wksp; diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 6deaeecb76fe..45c9dfdc8e0d 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -1294,7 +1294,7 @@ static int compress_zstd(const char *src, size_t slen, char **dst, size_t *dlen) } out_len = zstd_compress_cctx(ctx, out, out_len, src, slen, &params); - if (zstd_is_error(out_len)) { + if (zstd_is_error(out_len) || out_len >= slen) { ret = -EINVAL; goto cleanup; } @@ -1348,9 +1348,10 @@ static int compress_loaddata(struct aa_loaddata *data) void *udata = data->data; int error = compress_zstd(udata, data->size, &data->data, &data->compressed_size); - if (error) + if (error) { + data->compressed_size = data->size; return error; - + } if (udata != data->data) kvfree(udata); } else From 32490541682bf8ea445e9bd29c866981851e0912 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Mon, 3 Oct 2022 01:30:38 -0700 Subject: [PATCH 0048/4122] apparmor: Fix kunit test for out of bounds array The apparmor kunit tests are failing on the out of bounds array check with the following failure # policy_unpack_test_unpack_array_out_of_bounds: EXPECTATION FAILED at
security/apparmor/policy_unpack_test.c:178 Expected unpack_array(puf->e, name, &array_size) == 1, but unpack_array(puf->e, name, &array_size) == -1 # policy_unpack_test_unpack_array_out_of_bounds: EXPECTATION FAILED at security/apparmor/policy_unpack_test.c:180 Expected array_size == 0, but array_size == 64192 not ok 5 - policy_unpack_test_unpack_array_out_of_bounds This is because unpack_array changed to allow distinguishing between the array not being present and an error. In the error case the array size is not set and should not be tested. Reported-by: kernel test robot Fixes: 995a5b64620e ("apparmor: make unpack_array return a trianary value") Signed-off-by: John Johansen --- security/apparmor/policy_unpack_test.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/security/apparmor/policy_unpack_test.c b/security/apparmor/policy_unpack_test.c index 1a43d538c4c0..b214f6ea8a72 100644 --- a/security/apparmor/policy_unpack_test.c +++ b/security/apparmor/policy_unpack_test.c @@ -176,8 +176,7 @@ static void policy_unpack_test_unpack_array_out_of_bounds(struct kunit *test) puf->e->end = puf->e->start + TEST_ARRAY_BUF_OFFSET + sizeof(u16); KUNIT_EXPECT_EQ(test, unpack_array(puf->e, name, &array_size), - TRI_TRUE); - KUNIT_EXPECT_EQ(test, array_size, 0); + TRI_FALSE); KUNIT_EXPECT_PTR_EQ(test, puf->e->pos, puf->e->start + TEST_NAMED_ARRAY_BUF_OFFSET); } From 5515a8e30eaa8ae0d57ec59c908716cf2af114ae Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Tue, 4 Oct 2022 13:45:15 +0500 Subject: [PATCH 0049/4122] apparmor: store return value of unpack_perms_table() to signed variable The unpack_perms_table() can return error which is negative value. Store the return value to a signed variable. policy->size is unsigned variable. It shouldn't be used to store the return status. 
Fixes: 2d6b2dea7f3c ("apparmor: add the ability for policy to specify a permission table") Signed-off-by: Muhammad Usama Anjum Signed-off-by: John Johansen --- security/apparmor/policy_unpack.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 45c9dfdc8e0d..09f316943951 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -734,14 +734,18 @@ static int unpack_pdb(struct aa_ext *e, struct aa_policydb *policy, { void *pos = e->pos; int i, flags, error = -EPROTO; + ssize_t size; - policy->size = unpack_perms_table(e, &policy->perms); - if (policy->size < 0) { - error = policy->size; + size = unpack_perms_table(e, &policy->perms); + if (size < 0) { + error = size; policy->perms = NULL; *info = "failed to unpack - perms"; goto fail; - } else if (policy->perms) { + } + policy->size = size; + + if (policy->perms) { /* perms table present accept is index */ flags = TO_ACCEPT1_FLAG(YYTD_DATA32); } else { From ee21a175ecfa821b74822881d354c7f848930738 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Mon, 10 Oct 2022 11:18:50 -0700 Subject: [PATCH 0050/4122] apparmor: fix uninitialize table variable in error in unpack_trans_table The error path has one case where *table is uninitialized, initialize it. 
Fixes: a0792e2ceddc ("apparmor: make transition table unpack generic so it can be reused") Reported-by: kernel test robot Signed-off-by: John Johansen --- security/apparmor/policy_unpack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 09f316943951..3b956b1235f3 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -477,7 +477,7 @@ static struct aa_dfa *unpack_dfa(struct aa_ext *e, int flags) static bool unpack_trans_table(struct aa_ext *e, struct aa_str_table *strs) { void *saved_pos = e->pos; - char **table; + char **table = NULL; /* exec table is optional */ if (unpack_nameX(e, AA_STRUCT, "xtable")) { From 53991aedcd34760be23f1b0ef312e39b6add84af Mon Sep 17 00:00:00 2001 From: John Johansen Date: Mon, 10 Oct 2022 12:15:10 -0700 Subject: [PATCH 0051/4122] apparmor: Fix unpack_profile() warn: passing zero to 'ERR_PTR' unpack_profile() sets a default error on entry but this gets overridden by error assignment by functions called in its body. If an error check that was relying on the default value is triggered after one of these error assignments then zero will be passed to ERR_PTR. Fix this by setting up a default -EPROTO assignment in the error path and while we are at it make sure the correct error is returned in non-default cases. 
Fixes: 217af7e2f4de ("apparmor: refactor profile rules and attachments") Reported-by: kernel test robot Signed-off-by: John Johansen --- security/apparmor/policy_unpack.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 3b956b1235f3..2e028d540c6b 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -851,6 +851,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) *ns_name = kstrndup(tmpns, ns_len, GFP_KERNEL); if (!*ns_name) { info = "out of memory"; + error = -ENOMEM; goto fail; } name = tmpname; @@ -882,7 +883,8 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) } profile->attach.xmatch_len = tmp; profile->attach.xmatch.start[AA_CLASS_XMATCH] = DFA_START; - if (aa_compat_map_xmatch(&profile->attach.xmatch)) { + error = aa_compat_map_xmatch(&profile->attach.xmatch); + if (error) { info = "failed to convert xmatch permission table"; goto fail; } @@ -1004,7 +1006,8 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) AA_CLASS_FILE); if (!unpack_nameX(e, AA_STRUCTEND, NULL)) goto fail; - if (aa_compat_map_policy(&rules->policy, e->version)) { + error = aa_compat_map_policy(&rules->policy, e->version); + if (error) { info = "failed to remap policydb permission table"; goto fail; } @@ -1016,7 +1019,8 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) if (error) { goto fail; } else if (rules->file.dfa) { - if (aa_compat_map_file(&rules->file)) { + error = aa_compat_map_file(&rules->file); + if (error) { info = "failed to remap file permission table"; goto fail; } @@ -1027,12 +1031,14 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) } else rules->file.dfa = aa_get_dfa(nulldfa); + error = -EPROTO; if (unpack_nameX(e, AA_STRUCT, "data")) { info = "out of memory"; profile->data = 
kzalloc(sizeof(*profile->data), GFP_KERNEL); - if (!profile->data) + if (!profile->data) { + error = -ENOMEM; goto fail; - + } params.nelem_hint = 3; params.key_len = sizeof(void *); params.key_offset = offsetof(struct aa_data, key); @@ -1049,6 +1055,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) { kfree_sensitive(key); + error = -ENOMEM; goto fail; } @@ -1058,6 +1065,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) if (data->size && !data->data) { kfree_sensitive(data->key); kfree_sensitive(data); + error = -ENOMEM; goto fail; } @@ -1079,6 +1087,9 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) return profile; fail: + if (error == 0) + /* default error covers most cases */ + error = -EPROTO; if (profile) name = NULL; else if (!name) From f37722ac71cc8b5ab86f4b3c4d9b9388e1315e8b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 14 Oct 2022 12:25:06 +0300 Subject: [PATCH 0052/4122] phy: stm32: fix an error code in probe If "index > usbphyc->nphys" is true then this returns success but it should return -EINVAL. 
Fixes: 94c358da3a05 ("phy: stm32: add support for STM32 USB PHY Controller (USBPHYC)") Signed-off-by: Dan Carpenter Reviewed-by: Amelie Delaunay Link: https://lore.kernel.org/r/Y0kq8j6S+5nDdMpr@kili Signed-off-by: Vinod Koul --- drivers/phy/st/phy-stm32-usbphyc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/phy/st/phy-stm32-usbphyc.c b/drivers/phy/st/phy-stm32-usbphyc.c index a98c911cc37a..5bb9647b078f 100644 --- a/drivers/phy/st/phy-stm32-usbphyc.c +++ b/drivers/phy/st/phy-stm32-usbphyc.c @@ -710,6 +710,8 @@ static int stm32_usbphyc_probe(struct platform_device *pdev) ret = of_property_read_u32(child, "reg", &index); if (ret || index > usbphyc->nphys) { dev_err(&phy->dev, "invalid reg property: %d\n", ret); + if (!ret) + ret = -EINVAL; goto put_child; } From cbdbe312c9b6f9dbf698c3db1a5bec4140fe1c21 Mon Sep 17 00:00:00 2001 From: Matt Ranostay Date: Sat, 15 Oct 2022 13:11:22 -0700 Subject: [PATCH 0053/4122] dt-bindings: phy-j721e-wiz: add j784s4 compatible string Add ti,j784s4-wiz-10g compatible string to binding documentation. 
Signed-off-by: Matt Ranostay Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221015201123.195477-2-mranostay@ti.com Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml b/Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml index 2225925b6dad..a9e38739c010 100644 --- a/Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml +++ b/Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml @@ -17,6 +17,7 @@ properties: - ti,j721e-wiz-10g - ti,am64-wiz-10g - ti,j7200-wiz-10g + - ti,j784s4-wiz-10g power-domains: maxItems: 1 From e27ecef8a8ccc13c54df54f5d100aa608de4c306 Mon Sep 17 00:00:00 2001 From: Matt Ranostay Date: Sat, 15 Oct 2022 13:11:23 -0700 Subject: [PATCH 0054/4122] phy: ti: phy-j721e-wiz: add j784s4-wiz-10g module support Add support for j784s4-wiz-10g device which has two core reference clocks (e.g core_ref_clk, core_ref1_clk) which requires an additional mux selection option. 
Acked-by: Roger Quadros Signed-off-by: Matt Ranostay Link: https://lore.kernel.org/r/20221015201123.195477-3-mranostay@ti.com Signed-off-by: Vinod Koul --- drivers/phy/ti/phy-j721e-wiz.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/phy/ti/phy-j721e-wiz.c b/drivers/phy/ti/phy-j721e-wiz.c index 41725c6bcdf6..141b51af4427 100644 --- a/drivers/phy/ti/phy-j721e-wiz.c +++ b/drivers/phy/ti/phy-j721e-wiz.c @@ -81,14 +81,20 @@ static const struct reg_field phy_reset_n = REG_FIELD(WIZ_SERDES_RST, 31, 31); static const struct reg_field phy_en_refclk = REG_FIELD(WIZ_SERDES_RST, 30, 30); static const struct reg_field pll1_refclk_mux_sel = REG_FIELD(WIZ_SERDES_RST, 29, 29); +static const struct reg_field pll1_refclk_mux_sel_2 = + REG_FIELD(WIZ_SERDES_RST, 22, 23); static const struct reg_field pll0_refclk_mux_sel = REG_FIELD(WIZ_SERDES_RST, 28, 28); +static const struct reg_field pll0_refclk_mux_sel_2 = + REG_FIELD(WIZ_SERDES_RST, 28, 29); static const struct reg_field refclk_dig_sel_16g = REG_FIELD(WIZ_SERDES_RST, 24, 25); static const struct reg_field refclk_dig_sel_10g = REG_FIELD(WIZ_SERDES_RST, 24, 24); static const struct reg_field pma_cmn_refclk_int_mode = REG_FIELD(WIZ_SERDES_TOP_CTRL, 28, 29); +static const struct reg_field pma_cmn_refclk1_int_mode = + REG_FIELD(WIZ_SERDES_TOP_CTRL, 20, 21); static const struct reg_field pma_cmn_refclk_mode = REG_FIELD(WIZ_SERDES_TOP_CTRL, 30, 31); static const struct reg_field pma_cmn_refclk_dig_div = @@ -315,6 +321,7 @@ enum wiz_type { J721E_WIZ_10G, /* Also for J7200 SR1.0 */ AM64_WIZ_10G, J7200_WIZ_10G, /* J7200 SR2.0 */ + J784S4_WIZ_10G, }; struct wiz_data { @@ -992,6 +999,7 @@ static void wiz_clock_cleanup(struct wiz *wiz, struct device_node *node) switch (wiz->type) { case AM64_WIZ_10G: case J7200_WIZ_10G: + case J784S4_WIZ_10G: of_clk_del_provider(dev->of_node); return; default: @@ -1123,6 +1131,7 @@ static int wiz_clock_init(struct wiz *wiz, struct device_node *node) switch (wiz->type) { 
case AM64_WIZ_10G: case J7200_WIZ_10G: + case J784S4_WIZ_10G: ret = wiz_clock_register(wiz); if (ret) dev_err(dev, "Failed to register wiz clocks\n"); @@ -1299,6 +1308,16 @@ static struct wiz_data j7200_pg2_10g_data = { .clk_div_sel_num = WIZ_DIV_NUM_CLOCKS_10G, }; +static struct wiz_data j784s4_10g_data = { + .type = J784S4_WIZ_10G, + .pll0_refclk_mux_sel = &pll0_refclk_mux_sel_2, + .pll1_refclk_mux_sel = &pll1_refclk_mux_sel_2, + .refclk_dig_sel = &refclk_dig_sel_16g, + .pma_cmn_refclk1_int_mode = &pma_cmn_refclk1_int_mode, + .clk_mux_sel = clk_mux_sel_10g_2_refclk, + .clk_div_sel_num = WIZ_DIV_NUM_CLOCKS_10G, +}; + static const struct of_device_id wiz_id_table[] = { { .compatible = "ti,j721e-wiz-16g", .data = &j721e_16g_data, @@ -1312,6 +1331,9 @@ static const struct of_device_id wiz_id_table[] = { { .compatible = "ti,j7200-wiz-10g", .data = &j7200_pg2_10g_data, }, + { + .compatible = "ti,j784s4-wiz-10g", .data = &j784s4_10g_data, + }, {} }; MODULE_DEVICE_TABLE(of, wiz_id_table); From 25caed3dcadacd0443dce4fb820e4a33029bba40 Mon Sep 17 00:00:00 2001 From: Richard Zhu Date: Thu, 13 Oct 2022 09:46:59 +0800 Subject: [PATCH 0055/4122] dt-binding: phy: Add i.MX8MP PCIe PHY binding Add i.MX8MP PCIe PHY binding. On i.MX8MM, the initialized default value of PERST bit(BIT3) of SRC_PCIEPHY_RCR is 1b'1. But i.MX8MP has one inversed default value 1b'0 of PERST bit. And the PERST bit should be kept 1b'1 after power and clocks are stable. So add one more PERST explicitly for i.MX8MP PCIe PHY. 
Signed-off-by: Richard Zhu Tested-by: Marek Vasut Tested-by: Richard Leitner Tested-by: Alexander Stein Reviewed-by: Lucas Stach Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/1665625622-20551-2-git-send-email-hongxing.zhu@nxp.com Signed-off-by: Vinod Koul --- .../bindings/phy/fsl,imx8-pcie-phy.yaml | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/phy/fsl,imx8-pcie-phy.yaml b/Documentation/devicetree/bindings/phy/fsl,imx8-pcie-phy.yaml index 0af765ba2793..182a219387b0 100644 --- a/Documentation/devicetree/bindings/phy/fsl,imx8-pcie-phy.yaml +++ b/Documentation/devicetree/bindings/phy/fsl,imx8-pcie-phy.yaml @@ -16,6 +16,7 @@ properties: compatible: enum: - fsl,imx8mm-pcie-phy + - fsl,imx8mp-pcie-phy reg: maxItems: 1 @@ -28,11 +29,16 @@ properties: - const: ref resets: - maxItems: 1 + minItems: 1 + maxItems: 2 reset-names: - items: - - const: pciephy + oneOf: + - items: # for iMX8MM + - const: pciephy + - items: # for IMX8MP + - const: pciephy + - const: perst fsl,refclk-pad-mode: description: | @@ -60,6 +66,10 @@ properties: description: A boolean property indicating the CLKREQ# signal is not supported in the board design (optional) + power-domains: + description: PCIe PHY power domain (optional). + maxItems: 1 + required: - "#phy-cells" - compatible From e9e7dca53bf5a5bddf70c87157660a29cdcdd2d8 Mon Sep 17 00:00:00 2001 From: Richard Zhu Date: Thu, 13 Oct 2022 09:47:00 +0800 Subject: [PATCH 0056/4122] phy: freescale: imx8m-pcie: Refine register definitions No function changes, refine PHY register definitions. - Keep align with other CMN PHY registers, refine the definitions of PHY_CMN_REG75. - Remove two BIT definitions that are not used at all. 
Signed-off-by: Richard Zhu Signed-off-by: Lucas Stach Tested-by: Marek Vasut Tested-by: Richard Leitner Tested-by: Alexander Stein Reviewed-by: Lucas Stach Link: https://lore.kernel.org/r/1665625622-20551-3-git-send-email-hongxing.zhu@nxp.com Signed-off-by: Vinod Koul --- drivers/phy/freescale/phy-fsl-imx8m-pcie.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c index c93286483b42..3c8c255499cd 100644 --- a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c +++ b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c @@ -31,12 +31,10 @@ #define IMX8MM_PCIE_PHY_CMN_REG065 0x194 #define ANA_AUX_RX_TERM (BIT(7) | BIT(4)) #define ANA_AUX_TX_LVL GENMASK(3, 0) -#define IMX8MM_PCIE_PHY_CMN_REG75 0x1D4 -#define PCIE_PHY_CMN_REG75_PLL_DONE 0x3 +#define IMX8MM_PCIE_PHY_CMN_REG075 0x1D4 +#define ANA_PLL_DONE 0x3 #define PCIE_PHY_TRSV_REG5 0x414 -#define PCIE_PHY_TRSV_REG5_GEN1_DEEMP 0x2D #define PCIE_PHY_TRSV_REG6 0x418 -#define PCIE_PHY_TRSV_REG6_GEN2_DEEMP 0xF #define IMX8MM_GPR_PCIE_REF_CLK_SEL GENMASK(25, 24) #define IMX8MM_GPR_PCIE_REF_CLK_PLL FIELD_PREP(IMX8MM_GPR_PCIE_REF_CLK_SEL, 0x3) @@ -131,9 +129,8 @@ static int imx8_pcie_phy_power_on(struct phy *phy) reset_control_deassert(imx8_phy->reset); /* Polling to check the phy is ready or not. */ - ret = readl_poll_timeout(imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG75, - val, val == PCIE_PHY_CMN_REG75_PLL_DONE, - 10, 20000); + ret = readl_poll_timeout(imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG075, + val, val == ANA_PLL_DONE, 10, 20000); return ret; } From ca679c49c4463595499a053ba94328acb574fffa Mon Sep 17 00:00:00 2001 From: Richard Zhu Date: Thu, 13 Oct 2022 09:47:01 +0800 Subject: [PATCH 0057/4122] phy: freescale: imx8m-pcie: Refine i.MX8MM PCIe PHY driver To make it more flexible and easy to expand. Refine i.MX8MM PCIe PHY driver. 
- Use gpr compatible string to avoid the codes duplications when add another platform PCIe PHY support. - Re-arrange the codes to let it more flexible and easy to expand. No functional change. Re-arrange the TX tuning, since internal registers can be wrote through APB interface before assertion of CMN_RST. Signed-off-by: Richard Zhu Signed-off-by: Lucas Stach Tested-by: Marek Vasut Tested-by: Richard Leitner Tested-by: Alexander Stein Reviewed-by: Lucas Stach Reviewed-by: Ahmad Fatoum Link: https://lore.kernel.org/r/1665625622-20551-4-git-send-email-hongxing.zhu@nxp.com Signed-off-by: Vinod Koul --- drivers/phy/freescale/phy-fsl-imx8m-pcie.c | 106 +++++++++++++-------- 1 file changed, 66 insertions(+), 40 deletions(-) diff --git a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c index 3c8c255499cd..3e494612db3c 100644 --- a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c +++ b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -45,6 +46,15 @@ #define IMX8MM_GPR_PCIE_SSC_EN BIT(16) #define IMX8MM_GPR_PCIE_AUX_EN_OVERRIDE BIT(9) +enum imx8_pcie_phy_type { + IMX8MM, +}; + +struct imx8_pcie_phy_drvdata { + const char *gpr; + enum imx8_pcie_phy_type variant; +}; + struct imx8_pcie_phy { void __iomem *base; struct clk *clk; @@ -55,6 +65,7 @@ struct imx8_pcie_phy { u32 tx_deemph_gen1; u32 tx_deemph_gen2; bool clkreq_unused; + const struct imx8_pcie_phy_drvdata *drvdata; }; static int imx8_pcie_phy_power_on(struct phy *phy) @@ -66,31 +77,17 @@ static int imx8_pcie_phy_power_on(struct phy *phy) reset_control_assert(imx8_phy->reset); pad_mode = imx8_phy->refclk_pad_mode; - /* Set AUX_EN_OVERRIDE 1'b0, when the CLKREQ# isn't hooked */ - regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, - IMX8MM_GPR_PCIE_AUX_EN_OVERRIDE, - imx8_phy->clkreq_unused ? 
- 0 : IMX8MM_GPR_PCIE_AUX_EN_OVERRIDE); - regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, - IMX8MM_GPR_PCIE_AUX_EN, - IMX8MM_GPR_PCIE_AUX_EN); - regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, - IMX8MM_GPR_PCIE_POWER_OFF, 0); - regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, - IMX8MM_GPR_PCIE_SSC_EN, 0); - - regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, - IMX8MM_GPR_PCIE_REF_CLK_SEL, - pad_mode == IMX8_PCIE_REFCLK_PAD_INPUT ? - IMX8MM_GPR_PCIE_REF_CLK_EXT : - IMX8MM_GPR_PCIE_REF_CLK_PLL); - usleep_range(100, 200); - - /* Do the PHY common block reset */ - regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, - IMX8MM_GPR_PCIE_CMN_RST, - IMX8MM_GPR_PCIE_CMN_RST); - usleep_range(200, 500); + switch (imx8_phy->drvdata->variant) { + case IMX8MM: + /* Tune PHY de-emphasis setting to pass PCIe compliance. */ + if (imx8_phy->tx_deemph_gen1) + writel(imx8_phy->tx_deemph_gen1, + imx8_phy->base + PCIE_PHY_TRSV_REG5); + if (imx8_phy->tx_deemph_gen2) + writel(imx8_phy->tx_deemph_gen2, + imx8_phy->base + PCIE_PHY_TRSV_REG6); + break; + } if (pad_mode == IMX8_PCIE_REFCLK_PAD_INPUT || pad_mode == IMX8_PCIE_REFCLK_PAD_UNUSED) { @@ -118,15 +115,37 @@ static int imx8_pcie_phy_power_on(struct phy *phy) imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG065); } - /* Tune PHY de-emphasis setting to pass PCIe compliance. */ - if (imx8_phy->tx_deemph_gen1) - writel(imx8_phy->tx_deemph_gen1, - imx8_phy->base + PCIE_PHY_TRSV_REG5); - if (imx8_phy->tx_deemph_gen2) - writel(imx8_phy->tx_deemph_gen2, - imx8_phy->base + PCIE_PHY_TRSV_REG6); + /* Set AUX_EN_OVERRIDE 1'b0, when the CLKREQ# isn't hooked */ + regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, + IMX8MM_GPR_PCIE_AUX_EN_OVERRIDE, + imx8_phy->clkreq_unused ? 
+ 0 : IMX8MM_GPR_PCIE_AUX_EN_OVERRIDE); + regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, + IMX8MM_GPR_PCIE_AUX_EN, + IMX8MM_GPR_PCIE_AUX_EN); + regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, + IMX8MM_GPR_PCIE_POWER_OFF, 0); + regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, + IMX8MM_GPR_PCIE_SSC_EN, 0); - reset_control_deassert(imx8_phy->reset); + regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, + IMX8MM_GPR_PCIE_REF_CLK_SEL, + pad_mode == IMX8_PCIE_REFCLK_PAD_INPUT ? + IMX8MM_GPR_PCIE_REF_CLK_EXT : + IMX8MM_GPR_PCIE_REF_CLK_PLL); + usleep_range(100, 200); + + /* Do the PHY common block reset */ + regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, + IMX8MM_GPR_PCIE_CMN_RST, + IMX8MM_GPR_PCIE_CMN_RST); + + switch (imx8_phy->drvdata->variant) { + case IMX8MM: + reset_control_deassert(imx8_phy->reset); + usleep_range(200, 500); + break; + } /* Polling to check the phy is ready or not. */ ret = readl_poll_timeout(imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG075, @@ -157,6 +176,17 @@ static const struct phy_ops imx8_pcie_phy_ops = { .owner = THIS_MODULE, }; +static const struct imx8_pcie_phy_drvdata imx8mm_drvdata = { + .gpr = "fsl,imx8mm-iomuxc-gpr", + .variant = IMX8MM, +}; + +static const struct of_device_id imx8_pcie_phy_of_match[] = { + {.compatible = "fsl,imx8mm-pcie-phy", .data = &imx8mm_drvdata, }, + { }, +}; +MODULE_DEVICE_TABLE(of, imx8_pcie_phy_of_match); + static int imx8_pcie_phy_probe(struct platform_device *pdev) { struct phy_provider *phy_provider; @@ -169,6 +199,8 @@ static int imx8_pcie_phy_probe(struct platform_device *pdev) if (!imx8_phy) return -ENOMEM; + imx8_phy->drvdata = of_device_get_match_data(dev); + /* get PHY refclk pad mode */ of_property_read_u32(np, "fsl,refclk-pad-mode", &imx8_phy->refclk_pad_mode); @@ -194,7 +226,7 @@ static int imx8_pcie_phy_probe(struct platform_device *pdev) /* Grab GPR config register range */ imx8_phy->iomuxc_gpr = - syscon_regmap_lookup_by_compatible("fsl,imx6q-iomuxc-gpr"); + 
syscon_regmap_lookup_by_compatible(imx8_phy->drvdata->gpr); if (IS_ERR(imx8_phy->iomuxc_gpr)) { dev_err(dev, "unable to find iomuxc registers\n"); return PTR_ERR(imx8_phy->iomuxc_gpr); @@ -222,12 +254,6 @@ static int imx8_pcie_phy_probe(struct platform_device *pdev) return PTR_ERR_OR_ZERO(phy_provider); } -static const struct of_device_id imx8_pcie_phy_of_match[] = { - {.compatible = "fsl,imx8mm-pcie-phy",}, - { }, -}; -MODULE_DEVICE_TABLE(of, imx8_pcie_phy_of_match); - static struct platform_driver imx8_pcie_phy_driver = { .probe = imx8_pcie_phy_probe, .driver = { From dce9edff16ee8df20e791e82e0704c4667cc3908 Mon Sep 17 00:00:00 2001 From: Richard Zhu Date: Thu, 13 Oct 2022 09:47:02 +0800 Subject: [PATCH 0058/4122] phy: freescale: imx8m-pcie: Add i.MX8MP PCIe PHY support Add i.MX8MP PCIe PHY support. Signed-off-by: Richard Zhu Signed-off-by: Lucas Stach Tested-by: Marek Vasut Tested-by: Richard Leitner Tested-by: Alexander Stein Reviewed-by: Lucas Stach Reviewed-by: Ahmad Fatoum Link: https://lore.kernel.org/r/1665625622-20551-5-git-send-email-hongxing.zhu@nxp.com Signed-off-by: Vinod Koul --- drivers/phy/freescale/phy-fsl-imx8m-pcie.c | 25 ++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c index 3e494612db3c..7585e8080b77 100644 --- a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c +++ b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c @@ -48,6 +48,7 @@ enum imx8_pcie_phy_type { IMX8MM, + IMX8MP, }; struct imx8_pcie_phy_drvdata { @@ -60,6 +61,7 @@ struct imx8_pcie_phy { struct clk *clk; struct phy *phy; struct regmap *iomuxc_gpr; + struct reset_control *perst; struct reset_control *reset; u32 refclk_pad_mode; u32 tx_deemph_gen1; @@ -74,11 +76,11 @@ static int imx8_pcie_phy_power_on(struct phy *phy) u32 val, pad_mode; struct imx8_pcie_phy *imx8_phy = phy_get_drvdata(phy); - reset_control_assert(imx8_phy->reset); - pad_mode = imx8_phy->refclk_pad_mode; 
switch (imx8_phy->drvdata->variant) { case IMX8MM: + reset_control_assert(imx8_phy->reset); + /* Tune PHY de-emphasis setting to pass PCIe compliance. */ if (imx8_phy->tx_deemph_gen1) writel(imx8_phy->tx_deemph_gen1, @@ -87,6 +89,8 @@ static int imx8_pcie_phy_power_on(struct phy *phy) writel(imx8_phy->tx_deemph_gen2, imx8_phy->base + PCIE_PHY_TRSV_REG6); break; + case IMX8MP: /* Do nothing. */ + break; } if (pad_mode == IMX8_PCIE_REFCLK_PAD_INPUT || @@ -141,6 +145,9 @@ static int imx8_pcie_phy_power_on(struct phy *phy) IMX8MM_GPR_PCIE_CMN_RST); switch (imx8_phy->drvdata->variant) { + case IMX8MP: + reset_control_deassert(imx8_phy->perst); + fallthrough; case IMX8MM: reset_control_deassert(imx8_phy->reset); usleep_range(200, 500); @@ -181,8 +188,14 @@ static const struct imx8_pcie_phy_drvdata imx8mm_drvdata = { .variant = IMX8MM, }; +static const struct imx8_pcie_phy_drvdata imx8mp_drvdata = { + .gpr = "fsl,imx8mp-iomuxc-gpr", + .variant = IMX8MP, +}; + static const struct of_device_id imx8_pcie_phy_of_match[] = { {.compatible = "fsl,imx8mm-pcie-phy", .data = &imx8mm_drvdata, }, + {.compatible = "fsl,imx8mp-pcie-phy", .data = &imx8mp_drvdata, }, { }, }; MODULE_DEVICE_TABLE(of, imx8_pcie_phy_of_match); @@ -238,6 +251,14 @@ static int imx8_pcie_phy_probe(struct platform_device *pdev) return PTR_ERR(imx8_phy->reset); } + if (imx8_phy->drvdata->variant == IMX8MP) { + imx8_phy->perst = + devm_reset_control_get_exclusive(dev, "perst"); + if (IS_ERR(imx8_phy->perst)) + dev_err_probe(dev, PTR_ERR(imx8_phy->perst), + "Failed to get PCIE PHY PERST control\n"); + } + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); imx8_phy->base = devm_ioremap_resource(dev, res); if (IS_ERR(imx8_phy->base)) From b01d622d76134e9401970ffd3fbbb9a7051f976a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Tue, 20 Sep 2022 14:11:54 +0200 Subject: [PATCH 0059/4122] phy: marvell: phy-mvebu-a3700-comphy: Reset COMPHY registers before USB 3.0 power on MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Turris MOX board with older ARM Trusted Firmware version v1.5 is not able to detect any USB 3.0 device connected to USB-A port on Mox-A module after commit 0a6fc70d76bd ("phy: marvell: phy-mvebu-a3700-comphy: Remove broken reset support"). On the other hand USB 2.0 devices connected to the same USB-A port are working fine. It looks as if the older firmware configures COMPHY registers for USB 3.0 somehow incompatibly for kernel driver. Experiments show that resetting COMPHY registers via setting SFT_RST auto-clearing bit in COMPHY_SFT_RESET register fixes this issue. Reset the COMPHY in mvebu_a3700_comphy_usb3_power_on() function as a first step after selecting COMPHY lane and USB 3.0 function. With this change Turris MOX board can successfully detect USB 3.0 devices again. Before the above mentioned commit this reset was implemented in PHY reset method, so this is the reason why there was no issue with older firmware version then. Fixes: 0a6fc70d76bd ("phy: marvell: phy-mvebu-a3700-comphy: Remove broken reset support") Reported-by: Marek Behún Signed-off-by: Pali Rohár Tested-by: Shin'ichiro Kawasaki Link: https://lore.kernel.org/r/20220920121154.30115-1-pali@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/marvell/phy-mvebu-a3700-comphy.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/phy/marvell/phy-mvebu-a3700-comphy.c b/drivers/phy/marvell/phy-mvebu-a3700-comphy.c index 67712c77d806..d641b345afa3 100644 --- a/drivers/phy/marvell/phy-mvebu-a3700-comphy.c +++ b/drivers/phy/marvell/phy-mvebu-a3700-comphy.c @@ -826,6 +826,9 @@ mvebu_a3700_comphy_usb3_power_on(struct mvebu_a3700_comphy_lane *lane) if (ret) return ret; + /* COMPHY register reset (cleared automatically) */ + comphy_lane_reg_set(lane, COMPHY_SFT_RESET, SFT_RST, SFT_RST); + /* * 0. Set PHY OTG Control(0x5d034), bit 4, Power up OTG module The * register belong to UTMI module, so it is set in UTMI phy driver. 
From 2566ad8ec418934c213cb50fd2084ffd896a2fea Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 27 Sep 2022 12:22:02 +0300 Subject: [PATCH 0060/4122] phy: qcom-qmp-pcie: split register tables into common and extra parts SM8250 configuration tables are split into two parts: the common one and the PHY-specific tables. Make this split more formal. Rather than having a blind renamed copy of all QMP table fields, add separate struct qmp_phy_cfg_tables and add two instances of this structure to the struct qmp_phy_cfg. Later on this will be used to support different PHY modes (RC vs EP). Reviewed-by: Johan Hovold Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20220927092207.161501-2-dmitry.baryshkov@linaro.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 406 +++++++++++++---------- 1 file changed, 222 insertions(+), 184 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index 5be5348fbb26..ae0d7b49dfa3 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -1300,31 +1300,30 @@ static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_pcs_misc_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_G4_PRE_GAIN, 0x2e), }; +struct qmp_phy_cfg_tables { + const struct qmp_phy_init_tbl *serdes; + int serdes_num; + const struct qmp_phy_init_tbl *tx; + int tx_num; + const struct qmp_phy_init_tbl *rx; + int rx_num; + const struct qmp_phy_init_tbl *pcs; + int pcs_num; + const struct qmp_phy_init_tbl *pcs_misc; + int pcs_misc_num; +}; + /* struct qmp_phy_cfg - per-PHY initialization config */ struct qmp_phy_cfg { int lanes; - /* Init sequence for PHY blocks - serdes, tx, rx, pcs */ - const struct qmp_phy_init_tbl *serdes_tbl; - int serdes_tbl_num; - const struct qmp_phy_init_tbl *serdes_tbl_sec; - int serdes_tbl_num_sec; - const struct qmp_phy_init_tbl *tx_tbl; - int tx_tbl_num; - const struct qmp_phy_init_tbl *tx_tbl_sec; - int 
tx_tbl_num_sec; - const struct qmp_phy_init_tbl *rx_tbl; - int rx_tbl_num; - const struct qmp_phy_init_tbl *rx_tbl_sec; - int rx_tbl_num_sec; - const struct qmp_phy_init_tbl *pcs_tbl; - int pcs_tbl_num; - const struct qmp_phy_init_tbl *pcs_tbl_sec; - int pcs_tbl_num_sec; - const struct qmp_phy_init_tbl *pcs_misc_tbl; - int pcs_misc_tbl_num; - const struct qmp_phy_init_tbl *pcs_misc_tbl_sec; - int pcs_misc_tbl_num_sec; + /* Main init sequence for PHY blocks - serdes, tx, rx, pcs */ + const struct qmp_phy_cfg_tables tables; + /* + * Additional init sequence for PHY blocks, providing additional + * register programming. Unless required it can be left omitted. + */ + const struct qmp_phy_cfg_tables *tables_rc; /* clock ids to be requested */ const char * const *clk_list; @@ -1459,14 +1458,16 @@ static const char * const sdm845_pciephy_reset_l[] = { static const struct qmp_phy_cfg ipq8074_pciephy_cfg = { .lanes = 1, - .serdes_tbl = ipq8074_pcie_serdes_tbl, - .serdes_tbl_num = ARRAY_SIZE(ipq8074_pcie_serdes_tbl), - .tx_tbl = ipq8074_pcie_tx_tbl, - .tx_tbl_num = ARRAY_SIZE(ipq8074_pcie_tx_tbl), - .rx_tbl = ipq8074_pcie_rx_tbl, - .rx_tbl_num = ARRAY_SIZE(ipq8074_pcie_rx_tbl), - .pcs_tbl = ipq8074_pcie_pcs_tbl, - .pcs_tbl_num = ARRAY_SIZE(ipq8074_pcie_pcs_tbl), + .tables = { + .serdes = ipq8074_pcie_serdes_tbl, + .serdes_num = ARRAY_SIZE(ipq8074_pcie_serdes_tbl), + .tx = ipq8074_pcie_tx_tbl, + .tx_num = ARRAY_SIZE(ipq8074_pcie_tx_tbl), + .rx = ipq8074_pcie_rx_tbl, + .rx_num = ARRAY_SIZE(ipq8074_pcie_rx_tbl), + .pcs = ipq8074_pcie_pcs_tbl, + .pcs_num = ARRAY_SIZE(ipq8074_pcie_pcs_tbl), + }, .clk_list = ipq8074_pciephy_clk_l, .num_clks = ARRAY_SIZE(ipq8074_pciephy_clk_l), .reset_list = ipq8074_pciephy_reset_l, @@ -1487,14 +1488,16 @@ static const struct qmp_phy_cfg ipq8074_pciephy_cfg = { static const struct qmp_phy_cfg ipq8074_pciephy_gen3_cfg = { .lanes = 1, - .serdes_tbl = ipq8074_pcie_gen3_serdes_tbl, - .serdes_tbl_num = ARRAY_SIZE(ipq8074_pcie_gen3_serdes_tbl), - .tx_tbl 
= ipq8074_pcie_gen3_tx_tbl, - .tx_tbl_num = ARRAY_SIZE(ipq8074_pcie_gen3_tx_tbl), - .rx_tbl = ipq8074_pcie_gen3_rx_tbl, - .rx_tbl_num = ARRAY_SIZE(ipq8074_pcie_gen3_rx_tbl), - .pcs_tbl = ipq8074_pcie_gen3_pcs_tbl, - .pcs_tbl_num = ARRAY_SIZE(ipq8074_pcie_gen3_pcs_tbl), + .tables = { + .serdes = ipq8074_pcie_gen3_serdes_tbl, + .serdes_num = ARRAY_SIZE(ipq8074_pcie_gen3_serdes_tbl), + .tx = ipq8074_pcie_gen3_tx_tbl, + .tx_num = ARRAY_SIZE(ipq8074_pcie_gen3_tx_tbl), + .rx = ipq8074_pcie_gen3_rx_tbl, + .rx_num = ARRAY_SIZE(ipq8074_pcie_gen3_rx_tbl), + .pcs = ipq8074_pcie_gen3_pcs_tbl, + .pcs_num = ARRAY_SIZE(ipq8074_pcie_gen3_pcs_tbl), + }, .clk_list = ipq8074_pciephy_clk_l, .num_clks = ARRAY_SIZE(ipq8074_pciephy_clk_l), .reset_list = ipq8074_pciephy_reset_l, @@ -1516,16 +1519,18 @@ static const struct qmp_phy_cfg ipq8074_pciephy_gen3_cfg = { static const struct qmp_phy_cfg ipq6018_pciephy_cfg = { .lanes = 1, - .serdes_tbl = ipq6018_pcie_serdes_tbl, - .serdes_tbl_num = ARRAY_SIZE(ipq6018_pcie_serdes_tbl), - .tx_tbl = ipq6018_pcie_tx_tbl, - .tx_tbl_num = ARRAY_SIZE(ipq6018_pcie_tx_tbl), - .rx_tbl = ipq6018_pcie_rx_tbl, - .rx_tbl_num = ARRAY_SIZE(ipq6018_pcie_rx_tbl), - .pcs_tbl = ipq6018_pcie_pcs_tbl, - .pcs_tbl_num = ARRAY_SIZE(ipq6018_pcie_pcs_tbl), - .pcs_misc_tbl = ipq6018_pcie_pcs_misc_tbl, - .pcs_misc_tbl_num = ARRAY_SIZE(ipq6018_pcie_pcs_misc_tbl), + .tables = { + .serdes = ipq6018_pcie_serdes_tbl, + .serdes_num = ARRAY_SIZE(ipq6018_pcie_serdes_tbl), + .tx = ipq6018_pcie_tx_tbl, + .tx_num = ARRAY_SIZE(ipq6018_pcie_tx_tbl), + .rx = ipq6018_pcie_rx_tbl, + .rx_num = ARRAY_SIZE(ipq6018_pcie_rx_tbl), + .pcs = ipq6018_pcie_pcs_tbl, + .pcs_num = ARRAY_SIZE(ipq6018_pcie_pcs_tbl), + .pcs_misc = ipq6018_pcie_pcs_misc_tbl, + .pcs_misc_num = ARRAY_SIZE(ipq6018_pcie_pcs_misc_tbl), + }, .clk_list = ipq8074_pciephy_clk_l, .num_clks = ARRAY_SIZE(ipq8074_pciephy_clk_l), .reset_list = ipq8074_pciephy_reset_l, @@ -1545,16 +1550,18 @@ static const struct qmp_phy_cfg 
ipq6018_pciephy_cfg = { static const struct qmp_phy_cfg sdm845_qmp_pciephy_cfg = { .lanes = 1, - .serdes_tbl = sdm845_qmp_pcie_serdes_tbl, - .serdes_tbl_num = ARRAY_SIZE(sdm845_qmp_pcie_serdes_tbl), - .tx_tbl = sdm845_qmp_pcie_tx_tbl, - .tx_tbl_num = ARRAY_SIZE(sdm845_qmp_pcie_tx_tbl), - .rx_tbl = sdm845_qmp_pcie_rx_tbl, - .rx_tbl_num = ARRAY_SIZE(sdm845_qmp_pcie_rx_tbl), - .pcs_tbl = sdm845_qmp_pcie_pcs_tbl, - .pcs_tbl_num = ARRAY_SIZE(sdm845_qmp_pcie_pcs_tbl), - .pcs_misc_tbl = sdm845_qmp_pcie_pcs_misc_tbl, - .pcs_misc_tbl_num = ARRAY_SIZE(sdm845_qmp_pcie_pcs_misc_tbl), + .tables = { + .serdes = sdm845_qmp_pcie_serdes_tbl, + .serdes_num = ARRAY_SIZE(sdm845_qmp_pcie_serdes_tbl), + .tx = sdm845_qmp_pcie_tx_tbl, + .tx_num = ARRAY_SIZE(sdm845_qmp_pcie_tx_tbl), + .rx = sdm845_qmp_pcie_rx_tbl, + .rx_num = ARRAY_SIZE(sdm845_qmp_pcie_rx_tbl), + .pcs = sdm845_qmp_pcie_pcs_tbl, + .pcs_num = ARRAY_SIZE(sdm845_qmp_pcie_pcs_tbl), + .pcs_misc = sdm845_qmp_pcie_pcs_misc_tbl, + .pcs_misc_num = ARRAY_SIZE(sdm845_qmp_pcie_pcs_misc_tbl), + }, .clk_list = sdm845_pciephy_clk_l, .num_clks = ARRAY_SIZE(sdm845_pciephy_clk_l), .reset_list = sdm845_pciephy_reset_l, @@ -1575,14 +1582,16 @@ static const struct qmp_phy_cfg sdm845_qmp_pciephy_cfg = { static const struct qmp_phy_cfg sdm845_qhp_pciephy_cfg = { .lanes = 1, - .serdes_tbl = sdm845_qhp_pcie_serdes_tbl, - .serdes_tbl_num = ARRAY_SIZE(sdm845_qhp_pcie_serdes_tbl), - .tx_tbl = sdm845_qhp_pcie_tx_tbl, - .tx_tbl_num = ARRAY_SIZE(sdm845_qhp_pcie_tx_tbl), - .rx_tbl = sdm845_qhp_pcie_rx_tbl, - .rx_tbl_num = ARRAY_SIZE(sdm845_qhp_pcie_rx_tbl), - .pcs_tbl = sdm845_qhp_pcie_pcs_tbl, - .pcs_tbl_num = ARRAY_SIZE(sdm845_qhp_pcie_pcs_tbl), + .tables = { + .serdes = sdm845_qhp_pcie_serdes_tbl, + .serdes_num = ARRAY_SIZE(sdm845_qhp_pcie_serdes_tbl), + .tx = sdm845_qhp_pcie_tx_tbl, + .tx_num = ARRAY_SIZE(sdm845_qhp_pcie_tx_tbl), + .rx = sdm845_qhp_pcie_rx_tbl, + .rx_num = ARRAY_SIZE(sdm845_qhp_pcie_rx_tbl), + .pcs = sdm845_qhp_pcie_pcs_tbl, + 
.pcs_num = ARRAY_SIZE(sdm845_qhp_pcie_pcs_tbl), + }, .clk_list = sdm845_pciephy_clk_l, .num_clks = ARRAY_SIZE(sdm845_pciephy_clk_l), .reset_list = sdm845_pciephy_reset_l, @@ -1603,24 +1612,28 @@ static const struct qmp_phy_cfg sdm845_qhp_pciephy_cfg = { static const struct qmp_phy_cfg sm8250_qmp_gen3x1_pciephy_cfg = { .lanes = 1, - .serdes_tbl = sm8250_qmp_pcie_serdes_tbl, - .serdes_tbl_num = ARRAY_SIZE(sm8250_qmp_pcie_serdes_tbl), - .serdes_tbl_sec = sm8250_qmp_gen3x1_pcie_serdes_tbl, - .serdes_tbl_num_sec = ARRAY_SIZE(sm8250_qmp_gen3x1_pcie_serdes_tbl), - .tx_tbl = sm8250_qmp_pcie_tx_tbl, - .tx_tbl_num = ARRAY_SIZE(sm8250_qmp_pcie_tx_tbl), - .rx_tbl = sm8250_qmp_pcie_rx_tbl, - .rx_tbl_num = ARRAY_SIZE(sm8250_qmp_pcie_rx_tbl), - .rx_tbl_sec = sm8250_qmp_gen3x1_pcie_rx_tbl, - .rx_tbl_num_sec = ARRAY_SIZE(sm8250_qmp_gen3x1_pcie_rx_tbl), - .pcs_tbl = sm8250_qmp_pcie_pcs_tbl, - .pcs_tbl_num = ARRAY_SIZE(sm8250_qmp_pcie_pcs_tbl), - .pcs_tbl_sec = sm8250_qmp_gen3x1_pcie_pcs_tbl, - .pcs_tbl_num_sec = ARRAY_SIZE(sm8250_qmp_gen3x1_pcie_pcs_tbl), - .pcs_misc_tbl = sm8250_qmp_pcie_pcs_misc_tbl, - .pcs_misc_tbl_num = ARRAY_SIZE(sm8250_qmp_pcie_pcs_misc_tbl), - .pcs_misc_tbl_sec = sm8250_qmp_gen3x1_pcie_pcs_misc_tbl, - .pcs_misc_tbl_num_sec = ARRAY_SIZE(sm8250_qmp_gen3x1_pcie_pcs_misc_tbl), + .tables = { + .serdes = sm8250_qmp_pcie_serdes_tbl, + .serdes_num = ARRAY_SIZE(sm8250_qmp_pcie_serdes_tbl), + .tx = sm8250_qmp_pcie_tx_tbl, + .tx_num = ARRAY_SIZE(sm8250_qmp_pcie_tx_tbl), + .rx = sm8250_qmp_pcie_rx_tbl, + .rx_num = ARRAY_SIZE(sm8250_qmp_pcie_rx_tbl), + .pcs = sm8250_qmp_pcie_pcs_tbl, + .pcs_num = ARRAY_SIZE(sm8250_qmp_pcie_pcs_tbl), + .pcs_misc = sm8250_qmp_pcie_pcs_misc_tbl, + .pcs_misc_num = ARRAY_SIZE(sm8250_qmp_pcie_pcs_misc_tbl), + }, + .tables_rc = &(const struct qmp_phy_cfg_tables) { + .serdes = sm8250_qmp_gen3x1_pcie_serdes_tbl, + .serdes_num = ARRAY_SIZE(sm8250_qmp_gen3x1_pcie_serdes_tbl), + .rx = sm8250_qmp_gen3x1_pcie_rx_tbl, + .rx_num = 
ARRAY_SIZE(sm8250_qmp_gen3x1_pcie_rx_tbl), + .pcs = sm8250_qmp_gen3x1_pcie_pcs_tbl, + .pcs_num = ARRAY_SIZE(sm8250_qmp_gen3x1_pcie_pcs_tbl), + .pcs_misc = sm8250_qmp_gen3x1_pcie_pcs_misc_tbl, + .pcs_misc_num = ARRAY_SIZE(sm8250_qmp_gen3x1_pcie_pcs_misc_tbl), + }, .clk_list = sdm845_pciephy_clk_l, .num_clks = ARRAY_SIZE(sdm845_pciephy_clk_l), .reset_list = sdm845_pciephy_reset_l, @@ -1641,24 +1654,28 @@ static const struct qmp_phy_cfg sm8250_qmp_gen3x1_pciephy_cfg = { static const struct qmp_phy_cfg sm8250_qmp_gen3x2_pciephy_cfg = { .lanes = 2, - .serdes_tbl = sm8250_qmp_pcie_serdes_tbl, - .serdes_tbl_num = ARRAY_SIZE(sm8250_qmp_pcie_serdes_tbl), - .tx_tbl = sm8250_qmp_pcie_tx_tbl, - .tx_tbl_num = ARRAY_SIZE(sm8250_qmp_pcie_tx_tbl), - .tx_tbl_sec = sm8250_qmp_gen3x2_pcie_tx_tbl, - .tx_tbl_num_sec = ARRAY_SIZE(sm8250_qmp_gen3x2_pcie_tx_tbl), - .rx_tbl = sm8250_qmp_pcie_rx_tbl, - .rx_tbl_num = ARRAY_SIZE(sm8250_qmp_pcie_rx_tbl), - .rx_tbl_sec = sm8250_qmp_gen3x2_pcie_rx_tbl, - .rx_tbl_num_sec = ARRAY_SIZE(sm8250_qmp_gen3x2_pcie_rx_tbl), - .pcs_tbl = sm8250_qmp_pcie_pcs_tbl, - .pcs_tbl_num = ARRAY_SIZE(sm8250_qmp_pcie_pcs_tbl), - .pcs_tbl_sec = sm8250_qmp_gen3x2_pcie_pcs_tbl, - .pcs_tbl_num_sec = ARRAY_SIZE(sm8250_qmp_gen3x2_pcie_pcs_tbl), - .pcs_misc_tbl = sm8250_qmp_pcie_pcs_misc_tbl, - .pcs_misc_tbl_num = ARRAY_SIZE(sm8250_qmp_pcie_pcs_misc_tbl), - .pcs_misc_tbl_sec = sm8250_qmp_gen3x2_pcie_pcs_misc_tbl, - .pcs_misc_tbl_num_sec = ARRAY_SIZE(sm8250_qmp_gen3x2_pcie_pcs_misc_tbl), + .tables = { + .serdes = sm8250_qmp_pcie_serdes_tbl, + .serdes_num = ARRAY_SIZE(sm8250_qmp_pcie_serdes_tbl), + .tx = sm8250_qmp_pcie_tx_tbl, + .tx_num = ARRAY_SIZE(sm8250_qmp_pcie_tx_tbl), + .rx = sm8250_qmp_pcie_rx_tbl, + .rx_num = ARRAY_SIZE(sm8250_qmp_pcie_rx_tbl), + .pcs = sm8250_qmp_pcie_pcs_tbl, + .pcs_num = ARRAY_SIZE(sm8250_qmp_pcie_pcs_tbl), + .pcs_misc = sm8250_qmp_pcie_pcs_misc_tbl, + .pcs_misc_num = ARRAY_SIZE(sm8250_qmp_pcie_pcs_misc_tbl), + }, + .tables_rc = &(const struct 
qmp_phy_cfg_tables) { + .tx = sm8250_qmp_gen3x2_pcie_tx_tbl, + .tx_num = ARRAY_SIZE(sm8250_qmp_gen3x2_pcie_tx_tbl), + .rx = sm8250_qmp_gen3x2_pcie_rx_tbl, + .rx_num = ARRAY_SIZE(sm8250_qmp_gen3x2_pcie_rx_tbl), + .pcs = sm8250_qmp_gen3x2_pcie_pcs_tbl, + .pcs_num = ARRAY_SIZE(sm8250_qmp_gen3x2_pcie_pcs_tbl), + .pcs_misc = sm8250_qmp_gen3x2_pcie_pcs_misc_tbl, + .pcs_misc_num = ARRAY_SIZE(sm8250_qmp_gen3x2_pcie_pcs_misc_tbl), + }, .clk_list = sdm845_pciephy_clk_l, .num_clks = ARRAY_SIZE(sdm845_pciephy_clk_l), .reset_list = sdm845_pciephy_reset_l, @@ -1679,14 +1696,16 @@ static const struct qmp_phy_cfg sm8250_qmp_gen3x2_pciephy_cfg = { static const struct qmp_phy_cfg msm8998_pciephy_cfg = { .lanes = 1, - .serdes_tbl = msm8998_pcie_serdes_tbl, - .serdes_tbl_num = ARRAY_SIZE(msm8998_pcie_serdes_tbl), - .tx_tbl = msm8998_pcie_tx_tbl, - .tx_tbl_num = ARRAY_SIZE(msm8998_pcie_tx_tbl), - .rx_tbl = msm8998_pcie_rx_tbl, - .rx_tbl_num = ARRAY_SIZE(msm8998_pcie_rx_tbl), - .pcs_tbl = msm8998_pcie_pcs_tbl, - .pcs_tbl_num = ARRAY_SIZE(msm8998_pcie_pcs_tbl), + .tables = { + .serdes = msm8998_pcie_serdes_tbl, + .serdes_num = ARRAY_SIZE(msm8998_pcie_serdes_tbl), + .tx = msm8998_pcie_tx_tbl, + .tx_num = ARRAY_SIZE(msm8998_pcie_tx_tbl), + .rx = msm8998_pcie_rx_tbl, + .rx_num = ARRAY_SIZE(msm8998_pcie_rx_tbl), + .pcs = msm8998_pcie_pcs_tbl, + .pcs_num = ARRAY_SIZE(msm8998_pcie_pcs_tbl), + }, .clk_list = msm8996_phy_clk_l, .num_clks = ARRAY_SIZE(msm8996_phy_clk_l), .reset_list = ipq8074_pciephy_reset_l, @@ -1703,16 +1722,18 @@ static const struct qmp_phy_cfg msm8998_pciephy_cfg = { static const struct qmp_phy_cfg sc8180x_pciephy_cfg = { .lanes = 1, - .serdes_tbl = sc8180x_qmp_pcie_serdes_tbl, - .serdes_tbl_num = ARRAY_SIZE(sc8180x_qmp_pcie_serdes_tbl), - .tx_tbl = sc8180x_qmp_pcie_tx_tbl, - .tx_tbl_num = ARRAY_SIZE(sc8180x_qmp_pcie_tx_tbl), - .rx_tbl = sc8180x_qmp_pcie_rx_tbl, - .rx_tbl_num = ARRAY_SIZE(sc8180x_qmp_pcie_rx_tbl), - .pcs_tbl = sc8180x_qmp_pcie_pcs_tbl, - .pcs_tbl_num = 
ARRAY_SIZE(sc8180x_qmp_pcie_pcs_tbl), - .pcs_misc_tbl = sc8180x_qmp_pcie_pcs_misc_tbl, - .pcs_misc_tbl_num = ARRAY_SIZE(sc8180x_qmp_pcie_pcs_misc_tbl), + .tables = { + .serdes = sc8180x_qmp_pcie_serdes_tbl, + .serdes_num = ARRAY_SIZE(sc8180x_qmp_pcie_serdes_tbl), + .tx = sc8180x_qmp_pcie_tx_tbl, + .tx_num = ARRAY_SIZE(sc8180x_qmp_pcie_tx_tbl), + .rx = sc8180x_qmp_pcie_rx_tbl, + .rx_num = ARRAY_SIZE(sc8180x_qmp_pcie_rx_tbl), + .pcs = sc8180x_qmp_pcie_pcs_tbl, + .pcs_num = ARRAY_SIZE(sc8180x_qmp_pcie_pcs_tbl), + .pcs_misc = sc8180x_qmp_pcie_pcs_misc_tbl, + .pcs_misc_num = ARRAY_SIZE(sc8180x_qmp_pcie_pcs_misc_tbl), + }, .clk_list = sdm845_pciephy_clk_l, .num_clks = ARRAY_SIZE(sdm845_pciephy_clk_l), .reset_list = sdm845_pciephy_reset_l, @@ -1732,16 +1753,18 @@ static const struct qmp_phy_cfg sc8180x_pciephy_cfg = { static const struct qmp_phy_cfg sdx55_qmp_pciephy_cfg = { .lanes = 2, - .serdes_tbl = sdx55_qmp_pcie_serdes_tbl, - .serdes_tbl_num = ARRAY_SIZE(sdx55_qmp_pcie_serdes_tbl), - .tx_tbl = sdx55_qmp_pcie_tx_tbl, - .tx_tbl_num = ARRAY_SIZE(sdx55_qmp_pcie_tx_tbl), - .rx_tbl = sdx55_qmp_pcie_rx_tbl, - .rx_tbl_num = ARRAY_SIZE(sdx55_qmp_pcie_rx_tbl), - .pcs_tbl = sdx55_qmp_pcie_pcs_tbl, - .pcs_tbl_num = ARRAY_SIZE(sdx55_qmp_pcie_pcs_tbl), - .pcs_misc_tbl = sdx55_qmp_pcie_pcs_misc_tbl, - .pcs_misc_tbl_num = ARRAY_SIZE(sdx55_qmp_pcie_pcs_misc_tbl), + .tables = { + .serdes = sdx55_qmp_pcie_serdes_tbl, + .serdes_num = ARRAY_SIZE(sdx55_qmp_pcie_serdes_tbl), + .tx = sdx55_qmp_pcie_tx_tbl, + .tx_num = ARRAY_SIZE(sdx55_qmp_pcie_tx_tbl), + .rx = sdx55_qmp_pcie_rx_tbl, + .rx_num = ARRAY_SIZE(sdx55_qmp_pcie_rx_tbl), + .pcs = sdx55_qmp_pcie_pcs_tbl, + .pcs_num = ARRAY_SIZE(sdx55_qmp_pcie_pcs_tbl), + .pcs_misc = sdx55_qmp_pcie_pcs_misc_tbl, + .pcs_misc_num = ARRAY_SIZE(sdx55_qmp_pcie_pcs_misc_tbl), + }, .clk_list = sdm845_pciephy_clk_l, .num_clks = ARRAY_SIZE(sdm845_pciephy_clk_l), .reset_list = sdm845_pciephy_reset_l, @@ -1762,16 +1785,18 @@ static const struct qmp_phy_cfg 
sdx55_qmp_pciephy_cfg = { static const struct qmp_phy_cfg sm8450_qmp_gen3x1_pciephy_cfg = { .lanes = 1, - .serdes_tbl = sm8450_qmp_gen3x1_pcie_serdes_tbl, - .serdes_tbl_num = ARRAY_SIZE(sm8450_qmp_gen3x1_pcie_serdes_tbl), - .tx_tbl = sm8450_qmp_gen3x1_pcie_tx_tbl, - .tx_tbl_num = ARRAY_SIZE(sm8450_qmp_gen3x1_pcie_tx_tbl), - .rx_tbl = sm8450_qmp_gen3x1_pcie_rx_tbl, - .rx_tbl_num = ARRAY_SIZE(sm8450_qmp_gen3x1_pcie_rx_tbl), - .pcs_tbl = sm8450_qmp_gen3x1_pcie_pcs_tbl, - .pcs_tbl_num = ARRAY_SIZE(sm8450_qmp_gen3x1_pcie_pcs_tbl), - .pcs_misc_tbl = sm8450_qmp_gen3x1_pcie_pcs_misc_tbl, - .pcs_misc_tbl_num = ARRAY_SIZE(sm8450_qmp_gen3x1_pcie_pcs_misc_tbl), + .tables = { + .serdes = sm8450_qmp_gen3x1_pcie_serdes_tbl, + .serdes_num = ARRAY_SIZE(sm8450_qmp_gen3x1_pcie_serdes_tbl), + .tx = sm8450_qmp_gen3x1_pcie_tx_tbl, + .tx_num = ARRAY_SIZE(sm8450_qmp_gen3x1_pcie_tx_tbl), + .rx = sm8450_qmp_gen3x1_pcie_rx_tbl, + .rx_num = ARRAY_SIZE(sm8450_qmp_gen3x1_pcie_rx_tbl), + .pcs = sm8450_qmp_gen3x1_pcie_pcs_tbl, + .pcs_num = ARRAY_SIZE(sm8450_qmp_gen3x1_pcie_pcs_tbl), + .pcs_misc = sm8450_qmp_gen3x1_pcie_pcs_misc_tbl, + .pcs_misc_num = ARRAY_SIZE(sm8450_qmp_gen3x1_pcie_pcs_misc_tbl), + }, .clk_list = sdm845_pciephy_clk_l, .num_clks = ARRAY_SIZE(sdm845_pciephy_clk_l), .reset_list = sdm845_pciephy_reset_l, @@ -1792,16 +1817,18 @@ static const struct qmp_phy_cfg sm8450_qmp_gen3x1_pciephy_cfg = { static const struct qmp_phy_cfg sm8450_qmp_gen4x2_pciephy_cfg = { .lanes = 2, - .serdes_tbl = sm8450_qmp_gen4x2_pcie_serdes_tbl, - .serdes_tbl_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_serdes_tbl), - .tx_tbl = sm8450_qmp_gen4x2_pcie_tx_tbl, - .tx_tbl_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_tx_tbl), - .rx_tbl = sm8450_qmp_gen4x2_pcie_rx_tbl, - .rx_tbl_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_rx_tbl), - .pcs_tbl = sm8450_qmp_gen4x2_pcie_pcs_tbl, - .pcs_tbl_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_pcs_tbl), - .pcs_misc_tbl = sm8450_qmp_gen4x2_pcie_pcs_misc_tbl, - .pcs_misc_tbl_num = 
ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_pcs_misc_tbl), + .tables = { + .serdes = sm8450_qmp_gen4x2_pcie_serdes_tbl, + .serdes_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_serdes_tbl), + .tx = sm8450_qmp_gen4x2_pcie_tx_tbl, + .tx_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_tx_tbl), + .rx = sm8450_qmp_gen4x2_pcie_rx_tbl, + .rx_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_rx_tbl), + .pcs = sm8450_qmp_gen4x2_pcie_pcs_tbl, + .pcs_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_pcs_tbl), + .pcs_misc = sm8450_qmp_gen4x2_pcie_pcs_misc_tbl, + .pcs_misc_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_pcs_misc_tbl), + }, .clk_list = sdm845_pciephy_clk_l, .num_clks = ARRAY_SIZE(sdm845_pciephy_clk_l), .reset_list = sdm845_pciephy_reset_l, @@ -1850,17 +1877,49 @@ static void qmp_pcie_configure(void __iomem *base, qmp_pcie_configure_lane(base, regs, tbl, num, 0xff); } -static int qmp_pcie_serdes_init(struct qmp_phy *qphy) +static void qmp_pcie_serdes_init(struct qmp_phy *qphy, const struct qmp_phy_cfg_tables *tables) { const struct qmp_phy_cfg *cfg = qphy->cfg; void __iomem *serdes = qphy->serdes; - const struct qmp_phy_init_tbl *serdes_tbl = cfg->serdes_tbl; - int serdes_tbl_num = cfg->serdes_tbl_num; - qmp_pcie_configure(serdes, cfg->regs, serdes_tbl, serdes_tbl_num); - qmp_pcie_configure(serdes, cfg->regs, cfg->serdes_tbl_sec, cfg->serdes_tbl_num_sec); + if (!tables) + return; - return 0; + qmp_pcie_configure(serdes, cfg->regs, tables->serdes, tables->serdes_num); +} + +static void qmp_pcie_lanes_init(struct qmp_phy *qphy, const struct qmp_phy_cfg_tables *tables) +{ + const struct qmp_phy_cfg *cfg = qphy->cfg; + void __iomem *tx = qphy->tx; + void __iomem *rx = qphy->rx; + + if (!tables) + return; + + qmp_pcie_configure_lane(tx, cfg->regs, tables->tx, tables->tx_num, 1); + + if (cfg->lanes >= 2) + qmp_pcie_configure_lane(qphy->tx2, cfg->regs, tables->tx, tables->tx_num, 2); + + qmp_pcie_configure_lane(rx, cfg->regs, tables->rx, tables->rx_num, 1); + if (cfg->lanes >= 2) + qmp_pcie_configure_lane(qphy->rx2, 
cfg->regs, tables->rx, tables->rx_num, 2); +} + +static void qmp_pcie_pcs_init(struct qmp_phy *qphy, const struct qmp_phy_cfg_tables *tables) +{ + const struct qmp_phy_cfg *cfg = qphy->cfg; + void __iomem *pcs = qphy->pcs; + void __iomem *pcs_misc = qphy->pcs_misc; + + if (!tables) + return; + + qmp_pcie_configure(pcs, cfg->regs, + tables->pcs, tables->pcs_num); + qmp_pcie_configure(pcs_misc, cfg->regs, + tables->pcs_misc, tables->pcs_misc_num); } static int qmp_pcie_init(struct phy *phy) @@ -1932,15 +1991,13 @@ static int qmp_pcie_power_on(struct phy *phy) struct qmp_phy *qphy = phy_get_drvdata(phy); struct qcom_qmp *qmp = qphy->qmp; const struct qmp_phy_cfg *cfg = qphy->cfg; - void __iomem *tx = qphy->tx; - void __iomem *rx = qphy->rx; void __iomem *pcs = qphy->pcs; - void __iomem *pcs_misc = qphy->pcs_misc; void __iomem *status; unsigned int mask, val, ready; int ret; - qmp_pcie_serdes_init(qphy); + qmp_pcie_serdes_init(qphy, &cfg->tables); + qmp_pcie_serdes_init(qphy, cfg->tables_rc); ret = clk_prepare_enable(qphy->pipe_clk); if (ret) { @@ -1949,31 +2006,11 @@ static int qmp_pcie_power_on(struct phy *phy) } /* Tx, Rx, and PCS configurations */ - qmp_pcie_configure_lane(tx, cfg->regs, cfg->tx_tbl, cfg->tx_tbl_num, 1); - qmp_pcie_configure_lane(tx, cfg->regs, cfg->tx_tbl_sec, cfg->tx_tbl_num_sec, 1); + qmp_pcie_lanes_init(qphy, &cfg->tables); + qmp_pcie_lanes_init(qphy, cfg->tables_rc); - if (cfg->lanes >= 2) { - qmp_pcie_configure_lane(qphy->tx2, cfg->regs, cfg->tx_tbl, - cfg->tx_tbl_num, 2); - qmp_pcie_configure_lane(qphy->tx2, cfg->regs, cfg->tx_tbl_sec, - cfg->tx_tbl_num_sec, 2); - } - - qmp_pcie_configure_lane(rx, cfg->regs, cfg->rx_tbl, cfg->rx_tbl_num, 1); - qmp_pcie_configure_lane(rx, cfg->regs, cfg->rx_tbl_sec, cfg->rx_tbl_num_sec, 1); - - if (cfg->lanes >= 2) { - qmp_pcie_configure_lane(qphy->rx2, cfg->regs, cfg->rx_tbl, - cfg->rx_tbl_num, 2); - qmp_pcie_configure_lane(qphy->rx2, cfg->regs, cfg->rx_tbl_sec, - cfg->rx_tbl_num_sec, 2); - } - - 
qmp_pcie_configure(pcs, cfg->regs, cfg->pcs_tbl, cfg->pcs_tbl_num); - qmp_pcie_configure(pcs, cfg->regs, cfg->pcs_tbl_sec, cfg->pcs_tbl_num_sec); - - qmp_pcie_configure(pcs_misc, cfg->regs, cfg->pcs_misc_tbl, cfg->pcs_misc_tbl_num); - qmp_pcie_configure(pcs_misc, cfg->regs, cfg->pcs_misc_tbl_sec, cfg->pcs_misc_tbl_num_sec); + qmp_pcie_pcs_init(qphy, &cfg->tables); + qmp_pcie_pcs_init(qphy, cfg->tables_rc); /* * Pull out PHY from POWER DOWN state. @@ -2240,7 +2277,8 @@ static int qmp_pcie_create(struct device *dev, struct device_node *np, int id, qphy->pcs_misc = qphy->pcs + 0x400; if (IS_ERR(qphy->pcs_misc)) { - if (cfg->pcs_misc_tbl || cfg->pcs_misc_tbl_sec) + if (cfg->tables.pcs_misc || + (cfg->tables_rc && cfg->tables_rc->pcs_misc)) return PTR_ERR(qphy->pcs_misc); } From 11bf53a38c82baef349b4efc6a84f069dab7085a Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 27 Sep 2022 12:22:03 +0300 Subject: [PATCH 0061/4122] phy: qcom-qmp-pcie: support separate tables for EP mode The PCIe QMP PHY requires different programming sequences when being used for the RC (Root Complex) or for the EP (End Point) modes. Allow selecting the submode and thus selecting a set of PHY programming tables. Since the RC and EP modes share some common init sequence, the common sequence is kept in the main table and the sequence differences are pushed to the extra tables. 
Reviewed-by: Johan Hovold Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20220927092207.161501-3-dmitry.baryshkov@linaro.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 46 ++++++++++++++++++++---- 1 file changed, 40 insertions(+), 6 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index ae0d7b49dfa3..ba01338d93ac 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -1320,10 +1321,14 @@ struct qmp_phy_cfg { /* Main init sequence for PHY blocks - serdes, tx, rx, pcs */ const struct qmp_phy_cfg_tables tables; /* - * Additional init sequence for PHY blocks, providing additional - * register programming. Unless required it can be left omitted. + * Additional init sequences for PHY blocks, providing additional + * register programming. They are used for providing separate sequences + * for the Root Complex and End Point use cases. + * + * If EP mode is not supported, both tables can be left unset. 
*/ const struct qmp_phy_cfg_tables *tables_rc; + const struct qmp_phy_cfg_tables *tables_ep; /* clock ids to be requested */ const char * const *clk_list; @@ -1367,6 +1372,7 @@ struct qmp_phy_cfg { * @pcs_misc: iomapped memory space for lane's pcs_misc * @pipe_clk: pipe clock * @qmp: QMP phy to which this lane belongs + * @mode: currently selected PHY mode */ struct qmp_phy { struct phy *phy; @@ -1380,6 +1386,7 @@ struct qmp_phy { void __iomem *pcs_misc; struct clk *pipe_clk; struct qcom_qmp *qmp; + int mode; }; /** @@ -1991,13 +1998,19 @@ static int qmp_pcie_power_on(struct phy *phy) struct qmp_phy *qphy = phy_get_drvdata(phy); struct qcom_qmp *qmp = qphy->qmp; const struct qmp_phy_cfg *cfg = qphy->cfg; + const struct qmp_phy_cfg_tables *mode_tables; void __iomem *pcs = qphy->pcs; void __iomem *status; unsigned int mask, val, ready; int ret; + if (qphy->mode == PHY_MODE_PCIE_RC) + mode_tables = cfg->tables_rc; + else + mode_tables = cfg->tables_ep; + qmp_pcie_serdes_init(qphy, &cfg->tables); - qmp_pcie_serdes_init(qphy, cfg->tables_rc); + qmp_pcie_serdes_init(qphy, mode_tables); ret = clk_prepare_enable(qphy->pipe_clk); if (ret) { @@ -2007,10 +2020,10 @@ static int qmp_pcie_power_on(struct phy *phy) /* Tx, Rx, and PCS configurations */ qmp_pcie_lanes_init(qphy, &cfg->tables); - qmp_pcie_lanes_init(qphy, cfg->tables_rc); + qmp_pcie_lanes_init(qphy, mode_tables); qmp_pcie_pcs_init(qphy, &cfg->tables); - qmp_pcie_pcs_init(qphy, cfg->tables_rc); + qmp_pcie_pcs_init(qphy, mode_tables); /* * Pull out PHY from POWER DOWN state. 
@@ -2097,6 +2110,23 @@ static int qmp_pcie_disable(struct phy *phy) return qmp_pcie_exit(phy); } +static int qmp_pcie_set_mode(struct phy *phy, enum phy_mode mode, int submode) +{ + struct qmp_phy *qphy = phy_get_drvdata(phy); + + switch (submode) { + case PHY_MODE_PCIE_RC: + case PHY_MODE_PCIE_EP: + qphy->mode = submode; + break; + default: + dev_err(&phy->dev, "Unsupported submode %d\n", submode); + return -EINVAL; + } + + return 0; +} + static int qmp_pcie_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg) { struct qcom_qmp *qmp = dev_get_drvdata(dev); @@ -2220,6 +2250,7 @@ static int phy_pipe_clk_register(struct qcom_qmp *qmp, struct device_node *np) static const struct phy_ops qmp_pcie_ops = { .power_on = qmp_pcie_enable, .power_off = qmp_pcie_disable, + .set_mode = qmp_pcie_set_mode, .owner = THIS_MODULE, }; @@ -2235,6 +2266,8 @@ static int qmp_pcie_create(struct device *dev, struct device_node *np, int id, if (!qphy) return -ENOMEM; + qphy->mode = PHY_MODE_PCIE_RC; + qphy->cfg = cfg; qphy->serdes = serdes; /* @@ -2278,7 +2311,8 @@ static int qmp_pcie_create(struct device *dev, struct device_node *np, int id, if (IS_ERR(qphy->pcs_misc)) { if (cfg->tables.pcs_misc || - (cfg->tables_rc && cfg->tables_rc->pcs_misc)) + (cfg->tables_rc && cfg->tables_rc->pcs_misc) || + (cfg->tables_ep && cfg->tables_ep->pcs_misc)) return PTR_ERR(qphy->pcs_misc); } From f5682f13b7ab0bbdffd11934afe4b5c011d5be74 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 27 Sep 2022 12:22:04 +0300 Subject: [PATCH 0062/4122] phy: qcom-qmp-pcie: Support SM8450 PCIe1 PHY in EP mode Add support for using PCIe1 (gen4x2) in EP mode on SM8450. The tables to program are mostly common with the RC mode tables, so only the register differences are split into separate RC and EP tables.
Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20220927092207.161501-4-dmitry.baryshkov@linaro.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 78 +++++++++++++++---- .../qualcomm/phy-qcom-qmp-pcs-pcie-v5_20.h | 1 + 2 files changed, 64 insertions(+), 15 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index ba01338d93ac..f3f75eda01a6 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -1185,15 +1185,29 @@ static const struct qmp_phy_init_tbl sm8450_qmp_gen3x1_pcie_pcs_misc_tbl[] = { }; static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_serdes_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIAS_EN_CLKBUFLR_EN, 0x14), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_IVCO, 0x0f), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP_EN, 0x46), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP_CFG, 0x04), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_VCO_TUNE_MAP, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_HSCLK_SEL, 0x12), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_HSCLK_HS_SWITCH_SEL, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_CORECLK_DIV_MODE0, 0x0a), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_CORECLK_DIV_MODE1, 0x04), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_CMN_MISC1, 0x88), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_CMN_CONFIG, 0x06), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_CMN_MODE, 0x14), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_VCO_DC_LEVEL_CTRL, 0x0f), +}; + +static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_rc_serdes_tbl[] = { QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_PER1, 0x31), QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_PER2, 0x01), QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_STEP_SIZE1_MODE0, 0xde), QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_STEP_SIZE2_MODE0, 0x07), QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_STEP_SIZE1_MODE1, 0x97), QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_STEP_SIZE2_MODE1, 0x0c), - QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIAS_EN_CLKBUFLR_EN, 0x14), QMP_PHY_INIT_CFG(QSERDES_V5_COM_CLK_ENABLE1, 0x90), - 
QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_IVCO, 0x0f), QMP_PHY_INIT_CFG(QSERDES_V5_COM_CP_CTRL_MODE0, 0x06), QMP_PHY_INIT_CFG(QSERDES_V5_COM_CP_CTRL_MODE1, 0x06), QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_RCTRL_MODE0, 0x16), @@ -1201,8 +1215,6 @@ static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_serdes_tbl[] = { QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_CCTRL_MODE0, 0x36), QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_CCTRL_MODE1, 0x36), QMP_PHY_INIT_CFG(QSERDES_V5_COM_SYSCLK_EN_SEL, 0x08), - QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP_EN, 0x46), - QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP_CFG, 0x04), QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP1_MODE0, 0x0a), QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP2_MODE0, 0x1a), QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP1_MODE1, 0x14), @@ -1215,17 +1227,8 @@ static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_serdes_tbl[] = { QMP_PHY_INIT_CFG(QSERDES_V5_COM_DIV_FRAC_START1_MODE1, 0x55), QMP_PHY_INIT_CFG(QSERDES_V5_COM_DIV_FRAC_START2_MODE1, 0x55), QMP_PHY_INIT_CFG(QSERDES_V5_COM_DIV_FRAC_START3_MODE1, 0x05), - QMP_PHY_INIT_CFG(QSERDES_V5_COM_VCO_TUNE_MAP, 0x02), QMP_PHY_INIT_CFG(QSERDES_V5_COM_CLK_SELECT, 0x34), - QMP_PHY_INIT_CFG(QSERDES_V5_COM_HSCLK_SEL, 0x12), - QMP_PHY_INIT_CFG(QSERDES_V5_COM_HSCLK_HS_SWITCH_SEL, 0x00), - QMP_PHY_INIT_CFG(QSERDES_V5_COM_CORECLK_DIV_MODE0, 0x0a), - QMP_PHY_INIT_CFG(QSERDES_V5_COM_CORECLK_DIV_MODE1, 0x04), - QMP_PHY_INIT_CFG(QSERDES_V5_COM_CMN_MISC1, 0x88), QMP_PHY_INIT_CFG(QSERDES_V5_COM_CORE_CLK_EN, 0x20), - QMP_PHY_INIT_CFG(QSERDES_V5_COM_CMN_CONFIG, 0x06), - QMP_PHY_INIT_CFG(QSERDES_V5_COM_CMN_MODE, 0x14), - QMP_PHY_INIT_CFG(QSERDES_V5_COM_VCO_DC_LEVEL_CTRL, 0x0f), }; static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_tx_tbl[] = { @@ -1293,14 +1296,44 @@ static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_pcs_tbl[] = { }; static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_pcs_misc_tbl[] = { - QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_ENDPOINT_REFCLK_DRIVE, 0xc1), - 
QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_OSC_DTCT_ACTIONS, 0x00), QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_G4_EQ_CONFIG5, 0x02), QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_EQ_CONFIG1, 0x16), QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_RX_MARGINING_CONFIG3, 0x28), QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_G4_PRE_GAIN, 0x2e), }; +static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_rc_pcs_misc_tbl[] = { + QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_ENDPOINT_REFCLK_DRIVE, 0xc1), + QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_OSC_DTCT_ACTIONS, 0x00), +}; + +static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_ep_serdes_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_V5_COM_BG_TIMER, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_SYS_CLK_CTRL, 0x07), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_CP_CTRL_MODE0, 0x27), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_CP_CTRL_MODE1, 0x0a), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_RCTRL_MODE0, 0x17), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_RCTRL_MODE1, 0x19), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_CCTRL_MODE0, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_CCTRL_MODE1, 0x03), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_SYSCLK_EN_SEL, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP1_MODE0, 0xff), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP2_MODE0, 0x04), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP1_MODE1, 0xff), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP2_MODE1, 0x09), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_DEC_START_MODE0, 0x19), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_DEC_START_MODE1, 0x28), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_INTEGLOOP_GAIN0_MODE0, 0xfb), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_INTEGLOOP_GAIN1_MODE0, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_INTEGLOOP_GAIN0_MODE1, 0xfb), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_INTEGLOOP_GAIN1_MODE1, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_CORE_CLK_EN, 0x60), +}; + +static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_ep_pcs_misc_tbl[] = { + QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_OSC_DTCT_MODE2_CONFIG5, 0x08), +}; + struct qmp_phy_cfg_tables { const struct qmp_phy_init_tbl 
*serdes; int serdes_num; @@ -1836,6 +1869,21 @@ static const struct qmp_phy_cfg sm8450_qmp_gen4x2_pciephy_cfg = { .pcs_misc = sm8450_qmp_gen4x2_pcie_pcs_misc_tbl, .pcs_misc_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_pcs_misc_tbl), }, + + .tables_rc = &(const struct qmp_phy_cfg_tables) { + .serdes = sm8450_qmp_gen4x2_pcie_rc_serdes_tbl, + .serdes_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_rc_serdes_tbl), + .pcs_misc = sm8450_qmp_gen4x2_pcie_rc_pcs_misc_tbl, + .pcs_misc_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_rc_pcs_misc_tbl), + }, + + .tables_ep = &(const struct qmp_phy_cfg_tables) { + .serdes = sm8450_qmp_gen4x2_pcie_ep_serdes_tbl, + .serdes_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_ep_serdes_tbl), + .pcs_misc = sm8450_qmp_gen4x2_pcie_ep_pcs_misc_tbl, + .pcs_misc_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_ep_pcs_misc_tbl), + }, + .clk_list = sdm845_pciephy_clk_l, .num_clks = ARRAY_SIZE(sdm845_pciephy_clk_l), .reset_list = sdm845_pciephy_reset_l, diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5_20.h b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5_20.h index 1eedf50cf9cb..c9fa90b45475 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5_20.h +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5_20.h @@ -8,6 +8,7 @@ /* Only for QMP V5_20 PHY - PCIe PCS registers */ #define QPHY_V5_20_PCS_PCIE_ENDPOINT_REFCLK_DRIVE 0x01c +#define QPHY_V5_20_PCS_PCIE_OSC_DTCT_MODE2_CONFIG5 0x084 #define QPHY_V5_20_PCS_PCIE_OSC_DTCT_ACTIONS 0x090 #define QPHY_V5_20_PCS_PCIE_EQ_CONFIG1 0x0a0 #define QPHY_V5_20_PCS_PCIE_G4_EQ_CONFIG5 0x108 From f90747d1b641aad244cca7d6aa20aa25f33ae8e4 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 27 Sep 2022 12:22:05 +0300 Subject: [PATCH 0063/4122] PCI: qcom: Setup PHY to work in RC mode Call phy_set_mode_ext() to notify the PHY driver that the PHY is being used in the RC mode. 
Reviewed-by: Jingoo Han Reviewed-by: Johan Hovold Signed-off-by: Dmitry Baryshkov Acked-by: Lorenzo Pieralisi Link: https://lore.kernel.org/r/20220927092207.161501-5-dmitry.baryshkov@linaro.org Signed-off-by: Vinod Koul --- drivers/pci/controller/dwc/pcie-qcom.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index f711acacaeaf..7db94a22238d 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -1497,6 +1498,10 @@ static int qcom_pcie_host_init(struct dw_pcie_rp *pp) if (ret) return ret; + ret = phy_set_mode_ext(pcie->phy, PHY_MODE_PCIE, PHY_MODE_PCIE_RC); + if (ret) + goto err_deinit; + ret = phy_power_on(pcie->phy); if (ret) goto err_deinit; From a84ed1919fb3fc767ae3aad13bbff8ea8eaceedd Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 27 Sep 2022 12:22:06 +0300 Subject: [PATCH 0064/4122] PCI: qcom-ep: Setup PHY to work in EP mode Call phy_set_mode_ext() to notify the PHY driver that the PHY is being used in the EP mode. 
Reviewed-by: Manivannan Sadhasivam Reviewed-by: Jingoo Han Reviewed-by: Johan Hovold Signed-off-by: Dmitry Baryshkov Acked-by: Lorenzo Pieralisi Link: https://lore.kernel.org/r/20220927092207.161501-6-dmitry.baryshkov@linaro.org Signed-off-by: Vinod Koul --- drivers/pci/controller/dwc/pcie-qcom-ep.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/pci/controller/dwc/pcie-qcom-ep.c b/drivers/pci/controller/dwc/pcie-qcom-ep.c index 6d0d1b759ca2..19b32839ea26 100644 --- a/drivers/pci/controller/dwc/pcie-qcom-ep.c +++ b/drivers/pci/controller/dwc/pcie-qcom-ep.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -268,6 +269,10 @@ static int qcom_pcie_enable_resources(struct qcom_pcie_ep *pcie_ep) if (ret) goto err_disable_clk; + ret = phy_set_mode_ext(pcie_ep->phy, PHY_MODE_PCIE, PHY_MODE_PCIE_EP); + if (ret) + goto err_phy_exit; + ret = phy_power_on(pcie_ep->phy); if (ret) goto err_phy_exit; From 8d3bf72497a8def5dc75e10a2229f1c692598b97 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 17 Oct 2022 08:50:03 +0200 Subject: [PATCH 0065/4122] phy: qcom-qmp: fix obsolete lane comments All QMP drivers but the MSM8996 and combo ones handle exactly one PHY and the corresponding memory resources are not per-lane, but per PHY. Update the obsolete comments. 
Reviewed-by: Neil Armstrong Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221017065013.19647-2-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 2 +- drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c | 2 +- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 2 +- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 2 +- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 9807c4d935cd..8a2a35c0855b 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -2669,7 +2669,7 @@ static int qmp_combo_create(struct device *dev, struct device_node *np, int id, qphy->cfg = cfg; qphy->serdes = serdes; /* - * Get memory resources for each phy lane: + * Get memory resources for each PHY: * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2. * For dual lane PHYs: tx2 -> 3, rx2 -> 4, pcs_misc (optional) -> 5 * For single lane PHYs: pcs_misc (optional) -> 3. diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c index 461f0b5d464a..707ec81c7a2a 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c @@ -777,7 +777,7 @@ static int qmp_pcie_msm8996_create(struct device *dev, struct device_node *np, i qphy->cfg = cfg; qphy->serdes = serdes; /* - * Get memory resources for each phy lane: + * Get memory resources for each PHY: * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2. 
*/ qphy->tx = devm_of_iomap(dev, np, 0, NULL); diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index f3f75eda01a6..e0408c423ac6 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -2319,7 +2319,7 @@ static int qmp_pcie_create(struct device *dev, struct device_node *np, int id, qphy->cfg = cfg; qphy->serdes = serdes; /* - * Get memory resources for each phy lane: + * Get memory resources for the PHY: * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2. * For dual lane PHYs: tx2 -> 3, rx2 -> 4, pcs_misc (optional) -> 5 * For single lane PHYs: pcs_misc (optional) -> 3. diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index c08d34ad1313..db5642e1f715 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -1094,7 +1094,7 @@ static int qmp_ufs_create(struct device *dev, struct device_node *np, int id, qphy->cfg = cfg; qphy->serdes = serdes; /* - * Get memory resources for each phy lane: + * Get memory resources for the PHY: * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2. * For dual lane PHYs: tx2 -> 3, rx2 -> 4, pcs_misc (optional) -> 5 * For single lane PHYs: pcs_misc (optional) -> 3. diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index b84c0d4b5754..965e486ab87d 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -2581,7 +2581,7 @@ int qmp_usb_create(struct device *dev, struct device_node *np, int id, qphy->cfg = cfg; qphy->serdes = serdes; /* - * Get memory resources for each phy lane: + * Get memory resources for the PHY: * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2. * For dual lane PHYs: tx2 -> 3, rx2 -> 4, pcs_misc (optional) -> 5 * For single lane PHYs: pcs_misc (optional) -> 3. 
From f823346de8b1fa44bbab3ef62d40e9616332c7ee Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 17 Oct 2022 08:50:04 +0200 Subject: [PATCH 0066/4122] phy: qcom-qmp-combo: drop unused UFS reset Drop the UFS reset code, which has been unused since the QMP driver split. Reviewed-by: Neil Armstrong Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221017065013.19647-3-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 8a2a35c0855b..c21512b9ab52 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -944,7 +944,6 @@ struct qmp_phy_dp_clks { * @phys: array of per-lane phy descriptors * @phy_mutex: mutex lock for PHY common block initialization * @init_count: phy common block initialization count - * @ufs_reset: optional UFS PHY reset handle */ struct qcom_qmp { struct device *dev; @@ -958,8 +957,6 @@ struct qcom_qmp { struct mutex phy_mutex; int init_count; - - struct reset_control *ufs_reset; }; static void qcom_qmp_v3_phy_dp_aux_init(struct qmp_phy *qphy); @@ -2027,8 +2024,6 @@ static int qmp_combo_com_exit(struct qmp_phy *qphy) return 0; } - reset_control_assert(qmp->ufs_reset); - reset_control_bulk_assert(cfg->num_resets, qmp->resets); clk_bulk_disable_unprepare(cfg->num_clks, qmp->clks); @@ -2103,10 +2098,6 @@ static int qmp_combo_power_on(struct phy *phy) else qmp_combo_configure(pcs, cfg->regs, cfg->pcs_tbl, cfg->pcs_tbl_num); - ret = reset_control_deassert(qmp->ufs_reset); - if (ret) - goto err_disable_pipe_clk; - if (cfg->has_pwrdn_delay) usleep_range(cfg->pwrdn_delay_min, cfg->pwrdn_delay_max); From 4567bb1799d253ceb81ba9c9837ae13a86e4b50a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 17 Oct 2022 08:50:05 +0200 Subject: [PATCH 0067/4122] phy: qcom-qmp-pcie: drop
unused common-block registers Drop the common-block register defines that are unused since the QMP driver split. Reviewed-by: Neil Armstrong Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221017065013.19647-4-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index e0408c423ac6..bdbd18b89a8c 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -78,11 +78,6 @@ struct qmp_phy_init_tbl { /* set of registers with offsets different per-PHY */ enum qphy_reg_layout { - /* Common block control registers */ - QPHY_COM_SW_RESET, - QPHY_COM_POWER_DOWN_CONTROL, - QPHY_COM_START_CONTROL, - QPHY_COM_PCS_READY_STATUS, /* PCS registers */ QPHY_SW_RESET, QPHY_START_CTRL, @@ -100,10 +95,6 @@ static const unsigned int ipq_pciephy_gen3_regs_layout[QPHY_LAYOUT_SIZE] = { }; static const unsigned int pciephy_regs_layout[QPHY_LAYOUT_SIZE] = { - [QPHY_COM_SW_RESET] = 0x400, - [QPHY_COM_POWER_DOWN_CONTROL] = 0x404, - [QPHY_COM_START_CONTROL] = 0x408, - [QPHY_COM_PCS_READY_STATUS] = 0x448, [QPHY_SW_RESET] = 0x00, [QPHY_START_CTRL] = 0x08, [QPHY_PCS_STATUS] = 0x174, From 6d5b1e2067aef151747b4ec1cd927d44b61e4293 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 17 Oct 2022 08:50:06 +0200 Subject: [PATCH 0068/4122] phy: qcom-qmp-pcie: clean up power-down handling Always define the POWER_DOWN_CONTROL register instead of falling back to the v2 offset during power on and power off. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221017065013.19647-5-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index bdbd18b89a8c..1105d439828c 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -98,18 +98,21 @@ static const unsigned int pciephy_regs_layout[QPHY_LAYOUT_SIZE] = { [QPHY_SW_RESET] = 0x00, [QPHY_START_CTRL] = 0x08, [QPHY_PCS_STATUS] = 0x174, + [QPHY_PCS_POWER_DOWN_CONTROL] = 0x04, }; static const unsigned int sdm845_qmp_pciephy_regs_layout[QPHY_LAYOUT_SIZE] = { [QPHY_SW_RESET] = 0x00, [QPHY_START_CTRL] = 0x08, [QPHY_PCS_STATUS] = 0x174, + [QPHY_PCS_POWER_DOWN_CONTROL] = 0x04, }; static const unsigned int sdm845_qhp_pciephy_regs_layout[QPHY_LAYOUT_SIZE] = { [QPHY_SW_RESET] = 0x00, [QPHY_START_CTRL] = 0x08, [QPHY_PCS_STATUS] = 0x2ac, + [QPHY_PCS_POWER_DOWN_CONTROL] = 0x04, }; static const unsigned int sm8250_pcie_regs_layout[QPHY_LAYOUT_SIZE] = { @@ -1999,13 +2002,8 @@ static int qmp_pcie_init(struct phy *phy) if (ret) goto err_assert_reset; - if (cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL]) - qphy_setbits(pcs, - cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], - cfg->pwrdn_ctrl); - else - qphy_setbits(pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL, - cfg->pwrdn_ctrl); + qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], + cfg->pwrdn_ctrl); return 0; @@ -2112,13 +2110,8 @@ static int qmp_pcie_power_off(struct phy *phy) qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); /* Put PHY into POWER DOWN state: active low */ - if (cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL]) { - qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], - cfg->pwrdn_ctrl); - } else { - qphy_clrbits(qphy->pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL, - cfg->pwrdn_ctrl); - 
} + qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], + cfg->pwrdn_ctrl); return 0; } From 5b68d95c3fc72b4a89b9c7549e1ef638a01a3e15 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 17 Oct 2022 08:50:07 +0200 Subject: [PATCH 0069/4122] phy: qcom-qmp-pcie: move power-down update Move the power-down-control register update that powers on the PHY to the power-on handler so that it matches the power-off handler. Note that the power-on handler is currently always called directly after init. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221017065013.19647-6-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index 1105d439828c..b42c5e185228 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -1976,7 +1976,6 @@ static int qmp_pcie_init(struct phy *phy) struct qmp_phy *qphy = phy_get_drvdata(phy); struct qcom_qmp *qmp = qphy->qmp; const struct qmp_phy_cfg *cfg = qphy->cfg; - void __iomem *pcs = qphy->pcs; int ret; /* turn on regulator supplies */ @@ -2002,9 +2001,6 @@ static int qmp_pcie_init(struct phy *phy) if (ret) goto err_assert_reset; - qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], - cfg->pwrdn_ctrl); - return 0; err_assert_reset: @@ -2041,6 +2037,9 @@ static int qmp_pcie_power_on(struct phy *phy) unsigned int mask, val, ready; int ret; + qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], + cfg->pwrdn_ctrl); + if (qphy->mode == PHY_MODE_PCIE_RC) mode_tables = cfg->tables_rc; else From 4d3701f94f274ac67cddd1e87a2311a2a40c0138 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 17 Oct 2022 08:50:08 +0200 Subject: [PATCH 0070/4122] phy: qcom-qmp-pcie-msm8996: clean up power-down handling This driver uses v2 registers only so drop the 
unnecessary POWER_DOWN_CONTROL override. Note that this register is already hard-coded when powering on the PHY. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221017065013.19647-7-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c index 707ec81c7a2a..5fdd85a1dc3e 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c @@ -91,7 +91,6 @@ enum qphy_reg_layout { QPHY_SW_RESET, QPHY_START_CTRL, QPHY_PCS_STATUS, - QPHY_PCS_POWER_DOWN_CONTROL, /* Keep last to ensure regs_layout arrays are properly initialized */ QPHY_LAYOUT_SIZE }; @@ -591,13 +590,8 @@ static int qmp_pcie_msm8996_power_off(struct phy *phy) qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); /* Put PHY into POWER DOWN state: active low */ - if (cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL]) { - qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], - cfg->pwrdn_ctrl); - } else { - qphy_clrbits(qphy->pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL, - cfg->pwrdn_ctrl); - } + qphy_clrbits(qphy->pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL, + cfg->pwrdn_ctrl); return 0; } From 2e52ddf045a08fcae8dc4c88d10aa01252dd4165 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 17 Oct 2022 08:50:09 +0200 Subject: [PATCH 0071/4122] phy: qcom-qmp-combo: clean up power-down handling Always define the POWER_DOWN_CONTROL register instead of falling back to the v2 (and v3) offset during power on and power off. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221017065013.19647-8-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index c21512b9ab52..7b434e2ee640 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -121,6 +121,7 @@ static const unsigned int qmp_v3_usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = { [QPHY_SW_RESET] = 0x00, [QPHY_START_CTRL] = 0x08, [QPHY_PCS_STATUS] = 0x174, + [QPHY_PCS_POWER_DOWN_CONTROL] = 0x04, [QPHY_PCS_AUTONOMOUS_MODE_CTRL] = 0x0d8, [QPHY_PCS_LFPS_RXTERM_IRQ_CLEAR] = 0x0dc, [QPHY_PCS_LFPS_RXTERM_IRQ_STATUS] = 0x170, @@ -1991,13 +1992,8 @@ static int qmp_combo_com_init(struct qmp_phy *qphy) qphy_clrbits(dp_com, QPHY_V3_DP_COM_SWI_CTRL, 0x03); qphy_clrbits(dp_com, QPHY_V3_DP_COM_SW_RESET, SW_RESET); - if (cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL]) - qphy_setbits(pcs, - cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], - cfg->pwrdn_ctrl); - else - qphy_setbits(pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL, - cfg->pwrdn_ctrl); + qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], + cfg->pwrdn_ctrl); mutex_unlock(&qmp->phy_mutex); @@ -2144,13 +2140,8 @@ static int qmp_combo_power_off(struct phy *phy) qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); /* Put PHY into POWER DOWN state: active low */ - if (cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL]) { - qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], - cfg->pwrdn_ctrl); - } else { - qphy_clrbits(qphy->pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL, - cfg->pwrdn_ctrl); - } + qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], + cfg->pwrdn_ctrl); } return 0; From 2d3068cf8d9aa80cfbe2dd4226abbf425c26f8b7 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 17 Oct 2022 08:50:10 
+0200 Subject: [PATCH 0072/4122] phy: qcom-qmp-ufs: clean up power-down handling Always define the POWER_DOWN_CONTROL register instead of falling back to the v2 (and v4) offset during power on and power off. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221017065013.19647-9-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index db5642e1f715..e28c45ab74ea 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -89,22 +89,26 @@ enum qphy_reg_layout { static const unsigned int msm8996_ufsphy_regs_layout[QPHY_LAYOUT_SIZE] = { [QPHY_START_CTRL] = 0x00, [QPHY_PCS_READY_STATUS] = 0x168, + [QPHY_PCS_POWER_DOWN_CONTROL] = 0x04, }; static const unsigned int sdm845_ufsphy_regs_layout[QPHY_LAYOUT_SIZE] = { [QPHY_START_CTRL] = 0x00, [QPHY_PCS_READY_STATUS] = 0x160, + [QPHY_PCS_POWER_DOWN_CONTROL] = 0x04, }; static const unsigned int sm6115_ufsphy_regs_layout[QPHY_LAYOUT_SIZE] = { [QPHY_START_CTRL] = 0x00, [QPHY_PCS_READY_STATUS] = 0x168, + [QPHY_PCS_POWER_DOWN_CONTROL] = 0x04, }; static const unsigned int sm8150_ufsphy_regs_layout[QPHY_LAYOUT_SIZE] = { [QPHY_START_CTRL] = QPHY_V4_PCS_UFS_PHY_START, [QPHY_PCS_READY_STATUS] = QPHY_V4_PCS_UFS_READY_STATUS, [QPHY_SW_RESET] = QPHY_V4_PCS_UFS_SW_RESET, + [QPHY_PCS_POWER_DOWN_CONTROL] = QPHY_V4_PCS_UFS_POWER_DOWN_CONTROL, }; static const struct qmp_phy_init_tbl msm8996_ufs_serdes_tbl[] = { @@ -856,13 +860,8 @@ static int qmp_ufs_com_init(struct qmp_phy *qphy) if (ret) goto err_disable_regulators; - if (cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL]) - qphy_setbits(pcs, - cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], - cfg->pwrdn_ctrl); - else - qphy_setbits(pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL, - cfg->pwrdn_ctrl); + qphy_setbits(pcs, 
cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], + cfg->pwrdn_ctrl); return 0; @@ -996,13 +995,8 @@ static int qmp_ufs_power_off(struct phy *phy) qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); /* Put PHY into POWER DOWN state: active low */ - if (cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL]) { - qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], - cfg->pwrdn_ctrl); - } else { - qphy_clrbits(qphy->pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL, - cfg->pwrdn_ctrl); - } + qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], + cfg->pwrdn_ctrl); return 0; } From 645d3d04702401e002928b934b830bd25be9e277 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 17 Oct 2022 08:50:11 +0200 Subject: [PATCH 0073/4122] phy: qcom-qmp-usb: clean up power-down handling Always define the POWER_DOWN_CONTROL register instead of falling back to the v2 (and v3) offset during power on and power off. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221017065013.19647-10-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 965e486ab87d..b0b13fb6cb59 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -126,6 +126,7 @@ static const unsigned int usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = { [QPHY_PCS_AUTONOMOUS_MODE_CTRL] = 0x0d4, [QPHY_PCS_LFPS_RXTERM_IRQ_CLEAR] = 0x0d8, [QPHY_PCS_LFPS_RXTERM_IRQ_STATUS] = 0x178, + [QPHY_PCS_POWER_DOWN_CONTROL] = 0x04, }; static const unsigned int qmp_v3_usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = { @@ -135,6 +136,7 @@ static const unsigned int qmp_v3_usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = { [QPHY_PCS_AUTONOMOUS_MODE_CTRL] = 0x0d8, [QPHY_PCS_LFPS_RXTERM_IRQ_CLEAR] = 0x0dc, [QPHY_PCS_LFPS_RXTERM_IRQ_STATUS] = 0x170, + 
[QPHY_PCS_POWER_DOWN_CONTROL] = 0x04, }; static const unsigned int qmp_v4_usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = { @@ -2164,13 +2166,8 @@ static int qmp_usb_init(struct phy *phy) qphy_clrbits(dp_com, QPHY_V3_DP_COM_SW_RESET, SW_RESET); } - if (cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL]) - qphy_setbits(pcs, - cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], - cfg->pwrdn_ctrl); - else - qphy_setbits(pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL, - cfg->pwrdn_ctrl); + qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], + cfg->pwrdn_ctrl); return 0; @@ -2277,13 +2274,8 @@ static int qmp_usb_power_off(struct phy *phy) qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); /* Put PHY into POWER DOWN state: active low */ - if (cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL]) { - qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], - cfg->pwrdn_ctrl); - } else { - qphy_clrbits(qphy->pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL, - cfg->pwrdn_ctrl); - } + qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], + cfg->pwrdn_ctrl); return 0; } From 5b76f5ec63e0bfd20d955fc9d09dc2cff7742bec Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 17 Oct 2022 08:50:12 +0200 Subject: [PATCH 0074/4122] phy: qcom-qmp-pcie: clean up clock lists Keep the clock lists together and sorted by symbol name. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221017065013.19647-11-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index b42c5e185228..cb2128e5a78d 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -1462,6 +1462,10 @@ static inline void qphy_clrbits(void __iomem *base, u32 offset, u32 val) } /* list of clocks required by phy */ +static const char * const ipq8074_pciephy_clk_l[] = { + "aux", "cfg_ahb", +}; + static const char * const msm8996_phy_clk_l[] = { "aux", "cfg_ahb", "ref", }; @@ -1476,10 +1480,6 @@ static const char * const qmp_phy_vreg_l[] = { "vdda-phy", "vdda-pll", }; -static const char * const ipq8074_pciephy_clk_l[] = { - "aux", "cfg_ahb", -}; - /* list of resets */ static const char * const ipq8074_pciephy_reset_l[] = { "phy", "common", From 2d93887cb4bac0a36ce9e146956f631ab7994680 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 17 Oct 2022 08:50:13 +0200 Subject: [PATCH 0075/4122] phy: qcom-qmp-pcie: drop bogus register update Since commit 0d58280cf1e6 ("phy: Update PHY power control sequence") the PHY is powered on before configuring the registers and only the MSM8996 PCIe PHY, which includes the POWER_DOWN_CONTROL register in its PCS initialisation table, may possibly require a second update afterwards. To make things worse, the POWER_DOWN_CONTROL register lies at a different offset on more recent SoCs so that the second update, which still used a hard-coded offset, would write to an unrelated register (e.g. a revision-id register on SC8280XP). As the MSM8996 PCIe PHY is now handled by a separate driver, simply drop the bogus register update. 
Fixes: e4d8b05ad5f9 ("phy: qcom-qmp: Use proper PWRDOWN offset for sm8150 USB") added support Reviewed-by: Dmitry Baryshkov Tested-by: Dmitry Baryshkov #RB3 Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221017065013.19647-12-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index cb2128e5a78d..30838ae8f027 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -2061,12 +2061,6 @@ static int qmp_pcie_power_on(struct phy *phy) qmp_pcie_pcs_init(qphy, &cfg->tables); qmp_pcie_pcs_init(qphy, mode_tables); - /* - * Pull out PHY from POWER DOWN state. - * This is active low enable signal to power-down PHY. - */ - qphy_setbits(pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL, cfg->pwrdn_ctrl); - if (cfg->has_pwrdn_delay) usleep_range(cfg->pwrdn_delay_min, cfg->pwrdn_delay_max); From bf08ce132cd069afc45635e1ffeb6adb0523cc60 Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Wed, 12 Oct 2022 16:25:23 +0200 Subject: [PATCH 0076/4122] drivers/gpio: use simple i2c probe All these drivers have an i2c probe function which doesn't use the "struct i2c_device_id *id" parameter, so they can trivially be converted to the "probe_new" style of probe with a single argument. This is part of an ongoing transition to single-argument i2c probe functions. Old-style probe functions involve a call to i2c_match_id: in drivers/i2c/i2c-core-base.c, /* * When there are no more users of probe(), * rename probe_new to probe. */ if (driver->probe_new) status = driver->probe_new(client); else if (driver->probe) status = driver->probe(client, i2c_match_id(driver->id_table, client)); else status = -EINVAL; Drivers which don't need the second parameter can be declared using probe_new instead, avoiding the call to i2c_match_id. 
Drivers which do can still be converted to probe_new-style, calling i2c_match_id themselves (as is done currently for of_match_id). This change was done using the following Coccinelle script, and fixed up for whitespace changes: @ rule1 @ identifier fn; identifier client, id; @@ - static int fn(struct i2c_client *client, const struct i2c_device_id *id) + static int fn(struct i2c_client *client) { ...when != id } @ rule2 depends on rule1 @ identifier rule1.fn; identifier driver; @@ struct i2c_driver driver = { - .probe + .probe_new = ( fn | - &fn + fn ) , }; Signed-off-by: Stephen Kitt Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-gw-pld.c | 5 ++--- drivers/gpio/gpio-max7300.c | 5 ++--- drivers/gpio/gpio-tpic2810.c | 5 ++--- drivers/gpio/gpio-ts4900.c | 5 ++--- 4 files changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/gpio/gpio-gw-pld.c b/drivers/gpio/gpio-gw-pld.c index 2109803ffb38..5057fa9ad610 100644 --- a/drivers/gpio/gpio-gw-pld.c +++ b/drivers/gpio/gpio-gw-pld.c @@ -67,8 +67,7 @@ static void gw_pld_set8(struct gpio_chip *gc, unsigned offset, int value) gw_pld_output8(gc, offset, value); } -static int gw_pld_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int gw_pld_probe(struct i2c_client *client) { struct device *dev = &client->dev; struct gw_pld *gw; @@ -126,7 +125,7 @@ static struct i2c_driver gw_pld_driver = { .name = "gw_pld", .of_match_table = gw_pld_dt_ids, }, - .probe = gw_pld_probe, + .probe_new = gw_pld_probe, .id_table = gw_pld_id, }; module_i2c_driver(gw_pld_driver); diff --git a/drivers/gpio/gpio-max7300.c b/drivers/gpio/gpio-max7300.c index 43da381a4d7e..cf482f4f0098 100644 --- a/drivers/gpio/gpio-max7300.c +++ b/drivers/gpio/gpio-max7300.c @@ -28,8 +28,7 @@ static int max7300_i2c_read(struct device *dev, unsigned int reg) return i2c_smbus_read_byte_data(client, reg); } -static int max7300_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int max7300_probe(struct 
i2c_client *client) { struct max7301 *ts; @@ -63,7 +62,7 @@ static struct i2c_driver max7300_driver = { .driver = { .name = "max7300", }, - .probe = max7300_probe, + .probe_new = max7300_probe, .remove = max7300_remove, .id_table = max7300_id, }; diff --git a/drivers/gpio/gpio-tpic2810.c b/drivers/gpio/gpio-tpic2810.c index d642c35cb97c..349c5fbd9b02 100644 --- a/drivers/gpio/gpio-tpic2810.c +++ b/drivers/gpio/gpio-tpic2810.c @@ -98,8 +98,7 @@ static const struct of_device_id tpic2810_of_match_table[] = { }; MODULE_DEVICE_TABLE(of, tpic2810_of_match_table); -static int tpic2810_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int tpic2810_probe(struct i2c_client *client) { struct tpic2810 *gpio; int ret; @@ -144,7 +143,7 @@ static struct i2c_driver tpic2810_driver = { .name = "tpic2810", .of_match_table = tpic2810_of_match_table, }, - .probe = tpic2810_probe, + .probe_new = tpic2810_probe, .remove = tpic2810_remove, .id_table = tpic2810_id_table, }; diff --git a/drivers/gpio/gpio-ts4900.c b/drivers/gpio/gpio-ts4900.c index 416725c26e94..43e8b66e04f7 100644 --- a/drivers/gpio/gpio-ts4900.c +++ b/drivers/gpio/gpio-ts4900.c @@ -136,8 +136,7 @@ static const struct of_device_id ts4900_gpio_of_match_table[] = { }; MODULE_DEVICE_TABLE(of, ts4900_gpio_of_match_table); -static int ts4900_gpio_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int ts4900_gpio_probe(struct i2c_client *client) { struct ts4900_gpio_priv *priv; u32 ngpio; @@ -186,7 +185,7 @@ static struct i2c_driver ts4900_gpio_driver = { .name = "ts4900-gpio", .of_match_table = ts4900_gpio_of_match_table, }, - .probe = ts4900_gpio_probe, + .probe_new = ts4900_gpio_probe, .id_table = ts4900_gpio_id_table, }; module_i2c_driver(ts4900_gpio_driver); From 317627a4a19e2a6f8d60e3e2eefe6dfd87059d79 Mon Sep 17 00:00:00 2001 From: Davide Ciminaghi Date: Fri, 2 Sep 2022 14:42:01 +0200 Subject: [PATCH 0077/4122] gpio: Remove sta2x11 GPIO driver The Connext chip has 4 gpio 
cells looking very similar to those of the Nomadik, whose gpio/pinctrl driver (already featuring devicetree support) will be used instead of the sta2x11 specific one. Signed-off-by: Davide Ciminaghi Acked-by: Giancarlo Asnaghi Acked-by: Linus Walleij Signed-off-by: Christophe Leroy Signed-off-by: Bartosz Golaszewski --- drivers/gpio/Kconfig | 8 - drivers/gpio/Makefile | 1 - drivers/gpio/gpio-sta2x11.c | 411 ------------------------------------ 3 files changed, 420 deletions(-) delete mode 100644 drivers/gpio/gpio-sta2x11.c diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index a01af1180616..e034f752e7ce 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -600,14 +600,6 @@ config GPIO_SPRD help Say yes here to support Spreadtrum GPIO device. -config GPIO_STA2X11 - bool "STA2x11/ConneXt GPIO support" - depends on MFD_STA2X11 - select GENERIC_IRQ_CHIP - help - Say yes here to support the STA2x11/ConneXt GPIO device. - The GPIO module has 128 GPIO pins with alternate functions. 
- config GPIO_STP_XWAY bool "XWAY STP GPIOs" depends on SOC_XWAY || COMPILE_TEST diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile index 29e3beb6548c..84fae267e8eb 100644 --- a/drivers/gpio/Makefile +++ b/drivers/gpio/Makefile @@ -140,7 +140,6 @@ obj-$(CONFIG_GPIO_SL28CPLD) += gpio-sl28cpld.o obj-$(CONFIG_GPIO_SODAVILLE) += gpio-sodaville.o obj-$(CONFIG_GPIO_SPEAR_SPICS) += gpio-spear-spics.o obj-$(CONFIG_GPIO_SPRD) += gpio-sprd.o -obj-$(CONFIG_GPIO_STA2X11) += gpio-sta2x11.o obj-$(CONFIG_GPIO_STMPE) += gpio-stmpe.o obj-$(CONFIG_GPIO_STP_XWAY) += gpio-stp-xway.o obj-$(CONFIG_GPIO_SYSCON) += gpio-syscon.o diff --git a/drivers/gpio/gpio-sta2x11.c b/drivers/gpio/gpio-sta2x11.c deleted file mode 100644 index e07cca0f8d35..000000000000 --- a/drivers/gpio/gpio-sta2x11.c +++ /dev/null @@ -1,411 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * STMicroelectronics ConneXt (STA2X11) GPIO driver - * - * Copyright 2012 ST Microelectronics (Alessandro Rubini) - * Based on gpio-ml-ioh.c, Copyright 2010 OKI Semiconductors Ltd. - * Also based on previous sta2x11 work, Copyright 2011 Wind River Systems, Inc. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct gsta_regs { - u32 dat; /* 0x00 */ - u32 dats; - u32 datc; - u32 pdis; - u32 dir; /* 0x10 */ - u32 dirs; - u32 dirc; - u32 unused_1c; - u32 afsela; /* 0x20 */ - u32 unused_24[7]; - u32 rimsc; /* 0x40 */ - u32 fimsc; - u32 is; - u32 ic; -}; - -struct gsta_gpio { - spinlock_t lock; - struct device *dev; - void __iomem *reg_base; - struct gsta_regs __iomem *regs[GSTA_NR_BLOCKS]; - struct gpio_chip gpio; - int irq_base; - /* FIXME: save the whole config here (AF, ...) 
*/ - unsigned irq_type[GSTA_NR_GPIO]; -}; - -/* - * gpio methods - */ - -static void gsta_gpio_set(struct gpio_chip *gpio, unsigned nr, int val) -{ - struct gsta_gpio *chip = gpiochip_get_data(gpio); - struct gsta_regs __iomem *regs = chip->regs[nr / GSTA_GPIO_PER_BLOCK]; - u32 bit = BIT(nr % GSTA_GPIO_PER_BLOCK); - - if (val) - writel(bit, &regs->dats); - else - writel(bit, &regs->datc); -} - -static int gsta_gpio_get(struct gpio_chip *gpio, unsigned nr) -{ - struct gsta_gpio *chip = gpiochip_get_data(gpio); - struct gsta_regs __iomem *regs = chip->regs[nr / GSTA_GPIO_PER_BLOCK]; - u32 bit = BIT(nr % GSTA_GPIO_PER_BLOCK); - - return !!(readl(&regs->dat) & bit); -} - -static int gsta_gpio_direction_output(struct gpio_chip *gpio, unsigned nr, - int val) -{ - struct gsta_gpio *chip = gpiochip_get_data(gpio); - struct gsta_regs __iomem *regs = chip->regs[nr / GSTA_GPIO_PER_BLOCK]; - u32 bit = BIT(nr % GSTA_GPIO_PER_BLOCK); - - writel(bit, &regs->dirs); - /* Data register after direction, otherwise pullup/down is selected */ - if (val) - writel(bit, &regs->dats); - else - writel(bit, &regs->datc); - return 0; -} - -static int gsta_gpio_direction_input(struct gpio_chip *gpio, unsigned nr) -{ - struct gsta_gpio *chip = gpiochip_get_data(gpio); - struct gsta_regs __iomem *regs = chip->regs[nr / GSTA_GPIO_PER_BLOCK]; - u32 bit = BIT(nr % GSTA_GPIO_PER_BLOCK); - - writel(bit, &regs->dirc); - return 0; -} - -static int gsta_gpio_to_irq(struct gpio_chip *gpio, unsigned offset) -{ - struct gsta_gpio *chip = gpiochip_get_data(gpio); - return chip->irq_base + offset; -} - -static void gsta_gpio_setup(struct gsta_gpio *chip) /* called from probe */ -{ - struct gpio_chip *gpio = &chip->gpio; - - /* - * ARCH_NR_GPIOS is currently 256 and dynamic allocation starts - * from the end.
However, for compatibility, we need the first - * ConneXt device to start from gpio 0: it's the main chipset - * on most boards so documents and drivers assume gpio0..gpio127 - */ - static int gpio_base; - - gpio->label = dev_name(chip->dev); - gpio->owner = THIS_MODULE; - gpio->direction_input = gsta_gpio_direction_input; - gpio->get = gsta_gpio_get; - gpio->direction_output = gsta_gpio_direction_output; - gpio->set = gsta_gpio_set; - gpio->dbg_show = NULL; - gpio->base = gpio_base; - gpio->ngpio = GSTA_NR_GPIO; - gpio->can_sleep = false; - gpio->to_irq = gsta_gpio_to_irq; - - /* - * After the first device, turn to dynamic gpio numbers. - * For example, with ARCH_NR_GPIOS = 256 we can fit two cards - */ - if (!gpio_base) - gpio_base = -1; -} - -/* - * Special method: alternate functions and pullup/pulldown. This is only - * invoked on startup to configure gpio's according to platform data. - * FIXME : this functionality shall be managed (and exported to other drivers) - * via the pin control subsystem. - */ -static void gsta_set_config(struct gsta_gpio *chip, int nr, unsigned cfg) -{ - struct gsta_regs __iomem *regs = chip->regs[nr / GSTA_GPIO_PER_BLOCK]; - unsigned long flags; - u32 bit = BIT(nr % GSTA_GPIO_PER_BLOCK); - u32 val; - int err = 0; - - pr_info("%s: %p %i %i\n", __func__, chip, nr, cfg); - - if (cfg == PINMUX_TYPE_NONE) - return; - - /* Alternate function or not? 
*/ - spin_lock_irqsave(&chip->lock, flags); - val = readl(&regs->afsela); - if (cfg == PINMUX_TYPE_FUNCTION) - val |= bit; - else - val &= ~bit; - writel(val | bit, &regs->afsela); - if (cfg == PINMUX_TYPE_FUNCTION) { - spin_unlock_irqrestore(&chip->lock, flags); - return; - } - - /* not alternate function: set details */ - switch (cfg) { - case PINMUX_TYPE_OUTPUT_LOW: - writel(bit, &regs->dirs); - writel(bit, &regs->datc); - break; - case PINMUX_TYPE_OUTPUT_HIGH: - writel(bit, &regs->dirs); - writel(bit, &regs->dats); - break; - case PINMUX_TYPE_INPUT: - writel(bit, &regs->dirc); - val = readl(&regs->pdis) | bit; - writel(val, &regs->pdis); - break; - case PINMUX_TYPE_INPUT_PULLUP: - writel(bit, &regs->dirc); - val = readl(&regs->pdis) & ~bit; - writel(val, &regs->pdis); - writel(bit, &regs->dats); - break; - case PINMUX_TYPE_INPUT_PULLDOWN: - writel(bit, &regs->dirc); - val = readl(&regs->pdis) & ~bit; - writel(val, &regs->pdis); - writel(bit, &regs->datc); - break; - default: - err = 1; - } - spin_unlock_irqrestore(&chip->lock, flags); - if (err) - pr_err("%s: chip %p, pin %i, cfg %i is invalid\n", - __func__, chip, nr, cfg); -} - -/* - * Irq methods - */ - -static void gsta_irq_disable(struct irq_data *data) -{ - struct irq_chip_generic *gc = irq_data_get_irq_chip_data(data); - struct gsta_gpio *chip = gc->private; - int nr = data->irq - chip->irq_base; - struct gsta_regs __iomem *regs = chip->regs[nr / GSTA_GPIO_PER_BLOCK]; - u32 bit = BIT(nr % GSTA_GPIO_PER_BLOCK); - u32 val; - unsigned long flags; - - spin_lock_irqsave(&chip->lock, flags); - if (chip->irq_type[nr] & IRQ_TYPE_EDGE_RISING) { - val = readl(&regs->rimsc) & ~bit; - writel(val, &regs->rimsc); - } - if (chip->irq_type[nr] & IRQ_TYPE_EDGE_FALLING) { - val = readl(&regs->fimsc) & ~bit; - writel(val, &regs->fimsc); - } - spin_unlock_irqrestore(&chip->lock, flags); - return; -} - -static void gsta_irq_enable(struct irq_data *data) -{ - struct irq_chip_generic *gc = irq_data_get_irq_chip_data(data); - struct gsta_gpio *chip = gc->private; - int nr = data->irq -
chip->irq_base; - struct gsta_regs __iomem *regs = chip->regs[nr / GSTA_GPIO_PER_BLOCK]; - u32 bit = BIT(nr % GSTA_GPIO_PER_BLOCK); - u32 val; - int type; - unsigned long flags; - - type = chip->irq_type[nr]; - - spin_lock_irqsave(&chip->lock, flags); - val = readl(&regs->rimsc); - if (type & IRQ_TYPE_EDGE_RISING) - writel(val | bit, &regs->rimsc); - else - writel(val & ~bit, &regs->rimsc); - val = readl(&regs->rimsc); - if (type & IRQ_TYPE_EDGE_FALLING) - writel(val | bit, &regs->fimsc); - else - writel(val & ~bit, &regs->fimsc); - spin_unlock_irqrestore(&chip->lock, flags); - return; -} - -static int gsta_irq_type(struct irq_data *d, unsigned int type) -{ - struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); - struct gsta_gpio *chip = gc->private; - int nr = d->irq - chip->irq_base; - - /* We only support edge interrupts */ - if (!(type & (IRQ_TYPE_EDGE_RISING | IRQ_TYPE_EDGE_FALLING))) { - pr_debug("%s: unsupported type 0x%x\n", __func__, type); - return -EINVAL; - } - - chip->irq_type[nr] = type; /* used for enable/disable */ - - gsta_irq_enable(d); - return 0; -} - -static irqreturn_t gsta_gpio_handler(int irq, void *dev_id) -{ - struct gsta_gpio *chip = dev_id; - struct gsta_regs __iomem *regs; - u32 is; - int i, nr, base; - irqreturn_t ret = IRQ_NONE; - - for (i = 0; i < GSTA_NR_BLOCKS; i++) { - regs = chip->regs[i]; - base = chip->irq_base + i * GSTA_GPIO_PER_BLOCK; - while ((is = readl(&regs->is))) { - nr = __ffs(is); - irq = base + nr; - generic_handle_irq(irq); - writel(1 << nr, &regs->ic); - ret = IRQ_HANDLED; - } - } - return ret; -} - -static int gsta_alloc_irq_chip(struct gsta_gpio *chip) -{ - struct irq_chip_generic *gc; - struct irq_chip_type *ct; - int rv; - - gc = devm_irq_alloc_generic_chip(chip->dev, KBUILD_MODNAME, 1, - chip->irq_base, - chip->reg_base, handle_simple_irq); - if (!gc) - return -ENOMEM; - - gc->private = chip; - ct = gc->chip_types; - - ct->chip.irq_set_type = gsta_irq_type; - ct->chip.irq_disable = gsta_irq_disable; - ct->chip.irq_enable =
gsta_irq_enable; - - /* FIXME: this makes at most 32 interrupts. Request 0 by now */ - rv = devm_irq_setup_generic_chip(chip->dev, gc, - 0 /* IRQ_MSK(GSTA_GPIO_PER_BLOCK) */, - 0, IRQ_NOREQUEST | IRQ_NOPROBE, 0); - if (rv) - return rv; - - /* Set up all 128 interrupts: code from setup_generic_chip */ - { - struct irq_chip_type *ct = gc->chip_types; - int i, j; - for (j = 0; j < GSTA_NR_GPIO; j++) { - i = chip->irq_base + j; - irq_set_chip_and_handler(i, &ct->chip, ct->handler); - irq_set_chip_data(i, gc); - irq_clear_status_flags(i, IRQ_NOREQUEST | IRQ_NOPROBE); - } - gc->irq_cnt = i - gc->irq_base; - } - - return 0; -} - -/* The platform device used here is instantiated by the MFD device */ -static int gsta_probe(struct platform_device *dev) -{ - int i, err; - struct pci_dev *pdev; - struct sta2x11_gpio_pdata *gpio_pdata; - struct gsta_gpio *chip; - - pdev = *(struct pci_dev **)dev_get_platdata(&dev->dev); - gpio_pdata = dev_get_platdata(&pdev->dev); - - if (gpio_pdata == NULL) - dev_err(&dev->dev, "no gpio config\n"); - pr_debug("gpio config: %p\n", gpio_pdata); - - chip = devm_kzalloc(&dev->dev, sizeof(*chip), GFP_KERNEL); - if (!chip) - return -ENOMEM; - chip->dev = &dev->dev; - chip->reg_base = devm_platform_ioremap_resource(dev, 0); - if (IS_ERR(chip->reg_base)) - return PTR_ERR(chip->reg_base); - - for (i = 0; i < GSTA_NR_BLOCKS; i++) { - chip->regs[i] = chip->reg_base + i * 4096; - /* disable all irqs */ - writel(0, &chip->regs[i]->rimsc); - writel(0, &chip->regs[i]->fimsc); - writel(~0, &chip->regs[i]->ic); - } - spin_lock_init(&chip->lock); - gsta_gpio_setup(chip); - if (gpio_pdata) - for (i = 0; i < GSTA_NR_GPIO; i++) - gsta_set_config(chip, i, gpio_pdata->pinconfig[i]); - - /* 384 was used in previous code: be compatible for other drivers */ - err = devm_irq_alloc_descs(&dev->dev, -1, 384, - GSTA_NR_GPIO, NUMA_NO_NODE); - if (err < 0) { - dev_warn(&dev->dev, "sta2x11 gpio: Can't get irq base (%i)\n", - -err); - return err; - } - chip->irq_base = err; - 
- err = gsta_alloc_irq_chip(chip); - if (err) - return err; - - err = devm_request_irq(&dev->dev, pdev->irq, gsta_gpio_handler, - IRQF_SHARED, KBUILD_MODNAME, chip); - if (err < 0) { - dev_err(&dev->dev, "sta2x11 gpio: Can't request irq (%i)\n", - -err); - return err; - } - - return devm_gpiochip_add_data(&dev->dev, &chip->gpio, chip); -} - -static struct platform_driver sta2x11_gpio_platform_driver = { - .driver = { - .name = "sta2x11-gpio", - .suppress_bind_attrs = true, - }, - .probe = gsta_probe, -}; -builtin_platform_driver(sta2x11_gpio_platform_driver); From 95b39792c6646322e0684f1a1aa395ee82b6f3fb Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 2 Sep 2022 14:42:02 +0200 Subject: [PATCH 0078/4122] gpio: aggregator: Stop using ARCH_NR_GPIOS ARCH_NR_GPIOS is used locally in aggr_parse() as the maximum number of GPIOs to be aggregated together by the driver since commit ec75039d5550 ("gpio: aggregator: Use bitmap_parselist() for parsing GPIO offsets"). Don't rely on the total possible number of GPIOs in the system but define a local arbitrary macro for that, set to 512 which should be large enough as it is also the default value for ARCH_NR_GPIOS. 
Signed-off-by: Christophe Leroy Reviewed-by: Geert Uytterhoeven Reviewed-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-aggregator.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpio-aggregator.c b/drivers/gpio/gpio-aggregator.c index 0cb2664085cf..6d17d262ad91 100644 --- a/drivers/gpio/gpio-aggregator.c +++ b/drivers/gpio/gpio-aggregator.c @@ -23,6 +23,7 @@ #include #include +#define AGGREGATOR_MAX_GPIOS 512 /* * GPIO Aggregator sysfs interface @@ -64,7 +65,7 @@ static int aggr_parse(struct gpio_aggregator *aggr) unsigned int i, n = 0; int error = 0; - bitmap = bitmap_alloc(ARCH_NR_GPIOS, GFP_KERNEL); + bitmap = bitmap_alloc(AGGREGATOR_MAX_GPIOS, GFP_KERNEL); if (!bitmap) return -ENOMEM; @@ -84,13 +85,13 @@ static int aggr_parse(struct gpio_aggregator *aggr) } /* GPIO chip + offset(s) */ - error = bitmap_parselist(offsets, bitmap, ARCH_NR_GPIOS); + error = bitmap_parselist(offsets, bitmap, AGGREGATOR_MAX_GPIOS); if (error) { pr_err("Cannot parse %s: %d\n", offsets, error); goto free_bitmap; } - for_each_set_bit(i, bitmap, ARCH_NR_GPIOS) { + for_each_set_bit(i, bitmap, AGGREGATOR_MAX_GPIOS) { error = aggr_add_gpio(aggr, name, i, &n); if (error) goto free_bitmap; From 95e827a1b0b7c8334d24da7b4a2d17ec5aa7374c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 2 Sep 2022 14:42:03 +0200 Subject: [PATCH 0079/4122] gpio: davinci: Stop using ARCH_NR_GPIOS Since commit 14e85c0e69d5 ("gpio: remove gpio_descs global array") there is no global limitation anymore on the number of GPIOs in the system so don't clamp the number of GPIOs with ARCH_NR_GPIOS. 
Signed-off-by: Christophe Leroy Reviewed-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-davinci.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpio/gpio-davinci.c b/drivers/gpio/gpio-davinci.c index 59c4c48d8296..1018860c83c2 100644 --- a/drivers/gpio/gpio-davinci.c +++ b/drivers/gpio/gpio-davinci.c @@ -217,9 +217,6 @@ static int davinci_gpio_probe(struct platform_device *pdev) return -EINVAL; } - if (WARN_ON(ARCH_NR_GPIOS < ngpio)) - ngpio = ARCH_NR_GPIOS; - /* * If there are unbanked interrupts then the number of * interrupts is equal to number of gpios else all are banked so From 502df79b860563d79143be7a1453c2b3224cd836 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 2 Sep 2022 14:42:04 +0200 Subject: [PATCH 0080/4122] gpiolib: Warn on drivers still using static gpiobase allocation In the preparation of getting completely rid of static gpiobase allocation in the future, emit a warning in drivers still doing so. Signed-off-by: Christophe Leroy Reviewed-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 4756ea08894f..5c64d1a412c7 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -715,6 +715,9 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, * a poison instead. */ gc->base = base; + } else { + dev_warn(&gdev->dev, + "Static allocation of GPIO base is deprecated, use dynamic allocation.\n"); } gdev->base = base; From 7b61212f2a07a5afd213c8876e52b5c9946441e2 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 2 Sep 2022 14:42:05 +0200 Subject: [PATCH 0081/4122] gpiolib: Get rid of ARCH_NR_GPIOS Since commit 14e85c0e69d5 ("gpio: remove gpio_descs global array") there is no limitation on the number of GPIOs that can be allocated in the system since the allocation is fully dynamic. 
ARCH_NR_GPIOS is today only used in order to provide downwards gpiobase allocation from that value, while static allocation is performed upwards from 0. However that has the disadvantage of limiting the number of GPIOs that can be registered in the system. To overcome this limitation without requiring each and every platform to provide its 'best-guess' maximum number, rework the allocation to allocate upwards, allowing approximately 2 million GPIOs. In order to still allow static allocation for legacy drivers, define GPIO_DYNAMIC_BASE with the value 512 as the start for dynamic allocation. The 512 value is chosen because it is the end of the current default range so all current static allocations are expected to be below that value. Of course that's just a rough estimate based on the default value, but assuming static allocations come first, even if there are more static allocations it should fit under the 512 value. In the future, it is expected that all static allocations go away and then dynamic allocation will be patched to start at 0.
Signed-off-by: Christophe Leroy Reviewed-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- arch/arm/include/asm/gpio.h | 1 - drivers/gpio/gpiolib.c | 10 +++---- include/asm-generic/gpio.h | 55 ++++++++++++++----------------------- 3 files changed, 26 insertions(+), 40 deletions(-) diff --git a/arch/arm/include/asm/gpio.h b/arch/arm/include/asm/gpio.h index f3bb8a2bf788..4ebbb58f06ea 100644 --- a/arch/arm/include/asm/gpio.h +++ b/arch/arm/include/asm/gpio.h @@ -2,7 +2,6 @@ #ifndef _ARCH_ARM_GPIO_H #define _ARCH_ARM_GPIO_H -/* Note: this may rely upon the value of ARCH_NR_GPIOS set in mach/gpio.h */ #include /* The trivial gpiolib dispatchers */ diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 5c64d1a412c7..e8faedca6b14 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -183,14 +183,14 @@ EXPORT_SYMBOL_GPL(gpiod_to_chip); static int gpiochip_find_base(int ngpio) { struct gpio_device *gdev; - int base = ARCH_NR_GPIOS - ngpio; + int base = GPIO_DYNAMIC_BASE; - list_for_each_entry_reverse(gdev, &gpio_devices, list) { + list_for_each_entry(gdev, &gpio_devices, list) { /* found a free space? */ - if (gdev->base + gdev->ngpio <= base) + if (gdev->base >= base + ngpio) break; - /* nope, check the space right before the chip */ - base = gdev->base - ngpio; + /* nope, check the space right after the chip */ + base = gdev->base + gdev->ngpio; } if (gpio_is_valid(base)) { diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h index aea9aee1f3e9..a7752cf152ce 100644 --- a/include/asm-generic/gpio.h +++ b/include/asm-generic/gpio.h @@ -11,40 +11,18 @@ #include #include -/* Platforms may implement their GPIO interface with library code, +/* + * Platforms may implement their GPIO interface with library code, * at a small performance cost for non-inlined operations and some * extra memory (for code and for per-GPIO table entries). 
- * - * While the GPIO programming interface defines valid GPIO numbers - * to be in the range 0..MAX_INT, this library restricts them to the - * smaller range 0..ARCH_NR_GPIOS-1. - * - * ARCH_NR_GPIOS is somewhat arbitrary; it usually reflects the sum of - * builtin/SoC GPIOs plus a number of GPIOs on expanders; the latter is - * actually an estimate of a board-specific value. */ -#ifndef ARCH_NR_GPIOS -#if defined(CONFIG_ARCH_NR_GPIO) && CONFIG_ARCH_NR_GPIO > 0 -#define ARCH_NR_GPIOS CONFIG_ARCH_NR_GPIO -#else -#define ARCH_NR_GPIOS 512 -#endif -#endif - /* - * "valid" GPIO numbers are nonnegative and may be passed to - * setup routines like gpio_request(). only some valid numbers - * can successfully be requested and used. - * - * Invalid GPIO numbers are useful for indicating no-such-GPIO in - * platform data and other tables. + * At the end we want all GPIOs to be dynamically allocated from 0. + * However, some legacy drivers still perform fixed allocation. + * Until they are all fixed, leave 0-512 space for them. */ - -static inline bool gpio_is_valid(int number) -{ - return number >= 0 && number < ARCH_NR_GPIOS; -} +#define GPIO_DYNAMIC_BASE 512 struct device; struct gpio; @@ -140,12 +118,6 @@ static inline void gpio_unexport(unsigned gpio) #include -static inline bool gpio_is_valid(int number) -{ - /* only non-negative numbers are valid */ - return number >= 0; -} - /* platforms that don't directly support access to GPIOs through I2C, SPI, * or other blocking infrastructure can use these wrappers. */ @@ -169,4 +141,19 @@ static inline void gpio_set_value_cansleep(unsigned gpio, int value) #endif /* !CONFIG_GPIOLIB */ +/* + * "valid" GPIO numbers are nonnegative and may be passed to + * setup routines like gpio_request(). only some valid numbers + * can successfully be requested and used. + * + * Invalid GPIO numbers are useful for indicating no-such-GPIO in + * platform data and other tables. 
+ */ + +static inline bool gpio_is_valid(int number) +{ + /* only non-negative numbers are valid */ + return number >= 0; +} + #endif /* _ASM_GENERIC_GPIO_H */ From f2b470f036770805ebd20fb5cfe800395c7215af Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 2 Sep 2022 14:42:06 +0200 Subject: [PATCH 0082/4122] Documentation: gpio: Remove text about ARCH_NR_GPIOS ARCH_NR_GPIOS has been removed; clean up the documentation. After this patch, the only place where ARCH_NR_GPIOS remains is in translations/zh_CN/gpio.txt and translations/zh_TW/gpio.txt. I don't have the skills to update that, anyway those two files are already out of sync as they are still mentioning ARCH_REQUIRE_GPIOLIB which was removed by commit 65053e1a7743 ("gpio: delete ARCH_[WANTS_OPTIONAL|REQUIRE]_GPIOLIB") Signed-off-by: Christophe Leroy Reviewed-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- Documentation/driver-api/gpio/legacy.rst | 5 ----- 1 file changed, 5 deletions(-) diff --git a/Documentation/driver-api/gpio/legacy.rst b/Documentation/driver-api/gpio/legacy.rst index 9b12eeb89170..e17910cc3271 100644 --- a/Documentation/driver-api/gpio/legacy.rst +++ b/Documentation/driver-api/gpio/legacy.rst @@ -558,11 +558,6 @@ Platform Support To force-enable this framework, a platform's Kconfig will "select" GPIOLIB, else it is up to the user to configure support for GPIO. -It may also provide a custom value for ARCH_NR_GPIOS, so that it better -reflects the number of GPIOs in actual use on that platform, without -wasting static table space. (It should count both built-in/SoC GPIOs and -also ones on GPIO expanders. - If neither of these options are selected, the platform does not support GPIOs through GPIO-lib and the code cannot be enabled by the user.
From f71806d8dc6c053b5a5344536380c5b2bb82b3fc Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 2 Sep 2022 14:42:07 +0200 Subject: [PATCH 0083/4122] x86: Remove CONFIG_ARCH_NR_GPIO CONFIG_ARCH_NR_GPIO is not used anymore, remove it. Signed-off-by: Christophe Leroy Reviewed-by: Andy Shevchenko Acked-by: Thomas Gleixner Signed-off-by: Bartosz Golaszewski --- arch/x86/Kconfig | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6d1879ef933a..a3288f7e7b2b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -357,11 +357,6 @@ config ARCH_HAS_CPU_RELAX config ARCH_HIBERNATION_POSSIBLE def_bool y -config ARCH_NR_GPIO - int - default 1024 if X86_64 - default 512 - config ARCH_SUSPEND_POSSIBLE def_bool y From 8937944f4ee4f5763de8784ccdb068d93d9b0f3e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 2 Sep 2022 14:42:08 +0200 Subject: [PATCH 0084/4122] arm: Remove CONFIG_ARCH_NR_GPIO CONFIG_ARCH_NR_GPIO is not used anymore, remove it. Signed-off-by: Christophe Leroy Reviewed-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- arch/arm/Kconfig | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index a08c9d092a33..b03fd451122e 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1155,27 +1155,6 @@ config ARM_PSCI 0022A ("Power State Coordination Interface System Software on ARM processors"). -# The GPIO number here must be sorted by descending number. In case of -# a multiplatform kernel, we just want the highest value required by the -# selected platforms. 
-config ARCH_NR_GPIO - int - default 2048 if ARCH_INTEL_SOCFPGA - default 1024 if ARCH_BRCMSTB || ARCH_RENESAS || ARCH_TEGRA || \ - ARCH_ZYNQ || ARCH_ASPEED - default 512 if ARCH_EXYNOS || ARCH_KEYSTONE || SOC_OMAP5 || \ - SOC_DRA7XX || ARCH_S3C24XX || ARCH_S3C64XX || ARCH_S5PV210 - default 416 if ARCH_SUNXI - default 392 if ARCH_U8500 - default 352 if ARCH_VT8500 - default 288 if ARCH_ROCKCHIP - default 264 if MACH_H4700 - default 0 - help - Maximum number of GPIOs in the system. - - If unsure, leave the default value. - config HZ_FIXED int default 128 if SOC_AT91RM9200 From f5a681d238885f238a5f06fcfda625a90d87a327 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 2 Sep 2022 14:42:09 +0200 Subject: [PATCH 0085/4122] arm64: Remove CONFIG_ARCH_NR_GPIO CONFIG_ARCH_NR_GPIO is not used anymore, remove it. Signed-off-by: Christophe Leroy Reviewed-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- arch/arm64/Kconfig | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 505c8a1ccbe0..a0c36763d954 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -2145,18 +2145,6 @@ config STACKPROTECTOR_PER_TASK def_bool y depends on STACKPROTECTOR && CC_HAVE_STACKPROTECTOR_SYSREG -# The GPIO number here must be sorted by descending number. In case of -# a multiplatform kernel, we just want the highest value required by the -# selected platforms. -config ARCH_NR_GPIO - int - default 2048 if ARCH_APPLE - default 0 - help - Maximum number of GPIOs in the system. - - If unsure, leave the default value. 
- endmenu # "Kernel Features" menu "Boot options" From b5636d45aae42aa345b4c7918bdef245ed63da68 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Sep 2022 13:10:41 +0200 Subject: [PATCH 0086/4122] x86/cpu: Remove segment load from switch_to_new_gdt() On 32bit FS and on 64bit GS segments are already set up correctly, but load_percpu_segment() still sets [FG]S after switching from the early GDT to the direct GDT. For 32bit the segment load has no side effects, but on 64bit it causes GSBASE to become 0, which means that any per CPU access before GSBASE is set to the new value is going to fault. That's the reason why the whole file containing this code has stackprotector removed. But that's a pointless exercise for both 32 and 64 bit as the relevant segment selector is already correct. Loading the new GDT does not change that. Remove the segment loads and add comments. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111143.097052006@infradead.org --- arch/x86/include/asm/processor.h | 1 - arch/x86/kernel/cpu/common.c | 47 +++++++++++++++++++++----------- 2 files changed, 31 insertions(+), 17 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 67c9d73b31fa..e21ec970d41a 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -670,7 +670,6 @@ extern struct desc_ptr early_gdt_descr; extern void switch_to_new_gdt(int); extern void load_direct_gdt(int); extern void load_fixmap_gdt(int); -extern void load_percpu_segment(int); extern void cpu_init(void); extern void cpu_init_secondary(void); extern void cpu_init_exception_handling(void); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 3e508f239098..c09abee6f4d5 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -701,16 +701,6 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c) __u32 
cpu_caps_cleared[NCAPINTS + NBUGINTS] __aligned(sizeof(unsigned long)); __u32 cpu_caps_set[NCAPINTS + NBUGINTS] __aligned(sizeof(unsigned long)); -void load_percpu_segment(int cpu) -{ -#ifdef CONFIG_X86_32 - loadsegment(fs, __KERNEL_PERCPU); -#else - __loadsegment_simple(gs, 0); - wrmsrl(MSR_GS_BASE, cpu_kernelmode_gs_base(cpu)); -#endif -} - #ifdef CONFIG_X86_32 /* The 32-bit entry code needs to find cpu_entry_area. */ DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); @@ -738,16 +728,41 @@ void load_fixmap_gdt(int cpu) } EXPORT_SYMBOL_GPL(load_fixmap_gdt); -/* - * Current gdt points %fs at the "master" per-cpu area: after this, - * it's on the real one. +/** + * switch_to_new_gdt - Switch form early GDT to the direct one + * @cpu: The CPU number for which this is invoked + * + * Invoked during early boot to switch from early GDT and early per CPU + * (%fs on 32bit, GS_BASE on 64bit) to the direct GDT and the runtime per + * CPU area. */ void switch_to_new_gdt(int cpu) { - /* Load the original GDT */ load_direct_gdt(cpu); - /* Reload the per-cpu base */ - load_percpu_segment(cpu); + +#ifdef CONFIG_X86_64 + /* + * No need to load %gs. It is already correct. + * + * Writing %gs on 64bit would zero GSBASE which would make any per + * CPU operation up to the point of the wrmsrl() fault. + * + * Set GSBASE to the new offset. Until the wrmsrl() happens the + * early mapping is still valid. That means the GSBASE update will + * lose any prior per CPU data which was not copied over in + * setup_per_cpu_areas(). + */ + wrmsrl(MSR_GS_BASE, cpu_kernelmode_gs_base(cpu)); +#else + /* + * %fs is already set to __KERNEL_PERCPU, but after switching GDT + * it is required to load FS again so that the 'hidden' part is + * updated from the new GDT. Up to this point the early per CPU + * translation is active. Any content of the early per CPU data + * which was not copied over in setup_per_cpu_areas() is lost. 
+ */ + loadsegment(fs, __KERNEL_PERCPU); +#endif } static const struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; From 1f19e2d50baf6515991844eaa8a84a0b0037da70 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Sep 2022 13:10:42 +0200 Subject: [PATCH 0087/4122] x86/cpu: Get rid of redundant switch_to_new_gdt() invocations The only place where switch_to_new_gdt() is required is early boot to switch from the early GDT to the direct GDT. Any other invocation is completely redundant because it does not change anything. Secondary CPUs come out of the ASM code with GDT and GSBASE correctly set up. The same is true for XEN_PV. Remove all the voodoo invocations which are left overs from the ancient past, rename the function to switch_gdt_and_percpu_base() and mark it init. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111143.198076128@infradead.org --- arch/x86/include/asm/processor.h | 2 +- arch/x86/kernel/cpu/common.c | 17 ++++++----------- arch/x86/kernel/setup_percpu.c | 2 +- arch/x86/kernel/smpboot.c | 6 +++++- arch/x86/xen/enlighten_pv.c | 2 +- 5 files changed, 14 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index e21ec970d41a..c660700ecfc6 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -667,7 +667,7 @@ extern int sysenter_setup(void); /* Defined in head.S */ extern struct desc_ptr early_gdt_descr; -extern void switch_to_new_gdt(int); +extern void switch_gdt_and_percpu_base(int); extern void load_direct_gdt(int); extern void load_fixmap_gdt(int); extern void cpu_init(void); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c09abee6f4d5..f51928dd275a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -729,14 +729,15 @@ void load_fixmap_gdt(int cpu) EXPORT_SYMBOL_GPL(load_fixmap_gdt); /** - * switch_to_new_gdt - Switch form early GDT to 
the direct one + * switch_gdt_and_percpu_base - Switch to direct GDT and runtime per CPU base * @cpu: The CPU number for which this is invoked * - * Invoked during early boot to switch from early GDT and early per CPU - * (%fs on 32bit, GS_BASE on 64bit) to the direct GDT and the runtime per - * CPU area. + * Invoked during early boot to switch from early GDT and early per CPU to + * the direct GDT and the runtime per CPU area. On 32-bit the percpu base + * switch is implicit by loading the direct GDT. On 64bit this requires + * to update GSBASE. */ -void switch_to_new_gdt(int cpu) +void __init switch_gdt_and_percpu_base(int cpu) { load_direct_gdt(cpu); @@ -2263,12 +2264,6 @@ void cpu_init(void) boot_cpu_has(X86_FEATURE_TSC) || boot_cpu_has(X86_FEATURE_DE)) cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); - /* - * Initialize the per-CPU GDT with the boot GDT, - * and set up the GDT descriptor: - */ - switch_to_new_gdt(cpu); - if (IS_ENABLED(CONFIG_X86_64)) { loadsegment(fs, 0); memset(cur->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 49325caa7307..555089a5b446 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -211,7 +211,7 @@ void __init setup_per_cpu_areas(void) * area. Reload any changed state for the boot CPU. 
*/ if (!cpu) - switch_to_new_gdt(cpu); + switch_gdt_and_percpu_base(cpu); } /* indicate the early static arrays will soon be gone */ diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 3f3ea0287f69..ce8728d2e5ef 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1453,7 +1453,11 @@ void arch_thaw_secondary_cpus_end(void) void __init native_smp_prepare_boot_cpu(void) { int me = smp_processor_id(); - switch_to_new_gdt(me); + + /* SMP handles this from setup_per_cpu_areas() */ + if (!IS_ENABLED(CONFIG_SMP)) + switch_gdt_and_percpu_base(me); + /* already set me in cpu_online_mask in boot_cpu_init() */ cpumask_set_cpu(me, cpu_callout_mask); cpu_set_state_online(me); diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index f82857e48815..9b892079581b 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1209,7 +1209,7 @@ static void __init xen_setup_gdt(int cpu) pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry_boot; pv_ops.cpu.load_gdt = xen_load_gdt_boot; - switch_to_new_gdt(cpu); + switch_gdt_and_percpu_base(cpu); pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry; pv_ops.cpu.load_gdt = xen_load_gdt; From 2cb15faaedeb67f52f2ddc32b5ca152acfc422c2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Sep 2022 13:10:43 +0200 Subject: [PATCH 0088/4122] x86/cpu: Re-enable stackprotector Commit 5416c2663517 ("x86: make sure load_percpu_segment has no stackprotector") disabled the stackprotector for cpu/common.c because of load_percpu_segment(). Back then the boot stack canary was initialized very early in start_kernel(). Switching the per CPU area by loading the GDT caused the stackprotector to fail with paravirt enabled kernels as the GSBASE was not updated yet. In hindsight a wrong change because it would have been sufficient to ensure that the canary is the same in both per CPU areas. 
Commit d55535232c3d ("random: move rand_initialize() earlier") moved the stack canary initialization to a later point in the init sequence. As a consequence the per CPU stack canary is 0 when switching the per CPU areas, so there is no requirement anymore to exclude this file. Add a comment to switch_gdt_and_percpu_base(). Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111143.303010511@infradead.org --- arch/x86/kernel/cpu/Makefile | 3 --- arch/x86/kernel/cpu/common.c | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index f10a921ee756..d7e3ceaf75c1 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -17,9 +17,6 @@ KMSAN_SANITIZE_common.o := n # As above, instrumenting secondary CPU boot code causes boot hangs. KCSAN_SANITIZE_common.o := n -# Make sure load_percpu_segment has no stackprotector -CFLAGS_common.o := -fno-stack-protector - obj-y := cacheinfo.o scattered.o topology.o obj-y += common.o obj-y += rdrand.o diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f51928dd275a..8e873181759a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -752,6 +752,9 @@ void __init switch_gdt_and_percpu_base(int cpu) * early mapping is still valid. That means the GSBASE update will * lose any prior per CPU data which was not copied over in * setup_per_cpu_areas(). + * + * This works even with stackprotector enabled because the + * per CPU stack canary is 0 in both per CPU areas. */ wrmsrl(MSR_GS_BASE, cpu_kernelmode_gs_base(cpu)); #else From 4c4eb3ecc91f4fee6d6bf7cfbc1e21f2e38d19ff Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Sep 2022 13:10:44 +0200 Subject: [PATCH 0089/4122] x86/modules: Set VM_FLUSH_RESET_PERMS in module_alloc() Instead of resetting permissions all over the place when freeing module memory tell the vmalloc code to do so. 
Avoids the exercise for the next upcoming user. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111143.406703869@infradead.org --- arch/x86/kernel/ftrace.c | 2 -- arch/x86/kernel/kprobes/core.c | 1 - arch/x86/kernel/module.c | 9 +++++---- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index bd165004776d..00eac455a3a1 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -413,8 +413,6 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) /* ALLOC_TRAMP flags lets us know we created it */ ops->flags |= FTRACE_OPS_FL_ALLOC_TRAMP; - set_vm_flush_reset_perms(trampoline); - if (likely(system_state != SYSTEM_BOOTING)) set_memory_ro((unsigned long)trampoline, npages); set_memory_x((unsigned long)trampoline, npages); diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index eb8bc82846b9..01b8d956aa76 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -414,7 +414,6 @@ void *alloc_insn_page(void) if (!page) return NULL; - set_vm_flush_reset_perms(page); /* * First make the page read-only, and only then make it executable to * prevent it from being W+X in between. 
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index c032edcd3d95..43f011277219 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -74,10 +74,11 @@ void *module_alloc(unsigned long size) return NULL; p = __vmalloc_node_range(size, MODULE_ALIGN, - MODULES_VADDR + get_module_load_offset(), - MODULES_END, gfp_mask, - PAGE_KERNEL, VM_DEFER_KMEMLEAK, NUMA_NO_NODE, - __builtin_return_address(0)); + MODULES_VADDR + get_module_load_offset(), + MODULES_END, gfp_mask, PAGE_KERNEL, + VM_FLUSH_RESET_PERMS | VM_DEFER_KMEMLEAK, + NUMA_NO_NODE, __builtin_return_address(0)); + if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) { vfree(p); return NULL; From b26d66f8dace32c46ce58147002964ce8cdfde5f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Sep 2022 13:10:45 +0200 Subject: [PATCH 0090/4122] x86/vdso: Ensure all kernel code is seen by objtool extable.c is kernel code and not part of the VDSO Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111143.512144110@infradead.org --- arch/x86/entry/vdso/Makefile | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 3e88b9df8c8f..3ef611044c8f 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -33,11 +33,12 @@ vobjs32-y += vdso32/vclock_gettime.o vobjs-$(CONFIG_X86_SGX) += vsgx.o # files to link into kernel -obj-y += vma.o extable.o -KASAN_SANITIZE_vma.o := y -UBSAN_SANITIZE_vma.o := y -KCSAN_SANITIZE_vma.o := y -OBJECT_FILES_NON_STANDARD_vma.o := n +obj-y += vma.o extable.o +KASAN_SANITIZE_vma.o := y +UBSAN_SANITIZE_vma.o := y +KCSAN_SANITIZE_vma.o := y +OBJECT_FILES_NON_STANDARD_vma.o := n +OBJECT_FILES_NON_STANDARD_extable.o := n # vDSO images to build vdso_img-$(VDSO64-y) += 64 From 24a9c543d2114d416f84e386c2fa90089bd97e4c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Sep 2022 13:10:46 
+0200 Subject: [PATCH 0091/4122] x86: Sanitize linker script The section ordering in the text section is more than suboptimal: ALIGN_ENTRY_TEXT_BEGIN ENTRY_TEXT ALIGN_ENTRY_TEXT_END SOFTIRQENTRY_TEXT STATIC_CALL_TEXT INDIRECT_THUNK_TEXT ENTRY_TEXT is in a separate PMD so it can be mapped into the cpu entry area when KPTI is enabled. That means the sections after it are also in a separate PMD. That's wasteful especially as the indirect thunk text is a hotpath on retpoline enabled systems and the static call text is fairly hot on 32bit. Move the entry text section last so that the other sections share a PMD with the text before it. This is obviously just best effort and not guaranteed when the previous text is just at a PMD boundary. The text section placement needs an overhaul in general. There is e.g. no point to have debugfs, sysfs, cpuhotplug and other rarely used functions next to hot path text. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111143.614728935@infradead.org --- arch/x86/kernel/vmlinux.lds.S | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 15f29053cec4..0e9fc080c417 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -132,18 +132,19 @@ SECTIONS CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT - ALIGN_ENTRY_TEXT_BEGIN - ENTRY_TEXT - ALIGN_ENTRY_TEXT_END SOFTIRQENTRY_TEXT - STATIC_CALL_TEXT - *(.gnu.warning) - #ifdef CONFIG_RETPOLINE __indirect_thunk_start = .; *(.text.__x86.*) __indirect_thunk_end = .; #endif + STATIC_CALL_TEXT + + ALIGN_ENTRY_TEXT_BEGIN + ENTRY_TEXT + ALIGN_ENTRY_TEXT_END + *(.gnu.warning) + } :text =0xcccc /* End of text section, which should occupy whole number of pages */ From d49a0626216b95cd4bf696f6acf55f39a16ab0bb Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 15 Sep 2022 13:10:47 +0200 Subject: [PATCH 0092/4122] arch: Introduce 
CONFIG_FUNCTION_ALIGNMENT Generic function-alignment infrastructure. Architectures can select FUNCTION_ALIGNMENT_xxB symbols; the FUNCTION_ALIGNMENT symbol is then set to the largest such selected size, 0 otherwise. From this the -falign-functions compiler argument and __ALIGN macro are set. This incorporates the DEBUG_FORCE_FUNCTION_ALIGN_64B knob and future alignment requirements for x86_64 (later in this series) into a single place. NOTE: also removes the 0x90 filler byte from the generic __ALIGN primitive, that value makes no sense outside of x86. NOTE: .balign 0 reverts to a no-op. Requested-by: Linus Torvalds Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111143.719248727@infradead.org --- Makefile | 4 ++-- arch/Kconfig | 24 ++++++++++++++++++++++++ arch/ia64/Kconfig | 1 + arch/ia64/Makefile | 2 +- arch/x86/Kconfig | 2 ++ arch/x86/boot/compressed/head_64.S | 8 ++++++++ arch/x86/include/asm/linkage.h | 4 +--- include/asm-generic/vmlinux.lds.h | 4 ++-- include/linux/linkage.h | 4 ++-- lib/Kconfig.debug | 1 + 10 files changed, 44 insertions(+), 10 deletions(-) diff --git a/Makefile b/Makefile index f41ec8c8426b..141e1bcc0671 100644 --- a/Makefile +++ b/Makefile @@ -1004,8 +1004,8 @@ KBUILD_CFLAGS += $(CC_FLAGS_CFI) export CC_FLAGS_CFI endif -ifdef CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B -KBUILD_CFLAGS += -falign-functions=64 +ifneq ($(CONFIG_FUNCTION_ALIGNMENT),0) +KBUILD_CFLAGS += -falign-functions=$(CONFIG_FUNCTION_ALIGNMENT) endif # arch Makefile may override CC so keep this after arch Makefile is included diff --git a/arch/Kconfig b/arch/Kconfig index 8f138e580d1a..402580253802 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1428,4 +1428,28 @@ source "kernel/gcov/Kconfig" source "scripts/gcc-plugins/Kconfig" +config FUNCTION_ALIGNMENT_4B + bool + +config FUNCTION_ALIGNMENT_8B + bool + +config FUNCTION_ALIGNMENT_16B + bool + +config FUNCTION_ALIGNMENT_32B + bool + +config FUNCTION_ALIGNMENT_64B + bool + +config FUNCTION_ALIGNMENT 
+ int + default 64 if FUNCTION_ALIGNMENT_64B + default 32 if FUNCTION_ALIGNMENT_32B + default 16 if FUNCTION_ALIGNMENT_16B + default 8 if FUNCTION_ALIGNMENT_8B + default 4 if FUNCTION_ALIGNMENT_4B + default 0 + endmenu diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index c6e06cdc738f..d7e4a24e8644 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -63,6 +63,7 @@ config IA64 select NUMA if !FLATMEM select PCI_MSI_ARCH_FALLBACKS if PCI_MSI select ZONE_DMA32 + select FUNCTION_ALIGNMENT_32B default y help The Itanium Processor Family is Intel's 64-bit successor to diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile index 56c4bb276b6e..d553ab7022fe 100644 --- a/arch/ia64/Makefile +++ b/arch/ia64/Makefile @@ -23,7 +23,7 @@ KBUILD_AFLAGS_KERNEL := -mconstant-gp EXTRA := cflags-y := -pipe $(EXTRA) -ffixed-r13 -mfixed-range=f12-f15,f32-f127 \ - -falign-functions=32 -frename-registers -fno-optimize-sibling-calls + -frename-registers -fno-optimize-sibling-calls KBUILD_CFLAGS_KERNEL := -mconstant-gp GAS_STATUS = $(shell $(srctree)/arch/ia64/scripts/check-gas "$(CC)" "$(OBJDUMP)") diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6d1879ef933a..f408fa87ed94 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -290,6 +290,8 @@ config X86 select X86_FEATURE_NAMES if PROC_FS select PROC_PID_ARCH_STATUS if PROC_FS select HAVE_ARCH_NODE_DEV_GROUP if X86_SGX + select FUNCTION_ALIGNMENT_16B if X86_64 || X86_ALIGNMENT_16 + select FUNCTION_ALIGNMENT_4B imply IMA_SECURE_AND_OR_TRUSTED_BOOT if EFI select HAVE_DYNAMIC_FTRACE_NO_PATCHABLE diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index d33f060900d2..190b803eb787 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -37,6 +37,14 @@ #include #include "pgtable.h" +/* + * Fix alignment at 16 bytes. Following CONFIG_FUNCTION_ALIGNMENT will result + * in assembly errors due to trying to move .org backward due to the excessive + * alignment. 
+ */ +#undef __ALIGN +#define __ALIGN .balign 16, 0x90 + /* * Locally defined symbols should be marked hidden: */ diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index f484d656d34e..9ee0e2851742 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -14,10 +14,8 @@ #ifdef __ASSEMBLY__ -#if defined(CONFIG_X86_64) || defined(CONFIG_X86_ALIGNMENT_16) -#define __ALIGN .p2align 4, 0x90 +#define __ALIGN .balign CONFIG_FUNCTION_ALIGNMENT, 0x90; #define __ALIGN_STR __stringify(__ALIGN) -#endif #if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) #define RET jmp __x86_return_thunk diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index c15de165ec8f..335b5711a7ed 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -81,8 +81,8 @@ #define RO_EXCEPTION_TABLE #endif -/* Align . to a 8 byte boundary equals to maximum function alignment. */ -#define ALIGN_FUNCTION() . = ALIGN(8) +/* Align . function alignment. */ +#define ALIGN_FUNCTION() . 
= ALIGN(CONFIG_FUNCTION_ALIGNMENT) /* * LD_DEAD_CODE_DATA_ELIMINATION option enables -fdata-sections, which diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 1feab6136b5b..5c8865bb59d9 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -69,8 +69,8 @@ #endif #ifndef __ALIGN -#define __ALIGN .align 4,0x90 -#define __ALIGN_STR ".align 4,0x90" +#define __ALIGN .balign CONFIG_FUNCTION_ALIGNMENT +#define __ALIGN_STR __stringify(__ALIGN) #endif #ifdef __ASSEMBLY__ diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 3fc7abffc7aa..e90dc6738534 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -467,6 +467,7 @@ config SECTION_MISMATCH_WARN_ONLY config DEBUG_FORCE_FUNCTION_ALIGN_64B bool "Force all function address 64B aligned" depends on EXPERT && (X86_64 || ARM64 || PPC32 || PPC64 || ARC) + select FUNCTION_ALIGNMENT_64B help There are cases that a commit from one domain changes the function address alignment of other domains, and cause magic performance From 8eb5d34e77c63fde8af21c691bcf6e3cd87f7829 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Sep 2022 13:10:48 +0200 Subject: [PATCH 0093/4122] x86/asm: Differentiate between code and function alignment Create SYM_F_ALIGN to differentiate alignment requirements between SYM_CODE and SYM_FUNC. This distinction is useful later when adding padding in front of functions; IOW this allows following the compiler's patchable-function-entry option. 
[peterz: Changelog] Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111143.824822743@infradead.org --- arch/x86/include/asm/linkage.h | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 9ee0e2851742..c2d6e2733b11 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -12,11 +12,15 @@ #define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0))) #endif /* CONFIG_X86_32 */ -#ifdef __ASSEMBLY__ - #define __ALIGN .balign CONFIG_FUNCTION_ALIGNMENT, 0x90; #define __ALIGN_STR __stringify(__ALIGN) +#define ASM_FUNC_ALIGN __ALIGN_STR +#define __FUNC_ALIGN __ALIGN +#define SYM_F_ALIGN __FUNC_ALIGN + +#ifdef __ASSEMBLY__ + #if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) #define RET jmp __x86_return_thunk #else /* CONFIG_RETPOLINE */ @@ -55,7 +59,7 @@ /* SYM_FUNC_START -- use for global functions */ #define SYM_FUNC_START(name) \ - SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) \ + SYM_START(name, SYM_L_GLOBAL, SYM_F_ALIGN) \ ENDBR /* SYM_FUNC_START_NOALIGN -- use for global functions, w/o alignment */ @@ -65,7 +69,7 @@ /* SYM_FUNC_START_LOCAL -- use for local functions */ #define SYM_FUNC_START_LOCAL(name) \ - SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) \ + SYM_START(name, SYM_L_LOCAL, SYM_F_ALIGN) \ ENDBR /* SYM_FUNC_START_LOCAL_NOALIGN -- use for local functions, w/o alignment */ @@ -75,7 +79,7 @@ /* SYM_FUNC_START_WEAK -- use for weak functions */ #define SYM_FUNC_START_WEAK(name) \ - SYM_START(name, SYM_L_WEAK, SYM_A_ALIGN) \ + SYM_START(name, SYM_L_WEAK, SYM_F_ALIGN) \ ENDBR /* SYM_FUNC_START_WEAK_NOALIGN -- use for weak functions, w/o alignment */ From 1934dc9a8a92fda36ab80cfd55edab1708dcdf9a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 15 Sep 2022 13:10:49 +0200 Subject: [PATCH 0094/4122] x86/error_inject: Align function properly Ensure inline 
asm functions are consistently aligned with compiler generated and SYM_FUNC_START*() functions. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111143.930201368@infradead.org --- arch/x86/lib/error-inject.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/lib/error-inject.c b/arch/x86/lib/error-inject.c index 1e3de0769b81..b5a6d83106bc 100644 --- a/arch/x86/lib/error-inject.c +++ b/arch/x86/lib/error-inject.c @@ -11,6 +11,7 @@ asm( ".text\n" ".type just_return_func, @function\n" ".globl just_return_func\n" + ASM_FUNC_ALIGN "just_return_func:\n" ANNOTATE_NOENDBR ASM_RET From 1d293758e548aa6ff65e4dd3f5a9bc2a34b38ce3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Sep 2022 13:10:50 +0200 Subject: [PATCH 0095/4122] x86/paravirt: Properly align PV functions Ensure inline asm functions are consistently aligned with compiler generated and SYM_FUNC_START*() functions. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20220915111144.038540008@infradead.org --- arch/x86/include/asm/paravirt.h | 1 + arch/x86/include/asm/qspinlock_paravirt.h | 2 +- arch/x86/kernel/kvm.c | 1 + arch/x86/kernel/paravirt.c | 2 ++ 4 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 2a0b8dd4ec33..1be66c15ecbd 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -665,6 +665,7 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu); asm(".pushsection " section ", \"ax\";" \ ".globl " PV_THUNK_NAME(func) ";" \ ".type " PV_THUNK_NAME(func) ", @function;" \ + ASM_FUNC_ALIGN \ PV_THUNK_NAME(func) ":" \ ASM_ENDBR \ FRAME_BEGIN \ diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h index 60ece592b220..082551b3c75e 100644 --- 
a/arch/x86/include/asm/qspinlock_paravirt.h +++ b/arch/x86/include/asm/qspinlock_paravirt.h @@ -40,7 +40,7 @@ __PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath, ".spinlock.text"); asm (".pushsection .spinlock.text;" ".globl " PV_UNLOCK ";" ".type " PV_UNLOCK ", @function;" - ".align 4,0x90;" + ASM_FUNC_ALIGN PV_UNLOCK ": " ASM_ENDBR FRAME_BEGIN diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index d4e48b4a438b..95fb85bea111 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -802,6 +802,7 @@ asm( ".pushsection .text;" ".global __raw_callee_save___kvm_vcpu_is_preempted;" ".type __raw_callee_save___kvm_vcpu_is_preempted, @function;" +ASM_FUNC_ALIGN "__raw_callee_save___kvm_vcpu_is_preempted:" ASM_ENDBR "movq __per_cpu_offset(,%rdi,8), %rax;" diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 7ca2d46c08cc..e244c49b52d7 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -40,6 +40,7 @@ extern void _paravirt_nop(void); asm (".pushsection .entry.text, \"ax\"\n" ".global _paravirt_nop\n" + ASM_FUNC_ALIGN "_paravirt_nop:\n\t" ASM_ENDBR ASM_RET @@ -50,6 +51,7 @@ asm (".pushsection .entry.text, \"ax\"\n" /* stub always returning 0. */ asm (".pushsection .entry.text, \"ax\"\n" ".global paravirt_ret0\n" + ASM_FUNC_ALIGN "paravirt_ret0:\n\t" ASM_ENDBR "xor %" _ASM_AX ", %" _ASM_AX ";\n\t" From 67e93ddd5d0b84ac17bddb13d98533e425282421 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Sep 2022 13:10:51 +0200 Subject: [PATCH 0096/4122] x86/entry: Align SYM_CODE_START() variants Explicitly align a bunch of commonly called SYM_CODE_START() symbols. 
Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111144.144068841@infradead.org --- arch/x86/entry/entry_64.S | 16 ++++++++++------ arch/x86/entry/thunk_64.S | 4 ++-- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 9953d966d124..e635f962afb8 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -284,7 +284,8 @@ SYM_FUNC_END(__switch_to_asm) * r12: kernel thread arg */ .pushsection .text, "ax" -SYM_CODE_START(ret_from_fork) + __FUNC_ALIGN +SYM_CODE_START_NOALIGN(ret_from_fork) UNWIND_HINT_EMPTY ANNOTATE_NOENDBR // copy_thread movq %rax, %rdi @@ -600,13 +601,13 @@ SYM_CODE_END(\asmsym) * shared between 32 and 64 bit and emit the __irqentry_text_* markers * so the stacktrace boundary checks work. */ - .align 16 + __ALIGN .globl __irqentry_text_start __irqentry_text_start: #include - .align 16 + __ALIGN .globl __irqentry_text_end __irqentry_text_end: ANNOTATE_NOENDBR @@ -828,7 +829,8 @@ EXPORT_SYMBOL(asm_load_gs_index) * * C calling convention: exc_xen_hypervisor_callback(struct *pt_regs) */ -SYM_CODE_START_LOCAL(exc_xen_hypervisor_callback) + __FUNC_ALIGN +SYM_CODE_START_LOCAL_NOALIGN(exc_xen_hypervisor_callback) /* * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will @@ -856,7 +858,8 @@ SYM_CODE_END(exc_xen_hypervisor_callback) * We distinguish between categories by comparing each saved segment register * with its current contents: any discrepancy means we in category 1. */ -SYM_CODE_START(xen_failsafe_callback) + __FUNC_ALIGN +SYM_CODE_START_NOALIGN(xen_failsafe_callback) UNWIND_HINT_EMPTY ENDBR movl %ds, %ecx @@ -1516,7 +1519,8 @@ SYM_CODE_END(ignore_sysret) #endif .pushsection .text, "ax" -SYM_CODE_START(rewind_stack_and_make_dead) + __FUNC_ALIGN +SYM_CODE_START_NOALIGN(rewind_stack_and_make_dead) UNWIND_HINT_FUNC /* Prevent any naive code from trying to unwind to our caller. 
*/ xorl %ebp, %ebp diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S index f38b07d2768b..5e37f41e5f14 100644 --- a/arch/x86/entry/thunk_64.S +++ b/arch/x86/entry/thunk_64.S @@ -11,7 +11,7 @@ /* rdi: arg1 ... normal C conventions. rax is saved/restored. */ .macro THUNK name, func -SYM_FUNC_START_NOALIGN(\name) +SYM_FUNC_START(\name) pushq %rbp movq %rsp, %rbp @@ -36,7 +36,7 @@ SYM_FUNC_END(\name) EXPORT_SYMBOL(preempt_schedule_thunk) EXPORT_SYMBOL(preempt_schedule_notrace_thunk) -SYM_CODE_START_LOCAL_NOALIGN(__thunk_restore) +SYM_CODE_START_LOCAL(__thunk_restore) popq %r11 popq %r10 popq %r9 From f6dabc817e1f0da8f4088734ad0b9814adad0bce Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Sep 2022 13:10:52 +0200 Subject: [PATCH 0097/4122] crypto: x86/camellia: Remove redundant alignments SYM_FUNC_START*() and friends already imply alignment, remove custom alignment hacks to make code consistent. This prepares for future function call ABI changes. Also, with having pushed the function alignment to 16 bytes, this custom alignment is completely superfluous. 
Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111144.248229966@infradead.org --- arch/x86/crypto/camellia-aesni-avx-asm_64.S | 2 -- arch/x86/crypto/camellia-aesni-avx2-asm_64.S | 4 ---- 2 files changed, 6 deletions(-) diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S index 2e1658ddbe1a..4a30618281ec 100644 --- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S @@ -712,7 +712,6 @@ SYM_FUNC_END(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) .text -.align 8 SYM_FUNC_START_LOCAL(__camellia_enc_blk16) /* input: * %rdi: ctx, CTX @@ -799,7 +798,6 @@ SYM_FUNC_START_LOCAL(__camellia_enc_blk16) jmp .Lenc_done; SYM_FUNC_END(__camellia_enc_blk16) -.align 8 SYM_FUNC_START_LOCAL(__camellia_dec_blk16) /* input: * %rdi: ctx, CTX diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S index 0e4e9abbf4de..deaf62aa73a6 100644 --- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S @@ -221,7 +221,6 @@ * Size optimization... with inlined roundsm32 binary would be over 5 times * larger and would only marginally faster. 
*/ -.align 8 SYM_FUNC_START_LOCAL(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd) roundsm32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, %ymm15, @@ -229,7 +228,6 @@ SYM_FUNC_START_LOCAL(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_c RET; SYM_FUNC_END(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd) -.align 8 SYM_FUNC_START_LOCAL(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) roundsm32(%ymm4, %ymm5, %ymm6, %ymm7, %ymm0, %ymm1, %ymm2, %ymm3, %ymm12, %ymm13, %ymm14, %ymm15, %ymm8, %ymm9, %ymm10, %ymm11, @@ -748,7 +746,6 @@ SYM_FUNC_END(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) .text -.align 8 SYM_FUNC_START_LOCAL(__camellia_enc_blk32) /* input: * %rdi: ctx, CTX @@ -835,7 +832,6 @@ SYM_FUNC_START_LOCAL(__camellia_enc_blk32) jmp .Lenc_done; SYM_FUNC_END(__camellia_enc_blk32) -.align 8 SYM_FUNC_START_LOCAL(__camellia_dec_blk32) /* input: * %rdi: ctx, CTX From 88cdf02551f9aef9284d778ecd375c400555d900 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Sep 2022 13:10:53 +0200 Subject: [PATCH 0098/4122] crypto: x86/cast5: Remove redundant alignments SYM_FUNC_START*() and friends already imply alignment, remove custom alignment hacks to make code consistent. This prepares for future function call ABI changes. Also, with having pushed the function alignment to 16 bytes, this custom alignment is completely superfluous. 
Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111144.353555711@infradead.org --- arch/x86/crypto/cast5-avx-x86_64-asm_64.S | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S index b258af420c92..0326a01503c3 100644 --- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S @@ -208,7 +208,6 @@ .text -.align 16 SYM_FUNC_START_LOCAL(__cast5_enc_blk16) /* input: * %rdi: ctx @@ -282,7 +281,6 @@ SYM_FUNC_START_LOCAL(__cast5_enc_blk16) RET; SYM_FUNC_END(__cast5_enc_blk16) -.align 16 SYM_FUNC_START_LOCAL(__cast5_dec_blk16) /* input: * %rdi: ctx From ba1b270c20dfb7f7b7a076b1a97ef4b7dcb539b5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Sep 2022 13:10:54 +0200 Subject: [PATCH 0099/4122] crypto: x86/crct10dif-pcl: Remove redundant alignments SYM_FUNC_START*() and friends already imply alignment, remove custom alignment hacks to make code consistent. This prepares for future function call ABI changes. Also, with having pushed the function alignment to 16 bytes, this custom alignment is completely superfluous. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111144.456602381@infradead.org --- arch/x86/crypto/crct10dif-pcl-asm_64.S | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/crypto/crct10dif-pcl-asm_64.S b/arch/x86/crypto/crct10dif-pcl-asm_64.S index 721474abfb71..5286db5b8165 100644 --- a/arch/x86/crypto/crct10dif-pcl-asm_64.S +++ b/arch/x86/crypto/crct10dif-pcl-asm_64.S @@ -94,7 +94,6 @@ # # Assumes len >= 16. 
# -.align 16 SYM_FUNC_START(crc_t10dif_pcl) movdqa .Lbswap_mask(%rip), BSWAP_MASK From 8b44221671ec45d725a4558ff7aa5ea90ecfc885 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Sep 2022 13:10:55 +0200 Subject: [PATCH 0100/4122] crypto: x86/serpent: Remove redundant alignments SYM_FUNC_START*() and friends already imply alignment, remove custom alignment hacks to make code consistent. This prepares for future function call ABI changes. Also, with having pushed the function alignment to 16 bytes, this custom alignment is completely superfluous. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111144.558544791@infradead.org --- arch/x86/crypto/serpent-avx-x86_64-asm_64.S | 2 -- arch/x86/crypto/serpent-avx2-asm_64.S | 2 -- 2 files changed, 4 deletions(-) diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S index 82f2313f512b..97e283621851 100644 --- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S @@ -550,7 +550,6 @@ #define write_blocks(x0, x1, x2, x3, t0, t1, t2) \ transpose_4x4(x0, x1, x2, x3, t0, t1, t2) -.align 8 SYM_FUNC_START_LOCAL(__serpent_enc_blk8_avx) /* input: * %rdi: ctx, CTX @@ -604,7 +603,6 @@ SYM_FUNC_START_LOCAL(__serpent_enc_blk8_avx) RET; SYM_FUNC_END(__serpent_enc_blk8_avx) -.align 8 SYM_FUNC_START_LOCAL(__serpent_dec_blk8_avx) /* input: * %rdi: ctx, CTX diff --git a/arch/x86/crypto/serpent-avx2-asm_64.S b/arch/x86/crypto/serpent-avx2-asm_64.S index 8ea34c9b9316..6d60c50593a9 100644 --- a/arch/x86/crypto/serpent-avx2-asm_64.S +++ b/arch/x86/crypto/serpent-avx2-asm_64.S @@ -550,7 +550,6 @@ #define write_blocks(x0, x1, x2, x3, t0, t1, t2) \ transpose_4x4(x0, x1, x2, x3, t0, t1, t2) -.align 8 SYM_FUNC_START_LOCAL(__serpent_enc_blk16) /* input: * %rdi: ctx, CTX @@ -604,7 +603,6 @@ SYM_FUNC_START_LOCAL(__serpent_enc_blk16) RET; SYM_FUNC_END(__serpent_enc_blk16) -.align 8 
SYM_FUNC_START_LOCAL(__serpent_dec_blk16) /* input: * %rdi: ctx, CTX From c2a3ce6fdb122b12bc4cfffd28ecf8a9fb0d6736 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Sep 2022 13:10:56 +0200 Subject: [PATCH 0101/4122] crypto: x86/sha1: Remove custom alignments SYM_FUNC_START*() and friends already imply alignment, remove custom alignment hacks to make code consistent. This prepares for future function call ABI changes. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111144.662580589@infradead.org --- arch/x86/crypto/sha1_ni_asm.S | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/crypto/sha1_ni_asm.S b/arch/x86/crypto/sha1_ni_asm.S index 2f94ec0e763b..cd943b2af2c4 100644 --- a/arch/x86/crypto/sha1_ni_asm.S +++ b/arch/x86/crypto/sha1_ni_asm.S @@ -92,7 +92,6 @@ * numBlocks: Number of blocks to process */ .text -.align 32 SYM_FUNC_START(sha1_ni_transform) push %rbp mov %rsp, %rbp From 3ba56d0b87113785413dfc5b9910d45001cc4eeb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Sep 2022 13:10:57 +0200 Subject: [PATCH 0102/4122] crypto: x86/sha256: Remove custom alignments SYM_FUNC_START*() and friends already imply alignment, remove custom alignment hacks to make code consistent. This prepares for future function call ABI changes. 
Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111144.766564176@infradead.org --- arch/x86/crypto/sha256-avx-asm.S | 1 - arch/x86/crypto/sha256-avx2-asm.S | 1 - arch/x86/crypto/sha256-ssse3-asm.S | 1 - arch/x86/crypto/sha256_ni_asm.S | 1 - 4 files changed, 4 deletions(-) diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/crypto/sha256-avx-asm.S index 3baa1ec39097..3649370690c5 100644 --- a/arch/x86/crypto/sha256-avx-asm.S +++ b/arch/x86/crypto/sha256-avx-asm.S @@ -347,7 +347,6 @@ a = TMP_ ######################################################################## .text SYM_FUNC_START(sha256_transform_avx) -.align 32 pushq %rbx pushq %r12 pushq %r13 diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S index 9bcdbc47b8b4..c4c1dc5ee078 100644 --- a/arch/x86/crypto/sha256-avx2-asm.S +++ b/arch/x86/crypto/sha256-avx2-asm.S @@ -524,7 +524,6 @@ STACK_SIZE = _CTX + _CTX_SIZE ######################################################################## .text SYM_FUNC_START(sha256_transform_rorx) -.align 32 pushq %rbx pushq %r12 pushq %r13 diff --git a/arch/x86/crypto/sha256-ssse3-asm.S b/arch/x86/crypto/sha256-ssse3-asm.S index c4a5db612c32..96b7dcdeaebe 100644 --- a/arch/x86/crypto/sha256-ssse3-asm.S +++ b/arch/x86/crypto/sha256-ssse3-asm.S @@ -356,7 +356,6 @@ a = TMP_ ######################################################################## .text SYM_FUNC_START(sha256_transform_ssse3) -.align 32 pushq %rbx pushq %r12 pushq %r13 diff --git a/arch/x86/crypto/sha256_ni_asm.S b/arch/x86/crypto/sha256_ni_asm.S index 94d50dd27cb5..b3f1a1a12027 100644 --- a/arch/x86/crypto/sha256_ni_asm.S +++ b/arch/x86/crypto/sha256_ni_asm.S @@ -96,7 +96,6 @@ */ .text -.align 32 SYM_FUNC_START(sha256_ni_transform) shl $6, NUM_BLKS /* convert to bytes */ From 2f93238b87ddbbe1b050ec48ab5843fc61346adb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Sep 2022 13:10:58 +0200 Subject: [PATCH 
0103/4122] crypto: x86/sm[34]: Remove redundant alignments SYM_FUNC_START*() and friends already imply alignment, remove custom alignment hacks to make code consistent. This prepares for future function call ABI changes. Also, with having pushed the function alignment to 16 bytes, this custom alignment is completely superfluous. ( this code couldn't seem to make up its mind about what alignment it actually wanted, randomly mixing 8 and 16 bytes ) Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111144.868540856@infradead.org --- arch/x86/crypto/sm3-avx-asm_64.S | 1 - arch/x86/crypto/sm4-aesni-avx-asm_64.S | 7 ------- arch/x86/crypto/sm4-aesni-avx2-asm_64.S | 6 ------ 3 files changed, 14 deletions(-) diff --git a/arch/x86/crypto/sm3-avx-asm_64.S b/arch/x86/crypto/sm3-avx-asm_64.S index b12b9efb5ec5..b28d804ee10d 100644 --- a/arch/x86/crypto/sm3-avx-asm_64.S +++ b/arch/x86/crypto/sm3-avx-asm_64.S @@ -327,7 +327,6 @@ * void sm3_transform_avx(struct sm3_state *state, * const u8 *data, int nblocks); */ -.align 16 SYM_FUNC_START(sm3_transform_avx) /* input: * %rdi: ctx, CTX diff --git a/arch/x86/crypto/sm4-aesni-avx-asm_64.S b/arch/x86/crypto/sm4-aesni-avx-asm_64.S index 4767ab61ff48..e13c8537b2ec 100644 --- a/arch/x86/crypto/sm4-aesni-avx-asm_64.S +++ b/arch/x86/crypto/sm4-aesni-avx-asm_64.S @@ -139,13 +139,11 @@ .text -.align 16 /* * void sm4_aesni_avx_crypt4(const u32 *rk, u8 *dst, * const u8 *src, int nblocks) */ -.align 8 SYM_FUNC_START(sm4_aesni_avx_crypt4) /* input: * %rdi: round key array, CTX @@ -249,7 +247,6 @@ SYM_FUNC_START(sm4_aesni_avx_crypt4) RET; SYM_FUNC_END(sm4_aesni_avx_crypt4) -.align 8 SYM_FUNC_START_LOCAL(__sm4_crypt_blk8) /* input: * %rdi: round key array, CTX @@ -363,7 +360,6 @@ SYM_FUNC_END(__sm4_crypt_blk8) * void sm4_aesni_avx_crypt8(const u32 *rk, u8 *dst, * const u8 *src, int nblocks) */ -.align 8 SYM_FUNC_START(sm4_aesni_avx_crypt8) /* input: * %rdi: round key array, CTX @@ -419,7
+415,6 @@ SYM_FUNC_END(sm4_aesni_avx_crypt8) * void sm4_aesni_avx_ctr_enc_blk8(const u32 *rk, u8 *dst, * const u8 *src, u8 *iv) */ -.align 8 SYM_FUNC_START(sm4_aesni_avx_ctr_enc_blk8) /* input: * %rdi: round key array, CTX @@ -494,7 +489,6 @@ SYM_FUNC_END(sm4_aesni_avx_ctr_enc_blk8) * void sm4_aesni_avx_cbc_dec_blk8(const u32 *rk, u8 *dst, * const u8 *src, u8 *iv) */ -.align 8 SYM_FUNC_START(sm4_aesni_avx_cbc_dec_blk8) /* input: * %rdi: round key array, CTX @@ -544,7 +538,6 @@ SYM_FUNC_END(sm4_aesni_avx_cbc_dec_blk8) * void sm4_aesni_avx_cfb_dec_blk8(const u32 *rk, u8 *dst, * const u8 *src, u8 *iv) */ -.align 8 SYM_FUNC_START(sm4_aesni_avx_cfb_dec_blk8) /* input: * %rdi: round key array, CTX diff --git a/arch/x86/crypto/sm4-aesni-avx2-asm_64.S b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S index 4732fe8bb65b..2212705f7da6 100644 --- a/arch/x86/crypto/sm4-aesni-avx2-asm_64.S +++ b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S @@ -153,9 +153,6 @@ .long 0xdeadbeef, 0xdeadbeef, 0xdeadbeef .text -.align 16 - -.align 8 SYM_FUNC_START_LOCAL(__sm4_crypt_blk16) /* input: * %rdi: round key array, CTX @@ -281,7 +278,6 @@ SYM_FUNC_END(__sm4_crypt_blk16) * void sm4_aesni_avx2_ctr_enc_blk16(const u32 *rk, u8 *dst, * const u8 *src, u8 *iv) */ -.align 8 SYM_FUNC_START(sm4_aesni_avx2_ctr_enc_blk16) /* input: * %rdi: round key array, CTX @@ -394,7 +390,6 @@ SYM_FUNC_END(sm4_aesni_avx2_ctr_enc_blk16) * void sm4_aesni_avx2_cbc_dec_blk16(const u32 *rk, u8 *dst, * const u8 *src, u8 *iv) */ -.align 8 SYM_FUNC_START(sm4_aesni_avx2_cbc_dec_blk16) /* input: * %rdi: round key array, CTX @@ -448,7 +443,6 @@ SYM_FUNC_END(sm4_aesni_avx2_cbc_dec_blk16) * void sm4_aesni_avx2_cfb_dec_blk16(const u32 *rk, u8 *dst, * const u8 *src, u8 *iv) */ -.align 8 SYM_FUNC_START(sm4_aesni_avx2_cfb_dec_blk16) /* input: * %rdi: round key array, CTX From e2c9475e88f70820270ca5cc81a1ae2fda262278 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Sep 2022 13:10:59 +0200 Subject: [PATCH 0104/4122] crypto: twofish: 
Remove redundant alignments SYM_FUNC_START*() and friends already imply alignment, remove custom alignment hacks to make code consistent. This prepares for future function call ABI changes. Also, with having pushed the function alignment to 16 bytes, this custom alignment is completely superfluous. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111144.971229477@infradead.org --- arch/x86/crypto/twofish-avx-x86_64-asm_64.S | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S index 31f9b2ec3857..12fde271cd3f 100644 --- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S @@ -228,7 +228,6 @@ vpxor x2, wkey, x2; \ vpxor x3, wkey, x3; -.align 8 SYM_FUNC_START_LOCAL(__twofish_enc_blk8) /* input: * %rdi: ctx, CTX @@ -270,7 +269,6 @@ SYM_FUNC_START_LOCAL(__twofish_enc_blk8) RET; SYM_FUNC_END(__twofish_enc_blk8) -.align 8 SYM_FUNC_START_LOCAL(__twofish_dec_blk8) /* input: * %rdi: ctx, CTX From fdc9ee7e97aa2c1dfa7ebb092fffec40ffa59108 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Sep 2022 13:11:00 +0200 Subject: [PATCH 0105/4122] crypto: x86/poly1305: Remove custom function alignment SYM_FUNC_START*() and friends already imply alignment, remove custom alignment hacks to make code consistent. This prepares for future function call ABI changes. 
Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111145.073285765@infradead.org --- arch/x86/crypto/poly1305-x86_64-cryptogams.pl | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl index 2077ce7a5647..b9abcd79c1f4 100644 --- a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl +++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl @@ -108,7 +108,6 @@ if (!$kernel) { sub declare_function() { my ($name, $align, $nargs) = @_; if($kernel) { - $code .= ".align $align\n"; $code .= "SYM_FUNC_START($name)\n"; $code .= ".L$name:\n"; } else { From e57ef2ed97c1d078973298658a8096644a1e9e09 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Sep 2022 13:11:01 +0200 Subject: [PATCH 0106/4122] x86: Put hot per CPU variables into a struct The layout of per-cpu variables is at the mercy of the compiler. This can lead to random performance fluctuations from build to build. Create a structure to hold some of the hottest per-cpu variables, starting with current_task. 
Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111145.179707194@infradead.org --- arch/x86/include/asm/current.h | 19 ++++++++++++++++--- arch/x86/kernel/cpu/common.c | 14 +++++--------- arch/x86/kernel/process_32.c | 2 +- arch/x86/kernel/process_64.c | 2 +- arch/x86/kernel/smpboot.c | 2 +- 5 files changed, 24 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h index 3e204e6140b5..63c42ac3cd86 100644 --- a/arch/x86/include/asm/current.h +++ b/arch/x86/include/asm/current.h @@ -3,16 +3,29 @@ #define _ASM_X86_CURRENT_H #include -#include #ifndef __ASSEMBLY__ + +#include +#include + struct task_struct; -DECLARE_PER_CPU(struct task_struct *, current_task); +struct pcpu_hot { + union { + struct { + struct task_struct *current_task; + }; + u8 pad[64]; + }; +}; +static_assert(sizeof(struct pcpu_hot) == 64); + +DECLARE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot); static __always_inline struct task_struct *get_current(void) { - return this_cpu_read_stable(current_task); + return this_cpu_read_stable(pcpu_hot.current_task); } #define current get_current() diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8e873181759a..52071539a14c 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -2012,18 +2012,16 @@ static __init int setup_clearcpuid(char *arg) } __setup("clearcpuid=", setup_clearcpuid); +DEFINE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot) = { + .current_task = &init_task, +}; +EXPORT_PER_CPU_SYMBOL(pcpu_hot); + #ifdef CONFIG_X86_64 DEFINE_PER_CPU_FIRST(struct fixed_percpu_data, fixed_percpu_data) __aligned(PAGE_SIZE) __visible; EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data); -/* - * The following percpu variables are hot. Align current_task to - * cacheline size such that they fall in the same cacheline. 
- */ -DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned = - &init_task; -EXPORT_PER_CPU_SYMBOL(current_task); DEFINE_PER_CPU(void *, hardirq_stack_ptr); DEFINE_PER_CPU(bool, hardirq_stack_inuse); @@ -2083,8 +2081,6 @@ void syscall_init(void) #else /* CONFIG_X86_64 */ -DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; -EXPORT_PER_CPU_SYMBOL(current_task); DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; EXPORT_PER_CPU_SYMBOL(__preempt_count); diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 2f314b170c9f..807da45d84c7 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -207,7 +207,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) if (prev->gs | next->gs) loadsegment(gs, next->gs); - this_cpu_write(current_task, next_p); + raw_cpu_write(pcpu_hot.current_task, next_p); switch_fpu_finish(); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 6b3418bff326..c4f6cacf6599 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -617,7 +617,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* * Switch the PDA and FPU contexts. */ - this_cpu_write(current_task, next_p); + raw_cpu_write(pcpu_hot.current_task, next_p); this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p)); switch_fpu_finish(); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index ce8728d2e5ef..05f315777691 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1046,7 +1046,7 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle) /* Just in case we booted with a single CPU. 
*/ alternatives_enable_smp(); - per_cpu(current_task, cpu) = idle; + per_cpu(pcpu_hot.current_task, cpu) = idle; cpu_init_stack_canary(cpu, idle); /* Initialize the interrupt stack(s) */ From 64701838bf0575ef8acb1ad2db5934e864f3e6c3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Sep 2022 13:11:02 +0200 Subject: [PATCH 0107/4122] x86/percpu: Move preempt_count next to current_task Add preempt_count to pcpu_hot, since it is one of the most used per-cpu variables. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111145.284170644@infradead.org --- arch/x86/include/asm/current.h | 1 + arch/x86/include/asm/preempt.h | 27 ++++++++++++++------------- arch/x86/kernel/cpu/common.c | 8 +------- 3 files changed, 16 insertions(+), 20 deletions(-) diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h index 63c42ac3cd86..0f4b46293c6c 100644 --- a/arch/x86/include/asm/current.h +++ b/arch/x86/include/asm/current.h @@ -15,6 +15,7 @@ struct pcpu_hot { union { struct { struct task_struct *current_task; + int preempt_count; }; u8 pad[64]; }; diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index 5f6daea1ee24..2d13f25b1bd8 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -4,11 +4,11 @@ #include #include +#include + #include #include -DECLARE_PER_CPU(int, __preempt_count); - /* We use the MSB mostly because its available */ #define PREEMPT_NEED_RESCHED 0x80000000 @@ -24,7 +24,7 @@ DECLARE_PER_CPU(int, __preempt_count); */ static __always_inline int preempt_count(void) { - return raw_cpu_read_4(__preempt_count) & ~PREEMPT_NEED_RESCHED; + return raw_cpu_read_4(pcpu_hot.preempt_count) & ~PREEMPT_NEED_RESCHED; } static __always_inline void preempt_count_set(int pc) @@ -32,10 +32,10 @@ static __always_inline void preempt_count_set(int pc) int old, new; do { - old = raw_cpu_read_4(__preempt_count); + old =
raw_cpu_read_4(pcpu_hot.preempt_count); new = (old & PREEMPT_NEED_RESCHED) | (pc & ~PREEMPT_NEED_RESCHED); - } while (raw_cpu_cmpxchg_4(__preempt_count, old, new) != old); + } while (raw_cpu_cmpxchg_4(pcpu_hot.preempt_count, old, new) != old); } /* @@ -44,7 +44,7 @@ static __always_inline void preempt_count_set(int pc) #define init_task_preempt_count(p) do { } while (0) #define init_idle_preempt_count(p, cpu) do { \ - per_cpu(__preempt_count, (cpu)) = PREEMPT_DISABLED; \ + per_cpu(pcpu_hot.preempt_count, (cpu)) = PREEMPT_DISABLED; \ } while (0) /* @@ -58,17 +58,17 @@ static __always_inline void preempt_count_set(int pc) static __always_inline void set_preempt_need_resched(void) { - raw_cpu_and_4(__preempt_count, ~PREEMPT_NEED_RESCHED); + raw_cpu_and_4(pcpu_hot.preempt_count, ~PREEMPT_NEED_RESCHED); } static __always_inline void clear_preempt_need_resched(void) { - raw_cpu_or_4(__preempt_count, PREEMPT_NEED_RESCHED); + raw_cpu_or_4(pcpu_hot.preempt_count, PREEMPT_NEED_RESCHED); } static __always_inline bool test_preempt_need_resched(void) { - return !(raw_cpu_read_4(__preempt_count) & PREEMPT_NEED_RESCHED); + return !(raw_cpu_read_4(pcpu_hot.preempt_count) & PREEMPT_NEED_RESCHED); } /* @@ -77,12 +77,12 @@ static __always_inline bool test_preempt_need_resched(void) static __always_inline void __preempt_count_add(int val) { - raw_cpu_add_4(__preempt_count, val); + raw_cpu_add_4(pcpu_hot.preempt_count, val); } static __always_inline void __preempt_count_sub(int val) { - raw_cpu_add_4(__preempt_count, -val); + raw_cpu_add_4(pcpu_hot.preempt_count, -val); } /* @@ -92,7 +92,8 @@ static __always_inline void __preempt_count_sub(int val) */ static __always_inline bool __preempt_count_dec_and_test(void) { - return GEN_UNARY_RMWcc("decl", __preempt_count, e, __percpu_arg([var])); + return GEN_UNARY_RMWcc("decl", pcpu_hot.preempt_count, e, + __percpu_arg([var])); } /* @@ -100,7 +101,7 @@ static __always_inline bool __preempt_count_dec_and_test(void) */ static __always_inline 
bool should_resched(int preempt_offset) { - return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset); + return unlikely(raw_cpu_read_4(pcpu_hot.preempt_count) == preempt_offset); } #ifdef CONFIG_PREEMPTION diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 52071539a14c..cafb6bd90d10 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -2014,6 +2014,7 @@ __setup("clearcpuid=", setup_clearcpuid); DEFINE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot) = { .current_task = &init_task, + .preempt_count = INIT_PREEMPT_COUNT, }; EXPORT_PER_CPU_SYMBOL(pcpu_hot); @@ -2022,13 +2023,9 @@ DEFINE_PER_CPU_FIRST(struct fixed_percpu_data, fixed_percpu_data) __aligned(PAGE_SIZE) __visible; EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data); - DEFINE_PER_CPU(void *, hardirq_stack_ptr); DEFINE_PER_CPU(bool, hardirq_stack_inuse); -DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; -EXPORT_PER_CPU_SYMBOL(__preempt_count); - DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) = TOP_OF_INIT_STACK; static void wrmsrl_cstar(unsigned long val) @@ -2081,9 +2078,6 @@ void syscall_init(void) #else /* CONFIG_X86_64 */ -DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; -EXPORT_PER_CPU_SYMBOL(__preempt_count); - /* * On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find * the top of the kernel stack. Use an extra percpu variable to track the From 7443b296e699e6922f5be243c8d2e316de8cacbe Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Sep 2022 13:11:03 +0200 Subject: [PATCH 0108/4122] x86/percpu: Move cpu_number next to current_task Also add cpu_number to the pcpu_hot structure, it is often referenced and this cacheline is there. 
Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111145.387678283@infradead.org --- arch/x86/include/asm/current.h | 1 + arch/x86/include/asm/smp.h | 12 +++++------- arch/x86/kernel/setup_percpu.c | 5 +---- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h index 0f4b46293c6c..8ac6589e9a1b 100644 --- a/arch/x86/include/asm/current.h +++ b/arch/x86/include/asm/current.h @@ -16,6 +16,7 @@ struct pcpu_hot { struct { struct task_struct *current_task; int preempt_count; + int cpu_number; }; u8 pad[64]; }; diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index a73bced40e24..b4dbb20dab1a 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -3,10 +3,10 @@ #define _ASM_X86_SMP_H #ifndef __ASSEMBLY__ #include -#include -#include #include +#include +#include extern int smp_num_siblings; extern unsigned int num_processors; @@ -19,7 +19,6 @@ DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map); DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id); DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_l2c_id); -DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number); DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid); DECLARE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid); @@ -150,11 +149,10 @@ __visible void smp_call_function_single_interrupt(struct pt_regs *r); /* * This function is needed by all SMP systems. It must _always_ be valid - * from the initial startup. We map APIC_BASE very early in page_setup(), - * so this is correct in the x86 case. + * from the initial startup. 
*/ -#define raw_smp_processor_id() this_cpu_read(cpu_number) -#define __smp_processor_id() __this_cpu_read(cpu_number) +#define raw_smp_processor_id() this_cpu_read(pcpu_hot.cpu_number) +#define __smp_processor_id() __this_cpu_read(pcpu_hot.cpu_number) #ifdef CONFIG_X86_32 extern int safe_smp_processor_id(void); diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 555089a5b446..c2fc4c41c164 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -23,9 +23,6 @@ #include #include -DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number); -EXPORT_PER_CPU_SYMBOL(cpu_number); - #ifdef CONFIG_X86_64 #define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load) #else @@ -172,7 +169,7 @@ void __init setup_per_cpu_areas(void) for_each_possible_cpu(cpu) { per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu]; per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); - per_cpu(cpu_number, cpu) = cpu; + per_cpu(pcpu_hot.cpu_number, cpu) = cpu; setup_percpu_segment(cpu); /* * Copy data used in early init routines from the From c063a217bc0726c2560138229de5673dbb253a02 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Sep 2022 13:11:04 +0200 Subject: [PATCH 0109/4122] x86/percpu: Move current_top_of_stack next to current_task Extend the struct pcpu_hot cacheline with current_top_of_stack; another very frequently used value. 
Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111145.493038635@infradead.org --- arch/x86/entry/entry_32.S | 4 ++-- arch/x86/entry/entry_64.S | 6 +++--- arch/x86/entry/entry_64_compat.S | 6 +++--- arch/x86/include/asm/current.h | 1 + arch/x86/include/asm/processor.h | 4 +--- arch/x86/kernel/asm-offsets.c | 2 ++ arch/x86/kernel/cpu/common.c | 12 +----------- arch/x86/kernel/process_32.c | 4 ++-- arch/x86/kernel/process_64.c | 2 +- arch/x86/kernel/smpboot.c | 2 +- arch/x86/kernel/traps.c | 4 ++-- 11 files changed, 19 insertions(+), 28 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index e309e7156038..91397f58ac30 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1181,7 +1181,7 @@ SYM_CODE_START(asm_exc_nmi) * is using the thread stack right now, so it's safe for us to use it. */ movl %esp, %ebx - movl PER_CPU_VAR(cpu_current_top_of_stack), %esp + movl PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %esp call exc_nmi movl %ebx, %esp @@ -1243,7 +1243,7 @@ SYM_CODE_START(rewind_stack_and_make_dead) /* Prevent any naive code from trying to unwind to our caller. */ xorl %ebp, %ebp - movl PER_CPU_VAR(cpu_current_top_of_stack), %esi + movl PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %esi leal -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%esi), %esp call make_task_dead diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index e635f962afb8..9249a45cf53f 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -92,7 +92,7 @@ SYM_CODE_START(entry_SYSCALL_64) /* tss.sp2 is scratch space. 
*/ movq %rsp, PER_CPU_VAR(cpu_tss_rw + TSS_sp2) SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp - movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp SYM_INNER_LABEL(entry_SYSCALL_64_safe_stack, SYM_L_GLOBAL) ANNOTATE_NOENDBR @@ -1209,7 +1209,7 @@ SYM_CODE_START(asm_exc_nmi) FENCE_SWAPGS_USER_ENTRY SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx movq %rsp, %rdx - movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp UNWIND_HINT_IRET_REGS base=%rdx offset=8 pushq 5*8(%rdx) /* pt_regs->ss */ pushq 4*8(%rdx) /* pt_regs->rsp */ @@ -1525,7 +1525,7 @@ SYM_CODE_START_NOALIGN(rewind_stack_and_make_dead) /* Prevent any naive code from trying to unwind to our caller. */ xorl %ebp, %ebp - movq PER_CPU_VAR(cpu_current_top_of_stack), %rax + movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rax leaq -PTREGS_SIZE(%rax), %rsp UNWIND_HINT_REGS diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 4dd19819053a..1dfee868d4a1 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -58,7 +58,7 @@ SYM_CODE_START(entry_SYSENTER_compat) SWITCH_TO_KERNEL_CR3 scratch_reg=%rax popq %rax - movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp /* Construct struct pt_regs on stack */ pushq $__USER32_DS /* pt_regs->ss */ @@ -191,7 +191,7 @@ SYM_CODE_START(entry_SYSCALL_compat) SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp /* Switch to the kernel stack */ - movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL) ANNOTATE_NOENDBR @@ -332,7 +332,7 @@ SYM_CODE_START(entry_INT80_compat) ALTERNATIVE "", "jmp .Lint80_keep_stack", X86_FEATURE_XENPV movq %rsp, %rax - movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp pushq 5*8(%rax) /* regs->ss */ pushq 4*8(%rax) 
/* regs->rsp */ diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h index 8ac6589e9a1b..2dd013128f1e 100644 --- a/arch/x86/include/asm/current.h +++ b/arch/x86/include/asm/current.h @@ -17,6 +17,7 @@ struct pcpu_hot { struct task_struct *current_task; int preempt_count; int cpu_number; + unsigned long top_of_stack; }; u8 pad[64]; }; diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index c660700ecfc6..c345f3096c80 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -426,8 +426,6 @@ struct irq_stack { char stack[IRQ_STACK_SIZE]; } __aligned(IRQ_STACK_SIZE); -DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack); - #ifdef CONFIG_X86_64 struct fixed_percpu_data { /* @@ -566,7 +564,7 @@ static __always_inline unsigned long current_top_of_stack(void) * and around vm86 mode and sp0 on x86_64 is special because of the * entry trampoline. */ - return this_cpu_read_stable(cpu_current_top_of_stack); + return this_cpu_read_stable(pcpu_hot.top_of_stack); } static __always_inline bool on_thread_stack(void) diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index cb50589a7102..a9824318e1c5 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -109,6 +109,8 @@ static void __used common(void) OFFSET(TSS_sp1, tss_struct, x86_tss.sp1); OFFSET(TSS_sp2, tss_struct, x86_tss.sp2); + OFFSET(X86_top_of_stack, pcpu_hot, top_of_stack); + if (IS_ENABLED(CONFIG_KVM_INTEL)) { BLANK(); OFFSET(VMX_spec_ctrl, vcpu_vmx, spec_ctrl); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cafb6bd90d10..408245c2eead 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -2015,6 +2015,7 @@ __setup("clearcpuid=", setup_clearcpuid); DEFINE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot) = { .current_task = &init_task, .preempt_count = INIT_PREEMPT_COUNT, + .top_of_stack = TOP_OF_INIT_STACK, }; 
EXPORT_PER_CPU_SYMBOL(pcpu_hot); @@ -2026,8 +2027,6 @@ EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data); DEFINE_PER_CPU(void *, hardirq_stack_ptr); DEFINE_PER_CPU(bool, hardirq_stack_inuse); -DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) = TOP_OF_INIT_STACK; - static void wrmsrl_cstar(unsigned long val) { /* @@ -2078,15 +2077,6 @@ void syscall_init(void) #else /* CONFIG_X86_64 */ -/* - * On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find - * the top of the kernel stack. Use an extra percpu variable to track the - * top of the kernel stack directly. - */ -DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) = - (unsigned long)&init_thread_union + THREAD_SIZE; -EXPORT_PER_CPU_SYMBOL(cpu_current_top_of_stack); - #ifdef CONFIG_STACKPROTECTOR DEFINE_PER_CPU(unsigned long, __stack_chk_guard); EXPORT_PER_CPU_SYMBOL(__stack_chk_guard); diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 807da45d84c7..470c128759ea 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -191,13 +191,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) arch_end_context_switch(next_p); /* - * Reload esp0 and cpu_current_top_of_stack. This changes + * Reload esp0 and pcpu_hot.top_of_stack. This changes * current_thread_info(). Refresh the SYSENTER configuration in * case prev or next is vm86. */ update_task_stack(next_p); refresh_sysenter_cs(next); - this_cpu_write(cpu_current_top_of_stack, + this_cpu_write(pcpu_hot.top_of_stack, (unsigned long)task_stack_page(next_p) + THREAD_SIZE); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index c4f6cacf6599..7f807e8bc923 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -618,7 +618,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * Switch the PDA and FPU contexts. 
*/ raw_cpu_write(pcpu_hot.current_task, next_p); - this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p)); + raw_cpu_write(pcpu_hot.top_of_stack, task_top_of_stack(next_p)); switch_fpu_finish(); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 05f315777691..87863a93e918 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1056,7 +1056,7 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle) #ifdef CONFIG_X86_32 /* Stack for startup_32 can be just as for start_secondary onwards */ - per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle); + per_cpu(pcpu_hot.top_of_stack, cpu) = task_top_of_stack(idle); #else initial_gs = per_cpu_offset(cpu); #endif diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 178015a820f0..7ac19aba8983 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -851,7 +851,7 @@ DEFINE_IDTENTRY_RAW(exc_int3) */ asmlinkage __visible noinstr struct pt_regs *sync_regs(struct pt_regs *eregs) { - struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1; + struct pt_regs *regs = (struct pt_regs *)this_cpu_read(pcpu_hot.top_of_stack) - 1; if (regs != eregs) *regs = *eregs; return regs; @@ -869,7 +869,7 @@ asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *r * trust it and switch to the current kernel stack */ if (ip_within_syscall_gap(regs)) { - sp = this_cpu_read(cpu_current_top_of_stack); + sp = this_cpu_read(pcpu_hot.top_of_stack); goto sync; } From d7b6d709a76a4f4ef3108ac41e1b39eb80f5c084 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Sep 2022 13:11:05 +0200 Subject: [PATCH 0110/4122] x86/percpu: Move irq_stack variables next to current_task Further extend struct pcpu_hot with the hard and soft irq stack pointers. 
Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111145.599170752@infradead.org --- arch/x86/include/asm/current.h | 6 ++++++ arch/x86/include/asm/irq_stack.h | 12 ++++++------ arch/x86/include/asm/processor.h | 4 ---- arch/x86/kernel/cpu/common.c | 3 --- arch/x86/kernel/dumpstack_32.c | 4 ++-- arch/x86/kernel/dumpstack_64.c | 2 +- arch/x86/kernel/irq_32.c | 13 +++++-------- arch/x86/kernel/irq_64.c | 6 +++--- arch/x86/kernel/process_64.c | 2 +- 9 files changed, 24 insertions(+), 28 deletions(-) diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h index 2dd013128f1e..ac3090ddf34e 100644 --- a/arch/x86/include/asm/current.h +++ b/arch/x86/include/asm/current.h @@ -18,6 +18,12 @@ struct pcpu_hot { int preempt_count; int cpu_number; unsigned long top_of_stack; + void *hardirq_stack_ptr; +#ifdef CONFIG_X86_64 + bool hardirq_stack_inuse; +#else + void *softirq_stack_ptr; +#endif }; u8 pad[64]; }; diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h index 147cb8fdda92..798183867d78 100644 --- a/arch/x86/include/asm/irq_stack.h +++ b/arch/x86/include/asm/irq_stack.h @@ -116,7 +116,7 @@ ASM_CALL_ARG2 #define call_on_irqstack(func, asm_call, argconstr...) \ - call_on_stack(__this_cpu_read(hardirq_stack_ptr), \ + call_on_stack(__this_cpu_read(pcpu_hot.hardirq_stack_ptr), \ func, asm_call, argconstr) /* Macros to assert type correctness for run_*_on_irqstack macros */ @@ -135,7 +135,7 @@ * User mode entry and interrupt on the irq stack do not \ * switch stacks. If from user mode the task stack is empty. \ */ \ - if (user_mode(regs) || __this_cpu_read(hardirq_stack_inuse)) { \ + if (user_mode(regs) || __this_cpu_read(pcpu_hot.hardirq_stack_inuse)) { \ irq_enter_rcu(); \ func(c_args); \ irq_exit_rcu(); \ @@ -146,9 +146,9 @@ * places. Invoke the stack switch macro with the call \ * sequence which matches the above direct invocation. 
\ */ \ - __this_cpu_write(hardirq_stack_inuse, true); \ + __this_cpu_write(pcpu_hot.hardirq_stack_inuse, true); \ call_on_irqstack(func, asm_call, constr); \ - __this_cpu_write(hardirq_stack_inuse, false); \ + __this_cpu_write(pcpu_hot.hardirq_stack_inuse, false); \ } \ } @@ -212,9 +212,9 @@ */ #define do_softirq_own_stack() \ { \ - __this_cpu_write(hardirq_stack_inuse, true); \ + __this_cpu_write(pcpu_hot.hardirq_stack_inuse, true); \ call_on_irqstack(__do_softirq, ASM_CALL_ARG0); \ - __this_cpu_write(hardirq_stack_inuse, false); \ + __this_cpu_write(pcpu_hot.hardirq_stack_inuse, false); \ } #endif diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index c345f3096c80..bdde68744eb3 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -448,8 +448,6 @@ static inline unsigned long cpu_kernelmode_gs_base(int cpu) return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu); } -DECLARE_PER_CPU(void *, hardirq_stack_ptr); -DECLARE_PER_CPU(bool, hardirq_stack_inuse); extern asmlinkage void ignore_sysret(void); /* Save actual FS/GS selectors and bases to current->thread */ @@ -458,8 +456,6 @@ void current_save_fsgs(void); #ifdef CONFIG_STACKPROTECTOR DECLARE_PER_CPU(unsigned long, __stack_chk_guard); #endif -DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr); -DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr); #endif /* !X86_64 */ struct perf_event; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 408245c2eead..2bec4b4b2c50 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -2024,9 +2024,6 @@ DEFINE_PER_CPU_FIRST(struct fixed_percpu_data, fixed_percpu_data) __aligned(PAGE_SIZE) __visible; EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data); -DEFINE_PER_CPU(void *, hardirq_stack_ptr); -DEFINE_PER_CPU(bool, hardirq_stack_inuse); - static void wrmsrl_cstar(unsigned long val) { /* diff --git a/arch/x86/kernel/dumpstack_32.c 
b/arch/x86/kernel/dumpstack_32.c index 722fd712e1cf..b4905d5173fd 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -37,7 +37,7 @@ const char *stack_type_name(enum stack_type type) static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info) { - unsigned long *begin = (unsigned long *)this_cpu_read(hardirq_stack_ptr); + unsigned long *begin = (unsigned long *)this_cpu_read(pcpu_hot.hardirq_stack_ptr); unsigned long *end = begin + (THREAD_SIZE / sizeof(long)); /* @@ -62,7 +62,7 @@ static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info) static bool in_softirq_stack(unsigned long *stack, struct stack_info *info) { - unsigned long *begin = (unsigned long *)this_cpu_read(softirq_stack_ptr); + unsigned long *begin = (unsigned long *)this_cpu_read(pcpu_hot.softirq_stack_ptr); unsigned long *end = begin + (THREAD_SIZE / sizeof(long)); /* diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 6c5defd6569a..f05339fee778 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -134,7 +134,7 @@ static __always_inline bool in_exception_stack(unsigned long *stack, struct stac static __always_inline bool in_irq_stack(unsigned long *stack, struct stack_info *info) { - unsigned long *end = (unsigned long *)this_cpu_read(hardirq_stack_ptr); + unsigned long *end = (unsigned long *)this_cpu_read(pcpu_hot.hardirq_stack_ptr); unsigned long *begin; /* diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 01833ebf5e8e..dc1049c01f9b 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -52,9 +52,6 @@ static inline int check_stack_overflow(void) { return 0; } static inline void print_stack_overflow(void) { } #endif -DEFINE_PER_CPU(struct irq_stack *, hardirq_stack_ptr); -DEFINE_PER_CPU(struct irq_stack *, softirq_stack_ptr); - static void call_on_stack(void *func, void *stack) { asm volatile("xchgl %%ebx,%%esp \n" @@ -77,7 +74,7 @@ 
static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) u32 *isp, *prev_esp, arg1; curstk = (struct irq_stack *) current_stack(); - irqstk = __this_cpu_read(hardirq_stack_ptr); + irqstk = __this_cpu_read(pcpu_hot.hardirq_stack_ptr); /* * this is where we switch to the IRQ stack. However, if we are @@ -115,7 +112,7 @@ int irq_init_percpu_irqstack(unsigned int cpu) int node = cpu_to_node(cpu); struct page *ph, *ps; - if (per_cpu(hardirq_stack_ptr, cpu)) + if (per_cpu(pcpu_hot.hardirq_stack_ptr, cpu)) return 0; ph = alloc_pages_node(node, THREADINFO_GFP, THREAD_SIZE_ORDER); @@ -127,8 +124,8 @@ int irq_init_percpu_irqstack(unsigned int cpu) return -ENOMEM; } - per_cpu(hardirq_stack_ptr, cpu) = page_address(ph); - per_cpu(softirq_stack_ptr, cpu) = page_address(ps); + per_cpu(pcpu_hot.hardirq_stack_ptr, cpu) = page_address(ph); + per_cpu(pcpu_hot.softirq_stack_ptr, cpu) = page_address(ps); return 0; } @@ -138,7 +135,7 @@ void do_softirq_own_stack(void) struct irq_stack *irqstk; u32 *isp, *prev_esp; - irqstk = __this_cpu_read(softirq_stack_ptr); + irqstk = __this_cpu_read(pcpu_hot.softirq_stack_ptr); /* build the stack frame on the softirq stack */ isp = (u32 *) ((char *)irqstk + sizeof(*irqstk)); diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 1c0fb96b9e39..fe0c859873d1 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -50,7 +50,7 @@ static int map_irq_stack(unsigned int cpu) return -ENOMEM; /* Store actual TOS to avoid adjustment in the hotpath */ - per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8; + per_cpu(pcpu_hot.hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8; return 0; } #else @@ -63,14 +63,14 @@ static int map_irq_stack(unsigned int cpu) void *va = per_cpu_ptr(&irq_stack_backing_store, cpu); /* Store actual TOS to avoid adjustment in the hotpath */ - per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8; + per_cpu(pcpu_hot.hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8; return 0; } 
#endif int irq_init_percpu_irqstack(unsigned int cpu) { - if (per_cpu(hardirq_stack_ptr, cpu)) + if (per_cpu(pcpu_hot.hardirq_stack_ptr, cpu)) return 0; return map_irq_stack(cpu); } diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 7f807e8bc923..1312de5b76aa 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -563,7 +563,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) int cpu = smp_processor_id(); WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && - this_cpu_read(hardirq_stack_inuse)); + this_cpu_read(pcpu_hot.hardirq_stack_inuse)); if (!test_thread_flag(TIF_NEED_FPU_LOAD)) switch_fpu_prepare(prev_fpu, cpu); From 7fcecafebed90d03f35bec6e147fc0b5f6e1bc71 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Sep 2022 13:11:06 +0200 Subject: [PATCH 0111/4122] x86/softirq: Move softirq pending next to current task Another hot variable which is strict per CPU and benefits from being in the same cache line. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111145.702133710@infradead.org --- arch/x86/include/asm/current.h | 1 + arch/x86/include/asm/hardirq.h | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h index ac3090ddf34e..b89aba077b84 100644 --- a/arch/x86/include/asm/current.h +++ b/arch/x86/include/asm/current.h @@ -19,6 +19,7 @@ struct pcpu_hot { int cpu_number; unsigned long top_of_stack; void *hardirq_stack_ptr; + u16 softirq_pending; #ifdef CONFIG_X86_64 bool hardirq_stack_inuse; #else diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 275e7fd20310..66837b8c67f1 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -3,9 +3,9 @@ #define _ASM_X86_HARDIRQ_H #include +#include typedef struct { - u16 __softirq_pending; #if IS_ENABLED(CONFIG_KVM_INTEL) u8 kvm_cpu_l1tf_flush_l1d; 
#endif @@ -60,6 +60,7 @@ extern u64 arch_irq_stat_cpu(unsigned int cpu); extern u64 arch_irq_stat(void); #define arch_irq_stat arch_irq_stat +#define local_softirq_pending_ref pcpu_hot.softirq_pending #if IS_ENABLED(CONFIG_KVM_INTEL) static inline void kvm_set_cpu_l1tf_flush_l1d(void) From 5b71ac8a2a3185da34a6556e791b533b48183a41 Mon Sep 17 00:00:00 2001 From: "Peter Zijlstra (Intel)" Date: Mon, 17 Oct 2022 16:41:06 +0200 Subject: [PATCH 0112/4122] x86: Fixup asm-offsets duplicate It turns out that 'stack_canary_offset' is a variable name; shadowing that with a #define is ripe of fail when the asm-offsets.h header gets included. Rename the thing. Signed-off-by: Peter Zijlstra (Intel) --- arch/x86/entry/entry_64.S | 2 +- arch/x86/kernel/asm-offsets_64.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 9249a45cf53f..5c578a7dfcd7 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -252,7 +252,7 @@ SYM_FUNC_START(__switch_to_asm) #ifdef CONFIG_STACKPROTECTOR movq TASK_stack_canary(%rsi), %rbx - movq %rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset + movq %rbx, PER_CPU_VAR(fixed_percpu_data) + FIXED_stack_canary #endif /* diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 9b698215d261..bb65371ea9df 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -57,7 +57,7 @@ int main(void) BLANK(); #ifdef CONFIG_STACKPROTECTOR - DEFINE(stack_canary_offset, offsetof(struct fixed_percpu_data, stack_canary)); + OFFSET(FIXED_stack_canary, fixed_percpu_data, stack_canary); BLANK(); #endif return 0; From 61c6065ef7ec0447a280179d04b2d81c80c2f479 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 15 Sep 2022 13:11:07 +0200 Subject: [PATCH 0113/4122] objtool: Allow !PC relative relocations Objtool doesn't currently much like per-cpu usage in alternatives: arch/x86/entry/entry_64.o: warning: objtool: 
.altinstr_replacement+0xf: unsupported relocation in alternatives section f: 65 c7 04 25 00 00 00 00 00 00 00 80 movl $0x80000000,%gs:0x0 13: R_X86_64_32S __x86_call_depth Since the R_X86_64_32S relocation is location invariant (its computation doesn't include P - the address of the location itself), it can be trivially allowed. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111145.806607235@infradead.org --- tools/objtool/arch/x86/decode.c | 24 ++++++++++++++++++++++++ tools/objtool/check.c | 2 +- tools/objtool/include/objtool/arch.h | 2 ++ 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 1c253b4b7ce0..f0943830add7 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -73,6 +73,30 @@ unsigned long arch_jump_destination(struct instruction *insn) return insn->offset + insn->len + insn->immediate; } +bool arch_pc_relative_reloc(struct reloc *reloc) +{ + /* + * All relocation types where P (the address of the target) + * is included in the computation. + */ + switch (reloc->type) { + case R_X86_64_PC8: + case R_X86_64_PC16: + case R_X86_64_PC32: + case R_X86_64_PC64: + + case R_X86_64_PLT32: + case R_X86_64_GOTPC32: + case R_X86_64_GOTPCREL: + return true; + + default: + break; + } + + return false; +} + #define ADD_OP(op) \ if (!(op = calloc(1, sizeof(*op)))) \ return -1; \ diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 43ec14c29a60..7174bba14494 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1645,7 +1645,7 @@ static int handle_group_alt(struct objtool_file *file, * accordingly.
*/ alt_reloc = insn_reloc(file, insn); - if (alt_reloc && + if (alt_reloc && arch_pc_relative_reloc(alt_reloc) && !arch_support_alt_relocation(special_alt, insn, alt_reloc)) { WARN_FUNC("unsupported relocation in alternatives section", diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h index beb2f3aa94ff..fe2ea4b892c3 100644 --- a/tools/objtool/include/objtool/arch.h +++ b/tools/objtool/include/objtool/arch.h @@ -93,4 +93,6 @@ bool arch_is_rethunk(struct symbol *sym); int arch_rewrite_retpolines(struct objtool_file *file); +bool arch_pc_relative_reloc(struct reloc *reloc); + #endif /* _ARCH_H */ From 6644ee846cb983437063da8fd24b7cae671fd019 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 15 Sep 2022 13:11:08 +0200 Subject: [PATCH 0114/4122] objtool: Track init section For future usage of .init.text exclusion track the init section in the instruction decoder and use the result in retpoline validation. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111145.910334431@infradead.org --- tools/objtool/check.c | 17 ++++++++++------- tools/objtool/include/objtool/elf.h | 2 +- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 7174bba14494..bb7c8196bf57 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -382,6 +382,15 @@ static int decode_instructions(struct objtool_file *file) !strncmp(sec->name, ".text.__x86.", 12)) sec->noinstr = true; + /* + * .init.text code is ran before userspace and thus doesn't + * strictly need retpolines, except for modules which are + * loaded late, they very much do need retpoline in their + * .init.text + */ + if (!strcmp(sec->name, ".init.text") && !opts.module) + sec->init = true; + for (offset = 0; offset < sec->sh.sh_size; offset += insn->len) { insn = malloc(sizeof(*insn)); if (!insn) { @@ -3748,13 +3757,7 @@ static 
int validate_retpoline(struct objtool_file *file) if (insn->retpoline_safe) continue; - /* - * .init.text code is ran before userspace and thus doesn't - * strictly need retpolines, except for modules which are - * loaded late, they very much do need retpoline in their - * .init.text - */ - if (!strcmp(insn->sec->name, ".init.text") && !opts.module) + if (insn->sec->init) continue; if (insn->type == INSN_RETURN) { diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index 16f4067b82ae..baa808583c4f 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -38,7 +38,7 @@ struct section { Elf_Data *data; char *name; int idx; - bool changed, text, rodata, noinstr; + bool changed, text, rodata, noinstr, init; }; struct symbol { From 00abd38408127a57861698a8bffba65849de6bbd Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 15 Sep 2022 13:11:09 +0200 Subject: [PATCH 0115/4122] objtool: Add .call_sites section In preparation for call depth tracking provide a section which collects all direct calls. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111146.016511961@infradead.org --- arch/x86/kernel/vmlinux.lds.S | 7 ++++ tools/objtool/check.c | 51 +++++++++++++++++++++++++ tools/objtool/include/objtool/objtool.h | 1 + tools/objtool/objtool.c | 1 + 4 files changed, 60 insertions(+) diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 0e9fc080c417..b69df9e013cc 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -291,6 +291,13 @@ SECTIONS *(.return_sites) __return_sites_end = .; } + + . 
= ALIGN(8); + .call_sites : AT(ADDR(.call_sites) - LOAD_OFFSET) { + __call_sites = .; + *(.call_sites) + __call_sites_end = .; + } #endif #ifdef CONFIG_X86_KERNEL_IBT diff --git a/tools/objtool/check.c b/tools/objtool/check.c index bb7c8196bf57..f578e030e8bb 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -902,6 +902,49 @@ static int create_mcount_loc_sections(struct objtool_file *file) return 0; } +static int create_direct_call_sections(struct objtool_file *file) +{ + struct instruction *insn; + struct section *sec; + unsigned int *loc; + int idx; + + sec = find_section_by_name(file->elf, ".call_sites"); + if (sec) { + INIT_LIST_HEAD(&file->call_list); + WARN("file already has .call_sites section, skipping"); + return 0; + } + + if (list_empty(&file->call_list)) + return 0; + + idx = 0; + list_for_each_entry(insn, &file->call_list, call_node) + idx++; + + sec = elf_create_section(file->elf, ".call_sites", 0, sizeof(unsigned int), idx); + if (!sec) + return -1; + + idx = 0; + list_for_each_entry(insn, &file->call_list, call_node) { + + loc = (unsigned int *)sec->data->d_buf + idx; + memset(loc, 0, sizeof(unsigned int)); + + if (elf_add_reloc_to_insn(file->elf, sec, + idx * sizeof(unsigned int), + R_X86_64_PC32, + insn->sec, insn->offset)) + return -1; + + idx++; + } + + return 0; +} + /* * Warnings shouldn't be reported for ignored functions. 
*/ @@ -1279,6 +1322,9 @@ static void annotate_call_site(struct objtool_file *file, return; } + if (insn->type == INSN_CALL && !insn->sec->init) + list_add_tail(&insn->call_node, &file->call_list); + if (!sibling && dead_end_function(file, sym)) insn->dead_end = true; } @@ -4305,6 +4351,11 @@ int check(struct objtool_file *file) if (ret < 0) goto out; warnings += ret; + + ret = create_direct_call_sections(file); + if (ret < 0) + goto out; + warnings += ret; } if (opts.mcount) { diff --git a/tools/objtool/include/objtool/objtool.h b/tools/objtool/include/objtool/objtool.h index 7f2d1b095333..6b40977bcdb1 100644 --- a/tools/objtool/include/objtool/objtool.h +++ b/tools/objtool/include/objtool/objtool.h @@ -28,6 +28,7 @@ struct objtool_file { struct list_head static_call_list; struct list_head mcount_loc_list; struct list_head endbr_list; + struct list_head call_list; bool ignore_unreachables, hints, rodata; unsigned int nr_endbr; diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c index a7ecc32e3512..6affd8067f83 100644 --- a/tools/objtool/objtool.c +++ b/tools/objtool/objtool.c @@ -106,6 +106,7 @@ struct objtool_file *objtool_open_read(const char *_objname) INIT_LIST_HEAD(&file.static_call_list); INIT_LIST_HEAD(&file.mcount_loc_list); INIT_LIST_HEAD(&file.endbr_list); + INIT_LIST_HEAD(&file.call_list); file.ignore_unreachables = opts.no_unreachable; file.hints = false; From 0c0a6d8934e2081df93ba0bfc0cf615cc9c06988 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 15 Sep 2022 13:11:10 +0200 Subject: [PATCH 0116/4122] objtool: Add --hacks=skylake Make the call/func sections selectable via the --hacks option. 
Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111146.120821440@infradead.org --- scripts/Makefile.lib | 1 + tools/objtool/builtin-check.c | 7 ++++++- tools/objtool/check.c | 10 ++++++---- tools/objtool/include/objtool/builtin.h | 1 + 4 files changed, 14 insertions(+), 5 deletions(-) diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 3aa384cec76b..85f02756dc9c 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -254,6 +254,7 @@ objtool := $(objtree)/tools/objtool/objtool objtool-args-$(CONFIG_HAVE_JUMP_LABEL_HACK) += --hacks=jump_label objtool-args-$(CONFIG_HAVE_NOINSTR_HACK) += --hacks=noinstr +objtool-args-$(CONFIG_CALL_DEPTH_TRACKING) += --hacks=skylake objtool-args-$(CONFIG_X86_KERNEL_IBT) += --ibt objtool-args-$(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL) += --mcount objtool-args-$(CONFIG_UNWINDER_ORC) += --orc diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 24fbe803a0d3..0a04f8ea4432 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -57,12 +57,17 @@ static int parse_hacks(const struct option *opt, const char *str, int unset) found = true; } + if (!str || strstr(str, "skylake")) { + opts.hack_skylake = true; + found = true; + } + return found ? 
0 : -1; } const struct option check_options[] = { OPT_GROUP("Actions:"), - OPT_CALLBACK_OPTARG('h', "hacks", NULL, NULL, "jump_label,noinstr", "patch toolchain bugs/limitations", parse_hacks), + OPT_CALLBACK_OPTARG('h', "hacks", NULL, NULL, "jump_label,noinstr,skylake", "patch toolchain bugs/limitations", parse_hacks), OPT_BOOLEAN('i', "ibt", &opts.ibt, "validate and annotate IBT"), OPT_BOOLEAN('m', "mcount", &opts.mcount, "annotate mcount/fentry calls for ftrace"), OPT_BOOLEAN('n', "noinstr", &opts.noinstr, "validate noinstr rules"), diff --git a/tools/objtool/check.c b/tools/objtool/check.c index f578e030e8bb..1461c8894fb7 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -4352,10 +4352,12 @@ int check(struct objtool_file *file) goto out; warnings += ret; - ret = create_direct_call_sections(file); - if (ret < 0) - goto out; - warnings += ret; + if (opts.hack_skylake) { + ret = create_direct_call_sections(file); + if (ret < 0) + goto out; + warnings += ret; + } } if (opts.mcount) { diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h index 42a52f1a0add..22092a9f3cf6 100644 --- a/tools/objtool/include/objtool/builtin.h +++ b/tools/objtool/include/objtool/builtin.h @@ -14,6 +14,7 @@ struct opts { bool dump_orc; bool hack_jump_label; bool hack_noinstr; + bool hack_skylake; bool ibt; bool mcount; bool noinstr; From 5da6aea375cde499fdfac3cde4f26df4a840eb9f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 15 Sep 2022 13:11:12 +0200 Subject: [PATCH 0117/4122] objtool: Fix find_{symbol,func}_containing() The current find_{symbol,func}_containing() functions are broken in the face of overlapping symbols, exactly the case that is needed for a new ibt/endbr suppression. Import interval_tree_generic.h into the tools tree and convert the symbol tree to an interval tree to support proper range stabs.
Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111146.330203761@infradead.org --- tools/include/linux/interval_tree_generic.h | 187 ++++++++++++++++++++ tools/objtool/elf.c | 95 +++++----- tools/objtool/include/objtool/elf.h | 3 +- 3 files changed, 230 insertions(+), 55 deletions(-) create mode 100644 tools/include/linux/interval_tree_generic.h diff --git a/tools/include/linux/interval_tree_generic.h b/tools/include/linux/interval_tree_generic.h new file mode 100644 index 000000000000..aaa8a0767aa3 --- /dev/null +++ b/tools/include/linux/interval_tree_generic.h @@ -0,0 +1,187 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + Interval Trees + (C) 2012 Michel Lespinasse + + + include/linux/interval_tree_generic.h +*/ + +#include + +/* + * Template for implementing interval trees + * + * ITSTRUCT: struct type of the interval tree nodes + * ITRB: name of struct rb_node field within ITSTRUCT + * ITTYPE: type of the interval endpoints + * ITSUBTREE: name of ITTYPE field within ITSTRUCT holding last-in-subtree + * ITSTART(n): start endpoint of ITSTRUCT node n + * ITLAST(n): last endpoint of ITSTRUCT node n + * ITSTATIC: 'static' or empty + * ITPREFIX: prefix to use for the inline tree definitions + * + * Note - before using this, please consider if generic version + * (interval_tree.h) would work for you... 
+ */ + +#define INTERVAL_TREE_DEFINE(ITSTRUCT, ITRB, ITTYPE, ITSUBTREE, \ + ITSTART, ITLAST, ITSTATIC, ITPREFIX) \ + \ +/* Callbacks for augmented rbtree insert and remove */ \ + \ +RB_DECLARE_CALLBACKS_MAX(static, ITPREFIX ## _augment, \ + ITSTRUCT, ITRB, ITTYPE, ITSUBTREE, ITLAST) \ + \ +/* Insert / remove interval nodes from the tree */ \ + \ +ITSTATIC void ITPREFIX ## _insert(ITSTRUCT *node, \ + struct rb_root_cached *root) \ +{ \ + struct rb_node **link = &root->rb_root.rb_node, *rb_parent = NULL; \ + ITTYPE start = ITSTART(node), last = ITLAST(node); \ + ITSTRUCT *parent; \ + bool leftmost = true; \ + \ + while (*link) { \ + rb_parent = *link; \ + parent = rb_entry(rb_parent, ITSTRUCT, ITRB); \ + if (parent->ITSUBTREE < last) \ + parent->ITSUBTREE = last; \ + if (start < ITSTART(parent)) \ + link = &parent->ITRB.rb_left; \ + else { \ + link = &parent->ITRB.rb_right; \ + leftmost = false; \ + } \ + } \ + \ + node->ITSUBTREE = last; \ + rb_link_node(&node->ITRB, rb_parent, link); \ + rb_insert_augmented_cached(&node->ITRB, root, \ + leftmost, &ITPREFIX ## _augment); \ +} \ + \ +ITSTATIC void ITPREFIX ## _remove(ITSTRUCT *node, \ + struct rb_root_cached *root) \ +{ \ + rb_erase_augmented_cached(&node->ITRB, root, &ITPREFIX ## _augment); \ +} \ + \ +/* \ + * Iterate over intervals intersecting [start;last] \ + * \ + * Note that a node's interval intersects [start;last] iff: \ + * Cond1: ITSTART(node) <= last \ + * and \ + * Cond2: start <= ITLAST(node) \ + */ \ + \ +static ITSTRUCT * \ +ITPREFIX ## _subtree_search(ITSTRUCT *node, ITTYPE start, ITTYPE last) \ +{ \ + while (true) { \ + /* \ + * Loop invariant: start <= node->ITSUBTREE \ + * (Cond2 is satisfied by one of the subtree nodes) \ + */ \ + if (node->ITRB.rb_left) { \ + ITSTRUCT *left = rb_entry(node->ITRB.rb_left, \ + ITSTRUCT, ITRB); \ + if (start <= left->ITSUBTREE) { \ + /* \ + * Some nodes in left subtree satisfy Cond2. \ + * Iterate to find the leftmost such node N. 
\ + * If it also satisfies Cond1, that's the \ + * match we are looking for. Otherwise, there \ + * is no matching interval as nodes to the \ + * right of N can't satisfy Cond1 either. \ + */ \ + node = left; \ + continue; \ + } \ + } \ + if (ITSTART(node) <= last) { /* Cond1 */ \ + if (start <= ITLAST(node)) /* Cond2 */ \ + return node; /* node is leftmost match */ \ + if (node->ITRB.rb_right) { \ + node = rb_entry(node->ITRB.rb_right, \ + ITSTRUCT, ITRB); \ + if (start <= node->ITSUBTREE) \ + continue; \ + } \ + } \ + return NULL; /* No match */ \ + } \ +} \ + \ +ITSTATIC ITSTRUCT * \ +ITPREFIX ## _iter_first(struct rb_root_cached *root, \ + ITTYPE start, ITTYPE last) \ +{ \ + ITSTRUCT *node, *leftmost; \ + \ + if (!root->rb_root.rb_node) \ + return NULL; \ + \ + /* \ + * Fastpath range intersection/overlap between A: [a0, a1] and \ + * B: [b0, b1] is given by: \ + * \ + * a0 <= b1 && b0 <= a1 \ + * \ + * ... where A holds the lock range and B holds the smallest \ + * 'start' and largest 'last' in the tree. For the later, we \ + * rely on the root node, which by augmented interval tree \ + * property, holds the largest value in its last-in-subtree. \ + * This allows mitigating some of the tree walk overhead for \ + * for non-intersecting ranges, maintained and consulted in O(1). 
\ + */ \ + node = rb_entry(root->rb_root.rb_node, ITSTRUCT, ITRB); \ + if (node->ITSUBTREE < start) \ + return NULL; \ + \ + leftmost = rb_entry(root->rb_leftmost, ITSTRUCT, ITRB); \ + if (ITSTART(leftmost) > last) \ + return NULL; \ + \ + return ITPREFIX ## _subtree_search(node, start, last); \ +} \ + \ +ITSTATIC ITSTRUCT * \ +ITPREFIX ## _iter_next(ITSTRUCT *node, ITTYPE start, ITTYPE last) \ +{ \ + struct rb_node *rb = node->ITRB.rb_right, *prev; \ + \ + while (true) { \ + /* \ + * Loop invariants: \ + * Cond1: ITSTART(node) <= last \ + * rb == node->ITRB.rb_right \ + * \ + * First, search right subtree if suitable \ + */ \ + if (rb) { \ + ITSTRUCT *right = rb_entry(rb, ITSTRUCT, ITRB); \ + if (start <= right->ITSUBTREE) \ + return ITPREFIX ## _subtree_search(right, \ + start, last); \ + } \ + \ + /* Move up the tree until we come from a node's left child */ \ + do { \ + rb = rb_parent(&node->ITRB); \ + if (!rb) \ + return NULL; \ + prev = &node->ITRB; \ + node = rb_entry(rb, ITSTRUCT, ITRB); \ + rb = node->ITRB.rb_right; \ + } while (prev == rb); \ + \ + /* Check if the node intersects [start;last] */ \ + if (last < ITSTART(node)) /* !Cond1 */ \ + return NULL; \ + else if (start <= ITLAST(node)) /* Cond2 */ \ + return node; \ + } \ +} diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 7e24b09b1163..89b37cd4ab1d 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -50,39 +51,23 @@ static inline u32 str_hash(const char *str) __elf_table(name); \ }) -static bool symbol_to_offset(struct rb_node *a, const struct rb_node *b) +static inline unsigned long __sym_start(struct symbol *s) { - struct symbol *sa = rb_entry(a, struct symbol, node); - struct symbol *sb = rb_entry(b, struct symbol, node); - - if (sa->offset < sb->offset) - return true; - if (sa->offset > sb->offset) - return false; - - if (sa->len < sb->len) - return true; - if (sa->len > sb->len) - return false; - - 
sa->alias = sb; - - return false; + return s->offset; } -static int symbol_by_offset(const void *key, const struct rb_node *node) +static inline unsigned long __sym_last(struct symbol *s) { - const struct symbol *s = rb_entry(node, struct symbol, node); - const unsigned long *o = key; - - if (*o < s->offset) - return -1; - if (*o >= s->offset + s->len) - return 1; - - return 0; + return s->offset + s->len - 1; } +INTERVAL_TREE_DEFINE(struct symbol, node, unsigned long, __subtree_last, + __sym_start, __sym_last, static, __sym) + +#define __sym_for_each(_iter, _tree, _start, _end) \ + for (_iter = __sym_iter_first((_tree), (_start), (_end)); \ + _iter; _iter = __sym_iter_next(_iter, (_start), (_end))) + struct symbol_hole { unsigned long key; const struct symbol *sym; @@ -147,13 +132,12 @@ static struct symbol *find_symbol_by_index(struct elf *elf, unsigned int idx) struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset) { - struct rb_node *node; + struct rb_root_cached *tree = (struct rb_root_cached *)&sec->symbol_tree; + struct symbol *iter; - rb_for_each(node, &offset, &sec->symbol_tree, symbol_by_offset) { - struct symbol *s = rb_entry(node, struct symbol, node); - - if (s->offset == offset && s->type != STT_SECTION) - return s; + __sym_for_each(iter, tree, offset, offset) { + if (iter->offset == offset && iter->type != STT_SECTION) + return iter; } return NULL; @@ -161,13 +145,12 @@ struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset) struct symbol *find_func_by_offset(struct section *sec, unsigned long offset) { - struct rb_node *node; + struct rb_root_cached *tree = (struct rb_root_cached *)&sec->symbol_tree; + struct symbol *iter; - rb_for_each(node, &offset, &sec->symbol_tree, symbol_by_offset) { - struct symbol *s = rb_entry(node, struct symbol, node); - - if (s->offset == offset && s->type == STT_FUNC) - return s; + __sym_for_each(iter, tree, offset, offset) { + if (iter->offset == offset && iter->type == 
STT_FUNC) + return iter; } return NULL; @@ -175,13 +158,12 @@ struct symbol *find_func_by_offset(struct section *sec, unsigned long offset) struct symbol *find_symbol_containing(const struct section *sec, unsigned long offset) { - struct rb_node *node; + struct rb_root_cached *tree = (struct rb_root_cached *)&sec->symbol_tree; + struct symbol *iter; - rb_for_each(node, &offset, &sec->symbol_tree, symbol_by_offset) { - struct symbol *s = rb_entry(node, struct symbol, node); - - if (s->type != STT_SECTION) - return s; + __sym_for_each(iter, tree, offset, offset) { + if (iter->type != STT_SECTION) + return iter; } return NULL; @@ -202,7 +184,7 @@ int find_symbol_hole_containing(const struct section *sec, unsigned long offset) /* * Find the rightmost symbol for which @offset is after it. */ - n = rb_find(&hole, &sec->symbol_tree, symbol_hole_by_offset); + n = rb_find(&hole, &sec->symbol_tree.rb_root, symbol_hole_by_offset); /* found a symbol that contains @offset */ if (n) @@ -224,13 +206,12 @@ int find_symbol_hole_containing(const struct section *sec, unsigned long offset) struct symbol *find_func_containing(struct section *sec, unsigned long offset) { - struct rb_node *node; + struct rb_root_cached *tree = (struct rb_root_cached *)&sec->symbol_tree; + struct symbol *iter; - rb_for_each(node, &offset, &sec->symbol_tree, symbol_by_offset) { - struct symbol *s = rb_entry(node, struct symbol, node); - - if (s->type == STT_FUNC) - return s; + __sym_for_each(iter, tree, offset, offset) { + if (iter->type == STT_FUNC) + return iter; } return NULL; @@ -373,6 +354,7 @@ static void elf_add_symbol(struct elf *elf, struct symbol *sym) { struct list_head *entry; struct rb_node *pnode; + struct symbol *iter; INIT_LIST_HEAD(&sym->pv_target); sym->alias = sym; @@ -386,7 +368,12 @@ static void elf_add_symbol(struct elf *elf, struct symbol *sym) sym->offset = sym->sym.st_value; sym->len = sym->sym.st_size; - rb_add(&sym->node, &sym->sec->symbol_tree, symbol_to_offset); + 
__sym_for_each(iter, &sym->sec->symbol_tree, sym->offset, sym->offset) { + if (iter->offset == sym->offset && iter->type == sym->type) + iter->alias = sym; + } + + __sym_insert(sym, &sym->sec->symbol_tree); pnode = rb_prev(&sym->node); if (pnode) entry = &rb_entry(pnode, struct symbol, node)->list; @@ -401,7 +388,7 @@ static void elf_add_symbol(struct elf *elf, struct symbol *sym) * can exist within a function, confusing the sorting. */ if (!sym->len) - rb_erase(&sym->node, &sym->sec->symbol_tree); + __sym_remove(sym, &sym->sec->symbol_tree); } static int read_symbols(struct elf *elf) diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index baa808583c4f..d28533106b78 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -30,7 +30,7 @@ struct section { struct hlist_node hash; struct hlist_node name_hash; GElf_Shdr sh; - struct rb_root symbol_tree; + struct rb_root_cached symbol_tree; struct list_head symbol_list; struct list_head reloc_list; struct section *base, *reloc; @@ -53,6 +53,7 @@ struct symbol { unsigned char bind, type; unsigned long offset; unsigned int len; + unsigned long __subtree_last; struct symbol *pfunc, *cfunc, *alias; u8 uaccess_safe : 1; u8 static_call_tramp : 1; From 08ef8c40112b8cd157515cd532f65cb82c934a76 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 15 Sep 2022 13:11:13 +0200 Subject: [PATCH 0118/4122] objtool: Allow symbol range comparisons for IBT/ENDBR A semi common pattern is where code checks if a code address is within a specific range. All text addresses require either ENDBR or ANNOTATE_ENDBR, however the ANNOTATE_NOENDBR past the range is unnatural. Instead, suppress this warning when this is exactly at the end of a symbol that itself starts with either ENDBR/ANNOTATE_ENDBR. 
Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111146.434642471@infradead.org --- arch/x86/entry/entry_64_compat.S | 1 - tools/objtool/check.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 1dfee868d4a1..bc45ea7d08ee 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -128,7 +128,6 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) popfq jmp .Lsysenter_flags_fixed SYM_INNER_LABEL(__end_entry_SYSENTER_compat, SYM_L_GLOBAL) - ANNOTATE_NOENDBR // is_sysenter_singlestep SYM_CODE_END(entry_SYSENTER_compat) /* diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 1461c8894fb7..3f46f46ab85c 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -4033,6 +4033,24 @@ static void mark_endbr_used(struct instruction *insn) list_del_init(&insn->call_node); } +static bool noendbr_range(struct objtool_file *file, struct instruction *insn) +{ + struct symbol *sym = find_symbol_containing(insn->sec, insn->offset-1); + struct instruction *first; + + if (!sym) + return false; + + first = find_insn(file, sym->sec, sym->offset); + if (!first) + return false; + + if (first->type != INSN_ENDBR && !first->noendbr) + return false; + + return insn->offset == sym->offset + sym->len; +} + static int validate_ibt_insn(struct objtool_file *file, struct instruction *insn) { struct instruction *dest; @@ -4105,9 +4123,19 @@ static int validate_ibt_insn(struct objtool_file *file, struct instruction *insn continue; } + /* + * Accept anything ANNOTATE_NOENDBR. + */ if (dest->noendbr) continue; + /* + * Accept if this is the instruction after a symbol + * that is (no)endbr -- typical code-range usage. 
+ */ + if (noendbr_range(file, dest)) + continue; + WARN_FUNC("relocation to !ENDBR: %s", insn->sec, insn->offset, offstr(dest->sec, dest->offset)); From dbcdbdfdf137b49144204571f1a5e5dc01b8aaad Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 22 Sep 2022 22:03:50 +0200 Subject: [PATCH 0119/4122] objtool: Rework instruction -> symbol mapping Currently insn->func contains an instruction -> symbol link for STT_FUNC symbols. A NULL value is assumed to mean STT_NOTYPE. However, there are also instructions not covered by any symbol at all. This can happen due to __weak symbols for example. Since the current scheme cannot differentiate between no symbol and STT_NOTYPE symbol, change things around. Make insn->sym point to any symbol type such that !insn->sym means no symbol and add a helper insn_func() that checks the sym->type to retain the old functionality. This then prepares the way to add code that depends on the distinction between STT_NOTYPE and no symbol at all. Signed-off-by: Peter Zijlstra (Intel) --- tools/objtool/check.c | 105 ++++++++++++++------------ tools/objtool/include/objtool/check.h | 12 ++- 2 files changed, 66 insertions(+), 51 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 3f46f46ab85c..e532efb9b5ab 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -62,12 +62,12 @@ static struct instruction *next_insn_same_func(struct objtool_file *file, struct instruction *insn) { struct instruction *next = list_next_entry(insn, list); - struct symbol *func = insn->func; + struct symbol *func = insn_func(insn); if (!func) return NULL; - if (&next->list != &file->insn_list && next->func == func) + if (&next->list != &file->insn_list && insn_func(next) == func) return next; /* Check if we're already in the subfunction: */ @@ -83,7 +83,7 @@ static struct instruction *prev_insn_same_sym(struct objtool_file *file, { struct instruction *prev = list_prev_entry(insn, list); - if (&prev->list != &file->insn_list && 
prev->func == insn->func) + if (&prev->list != &file->insn_list && insn_func(prev) == insn_func(insn)) return prev; return NULL; @@ -133,7 +133,7 @@ static bool is_sibling_call(struct instruction *insn) * sibling call detection consistency between vmlinux.o and individual * objects. */ - if (!insn->func) + if (!insn_func(insn)) return false; /* An indirect jump is either a sibling call or a jump to a table. */ @@ -207,7 +207,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func, return false; insn = find_insn(file, func->sec, func->offset); - if (!insn->func) + if (!insn_func(insn)) return false; func_for_each_insn(file, func, insn) { @@ -243,7 +243,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func, return false; } - return __dead_end_function(file, dest->func, recursion+1); + return __dead_end_function(file, insn_func(dest), recursion+1); } } @@ -427,7 +427,10 @@ static int decode_instructions(struct objtool_file *file) } list_for_each_entry(func, &sec->symbol_list, list) { - if (func->type != STT_FUNC || func->alias != func) + if (func->type != STT_NOTYPE && func->type != STT_FUNC) + continue; + + if (func->return_thunk || func->alias != func) continue; if (!find_insn(file, sec, func->offset)) { @@ -437,9 +440,11 @@ static int decode_instructions(struct objtool_file *file) } sym_for_each_insn(file, func, insn) { - insn->func = func; - if (insn->type == INSN_ENDBR && list_empty(&insn->call_node)) { - if (insn->offset == insn->func->offset) { + insn->sym = func; + if (func->type == STT_FUNC && + insn->type == INSN_ENDBR && + list_empty(&insn->call_node)) { + if (insn->offset == func->offset) { list_add_tail(&insn->call_node, &file->endbr_list); file->nr_endbr++; } else { @@ -1397,19 +1402,19 @@ static void add_return_call(struct objtool_file *file, struct instruction *insn, static bool same_function(struct instruction *insn1, struct instruction *insn2) { - return insn1->func->pfunc == 
insn2->func->pfunc; + return insn_func(insn1)->pfunc == insn_func(insn2)->pfunc; } static bool is_first_func_insn(struct objtool_file *file, struct instruction *insn) { - if (insn->offset == insn->func->offset) + if (insn->offset == insn_func(insn)->offset) return true; if (opts.ibt) { struct instruction *prev = prev_insn_same_sym(file, insn); if (prev && prev->type == INSN_ENDBR && - insn->offset == insn->func->offset + prev->len) + insn->offset == insn_func(insn)->offset + prev->len) return true; } @@ -1450,7 +1455,7 @@ static int add_jump_destinations(struct objtool_file *file) } else if (reloc->sym->return_thunk) { add_return_call(file, insn, true); continue; - } else if (insn->func) { + } else if (insn_func(insn)) { /* * External sibling call or internal sibling call with * STT_FUNC reloc. @@ -1492,8 +1497,8 @@ static int add_jump_destinations(struct objtool_file *file) /* * Cross-function jump. */ - if (insn->func && jump_dest->func && - insn->func != jump_dest->func) { + if (insn_func(insn) && insn_func(jump_dest) && + insn_func(insn) != insn_func(jump_dest)) { /* * For GCC 8+, create parent/child links for any cold @@ -1510,10 +1515,10 @@ static int add_jump_destinations(struct objtool_file *file) * case where the parent function's only reference to a * subfunction is through a jump table. */ - if (!strstr(insn->func->name, ".cold") && - strstr(jump_dest->func->name, ".cold")) { - insn->func->cfunc = jump_dest->func; - jump_dest->func->pfunc = insn->func; + if (!strstr(insn_func(insn)->name, ".cold") && + strstr(insn_func(jump_dest)->name, ".cold")) { + insn_func(insn)->cfunc = insn_func(jump_dest); + insn_func(jump_dest)->pfunc = insn_func(insn); } else if (!same_function(insn, jump_dest) && is_first_func_insn(file, jump_dest)) { @@ -1521,7 +1526,7 @@ static int add_jump_destinations(struct objtool_file *file) * Internal sibling call without reloc or with * STT_SECTION reloc. 
*/ - add_call_dest(file, insn, jump_dest->func, true); + add_call_dest(file, insn, insn_func(jump_dest), true); continue; } } @@ -1572,7 +1577,7 @@ static int add_call_destinations(struct objtool_file *file) return -1; } - if (insn->func && insn->call_dest->type != STT_FUNC) { + if (insn_func(insn) && insn->call_dest->type != STT_FUNC) { WARN_FUNC("unsupported call to non-function", insn->sec, insn->offset); return -1; @@ -1668,7 +1673,7 @@ static int handle_group_alt(struct objtool_file *file, nop->offset = special_alt->new_off + special_alt->new_len; nop->len = special_alt->orig_len - special_alt->new_len; nop->type = INSN_NOP; - nop->func = orig_insn->func; + nop->sym = orig_insn->sym; nop->alt_group = new_alt_group; nop->ignore = orig_insn->ignore_alts; } @@ -1688,7 +1693,7 @@ static int handle_group_alt(struct objtool_file *file, last_new_insn = insn; insn->ignore = orig_insn->ignore_alts; - insn->func = orig_insn->func; + insn->sym = orig_insn->sym; insn->alt_group = new_alt_group; /* @@ -1882,7 +1887,7 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn, struct reloc *reloc = table; struct instruction *dest_insn; struct alternative *alt; - struct symbol *pfunc = insn->func->pfunc; + struct symbol *pfunc = insn_func(insn)->pfunc; unsigned int prev_offset = 0; /* @@ -1909,7 +1914,7 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn, break; /* Make sure the destination is in the same function: */ - if (!dest_insn->func || dest_insn->func->pfunc != pfunc) + if (!insn_func(dest_insn) || insn_func(dest_insn)->pfunc != pfunc) break; alt = malloc(sizeof(*alt)); @@ -1949,7 +1954,7 @@ static struct reloc *find_jump_table(struct objtool_file *file, * it. 
*/ for (; - insn && insn->func && insn->func->pfunc == func; + insn && insn_func(insn) && insn_func(insn)->pfunc == func; insn = insn->first_jump_src ?: prev_insn_same_sym(file, insn)) { if (insn != orig_insn && insn->type == INSN_JUMP_DYNAMIC) @@ -1966,7 +1971,7 @@ static struct reloc *find_jump_table(struct objtool_file *file, if (!table_reloc) continue; dest_insn = find_insn(file, table_reloc->sym->sec, table_reloc->addend); - if (!dest_insn || !dest_insn->func || dest_insn->func->pfunc != func) + if (!dest_insn || !insn_func(dest_insn) || insn_func(dest_insn)->pfunc != func) continue; return table_reloc; @@ -2415,6 +2420,13 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; + /* + * Must be before add_{jump_call}_destination. + */ + ret = classify_symbols(file); + if (ret) + return ret; + ret = decode_instructions(file); if (ret) return ret; @@ -2433,13 +2445,6 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; - /* - * Must be before add_{jump_call}_destination. - */ - ret = classify_symbols(file); - if (ret) - return ret; - /* * Must be before add_jump_destinations(), which depends on 'func' * being set for alternatives, to enable proper sibling call detection. 
@@ -2648,7 +2653,7 @@ static int update_cfi_state(struct instruction *insn, /* stack operations don't make sense with an undefined CFA */ if (cfa->base == CFI_UNDEFINED) { - if (insn->func) { + if (insn_func(insn)) { WARN_FUNC("undefined stack state", insn->sec, insn->offset); return -1; } @@ -2994,7 +2999,7 @@ static int update_cfi_state(struct instruction *insn, } /* detect when asm code uses rbp as a scratch register */ - if (opts.stackval && insn->func && op->src.reg == CFI_BP && + if (opts.stackval && insn_func(insn) && op->src.reg == CFI_BP && cfa->base != CFI_BP) cfi->bp_scratch = true; break; @@ -3390,13 +3395,13 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, while (1) { next_insn = next_insn_to_validate(file, insn); - if (func && insn->func && func != insn->func->pfunc) { + if (func && insn_func(insn) && func != insn_func(insn)->pfunc) { /* Ignore KCFI type preambles, which always fall through */ if (!strncmp(func->name, "__cfi_", 6)) return 0; WARN("%s() falls through to next function %s()", - func->name, insn->func->name); + func->name, insn_func(insn)->name); return 1; } @@ -3638,7 +3643,7 @@ static int validate_unwind_hints(struct objtool_file *file, struct section *sec) while (&insn->list != &file->insn_list && (!sec || insn->sec == sec)) { if (insn->hint && !insn->visited && !insn->ignore) { - ret = validate_branch(file, insn->func, insn, state); + ret = validate_branch(file, insn_func(insn), insn, state); if (ret && opts.backtrace) BT_FUNC("<=== (hint)", insn); warnings += ret; @@ -3861,7 +3866,7 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio * In this case we'll find a piece of code (whole function) that is not * covered by a !section symbol. Ignore them. 
*/ - if (opts.link && !insn->func) { + if (opts.link && !insn_func(insn)) { int size = find_symbol_hole_containing(insn->sec, insn->offset); unsigned long end = insn->offset + size; @@ -3885,10 +3890,10 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio /* * If this hole jumps to a .cold function, mark it ignore too. */ - if (insn->jump_dest && insn->jump_dest->func && - strstr(insn->jump_dest->func->name, ".cold")) { + if (insn->jump_dest && insn_func(insn->jump_dest) && + strstr(insn_func(insn->jump_dest)->name, ".cold")) { struct instruction *dest = insn->jump_dest; - func_for_each_insn(file, dest->func, dest) + func_for_each_insn(file, insn_func(dest), dest) dest->ignore = true; } } @@ -3896,10 +3901,10 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio return false; } - if (!insn->func) + if (!insn_func(insn)) return false; - if (insn->func->static_call_tramp) + if (insn_func(insn)->static_call_tramp) return true; /* @@ -3930,7 +3935,7 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio if (insn->type == INSN_JUMP_UNCONDITIONAL) { if (insn->jump_dest && - insn->jump_dest->func == insn->func) { + insn_func(insn->jump_dest) == insn_func(insn)) { insn = insn->jump_dest; continue; } @@ -3938,7 +3943,7 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio break; } - if (insn->offset + insn->len >= insn->func->offset + insn->func->len) + if (insn->offset + insn->len >= insn_func(insn)->offset + insn_func(insn)->len) break; insn = list_next_entry(insn, list); @@ -3967,7 +3972,7 @@ static int validate_symbol(struct objtool_file *file, struct section *sec, state->uaccess = sym->uaccess_safe; - ret = validate_branch(file, insn->func, insn, *state); + ret = validate_branch(file, insn_func(insn), insn, *state); if (ret && opts.backtrace) BT_FUNC("<=== (sym)", insn); return ret; @@ -4104,7 +4109,7 @@ static int validate_ibt_insn(struct 
objtool_file *file, struct instruction *insn continue; } - if (dest->func && dest->func == insn->func) { + if (insn_func(dest) && insn_func(dest) == insn_func(insn)) { /* * Anything from->to self is either _THIS_IP_ or * IRET-to-self. diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h index 036129cebeee..acd7fae59348 100644 --- a/tools/objtool/include/objtool/check.h +++ b/tools/objtool/include/objtool/check.h @@ -67,11 +67,21 @@ struct instruction { struct reloc *jump_table; struct reloc *reloc; struct list_head alts; - struct symbol *func; + struct symbol *sym; struct list_head stack_ops; struct cfi_state *cfi; }; +static inline struct symbol *insn_func(struct instruction *insn) +{ + struct symbol *sym = insn->sym; + + if (sym && sym->type != STT_FUNC) + sym = NULL; + + return sym; +} + #define VISITED_BRANCH 0x01 #define VISITED_BRANCH_UACCESS 0x02 #define VISITED_BRANCH_MASK 0x03 From 5a9c361a416fc3a3301e859ff09587cc1b933eb8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 11 Jul 2022 11:49:50 +0200 Subject: [PATCH 0120/4122] objtool: Allow STT_NOTYPE -> STT_FUNC+0 sibling-calls Teach objtool about STT_NOTYPE -> STT_FUNC+0 sibling calls. Doing so allows slightly simpler .S files. There is a slight complication in that we specifically do not want to allow sibling calls from symbol holes (previously covered by STT_WEAK symbols) -- such things exist where a weak function has a .cold subfunction for example. Additionally, STT_NOTYPE tail-calls are allowed to happen with a modified stack frame, they don't need to obey the normal rules after all. 
Signed-off-by: Peter Zijlstra (Intel) --- tools/objtool/check.c | 74 +++++++++++++++++++++++++++---------------- 1 file changed, 47 insertions(+), 27 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index e532efb9b5ab..7936312e10c7 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -129,16 +129,13 @@ static bool is_jump_table_jump(struct instruction *insn) static bool is_sibling_call(struct instruction *insn) { /* - * Assume only ELF functions can make sibling calls. This ensures - * sibling call detection consistency between vmlinux.o and individual - * objects. + * Assume only STT_FUNC calls have jump-tables. */ - if (!insn_func(insn)) - return false; - - /* An indirect jump is either a sibling call or a jump to a table. */ - if (insn->type == INSN_JUMP_DYNAMIC) - return !is_jump_table_jump(insn); + if (insn_func(insn)) { + /* An indirect jump is either a sibling call or a jump to a table. */ + if (insn->type == INSN_JUMP_DYNAMIC) + return !is_jump_table_jump(insn); + } /* add_jump_destinations() sets insn->call_dest for sibling calls. 
*/ return (is_static_jump(insn) && insn->call_dest); @@ -1400,27 +1397,50 @@ static void add_return_call(struct objtool_file *file, struct instruction *insn, list_add_tail(&insn->call_node, &file->return_thunk_list); } -static bool same_function(struct instruction *insn1, struct instruction *insn2) +static bool is_first_func_insn(struct objtool_file *file, + struct instruction *insn, struct symbol *sym) { - return insn_func(insn1)->pfunc == insn_func(insn2)->pfunc; -} - -static bool is_first_func_insn(struct objtool_file *file, struct instruction *insn) -{ - if (insn->offset == insn_func(insn)->offset) + if (insn->offset == sym->offset) return true; + /* Allow direct CALL/JMP past ENDBR */ if (opts.ibt) { struct instruction *prev = prev_insn_same_sym(file, insn); if (prev && prev->type == INSN_ENDBR && - insn->offset == insn_func(insn)->offset + prev->len) + insn->offset == sym->offset + prev->len) return true; } return false; } +/* + * A sibling call is a tail-call to another symbol -- to differentiate from a + * recursive tail-call which is to the same symbol. + */ +static bool jump_is_sibling_call(struct objtool_file *file, + struct instruction *from, struct instruction *to) +{ + struct symbol *fs = from->sym; + struct symbol *ts = to->sym; + + /* Not a sibling call if from/to a symbol hole */ + if (!fs || !ts) + return false; + + /* Not a sibling call if not targeting the start of a symbol. */ + if (!is_first_func_insn(file, to, ts)) + return false; + + /* Disallow sibling calls into STT_NOTYPE */ + if (ts->type == STT_NOTYPE) + return false; + + /* Must not be self to be a sibling */ + return fs->pfunc != ts->pfunc; +} + /* * Find the destination instructions for all jumps. 
*/ @@ -1519,18 +1539,18 @@ static int add_jump_destinations(struct objtool_file *file) strstr(insn_func(jump_dest)->name, ".cold")) { insn_func(insn)->cfunc = insn_func(jump_dest); insn_func(jump_dest)->pfunc = insn_func(insn); - - } else if (!same_function(insn, jump_dest) && - is_first_func_insn(file, jump_dest)) { - /* - * Internal sibling call without reloc or with - * STT_SECTION reloc. - */ - add_call_dest(file, insn, insn_func(jump_dest), true); - continue; } } + if (jump_is_sibling_call(file, insn, jump_dest)) { + /* + * Internal sibling call without reloc or with + * STT_SECTION reloc. + */ + add_call_dest(file, insn, insn_func(jump_dest), true); + continue; + } + insn->jump_dest = jump_dest; } @@ -3309,7 +3329,7 @@ static int validate_sibling_call(struct objtool_file *file, struct instruction *insn, struct insn_state *state) { - if (has_modified_stack_frame(insn, state)) { + if (insn_func(insn) && has_modified_stack_frame(insn, state)) { WARN_FUNC("sibling call from callable instruction with modified stack frame", insn->sec, insn->offset); return 1; From ef79ed20e3ae9ee9ac2e0f3a4e12814893972e63 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 15 Sep 2022 13:11:14 +0200 Subject: [PATCH 0121/4122] x86/entry: Make sync_regs() invocation a tail call No point in having a call there. Spare the call/ret overhead. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111146.539578813@infradead.org --- arch/x86/entry/entry_64.S | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 5c578a7dfcd7..b24b84b3425f 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1062,11 +1062,8 @@ SYM_CODE_START_LOCAL(error_entry) UNTRAIN_RET leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */ -.Lerror_entry_from_usermode_after_swapgs: - /* Put us onto the real thread stack. 
*/ - call sync_regs - RET + jmp sync_regs /* * There are two places in the kernel that can potentially fault with @@ -1124,7 +1121,7 @@ SYM_CODE_START_LOCAL(error_entry) leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */ call fixup_bad_iret mov %rax, %rdi - jmp .Lerror_entry_from_usermode_after_swapgs + jmp sync_regs SYM_CODE_END(error_entry) SYM_CODE_START_LOCAL(error_return) From cb855971d717a2dd752241f66fedad9dc178388c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Sep 2022 13:11:16 +0200 Subject: [PATCH 0122/4122] x86/putuser: Provide room for padding Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111146.746429822@infradead.org --- arch/x86/lib/putuser.S | 62 +++++++++++++++++++++++++++++++++--------- 1 file changed, 49 insertions(+), 13 deletions(-) diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S index b7dfd60243b7..32125224fcca 100644 --- a/arch/x86/lib/putuser.S +++ b/arch/x86/lib/putuser.S @@ -47,8 +47,6 @@ SYM_FUNC_START(__put_user_1) LOAD_TASK_SIZE_MINUS_N(0) cmp %_ASM_BX,%_ASM_CX jae .Lbad_put_user -SYM_INNER_LABEL(__put_user_nocheck_1, SYM_L_GLOBAL) - ENDBR ASM_STAC 1: movb %al,(%_ASM_CX) xor %ecx,%ecx @@ -56,54 +54,87 @@ SYM_INNER_LABEL(__put_user_nocheck_1, SYM_L_GLOBAL) RET SYM_FUNC_END(__put_user_1) EXPORT_SYMBOL(__put_user_1) + +SYM_FUNC_START(__put_user_nocheck_1) + ENDBR + ASM_STAC +2: movb %al,(%_ASM_CX) + xor %ecx,%ecx + ASM_CLAC + RET +SYM_FUNC_END(__put_user_nocheck_1) EXPORT_SYMBOL(__put_user_nocheck_1) SYM_FUNC_START(__put_user_2) LOAD_TASK_SIZE_MINUS_N(1) cmp %_ASM_BX,%_ASM_CX jae .Lbad_put_user -SYM_INNER_LABEL(__put_user_nocheck_2, SYM_L_GLOBAL) - ENDBR ASM_STAC -2: movw %ax,(%_ASM_CX) +3: movw %ax,(%_ASM_CX) xor %ecx,%ecx ASM_CLAC RET SYM_FUNC_END(__put_user_2) EXPORT_SYMBOL(__put_user_2) + +SYM_FUNC_START(__put_user_nocheck_2) + ENDBR + ASM_STAC +4: movw %ax,(%_ASM_CX) + xor %ecx,%ecx + ASM_CLAC + RET +SYM_FUNC_END(__put_user_nocheck_2) 
EXPORT_SYMBOL(__put_user_nocheck_2) SYM_FUNC_START(__put_user_4) LOAD_TASK_SIZE_MINUS_N(3) cmp %_ASM_BX,%_ASM_CX jae .Lbad_put_user -SYM_INNER_LABEL(__put_user_nocheck_4, SYM_L_GLOBAL) - ENDBR ASM_STAC -3: movl %eax,(%_ASM_CX) +5: movl %eax,(%_ASM_CX) xor %ecx,%ecx ASM_CLAC RET SYM_FUNC_END(__put_user_4) EXPORT_SYMBOL(__put_user_4) + +SYM_FUNC_START(__put_user_nocheck_4) + ENDBR + ASM_STAC +6: movl %eax,(%_ASM_CX) + xor %ecx,%ecx + ASM_CLAC + RET +SYM_FUNC_END(__put_user_nocheck_4) EXPORT_SYMBOL(__put_user_nocheck_4) SYM_FUNC_START(__put_user_8) LOAD_TASK_SIZE_MINUS_N(7) cmp %_ASM_BX,%_ASM_CX jae .Lbad_put_user -SYM_INNER_LABEL(__put_user_nocheck_8, SYM_L_GLOBAL) - ENDBR ASM_STAC -4: mov %_ASM_AX,(%_ASM_CX) +7: mov %_ASM_AX,(%_ASM_CX) #ifdef CONFIG_X86_32 -5: movl %edx,4(%_ASM_CX) +8: movl %edx,4(%_ASM_CX) #endif xor %ecx,%ecx ASM_CLAC RET SYM_FUNC_END(__put_user_8) EXPORT_SYMBOL(__put_user_8) + +SYM_FUNC_START(__put_user_nocheck_8) + ENDBR + ASM_STAC +9: mov %_ASM_AX,(%_ASM_CX) +#ifdef CONFIG_X86_32 +10: movl %edx,4(%_ASM_CX) +#endif + xor %ecx,%ecx + ASM_CLAC + RET +SYM_FUNC_END(__put_user_nocheck_8) EXPORT_SYMBOL(__put_user_nocheck_8) SYM_CODE_START_LOCAL(.Lbad_put_user_clac) @@ -117,6 +148,11 @@ SYM_CODE_END(.Lbad_put_user_clac) _ASM_EXTABLE_UA(2b, .Lbad_put_user_clac) _ASM_EXTABLE_UA(3b, .Lbad_put_user_clac) _ASM_EXTABLE_UA(4b, .Lbad_put_user_clac) -#ifdef CONFIG_X86_32 _ASM_EXTABLE_UA(5b, .Lbad_put_user_clac) + _ASM_EXTABLE_UA(6b, .Lbad_put_user_clac) + _ASM_EXTABLE_UA(7b, .Lbad_put_user_clac) + _ASM_EXTABLE_UA(9b, .Lbad_put_user_clac) +#ifdef CONFIG_X86_32 + _ASM_EXTABLE_UA(8b, .Lbad_put_user_clac) + _ASM_EXTABLE_UA(10b, .Lbad_put_user_clac) #endif From 8f7c0d8b23c3f5f740a48db31ebadef28af17a22 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Sep 2022 13:11:17 +0200 Subject: [PATCH 0123/4122] x86/Kconfig: Add CONFIG_CALL_THUNKS In preparation for mitigating the Intel SKL RSB underflow issue in software, add a new configuration symbol which allows 
to build the required call thunk infrastructure conditionally. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111146.849523555@infradead.org --- arch/x86/Kconfig | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f408fa87ed94..e18963e77cb1 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2446,6 +2446,14 @@ config CC_HAS_SLS config CC_HAS_RETURN_THUNK def_bool $(cc-option,-mfunction-return=thunk-extern) +config HAVE_CALL_THUNKS + def_bool y + depends on RETHUNK && OBJTOOL + +config CALL_THUNKS + def_bool n + select FUNCTION_ALIGNMENT_16B + menuconfig SPECULATION_MITIGATIONS bool "Mitigations for speculative execution vulnerabilities" default y From bea75b33895f7f87f0c40023e36a2d087e87ffa1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Sep 2022 13:11:18 +0200 Subject: [PATCH 0124/4122] x86/Kconfig: Introduce function padding Now that all functions are 16 byte aligned, add 16 bytes of NOP padding in front of each function. This prepares things for software call stack tracking and kCFI/FineIBT. This significantly increases kernel .text size, around 5.1% on a x86_64-defconfig-ish build. However, per the random access argument used for alignment, these 16 extra bytes are code that wouldn't be used. Performance measurements back this up by showing no significant performance regressions. 
Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111146.950884492@infradead.org --- arch/x86/Kconfig | 20 ++++++++++++- arch/x86/Makefile | 6 ++++ arch/x86/entry/vdso/Makefile | 3 +- arch/x86/include/asm/linkage.h | 51 ++++++++++++++++++++++++++++++++-- include/linux/bpf.h | 4 +++ 5 files changed, 79 insertions(+), 5 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e18963e77cb1..e368fc0daa4a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2446,9 +2446,27 @@ config CC_HAS_SLS config CC_HAS_RETURN_THUNK def_bool $(cc-option,-mfunction-return=thunk-extern) +config CC_HAS_ENTRY_PADDING + def_bool $(cc-option,-fpatchable-function-entry=16,16) + +config FUNCTION_PADDING_CFI + int + default 59 if FUNCTION_ALIGNMENT_64B + default 27 if FUNCTION_ALIGNMENT_32B + default 11 if FUNCTION_ALIGNMENT_16B + default 3 if FUNCTION_ALIGNMENT_8B + default 0 + +# Basically: FUNCTION_ALIGNMENT - 5*CFI_CLANG +# except Kconfig can't do arithmetic :/ +config FUNCTION_PADDING_BYTES + int + default FUNCTION_PADDING_CFI if CFI_CLANG + default FUNCTION_ALIGNMENT + config HAVE_CALL_THUNKS def_bool y - depends on RETHUNK && OBJTOOL + depends on CC_HAS_ENTRY_PADDING && RETHUNK && OBJTOOL config CALL_THUNKS def_bool n diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 415a5d138de4..1640e005092b 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -208,6 +208,12 @@ ifdef CONFIG_SLS KBUILD_CFLAGS += -mharden-sls=all endif +ifdef CONFIG_CALL_THUNKS +PADDING_CFLAGS := -fpatchable-function-entry=$(CONFIG_FUNCTION_PADDING_BYTES),$(CONFIG_FUNCTION_PADDING_BYTES) +KBUILD_CFLAGS += $(PADDING_CFLAGS) +export PADDING_CFLAGS +endif + KBUILD_LDFLAGS += -m elf_$(UTS_MACHINE) ifdef CONFIG_LTO_CLANG diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 3ef611044c8f..838613ac15b8 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -95,7 +95,7 @@ ifneq 
($(RETPOLINE_VDSO_CFLAGS),) endif endif -$(vobjs): KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL) +$(vobjs): KBUILD_CFLAGS := $(filter-out $(PADDING_CFLAGS) $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL) $(vobjs): KBUILD_AFLAGS += -DBUILD_VDSO # @@ -158,6 +158,7 @@ KBUILD_CFLAGS_32 := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 := $(filter-out $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 := $(filter-out $(CC_FLAGS_LTO),$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 := $(filter-out $(CC_FLAGS_CFI),$(KBUILD_CFLAGS_32)) +KBUILD_CFLAGS_32 := $(filter-out $(PADDING_CFLAGS),$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 += -m32 -msoft-float -mregparm=0 -fpic KBUILD_CFLAGS_32 += -fno-stack-protector KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls) diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index c2d6e2733b11..45e0df850645 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -15,8 +15,19 @@ #define __ALIGN .balign CONFIG_FUNCTION_ALIGNMENT, 0x90; #define __ALIGN_STR __stringify(__ALIGN) -#define ASM_FUNC_ALIGN __ALIGN_STR -#define __FUNC_ALIGN __ALIGN +#if defined(CONFIG_CALL_THUNKS) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) +#define FUNCTION_PADDING .skip CONFIG_FUNCTION_ALIGNMENT, 0x90; +#else +#define FUNCTION_PADDING +#endif + +#if (CONFIG_FUNCTION_ALIGNMENT > 8) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) +# define __FUNC_ALIGN __ALIGN; FUNCTION_PADDING +#else +# define __FUNC_ALIGN __ALIGN +#endif + +#define ASM_FUNC_ALIGN __stringify(__FUNC_ALIGN) #define SYM_F_ALIGN __FUNC_ALIGN #ifdef __ASSEMBLY__ @@ -45,11 +56,45 @@ #endif /* __ASSEMBLY__ */ +/* + * Depending on -fpatchable-function-entry=N,N usage (CONFIG_CALL_THUNKS) the + * CFI symbol layout changes.
+ * + * Without CALL_THUNKS: + * + * .align FUNCTION_ALIGNMENT + * __cfi_##name: + * .skip FUNCTION_PADDING, 0x90 + * .byte 0xb8 + * .long __kcfi_typeid_##name + * name: + * + * With CALL_THUNKS: + * + * .align FUNCTION_ALIGNMENT + * __cfi_##name: + * .byte 0xb8 + * .long __kcfi_typeid_##name + * .skip FUNCTION_PADDING, 0x90 + * name: + * + * In both cases the whole thing is FUNCTION_ALIGNMENT aligned and sized. + */ + +#ifdef CONFIG_CALL_THUNKS +#define CFI_PRE_PADDING +#define CFI_POST_PADDING .skip CONFIG_FUNCTION_PADDING_BYTES, 0x90; +#else +#define CFI_PRE_PADDING .skip CONFIG_FUNCTION_PADDING_BYTES, 0x90; +#define CFI_POST_PADDING +#endif + #define __CFI_TYPE(name) \ SYM_START(__cfi_##name, SYM_L_LOCAL, SYM_A_NONE) \ - .fill 11, 1, 0x90 ASM_NL \ + CFI_PRE_PADDING \ .byte 0xb8 ASM_NL \ .long __kcfi_typeid_##name ASM_NL \ + CFI_POST_PADDING \ SYM_FUNC_END(__cfi_##name) /* SYM_TYPED_FUNC_START -- use for indirectly called globals, w/ CFI type */ diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 9e7d46d16032..5296aea9b5b4 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -984,7 +984,11 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func } #ifdef CONFIG_X86_64 +#ifdef CONFIG_CALL_THUNKS +#define BPF_DISPATCHER_ATTRIBUTES __attribute__((patchable_function_entry(5+CONFIG_FUNCTION_PADDING_BYTES,CONFIG_FUNCTION_PADDING_BYTES))) +#else #define BPF_DISPATCHER_ATTRIBUTES __attribute__((patchable_function_entry(5))) +#endif #else #define BPF_DISPATCHER_ATTRIBUTES #endif From 80e4c1cd42fff110bfdae8fce7ac4f22465f9664 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Sep 2022 13:11:19 +0200 Subject: [PATCH 0125/4122] x86/retbleed: Add X86_FEATURE_CALL_DEPTH Intel SKL CPUs fall back to other predictors when the RSB underflows. The only microcode mitigation is IBRS which is insanely expensive. It comes with performance drops of up to 30% depending on the workload. 
A way less expensive, but nevertheless horrible mitigation is to track the call depth in software and overeagerly fill the RSB when returns underflow the software counter. Provide a configuration symbol and a CPU misfeature bit. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111147.056176424@infradead.org --- arch/x86/Kconfig | 19 +++++++++++++++++++ arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/disabled-features.h | 9 ++++++++- 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e368fc0daa4a..6ae7fa4b8eb7 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2523,6 +2523,25 @@ config CPU_UNRET_ENTRY help Compile the kernel with support for the retbleed=unret mitigation. +config CALL_DEPTH_TRACKING + bool "Mitigate RSB underflow with call depth tracking" + depends on CPU_SUP_INTEL && HAVE_CALL_THUNKS + select HAVE_DYNAMIC_FTRACE_NO_PATCHABLE + select CALL_THUNKS + default y + help + Compile the kernel with call depth tracking to mitigate the Intel + SKL Return-Stack-Buffer (RSB) underflow issue. The + mitigation is off by default and needs to be enabled on the + kernel command line via the retbleed=stuff option. For + non-affected systems the overhead of this option is marginal as + the call depth tracking is using run-time generated call thunks + in a compiler generated padding area and call patching. This + increases text size by ~5%. For non-affected systems this space + is unused. On affected SKL systems this results in a significant + performance gain over the IBRS mitigation.
+ + config CPU_IBPB_ENTRY bool "Enable IBPB on kernel entry" depends on CPU_SUP_AMD && X86_64 diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index b71f4f2ecdd5..aefd0816a333 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -304,6 +304,7 @@ #define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */ #define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */ #define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */ +#define X86_FEATURE_CALL_DEPTH (11*32+18) /* "" Call depth tracking for RSB stuffing */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 33d2cd04d254..bbb03b25263e 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -69,6 +69,12 @@ # define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31)) #endif +#ifdef CONFIG_CALL_DEPTH_TRACKING +# define DISABLE_CALL_DEPTH_TRACKING 0 +#else +# define DISABLE_CALL_DEPTH_TRACKING (1 << (X86_FEATURE_CALL_DEPTH & 31)) +#endif + #ifdef CONFIG_INTEL_IOMMU_SVM # define DISABLE_ENQCMD 0 #else @@ -101,7 +107,8 @@ #define DISABLED_MASK8 (DISABLE_TDX_GUEST) #define DISABLED_MASK9 (DISABLE_SGX) #define DISABLED_MASK10 0 -#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET) +#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \ + DISABLE_CALL_DEPTH_TRACKING) #define DISABLED_MASK12 0 #define DISABLED_MASK13 0 #define DISABLED_MASK14 0 From fe54d0793796ccdb213d8ea7bff0b49903b6afaa Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Sep 2022 13:11:20 +0200 Subject: [PATCH 0126/4122] x86/alternatives: Provide text_poke_copy_locked() The upcoming call thunk patching must hold text_mutex 
and needs access to text_poke_copy(), which takes text_mutex. Provide a _locked postfixed variant to expose the inner workings. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111147.159977224@infradead.org --- arch/x86/include/asm/text-patching.h | 1 + arch/x86/kernel/alternative.c | 37 +++++++++++++++++----------- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index 1cc15528ce29..f4b87f08f5c5 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -45,6 +45,7 @@ extern void *text_poke(void *addr, const void *opcode, size_t len); extern void text_poke_sync(void); extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len); extern void *text_poke_copy(void *addr, const void *opcode, size_t len); +extern void *text_poke_copy_locked(void *addr, const void *opcode, size_t len, bool core_ok); extern void *text_poke_set(void *addr, int c, size_t len); extern int poke_int3_handler(struct pt_regs *regs); extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate); diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 5cadcea035e0..fad3c0e4838e 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -1236,6 +1236,27 @@ void *text_poke_kgdb(void *addr, const void *opcode, size_t len) return __text_poke(text_poke_memcpy, addr, opcode, len); } +void *text_poke_copy_locked(void *addr, const void *opcode, size_t len, + bool core_ok) +{ + unsigned long start = (unsigned long)addr; + size_t patched = 0; + + if (WARN_ON_ONCE(!core_ok && core_kernel_text(start))) + return NULL; + + while (patched < len) { + unsigned long ptr = start + patched; + size_t s; + + s = min_t(size_t, PAGE_SIZE * 2 - offset_in_page(ptr), len - patched); + + __text_poke(text_poke_memcpy, (void *)ptr, opcode + patched, s); + 
patched += s; + } + return addr; +} + /** * text_poke_copy - Copy instructions into (an unused part of) RX memory * @addr: address to modify @@ -1250,22 +1271,8 @@ void *text_poke_kgdb(void *addr, const void *opcode, size_t len) */ void *text_poke_copy(void *addr, const void *opcode, size_t len) { - unsigned long start = (unsigned long)addr; - size_t patched = 0; - - if (WARN_ON_ONCE(core_kernel_text(start))) - return NULL; - mutex_lock(&text_mutex); - while (patched < len) { - unsigned long ptr = start + patched; - size_t s; - - s = min_t(size_t, PAGE_SIZE * 2 - offset_in_page(ptr), len - patched); - - __text_poke(text_poke_memcpy, (void *)ptr, opcode + patched, s); - patched += s; - } + addr = text_poke_copy_locked(addr, opcode, len, false); mutex_unlock(&text_mutex); return addr; } From c22cf380c79c4bb0e502b0343f57271b17626424 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Sep 2022 13:11:21 +0200 Subject: [PATCH 0127/4122] x86/entry: Make some entry symbols global paranoid_entry(), error_entry() and xen_error_entry() have to be exempted from call accounting by thunk patching because they are before UNTRAIN_RET. Expose them so they are available in the alternative code. 
Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111147.265598113@infradead.org --- arch/x86/entry/entry_64.S | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index b24b84b3425f..4cc0125fdfdc 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -327,7 +327,8 @@ SYM_CODE_END(ret_from_fork) #endif .endm -SYM_CODE_START_LOCAL(xen_error_entry) +SYM_CODE_START(xen_error_entry) + ANNOTATE_NOENDBR UNWIND_HINT_FUNC PUSH_AND_CLEAR_REGS save_ret=1 ENCODE_FRAME_POINTER 8 @@ -906,7 +907,8 @@ SYM_CODE_END(xen_failsafe_callback) * R14 - old CR3 * R15 - old SPEC_CTRL */ -SYM_CODE_START_LOCAL(paranoid_entry) +SYM_CODE_START(paranoid_entry) + ANNOTATE_NOENDBR UNWIND_HINT_FUNC PUSH_AND_CLEAR_REGS save_ret=1 ENCODE_FRAME_POINTER 8 @@ -1041,7 +1043,8 @@ SYM_CODE_END(paranoid_exit) /* * Switch GS and CR3 if needed. */ -SYM_CODE_START_LOCAL(error_entry) +SYM_CODE_START(error_entry) + ANNOTATE_NOENDBR UNWIND_HINT_FUNC PUSH_AND_CLEAR_REGS save_ret=1 From 239f2e248ef12840178a3ed1a217f19b5fbfde26 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Sep 2022 13:11:22 +0200 Subject: [PATCH 0128/4122] x86/paravirt: Make struct paravirt_call_site unconditionally available For the upcoming call thunk patching it's less ifdeffery when the data structure is unconditionally available. The code can then be trivially fenced off with IS_ENABLED(). 
Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111147.367853167@infradead.org --- arch/x86/include/asm/paravirt.h | 4 ++-- arch/x86/include/asm/paravirt_types.h | 20 ++++++++++++-------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 1be66c15ecbd..2851bc2339d5 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -4,13 +4,13 @@ /* Various instructions on x86 need to be replaced for * para-virtualization: those hooks are defined here. */ +#include + #ifdef CONFIG_PARAVIRT #include #include #include -#include - #ifndef __ASSEMBLY__ #include #include diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index f3d601574730..e137d9412123 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -2,6 +2,17 @@ #ifndef _ASM_X86_PARAVIRT_TYPES_H #define _ASM_X86_PARAVIRT_TYPES_H +#ifndef __ASSEMBLY__ +/* These all sit in the .parainstructions section to tell us what to patch. */ +struct paravirt_patch_site { + u8 *instr; /* original instructions */ + u8 type; /* type of this instruction */ + u8 len; /* length of original instruction */ +}; +#endif + +#ifdef CONFIG_PARAVIRT + /* Bitmask of what can be clobbered: usually at least eax. */ #define CLBR_EAX (1 << 0) #define CLBR_ECX (1 << 1) @@ -593,16 +604,9 @@ unsigned long paravirt_ret0(void); #define paravirt_nop ((void *)_paravirt_nop) -/* These all sit in the .parainstructions section to tell us what to patch. 
*/ -struct paravirt_patch_site { - u8 *instr; /* original instructions */ - u8 type; /* type of this instruction */ - u8 len; /* length of original instruction */ -}; - extern struct paravirt_patch_site __parainstructions[], __parainstructions_end[]; #endif /* __ASSEMBLY__ */ - +#endif /* CONFIG_PARAVIRT */ #endif /* _ASM_X86_PARAVIRT_TYPES_H */ From e81dc127ef69887c72735a3e3868930e2bf313ed Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Sep 2022 13:11:23 +0200 Subject: [PATCH 0129/4122] x86/callthunks: Add call patching for call depth tracking Mitigating the Intel SKL RSB underflow issue in software requires tracking the call depth. That is, every CALL and every RET needs to be intercepted and additional code injected. The existing retbleed mitigations already include means of redirecting RET to __x86_return_thunk; this can be re-purposed and RET can be redirected to another function doing RET accounting. CALL accounting will use the function padding introduced in prior patches. For each CALL instruction, the destination symbol's padding is rewritten to do the accounting and the CALL instruction is adjusted to call into the padding. This ensures only affected CPUs pay the overhead of this accounting. Unaffected CPUs will leave the padding unused and have their 'JMP __x86_return_thunk' replaced with an actual 'RET' instruction. Objtool has been modified to supply a .call_sites section that lists all the 'CALL' instructions. Additionally the paravirt instruction sites are iterated since they will have been patched from an indirect call to direct calls (or direct instructions in which case they'll be ignored). Module handling and the actual thunk code for SKL will be added in subsequent steps.
Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111147.470877038@infradead.org --- arch/x86/Kconfig | 12 ++ arch/x86/include/asm/alternative.h | 12 ++ arch/x86/kernel/Makefile | 2 + arch/x86/kernel/alternative.c | 6 + arch/x86/kernel/callthunks.c | 251 +++++++++++++++++++++++++++ arch/x86/kernel/head_64.S | 1 + arch/x86/kernel/relocate_kernel_64.S | 5 +- arch/x86/kernel/vmlinux.lds.S | 8 - 8 files changed, 287 insertions(+), 10 deletions(-) create mode 100644 arch/x86/kernel/callthunks.c diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6ae7fa4b8eb7..a1dae9d5e3da 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2541,6 +2541,18 @@ config CALL_DEPTH_TRACKING is unused. On affected SKL systems this results in a significant performance gain over the IBRS mitigation. +config CALL_THUNKS_DEBUG + bool "Enable call thunks and call depth tracking debugging" + depends on CALL_DEPTH_TRACKING + select FUNCTION_ALIGNMENT_32B + default n + help + Enable call/ret counters for imbalance detection and build in + a noisy dmesg about callthunks generation and call patching for + troubleshooting. The debug prints need to be enabled on the + kernel command line with 'debug-callthunks'. + Only enable this when you are debugging call thunks as this + creates a noticeable runtime overhead. If unsure, say N.
config CPU_IBPB_ENTRY bool "Enable IBPB on kernel entry" diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 9542c582d546..6b7bbd0db248 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -80,6 +80,18 @@ extern void apply_returns(s32 *start, s32 *end); extern void apply_ibt_endbr(s32 *start, s32 *end); struct module; +struct paravirt_patch_site; + +struct callthunk_sites { + s32 *call_start, *call_end; + struct paravirt_patch_site *pv_start, *pv_end; +}; + +#ifdef CONFIG_CALL_THUNKS +extern void callthunks_patch_builtin_calls(void); +#else +static __always_inline void callthunks_patch_builtin_calls(void) {} +#endif #ifdef CONFIG_SMP extern void alternatives_smp_module_add(struct module *mod, char *name, diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index f901658d9f7c..c2739a5886fa 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -143,6 +143,8 @@ obj-$(CONFIG_AMD_MEM_ENCRYPT) += sev.o obj-$(CONFIG_CFI_CLANG) += cfi.o +obj-$(CONFIG_CALL_THUNKS) += callthunks.o + ### # 64 bit specific files ifeq ($(CONFIG_X86_64),y) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index fad3c0e4838e..963872d17707 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -947,6 +947,12 @@ void __init alternative_instructions(void) */ apply_alternatives(__alt_instructions, __alt_instructions_end); + /* + * Now all calls are established. Apply the call thunks if + * required. 
+ */ + callthunks_patch_builtin_calls(); + apply_ibt_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end); #ifdef CONFIG_SMP diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c new file mode 100644 index 000000000000..e5275d6e674d --- /dev/null +++ b/arch/x86/kernel/callthunks.c @@ -0,0 +1,251 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#define pr_fmt(fmt) "callthunks: " fmt + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int __initdata_or_module debug_callthunks; + +#define prdbg(fmt, args...) \ +do { \ + if (debug_callthunks) \ + printk(KERN_DEBUG pr_fmt(fmt), ##args); \ +} while(0) + +static int __init debug_thunks(char *str) +{ + debug_callthunks = 1; + return 1; +} +__setup("debug-callthunks", debug_thunks); + +extern s32 __call_sites[], __call_sites_end[]; + +struct thunk_desc { + void *template; + unsigned int template_size; +}; + +struct core_text { + unsigned long base; + unsigned long end; + const char *name; +}; + +static bool thunks_initialized __ro_after_init; + +static const struct core_text builtin_coretext = { + .base = (unsigned long)_text, + .end = (unsigned long)_etext, + .name = "builtin", +}; + +static struct thunk_desc callthunk_desc __ro_after_init; + +extern void error_entry(void); +extern void xen_error_entry(void); +extern void paranoid_entry(void); + +static inline bool within_coretext(const struct core_text *ct, void *addr) +{ + unsigned long p = (unsigned long)addr; + + return ct->base <= p && p < ct->end; +} + +static inline bool within_module_coretext(void *addr) +{ + bool ret = false; + +#ifdef CONFIG_MODULES + struct module *mod; + + preempt_disable(); + mod = __module_address((unsigned long)addr); + if (mod && within_module_core((unsigned long)addr, mod)) + ret = true; + preempt_enable(); +#endif + return ret; +} + +static bool is_coretext(const struct core_text *ct, void *addr) +{ + if (ct && 
within_coretext(ct, addr)) + return true; + if (within_coretext(&builtin_coretext, addr)) + return true; + return within_module_coretext(addr); +} + +static __init_or_module bool skip_addr(void *dest) +{ + if (dest == error_entry) + return true; + if (dest == paranoid_entry) + return true; + if (dest == xen_error_entry) + return true; + /* Does FILL_RSB... */ + if (dest == __switch_to_asm) + return true; + /* Accounts directly */ + if (dest == ret_from_fork) + return true; +#ifdef CONFIG_HOTPLUG_CPU + if (dest == start_cpu0) + return true; +#endif +#ifdef CONFIG_FUNCTION_TRACER + if (dest == __fentry__) + return true; +#endif +#ifdef CONFIG_KEXEC_CORE + if (dest >= (void *)relocate_kernel && + dest < (void*)relocate_kernel + KEXEC_CONTROL_CODE_MAX_SIZE) + return true; +#endif +#ifdef CONFIG_XEN + if (dest >= (void *)hypercall_page && + dest < (void*)hypercall_page + PAGE_SIZE) + return true; +#endif + return false; +} + +static __init_or_module void *call_get_dest(void *addr) +{ + struct insn insn; + void *dest; + int ret; + + ret = insn_decode_kernel(&insn, addr); + if (ret) + return ERR_PTR(ret); + + /* Patched out call? */ + if (insn.opcode.bytes[0] != CALL_INSN_OPCODE) + return NULL; + + dest = addr + insn.length + insn.immediate.value; + if (skip_addr(dest)) + return NULL; + return dest; +} + +static const u8 nops[] = { + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, +}; + +static __init_or_module void *patch_dest(void *dest, bool direct) +{ + unsigned int tsize = callthunk_desc.template_size; + u8 *pad = dest - tsize; + + /* Already patched? 
*/ + if (!bcmp(pad, callthunk_desc.template, tsize)) + return pad; + + /* Ensure there are nops */ + if (bcmp(pad, nops, tsize)) { + pr_warn_once("Invalid padding area for %pS\n", dest); + return NULL; + } + + if (direct) + memcpy(pad, callthunk_desc.template, tsize); + else + text_poke_copy_locked(pad, callthunk_desc.template, tsize, true); + return pad; +} + +static __init_or_module void patch_call(void *addr, const struct core_text *ct) +{ + void *pad, *dest; + u8 bytes[8]; + + if (!within_coretext(ct, addr)) + return; + + dest = call_get_dest(addr); + if (!dest || WARN_ON_ONCE(IS_ERR(dest))) + return; + + if (!is_coretext(ct, dest)) + return; + + pad = patch_dest(dest, within_coretext(ct, dest)); + if (!pad) + return; + + prdbg("Patch call at: %pS %px to %pS %px -> %px \n", addr, addr, + dest, dest, pad); + __text_gen_insn(bytes, CALL_INSN_OPCODE, addr, pad, CALL_INSN_SIZE); + text_poke_early(addr, bytes, CALL_INSN_SIZE); +} + +static __init_or_module void +patch_call_sites(s32 *start, s32 *end, const struct core_text *ct) +{ + s32 *s; + + for (s = start; s < end; s++) + patch_call((void *)s + *s, ct); +} + +static __init_or_module void +patch_paravirt_call_sites(struct paravirt_patch_site *start, + struct paravirt_patch_site *end, + const struct core_text *ct) +{ + struct paravirt_patch_site *p; + + for (p = start; p < end; p++) + patch_call(p->instr, ct); +} + +static __init_or_module void +callthunks_setup(struct callthunk_sites *cs, const struct core_text *ct) +{ + prdbg("Patching call sites %s\n", ct->name); + patch_call_sites(cs->call_start, cs->call_end, ct); + patch_paravirt_call_sites(cs->pv_start, cs->pv_end, ct); + prdbg("Patching call sites done%s\n", ct->name); +} + +void __init callthunks_patch_builtin_calls(void) +{ + struct callthunk_sites cs = { + .call_start = __call_sites, + .call_end = __call_sites_end, + .pv_start = __parainstructions, + .pv_end = __parainstructions_end + }; + + if (!cpu_feature_enabled(X86_FEATURE_CALL_DEPTH)) + return; + 
+ pr_info("Setting up call depth tracking\n"); + mutex_lock(&text_mutex); + callthunks_setup(&cs, &builtin_coretext); + thunks_initialized = true; + mutex_unlock(&text_mutex); +} diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index d860d437631b..222efd4a09bc 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -370,6 +370,7 @@ SYM_CODE_END(secondary_startup_64) * start_secondary() via .Ljump_to_C_code. */ SYM_CODE_START(start_cpu0) + ANNOTATE_NOENDBR UNWIND_HINT_EMPTY movq initial_stack(%rip), %rsp jmp .Ljump_to_C_code diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index 4809c0dc4eb0..4a73351f87f8 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -41,6 +41,7 @@ .text .align PAGE_SIZE .code64 +SYM_CODE_START_NOALIGN(relocate_range) SYM_CODE_START_NOALIGN(relocate_kernel) UNWIND_HINT_EMPTY ANNOTATE_NOENDBR @@ -312,5 +313,5 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages) int3 SYM_CODE_END(swap_pages) - .globl kexec_control_code_size -.set kexec_control_code_size, . - relocate_kernel + .skip KEXEC_CONTROL_CODE_MAX_SIZE - (. - relocate_kernel), 0xcc +SYM_CODE_END(relocate_range); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index b69df9e013cc..49f3f86433c7 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -501,11 +501,3 @@ INIT_PER_CPU(irq_stack_backing_store); #endif #endif /* CONFIG_X86_64 */ - -#ifdef CONFIG_KEXEC_CORE -#include - -. = ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, - "kexec control code size is too big"); -#endif - From eaf44c816ed8d1ef94c354e3ed47d53cd5a5cb13 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Sep 2022 13:11:24 +0200 Subject: [PATCH 0130/4122] x86/modules: Add call patching As for the builtins create call thunks and patch the call sites to call the thunk on Intel SKL CPUs for retbleed mitigation. 
Note that module init functions are ignored for the sake of simplicity because loading modules is not something which is done in high-frequency loops and the attacker does not really have a handle on when this happens in order to launch a matching attack. The depth tracking will still work for calls into the builtins and because the call is not accounted it will underflow faster and overstuff, but that's mitigated by the saturating counter and the side effect is only temporary. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111147.575673066@infradead.org --- arch/x86/include/asm/alternative.h | 5 +++++ arch/x86/kernel/callthunks.c | 19 +++++++++++++++++++ arch/x86/kernel/module.c | 20 +++++++++++++++++++- 3 files changed, 43 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 6b7bbd0db248..ef007fa33dc4 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -89,8 +89,13 @@ struct callthunk_sites { #ifdef CONFIG_CALL_THUNKS extern void callthunks_patch_builtin_calls(void); +extern void callthunks_patch_module_calls(struct callthunk_sites *sites, + struct module *mod); #else static __always_inline void callthunks_patch_builtin_calls(void) {} +static __always_inline void +callthunks_patch_module_calls(struct callthunk_sites *sites, + struct module *mod) {} #endif #ifdef CONFIG_SMP diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c index e5275d6e674d..7b9d998ebd7d 100644 --- a/arch/x86/kernel/callthunks.c +++ b/arch/x86/kernel/callthunks.c @@ -249,3 +249,22 @@ void __init callthunks_patch_builtin_calls(void) thunks_initialized = true; mutex_unlock(&text_mutex); } + +#ifdef CONFIG_MODULES +void noinline callthunks_patch_module_calls(struct callthunk_sites *cs, + struct module *mod) +{ + struct core_text ct = { + .base = (unsigned long)mod->core_layout.base, + .end = (unsigned
long)mod->core_layout.base + mod->core_layout.size, + .name = mod->name, + }; + + if (!thunks_initialized) + return; + + mutex_lock(&text_mutex); + callthunks_setup(cs, &ct); + mutex_unlock(&text_mutex); +} +#endif /* CONFIG_MODULES */ diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 43f011277219..2fb9de2cef40 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -254,7 +254,8 @@ int module_finalize(const Elf_Ehdr *hdr, { const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, *para = NULL, *orc = NULL, *orc_ip = NULL, - *retpolines = NULL, *returns = NULL, *ibt_endbr = NULL; + *retpolines = NULL, *returns = NULL, *ibt_endbr = NULL, + *calls = NULL; char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { @@ -274,6 +275,8 @@ int module_finalize(const Elf_Ehdr *hdr, retpolines = s; if (!strcmp(".return_sites", secstrings + s->sh_name)) returns = s; + if (!strcmp(".call_sites", secstrings + s->sh_name)) + calls = s; if (!strcmp(".ibt_endbr_seal", secstrings + s->sh_name)) ibt_endbr = s; } @@ -299,6 +302,21 @@ int module_finalize(const Elf_Ehdr *hdr, void *aseg = (void *)alt->sh_addr; apply_alternatives(aseg, aseg + alt->sh_size); } + if (calls || para) { + struct callthunk_sites cs = {}; + + if (calls) { + cs.call_start = (void *)calls->sh_addr; + cs.call_end = (void *)calls->sh_addr + calls->sh_size; + } + + if (para) { + cs.pv_start = (void *)para->sh_addr; + cs.pv_end = (void *)para->sh_addr + para->sh_size; + } + + callthunks_patch_module_calls(&cs, me); + } if (ibt_endbr) { void *iseg = (void *)ibt_endbr->sh_addr; apply_ibt_endbr(iseg, iseg + ibt_endbr->sh_size); From 770ae1b709528a6a173b5c7b183818ee9b45e376 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 15 Sep 2022 13:11:25 +0200 Subject: [PATCH 0131/4122] x86/returnthunk: Allow different return thunks In preparation for call depth tracking on Intel SKL CPUs, make it possible to patch in a SKL 
specific return thunk. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111147.680469665@infradead.org --- arch/x86/include/asm/nospec-branch.h | 6 ++++++ arch/x86/kernel/alternative.c | 17 +++++++++++++---- arch/x86/kernel/ftrace.c | 2 +- arch/x86/kernel/static_call.c | 2 +- arch/x86/net/bpf_jit_comp.c | 2 +- 5 files changed, 22 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index c936ce9f0c47..f10ca334dd75 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -208,6 +208,12 @@ extern void __x86_return_thunk(void); extern void zen_untrain_ret(void); extern void entry_ibpb(void); +#ifdef CONFIG_CALL_THUNKS +extern void (*x86_return_thunk)(void); +#else +#define x86_return_thunk (&__x86_return_thunk) +#endif + #ifdef CONFIG_RETPOLINE #define GEN(reg) \ diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 963872d17707..04d1e3d35b0e 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -518,6 +518,11 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) } #ifdef CONFIG_RETHUNK + +#ifdef CONFIG_CALL_THUNKS +void (*x86_return_thunk)(void) __ro_after_init = &__x86_return_thunk; +#endif + /* * Rewrite the compiler generated return thunk tail-calls. 
* @@ -533,14 +538,18 @@ static int patch_return(void *addr, struct insn *insn, u8 *bytes) { int i = 0; - if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) - return -1; + if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) { + if (x86_return_thunk == __x86_return_thunk) + return -1; - bytes[i++] = RET_INSN_OPCODE; + i = JMP32_INSN_SIZE; + __text_gen_insn(bytes, JMP32_INSN_OPCODE, addr, x86_return_thunk, i); + } else { + bytes[i++] = RET_INSN_OPCODE; + } for (; i < insn->length;) bytes[i++] = INT3_INSN_OPCODE; - return i; } diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 00eac455a3a1..4ac6692d5ef8 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -359,7 +359,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) ip = trampoline + size; if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) - __text_gen_insn(ip, JMP32_INSN_OPCODE, ip, &__x86_return_thunk, JMP32_INSN_SIZE); + __text_gen_insn(ip, JMP32_INSN_OPCODE, ip, x86_return_thunk, JMP32_INSN_SIZE); else memcpy(ip, retq, sizeof(retq)); diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index aaaba85d6d7f..5d3844a98373 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -52,7 +52,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, case RET: if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) - code = text_gen_insn(JMP32_INSN_OPCODE, insn, &__x86_return_thunk); + code = text_gen_insn(JMP32_INSN_OPCODE, insn, x86_return_thunk); else code = &retinsn; break; diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 99620428ad78..0df391ecd4d8 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -432,7 +432,7 @@ static void emit_return(u8 **pprog, u8 *ip) u8 *prog = *pprog; if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) { - emit_jump(&prog, &__x86_return_thunk, ip); + emit_jump(&prog, x86_return_thunk, ip); } else { EMIT1(0xC3); /* ret */ if 
(IS_ENABLED(CONFIG_SLS)) From 52354973573cc260ff2fc661cb28ff8eaa7b879b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 15 Sep 2022 13:11:26 +0200 Subject: [PATCH 0132/4122] x86/asm: Provide ALTERNATIVE_3 Fairly straight forward adaptation/extention of ALTERNATIVE_2. Required for call depth tracking. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111147.787711192@infradead.org --- arch/x86/include/asm/alternative.h | 33 +++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index ef007fa33dc4..4c416b21bac8 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -364,6 +364,7 @@ static inline int alternatives_text_reserved(void *start, void *end) #define old_len 141b-140b #define new_len1 144f-143f #define new_len2 145f-144f +#define new_len3 146f-145f /* * gas compatible max based on the idea from: @@ -371,7 +372,8 @@ static inline int alternatives_text_reserved(void *start, void *end) * * The additional "-" is needed because gas uses a "true" value of -1. 
*/ -#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b))))) +#define alt_max_2(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b))))) +#define alt_max_3(a, b, c) (alt_max_2(alt_max_2(a, b), c)) /* @@ -383,8 +385,8 @@ static inline int alternatives_text_reserved(void *start, void *end) 140: \oldinstr 141: - .skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \ - (alt_max_short(new_len1, new_len2) - (old_len)),0x90 + .skip -((alt_max_2(new_len1, new_len2) - (old_len)) > 0) * \ + (alt_max_2(new_len1, new_len2) - (old_len)),0x90 142: .pushsection .altinstructions,"a" @@ -401,6 +403,31 @@ static inline int alternatives_text_reserved(void *start, void *end) .popsection .endm +.macro ALTERNATIVE_3 oldinstr, newinstr1, feature1, newinstr2, feature2, newinstr3, feature3 +140: + \oldinstr +141: + .skip -((alt_max_3(new_len1, new_len2, new_len3) - (old_len)) > 0) * \ + (alt_max_3(new_len1, new_len2, new_len3) - (old_len)),0x90 +142: + + .pushsection .altinstructions,"a" + altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f + altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f + altinstruction_entry 140b,145f,\feature3,142b-140b,146f-145f + .popsection + + .pushsection .altinstr_replacement,"ax" +143: + \newinstr1 +144: + \newinstr2 +145: + \newinstr3 +146: + .popsection +.endm + /* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */ #define ALTERNATIVE_TERNARY(oldinstr, feature, newinstr_yes, newinstr_no) \ ALTERNATIVE_2 oldinstr, newinstr_no, X86_FEATURE_ALWAYS, \ From 5d8213864ade86b48fc492584ea86d65a62f892e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Sep 2022 13:11:27 +0200 Subject: [PATCH 0133/4122] x86/retbleed: Add SKL return thunk To address the Intel SKL RSB underflow issue in software it's required to do call depth tracking. Provide a return thunk for call depth tracking on Intel SKL CPUs. The tracking does not use a counter. 
It uses arithmetic shift right on call entry and logical shift left on return. The depth tracking variable is initialized to 0x8000.... when the call depth is zero. The arithmetic shift right sign extends the MSB and saturates after the 12th call. The shift count is 5 so the tracking covers 12 nested calls. On return the variable is shifted left logically so it becomes zero again. CALL RET 0: 0x8000000000000000 0x0000000000000000 1: 0xfc00000000000000 0xf000000000000000 ... 11: 0xfffffffffffffff8 0xfffffffffffffc00 12: 0xffffffffffffffff 0xffffffffffffffe0 After a return buffer fill the depth is credited 12 calls before the next stuffing has to take place. There is an inaccuracy for situations like this: 10 calls 5 returns 3 calls 4 returns 3 calls .... The shift count might cause this to be off by one in either direction, but there is still a cushion vs. the RSB depth. The algorithm does not claim to be perfect, but it should obfuscate the problem enough to make exploitation extremely difficult. The theory behind this is: RSB is a stack with depth 16 which is filled on every call. On the return path speculation "pops" entries to speculate down the call chain. Once the speculative RSB is empty it switches to other predictors, e.g. the Branch History Buffer, which can be mistrained by user space and misguide the speculation path to a gadget. Call depth tracking is designed to break this speculation path by stuffing speculation trap calls into the RSB which are never getting a corresponding return executed. This stalls the prediction path until it gets resteered. The assumption is that stuffing at the 12th return is sufficient to break the speculation before it hits the underflow and the fallback to the other predictors. Testing confirms that it works. Johannes, one of the retbleed researchers, tried to attack this approach but failed. 
There is obviously no scientific proof that this will withstand future research progress, but all we can do right now is to speculate about it. The SAR/SHL usage was suggested by Andi Kleen. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111147.890071690@infradead.org --- arch/x86/entry/entry_64.S | 10 ++- arch/x86/include/asm/current.h | 3 + arch/x86/include/asm/nospec-branch.h | 121 +++++++++++++++++++++++++-- arch/x86/kernel/asm-offsets.c | 3 + arch/x86/kvm/svm/vmenter.S | 1 + arch/x86/lib/retpoline.S | 31 +++++++ 6 files changed, 159 insertions(+), 10 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 4cc0125fdfdc..15739a2c0983 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -288,6 +288,7 @@ SYM_FUNC_END(__switch_to_asm) SYM_CODE_START_NOALIGN(ret_from_fork) UNWIND_HINT_EMPTY ANNOTATE_NOENDBR // copy_thread + CALL_DEPTH_ACCOUNT movq %rax, %rdi call schedule_tail /* rdi: 'prev' task parameter */ @@ -332,7 +333,7 @@ SYM_CODE_START(xen_error_entry) UNWIND_HINT_FUNC PUSH_AND_CLEAR_REGS save_ret=1 ENCODE_FRAME_POINTER 8 - UNTRAIN_RET + UNTRAIN_RET_FROM_CALL RET SYM_CODE_END(xen_error_entry) @@ -977,7 +978,7 @@ SYM_CODE_START(paranoid_entry) * CR3 above, keep the old value in a callee saved register. */ IBRS_ENTER save_reg=%r15 - UNTRAIN_RET + UNTRAIN_RET_FROM_CALL RET SYM_CODE_END(paranoid_entry) @@ -1062,7 +1063,7 @@ SYM_CODE_START(error_entry) /* We have user CR3. Change to kernel CR3. */ SWITCH_TO_KERNEL_CR3 scratch_reg=%rax IBRS_ENTER - UNTRAIN_RET + UNTRAIN_RET_FROM_CALL leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */ /* Put us onto the real thread stack. 
*/ @@ -1097,6 +1098,7 @@ SYM_CODE_START(error_entry) */ .Lerror_entry_done_lfence: FENCE_SWAPGS_KERNEL_ENTRY + CALL_DEPTH_ACCOUNT leaq 8(%rsp), %rax /* return pt_regs pointer */ ANNOTATE_UNRET_END RET @@ -1115,7 +1117,7 @@ SYM_CODE_START(error_entry) FENCE_SWAPGS_USER_ENTRY SWITCH_TO_KERNEL_CR3 scratch_reg=%rax IBRS_ENTER - UNTRAIN_RET + UNTRAIN_RET_FROM_CALL /* * Pretend that the exception came from user mode: set up pt_regs diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h index b89aba077b84..a1168e7b69e5 100644 --- a/arch/x86/include/asm/current.h +++ b/arch/x86/include/asm/current.h @@ -17,6 +17,9 @@ struct pcpu_hot { struct task_struct *current_task; int preempt_count; int cpu_number; +#ifdef CONFIG_CALL_DEPTH_TRACKING + u64 call_depth; +#endif unsigned long top_of_stack; void *hardirq_stack_ptr; u16 softirq_pending; diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index f10ca334dd75..d4be826a2282 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -12,8 +12,83 @@ #include #include #include +#include -#define RETPOLINE_THUNK_SIZE 32 +/* + * Call depth tracking for Intel SKL CPUs to address the RSB underflow + * issue in software. + * + * The tracking does not use a counter. It uses uses arithmetic shift + * right on call entry and logical shift left on return. + * + * The depth tracking variable is initialized to 0x8000.... when the call + * depth is zero. The arithmetic shift right sign extends the MSB and + * saturates after the 12th call. The shift count is 5 for both directions + * so the tracking covers 12 nested calls. + * + * Call + * 0: 0x8000000000000000 0x0000000000000000 + * 1: 0xfc00000000000000 0xf000000000000000 + * ... + * 11: 0xfffffffffffffff8 0xfffffffffffffc00 + * 12: 0xffffffffffffffff 0xffffffffffffffe0 + * + * After a return buffer fill the depth is credited 12 calls before the + * next stuffing has to take place. 
+ * + * There is a inaccuracy for situations like this: + * + * 10 calls + * 5 returns + * 3 calls + * 4 returns + * 3 calls + * .... + * + * The shift count might cause this to be off by one in either direction, + * but there is still a cushion vs. the RSB depth. The algorithm does not + * claim to be perfect and it can be speculated around by the CPU, but it + * is considered that it obfuscates the problem enough to make exploitation + * extremly difficult. + */ +#define RET_DEPTH_SHIFT 5 +#define RSB_RET_STUFF_LOOPS 16 +#define RET_DEPTH_INIT 0x8000000000000000ULL +#define RET_DEPTH_INIT_FROM_CALL 0xfc00000000000000ULL +#define RET_DEPTH_CREDIT 0xffffffffffffffffULL + +#if defined(CONFIG_CALL_DEPTH_TRACKING) && !defined(COMPILE_OFFSETS) + +#include + +#define CREDIT_CALL_DEPTH \ + movq $-1, PER_CPU_VAR(pcpu_hot + X86_call_depth); + +#define ASM_CREDIT_CALL_DEPTH \ + movq $-1, PER_CPU_VAR(pcpu_hot + X86_call_depth); + +#define RESET_CALL_DEPTH \ + mov $0x80, %rax; \ + shl $56, %rax; \ + movq %rax, PER_CPU_VAR(pcpu_hot + X86_call_depth); + +#define RESET_CALL_DEPTH_FROM_CALL \ + mov $0xfc, %rax; \ + shl $56, %rax; \ + movq %rax, PER_CPU_VAR(pcpu_hot + X86_call_depth); + +#define INCREMENT_CALL_DEPTH \ + sarq $5, %gs:pcpu_hot + X86_call_depth; + +#define ASM_INCREMENT_CALL_DEPTH \ + sarq $5, PER_CPU_VAR(pcpu_hot + X86_call_depth); + +#else +#define CREDIT_CALL_DEPTH +#define RESET_CALL_DEPTH +#define INCREMENT_CALL_DEPTH +#define RESET_CALL_DEPTH_FROM_CALL +#endif /* * Fill the CPU return stack buffer. @@ -32,6 +107,7 @@ * from C via asm(".include ") but let's not go there. */ +#define RETPOLINE_THUNK_SIZE 32 #define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ /* @@ -60,7 +136,8 @@ dec reg; \ jnz 771b; \ /* barrier for jnz misprediction */ \ - lfence; + lfence; \ + ASM_CREDIT_CALL_DEPTH #else /* * i386 doesn't unconditionally have LFENCE, as such it can't @@ -185,11 +262,32 @@ * where we have a stack but before any RET instruction. 
*/ .macro UNTRAIN_RET -#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) +#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \ + defined(CONFIG_X86_FEATURE_CALL_DEPTH) ANNOTATE_UNRET_END - ALTERNATIVE_2 "", \ - CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \ - "call entry_ibpb", X86_FEATURE_ENTRY_IBPB + ALTERNATIVE_3 "", \ + CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \ + "call entry_ibpb", X86_FEATURE_ENTRY_IBPB, \ + __stringify(RESET_CALL_DEPTH), X86_FEATURE_CALL_DEPTH +#endif +.endm + +.macro UNTRAIN_RET_FROM_CALL +#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \ + defined(CONFIG_X86_FEATURE_CALL_DEPTH) + ANNOTATE_UNRET_END + ALTERNATIVE_3 "", \ + CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \ + "call entry_ibpb", X86_FEATURE_ENTRY_IBPB, \ + __stringify(RESET_CALL_DEPTH_FROM_CALL), X86_FEATURE_CALL_DEPTH +#endif +.endm + + +.macro CALL_DEPTH_ACCOUNT +#ifdef CONFIG_CALL_DEPTH_TRACKING + ALTERNATIVE "", \ + __stringify(ASM_INCREMENT_CALL_DEPTH), X86_FEATURE_CALL_DEPTH #endif .endm @@ -214,6 +312,17 @@ extern void (*x86_return_thunk)(void); #define x86_return_thunk (&__x86_return_thunk) #endif +#ifdef CONFIG_CALL_DEPTH_TRACKING +extern void __x86_return_skl(void); + +static inline void x86_set_skl_return_thunk(void) +{ + x86_return_thunk = &__x86_return_skl; +} +#else +static inline void x86_set_skl_return_thunk(void) {} +#endif + #ifdef CONFIG_RETPOLINE #define GEN(reg) \ diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index a9824318e1c5..13afdbbee349 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -110,6 +110,9 @@ static void __used common(void) OFFSET(TSS_sp2, tss_struct, x86_tss.sp2); OFFSET(X86_top_of_stack, pcpu_hot, top_of_stack); +#ifdef CONFIG_CALL_DEPTH_TRACKING + OFFSET(X86_call_depth, pcpu_hot, call_depth); +#endif if (IS_ENABLED(CONFIG_KVM_INTEL)) { BLANK(); diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index 
723f8534986c..09eacf19d718 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include #include +#include #include #include #include diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 073289a55f84..1e79eccc1d69 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -5,9 +5,11 @@ #include #include #include +#include #include #include #include +#include #include .section .text.__x86.indirect_thunk @@ -140,3 +142,32 @@ __EXPORT_THUNK(zen_untrain_ret) EXPORT_SYMBOL(__x86_return_thunk) #endif /* CONFIG_RETHUNK */ + +#ifdef CONFIG_CALL_DEPTH_TRACKING + + .align 64 +SYM_FUNC_START(__x86_return_skl) + ANNOTATE_NOENDBR + /* Keep the hotpath in a 16byte I-fetch */ + shlq $5, PER_CPU_VAR(pcpu_hot + X86_call_depth) + jz 1f + ANNOTATE_UNRET_SAFE + ret + int3 +1: + .rept 16 + ANNOTATE_INTRA_FUNCTION_CALL + call 2f + int3 +2: + .endr + add $(8*16), %rsp + + CREDIT_CALL_DEPTH + + ANNOTATE_UNRET_SAFE + ret + int3 +SYM_FUNC_END(__x86_return_skl) + +#endif /* CONFIG_CALL_DEPTH_TRACKING */ From 3b6c1747da48ff40ab746b0e860cffe83619f5c5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 15 Sep 2022 13:11:28 +0200 Subject: [PATCH 0134/4122] x86/retpoline: Add SKL retthunk retpolines Ensure that retpolines do the proper call accounting so that the return accounting works correctly. Specifically; retpolines are used to replace both 'jmp *%reg' and 'call *%reg', however these two cases do not have the same accounting requirements. Therefore split things up and provide two different retpoline arrays for SKL. The 'jmp *%reg' case needs no accounting, the __x86_indirect_jump_thunk_array[] covers this. The retpoline is changed to not use the return thunk; it's a simple call;ret construct. 
[ strictly speaking it should do: andq $(~0x1f), PER_CPU_VAR(__x86_call_depth) but we can argue this can be covered by the fuzz we already have in the accounting depth (12) vs the RSB depth (16) ] The 'call *%reg' case does need accounting, the __x86_indirect_call_thunk_array[] covers this. Again, this retpoline avoids the use of the return-thunk, in this case to avoid double accounting. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111147.996634749@infradead.org --- arch/x86/include/asm/nospec-branch.h | 12 +++++ arch/x86/kernel/alternative.c | 59 +++++++++++++++++++++-- arch/x86/lib/retpoline.S | 71 ++++++++++++++++++++++++---- arch/x86/net/bpf_jit_comp.c | 5 +- 4 files changed, 135 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index d4be826a2282..06ba7caa0cad 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -301,6 +301,8 @@ typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; extern retpoline_thunk_t __x86_indirect_thunk_array[]; +extern retpoline_thunk_t __x86_indirect_call_thunk_array[]; +extern retpoline_thunk_t __x86_indirect_jump_thunk_array[]; extern void __x86_return_thunk(void); extern void zen_untrain_ret(void); @@ -330,6 +332,16 @@ static inline void x86_set_skl_return_thunk(void) {} #include #undef GEN +#define GEN(reg) \ + extern retpoline_thunk_t __x86_indirect_call_thunk_ ## reg; +#include +#undef GEN + +#define GEN(reg) \ + extern retpoline_thunk_t __x86_indirect_jump_thunk_ ## reg; +#include +#undef GEN + #ifdef CONFIG_X86_64 /* diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 04d1e3d35b0e..19221d77dc27 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -377,6 +377,56 @@ static int emit_indirect(int op, int reg, u8 *bytes) return i; } +static inline bool 
is_jcc32(struct insn *insn) +{ + /* Jcc.d32 second opcode byte is in the range: 0x80-0x8f */ + return insn->opcode.bytes[0] == 0x0f && (insn->opcode.bytes[1] & 0xf0) == 0x80; +} + +static int emit_call_track_retpoline(void *addr, struct insn *insn, int reg, u8 *bytes) +{ + u8 op = insn->opcode.bytes[0]; + int i = 0; + + /* + * Clang does 'weird' Jcc __x86_indirect_thunk_r11 conditional + * tail-calls. Deal with them. + */ + if (is_jcc32(insn)) { + bytes[i++] = op; + op = insn->opcode.bytes[1]; + goto clang_jcc; + } + + if (insn->length == 6) + bytes[i++] = 0x2e; /* CS-prefix */ + + switch (op) { + case CALL_INSN_OPCODE: + __text_gen_insn(bytes+i, op, addr+i, + __x86_indirect_call_thunk_array[reg], + CALL_INSN_SIZE); + i += CALL_INSN_SIZE; + break; + + case JMP32_INSN_OPCODE: +clang_jcc: + __text_gen_insn(bytes+i, op, addr+i, + __x86_indirect_jump_thunk_array[reg], + JMP32_INSN_SIZE); + i += JMP32_INSN_SIZE; + break; + + default: + WARN("%pS %px %*ph\n", addr, addr, 6, addr); + return -1; + } + + WARN_ON_ONCE(i != insn->length); + + return i; +} + /* * Rewrite the compiler generated retpoline thunk calls. 
* @@ -409,8 +459,12 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes) BUG_ON(reg == 4); if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) && - !cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) + !cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) { + if (cpu_feature_enabled(X86_FEATURE_CALL_DEPTH)) + return emit_call_track_retpoline(addr, insn, reg, bytes); + return -1; + } op = insn->opcode.bytes[0]; @@ -427,8 +481,7 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes) * [ NOP ] * 1: */ - /* Jcc.d32 second opcode byte is in the range: 0x80-0x8f */ - if (op == 0x0f && (insn->opcode.bytes[1] & 0xf0) == 0x80) { + if (is_jcc32(insn)) { cc = insn->opcode.bytes[1] & 0xf; cc ^= 1; /* invert condition */ diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 1e79eccc1d69..e00206077ae9 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -14,17 +14,18 @@ .section .text.__x86.indirect_thunk -.macro RETPOLINE reg + +.macro POLINE reg ANNOTATE_INTRA_FUNCTION_CALL call .Ldo_rop_\@ -.Lspec_trap_\@: - UNWIND_HINT_EMPTY - pause - lfence - jmp .Lspec_trap_\@ + int3 .Ldo_rop_\@: mov %\reg, (%_ASM_SP) UNWIND_HINT_FUNC +.endm + +.macro RETPOLINE reg + POLINE \reg RET .endm @@ -54,7 +55,6 @@ SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL) */ #define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym) -#define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg) .align RETPOLINE_THUNK_SIZE SYM_CODE_START(__x86_indirect_thunk_array) @@ -66,10 +66,65 @@ SYM_CODE_START(__x86_indirect_thunk_array) .align RETPOLINE_THUNK_SIZE SYM_CODE_END(__x86_indirect_thunk_array) -#define GEN(reg) EXPORT_THUNK(reg) +#define GEN(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg) #include #undef GEN +#ifdef CONFIG_CALL_DEPTH_TRACKING +.macro CALL_THUNK reg + .align RETPOLINE_THUNK_SIZE + +SYM_INNER_LABEL(__x86_indirect_call_thunk_\reg, SYM_L_GLOBAL) + UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR + + 
CALL_DEPTH_ACCOUNT + POLINE \reg + ANNOTATE_UNRET_SAFE + ret + int3 +.endm + + .align RETPOLINE_THUNK_SIZE +SYM_CODE_START(__x86_indirect_call_thunk_array) + +#define GEN(reg) CALL_THUNK reg +#include +#undef GEN + + .align RETPOLINE_THUNK_SIZE +SYM_CODE_END(__x86_indirect_call_thunk_array) + +#define GEN(reg) __EXPORT_THUNK(__x86_indirect_call_thunk_ ## reg) +#include +#undef GEN + +.macro JUMP_THUNK reg + .align RETPOLINE_THUNK_SIZE + +SYM_INNER_LABEL(__x86_indirect_jump_thunk_\reg, SYM_L_GLOBAL) + UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR + POLINE \reg + ANNOTATE_UNRET_SAFE + ret + int3 +.endm + + .align RETPOLINE_THUNK_SIZE +SYM_CODE_START(__x86_indirect_jump_thunk_array) + +#define GEN(reg) JUMP_THUNK reg +#include +#undef GEN + + .align RETPOLINE_THUNK_SIZE +SYM_CODE_END(__x86_indirect_jump_thunk_array) + +#define GEN(reg) __EXPORT_THUNK(__x86_indirect_jump_thunk_ ## reg) +#include +#undef GEN +#endif /* * This function name is magical and is used by -mfunction-return=thunk-extern * for the compiler to generate JMPs to it. 
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 0df391ecd4d8..ad8cb7f15ab8 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -417,7 +417,10 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip) EMIT2(0xFF, 0xE0 + reg); } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) { OPTIMIZER_HIDE_VAR(reg); - emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip); + if (cpu_feature_enabled(X86_FEATURE_CALL_DEPTH)) + emit_jump(&prog, &__x86_indirect_jump_thunk_array[reg], ip); + else + emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip); } else { EMIT2(0xFF, 0xE0 + reg); /* jmp *%\reg */ if (IS_ENABLED(CONFIG_RETPOLINE) || IS_ENABLED(CONFIG_SLS)) From bbaceb189a21d7245e8063701fe10985396028f9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Sep 2022 13:11:29 +0200 Subject: [PATCH 0135/4122] x86/retbleed: Add SKL call thunk Add the actual SKL call thunk for call depth accounting. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111148.101125588@infradead.org --- arch/x86/kernel/callthunks.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c index 7b9d998ebd7d..01f6f6b5a93c 100644 --- a/arch/x86/kernel/callthunks.c +++ b/arch/x86/kernel/callthunks.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -55,7 +56,21 @@ static const struct core_text builtin_coretext = { .name = "builtin", }; -static struct thunk_desc callthunk_desc __ro_after_init; +asm ( + ".pushsection .rodata \n" + ".global skl_call_thunk_template \n" + "skl_call_thunk_template: \n" + __stringify(INCREMENT_CALL_DEPTH)" \n" + ".global skl_call_thunk_tail \n" + "skl_call_thunk_tail: \n" + ".popsection \n" +); + +extern u8 skl_call_thunk_template[]; +extern u8 skl_call_thunk_tail[]; + +#define SKL_TMPL_SIZE \ + ((unsigned int)(skl_call_thunk_tail - 
skl_call_thunk_template)) extern void error_entry(void); extern void xen_error_entry(void); @@ -157,11 +172,11 @@ static const u8 nops[] = { static __init_or_module void *patch_dest(void *dest, bool direct) { - unsigned int tsize = callthunk_desc.template_size; + unsigned int tsize = SKL_TMPL_SIZE; u8 *pad = dest - tsize; /* Already patched? */ - if (!bcmp(pad, callthunk_desc.template, tsize)) + if (!bcmp(pad, skl_call_thunk_template, tsize)) return pad; /* Ensure there are nops */ @@ -171,9 +186,9 @@ static __init_or_module void *patch_dest(void *dest, bool direct) } if (direct) - memcpy(pad, callthunk_desc.template, tsize); + memcpy(pad, skl_call_thunk_template, tsize); else - text_poke_copy_locked(pad, callthunk_desc.template, tsize, true); + text_poke_copy_locked(pad, skl_call_thunk_template, tsize, true); return pad; } From f5c1bb2afe93396d41c5cbdcb909b08a75b8dde4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Sep 2022 13:11:30 +0200 Subject: [PATCH 0136/4122] x86/calldepth: Add ret/call counting for debug Add a debuigfs mechanism to validate the accounting, e.g. vs. call/ret balance and to gather statistics about the stuffing to call ratio. 
Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111148.204285506@infradead.org --- arch/x86/include/asm/nospec-branch.h | 36 ++++++++++++++++--- arch/x86/kernel/callthunks.c | 53 ++++++++++++++++++++++++++++ arch/x86/lib/retpoline.S | 7 +++- 3 files changed, 91 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 06ba7caa0cad..4771147c7c5a 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -57,6 +57,22 @@ #define RET_DEPTH_INIT_FROM_CALL 0xfc00000000000000ULL #define RET_DEPTH_CREDIT 0xffffffffffffffffULL +#ifdef CONFIG_CALL_THUNKS_DEBUG +# define CALL_THUNKS_DEBUG_INC_CALLS \ + incq %gs:__x86_call_count; +# define CALL_THUNKS_DEBUG_INC_RETS \ + incq %gs:__x86_ret_count; +# define CALL_THUNKS_DEBUG_INC_STUFFS \ + incq %gs:__x86_stuffs_count; +# define CALL_THUNKS_DEBUG_INC_CTXSW \ + incq %gs:__x86_ctxsw_count; +#else +# define CALL_THUNKS_DEBUG_INC_CALLS +# define CALL_THUNKS_DEBUG_INC_RETS +# define CALL_THUNKS_DEBUG_INC_STUFFS +# define CALL_THUNKS_DEBUG_INC_CTXSW +#endif + #if defined(CONFIG_CALL_DEPTH_TRACKING) && !defined(COMPILE_OFFSETS) #include @@ -75,18 +91,23 @@ #define RESET_CALL_DEPTH_FROM_CALL \ mov $0xfc, %rax; \ shl $56, %rax; \ - movq %rax, PER_CPU_VAR(pcpu_hot + X86_call_depth); + movq %rax, PER_CPU_VAR(pcpu_hot + X86_call_depth); \ + CALL_THUNKS_DEBUG_INC_CALLS #define INCREMENT_CALL_DEPTH \ - sarq $5, %gs:pcpu_hot + X86_call_depth; + sarq $5, %gs:pcpu_hot + X86_call_depth; \ + CALL_THUNKS_DEBUG_INC_CALLS #define ASM_INCREMENT_CALL_DEPTH \ - sarq $5, PER_CPU_VAR(pcpu_hot + X86_call_depth); + sarq $5, PER_CPU_VAR(pcpu_hot + X86_call_depth); \ + CALL_THUNKS_DEBUG_INC_CALLS #else #define CREDIT_CALL_DEPTH +#define ASM_CREDIT_CALL_DEPTH #define RESET_CALL_DEPTH #define INCREMENT_CALL_DEPTH +#define ASM_INCREMENT_CALL_DEPTH #define RESET_CALL_DEPTH_FROM_CALL #endif @@ -137,7 
+158,8 @@ jnz 771b; \ /* barrier for jnz misprediction */ \ lfence; \ - ASM_CREDIT_CALL_DEPTH + ASM_CREDIT_CALL_DEPTH \ + CALL_THUNKS_DEBUG_INC_CTXSW #else /* * i386 doesn't unconditionally have LFENCE, as such it can't @@ -321,6 +343,12 @@ static inline void x86_set_skl_return_thunk(void) { x86_return_thunk = &__x86_return_skl; } +#ifdef CONFIG_CALL_THUNKS_DEBUG +DECLARE_PER_CPU(u64, __x86_call_count); +DECLARE_PER_CPU(u64, __x86_ret_count); +DECLARE_PER_CPU(u64, __x86_stuffs_count); +DECLARE_PER_CPU(u64, __x86_ctxsw_count); +#endif #else static inline void x86_set_skl_return_thunk(void) {} #endif diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c index 01f6f6b5a93c..dfe7ffff88b9 100644 --- a/arch/x86/kernel/callthunks.c +++ b/arch/x86/kernel/callthunks.c @@ -2,6 +2,7 @@ #define pr_fmt(fmt) "callthunks: " fmt +#include #include #include #include @@ -35,6 +36,15 @@ static int __init debug_thunks(char *str) } __setup("debug-callthunks", debug_thunks); +#ifdef CONFIG_CALL_THUNKS_DEBUG +DEFINE_PER_CPU(u64, __x86_call_count); +DEFINE_PER_CPU(u64, __x86_ret_count); +DEFINE_PER_CPU(u64, __x86_stuffs_count); +DEFINE_PER_CPU(u64, __x86_ctxsw_count); +EXPORT_SYMBOL_GPL(__x86_ctxsw_count); +EXPORT_SYMBOL_GPL(__x86_call_count); +#endif + extern s32 __call_sites[], __call_sites_end[]; struct thunk_desc { @@ -283,3 +293,46 @@ void noinline callthunks_patch_module_calls(struct callthunk_sites *cs, mutex_unlock(&text_mutex); } #endif /* CONFIG_MODULES */ + +#if defined(CONFIG_CALL_THUNKS_DEBUG) && defined(CONFIG_DEBUG_FS) +static int callthunks_debug_show(struct seq_file *m, void *p) +{ + unsigned long cpu = (unsigned long)m->private; + + seq_printf(m, "C: %16llu R: %16llu S: %16llu X: %16llu\n,", + per_cpu(__x86_call_count, cpu), + per_cpu(__x86_ret_count, cpu), + per_cpu(__x86_stuffs_count, cpu), + per_cpu(__x86_ctxsw_count, cpu)); + return 0; +} + +static int callthunks_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, 
callthunks_debug_show, inode->i_private); +} + +static const struct file_operations dfs_ops = { + .open = callthunks_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init callthunks_debugfs_init(void) +{ + struct dentry *dir; + unsigned long cpu; + + dir = debugfs_create_dir("callthunks", NULL); + for_each_possible_cpu(cpu) { + void *arg = (void *)cpu; + char name [10]; + + sprintf(name, "cpu%lu", cpu); + debugfs_create_file(name, 0644, dir, arg, &dfs_ops); + } + return 0; +} +__initcall(callthunks_debugfs_init); +#endif diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index e00206077ae9..5f61c65322be 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -203,13 +203,18 @@ EXPORT_SYMBOL(__x86_return_thunk) .align 64 SYM_FUNC_START(__x86_return_skl) ANNOTATE_NOENDBR - /* Keep the hotpath in a 16byte I-fetch */ + /* + * Keep the hotpath in a 16byte I-fetch for the non-debug + * case. + */ + CALL_THUNKS_DEBUG_INC_RETS shlq $5, PER_CPU_VAR(pcpu_hot + X86_call_depth) jz 1f ANNOTATE_UNRET_SAFE ret int3 1: + CALL_THUNKS_DEBUG_INC_STUFFS .rept 16 ANNOTATE_INTRA_FUNCTION_CALL call 2f From 7825451fa4dc04660f1f53d236e4302161d0ebd1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 15 Sep 2022 13:11:31 +0200 Subject: [PATCH 0137/4122] static_call: Add call depth tracking support When indirect calls are switched to direct calls then it has to be ensured that the call target is not the function, but the call thunk when call depth tracking is enabled. But static calls are available before call thunks have been set up. Ensure a second run through the static call patching code after call thunks have been created. When call thunks are not enabled this has no side effects. 
Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111148.306100465@infradead.org --- arch/x86/include/asm/alternative.h | 5 +++++ arch/x86/kernel/callthunks.c | 18 ++++++++++++++++++ arch/x86/kernel/static_call.c | 1 + include/linux/static_call.h | 2 ++ kernel/static_call_inline.c | 23 ++++++++++++++++++----- 5 files changed, 44 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 4c416b21bac8..07ac25793a3f 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -91,11 +91,16 @@ struct callthunk_sites { extern void callthunks_patch_builtin_calls(void); extern void callthunks_patch_module_calls(struct callthunk_sites *sites, struct module *mod); +extern void *callthunks_translate_call_dest(void *dest); #else static __always_inline void callthunks_patch_builtin_calls(void) {} static __always_inline void callthunks_patch_module_calls(struct callthunk_sites *sites, struct module *mod) {} +static __always_inline void *callthunks_translate_call_dest(void *dest) +{ + return dest; +} #endif #ifdef CONFIG_SMP diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c index dfe7ffff88b9..071003605a86 100644 --- a/arch/x86/kernel/callthunks.c +++ b/arch/x86/kernel/callthunks.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -271,10 +272,27 @@ void __init callthunks_patch_builtin_calls(void) pr_info("Setting up call depth tracking\n"); mutex_lock(&text_mutex); callthunks_setup(&cs, &builtin_coretext); + static_call_force_reinit(); thunks_initialized = true; mutex_unlock(&text_mutex); } +void *callthunks_translate_call_dest(void *dest) +{ + void *target; + + lockdep_assert_held(&text_mutex); + + if (!thunks_initialized || skip_addr(dest)) + return dest; + + if (!is_coretext(NULL, dest)) + return dest; + + target = patch_dest(dest, false); + 
return target ? : dest; +} + #ifdef CONFIG_MODULES void noinline callthunks_patch_module_calls(struct callthunk_sites *cs, struct module *mod) diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index 5d3844a98373..2ebc338980bc 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -34,6 +34,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, switch (type) { case CALL: + func = callthunks_translate_call_dest(func); code = text_gen_insn(CALL_INSN_OPCODE, insn, func); if (func == &__static_call_return0) { emulate = code; diff --git a/include/linux/static_call.h b/include/linux/static_call.h index df53bed9d71f..141e6b176a1b 100644 --- a/include/linux/static_call.h +++ b/include/linux/static_call.h @@ -162,6 +162,8 @@ extern void arch_static_call_transform(void *site, void *tramp, void *func, bool extern int __init static_call_init(void); +extern void static_call_force_reinit(void); + struct static_call_mod { struct static_call_mod *next; struct module *mod; /* for vmlinux, mod == NULL */ diff --git a/kernel/static_call_inline.c b/kernel/static_call_inline.c index dc5665b62814..639397b5491c 100644 --- a/kernel/static_call_inline.c +++ b/kernel/static_call_inline.c @@ -15,7 +15,18 @@ extern struct static_call_site __start_static_call_sites[], extern struct static_call_tramp_key __start_static_call_tramp_key[], __stop_static_call_tramp_key[]; -static bool static_call_initialized; +static int static_call_initialized; + +/* + * Must be called before early_initcall() to be effective. + */ +void static_call_force_reinit(void) +{ + if (WARN_ON_ONCE(!static_call_initialized)) + return; + + static_call_initialized++; +} /* mutex to protect key modules/sites */ static DEFINE_MUTEX(static_call_mutex); @@ -475,7 +486,8 @@ int __init static_call_init(void) { int ret; - if (static_call_initialized) + /* See static_call_force_reinit(). 
*/ + if (static_call_initialized == 1) return 0; cpus_read_lock(); @@ -490,11 +502,12 @@ int __init static_call_init(void) BUG(); } - static_call_initialized = true; - #ifdef CONFIG_MODULES - register_module_notifier(&static_call_module_nb); + if (!static_call_initialized) + register_module_notifier(&static_call_module_nb); #endif + + static_call_initialized = 1; return 0; } early_initcall(static_call_init); From f1389181622a08d6f1c71407a64df36b809d632b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 15 Sep 2022 13:11:32 +0200 Subject: [PATCH 0138/4122] kallsyms: Take callthunks into account Since the pre-symbol function padding is an integral part of the symbol make kallsyms report it as part of the symbol by reporting it as sym-x instead of prev_sym+y. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111148.409656012@infradead.org --- kernel/kallsyms.c | 45 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 60c20f301a6b..cc244c02b4cf 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -293,6 +293,12 @@ static unsigned long get_symbol_pos(unsigned long addr, return low; } +#ifdef CONFIG_FUNCTION_PADDING_BYTES +#define PADDING_BYTES CONFIG_FUNCTION_PADDING_BYTES +#else +#define PADDING_BYTES 0 +#endif + /* * Lookup an address but don't bother to find any names. 
*/ @@ -300,13 +306,25 @@ int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize, unsigned long *offset) { char namebuf[KSYM_NAME_LEN]; + int ret; + + addr += PADDING_BYTES; if (is_ksym_addr(addr)) { get_symbol_pos(addr, symbolsize, offset); - return 1; + ret = 1; + goto found; } - return !!module_address_lookup(addr, symbolsize, offset, NULL, NULL, namebuf) || - !!__bpf_address_lookup(addr, symbolsize, offset, namebuf); + + ret = !!module_address_lookup(addr, symbolsize, offset, NULL, NULL, namebuf); + if (!ret) { + ret = !!__bpf_address_lookup(addr, symbolsize, + offset, namebuf); + } +found: + if (ret && offset) + *offset -= PADDING_BYTES; + return ret; } static const char *kallsyms_lookup_buildid(unsigned long addr, @@ -319,6 +337,8 @@ static const char *kallsyms_lookup_buildid(unsigned long addr, namebuf[KSYM_NAME_LEN - 1] = 0; namebuf[0] = 0; + addr += PADDING_BYTES; + if (is_ksym_addr(addr)) { unsigned long pos; @@ -348,6 +368,8 @@ static const char *kallsyms_lookup_buildid(unsigned long addr, found: cleanup_symbol_name(namebuf); + if (ret && offset) + *offset -= PADDING_BYTES; return ret; } @@ -374,6 +396,8 @@ int lookup_symbol_name(unsigned long addr, char *symname) symname[0] = '\0'; symname[KSYM_NAME_LEN - 1] = '\0'; + addr += PADDING_BYTES; + if (is_ksym_addr(addr)) { unsigned long pos; @@ -401,6 +425,8 @@ int lookup_symbol_attrs(unsigned long addr, unsigned long *size, name[0] = '\0'; name[KSYM_NAME_LEN - 1] = '\0'; + addr += PADDING_BYTES; + if (is_ksym_addr(addr)) { unsigned long pos; @@ -417,6 +443,8 @@ int lookup_symbol_attrs(unsigned long addr, unsigned long *size, return res; found: + if (offset) + *offset -= PADDING_BYTES; cleanup_symbol_name(name); return 0; } @@ -442,8 +470,15 @@ static int __sprint_symbol(char *buffer, unsigned long address, len = strlen(buffer); offset -= symbol_offset; - if (add_offset) - len += sprintf(buffer + len, "+%#lx/%#lx", offset, size); + if (add_offset) { + char s = '+'; + + if ((long)offset 
< 0) { + s = '-'; + offset = 0UL - offset; + } + len += sprintf(buffer + len, "%c%#lx/%#lx", s, offset, size); + } if (modname) { len += sprintf(buffer + len, " [%s", modname); From 396e0b8e09e86440c2119d12c2101110d3cd5bf9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 15 Sep 2022 13:11:33 +0200 Subject: [PATCH 0139/4122] x86/orc: Make it callthunk aware Callthunks addresses on the stack would confuse the ORC unwinder. Handle them correctly and tell ORC to proceed further down the stack. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111148.511637628@infradead.org --- arch/x86/include/asm/alternative.h | 5 +++++ arch/x86/kernel/callthunks.c | 13 +++++++++++++ arch/x86/kernel/unwind_orc.c | 21 ++++++++++++++++++++- 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 07ac25793a3f..4b8cd256c95e 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -92,6 +92,7 @@ extern void callthunks_patch_builtin_calls(void); extern void callthunks_patch_module_calls(struct callthunk_sites *sites, struct module *mod); extern void *callthunks_translate_call_dest(void *dest); +extern bool is_callthunk(void *addr); #else static __always_inline void callthunks_patch_builtin_calls(void) {} static __always_inline void @@ -101,6 +102,10 @@ static __always_inline void *callthunks_translate_call_dest(void *dest) { return dest; } +static __always_inline bool is_callthunk(void *addr) +{ + return false; +} #endif #ifdef CONFIG_SMP diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c index 071003605a86..7f9788194eb5 100644 --- a/arch/x86/kernel/callthunks.c +++ b/arch/x86/kernel/callthunks.c @@ -293,6 +293,19 @@ void *callthunks_translate_call_dest(void *dest) return target ? 
: dest; } +bool is_callthunk(void *addr) +{ + unsigned int tmpl_size = SKL_TMPL_SIZE; + void *tmpl = skl_call_thunk_template; + unsigned long dest; + + dest = roundup((unsigned long)addr, CONFIG_FUNCTION_ALIGNMENT); + if (!thunks_initialized || skip_addr((void *)dest)) + return false; + + return !bcmp((void *)(dest - tmpl_size), tmpl, tmpl_size); +} + #ifdef CONFIG_MODULES void noinline callthunks_patch_module_calls(struct callthunk_sites *cs, struct module *mod) diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 0ea57da92940..cfac2b54b37b 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -136,6 +136,21 @@ static struct orc_entry null_orc_entry = { .type = UNWIND_HINT_TYPE_CALL }; +#ifdef CONFIG_CALL_THUNKS +static struct orc_entry *orc_callthunk_find(unsigned long ip) +{ + if (!is_callthunk((void *)ip)) + return NULL; + + return &null_orc_entry; +} +#else +static struct orc_entry *orc_callthunk_find(unsigned long ip) +{ + return NULL; +} +#endif + /* Fake frame pointer entry -- used as a fallback for generated code */ static struct orc_entry orc_fp_entry = { .type = UNWIND_HINT_TYPE_CALL, @@ -189,7 +204,11 @@ static struct orc_entry *orc_find(unsigned long ip) if (orc) return orc; - return orc_ftrace_find(ip); + orc = orc_ftrace_find(ip); + if (orc) + return orc; + + return orc_callthunk_find(ip); } #ifdef CONFIG_MODULES From b2e9dfe54be4d023124d588d6f03d16a9c0d2507 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Sep 2022 13:11:34 +0200 Subject: [PATCH 0140/4122] x86/bpf: Emit call depth accounting if required Ensure that calls in BPF jitted programs are emitting call depth accounting when enabled to keep the call/return balanced. The return thunk jump is already injected due to the earlier retbleed mitigations. 
Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111148.615413406@infradead.org --- arch/x86/include/asm/alternative.h | 6 ++++++ arch/x86/kernel/callthunks.c | 19 ++++++++++++++++++ arch/x86/net/bpf_jit_comp.c | 32 +++++++++++++++++++++--------- 3 files changed, 48 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 4b8cd256c95e..664c0779375c 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -93,6 +93,7 @@ extern void callthunks_patch_module_calls(struct callthunk_sites *sites, struct module *mod); extern void *callthunks_translate_call_dest(void *dest); extern bool is_callthunk(void *addr); +extern int x86_call_depth_emit_accounting(u8 **pprog, void *func); #else static __always_inline void callthunks_patch_builtin_calls(void) {} static __always_inline void @@ -106,6 +107,11 @@ static __always_inline bool is_callthunk(void *addr) { return false; } +static __always_inline int x86_call_depth_emit_accounting(u8 **pprog, + void *func) +{ + return 0; +} #endif #ifdef CONFIG_SMP diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c index 7f9788194eb5..a03d646b5e69 100644 --- a/arch/x86/kernel/callthunks.c +++ b/arch/x86/kernel/callthunks.c @@ -306,6 +306,25 @@ bool is_callthunk(void *addr) return !bcmp((void *)(dest - tmpl_size), tmpl, tmpl_size); } +#ifdef CONFIG_BPF_JIT +int x86_call_depth_emit_accounting(u8 **pprog, void *func) +{ + unsigned int tmpl_size = SKL_TMPL_SIZE; + void *tmpl = skl_call_thunk_template; + + if (!thunks_initialized) + return 0; + + /* Is function call target a thunk? 
*/ + if (is_callthunk(func)) + return 0; + + memcpy(*pprog, tmpl, tmpl_size); + *pprog += tmpl_size; + return tmpl_size; +} +#endif + #ifdef CONFIG_MODULES void noinline callthunks_patch_module_calls(struct callthunk_sites *cs, struct module *mod) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index ad8cb7f15ab8..a6b46740ea30 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -340,6 +340,13 @@ static int emit_call(u8 **pprog, void *func, void *ip) return emit_patch(pprog, func, ip, 0xE8); } +static int emit_rsb_call(u8 **pprog, void *func, void *ip) +{ + OPTIMIZER_HIDE_VAR(func); + x86_call_depth_emit_accounting(pprog, func); + return emit_patch(pprog, func, ip, 0xE8); +} + static int emit_jump(u8 **pprog, void *func, void *ip) { return emit_patch(pprog, func, ip, 0xE9); @@ -1436,19 +1443,26 @@ st: if (is_imm8(insn->off)) break; /* call */ - case BPF_JMP | BPF_CALL: + case BPF_JMP | BPF_CALL: { + int offs; + func = (u8 *) __bpf_call_base + imm32; if (tail_call_reachable) { /* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */ EMIT3_off32(0x48, 0x8B, 0x85, -round_up(bpf_prog->aux->stack_depth, 8) - 8); - if (!imm32 || emit_call(&prog, func, image + addrs[i - 1] + 7)) + if (!imm32) return -EINVAL; + offs = 7 + x86_call_depth_emit_accounting(&prog, func); } else { - if (!imm32 || emit_call(&prog, func, image + addrs[i - 1])) + if (!imm32) return -EINVAL; + offs = x86_call_depth_emit_accounting(&prog, func); } + if (emit_call(&prog, func, image + addrs[i - 1] + offs)) + return -EINVAL; break; + } case BPF_JMP | BPF_TAIL_CALL: if (imm32) @@ -1854,7 +1868,7 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, /* arg2: lea rsi, [rbp - ctx_cookie_off] */ EMIT4(0x48, 0x8D, 0x75, -run_ctx_off); - if (emit_call(&prog, enter, prog)) + if (emit_rsb_call(&prog, enter, prog)) return -EINVAL; /* remember prog start time returned by __bpf_prog_enter */ emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0); @@ 
-1875,7 +1889,7 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, (long) p->insnsi >> 32, (u32) (long) p->insnsi); /* call JITed bpf program or interpreter */ - if (emit_call(&prog, p->bpf_func, prog)) + if (emit_rsb_call(&prog, p->bpf_func, prog)) return -EINVAL; /* @@ -1899,7 +1913,7 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6); /* arg3: lea rdx, [rbp - run_ctx_off] */ EMIT4(0x48, 0x8D, 0x55, -run_ctx_off); - if (emit_call(&prog, exit, prog)) + if (emit_rsb_call(&prog, exit, prog)) return -EINVAL; *pprog = prog; @@ -2147,7 +2161,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i if (flags & BPF_TRAMP_F_CALL_ORIG) { /* arg1: mov rdi, im */ emit_mov_imm64(&prog, BPF_REG_1, (long) im >> 32, (u32) (long) im); - if (emit_call(&prog, __bpf_tramp_enter, prog)) { + if (emit_rsb_call(&prog, __bpf_tramp_enter, prog)) { ret = -EINVAL; goto cleanup; } @@ -2179,7 +2193,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i EMIT2(0xff, 0xd0); /* call *rax */ } else { /* call original function */ - if (emit_call(&prog, orig_call, prog)) { + if (emit_rsb_call(&prog, orig_call, prog)) { ret = -EINVAL; goto cleanup; } @@ -2223,7 +2237,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i im->ip_epilogue = prog; /* arg1: mov rdi, im */ emit_mov_imm64(&prog, BPF_REG_1, (long) im >> 32, (u32) (long) im); - if (emit_call(&prog, __bpf_tramp_exit, prog)) { + if (emit_rsb_call(&prog, __bpf_tramp_exit, prog)) { ret = -EINVAL; goto cleanup; } From eac828eaef295cd0cc8b58f55fa5c8401fdc2370 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 15 Sep 2022 13:11:35 +0200 Subject: [PATCH 0141/4122] x86/ftrace: Remove ftrace_epilogue() Remove the weird jumps to RET and simply use RET. This then promotes ftrace_stub() to a real function; which becomes important for kcfi. 
Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111148.719080593@infradead.org --- arch/x86/kernel/ftrace_64.S | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index dfeb227de561..a90c55a6b481 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -172,20 +172,14 @@ SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) */ SYM_INNER_LABEL(ftrace_caller_end, SYM_L_GLOBAL) ANNOTATE_NOENDBR - - jmp ftrace_epilogue + RET SYM_FUNC_END(ftrace_caller); STACK_FRAME_NON_STANDARD_FP(ftrace_caller) -SYM_FUNC_START(ftrace_epilogue) -/* - * This is weak to keep gas from relaxing the jumps. - */ -SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK) +SYM_FUNC_START(ftrace_stub) UNWIND_HINT_FUNC - ENDBR RET -SYM_FUNC_END(ftrace_epilogue) +SYM_FUNC_END(ftrace_stub) SYM_FUNC_START(ftrace_regs_caller) /* Save the current flags before any operations that can change them */ @@ -262,14 +256,11 @@ SYM_INNER_LABEL(ftrace_regs_caller_jmp, SYM_L_GLOBAL) popfq /* - * As this jmp to ftrace_epilogue can be a short jump - * it must not be copied into the trampoline. - * The trampoline will add the code to jump - * to the return. + * The trampoline will add the return. 
*/ SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL) ANNOTATE_NOENDBR - jmp ftrace_epilogue + RET /* Swap the flags with orig_rax */ 1: movq MCOUNT_REG_SIZE(%rsp), %rdi @@ -280,7 +271,7 @@ SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL) /* Restore flags */ popfq UNWIND_HINT_FUNC - jmp ftrace_epilogue + RET SYM_FUNC_END(ftrace_regs_caller) STACK_FRAME_NON_STANDARD_FP(ftrace_regs_caller) From 36b64f101219dd9e6e4f0ea880b64e8a90da547b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 15 Sep 2022 13:11:36 +0200 Subject: [PATCH 0142/4122] x86/ftrace: Rebalance RSB ftrace_regs_caller() uses a PUSH;RET pattern to tail-call into a direct-call function, this unbalances the RSB, fix that. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111148.823216933@infradead.org --- arch/x86/kernel/ftrace_64.S | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index a90c55a6b481..b5b54f58957e 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -271,6 +271,17 @@ SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL) /* Restore flags */ popfq UNWIND_HINT_FUNC + + /* + * The above left an extra return value on the stack; effectively + * doing a tail-call without using a register. This PUSH;RET + * pattern unbalances the RSB, inject a pointless CALL to rebalance. + */ + ANNOTATE_INTRA_FUNCTION_CALL + CALL .Ldo_rebalance + int3 +.Ldo_rebalance: + add $8, %rsp RET SYM_FUNC_END(ftrace_regs_caller) From ee3e2469b3463d28ca4cde20e0283319ac6a562d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 15 Sep 2022 13:11:37 +0200 Subject: [PATCH 0143/4122] x86/ftrace: Make it call depth tracking aware Since ftrace has trampolines, don't use thunks for the __fentry__ site but instead require that every function called from there includes accounting. 
This very much includes all the direct-call functions. Additionally, ftrace uses ROP tricks in two places: - return_to_handler(), and - ftrace_regs_caller() when pt_regs->orig_ax is set by a direct-call. return_to_handler() already uses a retpoline to replace an indirect-jump to defeat IBT, since this is a jump-type retpoline, make sure there is no accounting done and ALTERNATIVE the RET into a ret. ftrace_regs_caller() does much the same and gets the same treatment. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111148.927545073@infradead.org --- arch/x86/include/asm/nospec-branch.h | 9 +++++++++ arch/x86/kernel/callthunks.c | 2 +- arch/x86/kernel/ftrace.c | 16 +++++++++++---- arch/x86/kernel/ftrace_64.S | 22 +++++++++++++++++++-- arch/x86/net/bpf_jit_comp.c | 6 ++++++ kernel/trace/trace_selftest.c | 9 ++++++++- samples/ftrace/ftrace-direct-modify.c | 3 +++ samples/ftrace/ftrace-direct-multi-modify.c | 3 +++ samples/ftrace/ftrace-direct-multi.c | 2 ++ samples/ftrace/ftrace-direct-too.c | 2 ++ samples/ftrace/ftrace-direct.c | 2 ++ 11 files changed, 68 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 4771147c7c5a..82580adbca4b 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -343,6 +343,12 @@ static inline void x86_set_skl_return_thunk(void) { x86_return_thunk = &__x86_return_skl; } + +#define CALL_DEPTH_ACCOUNT \ + ALTERNATIVE("", \ + __stringify(INCREMENT_CALL_DEPTH), \ + X86_FEATURE_CALL_DEPTH) + #ifdef CONFIG_CALL_THUNKS_DEBUG DECLARE_PER_CPU(u64, __x86_call_count); DECLARE_PER_CPU(u64, __x86_ret_count); @@ -351,6 +357,9 @@ DECLARE_PER_CPU(u64, __x86_ctxsw_count); #endif #else static inline void x86_set_skl_return_thunk(void) {} + +#define CALL_DEPTH_ACCOUNT "" + #endif #ifdef CONFIG_RETPOLINE diff --git a/arch/x86/kernel/callthunks.c 
b/arch/x86/kernel/callthunks.c index a03d646b5e69..7d2c75ec9a8c 100644 --- a/arch/x86/kernel/callthunks.c +++ b/arch/x86/kernel/callthunks.c @@ -316,7 +316,7 @@ int x86_call_depth_emit_accounting(u8 **pprog, void *func) return 0; /* Is function call target a thunk? */ - if (is_callthunk(func)) + if (func && is_callthunk(func)) return 0; memcpy(*pprog, tmpl, tmpl_size); diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 4ac6692d5ef8..cf15ef5aecff 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -69,6 +69,10 @@ static const char *ftrace_nop_replace(void) static const char *ftrace_call_replace(unsigned long ip, unsigned long addr) { + /* + * No need to translate into a callthunk. The trampoline does + * the depth accounting itself. + */ return text_gen_insn(CALL_INSN_OPCODE, (void *)ip, (void *)addr); } @@ -317,7 +321,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) unsigned long size; unsigned long *ptr; void *trampoline; - void *ip; + void *ip, *dest; /* 48 8b 15 is movq (%rip), %rdx */ unsigned const char op_ref[] = { 0x48, 0x8b, 0x15 }; unsigned const char retq[] = { RET_INSN_OPCODE, INT3_INSN_OPCODE }; @@ -404,10 +408,14 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) /* put in the call to the function */ mutex_lock(&text_mutex); call_offset -= start_offset; + /* + * No need to translate into a callthunk. The trampoline does + * the depth accounting before the call already. 
+ */ + dest = ftrace_ops_get_func(ops); memcpy(trampoline + call_offset, - text_gen_insn(CALL_INSN_OPCODE, - trampoline + call_offset, - ftrace_ops_get_func(ops)), CALL_INSN_SIZE); + text_gen_insn(CALL_INSN_OPCODE, trampoline + call_offset, dest), + CALL_INSN_SIZE); mutex_unlock(&text_mutex); /* ALLOC_TRAMP flags lets us know we created it */ diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index b5b54f58957e..6a7e6d666a12 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -4,6 +4,7 @@ */ #include +#include #include #include #include @@ -132,6 +133,7 @@ #ifdef CONFIG_DYNAMIC_FTRACE SYM_FUNC_START(__fentry__) + CALL_DEPTH_ACCOUNT RET SYM_FUNC_END(__fentry__) EXPORT_SYMBOL(__fentry__) @@ -140,6 +142,8 @@ SYM_FUNC_START(ftrace_caller) /* save_mcount_regs fills in first two parameters */ save_mcount_regs + CALL_DEPTH_ACCOUNT + /* Stack - skipping return address of ftrace_caller */ leaq MCOUNT_REG_SIZE+8(%rsp), %rcx movq %rcx, RSP(%rsp) @@ -155,6 +159,9 @@ SYM_INNER_LABEL(ftrace_caller_op_ptr, SYM_L_GLOBAL) /* Only ops with REGS flag set should have CS register set */ movq $0, CS(%rsp) + /* Account for the function call below */ + CALL_DEPTH_ACCOUNT + SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) ANNOTATE_NOENDBR call ftrace_stub @@ -189,6 +196,8 @@ SYM_FUNC_START(ftrace_regs_caller) save_mcount_regs 8 /* save_mcount_regs fills in first two parameters */ + CALL_DEPTH_ACCOUNT + SYM_INNER_LABEL(ftrace_regs_caller_op_ptr, SYM_L_GLOBAL) ANNOTATE_NOENDBR /* Load the ftrace_ops into the 3rd parameter */ @@ -219,6 +228,9 @@ SYM_INNER_LABEL(ftrace_regs_caller_op_ptr, SYM_L_GLOBAL) /* regs go into 4th parameter */ leaq (%rsp), %rcx + /* Account for the function call below */ + CALL_DEPTH_ACCOUNT + SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL) ANNOTATE_NOENDBR call ftrace_stub @@ -282,7 +294,9 @@ SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL) int3 .Ldo_rebalance: add $8, %rsp - RET + ALTERNATIVE __stringify(RET), \ + 
__stringify(ANNOTATE_UNRET_SAFE; ret; int3), \ + X86_FEATURE_CALL_DEPTH SYM_FUNC_END(ftrace_regs_caller) STACK_FRAME_NON_STANDARD_FP(ftrace_regs_caller) @@ -291,6 +305,8 @@ STACK_FRAME_NON_STANDARD_FP(ftrace_regs_caller) #else /* ! CONFIG_DYNAMIC_FTRACE */ SYM_FUNC_START(__fentry__) + CALL_DEPTH_ACCOUNT + cmpq $ftrace_stub, ftrace_trace_function jnz trace @@ -347,6 +363,8 @@ SYM_CODE_START(return_to_handler) int3 .Ldo_rop: mov %rdi, (%rsp) - RET + ALTERNATIVE __stringify(RET), \ + __stringify(ANNOTATE_UNRET_SAFE; ret; int3), \ + X86_FEATURE_CALL_DEPTH SYM_CODE_END(return_to_handler) #endif diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index a6b46740ea30..f46b62029d91 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -2135,6 +2136,11 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i prog = image; EMIT_ENDBR(); + /* + * This is the direct-call trampoline, as such it needs accounting + * for the __fentry__ call. 
+ */ + x86_call_depth_emit_accounting(&prog, NULL); EMIT1(0x55); /* push rbp */ EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */ EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */ diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index a2d301f58ced..ff0536cea968 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -785,7 +785,14 @@ static struct fgraph_ops fgraph_ops __initdata = { }; #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS -noinline __noclone static void trace_direct_tramp(void) { } +#ifndef CALL_DEPTH_ACCOUNT +#define CALL_DEPTH_ACCOUNT "" +#endif + +noinline __noclone static void trace_direct_tramp(void) +{ + asm(CALL_DEPTH_ACCOUNT); +} #endif /* diff --git a/samples/ftrace/ftrace-direct-modify.c b/samples/ftrace/ftrace-direct-modify.c index 39146fa83e20..de5a0f67f320 100644 --- a/samples/ftrace/ftrace-direct-modify.c +++ b/samples/ftrace/ftrace-direct-modify.c @@ -3,6 +3,7 @@ #include #include #include +#include extern void my_direct_func1(void); extern void my_direct_func2(void); @@ -34,6 +35,7 @@ asm ( ASM_ENDBR " pushq %rbp\n" " movq %rsp, %rbp\n" + CALL_DEPTH_ACCOUNT " call my_direct_func1\n" " leave\n" " .size my_tramp1, .-my_tramp1\n" @@ -45,6 +47,7 @@ asm ( ASM_ENDBR " pushq %rbp\n" " movq %rsp, %rbp\n" + CALL_DEPTH_ACCOUNT " call my_direct_func2\n" " leave\n" ASM_RET diff --git a/samples/ftrace/ftrace-direct-multi-modify.c b/samples/ftrace/ftrace-direct-multi-modify.c index 65aa94d96f4e..d52370cad0b6 100644 --- a/samples/ftrace/ftrace-direct-multi-modify.c +++ b/samples/ftrace/ftrace-direct-multi-modify.c @@ -3,6 +3,7 @@ #include #include #include +#include extern void my_direct_func1(unsigned long ip); extern void my_direct_func2(unsigned long ip); @@ -32,6 +33,7 @@ asm ( ASM_ENDBR " pushq %rbp\n" " movq %rsp, %rbp\n" + CALL_DEPTH_ACCOUNT " pushq %rdi\n" " movq 8(%rbp), %rdi\n" " call my_direct_func1\n" @@ -46,6 +48,7 @@ asm ( ASM_ENDBR " pushq %rbp\n" " movq %rsp, %rbp\n" + 
CALL_DEPTH_ACCOUNT " pushq %rdi\n" " movq 8(%rbp), %rdi\n" " call my_direct_func2\n" diff --git a/samples/ftrace/ftrace-direct-multi.c b/samples/ftrace/ftrace-direct-multi.c index 41ded7c615c7..ec1088922517 100644 --- a/samples/ftrace/ftrace-direct-multi.c +++ b/samples/ftrace/ftrace-direct-multi.c @@ -5,6 +5,7 @@ #include #include #include +#include extern void my_direct_func(unsigned long ip); @@ -27,6 +28,7 @@ asm ( ASM_ENDBR " pushq %rbp\n" " movq %rsp, %rbp\n" + CALL_DEPTH_ACCOUNT " pushq %rdi\n" " movq 8(%rbp), %rdi\n" " call my_direct_func\n" diff --git a/samples/ftrace/ftrace-direct-too.c b/samples/ftrace/ftrace-direct-too.c index 6690468c5cc2..e13fb59a2b47 100644 --- a/samples/ftrace/ftrace-direct-too.c +++ b/samples/ftrace/ftrace-direct-too.c @@ -4,6 +4,7 @@ #include /* for handle_mm_fault() */ #include #include +#include extern void my_direct_func(struct vm_area_struct *vma, unsigned long address, unsigned int flags); @@ -29,6 +30,7 @@ asm ( ASM_ENDBR " pushq %rbp\n" " movq %rsp, %rbp\n" + CALL_DEPTH_ACCOUNT " pushq %rdi\n" " pushq %rsi\n" " pushq %rdx\n" diff --git a/samples/ftrace/ftrace-direct.c b/samples/ftrace/ftrace-direct.c index e8f1e440b9b8..1f769d0db20f 100644 --- a/samples/ftrace/ftrace-direct.c +++ b/samples/ftrace/ftrace-direct.c @@ -4,6 +4,7 @@ #include /* for wake_up_process() */ #include #include +#include extern void my_direct_func(struct task_struct *p); @@ -26,6 +27,7 @@ asm ( ASM_ENDBR " pushq %rbp\n" " movq %rsp, %rbp\n" + CALL_DEPTH_ACCOUNT " pushq %rdi\n" " call my_direct_func\n" " popq %rdi\n" From d82a0345cf218f5050f5ad913e1ae6c579105731 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Sep 2022 13:11:38 +0200 Subject: [PATCH 0144/4122] x86/retbleed: Add call depth tracking mitigation The fully secure mitigation for RSB underflow on Intel SKL CPUs is IBRS, which inflicts up to 30% penalty for pathological syscall heavy work loads. 
Software based call depth tracking and RSB refill is not perfect, but reduces the attack surface massively. The penalty for the pathological case is about 8% which is still annoying but definitely more palatable than IBRS. Add a retbleed=stuff command line option to enable the call depth tracking and software refill of the RSB. This gives admins a choice. IBeeRS are safe and cause headaches, call depth tracking is considered to be s(t)ufficiently safe. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111149.029587352@infradead.org --- arch/x86/kernel/cpu/bugs.c | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index da7c361f47e0..e6c23ead1617 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -787,6 +787,7 @@ enum retbleed_mitigation { RETBLEED_MITIGATION_IBPB, RETBLEED_MITIGATION_IBRS, RETBLEED_MITIGATION_EIBRS, + RETBLEED_MITIGATION_STUFF, }; enum retbleed_mitigation_cmd { @@ -794,6 +795,7 @@ enum retbleed_mitigation_cmd { RETBLEED_CMD_AUTO, RETBLEED_CMD_UNRET, RETBLEED_CMD_IBPB, + RETBLEED_CMD_STUFF, }; static const char * const retbleed_strings[] = { @@ -802,6 +804,7 @@ static const char * const retbleed_strings[] = { [RETBLEED_MITIGATION_IBPB] = "Mitigation: IBPB", [RETBLEED_MITIGATION_IBRS] = "Mitigation: IBRS", [RETBLEED_MITIGATION_EIBRS] = "Mitigation: Enhanced IBRS", + [RETBLEED_MITIGATION_STUFF] = "Mitigation: Stuffing", }; static enum retbleed_mitigation retbleed_mitigation __ro_after_init = @@ -831,6 +834,8 @@ static int __init retbleed_parse_cmdline(char *str) retbleed_cmd = RETBLEED_CMD_UNRET; } else if (!strcmp(str, "ibpb")) { retbleed_cmd = RETBLEED_CMD_IBPB; + } else if (!strcmp(str, "stuff")) { + retbleed_cmd = RETBLEED_CMD_STUFF; } else if (!strcmp(str, "nosmt")) { retbleed_nosmt = true; } else { @@ -879,6 +884,21 @@ static void __init 
retbleed_select_mitigation(void) } break; + case RETBLEED_CMD_STUFF: + if (IS_ENABLED(CONFIG_CALL_DEPTH_TRACKING) && + spectre_v2_enabled == SPECTRE_V2_RETPOLINE) { + retbleed_mitigation = RETBLEED_MITIGATION_STUFF; + + } else { + if (IS_ENABLED(CONFIG_CALL_DEPTH_TRACKING)) + pr_err("WARNING: retbleed=stuff depends on spectre_v2=retpoline\n"); + else + pr_err("WARNING: kernel not compiled with CALL_DEPTH_TRACKING.\n"); + + goto do_cmd_auto; + } + break; + do_cmd_auto: case RETBLEED_CMD_AUTO: default: @@ -916,6 +936,12 @@ do_cmd_auto: mitigate_smt = true; break; + case RETBLEED_MITIGATION_STUFF: + setup_force_cpu_cap(X86_FEATURE_RETHUNK); + setup_force_cpu_cap(X86_FEATURE_CALL_DEPTH); + x86_set_skl_return_thunk(); + break; + default: break; } @@ -926,7 +952,7 @@ do_cmd_auto: /* * Let IBRS trump all on Intel without affecting the effects of the - * retbleed= cmdline option. + * retbleed= cmdline option except for call depth based stuffing */ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { switch (spectre_v2_enabled) { @@ -939,7 +965,8 @@ do_cmd_auto: retbleed_mitigation = RETBLEED_MITIGATION_EIBRS; break; default: - pr_err(RETBLEED_INTEL_MSG); + if (retbleed_mitigation != RETBLEED_MITIGATION_STUFF) + pr_err(RETBLEED_INTEL_MSG); } } @@ -1413,6 +1440,7 @@ static void __init spectre_v2_select_mitigation(void) if (IS_ENABLED(CONFIG_CPU_IBRS_ENTRY) && boot_cpu_has_bug(X86_BUG_RETBLEED) && retbleed_cmd != RETBLEED_CMD_OFF && + retbleed_cmd != RETBLEED_CMD_STUFF && boot_cpu_has(X86_FEATURE_IBRS) && boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { mode = SPECTRE_V2_IBRS; From 5c9a92dec3235b0c1d51e92860f8014753161593 Mon Sep 17 00:00:00 2001 From: "Peter Zijlstra (Intel)" Date: Mon, 17 Oct 2022 16:41:20 +0200 Subject: [PATCH 0145/4122] x86/bugs: Add retbleed=force Debug aid, allows running retbleed=force,stuff on non-affected uarchs Signed-off-by: Peter Zijlstra (Intel) --- arch/x86/kernel/cpu/bugs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git 
a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index e6c23ead1617..b307b83e22be 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -838,6 +838,8 @@ static int __init retbleed_parse_cmdline(char *str) retbleed_cmd = RETBLEED_CMD_STUFF; } else if (!strcmp(str, "nosmt")) { retbleed_nosmt = true; + } else if (!strcmp(str, "force")) { + setup_force_cpu_bug(X86_BUG_RETBLEED); } else { pr_err("Ignoring unknown retbleed option (%s).", str); } From 44112922674b94a7d699dfff6307fc830018df7c Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 17 Oct 2022 17:20:28 +0800 Subject: [PATCH 0146/4122] scsi: libsas: Add sas_ata_device_link_abort() Similar to how AHCI handles NCQ errors in ahci_error_intr() -> ata_port_abort() -> ata_do_link_abort(), add an NCQ error handler for LLDDs to call to initiate a link abort. This will mark all outstanding QCs as failed and kick-off EH. Note: A "force reset" argument is added for drivers which require the ATA error handling to always reset the device. A driver may require this feature for when SATA device per-SCSI cmnd resources are only released during reset for ATA EH. As such, we need an option to force reset to be done, regardless of what any EH autopsy decides. The SATA device FIS fields are set to indicate a device error from ata_eh_analyze_tf(). Suggested-by: Damien Le Moal Suggested-by: Niklas Cassel Signed-off-by: John Garry Link: https://lore.kernel.org/r/1665998435-199946-2-git-send-email-john.garry@huawei.com Tested-by: Damien Le Moal Tested-by: Niklas Cassel # pm80xx Reviewed-by: Jason Yan Signed-off-by: Martin K. 
Petersen --- drivers/scsi/libsas/sas_ata.c | 15 +++++++++++++++ include/scsi/sas_ata.h | 6 ++++++ 2 files changed, 21 insertions(+) diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index d35c9296f738..61f64d54e67d 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -861,6 +861,21 @@ void sas_ata_wait_eh(struct domain_device *dev) ata_port_wait_eh(ap); } +void sas_ata_device_link_abort(struct domain_device *device, bool force_reset) +{ + struct ata_port *ap = device->sata_dev.ap; + struct ata_link *link = &ap->link; + + device->sata_dev.fis[2] = ATA_ERR | ATA_DRDY; /* tf status */ + device->sata_dev.fis[3] = ATA_ABORTED; /* tf error */ + + link->eh_info.err_mask |= AC_ERR_DEV; + if (force_reset) + link->eh_info.action |= ATA_EH_RESET; + ata_link_abort(link); +} +EXPORT_SYMBOL_GPL(sas_ata_device_link_abort); + int sas_execute_ata_cmd(struct domain_device *device, u8 *fis, int force_phy_id) { struct sas_tmf_task tmf_task = {}; diff --git a/include/scsi/sas_ata.h b/include/scsi/sas_ata.h index a1df4f9d57a3..e47f0aec0722 100644 --- a/include/scsi/sas_ata.h +++ b/include/scsi/sas_ata.h @@ -32,6 +32,7 @@ void sas_probe_sata(struct asd_sas_port *port); void sas_suspend_sata(struct asd_sas_port *port); void sas_resume_sata(struct asd_sas_port *port); void sas_ata_end_eh(struct ata_port *ap); +void sas_ata_device_link_abort(struct domain_device *dev, bool force_reset); int sas_execute_ata_cmd(struct domain_device *device, u8 *fis, int force_phy_id); int sas_ata_wait_after_reset(struct domain_device *dev, unsigned long deadline); @@ -87,6 +88,11 @@ static inline void sas_ata_end_eh(struct ata_port *ap) { } +static inline void sas_ata_device_link_abort(struct domain_device *dev, + bool force_reset) +{ +} + static inline int sas_execute_ata_cmd(struct domain_device *device, u8 *fis, int force_phy_id) { From 4b329abc91800d23941ac773e69b322a13981ecb Mon Sep 17 00:00:00 2001 From: Xingui Yang Date: Mon, 17 Oct 2022 17:20:29 
+0800 Subject: [PATCH 0147/4122] scsi: hisi_sas: Move slot variable definition in hisi_sas_abort_task() Each branch currently defines a slot variable independently, and it is neater to move it to the function head. Signed-off-by: Xingui Yang Signed-off-by: John Garry Link: https://lore.kernel.org/r/1665998435-199946-3-git-send-email-john.garry@huawei.com Signed-off-by: Martin K. Petersen --- drivers/scsi/hisi_sas/hisi_sas_main.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 699b07abb6b0..8303aa5eaf25 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -1547,6 +1547,7 @@ static int hisi_sas_abort_task(struct sas_task *task) struct hisi_sas_internal_abort_data internal_abort_data = { false }; struct domain_device *device = task->dev; struct hisi_sas_device *sas_dev = device->lldd_dev; + struct hisi_sas_slot *slot = task->lldd_task; struct hisi_hba *hisi_hba; struct device *dev; int rc = TMF_RESP_FUNC_FAILED; @@ -1560,7 +1561,6 @@ static int hisi_sas_abort_task(struct sas_task *task) spin_lock_irqsave(&task->task_state_lock, flags); if (task->task_state_flags & SAS_TASK_STATE_DONE) { - struct hisi_sas_slot *slot = task->lldd_task; struct hisi_sas_cq *cq; if (slot) { @@ -1578,8 +1578,7 @@ static int hisi_sas_abort_task(struct sas_task *task) task->task_state_flags |= SAS_TASK_STATE_ABORTED; spin_unlock_irqrestore(&task->task_state_lock, flags); - if (task->lldd_task && task->task_proto & SAS_PROTOCOL_SSP) { - struct hisi_sas_slot *slot = task->lldd_task; + if (slot && task->task_proto & SAS_PROTOCOL_SSP) { u16 tag = slot->idx; int rc2; @@ -1613,9 +1612,8 @@ static int hisi_sas_abort_task(struct sas_task *task) hisi_sas_dereg_device(hisi_hba, device); rc = hisi_sas_softreset_ata_disk(device); } - } else if (task->lldd_task && task->task_proto & SAS_PROTOCOL_SMP) { + } else if (slot && task->task_proto & 
SAS_PROTOCOL_SMP) { /* SMP */ - struct hisi_sas_slot *slot = task->lldd_task; u32 tag = slot->idx; struct hisi_sas_cq *cq = &hisi_hba->cq[slot->dlvry_queue]; From 930d97dabdd56681aef752a35475f0212a171741 Mon Sep 17 00:00:00 2001 From: Xingui Yang Date: Mon, 17 Oct 2022 17:20:30 +0800 Subject: [PATCH 0148/4122] scsi: hisi_sas: Add SATA_DISK_ERR bit handling for v3 hw When CQ header dw3 SATA_DISK_ERR is set it means this SATA disk is in error state and the current IPTT is invalid. An invalid IPTT does not correspond to any slot. In this scenario, new I/Os that are delivered to the disk will be rejected by the controller and all I/Os remaining in the disk should be aborted, which we add here with the sas_ata_device_link_abort() call. In hisi_sas_abort_task() we don't want to issue a soft reset as it may cause info to be lost in the target disk for the ATA EH autopsy. In this case, just release resources - the disk won't return other I/Os normally after NCQ Error, so this is safe. Signed-off-by: Xingui Yang Signed-off-by: John Garry Link: https://lore.kernel.org/r/1665998435-199946-4-git-send-email-john.garry@huawei.com Signed-off-by: Martin K. 
Petersen --- drivers/scsi/hisi_sas/hisi_sas.h | 1 + drivers/scsi/hisi_sas/hisi_sas_main.c | 18 +++++++++- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 48 ++++++++++++++++++++++++-- 3 files changed, 64 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h index 9aebf4a26b13..6f8a52a1b808 100644 --- a/drivers/scsi/hisi_sas/hisi_sas.h +++ b/drivers/scsi/hisi_sas/hisi_sas.h @@ -104,6 +104,7 @@ enum { enum dev_status { HISI_SAS_DEV_INIT, HISI_SAS_DEV_NORMAL, + HISI_SAS_DEV_NCQ_ERR, }; enum { diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 8303aa5eaf25..4c37ae9eb6b6 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -1604,13 +1604,26 @@ static int hisi_sas_abort_task(struct sas_task *task) } else if (task->task_proto & SAS_PROTOCOL_SATA || task->task_proto & SAS_PROTOCOL_STP) { if (task->dev->dev_type == SAS_SATA_DEV) { + struct ata_queued_cmd *qc = task->uldd_task; + rc = hisi_sas_internal_task_abort_dev(sas_dev, false); if (rc < 0) { dev_err(dev, "abort task: internal abort failed\n"); goto out; } hisi_sas_dereg_device(hisi_hba, device); - rc = hisi_sas_softreset_ata_disk(device); + + /* + * If an ATA internal command times out in ATA EH, it + * need to execute soft reset, so check the scsicmd + */ + if ((sas_dev->dev_status == HISI_SAS_DEV_NCQ_ERR) && + qc && qc->scsicmd) { + hisi_sas_do_release_task(hisi_hba, task, slot); + rc = TMF_RESP_FUNC_COMPLETE; + } else { + rc = hisi_sas_softreset_ata_disk(device); + } } } else if (slot && task->task_proto & SAS_PROTOCOL_SMP) { /* SMP */ @@ -1727,6 +1740,9 @@ static int hisi_sas_I_T_nexus_reset(struct domain_device *device) struct device *dev = hisi_hba->dev; int rc; + if (sas_dev->dev_status == HISI_SAS_DEV_NCQ_ERR) + sas_dev->dev_status = HISI_SAS_DEV_NORMAL; + rc = hisi_sas_internal_task_abort_dev(sas_dev, false); if (rc < 0) { dev_err(dev, "I_T nexus reset: internal abort 
(%d)\n", rc); diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index d56b4bfd2767..0ae8a60aaf93 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -404,6 +404,11 @@ #define CMPLT_HDR_CMPLT_MSK (0x3 << CMPLT_HDR_CMPLT_OFF) #define CMPLT_HDR_ERROR_PHASE_OFF 2 #define CMPLT_HDR_ERROR_PHASE_MSK (0xff << CMPLT_HDR_ERROR_PHASE_OFF) +/* bit[9:2] Error Phase */ +#define ERR_PHASE_RESPONSE_FRAME_REV_STAGE_OFF \ + 8 +#define ERR_PHASE_RESPONSE_FRAME_REV_STAGE_MSK \ + (0x1 << ERR_PHASE_RESPONSE_FRAME_REV_STAGE_OFF) #define CMPLT_HDR_RSPNS_XFRD_OFF 10 #define CMPLT_HDR_RSPNS_XFRD_MSK (0x1 << CMPLT_HDR_RSPNS_XFRD_OFF) #define CMPLT_HDR_RSPNS_GOOD_OFF 11 @@ -423,8 +428,15 @@ #define CMPLT_HDR_DEV_ID_OFF 16 #define CMPLT_HDR_DEV_ID_MSK (0xffff << CMPLT_HDR_DEV_ID_OFF) /* dw3 */ +#define CMPLT_HDR_SATA_DISK_ERR_OFF 16 +#define CMPLT_HDR_SATA_DISK_ERR_MSK (0x1 << CMPLT_HDR_SATA_DISK_ERR_OFF) #define CMPLT_HDR_IO_IN_TARGET_OFF 17 #define CMPLT_HDR_IO_IN_TARGET_MSK (0x1 << CMPLT_HDR_IO_IN_TARGET_OFF) +/* bit[23:18] ERR_FIS_ATA_STATUS */ +#define FIS_ATA_STATUS_ERR_OFF 18 +#define FIS_ATA_STATUS_ERR_MSK (0x1 << FIS_ATA_STATUS_ERR_OFF) +#define FIS_TYPE_SDB_OFF 31 +#define FIS_TYPE_SDB_MSK (0x1 << FIS_TYPE_SDB_OFF) /* ITCT header */ /* qw0 */ @@ -2148,6 +2160,18 @@ static irqreturn_t fatal_axi_int_v3_hw(int irq_no, void *p) return IRQ_HANDLED; } +static bool is_ncq_err_v3_hw(struct hisi_sas_complete_v3_hdr *complete_hdr) +{ + u32 dw0, dw3; + + dw0 = le32_to_cpu(complete_hdr->dw0); + dw3 = le32_to_cpu(complete_hdr->dw3); + + return (dw0 & ERR_PHASE_RESPONSE_FRAME_REV_STAGE_MSK) && + (dw3 & FIS_TYPE_SDB_MSK) && + (dw3 & FIS_ATA_STATUS_ERR_MSK); +} + static bool slot_err_v3_hw(struct hisi_hba *hisi_hba, struct sas_task *task, struct hisi_sas_slot *slot) @@ -2381,14 +2405,34 @@ static irqreturn_t cq_thread_v3_hw(int irq_no, void *p) while (rd_point != wr_point) { struct hisi_sas_complete_v3_hdr 
*complete_hdr; struct device *dev = hisi_hba->dev; - u32 dw1; + u32 dw0, dw1, dw3; int iptt; complete_hdr = &complete_queue[rd_point]; + dw0 = le32_to_cpu(complete_hdr->dw0); dw1 = le32_to_cpu(complete_hdr->dw1); + dw3 = le32_to_cpu(complete_hdr->dw3); iptt = dw1 & CMPLT_HDR_IPTT_MSK; - if (likely(iptt < HISI_SAS_COMMAND_ENTRIES_V3_HW)) { + if (unlikely((dw0 & CMPLT_HDR_CMPLT_MSK) == 0x3) && + (dw3 & CMPLT_HDR_SATA_DISK_ERR_MSK)) { + int device_id = (dw1 & CMPLT_HDR_DEV_ID_MSK) >> + CMPLT_HDR_DEV_ID_OFF; + struct hisi_sas_itct *itct = + &hisi_hba->itct[device_id]; + struct hisi_sas_device *sas_dev = + &hisi_hba->devices[device_id]; + struct domain_device *device = sas_dev->sas_device; + + dev_err(dev, "erroneous completion disk err dev id=%d sas_addr=0x%llx CQ hdr: 0x%x 0x%x 0x%x 0x%x\n", + device_id, itct->sas_addr, dw0, dw1, + complete_hdr->act, dw3); + + if (is_ncq_err_v3_hw(complete_hdr)) + sas_dev->dev_status = HISI_SAS_DEV_NCQ_ERR; + + sas_ata_device_link_abort(device, true); + } else if (likely(iptt < HISI_SAS_COMMAND_ENTRIES_V3_HW)) { slot = &hisi_hba->slot_info[iptt]; slot->cmplt_queue_slot = rd_point; slot->cmplt_queue = queue; From 4ef4f1a6155571d3d53583a4e8e7ccbbec220b8a Mon Sep 17 00:00:00 2001 From: Xingui Yang Date: Mon, 17 Oct 2022 17:20:31 +0800 Subject: [PATCH 0149/4122] scsi: hisi_sas: Modify v3 HW SATA disk error state completion processing When an NCQ error occurs, the controller will abnormally complete the I/Os that are newly delivered to disk, and bit8 in CQ dw3 will be set which indicates that the SATA disk is in error state. The current processing flow is to set ts->stat to SAS_OPEN_REJECT and then sas_ata_task_done() will set FIS stat to ATA_ERR. After analyzing the I/O by ata_eh_analyze_tf(), err_mask will set to AC_ERR_HSM. If media error occurs for four times within 10 minutes and the chip rejects new I/Os for four times, NCQ will be disabled due to excessive errors, which is undesirable. 
Therefore, use sas_task_abort() to handle abnormally completed I/Os when SATA disk is in error state, as these abnormally completed I/Os are already processed by sas_ata_device_link_abort() and qc->flag are set to ATA_QCFLAG_FAILED. If sas_task_abort() is used, qc->err_mask will not be modified in EH. Unlike the current process flow, it will not increase the count of ECAT_TOUT_HSM and not turn off NCQ. Like other I/Os on the disk that do not have an error but do not return after the NCQ error, they are retried after the EH. Signed-off-by: Xingui Yang Signed-off-by: John Garry Link: https://lore.kernel.org/r/1665998435-199946-5-git-send-email-john.garry@huawei.com Signed-off-by: Martin K. Petersen --- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 0ae8a60aaf93..0c3fcb807806 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -428,6 +428,8 @@ #define CMPLT_HDR_DEV_ID_OFF 16 #define CMPLT_HDR_DEV_ID_MSK (0xffff << CMPLT_HDR_DEV_ID_OFF) /* dw3 */ +#define SATA_DISK_IN_ERROR_STATUS_OFF 8 +#define SATA_DISK_IN_ERROR_STATUS_MSK (0x1 << SATA_DISK_IN_ERROR_STATUS_OFF) #define CMPLT_HDR_SATA_DISK_ERR_OFF 16 #define CMPLT_HDR_SATA_DISK_ERR_MSK (0x1 << CMPLT_HDR_SATA_DISK_ERR_OFF) #define CMPLT_HDR_IO_IN_TARGET_OFF 17 @@ -2219,7 +2221,8 @@ slot_err_v3_hw(struct hisi_hba *hisi_hba, struct sas_task *task, } else if (dma_rx_err_type & RX_DATA_LEN_UNDERFLOW_MSK) { ts->residual = trans_tx_fail_type; ts->stat = SAS_DATA_UNDERRUN; - } else if (dw3 & CMPLT_HDR_IO_IN_TARGET_MSK) { + } else if ((dw3 & CMPLT_HDR_IO_IN_TARGET_MSK) || + (dw3 & SATA_DISK_IN_ERROR_STATUS_MSK)) { ts->stat = SAS_PHY_DOWN; slot->abort = 1; } else { From 0b639decf65160b1afd9993019be37d7869c0340 Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 17 Oct 2022 17:20:32 +0800 Subject: [PATCH 0150/4122] scsi: pm8001: Modify 
task abort handling for SATA task When we try to abort a SATA task, the CCB of the task which we are trying to abort may still complete. In this case, we should not touch the task associated with that CCB as we can race with libsas freeing the task later in sas_eh_handle_sas_errors() -> sas_eh_finish_cmd() for when TASK_IS_ABORTED is returned from sas_scsi_find_task() Signed-off-by: John Garry Link: https://lore.kernel.org/r/1665998435-199946-6-git-send-email-john.garry@huawei.com Tested-by: Damien Le Moal Tested-by: Niklas Cassel # pm80xx Acked-by: Jack Wang Signed-off-by: Martin K. Petersen --- drivers/scsi/pm8001/pm8001_hwi.c | 15 +++++++++++++-- drivers/scsi/pm8001/pm8001_sas.c | 8 ++++++++ drivers/scsi/pm8001/pm80xx_hwi.c | 14 ++++++++++---- 3 files changed, 31 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/pm8001/pm8001_hwi.c b/drivers/scsi/pm8001/pm8001_hwi.c index 628b08ba6770..c0adc3a9d196 100644 --- a/drivers/scsi/pm8001/pm8001_hwi.c +++ b/drivers/scsi/pm8001/pm8001_hwi.c @@ -2295,7 +2295,9 @@ mpi_sata_completion(struct pm8001_hba_info *pm8001_ha, void *piomb) if (t->dev && (t->dev->lldd_dev)) pm8001_dev = t->dev->lldd_dev; } else { - pm8001_dbg(pm8001_ha, FAIL, "task null\n"); + pm8001_dbg(pm8001_ha, FAIL, "task null, freeing CCB tag %d\n", + ccb->ccb_tag); + pm8001_ccb_free(pm8001_ha, ccb); return; } @@ -2675,8 +2677,17 @@ static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha, void *piomb) pm8001_dev = ccb->device; if (event) pm8001_dbg(pm8001_ha, FAIL, "sata IO status 0x%x\n", event); - if (unlikely(!t || !t->lldd_task || !t->dev)) + + if (unlikely(!t)) { + pm8001_dbg(pm8001_ha, FAIL, "task null, freeing CCB tag %d\n", + ccb->ccb_tag); + pm8001_ccb_free(pm8001_ha, ccb); return; + } + + if (unlikely(!t->lldd_task || !t->dev)) + return; + ts = &t->task_status; pm8001_dbg(pm8001_ha, DEVIO, "port_id:0x%x, device_id:0x%x, tag:0x%x, event:0x%x\n", diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c index 
8e3f2f9ddaac..d5ec29f69be3 100644 --- a/drivers/scsi/pm8001/pm8001_sas.c +++ b/drivers/scsi/pm8001/pm8001_sas.c @@ -983,6 +983,7 @@ int pm8001_query_task(struct sas_task *task) /* mandatory SAM-3, still need free task/ccb info, abort the specified task */ int pm8001_abort_task(struct sas_task *task) { + struct pm8001_ccb_info *ccb = task->lldd_task; unsigned long flags; u32 tag; struct domain_device *dev ; @@ -1113,6 +1114,13 @@ int pm8001_abort_task(struct sas_task *task) pm8001_dev, DS_OPERATIONAL); wait_for_completion(&completion); } else { + /* + * Ensure that if we see a completion for the ccb + * associated with the task which we are trying to + * abort then we should not touch the sas_task as it + * may race with libsas freeing it when return here. + */ + ccb->task = NULL; ret = sas_execute_internal_abort_single(dev, tag, 0, NULL); } rc = TMF_RESP_FUNC_COMPLETE; diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c index f8b8624458f7..dd0e06983cd3 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.c +++ b/drivers/scsi/pm8001/pm80xx_hwi.c @@ -2396,7 +2396,9 @@ mpi_sata_completion(struct pm8001_hba_info *pm8001_ha, if (t->dev && (t->dev->lldd_dev)) pm8001_dev = t->dev->lldd_dev; } else { - pm8001_dbg(pm8001_ha, FAIL, "task null\n"); + pm8001_dbg(pm8001_ha, FAIL, "task null, freeing CCB tag %d\n", + ccb->ccb_tag); + pm8001_ccb_free(pm8001_ha, ccb); return; } @@ -2813,12 +2815,16 @@ static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha, ccb = &pm8001_ha->ccb_info[tag]; t = ccb->task; pm8001_dev = ccb->device; - - if (unlikely(!t || !t->lldd_task || !t->dev)) { - pm8001_dbg(pm8001_ha, FAIL, "task or dev null\n"); + if (unlikely(!t)) { + pm8001_dbg(pm8001_ha, FAIL, "task null, freeing CCB tag %d\n", + ccb->ccb_tag); + pm8001_ccb_free(pm8001_ha, ccb); return; } + if (unlikely(!t->lldd_task || !t->dev)) + return; + ts = &t->task_status; pm8001_dbg(pm8001_ha, IOERR, "port_id:0x%x, tag:0x%x, event:0x%x\n", port_id, tag, event); From 
811be570a9a8df96b4fd43ff00837b947bbaf49b Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 17 Oct 2022 17:20:33 +0800 Subject: [PATCH 0151/4122] scsi: pm8001: Use sas_ata_device_link_abort() to handle NCQ errors In commit c6b9ef5779c3 ("[SCSI] pm80xx: NCQ error handling changes") the driver had support added to handle NCQ errors but much of what is done in this handling is duplicated from the libata EH. In that named commit we handle NCQ errors in two main steps: a. Issue read log ext10 to examine and clear the errors b. Issue SATA_ABORT all command Indeed, in libata EH, we do similar to above: a. ata_do_eh() -> ata_eh_autopsy() -> ata_eh_link_autopsy() -> ata_eh_analyze_ncq_error() -> ata_eh_read_log_10h() b. ata_do_eh() -> ata_eh_recover() which will issue a device soft reset or hard reset Since there is so much duplication, use sas_ata_device_link_abort() which will abort all pending IOs and kick off ATA EH which will do the steps above. However we will not follow the advisory to send the SATA_ABORT all command after the autopsy in read log ext10. Indeed, in libsas EH, we already send a per-task SATA_ABORT command, and this is prior to the ATA EH kicking in and issuing the read log ext10 in the recovery process. I judge that this is ok as the SATA_ABORT command does not actually send any protocol on the link to abort I/O on the other side, so would not change any state on the disk (for the read log ext10 command). Signed-off-by: John Garry Link: https://lore.kernel.org/r/1665998435-199946-7-git-send-email-john.garry@huawei.com Tested-by: Damien Le Moal Tested-by: Niklas Cassel # pm80xx Acked-by: Jack Wang Signed-off-by: Martin K. 
Petersen --- drivers/scsi/pm8001/pm8001_hwi.c | 171 +++---------------------------- drivers/scsi/pm8001/pm8001_sas.c | 6 -- drivers/scsi/pm8001/pm8001_sas.h | 5 - drivers/scsi/pm8001/pm80xx_hwi.c | 163 ++--------------------------- 4 files changed, 19 insertions(+), 326 deletions(-) diff --git a/drivers/scsi/pm8001/pm8001_hwi.c b/drivers/scsi/pm8001/pm8001_hwi.c index c0adc3a9d196..ec1a9ab61814 100644 --- a/drivers/scsi/pm8001/pm8001_hwi.c +++ b/drivers/scsi/pm8001/pm8001_hwi.c @@ -1724,7 +1724,14 @@ void pm8001_work_fn(struct work_struct *work) pm8001_free_dev(pm8001_dev); } } - } break; + } + break; + case IO_XFER_ERROR_ABORTED_NCQ_MODE: + { + dev = pm8001_dev->sas_device; + sas_ata_device_link_abort(dev, false); + } + break; } kfree(pw); } @@ -1748,110 +1755,6 @@ int pm8001_handle_event(struct pm8001_hba_info *pm8001_ha, void *data, return ret; } -static void pm8001_send_abort_all(struct pm8001_hba_info *pm8001_ha, - struct pm8001_device *pm8001_ha_dev) -{ - struct pm8001_ccb_info *ccb; - struct sas_task *task; - struct task_abort_req task_abort; - u32 opc = OPC_INB_SATA_ABORT; - int ret; - - pm8001_ha_dev->id |= NCQ_ABORT_ALL_FLAG; - pm8001_ha_dev->id &= ~NCQ_READ_LOG_FLAG; - - task = sas_alloc_slow_task(GFP_ATOMIC); - if (!task) { - pm8001_dbg(pm8001_ha, FAIL, "cannot allocate task\n"); - return; - } - - task->task_done = pm8001_task_done; - - ccb = pm8001_ccb_alloc(pm8001_ha, pm8001_ha_dev, task); - if (!ccb) { - sas_free_task(task); - return; - } - - memset(&task_abort, 0, sizeof(task_abort)); - task_abort.abort_all = cpu_to_le32(1); - task_abort.device_id = cpu_to_le32(pm8001_ha_dev->device_id); - task_abort.tag = cpu_to_le32(ccb->ccb_tag); - - ret = pm8001_mpi_build_cmd(pm8001_ha, 0, opc, &task_abort, - sizeof(task_abort), 0); - if (ret) { - sas_free_task(task); - pm8001_ccb_free(pm8001_ha, ccb); - } -} - -static void pm8001_send_read_log(struct pm8001_hba_info *pm8001_ha, - struct pm8001_device *pm8001_ha_dev) -{ - struct sata_start_req sata_cmd; - int 
res; - struct pm8001_ccb_info *ccb; - struct sas_task *task = NULL; - struct host_to_dev_fis fis; - struct domain_device *dev; - u32 opc = OPC_INB_SATA_HOST_OPSTART; - - task = sas_alloc_slow_task(GFP_ATOMIC); - if (!task) { - pm8001_dbg(pm8001_ha, FAIL, "cannot allocate task !!!\n"); - return; - } - task->task_done = pm8001_task_done; - - /* - * Allocate domain device by ourselves as libsas is not going to - * provide any. - */ - dev = kzalloc(sizeof(struct domain_device), GFP_ATOMIC); - if (!dev) { - sas_free_task(task); - pm8001_dbg(pm8001_ha, FAIL, - "Domain device cannot be allocated\n"); - return; - } - task->dev = dev; - task->dev->lldd_dev = pm8001_ha_dev; - - ccb = pm8001_ccb_alloc(pm8001_ha, pm8001_ha_dev, task); - if (!ccb) { - sas_free_task(task); - kfree(dev); - return; - } - - pm8001_ha_dev->id |= NCQ_READ_LOG_FLAG; - pm8001_ha_dev->id |= NCQ_2ND_RLE_FLAG; - - /* construct read log FIS */ - memset(&fis, 0, sizeof(struct host_to_dev_fis)); - fis.fis_type = 0x27; - fis.flags = 0x80; - fis.command = ATA_CMD_READ_LOG_EXT; - fis.lbal = 0x10; - fis.sector_count = 0x1; - - memset(&sata_cmd, 0, sizeof(sata_cmd)); - sata_cmd.tag = cpu_to_le32(ccb->ccb_tag); - sata_cmd.device_id = cpu_to_le32(pm8001_ha_dev->device_id); - sata_cmd.ncqtag_atap_dir_m = cpu_to_le32((0x1 << 7) | (0x5 << 9)); - memcpy(&sata_cmd.sata_fis, &fis, sizeof(struct host_to_dev_fis)); - - res = pm8001_mpi_build_cmd(pm8001_ha, 0, opc, &sata_cmd, - sizeof(sata_cmd), 0); - if (res) { - sas_free_task(task); - pm8001_ccb_free(pm8001_ha, ccb); - kfree(dev); - } -} - /** * mpi_ssp_completion- process the event that FW response to the SSP request. 
* @pm8001_ha: our hba card information @@ -2301,8 +2204,7 @@ mpi_sata_completion(struct pm8001_hba_info *pm8001_ha, void *piomb) return; } - if ((pm8001_dev && !(pm8001_dev->id & NCQ_READ_LOG_FLAG)) - && unlikely(!t || !t->lldd_task || !t->dev)) { + if (pm8001_dev && unlikely(!t || !t->lldd_task || !t->dev)) { pm8001_dbg(pm8001_ha, FAIL, "task or dev null\n"); return; } @@ -2360,15 +2262,6 @@ mpi_sata_completion(struct pm8001_hba_info *pm8001_ha, void *piomb) if (param == 0) { ts->resp = SAS_TASK_COMPLETE; ts->stat = SAS_SAM_STAT_GOOD; - /* check if response is for SEND READ LOG */ - if (pm8001_dev && - (pm8001_dev->id & NCQ_READ_LOG_FLAG)) { - pm8001_send_abort_all(pm8001_ha, pm8001_dev); - /* Free the tag */ - pm8001_tag_free(pm8001_ha, tag); - sas_free_task(t); - return; - } } else { u8 len; ts->resp = SAS_TASK_COMPLETE; @@ -2666,9 +2559,10 @@ static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha, void *piomb) if (event == IO_XFER_ERROR_ABORTED_NCQ_MODE) { /* find device using device id */ pm8001_dev = pm8001_find_dev(pm8001_ha, dev_id); - /* send read log extension */ if (pm8001_dev) - pm8001_send_read_log(pm8001_ha, pm8001_dev); + pm8001_handle_event(pm8001_ha, + pm8001_dev, + IO_XFER_ERROR_ABORTED_NCQ_MODE); return; } @@ -3649,12 +3543,7 @@ int pm8001_mpi_task_abort_resp(struct pm8001_hba_info *pm8001_ha, void *piomb) pm8001_ccb_task_free(pm8001_ha, ccb); mb(); - if (pm8001_dev->id & NCQ_ABORT_ALL_FLAG) { - sas_free_task(t); - pm8001_dev->id &= ~NCQ_ABORT_ALL_FLAG; - } else { - t->task_done(t); - } + t->task_done(t); return 0; } @@ -4206,7 +4095,6 @@ static int pm8001_chip_sata_req(struct pm8001_hba_info *pm8001_ha, u64 phys_addr; u32 ATAP = 0x0; u32 dir; - unsigned long flags; u32 opc = OPC_INB_SATA_HOST_OPSTART; memset(&sata_cmd, 0, sizeof(sata_cmd)); @@ -4261,39 +4149,6 @@ static int pm8001_chip_sata_req(struct pm8001_hba_info *pm8001_ha, sata_cmd.esgl = 0; } - /* Check for read log for failed drive and return */ - if (sata_cmd.sata_fis.command == 
0x2f) { - if (((pm8001_ha_dev->id & NCQ_READ_LOG_FLAG) || - (pm8001_ha_dev->id & NCQ_ABORT_ALL_FLAG) || - (pm8001_ha_dev->id & NCQ_2ND_RLE_FLAG))) { - struct task_status_struct *ts; - - pm8001_ha_dev->id &= 0xDFFFFFFF; - ts = &task->task_status; - - spin_lock_irqsave(&task->task_state_lock, flags); - ts->resp = SAS_TASK_COMPLETE; - ts->stat = SAS_SAM_STAT_GOOD; - task->task_state_flags &= ~SAS_TASK_STATE_PENDING; - task->task_state_flags |= SAS_TASK_STATE_DONE; - if (unlikely((task->task_state_flags & - SAS_TASK_STATE_ABORTED))) { - spin_unlock_irqrestore(&task->task_state_lock, - flags); - pm8001_dbg(pm8001_ha, FAIL, - "task 0x%p resp 0x%x stat 0x%x but aborted by upper layer\n", - task, ts->resp, - ts->stat); - pm8001_ccb_task_free(pm8001_ha, ccb); - } else { - spin_unlock_irqrestore(&task->task_state_lock, - flags); - pm8001_ccb_task_free_done(pm8001_ha, ccb); - return 0; - } - } - } - return pm8001_mpi_build_cmd(pm8001_ha, 0, opc, &sata_cmd, sizeof(sata_cmd), 0); } diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c index d5ec29f69be3..2d84ae95a1f9 100644 --- a/drivers/scsi/pm8001/pm8001_sas.c +++ b/drivers/scsi/pm8001/pm8001_sas.c @@ -687,12 +687,6 @@ int pm8001_dev_found(struct domain_device *dev) return pm8001_dev_found_notify(dev); } -void pm8001_task_done(struct sas_task *task) -{ - del_timer(&task->slow_task->timer); - complete(&task->slow_task->completion); -} - #define PM8001_TASK_TIMEOUT 20 /** diff --git a/drivers/scsi/pm8001/pm8001_sas.h b/drivers/scsi/pm8001/pm8001_sas.h index b08f52673889..16a753d5e8a7 100644 --- a/drivers/scsi/pm8001/pm8001_sas.h +++ b/drivers/scsi/pm8001/pm8001_sas.h @@ -579,10 +579,6 @@ struct pm8001_fw_image_header { #define FLASH_UPDATE_DNLD_NOT_SUPPORTED 0x10 #define FLASH_UPDATE_DISABLED 0x11 -#define NCQ_READ_LOG_FLAG 0x80000000 -#define NCQ_ABORT_ALL_FLAG 0x40000000 -#define NCQ_2ND_RLE_FLAG 0x20000000 - /* Device states */ #define DS_OPERATIONAL 0x01 #define DS_PORT_IN_RESET 0x02 @@ -709,7 
+705,6 @@ int pm8001_mpi_fw_flash_update_resp(struct pm8001_hba_info *pm8001_ha, int pm8001_mpi_general_event(struct pm8001_hba_info *pm8001_ha, void *piomb); int pm8001_mpi_task_abort_resp(struct pm8001_hba_info *pm8001_ha, void *piomb); struct sas_task *pm8001_alloc_task(void); -void pm8001_task_done(struct sas_task *task); void pm8001_free_task(struct sas_task *task); void pm8001_tag_free(struct pm8001_hba_info *pm8001_ha, u32 tag); struct pm8001_device *pm8001_find_dev(struct pm8001_hba_info *pm8001_ha, diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c index dd0e06983cd3..4484c498bcb6 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.c +++ b/drivers/scsi/pm8001/pm80xx_hwi.c @@ -1778,113 +1778,6 @@ pm80xx_chip_interrupt_disable(struct pm8001_hba_info *pm8001_ha, u8 vec) pm80xx_chip_intx_interrupt_disable(pm8001_ha); } -static void pm80xx_send_abort_all(struct pm8001_hba_info *pm8001_ha, - struct pm8001_device *pm8001_ha_dev) -{ - struct pm8001_ccb_info *ccb; - struct sas_task *task; - struct task_abort_req task_abort; - u32 opc = OPC_INB_SATA_ABORT; - int ret; - - pm8001_ha_dev->id |= NCQ_ABORT_ALL_FLAG; - pm8001_ha_dev->id &= ~NCQ_READ_LOG_FLAG; - - task = sas_alloc_slow_task(GFP_ATOMIC); - if (!task) { - pm8001_dbg(pm8001_ha, FAIL, "cannot allocate task\n"); - return; - } - task->task_done = pm8001_task_done; - - ccb = pm8001_ccb_alloc(pm8001_ha, pm8001_ha_dev, task); - if (!ccb) { - sas_free_task(task); - return; - } - - memset(&task_abort, 0, sizeof(task_abort)); - task_abort.abort_all = cpu_to_le32(1); - task_abort.device_id = cpu_to_le32(pm8001_ha_dev->device_id); - task_abort.tag = cpu_to_le32(ccb->ccb_tag); - - ret = pm8001_mpi_build_cmd(pm8001_ha, 0, opc, &task_abort, - sizeof(task_abort), 0); - pm8001_dbg(pm8001_ha, FAIL, "Executing abort task end\n"); - if (ret) { - sas_free_task(task); - pm8001_ccb_free(pm8001_ha, ccb); - } -} - -static void pm80xx_send_read_log(struct pm8001_hba_info *pm8001_ha, - struct pm8001_device 
*pm8001_ha_dev) -{ - struct sata_start_req sata_cmd; - int res; - struct pm8001_ccb_info *ccb; - struct sas_task *task = NULL; - struct host_to_dev_fis fis; - struct domain_device *dev; - u32 opc = OPC_INB_SATA_HOST_OPSTART; - - task = sas_alloc_slow_task(GFP_ATOMIC); - if (!task) { - pm8001_dbg(pm8001_ha, FAIL, "cannot allocate task !!!\n"); - return; - } - task->task_done = pm8001_task_done; - - /* - * Allocate domain device by ourselves as libsas is not going to - * provide any. - */ - dev = kzalloc(sizeof(struct domain_device), GFP_ATOMIC); - if (!dev) { - sas_free_task(task); - pm8001_dbg(pm8001_ha, FAIL, - "Domain device cannot be allocated\n"); - return; - } - - task->dev = dev; - task->dev->lldd_dev = pm8001_ha_dev; - - ccb = pm8001_ccb_alloc(pm8001_ha, pm8001_ha_dev, task); - if (!ccb) { - sas_free_task(task); - kfree(dev); - return; - } - - pm8001_ha_dev->id |= NCQ_READ_LOG_FLAG; - pm8001_ha_dev->id |= NCQ_2ND_RLE_FLAG; - - memset(&sata_cmd, 0, sizeof(sata_cmd)); - - /* construct read log FIS */ - memset(&fis, 0, sizeof(struct host_to_dev_fis)); - fis.fis_type = 0x27; - fis.flags = 0x80; - fis.command = ATA_CMD_READ_LOG_EXT; - fis.lbal = 0x10; - fis.sector_count = 0x1; - - sata_cmd.tag = cpu_to_le32(ccb->ccb_tag); - sata_cmd.device_id = cpu_to_le32(pm8001_ha_dev->device_id); - sata_cmd.ncqtag_atap_dir_m_dad = cpu_to_le32(((0x1 << 7) | (0x5 << 9))); - memcpy(&sata_cmd.sata_fis, &fis, sizeof(struct host_to_dev_fis)); - - res = pm8001_mpi_build_cmd(pm8001_ha, 0, opc, &sata_cmd, - sizeof(sata_cmd), 0); - pm8001_dbg(pm8001_ha, FAIL, "Executing read log end\n"); - if (res) { - sas_free_task(task); - pm8001_ccb_free(pm8001_ha, ccb); - kfree(dev); - } -} - /** * mpi_ssp_completion - process the event that FW response to the SSP request. 
* @pm8001_ha: our hba card information @@ -2402,11 +2295,9 @@ mpi_sata_completion(struct pm8001_hba_info *pm8001_ha, return; } - if ((pm8001_dev && !(pm8001_dev->id & NCQ_READ_LOG_FLAG)) - && unlikely(!t || !t->lldd_task || !t->dev)) { - pm8001_dbg(pm8001_ha, FAIL, "task or dev null\n"); + + if (pm8001_dev && unlikely(!t->lldd_task || !t->dev)) return; - } ts = &t->task_status; @@ -2463,15 +2354,6 @@ mpi_sata_completion(struct pm8001_hba_info *pm8001_ha, if (param == 0) { ts->resp = SAS_TASK_COMPLETE; ts->stat = SAS_SAM_STAT_GOOD; - /* check if response is for SEND READ LOG */ - if (pm8001_dev && - (pm8001_dev->id & NCQ_READ_LOG_FLAG)) { - pm80xx_send_abort_all(pm8001_ha, pm8001_dev); - /* Free the tag */ - pm8001_tag_free(pm8001_ha, tag); - sas_free_task(t); - return; - } } else { u8 len; ts->resp = SAS_TASK_COMPLETE; @@ -2806,9 +2688,11 @@ static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha, if (event == IO_XFER_ERROR_ABORTED_NCQ_MODE) { /* find device using device id */ pm8001_dev = pm8001_find_dev(pm8001_ha, dev_id); - /* send read log extension */ + /* send read log extension by aborting the link - libata does what we want */ if (pm8001_dev) - pm80xx_send_read_log(pm8001_ha, pm8001_dev); + pm8001_handle_event(pm8001_ha, + pm8001_dev, + IO_XFER_ERROR_ABORTED_NCQ_MODE); return; } @@ -4556,7 +4440,6 @@ static int pm80xx_chip_sata_req(struct pm8001_hba_info *pm8001_ha, u32 end_addr_high, end_addr_low; u32 ATAP = 0x0; u32 dir; - unsigned long flags; u32 opc = OPC_INB_SATA_HOST_OPSTART; memset(&sata_cmd, 0, sizeof(sata_cmd)); @@ -4735,40 +4618,6 @@ static int pm80xx_chip_sata_req(struct pm8001_hba_info *pm8001_ha, (task->ata_task.atapi_packet[15] << 24))); } - /* Check for read log for failed drive and return */ - if (sata_cmd.sata_fis.command == 0x2f) { - if (pm8001_ha_dev && ((pm8001_ha_dev->id & NCQ_READ_LOG_FLAG) || - (pm8001_ha_dev->id & NCQ_ABORT_ALL_FLAG) || - (pm8001_ha_dev->id & NCQ_2ND_RLE_FLAG))) { - struct task_status_struct *ts; - - 
pm8001_ha_dev->id &= 0xDFFFFFFF; - ts = &task->task_status; - - spin_lock_irqsave(&task->task_state_lock, flags); - ts->resp = SAS_TASK_COMPLETE; - ts->stat = SAS_SAM_STAT_GOOD; - task->task_state_flags &= ~SAS_TASK_STATE_PENDING; - task->task_state_flags |= SAS_TASK_STATE_DONE; - if (unlikely((task->task_state_flags & - SAS_TASK_STATE_ABORTED))) { - spin_unlock_irqrestore(&task->task_state_lock, - flags); - pm8001_dbg(pm8001_ha, FAIL, - "task 0x%p resp 0x%x stat 0x%x but aborted by upper layer\n", - task, ts->resp, - ts->stat); - pm8001_ccb_task_free(pm8001_ha, ccb); - return 0; - } else { - spin_unlock_irqrestore(&task->task_state_lock, - flags); - pm8001_ccb_task_free_done(pm8001_ha, ccb); - atomic_dec(&pm8001_ha_dev->running_req); - return 0; - } - } - } trace_pm80xx_request_issue(pm8001_ha->id, ccb->device ? ccb->device->attached_phy : PM8001_MAX_PHYS, ccb->ccb_tag, opc, From 8e8d43642f2f9bbed9e7823c6e5b6fd7c7fbc3dc Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 17 Oct 2022 17:20:34 +0800 Subject: [PATCH 0152/4122] scsi: libsas: Make sas_{alloc, alloc_slow, free}_task() private We have no users outside libsas any longer, so make sas_alloc_task(), sas_alloc_slow_task(), and sas_free_task() private. Signed-off-by: John Garry Link: https://lore.kernel.org/r/1665998435-199946-8-git-send-email-john.garry@huawei.com Tested-by: Damien Le Moal Tested-by: Niklas Cassel # pm80xx Reviewed-by: Jason Yan Signed-off-by: Martin K. 
Petersen --- drivers/scsi/libsas/sas_init.c | 3 --- drivers/scsi/libsas/sas_internal.h | 4 ++++ include/scsi/libsas.h | 4 ---- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/libsas/sas_init.c b/drivers/scsi/libsas/sas_init.c index e4f77072a58d..f2c05ebeb72f 100644 --- a/drivers/scsi/libsas/sas_init.c +++ b/drivers/scsi/libsas/sas_init.c @@ -35,7 +35,6 @@ struct sas_task *sas_alloc_task(gfp_t flags) return task; } -EXPORT_SYMBOL_GPL(sas_alloc_task); struct sas_task *sas_alloc_slow_task(gfp_t flags) { @@ -56,7 +55,6 @@ struct sas_task *sas_alloc_slow_task(gfp_t flags) return task; } -EXPORT_SYMBOL_GPL(sas_alloc_slow_task); void sas_free_task(struct sas_task *task) { @@ -65,7 +63,6 @@ void sas_free_task(struct sas_task *task) kmem_cache_free(sas_task_cache, task); } } -EXPORT_SYMBOL_GPL(sas_free_task); /*------------ SAS addr hash -----------*/ void sas_hash_addr(u8 *hashed, const u8 *sas_addr) diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h index 8d0ad3abc7b5..b54bcf3c9a9d 100644 --- a/drivers/scsi/libsas/sas_internal.h +++ b/drivers/scsi/libsas/sas_internal.h @@ -52,6 +52,10 @@ void sas_unregister_phys(struct sas_ha_struct *sas_ha); struct asd_sas_event *sas_alloc_event(struct asd_sas_phy *phy, gfp_t gfp_flags); void sas_free_event(struct asd_sas_event *event); +struct sas_task *sas_alloc_task(gfp_t flags); +struct sas_task *sas_alloc_slow_task(gfp_t flags); +void sas_free_task(struct sas_task *task); + int sas_register_ports(struct sas_ha_struct *sas_ha); void sas_unregister_ports(struct sas_ha_struct *sas_ha); diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index 2dbead74a2af..f86b56bf7833 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -639,10 +639,6 @@ struct sas_task_slow { #define SAS_TASK_STATE_ABORTED 4 #define SAS_TASK_NEED_DEV_RESET 8 -extern struct sas_task *sas_alloc_task(gfp_t flags); -extern struct sas_task *sas_alloc_slow_task(gfp_t flags); -extern void 
sas_free_task(struct sas_task *task); - static inline bool sas_is_internal_abort(struct sas_task *task) { return task->task_proto == SAS_PROTOCOL_INTERNAL_ABORT; From cc22efbec0110181725b1f5f6778155a2e352522 Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 17 Oct 2022 17:20:35 +0800 Subject: [PATCH 0153/4122] scsi: libsas: Update SATA dev FIS in sas_ata_task_done() In sas_ata_task_done(), for commands which complete with error we set the SATA dev FIS status field with ATA_ERR. In ata_eh_analyze_tf() this would be interpreted as a HSM error. Set ATA_DRDY, which will lead libata to judge as a device error, which is a safer bet. Signed-off-by: John Garry Link: https://lore.kernel.org/r/1665998435-199946-9-git-send-email-john.garry@huawei.com Reviewed-by: Niklas Cassel Signed-off-by: Martin K. Petersen --- drivers/scsi/libsas/sas_ata.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index 61f64d54e67d..78e6046fb55a 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -139,8 +139,8 @@ static void sas_ata_task_done(struct sas_task *task) qc->flags |= ATA_QCFLAG_FAILED; } - dev->sata_dev.fis[3] = 0x04; /* status err */ - dev->sata_dev.fis[2] = ATA_ERR; + dev->sata_dev.fis[2] = ATA_ERR | ATA_DRDY; /* tf status */ + dev->sata_dev.fis[3] = ATA_ABORTED; /* tf error */ } } From b6da92356cd6106dd9e7e8e168e3b7df4fe37d5d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 14 Oct 2022 17:24:11 -0700 Subject: [PATCH 0154/4122] scsi: esas2r: Initialize two host template members implicitly Prepare for removing the 'proc_dir' and 'present' members from the SCSI host template by implicitly initializing 'present' and 'emulated' in 'driver_template'. 
Reviewed-by: John Garry Cc: Bradley Grove Cc: Christoph Hellwig Cc: Ming Lei Cc: Hannes Reinecke Cc: Mike Christie Cc: Krzysztof Kozlowski Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20221015002418.30955-2-bvanassche@acm.org Signed-off-by: Martin K. Petersen --- drivers/scsi/esas2r/esas2r_main.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/scsi/esas2r/esas2r_main.c b/drivers/scsi/esas2r/esas2r_main.c index 7a4eadad23d7..27f6e7ccded8 100644 --- a/drivers/scsi/esas2r/esas2r_main.c +++ b/drivers/scsi/esas2r/esas2r_main.c @@ -248,8 +248,6 @@ static struct scsi_host_template driver_template = { .sg_tablesize = SG_CHUNK_SIZE, .cmd_per_lun = ESAS2R_DEFAULT_CMD_PER_LUN, - .present = 0, - .emulated = 0, .proc_name = ESAS2R_DRVR_NAME, .change_queue_depth = scsi_change_queue_depth, .max_sectors = 0xFFFF, From 77916da7e4a0975bd2b93e5214295e3318886cdb Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 14 Oct 2022 17:24:12 -0700 Subject: [PATCH 0155/4122] scsi: esas2r: Introduce scsi_template_proc_dir() Prepare for removing the 'proc_dir' and 'present' members from the SCSI host template. This commit does not change any functionality. Reviewed-by: John Garry Cc: Bradley Grove Cc: Christoph Hellwig Cc: Ming Lei Cc: Hannes Reinecke Cc: Mike Christie Cc: Krzysztof Kozlowski Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20221015002418.30955-3-bvanassche@acm.org Signed-off-by: Martin K. 
Petersen --- drivers/scsi/esas2r/esas2r_main.c | 17 +++++++++++------ drivers/scsi/scsi_proc.c | 11 +++++++++++ include/scsi/scsi_host.h | 6 ++++++ 3 files changed, 28 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/esas2r/esas2r_main.c b/drivers/scsi/esas2r/esas2r_main.c index 27f6e7ccded8..d7a2c49ff5ee 100644 --- a/drivers/scsi/esas2r/esas2r_main.c +++ b/drivers/scsi/esas2r/esas2r_main.c @@ -635,10 +635,13 @@ static void __exit esas2r_exit(void) esas2r_log(ESAS2R_LOG_INFO, "%s called", __func__); if (esas2r_proc_major > 0) { + struct proc_dir_entry *proc_dir; + esas2r_log(ESAS2R_LOG_INFO, "unregister proc"); - remove_proc_entry(ATTONODE_NAME, - esas2r_proc_host->hostt->proc_dir); + proc_dir = scsi_template_proc_dir(esas2r_proc_host->hostt); + if (proc_dir) + remove_proc_entry(ATTONODE_NAME, proc_dir); unregister_chrdev(esas2r_proc_major, ESAS2R_DRVR_NAME); esas2r_proc_major = 0; @@ -728,11 +731,13 @@ const char *esas2r_info(struct Scsi_Host *sh) esas2r_proc_major); if (esas2r_proc_major > 0) { - struct proc_dir_entry *pde; + struct proc_dir_entry *proc_dir; + struct proc_dir_entry *pde = NULL; - pde = proc_create(ATTONODE_NAME, 0, - sh->hostt->proc_dir, - &esas2r_proc_ops); + proc_dir = scsi_template_proc_dir(sh->hostt); + if (proc_dir) + pde = proc_create(ATTONODE_NAME, 0, proc_dir, + &esas2r_proc_ops); if (!pde) { esas2r_log_dev(ESAS2R_LOG_WARN, diff --git a/drivers/scsi/scsi_proc.c b/drivers/scsi/scsi_proc.c index 95aee1ad1383..456b43097288 100644 --- a/drivers/scsi/scsi_proc.c +++ b/drivers/scsi/scsi_proc.c @@ -83,6 +83,17 @@ static int proc_scsi_host_open(struct inode *inode, struct file *file) 4 * PAGE_SIZE); } +/** + * scsi_template_proc_dir() - returns the procfs dir for a SCSI host template + * @sht: SCSI host template pointer. 
+ */ +struct proc_dir_entry * +scsi_template_proc_dir(const struct scsi_host_template *sht) +{ + return sht->proc_dir; +} +EXPORT_SYMBOL_GPL(scsi_template_proc_dir); + static const struct proc_ops proc_scsi_ops = { .proc_open = proc_scsi_host_open, .proc_release = single_release, diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index fcf25f1642a3..3854ffcb0b3e 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -751,6 +751,12 @@ extern struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *, int); extern int __must_check scsi_add_host_with_dma(struct Scsi_Host *, struct device *, struct device *); +#if defined(CONFIG_SCSI_PROC_FS) +struct proc_dir_entry * +scsi_template_proc_dir(const struct scsi_host_template *sht); +#else +#define scsi_template_proc_dir(sht) NULL +#endif extern void scsi_scan_host(struct Scsi_Host *); extern void scsi_rescan_device(struct device *); extern void scsi_remove_host(struct Scsi_Host *); From ecca3f9b16366e601a6748bf31e9fe227812248f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 14 Oct 2022 17:24:13 -0700 Subject: [PATCH 0156/4122] scsi: core: Fail host creation if creating the proc directory fails Users expect that the contents of /proc/scsi are in sync with the contents of /sys/class/scsi_host. Hence fail host creation if creating the proc directory fails. Suggested-by: John Garry Reviewed-by: John Garry Cc: Christoph Hellwig Cc: Ming Lei Cc: Hannes Reinecke Cc: Mike Christie Cc: Krzysztof Kozlowski Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20221015002418.30955-4-bvanassche@acm.org Signed-off-by: Martin K.
Petersen --- drivers/scsi/hosts.c | 3 ++- drivers/scsi/scsi_priv.h | 4 ++-- drivers/scsi/scsi_proc.c | 13 +++++++++---- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index 9857dba09c95..12346e2297fd 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -519,7 +519,8 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize) "failed to create tmf workq\n"); goto fail; } - scsi_proc_hostdir_add(shost->hostt); + if (scsi_proc_hostdir_add(shost->hostt) < 0) + goto fail; return shost; fail: /* diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h index c52de9a973e4..494f48e03e90 100644 --- a/drivers/scsi/scsi_priv.h +++ b/drivers/scsi/scsi_priv.h @@ -111,14 +111,14 @@ extern void scsi_evt_thread(struct work_struct *work); /* scsi_proc.c */ #ifdef CONFIG_SCSI_PROC_FS -extern void scsi_proc_hostdir_add(struct scsi_host_template *); +extern int scsi_proc_hostdir_add(struct scsi_host_template *); extern void scsi_proc_hostdir_rm(struct scsi_host_template *); extern void scsi_proc_host_add(struct Scsi_Host *); extern void scsi_proc_host_rm(struct Scsi_Host *); extern int scsi_init_procfs(void); extern void scsi_exit_procfs(void); #else -# define scsi_proc_hostdir_add(sht) do { } while (0) +# define scsi_proc_hostdir_add(sht) 0 # define scsi_proc_hostdir_rm(sht) do { } while (0) # define scsi_proc_host_add(shost) do { } while (0) # define scsi_proc_host_rm(shost) do { } while (0) diff --git a/drivers/scsi/scsi_proc.c b/drivers/scsi/scsi_proc.c index 456b43097288..1b09cea2a752 100644 --- a/drivers/scsi/scsi_proc.c +++ b/drivers/scsi/scsi_proc.c @@ -108,20 +108,25 @@ static const struct proc_ops proc_scsi_ops = { * * Sets sht->proc_dir to the new directory. 
*/ - -void scsi_proc_hostdir_add(struct scsi_host_template *sht) +int scsi_proc_hostdir_add(struct scsi_host_template *sht) { + int ret = 0; + if (!sht->show_info) - return; + return 0; mutex_lock(&global_host_template_mutex); if (!sht->present++) { sht->proc_dir = proc_mkdir(sht->proc_name, proc_scsi); - if (!sht->proc_dir) + if (!sht->proc_dir) { printk(KERN_ERR "%s: proc_mkdir failed for %s\n", __func__, sht->proc_name); + ret = -ENOMEM; + } } mutex_unlock(&global_host_template_mutex); + + return ret; } /** From 036abd6140078b4125f60e731f28e15de708f87d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 14 Oct 2022 17:24:14 -0700 Subject: [PATCH 0157/4122] scsi: core: Introduce a new list for SCSI proc directory entries Instead of using scsi_host_template members to track the SCSI proc directory entries, track these entries in a list. This changes the time needed for looking up the proc dir pointer from O(1) into O(n). This is considered acceptable since the number of SCSI host adapter types per host is usually small (less than ten). This change has been tested by attaching two USB storage devices to a qemu host: $ grep -aH . /proc/scsi/usb-storage/* /proc/scsi/usb-storage/7: Host scsi7: usb-storage /proc/scsi/usb-storage/7: Vendor: QEMU /proc/scsi/usb-storage/7: Product: QEMU USB HARDDRIVE /proc/scsi/usb-storage/7:Serial Number: 1-0000:00:02.1:00.0-6 /proc/scsi/usb-storage/7: Protocol: Transparent SCSI /proc/scsi/usb-storage/7: Transport: Bulk /proc/scsi/usb-storage/7: Quirks: SANE_SENSE /proc/scsi/usb-storage/8: Host scsi8: usb-storage /proc/scsi/usb-storage/8: Vendor: QEMU /proc/scsi/usb-storage/8: Product: QEMU USB HARDDRIVE /proc/scsi/usb-storage/8:Serial Number: 1-0000:00:02.1:00.0-7 /proc/scsi/usb-storage/8: Protocol: Transparent SCSI /proc/scsi/usb-storage/8: Transport: Bulk /proc/scsi/usb-storage/8: Quirks: SANE_SENSE This commit prepares for constifying most SCSI host templates. 
Reviewed-by: John Garry Cc: Christoph Hellwig Cc: Ming Lei Cc: Hannes Reinecke Cc: Mike Christie Cc: Krzysztof Kozlowski Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20221015002418.30955-5-bvanassche@acm.org Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_priv.h | 4 +- drivers/scsi/scsi_proc.c | 121 ++++++++++++++++++++++++++++++++------- include/scsi/scsi_host.h | 12 ---- 3 files changed, 102 insertions(+), 35 deletions(-) diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h index 494f48e03e90..96284a0e13fe 100644 --- a/drivers/scsi/scsi_priv.h +++ b/drivers/scsi/scsi_priv.h @@ -111,8 +111,8 @@ extern void scsi_evt_thread(struct work_struct *work); /* scsi_proc.c */ #ifdef CONFIG_SCSI_PROC_FS -extern int scsi_proc_hostdir_add(struct scsi_host_template *); -extern void scsi_proc_hostdir_rm(struct scsi_host_template *); +extern int scsi_proc_hostdir_add(const struct scsi_host_template *); +extern void scsi_proc_hostdir_rm(const struct scsi_host_template *); extern void scsi_proc_host_add(struct Scsi_Host *); extern void scsi_proc_host_rm(struct Scsi_Host *); extern int scsi_init_procfs(void); diff --git a/drivers/scsi/scsi_proc.c b/drivers/scsi/scsi_proc.c index 1b09cea2a752..4a6eb1741be0 100644 --- a/drivers/scsi/scsi_proc.c +++ b/drivers/scsi/scsi_proc.c @@ -43,8 +43,23 @@ static struct proc_dir_entry *proc_scsi; -/* Protect sht->present and sht->proc_dir */ +/* Protects scsi_proc_list */ static DEFINE_MUTEX(global_host_template_mutex); +static LIST_HEAD(scsi_proc_list); + +/** + * struct scsi_proc_entry - (host template, SCSI proc dir) association + * @entry: entry in scsi_proc_list. + * @sht: SCSI host template associated with the procfs directory. + * @proc_dir: procfs directory associated with the SCSI host template. + * @present: Number of SCSI hosts instantiated for @sht. 
+ */ +struct scsi_proc_entry { + struct list_head entry; + const struct scsi_host_template *sht; + struct proc_dir_entry *proc_dir; + unsigned int present; +}; static ssize_t proc_scsi_host_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) @@ -83,6 +98,32 @@ static int proc_scsi_host_open(struct inode *inode, struct file *file) 4 * PAGE_SIZE); } +static struct scsi_proc_entry * +__scsi_lookup_proc_entry(const struct scsi_host_template *sht) +{ + struct scsi_proc_entry *e; + + lockdep_assert_held(&global_host_template_mutex); + + list_for_each_entry(e, &scsi_proc_list, entry) + if (e->sht == sht) + return e; + + return NULL; +} + +static struct scsi_proc_entry * +scsi_lookup_proc_entry(const struct scsi_host_template *sht) +{ + struct scsi_proc_entry *e; + + mutex_lock(&global_host_template_mutex); + e = __scsi_lookup_proc_entry(sht); + mutex_unlock(&global_host_template_mutex); + + return e; +} + /** * scsi_template_proc_dir() - returns the procfs dir for a SCSI host template * @sht: SCSI host template pointer. @@ -90,7 +131,9 @@ static int proc_scsi_host_open(struct inode *inode, struct file *file) struct proc_dir_entry * scsi_template_proc_dir(const struct scsi_host_template *sht) { - return sht->proc_dir; + struct scsi_proc_entry *e = scsi_lookup_proc_entry(sht); + + return e ? e->proc_dir : NULL; } EXPORT_SYMBOL_GPL(scsi_template_proc_dir); @@ -108,24 +151,41 @@ static const struct proc_ops proc_scsi_ops = { * * Sets sht->proc_dir to the new directory. 
*/ -int scsi_proc_hostdir_add(struct scsi_host_template *sht) +int scsi_proc_hostdir_add(const struct scsi_host_template *sht) { - int ret = 0; + struct scsi_proc_entry *e; + int ret; if (!sht->show_info) return 0; mutex_lock(&global_host_template_mutex); - if (!sht->present++) { - sht->proc_dir = proc_mkdir(sht->proc_name, proc_scsi); - if (!sht->proc_dir) { - printk(KERN_ERR "%s: proc_mkdir failed for %s\n", - __func__, sht->proc_name); + e = __scsi_lookup_proc_entry(sht); + if (!e) { + e = kzalloc(sizeof(*e), GFP_KERNEL); + if (!e) { ret = -ENOMEM; + goto unlock; } } + if (e->present++) + goto success; + e->proc_dir = proc_mkdir(sht->proc_name, proc_scsi); + if (!e->proc_dir) { + printk(KERN_ERR "%s: proc_mkdir failed for %s\n", __func__, + sht->proc_name); + ret = -ENOMEM; + goto unlock; + } + e->sht = sht; + list_add_tail(&e->entry, &scsi_proc_list); +success: + e = NULL; + ret = 0; +unlock: mutex_unlock(&global_host_template_mutex); + kfree(e); return ret; } @@ -133,15 +193,19 @@ int scsi_proc_hostdir_add(struct scsi_host_template *sht) * scsi_proc_hostdir_rm - remove directory in /proc for a scsi host * @sht: owner of directory */ -void scsi_proc_hostdir_rm(struct scsi_host_template *sht) +void scsi_proc_hostdir_rm(const struct scsi_host_template *sht) { + struct scsi_proc_entry *e; + if (!sht->show_info) return; mutex_lock(&global_host_template_mutex); - if (!--sht->present && sht->proc_dir) { + e = __scsi_lookup_proc_entry(sht); + if (e && !--e->present) { remove_proc_entry(sht->proc_name, proc_scsi); - sht->proc_dir = NULL; + list_del(&e->entry); + kfree(e); } mutex_unlock(&global_host_template_mutex); } @@ -153,20 +217,29 @@ void scsi_proc_hostdir_rm(struct scsi_host_template *sht) */ void scsi_proc_host_add(struct Scsi_Host *shost) { - struct scsi_host_template *sht = shost->hostt; + const struct scsi_host_template *sht = shost->hostt; + struct scsi_proc_entry *e; struct proc_dir_entry *p; char name[10]; - if (!sht->proc_dir) + if (!sht->show_info) 
return; + e = scsi_lookup_proc_entry(sht); + if (!e) + goto err; + sprintf(name,"%d", shost->host_no); - p = proc_create_data(name, S_IRUGO | S_IWUSR, - sht->proc_dir, &proc_scsi_ops, shost); + p = proc_create_data(name, S_IRUGO | S_IWUSR, e->proc_dir, + &proc_scsi_ops, shost); if (!p) - printk(KERN_ERR "%s: Failed to register host %d in" - "%s\n", __func__, shost->host_no, - sht->proc_name); + goto err; + return; + +err: + shost_printk(KERN_ERR, shost, + "%s: Failed to register host (%s failed)\n", __func__, + e ? "proc_create_data()" : "scsi_proc_hostdir_add()"); } /** @@ -175,13 +248,19 @@ void scsi_proc_host_add(struct Scsi_Host *shost) */ void scsi_proc_host_rm(struct Scsi_Host *shost) { + const struct scsi_host_template *sht = shost->hostt; + struct scsi_proc_entry *e; char name[10]; - if (!shost->hostt->proc_dir) + if (!sht->show_info) + return; + + e = scsi_lookup_proc_entry(sht); + if (!e) return; sprintf(name,"%d", shost->host_no); - remove_proc_entry(name, shost->hostt->proc_dir); + remove_proc_entry(name, e->proc_dir); } /** * proc_print_scsidevice - return data about this host diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 3854ffcb0b3e..e71436183c0d 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -357,12 +357,6 @@ struct scsi_host_template { */ const char *proc_name; - /* - * Used to store the procfs directory if a driver implements the - * show_info method. - */ - struct proc_dir_entry *proc_dir; - /* * This determines if we will use a non-interrupt driven * or an interrupt driven scheme. It is set to the maximum number @@ -423,12 +417,6 @@ struct scsi_host_template { */ short cmd_per_lun; - /* - * present contains counter indicating how many boards of this - * type were found when we did the scan. 
- */ - unsigned char present; - /* If use block layer to manage tags, this is tag allocation policy */ int tag_alloc_policy; From d460f624059266c2e7f0280bdd3ae806d4b75211 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 14 Oct 2022 17:24:15 -0700 Subject: [PATCH 0158/4122] scsi: core: Rework scsi_single_lun_run() Use __starget_for_each_device() instead of open-coding starget_for_each_device(). Run the queues asynchronously instead of synchronously. This commit removes code that calls scsi_device_put() from atomic context. Cc: Christoph Hellwig Cc: Ming Lei Cc: Hannes Reinecke Cc: John Garry Cc: Mike Christie Cc: Krzysztof Kozlowski Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20221015002418.30955-6-bvanassche@acm.org Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_lib.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 8b89fab7c420..fa96d3cfdfa3 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -307,6 +307,18 @@ static void scsi_kick_queue(struct request_queue *q) blk_mq_run_hw_queues(q, false); } +/* + * Kick the queue of SCSI device @sdev if @sdev != current_sdev. Called with + * interrupts disabled. + */ +static void scsi_kick_sdev_queue(struct scsi_device *sdev, void *data) +{ + struct scsi_device *current_sdev = data; + + if (sdev != current_sdev) + blk_mq_run_hw_queues(sdev->request_queue, true); +} + /* * Called for single_lun devices on IO completion. 
Clear starget_sdev_user, * and call blk_run_queue for all the scsi_devices on the target - @@ -317,7 +329,6 @@ static void scsi_kick_queue(struct request_queue *q) static void scsi_single_lun_run(struct scsi_device *current_sdev) { struct Scsi_Host *shost = current_sdev->host; - struct scsi_device *sdev, *tmp; struct scsi_target *starget = scsi_target(current_sdev); unsigned long flags; @@ -334,22 +345,9 @@ static void scsi_single_lun_run(struct scsi_device *current_sdev) scsi_kick_queue(current_sdev->request_queue); spin_lock_irqsave(shost->host_lock, flags); - if (starget->starget_sdev_user) - goto out; - list_for_each_entry_safe(sdev, tmp, &starget->devices, - same_target_siblings) { - if (sdev == current_sdev) - continue; - if (scsi_device_get(sdev)) - continue; - - spin_unlock_irqrestore(shost->host_lock, flags); - scsi_kick_queue(sdev->request_queue); - spin_lock_irqsave(shost->host_lock, flags); - - scsi_device_put(sdev); - } - out: + if (!starget->starget_sdev_user) + __starget_for_each_device(starget, current_sdev, + scsi_kick_sdev_queue); spin_unlock_irqrestore(shost->host_lock, flags); } From 6d1aa3b0589bdd17a46ed74fbd2c2d0fc59038ff Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 14 Oct 2022 17:24:16 -0700 Subject: [PATCH 0159/4122] scsi: ufs: Simplify ufshcd_set_dev_pwr_mode() Simplify the code for incrementing the SCSI device reference count in ufshcd_set_dev_pwr_mode(). This commit removes one scsi_device_put() call that happens from atomic context. Reviewed-by: Adrian Hunter Cc: Avri Altman Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20221015002418.30955-7-bvanassche@acm.org Signed-off-by: Martin K. 
Petersen --- drivers/ufs/core/ufshcd.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 7256e6c43ca6..c8f0fe740005 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -8752,15 +8752,10 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba, spin_lock_irqsave(hba->host->host_lock, flags); sdp = hba->ufs_device_wlun; - if (sdp) { + if (sdp && scsi_device_online(sdp)) ret = scsi_device_get(sdp); - if (!ret && !scsi_device_online(sdp)) { - ret = -ENODEV; - scsi_device_put(sdp); - } - } else { + else ret = -ENODEV; - } spin_unlock_irqrestore(hba->host->host_lock, flags); if (ret) From 195fae206ef20a29b09f281b6db8ea30fafaa908 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 14 Oct 2022 17:24:17 -0700 Subject: [PATCH 0160/4122] scsi: core: Remove the put_device() call from scsi_device_get() scsi_device_get() may be called from atomic context, e.g. by shost_for_each_device(). A later commit will allow put_device() to sleep for SCSI devices. Hence remove the put_device() call from scsi_device_get(). According to Rusty Russell's "Module Refcount and Stuff mini-FAQ", calling module_put() from atomic context has been allowed for a considerable time. See also https://lkml.org/lkml/2002/11/18/330. Cc: Christoph Hellwig Cc: Ming Lei Cc: Hannes Reinecke Cc: Mike Christie Cc: Krzysztof Kozlowski Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20221015002418.30955-8-bvanassche@acm.org Signed-off-by: Martin K.
Petersen --- drivers/scsi/scsi.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index c59eac7a32f2..9feb0323bc44 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -563,14 +563,14 @@ int scsi_device_get(struct scsi_device *sdev) { if (sdev->sdev_state == SDEV_DEL || sdev->sdev_state == SDEV_CANCEL) goto fail; - if (!get_device(&sdev->sdev_gendev)) - goto fail; if (!try_module_get(sdev->host->hostt->module)) - goto fail_put_device; + goto fail; + if (!get_device(&sdev->sdev_gendev)) + goto fail_put_module; return 0; -fail_put_device: - put_device(&sdev->sdev_gendev); +fail_put_module: + module_put(sdev->host->hostt->module); fail: return -ENXIO; } From f93ed747e2c7e6bfbf309291879b33b0d0231a7d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 14 Oct 2022 17:24:18 -0700 Subject: [PATCH 0161/4122] scsi: core: Release SCSI devices synchronously All upstream scsi_device_put() calls happen from thread context. Hence simplify scsi_device_put() by always calling the release function synchronously. This commit prepares for constifying the SCSI host template by removing an assignment that clears the module pointer in the SCSI host template. scsi_device_dev_release_usercontext() was introduced in 2006 via commit 65110b216895 ("[SCSI] fix wrong context bugs in SCSI"). Cc: Christoph Hellwig Cc: Ming Lei Cc: Hannes Reinecke Cc: John Garry Cc: Mike Christie Cc: Krzysztof Kozlowski Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20221015002418.30955-9-bvanassche@acm.org Signed-off-by: Martin K. 
Petersen --- drivers/scsi/scsi.c | 2 ++ drivers/scsi/scsi_sysfs.c | 22 ++-------------------- include/scsi/scsi_device.h | 1 - 3 files changed, 4 insertions(+), 21 deletions(-) diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 9feb0323bc44..1426b9b03612 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -588,6 +588,8 @@ void scsi_device_put(struct scsi_device *sdev) { struct module *mod = sdev->host->hostt->module; + might_sleep(); + put_device(&sdev->sdev_gendev); module_put(mod); } diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index c95177ca6ed2..f2a345cc0f8a 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -441,20 +441,15 @@ static void scsi_device_cls_release(struct device *class_dev) put_device(&sdev->sdev_gendev); } -static void scsi_device_dev_release_usercontext(struct work_struct *work) +static void scsi_device_dev_release(struct device *dev) { - struct scsi_device *sdev; + struct scsi_device *sdev = to_scsi_device(dev); struct device *parent; struct list_head *this, *tmp; struct scsi_vpd *vpd_pg80 = NULL, *vpd_pg83 = NULL; struct scsi_vpd *vpd_pg0 = NULL, *vpd_pg89 = NULL; struct scsi_vpd *vpd_pgb0 = NULL, *vpd_pgb1 = NULL, *vpd_pgb2 = NULL; unsigned long flags; - struct module *mod; - - sdev = container_of(work, struct scsi_device, ew.work); - - mod = sdev->host->hostt->module; scsi_dh_release_device(sdev); @@ -518,19 +513,6 @@ static void scsi_device_dev_release_usercontext(struct work_struct *work) if (parent) put_device(parent); - module_put(mod); -} - -static void scsi_device_dev_release(struct device *dev) -{ - struct scsi_device *sdp = to_scsi_device(dev); - - /* Set module pointer as NULL in case of module unloading */ - if (!try_module_get(sdp->host->hostt->module)) - sdp->host->hostt->module = NULL; - - execute_in_process_context(scsi_device_dev_release_usercontext, - &sdp->ew); } static struct class sdev_class = { diff --git a/include/scsi/scsi_device.h 
b/include/scsi/scsi_device.h index c36656d8ac6c..24bdbf7999ab 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -236,7 +236,6 @@ struct scsi_device { struct device sdev_gendev, sdev_dev; - struct execute_work ew; /* used to get process context on put */ struct work_struct requeue_work; struct scsi_device_handler *handler; From 773792e4e704ca1c47e3d9bc6ed5be2a00a22ad5 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Wed, 28 Sep 2022 15:01:23 +0800 Subject: [PATCH 0162/4122] scsi: libsas: Introduce SAS address comparison helpers SAS address comparison is widely used in libsas. However they are all opencoded and to avoid the line spill over 80 columns, are mostly split into multi-lines. Introduce some helpers to prepare for some refactoring. Signed-off-by: Jason Yan Link: https://lore.kernel.org/r/20220928070130.3657183-2-yanaijie@huawei.com Reviewed-by: Damien Le Moal Reviewed-by: John Garry Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/libsas/sas_internal.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h index b54bcf3c9a9d..6cf190ade35e 100644 --- a/drivers/scsi/libsas/sas_internal.h +++ b/drivers/scsi/libsas/sas_internal.h @@ -115,6 +115,23 @@ static inline void sas_smp_host_handler(struct bsg_job *job, } #endif +static inline bool sas_phy_match_dev_addr(struct domain_device *dev, + struct ex_phy *phy) +{ + return SAS_ADDR(dev->sas_addr) == SAS_ADDR(phy->attached_sas_addr); +} + +static inline bool sas_phy_match_port_addr(struct asd_sas_port *port, + struct ex_phy *phy) +{ + return SAS_ADDR(port->sas_addr) == SAS_ADDR(phy->attached_sas_addr); +} + +static inline bool sas_phy_addr_match(struct ex_phy *p1, struct ex_phy *p2) +{ + return SAS_ADDR(p1->attached_sas_addr) == SAS_ADDR(p2->attached_sas_addr); +} + static inline void sas_fail_probe(struct domain_device *dev, const char *func, int err) { pr_warn("%s: for %s 
device %016llx returned %d\n", From 2d08f329a4f2eace6b041d60132f441fc8e0b616 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Wed, 28 Sep 2022 15:01:24 +0800 Subject: [PATCH 0163/4122] scsi: libsas: Introduce sas_find_attached_phy_id() helper LLDDs are all implementing their own attached phy ID finding code. Factor it out to libsas. Signed-off-by: Jason Yan Link: https://lore.kernel.org/r/20220928070130.3657183-3-yanaijie@huawei.com Reviewed-by: Jack Wang Reviewed-by: Damien Le Moal Reviewed-by: John Garry Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/libsas/sas_expander.c | 16 ++++++++++++++++ include/scsi/libsas.h | 2 ++ 2 files changed, 18 insertions(+) diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 5ce251830104..7ffb42946335 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -2107,6 +2107,22 @@ int sas_ex_revalidate_domain(struct domain_device *port_dev) return res; } +int sas_find_attached_phy_id(struct expander_device *ex_dev, + struct domain_device *dev) +{ + struct ex_phy *phy; + int phy_id; + + for (phy_id = 0; phy_id < ex_dev->num_phys; phy_id++) { + phy = &ex_dev->ex_phy[phy_id]; + if (sas_phy_match_dev_addr(dev, phy)) + return phy_id; + } + + return -ENODEV; +} +EXPORT_SYMBOL_GPL(sas_find_attached_phy_id); + void sas_smp_handler(struct bsg_job *job, struct Scsi_Host *shost, struct sas_rphy *rphy) { diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index f86b56bf7833..ec6c9ecd8d12 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -746,6 +746,8 @@ int sas_clear_task_set(struct domain_device *dev, u8 *lun); int sas_lu_reset(struct domain_device *dev, u8 *lun); int sas_query_task(struct sas_task *task, u16 tag); int sas_abort_task(struct sas_task *task, u16 tag); +int sas_find_attached_phy_id(struct expander_device *ex_dev, + struct domain_device *dev); void sas_notify_port_event(struct asd_sas_phy *phy, enum 
port_event event, gfp_t gfp_flags); From ec64858657a8c393e2ae956d37c23bf94aee8200 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Wed, 28 Sep 2022 15:01:25 +0800 Subject: [PATCH 0164/4122] scsi: pm8001: Use sas_find_attached_phy_id() instead of open coding it The attached phy id finding is open coded. Replace it with sas_find_attached_phy_id(). To keep things consistent, the return value of pm8001_dev_found_notify() is also changed to -ENODEV when sas_find_attached_phy_id() fails. Signed-off-by: Jason Yan Link: https://lore.kernel.org/r/20220928070130.3657183-4-yanaijie@huawei.com Reviewed-by: Jack Wang Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/pm8001/pm8001_sas.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c index 2d84ae95a1f9..51230b827149 100644 --- a/drivers/scsi/pm8001/pm8001_sas.c +++ b/drivers/scsi/pm8001/pm8001_sas.c @@ -645,22 +645,16 @@ static int pm8001_dev_found_notify(struct domain_device *dev) pm8001_device->dcompletion = &completion; if (parent_dev && dev_is_expander(parent_dev->dev_type)) { int phy_id; - struct ex_phy *phy; - for (phy_id = 0; phy_id < parent_dev->ex_dev.num_phys; - phy_id++) { - phy = &parent_dev->ex_dev.ex_phy[phy_id]; - if (SAS_ADDR(phy->attached_sas_addr) - == SAS_ADDR(dev->sas_addr)) { - pm8001_device->attached_phy = phy_id; - break; - } - } - if (phy_id == parent_dev->ex_dev.num_phys) { + + phy_id = sas_find_attached_phy_id(&parent_dev->ex_dev, dev); + if (phy_id < 0) { pm8001_dbg(pm8001_ha, FAIL, "Error: no attached dev:%016llx at ex:%016llx.\n", SAS_ADDR(dev->sas_addr), SAS_ADDR(parent_dev->sas_addr)); - res = -1; + res = phy_id; + } else { + pm8001_device->attached_phy = phy_id; } } else { if (dev->dev_type == SAS_SATA_DEV) { From 178c39d94ac2cf9524ff797d90dcdf96b110fb27 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Wed, 28 Sep 2022 15:01:26 
+0800 Subject: [PATCH 0165/4122] scsi: mvsas: Use sas_find_attached_phy_id() instead of open coding it The attached phy finding is open coded. Replace it with sas_find_attached_phy_id(). To keep things consistent, the return value of mvs_dev_found_notify() is also changed to -ENODEV when sas_find_attached_phy_id() fails. Signed-off-by: Jason Yan Link: https://lore.kernel.org/r/20220928070130.3657183-5-yanaijie@huawei.com Reviewed-by: Jack Wang Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/mvsas/mv_sas.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/drivers/scsi/mvsas/mv_sas.c b/drivers/scsi/mvsas/mv_sas.c index a6867dae0e7c..bf7d4995b257 100644 --- a/drivers/scsi/mvsas/mv_sas.c +++ b/drivers/scsi/mvsas/mv_sas.c @@ -1190,23 +1190,16 @@ static int mvs_dev_found_notify(struct domain_device *dev, int lock) mvi_device->sas_device = dev; if (parent_dev && dev_is_expander(parent_dev->dev_type)) { int phy_id; - u8 phy_num = parent_dev->ex_dev.num_phys; - struct ex_phy *phy; - for (phy_id = 0; phy_id < phy_num; phy_id++) { - phy = &parent_dev->ex_dev.ex_phy[phy_id]; - if (SAS_ADDR(phy->attached_sas_addr) == - SAS_ADDR(dev->sas_addr)) { - mvi_device->attached_phy = phy_id; - break; - } - } - if (phy_id == phy_num) { + phy_id = sas_find_attached_phy_id(&parent_dev->ex_dev, dev); + if (phy_id < 0) { mv_printk("Error: no attached dev:%016llx" "at ex:%016llx.\n", SAS_ADDR(dev->sas_addr), SAS_ADDR(parent_dev->sas_addr)); - res = -1; + res = phy_id; + } else { + mvi_device->attached_phy = phy_id; } } From f0ed7bd5d9137b8e736e44ce353620ec19ee6242 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Wed, 28 Sep 2022 15:01:27 +0800 Subject: [PATCH 0166/4122] scsi: hisi_sas: Use sas_find_attached_phy_id() instead of open coding it The attached phy finding is open coded. Replace it with sas_find_attached_phy_id(). 
To keep things consistent, the return value of hisi_sas_dev_found() is also changed to -ENODEV when sas_find_attached_phy_id() fails. Signed-off-by: Jason Yan Link: https://lore.kernel.org/r/20220928070130.3657183-6-yanaijie@huawei.com Reviewed-by: Jack Wang Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Acked-by: John Garry Signed-off-by: Martin K. Petersen --- drivers/scsi/hisi_sas/hisi_sas_main.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 4c37ae9eb6b6..10813836a728 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -792,22 +792,14 @@ static int hisi_sas_dev_found(struct domain_device *device) if (parent_dev && dev_is_expander(parent_dev->dev_type)) { int phy_no; - u8 phy_num = parent_dev->ex_dev.num_phys; - struct ex_phy *phy; - for (phy_no = 0; phy_no < phy_num; phy_no++) { - phy = &parent_dev->ex_dev.ex_phy[phy_no]; - if (SAS_ADDR(phy->attached_sas_addr) == - SAS_ADDR(device->sas_addr)) - break; - } - - if (phy_no == phy_num) { + phy_no = sas_find_attached_phy_id(&parent_dev->ex_dev, device); + if (phy_no < 0) { dev_info(dev, "dev found: no attached " "dev:%016llx at ex:%016llx\n", SAS_ADDR(device->sas_addr), SAS_ADDR(parent_dev->sas_addr)); - rc = -EINVAL; + rc = phy_no; goto err_out; } } From ad74d1dadbe9fc5ff7f80796f7cac0f126a5ea74 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Wed, 28 Sep 2022 15:01:28 +0800 Subject: [PATCH 0167/4122] scsi: libsas: Use sas_phy_match_dev_addr() instead of open coding it The SAS address comparison of domain device and expander phy is open coded. Replace it with sas_phy_match_dev_addr(). Signed-off-by: Jason Yan Link: https://lore.kernel.org/r/20220928070130.3657183-7-yanaijie@huawei.com Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. 
Petersen --- drivers/scsi/libsas/sas_expander.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 7ffb42946335..9d6330c55cbf 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -738,9 +738,7 @@ static void sas_ex_get_linkrate(struct domain_device *parent, phy->phy_state == PHY_NOT_PRESENT) continue; - if (SAS_ADDR(phy->attached_sas_addr) == - SAS_ADDR(child->sas_addr)) { - + if (sas_phy_match_dev_addr(child, phy)) { child->min_linkrate = min(parent->min_linkrate, phy->linkrate); child->max_linkrate = max(parent->max_linkrate, @@ -1012,8 +1010,7 @@ static int sas_ex_discover_dev(struct domain_device *dev, int phy_id) sas_add_parent_port(dev, phy_id); return 0; } - if (dev->parent && (SAS_ADDR(ex_phy->attached_sas_addr) == - SAS_ADDR(dev->parent->sas_addr))) { + if (dev->parent && sas_phy_match_dev_addr(dev->parent, ex_phy)) { sas_add_parent_port(dev, phy_id); if (ex_phy->routing_attr == TABLE_ROUTING) sas_configure_phy(dev, phy_id, dev->port->sas_addr, 1); @@ -1312,7 +1309,7 @@ static int sas_check_parent_topology(struct domain_device *child) parent_phy->phy_state == PHY_NOT_PRESENT) continue; - if (SAS_ADDR(parent_phy->attached_sas_addr) != SAS_ADDR(child->sas_addr)) + if (!sas_phy_match_dev_addr(child, parent_phy)) continue; child_phy = &child_ex->ex_phy[parent_phy->attached_phy_id]; @@ -1522,8 +1519,7 @@ static int sas_configure_parent(struct domain_device *parent, struct ex_phy *phy = &ex_parent->ex_phy[i]; if ((phy->routing_attr == TABLE_ROUTING) && - (SAS_ADDR(phy->attached_sas_addr) == - SAS_ADDR(child->sas_addr))) { + sas_phy_match_dev_addr(child, phy)) { res = sas_configure_phy(parent, i, sas_addr, include); if (res) return res; @@ -1858,8 +1854,7 @@ static void sas_unregister_devs_sas_addr(struct domain_device *parent, if (last) { list_for_each_entry_safe(child, n, &ex_dev->children, siblings) { - if 
(SAS_ADDR(child->sas_addr) == - SAS_ADDR(phy->attached_sas_addr)) { + if (sas_phy_match_dev_addr(child, phy)) { set_bit(SAS_DEV_GONE, &child->state); if (dev_is_expander(child->dev_type)) sas_unregister_ex_tree(parent->port, child); @@ -1941,8 +1936,7 @@ static int sas_discover_new(struct domain_device *dev, int phy_id) if (res) return res; list_for_each_entry(child, &dev->ex_dev.children, siblings) { - if (SAS_ADDR(child->sas_addr) == - SAS_ADDR(ex_phy->attached_sas_addr)) { + if (sas_phy_match_dev_addr(child, ex_phy)) { if (dev_is_expander(child->dev_type)) res = sas_discover_bfs_by_root(child); break; From bfa22905f3865469479f028770a352126ad0d2e8 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Wed, 28 Sep 2022 15:01:29 +0800 Subject: [PATCH 0168/4122] scsi: libsas: Use sas_phy_addr_match() instead of open coding it The SAS address comparison of expander phys is open coded. Replace it with sas_phy_addr_match(). Signed-off-by: Jason Yan Link: https://lore.kernel.org/r/20220928070130.3657183-8-yanaijie@huawei.com Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/libsas/sas_expander.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 9d6330c55cbf..caa0b2286733 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -2058,8 +2058,7 @@ static int sas_rediscover(struct domain_device *dev, const int phy_id) if (i == phy_id) continue; - if (SAS_ADDR(phy->attached_sas_addr) == - SAS_ADDR(changed_phy->attached_sas_addr)) { + if (sas_phy_addr_match(phy, changed_phy)) { last = false; break; } From 868a8824838f1f0d781e838fa36dbb2de6bc7fdd Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Wed, 28 Sep 2022 15:01:30 +0800 Subject: [PATCH 0169/4122] scsi: libsas: Use sas_phy_match_port_addr() instead of open coding it The SAS address comparison of asd_sas_port and expander phy is open coded. 
Replace it with sas_phy_match_port_addr(). Signed-off-by: Jason Yan Link: https://lore.kernel.org/r/20220928070130.3657183-9-yanaijie@huawei.com Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/libsas/sas_expander.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index caa0b2286733..2907ca5d0ed4 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -1005,8 +1005,7 @@ static int sas_ex_discover_dev(struct domain_device *dev, int phy_id) } /* Parent and domain coherency */ - if (!dev->parent && (SAS_ADDR(ex_phy->attached_sas_addr) == - SAS_ADDR(dev->port->sas_addr))) { + if (!dev->parent && sas_phy_match_port_addr(dev->port, ex_phy)) { sas_add_parent_port(dev, phy_id); return 0; } From ce3a0a29fb9f36d1cd221fffa64a3c405308868f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Sat, 8 Oct 2022 22:33:53 +0300 Subject: [PATCH 0170/4122] gpio: merrifield: Use str_enable_disable() helper Use str_enable_disable() helper instead of open coding the same. Signed-off-by: Andy Shevchenko --- drivers/gpio/gpio-merrifield.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-merrifield.c b/drivers/gpio/gpio-merrifield.c index 72ac09a59702..92ea8411050d 100644 --- a/drivers/gpio/gpio-merrifield.c +++ b/drivers/gpio/gpio-merrifield.c @@ -14,6 +14,7 @@ #include #include #include +#include #define GCCR 0x000 /* controller configuration */ #define GPLR 0x004 /* pin level r/o */ @@ -331,7 +332,7 @@ static int mrfld_irq_set_wake(struct irq_data *d, unsigned int on) raw_spin_unlock_irqrestore(&priv->lock, flags); - dev_dbg(priv->dev, "%sable wake for gpio %u\n", on ? 
"en" : "dis", gpio); + dev_dbg(priv->dev, "%s wake for gpio %u\n", str_enable_disable(on), gpio); return 0; } From 26312973bfbc1db24b157797776ee2b5b48f5c50 Mon Sep 17 00:00:00 2001 From: Deming Wang Date: Thu, 6 Oct 2022 12:14:56 -0400 Subject: [PATCH 0171/4122] IB/uverbs: fix the typo of optional Fix the typo of optional in the function of UVERBS_HANDLER. Signed-off-by: Deming Wang Link: https://lore.kernel.org/r/20221006161456.2998-1-wangdeming@inspur.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/uverbs_std_types_qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_std_types_qp.c b/drivers/infiniband/core/uverbs_std_types_qp.c index dd1075466f61..7b4773fa4bc0 100644 --- a/drivers/infiniband/core/uverbs_std_types_qp.c +++ b/drivers/infiniband/core/uverbs_std_types_qp.c @@ -163,7 +163,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QP_CREATE)( UVERBS_ATTR_CREATE_QP_SRQ_HANDLE)) return -EINVAL; - /* send_cq is optinal */ + /* send_cq is optional */ if (cap.max_send_wr) { send_cq = uverbs_attr_get_obj(attrs, UVERBS_ATTR_CREATE_QP_SEND_CQ_HANDLE); From 53c2d5b14a82f6e7f0f8089083972df20e66a354 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Sat, 1 Oct 2022 10:00:45 +0800 Subject: [PATCH 0172/4122] RDMA/core: return -EOPNOTSUPP for ODP unsupported device ib_reg_mr(3) which is used to register a MR with specific access flags for specific HCA will set errno when something goes wrong. So, here we should return the specific -EOPNOTSUPP when the requested ODP access flag is unsupported by the HCA (such as RXE). 
Signed-off-by: Li Zhijian Link: https://lore.kernel.org/r/20221001020045.8324-1-lizhijian@fujitsu.com Reviewed-by: Zhu Yanjun Signed-off-by: Leon Romanovsky --- include/rdma/ib_verbs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 975d6e9efbcb..a1f4d53a4bb6 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4334,7 +4334,7 @@ static inline int ib_check_mr_access(struct ib_device *ib_dev, if (flags & IB_ACCESS_ON_DEMAND && !(ib_dev->attrs.kernel_cap_flags & IBK_ON_DEMAND_PAGING)) - return -EINVAL; + return -EOPNOTSUPP; return 0; } From 7ac7bfe746d8faddbd79abed526ee67f46d8867c Mon Sep 17 00:00:00 2001 From: Jiangshan Yi Date: Sun, 9 Oct 2022 16:10:47 +0800 Subject: [PATCH 0173/4122] RDMA/opa_vnic: fix spelling typo in comment Fix spelling typo in comment. Reported-by: k2ci Signed-off-by: Jiangshan Yi Link: https://lore.kernel.org/r/20221009081047.2643471-1-13667453960@163.com Signed-off-by: Leon Romanovsky --- include/rdma/opa_vnic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/rdma/opa_vnic.h b/include/rdma/opa_vnic.h index f3d5377b217a..d297f084001a 100644 --- a/include/rdma/opa_vnic.h +++ b/include/rdma/opa_vnic.h @@ -51,7 +51,7 @@ static inline void *opa_vnic_dev_priv(const struct net_device *dev) return oparn->dev_priv; } -/* opa_vnic skb meta data structrue */ +/* opa_vnic skb meta data structure */ struct opa_vnic_skb_mdata { u8 vl; u8 entropy; From acc7d94ab431fb6f34e41588f2784410c2d9008e Mon Sep 17 00:00:00 2001 From: Sergey Gorenko Date: Sun, 16 Oct 2022 12:38:31 +0300 Subject: [PATCH 0174/4122] IB/iser: open code iser_conn_state_comp_exch There is a single caller to iser_conn_state_comp_exch. Open code its logic and remove it. 
Acked-by: Max Gurtovoy Signed-off-by: Sergey Gorenko Link: https://lore.kernel.org/r/20221016093833.12537-2-mgurtovoy@nvidia.com Reviewed-by: Sagi Grimberg Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/iser/iser_verbs.c | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index a00ca117303a..a73c30230ff9 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -347,22 +347,6 @@ static void iser_device_try_release(struct iser_device *device) mutex_unlock(&ig.device_list_mutex); } -/* - * Called with state mutex held - */ -static int iser_conn_state_comp_exch(struct iser_conn *iser_conn, - enum iser_conn_state comp, - enum iser_conn_state exch) -{ - int ret; - - ret = (iser_conn->state == comp); - if (ret) - iser_conn->state = exch; - - return ret; -} - void iser_release_work(struct work_struct *work) { struct iser_conn *iser_conn; @@ -465,10 +449,10 @@ int iser_conn_terminate(struct iser_conn *iser_conn) int err = 0; /* terminate the iser conn only if the conn state is UP */ - if (!iser_conn_state_comp_exch(iser_conn, ISER_CONN_UP, - ISER_CONN_TERMINATING)) + if (iser_conn->state != ISER_CONN_UP) return 0; + iser_conn->state = ISER_CONN_TERMINATING; iser_info("iser_conn %p state %d\n", iser_conn, iser_conn->state); /* suspend queuing of new iscsi commands */ From a75243ae08d23272235cb26117464843538936b4 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Sun, 16 Oct 2022 12:38:32 +0300 Subject: [PATCH 0175/4122] IB/iser: add safety checks for state_mutex lock In some cases, we need to make sure that state_mutex is taken. Use lockdep_assert_held to warn us in case it isn't held when it should be. 
Signed-off-by: Max Gurtovoy Link: https://lore.kernel.org/r/20221016093833.12537-3-mgurtovoy@nvidia.com Reviewed-by: Sergey Gorenko Reviewed-by: Sagi Grimberg Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/iser/iser_verbs.c | 26 ++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index a73c30230ff9..f33e3a7f605d 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -448,6 +448,8 @@ int iser_conn_terminate(struct iser_conn *iser_conn) struct ib_conn *ib_conn = &iser_conn->ib_conn; int err = 0; + lockdep_assert_held(&iser_conn->state_mutex); + /* terminate the iser conn only if the conn state is UP */ if (iser_conn->state != ISER_CONN_UP) return 0; @@ -482,9 +484,10 @@ int iser_conn_terminate(struct iser_conn *iser_conn) */ static void iser_connect_error(struct rdma_cm_id *cma_id) { - struct iser_conn *iser_conn; + struct iser_conn *iser_conn = cma_id->context; + + lockdep_assert_held(&iser_conn->state_mutex); - iser_conn = cma_id->context; iser_conn->state = ISER_CONN_TERMINATING; } @@ -526,12 +529,13 @@ static void iser_calc_scsi_params(struct iser_conn *iser_conn, */ static void iser_addr_handler(struct rdma_cm_id *cma_id) { + struct iser_conn *iser_conn = cma_id->context; struct iser_device *device; - struct iser_conn *iser_conn; struct ib_conn *ib_conn; int ret; - iser_conn = cma_id->context; + lockdep_assert_held(&iser_conn->state_mutex); + if (iser_conn->state != ISER_CONN_PENDING) /* bailout */ return; @@ -581,6 +585,8 @@ static void iser_route_handler(struct rdma_cm_id *cma_id) struct ib_conn *ib_conn = &iser_conn->ib_conn; struct ib_device *ib_dev = ib_conn->device->ib_device; + lockdep_assert_held(&iser_conn->state_mutex); + if (iser_conn->state != ISER_CONN_PENDING) /* bailout */ return; @@ -613,14 +619,18 @@ failure: iser_connect_error(cma_id); } +/* + * Called with state mutex 
held + */ static void iser_connected_handler(struct rdma_cm_id *cma_id, const void *private_data) { - struct iser_conn *iser_conn; + struct iser_conn *iser_conn = cma_id->context; struct ib_qp_attr attr; struct ib_qp_init_attr init_attr; - iser_conn = cma_id->context; + lockdep_assert_held(&iser_conn->state_mutex); + if (iser_conn->state != ISER_CONN_PENDING) /* bailout */ return; @@ -654,11 +664,15 @@ static void iser_disconnected_handler(struct rdma_cm_id *cma_id) } } +/* + * Called with state mutex held + */ static void iser_cleanup_handler(struct rdma_cm_id *cma_id, bool destroy) { struct iser_conn *iser_conn = cma_id->context; + lockdep_assert_held(&iser_conn->state_mutex); /* * We are not guaranteed that we visited disconnected_handler * by now, call it here to be safe that we handle CM drep From c1842f34fceef47d6285e558004f8e2d6ed91b91 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Sun, 16 Oct 2022 12:38:33 +0300 Subject: [PATCH 0176/4122] IB/iser: open code iser_disconnected_handler There is a single caller to iser_disconnected_handler. Open code its logic and remove it. 
Signed-off-by: Max Gurtovoy Link: https://lore.kernel.org/r/20221016093833.12537-4-mgurtovoy@nvidia.com Reviewed-by: Sergey Gorenko Reviewed-by: Sagi Grimberg Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/iser/iser_verbs.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index f33e3a7f605d..1b8eda0dae4e 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -651,19 +651,6 @@ static void iser_connected_handler(struct rdma_cm_id *cma_id, complete(&iser_conn->up_completion); } -static void iser_disconnected_handler(struct rdma_cm_id *cma_id) -{ - struct iser_conn *iser_conn = cma_id->context; - - if (iser_conn_terminate(iser_conn)) { - if (iser_conn->iscsi_conn) - iscsi_conn_failure(iser_conn->iscsi_conn, - ISCSI_ERR_CONN_FAILED); - else - iser_err("iscsi_iser connection isn't bound\n"); - } -} - /* * Called with state mutex held */ @@ -678,7 +665,13 @@ static void iser_cleanup_handler(struct rdma_cm_id *cma_id, * by now, call it here to be safe that we handle CM drep * and flush errors. */ - iser_disconnected_handler(cma_id); + if (iser_conn_terminate(iser_conn)) { + if (iser_conn->iscsi_conn) + iscsi_conn_failure(iser_conn->iscsi_conn, + ISCSI_ERR_CONN_FAILED); + else + iser_err("iscsi_iser connection isn't bound\n"); + } iser_free_ib_conn_res(iser_conn, destroy); complete(&iser_conn->ib_completion); } From f74495761df10c25a98256d16ea7465191b6e2cd Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 18 Oct 2022 09:25:00 +0800 Subject: [PATCH 0177/4122] soundwire: dmi-quirks: add quirk variant for LAPBC710 NUC15 Some NUC15 LAPBC710 devices don't expose the same DMI information as the Intel reference, add additional entry in the match table. 
BugLink: https://github.com/thesofproject/linux/issues/3885 Signed-off-by: Pierre-Louis Bossart Reviewed-by: Ranjani Sridharan Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20221018012500.1592994-1-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/dmi-quirks.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/soundwire/dmi-quirks.c b/drivers/soundwire/dmi-quirks.c index f81cdd83ec26..7969881f126d 100644 --- a/drivers/soundwire/dmi-quirks.c +++ b/drivers/soundwire/dmi-quirks.c @@ -90,6 +90,14 @@ static const struct dmi_system_id adr_remap_quirk_table[] = { }, .driver_data = (void *)intel_tgl_bios, }, + { + /* quirk used for NUC15 LAPBC710 skew */ + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Intel Corporation"), + DMI_MATCH(DMI_BOARD_NAME, "LAPBC710"), + }, + .driver_data = (void *)intel_tgl_bios, + }, { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"), From 3c6bd6fa83bb6c7a891891a8a32aea2820aadb06 Mon Sep 17 00:00:00 2001 From: Richard Acayan Date: Mon, 17 Oct 2022 20:57:37 -0400 Subject: [PATCH 0178/4122] dt-bindings: dma: qcom: gpi: add fallback compatible The drivers are transitioning from matching against lists of specific compatible strings to matching against smaller lists of more generic compatible strings. Use the SDM845 compatible string as a fallback in the schema to support this change. 
Signed-off-by: Richard Acayan Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221018005740.23952-2-mailingradian@gmail.com Signed-off-by: Vinod Koul --- .../devicetree/bindings/dma/qcom,gpi.yaml | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml index eabf8a76d3a0..182b8573230d 100644 --- a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml +++ b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml @@ -18,14 +18,18 @@ allOf: properties: compatible: - enum: - - qcom,sc7280-gpi-dma - - qcom,sdm845-gpi-dma - - qcom,sm6350-gpi-dma - - qcom,sm8150-gpi-dma - - qcom,sm8250-gpi-dma - - qcom,sm8350-gpi-dma - - qcom,sm8450-gpi-dma + oneOf: + - enum: + - qcom,sc7280-gpi-dma + - qcom,sdm845-gpi-dma + - qcom,sm6350-gpi-dma + - qcom,sm8350-gpi-dma + - qcom,sm8450-gpi-dma + - items: + - enum: + - qcom,sm8150-gpi-dma + - qcom,sm8250-gpi-dma + - const: qcom,sdm845-gpi-dma reg: maxItems: 1 From 67fd570d734d35ef6b5e8ee5a3195a2aa843c2d8 Mon Sep 17 00:00:00 2001 From: Richard Acayan Date: Mon, 17 Oct 2022 20:57:38 -0400 Subject: [PATCH 0179/4122] dt-bindings: dma: qcom: gpi: add compatible for sdm670 The Snapdragon 670 uses GPI DMA for its GENI interface. Add a compatible string for it in the documentation. 
Signed-off-by: Richard Acayan Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221018005740.23952-3-mailingradian@gmail.com Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/qcom,gpi.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml index 182b8573230d..6f7dcae944e4 100644 --- a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml +++ b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml @@ -27,6 +27,7 @@ properties: - qcom,sm8450-gpi-dma - items: - enum: + - qcom,sdm670-gpi-dma - qcom,sm8150-gpi-dma - qcom,sm8250-gpi-dma - const: qcom,sdm845-gpi-dma From 8527721ee6bd596a211fa2a1bbaf939e994cb89c Mon Sep 17 00:00:00 2001 From: Richard Acayan Date: Mon, 17 Oct 2022 20:57:39 -0400 Subject: [PATCH 0180/4122] dmaengine: qcom: deprecate redundant of_device_id entries The drivers are transitioning from matching against lists of specific compatible strings to matching against smaller lists of more generic compatible strings. Add a message that the compatible strings with an ee_offset of 0 are deprecated except for the SDM845 compatible string. Signed-off-by: Richard Acayan Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221018005740.23952-4-mailingradian@gmail.com Signed-off-by: Vinod Koul --- drivers/dma/qcom/gpi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c index 3f56514bbef8..f8e19e6e6117 100644 --- a/drivers/dma/qcom/gpi.c +++ b/drivers/dma/qcom/gpi.c @@ -2289,6 +2289,10 @@ static const struct of_device_id gpi_of_match[] = { { .compatible = "qcom,sc7280-gpi-dma", .data = (void *)0x10000 }, { .compatible = "qcom,sdm845-gpi-dma", .data = (void *)0x0 }, { .compatible = "qcom,sm6350-gpi-dma", .data = (void *)0x10000 }, + /* + * Deprecated, devices with ee_offset = 0 should use sdm845-gpi-dma as + * fallback and not need their own entries here. 
+ */ { .compatible = "qcom,sm8150-gpi-dma", .data = (void *)0x0 }, { .compatible = "qcom,sm8250-gpi-dma", .data = (void *)0x0 }, { .compatible = "qcom,sm8350-gpi-dma", .data = (void *)0x10000 }, From dfad1e14b27734b204ea821977d43b16d1d1919f Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 18 Oct 2022 19:03:48 -0400 Subject: [PATCH 0181/4122] dt-bindings: dma: qcom: gpi: Use sm6350 fallback Several devices like SM6350, SM8150 and SC7280 are actually compatible, so use one compatible fallback for all of them. Signed-off-by: Krzysztof Kozlowski Acked-by: Rob Herring Link: https://lore.kernel.org/r/20221018230352.1238479-2-krzysztof.kozlowski@linaro.org Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/qcom,gpi.yaml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml index 6f7dcae944e4..0c2894498845 100644 --- a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml +++ b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml @@ -20,11 +20,14 @@ properties: compatible: oneOf: - enum: - - qcom,sc7280-gpi-dma - qcom,sdm845-gpi-dma - qcom,sm6350-gpi-dma - - qcom,sm8350-gpi-dma - - qcom,sm8450-gpi-dma + - items: + - enum: + - qcom,sc7280-gpi-dma + - qcom,sm8350-gpi-dma + - qcom,sm8450-gpi-dma + - const: qcom,sm6350-gpi-dma - items: - enum: - qcom,sdm670-gpi-dma From 88bc8ac63db045e74b2ea7015c51384c05b09ae5 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 18 Oct 2022 19:03:49 -0400 Subject: [PATCH 0182/4122] dmaengine: qcom: gpi: Document preferred SM6350 binding Devices with ee offset of 0x10000 should rather bind with SM6350 compatible, so the list will not unnecessarily grow for compatible devices. 
Signed-off-by: Krzysztof Kozlowski Acked-by: Richard Acayan Link: https://lore.kernel.org/r/20221018230352.1238479-3-krzysztof.kozlowski@linaro.org Signed-off-by: Vinod Koul --- drivers/dma/qcom/gpi.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c index f8e19e6e6117..061add832295 100644 --- a/drivers/dma/qcom/gpi.c +++ b/drivers/dma/qcom/gpi.c @@ -2286,13 +2286,14 @@ static int gpi_probe(struct platform_device *pdev) } static const struct of_device_id gpi_of_match[] = { - { .compatible = "qcom,sc7280-gpi-dma", .data = (void *)0x10000 }, { .compatible = "qcom,sdm845-gpi-dma", .data = (void *)0x0 }, { .compatible = "qcom,sm6350-gpi-dma", .data = (void *)0x10000 }, /* - * Deprecated, devices with ee_offset = 0 should use sdm845-gpi-dma as - * fallback and not need their own entries here. + * Do not grow the list for compatible devices. Instead use + * qcom,sdm845-gpi-dma (for ee_offset = 0x0) or qcom,sm6350-gpi-dma + * (for ee_offset = 0x10000). */ + { .compatible = "qcom,sc7280-gpi-dma", .data = (void *)0x10000 }, { .compatible = "qcom,sm8150-gpi-dma", .data = (void *)0x0 }, { .compatible = "qcom,sm8250-gpi-dma", .data = (void *)0x0 }, { .compatible = "qcom,sm8350-gpi-dma", .data = (void *)0x10000 }, From 4967a7803c341361a8bf67ace206bca8b390dc22 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 4 Oct 2022 17:09:13 +0100 Subject: [PATCH 0183/4122] dmaengine: ioat: Fix spelling mistake "idel" -> "idle" There is a spelling mistake in the module description. Fix it. 
Signed-off-by: Colin Ian King Acked-by: Dave Jiang Link: https://lore.kernel.org/r/20221004160913.154739-1-colin.i.king@gmail.com Signed-off-by: Vinod Koul --- drivers/dma/ioat/dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index e2070df6cad2..79d244011093 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -33,7 +33,7 @@ MODULE_PARM_DESC(completion_timeout, static int idle_timeout = 2000; module_param(idle_timeout, int, 0644); MODULE_PARM_DESC(idle_timeout, - "set ioat idel timeout [msec] (default 2000 [msec])"); + "set ioat idle timeout [msec] (default 2000 [msec])"); #define IDLE_TIMEOUT msecs_to_jiffies(idle_timeout) #define COMPLETION_TIMEOUT msecs_to_jiffies(completion_timeout) From beb6f6493853d862490f0d5b99910caa358dd3d4 Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Thu, 29 Sep 2022 16:48:18 -0700 Subject: [PATCH 0184/4122] of/irq: export of_msi_get_domain Export of_msi_get_domain to enable it for users from outside. 
Signed-off-by: Matthias Brugger Acked-by: Rob Herring Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20200122104723.16955-1-peter.ujfalusi@ti.com Signed-off-by: Kevin Hilman Acked-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20220929234820.940048-2-khilman@baylibre.com Signed-off-by: Vinod Koul --- drivers/of/irq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/of/irq.c b/drivers/of/irq.c index 2bac44f09554..e9bf5236ed89 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -730,6 +730,7 @@ struct irq_domain *of_msi_get_domain(struct device *dev, return NULL; } +EXPORT_SYMBOL_GPL(of_msi_get_domain); /** * of_msi_configure - Set the msi_domain field of a device From 56b0a668cb35c5f04ef98ffc22b297f116fe7108 Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Thu, 29 Sep 2022 16:48:19 -0700 Subject: [PATCH 0185/4122] dmaengine: ti: convert k3-udma to module Currently k3-udma driver is built as separate platform drivers with a shared probe and identical code path, just different platform data. To enable building as a module, convert the separate platform driver into a single module_platform_driver with the data selection done via compatible string and of_match. The separate of_match tables are also combined into a single table to avoid the multiple calls to of_match_node(). Since all modern TI platforms using this are DT enabled, the removal of separate platform_drivers should have no functional change. 
Acked-by: Peter Ujfalusi Signed-off-by: Kevin Hilman Link: https://lore.kernel.org/r/20220929234820.940048-3-khilman@baylibre.com Signed-off-by: Vinod Koul --- drivers/dma/ti/Kconfig | 4 ++-- drivers/dma/ti/k3-udma-glue.c | 5 ++++- drivers/dma/ti/k3-udma.c | 40 +++++------------------------------ 3 files changed, 11 insertions(+), 38 deletions(-) diff --git a/drivers/dma/ti/Kconfig b/drivers/dma/ti/Kconfig index 79618fac119a..f196be3b222f 100644 --- a/drivers/dma/ti/Kconfig +++ b/drivers/dma/ti/Kconfig @@ -35,7 +35,7 @@ config DMA_OMAP DMA engine is found on OMAP and DRA7xx parts. config TI_K3_UDMA - bool "Texas Instruments UDMA support" + tristate "Texas Instruments UDMA support" depends on ARCH_K3 depends on TI_SCI_PROTOCOL depends on TI_SCI_INTA_IRQCHIP @@ -48,7 +48,7 @@ config TI_K3_UDMA DMA engine is used in AM65x and j721e. config TI_K3_UDMA_GLUE_LAYER - bool "Texas Instruments UDMA Glue layer for non DMAengine users" + tristate "Texas Instruments UDMA Glue layer for non DMAengine users" depends on ARCH_K3 depends on TI_K3_UDMA help diff --git a/drivers/dma/ti/k3-udma-glue.c b/drivers/dma/ti/k3-udma-glue.c index 4fdd9f06b723..c29de4695ae7 100644 --- a/drivers/dma/ti/k3-udma-glue.c +++ b/drivers/dma/ti/k3-udma-glue.c @@ -6,6 +6,7 @@ * */ +#include #include #include #include @@ -1433,4 +1434,6 @@ static int __init k3_udma_glue_class_init(void) { return class_register(&k3_udma_glue_devclass); } -arch_initcall(k3_udma_glue_class_init); + +module_init(k3_udma_glue_class_init); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c index 7b5081989b3d..ce8b80bb34d7 100644 --- a/drivers/dma/ti/k3-udma.c +++ b/drivers/dma/ti/k3-udma.c @@ -5,6 +5,7 @@ */ #include +#include #include #include #include @@ -4335,18 +4336,10 @@ static const struct of_device_id udma_of_match[] = { .compatible = "ti,j721e-navss-mcu-udmap", .data = &j721e_mcu_data, }, - { /* Sentinel */ }, -}; - -static const struct of_device_id bcdma_of_match[] = { { 
.compatible = "ti,am64-dmss-bcdma", .data = &am64_bcdma_data, }, - { /* Sentinel */ }, -}; - -static const struct of_device_id pktdma_of_match[] = { { .compatible = "ti,am64-dmss-pktdma", .data = &am64_pktdma_data, @@ -5271,14 +5264,9 @@ static int udma_probe(struct platform_device *pdev) return -ENOMEM; match = of_match_node(udma_of_match, dev->of_node); - if (!match) - match = of_match_node(bcdma_of_match, dev->of_node); if (!match) { - match = of_match_node(pktdma_of_match, dev->of_node); - if (!match) { - dev_err(dev, "No compatible match found\n"); - return -ENODEV; - } + dev_err(dev, "No compatible match found\n"); + return -ENODEV; } ud->match_data = match->data; @@ -5511,27 +5499,9 @@ static struct platform_driver udma_driver = { }, .probe = udma_probe, }; -builtin_platform_driver(udma_driver); -static struct platform_driver bcdma_driver = { - .driver = { - .name = "ti-bcdma", - .of_match_table = bcdma_of_match, - .suppress_bind_attrs = true, - }, - .probe = udma_probe, -}; -builtin_platform_driver(bcdma_driver); - -static struct platform_driver pktdma_driver = { - .driver = { - .name = "ti-pktdma", - .of_match_table = pktdma_of_match, - .suppress_bind_attrs = true, - }, - .probe = udma_probe, -}; -builtin_platform_driver(pktdma_driver); +module_platform_driver(udma_driver); +MODULE_LICENSE("GPL v2"); /* Private interfaces to UDMA */ #include "k3-udma-private.c" From d15aae73a9f6c321167b9120f263df7dbc08d2ba Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Thu, 29 Sep 2022 16:48:20 -0700 Subject: [PATCH 0186/4122] dmaengine: ti: convert PSIL to be buildable as module Combine all the SoC specific files into a single lib that can be built-in or built as a module. 
Acked-by: Peter Ujfalusi Signed-off-by: Kevin Hilman Link: https://lore.kernel.org/r/20220929234820.940048-4-khilman@baylibre.com Signed-off-by: Vinod Koul --- drivers/dma/ti/Kconfig | 3 ++- drivers/dma/ti/Makefile | 15 ++++++++------- drivers/dma/ti/k3-psil.c | 2 ++ 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/dma/ti/Kconfig b/drivers/dma/ti/Kconfig index f196be3b222f..2adc2cca10e9 100644 --- a/drivers/dma/ti/Kconfig +++ b/drivers/dma/ti/Kconfig @@ -56,7 +56,8 @@ config TI_K3_UDMA_GLUE_LAYER If unsure, say N. config TI_K3_PSIL - bool + tristate + default TI_K3_UDMA config TI_DMA_CROSSBAR bool diff --git a/drivers/dma/ti/Makefile b/drivers/dma/ti/Makefile index d3a303f0d7c6..b53d05b11ca5 100644 --- a/drivers/dma/ti/Makefile +++ b/drivers/dma/ti/Makefile @@ -4,11 +4,12 @@ obj-$(CONFIG_TI_EDMA) += edma.o obj-$(CONFIG_DMA_OMAP) += omap-dma.o obj-$(CONFIG_TI_K3_UDMA) += k3-udma.o obj-$(CONFIG_TI_K3_UDMA_GLUE_LAYER) += k3-udma-glue.o -obj-$(CONFIG_TI_K3_PSIL) += k3-psil.o \ - k3-psil-am654.o \ - k3-psil-j721e.o \ - k3-psil-j7200.o \ - k3-psil-am64.o \ - k3-psil-j721s2.o \ - k3-psil-am62.o +k3-psil-lib-objs := k3-psil.o \ + k3-psil-am654.o \ + k3-psil-j721e.o \ + k3-psil-j7200.o \ + k3-psil-am64.o \ + k3-psil-j721s2.o \ + k3-psil-am62.o +obj-$(CONFIG_TI_K3_PSIL) += k3-psil-lib.o obj-$(CONFIG_TI_DMA_CROSSBAR) += dma-crossbar.o diff --git a/drivers/dma/ti/k3-psil.c b/drivers/dma/ti/k3-psil.c index 761a384093d2..8b6533a1eeeb 100644 --- a/drivers/dma/ti/k3-psil.c +++ b/drivers/dma/ti/k3-psil.c @@ -5,6 +5,7 @@ */ #include +#include #include #include #include @@ -101,3 +102,4 @@ int psil_set_new_ep_config(struct device *dev, const char *name, return 0; } EXPORT_SYMBOL_GPL(psil_set_new_ep_config); +MODULE_LICENSE("GPL v2"); From 97c4cf380ff2d5a58ff13b9ac415ad998f623510 Mon Sep 17 00:00:00 2001 From: Siarhei Volkau Date: Wed, 19 Oct 2022 09:39:33 +0300 Subject: [PATCH 0187/4122] dt-bindings: ingenic: Add support for the JZ4755 dmaengine Update 
documentation prior to adding driver changes. Acked-by: Krzysztof Kozlowski Signed-off-by: Siarhei Volkau Link: https://lore.kernel.org/r/20221019063934.3278444-2-lis8215@gmail.com Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/ingenic,dma.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/dma/ingenic,dma.yaml b/Documentation/devicetree/bindings/dma/ingenic,dma.yaml index 3b0b3b919af8..e42b8ce948db 100644 --- a/Documentation/devicetree/bindings/dma/ingenic,dma.yaml +++ b/Documentation/devicetree/bindings/dma/ingenic,dma.yaml @@ -18,6 +18,7 @@ properties: - enum: - ingenic,jz4740-dma - ingenic,jz4725b-dma + - ingenic,jz4755-dma - ingenic,jz4760-dma - ingenic,jz4760-bdma - ingenic,jz4760-mdma From 042427ea0e415ea25468605f1b562f4ecec43541 Mon Sep 17 00:00:00 2001 From: Siarhei Volkau Date: Wed, 19 Oct 2022 09:39:34 +0300 Subject: [PATCH 0188/4122] dmaengine: JZ4780: Add support for the JZ4755. The JZ4755 has 4 DMA channels per DMA unit, two identical DMA units. The JZ4755 has a similar DMA engine to JZ4725b and it has the same bug as JZ4725b, see commit a40c94be2336. At least the JZ_SOC_DATA_BREAK_LINKS flag makes it work much better, although not ideal. 
Reviewed-by: Paul Cercueil Tested-by: Siarhei Volkau Signed-off-by: Siarhei Volkau Link: https://lore.kernel.org/r/20221019063934.3278444-3-lis8215@gmail.com Signed-off-by: Vinod Koul --- drivers/dma/dma-jz4780.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/dma/dma-jz4780.c b/drivers/dma/dma-jz4780.c index 2a483802d9ee..9c1a6e9a9c03 100644 --- a/drivers/dma/dma-jz4780.c +++ b/drivers/dma/dma-jz4780.c @@ -1038,6 +1038,13 @@ static const struct jz4780_dma_soc_data jz4725b_dma_soc_data = { JZ_SOC_DATA_BREAK_LINKS, }; +static const struct jz4780_dma_soc_data jz4755_dma_soc_data = { + .nb_channels = 4, + .transfer_ord_max = 5, + .flags = JZ_SOC_DATA_PER_CHAN_PM | JZ_SOC_DATA_NO_DCKES_DCKEC | + JZ_SOC_DATA_BREAK_LINKS, +}; + static const struct jz4780_dma_soc_data jz4760_dma_soc_data = { .nb_channels = 5, .transfer_ord_max = 6, @@ -1101,6 +1108,7 @@ static const struct jz4780_dma_soc_data x1830_dma_soc_data = { static const struct of_device_id jz4780_dma_dt_match[] = { { .compatible = "ingenic,jz4740-dma", .data = &jz4740_dma_soc_data }, { .compatible = "ingenic,jz4725b-dma", .data = &jz4725b_dma_soc_data }, + { .compatible = "ingenic,jz4755-dma", .data = &jz4755_dma_soc_data }, { .compatible = "ingenic,jz4760-dma", .data = &jz4760_dma_soc_data }, { .compatible = "ingenic,jz4760-mdma", .data = &jz4760_mdma_soc_data }, { .compatible = "ingenic,jz4760-bdma", .data = &jz4760_bdma_soc_data }, From c3b63380f52a5cc945c092259c3545fb4915719d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 14 Oct 2022 18:12:50 +0200 Subject: [PATCH 0189/4122] dmaengine: idma64: Make idma64_remove() return void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function idma64_remove() returns zero unconditionally. Make it return void. This is a preparation for making platform remove callbacks return void. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221014161250.468687-1-u.kleine-koenig@pengutronix.de Signed-off-by: Vinod Koul --- drivers/dma/idma64.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/dma/idma64.c b/drivers/dma/idma64.c index f4c07ad3be15..c33087c5cd02 100644 --- a/drivers/dma/idma64.c +++ b/drivers/dma/idma64.c @@ -600,7 +600,7 @@ static int idma64_probe(struct idma64_chip *chip) return 0; } -static int idma64_remove(struct idma64_chip *chip) +static void idma64_remove(struct idma64_chip *chip) { struct idma64 *idma64 = chip->idma64; unsigned short i; @@ -618,8 +618,6 @@ static int idma64_remove(struct idma64_chip *chip) tasklet_kill(&idma64c->vchan.task); } - - return 0; } /* ---------------------------------------------------------------------- */ @@ -664,7 +662,9 @@ static int idma64_platform_remove(struct platform_device *pdev) { struct idma64_chip *chip = platform_get_drvdata(pdev); - return idma64_remove(chip); + idma64_remove(chip); + + return 0; } static int __maybe_unused idma64_pm_suspend(struct device *dev) From 91123b37e8a99cc489d5bdcfebd1c25f29382504 Mon Sep 17 00:00:00 2001 From: Xiaochen Shen Date: Sat, 1 Oct 2022 04:15:28 +0800 Subject: [PATCH 0190/4122] dmaengine: idxd: Make max batch size attributes in sysfs invisible for Intel IAA In current code, dev.max_batch_size and wq.max_batch_size attributes in sysfs are exposed to user to show or update the values. From Intel IAA spec [1], Intel IAA does not support batch processing. So these sysfs attributes should not be supported on IAA device. Fix this issue by making the attributes of max_batch_size invisible in sysfs through is_visible() filter when the device is IAA. Add description in the ABI documentation to mention that the attributes are not visible when the device does not support batch. 
[1]: https://cdrdv2.intel.com/v1/dl/getContent/721858 Fixes: e7184b159dd3 ("dmaengine: idxd: add support for configurable max wq batch size") Fixes: c52ca478233c ("dmaengine: idxd: add configuration component of driver") Signed-off-by: Xiaochen Shen Reviewed-by: Dave Jiang Reviewed-by: Fenghua Yu Link: https://lore.kernel.org/r/20220930201528.18621-3-xiaochen.shen@intel.com Signed-off-by: Vinod Koul --- .../ABI/stable/sysfs-driver-dma-idxd | 2 ++ drivers/dma/idxd/sysfs.c | 32 +++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd index 8e2c2c405db2..69e2d9155e0d 100644 --- a/Documentation/ABI/stable/sysfs-driver-dma-idxd +++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd @@ -22,6 +22,7 @@ Date: Oct 25, 2019 KernelVersion: 5.6.0 Contact: dmaengine@vger.kernel.org Description: The largest number of work descriptors in a batch. + It's not visible when the device does not support batch. What: /sys/bus/dsa/devices/dsa/max_work_queues_size Date: Oct 25, 2019 @@ -205,6 +206,7 @@ KernelVersion: 5.10.0 Contact: dmaengine@vger.kernel.org Description: The max batch size for this workqueue. Cannot exceed device max batch size. Configurable parameter. + It's not visible when the device does not support batch. 
What: /sys/bus/dsa/devices/wq./ats_disable Date: Nov 13, 2020 diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index bdaccf9e0436..f30aad90537b 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -1233,6 +1233,14 @@ static bool idxd_wq_attr_op_config_invisible(struct attribute *attr, !idxd->hw.wq_cap.op_config; } +static bool idxd_wq_attr_max_batch_size_invisible(struct attribute *attr, + struct idxd_device *idxd) +{ + /* Intel IAA does not support batch processing, make it invisible */ + return attr == &dev_attr_wq_max_batch_size.attr && + idxd->data->type == IDXD_TYPE_IAX; +} + static umode_t idxd_wq_attr_visible(struct kobject *kobj, struct attribute *attr, int n) { @@ -1243,6 +1251,9 @@ static umode_t idxd_wq_attr_visible(struct kobject *kobj, if (idxd_wq_attr_op_config_invisible(attr, idxd)) return 0; + if (idxd_wq_attr_max_batch_size_invisible(attr, idxd)) + return 0; + return attr->mode; } @@ -1533,6 +1544,26 @@ static ssize_t cmd_status_store(struct device *dev, struct device_attribute *att } static DEVICE_ATTR_RW(cmd_status); +static bool idxd_device_attr_max_batch_size_invisible(struct attribute *attr, + struct idxd_device *idxd) +{ + /* Intel IAA does not support batch processing, make it invisible */ + return attr == &dev_attr_max_batch_size.attr && + idxd->data->type == IDXD_TYPE_IAX; +} + +static umode_t idxd_device_attr_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct idxd_device *idxd = confdev_to_idxd(dev); + + if (idxd_device_attr_max_batch_size_invisible(attr, idxd)) + return 0; + + return attr->mode; +} + static struct attribute *idxd_device_attributes[] = { &dev_attr_version.attr, &dev_attr_max_groups.attr, @@ -1560,6 +1591,7 @@ static struct attribute *idxd_device_attributes[] = { static const struct attribute_group idxd_device_attribute_group = { .attrs = idxd_device_attributes, + .is_visible = 
idxd_device_attr_visible, }; static const struct attribute_group *idxd_attribute_groups[] = { From 568aa6dd641f63166bb60d769e256789b3ac42d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Povi=C5=A1er?= Date: Wed, 19 Oct 2022 15:23:24 +0200 Subject: [PATCH 0191/4122] dmaengine: apple-admac: Allocate cache SRAM to channels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There's a previously unknown part of the controller interface: We have to assign SRAM carveouts to channels to store their in-flight samples in. So, obtain the size of the SRAM from a read-only register and divide it into 2K blocks for allocation to channels. The FIFO depths we configure will always fit into 2K. (This fixes audio artifacts during simultaneous playback/capture on multiple channels -- which looking back is fully accounted for by having had the caches in the DMA controller overlap in memory.) Fixes: b127315d9a78 ("dmaengine: apple-admac: Add Apple ADMAC driver") Signed-off-by: Martin Povišer Link: https://lore.kernel.org/r/20221019132324.8585-2-povik+lin@cutebit.org Signed-off-by: Vinod Koul --- drivers/dma/apple-admac.c | 102 +++++++++++++++++++++++++++++++++++++- 1 file changed, 101 insertions(+), 1 deletion(-) diff --git a/drivers/dma/apple-admac.c b/drivers/dma/apple-admac.c index 317ca76ccafd..8f2744599435 100644 --- a/drivers/dma/apple-admac.c +++ b/drivers/dma/apple-admac.c @@ -21,6 +21,12 @@ #define NCHANNELS_MAX 64 #define IRQ_NOUTPUTS 4 +/* + * For allocation purposes we split the cache + * memory into blocks of fixed size (given in bytes). 
+ */ +#define SRAM_BLOCK 2048 + #define RING_WRITE_SLOT GENMASK(1, 0) #define RING_READ_SLOT GENMASK(5, 4) #define RING_FULL BIT(9) @@ -36,6 +42,9 @@ #define REG_TX_STOP 0x0004 #define REG_RX_START 0x0008 #define REG_RX_STOP 0x000c +#define REG_IMPRINT 0x0090 +#define REG_TX_SRAM_SIZE 0x0094 +#define REG_RX_SRAM_SIZE 0x0098 #define REG_CHAN_CTL(ch) (0x8000 + (ch) * 0x200) #define REG_CHAN_CTL_RST_RINGS BIT(0) @@ -53,7 +62,9 @@ #define BUS_WIDTH_FRAME_2_WORDS 0x10 #define BUS_WIDTH_FRAME_4_WORDS 0x20 -#define CHAN_BUFSIZE 0x8000 +#define REG_CHAN_SRAM_CARVEOUT(ch) (0x8050 + (ch) * 0x200) +#define CHAN_SRAM_CARVEOUT_SIZE GENMASK(31, 16) +#define CHAN_SRAM_CARVEOUT_BASE GENMASK(15, 0) #define REG_CHAN_FIFOCTL(ch) (0x8054 + (ch) * 0x200) #define CHAN_FIFOCTL_LIMIT GENMASK(31, 16) @@ -76,6 +87,8 @@ struct admac_chan { struct dma_chan chan; struct tasklet_struct tasklet; + u32 carveout; + spinlock_t lock; struct admac_tx *current_tx; int nperiod_acks; @@ -92,12 +105,24 @@ struct admac_chan { struct list_head to_free; }; +struct admac_sram { + u32 size; + /* + * SRAM_CARVEOUT has 16-bit fields, so the SRAM cannot be larger than + * 64K and a 32-bit bitfield over 2K blocks covers it. 
+ */ + u32 allocated; +}; + struct admac_data { struct dma_device dma; struct device *dev; __iomem void *base; struct reset_control *rstc; + struct mutex cache_alloc_lock; + struct admac_sram txcache, rxcache; + int irq; int irq_index; int nchannels; @@ -118,6 +143,60 @@ struct admac_tx { struct list_head node; }; +static int admac_alloc_sram_carveout(struct admac_data *ad, + enum dma_transfer_direction dir, + u32 *out) +{ + struct admac_sram *sram; + int i, ret = 0, nblocks; + + if (dir == DMA_MEM_TO_DEV) + sram = &ad->txcache; + else + sram = &ad->rxcache; + + mutex_lock(&ad->cache_alloc_lock); + + nblocks = sram->size / SRAM_BLOCK; + for (i = 0; i < nblocks; i++) + if (!(sram->allocated & BIT(i))) + break; + + if (i < nblocks) { + *out = FIELD_PREP(CHAN_SRAM_CARVEOUT_BASE, i * SRAM_BLOCK) | + FIELD_PREP(CHAN_SRAM_CARVEOUT_SIZE, SRAM_BLOCK); + sram->allocated |= BIT(i); + } else { + ret = -EBUSY; + } + + mutex_unlock(&ad->cache_alloc_lock); + + return ret; +} + +static void admac_free_sram_carveout(struct admac_data *ad, + enum dma_transfer_direction dir, + u32 carveout) +{ + struct admac_sram *sram; + u32 base = FIELD_GET(CHAN_SRAM_CARVEOUT_BASE, carveout); + int i; + + if (dir == DMA_MEM_TO_DEV) + sram = &ad->txcache; + else + sram = &ad->rxcache; + + if (WARN_ON(base >= sram->size)) + return; + + mutex_lock(&ad->cache_alloc_lock); + i = base / SRAM_BLOCK; + sram->allocated &= ~BIT(i); + mutex_unlock(&ad->cache_alloc_lock); +} + static void admac_modify(struct admac_data *ad, int reg, u32 mask, u32 val) { void __iomem *addr = ad->base + reg; @@ -466,15 +545,28 @@ static void admac_synchronize(struct dma_chan *chan) static int admac_alloc_chan_resources(struct dma_chan *chan) { struct admac_chan *adchan = to_admac_chan(chan); + struct admac_data *ad = adchan->host; + int ret; dma_cookie_init(&adchan->chan); + ret = admac_alloc_sram_carveout(ad, admac_chan_direction(adchan->no), + &adchan->carveout); + if (ret < 0) + return ret; + + 
writel_relaxed(adchan->carveout, + ad->base + REG_CHAN_SRAM_CARVEOUT(adchan->no)); return 0; } static void admac_free_chan_resources(struct dma_chan *chan) { + struct admac_chan *adchan = to_admac_chan(chan); + admac_terminate_all(chan); admac_synchronize(chan); + admac_free_sram_carveout(adchan->host, admac_chan_direction(adchan->no), + adchan->carveout); } static struct dma_chan *admac_dma_of_xlate(struct of_phandle_args *dma_spec, @@ -712,6 +804,7 @@ static int admac_probe(struct platform_device *pdev) platform_set_drvdata(pdev, ad); ad->dev = &pdev->dev; ad->nchannels = nchannels; + mutex_init(&ad->cache_alloc_lock); /* * The controller has 4 IRQ outputs. Try them all until @@ -801,6 +894,13 @@ static int admac_probe(struct platform_device *pdev) goto free_irq; } + ad->txcache.size = readl_relaxed(ad->base + REG_TX_SRAM_SIZE); + ad->rxcache.size = readl_relaxed(ad->base + REG_RX_SRAM_SIZE); + + dev_info(&pdev->dev, "Audio DMA Controller\n"); + dev_info(&pdev->dev, "imprint %x TX cache %u RX cache %u\n", + readl_relaxed(ad->base + REG_IMPRINT), ad->txcache.size, ad->rxcache.size); + return 0; free_irq: From cd0ab43ec91a6114ea309e9e72382fdb184e7b9a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 19 Oct 2022 17:03:32 +0200 Subject: [PATCH 0192/4122] dmaengine: remove iop-adma driver The iop32x platform was removed, so this driver is no longer needed. 
Cc: Dan Williams Signed-off-by: Arnd Bergmann Acked-by: Dan Williams Link: https://lore.kernel.org/r/20221019150410.3851944-10-arnd@kernel.org Signed-off-by: Vinod Koul --- drivers/dma/Kconfig | 8 - drivers/dma/Makefile | 1 - drivers/dma/iop-adma.c | 1554 ---------------------------------------- drivers/dma/iop-adma.h | 914 ----------------------- 4 files changed, 2477 deletions(-) delete mode 100644 drivers/dma/iop-adma.c delete mode 100644 drivers/dma/iop-adma.h diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 7524b62a8870..b73fc89ba877 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -357,14 +357,6 @@ config INTEL_IOATDMA If unsure, say N. -config INTEL_IOP_ADMA - tristate "Intel IOP32x ADMA support" - depends on ARCH_IOP32X || COMPILE_TEST - select DMA_ENGINE - select ASYNC_TX_ENABLE_CHANNEL_SWITCH - help - Enable support for the Intel(R) IOP Series RAID engines. - config K3_DMA tristate "Hisilicon K3 DMA support" depends on ARCH_HI3xxx || ARCH_HISI || COMPILE_TEST diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index 10f7d4241001..5b55ada052a7 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -44,7 +44,6 @@ obj-$(CONFIG_IMX_SDMA) += imx-sdma.o obj-$(CONFIG_INTEL_IDMA64) += idma64.o obj-$(CONFIG_INTEL_IOATDMA) += ioat/ obj-y += idxd/ -obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o obj-$(CONFIG_K3_DMA) += k3dma.o obj-$(CONFIG_LPC18XX_DMAMUX) += lpc18xx-dmamux.o obj-$(CONFIG_MILBEAUT_HDMAC) += milbeaut-hdmac.o diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c deleted file mode 100644 index 310b899d581f..000000000000 --- a/drivers/dma/iop-adma.c +++ /dev/null @@ -1,1554 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * offload engine driver for the Intel Xscale series of i/o processors - * Copyright © 2006, Intel Corporation. 
- */ - -/* - * This driver supports the asynchrounous DMA copy and RAID engines available - * on the Intel Xscale(R) family of I/O Processors (IOP 32x, 33x, 134x) - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "iop-adma.h" -#include "dmaengine.h" - -#define to_iop_adma_chan(chan) container_of(chan, struct iop_adma_chan, common) -#define to_iop_adma_device(dev) \ - container_of(dev, struct iop_adma_device, common) -#define tx_to_iop_adma_slot(tx) \ - container_of(tx, struct iop_adma_desc_slot, async_tx) - -/** - * iop_adma_free_slots - flags descriptor slots for reuse - * @slot: Slot to free - * Caller must hold &iop_chan->lock while calling this function - */ -static void iop_adma_free_slots(struct iop_adma_desc_slot *slot) -{ - int stride = slot->slots_per_op; - - while (stride--) { - slot->slots_per_op = 0; - slot = list_entry(slot->slot_node.next, - struct iop_adma_desc_slot, - slot_node); - } -} - -static dma_cookie_t -iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc, - struct iop_adma_chan *iop_chan, dma_cookie_t cookie) -{ - struct dma_async_tx_descriptor *tx = &desc->async_tx; - - BUG_ON(tx->cookie < 0); - if (tx->cookie > 0) { - cookie = tx->cookie; - tx->cookie = 0; - - /* call the callback (must not sleep or submit new - * operations to this channel) - */ - dmaengine_desc_get_callback_invoke(tx, NULL); - - dma_descriptor_unmap(tx); - if (desc->group_head) - desc->group_head = NULL; - } - - /* run dependent operations */ - dma_run_dependencies(tx); - - return cookie; -} - -static int -iop_adma_clean_slot(struct iop_adma_desc_slot *desc, - struct iop_adma_chan *iop_chan) -{ - /* the client is allowed to attach dependent operations - * until 'ack' is set - */ - if (!async_tx_test_ack(&desc->async_tx)) - return 0; - - /* leave the last descriptor in the chain - * so we can append to it - */ - if (desc->chain_node.next == &iop_chan->chain) - return 1; - 
- dev_dbg(iop_chan->device->common.dev, - "\tfree slot: %d slots_per_op: %d\n", - desc->idx, desc->slots_per_op); - - list_del(&desc->chain_node); - iop_adma_free_slots(desc); - - return 0; -} - -static void __iop_adma_slot_cleanup(struct iop_adma_chan *iop_chan) -{ - struct iop_adma_desc_slot *iter, *_iter, *grp_start = NULL; - dma_cookie_t cookie = 0; - u32 current_desc = iop_chan_get_current_descriptor(iop_chan); - int busy = iop_chan_is_busy(iop_chan); - int seen_current = 0, slot_cnt = 0, slots_per_op = 0; - - dev_dbg(iop_chan->device->common.dev, "%s\n", __func__); - /* free completed slots from the chain starting with - * the oldest descriptor - */ - list_for_each_entry_safe(iter, _iter, &iop_chan->chain, - chain_node) { - pr_debug("\tcookie: %d slot: %d busy: %d " - "this_desc: %pad next_desc: %#llx ack: %d\n", - iter->async_tx.cookie, iter->idx, busy, - &iter->async_tx.phys, (u64)iop_desc_get_next_desc(iter), - async_tx_test_ack(&iter->async_tx)); - prefetch(_iter); - prefetch(&_iter->async_tx); - - /* do not advance past the current descriptor loaded into the - * hardware channel, subsequent descriptors are either in - * process or have not been submitted - */ - if (seen_current) - break; - - /* stop the search if we reach the current descriptor and the - * channel is busy, or if it appears that the current descriptor - * needs to be re-read (i.e. 
has been appended to) - */ - if (iter->async_tx.phys == current_desc) { - BUG_ON(seen_current++); - if (busy || iop_desc_get_next_desc(iter)) - break; - } - - /* detect the start of a group transaction */ - if (!slot_cnt && !slots_per_op) { - slot_cnt = iter->slot_cnt; - slots_per_op = iter->slots_per_op; - if (slot_cnt <= slots_per_op) { - slot_cnt = 0; - slots_per_op = 0; - } - } - - if (slot_cnt) { - pr_debug("\tgroup++\n"); - if (!grp_start) - grp_start = iter; - slot_cnt -= slots_per_op; - } - - /* all the members of a group are complete */ - if (slots_per_op != 0 && slot_cnt == 0) { - struct iop_adma_desc_slot *grp_iter, *_grp_iter; - int end_of_chain = 0; - pr_debug("\tgroup end\n"); - - /* collect the total results */ - if (grp_start->xor_check_result) { - u32 zero_sum_result = 0; - slot_cnt = grp_start->slot_cnt; - grp_iter = grp_start; - - list_for_each_entry_from(grp_iter, - &iop_chan->chain, chain_node) { - zero_sum_result |= - iop_desc_get_zero_result(grp_iter); - pr_debug("\titer%d result: %d\n", - grp_iter->idx, zero_sum_result); - slot_cnt -= slots_per_op; - if (slot_cnt == 0) - break; - } - pr_debug("\tgrp_start->xor_check_result: %p\n", - grp_start->xor_check_result); - *grp_start->xor_check_result = zero_sum_result; - } - - /* clean up the group */ - slot_cnt = grp_start->slot_cnt; - grp_iter = grp_start; - list_for_each_entry_safe_from(grp_iter, _grp_iter, - &iop_chan->chain, chain_node) { - cookie = iop_adma_run_tx_complete_actions( - grp_iter, iop_chan, cookie); - - slot_cnt -= slots_per_op; - end_of_chain = iop_adma_clean_slot(grp_iter, - iop_chan); - - if (slot_cnt == 0 || end_of_chain) - break; - } - - /* the group should be complete at this point */ - BUG_ON(slot_cnt); - - slots_per_op = 0; - grp_start = NULL; - if (end_of_chain) - break; - else - continue; - } else if (slots_per_op) /* wait for group completion */ - continue; - - /* write back zero sum results (single descriptor case) */ - if (iter->xor_check_result && 
iter->async_tx.cookie) - *iter->xor_check_result = - iop_desc_get_zero_result(iter); - - cookie = iop_adma_run_tx_complete_actions( - iter, iop_chan, cookie); - - if (iop_adma_clean_slot(iter, iop_chan)) - break; - } - - if (cookie > 0) { - iop_chan->common.completed_cookie = cookie; - pr_debug("\tcompleted cookie %d\n", cookie); - } -} - -static void -iop_adma_slot_cleanup(struct iop_adma_chan *iop_chan) -{ - spin_lock_bh(&iop_chan->lock); - __iop_adma_slot_cleanup(iop_chan); - spin_unlock_bh(&iop_chan->lock); -} - -static void iop_adma_tasklet(struct tasklet_struct *t) -{ - struct iop_adma_chan *iop_chan = from_tasklet(iop_chan, t, - irq_tasklet); - - /* lockdep will flag depedency submissions as potentially - * recursive locking, this is not the case as a dependency - * submission will never recurse a channels submit routine. - * There are checks in async_tx.c to prevent this. - */ - spin_lock_nested(&iop_chan->lock, SINGLE_DEPTH_NESTING); - __iop_adma_slot_cleanup(iop_chan); - spin_unlock(&iop_chan->lock); -} - -static struct iop_adma_desc_slot * -iop_adma_alloc_slots(struct iop_adma_chan *iop_chan, int num_slots, - int slots_per_op) -{ - struct iop_adma_desc_slot *iter, *_iter, *alloc_start = NULL; - LIST_HEAD(chain); - int slots_found, retry = 0; - - /* start search from the last allocated descrtiptor - * if a contiguous allocation can not be found start searching - * from the beginning of the list - */ -retry: - slots_found = 0; - if (retry == 0) - iter = iop_chan->last_used; - else - iter = list_entry(&iop_chan->all_slots, - struct iop_adma_desc_slot, - slot_node); - - list_for_each_entry_safe_continue( - iter, _iter, &iop_chan->all_slots, slot_node) { - prefetch(_iter); - prefetch(&_iter->async_tx); - if (iter->slots_per_op) { - /* give up after finding the first busy slot - * on the second pass through the list - */ - if (retry) - break; - - slots_found = 0; - continue; - } - - /* start the allocation if the slot is correctly aligned */ - if 
(!slots_found++) { - if (iop_desc_is_aligned(iter, slots_per_op)) - alloc_start = iter; - else { - slots_found = 0; - continue; - } - } - - if (slots_found == num_slots) { - struct iop_adma_desc_slot *alloc_tail = NULL; - struct iop_adma_desc_slot *last_used = NULL; - iter = alloc_start; - while (num_slots) { - int i; - dev_dbg(iop_chan->device->common.dev, - "allocated slot: %d " - "(desc %p phys: %#llx) slots_per_op %d\n", - iter->idx, iter->hw_desc, - (u64)iter->async_tx.phys, slots_per_op); - - /* pre-ack all but the last descriptor */ - if (num_slots != slots_per_op) - async_tx_ack(&iter->async_tx); - - list_add_tail(&iter->chain_node, &chain); - alloc_tail = iter; - iter->async_tx.cookie = 0; - iter->slot_cnt = num_slots; - iter->xor_check_result = NULL; - for (i = 0; i < slots_per_op; i++) { - iter->slots_per_op = slots_per_op - i; - last_used = iter; - iter = list_entry(iter->slot_node.next, - struct iop_adma_desc_slot, - slot_node); - } - num_slots -= slots_per_op; - } - alloc_tail->group_head = alloc_start; - alloc_tail->async_tx.cookie = -EBUSY; - list_splice(&chain, &alloc_tail->tx_list); - iop_chan->last_used = last_used; - iop_desc_clear_next_desc(alloc_start); - iop_desc_clear_next_desc(alloc_tail); - return alloc_tail; - } - } - if (!retry++) - goto retry; - - /* perform direct reclaim if the allocation fails */ - __iop_adma_slot_cleanup(iop_chan); - - return NULL; -} - -static void iop_adma_check_threshold(struct iop_adma_chan *iop_chan) -{ - dev_dbg(iop_chan->device->common.dev, "pending: %d\n", - iop_chan->pending); - - if (iop_chan->pending >= IOP_ADMA_THRESHOLD) { - iop_chan->pending = 0; - iop_chan_append(iop_chan); - } -} - -static dma_cookie_t -iop_adma_tx_submit(struct dma_async_tx_descriptor *tx) -{ - struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx); - struct iop_adma_chan *iop_chan = to_iop_adma_chan(tx->chan); - struct iop_adma_desc_slot *grp_start, *old_chain_tail; - int slot_cnt; - dma_cookie_t cookie; - dma_addr_t 
next_dma; - - grp_start = sw_desc->group_head; - slot_cnt = grp_start->slot_cnt; - - spin_lock_bh(&iop_chan->lock); - cookie = dma_cookie_assign(tx); - - old_chain_tail = list_entry(iop_chan->chain.prev, - struct iop_adma_desc_slot, chain_node); - list_splice_init(&sw_desc->tx_list, - &old_chain_tail->chain_node); - - /* fix up the hardware chain */ - next_dma = grp_start->async_tx.phys; - iop_desc_set_next_desc(old_chain_tail, next_dma); - BUG_ON(iop_desc_get_next_desc(old_chain_tail) != next_dma); /* flush */ - - /* check for pre-chained descriptors */ - iop_paranoia(iop_desc_get_next_desc(sw_desc)); - - /* increment the pending count by the number of slots - * memcpy operations have a 1:1 (slot:operation) relation - * other operations are heavier and will pop the threshold - * more often. - */ - iop_chan->pending += slot_cnt; - iop_adma_check_threshold(iop_chan); - spin_unlock_bh(&iop_chan->lock); - - dev_dbg(iop_chan->device->common.dev, "%s cookie: %d slot: %d\n", - __func__, sw_desc->async_tx.cookie, sw_desc->idx); - - return cookie; -} - -static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan); -static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan); - -/** - * iop_adma_alloc_chan_resources - returns the number of allocated descriptors - * @chan: allocate descriptor resources for this channel - * - * Note: We keep the slots for 1 operation on iop_chan->chain at all times. To - * avoid deadlock, via async_xor, num_descs_in_pool must at a minimum be - * greater than 2x the number slots needed to satisfy a device->max_xor - * request. - * */ -static int iop_adma_alloc_chan_resources(struct dma_chan *chan) -{ - char *hw_desc; - dma_addr_t dma_desc; - int idx; - struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); - struct iop_adma_desc_slot *slot = NULL; - int init = iop_chan->slots_allocated ? 
0 : 1; - struct iop_adma_platform_data *plat_data = - dev_get_platdata(&iop_chan->device->pdev->dev); - int num_descs_in_pool = plat_data->pool_size/IOP_ADMA_SLOT_SIZE; - - /* Allocate descriptor slots */ - do { - idx = iop_chan->slots_allocated; - if (idx == num_descs_in_pool) - break; - - slot = kzalloc(sizeof(*slot), GFP_KERNEL); - if (!slot) { - printk(KERN_INFO "IOP ADMA Channel only initialized" - " %d descriptor slots", idx); - break; - } - hw_desc = (char *) iop_chan->device->dma_desc_pool_virt; - slot->hw_desc = (void *) &hw_desc[idx * IOP_ADMA_SLOT_SIZE]; - - dma_async_tx_descriptor_init(&slot->async_tx, chan); - slot->async_tx.tx_submit = iop_adma_tx_submit; - INIT_LIST_HEAD(&slot->tx_list); - INIT_LIST_HEAD(&slot->chain_node); - INIT_LIST_HEAD(&slot->slot_node); - dma_desc = iop_chan->device->dma_desc_pool; - slot->async_tx.phys = dma_desc + idx * IOP_ADMA_SLOT_SIZE; - slot->idx = idx; - - spin_lock_bh(&iop_chan->lock); - iop_chan->slots_allocated++; - list_add_tail(&slot->slot_node, &iop_chan->all_slots); - spin_unlock_bh(&iop_chan->lock); - } while (iop_chan->slots_allocated < num_descs_in_pool); - - if (idx && !iop_chan->last_used) - iop_chan->last_used = list_entry(iop_chan->all_slots.next, - struct iop_adma_desc_slot, - slot_node); - - dev_dbg(iop_chan->device->common.dev, - "allocated %d descriptor slots last_used: %p\n", - iop_chan->slots_allocated, iop_chan->last_used); - - /* initialize the channel and the chain with a null operation */ - if (init) { - if (dma_has_cap(DMA_MEMCPY, - iop_chan->device->common.cap_mask)) - iop_chan_start_null_memcpy(iop_chan); - else if (dma_has_cap(DMA_XOR, - iop_chan->device->common.cap_mask)) - iop_chan_start_null_xor(iop_chan); - else - BUG(); - } - - return (idx > 0) ? 
idx : -ENOMEM; -} - -static struct dma_async_tx_descriptor * -iop_adma_prep_dma_interrupt(struct dma_chan *chan, unsigned long flags) -{ - struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); - struct iop_adma_desc_slot *sw_desc, *grp_start; - int slot_cnt, slots_per_op; - - dev_dbg(iop_chan->device->common.dev, "%s\n", __func__); - - spin_lock_bh(&iop_chan->lock); - slot_cnt = iop_chan_interrupt_slot_count(&slots_per_op, iop_chan); - sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); - if (sw_desc) { - grp_start = sw_desc->group_head; - iop_desc_init_interrupt(grp_start, iop_chan); - sw_desc->async_tx.flags = flags; - } - spin_unlock_bh(&iop_chan->lock); - - return sw_desc ? &sw_desc->async_tx : NULL; -} - -static struct dma_async_tx_descriptor * -iop_adma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dma_dest, - dma_addr_t dma_src, size_t len, unsigned long flags) -{ - struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); - struct iop_adma_desc_slot *sw_desc, *grp_start; - int slot_cnt, slots_per_op; - - if (unlikely(!len)) - return NULL; - BUG_ON(len > IOP_ADMA_MAX_BYTE_COUNT); - - dev_dbg(iop_chan->device->common.dev, "%s len: %zu\n", - __func__, len); - - spin_lock_bh(&iop_chan->lock); - slot_cnt = iop_chan_memcpy_slot_count(len, &slots_per_op); - sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); - if (sw_desc) { - grp_start = sw_desc->group_head; - iop_desc_init_memcpy(grp_start, flags); - iop_desc_set_byte_count(grp_start, iop_chan, len); - iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest); - iop_desc_set_memcpy_src_addr(grp_start, dma_src); - sw_desc->async_tx.flags = flags; - } - spin_unlock_bh(&iop_chan->lock); - - return sw_desc ? 
&sw_desc->async_tx : NULL; -} - -static struct dma_async_tx_descriptor * -iop_adma_prep_dma_xor(struct dma_chan *chan, dma_addr_t dma_dest, - dma_addr_t *dma_src, unsigned int src_cnt, size_t len, - unsigned long flags) -{ - struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); - struct iop_adma_desc_slot *sw_desc, *grp_start; - int slot_cnt, slots_per_op; - - if (unlikely(!len)) - return NULL; - BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT); - - dev_dbg(iop_chan->device->common.dev, - "%s src_cnt: %d len: %zu flags: %lx\n", - __func__, src_cnt, len, flags); - - spin_lock_bh(&iop_chan->lock); - slot_cnt = iop_chan_xor_slot_count(len, src_cnt, &slots_per_op); - sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); - if (sw_desc) { - grp_start = sw_desc->group_head; - iop_desc_init_xor(grp_start, src_cnt, flags); - iop_desc_set_byte_count(grp_start, iop_chan, len); - iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest); - sw_desc->async_tx.flags = flags; - while (src_cnt--) - iop_desc_set_xor_src_addr(grp_start, src_cnt, - dma_src[src_cnt]); - } - spin_unlock_bh(&iop_chan->lock); - - return sw_desc ? 
&sw_desc->async_tx : NULL; -} - -static struct dma_async_tx_descriptor * -iop_adma_prep_dma_xor_val(struct dma_chan *chan, dma_addr_t *dma_src, - unsigned int src_cnt, size_t len, u32 *result, - unsigned long flags) -{ - struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); - struct iop_adma_desc_slot *sw_desc, *grp_start; - int slot_cnt, slots_per_op; - - if (unlikely(!len)) - return NULL; - - dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %d len: %zu\n", - __func__, src_cnt, len); - - spin_lock_bh(&iop_chan->lock); - slot_cnt = iop_chan_zero_sum_slot_count(len, src_cnt, &slots_per_op); - sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); - if (sw_desc) { - grp_start = sw_desc->group_head; - iop_desc_init_zero_sum(grp_start, src_cnt, flags); - iop_desc_set_zero_sum_byte_count(grp_start, len); - grp_start->xor_check_result = result; - pr_debug("\t%s: grp_start->xor_check_result: %p\n", - __func__, grp_start->xor_check_result); - sw_desc->async_tx.flags = flags; - while (src_cnt--) - iop_desc_set_zero_sum_src_addr(grp_start, src_cnt, - dma_src[src_cnt]); - } - spin_unlock_bh(&iop_chan->lock); - - return sw_desc ? 
&sw_desc->async_tx : NULL; -} - -static struct dma_async_tx_descriptor * -iop_adma_prep_dma_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, - unsigned int src_cnt, const unsigned char *scf, size_t len, - unsigned long flags) -{ - struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); - struct iop_adma_desc_slot *sw_desc, *g; - int slot_cnt, slots_per_op; - int continue_srcs; - - if (unlikely(!len)) - return NULL; - BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT); - - dev_dbg(iop_chan->device->common.dev, - "%s src_cnt: %d len: %zu flags: %lx\n", - __func__, src_cnt, len, flags); - - if (dmaf_p_disabled_continue(flags)) - continue_srcs = 1+src_cnt; - else if (dmaf_continue(flags)) - continue_srcs = 3+src_cnt; - else - continue_srcs = 0+src_cnt; - - spin_lock_bh(&iop_chan->lock); - slot_cnt = iop_chan_pq_slot_count(len, continue_srcs, &slots_per_op); - sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); - if (sw_desc) { - int i; - - g = sw_desc->group_head; - iop_desc_set_byte_count(g, iop_chan, len); - - /* even if P is disabled its destination address (bits - * [3:0]) must match Q. It is ok if P points to an - * invalid address, it won't be written. - */ - if (flags & DMA_PREP_PQ_DISABLE_P) - dst[0] = dst[1] & 0x7; - - iop_desc_set_pq_addr(g, dst); - sw_desc->async_tx.flags = flags; - for (i = 0; i < src_cnt; i++) - iop_desc_set_pq_src_addr(g, i, src[i], scf[i]); - - /* if we are continuing a previous operation factor in - * the old p and q values, see the comment for dma_maxpq - * in include/linux/dmaengine.h - */ - if (dmaf_p_disabled_continue(flags)) - iop_desc_set_pq_src_addr(g, i++, dst[1], 1); - else if (dmaf_continue(flags)) { - iop_desc_set_pq_src_addr(g, i++, dst[0], 0); - iop_desc_set_pq_src_addr(g, i++, dst[1], 1); - iop_desc_set_pq_src_addr(g, i++, dst[1], 0); - } - iop_desc_init_pq(g, i, flags); - } - spin_unlock_bh(&iop_chan->lock); - - return sw_desc ? 
&sw_desc->async_tx : NULL; -} - -static struct dma_async_tx_descriptor * -iop_adma_prep_dma_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, - unsigned int src_cnt, const unsigned char *scf, - size_t len, enum sum_check_flags *pqres, - unsigned long flags) -{ - struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); - struct iop_adma_desc_slot *sw_desc, *g; - int slot_cnt, slots_per_op; - - if (unlikely(!len)) - return NULL; - BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT); - - dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %d len: %zu\n", - __func__, src_cnt, len); - - spin_lock_bh(&iop_chan->lock); - slot_cnt = iop_chan_pq_zero_sum_slot_count(len, src_cnt + 2, &slots_per_op); - sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); - if (sw_desc) { - /* for validate operations p and q are tagged onto the - * end of the source list - */ - int pq_idx = src_cnt; - - g = sw_desc->group_head; - iop_desc_init_pq_zero_sum(g, src_cnt+2, flags); - iop_desc_set_pq_zero_sum_byte_count(g, len); - g->pq_check_result = pqres; - pr_debug("\t%s: g->pq_check_result: %p\n", - __func__, g->pq_check_result); - sw_desc->async_tx.flags = flags; - while (src_cnt--) - iop_desc_set_pq_zero_sum_src_addr(g, src_cnt, - src[src_cnt], - scf[src_cnt]); - iop_desc_set_pq_zero_sum_addr(g, pq_idx, src); - } - spin_unlock_bh(&iop_chan->lock); - - return sw_desc ? 
&sw_desc->async_tx : NULL; -} - -static void iop_adma_free_chan_resources(struct dma_chan *chan) -{ - struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); - struct iop_adma_desc_slot *iter, *_iter; - int in_use_descs = 0; - - iop_adma_slot_cleanup(iop_chan); - - spin_lock_bh(&iop_chan->lock); - list_for_each_entry_safe(iter, _iter, &iop_chan->chain, - chain_node) { - in_use_descs++; - list_del(&iter->chain_node); - } - list_for_each_entry_safe_reverse( - iter, _iter, &iop_chan->all_slots, slot_node) { - list_del(&iter->slot_node); - kfree(iter); - iop_chan->slots_allocated--; - } - iop_chan->last_used = NULL; - - dev_dbg(iop_chan->device->common.dev, "%s slots_allocated %d\n", - __func__, iop_chan->slots_allocated); - spin_unlock_bh(&iop_chan->lock); - - /* one is ok since we left it on there on purpose */ - if (in_use_descs > 1) - printk(KERN_ERR "IOP: Freeing %d in use descriptors!\n", - in_use_descs - 1); -} - -/** - * iop_adma_status - poll the status of an ADMA transaction - * @chan: ADMA channel handle - * @cookie: ADMA transaction identifier - * @txstate: a holder for the current state of the channel or NULL - */ -static enum dma_status iop_adma_status(struct dma_chan *chan, - dma_cookie_t cookie, - struct dma_tx_state *txstate) -{ - struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); - int ret; - - ret = dma_cookie_status(chan, cookie, txstate); - if (ret == DMA_COMPLETE) - return ret; - - iop_adma_slot_cleanup(iop_chan); - - return dma_cookie_status(chan, cookie, txstate); -} - -static irqreturn_t iop_adma_eot_handler(int irq, void *data) -{ - struct iop_adma_chan *chan = data; - - dev_dbg(chan->device->common.dev, "%s\n", __func__); - - tasklet_schedule(&chan->irq_tasklet); - - iop_adma_device_clear_eot_status(chan); - - return IRQ_HANDLED; -} - -static irqreturn_t iop_adma_eoc_handler(int irq, void *data) -{ - struct iop_adma_chan *chan = data; - - dev_dbg(chan->device->common.dev, "%s\n", __func__); - - tasklet_schedule(&chan->irq_tasklet); - - 
iop_adma_device_clear_eoc_status(chan); - - return IRQ_HANDLED; -} - -static irqreturn_t iop_adma_err_handler(int irq, void *data) -{ - struct iop_adma_chan *chan = data; - unsigned long status = iop_chan_get_status(chan); - - dev_err(chan->device->common.dev, - "error ( %s%s%s%s%s%s%s)\n", - iop_is_err_int_parity(status, chan) ? "int_parity " : "", - iop_is_err_mcu_abort(status, chan) ? "mcu_abort " : "", - iop_is_err_int_tabort(status, chan) ? "int_tabort " : "", - iop_is_err_int_mabort(status, chan) ? "int_mabort " : "", - iop_is_err_pci_tabort(status, chan) ? "pci_tabort " : "", - iop_is_err_pci_mabort(status, chan) ? "pci_mabort " : "", - iop_is_err_split_tx(status, chan) ? "split_tx " : ""); - - iop_adma_device_clear_err_status(chan); - - BUG(); - - return IRQ_HANDLED; -} - -static void iop_adma_issue_pending(struct dma_chan *chan) -{ - struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); - - if (iop_chan->pending) { - iop_chan->pending = 0; - iop_chan_append(iop_chan); - } -} - -/* - * Perform a transaction to verify the HW works. 
- */ -#define IOP_ADMA_TEST_SIZE 2000 - -static int iop_adma_memcpy_self_test(struct iop_adma_device *device) -{ - int i; - void *src, *dest; - dma_addr_t src_dma, dest_dma; - struct dma_chan *dma_chan; - dma_cookie_t cookie; - struct dma_async_tx_descriptor *tx; - int err = 0; - struct iop_adma_chan *iop_chan; - - dev_dbg(device->common.dev, "%s\n", __func__); - - src = kmalloc(IOP_ADMA_TEST_SIZE, GFP_KERNEL); - if (!src) - return -ENOMEM; - dest = kzalloc(IOP_ADMA_TEST_SIZE, GFP_KERNEL); - if (!dest) { - kfree(src); - return -ENOMEM; - } - - /* Fill in src buffer */ - for (i = 0; i < IOP_ADMA_TEST_SIZE; i++) - ((u8 *) src)[i] = (u8)i; - - /* Start copy, using first DMA channel */ - dma_chan = container_of(device->common.channels.next, - struct dma_chan, - device_node); - if (iop_adma_alloc_chan_resources(dma_chan) < 1) { - err = -ENODEV; - goto out; - } - - dest_dma = dma_map_single(dma_chan->device->dev, dest, - IOP_ADMA_TEST_SIZE, DMA_FROM_DEVICE); - src_dma = dma_map_single(dma_chan->device->dev, src, - IOP_ADMA_TEST_SIZE, DMA_TO_DEVICE); - tx = iop_adma_prep_dma_memcpy(dma_chan, dest_dma, src_dma, - IOP_ADMA_TEST_SIZE, - DMA_PREP_INTERRUPT | DMA_CTRL_ACK); - - cookie = iop_adma_tx_submit(tx); - iop_adma_issue_pending(dma_chan); - msleep(1); - - if (iop_adma_status(dma_chan, cookie, NULL) != - DMA_COMPLETE) { - dev_err(dma_chan->device->dev, - "Self-test copy timed out, disabling\n"); - err = -ENODEV; - goto free_resources; - } - - iop_chan = to_iop_adma_chan(dma_chan); - dma_sync_single_for_cpu(&iop_chan->device->pdev->dev, dest_dma, - IOP_ADMA_TEST_SIZE, DMA_FROM_DEVICE); - if (memcmp(src, dest, IOP_ADMA_TEST_SIZE)) { - dev_err(dma_chan->device->dev, - "Self-test copy failed compare, disabling\n"); - err = -ENODEV; - goto free_resources; - } - -free_resources: - iop_adma_free_chan_resources(dma_chan); -out: - kfree(src); - kfree(dest); - return err; -} - -#define IOP_ADMA_NUM_SRC_TEST 4 /* must be <= 15 */ -static int -iop_adma_xor_val_self_test(struct 
iop_adma_device *device) -{ - int i, src_idx; - struct page *dest; - struct page *xor_srcs[IOP_ADMA_NUM_SRC_TEST]; - struct page *zero_sum_srcs[IOP_ADMA_NUM_SRC_TEST + 1]; - dma_addr_t dma_srcs[IOP_ADMA_NUM_SRC_TEST + 1]; - dma_addr_t dest_dma; - struct dma_async_tx_descriptor *tx; - struct dma_chan *dma_chan; - dma_cookie_t cookie; - u8 cmp_byte = 0; - u32 cmp_word; - u32 zero_sum_result; - int err = 0; - struct iop_adma_chan *iop_chan; - - dev_dbg(device->common.dev, "%s\n", __func__); - - for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++) { - xor_srcs[src_idx] = alloc_page(GFP_KERNEL); - if (!xor_srcs[src_idx]) { - while (src_idx--) - __free_page(xor_srcs[src_idx]); - return -ENOMEM; - } - } - - dest = alloc_page(GFP_KERNEL); - if (!dest) { - while (src_idx--) - __free_page(xor_srcs[src_idx]); - return -ENOMEM; - } - - /* Fill in src buffers */ - for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++) { - u8 *ptr = page_address(xor_srcs[src_idx]); - for (i = 0; i < PAGE_SIZE; i++) - ptr[i] = (1 << src_idx); - } - - for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++) - cmp_byte ^= (u8) (1 << src_idx); - - cmp_word = (cmp_byte << 24) | (cmp_byte << 16) | - (cmp_byte << 8) | cmp_byte; - - memset(page_address(dest), 0, PAGE_SIZE); - - dma_chan = container_of(device->common.channels.next, - struct dma_chan, - device_node); - if (iop_adma_alloc_chan_resources(dma_chan) < 1) { - err = -ENODEV; - goto out; - } - - /* test xor */ - dest_dma = dma_map_page(dma_chan->device->dev, dest, 0, - PAGE_SIZE, DMA_FROM_DEVICE); - for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) - dma_srcs[i] = dma_map_page(dma_chan->device->dev, xor_srcs[i], - 0, PAGE_SIZE, DMA_TO_DEVICE); - tx = iop_adma_prep_dma_xor(dma_chan, dest_dma, dma_srcs, - IOP_ADMA_NUM_SRC_TEST, PAGE_SIZE, - DMA_PREP_INTERRUPT | DMA_CTRL_ACK); - - cookie = iop_adma_tx_submit(tx); - iop_adma_issue_pending(dma_chan); - msleep(8); - - if (iop_adma_status(dma_chan, cookie, NULL) != - DMA_COMPLETE) { - 
dev_err(dma_chan->device->dev, - "Self-test xor timed out, disabling\n"); - err = -ENODEV; - goto free_resources; - } - - iop_chan = to_iop_adma_chan(dma_chan); - dma_sync_single_for_cpu(&iop_chan->device->pdev->dev, dest_dma, - PAGE_SIZE, DMA_FROM_DEVICE); - for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) { - u32 *ptr = page_address(dest); - if (ptr[i] != cmp_word) { - dev_err(dma_chan->device->dev, - "Self-test xor failed compare, disabling\n"); - err = -ENODEV; - goto free_resources; - } - } - dma_sync_single_for_device(&iop_chan->device->pdev->dev, dest_dma, - PAGE_SIZE, DMA_TO_DEVICE); - - /* skip zero sum if the capability is not present */ - if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask)) - goto free_resources; - - /* zero sum the sources with the destintation page */ - for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) - zero_sum_srcs[i] = xor_srcs[i]; - zero_sum_srcs[i] = dest; - - zero_sum_result = 1; - - for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 1; i++) - dma_srcs[i] = dma_map_page(dma_chan->device->dev, - zero_sum_srcs[i], 0, PAGE_SIZE, - DMA_TO_DEVICE); - tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs, - IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE, - &zero_sum_result, - DMA_PREP_INTERRUPT | DMA_CTRL_ACK); - - cookie = iop_adma_tx_submit(tx); - iop_adma_issue_pending(dma_chan); - msleep(8); - - if (iop_adma_status(dma_chan, cookie, NULL) != DMA_COMPLETE) { - dev_err(dma_chan->device->dev, - "Self-test zero sum timed out, disabling\n"); - err = -ENODEV; - goto free_resources; - } - - if (zero_sum_result != 0) { - dev_err(dma_chan->device->dev, - "Self-test zero sum failed compare, disabling\n"); - err = -ENODEV; - goto free_resources; - } - - /* test for non-zero parity sum */ - zero_sum_result = 0; - for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 1; i++) - dma_srcs[i] = dma_map_page(dma_chan->device->dev, - zero_sum_srcs[i], 0, PAGE_SIZE, - DMA_TO_DEVICE); - tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs, - IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE, - 
&zero_sum_result, - DMA_PREP_INTERRUPT | DMA_CTRL_ACK); - - cookie = iop_adma_tx_submit(tx); - iop_adma_issue_pending(dma_chan); - msleep(8); - - if (iop_adma_status(dma_chan, cookie, NULL) != DMA_COMPLETE) { - dev_err(dma_chan->device->dev, - "Self-test non-zero sum timed out, disabling\n"); - err = -ENODEV; - goto free_resources; - } - - if (zero_sum_result != 1) { - dev_err(dma_chan->device->dev, - "Self-test non-zero sum failed compare, disabling\n"); - err = -ENODEV; - goto free_resources; - } - -free_resources: - iop_adma_free_chan_resources(dma_chan); -out: - src_idx = IOP_ADMA_NUM_SRC_TEST; - while (src_idx--) - __free_page(xor_srcs[src_idx]); - __free_page(dest); - return err; -} - -#ifdef CONFIG_RAID6_PQ -static int -iop_adma_pq_zero_sum_self_test(struct iop_adma_device *device) -{ - /* combined sources, software pq results, and extra hw pq results */ - struct page *pq[IOP_ADMA_NUM_SRC_TEST+2+2]; - /* ptr to the extra hw pq buffers defined above */ - struct page **pq_hw = &pq[IOP_ADMA_NUM_SRC_TEST+2]; - /* address conversion buffers (dma_map / page_address) */ - void *pq_sw[IOP_ADMA_NUM_SRC_TEST+2]; - dma_addr_t pq_src[IOP_ADMA_NUM_SRC_TEST+2]; - dma_addr_t *pq_dest = &pq_src[IOP_ADMA_NUM_SRC_TEST]; - - int i; - struct dma_async_tx_descriptor *tx; - struct dma_chan *dma_chan; - dma_cookie_t cookie; - u32 zero_sum_result; - int err = 0; - struct device *dev; - - dev_dbg(device->common.dev, "%s\n", __func__); - - for (i = 0; i < ARRAY_SIZE(pq); i++) { - pq[i] = alloc_page(GFP_KERNEL); - if (!pq[i]) { - while (i--) - __free_page(pq[i]); - return -ENOMEM; - } - } - - /* Fill in src buffers */ - for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) { - pq_sw[i] = page_address(pq[i]); - memset(pq_sw[i], 0x11111111 * (1<common.channels.next, - struct dma_chan, - device_node); - if (iop_adma_alloc_chan_resources(dma_chan) < 1) { - err = -ENODEV; - goto out; - } - - dev = dma_chan->device->dev; - - /* initialize the dests */ - memset(page_address(pq_hw[0]), 0 , PAGE_SIZE); 
- memset(page_address(pq_hw[1]), 0 , PAGE_SIZE); - - /* test pq */ - pq_dest[0] = dma_map_page(dev, pq_hw[0], 0, PAGE_SIZE, DMA_FROM_DEVICE); - pq_dest[1] = dma_map_page(dev, pq_hw[1], 0, PAGE_SIZE, DMA_FROM_DEVICE); - for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) - pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE, - DMA_TO_DEVICE); - - tx = iop_adma_prep_dma_pq(dma_chan, pq_dest, pq_src, - IOP_ADMA_NUM_SRC_TEST, (u8 *)raid6_gfexp, - PAGE_SIZE, - DMA_PREP_INTERRUPT | - DMA_CTRL_ACK); - - cookie = iop_adma_tx_submit(tx); - iop_adma_issue_pending(dma_chan); - msleep(8); - - if (iop_adma_status(dma_chan, cookie, NULL) != - DMA_COMPLETE) { - dev_err(dev, "Self-test pq timed out, disabling\n"); - err = -ENODEV; - goto free_resources; - } - - raid6_call.gen_syndrome(IOP_ADMA_NUM_SRC_TEST+2, PAGE_SIZE, pq_sw); - - if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST], - page_address(pq_hw[0]), PAGE_SIZE) != 0) { - dev_err(dev, "Self-test p failed compare, disabling\n"); - err = -ENODEV; - goto free_resources; - } - if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST+1], - page_address(pq_hw[1]), PAGE_SIZE) != 0) { - dev_err(dev, "Self-test q failed compare, disabling\n"); - err = -ENODEV; - goto free_resources; - } - - /* test correct zero sum using the software generated pq values */ - for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++) - pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE, - DMA_TO_DEVICE); - - zero_sum_result = ~0; - tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST], - pq_src, IOP_ADMA_NUM_SRC_TEST, - raid6_gfexp, PAGE_SIZE, &zero_sum_result, - DMA_PREP_INTERRUPT|DMA_CTRL_ACK); - - cookie = iop_adma_tx_submit(tx); - iop_adma_issue_pending(dma_chan); - msleep(8); - - if (iop_adma_status(dma_chan, cookie, NULL) != - DMA_COMPLETE) { - dev_err(dev, "Self-test pq-zero-sum timed out, disabling\n"); - err = -ENODEV; - goto free_resources; - } - - if (zero_sum_result != 0) { - dev_err(dev, "Self-test pq-zero-sum failed to validate: %x\n", - zero_sum_result); - err = 
-ENODEV; - goto free_resources; - } - - /* test incorrect zero sum */ - i = IOP_ADMA_NUM_SRC_TEST; - memset(pq_sw[i] + 100, 0, 100); - memset(pq_sw[i+1] + 200, 0, 200); - for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++) - pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE, - DMA_TO_DEVICE); - - zero_sum_result = 0; - tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST], - pq_src, IOP_ADMA_NUM_SRC_TEST, - raid6_gfexp, PAGE_SIZE, &zero_sum_result, - DMA_PREP_INTERRUPT|DMA_CTRL_ACK); - - cookie = iop_adma_tx_submit(tx); - iop_adma_issue_pending(dma_chan); - msleep(8); - - if (iop_adma_status(dma_chan, cookie, NULL) != - DMA_COMPLETE) { - dev_err(dev, "Self-test !pq-zero-sum timed out, disabling\n"); - err = -ENODEV; - goto free_resources; - } - - if (zero_sum_result != (SUM_CHECK_P_RESULT | SUM_CHECK_Q_RESULT)) { - dev_err(dev, "Self-test !pq-zero-sum failed to validate: %x\n", - zero_sum_result); - err = -ENODEV; - goto free_resources; - } - -free_resources: - iop_adma_free_chan_resources(dma_chan); -out: - i = ARRAY_SIZE(pq); - while (i--) - __free_page(pq[i]); - return err; -} -#endif - -static int iop_adma_remove(struct platform_device *dev) -{ - struct iop_adma_device *device = platform_get_drvdata(dev); - struct dma_chan *chan, *_chan; - struct iop_adma_chan *iop_chan; - struct iop_adma_platform_data *plat_data = dev_get_platdata(&dev->dev); - - dma_async_device_unregister(&device->common); - - dma_free_coherent(&dev->dev, plat_data->pool_size, - device->dma_desc_pool_virt, device->dma_desc_pool); - - list_for_each_entry_safe(chan, _chan, &device->common.channels, - device_node) { - iop_chan = to_iop_adma_chan(chan); - list_del(&chan->device_node); - kfree(iop_chan); - } - kfree(device); - - return 0; -} - -static int iop_adma_probe(struct platform_device *pdev) -{ - struct resource *res; - int ret = 0, i; - struct iop_adma_device *adev; - struct iop_adma_chan *iop_chan; - struct dma_device *dma_dev; - struct iop_adma_platform_data *plat_data = 
dev_get_platdata(&pdev->dev); - - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) - return -ENODEV; - - if (!devm_request_mem_region(&pdev->dev, res->start, - resource_size(res), pdev->name)) - return -EBUSY; - - adev = kzalloc(sizeof(*adev), GFP_KERNEL); - if (!adev) - return -ENOMEM; - dma_dev = &adev->common; - - /* allocate coherent memory for hardware descriptors - * note: writecombine gives slightly better performance, but - * requires that we explicitly flush the writes - */ - adev->dma_desc_pool_virt = dma_alloc_wc(&pdev->dev, - plat_data->pool_size, - &adev->dma_desc_pool, - GFP_KERNEL); - if (!adev->dma_desc_pool_virt) { - ret = -ENOMEM; - goto err_free_adev; - } - - dev_dbg(&pdev->dev, "%s: allocated descriptor pool virt %p phys %pad\n", - __func__, adev->dma_desc_pool_virt, &adev->dma_desc_pool); - - adev->id = plat_data->hw_id; - - /* discover transaction capabilites from the platform data */ - dma_dev->cap_mask = plat_data->cap_mask; - - adev->pdev = pdev; - platform_set_drvdata(pdev, adev); - - INIT_LIST_HEAD(&dma_dev->channels); - - /* set base routines */ - dma_dev->device_alloc_chan_resources = iop_adma_alloc_chan_resources; - dma_dev->device_free_chan_resources = iop_adma_free_chan_resources; - dma_dev->device_tx_status = iop_adma_status; - dma_dev->device_issue_pending = iop_adma_issue_pending; - dma_dev->dev = &pdev->dev; - - /* set prep routines based on capability */ - if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) - dma_dev->device_prep_dma_memcpy = iop_adma_prep_dma_memcpy; - if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { - dma_dev->max_xor = iop_adma_get_max_xor(); - dma_dev->device_prep_dma_xor = iop_adma_prep_dma_xor; - } - if (dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask)) - dma_dev->device_prep_dma_xor_val = - iop_adma_prep_dma_xor_val; - if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) { - dma_set_maxpq(dma_dev, iop_adma_get_max_pq(), 0); - dma_dev->device_prep_dma_pq = iop_adma_prep_dma_pq; - } - if 
(dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask)) - dma_dev->device_prep_dma_pq_val = - iop_adma_prep_dma_pq_val; - if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask)) - dma_dev->device_prep_dma_interrupt = - iop_adma_prep_dma_interrupt; - - iop_chan = kzalloc(sizeof(*iop_chan), GFP_KERNEL); - if (!iop_chan) { - ret = -ENOMEM; - goto err_free_dma; - } - iop_chan->device = adev; - - iop_chan->mmr_base = devm_ioremap(&pdev->dev, res->start, - resource_size(res)); - if (!iop_chan->mmr_base) { - ret = -ENOMEM; - goto err_free_iop_chan; - } - tasklet_setup(&iop_chan->irq_tasklet, iop_adma_tasklet); - - /* clear errors before enabling interrupts */ - iop_adma_device_clear_err_status(iop_chan); - - for (i = 0; i < 3; i++) { - static const irq_handler_t handler[] = { - iop_adma_eot_handler, - iop_adma_eoc_handler, - iop_adma_err_handler - }; - int irq = platform_get_irq(pdev, i); - if (irq < 0) { - ret = -ENXIO; - goto err_free_iop_chan; - } else { - ret = devm_request_irq(&pdev->dev, irq, - handler[i], 0, pdev->name, iop_chan); - if (ret) - goto err_free_iop_chan; - } - } - - spin_lock_init(&iop_chan->lock); - INIT_LIST_HEAD(&iop_chan->chain); - INIT_LIST_HEAD(&iop_chan->all_slots); - iop_chan->common.device = dma_dev; - dma_cookie_init(&iop_chan->common); - list_add_tail(&iop_chan->common.device_node, &dma_dev->channels); - - if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) { - ret = iop_adma_memcpy_self_test(adev); - dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret); - if (ret) - goto err_free_iop_chan; - } - - if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { - ret = iop_adma_xor_val_self_test(adev); - dev_dbg(&pdev->dev, "xor self test returned %d\n", ret); - if (ret) - goto err_free_iop_chan; - } - - if (dma_has_cap(DMA_PQ, dma_dev->cap_mask) && - dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask)) { - #ifdef CONFIG_RAID6_PQ - ret = iop_adma_pq_zero_sum_self_test(adev); - dev_dbg(&pdev->dev, "pq self test returned %d\n", ret); - #else - /* can not test raid6, so do not 
publish capability */ - dma_cap_clear(DMA_PQ, dma_dev->cap_mask); - dma_cap_clear(DMA_PQ_VAL, dma_dev->cap_mask); - ret = 0; - #endif - if (ret) - goto err_free_iop_chan; - } - - dev_info(&pdev->dev, "Intel(R) IOP: ( %s%s%s%s%s%s)\n", - dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "pq " : "", - dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask) ? "pq_val " : "", - dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "", - dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask) ? "xor_val " : "", - dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "", - dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : ""); - - dma_async_device_register(dma_dev); - goto out; - - err_free_iop_chan: - kfree(iop_chan); - err_free_dma: - dma_free_coherent(&adev->pdev->dev, plat_data->pool_size, - adev->dma_desc_pool_virt, adev->dma_desc_pool); - err_free_adev: - kfree(adev); - out: - return ret; -} - -static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan) -{ - struct iop_adma_desc_slot *sw_desc, *grp_start; - dma_cookie_t cookie; - int slot_cnt, slots_per_op; - - dev_dbg(iop_chan->device->common.dev, "%s\n", __func__); - - spin_lock_bh(&iop_chan->lock); - slot_cnt = iop_chan_memcpy_slot_count(0, &slots_per_op); - sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); - if (sw_desc) { - grp_start = sw_desc->group_head; - - list_splice_init(&sw_desc->tx_list, &iop_chan->chain); - async_tx_ack(&sw_desc->async_tx); - iop_desc_init_memcpy(grp_start, 0); - iop_desc_set_byte_count(grp_start, iop_chan, 0); - iop_desc_set_dest_addr(grp_start, iop_chan, 0); - iop_desc_set_memcpy_src_addr(grp_start, 0); - - cookie = dma_cookie_assign(&sw_desc->async_tx); - - /* initialize the completed cookie to be less than - * the most recently used cookie - */ - iop_chan->common.completed_cookie = cookie - 1; - - /* channel should not be busy */ - BUG_ON(iop_chan_is_busy(iop_chan)); - - /* clear any prior error-status bits */ - iop_adma_device_clear_err_status(iop_chan); - - /* disable operation */ 
- iop_chan_disable(iop_chan); - - /* set the descriptor address */ - iop_chan_set_next_descriptor(iop_chan, sw_desc->async_tx.phys); - - /* 1/ don't add pre-chained descriptors - * 2/ dummy read to flush next_desc write - */ - BUG_ON(iop_desc_get_next_desc(sw_desc)); - - /* run the descriptor */ - iop_chan_enable(iop_chan); - } else - dev_err(iop_chan->device->common.dev, - "failed to allocate null descriptor\n"); - spin_unlock_bh(&iop_chan->lock); -} - -static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan) -{ - struct iop_adma_desc_slot *sw_desc, *grp_start; - dma_cookie_t cookie; - int slot_cnt, slots_per_op; - - dev_dbg(iop_chan->device->common.dev, "%s\n", __func__); - - spin_lock_bh(&iop_chan->lock); - slot_cnt = iop_chan_xor_slot_count(0, 2, &slots_per_op); - sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); - if (sw_desc) { - grp_start = sw_desc->group_head; - list_splice_init(&sw_desc->tx_list, &iop_chan->chain); - async_tx_ack(&sw_desc->async_tx); - iop_desc_init_null_xor(grp_start, 2, 0); - iop_desc_set_byte_count(grp_start, iop_chan, 0); - iop_desc_set_dest_addr(grp_start, iop_chan, 0); - iop_desc_set_xor_src_addr(grp_start, 0, 0); - iop_desc_set_xor_src_addr(grp_start, 1, 0); - - cookie = dma_cookie_assign(&sw_desc->async_tx); - - /* initialize the completed cookie to be less than - * the most recently used cookie - */ - iop_chan->common.completed_cookie = cookie - 1; - - /* channel should not be busy */ - BUG_ON(iop_chan_is_busy(iop_chan)); - - /* clear any prior error-status bits */ - iop_adma_device_clear_err_status(iop_chan); - - /* disable operation */ - iop_chan_disable(iop_chan); - - /* set the descriptor address */ - iop_chan_set_next_descriptor(iop_chan, sw_desc->async_tx.phys); - - /* 1/ don't add pre-chained descriptors - * 2/ dummy read to flush next_desc write - */ - BUG_ON(iop_desc_get_next_desc(sw_desc)); - - /* run the descriptor */ - iop_chan_enable(iop_chan); - } else - 
dev_err(iop_chan->device->common.dev, - "failed to allocate null descriptor\n"); - spin_unlock_bh(&iop_chan->lock); -} - -static struct platform_driver iop_adma_driver = { - .probe = iop_adma_probe, - .remove = iop_adma_remove, - .driver = { - .name = "iop-adma", - }, -}; - -module_platform_driver(iop_adma_driver); - -MODULE_AUTHOR("Intel Corporation"); -MODULE_DESCRIPTION("IOP ADMA Engine Driver"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("platform:iop-adma"); diff --git a/drivers/dma/iop-adma.h b/drivers/dma/iop-adma.h deleted file mode 100644 index d44eabb6f5eb..000000000000 --- a/drivers/dma/iop-adma.h +++ /dev/null @@ -1,914 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright © 2006, Intel Corporation. - */ -#ifndef _ADMA_H -#define _ADMA_H -#include -#include -#include - -/* Memory copy units */ -#define DMA_CCR(chan) (chan->mmr_base + 0x0) -#define DMA_CSR(chan) (chan->mmr_base + 0x4) -#define DMA_DAR(chan) (chan->mmr_base + 0xc) -#define DMA_NDAR(chan) (chan->mmr_base + 0x10) -#define DMA_PADR(chan) (chan->mmr_base + 0x14) -#define DMA_PUADR(chan) (chan->mmr_base + 0x18) -#define DMA_LADR(chan) (chan->mmr_base + 0x1c) -#define DMA_BCR(chan) (chan->mmr_base + 0x20) -#define DMA_DCR(chan) (chan->mmr_base + 0x24) - -/* Application accelerator unit */ -#define AAU_ACR(chan) (chan->mmr_base + 0x0) -#define AAU_ASR(chan) (chan->mmr_base + 0x4) -#define AAU_ADAR(chan) (chan->mmr_base + 0x8) -#define AAU_ANDAR(chan) (chan->mmr_base + 0xc) -#define AAU_SAR(src, chan) (chan->mmr_base + (0x10 + ((src) << 2))) -#define AAU_DAR(chan) (chan->mmr_base + 0x20) -#define AAU_ABCR(chan) (chan->mmr_base + 0x24) -#define AAU_ADCR(chan) (chan->mmr_base + 0x28) -#define AAU_SAR_EDCR(src_edc) (chan->mmr_base + (0x02c + ((src_edc-4) << 2))) -#define AAU_EDCR0_IDX 8 -#define AAU_EDCR1_IDX 17 -#define AAU_EDCR2_IDX 26 - -struct iop3xx_aau_desc_ctrl { - unsigned int int_en:1; - unsigned int blk1_cmd_ctrl:3; - unsigned int blk2_cmd_ctrl:3; - unsigned int 
blk3_cmd_ctrl:3; - unsigned int blk4_cmd_ctrl:3; - unsigned int blk5_cmd_ctrl:3; - unsigned int blk6_cmd_ctrl:3; - unsigned int blk7_cmd_ctrl:3; - unsigned int blk8_cmd_ctrl:3; - unsigned int blk_ctrl:2; - unsigned int dual_xor_en:1; - unsigned int tx_complete:1; - unsigned int zero_result_err:1; - unsigned int zero_result_en:1; - unsigned int dest_write_en:1; -}; - -struct iop3xx_aau_e_desc_ctrl { - unsigned int reserved:1; - unsigned int blk1_cmd_ctrl:3; - unsigned int blk2_cmd_ctrl:3; - unsigned int blk3_cmd_ctrl:3; - unsigned int blk4_cmd_ctrl:3; - unsigned int blk5_cmd_ctrl:3; - unsigned int blk6_cmd_ctrl:3; - unsigned int blk7_cmd_ctrl:3; - unsigned int blk8_cmd_ctrl:3; - unsigned int reserved2:7; -}; - -struct iop3xx_dma_desc_ctrl { - unsigned int pci_transaction:4; - unsigned int int_en:1; - unsigned int dac_cycle_en:1; - unsigned int mem_to_mem_en:1; - unsigned int crc_data_tx_en:1; - unsigned int crc_gen_en:1; - unsigned int crc_seed_dis:1; - unsigned int reserved:21; - unsigned int crc_tx_complete:1; -}; - -struct iop3xx_desc_dma { - u32 next_desc; - union { - u32 pci_src_addr; - u32 pci_dest_addr; - u32 src_addr; - }; - union { - u32 upper_pci_src_addr; - u32 upper_pci_dest_addr; - }; - union { - u32 local_pci_src_addr; - u32 local_pci_dest_addr; - u32 dest_addr; - }; - u32 byte_count; - union { - u32 desc_ctrl; - struct iop3xx_dma_desc_ctrl desc_ctrl_field; - }; - u32 crc_addr; -}; - -struct iop3xx_desc_aau { - u32 next_desc; - u32 src[4]; - u32 dest_addr; - u32 byte_count; - union { - u32 desc_ctrl; - struct iop3xx_aau_desc_ctrl desc_ctrl_field; - }; - union { - u32 src_addr; - u32 e_desc_ctrl; - struct iop3xx_aau_e_desc_ctrl e_desc_ctrl_field; - } src_edc[31]; -}; - -struct iop3xx_aau_gfmr { - unsigned int gfmr1:8; - unsigned int gfmr2:8; - unsigned int gfmr3:8; - unsigned int gfmr4:8; -}; - -struct iop3xx_desc_pq_xor { - u32 next_desc; - u32 src[3]; - union { - u32 data_mult1; - struct iop3xx_aau_gfmr data_mult1_field; - }; - u32 dest_addr; - u32 
byte_count; - union { - u32 desc_ctrl; - struct iop3xx_aau_desc_ctrl desc_ctrl_field; - }; - union { - u32 src_addr; - u32 e_desc_ctrl; - struct iop3xx_aau_e_desc_ctrl e_desc_ctrl_field; - u32 data_multiplier; - struct iop3xx_aau_gfmr data_mult_field; - u32 reserved; - } src_edc_gfmr[19]; -}; - -struct iop3xx_desc_dual_xor { - u32 next_desc; - u32 src0_addr; - u32 src1_addr; - u32 h_src_addr; - u32 d_src_addr; - u32 h_dest_addr; - u32 byte_count; - union { - u32 desc_ctrl; - struct iop3xx_aau_desc_ctrl desc_ctrl_field; - }; - u32 d_dest_addr; -}; - -union iop3xx_desc { - struct iop3xx_desc_aau *aau; - struct iop3xx_desc_dma *dma; - struct iop3xx_desc_pq_xor *pq_xor; - struct iop3xx_desc_dual_xor *dual_xor; - void *ptr; -}; - -/* No support for p+q operations */ -static inline int -iop_chan_pq_slot_count(size_t len, int src_cnt, int *slots_per_op) -{ - BUG(); - return 0; -} - -static inline void -iop_desc_init_pq(struct iop_adma_desc_slot *desc, int src_cnt, - unsigned long flags) -{ - BUG(); -} - -static inline void -iop_desc_set_pq_addr(struct iop_adma_desc_slot *desc, dma_addr_t *addr) -{ - BUG(); -} - -static inline void -iop_desc_set_pq_src_addr(struct iop_adma_desc_slot *desc, int src_idx, - dma_addr_t addr, unsigned char coef) -{ - BUG(); -} - -static inline int -iop_chan_pq_zero_sum_slot_count(size_t len, int src_cnt, int *slots_per_op) -{ - BUG(); - return 0; -} - -static inline void -iop_desc_init_pq_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt, - unsigned long flags) -{ - BUG(); -} - -static inline void -iop_desc_set_pq_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len) -{ - BUG(); -} - -#define iop_desc_set_pq_zero_sum_src_addr iop_desc_set_pq_src_addr - -static inline void -iop_desc_set_pq_zero_sum_addr(struct iop_adma_desc_slot *desc, int pq_idx, - dma_addr_t *src) -{ - BUG(); -} - -static inline int iop_adma_get_max_xor(void) -{ - return 32; -} - -static inline int iop_adma_get_max_pq(void) -{ - BUG(); - return 0; -} - -static 
inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan) -{ - int id = chan->device->id; - - switch (id) { - case DMA0_ID: - case DMA1_ID: - return __raw_readl(DMA_DAR(chan)); - case AAU_ID: - return __raw_readl(AAU_ADAR(chan)); - default: - BUG(); - } - return 0; -} - -static inline void iop_chan_set_next_descriptor(struct iop_adma_chan *chan, - u32 next_desc_addr) -{ - int id = chan->device->id; - - switch (id) { - case DMA0_ID: - case DMA1_ID: - __raw_writel(next_desc_addr, DMA_NDAR(chan)); - break; - case AAU_ID: - __raw_writel(next_desc_addr, AAU_ANDAR(chan)); - break; - } - -} - -#define IOP_ADMA_STATUS_BUSY (1 << 10) -#define IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT (1024) -#define IOP_ADMA_XOR_MAX_BYTE_COUNT (16 * 1024 * 1024) -#define IOP_ADMA_MAX_BYTE_COUNT (16 * 1024 * 1024) - -static inline int iop_chan_is_busy(struct iop_adma_chan *chan) -{ - u32 status = __raw_readl(DMA_CSR(chan)); - return (status & IOP_ADMA_STATUS_BUSY) ? 1 : 0; -} - -static inline int iop_desc_is_aligned(struct iop_adma_desc_slot *desc, - int num_slots) -{ - /* num_slots will only ever be 1, 2, 4, or 8 */ - return (desc->idx & (num_slots - 1)) ? 0 : 1; -} - -/* to do: support large (i.e. > hw max) buffer sizes */ -static inline int iop_chan_memcpy_slot_count(size_t len, int *slots_per_op) -{ - *slots_per_op = 1; - return 1; -} - -/* to do: support large (i.e. 
> hw max) buffer sizes */ -static inline int iop_chan_memset_slot_count(size_t len, int *slots_per_op) -{ - *slots_per_op = 1; - return 1; -} - -static inline int iop3xx_aau_xor_slot_count(size_t len, int src_cnt, - int *slots_per_op) -{ - static const char slot_count_table[] = { - 1, 1, 1, 1, /* 01 - 04 */ - 2, 2, 2, 2, /* 05 - 08 */ - 4, 4, 4, 4, /* 09 - 12 */ - 4, 4, 4, 4, /* 13 - 16 */ - 8, 8, 8, 8, /* 17 - 20 */ - 8, 8, 8, 8, /* 21 - 24 */ - 8, 8, 8, 8, /* 25 - 28 */ - 8, 8, 8, 8, /* 29 - 32 */ - }; - *slots_per_op = slot_count_table[src_cnt - 1]; - return *slots_per_op; -} - -static inline int -iop_chan_interrupt_slot_count(int *slots_per_op, struct iop_adma_chan *chan) -{ - switch (chan->device->id) { - case DMA0_ID: - case DMA1_ID: - return iop_chan_memcpy_slot_count(0, slots_per_op); - case AAU_ID: - return iop3xx_aau_xor_slot_count(0, 2, slots_per_op); - default: - BUG(); - } - return 0; -} - -static inline int iop_chan_xor_slot_count(size_t len, int src_cnt, - int *slots_per_op) -{ - int slot_cnt = iop3xx_aau_xor_slot_count(len, src_cnt, slots_per_op); - - if (len <= IOP_ADMA_XOR_MAX_BYTE_COUNT) - return slot_cnt; - - len -= IOP_ADMA_XOR_MAX_BYTE_COUNT; - while (len > IOP_ADMA_XOR_MAX_BYTE_COUNT) { - len -= IOP_ADMA_XOR_MAX_BYTE_COUNT; - slot_cnt += *slots_per_op; - } - - slot_cnt += *slots_per_op; - - return slot_cnt; -} - -/* zero sum on iop3xx is limited to 1k at a time so it requires multiple - * descriptors - */ -static inline int iop_chan_zero_sum_slot_count(size_t len, int src_cnt, - int *slots_per_op) -{ - int slot_cnt = iop3xx_aau_xor_slot_count(len, src_cnt, slots_per_op); - - if (len <= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) - return slot_cnt; - - len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT; - while (len > IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) { - len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT; - slot_cnt += *slots_per_op; - } - - slot_cnt += *slots_per_op; - - return slot_cnt; -} - -static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc, - 
struct iop_adma_chan *chan) -{ - union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; - - switch (chan->device->id) { - case DMA0_ID: - case DMA1_ID: - return hw_desc.dma->byte_count; - case AAU_ID: - return hw_desc.aau->byte_count; - default: - BUG(); - } - return 0; -} - -/* translate the src_idx to a descriptor word index */ -static inline int __desc_idx(int src_idx) -{ - static const int desc_idx_table[] = { 0, 0, 0, 0, - 0, 1, 2, 3, - 5, 6, 7, 8, - 9, 10, 11, 12, - 14, 15, 16, 17, - 18, 19, 20, 21, - 23, 24, 25, 26, - 27, 28, 29, 30, - }; - - return desc_idx_table[src_idx]; -} - -static inline u32 iop_desc_get_src_addr(struct iop_adma_desc_slot *desc, - struct iop_adma_chan *chan, - int src_idx) -{ - union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; - - switch (chan->device->id) { - case DMA0_ID: - case DMA1_ID: - return hw_desc.dma->src_addr; - case AAU_ID: - break; - default: - BUG(); - } - - if (src_idx < 4) - return hw_desc.aau->src[src_idx]; - else - return hw_desc.aau->src_edc[__desc_idx(src_idx)].src_addr; -} - -static inline void iop3xx_aau_desc_set_src_addr(struct iop3xx_desc_aau *hw_desc, - int src_idx, dma_addr_t addr) -{ - if (src_idx < 4) - hw_desc->src[src_idx] = addr; - else - hw_desc->src_edc[__desc_idx(src_idx)].src_addr = addr; -} - -static inline void -iop_desc_init_memcpy(struct iop_adma_desc_slot *desc, unsigned long flags) -{ - struct iop3xx_desc_dma *hw_desc = desc->hw_desc; - union { - u32 value; - struct iop3xx_dma_desc_ctrl field; - } u_desc_ctrl; - - u_desc_ctrl.value = 0; - u_desc_ctrl.field.mem_to_mem_en = 1; - u_desc_ctrl.field.pci_transaction = 0xe; /* memory read block */ - u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT; - hw_desc->desc_ctrl = u_desc_ctrl.value; - hw_desc->upper_pci_src_addr = 0; - hw_desc->crc_addr = 0; -} - -static inline void -iop_desc_init_memset(struct iop_adma_desc_slot *desc, unsigned long flags) -{ - struct iop3xx_desc_aau *hw_desc = desc->hw_desc; - union { - u32 value; - struct 
iop3xx_aau_desc_ctrl field; - } u_desc_ctrl; - - u_desc_ctrl.value = 0; - u_desc_ctrl.field.blk1_cmd_ctrl = 0x2; /* memory block fill */ - u_desc_ctrl.field.dest_write_en = 1; - u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT; - hw_desc->desc_ctrl = u_desc_ctrl.value; -} - -static inline u32 -iop3xx_desc_init_xor(struct iop3xx_desc_aau *hw_desc, int src_cnt, - unsigned long flags) -{ - int i, shift; - u32 edcr; - union { - u32 value; - struct iop3xx_aau_desc_ctrl field; - } u_desc_ctrl; - - u_desc_ctrl.value = 0; - switch (src_cnt) { - case 25 ... 32: - u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */ - edcr = 0; - shift = 1; - for (i = 24; i < src_cnt; i++) { - edcr |= (1 << shift); - shift += 3; - } - hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = edcr; - src_cnt = 24; - fallthrough; - case 17 ... 24: - if (!u_desc_ctrl.field.blk_ctrl) { - hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = 0; - u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */ - } - edcr = 0; - shift = 1; - for (i = 16; i < src_cnt; i++) { - edcr |= (1 << shift); - shift += 3; - } - hw_desc->src_edc[AAU_EDCR1_IDX].e_desc_ctrl = edcr; - src_cnt = 16; - fallthrough; - case 9 ... 16: - if (!u_desc_ctrl.field.blk_ctrl) - u_desc_ctrl.field.blk_ctrl = 0x2; /* use EDCR0 */ - edcr = 0; - shift = 1; - for (i = 8; i < src_cnt; i++) { - edcr |= (1 << shift); - shift += 3; - } - hw_desc->src_edc[AAU_EDCR0_IDX].e_desc_ctrl = edcr; - src_cnt = 8; - fallthrough; - case 2 ... 
8: - shift = 1; - for (i = 0; i < src_cnt; i++) { - u_desc_ctrl.value |= (1 << shift); - shift += 3; - } - - if (!u_desc_ctrl.field.blk_ctrl && src_cnt > 4) - u_desc_ctrl.field.blk_ctrl = 0x1; /* use mini-desc */ - } - - u_desc_ctrl.field.dest_write_en = 1; - u_desc_ctrl.field.blk1_cmd_ctrl = 0x7; /* direct fill */ - u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT; - hw_desc->desc_ctrl = u_desc_ctrl.value; - - return u_desc_ctrl.value; -} - -static inline void -iop_desc_init_xor(struct iop_adma_desc_slot *desc, int src_cnt, - unsigned long flags) -{ - iop3xx_desc_init_xor(desc->hw_desc, src_cnt, flags); -} - -/* return the number of operations */ -static inline int -iop_desc_init_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt, - unsigned long flags) -{ - int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op; - struct iop3xx_desc_aau *hw_desc, *prev_hw_desc, *iter; - union { - u32 value; - struct iop3xx_aau_desc_ctrl field; - } u_desc_ctrl; - int i, j; - - hw_desc = desc->hw_desc; - - for (i = 0, j = 0; (slot_cnt -= slots_per_op) >= 0; - i += slots_per_op, j++) { - iter = iop_hw_desc_slot_idx(hw_desc, i); - u_desc_ctrl.value = iop3xx_desc_init_xor(iter, src_cnt, flags); - u_desc_ctrl.field.dest_write_en = 0; - u_desc_ctrl.field.zero_result_en = 1; - u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT; - iter->desc_ctrl = u_desc_ctrl.value; - - /* for the subsequent descriptors preserve the store queue - * and chain them together - */ - if (i) { - prev_hw_desc = - iop_hw_desc_slot_idx(hw_desc, i - slots_per_op); - prev_hw_desc->next_desc = - (u32) (desc->async_tx.phys + (i << 5)); - } - } - - return j; -} - -static inline void -iop_desc_init_null_xor(struct iop_adma_desc_slot *desc, int src_cnt, - unsigned long flags) -{ - struct iop3xx_desc_aau *hw_desc = desc->hw_desc; - union { - u32 value; - struct iop3xx_aau_desc_ctrl field; - } u_desc_ctrl; - - u_desc_ctrl.value = 0; - switch (src_cnt) { - case 25 ... 
32: - u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */ - hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = 0; - fallthrough; - case 17 ... 24: - if (!u_desc_ctrl.field.blk_ctrl) { - hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = 0; - u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */ - } - hw_desc->src_edc[AAU_EDCR1_IDX].e_desc_ctrl = 0; - fallthrough; - case 9 ... 16: - if (!u_desc_ctrl.field.blk_ctrl) - u_desc_ctrl.field.blk_ctrl = 0x2; /* use EDCR0 */ - hw_desc->src_edc[AAU_EDCR0_IDX].e_desc_ctrl = 0; - fallthrough; - case 1 ... 8: - if (!u_desc_ctrl.field.blk_ctrl && src_cnt > 4) - u_desc_ctrl.field.blk_ctrl = 0x1; /* use mini-desc */ - } - - u_desc_ctrl.field.dest_write_en = 0; - u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT; - hw_desc->desc_ctrl = u_desc_ctrl.value; -} - -static inline void iop_desc_set_byte_count(struct iop_adma_desc_slot *desc, - struct iop_adma_chan *chan, - u32 byte_count) -{ - union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; - - switch (chan->device->id) { - case DMA0_ID: - case DMA1_ID: - hw_desc.dma->byte_count = byte_count; - break; - case AAU_ID: - hw_desc.aau->byte_count = byte_count; - break; - default: - BUG(); - } -} - -static inline void -iop_desc_init_interrupt(struct iop_adma_desc_slot *desc, - struct iop_adma_chan *chan) -{ - union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; - - switch (chan->device->id) { - case DMA0_ID: - case DMA1_ID: - iop_desc_init_memcpy(desc, 1); - hw_desc.dma->byte_count = 0; - hw_desc.dma->dest_addr = 0; - hw_desc.dma->src_addr = 0; - break; - case AAU_ID: - iop_desc_init_null_xor(desc, 2, 1); - hw_desc.aau->byte_count = 0; - hw_desc.aau->dest_addr = 0; - hw_desc.aau->src[0] = 0; - hw_desc.aau->src[1] = 0; - break; - default: - BUG(); - } -} - -static inline void -iop_desc_set_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len) -{ - int slots_per_op = desc->slots_per_op; - struct iop3xx_desc_aau *hw_desc = desc->hw_desc, *iter; - int i = 0; - - if (len <= 
IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) { - hw_desc->byte_count = len; - } else { - do { - iter = iop_hw_desc_slot_idx(hw_desc, i); - iter->byte_count = IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT; - len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT; - i += slots_per_op; - } while (len > IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT); - - iter = iop_hw_desc_slot_idx(hw_desc, i); - iter->byte_count = len; - } -} - -static inline void iop_desc_set_dest_addr(struct iop_adma_desc_slot *desc, - struct iop_adma_chan *chan, - dma_addr_t addr) -{ - union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; - - switch (chan->device->id) { - case DMA0_ID: - case DMA1_ID: - hw_desc.dma->dest_addr = addr; - break; - case AAU_ID: - hw_desc.aau->dest_addr = addr; - break; - default: - BUG(); - } -} - -static inline void iop_desc_set_memcpy_src_addr(struct iop_adma_desc_slot *desc, - dma_addr_t addr) -{ - struct iop3xx_desc_dma *hw_desc = desc->hw_desc; - hw_desc->src_addr = addr; -} - -static inline void -iop_desc_set_zero_sum_src_addr(struct iop_adma_desc_slot *desc, int src_idx, - dma_addr_t addr) -{ - - struct iop3xx_desc_aau *hw_desc = desc->hw_desc, *iter; - int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op; - int i; - - for (i = 0; (slot_cnt -= slots_per_op) >= 0; - i += slots_per_op, addr += IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) { - iter = iop_hw_desc_slot_idx(hw_desc, i); - iop3xx_aau_desc_set_src_addr(iter, src_idx, addr); - } -} - -static inline void iop_desc_set_xor_src_addr(struct iop_adma_desc_slot *desc, - int src_idx, dma_addr_t addr) -{ - - struct iop3xx_desc_aau *hw_desc = desc->hw_desc, *iter; - int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op; - int i; - - for (i = 0; (slot_cnt -= slots_per_op) >= 0; - i += slots_per_op, addr += IOP_ADMA_XOR_MAX_BYTE_COUNT) { - iter = iop_hw_desc_slot_idx(hw_desc, i); - iop3xx_aau_desc_set_src_addr(iter, src_idx, addr); - } -} - -static inline void iop_desc_set_next_desc(struct iop_adma_desc_slot *desc, - u32 next_desc_addr) -{ - /* 
hw_desc->next_desc is the same location for all channels */ - union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; - - iop_paranoia(hw_desc.dma->next_desc); - hw_desc.dma->next_desc = next_desc_addr; -} - -static inline u32 iop_desc_get_next_desc(struct iop_adma_desc_slot *desc) -{ - /* hw_desc->next_desc is the same location for all channels */ - union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; - return hw_desc.dma->next_desc; -} - -static inline void iop_desc_clear_next_desc(struct iop_adma_desc_slot *desc) -{ - /* hw_desc->next_desc is the same location for all channels */ - union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; - hw_desc.dma->next_desc = 0; -} - -static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc, - u32 val) -{ - struct iop3xx_desc_aau *hw_desc = desc->hw_desc; - hw_desc->src[0] = val; -} - -static inline enum sum_check_flags -iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) -{ - struct iop3xx_desc_aau *hw_desc = desc->hw_desc; - struct iop3xx_aau_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field; - - iop_paranoia(!(desc_ctrl.tx_complete && desc_ctrl.zero_result_en)); - return desc_ctrl.zero_result_err << SUM_CHECK_P; -} - -static inline void iop_chan_append(struct iop_adma_chan *chan) -{ - u32 dma_chan_ctrl; - - dma_chan_ctrl = __raw_readl(DMA_CCR(chan)); - dma_chan_ctrl |= 0x2; - __raw_writel(dma_chan_ctrl, DMA_CCR(chan)); -} - -static inline u32 iop_chan_get_status(struct iop_adma_chan *chan) -{ - return __raw_readl(DMA_CSR(chan)); -} - -static inline void iop_chan_disable(struct iop_adma_chan *chan) -{ - u32 dma_chan_ctrl = __raw_readl(DMA_CCR(chan)); - dma_chan_ctrl &= ~1; - __raw_writel(dma_chan_ctrl, DMA_CCR(chan)); -} - -static inline void iop_chan_enable(struct iop_adma_chan *chan) -{ - u32 dma_chan_ctrl = __raw_readl(DMA_CCR(chan)); - - dma_chan_ctrl |= 1; - __raw_writel(dma_chan_ctrl, DMA_CCR(chan)); -} - -static inline void iop_adma_device_clear_eot_status(struct iop_adma_chan *chan) -{ - 
u32 status = __raw_readl(DMA_CSR(chan)); - status &= (1 << 9); - __raw_writel(status, DMA_CSR(chan)); -} - -static inline void iop_adma_device_clear_eoc_status(struct iop_adma_chan *chan) -{ - u32 status = __raw_readl(DMA_CSR(chan)); - status &= (1 << 8); - __raw_writel(status, DMA_CSR(chan)); -} - -static inline void iop_adma_device_clear_err_status(struct iop_adma_chan *chan) -{ - u32 status = __raw_readl(DMA_CSR(chan)); - - switch (chan->device->id) { - case DMA0_ID: - case DMA1_ID: - status &= (1 << 5) | (1 << 3) | (1 << 2) | (1 << 1); - break; - case AAU_ID: - status &= (1 << 5); - break; - default: - BUG(); - } - - __raw_writel(status, DMA_CSR(chan)); -} - -static inline int -iop_is_err_int_parity(unsigned long status, struct iop_adma_chan *chan) -{ - return 0; -} - -static inline int -iop_is_err_mcu_abort(unsigned long status, struct iop_adma_chan *chan) -{ - return 0; -} - -static inline int -iop_is_err_int_tabort(unsigned long status, struct iop_adma_chan *chan) -{ - return 0; -} - -static inline int -iop_is_err_int_mabort(unsigned long status, struct iop_adma_chan *chan) -{ - return test_bit(5, &status); -} - -static inline int -iop_is_err_pci_tabort(unsigned long status, struct iop_adma_chan *chan) -{ - switch (chan->device->id) { - case DMA0_ID: - case DMA1_ID: - return test_bit(2, &status); - default: - return 0; - } -} - -static inline int -iop_is_err_pci_mabort(unsigned long status, struct iop_adma_chan *chan) -{ - switch (chan->device->id) { - case DMA0_ID: - case DMA1_ID: - return test_bit(3, &status); - default: - return 0; - } -} - -static inline int -iop_is_err_split_tx(unsigned long status, struct iop_adma_chan *chan) -{ - switch (chan->device->id) { - case DMA0_ID: - case DMA1_ID: - return test_bit(1, &status); - default: - return 0; - } -} -#endif /* _ADMA_H */ From 7ebe49b76a001b10b007193b1771f33e6cbc4f3f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 16 Oct 2022 12:41:26 +0200 Subject: [PATCH 0193/4122] driver core: allow 
kobj_to_dev() to take a const pointer If a const * to a kobject is passed to kobj_to_dev(), we want to return back a const * to a device as the driver core shouldn't be modifying a constant structure. But when dealing with container_of() the pointer const attribute is cast away, so we need to manually handle this by determining the type of the pointer passed in to know the type of the pointer to pass out. Luckily _Generic can do this type of magic, and as the kernel now supports C11 it is available to us to handle this type of build-time type detection. Cc: "Rafael J. Wysocki" Reviewed-by: Sakari Ailus Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221016104126.1259809-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/include/linux/device.h b/include/linux/device.h index 424b55df0272..023ea50b1916 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -680,11 +680,27 @@ struct device_link { bool supplier_preactivated; /* Owned by consumer probe. */ }; -static inline struct device *kobj_to_dev(struct kobject *kobj) +static inline struct device *__kobj_to_dev(struct kobject *kobj) { return container_of(kobj, struct device, kobj); } +static inline const struct device *__kobj_to_dev_const(const struct kobject *kobj) +{ + return container_of(kobj, const struct device, kobj); +} + +/* + * container_of() will happily take a const * and spit back a non-const * as it + * is just doing pointer math. But we want to be a bit more careful in the + * driver code, so manually force any const * of a kobject to also be a const * + * to a device. 
+ */ +#define kobj_to_dev(kobj) \ + _Generic((kobj), \ + const struct kobject *: __kobj_to_dev_const, \ + struct kobject *: __kobj_to_dev)(kobj) + /** * device_iommu_mapped - Returns true when the device DMA is translated * by an IOMMU From 593efa4091f5f05c224f8b7fd204d18dbff97e31 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 16 Oct 2022 12:41:55 +0200 Subject: [PATCH 0194/4122] USB: allow some usb functions to take a const pointer. The functions to_usb_interface(), to_usb_device, and interface_to_usbdev() sometimes would like to take a const * and return a const * back. As we are doing pointer math, a call to container_of() loses the const-ness of a pointer, so use a _Generic() macro to pick the proper inline function to call instead. Link: https://lore.kernel.org/r/20221016104155.1260201-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 55 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 52 insertions(+), 3 deletions(-) diff --git a/include/linux/usb.h b/include/linux/usb.h index 9ff1ad4dfad1..3a55131e0ad4 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -258,7 +258,27 @@ struct usb_interface { struct device *usb_dev; struct work_struct reset_ws; /* for resets in atomic context */ }; -#define to_usb_interface(d) container_of(d, struct usb_interface, dev) + +static inline struct usb_interface *__to_usb_interface(struct device *d) +{ + return container_of(d, struct usb_interface, dev); +} + +static inline const struct usb_interface *__to_usb_interface_const(const struct device *d) +{ + return container_of(d, struct usb_interface, dev); +} + +/* + * container_of() will happily take a const * and spit back a non-const * as it + * is just doing pointer math. But we want to be a bit more careful in the USB + * driver code, so manually force any const * of a device to also be a const * + * to a usb_device. 
+ */ +#define to_usb_interface(dev) \ + _Generic((dev), \ + const struct device *: __to_usb_interface_const, \ + struct device *: __to_usb_interface)(dev) static inline void *usb_get_intfdata(struct usb_interface *intf) { @@ -709,12 +729,41 @@ struct usb_device { u16 hub_delay; unsigned use_generic_driver:1; }; -#define to_usb_device(d) container_of(d, struct usb_device, dev) -static inline struct usb_device *interface_to_usbdev(struct usb_interface *intf) +static inline struct usb_device *__to_usb_device(struct device *d) +{ + return container_of(d, struct usb_device, dev); +} + +static inline const struct usb_device *__to_usb_device_const(const struct device *d) +{ + return container_of(d, struct usb_device, dev); +} + +/* + * container_of() will happily take a const * and spit back a non-const * as it + * is just doing pointer math. But we want to be a bit more careful in the USB + * driver code, so manually force any const * of a device to also be a const * + * to a usb_device. + */ +#define to_usb_device(dev) \ + _Generic((dev), \ + const struct device *: __to_usb_device_const, \ + struct device *: __to_usb_device)(dev) + +static inline struct usb_device *__intf_to_usbdev(struct usb_interface *intf) { return to_usb_device(intf->dev.parent); } +static inline const struct usb_device *__intf_to_usbdev_const(const struct usb_interface *intf) +{ + return to_usb_device((const struct device *)intf->dev.parent); +} + +#define interface_to_usbdev(intf) \ + _Generic((intf), \ + const struct usb_interface *: __intf_to_usbdev_const, \ + struct usb_interface *: __intf_to_usbdev)(intf) extern struct usb_device *usb_get_dev(struct usb_device *dev); extern void usb_put_dev(struct usb_device *dev); From 5033ac5c580cb22245a0c2b9e53d508e8fdd50d8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 1 Oct 2022 18:51:28 +0200 Subject: [PATCH 0195/4122] USB: make devnode() callback in usb_class_driver take a const * With the changes to the driver core to make more pointers 
const, the USB subsystem also needs to be modified to take a const * for the devnode callback so that the driver core's constant pointer will also be properly propagated. Cc: Benjamin Tissoires Cc: Juergen Stuber Reviewed-by: Johan Hovold Acked-by: Pete Zaitcev Reviewed-by: Jiri Kosina Link: https://lore.kernel.org/r/20221001165128.2688526-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/hid/usbhid/hiddev.c | 2 +- drivers/usb/class/usblp.c | 2 +- drivers/usb/misc/iowarrior.c | 2 +- drivers/usb/misc/legousbtower.c | 2 +- include/linux/usb.h | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c index 2fb2991dbe4c..59cf3ddfdf78 100644 --- a/drivers/hid/usbhid/hiddev.c +++ b/drivers/hid/usbhid/hiddev.c @@ -857,7 +857,7 @@ static const struct file_operations hiddev_fops = { .llseek = noop_llseek, }; -static char *hiddev_devnode(struct device *dev, umode_t *mode) +static char *hiddev_devnode(const struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "usb/%s", dev_name(dev)); } diff --git a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c index f27b4aecff3d..5a2e43331064 100644 --- a/drivers/usb/class/usblp.c +++ b/drivers/usb/class/usblp.c @@ -1090,7 +1090,7 @@ static const struct file_operations usblp_fops = { .llseek = noop_llseek, }; -static char *usblp_devnode(struct device *dev, umode_t *mode) +static char *usblp_devnode(const struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "usb/%s", dev_name(dev)); } diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index 988a8c02e7e2..f9427a67789c 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -717,7 +717,7 @@ static const struct file_operations iowarrior_fops = { .llseek = noop_llseek, }; -static char *iowarrior_devnode(struct device *dev, umode_t *mode) +static char *iowarrior_devnode(const struct device *dev, umode_t *mode) { return 
kasprintf(GFP_KERNEL, "usb/%s", dev_name(dev)); } diff --git a/drivers/usb/misc/legousbtower.c b/drivers/usb/misc/legousbtower.c index 1c9e09138c10..379cf01a6e96 100644 --- a/drivers/usb/misc/legousbtower.c +++ b/drivers/usb/misc/legousbtower.c @@ -245,7 +245,7 @@ static const struct file_operations tower_fops = { .llseek = tower_llseek, }; -static char *legousbtower_devnode(struct device *dev, umode_t *mode) +static char *legousbtower_devnode(const struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "usb/%s", dev_name(dev)); } diff --git a/include/linux/usb.h b/include/linux/usb.h index 3a55131e0ad4..4b463a5e4ba2 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1321,7 +1321,7 @@ struct usb_device_driver { */ struct usb_class_driver { char *name; - char *(*devnode)(struct device *dev, umode_t *mode); + char *(*devnode)(const struct device *dev, umode_t *mode); const struct file_operations *fops; int minor_base; }; From 326c3753a6358ffab607749ea0aa95d1d0ad79b0 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 17 Oct 2022 22:41:02 -0700 Subject: [PATCH 0196/4122] gpiolib: of: add a quirk for legacy names in Mediatek mt2701-cs42448 The driver is using non-standard "i2s1-in-sel-gpio1" and "i2s1-in-sel-gpio2" names to describe its gpios. In preparation to converting to the standard naming (i2s1-in-sel-gpios) and switching the driver to gpiod API add a quirk to gpiolib to keep compatibility with existing DTSes. 
Reviewed-by: Daniel Thompson Reviewed-by: Linus Walleij Signed-off-by: Dmitry Torokhov Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-of.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index 0e4e1291604d..cef4f6634125 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -488,6 +488,38 @@ static struct gpio_desc *of_find_usb_gpio(struct device_node *np, return of_get_named_gpiod_flags(np, con_id, idx, of_flags); } +static struct gpio_desc *of_find_mt2701_gpio(struct device_node *np, + const char *con_id, + unsigned int idx, + enum of_gpio_flags *of_flags) +{ + struct gpio_desc *desc; + const char *legacy_id; + + if (!IS_ENABLED(CONFIG_SND_SOC_MT2701_CS42448)) + return ERR_PTR(-ENOENT); + + if (!of_device_is_compatible(np, "mediatek,mt2701-cs42448-machine")) + return ERR_PTR(-ENOENT); + + if (!con_id || strcmp(con_id, "i2s1-in-sel")) + return ERR_PTR(-ENOENT); + + if (idx == 0) + legacy_id = "i2s1-in-sel-gpio1"; + else if (idx == 1) + legacy_id = "i2s1-in-sel-gpio2"; + else + return ERR_PTR(-ENOENT); + + desc = of_get_named_gpiod_flags(np, legacy_id, 0, of_flags); + if (!gpiod_not_found(desc)) + pr_info("%s is using legacy gpio name '%s' instead of '%s-gpios'\n", + of_node_full_name(np), legacy_id, con_id); + + return desc; +} + typedef struct gpio_desc *(*of_find_gpio_quirk)(struct device_node *np, const char *con_id, unsigned int idx, @@ -498,6 +530,7 @@ static const of_find_gpio_quirk of_find_gpio_quirks[] = { of_find_regulator_gpio, of_find_arizona_gpio, of_find_usb_gpio, + of_find_mt2701_gpio, NULL }; From b311c5cba779a87e85525d351965bbd2c18111de Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 17 Oct 2022 22:41:03 -0700 Subject: [PATCH 0197/4122] gpiolib: of: consolidate simple renames into a single quirk This consolidates all quirks doing simple renames (either allowing suffix-less names or trivial renames, when index changes 
are not required) into a single quirk. Reviewed-by: Daniel Thompson Reviewed-by: Linus Walleij Signed-off-by: Dmitry Torokhov Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-of.c | 183 +++++++++++++++----------------------- 1 file changed, 71 insertions(+), 112 deletions(-) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index cef4f6634125..63c6fa3086f3 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -365,127 +365,90 @@ struct gpio_desc *gpiod_get_from_of_node(const struct device_node *node, } EXPORT_SYMBOL_GPL(gpiod_get_from_of_node); -/* - * The SPI GPIO bindings happened before we managed to establish that GPIO - * properties should be named "foo-gpios" so we have this special kludge for - * them. - */ -static struct gpio_desc *of_find_spi_gpio(struct device_node *np, - const char *con_id, - unsigned int idx, - enum of_gpio_flags *of_flags) -{ - char prop_name[32]; /* 32 is max size of property name */ - - /* - * Hopefully the compiler stubs the rest of the function if this - * is false. - */ - if (!IS_ENABLED(CONFIG_SPI_MASTER)) - return ERR_PTR(-ENOENT); - - /* Allow this specifically for "spi-gpio" devices */ - if (!of_device_is_compatible(np, "spi-gpio") || !con_id) - return ERR_PTR(-ENOENT); - - /* Will be "gpio-sck", "gpio-mosi" or "gpio-miso" */ - snprintf(prop_name, sizeof(prop_name), "%s-%s", "gpio", con_id); - - return of_get_named_gpiod_flags(np, prop_name, idx, of_flags); -} - -/* - * The old Freescale bindings use simply "gpios" as name for the chip select - * lines rather than "cs-gpios" like all other SPI hardware. Account for this - * with a special quirk. 
- */ -static struct gpio_desc *of_find_spi_cs_gpio(struct device_node *np, +static struct gpio_desc *of_find_gpio_rename(struct device_node *np, const char *con_id, unsigned int idx, enum of_gpio_flags *of_flags) { - if (!IS_ENABLED(CONFIG_SPI_MASTER)) - return ERR_PTR(-ENOENT); + static const struct of_rename_gpio { + const char *con_id; + const char *legacy_id; /* NULL - same as con_id */ + /* + * Compatible string can be set to NULL in case where + * matching to a particular compatible is not practical, + * but it should only be done for gpio names that have + * vendor prefix to reduce risk of false positives. + * Addition of such entries is strongly discouraged. + */ + const char *compatible; + } gpios[] = { +#if IS_ENABLED(CONFIG_MFD_ARIZONA) + { "wlf,reset", NULL, NULL }, +#endif +#if IS_ENABLED(CONFIG_REGULATOR) + /* + * Some regulator bindings happened before we managed to + * establish that GPIO properties should be named + * "foo-gpios" so we have this special kludge for them. + */ + { "wlf,ldoena", NULL, NULL }, /* Arizona */ + { "wlf,ldo1ena", NULL, NULL }, /* WM8994 */ + { "wlf,ldo2ena", NULL, NULL }, /* WM8994 */ +#endif +#if IS_ENABLED(CONFIG_SPI_MASTER) - /* Allow this specifically for Freescale and PPC devices */ - if (!of_device_is_compatible(np, "fsl,spi") && - !of_device_is_compatible(np, "aeroflexgaisler,spictrl") && - !of_device_is_compatible(np, "ibm,ppc4xx-spi")) - return ERR_PTR(-ENOENT); - /* Allow only if asking for "cs-gpios" */ - if (!con_id || strcmp(con_id, "cs")) - return ERR_PTR(-ENOENT); + /* + * The SPI GPIO bindings happened before we managed to + * establish that GPIO properties should be named + * "foo-gpios" so we have this special kludge for them. + */ + { "miso", "gpio-miso", "spi-gpio" }, + { "mosi", "gpio-mosi", "spi-gpio" }, + { "sck", "gpio-sck", "spi-gpio" }, - /* - * While all other SPI controllers use "cs-gpios" the Freescale - * uses just "gpios" so translate to that when "cs-gpios" is - * requested. 
- */ - return of_get_named_gpiod_flags(np, "gpios", idx, of_flags); -} - -/* - * Some regulator bindings happened before we managed to establish that GPIO - * properties should be named "foo-gpios" so we have this special kludge for - * them. - */ -static struct gpio_desc *of_find_regulator_gpio(struct device_node *np, - const char *con_id, - unsigned int idx, - enum of_gpio_flags *of_flags) -{ - /* These are the connection IDs we accept as legacy GPIO phandles */ - const char *whitelist[] = { - "wlf,ldoena", /* Arizona */ - "wlf,ldo1ena", /* WM8994 */ - "wlf,ldo2ena", /* WM8994 */ + /* + * The old Freescale bindings use simply "gpios" as name + * for the chip select lines rather than "cs-gpios" like + * all other SPI hardware. Allow this specifically for + * Freescale and PPC devices. + */ + { "cs", "gpios", "fsl,spi" }, + { "cs", "gpios", "aeroflexgaisler,spictrl" }, + { "cs", "gpios", "ibm,ppc4xx-spi" }, +#endif +#if IS_ENABLED(CONFIG_TYPEC_FUSB302) + /* + * Fairchild FUSB302 host is using undocumented "fcs,int_n" + * property without the compulsory "-gpios" suffix. 
+ */ + { "fcs,int_n", NULL, "fcs,fusb302" }, +#endif }; - int i; - - if (!IS_ENABLED(CONFIG_REGULATOR)) - return ERR_PTR(-ENOENT); + struct gpio_desc *desc; + const char *legacy_id; + unsigned int i; if (!con_id) return ERR_PTR(-ENOENT); - i = match_string(whitelist, ARRAY_SIZE(whitelist), con_id); - if (i < 0) - return ERR_PTR(-ENOENT); + for (i = 0; i < ARRAY_SIZE(gpios); i++) { + if (strcmp(con_id, gpios[i].con_id)) + continue; - return of_get_named_gpiod_flags(np, con_id, idx, of_flags); -} + if (gpios[i].compatible && + !of_device_is_compatible(np, gpios[i].compatible)) + continue; -static struct gpio_desc *of_find_arizona_gpio(struct device_node *np, - const char *con_id, - unsigned int idx, - enum of_gpio_flags *of_flags) -{ - if (!IS_ENABLED(CONFIG_MFD_ARIZONA)) - return ERR_PTR(-ENOENT); + legacy_id = gpios[i].legacy_id ?: gpios[i].con_id; + desc = of_get_named_gpiod_flags(np, legacy_id, idx, of_flags); + if (!gpiod_not_found(desc)) { + pr_info("%s uses legacy gpio name '%s' instead of '%s-gpios'\n", + of_node_full_name(np), legacy_id, con_id); + return desc; + } + } - if (!con_id || strcmp(con_id, "wlf,reset")) - return ERR_PTR(-ENOENT); - - return of_get_named_gpiod_flags(np, con_id, idx, of_flags); -} - -static struct gpio_desc *of_find_usb_gpio(struct device_node *np, - const char *con_id, - unsigned int idx, - enum of_gpio_flags *of_flags) -{ - /* - * Currently this USB quirk is only for the Fairchild FUSB302 host - * which is using an undocumented DT GPIO line named "fcs,int_n" - * without the compulsory "-gpios" suffix. 
- */ - if (!IS_ENABLED(CONFIG_TYPEC_FUSB302)) - return ERR_PTR(-ENOENT); - - if (!con_id || strcmp(con_id, "fcs,int_n")) - return ERR_PTR(-ENOENT); - - return of_get_named_gpiod_flags(np, con_id, idx, of_flags); + return ERR_PTR(-ENOENT); } static struct gpio_desc *of_find_mt2701_gpio(struct device_node *np, @@ -525,11 +488,7 @@ typedef struct gpio_desc *(*of_find_gpio_quirk)(struct device_node *np, unsigned int idx, enum of_gpio_flags *of_flags); static const of_find_gpio_quirk of_find_gpio_quirks[] = { - of_find_spi_gpio, - of_find_spi_cs_gpio, - of_find_regulator_gpio, - of_find_arizona_gpio, - of_find_usb_gpio, + of_find_gpio_rename, of_find_mt2701_gpio, NULL }; From 307c593ba5f915e308fd23a2daae7e9a5209b604 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 17 Oct 2022 22:41:04 -0700 Subject: [PATCH 0198/4122] gpiolib: of: tighten selection of gpio renaming quirks Tighten selection of legacy gpio renaming quirks so that they are only considered on more relevant configurations. Suggested-by: Daniel Thompson Reviewed-by: Daniel Thompson Reviewed-by: Linus Walleij Signed-off-by: Dmitry Torokhov Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-of.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index 63c6fa3086f3..7d4bbf6484bc 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -385,18 +385,21 @@ static struct gpio_desc *of_find_gpio_rename(struct device_node *np, #if IS_ENABLED(CONFIG_MFD_ARIZONA) { "wlf,reset", NULL, NULL }, #endif -#if IS_ENABLED(CONFIG_REGULATOR) + /* * Some regulator bindings happened before we managed to * establish that GPIO properties should be named * "foo-gpios" so we have this special kludge for them.
*/ +#if IS_ENABLED(CONFIG_REGULATOR_ARIZONA_LDO1) { "wlf,ldoena", NULL, NULL }, /* Arizona */ +#endif +#if IS_ENABLED(CONFIG_REGULATOR_WM8994) { "wlf,ldo1ena", NULL, NULL }, /* WM8994 */ { "wlf,ldo2ena", NULL, NULL }, /* WM8994 */ #endif -#if IS_ENABLED(CONFIG_SPI_MASTER) +#if IS_ENABLED(CONFIG_SPI_GPIO) /* * The SPI GPIO bindings happened before we managed to * establish that GPIO properties should be named @@ -405,6 +408,7 @@ static struct gpio_desc *of_find_gpio_rename(struct device_node *np, { "miso", "gpio-miso", "spi-gpio" }, { "mosi", "gpio-mosi", "spi-gpio" }, { "sck", "gpio-sck", "spi-gpio" }, +#endif /* * The old Freescale bindings use simply "gpios" as name @@ -412,10 +416,14 @@ static struct gpio_desc *of_find_gpio_rename(struct device_node *np, * all other SPI hardware. Allow this specifically for * Freescale and PPC devices. */ +#if IS_ENABLED(CONFIG_SPI_FSL_SPI) { "cs", "gpios", "fsl,spi" }, { "cs", "gpios", "aeroflexgaisler,spictrl" }, +#endif +#if IS_ENABLED(CONFIG_SPI_PPC4xx) { "cs", "gpios", "ibm,ppc4xx-spi" }, #endif + #if IS_ENABLED(CONFIG_TYPEC_FUSB302) /* * Fairchild FUSB302 host is using undocumented "fcs,int_n" From fbbbcd177a27508a47c5136b31de5cf4c8d0ab1c Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 17 Oct 2022 22:41:05 -0700 Subject: [PATCH 0199/4122] gpiolib: of: add quirk for locating reset lines with legacy bindings Some legacy mappings used "gpio[s]-reset" instead of "reset-gpios", add a quirk so that gpiod API will still work on unmodified DTSes. 
Reviewed-by: Daniel Thompson Reviewed-by: Linus Walleij Signed-off-by: Dmitry Torokhov Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-of.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index 7d4bbf6484bc..2b5d1b3095c7 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -382,9 +382,18 @@ static struct gpio_desc *of_find_gpio_rename(struct device_node *np, */ const char *compatible; } gpios[] = { +#if !IS_ENABLED(CONFIG_LCD_HX8357) + /* Himax LCD controllers used "gpios-reset" */ + { "reset", "gpios-reset", "himax,hx8357" }, + { "reset", "gpios-reset", "himax,hx8369" }, +#endif #if IS_ENABLED(CONFIG_MFD_ARIZONA) { "wlf,reset", NULL, NULL }, #endif +#if !IS_ENABLED(CONFIG_PCI_LANTIQ) + /* MIPS Lantiq PCI */ + { "reset", "gpios-reset", "lantiq,pci-xway" }, +#endif /* * Some regulator bindings happened before we managed to @@ -399,6 +408,13 @@ static struct gpio_desc *of_find_gpio_rename(struct device_node *np, { "wlf,ldo2ena", NULL, NULL }, /* WM8994 */ #endif +#if IS_ENABLED(CONFIG_SND_SOC_TLV320AIC3X) + { "reset", "gpio-reset", "ti,tlv320aic3x" }, + { "reset", "gpio-reset", "ti,tlv320aic33" }, + { "reset", "gpio-reset", "ti,tlv320aic3007" }, + { "reset", "gpio-reset", "ti,tlv320aic3104" }, + { "reset", "gpio-reset", "ti,tlv320aic3106" }, +#endif #if IS_ENABLED(CONFIG_SPI_GPIO) /* * The SPI GPIO bindings happened before we managed to From 9c2cc7171e08eef52110d272fdf2225d6dcd81b6 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 17 Oct 2022 22:41:06 -0700 Subject: [PATCH 0200/4122] gpiolib: of: add a quirk for reset line for Marvell NFC controller The controller is using non-standard "reset-n-io" name for its reset gpio property, whereas gpiod API expects "-gpios". Add a quirk so that gpiod API will still work on unmodified DTSes. 
Reviewed-by: Daniel Thompson Signed-off-by: Dmitry Torokhov Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-of.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index 2b5d1b3095c7..a9cedc39a245 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -390,6 +390,16 @@ static struct gpio_desc *of_find_gpio_rename(struct device_node *np, #if IS_ENABLED(CONFIG_MFD_ARIZONA) { "wlf,reset", NULL, NULL }, #endif +#if IS_ENABLED(CONFIG_NFC_MRVL_I2C) + { "reset", "reset-n-io", "marvell,nfc-i2c" }, +#endif +#if IS_ENABLED(CONFIG_NFC_MRVL_SPI) + { "reset", "reset-n-io", "marvell,nfc-spi" }, +#endif +#if IS_ENABLED(CONFIG_NFC_MRVL_UART) + { "reset", "reset-n-io", "marvell,nfc-uart" }, + { "reset", "reset-n-io", "mrvl,nfc-uart" }, +#endif #if !IS_ENABLED(CONFIG_PCI_LANTIQ) /* MIPS Lantiq PCI */ { "reset", "gpios-reset", "lantiq,pci-xway" }, From 944004eb56dc977ad5f882ca4338f45396052317 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 17 Oct 2022 22:41:07 -0700 Subject: [PATCH 0201/4122] gpiolib: of: add a quirk for reset line for Cirrus CS42L56 codec The controller is using non-standard "cirrus,gpio-nreset" name for its reset gpio property, whereas gpiod API expects "-gpios". Add a quirk so that gpiod API will still work on unmodified DTSes. 
Reviewed-by: Daniel Thompson Reviewed-by: Linus Walleij Signed-off-by: Dmitry Torokhov Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-of.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index a9cedc39a245..ffdbac2eeaa6 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -418,6 +418,9 @@ static struct gpio_desc *of_find_gpio_rename(struct device_node *np, { "wlf,ldo2ena", NULL, NULL }, /* WM8994 */ #endif +#if IS_ENABLED(CONFIG_SND_SOC_CS42L56) + { "reset", "cirrus,gpio-nreset", "cirrus,cs42l56" }, +#endif #if IS_ENABLED(CONFIG_SND_SOC_TLV320AIC3X) { "reset", "gpio-reset", "ti,tlv320aic3x" }, { "reset", "gpio-reset", "ti,tlv320aic33" }, From eaf1a29665cda1c767cac0d523828892bd77a842 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 17 Oct 2022 22:41:08 -0700 Subject: [PATCH 0202/4122] gpiolib: of: add a quirk for legacy names in MOXA ART RTC The driver is using non-standard "gpio-rtc-data", "gpio-rtc-sclk", and "gpio-rtc-reset" names for properties describing its gpios. In preparation to converting to the standard naming ("rtc-*-gpios") and switching the driver to gpiod API add a quirk to gpiolib to keep compatibility with existing DTSes. 
Signed-off-by: Dmitry Torokhov Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-of.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index ffdbac2eeaa6..d22498c72a67 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -390,6 +390,11 @@ static struct gpio_desc *of_find_gpio_rename(struct device_node *np, #if IS_ENABLED(CONFIG_MFD_ARIZONA) { "wlf,reset", NULL, NULL }, #endif +#if IS_ENABLED(CONFIG_RTC_DRV_MOXART) + { "rtc-data", "gpio-rtc-data", "moxa,moxart-rtc" }, + { "rtc-sclk", "gpio-rtc-sclk", "moxa,moxart-rtc" }, + { "rtc-reset", "gpio-rtc-reset", "moxa,moxart-rtc" }, +#endif #if IS_ENABLED(CONFIG_NFC_MRVL_I2C) { "reset", "reset-n-io", "marvell,nfc-i2c" }, #endif From aa1d058d48f292aa138e33ad12b7b4d18b5407cd Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Tue, 18 Oct 2022 10:32:42 +0800 Subject: [PATCH 0203/4122] kernfs: dont take i_lock on inode attr read The kernfs write lock is held when the kernfs node inode attributes are updated. Therefore, when either kernfs_iop_getattr() or kernfs_iop_permission() are called the kernfs node inode attributes won't change. Consequently concurrent kernfs_refresh_inode() calls always copy the same values from the kernfs node. So there's no need to take the inode i_lock to get consistent values for generic_fillattr() and generic_permission(), the kernfs read lock is sufficient. 
Cc: Tejun Heo Signed-off-by: Ian Kent Link: https://lore.kernel.org/r/166606036215.13363.1288735296954908554.stgit@donald.themaw.net Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/inode.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c index 3d783d80f5da..74f3453f4639 100644 --- a/fs/kernfs/inode.c +++ b/fs/kernfs/inode.c @@ -190,10 +190,8 @@ int kernfs_iop_getattr(struct user_namespace *mnt_userns, struct kernfs_root *root = kernfs_root(kn); down_read(&root->kernfs_rwsem); - spin_lock(&inode->i_lock); kernfs_refresh_inode(kn, inode); generic_fillattr(&init_user_ns, inode, stat); - spin_unlock(&inode->i_lock); up_read(&root->kernfs_rwsem); return 0; @@ -288,10 +286,8 @@ int kernfs_iop_permission(struct user_namespace *mnt_userns, root = kernfs_root(kn); down_read(&root->kernfs_rwsem); - spin_lock(&inode->i_lock); kernfs_refresh_inode(kn, inode); ret = generic_permission(&init_user_ns, inode, mask); - spin_unlock(&inode->i_lock); up_read(&root->kernfs_rwsem); return ret; From 92b57842f43014e6ca81ddf6d5d59e9ddf762e12 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Tue, 18 Oct 2022 10:32:49 +0800 Subject: [PATCH 0204/4122] kernfs: dont take i_lock on revalidate In kernfs_dop_revalidate() when the passed in dentry is negative the dentry directory is checked to see if it has changed and if so the negative dentry is discarded so it can be refreshed. During this check the dentry inode i_lock is taken to mitigate against a possible concurrent rename. But if it's racing with a rename, because the dentry is negative, it can't be the source; it must be the target and it must be going to do a d_move() otherwise the rename will return an error. In this case the parent dentry of the target will not change, it will be the same over the d_move(), only the source dentry parent may change so the inode i_lock isn't needed.
Cc: Tejun Heo Signed-off-by: Ian Kent Link: https://lore.kernel.org/r/166606036967.13363.9336408133975631967.stgit@donald.themaw.net Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 3990f3e270cb..6acd9c3d4cff 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -1073,20 +1073,30 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags) /* If the kernfs parent node has changed discard and * proceed to ->lookup. + * + * There's nothing special needed here when getting the + * dentry parent, even if a concurrent rename is in + * progress. That's because the dentry is negative so + * it can only be the target of the rename and it will + * be doing a d_move() not a replace. Consequently the + * dentry d_parent won't change over the d_move(). + * + * Also kernfs negative dentries transitioning from + * negative to positive during revalidate won't happen + * because they are invalidated on containing directory + * changes and the lookup re-done so that a new positive + * dentry can be properly created. */ - spin_lock(&dentry->d_lock); + root = kernfs_root_from_sb(dentry->d_sb); + down_read(&root->kernfs_rwsem); parent = kernfs_dentry_node(dentry->d_parent); if (parent) { - spin_unlock(&dentry->d_lock); - root = kernfs_root(parent); - down_read(&root->kernfs_rwsem); if (kernfs_dir_changed(parent, dentry)) { up_read(&root->kernfs_rwsem); return 0; } - up_read(&root->kernfs_rwsem); - } else - spin_unlock(&dentry->d_lock); + } + up_read(&root->kernfs_rwsem); /* The kernfs parent node hasn't changed, leave the * dentry negative and return success. 
From e3186e36925fc18384492491ebcf3da749780a30 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 17 Oct 2022 22:41:09 -0700 Subject: [PATCH 0205/4122] gpiolib: of: factor out code overriding gpio line polarity There are several instances where we use a separate property to override polarity specified in gpio property. Factor it out into a separate function. Reviewed-by: Linus Walleij Signed-off-by: Dmitry Torokhov Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-of.c | 48 +++++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index d22498c72a67..6faf0dc7bc31 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -130,6 +130,28 @@ bool of_gpio_need_valid_mask(const struct gpio_chip *gc) return false; } +/* + * Overrides stated polarity of a gpio line and warns when there is a + * discrepancy. + */ +static void of_gpio_quirk_polarity(const struct device_node *np, + bool active_high, + enum of_gpio_flags *flags) +{ + if (active_high) { + if (*flags & OF_GPIO_ACTIVE_LOW) { + pr_warn("%s GPIO handle specifies active low - ignored\n", + of_node_full_name(np)); + *flags &= ~OF_GPIO_ACTIVE_LOW; + } + } else { + if (!(*flags & OF_GPIO_ACTIVE_LOW)) + pr_info("%s enforce active low on GPIO handle\n", + of_node_full_name(np)); + *flags |= OF_GPIO_ACTIVE_LOW; + } +} + static void of_gpio_flags_quirks(const struct device_node *np, const char *propname, enum of_gpio_flags *flags, @@ -145,7 +167,7 @@ static void of_gpio_flags_quirks(const struct device_node *np, (!(strcmp(propname, "enable-gpio") && strcmp(propname, "enable-gpios")) && of_device_is_compatible(np, "regulator-gpio")))) { - bool active_low = !of_property_read_bool(np, + bool active_high = of_property_read_bool(np, "enable-active-high"); /* * The regulator GPIO handles are specified such that the @@ -153,13 +175,7 @@ static void of_gpio_flags_quirks(const struct device_node *np, * 
the polarity of the GPIO line. Any phandle flags must * be actively ignored. */ - if ((*flags & OF_GPIO_ACTIVE_LOW) && !active_low) { - pr_warn("%s GPIO handle specifies active low - ignored\n", - of_node_full_name(np)); - *flags &= ~OF_GPIO_ACTIVE_LOW; - } - if (active_low) - *flags |= OF_GPIO_ACTIVE_LOW; + of_gpio_quirk_polarity(np, active_high, flags); } /* * Legacy open drain handling for fixed voltage regulators. @@ -200,18 +216,10 @@ static void of_gpio_flags_quirks(const struct device_node *np, * conflict and the "spi-cs-high" flag will * take precedence. */ - if (of_property_read_bool(child, "spi-cs-high")) { - if (*flags & OF_GPIO_ACTIVE_LOW) { - pr_warn("%s GPIO handle specifies active low - ignored\n", - of_node_full_name(child)); - *flags &= ~OF_GPIO_ACTIVE_LOW; - } - } else { - if (!(*flags & OF_GPIO_ACTIVE_LOW)) - pr_info("%s enforce active low on chipselect handle\n", - of_node_full_name(child)); - *flags |= OF_GPIO_ACTIVE_LOW; - } + bool active_high = of_property_read_bool(child, + "spi-cs-high"); + of_gpio_quirk_polarity(child, active_high, + flags); of_node_put(child); break; } From b02c85c9458cdd15e2c43413d7d2541a468cde57 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 17 Oct 2022 22:41:10 -0700 Subject: [PATCH 0206/4122] gpiolib: of: add quirk for phy reset polarity for Freescale Ethernet Bindings for Freescale Fast Ethernet Controller use a separate property "phy-reset-active-high" to specify polarity of its phy gpio line. To allow converting the driver to gpiod API we need to add this quirk to gpiolib. 
Reviewed-by: Linus Walleij Signed-off-by: Dmitry Torokhov Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-of.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index 6faf0dc7bc31..c2a55ffb2b20 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -231,6 +231,33 @@ static void of_gpio_flags_quirks(const struct device_node *np, !strcmp(propname, "snps,reset-gpio") && of_property_read_bool(np, "snps,reset-active-low")) *flags |= OF_GPIO_ACTIVE_LOW; + + /* + * Freescale Fast Ethernet Controller uses a separate property to + * describe polarity of the phy reset line. + */ + if (IS_ENABLED(CONFIG_FEC)) { + static const char * const fec_devices[] = { + "fsl,imx25-fec", + "fsl,imx27-fec", + "fsl,imx28-fec", + "fsl,imx6q-fec", + "fsl,mvf600-fec", + "fsl,imx6sx-fec", + "fsl,imx6ul-fec", + "fsl,imx8mq-fec", + "fsl,imx8qm-fec", + "fsl,s32v234-fec", + NULL + }; + + if (!strcmp(propname, "phy-reset-gpios") && + of_device_compatible_match(np, fec_devices)) { + bool active_high = of_property_read_bool(np, + "phy-reset-active-high"); + of_gpio_quirk_polarity(np, active_high, flags); + } + } } /** From 99d18d42c942854a073191714a311dc2420ec7d3 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 17 Oct 2022 22:41:11 -0700 Subject: [PATCH 0207/4122] gpiolib: of: add a quirk for reset line polarity for Himax LCDs Existing DTS that use legacy (non-standard) property name for the reset line "gpios-reset" also specify incorrect polarity (0 which maps to "active high"). Add a quirk to force polarity to "active low" so that once driver is converted to gpiod API that pays attention to line polarity it will work properly. 
Reviewed-by: Linus Walleij Signed-off-by: Dmitry Torokhov Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-of.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index c2a55ffb2b20..52616848a37c 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -152,11 +152,47 @@ static void of_gpio_quirk_polarity(const struct device_node *np, } } +/* + * This quirk does static polarity overrides in cases where existing + * DTS specified incorrect polarity. + */ +static void of_gpio_try_fixup_polarity(const struct device_node *np, + const char *propname, + enum of_gpio_flags *flags) +{ + static const struct { + const char *compatible; + const char *propname; + bool active_high; + } gpios[] = { +#if !IS_ENABLED(CONFIG_LCD_HX8357) + /* + * Himax LCD controllers used incorrectly named + * "gpios-reset" property and also specified wrong + * polarity. + */ + { "himax,hx8357", "gpios-reset", false }, + { "himax,hx8369", "gpios-reset", false }, +#endif + }; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(gpios); i++) { + if (of_device_is_compatible(np, gpios[i].compatible) && + !strcmp(propname, gpios[i].propname)) { + of_gpio_quirk_polarity(np, gpios[i].active_high, flags); + break; + } + } +} + static void of_gpio_flags_quirks(const struct device_node *np, const char *propname, enum of_gpio_flags *flags, int index) { + of_gpio_try_fixup_polarity(np, propname, flags); + /* * Some GPIO fixed regulator quirks. * Note that active low is the default. 
From dbf53a29b28b277fa952a000245b558536c6bdd7 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 19 Oct 2022 18:59:45 +0200 Subject: [PATCH 0208/4122] x86/paravirt: Fix a !PARAVIRT build warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix ./include/trace/events/xen.h:28:31: warning: ‘enum paravirt_lazy_mode’ \ declared inside parameter list will not be visible outside of this definition or declaration which turns into a build error: ./include/trace/events/xen.h:28:50: error: parameter 1 (‘mode’) has incomplete type 28 | TP_PROTO(enum paravirt_lazy_mode mode), \ due to enum paravirt_lazy_mode being visible only under CONFIG_PARAVIRT. Just pull it up where it is unconditionally visible. Signed-off-by: Borislav Petkov Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/Y1AtAXM8YjtBm2cj@zn.tnic --- arch/x86/include/asm/paravirt_types.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index e137d9412123..27c692791b7e 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -9,6 +9,13 @@ struct paravirt_patch_site { u8 type; /* type of this instruction */ u8 len; /* length of original instruction */ }; + +/* Lazy mode for batching updates / context switch */ +enum paravirt_lazy_mode { + PARAVIRT_LAZY_NONE, + PARAVIRT_LAZY_MMU, + PARAVIRT_LAZY_CPU, +}; #endif #ifdef CONFIG_PARAVIRT @@ -582,13 +589,6 @@ int paravirt_disable_iospace(void); __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) -/* Lazy mode for batching updates / context switch */ -enum paravirt_lazy_mode { - PARAVIRT_LAZY_NONE, - PARAVIRT_LAZY_MMU, - PARAVIRT_LAZY_CPU, -}; - enum paravirt_lazy_mode paravirt_get_lazy_mode(void); void paravirt_start_context_switch(struct task_struct *prev); void paravirt_end_context_switch(struct 
task_struct *next); From 0c04b83d95e02c9def6e2db49fc2cab618faf949 Mon Sep 17 00:00:00 2001 From: Tanjuate Brunostar Date: Tue, 18 Oct 2022 12:01:54 +0000 Subject: [PATCH 0209/4122] staging: rts5208: split long line of code Fix checkpatch warning by splitting up a long line of code, improving code readability Signed-off-by: Tanjuate Brunostar Acked-by: Julia Lawall Link: https://lore.kernel.org/r/Y06Vsr7JVvpPem5T@elroy-temp-vm.gaiao0uenmiufjlowqgp5yxwdh.gvxx.internal.cloudapp.net Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rts5208/sd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/staging/rts5208/sd.c b/drivers/staging/rts5208/sd.c index d1fafd530c80..4b7122add51a 100644 --- a/drivers/staging/rts5208/sd.c +++ b/drivers/staging/rts5208/sd.c @@ -4506,7 +4506,8 @@ int sd_execute_write_data(struct scsi_cmnd *srb, struct rtsx_chip *chip) if (CHK_SD(sd_card)) { retval = reset_sd(chip); if (retval != STATUS_SUCCESS) { - sd_card->sd_lock_status &= ~(SD_UNLOCK_POW_ON | SD_SDR_RST); + sd_card->sd_lock_status &= + ~(SD_UNLOCK_POW_ON | SD_SDR_RST); goto sd_execute_write_cmd_failed; } } From fd22186003a98daaab1ea08aaa177c041fdaa649 Mon Sep 17 00:00:00 2001 From: Deepak R Varma Date: Mon, 17 Oct 2022 12:02:30 +0530 Subject: [PATCH 0210/4122] staging: iio: frequency: ad9834: merge unnecessary split lines Improve code readability by merging unnecessary split lines that are well within the code-style guidelines post merge. 
Signed-off-by: Deepak R Varma Acked-by: Julia Lawall Link: https://lore.kernel.org/r/Y0z2/qFe3kW96MTs@debian-BULLSEYE-live-builder-AMD64 Signed-off-by: Greg Kroah-Hartman --- drivers/staging/iio/frequency/ad9834.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/staging/iio/frequency/ad9834.c b/drivers/staging/iio/frequency/ad9834.c index 2b4267a87e65..285df0e489a6 100644 --- a/drivers/staging/iio/frequency/ad9834.c +++ b/drivers/staging/iio/frequency/ad9834.c @@ -331,11 +331,9 @@ static IIO_DEV_ATTR_PHASE(0, 1, 0200, NULL, ad9834_write, AD9834_REG_PHASE1); static IIO_DEV_ATTR_PHASESYMBOL(0, 0200, NULL, ad9834_write, AD9834_PSEL); static IIO_CONST_ATTR_PHASE_SCALE(0, "0.0015339808"); /* 2PI/2^12 rad*/ -static IIO_DEV_ATTR_PINCONTROL_EN(0, 0200, NULL, - ad9834_write, AD9834_PIN_SW); +static IIO_DEV_ATTR_PINCONTROL_EN(0, 0200, NULL, ad9834_write, AD9834_PIN_SW); static IIO_DEV_ATTR_OUT_ENABLE(0, 0200, NULL, ad9834_write, AD9834_RESET); -static IIO_DEV_ATTR_OUTY_ENABLE(0, 1, 0200, NULL, - ad9834_write, AD9834_OPBITEN); +static IIO_DEV_ATTR_OUTY_ENABLE(0, 1, 0200, NULL, ad9834_write, AD9834_OPBITEN); static IIO_DEV_ATTR_OUT_WAVETYPE(0, 0, ad9834_store_wavetype, 0); static IIO_DEV_ATTR_OUT_WAVETYPE(0, 1, ad9834_store_wavetype, 1); From febdb8eea98b1c506cf8624531a334e507c4a771 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sat, 1 Oct 2022 19:06:18 +0200 Subject: [PATCH 0211/4122] staging: r8188eu: remove bLedLinkBlinkInProgress Remove the bLedLinkBlinkInProgress component from struct led_priv. Its only use is to block requests for "link blinking" when this blinking pattern is already active. The "link blinking" pattern is a continuous blinking pattern (as opposed to other patterns where we blink N times), it's no problem to restart this pattern when it's already running. 
Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221001170618.444444-1-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_led.c | 14 -------------- drivers/staging/r8188eu/include/rtw_led.h | 1 - 2 files changed, 15 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_led.c b/drivers/staging/r8188eu/core/rtw_led.c index 1e316e6358ea..2527c252c3e9 100644 --- a/drivers/staging/r8188eu/core/rtw_led.c +++ b/drivers/staging/r8188eu/core/rtw_led.c @@ -26,7 +26,6 @@ static void ResetLedStatus(struct led_priv *pLed) pLed->BlinkTimes = 0; /* Number of times to toggle led state for blinking. */ - pLed->bLedLinkBlinkInProgress = false; pLed->bLedScanBlinkInProgress = false; } @@ -99,7 +98,6 @@ static void blink_work(struct work_struct *work) pLed->BlinkTimes--; if (pLed->BlinkTimes == 0) { if (check_fwstate(pmlmepriv, _FW_LINKED)) { - pLed->bLedLinkBlinkInProgress = true; pLed->CurrLedState = LED_BLINK_NORMAL; schedule_delayed_work(&pLed->blink_work, LED_BLINK_LINK_INTVL); } else { @@ -115,7 +113,6 @@ static void blink_work(struct work_struct *work) pLed->BlinkTimes--; if (pLed->BlinkTimes == 0) { if (check_fwstate(pmlmepriv, _FW_LINKED)) { - pLed->bLedLinkBlinkInProgress = true; pLed->CurrLedState = LED_BLINK_NORMAL; schedule_delayed_work(&pLed->blink_work, LED_BLINK_LINK_INTVL); } else { @@ -132,7 +129,6 @@ static void blink_work(struct work_struct *work) break; case LED_BLINK_WPS_STOP: /* WPS success */ if (!pLed->bLedOn) { - pLed->bLedLinkBlinkInProgress = true; pLed->CurrLedState = LED_BLINK_NORMAL; schedule_delayed_work(&pLed->blink_work, LED_BLINK_LINK_INTVL); @@ -189,23 +185,18 @@ void rtw_led_control(struct adapter *padapter, enum LED_CTL_MODE LedAction) cancel_delayed_work(&pLed->blink_work); - pLed->bLedLinkBlinkInProgress = false; pLed->bLedBlinkInProgress = false; pLed->CurrLedState = LED_BLINK_SLOWLY; schedule_delayed_work(&pLed->blink_work, 
LED_BLINK_NO_LINK_INTVL); break; case LED_CTL_LINK: - if (!pLed->bLedLinkBlinkInProgress) - return; - if (pLed->CurrLedState == LED_BLINK_SCAN || IS_LED_WPS_BLINKING(pLed)) return; cancel_delayed_work(&pLed->blink_work); pLed->bLedBlinkInProgress = false; - pLed->bLedLinkBlinkInProgress = true; pLed->CurrLedState = LED_BLINK_NORMAL; schedule_delayed_work(&pLed->blink_work, LED_BLINK_LINK_INTVL); @@ -222,7 +213,6 @@ void rtw_led_control(struct adapter *padapter, enum LED_CTL_MODE LedAction) cancel_delayed_work(&pLed->blink_work); - pLed->bLedLinkBlinkInProgress = false; pLed->bLedBlinkInProgress = false; pLed->bLedScanBlinkInProgress = true; @@ -240,7 +230,6 @@ void rtw_led_control(struct adapter *padapter, enum LED_CTL_MODE LedAction) cancel_delayed_work(&pLed->blink_work); - pLed->bLedLinkBlinkInProgress = false; pLed->bLedBlinkInProgress = true; pLed->CurrLedState = LED_BLINK_TXRX; @@ -253,7 +242,6 @@ void rtw_led_control(struct adapter *padapter, enum LED_CTL_MODE LedAction) cancel_delayed_work(&pLed->blink_work); - pLed->bLedLinkBlinkInProgress = false; pLed->bLedBlinkInProgress = false; pLed->bLedScanBlinkInProgress = false; pLed->bLedWPSBlinkInProgress = true; @@ -263,7 +251,6 @@ void rtw_led_control(struct adapter *padapter, enum LED_CTL_MODE LedAction) case LED_CTL_STOP_WPS: cancel_delayed_work(&pLed->blink_work); - pLed->bLedLinkBlinkInProgress = false; pLed->bLedBlinkInProgress = false; pLed->bLedScanBlinkInProgress = false; pLed->bLedWPSBlinkInProgress = true; @@ -283,7 +270,6 @@ void rtw_led_control(struct adapter *padapter, enum LED_CTL_MODE LedAction) break; case LED_CTL_POWER_OFF: pLed->CurrLedState = RTW_LED_OFF; - pLed->bLedLinkBlinkInProgress = false; pLed->bLedBlinkInProgress = false; pLed->bLedWPSBlinkInProgress = false; pLed->bLedScanBlinkInProgress = false; diff --git a/drivers/staging/r8188eu/include/rtw_led.h b/drivers/staging/r8188eu/include/rtw_led.h index 8520f022a67f..f57dcf6c8b24 100644 --- a/drivers/staging/r8188eu/include/rtw_led.h 
+++ b/drivers/staging/r8188eu/include/rtw_led.h @@ -47,7 +47,6 @@ struct led_priv { u32 BlinkTimes; /* Number of times to toggle led state for blinking. */ - bool bLedLinkBlinkInProgress; bool bLedScanBlinkInProgress; struct delayed_work blink_work; }; From 85eba7ac39f7e86a2b473aae9d664f9ef47cad2c Mon Sep 17 00:00:00 2001 From: Philipp Hortmann Date: Sat, 1 Oct 2022 11:40:29 +0200 Subject: [PATCH 0212/4122] staging: rtl8192e: Remove unchanged variable bFwCtrlLPS bFwCtrlLPS is just once initialized and never changed. The evaluation will always have the same result. Remove resulting dead code. Signed-off-by: Philipp Hortmann Link: https://lore.kernel.org/r/d5b5cfd26648180d082f38085a807c932e87703c.1664616227.git.philipp.g.hortmann@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl8192e/rtl_core.c | 1 - drivers/staging/rtl8192e/rtl8192e/rtl_ps.c | 15 ++++----------- drivers/staging/rtl8192e/rtllib.h | 3 --- 3 files changed, 4 insertions(+), 15 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c index 89bc989cffba..10b79003c633 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c @@ -889,7 +889,6 @@ static void _rtl92e_init_priv_variable(struct net_device *dev) priv->rtllib->PowerSaveControl.bInactivePs = true; priv->rtllib->PowerSaveControl.bIPSModeBackup = false; priv->rtllib->PowerSaveControl.bLeisurePs = true; - priv->rtllib->PowerSaveControl.bFwCtrlLPS = false; priv->rtllib->LPSDelayCnt = 0; priv->rtllib->sta_sleep = LPS_IS_WAKE; priv->rtllib->rf_power_state = rf_on; diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c b/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c index 8c00b111ddb2..49c50ec21d04 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c @@ -229,11 +229,8 @@ void rtl92e_leisure_ps_enter(struct net_device *dev) if (pPSC->LpsIdleCount >= 
RT_CHECK_FOR_HANG_PERIOD) { if (priv->rtllib->ps == RTLLIB_PS_DISABLED) { - if (!pPSC->bFwCtrlLPS) { - if (priv->rtllib->SetFwCmdHandler) - priv->rtllib->SetFwCmdHandler( - dev, FW_CMD_LPS_ENTER); - } + if (priv->rtllib->SetFwCmdHandler) + priv->rtllib->SetFwCmdHandler(dev, FW_CMD_LPS_ENTER); _rtl92e_ps_set_mode(dev, RTLLIB_PS_MBCAST | RTLLIB_PS_UNICAST); } @@ -251,12 +248,8 @@ void rtl92e_leisure_ps_leave(struct net_device *dev) if (pPSC->bLeisurePs) { if (priv->rtllib->ps != RTLLIB_PS_DISABLED) { _rtl92e_ps_set_mode(dev, RTLLIB_PS_DISABLED); - - if (!pPSC->bFwCtrlLPS) { - if (priv->rtllib->SetFwCmdHandler) - priv->rtllib->SetFwCmdHandler(dev, - FW_CMD_LPS_LEAVE); - } + if (priv->rtllib->SetFwCmdHandler) + priv->rtllib->SetFwCmdHandler(dev, FW_CMD_LPS_LEAVE); } } } diff --git a/drivers/staging/rtl8192e/rtllib.h b/drivers/staging/rtl8192e/rtllib.h index 3c72ed2a30a4..40bea71bcb22 100644 --- a/drivers/staging/rtl8192e/rtllib.h +++ b/drivers/staging/rtl8192e/rtllib.h @@ -1264,9 +1264,6 @@ struct rt_pwr_save_ctrl { u32 CurPsLevel; u32 RegRfPsLevel; - - bool bFwCtrlLPS; - }; #define RT_RF_CHANGE_SOURCE u32 From e77996261369bb5b202d3739270fcc5bc50c5c91 Mon Sep 17 00:00:00 2001 From: Philipp Hortmann Date: Sat, 1 Oct 2022 11:40:36 +0200 Subject: [PATCH 0213/4122] staging: rtl8192e: Remove unchanged variable bInactivePs bInactivePs is just once initialized and never changed. The evaluation will always have the same result. Remove resulting dead code. 
Signed-off-by: Philipp Hortmann Link: https://lore.kernel.org/r/9f46eebf8220a06a1889eaf2d6bac74dd08cfd1f.1664616227.git.philipp.g.hortmann@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl8192e/rtl_cam.c | 18 ++++--- drivers/staging/rtl8192e/rtl8192e/rtl_core.c | 1 - drivers/staging/rtl8192e/rtl8192e/rtl_ps.c | 49 +++++++++----------- drivers/staging/rtl8192e/rtl8192e/rtl_wx.c | 48 +++++++++---------- drivers/staging/rtl8192e/rtllib.h | 2 - 5 files changed, 51 insertions(+), 67 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_cam.c b/drivers/staging/rtl8192e/rtl8192e/rtl_cam.c index 41faeb4b9b9b..8c3ce6cc2541 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_cam.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_cam.c @@ -81,17 +81,15 @@ void rtl92e_set_key(struct net_device *dev, u8 EntryNo, u8 KeyIndex, enum rt_rf_power_state rt_state; rt_state = priv->rtllib->rf_power_state; - if (priv->rtllib->PowerSaveControl.bInactivePs) { - if (rt_state == rf_off) { - if (priv->rtllib->rf_off_reason > RF_CHANGE_BY_IPS) { - netdev_warn(dev, "%s(): RF is OFF.\n", - __func__); - return; - } - mutex_lock(&priv->rtllib->ips_mutex); - rtl92e_ips_leave(dev); - mutex_unlock(&priv->rtllib->ips_mutex); + if (rt_state == rf_off) { + if (priv->rtllib->rf_off_reason > RF_CHANGE_BY_IPS) { + netdev_warn(dev, "%s(): RF is OFF.\n", + __func__); + return; } + mutex_lock(&priv->rtllib->ips_mutex); + rtl92e_ips_leave(dev); + mutex_unlock(&priv->rtllib->ips_mutex); } priv->rtllib->is_set_key = true; if (EntryNo >= TOTAL_CAM_ENTRY) { diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c index 10b79003c633..7c0fd2bce923 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c @@ -886,7 +886,6 @@ static void _rtl92e_init_priv_variable(struct net_device *dev) priv->rf_change_in_progress = false; priv->bHwRfOffAction = 0; priv->SetRFPowerStateInProgress = false; 
- priv->rtllib->PowerSaveControl.bInactivePs = true; priv->rtllib->PowerSaveControl.bIPSModeBackup = false; priv->rtllib->PowerSaveControl.bLeisurePs = true; priv->rtllib->LPSDelayCnt = 0; diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c b/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c index 49c50ec21d04..fba86ef730b5 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c @@ -116,16 +116,14 @@ void rtl92e_ips_enter(struct net_device *dev) &(priv->rtllib->PowerSaveControl); enum rt_rf_power_state rt_state; - if (pPSC->bInactivePs) { - rt_state = priv->rtllib->rf_power_state; - if (rt_state == rf_on && !pPSC->bSwRfProcessing && - (priv->rtllib->state != RTLLIB_LINKED) && - (priv->rtllib->iw_mode != IW_MODE_MASTER)) { - pPSC->eInactivePowerState = rf_off; - priv->isRFOff = true; - priv->bInPowerSaveMode = true; - _rtl92e_ps_update_rf_state(dev); - } + rt_state = priv->rtllib->rf_power_state; + if (rt_state == rf_on && !pPSC->bSwRfProcessing && + (priv->rtllib->state != RTLLIB_LINKED) && + (priv->rtllib->iw_mode != IW_MODE_MASTER)) { + pPSC->eInactivePowerState = rf_off; + priv->isRFOff = true; + priv->bInPowerSaveMode = true; + _rtl92e_ps_update_rf_state(dev); } } @@ -136,14 +134,12 @@ void rtl92e_ips_leave(struct net_device *dev) &(priv->rtllib->PowerSaveControl); enum rt_rf_power_state rt_state; - if (pPSC->bInactivePs) { - rt_state = priv->rtllib->rf_power_state; - if (rt_state != rf_on && !pPSC->bSwRfProcessing && - priv->rtllib->rf_off_reason <= RF_CHANGE_BY_IPS) { - pPSC->eInactivePowerState = rf_on; - priv->bInPowerSaveMode = false; - _rtl92e_ps_update_rf_state(dev); - } + rt_state = priv->rtllib->rf_power_state; + if (rt_state != rf_on && !pPSC->bSwRfProcessing && + priv->rtllib->rf_off_reason <= RF_CHANGE_BY_IPS) { + pPSC->eInactivePowerState = rf_on; + priv->bInPowerSaveMode = false; + _rtl92e_ps_update_rf_state(dev); } } @@ -165,18 +161,15 @@ void rtl92e_rtllib_ips_leave_wq(struct net_device *dev) enum 
rt_rf_power_state rt_state; rt_state = priv->rtllib->rf_power_state; - - if (priv->rtllib->PowerSaveControl.bInactivePs) { - if (rt_state == rf_off) { - if (priv->rtllib->rf_off_reason > RF_CHANGE_BY_IPS) { - netdev_warn(dev, "%s(): RF is OFF.\n", - __func__); - return; - } - netdev_info(dev, "=========>%s(): rtl92e_ips_leave\n", + if (rt_state == rf_off) { + if (priv->rtllib->rf_off_reason > RF_CHANGE_BY_IPS) { + netdev_warn(dev, "%s(): RF is OFF.\n", __func__); - schedule_work(&priv->rtllib->ips_leave_wq); + return; } + netdev_info(dev, "=========>%s(): rtl92e_ips_leave\n", + __func__); + schedule_work(&priv->rtllib->ips_leave_wq); } } diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c b/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c index 4920cb49e381..12eea4fcb9dd 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c @@ -252,22 +252,20 @@ static int _rtl92e_wx_set_mode(struct net_device *dev, mutex_lock(&priv->wx_mutex); if (wrqu->mode == IW_MODE_ADHOC || wrqu->mode == IW_MODE_MONITOR || ieee->bNetPromiscuousMode) { - if (priv->rtllib->PowerSaveControl.bInactivePs) { - if (rt_state == rf_off) { - if (priv->rtllib->rf_off_reason > - RF_CHANGE_BY_IPS) { - netdev_warn(dev, "%s(): RF is OFF.\n", - __func__); - mutex_unlock(&priv->wx_mutex); - return -1; - } - netdev_info(dev, - "=========>%s(): rtl92e_ips_leave\n", + if (rt_state == rf_off) { + if (priv->rtllib->rf_off_reason > + RF_CHANGE_BY_IPS) { + netdev_warn(dev, "%s(): RF is OFF.\n", __func__); - mutex_lock(&priv->rtllib->ips_mutex); - rtl92e_ips_leave(dev); - mutex_unlock(&priv->rtllib->ips_mutex); + mutex_unlock(&priv->wx_mutex); + return -1; } + netdev_info(dev, + "=========>%s(): rtl92e_ips_leave\n", + __func__); + mutex_lock(&priv->rtllib->ips_mutex); + rtl92e_ips_leave(dev); + mutex_unlock(&priv->rtllib->ips_mutex); } } ret = rtllib_wx_set_mode(priv->rtllib, a, wrqu, b); @@ -414,19 +412,17 @@ static int _rtl92e_wx_set_scan(struct net_device *dev, 
priv->rtllib->FirstIe_InScan = true; if (priv->rtllib->state != RTLLIB_LINKED) { - if (priv->rtllib->PowerSaveControl.bInactivePs) { - if (rt_state == rf_off) { - if (priv->rtllib->rf_off_reason > - RF_CHANGE_BY_IPS) { - netdev_warn(dev, "%s(): RF is OFF.\n", - __func__); - mutex_unlock(&priv->wx_mutex); - return -1; - } - mutex_lock(&priv->rtllib->ips_mutex); - rtl92e_ips_leave(dev); - mutex_unlock(&priv->rtllib->ips_mutex); + if (rt_state == rf_off) { + if (priv->rtllib->rf_off_reason > + RF_CHANGE_BY_IPS) { + netdev_warn(dev, "%s(): RF is OFF.\n", + __func__); + mutex_unlock(&priv->wx_mutex); + return -1; } + mutex_lock(&priv->rtllib->ips_mutex); + rtl92e_ips_leave(dev); + mutex_unlock(&priv->rtllib->ips_mutex); } rtllib_stop_scan(priv->rtllib); if (priv->rtllib->LedControlHandler) diff --git a/drivers/staging/rtl8192e/rtllib.h b/drivers/staging/rtl8192e/rtllib.h index 40bea71bcb22..d23d1c3ce39b 100644 --- a/drivers/staging/rtl8192e/rtllib.h +++ b/drivers/staging/rtl8192e/rtllib.h @@ -1250,8 +1250,6 @@ enum rt_rf_power_state { }; struct rt_pwr_save_ctrl { - - bool bInactivePs; bool bIPSModeBackup; bool bSwRfProcessing; enum rt_rf_power_state eInactivePowerState; From 3700c365b6b204eb6d76dac7312fc3bb21faa350 Mon Sep 17 00:00:00 2001 From: Philipp Hortmann Date: Sat, 1 Oct 2022 11:40:49 +0200 Subject: [PATCH 0214/4122] staging: rtl8192e: Remove unused variable bIPSModeBackup bIPSModeBackup is just once initialized and never used. Remove resulting dead code. 
Signed-off-by: Philipp Hortmann Link: https://lore.kernel.org/r/657035f84d266fd5c6f96e9b530a96c2ab4ff900.1664616227.git.philipp.g.hortmann@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl8192e/rtl_core.c | 1 - drivers/staging/rtl8192e/rtllib.h | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c index 7c0fd2bce923..cbb65cfea83a 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c @@ -886,7 +886,6 @@ static void _rtl92e_init_priv_variable(struct net_device *dev) priv->rf_change_in_progress = false; priv->bHwRfOffAction = 0; priv->SetRFPowerStateInProgress = false; - priv->rtllib->PowerSaveControl.bIPSModeBackup = false; priv->rtllib->PowerSaveControl.bLeisurePs = true; priv->rtllib->LPSDelayCnt = 0; priv->rtllib->sta_sleep = LPS_IS_WAKE; diff --git a/drivers/staging/rtl8192e/rtllib.h b/drivers/staging/rtl8192e/rtllib.h index d23d1c3ce39b..e0fa87b12ceb 100644 --- a/drivers/staging/rtl8192e/rtllib.h +++ b/drivers/staging/rtl8192e/rtllib.h @@ -1250,7 +1250,6 @@ enum rt_rf_power_state { }; struct rt_pwr_save_ctrl { - bool bIPSModeBackup; bool bSwRfProcessing; enum rt_rf_power_state eInactivePowerState; enum ips_callback_function ReturnPoint; From 697541a0e53d16a12b00ffa4c9d204b275e68053 Mon Sep 17 00:00:00 2001 From: Philipp Hortmann Date: Sat, 1 Oct 2022 11:40:56 +0200 Subject: [PATCH 0215/4122] staging: rtl8192e: Remove unused variable bInPowerSaveMode bInPowerSaveMode is just once initialized and changed but never evaluated. Remove resulting dead code. 
Signed-off-by: Philipp Hortmann Link: https://lore.kernel.org/r/be3ebecd88b85ba1d87b9b3fbe02f2e78e6a669e.1664616227.git.philipp.g.hortmann@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl8192e/rtl_core.c | 1 - drivers/staging/rtl8192e/rtl8192e/rtl_core.h | 1 - drivers/staging/rtl8192e/rtl8192e/rtl_ps.c | 2 -- 3 files changed, 4 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c index cbb65cfea83a..db55b9b65192 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c @@ -881,7 +881,6 @@ static void _rtl92e_init_priv_variable(struct net_device *dev) priv->hw_radio_off = false; priv->RegRfOff = false; priv->isRFOff = false; - priv->bInPowerSaveMode = false; priv->rtllib->rf_off_reason = 0; priv->rf_change_in_progress = false; priv->bHwRfOffAction = 0; diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h index 7021f9c435d9..28733dc2038b 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h @@ -475,7 +475,6 @@ struct r8192_priv { bool RegRfOff; bool isRFOff; - bool bInPowerSaveMode; u8 bHwRfOffAction; bool rf_change_in_progress; diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c b/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c index fba86ef730b5..1ced5bcebdab 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c @@ -122,7 +122,6 @@ void rtl92e_ips_enter(struct net_device *dev) (priv->rtllib->iw_mode != IW_MODE_MASTER)) { pPSC->eInactivePowerState = rf_off; priv->isRFOff = true; - priv->bInPowerSaveMode = true; _rtl92e_ps_update_rf_state(dev); } } @@ -138,7 +137,6 @@ void rtl92e_ips_leave(struct net_device *dev) if (rt_state != rf_on && !pPSC->bSwRfProcessing && priv->rtllib->rf_off_reason <= RF_CHANGE_BY_IPS) { pPSC->eInactivePowerState = rf_on; - priv->bInPowerSaveMode 
= false; _rtl92e_ps_update_rf_state(dev); } } From 11dc999d77e4c76deb353d9923e783cdc16c3a7d Mon Sep 17 00:00:00 2001 From: Philipp Hortmann Date: Sat, 1 Oct 2022 11:41:04 +0200 Subject: [PATCH 0216/4122] staging: rtl8192e: Remove unused variable isRFOff isRFOff is just once initialized and changed but never evaluated. Remove resulting dead code. Signed-off-by: Philipp Hortmann Link: https://lore.kernel.org/r/53242638126775f6698fdcfc49ac552a4e08578b.1664616227.git.philipp.g.hortmann@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl8192e/rtl_core.c | 1 - drivers/staging/rtl8192e/rtl8192e/rtl_core.h | 1 - drivers/staging/rtl8192e/rtl8192e/rtl_ps.c | 1 - 3 files changed, 3 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c index db55b9b65192..3cddc9a86f28 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c @@ -880,7 +880,6 @@ static void _rtl92e_init_priv_variable(struct net_device *dev) priv->rtllib->wx_set_enc = 0; priv->hw_radio_off = false; priv->RegRfOff = false; - priv->isRFOff = false; priv->rtllib->rf_off_reason = 0; priv->rf_change_in_progress = false; priv->bHwRfOffAction = 0; diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h index 28733dc2038b..23dccd6079ea 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h @@ -474,7 +474,6 @@ struct r8192_priv { u16 ChannelPlan; bool RegRfOff; - bool isRFOff; u8 bHwRfOffAction; bool rf_change_in_progress; diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c b/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c index 1ced5bcebdab..1501f7be8eee 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c @@ -121,7 +121,6 @@ void rtl92e_ips_enter(struct net_device *dev) (priv->rtllib->state != RTLLIB_LINKED) && 
(priv->rtllib->iw_mode != IW_MODE_MASTER)) { pPSC->eInactivePowerState = rf_off; - priv->isRFOff = true; _rtl92e_ps_update_rf_state(dev); } } From 20401e6c6a59f56d9235756168f620e7bffbae26 Mon Sep 17 00:00:00 2001 From: Philipp Hortmann Date: Sat, 1 Oct 2022 11:41:10 +0200 Subject: [PATCH 0217/4122] staging: rtl8192e: Remove unchanged variable RegRfOff RegRfOff is just once initialized with false and then set to false again. All evaluations will result in false. Remove resulting dead code. Signed-off-by: Philipp Hortmann Link: https://lore.kernel.org/r/fa306d364b43fee7b81f5289309e93bb6fccdba5.1664616227.git.philipp.g.hortmann@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c | 7 +------ drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c | 1 - drivers/staging/rtl8192e/rtl8192e/rtl_core.c | 1 - drivers/staging/rtl8192e/rtl8192e/rtl_core.h | 2 -- 4 files changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c index 18e4e5d84878..f2cbec6eb08f 100644 --- a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c +++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c @@ -624,9 +624,6 @@ start: } priv->pFirmware->status = FW_STATUS_0_INIT; - if (priv->RegRfOff) - priv->rtllib->rf_power_state = rf_off; - ulRegRead = rtl92e_readl(dev, CPU_GEN); if (priv->pFirmware->status == FW_STATUS_0_INIT) ulRegRead |= CPU_GEN_SYSTEM_RESET; @@ -756,9 +753,7 @@ start: rtl92e_writeb(dev, 0x87, 0x0); - if (priv->RegRfOff) { - rtl92e_set_rf_state(dev, rf_off, RF_CHANGE_BY_SW); - } else if (priv->rtllib->rf_off_reason > RF_CHANGE_BY_PS) { + if (priv->rtllib->rf_off_reason > RF_CHANGE_BY_PS) { rtl92e_set_rf_state(dev, rf_off, priv->rtllib->rf_off_reason); } else if (priv->rtllib->rf_off_reason >= RF_CHANGE_BY_IPS) { rtl92e_set_rf_state(dev, rf_off, priv->rtllib->rf_off_reason); diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c 
b/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c index 1b592258e640..a44dffa76a39 100644 --- a/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c +++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c @@ -1325,7 +1325,6 @@ static bool _rtl92e_set_rf_power_state(struct net_device *dev, do { InitilizeCount--; - priv->RegRfOff = false; rtstatus = rtl92e_enable_nic(dev); } while (!rtstatus && (InitilizeCount > 0)); diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c index 3cddc9a86f28..e77a73df9bc5 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c @@ -879,7 +879,6 @@ static void _rtl92e_init_priv_variable(struct net_device *dev) priv->RxCounter = 0; priv->rtllib->wx_set_enc = 0; priv->hw_radio_off = false; - priv->RegRfOff = false; priv->rtllib->rf_off_reason = 0; priv->rf_change_in_progress = false; priv->bHwRfOffAction = 0; diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h index 23dccd6079ea..7d716fa12995 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h @@ -472,8 +472,6 @@ struct r8192_priv { u16 RegChannelPlan; u16 ChannelPlan; - - bool RegRfOff; u8 bHwRfOffAction; bool rf_change_in_progress; From ec437736505f4ba1152730d8c0ec3d0167bb358b Mon Sep 17 00:00:00 2001 From: Philipp Hortmann Date: Sat, 1 Oct 2022 11:41:18 +0200 Subject: [PATCH 0218/4122] staging: rtl8192e: Remove unchanged variable bDisableNormalResetCheck bDisableNormalResetCheck is just once initialized with false. All evaluations will result in !false. Remove resulting dead code. 
Signed-off-by: Philipp Hortmann Link: https://lore.kernel.org/r/026313f17cf708bf8fa0661f83b2c2b515e55b12.1664616227.git.philipp.g.hortmann@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl8192e/rtl_core.c | 6 ++---- drivers/staging/rtl8192e/rtl8192e/rtl_core.h | 1 - 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c index e77a73df9bc5..9dcae5d71fb9 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c @@ -871,7 +871,6 @@ static void _rtl92e_init_priv_variable(struct net_device *dev) priv->ScanDelay = 50; priv->ResetProgress = RESET_TYPE_NORESET; priv->bForcedSilentReset = false; - priv->bDisableNormalResetCheck = false; priv->force_reset = false; memset(priv->rtllib->swcamtable, 0, sizeof(struct sw_cam_table) * 32); @@ -1403,13 +1402,12 @@ static void _rtl92e_watchdog_wq_cb(void *data) } spin_unlock_irqrestore(&priv->tx_lock, flags); - if (!priv->bDisableNormalResetCheck && ResetType == RESET_TYPE_NORMAL) { + if (ResetType == RESET_TYPE_NORMAL) { priv->ResetProgress = RESET_TYPE_NORMAL; return; } - if (((priv->force_reset) || (!priv->bDisableNormalResetCheck && - ResetType == RESET_TYPE_SILENT))) + if ((priv->force_reset || ResetType == RESET_TYPE_SILENT)) _rtl92e_if_silent_reset(dev); priv->force_reset = false; priv->bForcedSilentReset = false; diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h index 7d716fa12995..8a24037a93ec 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h @@ -541,7 +541,6 @@ struct r8192_priv { enum reset_type ResetProgress; bool bForcedSilentReset; - bool bDisableNormalResetCheck; u16 TxCounter; u16 RxCounter; bool bResetInProgress; From eda244c081e60a576eecaf635205c4fa5dc9032f Mon Sep 17 00:00:00 2001 From: Philipp Hortmann Date: Sat, 1 Oct 2022 
11:41:24 +0200 Subject: [PATCH 0219/4122] staging: rtl8192e: Remove unused variable bForcedSilentReset bForcedSilentReset is never evaluated. Remove resulting dead code. Signed-off-by: Philipp Hortmann Link: https://lore.kernel.org/r/10504e628eae110d73cd43050e7cc5801ce7f17b.1664616227.git.philipp.g.hortmann@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl8192e/rtl_core.c | 4 ---- drivers/staging/rtl8192e/rtl8192e/rtl_core.h | 1 - 2 files changed, 5 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c index 9dcae5d71fb9..ae0fcbbee2bb 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c @@ -870,7 +870,6 @@ static void _rtl92e_init_priv_variable(struct net_device *dev) priv->CckPwEnl = 6; priv->ScanDelay = 50; priv->ResetProgress = RESET_TYPE_NORESET; - priv->bForcedSilentReset = false; priv->force_reset = false; memset(priv->rtllib->swcamtable, 0, sizeof(struct sw_cam_table) * 32); @@ -1240,8 +1239,6 @@ RESET_START: END: priv->ResetProgress = RESET_TYPE_NORESET; priv->reset_count++; - - priv->bForcedSilentReset = false; priv->bResetInProgress = false; rtl92e_writeb(dev, UFWP, 1); @@ -1410,7 +1407,6 @@ static void _rtl92e_watchdog_wq_cb(void *data) if ((priv->force_reset || ResetType == RESET_TYPE_SILENT)) _rtl92e_if_silent_reset(dev); priv->force_reset = false; - priv->bForcedSilentReset = false; priv->bResetInProgress = false; } diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h index 8a24037a93ec..763ed761bb38 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h @@ -540,7 +540,6 @@ struct r8192_priv { u32 reset_count; enum reset_type ResetProgress; - bool bForcedSilentReset; u16 TxCounter; u16 RxCounter; bool bResetInProgress; From 93057f8354462708e9721ad6ff6a2294a5ae8a98 Mon Sep 17 00:00:00 2001 From: Philipp 
Hortmann Date: Sat, 1 Oct 2022 11:41:31 +0200 Subject: [PATCH 0220/4122] staging: rtl8192e: Remove unused variable ScanDelay ScanDelay is never evaluated. Remove resulting dead code. Signed-off-by: Philipp Hortmann Link: https://lore.kernel.org/r/13d0b993d077490c65768f2b11a631a447f8e86f.1664616227.git.philipp.g.hortmann@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c | 1 - drivers/staging/rtl8192e/rtl8192e/rtl_core.c | 1 - drivers/staging/rtl8192e/rtl8192e/rtl_core.h | 2 -- 3 files changed, 4 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c index f2cbec6eb08f..c3dcaa27fd2e 100644 --- a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c +++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c @@ -503,7 +503,6 @@ static void _rtl92e_read_eeprom_info(struct net_device *dev) priv->ChannelPlan = 0x0; break; case EEPROM_CID_Nettronix: - priv->ScanDelay = 100; priv->CustomerID = RT_CID_Nettronix; break; case EEPROM_CID_Pronet: diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c index ae0fcbbee2bb..f07f0fc690a3 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c @@ -868,7 +868,6 @@ static void _rtl92e_init_priv_variable(struct net_device *dev) priv->rfa_txpowertrackingindex = 0; priv->rfc_txpowertrackingindex = 0; priv->CckPwEnl = 6; - priv->ScanDelay = 50; priv->ResetProgress = RESET_TYPE_NORESET; priv->force_reset = false; memset(priv->rtllib->swcamtable, 0, sizeof(struct sw_cam_table) * 32); diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h index 763ed761bb38..c536131ecd5d 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h @@ -410,8 +410,6 @@ struct r8192_priv { short chan; short sens; short max_sens; - - u8 ScanDelay; bool ps_force; u32 
irq_mask[2]; From 753def95543cf1a01f5487c0ae8cce3ee5180d77 Mon Sep 17 00:00:00 2001 From: Philipp Hortmann Date: Sat, 1 Oct 2022 11:41:38 +0200 Subject: [PATCH 0221/4122] staging: rtl8192e: Remove unused variable bDriverIsGoingToUnload bDriverIsGoingToUnload is never evaluated. Remove resulting dead code. Signed-off-by: Philipp Hortmann Link: https://lore.kernel.org/r/48a84cbe359f159cc9c296b261256a405ee3884e.1664616227.git.philipp.g.hortmann@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl8192e/rtl_core.c | 3 --- drivers/staging/rtl8192e/rtl8192e/rtl_core.h | 1 - 2 files changed, 4 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c index f07f0fc690a3..213aac943ef7 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c @@ -686,7 +686,6 @@ static int _rtl92e_sta_up(struct net_device *dev, bool is_silent_reset) (&priv->rtllib->PowerSaveControl); bool init_status; - priv->bDriverIsGoingToUnload = false; priv->bdisable_nic = false; priv->up = 1; @@ -735,7 +734,6 @@ static int _rtl92e_sta_down(struct net_device *dev, bool shutdownrf) if (priv->rtllib->state == RTLLIB_LINKED) rtl92e_leisure_ps_leave(dev); - priv->bDriverIsGoingToUnload = true; priv->up = 0; priv->rtllib->ieee_up = 0; priv->bfirst_after_down = true; @@ -838,7 +836,6 @@ static void _rtl92e_init_priv_variable(struct net_device *dev) priv->polling_timer_on = 0; priv->up_first_time = 1; priv->blinked_ingpio = false; - priv->bDriverIsGoingToUnload = false; priv->being_init_adapter = false; priv->initialized_at_probe = false; priv->bdisable_nic = false; diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h index c536131ecd5d..1ae3c77e2fef 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h @@ -311,7 +311,6 @@ struct r8192_priv { bool bfirst_after_down; bool 
initialized_at_probe; bool being_init_adapter; - bool bDriverIsGoingToUnload; int irq; short irq_enabled; From 11247c998b00298e9b621f3121380494146c40ff Mon Sep 17 00:00:00 2001 From: Michael Straube Date: Sun, 2 Oct 2022 09:48:23 +0200 Subject: [PATCH 0222/4122] staging: r8188eu: merge odm_types.h into other headers The macros SET_TX_DESC_ANTSEL_{A,B,C}_88E are used in odm_RTL8188E.c. Move them from odm_types.h to odm_RTL8188E.h. ODM_CE is used in places where also ODM_ITRF_USB is used in the code. Move the ODM_CE define to the header that defines ODM_ITRF_USB. While at it remove an extra space between '#' and 'define ODM_ITRF_USB'. The header odm_types.h is now empty and we can remove it. Signed-off-by: Michael Straube Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221002074827.8566-2-straube.linux@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/include/odm.h | 3 ++- drivers/staging/r8188eu/include/odm_RTL8188E.h | 7 +++++++ drivers/staging/r8188eu/include/odm_types.h | 16 ---------------- drivers/staging/r8188eu/include/rtl8188e_hal.h | 1 - 4 files changed, 9 insertions(+), 18 deletions(-) delete mode 100644 drivers/staging/r8188eu/include/odm_types.h diff --git a/drivers/staging/r8188eu/include/odm.h b/drivers/staging/r8188eu/include/odm.h index f131e17167bf..89b01dd614ba 100644 --- a/drivers/staging/r8188eu/include/odm.h +++ b/drivers/staging/r8188eu/include/odm.h @@ -119,7 +119,8 @@ enum odm_ability_def { ODM_BB_PWR_TRA = BIT(8), }; -# define ODM_ITRF_USB 0x2 +#define ODM_ITRF_USB 0x2 +#define ODM_CE 0x04 /* ODM_CMNINFO_WM_MODE */ enum odm_wireless_mode { diff --git a/drivers/staging/r8188eu/include/odm_RTL8188E.h b/drivers/staging/r8188eu/include/odm_RTL8188E.h index 3c6471f1a893..4f16af248591 100644 --- a/drivers/staging/r8188eu/include/odm_RTL8188E.h +++ b/drivers/staging/r8188eu/include/odm_RTL8188E.h @@ -11,6 +11,13 @@ #define MAIN_ANT_CGCS_RX 0 #define AUX_ANT_CGCS_RX 1 +#define
SET_TX_DESC_ANTSEL_A_88E(__ptxdesc, __value) \ + le32p_replace_bits((__le32 *)(__ptxdesc + 8), __value, BIT(24)) +#define SET_TX_DESC_ANTSEL_B_88E(__ptxdesc, __value) \ + le32p_replace_bits((__le32 *)(__ptxdesc + 8), __value, BIT(25)) +#define SET_TX_DESC_ANTSEL_C_88E(__ptxdesc, __value) \ + le32p_replace_bits((__le32 *)(__ptxdesc + 28), __value, BIT(29)) + void ODM_AntennaDiversityInit_88E(struct odm_dm_struct *pDM_Odm); void ODM_AntennaDiversity_88E(struct odm_dm_struct *pDM_Odm); diff --git a/drivers/staging/r8188eu/include/odm_types.h b/drivers/staging/r8188eu/include/odm_types.h deleted file mode 100644 index 76302df4b330..000000000000 --- a/drivers/staging/r8188eu/include/odm_types.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ -/* Copyright(c) 2007 - 2011 Realtek Corporation. */ - -#ifndef __ODM_TYPES_H__ -#define __ODM_TYPES_H__ - -#define ODM_CE 0x04 /* BIT(2) */ - -#define SET_TX_DESC_ANTSEL_A_88E(__ptxdesc, __value) \ - le32p_replace_bits((__le32 *)(__ptxdesc + 8), __value, BIT(24)) -#define SET_TX_DESC_ANTSEL_B_88E(__ptxdesc, __value) \ - le32p_replace_bits((__le32 *)(__ptxdesc + 8), __value, BIT(25)) -#define SET_TX_DESC_ANTSEL_C_88E(__ptxdesc, __value) \ - le32p_replace_bits((__le32 *)(__ptxdesc + 28), __value, BIT(29)) - -#endif /* __ODM_TYPES_H__ */ diff --git a/drivers/staging/r8188eu/include/rtl8188e_hal.h b/drivers/staging/r8188eu/include/rtl8188e_hal.h index ed4091e7cc7e..a1e88e6d5c0c 100644 --- a/drivers/staging/r8188eu/include/rtl8188e_hal.h +++ b/drivers/staging/r8188eu/include/rtl8188e_hal.h @@ -14,7 +14,6 @@ #include "rtl8188e_xmit.h" #include "rtl8188e_cmd.h" #include "rtw_efuse.h" -#include "odm_types.h" #include "odm.h" #include "odm_HWConfig.h" #include "odm_RegDefine11N.h" From ef2a2422ab86d99e0bfd940c38c60b77c9ad9954 Mon Sep 17 00:00:00 2001 From: Michael Straube Date: Sun, 2 Oct 2022 09:48:24 +0200 Subject: [PATCH 0223/4122] staging: r8188eu: convert rtw_init_evt_priv() to common error logic 
Convert the function rtw_init_evt_priv() to common kernel error logic. Return 0 on success and negative value on failure. This is part of getting rid of returning _SUCCESS and _FAIL which uses inverted error logic and is used all over the driver. Signed-off-by: Michael Straube Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221002074827.8566-3-straube.linux@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_cmd.c | 8 +++----- drivers/staging/r8188eu/include/rtw_cmd.h | 2 +- drivers/staging/r8188eu/os_dep/os_intfs.c | 2 +- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_cmd.c b/drivers/staging/r8188eu/core/rtw_cmd.c index 3fadace33de6..88b939e42f5a 100644 --- a/drivers/staging/r8188eu/core/rtw_cmd.c +++ b/drivers/staging/r8188eu/core/rtw_cmd.c @@ -90,10 +90,8 @@ u32 rtw_init_cmd_priv(struct cmd_priv *pcmdpriv) return _SUCCESS; } -u32 rtw_init_evt_priv(struct evt_priv *pevtpriv) +int rtw_init_evt_priv(struct evt_priv *pevtpriv) { - u32 res = _SUCCESS; - /* allocate DMA-able/Non-Page memory for cmd_buf and rsp_buf */ atomic_set(&pevtpriv->event_seq, 0); @@ -101,9 +99,9 @@ u32 rtw_init_evt_priv(struct evt_priv *pevtpriv) pevtpriv->c2h_wk_alive = false; pevtpriv->c2h_queue = rtw_cbuf_alloc(C2H_QUEUE_MAX_LEN + 1); if (!pevtpriv->c2h_queue) - res = _FAIL; + return -ENOMEM; - return res; + return 0; } void rtw_free_cmd_priv(struct cmd_priv *pcmdpriv) diff --git a/drivers/staging/r8188eu/include/rtw_cmd.h b/drivers/staging/r8188eu/include/rtw_cmd.h index 9a76aa85de94..c6c48e1fb961 100644 --- a/drivers/staging/r8188eu/include/rtw_cmd.h +++ b/drivers/staging/r8188eu/include/rtw_cmd.h @@ -85,7 +85,7 @@ int rtw_cmd_thread(void *context); u32 rtw_init_cmd_priv(struct cmd_priv *pcmdpriv); void rtw_free_cmd_priv(struct cmd_priv *pcmdpriv); -u32 rtw_init_evt_priv(struct evt_priv *pevtpriv); +int rtw_init_evt_priv(struct evt_priv *pevtpriv); void rtw_free_evt_priv(struct evt_priv 
*pevtpriv); void rtw_evt_notify_isr(struct evt_priv *pevtpriv); u8 p2p_protocol_wk_cmd(struct adapter *padapter, int intCmdType); diff --git a/drivers/staging/r8188eu/os_dep/os_intfs.c b/drivers/staging/r8188eu/os_dep/os_intfs.c index 6a45315d01a2..dd4c89d7390d 100644 --- a/drivers/staging/r8188eu/os_dep/os_intfs.c +++ b/drivers/staging/r8188eu/os_dep/os_intfs.c @@ -468,7 +468,7 @@ u8 rtw_init_drv_sw(struct adapter *padapter) padapter->cmdpriv.padapter = padapter; - if ((rtw_init_evt_priv(&padapter->evtpriv)) == _FAIL) { + if (rtw_init_evt_priv(&padapter->evtpriv)) { dev_err(dvobj_to_dev(padapter->dvobj), "rtw_init_evt_priv failed\n"); goto free_cmd_priv; } From d4fda24757678311ff0a219bd150a3c3aeb6a2f8 Mon Sep 17 00:00:00 2001 From: Michael Straube Date: Sun, 2 Oct 2022 09:48:25 +0200 Subject: [PATCH 0224/4122] staging: r8188eu: convert rtw_init_cmd_priv() to common error logic Convert the function rtw_init_cmd_priv() to common kernel error logic. Return 0 on success and negative value on failure. This is part of getting rid of returning _SUCCESS and _FAIL which uses inverted error logic and is used all over the driver. 
Signed-off-by: Michael Straube Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221002074827.8566-4-straube.linux@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_cmd.c | 8 ++++---- drivers/staging/r8188eu/include/rtw_cmd.h | 2 +- drivers/staging/r8188eu/os_dep/os_intfs.c | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_cmd.c b/drivers/staging/r8188eu/core/rtw_cmd.c index 88b939e42f5a..fdc0c71e28a4 100644 --- a/drivers/staging/r8188eu/core/rtw_cmd.c +++ b/drivers/staging/r8188eu/core/rtw_cmd.c @@ -54,7 +54,7 @@ exit: return _SUCCESS; } -u32 rtw_init_cmd_priv(struct cmd_priv *pcmdpriv) +int rtw_init_cmd_priv(struct cmd_priv *pcmdpriv) { init_completion(&pcmdpriv->enqueue_cmd); /* sema_init(&(pcmdpriv->cmd_done_sema), 0); */ @@ -71,7 +71,7 @@ u32 rtw_init_cmd_priv(struct cmd_priv *pcmdpriv) GFP_KERNEL); if (!pcmdpriv->cmd_allocated_buf) - return _FAIL; + return -ENOMEM; pcmdpriv->cmd_buf = pcmdpriv->cmd_allocated_buf + CMDBUFF_ALIGN_SZ - ((size_t)(pcmdpriv->cmd_allocated_buf) & (CMDBUFF_ALIGN_SZ - 1)); @@ -79,7 +79,7 @@ u32 rtw_init_cmd_priv(struct cmd_priv *pcmdpriv) if (!pcmdpriv->rsp_allocated_buf) { kfree(pcmdpriv->cmd_allocated_buf); - return _FAIL; + return -ENOMEM; } pcmdpriv->rsp_buf = pcmdpriv->rsp_allocated_buf + 4 - ((size_t)(pcmdpriv->rsp_allocated_buf) & 3); @@ -87,7 +87,7 @@ u32 rtw_init_cmd_priv(struct cmd_priv *pcmdpriv) pcmdpriv->cmd_done_cnt = 0; pcmdpriv->rsp_cnt = 0; - return _SUCCESS; + return 0; } int rtw_init_evt_priv(struct evt_priv *pevtpriv) diff --git a/drivers/staging/r8188eu/include/rtw_cmd.h b/drivers/staging/r8188eu/include/rtw_cmd.h index c6c48e1fb961..8bbfeb42604f 100644 --- a/drivers/staging/r8188eu/include/rtw_cmd.h +++ b/drivers/staging/r8188eu/include/rtw_cmd.h @@ -82,7 +82,7 @@ void rtw_free_cmd_obj(struct cmd_obj *pcmd); int rtw_cmd_thread(void *context); -u32 rtw_init_cmd_priv(struct cmd_priv *pcmdpriv); +int 
rtw_init_cmd_priv(struct cmd_priv *pcmdpriv); void rtw_free_cmd_priv(struct cmd_priv *pcmdpriv); int rtw_init_evt_priv(struct evt_priv *pevtpriv); diff --git a/drivers/staging/r8188eu/os_dep/os_intfs.c b/drivers/staging/r8188eu/os_dep/os_intfs.c index dd4c89d7390d..490e0c7dc034 100644 --- a/drivers/staging/r8188eu/os_dep/os_intfs.c +++ b/drivers/staging/r8188eu/os_dep/os_intfs.c @@ -461,7 +461,7 @@ u8 rtw_reset_drv_sw(struct adapter *padapter) u8 rtw_init_drv_sw(struct adapter *padapter) { - if ((rtw_init_cmd_priv(&padapter->cmdpriv)) == _FAIL) { + if (rtw_init_cmd_priv(&padapter->cmdpriv)) { dev_err(dvobj_to_dev(padapter->dvobj), "rtw_init_cmd_priv failed\n"); return _FAIL; } From 40b3f62227d46d21eab71832e331e3aa740b1b34 Mon Sep 17 00:00:00 2001 From: Michael Straube Date: Sun, 2 Oct 2022 09:48:26 +0200 Subject: [PATCH 0225/4122] staging: r8188eu: convert rtw_init_mlme_priv() to common error logic Convert the function rtw_init_mlme_priv() to common kernel error logic. Return 0 on success and negative value on failure. This is part of getting rid of returning _SUCCESS and _FAIL which uses inverted error logic and is used all over the driver. 
Signed-off-by: Michael Straube Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221002074827.8566-5-straube.linux@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme.c | 12 ++++-------- drivers/staging/r8188eu/os_dep/os_intfs.c | 2 +- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme.c b/drivers/staging/r8188eu/core/rtw_mlme.c index 5ca03d6cac32..1f69e5c57d5d 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme.c +++ b/drivers/staging/r8188eu/core/rtw_mlme.c @@ -224,7 +224,6 @@ int rtw_init_mlme_priv(struct adapter *padapter)/* struct mlme_priv *pmlmepriv) u8 *pbuf; struct wlan_network *pnetwork; struct mlme_priv *pmlmepriv = &padapter->mlmepriv; - int res = _SUCCESS; /* We don't need to memset padapter->XXX to zero, because adapter is allocated by vzalloc(). */ @@ -245,10 +244,9 @@ int rtw_init_mlme_priv(struct adapter *padapter)/* struct mlme_priv *pmlmepriv) pbuf = vzalloc(MAX_BSS_CNT * (sizeof(struct wlan_network))); - if (!pbuf) { - res = _FAIL; - goto exit; - } + if (!pbuf) + return -ENOMEM; + pmlmepriv->free_bss_buf = pbuf; pnetwork = (struct wlan_network *)pbuf; @@ -265,9 +263,7 @@ int rtw_init_mlme_priv(struct adapter *padapter)/* struct mlme_priv *pmlmepriv) rtw_init_mlme_timer(padapter); -exit: - - return res; + return 0; } void rtw_free_mlme_priv(struct mlme_priv *pmlmepriv) diff --git a/drivers/staging/r8188eu/os_dep/os_intfs.c b/drivers/staging/r8188eu/os_dep/os_intfs.c index 490e0c7dc034..d8b8a5291e40 100644 --- a/drivers/staging/r8188eu/os_dep/os_intfs.c +++ b/drivers/staging/r8188eu/os_dep/os_intfs.c @@ -473,7 +473,7 @@ u8 rtw_init_drv_sw(struct adapter *padapter) goto free_cmd_priv; } - if (rtw_init_mlme_priv(padapter) == _FAIL) { + if (rtw_init_mlme_priv(padapter)) { dev_err(dvobj_to_dev(padapter->dvobj), "rtw_init_mlme_priv failed\n"); goto free_evt_priv; } From fd692ab4c511a4dd5685e9e34e4ee69ddc362675 Mon Sep 17 00:00:00 2001 From: 
Michael Straube Date: Sun, 2 Oct 2022 09:48:27 +0200 Subject: [PATCH 0226/4122] staging: r8188eu: convert _rtw_init_sta_priv() to common error logic Convert the function _rtw_init_sta_priv() to common kernel error logic. Return 0 on success and negative value on failure. This is part of getting rid of returning _SUCCESS and _FAIL which uses inverted error logic and is used all over the driver. Signed-off-by: Michael Straube Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221002074827.8566-6-straube.linux@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_sta_mgt.c | 6 +++--- drivers/staging/r8188eu/include/sta_info.h | 2 +- drivers/staging/r8188eu/os_dep/os_intfs.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_sta_mgt.c b/drivers/staging/r8188eu/core/rtw_sta_mgt.c index 98eeb16cab6c..bbde5c03f9ae 100644 --- a/drivers/staging/r8188eu/core/rtw_sta_mgt.c +++ b/drivers/staging/r8188eu/core/rtw_sta_mgt.c @@ -45,7 +45,7 @@ static void _rtw_init_stainfo(struct sta_info *psta) psta->keep_alive_trycnt = 0; } -u32 _rtw_init_sta_priv(struct sta_priv *pstapriv) +int _rtw_init_sta_priv(struct sta_priv *pstapriv) { struct sta_info *psta; s32 i; @@ -53,7 +53,7 @@ u32 _rtw_init_sta_priv(struct sta_priv *pstapriv) pstapriv->pallocated_stainfo_buf = vzalloc(sizeof(struct sta_info) * NUM_STA + 4); if (!pstapriv->pallocated_stainfo_buf) - return _FAIL; + return -ENOMEM; pstapriv->pstainfo_buf = pstapriv->pallocated_stainfo_buf + 4 - ((size_t)(pstapriv->pallocated_stainfo_buf) & 3); @@ -93,7 +93,7 @@ u32 _rtw_init_sta_priv(struct sta_priv *pstapriv) pstapriv->expire_to = 3; /* 3*2 = 6 sec */ pstapriv->max_num_sta = NUM_STA; - return _SUCCESS; + return 0; } inline int rtw_stainfo_offset(struct sta_priv *stapriv, struct sta_info *sta) diff --git a/drivers/staging/r8188eu/include/sta_info.h b/drivers/staging/r8188eu/include/sta_info.h index 4112c837bcef..f76e086b5701 100644 
--- a/drivers/staging/r8188eu/include/sta_info.h +++ b/drivers/staging/r8188eu/include/sta_info.h @@ -295,7 +295,7 @@ static inline u32 wifi_mac_hash(u8 *mac) return x; } -extern u32 _rtw_init_sta_priv(struct sta_priv *pstapriv); +extern int _rtw_init_sta_priv(struct sta_priv *pstapriv); extern void _rtw_free_sta_priv(struct sta_priv *pstapriv); #define stainfo_offset_valid(offset) (offset < NUM_STA && offset >= 0) diff --git a/drivers/staging/r8188eu/os_dep/os_intfs.c b/drivers/staging/r8188eu/os_dep/os_intfs.c index d8b8a5291e40..e43ef7e5ba70 100644 --- a/drivers/staging/r8188eu/os_dep/os_intfs.c +++ b/drivers/staging/r8188eu/os_dep/os_intfs.c @@ -494,7 +494,7 @@ u8 rtw_init_drv_sw(struct adapter *padapter) goto free_xmit_priv; } - if (_rtw_init_sta_priv(&padapter->stapriv) == _FAIL) { + if (_rtw_init_sta_priv(&padapter->stapriv)) { dev_err(dvobj_to_dev(padapter->dvobj), "_rtw_init_sta_priv failed\n"); goto free_recv_priv; } From a609750c8bf0e29060862f2f232f775129c31593 Mon Sep 17 00:00:00 2001 From: Michael Straube Date: Sun, 2 Oct 2022 16:35:43 +0200 Subject: [PATCH 0227/4122] staging: r8188eu: convert rtw_reset_drv_sw() to void The function rtw_reset_drv_sw() always returns _SUCCESS. None of its callers use the return value. Convert the return type of that function from u8 to void. This is part of getting rid of _FAIL / _SUCCESS. 
Signed-off-by: Michael Straube Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221002143544.7974-2-straube.linux@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/include/osdep_intf.h | 2 +- drivers/staging/r8188eu/os_dep/os_intfs.c | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/staging/r8188eu/include/osdep_intf.h b/drivers/staging/r8188eu/include/osdep_intf.h index 36511c469546..ce8e1bc4a61c 100644 --- a/drivers/staging/r8188eu/include/osdep_intf.h +++ b/drivers/staging/r8188eu/include/osdep_intf.h @@ -44,7 +44,7 @@ int netdev_close(struct net_device *pnetdev); u8 rtw_init_drv_sw(struct adapter *padapter); u8 rtw_free_drv_sw(struct adapter *padapter); -u8 rtw_reset_drv_sw(struct adapter *padapter); +void rtw_reset_drv_sw(struct adapter *padapter); u32 rtw_start_drv_threads(struct adapter *padapter); void rtw_stop_drv_threads (struct adapter *padapter); diff --git a/drivers/staging/r8188eu/os_dep/os_intfs.c b/drivers/staging/r8188eu/os_dep/os_intfs.c index e43ef7e5ba70..2983e665bd5c 100644 --- a/drivers/staging/r8188eu/os_dep/os_intfs.c +++ b/drivers/staging/r8188eu/os_dep/os_intfs.c @@ -433,7 +433,7 @@ static void rtw_init_default_value(struct adapter *padapter) padapter->bShowGetP2PState = 1; } -u8 rtw_reset_drv_sw(struct adapter *padapter) +void rtw_reset_drv_sw(struct adapter *padapter) { struct mlme_priv *pmlmepriv = &padapter->mlmepriv; @@ -455,8 +455,6 @@ u8 rtw_reset_drv_sw(struct adapter *padapter) padapter->mlmeextpriv.sitesurvey_res.state = SCAN_DISABLE; rtw_set_signal_stat_timer(&padapter->recvpriv); - - return _SUCCESS; } u8 rtw_init_drv_sw(struct adapter *padapter) From c55f29446d175049700bdfb41a8d30fa67640d15 Mon Sep 17 00:00:00 2001 From: Michael Straube Date: Sun, 2 Oct 2022 16:35:44 +0200 Subject: [PATCH 0228/4122] staging: r8188eu: convert rtw_free_drv_sw() to void The function rtw_free_drv_sw() always returns _SUCCESS. None of its callers use the return value. 
Convert the return type of that function from u8 to void. This is part of getting rid of _FAIL / _SUCCESS. Signed-off-by: Michael Straube Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221002143544.7974-3-straube.linux@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/include/osdep_intf.h | 2 +- drivers/staging/r8188eu/os_dep/os_intfs.c | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/staging/r8188eu/include/osdep_intf.h b/drivers/staging/r8188eu/include/osdep_intf.h index ce8e1bc4a61c..0f7d74a3ff6d 100644 --- a/drivers/staging/r8188eu/include/osdep_intf.h +++ b/drivers/staging/r8188eu/include/osdep_intf.h @@ -43,7 +43,7 @@ int netdev_open(struct net_device *pnetdev); int netdev_close(struct net_device *pnetdev); u8 rtw_init_drv_sw(struct adapter *padapter); -u8 rtw_free_drv_sw(struct adapter *padapter); +void rtw_free_drv_sw(struct adapter *padapter); void rtw_reset_drv_sw(struct adapter *padapter); u32 rtw_start_drv_threads(struct adapter *padapter); diff --git a/drivers/staging/r8188eu/os_dep/os_intfs.c b/drivers/staging/r8188eu/os_dep/os_intfs.c index 2983e665bd5c..38e324754c8f 100644 --- a/drivers/staging/r8188eu/os_dep/os_intfs.c +++ b/drivers/staging/r8188eu/os_dep/os_intfs.c @@ -548,7 +548,7 @@ void rtw_cancel_all_timer(struct adapter *padapter) _cancel_timer_ex(&padapter->recvpriv.signal_stat_timer); } -u8 rtw_free_drv_sw(struct adapter *padapter) +void rtw_free_drv_sw(struct adapter *padapter) { /* we can call rtw_p2p_enable here, but: */ /* 1. rtw_p2p_enable may have IO operation */ @@ -585,8 +585,6 @@ u8 rtw_free_drv_sw(struct adapter *padapter) /* clear pbuddystruct adapter to avoid access wrong pointer. 
*/ if (padapter->pbuddy_adapter) padapter->pbuddy_adapter->pbuddy_adapter = NULL; - - return _SUCCESS; } void netdev_br_init(struct net_device *netdev) From c21899dcab317de9007e65dbf218400782ee064c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 4 Oct 2022 16:09:44 +0100 Subject: [PATCH 0229/4122] staging: rtl8192u: Fix spelling mistake athros -> Atheros and fix grammer There is a spelling mistake, correct it and fix capital letter on the proper noun. Also fix the grammar. Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20221004150944.148157-1-colin.i.king@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192u/ieee80211/ieee80211_rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211_rx.c b/drivers/staging/rtl8192u/ieee80211/ieee80211_rx.c index b58e75932ecd..f142d0986990 100644 --- a/drivers/staging/rtl8192u/ieee80211/ieee80211_rx.c +++ b/drivers/staging/rtl8192u/ieee80211/ieee80211_rx.c @@ -1806,7 +1806,7 @@ int ieee80211_parse_info_param(struct ieee80211_device *ieee, info_element->data[0] == 0x00 && info_element->data[1] == 0x13 && info_element->data[2] == 0x74)) { - netdev_dbg(ieee->dev, "========> athros AP is exist\n"); + netdev_dbg(ieee->dev, "========> Atheros AP exists\n"); network->atheros_cap_exist = true; } else network->atheros_cap_exist = false; From 0fd4d8b7e87e472d54fdb4912703c1c09d12ac70 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 4 Oct 2022 16:35:39 +0100 Subject: [PATCH 0230/4122] staging: rtl8723bs: Remove redundant initialization of variable efuseValue The variable efuseValue is being initialized with a value that is never read. The variable is being re-assigned later on. The initialization is redundant and can be removed.
Cleans up warning: drivers/staging/rtl8723bs/core/rtw_efuse.c:285:6: warning: variable 'efuseValue' set but not used [-Wunused-but-set-variable] Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20221004153539.150867-1-colin.i.king@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8723bs/core/rtw_efuse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rtl8723bs/core/rtw_efuse.c b/drivers/staging/rtl8723bs/core/rtw_efuse.c index 06e727ce9cc2..eb848f9bbf2c 100644 --- a/drivers/staging/rtl8723bs/core/rtw_efuse.c +++ b/drivers/staging/rtl8723bs/core/rtw_efuse.c @@ -282,7 +282,7 @@ u8 efuse_OneByteWrite(struct adapter *padapter, u16 addr, u8 data, bool bPseudoT { u8 tmpidx = 0; u8 bResult = false; - u32 efuseValue = 0; + u32 efuseValue; if (bPseudoTest) return Efuse_Write1ByteToFakeContent(addr, data); From dcf478ab1566ceb6c7796b3830bd340e50bb3f73 Mon Sep 17 00:00:00 2001 From: Emily Peri Date: Wed, 19 Oct 2022 12:31:10 -0700 Subject: [PATCH 0231/4122] staging: rtl8723bs: Removed extra tabs in conditional statements checkpatch found extra tabs in two conditional statements in rtw_ieee80211.c. Should be one tab instead of two. 
Signed-off-by: Emily Peri Link: https://lore.kernel.org/r/Y1BQfiwOXzAZpCCa@marshmallow Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8723bs/core/rtw_ieee80211.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/staging/rtl8723bs/core/rtw_ieee80211.c b/drivers/staging/rtl8723bs/core/rtw_ieee80211.c index 3d8a64f69448..30e7457a9c31 100644 --- a/drivers/staging/rtl8723bs/core/rtw_ieee80211.c +++ b/drivers/staging/rtl8723bs/core/rtw_ieee80211.c @@ -1063,18 +1063,18 @@ void rtw_get_bcn_info(struct wlan_network *pnetwork) /* parsing HT_CAP_IE */ p = rtw_get_ie(pnetwork->network.ies + _FIXED_IE_LENGTH_, WLAN_EID_HT_CAPABILITY, &len, pnetwork->network.ie_length - _FIXED_IE_LENGTH_); if (p && len > 0) { - pht_cap = (struct ieee80211_ht_cap *)(p + 2); - pnetwork->bcn_info.ht_cap_info = le16_to_cpu(pht_cap->cap_info); + pht_cap = (struct ieee80211_ht_cap *)(p + 2); + pnetwork->bcn_info.ht_cap_info = le16_to_cpu(pht_cap->cap_info); } else { - pnetwork->bcn_info.ht_cap_info = 0; + pnetwork->bcn_info.ht_cap_info = 0; } /* parsing HT_INFO_IE */ p = rtw_get_ie(pnetwork->network.ies + _FIXED_IE_LENGTH_, WLAN_EID_HT_OPERATION, &len, pnetwork->network.ie_length - _FIXED_IE_LENGTH_); if (p && len > 0) { - pht_info = (struct HT_info_element *)(p + 2); - pnetwork->bcn_info.ht_info_infos_0 = pht_info->infos[0]; + pht_info = (struct HT_info_element *)(p + 2); + pnetwork->bcn_info.ht_info_infos_0 = pht_info->infos[0]; } else { - pnetwork->bcn_info.ht_info_infos_0 = 0; + pnetwork->bcn_info.ht_info_infos_0 = 0; } } From 1850f1598d4a991289628487648e17f2631e37df Mon Sep 17 00:00:00 2001 From: Emily Peri Date: Wed, 19 Oct 2022 19:10:52 -0700 Subject: [PATCH 0232/4122] staging: rtl8723bs: align block comment stars Align '*' on each line of block comment in rtw_ioctl_set. Issue found by checkpatch. 
Signed-off-by: Emily Peri Link: https://lore.kernel.org/r/3d9738edd0992b72bf8fc8a05706a490772b5317.1666230736.git.eperi1024@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8723bs/core/rtw_ioctl_set.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c b/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c index 8c11daff2d59..f15dbf450ff4 100644 --- a/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c +++ b/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c @@ -462,11 +462,11 @@ exit: } /* -* rtw_get_cur_max_rate - -* @adapter: pointer to struct adapter structure -* -* Return 0 or 100Kbps -*/ + * rtw_get_cur_max_rate - + * @adapter: pointer to struct adapter structure + * + * Return 0 or 100Kbps + */ u16 rtw_get_cur_max_rate(struct adapter *adapter) { int i = 0; From d9e57cc302f33fcbcd12fe9cca6213d520e0537c Mon Sep 17 00:00:00 2001 From: Emily Peri Date: Wed, 19 Oct 2022 19:10:53 -0700 Subject: [PATCH 0233/4122] staging: rtl8723bs: remove unnecessary parenthesis Remove extra parenthesis in conditional statement in rtw_ioctl_set. Issue found by checkpatch. 
Signed-off-by: Emily Peri Link: https://lore.kernel.org/r/a08a440eabddd8e78d045ca9898a415d81f6f6d7.1666230736.git.eperi1024@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8723bs/core/rtw_ioctl_set.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c b/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c index f15dbf450ff4..8c7daab141db 100644 --- a/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c +++ b/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c @@ -159,7 +159,7 @@ u8 rtw_set_802_11_ssid(struct adapter *padapter, struct ndis_802_11_ssid *ssid) if (check_fwstate(pmlmepriv, _FW_LINKED|WIFI_ADHOC_MASTER_STATE) == true) { if ((pmlmepriv->assoc_ssid.ssid_length == ssid->ssid_length) && (!memcmp(&pmlmepriv->assoc_ssid.ssid, ssid->ssid, ssid->ssid_length))) { - if ((check_fwstate(pmlmepriv, WIFI_STATION_STATE) == false)) { + if (check_fwstate(pmlmepriv, WIFI_STATION_STATE) == false) { if (rtw_is_same_ibss(padapter, pnetwork) == false) { /* if in WIFI_ADHOC_MASTER_STATE | WIFI_ADHOC_STATE, create bss or rejoin again */ rtw_disassoc_cmd(padapter, 0, true); From b860ce04ac409386e4261931525ac3b3ac1758e1 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 7 Oct 2022 21:29:41 +0100 Subject: [PATCH 0234/4122] staging: sm750fb: Kconfig: Fix spelling mistake "accelearion" -> "acceleration" There is a spelling mistake in a Kconfig description. Fix it. 
Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20221007202941.2756304-1-colin.i.king@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/sm750fb/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/sm750fb/Kconfig b/drivers/staging/sm750fb/Kconfig index 8c0d8a873d5b..2101a6605efc 100644 --- a/drivers/staging/sm750fb/Kconfig +++ b/drivers/staging/sm750fb/Kconfig @@ -8,7 +8,7 @@ config FB_SM750 select FB_CFB_IMAGEBLIT help Frame buffer driver for the Silicon Motion SM750 chip - with 2D accelearion and dual head support. + with 2D acceleration and dual head support. This driver is also available as a module. The module will be called sm750fb. If you want to compile it as a module, say M From 6c2fb5dfb640ebc8b40a98b6c4337e9fb9f87ee3 Mon Sep 17 00:00:00 2001 From: Dragan Cvetic Date: Mon, 10 Oct 2022 20:02:51 +0100 Subject: [PATCH 0235/4122] staging: rtl8192e: Remove single statement braces Remove braces around single line statement, to resolve checkpatch.pl warnings "braces {} are not necessary for single statement blocks" Signed-off-by: Dragan Cvetic Link: https://lore.kernel.org/r/20221010190252.12402-1-dragan.m.cvetic@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl8192e/rtl_dm.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c b/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c index 702551056227..d7bfaf68291c 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c @@ -267,10 +267,8 @@ static void _rtl92e_dm_check_ac_dc_power(struct net_device *dev) "PATH=/usr/bin:/bin", NULL}; - if (priv->ResetProgress == RESET_TYPE_SILENT) { + if (priv->ResetProgress == RESET_TYPE_SILENT) return; - } - if (priv->rtllib->state != RTLLIB_LINKED) return; call_usermodehelper(ac_dc_script, argv, envp, UMH_WAIT_PROC); @@ -330,9 +328,8 @@ static void 
_rtl92e_dm_check_rate_adaptive(struct net_device *dev) bool bshort_gi_enabled = false; static u8 ping_rssi_state; - if (!priv->up) { + if (!priv->up) return; - } if (pra->rate_adaptive_disabled) return; @@ -777,9 +774,8 @@ static void _rtl92e_dm_tx_power_tracking_cb_thermal(struct net_device *dev) tmpRegA = rtl92e_get_bb_reg(dev, rOFDM0_XATxIQImbalance, bMaskDWord); for (i = 0; i < OFDM_Table_Length; i++) { - if (tmpRegA == OFDMSwingTable[i]) { + if (tmpRegA == OFDMSwingTable[i]) priv->OFDM_index[0] = i; - } } TempCCk = rtl92e_get_bb_reg(dev, rCCK0_TxFilter1, bMaskByte2); @@ -1066,9 +1062,8 @@ void rtl92e_dm_restore_state(struct net_device *dev) u32 reg_ratr = priv->rate_adaptive.last_ratr; u32 ratr_value; - if (!priv->up) { + if (!priv->up) return; - } if (priv->rate_adaptive.rate_adaptive_disabled) return; From 2122a86d426381069f279d9e6a71323cfc0da39f Mon Sep 17 00:00:00 2001 From: Dragan Cvetic Date: Mon, 10 Oct 2022 20:04:55 +0100 Subject: [PATCH 0236/4122] staging: rtl8192e: Rename CurSTAConnectState and PreSTAConnectState Rename variable CurSTAConnectState to cur_sta_connect_state, PreSTAConnectState to pre_sta_connect_state to avoid CamelCase which is not accepted by checkpatch. 
Signed-off-by: Dragan Cvetic Tested-by: Philipp Hortmann Link: https://lore.kernel.org/r/20221010190457.13199-2-dragan.m.cvetic@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl8192e/rtl_dm.c | 22 +++++++++++----------- drivers/staging/rtl8192e/rtl8192e/rtl_dm.h | 4 ++-- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c b/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c index d7bfaf68291c..0572ce40290e 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c @@ -1138,8 +1138,8 @@ static void _rtl92e_dm_dig_init(struct net_device *dev) dm_digtable.dig_state = DM_STA_DIG_MAX; dm_digtable.dig_highpwr_state = DM_STA_DIG_MAX; - dm_digtable.CurSTAConnectState = DIG_STA_DISCONNECT; - dm_digtable.PreSTAConnectState = DIG_STA_DISCONNECT; + dm_digtable.cur_sta_connect_state = DIG_STA_DISCONNECT; + dm_digtable.pre_sta_connect_state = DIG_STA_DISCONNECT; dm_digtable.rssi_low_thresh = DM_DIG_THRESH_LOW; dm_digtable.rssi_high_thresh = DM_DIG_THRESH_HIGH; @@ -1207,9 +1207,9 @@ static void _rtl92e_dm_ctrl_initgain_byrssi_driver(struct net_device *dev) } if (priv->rtllib->state == RTLLIB_LINKED) - dm_digtable.CurSTAConnectState = DIG_STA_CONNECT; + dm_digtable.cur_sta_connect_state = DIG_STA_CONNECT; else - dm_digtable.CurSTAConnectState = DIG_STA_DISCONNECT; + dm_digtable.cur_sta_connect_state = DIG_STA_DISCONNECT; dm_digtable.rssi_val = priv->undecorated_smoothed_pwdb; @@ -1218,7 +1218,7 @@ static void _rtl92e_dm_ctrl_initgain_byrssi_driver(struct net_device *dev) _rtl92e_dm_cs_ratio(dev); if (dm_digtable.dig_algorithm_switch) dm_digtable.dig_algorithm_switch = 0; - dm_digtable.PreSTAConnectState = dm_digtable.CurSTAConnectState; + dm_digtable.pre_sta_connect_state = dm_digtable.cur_sta_connect_state; } @@ -1368,8 +1368,8 @@ static void _rtl92e_dm_initial_gain(struct net_device *dev) return; } - if (dm_digtable.PreSTAConnectState == 
dm_digtable.CurSTAConnectState) { - if (dm_digtable.CurSTAConnectState == DIG_STA_CONNECT) { + if (dm_digtable.pre_sta_connect_state == dm_digtable.cur_sta_connect_state) { + if (dm_digtable.cur_sta_connect_state == DIG_STA_CONNECT) { long gain_range = dm_digtable.rssi_val + 10 - dm_digtable.backoff_val; gain_range = clamp_t(long, gain_range, @@ -1419,8 +1419,8 @@ static void _rtl92e_dm_pd_th(struct net_device *dev) reset_cnt = 0; } - if (dm_digtable.PreSTAConnectState == dm_digtable.CurSTAConnectState) { - if (dm_digtable.CurSTAConnectState == DIG_STA_CONNECT) { + if (dm_digtable.pre_sta_connect_state == dm_digtable.cur_sta_connect_state) { + if (dm_digtable.cur_sta_connect_state == DIG_STA_CONNECT) { if (dm_digtable.rssi_val >= dm_digtable.rssi_high_power_highthresh) dm_digtable.curpd_thstate = @@ -1487,8 +1487,8 @@ static void _rtl92e_dm_cs_ratio(struct net_device *dev) reset_cnt = 0; } - if (dm_digtable.PreSTAConnectState == dm_digtable.CurSTAConnectState) { - if (dm_digtable.CurSTAConnectState == DIG_STA_CONNECT) { + if (dm_digtable.pre_sta_connect_state == dm_digtable.cur_sta_connect_state) { + if (dm_digtable.cur_sta_connect_state == DIG_STA_CONNECT) { if (dm_digtable.rssi_val <= dm_digtable.rssi_low_thresh) dm_digtable.curcs_ratio_state = DIG_CS_RATIO_LOWER; else if (dm_digtable.rssi_val >= dm_digtable.rssi_high_thresh) diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_dm.h b/drivers/staging/rtl8192e/rtl8192e/rtl_dm.h index 51e295d389a8..89c58e38c1f7 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_dm.h +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_dm.h @@ -66,8 +66,8 @@ struct dig_t { u8 dig_state; u8 dig_highpwr_state; - u8 CurSTAConnectState; - u8 PreSTAConnectState; + u8 cur_sta_connect_state; + u8 pre_sta_connect_state; u8 curpd_thstate; u8 prepd_thstate; From 9adc341cdcf63a4343fdb8c45aa46cafcd8dffae Mon Sep 17 00:00:00 2001 From: Dragan Cvetic Date: Mon, 10 Oct 2022 20:04:57 +0100 Subject: [PATCH 0237/4122] staging: rtl8192e: Rename Op, Length and 
Value Rename variable Op to op, Length to length and Value to value to avoid CamelCase which is not accepted by checkpatch. Signed-off-by: Dragan Cvetic Tested-by: Philipp Hortmann Link: https://lore.kernel.org/r/20221010190457.13199-4-dragan.m.cvetic@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl8192e/rtl_dm.c | 6 +++--- drivers/staging/rtl8192e/rtl8192e/rtl_dm.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c b/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c index 0572ce40290e..c9e495538e2c 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c @@ -628,9 +628,9 @@ static void _rtl92e_dm_tx_power_tracking_callback_tssi(struct net_device *dev) for (j = 0; j <= 30; j++) { - tx_cmd.Op = TXCMD_SET_TX_PWR_TRACKING; - tx_cmd.Length = 4; - tx_cmd.Value = Value; + tx_cmd.op = TXCMD_SET_TX_PWR_TRACKING; + tx_cmd.length = 4; + tx_cmd.value = Value; rtl92e_send_cmd_pkt(dev, DESC_PACKET_TYPE_NORMAL, (u8 *)&tx_cmd, sizeof(struct dcmd_txcmd)); mdelay(1); diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_dm.h b/drivers/staging/rtl8192e/rtl8192e/rtl_dm.h index 89c58e38c1f7..1d4d7d98a859 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_dm.h +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_dm.h @@ -152,9 +152,9 @@ enum dm_cck_rx_path_method { struct dcmd_txcmd { - u32 Op; - u32 Length; - u32 Value; + u32 op; + u32 length; + u32 value; }; /*------------------------------Define structure----------------------------*/ From 04a5673391a5c56831f09b11a7735035e4a1ea62 Mon Sep 17 00:00:00 2001 From: Rui Li Date: Wed, 12 Oct 2022 22:36:33 +0800 Subject: [PATCH 0238/4122] staging: rtl8192e: remove unnecessary braces for single statement blocks This commit cleans up checkpatch warning as follows: braces {} are not necessary for single statement blocks Signed-off-by: Rui Li Tested-by: Philipp Hortmann Link: 
https://lore.kernel.org/r/166558541522.9.15423282339326993462.68459319@lirui.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c | 3 +-- drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c | 9 +++------ drivers/staging/rtl8192e/rtllib_softmac_wx.c | 3 +-- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c index c3dcaa27fd2e..210b7ecc273a 100644 --- a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c +++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c @@ -1106,9 +1106,8 @@ void rtl92e_fill_tx_desc(struct net_device *dev, struct tx_desc *pdesc, if (cb_desc->bHwSec) { static u8 tmp; - if (!tmp) { + if (!tmp) tmp = 1; - } switch (priv->rtllib->pairwise_key_type) { case KEY_TYPE_WEP40: case KEY_TYPE_WEP104: diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c index a44dffa76a39..58da2dab55bd 100644 --- a/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c +++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c @@ -522,9 +522,8 @@ static bool _rtl92e_bb_config_para_file(struct net_device *dev) rtStatus = rtl92e_check_bb_and_rf(dev, (enum hw90_block)eCheckItem, (enum rf90_radio_path)0); - if (!rtStatus) { + if (!rtStatus) return rtStatus; - } } rtl92e_set_bb_reg(dev, rFPGA0_RFMOD, bCCKEn|bOFDMEn, 0x0); _rtl92e_phy_config_bb(dev, BaseBand_Config_PHY_REG); @@ -1378,9 +1377,8 @@ static bool _rtl92e_set_rf_power_state(struct net_device *dev, i++; } - if (i >= MAX_DOZE_WAITING_TIMES_9x) { + if (i >= MAX_DOZE_WAITING_TIMES_9x) break; - } } rtl92e_set_rf_off(dev); break; @@ -1397,9 +1395,8 @@ static bool _rtl92e_set_rf_power_state(struct net_device *dev, i++; } - if (i >= MAX_DOZE_WAITING_TIMES_9x) { + if (i >= MAX_DOZE_WAITING_TIMES_9x) break; - } } if (pPSC->RegRfPsLevel & RT_RF_OFF_LEVL_HALT_NIC && diff --git a/drivers/staging/rtl8192e/rtllib_softmac_wx.c 
b/drivers/staging/rtl8192e/rtllib_softmac_wx.c index f9589c5b62ba..fdf867a5dd7a 100644 --- a/drivers/staging/rtl8192e/rtllib_softmac_wx.c +++ b/drivers/staging/rtl8192e/rtllib_softmac_wx.c @@ -571,9 +571,8 @@ int rtllib_wx_set_power(struct rtllib_device *ieee, ieee->ps = RTLLIB_PS_DISABLED; goto exit; } - if (wrqu->power.flags & IW_POWER_TIMEOUT) { + if (wrqu->power.flags & IW_POWER_TIMEOUT) ieee->ps_timeout = wrqu->power.value / 1000; - } if (wrqu->power.flags & IW_POWER_PERIOD) ieee->ps_period = wrqu->power.value / 1000; From 06b764cf60877bdd6604c173963501e16236ccdf Mon Sep 17 00:00:00 2001 From: Rigel Di Scala Date: Wed, 12 Oct 2022 21:16:12 +0000 Subject: [PATCH 0239/4122] Staging: rtl8192e: fix a brace style issue Fixed a coding style issue affecting a conditional if statement. Signed-off-by: Rigel Di Scala Tested-by: Philipp Hortmann Link: https://lore.kernel.org/r/20221012211612.75871-1-zedr@zedr.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl819x_BAProc.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl819x_BAProc.c b/drivers/staging/rtl8192e/rtl819x_BAProc.c index 19d13b3fcecf..e932ad1a9e96 100644 --- a/drivers/staging/rtl8192e/rtl819x_BAProc.c +++ b/drivers/staging/rtl8192e/rtl819x_BAProc.c @@ -180,11 +180,10 @@ static void rtllib_send_ADDBAReq(struct rtllib_device *ieee, u8 *dst, skb = rtllib_ADDBA(ieee, dst, pBA, 0, ACT_ADDBAREQ); - if (skb) { + if (skb) softmac_mgmt_xmit(skb, ieee); - } else { + else netdev_dbg(ieee->dev, "Failed to generate ADDBAReq packet.\n"); - } } static void rtllib_send_ADDBARsp(struct rtllib_device *ieee, u8 *dst, From c5997186452ae29b535fd6cb65b4f5534b52ea25 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sat, 15 Oct 2022 17:24:32 +0200 Subject: [PATCH 0240/4122] staging: r8188eu: replace one GetAddr3Ptr call Define a struct ieee80211_mgmt for the message that we process in OnDeAuth. Use this struct to read the bssid. 
This patch removes one GetAddr3Ptr call, getting us a tiny step closer to removing GetAddr3Ptr. Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Acked-by: Pavel Skripkin Link: https://lore.kernel.org/r/20221015152440.232281-2-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index 07905e2ae8e0..0c4b3b99150d 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -1457,6 +1457,7 @@ report_assoc_result: unsigned int OnDeAuth(struct adapter *padapter, struct recv_frame *precv_frame) { + struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)precv_frame->rx_data; unsigned short reason; struct mlme_priv *pmlmepriv = &padapter->mlmepriv; struct mlme_ext_priv *pmlmeext = &padapter->mlmeextpriv; @@ -1464,8 +1465,7 @@ unsigned int OnDeAuth(struct adapter *padapter, struct recv_frame *precv_frame) u8 *pframe = precv_frame->rx_data; struct wifidirect_info *pwdinfo = &padapter->wdinfo; - /* check A3 */ - if (!(!memcmp(GetAddr3Ptr(pframe), get_my_bssid(&pmlmeinfo->network), ETH_ALEN))) + if (memcmp(mgmt->bssid, get_my_bssid(&pmlmeinfo->network), ETH_ALEN)) return _SUCCESS; if (pwdinfo->rx_invitereq_info.scan_op_ch_only) { From f54ded554f04cc0e7f5edcc571d9c09581f67312 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sat, 15 Oct 2022 17:24:33 +0200 Subject: [PATCH 0241/4122] staging: r8188eu: get reason code from mgmt struct Read the deauth reason code from the newly added mgmt structure instead of calculating the offset ourselves.
Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Acked-by: Pavel Skripkin Link: https://lore.kernel.org/r/20221015152440.232281-3-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index 0c4b3b99150d..5c59fc91ecae 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -1473,7 +1473,7 @@ unsigned int OnDeAuth(struct adapter *padapter, struct recv_frame *precv_frame) _set_timer(&pwdinfo->reset_ch_sitesurvey, 10); } - reason = le16_to_cpu(*(__le16 *)(pframe + WLAN_HDR_A3_LEN)); + reason = le16_to_cpu(mgmt->u.disassoc.reason_code); if (check_fwstate(pmlmepriv, WIFI_AP_STATE)) { struct sta_info *psta; From 8ea03e32f51475f10ebf430fece7c3f8b8fa476b Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sat, 15 Oct 2022 17:24:34 +0200 Subject: [PATCH 0242/4122] staging: r8188eu: clarify the bBusyTraffic assignment bBusyTraffic is set only if we're not in WIFI_AP_STATE, i.e. in the else branch. If we were in WIFI_AP_STATE, we'd go into the if branch and return _SUCCESS before making it to the bBusyTraffic assignment. Move the assignment into the else branch to make this clearer.
Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Acked-by: Pavel Skripkin Link: https://lore.kernel.org/r/20221015152440.232281-4-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index 5c59fc91ecae..fd2daeca7112 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -1515,8 +1515,9 @@ unsigned int OnDeAuth(struct adapter *padapter, struct recv_frame *precv_frame) if (!ignore_received_deauth) receive_disconnect(padapter, GetAddr3Ptr(pframe), reason); + + pmlmepriv->LinkDetectInfo.bBusyTraffic = false; } - pmlmepriv->LinkDetectInfo.bBusyTraffic = false; return _SUCCESS; } From 15697b04e42e399f1c74d484f130859df756b1ae Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sat, 15 Oct 2022 17:24:35 +0200 Subject: [PATCH 0243/4122] staging: r8188eu: use sa instead of Addr2 For management frames, Addr2 is the Source Address (SA). Use sa from the mgmt structure and remove the GetAddr2Ptr call. GetAddr2Ptr is a driver-specific function that we should eventually remove. 
Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Acked-by: Pavel Skripkin Link: https://lore.kernel.org/r/20221015152440.232281-5-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index fd2daeca7112..732ada6ab932 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -1479,7 +1479,7 @@ unsigned int OnDeAuth(struct adapter *padapter, struct recv_frame *precv_frame) struct sta_info *psta; struct sta_priv *pstapriv = &padapter->stapriv; - psta = rtw_get_stainfo(pstapriv, GetAddr2Ptr(pframe)); + psta = rtw_get_stainfo(pstapriv, mgmt->sa); if (psta) { u8 updated = 0; From 094fbfbac3570345dbcb946dd2785bad03b680cf Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sat, 15 Oct 2022 17:24:36 +0200 Subject: [PATCH 0244/4122] staging: r8188eu: get bssid from mgmt struct For management frames, Addr3 is the BSSID. Read it from the mgmt structure instead of calling GetAddr3Ptr. The pframe variable is now unused and can be removed. 
Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Acked-by: Pavel Skripkin Link: https://lore.kernel.org/r/20221015152440.232281-6-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index 732ada6ab932..742976c38cd5 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -1462,7 +1462,6 @@ unsigned int OnDeAuth(struct adapter *padapter, struct recv_frame *precv_frame) struct mlme_priv *pmlmepriv = &padapter->mlmepriv; struct mlme_ext_priv *pmlmeext = &padapter->mlmeextpriv; struct mlme_ext_info *pmlmeinfo = &pmlmeext->mlmext_info; - u8 *pframe = precv_frame->rx_data; struct wifidirect_info *pwdinfo = &padapter->wdinfo; if (memcmp(mgmt->bssid, get_my_bssid(&pmlmeinfo->network), ETH_ALEN)) @@ -1514,7 +1513,7 @@ unsigned int OnDeAuth(struct adapter *padapter, struct recv_frame *precv_frame) } if (!ignore_received_deauth) - receive_disconnect(padapter, GetAddr3Ptr(pframe), reason); + receive_disconnect(padapter, mgmt->bssid, reason); pmlmepriv->LinkDetectInfo.bBusyTraffic = false; } From ece8119070de777c7a1685211c8056a2ce6d3df8 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sat, 15 Oct 2022 17:24:37 +0200 Subject: [PATCH 0245/4122] staging: r8188eu: exit for deauth from unknown station If we receive a deauth message from an unknown station, we can drop this message and exit immediately. Reorder the code to make this clearer, don't wrap everything in an if statement. 
Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Acked-by: Pavel Skripkin Link: https://lore.kernel.org/r/20221015152440.232281-7-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 22 ++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index 742976c38cd5..40df0f9982f4 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -1477,21 +1477,21 @@ unsigned int OnDeAuth(struct adapter *padapter, struct recv_frame *precv_frame) if (check_fwstate(pmlmepriv, WIFI_AP_STATE)) { struct sta_info *psta; struct sta_priv *pstapriv = &padapter->stapriv; + u8 updated = 0; psta = rtw_get_stainfo(pstapriv, mgmt->sa); - if (psta) { - u8 updated = 0; + if (!psta) + return _SUCCESS; - spin_lock_bh(&pstapriv->asoc_list_lock); - if (!list_empty(&psta->asoc_list)) { - list_del_init(&psta->asoc_list); - pstapriv->asoc_list_cnt--; - updated = ap_free_sta(padapter, psta, false, reason); - } - spin_unlock_bh(&pstapriv->asoc_list_lock); - - associated_clients_update(padapter, updated); + spin_lock_bh(&pstapriv->asoc_list_lock); + if (!list_empty(&psta->asoc_list)) { + list_del_init(&psta->asoc_list); + pstapriv->asoc_list_cnt--; + updated = ap_free_sta(padapter, psta, false, reason); } + spin_unlock_bh(&pstapriv->asoc_list_lock); + + associated_clients_update(padapter, updated); return _SUCCESS; } else { From 6325d858c48d2fefe1c93da36f74b506d7abf71d Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sat, 15 Oct 2022 17:24:38 +0200 Subject: [PATCH 0246/4122] staging: r8188eu: remove unnecessary return Remove the return statement at the end of the if branch. We can continue to the final return after the if-else. 
Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Acked-by: Pavel Skripkin Link: https://lore.kernel.org/r/20221015152440.232281-8-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index 40df0f9982f4..465f51bce0e3 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -1492,8 +1492,6 @@ unsigned int OnDeAuth(struct adapter *padapter, struct recv_frame *precv_frame) spin_unlock_bh(&pstapriv->asoc_list_lock); associated_clients_update(padapter, updated); - - return _SUCCESS; } else { int ignore_received_deauth = 0; From 000848a511fc77b26864f2d95c8efa853e6bf82f Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sat, 15 Oct 2022 17:24:39 +0200 Subject: [PATCH 0247/4122] staging: r8188eu: summarize two flags checks Summarize the two statements to check if either WIFI_FW_AUTH_STATE or WIFI_FW_ASSOC_STATE is set. Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Acked-by: Pavel Skripkin Link: https://lore.kernel.org/r/20221015152440.232281-9-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index 465f51bce0e3..09ffecc5b2b3 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -1500,8 +1500,7 @@ unsigned int OnDeAuth(struct adapter *padapter, struct recv_frame *precv_frame) * However, the Win8.1 with BRCM Wi-Fi will send the deauth with reason code 6 to us after receieving our deauth. * Added the following code to avoid this case. 
*/ - if ((pmlmeinfo->state & WIFI_FW_AUTH_STATE) || - (pmlmeinfo->state & WIFI_FW_ASSOC_STATE)) { + if (pmlmeinfo->state & (WIFI_FW_AUTH_STATE | WIFI_FW_ASSOC_STATE)) { if (reason == WLAN_REASON_CLASS2_FRAME_FROM_NONAUTH_STA) { ignore_received_deauth = 1; } else if (reason == WLAN_REASON_PREV_AUTH_NOT_VALID) { From e2c532aadd86fd51ebde566e6af74b1eb1be89e0 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sat, 15 Oct 2022 17:24:40 +0200 Subject: [PATCH 0248/4122] staging: r8188eu: ignore_received_deauth is a boolean The ignore_received_deauth is in fact a boolean variable. Change its type to bool and use true, false for its values. Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Acked-by: Pavel Skripkin Link: https://lore.kernel.org/r/20221015152440.232281-10-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index 09ffecc5b2b3..fda446b6779c 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -1493,7 +1493,7 @@ unsigned int OnDeAuth(struct adapter *padapter, struct recv_frame *precv_frame) associated_clients_update(padapter, updated); } else { - int ignore_received_deauth = 0; + bool ignore_received_deauth = false; /* Before sending the auth frame to start the STA/GC mode connection with AP/GO, * we will send the deauth first. 
@@ -1502,10 +1502,10 @@ unsigned int OnDeAuth(struct adapter *padapter, struct recv_frame *precv_frame) */ if (pmlmeinfo->state & (WIFI_FW_AUTH_STATE | WIFI_FW_ASSOC_STATE)) { if (reason == WLAN_REASON_CLASS2_FRAME_FROM_NONAUTH_STA) { - ignore_received_deauth = 1; + ignore_received_deauth = true; } else if (reason == WLAN_REASON_PREV_AUTH_NOT_VALID) { // TODO: 802.11r - ignore_received_deauth = 1; + ignore_received_deauth = true; } } From 92b81816d5f867c2b1017970786e0f50882a33f7 Mon Sep 17 00:00:00 2001 From: Anjandev Momi Date: Fri, 14 Oct 2022 01:18:37 -0700 Subject: [PATCH 0249/4122] Staging: rtl8192e: remove unnecessary parentheses This patch removes the following CHECK generated by checkpatch.pl: ./drivers/staging/rtl8192e/rtl819x_BAProc.c:116: CHECK: Unnecessary parentheses around pBA->ba_start_seq_ctrl ./drivers/staging/rtl8192e/rtl819x_BAProc.c:261: CHECK: Unnecessary parentheses around '&pTS' ./drivers/staging/rtl8192e/rtl819x_BAProc.c:346: CHECK: Unnecessary parentheses around '&pTS' Signed-off-by: Anjandev Momi Link: https://lore.kernel.org/r/20221014081839.23902-2-anjan@momi.ca Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl819x_BAProc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl819x_BAProc.c b/drivers/staging/rtl8192e/rtl819x_BAProc.c index e932ad1a9e96..f208ee1b2955 100644 --- a/drivers/staging/rtl8192e/rtl819x_BAProc.c +++ b/drivers/staging/rtl8192e/rtl819x_BAProc.c @@ -111,7 +111,7 @@ static struct sk_buff *rtllib_ADDBA(struct rtllib_device *ieee, u8 *Dst, tag += 2; if (type == ACT_ADDBAREQ) { - memcpy(tag, (u8 *)&(pBA->ba_start_seq_ctrl), 2); + memcpy(tag, (u8 *)&pBA->ba_start_seq_ctrl, 2); tag += 2; } @@ -253,7 +253,7 @@ int rtllib_rx_ADDBAReq(struct rtllib_device *ieee, struct sk_buff *skb) ieee->pHTInfo->bCurrentHTSupport); goto OnADDBAReq_Fail; } - if (!GetTs(ieee, (struct ts_common_info **)(&pTS), dst, + if (!GetTs(ieee, (struct ts_common_info **)&pTS, dst, 
(u8)(pBaParamSet->field.tid), RX_DIR, true)) { rc = ADDBA_STATUS_REFUSED; netdev_warn(ieee->dev, "%s(): can't get TS\n", __func__); @@ -337,7 +337,7 @@ int rtllib_rx_ADDBARsp(struct rtllib_device *ieee, struct sk_buff *skb) goto OnADDBARsp_Reject; } - if (!GetTs(ieee, (struct ts_common_info **)(&pTS), dst, + if (!GetTs(ieee, (struct ts_common_info **)&pTS, dst, (u8)(pBaParamSet->field.tid), TX_DIR, false)) { netdev_warn(ieee->dev, "%s(): can't get TS\n", __func__); ReasonCode = DELBA_REASON_UNKNOWN_BA; From a079a4b2c9bc40b00cad01a1babf209acab1eea2 Mon Sep 17 00:00:00 2001 From: Anjandev Momi Date: Fri, 14 Oct 2022 01:18:38 -0700 Subject: [PATCH 0250/4122] Staging: rtl8192e: remove multiple blank lines This patch removes the following checks generated by checkpatch.pl: ./drivers/staging/rtl8192e/rtl819x_BAProc.c:164: CHECK: Please don't use multiple blank lines ./drivers/staging/rtl8192e/rtl819x_BAProc.c:383: CHECK: Please don't use multiple blank lines Signed-off-by: Anjandev Momi Link: https://lore.kernel.org/r/20221014081839.23902-3-anjan@momi.ca Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl819x_BAProc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl819x_BAProc.c b/drivers/staging/rtl8192e/rtl819x_BAProc.c index f208ee1b2955..94131a5983f1 100644 --- a/drivers/staging/rtl8192e/rtl819x_BAProc.c +++ b/drivers/staging/rtl8192e/rtl819x_BAProc.c @@ -159,7 +159,6 @@ static struct sk_buff *rtllib_DELBA(struct rtllib_device *ieee, u8 *dst, *tag++ = ACT_CAT_BA; *tag++ = ACT_DELBA; - put_unaligned_le16(DelbaParamSet.short_data, tag); tag += 2; @@ -374,7 +373,6 @@ int rtllib_rx_ADDBARsp(struct rtllib_device *ieee, struct sk_buff *skb) goto OnADDBARsp_Reject; } - pAdmittedBA->dialog_token = *pDialogToken; pAdmittedBA->ba_timeout_value = *pBaTimeoutVal; pAdmittedBA->ba_start_seq_ctrl = pPendingBA->ba_start_seq_ctrl; From 2f618d1167b59dc7270be86f89819cf3417c192f Mon Sep 17 00:00:00 2001 From: Anjandev Momi Date: Fri, 14 Oct 
2022 01:18:39 -0700 Subject: [PATCH 0251/4122] Staging: rtl8192e: make alignment match open parenthesis This patch removes the following checks generated by checkpatch.pl: ./drivers/staging/rtl8192e/rtl819x_BAProc.c:261: CHECK: Alignment should match open parenthesis ./drivers/staging/rtl8192e/rtl819x_BAProc.c:284: CHECK: Alignment should match open parenthesis ./drivers/staging/rtl8192e/rtl819x_BAProc.c:421: CHECK: Alignment should match open parenthesis ./drivers/staging/rtl8192e/rtl819x_BAProc.c:441: CHECK: Alignment should match open parenthesis Signed-off-by: Anjandev Momi Link: https://lore.kernel.org/r/20221014081839.23902-4-anjan@momi.ca Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl819x_BAProc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl819x_BAProc.c b/drivers/staging/rtl8192e/rtl819x_BAProc.c index 94131a5983f1..a7aa772778d3 100644 --- a/drivers/staging/rtl8192e/rtl819x_BAProc.c +++ b/drivers/staging/rtl8192e/rtl819x_BAProc.c @@ -253,7 +253,7 @@ int rtllib_rx_ADDBAReq(struct rtllib_device *ieee, struct sk_buff *skb) goto OnADDBAReq_Fail; } if (!GetTs(ieee, (struct ts_common_info **)&pTS, dst, - (u8)(pBaParamSet->field.tid), RX_DIR, true)) { + (u8)(pBaParamSet->field.tid), RX_DIR, true)) { rc = ADDBA_STATUS_REFUSED; netdev_warn(ieee->dev, "%s(): can't get TS\n", __func__); goto OnADDBAReq_Fail; @@ -412,7 +412,7 @@ int rtllib_rx_DELBA(struct rtllib_device *ieee, struct sk_buff *skb) } if (!ieee->current_network.qos_data.active || - !ieee->pHTInfo->bCurrentHTSupport) { + !ieee->pHTInfo->bCurrentHTSupport) { netdev_warn(ieee->dev, "received DELBA while QOS or HT is not supported(%d, %d)\n", ieee->current_network. 
qos_data.active, @@ -432,7 +432,7 @@ int rtllib_rx_DELBA(struct rtllib_device *ieee, struct sk_buff *skb) struct rx_ts_record *pRxTs; if (!GetTs(ieee, (struct ts_common_info **)&pRxTs, dst, - (u8)pDelBaParamSet->field.tid, RX_DIR, false)) { + (u8)pDelBaParamSet->field.tid, RX_DIR, false)) { netdev_warn(ieee->dev, "%s(): can't get TS for RXTS. dst:%pM TID:%d\n", __func__, dst, From 98703e4264afe64cbf2c5a4b6589d6e2a3068833 Mon Sep 17 00:00:00 2001 From: Anjandev Momi Date: Fri, 14 Oct 2022 01:18:40 -0700 Subject: [PATCH 0252/4122] Staging: rtl8192e: add blank line after function declaration This patch removes the following check generated by checkpatch.pl ./drivers/staging/rtl8192e/rtl819x_BAProc.c:65: CHECK: Please use a blank line after function/struct/union/enum declarations Signed-off-by: Anjandev Momi Link: https://lore.kernel.org/r/20221014081839.23902-5-anjan@momi.ca Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl819x_BAProc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/rtl8192e/rtl819x_BAProc.c b/drivers/staging/rtl8192e/rtl819x_BAProc.c index a7aa772778d3..7c0369319f97 100644 --- a/drivers/staging/rtl8192e/rtl819x_BAProc.c +++ b/drivers/staging/rtl8192e/rtl819x_BAProc.c @@ -62,6 +62,7 @@ void ResetBaEntry(struct ba_record *pBA) pBA->dialog_token = 0; pBA->ba_start_seq_ctrl.short_data = 0; } + static struct sk_buff *rtllib_ADDBA(struct rtllib_device *ieee, u8 *Dst, struct ba_record *pBA, u16 StatusCode, u8 type) From 12c6223fc1804fd9295dc50d358294539b4a4184 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sat, 15 Oct 2022 17:11:06 +0200 Subject: [PATCH 0253/4122] staging: r8188eu: fix led register settings Using an InterTech DMG-02 dongle, the led remains on when the system goes into standby mode. After wakeup, it's no longer possible to control the led. It turned out that the register settings to enable or disable the led were not correct. 
They worked for some dongles like the Edimax V2 but not for others like the InterTech DMG-02. This patch fixes the register settings. Bit 3 in the led_cfg2 register controls the led status, bit 5 must always be set to be able to control the led, bit 6 has no influence on the led. Setting the mac_pinmux_cfg register is not necessary. These settings were tested with Edimax V2 and InterTech DMG-02. Cc: stable@vger.kernel.org Fixes: 8cd574e6af54 ("staging: r8188eu: introduce new hal dir for RTL8188eu driver") Suggested-by: Michael Straube Signed-off-by: Martin Kaiser Tested-by: Michael Straube # InterTech DMG-02, Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221015151115.232095-2-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_led.c | 25 ++----------------------- 1 file changed, 2 insertions(+), 23 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_led.c b/drivers/staging/r8188eu/core/rtw_led.c index 2527c252c3e9..5b214488571b 100644 --- a/drivers/staging/r8188eu/core/rtw_led.c +++ b/drivers/staging/r8188eu/core/rtw_led.c @@ -31,40 +31,19 @@ static void ResetLedStatus(struct led_priv *pLed) static void SwLedOn(struct adapter *padapter, struct led_priv *pLed) { - u8 LedCfg; - int res; - if (padapter->bDriverStopped) return; - res = rtw_read8(padapter, REG_LEDCFG2, &LedCfg); - if (res) - return; - - rtw_write8(padapter, REG_LEDCFG2, (LedCfg & 0xf0) | BIT(5) | BIT(6)); /* SW control led0 on. */ + rtw_write8(padapter, REG_LEDCFG2, BIT(5)); /* SW control led0 on. */ pLed->bLedOn = true; } static void SwLedOff(struct adapter *padapter, struct led_priv *pLed) { - u8 LedCfg; - int res; - if (padapter->bDriverStopped) goto exit; - res = rtw_read8(padapter, REG_LEDCFG2, &LedCfg);/* 0x4E */ - if (res) - goto exit; - - LedCfg &= 0x90; /* Set to software control. 
*/ - rtw_write8(padapter, REG_LEDCFG2, (LedCfg | BIT(3))); - res = rtw_read8(padapter, REG_MAC_PINMUX_CFG, &LedCfg); - if (res) - goto exit; - - LedCfg &= 0xFE; - rtw_write8(padapter, REG_MAC_PINMUX_CFG, LedCfg); + rtw_write8(padapter, REG_LEDCFG2, BIT(5) | BIT(3)); exit: pLed->bLedOn = false; } From e5931b7ba5a9ccfea6e36f9f07eac89091782e9b Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sat, 15 Oct 2022 17:11:07 +0200 Subject: [PATCH 0254/4122] staging: r8188eu: handle rtw_write8 errors in SwLedOn Check the status returned by rtw_write8. Update bLedOn only if we could update the REG_LEDCFG2 register. Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221015151115.232095-3-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_led.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/staging/r8188eu/core/rtw_led.c b/drivers/staging/r8188eu/core/rtw_led.c index 5b214488571b..4f1cad890cae 100644 --- a/drivers/staging/r8188eu/core/rtw_led.c +++ b/drivers/staging/r8188eu/core/rtw_led.c @@ -34,7 +34,9 @@ static void SwLedOn(struct adapter *padapter, struct led_priv *pLed) if (padapter->bDriverStopped) return; - rtw_write8(padapter, REG_LEDCFG2, BIT(5)); /* SW control led0 on. */ + if (rtw_write8(padapter, REG_LEDCFG2, BIT(5)) != _SUCCESS) + return; + pLed->bLedOn = true; } From ef032c4fa43655715a39378d23cf3cc943cfdafa Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sat, 15 Oct 2022 17:11:08 +0200 Subject: [PATCH 0255/4122] staging: r8188eu: fix status updates in SwLedOff Update bLedOn only if we could update the REG_LEDCFG2 register. 
Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221015151115.232095-4-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_led.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_led.c b/drivers/staging/r8188eu/core/rtw_led.c index 4f1cad890cae..38433296d327 100644 --- a/drivers/staging/r8188eu/core/rtw_led.c +++ b/drivers/staging/r8188eu/core/rtw_led.c @@ -43,10 +43,11 @@ static void SwLedOn(struct adapter *padapter, struct led_priv *pLed) static void SwLedOff(struct adapter *padapter, struct led_priv *pLed) { if (padapter->bDriverStopped) - goto exit; + return; + + if (rtw_write8(padapter, REG_LEDCFG2, BIT(5) | BIT(3)) != _SUCCESS) + return; - rtw_write8(padapter, REG_LEDCFG2, BIT(5) | BIT(3)); -exit: pLed->bLedOn = false; } From c16a98833a9979c15febdc60a3ab5d00c72c1b5e Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sat, 15 Oct 2022 17:11:09 +0200 Subject: [PATCH 0256/4122] staging: r8188eu: SwLedOn needs no padapter parameter Remove the padapter parameter from the SwLedOn function. padapter can be derived from the pLed parameter. 
Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221015151115.232095-5-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_led.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_led.c b/drivers/staging/r8188eu/core/rtw_led.c index 38433296d327..aa8f41edfade 100644 --- a/drivers/staging/r8188eu/core/rtw_led.c +++ b/drivers/staging/r8188eu/core/rtw_led.c @@ -29,8 +29,10 @@ static void ResetLedStatus(struct led_priv *pLed) pLed->bLedScanBlinkInProgress = false; } -static void SwLedOn(struct adapter *padapter, struct led_priv *pLed) +static void SwLedOn(struct led_priv *pLed) { + struct adapter *padapter = container_of(pLed, struct adapter, ledpriv); + if (padapter->bDriverStopped) return; @@ -67,7 +69,7 @@ static void blink_work(struct work_struct *work) if (pLed->bLedOn) SwLedOff(padapter, pLed); else - SwLedOn(padapter, pLed); + SwLedOn(pLed); switch (pLed->CurrLedState) { case LED_BLINK_SLOWLY: From 728a14bf8f1e095f6edddcf15f70c9307ec1f0ca Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sat, 15 Oct 2022 17:11:10 +0200 Subject: [PATCH 0257/4122] staging: r8188eu: SwLedOff needs no padapter parameter Remove the padapter parameter from the SwLedOff function. padapter can be derived from the pLed parameter. 
Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221015151115.232095-6-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_led.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_led.c b/drivers/staging/r8188eu/core/rtw_led.c index aa8f41edfade..56f043d8ff38 100644 --- a/drivers/staging/r8188eu/core/rtw_led.c +++ b/drivers/staging/r8188eu/core/rtw_led.c @@ -42,8 +42,10 @@ static void SwLedOn(struct led_priv *pLed) pLed->bLedOn = true; } -static void SwLedOff(struct adapter *padapter, struct led_priv *pLed) +static void SwLedOff(struct led_priv *pLed) { + struct adapter *padapter = container_of(pLed, struct adapter, ledpriv); + if (padapter->bDriverStopped) return; @@ -61,13 +63,13 @@ static void blink_work(struct work_struct *work) struct mlme_priv *pmlmepriv = &padapter->mlmepriv; if (padapter->pwrctrlpriv.rf_pwrstate != rf_on) { - SwLedOff(padapter, pLed); + SwLedOff(pLed); ResetLedStatus(pLed); return; } if (pLed->bLedOn) - SwLedOff(padapter, pLed); + SwLedOff(pLed); else SwLedOn(pLed); @@ -141,7 +143,7 @@ void rtl8188eu_DeInitSwLeds(struct adapter *padapter) cancel_delayed_work_sync(&ledpriv->blink_work); ResetLedStatus(ledpriv); - SwLedOff(padapter, ledpriv); + SwLedOff(ledpriv); } void rtw_led_control(struct adapter *padapter, enum LED_CTL_MODE LedAction) @@ -258,7 +260,7 @@ void rtw_led_control(struct adapter *padapter, enum LED_CTL_MODE LedAction) pLed->bLedWPSBlinkInProgress = false; pLed->bLedScanBlinkInProgress = false; cancel_delayed_work(&pLed->blink_work); - SwLedOff(padapter, pLed); + SwLedOff(pLed); break; default: break; From 4fc4de550eb0fb47514ee0f37f9a0abd4d4bf2c9 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sat, 15 Oct 2022 17:11:11 +0200 Subject: [PATCH 0258/4122] staging: r8188eu: remove two unused defines The C2H_MEM_SZ and FREE_CMDOBJ_SZ defines are not used by the r8188eu driver. 
Remove them. Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221015151115.232095-7-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/include/rtw_cmd.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/staging/r8188eu/include/rtw_cmd.h b/drivers/staging/r8188eu/include/rtw_cmd.h index 8bbfeb42604f..0cb054909bc6 100644 --- a/drivers/staging/r8188eu/include/rtw_cmd.h +++ b/drivers/staging/r8188eu/include/rtw_cmd.h @@ -8,13 +8,9 @@ #include "rtw_rf.h" #include "rtw_led.h" -#define C2H_MEM_SZ (16*1024) - #include "osdep_service.h" #include "ieee80211.h" /* */ -#define FREE_CMDOBJ_SZ 128 - #define MAX_CMDSZ 1024 #define MAX_RSPSZ 512 #define MAX_EVTSZ 1024 From 5ecf2bb6b1f9659e79e99465c327bb97234bfdaf Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sat, 15 Oct 2022 17:11:12 +0200 Subject: [PATCH 0259/4122] staging: r8188eu: don't include rtw_led.h from rtw_cmd.h The rtw_cmd.h does not need any definitions from the led layer, there's no reason to include rtw_led.h. When I tried to remove this component struct led_priv { struct adapter *padapter; ... I saw compiler errors because of this chain of include files: drv_types.h -> rtw_cmd.h -> rtw_led.h rtw_led.h uses struct adapter before it sees the definition near the end of drv_types.h. (It seems that a simple struct adapter * prevents this problem.) The best option for fixing this issue is to not include rtw_led.h in rtw_cmd.h.
Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221015151115.232095-8-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/include/rtw_cmd.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/staging/r8188eu/include/rtw_cmd.h b/drivers/staging/r8188eu/include/rtw_cmd.h index 0cb054909bc6..ee9218b1d7a9 100644 --- a/drivers/staging/r8188eu/include/rtw_cmd.h +++ b/drivers/staging/r8188eu/include/rtw_cmd.h @@ -6,7 +6,6 @@ #include "wlan_bssdef.h" #include "rtw_rf.h" -#include "rtw_led.h" #include "osdep_service.h" #include "ieee80211.h" /* */ From 1188cfa646ba2a7c295fe0cfac24529aec28ff2f Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sat, 15 Oct 2022 17:11:13 +0200 Subject: [PATCH 0260/4122] staging: r8188eu: remove padapter from struct led_priv The only struct led_priv that's used in the r8188eu driver is embedded in the driver's global struct adapter. We can use container_of to access the "outer" structure, there's no need to store a pointer to it.
Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221015151115.232095-9-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_led.c | 3 +-- drivers/staging/r8188eu/include/rtw_led.h | 2 -- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_led.c b/drivers/staging/r8188eu/core/rtw_led.c index 56f043d8ff38..2dbd7b5ffdd0 100644 --- a/drivers/staging/r8188eu/core/rtw_led.c +++ b/drivers/staging/r8188eu/core/rtw_led.c @@ -59,7 +59,7 @@ static void blink_work(struct work_struct *work) { struct delayed_work *dwork = to_delayed_work(work); struct led_priv *pLed = container_of(dwork, struct led_priv, blink_work); - struct adapter *padapter = pLed->padapter; + struct adapter *padapter = container_of(pLed, struct adapter, ledpriv); struct mlme_priv *pmlmepriv = &padapter->mlmepriv; if (padapter->pwrctrlpriv.rf_pwrstate != rf_on) { @@ -132,7 +132,6 @@ void rtl8188eu_InitSwLeds(struct adapter *padapter) { struct led_priv *pledpriv = &padapter->ledpriv; - pledpriv->padapter = padapter; ResetLedStatus(pledpriv); INIT_DELAYED_WORK(&pledpriv->blink_work, blink_work); } diff --git a/drivers/staging/r8188eu/include/rtw_led.h b/drivers/staging/r8188eu/include/rtw_led.h index f57dcf6c8b24..ea5f5edd9013 100644 --- a/drivers/staging/r8188eu/include/rtw_led.h +++ b/drivers/staging/r8188eu/include/rtw_led.h @@ -33,8 +33,6 @@ enum LED_STATE_871x { }; struct led_priv { - struct adapter *padapter; - bool bRegUseLed; enum LED_STATE_871x CurrLedState; /* Current LED state. */ From 126647d113dc6adf06cc4997ef8026f1972c34f7 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sat, 15 Oct 2022 17:11:14 +0200 Subject: [PATCH 0261/4122] staging: r8188eu: set two more state variables Set two more state variables in the blink worker when scan blinking and tx/rx blinking are finished. 
bLedBlinkInProgress is true during tx/rx blinking, bLedScanBlinkInProgress is true during scan blinking. If we are doing neither of the two, we may safely set both variables to false. This change makes the scan and tx/rx cases almost identical, we are now ready to summarize the two cases. Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221015151115.232095-10-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_led.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/staging/r8188eu/core/rtw_led.c b/drivers/staging/r8188eu/core/rtw_led.c index 2dbd7b5ffdd0..f8bd183fba1e 100644 --- a/drivers/staging/r8188eu/core/rtw_led.c +++ b/drivers/staging/r8188eu/core/rtw_led.c @@ -90,6 +90,7 @@ static void blink_work(struct work_struct *work) pLed->CurrLedState = LED_BLINK_SLOWLY; schedule_delayed_work(&pLed->blink_work, LED_BLINK_NO_LINK_INTVL); } + pLed->bLedBlinkInProgress = false; pLed->bLedScanBlinkInProgress = false; } else { schedule_delayed_work(&pLed->blink_work, LED_BLINK_SCAN_INTVL); @@ -106,6 +107,7 @@ static void blink_work(struct work_struct *work) schedule_delayed_work(&pLed->blink_work, LED_BLINK_NO_LINK_INTVL); } pLed->bLedBlinkInProgress = false; + pLed->bLedScanBlinkInProgress = false; } else { schedule_delayed_work(&pLed->blink_work, LED_BLINK_FASTER_INTVL); } From e1445e7b003b2b2eace1145f58670200ddadd100 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sat, 15 Oct 2022 17:11:15 +0200 Subject: [PATCH 0262/4122] staging: r8188eu: summarize tx/rx and scan blinking Summarize the code for tx/rx blinking and for scan blinking in blink_work. The only difference is the delay for scheduling the next worker.
Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221015151115.232095-11-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_led.c | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_led.c b/drivers/staging/r8188eu/core/rtw_led.c index f8bd183fba1e..ce8de2eb7845 100644 --- a/drivers/staging/r8188eu/core/rtw_led.c +++ b/drivers/staging/r8188eu/core/rtw_led.c @@ -81,21 +81,6 @@ static void blink_work(struct work_struct *work) schedule_delayed_work(&pLed->blink_work, LED_BLINK_LINK_INTVL); break; case LED_BLINK_SCAN: - pLed->BlinkTimes--; - if (pLed->BlinkTimes == 0) { - if (check_fwstate(pmlmepriv, _FW_LINKED)) { - pLed->CurrLedState = LED_BLINK_NORMAL; - schedule_delayed_work(&pLed->blink_work, LED_BLINK_LINK_INTVL); - } else { - pLed->CurrLedState = LED_BLINK_SLOWLY; - schedule_delayed_work(&pLed->blink_work, LED_BLINK_NO_LINK_INTVL); - } - pLed->bLedBlinkInProgress = false; - pLed->bLedScanBlinkInProgress = false; - } else { - schedule_delayed_work(&pLed->blink_work, LED_BLINK_SCAN_INTVL); - } - break; case LED_BLINK_TXRX: pLed->BlinkTimes--; if (pLed->BlinkTimes == 0) { @@ -109,7 +94,9 @@ static void blink_work(struct work_struct *work) pLed->bLedBlinkInProgress = false; pLed->bLedScanBlinkInProgress = false; } else { - schedule_delayed_work(&pLed->blink_work, LED_BLINK_FASTER_INTVL); + schedule_delayed_work(&pLed->blink_work, + pLed->CurrLedState == LED_BLINK_SCAN ? 
+ LED_BLINK_SCAN_INTVL : LED_BLINK_FASTER_INTVL); } break; case LED_BLINK_WPS: From 31760f04cae2019b6eb4617af4409d8eaf0ac7ac Mon Sep 17 00:00:00 2001 From: Danijel Korent Date: Sat, 15 Oct 2022 18:50:23 +0200 Subject: [PATCH 0263/4122] staging: rtl8192e: Added spaces around operators in rtl_cam.c/rtl_eeprom.c Fixed "spaces preferred around operator" type of problems reported by checkpatch Signed-off-by: Danijel Korent Link: https://lore.kernel.org/r/20221015165023.487200-1-danijel.korent@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl8192e/rtl_cam.c | 23 +++++++++---------- .../staging/rtl8192e/rtl8192e/rtl_eeprom.c | 2 +- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_cam.c b/drivers/staging/rtl8192e/rtl8192e/rtl_cam.c index 8c3ce6cc2541..9d8d4837e6b2 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_cam.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_cam.c @@ -17,7 +17,7 @@ void rtl92e_cam_reset(struct net_device *dev) { u32 ulcommand = 0; - ulcommand |= BIT31|BIT30; + ulcommand |= BIT31 | BIT30; rtl92e_writel(dev, RWCAM, ulcommand); } @@ -40,7 +40,6 @@ void rtl92e_enable_hw_security_config(struct net_device *dev) SECR_value |= SCR_TxUseDK; } - ieee->hwsec_active = 1; if ((ieee->pHTInfo->iot_action & HT_IOT_ACT_PURE_N_MODE) || !hwwep) { ieee->hwsec_active = 0; @@ -98,33 +97,33 @@ void rtl92e_set_key(struct net_device *dev, u8 EntryNo, u8 KeyIndex, } if (DefaultKey) - usConfig |= BIT15 | (KeyType<<2); + usConfig |= BIT15 | (KeyType << 2); else - usConfig |= BIT15 | (KeyType<<2) | KeyIndex; + usConfig |= BIT15 | (KeyType << 2) | KeyIndex; for (i = 0; i < CAM_CONTENT_COUNT; i++) { TargetCommand = i + CAM_CONTENT_COUNT * EntryNo; - TargetCommand |= BIT31|BIT16; + TargetCommand |= BIT31 | BIT16; if (i == 0) { - TargetContent = (u32)(*(MacAddr+0)) << 16 | - (u32)(*(MacAddr+1)) << 24 | + TargetContent = (u32)(*(MacAddr + 0)) << 16 | + (u32)(*(MacAddr + 1)) << 24 | (u32)usConfig; 
rtl92e_writel(dev, WCAMI, TargetContent); rtl92e_writel(dev, RWCAM, TargetCommand); } else if (i == 1) { - TargetContent = (u32)(*(MacAddr+2)) | - (u32)(*(MacAddr+3)) << 8 | - (u32)(*(MacAddr+4)) << 16 | - (u32)(*(MacAddr+5)) << 24; + TargetContent = (u32)(*(MacAddr + 2)) | + (u32)(*(MacAddr + 3)) << 8 | + (u32)(*(MacAddr + 4)) << 16 | + (u32)(*(MacAddr + 5)) << 24; rtl92e_writel(dev, WCAMI, TargetContent); rtl92e_writel(dev, RWCAM, TargetCommand); } else { if (KeyContent != NULL) { rtl92e_writel(dev, WCAMI, - (u32)(*(KeyContent+i-2))); + (u32)(*(KeyContent + i - 2))); rtl92e_writel(dev, RWCAM, TargetCommand); udelay(100); } diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_eeprom.c b/drivers/staging/rtl8192e/rtl8192e/rtl_eeprom.c index 59532ed2156d..db57c655c695 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_eeprom.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_eeprom.c @@ -79,6 +79,6 @@ u32 rtl92e_eeprom_read(struct net_device *dev, u32 addr) ret = _rtl92e_eeprom_xfer(dev, (addr & 0x3F) | (0x6 << 6), 9); rtl92e_writeb(dev, EPROM_CMD, - (EPROM_CMD_NORMAL< Date: Thu, 20 Oct 2022 14:16:09 +0100 Subject: [PATCH 0264/4122] staging: octeon: remove redundant variable total_freed The variable total_freed is accumulating skb_to_free however it is not being used after this. The use of total_freed is redundant and hence the variable can be removed. 
Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20221020131609.1546667-1-colin.i.king@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/octeon/ethernet-tx.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/staging/octeon/ethernet-tx.c b/drivers/staging/octeon/ethernet-tx.c index a36e36701c74..bbf33b88bb7c 100644 --- a/drivers/staging/octeon/ethernet-tx.c +++ b/drivers/staging/octeon/ethernet-tx.c @@ -73,7 +73,6 @@ static void cvm_oct_free_tx_skbs(struct net_device *dev) { int skb_to_free; int qos, queues_per_port; - int total_freed = 0; int total_remaining = 0; unsigned long flags; struct octeon_ethernet *priv = netdev_priv(dev); @@ -87,7 +86,6 @@ static void cvm_oct_free_tx_skbs(struct net_device *dev) MAX_SKB_TO_FREE); skb_to_free = cvm_oct_adjust_skb_to_free(skb_to_free, priv->fau + qos * 4); - total_freed += skb_to_free; if (skb_to_free > 0) { struct sk_buff *to_free_list = NULL; From 45e6319bd5f2154d8b8c9f1eaa4ac030ba0d330c Mon Sep 17 00:00:00 2001 From: Zhiqi Song Date: Sat, 24 Sep 2022 15:38:31 +0800 Subject: [PATCH 0265/4122] crypto: hisilicon/hpre - fix resource leak in remove process In hpre_remove(), when the disable operation of qm sriov failed, the following logic should continue to be executed to release the remaining resources that have been allocated, instead of returning directly, otherwise there will be resource leakage. 
Signed-off-by: Zhiqi Song Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_main.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index 471e5ca720f5..baf1faec7046 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -1437,18 +1437,12 @@ err_with_qm_init: static void hpre_remove(struct pci_dev *pdev) { struct hisi_qm *qm = pci_get_drvdata(pdev); - int ret; hisi_qm_pm_uninit(qm); hisi_qm_wait_task_finish(qm, &hpre_devices); hisi_qm_alg_unregister(qm, &hpre_devices); - if (qm->fun_type == QM_HW_PF && qm->vfs_num) { - ret = hisi_qm_sriov_disable(pdev, true); - if (ret) { - pci_err(pdev, "Disable SRIOV fail!\n"); - return; - } - } + if (qm->fun_type == QM_HW_PF && qm->vfs_num) + hisi_qm_sriov_disable(pdev, true); hpre_debugfs_exit(qm); hisi_qm_stop(qm, QM_NORMAL); From 7001141d34e550854425afa76e960513cf150a62 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Sat, 24 Sep 2022 17:34:24 +0800 Subject: [PATCH 0266/4122] crypto: hisilicon/qm - drop unnecessary IS_ENABLE(CONFIG_NUMA) check dev_to_node() can handle the case when CONFIG_NUMA is not set, so the check of CONFIG_NUMA is redundant and can be removed. 
Signed-off-by: Yicong Yang Signed-off-by: Weili Qian Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 8b387de69d22..9a38e170fb1d 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -4277,16 +4277,14 @@ static int hisi_qm_sort_devices(int node, struct list_head *head, struct hisi_qm *qm; struct list_head *n; struct device *dev; - int dev_node = 0; + int dev_node; list_for_each_entry(qm, &qm_list->list, list) { dev = &qm->pdev->dev; - if (IS_ENABLED(CONFIG_NUMA)) { - dev_node = dev_to_node(dev); - if (dev_node < 0) - dev_node = 0; - } + dev_node = dev_to_node(dev); + if (dev_node < 0) + dev_node = 0; res = kzalloc(sizeof(*res), GFP_KERNEL); if (!res) From f57e292897cac13b6ddee078aea21173b234ecb7 Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Sat, 24 Sep 2022 18:14:42 +0800 Subject: [PATCH 0267/4122] crypto: hisilicon/qm - fix incorrect parameters usage In qm_get_xqc_depth(), parameters low_bits and high_bits save the values of the corresponding bits. However, the values saved by the two parameters are opposite. As a result, the values returned to the callers are incorrect. 
Fixes: 129a9f340172 ("crypto: hisilicon/qm - get qp num and depth from hardware registers") Signed-off-by: Weili Qian Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 9a38e170fb1d..01c083e2c4bd 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -909,8 +909,8 @@ static void qm_get_xqc_depth(struct hisi_qm *qm, u16 *low_bits, u32 depth; depth = hisi_qm_get_hw_info(qm, qm_basic_info, type, qm->cap_ver); - *high_bits = depth & QM_XQ_DEPTH_MASK; - *low_bits = (depth >> QM_XQ_DEPTH_SHIFT) & QM_XQ_DEPTH_MASK; + *low_bits = depth & QM_XQ_DEPTH_MASK; + *high_bits = (depth >> QM_XQ_DEPTH_SHIFT) & QM_XQ_DEPTH_MASK; } static u32 qm_get_irq_num(struct hisi_qm *qm) From 94adb03fd58bbe355e3d7a9d0f701889313e4a51 Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Sat, 24 Sep 2022 18:34:45 +0800 Subject: [PATCH 0268/4122] crypto: hisilicon/sec - enabling clock gating of the address prefetch module Change the value of clock gating register to 0x7fff to enable clock gating of the address prefetch module. When the device is idle, the clock is turned off to save power. 
Signed-off-by: Weili Qian Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sec2/sec_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index 3705412bac5f..6eb8a16ba0a7 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -55,7 +55,7 @@ #define SEC_CONTROL_REG 0x301200 #define SEC_DYNAMIC_GATE_REG 0x30121c #define SEC_CORE_AUTO_GATE 0x30212c -#define SEC_DYNAMIC_GATE_EN 0x7bff +#define SEC_DYNAMIC_GATE_EN 0x7fff #define SEC_CORE_AUTO_GATE_EN GENMASK(3, 0) #define SEC_CLK_GATE_ENABLE BIT(3) #define SEC_CLK_GATE_DISABLE (~BIT(3)) From ee1537fe3dd89860d0336563891f6cac707d0cb5 Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Sat, 24 Sep 2022 19:04:31 +0800 Subject: [PATCH 0269/4122] crypto: hisilicon/qm - re-enable communicate interrupt before notifying PF After the device is reset, the VF needs to re-enable communication interrupt before the VF sends restart complete message to the PF. If the interrupt is re-enabled after the VF notifies the PF, the PF may fail to send messages to the VF after receiving VF's restart complete message. 
Fixes: 760fe22cf5e9 ("crypto: hisilicon/qm - update reset flow") Signed-off-by: Weili Qian Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 01c083e2c4bd..e3edb176d976 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -5723,6 +5723,7 @@ static void qm_pf_reset_vf_done(struct hisi_qm *qm) cmd = QM_VF_START_FAIL; } + qm_cmd_init(qm); ret = qm_ping_pf(qm, cmd); if (ret) dev_warn(&pdev->dev, "PF responds timeout in reset done!\n"); @@ -5784,7 +5785,6 @@ static void qm_pf_reset_vf_process(struct hisi_qm *qm, goto err_get_status; qm_pf_reset_vf_done(qm); - qm_cmd_init(qm); dev_info(dev, "device reset done.\n"); From ad981647dbe1ea91071b9783dd62d74e22c6d955 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Mon, 26 Sep 2022 17:14:21 +0800 Subject: [PATCH 0270/4122] crypto: ccm - use local variables instead of indirect references The variable odata has been introduced into the function scope as a variable and should be used directly. 
Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu --- crypto/ccm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/ccm.c b/crypto/ccm.c index 6b815ece51c6..30dbae72728f 100644 --- a/crypto/ccm.c +++ b/crypto/ccm.c @@ -218,7 +218,7 @@ static int crypto_ccm_auth(struct aead_request *req, struct scatterlist *plain, cryptlen += ilen; } - ahash_request_set_crypt(ahreq, plain, pctx->odata, cryptlen); + ahash_request_set_crypt(ahreq, plain, odata, cryptlen); err = crypto_ahash_finup(ahreq); out: return err; From f30fe6314698d107edbb9db50bc3c3443a30ec80 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Mon, 26 Sep 2022 17:14:40 +0800 Subject: [PATCH 0271/4122] crypto: scatterwalk - remove duplicate function declarations scatterwalk_map() is an inline function already defined in the header file, it is necessary to delete the re-declaration at the same location, which was left out in the header file by an earlier modification. Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu --- include/crypto/scatterwalk.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/crypto/scatterwalk.h b/include/crypto/scatterwalk.h index ccdb05f68a75..f2c42b4111b1 100644 --- a/include/crypto/scatterwalk.h +++ b/include/crypto/scatterwalk.h @@ -93,7 +93,6 @@ static inline void scatterwalk_done(struct scatter_walk *walk, int out, void scatterwalk_copychunks(void *buf, struct scatter_walk *walk, size_t nbytes, int out); -void *scatterwalk_map(struct scatter_walk *walk); void scatterwalk_map_and_copy(void *buf, struct scatterlist *sg, unsigned int start, unsigned int nbytes, int out); From 237f9eceb2f3c888c1a26a4209243607d3cc0c7c Mon Sep 17 00:00:00 2001 From: ruanjinjie Date: Mon, 26 Sep 2022 17:27:11 +0800 Subject: [PATCH 0272/4122] crypto: ccp - Add __init/__exit annotations to module init/exit funcs Add missing __init/__exit annotations to module init/exit funcs Signed-off-by: ruanjinjie Acked-by: John Allen Signed-off-by: Herbert Xu --- 
drivers/crypto/ccp/ccp-crypto-main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/ccp/ccp-crypto-main.c b/drivers/crypto/ccp/ccp-crypto-main.c index 5976530c00a8..332181027305 100644 --- a/drivers/crypto/ccp/ccp-crypto-main.c +++ b/drivers/crypto/ccp/ccp-crypto-main.c @@ -400,7 +400,7 @@ static void ccp_unregister_algs(void) } } -static int ccp_crypto_init(void) +static int __init ccp_crypto_init(void) { int ret; @@ -421,7 +421,7 @@ static int ccp_crypto_init(void) return ret; } -static void ccp_crypto_exit(void) +static void __exit ccp_crypto_exit(void) { ccp_unregister_algs(); } From 224f3a050e495a7c3c1bcee2c613d0996bc661dc Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 26 Sep 2022 16:45:45 -0500 Subject: [PATCH 0273/4122] crypto: talitos - Replace zero-length arrays with DECLARE_FLEX_ARRAY() helper Zero-length arrays are deprecated and we are moving towards adopting C99 flexible-array members, instead. So, replace zero-length arrays declarations in anonymous union with the new DECLARE_FLEX_ARRAY() helper macro. This helper allows for flexible-array members in unions. Link: https://github.com/KSPP/linux/issues/193 Link: https://github.com/KSPP/linux/issues/216 Link: https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html Signed-off-by: Gustavo A. R. 
Silva Reviewed-by: Kees Cook Signed-off-by: Herbert Xu --- drivers/crypto/talitos.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/talitos.h b/drivers/crypto/talitos.h index 32825119e880..1a93ee355929 100644 --- a/drivers/crypto/talitos.h +++ b/drivers/crypto/talitos.h @@ -65,8 +65,8 @@ struct talitos_edesc { dma_addr_t dma_link_tbl; struct talitos_desc desc; union { - struct talitos_ptr link_tbl[0]; - u8 buf[0]; + DECLARE_FLEX_ARRAY(struct talitos_ptr, link_tbl); + DECLARE_FLEX_ARRAY(u8, buf); }; }; From 22044d9b04b593831d8e16ba7aafabf4e75964f5 Mon Sep 17 00:00:00 2001 From: Peter Harliman Liem Date: Tue, 27 Sep 2022 11:10:08 +0800 Subject: [PATCH 0274/4122] crypto: inside-secure - Expand soc data structure Currently platform data is assigned directly to version string(instead of struct). To make it more scalable, we move it to use data struct instead. This allows customization for individual platforms other than version string. Signed-off-by: Peter Harliman Liem Signed-off-by: Herbert Xu --- drivers/crypto/inside-secure/safexcel.c | 44 +++++++++++++++++-------- drivers/crypto/inside-secure/safexcel.h | 6 +++- 2 files changed, 35 insertions(+), 15 deletions(-) diff --git a/drivers/crypto/inside-secure/safexcel.c b/drivers/crypto/inside-secure/safexcel.c index ad0d8c4a71ac..8f4872470529 100644 --- a/drivers/crypto/inside-secure/safexcel.c +++ b/drivers/crypto/inside-secure/safexcel.c @@ -410,10 +410,10 @@ static int eip197_load_firmwares(struct safexcel_crypto_priv *priv) int i, j, ret = 0, pe; int ipuesz, ifppsz, minifw = 0; - if (priv->version == EIP197D_MRVL) + if (priv->data->version == EIP197D_MRVL) dir = "eip197d"; - else if (priv->version == EIP197B_MRVL || - priv->version == EIP197_DEVBRD) + else if (priv->data->version == EIP197B_MRVL || + priv->data->version == EIP197_DEVBRD) dir = "eip197b"; else return -ENODEV; @@ -423,7 +423,7 @@ retry_fw: snprintf(fw_path, 37, "inside-secure/%s/%s", dir, fw_name[i]); ret = 
firmware_request_nowarn(&fw[i], fw_path, priv->dev); if (ret) { - if (minifw || priv->version != EIP197B_MRVL) + if (minifw || priv->data->version != EIP197B_MRVL) goto release_fw; /* Fallback to the old firmware location for the @@ -1597,7 +1597,7 @@ static int safexcel_probe_generic(void *pdev, safexcel_configure(priv); - if (IS_ENABLED(CONFIG_PCI) && priv->version == EIP197_DEVBRD) { + if (IS_ENABLED(CONFIG_PCI) && priv->data->version == EIP197_DEVBRD) { /* * Request MSI vectors for global + 1 per ring - * or just 1 for older dev images @@ -1731,7 +1731,7 @@ static int safexcel_probe(struct platform_device *pdev) return -ENOMEM; priv->dev = dev; - priv->version = (enum safexcel_eip_version)of_device_get_match_data(dev); + priv->data = (struct safexcel_priv_data *)of_device_get_match_data(dev); platform_set_drvdata(pdev, priv); @@ -1806,27 +1806,43 @@ static int safexcel_remove(struct platform_device *pdev) return 0; } +static const struct safexcel_priv_data eip97ies_mrvl_data = { + .version = EIP97IES_MRVL, +}; + +static const struct safexcel_priv_data eip197b_mrvl_data = { + .version = EIP197B_MRVL, +}; + +static const struct safexcel_priv_data eip197d_mrvl_data = { + .version = EIP197D_MRVL, +}; + +static const struct safexcel_priv_data eip197_devbrd_data = { + .version = EIP197_DEVBRD, +}; + static const struct of_device_id safexcel_of_match_table[] = { { .compatible = "inside-secure,safexcel-eip97ies", - .data = (void *)EIP97IES_MRVL, + .data = &eip97ies_mrvl_data, }, { .compatible = "inside-secure,safexcel-eip197b", - .data = (void *)EIP197B_MRVL, + .data = &eip197b_mrvl_data, }, { .compatible = "inside-secure,safexcel-eip197d", - .data = (void *)EIP197D_MRVL, + .data = &eip197d_mrvl_data, }, /* For backward compatibility and intended for generic use */ { .compatible = "inside-secure,safexcel-eip97", - .data = (void *)EIP97IES_MRVL, + .data = &eip97ies_mrvl_data, }, { .compatible = "inside-secure,safexcel-eip197", - .data = (void *)EIP197B_MRVL, + .data = 
&eip197b_mrvl_data, }, {}, }; @@ -1862,7 +1878,7 @@ static int safexcel_pci_probe(struct pci_dev *pdev, return -ENOMEM; priv->dev = dev; - priv->version = (enum safexcel_eip_version)ent->driver_data; + priv->data = (struct safexcel_priv_data *)ent->driver_data; pci_set_drvdata(pdev, priv); @@ -1881,7 +1897,7 @@ static int safexcel_pci_probe(struct pci_dev *pdev, } priv->base = pcim_iomap_table(pdev)[0]; - if (priv->version == EIP197_DEVBRD) { + if (priv->data->version == EIP197_DEVBRD) { dev_dbg(dev, "Device identified as FPGA based development board - applying HW reset\n"); rc = pcim_iomap_regions(pdev, 4, "crypto_safexcel"); @@ -1949,7 +1965,7 @@ static const struct pci_device_id safexcel_pci_ids[] = { { PCI_DEVICE_SUB(PCI_VENDOR_ID_XILINX, 0x9038, 0x16ae, 0xc522), - .driver_data = EIP197_DEVBRD, + .driver_data = (kernel_ulong_t)&eip197_devbrd_data, }, {}, }; diff --git a/drivers/crypto/inside-secure/safexcel.h b/drivers/crypto/inside-secure/safexcel.h index 797ff91512e0..e8da8b30a392 100644 --- a/drivers/crypto/inside-secure/safexcel.h +++ b/drivers/crypto/inside-secure/safexcel.h @@ -733,6 +733,10 @@ enum safexcel_eip_version { EIP197_DEVBRD }; +struct safexcel_priv_data { + enum safexcel_eip_version version; +}; + /* Priority we use for advertising our algorithms */ #define SAFEXCEL_CRA_PRIORITY 300 @@ -815,7 +819,7 @@ struct safexcel_crypto_priv { struct clk *reg_clk; struct safexcel_config config; - enum safexcel_eip_version version; + struct safexcel_priv_data *data; struct safexcel_register_offsets offsets; struct safexcel_hwconfig hwconfig; u32 flags; From 594ed3d245d3e2d0760f30724e02ecf1604b2c01 Mon Sep 17 00:00:00 2001 From: Peter Harliman Liem Date: Tue, 27 Sep 2022 11:10:09 +0800 Subject: [PATCH 0275/4122] crypto: inside-secure - Add fw_little_endian option This is to add fw_little_endian option, which can be used for platform which firmware is using little-endian (instead of big-endian). 
Signed-off-by: Peter Harliman Liem Signed-off-by: Herbert Xu --- drivers/crypto/inside-secure/safexcel.c | 14 ++++++++++---- drivers/crypto/inside-secure/safexcel.h | 1 + 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/inside-secure/safexcel.c b/drivers/crypto/inside-secure/safexcel.c index 8f4872470529..4d6d64ff9a0f 100644 --- a/drivers/crypto/inside-secure/safexcel.c +++ b/drivers/crypto/inside-secure/safexcel.c @@ -316,14 +316,20 @@ static void eip197_init_firmware(struct safexcel_crypto_priv *priv) static int eip197_write_firmware(struct safexcel_crypto_priv *priv, const struct firmware *fw) { - const __be32 *data = (const __be32 *)fw->data; + u32 val; int i; /* Write the firmware */ - for (i = 0; i < fw->size / sizeof(u32); i++) - writel(be32_to_cpu(data[i]), + for (i = 0; i < fw->size / sizeof(u32); i++) { + if (priv->data->fw_little_endian) + val = le32_to_cpu(((const __le32 *)fw->data)[i]); + else + val = be32_to_cpu(((const __be32 *)fw->data)[i]); + + writel(val, priv->base + EIP197_CLASSIFICATION_RAMS + - i * sizeof(__be32)); + i * sizeof(val)); + } /* Exclude final 2 NOPs from size */ return i - EIP197_FW_TERMINAL_NOPS; diff --git a/drivers/crypto/inside-secure/safexcel.h b/drivers/crypto/inside-secure/safexcel.h index e8da8b30a392..f049293870b4 100644 --- a/drivers/crypto/inside-secure/safexcel.h +++ b/drivers/crypto/inside-secure/safexcel.h @@ -735,6 +735,7 @@ enum safexcel_eip_version { struct safexcel_priv_data { enum safexcel_eip_version version; + bool fw_little_endian; }; /* Priority we use for advertising our algorithms */ From 36dd88b1c09c78f993cb11dcc5f4211d78a10e5f Mon Sep 17 00:00:00 2001 From: Peter Harliman Liem Date: Tue, 27 Sep 2022 11:10:10 +0800 Subject: [PATCH 0276/4122] crypto: inside-secure - Add MaxLinear platform This is to add MaxLinear platform into compatible id. Firmware endianness option is added since MaxLinear firmware is in little endian format. 
Signed-off-by: Peter Harliman Liem Signed-off-by: Herbert Xu --- drivers/crypto/inside-secure/safexcel.c | 11 +++++++++++ drivers/crypto/inside-secure/safexcel.h | 3 ++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/inside-secure/safexcel.c b/drivers/crypto/inside-secure/safexcel.c index 4d6d64ff9a0f..ae6110376e21 100644 --- a/drivers/crypto/inside-secure/safexcel.c +++ b/drivers/crypto/inside-secure/safexcel.c @@ -421,6 +421,8 @@ static int eip197_load_firmwares(struct safexcel_crypto_priv *priv) else if (priv->data->version == EIP197B_MRVL || priv->data->version == EIP197_DEVBRD) dir = "eip197b"; + else if (priv->data->version == EIP197C_MXL) + dir = "eip197c"; else return -ENODEV; @@ -1828,6 +1830,11 @@ static const struct safexcel_priv_data eip197_devbrd_data = { .version = EIP197_DEVBRD, }; +static const struct safexcel_priv_data eip197c_mxl_data = { + .version = EIP197C_MXL, + .fw_little_endian = true, +}; + static const struct of_device_id safexcel_of_match_table[] = { { .compatible = "inside-secure,safexcel-eip97ies", @@ -1841,6 +1848,10 @@ static const struct of_device_id safexcel_of_match_table[] = { .compatible = "inside-secure,safexcel-eip197d", .data = &eip197d_mrvl_data, }, + { + .compatible = "inside-secure,safexcel-eip197c-mxl", + .data = &eip197c_mxl_data, + }, /* For backward compatibility and intended for generic use */ { .compatible = "inside-secure,safexcel-eip97", diff --git a/drivers/crypto/inside-secure/safexcel.h b/drivers/crypto/inside-secure/safexcel.h index f049293870b4..6c2fc662f64f 100644 --- a/drivers/crypto/inside-secure/safexcel.h +++ b/drivers/crypto/inside-secure/safexcel.h @@ -730,7 +730,8 @@ enum safexcel_eip_version { EIP97IES_MRVL, EIP197B_MRVL, EIP197D_MRVL, - EIP197_DEVBRD + EIP197_DEVBRD, + EIP197C_MXL, }; struct safexcel_priv_data { From 839b8ae2fc10f205317bcc32c9de18456756e1f5 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Tue, 27 Sep 2022 08:55:55 +0000 Subject: [PATCH 0277/4122] 
crypto: sun8i-ss - use dma_addr instead u32 The DMA address needs to be stored in a dma_addr_t Fixes: 359e893e8af4 ("crypto: sun8i-ss - rework handling of IV") Reported-by: Dan Carpenter Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c index 910d6751644c..902f6be057ec 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c @@ -124,7 +124,7 @@ static int sun8i_ss_setup_ivs(struct skcipher_request *areq) unsigned int ivsize = crypto_skcipher_ivsize(tfm); struct sun8i_ss_flow *sf = &ss->flows[rctx->flow]; int i = 0; - u32 a; + dma_addr_t a; int err; rctx->ivlen = ivsize; From 375de984a3cb691a0bbaf9756e8595e0b54e27e0 Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Tue, 27 Sep 2022 13:39:55 +0000 Subject: [PATCH 0278/4122] crypto: ccp - Remove unused struct ccp_crypto_cpu After commit bc3854476f36 ("crypto: ccp - Use a single queue for proper ordering of tfm requests"), no one uses struct ccp_crypto_cpu, so remove it.
Signed-off-by: Yuan Can Acked-by: John Allen Signed-off-by: Herbert Xu --- drivers/crypto/ccp/ccp-crypto-main.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/crypto/ccp/ccp-crypto-main.c b/drivers/crypto/ccp/ccp-crypto-main.c index 332181027305..dd86d2650bea 100644 --- a/drivers/crypto/ccp/ccp-crypto-main.c +++ b/drivers/crypto/ccp/ccp-crypto-main.c @@ -78,13 +78,6 @@ struct ccp_crypto_cmd { int ret; }; -struct ccp_crypto_cpu { - struct work_struct work; - struct completion completion; - struct ccp_crypto_cmd *crypto_cmd; - int err; -}; - static inline bool ccp_crypto_success(int err) { if (err && (err != -EINPROGRESS) && (err != -EBUSY)) From 094528b6a5a755b1195a01e10b13597d67d1a0e6 Mon Sep 17 00:00:00 2001 From: Natalia Petrova Date: Wed, 28 Sep 2022 13:25:05 +0300 Subject: [PATCH 0279/4122] crypto: nitrox - avoid double free on error path in nitrox_sriov_init() If alloc_workqueue() fails in nitrox_mbox_init() it deallocates ndev->iov.vfdev and returns error code, but then nitrox_sriov_init() calls nitrox_sriov_cleanup() where ndev->iov.vfdev is deallocated again. Fix this by nulling ndev->iov.vfdev after the first deallocation. Found by Linux Verification Center (linuxtesting.org) with SVACE. 
Fixes: 9e5de3e06e54 ("crypto: cavium/nitrox - Add mailbox...") Signed-off-by: Natalia Petrova Signed-off-by: Alexey Khoroshilov Signed-off-by: Herbert Xu --- drivers/crypto/cavium/nitrox/nitrox_mbx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/cavium/nitrox/nitrox_mbx.c b/drivers/crypto/cavium/nitrox/nitrox_mbx.c index 9e7308e39b30..d4e06999af9b 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_mbx.c +++ b/drivers/crypto/cavium/nitrox/nitrox_mbx.c @@ -195,6 +195,7 @@ int nitrox_mbox_init(struct nitrox_device *ndev) ndev->iov.pf2vf_wq = alloc_workqueue("nitrox_pf2vf", 0, 0); if (!ndev->iov.pf2vf_wq) { kfree(ndev->iov.vfdev); + ndev->iov.vfdev = NULL; return -ENOMEM; } /* enable pf2vf mailbox interrupts */ From 10da230a4df1dfe32a58eb09246f5ffe82346f27 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 28 Sep 2022 13:45:05 -0500 Subject: [PATCH 0280/4122] crypto: ccp - Add support for TEE for PCI ID 0x14CA SoCs containing 0x14CA are present both in datacenter parts that support SEV as well as client parts that support TEE. 
Cc: stable@vger.kernel.org # 5.15+ Tested-by: Rijo-john Thomas Signed-off-by: Mario Limonciello Acked-by: Tom Lendacky Signed-off-by: Herbert Xu --- drivers/crypto/ccp/sp-pci.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/ccp/sp-pci.c b/drivers/crypto/ccp/sp-pci.c index 792d6da7f0c0..084d052fddcc 100644 --- a/drivers/crypto/ccp/sp-pci.c +++ b/drivers/crypto/ccp/sp-pci.c @@ -381,6 +381,15 @@ static const struct psp_vdata pspv3 = { .inten_reg = 0x10690, .intsts_reg = 0x10694, }; + +static const struct psp_vdata pspv4 = { + .sev = &sevv2, + .tee = &teev1, + .feature_reg = 0x109fc, + .inten_reg = 0x10690, + .intsts_reg = 0x10694, +}; + #endif static const struct sp_dev_vdata dev_vdata[] = { @@ -426,7 +435,7 @@ static const struct sp_dev_vdata dev_vdata[] = { { /* 5 */ .bar = 2, #ifdef CONFIG_CRYPTO_DEV_SP_PSP - .psp_vdata = &pspv2, + .psp_vdata = &pspv4, #endif }, { /* 6 */ From be7f5ef9ff4bbe99e4fcdf63057a993be178af46 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 28 Sep 2022 23:24:43 +0100 Subject: [PATCH 0281/4122] crypto: stm32 - Fix spelling mistake "wite" -> "write" There are a couple of spelling mistakes in dev_err messages. Fix them. 
Signed-off-by: Colin Ian King Acked-by: nicolas.toromanoff@foss.st.com Signed-off-by: Herbert Xu --- drivers/crypto/stm32/stm32-cryp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/stm32/stm32-cryp.c b/drivers/crypto/stm32/stm32-cryp.c index 59ef541123ae..59638dfce573 100644 --- a/drivers/crypto/stm32/stm32-cryp.c +++ b/drivers/crypto/stm32/stm32-cryp.c @@ -1400,7 +1400,7 @@ static void stm32_cryp_irq_write_ccm_padded_data(struct stm32_cryp *cryp) /* wait end of process */ err = stm32_cryp_wait_output(cryp); if (err) { - dev_err(cryp->dev, "Timeout (wite ccm padded data)\n"); + dev_err(cryp->dev, "Timeout (write ccm padded data)\n"); return stm32_cryp_finish_req(cryp, err); } @@ -1440,7 +1440,7 @@ static void stm32_cryp_irq_write_ccm_padded_data(struct stm32_cryp *cryp) /* h) wait for completion */ err = stm32_cryp_wait_busy(cryp); if (err) - dev_err(cryp->dev, "Timeout (wite ccm padded data)\n"); + dev_err(cryp->dev, "Timeout (write ccm padded data)\n"); /* i) run the he normal Final phase */ stm32_cryp_finish_req(cryp, err); From 518a198f41d6539dc025f0e4fe2785f9031fa1eb Mon Sep 17 00:00:00 2001 From: Tomer Maimon Date: Thu, 29 Sep 2022 16:31:10 +0300 Subject: [PATCH 0282/4122] dt-bindings: rng: nuvoton,npcm-rng: Add npcm845 compatible string Add a compatible string for Nuvoton BMC NPCM845 RNG. 
Signed-off-by: Tomer Maimon Acked-by: Krzysztof Kozlowski Signed-off-by: Herbert Xu --- Documentation/devicetree/bindings/rng/nuvoton,npcm-rng.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/rng/nuvoton,npcm-rng.yaml b/Documentation/devicetree/bindings/rng/nuvoton,npcm-rng.yaml index abd134c9d400..e8e4ab1e5b95 100644 --- a/Documentation/devicetree/bindings/rng/nuvoton,npcm-rng.yaml +++ b/Documentation/devicetree/bindings/rng/nuvoton,npcm-rng.yaml @@ -16,7 +16,9 @@ maintainers: properties: compatible: - const: nuvoton,npcm750-rng + enum: + - nuvoton,npcm750-rng + - nuvoton,npcm845-rng reg: maxItems: 1 From f07b3e87fe62984db66fd4179ae7e960e4fc43e8 Mon Sep 17 00:00:00 2001 From: Tomer Maimon Date: Thu, 29 Sep 2022 16:31:11 +0300 Subject: [PATCH 0283/4122] hwrng: npcm - Add NPCM8XX support Adding RNG NPCM8XX support to NPCM RNG driver. RNG NPCM8XX uses a different clock prescaler. As part of adding NPCM8XX support: - Add NPCM8XX specific compatible string. - Add data to handle architecture specific clock prescaler. 
Signed-off-by: Tomer Maimon Signed-off-by: Herbert Xu --- drivers/char/hw_random/npcm-rng.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/char/hw_random/npcm-rng.c b/drivers/char/hw_random/npcm-rng.c index 1ec5f267a656..5bf7f370f985 100644 --- a/drivers/char/hw_random/npcm-rng.c +++ b/drivers/char/hw_random/npcm-rng.c @@ -13,11 +13,13 @@ #include #include #include +#include #define NPCM_RNGCS_REG 0x00 /* Control and status register */ #define NPCM_RNGD_REG 0x04 /* Data register */ #define NPCM_RNGMODE_REG 0x08 /* Mode register */ +#define NPCM_RNG_CLK_SET_62_5MHZ BIT(2) /* 60-80 MHz */ #define NPCM_RNG_CLK_SET_25MHZ GENMASK(4, 3) /* 20-25 MHz */ #define NPCM_RNG_DATA_VALID BIT(1) #define NPCM_RNG_ENABLE BIT(0) @@ -31,14 +33,14 @@ struct npcm_rng { void __iomem *base; struct hwrng rng; + u32 clkp; }; static int npcm_rng_init(struct hwrng *rng) { struct npcm_rng *priv = to_npcm_rng(rng); - writel(NPCM_RNG_CLK_SET_25MHZ | NPCM_RNG_ENABLE, - priv->base + NPCM_RNGCS_REG); + writel(priv->clkp | NPCM_RNG_ENABLE, priv->base + NPCM_RNGCS_REG); return 0; } @@ -47,7 +49,7 @@ static void npcm_rng_cleanup(struct hwrng *rng) { struct npcm_rng *priv = to_npcm_rng(rng); - writel(NPCM_RNG_CLK_SET_25MHZ, priv->base + NPCM_RNGCS_REG); + writel(priv->clkp, priv->base + NPCM_RNGCS_REG); } static int npcm_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait) @@ -110,6 +112,7 @@ static int npcm_rng_probe(struct platform_device *pdev) priv->rng.read = npcm_rng_read; priv->rng.priv = (unsigned long)&pdev->dev; priv->rng.quality = 1000; + priv->clkp = (u32)(uintptr_t)of_device_get_match_data(&pdev->dev); writel(NPCM_RNG_M1ROSEL, priv->base + NPCM_RNGMODE_REG); @@ -162,7 +165,10 @@ static const struct dev_pm_ops npcm_rng_pm_ops = { }; static const struct of_device_id rng_dt_id[] __maybe_unused = { - { .compatible = "nuvoton,npcm750-rng", }, + { .compatible = "nuvoton,npcm750-rng", + .data = (void *)NPCM_RNG_CLK_SET_25MHZ }, + { .compatible 
= "nuvoton,npcm845-rng", + .data = (void *)NPCM_RNG_CLK_SET_62_5MHZ }, {}, }; MODULE_DEVICE_TABLE(of, rng_dt_id); From 46beeade05c6a7673873ba0a7b6396cd3a3b3473 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 30 Sep 2022 14:09:34 +0800 Subject: [PATCH 0284/4122] crypto: ixp4xx - Fix sparse warnings This fixes a number of trivial sparse warnings in ixp4xx. Signed-off-by: Herbert Xu Acked-by: Corentin Labbe Tested-by: Corentin Labbe Acked-by: Linus Walleij Signed-off-by: Herbert Xu --- drivers/crypto/ixp4xx_crypto.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c index d39a386b31ac..984b3cc0237c 100644 --- a/drivers/crypto/ixp4xx_crypto.c +++ b/drivers/crypto/ixp4xx_crypto.c @@ -420,7 +420,7 @@ static void one_packet(dma_addr_t phys) break; case CTL_FLAG_GEN_REVAES: ctx = crypto_tfm_ctx(crypt->data.tfm); - *(u32 *)ctx->decrypt.npe_ctx &= cpu_to_be32(~CIPH_ENCR); + *(__be32 *)ctx->decrypt.npe_ctx &= cpu_to_be32(~CIPH_ENCR); if (atomic_dec_and_test(&ctx->configuring)) complete(&ctx->completion); break; @@ -720,7 +720,7 @@ static int register_chain_var(struct crypto_tfm *tfm, u8 xpad, u32 target, crypt->init_len = init_len; crypt->ctl_flags |= CTL_FLAG_GEN_ICV; - buf->next = 0; + buf->next = NULL; buf->buf_len = HMAC_PAD_BLOCKLEN; buf->pkt_len = 0; buf->phys_addr = pad_phys; @@ -751,7 +751,7 @@ static int setup_auth(struct crypto_tfm *tfm, int encrypt, unsigned int authsize #ifndef __ARMEB__ cfgword ^= 0xAA000000; /* change the "byte swap" flags */ #endif - *(u32 *)cinfo = cpu_to_be32(cfgword); + *(__be32 *)cinfo = cpu_to_be32(cfgword); cinfo += sizeof(cfgword); /* write ICV to cryptinfo */ @@ -788,7 +788,7 @@ static int gen_rev_aes_key(struct crypto_tfm *tfm) if (!crypt) return -EAGAIN; - *(u32 *)dir->npe_ctx |= cpu_to_be32(CIPH_ENCR); + *(__be32 *)dir->npe_ctx |= cpu_to_be32(CIPH_ENCR); crypt->data.tfm = tfm; crypt->crypt_offs = 0; @@ -846,7 +846,7 @@ static int 
setup_cipher(struct crypto_tfm *tfm, int encrypt, const u8 *key, return err; } /* write cfg word to cryptinfo */ - *(u32 *)cinfo = cpu_to_be32(cipher_cfg); + *(__be32 *)cinfo = cpu_to_be32(cipher_cfg); cinfo += sizeof(cipher_cfg); /* write cipher key to cryptinfo */ From 65c92cbb3f2365627a10cf97560d51e88fb4e588 Mon Sep 17 00:00:00 2001 From: Robert Elliott Date: Fri, 30 Sep 2022 16:40:14 -0500 Subject: [PATCH 0285/4122] crypto: tcrypt - fix return value for multiple subtests When a test mode invokes multiple tests (e.g., mode 0 invokes modes 1 through 199, and mode 3 tests three block cipher modes with des), don't keep accumulating the return values with ret += tcrypt_test(), which results in a bogus value if more than one report a nonzero value (e.g., two reporting -2 (-ENOENT) end up reporting -4 (-EINTR)). Instead, keep track of the minimum return value reported by any subtest. Fixes: 4e033a6bc70f ("crypto: tcrypt - Do not exit on success in fips mode") Signed-off-by: Robert Elliott Signed-off-by: Herbert Xu --- crypto/tcrypt.c | 256 ++++++++++++++++++++++++------------------------ 1 file changed, 128 insertions(+), 128 deletions(-) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index a82679b576bb..3f7dc94a63e0 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -1471,387 +1471,387 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) } for (i = 1; i < 200; i++) - ret += do_test(NULL, 0, 0, i, num_mb); + ret = min(ret, do_test(NULL, 0, 0, i, num_mb)); break; case 1: - ret += tcrypt_test("md5"); + ret = min(ret, tcrypt_test("md5")); break; case 2: - ret += tcrypt_test("sha1"); + ret = min(ret, tcrypt_test("sha1")); break; case 3: - ret += tcrypt_test("ecb(des)"); - ret += tcrypt_test("cbc(des)"); - ret += tcrypt_test("ctr(des)"); + ret = min(ret, tcrypt_test("ecb(des)")); + ret = min(ret, tcrypt_test("cbc(des)")); + ret = min(ret, tcrypt_test("ctr(des)")); break; case 4: - ret += tcrypt_test("ecb(des3_ede)"); - ret += 
tcrypt_test("cbc(des3_ede)"); - ret += tcrypt_test("ctr(des3_ede)"); + ret = min(ret, tcrypt_test("ecb(des3_ede)")); + ret = min(ret, tcrypt_test("cbc(des3_ede)")); + ret = min(ret, tcrypt_test("ctr(des3_ede)")); break; case 5: - ret += tcrypt_test("md4"); + ret = min(ret, tcrypt_test("md4")); break; case 6: - ret += tcrypt_test("sha256"); + ret = min(ret, tcrypt_test("sha256")); break; case 7: - ret += tcrypt_test("ecb(blowfish)"); - ret += tcrypt_test("cbc(blowfish)"); - ret += tcrypt_test("ctr(blowfish)"); + ret = min(ret, tcrypt_test("ecb(blowfish)")); + ret = min(ret, tcrypt_test("cbc(blowfish)")); + ret = min(ret, tcrypt_test("ctr(blowfish)")); break; case 8: - ret += tcrypt_test("ecb(twofish)"); - ret += tcrypt_test("cbc(twofish)"); - ret += tcrypt_test("ctr(twofish)"); - ret += tcrypt_test("lrw(twofish)"); - ret += tcrypt_test("xts(twofish)"); + ret = min(ret, tcrypt_test("ecb(twofish)")); + ret = min(ret, tcrypt_test("cbc(twofish)")); + ret = min(ret, tcrypt_test("ctr(twofish)")); + ret = min(ret, tcrypt_test("lrw(twofish)")); + ret = min(ret, tcrypt_test("xts(twofish)")); break; case 9: - ret += tcrypt_test("ecb(serpent)"); - ret += tcrypt_test("cbc(serpent)"); - ret += tcrypt_test("ctr(serpent)"); - ret += tcrypt_test("lrw(serpent)"); - ret += tcrypt_test("xts(serpent)"); + ret = min(ret, tcrypt_test("ecb(serpent)")); + ret = min(ret, tcrypt_test("cbc(serpent)")); + ret = min(ret, tcrypt_test("ctr(serpent)")); + ret = min(ret, tcrypt_test("lrw(serpent)")); + ret = min(ret, tcrypt_test("xts(serpent)")); break; case 10: - ret += tcrypt_test("ecb(aes)"); - ret += tcrypt_test("cbc(aes)"); - ret += tcrypt_test("lrw(aes)"); - ret += tcrypt_test("xts(aes)"); - ret += tcrypt_test("ctr(aes)"); - ret += tcrypt_test("rfc3686(ctr(aes))"); - ret += tcrypt_test("ofb(aes)"); - ret += tcrypt_test("cfb(aes)"); - ret += tcrypt_test("xctr(aes)"); + ret = min(ret, tcrypt_test("ecb(aes)")); + ret = min(ret, tcrypt_test("cbc(aes)")); + ret = min(ret, tcrypt_test("lrw(aes)")); 
+ ret = min(ret, tcrypt_test("xts(aes)")); + ret = min(ret, tcrypt_test("ctr(aes)")); + ret = min(ret, tcrypt_test("rfc3686(ctr(aes))")); + ret = min(ret, tcrypt_test("ofb(aes)")); + ret = min(ret, tcrypt_test("cfb(aes)")); + ret = min(ret, tcrypt_test("xctr(aes)")); break; case 11: - ret += tcrypt_test("sha384"); + ret = min(ret, tcrypt_test("sha384")); break; case 12: - ret += tcrypt_test("sha512"); + ret = min(ret, tcrypt_test("sha512")); break; case 13: - ret += tcrypt_test("deflate"); + ret = min(ret, tcrypt_test("deflate")); break; case 14: - ret += tcrypt_test("ecb(cast5)"); - ret += tcrypt_test("cbc(cast5)"); - ret += tcrypt_test("ctr(cast5)"); + ret = min(ret, tcrypt_test("ecb(cast5)")); + ret = min(ret, tcrypt_test("cbc(cast5)")); + ret = min(ret, tcrypt_test("ctr(cast5)")); break; case 15: - ret += tcrypt_test("ecb(cast6)"); - ret += tcrypt_test("cbc(cast6)"); - ret += tcrypt_test("ctr(cast6)"); - ret += tcrypt_test("lrw(cast6)"); - ret += tcrypt_test("xts(cast6)"); + ret = min(ret, tcrypt_test("ecb(cast6)")); + ret = min(ret, tcrypt_test("cbc(cast6)")); + ret = min(ret, tcrypt_test("ctr(cast6)")); + ret = min(ret, tcrypt_test("lrw(cast6)")); + ret = min(ret, tcrypt_test("xts(cast6)")); break; case 16: - ret += tcrypt_test("ecb(arc4)"); + ret = min(ret, tcrypt_test("ecb(arc4)")); break; case 17: - ret += tcrypt_test("michael_mic"); + ret = min(ret, tcrypt_test("michael_mic")); break; case 18: - ret += tcrypt_test("crc32c"); + ret = min(ret, tcrypt_test("crc32c")); break; case 19: - ret += tcrypt_test("ecb(tea)"); + ret = min(ret, tcrypt_test("ecb(tea)")); break; case 20: - ret += tcrypt_test("ecb(xtea)"); + ret = min(ret, tcrypt_test("ecb(xtea)")); break; case 21: - ret += tcrypt_test("ecb(khazad)"); + ret = min(ret, tcrypt_test("ecb(khazad)")); break; case 22: - ret += tcrypt_test("wp512"); + ret = min(ret, tcrypt_test("wp512")); break; case 23: - ret += tcrypt_test("wp384"); + ret = min(ret, tcrypt_test("wp384")); break; case 24: - ret += 
tcrypt_test("wp256"); + ret = min(ret, tcrypt_test("wp256")); break; case 26: - ret += tcrypt_test("ecb(anubis)"); - ret += tcrypt_test("cbc(anubis)"); + ret = min(ret, tcrypt_test("ecb(anubis)")); + ret = min(ret, tcrypt_test("cbc(anubis)")); break; case 30: - ret += tcrypt_test("ecb(xeta)"); + ret = min(ret, tcrypt_test("ecb(xeta)")); break; case 31: - ret += tcrypt_test("pcbc(fcrypt)"); + ret = min(ret, tcrypt_test("pcbc(fcrypt)")); break; case 32: - ret += tcrypt_test("ecb(camellia)"); - ret += tcrypt_test("cbc(camellia)"); - ret += tcrypt_test("ctr(camellia)"); - ret += tcrypt_test("lrw(camellia)"); - ret += tcrypt_test("xts(camellia)"); + ret = min(ret, tcrypt_test("ecb(camellia)")); + ret = min(ret, tcrypt_test("cbc(camellia)")); + ret = min(ret, tcrypt_test("ctr(camellia)")); + ret = min(ret, tcrypt_test("lrw(camellia)")); + ret = min(ret, tcrypt_test("xts(camellia)")); break; case 33: - ret += tcrypt_test("sha224"); + ret = min(ret, tcrypt_test("sha224")); break; case 35: - ret += tcrypt_test("gcm(aes)"); + ret = min(ret, tcrypt_test("gcm(aes)")); break; case 36: - ret += tcrypt_test("lzo"); + ret = min(ret, tcrypt_test("lzo")); break; case 37: - ret += tcrypt_test("ccm(aes)"); + ret = min(ret, tcrypt_test("ccm(aes)")); break; case 38: - ret += tcrypt_test("cts(cbc(aes))"); + ret = min(ret, tcrypt_test("cts(cbc(aes))")); break; case 39: - ret += tcrypt_test("xxhash64"); + ret = min(ret, tcrypt_test("xxhash64")); break; case 40: - ret += tcrypt_test("rmd160"); + ret = min(ret, tcrypt_test("rmd160")); break; case 42: - ret += tcrypt_test("blake2b-512"); + ret = min(ret, tcrypt_test("blake2b-512")); break; case 43: - ret += tcrypt_test("ecb(seed)"); + ret = min(ret, tcrypt_test("ecb(seed)")); break; case 45: - ret += tcrypt_test("rfc4309(ccm(aes))"); + ret = min(ret, tcrypt_test("rfc4309(ccm(aes))")); break; case 46: - ret += tcrypt_test("ghash"); + ret = min(ret, tcrypt_test("ghash")); break; case 47: - ret += tcrypt_test("crct10dif"); + ret = min(ret, 
tcrypt_test("crct10dif")); break; case 48: - ret += tcrypt_test("sha3-224"); + ret = min(ret, tcrypt_test("sha3-224")); break; case 49: - ret += tcrypt_test("sha3-256"); + ret = min(ret, tcrypt_test("sha3-256")); break; case 50: - ret += tcrypt_test("sha3-384"); + ret = min(ret, tcrypt_test("sha3-384")); break; case 51: - ret += tcrypt_test("sha3-512"); + ret = min(ret, tcrypt_test("sha3-512")); break; case 52: - ret += tcrypt_test("sm3"); + ret = min(ret, tcrypt_test("sm3")); break; case 53: - ret += tcrypt_test("streebog256"); + ret = min(ret, tcrypt_test("streebog256")); break; case 54: - ret += tcrypt_test("streebog512"); + ret = min(ret, tcrypt_test("streebog512")); break; case 55: - ret += tcrypt_test("gcm(sm4)"); + ret = min(ret, tcrypt_test("gcm(sm4)")); break; case 56: - ret += tcrypt_test("ccm(sm4)"); + ret = min(ret, tcrypt_test("ccm(sm4)")); break; case 57: - ret += tcrypt_test("polyval"); + ret = min(ret, tcrypt_test("polyval")); break; case 58: - ret += tcrypt_test("gcm(aria)"); + ret = min(ret, tcrypt_test("gcm(aria)")); break; case 100: - ret += tcrypt_test("hmac(md5)"); + ret = min(ret, tcrypt_test("hmac(md5)")); break; case 101: - ret += tcrypt_test("hmac(sha1)"); + ret = min(ret, tcrypt_test("hmac(sha1)")); break; case 102: - ret += tcrypt_test("hmac(sha256)"); + ret = min(ret, tcrypt_test("hmac(sha256)")); break; case 103: - ret += tcrypt_test("hmac(sha384)"); + ret = min(ret, tcrypt_test("hmac(sha384)")); break; case 104: - ret += tcrypt_test("hmac(sha512)"); + ret = min(ret, tcrypt_test("hmac(sha512)")); break; case 105: - ret += tcrypt_test("hmac(sha224)"); + ret = min(ret, tcrypt_test("hmac(sha224)")); break; case 106: - ret += tcrypt_test("xcbc(aes)"); + ret = min(ret, tcrypt_test("xcbc(aes)")); break; case 108: - ret += tcrypt_test("hmac(rmd160)"); + ret = min(ret, tcrypt_test("hmac(rmd160)")); break; case 109: - ret += tcrypt_test("vmac64(aes)"); + ret = min(ret, tcrypt_test("vmac64(aes)")); break; case 111: - ret += 
tcrypt_test("hmac(sha3-224)"); + ret = min(ret, tcrypt_test("hmac(sha3-224)")); break; case 112: - ret += tcrypt_test("hmac(sha3-256)"); + ret = min(ret, tcrypt_test("hmac(sha3-256)")); break; case 113: - ret += tcrypt_test("hmac(sha3-384)"); + ret = min(ret, tcrypt_test("hmac(sha3-384)")); break; case 114: - ret += tcrypt_test("hmac(sha3-512)"); + ret = min(ret, tcrypt_test("hmac(sha3-512)")); break; case 115: - ret += tcrypt_test("hmac(streebog256)"); + ret = min(ret, tcrypt_test("hmac(streebog256)")); break; case 116: - ret += tcrypt_test("hmac(streebog512)"); + ret = min(ret, tcrypt_test("hmac(streebog512)")); break; case 150: - ret += tcrypt_test("ansi_cprng"); + ret = min(ret, tcrypt_test("ansi_cprng")); break; case 151: - ret += tcrypt_test("rfc4106(gcm(aes))"); + ret = min(ret, tcrypt_test("rfc4106(gcm(aes))")); break; case 152: - ret += tcrypt_test("rfc4543(gcm(aes))"); + ret = min(ret, tcrypt_test("rfc4543(gcm(aes))")); break; case 153: - ret += tcrypt_test("cmac(aes)"); + ret = min(ret, tcrypt_test("cmac(aes)")); break; case 154: - ret += tcrypt_test("cmac(des3_ede)"); + ret = min(ret, tcrypt_test("cmac(des3_ede)")); break; case 155: - ret += tcrypt_test("authenc(hmac(sha1),cbc(aes))"); + ret = min(ret, tcrypt_test("authenc(hmac(sha1),cbc(aes))")); break; case 156: - ret += tcrypt_test("authenc(hmac(md5),ecb(cipher_null))"); + ret = min(ret, tcrypt_test("authenc(hmac(md5),ecb(cipher_null))")); break; case 157: - ret += tcrypt_test("authenc(hmac(sha1),ecb(cipher_null))"); + ret = min(ret, tcrypt_test("authenc(hmac(sha1),ecb(cipher_null))")); break; case 158: - ret += tcrypt_test("cbcmac(sm4)"); + ret = min(ret, tcrypt_test("cbcmac(sm4)")); break; case 159: - ret += tcrypt_test("cmac(sm4)"); + ret = min(ret, tcrypt_test("cmac(sm4)")); break; case 181: - ret += tcrypt_test("authenc(hmac(sha1),cbc(des))"); + ret = min(ret, tcrypt_test("authenc(hmac(sha1),cbc(des))")); break; case 182: - ret += tcrypt_test("authenc(hmac(sha1),cbc(des3_ede))"); + ret = 
min(ret, tcrypt_test("authenc(hmac(sha1),cbc(des3_ede))")); break; case 183: - ret += tcrypt_test("authenc(hmac(sha224),cbc(des))"); + ret = min(ret, tcrypt_test("authenc(hmac(sha224),cbc(des))")); break; case 184: - ret += tcrypt_test("authenc(hmac(sha224),cbc(des3_ede))"); + ret = min(ret, tcrypt_test("authenc(hmac(sha224),cbc(des3_ede))")); break; case 185: - ret += tcrypt_test("authenc(hmac(sha256),cbc(des))"); + ret = min(ret, tcrypt_test("authenc(hmac(sha256),cbc(des))")); break; case 186: - ret += tcrypt_test("authenc(hmac(sha256),cbc(des3_ede))"); + ret = min(ret, tcrypt_test("authenc(hmac(sha256),cbc(des3_ede))")); break; case 187: - ret += tcrypt_test("authenc(hmac(sha384),cbc(des))"); + ret = min(ret, tcrypt_test("authenc(hmac(sha384),cbc(des))")); break; case 188: - ret += tcrypt_test("authenc(hmac(sha384),cbc(des3_ede))"); + ret = min(ret, tcrypt_test("authenc(hmac(sha384),cbc(des3_ede))")); break; case 189: - ret += tcrypt_test("authenc(hmac(sha512),cbc(des))"); + ret = min(ret, tcrypt_test("authenc(hmac(sha512),cbc(des))")); break; case 190: - ret += tcrypt_test("authenc(hmac(sha512),cbc(des3_ede))"); + ret = min(ret, tcrypt_test("authenc(hmac(sha512),cbc(des3_ede))")); break; case 191: - ret += tcrypt_test("ecb(sm4)"); - ret += tcrypt_test("cbc(sm4)"); - ret += tcrypt_test("cfb(sm4)"); - ret += tcrypt_test("ctr(sm4)"); + ret = min(ret, tcrypt_test("ecb(sm4)")); + ret = min(ret, tcrypt_test("cbc(sm4)")); + ret = min(ret, tcrypt_test("cfb(sm4)")); + ret = min(ret, tcrypt_test("ctr(sm4)")); break; case 192: - ret += tcrypt_test("ecb(aria)"); - ret += tcrypt_test("cbc(aria)"); - ret += tcrypt_test("cfb(aria)"); - ret += tcrypt_test("ctr(aria)"); + ret = min(ret, tcrypt_test("ecb(aria)")); + ret = min(ret, tcrypt_test("cbc(aria)")); + ret = min(ret, tcrypt_test("cfb(aria)")); + ret = min(ret, tcrypt_test("ctr(aria)")); break; case 200: test_cipher_speed("ecb(aes)", ENCRYPT, sec, NULL, 0, From 76a4e874593543a2dff91d249c95bac728df2774 Mon Sep 17 00:00:00 
2001 From: Corentin Labbe Date: Thu, 6 Oct 2022 04:34:19 +0000 Subject: [PATCH 0286/4122] crypto: n2 - add missing hash statesize Add missing statesize to hash templates. This is mandatory otherwise no algorithms can be registered as the core requires statesize to be set. CC: stable@kernel.org # 4.3+ Reported-by: Rolf Eike Beer Tested-by: Rolf Eike Beer Fixes: 0a625fd2abaa ("crypto: n2 - Add Niagara2 crypto driver") Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/n2_core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c index 31e24df18877..20d0dcd50344 100644 --- a/drivers/crypto/n2_core.c +++ b/drivers/crypto/n2_core.c @@ -1229,6 +1229,7 @@ struct n2_hash_tmpl { const u8 *hash_init; u8 hw_op_hashsz; u8 digest_size; + u8 statesize; u8 block_size; u8 auth_type; u8 hmac_type; @@ -1260,6 +1261,7 @@ static const struct n2_hash_tmpl hash_tmpls[] = { .hmac_type = AUTH_TYPE_HMAC_MD5, .hw_op_hashsz = MD5_DIGEST_SIZE, .digest_size = MD5_DIGEST_SIZE, + .statesize = sizeof(struct md5_state), .block_size = MD5_HMAC_BLOCK_SIZE }, { .name = "sha1", .hash_zero = sha1_zero_message_hash, @@ -1268,6 +1270,7 @@ static const struct n2_hash_tmpl hash_tmpls[] = { .hmac_type = AUTH_TYPE_HMAC_SHA1, .hw_op_hashsz = SHA1_DIGEST_SIZE, .digest_size = SHA1_DIGEST_SIZE, + .statesize = sizeof(struct sha1_state), .block_size = SHA1_BLOCK_SIZE }, { .name = "sha256", .hash_zero = sha256_zero_message_hash, @@ -1276,6 +1279,7 @@ static const struct n2_hash_tmpl hash_tmpls[] = { .hmac_type = AUTH_TYPE_HMAC_SHA256, .hw_op_hashsz = SHA256_DIGEST_SIZE, .digest_size = SHA256_DIGEST_SIZE, + .statesize = sizeof(struct sha256_state), .block_size = SHA256_BLOCK_SIZE }, { .name = "sha224", .hash_zero = sha224_zero_message_hash, @@ -1284,6 +1288,7 @@ static const struct n2_hash_tmpl hash_tmpls[] = { .hmac_type = AUTH_TYPE_RESERVED, .hw_op_hashsz = SHA256_DIGEST_SIZE, .digest_size = SHA224_DIGEST_SIZE, + .statesize = 
sizeof(struct sha256_state), .block_size = SHA224_BLOCK_SIZE }, }; #define NUM_HASH_TMPLS ARRAY_SIZE(hash_tmpls) @@ -1424,6 +1429,7 @@ static int __n2_register_one_ahash(const struct n2_hash_tmpl *tmpl) halg = &ahash->halg; halg->digestsize = tmpl->digest_size; + halg->statesize = tmpl->statesize; base = &halg->base; snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME, "%s", tmpl->name); From f1da27b7c4191f78ed81d3dabf64c769f896296c Mon Sep 17 00:00:00 2001 From: "Mingming.Su" Date: Sat, 8 Oct 2022 18:45:53 +0200 Subject: [PATCH 0287/4122] hwrng: mtk - add mt7986 support 1. Add trng compatible name for MT7986 2. Fix mtk_rng_wait_ready() function Signed-off-by: Mingming.Su Signed-off-by: Frank Wunderlich Signed-off-by: Herbert Xu --- drivers/char/hw_random/mtk-rng.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/char/hw_random/mtk-rng.c b/drivers/char/hw_random/mtk-rng.c index 6c00ea008555..aa993753ab12 100644 --- a/drivers/char/hw_random/mtk-rng.c +++ b/drivers/char/hw_random/mtk-rng.c @@ -22,7 +22,7 @@ #define RNG_AUTOSUSPEND_TIMEOUT 100 #define USEC_POLL 2 -#define TIMEOUT_POLL 20 +#define TIMEOUT_POLL 60 #define RNG_CTRL 0x00 #define RNG_EN BIT(0) @@ -77,7 +77,7 @@ static bool mtk_rng_wait_ready(struct hwrng *rng, bool wait) readl_poll_timeout_atomic(priv->base + RNG_CTRL, ready, ready & RNG_READY, USEC_POLL, TIMEOUT_POLL); - return !!ready; + return !!(ready & RNG_READY); } static int mtk_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait) @@ -179,6 +179,7 @@ static const struct dev_pm_ops mtk_rng_pm_ops = { #endif /* CONFIG_PM */ static const struct of_device_id mtk_rng_match[] = { + { .compatible = "mediatek,mt7986-rng" }, { .compatible = "mediatek,mt7623-rng" }, {}, }; From 854e25a6d653b76007c142b7edbaba81a8789a7f Mon Sep 17 00:00:00 2001 From: jianchunfu Date: Sun, 9 Oct 2022 17:52:54 +0800 Subject: [PATCH 0288/4122] crypto: talitos - Use the defined variable to clean code Use the defined variable "dev" to make the code 
cleaner. Signed-off-by: jianchunfu Signed-off-by: Herbert Xu --- drivers/crypto/talitos.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index c9ad6c213090..71db6450b6aa 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -1999,7 +1999,7 @@ static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes) /* Buffer up to one whole block */ nents = sg_nents_for_len(areq->src, nbytes); if (nents < 0) { - dev_err(ctx->dev, "Invalid number of src SG.\n"); + dev_err(dev, "Invalid number of src SG.\n"); return nents; } sg_copy_to_buffer(areq->src, nents, @@ -2040,7 +2040,7 @@ static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes) offset = nbytes_to_hash - req_ctx->nbuf; nents = sg_nents_for_len(areq->src, offset); if (nents < 0) { - dev_err(ctx->dev, "Invalid number of src SG.\n"); + dev_err(dev, "Invalid number of src SG.\n"); return nents; } sg_copy_to_buffer(areq->src, nents, @@ -2054,7 +2054,7 @@ static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes) if (to_hash_later) { nents = sg_nents_for_len(areq->src, nbytes); if (nents < 0) { - dev_err(ctx->dev, "Invalid number of src SG.\n"); + dev_err(dev, "Invalid number of src SG.\n"); return nents; } sg_pcopy_to_buffer(areq->src, nents, From 7e11a4fc84dcc9746936c46d9a88489a365fea45 Mon Sep 17 00:00:00 2001 From: Tomas Marek Date: Wed, 12 Oct 2022 18:09:23 +0200 Subject: [PATCH 0289/4122] hwrng: stm32 - fix number of returned bytes on read The stm32_rng_read() function uses the `retval` variable as a counter of generated random bytes. However, the same variable is used to store a result of the polling function in case the driver is waiting until the TRNG is ready. The TRNG generates random numbers 16B at a time. One loop iteration reads 4B. So, the function calls the polling every 16B, i.e. every 4th loop iteration.
The `retval` counter is reset on each poll call, so only the number of bytes read after the last poll call is returned to the caller. The remaining sampled random bytes (for example 48 out of 64 in case 64 bytes are read) are not used. Use a different variable to store the polling function result so that the `retval` counter is not overwritten. Cc: Oleg Karfich Signed-off-by: Tomas Marek Signed-off-by: Herbert Xu --- drivers/char/hw_random/stm32-rng.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/char/hw_random/stm32-rng.c b/drivers/char/hw_random/stm32-rng.c index bc22178f83e8..8eaacefd498b 100644 --- a/drivers/char/hw_random/stm32-rng.c +++ b/drivers/char/hw_random/stm32-rng.c @@ -49,11 +49,13 @@ static int stm32_rng_read(struct hwrng *rng, void *data, size_t max, bool wait) /* Manage timeout which is based on timer and take */ /* care of initial delay time when enabling rng */ if (!sr && wait) { - retval = readl_relaxed_poll_timeout_atomic(priv->base + int ret; + + ret = readl_relaxed_poll_timeout_atomic(priv->base + RNG_SR, sr, sr, 10, 50000); - if (retval) + if (ret) dev_err((struct device *)priv->rng.priv, "%s: timeout %x!\n", __func__, sr); } From e64f57e8cd5abe167cdf453869d6274608480519 Mon Sep 17 00:00:00 2001 From: Tomas Marek Date: Wed, 12 Oct 2022 18:09:24 +0200 Subject: [PATCH 0290/4122] hwrng: stm32 - fix read of the last word The stm32_rng_read() function samples the TRNG 4 bytes at a time as long as at least 5 bytes are free in the input buffer. The last four bytes are never read. For example, 60 bytes are returned in case the input buffer size is 64 bytes. Read as long as at least 4 bytes are free in the input buffer. Fill the buffer entirely in case the buffer size is divisible by 4.
Cc: Oleg Karfich Signed-off-by: Tomas Marek Signed-off-by: Herbert Xu --- drivers/char/hw_random/stm32-rng.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/hw_random/stm32-rng.c b/drivers/char/hw_random/stm32-rng.c index 8eaacefd498b..366edda4848b 100644 --- a/drivers/char/hw_random/stm32-rng.c +++ b/drivers/char/hw_random/stm32-rng.c @@ -44,7 +44,7 @@ static int stm32_rng_read(struct hwrng *rng, void *data, size_t max, bool wait) pm_runtime_get_sync((struct device *) priv->rng.priv); - while (max > sizeof(u32)) { + while (max >= sizeof(u32)) { sr = readl_relaxed(priv->base + RNG_SR); /* Manage timeout which is based on timer and take */ /* care of initial delay time when enabling rng */ From 38bcb51f81af17a6d40fc135e565fc1fb8aa8e9d Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Mon, 17 Oct 2022 16:10:53 +0800 Subject: [PATCH 0291/4122] leds-pca955x: Remove the unused function pca95xx_num_led_regs() The function pca95xx_num_led_regs() is defined in the leds-pca955x.c file, but not called elsewhere, so delete this unused function. drivers/leds/leds-pca955x.c:149:19: warning: unused function 'pca95xx_num_led_regs'. Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=2411 Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Pavel Machek --- drivers/leds/leds-pca955x.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/leds/leds-pca955x.c b/drivers/leds/leds-pca955x.c index 81aaf21212d7..33ec4543fb4f 100644 --- a/drivers/leds/leds-pca955x.c +++ b/drivers/leds/leds-pca955x.c @@ -145,12 +145,6 @@ static inline int pca95xx_num_input_regs(int bits) return (bits + 7) / 8; } -/* 4 bits per LED selector register */ -static inline int pca95xx_num_led_regs(int bits) -{ - return (bits + 3) / 4; -} - /* * Return an LED selector register value based on an existing one, with * the appropriate 2-bit state value set for the given LED number (0-3). 
From fb0f4051ee8e0ae89697e417f1a547e715acc824 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 20 Oct 2022 22:07:31 +0100 Subject: [PATCH 0292/4122] leds: lp55xx: remove variable j The variable j is incremented but never referenced; it is redundant and can be removed. Signed-off-by: Colin Ian King Signed-off-by: Pavel Machek --- drivers/leds/leds-lp55xx-common.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/leds/leds-lp55xx-common.c b/drivers/leds/leds-lp55xx-common.c index 9fdfc1b9a1a0..ca2e28fb843f 100644 --- a/drivers/leds/leds-lp55xx-common.c +++ b/drivers/leds/leds-lp55xx-common.c @@ -166,7 +166,7 @@ static int lp55xx_init_led(struct lp55xx_led *led, struct mc_subled *mc_led_info; struct led_classdev *led_cdev; char name[32]; - int i, j = 0; + int i; int ret; if (chan >= max_channel) { @@ -201,7 +201,6 @@ static int lp55xx_init_led(struct lp55xx_led *led, pdata->led_config[chan].color_id[i]; mc_led_info[i].channel = pdata->led_config[chan].output_num[i]; - j++; } led->mc_cdev.subled_info = mc_led_info; From 621a323c3a7e23b364deaddf769e731f2da6ff03 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Tue, 18 Oct 2022 16:12:35 +0800 Subject: [PATCH 0293/4122] scsi: target: Remove the unused function transport_lba_64_ext() The function transport_lba_64_ext() is defined in the target_core_sbc.c file, but not called elsewhere, so remove this unused function. drivers/target/target_core_sbc.c:276:34: warning: unused function 'transport_lba_64_ext'. Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=2427 Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Link: https://lore.kernel.org/r/20221018081235.124662-1-jiapeng.chong@linux.alibaba.com Signed-off-by: Martin K.
Petersen --- drivers/target/target_core_sbc.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index 1e3216de1e04..1cd41e3834bb 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -270,14 +270,6 @@ static inline unsigned long long transport_lba_64(unsigned char *cdb) return get_unaligned_be64(&cdb[2]); } -/* - * For VARIABLE_LENGTH_CDB w/ 32 byte extended CDBs - */ -static inline unsigned long long transport_lba_64_ext(unsigned char *cdb) -{ - return get_unaligned_be64(&cdb[12]); -} - static sense_reason_t sbc_setup_write_same(struct se_cmd *cmd, unsigned char flags, struct sbc_ops *ops) { From a9ee3f840646e2ec419c734e592ffe997195435e Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 18 Oct 2022 19:15:57 +0800 Subject: [PATCH 0294/4122] scsi: libsas: Add sas_task_find_rq() blk-mq already provides a unique tag per request. Some libsas LLDDs - like hisi_sas - already use this tag as the unique per-I/O HW tag. Add a common function to provide the request associated with a sas_task for all libsas LLDDs. Signed-off-by: John Garry Link: https://lore.kernel.org/r/1666091763-11023-2-git-send-email-john.garry@huawei.com Reviewed-by: Jack Wang Reviewed-by: Jason Yan Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- include/scsi/libsas.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index ec6c9ecd8d12..1aee3d0ebbb2 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -644,6 +644,24 @@ static inline bool sas_is_internal_abort(struct sas_task *task) return task->task_proto == SAS_PROTOCOL_INTERNAL_ABORT; } +static inline struct request *sas_task_find_rq(struct sas_task *task) +{ + struct scsi_cmnd *scmd; + + if (task->task_proto & SAS_PROTOCOL_STP_ALL) { + struct ata_queued_cmd *qc = task->uldd_task; + + scmd = qc ? 
qc->scsicmd : NULL; + } else { + scmd = task->uldd_task; + } + + if (!scmd) + return NULL; + + return scsi_cmd_to_rq(scmd); +} + struct sas_domain_function_template { /* The class calls these to notify the LLDD of an event. */ void (*lldd_port_formed)(struct asd_sas_phy *); From 295fd2330a91f295522ad2b7fe2109833ae32e33 Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 18 Oct 2022 19:15:58 +0800 Subject: [PATCH 0295/4122] scsi: hisi_sas: Use sas_task_find_rq() Use sas_task_find_rq() to lookup the request per task for its driver tag. Signed-off-by: John Garry Link: https://lore.kernel.org/r/1666091763-11023-3-git-send-email-john.garry@huawei.com Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/hisi_sas/hisi_sas_main.c | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 10813836a728..26e474b0f53f 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -177,13 +177,13 @@ static void hisi_sas_slot_index_set(struct hisi_hba *hisi_hba, int slot_idx) } static int hisi_sas_slot_index_alloc(struct hisi_hba *hisi_hba, - struct scsi_cmnd *scsi_cmnd) + struct request *rq) { int index; void *bitmap = hisi_hba->slot_index_tags; - if (scsi_cmnd) - return scsi_cmd_to_rq(scsi_cmnd)->tag; + if (rq) + return rq->tag; spin_lock(&hisi_hba->lock); index = find_next_zero_bit(bitmap, hisi_hba->slot_index_count, @@ -461,11 +461,11 @@ static int hisi_sas_queue_command(struct sas_task *task, gfp_t gfp_flags) struct asd_sas_port *sas_port = device->port; struct hisi_sas_device *sas_dev = device->lldd_dev; bool internal_abort = sas_is_internal_abort(task); - struct scsi_cmnd *scmd = NULL; struct hisi_sas_dq *dq = NULL; struct hisi_sas_port *port; struct hisi_hba *hisi_hba; struct hisi_sas_slot *slot; + struct request *rq = NULL; struct device *dev; int rc; @@ -520,22 
+520,12 @@ static int hisi_sas_queue_command(struct sas_task *task, gfp_t gfp_flags) return -ECOMM; } - if (task->uldd_task) { - struct ata_queued_cmd *qc; - - if (dev_is_sata(device)) { - qc = task->uldd_task; - scmd = qc->scsicmd; - } else { - scmd = task->uldd_task; - } - } - - if (scmd) { + rq = sas_task_find_rq(task); + if (rq) { unsigned int dq_index; u32 blk_tag; - blk_tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmd)); + blk_tag = blk_mq_unique_tag(rq); dq_index = blk_mq_unique_tag_to_hwq(blk_tag); dq = &hisi_hba->dq[dq_index]; } else { @@ -580,7 +570,7 @@ static int hisi_sas_queue_command(struct sas_task *task, gfp_t gfp_flags) if (!internal_abort && hisi_hba->hw->slot_index_alloc) rc = hisi_hba->hw->slot_index_alloc(hisi_hba, device); else - rc = hisi_sas_slot_index_alloc(hisi_hba, scmd); + rc = hisi_sas_slot_index_alloc(hisi_hba, rq); if (rc < 0) goto err_out_dif_dma_unmap; From f7d190a94e35a2784af8871e275b86e68ff8034a Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 18 Oct 2022 19:15:59 +0800 Subject: [PATCH 0296/4122] scsi: hisi_sas: Put reserved tags in lower region of tagset To be consistent with blk-mq, put the reserved tags in the lower region of the tagset. Eventually we hope to get rid of all this reserved tag management. Signed-off-by: John Garry Link: https://lore.kernel.org/r/1666091763-11023-4-git-send-email-john.garry@huawei.com Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. 
Petersen --- drivers/scsi/hisi_sas/hisi_sas_main.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 26e474b0f53f..54860d252466 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -183,16 +183,16 @@ static int hisi_sas_slot_index_alloc(struct hisi_hba *hisi_hba, void *bitmap = hisi_hba->slot_index_tags; if (rq) - return rq->tag; + return rq->tag + HISI_SAS_RESERVED_IPTT; spin_lock(&hisi_hba->lock); - index = find_next_zero_bit(bitmap, hisi_hba->slot_index_count, + index = find_next_zero_bit(bitmap, HISI_SAS_RESERVED_IPTT, hisi_hba->last_slot_index + 1); - if (index >= hisi_hba->slot_index_count) { + if (index >= HISI_SAS_RESERVED_IPTT) { index = find_next_zero_bit(bitmap, - hisi_hba->slot_index_count, - HISI_SAS_UNRESERVED_IPTT); - if (index >= hisi_hba->slot_index_count) { + HISI_SAS_RESERVED_IPTT, + 0); + if (index >= HISI_SAS_RESERVED_IPTT) { spin_unlock(&hisi_hba->lock); return -SAS_QUEUE_FULL; } @@ -2216,7 +2216,7 @@ int hisi_sas_alloc(struct hisi_hba *hisi_hba) if (!hisi_hba->sata_breakpoint) goto err_out; - hisi_hba->last_slot_index = HISI_SAS_UNRESERVED_IPTT; + hisi_hba->last_slot_index = 0; hisi_hba->wq = create_singlethread_workqueue(dev_name(dev)); if (!hisi_hba->wq) { From 1baa70d36403aa572453eee9fdd4f637455ecaaf Mon Sep 17 00:00:00 2001 From: Igor Pylypiv Date: Tue, 18 Oct 2022 19:16:00 +0800 Subject: [PATCH 0297/4122] scsi: pm8001: Remove pm8001_tag_init() In commit 5a141315ed7c ("scsi: pm80xx: Increase the number of outstanding I/O supported to 1024") the pm8001_ha->tags allocation was moved into pm8001_init_ccb_tag(). This changed the execution order of allocation. pm8001_tag_init() used to be called after the pm8001_ha->tags allocation and now it is called before the allocation. Before: pm8001_pci_probe() `--> pm8001_pci_alloc() `--> pm8001_alloc() `--> pm8001_ha->tags = kzalloc(...) 
`--> pm8001_tag_init(pm8001_ha); // OK: tags are allocated After: pm8001_pci_probe() `--> pm8001_pci_alloc() | `--> pm8001_alloc() | `--> pm8001_tag_init(pm8001_ha); // NOK: tags are not allocated | `--> pm8001_init_ccb_tag() `--> pm8001_ha->tags = kzalloc(...) // today it is bitmap_zalloc() Since pm8001_ha->tags_num is zero when pm8001_tag_init() is called it does nothing. Tags memory is allocated with bitmap_zalloc() so there is no need to manually clear each bit with pm8001_tag_free(). Reviewed-by: Changyuan Lyu Signed-off-by: Igor Pylypiv Signed-off-by: John Garry Link: https://lore.kernel.org/r/1666091763-11023-5-git-send-email-john.garry@huawei.com Reviewed-by: Damien Le Moal Reviewed-by: Jack Wang Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/pm8001/pm8001_init.c | 2 -- drivers/scsi/pm8001/pm8001_sas.c | 7 ------- drivers/scsi/pm8001/pm8001_sas.h | 1 - 3 files changed, 10 deletions(-) diff --git a/drivers/scsi/pm8001/pm8001_init.c b/drivers/scsi/pm8001/pm8001_init.c index 2ff2fac1e403..040a8280f23b 100644 --- a/drivers/scsi/pm8001/pm8001_init.c +++ b/drivers/scsi/pm8001/pm8001_init.c @@ -436,8 +436,6 @@ static int pm8001_alloc(struct pm8001_hba_info *pm8001_ha, atomic_set(&pm8001_ha->devices[i].running_req, 0); } pm8001_ha->flags = PM8001F_INIT_TIME; - /* Initialize tags */ - pm8001_tag_init(pm8001_ha); return 0; err_out_nodev: diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c index 51230b827149..c9fa3328f3fa 100644 --- a/drivers/scsi/pm8001/pm8001_sas.c +++ b/drivers/scsi/pm8001/pm8001_sas.c @@ -96,13 +96,6 @@ int pm8001_tag_alloc(struct pm8001_hba_info *pm8001_ha, u32 *tag_out) return 0; } -void pm8001_tag_init(struct pm8001_hba_info *pm8001_ha) -{ - int i; - for (i = 0; i < pm8001_ha->tags_num; ++i) - pm8001_tag_free(pm8001_ha, i); -} - /** * pm8001_mem_alloc - allocate memory for pm8001. * @pdev: pci device. 
diff --git a/drivers/scsi/pm8001/pm8001_sas.h b/drivers/scsi/pm8001/pm8001_sas.h index 16a753d5e8a7..ecb98bc5a8d0 100644 --- a/drivers/scsi/pm8001/pm8001_sas.h +++ b/drivers/scsi/pm8001/pm8001_sas.h @@ -632,7 +632,6 @@ extern struct workqueue_struct *pm8001_wq; /******************** function prototype *********************/ int pm8001_tag_alloc(struct pm8001_hba_info *pm8001_ha, u32 *tag_out); -void pm8001_tag_init(struct pm8001_hba_info *pm8001_ha); u32 pm8001_get_ncq_tag(struct sas_task *task, u32 *tag); void pm8001_ccb_task_free(struct pm8001_hba_info *pm8001_ha, struct pm8001_ccb_info *ccb); From 6472cfb418a0ba783a469deeb6586fb2f133c268 Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 18 Oct 2022 19:16:01 +0800 Subject: [PATCH 0298/4122] scsi: pm8001: Use sas_task_find_rq() for tagging The request associated with a SCSI command coming from the block layer has a unique tag, so use that when possible for getting a CCB. Unfortunately we don't support reserved commands in the SCSI midlayer yet, so in the interim continue to manage those tags internally (along with tags for private commands). Signed-off-by: John Garry Link: https://lore.kernel.org/r/1666091763-11023-6-git-send-email-john.garry@huawei.com Reviewed-by: Jack Wang Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. 
Petersen --- drivers/scsi/pm8001/pm8001_init.c | 12 ++++-------- drivers/scsi/pm8001/pm8001_sas.c | 13 +++++++++---- drivers/scsi/pm8001/pm8001_sas.h | 11 ++++++++--- drivers/scsi/pm8001/pm80xx_hwi.c | 19 +++---------------- 4 files changed, 24 insertions(+), 31 deletions(-) diff --git a/drivers/scsi/pm8001/pm8001_init.c b/drivers/scsi/pm8001/pm8001_init.c index 040a8280f23b..a1df61205b20 100644 --- a/drivers/scsi/pm8001/pm8001_init.c +++ b/drivers/scsi/pm8001/pm8001_init.c @@ -196,7 +196,7 @@ static void pm8001_free(struct pm8001_hba_info *pm8001_ha) } PM8001_CHIP_DISP->chip_iounmap(pm8001_ha); flush_workqueue(pm8001_wq); - bitmap_free(pm8001_ha->tags); + bitmap_free(pm8001_ha->rsvd_tags); kfree(pm8001_ha); } @@ -1208,18 +1208,15 @@ static int pm8001_init_ccb_tag(struct pm8001_hba_info *pm8001_ha) struct Scsi_Host *shost = pm8001_ha->shost; struct device *dev = pm8001_ha->dev; u32 max_out_io, ccb_count; - u32 can_queue; int i; max_out_io = pm8001_ha->main_cfg_tbl.pm80xx_tbl.max_out_io; ccb_count = min_t(int, PM8001_MAX_CCB, max_out_io); - /* Update to the scsi host*/ - can_queue = ccb_count - PM8001_RESERVE_SLOT; - shost->can_queue = can_queue; + shost->can_queue = ccb_count - PM8001_RESERVE_SLOT; - pm8001_ha->tags = bitmap_zalloc(ccb_count, GFP_KERNEL); - if (!pm8001_ha->tags) + pm8001_ha->rsvd_tags = bitmap_zalloc(PM8001_RESERVE_SLOT, GFP_KERNEL); + if (!pm8001_ha->rsvd_tags) goto err_out; /* Memory region for ccb_info*/ @@ -1244,7 +1241,6 @@ static int pm8001_init_ccb_tag(struct pm8001_hba_info *pm8001_ha) pm8001_ha->ccb_info[i].task = NULL; pm8001_ha->ccb_info[i].ccb_tag = PM8001_INVALID_TAG; pm8001_ha->ccb_info[i].device = NULL; - ++pm8001_ha->tags_num; } return 0; diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c index c9fa3328f3fa..2359e827c9e6 100644 --- a/drivers/scsi/pm8001/pm8001_sas.c +++ b/drivers/scsi/pm8001/pm8001_sas.c @@ -65,9 +65,12 @@ static int pm8001_find_tag(struct sas_task *task, u32 *tag) */ void 
pm8001_tag_free(struct pm8001_hba_info *pm8001_ha, u32 tag) { - void *bitmap = pm8001_ha->tags; + void *bitmap = pm8001_ha->rsvd_tags; unsigned long flags; + if (tag >= PM8001_RESERVE_SLOT) + return; + spin_lock_irqsave(&pm8001_ha->bitmap_lock, flags); __clear_bit(tag, bitmap); spin_unlock_irqrestore(&pm8001_ha->bitmap_lock, flags); @@ -80,18 +83,20 @@ void pm8001_tag_free(struct pm8001_hba_info *pm8001_ha, u32 tag) */ int pm8001_tag_alloc(struct pm8001_hba_info *pm8001_ha, u32 *tag_out) { - void *bitmap = pm8001_ha->tags; + void *bitmap = pm8001_ha->rsvd_tags; unsigned long flags; unsigned int tag; spin_lock_irqsave(&pm8001_ha->bitmap_lock, flags); - tag = find_first_zero_bit(bitmap, pm8001_ha->tags_num); - if (tag >= pm8001_ha->tags_num) { + tag = find_first_zero_bit(bitmap, PM8001_RESERVE_SLOT); + if (tag >= PM8001_RESERVE_SLOT) { spin_unlock_irqrestore(&pm8001_ha->bitmap_lock, flags); return -SAS_QUEUE_FULL; } __set_bit(tag, bitmap); spin_unlock_irqrestore(&pm8001_ha->bitmap_lock, flags); + + /* reserved tags are in the lower region of the tagset */ *tag_out = tag; return 0; } diff --git a/drivers/scsi/pm8001/pm8001_sas.h b/drivers/scsi/pm8001/pm8001_sas.h index ecb98bc5a8d0..cf5f1b091959 100644 --- a/drivers/scsi/pm8001/pm8001_sas.h +++ b/drivers/scsi/pm8001/pm8001_sas.h @@ -510,8 +510,7 @@ struct pm8001_hba_info { u32 chip_id; const struct pm8001_chip_info *chip; struct completion *nvmd_completion; - int tags_num; - unsigned long *tags; + unsigned long *rsvd_tags; struct pm8001_phy phy[PM8001_MAX_PHYS]; struct pm8001_port port[PM8001_MAX_PHYS]; u32 id; @@ -736,9 +735,15 @@ pm8001_ccb_alloc(struct pm8001_hba_info *pm8001_ha, struct pm8001_device *dev, struct sas_task *task) { struct pm8001_ccb_info *ccb; + struct request *rq = NULL; u32 tag; - if (pm8001_tag_alloc(pm8001_ha, &tag)) { + if (task) + rq = sas_task_find_rq(task); + + if (rq) { + tag = rq->tag + PM8001_RESERVE_SLOT; + } else if (pm8001_tag_alloc(pm8001_ha, &tag)) { pm8001_dbg(pm8001_ha, FAIL, 
"Failed to allocate a tag\n"); return NULL; } diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c index 4484c498bcb6..bc71db442dd9 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.c +++ b/drivers/scsi/pm8001/pm80xx_hwi.c @@ -4247,25 +4247,12 @@ static int check_enc_sat_cmd(struct sas_task *task) static u32 pm80xx_chip_get_q_index(struct sas_task *task) { - struct scsi_cmnd *scmd = NULL; - u32 blk_tag; + struct request *rq = sas_task_find_rq(task); - if (task->uldd_task) { - struct ata_queued_cmd *qc; - - if (dev_is_sata(task->dev)) { - qc = task->uldd_task; - scmd = qc->scsicmd; - } else { - scmd = task->uldd_task; - } - } - - if (!scmd) + if (!rq) return 0; - blk_tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmd)); - return blk_mq_unique_tag_to_hwq(blk_tag); + return blk_mq_unique_tag_to_hwq(blk_mq_unique_tag(rq)); } /** From ffc9f9bf3f14876d019f67ef17d41138802529a8 Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 18 Oct 2022 19:16:02 +0800 Subject: [PATCH 0299/4122] scsi: mvsas: Delete mvs_tag_init() All mvs_tag_init() does is zero the tag bitmap, but this is already done with the kzalloc() call to alloc the tags, so delete this unneeded function. Signed-off-by: John Garry Link: https://lore.kernel.org/r/1666091763-11023-7-git-send-email-john.garry@huawei.com Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. 
Petersen --- drivers/scsi/mvsas/mv_init.c | 2 -- drivers/scsi/mvsas/mv_sas.c | 7 ------- drivers/scsi/mvsas/mv_sas.h | 1 - 3 files changed, 10 deletions(-) diff --git a/drivers/scsi/mvsas/mv_init.c b/drivers/scsi/mvsas/mv_init.c index 2fde496fff5f..c85fb812ad43 100644 --- a/drivers/scsi/mvsas/mv_init.c +++ b/drivers/scsi/mvsas/mv_init.c @@ -286,8 +286,6 @@ static int mvs_alloc(struct mvs_info *mvi, struct Scsi_Host *shost) } mvi->tags_num = slot_nr; - /* Initialize tags */ - mvs_tag_init(mvi); return 0; err_out: return 1; diff --git a/drivers/scsi/mvsas/mv_sas.c b/drivers/scsi/mvsas/mv_sas.c index bf7d4995b257..3aed5e3e0c8c 100644 --- a/drivers/scsi/mvsas/mv_sas.c +++ b/drivers/scsi/mvsas/mv_sas.c @@ -51,13 +51,6 @@ inline int mvs_tag_alloc(struct mvs_info *mvi, u32 *tag_out) return 0; } -void mvs_tag_init(struct mvs_info *mvi) -{ - int i; - for (i = 0; i < mvi->tags_num; ++i) - mvs_tag_clear(mvi, i); -} - static struct mvs_info *mvs_find_dev_mvi(struct domain_device *dev) { unsigned long i = 0, j = 0, hi = 0; diff --git a/drivers/scsi/mvsas/mv_sas.h b/drivers/scsi/mvsas/mv_sas.h index 509d8f32a04f..fe57665bdb50 100644 --- a/drivers/scsi/mvsas/mv_sas.h +++ b/drivers/scsi/mvsas/mv_sas.h @@ -428,7 +428,6 @@ void mvs_tag_clear(struct mvs_info *mvi, u32 tag); void mvs_tag_free(struct mvs_info *mvi, u32 tag); void mvs_tag_set(struct mvs_info *mvi, unsigned int tag); int mvs_tag_alloc(struct mvs_info *mvi, u32 *tag_out); -void mvs_tag_init(struct mvs_info *mvi); void mvs_iounmap(void __iomem *regs); int mvs_ioremap(struct mvs_info *mvi, int bar, int bar_ex); void mvs_phys_reset(struct mvs_info *mvi, u32 phy_mask, int hard); From 2acf97f199f9eba8321390325519e9b6bff60108 Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 18 Oct 2022 19:16:03 +0800 Subject: [PATCH 0300/4122] scsi: mvsas: Use sas_task_find_rq() for tagging The request associated with a SCSI command coming from the block layer has a unique tag, so use that when possible for getting a slot. 
Unfortunately we don't support reserved commands in the SCSI midlayer yet. As such, SMP tasks - as an example - will not have a request associated, so in the interim continue to manage those tags for that type of sas_task internally. We reserve an arbitrary 4 tags for these internal tags. Indeed, we already decrement MVS_RSVD_SLOTS by 2 for the shost can_queue when flag MVF_FLAG_SOC is set. This change was made in commit 20b09c2992fe ("[SCSI] mvsas: add support for 94xx; layout change; bug fixes"), but what those 2 slots are used for is not obvious. Also make the tag management functions static, where possible. Signed-off-by: John Garry Link: https://lore.kernel.org/r/1666091763-11023-8-git-send-email-john.garry@huawei.com Signed-off-by: Martin K. Petersen --- drivers/scsi/mvsas/mv_defs.h | 1 + drivers/scsi/mvsas/mv_init.c | 9 +++++---- drivers/scsi/mvsas/mv_sas.c | 35 ++++++++++++++++++++++------------- drivers/scsi/mvsas/mv_sas.h | 7 +------ 4 files changed, 29 insertions(+), 23 deletions(-) diff --git a/drivers/scsi/mvsas/mv_defs.h b/drivers/scsi/mvsas/mv_defs.h index 7123a2efbf58..8ef174cd4d37 100644 --- a/drivers/scsi/mvsas/mv_defs.h +++ b/drivers/scsi/mvsas/mv_defs.h @@ -40,6 +40,7 @@ enum driver_configuration { MVS_ATA_CMD_SZ = 96, /* SATA command table buffer size */ MVS_OAF_SZ = 64, /* Open address frame buffer size */ MVS_QUEUE_SIZE = 64, /* Support Queue depth */ + MVS_RSVD_SLOTS = 4, MVS_SOC_CAN_QUEUE = MVS_SOC_SLOTS - 2, }; diff --git a/drivers/scsi/mvsas/mv_init.c b/drivers/scsi/mvsas/mv_init.c index c85fb812ad43..cfe84473a515 100644 --- a/drivers/scsi/mvsas/mv_init.c +++ b/drivers/scsi/mvsas/mv_init.c @@ -142,7 +142,7 @@ static void mvs_free(struct mvs_info *mvi) scsi_host_put(mvi->shost); list_for_each_entry(mwq, &mvi->wq_list, entry) cancel_delayed_work(&mwq->work_q); - kfree(mvi->tags); + kfree(mvi->rsvd_tags); kfree(mvi); } @@ -284,7 +284,6 @@ static int mvs_alloc(struct mvs_info *mvi, struct Scsi_Host *shost) printk(KERN_DEBUG "failed to create 
dma pool %s.\n", pool_name); goto err_out; } - mvi->tags_num = slot_nr; return 0; err_out: @@ -367,8 +366,8 @@ static struct mvs_info *mvs_pci_alloc(struct pci_dev *pdev, mvi->sas = sha; mvi->shost = shost; - mvi->tags = kzalloc(MVS_CHIP_SLOT_SZ>>3, GFP_KERNEL); - if (!mvi->tags) + mvi->rsvd_tags = bitmap_zalloc(MVS_RSVD_SLOTS, GFP_KERNEL); + if (!mvi->rsvd_tags) goto err_out; if (MVS_CHIP_DISP->chip_ioremap(mvi)) @@ -469,6 +468,8 @@ static void mvs_post_sas_ha_init(struct Scsi_Host *shost, else can_queue = MVS_CHIP_SLOT_SZ; + can_queue -= MVS_RSVD_SLOTS; + shost->sg_tablesize = min_t(u16, SG_ALL, MVS_MAX_SG); shost->can_queue = can_queue; mvi->shost->cmd_per_lun = MVS_QUEUE_SIZE; diff --git a/drivers/scsi/mvsas/mv_sas.c b/drivers/scsi/mvsas/mv_sas.c index 3aed5e3e0c8c..9978c424214c 100644 --- a/drivers/scsi/mvsas/mv_sas.c +++ b/drivers/scsi/mvsas/mv_sas.c @@ -20,31 +20,34 @@ static int mvs_find_tag(struct mvs_info *mvi, struct sas_task *task, u32 *tag) return 0; } -void mvs_tag_clear(struct mvs_info *mvi, u32 tag) +static void mvs_tag_clear(struct mvs_info *mvi, u32 tag) { - void *bitmap = mvi->tags; + void *bitmap = mvi->rsvd_tags; clear_bit(tag, bitmap); } -void mvs_tag_free(struct mvs_info *mvi, u32 tag) +static void mvs_tag_free(struct mvs_info *mvi, u32 tag) { + if (tag >= MVS_RSVD_SLOTS) + return; + mvs_tag_clear(mvi, tag); } -void mvs_tag_set(struct mvs_info *mvi, unsigned int tag) +static void mvs_tag_set(struct mvs_info *mvi, unsigned int tag) { - void *bitmap = mvi->tags; + void *bitmap = mvi->rsvd_tags; set_bit(tag, bitmap); } -inline int mvs_tag_alloc(struct mvs_info *mvi, u32 *tag_out) +static int mvs_tag_alloc(struct mvs_info *mvi, u32 *tag_out) { unsigned int index, tag; - void *bitmap = mvi->tags; + void *bitmap = mvi->rsvd_tags; - index = find_first_zero_bit(bitmap, mvi->tags_num); + index = find_first_zero_bit(bitmap, MVS_RSVD_SLOTS); tag = index; - if (tag >= mvi->tags_num) + if (tag >= MVS_RSVD_SLOTS) return -SAS_QUEUE_FULL; mvs_tag_set(mvi, 
tag); *tag_out = tag; @@ -696,6 +699,7 @@ static int mvs_task_prep(struct sas_task *task, struct mvs_info *mvi, int is_tmf struct mvs_task_exec_info tei; struct mvs_slot_info *slot; u32 tag = 0xdeadbeef, n_elem = 0; + struct request *rq; int rc = 0; if (!dev->port) { @@ -760,9 +764,14 @@ static int mvs_task_prep(struct sas_task *task, struct mvs_info *mvi, int is_tmf n_elem = task->num_scatter; } - rc = mvs_tag_alloc(mvi, &tag); - if (rc) - goto err_out; + rq = sas_task_find_rq(task); + if (rq) { + tag = rq->tag + MVS_RSVD_SLOTS; + } else { + rc = mvs_tag_alloc(mvi, &tag); + if (rc) + goto err_out; + } slot = &mvi->slot_info[tag]; @@ -857,7 +866,7 @@ int mvs_queue_command(struct sas_task *task, gfp_t gfp_flags) static void mvs_slot_free(struct mvs_info *mvi, u32 rx_desc) { u32 slot_idx = rx_desc & RXQ_SLOT_MASK; - mvs_tag_clear(mvi, slot_idx); + mvs_tag_free(mvi, slot_idx); } static void mvs_slot_task_free(struct mvs_info *mvi, struct sas_task *task, diff --git a/drivers/scsi/mvsas/mv_sas.h b/drivers/scsi/mvsas/mv_sas.h index fe57665bdb50..68df771e2975 100644 --- a/drivers/scsi/mvsas/mv_sas.h +++ b/drivers/scsi/mvsas/mv_sas.h @@ -370,8 +370,7 @@ struct mvs_info { u32 chip_id; const struct mvs_chip_info *chip; - int tags_num; - unsigned long *tags; + unsigned long *rsvd_tags; /* further per-slot information */ struct mvs_phy phy[MVS_MAX_PHYS]; struct mvs_port port[MVS_MAX_PHYS]; @@ -424,10 +423,6 @@ struct mvs_task_exec_info { /******************** function prototype *********************/ void mvs_get_sas_addr(void *buf, u32 buflen); -void mvs_tag_clear(struct mvs_info *mvi, u32 tag); -void mvs_tag_free(struct mvs_info *mvi, u32 tag); -void mvs_tag_set(struct mvs_info *mvi, unsigned int tag); -int mvs_tag_alloc(struct mvs_info *mvi, u32 *tag_out); void mvs_iounmap(void __iomem *regs); int mvs_ioremap(struct mvs_info *mvi, int bar, int bar_ex); void mvs_phys_reset(struct mvs_info *mvi, u32 phy_mask, int hard); From 5f62639dc2b668d8fa3bd3d4a92cf6e51b7574c6 Mon Sep 17 
00:00:00 2001 From: Igor Pylypiv Date: Fri, 7 Oct 2022 16:07:51 -0700 Subject: [PATCH 0301/4122] scsi: pm80xx: Remove unused reset_in_progress flag logic The reset_in_progress flag was never set. Signed-off-by: Igor Pylypiv Link: https://lore.kernel.org/r/20221007230751.309363-1-ipylypiv@google.com Reviewed-by: Andrew Konecki Acked-by: Jack Wang Signed-off-by: Martin K. Petersen --- drivers/scsi/pm8001/pm8001_sas.h | 1 - drivers/scsi/pm8001/pm80xx_hwi.c | 4 ---- 2 files changed, 5 deletions(-) diff --git a/drivers/scsi/pm8001/pm8001_sas.h b/drivers/scsi/pm8001/pm8001_sas.h index cf5f1b091959..dc1f4d958e03 100644 --- a/drivers/scsi/pm8001/pm8001_sas.h +++ b/drivers/scsi/pm8001/pm8001_sas.h @@ -534,7 +534,6 @@ struct pm8001_hba_info { bool controller_fatal_error; const struct firmware *fw_image; struct isr_param irq_vector[PM8001_MAX_MSIX_VEC]; - u32 reset_in_progress; u32 non_fatal_count; u32 non_fatal_read_length; u32 max_q_num; diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c index bc71db442dd9..9584cadc4201 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.c +++ b/drivers/scsi/pm8001/pm80xx_hwi.c @@ -3440,10 +3440,6 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void *piomb) case HW_EVENT_PHY_DOWN: pm8001_dbg(pm8001_ha, MSG, "HW_EVENT_PHY_DOWN\n"); hw_event_phy_down(pm8001_ha, piomb); - if (pm8001_ha->reset_in_progress) { - pm8001_dbg(pm8001_ha, MSG, "Reset in progress\n"); - return 0; - } phy->phy_attached = 0; phy->phy_state = PHY_LINK_DISABLE; break; From e6f8a22ff4a14aeada44c8f78dfb7503f2ca318f Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Sun, 9 Oct 2022 14:02:49 +0800 Subject: [PATCH 0302/4122] scsi: qedf: Remove set but unused variable 'page' The variable page is not used in the function, so delete it. 
Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=2348 Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Link: https://lore.kernel.org/r/20221009060249.40178-1-jiapeng.chong@linux.alibaba.com Signed-off-by: Martin K. Petersen --- drivers/scsi/qedf/qedf_main.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c index e045c6e25090..35e16600fc63 100644 --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -2951,7 +2951,6 @@ static int qedf_alloc_bdq(struct qedf_ctx *qedf) int i; struct scsi_bd *pbl; u64 *list; - dma_addr_t page; /* Alloc dma memory for BDQ buffers */ for (i = 0; i < QEDF_BDQ_SIZE; i++) { @@ -3012,11 +3011,9 @@ static int qedf_alloc_bdq(struct qedf_ctx *qedf) qedf->bdq_pbl_list_num_entries = qedf->bdq_pbl_mem_size / QEDF_PAGE_SIZE; list = (u64 *)qedf->bdq_pbl_list; - page = qedf->bdq_pbl_list_dma; for (i = 0; i < qedf->bdq_pbl_list_num_entries; i++) { *list = qedf->bdq_pbl_dma; list++; - page += QEDF_PAGE_SIZE; } return 0; From 4fc66e7b16adf054e8dc7a5cd189085b8f545091 Mon Sep 17 00:00:00 2001 From: Justin Tee Date: Mon, 17 Oct 2022 09:43:19 -0700 Subject: [PATCH 0303/4122] scsi: lpfc: Set sli4_param's cmf option to zero when CMF is turned off Add missed clearing of phba->sli4_hba.pc_sli4_params.cmf when CMF is turned off. Signed-off-by: Justin Tee Link: https://lore.kernel.org/r/20221017164323.14536-1-justintee8345@gmail.com Signed-off-by: Martin K. 
Petersen --- drivers/scsi/lpfc/lpfc_sli.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 99d06dc7ddf6..768294b9bc0b 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -8354,6 +8354,7 @@ no_cmf: phba->cgn_i = NULL; /* Ensure CGN Mode is off */ phba->cmf_active_mode = LPFC_CFG_OFF; + sli4_params->cmf = 0; return 0; } } From c44e50f4a0ec00c2298f31f91bc2c3e9bbd81c7e Mon Sep 17 00:00:00 2001 From: Justin Tee Date: Mon, 17 Oct 2022 09:43:20 -0700 Subject: [PATCH 0304/4122] scsi: lpfc: Fix hard lockup when reading the rx_monitor from debugfs During I/O and simultaneous cat of /sys/kernel/debug/lpfc/fnX/rx_monitor, a hard lockup similar to the call trace below may occur. The spin_lock_bh in lpfc_rx_monitor_report is not protecting from timer interrupts as expected, so change the strength of the spin lock to _irq. Kernel panic - not syncing: Hard LOCKUP CPU: 3 PID: 110402 Comm: cat Kdump: loaded exception RIP: native_queued_spin_lock_slowpath+91 [IRQ stack] native_queued_spin_lock_slowpath at ffffffffb814e30b _raw_spin_lock at ffffffffb89a667a lpfc_rx_monitor_record at ffffffffc0a73a36 [lpfc] lpfc_cmf_timer at ffffffffc0abbc67 [lpfc] __hrtimer_run_queues at ffffffffb8184250 hrtimer_interrupt at ffffffffb8184ab0 smp_apic_timer_interrupt at ffffffffb8a026ba apic_timer_interrupt at ffffffffb8a01c4f [End of IRQ stack] apic_timer_interrupt at ffffffffb8a01c4f lpfc_rx_monitor_report at ffffffffc0a73c80 [lpfc] lpfc_rx_monitor_read at ffffffffc0addde1 [lpfc] full_proxy_read at ffffffffb83e7fc3 vfs_read at ffffffffb833fe71 ksys_read at ffffffffb83402af do_syscall_64 at ffffffffb800430b entry_SYSCALL_64_after_hwframe at ffffffffb8a000ad Signed-off-by: Justin Tee Link: https://lore.kernel.org/r/20221017164323.14536-2-justintee8345@gmail.com Signed-off-by: Martin K. 
Petersen --- drivers/scsi/lpfc/lpfc_sli.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 768294b9bc0b..86ba45ac91c8 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -8150,10 +8150,10 @@ u32 lpfc_rx_monitor_report(struct lpfc_hba *phba, "IO_cnt", "Info", "BWutil(ms)"); } - /* Needs to be _bh because record is called from timer interrupt + /* Needs to be _irq because record is called from timer interrupt * context */ - spin_lock_bh(ring_lock); + spin_lock_irq(ring_lock); while (*head_idx != *tail_idx) { entry = &ring[*head_idx]; @@ -8197,7 +8197,7 @@ u32 lpfc_rx_monitor_report(struct lpfc_hba *phba, if (cnt >= max_read_entries) break; } - spin_unlock_bh(ring_lock); + spin_unlock_irq(ring_lock); return cnt; } From eaf660e4282ba11239704b2b89ae94feae2010e0 Mon Sep 17 00:00:00 2001 From: Justin Tee Date: Mon, 17 Oct 2022 09:43:21 -0700 Subject: [PATCH 0305/4122] scsi: lpfc: Log when congestion management limits are in effect When bandwidth reduces from or recovers back to 100% due to congestion management, log the event. Signed-off-by: Justin Tee Link: https://lore.kernel.org/r/20221017164323.14536-3-justintee8345@gmail.com Signed-off-by: Martin K. 
Petersen --- drivers/scsi/lpfc/lpfc_sli.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 86ba45ac91c8..d25afc9dde14 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -1848,6 +1848,18 @@ lpfc_cmf_sync_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, phba->cmf_link_byte_count); bwpcent = div64_u64(bw * 100 + slop, phba->cmf_link_byte_count); + + if (phba->cmf_max_bytes_per_interval < bw && + bwpcent > 95) + lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT, + "6208 Congestion bandwidth " + "limits removed\n"); + else if ((phba->cmf_max_bytes_per_interval > bw) && + ((bwpcent + pcent) <= 100) && ((bwpcent + pcent) > 95)) + lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT, + "6209 Congestion bandwidth " + "limits in effect\n"); + if (asig) { lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT, "6237 BW Threshold %lld%% (%lld): " From 479b0917e4477f49df2e3be454aac3cfa5dec171 Mon Sep 17 00:00:00 2001 From: Justin Tee Date: Mon, 17 Oct 2022 09:43:22 -0700 Subject: [PATCH 0306/4122] scsi: lpfc: Create a sysfs entry called lpfc_xcvr_data for transceiver info The DUMP_MEMORY mailbox command is implemented for page A0 and A2 to retrieve transceiver information from firmware. The mailbox command output is then formatted to print raw data values for userspace to parse via sysfs. Signed-off-by: Justin Tee Link: https://lore.kernel.org/r/20221017164323.14536-4-justintee8345@gmail.com Signed-off-by: Martin K. 
Petersen --- drivers/scsi/lpfc/lpfc_attr.c | 118 +++++++++++++++++++++++++++++++ drivers/scsi/lpfc/lpfc_crtn.h | 3 + drivers/scsi/lpfc/lpfc_els.c | 128 ++++++++++++++++++++++++++++++++++ drivers/scsi/lpfc/lpfc_hw4.h | 5 +- 4 files changed, 252 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index ef1481326fd7..030ad1d59cbd 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -1877,6 +1877,122 @@ lpfc_set_trunking(struct lpfc_hba *phba, char *buff_out) return 0; } +static ssize_t +lpfc_xcvr_data_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct Scsi_Host *shost = class_to_shost(dev); + struct lpfc_vport *vport = (struct lpfc_vport *)shost->hostdata; + struct lpfc_hba *phba = vport->phba; + int rc; + int len = 0; + struct lpfc_rdp_context *rdp_context; + u16 temperature; + u16 rx_power; + u16 tx_bias; + u16 tx_power; + u16 vcc; + char chbuf[128]; + u16 wavelength = 0; + struct sff_trasnceiver_codes_byte7 *trasn_code_byte7; + + /* Get transceiver information */ + rdp_context = kmalloc(sizeof(*rdp_context), GFP_KERNEL); + + rc = lpfc_get_sfp_info_wait(phba, rdp_context); + if (rc) { + len = scnprintf(buf, PAGE_SIZE - len, "SFP info NA:\n"); + goto out_free_rdp; + } + + strncpy(chbuf, &rdp_context->page_a0[SSF_VENDOR_NAME], 16); + chbuf[16] = 0; + + len = scnprintf(buf, PAGE_SIZE - len, "VendorName:\t%s\n", chbuf); + len += scnprintf(buf + len, PAGE_SIZE - len, + "VendorOUI:\t%02x-%02x-%02x\n", + (uint8_t)rdp_context->page_a0[SSF_VENDOR_OUI], + (uint8_t)rdp_context->page_a0[SSF_VENDOR_OUI + 1], + (uint8_t)rdp_context->page_a0[SSF_VENDOR_OUI + 2]); + strncpy(chbuf, &rdp_context->page_a0[SSF_VENDOR_PN], 16); + chbuf[16] = 0; + len += scnprintf(buf + len, PAGE_SIZE - len, "VendorPN:\t%s\n", chbuf); + strncpy(chbuf, &rdp_context->page_a0[SSF_VENDOR_SN], 16); + chbuf[16] = 0; + len += scnprintf(buf + len, PAGE_SIZE - len, "VendorSN:\t%s\n", chbuf); + 
strncpy(chbuf, &rdp_context->page_a0[SSF_VENDOR_REV], 4); + chbuf[4] = 0; + len += scnprintf(buf + len, PAGE_SIZE - len, "VendorRev:\t%s\n", chbuf); + strncpy(chbuf, &rdp_context->page_a0[SSF_DATE_CODE], 8); + chbuf[8] = 0; + len += scnprintf(buf + len, PAGE_SIZE - len, "DateCode:\t%s\n", chbuf); + len += scnprintf(buf + len, PAGE_SIZE - len, "Identifier:\t%xh\n", + (uint8_t)rdp_context->page_a0[SSF_IDENTIFIER]); + len += scnprintf(buf + len, PAGE_SIZE - len, "ExtIdentifier:\t%xh\n", + (uint8_t)rdp_context->page_a0[SSF_EXT_IDENTIFIER]); + len += scnprintf(buf + len, PAGE_SIZE - len, "Connector:\t%xh\n", + (uint8_t)rdp_context->page_a0[SSF_CONNECTOR]); + wavelength = (rdp_context->page_a0[SSF_WAVELENGTH_B1] << 8) | + rdp_context->page_a0[SSF_WAVELENGTH_B0]; + + len += scnprintf(buf + len, PAGE_SIZE - len, "Wavelength:\t%d nm\n", + wavelength); + trasn_code_byte7 = (struct sff_trasnceiver_codes_byte7 *) + &rdp_context->page_a0[SSF_TRANSCEIVER_CODE_B7]; + + len += scnprintf(buf + len, PAGE_SIZE - len, "Speeds: \t"); + if (*(uint8_t *)trasn_code_byte7 == 0) { + len += scnprintf(buf + len, PAGE_SIZE - len, + "Unknown\n"); + } else { + if (trasn_code_byte7->fc_sp_100MB) + len += scnprintf(buf + len, PAGE_SIZE - len, + "1 "); + if (trasn_code_byte7->fc_sp_200mb) + len += scnprintf(buf + len, PAGE_SIZE - len, + "2 "); + if (trasn_code_byte7->fc_sp_400MB) + len += scnprintf(buf + len, PAGE_SIZE - len, + "4 "); + if (trasn_code_byte7->fc_sp_800MB) + len += scnprintf(buf + len, PAGE_SIZE - len, + "8 "); + if (trasn_code_byte7->fc_sp_1600MB) + len += scnprintf(buf + len, PAGE_SIZE - len, + "16 "); + if (trasn_code_byte7->fc_sp_3200MB) + len += scnprintf(buf + len, PAGE_SIZE - len, + "32 "); + if (trasn_code_byte7->speed_chk_ecc) + len += scnprintf(buf + len, PAGE_SIZE - len, + "64 "); + len += scnprintf(buf + len, PAGE_SIZE - len, "GB\n"); + } + temperature = (rdp_context->page_a2[SFF_TEMPERATURE_B1] << 8 | + rdp_context->page_a2[SFF_TEMPERATURE_B0]); + vcc = 
(rdp_context->page_a2[SFF_VCC_B1] << 8 | + rdp_context->page_a2[SFF_VCC_B0]); + tx_power = (rdp_context->page_a2[SFF_TXPOWER_B1] << 8 | + rdp_context->page_a2[SFF_TXPOWER_B0]); + tx_bias = (rdp_context->page_a2[SFF_TX_BIAS_CURRENT_B1] << 8 | + rdp_context->page_a2[SFF_TX_BIAS_CURRENT_B0]); + rx_power = (rdp_context->page_a2[SFF_RXPOWER_B1] << 8 | + rdp_context->page_a2[SFF_RXPOWER_B0]); + + len += scnprintf(buf + len, PAGE_SIZE - len, + "Temperature:\tx%04x C\n", temperature); + len += scnprintf(buf + len, PAGE_SIZE - len, "Vcc:\t\tx%04x V\n", vcc); + len += scnprintf(buf + len, PAGE_SIZE - len, + "TxBiasCurrent:\tx%04x mA\n", tx_bias); + len += scnprintf(buf + len, PAGE_SIZE - len, "TxPower:\tx%04x mW\n", + tx_power); + len += scnprintf(buf + len, PAGE_SIZE - len, "RxPower:\tx%04x mW\n", + rx_power); +out_free_rdp: + kfree(rdp_context); + return len; +} + /** * lpfc_board_mode_show - Return the state of the board * @dev: class device that is converted into a Scsi_host. @@ -2810,6 +2926,7 @@ static DEVICE_ATTR_RO(lpfc_drvr_version); static DEVICE_ATTR_RO(lpfc_enable_fip); static DEVICE_ATTR(board_mode, S_IRUGO | S_IWUSR, lpfc_board_mode_show, lpfc_board_mode_store); +static DEVICE_ATTR_RO(lpfc_xcvr_data); static DEVICE_ATTR(issue_reset, S_IWUSR, NULL, lpfc_issue_reset); static DEVICE_ATTR(max_vpi, S_IRUGO, lpfc_max_vpi_show, NULL); static DEVICE_ATTR(used_vpi, S_IRUGO, lpfc_used_vpi_show, NULL); @@ -5906,6 +6023,7 @@ static struct attribute *lpfc_hba_attrs[] = { &dev_attr_lpfc_fcp_wait_abts_rsp.attr, &dev_attr_nport_evt_cnt.attr, &dev_attr_board_mode.attr, + &dev_attr_lpfc_xcvr_data.attr, &dev_attr_max_vpi.attr, &dev_attr_used_vpi.attr, &dev_attr_max_rpi.attr, diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index d2d207791056..8928f016d09e 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -687,3 +687,6 @@ int lpfc_issue_els_qfpa(struct lpfc_vport *vport); void lpfc_sli_rpi_release(struct lpfc_vport *vport, 
struct lpfc_nodelist *ndlp); + +int lpfc_get_sfp_info_wait(struct lpfc_hba *phba, + struct lpfc_rdp_context *rdp_context); diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 863b2125fed6..2b03210264bb 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -7190,6 +7190,134 @@ rdp_fail: return 1; } +int lpfc_get_sfp_info_wait(struct lpfc_hba *phba, + struct lpfc_rdp_context *rdp_context) +{ + LPFC_MBOXQ_t *mbox = NULL; + int rc; + struct lpfc_dmabuf *mp; + struct lpfc_dmabuf *mpsave; + void *virt; + MAILBOX_t *mb; + + mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + if (!mbox) { + lpfc_printf_log(phba, KERN_WARNING, LOG_MBOX | LOG_ELS, + "7205 failed to allocate mailbox memory"); + return 1; + } + + if (lpfc_sli4_dump_page_a0(phba, mbox)) + goto sfp_fail; + mp = mbox->ctx_buf; + mpsave = mp; + virt = mp->virt; + if (phba->sli_rev < LPFC_SLI_REV4) { + mb = &mbox->u.mb; + mb->un.varDmp.cv = 1; + mb->un.varDmp.co = 1; + mb->un.varWords[2] = 0; + mb->un.varWords[3] = DMP_SFF_PAGE_A0_SIZE / 4; + mb->un.varWords[4] = 0; + mb->un.varWords[5] = 0; + mb->un.varWords[6] = 0; + mb->un.varWords[7] = 0; + mb->un.varWords[8] = 0; + mb->un.varWords[9] = 0; + mb->un.varWords[10] = 0; + mbox->in_ext_byte_len = DMP_SFF_PAGE_A0_SIZE; + mbox->out_ext_byte_len = DMP_SFF_PAGE_A0_SIZE; + mbox->mbox_offset_word = 5; + mbox->ctx_buf = virt; + } else { + bf_set(lpfc_mbx_memory_dump_type3_length, + &mbox->u.mqe.un.mem_dump_type3, DMP_SFF_PAGE_A0_SIZE); + mbox->u.mqe.un.mem_dump_type3.addr_lo = putPaddrLow(mp->phys); + mbox->u.mqe.un.mem_dump_type3.addr_hi = putPaddrHigh(mp->phys); + } + mbox->vport = phba->pport; + mbox->ctx_ndlp = (struct lpfc_rdp_context *)rdp_context; + + rc = lpfc_sli_issue_mbox_wait(phba, mbox, 30); + if (rc == MBX_NOT_FINISHED) { + rc = 1; + goto error; + } + + if (phba->sli_rev == LPFC_SLI_REV4) + mp = (struct lpfc_dmabuf *)(mbox->ctx_buf); + else + mp = mpsave; + + if (bf_get(lpfc_mqe_status, &mbox->u.mqe)) 
{ + rc = 1; + goto error; + } + + lpfc_sli_bemem_bcopy(mp->virt, &rdp_context->page_a0, + DMP_SFF_PAGE_A0_SIZE); + + memset(mbox, 0, sizeof(*mbox)); + memset(mp->virt, 0, DMP_SFF_PAGE_A2_SIZE); + INIT_LIST_HEAD(&mp->list); + + /* save address for completion */ + mbox->ctx_buf = mp; + mbox->vport = phba->pport; + + bf_set(lpfc_mqe_command, &mbox->u.mqe, MBX_DUMP_MEMORY); + bf_set(lpfc_mbx_memory_dump_type3_type, + &mbox->u.mqe.un.mem_dump_type3, DMP_LMSD); + bf_set(lpfc_mbx_memory_dump_type3_link, + &mbox->u.mqe.un.mem_dump_type3, phba->sli4_hba.physical_port); + bf_set(lpfc_mbx_memory_dump_type3_page_no, + &mbox->u.mqe.un.mem_dump_type3, DMP_PAGE_A2); + if (phba->sli_rev < LPFC_SLI_REV4) { + mb = &mbox->u.mb; + mb->un.varDmp.cv = 1; + mb->un.varDmp.co = 1; + mb->un.varWords[2] = 0; + mb->un.varWords[3] = DMP_SFF_PAGE_A2_SIZE / 4; + mb->un.varWords[4] = 0; + mb->un.varWords[5] = 0; + mb->un.varWords[6] = 0; + mb->un.varWords[7] = 0; + mb->un.varWords[8] = 0; + mb->un.varWords[9] = 0; + mb->un.varWords[10] = 0; + mbox->in_ext_byte_len = DMP_SFF_PAGE_A2_SIZE; + mbox->out_ext_byte_len = DMP_SFF_PAGE_A2_SIZE; + mbox->mbox_offset_word = 5; + mbox->ctx_buf = virt; + } else { + bf_set(lpfc_mbx_memory_dump_type3_length, + &mbox->u.mqe.un.mem_dump_type3, DMP_SFF_PAGE_A2_SIZE); + mbox->u.mqe.un.mem_dump_type3.addr_lo = putPaddrLow(mp->phys); + mbox->u.mqe.un.mem_dump_type3.addr_hi = putPaddrHigh(mp->phys); + } + + mbox->ctx_ndlp = (struct lpfc_rdp_context *)rdp_context; + rc = lpfc_sli_issue_mbox_wait(phba, mbox, 30); + if (bf_get(lpfc_mqe_status, &mbox->u.mqe)) { + rc = 1; + goto error; + } + rc = 0; + + lpfc_sli_bemem_bcopy(mp->virt, &rdp_context->page_a2, + DMP_SFF_PAGE_A2_SIZE); + +error: + mbox->ctx_buf = mpsave; + lpfc_mbox_rsrc_cleanup(phba, mbox, MBOX_THD_UNLOCKED); + + return rc; + +sfp_fail: + mempool_free(mbox, phba->mbox_mem_pool); + return 1; +} + /* * lpfc_els_rcv_rdp - Process an unsolicited RDP ELS. * @vport: pointer to a host virtual N_Port data structure. 
diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index 5288fc69908a..fb3504dbb899 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -3162,7 +3162,8 @@ struct lpfc_mbx_memory_dump_type3 { #define SFF_LENGTH_COPPER 18 #define SSF_LENGTH_50UM_OM3 19 #define SSF_VENDOR_NAME 20 -#define SSF_VENDOR_OUI 36 +#define SSF_TRANSCEIVER2 36 +#define SSF_VENDOR_OUI 37 #define SSF_VENDOR_PN 40 #define SSF_VENDOR_REV 56 #define SSF_WAVELENGTH_B1 60 @@ -3281,7 +3282,7 @@ struct sff_trasnceiver_codes_byte6 { struct sff_trasnceiver_codes_byte7 { uint8_t fc_sp_100MB:1; /* 100 MB/sec */ - uint8_t reserve:1; + uint8_t speed_chk_ecc:1; uint8_t fc_sp_200mb:1; /* 200 MB/sec */ uint8_t fc_sp_3200MB:1; /* 3200 MB/sec */ uint8_t fc_sp_400MB:1; /* 400 MB/sec */ From 24b3e45ca9c53185baec34488efcb75bbe162f7a Mon Sep 17 00:00:00 2001 From: Justin Tee Date: Mon, 17 Oct 2022 09:43:23 -0700 Subject: [PATCH 0307/4122] scsi: lpfc: Update lpfc version to 14.2.0.8 Update lpfc version to 14.2.0.8 Signed-off-by: Justin Tee Link: https://lore.kernel.org/r/20221017164323.14536-5-justintee8345@gmail.com Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h index 192d5630a44d..378eba7b09d9 100644 --- a/drivers/scsi/lpfc/lpfc_version.h +++ b/drivers/scsi/lpfc/lpfc_version.h @@ -20,7 +20,7 @@ * included with this package. 
* *******************************************************************/ -#define LPFC_DRIVER_VERSION "14.2.0.7" +#define LPFC_DRIVER_VERSION "14.2.0.8" #define LPFC_DRIVER_NAME "lpfc" /* Used for SLI 2/3 */ From 978b7922d3dca672b41bb4b8ce6c06ab77112741 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 18 Oct 2022 13:29:49 -0700 Subject: [PATCH 0308/4122] scsi: core: Fix a race between scsi_done() and scsi_timeout() If there is a race between scsi_done() and scsi_timeout() and if scsi_timeout() loses the race, scsi_timeout() should not reset the request timer. Hence change the return value for this case from BLK_EH_RESET_TIMER into BLK_EH_DONE. Although the block layer holds a reference on a request (req->ref) while calling a timeout handler, restarting the timer (blk_add_timer()) while a request is being completed is racy. Reviewed-by: Mike Christie Cc: Keith Busch Cc: Christoph Hellwig Cc: Ming Lei Cc: John Garry Cc: Hannes Reinecke Reported-by: Adrian Hunter Fixes: 15f73f5b3e59 ("blk-mq: move failure injection out of blk_mq_complete_request") Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20221018202958.1902564-2-bvanassche@acm.org Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_error.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 6995c8979230..02520f912306 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -343,19 +343,11 @@ enum blk_eh_timer_return scsi_timeout(struct request *req) if (rtn == BLK_EH_DONE) { /* - * Set the command to complete first in order to prevent a real - * completion from releasing the command while error handling - * is using it. If the command was already completed, then the - * lower level driver beat the timeout handler, and it is safe - * to return without escalating error recovery. 
- * - * If timeout handling lost the race to a real completion, the - * block layer may ignore that due to a fake timeout injection, - * so return RESET_TIMER to allow error handling another shot - * at this command. + * If scsi_done() has already set SCMD_STATE_COMPLETE, do not + * modify *scmd. */ if (test_and_set_bit(SCMD_STATE_COMPLETE, &scmd->state)) - return BLK_EH_RESET_TIMER; + return BLK_EH_DONE; if (scsi_abort_command(scmd) != SUCCESS) { set_host_byte(scmd, DID_TIME_OUT); scsi_eh_scmd_add(scmd); From dee7121e8c0a3ce41af2b02d516f54eaec32abcd Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 18 Oct 2022 13:29:50 -0700 Subject: [PATCH 0309/4122] scsi: core: Change the return type of .eh_timed_out() Commit 6600593cbd93 ("block: rename BLK_EH_NOT_HANDLED to BLK_EH_DONE") made it impossible for .eh_timed_out() implementations to call scsi_done() without causing a crash. Restore support for SCSI timeout handlers to call scsi_done() as follows: * Change all .eh_timed_out() handlers as follows: - Change the return type into enum scsi_timeout_action. - Change BLK_EH_RESET_TIMER into SCSI_EH_RESET_TIMER. - Change BLK_EH_DONE into SCSI_EH_NOT_HANDLED. * In scsi_timeout(), convert the SCSI_EH_* values into BLK_EH_* values. Reviewed-by: Lee Duncan Cc: Christoph Hellwig Cc: Ming Lei Cc: John Garry Cc: Mike Christie Cc: Hannes Reinecke Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20221018202958.1902564-3-bvanassche@acm.org Reviewed-by: Mike Christie Signed-off-by: Martin K. 
Petersen --- Documentation/scsi/scsi_eh.rst | 7 +++-- drivers/message/fusion/mptsas.c | 8 +++--- drivers/scsi/libiscsi.c | 26 +++++++++--------- drivers/scsi/megaraid/megaraid_sas_base.c | 7 +++-- drivers/scsi/mvumi.c | 4 +-- drivers/scsi/qla4xxx/ql4_os.c | 8 +++--- drivers/scsi/scsi_error.c | 33 +++++++++++++---------- drivers/scsi/scsi_transport_fc.c | 7 +++-- drivers/scsi/scsi_transport_srp.c | 8 +++--- drivers/scsi/storvsc_drv.c | 4 +-- drivers/scsi/virtio_scsi.c | 4 +-- include/scsi/libiscsi.h | 2 +- include/scsi/scsi_host.h | 14 +++++++++- include/scsi/scsi_transport_fc.h | 2 +- include/scsi/scsi_transport_srp.h | 2 +- 15 files changed, 77 insertions(+), 59 deletions(-) diff --git a/Documentation/scsi/scsi_eh.rst b/Documentation/scsi/scsi_eh.rst index bad624fab823..104d09e9af09 100644 --- a/Documentation/scsi/scsi_eh.rst +++ b/Documentation/scsi/scsi_eh.rst @@ -92,14 +92,17 @@ The timeout handler is scsi_timeout(). When a timeout occurs, this function 1. invokes optional hostt->eh_timed_out() callback. Return value can be one of - - BLK_EH_RESET_TIMER + - SCSI_EH_RESET_TIMER This indicates that more time is required to finish the command. Timer is restarted. - - BLK_EH_DONE + - SCSI_EH_NOT_HANDLED eh_timed_out() callback did not handle the command. Step #2 is taken. + - SCSI_EH_DONE + eh_timed_out() completed the command. + 2. scsi_abort_command() is invoked to schedule an asynchronous abort which may issue a retry scmd->allowed + 1 times. 
Asynchronous aborts are not invoked for commands for which the SCSI_EH_ABORT_SCHEDULED flag is set (this diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c index 34901bcd1ce8..88fe4a860ae5 100644 --- a/drivers/message/fusion/mptsas.c +++ b/drivers/message/fusion/mptsas.c @@ -1952,12 +1952,12 @@ mptsas_qcmd(struct Scsi_Host *shost, struct scsi_cmnd *SCpnt) * @sc: scsi command that the midlayer is about to time out * **/ -static enum blk_eh_timer_return mptsas_eh_timed_out(struct scsi_cmnd *sc) +static enum scsi_timeout_action mptsas_eh_timed_out(struct scsi_cmnd *sc) { MPT_SCSI_HOST *hd; MPT_ADAPTER *ioc; VirtDevice *vdevice; - enum blk_eh_timer_return rc = BLK_EH_DONE; + enum scsi_timeout_action rc = SCSI_EH_NOT_HANDLED; hd = shost_priv(sc->device->host); if (hd == NULL) { @@ -1980,7 +1980,7 @@ static enum blk_eh_timer_return mptsas_eh_timed_out(struct scsi_cmnd *sc) dtmprintk(ioc, printk(MYIOC_s_WARN_FMT ": %s: ioc is in reset," "SML need to reset the timer (sc=%p)\n", ioc->name, __func__, sc)); - rc = BLK_EH_RESET_TIMER; + rc = SCSI_EH_RESET_TIMER; } vdevice = sc->device->hostdata; if (vdevice && vdevice->vtarget && (vdevice->vtarget->inDMD @@ -1988,7 +1988,7 @@ static enum blk_eh_timer_return mptsas_eh_timed_out(struct scsi_cmnd *sc) dtmprintk(ioc, printk(MYIOC_s_WARN_FMT ": %s: target removed " "or in device removal delay (sc=%p)\n", ioc->name, __func__, sc)); - rc = BLK_EH_RESET_TIMER; + rc = SCSI_EH_RESET_TIMER; goto done; } diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index d95f4bcdeb2e..ef2fc860257e 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -2071,9 +2071,9 @@ static int iscsi_has_ping_timed_out(struct iscsi_conn *conn) return 0; } -enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc) +enum scsi_timeout_action iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc) { - enum blk_eh_timer_return rc = BLK_EH_DONE; + enum scsi_timeout_action rc = SCSI_EH_NOT_HANDLED; struct 
iscsi_task *task = NULL, *running_task; struct iscsi_cls_session *cls_session; struct iscsi_session *session; @@ -2093,7 +2093,7 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc) * Raced with completion. Blk layer has taken ownership * so let timeout code complete it now. */ - rc = BLK_EH_DONE; + rc = SCSI_EH_NOT_HANDLED; spin_unlock(&session->back_lock); goto done; } @@ -2102,7 +2102,7 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc) * Racing with the completion path right now, so give it more * time so that path can complete it like normal. */ - rc = BLK_EH_RESET_TIMER; + rc = SCSI_EH_RESET_TIMER; task = NULL; spin_unlock(&session->back_lock); goto done; @@ -2120,21 +2120,21 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc) if (unlikely(system_state != SYSTEM_RUNNING)) { sc->result = DID_NO_CONNECT << 16; ISCSI_DBG_EH(session, "sc on shutdown, handled\n"); - rc = BLK_EH_DONE; + rc = SCSI_EH_NOT_HANDLED; goto done; } /* * We are probably in the middle of iscsi recovery so let * that complete and handle the error. */ - rc = BLK_EH_RESET_TIMER; + rc = SCSI_EH_RESET_TIMER; goto done; } conn = session->leadconn; if (!conn) { /* In the middle of shuting down */ - rc = BLK_EH_RESET_TIMER; + rc = SCSI_EH_RESET_TIMER; goto done; } @@ -2151,7 +2151,7 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc) "Last data xfer at %lu. 
Last timeout was at " "%lu\n.", task->last_xfer, task->last_timeout); task->have_checked_conn = false; - rc = BLK_EH_RESET_TIMER; + rc = SCSI_EH_RESET_TIMER; goto done; } @@ -2162,7 +2162,7 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc) * and can let the iscsi eh handle it */ if (iscsi_has_ping_timed_out(conn)) { - rc = BLK_EH_RESET_TIMER; + rc = SCSI_EH_RESET_TIMER; goto done; } @@ -2200,7 +2200,7 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc) task->last_xfer, running_task->last_xfer, task->last_timeout); spin_unlock(&session->back_lock); - rc = BLK_EH_RESET_TIMER; + rc = SCSI_EH_RESET_TIMER; goto done; } } @@ -2216,14 +2216,14 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc) */ if (READ_ONCE(conn->ping_task)) { task->have_checked_conn = true; - rc = BLK_EH_RESET_TIMER; + rc = SCSI_EH_RESET_TIMER; goto done; } /* Make sure there is a transport check done */ iscsi_send_nopout(conn, NULL); task->have_checked_conn = true; - rc = BLK_EH_RESET_TIMER; + rc = SCSI_EH_RESET_TIMER; done: spin_unlock_bh(&session->frwd_lock); @@ -2232,7 +2232,7 @@ done: task->last_timeout = jiffies; iscsi_put_task(task); } - ISCSI_DBG_EH(session, "return %s\n", rc == BLK_EH_RESET_TIMER ? + ISCSI_DBG_EH(session, "return %s\n", rc == SCSI_EH_RESET_TIMER ? "timer reset" : "shutdown or nh"); return rc; } diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 9be4ba61a076..6940043a91ae 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -2927,15 +2927,14 @@ static int megasas_generic_reset(struct scsi_cmnd *scmd) * Sets the FW busy flag and reduces the host->can_queue if the * cmd has not been completed within the timeout period. 
*/ -static enum -blk_eh_timer_return megasas_reset_timer(struct scsi_cmnd *scmd) +static enum scsi_timeout_action megasas_reset_timer(struct scsi_cmnd *scmd) { struct megasas_instance *instance; unsigned long flags; if (time_after(jiffies, scmd->jiffies_at_alloc + (scmd_timeout * 2) * HZ)) { - return BLK_EH_DONE; + return SCSI_EH_NOT_HANDLED; } instance = (struct megasas_instance *)scmd->device->host->hostdata; @@ -2949,7 +2948,7 @@ blk_eh_timer_return megasas_reset_timer(struct scsi_cmnd *scmd) spin_unlock_irqrestore(instance->host->host_lock, flags); } - return BLK_EH_RESET_TIMER; + return SCSI_EH_RESET_TIMER; } /** diff --git a/drivers/scsi/mvumi.c b/drivers/scsi/mvumi.c index 05d3ce9b72db..b3dcb8918618 100644 --- a/drivers/scsi/mvumi.c +++ b/drivers/scsi/mvumi.c @@ -2109,7 +2109,7 @@ out_return_cmd: return 0; } -static enum blk_eh_timer_return mvumi_timed_out(struct scsi_cmnd *scmd) +static enum scsi_timeout_action mvumi_timed_out(struct scsi_cmnd *scmd) { struct mvumi_cmd *cmd = mvumi_priv(scmd)->cmd_priv; struct Scsi_Host *host = scmd->device->host; @@ -2137,7 +2137,7 @@ static enum blk_eh_timer_return mvumi_timed_out(struct scsi_cmnd *scmd) mvumi_return_cmd(mhba, cmd); spin_unlock_irqrestore(mhba->shost->host_lock, flags); - return BLK_EH_DONE; + return SCSI_EH_NOT_HANDLED; } static int diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index 9e849f6b0d0f..005502125b27 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -116,7 +116,7 @@ static int qla4xxx_iface_set_param(struct Scsi_Host *shost, void *data, static int qla4xxx_get_iface_param(struct iscsi_iface *iface, enum iscsi_param_type param_type, int param, char *buf); -static enum blk_eh_timer_return qla4xxx_eh_cmd_timed_out(struct scsi_cmnd *sc); +static enum scsi_timeout_action qla4xxx_eh_cmd_timed_out(struct scsi_cmnd *sc); static struct iscsi_endpoint *qla4xxx_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr, int non_blocking); @@ 
-1871,17 +1871,17 @@ exit_get_stats: return; } -static enum blk_eh_timer_return qla4xxx_eh_cmd_timed_out(struct scsi_cmnd *sc) +static enum scsi_timeout_action qla4xxx_eh_cmd_timed_out(struct scsi_cmnd *sc) { struct iscsi_cls_session *session; unsigned long flags; - enum blk_eh_timer_return ret = BLK_EH_DONE; + enum scsi_timeout_action ret = SCSI_EH_NOT_HANDLED; session = starget_to_session(scsi_target(sc->device)); spin_lock_irqsave(&session->lock, flags); if (session->state == ISCSI_SESSION_FAILED) - ret = BLK_EH_RESET_TIMER; + ret = SCSI_EH_RESET_TIMER; spin_unlock_irqrestore(&session->lock, flags); return ret; diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 02520f912306..be2a70c5ac6d 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -328,7 +328,6 @@ void scsi_eh_scmd_add(struct scsi_cmnd *scmd) enum blk_eh_timer_return scsi_timeout(struct request *req) { struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(req); - enum blk_eh_timer_return rtn = BLK_EH_DONE; struct Scsi_Host *host = scmd->device->host; trace_scsi_dispatch_cmd_timeout(scmd); @@ -338,23 +337,29 @@ enum blk_eh_timer_return scsi_timeout(struct request *req) if (host->eh_deadline != -1 && !host->last_reset) host->last_reset = jiffies; - if (host->hostt->eh_timed_out) - rtn = host->hostt->eh_timed_out(scmd); - - if (rtn == BLK_EH_DONE) { - /* - * If scsi_done() has already set SCMD_STATE_COMPLETE, do not - * modify *scmd. - */ - if (test_and_set_bit(SCMD_STATE_COMPLETE, &scmd->state)) + if (host->hostt->eh_timed_out) { + switch (host->hostt->eh_timed_out(scmd)) { + case SCSI_EH_DONE: return BLK_EH_DONE; - if (scsi_abort_command(scmd) != SUCCESS) { - set_host_byte(scmd, DID_TIME_OUT); - scsi_eh_scmd_add(scmd); + case SCSI_EH_RESET_TIMER: + return BLK_EH_RESET_TIMER; + case SCSI_EH_NOT_HANDLED: + break; } } - return rtn; + /* + * If scsi_done() has already set SCMD_STATE_COMPLETE, do not modify + * *scmd. 
+ */ + if (test_and_set_bit(SCMD_STATE_COMPLETE, &scmd->state)) + return BLK_EH_DONE; + if (scsi_abort_command(scmd) != SUCCESS) { + set_host_byte(scmd, DID_TIME_OUT); + scsi_eh_scmd_add(scmd); + } + + return BLK_EH_DONE; } /** diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index 8934160c4a33..0965f8a7134f 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -2530,15 +2530,14 @@ static int fc_vport_match(struct attribute_container *cont, * Notes: * This routine assumes no locks are held on entry. */ -enum blk_eh_timer_return -fc_eh_timed_out(struct scsi_cmnd *scmd) +enum scsi_timeout_action fc_eh_timed_out(struct scsi_cmnd *scmd) { struct fc_rport *rport = starget_to_rport(scsi_target(scmd->device)); if (rport->port_state == FC_PORTSTATE_BLOCKED) - return BLK_EH_RESET_TIMER; + return SCSI_EH_RESET_TIMER; - return BLK_EH_DONE; + return SCSI_EH_NOT_HANDLED; } EXPORT_SYMBOL(fc_eh_timed_out); diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c index 98a34ed10f1a..87d0fb8dc503 100644 --- a/drivers/scsi/scsi_transport_srp.c +++ b/drivers/scsi/scsi_transport_srp.c @@ -594,13 +594,13 @@ EXPORT_SYMBOL(srp_reconnect_rport); * @scmd: SCSI command. * * If a timeout occurs while an rport is in the blocked state, ask the SCSI - * EH to continue waiting (BLK_EH_RESET_TIMER). Otherwise let the SCSI core - * handle the timeout (BLK_EH_DONE). + * EH to continue waiting (SCSI_EH_RESET_TIMER). Otherwise let the SCSI core + * handle the timeout (SCSI_EH_NOT_HANDLED). * * Note: This function is called from soft-IRQ context and with the request * queue lock held. 
*/ -enum blk_eh_timer_return srp_timed_out(struct scsi_cmnd *scmd) +enum scsi_timeout_action srp_timed_out(struct scsi_cmnd *scmd) { struct scsi_device *sdev = scmd->device; struct Scsi_Host *shost = sdev->host; @@ -611,7 +611,7 @@ enum blk_eh_timer_return srp_timed_out(struct scsi_cmnd *scmd) return rport && rport->fast_io_fail_tmo < 0 && rport->dev_loss_tmo < 0 && i->f->reset_timer_if_blocked && scsi_device_blocked(sdev) ? - BLK_EH_RESET_TIMER : BLK_EH_DONE; + SCSI_EH_RESET_TIMER : SCSI_EH_NOT_HANDLED; } EXPORT_SYMBOL(srp_timed_out); diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index bc46721aa01c..a84194d82347 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1652,13 +1652,13 @@ static int storvsc_host_reset_handler(struct scsi_cmnd *scmnd) * be unbounded on Azure. Reset the timer unconditionally to give the host a * chance to perform EH. */ -static enum blk_eh_timer_return storvsc_eh_timed_out(struct scsi_cmnd *scmnd) +static enum scsi_timeout_action storvsc_eh_timed_out(struct scsi_cmnd *scmnd) { #if IS_ENABLED(CONFIG_SCSI_FC_ATTRS) if (scmnd->device->host->transportt == fc_transport_template) return fc_eh_timed_out(scmnd); #endif - return BLK_EH_RESET_TIMER; + return SCSI_EH_RESET_TIMER; } static bool storvsc_scsi_cmd_ok(struct scsi_cmnd *scmnd) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 2a79ab16134b..d07d24c06b54 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -731,9 +731,9 @@ static void virtscsi_commit_rqs(struct Scsi_Host *shost, u16 hwq) * latencies might be higher than on bare metal. Reset the timer * unconditionally to give the host a chance to perform EH. 
*/ -static enum blk_eh_timer_return virtscsi_eh_timed_out(struct scsi_cmnd *scmnd) +static enum scsi_timeout_action virtscsi_eh_timed_out(struct scsi_cmnd *scmnd) { - return BLK_EH_RESET_TIMER; + return SCSI_EH_RESET_TIMER; } static struct scsi_host_template virtscsi_host_template = { diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index 654cc3918c94..695eebc6f2c8 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -393,7 +393,7 @@ extern int iscsi_eh_recover_target(struct scsi_cmnd *sc); extern int iscsi_eh_session_reset(struct scsi_cmnd *sc); extern int iscsi_eh_device_reset(struct scsi_cmnd *sc); extern int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc); -extern enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc); +extern enum scsi_timeout_action iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc); /* * iSCSI host helpers. diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index e71436183c0d..587cc767bb67 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -27,6 +27,18 @@ struct scsi_transport_template; #define MODE_INITIATOR 0x01 #define MODE_TARGET 0x02 +/** + * enum scsi_timeout_action - How to handle a command that timed out. + * @SCSI_EH_DONE: The command has already been completed. + * @SCSI_EH_RESET_TIMER: Reset the timer and continue waiting for completion. + * @SCSI_EH_NOT_HANDLED: The command has not yet finished. Abort the command. + */ +enum scsi_timeout_action { + SCSI_EH_DONE, + SCSI_EH_RESET_TIMER, + SCSI_EH_NOT_HANDLED, +}; + struct scsi_host_template { /* * Put fields referenced in IO submission path together in @@ -331,7 +343,7 @@ struct scsi_host_template { * * Status: OPTIONAL */ - enum blk_eh_timer_return (*eh_timed_out)(struct scsi_cmnd *); + enum scsi_timeout_action (*eh_timed_out)(struct scsi_cmnd *); /* * Optional routine that allows the transport to decide if a cmd * is retryable. 
Return true if the transport is in a state the diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h index e80a7c542c88..3dcda19d3520 100644 --- a/include/scsi/scsi_transport_fc.h +++ b/include/scsi/scsi_transport_fc.h @@ -862,7 +862,7 @@ struct fc_vport *fc_vport_create(struct Scsi_Host *shost, int channel, int fc_vport_terminate(struct fc_vport *vport); int fc_block_rport(struct fc_rport *rport); int fc_block_scsi_eh(struct scsi_cmnd *cmnd); -enum blk_eh_timer_return fc_eh_timed_out(struct scsi_cmnd *scmd); +enum scsi_timeout_action fc_eh_timed_out(struct scsi_cmnd *scmd); bool fc_eh_should_retry_cmd(struct scsi_cmnd *scmd); static inline struct Scsi_Host *fc_bsg_to_shost(struct bsg_job *job) diff --git a/include/scsi/scsi_transport_srp.h b/include/scsi/scsi_transport_srp.h index d22df12584f9..dfc78aa112ad 100644 --- a/include/scsi/scsi_transport_srp.h +++ b/include/scsi/scsi_transport_srp.h @@ -118,7 +118,7 @@ extern int srp_reconnect_rport(struct srp_rport *rport); extern void srp_start_tl_fail_timers(struct srp_rport *rport); extern void srp_remove_host(struct Scsi_Host *); extern void srp_stop_rport_timers(struct srp_rport *rport); -enum blk_eh_timer_return srp_timed_out(struct scsi_cmnd *scmd); +enum scsi_timeout_action srp_timed_out(struct scsi_cmnd *scmd); /** * srp_chkready() - evaluate the transport layer state before I/O From 310bcaef6d7ed1626bba95dd9b5c5acd189c0e35 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 18 Oct 2022 13:29:51 -0700 Subject: [PATCH 0310/4122] scsi: core: Support failing requests while recovering The current behavior for SCSI commands submitted while error recovery is ongoing is to retry command submission after error recovery has finished. See also the scsi_host_in_recovery() check in scsi_host_queue_ready(). Add support for failing SCSI commands while host recovery is in progress. This functionality will be used to fix a deadlock in the UFS driver. 
Cc: Christoph Hellwig Cc: Ming Lei Cc: John Garry Cc: Mike Christie Cc: Hannes Reinecke Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20221018202958.1902564-4-bvanassche@acm.org Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_lib.c | 8 +++++--- include/scsi/scsi_cmnd.h | 3 ++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index fa96d3cfdfa3..ec890865abae 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1341,9 +1341,6 @@ static inline int scsi_host_queue_ready(struct request_queue *q, struct scsi_device *sdev, struct scsi_cmnd *cmd) { - if (scsi_host_in_recovery(shost)) - return 0; - if (atomic_read(&shost->host_blocked) > 0) { if (scsi_host_busy(shost) > 0) goto starved; @@ -1732,6 +1729,11 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx, ret = BLK_STS_RESOURCE; if (!scsi_target_queue_ready(shost, sdev)) goto out_put_budget; + if (unlikely(scsi_host_in_recovery(shost))) { + if (cmd->flags & SCMD_FAIL_IF_RECOVERING) + ret = BLK_STS_OFFLINE; + goto out_dec_target_busy; + } if (!scsi_host_queue_ready(q, shost, sdev, cmd)) goto out_dec_target_busy; diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index 7d3622db38ed..c2cb5f69635c 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -52,8 +52,9 @@ struct scsi_pointer { #define SCMD_TAGGED (1 << 0) #define SCMD_INITIALIZED (1 << 1) #define SCMD_LAST (1 << 2) +#define SCMD_FAIL_IF_RECOVERING (1 << 4) /* flags preserved across unprep / reprep */ -#define SCMD_PRESERVED_FLAGS (SCMD_INITIALIZED) +#define SCMD_PRESERVED_FLAGS (SCMD_INITIALIZED | SCMD_FAIL_IF_RECOVERING) /* for scmd->state */ #define SCMD_STATE_COMPLETE 0 From 1626c7bba1c42499d6753bd919803158e5792f08 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 18 Oct 2022 13:29:52 -0700 Subject: [PATCH 0311/4122] scsi: ufs: Remove an outdated comment Although the host lock had 
to be held by ufshcd_clk_scaling_start_busy() callers when that function was introduced, that is no longer the case today. Hence remove the comment that claims that callers of this function must hold the host lock. Reviewed-by: Bean Huo Reviewed-by: Adrian Hunter Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20221018202958.1902564-5-bvanassche@acm.org Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index c8f0fe740005..bdee494381ca 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -2013,7 +2013,6 @@ static void ufshcd_exit_clk_gating(struct ufs_hba *hba) destroy_workqueue(hba->clk_gating.clk_gating_workq); } -/* Must be called with host lock acquired */ static void ufshcd_clk_scaling_start_busy(struct ufs_hba *hba) { bool queue_resume_work = false; From 836d322d73cb08486ecc50787695175a135e62ba Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 18 Oct 2022 13:29:53 -0700 Subject: [PATCH 0312/4122] scsi: ufs: Use 'else' in ufshcd_set_dev_pwr_mode() Convert if (ret) { ... } if (!ret) { ... } into if (ret) { ... } else { ... }. Reviewed-by: Bean Huo Reviewed-by: Adrian Hunter Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20221018202958.1902564-6-bvanassche@acm.org Signed-off-by: Martin K. 
Petersen --- drivers/ufs/core/ufshcd.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index bdee494381ca..db1997e99da2 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -8797,10 +8797,9 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba, scsi_print_sense_hdr(sdp, NULL, &sshdr); ret = -EIO; } - } - - if (!ret) + } else { hba->curr_dev_pwr_mode = pwr_mode; + } scsi_device_put(sdp); hba->host->eh_noresume = 0; From dcd5b7637c6d442d957f73780a03047413ed3a10 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 18 Oct 2022 13:29:54 -0700 Subject: [PATCH 0313/4122] scsi: ufs: Reduce the START STOP UNIT timeout Reduce the START STOP UNIT command timeout to one second since on Android devices a kernel panic is triggered if an attempt to suspend the system takes more than 20 seconds. One second should be enough for the START STOP UNIT command since this command completes in less than a millisecond for the UFS devices I have access to. Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20221018202958.1902564-7-bvanassche@acm.org Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index db1997e99da2..f83a0045a129 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -8746,8 +8746,6 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba, struct scsi_device *sdp; unsigned long flags; int ret, retries; - unsigned long deadline; - int32_t remaining; spin_lock_irqsave(hba->host->host_lock, flags); sdp = hba->ufs_device_wlun; @@ -8775,14 +8773,9 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba, * callbacks hence set the RQF_PM flag so that it doesn't resume the * already suspended childs. 
*/ - deadline = jiffies + 10 * HZ; for (retries = 3; retries > 0; --retries) { - ret = -ETIMEDOUT; - remaining = deadline - jiffies; - if (remaining <= 0) - break; ret = scsi_execute(sdp, cmd, DMA_NONE, NULL, 0, NULL, &sshdr, - remaining / HZ, 0, 0, RQF_PM, NULL); + HZ, 0, 0, RQF_PM, NULL); if (!scsi_status_is_check_condition(ret) || !scsi_sense_valid(&sshdr) || sshdr.sense_key != UNIT_ATTENTION) From 579a4e9dbd53978cad8df88dc612837cdd210ce0 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 18 Oct 2022 13:29:55 -0700 Subject: [PATCH 0314/4122] scsi: ufs: Try harder to change the power mode Instead of only retrying the START STOP UNIT command if a unit attention is reported, repeat it if any SCSI error is reported by the device or if the command timed out. Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20221018202958.1902564-8-bvanassche@acm.org Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index f83a0045a129..84ca17d29898 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -8776,9 +8776,11 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba, for (retries = 3; retries > 0; --retries) { ret = scsi_execute(sdp, cmd, DMA_NONE, NULL, 0, NULL, &sshdr, HZ, 0, 0, RQF_PM, NULL); - if (!scsi_status_is_check_condition(ret) || - !scsi_sense_valid(&sshdr) || - sshdr.sense_key != UNIT_ATTENTION) + /* + * scsi_execute() only returns a negative value if the request + * queue is dying. + */ + if (ret <= 0) break; } if (ret) { From 1a547cbc6fdd07992f915a614a3f7ba3fccef8fb Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 18 Oct 2022 13:29:56 -0700 Subject: [PATCH 0315/4122] scsi: ufs: Track system suspend / resume activity Add a new boolean variable that tracks whether the system is suspending, suspended or resuming. 
This information will be used in a later commit to fix a deadlock between the SCSI error handler and the suspend code. Reviewed-by: Adrian Hunter Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20221018202958.1902564-9-bvanassche@acm.org Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 2 ++ include/ufs/ufshcd.h | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 84ca17d29898..2a32bcc93d2e 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -9247,6 +9247,7 @@ static int ufshcd_wl_suspend(struct device *dev) hba = shost_priv(sdev->host); down(&hba->host_sem); + hba->system_suspending = true; if (pm_runtime_suspended(dev)) goto out; @@ -9288,6 +9289,7 @@ out: hba->curr_dev_pwr_mode, hba->uic_link_state); if (!ret) hba->is_sys_suspended = false; + hba->system_suspending = false; up(&hba->host_sem); return ret; } diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index 9f28349ebcff..96538eb3a6c0 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -802,7 +802,9 @@ struct ufs_hba_monitor { * @caps: bitmask with information about UFS controller capabilities * @devfreq: frequency scaling information owned by the devfreq core * @clk_scaling: frequency scaling information owned by the UFS driver - * @is_sys_suspended: whether or not the entire system has been suspended + * @system_suspending: system suspend has been started and system resume has + * not yet finished. + * @is_sys_suspended: UFS device has been suspended because of system suspend * @urgent_bkops_lvl: keeps track of urgent bkops level for device * @is_urgent_bkops_lvl_checked: keeps track if the urgent bkops level for * device is known or not. 
@@ -943,6 +945,7 @@ struct ufs_hba { struct devfreq *devfreq; struct ufs_clk_scaling clk_scaling; + bool system_suspending; bool is_sys_suspended; enum bkops_status urgent_bkops_lvl; From 6a354a7e740ee779d8595bb3c555d415433f2b19 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 18 Oct 2022 13:29:57 -0700 Subject: [PATCH 0316/4122] scsi: ufs: Introduce the function ufshcd_execute_start_stop() Open-code scsi_execute() because a later patch will modify scmd->flags and because scsi_execute() does not support setting scmd->flags. No functionality is changed. Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20221018202958.1902564-10-bvanassche@acm.org Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 39 ++++++++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 2a32bcc93d2e..c5ccc7ba583b 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -8729,6 +8729,39 @@ static void ufshcd_hba_exit(struct ufs_hba *hba) } } +static int ufshcd_execute_start_stop(struct scsi_device *sdev, + enum ufs_dev_pwr_mode pwr_mode, + struct scsi_sense_hdr *sshdr) +{ + unsigned char cdb[6] = { START_STOP, 0, 0, 0, pwr_mode << 4, 0 }; + struct request *req; + struct scsi_cmnd *scmd; + int ret; + + req = scsi_alloc_request(sdev->request_queue, REQ_OP_DRV_IN, + BLK_MQ_REQ_PM); + if (IS_ERR(req)) + return PTR_ERR(req); + + scmd = blk_mq_rq_to_pdu(req); + scmd->cmd_len = COMMAND_SIZE(cdb[0]); + memcpy(scmd->cmnd, cdb, scmd->cmd_len); + scmd->allowed = 0/*retries*/; + req->timeout = 1 * HZ; + req->rq_flags |= RQF_PM | RQF_QUIET; + + blk_execute_rq(req, /*at_head=*/true); + + if (sshdr) + scsi_normalize_sense(scmd->sense_buffer, scmd->sense_len, + sshdr); + ret = scmd->result; + + blk_mq_free_request(req); + + return ret; +} + /** * ufshcd_set_dev_pwr_mode - sends START STOP UNIT command to set device * power mode @@ 
-8741,7 +8774,6 @@ static void ufshcd_hba_exit(struct ufs_hba *hba) static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba, enum ufs_dev_pwr_mode pwr_mode) { - unsigned char cmd[6] = { START_STOP }; struct scsi_sense_hdr sshdr; struct scsi_device *sdp; unsigned long flags; @@ -8766,16 +8798,13 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba, */ hba->host->eh_noresume = 1; - cmd[4] = pwr_mode << 4; - /* * Current function would be generally called from the power management * callbacks hence set the RQF_PM flag so that it doesn't resume the * already suspended childs. */ for (retries = 3; retries > 0; --retries) { - ret = scsi_execute(sdp, cmd, DMA_NONE, NULL, 0, NULL, &sshdr, - HZ, 0, 0, RQF_PM, NULL); + ret = ufshcd_execute_start_stop(sdp, pwr_mode, &sshdr); /* * scsi_execute() only returns a negative value if the request * queue is dying. From 7029e2151a7c6a5c60b35996d026528e7d51aae3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 18 Oct 2022 13:29:58 -0700 Subject: [PATCH 0317/4122] scsi: ufs: Fix a deadlock between PM and the SCSI error handler The following deadlock has been observed on multiple test setups: * ufshcd_wl_suspend() is waiting for blk_execute_rq(START STOP UNIT) to complete while ufshcd_wl_suspend() holds host_sem. * The SCSI error handler is activated, changes the host state to SHOST_RECOVERY, ufshcd_eh_host_reset_handler() and ufshcd_err_handler() are called and the latter function tries to obtain host_sem. This is a deadlock because blk_execute_rq() can't execute SCSI commands while the host is in the SHOST_RECOVERY state and because the error handler cannot make progress because host_sem is held by another thread. Fix this deadlock as follows: * Fail attempts to suspend the system while the SCSI error handler is in progress by setting the SCMD_FAIL_IF_RECOVERING flag for START STOP UNIT commands. 
* If the system is suspending and a START STOP UNIT command times out, handle the SCSI command timeout from inside the context of the SCSI timeout handler instead of activating the SCSI error handler. The runtime power management code is not affected by this deadlock since hba->host_sem is not touched by the runtime power management functions in the UFS driver. Reviewed-by: Adrian Hunter Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20221018202958.1902564-11-bvanassche@acm.org Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index c5ccc7ba583b..b2203dd79e8c 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -8292,6 +8292,28 @@ out: } } +static enum scsi_timeout_action ufshcd_eh_timed_out(struct scsi_cmnd *scmd) +{ + struct ufs_hba *hba = shost_priv(scmd->device->host); + + if (!hba->system_suspending) { + /* Activate the error handler in the SCSI core. */ + return SCSI_EH_NOT_HANDLED; + } + + /* + * If we get here we know that no TMFs are outstanding and also that + * the only pending command is a START STOP UNIT command. Handle the + * timeout of that command directly to prevent a deadlock between + * ufshcd_set_dev_pwr_mode() and ufshcd_err_handler(). + */ + ufshcd_link_recovery(hba); + dev_info(hba->dev, "%s() finished; outstanding_tasks = %#lx.\n", + __func__, hba->outstanding_tasks); + + return hba->outstanding_reqs ? 
SCSI_EH_RESET_TIMER : SCSI_EH_DONE; +} + static const struct attribute_group *ufshcd_driver_groups[] = { &ufs_sysfs_unit_descriptor_group, &ufs_sysfs_lun_attributes_group, @@ -8326,6 +8348,7 @@ static struct scsi_host_template ufshcd_driver_template = { .eh_abort_handler = ufshcd_abort, .eh_device_reset_handler = ufshcd_eh_device_reset_handler, .eh_host_reset_handler = ufshcd_eh_host_reset_handler, + .eh_timed_out = ufshcd_eh_timed_out, .this_id = -1, .sg_tablesize = SG_ALL, .cmd_per_lun = UFSHCD_CMD_PER_LUN, @@ -8747,6 +8770,7 @@ static int ufshcd_execute_start_stop(struct scsi_device *sdev, scmd->cmd_len = COMMAND_SIZE(cdb[0]); memcpy(scmd->cmnd, cdb, scmd->cmd_len); scmd->allowed = 0/*retries*/; + scmd->flags |= SCMD_FAIL_IF_RECOVERING; req->timeout = 1 * HZ; req->rq_flags |= RQF_PM | RQF_QUIET; From b203c67ebe752c8f2a2babf5e58d244c82680922 Mon Sep 17 00:00:00 2001 From: Tong Tiangen Date: Sat, 22 Oct 2022 01:43:40 +0000 Subject: [PATCH 0318/4122] csky: add arch support current_stack_pointer To follow the existing per-arch conventions, using "current_stack_pointer" to set sp. This will let it be used in non-arch places(like HARDENED_USERCOPY). Refer to the implementation of riscv commit fdecfea09328 ("riscv: Rename "sp_in_global" to "current_stack_pointer""). 
Link: https://lore.kernel.org/lkml/20220224060411.1855683-1-keescook@chromium.org/ Signed-off-by: Tong Tiangen Signed-off-by: Guo Ren --- arch/csky/Kconfig | 1 + arch/csky/include/asm/processor.h | 2 ++ arch/csky/kernel/stacktrace.c | 6 ++---- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index adee6ab36862..2236b5c0c213 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -9,6 +9,7 @@ config CSKY select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS + select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_INLINE_READ_LOCK if !PREEMPTION select ARCH_INLINE_READ_LOCK_BH if !PREEMPTION select ARCH_INLINE_READ_LOCK_IRQ if !PREEMPTION diff --git a/arch/csky/include/asm/processor.h b/arch/csky/include/asm/processor.h index 63ad71fab30d..ea75d72dea86 100644 --- a/arch/csky/include/asm/processor.h +++ b/arch/csky/include/asm/processor.h @@ -84,4 +84,6 @@ unsigned long __get_wchan(struct task_struct *p); #define cpu_relax() barrier() +register unsigned long current_stack_pointer __asm__("sp"); + #endif /* __ASM_CSKY_PROCESSOR_H */ diff --git a/arch/csky/kernel/stacktrace.c b/arch/csky/kernel/stacktrace.c index 9f78f5d21511..27ecd63e321b 100644 --- a/arch/csky/kernel/stacktrace.c +++ b/arch/csky/kernel/stacktrace.c @@ -23,10 +23,9 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, sp = user_stack_pointer(regs); pc = instruction_pointer(regs); } else if (task == NULL || task == current) { - const register unsigned long current_sp __asm__ ("sp"); const register unsigned long current_fp __asm__ ("r8"); fp = current_fp; - sp = current_sp; + sp = current_stack_pointer; pc = (unsigned long)walk_stackframe; } else { /* task blocked in __switch_to */ @@ -68,8 +67,7 @@ static void notrace walk_stackframe(struct task_struct *task, sp = user_stack_pointer(regs); pc = instruction_pointer(regs); } else if (task == NULL || task == current) { - const register unsigned 
long current_sp __asm__ ("sp"); - sp = current_sp; + sp = current_stack_pointer; pc = (unsigned long)walk_stackframe; } else { /* task blocked in __switch_to */ From ce0ba954805e0783ceb7304d4fb357a02038e231 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 7 Oct 2022 21:16:48 +0100 Subject: [PATCH 0319/4122] csky: Kconfig: Fix spelling mistake "Meory" -> "Memory" There is a spelling mistake in a Kconfig option description. Fix it. Signed-off-by: Colin Ian King Signed-off-by: Guo Ren --- arch/csky/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index 2236b5c0c213..e0ecd1cc81a9 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -270,7 +270,7 @@ menuconfig HAVE_TCM bool "Tightly-Coupled/Sram Memory" depends on !COMPILE_TEST help - The implementation are not only used by TCM (Tightly-Coupled Meory) + The implementation are not only used by TCM (Tightly-Coupled Memory) but also used by sram on SOC bus. It follow existed linux tcm software interface, so that old tcm application codes could be re-used directly. From 33a0a1e3b3d17445832177981dc7a1c6a5b009f8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 1 Oct 2022 18:53:15 +0200 Subject: [PATCH 0320/4122] kobject: modify kobject_get_path() to take a const * kobject_get_path() does not modify the kobject passed to it, so make the pointer constant. Cc: "Rafael J. 
Wysocki" Link: https://lore.kernel.org/r/20221001165315.2690141-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/kobject.h | 2 +- lib/kobject.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 57fb972fea05..592f9785b058 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -115,7 +115,7 @@ extern void kobject_put(struct kobject *kobj); extern const void *kobject_namespace(struct kobject *kobj); extern void kobject_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid); -extern char *kobject_get_path(struct kobject *kobj, gfp_t flag); +extern char *kobject_get_path(const struct kobject *kobj, gfp_t flag); struct kobj_type { void (*release)(struct kobject *kobj); diff --git a/lib/kobject.c b/lib/kobject.c index a0b2dbfcfa23..0380ec889a6a 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -94,10 +94,10 @@ static int create_dir(struct kobject *kobj) return 0; } -static int get_kobj_path_length(struct kobject *kobj) +static int get_kobj_path_length(const struct kobject *kobj) { int length = 1; - struct kobject *parent = kobj; + const struct kobject *parent = kobj; /* walk up the ancestors until we hit the one pointing to the * root. @@ -112,9 +112,9 @@ static int get_kobj_path_length(struct kobject *kobj) return length; } -static void fill_kobj_path(struct kobject *kobj, char *path, int length) +static void fill_kobj_path(const struct kobject *kobj, char *path, int length) { - struct kobject *parent; + const struct kobject *parent; --length; for (parent = kobj; parent; parent = parent->parent) { @@ -136,7 +136,7 @@ static void fill_kobj_path(struct kobject *kobj, char *path, int length) * * Return: The newly allocated memory, caller must free with kfree(). 
*/ -char *kobject_get_path(struct kobject *kobj, gfp_t gfp_mask) +char *kobject_get_path(const struct kobject *kobj, gfp_t gfp_mask) { char *path; int len; From 3d24903a6dd27ab817b4c6c24bee245ff06f7c8e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 21 Oct 2022 09:23:10 +0200 Subject: [PATCH 0321/4122] kobject: make get_ktype() take a const pointer get_ktype() does not modify the structure passed to it, so mark the parameter as being const to allow other const structures to be passed to it in the future. Cc: "Rafael J. Wysocki" Link: https://lore.kernel.org/r/20221021072310.3931690-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/kobject.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 592f9785b058..fc40fc81aeb1 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -198,7 +198,7 @@ static inline void kset_put(struct kset *k) kobject_put(&k->kobj); } -static inline const struct kobj_type *get_ktype(struct kobject *kobj) +static inline const struct kobj_type *get_ktype(const struct kobject *kobj) { return kobj->ktype; } From acd50e52a7f2e75b3109523d2f114fa46a4362c0 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 21 Oct 2022 19:09:50 +0100 Subject: [PATCH 0322/4122] staging: rtl8712: Remove variable xcnt The variable xcnt is being incremented but it is never referenced; it is redundant and can be removed.
Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20221021180950.29139-1-colin.i.king@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8712/rtl8712_xmit.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/staging/rtl8712/rtl8712_xmit.c b/drivers/staging/rtl8712/rtl8712_xmit.c index 84a22eba7ebf..4cb01f590673 100644 --- a/drivers/staging/rtl8712/rtl8712_xmit.c +++ b/drivers/staging/rtl8712/rtl8712_xmit.c @@ -601,7 +601,7 @@ int r8712_xmitframe_complete(struct _adapter *padapter, #ifdef CONFIG_R8712_TX_AGGR struct xmit_frame *p2ndxmitframe = NULL; #else - int res = _SUCCESS, xcnt = 0; + int res = _SUCCESS; #endif phwxmits = pxmitpriv->hwxmits; @@ -673,7 +673,6 @@ int r8712_xmitframe_complete(struct _adapter *padapter, dump_xframe(padapter, pxmitframe); else r8712_free_xmitframe_ex(pxmitpriv, pxmitframe); - xcnt++; #endif } else { /* pxmitframe == NULL && p2ndxmitframe == NULL */ From 02119c5d84e327483595687821f9c38c88d2d929 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Sun, 16 Oct 2022 12:07:40 +0100 Subject: [PATCH 0323/4122] staging: rtl8192u: Provide a TODO file for this driver Provide a TODO file that lists the tasks that should be carried out in order to move this driver off drivers/staging. It's missing from original addition of this driver. Signed-off-by: Peter Robinson Link: https://lore.kernel.org/r/20221016110743.1448067-1-pbrobinson@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192u/TODO | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 drivers/staging/rtl8192u/TODO diff --git a/drivers/staging/rtl8192u/TODO b/drivers/staging/rtl8192u/TODO new file mode 100644 index 000000000000..ab9d5d145b3b --- /dev/null +++ b/drivers/staging/rtl8192u/TODO @@ -0,0 +1,16 @@ +To-do list: + +* Correct the coding style according to Linux guidelines; please read the document + at https://www.kernel.org/doc/html/latest/process/coding-style.html. 
+* Remove unnecessary debugging/printing macros; for those that are still needed + use the proper kernel API (pr_debug(), dev_dbg(), netdev_dbg()). +* Remove dead code such as unusued functions, variables, fields, etc.. +* Use in-kernel API and remove unnecessary wrappers where possible. +* Fix bugs due to code that sleeps in atomic context. +* Remove the HAL layer and migrate its functionality into the relevant parts of + the driver. +* Switch to use LIB80211. +* Switch to use MAC80211. +* Switch to use CFG80211. +* Improve the error handling of various functions, particularly those that use + existing kernel APIs. From f124c1751844c4592110a292403f342c50a89344 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Sun, 16 Oct 2022 12:07:41 +0100 Subject: [PATCH 0324/4122] staging: rtl8192e: Update the TODO file for this driver The driver directory where it was proposed to move to has changed; while at it, add some more pieces for this driver to update. Signed-off-by: Peter Robinson Link: https://lore.kernel.org/r/20221016110743.1448067-2-pbrobinson@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/TODO | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/staging/rtl8192e/TODO b/drivers/staging/rtl8192e/TODO index d51f159d1adf..7221ae65d63e 100644 --- a/drivers/staging/rtl8192e/TODO +++ b/drivers/staging/rtl8192e/TODO @@ -1,2 +1,18 @@ -* merge into drivers/net/wireless/rtllib/rtl8192e +To-do list: + +* merge into drivers/net/wireless/realtek/rtlwifi/rtl8192* * clean up function naming +* Correct the coding style according to Linux guidelines; please read the document + at https://www.kernel.org/doc/html/latest/process/coding-style.html. +* Remove unnecessary debugging/printing macros; for those that are still needed + use the proper kernel API (pr_debug(), dev_dbg(), netdev_dbg()). +* Remove dead code such as unusued functions, variables, fields, etc..
+* Use in-kernel API and remove unnecessary wrappers where possible. +* Fix bugs due to code that sleeps in atomic context. +* Remove the HAL layer and migrate its functionality into the relevant parts of + the driver. +* Switch to use LIB80211. +* Switch to use MAC80211. +* Switch to use CFG80211. +* Improve the error handling of various functions, particularly those that use + existing kernel APIs. From ce51c0c7fae3390f070e6b515e3a57f6bfdfd18e Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Sun, 16 Oct 2022 12:07:42 +0100 Subject: [PATCH 0325/4122] staging: ks7010: Update the TODO file for this driver Add move to mac80211 from wext to the todo for this driver. Signed-off-by: Peter Robinson Link: https://lore.kernel.org/r/20221016110743.1448067-3-pbrobinson@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/ks7010/TODO | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/staging/ks7010/TODO b/drivers/staging/ks7010/TODO index ab6f39175d99..80c97543b977 100644 --- a/drivers/staging/ks7010/TODO +++ b/drivers/staging/ks7010/TODO @@ -27,6 +27,9 @@ Now the TODOs: - fix the 'card removal' event when card is inserted when booting - check what other upstream wireless mechanisms can be used instead of the custom ones here +- Switch to use LIB80211. +- Switch to use MAC80211. +- Switch to use CFG80211. Please send any patches to: Greg Kroah-Hartman From 1f3e0b41919208be71f869a753609d6c11e12e85 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Sun, 16 Oct 2022 12:07:43 +0100 Subject: [PATCH 0326/4122] staging: wlan-ng: Provide a TODO file for this driver Provide a TODO file that lists the tasks that should be carried out in order to move this driver off drivers/staging. It's missing from original addition of this driver. 
Signed-off-by: Peter Robinson Link: https://lore.kernel.org/r/20221016110743.1448067-4-pbrobinson@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/wlan-ng/TODO | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 drivers/staging/wlan-ng/TODO diff --git a/drivers/staging/wlan-ng/TODO b/drivers/staging/wlan-ng/TODO new file mode 100644 index 000000000000..ab9d5d145b3b --- /dev/null +++ b/drivers/staging/wlan-ng/TODO @@ -0,0 +1,16 @@ +To-do list: + +* Correct the coding style according to Linux guidelines; please read the document + at https://www.kernel.org/doc/html/latest/process/coding-style.html. +* Remove unnecessary debugging/printing macros; for those that are still needed + use the proper kernel API (pr_debug(), dev_dbg(), netdev_dbg()). +* Remove dead code such as unusued functions, variables, fields, etc.. +* Use in-kernel API and remove unnecessary wrappers where possible. +* Fix bugs due to code that sleeps in atomic context. +* Remove the HAL layer and migrate its functionality into the relevant parts of + the driver. +* Switch to use LIB80211. +* Switch to use MAC80211. +* Switch to use CFG80211. +* Improve the error handling of various functions, particularly those that use + existing kernel APIs. From ea679d8e032b0affc65c9d56b7bad3ef144366e1 Mon Sep 17 00:00:00 2001 From: Deepak R Varma Date: Thu, 20 Oct 2022 22:50:00 +0530 Subject: [PATCH 0327/4122] staging: most: dim2: correct misleading struct type name Correct the misleading struct type name dim_ch_state_t to dim_ch_state since this is not a typedef but a normal structure declaration.
Suggested-by: Julia Lawall Signed-off-by: Deepak R Varma Link: https://lore.kernel.org/r/Y1GDQO+06fD24Pf/@debian-BULLSEYE-live-builder-AMD64 Signed-off-by: Greg Kroah-Hartman --- drivers/staging/most/dim2/dim2.c | 4 ++-- drivers/staging/most/dim2/hal.c | 4 ++-- drivers/staging/most/dim2/hal.h | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/staging/most/dim2/dim2.c b/drivers/staging/most/dim2/dim2.c index 97dff82b7a5f..7a5f80e637a0 100644 --- a/drivers/staging/most/dim2/dim2.c +++ b/drivers/staging/most/dim2/dim2.c @@ -161,7 +161,7 @@ static int try_start_dim_transfer(struct hdm_channel *hdm_ch) struct list_head *head = &hdm_ch->pending_list; struct mbo *mbo; unsigned long flags; - struct dim_ch_state_t st; + struct dim_ch_state st; BUG_ON(!hdm_ch); BUG_ON(!hdm_ch->is_initialized); @@ -259,7 +259,7 @@ static void retrieve_netinfo(struct dim2_hdm *dev, struct mbo *mbo) static void service_done_flag(struct dim2_hdm *dev, int ch_idx) { struct hdm_channel *hdm_ch = dev->hch + ch_idx; - struct dim_ch_state_t st; + struct dim_ch_state st; struct list_head *head; struct mbo *mbo; int done_buffers; diff --git a/drivers/staging/most/dim2/hal.c b/drivers/staging/most/dim2/hal.c index 65282c276862..a5d40b5b138a 100644 --- a/drivers/staging/most/dim2/hal.c +++ b/drivers/staging/most/dim2/hal.c @@ -943,8 +943,8 @@ u8 dim_service_channel(struct dim_channel *ch) return channel_service(ch); } -struct dim_ch_state_t *dim_get_channel_state(struct dim_channel *ch, - struct dim_ch_state_t *state_ptr) +struct dim_ch_state *dim_get_channel_state(struct dim_channel *ch, + struct dim_ch_state *state_ptr) { if (!ch || !state_ptr) return NULL; diff --git a/drivers/staging/most/dim2/hal.h b/drivers/staging/most/dim2/hal.h index 20531449acab..ef10a8741c10 100644 --- a/drivers/staging/most/dim2/hal.h +++ b/drivers/staging/most/dim2/hal.h @@ -27,7 +27,7 @@ enum mlb_clk_speed { CLK_8192FS = 7, }; -struct dim_ch_state_t { +struct dim_ch_state { bool ready; /* Shows 
readiness to enqueue next buffer */ u16 done_buffers; /* Number of completed buffers */ }; @@ -87,8 +87,8 @@ void dim_service_ahb_int_irq(struct dim_channel *const *channels); u8 dim_service_channel(struct dim_channel *ch); -struct dim_ch_state_t *dim_get_channel_state(struct dim_channel *ch, - struct dim_ch_state_t *state_ptr); +struct dim_ch_state *dim_get_channel_state(struct dim_channel *ch, + struct dim_ch_state *state_ptr); u16 dim_dbr_space(struct dim_channel *ch); From 6cc7b783f8295677a19e88c12e47021c06c3b6ef Mon Sep 17 00:00:00 2001 From: Deepak R Varma Date: Fri, 21 Oct 2022 02:56:47 +0530 Subject: [PATCH 0328/4122] staging: r8188eu: use Linux kernel variable naming convention Follow the Linux Kernel coding style variable naming convention instead of using camelCase style. Issue reported by checkpatch script for these variables: tagLen, tagType, networkAddr, ipAddr, macAddr Signed-off-by: Deepak R Varma Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/a107c527e9032c22a62e93ff12d5fae625e70212.1666299151.git.drv@mailo.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_br_ext.c | 112 +++++++++++----------- 1 file changed, 56 insertions(+), 56 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_br_ext.c b/drivers/staging/r8188eu/core/rtw_br_ext.c index 4c5f30792a46..e509b8454e25 100644 --- a/drivers/staging/r8188eu/core/rtw_br_ext.c +++ b/drivers/staging/r8188eu/core/rtw_br_ext.c @@ -50,17 +50,17 @@ static unsigned char *__nat25_find_pppoe_tag(struct pppoe_hdr *ph, unsigned short type) { unsigned char *cur_ptr, *start_ptr; - unsigned short tagLen, tagType; + unsigned short tag_len, tag_type; start_ptr = (unsigned char *)ph->tag; cur_ptr = (unsigned char *)ph->tag; while ((cur_ptr - start_ptr) < ntohs(ph->length)) { /* prevent un-alignment access */ - tagType = (unsigned short)((cur_ptr[0] << 8) + cur_ptr[1]); - tagLen = (unsigned short)((cur_ptr[2] << 8) + cur_ptr[3]); - if (tagType == type) + tag_type = 
(unsigned short)((cur_ptr[0] << 8) + cur_ptr[1]); + tag_len = (unsigned short)((cur_ptr[2] << 8) + cur_ptr[3]); + if (tag_type == type) return cur_ptr; - cur_ptr = cur_ptr + TAG_HDR_LEN + tagLen; + cur_ptr = cur_ptr + TAG_HDR_LEN + tag_len; } return NULL; } @@ -111,32 +111,32 @@ static int __nat25_has_expired(struct nat25_network_db_entry *fdb) return 0; } -static void __nat25_generate_ipv4_network_addr(unsigned char *networkAddr, - unsigned int *ipAddr) +static void __nat25_generate_ipv4_network_addr(unsigned char *addr, + unsigned int *ip_addr) { - memset(networkAddr, 0, MAX_NETWORK_ADDR_LEN); + memset(addr, 0, MAX_NETWORK_ADDR_LEN); - networkAddr[0] = NAT25_IPV4; - memcpy(networkAddr + 7, (unsigned char *)ipAddr, 4); + addr[0] = NAT25_IPV4; + memcpy(addr + 7, (unsigned char *)ip_addr, 4); } -static void __nat25_generate_pppoe_network_addr(unsigned char *networkAddr, +static void __nat25_generate_pppoe_network_addr(unsigned char *addr, unsigned char *ac_mac, __be16 *sid) { - memset(networkAddr, 0, MAX_NETWORK_ADDR_LEN); + memset(addr, 0, MAX_NETWORK_ADDR_LEN); - networkAddr[0] = NAT25_PPPOE; - memcpy(networkAddr + 1, (unsigned char *)sid, 2); - memcpy(networkAddr + 3, (unsigned char *)ac_mac, 6); + addr[0] = NAT25_PPPOE; + memcpy(addr + 1, (unsigned char *)sid, 2); + memcpy(addr + 3, (unsigned char *)ac_mac, 6); } -static void __nat25_generate_ipv6_network_addr(unsigned char *networkAddr, - unsigned int *ipAddr) +static void __nat25_generate_ipv6_network_addr(unsigned char *addr, + unsigned int *ip_addr) { - memset(networkAddr, 0, MAX_NETWORK_ADDR_LEN); + memset(addr, 0, MAX_NETWORK_ADDR_LEN); - networkAddr[0] = NAT25_IPV6; - memcpy(networkAddr + 1, (unsigned char *)ipAddr, 16); + addr[0] = NAT25_IPV6; + memcpy(addr + 1, (unsigned char *)ip_addr, 16); } static unsigned char *scan_tlv(unsigned char *data, int len, unsigned char tag, unsigned char len8b) @@ -200,40 +200,40 @@ static int update_nd_link_layer_addr(unsigned char *data, int len, unsigned char return 0; 
} -static int __nat25_network_hash(unsigned char *networkAddr) +static int __nat25_network_hash(unsigned char *addr) { - if (networkAddr[0] == NAT25_IPV4) { + if (addr[0] == NAT25_IPV4) { unsigned long x; - x = networkAddr[7] ^ networkAddr[8] ^ networkAddr[9] ^ networkAddr[10]; + x = addr[7] ^ addr[8] ^ addr[9] ^ addr[10]; return x & (NAT25_HASH_SIZE - 1); - } else if (networkAddr[0] == NAT25_IPX) { + } else if (addr[0] == NAT25_IPX) { unsigned long x; - x = networkAddr[1] ^ networkAddr[2] ^ networkAddr[3] ^ networkAddr[4] ^ networkAddr[5] ^ - networkAddr[6] ^ networkAddr[7] ^ networkAddr[8] ^ networkAddr[9] ^ networkAddr[10]; + x = addr[1] ^ addr[2] ^ addr[3] ^ addr[4] ^ addr[5] ^ + addr[6] ^ addr[7] ^ addr[8] ^ addr[9] ^ addr[10]; return x & (NAT25_HASH_SIZE - 1); - } else if (networkAddr[0] == NAT25_APPLE) { + } else if (addr[0] == NAT25_APPLE) { unsigned long x; - x = networkAddr[1] ^ networkAddr[2] ^ networkAddr[3]; + x = addr[1] ^ addr[2] ^ addr[3]; return x & (NAT25_HASH_SIZE - 1); - } else if (networkAddr[0] == NAT25_PPPOE) { + } else if (addr[0] == NAT25_PPPOE) { unsigned long x; - x = networkAddr[0] ^ networkAddr[1] ^ networkAddr[2] ^ networkAddr[3] ^ networkAddr[4] ^ networkAddr[5] ^ networkAddr[6] ^ networkAddr[7] ^ networkAddr[8]; + x = addr[0] ^ addr[1] ^ addr[2] ^ addr[3] ^ addr[4] ^ addr[5] ^ addr[6] ^ addr[7] ^ addr[8]; return x & (NAT25_HASH_SIZE - 1); - } else if (networkAddr[0] == NAT25_IPV6) { + } else if (addr[0] == NAT25_IPV6) { unsigned long x; - x = networkAddr[1] ^ networkAddr[2] ^ networkAddr[3] ^ networkAddr[4] ^ networkAddr[5] ^ - networkAddr[6] ^ networkAddr[7] ^ networkAddr[8] ^ networkAddr[9] ^ networkAddr[10] ^ - networkAddr[11] ^ networkAddr[12] ^ networkAddr[13] ^ networkAddr[14] ^ networkAddr[15] ^ - networkAddr[16]; + x = addr[1] ^ addr[2] ^ addr[3] ^ addr[4] ^ addr[5] ^ + addr[6] ^ addr[7] ^ addr[8] ^ addr[9] ^ addr[10] ^ + addr[11] ^ addr[12] ^ addr[13] ^ addr[14] ^ addr[15] ^ + addr[16]; return x & (NAT25_HASH_SIZE - 1); } 
else { @@ -241,7 +241,7 @@ static int __nat25_network_hash(unsigned char *networkAddr) int i; for (i = 0; i < MAX_NETWORK_ADDR_LEN; i++) - x ^= networkAddr[i]; + x ^= addr[i]; return x & (NAT25_HASH_SIZE - 1); } @@ -269,17 +269,17 @@ static void __network_hash_unlink(struct nat25_network_db_entry *ent) } static void __nat25_db_network_insert(struct adapter *priv, - unsigned char *macAddr, unsigned char *networkAddr) + unsigned char *mac_addr, unsigned char *addr) { struct nat25_network_db_entry *db; int hash; spin_lock_bh(&priv->br_ext_lock); - hash = __nat25_network_hash(networkAddr); + hash = __nat25_network_hash(addr); db = priv->nethash[hash]; while (db) { - if (!memcmp(db->networkAddr, networkAddr, MAX_NETWORK_ADDR_LEN)) { - memcpy(db->macAddr, macAddr, ETH_ALEN); + if (!memcmp(db->networkAddr, addr, MAX_NETWORK_ADDR_LEN)) { + memcpy(db->macAddr, mac_addr, ETH_ALEN); db->ageing_timer = jiffies; spin_unlock_bh(&priv->br_ext_lock); return; @@ -291,8 +291,8 @@ static void __nat25_db_network_insert(struct adapter *priv, spin_unlock_bh(&priv->br_ext_lock); return; } - memcpy(db->networkAddr, networkAddr, MAX_NETWORK_ADDR_LEN); - memcpy(db->macAddr, macAddr, ETH_ALEN); + memcpy(db->networkAddr, addr, MAX_NETWORK_ADDR_LEN); + memcpy(db->macAddr, mac_addr, ETH_ALEN); atomic_set(&db->use_count, 1); db->ageing_timer = jiffies; @@ -366,7 +366,7 @@ void nat25_db_expire(struct adapter *priv) int nat25_db_handle(struct adapter *priv, struct sk_buff *skb, int method) { unsigned short protocol; - unsigned char networkAddr[MAX_NETWORK_ADDR_LEN]; + unsigned char addr[MAX_NETWORK_ADDR_LEN]; unsigned int tmp; if (!skb) @@ -395,9 +395,9 @@ int nat25_db_handle(struct adapter *priv, struct sk_buff *skb, int method) if (iph->saddr == 0) return 0; tmp = be32_to_cpu(iph->saddr); - __nat25_generate_ipv4_network_addr(networkAddr, &tmp); + __nat25_generate_ipv4_network_addr(addr, &tmp); /* record source IP address and , source mac address into db */ - __nat25_db_network_insert(priv, 
skb->data + ETH_ALEN, networkAddr); + __nat25_db_network_insert(priv, skb->data + ETH_ALEN, addr); return 0; default: return -1; @@ -421,8 +421,8 @@ int nat25_db_handle(struct adapter *priv, struct sk_buff *skb, int method) memcpy(arp_ptr, GET_MY_HWADDR(priv), ETH_ALEN); arp_ptr += arp->ar_hln; sender = (unsigned int *)arp_ptr; - __nat25_generate_ipv4_network_addr(networkAddr, sender); - __nat25_db_network_insert(priv, skb->data + ETH_ALEN, networkAddr); + __nat25_generate_ipv4_network_addr(addr, sender); + __nat25_db_network_insert(priv, skb->data + ETH_ALEN, addr); return 0; default: return -1; @@ -495,9 +495,9 @@ int nat25_db_handle(struct adapter *priv, struct sk_buff *skb, int method) return -1; } } else { /* session phase */ - __nat25_generate_pppoe_network_addr(networkAddr, skb->data, &ph->sid); + __nat25_generate_pppoe_network_addr(addr, skb->data, &ph->sid); - __nat25_db_network_insert(priv, skb->data + ETH_ALEN, networkAddr); + __nat25_db_network_insert(priv, skb->data + ETH_ALEN, addr); if (!priv->ethBrExtInfo.addPPPoETag && priv->pppoe_connection_in_progress && @@ -548,8 +548,8 @@ int nat25_db_handle(struct adapter *priv, struct sk_buff *skb, int method) return -1; case NAT25_INSERT: if (memcmp(&iph->saddr, "\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0", 16)) { - __nat25_generate_ipv6_network_addr(networkAddr, (unsigned int *)&iph->saddr); - __nat25_db_network_insert(priv, skb->data + ETH_ALEN, networkAddr); + __nat25_generate_ipv6_network_addr(addr, (unsigned int *)&iph->saddr); + __nat25_db_network_insert(priv, skb->data + ETH_ALEN, addr); if (iph->nexthdr == IPPROTO_ICMPV6 && skb->len > (ETH_HLEN + sizeof(*iph) + 4)) { @@ -639,17 +639,17 @@ void dhcp_flag_bcast(struct adapter *priv, struct sk_buff *skb) } } -void *scdb_findEntry(struct adapter *priv, unsigned char *ipAddr) +void *scdb_findEntry(struct adapter *priv, unsigned char *ip_addr) { - unsigned char networkAddr[MAX_NETWORK_ADDR_LEN]; + unsigned char addr[MAX_NETWORK_ADDR_LEN]; struct 
nat25_network_db_entry *db; int hash; - __nat25_generate_ipv4_network_addr(networkAddr, (unsigned int *)ipAddr); - hash = __nat25_network_hash(networkAddr); + __nat25_generate_ipv4_network_addr(addr, (unsigned int *)ip_addr); + hash = __nat25_network_hash(addr); db = priv->nethash[hash]; while (db) { - if (!memcmp(db->networkAddr, networkAddr, MAX_NETWORK_ADDR_LEN)) { + if (!memcmp(db->networkAddr, addr, MAX_NETWORK_ADDR_LEN)) { return (void *)db; } From 9d76dae6c86e7945b51054cd477c5b8f0bcb3a87 Mon Sep 17 00:00:00 2001 From: Deepak R Varma Date: Fri, 21 Oct 2022 02:57:16 +0530 Subject: [PATCH 0329/4122] staging: r8188eu: reformat long computation lines Reformat long running computation instructions to improve code readability. Address checkpatch script complaints like: CHECK: line length of 171 exceeds 100 columns Signed-off-by: Deepak R Varma Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/e07506ef1dc4ac1d3f8b076a8182628bd0e5cec0.1666299151.git.drv@mailo.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_br_ext.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_br_ext.c b/drivers/staging/r8188eu/core/rtw_br_ext.c index e509b8454e25..d4059f0fc362 100644 --- a/drivers/staging/r8188eu/core/rtw_br_ext.c +++ b/drivers/staging/r8188eu/core/rtw_br_ext.c @@ -212,7 +212,7 @@ static int __nat25_network_hash(unsigned char *addr) unsigned long x; x = addr[1] ^ addr[2] ^ addr[3] ^ addr[4] ^ addr[5] ^ - addr[6] ^ addr[7] ^ addr[8] ^ addr[9] ^ addr[10]; + addr[6] ^ addr[7] ^ addr[8] ^ addr[9] ^ addr[10]; return x & (NAT25_HASH_SIZE - 1); } else if (addr[0] == NAT25_APPLE) { @@ -224,16 +224,16 @@ static int __nat25_network_hash(unsigned char *addr) } else if (addr[0] == NAT25_PPPOE) { unsigned long x; - x = addr[0] ^ addr[1] ^ addr[2] ^ addr[3] ^ addr[4] ^ addr[5] ^ addr[6] ^ addr[7] ^ addr[8]; + x = addr[0] ^ addr[1] ^ addr[2] ^ addr[3] ^ addr[4] ^ + addr[5] ^ addr[6] 
^ addr[7] ^ addr[8]; return x & (NAT25_HASH_SIZE - 1); } else if (addr[0] == NAT25_IPV6) { unsigned long x; - x = addr[1] ^ addr[2] ^ addr[3] ^ addr[4] ^ addr[5] ^ - addr[6] ^ addr[7] ^ addr[8] ^ addr[9] ^ addr[10] ^ - addr[11] ^ addr[12] ^ addr[13] ^ addr[14] ^ addr[15] ^ - addr[16]; + x = addr[1] ^ addr[2] ^ addr[3] ^ addr[4] ^ addr[5] ^ addr[6] ^ + addr[7] ^ addr[8] ^ addr[9] ^ addr[10] ^ addr[11] ^ addr[12] ^ + addr[13] ^ addr[14] ^ addr[15] ^ addr[16]; return x & (NAT25_HASH_SIZE - 1); } else { From 3c480b32431272e0a4a78ee1f20bba96deaddd72 Mon Sep 17 00:00:00 2001 From: Deepak R Varma Date: Fri, 21 Oct 2022 02:57:47 +0530 Subject: [PATCH 0330/4122] staging: r8188eu: remove {} for single statement blocks As per the Linux kernel coding-style guidelines, there is no need to use {} for single statement blocks. Issue flagged by checkpatch script. Signed-off-by: Deepak R Varma Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/a50460e1507621b29a7901cc4ff9501b172417db.1666299151.git.drv@mailo.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_br_ext.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_br_ext.c b/drivers/staging/r8188eu/core/rtw_br_ext.c index d4059f0fc362..b418cbc307b3 100644 --- a/drivers/staging/r8188eu/core/rtw_br_ext.c +++ b/drivers/staging/r8188eu/core/rtw_br_ext.c @@ -649,9 +649,8 @@ void *scdb_findEntry(struct adapter *priv, unsigned char *ip_addr) hash = __nat25_network_hash(addr); db = priv->nethash[hash]; while (db) { - if (!memcmp(db->networkAddr, addr, MAX_NETWORK_ADDR_LEN)) { + if (!memcmp(db->networkAddr, addr, MAX_NETWORK_ADDR_LEN)) return (void *)db; - } db = db->next_hash; } From 227041c5d88aace2a1c95d6741438015ac1d9560 Mon Sep 17 00:00:00 2001 From: Deepak R Varma Date: Fri, 21 Oct 2022 02:58:39 +0530 Subject: [PATCH 0331/4122] staging: r8188eu: use htons macro instead of __constant_htons Macro "htons" is more efficient and clearer. 
It should be used for constants instead of the __constant_htons macro. Resolves following checkpatch script complaint: WARNING: __constant_htons should be htons Signed-off-by: Deepak R Varma Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/b46adfbdce0362ed0dbe0fc957ef2f47a93c24bb.1666299151.git.drv@mailo.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_br_ext.c | 6 +++--- drivers/staging/r8188eu/core/rtw_xmit.c | 14 +++++++------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_br_ext.c b/drivers/staging/r8188eu/core/rtw_br_ext.c index b418cbc307b3..a23f7df373ed 100644 --- a/drivers/staging/r8188eu/core/rtw_br_ext.c +++ b/drivers/staging/r8188eu/core/rtw_br_ext.c @@ -606,14 +606,14 @@ void dhcp_flag_bcast(struct adapter *priv, struct sk_buff *skb) if (!priv->ethBrExtInfo.dhcp_bcst_disable) { __be16 protocol = *((__be16 *)(skb->data + 2 * ETH_ALEN)); - if (protocol == __constant_htons(ETH_P_IP)) { /* IP */ + if (protocol == htons(ETH_P_IP)) { /* IP */ struct iphdr *iph = (struct iphdr *)(skb->data + ETH_HLEN); if (iph->protocol == IPPROTO_UDP) { /* UDP */ struct udphdr *udph = (struct udphdr *)((size_t)iph + (iph->ihl << 2)); - if ((udph->source == __constant_htons(CLIENT_PORT)) && - (udph->dest == __constant_htons(SERVER_PORT))) { /* DHCP request */ + if ((udph->source == htons(CLIENT_PORT)) && + (udph->dest == htons(SERVER_PORT))) { /* DHCP request */ struct dhcpMessage *dhcph = (struct dhcpMessage *)((size_t)udph + sizeof(struct udphdr)); u32 cookie = be32_to_cpu((__be32)dhcph->cookie); diff --git a/drivers/staging/r8188eu/core/rtw_xmit.c b/drivers/staging/r8188eu/core/rtw_xmit.c index 873d2c5c3634..4f8220428328 100644 --- a/drivers/staging/r8188eu/core/rtw_xmit.c +++ b/drivers/staging/r8188eu/core/rtw_xmit.c @@ -1622,14 +1622,14 @@ static int rtw_br_client_tx(struct adapter *padapter, struct sk_buff **pskb) spin_lock_bh(&padapter->br_ext_lock); if (!(skb->data[0] 
& 1) && br_port && memcmp(skb->data + ETH_ALEN, padapter->br_mac, ETH_ALEN) && - *((__be16 *)(skb->data + ETH_ALEN * 2)) != __constant_htons(ETH_P_8021Q) && - *((__be16 *)(skb->data + ETH_ALEN * 2)) == __constant_htons(ETH_P_IP) && + *((__be16 *)(skb->data + ETH_ALEN * 2)) != htons(ETH_P_8021Q) && + *((__be16 *)(skb->data + ETH_ALEN * 2)) == htons(ETH_P_IP) && !memcmp(padapter->scdb_mac, skb->data + ETH_ALEN, ETH_ALEN) && padapter->scdb_entry) { memcpy(skb->data + ETH_ALEN, GET_MY_HWADDR(padapter), ETH_ALEN); padapter->scdb_entry->ageing_timer = jiffies; spin_unlock_bh(&padapter->br_ext_lock); } else { - if (*((__be16 *)(skb->data + ETH_ALEN * 2)) == __constant_htons(ETH_P_8021Q)) { + if (*((__be16 *)(skb->data + ETH_ALEN * 2)) == htons(ETH_P_8021Q)) { is_vlan_tag = 1; vlan_hdr = *((unsigned short *)(skb->data + ETH_ALEN * 2 + 2)); for (i = 0; i < 6; i++) @@ -1637,10 +1637,10 @@ static int rtw_br_client_tx(struct adapter *padapter, struct sk_buff **pskb) skb_pull(skb, 4); } if (!memcmp(skb->data + ETH_ALEN, padapter->br_mac, ETH_ALEN) && - (*((__be16 *)(skb->data + ETH_ALEN * 2)) == __constant_htons(ETH_P_IP))) + (*((__be16 *)(skb->data + ETH_ALEN * 2)) == htons(ETH_P_IP))) memcpy(padapter->br_ip, skb->data + WLAN_ETHHDR_LEN + 12, 4); - if (*((__be16 *)(skb->data + ETH_ALEN * 2)) == __constant_htons(ETH_P_IP)) { + if (*((__be16 *)(skb->data + ETH_ALEN * 2)) == htons(ETH_P_IP)) { if (memcmp(padapter->scdb_mac, skb->data + ETH_ALEN, ETH_ALEN)) { padapter->scdb_entry = (struct nat25_network_db_entry *)scdb_findEntry(padapter, skb->data + WLAN_ETHHDR_LEN + 12); @@ -1669,7 +1669,7 @@ static int rtw_br_client_tx(struct adapter *padapter, struct sk_buff **pskb) skb_push(skb, 4); for (i = 0; i < 6; i++) *((unsigned short *)(skb->data + i * 2)) = *((unsigned short *)(skb->data + 4 + i * 2)); - *((__be16 *)(skb->data + ETH_ALEN * 2)) = __constant_htons(ETH_P_8021Q); + *((__be16 *)(skb->data + ETH_ALEN * 2)) = htons(ETH_P_8021Q); *((unsigned short *)(skb->data + ETH_ALEN * 2 
+ 2)) = vlan_hdr; } @@ -1708,7 +1708,7 @@ static int rtw_br_client_tx(struct adapter *padapter, struct sk_buff **pskb) skb_push(skb, 4); for (i = 0; i < 6; i++) *((unsigned short *)(skb->data + i * 2)) = *((unsigned short *)(skb->data + 4 + i * 2)); - *((__be16 *)(skb->data + ETH_ALEN * 2)) = __constant_htons(ETH_P_8021Q); + *((__be16 *)(skb->data + ETH_ALEN * 2)) = htons(ETH_P_8021Q); *((unsigned short *)(skb->data + ETH_ALEN * 2 + 2)) = vlan_hdr; } } From 0f2635b3ff1057d50bfe4a011c5706117e97e114 Mon Sep 17 00:00:00 2001 From: Deepak R Varma Date: Fri, 21 Oct 2022 02:59:37 +0530 Subject: [PATCH 0332/4122] staging: r8188eu: correct misspelled words in comments Fix spelling mistakes in code comments across the driver. Signed-off-by: Deepak R Varma Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/00be5f2a97b0c899279bd8f9cd27634186b77b9d.1666299151.git.drv@mailo.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_ioctl_set.c | 2 +- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 36 +++++++++---------- drivers/staging/r8188eu/core/rtw_recv.c | 8 ++--- drivers/staging/r8188eu/hal/HalPhyRf_8188e.c | 2 +- drivers/staging/r8188eu/hal/odm_RTL8188E.c | 2 +- .../staging/r8188eu/hal/rtl8188e_hal_init.c | 2 +- drivers/staging/r8188eu/hal/rtl8188e_phycfg.c | 16 ++++----- .../staging/r8188eu/include/Hal8188EPhyReg.h | 4 +-- .../staging/r8188eu/include/rtl8188e_hal.h | 2 +- .../staging/r8188eu/include/rtl8188e_spec.h | 6 ++-- drivers/staging/r8188eu/include/rtw_cmd.h | 4 +-- drivers/staging/r8188eu/include/rtw_recv.h | 4 +-- drivers/staging/r8188eu/include/rtw_xmit.h | 2 +- drivers/staging/r8188eu/include/wifi.h | 12 +++---- drivers/staging/r8188eu/os_dep/ioctl_linux.c | 6 ++-- 15 files changed, 54 insertions(+), 54 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_ioctl_set.c b/drivers/staging/r8188eu/core/rtw_ioctl_set.c index 55e6b0f41dc3..786431826659 100644 --- a/drivers/staging/r8188eu/core/rtw_ioctl_set.c +++ 
b/drivers/staging/r8188eu/core/rtw_ioctl_set.c @@ -287,7 +287,7 @@ u8 rtw_set_802_11_infrastructure_mode(struct adapter *padapter, if ((*pold_state == Ndis802_11Infrastructure) || (*pold_state == Ndis802_11IBSS)) { if (check_fwstate(pmlmepriv, _FW_LINKED)) - rtw_indicate_disconnect(padapter); /* will clr Linked_state; before this function, we must have chked whether issue dis-assoc_cmd or not */ + rtw_indicate_disconnect(padapter); /* will clr Linked_state; before this function, we must have checked whether issue dis-assoc_cmd or not */ } *pold_state = networktype; diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index fda446b6779c..d146b94307b8 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -137,7 +137,7 @@ static struct rt_channel_plan_map RTW_ChannelPlanMap[RT_CHANNEL_DOMAIN_MAX] = { {0x03}, /* 0x41, RT_CHANNEL_DOMAIN_GLOBAL_DOAMIN_2G */ }; -static struct rt_channel_plan_map RTW_CHANNEL_PLAN_MAP_REALTEK_DEFINE = {0x03}; /* use the conbination for max channel numbers */ +static struct rt_channel_plan_map RTW_CHANNEL_PLAN_MAP_REALTEK_DEFINE = {0x03}; /* use the combination for max channel numbers */ /* * Search the @param channel_num in given @param channel_set @@ -1751,7 +1751,7 @@ void issue_p2p_GO_request(struct adapter *padapter, u8 *raddr) p2pie[p2pielen++] = 0x09; /* WFA P2P v1.0 */ /* Commented by Albert 20110306 */ - /* According to the P2P Specification, the group negoitation request frame should contain 9 P2P attributes */ + /* According to the P2P Specification, the group negotiation request frame should contain 9 P2P attributes */ /* 1. P2P Capability */ /* 2. Group Owner Intent */ /* 3. 
Configuration Timeout */ @@ -2106,7 +2106,7 @@ static void issue_p2p_GO_response(struct adapter *padapter, u8 *raddr, u8 *frame p2pie[p2pielen++] = 0x09; /* WFA P2P v1.0 */ /* Commented by Albert 20100908 */ - /* According to the P2P Specification, the group negoitation response frame should contain 9 P2P attributes */ + /* According to the P2P Specification, the group negotiation response frame should contain 9 P2P attributes */ /* 1. Status */ /* 2. P2P Capability */ /* 3. Group Owner Intent */ @@ -2402,7 +2402,7 @@ static void issue_p2p_GO_confirm(struct adapter *padapter, u8 *raddr, u8 result) p2pie[p2pielen++] = 0x09; /* WFA P2P v1.0 */ /* Commented by Albert 20110306 */ - /* According to the P2P Specification, the group negoitation request frame should contain 5 P2P attributes */ + /* According to the P2P Specification, the group negotiation request frame should contain 5 P2P attributes */ /* 1. Status */ /* 2. P2P Capability */ /* 3. Operating Channel */ @@ -4010,7 +4010,7 @@ struct xmit_frame *alloc_mgtxmitframe(struct xmit_priv *pxmitpriv) /**************************************************************************** -Following are some TX fuctions for WiFi MLME +Following are some TX functions for WiFi MLME *****************************************************************************/ @@ -4612,7 +4612,7 @@ exit: return ret; } -/* if psta == NULL, indiate we are station(client) now... */ +/* if psta == NULL, indicate we are station (client) now... 
*/ void issue_auth(struct adapter *padapter, struct sta_info *psta, unsigned short status) { struct xmit_frame *pmgntframe; @@ -5011,7 +5011,7 @@ void issue_assocreq(struct adapter *padapter) if (!padapter->registrypriv.wifi_spec) { /* Commented by Kurt 20110629 */ /* In some older APs, WPS handshake */ - /* would be fail if we append vender extensions informations to AP */ + /* would be fail if we append vendor extension information to AP */ if (!memcmp(pIE->data, WPS_OUI, 4)) pIE->Length = 14; } @@ -5166,7 +5166,7 @@ exit: kfree(pmlmepriv->assoc_req); } -/* when wait_ack is ture, this function shoule be called at process context */ +/* when wait_ack is true, this function should be called at process context */ static int _issue_nulldata(struct adapter *padapter, unsigned char *da, unsigned int power_mode, int wait_ack) { int ret = _FAIL; @@ -5235,7 +5235,7 @@ exit: return ret; } -/* when wait_ms > 0 , this function shoule be called at process context */ +/* when wait_ms > 0, this function should be called at process context */ /* da == NULL for station mode */ int issue_nulldata(struct adapter *padapter, unsigned char *da, unsigned int power_mode, int try_cnt, int wait_ms) { @@ -5244,7 +5244,7 @@ int issue_nulldata(struct adapter *padapter, unsigned char *da, unsigned int pow struct mlme_ext_priv *pmlmeext = &padapter->mlmeextpriv; struct mlme_ext_info *pmlmeinfo = &pmlmeext->mlmext_info; - /* da == NULL, assum it's null data for sta to ap*/ + /* da == NULL, assume it's null data for sta to ap*/ if (!da) da = get_my_bssid(&pmlmeinfo->network); @@ -5268,7 +5268,7 @@ exit: return ret; } -/* when wait_ack is ture, this function shoule be called at process context */ +/* when wait_ack is true, this function should be called at process context */ static int _issue_qos_nulldata(struct adapter *padapter, unsigned char *da, u16 tid, int wait_ack) { int ret = _FAIL; @@ -5341,7 +5341,7 @@ exit: return ret; } -/* when wait_ms > 0 , this function shoule be called at process 
context */ +/* when wait_ms > 0 , this function should be called at process context */ /* da == NULL for station mode */ int issue_qos_nulldata(struct adapter *padapter, unsigned char *da, u16 tid, int try_cnt, int wait_ms) { @@ -5350,7 +5350,7 @@ int issue_qos_nulldata(struct adapter *padapter, unsigned char *da, u16 tid, int struct mlme_ext_priv *pmlmeext = &padapter->mlmeextpriv; struct mlme_ext_info *pmlmeinfo = &pmlmeext->mlmext_info; - /* da == NULL, assum it's null data for sta to ap*/ + /* da == NULL, assume it's null data for sta to ap*/ if (!da) da = get_my_bssid(&pmlmeinfo->network); @@ -5882,7 +5882,7 @@ static void rtw_set_opmode(struct adapter *adapter, u8 mode) /**************************************************************************** -Following are some utitity fuctions for WiFi MLME +Following are some utility functions for WiFi MLME *****************************************************************************/ @@ -6055,7 +6055,7 @@ void site_survey(struct adapter *padapter) } else { /* 20100721:Interrupt scan operation here. */ /* For SW antenna diversity before link, it needs to switch to another antenna and scan again. */ - /* It compares the scan result and select beter one to do connection. */ + /* It compares the scan result and selects a better one to do connection. 
*/ if (AntDivBeforeLink8188E(padapter)) { pmlmeext->sitesurvey_res.bss_cnt = 0; pmlmeext->sitesurvey_res.channel_idx = -1; @@ -6325,7 +6325,7 @@ void start_create_ibss(struct adapter *padapter) /* update wireless mode */ update_wireless_mode(padapter); - /* udpate capability */ + /* update capability */ caps = rtw_get_capability((struct wlan_bssid_ex *)pnetwork); update_capinfo(padapter, caps); if (caps & cap_IBSS) {/* adhoc master */ @@ -6375,7 +6375,7 @@ void start_clnt_join(struct adapter *padapter) /* update wireless mode */ update_wireless_mode(padapter); - /* udpate capability */ + /* update capability */ caps = rtw_get_capability((struct wlan_bssid_ex *)pnetwork); update_capinfo(padapter, caps); if (caps & cap_ESS) { @@ -6969,7 +6969,7 @@ void mlmeext_joinbss_event_callback(struct adapter *padapter, int join_res) /* BCN interval */ rtw_write16(padapter, REG_BCN_INTERVAL, pmlmeinfo->bcn_interval); - /* udpate capability */ + /* update capability */ update_capinfo(padapter, pmlmeinfo->capability); /* WMM, Update EDCA param */ diff --git a/drivers/staging/r8188eu/core/rtw_recv.c b/drivers/staging/r8188eu/core/rtw_recv.c index bb5c3b3888e0..4b68a543f68b 100644 --- a/drivers/staging/r8188eu/core/rtw_recv.c +++ b/drivers/staging/r8188eu/core/rtw_recv.c @@ -972,7 +972,7 @@ static void validate_recv_ctrl_frame(struct adapter *padapter, if (psta->sleepq_len == 0) { pstapriv->tim_bitmap &= ~BIT(psta->aid); - /* upate BCN for TIM IE */ + /* update BCN for TIM IE */ /* update_BCNTIM(padapter); */ update_beacon(padapter, _TIM_IE_, NULL, false); } @@ -986,7 +986,7 @@ static void validate_recv_ctrl_frame(struct adapter *padapter, pstapriv->tim_bitmap &= ~BIT(psta->aid); - /* upate BCN for TIM IE */ + /* update BCN for TIM IE */ /* update_BCNTIM(padapter); */ update_beacon(padapter, _TIM_IE_, NULL, false); } @@ -1984,13 +1984,13 @@ static void rtw_signal_stat_timer_hdl(struct timer_list *t) } else { if (recvpriv->signal_strength_data.update_req == 0) {/* update_req is 
clear, means we got rx */ avg_signal_strength = recvpriv->signal_strength_data.avg_val; - /* after avg_vals are accquired, we can re-stat the signal values */ + /* after avg_vals are acquired, we can re-stat the signal values */ recvpriv->signal_strength_data.update_req = 1; } if (recvpriv->signal_qual_data.update_req == 0) {/* update_req is clear, means we got rx */ avg_signal_qual = recvpriv->signal_qual_data.avg_val; - /* after avg_vals are accquired, we can re-stat the signal values */ + /* after avg_vals are acquired, we can re-stat the signal values */ recvpriv->signal_qual_data.update_req = 1; } diff --git a/drivers/staging/r8188eu/hal/HalPhyRf_8188e.c b/drivers/staging/r8188eu/hal/HalPhyRf_8188e.c index 525deab10820..60cdfcf80daa 100644 --- a/drivers/staging/r8188eu/hal/HalPhyRf_8188e.c +++ b/drivers/staging/r8188eu/hal/HalPhyRf_8188e.c @@ -69,7 +69,7 @@ void ODM_TxPwrTrackAdjust88E(struct odm_dm_struct *dm_odm, u8 Type,/* 0 = OFDM, /*----------------------------------------------------------------------------- * Function: odm_TxPwrTrackSetPwr88E() * - * Overview: 88E change all channel tx power accordign to flag. + * Overview: 88E change all channel tx power according to flag. * OFDM & CCK are all different. * * Input: NONE diff --git a/drivers/staging/r8188eu/hal/odm_RTL8188E.c b/drivers/staging/r8188eu/hal/odm_RTL8188E.c index c8a3c521bd60..dd9c8291f025 100644 --- a/drivers/staging/r8188eu/hal/odm_RTL8188E.c +++ b/drivers/staging/r8188eu/hal/odm_RTL8188E.c @@ -194,7 +194,7 @@ static void odm_HWAntDiv(struct odm_dm_struct *dm_odm) for (i = 0; i < ODM_ASSOCIATE_ENTRY_NUM; i++) { pEntry = dm_odm->pODM_StaInfo[i]; if (IS_STA_VALID(pEntry)) { - /* 2 Caculate RSSI per Antenna */ + /* 2 Calculate RSSI per Antenna */ Main_RSSI = (dm_fat_tbl->MainAnt_Cnt[i] != 0) ? (dm_fat_tbl->MainAnt_Sum[i] / dm_fat_tbl->MainAnt_Cnt[i]) : 0; Aux_RSSI = (dm_fat_tbl->AuxAnt_Cnt[i] != 0) ? 
(dm_fat_tbl->AuxAnt_Sum[i] / dm_fat_tbl->AuxAnt_Cnt[i]) : 0; TargetAnt = (Main_RSSI >= Aux_RSSI) ? MAIN_ANT : AUX_ANT; diff --git a/drivers/staging/r8188eu/hal/rtl8188e_hal_init.c b/drivers/staging/r8188eu/hal/rtl8188e_hal_init.c index 158260547f2b..cc29963f4b49 100644 --- a/drivers/staging/r8188eu/hal/rtl8188e_hal_init.c +++ b/drivers/staging/r8188eu/hal/rtl8188e_hal_init.c @@ -355,7 +355,7 @@ void rtl8188e_EfusePowerSwitch(struct adapter *pAdapter, u8 PwrState) if (PwrState) { rtw_write8(pAdapter, REG_EFUSE_ACCESS, EFUSE_ACCESS_ON); - /* 1.2V Power: From VDDON with Power Cut(0x0000h[15]), defualt valid */ + /* 1.2V Power: From VDDON with Power Cut(0x0000h[15]), default valid */ res = rtw_read16(pAdapter, REG_SYS_ISO_CTRL, &tmpV16); if (res) return; diff --git a/drivers/staging/r8188eu/hal/rtl8188e_phycfg.c b/drivers/staging/r8188eu/hal/rtl8188e_phycfg.c index 532c63bce0bf..b7f3c7a670fb 100644 --- a/drivers/staging/r8188eu/hal/rtl8188e_phycfg.c +++ b/drivers/staging/r8188eu/hal/rtl8188e_phycfg.c @@ -23,7 +23,7 @@ static u32 phy_calculate_bit_shift(u32 bitmask) /** * Function: PHY_QueryBBReg * -* OverView: Read "sepcific bits" from BB register +* Overview: Read "sepcific bits" from BB register * * Input: * struct adapter *Adapter, @@ -56,7 +56,7 @@ rtl8188e_PHY_QueryBBReg( /** * Function: PHY_SetBBReg * -* OverView: Write "Specific bits" to BB register (page 8~) +* Overview: Write "Specific bits" to BB register (page 8~) * * Input: * struct adapter *Adapter, @@ -94,7 +94,7 @@ void rtl8188e_PHY_SetBBReg(struct adapter *Adapter, u32 RegAddr, u32 BitMask, u3 /** * Function: phy_RFSerialRead * -* OverView: Read regster from RF chips +* Overview: Read register from RF chips * * Input: * struct adapter *Adapter, @@ -160,7 +160,7 @@ phy_RFSerialRead( /** * Function: phy_RFSerialWrite * -* OverView: Write data to RF register (page 8~) +* Overview: Write data to RF register (page 8~) * * Input: * struct adapter *Adapter, @@ -235,7 +235,7 @@ phy_RFSerialWrite( /** * 
Function: PHY_QueryRFReg * -* OverView: Query "Specific bits" to RF register (page 8~) +* Overview: Query "Specific bits" to RF register (page 8~) * * Input: * struct adapter *Adapter, @@ -261,7 +261,7 @@ u32 rtl8188e_PHY_QueryRFReg(struct adapter *Adapter, u32 RegAddr, u32 BitMask) /** * Function: PHY_SetRFReg * -* OverView: Write "Specific bits" to RF register (page 8~) +* Overview: Write "Specific bits" to RF register (page 8~) * * Input: * struct adapter *Adapter, @@ -335,7 +335,7 @@ s32 PHY_MACConfig8188E(struct adapter *Adapter) /** * Function: phy_InitBBRFRegisterDefinition * -* OverView: Initialize Register definition offset for Radio Path A/B/C/D +* Overview: Initialize Register definition offset for Radio Path A/B/C/D * * Input: * struct adapter *Adapter, @@ -363,7 +363,7 @@ phy_InitBBRFRegisterDefinition( /* RF Interface (Output and) Enable */ pHalData->PHYRegDef.rfintfe = rFPGA0_XA_RFInterfaceOE; /* 16 MSBs if read 32-bit from 0x860 (16-bit for 0x862) */ - /* Addr of LSSI. Wirte RF register by driver */ + /* Addr of LSSI. 
Write RF register by driver */ pHalData->PHYRegDef.rf3wireOffset = rFPGA0_XA_LSSIParameter; /* LSSI Parameter */ /* RF parameter */ diff --git a/drivers/staging/r8188eu/include/Hal8188EPhyReg.h b/drivers/staging/r8188eu/include/Hal8188EPhyReg.h index 8b8c75a1f149..da2329be4474 100644 --- a/drivers/staging/r8188eu/include/Hal8188EPhyReg.h +++ b/drivers/staging/r8188eu/include/Hal8188EPhyReg.h @@ -92,7 +92,7 @@ #define rFPGA0_AdDaClockEn 0x888 #define rFPGA0_AnalogParameter4 0x88c -#define rFPGA0_XA_LSSIReadBack 0x8a0 /* Tranceiver LSSI Readback */ +#define rFPGA0_XA_LSSIReadBack 0x8a0 /* Transceiver LSSI Readback */ #define rFPGA0_XB_LSSIReadBack 0x8a4 #define rFPGA0_XC_LSSIReadBack 0x8a8 #define rFPGA0_XD_LSSIReadBack 0x8ac @@ -167,7 +167,7 @@ /* RxIQ DC offset, Rx digital filter, DC notch filter */ #define rOFDM0_XARxAFE 0xc10 -#define rOFDM0_XARxIQImbalance 0xc14 /* RxIQ imblance matrix */ +#define rOFDM0_XARxIQImbalance 0xc14 /* RxIQ imbalance matrix */ #define rOFDM0_XBRxAFE 0xc18 #define rOFDM0_XBRxIQImbalance 0xc1c #define rOFDM0_XCRxAFE 0xc20 diff --git a/drivers/staging/r8188eu/include/rtl8188e_hal.h b/drivers/staging/r8188eu/include/rtl8188e_hal.h index a1e88e6d5c0c..69faaaa7d373 100644 --- a/drivers/staging/r8188eu/include/rtl8188e_hal.h +++ b/drivers/staging/r8188eu/include/rtl8188e_hal.h @@ -87,7 +87,7 @@ struct txpowerinfo24g { /* 9bytes + 1byt + 5bytes and pre 1byte. */ /* For worst case: */ /* | 2byte|----8bytes----|1byte|--7bytes--| 92D */ -/* PG data exclude header, dummy 7 bytes frome CP test and reserved 1byte. */ +/* PG data exclude header, dummy 7 bytes from CP test and reserved 1byte. 
*/ #define EFUSE_OOB_PROTECT_BYTES_88E 18 #define EFUSE_PROTECT_BYTES_BANK 16 diff --git a/drivers/staging/r8188eu/include/rtl8188e_spec.h b/drivers/staging/r8188eu/include/rtl8188e_spec.h index e34619140e33..e34ecdc09688 100644 --- a/drivers/staging/r8188eu/include/rtl8188e_spec.h +++ b/drivers/staging/r8188eu/include/rtl8188e_spec.h @@ -4,7 +4,7 @@ #ifndef __RTL8188E_SPEC_H__ #define __RTL8188E_SPEC_H__ -/* 8192C Regsiter offset definition */ +/* 8192C Register offset definition */ #define HAL_PS_TIMER_INT_DELAY 50 /* 50 microseconds */ #define HAL_92C_NAV_UPPER_UNIT 128 /* micro-second */ @@ -674,7 +674,7 @@ Current IOREG MAP #define REG_USB_HRPWM 0xFE58 #define REG_USB_HCPWM 0xFE57 -/* 8192C Regsiter Bit and Content definition */ +/* 8192C Register Bit and Content definition */ /* 0x0000h ~ 0x00FFh System Configuration */ /* 2 SYS_ISO_CTRL */ @@ -1135,7 +1135,7 @@ Current IOREG MAP #define EEPROM_Default_CrystalCap_88E 0x20 #define EEPROM_Default_ThermalMeter_88E 0x18 -/* New EFUSE deafult value */ +/* New EFUSE default value */ #define EEPROM_DEFAULT_24G_INDEX 0x2D #define EEPROM_DEFAULT_24G_HT20_DIFF 0X02 #define EEPROM_DEFAULT_24G_OFDM_DIFF 0X04 diff --git a/drivers/staging/r8188eu/include/rtw_cmd.h b/drivers/staging/r8188eu/include/rtw_cmd.h index ee9218b1d7a9..2896a732780b 100644 --- a/drivers/staging/r8188eu/include/rtw_cmd.h +++ b/drivers/staging/r8188eu/include/rtw_cmd.h @@ -450,7 +450,7 @@ struct Tx_Beacon_param mac[0] == 0 ==> CMD mode, return H2C_SUCCESS. 
- The following condition must be ture under CMD mode + The following condition must be true under CMD mode mac[1] == mac[4], mac[2] == mac[3], mac[0]=mac[5]= 0; s0 == 0x1234, s1 == 0xabcd, w0 == 0x78563412, w1 == 0x5aa5def7; s2 == (b1 << 8 | b0); @@ -503,7 +503,7 @@ struct drvextra_cmd_parm { unsigned char *pbuf; }; -/*------------------- Below are used for RF/BB tunning ---------------------*/ +/*------------------- Below are used for RF/BB tuning ---------------------*/ struct setantenna_parm { u8 tx_antset; diff --git a/drivers/staging/r8188eu/include/rtw_recv.h b/drivers/staging/r8188eu/include/rtw_recv.h index 7768b0c5988c..12026431a3d2 100644 --- a/drivers/staging/r8188eu/include/rtw_recv.h +++ b/drivers/staging/r8188eu/include/rtw_recv.h @@ -92,7 +92,7 @@ struct rx_pkt_attrib { u8 privacy; /* in frame_ctrl field */ u8 bdecrypted; u8 encrypt; /* when 0 indicate no encrypt. when non-zero, - * indicate the encrypt algorith */ + * indicate the encrypt algorithm */ u8 iv_len; u8 icv_len; u8 crc_err; @@ -175,7 +175,7 @@ struct recv_priv { u8 *precv_buf; /* 4 alignment */ struct __queue free_recv_buf_queue; u32 free_recv_buf_queue_cnt; - /* For display the phy informatiom */ + /* For display the phy information */ u8 is_signal_dbg; /* for debug */ u8 signal_strength_dbg; /* for debug */ s8 rssi; diff --git a/drivers/staging/r8188eu/include/rtw_xmit.h b/drivers/staging/r8188eu/include/rtw_xmit.h index 82efcd54af3f..cff065554608 100644 --- a/drivers/staging/r8188eu/include/rtw_xmit.h +++ b/drivers/staging/r8188eu/include/rtw_xmit.h @@ -116,7 +116,7 @@ struct pkt_attrib { u32 last_txcmdsz; u8 nr_frags; u8 encrypt; /* when 0 indicate no encrypt. 
when non-zero, - * indicate the encrypt algorith */ + * indicate the encrypt algorithm */ u8 iv_len; u8 icv_len; u8 iv[18]; diff --git a/drivers/staging/r8188eu/include/wifi.h b/drivers/staging/r8188eu/include/wifi.h index 0254310bdf44..381385a7e118 100644 --- a/drivers/staging/r8188eu/include/wifi.h +++ b/drivers/staging/r8188eu/include/wifi.h @@ -701,7 +701,7 @@ struct ADDBA_request { #define P2P_WILDCARD_SSID_LEN 7 -/* default value, used when: (1)p2p disabed or (2)p2p enabled +/* default value, used when: (1)p2p disabled or (2)p2p enabled * but only do 1 scan phase */ #define P2P_FINDPHASE_EX_NONE 0 /* used when p2p enabled and want to do 1 scan phase and @@ -766,11 +766,11 @@ enum P2P_STATE { P2P_STATE_TX_PROVISION_DIS_REQ = 6, P2P_STATE_RX_PROVISION_DIS_RSP = 7, P2P_STATE_RX_PROVISION_DIS_REQ = 8, - /* Doing the group owner negoitation handshake */ + /* Doing the group owner negotiation handshake */ P2P_STATE_GONEGO_ING = 9, - /* finish the group negoitation handshake with success */ + /* finish the group negotiation handshake with success */ P2P_STATE_GONEGO_OK = 10, - /* finish the group negoitation handshake with failure */ + /* finish the group negotiation handshake with failure */ P2P_STATE_GONEGO_FAIL = 11, /* receiving the P2P Inviation request and match with the profile. 
*/ P2P_STATE_RECV_INVITE_REQ_MATCH = 12, @@ -790,9 +790,9 @@ enum P2P_STATE { P2P_STATE_RECV_INVITE_REQ_JOIN = 19, /* recveing the P2P Inviation response with failure */ P2P_STATE_RX_INVITE_RESP_FAIL = 20, - /* receiving p2p negoitation response with information is not available */ + /* receiving p2p negotiation response with information is not available */ P2P_STATE_RX_INFOR_NOREADY = 21, - /* sending p2p negoitation response with information is not available */ + /* sending p2p negotiation response with information is not available */ P2P_STATE_TX_INFOR_NOREADY = 22, }; diff --git a/drivers/staging/r8188eu/os_dep/ioctl_linux.c b/drivers/staging/r8188eu/os_dep/ioctl_linux.c index 2de2e1e32738..8516e253bb03 100644 --- a/drivers/staging/r8188eu/os_dep/ioctl_linux.c +++ b/drivers/staging/r8188eu/os_dep/ioctl_linux.c @@ -2647,7 +2647,7 @@ static int rtw_p2p_connect(struct net_device *dev, u32 peer_channel = 0; /* Commented by Albert 20110304 */ - /* The input data contains two informations. */ + /* The input data contains two information. */ /* 1. First information is the MAC address which wants to formate with */ /* 2. Second information is the WPS PINCode or "pbc" string for push button method */ /* Format: 00:E0:4C:00:00:05 */ @@ -2721,7 +2721,7 @@ static void rtw_p2p_invite_req(struct net_device *dev, uint p2pielen = 0, attr_contentlen = 0; struct tx_invite_req_info *pinvite_req_info = &pwdinfo->invitereq_info; - /* The input data contains two informations. */ + /* The input data contains two information items. */ /* 1. First information is the P2P device address which you want to send to. */ /* 2. Second information is the group id which combines with GO's mac address, space and GO's ssid. 
*/ /* Command line sample: iwpriv wlan0 p2p_set invite ="00:11:22:33:44:55 00:E0:4C:00:00:05 DIRECT-xy" */ @@ -2845,7 +2845,7 @@ static void rtw_p2p_prov_disc(struct net_device *dev, u8 *p2pie; uint p2pielen = 0, attr_contentlen = 0; - /* The input data contains two informations. */ + /* The input data contains two information items. */ /* 1. First information is the MAC address which wants to issue the provisioning discovery request frame. */ /* 2. Second information is the WPS configuration method which wants to discovery */ /* Format: 00:E0:4C:00:00:05_display */ From 4b66ec6961f170cf782fb66520c36f2e950c7601 Mon Sep 17 00:00:00 2001 From: Deepak R Varma Date: Fri, 21 Oct 2022 03:00:13 +0530 Subject: [PATCH 0333/4122] staging: r8188eu: Add space between function & macro parameters Space required between function and macro parameters to improve code readability. This Linux kernel coding style guideline resolves following error reported by checkpatch script: ERROR: space required after that ',' (ctx:VxV) Signed-off-by: Deepak R Varma Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/ce200b3a986628f943dfb0c4e412276793e59bbc.1666299151.git.drv@mailo.com Signed-off-by: Greg Kroah-Hartman --- .../staging/r8188eu/include/osdep_service.h | 4 +-- .../staging/r8188eu/include/rtl8188e_hal.h | 2 +- drivers/staging/r8188eu/include/rtw_cmd.h | 18 ++++++------- drivers/staging/r8188eu/include/rtw_io.h | 26 +++++++++---------- 4 files changed, 25 insertions(+), 25 deletions(-) diff --git a/drivers/staging/r8188eu/include/osdep_service.h b/drivers/staging/r8188eu/include/osdep_service.h index 72990a1cdc66..ec2631455f08 100644 --- a/drivers/staging/r8188eu/include/osdep_service.h +++ b/drivers/staging/r8188eu/include/osdep_service.h @@ -53,7 +53,7 @@ static inline struct list_head *get_list_head(struct __queue *queue) return (&(queue->queue)); } -static inline void _set_timer(struct timer_list *ptimer,u32 delay_time) +static inline void _set_timer(struct 
timer_list *ptimer, u32 delay_time) { mod_timer(ptimer, jiffies + msecs_to_jiffies(delay_time)); } @@ -108,7 +108,7 @@ void rtw_free_netdev(struct net_device *netdev); #define FUNC_ADPT_FMT "%s(%s)" #define FUNC_ADPT_ARG(adapter) __func__, adapter->pnetdev->name -#define rtw_signal_process(pid, sig) kill_pid(find_vpid((pid)),(sig), 1) +#define rtw_signal_process(pid, sig) kill_pid(find_vpid((pid)), (sig), 1) /* Macros for handling unaligned memory accesses */ diff --git a/drivers/staging/r8188eu/include/rtl8188e_hal.h b/drivers/staging/r8188eu/include/rtl8188e_hal.h index 69faaaa7d373..d9681f9b3915 100644 --- a/drivers/staging/r8188eu/include/rtl8188e_hal.h +++ b/drivers/staging/r8188eu/include/rtl8188e_hal.h @@ -164,7 +164,7 @@ void Hal_ReadTxPowerInfo88E(struct adapter *padapter, u8 *hwinfo, void rtl8188e_EfuseParseChnlPlan(struct adapter *padapter, u8 *hwinfo, bool AutoLoadFail); -void Hal_ReadAntennaDiversity88E(struct adapter *pAdapter,u8 *PROMContent, +void Hal_ReadAntennaDiversity88E(struct adapter *pAdapter, u8 *PROMContent, bool AutoLoadFail); void Hal_ReadThermalMeter_88E(struct adapter * dapter, u8 *PROMContent, bool AutoloadFail); diff --git a/drivers/staging/r8188eu/include/rtw_cmd.h b/drivers/staging/r8188eu/include/rtw_cmd.h index 2896a732780b..b4d4ac056e91 100644 --- a/drivers/staging/r8188eu/include/rtw_cmd.h +++ b/drivers/staging/r8188eu/include/rtw_cmd.h @@ -725,17 +725,17 @@ Result: #define H2C_CMD_OVERFLOW 0x06 #define H2C_RESERVED 0x07 -u8 rtw_sitesurvey_cmd(struct adapter *padapter, struct ndis_802_11_ssid *ssid, int ssid_num); -u8 rtw_createbss_cmd(struct adapter *padapter); +u8 rtw_sitesurvey_cmd(struct adapter *padapter, struct ndis_802_11_ssid *ssid, int ssid_num); +u8 rtw_createbss_cmd(struct adapter *padapter); u8 rtw_setstakey_cmd(struct adapter *padapter, u8 *psta, u8 unicast_key); u8 rtw_clearstakey_cmd(struct adapter *padapter, u8 *psta, u8 entry, u8 enqueue); -u8 rtw_joinbss_cmd(struct adapter *padapter, struct wlan_network* 
pnetwork); +u8 rtw_joinbss_cmd(struct adapter *padapter, struct wlan_network* pnetwork); u8 rtw_disassoc_cmd(struct adapter *padapter, u32 deauth_timeout_ms, bool enqueue); -u8 rtw_setopmode_cmd(struct adapter *padapter, enum ndis_802_11_network_infra networktype); -u8 rtw_setdatarate_cmd(struct adapter *padapter, u8 *rateset); -u8 rtw_setrfintfs_cmd(struct adapter *padapter, u8 mode); +u8 rtw_setopmode_cmd(struct adapter *padapter, enum ndis_802_11_network_infra networktype); +u8 rtw_setdatarate_cmd(struct adapter *padapter, u8 *rateset); +u8 rtw_setrfintfs_cmd(struct adapter *padapter, u8 mode); -u8 rtw_gettssi_cmd(struct adapter *padapter, u8 offset,u8 *pval); +u8 rtw_gettssi_cmd(struct adapter *padapter, u8 offset, u8 *pval); u8 rtw_setfwdig_cmd(struct adapter*padapter, u8 type); u8 rtw_setfwra_cmd(struct adapter*padapter, u8 type); @@ -746,10 +746,10 @@ u8 rtw_dynamic_chk_wk_cmd(struct adapter *adapter); u8 rtw_lps_ctrl_wk_cmd(struct adapter*padapter, u8 lps_ctrl_type, u8 enqueue); u8 rtw_rpt_timer_cfg_cmd(struct adapter*padapter, u16 minRptTime); - u8 rtw_antenna_select_cmd(struct adapter*padapter, u8 antenna,u8 enqueue); +u8 rtw_antenna_select_cmd(struct adapter*padapter, u8 antenna, u8 enqueue); u8 rtw_ps_cmd(struct adapter*padapter); -u8 rtw_chk_hi_queue_cmd(struct adapter*padapter); +u8 rtw_chk_hi_queue_cmd(struct adapter *padapter); u8 rtw_set_chplan_cmd(struct adapter *padapter, u8 chplan); diff --git a/drivers/staging/r8188eu/include/rtw_io.h b/drivers/staging/r8188eu/include/rtw_io.h index 925c7967ac04..87fcf6c94ff3 100644 --- a/drivers/staging/r8188eu/include/rtw_io.h +++ b/drivers/staging/r8188eu/include/rtw_io.h @@ -209,7 +209,7 @@ struct io_priv { }; uint ioreq_flush(struct adapter *adapter, struct io_queue *ioqueue); -void sync_ioreq_enqueue(struct io_req *preq,struct io_queue *ioqueue); +void sync_ioreq_enqueue(struct io_req *preq, struct io_queue *ioqueue); uint sync_ioreq_flush(struct adapter *adapter, struct io_queue *ioqueue); uint 
free_ioreq(struct io_req *preq, struct io_queue *pio_queue); struct io_req *alloc_ioreq(struct io_queue *pio_q); @@ -285,18 +285,18 @@ void bus_sync_io(struct io_queue *pio_q); u32 _ioreq2rwmem(struct io_queue *pio_q); void dev_power_down(struct adapter *Adapter, u8 bpwrup); -#define PlatformEFIOWrite1Byte(_a,_b,_c) \ - rtw_write8(_a,_b,_c) -#define PlatformEFIOWrite2Byte(_a,_b,_c) \ - rtw_write16(_a,_b,_c) -#define PlatformEFIOWrite4Byte(_a,_b,_c) \ - rtw_write32(_a,_b,_c) +#define PlatformEFIOWrite1Byte(_a, _b, _c) \ + rtw_write8(_a, _b, _c) +#define PlatformEFIOWrite2Byte(_a, _b, _c) \ + rtw_write16(_a, _b, _c) +#define PlatformEFIOWrite4Byte(_a, _b, _c) \ + rtw_write32(_a, _b, _c) -#define PlatformEFIORead1Byte(_a,_b) \ - rtw_read8(_a,_b) -#define PlatformEFIORead2Byte(_a,_b) \ - rtw_read16(_a,_b) -#define PlatformEFIORead4Byte(_a,_b) \ - rtw_read32(_a,_b) +#define PlatformEFIORead1Byte(_a, _b) \ + rtw_read8(_a, _b) +#define PlatformEFIORead2Byte(_a, _b) \ + rtw_read16(_a, _b) +#define PlatformEFIORead4Byte(_a, _b) \ + rtw_read32(_a, _b) #endif /* _RTL8711_IO_H_ */ From 2bf279885c596c7680426cacd0fe137db0e00a85 Mon Sep 17 00:00:00 2001 From: Deepak R Varma Date: Fri, 21 Oct 2022 03:00:35 +0530 Subject: [PATCH 0334/4122] staging: r8188eu: Associate pointer symbol with parameter name The pointer symbol '*' should be associated with the function parameter name and not its type. This improves code readability and adheres to the coding-style guidelines. Address following checkpatch reported error: ERROR: "foo * bar" should be "foo *bar" While in there, update parameter name at one place to match other function declarations. 
Signed-off-by: Deepak R Varma Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/d946b69bfdfb44baae3a130e412ed2e217a710a7.1666299151.git.drv@mailo.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/include/rtl8188e_hal.h | 2 +- drivers/staging/r8188eu/include/rtw_cmd.h | 16 ++++++++-------- drivers/staging/r8188eu/include/rtw_ioctl_set.h | 4 ++-- drivers/staging/r8188eu/include/rtw_mlme.h | 2 +- drivers/staging/r8188eu/include/rtw_mlme_ext.h | 8 ++++---- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/staging/r8188eu/include/rtl8188e_hal.h b/drivers/staging/r8188eu/include/rtl8188e_hal.h index d9681f9b3915..feeb37c22897 100644 --- a/drivers/staging/r8188eu/include/rtl8188e_hal.h +++ b/drivers/staging/r8188eu/include/rtl8188e_hal.h @@ -166,7 +166,7 @@ void rtl8188e_EfuseParseChnlPlan(struct adapter *padapter, u8 *hwinfo, bool AutoLoadFail); void Hal_ReadAntennaDiversity88E(struct adapter *pAdapter, u8 *PROMContent, bool AutoLoadFail); -void Hal_ReadThermalMeter_88E(struct adapter * dapter, u8 *PROMContent, +void Hal_ReadThermalMeter_88E(struct adapter *padapter, u8 *PROMContent, bool AutoloadFail); void Hal_EfuseParseXtal_8188E(struct adapter *pAdapter, u8 *hwinfo, bool AutoLoadFail); diff --git a/drivers/staging/r8188eu/include/rtw_cmd.h b/drivers/staging/r8188eu/include/rtw_cmd.h index b4d4ac056e91..98c9a7b67719 100644 --- a/drivers/staging/r8188eu/include/rtw_cmd.h +++ b/drivers/staging/r8188eu/include/rtw_cmd.h @@ -729,25 +729,25 @@ u8 rtw_sitesurvey_cmd(struct adapter *padapter, struct ndis_802_11_ssid *ssid, i u8 rtw_createbss_cmd(struct adapter *padapter); u8 rtw_setstakey_cmd(struct adapter *padapter, u8 *psta, u8 unicast_key); u8 rtw_clearstakey_cmd(struct adapter *padapter, u8 *psta, u8 entry, u8 enqueue); -u8 rtw_joinbss_cmd(struct adapter *padapter, struct wlan_network* pnetwork); +u8 rtw_joinbss_cmd(struct adapter *padapter, struct wlan_network *pnetwork); u8 rtw_disassoc_cmd(struct adapter 
*padapter, u32 deauth_timeout_ms, bool enqueue); u8 rtw_setopmode_cmd(struct adapter *padapter, enum ndis_802_11_network_infra networktype); u8 rtw_setdatarate_cmd(struct adapter *padapter, u8 *rateset); u8 rtw_setrfintfs_cmd(struct adapter *padapter, u8 mode); u8 rtw_gettssi_cmd(struct adapter *padapter, u8 offset, u8 *pval); -u8 rtw_setfwdig_cmd(struct adapter*padapter, u8 type); -u8 rtw_setfwra_cmd(struct adapter*padapter, u8 type); +u8 rtw_setfwdig_cmd(struct adapter *padapter, u8 type); +u8 rtw_setfwra_cmd(struct adapter *padapter, u8 type); -u8 rtw_addbareq_cmd(struct adapter*padapter, u8 tid, u8 *addr); +u8 rtw_addbareq_cmd(struct adapter *padapter, u8 tid, u8 *addr); u8 rtw_dynamic_chk_wk_cmd(struct adapter *adapter); -u8 rtw_lps_ctrl_wk_cmd(struct adapter*padapter, u8 lps_ctrl_type, u8 enqueue); -u8 rtw_rpt_timer_cfg_cmd(struct adapter*padapter, u16 minRptTime); +u8 rtw_lps_ctrl_wk_cmd(struct adapter *padapter, u8 lps_ctrl_type, u8 enqueue); +u8 rtw_rpt_timer_cfg_cmd(struct adapter *padapter, u16 minRptTime); -u8 rtw_antenna_select_cmd(struct adapter*padapter, u8 antenna, u8 enqueue); -u8 rtw_ps_cmd(struct adapter*padapter); +u8 rtw_antenna_select_cmd(struct adapter *padapter, u8 antenna, u8 enqueue); +u8 rtw_ps_cmd(struct adapter *padapter); u8 rtw_chk_hi_queue_cmd(struct adapter *padapter); diff --git a/drivers/staging/r8188eu/include/rtw_ioctl_set.h b/drivers/staging/r8188eu/include/rtw_ioctl_set.h index 7365079c704f..abe460d6504d 100644 --- a/drivers/staging/r8188eu/include/rtw_ioctl_set.h +++ b/drivers/staging/r8188eu/include/rtw_ioctl_set.h @@ -10,10 +10,10 @@ typedef u8 NDIS_802_11_PMKID_VALUE[16]; u8 rtw_set_802_11_authentication_mode(struct adapter *adapt, enum ndis_802_11_auth_mode authmode); -u8 rtw_set_802_11_bssid(struct adapter*adapter, u8 *bssid); +u8 rtw_set_802_11_bssid(struct adapter *adapter, u8 *bssid); u8 rtw_set_802_11_add_wep(struct adapter *adapter, struct ndis_802_11_wep *wep); u8 rtw_set_802_11_disassociate(struct adapter 
*adapter); -u8 rtw_set_802_11_bssid_list_scan(struct adapter*adapter, +u8 rtw_set_802_11_bssid_list_scan(struct adapter *adapter, struct ndis_802_11_ssid *pssid, int ssid_max_num); u8 rtw_set_802_11_infrastructure_mode(struct adapter *adapter, diff --git a/drivers/staging/r8188eu/include/rtw_mlme.h b/drivers/staging/r8188eu/include/rtw_mlme.h index b69989cbab21..7658f864136e 100644 --- a/drivers/staging/r8188eu/include/rtw_mlme.h +++ b/drivers/staging/r8188eu/include/rtw_mlme.h @@ -547,7 +547,7 @@ void _rtw_free_network(struct mlme_priv *pmlmepriv, void _rtw_free_network_nolock(struct mlme_priv *pmlmepriv, struct wlan_network *pnetwork); -struct wlan_network* _rtw_find_network(struct __queue *scanned_queue, u8 *addr); +struct wlan_network *_rtw_find_network(struct __queue *scanned_queue, u8 *addr); void _rtw_free_network_queue(struct adapter *padapter, u8 isfreeall); diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h index b322d0848db9..be470f913a94 100644 --- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h +++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h @@ -479,11 +479,11 @@ void report_survey_event(struct adapter *padapter, struct recv_frame *precv_fram void report_surveydone_event(struct adapter *padapter); void report_del_sta_event(struct adapter *padapter, unsigned char *addr, unsigned short reason); -void report_add_sta_event(struct adapter *padapter, unsigned char* addr, +void report_add_sta_event(struct adapter *padapter, unsigned char *addr, int cam_idx); void beacon_timing_control(struct adapter *padapter); -extern u8 set_tx_beacon_cmd(struct adapter*padapter); +extern u8 set_tx_beacon_cmd(struct adapter *padapter); unsigned int setup_beacon_frame(struct adapter *padapter, unsigned char *beacon_frame); void update_mgnt_tx_rate(struct adapter *padapter, u8 rate); @@ -502,7 +502,7 @@ void issue_p2p_GO_request(struct adapter *padapter, u8 *raddr); void issue_probereq_p2p(struct adapter 
*padapter, u8 *da); void issue_p2p_invitation_response(struct adapter *padapter, u8 *raddr, u8 dialogToken, u8 success); -void issue_p2p_invitation_request(struct adapter *padapter, u8* raddr); +void issue_p2p_invitation_request(struct adapter *padapter, u8 *raddr); void issue_beacon(struct adapter *padapter, int timeout_ms); void issue_probersp(struct adapter *padapter, unsigned char *da, u8 is_valid_p2p_probereq); @@ -514,7 +514,7 @@ void issue_auth(struct adapter *padapter, struct sta_info *psta, void issue_probereq(struct adapter *padapter, struct ndis_802_11_ssid *pssid, u8 *da); s32 issue_probereq_ex(struct adapter *adapter, struct ndis_802_11_ssid *pssid, - u8* da, int try_cnt, int wait_ms); + u8 *da, int try_cnt, int wait_ms); int issue_nulldata(struct adapter *padapter, unsigned char *da, unsigned int power_mode, int try_cnt, int wait_ms); int issue_qos_nulldata(struct adapter *padapter, unsigned char *da, From 285e8d027ecccb4623331c62896d225371502262 Mon Sep 17 00:00:00 2001 From: Deepak R Varma Date: Fri, 21 Oct 2022 03:01:05 +0530 Subject: [PATCH 0335/4122] staging: r8188eu: replace leading spaces by tabs Spaces are prohibited as per the Linux coding style guidelines. Replace those by tabs wherever possible to improve code alignment. Error reported by checkpatch script. 
Signed-off-by: Deepak R Varma Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/c32b702c61ea3367d60f0a4c2443093d6ce45a69.1666299151.git.drv@mailo.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/include/rtw_cmd.h | 10 +++++----- drivers/staging/r8188eu/include/rtw_mlme.h | 2 +- drivers/staging/r8188eu/include/rtw_mlme_ext.h | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/staging/r8188eu/include/rtw_cmd.h b/drivers/staging/r8188eu/include/rtw_cmd.h index 98c9a7b67719..45734d32fec6 100644 --- a/drivers/staging/r8188eu/include/rtw_cmd.h +++ b/drivers/staging/r8188eu/include/rtw_cmd.h @@ -587,14 +587,14 @@ struct setratable_parm { }; struct getratable_parm { - uint rsvd; + uint rsvd; }; struct getratable_rsp { - u8 ss_ForceUp[NumRates]; - u8 ss_ULevel[NumRates]; - u8 ss_DLevel[NumRates]; - u8 count_judge[NumRates]; + u8 ss_ForceUp[NumRates]; + u8 ss_ULevel[NumRates]; + u8 ss_DLevel[NumRates]; + u8 count_judge[NumRates]; }; /* to get TX,RX retry count */ diff --git a/drivers/staging/r8188eu/include/rtw_mlme.h b/drivers/staging/r8188eu/include/rtw_mlme.h index 7658f864136e..ebf7168a7ef9 100644 --- a/drivers/staging/r8188eu/include/rtw_mlme.h +++ b/drivers/staging/r8188eu/include/rtw_mlme.h @@ -528,7 +528,7 @@ void rtw_indicate_scan_done(struct adapter *padapter); int rtw_restruct_sec_ie(struct adapter *adapter, u8 *in_ie, u8 *out_ie, uint in_len); int rtw_restruct_wmm_ie(struct adapter *adapter, u8 *in_ie, u8 *out_ie, - uint in_len, uint initial_out_len); + uint in_len, uint initial_out_len); void rtw_init_registrypriv_dev_network(struct adapter *adapter); void rtw_update_registrypriv_dev_network(struct adapter *adapter); diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h index be470f913a94..413b94e38744 100644 --- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h +++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h @@ -449,7 +449,7 @@ void 
ERP_IE_handler(struct adapter *padapter, struct ndis_802_11_var_ie *pIE); void VCS_update(struct adapter *padapter, struct sta_info *psta); void update_beacon_info(struct adapter *padapter, u8 *pframe, uint len, - struct sta_info *psta); + struct sta_info *psta); int rtw_check_bcn_info(struct adapter *Adapter, u8 *pframe, u32 packet_len); void update_IOT_info(struct adapter *padapter); void update_capinfo(struct adapter *adapter, u16 updatecap); From d119c18fa41dd455c6237e0b3dbb266de6a3c065 Mon Sep 17 00:00:00 2001 From: Deepak R Varma Date: Fri, 21 Oct 2022 03:01:37 +0530 Subject: [PATCH 0336/4122] staging: r8188eu: Put '{' on the symbol declaration line Open braces '{' should be placed on the line of symbol declaration as per the coding-style guidelines. Improves readability and matches with style used in rest of the code. Issue reported by checkpatch script. Signed-off-by: Deepak R Varma Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/375f742936493b562bd4dfba90eb75bd8ab84f8a.1666299151.git.drv@mailo.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/include/rtw_cmd.h | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/staging/r8188eu/include/rtw_cmd.h b/drivers/staging/r8188eu/include/rtw_cmd.h index 45734d32fec6..a740a9a101d8 100644 --- a/drivers/staging/r8188eu/include/rtw_cmd.h +++ b/drivers/staging/r8188eu/include/rtw_cmd.h @@ -440,8 +440,7 @@ struct getrfintfs_parm { u8 rfintfs; }; -struct Tx_Beacon_param -{ +struct Tx_Beacon_param { struct wlan_bssid_ex network; }; @@ -677,26 +676,22 @@ struct set_ch_parm { }; /*H2C Handler index: 59 */ -struct SetChannelPlan_param -{ +struct SetChannelPlan_param { u8 channel_plan; }; /*H2C Handler index: 60 */ -struct LedBlink_param -{ +struct LedBlink_param { struct LED_871x *pLed; }; /*H2C Handler index: 61 */ -struct SetChannelSwitch_param -{ +struct SetChannelSwitch_param { u8 new_ch_no; }; /*H2C Handler index: 62 */ -struct
TDLSoption_param -{ +struct TDLSoption_param { u8 addr[ETH_ALEN]; u8 option; }; @@ -854,8 +849,7 @@ enum rtw_h2c_cmd { #define _SetRFReg_CMD_ _Write_RFREG_CMD_ #ifdef _RTW_CMD_C_ -static struct _cmd_callback rtw_cmd_callback[] = -{ +static struct _cmd_callback rtw_cmd_callback[] = { {GEN_CMD_CODE(_Read_MACREG), NULL}, /*0*/ {GEN_CMD_CODE(_Write_MACREG), NULL}, {GEN_CMD_CODE(_Read_BBREG), &rtw_getbbrfreg_cmdrsp_callback}, From c61c8480e3a59d469d4ff7d6ed9c88d43ba1722e Mon Sep 17 00:00:00 2001 From: Deepak R Varma Date: Fri, 21 Oct 2022 03:02:12 +0530 Subject: [PATCH 0337/4122] staging: r8188eu: Correct missing or extra space in the statements Properly spacing out code statements/instructions improves code readability. Add missing or remove extra space as necessary according to the Linux Kernel coding-style guidelines. Following errors reported by checkpatch script for inconsistent code spacing: ERROR: space prohibited before that close parenthesis ')' ERROR: space prohibited before that ',' (ctx:WxW) CHECK: spaces preferred around that '&' (ctx:VxV) Signed-off-by: Deepak R Varma Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/4559d1a406b9f32379ec01cfadacea13a11803ac.1666299151.git.drv@mailo.com Signed-off-by: Greg Kroah-Hartman --- .../staging/r8188eu/include/rtl8188e_spec.h | 12 ++++++------ drivers/staging/r8188eu/include/rtw_mlme.h | 18 +++++++++--------- drivers/staging/r8188eu/include/rtw_mlme_ext.h | 2 +- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/staging/r8188eu/include/rtl8188e_spec.h b/drivers/staging/r8188eu/include/rtl8188e_spec.h index e34ecdc09688..3fa3b3e5dd64 100644 --- a/drivers/staging/r8188eu/include/rtl8188e_spec.h +++ b/drivers/staging/r8188eu/include/rtl8188e_spec.h @@ -900,12 +900,12 @@ Current IOREG MAP #define HQSEL_HIQ BIT(5) /* For normal driver, 0x10C */ -#define _TXDMA_HIQ_MAP(x) (((x)&0x3) << 14) -#define _TXDMA_MGQ_MAP(x) (((x)&0x3) << 12) -#define _TXDMA_BKQ_MAP(x) (((x)&0x3) << 10) 
-#define _TXDMA_BEQ_MAP(x) (((x)&0x3) << 8 ) -#define _TXDMA_VIQ_MAP(x) (((x)&0x3) << 6 ) -#define _TXDMA_VOQ_MAP(x) (((x)&0x3) << 4 ) +#define _TXDMA_HIQ_MAP(x) (((x) & 0x3) << 14) +#define _TXDMA_MGQ_MAP(x) (((x) & 0x3) << 12) +#define _TXDMA_BKQ_MAP(x) (((x) & 0x3) << 10) +#define _TXDMA_BEQ_MAP(x) (((x) & 0x3) << 8) +#define _TXDMA_VIQ_MAP(x) (((x) & 0x3) << 6) +#define _TXDMA_VOQ_MAP(x) (((x) & 0x3) << 4) #define QUEUE_LOW 1 #define QUEUE_NORMAL 2 diff --git a/drivers/staging/r8188eu/include/rtw_mlme.h b/drivers/staging/r8188eu/include/rtw_mlme.h index ebf7168a7ef9..ca539c652f26 100644 --- a/drivers/staging/r8188eu/include/rtw_mlme.h +++ b/drivers/staging/r8188eu/include/rtw_mlme.h @@ -101,17 +101,17 @@ struct rt_link_detect { struct profile_info { u8 ssidlen; - u8 ssid[ WLAN_SSID_MAXLEN ]; - u8 peermac[ ETH_ALEN ]; + u8 ssid[WLAN_SSID_MAXLEN]; + u8 peermac[ETH_ALEN]; }; struct tx_invite_req_info { u8 token; u8 benable; - u8 go_ssid[ WLAN_SSID_MAXLEN ]; + u8 go_ssid[WLAN_SSID_MAXLEN]; u8 ssidlen; - u8 go_bssid[ ETH_ALEN ]; - u8 peer_macaddr[ ETH_ALEN ]; + u8 go_bssid[ETH_ALEN]; + u8 peer_macaddr[ETH_ALEN]; u8 operating_ch; /* This information will be set by using the * p2p_set op_ch=x */ u8 peer_ch; /* The listen channel for peer P2P device */ @@ -154,9 +154,9 @@ struct tx_nego_req_info { }; struct group_id_info { - u8 go_device_addr[ ETH_ALEN ]; /* The GO's device address of + u8 go_device_addr[ETH_ALEN]; /* The GO's device address of * this P2P group */ - u8 ssid[ WLAN_SSID_MAXLEN ]; /* The SSID of this P2P group */ + u8 ssid[WLAN_SSID_MAXLEN]; /* The SSID of this P2P group */ }; struct scan_limit_info { @@ -459,7 +459,7 @@ static inline void set_fwstate(struct mlme_priv *pmlmepriv, int state) { pmlmepriv->fw_state |= state; /* FOR HW integration */ - if (_FW_UNDER_SURVEY==state) + if (_FW_UNDER_SURVEY == state) pmlmepriv->bScanInProcess = true; } @@ -467,7 +467,7 @@ static inline void _clr_fwstate_(struct mlme_priv *pmlmepriv, int state) { 
pmlmepriv->fw_state &= ~state; /* FOR HW integration */ - if (_FW_UNDER_SURVEY==state) + if (_FW_UNDER_SURVEY == state) pmlmepriv->bScanInProcess = false; } diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h index 413b94e38744..66aa8b497aa1 100644 --- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h +++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h @@ -729,7 +729,7 @@ enum rtw_c2h_event { GEN_EVT_CODE(_Survey), /*8*/ GEN_EVT_CODE(_SurveyDone), /*9*/ - GEN_EVT_CODE(_JoinBss) , /*10*/ + GEN_EVT_CODE(_JoinBss), /*10*/ GEN_EVT_CODE(_AddSTA), GEN_EVT_CODE(_DelSTA), GEN_EVT_CODE(_AtimDone), From 325d0d2c9d1187143eb0a3d5a6f16308a986bdb4 Mon Sep 17 00:00:00 2001 From: Deepak R Varma Date: Fri, 21 Oct 2022 03:02:35 +0530 Subject: [PATCH 0338/4122] staging: r8188eu: Remove unused macros Simple variants of macros PlatformEFIOWrite and PlatformEFIORead are defined but never used. As they do not appear to be designed for anything significant, we can remove them to avoid unexpected usage. 
Suggested-by: Julia Lawall Signed-off-by: Deepak R Varma Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/efaf637a14b6f7fdd0178e2aecf8abf17e6922f6.1666299151.git.drv@mailo.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/include/rtw_io.h | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/drivers/staging/r8188eu/include/rtw_io.h b/drivers/staging/r8188eu/include/rtw_io.h index 87fcf6c94ff3..e9744694204b 100644 --- a/drivers/staging/r8188eu/include/rtw_io.h +++ b/drivers/staging/r8188eu/include/rtw_io.h @@ -285,18 +285,4 @@ void bus_sync_io(struct io_queue *pio_q); u32 _ioreq2rwmem(struct io_queue *pio_q); void dev_power_down(struct adapter *Adapter, u8 bpwrup); -#define PlatformEFIOWrite1Byte(_a, _b, _c) \ - rtw_write8(_a, _b, _c) -#define PlatformEFIOWrite2Byte(_a, _b, _c) \ - rtw_write16(_a, _b, _c) -#define PlatformEFIOWrite4Byte(_a, _b, _c) \ - rtw_write32(_a, _b, _c) - -#define PlatformEFIORead1Byte(_a, _b) \ - rtw_read8(_a, _b) -#define PlatformEFIORead2Byte(_a, _b) \ - rtw_read16(_a, _b) -#define PlatformEFIORead4Byte(_a, _b) \ - rtw_read32(_a, _b) - #endif /* _RTL8711_IO_H_ */ From 302baa63e7553e7bf57df2f98d82855461d81bbe Mon Sep 17 00:00:00 2001 From: Emily Peri Date: Fri, 21 Oct 2022 12:54:55 -0700 Subject: [PATCH 0339/4122] staging: rtl8723bs: remove tab in variable definition Remove unnecessary tab in variable definition in rtw_ioctl_set. Issue found by checkpatch. 
Signed-off-by: Emily Peri Link: https://lore.kernel.org/r/ee1bcccb23b3d24eb87d0b08bfa817b4af692dc5.1666380274.git.eperi1024@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8723bs/core/rtw_ioctl_set.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c b/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c index 8c7daab141db..ede7d2930844 100644 --- a/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c +++ b/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c @@ -367,7 +367,7 @@ u8 rtw_set_802_11_disassociate(struct adapter *padapter) u8 rtw_set_802_11_bssid_list_scan(struct adapter *padapter, struct ndis_802_11_ssid *pssid, int ssid_max_num) { - struct mlme_priv *pmlmepriv = &padapter->mlmepriv; + struct mlme_priv *pmlmepriv = &padapter->mlmepriv; u8 res = true; if (!padapter) { From 83df6f3f2690533bf094c18bef39a016311b7a46 Mon Sep 17 00:00:00 2001 From: Emily Peri Date: Fri, 21 Oct 2022 12:54:56 -0700 Subject: [PATCH 0340/4122] staging: rtl8723bs: add newline after variable declaration Fix checkpatch style warning by adding newline after variable declaration in rtw_ioctl_set Signed-off-by: Emily Peri Link: https://lore.kernel.org/r/76e1bfd210d79e6d3f7cc09233621c8b741b2370.1666380274.git.eperi1024@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8723bs/core/rtw_ioctl_set.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c b/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c index ede7d2930844..47323160e72d 100644 --- a/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c +++ b/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c @@ -78,6 +78,7 @@ u8 rtw_do_join(struct adapter *padapter) goto exit; } else { int select_ret; + spin_unlock_bh(&(pmlmepriv->scanned_queue.lock)); select_ret = rtw_select_and_join_from_scanned_queue(pmlmepriv); if (select_ret == _SUCCESS) { From 19cbe487f55fd04bc0b83bc34b6175f9dc6ab4e4 Mon Sep 17 00:00:00 2001 From: Emily Peri 
Date: Fri, 21 Oct 2022 12:54:57 -0700 Subject: [PATCH 0341/4122] staging: rtl8723bs: use tab instead of spaces for indent Replace spaces with tab for indent and correct alignment for closing brace in rtw_ioctl_set. Issue found by checkpatch. Signed-off-by: Emily Peri Link: https://lore.kernel.org/r/0516cac046c617b55718fddb2aac3a50d543d84c.1666380274.git.eperi1024@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8723bs/core/rtw_ioctl_set.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c b/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c index 47323160e72d..3b44f0dd5b0a 100644 --- a/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c +++ b/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c @@ -312,7 +312,7 @@ u8 rtw_set_802_11_infrastructure_mode(struct adapter *padapter, if ((*pold_state == Ndis802_11Infrastructure) || (*pold_state == Ndis802_11IBSS)) { if (check_fwstate(pmlmepriv, _FW_LINKED) == true) rtw_indicate_disconnect(padapter); /* will clr Linked_state; before this function, we must have checked whether issue dis-assoc_cmd or not */ - } + } *pold_state = networktype; From 57d91e039070faa938622d28093ba1d031e29b3b Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Thu, 20 Oct 2022 12:44:42 +0100 Subject: [PATCH 0342/4122] leds: max8997: Don't error if there is no pdata The driver works just fine if no platform data is supplied. 
Signed-off-by: Paul Cercueil Reviewed-by: Andy Shevchenko Signed-off-by: Pavel Machek --- drivers/leds/leds-max8997.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/leds/leds-max8997.c b/drivers/leds/leds-max8997.c index c0bddb33888d..c8d7f55c9dec 100644 --- a/drivers/leds/leds-max8997.c +++ b/drivers/leds/leds-max8997.c @@ -238,11 +238,6 @@ static int max8997_led_probe(struct platform_device *pdev) char name[20]; int ret = 0; - if (pdata == NULL) { - dev_err(&pdev->dev, "no platform data\n"); - return -ENODEV; - } - led = devm_kzalloc(&pdev->dev, sizeof(*led), GFP_KERNEL); if (led == NULL) return -ENOMEM; @@ -258,7 +253,7 @@ static int max8997_led_probe(struct platform_device *pdev) led->iodev = iodev; /* initialize mode and brightness according to platform_data */ - if (pdata->led_pdata) { + if (pdata && pdata->led_pdata) { u8 mode = 0, brightness = 0; mode = pdata->led_pdata->mode[led->id]; From 3031993b3474794ecb71b6f969a3e60e4bda9d8a Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 21 Oct 2022 22:19:40 +0300 Subject: [PATCH 0343/4122] led: qcom-lpg: Fix sleeping in atomic lpg_brightness_set() function can sleep, while led's brightness_set() callback must be non-blocking. Change LPG driver to use brightness_set_blocking() instead. BUG: sleeping function called from invalid context at kernel/locking/mutex.c:580 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 0, name: swapper/0 preempt_count: 101, expected: 0 INFO: lockdep is turned off. CPU: 0 PID: 0 Comm: swapper/0 Tainted: G W 6.1.0-rc1-00014-gbe99b089c6fc-dirty #85 Hardware name: Qualcomm Technologies, Inc. 
DB820c (DT) Call trace: dump_backtrace.part.0+0xe4/0xf0 show_stack+0x18/0x40 dump_stack_lvl+0x88/0xb4 dump_stack+0x18/0x34 __might_resched+0x170/0x254 __might_sleep+0x48/0x9c __mutex_lock+0x4c/0x400 mutex_lock_nested+0x2c/0x40 lpg_brightness_single_set+0x40/0x90 led_set_brightness_nosleep+0x34/0x60 led_heartbeat_function+0x80/0x170 call_timer_fn+0xb8/0x340 __run_timers.part.0+0x20c/0x254 run_timer_softirq+0x3c/0x7c _stext+0x14c/0x578 ____do_softirq+0x10/0x20 call_on_irq_stack+0x2c/0x5c do_softirq_own_stack+0x1c/0x30 __irq_exit_rcu+0x164/0x170 irq_exit_rcu+0x10/0x40 el1_interrupt+0x38/0x50 el1h_64_irq_handler+0x18/0x2c el1h_64_irq+0x64/0x68 cpuidle_enter_state+0xc8/0x380 cpuidle_enter+0x38/0x50 do_idle+0x244/0x2d0 cpu_startup_entry+0x24/0x30 rest_init+0x128/0x1a0 arch_post_acpi_subsys_init+0x0/0x18 start_kernel+0x6f4/0x734 __primary_switched+0xbc/0xc4 Fixes: 24e2d05d1b68 ("leds: Add driver for Qualcomm LPG") Signed-off-by: Dmitry Baryshkov Signed-off-by: Pavel Machek --- drivers/leds/rgb/leds-qcom-lpg.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/leds/rgb/leds-qcom-lpg.c b/drivers/leds/rgb/leds-qcom-lpg.c index 02f51cc61837..c1a56259226f 100644 --- a/drivers/leds/rgb/leds-qcom-lpg.c +++ b/drivers/leds/rgb/leds-qcom-lpg.c @@ -602,8 +602,8 @@ static void lpg_brightness_set(struct lpg_led *led, struct led_classdev *cdev, lpg_lut_sync(lpg, lut_mask); } -static void lpg_brightness_single_set(struct led_classdev *cdev, - enum led_brightness value) +static int lpg_brightness_single_set(struct led_classdev *cdev, + enum led_brightness value) { struct lpg_led *led = container_of(cdev, struct lpg_led, cdev); struct mc_subled info; @@ -614,10 +614,12 @@ static void lpg_brightness_single_set(struct led_classdev *cdev, lpg_brightness_set(led, cdev, &info); mutex_unlock(&led->lpg->lock); + + return 0; } -static void lpg_brightness_mc_set(struct led_classdev *cdev, - enum led_brightness value) +static int 
lpg_brightness_mc_set(struct led_classdev *cdev, + enum led_brightness value) { struct led_classdev_mc *mc = lcdev_to_mccdev(cdev); struct lpg_led *led = container_of(mc, struct lpg_led, mcdev); @@ -628,6 +630,8 @@ static void lpg_brightness_mc_set(struct led_classdev *cdev, lpg_brightness_set(led, cdev, mc->subled_info); mutex_unlock(&led->lpg->lock); + + return 0; } static int lpg_blink_set(struct lpg_led *led, @@ -1118,7 +1122,7 @@ static int lpg_add_led(struct lpg *lpg, struct device_node *np) led->mcdev.num_colors = num_channels; cdev = &led->mcdev.led_cdev; - cdev->brightness_set = lpg_brightness_mc_set; + cdev->brightness_set_blocking = lpg_brightness_mc_set; cdev->blink_set = lpg_blink_mc_set; /* Register pattern accessors only if we have a LUT block */ @@ -1132,7 +1136,7 @@ static int lpg_add_led(struct lpg *lpg, struct device_node *np) return ret; cdev = &led->cdev; - cdev->brightness_set = lpg_brightness_single_set; + cdev->brightness_set_blocking = lpg_brightness_single_set; cdev->blink_set = lpg_blink_single_set; /* Register pattern accessors only if we have a LUT block */ @@ -1151,7 +1155,7 @@ static int lpg_add_led(struct lpg *lpg, struct device_node *np) else cdev->brightness = LED_OFF; - cdev->brightness_set(cdev, cdev->brightness); + cdev->brightness_set_blocking(cdev, cdev->brightness); init_data.fwnode = of_fwnode_handle(np); From 5f52a8ba7e91215c3d046d298fb328d1b9f7897d Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 21 Oct 2022 15:54:28 -0500 Subject: [PATCH 0344/4122] dt-bindings: leds: Add 'cpuX' to 'linux,default-trigger' Add 'cpu' and 'cpuN' to possible values for 'linux,default-trigger'. There's 45 cases of them in upstream dts files. 
Signed-off-by: Rob Herring Signed-off-by: Pavel Machek --- Documentation/devicetree/bindings/leds/common.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/leds/common.yaml b/Documentation/devicetree/bindings/leds/common.yaml index 3c14a98430e1..f5c57a580078 100644 --- a/Documentation/devicetree/bindings/leds/common.yaml +++ b/Documentation/devicetree/bindings/leds/common.yaml @@ -100,6 +100,7 @@ properties: - pattern # LED is triggered by SD/MMC activity - pattern: "^mmc[0-9]+$" + - pattern: "^cpu[0-9]*$" led-pattern: description: | From 375ac0b2c1f11d8274999e91546d69e3140c6c6a Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 20 Oct 2022 14:06:49 +0100 Subject: [PATCH 0345/4122] usb: ftdi-elan: remove variable err_count Variable err_count is just being incremented and it's never used anywhere else. The variable and the increment are redundant so remove it. Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20221020130649.1546112-1-colin.i.king@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/ftdi-elan.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/usb/misc/ftdi-elan.c b/drivers/usb/misc/ftdi-elan.c index b2f980409d0b..33b35788bd0b 100644 --- a/drivers/usb/misc/ftdi-elan.c +++ b/drivers/usb/misc/ftdi-elan.c @@ -1956,7 +1956,6 @@ static int ftdi_elan_synchronize(struct usb_ftdi *ftdi) int long_stop = 10; int retry_on_timeout = 5; int retry_on_empty = 10; - int err_count = 0; retval = ftdi_elan_flush_input_fifo(ftdi); if (retval) return retval; @@ -2051,7 +2050,6 @@ static int ftdi_elan_synchronize(struct usb_ftdi *ftdi) continue; } } else { - err_count += 1; dev_err(&ftdi->udev->dev, "error = %d\n", retval); if (read_stop-- > 0) { From 13cc02f115d010d078851fac7f347890e62c097d Mon Sep 17 00:00:00 2001 From: Jules Irenge Date: Sat, 1 Oct 2022 15:56:33 +0100 Subject: [PATCH 0346/4122] usbip: vudc: Convert snprintf() to sysfs_emit() Coccinelle reports a warning Warning: Use scnprintf 
or sprintf Following the advice on kernel documentation https://www.kernel.org/doc/html/latest/filesystems/sysfs.html For show(device *...) functions we should only use sysfs_emit() or sysfs_emit_at() especially when formatting the value to be returned to user space. Convert snprintf() to sysfs_emit() Signed-off-by: Jules Irenge Reviewed-by: Shuah Khan Link: https://lore.kernel.org/r/YzhVIaNGdM33pcts@octinomon Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/vudc_sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/usbip/vudc_sysfs.c b/drivers/usb/usbip/vudc_sysfs.c index c95e6b2bfd32..907a43a00896 100644 --- a/drivers/usb/usbip/vudc_sysfs.c +++ b/drivers/usb/usbip/vudc_sysfs.c @@ -242,7 +242,7 @@ static ssize_t usbip_status_show(struct device *dev, status = udc->ud.status; spin_unlock_irq(&udc->ud.lock); - return snprintf(out, PAGE_SIZE, "%d\n", status); + return sysfs_emit(out, "%d\n", status); } static DEVICE_ATTR_RO(usbip_status); From 27ef01e381c777521084724248c5736cd1cdda63 Mon Sep 17 00:00:00 2001 From: Xuezhi Zhang Date: Fri, 14 Oct 2022 19:06:06 +0800 Subject: [PATCH 0347/4122] usbip: convert sysfs snprintf to sysfs_emit Follow the advice of the Documentation/filesystems/sysfs.rst and show() should only use sysfs_emit() or sysfs_emit_at() when formatting the value to be returned to user space. 
Signed-off-by: Xuezhi Zhang Reviewed-by: Shuah Khan Link: https://lore.kernel.org/r/20221014110606.599352-1-zhangxuezhi3@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/stub_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/usbip/stub_dev.c b/drivers/usb/usbip/stub_dev.c index 3c6d452e3bf4..f92047d860f0 100644 --- a/drivers/usb/usbip/stub_dev.c +++ b/drivers/usb/usbip/stub_dev.c @@ -30,7 +30,7 @@ static ssize_t usbip_status_show(struct device *dev, status = sdev->ud.status; spin_unlock_irq(&sdev->ud.lock); - return snprintf(buf, PAGE_SIZE, "%d\n", status); + return sysfs_emit(buf, "%d\n", status); } static DEVICE_ATTR_RO(usbip_status); From 90732f1769165dcf0778d723ad188f6441a930f5 Mon Sep 17 00:00:00 2001 From: Dongliang Mu Date: Sun, 9 Oct 2022 15:23:05 +0800 Subject: [PATCH 0348/4122] usb: cdns3: adjust the partial logic of cdnsp_pci_remove In cdnsp_pci_remove, if pci_is_enabled returns true, it will call cdns_remove; else it will call kfree. Then both control flows go to pci_dev_put. Adjust this logic by modifying it to an if else. 
Signed-off-by: Dongliang Mu Acked-by: Pawel Laszczak Link: https://lore.kernel.org/r/20221009072305.1593707-1-dzm91@hust.edu.cn Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdnsp-pci.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/usb/cdns3/cdnsp-pci.c b/drivers/usb/cdns3/cdnsp-pci.c index fe8a114c586c..efd54ed918b9 100644 --- a/drivers/usb/cdns3/cdnsp-pci.c +++ b/drivers/usb/cdns3/cdnsp-pci.c @@ -192,14 +192,12 @@ static void cdnsp_pci_remove(struct pci_dev *pdev) if (pci_dev_run_wake(pdev)) pm_runtime_get_noresume(&pdev->dev); - if (!pci_is_enabled(func)) { + if (pci_is_enabled(func)) { + cdns_remove(cdnsp); + } else { kfree(cdnsp); - goto pci_put; } - cdns_remove(cdnsp); - -pci_put: pci_dev_put(func); } From 4e74b483a3ce87e173634ba238a84b7fe404061b Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 7 Oct 2022 21:32:10 +0100 Subject: [PATCH 0349/4122] USB: host: Kconfig: Fix spelling mistake "firwmare" -> "firmware" There is a spelling mistake in a Kconfig description. Fix it. Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20221007203210.2756505-1-colin.i.king@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/Kconfig b/drivers/usb/host/Kconfig index 247568bc17a2..8e8db71021a5 100644 --- a/drivers/usb/host/Kconfig +++ b/drivers/usb/host/Kconfig @@ -47,7 +47,7 @@ config USB_XHCI_PCI_RENESAS tristate "Support for additional Renesas xHCI controller with firmware" help Say 'Y' to enable the support for the Renesas xHCI controller with - firmware. Make sure you have the firwmare for the device and + firmware. Make sure you have the firmware for the device and installed on your system for this device to work. If unsure, say 'N'. 
From 61dd457c0188c0deef68c2b919c0a2defe5db388 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 5 Oct 2022 12:55:55 +0200 Subject: [PATCH 0350/4122] dt-bindings: usb: dwc2: Add some missing Lantiq variants These IP block variants appear in various vendor trees and are distinct variants which need to be handled. Cc: devicetree@vger.kernel.org Signed-off-by: Linus Walleij Acked-by: Rob Herring Link: https://lore.kernel.org/r/20221005105555.2665485-1-linus.walleij@linaro.org Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/usb/dwc2.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/usb/dwc2.yaml b/Documentation/devicetree/bindings/usb/dwc2.yaml index dc4988c0009c..1ab85489a3f8 100644 --- a/Documentation/devicetree/bindings/usb/dwc2.yaml +++ b/Documentation/devicetree/bindings/usb/dwc2.yaml @@ -43,7 +43,10 @@ properties: - const: rockchip,rk3066-usb - const: snps,dwc2 - const: lantiq,arx100-usb + - const: lantiq,ase-usb + - const: lantiq,danube-usb - const: lantiq,xrx200-usb + - const: lantiq,xrx300-usb - items: - enum: - amlogic,meson8-usb From 9b6447e04bc2a4d06f2ef74a583848c573a25dbc Mon Sep 17 00:00:00 2001 From: Jose Ignacio Tornos Martinez Date: Mon, 3 Oct 2022 11:10:16 +0200 Subject: [PATCH 0351/4122] USB: usbip: missing lock in stub down Missing lock in sysfs operation when we want to close the connection in order to check the status and send the down event in a safe way. 
Signed-off-by: Jose Ignacio Tornos Martinez Reviewed-by: Shuah Khan Link: https://lore.kernel.org/r/20221003091016.641900-1-jtornosm@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/stub_dev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/usbip/stub_dev.c b/drivers/usb/usbip/stub_dev.c index f92047d860f0..9c6954aad6c8 100644 --- a/drivers/usb/usbip/stub_dev.c +++ b/drivers/usb/usbip/stub_dev.c @@ -118,6 +118,8 @@ static ssize_t usbip_sockfd_store(struct device *dev, struct device_attribute *a } else { dev_info(dev, "stub down\n"); + mutex_lock(&sdev->ud.sysfs_lock); + spin_lock_irq(&sdev->ud.lock); if (sdev->ud.status != SDEV_ST_USED) goto err; From d182bf156c4cb8b08ce4a75e82b3357b14a4382d Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Tue, 11 Oct 2022 09:53:48 +0200 Subject: [PATCH 0352/4122] usb: gadget: uvc: default the ctrl request interface offsets For the userspace it is needed to distinguish between requests for the control or streaming interface. The userspace would have to parse the configfs to know which interface index it has to compare the ctrl requests against. Since the interface numbers are not fixed, e.g. for composite gadgets, the interface offset depends on the setup. The kernel has this information when handing over the ctrl request to the userspace. This patch removes the offset from the interface numbers and expose the default interface defines in the uapi g_uvc.h. 
Signed-off-by: Michael Grzeschik Link: https://lore.kernel.org/r/20221011075348.1786897-1-m.grzeschik@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_uvc.c | 15 ++++++++++++--- include/uapi/linux/usb/g_uvc.h | 3 +++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c index 6e196e06181e..6e131624011a 100644 --- a/drivers/usb/gadget/function/f_uvc.c +++ b/drivers/usb/gadget/function/f_uvc.c @@ -39,9 +39,6 @@ MODULE_PARM_DESC(trace, "Trace level bitmask"); /* string IDs are assigned dynamically */ -#define UVC_STRING_CONTROL_IDX 0 -#define UVC_STRING_STREAMING_IDX 1 - static struct usb_string uvc_en_us_strings[] = { /* [UVC_STRING_CONTROL_IDX].s = DYNAMIC, */ [UVC_STRING_STREAMING_IDX].s = "Video Streaming", @@ -228,6 +225,8 @@ uvc_function_setup(struct usb_function *f, const struct usb_ctrlrequest *ctrl) struct uvc_device *uvc = to_uvc(f); struct v4l2_event v4l2_event; struct uvc_event *uvc_event = (void *)&v4l2_event.u.data; + unsigned int interface = le16_to_cpu(ctrl->wIndex) & 0xff; + struct usb_ctrlrequest *mctrl; if ((ctrl->bRequestType & USB_TYPE_MASK) != USB_TYPE_CLASS) { uvcg_info(f, "invalid request type\n"); @@ -248,6 +247,16 @@ uvc_function_setup(struct usb_function *f, const struct usb_ctrlrequest *ctrl) memset(&v4l2_event, 0, sizeof(v4l2_event)); v4l2_event.type = UVC_EVENT_SETUP; memcpy(&uvc_event->req, ctrl, sizeof(uvc_event->req)); + + /* check for the interface number, fixup the interface number in + * the ctrl request so the userspace doesn't have to bother with + * offset and configfs parsing + */ + mctrl = &uvc_event->req; + mctrl->wIndex &= ~cpu_to_le16(0xff); + if (interface == uvc->streaming_intf) + mctrl->wIndex = cpu_to_le16(UVC_STRING_STREAMING_IDX); + v4l2_event_queue(&uvc->vdev, &v4l2_event); return 0; diff --git a/include/uapi/linux/usb/g_uvc.h b/include/uapi/linux/usb/g_uvc.h index 652f169a019e..8d7824dde1b2 100644 --- 
a/include/uapi/linux/usb/g_uvc.h +++ b/include/uapi/linux/usb/g_uvc.h @@ -21,6 +21,9 @@ #define UVC_EVENT_DATA (V4L2_EVENT_PRIVATE_START + 5) #define UVC_EVENT_LAST (V4L2_EVENT_PRIVATE_START + 5) +#define UVC_STRING_CONTROL_IDX 0 +#define UVC_STRING_STREAMING_IDX 1 + struct uvc_request_data { __s32 length; __u8 data[60]; From a84436a987e7f4ee8eeb62a8a5abcfc60b356d16 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Wed, 19 Oct 2022 17:55:52 +0300 Subject: [PATCH 0353/4122] usb: typec: retimer: Use device type for matching Device name is not reliable so using the type instead in retimer_fwnode_match(). This will also introduce is_typec_retimer() helper, and remove the static keyword from the retimer device type. That will make it accessible also in the main typec class. Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20221019145552.32493-1-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/retimer.c | 16 ++-------------- drivers/usb/typec/retimer.h | 4 ++++ 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/drivers/usb/typec/retimer.c b/drivers/usb/typec/retimer.c index ee94dbbe4745..3a4146ea6e7c 100644 --- a/drivers/usb/typec/retimer.c +++ b/drivers/usb/typec/retimer.c @@ -17,21 +17,9 @@ #include "class.h" #include "retimer.h" -static bool dev_name_ends_with(struct device *dev, const char *suffix) -{ - const char *name = dev_name(dev); - const int name_len = strlen(name); - const int suffix_len = strlen(suffix); - - if (suffix_len > name_len) - return false; - - return strcmp(name + (name_len - suffix_len), suffix) == 0; -} - static int retimer_fwnode_match(struct device *dev, const void *fwnode) { - return device_match_fwnode(dev, fwnode) && dev_name_ends_with(dev, "-retimer"); + return is_typec_retimer(dev) && device_match_fwnode(dev, fwnode); } static void *typec_retimer_match(struct fwnode_handle *fwnode, const char *id, void *data) @@ -97,7 +85,7 @@ static void typec_retimer_release(struct 
device *dev) kfree(to_typec_retimer(dev)); } -static const struct device_type typec_retimer_dev_type = { +const struct device_type typec_retimer_dev_type = { .name = "typec_retimer", .release = typec_retimer_release, }; diff --git a/drivers/usb/typec/retimer.h b/drivers/usb/typec/retimer.h index fa15951d4846..e34bd23323be 100644 --- a/drivers/usb/typec/retimer.h +++ b/drivers/usb/typec/retimer.h @@ -12,4 +12,8 @@ struct typec_retimer { #define to_typec_retimer(_dev_) container_of(_dev_, struct typec_retimer, dev) +const struct device_type typec_retimer_dev_type; + +#define is_typec_retimer(dev) ((dev)->type == &typec_retimer_dev_type) + #endif /* __USB_TYPEC_RETIMER__ */ From 32fee1df51109a117eb5063e950c372278688098 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 19 Oct 2022 17:29:31 +0200 Subject: [PATCH 0354/4122] usb: musb: remove unused davinci support The musb-davinci driver was only used on dm644x, which got removed in linux-6.0. The only remaining davinci machines are da8xx devicetree based and do not use this hardware. 
Signed-off-by: Arnd Bergmann Acked-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20221019152947.3857217-6-arnd@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/Kconfig | 12 - drivers/usb/musb/Makefile | 2 - drivers/usb/musb/cppi_dma.c | 1547 ----------------------------------- drivers/usb/musb/davinci.c | 606 -------------- drivers/usb/musb/davinci.h | 103 --- 5 files changed, 2270 deletions(-) delete mode 100644 drivers/usb/musb/cppi_dma.c delete mode 100644 drivers/usb/musb/davinci.c delete mode 100644 drivers/usb/musb/davinci.h diff --git a/drivers/usb/musb/Kconfig b/drivers/usb/musb/Kconfig index 6c8f7763e75e..f9eec666103c 100644 --- a/drivers/usb/musb/Kconfig +++ b/drivers/usb/musb/Kconfig @@ -70,12 +70,6 @@ config USB_MUSB_SUNXI select GENERIC_PHY select SUNXI_SRAM -config USB_MUSB_DAVINCI - tristate "DaVinci" - depends on ARCH_DAVINCI_DMx - depends on NOP_USB_XCEIV - depends on BROKEN - config USB_MUSB_DA8XX tristate "DA8xx/OMAP-L1x" depends on ARCH_DAVINCI_DA8XX @@ -161,12 +155,6 @@ config USB_INVENTRA_DMA help Enable DMA transfers using Mentor's engine. -config USB_TI_CPPI_DMA - bool 'TI CPPI (Davinci)' - depends on USB_MUSB_DAVINCI - help - Enable DMA transfers when TI CPPI DMA is available. 
- config USB_TI_CPPI41_DMA bool 'TI CPPI 4.1' depends on (ARCH_OMAP || ARCH_DAVINCI_DA8XX) && DMADEVICES diff --git a/drivers/usb/musb/Makefile b/drivers/usb/musb/Makefile index 51dd54a8de49..44a9e27b2157 100644 --- a/drivers/usb/musb/Makefile +++ b/drivers/usb/musb/Makefile @@ -19,7 +19,6 @@ obj-$(CONFIG_USB_MUSB_OMAP2PLUS) += omap2430.o obj-$(CONFIG_USB_MUSB_AM35X) += am35x.o obj-$(CONFIG_USB_MUSB_DSPS) += musb_dsps.o obj-$(CONFIG_USB_MUSB_TUSB6010) += tusb6010.o -obj-$(CONFIG_USB_MUSB_DAVINCI) += davinci.o obj-$(CONFIG_USB_MUSB_DA8XX) += da8xx.o obj-$(CONFIG_USB_MUSB_UX500) += ux500.o obj-$(CONFIG_USB_MUSB_JZ4740) += jz4740.o @@ -33,7 +32,6 @@ obj-$(CONFIG_USB_MUSB_POLARFIRE_SOC) += mpfs.o # though PIO is always there to back up DMA, and for ep0 musb_hdrc-$(CONFIG_USB_INVENTRA_DMA) += musbhsdma.o -musb_hdrc-$(CONFIG_USB_TI_CPPI_DMA) += cppi_dma.o musb_hdrc-$(CONFIG_USB_TUSB_OMAP_DMA) += tusb6010_omap.o musb_hdrc-$(CONFIG_USB_UX500_DMA) += ux500_dma.o musb_hdrc-$(CONFIG_USB_TI_CPPI41_DMA) += musb_cppi41.o diff --git a/drivers/usb/musb/cppi_dma.c b/drivers/usb/musb/cppi_dma.c deleted file mode 100644 index edb5b63d7063..000000000000 --- a/drivers/usb/musb/cppi_dma.c +++ /dev/null @@ -1,1547 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2005-2006 by Texas Instruments - * - * This file implements a DMA interface using TI's CPPI DMA. - * For now it's DaVinci-only, but CPPI isn't specific to DaVinci or USB. - * The TUSB6020, using VLYNQ, has CPPI that looks much like DaVinci. - */ - -#include -#include -#include -#include - -#include "musb_core.h" -#include "musb_debug.h" -#include "cppi_dma.h" -#include "davinci.h" - - -/* CPPI DMA status 7-mar-2006: - * - * - See musb_{host,gadget}.c for more info - * - * - Correct RX DMA generally forces the engine into irq-per-packet mode, - * which can easily saturate the CPU under non-mass-storage loads. 
- * - * NOTES 24-aug-2006 (2.6.18-rc4): - * - * - peripheral RXDMA wedged in a test with packets of length 512/512/1. - * evidently after the 1 byte packet was received and acked, the queue - * of BDs got garbaged so it wouldn't empty the fifo. (rxcsr 0x2003, - * and RX DMA0: 4 left, 80000000 8feff880, 8feff860 8feff860; 8f321401 - * 004001ff 00000001 .. 8feff860) Host was just getting NAKed on tx - * of its next (512 byte) packet. IRQ issues? - * - * REVISIT: the "transfer DMA" glue between CPPI and USB fifos will - * evidently also directly update the RX and TX CSRs ... so audit all - * host and peripheral side DMA code to avoid CSR access after DMA has - * been started. - */ - -/* REVISIT now we can avoid preallocating these descriptors; or - * more simply, switch to a global freelist not per-channel ones. - * Note: at full speed, 64 descriptors == 4K bulk data. - */ -#define NUM_TXCHAN_BD 64 -#define NUM_RXCHAN_BD 64 - -static inline void cpu_drain_writebuffer(void) -{ - wmb(); -#ifdef CONFIG_CPU_ARM926T - /* REVISIT this "should not be needed", - * but lack of it sure seemed to hurt ... - */ - asm("mcr p15, 0, r0, c7, c10, 4 @ drain write buffer\n"); -#endif -} - -static inline struct cppi_descriptor *cppi_bd_alloc(struct cppi_channel *c) -{ - struct cppi_descriptor *bd = c->freelist; - - if (bd) - c->freelist = bd->next; - return bd; -} - -static inline void -cppi_bd_free(struct cppi_channel *c, struct cppi_descriptor *bd) -{ - if (!bd) - return; - bd->next = c->freelist; - c->freelist = bd; -} - -/* - * Start DMA controller - * - * Initialize the DMA controller as necessary. 
- */ - -/* zero out entire rx state RAM entry for the channel */ -static void cppi_reset_rx(struct cppi_rx_stateram __iomem *rx) -{ - musb_writel(&rx->rx_skipbytes, 0, 0); - musb_writel(&rx->rx_head, 0, 0); - musb_writel(&rx->rx_sop, 0, 0); - musb_writel(&rx->rx_current, 0, 0); - musb_writel(&rx->rx_buf_current, 0, 0); - musb_writel(&rx->rx_len_len, 0, 0); - musb_writel(&rx->rx_cnt_cnt, 0, 0); -} - -/* zero out entire tx state RAM entry for the channel */ -static void cppi_reset_tx(struct cppi_tx_stateram __iomem *tx, u32 ptr) -{ - musb_writel(&tx->tx_head, 0, 0); - musb_writel(&tx->tx_buf, 0, 0); - musb_writel(&tx->tx_current, 0, 0); - musb_writel(&tx->tx_buf_current, 0, 0); - musb_writel(&tx->tx_info, 0, 0); - musb_writel(&tx->tx_rem_len, 0, 0); - /* musb_writel(&tx->tx_dummy, 0, 0); */ - musb_writel(&tx->tx_complete, 0, ptr); -} - -static void cppi_pool_init(struct cppi *cppi, struct cppi_channel *c) -{ - int j; - - /* initialize channel fields */ - c->head = NULL; - c->tail = NULL; - c->last_processed = NULL; - c->channel.status = MUSB_DMA_STATUS_UNKNOWN; - c->controller = cppi; - c->is_rndis = 0; - c->freelist = NULL; - - /* build the BD Free list for the channel */ - for (j = 0; j < NUM_TXCHAN_BD + 1; j++) { - struct cppi_descriptor *bd; - dma_addr_t dma; - - bd = dma_pool_alloc(cppi->pool, GFP_KERNEL, &dma); - bd->dma = dma; - cppi_bd_free(c, bd); - } -} - -static int cppi_channel_abort(struct dma_channel *); - -static void cppi_pool_free(struct cppi_channel *c) -{ - struct cppi *cppi = c->controller; - struct cppi_descriptor *bd; - - (void) cppi_channel_abort(&c->channel); - c->channel.status = MUSB_DMA_STATUS_UNKNOWN; - c->controller = NULL; - - /* free all its bds */ - bd = c->last_processed; - do { - if (bd) - dma_pool_free(cppi->pool, bd, bd->dma); - bd = cppi_bd_alloc(c); - } while (bd); - c->last_processed = NULL; -} - -static void cppi_controller_start(struct cppi *controller) -{ - void __iomem *tibase; - int i; - - /* do whatever is necessary to 
start controller */ - for (i = 0; i < ARRAY_SIZE(controller->tx); i++) { - controller->tx[i].transmit = true; - controller->tx[i].index = i; - } - for (i = 0; i < ARRAY_SIZE(controller->rx); i++) { - controller->rx[i].transmit = false; - controller->rx[i].index = i; - } - - /* setup BD list on a per channel basis */ - for (i = 0; i < ARRAY_SIZE(controller->tx); i++) - cppi_pool_init(controller, controller->tx + i); - for (i = 0; i < ARRAY_SIZE(controller->rx); i++) - cppi_pool_init(controller, controller->rx + i); - - tibase = controller->tibase; - INIT_LIST_HEAD(&controller->tx_complete); - - /* initialise tx/rx channel head pointers to zero */ - for (i = 0; i < ARRAY_SIZE(controller->tx); i++) { - struct cppi_channel *tx_ch = controller->tx + i; - struct cppi_tx_stateram __iomem *tx; - - INIT_LIST_HEAD(&tx_ch->tx_complete); - - tx = tibase + DAVINCI_TXCPPI_STATERAM_OFFSET(i); - tx_ch->state_ram = tx; - cppi_reset_tx(tx, 0); - } - for (i = 0; i < ARRAY_SIZE(controller->rx); i++) { - struct cppi_channel *rx_ch = controller->rx + i; - struct cppi_rx_stateram __iomem *rx; - - INIT_LIST_HEAD(&rx_ch->tx_complete); - - rx = tibase + DAVINCI_RXCPPI_STATERAM_OFFSET(i); - rx_ch->state_ram = rx; - cppi_reset_rx(rx); - } - - /* enable individual cppi channels */ - musb_writel(tibase, DAVINCI_TXCPPI_INTENAB_REG, - DAVINCI_DMA_ALL_CHANNELS_ENABLE); - musb_writel(tibase, DAVINCI_RXCPPI_INTENAB_REG, - DAVINCI_DMA_ALL_CHANNELS_ENABLE); - - /* enable tx/rx CPPI control */ - musb_writel(tibase, DAVINCI_TXCPPI_CTRL_REG, DAVINCI_DMA_CTRL_ENABLE); - musb_writel(tibase, DAVINCI_RXCPPI_CTRL_REG, DAVINCI_DMA_CTRL_ENABLE); - - /* disable RNDIS mode, also host rx RNDIS autorequest */ - musb_writel(tibase, DAVINCI_RNDIS_REG, 0); - musb_writel(tibase, DAVINCI_AUTOREQ_REG, 0); -} - -/* - * Stop DMA controller - * - * De-Init the DMA controller as necessary. 
- */ - -static void cppi_controller_stop(struct cppi *controller) -{ - void __iomem *tibase; - int i; - struct musb *musb; - - musb = controller->controller.musb; - - tibase = controller->tibase; - /* DISABLE INDIVIDUAL CHANNEL Interrupts */ - musb_writel(tibase, DAVINCI_TXCPPI_INTCLR_REG, - DAVINCI_DMA_ALL_CHANNELS_ENABLE); - musb_writel(tibase, DAVINCI_RXCPPI_INTCLR_REG, - DAVINCI_DMA_ALL_CHANNELS_ENABLE); - - musb_dbg(musb, "Tearing down RX and TX Channels"); - for (i = 0; i < ARRAY_SIZE(controller->tx); i++) { - /* FIXME restructure of txdma to use bds like rxdma */ - controller->tx[i].last_processed = NULL; - cppi_pool_free(controller->tx + i); - } - for (i = 0; i < ARRAY_SIZE(controller->rx); i++) - cppi_pool_free(controller->rx + i); - - /* in Tx Case proper teardown is supported. We resort to disabling - * Tx/Rx CPPI after cleanup of Tx channels. Before TX teardown is - * complete TX CPPI cannot be disabled. - */ - /*disable tx/rx cppi */ - musb_writel(tibase, DAVINCI_TXCPPI_CTRL_REG, DAVINCI_DMA_CTRL_DISABLE); - musb_writel(tibase, DAVINCI_RXCPPI_CTRL_REG, DAVINCI_DMA_CTRL_DISABLE); -} - -/* While dma channel is allocated, we only want the core irqs active - * for fault reports, otherwise we'd get irqs that we don't care about. - * Except for TX irqs, where dma done != fifo empty and reusable ... - * - * NOTE: docs don't say either way, but irq masking **enables** irqs. - * - * REVISIT same issue applies to pure PIO usage too, and non-cppi dma... - */ -static inline void core_rxirq_disable(void __iomem *tibase, unsigned epnum) -{ - musb_writel(tibase, DAVINCI_USB_INT_MASK_CLR_REG, 1 << (epnum + 8)); -} - -static inline void core_rxirq_enable(void __iomem *tibase, unsigned epnum) -{ - musb_writel(tibase, DAVINCI_USB_INT_MASK_SET_REG, 1 << (epnum + 8)); -} - - -/* - * Allocate a CPPI Channel for DMA. 
With CPPI, channels are bound to - * each transfer direction of a non-control endpoint, so allocating - * (and deallocating) is mostly a way to notice bad housekeeping on - * the software side. We assume the irqs are always active. - */ -static struct dma_channel * -cppi_channel_allocate(struct dma_controller *c, - struct musb_hw_ep *ep, u8 transmit) -{ - struct cppi *controller; - u8 index; - struct cppi_channel *cppi_ch; - void __iomem *tibase; - struct musb *musb; - - controller = container_of(c, struct cppi, controller); - tibase = controller->tibase; - musb = c->musb; - - /* ep0 doesn't use DMA; remember cppi indices are 0..N-1 */ - index = ep->epnum - 1; - - /* return the corresponding CPPI Channel Handle, and - * probably disable the non-CPPI irq until we need it. - */ - if (transmit) { - if (index >= ARRAY_SIZE(controller->tx)) { - musb_dbg(musb, "no %cX%d CPPI channel", 'T', index); - return NULL; - } - cppi_ch = controller->tx + index; - } else { - if (index >= ARRAY_SIZE(controller->rx)) { - musb_dbg(musb, "no %cX%d CPPI channel", 'R', index); - return NULL; - } - cppi_ch = controller->rx + index; - core_rxirq_disable(tibase, ep->epnum); - } - - /* REVISIT make this an error later once the same driver code works - * with the other DMA engine too - */ - if (cppi_ch->hw_ep) - musb_dbg(musb, "re-allocating DMA%d %cX channel %p", - index, transmit ? 'T' : 'R', cppi_ch); - cppi_ch->hw_ep = ep; - cppi_ch->channel.status = MUSB_DMA_STATUS_FREE; - cppi_ch->channel.max_len = 0x7fffffff; - - musb_dbg(musb, "Allocate CPPI%d %cX", index, transmit ? 'T' : 'R'); - return &cppi_ch->channel; -} - -/* Release a CPPI Channel. */ -static void cppi_channel_release(struct dma_channel *channel) -{ - struct cppi_channel *c; - void __iomem *tibase; - - /* REVISIT: for paranoia, check state and abort if needed... 
*/ - - c = container_of(channel, struct cppi_channel, channel); - tibase = c->controller->tibase; - if (!c->hw_ep) - musb_dbg(c->controller->controller.musb, - "releasing idle DMA channel %p", c); - else if (!c->transmit) - core_rxirq_enable(tibase, c->index + 1); - - /* for now, leave its cppi IRQ enabled (we won't trigger it) */ - c->hw_ep = NULL; - channel->status = MUSB_DMA_STATUS_UNKNOWN; -} - -/* Context: controller irqlocked */ -static void -cppi_dump_rx(int level, struct cppi_channel *c, const char *tag) -{ - void __iomem *base = c->controller->mregs; - struct cppi_rx_stateram __iomem *rx = c->state_ram; - - musb_ep_select(base, c->index + 1); - - musb_dbg(c->controller->controller.musb, - "RX DMA%d%s: %d left, csr %04x, " - "%08x H%08x S%08x C%08x, " - "B%08x L%08x %08x .. %08x", - c->index, tag, - musb_readl(c->controller->tibase, - DAVINCI_RXCPPI_BUFCNT0_REG + 4 * c->index), - musb_readw(c->hw_ep->regs, MUSB_RXCSR), - - musb_readl(&rx->rx_skipbytes, 0), - musb_readl(&rx->rx_head, 0), - musb_readl(&rx->rx_sop, 0), - musb_readl(&rx->rx_current, 0), - - musb_readl(&rx->rx_buf_current, 0), - musb_readl(&rx->rx_len_len, 0), - musb_readl(&rx->rx_cnt_cnt, 0), - musb_readl(&rx->rx_complete, 0) - ); -} - -/* Context: controller irqlocked */ -static void -cppi_dump_tx(int level, struct cppi_channel *c, const char *tag) -{ - void __iomem *base = c->controller->mregs; - struct cppi_tx_stateram __iomem *tx = c->state_ram; - - musb_ep_select(base, c->index + 1); - - musb_dbg(c->controller->controller.musb, - "TX DMA%d%s: csr %04x, " - "H%08x S%08x C%08x %08x, " - "F%08x L%08x .. 
%08x", - c->index, tag, - musb_readw(c->hw_ep->regs, MUSB_TXCSR), - - musb_readl(&tx->tx_head, 0), - musb_readl(&tx->tx_buf, 0), - musb_readl(&tx->tx_current, 0), - musb_readl(&tx->tx_buf_current, 0), - - musb_readl(&tx->tx_info, 0), - musb_readl(&tx->tx_rem_len, 0), - /* dummy/unused word 6 */ - musb_readl(&tx->tx_complete, 0) - ); -} - -/* Context: controller irqlocked */ -static inline void -cppi_rndis_update(struct cppi_channel *c, int is_rx, - void __iomem *tibase, int is_rndis) -{ - /* we may need to change the rndis flag for this cppi channel */ - if (c->is_rndis != is_rndis) { - u32 value = musb_readl(tibase, DAVINCI_RNDIS_REG); - u32 temp = 1 << (c->index); - - if (is_rx) - temp <<= 16; - if (is_rndis) - value |= temp; - else - value &= ~temp; - musb_writel(tibase, DAVINCI_RNDIS_REG, value); - c->is_rndis = is_rndis; - } -} - -static void cppi_dump_rxbd(const char *tag, struct cppi_descriptor *bd) -{ - pr_debug("RXBD/%s %08x: " - "nxt %08x buf %08x off.blen %08x opt.plen %08x\n", - tag, bd->dma, - bd->hw_next, bd->hw_bufp, bd->hw_off_len, - bd->hw_options); -} - -static void cppi_dump_rxq(int level, const char *tag, struct cppi_channel *rx) -{ - struct cppi_descriptor *bd; - - cppi_dump_rx(level, rx, tag); - if (rx->last_processed) - cppi_dump_rxbd("last", rx->last_processed); - for (bd = rx->head; bd; bd = bd->next) - cppi_dump_rxbd("active", bd); -} - - -/* NOTE: DaVinci autoreq is ignored except for host side "RNDIS" mode RX; - * so we won't ever use it (see "CPPI RX Woes" below). - */ -static inline int cppi_autoreq_update(struct cppi_channel *rx, - void __iomem *tibase, int onepacket, unsigned n_bds) -{ - u32 val; - -#ifdef RNDIS_RX_IS_USABLE - u32 tmp; - /* assert(is_host_active(musb)) */ - - /* start from "AutoReq never" */ - tmp = musb_readl(tibase, DAVINCI_AUTOREQ_REG); - val = tmp & ~((0x3) << (rx->index * 2)); - - /* HCD arranged reqpkt for packet #1. we arrange int - * for all but the last one, maybe in two segments. 
- */ - if (!onepacket) { -#if 0 - /* use two segments, autoreq "all" then the last "never" */ - val |= ((0x3) << (rx->index * 2)); - n_bds--; -#else - /* one segment, autoreq "all-but-last" */ - val |= ((0x1) << (rx->index * 2)); -#endif - } - - if (val != tmp) { - int n = 100; - - /* make sure that autoreq is updated before continuing */ - musb_writel(tibase, DAVINCI_AUTOREQ_REG, val); - do { - tmp = musb_readl(tibase, DAVINCI_AUTOREQ_REG); - if (tmp == val) - break; - cpu_relax(); - } while (n-- > 0); - } -#endif - - /* REQPKT is turned off after each segment */ - if (n_bds && rx->channel.actual_len) { - void __iomem *regs = rx->hw_ep->regs; - - val = musb_readw(regs, MUSB_RXCSR); - if (!(val & MUSB_RXCSR_H_REQPKT)) { - val |= MUSB_RXCSR_H_REQPKT | MUSB_RXCSR_H_WZC_BITS; - musb_writew(regs, MUSB_RXCSR, val); - /* flush writebuffer */ - val = musb_readw(regs, MUSB_RXCSR); - } - } - return n_bds; -} - - -/* Buffer enqueuing Logic: - * - * - RX builds new queues each time, to help handle routine "early - * termination" cases (faults, including errors and short reads) - * more correctly. - * - * - for now, TX reuses the same queue of BDs every time - * - * REVISIT long term, we want a normal dynamic model. - * ... the goal will be to append to the - * existing queue, processing completed "dma buffers" (segments) on the fly. - * - * Otherwise we force an IRQ latency between requests, which slows us a lot - * (especially in "transparent" dma). Unfortunately that model seems to be - * inherent in the DMA model from the Mentor code, except in the rare case - * of transfers big enough (~128+ KB) that we could append "middle" segments - * in the TX paths. (RX can't do this, see below.) - * - * That's true even in the CPPI- friendly iso case, where most urbs have - * several small segments provided in a group and where the "packet at a time" - * "transparent" DMA model is always correct, even on the RX side. 
- */ - -/* - * CPPI TX: - * ======== - * TX is a lot more reasonable than RX; it doesn't need to run in - * irq-per-packet mode very often. RNDIS mode seems to behave too - * (except how it handles the exactly-N-packets case). Building a - * txdma queue with multiple requests (urb or usb_request) looks - * like it would work ... but fault handling would need much testing. - * - * The main issue with TX mode RNDIS relates to transfer lengths that - * are an exact multiple of the packet length. It appears that there's - * a hiccup in that case (maybe the DMA completes before the ZLP gets - * written?) boiling down to not being able to rely on CPPI writing any - * terminating zero length packet before the next transfer is written. - * So that's punted to PIO; better yet, gadget drivers can avoid it. - * - * Plus, there's allegedly an undocumented constraint that rndis transfer - * length be a multiple of 64 bytes ... but the chip doesn't act that - * way, and we really don't _want_ that behavior anyway. - * - * On TX, "transparent" mode works ... although experiments have shown - * problems trying to use the SOP/EOP bits in different USB packets. - * - * REVISIT try to handle terminating zero length packets using CPPI - * instead of doing it by PIO after an IRQ. (Meanwhile, make Ethernet - * links avoid that issue by forcing them to avoid zlps.) - */ -static void -cppi_next_tx_segment(struct musb *musb, struct cppi_channel *tx) -{ - unsigned maxpacket = tx->maxpacket; - dma_addr_t addr = tx->buf_dma + tx->offset; - size_t length = tx->buf_len - tx->offset; - struct cppi_descriptor *bd; - unsigned n_bds; - unsigned i; - struct cppi_tx_stateram __iomem *tx_ram = tx->state_ram; - int rndis; - - /* TX can use the CPPI "rndis" mode, where we can probably fit this - * transfer in one BD and one IRQ. The only time we would NOT want - * to use it is when hardware constraints prevent it, or if we'd - * trigger the "send a ZLP?" confusion. 
- */ - rndis = (maxpacket & 0x3f) == 0 - && length > maxpacket - && length < 0xffff - && (length % maxpacket) != 0; - - if (rndis) { - maxpacket = length; - n_bds = 1; - } else { - if (length) - n_bds = DIV_ROUND_UP(length, maxpacket); - else - n_bds = 1; - n_bds = min(n_bds, (unsigned) NUM_TXCHAN_BD); - length = min(n_bds * maxpacket, length); - } - - musb_dbg(musb, "TX DMA%d, pktSz %d %s bds %d dma 0x%llx len %u", - tx->index, - maxpacket, - rndis ? "rndis" : "transparent", - n_bds, - (unsigned long long)addr, length); - - cppi_rndis_update(tx, 0, musb->ctrl_base, rndis); - - /* assuming here that channel_program is called during - * transfer initiation ... current code maintains state - * for one outstanding request only (no queues, not even - * the implicit ones of an iso urb). - */ - - bd = tx->freelist; - tx->head = bd; - tx->last_processed = NULL; - - /* FIXME use BD pool like RX side does, and just queue - * the minimum number for this request. - */ - - /* Prepare queue of BDs first, then hand it to hardware. - * All BDs except maybe the last should be of full packet - * size; for RNDIS there _is_ only that last packet. - */ - for (i = 0; i < n_bds; ) { - if (++i < n_bds && bd->next) - bd->hw_next = bd->next->dma; - else - bd->hw_next = 0; - - bd->hw_bufp = tx->buf_dma + tx->offset; - - /* FIXME set EOP only on the last packet, - * SOP only on the first ... 
avoid IRQs - */ - if ((tx->offset + maxpacket) <= tx->buf_len) { - tx->offset += maxpacket; - bd->hw_off_len = maxpacket; - bd->hw_options = CPPI_SOP_SET | CPPI_EOP_SET - | CPPI_OWN_SET | maxpacket; - } else { - /* only this one may be a partial USB Packet */ - u32 partial_len; - - partial_len = tx->buf_len - tx->offset; - tx->offset = tx->buf_len; - bd->hw_off_len = partial_len; - - bd->hw_options = CPPI_SOP_SET | CPPI_EOP_SET - | CPPI_OWN_SET | partial_len; - if (partial_len == 0) - bd->hw_options |= CPPI_ZERO_SET; - } - - musb_dbg(musb, "TXBD %p: nxt %08x buf %08x len %04x opt %08x", - bd, bd->hw_next, bd->hw_bufp, - bd->hw_off_len, bd->hw_options); - - /* update the last BD enqueued to the list */ - tx->tail = bd; - bd = bd->next; - } - - /* BDs live in DMA-coherent memory, but writes might be pending */ - cpu_drain_writebuffer(); - - /* Write to the HeadPtr in state RAM to trigger */ - musb_writel(&tx_ram->tx_head, 0, (u32)tx->freelist->dma); - - cppi_dump_tx(5, tx, "/S"); -} - -/* - * CPPI RX Woes: - * ============= - * Consider a 1KB bulk RX buffer in two scenarios: (a) it's fed two 300 byte - * packets back-to-back, and (b) it's fed two 512 byte packets back-to-back. - * (Full speed transfers have similar scenarios.) - * - * The correct behavior for Linux is that (a) fills the buffer with 300 bytes, - * and the next packet goes into a buffer that's queued later; while (b) fills - * the buffer with 1024 bytes. How to do that with CPPI? - * - * - RX queues in "rndis" mode -- one single BD -- handle (a) correctly, but - * (b) loses **BADLY** because nothing (!) happens when that second packet - * fills the buffer, much less when a third one arrives. (Which makes this - * not a "true" RNDIS mode. In the RNDIS protocol short-packet termination - * is optional, and it's fine if peripherals -- not hosts! -- pad messages - * out to end-of-buffer. Standard PCI host controller DMA descriptors - * implement that mode by default ... which is no accident.) 
- * - * - RX queues in "transparent" mode -- two BDs with 512 bytes each -- have - * converse problems: (b) is handled right, but (a) loses badly. CPPI RX - * ignores SOP/EOP markings and processes both of those BDs; so both packets - * are loaded into the buffer (with a 212 byte gap between them), and the next - * buffer queued will NOT get its 300 bytes of data. (It seems like SOP/EOP - * are intended as outputs for RX queues, not inputs...) - * - * - A variant of "transparent" mode -- one BD at a time -- is the only way to - * reliably make both cases work, with software handling both cases correctly - * and at the significant penalty of needing an IRQ per packet. (The lack of - * I/O overlap can be slightly ameliorated by enabling double buffering.) - * - * So how to get rid of IRQ-per-packet? The transparent multi-BD case could - * be used in special cases like mass storage, which sets URB_SHORT_NOT_OK - * (or maybe its peripheral side counterpart) to flag (a) scenarios as errors - * with guaranteed driver level fault recovery and scrubbing out what's left - * of that garbaged datastream. - * - * But there seems to be no way to identify the cases where CPPI RNDIS mode - * is appropriate -- which do NOT include RNDIS host drivers, but do include - * the CDC Ethernet driver! -- and the documentation is incomplete/wrong. - * So we can't _ever_ use RX RNDIS mode ... except by using a heuristic - * that applies best on the peripheral side (and which could fail rudely). - * - * Leaving only "transparent" mode; we avoid multi-bd modes in almost all - * cases other than mass storage class. Otherwise we're correct but slow, - * since CPPI penalizes our need for a "true RNDIS" default mode. - */ - - -/* Heuristic, intended to kick in for ethernet/rndis peripheral ONLY - * - * IFF - * (a) peripheral mode ... 
since rndis peripherals could pad their - * writes to hosts, causing i/o failure; or we'd have to cope with - * a largely unknowable variety of host side protocol variants - * (b) and short reads are NOT errors ... since full reads would - * cause those same i/o failures - * (c) and read length is - * - less than 64KB (max per cppi descriptor) - * - not a multiple of 4096 (g_zero default, full reads typical) - * - N (>1) packets long, ditto (full reads not EXPECTED) - * THEN - * try rx rndis mode - * - * Cost of heuristic failing: RXDMA wedges at the end of transfers that - * fill out the whole buffer. Buggy host side usb network drivers could - * trigger that, but "in the field" such bugs seem to be all but unknown. - * - * So this module parameter lets the heuristic be disabled. When using - * gadgetfs, the heuristic will probably need to be disabled. - */ -static bool cppi_rx_rndis = 1; - -module_param(cppi_rx_rndis, bool, 0); -MODULE_PARM_DESC(cppi_rx_rndis, "enable/disable RX RNDIS heuristic"); - - -/** - * cppi_next_rx_segment - dma read for the next chunk of a buffer - * @musb: the controller - * @rx: dma channel - * @onepacket: true unless caller treats short reads as errors, and - * performs fault recovery above usbcore. - * Context: controller irqlocked - * - * See above notes about why we can't use multi-BD RX queues except in - * rare cases (mass storage class), and can never use the hardware "rndis" - * mode (since it's not a "true" RNDIS mode) with complete safety.. - * - * It's ESSENTIAL that callers specify "onepacket" mode unless they kick in - * code to recover from corrupted datastreams after each short transfer. 
- */ -static void -cppi_next_rx_segment(struct musb *musb, struct cppi_channel *rx, int onepacket) -{ - unsigned maxpacket = rx->maxpacket; - dma_addr_t addr = rx->buf_dma + rx->offset; - size_t length = rx->buf_len - rx->offset; - struct cppi_descriptor *bd, *tail; - unsigned n_bds; - unsigned i; - void __iomem *tibase = musb->ctrl_base; - int is_rndis = 0; - struct cppi_rx_stateram __iomem *rx_ram = rx->state_ram; - struct cppi_descriptor *d; - - if (onepacket) { - /* almost every USB driver, host or peripheral side */ - n_bds = 1; - - /* maybe apply the heuristic above */ - if (cppi_rx_rndis - && is_peripheral_active(musb) - && length > maxpacket - && (length & ~0xffff) == 0 - && (length & 0x0fff) != 0 - && (length & (maxpacket - 1)) == 0) { - maxpacket = length; - is_rndis = 1; - } - } else { - /* virtually nothing except mass storage class */ - if (length > 0xffff) { - n_bds = 0xffff / maxpacket; - length = n_bds * maxpacket; - } else { - n_bds = DIV_ROUND_UP(length, maxpacket); - } - if (n_bds == 1) - onepacket = 1; - else - n_bds = min(n_bds, (unsigned) NUM_RXCHAN_BD); - } - - /* In host mode, autorequest logic can generate some IN tokens; it's - * tricky since we can't leave REQPKT set in RXCSR after the transfer - * finishes. So: multipacket transfers involve two or more segments. - * And always at least two IRQs ... RNDIS mode is not an option. - */ - if (is_host_active(musb)) - n_bds = cppi_autoreq_update(rx, tibase, onepacket, n_bds); - - cppi_rndis_update(rx, 1, musb->ctrl_base, is_rndis); - - length = min(n_bds * maxpacket, length); - - musb_dbg(musb, "RX DMA%d seg, maxp %d %s bds %d (cnt %d) " - "dma 0x%llx len %u %u/%u", - rx->index, maxpacket, - onepacket - ? (is_rndis ? 
"rndis" : "onepacket") - : "multipacket", - n_bds, - musb_readl(tibase, - DAVINCI_RXCPPI_BUFCNT0_REG + (rx->index * 4)) - & 0xffff, - (unsigned long long)addr, length, - rx->channel.actual_len, rx->buf_len); - - /* only queue one segment at a time, since the hardware prevents - * correct queue shutdown after unexpected short packets - */ - bd = cppi_bd_alloc(rx); - rx->head = bd; - - /* Build BDs for all packets in this segment */ - for (i = 0, tail = NULL; bd && i < n_bds; i++, tail = bd) { - u32 bd_len; - - if (i) { - bd = cppi_bd_alloc(rx); - if (!bd) - break; - tail->next = bd; - tail->hw_next = bd->dma; - } - bd->hw_next = 0; - - /* all but the last packet will be maxpacket size */ - if (maxpacket < length) - bd_len = maxpacket; - else - bd_len = length; - - bd->hw_bufp = addr; - addr += bd_len; - rx->offset += bd_len; - - bd->hw_off_len = (0 /*offset*/ << 16) + bd_len; - bd->buflen = bd_len; - - bd->hw_options = CPPI_OWN_SET | (i == 0 ? length : 0); - length -= bd_len; - } - - /* we always expect at least one reusable BD! */ - if (!tail) { - WARNING("rx dma%d -- no BDs? need %d\n", rx->index, n_bds); - return; - } else if (i < n_bds) - WARNING("rx dma%d -- only %d of %d BDs\n", rx->index, i, n_bds); - - tail->next = NULL; - tail->hw_next = 0; - - bd = rx->head; - rx->tail = tail; - - /* short reads and other faults should terminate this entire - * dma segment. we want one "dma packet" per dma segment, not - * one per USB packet, terminating the whole queue at once... - * NOTE that current hardware seems to ignore SOP and EOP. - */ - bd->hw_options |= CPPI_SOP_SET; - tail->hw_options |= CPPI_EOP_SET; - - for (d = rx->head; d; d = d->next) - cppi_dump_rxbd("S", d); - - /* in case the preceding transfer left some state... 
*/ - tail = rx->last_processed; - if (tail) { - tail->next = bd; - tail->hw_next = bd->dma; - } - - core_rxirq_enable(tibase, rx->index + 1); - - /* BDs live in DMA-coherent memory, but writes might be pending */ - cpu_drain_writebuffer(); - - /* REVISIT specs say to write this AFTER the BUFCNT register - * below ... but that loses badly. - */ - musb_writel(&rx_ram->rx_head, 0, bd->dma); - - /* bufferCount must be at least 3, and zeroes on completion - * unless it underflows below zero, or stops at two, or keeps - * growing ... grr. - */ - i = musb_readl(tibase, - DAVINCI_RXCPPI_BUFCNT0_REG + (rx->index * 4)) - & 0xffff; - - if (!i) - musb_writel(tibase, - DAVINCI_RXCPPI_BUFCNT0_REG + (rx->index * 4), - n_bds + 2); - else if (n_bds > (i - 3)) - musb_writel(tibase, - DAVINCI_RXCPPI_BUFCNT0_REG + (rx->index * 4), - n_bds - (i - 3)); - - i = musb_readl(tibase, - DAVINCI_RXCPPI_BUFCNT0_REG + (rx->index * 4)) - & 0xffff; - if (i < (2 + n_bds)) { - musb_dbg(musb, "bufcnt%d underrun - %d (for %d)", - rx->index, i, n_bds); - musb_writel(tibase, - DAVINCI_RXCPPI_BUFCNT0_REG + (rx->index * 4), - n_bds + 2); - } - - cppi_dump_rx(4, rx, "/S"); -} - -/** - * cppi_channel_program - program channel for data transfer - * @ch: the channel - * @maxpacket: max packet size - * @mode: For RX, 1 unless the usb protocol driver promised to treat - * all short reads as errors and kick in high level fault recovery. - * For TX, ignored because of RNDIS mode races/glitches. 
- * @dma_addr: dma address of buffer - * @len: length of buffer - * Context: controller irqlocked - */ -static int cppi_channel_program(struct dma_channel *ch, - u16 maxpacket, u8 mode, - dma_addr_t dma_addr, u32 len) -{ - struct cppi_channel *cppi_ch; - struct cppi *controller; - struct musb *musb; - - cppi_ch = container_of(ch, struct cppi_channel, channel); - controller = cppi_ch->controller; - musb = controller->controller.musb; - - switch (ch->status) { - case MUSB_DMA_STATUS_BUS_ABORT: - case MUSB_DMA_STATUS_CORE_ABORT: - /* fault irq handler should have handled cleanup */ - WARNING("%cX DMA%d not cleaned up after abort!\n", - cppi_ch->transmit ? 'T' : 'R', - cppi_ch->index); - /* WARN_ON(1); */ - break; - case MUSB_DMA_STATUS_BUSY: - WARNING("program active channel? %cX DMA%d\n", - cppi_ch->transmit ? 'T' : 'R', - cppi_ch->index); - /* WARN_ON(1); */ - break; - case MUSB_DMA_STATUS_UNKNOWN: - musb_dbg(musb, "%cX DMA%d not allocated!", - cppi_ch->transmit ? 'T' : 'R', - cppi_ch->index); - fallthrough; - case MUSB_DMA_STATUS_FREE: - break; - } - - ch->status = MUSB_DMA_STATUS_BUSY; - - /* set transfer parameters, then queue up its first segment */ - cppi_ch->buf_dma = dma_addr; - cppi_ch->offset = 0; - cppi_ch->maxpacket = maxpacket; - cppi_ch->buf_len = len; - cppi_ch->channel.actual_len = 0; - - /* TX channel? or RX? */ - if (cppi_ch->transmit) - cppi_next_tx_segment(musb, cppi_ch); - else - cppi_next_rx_segment(musb, cppi_ch, mode); - - return true; -} - -static bool cppi_rx_scan(struct cppi *cppi, unsigned ch) -{ - struct cppi_channel *rx = &cppi->rx[ch]; - struct cppi_rx_stateram __iomem *state = rx->state_ram; - struct cppi_descriptor *bd; - struct cppi_descriptor *last = rx->last_processed; - bool completed = false; - bool acked = false; - int i; - dma_addr_t safe2ack; - void __iomem *regs = rx->hw_ep->regs; - struct musb *musb = cppi->controller.musb; - - cppi_dump_rx(6, rx, "/K"); - - bd = last ? 
last->next : rx->head; - if (!bd) - return false; - - /* run through all completed BDs */ - for (i = 0, safe2ack = musb_readl(&state->rx_complete, 0); - (safe2ack || completed) && bd && i < NUM_RXCHAN_BD; - i++, bd = bd->next) { - u16 len; - - /* catch latest BD writes from CPPI */ - rmb(); - if (!completed && (bd->hw_options & CPPI_OWN_SET)) - break; - - musb_dbg(musb, "C/RXBD %llx: nxt %08x buf %08x " - "off.len %08x opt.len %08x (%d)", - (unsigned long long)bd->dma, bd->hw_next, bd->hw_bufp, - bd->hw_off_len, bd->hw_options, - rx->channel.actual_len); - - /* actual packet received length */ - if ((bd->hw_options & CPPI_SOP_SET) && !completed) - len = bd->hw_off_len & CPPI_RECV_PKTLEN_MASK; - else - len = 0; - - if (bd->hw_options & CPPI_EOQ_MASK) - completed = true; - - if (!completed && len < bd->buflen) { - /* NOTE: when we get a short packet, RXCSR_H_REQPKT - * must have been cleared, and no more DMA packets may - * active be in the queue... TI docs didn't say, but - * CPPI ignores those BDs even though OWN is still set. - */ - completed = true; - musb_dbg(musb, "rx short %d/%d (%d)", - len, bd->buflen, - rx->channel.actual_len); - } - - /* If we got here, we expect to ack at least one BD; meanwhile - * CPPI may completing other BDs while we scan this list... - * - * RACE: we can notice OWN cleared before CPPI raises the - * matching irq by writing that BD as the completion pointer. - * In such cases, stop scanning and wait for the irq, avoiding - * lost acks and states where BD ownership is unclear. - */ - if (bd->dma == safe2ack) { - musb_writel(&state->rx_complete, 0, safe2ack); - safe2ack = musb_readl(&state->rx_complete, 0); - acked = true; - if (bd->dma == safe2ack) - safe2ack = 0; - } - - rx->channel.actual_len += len; - - cppi_bd_free(rx, last); - last = bd; - - /* stop scanning on end-of-segment */ - if (bd->hw_next == 0) - completed = true; - } - rx->last_processed = last; - - /* dma abort, lost ack, or ... 
*/ - if (!acked && last) { - int csr; - - if (safe2ack == 0 || safe2ack == rx->last_processed->dma) - musb_writel(&state->rx_complete, 0, safe2ack); - if (safe2ack == 0) { - cppi_bd_free(rx, last); - rx->last_processed = NULL; - - /* if we land here on the host side, H_REQPKT will - * be clear and we need to restart the queue... - */ - WARN_ON(rx->head); - } - musb_ep_select(cppi->mregs, rx->index + 1); - csr = musb_readw(regs, MUSB_RXCSR); - if (csr & MUSB_RXCSR_DMAENAB) { - musb_dbg(musb, "list%d %p/%p, last %llx%s, csr %04x", - rx->index, - rx->head, rx->tail, - rx->last_processed - ? (unsigned long long) - rx->last_processed->dma - : 0, - completed ? ", completed" : "", - csr); - cppi_dump_rxq(4, "/what?", rx); - } - } - if (!completed) { - int csr; - - rx->head = bd; - - /* REVISIT seems like "autoreq all but EOP" doesn't... - * setting it here "should" be racey, but seems to work - */ - csr = musb_readw(rx->hw_ep->regs, MUSB_RXCSR); - if (is_host_active(cppi->controller.musb) - && bd - && !(csr & MUSB_RXCSR_H_REQPKT)) { - csr |= MUSB_RXCSR_H_REQPKT; - musb_writew(regs, MUSB_RXCSR, - MUSB_RXCSR_H_WZC_BITS | csr); - csr = musb_readw(rx->hw_ep->regs, MUSB_RXCSR); - } - } else { - rx->head = NULL; - rx->tail = NULL; - } - - cppi_dump_rx(6, rx, completed ? 
"/completed" : "/cleaned"); - return completed; -} - -irqreturn_t cppi_interrupt(int irq, void *dev_id) -{ - struct musb *musb = dev_id; - struct cppi *cppi; - void __iomem *tibase; - struct musb_hw_ep *hw_ep = NULL; - u32 rx, tx; - int i, index; - unsigned long flags; - - cppi = container_of(musb->dma_controller, struct cppi, controller); - if (cppi->irq) - spin_lock_irqsave(&musb->lock, flags); - - tibase = musb->ctrl_base; - - tx = musb_readl(tibase, DAVINCI_TXCPPI_MASKED_REG); - rx = musb_readl(tibase, DAVINCI_RXCPPI_MASKED_REG); - - if (!tx && !rx) { - if (cppi->irq) - spin_unlock_irqrestore(&musb->lock, flags); - return IRQ_NONE; - } - - musb_dbg(musb, "CPPI IRQ Tx%x Rx%x", tx, rx); - - /* process TX channels */ - for (index = 0; tx; tx = tx >> 1, index++) { - struct cppi_channel *tx_ch; - struct cppi_tx_stateram __iomem *tx_ram; - bool completed = false; - struct cppi_descriptor *bd; - - if (!(tx & 1)) - continue; - - tx_ch = cppi->tx + index; - tx_ram = tx_ch->state_ram; - - /* FIXME need a cppi_tx_scan() routine, which - * can also be called from abort code - */ - - cppi_dump_tx(5, tx_ch, "/E"); - - bd = tx_ch->head; - - /* - * If Head is null then this could mean that a abort interrupt - * that needs to be acknowledged. - */ - if (NULL == bd) { - musb_dbg(musb, "null BD"); - musb_writel(&tx_ram->tx_complete, 0, 0); - continue; - } - - /* run through all completed BDs */ - for (i = 0; !completed && bd && i < NUM_TXCHAN_BD; - i++, bd = bd->next) { - u16 len; - - /* catch latest BD writes from CPPI */ - rmb(); - if (bd->hw_options & CPPI_OWN_SET) - break; - - musb_dbg(musb, "C/TXBD %p n %x b %x off %x opt %x", - bd, bd->hw_next, bd->hw_bufp, - bd->hw_off_len, bd->hw_options); - - len = bd->hw_off_len & CPPI_BUFFER_LEN_MASK; - tx_ch->channel.actual_len += len; - - tx_ch->last_processed = bd; - - /* write completion register to acknowledge - * processing of completed BDs, and possibly - * release the IRQ; EOQ might not be set ... 
- * - * REVISIT use the same ack strategy as rx - * - * REVISIT have observed bit 18 set; huh?? - */ - /* if ((bd->hw_options & CPPI_EOQ_MASK)) */ - musb_writel(&tx_ram->tx_complete, 0, bd->dma); - - /* stop scanning on end-of-segment */ - if (bd->hw_next == 0) - completed = true; - } - - /* on end of segment, maybe go to next one */ - if (completed) { - /* cppi_dump_tx(4, tx_ch, "/complete"); */ - - /* transfer more, or report completion */ - if (tx_ch->offset >= tx_ch->buf_len) { - tx_ch->head = NULL; - tx_ch->tail = NULL; - tx_ch->channel.status = MUSB_DMA_STATUS_FREE; - - hw_ep = tx_ch->hw_ep; - - musb_dma_completion(musb, index + 1, 1); - - } else { - /* Bigger transfer than we could fit in - * that first batch of descriptors... - */ - cppi_next_tx_segment(musb, tx_ch); - } - } else - tx_ch->head = bd; - } - - /* Start processing the RX block */ - for (index = 0; rx; rx = rx >> 1, index++) { - - if (rx & 1) { - struct cppi_channel *rx_ch; - - rx_ch = cppi->rx + index; - - /* let incomplete dma segments finish */ - if (!cppi_rx_scan(cppi, index)) - continue; - - /* start another dma segment if needed */ - if (rx_ch->channel.actual_len != rx_ch->buf_len - && rx_ch->channel.actual_len - == rx_ch->offset) { - cppi_next_rx_segment(musb, rx_ch, 1); - continue; - } - - /* all segments completed! */ - rx_ch->channel.status = MUSB_DMA_STATUS_FREE; - - hw_ep = rx_ch->hw_ep; - - core_rxirq_disable(tibase, index + 1); - musb_dma_completion(musb, index + 1, 0); - } - } - - /* write to CPPI EOI register to re-enable interrupts */ - musb_writel(tibase, DAVINCI_CPPI_EOI_REG, 0); - - if (cppi->irq) - spin_unlock_irqrestore(&musb->lock, flags); - - return IRQ_HANDLED; -} -EXPORT_SYMBOL_GPL(cppi_interrupt); - -/* Instantiate a software object representing a DMA controller. 
*/ -struct dma_controller * -cppi_dma_controller_create(struct musb *musb, void __iomem *mregs) -{ - struct cppi *controller; - struct device *dev = musb->controller; - struct platform_device *pdev = to_platform_device(dev); - int irq = platform_get_irq_byname(pdev, "dma"); - - controller = kzalloc(sizeof *controller, GFP_KERNEL); - if (!controller) - return NULL; - - controller->mregs = mregs; - controller->tibase = mregs - DAVINCI_BASE_OFFSET; - - controller->controller.musb = musb; - controller->controller.channel_alloc = cppi_channel_allocate; - controller->controller.channel_release = cppi_channel_release; - controller->controller.channel_program = cppi_channel_program; - controller->controller.channel_abort = cppi_channel_abort; - - /* NOTE: allocating from on-chip SRAM would give the least - * contention for memory access, if that ever matters here. - */ - - /* setup BufferPool */ - controller->pool = dma_pool_create("cppi", - controller->controller.musb->controller, - sizeof(struct cppi_descriptor), - CPPI_DESCRIPTOR_ALIGN, 0); - if (!controller->pool) { - kfree(controller); - return NULL; - } - - if (irq > 0) { - if (request_irq(irq, cppi_interrupt, 0, "cppi-dma", musb)) { - dev_err(dev, "request_irq %d failed!\n", irq); - musb_dma_controller_destroy(&controller->controller); - return NULL; - } - controller->irq = irq; - } - - cppi_controller_start(controller); - return &controller->controller; -} -EXPORT_SYMBOL_GPL(cppi_dma_controller_create); - -/* - * Destroy a previously-instantiated DMA controller. 
- */ -void cppi_dma_controller_destroy(struct dma_controller *c) -{ - struct cppi *cppi; - - cppi = container_of(c, struct cppi, controller); - - cppi_controller_stop(cppi); - - if (cppi->irq) - free_irq(cppi->irq, cppi->controller.musb); - - /* assert: caller stopped the controller first */ - dma_pool_destroy(cppi->pool); - - kfree(cppi); -} -EXPORT_SYMBOL_GPL(cppi_dma_controller_destroy); - -/* - * Context: controller irqlocked, endpoint selected - */ -static int cppi_channel_abort(struct dma_channel *channel) -{ - struct cppi_channel *cppi_ch; - struct cppi *controller; - void __iomem *mbase; - void __iomem *tibase; - void __iomem *regs; - u32 value; - struct cppi_descriptor *queue; - - cppi_ch = container_of(channel, struct cppi_channel, channel); - - controller = cppi_ch->controller; - - switch (channel->status) { - case MUSB_DMA_STATUS_BUS_ABORT: - case MUSB_DMA_STATUS_CORE_ABORT: - /* from RX or TX fault irq handler */ - case MUSB_DMA_STATUS_BUSY: - /* the hardware needs shutting down */ - regs = cppi_ch->hw_ep->regs; - break; - case MUSB_DMA_STATUS_UNKNOWN: - case MUSB_DMA_STATUS_FREE: - return 0; - default: - return -EINVAL; - } - - if (!cppi_ch->transmit && cppi_ch->head) - cppi_dump_rxq(3, "/abort", cppi_ch); - - mbase = controller->mregs; - tibase = controller->tibase; - - queue = cppi_ch->head; - cppi_ch->head = NULL; - cppi_ch->tail = NULL; - - /* REVISIT should rely on caller having done this, - * and caller should rely on us not changing it. - * peripheral code is safe ... check host too. 
- */ - musb_ep_select(mbase, cppi_ch->index + 1); - - if (cppi_ch->transmit) { - struct cppi_tx_stateram __iomem *tx_ram; - /* REVISIT put timeouts on these controller handshakes */ - - cppi_dump_tx(6, cppi_ch, " (teardown)"); - - /* teardown DMA engine then usb core */ - do { - value = musb_readl(tibase, DAVINCI_TXCPPI_TEAR_REG); - } while (!(value & CPPI_TEAR_READY)); - musb_writel(tibase, DAVINCI_TXCPPI_TEAR_REG, cppi_ch->index); - - tx_ram = cppi_ch->state_ram; - do { - value = musb_readl(&tx_ram->tx_complete, 0); - } while (0xFFFFFFFC != value); - - /* FIXME clean up the transfer state ... here? - * the completion routine should get called with - * an appropriate status code. - */ - - value = musb_readw(regs, MUSB_TXCSR); - value &= ~MUSB_TXCSR_DMAENAB; - value |= MUSB_TXCSR_FLUSHFIFO; - musb_writew(regs, MUSB_TXCSR, value); - musb_writew(regs, MUSB_TXCSR, value); - - /* - * 1. Write to completion Ptr value 0x1(bit 0 set) - * (write back mode) - * 2. Wait for abort interrupt and then put the channel in - * compare mode by writing 1 to the tx_complete register. - */ - cppi_reset_tx(tx_ram, 1); - cppi_ch->head = NULL; - musb_writel(&tx_ram->tx_complete, 0, 1); - cppi_dump_tx(5, cppi_ch, " (done teardown)"); - - /* REVISIT tx side _should_ clean up the same way - * as the RX side ... this does no cleanup at all! - */ - - } else /* RX */ { - u16 csr; - - /* NOTE: docs don't guarantee any of this works ... we - * expect that if the usb core stops telling the cppi core - * to pull more data from it, then it'll be safe to flush - * current RX DMA state iff any pending fifo transfer is done. 
- */ - - core_rxirq_disable(tibase, cppi_ch->index + 1); - - /* for host, ensure ReqPkt is never set again */ - if (is_host_active(cppi_ch->controller->controller.musb)) { - value = musb_readl(tibase, DAVINCI_AUTOREQ_REG); - value &= ~((0x3) << (cppi_ch->index * 2)); - musb_writel(tibase, DAVINCI_AUTOREQ_REG, value); - } - - csr = musb_readw(regs, MUSB_RXCSR); - - /* for host, clear (just) ReqPkt at end of current packet(s) */ - if (is_host_active(cppi_ch->controller->controller.musb)) { - csr |= MUSB_RXCSR_H_WZC_BITS; - csr &= ~MUSB_RXCSR_H_REQPKT; - } else - csr |= MUSB_RXCSR_P_WZC_BITS; - - /* clear dma enable */ - csr &= ~(MUSB_RXCSR_DMAENAB); - musb_writew(regs, MUSB_RXCSR, csr); - csr = musb_readw(regs, MUSB_RXCSR); - - /* Quiesce: wait for current dma to finish (if not cleanup). - * We can't use bit zero of stateram->rx_sop, since that - * refers to an entire "DMA packet" not just emptying the - * current fifo. Most segments need multiple usb packets. - */ - if (channel->status == MUSB_DMA_STATUS_BUSY) - udelay(50); - - /* scan the current list, reporting any data that was - * transferred and acking any IRQ - */ - cppi_rx_scan(controller, cppi_ch->index); - - /* clobber the existing state once it's idle - * - * NOTE: arguably, we should also wait for all the other - * RX channels to quiesce (how??) and then temporarily - * disable RXCPPI_CTRL_REG ... but it seems that we can - * rely on the controller restarting from state ram, with - * only RXCPPI_BUFCNT state being bogus. BUFCNT will - * correct itself after the next DMA transfer though. - * - * REVISIT does using rndis mode change that? - */ - cppi_reset_rx(cppi_ch->state_ram); - - /* next DMA request _should_ load cppi head ptr */ - - /* ... we don't "free" that list, only mutate it in place. 
*/ - cppi_dump_rx(5, cppi_ch, " (done abort)"); - - /* clean up previously pending bds */ - cppi_bd_free(cppi_ch, cppi_ch->last_processed); - cppi_ch->last_processed = NULL; - - while (queue) { - struct cppi_descriptor *tmp = queue->next; - - cppi_bd_free(cppi_ch, queue); - queue = tmp; - } - } - - channel->status = MUSB_DMA_STATUS_FREE; - cppi_ch->buf_dma = 0; - cppi_ch->offset = 0; - cppi_ch->buf_len = 0; - cppi_ch->maxpacket = 0; - return 0; -} - -/* TBD Queries: - * - * Power Management ... probably turn off cppi during suspend, restart; - * check state ram? Clocking is presumably shared with usb core. - */ diff --git a/drivers/usb/musb/davinci.c b/drivers/usb/musb/davinci.c deleted file mode 100644 index 704435526394..000000000000 --- a/drivers/usb/musb/davinci.c +++ /dev/null @@ -1,606 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2005-2006 by Texas Instruments - * - * This file is part of the Inventra Controller Driver for Linux. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include - -#include "musb_core.h" - -#include "davinci.h" -#include "cppi_dma.h" - - -#define USB_PHY_CTRL IO_ADDRESS(USBPHY_CTL_PADDR) -#define DM355_DEEPSLEEP IO_ADDRESS(DM355_DEEPSLEEP_PADDR) - -struct davinci_glue { - struct device *dev; - struct platform_device *musb; - struct clk *clk; - bool vbus_state; - struct gpio_desc *vbus; - struct work_struct vbus_work; -}; - -/* REVISIT (PM) we should be able to keep the PHY in low power mode most - * of the time (24 MHZ oscillator and PLL off, etc) by setting POWER.D0 - * and, when in host mode, autosuspending idle root ports... PHYPLLON - * (overriding SUSPENDM?) then likely needs to stay off. 
- */ - -static inline void phy_on(void) -{ - u32 phy_ctrl = __raw_readl(USB_PHY_CTRL); - - /* power everything up; start the on-chip PHY and its PLL */ - phy_ctrl &= ~(USBPHY_OSCPDWN | USBPHY_OTGPDWN | USBPHY_PHYPDWN); - phy_ctrl |= USBPHY_SESNDEN | USBPHY_VBDTCTEN | USBPHY_PHYPLLON; - __raw_writel(phy_ctrl, USB_PHY_CTRL); - - /* wait for PLL to lock before proceeding */ - while ((__raw_readl(USB_PHY_CTRL) & USBPHY_PHYCLKGD) == 0) - cpu_relax(); -} - -static inline void phy_off(void) -{ - u32 phy_ctrl = __raw_readl(USB_PHY_CTRL); - - /* powerdown the on-chip PHY, its PLL, and the OTG block */ - phy_ctrl &= ~(USBPHY_SESNDEN | USBPHY_VBDTCTEN | USBPHY_PHYPLLON); - phy_ctrl |= USBPHY_OSCPDWN | USBPHY_OTGPDWN | USBPHY_PHYPDWN; - __raw_writel(phy_ctrl, USB_PHY_CTRL); -} - -static int dma_off = 1; - -static void davinci_musb_enable(struct musb *musb) -{ - u32 tmp, old, val; - - /* workaround: setup irqs through both register sets */ - tmp = (musb->epmask & DAVINCI_USB_TX_ENDPTS_MASK) - << DAVINCI_USB_TXINT_SHIFT; - musb_writel(musb->ctrl_base, DAVINCI_USB_INT_MASK_SET_REG, tmp); - old = tmp; - tmp = (musb->epmask & (0xfffe & DAVINCI_USB_RX_ENDPTS_MASK)) - << DAVINCI_USB_RXINT_SHIFT; - musb_writel(musb->ctrl_base, DAVINCI_USB_INT_MASK_SET_REG, tmp); - tmp |= old; - - val = ~MUSB_INTR_SOF; - tmp |= ((val & 0x01ff) << DAVINCI_USB_USBINT_SHIFT); - musb_writel(musb->ctrl_base, DAVINCI_USB_INT_MASK_SET_REG, tmp); - - if (is_dma_capable() && !dma_off) - printk(KERN_WARNING "%s %s: dma not reactivated\n", - __FILE__, __func__); - else - dma_off = 0; - - /* force a DRVVBUS irq so we can start polling for ID change */ - musb_writel(musb->ctrl_base, DAVINCI_USB_INT_SET_REG, - DAVINCI_INTR_DRVVBUS << DAVINCI_USB_USBINT_SHIFT); -} - -/* - * Disable the HDRC and flush interrupts - */ -static void davinci_musb_disable(struct musb *musb) -{ - /* because we don't set CTRLR.UINT, "important" to: - * - not read/write INTRUSB/INTRUSBE - * - (except during initial setup, as workaround) - * - 
use INTSETR/INTCLRR instead - */ - musb_writel(musb->ctrl_base, DAVINCI_USB_INT_MASK_CLR_REG, - DAVINCI_USB_USBINT_MASK - | DAVINCI_USB_TXINT_MASK - | DAVINCI_USB_RXINT_MASK); - musb_writel(musb->ctrl_base, DAVINCI_USB_EOI_REG, 0); - - if (is_dma_capable() && !dma_off) - WARNING("dma still active\n"); -} - - -#define portstate(stmt) stmt - -/* - * VBUS SWITCHING IS BOARD-SPECIFIC ... at least for the DM6446 EVM, - * which doesn't wire DRVVBUS to the FET that switches it. Unclear - * if that's a problem with the DM6446 chip or just with that board. - * - * In either case, the DM355 EVM automates DRVVBUS the normal way, - * when J10 is out, and TI documents it as handling OTG. - */ - -/* I2C operations are always synchronous, and require a task context. - * With unloaded systems, using the shared workqueue seems to suffice - * to satisfy the 100msec A_WAIT_VRISE timeout... - */ -static void evm_deferred_drvvbus(struct work_struct *work) -{ - struct davinci_glue *glue = container_of(work, struct davinci_glue, - vbus_work); - - gpiod_set_value_cansleep(glue->vbus, glue->vbus_state); - glue->vbus_state = !glue->vbus_state; -} - -static void davinci_musb_source_power(struct musb *musb, int is_on, - int immediate) -{ - struct davinci_glue *glue = dev_get_drvdata(musb->controller->parent); - - /* This GPIO handling is entirely optional */ - if (!glue->vbus) - return; - - if (is_on) - is_on = 1; - - if (glue->vbus_state == is_on) - return; - /* 0/1 vs "-1 == unknown/init" */ - glue->vbus_state = !is_on; - - if (machine_is_davinci_evm()) { - if (immediate) - gpiod_set_value_cansleep(glue->vbus, glue->vbus_state); - else - schedule_work(&glue->vbus_work); - } - if (immediate) - glue->vbus_state = is_on; -} - -static void davinci_musb_set_vbus(struct musb *musb, int is_on) -{ - WARN_ON(is_on && is_peripheral_active(musb)); - davinci_musb_source_power(musb, is_on, 0); -} - - -#define POLL_SECONDS 2 - -static void otg_timer(struct timer_list *t) -{ - struct musb *musb = 
from_timer(musb, t, dev_timer); - void __iomem *mregs = musb->mregs; - u8 devctl; - unsigned long flags; - - /* We poll because DaVinci's won't expose several OTG-critical - * status change events (from the transceiver) otherwise. - */ - devctl = musb_readb(mregs, MUSB_DEVCTL); - dev_dbg(musb->controller, "poll devctl %02x (%s)\n", devctl, - usb_otg_state_string(musb->xceiv->otg->state)); - - spin_lock_irqsave(&musb->lock, flags); - switch (musb->xceiv->otg->state) { - case OTG_STATE_A_WAIT_VFALL: - /* Wait till VBUS falls below SessionEnd (~0.2V); the 1.3 RTL - * seems to mis-handle session "start" otherwise (or in our - * case "recover"), in routine "VBUS was valid by the time - * VBUSERR got reported during enumeration" cases. - */ - if (devctl & MUSB_DEVCTL_VBUS) { - mod_timer(&musb->dev_timer, jiffies + POLL_SECONDS * HZ); - break; - } - musb->xceiv->otg->state = OTG_STATE_A_WAIT_VRISE; - musb_writel(musb->ctrl_base, DAVINCI_USB_INT_SET_REG, - MUSB_INTR_VBUSERROR << DAVINCI_USB_USBINT_SHIFT); - break; - case OTG_STATE_B_IDLE: - /* - * There's no ID-changed IRQ, so we have no good way to tell - * when to switch to the A-Default state machine (by setting - * the DEVCTL.SESSION flag). - * - * Workaround: whenever we're in B_IDLE, try setting the - * session flag every few seconds. If it works, ID was - * grounded and we're now in the A-Default state machine. - * - * NOTE setting the session flag is _supposed_ to trigger - * SRP, but clearly it doesn't. 
- */ - musb_writeb(mregs, MUSB_DEVCTL, - devctl | MUSB_DEVCTL_SESSION); - devctl = musb_readb(mregs, MUSB_DEVCTL); - if (devctl & MUSB_DEVCTL_BDEVICE) - mod_timer(&musb->dev_timer, jiffies + POLL_SECONDS * HZ); - else - musb->xceiv->otg->state = OTG_STATE_A_IDLE; - break; - default: - break; - } - spin_unlock_irqrestore(&musb->lock, flags); -} - -static irqreturn_t davinci_musb_interrupt(int irq, void *__hci) -{ - unsigned long flags; - irqreturn_t retval = IRQ_NONE; - struct musb *musb = __hci; - struct usb_otg *otg = musb->xceiv->otg; - void __iomem *tibase = musb->ctrl_base; - struct cppi *cppi; - u32 tmp; - - spin_lock_irqsave(&musb->lock, flags); - - /* NOTE: DaVinci shadows the Mentor IRQs. Don't manage them through - * the Mentor registers (except for setup), use the TI ones and EOI. - * - * Docs describe irq "vector" registers associated with the CPPI and - * USB EOI registers. These hold a bitmask corresponding to the - * current IRQ, not an irq handler address. Would using those bits - * resolve some of the races observed in this dispatch code?? - */ - - /* CPPI interrupts share the same IRQ line, but have their own - * mask, state, "vector", and EOI registers. - */ - cppi = container_of(musb->dma_controller, struct cppi, controller); - if (is_cppi_enabled(musb) && musb->dma_controller && !cppi->irq) - retval = cppi_interrupt(irq, __hci); - - /* ack and handle non-CPPI interrupts */ - tmp = musb_readl(tibase, DAVINCI_USB_INT_SRC_MASKED_REG); - musb_writel(tibase, DAVINCI_USB_INT_SRC_CLR_REG, tmp); - dev_dbg(musb->controller, "IRQ %08x\n", tmp); - - musb->int_rx = (tmp & DAVINCI_USB_RXINT_MASK) - >> DAVINCI_USB_RXINT_SHIFT; - musb->int_tx = (tmp & DAVINCI_USB_TXINT_MASK) - >> DAVINCI_USB_TXINT_SHIFT; - musb->int_usb = (tmp & DAVINCI_USB_USBINT_MASK) - >> DAVINCI_USB_USBINT_SHIFT; - - /* DRVVBUS irqs are the only proxy we have (a very poor one!) for - * DaVinci's missing ID change IRQ. 
We need an ID change IRQ to - * switch appropriately between halves of the OTG state machine. - * Managing DEVCTL.SESSION per Mentor docs requires we know its - * value, but DEVCTL.BDEVICE is invalid without DEVCTL.SESSION set. - * Also, DRVVBUS pulses for SRP (but not at 5V) ... - */ - if (tmp & (DAVINCI_INTR_DRVVBUS << DAVINCI_USB_USBINT_SHIFT)) { - int drvvbus = musb_readl(tibase, DAVINCI_USB_STAT_REG); - void __iomem *mregs = musb->mregs; - u8 devctl = musb_readb(mregs, MUSB_DEVCTL); - int err = musb->int_usb & MUSB_INTR_VBUSERROR; - - err = musb->int_usb & MUSB_INTR_VBUSERROR; - if (err) { - /* The Mentor core doesn't debounce VBUS as needed - * to cope with device connect current spikes. This - * means it's not uncommon for bus-powered devices - * to get VBUS errors during enumeration. - * - * This is a workaround, but newer RTL from Mentor - * seems to allow a better one: "re"starting sessions - * without waiting (on EVM, a **long** time) for VBUS - * to stop registering in devctl. - */ - musb->int_usb &= ~MUSB_INTR_VBUSERROR; - musb->xceiv->otg->state = OTG_STATE_A_WAIT_VFALL; - mod_timer(&musb->dev_timer, jiffies + POLL_SECONDS * HZ); - WARNING("VBUS error workaround (delay coming)\n"); - } else if (drvvbus) { - MUSB_HST_MODE(musb); - musb->xceiv->otg->state = OTG_STATE_A_WAIT_VRISE; - portstate(musb->port1_status |= USB_PORT_STAT_POWER); - del_timer(&musb->dev_timer); - } else { - musb->is_active = 0; - MUSB_DEV_MODE(musb); - musb->xceiv->otg->state = OTG_STATE_B_IDLE; - portstate(musb->port1_status &= ~USB_PORT_STAT_POWER); - } - - /* NOTE: this must complete poweron within 100 msec - * (OTG_TIME_A_WAIT_VRISE) but we don't check for that. - */ - davinci_musb_source_power(musb, drvvbus, 0); - dev_dbg(musb->controller, "VBUS %s (%s)%s, devctl %02x\n", - drvvbus ? "on" : "off", - usb_otg_state_string(musb->xceiv->otg->state), - err ? 
" ERROR" : "", - devctl); - retval = IRQ_HANDLED; - } - - if (musb->int_tx || musb->int_rx || musb->int_usb) - retval |= musb_interrupt(musb); - - /* irq stays asserted until EOI is written */ - musb_writel(tibase, DAVINCI_USB_EOI_REG, 0); - - /* poll for ID change */ - if (musb->xceiv->otg->state == OTG_STATE_B_IDLE) - mod_timer(&musb->dev_timer, jiffies + POLL_SECONDS * HZ); - - spin_unlock_irqrestore(&musb->lock, flags); - - return retval; -} - -static int davinci_musb_set_mode(struct musb *musb, u8 mode) -{ - /* EVM can't do this (right?) */ - return -EIO; -} - -static int davinci_musb_init(struct musb *musb) -{ - void __iomem *tibase = musb->ctrl_base; - u32 revision; - int ret = -ENODEV; - - musb->xceiv = usb_get_phy(USB_PHY_TYPE_USB2); - if (IS_ERR_OR_NULL(musb->xceiv)) { - ret = -EPROBE_DEFER; - goto unregister; - } - - musb->mregs += DAVINCI_BASE_OFFSET; - - /* returns zero if e.g. not clocked */ - revision = musb_readl(tibase, DAVINCI_USB_VERSION_REG); - if (revision == 0) - goto fail; - - timer_setup(&musb->dev_timer, otg_timer, 0); - - davinci_musb_source_power(musb, 0, 1); - - /* dm355 EVM swaps D+/D- for signal integrity, and - * is clocked from the main 24 MHz crystal. - */ - if (machine_is_davinci_dm355_evm()) { - u32 phy_ctrl = __raw_readl(USB_PHY_CTRL); - - phy_ctrl &= ~(3 << 9); - phy_ctrl |= USBPHY_DATAPOL; - __raw_writel(phy_ctrl, USB_PHY_CTRL); - } - - /* On dm355, the default-A state machine needs DRVVBUS control. - * If we won't be a host, there's no need to turn it on. 
- */ - if (cpu_is_davinci_dm355()) { - u32 deepsleep = __raw_readl(DM355_DEEPSLEEP); - - deepsleep &= ~DRVVBUS_FORCE; - __raw_writel(deepsleep, DM355_DEEPSLEEP); - } - - /* reset the controller */ - musb_writel(tibase, DAVINCI_USB_CTRL_REG, 0x1); - - /* start the on-chip PHY and its PLL */ - phy_on(); - - msleep(5); - - /* NOTE: irqs are in mixed mode, not bypass to pure-musb */ - pr_debug("DaVinci OTG revision %08x phy %03x control %02x\n", - revision, __raw_readl(USB_PHY_CTRL), - musb_readb(tibase, DAVINCI_USB_CTRL_REG)); - - musb->isr = davinci_musb_interrupt; - return 0; - -fail: - usb_put_phy(musb->xceiv); -unregister: - usb_phy_generic_unregister(); - return ret; -} - -static int davinci_musb_exit(struct musb *musb) -{ - int maxdelay = 30; - u8 devctl, warn = 0; - - del_timer_sync(&musb->dev_timer); - - /* force VBUS off */ - if (cpu_is_davinci_dm355()) { - u32 deepsleep = __raw_readl(DM355_DEEPSLEEP); - - deepsleep &= ~DRVVBUS_FORCE; - deepsleep |= DRVVBUS_OVERRIDE; - __raw_writel(deepsleep, DM355_DEEPSLEEP); - } - - davinci_musb_source_power(musb, 0 /*off*/, 1); - - /* - * delay, to avoid problems with module reload. - * if there's no peripheral connected, this can take a - * long time to fall, especially on EVM with huge C133. 
- */ - do { - devctl = musb_readb(musb->mregs, MUSB_DEVCTL); - if (!(devctl & MUSB_DEVCTL_VBUS)) - break; - if ((devctl & MUSB_DEVCTL_VBUS) != warn) { - warn = devctl & MUSB_DEVCTL_VBUS; - dev_dbg(musb->controller, "VBUS %d\n", - warn >> MUSB_DEVCTL_VBUS_SHIFT); - } - msleep(1000); - maxdelay--; - } while (maxdelay > 0); - - /* in OTG mode, another host might be connected */ - if (devctl & MUSB_DEVCTL_VBUS) - dev_dbg(musb->controller, "VBUS off timeout (devctl %02x)\n", devctl); - - phy_off(); - - usb_put_phy(musb->xceiv); - - return 0; -} - -static const struct musb_platform_ops davinci_ops = { - .quirks = MUSB_DMA_CPPI, - .init = davinci_musb_init, - .exit = davinci_musb_exit, - -#ifdef CONFIG_USB_TI_CPPI_DMA - .dma_init = cppi_dma_controller_create, - .dma_exit = cppi_dma_controller_destroy, -#endif - .enable = davinci_musb_enable, - .disable = davinci_musb_disable, - - .set_mode = davinci_musb_set_mode, - - .set_vbus = davinci_musb_set_vbus, -}; - -static const struct platform_device_info davinci_dev_info = { - .name = "musb-hdrc", - .id = PLATFORM_DEVID_AUTO, - .dma_mask = DMA_BIT_MASK(32), -}; - -static int davinci_probe(struct platform_device *pdev) -{ - struct resource musb_resources[3]; - struct musb_hdrc_platform_data *pdata = dev_get_platdata(&pdev->dev); - struct platform_device *musb; - struct davinci_glue *glue; - struct platform_device_info pinfo; - struct clk *clk; - - int ret = -ENOMEM; - - glue = devm_kzalloc(&pdev->dev, sizeof(*glue), GFP_KERNEL); - if (!glue) - goto err0; - - clk = devm_clk_get(&pdev->dev, "usb"); - if (IS_ERR(clk)) { - dev_err(&pdev->dev, "failed to get clock\n"); - ret = PTR_ERR(clk); - goto err0; - } - - ret = clk_enable(clk); - if (ret) { - dev_err(&pdev->dev, "failed to enable clock\n"); - goto err0; - } - - glue->dev = &pdev->dev; - glue->clk = clk; - - pdata->platform_ops = &davinci_ops; - - glue->vbus = devm_gpiod_get_optional(&pdev->dev, NULL, GPIOD_OUT_LOW); - if (IS_ERR(glue->vbus)) { - ret = PTR_ERR(glue->vbus); - 
goto err0; - } else { - glue->vbus_state = -1; - INIT_WORK(&glue->vbus_work, evm_deferred_drvvbus); - } - - usb_phy_generic_register(); - platform_set_drvdata(pdev, glue); - - memset(musb_resources, 0x00, sizeof(*musb_resources) * - ARRAY_SIZE(musb_resources)); - - musb_resources[0].name = pdev->resource[0].name; - musb_resources[0].start = pdev->resource[0].start; - musb_resources[0].end = pdev->resource[0].end; - musb_resources[0].flags = pdev->resource[0].flags; - - musb_resources[1].name = pdev->resource[1].name; - musb_resources[1].start = pdev->resource[1].start; - musb_resources[1].end = pdev->resource[1].end; - musb_resources[1].flags = pdev->resource[1].flags; - - /* - * For DM6467 3 resources are passed. A placeholder for the 3rd - * resource is always there, so it's safe to always copy it... - */ - musb_resources[2].name = pdev->resource[2].name; - musb_resources[2].start = pdev->resource[2].start; - musb_resources[2].end = pdev->resource[2].end; - musb_resources[2].flags = pdev->resource[2].flags; - - pinfo = davinci_dev_info; - pinfo.parent = &pdev->dev; - pinfo.res = musb_resources; - pinfo.num_res = ARRAY_SIZE(musb_resources); - pinfo.data = pdata; - pinfo.size_data = sizeof(*pdata); - - glue->musb = musb = platform_device_register_full(&pinfo); - if (IS_ERR(musb)) { - ret = PTR_ERR(musb); - dev_err(&pdev->dev, "failed to register musb device: %d\n", ret); - goto err1; - } - - return 0; - -err1: - clk_disable(clk); - -err0: - return ret; -} - -static int davinci_remove(struct platform_device *pdev) -{ - struct davinci_glue *glue = platform_get_drvdata(pdev); - - platform_device_unregister(glue->musb); - usb_phy_generic_unregister(); - clk_disable(glue->clk); - - return 0; -} - -static struct platform_driver davinci_driver = { - .probe = davinci_probe, - .remove = davinci_remove, - .driver = { - .name = "musb-davinci", - }, -}; - -MODULE_DESCRIPTION("DaVinci MUSB Glue Layer"); -MODULE_AUTHOR("Felipe Balbi "); -MODULE_LICENSE("GPL v2"); 
-module_platform_driver(davinci_driver); diff --git a/drivers/usb/musb/davinci.h b/drivers/usb/musb/davinci.h deleted file mode 100644 index c8e67d15b510..000000000000 --- a/drivers/usb/musb/davinci.h +++ /dev/null @@ -1,103 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2005-2006 by Texas Instruments - */ - -#ifndef __MUSB_HDRDF_H__ -#define __MUSB_HDRDF_H__ - -/* - * DaVinci-specific definitions - */ - -/* Integrated highspeed/otg PHY */ -#define USBPHY_CTL_PADDR 0x01c40034 -#define USBPHY_DATAPOL BIT(11) /* (dm355) switch D+/D- */ -#define USBPHY_PHYCLKGD BIT(8) -#define USBPHY_SESNDEN BIT(7) /* v(sess_end) comparator */ -#define USBPHY_VBDTCTEN BIT(6) /* v(bus) comparator */ -#define USBPHY_VBUSSENS BIT(5) /* (dm355,ro) is vbus > 0.5V */ -#define USBPHY_PHYPLLON BIT(4) /* override pll suspend */ -#define USBPHY_CLKO1SEL BIT(3) -#define USBPHY_OSCPDWN BIT(2) -#define USBPHY_OTGPDWN BIT(1) -#define USBPHY_PHYPDWN BIT(0) - -#define DM355_DEEPSLEEP_PADDR 0x01c40048 -#define DRVVBUS_FORCE BIT(2) -#define DRVVBUS_OVERRIDE BIT(1) - -/* For now include usb OTG module registers here */ -#define DAVINCI_USB_VERSION_REG 0x00 -#define DAVINCI_USB_CTRL_REG 0x04 -#define DAVINCI_USB_STAT_REG 0x08 -#define DAVINCI_RNDIS_REG 0x10 -#define DAVINCI_AUTOREQ_REG 0x14 -#define DAVINCI_USB_INT_SOURCE_REG 0x20 -#define DAVINCI_USB_INT_SET_REG 0x24 -#define DAVINCI_USB_INT_SRC_CLR_REG 0x28 -#define DAVINCI_USB_INT_MASK_REG 0x2c -#define DAVINCI_USB_INT_MASK_SET_REG 0x30 -#define DAVINCI_USB_INT_MASK_CLR_REG 0x34 -#define DAVINCI_USB_INT_SRC_MASKED_REG 0x38 -#define DAVINCI_USB_EOI_REG 0x3c -#define DAVINCI_USB_EOI_INTVEC 0x40 - -/* BEGIN CPPI-generic (?) 
*/ - -/* CPPI related registers */ -#define DAVINCI_TXCPPI_CTRL_REG 0x80 -#define DAVINCI_TXCPPI_TEAR_REG 0x84 -#define DAVINCI_CPPI_EOI_REG 0x88 -#define DAVINCI_CPPI_INTVEC_REG 0x8c -#define DAVINCI_TXCPPI_MASKED_REG 0x90 -#define DAVINCI_TXCPPI_RAW_REG 0x94 -#define DAVINCI_TXCPPI_INTENAB_REG 0x98 -#define DAVINCI_TXCPPI_INTCLR_REG 0x9c - -#define DAVINCI_RXCPPI_CTRL_REG 0xC0 -#define DAVINCI_RXCPPI_MASKED_REG 0xD0 -#define DAVINCI_RXCPPI_RAW_REG 0xD4 -#define DAVINCI_RXCPPI_INTENAB_REG 0xD8 -#define DAVINCI_RXCPPI_INTCLR_REG 0xDC - -#define DAVINCI_RXCPPI_BUFCNT0_REG 0xE0 -#define DAVINCI_RXCPPI_BUFCNT1_REG 0xE4 -#define DAVINCI_RXCPPI_BUFCNT2_REG 0xE8 -#define DAVINCI_RXCPPI_BUFCNT3_REG 0xEC - -/* CPPI state RAM entries */ -#define DAVINCI_CPPI_STATERAM_BASE_OFFSET 0x100 - -#define DAVINCI_TXCPPI_STATERAM_OFFSET(chnum) \ - (DAVINCI_CPPI_STATERAM_BASE_OFFSET + ((chnum) * 0x40)) -#define DAVINCI_RXCPPI_STATERAM_OFFSET(chnum) \ - (DAVINCI_CPPI_STATERAM_BASE_OFFSET + 0x20 + ((chnum) * 0x40)) - -/* CPPI masks */ -#define DAVINCI_DMA_CTRL_ENABLE 1 -#define DAVINCI_DMA_CTRL_DISABLE 0 - -#define DAVINCI_DMA_ALL_CHANNELS_ENABLE 0xF -#define DAVINCI_DMA_ALL_CHANNELS_DISABLE 0xF - -/* END CPPI-generic (?) 
*/ - -#define DAVINCI_USB_TX_ENDPTS_MASK 0x1f /* ep0 + 4 tx */ -#define DAVINCI_USB_RX_ENDPTS_MASK 0x1e /* 4 rx */ - -#define DAVINCI_USB_USBINT_SHIFT 16 -#define DAVINCI_USB_TXINT_SHIFT 0 -#define DAVINCI_USB_RXINT_SHIFT 8 - -#define DAVINCI_INTR_DRVVBUS 0x0100 - -#define DAVINCI_USB_USBINT_MASK 0x01ff0000 /* 8 Mentor, DRVVBUS */ -#define DAVINCI_USB_TXINT_MASK \ - (DAVINCI_USB_TX_ENDPTS_MASK << DAVINCI_USB_TXINT_SHIFT) -#define DAVINCI_USB_RXINT_MASK \ - (DAVINCI_USB_RX_ENDPTS_MASK << DAVINCI_USB_RXINT_SHIFT) - -#define DAVINCI_BASE_OFFSET 0x400 - -#endif /* __MUSB_HDRDF_H__ */ From 55f223b8b408cbfd85fb1c5b74ab85ccab319a69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 17 Oct 2022 21:59:14 +0200 Subject: [PATCH 0355/4122] usb: dwc2: platform: Improve error reporting for problems during .remove() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Returning an error value in a platform driver's remove callback results in a generic error message being emitted by the driver core, but otherwise it doesn't make a difference. The device goes away anyhow. For each case where ret is non-zero the driver already emits an error message, so suppress the generic error message by returning zero unconditionally. (Side note: The return value handling was unreliable anyhow, as the value returned by dwc2_exit_hibernation() was overwritten if hsotg->in_ppd was non-zero.) 
Signed-off-by: Uwe Kleine-König Acked-by: Minas Harutyunyan Link: https://lore.kernel.org/r/20221017195914.1426297-1-u.kleine-koenig@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/platform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c index ec4ace0107f5..262c13b6362a 100644 --- a/drivers/usb/dwc2/platform.c +++ b/drivers/usb/dwc2/platform.c @@ -321,7 +321,7 @@ static int dwc2_driver_remove(struct platform_device *dev) reset_control_assert(hsotg->reset); reset_control_assert(hsotg->reset_ecc); - return ret; + return 0; } /** From b295d484b97081feba72b071ffcb72fb4638ccfd Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 4 Oct 2022 12:21:25 +0300 Subject: [PATCH 0356/4122] device property: Allow const parameter to dev_fwnode() It's not fully correct to take a const parameter pointer to a struct and return a non-const pointer to a member of that struct. Instead, introduce a const version of the dev_fwnode() API which takes and returns const pointers and use it where it's applicable. With this, convert dev_fwnode() to be a macro wrapper on top of const and non-const APIs that chooses one based on the type. 
Suggested-by: Sakari Ailus Fixes: aade55c86033 ("device property: Add const qualifier to device_get_match_data() parameter") Signed-off-by: Andy Shevchenko Acked-by: Heikki Krogerus Reviewed-by: Sakari Ailus Link: https://lore.kernel.org/r/20221004092129.19412-2-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/property.c | 11 +++++++++-- include/linux/property.h | 7 ++++++- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/base/property.c b/drivers/base/property.c index 4d6278a84868..d77302d28566 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -17,12 +17,19 @@ #include #include -struct fwnode_handle *dev_fwnode(const struct device *dev) +struct fwnode_handle *__dev_fwnode(struct device *dev) { return IS_ENABLED(CONFIG_OF) && dev->of_node ? of_fwnode_handle(dev->of_node) : dev->fwnode; } -EXPORT_SYMBOL_GPL(dev_fwnode); +EXPORT_SYMBOL_GPL(__dev_fwnode); + +const struct fwnode_handle *__dev_fwnode_const(const struct device *dev) +{ + return IS_ENABLED(CONFIG_OF) && dev->of_node ? 
+ of_fwnode_handle(dev->of_node) : dev->fwnode; +} +EXPORT_SYMBOL_GPL(__dev_fwnode_const); /** * device_property_present - check if a property of a device is present diff --git a/include/linux/property.h b/include/linux/property.h index 117cc200c656..587b5b666b5b 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -32,7 +32,12 @@ enum dev_dma_attr { DEV_DMA_COHERENT, }; -struct fwnode_handle *dev_fwnode(const struct device *dev); +const struct fwnode_handle *__dev_fwnode_const(const struct device *dev); +struct fwnode_handle *__dev_fwnode(struct device *dev); +#define dev_fwnode(dev) \ + _Generic((dev), \ + const struct device *: __dev_fwnode_const, \ + struct device *: __dev_fwnode)(dev) bool device_property_present(struct device *dev, const char *propname); int device_property_read_u8_array(struct device *dev, const char *propname, From 23ead33bc6ed62abc9adc5fe27b9911e2ef5d209 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 4 Oct 2022 12:21:26 +0300 Subject: [PATCH 0357/4122] device property: Constify fwnode connection match APIs The fwnode and device parameters are not altered in the fwnode connection match APIs, constify them. 
Signed-off-by: Andy Shevchenko Acked-by: Heikki Krogerus Reviewed-by: Sakari Ailus Link: https://lore.kernel.org/r/20221004092129.19412-3-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/property.c | 8 ++++---- drivers/usb/roles/class.c | 2 +- drivers/usb/typec/mux.c | 8 ++++---- drivers/usb/typec/retimer.c | 2 +- include/linux/property.h | 8 ++++---- 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/base/property.c b/drivers/base/property.c index d77302d28566..58b8158add5c 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -1213,7 +1213,7 @@ const void *device_get_match_data(const struct device *dev) } EXPORT_SYMBOL_GPL(device_get_match_data); -static unsigned int fwnode_graph_devcon_matches(struct fwnode_handle *fwnode, +static unsigned int fwnode_graph_devcon_matches(const struct fwnode_handle *fwnode, const char *con_id, void *data, devcon_match_fn_t match, void **matches, @@ -1247,7 +1247,7 @@ static unsigned int fwnode_graph_devcon_matches(struct fwnode_handle *fwnode, return count; } -static unsigned int fwnode_devcon_matches(struct fwnode_handle *fwnode, +static unsigned int fwnode_devcon_matches(const struct fwnode_handle *fwnode, const char *con_id, void *data, devcon_match_fn_t match, void **matches, @@ -1289,7 +1289,7 @@ static unsigned int fwnode_devcon_matches(struct fwnode_handle *fwnode, * device node. @match will be used to convert the connection description to * data the caller is expecting to be returned. */ -void *fwnode_connection_find_match(struct fwnode_handle *fwnode, +void *fwnode_connection_find_match(const struct fwnode_handle *fwnode, const char *con_id, void *data, devcon_match_fn_t match) { @@ -1326,7 +1326,7 @@ EXPORT_SYMBOL_GPL(fwnode_connection_find_match); * * Return: Number of matches resolved, or negative errno. 
*/ -int fwnode_connection_find_matches(struct fwnode_handle *fwnode, +int fwnode_connection_find_matches(const struct fwnode_handle *fwnode, const char *con_id, void *data, devcon_match_fn_t match, void **matches, unsigned int matches_len) diff --git a/drivers/usb/roles/class.c b/drivers/usb/roles/class.c index dfaed7eee94f..a3575a5a18ce 100644 --- a/drivers/usb/roles/class.c +++ b/drivers/usb/roles/class.c @@ -87,7 +87,7 @@ enum usb_role usb_role_switch_get_role(struct usb_role_switch *sw) } EXPORT_SYMBOL_GPL(usb_role_switch_get_role); -static void *usb_role_switch_match(struct fwnode_handle *fwnode, const char *id, +static void *usb_role_switch_match(const struct fwnode_handle *fwnode, const char *id, void *data) { struct device *dev; diff --git a/drivers/usb/typec/mux.c b/drivers/usb/typec/mux.c index 941735c73161..c7177ddd4f12 100644 --- a/drivers/usb/typec/mux.c +++ b/drivers/usb/typec/mux.c @@ -32,8 +32,8 @@ static int switch_fwnode_match(struct device *dev, const void *fwnode) return device_match_fwnode(dev, fwnode); } -static void *typec_switch_match(struct fwnode_handle *fwnode, const char *id, - void *data) +static void *typec_switch_match(const struct fwnode_handle *fwnode, + const char *id, void *data) { struct device *dev; @@ -262,8 +262,8 @@ static int mux_fwnode_match(struct device *dev, const void *fwnode) return device_match_fwnode(dev, fwnode); } -static void *typec_mux_match(struct fwnode_handle *fwnode, const char *id, - void *data) +static void *typec_mux_match(const struct fwnode_handle *fwnode, + const char *id, void *data) { const struct typec_altmode_desc *desc = data; struct device *dev; diff --git a/drivers/usb/typec/retimer.c b/drivers/usb/typec/retimer.c index ee94dbbe4745..8e1055783fe2 100644 --- a/drivers/usb/typec/retimer.c +++ b/drivers/usb/typec/retimer.c @@ -34,7 +34,7 @@ static int retimer_fwnode_match(struct device *dev, const void *fwnode) return device_match_fwnode(dev, fwnode) && dev_name_ends_with(dev, "-retimer"); } -static 
void *typec_retimer_match(struct fwnode_handle *fwnode, const char *id, void *data) +static void *typec_retimer_match(const struct fwnode_handle *fwnode, const char *id, void *data) { struct device *dev; diff --git a/include/linux/property.h b/include/linux/property.h index 587b5b666b5b..8d82775a901a 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -442,21 +442,21 @@ unsigned int fwnode_graph_get_endpoint_count(struct fwnode_handle *fwnode, int fwnode_graph_parse_endpoint(const struct fwnode_handle *fwnode, struct fwnode_endpoint *endpoint); -typedef void *(*devcon_match_fn_t)(struct fwnode_handle *fwnode, const char *id, +typedef void *(*devcon_match_fn_t)(const struct fwnode_handle *fwnode, const char *id, void *data); -void *fwnode_connection_find_match(struct fwnode_handle *fwnode, +void *fwnode_connection_find_match(const struct fwnode_handle *fwnode, const char *con_id, void *data, devcon_match_fn_t match); -static inline void *device_connection_find_match(struct device *dev, +static inline void *device_connection_find_match(const struct device *dev, const char *con_id, void *data, devcon_match_fn_t match) { return fwnode_connection_find_match(dev_fwnode(dev), con_id, data, match); } -int fwnode_connection_find_matches(struct fwnode_handle *fwnode, +int fwnode_connection_find_matches(const struct fwnode_handle *fwnode, const char *con_id, void *data, devcon_match_fn_t match, void **matches, unsigned int matches_len); From a1bfed6094ac6868c43aaa43d021bf562cd93d07 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 4 Oct 2022 12:21:27 +0300 Subject: [PATCH 0358/4122] device property: Constify parameter in fwnode_graph_is_endpoint() Constify parameter in fwnode_graph_is_endpoint() since it doesn't alter anything related to it. 
Signed-off-by: Andy Shevchenko Acked-by: Heikki Krogerus Reviewed-by: Sakari Ailus Link: https://lore.kernel.org/r/20221004092129.19412-4-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/property.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/property.h b/include/linux/property.h index 8d82775a901a..7abb1792044f 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -410,7 +410,7 @@ struct fwnode_handle *fwnode_graph_get_remote_port( struct fwnode_handle *fwnode_graph_get_remote_endpoint( const struct fwnode_handle *fwnode); -static inline bool fwnode_graph_is_endpoint(struct fwnode_handle *fwnode) +static inline bool fwnode_graph_is_endpoint(const struct fwnode_handle *fwnode) { return fwnode_property_present(fwnode, "remote-endpoint"); } From 7952cd2b8213f20a1752634c25dfd215da537722 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 4 Oct 2022 12:21:28 +0300 Subject: [PATCH 0359/4122] device property: Constify device child node APIs The device parameter is not altered in the device child node APIs, constify them. Signed-off-by: Andy Shevchenko Acked-by: Heikki Krogerus Reviewed-by: Sakari Ailus Link: https://lore.kernel.org/r/20221004092129.19412-5-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/property.c | 6 +++--- include/linux/property.h | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/base/property.c b/drivers/base/property.c index 58b8158add5c..d3ea5f82978f 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -763,7 +763,7 @@ EXPORT_SYMBOL_GPL(fwnode_get_next_available_child_node); * @dev: Device to find the next child node for. * @child: Handle to one of the device's child nodes or a null handle. 
*/ -struct fwnode_handle *device_get_next_child_node(struct device *dev, +struct fwnode_handle *device_get_next_child_node(const struct device *dev, struct fwnode_handle *child) { const struct fwnode_handle *fwnode = dev_fwnode(dev); @@ -800,7 +800,7 @@ EXPORT_SYMBOL_GPL(fwnode_get_named_child_node); * @dev: Device to find the named child node for. * @childname: String to match child node name against. */ -struct fwnode_handle *device_get_named_child_node(struct device *dev, +struct fwnode_handle *device_get_named_child_node(const struct device *dev, const char *childname) { return fwnode_get_named_child_node(dev_fwnode(dev), childname); @@ -859,7 +859,7 @@ EXPORT_SYMBOL_GPL(fwnode_device_is_available); * device_get_child_node_count - return the number of child nodes for device * @dev: Device to cound the child nodes for */ -unsigned int device_get_child_node_count(struct device *dev) +unsigned int device_get_child_node_count(const struct device *dev) { struct fwnode_handle *child; unsigned int count = 0; diff --git a/include/linux/property.h b/include/linux/property.h index 7abb1792044f..472689e53ade 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -114,16 +114,16 @@ struct fwnode_handle *fwnode_get_next_available_child_node( for (child = fwnode_get_next_available_child_node(fwnode, NULL); child;\ child = fwnode_get_next_available_child_node(fwnode, child)) -struct fwnode_handle *device_get_next_child_node( - struct device *dev, struct fwnode_handle *child); +struct fwnode_handle *device_get_next_child_node(const struct device *dev, + struct fwnode_handle *child); #define device_for_each_child_node(dev, child) \ for (child = device_get_next_child_node(dev, NULL); child; \ child = device_get_next_child_node(dev, child)) -struct fwnode_handle *fwnode_get_named_child_node( - const struct fwnode_handle *fwnode, const char *childname); -struct fwnode_handle *device_get_named_child_node(struct device *dev, +struct fwnode_handle 
*fwnode_get_named_child_node(const struct fwnode_handle *fwnode, + const char *childname); +struct fwnode_handle *device_get_named_child_node(const struct device *dev, const char *childname); struct fwnode_handle *fwnode_handle_get(struct fwnode_handle *fwnode); @@ -132,7 +132,7 @@ void fwnode_handle_put(struct fwnode_handle *fwnode); int fwnode_irq_get(const struct fwnode_handle *fwnode, unsigned int index); int fwnode_irq_get_byname(const struct fwnode_handle *fwnode, const char *name); -unsigned int device_get_child_node_count(struct device *dev); +unsigned int device_get_child_node_count(const struct device *dev); static inline bool device_property_read_bool(struct device *dev, const char *propname) From 59789f3418dd3c0a187490d49e900a59a5c8d732 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 4 Oct 2022 12:21:29 +0300 Subject: [PATCH 0360/4122] device property: Constify parameter in device_dma_supported() and device_get_dma_attr() Constify parameter in device_dma_supported() and device_get_dma_attr() since they don't alter anything related to it. 
Signed-off-by: Andy Shevchenko Acked-by: Heikki Krogerus Reviewed-by: Sakari Ailus Link: https://lore.kernel.org/r/20221004092129.19412-6-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/property.c | 4 ++-- include/linux/property.h | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/base/property.c b/drivers/base/property.c index d3ea5f82978f..68f61d3e3857 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -871,13 +871,13 @@ unsigned int device_get_child_node_count(const struct device *dev) } EXPORT_SYMBOL_GPL(device_get_child_node_count); -bool device_dma_supported(struct device *dev) +bool device_dma_supported(const struct device *dev) { return fwnode_call_bool_op(dev_fwnode(dev), device_dma_supported); } EXPORT_SYMBOL_GPL(device_dma_supported); -enum dev_dma_attr device_get_dma_attr(struct device *dev) +enum dev_dma_attr device_get_dma_attr(const struct device *dev) { if (!fwnode_has_op(dev_fwnode(dev), device_get_dma_attr)) return DEV_DMA_NOT_SUPPORTED; diff --git a/include/linux/property.h b/include/linux/property.h index 472689e53ade..83674f968a8f 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -388,9 +388,8 @@ property_entries_dup(const struct property_entry *properties); void property_entries_free(const struct property_entry *properties); -bool device_dma_supported(struct device *dev); - -enum dev_dma_attr device_get_dma_attr(struct device *dev); +bool device_dma_supported(const struct device *dev); +enum dev_dma_attr device_get_dma_attr(const struct device *dev); const void *device_get_match_data(const struct device *dev); From 87fa05b6db47403fa4fbe3a8ce8fa619f7c8667e Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Sat, 8 Oct 2022 22:45:01 +0300 Subject: [PATCH 0361/4122] thunderbolt: Use str_enabled_disabled() helper Use str_enabled_disabled() helper instead of open coding the same. 
Signed-off-by: Andy Shevchenko Signed-off-by: Mika Westerberg --- drivers/thunderbolt/switch.c | 5 +++-- drivers/thunderbolt/xdomain.c | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index 60da5c23ccaf..363d712aa364 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -8,12 +8,13 @@ #include #include +#include #include #include #include #include #include -#include +#include #include "tb.h" @@ -644,7 +645,7 @@ static int __tb_port_enable(struct tb_port *port, bool enable) if (ret) return ret; - tb_port_dbg(port, "lane %sabled\n", enable ? "en" : "dis"); + tb_port_dbg(port, "lane %s\n", str_enabled_disabled(enable)); return 0; } diff --git a/drivers/thunderbolt/xdomain.c b/drivers/thunderbolt/xdomain.c index f00b2f62d8e3..ddd8fd2d06f8 100644 --- a/drivers/thunderbolt/xdomain.c +++ b/drivers/thunderbolt/xdomain.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -1344,7 +1345,7 @@ static int tb_xdomain_bond_lanes_uuid_high(struct tb_xdomain *xd) tb_port_update_credits(port); tb_xdomain_update_link_attributes(xd); - dev_dbg(&xd->dev, "lane bonding %sabled\n", width == 2 ? "en" : "dis"); + dev_dbg(&xd->dev, "lane bonding %s\n", str_enabled_disabled(width == 2)); return 0; } From b9589c417fedab6b963cf084ef305665166f5326 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 19 Oct 2022 23:57:09 +0100 Subject: [PATCH 0362/4122] thunderbolt: Remove redundant assignment to variable len The variable len is assigned a value that is never read. It is re-assigned a new value in the following do-while loop and never referenced after the loop. The assignment is redundant and can be removed. 
Cleans up clang scan build warning: drivers/thunderbolt/xdomain.c:344:2: warning: Value stored to 'len' is never read [deadcode.DeadStores] Signed-off-by: Colin Ian King Signed-off-by: Mika Westerberg --- drivers/thunderbolt/xdomain.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/thunderbolt/xdomain.c b/drivers/thunderbolt/xdomain.c index ddd8fd2d06f8..cfa83486c9da 100644 --- a/drivers/thunderbolt/xdomain.c +++ b/drivers/thunderbolt/xdomain.c @@ -342,7 +342,6 @@ static int tb_xdp_properties_request(struct tb_ctl *ctl, u64 route, memcpy(&req.src_uuid, src_uuid, sizeof(*src_uuid)); memcpy(&req.dst_uuid, dst_uuid, sizeof(*dst_uuid)); - len = 0; data_len = 0; do { From 32c6fefb291bf84c5a4dbc7d52b56a1605ed9aae Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Wed, 12 Oct 2022 15:27:54 +0200 Subject: [PATCH 0363/4122] usb: phy: generic: make vcc regulator optional phy-generic uses the existence of the property "vcc-supply" to see if a regulator is optional or not. Use devm_regulator_get_optional() instead which exists for this purpose. Using devm_regulator_get_optional() avoids "supply vcc not found, using dummy regulator" messages.
Signed-off-by: Sascha Hauer Link: https://lore.kernel.org/r/20221012132754.292151-1-s.hauer@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/phy/phy-generic.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/usb/phy/phy-generic.c b/drivers/usb/phy/phy-generic.c index 3dc5c04e7cbf..8ed9327cc4a5 100644 --- a/drivers/usb/phy/phy-generic.c +++ b/drivers/usb/phy/phy-generic.c @@ -209,7 +209,7 @@ int usb_phy_gen_create_phy(struct device *dev, struct usb_phy_generic *nop) int err = 0; u32 clk_rate = 0; - bool needs_vcc = false, needs_clk = false; + bool needs_clk = false; if (dev->of_node) { struct device_node *node = dev->of_node; @@ -217,7 +217,6 @@ int usb_phy_gen_create_phy(struct device *dev, struct usb_phy_generic *nop) if (of_property_read_u32(node, "clock-frequency", &clk_rate)) clk_rate = 0; - needs_vcc = of_property_read_bool(node, "vcc-supply"); needs_clk = of_property_read_bool(node, "clocks"); } nop->gpiod_reset = devm_gpiod_get_optional(dev, "reset", @@ -257,13 +256,10 @@ int usb_phy_gen_create_phy(struct device *dev, struct usb_phy_generic *nop) } } - nop->vcc = devm_regulator_get(dev, "vcc"); - if (IS_ERR(nop->vcc)) { - dev_dbg(dev, "Error getting vcc regulator: %ld\n", - PTR_ERR(nop->vcc)); - if (needs_vcc) - return -EPROBE_DEFER; - } + nop->vcc = devm_regulator_get_optional(dev, "vcc"); + if (IS_ERR(nop->vcc) && PTR_ERR(nop->vcc) != -ENODEV) + return dev_err_probe(dev, PTR_ERR(nop->vcc), + "could not get vcc regulator\n"); nop->vbus_draw = devm_regulator_get_exclusive(dev, "vbus"); if (PTR_ERR(nop->vbus_draw) == -ENODEV) From e1b5d2bed67c60c30d01a89df32152d74cfc8e63 Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Sun, 9 Oct 2022 23:53:36 +0800 Subject: [PATCH 0364/4122] usb: chipidea: core: handle usb role switch in a common way Currently, ci_usb_role_switch_set() may be called before system resume stage when suspended. 
Worse yet, the ci_hdrc device may stay in the RPM_ACTIVE state, which will cause pm_runtime_get_sync() to fail to resume the device. In this case, the role switch may be unable to complete the transition process, either because the controller has not exited from the lpm state or because it lacks some means after system resume. Same as ci_cable_notifier(), usb_role_switch could handle its events based on the ci_hdrc_cable mechanism. Signed-off-by: Xu Yang Link: https://lore.kernel.org/r/20221009155336.766960-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/core.c | 63 ++++++++++++++----------------------- 1 file changed, 24 insertions(+), 39 deletions(-) diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c index 6330fa911792..ae90fee75a32 100644 --- a/drivers/usb/chipidea/core.c +++ b/drivers/usb/chipidea/core.c @@ -608,49 +608,32 @@ static int ci_usb_role_switch_set(struct usb_role_switch *sw, enum usb_role role) { struct ci_hdrc *ci = usb_role_switch_get_drvdata(sw); - struct ci_hdrc_cable *cable = NULL; - enum usb_role current_role = ci_role_to_usb_role(ci); - enum ci_role ci_role = usb_role_to_ci_role(role); - unsigned long flags; + struct ci_hdrc_cable *cable; - if ((ci_role != CI_ROLE_END && !ci->roles[ci_role]) || - (current_role == role)) - return 0; - - pm_runtime_get_sync(ci->dev); - /* Stop current role */ - spin_lock_irqsave(&ci->lock, flags); - if (current_role == USB_ROLE_DEVICE) - cable = &ci->platdata->vbus_extcon; - else if (current_role == USB_ROLE_HOST) + if (role == USB_ROLE_HOST) { cable = &ci->platdata->id_extcon; - - if (cable) { - cable->changed = true; - cable->connected = false; - ci_irq(ci); - spin_unlock_irqrestore(&ci->lock, flags); - if (ci->wq && role != USB_ROLE_NONE) - flush_workqueue(ci->wq); - spin_lock_irqsave(&ci->lock, flags); - } - - cable = NULL; - - /* Start target role */ - if (role == USB_ROLE_DEVICE) - cable = &ci->platdata->vbus_extcon; - else if (role == USB_ROLE_HOST) - cable = &ci->platdata->id_extcon; - - if (cable) { cable->changed =
true; cable->connected = true; - ci_irq(ci); + cable = &ci->platdata->vbus_extcon; + cable->changed = true; + cable->connected = false; + } else if (role == USB_ROLE_DEVICE) { + cable = &ci->platdata->id_extcon; + cable->changed = true; + cable->connected = false; + cable = &ci->platdata->vbus_extcon; + cable->changed = true; + cable->connected = true; + } else { + cable = &ci->platdata->id_extcon; + cable->changed = true; + cable->connected = false; + cable = &ci->platdata->vbus_extcon; + cable->changed = true; + cable->connected = false; } - spin_unlock_irqrestore(&ci->lock, flags); - pm_runtime_put_sync(ci->dev); + ci_irq(ci); return 0; } @@ -1305,11 +1288,13 @@ static void ci_extcon_wakeup_int(struct ci_hdrc *ci) cable_id = &ci->platdata->id_extcon; cable_vbus = &ci->platdata->vbus_extcon; - if (!IS_ERR(cable_id->edev) && ci->is_otg && + if ((!IS_ERR(cable_id->edev) || !IS_ERR(ci->role_switch)) + && ci->is_otg && (otgsc & OTGSC_IDIE) && (otgsc & OTGSC_IDIS)) ci_irq(ci); - if (!IS_ERR(cable_vbus->edev) && ci->is_otg && + if ((!IS_ERR(cable_vbus->edev) || !IS_ERR(ci->role_switch)) + && ci->is_otg && (otgsc & OTGSC_BSVIE) && (otgsc & OTGSC_BSVIS)) ci_irq(ci); } From caa7b74493f9c903fb6cd4bdec295bcae0507cc6 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Fri, 14 Oct 2022 17:55:50 +0800 Subject: [PATCH 0365/4122] dt-bindings: phy: imx8mq-usb: add power-domains property Add optional power-domains property for usb phy. 
Signed-off-by: Peng Fan Acked-by: Alexander Stein Acked-by: Rob Herring Link: https://lore.kernel.org/r/20221014095550.2125018-1-peng.fan@oss.nxp.com Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml b/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml index 2936f3510a6a..5ba9570ad7bf 100644 --- a/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml +++ b/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml @@ -28,6 +28,9 @@ properties: items: - const: phy + power-domains: + maxItems: 1 + vbus-supply: description: A phandle to the regulator for USB VBUS. From 74494b33211d067427db25824cd8b53fa0eab1ef Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Thu, 13 Oct 2022 23:14:35 +0800 Subject: [PATCH 0366/4122] usb: chipidea: core: add controller resume support when controller is powered off For some SoCs, the controller's power will be off during system suspend, and some recovery operations are needed to bring the system back to a workable state. We add this support in this patch.
Signed-off-by: Xu Yang Acked-by: Peter Chen Link: https://lore.kernel.org/r/20221013151442.3262951-2-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/core.c | 80 ++++++++++++++++++++++++++++--------- drivers/usb/chipidea/otg.c | 2 +- drivers/usb/chipidea/otg.h | 1 + 3 files changed, 63 insertions(+), 20 deletions(-) diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c index ae90fee75a32..80267b973c26 100644 --- a/drivers/usb/chipidea/core.c +++ b/drivers/usb/chipidea/core.c @@ -637,6 +637,49 @@ static int ci_usb_role_switch_set(struct usb_role_switch *sw, return 0; } +static enum ci_role ci_get_role(struct ci_hdrc *ci) +{ + enum ci_role role; + + if (ci->roles[CI_ROLE_HOST] && ci->roles[CI_ROLE_GADGET]) { + if (ci->is_otg) { + role = ci_otg_role(ci); + hw_write_otgsc(ci, OTGSC_IDIE, OTGSC_IDIE); + } else { + /* + * If the controller is not OTG capable, but support + * role switch, the defalt role is gadget, and the + * user can switch it through debugfs. + */ + role = CI_ROLE_GADGET; + } + } else { + role = ci->roles[CI_ROLE_HOST] ? 
CI_ROLE_HOST + : CI_ROLE_GADGET; + } + + return role; +} + +static void ci_handle_power_lost(struct ci_hdrc *ci) +{ + enum ci_role role; + + disable_irq_nosync(ci->irq); + if (!ci_otg_is_fsm_mode(ci)) { + role = ci_get_role(ci); + + if (ci->role != role) { + ci_handle_id_switch(ci); + } else if (role == CI_ROLE_GADGET) { + if (ci->is_otg && hw_read_otgsc(ci, OTGSC_BSV)) + usb_gadget_vbus_connect(&ci->gadget); + } + } + + enable_irq(ci->irq); +} + static struct usb_role_switch_desc ci_role_switch = { .set = ci_usb_role_switch_set, .get = ci_usb_role_switch_get, @@ -1134,25 +1177,7 @@ static int ci_hdrc_probe(struct platform_device *pdev) } } - if (ci->roles[CI_ROLE_HOST] && ci->roles[CI_ROLE_GADGET]) { - if (ci->is_otg) { - ci->role = ci_otg_role(ci); - /* Enable ID change irq */ - hw_write_otgsc(ci, OTGSC_IDIE, OTGSC_IDIE); - } else { - /* - * If the controller is not OTG capable, but support - * role switch, the defalt role is gadget, and the - * user can switch it through debugfs. - */ - ci->role = CI_ROLE_GADGET; - } - } else { - ci->role = ci->roles[CI_ROLE_HOST] - ? CI_ROLE_HOST - : CI_ROLE_GADGET; - } - + ci->role = ci_get_role(ci); if (!ci_otg_is_fsm_mode(ci)) { /* only update vbus status for peripheral */ if (ci->role == CI_ROLE_GADGET) { @@ -1374,8 +1399,16 @@ static int ci_suspend(struct device *dev) static int ci_resume(struct device *dev) { struct ci_hdrc *ci = dev_get_drvdata(dev); + bool power_lost; int ret; + /* Since ASYNCLISTADDR (host mode) and ENDPTLISTADDR (device + * mode) share the same register address. We can check if + * controller resume from power lost based on this address + * due to this register will be reset after power lost. 
+ */ + power_lost = !hw_read(ci, OP_ENDPTLISTADDR, ~0); + if (device_may_wakeup(dev)) disable_irq_wake(ci->irq); @@ -1383,6 +1416,15 @@ static int ci_resume(struct device *dev) if (ret) return ret; + if (power_lost) { + /* shutdown and re-init for phy */ + ci_usb_phy_exit(ci); + ci_usb_phy_init(ci); + } + + if (power_lost) + ci_handle_power_lost(ci); + if (ci->supports_runtime_pm) { pm_runtime_disable(dev); pm_runtime_set_active(dev); diff --git a/drivers/usb/chipidea/otg.c b/drivers/usb/chipidea/otg.c index 7b53274ef966..622c3b68aa1e 100644 --- a/drivers/usb/chipidea/otg.c +++ b/drivers/usb/chipidea/otg.c @@ -165,7 +165,7 @@ static int hw_wait_vbus_lower_bsv(struct ci_hdrc *ci) return 0; } -static void ci_handle_id_switch(struct ci_hdrc *ci) +void ci_handle_id_switch(struct ci_hdrc *ci) { enum ci_role role = ci_otg_role(ci); diff --git a/drivers/usb/chipidea/otg.h b/drivers/usb/chipidea/otg.h index 5e7a6e571dd2..87629b81e03e 100644 --- a/drivers/usb/chipidea/otg.h +++ b/drivers/usb/chipidea/otg.h @@ -14,6 +14,7 @@ int ci_hdrc_otg_init(struct ci_hdrc *ci); void ci_hdrc_otg_destroy(struct ci_hdrc *ci); enum ci_role ci_otg_role(struct ci_hdrc *ci); void ci_handle_vbus_change(struct ci_hdrc *ci); +void ci_handle_id_switch(struct ci_hdrc *ci); static inline void ci_otg_queue_work(struct ci_hdrc *ci) { disable_irq_nosync(ci->irq); From 450857c6058f092167f17bad97a2cc9c2a39b9a0 Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Thu, 13 Oct 2022 23:14:36 +0800 Subject: [PATCH 0367/4122] usb: chipidea: core: handle suspend/resume for each role There may be a need to handle suspend/resume per role. This patch will add this support. 
Signed-off-by: Xu Yang Acked-by: Peter Chen Link: https://lore.kernel.org/r/20221013151442.3262951-3-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/ci.h | 4 ++++ drivers/usb/chipidea/core.c | 8 ++++++++ 2 files changed, 12 insertions(+) diff --git a/drivers/usb/chipidea/ci.h b/drivers/usb/chipidea/ci.h index a4a3be049910..005c67cb3afb 100644 --- a/drivers/usb/chipidea/ci.h +++ b/drivers/usb/chipidea/ci.h @@ -127,12 +127,16 @@ enum ci_revision { * struct ci_role_driver - host/gadget role driver * @start: start this role * @stop: stop this role + * @suspend: system suspend handler for this role + * @resume: system resume handler for this role * @irq: irq handler for this role * @name: role name string (host/gadget) */ struct ci_role_driver { int (*start)(struct ci_hdrc *); void (*stop)(struct ci_hdrc *); + void (*suspend)(struct ci_hdrc *ci); + void (*resume)(struct ci_hdrc *ci, bool power_lost); irqreturn_t (*irq)(struct ci_hdrc *); const char *name; }; diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c index 80267b973c26..2b170b434d01 100644 --- a/drivers/usb/chipidea/core.c +++ b/drivers/usb/chipidea/core.c @@ -1383,6 +1383,10 @@ static int ci_suspend(struct device *dev) return 0; } + /* Extra routine per role before system suspend */ + if (ci->role != CI_ROLE_END && ci_role(ci)->suspend) + ci_role(ci)->suspend(ci); + if (device_may_wakeup(dev)) { if (ci_otg_is_fsm_mode(ci)) ci_otg_fsm_suspend_for_srp(ci); @@ -1422,6 +1426,10 @@ static int ci_resume(struct device *dev) ci_usb_phy_init(ci); } + /* Extra routine per role after system resume */ + if (ci->role != CI_ROLE_END && ci_role(ci)->resume) + ci_role(ci)->resume(ci, power_lost); + if (power_lost) ci_handle_power_lost(ci); From 2f64d6a6cdfbd992e8a8c481ebf79bfa9a71325b Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Thu, 13 Oct 2022 23:14:37 +0800 Subject: [PATCH 0368/4122] usb: chipidea: host: add suspend/resume support for host controller The controller's power 
may be powered off during system suspend. This will add suspend/resume support when the controller suffers a power loss. Signed-off-by: Xu Yang Link: https://lore.kernel.org/r/20221013151442.3262951-4-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/host.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/usb/chipidea/host.c b/drivers/usb/chipidea/host.c index bc3634a54c6b..ebe7400243b1 100644 --- a/drivers/usb/chipidea/host.c +++ b/drivers/usb/chipidea/host.c @@ -459,6 +459,18 @@ static void ci_hdrc_unmap_urb_for_dma(struct usb_hcd *hcd, struct urb *urb) ci_hdrc_free_dma_aligned_buffer(urb); } +#ifdef CONFIG_PM_SLEEP +static void ci_hdrc_host_suspend(struct ci_hdrc *ci) +{ + ehci_suspend(ci->hcd, device_may_wakeup(ci->dev)); +} + +static void ci_hdrc_host_resume(struct ci_hdrc *ci, bool power_lost) +{ + ehci_resume(ci->hcd, power_lost); +} +#endif + int ci_hdrc_host_init(struct ci_hdrc *ci) { struct ci_role_driver *rdrv; @@ -472,6 +484,10 @@ int ci_hdrc_host_init(struct ci_hdrc *ci) rdrv->start = host_start; rdrv->stop = host_stop; +#ifdef CONFIG_PM_SLEEP + rdrv->suspend = ci_hdrc_host_suspend; + rdrv->resume = ci_hdrc_host_resume; +#endif rdrv->irq = host_irq; rdrv->name = "host"; ci->roles[CI_ROLE_HOST] = rdrv; From 235ffc17d0146d806f6ad8c094c24ff4878f2edb Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Thu, 13 Oct 2022 23:14:38 +0800 Subject: [PATCH 0369/4122] usb: chipidea: udc: add suspend/resume support for device controller The controller's power may be powered off during system suspend. This will add suspend/resume support when the controller suffers a power loss.
Signed-off-by: Xu Yang Acked-by: Peter Chen Link: https://lore.kernel.org/r/20221013151442.3262951-5-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/udc.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c index 8c3e3a635ac2..54c09245ad05 100644 --- a/drivers/usb/chipidea/udc.c +++ b/drivers/usb/chipidea/udc.c @@ -2181,6 +2181,34 @@ static void udc_id_switch_for_host(struct ci_hdrc *ci) ci->platdata->pins_default); } +#ifdef CONFIG_PM_SLEEP +static void udc_suspend(struct ci_hdrc *ci) +{ + /* + * Set OP_ENDPTLISTADDR to be non-zero for + * checking if controller resume from power lost + * in non-host mode. + */ + if (hw_read(ci, OP_ENDPTLISTADDR, ~0) == 0) + hw_write(ci, OP_ENDPTLISTADDR, ~0, ~0); +} + +static void udc_resume(struct ci_hdrc *ci, bool power_lost) +{ + if (power_lost) { + if (ci->is_otg) + hw_write_otgsc(ci, OTGSC_BSVIS | OTGSC_BSVIE, + OTGSC_BSVIS | OTGSC_BSVIE); + if (ci->vbus_active) + usb_gadget_vbus_disconnect(&ci->gadget); + } + + /* Restore value 0 if it was set for power lost check */ + if (hw_read(ci, OP_ENDPTLISTADDR, ~0) == 0xFFFFFFFF) + hw_write(ci, OP_ENDPTLISTADDR, ~0, 0); +} +#endif + /** * ci_hdrc_gadget_init - initialize device related bits * @ci: the controller @@ -2201,6 +2229,10 @@ int ci_hdrc_gadget_init(struct ci_hdrc *ci) rdrv->start = udc_id_switch_for_device; rdrv->stop = udc_id_switch_for_host; +#ifdef CONFIG_PM_SLEEP + rdrv->suspend = udc_suspend; + rdrv->resume = udc_resume; +#endif rdrv->irq = udc_irq; rdrv->name = "gadget"; From b332d6d5c804085ac26d2e7e1a953b59b49644f3 Mon Sep 17 00:00:00 2001 From: Li Jun Date: Thu, 13 Oct 2022 23:14:39 +0800 Subject: [PATCH 0370/4122] usb: chipidea: usbmisc: group usbmisc operations for PM As there maybe more APIs of usbmisc for suspend and resume, group them into imx_usbmisc_suspend/resume. 
Besides, introduced .power_lost_check API, so that proper resume operations can be performed in power lost case. Signed-off-by: Li Jun Link: https://lore.kernel.org/r/20221013151442.3262951-6-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/ci_hdrc_imx.c | 49 ++++-------- drivers/usb/chipidea/ci_hdrc_imx.h | 4 +- drivers/usb/chipidea/usbmisc_imx.c | 121 +++++++++++++++++++++-------- 3 files changed, 107 insertions(+), 67 deletions(-) diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c index 9ffcecd3058c..923f5c00a1d9 100644 --- a/drivers/usb/chipidea/ci_hdrc_imx.c +++ b/drivers/usb/chipidea/ci_hdrc_imx.c @@ -527,16 +527,19 @@ static void ci_hdrc_imx_shutdown(struct platform_device *pdev) ci_hdrc_imx_remove(pdev); } -static int __maybe_unused imx_controller_suspend(struct device *dev) +static int __maybe_unused imx_controller_suspend(struct device *dev, + pm_message_t msg) { struct ci_hdrc_imx_data *data = dev_get_drvdata(dev); int ret = 0; dev_dbg(dev, "at %s\n", __func__); - ret = imx_usbmisc_hsic_set_clk(data->usbmisc_data, false); + ret = imx_usbmisc_suspend(data->usbmisc_data, + PMSG_IS_AUTO(msg) || device_may_wakeup(dev)); if (ret) { - dev_err(dev, "usbmisc hsic_set_clk failed, ret=%d\n", ret); + dev_err(dev, + "usbmisc suspend failed, ret=%d\n", ret); return ret; } @@ -549,7 +552,8 @@ static int __maybe_unused imx_controller_suspend(struct device *dev) return 0; } -static int __maybe_unused imx_controller_resume(struct device *dev) +static int __maybe_unused imx_controller_resume(struct device *dev, + pm_message_t msg) { struct ci_hdrc_imx_data *data = dev_get_drvdata(dev); int ret = 0; @@ -570,22 +574,15 @@ static int __maybe_unused imx_controller_resume(struct device *dev) data->in_lpm = false; - ret = imx_usbmisc_set_wakeup(data->usbmisc_data, false); + ret = imx_usbmisc_resume(data->usbmisc_data, + PMSG_IS_AUTO(msg) || device_may_wakeup(dev)); if (ret) { - dev_err(dev, "usbmisc set_wakeup 
failed, ret=%d\n", ret); + dev_err(dev, "usbmisc resume failed, ret=%d\n", ret); goto clk_disable; } - ret = imx_usbmisc_hsic_set_clk(data->usbmisc_data, true); - if (ret) { - dev_err(dev, "usbmisc hsic_set_clk failed, ret=%d\n", ret); - goto hsic_set_clk_fail; - } - return 0; -hsic_set_clk_fail: - imx_usbmisc_set_wakeup(data->usbmisc_data, true); clk_disable: imx_disable_unprepare_clks(dev); return ret; @@ -601,16 +598,7 @@ static int __maybe_unused ci_hdrc_imx_suspend(struct device *dev) /* The core's suspend doesn't run */ return 0; - if (device_may_wakeup(dev)) { - ret = imx_usbmisc_set_wakeup(data->usbmisc_data, true); - if (ret) { - dev_err(dev, "usbmisc set_wakeup failed, ret=%d\n", - ret); - return ret; - } - } - - ret = imx_controller_suspend(dev); + ret = imx_controller_suspend(dev, PMSG_SUSPEND); if (ret) return ret; @@ -624,7 +612,7 @@ static int __maybe_unused ci_hdrc_imx_resume(struct device *dev) int ret; pinctrl_pm_select_default_state(dev); - ret = imx_controller_resume(dev); + ret = imx_controller_resume(dev, PMSG_RESUME); if (!ret && data->supports_runtime_pm) { pm_runtime_disable(dev); pm_runtime_set_active(dev); @@ -637,25 +625,18 @@ static int __maybe_unused ci_hdrc_imx_resume(struct device *dev) static int __maybe_unused ci_hdrc_imx_runtime_suspend(struct device *dev) { struct ci_hdrc_imx_data *data = dev_get_drvdata(dev); - int ret; if (data->in_lpm) { WARN_ON(1); return 0; } - ret = imx_usbmisc_set_wakeup(data->usbmisc_data, true); - if (ret) { - dev_err(dev, "usbmisc set_wakeup failed, ret=%d\n", ret); - return ret; - } - - return imx_controller_suspend(dev); + return imx_controller_suspend(dev, PMSG_AUTO_SUSPEND); } static int __maybe_unused ci_hdrc_imx_runtime_resume(struct device *dev) { - return imx_controller_resume(dev); + return imx_controller_resume(dev, PMSG_AUTO_RESUME); } static const struct dev_pm_ops ci_hdrc_imx_pm_ops = { diff --git a/drivers/usb/chipidea/ci_hdrc_imx.h b/drivers/usb/chipidea/ci_hdrc_imx.h index 
7daccb9c5006..7135b9a5d913 100644 --- a/drivers/usb/chipidea/ci_hdrc_imx.h +++ b/drivers/usb/chipidea/ci_hdrc_imx.h @@ -32,9 +32,9 @@ struct imx_usbmisc_data { int imx_usbmisc_init(struct imx_usbmisc_data *data); int imx_usbmisc_init_post(struct imx_usbmisc_data *data); -int imx_usbmisc_set_wakeup(struct imx_usbmisc_data *data, bool enabled); int imx_usbmisc_hsic_set_connect(struct imx_usbmisc_data *data); -int imx_usbmisc_hsic_set_clk(struct imx_usbmisc_data *data, bool on); int imx_usbmisc_charger_detection(struct imx_usbmisc_data *data, bool connect); +int imx_usbmisc_suspend(struct imx_usbmisc_data *data, bool wakeup); +int imx_usbmisc_resume(struct imx_usbmisc_data *data, bool wakeup); #endif /* __DRIVER_USB_CHIPIDEA_CI_HDRC_IMX_H */ diff --git a/drivers/usb/chipidea/usbmisc_imx.c b/drivers/usb/chipidea/usbmisc_imx.c index bac0f5458cab..aa815f6d3fe9 100644 --- a/drivers/usb/chipidea/usbmisc_imx.c +++ b/drivers/usb/chipidea/usbmisc_imx.c @@ -150,6 +150,8 @@ struct usbmisc_ops { int (*hsic_set_clk)(struct imx_usbmisc_data *data, bool enabled); /* usb charger detection */ int (*charger_detection)(struct imx_usbmisc_data *data); + /* It's called when system resume from usb power lost */ + int (*power_lost_check)(struct imx_usbmisc_data *data); }; struct imx_usbmisc { @@ -1009,31 +1011,30 @@ EXPORT_SYMBOL_GPL(imx_usbmisc_init); int imx_usbmisc_init_post(struct imx_usbmisc_data *data) { struct imx_usbmisc *usbmisc; + int ret = 0; if (!data) return 0; usbmisc = dev_get_drvdata(data->dev); - if (!usbmisc->ops->post) - return 0; - return usbmisc->ops->post(data); + if (usbmisc->ops->post) + ret = usbmisc->ops->post(data); + if (ret) { + dev_err(data->dev, "post init failed, ret=%d\n", ret); + return ret; + } + + if (usbmisc->ops->set_wakeup) + ret = usbmisc->ops->set_wakeup(data, false); + if (ret) { + dev_err(data->dev, "set_wakeup failed, ret=%d\n", ret); + return ret; + } + + return 0; } EXPORT_SYMBOL_GPL(imx_usbmisc_init_post); -int imx_usbmisc_set_wakeup(struct 
imx_usbmisc_data *data, bool enabled) -{ - struct imx_usbmisc *usbmisc; - - if (!data) - return 0; - - usbmisc = dev_get_drvdata(data->dev); - if (!usbmisc->ops->set_wakeup) - return 0; - return usbmisc->ops->set_wakeup(data, enabled); -} -EXPORT_SYMBOL_GPL(imx_usbmisc_set_wakeup); - int imx_usbmisc_hsic_set_connect(struct imx_usbmisc_data *data) { struct imx_usbmisc *usbmisc; @@ -1048,20 +1049,6 @@ int imx_usbmisc_hsic_set_connect(struct imx_usbmisc_data *data) } EXPORT_SYMBOL_GPL(imx_usbmisc_hsic_set_connect); -int imx_usbmisc_hsic_set_clk(struct imx_usbmisc_data *data, bool on) -{ - struct imx_usbmisc *usbmisc; - - if (!data) - return 0; - - usbmisc = dev_get_drvdata(data->dev); - if (!usbmisc->ops->hsic_set_clk || !data->hsic) - return 0; - return usbmisc->ops->hsic_set_clk(data, on); -} -EXPORT_SYMBOL_GPL(imx_usbmisc_hsic_set_clk); - int imx_usbmisc_charger_detection(struct imx_usbmisc_data *data, bool connect) { struct imx_usbmisc *usbmisc; @@ -1094,6 +1081,78 @@ int imx_usbmisc_charger_detection(struct imx_usbmisc_data *data, bool connect) } EXPORT_SYMBOL_GPL(imx_usbmisc_charger_detection); +int imx_usbmisc_suspend(struct imx_usbmisc_data *data, bool wakeup) +{ + struct imx_usbmisc *usbmisc; + int ret = 0; + + if (!data) + return 0; + + usbmisc = dev_get_drvdata(data->dev); + + if (wakeup && usbmisc->ops->set_wakeup) + ret = usbmisc->ops->set_wakeup(data, true); + if (ret) { + dev_err(data->dev, "set_wakeup failed, ret=%d\n", ret); + return ret; + } + + if (usbmisc->ops->hsic_set_clk && data->hsic) + ret = usbmisc->ops->hsic_set_clk(data, false); + if (ret) { + dev_err(data->dev, "set_wakeup failed, ret=%d\n", ret); + return ret; + } + + return ret; +} +EXPORT_SYMBOL_GPL(imx_usbmisc_suspend); + +int imx_usbmisc_resume(struct imx_usbmisc_data *data, bool wakeup) +{ + struct imx_usbmisc *usbmisc; + int ret = 0; + + if (!data) + return 0; + + usbmisc = dev_get_drvdata(data->dev); + + if (usbmisc->ops->power_lost_check) + ret = 
usbmisc->ops->power_lost_check(data); + if (ret > 0) { + /* re-init if resume from power lost */ + ret = imx_usbmisc_init(data); + if (ret) { + dev_err(data->dev, "re-init failed, ret=%d\n", ret); + return ret; + } + } + + if (wakeup && usbmisc->ops->set_wakeup) + ret = usbmisc->ops->set_wakeup(data, false); + if (ret) { + dev_err(data->dev, "set_wakeup failed, ret=%d\n", ret); + return ret; + } + + if (usbmisc->ops->hsic_set_clk && data->hsic) + ret = usbmisc->ops->hsic_set_clk(data, true); + if (ret) { + dev_err(data->dev, "set_wakeup failed, ret=%d\n", ret); + goto hsic_set_clk_fail; + } + + return 0; + +hsic_set_clk_fail: + if (wakeup && usbmisc->ops->set_wakeup) + usbmisc->ops->set_wakeup(data, true); + return ret; +} +EXPORT_SYMBOL_GPL(imx_usbmisc_resume); + static const struct of_device_id usbmisc_imx_dt_ids[] = { { .compatible = "fsl,imx25-usbmisc", From 04ff4d31af40e268c6cde78814be465a6412212d Mon Sep 17 00:00:00 2001 From: Li Jun Date: Thu, 13 Oct 2022 23:14:40 +0800 Subject: [PATCH 0371/4122] usb: chipidea: usbmisc: add power lost check for imx6sx imx6sx mega off can shutdown domain power supply if none of peripheral in this domain is registered as wakeup source, this patch add related codes to check if power is lost. 
Signed-off-by: Li Jun Link: https://lore.kernel.org/r/20221013151442.3262951-7-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/usbmisc_imx.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/usb/chipidea/usbmisc_imx.c b/drivers/usb/chipidea/usbmisc_imx.c index aa815f6d3fe9..7bfbfc83cfe3 100644 --- a/drivers/usb/chipidea/usbmisc_imx.c +++ b/drivers/usb/chipidea/usbmisc_imx.c @@ -939,6 +939,25 @@ static int usbmisc_imx7ulp_init(struct imx_usbmisc_data *data) return 0; } +static int usbmisc_imx6sx_power_lost_check(struct imx_usbmisc_data *data) +{ + struct imx_usbmisc *usbmisc = dev_get_drvdata(data->dev); + unsigned long flags; + u32 val; + + spin_lock_irqsave(&usbmisc->lock, flags); + val = readl(usbmisc->base + data->index * 4); + spin_unlock_irqrestore(&usbmisc->lock, flags); + /* + * Here use a power on reset value to judge + * if the controller experienced a power lost + */ + if (val == 0x30001000) + return 1; + else + return 0; +} + static const struct usbmisc_ops imx25_usbmisc_ops = { .init = usbmisc_imx25_init, .post = usbmisc_imx25_post, @@ -972,6 +991,7 @@ static const struct usbmisc_ops imx6sx_usbmisc_ops = { .init = usbmisc_imx6sx_init, .hsic_set_connect = usbmisc_imx6_hsic_set_connect, .hsic_set_clk = usbmisc_imx6_hsic_set_clk, + .power_lost_check = usbmisc_imx6sx_power_lost_check, }; static const struct usbmisc_ops imx7d_usbmisc_ops = { From 604ceaa9e9fc223c2cc8d6cf0fc02022a3d14a68 Mon Sep 17 00:00:00 2001 From: Li Jun Date: Thu, 13 Oct 2022 23:14:41 +0800 Subject: [PATCH 0372/4122] usb: chipidea: usbmisc: add power lost check for imx7d imx7d can shut down the domain power supply if none of the peripherals in this domain is registered as a wakeup source; this patch adds the related code to check whether power was lost.
Signed-off-by: Li Jun Link: https://lore.kernel.org/r/20221013151442.3262951-8-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/usbmisc_imx.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/usb/chipidea/usbmisc_imx.c b/drivers/usb/chipidea/usbmisc_imx.c index 7bfbfc83cfe3..cc17dcd97856 100644 --- a/drivers/usb/chipidea/usbmisc_imx.c +++ b/drivers/usb/chipidea/usbmisc_imx.c @@ -939,6 +939,25 @@ static int usbmisc_imx7ulp_init(struct imx_usbmisc_data *data) return 0; } +static int usbmisc_imx7d_power_lost_check(struct imx_usbmisc_data *data) +{ + struct imx_usbmisc *usbmisc = dev_get_drvdata(data->dev); + unsigned long flags; + u32 val; + + spin_lock_irqsave(&usbmisc->lock, flags); + val = readl(usbmisc->base); + spin_unlock_irqrestore(&usbmisc->lock, flags); + /* + * Here use a power on reset value to judge + * if the controller experienced a power lost + */ + if (val == 0x30001000) + return 1; + else + return 0; +} + static int usbmisc_imx6sx_power_lost_check(struct imx_usbmisc_data *data) { struct imx_usbmisc *usbmisc = dev_get_drvdata(data->dev); @@ -998,6 +1017,7 @@ static const struct usbmisc_ops imx7d_usbmisc_ops = { .init = usbmisc_imx7d_init, .set_wakeup = usbmisc_imx7d_set_wakeup, .charger_detection = imx7d_charger_detection, + .power_lost_check = usbmisc_imx7d_power_lost_check, }; static const struct usbmisc_ops imx7ulp_usbmisc_ops = { From 8127cac0f393abaddf5747bcc7e7ccf6668117fe Mon Sep 17 00:00:00 2001 From: Li Jun Date: Thu, 13 Oct 2022 23:14:42 +0800 Subject: [PATCH 0373/4122] usb: chipidea: usbmisc: add power lost check for imx7ulp imx7ulp can shut down the domain power supply if none of the peripherals in this domain is registered as a wakeup source; this patch adds the related power lost check API.
Signed-off-by: Li Jun Link: https://lore.kernel.org/r/20221013151442.3262951-9-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/usbmisc_imx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/chipidea/usbmisc_imx.c b/drivers/usb/chipidea/usbmisc_imx.c index cc17dcd97856..acdb13316cd0 100644 --- a/drivers/usb/chipidea/usbmisc_imx.c +++ b/drivers/usb/chipidea/usbmisc_imx.c @@ -1025,6 +1025,7 @@ static const struct usbmisc_ops imx7ulp_usbmisc_ops = { .set_wakeup = usbmisc_imx7d_set_wakeup, .hsic_set_connect = usbmisc_imx6_hsic_set_connect, .hsic_set_clk = usbmisc_imx6_hsic_set_clk, + .power_lost_check = usbmisc_imx7d_power_lost_check, }; static inline bool is_imx53_usbmisc(struct imx_usbmisc_data *data) From bf0563502ecdbb97efb65790b37dd50afbe2d9b2 Mon Sep 17 00:00:00 2001 From: Deepak R Varma Date: Sat, 22 Oct 2022 01:03:42 +0530 Subject: [PATCH 0374/4122] staging: wlan-ng: remove commented debug printk messages printk messages are added for program flow tracing and are left commented. These commented log messages should be removed as they are no more useful for program execution. 
Signed-off-by: Deepak R Varma Link: https://lore.kernel.org/r/Y1L0FiKvrM9jjZG9@debian-BULLSEYE-live-builder-AMD64 Signed-off-by: Greg Kroah-Hartman --- drivers/staging/wlan-ng/p80211netdev.c | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/drivers/staging/wlan-ng/p80211netdev.c b/drivers/staging/wlan-ng/p80211netdev.c index e04fc666d218..6bef419e8ad0 100644 --- a/drivers/staging/wlan-ng/p80211netdev.c +++ b/drivers/staging/wlan-ng/p80211netdev.c @@ -881,55 +881,42 @@ static int p80211_rx_typedrop(struct wlandevice *wlandev, u16 fc) wlandev->rx.mgmt++; switch (fstype) { case WLAN_FSTYPE_ASSOCREQ: - /* printk("assocreq"); */ wlandev->rx.assocreq++; break; case WLAN_FSTYPE_ASSOCRESP: - /* printk("assocresp"); */ wlandev->rx.assocresp++; break; case WLAN_FSTYPE_REASSOCREQ: - /* printk("reassocreq"); */ wlandev->rx.reassocreq++; break; case WLAN_FSTYPE_REASSOCRESP: - /* printk("reassocresp"); */ wlandev->rx.reassocresp++; break; case WLAN_FSTYPE_PROBEREQ: - /* printk("probereq"); */ wlandev->rx.probereq++; break; case WLAN_FSTYPE_PROBERESP: - /* printk("proberesp"); */ wlandev->rx.proberesp++; break; case WLAN_FSTYPE_BEACON: - /* printk("beacon"); */ wlandev->rx.beacon++; break; case WLAN_FSTYPE_ATIM: - /* printk("atim"); */ wlandev->rx.atim++; break; case WLAN_FSTYPE_DISASSOC: - /* printk("disassoc"); */ wlandev->rx.disassoc++; break; case WLAN_FSTYPE_AUTHEN: - /* printk("authen"); */ wlandev->rx.authen++; break; case WLAN_FSTYPE_DEAUTHEN: - /* printk("deauthen"); */ wlandev->rx.deauthen++; break; default: - /* printk("unknown"); */ wlandev->rx.mgmt_unknown++; break; } - /* printk("\n"); */ drop = 2; break; @@ -943,35 +930,27 @@ static int p80211_rx_typedrop(struct wlandevice *wlandev, u16 fc) wlandev->rx.ctl++; switch (fstype) { case WLAN_FSTYPE_PSPOLL: - /* printk("pspoll"); */ wlandev->rx.pspoll++; break; case WLAN_FSTYPE_RTS: - /* printk("rts"); */ wlandev->rx.rts++; break; case WLAN_FSTYPE_CTS: - /* printk("cts"); */ wlandev->rx.cts++; 
break; case WLAN_FSTYPE_ACK: - /* printk("ack"); */ wlandev->rx.ack++; break; case WLAN_FSTYPE_CFEND: - /* printk("cfend"); */ wlandev->rx.cfend++; break; case WLAN_FSTYPE_CFENDCFACK: - /* printk("cfendcfack"); */ wlandev->rx.cfendcfack++; break; default: - /* printk("unknown"); */ wlandev->rx.ctl_unknown++; break; } - /* printk("\n"); */ drop = 2; break; @@ -1007,7 +986,6 @@ static int p80211_rx_typedrop(struct wlandevice *wlandev, u16 fc) wlandev->rx.cfack_cfpoll++; break; default: - /* printk("unknown"); */ wlandev->rx.data_unknown++; break; } From a6f100aa64831f64496e26d55c6337cc7ceb8bfc Mon Sep 17 00:00:00 2001 From: Aaron Lawrence Date: Sun, 23 Oct 2022 16:41:51 +0700 Subject: [PATCH 0375/4122] Staging: rtl8192e: rtl819x_HTProc: fixed missing blank space Added a missing blank space as per the Linux kernel coding-style regulations. The issue was flagged by the checkpatch script as a warning. Signed-off-by: Aaron Lawrence Link: https://lore.kernel.org/r/e6635103e3cf2426220767955b99d2e2b62a7329.1666502177.git.t4rmin@zohomail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl819x_HTProc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/rtl8192e/rtl819x_HTProc.c b/drivers/staging/rtl8192e/rtl819x_HTProc.c index ef3dca51cf99..b763cf0ba356 100644 --- a/drivers/staging/rtl8192e/rtl819x_HTProc.c +++ b/drivers/staging/rtl8192e/rtl819x_HTProc.c @@ -70,6 +70,7 @@ static u8 LINKSYS_MARVELL_4400N[3] = {0x00, 0x14, 0xa4}; void HTUpdateDefaultSetting(struct rtllib_device *ieee) { struct rt_hi_throughput *pHTInfo = ieee->pHTInfo; + pHTInfo->bRegShortGI20MHz = 1; pHTInfo->bRegShortGI40MHz = 1; From 354989f7dc1e54abcb3c0e7b47444e03cb060504 Mon Sep 17 00:00:00 2001 From: Aaron Lawrence Date: Sun, 23 Oct 2022 16:44:13 +0700 Subject: [PATCH 0376/4122] Staging: rtl8192e: rtl819x_HTProc: fixed alignment matching open parenthesis Aligned multiple statements to match open parenthesis as per Linux kernel coding-style regulations. 
The issues were flagged by the checkpatch script. Signed-off-by: Aaron Lawrence Link: https://lore.kernel.org/r/98c9e764a4447ab550e5615c48f6a98bf6656b0d.1666502177.git.t4rmin@zohomail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl819x_HTProc.c | 25 ++++++++++++----------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl819x_HTProc.c b/drivers/staging/rtl8192e/rtl819x_HTProc.c index b763cf0ba356..a701be8c2923 100644 --- a/drivers/staging/rtl8192e/rtl819x_HTProc.c +++ b/drivers/staging/rtl8192e/rtl819x_HTProc.c @@ -152,8 +152,8 @@ bool IsHTHalfNmodeAPs(struct rtllib_device *ieee) (net->ralink_cap_exist)) retValue = true; else if (!memcmp(net->bssid, UNKNOWN_BORADCOM, 3) || - !memcmp(net->bssid, LINKSYSWRT330_LINKSYSWRT300_BROADCOM, 3) || - !memcmp(net->bssid, LINKSYSWRT350_LINKSYSWRT150_BROADCOM, 3) || + !memcmp(net->bssid, LINKSYSWRT330_LINKSYSWRT300_BROADCOM, 3) || + !memcmp(net->bssid, LINKSYSWRT350_LINKSYSWRT150_BROADCOM, 3) || (net->broadcom_cap_exist)) retValue = true; else if (net->bssht.bd_rt2rt_aggregation) @@ -540,7 +540,7 @@ void HTOnAssocRsp(struct rtllib_device *ieee) pPeerHTCap, sizeof(struct ht_capab_ele)); #endif HTSetConnectBwMode(ieee, (enum ht_channel_width)(pPeerHTCap->ChlWidth), - (enum ht_extchnl_offset)(pPeerHTInfo->ExtChlOffset)); + (enum ht_extchnl_offset)(pPeerHTInfo->ExtChlOffset)); pHTInfo->cur_tx_bw40mhz = ((pPeerHTInfo->RecommemdedTxWidth == 1) ? 
true : false); @@ -566,9 +566,9 @@ void HTOnAssocRsp(struct rtllib_device *ieee) pHTInfo->bCurrentAMPDUEnable = pHTInfo->bAMPDUEnable; if (ieee->rtllib_ap_sec_type && - (ieee->rtllib_ap_sec_type(ieee) & (SEC_ALG_WEP | SEC_ALG_TKIP))) { + (ieee->rtllib_ap_sec_type(ieee) & (SEC_ALG_WEP | SEC_ALG_TKIP))) { if ((pHTInfo->IOTPeer == HT_IOT_PEER_ATHEROS) || - (pHTInfo->IOTPeer == HT_IOT_PEER_UNKNOWN)) + (pHTInfo->IOTPeer == HT_IOT_PEER_UNKNOWN)) pHTInfo->bCurrentAMPDUEnable = false; } @@ -617,7 +617,8 @@ void HTOnAssocRsp(struct rtllib_device *ieee) else pMcsFilter = MCS_FILTER_ALL; ieee->HTHighestOperaRate = HTGetHighestMCSRate(ieee, - ieee->dot11HTOperationalRateSet, pMcsFilter); + ieee->dot11HTOperationalRateSet, + pMcsFilter); ieee->HTCurrentOperaRate = ieee->HTHighestOperaRate; pHTInfo->current_op_mode = pPeerHTInfo->OptMode; @@ -644,13 +645,13 @@ void HTInitializeHTInfo(struct rtllib_device *ieee) pHTInfo->CurrentAMPDUFactor = pHTInfo->AMPDU_Factor; memset((void *)(&(pHTInfo->SelfHTCap)), 0, - sizeof(pHTInfo->SelfHTCap)); + sizeof(pHTInfo->SelfHTCap)); memset((void *)(&(pHTInfo->SelfHTInfo)), 0, - sizeof(pHTInfo->SelfHTInfo)); + sizeof(pHTInfo->SelfHTInfo)); memset((void *)(&(pHTInfo->PeerHTCapBuf)), 0, - sizeof(pHTInfo->PeerHTCapBuf)); + sizeof(pHTInfo->PeerHTCapBuf)); memset((void *)(&(pHTInfo->PeerHTInfoBuf)), 0, - sizeof(pHTInfo->PeerHTInfoBuf)); + sizeof(pHTInfo->PeerHTInfoBuf)); pHTInfo->sw_bw_in_progress = false; @@ -803,8 +804,8 @@ void HTUseDefaultSetting(struct rtllib_device *ieee) HTFilterMCSRate(ieee, ieee->Regdot11TxHTOperationalRateSet, ieee->dot11HTOperationalRateSet); ieee->HTHighestOperaRate = HTGetHighestMCSRate(ieee, - ieee->dot11HTOperationalRateSet, - MCS_FILTER_ALL); + ieee->dot11HTOperationalRateSet, + MCS_FILTER_ALL); ieee->HTCurrentOperaRate = ieee->HTHighestOperaRate; } else { From 4a1fc310e739d63d2d28952e358b33adde0d87e4 Mon Sep 17 00:00:00 2001 From: Aaron Lawrence Date: Sun, 23 Oct 2022 16:45:37 +0700 Subject: [PATCH 0377/4122] Staging: 
rtl8192e: rtl819x_HTProc: fixed unnecessary parentheses Fixed multiple unnecessary parentheses as per the Linux kernel coding-style regulations. The issues were flagged by the checkpatch script. Signed-off-by: Aaron Lawrence Link: https://lore.kernel.org/r/d2168b90726dda2f02279a3483b53b8d9b34cb30.1666502177.git.t4rmin@zohomail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl819x_HTProc.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl819x_HTProc.c b/drivers/staging/rtl8192e/rtl819x_HTProc.c index a701be8c2923..62aa8e893c34 100644 --- a/drivers/staging/rtl8192e/rtl819x_HTProc.c +++ b/drivers/staging/rtl8192e/rtl819x_HTProc.c @@ -285,7 +285,7 @@ void HTConstructCapabilityElement(struct rtllib_device *ieee, u8 *posHTCap, u8 EWC11NHTCap[] = {0x00, 0x90, 0x4c, 0x33}; memcpy(posHTCap, EWC11NHTCap, sizeof(EWC11NHTCap)); - pCapELE = (struct ht_capab_ele *)&(posHTCap[4]); + pCapELE = (struct ht_capab_ele *)&posHTCap[4]; *len = 30 + 2; } else { pCapELE = (struct ht_capab_ele *)posHTCap; @@ -644,13 +644,13 @@ void HTInitializeHTInfo(struct rtllib_device *ieee) pHTInfo->current_mpdu_density = pHTInfo->MPDU_Density; pHTInfo->CurrentAMPDUFactor = pHTInfo->AMPDU_Factor; - memset((void *)(&(pHTInfo->SelfHTCap)), 0, + memset((void *)(&pHTInfo->SelfHTCap), 0, sizeof(pHTInfo->SelfHTCap)); - memset((void *)(&(pHTInfo->SelfHTInfo)), 0, + memset((void *)(&pHTInfo->SelfHTInfo), 0, sizeof(pHTInfo->SelfHTInfo)); - memset((void *)(&(pHTInfo->PeerHTCapBuf)), 0, + memset((void *)(&pHTInfo->PeerHTCapBuf), 0, sizeof(pHTInfo->PeerHTCapBuf)); - memset((void *)(&(pHTInfo->PeerHTInfoBuf)), 0, + memset((void *)(&pHTInfo->PeerHTInfoBuf), 0, sizeof(pHTInfo->PeerHTInfoBuf)); pHTInfo->sw_bw_in_progress = false; @@ -666,7 +666,7 @@ void HTInitializeHTInfo(struct rtllib_device *ieee) pHTInfo->iot_ra_func = 0; { - u8 *RegHTSuppRateSets = &(ieee->RegHTSuppRateSet[0]); + u8 *RegHTSuppRateSets = &ieee->RegHTSuppRateSet[0]; 
RegHTSuppRateSets[0] = 0xFF; RegHTSuppRateSets[1] = 0xFF; From d0b9f28f0da2808b339bb290b539518e69e48ac9 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 21 Oct 2022 18:26:11 +0100 Subject: [PATCH 0378/4122] RDMA/qib: Remove not-used variable n The variable n is incremented but never referenced; it is redundant and can be removed. Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20221021172611.26763-1-colin.i.king@gmail.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/qib/qib_tx.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_tx.c b/drivers/infiniband/hw/qib/qib_tx.c index 6a8148851f21..1325110237cd 100644 --- a/drivers/infiniband/hw/qib/qib_tx.c +++ b/drivers/infiniband/hw/qib/qib_tx.c @@ -82,7 +82,6 @@ int qib_disarm_piobufs_ifneeded(struct qib_ctxtdata *rcd) struct qib_devdata *dd = rcd->dd; unsigned i; unsigned last; - unsigned n = 0; last = rcd->pio_base + rcd->piocnt; /* @@ -102,10 +101,8 @@ int qib_disarm_piobufs_ifneeded(struct qib_ctxtdata *rcd) } spin_lock_irq(&dd->pioavail_lock); for (i = rcd->pio_base; i < last; i++) { - if (__test_and_clear_bit(i, dd->pio_need_disarm)) { - n++; + if (__test_and_clear_bit(i, dd->pio_need_disarm)) dd->f_sendctrl(rcd->ppd, QIB_SENDCTRL_DISARM_BUF(i)); - } } spin_unlock_irq(&dd->pioavail_lock); return 0; From 5dc1b37d75e7136588480712e7173169b3d4164f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 21 Oct 2022 18:35:04 +0100 Subject: [PATCH 0379/4122] RDMA/qib: Remove not-used variable freeze_cnt The variable freeze_cnt is incremented but never referenced; it is redundant and can be removed.
Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20221021173504.27546-1-colin.i.king@gmail.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/qib/qib_iba6120.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index aea571943768..23a81edf3f7a 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -799,12 +799,9 @@ static void qib_handle_6120_hwerrors(struct qib_devdata *dd, char *msg, hwerrs &= ~TXE_PIO_PARITY; } - if (!hwerrs) { - static u32 freeze_cnt; - - freeze_cnt++; + if (!hwerrs) qib_6120_clear_freeze(dd); - } else + else isfatal = 1; } From 2d5206c4629dfe74a51bb9c54758095139f5d01d Mon Sep 17 00:00:00 2001 From: wangjianli Date: Sat, 22 Oct 2022 13:59:05 +0800 Subject: [PATCH 0380/4122] RDMA/qib: fix repeated words in comments Delete the redundant word 'the'. Signed-off-by: wangjianli Link: https://lore.kernel.org/r/20221022055905.49176-1-wangjianli@cdjrlc.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/qib/qib_user_sdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c index bf2f30d67949..9fe03d6ffac1 100644 --- a/drivers/infiniband/hw/qib/qib_user_sdma.c +++ b/drivers/infiniband/hw/qib/qib_user_sdma.c @@ -851,7 +851,7 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, } /* - * This assignment is a bit strange. it's because the + * This assignment is a bit strange. it's because * the pbc counts the number of 32 bit words in the full * packet _except_ the first word of the pbc itself... */ From c4bb733234b0ffd939030bb592b691ac19519455 Mon Sep 17 00:00:00 2001 From: wangjianli Date: Sat, 22 Oct 2022 14:00:30 +0800 Subject: [PATCH 0381/4122] RDMA/core: fix repeated words in comments Delete the redundant word 'the'. 
Signed-off-by: wangjianli Link: https://lore.kernel.org/r/20221022060030.50900-1-wangjianli@cdjrlc.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 4084d05a4510..2e91d8879326 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -1422,7 +1422,7 @@ int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr, *vlan_id = vlan_dev_vlan_id(ndev); } else { /* If the netdev is upper device and if it's lower - * device is vlan device, consider vlan id of the + * device is vlan device, consider vlan id of * the lower vlan device for this gid entry. */ netdev_walk_all_lower_dev_rcu(attr->ndev, From 65bf03427cee48258a227431577dc56bf70461f3 Mon Sep 17 00:00:00 2001 From: wangjianli Date: Sat, 22 Oct 2022 13:52:57 +0800 Subject: [PATCH 0382/4122] RDMA/qedr: fix repeated words in comments Delete the redundant word 'the'. Signed-off-by: wangjianli Link: https://lore.kernel.org/r/20221022055257.42905-1-wangjianli@cdjrlc.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/qedr/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 5152f10d2e6d..5e7069b76d46 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -472,7 +472,7 @@ static irqreturn_t qedr_irq_handler(int irq, void *handle) /* The CQ's CNQ notification counter is checked before * destroying the CQ in a busy-wait loop that waits for all of * the CQ's CNQ interrupts to be processed. It is increased - * here, only after the completion handler, to ensure that the + * here, only after the completion handler, to ensure that * the handler is not running when the CQ is destroyed. 
*/ cq->cnq_notif++; From 71d236399160ad9beaae7267b93d2d487e8f19a0 Mon Sep 17 00:00:00 2001 From: "yangx.jy@fujitsu.com" Date: Fri, 21 Oct 2022 13:45:17 +0000 Subject: [PATCH 0383/4122] RDMA/rxe: Remove the member 'type' of struct rxe_mr The member 'type' is included in both struct rxe_mr and struct ib_mr so remove the duplicate one of struct rxe_mr. Signed-off-by: Xiao Yang Link: https://lore.kernel.org/r/20221021134513.17730-1-yangx.jy@fujitsu.com Reviewed-by: Bob Pearson Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/rxe/rxe_mr.c | 16 ++++++++-------- drivers/infiniband/sw/rxe/rxe_verbs.h | 1 - 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index 502e9ada99b3..d4f10c2d1aa7 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -26,7 +26,7 @@ int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length) { - switch (mr->type) { + switch (mr->ibmr.type) { case IB_MR_TYPE_DMA: return 0; @@ -39,7 +39,7 @@ int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length) default: pr_warn("%s: mr type (%d) not supported\n", - __func__, mr->type); + __func__, mr->ibmr.type); return -EFAULT; } } @@ -109,7 +109,7 @@ void rxe_mr_init_dma(int access, struct rxe_mr *mr) mr->access = access; mr->state = RXE_MR_STATE_VALID; - mr->type = IB_MR_TYPE_DMA; + mr->ibmr.type = IB_MR_TYPE_DMA; } int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova, @@ -178,7 +178,7 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova, mr->access = access; mr->offset = ib_umem_offset(umem); mr->state = RXE_MR_STATE_VALID; - mr->type = IB_MR_TYPE_USER; + mr->ibmr.type = IB_MR_TYPE_USER; return 0; @@ -205,7 +205,7 @@ int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr) mr->max_buf = max_pages; mr->state = RXE_MR_STATE_FREE; - mr->type = IB_MR_TYPE_MEM_REG; + mr->ibmr.type = IB_MR_TYPE_MEM_REG; return 0; @@ -304,7 +304,7 @@ int 
rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length, if (length == 0) return 0; - if (mr->type == IB_MR_TYPE_DMA) { + if (mr->ibmr.type == IB_MR_TYPE_DMA) { u8 *src, *dest; src = (dir == RXE_TO_MR_OBJ) ? addr : ((void *)(uintptr_t)iova); @@ -547,8 +547,8 @@ int rxe_invalidate_mr(struct rxe_qp *qp, u32 key) goto err_drop_ref; } - if (unlikely(mr->type != IB_MR_TYPE_MEM_REG)) { - pr_warn("%s: mr->type (%d) is wrong type\n", __func__, mr->type); + if (unlikely(mr->ibmr.type != IB_MR_TYPE_MEM_REG)) { + pr_warn("%s: mr type (%d) is wrong\n", __func__, mr->ibmr.type); ret = -EINVAL; goto err_drop_ref; } diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index 5f5cbfcb3569..22a299b0a9f0 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -304,7 +304,6 @@ struct rxe_mr { u32 lkey; u32 rkey; enum rxe_mr_state state; - enum ib_mr_type type; u32 offset; int access; From 894c792e3e24c2c15d8aac15aa89ec144468e1b0 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Thu, 13 Oct 2022 14:46:36 -0700 Subject: [PATCH 0384/4122] MAINTAINERS: git://github -> https://github.com for terrelln Github deprecated the git:// links about a year ago, so let's move to the https:// URLs instead. 
Reported-by: Conor Dooley Link: https://github.blog/2021-09-01-improving-git-protocol-security-github/ Signed-off-by: Palmer Dabbelt Signed-off-by: Nick Terrell --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index e04d944005ba..4a2b7a9325dc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -22800,7 +22800,7 @@ ZSTD M: Nick Terrell S: Maintained B: https://github.com/facebook/zstd/issues -T: git git://github.com/terrelln/linux.git +T: git https://github.com/terrelln/linux.git F: include/linux/zstd* F: lib/zstd/ F: lib/decompress_unzstd.c From 7486f5c6e7b197400678f1bb603ac9e4027fb830 Mon Sep 17 00:00:00 2001 From: Jilin Yuan Date: Fri, 2 Sep 2022 09:32:12 +0800 Subject: [PATCH 0385/4122] lib: zstd: fix repeated words in comments Delete the redundant word 'the'. Signed-off-by: Jilin Yuan Signed-off-by: Nick Terrell --- lib/zstd/compress/zstd_compress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/zstd/compress/zstd_compress.c b/lib/zstd/compress/zstd_compress.c index a4e916008b3a..73fff4c60149 100644 --- a/lib/zstd/compress/zstd_compress.c +++ b/lib/zstd/compress/zstd_compress.c @@ -4441,7 +4441,7 @@ static size_t ZSTD_validateSequence(U32 offCode, U32 matchLength, size_t posInSrc, U32 windowLog, size_t dictSize, U32 minMatch) { size_t offsetBound; U32 windowSize = 1 << windowLog; - /* posInSrc represents the amount of data the the decoder would decode up to this point. + /* posInSrc represents the amount of data the decoder would decode up to this point. * As long as the amount of data decoded is less than or equal to window size, offsets may be * larger than the total length of output decoded in order to reference the dict, even larger than * window size. After output surpasses windowSize, we're limited to windowSize offsets again. 
From 19d7df98472851e1d2d11e00c177988d0f49683d Mon Sep 17 00:00:00 2001 From: Xin Gao Date: Mon, 17 Oct 2022 15:18:59 -0700 Subject: [PATCH 0386/4122] lib: zstd: Fix comment typo The word `when' is duplicated in line 999; remove one. Signed-off-by: Xin Gao Signed-off-by: Nick Terrell --- lib/zstd/decompress/zstd_decompress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/zstd/decompress/zstd_decompress.c b/lib/zstd/decompress/zstd_decompress.c index b4d81d84479a..6928e85f9d19 100644 --- a/lib/zstd/decompress/zstd_decompress.c +++ b/lib/zstd/decompress/zstd_decompress.c @@ -996,7 +996,7 @@ size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t sr size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; } /* - * Similar to ZSTD_nextSrcSizeToDecompress(), but when when a block input can be streamed, + * Similar to ZSTD_nextSrcSizeToDecompress(), but when a block input can be streamed, * we allow taking a partial block as the input. Currently only raw uncompressed blocks can * be streamed. * From 4782c725c1538aa9ef894ae4a3938db40be7f02c Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Fri, 14 Oct 2022 14:47:04 -0700 Subject: [PATCH 0387/4122] zstd: Move zstd-common module exports to zstd_common_module.c The zstd codebase is imported from the upstream zstd repo, and is over-written on every update. Upstream keeps the kernel specific code separate from the main library. So the module definition is moved into the zstd_common_module.c file. This matches the pattern followed by the zstd-compress and zstd-decompress files. I've done build and boot testing on x86-64, i386, and aarch64. I've verified that zstd, built either as modules or built-in, builds and boots.
Signed-off-by: Nick Terrell --- lib/zstd/Makefile | 1 + lib/zstd/common/entropy_common.c | 4 ---- lib/zstd/common/zstd_common.c | 10 ---------- lib/zstd/zstd_common_module.c | 32 ++++++++++++++++++++++++++++++++ 4 files changed, 33 insertions(+), 14 deletions(-) create mode 100644 lib/zstd/zstd_common_module.c diff --git a/lib/zstd/Makefile b/lib/zstd/Makefile index 440bd0007ae2..20f08c644b71 100644 --- a/lib/zstd/Makefile +++ b/lib/zstd/Makefile @@ -35,6 +35,7 @@ zstd_decompress-y := \ decompress/zstd_decompress_block.o \ zstd_common-y := \ + zstd_common_module.o \ common/debug.o \ common/entropy_common.o \ common/error_private.o \ diff --git a/lib/zstd/common/entropy_common.c b/lib/zstd/common/entropy_common.c index a311808c0d56..6353249de614 100644 --- a/lib/zstd/common/entropy_common.c +++ b/lib/zstd/common/entropy_common.c @@ -15,7 +15,6 @@ /* ************************************* * Dependencies ***************************************/ -#include #include "mem.h" #include "error_private.h" /* ERR_*, ERROR */ #define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */ @@ -240,7 +239,6 @@ size_t FSE_readNCount( { return FSE_readNCount_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize, /* bmi2 */ 0); } -EXPORT_SYMBOL_GPL(FSE_readNCount); /*! HUF_readStats() : Read compact Huffman tree, saved by HUF_writeCTable(). 
@@ -256,7 +254,6 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* bmi2 */ 0); } -EXPORT_SYMBOL_GPL(HUF_readStats); FORCE_INLINE_TEMPLATE size_t HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats, @@ -357,4 +354,3 @@ size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats, (void)bmi2; return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize); } -EXPORT_SYMBOL_GPL(HUF_readStats_wksp); diff --git a/lib/zstd/common/zstd_common.c b/lib/zstd/common/zstd_common.c index 0f1f63be25d9..3d7e35b309b5 100644 --- a/lib/zstd/common/zstd_common.c +++ b/lib/zstd/common/zstd_common.c @@ -13,7 +13,6 @@ /*-************************************* * Dependencies ***************************************/ -#include <linux/module.h> #define ZSTD_DEPS_NEED_MALLOC #include "zstd_deps.h" /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */ #include "error_private.h" @@ -36,17 +35,14 @@ const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; } * tells if a return value is an error code * symbol is required for external callers */ unsigned ZSTD_isError(size_t code) { return ERR_isError(code); } -EXPORT_SYMBOL_GPL(ZSTD_isError); /*! ZSTD_getErrorName() : * provides error code string from function result (useful for debugging) */ const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); } -EXPORT_SYMBOL_GPL(ZSTD_getErrorName); /*! ZSTD_getError() : * convert a `size_t` function result into a proper ZSTD_errorCode enum */ ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); } -EXPORT_SYMBOL_GPL(ZSTD_getErrorCode); /*! 
ZSTD_getErrorString() : * provides error code string from enum */ @@ -63,7 +59,6 @@ void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem) return customMem.customAlloc(customMem.opaque, size); return ZSTD_malloc(size); } -EXPORT_SYMBOL_GPL(ZSTD_customMalloc); void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem) { @@ -76,7 +71,6 @@ void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem) } return ZSTD_calloc(1, size); } -EXPORT_SYMBOL_GPL(ZSTD_customCalloc); void ZSTD_customFree(void* ptr, ZSTD_customMem customMem) { @@ -87,7 +81,3 @@ void ZSTD_customFree(void* ptr, ZSTD_customMem customMem) ZSTD_free(ptr); } } -EXPORT_SYMBOL_GPL(ZSTD_customFree); - -MODULE_LICENSE("Dual BSD/GPL"); -MODULE_DESCRIPTION("Zstd Common"); diff --git a/lib/zstd/zstd_common_module.c b/lib/zstd/zstd_common_module.c new file mode 100644 index 000000000000..22686e367e6f --- /dev/null +++ b/lib/zstd/zstd_common_module.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause +/* + * Copyright (c) Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#include <linux/module.h> + +#include "common/huf.h" +#include "common/fse.h" +#include "common/zstd_internal.h" + +// Export symbols shared by compress and decompress into a common module + +#undef ZSTD_isError /* defined within zstd_internal.h */ +EXPORT_SYMBOL_GPL(FSE_readNCount); +EXPORT_SYMBOL_GPL(HUF_readStats); +EXPORT_SYMBOL_GPL(HUF_readStats_wksp); +EXPORT_SYMBOL_GPL(ZSTD_isError); +EXPORT_SYMBOL_GPL(ZSTD_getErrorName); +EXPORT_SYMBOL_GPL(ZSTD_getErrorCode); +EXPORT_SYMBOL_GPL(ZSTD_customMalloc); +EXPORT_SYMBOL_GPL(ZSTD_customCalloc); +EXPORT_SYMBOL_GPL(ZSTD_customFree); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("Zstd Common"); From 2aa14b1ab2c41a4fe41efae80d58bb77da91f19f Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Mon, 17 Oct 2022 13:32:37 -0700 Subject: [PATCH 0388/4122] zstd: import upstream v1.5.2 Updates the kernel's zstd library to v1.5.2, the latest zstd release. The upstream tag it is updated to is `v1.5.2-kernel`, which contains several cherry-picked commits on top of the v1.5.2 release which are required for the kernel update. I will create this tag once the PR is ready to merge, until then reference the temporary upstream branch `v1.5.2-kernel-cherrypicks`. I plan to submit this patch as part of the v6.2 merge window. I've done basic build testing & testing on x86-64, i386, and aarch64. I'm merging these patches into my `zstd-next` branch, which is pulled into `linux-next` for further testing. I've benchmarked BtrFS with zstd compression on a x86-64 machine, and saw these results. Decompression speed is a small win across the board. The lower compression levels 1-4 see both compression speed and compression ratio wins. The higher compression levels see a small compression speed loss and about neutral ratio. I expect the lower compression levels to be used much more heavily than the high compression levels, so this should be a net win. 
Level CTime DTime Ratio 1 -2.95% -1.1% -0.7% 3 -3.5% -1.2% -0.5% 5 +3.7% -1.0% +0.0% 7 +3.2% -0.9% +0.0% 9 -4.3% -0.8% +0.1% Signed-off-by: Nick Terrell --- include/linux/zstd_lib.h | 471 ++-- lib/zstd/common/bitstream.h | 9 + lib/zstd/common/compiler.h | 67 +- lib/zstd/common/entropy_common.c | 7 +- lib/zstd/common/error_private.h | 81 +- lib/zstd/common/fse.h | 3 +- lib/zstd/common/fse_decompress.c | 2 +- lib/zstd/common/huf.h | 46 +- lib/zstd/common/mem.h | 2 + lib/zstd/common/portability_macros.h | 93 + lib/zstd/common/zstd_internal.h | 175 +- lib/zstd/compress/clevels.h | 132 ++ lib/zstd/compress/fse_compress.c | 83 +- lib/zstd/compress/huf_compress.c | 642 +++++- lib/zstd/compress/zstd_compress.c | 2002 +++++++++++++---- lib/zstd/compress/zstd_compress_internal.h | 375 ++- lib/zstd/compress/zstd_compress_literals.c | 9 +- lib/zstd/compress/zstd_compress_literals.h | 4 +- lib/zstd/compress/zstd_compress_sequences.c | 31 +- lib/zstd/compress/zstd_compress_superblock.c | 295 +-- lib/zstd/compress/zstd_cwksp.h | 233 +- lib/zstd/compress/zstd_double_fast.c | 413 +++- lib/zstd/compress/zstd_fast.c | 433 ++-- lib/zstd/compress/zstd_lazy.c | 1364 ++++++++--- lib/zstd/compress/zstd_lazy.h | 38 + lib/zstd/compress/zstd_ldm.c | 76 +- lib/zstd/compress/zstd_ldm.h | 1 + lib/zstd/compress/zstd_ldm_geartab.h | 5 +- lib/zstd/compress/zstd_opt.c | 400 ++-- lib/zstd/decompress/huf_decompress.c | 910 ++++++-- lib/zstd/decompress/zstd_decompress.c | 78 +- lib/zstd/decompress/zstd_decompress_block.c | 1020 +++++++-- lib/zstd/decompress/zstd_decompress_block.h | 10 +- .../decompress/zstd_decompress_internal.h | 38 +- lib/zstd/decompress_sources.h | 6 + lib/zstd/zstd_compress_module.c | 6 +- 36 files changed, 6953 insertions(+), 2607 deletions(-) create mode 100644 lib/zstd/common/portability_macros.h create mode 100644 lib/zstd/compress/clevels.h diff --git a/include/linux/zstd_lib.h b/include/linux/zstd_lib.h index 6b91758b61af..79d55465d5c1 100644 --- a/include/linux/zstd_lib.h 
+++ b/include/linux/zstd_lib.h @@ -17,8 +17,16 @@ /* ===== ZSTDLIB_API : control library symbols visibility ===== */ -#define ZSTDLIB_VISIBILITY -#define ZSTDLIB_API ZSTDLIB_VISIBILITY +#ifndef ZSTDLIB_VISIBLE +# if (__GNUC__ >= 4) && !defined(__MINGW32__) +# define ZSTDLIB_VISIBLE __attribute__ ((visibility ("default"))) +# define ZSTDLIB_HIDDEN __attribute__ ((visibility ("hidden"))) +# else +# define ZSTDLIB_VISIBLE +# define ZSTDLIB_HIDDEN +# endif +#endif +#define ZSTDLIB_API ZSTDLIB_VISIBLE /* ***************************************************************************** @@ -56,8 +64,8 @@ /*------ Version ------*/ #define ZSTD_VERSION_MAJOR 1 -#define ZSTD_VERSION_MINOR 4 -#define ZSTD_VERSION_RELEASE 10 +#define ZSTD_VERSION_MINOR 5 +#define ZSTD_VERSION_RELEASE 2 #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) /*! ZSTD_versionNumber() : @@ -94,7 +102,6 @@ ZSTDLIB_API const char* ZSTD_versionString(void); #define ZSTD_BLOCKSIZE_MAX (1<= first frame size * @return : the compressed size of the first frame starting at `src`, @@ -165,8 +172,9 @@ ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize) ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */ -ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed */ +ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed, requires v1.4.0+ */ ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */ +ZSTDLIB_API int ZSTD_defaultCLevel(void); /*!< default compression level, specified by ZSTD_CLEVEL_DEFAULT, requires v1.5.0+ */ /* ************************************* @@ 
-219,9 +227,9 @@ ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, const void* src, size_t srcSize); -/* ************************************* -* Advanced compression API -***************************************/ +/* ******************************************* +* Advanced compression API (Requires v1.4.0+) +**********************************************/ /* API design : * Parameters are pushed one by one into an existing context, @@ -232,7 +240,7 @@ ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, * * It's possible to reset all parameters to "default" using ZSTD_CCtx_reset(). * - * This API supercedes all other "advanced" API entry points in the experimental section. + * This API supersedes all other "advanced" API entry points in the experimental section. * In the future, we expect to remove from experimental API entry points which are redundant with this API. */ @@ -251,7 +259,6 @@ typedef enum { ZSTD_fast=1, Only the order (from fast to strong) is guaranteed */ } ZSTD_strategy; - typedef enum { /* compression parameters @@ -317,7 +324,6 @@ typedef enum { * The higher the value of selected strategy, the more complex it is, * resulting in stronger and slower compression. * Special: value 0 means "use default strategy". */ - /* LDM mode parameters */ ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching. * This parameter is designed to improve compression ratio @@ -374,7 +380,7 @@ typedef enum { ZSTD_c_jobSize=401, /* Size of a compression job. This value is enforced only when nbWorkers >= 1. * Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads. * 0 means default, which is dynamically determined based on compression parameters. - * Job size must be a minimum of overlap size, or 1 MB, whichever is largest. + * Job size must be a minimum of overlap size, or ZSTDMT_JOBSIZE_MIN (= 512 KB), whichever is largest. * The minimum size is automatically and transparently enforced. 
*/ ZSTD_c_overlapLog=402, /* Control the overlap size, as a fraction of window size. * The overlap size is an amount of data reloaded from previous job at the beginning of a new job. @@ -404,6 +410,8 @@ typedef enum { * ZSTD_c_stableOutBuffer * ZSTD_c_blockDelimiters * ZSTD_c_validateSequences + * ZSTD_c_useBlockSplitter + * ZSTD_c_useRowMatchFinder * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. * note : never ever use experimentalParam? names directly; * also, the enums values themselves are unstable and can still change. @@ -419,7 +427,10 @@ typedef enum { ZSTD_c_experimentalParam9=1006, ZSTD_c_experimentalParam10=1007, ZSTD_c_experimentalParam11=1008, - ZSTD_c_experimentalParam12=1009 + ZSTD_c_experimentalParam12=1009, + ZSTD_c_experimentalParam13=1010, + ZSTD_c_experimentalParam14=1011, + ZSTD_c_experimentalParam15=1012 } ZSTD_cParameter; typedef struct { @@ -504,9 +515,9 @@ ZSTDLIB_API size_t ZSTD_compress2( ZSTD_CCtx* cctx, const void* src, size_t srcSize); -/* ************************************* -* Advanced decompression API -***************************************/ +/* ********************************************* +* Advanced decompression API (Requires v1.4.0+) +************************************************/ /* The advanced API pushes parameters one by one into an existing DCtx context. * Parameters are sticky, and remain valid for all following frames @@ -668,7 +679,7 @@ typedef enum { : note : multithreaded compression will block to flush as much output as possible. */ } ZSTD_EndDirective; -/*! ZSTD_compressStream2() : +/*! ZSTD_compressStream2() : Requires v1.4.0+ * Behaves about the same as ZSTD_compressStream, with additional control on end directive. 
* - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() * - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode) @@ -714,11 +725,11 @@ ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /*< recommended size for output /* ***************************************************************************** - * This following is a legacy streaming API. + * This following is a legacy streaming API, available since v1.0+ . * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2(). * It is redundant, but remains fully supported. - * Advanced parameters and dictionary compression can only be used through the - * new API. + * Streaming in combination with advanced parameters and dictionary compression + * can only be used through the new API. ******************************************************************************/ /*! @@ -796,7 +807,7 @@ ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output /*! ZSTD_compress_usingDict() : * Compression at an explicit compression level using a Dictionary. * A dictionary can be any arbitrary data segment (also called a prefix), - * or a buffer with specified information (see dictBuilder/zdict.h). + * or a buffer with specified information (see zdict.h). * Note : This function loads the dictionary, resulting in significant startup delay. * It's intended for a dictionary used only once. * Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */ @@ -879,19 +890,25 @@ ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, * Dictionary helper functions *******************************/ -/*! ZSTD_getDictID_fromDict() : +/*! ZSTD_getDictID_fromDict() : Requires v1.4.0+ * Provides the dictID stored within dictionary. * if @return == 0, the dictionary is not conformant with Zstandard specification. * It can still be loaded, but as a content-only dictionary. 
*/ ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize); -/*! ZSTD_getDictID_fromDDict() : +/*! ZSTD_getDictID_fromCDict() : Requires v1.5.0+ + * Provides the dictID of the dictionary loaded into `cdict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict); + +/*! ZSTD_getDictID_fromDDict() : Requires v1.4.0+ * Provides the dictID of the dictionary loaded into `ddict`. * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict); -/*! ZSTD_getDictID_fromFrame() : +/*! ZSTD_getDictID_fromFrame() : Requires v1.4.0+ * Provides the dictID required to decompressed the frame stored within `src`. * If @return == 0, the dictID could not be decoded. * This could for one of the following reasons : @@ -905,16 +922,16 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); /* ***************************************************************************** - * Advanced dictionary and prefix API + * Advanced dictionary and prefix API (Requires v1.4.0+) * * This API allows dictionaries to be used with ZSTD_compress2(), - * ZSTD_compressStream2(), and ZSTD_decompress(). Dictionaries are sticky, and + * ZSTD_compressStream2(), and ZSTD_decompressDCtx(). Dictionaries are sticky, and * only reset with the context is reset with ZSTD_reset_parameters or * ZSTD_reset_session_and_parameters. Prefixes are single-use. ******************************************************************************/ -/*! ZSTD_CCtx_loadDictionary() : +/*! ZSTD_CCtx_loadDictionary() : Requires v1.4.0+ * Create an internal CDict from `dict` buffer. 
* Decompression will have to use same dictionary. * @result : 0, or an error code (which can be tested with ZSTD_isError()). @@ -933,7 +950,7 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); * to precisely select how dictionary content must be interpreted. */ ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); -/*! ZSTD_CCtx_refCDict() : +/*! ZSTD_CCtx_refCDict() : Requires v1.4.0+ * Reference a prepared dictionary, to be used for all next compressed frames. * Note that compression parameters are enforced from within CDict, * and supersede any compression parameter previously set within CCtx. @@ -947,7 +964,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, s * Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. */ ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); -/*! ZSTD_CCtx_refPrefix() : +/*! ZSTD_CCtx_refPrefix() : Requires v1.4.0+ * Reference a prefix (single-usage dictionary) for next compressed frame. * A prefix is **only used once**. Tables are discarded at end of frame (ZSTD_e_end). * Decompression will need same prefix to properly regenerate data. @@ -968,7 +985,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize); -/*! ZSTD_DCtx_loadDictionary() : +/*! ZSTD_DCtx_loadDictionary() : Requires v1.4.0+ * Create an internal DDict from dict buffer, * to be used to decompress next frames. * The dictionary remains valid for all future frames, until explicitly invalidated. @@ -985,7 +1002,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, */ ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); -/*! ZSTD_DCtx_refDDict() : +/*! 
ZSTD_DCtx_refDDict() : Requires v1.4.0+ * Reference a prepared dictionary, to be used to decompress next frames. * The dictionary remains active for decompression of future frames using same DCtx. * @@ -1003,7 +1020,7 @@ ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, s */ ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); -/*! ZSTD_DCtx_refPrefix() : +/*! ZSTD_DCtx_refPrefix() : Requires v1.4.0+ * Reference a prefix (single-usage dictionary) to decompress next frame. * This is the reverse operation of ZSTD_CCtx_refPrefix(), * and must use the same prefix as the one used during compression. @@ -1024,7 +1041,7 @@ ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, /* === Memory management === */ -/*! ZSTD_sizeof_*() : +/*! ZSTD_sizeof_*() : Requires v1.4.0+ * These functions give the _current_ memory usage of selected object. * Note that object memory usage can evolve (increase or decrease) over time. */ ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); @@ -1049,6 +1066,29 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); #if !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY) #define ZSTD_H_ZSTD_STATIC_LINKING_ONLY +/* This can be overridden externally to hide static symbols. */ +#ifndef ZSTDLIB_STATIC_API +#define ZSTDLIB_STATIC_API ZSTDLIB_VISIBLE +#endif + +/* Deprecation warnings : + * Should these warnings be a problem, it is generally possible to disable them, + * typically with -Wno-deprecated-declarations for gcc or _CRT_SECURE_NO_WARNINGS in Visual. + * Otherwise, it's also possible to define ZSTD_DISABLE_DEPRECATE_WARNINGS. 
+ */ +#ifdef ZSTD_DISABLE_DEPRECATE_WARNINGS +# define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API /* disable deprecation warnings */ +#else +# if (defined(GNUC) && (GNUC > 4 || (GNUC == 4 && GNUC_MINOR >= 5))) || defined(__clang__) +# define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API __attribute__((deprecated(message))) +# elif (__GNUC__ >= 3) +# define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API __attribute__((deprecated)) +# else +# pragma message("WARNING: You need to implement ZSTD_DEPRECATED for this compiler") +# define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API +# endif +#endif /* ZSTD_DISABLE_DEPRECATE_WARNINGS */ + /* ************************************************************************************** * experimental API (static linking only) **************************************************************************************** @@ -1111,9 +1151,6 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); #define ZSTD_SRCSIZEHINT_MIN 0 #define ZSTD_SRCSIZEHINT_MAX INT_MAX -/* internal */ -#define ZSTD_HASHLOG3_MAX 17 - /* --- Advanced types --- */ @@ -1255,6 +1292,15 @@ typedef enum { ZSTD_lcm_uncompressed = 2 /*< Always emit uncompressed literals. */ } ZSTD_literalCompressionMode_e; +typedef enum { + /* Note: This enum controls features which are conditionally beneficial. Zstd typically will make a final + * decision on whether or not to enable the feature (ZSTD_ps_auto), but setting the switch to ZSTD_ps_enable + * or ZSTD_ps_disable allow for a force enable/disable the feature. + */ + ZSTD_ps_auto = 0, /* Let the library automatically determine whether the feature shall be enabled */ + ZSTD_ps_enable = 1, /* Force-enable the feature */ + ZSTD_ps_disable = 2 /* Do not use the feature */ +} ZSTD_paramSwitch_e; /* ************************************* * Frame size functions @@ -1281,7 +1327,7 @@ typedef enum { * note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to * read each contained frame header. 
This is fast as most of the data is skipped, * however it does mean that all frame data must be present and valid. */ -ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize); +ZSTDLIB_STATIC_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize); /*! ZSTD_decompressBound() : * `src` should point to the start of a series of ZSTD encoded and/or skippable frames @@ -1296,13 +1342,13 @@ ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t * note 3 : when the decompressed size field isn't available, the upper-bound for that frame is calculated by: * upper-bound = # blocks * min(128 KB, Window_Size) */ -ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize); +ZSTDLIB_STATIC_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize); /*! ZSTD_frameHeaderSize() : * srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX. * @return : size of the Frame Header, * or an error code (if srcSize is too small) */ -ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize); +ZSTDLIB_STATIC_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize); typedef enum { ZSTD_sf_noBlockDelimiters = 0, /* Representation of ZSTD_Sequence has no block delimiters, sequences only */ @@ -1325,7 +1371,7 @@ typedef enum { * @return : number of sequences generated */ -ZSTDLIB_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, +ZSTDLIB_STATIC_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, size_t outSeqsSize, const void* src, size_t srcSize); /*! 
ZSTD_mergeBlockDelimiters() : @@ -1339,7 +1385,7 @@ ZSTDLIB_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, * setting of ZSTD_c_blockDelimiters as ZSTD_sf_noBlockDelimiters * @return : number of sequences left after merging */ -ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize); +ZSTDLIB_STATIC_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize); /*! ZSTD_compressSequences() : * Compress an array of ZSTD_Sequence, generated from the original source buffer, into dst. @@ -1369,7 +1415,7 @@ ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t se * and cannot emit an RLE block that disagrees with the repcode history * @return : final compressed size or a ZSTD error. */ -ZSTDLIB_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize, +ZSTDLIB_STATIC_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize, const ZSTD_Sequence* inSeqs, size_t inSeqsSize, const void* src, size_t srcSize); @@ -1387,9 +1433,29 @@ ZSTDLIB_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size * * @return : number of bytes written or a ZSTD error. */ -ZSTDLIB_API size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity, +ZSTDLIB_STATIC_API size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned magicVariant); +/*! ZSTD_readSkippableFrame() : + * Retrieves a zstd skippable frame containing data given by src, and writes it to dst buffer. + * + * The parameter magicVariant will receive the magicVariant that was supplied when the frame was written, + * i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START. This can be NULL if the caller is not interested + * in the magicVariant. + * + * Returns an error if destination buffer is not large enough, or if the frame is not skippable. + * + * @return : number of bytes written or a ZSTD error. 
+ */ +ZSTDLIB_API size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity, unsigned* magicVariant, + const void* src, size_t srcSize); + +/*! ZSTD_isSkippableFrame() : + * Tells if the content of `buffer` starts with a valid Frame Identifier for a skippable frame. + */ +ZSTDLIB_API unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size); + + /* ************************************* * Memory management @@ -1418,10 +1484,10 @@ ZSTDLIB_API size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity, * Note 2 : only single-threaded compression is supported. * ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1. */ -ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel); -ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); -ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); -ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void); +ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize(int compressionLevel); +ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); +ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); +ZSTDLIB_STATIC_API size_t ZSTD_estimateDCtxSize(void); /*! ZSTD_estimateCStreamSize() : * ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one. @@ -1436,20 +1502,20 @@ ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void); * Note : if streaming is init with function ZSTD_init?Stream_usingDict(), * an internal ?Dict will be created, which additional size is not estimated here. 
* In this case, get total size by adding ZSTD_estimate?DictSize */ -ZSTDLIB_API size_t ZSTD_estimateCStreamSize(int compressionLevel); -ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams); -ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params); -ZSTDLIB_API size_t ZSTD_estimateDStreamSize(size_t windowSize); -ZSTDLIB_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize); +ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize(int compressionLevel); +ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams); +ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params); +ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize(size_t windowSize); +ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize); /*! ZSTD_estimate?DictSize() : * ZSTD_estimateCDictSize() will bet that src size is relatively "small", and content is copied, like ZSTD_createCDict(). * ZSTD_estimateCDictSize_advanced() makes it possible to control compression parameters precisely, like ZSTD_createCDict_advanced(). * Note : dictionaries created by reference (`ZSTD_dlm_byRef`) are logically smaller. */ -ZSTDLIB_API size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel); -ZSTDLIB_API size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, ZSTD_dictLoadMethod_e dictLoadMethod); -ZSTDLIB_API size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod); +ZSTDLIB_STATIC_API size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel); +ZSTDLIB_STATIC_API size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, ZSTD_dictLoadMethod_e dictLoadMethod); +ZSTDLIB_STATIC_API size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod); /*! 
ZSTD_initStatic*() : * Initialize an object using a pre-allocated fixed-size buffer. @@ -1472,20 +1538,20 @@ ZSTDLIB_API size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e * Limitation 2 : static cctx currently not compatible with multi-threading. * Limitation 3 : static dctx is incompatible with legacy support. */ -ZSTDLIB_API ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize); -ZSTDLIB_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize); /*< same as ZSTD_initStaticCCtx() */ +ZSTDLIB_STATIC_API ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize); +ZSTDLIB_STATIC_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize); /*< same as ZSTD_initStaticCCtx() */ -ZSTDLIB_API ZSTD_DCtx* ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize); -ZSTDLIB_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize); /*< same as ZSTD_initStaticDCtx() */ +ZSTDLIB_STATIC_API ZSTD_DCtx* ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize); +ZSTDLIB_STATIC_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize); /*< same as ZSTD_initStaticDCtx() */ -ZSTDLIB_API const ZSTD_CDict* ZSTD_initStaticCDict( +ZSTDLIB_STATIC_API const ZSTD_CDict* ZSTD_initStaticCDict( void* workspace, size_t workspaceSize, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType, ZSTD_compressionParameters cParams); -ZSTDLIB_API const ZSTD_DDict* ZSTD_initStaticDDict( +ZSTDLIB_STATIC_API const ZSTD_DDict* ZSTD_initStaticDDict( void* workspace, size_t workspaceSize, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, @@ -1504,44 +1570,44 @@ static __attribute__((__unused__)) ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; /*< this constant defers to stdlib's functions */ -ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem); -ZSTDLIB_API 
ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); -ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem); -ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem); +ZSTDLIB_STATIC_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem); +ZSTDLIB_STATIC_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); +ZSTDLIB_STATIC_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem); +ZSTDLIB_STATIC_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem); -ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, +ZSTDLIB_STATIC_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType, ZSTD_compressionParameters cParams, ZSTD_customMem customMem); -/* ! Thread pool : - * These prototypes make it possible to share a thread pool among multiple compression contexts. - * This can limit resources for applications with multiple threads where each one uses - * a threaded compression mode (via ZSTD_c_nbWorkers parameter). - * ZSTD_createThreadPool creates a new thread pool with a given number of threads. - * Note that the lifetime of such pool must exist while being used. - * ZSTD_CCtx_refThreadPool assigns a thread pool to a context (use NULL argument value - * to use an internal thread pool). - * ZSTD_freeThreadPool frees a thread pool, accepts NULL pointer. +/*! Thread pool : + * These prototypes make it possible to share a thread pool among multiple compression contexts. + * This can limit resources for applications with multiple threads where each one uses + * a threaded compression mode (via ZSTD_c_nbWorkers parameter). + * ZSTD_createThreadPool creates a new thread pool with a given number of threads. + * Note that the lifetime of such pool must exist while being used. 
+ * ZSTD_CCtx_refThreadPool assigns a thread pool to a context (use NULL argument value + * to use an internal thread pool). + * ZSTD_freeThreadPool frees a thread pool, accepts NULL pointer. */ typedef struct POOL_ctx_s ZSTD_threadPool; -ZSTDLIB_API ZSTD_threadPool* ZSTD_createThreadPool(size_t numThreads); -ZSTDLIB_API void ZSTD_freeThreadPool (ZSTD_threadPool* pool); /* accept NULL pointer */ -ZSTDLIB_API size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool); +ZSTDLIB_STATIC_API ZSTD_threadPool* ZSTD_createThreadPool(size_t numThreads); +ZSTDLIB_STATIC_API void ZSTD_freeThreadPool (ZSTD_threadPool* pool); /* accept NULL pointer */ +ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool); /* * This API is temporary and is expected to change or disappear in the future! */ -ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2( +ZSTDLIB_STATIC_API ZSTD_CDict* ZSTD_createCDict_advanced2( const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType, const ZSTD_CCtx_params* cctxParams, ZSTD_customMem customMem); -ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced( +ZSTDLIB_STATIC_API ZSTD_DDict* ZSTD_createDDict_advanced( const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType, @@ -1558,28 +1624,22 @@ ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced( * As a consequence, `dictBuffer` **must** outlive CDict, * and its content must remain unmodified throughout the lifetime of CDict. * note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */ -ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel); - -/*! ZSTD_getDictID_fromCDict() : - * Provides the dictID of the dictionary loaded into `cdict`. - * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. 
- * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ -ZSTDLIB_API unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict); +ZSTDLIB_STATIC_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel); /*! ZSTD_getCParams() : * @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize. * `estimatedSrcSize` value is optional, select 0 if not known */ -ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); +ZSTDLIB_STATIC_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); /*! ZSTD_getParams() : * same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`. * All fields of `ZSTD_frameParameters` are set to default : contentSize=1, checksum=0, noDictID=0 */ -ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); +ZSTDLIB_STATIC_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); /*! ZSTD_checkCParams() : * Ensure param values remain within authorized range. * @return 0 on success, or an error code (can be checked with ZSTD_isError()) */ -ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params); +ZSTDLIB_STATIC_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params); /*! ZSTD_adjustCParams() : * optimize params for a given `srcSize` and `dictSize`. @@ -1587,23 +1647,25 @@ ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params); * `dictSize` must be `0` when there is no dictionary. * cPar can be invalid : all parameters will be clamped within valid range in the @return struct. 
* This function never fails (wide contract) */ -ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize); +ZSTDLIB_STATIC_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize); /*! ZSTD_compress_advanced() : * Note : this function is now DEPRECATED. * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters. - * This prototype will be marked as deprecated and generate compilation warning on reaching v1.5.x */ -ZSTDLIB_API size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx, + * This prototype will generate compilation warnings. */ +ZSTD_DEPRECATED("use ZSTD_compress2") +size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict,size_t dictSize, ZSTD_parameters params); /*! ZSTD_compress_usingCDict_advanced() : - * Note : this function is now REDUNDANT. + * Note : this function is now DEPRECATED. * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters. - * This prototype will be marked as deprecated and generate compilation warning in some future version */ -ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, + * This prototype will generate compilation warnings. */ +ZSTD_DEPRECATED("use ZSTD_compress2 with ZSTD_CCtx_loadDictionary") +size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const ZSTD_CDict* cdict, @@ -1613,18 +1675,18 @@ ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, /*! ZSTD_CCtx_loadDictionary_byReference() : * Same as ZSTD_CCtx_loadDictionary(), but dictionary content is referenced, instead of being copied into CCtx. 
* It saves some memory, but also requires that `dict` outlives its usage within `cctx` */ -ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); +ZSTDLIB_STATIC_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); /*! ZSTD_CCtx_loadDictionary_advanced() : * Same as ZSTD_CCtx_loadDictionary(), but gives finer control over * how to load the dictionary (by copy ? by reference ?) * and how to interpret it (automatic ? force raw mode ? full mode only ?) */ -ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); +ZSTDLIB_STATIC_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); /*! ZSTD_CCtx_refPrefix_advanced() : * Same as ZSTD_CCtx_refPrefix(), but gives finer control over * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */ -ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); +ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); /* === experimental parameters === */ /* these parameters can be used with ZSTD_setParameter() @@ -1663,9 +1725,15 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre * See the comments on that enum for an explanation of the feature. */ #define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4 -/* Controls how the literals are compressed (default is auto). - * The value must be of type ZSTD_literalCompressionMode_e. - * See ZSTD_literalCompressionMode_t enum definition for details. +/* Controlled with ZSTD_paramSwitch_e enum. 
+ * Default is ZSTD_ps_auto. + * Set to ZSTD_ps_disable to never compress literals. + * Set to ZSTD_ps_enable to always compress literals. (Note: uncompressed literals + * may still be emitted if huffman is not beneficial to use.) + * + * By default, in ZSTD_ps_auto, the library will decide at runtime whether to use + * literals compression based on the compression parameters - specifically, + * negative compression levels do not use literal compression. */ #define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5 @@ -1728,7 +1796,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre * * Note that this means that the CDict tables can no longer be copied into the * CCtx, so the dict attachment mode ZSTD_dictForceCopy will no longer be - * useable. The dictionary can only be attached or reloaded. + * usable. The dictionary can only be attached or reloaded. * * In general, you should expect compression to be faster--sometimes very much * so--and CDict creation to be slightly slower. Eventually, we will probably @@ -1817,12 +1885,55 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre */ #define ZSTD_c_validateSequences ZSTD_c_experimentalParam12 +/* ZSTD_c_useBlockSplitter + * Controlled with ZSTD_paramSwitch_e enum. + * Default is ZSTD_ps_auto. + * Set to ZSTD_ps_disable to never use block splitter. + * Set to ZSTD_ps_enable to always use block splitter. + * + * By default, in ZSTD_ps_auto, the library will decide at runtime whether to use + * block splitting based on the compression parameters. + */ +#define ZSTD_c_useBlockSplitter ZSTD_c_experimentalParam13 + +/* ZSTD_c_useRowMatchFinder + * Controlled with ZSTD_paramSwitch_e enum. + * Default is ZSTD_ps_auto. + * Set to ZSTD_ps_disable to never use row-based matchfinder. + * Set to ZSTD_ps_enable to force usage of row-based matchfinder. 
+ * + * By default, in ZSTD_ps_auto, the library will decide at runtime whether to use + * the row-based matchfinder based on support for SIMD instructions and the window log. + * Note that this only pertains to compression strategies: greedy, lazy, and lazy2 + */ +#define ZSTD_c_useRowMatchFinder ZSTD_c_experimentalParam14 + +/* ZSTD_c_deterministicRefPrefix + * Default is 0 == disabled. Set to 1 to enable. + * + * Zstd produces different results for prefix compression when the prefix is + * directly adjacent to the data about to be compressed vs. when it isn't. + * This is because zstd detects that the two buffers are contiguous and it can + * use a more efficient match finding algorithm. However, this produces different + * results than when the two buffers are non-contiguous. This flag forces zstd + * to always load the prefix in non-contiguous mode, even if it happens to be + * adjacent to the data, to guarantee determinism. + * + * If you really care about determinism when using a dictionary or prefix, + * like when doing delta compression, you should select this option. It comes + * at a speed penalty of about ~2.5% if the dictionary and data happened to be + * contiguous, and is free if they weren't contiguous. We don't expect that + * intentionally making the dictionary and data contiguous will be worth the + * cost to memcpy() the data. + */ +#define ZSTD_c_deterministicRefPrefix ZSTD_c_experimentalParam15 + /*! ZSTD_CCtx_getParameter() : * Get the requested compression parameter value, selected by enum ZSTD_cParameter, * and store it into int* value. * @return : 0, or an error code (which can be tested with ZSTD_isError()). */ -ZSTDLIB_API size_t ZSTD_CCtx_getParameter(const ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value); +ZSTDLIB_STATIC_API size_t ZSTD_CCtx_getParameter(const ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value); /*! 
ZSTD_CCtx_params : @@ -1842,27 +1953,27 @@ ZSTDLIB_API size_t ZSTD_CCtx_getParameter(const ZSTD_CCtx* cctx, ZSTD_cParameter * This can be used with ZSTD_estimateCCtxSize_advanced_usingCCtxParams() * for static allocation of CCtx for single-threaded compression. */ -ZSTDLIB_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void); -ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params); /* accept NULL pointer */ +ZSTDLIB_STATIC_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void); +ZSTDLIB_STATIC_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params); /* accept NULL pointer */ /*! ZSTD_CCtxParams_reset() : * Reset params to default values. */ -ZSTDLIB_API size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params); +ZSTDLIB_STATIC_API size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params); /*! ZSTD_CCtxParams_init() : * Initializes the compression parameters of cctxParams according to * compression level. All other parameters are reset to their default values. */ -ZSTDLIB_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel); +ZSTDLIB_STATIC_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel); /*! ZSTD_CCtxParams_init_advanced() : * Initializes the compression and frame parameters of cctxParams according to * params. All other parameters are reset to their default values. */ -ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params); +ZSTDLIB_STATIC_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params); -/*! ZSTD_CCtxParams_setParameter() : +/*! ZSTD_CCtxParams_setParameter() : Requires v1.4.0+ * Similar to ZSTD_CCtx_setParameter. * Set one compression parameter, selected by enum ZSTD_cParameter. 
* Parameters must be applied to a ZSTD_CCtx using @@ -1870,14 +1981,14 @@ ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, Z * @result : a code representing success or failure (which can be tested with * ZSTD_isError()). */ -ZSTDLIB_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value); +ZSTDLIB_STATIC_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value); /*! ZSTD_CCtxParams_getParameter() : * Similar to ZSTD_CCtx_getParameter. * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter. * @result : 0, or an error code (which can be tested with ZSTD_isError()). */ -ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(const ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value); +ZSTDLIB_STATIC_API size_t ZSTD_CCtxParams_getParameter(const ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value); /*! ZSTD_CCtx_setParametersUsingCCtxParams() : * Apply a set of ZSTD_CCtx_params to the compression context. @@ -1886,7 +1997,7 @@ ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(const ZSTD_CCtx_params* params, * if nbWorkers>=1, new parameters will be picked up at next job, * with a few restrictions (windowLog, pledgedSrcSize, nbWorkers, jobSize, and overlapLog are not updated). */ -ZSTDLIB_API size_t ZSTD_CCtx_setParametersUsingCCtxParams( +ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setParametersUsingCCtxParams( ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params); /*! ZSTD_compressStream2_simpleArgs() : @@ -1895,7 +2006,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_setParametersUsingCCtxParams( * This variant might be helpful for binders from dynamic languages * which have troubles handling structures containing memory pointers. 
*/ -ZSTDLIB_API size_t ZSTD_compressStream2_simpleArgs ( +ZSTDLIB_STATIC_API size_t ZSTD_compressStream2_simpleArgs ( ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, size_t* dstPos, const void* src, size_t srcSize, size_t* srcPos, @@ -1911,33 +2022,33 @@ ZSTDLIB_API size_t ZSTD_compressStream2_simpleArgs ( * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. * Note 3 : Skippable Frame Identifiers are considered valid. */ -ZSTDLIB_API unsigned ZSTD_isFrame(const void* buffer, size_t size); +ZSTDLIB_STATIC_API unsigned ZSTD_isFrame(const void* buffer, size_t size); /*! ZSTD_createDDict_byReference() : * Create a digested dictionary, ready to start decompression operation without startup delay. * Dictionary content is referenced, and therefore stays in dictBuffer. * It is important that dictBuffer outlives DDict, * it must remain read accessible throughout the lifetime of DDict */ -ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize); +ZSTDLIB_STATIC_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize); /*! ZSTD_DCtx_loadDictionary_byReference() : * Same as ZSTD_DCtx_loadDictionary(), * but references `dict` content instead of copying it into `dctx`. * This saves memory if `dict` remains around., * However, it's imperative that `dict` remains accessible (and unmodified) while being used, so it must outlive decompression. */ -ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); +ZSTDLIB_STATIC_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); /*! ZSTD_DCtx_loadDictionary_advanced() : * Same as ZSTD_DCtx_loadDictionary(), * but gives direct control over * how to load the dictionary (by copy ? by reference ?) * and how to interpret it (automatic ? force raw mode ? 
full mode only ?). */ -ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); +ZSTDLIB_STATIC_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); /*! ZSTD_DCtx_refPrefix_advanced() : * Same as ZSTD_DCtx_refPrefix(), but gives finer control over * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */ -ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); +ZSTDLIB_STATIC_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); /*! ZSTD_DCtx_setMaxWindowSize() : * Refuses allocating internal buffers for frames requiring a window size larger than provided limit. @@ -1946,14 +2057,14 @@ ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* pre * By default, a decompression context accepts all window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) * @return : 0, or an error code (which can be tested using ZSTD_isError()). */ -ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize); +ZSTDLIB_STATIC_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize); /*! ZSTD_DCtx_getParameter() : * Get the requested decompression parameter value, selected by enum ZSTD_dParameter, * and store it into int* value. * @return : 0, or an error code (which can be tested with ZSTD_isError()). 
*/ -ZSTDLIB_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value); +ZSTDLIB_STATIC_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value); /* ZSTD_d_format * experimental parameter, @@ -2028,11 +2139,13 @@ ZSTDLIB_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param /*! ZSTD_DCtx_setFormat() : + * This function is REDUNDANT. Prefer ZSTD_DCtx_setParameter(). * Instruct the decoder context about what kind of data to decode next. * This instruction is mandatory to decode data without a fully-formed header, * such ZSTD_f_zstd1_magicless for example. * @return : 0, or an error code (which can be tested using ZSTD_isError()). */ -ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); +ZSTD_DEPRECATED("use ZSTD_DCtx_setParameter() instead") +size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); /*! ZSTD_decompressStream_simpleArgs() : * Same as ZSTD_decompressStream(), @@ -2040,7 +2153,7 @@ ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); * This can be helpful for binders from dynamic languages * which have troubles handling structures containing memory pointers. */ -ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs ( +ZSTDLIB_STATIC_API size_t ZSTD_decompressStream_simpleArgs ( ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, size_t* dstPos, const void* src, size_t srcSize, size_t* srcPos); @@ -2056,7 +2169,7 @@ ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs ( /*===== Advanced Streaming compression functions =====*/ /*! 
ZSTD_initCStream_srcSize() : - * This function is deprecated, and equivalent to: + * This function is DEPRECATED, and equivalent to: * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); @@ -2065,15 +2178,15 @@ ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs ( * pledgedSrcSize must be correct. If it is not known at init time, use * ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs, * "0" also disables frame content size field. It may be enabled in the future. - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + * This prototype will generate compilation warnings. */ -ZSTDLIB_API size_t -ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, +ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize); /*! ZSTD_initCStream_usingDict() : - * This function is deprecated, and is equivalent to: + * This function is DEPRECATED, and is equivalent to: * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); @@ -2082,15 +2195,15 @@ ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, * dict == NULL or dictSize < 8, in which case no dict is used. * Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy. - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + * This prototype will generate compilation warnings. 
*/ -ZSTDLIB_API size_t -ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, +ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); /*! ZSTD_initCStream_advanced() : - * This function is deprecated, and is approximately equivalent to: + * This function is DEPRECATED, and is approximately equivalent to: * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); * // Pseudocode: Set each zstd parameter and leave the rest as-is. * for ((param, value) : params) { @@ -2102,23 +2215,24 @@ ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, * dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy. * pledgedSrcSize must be correct. * If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + * This prototype will generate compilation warnings. */ -ZSTDLIB_API size_t -ZSTD_initCStream_advanced(ZSTD_CStream* zcs, +ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /*! ZSTD_initCStream_usingCDict() : - * This function is deprecated, and equivalent to: + * This function is DEPRECATED, and equivalent to: * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); * ZSTD_CCtx_refCDict(zcs, cdict); * * note : cdict will just be referenced, and must outlive compression session - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + * This prototype will generate compilation warnings. 
*/ -ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); +ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); /*! ZSTD_initCStream_usingCDict_advanced() : * This function is DEPRECATED, and is approximately equivalent to: @@ -2133,18 +2247,21 @@ ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDi * same as ZSTD_initCStream_usingCDict(), with control over frame parameters. * pledgedSrcSize must be correct. If srcSize is not known at init time, use * value ZSTD_CONTENTSIZE_UNKNOWN. - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + * This prototype will generate compilation warnings. */ -ZSTDLIB_API size_t -ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, +ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict* cdict, ZSTD_frameParameters fParams, unsigned long long pledgedSrcSize); /*! ZSTD_resetCStream() : - * This function is deprecated, and is equivalent to: + * This function is DEPRECATED, and is equivalent to: * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * Note: ZSTD_resetCStream() interprets pledgedSrcSize == 0 as ZSTD_CONTENTSIZE_UNKNOWN, but + * ZSTD_CCtx_setPledgedSrcSize() does not do the same, so ZSTD_CONTENTSIZE_UNKNOWN must be + * explicitly specified. * * start a new frame, using same parameters from previous frame. * This is typically useful to skip dictionary loading stage, since it will re-use it in-place. 
@@ -2154,9 +2271,10 @@ ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, * For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs, * but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead. * @return : 0, or an error code (which can be tested using ZSTD_isError()) - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + * This prototype will generate compilation warnings. */ -ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); +ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); typedef struct { @@ -2174,7 +2292,7 @@ typedef struct { * Note : (ingested - consumed) is amount of input data buffered internally, not yet compressed. * Aggregates progression inside active worker threads. */ -ZSTDLIB_API ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx); +ZSTDLIB_STATIC_API ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx); /*! ZSTD_toFlushNow() : * Tell how many bytes are ready to be flushed immediately. @@ -2189,7 +2307,7 @@ ZSTDLIB_API ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx * therefore flush speed is limited by production speed of oldest job * irrespective of the speed of concurrent (and newer) jobs. 
*/ -ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx); +ZSTDLIB_STATIC_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx); /*===== Advanced Streaming decompression functions =====*/ @@ -2203,7 +2321,7 @@ ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx); * note: no dictionary will be used if dict == NULL or dictSize < 8 * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x */ -ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); +ZSTDLIB_STATIC_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); /*! * This function is deprecated, and is equivalent to: @@ -2214,7 +2332,7 @@ ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dic * note : ddict is referenced, it must outlive decompression session * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x */ -ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); +ZSTDLIB_STATIC_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); /*! * This function is deprecated, and is equivalent to: @@ -2224,7 +2342,7 @@ ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDi * re-use decompression parameters from previous init; saves dictionary loading * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x */ -ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); +ZSTDLIB_STATIC_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); /* ******************************************************************* @@ -2243,8 +2361,7 @@ ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); ZSTD_CCtx object can be re-used multiple times within successive compression operations. Start by initializing a context. 
- Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression, - or ZSTD_compressBegin_advanced(), for finer parameter control. + Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression. It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx() Then, consume your input using ZSTD_compressContinue(). @@ -2267,17 +2384,19 @@ ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); */ /*===== Buffer-less streaming compression functions =====*/ -ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); -ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); -ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /*< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */ -ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /*< note: fails if cdict==NULL */ -ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); /* compression parameters are already set within cdict. pledgedSrcSize must be correct. 
If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */ -ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /*< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */ - -ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_STATIC_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); +ZSTDLIB_STATIC_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); +ZSTDLIB_STATIC_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /*< note: fails if cdict==NULL */ +ZSTDLIB_STATIC_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /*< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */ +ZSTDLIB_STATIC_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_STATIC_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +/* The ZSTD_compressBegin_advanced() and ZSTD_compressBegin_usingCDict_advanced() are now DEPRECATED and will generate a compiler warning */ +ZSTD_DEPRECATED("use advanced API to access custom parameters") +size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /*< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */ +ZSTD_DEPRECATED("use advanced API to access custom parameters") +size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); /* 
compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */ /* Buffer-less streaming decompression (synchronous mode) @@ -2368,24 +2487,24 @@ typedef struct { * @return : 0, `zfhPtr` is correctly filled, * >0, `srcSize` is too small, value is wanted `srcSize` amount, * or an error code, which can be tested using ZSTD_isError() */ -ZSTDLIB_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /*< doesn't consume input */ +ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /*< doesn't consume input */ /*! ZSTD_getFrameHeader_advanced() : * same as ZSTD_getFrameHeader(), * with added capability to select a format (like ZSTD_f_zstd1_magicless) */ -ZSTDLIB_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format); -ZSTDLIB_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize); /*< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */ +ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format); +ZSTDLIB_STATIC_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize); /*< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */ -ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx); -ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); -ZSTDLIB_API size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); +ZSTDLIB_STATIC_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx); +ZSTDLIB_STATIC_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); +ZSTDLIB_STATIC_API size_t 
ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); -ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx); -ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_STATIC_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx); +ZSTDLIB_STATIC_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); /* misc */ -ZSTDLIB_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); +ZSTDLIB_STATIC_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e; -ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); +ZSTDLIB_STATIC_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); @@ -2422,10 +2541,10 @@ ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); */ /*===== Raw zstd block functions =====*/ -ZSTDLIB_API size_t ZSTD_getBlockSize (const ZSTD_CCtx* cctx); -ZSTDLIB_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -ZSTDLIB_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /*< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. 
*/ +ZSTDLIB_STATIC_API size_t ZSTD_getBlockSize (const ZSTD_CCtx* cctx); +ZSTDLIB_STATIC_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_STATIC_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_STATIC_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /*< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */ #endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */ diff --git a/lib/zstd/common/bitstream.h b/lib/zstd/common/bitstream.h index 28248abe8612..feef3a1b1d60 100644 --- a/lib/zstd/common/bitstream.h +++ b/lib/zstd/common/bitstream.h @@ -313,7 +313,16 @@ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 c U32 const regMask = sizeof(bitContainer)*8 - 1; /* if start > regMask, bitstream is corrupted, and result is undefined */ assert(nbBits < BIT_MASK_SIZE); + /* x86 transform & ((1 << nbBits) - 1) to bzhi instruction, it is better + * than accessing memory. When bmi2 instruction is not present, we consider + * such cpus old (pre-Haswell, 2013) and their performance is not of that + * importance. 
+ */ +#if defined(__x86_64__) || defined(_M_X86) + return (bitContainer >> (start & regMask)) & ((((U64)1) << nbBits) - 1); +#else return (bitContainer >> (start & regMask)) & BIT_mask[nbBits]; +#endif } MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) diff --git a/lib/zstd/common/compiler.h b/lib/zstd/common/compiler.h index f5a9c70a228a..c42d39faf9bd 100644 --- a/lib/zstd/common/compiler.h +++ b/lib/zstd/common/compiler.h @@ -11,6 +11,8 @@ #ifndef ZSTD_COMPILER_H #define ZSTD_COMPILER_H +#include "portability_macros.h" + /*-******************************************************* * Compiler specifics *********************************************************/ @@ -34,7 +36,7 @@ /* On MSVC qsort requires that functions passed into it use the __cdecl calling conversion(CC). - This explictly marks such functions as __cdecl so that the code will still compile + This explicitly marks such functions as __cdecl so that the code will still compile if a CC other than __cdecl has been made the default. */ #define WIN_CDECL @@ -70,25 +72,13 @@ /* target attribute */ -#ifndef __has_attribute - #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */ -#endif #define TARGET_ATTRIBUTE(target) __attribute__((__target__(target))) -/* Enable runtime BMI2 dispatch based on the CPU. - * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default. +/* Target attribute for BMI2 dynamic dispatch. + * Enable lzcnt, bmi, and bmi2. + * We test for bmi1 & bmi2. lzcnt is included in bmi1. 
*/ -#ifndef DYNAMIC_BMI2 - #if ((defined(__clang__) && __has_attribute(__target__)) \ - || (defined(__GNUC__) \ - && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \ - && (defined(__x86_64__) || defined(_M_X86)) \ - && !defined(__BMI2__) - # define DYNAMIC_BMI2 1 - #else - # define DYNAMIC_BMI2 0 - #endif -#endif +#define BMI2_TARGET_ATTRIBUTE TARGET_ATTRIBUTE("lzcnt,bmi,bmi2") /* prefetch * can be disabled, by declaring NO_PREFETCH build macro */ @@ -115,8 +105,9 @@ } /* vectorization - * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */ -#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__) + * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax, + * and some compilers, like Intel ICC and MCST LCC, do not support it at all. */ +#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__) && !defined(__LCC__) # if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5) # define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize"))) # else @@ -134,20 +125,18 @@ #define LIKELY(x) (__builtin_expect((x), 1)) #define UNLIKELY(x) (__builtin_expect((x), 0)) +#if __has_builtin(__builtin_unreachable) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))) +# define ZSTD_UNREACHABLE { assert(0), __builtin_unreachable(); } +#else +# define ZSTD_UNREACHABLE { assert(0); } +#endif + /* disable warnings */ /*Like DYNAMIC_BMI2 but for compile time determination of BMI2 support*/ -/* compat. with non-clang compilers */ -#ifndef __has_builtin -# define __has_builtin(x) 0 -#endif - -/* compat. with non-clang compilers */ -#ifndef __has_feature -# define __has_feature(x) 0 -#endif +/* compile time determination of SIMD support */ /* C-language Attributes are added in C23. 
*/ #if defined(__STDC_VERSION__) && (__STDC_VERSION__ > 201710L) && defined(__has_c_attribute) @@ -168,10 +157,28 @@ */ #define ZSTD_FALLTHROUGH fallthrough -/* detects whether we are being compiled under msan */ +/*-************************************************************** +* Alignment check +*****************************************************************/ +/* this test was initially positioned in mem.h, + * but this file is removed (or replaced) for linux kernel + * so it's now hosted in compiler.h, + * which remains valid for both user & kernel spaces. + */ + +#ifndef ZSTD_ALIGNOF +/* covers gcc, clang & MSVC */ +/* note : this section must come first, before C11, + * due to a limitation in the kernel source generator */ +# define ZSTD_ALIGNOF(T) __alignof(T) + +#endif /* ZSTD_ALIGNOF */ + +/*-************************************************************** +* Sanitizer +*****************************************************************/ -/* detects whether we are being compiled under asan */ #endif /* ZSTD_COMPILER_H */ diff --git a/lib/zstd/common/entropy_common.c b/lib/zstd/common/entropy_common.c index 6353249de614..fef67056f052 100644 --- a/lib/zstd/common/entropy_common.c +++ b/lib/zstd/common/entropy_common.c @@ -212,7 +212,7 @@ static size_t FSE_readNCount_body_default( } #if DYNAMIC_BMI2 -TARGET_ATTRIBUTE("bmi2") static size_t FSE_readNCount_body_bmi2( +BMI2_TARGET_ATTRIBUTE static size_t FSE_readNCount_body_bmi2( short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, const void* headerBuffer, size_t hbSize) { @@ -240,6 +240,7 @@ size_t FSE_readNCount( return FSE_readNCount_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize, /* bmi2 */ 0); } + /*! HUF_readStats() : Read compact Huffman tree, saved by HUF_writeCTable(). `huffWeight` is destination buffer. 
@@ -293,7 +294,7 @@ HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats, ZSTD_memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32)); weightTotal = 0; { U32 n; for (n=0; n<oSize; n++) { - if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected); + if (huffWeight[n] > HUF_TABLELOG_MAX) return ERROR(corruption_detected); rankStats[huffWeight[n]]++; weightTotal += (1 << huffWeight[n]) >> 1; } } @@ -331,7 +332,7 @@ static size_t HUF_readStats_body_default(BYTE* huffWeight, size_t hwSize, U32* r } #if DYNAMIC_BMI2 -static TARGET_ATTRIBUTE("bmi2") size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats, +static BMI2_TARGET_ATTRIBUTE size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, const void* src, size_t srcSize, void* workSpace, size_t wkspSize) diff --git a/lib/zstd/common/error_private.h b/lib/zstd/common/error_private.h index d14e686adf95..ca5101e542fa 100644 --- a/lib/zstd/common/error_private.h +++ b/lib/zstd/common/error_private.h @@ -18,8 +18,10 @@ /* **************************************** * Dependencies ******************************************/ -#include "zstd_deps.h" /* size_t */ #include <linux/zstd_errors.h> /* enum list */ +#include "compiler.h" +#include "debug.h" +#include "zstd_deps.h" /* size_t */ /* **************************************** @@ -62,5 +64,82 @@ ERR_STATIC const char* ERR_getErrorName(size_t code) return ERR_getErrorString(ERR_getErrorCode(code)); } +/* + * Ignore: this is an internal helper. + * + * This is a helper function to help force C99-correctness during compilation. + * Under strict compilation modes, variadic macro arguments can't be empty. + * However, variadic function arguments can be. Using a function therefore lets + * us statically check that at least one (string) argument was passed, + * independent of the compilation flags. + */ +static INLINE_KEYWORD UNUSED_ATTR +void _force_has_format_string(const char *format, ...)
{ + (void)format; +} + +/* + * Ignore: this is an internal helper. + * + * We want to force this function invocation to be syntactically correct, but + * we don't want to force runtime evaluation of its arguments. + */ +#define _FORCE_HAS_FORMAT_STRING(...) \ + if (0) { \ + _force_has_format_string(__VA_ARGS__); \ + } + +#define ERR_QUOTE(str) #str + +/* + * Return the specified error if the condition evaluates to true. + * + * In debug modes, prints additional information. + * In order to do that (particularly, printing the conditional that failed), + * this can't just wrap RETURN_ERROR(). + */ +#define RETURN_ERROR_IF(cond, err, ...) \ + if (cond) { \ + RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \ + __FILE__, __LINE__, ERR_QUOTE(cond), ERR_QUOTE(ERROR(err))); \ + _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ + RAWLOG(3, ": " __VA_ARGS__); \ + RAWLOG(3, "\n"); \ + return ERROR(err); \ + } + +/* + * Unconditionally return the specified error. + * + * In debug modes, prints additional information. + */ +#define RETURN_ERROR(err, ...) \ + do { \ + RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \ + __FILE__, __LINE__, ERR_QUOTE(ERROR(err))); \ + _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ + RAWLOG(3, ": " __VA_ARGS__); \ + RAWLOG(3, "\n"); \ + return ERROR(err); \ + } while(0); + +/* + * If the provided expression evaluates to an error code, returns that error code. + * + * In debug modes, prints additional information. + */ +#define FORWARD_IF_ERROR(err, ...) 
\ + do { \ + size_t const err_code = (err); \ + if (ERR_isError(err_code)) { \ + RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \ + __FILE__, __LINE__, ERR_QUOTE(err), ERR_getErrorName(err_code)); \ + _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ + RAWLOG(3, ": " __VA_ARGS__); \ + RAWLOG(3, "\n"); \ + return err_code; \ + } \ + } while(0); + #endif /* ERROR_H_MODULE */ diff --git a/lib/zstd/common/fse.h b/lib/zstd/common/fse.h index 0bb174c2c367..4507043b2287 100644 --- a/lib/zstd/common/fse.h +++ b/lib/zstd/common/fse.h @@ -333,8 +333,9 @@ size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue); /* FSE_buildCTable_wksp() : * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). * `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)` of `unsigned`. + * See FSE_buildCTable_wksp() for breakdown of workspace usage. */ -#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (maxSymbolValue + 2 + (1ull << (tableLog - 2))) +#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (((maxSymbolValue + 2) + (1ull << (tableLog)))/2 + sizeof(U64)/sizeof(U32) /* additional 8 bytes for potential table overwrite */) #define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)) size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); diff --git a/lib/zstd/common/fse_decompress.c b/lib/zstd/common/fse_decompress.c index 2c8bbe3e4c14..a0d06095be83 100644 --- a/lib/zstd/common/fse_decompress.c +++ b/lib/zstd/common/fse_decompress.c @@ -365,7 +365,7 @@ static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, co } #if DYNAMIC_BMI2 -TARGET_ATTRIBUTE("bmi2") static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, 
unsigned maxLog, void* workSpace, size_t wkspSize) +BMI2_TARGET_ATTRIBUTE static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) { return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1); } diff --git a/lib/zstd/common/huf.h b/lib/zstd/common/huf.h index 88c5586646aa..5042ff870308 100644 --- a/lib/zstd/common/huf.h +++ b/lib/zstd/common/huf.h @@ -86,9 +86,9 @@ HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity, /* HUF_compress4X_wksp() : * Same as HUF_compress2(), but uses externally allocated `workSpace`. - * `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */ -#define HUF_WORKSPACE_SIZE ((6 << 10) + 256) -#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32)) + * `workspace` must be at least as large as HUF_WORKSPACE_SIZE */ +#define HUF_WORKSPACE_SIZE ((8 << 10) + 512 /* sorting scratch space */) +#define HUF_WORKSPACE_SIZE_U64 (HUF_WORKSPACE_SIZE / sizeof(U64)) HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, @@ -113,11 +113,11 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, /* *** Constants *** */ -#define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */ +#define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_TABLELOG_ABSOLUTEMAX */ #define HUF_TABLELOG_DEFAULT 11 /* default tableLog value when none specified */ #define HUF_SYMBOLVALUE_MAX 255 -#define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ +#define HUF_TABLELOG_ABSOLUTEMAX 12 /* absolute limit of HUF_MAX_TABLELOG. 
Beyond that value, code does not work */ #if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX) # error "HUF_TABLELOG_MAX is too large !" #endif @@ -133,15 +133,11 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, /* static allocation of HUF's Compression Table */ /* this is a private definition, just exposed for allocation and strict aliasing purpose. never EVER access its members directly */ -struct HUF_CElt_s { - U16 val; - BYTE nbBits; -}; /* typedef'd to HUF_CElt */ -typedef struct HUF_CElt_s HUF_CElt; /* consider it an incomplete type */ -#define HUF_CTABLE_SIZE_U32(maxSymbolValue) ((maxSymbolValue)+1) /* Use tables of U32, for proper alignment */ -#define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32)) +typedef size_t HUF_CElt; /* consider it an incomplete type */ +#define HUF_CTABLE_SIZE_ST(maxSymbolValue) ((maxSymbolValue)+2) /* Use tables of size_t, for proper alignment */ +#define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_ST(maxSymbolValue) * sizeof(size_t)) #define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \ - HUF_CElt name[HUF_CTABLE_SIZE_U32(maxSymbolValue)] /* no final ; */ + HUF_CElt name[HUF_CTABLE_SIZE_ST(maxSymbolValue)] /* no final ; */ /* static allocation of HUF's DTable */ typedef U32 HUF_DTable; @@ -191,6 +187,7 @@ size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSym size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog); size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize); size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); +size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2); size_t HUF_estimateCompressedSize(const 
HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); @@ -203,12 +200,13 @@ typedef enum { * Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. * If it uses hufTable it does not modify hufTable or repeat. * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. - * If preferRepeat then the old table will always be used if valid. */ + * If preferRepeat then the old table will always be used if valid. + * If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */ size_t HUF_compress4X_repeat(void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize, /*< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ - HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2); + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible); /* HUF_buildCTable_wksp() : * Same as HUF_buildCTable(), but using externally allocated scratch buffer. 
@@ -246,11 +244,10 @@ size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, * Loading a CTable saved with HUF_writeCTable() */ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights); -/* HUF_getNbBits() : +/* HUF_getNbBitsFromCTable() : * Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX - * Note 1 : is not inlined, as HUF_CElt definition is private - * Note 2 : const void* used, so that it can provide a statically allocated table as argument (which uses type U32) */ -U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue); + * Note 1 : is not inlined, as HUF_CElt definition is private */ +U32 HUF_getNbBitsFromCTable(const HUF_CElt* symbolTable, U32 symbolValue); /* * HUF_decompress() does the following: @@ -302,18 +299,20 @@ size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* c /* ====================== */ size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); -size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /*< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */ +size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /*< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U64 U64 */ size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); +size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2); /* HUF_compress1X_repeat() : * Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. 
* If it uses hufTable it does not modify hufTable or repeat. * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. - * If preferRepeat then the old table will always be used if valid. */ + * If preferRepeat then the old table will always be used if valid. + * If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */ size_t HUF_compress1X_repeat(void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize, /*< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ - HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2); + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible); size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */ #ifndef HUF_FORCE_DECOMPRESS_X1 @@ -351,6 +350,9 @@ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t ds #ifndef HUF_FORCE_DECOMPRESS_X2 size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2); #endif +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2); +#endif #endif /* HUF_STATIC_LINKING_ONLY */ diff --git a/lib/zstd/common/mem.h b/lib/zstd/common/mem.h index dcdd586a9fd9..1d9cc03924ca 100644 --- a/lib/zstd/common/mem.h +++ b/lib/zstd/common/mem.h @@ -30,6 +30,8 @@ * Basic Types *****************************************************************/ typedef uint8_t BYTE; +typedef uint8_t U8; +typedef int8_t S8; typedef uint16_t U16; typedef int16_t S16; typedef uint32_t U32; diff --git a/lib/zstd/common/portability_macros.h b/lib/zstd/common/portability_macros.h new file mode 100644 index 000000000000..0e3b2c0a527d --- 
/dev/null +++ b/lib/zstd/common/portability_macros.h @@ -0,0 +1,93 @@ +/* + * Copyright (c) Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_PORTABILITY_MACROS_H +#define ZSTD_PORTABILITY_MACROS_H + +/* + * This header file contains macro definitions to support portability. + * This header is shared between C and ASM code, so it MUST only + * contain macro definitions. It MUST not contain any C code. + * + * This header ONLY defines macros to detect platforms/feature support. + * + */ + + +/* compat. with non-clang compilers */ +#ifndef __has_attribute + #define __has_attribute(x) 0 +#endif + +/* compat. with non-clang compilers */ +#ifndef __has_builtin +# define __has_builtin(x) 0 +#endif + +/* compat. with non-clang compilers */ +#ifndef __has_feature +# define __has_feature(x) 0 +#endif + +/* detects whether we are being compiled under msan */ + +/* detects whether we are being compiled under asan */ + +/* detects whether we are being compiled under dfsan */ + +/* Mark the internal assembly functions as hidden */ +#ifdef __ELF__ +# define ZSTD_HIDE_ASM_FUNCTION(func) .hidden func +#else +# define ZSTD_HIDE_ASM_FUNCTION(func) +#endif + +/* Enable runtime BMI2 dispatch based on the CPU. + * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
+ */ +#ifndef DYNAMIC_BMI2 + #if ((defined(__clang__) && __has_attribute(__target__)) \ + || (defined(__GNUC__) \ + && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \ + && (defined(__x86_64__) || defined(_M_X64)) \ + && !defined(__BMI2__) + # define DYNAMIC_BMI2 1 + #else + # define DYNAMIC_BMI2 0 + #endif +#endif + +/* + * Only enable assembly for GNUC compatible compilers, + * because other platforms may not support GAS assembly syntax. + * + * Only enable assembly for Linux / MacOS, other platforms may + * work, but they haven't been tested. This could likely be + * extended to BSD systems. + * + * Disable assembly when MSAN is enabled, because MSAN requires + * 100% of code to be instrumented to work. + */ +#define ZSTD_ASM_SUPPORTED 1 + +/* + * Determines whether we should enable assembly for x86-64 + * with BMI2. + * + * Enable if all of the following conditions hold: + * - ASM hasn't been explicitly disabled by defining ZSTD_DISABLE_ASM + * - Assembly is supported + * - We are compiling for x86-64 and either: + * - DYNAMIC_BMI2 is enabled + * - BMI2 is supported at compile time + */ +#define ZSTD_ENABLE_ASM_X86_64_BMI2 0 + +#endif /* ZSTD_PORTABILITY_MACROS_H */ diff --git a/lib/zstd/common/zstd_internal.h b/lib/zstd/common/zstd_internal.h index fc6f3a9b40c0..93305d9b41bb 100644 --- a/lib/zstd/common/zstd_internal.h +++ b/lib/zstd/common/zstd_internal.h @@ -20,6 +20,7 @@ * Dependencies ***************************************/ #include "compiler.h" +#include "cpu.h" #include "mem.h" #include "debug.h" /* assert, DEBUGLOG, RAWLOG, g_debuglevel */ #include "error_private.h" @@ -47,81 +48,7 @@ #undef MAX #define MIN(a,b) ((a)<(b) ? (a) : (b)) #define MAX(a,b) ((a)>(b) ? (a) : (b)) - -/* - * Ignore: this is an internal helper. - * - * This is a helper function to help force C99-correctness during compilation. - * Under strict compilation modes, variadic macro arguments can't be empty. - * However, variadic function arguments can be.
Using a function therefore lets - * us statically check that at least one (string) argument was passed, - * independent of the compilation flags. - */ -static INLINE_KEYWORD UNUSED_ATTR -void _force_has_format_string(const char *format, ...) { - (void)format; -} - -/* - * Ignore: this is an internal helper. - * - * We want to force this function invocation to be syntactically correct, but - * we don't want to force runtime evaluation of its arguments. - */ -#define _FORCE_HAS_FORMAT_STRING(...) \ - if (0) { \ - _force_has_format_string(__VA_ARGS__); \ - } - -/* - * Return the specified error if the condition evaluates to true. - * - * In debug modes, prints additional information. - * In order to do that (particularly, printing the conditional that failed), - * this can't just wrap RETURN_ERROR(). - */ -#define RETURN_ERROR_IF(cond, err, ...) \ - if (cond) { \ - RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \ - __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \ - _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ - RAWLOG(3, ": " __VA_ARGS__); \ - RAWLOG(3, "\n"); \ - return ERROR(err); \ - } - -/* - * Unconditionally return the specified error. - * - * In debug modes, prints additional information. - */ -#define RETURN_ERROR(err, ...) \ - do { \ - RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \ - __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \ - _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ - RAWLOG(3, ": " __VA_ARGS__); \ - RAWLOG(3, "\n"); \ - return ERROR(err); \ - } while(0); - -/* - * If the provided expression evaluates to an error code, returns that error code. - * - * In debug modes, prints additional information. - */ -#define FORWARD_IF_ERROR(err, ...) 
\ - do { \ - size_t const err_code = (err); \ - if (ERR_isError(err_code)) { \ - RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \ - __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \ - _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ - RAWLOG(3, ": " __VA_ARGS__); \ - RAWLOG(3, "\n"); \ - return err_code; \ - } \ - } while(0); +#define BOUNDED(min,val,max) (MAX(min,MIN(val,max))) /*-************************************* @@ -130,7 +57,6 @@ void _force_has_format_string(const char *format, ...) { #define ZSTD_OPT_NUM (1<<12) #define ZSTD_REP_NUM 3 /* number of repcodes */ -#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1) static UNUSED_ATTR const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; #define KB *(1 <<10) @@ -182,7 +108,7 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy /* Each table cannot take more than #symbols * FSELog bits */ #define ZSTD_MAX_FSE_HEADERS_SIZE (((MaxML + 1) * MLFSELog + (MaxLL + 1) * LLFSELog + (MaxOff + 1) * OffFSELog + 7) / 8) -static UNUSED_ATTR const U32 LL_bits[MaxLL+1] = { +static UNUSED_ATTR const U8 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, @@ -199,7 +125,7 @@ static UNUSED_ATTR const S16 LL_defaultNorm[MaxLL+1] = { #define LL_DEFAULTNORMLOG 6 /* for static allocation */ static UNUSED_ATTR const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG; -static UNUSED_ATTR const U32 ML_bits[MaxML+1] = { +static UNUSED_ATTR const U8 ML_bits[MaxML+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -234,12 +160,31 @@ static UNUSED_ATTR const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG; * Shared functions to include for inlining *********************************************/ static void ZSTD_copy8(void* dst, const void* src) { +#if defined(ZSTD_ARCH_ARM_NEON) + vst1_u8((uint8_t*)dst, vld1_u8((const uint8_t*)src)); +#else ZSTD_memcpy(dst, src, 8); +#endif } - #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; } + +/* Need to use 
memmove here since the literal buffer can now be located within + the dst buffer. In circumstances where the op "catches up" to where the + literal buffer is, there can be partial overlaps in this call on the final + copy if the literal is being shifted by less than 16 bytes. */ static void ZSTD_copy16(void* dst, const void* src) { - ZSTD_memcpy(dst, src, 16); +#if defined(ZSTD_ARCH_ARM_NEON) + vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src)); +#elif defined(ZSTD_ARCH_X86_SSE2) + _mm_storeu_si128((__m128i*)dst, _mm_loadu_si128((const __m128i*)src)); +#elif defined(__clang__) + ZSTD_memmove(dst, src, 16); +#else + /* ZSTD_memmove is not inlined properly by gcc */ + BYTE copy16_buf[16]; + ZSTD_memcpy(copy16_buf, src, 16); + ZSTD_memcpy(dst, copy16_buf, 16); +#endif } #define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; } @@ -267,8 +212,6 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e BYTE* op = (BYTE*)dst; BYTE* const oend = op + length; - assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN)); - if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) { /* Handle short offset copies. */ do { @@ -331,11 +274,18 @@ typedef enum { * Private declarations *********************************************/ typedef struct seqDef_s { - U32 offset; /* Offset code of the sequence */ + U32 offBase; /* offBase == Offset + ZSTD_REP_NUM, or repcode 1,2,3 */ U16 litLength; - U16 matchLength; + U16 mlBase; /* mlBase == matchLength - MINMATCH */ } seqDef; +/* Controls whether seqStore has a single "long" litLength or matchLength. See seqStore_t. 
*/ +typedef enum { + ZSTD_llt_none = 0, /* no longLengthType */ + ZSTD_llt_literalLength = 1, /* represents a long literal */ + ZSTD_llt_matchLength = 2 /* represents a long match */ +} ZSTD_longLengthType_e; + typedef struct { seqDef* sequencesStart; seqDef* sequences; /* ptr to end of sequences */ @@ -347,12 +297,12 @@ typedef struct { size_t maxNbSeq; size_t maxNbLit; - /* longLengthPos and longLengthID to allow us to represent either a single litLength or matchLength + /* longLengthPos and longLengthType to allow us to represent either a single litLength or matchLength * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment * the existing value of the litLength or matchLength by 0x10000. */ - U32 longLengthID; /* 0 == no longLength; 1 == Represent the long literal; 2 == Represent the long match; */ - U32 longLengthPos; /* Index of the sequence to apply long length modification to */ + ZSTD_longLengthType_e longLengthType; + U32 longLengthPos; /* Index of the sequence to apply long length modification to */ } seqStore_t; typedef struct { @@ -362,18 +312,18 @@ typedef struct { /* * Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences - * indicated by longLengthPos and longLengthID, and adds MINMATCH back to matchLength. + * indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength. 
*/ MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq) { ZSTD_sequenceLength seqLen; seqLen.litLength = seq->litLength; - seqLen.matchLength = seq->matchLength + MINMATCH; + seqLen.matchLength = seq->mlBase + MINMATCH; if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) { - if (seqStore->longLengthID == 1) { + if (seqStore->longLengthType == ZSTD_llt_literalLength) { seqLen.litLength += 0xFFFF; } - if (seqStore->longLengthID == 2) { + if (seqStore->longLengthType == ZSTD_llt_matchLength) { seqLen.matchLength += 0xFFFF; } } @@ -419,6 +369,41 @@ MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus } } +/* + * Counts the number of trailing zeros of a `size_t`. + * Most compilers should support CTZ as a builtin. A backup + * implementation is provided if the builtin isn't supported, but + * it may not be terribly efficient. + */ +MEM_STATIC unsigned ZSTD_countTrailingZeros(size_t val) +{ + if (MEM_64bits()) { +# if (__GNUC__ >= 4) + return __builtin_ctzll((U64)val); +# else + static const int DeBruijnBytePos[64] = { 0, 1, 2, 7, 3, 13, 8, 19, + 4, 25, 14, 28, 9, 34, 20, 56, + 5, 17, 26, 54, 15, 41, 29, 43, + 10, 31, 38, 35, 21, 45, 49, 57, + 63, 6, 12, 18, 24, 27, 33, 55, + 16, 53, 40, 42, 30, 37, 44, 48, + 62, 11, 23, 32, 52, 39, 36, 47, + 61, 22, 51, 46, 60, 50, 59, 58 }; + return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; +# endif + } else { /* 32 bits */ +# if (__GNUC__ >= 3) + return __builtin_ctz((U32)val); +# else + static const int DeBruijnBytePos[32] = { 0, 1, 28, 2, 29, 14, 24, 3, + 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, + 26, 12, 18, 6, 11, 5, 10, 9 }; + return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; +# endif + } +} + /* ZSTD_invalidateRepCodes() : * ensures next compression will not use repcodes from previous block. 
@@ -445,6 +430,14 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, const void* src, size_t srcSize); +/* + * @returns true iff the CPU supports dynamic BMI2 dispatch. + */ +MEM_STATIC int ZSTD_cpuSupportsBmi2(void) +{ + ZSTD_cpuid_t cpuid = ZSTD_cpuid(); + return ZSTD_cpuid_bmi1(cpuid) && ZSTD_cpuid_bmi2(cpuid); +} #endif /* ZSTD_CCOMMON_H_MODULE */ diff --git a/lib/zstd/compress/clevels.h b/lib/zstd/compress/clevels.h new file mode 100644 index 000000000000..d9a76112ec3a --- /dev/null +++ b/lib/zstd/compress/clevels.h @@ -0,0 +1,132 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_CLEVELS_H +#define ZSTD_CLEVELS_H + +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressionParameters */ +#include + +/*-===== Pre-defined compression levels =====-*/ + +#define ZSTD_MAX_CLEVEL 22 + +__attribute__((__unused__)) + +static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = { +{ /* "default" - for any srcSize > 256 KB */ + /* W, C, H, S, L, TL, strat */ + { 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */ + { 19, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */ + { 20, 15, 16, 1, 6, 0, ZSTD_fast }, /* level 2 */ + { 21, 16, 17, 1, 5, 0, ZSTD_dfast }, /* level 3 */ + { 21, 18, 18, 1, 5, 0, ZSTD_dfast }, /* level 4 */ + { 21, 18, 19, 3, 5, 2, ZSTD_greedy }, /* level 5 */ + { 21, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6 */ + { 21, 19, 20, 4, 5, 8, ZSTD_lazy }, /* level 7 */ + { 21, 19, 20, 4, 5, 16, ZSTD_lazy2 }, /* level 8 */ + { 22, 20, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 9 */ + { 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, 
/* level 10 */ + { 22, 21, 22, 6, 5, 16, ZSTD_lazy2 }, /* level 11 */ + { 22, 22, 23, 6, 5, 32, ZSTD_lazy2 }, /* level 12 */ + { 22, 22, 22, 4, 5, 32, ZSTD_btlazy2 }, /* level 13 */ + { 22, 22, 23, 5, 5, 32, ZSTD_btlazy2 }, /* level 14 */ + { 22, 23, 23, 6, 5, 32, ZSTD_btlazy2 }, /* level 15 */ + { 22, 22, 22, 5, 5, 48, ZSTD_btopt }, /* level 16 */ + { 23, 23, 22, 5, 4, 64, ZSTD_btopt }, /* level 17 */ + { 23, 23, 22, 6, 3, 64, ZSTD_btultra }, /* level 18 */ + { 23, 24, 22, 7, 3,256, ZSTD_btultra2}, /* level 19 */ + { 25, 25, 23, 7, 3,256, ZSTD_btultra2}, /* level 20 */ + { 26, 26, 24, 7, 3,512, ZSTD_btultra2}, /* level 21 */ + { 27, 27, 25, 9, 3,999, ZSTD_btultra2}, /* level 22 */ +}, +{ /* for srcSize <= 256 KB */ + /* W, C, H, S, L, T, strat */ + { 18, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ + { 18, 13, 14, 1, 6, 0, ZSTD_fast }, /* level 1 */ + { 18, 14, 14, 1, 5, 0, ZSTD_dfast }, /* level 2 */ + { 18, 16, 16, 1, 4, 0, ZSTD_dfast }, /* level 3 */ + { 18, 16, 17, 3, 5, 2, ZSTD_greedy }, /* level 4.*/ + { 18, 17, 18, 5, 5, 2, ZSTD_greedy }, /* level 5.*/ + { 18, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6.*/ + { 18, 18, 19, 4, 4, 4, ZSTD_lazy }, /* level 7 */ + { 18, 18, 19, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ + { 18, 18, 19, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ + { 18, 18, 19, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ + { 18, 18, 19, 5, 4, 12, ZSTD_btlazy2 }, /* level 11.*/ + { 18, 19, 19, 7, 4, 12, ZSTD_btlazy2 }, /* level 12.*/ + { 18, 18, 19, 4, 4, 16, ZSTD_btopt }, /* level 13 */ + { 18, 18, 19, 4, 3, 32, ZSTD_btopt }, /* level 14.*/ + { 18, 18, 19, 6, 3,128, ZSTD_btopt }, /* level 15.*/ + { 18, 19, 19, 6, 3,128, ZSTD_btultra }, /* level 16.*/ + { 18, 19, 19, 8, 3,256, ZSTD_btultra }, /* level 17.*/ + { 18, 19, 19, 6, 3,128, ZSTD_btultra2}, /* level 18.*/ + { 18, 19, 19, 8, 3,256, ZSTD_btultra2}, /* level 19.*/ + { 18, 19, 19, 10, 3,512, ZSTD_btultra2}, /* level 20.*/ + { 18, 19, 19, 12, 3,512, ZSTD_btultra2}, /* level 21.*/ + { 18, 19, 19, 13, 
3,999, ZSTD_btultra2}, /* level 22.*/ +}, +{ /* for srcSize <= 128 KB */ + /* W, C, H, S, L, T, strat */ + { 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ + { 17, 12, 13, 1, 6, 0, ZSTD_fast }, /* level 1 */ + { 17, 13, 15, 1, 5, 0, ZSTD_fast }, /* level 2 */ + { 17, 15, 16, 2, 5, 0, ZSTD_dfast }, /* level 3 */ + { 17, 17, 17, 2, 4, 0, ZSTD_dfast }, /* level 4 */ + { 17, 16, 17, 3, 4, 2, ZSTD_greedy }, /* level 5 */ + { 17, 16, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */ + { 17, 16, 17, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */ + { 17, 16, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ + { 17, 16, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ + { 17, 16, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ + { 17, 17, 17, 5, 4, 8, ZSTD_btlazy2 }, /* level 11 */ + { 17, 18, 17, 7, 4, 12, ZSTD_btlazy2 }, /* level 12 */ + { 17, 18, 17, 3, 4, 12, ZSTD_btopt }, /* level 13.*/ + { 17, 18, 17, 4, 3, 32, ZSTD_btopt }, /* level 14.*/ + { 17, 18, 17, 6, 3,256, ZSTD_btopt }, /* level 15.*/ + { 17, 18, 17, 6, 3,128, ZSTD_btultra }, /* level 16.*/ + { 17, 18, 17, 8, 3,256, ZSTD_btultra }, /* level 17.*/ + { 17, 18, 17, 10, 3,512, ZSTD_btultra }, /* level 18.*/ + { 17, 18, 17, 5, 3,256, ZSTD_btultra2}, /* level 19.*/ + { 17, 18, 17, 7, 3,512, ZSTD_btultra2}, /* level 20.*/ + { 17, 18, 17, 9, 3,512, ZSTD_btultra2}, /* level 21.*/ + { 17, 18, 17, 11, 3,999, ZSTD_btultra2}, /* level 22.*/ +}, +{ /* for srcSize <= 16 KB */ + /* W, C, H, S, L, T, strat */ + { 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ + { 14, 14, 15, 1, 5, 0, ZSTD_fast }, /* level 1 */ + { 14, 14, 15, 1, 4, 0, ZSTD_fast }, /* level 2 */ + { 14, 14, 15, 2, 4, 0, ZSTD_dfast }, /* level 3 */ + { 14, 14, 14, 4, 4, 2, ZSTD_greedy }, /* level 4 */ + { 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 5.*/ + { 14, 14, 14, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */ + { 14, 14, 14, 6, 4, 8, ZSTD_lazy2 }, /* level 7 */ + { 14, 14, 14, 8, 4, 8, ZSTD_lazy2 }, /* level 8.*/ + { 14, 15, 14, 5, 4, 8, ZSTD_btlazy2 }, /* level 9.*/ + { 
14, 15, 14, 9, 4, 8, ZSTD_btlazy2 }, /* level 10.*/ + { 14, 15, 14, 3, 4, 12, ZSTD_btopt }, /* level 11.*/ + { 14, 15, 14, 4, 3, 24, ZSTD_btopt }, /* level 12.*/ + { 14, 15, 14, 5, 3, 32, ZSTD_btultra }, /* level 13.*/ + { 14, 15, 15, 6, 3, 64, ZSTD_btultra }, /* level 14.*/ + { 14, 15, 15, 7, 3,256, ZSTD_btultra }, /* level 15.*/ + { 14, 15, 15, 5, 3, 48, ZSTD_btultra2}, /* level 16.*/ + { 14, 15, 15, 6, 3,128, ZSTD_btultra2}, /* level 17.*/ + { 14, 15, 15, 7, 3,256, ZSTD_btultra2}, /* level 18.*/ + { 14, 15, 15, 8, 3,256, ZSTD_btultra2}, /* level 19.*/ + { 14, 15, 15, 8, 3,512, ZSTD_btultra2}, /* level 20.*/ + { 14, 15, 15, 9, 3,512, ZSTD_btultra2}, /* level 21.*/ + { 14, 15, 15, 10, 3,999, ZSTD_btultra2}, /* level 22.*/ +}, +}; + + + +#endif /* ZSTD_CLEVELS_H */ diff --git a/lib/zstd/compress/fse_compress.c b/lib/zstd/compress/fse_compress.c index 436985b620e5..ec5b1ca6d71a 100644 --- a/lib/zstd/compress/fse_compress.c +++ b/lib/zstd/compress/fse_compress.c @@ -75,13 +75,14 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? 
tableSize>>1 : 1) ; FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); U32 const step = FSE_TABLESTEP(tableSize); + U32 const maxSV1 = maxSymbolValue+1; - U32* cumul = (U32*)workSpace; - FSE_FUNCTION_TYPE* tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSymbolValue + 2)); + U16* cumul = (U16*)workSpace; /* size = maxSV1 */ + FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSV1+1)); /* size = tableSize */ U32 highThreshold = tableSize-1; - if ((size_t)workSpace & 3) return ERROR(GENERIC); /* Must be 4 byte aligned */ + assert(((size_t)workSpace & 1) == 0); /* Must be 2 bytes-aligned */ if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge); /* CTable header */ tableU16[-2] = (U16) tableLog; @@ -98,20 +99,61 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, /* symbol start positions */ { U32 u; cumul[0] = 0; - for (u=1; u <= maxSymbolValue+1; u++) { + for (u=1; u <= maxSV1; u++) { if (normalizedCounter[u-1]==-1) { /* Low proba symbol */ cumul[u] = cumul[u-1] + 1; tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1); } else { - cumul[u] = cumul[u-1] + normalizedCounter[u-1]; + assert(normalizedCounter[u-1] >= 0); + cumul[u] = cumul[u-1] + (U16)normalizedCounter[u-1]; + assert(cumul[u] >= cumul[u-1]); /* no overflow */ } } - cumul[maxSymbolValue+1] = tableSize+1; + cumul[maxSV1] = (U16)(tableSize+1); } /* Spread symbols */ - { U32 position = 0; + if (highThreshold == tableSize - 1) { + /* Case for no low prob count symbols. Lay down 8 bytes at a time + * to reduce branch misses since we are operating on a small block + */ + BYTE* const spread = tableSymbol + tableSize; /* size = tableSize + 8 (may write beyond tableSize) */ + { U64 const add = 0x0101010101010101ull; + size_t pos = 0; + U64 sv = 0; + U32 s; + for (s=0; s=0); + pos += (size_t)n; + } + } + /* Spread symbols across the table. 
Lack of lowprob symbols means that + * we don't need variable sized inner loop, so we can unroll the loop and + * reduce branch misses. + */ + { size_t position = 0; + size_t s; + size_t const unroll = 2; /* Experimentally determined optimal unroll */ + assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */ + for (s = 0; s < (size_t)tableSize; s += unroll) { + size_t u; + for (u = 0; u < unroll; ++u) { + size_t const uPosition = (position + (u * step)) & tableMask; + tableSymbol[uPosition] = spread[s + u]; + } + position = (position + (unroll * step)) & tableMask; + } + assert(position == 0); /* Must have initialized all positions */ + } + } else { + U32 position = 0; U32 symbol; - for (symbol=0; symbol<=maxSymbolValue; symbol++) { + for (symbol=0; symbol highThreshold) position = (position + step) & tableMask; /* Low proba area */ } } - assert(position==0); /* Must have initialized all positions */ } @@ -144,16 +185,17 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, case -1: case 1: symbolTT[s].deltaNbBits = (tableLog << 16) - (1< 1); + { U32 const maxBitsOut = tableLog - BIT_highbit32 ((U32)normalizedCounter[s]-1); + U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut; symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus; - symbolTT[s].deltaFindState = total - normalizedCounter[s]; - total += normalizedCounter[s]; + symbolTT[s].deltaFindState = (int)(total - (unsigned)normalizedCounter[s]); + total += (unsigned)normalizedCounter[s]; } } } } #if 0 /* debug : symbol costs */ @@ -164,8 +206,7 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, symbol, normalizedCounter[symbol], FSE_getMaxNbBits(symbolTT, symbol), (double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256); - } - } + } } #endif return 0; @@ -173,16 +214,18 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, - #ifndef FSE_COMMONDEFS_ONLY - /*-************************************************************** * FSE NCount encoding 
****************************************************************/ size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog) { - size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3; + size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog + + 4 /* bitCount initialized at 4 */ + + 2 /* first two symbols may use one additional bit each */) / 8) + + 1 /* round up to whole nb bytes */ + + 2 /* additional two bytes for bitstream flush */; return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */ } diff --git a/lib/zstd/compress/huf_compress.c b/lib/zstd/compress/huf_compress.c index f76a526bfa54..74ef0db47621 100644 --- a/lib/zstd/compress/huf_compress.c +++ b/lib/zstd/compress/huf_compress.c @@ -50,6 +50,28 @@ unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS /* ******************************************************* * HUF : Huffman block compression *********************************************************/ +#define HUF_WORKSPACE_MAX_ALIGNMENT 8 + +static void* HUF_alignUpWorkspace(void* workspace, size_t* workspaceSizePtr, size_t align) +{ + size_t const mask = align - 1; + size_t const rem = (size_t)workspace & mask; + size_t const add = (align - rem) & mask; + BYTE* const aligned = (BYTE*)workspace + add; + assert((align & (align - 1)) == 0); /* pow 2 */ + assert(align <= HUF_WORKSPACE_MAX_ALIGNMENT); + if (*workspaceSizePtr >= add) { + assert(add < align); + assert(((size_t)aligned & mask) == 0); + *workspaceSizePtr -= add; + return aligned; + } else { + *workspaceSizePtr = 0; + return NULL; + } +} + + /* HUF_compressWeights() : * Same as FSE_compress(), but dedicated to huff0's weights compression. * The use case needs much less stack memory. 
@@ -72,7 +94,7 @@ static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightT unsigned maxSymbolValue = HUF_TABLELOG_MAX; U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER; - HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)workspace; + HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32)); if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC); @@ -103,6 +125,40 @@ static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightT return (size_t)(op-ostart); } +static size_t HUF_getNbBits(HUF_CElt elt) +{ + return elt & 0xFF; +} + +static size_t HUF_getNbBitsFast(HUF_CElt elt) +{ + return elt; +} + +static size_t HUF_getValue(HUF_CElt elt) +{ + return elt & ~0xFF; +} + +static size_t HUF_getValueFast(HUF_CElt elt) +{ + return elt; +} + +static void HUF_setNbBits(HUF_CElt* elt, size_t nbBits) +{ + assert(nbBits <= HUF_TABLELOG_ABSOLUTEMAX); + *elt = nbBits; +} + +static void HUF_setValue(HUF_CElt* elt, size_t value) +{ + size_t const nbBits = HUF_getNbBits(*elt); + if (nbBits > 0) { + assert((value >> nbBits) == 0); + *elt |= value << (sizeof(HUF_CElt) * 8 - nbBits); + } +} typedef struct { HUF_CompressWeightsWksp wksp; @@ -114,9 +170,10 @@ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize) { + HUF_CElt const* const ct = CTable + 1; BYTE* op = (BYTE*)dst; U32 n; - HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)workspace; + HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32)); /* check conditions */ if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC); @@ -127,9 +184,10 @@ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, for (n=1; nbitsToWeight[n] = (BYTE)(huffLog + 1 - n); for (n=0; nhuffWeight[n] = 
wksp->bitsToWeight[CTable[n].nbBits]; + wksp->huffWeight[n] = wksp->bitsToWeight[HUF_getNbBits(ct[n])]; /* attempt weights compression by FSE */ + if (maxDstSize < 1) return ERROR(dstSize_tooSmall); { CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) ); if ((hSize>1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */ op[0] = (BYTE)hSize; @@ -163,6 +221,7 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */ U32 tableLog = 0; U32 nbSymbols = 0; + HUF_CElt* const ct = CTable + 1; /* get symbol weights */ CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize)); @@ -172,6 +231,8 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall); + CTable[0] = tableLog; + /* Prepare base value per rank */ { U32 n, nextRankStart = 0; for (n=1; n<=tableLog; n++) { @@ -183,13 +244,13 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void /* fill nbBits */ { U32 n; for (n=0; nn=tableLog+1 */ U16 valPerRank[HUF_TABLELOG_MAX+2] = {0}; - { U32 n; for (n=0; n>= 1; } } /* assign value within rank, symbol order */ - { U32 n; for (n=0; n huffNode[i-1].count) { + return 0; + } + } + return 1; +} + +/* Insertion sort by descending order */ +HINT_INLINE void HUF_insertionSort(nodeElt huffNode[], int const low, int const high) { + int i; + int const size = high-low+1; + huffNode += low; + for (i = 1; i < size; ++i) { + nodeElt const key = huffNode[i]; + int j = i - 1; + while (j >= 0 && huffNode[j].count < key.count) { + huffNode[j + 1] = huffNode[j]; + j--; + } + huffNode[j + 1] = key; + } +} + +/* Pivot helper function for quicksort. 
*/ +static int HUF_quickSortPartition(nodeElt arr[], int const low, int const high) { + /* Simply select rightmost element as pivot. "Better" selectors like + * median-of-three don't experimentally appear to have any benefit. + */ + U32 const pivot = arr[high].count; + int i = low - 1; + int j = low; + for ( ; j < high; j++) { + if (arr[j].count > pivot) { + i++; + HUF_swapNodes(&arr[i], &arr[j]); + } + } + HUF_swapNodes(&arr[i + 1], &arr[high]); + return i + 1; +} + +/* Classic quicksort by descending with partially iterative calls + * to reduce worst case callstack size. + */ +static void HUF_simpleQuickSort(nodeElt arr[], int low, int high) { + int const kInsertionSortThreshold = 8; + if (high - low < kInsertionSortThreshold) { + HUF_insertionSort(arr, low, high); + return; + } + while (low < high) { + int const idx = HUF_quickSortPartition(arr, low, high); + if (idx - low < high - idx) { + HUF_simpleQuickSort(arr, low, idx - 1); + low = idx + 1; + } else { + HUF_simpleQuickSort(arr, idx + 1, high); + high = idx - 1; + } + } +} + /* * HUF_sort(): * Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order. + * This is a typical bucket sorting strategy that uses either quicksort or insertion sort to sort each bucket. * * @param[out] huffNode Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled. * Must have (maxSymbolValue + 1) entries. @@ -387,44 +544,52 @@ typedef struct { * @param[in] maxSymbolValue Maximum symbol value. * @param rankPosition This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries. */ -static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue, rankPos* rankPosition) -{ - int n; - int const maxSymbolValue1 = (int)maxSymbolValue + 1; +static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSymbolValue, rankPos rankPosition[]) { + U32 n; + U32 const maxSymbolValue1 = maxSymbolValue+1; /* Compute base and set curr to base. 
- * For symbol s let lowerRank = BIT_highbit32(count[n]+1) and rank = lowerRank + 1. - * Then 2^lowerRank <= count[n]+1 <= 2^rank. + * For symbol s let lowerRank = HUF_getIndex(count[n]) and rank = lowerRank + 1. + * See HUF_getIndex to see bucketing strategy. * We attribute each symbol to lowerRank's base value, because we want to know where * each rank begins in the output, so for rank R we want to count ranks R+1 and above. */ ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE); for (n = 0; n < maxSymbolValue1; ++n) { - U32 lowerRank = BIT_highbit32(count[n] + 1); + U32 lowerRank = HUF_getIndex(count[n]); + assert(lowerRank < RANK_POSITION_TABLE_SIZE - 1); rankPosition[lowerRank].base++; } + assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0); + /* Set up the rankPosition table */ for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) { rankPosition[n-1].base += rankPosition[n].base; rankPosition[n-1].curr = rankPosition[n-1].base; } - /* Sort */ + + /* Insert each symbol into their appropriate bucket, setting up rankPosition table. */ for (n = 0; n < maxSymbolValue1; ++n) { U32 const c = count[n]; - U32 const r = BIT_highbit32(c+1) + 1; - U32 pos = rankPosition[r].curr++; - /* Insert into the correct position in the rank. - * We have at most 256 symbols, so this insertion should be fine. - */ - while ((pos > rankPosition[r].base) && (c > huffNode[pos-1].count)) { - huffNode[pos] = huffNode[pos-1]; - pos--; - } + U32 const r = HUF_getIndex(c) + 1; + U32 const pos = rankPosition[r].curr++; + assert(pos < maxSymbolValue1); huffNode[pos].count = c; huffNode[pos].byte = (BYTE)n; } -} + /* Sort each bucket. 
*/ + for (n = RANK_POSITION_DISTINCT_COUNT_CUTOFF; n < RANK_POSITION_TABLE_SIZE - 1; ++n) { + U32 const bucketSize = rankPosition[n].curr-rankPosition[n].base; + U32 const bucketStartIdx = rankPosition[n].base; + if (bucketSize > 1) { + assert(bucketStartIdx < maxSymbolValue1); + HUF_simpleQuickSort(huffNode + bucketStartIdx, 0, bucketSize-1); + } + } + + assert(HUF_isSorted(huffNode, maxSymbolValue1)); +} /* HUF_buildCTable_wksp() : * Same as HUF_buildCTable(), but using externally allocated scratch buffer. @@ -487,6 +652,7 @@ static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue) */ static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits) { + HUF_CElt* const ct = CTable + 1; /* fill result into ctable (val, nbBits) */ int n; U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0}; @@ -502,20 +668,20 @@ static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, i min >>= 1; } } for (n=0; nhuffNodeTbl; nodeElt* const huffNode = huffNode0+1; int nonNullRank; /* safety checks */ - if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ if (wkspSize < sizeof(HUF_buildCTable_wksp_tables)) return ERROR(workSpace_tooSmall); if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT; @@ -533,99 +699,334 @@ size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbo maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits); if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */ - HUF_buildCTableFromTree(tree, huffNode, nonNullRank, maxSymbolValue, maxNbBits); + HUF_buildCTableFromTree(CTable, huffNode, nonNullRank, maxSymbolValue, maxNbBits); return maxNbBits; } size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) { + HUF_CElt const* ct = CTable + 1; size_t nbBits = 0; int s; for (s = 0; s <= (int)maxSymbolValue; ++s) { - nbBits += 
CTable[s].nbBits * count[s]; + nbBits += HUF_getNbBits(ct[s]) * count[s]; } return nbBits >> 3; } int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) { + HUF_CElt const* ct = CTable + 1; int bad = 0; int s; for (s = 0; s <= (int)maxSymbolValue; ++s) { - bad |= (count[s] != 0) & (CTable[s].nbBits == 0); + bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0); } return !bad; } size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); } -FORCE_INLINE_TEMPLATE void -HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable) +/* HUF_CStream_t: + * Huffman uses its own BIT_CStream_t implementation. + * There are three major differences from BIT_CStream_t: + * 1. HUF_addBits() takes a HUF_CElt (size_t) which is + * the pair (nbBits, value) in the format: + * format: + * - Bits [0, 4) = nbBits + * - Bits [4, 64 - nbBits) = 0 + * - Bits [64 - nbBits, 64) = value + * 2. The bitContainer is built from the upper bits and + * right shifted. E.g. to add a new value of N bits + * you right shift the bitContainer by N, then or in + * the new value into the N upper bits. + * 3. The bitstream has two bit containers. You can add + * bits to the second container and merge them into + * the first container. + */ + +#define HUF_BITS_IN_CONTAINER (sizeof(size_t) * 8) + +typedef struct { + size_t bitContainer[2]; + size_t bitPos[2]; + + BYTE* startPtr; + BYTE* ptr; + BYTE* endPtr; +} HUF_CStream_t; + +/*! HUF_initCStream(): + * Initializes the bitstream. + * @returns 0 or an error code. 
+ */ +static size_t HUF_initCStream(HUF_CStream_t* bitC, + void* startPtr, size_t dstCapacity) { - BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits); + ZSTD_memset(bitC, 0, sizeof(*bitC)); + bitC->startPtr = (BYTE*)startPtr; + bitC->ptr = bitC->startPtr; + bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer[0]); + if (dstCapacity <= sizeof(bitC->bitContainer[0])) return ERROR(dstSize_tooSmall); + return 0; } -#define HUF_FLUSHBITS(s) BIT_flushBits(s) +/*! HUF_addBits(): + * Adds the symbol stored in HUF_CElt elt to the bitstream. + * + * @param elt The element we're adding. This is a (nbBits, value) pair. + * See the HUF_CStream_t docs for the format. + * @param idx Insert into the bitstream at this idx. + * @param kFast This is a template parameter. If the bitstream is guaranteed + * to have at least 4 unused bits after this call it may be 1, + * otherwise it must be 0. HUF_addBits() is faster when fast is set. + */ +FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int idx, int kFast) +{ + assert(idx <= 1); + assert(HUF_getNbBits(elt) <= HUF_TABLELOG_ABSOLUTEMAX); + /* This is efficient on x86-64 with BMI2 because shrx + * only reads the low 6 bits of the register. The compiler + * knows this and elides the mask. When fast is set, + * every operation can use the same value loaded from elt. + */ + bitC->bitContainer[idx] >>= HUF_getNbBits(elt); + bitC->bitContainer[idx] |= kFast ? HUF_getValueFast(elt) : HUF_getValue(elt); + /* We only read the low 8 bits of bitC->bitPos[idx] so it + * doesn't matter that the high bits have noise from the value. + */ + bitC->bitPos[idx] += HUF_getNbBitsFast(elt); + assert((bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER); + /* The last 4-bits of elt are dirty if fast is set, + * so we must not be overwriting bits that have already been + * inserted into the bit container. 
+ */ +#if DEBUGLEVEL >= 1 + { + size_t const nbBits = HUF_getNbBits(elt); + size_t const dirtyBits = nbBits == 0 ? 0 : BIT_highbit32((U32)nbBits) + 1; + (void)dirtyBits; + /* Middle bits are 0. */ + assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0); + /* We didn't overwrite any bits in the bit container. */ + assert(!kFast || (bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER); + (void)dirtyBits; + } +#endif +} -#define HUF_FLUSHBITS_1(stream) \ - if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream) +FORCE_INLINE_TEMPLATE void HUF_zeroIndex1(HUF_CStream_t* bitC) +{ + bitC->bitContainer[1] = 0; + bitC->bitPos[1] = 0; +} + +/*! HUF_mergeIndex1() : + * Merges the bit container @ index 1 into the bit container @ index 0 + * and zeros the bit container @ index 1. + */ +FORCE_INLINE_TEMPLATE void HUF_mergeIndex1(HUF_CStream_t* bitC) +{ + assert((bitC->bitPos[1] & 0xFF) < HUF_BITS_IN_CONTAINER); + bitC->bitContainer[0] >>= (bitC->bitPos[1] & 0xFF); + bitC->bitContainer[0] |= bitC->bitContainer[1]; + bitC->bitPos[0] += bitC->bitPos[1]; + assert((bitC->bitPos[0] & 0xFF) <= HUF_BITS_IN_CONTAINER); +} + +/*! HUF_flushBits() : +* Flushes the bits in the bit container @ index 0. +* +* @post bitPos will be < 8. +* @param kFast If kFast is set then we must know a-priori that +* the bit container will not overflow. +*/ +FORCE_INLINE_TEMPLATE void HUF_flushBits(HUF_CStream_t* bitC, int kFast) +{ + /* The upper bits of bitPos are noisy, so we must mask by 0xFF. */ + size_t const nbBits = bitC->bitPos[0] & 0xFF; + size_t const nbBytes = nbBits >> 3; + /* The top nbBits bits of bitContainer are the ones we need. */ + size_t const bitContainer = bitC->bitContainer[0] >> (HUF_BITS_IN_CONTAINER - nbBits); + /* Mask bitPos to account for the bytes we consumed. 
*/ + bitC->bitPos[0] &= 7; + assert(nbBits > 0); + assert(nbBits <= sizeof(bitC->bitContainer[0]) * 8); + assert(bitC->ptr <= bitC->endPtr); + MEM_writeLEST(bitC->ptr, bitContainer); + bitC->ptr += nbBytes; + assert(!kFast || bitC->ptr <= bitC->endPtr); + if (!kFast && bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr; + /* bitContainer doesn't need to be modified because the leftover + * bits are already the top bitPos bits. And we don't care about + * noise in the lower values. + */ +} + +/*! HUF_endMark() + * @returns The Huffman stream end mark: A 1-bit value = 1. + */ +static HUF_CElt HUF_endMark(void) +{ + HUF_CElt endMark; + HUF_setNbBits(&endMark, 1); + HUF_setValue(&endMark, 1); + return endMark; +} + +/*! HUF_closeCStream() : + * @return Size of CStream, in bytes, + * or 0 if it could not fit into dstBuffer */ +static size_t HUF_closeCStream(HUF_CStream_t* bitC) +{ + HUF_addBits(bitC, HUF_endMark(), /* idx */ 0, /* kFast */ 0); + HUF_flushBits(bitC, /* kFast */ 0); + { + size_t const nbBits = bitC->bitPos[0] & 0xFF; + if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */ + return (bitC->ptr - bitC->startPtr) + (nbBits > 0); + } +} + +FORCE_INLINE_TEMPLATE void +HUF_encodeSymbol(HUF_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable, int idx, int fast) +{ + HUF_addBits(bitCPtr, CTable[symbol], idx, fast); +} + +FORCE_INLINE_TEMPLATE void +HUF_compress1X_usingCTable_internal_body_loop(HUF_CStream_t* bitC, + const BYTE* ip, size_t srcSize, + const HUF_CElt* ct, + int kUnroll, int kFastFlush, int kLastFast) +{ + /* Join to kUnroll */ + int n = (int)srcSize; + int rem = n % kUnroll; + if (rem > 0) { + for (; rem > 0; --rem) { + HUF_encodeSymbol(bitC, ip[--n], ct, 0, /* fast */ 0); + } + HUF_flushBits(bitC, kFastFlush); + } + assert(n % kUnroll == 0); + + /* Join to 2 * kUnroll */ + if (n % (2 * kUnroll)) { + int u; + for (u = 1; u < kUnroll; ++u) { + HUF_encodeSymbol(bitC, ip[n - u], ct, 0, 1); + } + HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, 
0, kLastFast); + HUF_flushBits(bitC, kFastFlush); + n -= kUnroll; + } + assert(n % (2 * kUnroll) == 0); + + for (; n>0; n-= 2 * kUnroll) { + /* Encode kUnroll symbols into the bitstream @ index 0. */ + int u; + for (u = 1; u < kUnroll; ++u) { + HUF_encodeSymbol(bitC, ip[n - u], ct, /* idx */ 0, /* fast */ 1); + } + HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, /* idx */ 0, /* fast */ kLastFast); + HUF_flushBits(bitC, kFastFlush); + /* Encode kUnroll symbols into the bitstream @ index 1. + * This allows us to start filling the bit container + * without any data dependencies. + */ + HUF_zeroIndex1(bitC); + for (u = 1; u < kUnroll; ++u) { + HUF_encodeSymbol(bitC, ip[n - kUnroll - u], ct, /* idx */ 1, /* fast */ 1); + } + HUF_encodeSymbol(bitC, ip[n - kUnroll - kUnroll], ct, /* idx */ 1, /* fast */ kLastFast); + /* Merge bitstream @ index 1 into the bitstream @ index 0 */ + HUF_mergeIndex1(bitC); + HUF_flushBits(bitC, kFastFlush); + } + assert(n == 0); + +} + +/* + * Returns a tight upper bound on the output space needed by Huffman + * with 8 bytes buffer to handle over-writes. If the output is at least + * this large we don't need to do bounds checks during Huffman encoding. 
+ */ +static size_t HUF_tightCompressBound(size_t srcSize, size_t tableLog) +{ + return ((srcSize * tableLog) >> 3) + 8; +} -#define HUF_FLUSHBITS_2(stream) \ - if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream) FORCE_INLINE_TEMPLATE size_t HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) { + U32 const tableLog = (U32)CTable[0]; + HUF_CElt const* ct = CTable + 1; const BYTE* ip = (const BYTE*) src; BYTE* const ostart = (BYTE*)dst; BYTE* const oend = ostart + dstSize; BYTE* op = ostart; - size_t n; - BIT_CStream_t bitC; + HUF_CStream_t bitC; /* init */ if (dstSize < 8) return 0; /* not enough space to compress */ - { size_t const initErr = BIT_initCStream(&bitC, op, (size_t)(oend-op)); + { size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op)); if (HUF_isError(initErr)) return 0; } - n = srcSize & ~3; /* join to mod 4 */ - switch (srcSize & 3) - { - case 3: - HUF_encodeSymbol(&bitC, ip[n+ 2], CTable); - HUF_FLUSHBITS_2(&bitC); - ZSTD_FALLTHROUGH; - case 2: - HUF_encodeSymbol(&bitC, ip[n+ 1], CTable); - HUF_FLUSHBITS_1(&bitC); - ZSTD_FALLTHROUGH; - case 1: - HUF_encodeSymbol(&bitC, ip[n+ 0], CTable); - HUF_FLUSHBITS(&bitC); - ZSTD_FALLTHROUGH; - case 0: ZSTD_FALLTHROUGH; - default: break; + if (dstSize < HUF_tightCompressBound(srcSize, (size_t)tableLog) || tableLog > 11) + HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ MEM_32bits() ? 
2 : 4, /* kFast */ 0, /* kLastFast */ 0); + else { + if (MEM_32bits()) { + switch (tableLog) { + case 11: + HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 0); + break; + case 10: ZSTD_FALLTHROUGH; + case 9: ZSTD_FALLTHROUGH; + case 8: + HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 1); + break; + case 7: ZSTD_FALLTHROUGH; + default: + HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 3, /* kFastFlush */ 1, /* kLastFast */ 1); + break; + } + } else { + switch (tableLog) { + case 11: + HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 0); + break; + case 10: + HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 1); + break; + case 9: + HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 6, /* kFastFlush */ 1, /* kLastFast */ 0); + break; + case 8: + HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 7, /* kFastFlush */ 1, /* kLastFast */ 0); + break; + case 7: + HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 8, /* kFastFlush */ 1, /* kLastFast */ 0); + break; + case 6: ZSTD_FALLTHROUGH; + default: + HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 9, /* kFastFlush */ 1, /* kLastFast */ 1); + break; + } + } } + assert(bitC.ptr <= bitC.endPtr); - for (; n>0; n-=4) { /* note : n&3==0 at this stage */ - HUF_encodeSymbol(&bitC, ip[n- 1], CTable); - HUF_FLUSHBITS_1(&bitC); - HUF_encodeSymbol(&bitC, ip[n- 2], CTable); - HUF_FLUSHBITS_2(&bitC); - HUF_encodeSymbol(&bitC, ip[n- 3], CTable); - HUF_FLUSHBITS_1(&bitC); - HUF_encodeSymbol(&bitC, ip[n- 4], CTable); - HUF_FLUSHBITS(&bitC); - } - - return 
BIT_closeCStream(&bitC); + return HUF_closeCStream(&bitC); } #if DYNAMIC_BMI2 -static TARGET_ATTRIBUTE("bmi2") size_t +static BMI2_TARGET_ATTRIBUTE size_t HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) @@ -667,9 +1068,13 @@ HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) { - return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); + return HUF_compress1X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); } +size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2) +{ + return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2); +} static size_t HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, @@ -689,8 +1094,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, assert(op <= oend); { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); - if (cSize==0) return 0; - assert(cSize <= 65535); + if (cSize == 0 || cSize > 65535) return 0; MEM_writeLE16(ostart, (U16)cSize); op += cSize; } @@ -698,8 +1102,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, ip += segmentSize; assert(op <= oend); { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); - if (cSize==0) return 0; - assert(cSize <= 65535); + if (cSize == 0 || cSize > 65535) return 0; MEM_writeLE16(ostart+2, (U16)cSize); op += cSize; } @@ -707,8 +1110,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, ip += segmentSize; assert(op <= oend); { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); - if (cSize==0) return 0; - 
assert(cSize <= 65535); + if (cSize == 0 || cSize > 65535) return 0; MEM_writeLE16(ostart+4, (U16)cSize); op += cSize; } @@ -717,7 +1119,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, assert(op <= oend); assert(ip <= iend); { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) ); - if (cSize==0) return 0; + if (cSize == 0 || cSize > 65535) return 0; op += cSize; } @@ -726,7 +1128,12 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) { - return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); + return HUF_compress4X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); +} + +size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2) +{ + return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2); } typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e; @@ -750,35 +1157,38 @@ static size_t HUF_compressCTable_internal( typedef struct { unsigned count[HUF_SYMBOLVALUE_MAX + 1]; - HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1]; + HUF_CElt CTable[HUF_CTABLE_SIZE_ST(HUF_SYMBOLVALUE_MAX)]; union { HUF_buildCTable_wksp_tables buildCTable_wksp; HUF_WriteCTableWksp writeCTable_wksp; + U32 hist_wksp[HIST_WKSP_SIZE_U32]; } wksps; } HUF_compress_tables_t; +#define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096 +#define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10 /* Must be >= 2 */ + /* HUF_compress_internal() : * `workSpace_align4` must be aligned on 4-bytes boundaries, - * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U32 unsigned */ + * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */ static size_t HUF_compress_internal (void* dst, size_t dstSize, const void* src, size_t srcSize, 
unsigned maxSymbolValue, unsigned huffLog, HUF_nbStreams_e nbStreams, - void* workSpace_align4, size_t wkspSize, + void* workSpace, size_t wkspSize, HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat, - const int bmi2) + const int bmi2, unsigned suspectUncompressible) { - HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace_align4; + HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(size_t)); BYTE* const ostart = (BYTE*)dst; BYTE* const oend = ostart + dstSize; BYTE* op = ostart; - HUF_STATIC_ASSERT(sizeof(*table) <= HUF_WORKSPACE_SIZE); - assert(((size_t)workSpace_align4 & 3) == 0); /* must be aligned on 4-bytes boundaries */ + HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE); /* checks & inits */ - if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall); + if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall); if (!srcSize) return 0; /* Uncompressed */ if (!dstSize) return 0; /* cannot fit anything within dst budget */ if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */ @@ -794,8 +1204,23 @@ HUF_compress_internal (void* dst, size_t dstSize, nbStreams, oldHufTable, bmi2); } + /* If uncompressible data is suspected, do a smaller sampling first */ + DEBUG_STATIC_ASSERT(SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO >= 2); + if (suspectUncompressible && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) { + size_t largestTotal = 0; + { unsigned maxSymbolValueBegin = maxSymbolValue; + CHECK_V_F(largestBegin, HIST_count_simple (table->count, &maxSymbolValueBegin, (const BYTE*)src, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) ); + largestTotal += largestBegin; + } + { unsigned maxSymbolValueEnd = maxSymbolValue; + CHECK_V_F(largestEnd, HIST_count_simple (table->count, &maxSymbolValueEnd, (const BYTE*)src + srcSize - SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE, 
SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) ); + largestTotal += largestEnd; + } + if (largestTotal <= ((2 * SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) >> 7)+4) return 0; /* heuristic : probably not compressible enough */ + } + /* Scan input and build symbol stats */ - { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace_align4, wkspSize) ); + { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->wksps.hist_wksp, sizeof(table->wksps.hist_wksp)) ); if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */ if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */ } @@ -820,9 +1245,12 @@ HUF_compress_internal (void* dst, size_t dstSize, &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp)); CHECK_F(maxBits); huffLog = (U32)maxBits; - /* Zero unused symbols in CTable, so we can check it for validity */ - ZSTD_memset(table->CTable + (maxSymbolValue + 1), 0, - sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt))); + } + /* Zero unused symbols in CTable, so we can check it for validity */ + { + size_t const ctableSize = HUF_CTABLE_SIZE_ST(maxSymbolValue); + size_t const unusedSize = sizeof(table->CTable) - ctableSize * sizeof(HUF_CElt); + ZSTD_memset(table->CTable + ctableSize, 0, unusedSize); } /* Write table description header */ @@ -859,19 +1287,20 @@ size_t HUF_compress1X_wksp (void* dst, size_t dstSize, return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, HUF_singleStream, workSpace, wkspSize, - NULL, NULL, 0, 0 /*bmi2*/); + NULL, NULL, 0, 0 /*bmi2*/, 0); } size_t HUF_compress1X_repeat (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void* workSpace, size_t wkspSize, - HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, + 
int bmi2, unsigned suspectUncompressible) { return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, HUF_singleStream, workSpace, wkspSize, hufTable, - repeat, preferRepeat, bmi2); + repeat, preferRepeat, bmi2, suspectUncompressible); } /* HUF_compress4X_repeat(): @@ -885,21 +1314,22 @@ size_t HUF_compress4X_wksp (void* dst, size_t dstSize, return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, HUF_fourStreams, workSpace, wkspSize, - NULL, NULL, 0, 0 /*bmi2*/); + NULL, NULL, 0, 0 /*bmi2*/, 0); } /* HUF_compress4X_repeat(): * compress input using 4 streams. + * consider skipping quickly * re-use an existing huffman compression table */ size_t HUF_compress4X_repeat (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void* workSpace, size_t wkspSize, - HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible) { return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, HUF_fourStreams, workSpace, wkspSize, - hufTable, repeat, preferRepeat, bmi2); + hufTable, repeat, preferRepeat, bmi2, suspectUncompressible); } diff --git a/lib/zstd/compress/zstd_compress.c b/lib/zstd/compress/zstd_compress.c index 73fff4c60149..f620cafca633 100644 --- a/lib/zstd/compress/zstd_compress.c +++ b/lib/zstd/compress/zstd_compress.c @@ -12,7 +12,6 @@ * Dependencies ***************************************/ #include "../common/zstd_deps.h" /* INT_MAX, ZSTD_memset, ZSTD_memcpy */ -#include "../common/cpu.h" #include "../common/mem.h" #include "hist.h" /* HIST_countFast_wksp */ #define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */ @@ -39,6 +38,18 @@ * Note that functions with explicit context such as ZSTD_compressCCtx() are unaffected. */ +/*! 
+ * ZSTD_HASHLOG3_MAX : + * Maximum size of the hash table dedicated to find 3-bytes matches, + * in log format, aka 17 => 1 << 17 == 128Ki positions. + * This structure is only used in zstd_opt. + * Since allocation is centralized for all strategies, it has to be known here. + * The actual (selected) size of the hash table is then stored in ZSTD_matchState_t.hashLog3, + * so that zstd_opt.c doesn't need to know about this constant. + */ +#ifndef ZSTD_HASHLOG3_MAX +# define ZSTD_HASHLOG3_MAX 17 +#endif /*-************************************* * Helper functions @@ -69,6 +80,10 @@ struct ZSTD_CDict_s { ZSTD_customMem customMem; U32 dictID; int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */ + ZSTD_paramSwitch_e useRowMatchFinder; /* Indicates whether the CDict was created with params that would use + * row-based matchfinder. Unless the cdict is reloaded, we will use + * the same greedy/lazy matchfinder at compression time. + */ }; /* typedef'd to ZSTD_CDict within "zstd.h" */ ZSTD_CCtx* ZSTD_createCCtx(void) @@ -81,7 +96,7 @@ static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager) assert(cctx != NULL); ZSTD_memset(cctx, 0, sizeof(*cctx)); cctx->customMem = memManager; - cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); + cctx->bmi2 = ZSTD_cpuSupportsBmi2(); { size_t const err = ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters); assert(!ZSTD_isError(err)); (void)err; @@ -192,12 +207,64 @@ size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs) /* private API call, for dictBuilder only */ const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); } +/* Returns true if the strategy supports using a row based matchfinder */ +static int ZSTD_rowMatchFinderSupported(const ZSTD_strategy strategy) { + return (strategy >= ZSTD_greedy && strategy <= ZSTD_lazy2); +} + +/* Returns true if the strategy and useRowMatchFinder mode indicate that we will use the row based matchfinder + * for this compression. 
+ */ +static int ZSTD_rowMatchFinderUsed(const ZSTD_strategy strategy, const ZSTD_paramSwitch_e mode) { + assert(mode != ZSTD_ps_auto); + return ZSTD_rowMatchFinderSupported(strategy) && (mode == ZSTD_ps_enable); +} + +/* Returns row matchfinder usage given an initial mode and cParams */ +static ZSTD_paramSwitch_e ZSTD_resolveRowMatchFinderMode(ZSTD_paramSwitch_e mode, + const ZSTD_compressionParameters* const cParams) { +#if defined(ZSTD_ARCH_X86_SSE2) || defined(ZSTD_ARCH_ARM_NEON) + int const kHasSIMD128 = 1; +#else + int const kHasSIMD128 = 0; +#endif + if (mode != ZSTD_ps_auto) return mode; /* if requested enabled, but no SIMD, we still will use row matchfinder */ + mode = ZSTD_ps_disable; + if (!ZSTD_rowMatchFinderSupported(cParams->strategy)) return mode; + if (kHasSIMD128) { + if (cParams->windowLog > 14) mode = ZSTD_ps_enable; + } else { + if (cParams->windowLog > 17) mode = ZSTD_ps_enable; + } + return mode; +} + +/* Returns block splitter usage (generally speaking, when using slower/stronger compression modes) */ +static ZSTD_paramSwitch_e ZSTD_resolveBlockSplitterMode(ZSTD_paramSwitch_e mode, + const ZSTD_compressionParameters* const cParams) { + if (mode != ZSTD_ps_auto) return mode; + return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 17) ? ZSTD_ps_enable : ZSTD_ps_disable; +} + +/* Returns 1 if the arguments indicate that we should allocate a chainTable, 0 otherwise */ +static int ZSTD_allocateChainTable(const ZSTD_strategy strategy, + const ZSTD_paramSwitch_e useRowMatchFinder, + const U32 forDDSDict) { + assert(useRowMatchFinder != ZSTD_ps_auto); + /* We always should allocate a chaintable if we are allocating a matchstate for a DDS dictionary matchstate. + * We do not allocate a chaintable if we are using ZSTD_fast, or are using the row-based matchfinder. 
+ */ + return forDDSDict || ((strategy != ZSTD_fast) && !ZSTD_rowMatchFinderUsed(strategy, useRowMatchFinder)); +} + /* Returns 1 if compression parameters are such that we should * enable long distance matching (wlog >= 27, strategy >= btopt). * Returns 0 otherwise. */ -static U32 ZSTD_CParams_shouldEnableLdm(const ZSTD_compressionParameters* const cParams) { - return cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27; +static ZSTD_paramSwitch_e ZSTD_resolveEnableLdm(ZSTD_paramSwitch_e mode, + const ZSTD_compressionParameters* const cParams) { + if (mode != ZSTD_ps_auto) return mode; + return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27) ? ZSTD_ps_enable : ZSTD_ps_disable; } static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams( @@ -208,15 +275,15 @@ static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams( ZSTD_CCtxParams_init(&cctxParams, ZSTD_CLEVEL_DEFAULT); cctxParams.cParams = cParams; - if (ZSTD_CParams_shouldEnableLdm(&cParams)) { - DEBUGLOG(4, "ZSTD_makeCCtxParamsFromCParams(): Including LDM into cctx params"); - cctxParams.ldmParams.enableLdm = 1; - /* LDM is enabled by default for optimal parser and window size >= 128MB */ + /* Adjust advanced params according to cParams */ + cctxParams.ldmParams.enableLdm = ZSTD_resolveEnableLdm(cctxParams.ldmParams.enableLdm, &cParams); + if (cctxParams.ldmParams.enableLdm == ZSTD_ps_enable) { ZSTD_ldm_adjustParameters(&cctxParams.ldmParams, &cParams); assert(cctxParams.ldmParams.hashLog >= cctxParams.ldmParams.bucketSizeLog); assert(cctxParams.ldmParams.hashRateLog < 32); } - + cctxParams.useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams.useBlockSplitter, &cParams); + cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams); assert(!ZSTD_checkCParams(cParams)); return cctxParams; } @@ -275,6 +342,11 @@ static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, ZSTD_par * But, set it for tracing anyway. 
*/ cctxParams->compressionLevel = compressionLevel; + cctxParams->useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams->useRowMatchFinder, &params->cParams); + cctxParams->useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams->useBlockSplitter, &params->cParams); + cctxParams->ldmParams.enableLdm = ZSTD_resolveEnableLdm(cctxParams->ldmParams.enableLdm, &params->cParams); + DEBUGLOG(4, "ZSTD_CCtxParams_init_internal: useRowMatchFinder=%d, useBlockSplitter=%d ldm=%d", + cctxParams->useRowMatchFinder, cctxParams->useBlockSplitter, cctxParams->ldmParams.enableLdm); } size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params) @@ -431,9 +503,9 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) return bounds; case ZSTD_c_literalCompressionMode: - ZSTD_STATIC_ASSERT(ZSTD_lcm_auto < ZSTD_lcm_huffman && ZSTD_lcm_huffman < ZSTD_lcm_uncompressed); - bounds.lowerBound = ZSTD_lcm_auto; - bounds.upperBound = ZSTD_lcm_uncompressed; + ZSTD_STATIC_ASSERT(ZSTD_ps_auto < ZSTD_ps_enable && ZSTD_ps_enable < ZSTD_ps_disable); + bounds.lowerBound = (int)ZSTD_ps_auto; + bounds.upperBound = (int)ZSTD_ps_disable; return bounds; case ZSTD_c_targetCBlockSize: @@ -462,6 +534,21 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) bounds.upperBound = 1; return bounds; + case ZSTD_c_useBlockSplitter: + bounds.lowerBound = (int)ZSTD_ps_auto; + bounds.upperBound = (int)ZSTD_ps_disable; + return bounds; + + case ZSTD_c_useRowMatchFinder: + bounds.lowerBound = (int)ZSTD_ps_auto; + bounds.upperBound = (int)ZSTD_ps_disable; + return bounds; + + case ZSTD_c_deterministicRefPrefix: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + default: bounds.error = ERROR(parameter_unsupported); return bounds; @@ -523,6 +610,9 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) case ZSTD_c_stableOutBuffer: case ZSTD_c_blockDelimiters: case ZSTD_c_validateSequences: + case ZSTD_c_useBlockSplitter: + case ZSTD_c_useRowMatchFinder: + case 
ZSTD_c_deterministicRefPrefix: default: return 0; } @@ -575,6 +665,9 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) case ZSTD_c_stableOutBuffer: case ZSTD_c_blockDelimiters: case ZSTD_c_validateSequences: + case ZSTD_c_useBlockSplitter: + case ZSTD_c_useRowMatchFinder: + case ZSTD_c_deterministicRefPrefix: break; default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); @@ -672,7 +765,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, } case ZSTD_c_literalCompressionMode : { - const ZSTD_literalCompressionMode_e lcm = (ZSTD_literalCompressionMode_e)value; + const ZSTD_paramSwitch_e lcm = (ZSTD_paramSwitch_e)value; BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm); CCtxParams->literalCompressionMode = lcm; return CCtxParams->literalCompressionMode; @@ -699,7 +792,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, return CCtxParams->enableDedicatedDictSearch; case ZSTD_c_enableLongDistanceMatching : - CCtxParams->ldmParams.enableLdm = (value!=0); + CCtxParams->ldmParams.enableLdm = (ZSTD_paramSwitch_e)value; return CCtxParams->ldmParams.enableLdm; case ZSTD_c_ldmHashLog : @@ -758,6 +851,21 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, CCtxParams->validateSequences = value; return CCtxParams->validateSequences; + case ZSTD_c_useBlockSplitter: + BOUNDCHECK(ZSTD_c_useBlockSplitter, value); + CCtxParams->useBlockSplitter = (ZSTD_paramSwitch_e)value; + return CCtxParams->useBlockSplitter; + + case ZSTD_c_useRowMatchFinder: + BOUNDCHECK(ZSTD_c_useRowMatchFinder, value); + CCtxParams->useRowMatchFinder = (ZSTD_paramSwitch_e)value; + return CCtxParams->useRowMatchFinder; + + case ZSTD_c_deterministicRefPrefix: + BOUNDCHECK(ZSTD_c_deterministicRefPrefix, value); + CCtxParams->deterministicRefPrefix = !!value; + return CCtxParams->deterministicRefPrefix; + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); } } @@ -863,6 +971,15 @@ size_t 
ZSTD_CCtxParams_getParameter( case ZSTD_c_validateSequences : *value = (int)CCtxParams->validateSequences; break; + case ZSTD_c_useBlockSplitter : + *value = (int)CCtxParams->useBlockSplitter; + break; + case ZSTD_c_useRowMatchFinder : + *value = (int)CCtxParams->useRowMatchFinder; + break; + case ZSTD_c_deterministicRefPrefix: + *value = (int)CCtxParams->deterministicRefPrefix; + break; default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); } return 0; @@ -889,7 +1006,7 @@ size_t ZSTD_CCtx_setParametersUsingCCtxParams( return 0; } -ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize) +size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize) { DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize); RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, @@ -969,14 +1086,14 @@ size_t ZSTD_CCtx_loadDictionary_advanced( return 0; } -ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference( +size_t ZSTD_CCtx_loadDictionary_byReference( ZSTD_CCtx* cctx, const void* dict, size_t dictSize) { return ZSTD_CCtx_loadDictionary_advanced( cctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto); } -ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize) +size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize) { return ZSTD_CCtx_loadDictionary_advanced( cctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto); @@ -1146,7 +1263,7 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, break; case ZSTD_cpm_createCDict: /* Assume a small source size when creating a dictionary - * with an unkown source size. + * with an unknown source size. 
*/ if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN) srcSize = minSrcSize; @@ -1220,7 +1337,7 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( srcSizeHint = CCtxParams->srcSizeHint; } cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize, mode); - if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG; + if (CCtxParams->ldmParams.enableLdm == ZSTD_ps_enable) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG; ZSTD_overrideCParams(&cParams, &CCtxParams->cParams); assert(!ZSTD_checkCParams(cParams)); /* srcSizeHint == 0 means 0 */ @@ -1229,9 +1346,14 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( static size_t ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams, + const ZSTD_paramSwitch_e useRowMatchFinder, + const U32 enableDedicatedDictSearch, const U32 forCCtx) { - size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog); + /* chain table size should be 0 for fast or row-hash strategies */ + size_t const chainSize = ZSTD_allocateChainTable(cParams->strategy, useRowMatchFinder, enableDedicatedDictSearch && !forCCtx) + ? ((size_t)1 << cParams->chainLog) + : 0; size_t const hSize = ((size_t)1) << cParams->hashLog; U32 const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0; @@ -1241,43 +1363,53 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams, + hSize * sizeof(U32) + h3Size * sizeof(U32); size_t const optPotentialSpace = - ZSTD_cwksp_alloc_size((MaxML+1) * sizeof(U32)) - + ZSTD_cwksp_alloc_size((MaxLL+1) * sizeof(U32)) - + ZSTD_cwksp_alloc_size((MaxOff+1) * sizeof(U32)) - + ZSTD_cwksp_alloc_size((1<<Litbits) * sizeof(U32)) - + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t)) - + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t)); + ZSTD_cwksp_aligned_alloc_size((MaxML+1) * sizeof(U32)) + + ZSTD_cwksp_aligned_alloc_size((MaxLL+1) * sizeof(U32)) + + ZSTD_cwksp_aligned_alloc_size((MaxOff+1) * sizeof(U32)) + + ZSTD_cwksp_aligned_alloc_size((1<<Litbits) * sizeof(U32)) + + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t)) + + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t)); + size_t const lazyAdditionalSpace = ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder) + ? ZSTD_cwksp_aligned_alloc_size(hSize*sizeof(U16)) + : 0; size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt)) ? 
optPotentialSpace : 0; + size_t const slackSpace = ZSTD_cwksp_slack_space_required(); + + /* tables are guaranteed to be sized in multiples of 64 bytes (or 16 uint32_t) */ + ZSTD_STATIC_ASSERT(ZSTD_HASHLOG_MIN >= 4 && ZSTD_WINDOWLOG_MIN >= 4 && ZSTD_CHAINLOG_MIN >= 4); + assert(useRowMatchFinder != ZSTD_ps_auto); + DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u", (U32)chainSize, (U32)hSize, (U32)h3Size); - return tableSpace + optSpace; + return tableSpace + optSpace + slackSpace + lazyAdditionalSpace; } static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( const ZSTD_compressionParameters* cParams, const ldmParams_t* ldmParams, const int isStatic, + const ZSTD_paramSwitch_e useRowMatchFinder, const size_t buffInSize, const size_t buffOutSize, const U64 pledgedSrcSize) { - size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << cParams->windowLog), pledgedSrcSize)); + size_t const windowSize = (size_t) BOUNDED(1ULL, 1ULL << cParams->windowLog, pledgedSrcSize); size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); U32 const divider = (cParams->minMatch==3) ? 3 : 4; size_t const maxNbSeq = blockSize / divider; size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize) - + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef)) + + ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef)) + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE)); size_t const entropySpace = ZSTD_cwksp_alloc_size(ENTROPY_WORKSPACE_SIZE); size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t)); - size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, /* forCCtx */ 1); + size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 0, /* forCCtx */ 1); size_t const ldmSpace = ZSTD_ldm_getTableSize(*ldmParams); size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(*ldmParams, blockSize); - size_t const ldmSeqSpace = ldmParams->enableLdm ? 
- ZSTD_cwksp_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0; + size_t const ldmSeqSpace = ldmParams->enableLdm == ZSTD_ps_enable ? + ZSTD_cwksp_aligned_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0; size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize) @@ -1303,19 +1435,32 @@ size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params) { ZSTD_compressionParameters const cParams = ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); + ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder, + &cParams); RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); /* estimateCCtxSize is for one-shot compression. So no buffers should * be needed. However, we still allocate two 0-sized buffers, which can * take space under ASAN. */ return ZSTD_estimateCCtxSize_usingCCtxParams_internal( - &cParams, &params->ldmParams, 1, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN); + &cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN); } size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams) { - ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams); - return ZSTD_estimateCCtxSize_usingCCtxParams(&params); + ZSTD_CCtx_params initialParams = ZSTD_makeCCtxParamsFromCParams(cParams); + if (ZSTD_rowMatchFinderSupported(cParams.strategy)) { + /* Pick bigger of not using and using row-based matchfinder for greedy and lazy strategies */ + size_t noRowCCtxSize; + size_t rowCCtxSize; + initialParams.useRowMatchFinder = ZSTD_ps_disable; + noRowCCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams); + initialParams.useRowMatchFinder = ZSTD_ps_enable; + rowCCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams); + return MAX(noRowCCtxSize, rowCCtxSize); + } else { + return ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams); + } } static size_t 
ZSTD_estimateCCtxSize_internal(int compressionLevel) @@ -1355,17 +1500,29 @@ size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params) size_t const outBuffSize = (params->outBufferMode == ZSTD_bm_buffered) ? ZSTD_compressBound(blockSize) + 1 : 0; + ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder, &params->cParams); return ZSTD_estimateCCtxSize_usingCCtxParams_internal( - &cParams, &params->ldmParams, 1, inBuffSize, outBuffSize, + &cParams, &params->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize, ZSTD_CONTENTSIZE_UNKNOWN); } } size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams) { - ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams); - return ZSTD_estimateCStreamSize_usingCCtxParams(&params); + ZSTD_CCtx_params initialParams = ZSTD_makeCCtxParamsFromCParams(cParams); + if (ZSTD_rowMatchFinderSupported(cParams.strategy)) { + /* Pick bigger of not using and using row-based matchfinder for greedy and lazy strategies */ + size_t noRowCCtxSize; + size_t rowCCtxSize; + initialParams.useRowMatchFinder = ZSTD_ps_disable; + noRowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams); + initialParams.useRowMatchFinder = ZSTD_ps_enable; + rowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams); + return MAX(noRowCCtxSize, rowCCtxSize); + } else { + return ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams); + } } static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel) @@ -1480,20 +1637,27 @@ typedef enum { ZSTD_resetTarget_CCtx } ZSTD_resetTarget_e; + static size_t ZSTD_reset_matchState(ZSTD_matchState_t* ms, ZSTD_cwksp* ws, const ZSTD_compressionParameters* cParams, + const ZSTD_paramSwitch_e useRowMatchFinder, const ZSTD_compResetPolicy_e crp, const ZSTD_indexResetPolicy_e forceResetIndex, const ZSTD_resetTarget_e forWho) { - size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 
0 : ((size_t)1 << cParams->chainLog); + /* disable chain table allocation for fast or row-based strategies */ + size_t const chainSize = ZSTD_allocateChainTable(cParams->strategy, useRowMatchFinder, + ms->dedicatedDictSearch && (forWho == ZSTD_resetTarget_CDict)) + ? ((size_t)1 << cParams->chainLog) + : 0; size_t const hSize = ((size_t)1) << cParams->hashLog; U32 const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0; DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset); + assert(useRowMatchFinder != ZSTD_ps_auto); if (forceResetIndex == ZSTDirp_reset) { ZSTD_window_init(&ms->window); ZSTD_cwksp_mark_tables_dirty(ws); @@ -1532,11 +1696,23 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms, ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t)); } + if (ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)) { + { /* Row match finder needs an additional table of hashes ("tags") */ + size_t const tagTableSize = hSize*sizeof(U16); + ms->tagTable = (U16*)ZSTD_cwksp_reserve_aligned(ws, tagTableSize); + if (ms->tagTable) ZSTD_memset(ms->tagTable, 0, tagTableSize); + } + { /* Switch to 32-entry rows if searchLog is 5 (or more) */ + U32 const rowLog = BOUNDED(4, cParams->searchLog, 6); + assert(cParams->hashLog >= rowLog); + ms->rowHashLog = cParams->hashLog - rowLog; + } + } + ms->cParams = *cParams; RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation, "failed a workspace allocation in ZSTD_reset_matchState"); - return 0; } @@ -1553,61 +1729,87 @@ static int ZSTD_indexTooCloseToMax(ZSTD_window_t w) return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN); } +/* ZSTD_dictTooBig(): + * When dictionaries are larger than ZSTD_CHUNKSIZE_MAX they can't be loaded in + * one go generically. 
So we ensure that in that case we reset the tables to zero, + * so that we can load as much of the dictionary as possible. + */ +static int ZSTD_dictTooBig(size_t const loadedDictSize) +{ + return loadedDictSize > ZSTD_CHUNKSIZE_MAX; +} + /*! ZSTD_resetCCtx_internal() : - note : `params` are assumed fully validated at this stage */ + * @param loadedDictSize The size of the dictionary to be loaded + * into the context, if any. If no dictionary is used, or the + * dictionary is being attached / copied, then pass 0. + * note : `params` are assumed fully validated at this stage. + */ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, - ZSTD_CCtx_params params, + ZSTD_CCtx_params const* params, U64 const pledgedSrcSize, + size_t const loadedDictSize, ZSTD_compResetPolicy_e const crp, ZSTD_buffered_policy_e const zbuff) { ZSTD_cwksp* const ws = &zc->workspace; - DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u", - (U32)pledgedSrcSize, params.cParams.windowLog); - assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); + DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u, useRowMatchFinder=%d useBlockSplitter=%d", + (U32)pledgedSrcSize, params->cParams.windowLog, (int)params->useRowMatchFinder, (int)params->useBlockSplitter); + assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); zc->isFirstBlock = 1; - if (params.ldmParams.enableLdm) { + /* Set applied params early so we can modify them for LDM, + * and point params at the applied params. 
+ */ + zc->appliedParams = *params; + params = &zc->appliedParams; + + assert(params->useRowMatchFinder != ZSTD_ps_auto); + assert(params->useBlockSplitter != ZSTD_ps_auto); + assert(params->ldmParams.enableLdm != ZSTD_ps_auto); + if (params->ldmParams.enableLdm == ZSTD_ps_enable) { /* Adjust long distance matching parameters */ - ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams); - assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog); - assert(params.ldmParams.hashRateLog < 32); + ZSTD_ldm_adjustParameters(&zc->appliedParams.ldmParams, &params->cParams); + assert(params->ldmParams.hashLog >= params->ldmParams.bucketSizeLog); + assert(params->ldmParams.hashRateLog < 32); } - { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize)); + { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize)); size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); - U32 const divider = (params.cParams.minMatch==3) ? 3 : 4; + U32 const divider = (params->cParams.minMatch==3) ? 3 : 4; size_t const maxNbSeq = blockSize / divider; - size_t const buffOutSize = (zbuff == ZSTDb_buffered && params.outBufferMode == ZSTD_bm_buffered) + size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered) ? ZSTD_compressBound(blockSize) + 1 : 0; - size_t const buffInSize = (zbuff == ZSTDb_buffered && params.inBufferMode == ZSTD_bm_buffered) + size_t const buffInSize = (zbuff == ZSTDb_buffered && params->inBufferMode == ZSTD_bm_buffered) ? windowSize + blockSize : 0; - size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize); + size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize); int const indexTooClose = ZSTD_indexTooCloseToMax(zc->blockState.matchState.window); + int const dictTooBig = ZSTD_dictTooBig(loadedDictSize); ZSTD_indexResetPolicy_e needsIndexReset = - (!indexTooClose && zc->initialized) ? 
ZSTDirp_continue : ZSTDirp_reset; + (indexTooClose || dictTooBig || !zc->initialized) ? ZSTDirp_reset : ZSTDirp_continue; size_t const neededSpace = ZSTD_estimateCCtxSize_usingCCtxParams_internal( - &params.cParams, &params.ldmParams, zc->staticSize != 0, + &params->cParams, &params->ldmParams, zc->staticSize != 0, params->useRowMatchFinder, buffInSize, buffOutSize, pledgedSrcSize); + int resizeWorkspace; + FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!"); if (!zc->staticSize) ZSTD_cwksp_bump_oversized_duration(ws, 0); - /* Check if workspace is large enough, alloc a new one if needed */ - { + { /* Check if workspace is large enough, alloc a new one if needed */ int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace; int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace); - + resizeWorkspace = workspaceTooSmall || workspaceWasteful; DEBUGLOG(4, "Need %zu B workspace", neededSpace); DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize); - if (workspaceTooSmall || workspaceWasteful) { + if (resizeWorkspace) { DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB", ZSTD_cwksp_sizeof(ws) >> 10, neededSpace >> 10); @@ -1629,14 +1831,13 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t)); RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock"); zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, ENTROPY_WORKSPACE_SIZE); - RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate entropyWorkspace"); + RETURN_ERROR_IF(zc->entropyWorkspace == NULL, memory_allocation, "couldn't allocate entropyWorkspace"); } } ZSTD_cwksp_clear(ws); /* init params */ - zc->appliedParams = params; - zc->blockState.matchState.cParams = params.cParams; + zc->blockState.matchState.cParams = params->cParams; zc->pledgedSrcSizePlusOne = 
pledgedSrcSize+1; zc->consumedSrcSize = 0; zc->producedCSize = 0; @@ -1667,11 +1868,11 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize); /* ldm bucketOffsets table */ - if (params.ldmParams.enableLdm) { + if (params->ldmParams.enableLdm == ZSTD_ps_enable) { /* TODO: avoid memset? */ size_t const numBuckets = - ((size_t)1) << (params.ldmParams.hashLog - - params.ldmParams.bucketSizeLog); + ((size_t)1) << (params->ldmParams.hashLog - + params->ldmParams.bucketSizeLog); zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, numBuckets); ZSTD_memset(zc->ldmState.bucketOffsets, 0, numBuckets); } @@ -1687,32 +1888,28 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, FORWARD_IF_ERROR(ZSTD_reset_matchState( &zc->blockState.matchState, ws, - &params.cParams, + &params->cParams, + params->useRowMatchFinder, crp, needsIndexReset, ZSTD_resetTarget_CCtx), ""); /* ldm hash table */ - if (params.ldmParams.enableLdm) { + if (params->ldmParams.enableLdm == ZSTD_ps_enable) { /* TODO: avoid memset? */ - size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog; + size_t const ldmHSize = ((size_t)1) << params->ldmParams.hashLog; zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t)); ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t)); zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq)); zc->maxNbLdmSequences = maxNbLdmSeq; ZSTD_window_init(&zc->ldmState.window); - ZSTD_window_clear(&zc->ldmState.window); zc->ldmState.loadedDictEnd = 0; } - /* Due to alignment, when reusing a workspace, we can actually consume - * up to 3 extra bytes for alignment. 
See the comments in zstd_cwksp.h - */ - assert(ZSTD_cwksp_used(ws) >= neededSpace && - ZSTD_cwksp_used(ws) <= neededSpace + 3); - DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws)); + assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace, resizeWorkspace)); + zc->initialized = 1; return 0; @@ -1768,6 +1965,8 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx, U64 pledgedSrcSize, ZSTD_buffered_policy_e zbuff) { + DEBUGLOG(4, "ZSTD_resetCCtx_byAttachingCDict() pledgedSrcSize=%llu", + (unsigned long long)pledgedSrcSize); { ZSTD_compressionParameters adjusted_cdict_cParams = cdict->matchState.cParams; unsigned const windowLog = params.cParams.windowLog; @@ -1783,7 +1982,9 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx, params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize, cdict->dictContentSize, ZSTD_cpm_attachDict); params.cParams.windowLog = windowLog; - FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, + params.useRowMatchFinder = cdict->useRowMatchFinder; /* cdict overrides */ + FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, &params, pledgedSrcSize, + /* loadedDictSize */ 0, ZSTDcrp_makeClean, zbuff), ""); assert(cctx->appliedParams.cParams.strategy == adjusted_cdict_cParams.strategy); } @@ -1827,15 +2028,17 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams; assert(!cdict->matchState.dedicatedDictSearch); - - DEBUGLOG(4, "copying dictionary into context"); + DEBUGLOG(4, "ZSTD_resetCCtx_byCopyingCDict() pledgedSrcSize=%llu", + (unsigned long long)pledgedSrcSize); { unsigned const windowLog = params.cParams.windowLog; assert(windowLog != 0); /* Copy only compression parameters related to tables. 
*/ params.cParams = *cdict_cParams; params.cParams.windowLog = windowLog; - FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, + params.useRowMatchFinder = cdict->useRowMatchFinder; + FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, &params, pledgedSrcSize, + /* loadedDictSize */ 0, ZSTDcrp_leaveDirty, zbuff), ""); assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy); assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog); @@ -1843,17 +2046,30 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, } ZSTD_cwksp_mark_tables_dirty(&cctx->workspace); + assert(params.useRowMatchFinder != ZSTD_ps_auto); /* copy tables */ - { size_t const chainSize = (cdict_cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict_cParams->chainLog); + { size_t const chainSize = ZSTD_allocateChainTable(cdict_cParams->strategy, cdict->useRowMatchFinder, 0 /* DDS guaranteed disabled */) + ? ((size_t)1 << cdict_cParams->chainLog) + : 0; size_t const hSize = (size_t)1 << cdict_cParams->hashLog; ZSTD_memcpy(cctx->blockState.matchState.hashTable, cdict->matchState.hashTable, hSize * sizeof(U32)); - ZSTD_memcpy(cctx->blockState.matchState.chainTable, + /* Do not copy cdict's chainTable if cctx has parameters such that it would not use chainTable */ + if (ZSTD_allocateChainTable(cctx->appliedParams.cParams.strategy, cctx->appliedParams.useRowMatchFinder, 0 /* forDDSDict */)) { + ZSTD_memcpy(cctx->blockState.matchState.chainTable, cdict->matchState.chainTable, chainSize * sizeof(U32)); + } + /* copy tag table */ + if (ZSTD_rowMatchFinderUsed(cdict_cParams->strategy, cdict->useRowMatchFinder)) { + size_t const tagTableSize = hSize*sizeof(U16); + ZSTD_memcpy(cctx->blockState.matchState.tagTable, + cdict->matchState.tagTable, + tagTableSize); + } } /* Zero the hashTable3, since the cdict never fills it */ @@ -1917,16 +2133,22 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, U64 pledgedSrcSize, ZSTD_buffered_policy_e zbuff) { - 
DEBUGLOG(5, "ZSTD_copyCCtx_internal"); RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong, "Can't copy a ctx that's not in init stage."); - + DEBUGLOG(5, "ZSTD_copyCCtx_internal"); ZSTD_memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem)); { ZSTD_CCtx_params params = dstCCtx->requestedParams; /* Copy only compression parameters related to tables. */ params.cParams = srcCCtx->appliedParams.cParams; + assert(srcCCtx->appliedParams.useRowMatchFinder != ZSTD_ps_auto); + assert(srcCCtx->appliedParams.useBlockSplitter != ZSTD_ps_auto); + assert(srcCCtx->appliedParams.ldmParams.enableLdm != ZSTD_ps_auto); + params.useRowMatchFinder = srcCCtx->appliedParams.useRowMatchFinder; + params.useBlockSplitter = srcCCtx->appliedParams.useBlockSplitter; + params.ldmParams = srcCCtx->appliedParams.ldmParams; params.fParams = fParams; - ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize, + ZSTD_resetCCtx_internal(dstCCtx, &params, pledgedSrcSize, + /* loadedDictSize */ 0, ZSTDcrp_leaveDirty, zbuff); assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog); assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy); @@ -1938,7 +2160,11 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace); /* copy tables */ - { size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog); + { size_t const chainSize = ZSTD_allocateChainTable(srcCCtx->appliedParams.cParams.strategy, + srcCCtx->appliedParams.useRowMatchFinder, + 0 /* forDDSDict */) + ? ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog) + : 0; size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog; int const h3log = srcCCtx->blockState.matchState.hashLog3; size_t const h3Size = h3log ? 
((size_t)1 << h3log) : 0; @@ -2005,6 +2231,8 @@ ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerVa int const nbRows = (int)size / ZSTD_ROWSIZE; int cellNb = 0; int rowNb; + /* Protect special index values < ZSTD_WINDOW_START_INDEX. */ + U32 const reducerThreshold = reducerValue + ZSTD_WINDOW_START_INDEX; assert((size & (ZSTD_ROWSIZE-1)) == 0); /* multiple of ZSTD_ROWSIZE */ assert(size < (1U<<31)); /* can be casted to int */ @@ -2012,12 +2240,17 @@ ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerVa for (rowNb=0 ; rowNb < nbRows ; rowNb++) { int column; for (column=0; columnhashTable, hSize, reducerValue); } - if (params->cParams.strategy != ZSTD_fast) { + if (ZSTD_allocateChainTable(params->cParams.strategy, params->useRowMatchFinder, (U32)ms->dedicatedDictSearch)) { U32 const chainSize = (U32)1 << params->cParams.chainLog; if (params->cParams.strategy == ZSTD_btlazy2) ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue); @@ -2072,14 +2305,14 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) assert(nbSeq <= seqStorePtr->maxNbSeq); for (u=0; ulongLengthID==1) + if (seqStorePtr->longLengthType==ZSTD_llt_literalLength) llCodeTable[seqStorePtr->longLengthPos] = MaxLL; - if (seqStorePtr->longLengthID==2) + if (seqStorePtr->longLengthType==ZSTD_llt_matchLength) mlCodeTable[seqStorePtr->longLengthPos] = MaxML; } @@ -2093,10 +2326,161 @@ static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams) return (cctxParams->targetCBlockSize != 0); } -/* ZSTD_entropyCompressSequences_internal(): - * actually compresses both literals and sequences */ +/* ZSTD_blockSplitterEnabled(): + * Returns if block splitting param is being used + * If used, compression will do best effort to split a block in order to improve compression ratio. + * At the time this function is called, the parameter must be finalized. + * Returns 1 if true, 0 otherwise. 
*/ +static int ZSTD_blockSplitterEnabled(ZSTD_CCtx_params* cctxParams) +{ + DEBUGLOG(5, "ZSTD_blockSplitterEnabled (useBlockSplitter=%d)", cctxParams->useBlockSplitter); + assert(cctxParams->useBlockSplitter != ZSTD_ps_auto); + return (cctxParams->useBlockSplitter == ZSTD_ps_enable); +} + +/* Type returned by ZSTD_buildSequencesStatistics containing finalized symbol encoding types + * and size of the sequences statistics + */ +typedef struct { + U32 LLtype; + U32 Offtype; + U32 MLtype; + size_t size; + size_t lastCountSize; /* Accounts for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */ +} ZSTD_symbolEncodingTypeStats_t; + +/* ZSTD_buildSequencesStatistics(): + * Returns a ZSTD_symbolEncodingTypeStats_t, or a zstd error code in the `size` field. + * Modifies `nextEntropy` to have the appropriate values as a side effect. + * nbSeq must be greater than 0. + * + * entropyWkspSize must be of size at least ENTROPY_WORKSPACE_SIZE - (MaxSeq + 1)*sizeof(U32) + */ +static ZSTD_symbolEncodingTypeStats_t +ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq, + const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy, + BYTE* dst, const BYTE* const dstEnd, + ZSTD_strategy strategy, unsigned* countWorkspace, + void* entropyWorkspace, size_t entropyWkspSize) { + BYTE* const ostart = dst; + const BYTE* const oend = dstEnd; + BYTE* op = ostart; + FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable; + FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable; + FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable; + const BYTE* const ofCodeTable = seqStorePtr->ofCode; + const BYTE* const llCodeTable = seqStorePtr->llCode; + const BYTE* const mlCodeTable = seqStorePtr->mlCode; + ZSTD_symbolEncodingTypeStats_t stats; + + stats.lastCountSize = 0; + /* convert length/distances into codes */ + ZSTD_seqToCodes(seqStorePtr); + assert(op <= oend); + assert(nbSeq != 0); /* ZSTD_selectEncodingType() divides by nbSeq */ + 
/* build CTable for Literal Lengths */ + { unsigned max = MaxLL; + size_t const mostFrequent = HIST_countFast_wksp(countWorkspace, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ + DEBUGLOG(5, "Building LL table"); + nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode; + stats.LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode, + countWorkspace, max, mostFrequent, nbSeq, + LLFSELog, prevEntropy->litlengthCTable, + LL_defaultNorm, LL_defaultNormLog, + ZSTD_defaultAllowed, strategy); + assert(set_basic < set_compressed && set_rle < set_compressed); + assert(!(stats.LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable( + op, (size_t)(oend - op), + CTable_LitLength, LLFSELog, (symbolEncodingType_e)stats.LLtype, + countWorkspace, max, llCodeTable, nbSeq, + LL_defaultNorm, LL_defaultNormLog, MaxLL, + prevEntropy->litlengthCTable, + sizeof(prevEntropy->litlengthCTable), + entropyWorkspace, entropyWkspSize); + if (ZSTD_isError(countSize)) { + DEBUGLOG(3, "ZSTD_buildCTable for LitLens failed"); + stats.size = countSize; + return stats; + } + if (stats.LLtype == set_compressed) + stats.lastCountSize = countSize; + op += countSize; + assert(op <= oend); + } } + /* build CTable for Offsets */ + { unsigned max = MaxOff; + size_t const mostFrequent = HIST_countFast_wksp( + countWorkspace, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ + /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ + ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? 
ZSTD_defaultAllowed : ZSTD_defaultDisallowed; + DEBUGLOG(5, "Building OF table"); + nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode; + stats.Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode, + countWorkspace, max, mostFrequent, nbSeq, + OffFSELog, prevEntropy->offcodeCTable, + OF_defaultNorm, OF_defaultNormLog, + defaultPolicy, strategy); + assert(!(stats.Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable( + op, (size_t)(oend - op), + CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)stats.Offtype, + countWorkspace, max, ofCodeTable, nbSeq, + OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, + prevEntropy->offcodeCTable, + sizeof(prevEntropy->offcodeCTable), + entropyWorkspace, entropyWkspSize); + if (ZSTD_isError(countSize)) { + DEBUGLOG(3, "ZSTD_buildCTable for Offsets failed"); + stats.size = countSize; + return stats; + } + if (stats.Offtype == set_compressed) + stats.lastCountSize = countSize; + op += countSize; + assert(op <= oend); + } } + /* build CTable for MatchLengths */ + { unsigned max = MaxML; + size_t const mostFrequent = HIST_countFast_wksp( + countWorkspace, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ + DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); + nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode; + stats.MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode, + countWorkspace, max, mostFrequent, nbSeq, + MLFSELog, prevEntropy->matchlengthCTable, + ML_defaultNorm, ML_defaultNormLog, + ZSTD_defaultAllowed, strategy); + assert(!(stats.MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable( + op, (size_t)(oend - op), + CTable_MatchLength, MLFSELog, (symbolEncodingType_e)stats.MLtype, + countWorkspace, max, 
mlCodeTable, nbSeq, + ML_defaultNorm, ML_defaultNormLog, MaxML, + prevEntropy->matchlengthCTable, + sizeof(prevEntropy->matchlengthCTable), + entropyWorkspace, entropyWkspSize); + if (ZSTD_isError(countSize)) { + DEBUGLOG(3, "ZSTD_buildCTable for MatchLengths failed"); + stats.size = countSize; + return stats; + } + if (stats.MLtype == set_compressed) + stats.lastCountSize = countSize; + op += countSize; + assert(op <= oend); + } } + stats.size = (size_t)(op-ostart); + return stats; +} + +/* ZSTD_entropyCompressSeqStore_internal(): + * compresses both literals and sequences + * Returns compressed size of block, or a zstd error. + */ +#define SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO 20 MEM_STATIC size_t -ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, +ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr, const ZSTD_entropyCTables_t* prevEntropy, ZSTD_entropyCTables_t* nextEntropy, const ZSTD_CCtx_params* cctxParams, @@ -2110,36 +2494,38 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable; FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable; FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable; - U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ const seqDef* const sequences = seqStorePtr->sequencesStart; + const size_t nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; const BYTE* const ofCodeTable = seqStorePtr->ofCode; const BYTE* const llCodeTable = seqStorePtr->llCode; const BYTE* const mlCodeTable = seqStorePtr->mlCode; BYTE* const ostart = (BYTE*)dst; BYTE* const oend = ostart + dstCapacity; BYTE* op = ostart; - size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart); - BYTE* seqHead; - BYTE* lastNCount = NULL; + size_t lastCountSize; entropyWorkspace = count + (MaxSeq + 1); entropyWkspSize -= (MaxSeq + 1) * sizeof(*count); - DEBUGLOG(4, "ZSTD_entropyCompressSequences_internal 
(nbSeq=%zu)", nbSeq); + DEBUGLOG(4, "ZSTD_entropyCompressSeqStore_internal (nbSeq=%zu)", nbSeq); ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<= HUF_WORKSPACE_SIZE); /* Compress literals */ { const BYTE* const literals = seqStorePtr->litStart; + size_t const numSequences = seqStorePtr->sequences - seqStorePtr->sequencesStart; + size_t const numLiterals = seqStorePtr->lit - seqStorePtr->litStart; + /* Base suspicion of uncompressibility on ratio of literals to sequences */ + unsigned const suspectUncompressible = (numSequences == 0) || (numLiterals / numSequences >= SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO); size_t const litSize = (size_t)(seqStorePtr->lit - literals); size_t const cSize = ZSTD_compressLiterals( &prevEntropy->huf, &nextEntropy->huf, cctxParams->cParams.strategy, - ZSTD_disableLiteralsCompression(cctxParams), + ZSTD_literalsCompressionIsDisabled(cctxParams), op, dstCapacity, literals, litSize, entropyWorkspace, entropyWkspSize, - bmi2); + bmi2, suspectUncompressible); FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed"); assert(cSize <= dstCapacity); op += cSize; @@ -2165,95 +2551,20 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, ZSTD_memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse)); return (size_t)(op - ostart); } - - /* seqHead : flags for FSE encoding type */ - seqHead = op++; - assert(op <= oend); - - /* convert length/distances into codes */ - ZSTD_seqToCodes(seqStorePtr); - /* build CTable for Literal Lengths */ - { unsigned max = MaxLL; - size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ - DEBUGLOG(5, "Building LL table"); - nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode; - LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode, - count, max, mostFrequent, nbSeq, - LLFSELog, prevEntropy->fse.litlengthCTable, - LL_defaultNorm, LL_defaultNormLog, - ZSTD_defaultAllowed, 
strategy); - assert(set_basic < set_compressed && set_rle < set_compressed); - assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable( - op, (size_t)(oend - op), - CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, - count, max, llCodeTable, nbSeq, - LL_defaultNorm, LL_defaultNormLog, MaxLL, - prevEntropy->fse.litlengthCTable, - sizeof(prevEntropy->fse.litlengthCTable), - entropyWorkspace, entropyWkspSize); - FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed"); - if (LLtype == set_compressed) - lastNCount = op; - op += countSize; - assert(op <= oend); - } } - /* build CTable for Offsets */ - { unsigned max = MaxOff; - size_t const mostFrequent = HIST_countFast_wksp( - count, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ - /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ - ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? 
ZSTD_defaultAllowed : ZSTD_defaultDisallowed; - DEBUGLOG(5, "Building OF table"); - nextEntropy->fse.offcode_repeatMode = prevEntropy->fse.offcode_repeatMode; - Offtype = ZSTD_selectEncodingType(&nextEntropy->fse.offcode_repeatMode, - count, max, mostFrequent, nbSeq, - OffFSELog, prevEntropy->fse.offcodeCTable, - OF_defaultNorm, OF_defaultNormLog, - defaultPolicy, strategy); - assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable( - op, (size_t)(oend - op), - CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, - count, max, ofCodeTable, nbSeq, - OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, - prevEntropy->fse.offcodeCTable, - sizeof(prevEntropy->fse.offcodeCTable), - entropyWorkspace, entropyWkspSize); - FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed"); - if (Offtype == set_compressed) - lastNCount = op; - op += countSize; - assert(op <= oend); - } } - /* build CTable for MatchLengths */ - { unsigned max = MaxML; - size_t const mostFrequent = HIST_countFast_wksp( - count, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ - DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); - nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode; - MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode, - count, max, mostFrequent, nbSeq, - MLFSELog, prevEntropy->fse.matchlengthCTable, - ML_defaultNorm, ML_defaultNormLog, - ZSTD_defaultAllowed, strategy); - assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable( - op, (size_t)(oend - op), - CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, - count, max, mlCodeTable, nbSeq, - ML_defaultNorm, ML_defaultNormLog, MaxML, - prevEntropy->fse.matchlengthCTable, - 
sizeof(prevEntropy->fse.matchlengthCTable), - entropyWorkspace, entropyWkspSize); - FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed"); - if (MLtype == set_compressed) - lastNCount = op; - op += countSize; - assert(op <= oend); - } } - - *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); + { + ZSTD_symbolEncodingTypeStats_t stats; + BYTE* seqHead = op++; + /* build stats for sequences */ + stats = ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq, + &prevEntropy->fse, &nextEntropy->fse, + op, oend, + strategy, count, + entropyWorkspace, entropyWkspSize); + FORWARD_IF_ERROR(stats.size, "ZSTD_buildSequencesStatistics failed!"); + *seqHead = (BYTE)((stats.LLtype<<6) + (stats.Offtype<<4) + (stats.MLtype<<2)); + lastCountSize = stats.lastCountSize; + op += stats.size; + } { size_t const bitstreamSize = ZSTD_encodeSequences( op, (size_t)(oend - op), @@ -2273,9 +2584,9 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, * In this exceedingly rare case, we will simply emit an uncompressed * block, since it isn't worth optimizing. 
*/ - if (lastNCount && (op - lastNCount) < 4) { - /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */ - assert(op - lastNCount == 3); + if (lastCountSize && (lastCountSize + bitstreamSize) < 4) { + /* lastCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */ + assert(lastCountSize + bitstreamSize == 3); DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by " "emitting an uncompressed block."); return 0; @@ -2287,7 +2598,7 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, } MEM_STATIC size_t -ZSTD_entropyCompressSequences(seqStore_t* seqStorePtr, +ZSTD_entropyCompressSeqStore(seqStore_t* seqStorePtr, const ZSTD_entropyCTables_t* prevEntropy, ZSTD_entropyCTables_t* nextEntropy, const ZSTD_CCtx_params* cctxParams, @@ -2296,7 +2607,7 @@ ZSTD_entropyCompressSequences(seqStore_t* seqStorePtr, void* entropyWorkspace, size_t entropyWkspSize, int bmi2) { - size_t const cSize = ZSTD_entropyCompressSequences_internal( + size_t const cSize = ZSTD_entropyCompressSeqStore_internal( seqStorePtr, prevEntropy, nextEntropy, cctxParams, dst, dstCapacity, entropyWorkspace, entropyWkspSize, bmi2); @@ -2306,20 +2617,20 @@ ZSTD_entropyCompressSequences(seqStore_t* seqStorePtr, */ if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity)) return 0; /* block not compressed */ - FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSequences_internal failed"); + FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSeqStore_internal failed"); /* Check compressibility */ { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy); if (cSize >= maxCSize) return 0; /* block not compressed */ } - DEBUGLOG(4, "ZSTD_entropyCompressSequences() cSize: %zu\n", cSize); + DEBUGLOG(4, "ZSTD_entropyCompressSeqStore() cSize: %zu", cSize); return cSize; } /* ZSTD_selectBlockCompressor() : * Not static, but internal use only (used by long distance matcher) * assumption : strat is a valid strategy */ -ZSTD_blockCompressor 
ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode) +ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e useRowMatchFinder, ZSTD_dictMode_e dictMode) { static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = { { ZSTD_compressBlock_fast /* default for 0 */, @@ -2367,7 +2678,28 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMo ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1); assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); - selectedCompressor = blockCompressor[(int)dictMode][(int)strat]; + DEBUGLOG(4, "Selected block compressor: dictMode=%d strat=%d rowMatchfinder=%d", (int)dictMode, (int)strat, (int)useRowMatchFinder); + if (ZSTD_rowMatchFinderUsed(strat, useRowMatchFinder)) { + static const ZSTD_blockCompressor rowBasedBlockCompressors[4][3] = { + { ZSTD_compressBlock_greedy_row, + ZSTD_compressBlock_lazy_row, + ZSTD_compressBlock_lazy2_row }, + { ZSTD_compressBlock_greedy_extDict_row, + ZSTD_compressBlock_lazy_extDict_row, + ZSTD_compressBlock_lazy2_extDict_row }, + { ZSTD_compressBlock_greedy_dictMatchState_row, + ZSTD_compressBlock_lazy_dictMatchState_row, + ZSTD_compressBlock_lazy2_dictMatchState_row }, + { ZSTD_compressBlock_greedy_dedicatedDictSearch_row, + ZSTD_compressBlock_lazy_dedicatedDictSearch_row, + ZSTD_compressBlock_lazy2_dedicatedDictSearch_row } + }; + DEBUGLOG(4, "Selecting a row-based matchfinder"); + assert(useRowMatchFinder != ZSTD_ps_auto); + selectedCompressor = rowBasedBlockCompressors[(int)dictMode][(int)strat - (int)ZSTD_greedy]; + } else { + selectedCompressor = blockCompressor[(int)dictMode][(int)strat]; + } assert(selectedCompressor != NULL); return selectedCompressor; } @@ -2383,7 +2715,7 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr) { ssPtr->lit = ssPtr->litStart; ssPtr->sequences = ssPtr->sequencesStart; - ssPtr->longLengthID = 0; + ssPtr->longLengthType = ZSTD_llt_none; } typedef enum { ZSTDbss_compress, 
ZSTDbss_noCompress } ZSTD_buildSeqStore_e; @@ -2430,15 +2762,16 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i]; } if (zc->externSeqStore.pos < zc->externSeqStore.size) { - assert(!zc->appliedParams.ldmParams.enableLdm); + assert(zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_disable); /* Updates ldmSeqStore.pos */ lastLLSize = ZSTD_ldm_blockCompress(&zc->externSeqStore, ms, &zc->seqStore, zc->blockState.nextCBlock->rep, + zc->appliedParams.useRowMatchFinder, src, srcSize); assert(zc->externSeqStore.pos <= zc->externSeqStore.size); - } else if (zc->appliedParams.ldmParams.enableLdm) { + } else if (zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) { rawSeqStore_t ldmSeqStore = kNullRawSeqStore; ldmSeqStore.seq = zc->ldmSequences; @@ -2452,10 +2785,13 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) ZSTD_ldm_blockCompress(&ldmSeqStore, ms, &zc->seqStore, zc->blockState.nextCBlock->rep, + zc->appliedParams.useRowMatchFinder, src, srcSize); assert(ldmSeqStore.pos == ldmSeqStore.size); } else { /* not long range mode */ - ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, dictMode); + ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, + zc->appliedParams.useRowMatchFinder, + dictMode); ms->ldmSeqStore = NULL; lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); } @@ -2483,22 +2819,22 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1); ZSTD_memcpy(updatedRepcodes.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t)); for (i = 0; i < seqStoreSeqSize; ++i) { - U32 rawOffset = seqStoreSeqs[i].offset - ZSTD_REP_NUM; + U32 rawOffset = seqStoreSeqs[i].offBase - ZSTD_REP_NUM; outSeqs[i].litLength = 
seqStoreSeqs[i].litLength; - outSeqs[i].matchLength = seqStoreSeqs[i].matchLength + MINMATCH; + outSeqs[i].matchLength = seqStoreSeqs[i].mlBase + MINMATCH; outSeqs[i].rep = 0; if (i == seqStore->longLengthPos) { - if (seqStore->longLengthID == 1) { + if (seqStore->longLengthType == ZSTD_llt_literalLength) { outSeqs[i].litLength += 0x10000; - } else if (seqStore->longLengthID == 2) { + } else if (seqStore->longLengthType == ZSTD_llt_matchLength) { outSeqs[i].matchLength += 0x10000; } } - if (seqStoreSeqs[i].offset <= ZSTD_REP_NUM) { + if (seqStoreSeqs[i].offBase <= ZSTD_REP_NUM) { /* Derive the correct offset corresponding to a repcode */ - outSeqs[i].rep = seqStoreSeqs[i].offset; + outSeqs[i].rep = seqStoreSeqs[i].offBase; if (outSeqs[i].litLength != 0) { rawOffset = updatedRepcodes.rep[outSeqs[i].rep - 1]; } else { @@ -2512,9 +2848,9 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) outSeqs[i].offset = rawOffset; /* seqStoreSeqs[i].offset == offCode+1, and ZSTD_updateRep() expects offCode so we provide seqStoreSeqs[i].offset - 1 */ - updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, - seqStoreSeqs[i].offset - 1, - seqStoreSeqs[i].litLength == 0); + ZSTD_updateRep(updatedRepcodes.rep, + seqStoreSeqs[i].offBase - 1, + seqStoreSeqs[i].litLength == 0); literalsRead += outSeqs[i].litLength; } /* Insert last literals (if any exist) in the block as a sequence with ml == off == 0. 
@@ -2602,16 +2938,740 @@ static int ZSTD_maybeRLE(seqStore_t const* seqStore) return nbSeqs < 4 && nbLits < 10; } -static void ZSTD_confirmRepcodesAndEntropyTables(ZSTD_CCtx* zc) +static void ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* const bs) { - ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock; - zc->blockState.prevCBlock = zc->blockState.nextCBlock; - zc->blockState.nextCBlock = tmp; + ZSTD_compressedBlockState_t* const tmp = bs->prevCBlock; + bs->prevCBlock = bs->nextCBlock; + bs->nextCBlock = tmp; } -static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, U32 frame) +/* Writes the block header */ +static void writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastBlock) { + U32 const cBlockHeader = cSize == 1 ? + lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : + lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); + MEM_writeLE24(op, cBlockHeader); + DEBUGLOG(3, "writeBlockHeader: cSize: %zu blockSize: %zu lastBlock: %u", cSize, blockSize, lastBlock); +} + +/* ZSTD_buildBlockEntropyStats_literals() : + * Builds entropy for the literals. + * Stores literals block type (raw, rle, compressed, repeat) and + * huffman description table to hufMetadata. 
+ * Requires ENTROPY_WORKSPACE_SIZE workspace + * @return : size of huffman description table or error code */ +static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize, + const ZSTD_hufCTables_t* prevHuf, + ZSTD_hufCTables_t* nextHuf, + ZSTD_hufCTablesMetadata_t* hufMetadata, + const int literalsCompressionIsDisabled, + void* workspace, size_t wkspSize) +{ + BYTE* const wkspStart = (BYTE*)workspace; + BYTE* const wkspEnd = wkspStart + wkspSize; + BYTE* const countWkspStart = wkspStart; + unsigned* const countWksp = (unsigned*)workspace; + const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned); + BYTE* const nodeWksp = countWkspStart + countWkspSize; + const size_t nodeWkspSize = wkspEnd-nodeWksp; + unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX; + unsigned huffLog = HUF_TABLELOG_DEFAULT; + HUF_repeat repeat = prevHuf->repeatMode; + DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_literals (srcSize=%zu)", srcSize); + + /* Prepare nextEntropy assuming reusing the existing table */ + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + + if (literalsCompressionIsDisabled) { + DEBUGLOG(5, "set_basic - disabled"); + hufMetadata->hType = set_basic; + return 0; + } + + /* small ? don't even attempt compression (speed opt) */ +#ifndef COMPRESS_LITERALS_SIZE_MIN +#define COMPRESS_LITERALS_SIZE_MIN 63 +#endif + { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 
6 : COMPRESS_LITERALS_SIZE_MIN; + if (srcSize <= minLitSize) { + DEBUGLOG(5, "set_basic - too small"); + hufMetadata->hType = set_basic; + return 0; + } + } + + /* Scan input and build symbol stats */ + { size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize); + FORWARD_IF_ERROR(largest, "HIST_count_wksp failed"); + if (largest == srcSize) { + DEBUGLOG(5, "set_rle"); + hufMetadata->hType = set_rle; + return 0; + } + if (largest <= (srcSize >> 7)+4) { + DEBUGLOG(5, "set_basic - no gain"); + hufMetadata->hType = set_basic; + return 0; + } + } + + /* Validate the previous Huffman table */ + if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) { + repeat = HUF_repeat_none; + } + + /* Build Huffman Tree */ + ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable)); + huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); + { size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp, + maxSymbolValue, huffLog, + nodeWksp, nodeWkspSize); + FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp"); + huffLog = (U32)maxBits; + { /* Build and write the CTable */ + size_t const newCSize = HUF_estimateCompressedSize( + (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue); + size_t const hSize = HUF_writeCTable_wksp( + hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer), + (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog, + nodeWksp, nodeWkspSize); + /* Check against repeating the previous CTable */ + if (repeat != HUF_repeat_none) { + size_t const oldCSize = HUF_estimateCompressedSize( + (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue); + if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) { + DEBUGLOG(5, "set_repeat - smaller"); + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + hufMetadata->hType = set_repeat; + return 0; + } + } + if (newCSize + hSize >= srcSize) { + 
DEBUGLOG(5, "set_basic - no gains"); + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + hufMetadata->hType = set_basic; + return 0; + } + DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize); + hufMetadata->hType = set_compressed; + nextHuf->repeatMode = HUF_repeat_check; + return hSize; + } + } +} + + +/* ZSTD_buildDummySequencesStatistics(): + * Returns a ZSTD_symbolEncodingTypeStats_t with all encoding types as set_basic, + * and updates nextEntropy to the appropriate repeatMode. + */ +static ZSTD_symbolEncodingTypeStats_t +ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy) { + ZSTD_symbolEncodingTypeStats_t stats = {set_basic, set_basic, set_basic, 0, 0}; + nextEntropy->litlength_repeatMode = FSE_repeat_none; + nextEntropy->offcode_repeatMode = FSE_repeat_none; + nextEntropy->matchlength_repeatMode = FSE_repeat_none; + return stats; +} + +/* ZSTD_buildBlockEntropyStats_sequences() : + * Builds entropy for the sequences. + * Stores symbol compression modes and fse table to fseMetadata. + * Requires ENTROPY_WORKSPACE_SIZE wksp. 
+ * @return : size of fse tables or error code */ +static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr, + const ZSTD_fseCTables_t* prevEntropy, + ZSTD_fseCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + ZSTD_fseCTablesMetadata_t* fseMetadata, + void* workspace, size_t wkspSize) +{ + ZSTD_strategy const strategy = cctxParams->cParams.strategy; + size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; + BYTE* const ostart = fseMetadata->fseTablesBuffer; + BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer); + BYTE* op = ostart; + unsigned* countWorkspace = (unsigned*)workspace; + unsigned* entropyWorkspace = countWorkspace + (MaxSeq + 1); + size_t entropyWorkspaceSize = wkspSize - (MaxSeq + 1) * sizeof(*countWorkspace); + ZSTD_symbolEncodingTypeStats_t stats; + + DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_sequences (nbSeq=%zu)", nbSeq); + stats = nbSeq != 0 ? ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq, + prevEntropy, nextEntropy, op, oend, + strategy, countWorkspace, + entropyWorkspace, entropyWorkspaceSize) + : ZSTD_buildDummySequencesStatistics(nextEntropy); + FORWARD_IF_ERROR(stats.size, "ZSTD_buildSequencesStatistics failed!"); + fseMetadata->llType = (symbolEncodingType_e) stats.LLtype; + fseMetadata->ofType = (symbolEncodingType_e) stats.Offtype; + fseMetadata->mlType = (symbolEncodingType_e) stats.MLtype; + fseMetadata->lastCountSize = stats.lastCountSize; + return stats.size; +} + + +/* ZSTD_buildBlockEntropyStats() : + * Builds entropy for the block. 
+ * Requires workspace size ENTROPY_WORKSPACE_SIZE + * + * @return : 0 on success or error code + */ +size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize) +{ + size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart; + entropyMetadata->hufMetadata.hufDesSize = + ZSTD_buildBlockEntropyStats_literals(seqStorePtr->litStart, litSize, + &prevEntropy->huf, &nextEntropy->huf, + &entropyMetadata->hufMetadata, + ZSTD_literalsCompressionIsDisabled(cctxParams), + workspace, wkspSize); + FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildBlockEntropyStats_literals failed"); + entropyMetadata->fseMetadata.fseTablesSize = + ZSTD_buildBlockEntropyStats_sequences(seqStorePtr, + &prevEntropy->fse, &nextEntropy->fse, + cctxParams, + &entropyMetadata->fseMetadata, + workspace, wkspSize); + FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildBlockEntropyStats_sequences failed"); + return 0; +} + +/* Returns the size estimate for the literals section (header + content) of a block */ +static size_t ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSize, + const ZSTD_hufCTables_t* huf, + const ZSTD_hufCTablesMetadata_t* hufMetadata, + void* workspace, size_t wkspSize, + int writeEntropy) +{ + unsigned* const countWksp = (unsigned*)workspace; + unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX; + size_t literalSectionHeaderSize = 3 + (litSize >= 1 KB) + (litSize >= 16 KB); + U32 singleStream = litSize < 256; + + if (hufMetadata->hType == set_basic) return litSize; + else if (hufMetadata->hType == set_rle) return 1; + else if (hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat) { + size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize); + 
if (ZSTD_isError(largest)) return litSize; + { size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue); + if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize; + if (!singleStream) cLitSizeEstimate += 6; /* multi-stream huffman uses 6-byte jump table */ + return cLitSizeEstimate + literalSectionHeaderSize; + } } + assert(0); /* impossible */ + return 0; +} + +/* Returns the size estimate for the FSE-compressed symbols (of, ml, ll) of a block */ +static size_t ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type, + const BYTE* codeTable, size_t nbSeq, unsigned maxCode, + const FSE_CTable* fseCTable, + const U8* additionalBits, + short const* defaultNorm, U32 defaultNormLog, U32 defaultMax, + void* workspace, size_t wkspSize) +{ + unsigned* const countWksp = (unsigned*)workspace; + const BYTE* ctp = codeTable; + const BYTE* const ctStart = ctp; + const BYTE* const ctEnd = ctStart + nbSeq; + size_t cSymbolTypeSizeEstimateInBits = 0; + unsigned max = maxCode; + + HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize); /* can't fail */ + if (type == set_basic) { + /* We selected this encoding type, so it must be valid. 
*/ + assert(max <= defaultMax); + (void)defaultMax; + cSymbolTypeSizeEstimateInBits = ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max); + } else if (type == set_rle) { + cSymbolTypeSizeEstimateInBits = 0; + } else if (type == set_compressed || type == set_repeat) { + cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max); + } + if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) { + return nbSeq * 10; + } + while (ctp < ctEnd) { + if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp]; + else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */ + ctp++; + } + return cSymbolTypeSizeEstimateInBits >> 3; +} + +/* Returns the size estimate for the sequences section (header + content) of a block */ +static size_t ZSTD_estimateBlockSize_sequences(const BYTE* ofCodeTable, + const BYTE* llCodeTable, + const BYTE* mlCodeTable, + size_t nbSeq, + const ZSTD_fseCTables_t* fseTables, + const ZSTD_fseCTablesMetadata_t* fseMetadata, + void* workspace, size_t wkspSize, + int writeEntropy) +{ + size_t sequencesSectionHeaderSize = 1 /* seqHead */ + 1 /* min seqSize size */ + (nbSeq >= 128) + (nbSeq >= LONGNBSEQ); + size_t cSeqSizeEstimate = 0; + cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, nbSeq, MaxOff, + fseTables->offcodeCTable, NULL, + OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, + workspace, wkspSize); + cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->llType, llCodeTable, nbSeq, MaxLL, + fseTables->litlengthCTable, LL_bits, + LL_defaultNorm, LL_defaultNormLog, MaxLL, + workspace, wkspSize); + cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, nbSeq, MaxML, + fseTables->matchlengthCTable, ML_bits, + ML_defaultNorm, ML_defaultNormLog, MaxML, + workspace, wkspSize); + if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize; + return cSeqSizeEstimate + 
sequencesSectionHeaderSize; +} + +/* Returns the size estimate for a given stream of literals, of, ll, ml */ +static size_t ZSTD_estimateBlockSize(const BYTE* literals, size_t litSize, + const BYTE* ofCodeTable, + const BYTE* llCodeTable, + const BYTE* mlCodeTable, + size_t nbSeq, + const ZSTD_entropyCTables_t* entropy, + const ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize, + int writeLitEntropy, int writeSeqEntropy) { + size_t const literalsSize = ZSTD_estimateBlockSize_literal(literals, litSize, + &entropy->huf, &entropyMetadata->hufMetadata, + workspace, wkspSize, writeLitEntropy); + size_t const seqSize = ZSTD_estimateBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable, + nbSeq, &entropy->fse, &entropyMetadata->fseMetadata, + workspace, wkspSize, writeSeqEntropy); + return seqSize + literalsSize + ZSTD_blockHeaderSize; +} + +/* Builds entropy statistics and uses them for blocksize estimation. + * + * Returns the estimated compressed size of the seqStore, or a zstd error. 
+ */ +static size_t ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t* seqStore, ZSTD_CCtx* zc) { + ZSTD_entropyCTablesMetadata_t* entropyMetadata = &zc->blockSplitCtx.entropyMetadata; + DEBUGLOG(6, "ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize()"); + FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(seqStore, + &zc->blockState.prevCBlock->entropy, + &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, + entropyMetadata, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), ""); + return ZSTD_estimateBlockSize(seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart), + seqStore->ofCode, seqStore->llCode, seqStore->mlCode, + (size_t)(seqStore->sequences - seqStore->sequencesStart), + &zc->blockState.nextCBlock->entropy, entropyMetadata, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE, + (int)(entropyMetadata->hufMetadata.hType == set_compressed), 1); +} + +/* Returns literals bytes represented in a seqStore */ +static size_t ZSTD_countSeqStoreLiteralsBytes(const seqStore_t* const seqStore) { + size_t literalsBytes = 0; + size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart; + size_t i; + for (i = 0; i < nbSeqs; ++i) { + seqDef seq = seqStore->sequencesStart[i]; + literalsBytes += seq.litLength; + if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_literalLength) { + literalsBytes += 0x10000; + } + } + return literalsBytes; +} + +/* Returns match bytes represented in a seqStore */ +static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore) { + size_t matchBytes = 0; + size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart; + size_t i; + for (i = 0; i < nbSeqs; ++i) { + seqDef seq = seqStore->sequencesStart[i]; + matchBytes += seq.mlBase + MINMATCH; + if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_matchLength) { + matchBytes += 0x10000; + } + } + return matchBytes; +} + +/* Derives the seqStore that is a chunk of the 
originalSeqStore from [startIdx, endIdx). + * Stores the result in resultSeqStore. + */ +static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore, + const seqStore_t* originalSeqStore, + size_t startIdx, size_t endIdx) { + BYTE* const litEnd = originalSeqStore->lit; + size_t literalsBytes; + size_t literalsBytesPreceding = 0; + + *resultSeqStore = *originalSeqStore; + if (startIdx > 0) { + resultSeqStore->sequences = originalSeqStore->sequencesStart + startIdx; + literalsBytesPreceding = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore); + } + + /* Move longLengthPos into the correct position if necessary */ + if (originalSeqStore->longLengthType != ZSTD_llt_none) { + if (originalSeqStore->longLengthPos < startIdx || originalSeqStore->longLengthPos > endIdx) { + resultSeqStore->longLengthType = ZSTD_llt_none; + } else { + resultSeqStore->longLengthPos -= (U32)startIdx; + } + } + resultSeqStore->sequencesStart = originalSeqStore->sequencesStart + startIdx; + resultSeqStore->sequences = originalSeqStore->sequencesStart + endIdx; + literalsBytes = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore); + resultSeqStore->litStart += literalsBytesPreceding; + if (endIdx == (size_t)(originalSeqStore->sequences - originalSeqStore->sequencesStart)) { + /* This accounts for possible last literals if the derived chunk reaches the end of the block */ + resultSeqStore->lit = litEnd; + } else { + resultSeqStore->lit = resultSeqStore->litStart+literalsBytes; + } + resultSeqStore->llCode += startIdx; + resultSeqStore->mlCode += startIdx; + resultSeqStore->ofCode += startIdx; +} + +/* + * Returns the raw offset represented by the combination of offCode, ll0, and repcode history. + * offCode must represent a repcode in the numeric representation of ZSTD_storeSeq(). 
+ */ +static U32 +ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offCode, const U32 ll0) +{ + U32 const adjustedOffCode = STORED_REPCODE(offCode) - 1 + ll0; /* [ 0 - 3 ] */ + assert(STORED_IS_REPCODE(offCode)); + if (adjustedOffCode == ZSTD_REP_NUM) { + /* litlength == 0 and offCode == 2 implies selection of first repcode - 1 */ + assert(rep[0] > 0); + return rep[0] - 1; + } + return rep[adjustedOffCode]; +} + +/* + * ZSTD_seqStore_resolveOffCodes() reconciles any possible divergences in offset history that may arise + * due to emission of RLE/raw blocks that disturb the offset history, + * and replaces any repcodes within the seqStore that may be invalid. + * + * dRepcodes are updated as would be on the decompression side. + * cRepcodes are updated exactly in accordance with the seqStore. + * + * Note : this function assumes seq->offBase respects the following numbering scheme : + * 0 : invalid + * 1-3 : repcode 1-3 + * 4+ : real_offset+3 + */ +static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_t* const cRepcodes, + seqStore_t* const seqStore, U32 const nbSeq) { + U32 idx = 0; + for (; idx < nbSeq; ++idx) { + seqDef* const seq = seqStore->sequencesStart + idx; + U32 const ll0 = (seq->litLength == 0); + U32 const offCode = OFFBASE_TO_STORED(seq->offBase); + assert(seq->offBase > 0); + if (STORED_IS_REPCODE(offCode)) { + U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep, offCode, ll0); + U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep, offCode, ll0); + /* Adjust simulated decompression repcode history if we come across a mismatch. Replace + * the repcode with the offset it actually references, determined by the compression + * repcode history. + */ + if (dRawOffset != cRawOffset) { + seq->offBase = cRawOffset + ZSTD_REP_NUM; + } + } + /* Compression repcode history is always updated with values directly from the unmodified seqStore. 
+ * Decompression repcode history may use modified seq->offset value taken from compression repcode history. + */ + ZSTD_updateRep(dRepcodes->rep, OFFBASE_TO_STORED(seq->offBase), ll0); + ZSTD_updateRep(cRepcodes->rep, offCode, ll0); + } +} + +/* ZSTD_compressSeqStore_singleBlock(): + * Compresses a seqStore into a block with a block header, into the buffer dst. + * + * Returns the total size of that block (including header) or a ZSTD error code. + */ +static size_t +ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const seqStore, + repcodes_t* const dRep, repcodes_t* const cRep, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + U32 lastBlock, U32 isPartition) +{ + const U32 rleMaxLength = 25; + BYTE* op = (BYTE*)dst; + const BYTE* ip = (const BYTE*)src; + size_t cSize; + size_t cSeqsSize; + + /* In case of an RLE or raw block, the simulated decompression repcode history must be reset */ + repcodes_t const dRepOriginal = *dRep; + DEBUGLOG(5, "ZSTD_compressSeqStore_singleBlock"); + if (isPartition) + ZSTD_seqStore_resolveOffCodes(dRep, cRep, seqStore, (U32)(seqStore->sequences - seqStore->sequencesStart)); + + RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall, "Block header doesn't fit"); + cSeqsSize = ZSTD_entropyCompressSeqStore(seqStore, + &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, + op + ZSTD_blockHeaderSize, dstCapacity - ZSTD_blockHeaderSize, + srcSize, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, + zc->bmi2); + FORWARD_IF_ERROR(cSeqsSize, "ZSTD_entropyCompressSeqStore failed!"); + + if (!zc->isFirstBlock && + cSeqsSize < rleMaxLength && + ZSTD_isRLE((BYTE const*)src, srcSize)) { + /* We don't want to emit our first block as a RLE even if it qualifies because + * doing so will cause the decoder (cli only) to throw a "should consume all input error." 
+ * This is only an issue for zstd <= v1.4.3 + */ + cSeqsSize = 1; + } + + if (zc->seqCollector.collectSequences) { + ZSTD_copyBlockSequences(zc); + ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); + return 0; + } + + if (cSeqsSize == 0) { + cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock); + FORWARD_IF_ERROR(cSize, "Nocompress block failed"); + DEBUGLOG(4, "Writing out nocompress block, size: %zu", cSize); + *dRep = dRepOriginal; /* reset simulated decompression repcode history */ + } else if (cSeqsSize == 1) { + cSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, srcSize, lastBlock); + FORWARD_IF_ERROR(cSize, "RLE compress block failed"); + DEBUGLOG(4, "Writing out RLE block, size: %zu", cSize); + *dRep = dRepOriginal; /* reset simulated decompression repcode history */ + } else { + ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); + writeBlockHeader(op, cSeqsSize, srcSize, lastBlock); + cSize = ZSTD_blockHeaderSize + cSeqsSize; + DEBUGLOG(4, "Writing out compressed block, size: %zu", cSize); + } + + if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; + + return cSize; +} + +/* Struct to keep track of where we are in our recursive calls. */ +typedef struct { + U32* splitLocations; /* Array of split indices */ + size_t idx; /* The current index within splitLocations being worked on */ +} seqStoreSplits; + +#define MIN_SEQUENCES_BLOCK_SPLITTING 300 + +/* Helper function to perform the recursive search for block splits. + * Estimates the cost of seqStore prior to split, and estimates the cost of splitting the sequences in half. + * If advantageous to split, then we recurse down the two sub-blocks. If not, or if an error occurred in estimation, then + * we do not recurse. + * + * Note: The recursion depth is capped by a heuristic minimum number of sequences, defined by MIN_SEQUENCES_BLOCK_SPLITTING. 
+ * In theory, this means the absolute largest recursion depth is 10 == log2(maxNbSeqInBlock/MIN_SEQUENCES_BLOCK_SPLITTING). + * In practice, recursion depth usually doesn't go beyond 4. + * + * Furthermore, the number of splits is capped by ZSTD_MAX_NB_BLOCK_SPLITS. At ZSTD_MAX_NB_BLOCK_SPLITS == 196 with the current existing blockSize + * maximum of 128 KB, this value is actually impossible to reach. + */ +static void +ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t endIdx, + ZSTD_CCtx* zc, const seqStore_t* origSeqStore) +{ + seqStore_t* fullSeqStoreChunk = &zc->blockSplitCtx.fullSeqStoreChunk; + seqStore_t* firstHalfSeqStore = &zc->blockSplitCtx.firstHalfSeqStore; + seqStore_t* secondHalfSeqStore = &zc->blockSplitCtx.secondHalfSeqStore; + size_t estimatedOriginalSize; + size_t estimatedFirstHalfSize; + size_t estimatedSecondHalfSize; + size_t midIdx = (startIdx + endIdx)/2; + + if (endIdx - startIdx < MIN_SEQUENCES_BLOCK_SPLITTING || splits->idx >= ZSTD_MAX_NB_BLOCK_SPLITS) { + DEBUGLOG(6, "ZSTD_deriveBlockSplitsHelper: Too few sequences"); + return; + } + DEBUGLOG(4, "ZSTD_deriveBlockSplitsHelper: startIdx=%zu endIdx=%zu", startIdx, endIdx); + ZSTD_deriveSeqStoreChunk(fullSeqStoreChunk, origSeqStore, startIdx, endIdx); + ZSTD_deriveSeqStoreChunk(firstHalfSeqStore, origSeqStore, startIdx, midIdx); + ZSTD_deriveSeqStoreChunk(secondHalfSeqStore, origSeqStore, midIdx, endIdx); + estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(fullSeqStoreChunk, zc); + estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(firstHalfSeqStore, zc); + estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(secondHalfSeqStore, zc); + DEBUGLOG(4, "Estimated original block size: %zu -- First half split: %zu -- Second half split: %zu", + estimatedOriginalSize, estimatedFirstHalfSize, estimatedSecondHalfSize); + if (ZSTD_isError(estimatedOriginalSize) || ZSTD_isError(estimatedFirstHalfSize) || 
ZSTD_isError(estimatedSecondHalfSize)) { + return; + } + if (estimatedFirstHalfSize + estimatedSecondHalfSize < estimatedOriginalSize) { + ZSTD_deriveBlockSplitsHelper(splits, startIdx, midIdx, zc, origSeqStore); + splits->splitLocations[splits->idx] = (U32)midIdx; + splits->idx++; + ZSTD_deriveBlockSplitsHelper(splits, midIdx, endIdx, zc, origSeqStore); + } +} + +/* Base recursive function. Populates a table with intra-block partition indices that can improve compression ratio. + * + * Returns the number of splits made (which equals the size of the partition table - 1). + */ +static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq) { + seqStoreSplits splits = {partitions, 0}; + if (nbSeq <= 4) { + DEBUGLOG(4, "ZSTD_deriveBlockSplits: Too few sequences to split"); + /* Refuse to try and split anything with less than 4 sequences */ + return 0; + } + ZSTD_deriveBlockSplitsHelper(&splits, 0, nbSeq, zc, &zc->seqStore); + splits.splitLocations[splits.idx] = nbSeq; + DEBUGLOG(5, "ZSTD_deriveBlockSplits: final nb partitions: %zu", splits.idx+1); + return splits.idx; +} + +/* ZSTD_compressBlock_splitBlock(): + * Attempts to split a given block into multiple blocks to improve compression ratio. + * + * Returns combined size of all blocks (which includes headers), or a ZSTD error code. 
+ */ +static size_t +ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, + const void* src, size_t blockSize, U32 lastBlock, U32 nbSeq) +{ + size_t cSize = 0; + const BYTE* ip = (const BYTE*)src; + BYTE* op = (BYTE*)dst; + size_t i = 0; + size_t srcBytesTotal = 0; + U32* partitions = zc->blockSplitCtx.partitions; /* size == ZSTD_MAX_NB_BLOCK_SPLITS */ + seqStore_t* nextSeqStore = &zc->blockSplitCtx.nextSeqStore; + seqStore_t* currSeqStore = &zc->blockSplitCtx.currSeqStore; + size_t numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq); + + /* If a block is split and some partitions are emitted as RLE/uncompressed, then repcode history + * may become invalid. In order to reconcile potentially invalid repcodes, we keep track of two + * separate repcode histories that simulate repcode history on compression and decompression side, + * and use the histories to determine whether we must replace a particular repcode with its raw offset. + * + * 1) cRep gets updated for each partition, regardless of whether the block was emitted as uncompressed + * or RLE. This allows us to retrieve the offset value that an invalid repcode references within + * a nocompress/RLE block. + * 2) dRep gets updated only for compressed partitions, and when a repcode gets replaced, will use + * the replacement offset value rather than the original repcode to update the repcode history. + * dRep also will be the final repcode history sent to the next block. + * + * See ZSTD_seqStore_resolveOffCodes() for more details. 
+ */ + repcodes_t dRep; + repcodes_t cRep; + ZSTD_memcpy(dRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t)); + ZSTD_memcpy(cRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t)); + ZSTD_memset(nextSeqStore, 0, sizeof(seqStore_t)); + + DEBUGLOG(4, "ZSTD_compressBlock_splitBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", + (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, + (unsigned)zc->blockState.matchState.nextToUpdate); + + if (numSplits == 0) { + size_t cSizeSingleBlock = ZSTD_compressSeqStore_singleBlock(zc, &zc->seqStore, + &dRep, &cRep, + op, dstCapacity, + ip, blockSize, + lastBlock, 0 /* isPartition */); + FORWARD_IF_ERROR(cSizeSingleBlock, "Compressing single block from splitBlock_internal() failed!"); + DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal: No splits"); + assert(cSizeSingleBlock <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize); + return cSizeSingleBlock; + } + + ZSTD_deriveSeqStoreChunk(currSeqStore, &zc->seqStore, 0, partitions[0]); + for (i = 0; i <= numSplits; ++i) { + size_t srcBytes; + size_t cSizeChunk; + U32 const lastPartition = (i == numSplits); + U32 lastBlockEntireSrc = 0; + + srcBytes = ZSTD_countSeqStoreLiteralsBytes(currSeqStore) + ZSTD_countSeqStoreMatchBytes(currSeqStore); + srcBytesTotal += srcBytes; + if (lastPartition) { + /* This is the final partition, need to account for possible last literals */ + srcBytes += blockSize - srcBytesTotal; + lastBlockEntireSrc = lastBlock; + } else { + ZSTD_deriveSeqStoreChunk(nextSeqStore, &zc->seqStore, partitions[i], partitions[i+1]); + } + + cSizeChunk = ZSTD_compressSeqStore_singleBlock(zc, currSeqStore, + &dRep, &cRep, + op, dstCapacity, + ip, srcBytes, + lastBlockEntireSrc, 1 /* isPartition */); + DEBUGLOG(5, "Estimated size: %zu actual size: %zu", ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(currSeqStore, zc), cSizeChunk); + FORWARD_IF_ERROR(cSizeChunk, "Compressing chunk failed!"); + + ip += srcBytes; + op += 
cSizeChunk; + dstCapacity -= cSizeChunk; + cSize += cSizeChunk; + *currSeqStore = *nextSeqStore; + assert(cSizeChunk <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize); + } + /* cRep and dRep may have diverged during the compression. If so, we use the dRep repcodes + * for the next block. + */ + ZSTD_memcpy(zc->blockState.prevCBlock->rep, dRep.rep, sizeof(repcodes_t)); + return cSize; +} + +static size_t +ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, U32 lastBlock) +{ + const BYTE* ip = (const BYTE*)src; + BYTE* op = (BYTE*)dst; + U32 nbSeq; + size_t cSize; + DEBUGLOG(4, "ZSTD_compressBlock_splitBlock"); + assert(zc->appliedParams.useBlockSplitter == ZSTD_ps_enable); + + { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); + FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); + if (bss == ZSTDbss_noCompress) { + if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; + cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); + DEBUGLOG(4, "ZSTD_compressBlock_splitBlock: Nocompress block"); + return cSize; + } + nbSeq = (U32)(zc->seqStore.sequences - zc->seqStore.sequencesStart); + } + + cSize = ZSTD_compressBlock_splitBlock_internal(zc, dst, dstCapacity, src, srcSize, lastBlock, nbSeq); + FORWARD_IF_ERROR(cSize, "Splitting blocks failed!"); + return cSize; +} + +static size_t +ZSTD_compressBlock_internal(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, U32 frame) { /* This the upper bound for the length of an rle block. * This isn't the actual upper bound. 
Finding the real threshold @@ -2632,12 +3692,12 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, if (zc->seqCollector.collectSequences) { ZSTD_copyBlockSequences(zc); - ZSTD_confirmRepcodesAndEntropyTables(zc); + ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); return 0; } /* encode sequences and literals */ - cSize = ZSTD_entropyCompressSequences(&zc->seqStore, + cSize = ZSTD_entropyCompressSeqStore(&zc->seqStore, &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, &zc->appliedParams, dst, dstCapacity, @@ -2645,12 +3705,6 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, zc->bmi2); - if (zc->seqCollector.collectSequences) { - ZSTD_copyBlockSequences(zc); - return 0; - } - - if (frame && /* We don't want to emit our first block as a RLE even if it qualifies because * doing so will cause the decoder (cli only) to throw a "should consume all input error." @@ -2666,7 +3720,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, out: if (!ZSTD_isError(cSize) && cSize > 1) { - ZSTD_confirmRepcodesAndEntropyTables(zc); + ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); } /* We check that dictionaries have offset codes available for the first * block. 
After the first block, the offcode table might not have large @@ -2719,7 +3773,7 @@ static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc, size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy); FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed"); if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) { - ZSTD_confirmRepcodesAndEntropyTables(zc); + ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); return cSize; } } @@ -2759,9 +3813,9 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, void const* ip, void const* iend) { - if (ZSTD_window_needOverflowCorrection(ms->window, iend)) { - U32 const maxDist = (U32)1 << params->cParams.windowLog; - U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy); + U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy); + U32 const maxDist = (U32)1 << params->cParams.windowLog; + if (ZSTD_window_needOverflowCorrection(ms->window, cycleLog, maxDist, ms->loadedDictEnd, ip, iend)) { U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip); ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30); ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30); @@ -2784,7 +3838,7 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, * Frame is supposed already started (header already produced) * @return : compressed size, or an error code */ -static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, +static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastFrameChunk) @@ -2814,6 +3868,7 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, ZSTD_overflowCorrectIfNeeded( ms, &cctx->workspace, &cctx->appliedParams, ip, ip + blockSize); ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState); + ZSTD_window_enforceMaxDist(&ms->window, ip, maxDist, 
&ms->loadedDictEnd, &ms->dictMatchState); /* Ensure hash/chain table insertion resumes no sooner than lowlimit */ if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit; @@ -2824,6 +3879,10 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize failed"); assert(cSize > 0); assert(cSize <= blockSize + ZSTD_blockHeaderSize); + } else if (ZSTD_blockSplitterEnabled(&cctx->appliedParams)) { + cSize = ZSTD_compressBlock_splitBlock(cctx, op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_splitBlock failed"); + assert(cSize > 0 || cctx->seqCollector.collectSequences == 1); } else { cSize = ZSTD_compressBlock_internal(cctx, op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, @@ -2946,7 +4005,7 @@ size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSe { RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong, "wrong cctx stage"); - RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm, + RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable, parameter_unsupported, "incompatible with ldm"); cctx->externSeqStore.seq = seq; @@ -2983,11 +4042,12 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, if (!srcSize) return fhSize; /* do not generate an empty block if no input */ - if (!ZSTD_window_update(&ms->window, src, srcSize)) { + if (!ZSTD_window_update(&ms->window, src, srcSize, ms->forceNonContiguous)) { + ms->forceNonContiguous = 0; ms->nextToUpdate = ms->window.dictLimit; } - if (cctx->appliedParams.ldmParams.enableLdm) { - ZSTD_window_update(&cctx->ldmState.window, src, srcSize); + if (cctx->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) { + ZSTD_window_update(&cctx->ldmState.window, src, srcSize, /* forceNonContiguous */ 0); } if (!frame) { @@ -3055,63 +4115,86 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, { const BYTE* ip = (const BYTE*) src; 
const BYTE* const iend = ip + srcSize; - - ZSTD_window_update(&ms->window, src, srcSize); - ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base); - - if (params->ldmParams.enableLdm && ls != NULL) { - ZSTD_window_update(&ls->window, src, srcSize); - ls->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ls->window.base); - } + int const loadLdmDict = params->ldmParams.enableLdm == ZSTD_ps_enable && ls != NULL; /* Assert that we the ms params match the params we're being given */ ZSTD_assertEqualCParams(params->cParams, ms->cParams); + if (srcSize > ZSTD_CHUNKSIZE_MAX) { + /* Allow the dictionary to set indices up to exactly ZSTD_CURRENT_MAX. + * Dictionaries right at the edge will immediately trigger overflow + * correction, but I don't want to insert extra constraints here. + */ + U32 const maxDictSize = ZSTD_CURRENT_MAX - 1; + /* We must have cleared our windows when our source is this large. */ + assert(ZSTD_window_isEmpty(ms->window)); + if (loadLdmDict) + assert(ZSTD_window_isEmpty(ls->window)); + /* If the dictionary is too large, only load the suffix of the dictionary. */ + if (srcSize > maxDictSize) { + ip = iend - maxDictSize; + src = ip; + srcSize = maxDictSize; + } + } + + DEBUGLOG(4, "ZSTD_loadDictionaryContent(): useRowMatchFinder=%d", (int)params->useRowMatchFinder); + ZSTD_window_update(&ms->window, src, srcSize, /* forceNonContiguous */ 0); + ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base); + ms->forceNonContiguous = params->deterministicRefPrefix; + + if (loadLdmDict) { + ZSTD_window_update(&ls->window, src, srcSize, /* forceNonContiguous */ 0); + ls->loadedDictEnd = params->forceWindow ? 
0 : (U32)(iend - ls->window.base); + } + if (srcSize <= HASH_READ_SIZE) return 0; - while (iend - ip > HASH_READ_SIZE) { - size_t const remaining = (size_t)(iend - ip); - size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX); - const BYTE* const ichunk = ip + chunk; + ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, iend); - ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, ichunk); + if (loadLdmDict) + ZSTD_ldm_fillHashTable(ls, ip, iend, &params->ldmParams); - if (params->ldmParams.enableLdm && ls != NULL) - ZSTD_ldm_fillHashTable(ls, (const BYTE*)src, (const BYTE*)src + srcSize, &params->ldmParams); + switch(params->cParams.strategy) + { + case ZSTD_fast: + ZSTD_fillHashTable(ms, iend, dtlm); + break; + case ZSTD_dfast: + ZSTD_fillDoubleHashTable(ms, iend, dtlm); + break; - switch(params->cParams.strategy) - { - case ZSTD_fast: - ZSTD_fillHashTable(ms, ichunk, dtlm); - break; - case ZSTD_dfast: - ZSTD_fillDoubleHashTable(ms, ichunk, dtlm); - break; - - case ZSTD_greedy: - case ZSTD_lazy: - case ZSTD_lazy2: - if (chunk >= HASH_READ_SIZE && ms->dedicatedDictSearch) { - assert(chunk == remaining); /* must load everything in one go */ - ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, ichunk-HASH_READ_SIZE); - } else if (chunk >= HASH_READ_SIZE) { - ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE); + case ZSTD_greedy: + case ZSTD_lazy: + case ZSTD_lazy2: + assert(srcSize >= HASH_READ_SIZE); + if (ms->dedicatedDictSearch) { + assert(ms->chainTable != NULL); + ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, iend-HASH_READ_SIZE); + } else { + assert(params->useRowMatchFinder != ZSTD_ps_auto); + if (params->useRowMatchFinder == ZSTD_ps_enable) { + size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog) * sizeof(U16); + ZSTD_memset(ms->tagTable, 0, tagTableSize); + ZSTD_row_update(ms, iend-HASH_READ_SIZE); + DEBUGLOG(4, "Using row-based hash table for lazy dict"); + } else { + ZSTD_insertAndFindFirstIndex(ms, iend-HASH_READ_SIZE); + DEBUGLOG(4, "Using 
chain-based hash table for lazy dict"); } - break; - - case ZSTD_btlazy2: /* we want the dictionary table fully sorted */ - case ZSTD_btopt: - case ZSTD_btultra: - case ZSTD_btultra2: - if (chunk >= HASH_READ_SIZE) - ZSTD_updateTree(ms, ichunk-HASH_READ_SIZE, ichunk); - break; - - default: - assert(0); /* not possible : not a valid strategy id */ } + break; - ip = ichunk; + case ZSTD_btlazy2: /* we want the dictionary table fully sorted */ + case ZSTD_btopt: + case ZSTD_btultra: + case ZSTD_btultra2: + assert(srcSize >= HASH_READ_SIZE); + ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend); + break; + + default: + assert(0); /* not possible : not a valid strategy id */ } ms->nextToUpdate = (U32)(iend - ms->window.base); @@ -3250,7 +4333,6 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, const BYTE* const dictEnd = dictPtr + dictSize; size_t dictID; size_t eSize; - ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog))); assert(dictSize >= 8); assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY); @@ -3321,6 +4403,7 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params, U64 pledgedSrcSize, ZSTD_buffered_policy_e zbuff) { + size_t const dictContentSize = cdict ? cdict->dictContentSize : dictSize; DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog); /* params are supposed to be fully validated at this point */ assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); @@ -3335,7 +4418,8 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff); } - FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, *params, pledgedSrcSize, + FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, + dictContentSize, ZSTDcrp_makeClean, zbuff) , ""); { size_t const dictID = cdict ? 
ZSTD_compress_insertDictionary( @@ -3350,7 +4434,7 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed"); assert(dictID <= UINT_MAX); cctx->dictID = (U32)dictID; - cctx->dictContentSize = cdict ? cdict->dictContentSize : dictSize; + cctx->dictContentSize = dictContentSize; } return 0; } @@ -3485,15 +4569,14 @@ size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx, const void* dict,size_t dictSize, ZSTD_parameters params) { - ZSTD_CCtx_params cctxParams; DEBUGLOG(4, "ZSTD_compress_advanced"); FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), ""); - ZSTD_CCtxParams_init_internal(&cctxParams, &params, ZSTD_NO_CLEVEL); + ZSTD_CCtxParams_init_internal(&cctx->simpleApiParams, &params, ZSTD_NO_CLEVEL); return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, - &cctxParams); + &cctx->simpleApiParams); } /* Internal */ @@ -3517,14 +4600,13 @@ size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) { - ZSTD_CCtx_params cctxParams; { ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0, ZSTD_cpm_noAttachDict); assert(params.fParams.contentSizeFlag == 1); - ZSTD_CCtxParams_init_internal(&cctxParams, &params, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT: compressionLevel); + ZSTD_CCtxParams_init_internal(&cctx->simpleApiParams, &params, (compressionLevel == 0) ? 
ZSTD_CLEVEL_DEFAULT: compressionLevel); } DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)", (unsigned)srcSize); - return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctxParams); + return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctx->simpleApiParams); } size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, @@ -3561,7 +4643,10 @@ size_t ZSTD_estimateCDictSize_advanced( DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict)); return ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) - + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) + /* enableDedicatedDictSearch == 1 ensures that CDict estimation will not be too small + * in case we are using DDS with row-hash. */ + + ZSTD_sizeof_matchState(&cParams, ZSTD_resolveRowMatchFinderMode(ZSTD_ps_auto, &cParams), + /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *)))); } @@ -3592,9 +4677,6 @@ static size_t ZSTD_initCDict_internal( assert(!ZSTD_checkCParams(params.cParams)); cdict->matchState.cParams = params.cParams; cdict->matchState.dedicatedDictSearch = params.enableDedicatedDictSearch; - if (cdict->matchState.dedicatedDictSearch && dictSize > ZSTD_CHUNKSIZE_MAX) { - cdict->matchState.dedicatedDictSearch = 0; - } if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) { cdict->dictContent = dictBuffer; } else { @@ -3615,6 +4697,7 @@ static size_t ZSTD_initCDict_internal( &cdict->matchState, &cdict->workspace, &params.cParams, + params.useRowMatchFinder, ZSTDcrp_makeClean, ZSTDirp_reset, ZSTD_resetTarget_CDict), ""); @@ -3638,14 +4721,17 @@ static size_t ZSTD_initCDict_internal( static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_compressionParameters cParams, ZSTD_customMem customMem) + ZSTD_compressionParameters 
cParams, + ZSTD_paramSwitch_e useRowMatchFinder, + U32 enableDedicatedDictSearch, + ZSTD_customMem customMem) { if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; { size_t const workspaceSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) + - ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) + + ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, enableDedicatedDictSearch, /* forCCtx */ 0) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*)))); void* const workspace = ZSTD_customMalloc(workspaceSize, customMem); @@ -3664,7 +4750,7 @@ static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize, ZSTD_cwksp_move(&cdict->workspace, &ws); cdict->customMem = customMem; cdict->compressionLevel = ZSTD_NO_CLEVEL; /* signals advanced API usage */ - + cdict->useRowMatchFinder = useRowMatchFinder; return cdict; } } @@ -3686,7 +4772,7 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, &cctxParams, customMem); } -ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2( +ZSTD_CDict* ZSTD_createCDict_advanced2( const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType, @@ -3716,10 +4802,13 @@ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2( &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); } + DEBUGLOG(3, "ZSTD_createCDict_advanced2: DDS: %u", cctxParams.enableDedicatedDictSearch); cctxParams.cParams = cParams; + cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams); cdict = ZSTD_createCDict_advanced_internal(dictSize, dictLoadMethod, cctxParams.cParams, + cctxParams.useRowMatchFinder, cctxParams.enableDedicatedDictSearch, customMem); if (ZSTD_isError( ZSTD_initCDict_internal(cdict, @@ -3788,7 +4877,9 @@ const ZSTD_CDict* ZSTD_initStaticCDict( ZSTD_dictContentType_e dictContentType, ZSTD_compressionParameters 
cParams) { - size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0); + ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(ZSTD_ps_auto, &cParams); + /* enableDedicatedDictSearch == 1 ensures matchstate is not too small in case this CDict will be used for DDS + row hash */ + size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0); size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*)))) @@ -3813,6 +4904,8 @@ const ZSTD_CDict* ZSTD_initStaticCDict( ZSTD_CCtxParams_init(&params, 0); params.cParams = cParams; + params.useRowMatchFinder = useRowMatchFinder; + cdict->useRowMatchFinder = useRowMatchFinder; if (ZSTD_isError( ZSTD_initCDict_internal(cdict, dict, dictSize, @@ -3839,15 +4932,15 @@ unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict) return cdict->dictID; } - -/* ZSTD_compressBegin_usingCDict_advanced() : - * cdict must be != NULL */ -size_t ZSTD_compressBegin_usingCDict_advanced( +/* ZSTD_compressBegin_usingCDict_internal() : + * Implementation of various ZSTD_compressBegin_usingCDict* functions. + */ +static size_t ZSTD_compressBegin_usingCDict_internal( ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize) { ZSTD_CCtx_params cctxParams; - DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced"); + DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_internal"); RETURN_ERROR_IF(cdict==NULL, dictionary_wrong, "NULL pointer!"); /* Initialize the cctxParams from the cdict */ { @@ -3879,23 +4972,46 @@ size_t ZSTD_compressBegin_usingCDict_advanced( ZSTDb_not_buffered); } + +/* ZSTD_compressBegin_usingCDict_advanced() : + * This function is DEPRECATED. 
+ * cdict must be != NULL */ +size_t ZSTD_compressBegin_usingCDict_advanced( + ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, + ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize) +{ + return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, pledgedSrcSize); +} + /* ZSTD_compressBegin_usingCDict() : - * pledgedSrcSize=0 means "unknown" - * if pledgedSrcSize>0, it will enable contentSizeFlag */ + * cdict must be != NULL */ size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) { ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; - DEBUGLOG(4, "ZSTD_compressBegin_usingCDict : dictIDFlag == %u", !fParams.noDictIDFlag); - return ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN); + return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN); } +/*! ZSTD_compress_usingCDict_internal(): + * Implementation of various ZSTD_compress_usingCDict* functions. + */ +static size_t ZSTD_compress_usingCDict_internal(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, ZSTD_frameParameters fParams) +{ + FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, srcSize), ""); /* will check if cdict != NULL */ + return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); +} + +/*! ZSTD_compress_usingCDict_advanced(): + * This function is DEPRECATED. + */ size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const ZSTD_CDict* cdict, ZSTD_frameParameters fParams) { - FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize), ""); /* will check if cdict != NULL */ - return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); + return ZSTD_compress_usingCDict_internal(cctx, dst, dstCapacity, src, srcSize, cdict, fParams); } /*! 
ZSTD_compress_usingCDict() : @@ -3909,7 +5025,7 @@ size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) { ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; - return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, fParams); + return ZSTD_compress_usingCDict_internal(cctx, dst, dstCapacity, src, srcSize, cdict, fParams); } @@ -4313,8 +5429,13 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx, FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */ ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */ assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */ - if (cctx->cdict) - params.compressionLevel = cctx->cdict->compressionLevel; /* let cdict take priority in terms of compression level */ + if (cctx->cdict && !cctx->localDict.cdict) { + /* Let the cdict's compression level take priority over the requested params. + * But do not take the cdict's compression level if the "cdict" is actually a localDict + * generated from ZSTD_initLocalDict(). 
+ */ + params.compressionLevel = cctx->cdict->compressionLevel; + } DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage"); if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1; /* auto-fix pledgedSrcSize */ { @@ -4327,11 +5448,9 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx, dictSize, mode); } - if (ZSTD_CParams_shouldEnableLdm(&params.cParams)) { - /* Enable LDM by default for optimal parser and window size >= 128MB */ - DEBUGLOG(4, "LDM enabled by default (window size >= 128MB, strategy >= btopt)"); - params.ldmParams.enableLdm = 1; - } + params.useBlockSplitter = ZSTD_resolveBlockSplitterMode(params.useBlockSplitter, &params.cParams); + params.ldmParams.enableLdm = ZSTD_resolveEnableLdm(params.ldmParams.enableLdm, &params.cParams); + params.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params.useRowMatchFinder, &params.cParams); { U64 const pledgedSrcSize = cctx->pledgedSrcSizePlusOne - 1; assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); @@ -4436,39 +5555,39 @@ typedef struct { size_t posInSrc; /* Number of bytes given by sequences provided so far */ } ZSTD_sequencePosition; -/* Returns a ZSTD error code if sequence is not valid */ -static size_t ZSTD_validateSequence(U32 offCode, U32 matchLength, - size_t posInSrc, U32 windowLog, size_t dictSize, U32 minMatch) { - size_t offsetBound; - U32 windowSize = 1 << windowLog; +/* ZSTD_validateSequence() : + * @offCode : is presumed to follow format required by ZSTD_storeSeq() + * @returns a ZSTD error code if sequence is not valid + */ +static size_t +ZSTD_validateSequence(U32 offCode, U32 matchLength, + size_t posInSrc, U32 windowLog, size_t dictSize) +{ + U32 const windowSize = 1 << windowLog; /* posInSrc represents the amount of data the decoder would decode up to this point. * As long as the amount of data decoded is less than or equal to window size, offsets may be * larger than the total length of output decoded in order to reference the dict, even larger than * window size. 
After output surpasses windowSize, we're limited to windowSize offsets again. */ - offsetBound = posInSrc > windowSize ? (size_t)windowSize : posInSrc + (size_t)dictSize; - RETURN_ERROR_IF(offCode > offsetBound + ZSTD_REP_MOVE, corruption_detected, "Offset too large!"); - RETURN_ERROR_IF(matchLength < minMatch, corruption_detected, "Matchlength too small"); + size_t const offsetBound = posInSrc > windowSize ? (size_t)windowSize : posInSrc + (size_t)dictSize; + RETURN_ERROR_IF(offCode > STORE_OFFSET(offsetBound), corruption_detected, "Offset too large!"); + RETURN_ERROR_IF(matchLength < MINMATCH, corruption_detected, "Matchlength too small"); return 0; } /* Returns an offset code, given a sequence's raw offset, the ongoing repcode array, and whether litLength == 0 */ -static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0) { - U32 offCode = rawOffset + ZSTD_REP_MOVE; - U32 repCode = 0; +static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0) +{ + U32 offCode = STORE_OFFSET(rawOffset); if (!ll0 && rawOffset == rep[0]) { - repCode = 1; + offCode = STORE_REPCODE_1; } else if (rawOffset == rep[1]) { - repCode = 2 - ll0; + offCode = STORE_REPCODE(2 - ll0); } else if (rawOffset == rep[2]) { - repCode = 3 - ll0; + offCode = STORE_REPCODE(3 - ll0); } else if (ll0 && rawOffset == rep[0] - 1) { - repCode = 3; - } - if (repCode) { - /* ZSTD_storeSeq expects a number in the range [0, 2] to represent a repcode */ - offCode = repCode - 1; + offCode = STORE_REPCODE_3; } return offCode; } @@ -4476,18 +5595,17 @@ static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 /* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter. 
*/ -static size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, - const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, - const void* src, size_t blockSize) { +static size_t +ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, + ZSTD_sequencePosition* seqPos, + const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, + const void* src, size_t blockSize) +{ U32 idx = seqPos->idx; BYTE const* ip = (BYTE const*)(src); const BYTE* const iend = ip + blockSize; repcodes_t updatedRepcodes; U32 dictSize; - U32 litLength; - U32 matchLength; - U32 ll0; - U32 offCode; if (cctx->cdict) { dictSize = (U32)cctx->cdict->dictContentSize; @@ -4498,23 +5616,22 @@ static size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ZS } ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t)); for (; (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0) && idx < inSeqsSize; ++idx) { - litLength = inSeqs[idx].litLength; - matchLength = inSeqs[idx].matchLength; - ll0 = litLength == 0; - offCode = ZSTD_finalizeOffCode(inSeqs[idx].offset, updatedRepcodes.rep, ll0); - updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0); + U32 const litLength = inSeqs[idx].litLength; + U32 const ll0 = (litLength == 0); + U32 const matchLength = inSeqs[idx].matchLength; + U32 const offCode = ZSTD_finalizeOffCode(inSeqs[idx].offset, updatedRepcodes.rep, ll0); + ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0); DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength); if (cctx->appliedParams.validateSequences) { seqPos->posInSrc += litLength + matchLength; FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc, - cctx->appliedParams.cParams.windowLog, dictSize, - cctx->appliedParams.cParams.minMatch), + cctx->appliedParams.cParams.windowLog, dictSize), "Sequence validation failed"); } RETURN_ERROR_IF(idx - seqPos->idx > 
cctx->seqStore.maxNbSeq, memory_allocation, "Not enough memory allocated. Try adjusting ZSTD_c_minMatch."); - ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH); + ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength); ip += matchLength + litLength; } ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t)); @@ -4541,9 +5658,11 @@ static size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ZS * avoid splitting a match, or to avoid splitting a match such that it would produce a match * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block. */ -static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, - const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, - const void* src, size_t blockSize) { +static size_t +ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, + const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, + const void* src, size_t blockSize) +{ U32 idx = seqPos->idx; U32 startPosInSequence = seqPos->posInSequence; U32 endPosInSequence = seqPos->posInSequence + (U32)blockSize; @@ -4553,10 +5672,6 @@ static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_seq repcodes_t updatedRepcodes; U32 bytesAdjustment = 0; U32 finalMatchSplit = 0; - U32 litLength; - U32 matchLength; - U32 rawOffset; - U32 offCode; if (cctx->cdict) { dictSize = cctx->cdict->dictContentSize; @@ -4570,9 +5685,10 @@ static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_seq ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t)); while (endPosInSequence && idx < inSeqsSize && !finalMatchSplit) { const ZSTD_Sequence currSeq = inSeqs[idx]; - litLength = currSeq.litLength; - matchLength = currSeq.matchLength; - rawOffset = currSeq.offset; + U32 litLength = currSeq.litLength; + U32 matchLength 
= currSeq.matchLength; + U32 const rawOffset = currSeq.offset; + U32 offCode; /* Modify the sequence depending on where endPosInSequence lies */ if (endPosInSequence >= currSeq.litLength + currSeq.matchLength) { @@ -4625,22 +5741,21 @@ static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_seq } } /* Check if this offset can be represented with a repcode */ - { U32 ll0 = (litLength == 0); + { U32 const ll0 = (litLength == 0); offCode = ZSTD_finalizeOffCode(rawOffset, updatedRepcodes.rep, ll0); - updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0); + ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0); } if (cctx->appliedParams.validateSequences) { seqPos->posInSrc += litLength + matchLength; FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc, - cctx->appliedParams.cParams.windowLog, dictSize, - cctx->appliedParams.cParams.minMatch), + cctx->appliedParams.cParams.windowLog, dictSize), "Sequence validation failed"); } DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength); RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation, "Not enough memory allocated. 
Try adjusting ZSTD_c_minMatch."); - ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH); + ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength); ip += matchLength + litLength; } DEBUGLOG(5, "Ending seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength); @@ -4665,7 +5780,8 @@ static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_seq typedef size_t (*ZSTD_sequenceCopier) (ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, const void* src, size_t blockSize); -static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode) { +static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode) +{ ZSTD_sequenceCopier sequenceCopier = NULL; assert(ZSTD_cParam_withinBounds(ZSTD_c_blockDelimiters, mode)); if (mode == ZSTD_sf_explicitBlockDelimiters) { @@ -4679,12 +5795,15 @@ static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode) /* Compress, block-by-block, all of the sequences given. * - * Returns the cumulative size of all compressed blocks (including their headers), otherwise a ZSTD error. + * Returns the cumulative size of all compressed blocks (including their headers), + * otherwise a ZSTD error. 
*/ -static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const ZSTD_Sequence* inSeqs, size_t inSeqsSize, - const void* src, size_t srcSize) { +static size_t +ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const ZSTD_Sequence* inSeqs, size_t inSeqsSize, + const void* src, size_t srcSize) +{ size_t cSize = 0; U32 lastBlock; size_t blockSize; @@ -4694,7 +5813,7 @@ static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, BYTE const* ip = (BYTE const*)src; BYTE* op = (BYTE*)dst; - ZSTD_sequenceCopier sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters); + ZSTD_sequenceCopier const sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters); DEBUGLOG(4, "ZSTD_compressSequences_internal srcSize: %zu, inSeqsSize: %zu", srcSize, inSeqsSize); /* Special case: empty frame */ @@ -4732,7 +5851,7 @@ static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, continue; } - compressedSeqsSize = ZSTD_entropyCompressSequences(&cctx->seqStore, + compressedSeqsSize = ZSTD_entropyCompressSeqStore(&cctx->seqStore, &cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy, &cctx->appliedParams, op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize, @@ -4764,7 +5883,7 @@ static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, } else { U32 cBlockHeader; /* Error checking and repcodes update */ - ZSTD_confirmRepcodesAndEntropyTables(cctx); + ZSTD_blockState_confirmRepcodesAndEntropyTables(&cctx->blockState); if (cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; @@ -4794,7 +5913,8 @@ static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapacity, const ZSTD_Sequence* inSeqs, size_t inSeqsSize, - 
const void* src, size_t srcSize) { + const void* src, size_t srcSize) +{ BYTE* op = (BYTE*)dst; size_t cSize = 0; size_t compressedBlocksSize = 0; @@ -4861,117 +5981,11 @@ size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) /*-===== Pre-defined compression levels =====-*/ +#include "clevels.h" -#define ZSTD_MAX_CLEVEL 22 int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; } int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; } - -static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = { -{ /* "default" - for any srcSize > 256 KB */ - /* W, C, H, S, L, TL, strat */ - { 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */ - { 19, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */ - { 20, 15, 16, 1, 6, 0, ZSTD_fast }, /* level 2 */ - { 21, 16, 17, 1, 5, 0, ZSTD_dfast }, /* level 3 */ - { 21, 18, 18, 1, 5, 0, ZSTD_dfast }, /* level 4 */ - { 21, 18, 19, 2, 5, 2, ZSTD_greedy }, /* level 5 */ - { 21, 19, 19, 3, 5, 4, ZSTD_greedy }, /* level 6 */ - { 21, 19, 19, 3, 5, 8, ZSTD_lazy }, /* level 7 */ - { 21, 19, 19, 3, 5, 16, ZSTD_lazy2 }, /* level 8 */ - { 21, 19, 20, 4, 5, 16, ZSTD_lazy2 }, /* level 9 */ - { 22, 20, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */ - { 22, 21, 22, 4, 5, 16, ZSTD_lazy2 }, /* level 11 */ - { 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 12 */ - { 22, 21, 22, 5, 5, 32, ZSTD_btlazy2 }, /* level 13 */ - { 22, 22, 23, 5, 5, 32, ZSTD_btlazy2 }, /* level 14 */ - { 22, 23, 23, 6, 5, 32, ZSTD_btlazy2 }, /* level 15 */ - { 22, 22, 22, 5, 5, 48, ZSTD_btopt }, /* level 16 */ - { 23, 23, 22, 5, 4, 64, ZSTD_btopt }, /* level 17 */ - { 23, 23, 22, 6, 3, 64, ZSTD_btultra }, /* level 18 */ - { 23, 24, 22, 7, 3,256, ZSTD_btultra2}, /* level 19 */ - { 25, 25, 23, 7, 3,256, ZSTD_btultra2}, /* level 20 */ - { 26, 26, 24, 7, 3,512, ZSTD_btultra2}, /* level 21 */ - { 27, 27, 25, 9, 3,999, ZSTD_btultra2}, /* level 22 */ -}, -{ /* for srcSize <= 256 KB */ - /* W, C, H, S, L, T, strat */ - { 18, 12, 13, 1, 5, 1, 
ZSTD_fast }, /* base for negative levels */ - { 18, 13, 14, 1, 6, 0, ZSTD_fast }, /* level 1 */ - { 18, 14, 14, 1, 5, 0, ZSTD_dfast }, /* level 2 */ - { 18, 16, 16, 1, 4, 0, ZSTD_dfast }, /* level 3 */ - { 18, 16, 17, 2, 5, 2, ZSTD_greedy }, /* level 4.*/ - { 18, 18, 18, 3, 5, 2, ZSTD_greedy }, /* level 5.*/ - { 18, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6.*/ - { 18, 18, 19, 4, 4, 4, ZSTD_lazy }, /* level 7 */ - { 18, 18, 19, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ - { 18, 18, 19, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ - { 18, 18, 19, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ - { 18, 18, 19, 5, 4, 12, ZSTD_btlazy2 }, /* level 11.*/ - { 18, 19, 19, 7, 4, 12, ZSTD_btlazy2 }, /* level 12.*/ - { 18, 18, 19, 4, 4, 16, ZSTD_btopt }, /* level 13 */ - { 18, 18, 19, 4, 3, 32, ZSTD_btopt }, /* level 14.*/ - { 18, 18, 19, 6, 3,128, ZSTD_btopt }, /* level 15.*/ - { 18, 19, 19, 6, 3,128, ZSTD_btultra }, /* level 16.*/ - { 18, 19, 19, 8, 3,256, ZSTD_btultra }, /* level 17.*/ - { 18, 19, 19, 6, 3,128, ZSTD_btultra2}, /* level 18.*/ - { 18, 19, 19, 8, 3,256, ZSTD_btultra2}, /* level 19.*/ - { 18, 19, 19, 10, 3,512, ZSTD_btultra2}, /* level 20.*/ - { 18, 19, 19, 12, 3,512, ZSTD_btultra2}, /* level 21.*/ - { 18, 19, 19, 13, 3,999, ZSTD_btultra2}, /* level 22.*/ -}, -{ /* for srcSize <= 128 KB */ - /* W, C, H, S, L, T, strat */ - { 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ - { 17, 12, 13, 1, 6, 0, ZSTD_fast }, /* level 1 */ - { 17, 13, 15, 1, 5, 0, ZSTD_fast }, /* level 2 */ - { 17, 15, 16, 2, 5, 0, ZSTD_dfast }, /* level 3 */ - { 17, 17, 17, 2, 4, 0, ZSTD_dfast }, /* level 4 */ - { 17, 16, 17, 3, 4, 2, ZSTD_greedy }, /* level 5 */ - { 17, 17, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */ - { 17, 17, 17, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */ - { 17, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ - { 17, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ - { 17, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ - { 17, 17, 17, 5, 4, 8, ZSTD_btlazy2 }, /* level 11 */ - { 17, 18, 17, 7, 4, 
12, ZSTD_btlazy2 }, /* level 12 */ - { 17, 18, 17, 3, 4, 12, ZSTD_btopt }, /* level 13.*/ - { 17, 18, 17, 4, 3, 32, ZSTD_btopt }, /* level 14.*/ - { 17, 18, 17, 6, 3,256, ZSTD_btopt }, /* level 15.*/ - { 17, 18, 17, 6, 3,128, ZSTD_btultra }, /* level 16.*/ - { 17, 18, 17, 8, 3,256, ZSTD_btultra }, /* level 17.*/ - { 17, 18, 17, 10, 3,512, ZSTD_btultra }, /* level 18.*/ - { 17, 18, 17, 5, 3,256, ZSTD_btultra2}, /* level 19.*/ - { 17, 18, 17, 7, 3,512, ZSTD_btultra2}, /* level 20.*/ - { 17, 18, 17, 9, 3,512, ZSTD_btultra2}, /* level 21.*/ - { 17, 18, 17, 11, 3,999, ZSTD_btultra2}, /* level 22.*/ -}, -{ /* for srcSize <= 16 KB */ - /* W, C, H, S, L, T, strat */ - { 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ - { 14, 14, 15, 1, 5, 0, ZSTD_fast }, /* level 1 */ - { 14, 14, 15, 1, 4, 0, ZSTD_fast }, /* level 2 */ - { 14, 14, 15, 2, 4, 0, ZSTD_dfast }, /* level 3 */ - { 14, 14, 14, 4, 4, 2, ZSTD_greedy }, /* level 4 */ - { 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 5.*/ - { 14, 14, 14, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */ - { 14, 14, 14, 6, 4, 8, ZSTD_lazy2 }, /* level 7 */ - { 14, 14, 14, 8, 4, 8, ZSTD_lazy2 }, /* level 8.*/ - { 14, 15, 14, 5, 4, 8, ZSTD_btlazy2 }, /* level 9.*/ - { 14, 15, 14, 9, 4, 8, ZSTD_btlazy2 }, /* level 10.*/ - { 14, 15, 14, 3, 4, 12, ZSTD_btopt }, /* level 11.*/ - { 14, 15, 14, 4, 3, 24, ZSTD_btopt }, /* level 12.*/ - { 14, 15, 14, 5, 3, 32, ZSTD_btultra }, /* level 13.*/ - { 14, 15, 15, 6, 3, 64, ZSTD_btultra }, /* level 14.*/ - { 14, 15, 15, 7, 3,256, ZSTD_btultra }, /* level 15.*/ - { 14, 15, 15, 5, 3, 48, ZSTD_btultra2}, /* level 16.*/ - { 14, 15, 15, 6, 3,128, ZSTD_btultra2}, /* level 17.*/ - { 14, 15, 15, 7, 3,256, ZSTD_btultra2}, /* level 18.*/ - { 14, 15, 15, 8, 3,256, ZSTD_btultra2}, /* level 19.*/ - { 14, 15, 15, 8, 3,512, ZSTD_btultra2}, /* level 20.*/ - { 14, 15, 15, 9, 3,512, ZSTD_btultra2}, /* level 21.*/ - { 14, 15, 15, 10, 3,999, ZSTD_btultra2}, /* level 22.*/ -}, -}; +int ZSTD_defaultCLevel(void) { return 
ZSTD_CLEVEL_DEFAULT; } static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(int const compressionLevel, size_t const dictSize) { @@ -4999,7 +6013,7 @@ static int ZSTD_dedicatedDictSearch_isSupported( { return (cParams->strategy >= ZSTD_greedy) && (cParams->strategy <= ZSTD_lazy2) - && (cParams->hashLog >= cParams->chainLog) + && (cParams->hashLog > cParams->chainLog) && (cParams->chainLog <= 24); } @@ -5018,6 +6032,9 @@ static void ZSTD_dedicatedDictSearch_revertCParams( case ZSTD_lazy: case ZSTD_lazy2: cParams->hashLog -= ZSTD_LAZY_DDSS_BUCKET_LOG; + if (cParams->hashLog < ZSTD_HASHLOG_MIN) { + cParams->hashLog = ZSTD_HASHLOG_MIN; + } break; case ZSTD_btlazy2: case ZSTD_btopt: @@ -5066,6 +6083,7 @@ static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, else row = compressionLevel; { ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row]; + DEBUGLOG(5, "ZSTD_getCParams_internal selected tableID: %u row: %u strat: %u", tableID, row, (U32)cp.strategy); /* acceleration factor */ if (compressionLevel < 0) { int const clampedCompressionLevel = MAX(ZSTD_minCLevel(), compressionLevel); diff --git a/lib/zstd/compress/zstd_compress_internal.h b/lib/zstd/compress/zstd_compress_internal.h index 685d2f996cc2..71697a11ae30 100644 --- a/lib/zstd/compress/zstd_compress_internal.h +++ b/lib/zstd/compress/zstd_compress_internal.h @@ -57,7 +57,7 @@ typedef struct { } ZSTD_localDict; typedef struct { - HUF_CElt CTable[HUF_CTABLE_SIZE_U32(255)]; + HUF_CElt CTable[HUF_CTABLE_SIZE_ST(255)]; HUF_repeat repeatMode; } ZSTD_hufCTables_t; @@ -75,8 +75,55 @@ typedef struct { ZSTD_fseCTables_t fse; } ZSTD_entropyCTables_t; +/* ********************************************* +* Entropy buffer statistics structs and funcs * +***********************************************/ +/* ZSTD_hufCTablesMetadata_t : + * Stores Literals Block Type for a super-block in hType, and + * huffman tree description in hufDesBuffer. 
+ * hufDesSize refers to the size of huffman tree description in bytes. + * This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */ typedef struct { - U32 off; /* Offset code (offset + ZSTD_REP_MOVE) for the match */ + symbolEncodingType_e hType; + BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE]; + size_t hufDesSize; +} ZSTD_hufCTablesMetadata_t; + +/* ZSTD_fseCTablesMetadata_t : + * Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and + * fse tables in fseTablesBuffer. + * fseTablesSize refers to the size of fse tables in bytes. + * This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */ +typedef struct { + symbolEncodingType_e llType; + symbolEncodingType_e ofType; + symbolEncodingType_e mlType; + BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE]; + size_t fseTablesSize; + size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */ +} ZSTD_fseCTablesMetadata_t; + +typedef struct { + ZSTD_hufCTablesMetadata_t hufMetadata; + ZSTD_fseCTablesMetadata_t fseMetadata; +} ZSTD_entropyCTablesMetadata_t; + +/* ZSTD_buildBlockEntropyStats() : + * Builds entropy for the block. 
+ * @return : 0 on success or error code */ +size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize); + +/* ******************************* +* Compression internals structs * +*********************************/ + +typedef struct { + U32 off; /* Offset sumtype code for the match, using ZSTD_storeSeq() format */ U32 len; /* Raw length of match */ } ZSTD_match_t; @@ -126,7 +173,7 @@ typedef struct { U32 offCodeSumBasePrice; /* to compare to log2(offreq) */ ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */ const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */ - ZSTD_literalCompressionMode_e literalCompressionMode; + ZSTD_paramSwitch_e literalCompressionMode; } optState_t; typedef struct { @@ -135,14 +182,23 @@ typedef struct { } ZSTD_compressedBlockState_t; typedef struct { - BYTE const* nextSrc; /* next block here to continue on current prefix */ - BYTE const* base; /* All regular indexes relative to this position */ - BYTE const* dictBase; /* extDict indexes relative to this position */ - U32 dictLimit; /* below that point, need extDict */ - U32 lowLimit; /* below that point, no more valid data */ + BYTE const* nextSrc; /* next block here to continue on current prefix */ + BYTE const* base; /* All regular indexes relative to this position */ + BYTE const* dictBase; /* extDict indexes relative to this position */ + U32 dictLimit; /* below that point, need extDict */ + U32 lowLimit; /* below that point, no more valid data */ + U32 nbOverflowCorrections; /* Number of times overflow correction has run since + * ZSTD_window_init(). Useful for debugging coredumps + * and for ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY. 
+ */ } ZSTD_window_t; +#define ZSTD_WINDOW_START_INDEX 2 + typedef struct ZSTD_matchState_t ZSTD_matchState_t; + +#define ZSTD_ROW_HASH_CACHE_SIZE 8 /* Size of prefetching hash cache for row-based matchfinder */ + struct ZSTD_matchState_t { ZSTD_window_t window; /* State for window round buffer management */ U32 loadedDictEnd; /* index of end of dictionary, within context's referential. @@ -154,9 +210,17 @@ struct ZSTD_matchState_t { */ U32 nextToUpdate; /* index from which to continue table update */ U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */ + + U32 rowHashLog; /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/ + U16* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */ + U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */ + U32* hashTable; U32* hashTable3; U32* chainTable; + + U32 forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */ + int dedicatedDictSearch; /* Indicates whether this matchState is using the * dedicated dictionary search structure. */ @@ -196,7 +260,7 @@ typedef struct { } ldmState_t; typedef struct { - U32 enableLdm; /* 1 if enable long distance matching */ + ZSTD_paramSwitch_e enableLdm; /* ZSTD_ps_enable to enable LDM. 
ZSTD_ps_auto by default */ U32 hashLog; /* Log size of hashTable */ U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */ U32 minMatchLength; /* Minimum match length */ @@ -227,7 +291,7 @@ struct ZSTD_CCtx_params_s { * There is no guarantee that hint is close to actual source size */ ZSTD_dictAttachPref_e attachDictPref; - ZSTD_literalCompressionMode_e literalCompressionMode; + ZSTD_paramSwitch_e literalCompressionMode; /* Multithreading: used to pass parameters to mtctx */ int nbWorkers; @@ -249,6 +313,15 @@ struct ZSTD_CCtx_params_s { ZSTD_sequenceFormat_e blockDelimiters; int validateSequences; + /* Block splitting */ + ZSTD_paramSwitch_e useBlockSplitter; + + /* Param for deciding whether to use row-based matchfinder */ + ZSTD_paramSwitch_e useRowMatchFinder; + + /* Always load a dictionary in ext-dict mode (not prefix mode)? */ + int deterministicRefPrefix; + /* Internal use, for createCCtxParams() and freeCCtxParams() only */ ZSTD_customMem customMem; }; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */ @@ -266,12 +339,29 @@ typedef enum { ZSTDb_buffered } ZSTD_buffered_policy_e; +/* + * Struct that contains all elements of block splitter that should be allocated + * in a wksp. + */ +#define ZSTD_MAX_NB_BLOCK_SPLITS 196 +typedef struct { + seqStore_t fullSeqStoreChunk; + seqStore_t firstHalfSeqStore; + seqStore_t secondHalfSeqStore; + seqStore_t currSeqStore; + seqStore_t nextSeqStore; + + U32 partitions[ZSTD_MAX_NB_BLOCK_SPLITS]; + ZSTD_entropyCTablesMetadata_t entropyMetadata; +} ZSTD_blockSplitCtx; + struct ZSTD_CCtx_s { ZSTD_compressionStage_e stage; int cParamsChanged; /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */ int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. 
*/ ZSTD_CCtx_params requestedParams; ZSTD_CCtx_params appliedParams; + ZSTD_CCtx_params simpleApiParams; /* Param storage used by the simple API - not sticky. Must only be used in top-level simple API functions for storage. */ U32 dictID; size_t dictContentSize; @@ -296,7 +386,7 @@ struct ZSTD_CCtx_s { ZSTD_blockState_t blockState; U32* entropyWorkspace; /* entropy workspace of ENTROPY_WORKSPACE_SIZE bytes */ - /* Wether we are streaming or not */ + /* Whether we are streaming or not */ ZSTD_buffered_policy_e bufferedPolicy; /* streaming */ @@ -324,6 +414,9 @@ struct ZSTD_CCtx_s { /* Multi-threading */ /* Tracing */ + + /* Workspace for block splitter */ + ZSTD_blockSplitCtx blockSplitCtx; }; typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e; @@ -358,7 +451,7 @@ typedef enum { typedef size_t (*ZSTD_blockCompressor) ( ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); -ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode); +ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e rowMatchfinderMode, ZSTD_dictMode_e dictMode); MEM_STATIC U32 ZSTD_LLcode(U32 litLength) @@ -392,31 +485,6 @@ MEM_STATIC U32 ZSTD_MLcode(U32 mlBase) return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase]; } -typedef struct repcodes_s { - U32 rep[3]; -} repcodes_t; - -MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0) -{ - repcodes_t newReps; - if (offset >= ZSTD_REP_NUM) { /* full offset */ - newReps.rep[2] = rep[1]; - newReps.rep[1] = rep[0]; - newReps.rep[0] = offset - ZSTD_REP_MOVE; - } else { /* repcode */ - U32 const repCode = offset + ll0; - if (repCode > 0) { /* note : if repCode==0, no change */ - U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; - newReps.rep[2] = (repCode >= 2) ? 
rep[1] : rep[2]; - newReps.rep[1] = rep[0]; - newReps.rep[0] = currentOffset; - } else { /* repCode == 0 */ - ZSTD_memcpy(&newReps, rep, sizeof(newReps)); - } - } - return newReps; -} - /* ZSTD_cParam_withinBounds: * @return 1 if value is within cParam bounds, * 0 otherwise */ @@ -465,17 +533,17 @@ MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat) return (srcSize >> minlog) + 2; } -MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams) +MEM_STATIC int ZSTD_literalsCompressionIsDisabled(const ZSTD_CCtx_params* cctxParams) { switch (cctxParams->literalCompressionMode) { - case ZSTD_lcm_huffman: + case ZSTD_ps_enable: return 0; - case ZSTD_lcm_uncompressed: + case ZSTD_ps_disable: return 1; default: assert(0 /* impossible: pre-validated */); ZSTD_FALLTHROUGH; - case ZSTD_lcm_auto: + case ZSTD_ps_auto: return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0); } } @@ -485,7 +553,9 @@ MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParam * Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single * large copies. 
*/ -static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) { +static void +ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) +{ assert(iend > ilimit_w); if (ip <= ilimit_w) { ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap); @@ -495,14 +565,30 @@ static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const ie while (ip < iend) *op++ = *ip++; } +#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1) +#define STORE_REPCODE_1 STORE_REPCODE(1) +#define STORE_REPCODE_2 STORE_REPCODE(2) +#define STORE_REPCODE_3 STORE_REPCODE(3) +#define STORE_REPCODE(r) (assert((r)>=1), assert((r)<=3), (r)-1) +#define STORE_OFFSET(o) (assert((o)>0), o + ZSTD_REP_MOVE) +#define STORED_IS_OFFSET(o) ((o) > ZSTD_REP_MOVE) +#define STORED_IS_REPCODE(o) ((o) <= ZSTD_REP_MOVE) +#define STORED_OFFSET(o) (assert(STORED_IS_OFFSET(o)), (o)-ZSTD_REP_MOVE) +#define STORED_REPCODE(o) (assert(STORED_IS_REPCODE(o)), (o)+1) /* returns ID 1,2,3 */ +#define STORED_TO_OFFBASE(o) ((o)+1) +#define OFFBASE_TO_STORED(o) ((o)-1) + /*! ZSTD_storeSeq() : - * Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t. - * `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes). - * `mlBase` : matchLength - MINMATCH + * Store a sequence (litlen, litPtr, offCode and matchLength) into seqStore_t. + * @offBase_minus1 : Users should use employ macros STORE_REPCODE_X and STORE_OFFSET(). + * @matchLength : must be >= MINMATCH * Allowed to overread literals up to litLimit. 
*/ -HINT_INLINE UNUSED_ATTR -void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase) +HINT_INLINE UNUSED_ATTR void +ZSTD_storeSeq(seqStore_t* seqStorePtr, + size_t litLength, const BYTE* literals, const BYTE* litLimit, + U32 offBase_minus1, + size_t matchLength) { BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH; BYTE const* const litEnd = literals + litLength; @@ -511,7 +597,7 @@ void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* litera if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */ { U32 const pos = (U32)((const BYTE*)literals - g_start); DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u", - pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode); + pos, (U32)litLength, (U32)matchLength, (U32)offBase_minus1); } #endif assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq); @@ -535,26 +621,66 @@ void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* litera /* literal Length */ if (litLength>0xFFFF) { - assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */ - seqStorePtr->longLengthID = 1; + assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */ + seqStorePtr->longLengthType = ZSTD_llt_literalLength; seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); } seqStorePtr->sequences[0].litLength = (U16)litLength; /* match offset */ - seqStorePtr->sequences[0].offset = offCode + 1; + seqStorePtr->sequences[0].offBase = STORED_TO_OFFBASE(offBase_minus1); /* match Length */ - if (mlBase>0xFFFF) { - assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */ - seqStorePtr->longLengthID = 2; - seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + 
assert(matchLength >= MINMATCH); + { size_t const mlBase = matchLength - MINMATCH; + if (mlBase>0xFFFF) { + assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */ + seqStorePtr->longLengthType = ZSTD_llt_matchLength; + seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + } + seqStorePtr->sequences[0].mlBase = (U16)mlBase; } - seqStorePtr->sequences[0].matchLength = (U16)mlBase; seqStorePtr->sequences++; } +/* ZSTD_updateRep() : + * updates in-place @rep (array of repeat offsets) + * @offBase_minus1 : sum-type, with same numeric representation as ZSTD_storeSeq() + */ +MEM_STATIC void +ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0) +{ + if (STORED_IS_OFFSET(offBase_minus1)) { /* full offset */ + rep[2] = rep[1]; + rep[1] = rep[0]; + rep[0] = STORED_OFFSET(offBase_minus1); + } else { /* repcode */ + U32 const repCode = STORED_REPCODE(offBase_minus1) - 1 + ll0; + if (repCode > 0) { /* note : if repCode==0, no change */ + U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; + rep[2] = (repCode >= 2) ? 
rep[1] : rep[2]; + rep[1] = rep[0]; + rep[0] = currentOffset; + } else { /* repCode == 0 */ + /* nothing to do */ + } + } +} + +typedef struct repcodes_s { + U32 rep[3]; +} repcodes_t; + +MEM_STATIC repcodes_t +ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0) +{ + repcodes_t newReps; + ZSTD_memcpy(&newReps, rep, sizeof(newReps)); + ZSTD_updateRep(newReps.rep, offBase_minus1, ll0); + return newReps; +} + /*-************************************* * Match length counter @@ -778,6 +904,13 @@ MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window) window->dictLimit = end; } +MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window) +{ + return window.dictLimit == ZSTD_WINDOW_START_INDEX && + window.lowLimit == ZSTD_WINDOW_START_INDEX && + (window.nextSrc - window.base) == ZSTD_WINDOW_START_INDEX; +} + /* * ZSTD_window_hasExtDict(): * Returns non-zero if the window has a non-empty extDict. @@ -801,15 +934,71 @@ MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms) ZSTD_noDict; } +/* Defining this macro to non-zero tells zstd to run the overflow correction + * code much more frequently. This is very inefficient, and should only be + * used for tests and fuzzers. + */ +#ifndef ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY +# ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION +# define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 1 +# else +# define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 0 +# endif +#endif + +/* + * ZSTD_window_canOverflowCorrect(): + * Returns non-zero if the indices are large enough for overflow correction + * to work correctly without impacting compression ratio. 
+ */ +MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window, + U32 cycleLog, + U32 maxDist, + U32 loadedDictEnd, + void const* src) +{ + U32 const cycleSize = 1u << cycleLog; + U32 const curr = (U32)((BYTE const*)src - window.base); + U32 const minIndexToOverflowCorrect = cycleSize + + MAX(maxDist, cycleSize) + + ZSTD_WINDOW_START_INDEX; + + /* Adjust the min index to backoff the overflow correction frequency, + * so we don't waste too much CPU in overflow correction. If this + * computation overflows we don't really care, we just need to make + * sure it is at least minIndexToOverflowCorrect. + */ + U32 const adjustment = window.nbOverflowCorrections + 1; + U32 const adjustedIndex = MAX(minIndexToOverflowCorrect * adjustment, + minIndexToOverflowCorrect); + U32 const indexLargeEnough = curr > adjustedIndex; + + /* Only overflow correct early if the dictionary is invalidated already, + * so we don't hurt compression ratio. + */ + U32 const dictionaryInvalidated = curr > maxDist + loadedDictEnd; + + return indexLargeEnough && dictionaryInvalidated; +} + /* * ZSTD_window_needOverflowCorrection(): * Returns non-zero if the indices are getting too large and need overflow * protection. */ MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window, + U32 cycleLog, + U32 maxDist, + U32 loadedDictEnd, + void const* src, void const* srcEnd) { U32 const curr = (U32)((BYTE const*)srcEnd - window.base); + if (ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) { + if (ZSTD_window_canOverflowCorrect(window, cycleLog, maxDist, loadedDictEnd, src)) { + return 1; + } + } return curr > ZSTD_CURRENT_MAX; } @@ -821,7 +1010,6 @@ MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window, * * The least significant cycleLog bits of the indices must remain the same, * which may be 0. Every index up to maxDist in the past must be valid. - * NOTE: (maxDist & cycleMask) must be zero. 
*/ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog, U32 maxDist, void const* src) @@ -845,32 +1033,52 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog, * 3. (cctx->lowLimit + 1< 3<<29 + 1<base); - U32 const currentCycle0 = curr & cycleMask; - /* Exclude zero so that newCurrent - maxDist >= 1. */ - U32 const currentCycle1 = currentCycle0 == 0 ? (1U << cycleLog) : currentCycle0; - U32 const newCurrent = currentCycle1 + maxDist; + U32 const currentCycle = curr & cycleMask; + /* Ensure newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX. */ + U32 const currentCycleCorrection = currentCycle < ZSTD_WINDOW_START_INDEX + ? MAX(cycleSize, ZSTD_WINDOW_START_INDEX) + : 0; + U32 const newCurrent = currentCycle + + currentCycleCorrection + + MAX(maxDist, cycleSize); U32 const correction = curr - newCurrent; - assert((maxDist & cycleMask) == 0); + /* maxDist must be a power of two so that: + * (newCurrent & cycleMask) == (curr & cycleMask) + * This is required to not corrupt the chains / binary tree. 
+ */ + assert((maxDist & (maxDist - 1)) == 0); + assert((curr & cycleMask) == (newCurrent & cycleMask)); assert(curr > newCurrent); - /* Loose bound, should be around 1<<29 (see above) */ - assert(correction > 1<<28); + if (!ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) { + /* Loose bound, should be around 1<<29 (see above) */ + assert(correction > 1<<28); + } window->base += correction; window->dictBase += correction; - if (window->lowLimit <= correction) window->lowLimit = 1; - else window->lowLimit -= correction; - if (window->dictLimit <= correction) window->dictLimit = 1; - else window->dictLimit -= correction; + if (window->lowLimit < correction + ZSTD_WINDOW_START_INDEX) { + window->lowLimit = ZSTD_WINDOW_START_INDEX; + } else { + window->lowLimit -= correction; + } + if (window->dictLimit < correction + ZSTD_WINDOW_START_INDEX) { + window->dictLimit = ZSTD_WINDOW_START_INDEX; + } else { + window->dictLimit -= correction; + } /* Ensure we can still reference the full window. */ assert(newCurrent >= maxDist); - assert(newCurrent - maxDist >= 1); + assert(newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX); /* Ensure that lowLimit and dictLimit didn't underflow. 
*/ assert(window->lowLimit <= newCurrent); assert(window->dictLimit <= newCurrent); + ++window->nbOverflowCorrections; + DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction, window->lowLimit); return correction; @@ -975,11 +1183,13 @@ ZSTD_checkDictValidity(const ZSTD_window_t* window, MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) { ZSTD_memset(window, 0, sizeof(*window)); - window->base = (BYTE const*)""; - window->dictBase = (BYTE const*)""; - window->dictLimit = 1; /* start from 1, so that 1st position is valid */ - window->lowLimit = 1; /* it ensures first and later CCtx usages compress the same */ - window->nextSrc = window->base + 1; /* see issue #1241 */ + window->base = (BYTE const*)" "; + window->dictBase = (BYTE const*)" "; + ZSTD_STATIC_ASSERT(ZSTD_DUBT_UNSORTED_MARK < ZSTD_WINDOW_START_INDEX); /* Start above ZSTD_DUBT_UNSORTED_MARK */ + window->dictLimit = ZSTD_WINDOW_START_INDEX; /* start from >0, so that 1st position is valid */ + window->lowLimit = ZSTD_WINDOW_START_INDEX; /* it ensures first and later CCtx usages compress the same */ + window->nextSrc = window->base + ZSTD_WINDOW_START_INDEX; /* see issue #1241 */ + window->nbOverflowCorrections = 0; } /* @@ -990,7 +1200,8 @@ MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) { * Returns non-zero if the segment is contiguous. 
*/ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window, - void const* src, size_t srcSize) + void const* src, size_t srcSize, + int forceNonContiguous) { BYTE const* const ip = (BYTE const*)src; U32 contiguous = 1; @@ -1000,7 +1211,7 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window, assert(window->base != NULL); assert(window->dictBase != NULL); /* Check if blocks follow each other */ - if (src != window->nextSrc) { + if (src != window->nextSrc || forceNonContiguous) { /* not contiguous */ size_t const distanceFromBase = (size_t)(window->nextSrc - window->base); DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit); @@ -1030,15 +1241,15 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window, */ MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog) { - U32 const maxDistance = 1U << windowLog; - U32 const lowestValid = ms->window.lowLimit; - U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid; - U32 const isDictionary = (ms->loadedDictEnd != 0); + U32 const maxDistance = 1U << windowLog; + U32 const lowestValid = ms->window.lowLimit; + U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid; + U32 const isDictionary = (ms->loadedDictEnd != 0); /* When using a dictionary the entire dictionary is valid if a single byte of the dictionary * is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't * valid for the entire block. So this check is sufficient to find the lowest valid match index. */ - U32 const matchLowest = isDictionary ? lowestValid : withinWindow; + U32 const matchLowest = isDictionary ? 
lowestValid : withinWindow; return matchLowest; } diff --git a/lib/zstd/compress/zstd_compress_literals.c b/lib/zstd/compress/zstd_compress_literals.c index 655bcda4d1f1..52b0a8059aba 100644 --- a/lib/zstd/compress/zstd_compress_literals.c +++ b/lib/zstd/compress/zstd_compress_literals.c @@ -73,7 +73,8 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, void* dst, size_t dstCapacity, const void* src, size_t srcSize, void* entropyWorkspace, size_t entropyWorkspaceSize, - const int bmi2) + const int bmi2, + unsigned suspectUncompressible) { size_t const minGain = ZSTD_minGain(srcSize, strategy); size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); @@ -105,11 +106,11 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, HUF_compress1X_repeat( ostart+lhSize, dstCapacity-lhSize, src, srcSize, HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize, - (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) : + (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2, suspectUncompressible) : HUF_compress4X_repeat( ostart+lhSize, dstCapacity-lhSize, src, srcSize, HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize, - (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2); + (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2, suspectUncompressible); if (repeat != HUF_repeat_none) { /* reused the existing table */ DEBUGLOG(5, "Reusing previous huffman table"); @@ -117,7 +118,7 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, } } - if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) { + if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) { ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); } diff --git a/lib/zstd/compress/zstd_compress_literals.h b/lib/zstd/compress/zstd_compress_literals.h index 9904c0cd30a0..9775fb97cb70 100644 --- 
a/lib/zstd/compress/zstd_compress_literals.h +++ b/lib/zstd/compress/zstd_compress_literals.h @@ -18,12 +18,14 @@ size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize); +/* If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, ZSTD_hufCTables_t* nextHuf, ZSTD_strategy strategy, int disableLiteralCompression, void* dst, size_t dstCapacity, const void* src, size_t srcSize, void* entropyWorkspace, size_t entropyWorkspaceSize, - const int bmi2); + const int bmi2, + unsigned suspectUncompressible); #endif /* ZSTD_COMPRESS_LITERALS_H */ diff --git a/lib/zstd/compress/zstd_compress_sequences.c b/lib/zstd/compress/zstd_compress_sequences.c index dcfcdc9cc5e8..21ddc1b37acf 100644 --- a/lib/zstd/compress/zstd_compress_sequences.c +++ b/lib/zstd/compress/zstd_compress_sequences.c @@ -85,6 +85,8 @@ static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t { unsigned cost = 0; unsigned s; + + assert(total > 0); for (s = 0; s <= max; ++s) { unsigned norm = (unsigned)((256 * count[s]) / total); if (count[s] != 0 && norm == 0) @@ -273,10 +275,11 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity, assert(nbSeq_1 > 1); assert(entropyWorkspaceSize >= sizeof(ZSTD_BuildCTableWksp)); (void)entropyWorkspaceSize; - FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), ""); - { size_t const NCountSize = FSE_writeNCount(op, oend - op, wksp->norm, max, tableLog); /* overflow protected */ + FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "FSE_normalizeCount failed"); + assert(oend >= op); + { size_t const NCountSize = FSE_writeNCount(op, (size_t)(oend - op), wksp->norm, max, tableLog); /* overflow protected */ 
FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed"); - FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), ""); + FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), "FSE_buildCTable_wksp failed"); return NCountSize; } } @@ -310,19 +313,19 @@ ZSTD_encodeSequences_body( FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]); if (MEM_32bits()) BIT_flushBits(&blockStream); - BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]); + BIT_addBits(&blockStream, sequences[nbSeq-1].mlBase, ML_bits[mlCodeTable[nbSeq-1]]); if (MEM_32bits()) BIT_flushBits(&blockStream); if (longOffsets) { U32 const ofBits = ofCodeTable[nbSeq-1]; unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); if (extraBits) { - BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits); + BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, extraBits); BIT_flushBits(&blockStream); } - BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits, + BIT_addBits(&blockStream, sequences[nbSeq-1].offBase >> extraBits, ofBits - extraBits); } else { - BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]); + BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, ofCodeTable[nbSeq-1]); } BIT_flushBits(&blockStream); @@ -336,8 +339,8 @@ ZSTD_encodeSequences_body( U32 const mlBits = ML_bits[mlCode]; DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u", (unsigned)sequences[n].litLength, - (unsigned)sequences[n].matchLength + MINMATCH, - (unsigned)sequences[n].offset); + (unsigned)sequences[n].mlBase + MINMATCH, + (unsigned)sequences[n].offBase); /* 32b*/ /* 64b*/ /* (7)*/ /* (7)*/ FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */ @@ -348,18 +351,18 @@ 
ZSTD_encodeSequences_body( BIT_flushBits(&blockStream); /* (7)*/ BIT_addBits(&blockStream, sequences[n].litLength, llBits); if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream); - BIT_addBits(&blockStream, sequences[n].matchLength, mlBits); + BIT_addBits(&blockStream, sequences[n].mlBase, mlBits); if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream); if (longOffsets) { unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); if (extraBits) { - BIT_addBits(&blockStream, sequences[n].offset, extraBits); + BIT_addBits(&blockStream, sequences[n].offBase, extraBits); BIT_flushBits(&blockStream); /* (7)*/ } - BIT_addBits(&blockStream, sequences[n].offset >> extraBits, + BIT_addBits(&blockStream, sequences[n].offBase >> extraBits, ofBits - extraBits); /* 31 */ } else { - BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */ + BIT_addBits(&blockStream, sequences[n].offBase, ofBits); /* 31 */ } BIT_flushBits(&blockStream); /* (7)*/ DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr)); @@ -396,7 +399,7 @@ ZSTD_encodeSequences_default( #if DYNAMIC_BMI2 -static TARGET_ATTRIBUTE("bmi2") size_t +static BMI2_TARGET_ATTRIBUTE size_t ZSTD_encodeSequences_bmi2( void* dst, size_t dstCapacity, FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, diff --git a/lib/zstd/compress/zstd_compress_superblock.c b/lib/zstd/compress/zstd_compress_superblock.c index b0610b255653..17d836cc84e8 100644 --- a/lib/zstd/compress/zstd_compress_superblock.c +++ b/lib/zstd/compress/zstd_compress_superblock.c @@ -15,289 +15,10 @@ #include "../common/zstd_internal.h" /* ZSTD_getSequenceLength */ #include "hist.h" /* HIST_countFast_wksp */ -#include "zstd_compress_internal.h" +#include "zstd_compress_internal.h" /* ZSTD_[huf|fse|entropy]CTablesMetadata_t */ #include "zstd_compress_sequences.h" #include "zstd_compress_literals.h" -/*-************************************* -* Superblock entropy buffer 
structs -***************************************/ -/* ZSTD_hufCTablesMetadata_t : - * Stores Literals Block Type for a super-block in hType, and - * huffman tree description in hufDesBuffer. - * hufDesSize refers to the size of huffman tree description in bytes. - * This metadata is populated in ZSTD_buildSuperBlockEntropy_literal() */ -typedef struct { - symbolEncodingType_e hType; - BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE]; - size_t hufDesSize; -} ZSTD_hufCTablesMetadata_t; - -/* ZSTD_fseCTablesMetadata_t : - * Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and - * fse tables in fseTablesBuffer. - * fseTablesSize refers to the size of fse tables in bytes. - * This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */ -typedef struct { - symbolEncodingType_e llType; - symbolEncodingType_e ofType; - symbolEncodingType_e mlType; - BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE]; - size_t fseTablesSize; - size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */ -} ZSTD_fseCTablesMetadata_t; - -typedef struct { - ZSTD_hufCTablesMetadata_t hufMetadata; - ZSTD_fseCTablesMetadata_t fseMetadata; -} ZSTD_entropyCTablesMetadata_t; - - -/* ZSTD_buildSuperBlockEntropy_literal() : - * Builds entropy for the super-block literals. - * Stores literals block type (raw, rle, compressed, repeat) and - * huffman description table to hufMetadata. 
- * @return : size of huffman description table or error code */ -static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSize, - const ZSTD_hufCTables_t* prevHuf, - ZSTD_hufCTables_t* nextHuf, - ZSTD_hufCTablesMetadata_t* hufMetadata, - const int disableLiteralsCompression, - void* workspace, size_t wkspSize) -{ - BYTE* const wkspStart = (BYTE*)workspace; - BYTE* const wkspEnd = wkspStart + wkspSize; - BYTE* const countWkspStart = wkspStart; - unsigned* const countWksp = (unsigned*)workspace; - const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned); - BYTE* const nodeWksp = countWkspStart + countWkspSize; - const size_t nodeWkspSize = wkspEnd-nodeWksp; - unsigned maxSymbolValue = 255; - unsigned huffLog = HUF_TABLELOG_DEFAULT; - HUF_repeat repeat = prevHuf->repeatMode; - - DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize); - - /* Prepare nextEntropy assuming reusing the existing table */ - ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); - - if (disableLiteralsCompression) { - DEBUGLOG(5, "set_basic - disabled"); - hufMetadata->hType = set_basic; - return 0; - } - - /* small ? don't even attempt compression (speed opt) */ -# define COMPRESS_LITERALS_SIZE_MIN 63 - { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 
6 : COMPRESS_LITERALS_SIZE_MIN; - if (srcSize <= minLitSize) { - DEBUGLOG(5, "set_basic - too small"); - hufMetadata->hType = set_basic; - return 0; - } - } - - /* Scan input and build symbol stats */ - { size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize); - FORWARD_IF_ERROR(largest, "HIST_count_wksp failed"); - if (largest == srcSize) { - DEBUGLOG(5, "set_rle"); - hufMetadata->hType = set_rle; - return 0; - } - if (largest <= (srcSize >> 7)+4) { - DEBUGLOG(5, "set_basic - no gain"); - hufMetadata->hType = set_basic; - return 0; - } - } - - /* Validate the previous Huffman table */ - if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) { - repeat = HUF_repeat_none; - } - - /* Build Huffman Tree */ - ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable)); - huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); - { size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp, - maxSymbolValue, huffLog, - nodeWksp, nodeWkspSize); - FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp"); - huffLog = (U32)maxBits; - { /* Build and write the CTable */ - size_t const newCSize = HUF_estimateCompressedSize( - (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue); - size_t const hSize = HUF_writeCTable_wksp( - hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer), - (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog, - nodeWksp, nodeWkspSize); - /* Check against repeating the previous CTable */ - if (repeat != HUF_repeat_none) { - size_t const oldCSize = HUF_estimateCompressedSize( - (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue); - if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) { - DEBUGLOG(5, "set_repeat - smaller"); - ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); - hufMetadata->hType = set_repeat; - return 0; - } - } - if (newCSize + hSize >= srcSize) { - 
DEBUGLOG(5, "set_basic - no gains"); - ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); - hufMetadata->hType = set_basic; - return 0; - } - DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize); - hufMetadata->hType = set_compressed; - nextHuf->repeatMode = HUF_repeat_check; - return hSize; - } - } -} - -/* ZSTD_buildSuperBlockEntropy_sequences() : - * Builds entropy for the super-block sequences. - * Stores symbol compression modes and fse table to fseMetadata. - * @return : size of fse tables or error code */ -static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr, - const ZSTD_fseCTables_t* prevEntropy, - ZSTD_fseCTables_t* nextEntropy, - const ZSTD_CCtx_params* cctxParams, - ZSTD_fseCTablesMetadata_t* fseMetadata, - void* workspace, size_t wkspSize) -{ - BYTE* const wkspStart = (BYTE*)workspace; - BYTE* const wkspEnd = wkspStart + wkspSize; - BYTE* const countWkspStart = wkspStart; - unsigned* const countWksp = (unsigned*)workspace; - const size_t countWkspSize = (MaxSeq + 1) * sizeof(unsigned); - BYTE* const cTableWksp = countWkspStart + countWkspSize; - const size_t cTableWkspSize = wkspEnd-cTableWksp; - ZSTD_strategy const strategy = cctxParams->cParams.strategy; - FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable; - FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable; - FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable; - const BYTE* const ofCodeTable = seqStorePtr->ofCode; - const BYTE* const llCodeTable = seqStorePtr->llCode; - const BYTE* const mlCodeTable = seqStorePtr->mlCode; - size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; - BYTE* const ostart = fseMetadata->fseTablesBuffer; - BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer); - BYTE* op = ostart; - - assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE)); - DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq); - ZSTD_memset(workspace, 0, wkspSize); - - 
fseMetadata->lastCountSize = 0; - /* convert length/distances into codes */ - ZSTD_seqToCodes(seqStorePtr); - /* build CTable for Literal Lengths */ - { U32 LLtype; - unsigned max = MaxLL; - size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, llCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ - DEBUGLOG(5, "Building LL table"); - nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode; - LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode, - countWksp, max, mostFrequent, nbSeq, - LLFSELog, prevEntropy->litlengthCTable, - LL_defaultNorm, LL_defaultNormLog, - ZSTD_defaultAllowed, strategy); - assert(set_basic < set_compressed && set_rle < set_compressed); - assert(!(LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, - countWksp, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL, - prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable), - cTableWksp, cTableWkspSize); - FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed"); - if (LLtype == set_compressed) - fseMetadata->lastCountSize = countSize; - op += countSize; - fseMetadata->llType = (symbolEncodingType_e) LLtype; - } } - /* build CTable for Offsets */ - { U32 Offtype; - unsigned max = MaxOff; - size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, ofCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ - /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ - ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? 
ZSTD_defaultAllowed : ZSTD_defaultDisallowed; - DEBUGLOG(5, "Building OF table"); - nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode; - Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode, - countWksp, max, mostFrequent, nbSeq, - OffFSELog, prevEntropy->offcodeCTable, - OF_defaultNorm, OF_defaultNormLog, - defaultPolicy, strategy); - assert(!(Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, - countWksp, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, - prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable), - cTableWksp, cTableWkspSize); - FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed"); - if (Offtype == set_compressed) - fseMetadata->lastCountSize = countSize; - op += countSize; - fseMetadata->ofType = (symbolEncodingType_e) Offtype; - } } - /* build CTable for MatchLengths */ - { U32 MLtype; - unsigned max = MaxML; - size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, mlCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ - DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); - nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode; - MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode, - countWksp, max, mostFrequent, nbSeq, - MLFSELog, prevEntropy->matchlengthCTable, - ML_defaultNorm, ML_defaultNormLog, - ZSTD_defaultAllowed, strategy); - assert(!(MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, - countWksp, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML, - prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable), 
- cTableWksp, cTableWkspSize); - FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed"); - if (MLtype == set_compressed) - fseMetadata->lastCountSize = countSize; - op += countSize; - fseMetadata->mlType = (symbolEncodingType_e) MLtype; - } } - assert((size_t) (op-ostart) <= sizeof(fseMetadata->fseTablesBuffer)); - return op-ostart; -} - - -/* ZSTD_buildSuperBlockEntropy() : - * Builds entropy for the super-block. - * @return : 0 on success or error code */ -static size_t -ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr, - const ZSTD_entropyCTables_t* prevEntropy, - ZSTD_entropyCTables_t* nextEntropy, - const ZSTD_CCtx_params* cctxParams, - ZSTD_entropyCTablesMetadata_t* entropyMetadata, - void* workspace, size_t wkspSize) -{ - size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart; - DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy"); - entropyMetadata->hufMetadata.hufDesSize = - ZSTD_buildSuperBlockEntropy_literal(seqStorePtr->litStart, litSize, - &prevEntropy->huf, &nextEntropy->huf, - &entropyMetadata->hufMetadata, - ZSTD_disableLiteralsCompression(cctxParams), - workspace, wkspSize); - FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildSuperBlockEntropy_literal failed"); - entropyMetadata->fseMetadata.fseTablesSize = - ZSTD_buildSuperBlockEntropy_sequences(seqStorePtr, - &prevEntropy->fse, &nextEntropy->fse, - cctxParams, - &entropyMetadata->fseMetadata, - workspace, wkspSize); - FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildSuperBlockEntropy_sequences failed"); - return 0; -} - /* ZSTD_compressSubBlock_literal() : * Compresses literals section for a sub-block. * When we have to write the Huffman table we will sometimes choose a header @@ -411,8 +132,7 @@ static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef* const seqDef* sp = sstart; size_t matchLengthSum = 0; size_t litLengthSum = 0; - /* Only used by assert(), suppress unused variable warnings in production. 
*/ - (void)litLengthSum; + (void)(litLengthSum); /* suppress unused variable warning on some environments */ while (send-sp > 0) { ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp); litLengthSum += seqLen.litLength; @@ -605,7 +325,7 @@ static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t lit static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type, const BYTE* codeTable, unsigned maxCode, size_t nbSeq, const FSE_CTable* fseCTable, - const U32* additionalBits, + const U8* additionalBits, short const* defaultNorm, U32 defaultNormLog, U32 defaultMax, void* workspace, size_t wkspSize) { @@ -646,8 +366,9 @@ static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable, void* workspace, size_t wkspSize, int writeEntropy) { - size_t sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */ + size_t const sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */ size_t cSeqSizeEstimate = 0; + if (nbSeq == 0) return sequencesSectionHeaderSize; cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff, nbSeq, fseTables->offcodeCTable, NULL, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, @@ -754,7 +475,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr, /* I think there is an optimization opportunity here. * Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful * since it recalculates estimate from scratch. - * For example, it would recount literal distribution and symbol codes everytime. + * For example, it would recount literal distribution and symbol codes every time. 
*/ cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount, &nextCBlock->entropy, entropyMetadata, @@ -818,7 +539,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr, repcodes_t rep; ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep)); for (seq = sstart; seq < sp; ++seq) { - rep = ZSTD_updateRep(rep.rep, seq->offset - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0); + ZSTD_updateRep(rep.rep, seq->offBase - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0); } ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep)); } @@ -833,7 +554,7 @@ size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc, unsigned lastBlock) { ZSTD_entropyCTablesMetadata_t entropyMetadata; - FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(&zc->seqStore, + FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(&zc->seqStore, &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, &zc->appliedParams, diff --git a/lib/zstd/compress/zstd_cwksp.h b/lib/zstd/compress/zstd_cwksp.h index 98e359adf5d4..349fc923c355 100644 --- a/lib/zstd/compress/zstd_cwksp.h +++ b/lib/zstd/compress/zstd_cwksp.h @@ -32,6 +32,10 @@ #define ZSTD_CWKSP_ASAN_REDZONE_SIZE 128 #endif + +/* Set our tables and aligneds to align by 64 bytes */ +#define ZSTD_CWKSP_ALIGNMENT_BYTES 64 + /*-************************************* * Structures ***************************************/ @@ -114,10 +118,11 @@ typedef enum { * - Tables: these are any of several different datastructures (hash tables, * chain tables, binary trees) that all respect a common format: they are * uint32_t arrays, all of whose values are between 0 and (nextSrc - base). - * Their sizes depend on the cparams. + * Their sizes depend on the cparams. These tables are 64-byte aligned. * * - Aligned: these buffers are used for various purposes that require 4 byte - * alignment, but don't require any initialization before they're used. 
+ * alignment, but don't require any initialization before they're used. These + * buffers are each aligned to 64 bytes. * * - Buffers: these buffers are used for various purposes that don't require * any alignment or initialization before they're used. This means they can @@ -130,8 +135,7 @@ typedef enum { * * 1. Objects * 2. Buffers - * 3. Aligned - * 4. Tables + * 3. Aligned/Tables * * Attempts to reserve objects of different types out of order will fail. */ @@ -184,6 +188,8 @@ MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) { * Since tables aren't currently redzoned, you don't need to call through this * to figure out how much space you need for the matchState tables. Everything * else is though. + * + * Do not use for sizing aligned buffers. Instead, use ZSTD_cwksp_aligned_alloc_size(). */ MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) { if (size == 0) @@ -191,53 +197,55 @@ MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) { return size; } -MEM_STATIC void ZSTD_cwksp_internal_advance_phase( - ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) { - assert(phase >= ws->phase); - if (phase > ws->phase) { - if (ws->phase < ZSTD_cwksp_alloc_buffers && - phase >= ZSTD_cwksp_alloc_buffers) { - ws->tableValidEnd = ws->objectEnd; - } - if (ws->phase < ZSTD_cwksp_alloc_aligned && - phase >= ZSTD_cwksp_alloc_aligned) { - /* If unaligned allocations down from a too-large top have left us - * unaligned, we need to realign our alloc ptr. Technically, this - * can consume space that is unaccounted for in the neededSpace - * calculation. However, I believe this can only happen when the - * workspace is too large, and specifically when it is too large - * by a larger margin than the space that will be consumed. */ - /* TODO: cleaner, compiler warning friendly way to do this??? 
*/ - ws->allocStart = (BYTE*)ws->allocStart - ((size_t)ws->allocStart & (sizeof(U32)-1)); - if (ws->allocStart < ws->tableValidEnd) { - ws->tableValidEnd = ws->allocStart; - } - } - ws->phase = phase; - } +/* + * Returns an adjusted alloc size that is the nearest larger multiple of 64 bytes. + * Used to determine the number of bytes required for a given "aligned". + */ +MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size) { + return ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(size, ZSTD_CWKSP_ALIGNMENT_BYTES)); } /* - * Returns whether this object/buffer/etc was allocated in this workspace. + * Returns the amount of additional space the cwksp must allocate + * for internal purposes (currently only alignment). */ -MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) { - return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd); +MEM_STATIC size_t ZSTD_cwksp_slack_space_required(void) { + /* For alignment, the wksp will always allocate an additional n_1=[1, 64] bytes + * to align the beginning of tables section, as well as another n_2=[0, 63] bytes + * to align the beginning of the aligned section. + * + * n_1 + n_2 == 64 bytes if the cwksp is freshly allocated, due to tables and + * aligneds being sized in multiples of 64 bytes. + */ + size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES; + return slackSpace; +} + + +/* + * Return the number of additional bytes required to align a pointer to the given number of bytes. + * alignBytes must be a power of two. + */ +MEM_STATIC size_t ZSTD_cwksp_bytes_to_align_ptr(void* ptr, const size_t alignBytes) { + size_t const alignBytesMask = alignBytes - 1; + size_t const bytes = (alignBytes - ((size_t)ptr & (alignBytesMask))) & alignBytesMask; + assert((alignBytes & alignBytesMask) == 0); + assert(bytes != ZSTD_CWKSP_ALIGNMENT_BYTES); + return bytes; } /* * Internal function. Do not use directly. 
+ * Reserves the given number of bytes within the aligned/buffer segment of the wksp, + * which counts from the end of the wksp (as opposed to the object/table segment). + * + * Returns a pointer to the beginning of that space. */ -MEM_STATIC void* ZSTD_cwksp_reserve_internal( - ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) { - void* alloc; - void* bottom = ws->tableEnd; - ZSTD_cwksp_internal_advance_phase(ws, phase); - alloc = (BYTE *)ws->allocStart - bytes; - - if (bytes == 0) - return NULL; - - +MEM_STATIC void* +ZSTD_cwksp_reserve_internal_buffer_space(ZSTD_cwksp* ws, size_t const bytes) +{ + void* const alloc = (BYTE*)ws->allocStart - bytes; + void* const bottom = ws->tableEnd; DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining", alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes); ZSTD_cwksp_assert_internal_consistency(ws); @@ -247,10 +255,81 @@ MEM_STATIC void* ZSTD_cwksp_reserve_internal( ws->allocFailed = 1; return NULL; } + /* the area is reserved from the end of wksp. + * If it overlaps with tableValidEnd, it voids guarantees on values' range */ if (alloc < ws->tableValidEnd) { ws->tableValidEnd = alloc; } ws->allocStart = alloc; + return alloc; +} + +/* + * Moves the cwksp to the next phase, and does any necessary allocations. + * cwksp initialization must necessarily go through each phase in order. + * Returns a 0 on success, or zstd error + */ +MEM_STATIC size_t +ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) +{ + assert(phase >= ws->phase); + if (phase > ws->phase) { + /* Going from allocating objects to allocating buffers */ + if (ws->phase < ZSTD_cwksp_alloc_buffers && + phase >= ZSTD_cwksp_alloc_buffers) { + ws->tableValidEnd = ws->objectEnd; + } + + /* Going from allocating buffers to allocating aligneds/tables */ + if (ws->phase < ZSTD_cwksp_alloc_aligned && + phase >= ZSTD_cwksp_alloc_aligned) { + { /* Align the start of the "aligned" to 64 bytes. Use [1, 64] bytes. 
*/ + size_t const bytesToAlign = + ZSTD_CWKSP_ALIGNMENT_BYTES - ZSTD_cwksp_bytes_to_align_ptr(ws->allocStart, ZSTD_CWKSP_ALIGNMENT_BYTES); + DEBUGLOG(5, "reserving aligned alignment addtl space: %zu", bytesToAlign); + ZSTD_STATIC_ASSERT((ZSTD_CWKSP_ALIGNMENT_BYTES & (ZSTD_CWKSP_ALIGNMENT_BYTES - 1)) == 0); /* power of 2 */ + RETURN_ERROR_IF(!ZSTD_cwksp_reserve_internal_buffer_space(ws, bytesToAlign), + memory_allocation, "aligned phase - alignment initial allocation failed!"); + } + { /* Align the start of the tables to 64 bytes. Use [0, 63] bytes */ + void* const alloc = ws->objectEnd; + size_t const bytesToAlign = ZSTD_cwksp_bytes_to_align_ptr(alloc, ZSTD_CWKSP_ALIGNMENT_BYTES); + void* const objectEnd = (BYTE*)alloc + bytesToAlign; + DEBUGLOG(5, "reserving table alignment addtl space: %zu", bytesToAlign); + RETURN_ERROR_IF(objectEnd > ws->workspaceEnd, memory_allocation, + "table phase - alignment initial allocation failed!"); + ws->objectEnd = objectEnd; + ws->tableEnd = objectEnd; /* table area starts being empty */ + if (ws->tableValidEnd < ws->tableEnd) { + ws->tableValidEnd = ws->tableEnd; + } } } + ws->phase = phase; + ZSTD_cwksp_assert_internal_consistency(ws); + } + return 0; +} + +/* + * Returns whether this object/buffer/etc was allocated in this workspace. + */ +MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) +{ + return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd); +} + +/* + * Internal function. Do not use directly. + */ +MEM_STATIC void* +ZSTD_cwksp_reserve_internal(ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) +{ + void* alloc; + if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase)) || bytes == 0) { + return NULL; + } + + + alloc = ZSTD_cwksp_reserve_internal_buffer_space(ws, bytes); return alloc; @@ -259,33 +338,44 @@ MEM_STATIC void* ZSTD_cwksp_reserve_internal( /* * Reserves and returns unaligned memory. 
*/ -MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) { +MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) +{ return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers); } /* - * Reserves and returns memory sized on and aligned on sizeof(unsigned). + * Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes). */ -MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) { - assert((bytes & (sizeof(U32)-1)) == 0); - return ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, sizeof(U32)), ZSTD_cwksp_alloc_aligned); +MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) +{ + void* ptr = ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES), + ZSTD_cwksp_alloc_aligned); + assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0); + return ptr; } /* - * Aligned on sizeof(unsigned). These buffers have the special property that + * Aligned on 64 bytes. These buffers have the special property that * their values remain constrained, allowing us to re-use them without * memset()-ing them. 
*/ -MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) { +MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) +{ const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned; - void* alloc = ws->tableEnd; - void* end = (BYTE *)alloc + bytes; - void* top = ws->allocStart; + void* alloc; + void* end; + void* top; + + if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) { + return NULL; + } + alloc = ws->tableEnd; + end = (BYTE *)alloc + bytes; + top = ws->allocStart; DEBUGLOG(5, "cwksp: reserving %p table %zd bytes, %zd bytes remaining", alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes); assert((bytes & (sizeof(U32)-1)) == 0); - ZSTD_cwksp_internal_advance_phase(ws, phase); ZSTD_cwksp_assert_internal_consistency(ws); assert(end <= top); if (end > top) { @@ -296,27 +386,31 @@ MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) { ws->tableEnd = end; + assert((bytes & (ZSTD_CWKSP_ALIGNMENT_BYTES-1)) == 0); + assert(((size_t)alloc & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0); return alloc; } /* * Aligned on sizeof(void*). 
+ * Note : should happen only once, at workspace first initialization */ -MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) { - size_t roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*)); +MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) +{ + size_t const roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*)); void* alloc = ws->objectEnd; void* end = (BYTE*)alloc + roundedBytes; - DEBUGLOG(5, + DEBUGLOG(4, "cwksp: reserving %p object %zd bytes (rounded to %zd), %zd bytes remaining", alloc, bytes, roundedBytes, ZSTD_cwksp_available_space(ws) - roundedBytes); - assert(((size_t)alloc & (sizeof(void*)-1)) == 0); - assert((bytes & (sizeof(void*)-1)) == 0); + assert((size_t)alloc % ZSTD_ALIGNOF(void*) == 0); + assert(bytes % ZSTD_ALIGNOF(void*) == 0); ZSTD_cwksp_assert_internal_consistency(ws); /* we must be in the first phase, no advance is possible */ if (ws->phase != ZSTD_cwksp_alloc_objects || end > ws->workspaceEnd) { - DEBUGLOG(4, "cwksp: object alloc failed!"); + DEBUGLOG(3, "cwksp: object alloc failed!"); ws->allocFailed = 1; return NULL; } @@ -328,7 +422,8 @@ MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) { return alloc; } -MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) { +MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) +{ DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty"); @@ -451,6 +546,24 @@ MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) { * Functions Checking Free Space ***************************************/ +/* ZSTD_alignmentSpaceWithinBounds() : + * Returns if the estimated space needed for a wksp is within an acceptable limit of the + * actual amount of space used. 
+ */ +MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp* const ws, + size_t const estimatedSpace, int resizedWorkspace) { + if (resizedWorkspace) { + /* Resized/newly allocated wksp should have exact bounds */ + return ZSTD_cwksp_used(ws) == estimatedSpace; + } else { + /* Due to alignment, when reusing a workspace, we can actually consume 63 fewer or more bytes + * than estimatedSpace. See the comments in zstd_cwksp.h for details. + */ + return (ZSTD_cwksp_used(ws) >= estimatedSpace - 63) && (ZSTD_cwksp_used(ws) <= estimatedSpace + 63); + } +} + + MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws) { return (size_t)((BYTE*)ws->allocStart - (BYTE*)ws->tableEnd); } diff --git a/lib/zstd/compress/zstd_double_fast.c b/lib/zstd/compress/zstd_double_fast.c index b0424d23ac57..76933dea2624 100644 --- a/lib/zstd/compress/zstd_double_fast.c +++ b/lib/zstd/compress/zstd_double_fast.c @@ -48,10 +48,216 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, FORCE_INLINE_TEMPLATE -size_t ZSTD_compressBlock_doubleFast_generic( +size_t ZSTD_compressBlock_doubleFast_noDict_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, U32 const mls /* template */) +{ + ZSTD_compressionParameters const* cParams = &ms->cParams; + U32* const hashLong = ms->hashTable; + const U32 hBitsL = cParams->hashLog; + U32* const hashSmall = ms->chainTable; + const U32 hBitsS = cParams->chainLog; + const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + const BYTE* anchor = istart; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + /* presumes that, if there is a dictionary, it must be using Attach mode */ + const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); + const BYTE* const prefixLowest = base + prefixLowestIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - HASH_READ_SIZE; + U32 
offset_1=rep[0], offset_2=rep[1]; + U32 offsetSaved = 0; + + size_t mLength; + U32 offset; + U32 curr; + + /* how many positions to search before increasing step size */ + const size_t kStepIncr = 1 << kSearchStrength; + /* the position at which to increment the step size if no match is found */ + const BYTE* nextStep; + size_t step; /* the current step size */ + + size_t hl0; /* the long hash at ip */ + size_t hl1; /* the long hash at ip1 */ + + U32 idxl0; /* the long match index for ip */ + U32 idxl1; /* the long match index for ip1 */ + + const BYTE* matchl0; /* the long match for ip */ + const BYTE* matchs0; /* the short match for ip */ + const BYTE* matchl1; /* the long match for ip1 */ + + const BYTE* ip = istart; /* the current position */ + const BYTE* ip1; /* the next position */ + + DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_noDict_generic"); + + /* init */ + ip += ((ip - prefixLowest) == 0); + { + U32 const current = (U32)(ip - base); + U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog); + U32 const maxRep = current - windowLow; + if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; + if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; + } + + /* Outer Loop: one iteration per match found and stored */ + while (1) { + step = 1; + nextStep = ip + kStepIncr; + ip1 = ip + step; + + if (ip1 > ilimit) { + goto _cleanup; + } + + hl0 = ZSTD_hashPtr(ip, hBitsL, 8); + idxl0 = hashLong[hl0]; + matchl0 = base + idxl0; + + /* Inner Loop: one iteration per search / position */ + do { + const size_t hs0 = ZSTD_hashPtr(ip, hBitsS, mls); + const U32 idxs0 = hashSmall[hs0]; + curr = (U32)(ip-base); + matchs0 = base + idxs0; + + hashLong[hl0] = hashSmall[hs0] = curr; /* update hash tables */ + + /* check noDict repcode */ + if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { + mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; + ip++; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 
STORE_REPCODE_1, mLength); + goto _match_stored; + } + + hl1 = ZSTD_hashPtr(ip1, hBitsL, 8); + + if (idxl0 > prefixLowestIndex) { + /* check prefix long match */ + if (MEM_read64(matchl0) == MEM_read64(ip)) { + mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8; + offset = (U32)(ip-matchl0); + while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */ + goto _match_found; + } + } + + idxl1 = hashLong[hl1]; + matchl1 = base + idxl1; + + if (idxs0 > prefixLowestIndex) { + /* check prefix short match */ + if (MEM_read32(matchs0) == MEM_read32(ip)) { + goto _search_next_long; + } + } + + if (ip1 >= nextStep) { + PREFETCH_L1(ip1 + 64); + PREFETCH_L1(ip1 + 128); + step++; + nextStep += kStepIncr; + } + ip = ip1; + ip1 += step; + + hl0 = hl1; + idxl0 = idxl1; + matchl0 = matchl1; + #if defined(__aarch64__) + PREFETCH_L1(ip+256); + #endif + } while (ip1 <= ilimit); + +_cleanup: + /* save reps for next block */ + rep[0] = offset_1 ? offset_1 : offsetSaved; + rep[1] = offset_2 ? 
offset_2 : offsetSaved; + + /* Return the last literals size */ + return (size_t)(iend - anchor); + +_search_next_long: + + /* check prefix long +1 match */ + if (idxl1 > prefixLowestIndex) { + if (MEM_read64(matchl1) == MEM_read64(ip1)) { + ip = ip1; + mLength = ZSTD_count(ip+8, matchl1+8, iend) + 8; + offset = (U32)(ip-matchl1); + while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; } /* catch up */ + goto _match_found; + } + } + + /* if no long +1 match, explore the short match we found */ + mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4; + offset = (U32)(ip - matchs0); + while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* catch up */ + + /* fall-through */ + +_match_found: /* requires ip, offset, mLength */ + offset_2 = offset_1; + offset_1 = offset; + + if (step < 4) { + /* It is unsafe to write this value back to the hashtable when ip1 is + * greater than or equal to the new ip we will have after we're done + * processing this match. Rather than perform that test directly + * (ip1 >= ip + mLength), which costs speed in practice, we do a simpler + * more predictable test. The minmatch even if we take a short match is + * 4 bytes, so as long as step, the distance between ip and ip1 + * (initially) is less than 4, we know ip1 < new ip. 
*/ + hashLong[hl1] = (U32)(ip1 - base); + } + + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); + +_match_stored: + /* match found */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Complementary insertion */ + /* done after iLimit test, as candidates could be > iend-8 */ + { U32 const indexToInsert = curr+2; + hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; + hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); + hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; + hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); + } + + /* check immediate repcode */ + while ( (ip <= ilimit) + && ( (offset_2>0) + & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { + /* store sequence */ + size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; + U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); + ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, rLength); + ip += rLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } + } + } +} + + +FORCE_INLINE_TEMPLATE +size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize, - U32 const mls /* template */, ZSTD_dictMode_e const dictMode) + U32 const mls /* template */) { ZSTD_compressionParameters const* cParams = &ms->cParams; U32* const hashLong = ms->hashTable; @@ -72,54 +278,30 @@ size_t ZSTD_compressBlock_doubleFast_generic( U32 offsetSaved = 0; const ZSTD_matchState_t* const dms = ms->dictMatchState; - const ZSTD_compressionParameters* const dictCParams = - dictMode == ZSTD_dictMatchState ? - &dms->cParams : NULL; - const U32* const dictHashLong = dictMode == ZSTD_dictMatchState ? 
- dms->hashTable : NULL; - const U32* const dictHashSmall = dictMode == ZSTD_dictMatchState ? - dms->chainTable : NULL; - const U32 dictStartIndex = dictMode == ZSTD_dictMatchState ? - dms->window.dictLimit : 0; - const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ? - dms->window.base : NULL; - const BYTE* const dictStart = dictMode == ZSTD_dictMatchState ? - dictBase + dictStartIndex : NULL; - const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ? - dms->window.nextSrc : NULL; - const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ? - prefixLowestIndex - (U32)(dictEnd - dictBase) : - 0; - const U32 dictHBitsL = dictMode == ZSTD_dictMatchState ? - dictCParams->hashLog : hBitsL; - const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ? - dictCParams->chainLog : hBitsS; + const ZSTD_compressionParameters* const dictCParams = &dms->cParams; + const U32* const dictHashLong = dms->hashTable; + const U32* const dictHashSmall = dms->chainTable; + const U32 dictStartIndex = dms->window.dictLimit; + const BYTE* const dictBase = dms->window.base; + const BYTE* const dictStart = dictBase + dictStartIndex; + const BYTE* const dictEnd = dms->window.nextSrc; + const U32 dictIndexDelta = prefixLowestIndex - (U32)(dictEnd - dictBase); + const U32 dictHBitsL = dictCParams->hashLog; + const U32 dictHBitsS = dictCParams->chainLog; const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart)); - DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic"); - - assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState); + DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic"); /* if a dictionary is attached, it must be within window range */ - if (dictMode == ZSTD_dictMatchState) { - assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex); - } + assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex); /* init */ ip += (dictAndPrefixLength == 0); - if (dictMode == ZSTD_noDict) { - U32 const curr = 
(U32)(ip - base); - U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog); - U32 const maxRep = curr - windowLow; - if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; - if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; - } - if (dictMode == ZSTD_dictMatchState) { - /* dictMatchState repCode checks don't currently handle repCode == 0 - * disabling. */ - assert(offset_1 <= dictAndPrefixLength); - assert(offset_2 <= dictAndPrefixLength); - } + + /* dictMatchState repCode checks don't currently handle repCode == 0 + * disabling. */ + assert(offset_1 <= dictAndPrefixLength); + assert(offset_2 <= dictAndPrefixLength); /* Main Search Loop */ while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ @@ -135,29 +317,18 @@ size_t ZSTD_compressBlock_doubleFast_generic( const BYTE* matchLong = base + matchIndexL; const BYTE* match = base + matchIndexS; const U32 repIndex = curr + 1 - offset_1; - const BYTE* repMatch = (dictMode == ZSTD_dictMatchState - && repIndex < prefixLowestIndex) ? + const BYTE* repMatch = (repIndex < prefixLowestIndex) ? dictBase + (repIndex - dictIndexDelta) : base + repIndex; hashLong[h2] = hashSmall[h] = curr; /* update hash tables */ - /* check dictMatchState repcode */ - if (dictMode == ZSTD_dictMatchState - && ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + /* check repcode */ + if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? 
dictEnd : iend; mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; ip++; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); - goto _match_stored; - } - - /* check noDict repcode */ - if ( dictMode == ZSTD_noDict - && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { - mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; - ip++; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength); goto _match_stored; } @@ -169,7 +340,7 @@ size_t ZSTD_compressBlock_doubleFast_generic( while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ goto _match_found; } - } else if (dictMode == ZSTD_dictMatchState) { + } else { /* check dictMatchState long match */ U32 const dictMatchIndexL = dictHashLong[dictHL]; const BYTE* dictMatchL = dictBase + dictMatchIndexL; @@ -187,7 +358,7 @@ size_t ZSTD_compressBlock_doubleFast_generic( if (MEM_read32(match) == MEM_read32(ip)) { goto _search_next_long; } - } else if (dictMode == ZSTD_dictMatchState) { + } else { /* check dictMatchState short match */ U32 const dictMatchIndexS = dictHashSmall[dictHS]; match = dictBase + dictMatchIndexS; @@ -220,7 +391,7 @@ _search_next_long: while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */ goto _match_found; } - } else if (dictMode == ZSTD_dictMatchState) { + } else { /* check dict long +1 match */ U32 const dictMatchIndexL3 = dictHashLong[dictHLNext]; const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3; @@ -234,7 +405,7 @@ _search_next_long: } } } /* if no long +1 match, explore the short match we found */ - if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) { + if (matchIndexS < prefixLowestIndex) { mLength = ZSTD_count_2segments(ip+4, match+4, 
iend, dictEnd, prefixLowest) + 4; offset = (U32)(curr - matchIndexS); while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ @@ -248,7 +419,7 @@ _match_found: offset_2 = offset_1; offset_1 = offset; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); _match_stored: /* match found */ @@ -266,43 +437,27 @@ _match_stored: } /* check immediate repcode */ - if (dictMode == ZSTD_dictMatchState) { - while (ip <= ilimit) { - U32 const current2 = (U32)(ip-base); - U32 const repIndex2 = current2 - offset_2; - const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState - && repIndex2 < prefixLowestIndex ? - dictBase + repIndex2 - dictIndexDelta : - base + repIndex2; - if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) - && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { - const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? 
dictEnd : iend; - size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4; - U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); - hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; - hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; - ip += repLength2; - anchor = ip; - continue; - } - break; - } } - - if (dictMode == ZSTD_noDict) { - while ( (ip <= ilimit) - && ( (offset_2>0) - & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { - /* store sequence */ - size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; - U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ - hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); - hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); - ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH); - ip += rLength; + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ? + dictBase + repIndex2 - dictIndexDelta : + base + repIndex2; + if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4; + U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2); + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; + ip += repLength2; anchor = ip; - continue; /* faster when present ... (?) 
*/ - } } } + continue; + } + break; + } + } } /* while (ip < ilimit) */ /* save reps for next block */ @@ -313,6 +468,24 @@ _match_stored: return (size_t)(iend - anchor); } +#define ZSTD_GEN_DFAST_FN(dictMode, mls) \ + static size_t ZSTD_compressBlock_doubleFast_##dictMode##_##mls( \ + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \ + void const* src, size_t srcSize) \ + { \ + return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \ + } + +ZSTD_GEN_DFAST_FN(noDict, 4) +ZSTD_GEN_DFAST_FN(noDict, 5) +ZSTD_GEN_DFAST_FN(noDict, 6) +ZSTD_GEN_DFAST_FN(noDict, 7) + +ZSTD_GEN_DFAST_FN(dictMatchState, 4) +ZSTD_GEN_DFAST_FN(dictMatchState, 5) +ZSTD_GEN_DFAST_FN(dictMatchState, 6) +ZSTD_GEN_DFAST_FN(dictMatchState, 7) + size_t ZSTD_compressBlock_doubleFast( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], @@ -323,13 +496,13 @@ size_t ZSTD_compressBlock_doubleFast( { default: /* includes case 3 */ case 4 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict); + return ZSTD_compressBlock_doubleFast_noDict_4(ms, seqStore, rep, src, srcSize); case 5 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict); + return ZSTD_compressBlock_doubleFast_noDict_5(ms, seqStore, rep, src, srcSize); case 6 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict); + return ZSTD_compressBlock_doubleFast_noDict_6(ms, seqStore, rep, src, srcSize); case 7 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict); + return ZSTD_compressBlock_doubleFast_noDict_7(ms, seqStore, rep, src, srcSize); } } @@ -343,13 +516,13 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState( { default: /* includes case 3 */ case 4 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState); + return 
ZSTD_compressBlock_doubleFast_dictMatchState_4(ms, seqStore, rep, src, srcSize); case 5 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState); + return ZSTD_compressBlock_doubleFast_dictMatchState_5(ms, seqStore, rep, src, srcSize); case 6 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState); + return ZSTD_compressBlock_doubleFast_dictMatchState_6(ms, seqStore, rep, src, srcSize); case 7 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState); + return ZSTD_compressBlock_doubleFast_dictMatchState_7(ms, seqStore, rep, src, srcSize); } } @@ -385,7 +558,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( /* if extDict is invalidated due to maxDistance, switch to "regular" variant */ if (prefixStartIndex == dictStartIndex) - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict); + return ZSTD_compressBlock_doubleFast(ms, seqStore, rep, src, srcSize); /* Search Loop */ while (ip < ilimit) { /* < instead of <=, because (ip+1) */ @@ -407,12 +580,12 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */ if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */ - & (repIndex > dictStartIndex)) + & (offset_1 <= curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */ && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { const BYTE* repMatchEnd = repIndex < prefixStartIndex ? 
dictEnd : iend; mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; ip++; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength); } else { if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) { const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend; @@ -423,7 +596,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ offset_2 = offset_1; offset_1 = offset; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) { size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8); @@ -448,7 +621,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( } offset_2 = offset_1; offset_1 = offset; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); } else { ip += ((ip-anchor) >> kSearchStrength) + 1; @@ -475,12 +648,12 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( U32 const repIndex2 = current2 - offset_2; const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */ - & (repIndex2 > dictStartIndex)) + & (offset_2 <= current2 - dictStartIndex)) && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? 
dictEnd : iend; size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); + ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2); hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; ip += repLength2; @@ -498,6 +671,10 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( return (size_t)(iend - anchor); } +ZSTD_GEN_DFAST_FN(extDict, 4) +ZSTD_GEN_DFAST_FN(extDict, 5) +ZSTD_GEN_DFAST_FN(extDict, 6) +ZSTD_GEN_DFAST_FN(extDict, 7) size_t ZSTD_compressBlock_doubleFast_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], @@ -508,12 +685,12 @@ size_t ZSTD_compressBlock_doubleFast_extDict( { default: /* includes case 3 */ case 4 : - return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 4); + return ZSTD_compressBlock_doubleFast_extDict_4(ms, seqStore, rep, src, srcSize); case 5 : - return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 5); + return ZSTD_compressBlock_doubleFast_extDict_5(ms, seqStore, rep, src, srcSize); case 6 : - return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 6); + return ZSTD_compressBlock_doubleFast_extDict_6(ms, seqStore, rep, src, srcSize); case 7 : - return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 7); + return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize); } } diff --git a/lib/zstd/compress/zstd_fast.c b/lib/zstd/compress/zstd_fast.c index 96b7d48e2868..a752e6beab52 100644 --- a/lib/zstd/compress/zstd_fast.c +++ b/lib/zstd/compress/zstd_fast.c @@ -43,145 +43,294 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms, } +/* + * If you squint hard enough (and ignore repcodes), the 
search operation at any + * given position is broken into 4 stages: + * + * 1. Hash (map position to hash value via input read) + * 2. Lookup (map hash val to index via hashtable read) + * 3. Load (map index to value at that position via input read) + * 4. Compare + * + * Each of these steps involves a memory read at an address which is computed + * from the previous step. This means these steps must be sequenced and their + * latencies are cumulative. + * + * Rather than do 1->2->3->4 sequentially for a single position before moving + * onto the next, this implementation interleaves these operations across the + * next few positions: + * + * R = Repcode Read & Compare + * H = Hash + * T = Table Lookup + * M = Match Read & Compare + * + * Pos | Time --> + * ----+------------------- + * N | ... M + * N+1 | ... TM + * N+2 | R H T M + * N+3 | H TM + * N+4 | R H T M + * N+5 | H ... + * N+6 | R ... + * + * This is very much analogous to the pipelining of execution in a CPU. And just + * like a CPU, we have to dump the pipeline when we find a match (i.e., take a + * branch). + * + * When this happens, we throw away our current state, and do the following prep + * to re-enter the loop: + * + * Pos | Time --> + * ----+------------------- + * N | H T + * N+1 | H + * + * This is also the work we do at the beginning to enter the loop initially. + */ FORCE_INLINE_TEMPLATE size_t -ZSTD_compressBlock_fast_generic( +ZSTD_compressBlock_fast_noDict_generic( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize, - U32 const mls) + U32 const mls, U32 const hasStep) { const ZSTD_compressionParameters* const cParams = &ms->cParams; U32* const hashTable = ms->hashTable; U32 const hlog = cParams->hashLog; /* support stepSize of 0 */ - size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1; + size_t const stepSize = hasStep ? 
(cParams->targetLength + !(cParams->targetLength) + 1) : 2; const BYTE* const base = ms->window.base; const BYTE* const istart = (const BYTE*)src; - /* We check ip0 (ip + 0) and ip1 (ip + 1) each loop */ - const BYTE* ip0 = istart; - const BYTE* ip1; - const BYTE* anchor = istart; const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); const U32 prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); const BYTE* const prefixStart = base + prefixStartIndex; const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - HASH_READ_SIZE; - U32 offset_1=rep[0], offset_2=rep[1]; + + const BYTE* anchor = istart; + const BYTE* ip0 = istart; + const BYTE* ip1; + const BYTE* ip2; + const BYTE* ip3; + U32 current0; + + U32 rep_offset1 = rep[0]; + U32 rep_offset2 = rep[1]; U32 offsetSaved = 0; - /* init */ + size_t hash0; /* hash for ip0 */ + size_t hash1; /* hash for ip1 */ + U32 idx; /* match idx for ip0 */ + U32 mval; /* src value at match idx */ + + U32 offcode; + const BYTE* match0; + size_t mLength; + + /* ip0 and ip1 are always adjacent. The targetLength skipping and + * uncompressibility acceleration is applied to every other position, + * matching the behavior of #1562. step therefore represents the gap + * between pairs of positions, from ip0 to ip2 or ip1 to ip3. 
*/ + size_t step; + const BYTE* nextStep; + const size_t kStepIncr = (1 << (kSearchStrength - 1)); + DEBUGLOG(5, "ZSTD_compressBlock_fast_generic"); ip0 += (ip0 == prefixStart); - ip1 = ip0 + 1; { U32 const curr = (U32)(ip0 - base); U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog); U32 const maxRep = curr - windowLow; - if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; - if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; + if (rep_offset2 > maxRep) offsetSaved = rep_offset2, rep_offset2 = 0; + if (rep_offset1 > maxRep) offsetSaved = rep_offset1, rep_offset1 = 0; } - /* Main Search Loop */ -#ifdef __INTEL_COMPILER - /* From intel 'The vector pragma indicates that the loop should be - * vectorized if it is legal to do so'. Can be used together with - * #pragma ivdep (but have opted to exclude that because intel - * warns against using it).*/ - #pragma vector always -#endif - while (ip1 < ilimit) { /* < instead of <=, because check at ip0+2 */ - size_t mLength; - BYTE const* ip2 = ip0 + 2; - size_t const h0 = ZSTD_hashPtr(ip0, hlog, mls); - U32 const val0 = MEM_read32(ip0); - size_t const h1 = ZSTD_hashPtr(ip1, hlog, mls); - U32 const val1 = MEM_read32(ip1); - U32 const current0 = (U32)(ip0-base); - U32 const current1 = (U32)(ip1-base); - U32 const matchIndex0 = hashTable[h0]; - U32 const matchIndex1 = hashTable[h1]; - BYTE const* repMatch = ip2 - offset_1; - const BYTE* match0 = base + matchIndex0; - const BYTE* match1 = base + matchIndex1; - U32 offcode; + /* start each op */ +_start: /* Requires: ip0 */ -#if defined(__aarch64__) - PREFETCH_L1(ip0+256); -#endif + step = stepSize; + nextStep = ip0 + kStepIncr; - hashTable[h0] = current0; /* update hash table */ - hashTable[h1] = current1; /* update hash table */ + /* calculate positions, ip0 - anchor == 0, so we skip step calc */ + ip1 = ip0 + 1; + ip2 = ip0 + step; + ip3 = ip2 + 1; - assert(ip0 + 1 == ip1); + if (ip3 >= ilimit) { + goto _cleanup; + } - if ((offset_1 
> 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) { - mLength = (ip2[-1] == repMatch[-1]) ? 1 : 0; - ip0 = ip2 - mLength; - match0 = repMatch - mLength; + hash0 = ZSTD_hashPtr(ip0, hlog, mls); + hash1 = ZSTD_hashPtr(ip1, hlog, mls); + + idx = hashTable[hash0]; + + do { + /* load repcode match for ip[2]*/ + const U32 rval = MEM_read32(ip2 - rep_offset1); + + /* write back hash table entry */ + current0 = (U32)(ip0 - base); + hashTable[hash0] = current0; + + /* check repcode at ip[2] */ + if ((MEM_read32(ip2) == rval) & (rep_offset1 > 0)) { + ip0 = ip2; + match0 = ip0 - rep_offset1; + mLength = ip0[-1] == match0[-1]; + ip0 -= mLength; + match0 -= mLength; + offcode = STORE_REPCODE_1; mLength += 4; - offcode = 0; goto _match; } - if ((matchIndex0 > prefixStartIndex) && MEM_read32(match0) == val0) { - /* found a regular match */ + + /* load match for ip[0] */ + if (idx >= prefixStartIndex) { + mval = MEM_read32(base + idx); + } else { + mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */ + } + + /* check match at ip[0] */ + if (MEM_read32(ip0) == mval) { + /* found a match! */ goto _offset; } - if ((matchIndex1 > prefixStartIndex) && MEM_read32(match1) == val1) { - /* found a regular match after one literal */ - ip0 = ip1; - match0 = match1; + + /* lookup ip[1] */ + idx = hashTable[hash1]; + + /* hash ip[2] */ + hash0 = hash1; + hash1 = ZSTD_hashPtr(ip2, hlog, mls); + + /* advance to next positions */ + ip0 = ip1; + ip1 = ip2; + ip2 = ip3; + + /* write back hash table entry */ + current0 = (U32)(ip0 - base); + hashTable[hash0] = current0; + + /* load match for ip[0] */ + if (idx >= prefixStartIndex) { + mval = MEM_read32(base + idx); + } else { + mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */ + } + + /* check match at ip[0] */ + if (MEM_read32(ip0) == mval) { + /* found a match! 
*/ goto _offset; } - { size_t const step = ((size_t)(ip0-anchor) >> (kSearchStrength - 1)) + stepSize; - assert(step >= 2); - ip0 += step; - ip1 += step; - continue; + + /* lookup ip[1] */ + idx = hashTable[hash1]; + + /* hash ip[2] */ + hash0 = hash1; + hash1 = ZSTD_hashPtr(ip2, hlog, mls); + + /* advance to next positions */ + ip0 = ip1; + ip1 = ip2; + ip2 = ip0 + step; + ip3 = ip1 + step; + + /* calculate step */ + if (ip2 >= nextStep) { + step++; + PREFETCH_L1(ip1 + 64); + PREFETCH_L1(ip1 + 128); + nextStep += kStepIncr; } -_offset: /* Requires: ip0, match0 */ - /* Compute the offset code */ - offset_2 = offset_1; - offset_1 = (U32)(ip0-match0); - offcode = offset_1 + ZSTD_REP_MOVE; - mLength = 4; - /* Count the backwards match length */ - while (((ip0>anchor) & (match0>prefixStart)) - && (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */ + } while (ip3 < ilimit); -_match: /* Requires: ip0, match0, offcode */ - /* Count the forward length */ - mLength += ZSTD_count(ip0+mLength, match0+mLength, iend); - ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH); - /* match found */ - ip0 += mLength; - anchor = ip0; - - if (ip0 <= ilimit) { - /* Fill Table */ - assert(base+current0+2 > istart); /* check base overflow */ - hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */ - hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base); - - if (offset_2 > 0) { /* offset_2==0 means offset_2 is invalidated */ - while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) { - /* store sequence */ - size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4; - { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ - hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base); - ip0 += rLength; - ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH); - 
anchor = ip0; - continue; /* faster when present (confirmed on gcc-8) ... (?) */ - } } } - ip1 = ip0 + 1; - } +_cleanup: + /* Note that there are probably still a couple positions we could search. + * However, it seems to be a meaningful performance hit to try to search + * them. So let's not. */ /* save reps for next block */ - rep[0] = offset_1 ? offset_1 : offsetSaved; - rep[1] = offset_2 ? offset_2 : offsetSaved; + rep[0] = rep_offset1 ? rep_offset1 : offsetSaved; + rep[1] = rep_offset2 ? rep_offset2 : offsetSaved; /* Return the last literals size */ return (size_t)(iend - anchor); + +_offset: /* Requires: ip0, idx */ + + /* Compute the offset code. */ + match0 = base + idx; + rep_offset2 = rep_offset1; + rep_offset1 = (U32)(ip0-match0); + offcode = STORE_OFFSET(rep_offset1); + mLength = 4; + + /* Count the backwards match length. */ + while (((ip0>anchor) & (match0>prefixStart)) && (ip0[-1] == match0[-1])) { + ip0--; + match0--; + mLength++; + } + +_match: /* Requires: ip0, match0, offcode */ + + /* Count the forward length. */ + mLength += ZSTD_count(ip0 + mLength, match0 + mLength, iend); + + ZSTD_storeSeq(seqStore, (size_t)(ip0 - anchor), anchor, iend, offcode, mLength); + + ip0 += mLength; + anchor = ip0; + + /* write next hash table entry */ + if (ip1 < ip0) { + hashTable[hash1] = (U32)(ip1 - base); + } + + /* Fill table and check for immediate repcode. 
*/ + if (ip0 <= ilimit) { + /* Fill Table */ + assert(base+current0+2 > istart); /* check base overflow */ + hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */ + hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base); + + if (rep_offset2 > 0) { /* rep_offset2==0 means rep_offset2 is invalidated */ + while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - rep_offset2)) ) { + /* store sequence */ + size_t const rLength = ZSTD_count(ip0+4, ip0+4-rep_offset2, iend) + 4; + { U32 const tmpOff = rep_offset2; rep_offset2 = rep_offset1; rep_offset1 = tmpOff; } /* swap rep_offset2 <=> rep_offset1 */ + hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base); + ip0 += rLength; + ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, STORE_REPCODE_1, rLength); + anchor = ip0; + continue; /* faster when present (confirmed on gcc-8) ... (?) */ + } } } + + goto _start; } +#define ZSTD_GEN_FAST_FN(dictMode, mls, step) \ + static size_t ZSTD_compressBlock_fast_##dictMode##_##mls##_##step( \ + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \ + void const* src, size_t srcSize) \ + { \ + return ZSTD_compressBlock_fast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls, step); \ + } + +ZSTD_GEN_FAST_FN(noDict, 4, 1) +ZSTD_GEN_FAST_FN(noDict, 5, 1) +ZSTD_GEN_FAST_FN(noDict, 6, 1) +ZSTD_GEN_FAST_FN(noDict, 7, 1) + +ZSTD_GEN_FAST_FN(noDict, 4, 0) +ZSTD_GEN_FAST_FN(noDict, 5, 0) +ZSTD_GEN_FAST_FN(noDict, 6, 0) +ZSTD_GEN_FAST_FN(noDict, 7, 0) size_t ZSTD_compressBlock_fast( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], @@ -189,24 +338,40 @@ size_t ZSTD_compressBlock_fast( { U32 const mls = ms->cParams.minMatch; assert(ms->dictMatchState == NULL); - switch(mls) - { - default: /* includes case 3 */ - case 4 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4); - case 5 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 
5); - case 6 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6); - case 7 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7); + if (ms->cParams.targetLength > 1) { + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_noDict_4_1(ms, seqStore, rep, src, srcSize); + case 5 : + return ZSTD_compressBlock_fast_noDict_5_1(ms, seqStore, rep, src, srcSize); + case 6 : + return ZSTD_compressBlock_fast_noDict_6_1(ms, seqStore, rep, src, srcSize); + case 7 : + return ZSTD_compressBlock_fast_noDict_7_1(ms, seqStore, rep, src, srcSize); + } + } else { + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_noDict_4_0(ms, seqStore, rep, src, srcSize); + case 5 : + return ZSTD_compressBlock_fast_noDict_5_0(ms, seqStore, rep, src, srcSize); + case 6 : + return ZSTD_compressBlock_fast_noDict_6_0(ms, seqStore, rep, src, srcSize); + case 7 : + return ZSTD_compressBlock_fast_noDict_7_0(ms, seqStore, rep, src, srcSize); + } + } } FORCE_INLINE_TEMPLATE size_t ZSTD_compressBlock_fast_dictMatchState_generic( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize, U32 const mls) + void const* src, size_t srcSize, U32 const mls, U32 const hasStep) { const ZSTD_compressionParameters* const cParams = &ms->cParams; U32* const hashTable = ms->hashTable; @@ -242,6 +407,8 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( assert(endIndex - prefixStartIndex <= maxDistance); (void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */ + (void)hasStep; /* not currently specialized on whether it's accelerated */ + /* ensure there will be no underflow * when translating a dict index into a local index */ assert(prefixStartIndex >= (U32)(dictEnd - dictBase)); @@ -272,7 +439,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( const BYTE* const repMatchEnd = repIndex < 
prefixStartIndex ? dictEnd : iend; mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; ip++; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength); } else if ( (matchIndex <= prefixStartIndex) ) { size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls); U32 const dictMatchIndex = dictHashTable[dictHash]; @@ -292,7 +459,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( } /* catch up */ offset_2 = offset_1; offset_1 = offset; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); } } else if (MEM_read32(match) != MEM_read32(ip)) { /* it's not a match, and we're not going to check the dictionary */ @@ -307,7 +474,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ offset_2 = offset_1; offset_1 = offset; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); } /* match found */ @@ -332,7 +499,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? 
dictEnd : iend; size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); + ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2); hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; ip += repLength2; anchor = ip; @@ -351,6 +518,12 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( return (size_t)(iend - anchor); } + +ZSTD_GEN_FAST_FN(dictMatchState, 4, 0) +ZSTD_GEN_FAST_FN(dictMatchState, 5, 0) +ZSTD_GEN_FAST_FN(dictMatchState, 6, 0) +ZSTD_GEN_FAST_FN(dictMatchState, 7, 0) + size_t ZSTD_compressBlock_fast_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) @@ -361,20 +534,20 @@ size_t ZSTD_compressBlock_fast_dictMatchState( { default: /* includes case 3 */ case 4 : - return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 4); + return ZSTD_compressBlock_fast_dictMatchState_4_0(ms, seqStore, rep, src, srcSize); case 5 : - return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 5); + return ZSTD_compressBlock_fast_dictMatchState_5_0(ms, seqStore, rep, src, srcSize); case 6 : - return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 6); + return ZSTD_compressBlock_fast_dictMatchState_6_0(ms, seqStore, rep, src, srcSize); case 7 : - return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 7); + return ZSTD_compressBlock_fast_dictMatchState_7_0(ms, seqStore, rep, src, srcSize); } } static size_t ZSTD_compressBlock_fast_extDict_generic( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize, U32 const mls) + void const* src, size_t srcSize, U32 const mls, U32 const hasStep) { const ZSTD_compressionParameters* 
const cParams = &ms->cParams; U32* const hashTable = ms->hashTable; @@ -398,11 +571,13 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( const BYTE* const ilimit = iend - 8; U32 offset_1=rep[0], offset_2=rep[1]; + (void)hasStep; /* not currently specialized on whether it's accelerated */ + DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1); /* switch to "regular" variant if extDict is invalidated due to maxDistance */ if (prefixStartIndex == dictStartIndex) - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls); + return ZSTD_compressBlock_fast(ms, seqStore, rep, src, srcSize); /* Search Loop */ while (ip < ilimit) { /* < instead of <=, because (ip+1) */ @@ -416,14 +591,14 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( const BYTE* const repMatch = repBase + repIndex; hashTable[h] = curr; /* update hash table */ DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr); - assert(offset_1 <= curr +1); /* check repIndex */ - if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex)) + if ( ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ + & (offset_1 <= curr+1 - dictStartIndex) ) /* note: we are searching at curr+1 */ && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? 
dictEnd : iend; size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4; ip++; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, rLength); ip += rLength; anchor = ip; } else { @@ -439,7 +614,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ offset_2 = offset_1; offset_1 = offset; /* update offset history */ - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); ip += mLength; anchor = ip; } } @@ -453,12 +628,12 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( U32 const current2 = (U32)(ip-base); U32 const repIndex2 = current2 - offset_2; const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; - if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex)) /* intentional overflow */ + if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 <= curr - dictStartIndex)) /* intentional overflow */ && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? 
dictEnd : iend; size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2-MINMATCH); + ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, STORE_REPCODE_1, repLength2); hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; ip += repLength2; anchor = ip; @@ -475,6 +650,10 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( return (size_t)(iend - anchor); } +ZSTD_GEN_FAST_FN(extDict, 4, 0) +ZSTD_GEN_FAST_FN(extDict, 5, 0) +ZSTD_GEN_FAST_FN(extDict, 6, 0) +ZSTD_GEN_FAST_FN(extDict, 7, 0) size_t ZSTD_compressBlock_fast_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], @@ -485,12 +664,12 @@ size_t ZSTD_compressBlock_fast_extDict( { default: /* includes case 3 */ case 4 : - return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 4); + return ZSTD_compressBlock_fast_extDict_4_0(ms, seqStore, rep, src, srcSize); case 5 : - return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 5); + return ZSTD_compressBlock_fast_extDict_5_0(ms, seqStore, rep, src, srcSize); case 6 : - return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 6); + return ZSTD_compressBlock_fast_extDict_6_0(ms, seqStore, rep, src, srcSize); case 7 : - return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 7); + return ZSTD_compressBlock_fast_extDict_7_0(ms, seqStore, rep, src, srcSize); } } diff --git a/lib/zstd/compress/zstd_lazy.c b/lib/zstd/compress/zstd_lazy.c index fb54d4e28a2b..0298a01a7504 100644 --- a/lib/zstd/compress/zstd_lazy.c +++ b/lib/zstd/compress/zstd_lazy.c @@ -61,7 +61,7 @@ ZSTD_updateDUBT(ZSTD_matchState_t* ms, * assumption : curr >= btlow == (curr - btmask) * doesn't fail */ static void -ZSTD_insertDUBT1(ZSTD_matchState_t* ms, 
+ZSTD_insertDUBT1(const ZSTD_matchState_t* ms, U32 curr, const BYTE* inputEnd, U32 nbCompares, U32 btLow, const ZSTD_dictMode_e dictMode) @@ -151,7 +151,7 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms, static size_t ZSTD_DUBT_findBetterDictMatch ( - ZSTD_matchState_t* ms, + const ZSTD_matchState_t* ms, const BYTE* const ip, const BYTE* const iend, size_t* offsetPtr, size_t bestLength, @@ -197,8 +197,8 @@ ZSTD_DUBT_findBetterDictMatch ( U32 matchIndex = dictMatchIndex + dictIndexDelta; if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) { DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)", - curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + curr - matchIndex, dictMatchIndex, matchIndex); - bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex; + curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, STORE_OFFSET(curr - matchIndex), dictMatchIndex, matchIndex); + bestLength = matchLength, *offsetPtr = STORE_OFFSET(curr - matchIndex); } if (ip+matchLength == iend) { /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */ break; /* drop, to guarantee consistency (miss a little bit of compression) */ @@ -218,7 +218,7 @@ ZSTD_DUBT_findBetterDictMatch ( } if (bestLength >= MINMATCH) { - U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex; + U32 const mIndex = curr - (U32)STORED_OFFSET(*offsetPtr); (void)mIndex; DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)", curr, (U32)bestLength, (U32)*offsetPtr, mIndex); } @@ -328,7 +328,7 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, if (matchLength > matchEndIdx - matchIndex) matchEndIdx = matchIndex + (U32)matchLength; if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) 
- ZSTD_highbit32((U32)offsetPtr[0]+1)) ) - bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex; + bestLength = matchLength, *offsetPtr = STORE_OFFSET(curr - matchIndex); if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ if (dictMode == ZSTD_dictMatchState) { nbCompares = 0; /* in addition to avoiding checking any @@ -368,7 +368,7 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, assert(matchEndIdx > curr+8); /* ensure nextToUpdate is increased */ ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ if (bestLength >= MINMATCH) { - U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex; + U32 const mIndex = curr - (U32)STORED_OFFSET(*offsetPtr); (void)mIndex; DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)", curr, (U32)bestLength, (U32)*offsetPtr, mIndex); } @@ -391,91 +391,9 @@ ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms, return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode); } - -static size_t -ZSTD_BtFindBestMatch_selectMLS ( ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr) -{ - switch(ms->cParams.minMatch) - { - default : /* includes case 3 */ - case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict); - case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict); - case 7 : - case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict); - } -} - - -static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS ( - ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr) -{ - switch(ms->cParams.minMatch) - { - default : /* includes case 3 */ - case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState); - case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState); - case 7 : - case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 
6, ZSTD_dictMatchState); - } -} - - -static size_t ZSTD_BtFindBestMatch_extDict_selectMLS ( - ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr) -{ - switch(ms->cParams.minMatch) - { - default : /* includes case 3 */ - case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict); - case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict); - case 7 : - case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict); - } -} - - - /* ********************************* -* Hash Chain +* Dedicated dict search ***********************************/ -#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & (mask)] - -/* Update chains up to ip (excluded) - Assumption : always within prefix (i.e. not within extDict) */ -FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal( - ZSTD_matchState_t* ms, - const ZSTD_compressionParameters* const cParams, - const BYTE* ip, U32 const mls) -{ - U32* const hashTable = ms->hashTable; - const U32 hashLog = cParams->hashLog; - U32* const chainTable = ms->chainTable; - const U32 chainMask = (1 << cParams->chainLog) - 1; - const BYTE* const base = ms->window.base; - const U32 target = (U32)(ip - base); - U32 idx = ms->nextToUpdate; - - while(idx < target) { /* catch up */ - size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls); - NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; - hashTable[h] = idx; - idx++; - } - - ms->nextToUpdate = target; - return hashTable[ZSTD_hashPtr(ip, hashLog, mls)]; -} - -U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) { - const ZSTD_compressionParameters* const cParams = &ms->cParams; - return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch); -} void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip) { @@ -485,7 +403,7 @@ void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const B U32* const chainTable = 
ms->chainTable; U32 const chainSize = 1 << ms->cParams.chainLog; U32 idx = ms->nextToUpdate; - U32 const minChain = chainSize < target ? target - chainSize : idx; + U32 const minChain = chainSize < target - idx ? target - chainSize : idx; U32 const bucketSize = 1 << ZSTD_LAZY_DDSS_BUCKET_LOG; U32 const cacheSize = bucketSize - 1; U32 const chainAttempts = (1 << ms->cParams.searchLog) - cacheSize; @@ -499,13 +417,12 @@ void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const B U32 const hashLog = ms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG; U32* const tmpHashTable = hashTable; U32* const tmpChainTable = hashTable + ((size_t)1 << hashLog); - U32 const tmpChainSize = ((1 << ZSTD_LAZY_DDSS_BUCKET_LOG) - 1) << hashLog; + U32 const tmpChainSize = (U32)((1 << ZSTD_LAZY_DDSS_BUCKET_LOG) - 1) << hashLog; U32 const tmpMinChain = tmpChainSize < target ? target - tmpChainSize : idx; - U32 hashIdx; assert(ms->cParams.chainLog <= 24); - assert(ms->cParams.hashLog >= ms->cParams.chainLog); + assert(ms->cParams.hashLog > ms->cParams.chainLog); assert(idx != 0); assert(tmpMinChain <= minChain); @@ -536,7 +453,7 @@ void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const B if (count == cacheSize) { for (count = 0; count < chainLimit;) { if (i < minChain) { - if (!i || countBeyondMinChain++ > cacheSize) { + if (!i || ++countBeyondMinChain > cacheSize) { /* only allow pulling `cacheSize` number of entries * into the cache or chainTable beyond `minChain`, * to replace the entries pulled out of the @@ -592,10 +509,143 @@ void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const B ms->nextToUpdate = target; } +/* Returns the longest match length found in the dedicated dict search structure. + * If none are longer than the argument ml, then ml will be returned. 
+ */ +FORCE_INLINE_TEMPLATE +size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nbAttempts, + const ZSTD_matchState_t* const dms, + const BYTE* const ip, const BYTE* const iLimit, + const BYTE* const prefixStart, const U32 curr, + const U32 dictLimit, const size_t ddsIdx) { + const U32 ddsLowestIndex = dms->window.dictLimit; + const BYTE* const ddsBase = dms->window.base; + const BYTE* const ddsEnd = dms->window.nextSrc; + const U32 ddsSize = (U32)(ddsEnd - ddsBase); + const U32 ddsIndexDelta = dictLimit - ddsSize; + const U32 bucketSize = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG); + const U32 bucketLimit = nbAttempts < bucketSize - 1 ? nbAttempts : bucketSize - 1; + U32 ddsAttempt; + U32 matchIndex; + + for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) { + PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]); + } + + { + U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1]; + U32 const chainIndex = chainPackedPointer >> 8; + + PREFETCH_L1(&dms->chainTable[chainIndex]); + } + + for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) { + size_t currentMl=0; + const BYTE* match; + matchIndex = dms->hashTable[ddsIdx + ddsAttempt]; + match = ddsBase + matchIndex; + + if (!matchIndex) { + return ml; + } + + /* guaranteed by table construction */ + (void)ddsLowestIndex; + assert(matchIndex >= ddsLowestIndex); + assert(match+4 <= ddsEnd); + if (MEM_read32(match) == MEM_read32(ip)) { + /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4; + } + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = STORE_OFFSET(curr - (matchIndex + ddsIndexDelta)); + if (ip+currentMl == iLimit) { + /* best possible, avoids read overflow on next attempt */ + return ml; + } + } + } + + { + U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1]; + U32 chainIndex = chainPackedPointer >> 8; + 
U32 const chainLength = chainPackedPointer & 0xFF; + U32 const chainAttempts = nbAttempts - ddsAttempt; + U32 const chainLimit = chainAttempts > chainLength ? chainLength : chainAttempts; + U32 chainAttempt; + + for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) { + PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]); + } + + for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) { + size_t currentMl=0; + const BYTE* match; + matchIndex = dms->chainTable[chainIndex]; + match = ddsBase + matchIndex; + + /* guaranteed by table construction */ + assert(matchIndex >= ddsLowestIndex); + assert(match+4 <= ddsEnd); + if (MEM_read32(match) == MEM_read32(ip)) { + /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4; + } + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = STORE_OFFSET(curr - (matchIndex + ddsIndexDelta)); + if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ + } + } + } + return ml; +} + + +/* ********************************* +* Hash Chain +***********************************/ +#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & (mask)] + +/* Update chains up to ip (excluded) + Assumption : always within prefix (i.e. 
not within extDict) */ +FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal( + ZSTD_matchState_t* ms, + const ZSTD_compressionParameters* const cParams, + const BYTE* ip, U32 const mls) +{ + U32* const hashTable = ms->hashTable; + const U32 hashLog = cParams->hashLog; + U32* const chainTable = ms->chainTable; + const U32 chainMask = (1 << cParams->chainLog) - 1; + const BYTE* const base = ms->window.base; + const U32 target = (U32)(ip - base); + U32 idx = ms->nextToUpdate; + + while(idx < target) { /* catch up */ + size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls); + NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; + hashTable[h] = idx; + idx++; + } + + ms->nextToUpdate = target; + return hashTable[ZSTD_hashPtr(ip, hashLog, mls)]; +} + +U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) { + const ZSTD_compressionParameters* const cParams = &ms->cParams; + return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch); +} /* inlining is important to hardwire a hot branch (template emulation) */ FORCE_INLINE_TEMPLATE -size_t ZSTD_HcFindBestMatch_generic ( +size_t ZSTD_HcFindBestMatch( ZSTD_matchState_t* ms, const BYTE* const ip, const BYTE* const iLimit, size_t* offsetPtr, @@ -653,7 +703,7 @@ size_t ZSTD_HcFindBestMatch_generic ( /* save best solution */ if (currentMl > ml) { ml = currentMl; - *offsetPtr = curr - matchIndex + ZSTD_REP_MOVE; + *offsetPtr = STORE_OFFSET(curr - matchIndex); if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ } @@ -663,90 +713,8 @@ size_t ZSTD_HcFindBestMatch_generic ( assert(nbAttempts <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. 
*/ if (dictMode == ZSTD_dedicatedDictSearch) { - const U32 ddsLowestIndex = dms->window.dictLimit; - const BYTE* const ddsBase = dms->window.base; - const BYTE* const ddsEnd = dms->window.nextSrc; - const U32 ddsSize = (U32)(ddsEnd - ddsBase); - const U32 ddsIndexDelta = dictLimit - ddsSize; - const U32 bucketSize = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG); - const U32 bucketLimit = nbAttempts < bucketSize - 1 ? nbAttempts : bucketSize - 1; - U32 ddsAttempt; - - for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) { - PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]); - } - - { - U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1]; - U32 const chainIndex = chainPackedPointer >> 8; - - PREFETCH_L1(&dms->chainTable[chainIndex]); - } - - for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) { - size_t currentMl=0; - const BYTE* match; - matchIndex = dms->hashTable[ddsIdx + ddsAttempt]; - match = ddsBase + matchIndex; - - if (!matchIndex) { - return ml; - } - - /* guaranteed by table construction */ - (void)ddsLowestIndex; - assert(matchIndex >= ddsLowestIndex); - assert(match+4 <= ddsEnd); - if (MEM_read32(match) == MEM_read32(ip)) { - /* assumption : matchIndex <= dictLimit-4 (by table construction) */ - currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4; - } - - /* save best solution */ - if (currentMl > ml) { - ml = currentMl; - *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE; - if (ip+currentMl == iLimit) { - /* best possible, avoids read overflow on next attempt */ - return ml; - } - } - } - - { - U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1]; - U32 chainIndex = chainPackedPointer >> 8; - U32 const chainLength = chainPackedPointer & 0xFF; - U32 const chainAttempts = nbAttempts - ddsAttempt; - U32 const chainLimit = chainAttempts > chainLength ? 
chainLength : chainAttempts; - U32 chainAttempt; - - for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) { - PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]); - } - - for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) { - size_t currentMl=0; - const BYTE* match; - matchIndex = dms->chainTable[chainIndex]; - match = ddsBase + matchIndex; - - /* guaranteed by table construction */ - assert(matchIndex >= ddsLowestIndex); - assert(match+4 <= ddsEnd); - if (MEM_read32(match) == MEM_read32(ip)) { - /* assumption : matchIndex <= dictLimit-4 (by table construction) */ - currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4; - } - - /* save best solution */ - if (currentMl > ml) { - ml = currentMl; - *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE; - if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ - } - } - } + ml = ZSTD_dedicatedDictSearch_lazy_search(offsetPtr, ml, nbAttempts, dms, + ip, iLimit, prefixStart, curr, dictLimit, ddsIdx); } else if (dictMode == ZSTD_dictMatchState) { const U32* const dmsChainTable = dms->chainTable; const U32 dmsChainSize = (1 << dms->cParams.chainLog); @@ -770,7 +738,8 @@ size_t ZSTD_HcFindBestMatch_generic ( /* save best solution */ if (currentMl > ml) { ml = currentMl; - *offsetPtr = curr - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE; + assert(curr > matchIndex + dmsIndexDelta); + *offsetPtr = STORE_OFFSET(curr - (matchIndex + dmsIndexDelta)); if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ } @@ -783,75 +752,725 @@ size_t ZSTD_HcFindBestMatch_generic ( return ml; } +/* ********************************* +* (SIMD) Row-based matchfinder +***********************************/ +/* Constants for row-based hash */ +#define ZSTD_ROW_HASH_TAG_OFFSET 16 /* byte offset of hashes in the match state's tagTable from the beginning of a row */ +#define 
ZSTD_ROW_HASH_TAG_BITS 8 /* nb bits to use for the tag */ +#define ZSTD_ROW_HASH_TAG_MASK ((1u << ZSTD_ROW_HASH_TAG_BITS) - 1) +#define ZSTD_ROW_HASH_MAX_ENTRIES 64 /* absolute maximum number of entries per row, for all configurations */ -FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS ( - ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr) +#define ZSTD_ROW_HASH_CACHE_MASK (ZSTD_ROW_HASH_CACHE_SIZE - 1) + +typedef U64 ZSTD_VecMask; /* Clarifies when we are interacting with a U64 representing a mask of matches */ + +/* ZSTD_VecMask_next(): + * Starting from the LSB, returns the idx of the next non-zero bit. + * Basically counting the nb of trailing zeroes. + */ +static U32 ZSTD_VecMask_next(ZSTD_VecMask val) { + assert(val != 0); +# if (defined(__GNUC__) && ((__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4)))) + if (sizeof(size_t) == 4) { + U32 mostSignificantWord = (U32)(val >> 32); + U32 leastSignificantWord = (U32)val; + if (leastSignificantWord == 0) { + return 32 + (U32)__builtin_ctz(mostSignificantWord); + } else { + return (U32)__builtin_ctz(leastSignificantWord); + } + } else { + return (U32)__builtin_ctzll(val); + } +# else + /* Software ctz version: http://aggregate.org/MAGIC/#Trailing%20Zero%20Count + * and: https://stackoverflow.com/questions/2709430/count-number-of-bits-in-a-64-bit-long-big-integer + */ + val = ~val & (val - 1ULL); /* Lowest set bit mask */ + val = val - ((val >> 1) & 0x5555555555555555); + val = (val & 0x3333333333333333ULL) + ((val >> 2) & 0x3333333333333333ULL); + return (U32)((((val + (val >> 4)) & 0xF0F0F0F0F0F0F0FULL) * 0x101010101010101ULL) >> 56); +# endif +} + +/* ZSTD_rotateRight_*(): + * Rotates a bitfield to the right by "count" bits. 
+ * https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts + */ +FORCE_INLINE_TEMPLATE +U64 ZSTD_rotateRight_U64(U64 const value, U32 count) { + assert(count < 64); + count &= 0x3F; /* for fickle pattern recognition */ + return (value >> count) | (U64)(value << ((0U - count) & 0x3F)); +} + +FORCE_INLINE_TEMPLATE +U32 ZSTD_rotateRight_U32(U32 const value, U32 count) { + assert(count < 32); + count &= 0x1F; /* for fickle pattern recognition */ + return (value >> count) | (U32)(value << ((0U - count) & 0x1F)); +} + +FORCE_INLINE_TEMPLATE +U16 ZSTD_rotateRight_U16(U16 const value, U32 count) { + assert(count < 16); + count &= 0x0F; /* for fickle pattern recognition */ + return (value >> count) | (U16)(value << ((0U - count) & 0x0F)); +} + +/* ZSTD_row_nextIndex(): + * Returns the next index to insert at within a tagTable row, and updates the "head" + * value to reflect the update. Essentially cycles backwards from [0, {entries per row}) + */ +FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextIndex(BYTE* const tagRow, U32 const rowMask) { + U32 const next = (*tagRow - 1) & rowMask; + *tagRow = (BYTE)next; + return next; +} + +/* ZSTD_isAligned(): + * Checks that a pointer is aligned to "align" bytes which must be a power of 2. + */ +MEM_STATIC int ZSTD_isAligned(void const* ptr, size_t align) { + assert((align & (align - 1)) == 0); + return (((size_t)ptr) & (align - 1)) == 0; +} + +/* ZSTD_row_prefetch(): + * Performs prefetching for the hashTable and tagTable at a given row. 
+ */ +FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, U16 const* tagTable, U32 const relRow, U32 const rowLog) { + PREFETCH_L1(hashTable + relRow); + if (rowLog >= 5) { + PREFETCH_L1(hashTable + relRow + 16); + /* Note: prefetching more of the hash table does not appear to be beneficial for 128-entry rows */ + } + PREFETCH_L1(tagTable + relRow); + if (rowLog == 6) { + PREFETCH_L1(tagTable + relRow + 32); + } + assert(rowLog == 4 || rowLog == 5 || rowLog == 6); + assert(ZSTD_isAligned(hashTable + relRow, 64)); /* prefetched hash row always 64-byte aligned */ + assert(ZSTD_isAligned(tagTable + relRow, (size_t)1 << rowLog)); /* prefetched tagRow sits on correct multiple of bytes (32,64,128) */ +} + +/* ZSTD_row_fillHashCache(): + * Fill up the hash cache starting at idx, prefetching up to ZSTD_ROW_HASH_CACHE_SIZE entries, + * but not beyond iLimit. + */ +FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const BYTE* base, + U32 const rowLog, U32 const mls, + U32 idx, const BYTE* const iLimit) { - switch(ms->cParams.minMatch) - { - default : /* includes case 3 */ - case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict); - case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict); - case 7 : - case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict); + U32 const* const hashTable = ms->hashTable; + U16 const* const tagTable = ms->tagTable; + U32 const hashLog = ms->rowHashLog; + U32 const maxElemsToPrefetch = (base + idx) > iLimit ? 
0 : (U32)(iLimit - (base + idx) + 1); + U32 const lim = idx + MIN(ZSTD_ROW_HASH_CACHE_SIZE, maxElemsToPrefetch); + + for (; idx < lim; ++idx) { + U32 const hash = (U32)ZSTD_hashPtr(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls); + U32 const row = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; + ZSTD_row_prefetch(hashTable, tagTable, row, rowLog); + ms->hashCache[idx & ZSTD_ROW_HASH_CACHE_MASK] = hash; + } + + DEBUGLOG(6, "ZSTD_row_fillHashCache(): [%u %u %u %u %u %u %u %u]", ms->hashCache[0], ms->hashCache[1], + ms->hashCache[2], ms->hashCache[3], ms->hashCache[4], + ms->hashCache[5], ms->hashCache[6], ms->hashCache[7]); +} + +/* ZSTD_row_nextCachedHash(): + * Returns the hash of base + idx, and replaces the hash in the hash cache with the byte at + * base + idx + ZSTD_ROW_HASH_CACHE_SIZE. Also prefetches the appropriate rows from hashTable and tagTable. + */ +FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable, + U16 const* tagTable, BYTE const* base, + U32 idx, U32 const hashLog, + U32 const rowLog, U32 const mls) +{ + U32 const newHash = (U32)ZSTD_hashPtr(base+idx+ZSTD_ROW_HASH_CACHE_SIZE, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls); + U32 const row = (newHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; + ZSTD_row_prefetch(hashTable, tagTable, row, rowLog); + { U32 const hash = cache[idx & ZSTD_ROW_HASH_CACHE_MASK]; + cache[idx & ZSTD_ROW_HASH_CACHE_MASK] = newHash; + return hash; } } - -static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS ( - ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr) +/* ZSTD_row_update_internalImpl(): + * Updates the hash table with positions starting from updateStartIdx until updateEndIdx. 
+ */ +FORCE_INLINE_TEMPLATE void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms, + U32 updateStartIdx, U32 const updateEndIdx, + U32 const mls, U32 const rowLog, + U32 const rowMask, U32 const useCache) { - switch(ms->cParams.minMatch) - { - default : /* includes case 3 */ - case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState); - case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState); - case 7 : - case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState); + U32* const hashTable = ms->hashTable; + U16* const tagTable = ms->tagTable; + U32 const hashLog = ms->rowHashLog; + const BYTE* const base = ms->window.base; + + DEBUGLOG(6, "ZSTD_row_update_internalImpl(): updateStartIdx=%u, updateEndIdx=%u", updateStartIdx, updateEndIdx); + for (; updateStartIdx < updateEndIdx; ++updateStartIdx) { + U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, updateStartIdx, hashLog, rowLog, mls) + : (U32)ZSTD_hashPtr(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls); + U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; + U32* const row = hashTable + relRow; + BYTE* tagRow = (BYTE*)(tagTable + relRow); /* Though tagTable is laid out as a table of U16, each tag is only 1 byte. 
+ Explicit cast allows us to get exact desired position within each row */ + U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask); + + assert(hash == ZSTD_hashPtr(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls)); + ((BYTE*)tagRow)[pos + ZSTD_ROW_HASH_TAG_OFFSET] = hash & ZSTD_ROW_HASH_TAG_MASK; + row[pos] = updateStartIdx; } } - -static size_t ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS ( - ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr) +/* ZSTD_row_update_internal(): + * Inserts the byte at ip into the appropriate position in the hash table, and updates ms->nextToUpdate. + * Skips sections of long matches as is necessary. + */ +FORCE_INLINE_TEMPLATE void ZSTD_row_update_internal(ZSTD_matchState_t* ms, const BYTE* ip, + U32 const mls, U32 const rowLog, + U32 const rowMask, U32 const useCache) { - switch(ms->cParams.minMatch) - { - default : /* includes case 3 */ - case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dedicatedDictSearch); - case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dedicatedDictSearch); - case 7 : - case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dedicatedDictSearch); + U32 idx = ms->nextToUpdate; + const BYTE* const base = ms->window.base; + const U32 target = (U32)(ip - base); + const U32 kSkipThreshold = 384; + const U32 kMaxMatchStartPositionsToUpdate = 96; + const U32 kMaxMatchEndPositionsToUpdate = 32; + + if (useCache) { + /* Only skip positions when using hash cache, i.e. + * if we are loading a dict, don't skip anything. + * If we decide to skip, then we only update a set number + * of positions at the beginning and end of the match. 
+ */ + if (UNLIKELY(target - idx > kSkipThreshold)) { + U32 const bound = idx + kMaxMatchStartPositionsToUpdate; + ZSTD_row_update_internalImpl(ms, idx, bound, mls, rowLog, rowMask, useCache); + idx = target - kMaxMatchEndPositionsToUpdate; + ZSTD_row_fillHashCache(ms, base, rowLog, mls, idx, ip+1); + } } + assert(target >= idx); + ZSTD_row_update_internalImpl(ms, idx, target, mls, rowLog, rowMask, useCache); + ms->nextToUpdate = target; +} + +/* ZSTD_row_update(): + * External wrapper for ZSTD_row_update_internal(). Used for filling the hashtable during dictionary + * processing. + */ +void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip) { + const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6); + const U32 rowMask = (1u << rowLog) - 1; + const U32 mls = MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */); + + DEBUGLOG(5, "ZSTD_row_update(), rowLog=%u", rowLog); + ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 0 /* dont use cache */); +} + +#if defined(ZSTD_ARCH_X86_SSE2) +FORCE_INLINE_TEMPLATE ZSTD_VecMask +ZSTD_row_getSSEMask(int nbChunks, const BYTE* const src, const BYTE tag, const U32 head) +{ + const __m128i comparisonMask = _mm_set1_epi8((char)tag); + int matches[4] = {0}; + int i; + assert(nbChunks == 1 || nbChunks == 2 || nbChunks == 4); + for (i=0; i> chunkSize; + do { + size_t chunk = MEM_readST(&src[i]); + chunk ^= splatChar; + chunk = (((chunk | x80) - x01) | chunk) & x80; + matches <<= chunkSize; + matches |= (chunk * extractMagic) >> shiftAmount; + i -= chunkSize; + } while (i >= 0); + } else { /* big endian: reverse bits during extraction */ + const size_t msb = xFF ^ (xFF >> 1); + const size_t extractMagic = (msb / 0x1FF) | msb; + do { + size_t chunk = MEM_readST(&src[i]); + chunk ^= splatChar; + chunk = (((chunk | x80) - x01) | chunk) & x80; + matches <<= chunkSize; + matches |= ((chunk >> 7) * extractMagic) >> shiftAmount; + i -= chunkSize; + } while (i >= 0); + } + matches = ~matches; + if (rowEntries == 16) { + return 
ZSTD_rotateRight_U16((U16)matches, head); + } else if (rowEntries == 32) { + return ZSTD_rotateRight_U32((U32)matches, head); + } else { + return ZSTD_rotateRight_U64((U64)matches, head); + } + } +#endif +} + +/* The high-level approach of the SIMD row based match finder is as follows: + * - Figure out where to insert the new entry: + * - Generate a hash from a byte along with an additional 1-byte "short hash". The additional byte is our "tag" + * - The hashTable is effectively split into groups or "rows" of 16 or 32 entries of U32, and the hash determines + * which row to insert into. + * - Determine the correct position within the row to insert the entry into. Each row of 16 or 32 can + * be considered as a circular buffer with a "head" index that resides in the tagTable. + * - Also insert the "tag" into the equivalent row and position in the tagTable. + * - Note: The tagTable has 17 or 33 1-byte entries per row, due to 16 or 32 tags, and 1 "head" entry. + * The 17 or 33 entry rows are spaced out to occur every 32 or 64 bytes, respectively, + * for alignment/performance reasons, leaving some bytes unused. + * - Use SIMD to efficiently compare the tags in the tagTable to the 1-byte "short hash" and + * generate a bitfield that we can cycle through to check the collisions in the hash table. + * - Pick the longest match. 
+ */ +FORCE_INLINE_TEMPLATE +size_t ZSTD_RowFindBestMatch( + ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 mls, const ZSTD_dictMode_e dictMode, + const U32 rowLog) +{ + U32* const hashTable = ms->hashTable; + U16* const tagTable = ms->tagTable; + U32* const hashCache = ms->hashCache; + const U32 hashLog = ms->rowHashLog; + const ZSTD_compressionParameters* const cParams = &ms->cParams; + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const U32 curr = (U32)(ip-base); + const U32 maxDistance = 1U << cParams->windowLog; + const U32 lowestValid = ms->window.lowLimit; + const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid; + const U32 isDictionary = (ms->loadedDictEnd != 0); + const U32 lowLimit = isDictionary ? 
lowestValid : withinMaxDistance; + const U32 rowEntries = (1U << rowLog); + const U32 rowMask = rowEntries - 1; + const U32 cappedSearchLog = MIN(cParams->searchLog, rowLog); /* nb of searches is capped at nb entries per row */ + U32 nbAttempts = 1U << cappedSearchLog; + size_t ml=4-1; + + /* DMS/DDS variables that may be referenced laster */ + const ZSTD_matchState_t* const dms = ms->dictMatchState; + + /* Initialize the following variables to satisfy static analyzer */ + size_t ddsIdx = 0; + U32 ddsExtraAttempts = 0; /* cctx hash tables are limited in searches, but allow extra searches into DDS */ + U32 dmsTag = 0; + U32* dmsRow = NULL; + BYTE* dmsTagRow = NULL; + + if (dictMode == ZSTD_dedicatedDictSearch) { + const U32 ddsHashLog = dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG; + { /* Prefetch DDS hashtable entry */ + ddsIdx = ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG; + PREFETCH_L1(&dms->hashTable[ddsIdx]); + } + ddsExtraAttempts = cParams->searchLog > rowLog ? 
1U << (cParams->searchLog - rowLog) : 0; + } + + if (dictMode == ZSTD_dictMatchState) { + /* Prefetch DMS rows */ + U32* const dmsHashTable = dms->hashTable; + U16* const dmsTagTable = dms->tagTable; + U32 const dmsHash = (U32)ZSTD_hashPtr(ip, dms->rowHashLog + ZSTD_ROW_HASH_TAG_BITS, mls); + U32 const dmsRelRow = (dmsHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; + dmsTag = dmsHash & ZSTD_ROW_HASH_TAG_MASK; + dmsTagRow = (BYTE*)(dmsTagTable + dmsRelRow); + dmsRow = dmsHashTable + dmsRelRow; + ZSTD_row_prefetch(dmsHashTable, dmsTagTable, dmsRelRow, rowLog); + } + + /* Update the hashTable and tagTable up to (but not including) ip */ + ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */); + { /* Get the hash for ip, compute the appropriate row */ + U32 const hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls); + U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; + U32 const tag = hash & ZSTD_ROW_HASH_TAG_MASK; + U32* const row = hashTable + relRow; + BYTE* tagRow = (BYTE*)(tagTable + relRow); + U32 const head = *tagRow & rowMask; + U32 matchBuffer[ZSTD_ROW_HASH_MAX_ENTRIES]; + size_t numMatches = 0; + size_t currMatch = 0; + ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag, head, rowEntries); + + /* Cycle through the matches and prefetch */ + for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) { + U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask; + U32 const matchIndex = row[matchPos]; + assert(numMatches < rowEntries); + if (matchIndex < lowLimit) + break; + if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) { + PREFETCH_L1(base + matchIndex); + } else { + PREFETCH_L1(dictBase + matchIndex); + } + matchBuffer[numMatches++] = matchIndex; + } + + /* Speed opt: insert current byte into hashtable too. This allows us to avoid one iteration of the loop + in ZSTD_row_update_internal() at the next search. 
*/ + { + U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask); + tagRow[pos + ZSTD_ROW_HASH_TAG_OFFSET] = (BYTE)tag; + row[pos] = ms->nextToUpdate++; + } + + /* Return the longest match */ + for (; currMatch < numMatches; ++currMatch) { + U32 const matchIndex = matchBuffer[currMatch]; + size_t currentMl=0; + assert(matchIndex < curr); + assert(matchIndex >= lowLimit); + + if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) { + const BYTE* const match = base + matchIndex; + assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */ + if (match[ml] == ip[ml]) /* potentially better */ + currentMl = ZSTD_count(ip, match, iLimit); + } else { + const BYTE* const match = dictBase + matchIndex; + assert(match+4 <= dictEnd); + if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4; + } + + /* Save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = STORE_OFFSET(curr - matchIndex); + if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ + } + } + } + + assert(nbAttempts <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. 
*/ + if (dictMode == ZSTD_dedicatedDictSearch) { + ml = ZSTD_dedicatedDictSearch_lazy_search(offsetPtr, ml, nbAttempts + ddsExtraAttempts, dms, + ip, iLimit, prefixStart, curr, dictLimit, ddsIdx); + } else if (dictMode == ZSTD_dictMatchState) { + /* TODO: Measure and potentially add prefetching to DMS */ + const U32 dmsLowestIndex = dms->window.dictLimit; + const BYTE* const dmsBase = dms->window.base; + const BYTE* const dmsEnd = dms->window.nextSrc; + const U32 dmsSize = (U32)(dmsEnd - dmsBase); + const U32 dmsIndexDelta = dictLimit - dmsSize; + + { U32 const head = *dmsTagRow & rowMask; + U32 matchBuffer[ZSTD_ROW_HASH_MAX_ENTRIES]; + size_t numMatches = 0; + size_t currMatch = 0; + ZSTD_VecMask matches = ZSTD_row_getMatchMask(dmsTagRow, (BYTE)dmsTag, head, rowEntries); + + for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) { + U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask; + U32 const matchIndex = dmsRow[matchPos]; + if (matchIndex < dmsLowestIndex) + break; + PREFETCH_L1(dmsBase + matchIndex); + matchBuffer[numMatches++] = matchIndex; + } + + /* Return the longest match */ + for (; currMatch < numMatches; ++currMatch) { + U32 const matchIndex = matchBuffer[currMatch]; + size_t currentMl=0; + assert(matchIndex >= dmsLowestIndex); + assert(matchIndex < curr); + + { const BYTE* const match = dmsBase + matchIndex; + assert(match+4 <= dmsEnd); + if (MEM_read32(match) == MEM_read32(ip)) + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4; + } + + if (currentMl > ml) { + ml = currentMl; + assert(curr > matchIndex + dmsIndexDelta); + *offsetPtr = STORE_OFFSET(curr - (matchIndex + dmsIndexDelta)); + if (ip+currentMl == iLimit) break; + } + } + } + } + return ml; } -FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( - ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr) -{ - switch(ms->cParams.minMatch) - { - default : /* includes case 
3 */ - case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict); - case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict); - case 7 : - case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict); - } -} +/* + * Generate search functions templated on (dictMode, mls, rowLog). + * These functions are outlined for code size & compilation time. + * ZSTD_searchMax() dispatches to the correct implementation function. + * + * TODO: The start of the search function involves loading and calculating a + * bunch of constants from the ZSTD_matchState_t. These computations could be + * done in an initialization function, and saved somewhere in the match state. + * Then we could pass a pointer to the saved state instead of the match state, + * and avoid duplicate computations. + * + * TODO: Move the match re-winding into searchMax. This improves compression + * ratio, and unlocks further simplifications with the next TODO. + * + * TODO: Try moving the repcode search into searchMax. After the re-winding + * and repcode search are in searchMax, there is no more logic in the match + * finder loop that requires knowledge about the dictMode. So we should be + * able to avoid force inlining it, and we can join the extDict loop with + * the single segment loop. It should go in searchMax instead of its own + * function to avoid having multiple virtual function calls per search. 
+ */ +#define ZSTD_BT_SEARCH_FN(dictMode, mls) ZSTD_BtFindBestMatch_##dictMode##_##mls +#define ZSTD_HC_SEARCH_FN(dictMode, mls) ZSTD_HcFindBestMatch_##dictMode##_##mls +#define ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog) ZSTD_RowFindBestMatch_##dictMode##_##mls##_##rowLog + +#define ZSTD_SEARCH_FN_ATTRS FORCE_NOINLINE + +#define GEN_ZSTD_BT_SEARCH_FN(dictMode, mls) \ + ZSTD_SEARCH_FN_ATTRS size_t ZSTD_BT_SEARCH_FN(dictMode, mls)( \ + ZSTD_matchState_t* ms, \ + const BYTE* ip, const BYTE* const iLimit, \ + size_t* offBasePtr) \ + { \ + assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls); \ + return ZSTD_BtFindBestMatch(ms, ip, iLimit, offBasePtr, mls, ZSTD_##dictMode); \ + } \ + +#define GEN_ZSTD_HC_SEARCH_FN(dictMode, mls) \ + ZSTD_SEARCH_FN_ATTRS size_t ZSTD_HC_SEARCH_FN(dictMode, mls)( \ + ZSTD_matchState_t* ms, \ + const BYTE* ip, const BYTE* const iLimit, \ + size_t* offsetPtr) \ + { \ + assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls); \ + return ZSTD_HcFindBestMatch(ms, ip, iLimit, offsetPtr, mls, ZSTD_##dictMode); \ + } \ + +#define GEN_ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog) \ + ZSTD_SEARCH_FN_ATTRS size_t ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog)( \ + ZSTD_matchState_t* ms, \ + const BYTE* ip, const BYTE* const iLimit, \ + size_t* offsetPtr) \ + { \ + assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls); \ + assert(MAX(4, MIN(6, ms->cParams.searchLog)) == rowLog); \ + return ZSTD_RowFindBestMatch(ms, ip, iLimit, offsetPtr, mls, ZSTD_##dictMode, rowLog); \ + } \ + +#define ZSTD_FOR_EACH_ROWLOG(X, dictMode, mls) \ + X(dictMode, mls, 4) \ + X(dictMode, mls, 5) \ + X(dictMode, mls, 6) + +#define ZSTD_FOR_EACH_MLS_ROWLOG(X, dictMode) \ + ZSTD_FOR_EACH_ROWLOG(X, dictMode, 4) \ + ZSTD_FOR_EACH_ROWLOG(X, dictMode, 5) \ + ZSTD_FOR_EACH_ROWLOG(X, dictMode, 6) + +#define ZSTD_FOR_EACH_MLS(X, dictMode) \ + X(dictMode, 4) \ + X(dictMode, 5) \ + X(dictMode, 6) + +#define ZSTD_FOR_EACH_DICT_MODE(X, ...) 
\ + X(__VA_ARGS__, noDict) \ + X(__VA_ARGS__, extDict) \ + X(__VA_ARGS__, dictMatchState) \ + X(__VA_ARGS__, dedicatedDictSearch) + +/* Generate row search fns for each combination of (dictMode, mls, rowLog) */ +ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS_ROWLOG, GEN_ZSTD_ROW_SEARCH_FN) +/* Generate binary Tree search fns for each combination of (dictMode, mls) */ +ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS, GEN_ZSTD_BT_SEARCH_FN) +/* Generate hash chain search fns for each combination of (dictMode, mls) */ +ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS, GEN_ZSTD_HC_SEARCH_FN) + +typedef enum { search_hashChain=0, search_binaryTree=1, search_rowHash=2 } searchMethod_e; + +#define GEN_ZSTD_CALL_BT_SEARCH_FN(dictMode, mls) \ + case mls: \ + return ZSTD_BT_SEARCH_FN(dictMode, mls)(ms, ip, iend, offsetPtr); +#define GEN_ZSTD_CALL_HC_SEARCH_FN(dictMode, mls) \ + case mls: \ + return ZSTD_HC_SEARCH_FN(dictMode, mls)(ms, ip, iend, offsetPtr); +#define GEN_ZSTD_CALL_ROW_SEARCH_FN(dictMode, mls, rowLog) \ + case rowLog: \ + return ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog)(ms, ip, iend, offsetPtr); + +#define ZSTD_SWITCH_MLS(X, dictMode) \ + switch (mls) { \ + ZSTD_FOR_EACH_MLS(X, dictMode) \ + } + +#define ZSTD_SWITCH_ROWLOG(dictMode, mls) \ + case mls: \ + switch (rowLog) { \ + ZSTD_FOR_EACH_ROWLOG(GEN_ZSTD_CALL_ROW_SEARCH_FN, dictMode, mls) \ + } \ + ZSTD_UNREACHABLE; \ + break; + +#define ZSTD_SWITCH_SEARCH_METHOD(dictMode) \ + switch (searchMethod) { \ + case search_hashChain: \ + ZSTD_SWITCH_MLS(GEN_ZSTD_CALL_HC_SEARCH_FN, dictMode) \ + break; \ + case search_binaryTree: \ + ZSTD_SWITCH_MLS(GEN_ZSTD_CALL_BT_SEARCH_FN, dictMode) \ + break; \ + case search_rowHash: \ + ZSTD_SWITCH_MLS(ZSTD_SWITCH_ROWLOG, dictMode) \ + break; \ + } \ + ZSTD_UNREACHABLE; + +/* + * Searches for the longest match at @p ip. + * Dispatches to the correct implementation function based on the + * (searchMethod, dictMode, mls, rowLog). 
We use switch statements + * here instead of using an indirect function call through a function + * pointer because after Spectre and Meltdown mitigations, indirect + * function calls can be very costly, especially in the kernel. + * + * NOTE: dictMode and searchMethod should be templated, so those switch + * statements should be optimized out. Only the mls & rowLog switches + * should be left. + * + * @param ms The match state. + * @param ip The position to search at. + * @param iend The end of the input data. + * @param[out] offsetPtr Stores the match offset into this pointer. + * @param mls The minimum search length, in the range [4, 6]. + * @param rowLog The row log (if applicable), in the range [4, 6]. + * @param searchMethod The search method to use (templated). + * @param dictMode The dictMode (templated). + * + * @returns The length of the longest match found, or < mls if no match is found. + * If a match is found its offset is stored in @p offsetPtr. + */ +FORCE_INLINE_TEMPLATE size_t ZSTD_searchMax( + ZSTD_matchState_t* ms, + const BYTE* ip, + const BYTE* iend, + size_t* offsetPtr, + U32 const mls, + U32 const rowLog, + searchMethod_e const searchMethod, + ZSTD_dictMode_e const dictMode) +{ + if (dictMode == ZSTD_noDict) { + ZSTD_SWITCH_SEARCH_METHOD(noDict) + } else if (dictMode == ZSTD_extDict) { + ZSTD_SWITCH_SEARCH_METHOD(extDict) + } else if (dictMode == ZSTD_dictMatchState) { + ZSTD_SWITCH_SEARCH_METHOD(dictMatchState) + } else if (dictMode == ZSTD_dedicatedDictSearch) { + ZSTD_SWITCH_SEARCH_METHOD(dedicatedDictSearch) + } + ZSTD_UNREACHABLE; + return 0; +} /* ******************************* * Common parser - lazy strategy *********************************/ -typedef enum { search_hashChain, search_binaryTree } searchMethod_e; FORCE_INLINE_TEMPLATE size_t ZSTD_compressBlock_lazy_generic( @@ -865,41 +1484,13 @@ ZSTD_compressBlock_lazy_generic( const BYTE* ip = istart; const BYTE* anchor = istart; const BYTE* const iend = istart + srcSize; - const 
BYTE* const ilimit = iend - 8; + const BYTE* const ilimit = (searchMethod == search_rowHash) ? iend - 8 - ZSTD_ROW_HASH_CACHE_SIZE : iend - 8; const BYTE* const base = ms->window.base; const U32 prefixLowestIndex = ms->window.dictLimit; const BYTE* const prefixLowest = base + prefixLowestIndex; + const U32 mls = BOUNDED(4, ms->cParams.minMatch, 6); + const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6); - typedef size_t (*searchMax_f)( - ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); - - /* - * This table is indexed first by the four ZSTD_dictMode_e values, and then - * by the two searchMethod_e values. NULLs are placed for configurations - * that should never occur (extDict modes go to the other implementation - * below and there is no DDSS for binary tree search yet). - */ - const searchMax_f searchFuncs[4][2] = { - { - ZSTD_HcFindBestMatch_selectMLS, - ZSTD_BtFindBestMatch_selectMLS - }, - { - NULL, - NULL - }, - { - ZSTD_HcFindBestMatch_dictMatchState_selectMLS, - ZSTD_BtFindBestMatch_dictMatchState_selectMLS - }, - { - ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS, - NULL - } - }; - - searchMax_f const searchMax = searchFuncs[dictMode][searchMethod == search_binaryTree]; U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0; const int isDMS = dictMode == ZSTD_dictMatchState; @@ -915,11 +1506,7 @@ ZSTD_compressBlock_lazy_generic( 0; const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest)); - assert(searchMax != NULL); - - DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode); - - /* init */ + DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u) (searchFunc=%u)", (U32)dictMode, (U32)searchMethod); ip += (dictAndPrefixLength == 0); if (dictMode == ZSTD_noDict) { U32 const curr = (U32)(ip - base); @@ -935,6 +1522,12 @@ ZSTD_compressBlock_lazy_generic( assert(offset_2 <= dictAndPrefixLength); } + if (searchMethod == search_rowHash) { + ZSTD_row_fillHashCache(ms, base, 
rowLog, + MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */), + ms->nextToUpdate, ilimit); + } + /* Match Loop */ #if defined(__x86_64__) /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the @@ -944,8 +1537,9 @@ ZSTD_compressBlock_lazy_generic( #endif while (ip < ilimit) { size_t matchLength=0; - size_t offset=0; + size_t offcode=STORE_REPCODE_1; const BYTE* start=ip+1; + DEBUGLOG(7, "search baseline (depth 0)"); /* check repCode */ if (isDxS) { @@ -969,9 +1563,9 @@ ZSTD_compressBlock_lazy_generic( /* first search (depth 0) */ { size_t offsetFound = 999999999; - size_t const ml2 = searchMax(ms, ip, iend, &offsetFound); + size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offsetFound, mls, rowLog, searchMethod, dictMode); if (ml2 > matchLength) - matchLength = ml2, start = ip, offset=offsetFound; + matchLength = ml2, start = ip, offcode=offsetFound; } if (matchLength < 4) { @@ -982,14 +1576,15 @@ ZSTD_compressBlock_lazy_generic( /* let's try to find a better solution */ if (depth>=1) while (ip0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { + && (offcode) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; int const gain2 = (int)(mlRep * 3); - int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1); if ((mlRep >= 4) && (gain2 > gain1)) - matchLength = mlRep, offset = 0, start = ip; + matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip; } if (isDxS) { const U32 repIndex = (U32)(ip - base) - offset_1; @@ -1001,30 +1596,31 @@ ZSTD_compressBlock_lazy_generic( const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? 
dictEnd : iend; size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; int const gain2 = (int)(mlRep * 3); - int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1); if ((mlRep >= 4) && (gain2 > gain1)) - matchLength = mlRep, offset = 0, start = ip; + matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip; } } { size_t offset2=999999999; - size_t const ml2 = searchMax(ms, ip, iend, &offset2); - int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); + size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offset2, mls, rowLog, searchMethod, dictMode); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2))); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 4); if ((ml2 >= 4) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; + matchLength = ml2, offcode = offset2, start = ip; continue; /* search a better one */ } } /* let's find an even better one */ if ((depth==2) && (ip0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { + && (offcode) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; int const gain2 = (int)(mlRep * 4); - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1); if ((mlRep >= 4) && (gain2 > gain1)) - matchLength = mlRep, offset = 0, start = ip; + matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip; } if (isDxS) { const U32 repIndex = (U32)(ip - base) - offset_1; @@ -1036,46 +1632,45 @@ ZSTD_compressBlock_lazy_generic( const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? 
dictEnd : iend; size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; int const gain2 = (int)(mlRep * 4); - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1); if ((mlRep >= 4) && (gain2 > gain1)) - matchLength = mlRep, offset = 0, start = ip; + matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip; } } { size_t offset2=999999999; - size_t const ml2 = searchMax(ms, ip, iend, &offset2); - int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); + size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offset2, mls, rowLog, searchMethod, dictMode); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2))); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 7); if ((ml2 >= 4) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; + matchLength = ml2, offcode = offset2, start = ip; continue; } } } break; /* nothing found : store previous solution */ } /* NOTE: - * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior. - * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which - * overflows the pointer, which is undefined behavior. + * Pay attention that `start[-value]` can lead to strange undefined behavior + * notably if `value` is unsigned, resulting in a large positive `-value`. 
*/ /* catch up */ - if (offset) { + if (STORED_IS_OFFSET(offcode)) { if (dictMode == ZSTD_noDict) { - while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest)) - && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */ + while ( ((start > anchor) & (start - STORED_OFFSET(offcode) > prefixLowest)) + && (start[-1] == (start-STORED_OFFSET(offcode))[-1]) ) /* only search for offset within prefix */ { start--; matchLength++; } } if (isDxS) { - U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); + U32 const matchIndex = (U32)((size_t)(start-base) - STORED_OFFSET(offcode)); const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex; const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest; while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ } - offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); + offset_2 = offset_1; offset_1 = (U32)STORED_OFFSET(offcode); } /* store sequence */ _storeSequence: - { size_t const litLength = start - anchor; - ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH); + { size_t const litLength = (size_t)(start - anchor); + ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offcode, matchLength); anchor = ip = start + matchLength; } @@ -1091,8 +1686,8 @@ _storeSequence: && (MEM_read32(repMatch) == MEM_read32(ip)) ) { const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? 
dictEnd : iend; matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4; - offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); + offcode = offset_2; offset_2 = offset_1; offset_1 = (U32)offcode; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, matchLength); ip += matchLength; anchor = ip; continue; @@ -1106,8 +1701,8 @@ _storeSequence: && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) { /* store sequence */ matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; - offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */ - ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); + offcode = offset_2; offset_2 = offset_1; offset_1 = (U32)offcode; /* swap repcodes */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, matchLength); ip += matchLength; anchor = ip; continue; /* faster when present ... (?) 
*/ @@ -1200,6 +1795,70 @@ size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch); } +/* Row-based matchfinder */ +size_t ZSTD_compressBlock_lazy2_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_lazy_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_greedy_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_lazy2_dictMatchState_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_lazy_dictMatchState_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_greedy_dictMatchState_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dictMatchState); +} + + +size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, 
size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dedicatedDictSearch); +} + +size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dedicatedDictSearch); +} + +size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dedicatedDictSearch); +} FORCE_INLINE_TEMPLATE size_t ZSTD_compressBlock_lazy_extDict_generic( @@ -1212,7 +1871,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( const BYTE* ip = istart; const BYTE* anchor = istart; const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; + const BYTE* const ilimit = searchMethod == search_rowHash ? iend - 8 - ZSTD_ROW_HASH_CACHE_SIZE : iend - 8; const BYTE* const base = ms->window.base; const U32 dictLimit = ms->window.dictLimit; const BYTE* const prefixStart = base + dictLimit; @@ -1220,18 +1879,20 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( const BYTE* const dictEnd = dictBase + dictLimit; const BYTE* const dictStart = dictBase + ms->window.lowLimit; const U32 windowLog = ms->cParams.windowLog; - - typedef size_t (*searchMax_f)( - ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); - searchMax_f searchMax = searchMethod==search_binaryTree ? 
ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS; + const U32 mls = BOUNDED(4, ms->cParams.minMatch, 6); + const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6); U32 offset_1 = rep[0], offset_2 = rep[1]; - DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic"); + DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic (searchFunc=%u)", (U32)searchMethod); /* init */ ip += (ip == prefixStart); + if (searchMethod == search_rowHash) { + ZSTD_row_fillHashCache(ms, base, rowLog, + MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */), + ms->nextToUpdate, ilimit); + } /* Match Loop */ #if defined(__x86_64__) @@ -1242,7 +1903,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( #endif while (ip < ilimit) { size_t matchLength=0; - size_t offset=0; + size_t offcode=STORE_REPCODE_1; const BYTE* start=ip+1; U32 curr = (U32)(ip-base); @@ -1251,7 +1912,8 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( const U32 repIndex = (U32)(curr+1 - offset_1); const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; - if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ + if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ + & (offset_1 <= curr+1 - windowLow) ) /* note: we are searching at curr+1 */ if (MEM_read32(ip+1) == MEM_read32(repMatch)) { /* repcode detected we should take it */ const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; @@ -1261,9 +1923,9 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( /* first search (depth 0) */ { size_t offsetFound = 999999999; - size_t const ml2 = searchMax(ms, ip, iend, &offsetFound); + size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offsetFound, mls, rowLog, searchMethod, ZSTD_extDict); if (ml2 > matchLength) - matchLength = ml2, start = ip, offset=offsetFound; + matchLength = ml2, start = ip, offcode=offsetFound; } if (matchLength < 4) { @@ -1277,29 +1939,30 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( ip ++; curr++; /* check repCode */ - if (offset) { + if (offcode) { const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog); const U32 repIndex = (U32)(curr - offset_1); const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; - if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ + if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */ + & (offset_1 <= curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */ if (MEM_read32(ip) == MEM_read32(repMatch)) { /* repcode detected */ const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; int const gain2 = (int)(repLength * 3); - int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1); if ((repLength >= 4) && (gain2 > gain1)) - matchLength = repLength, offset = 0, start = ip; + matchLength = repLength, offcode = STORE_REPCODE_1, start = ip; } } /* search match, depth 1 */ { size_t offset2=999999999; - size_t const ml2 = searchMax(ms, ip, iend, &offset2); - int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); + size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offset2, mls, rowLog, searchMethod, ZSTD_extDict); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2))); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 4); if ((ml2 >= 4) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; + matchLength = ml2, offcode = offset2, start = ip; continue; /* search a better one */ } } @@ -1308,47 +1971,48 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( ip ++; curr++; /* check repCode */ - if (offset) { + if (offcode) { const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog); const U32 repIndex = (U32)(curr - offset_1); const BYTE* const repBase = repIndex < dictLimit ? 
dictBase : base; const BYTE* const repMatch = repBase + repIndex; - if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ + if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */ + & (offset_1 <= curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */ if (MEM_read32(ip) == MEM_read32(repMatch)) { /* repcode detected */ const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; int const gain2 = (int)(repLength * 4); - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1); if ((repLength >= 4) && (gain2 > gain1)) - matchLength = repLength, offset = 0, start = ip; + matchLength = repLength, offcode = STORE_REPCODE_1, start = ip; } } /* search match, depth 2 */ { size_t offset2=999999999; - size_t const ml2 = searchMax(ms, ip, iend, &offset2); - int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); + size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offset2, mls, rowLog, searchMethod, ZSTD_extDict); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2))); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 7); if ((ml2 >= 4) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; + matchLength = ml2, offcode = offset2, start = ip; continue; } } } break; /* nothing found : store previous solution */ } /* catch up */ - if (offset) { - U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); + if (STORED_IS_OFFSET(offcode)) { + U32 const matchIndex = (U32)((size_t)(start-base) - STORED_OFFSET(offcode)); const BYTE* 
match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex; const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart; while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ - offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); + offset_2 = offset_1; offset_1 = (U32)STORED_OFFSET(offcode); } /* store sequence */ _storeSequence: - { size_t const litLength = start - anchor; - ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH); + { size_t const litLength = (size_t)(start - anchor); + ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offcode, matchLength); anchor = ip = start + matchLength; } @@ -1359,13 +2023,14 @@ _storeSequence: const U32 repIndex = repCurrent - offset_2; const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; - if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ + if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */ + & (offset_2 <= repCurrent - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */ if (MEM_read32(ip) == MEM_read32(repMatch)) { /* repcode detected we should take it */ const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; - offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */ - ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); + offcode = offset_2; offset_2 = offset_1; offset_1 = (U32)offcode; /* swap offset history */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, matchLength); ip += matchLength; anchor = ip; continue; /* faster when present ... (?) 
*/ @@ -1412,3 +2077,26 @@ size_t ZSTD_compressBlock_btlazy2_extDict( { return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2); } + +size_t ZSTD_compressBlock_greedy_extDict_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0); +} + +size_t ZSTD_compressBlock_lazy_extDict_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) + +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1); +} + +size_t ZSTD_compressBlock_lazy2_extDict_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) + +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2); +} diff --git a/lib/zstd/compress/zstd_lazy.h b/lib/zstd/compress/zstd_lazy.h index 2fc5a6182134..e5bdf4df8dde 100644 --- a/lib/zstd/compress/zstd_lazy.h +++ b/lib/zstd/compress/zstd_lazy.h @@ -23,6 +23,7 @@ #define ZSTD_LAZY_DDSS_BUCKET_LOG 2 U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip); +void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip); void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip); @@ -40,6 +41,15 @@ size_t ZSTD_compressBlock_lazy( size_t ZSTD_compressBlock_greedy( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_row( + ZSTD_matchState_t* ms, seqStore_t* 
seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); size_t ZSTD_compressBlock_btlazy2_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], @@ -53,6 +63,15 @@ size_t ZSTD_compressBlock_lazy_dictMatchState( size_t ZSTD_compressBlock_greedy_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_dictMatchState_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_dictMatchState_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_dictMatchState_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], @@ -63,6 +82,15 @@ size_t ZSTD_compressBlock_lazy_dedicatedDictSearch( size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); size_t ZSTD_compressBlock_greedy_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], @@ -73,9 +101,19 @@ size_t ZSTD_compressBlock_lazy_extDict( size_t ZSTD_compressBlock_lazy2_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 
void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_extDict_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_extDict_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_extDict_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); size_t ZSTD_compressBlock_btlazy2_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); + #endif /* ZSTD_LAZY_H */ diff --git a/lib/zstd/compress/zstd_ldm.c b/lib/zstd/compress/zstd_ldm.c index 8ef7e88a5add..dd86fc83e7dd 100644 --- a/lib/zstd/compress/zstd_ldm.c +++ b/lib/zstd/compress/zstd_ldm.c @@ -57,6 +57,33 @@ static void ZSTD_ldm_gear_init(ldmRollingHashState_t* state, ldmParams_t const* } } +/* ZSTD_ldm_gear_reset() + * Feeds [data, data + minMatchLength) into the hash without registering any + * splits. This effectively resets the hash state. This is used when skipping + * over data, either at the beginning of a block, or skipping sections. 
+ */ +static void ZSTD_ldm_gear_reset(ldmRollingHashState_t* state, + BYTE const* data, size_t minMatchLength) +{ + U64 hash = state->rolling; + size_t n = 0; + +#define GEAR_ITER_ONCE() do { \ + hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \ + n += 1; \ + } while (0) + while (n + 3 < minMatchLength) { + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + } + while (n < minMatchLength) { + GEAR_ITER_ONCE(); + } +#undef GEAR_ITER_ONCE +} + /* ZSTD_ldm_gear_feed(): * * Registers in the splits array all the split points found in the first @@ -132,12 +159,12 @@ size_t ZSTD_ldm_getTableSize(ldmParams_t params) size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog); size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize) + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t)); - return params.enableLdm ? totalSize : 0; + return params.enableLdm == ZSTD_ps_enable ? totalSize : 0; } size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize) { - return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0; + return params.enableLdm == ZSTD_ps_enable ? 
(maxChunkSize / params.minMatchLength) : 0; } /* ZSTD_ldm_getBucket() : @@ -255,7 +282,7 @@ void ZSTD_ldm_fillHashTable( while (ip < iend) { size_t hashed; unsigned n; - + numSplits = 0; hashed = ZSTD_ldm_gear_feed(&hashState, ip, iend - ip, splits, &numSplits); @@ -327,16 +354,8 @@ static size_t ZSTD_ldm_generateSequences_internal( /* Initialize the rolling hash state with the first minMatchLength bytes */ ZSTD_ldm_gear_init(&hashState, params); - { - size_t n = 0; - - while (n < minMatchLength) { - numSplits = 0; - n += ZSTD_ldm_gear_feed(&hashState, ip + n, minMatchLength - n, - splits, &numSplits); - } - ip += minMatchLength; - } + ZSTD_ldm_gear_reset(&hashState, ip, minMatchLength); + ip += minMatchLength; while (ip < ilimit) { size_t hashed; @@ -361,6 +380,7 @@ static size_t ZSTD_ldm_generateSequences_internal( for (n = 0; n < numSplits; n++) { size_t forwardMatchLength = 0, backwardMatchLength = 0, bestMatchLength = 0, mLength; + U32 offset; BYTE const* const split = candidates[n].split; U32 const checksum = candidates[n].checksum; U32 const hash = candidates[n].hash; @@ -428,9 +448,9 @@ static size_t ZSTD_ldm_generateSequences_internal( } /* Match found */ + offset = (U32)(split - base) - bestEntry->offset; mLength = forwardMatchLength + backwardMatchLength; { - U32 const offset = (U32)(split - base) - bestEntry->offset; rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size; /* Out of sequence storage */ @@ -447,6 +467,21 @@ static size_t ZSTD_ldm_generateSequences_internal( ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params); anchor = split + forwardMatchLength; + + /* If we find a match that ends after the data that we've hashed + * then we have a repeating, overlapping, pattern. E.g. all zeros. + * If one repetition of the pattern matches our `stopMask` then all + * repetitions will. We don't need to insert them all into our table, + * only the first one. So skip over overlapping matches.
+ * This is a major speed boost (20x) for compressing a single byte + * repeated, when that byte ends up in the table. + */ + if (anchor > ip + hashed) { + ZSTD_ldm_gear_reset(&hashState, anchor - minMatchLength, minMatchLength); + /* Continue the outer loop at anchor (ip + hashed == anchor). */ + ip = anchor - hashed; + break; + } } ip += hashed; @@ -500,7 +535,7 @@ size_t ZSTD_ldm_generateSequences( assert(chunkStart < iend); /* 1. Perform overflow correction if necessary. */ - if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) { + if (ZSTD_window_needOverflowCorrection(ldmState->window, 0, maxDist, ldmState->loadedDictEnd, chunkStart, chunkEnd)) { U32 const ldmHSize = 1U << params->hashLog; U32 const correction = ZSTD_window_correctOverflow( &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart); @@ -544,7 +579,9 @@ size_t ZSTD_ldm_generateSequences( return 0; } -void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) { +void +ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) +{ while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) { rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos; if (srcSize <= seq->litLength) { @@ -622,12 +659,13 @@ void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) { size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_paramSwitch_e useRowMatchFinder, void const* src, size_t srcSize) { const ZSTD_compressionParameters* const cParams = &ms->cParams; unsigned const minMatch = cParams->minMatch; ZSTD_blockCompressor const blockCompressor = - ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms)); + ZSTD_selectBlockCompressor(cParams->strategy, useRowMatchFinder, ZSTD_matchState_dictMode(ms)); /* Input bounds */ BYTE const* const istart = (BYTE const*)src; BYTE const* const iend = istart + srcSize; @@ -673,8 +711,8 @@ 
size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, rep[0] = sequence.offset; /* Store the sequence */ ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend, - sequence.offset + ZSTD_REP_MOVE, - sequence.matchLength - MINMATCH); + STORE_OFFSET(sequence.offset), + sequence.matchLength); ip += sequence.matchLength; } } diff --git a/lib/zstd/compress/zstd_ldm.h b/lib/zstd/compress/zstd_ldm.h index 25b25270b72e..fbc6a5e88fd7 100644 --- a/lib/zstd/compress/zstd_ldm.h +++ b/lib/zstd/compress/zstd_ldm.h @@ -63,6 +63,7 @@ size_t ZSTD_ldm_generateSequences( */ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_paramSwitch_e useRowMatchFinder, void const* src, size_t srcSize); /* diff --git a/lib/zstd/compress/zstd_ldm_geartab.h b/lib/zstd/compress/zstd_ldm_geartab.h index e5c24d856b0a..647f865be290 100644 --- a/lib/zstd/compress/zstd_ldm_geartab.h +++ b/lib/zstd/compress/zstd_ldm_geartab.h @@ -11,7 +11,10 @@ #ifndef ZSTD_LDM_GEARTAB_H #define ZSTD_LDM_GEARTAB_H -static U64 ZSTD_ldm_gearTab[256] = { +#include "../common/compiler.h" /* UNUSED_ATTR */ +#include "../common/mem.h" /* U64 */ + +static UNUSED_ATTR const U64 ZSTD_ldm_gearTab[256] = { 0xf5b8f72c5f77775c, 0x84935f266b7ac412, 0xb647ada9ca730ccc, 0xb065bb4b114fb1de, 0x34584e7e8c3a9fd0, 0x4e97e17c6ae26b05, 0x3a03d743bc99a604, 0xcecd042422c4044f, 0x76de76c58524259e, diff --git a/lib/zstd/compress/zstd_opt.c b/lib/zstd/compress/zstd_opt.c index dfc55e3e8119..fd82acfda62f 100644 --- a/lib/zstd/compress/zstd_opt.c +++ b/lib/zstd/compress/zstd_opt.c @@ -8,25 +8,12 @@ * You may select, at your option, one of the above-listed licenses. */ -/* - * Disable inlining for the optimal parser for the kernel build. - * It is unlikely to be used in the kernel, and where it is used - * latency shouldn't matter because it is very slow to begin with. - * We prefer a ~180KB binary size win over faster optimal parsing. 
- * - * TODO(https://github.com/facebook/zstd/issues/2862): - * Improve the code size of the optimal parser in general, so we - * don't need this hack for the kernel build. - */ -#define ZSTD_NO_INLINE 1 - #include "zstd_compress_internal.h" #include "hist.h" #include "zstd_opt.h" #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */ -#define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */ #define ZSTD_MAX_PRICE (1<<30) #define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */ @@ -36,11 +23,11 @@ * Price functions for optimal parser ***************************************/ -#if 0 /* approximation at bit level */ +#if 0 /* approximation at bit level (for tests) */ # define BITCOST_ACCURACY 0 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) -# define WEIGHT(stat) ((void)opt, ZSTD_bitWeight(stat)) -#elif 0 /* fractional bit accuracy */ +# define WEIGHT(stat, opt) ((void)opt, ZSTD_bitWeight(stat)) +#elif 0 /* fractional bit accuracy (for tests) */ # define BITCOST_ACCURACY 8 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) # define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat)) @@ -78,7 +65,7 @@ MEM_STATIC double ZSTD_fCost(U32 price) static int ZSTD_compressedLiterals(optState_t const* const optPtr) { - return optPtr->literalCompressionMode != ZSTD_lcm_uncompressed; + return optPtr->literalCompressionMode != ZSTD_ps_disable; } static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel) @@ -91,25 +78,46 @@ static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel) } -/* ZSTD_downscaleStat() : - * reduce all elements in table by a factor 2^(ZSTD_FREQ_DIV+malus) - * return the resulting sum of elements */ -static U32 ZSTD_downscaleStat(unsigned* table, U32 lastEltIndex, int malus) +static U32 sum_u32(const unsigned table[], size_t nbElts) +{ + size_t n; + U32 total = 
0; + for (n=0; n 0 && ZSTD_FREQ_DIV+malus < 31); + DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", (unsigned)lastEltIndex+1, (unsigned)shift); + assert(shift < 30); for (s=0; s> (ZSTD_FREQ_DIV+malus)); + table[s] = 1 + (table[s] >> shift); sum += table[s]; } return sum; } +/* ZSTD_scaleStats() : + * reduce all elements in table is sum too large + * return the resulting sum of elements */ +static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget) +{ + U32 const prevsum = sum_u32(table, lastEltIndex+1); + U32 const factor = prevsum >> logTarget; + DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget); + assert(logTarget < 30); + if (factor <= 1) return prevsum; + return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor)); +} + /* ZSTD_rescaleFreqs() : * if first block (detected by optPtr->litLengthSum == 0) : init statistics * take hints from dictionary if there is one - * or init from zero, using src for literals stats, or flat 1 for match symbols + * and init from zero if there is none, + * using src for literals stats, and baseline stats for sequence symbols * otherwise downscale existing stats, to be used as seed for next block. */ static void @@ -138,7 +146,7 @@ ZSTD_rescaleFreqs(optState_t* const optPtr, optPtr->litSum = 0; for (lit=0; lit<=MaxLit; lit++) { U32 const scaleLog = 11; /* scale to 2K */ - U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit); + U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit); assert(bitCost <= scaleLog); optPtr->litFreq[lit] = bitCost ? 
1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; optPtr->litSum += optPtr->litFreq[lit]; @@ -186,14 +194,19 @@ ZSTD_rescaleFreqs(optState_t* const optPtr, if (compressedLiterals) { unsigned lit = MaxLit; HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */ - optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); + optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8); } - { unsigned ll; - for (ll=0; ll<=MaxLL; ll++) - optPtr->litLengthFreq[ll] = 1; + { unsigned const baseLLfreqs[MaxLL+1] = { + 4, 2, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1 + }; + ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs)); + optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1); } - optPtr->litLengthSum = MaxLL+1; { unsigned ml; for (ml=0; ml<=MaxML; ml++) @@ -201,21 +214,26 @@ ZSTD_rescaleFreqs(optState_t* const optPtr, } optPtr->matchLengthSum = MaxML+1; - { unsigned of; - for (of=0; of<=MaxOff; of++) - optPtr->offCodeFreq[of] = 1; + { unsigned const baseOFCfreqs[MaxOff+1] = { + 6, 2, 1, 1, 2, 3, 4, 4, + 4, 3, 2, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1 + }; + ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs)); + optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1); } - optPtr->offCodeSum = MaxOff+1; + } } else { /* new block : re-use previous statistics, scaled down */ if (compressedLiterals) - optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); - optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0); - optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0); - optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0); + optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12); + optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11); + optPtr->matchLengthSum = 
ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11); + optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11); } ZSTD_setBasePrices(optPtr, optLevel); @@ -251,7 +269,16 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength, * cost of literalLength symbol */ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel) { - if (optPtr->priceType == zop_predef) return WEIGHT(litLength, optLevel); + assert(litLength <= ZSTD_BLOCKSIZE_MAX); + if (optPtr->priceType == zop_predef) + return WEIGHT(litLength, optLevel); + /* We can't compute the litLength price for sizes >= ZSTD_BLOCKSIZE_MAX + * because it isn't representable in the zstd format. So instead just + * call it 1 bit more than ZSTD_BLOCKSIZE_MAX - 1. In this case the block + * would be all literals. + */ + if (litLength == ZSTD_BLOCKSIZE_MAX) + return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel); /* dynamic statistics */ { U32 const llCode = ZSTD_LLcode(litLength); @@ -264,15 +291,17 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP /* ZSTD_getMatchPrice() : * Provides the cost of the match part (offset + matchLength) of a sequence * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence. 
- * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */ + * @offcode : expects a scale where 0,1,2 are repcodes 1-3, and 3+ are real_offsets+2 + * @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) + */ FORCE_INLINE_TEMPLATE U32 -ZSTD_getMatchPrice(U32 const offset, +ZSTD_getMatchPrice(U32 const offcode, U32 const matchLength, const optState_t* const optPtr, int const optLevel) { U32 price; - U32 const offCode = ZSTD_highbit32(offset+1); + U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offcode)); U32 const mlBase = matchLength - MINMATCH; assert(matchLength >= MINMATCH); @@ -315,8 +344,8 @@ static void ZSTD_updateStats(optState_t* const optPtr, optPtr->litLengthSum++; } - /* match offset code (0-2=>repCode; 3+=>offset+2) */ - { U32 const offCode = ZSTD_highbit32(offsetCode+1); + /* offset code : expected to follow storeSeq() numeric representation */ + { U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offsetCode)); assert(offCode <= MaxOff); optPtr->offCodeFreq[offCode]++; optPtr->offCodeSum++; @@ -350,7 +379,7 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length) /* Update hashTable3 up to ip (excluded) Assumption : always within prefix (i.e. not within extDict) */ -static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, +static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms, U32* nextToUpdate3, const BYTE* const ip) { @@ -376,11 +405,13 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, * Binary Tree search ***************************************/ /* ZSTD_insertBt1() : add one or multiple positions to tree. - * ip : assumed <= iend-8 . + * @param ip assumed <= iend-8 . 
+ * @param target The target of ZSTD_updateTree_internal() - we are filling to this position * @return : nb of positions added */ static U32 ZSTD_insertBt1( - ZSTD_matchState_t* ms, + const ZSTD_matchState_t* ms, const BYTE* const ip, const BYTE* const iend, + U32 const target, U32 const mls, const int extDict) { const ZSTD_compressionParameters* const cParams = &ms->cParams; @@ -403,7 +434,10 @@ static U32 ZSTD_insertBt1( U32* smallerPtr = bt + 2*(curr&btMask); U32* largerPtr = smallerPtr + 1; U32 dummy32; /* to be nullified at the end */ - U32 const windowLow = ms->window.lowLimit; + /* windowLow is based on target because + * we only need positions that will be in the window at the end of the tree update. + */ + U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog); U32 matchEndIdx = curr+8+1; size_t bestLength = 8; U32 nbCompares = 1U << cParams->searchLog; @@ -416,6 +450,7 @@ static U32 ZSTD_insertBt1( DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr); + assert(curr <= target); assert(ip <= iend-8); /* required for h calculation */ hashTable[h] = curr; /* Update Hash Table */ @@ -504,7 +539,7 @@ void ZSTD_updateTree_internal( idx, target, dictMode); while(idx < target) { - U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict); + U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict); assert(idx < (U32)(idx + forward)); idx += forward; } @@ -609,7 +644,7 @@ U32 ZSTD_insertBtAndGetAllMatches ( DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u", repCode, ll0, repOffset, repLen); bestLength = repLen; - matches[mnum].off = repCode - ll0; + matches[mnum].off = STORE_REPCODE(repCode - ll0 + 1); /* expect value between 1 and 3 */ matches[mnum].len = (U32)repLen; mnum++; if ( (repLen > sufficient_len) @@ -638,7 +673,7 @@ U32 ZSTD_insertBtAndGetAllMatches ( bestLength = mlen; assert(curr > matchIndex3); assert(mnum==0); /* no prior solution */ - matches[0].off = (curr - 
matchIndex3) + ZSTD_REP_MOVE; + matches[0].off = STORE_OFFSET(curr - matchIndex3); matches[0].len = (U32)mlen; mnum = 1; if ( (mlen > sufficient_len) | @@ -647,7 +682,7 @@ U32 ZSTD_insertBtAndGetAllMatches ( return 1; } } } /* no dictMatchState lookup: dicts don't have a populated HC3 table */ - } + } /* if (mls == 3) */ hashTable[h] = curr; /* Update Hash Table */ @@ -672,20 +707,19 @@ U32 ZSTD_insertBtAndGetAllMatches ( if (matchLength > bestLength) { DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)", - (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE); + (U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex)); assert(matchEndIdx > matchIndex); if (matchLength > matchEndIdx - matchIndex) matchEndIdx = matchIndex + (U32)matchLength; bestLength = matchLength; - matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE; + matches[mnum].off = STORE_OFFSET(curr - matchIndex); matches[mnum].len = (U32)matchLength; mnum++; if ( (matchLength > ZSTD_OPT_NUM) | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) { if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */ break; /* drop, to preserve bt consistency (miss a little bit of compression) */ - } - } + } } if (match[matchLength] < ip[matchLength]) { /* match smaller than current */ @@ -721,18 +755,17 @@ U32 ZSTD_insertBtAndGetAllMatches ( if (matchLength > bestLength) { matchIndex = dictMatchIndex + dmsIndexDelta; DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)", - (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE); + (U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex)); if (matchLength > matchEndIdx - matchIndex) matchEndIdx = matchIndex + (U32)matchLength; bestLength = matchLength; - matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE; + matches[mnum].off = STORE_OFFSET(curr - matchIndex); matches[mnum].len = (U32)matchLength; mnum++; if ( 
(matchLength > ZSTD_OPT_NUM) | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) { break; /* drop, to guarantee consistency (miss a little bit of compression) */ - } - } + } } if (dictMatchIndex <= dmsBtLow) { break; } /* beyond tree size, stop the search */ if (match[matchLength] < ip[matchLength]) { @@ -742,39 +775,91 @@ U32 ZSTD_insertBtAndGetAllMatches ( /* match is larger than current */ commonLengthLarger = matchLength; dictMatchIndex = nextPtr[0]; - } - } - } + } } } /* if (dictMode == ZSTD_dictMatchState) */ assert(matchEndIdx > curr+8); ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ return mnum; } +typedef U32 (*ZSTD_getAllMatchesFn)( + ZSTD_match_t*, + ZSTD_matchState_t*, + U32*, + const BYTE*, + const BYTE*, + const U32 rep[ZSTD_REP_NUM], + U32 const ll0, + U32 const lengthToBeat); -FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches ( - ZSTD_match_t* matches, /* store result (match found, increasing size) in this table */ - ZSTD_matchState_t* ms, - U32* nextToUpdate3, - const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode, - const U32 rep[ZSTD_REP_NUM], - U32 const ll0, - U32 const lengthToBeat) +FORCE_INLINE_TEMPLATE U32 ZSTD_btGetAllMatches_internal( + ZSTD_match_t* matches, + ZSTD_matchState_t* ms, + U32* nextToUpdate3, + const BYTE* ip, + const BYTE* const iHighLimit, + const U32 rep[ZSTD_REP_NUM], + U32 const ll0, + U32 const lengthToBeat, + const ZSTD_dictMode_e dictMode, + const U32 mls) { - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32 const matchLengthSearch = cParams->minMatch; - DEBUGLOG(8, "ZSTD_BtGetAllMatches"); - if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */ - ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode); - switch(matchLengthSearch) - { - case 3 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3); - default : - case 4 : return 
ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4); - case 5 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5); - case 7 : - case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6); + assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls); + DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls); + if (ip < ms->window.base + ms->nextToUpdate) + return 0; /* skipped area */ + ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode); + return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls); +} + +#define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls + +#define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls) \ + static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)( \ + ZSTD_match_t* matches, \ + ZSTD_matchState_t* ms, \ + U32* nextToUpdate3, \ + const BYTE* ip, \ + const BYTE* const iHighLimit, \ + const U32 rep[ZSTD_REP_NUM], \ + U32 const ll0, \ + U32 const lengthToBeat) \ + { \ + return ZSTD_btGetAllMatches_internal( \ + matches, ms, nextToUpdate3, ip, iHighLimit, \ + rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \ } + +#define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode) \ + GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3) \ + GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4) \ + GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5) \ + GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6) + +GEN_ZSTD_BT_GET_ALL_MATCHES(noDict) +GEN_ZSTD_BT_GET_ALL_MATCHES(extDict) +GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState) + +#define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode) \ + { \ + ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \ + ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \ + ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \ + ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6) \ + } + +static ZSTD_getAllMatchesFn 
+ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode) +{ + ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = { + ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict), + ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict), + ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState) + }; + U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6); + assert((U32)dictMode < 3); + assert(mls - 3 < 4); + return getAllMatchesFns[(int)dictMode][mls - 3]; } /* *********************** @@ -783,16 +868,18 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches ( /* Struct containing info needed to make decision about ldm inclusion */ typedef struct { - rawSeqStore_t seqStore; /* External match candidates store for this block */ - U32 startPosInBlock; /* Start position of the current match candidate */ - U32 endPosInBlock; /* End position of the current match candidate */ - U32 offset; /* Offset of the match candidate */ + rawSeqStore_t seqStore; /* External match candidates store for this block */ + U32 startPosInBlock; /* Start position of the current match candidate */ + U32 endPosInBlock; /* End position of the current match candidate */ + U32 offset; /* Offset of the match candidate */ } ZSTD_optLdm_t; /* ZSTD_optLdm_skipRawSeqStoreBytes(): - * Moves forward in rawSeqStore by nbBytes, which will update the fields 'pos' and 'posInSequence'. + * Moves forward in @rawSeqStore by @nbBytes, + * which will update the fields 'pos' and 'posInSequence'. */ -static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) { +static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) +{ U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes); while (currPos && rawSeqStore->pos < rawSeqStore->size) { rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos]; @@ -813,8 +900,10 @@ static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t * Calculates the beginning and end of the next match in the current block. 
* Updates 'pos' and 'posInSequence' of the ldmSeqStore. */ -static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock, - U32 blockBytesRemaining) { +static void +ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock, + U32 blockBytesRemaining) +{ rawSeq currSeq; U32 currBlockEndPos; U32 literalsBytesRemaining; @@ -826,8 +915,8 @@ static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 cu optLdm->endPosInBlock = UINT_MAX; return; } - /* Calculate appropriate bytes left in matchLength and litLength after adjusting - based on ldmSeqStore->posInSequence */ + /* Calculate appropriate bytes left in matchLength and litLength + * after adjusting based on ldmSeqStore->posInSequence */ currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos]; assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength); currBlockEndPos = currPosInBlock + blockBytesRemaining; @@ -863,15 +952,16 @@ static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 cu } /* ZSTD_optLdm_maybeAddMatch(): - * Adds a match if it's long enough, based on it's 'matchStartPosInBlock' - * and 'matchEndPosInBlock', into 'matches'. Maintains the correct ordering of 'matches' + * Adds a match if it's long enough, + * based on it's 'matchStartPosInBlock' and 'matchEndPosInBlock', + * into 'matches'. Maintains the correct ordering of 'matches'. 
*/ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches, - ZSTD_optLdm_t* optLdm, U32 currPosInBlock) { - U32 posDiff = currPosInBlock - optLdm->startPosInBlock; + const ZSTD_optLdm_t* optLdm, U32 currPosInBlock) +{ + U32 const posDiff = currPosInBlock - optLdm->startPosInBlock; /* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */ - U32 candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff; - U32 candidateOffCode = optLdm->offset + ZSTD_REP_MOVE; + U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff; /* Ensure that current block position is not outside of the match */ if (currPosInBlock < optLdm->startPosInBlock @@ -881,6 +971,7 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches, } if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) { + U32 const candidateOffCode = STORE_OFFSET(optLdm->offset); DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u", candidateOffCode, candidateMatchLength, currPosInBlock); matches[*nbMatches].len = candidateMatchLength; @@ -892,8 +983,11 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches, /* ZSTD_optLdm_processMatchCandidate(): * Wrapper function to update ldm seq store and call ldm functions as necessary. 
*/ -static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_t* matches, U32* nbMatches, - U32 currPosInBlock, U32 remainingBytes) { +static void +ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, + ZSTD_match_t* matches, U32* nbMatches, + U32 currPosInBlock, U32 remainingBytes) +{ if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) { return; } @@ -904,19 +998,19 @@ static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_ * at the end of a match from the ldm seq store, and will often be some bytes * over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots" */ - U32 posOvershoot = currPosInBlock - optLdm->endPosInBlock; + U32 const posOvershoot = currPosInBlock - optLdm->endPosInBlock; ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot); - } + } ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes); } ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock); } + /*-******************************* * Optimal parser *********************************/ - static U32 ZSTD_totalLen(ZSTD_optimal_t sol) { return sol.litlen + sol.mlen; @@ -957,6 +1051,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, const BYTE* const prefixStart = base + ms->window.dictLimit; const ZSTD_compressionParameters* const cParams = &ms->cParams; + ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode); + U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); U32 const minMatch = (cParams->minMatch == 3) ? 
3 : 4; U32 nextToUpdate3 = ms->nextToUpdate; @@ -984,7 +1080,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, /* find first match */ { U32 const litlen = (U32)(ip - anchor); U32 const ll0 = !litlen; - U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch); + U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch); ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches, (U32)(ip-istart), (U32)(iend - ip)); if (!nbMatches) { ip++; continue; } @@ -998,18 +1094,18 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, * in every price. We include the literal length to avoid negative * prices when we subtract the previous literal length. */ - opt[0].price = ZSTD_litLengthPrice(litlen, optStatePtr, optLevel); + opt[0].price = (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel); /* large match -> immediate encoding */ { U32 const maxML = matches[nbMatches-1].len; - U32 const maxOffset = matches[nbMatches-1].off; + U32 const maxOffcode = matches[nbMatches-1].off; DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series", - nbMatches, maxML, maxOffset, (U32)(ip-prefixStart)); + nbMatches, maxML, maxOffcode, (U32)(ip-prefixStart)); if (maxML > sufficient_len) { lastSequence.litlen = litlen; lastSequence.mlen = maxML; - lastSequence.off = maxOffset; + lastSequence.off = maxOffcode; DEBUGLOG(6, "large match (%u>%u), immediate encoding", maxML, sufficient_len); cur = 0; @@ -1018,24 +1114,25 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, } } /* set prices for first matches starting position == 0 */ - { U32 const literalsPrice = opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel); + assert(opt[0].price >= 0); + { U32 const literalsPrice = (U32)opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel); U32 pos; U32 matchNb; for (pos = 1; pos < minMatch; pos++) { opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and 
price will be fixed during forward scanning */ } for (matchNb = 0; matchNb < nbMatches; matchNb++) { - U32 const offset = matches[matchNb].off; + U32 const offcode = matches[matchNb].off; U32 const end = matches[matchNb].len; for ( ; pos <= end ; pos++ ) { - U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel); + U32 const matchPrice = ZSTD_getMatchPrice(offcode, pos, optStatePtr, optLevel); U32 const sequencePrice = literalsPrice + matchPrice; DEBUGLOG(7, "rPos:%u => set initial price : %.2f", pos, ZSTD_fCost(sequencePrice)); opt[pos].mlen = pos; - opt[pos].off = offset; + opt[pos].off = offcode; opt[pos].litlen = litlen; - opt[pos].price = sequencePrice; + opt[pos].price = (int)sequencePrice; } } last_pos = pos-1; } @@ -1050,9 +1147,9 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, /* Fix current position with one literal if cheaper */ { U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1; int const price = opt[cur-1].price - + ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel) - + ZSTD_litLengthPrice(litlen, optStatePtr, optLevel) - - ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel); + + (int)ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel) + + (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel) + - (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel); assert(price < 1000000000); /* overflow check */ if (price <= opt[cur].price) { DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)", @@ -1078,7 +1175,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, assert(cur >= opt[cur].mlen); if (opt[cur].mlen != 0) { U32 const prev = cur - opt[cur].mlen; - repcodes_t newReps = ZSTD_updateRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0); + repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0); ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t)); } else { ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, 
sizeof(repcodes_t)); @@ -1095,11 +1192,12 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */ } + assert(opt[cur].price >= 0); { U32 const ll0 = (opt[cur].mlen != 0); U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0; - U32 const previousPrice = opt[cur].price; + U32 const previousPrice = (U32)opt[cur].price; U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel); - U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch); + U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch); U32 matchNb; ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches, @@ -1137,7 +1235,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */ U32 const pos = cur + mlen; - int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel); + int const price = (int)basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel); if ((pos > last_pos) || (price < opt[pos].price)) { DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)", @@ -1167,7 +1265,7 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */ * update them while traversing the sequences. 
*/ if (lastSequence.mlen != 0) { - repcodes_t reps = ZSTD_updateRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0); + repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0); ZSTD_memcpy(rep, &reps, sizeof(reps)); } else { ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t)); @@ -1211,7 +1309,7 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */ assert(anchor + llen <= iend); ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen); - ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH); + ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen); anchor += advance; ip = anchor; } } @@ -1223,38 +1321,30 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */ return (size_t)(iend - anchor); } +static size_t ZSTD_compressBlock_opt0( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode) +{ + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode); +} + +static size_t ZSTD_compressBlock_opt2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode) +{ + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode); +} size_t ZSTD_compressBlock_btopt( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], const void* src, size_t srcSize) { DEBUGLOG(5, "ZSTD_compressBlock_btopt"); - return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_noDict); + return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict); } -/* used in 2-pass strategy */ -static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus) -{ - U32 s, sum=0; - assert(ZSTD_FREQ_DIV+bonus >= 0); - for (s=0; slitSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0); - optPtr->litLengthSum = 
ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0); - optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0); - optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0); -} /* ZSTD_initStats_ultra(): * make a first compression pass, just to seed stats with more accurate starting values. @@ -1276,7 +1366,7 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms, assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */ assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as 2-complement) */ - ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); /* generate stats into ms->opt*/ + ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict); /* generate stats into ms->opt*/ /* invalidate first scan from history */ ZSTD_resetSeqStore(seqStore); @@ -1285,8 +1375,6 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms, ms->window.lowLimit = ms->window.dictLimit; ms->nextToUpdate = ms->window.dictLimit; - /* re-inforce weight of collected statistics */ - ZSTD_upscaleStats(&ms->opt); } size_t ZSTD_compressBlock_btultra( @@ -1294,7 +1382,7 @@ size_t ZSTD_compressBlock_btultra( const void* src, size_t srcSize) { DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize); - return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); + return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict); } size_t ZSTD_compressBlock_btultra2( @@ -1322,35 +1410,35 @@ size_t ZSTD_compressBlock_btultra2( ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize); } - return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); + return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict); } size_t ZSTD_compressBlock_btopt_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], const void* src, size_t srcSize) { - 
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_dictMatchState); + return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState); } size_t ZSTD_compressBlock_btultra_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], const void* src, size_t srcSize) { - return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_dictMatchState); + return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState); } size_t ZSTD_compressBlock_btopt_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], const void* src, size_t srcSize) { - return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_extDict); + return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict); } size_t ZSTD_compressBlock_btultra_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], const void* src, size_t srcSize) { - return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict); + return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict); } /* note : no btultra2 variant for extDict nor dictMatchState, diff --git a/lib/zstd/decompress/huf_decompress.c b/lib/zstd/decompress/huf_decompress.c index 5105e59ac04a..89b269a641c7 100644 --- a/lib/zstd/decompress/huf_decompress.c +++ b/lib/zstd/decompress/huf_decompress.c @@ -22,6 +22,13 @@ #define HUF_STATIC_LINKING_ONLY #include "../common/huf.h" #include "../common/error_private.h" +#include "../common/zstd_internal.h" + +/* ************************************************************** +* Constants +****************************************************************/ + +#define HUF_DECODER_FAST_TABLELOG 11 /* ************************************************************** * Macros @@ -36,6 +43,26 @@ #error "Cannot force the use of the X1 and X2 decoders at the same 
time!" #endif +#if ZSTD_ENABLE_ASM_X86_64_BMI2 && DYNAMIC_BMI2 +# define HUF_ASM_X86_64_BMI2_ATTRS BMI2_TARGET_ATTRIBUTE +#else +# define HUF_ASM_X86_64_BMI2_ATTRS +#endif + +#define HUF_EXTERN_C +#define HUF_ASM_DECL HUF_EXTERN_C + +#if DYNAMIC_BMI2 || (ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)) +# define HUF_NEED_BMI2_FUNCTION 1 +#else +# define HUF_NEED_BMI2_FUNCTION 0 +#endif + +#if !(ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)) +# define HUF_NEED_DEFAULT_FUNCTION 1 +#else +# define HUF_NEED_DEFAULT_FUNCTION 0 +#endif /* ************************************************************** * Error Management @@ -65,7 +92,7 @@ return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ } \ \ - static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \ + static BMI2_TARGET_ATTRIBUTE size_t fn##_bmi2( \ void* dst, size_t dstSize, \ const void* cSrc, size_t cSrcSize, \ const HUF_DTable* DTable) \ @@ -107,13 +134,147 @@ static DTableDesc HUF_getDTableDesc(const HUF_DTable* table) return dtd; } +#if ZSTD_ENABLE_ASM_X86_64_BMI2 + +static size_t HUF_initDStream(BYTE const* ip) { + BYTE const lastByte = ip[7]; + size_t const bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; + size_t const value = MEM_readLEST(ip) | 1; + assert(bitsConsumed <= 8); + return value << bitsConsumed; +} +typedef struct { + BYTE const* ip[4]; + BYTE* op[4]; + U64 bits[4]; + void const* dt; + BYTE const* ilimit; + BYTE* oend; + BYTE const* iend[4]; +} HUF_DecompressAsmArgs; + +/* + * Initializes args for the asm decoding loop. + * @returns 0 on success + * 1 if the fallback implementation should be used. + * Or an error code on failure. 
+ */ +static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst, size_t dstSize, void const* src, size_t srcSize, const HUF_DTable* DTable) +{ + void const* dt = DTable + 1; + U32 const dtLog = HUF_getDTableDesc(DTable).tableLog; + + const BYTE* const ilimit = (const BYTE*)src + 6 + 8; + + BYTE* const oend = (BYTE*)dst + dstSize; + + /* The following condition is false on x32 platform, + * but HUF_asm is not compatible with this ABI */ + if (!(MEM_isLittleEndian() && !MEM_32bits())) return 1; + + /* strict minimum : jump table + 1 byte per stream */ + if (srcSize < 10) + return ERROR(corruption_detected); + + /* Must have at least 8 bytes per stream because we don't handle initializing smaller bit containers. + * If table log is not correct at this point, fallback to the old decoder. + * On small inputs we don't have enough data to trigger the fast loop, so use the old decoder. + */ + if (dtLog != HUF_DECODER_FAST_TABLELOG) + return 1; + + /* Read the jump table. */ + { + const BYTE* const istart = (const BYTE*)src; + size_t const length1 = MEM_readLE16(istart); + size_t const length2 = MEM_readLE16(istart+2); + size_t const length3 = MEM_readLE16(istart+4); + size_t const length4 = srcSize - (length1 + length2 + length3 + 6); + args->iend[0] = istart + 6; /* jumpTable */ + args->iend[1] = args->iend[0] + length1; + args->iend[2] = args->iend[1] + length2; + args->iend[3] = args->iend[2] + length3; + + /* HUF_initDStream() requires this, and this small of an input + * won't benefit from the ASM loop anyways. + * length1 must be >= 16 so that ip[0] >= ilimit before the loop + * starts. + */ + if (length1 < 16 || length2 < 8 || length3 < 8 || length4 < 8) + return 1; + if (length4 > srcSize) return ERROR(corruption_detected); /* overflow */ + } + /* ip[] contains the position that is currently loaded into bits[]. 
*/ + args->ip[0] = args->iend[1] - sizeof(U64); + args->ip[1] = args->iend[2] - sizeof(U64); + args->ip[2] = args->iend[3] - sizeof(U64); + args->ip[3] = (BYTE const*)src + srcSize - sizeof(U64); + + /* op[] contains the output pointers. */ + args->op[0] = (BYTE*)dst; + args->op[1] = args->op[0] + (dstSize+3)/4; + args->op[2] = args->op[1] + (dstSize+3)/4; + args->op[3] = args->op[2] + (dstSize+3)/4; + + /* No point to call the ASM loop for tiny outputs. */ + if (args->op[3] >= oend) + return 1; + + /* bits[] is the bit container. + * It is read from the MSB down to the LSB. + * It is shifted left as it is read, and zeros are + * shifted in. After the lowest valid bit a 1 is + * set, so that CountTrailingZeros(bits[]) can be used + * to count how many bits we've consumed. + */ + args->bits[0] = HUF_initDStream(args->ip[0]); + args->bits[1] = HUF_initDStream(args->ip[1]); + args->bits[2] = HUF_initDStream(args->ip[2]); + args->bits[3] = HUF_initDStream(args->ip[3]); + + /* If ip[] >= ilimit, it is guaranteed to be safe to + * reload bits[]. It may be beyond its section, but is + * guaranteed to be valid (>= istart). + */ + args->ilimit = ilimit; + + args->oend = oend; + args->dt = dt; + + return 0; +} + +static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressAsmArgs const* args, int stream, BYTE* segmentEnd) +{ + /* Validate that we haven't overwritten. */ + if (args->op[stream] > segmentEnd) + return ERROR(corruption_detected); + /* Validate that we haven't read beyond iend[]. + * Note that ip[] may be < iend[] because the MSB is + * the next bit to read, and we may have consumed 100% + * of the stream, so down to iend[i] - 8 is valid. + */ + if (args->ip[stream] < args->iend[stream] - 8) + return ERROR(corruption_detected); + + /* Construct the BIT_DStream_t. 
*/ + bit->bitContainer = MEM_readLE64(args->ip[stream]); + bit->bitsConsumed = ZSTD_countTrailingZeros((size_t)args->bits[stream]); + bit->start = (const char*)args->iend[0]; + bit->limitPtr = bit->start + sizeof(size_t); + bit->ptr = (const char*)args->ip[stream]; + + return 0; +} +#endif + #ifndef HUF_FORCE_DECOMPRESS_X2 /*-***************************/ /* single-symbol decoding */ /*-***************************/ -typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding */ +typedef struct { BYTE nbBits; BYTE byte; } HUF_DEltX1; /* single-symbol decoding */ /* * Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at @@ -122,14 +283,44 @@ typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decodi static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) { U64 D4; if (MEM_isLittleEndian()) { - D4 = symbol + (nbBits << 8); - } else { D4 = (symbol << 8) + nbBits; + } else { + D4 = symbol + (nbBits << 8); } D4 *= 0x0001000100010001ULL; return D4; } +/* + * Increase the tableLog to targetTableLog and rescales the stats. + * If tableLog > targetTableLog this is a no-op. + * @returns New tableLog + */ +static U32 HUF_rescaleStats(BYTE* huffWeight, U32* rankVal, U32 nbSymbols, U32 tableLog, U32 targetTableLog) +{ + if (tableLog > targetTableLog) + return tableLog; + if (tableLog < targetTableLog) { + U32 const scale = targetTableLog - tableLog; + U32 s; + /* Increase the weight for all non-zero probability symbols by scale. */ + for (s = 0; s < nbSymbols; ++s) { + huffWeight[s] += (BYTE)((huffWeight[s] == 0) ? 0 : scale); + } + /* Update rankVal to reflect the new weights. + * All weights except 0 get moved to weight + scale. + * Weights [1, scale] are empty. 
+ */ + for (s = targetTableLog; s > scale; --s) { + rankVal[s] = rankVal[s - scale]; + } + for (s = scale; s > 0; --s) { + rankVal[s] = 0; + } + } + return targetTableLog; +} + typedef struct { U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1]; @@ -162,8 +353,12 @@ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t sr iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2); if (HUF_isError(iSize)) return iSize; + /* Table header */ { DTableDesc dtd = HUF_getDTableDesc(DTable); + U32 const maxTableLog = dtd.maxTableLog + 1; + U32 const targetTableLog = MIN(maxTableLog, HUF_DECODER_FAST_TABLELOG); + tableLog = HUF_rescaleStats(wksp->huffWeight, wksp->rankVal, nbSymbols, tableLog, targetTableLog); if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */ dtd.tableType = 0; dtd.tableLog = (BYTE)tableLog; @@ -207,7 +402,7 @@ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t sr /* fill DTable * We fill all entries of each weight in order. - * That way length is a constant for each iteration of the outter loop. + * That way length is a constant for each iteration of the outer loop. * We can switch based on the length to a different inner loop which is * optimized for that particular case. 
*/ @@ -304,11 +499,15 @@ HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, cons BYTE* const pStart = p; /* up to 4 symbols at a time */ - while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) { - HUF_DECODE_SYMBOLX1_2(p, bitDPtr); - HUF_DECODE_SYMBOLX1_1(p, bitDPtr); - HUF_DECODE_SYMBOLX1_2(p, bitDPtr); - HUF_DECODE_SYMBOLX1_0(p, bitDPtr); + if ((pEnd - p) > 3) { + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) { + HUF_DECODE_SYMBOLX1_2(p, bitDPtr); + HUF_DECODE_SYMBOLX1_1(p, bitDPtr); + HUF_DECODE_SYMBOLX1_2(p, bitDPtr); + HUF_DECODE_SYMBOLX1_0(p, bitDPtr); + } + } else { + BIT_reloadDStream(bitDPtr); } /* [0-3] symbols remaining */ @@ -388,33 +587,36 @@ HUF_decompress4X1_usingDTable_internal_body( U32 endSignal = 1; if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */ CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */ - for ( ; (endSignal) & (op4 < olimit) ; ) { - HUF_DECODE_SYMBOLX1_2(op1, &bitD1); - HUF_DECODE_SYMBOLX1_2(op2, &bitD2); - HUF_DECODE_SYMBOLX1_2(op3, &bitD3); - HUF_DECODE_SYMBOLX1_2(op4, &bitD4); - HUF_DECODE_SYMBOLX1_1(op1, &bitD1); - HUF_DECODE_SYMBOLX1_1(op2, &bitD2); - HUF_DECODE_SYMBOLX1_1(op3, &bitD3); - HUF_DECODE_SYMBOLX1_1(op4, &bitD4); - HUF_DECODE_SYMBOLX1_2(op1, &bitD1); - HUF_DECODE_SYMBOLX1_2(op2, &bitD2); - HUF_DECODE_SYMBOLX1_2(op3, &bitD3); - HUF_DECODE_SYMBOLX1_2(op4, &bitD4); - HUF_DECODE_SYMBOLX1_0(op1, &bitD1); - HUF_DECODE_SYMBOLX1_0(op2, &bitD2); - HUF_DECODE_SYMBOLX1_0(op3, &bitD3); - HUF_DECODE_SYMBOLX1_0(op4, &bitD4); - endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; - endSignal &= 
BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; - endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; - endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; + if ((size_t)(oend - op4) >= sizeof(size_t)) { + for ( ; (endSignal) & (op4 < olimit) ; ) { + HUF_DECODE_SYMBOLX1_2(op1, &bitD1); + HUF_DECODE_SYMBOLX1_2(op2, &bitD2); + HUF_DECODE_SYMBOLX1_2(op3, &bitD3); + HUF_DECODE_SYMBOLX1_2(op4, &bitD4); + HUF_DECODE_SYMBOLX1_1(op1, &bitD1); + HUF_DECODE_SYMBOLX1_1(op2, &bitD2); + HUF_DECODE_SYMBOLX1_1(op3, &bitD3); + HUF_DECODE_SYMBOLX1_1(op4, &bitD4); + HUF_DECODE_SYMBOLX1_2(op1, &bitD1); + HUF_DECODE_SYMBOLX1_2(op2, &bitD2); + HUF_DECODE_SYMBOLX1_2(op3, &bitD3); + HUF_DECODE_SYMBOLX1_2(op4, &bitD4); + HUF_DECODE_SYMBOLX1_0(op1, &bitD1); + HUF_DECODE_SYMBOLX1_0(op2, &bitD2); + HUF_DECODE_SYMBOLX1_0(op3, &bitD3); + HUF_DECODE_SYMBOLX1_0(op4, &bitD4); + endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; + } } /* check corruption */ @@ -440,6 +642,79 @@ HUF_decompress4X1_usingDTable_internal_body( } } +#if HUF_NEED_BMI2_FUNCTION +static BMI2_TARGET_ATTRIBUTE +size_t HUF_decompress4X1_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc, + size_t cSrcSize, HUF_DTable const* DTable) { + return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable); +} +#endif + +#if HUF_NEED_DEFAULT_FUNCTION +static +size_t HUF_decompress4X1_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc, + size_t cSrcSize, HUF_DTable const* DTable) { + return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable); +} +#endif + +#if ZSTD_ENABLE_ASM_X86_64_BMI2 + +HUF_ASM_DECL void 
HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) ZSTDLIB_HIDDEN; + +static HUF_ASM_X86_64_BMI2_ATTRS +size_t +HUF_decompress4X1_usingDTable_internal_bmi2_asm( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + void const* dt = DTable + 1; + const BYTE* const iend = (const BYTE*)cSrc + 6; + BYTE* const oend = (BYTE*)dst + dstSize; + HUF_DecompressAsmArgs args; + { + size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable); + FORWARD_IF_ERROR(ret, "Failed to init asm args"); + if (ret != 0) + return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); + } + + assert(args.ip[0] >= args.ilimit); + HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(&args); + + /* Our loop guarantees that ip[] >= ilimit and that we haven't + * overwritten any op[]. + */ + assert(args.ip[0] >= iend); + assert(args.ip[1] >= iend); + assert(args.ip[2] >= iend); + assert(args.ip[3] >= iend); + assert(args.op[3] <= oend); + (void)iend; + + /* finish bit streams one by one. */ + { + size_t const segmentSize = (dstSize+3) / 4; + BYTE* segmentEnd = (BYTE*)dst; + int i; + for (i = 0; i < 4; ++i) { + BIT_DStream_t bit; + if (segmentSize <= (size_t)(oend - segmentEnd)) + segmentEnd += segmentSize; + else + segmentEnd = oend; + FORWARD_IF_ERROR(HUF_initRemainingDStream(&bit, &args, i, segmentEnd), "corruption"); + /* Decompress and validate that we've produced exactly the expected length. 
*/ + args.op[i] += HUF_decodeStreamX1(args.op[i], &bit, segmentEnd, (HUF_DEltX1 const*)dt, HUF_DECODER_FAST_TABLELOG); + if (args.op[i] != segmentEnd) return ERROR(corruption_detected); + } + } + + /* decoded size */ + return dstSize; +} +#endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */ typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize, const void *cSrc, @@ -447,8 +722,28 @@ typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize, const HUF_DTable *DTable); HUF_DGEN(HUF_decompress1X1_usingDTable_internal) -HUF_DGEN(HUF_decompress4X1_usingDTable_internal) +static size_t HUF_decompress4X1_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc, + size_t cSrcSize, HUF_DTable const* DTable, int bmi2) +{ +#if DYNAMIC_BMI2 + if (bmi2) { +# if ZSTD_ENABLE_ASM_X86_64_BMI2 + return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable); +# else + return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); +# endif + } +#else + (void)bmi2; +#endif + +#if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__) + return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable); +#else + return HUF_decompress4X1_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable); +#endif +} size_t HUF_decompress1X1_usingDTable( @@ -518,106 +813,226 @@ size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, /* *************************/ typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */ -typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t; +typedef struct { BYTE symbol; } sortedSymbol_t; typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1]; typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX]; +/* + * Constructs a HUF_DEltX2 in a U32. 
+ */ +static U32 HUF_buildDEltX2U32(U32 symbol, U32 nbBits, U32 baseSeq, int level) +{ + U32 seq; + DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, sequence) == 0); + DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, nbBits) == 2); + DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, length) == 3); + DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U32)); + if (MEM_isLittleEndian()) { + seq = level == 1 ? symbol : (baseSeq + (symbol << 8)); + return seq + (nbBits << 16) + ((U32)level << 24); + } else { + seq = level == 1 ? (symbol << 8) : ((baseSeq << 8) + symbol); + return (seq << 16) + (nbBits << 8) + (U32)level; + } +} + +/* + * Constructs a HUF_DEltX2. + */ +static HUF_DEltX2 HUF_buildDEltX2(U32 symbol, U32 nbBits, U32 baseSeq, int level) +{ + HUF_DEltX2 DElt; + U32 const val = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level); + DEBUG_STATIC_ASSERT(sizeof(DElt) == sizeof(val)); + ZSTD_memcpy(&DElt, &val, sizeof(val)); + return DElt; +} + +/* + * Constructs 2 HUF_DEltX2s and packs them into a U64. + */ +static U64 HUF_buildDEltX2U64(U32 symbol, U32 nbBits, U16 baseSeq, int level) +{ + U32 DElt = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level); + return (U64)DElt + ((U64)DElt << 32); +} + +/* + * Fills the DTable rank with all the symbols from [begin, end) that are each + * nbBits long. + * + * @param DTableRank The start of the rank in the DTable. + * @param begin The first symbol to fill (inclusive). + * @param end The last symbol to fill (exclusive). + * @param nbBits Each symbol is nbBits long. + * @param tableLog The table log. + * @param baseSeq If level == 1 { 0 } else { the first level symbol } + * @param level The level in the table. Must be 1 or 2. 
+ */ +static void HUF_fillDTableX2ForWeight( + HUF_DEltX2* DTableRank, + sortedSymbol_t const* begin, sortedSymbol_t const* end, + U32 nbBits, U32 tableLog, + U16 baseSeq, int const level) +{ + U32 const length = 1U << ((tableLog - nbBits) & 0x1F /* quiet static-analyzer */); + const sortedSymbol_t* ptr; + assert(level >= 1 && level <= 2); + switch (length) { + case 1: + for (ptr = begin; ptr != end; ++ptr) { + HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->symbol, nbBits, baseSeq, level); + *DTableRank++ = DElt; + } + break; + case 2: + for (ptr = begin; ptr != end; ++ptr) { + HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->symbol, nbBits, baseSeq, level); + DTableRank[0] = DElt; + DTableRank[1] = DElt; + DTableRank += 2; + } + break; + case 4: + for (ptr = begin; ptr != end; ++ptr) { + U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level); + ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2)); + DTableRank += 4; + } + break; + case 8: + for (ptr = begin; ptr != end; ++ptr) { + U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level); + ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2)); + DTableRank += 8; + } + break; + default: + for (ptr = begin; ptr != end; ++ptr) { + U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level); + HUF_DEltX2* const DTableRankEnd = DTableRank + length; + for (; DTableRank != DTableRankEnd; DTableRank += 8) { + ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2)); + } + } + break; + } +} /* HUF_fillDTableX2Level2() : * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */ 
-static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed, - const U32* rankValOrigin, const int minWeight, - const sortedSymbol_t* sortedSymbols, const U32 sortedListSize, - U32 nbBitsBaseline, U16 baseSeq, U32* wksp, size_t wkspSize) +static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 targetLog, const U32 consumedBits, + const U32* rankVal, const int minWeight, const int maxWeight1, + const sortedSymbol_t* sortedSymbols, U32 const* rankStart, + U32 nbBitsBaseline, U16 baseSeq) { - HUF_DEltX2 DElt; - U32* rankVal = wksp; - - assert(wkspSize >= HUF_TABLELOG_MAX + 1); - (void)wkspSize; - /* get pre-calculated rankVal */ - ZSTD_memcpy(rankVal, rankValOrigin, sizeof(U32) * (HUF_TABLELOG_MAX + 1)); - - /* fill skipped values */ + /* Fill skipped values (all positions up to rankVal[minWeight]). + * These are positions only get a single symbol because the combined weight + * is too large. + */ if (minWeight>1) { - U32 i, skipSize = rankVal[minWeight]; - MEM_writeLE16(&(DElt.sequence), baseSeq); - DElt.nbBits = (BYTE)(consumed); - DElt.length = 1; - for (i = 0; i < skipSize; i++) - DTable[i] = DElt; + U32 const length = 1U << ((targetLog - consumedBits) & 0x1F /* quiet static-analyzer */); + U64 const DEltX2 = HUF_buildDEltX2U64(baseSeq, consumedBits, /* baseSeq */ 0, /* level */ 1); + int const skipSize = rankVal[minWeight]; + assert(length > 1); + assert((U32)skipSize < length); + switch (length) { + case 2: + assert(skipSize == 1); + ZSTD_memcpy(DTable, &DEltX2, sizeof(DEltX2)); + break; + case 4: + assert(skipSize <= 4); + ZSTD_memcpy(DTable + 0, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTable + 2, &DEltX2, sizeof(DEltX2)); + break; + default: + { + int i; + for (i = 0; i < skipSize; i += 8) { + ZSTD_memcpy(DTable + i + 0, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTable + i + 2, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTable + i + 4, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTable + i + 6, &DEltX2, sizeof(DEltX2)); + } + } + } } - /* 
fill DTable */ - { U32 s; for (s=0; s= 1 */ - - rankVal[weight] += length; - } } + /* Fill each of the second level symbols by weight. */ + { + int w; + for (w = minWeight; w < maxWeight1; ++w) { + int const begin = rankStart[w]; + int const end = rankStart[w+1]; + U32 const nbBits = nbBitsBaseline - w; + U32 const totalBits = nbBits + consumedBits; + HUF_fillDTableX2ForWeight( + DTable + rankVal[w], + sortedSymbols + begin, sortedSymbols + end, + totalBits, targetLog, + baseSeq, /* level */ 2); + } + } } - static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog, - const sortedSymbol_t* sortedList, const U32 sortedListSize, + const sortedSymbol_t* sortedList, const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight, - const U32 nbBitsBaseline, U32* wksp, size_t wkspSize) + const U32 nbBitsBaseline) { - U32* rankVal = wksp; + U32* const rankVal = rankValOrigin[0]; const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */ const U32 minBits = nbBitsBaseline - maxWeight; - U32 s; + int w; + int const wEnd = (int)maxWeight + 1; - assert(wkspSize >= HUF_TABLELOG_MAX + 1); - wksp += HUF_TABLELOG_MAX + 1; - wkspSize -= HUF_TABLELOG_MAX + 1; + /* Fill DTable in order of weight. */ + for (w = 1; w < wEnd; ++w) { + int const begin = (int)rankStart[w]; + int const end = (int)rankStart[w+1]; + U32 const nbBits = nbBitsBaseline - w; - ZSTD_memcpy(rankVal, rankValOrigin, sizeof(U32) * (HUF_TABLELOG_MAX + 1)); - - /* fill DTable */ - for (s=0; s= minBits) { /* enough room for a second symbol */ - U32 sortedRank; + if (targetLog-nbBits >= minBits) { + /* Enough room for a second symbol. 
*/ + int start = rankVal[w]; + U32 const length = 1U << ((targetLog - nbBits) & 0x1F /* quiet static-analyzer */); int minWeight = nbBits + scaleLog; + int s; if (minWeight < 1) minWeight = 1; - sortedRank = rankStart[minWeight]; - HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits, - rankValOrigin[nbBits], minWeight, - sortedList+sortedRank, sortedListSize-sortedRank, - nbBitsBaseline, symbol, wksp, wkspSize); + /* Fill the DTable for every symbol of weight w. + * These symbols get at least 1 second symbol. + */ + for (s = begin; s != end; ++s) { + HUF_fillDTableX2Level2( + DTable + start, targetLog, nbBits, + rankValOrigin[nbBits], minWeight, wEnd, + sortedList, rankStart, + nbBitsBaseline, sortedList[s].symbol); + start += length; + } } else { - HUF_DEltX2 DElt; - MEM_writeLE16(&(DElt.sequence), symbol); - DElt.nbBits = (BYTE)(nbBits); - DElt.length = 1; - { U32 const end = start + length; - U32 u; - for (u = start; u < end; u++) DTable[u] = DElt; - } } - rankVal[weight] += length; + /* Only a single symbol. 
*/ + HUF_fillDTableX2ForWeight( + DTable + rankVal[w], + sortedList + begin, sortedList + end, + nbBits, targetLog, + /* baseSeq */ 0, /* level */ 1); + } } } typedef struct { rankValCol_t rankVal[HUF_TABLELOG_MAX]; U32 rankStats[HUF_TABLELOG_MAX + 1]; - U32 rankStart0[HUF_TABLELOG_MAX + 2]; + U32 rankStart0[HUF_TABLELOG_MAX + 3]; sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1]; BYTE weightList[HUF_SYMBOLVALUE_MAX + 1]; U32 calleeWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; @@ -627,9 +1042,16 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize) { - U32 tableLog, maxW, sizeOfSort, nbSymbols; + return HUF_readDTableX2_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0); +} + +size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize, int bmi2) +{ + U32 tableLog, maxW, nbSymbols; DTableDesc dtd = HUF_getDTableDesc(DTable); - U32 const maxTableLog = dtd.maxTableLog; + U32 maxTableLog = dtd.maxTableLog; size_t iSize; void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */ HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr; @@ -647,11 +1069,12 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); /* ZSTD_memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... 
*/ - iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), /* bmi2 */ 0); + iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), bmi2); if (HUF_isError(iSize)) return iSize; /* check result */ if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */ + if (tableLog <= HUF_DECODER_FAST_TABLELOG && maxTableLog > HUF_DECODER_FAST_TABLELOG) maxTableLog = HUF_DECODER_FAST_TABLELOG; /* find maxWeight */ for (maxW = tableLog; wksp->rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */ @@ -664,7 +1087,7 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, rankStart[w] = curr; } rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/ - sizeOfSort = nextRankStart; + rankStart[maxW+1] = nextRankStart; } /* sort symbols by weight */ @@ -673,7 +1096,6 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, U32 const w = wksp->weightList[s]; U32 const r = rankStart[w]++; wksp->sortedSymbol[r].symbol = (BYTE)s; - wksp->sortedSymbol[r].weight = (BYTE)w; } rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */ } @@ -698,10 +1120,9 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, } } } } HUF_fillDTableX2(dt, maxTableLog, - wksp->sortedSymbol, sizeOfSort, + wksp->sortedSymbol, wksp->rankStart0, wksp->rankVal, maxW, - tableLog+1, - wksp->calleeWksp, sizeof(wksp->calleeWksp) / sizeof(U32)); + tableLog+1); dtd.tableLog = (BYTE)maxTableLog; dtd.tableType = 1; @@ -714,7 +1135,7 @@ FORCE_INLINE_TEMPLATE U32 HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog) { size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ - ZSTD_memcpy(op, dt+val, 2); + ZSTD_memcpy(op, &dt[val].sequence, 2); BIT_skipBits(DStream, 
dt[val].nbBits); return dt[val].length; } @@ -723,15 +1144,17 @@ FORCE_INLINE_TEMPLATE U32 HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog) { size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ - ZSTD_memcpy(op, dt+val, 1); - if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits); - else { + ZSTD_memcpy(op, &dt[val].sequence, 1); + if (dt[val].length==1) { + BIT_skipBits(DStream, dt[val].nbBits); + } else { if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) { BIT_skipBits(DStream, dt[val].nbBits); if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8)) /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */ DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); - } } + } + } return 1; } @@ -753,19 +1176,37 @@ HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, BYTE* const pStart = p; /* up to 8 symbols at a time */ - while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) { - HUF_DECODE_SYMBOLX2_2(p, bitDPtr); - HUF_DECODE_SYMBOLX2_1(p, bitDPtr); - HUF_DECODE_SYMBOLX2_2(p, bitDPtr); - HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + if ((size_t)(pEnd - p) >= sizeof(bitDPtr->bitContainer)) { + if (dtLog <= 11 && MEM_64bits()) { + /* up to 10 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-9)) { + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + } + } else { + /* up to 8 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) { + HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_1(p, bitDPtr); + HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + } + } + } else { + 
BIT_reloadDStream(bitDPtr); } /* closer to end : up to 2 symbols at a time */ - while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2)) - HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + if ((size_t)(pEnd - p) >= 2) { + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2)) + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); - while (p <= pEnd-2) - HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */ + while (p <= pEnd-2) + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */ + } if (p < pEnd) p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog); @@ -799,7 +1240,6 @@ HUF_decompress1X2_usingDTable_internal_body( /* decoded size */ return dstSize; } - FORCE_INLINE_TEMPLATE size_t HUF_decompress4X2_usingDTable_internal_body( void* dst, size_t dstSize, @@ -841,57 +1281,60 @@ HUF_decompress4X2_usingDTable_internal_body( U32 const dtLog = dtd.tableLog; if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */ CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); /* 16-32 symbols per loop (4-8 symbols per stream) */ - for ( ; (endSignal) & (op4 < olimit); ) { + if ((size_t)(oend - op4) >= sizeof(size_t)) { + for ( ; (endSignal) & (op4 < olimit); ) { #if defined(__clang__) && (defined(__x86_64__) || defined(__i386__)) - HUF_DECODE_SYMBOLX2_2(op1, &bitD1); - HUF_DECODE_SYMBOLX2_1(op1, &bitD1); - HUF_DECODE_SYMBOLX2_2(op1, &bitD1); - HUF_DECODE_SYMBOLX2_0(op1, &bitD1); - HUF_DECODE_SYMBOLX2_2(op2, &bitD2); - HUF_DECODE_SYMBOLX2_1(op2, &bitD2); - HUF_DECODE_SYMBOLX2_2(op2, &bitD2); - HUF_DECODE_SYMBOLX2_0(op2, &bitD2); - endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; - endSignal &= BIT_reloadDStreamFast(&bitD2) == 
BIT_DStream_unfinished; - HUF_DECODE_SYMBOLX2_2(op3, &bitD3); - HUF_DECODE_SYMBOLX2_1(op3, &bitD3); - HUF_DECODE_SYMBOLX2_2(op3, &bitD3); - HUF_DECODE_SYMBOLX2_0(op3, &bitD3); - HUF_DECODE_SYMBOLX2_2(op4, &bitD4); - HUF_DECODE_SYMBOLX2_1(op4, &bitD4); - HUF_DECODE_SYMBOLX2_2(op4, &bitD4); - HUF_DECODE_SYMBOLX2_0(op4, &bitD4); - endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; - endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_1(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_0(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_1(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_0(op2, &bitD2); + endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_1(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_0(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_1(op4, &bitD4); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_0(op4, &bitD4); + endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; #else - HUF_DECODE_SYMBOLX2_2(op1, &bitD1); - HUF_DECODE_SYMBOLX2_2(op2, &bitD2); - HUF_DECODE_SYMBOLX2_2(op3, &bitD3); - HUF_DECODE_SYMBOLX2_2(op4, &bitD4); - HUF_DECODE_SYMBOLX2_1(op1, &bitD1); - HUF_DECODE_SYMBOLX2_1(op2, &bitD2); - HUF_DECODE_SYMBOLX2_1(op3, &bitD3); - HUF_DECODE_SYMBOLX2_1(op4, &bitD4); - HUF_DECODE_SYMBOLX2_2(op1, &bitD1); - HUF_DECODE_SYMBOLX2_2(op2, &bitD2); - HUF_DECODE_SYMBOLX2_2(op3, &bitD3); - HUF_DECODE_SYMBOLX2_2(op4, &bitD4); - HUF_DECODE_SYMBOLX2_0(op1, &bitD1); - HUF_DECODE_SYMBOLX2_0(op2, &bitD2); - HUF_DECODE_SYMBOLX2_0(op3, &bitD3); - HUF_DECODE_SYMBOLX2_0(op4, &bitD4); - endSignal = 
(U32)LIKELY((U32) - (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished) - & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished) - & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished) - & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished)); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_1(op1, &bitD1); + HUF_DECODE_SYMBOLX2_1(op2, &bitD2); + HUF_DECODE_SYMBOLX2_1(op3, &bitD3); + HUF_DECODE_SYMBOLX2_1(op4, &bitD4); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_0(op1, &bitD1); + HUF_DECODE_SYMBOLX2_0(op2, &bitD2); + HUF_DECODE_SYMBOLX2_0(op3, &bitD3); + HUF_DECODE_SYMBOLX2_0(op4, &bitD4); + endSignal = (U32)LIKELY((U32) + (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished)); #endif + } } /* check corruption */ @@ -915,8 +1358,99 @@ HUF_decompress4X2_usingDTable_internal_body( } } +#if HUF_NEED_BMI2_FUNCTION +static BMI2_TARGET_ATTRIBUTE +size_t HUF_decompress4X2_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc, + size_t cSrcSize, HUF_DTable const* DTable) { + return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable); +} +#endif + +#if HUF_NEED_DEFAULT_FUNCTION +static +size_t HUF_decompress4X2_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc, + size_t cSrcSize, HUF_DTable const* DTable) { + return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable); +} +#endif + +#if ZSTD_ENABLE_ASM_X86_64_BMI2 + +HUF_ASM_DECL void HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) 
ZSTDLIB_HIDDEN; + +static HUF_ASM_X86_64_BMI2_ATTRS size_t +HUF_decompress4X2_usingDTable_internal_bmi2_asm( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) { + void const* dt = DTable + 1; + const BYTE* const iend = (const BYTE*)cSrc + 6; + BYTE* const oend = (BYTE*)dst + dstSize; + HUF_DecompressAsmArgs args; + { + size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable); + FORWARD_IF_ERROR(ret, "Failed to init asm args"); + if (ret != 0) + return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); + } + + assert(args.ip[0] >= args.ilimit); + HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(&args); + + /* note : op4 already verified within main loop */ + assert(args.ip[0] >= iend); + assert(args.ip[1] >= iend); + assert(args.ip[2] >= iend); + assert(args.ip[3] >= iend); + assert(args.op[3] <= oend); + (void)iend; + + /* finish bitStreams one by one */ + { + size_t const segmentSize = (dstSize+3) / 4; + BYTE* segmentEnd = (BYTE*)dst; + int i; + for (i = 0; i < 4; ++i) { + BIT_DStream_t bit; + if (segmentSize <= (size_t)(oend - segmentEnd)) + segmentEnd += segmentSize; + else + segmentEnd = oend; + FORWARD_IF_ERROR(HUF_initRemainingDStream(&bit, &args, i, segmentEnd), "corruption"); + args.op[i] += HUF_decodeStreamX2(args.op[i], &bit, segmentEnd, (HUF_DEltX2 const*)dt, HUF_DECODER_FAST_TABLELOG); + if (args.op[i] != segmentEnd) + return ERROR(corruption_detected); + } + } + + /* decoded size */ + return dstSize; +} +#endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */ + +static size_t HUF_decompress4X2_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc, + size_t cSrcSize, HUF_DTable const* DTable, int bmi2) +{ +#if DYNAMIC_BMI2 + if (bmi2) { +# if ZSTD_ENABLE_ASM_X86_64_BMI2 + return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable); +# else + return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, 
cSrcSize, DTable); +# endif + } +#else + (void)bmi2; +#endif + +#if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__) + return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable); +#else + return HUF_decompress4X2_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable); +#endif +} + HUF_DGEN(HUF_decompress1X2_usingDTable_internal) -HUF_DGEN(HUF_decompress4X2_usingDTable_internal) size_t HUF_decompress1X2_usingDTable( void* dst, size_t dstSize, @@ -1025,25 +1559,25 @@ size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t; -static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] = +static const algo_time_t algoTime[16 /* Quantization */][2 /* single, double */] = { /* single, double, quad */ - {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */ - {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */ - {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */ - {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */ - {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */ - {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */ - {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */ - {{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */ - {{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */ - {{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */ - {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */ - {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */ - {{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */ - {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */ - {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */ - {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */ + {{0,0}, {1,1}}, /* Q==0 : impossible */ + {{0,0}, {1,1}}, /* Q==1 : impossible */ + {{ 150,216}, { 
381,119}}, /* Q == 2 : 12-18% */ + {{ 170,205}, { 514,112}}, /* Q == 3 : 18-25% */ + {{ 177,199}, { 539,110}}, /* Q == 4 : 25-32% */ + {{ 197,194}, { 644,107}}, /* Q == 5 : 32-38% */ + {{ 221,192}, { 735,107}}, /* Q == 6 : 38-44% */ + {{ 256,189}, { 881,106}}, /* Q == 7 : 44-50% */ + {{ 359,188}, {1167,109}}, /* Q == 8 : 50-56% */ + {{ 582,187}, {1570,114}}, /* Q == 9 : 56-62% */ + {{ 688,187}, {1712,122}}, /* Q ==10 : 62-69% */ + {{ 825,186}, {1965,136}}, /* Q ==11 : 69-75% */ + {{ 976,185}, {2131,150}}, /* Q ==12 : 75-81% */ + {{1180,186}, {2070,175}}, /* Q ==13 : 81-87% */ + {{1377,185}, {1731,202}}, /* Q ==14 : 87-93% */ + {{1412,185}, {1695,202}}, /* Q ==15 : 93-99% */ }; #endif @@ -1070,7 +1604,7 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize) U32 const D256 = (U32)(dstSize >> 8); U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256); U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256); - DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */ + DTime1 += DTime1 >> 5; /* small advantage to algorithm using less memory, to reduce cache eviction */ return DTime1 < DTime0; } #endif diff --git a/lib/zstd/decompress/zstd_decompress.c b/lib/zstd/decompress/zstd_decompress.c index 6928e85f9d19..b9b935a9f5c0 100644 --- a/lib/zstd/decompress/zstd_decompress.c +++ b/lib/zstd/decompress/zstd_decompress.c @@ -53,7 +53,6 @@ * Dependencies *********************************************************/ #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */ -#include "../common/cpu.h" /* bmi2 */ #include "../common/mem.h" /* low level memory routines */ #define FSE_STATIC_LINKING_ONLY #include "../common/fse.h" @@ -252,11 +251,11 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) dctx->inBuffSize = 0; dctx->outBuffSize = 0; dctx->streamStage = zdss_init; - dctx->legacyContext = NULL; - dctx->previousLegacyVersion = 0; dctx->noForwardProgress = 0; 
dctx->oversizedDuration = 0; - dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); +#if DYNAMIC_BMI2 + dctx->bmi2 = ZSTD_cpuSupportsBmi2(); +#endif dctx->ddictSet = NULL; ZSTD_DCtx_resetParameters(dctx); #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION @@ -277,8 +276,7 @@ ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize) return dctx; } -ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem) -{ +static ZSTD_DCtx* ZSTD_createDCtx_internal(ZSTD_customMem customMem) { if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; { ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_customMalloc(sizeof(*dctx), customMem); @@ -289,10 +287,15 @@ ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem) } } +ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem) +{ + return ZSTD_createDCtx_internal(customMem); +} + ZSTD_DCtx* ZSTD_createDCtx(void) { DEBUGLOG(3, "ZSTD_createDCtx"); - return ZSTD_createDCtx_advanced(ZSTD_defaultCMem); + return ZSTD_createDCtx_internal(ZSTD_defaultCMem); } static void ZSTD_clearDict(ZSTD_DCtx* dctx) @@ -370,6 +373,19 @@ unsigned ZSTD_isFrame(const void* buffer, size_t size) return 0; } +/*! ZSTD_isSkippableFrame() : + * Tells if the content of `buffer` starts with a valid Frame Identifier for a skippable frame. + * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. + */ +unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size) +{ + if (size < ZSTD_FRAMEIDSIZE) return 0; + { U32 const magic = MEM_readLE32(buffer); + if ((magic & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) return 1; + } + return 0; +} + /* ZSTD_frameHeaderSize_internal() : * srcSize must be large enough to reach header size fields. * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless. 
@@ -497,7 +513,6 @@ size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t src return ZSTD_getFrameHeader_advanced(zfhPtr, src, srcSize, ZSTD_f_zstd1); } - /* ZSTD_getFrameContentSize() : * compatible with legacy mode * @return : decompressed size of the single frame pointed to be `src` if known, otherwise @@ -532,6 +547,37 @@ static size_t readSkippableFrameSize(void const* src, size_t srcSize) } } +/*! ZSTD_readSkippableFrame() : + * Retrieves a zstd skippable frame containing data given by src, and writes it to dst buffer. + * + * The parameter magicVariant will receive the magicVariant that was supplied when the frame was written, + * i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START. This can be NULL if the caller is not interested + * in the magicVariant. + * + * Returns an error if destination buffer is not large enough, or if the frame is not skippable. + * + * @return : number of bytes written or a ZSTD error. + */ +ZSTDLIB_API size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity, unsigned* magicVariant, + const void* src, size_t srcSize) +{ + U32 const magicNumber = MEM_readLE32(src); + size_t skippableFrameSize = readSkippableFrameSize(src, srcSize); + size_t skippableContentSize = skippableFrameSize - ZSTD_SKIPPABLEHEADERSIZE; + + /* check input validity */ + RETURN_ERROR_IF(!ZSTD_isSkippableFrame(src, srcSize), frameParameter_unsupported, ""); + RETURN_ERROR_IF(skippableFrameSize < ZSTD_SKIPPABLEHEADERSIZE || skippableFrameSize > srcSize, srcSize_wrong, ""); + RETURN_ERROR_IF(skippableContentSize > dstCapacity, dstSize_tooSmall, ""); + + /* deliver payload */ + if (skippableContentSize > 0 && dst != NULL) + ZSTD_memcpy(dst, (const BYTE *)src + ZSTD_SKIPPABLEHEADERSIZE, skippableContentSize); + if (magicVariant != NULL) + *magicVariant = magicNumber - ZSTD_MAGIC_SKIPPABLE_START; + return skippableContentSize; +} + /* ZSTD_findDecompressedSize() : * compatible with legacy mode * `srcSize` must be the exact length of some number 
of ZSTD compressed and/or @@ -824,7 +870,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, switch(blockProperties.blockType) { case bt_compressed: - decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oend-op), ip, cBlockSize, /* frame */ 1); + decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oend-op), ip, cBlockSize, /* frame */ 1, not_streaming); break; case bt_raw : decodedSize = ZSTD_copyRawBlock(op, (size_t)(oend-op), ip, cBlockSize); @@ -976,7 +1022,7 @@ size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t sr { #if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1) size_t regenSize; - ZSTD_DCtx* const dctx = ZSTD_createDCtx(); + ZSTD_DCtx* const dctx = ZSTD_createDCtx_internal(ZSTD_defaultCMem); RETURN_ERROR_IF(dctx==NULL, memory_allocation, "NULL pointer!"); regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize); ZSTD_freeDCtx(dctx); @@ -1010,7 +1056,7 @@ static size_t ZSTD_nextSrcSizeToDecompressWithInputSize(ZSTD_DCtx* dctx, size_t return dctx->expected; if (dctx->bType != bt_raw) return dctx->expected; - return MIN(MAX(inputSize, 1), dctx->expected); + return BOUNDED(1, inputSize, dctx->expected); } ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) { @@ -1116,7 +1162,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c { case bt_compressed: DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed"); - rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1); + rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1, is_streaming); dctx->expected = 0; /* Streaming not supported */ break; case bt_raw : @@ -1438,7 +1484,7 @@ size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, ZSTD_DStream* ZSTD_createDStream(void) { DEBUGLOG(3, "ZSTD_createDStream"); - return ZSTD_createDStream_advanced(ZSTD_defaultCMem); + return ZSTD_createDCtx_internal(ZSTD_defaultCMem); } ZSTD_DStream* 
ZSTD_initStaticDStream(void *workspace, size_t workspaceSize) @@ -1448,7 +1494,7 @@ ZSTD_DStream* ZSTD_initStaticDStream(void *workspace, size_t workspaceSize) ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem) { - return ZSTD_createDCtx_advanced(customMem); + return ZSTD_createDCtx_internal(customMem); } size_t ZSTD_freeDStream(ZSTD_DStream* zds) @@ -1708,7 +1754,8 @@ size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx) size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize) { size_t const blockSize = (size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX); - unsigned long long const neededRBSize = windowSize + blockSize + (WILDCOPY_OVERLENGTH * 2); + /* space is needed to store the litbuffer after the output of a given block without stomping the extDict of a previous run, as well as to cover both windows against wildcopy*/ + unsigned long long const neededRBSize = windowSize + blockSize + ZSTD_BLOCKSIZE_MAX + (WILDCOPY_OVERLENGTH * 2); unsigned long long const neededSize = MIN(frameContentSize, neededRBSize); size_t const minRBSize = (size_t) neededSize; RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize, @@ -1842,7 +1889,6 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB DEBUGLOG(5, "stage zdss_init => transparent reset "); zds->streamStage = zdss_loadHeader; zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0; - zds->legacyVersion = 0; zds->hostageByte = 0; zds->expectedOutBuffer = *output; ZSTD_FALLTHROUGH; diff --git a/lib/zstd/decompress/zstd_decompress_block.c b/lib/zstd/decompress/zstd_decompress_block.c index 2d101d9a842e..c1913b8e7c89 100644 --- a/lib/zstd/decompress/zstd_decompress_block.c +++ b/lib/zstd/decompress/zstd_decompress_block.c @@ -69,15 +69,56 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, } } +/* Allocate buffer for literals, either overlapping current dst, or split between dst and litExtraBuffer, or stored entirely within 
litExtraBuffer */ +static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize, + const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately) +{ + if (streaming == not_streaming && dstCapacity > ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH) + { + /* room for litbuffer to fit without read faulting */ + dctx->litBuffer = (BYTE*)dst + ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH; + dctx->litBufferEnd = dctx->litBuffer + litSize; + dctx->litBufferLocation = ZSTD_in_dst; + } + else if (litSize > ZSTD_LITBUFFEREXTRASIZE) + { + /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */ + if (splitImmediately) { + /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */ + dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH; + dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE; + } + else { + /* initially this will be stored entirely in dst during huffman decoding, it will partially shifted to litExtraBuffer after */ + dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize; + dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize; + } + dctx->litBufferLocation = ZSTD_split; + } + else + { + /* fits entirely within litExtraBuffer, so no split is necessary */ + dctx->litBuffer = dctx->litExtraBuffer; + dctx->litBufferEnd = dctx->litBuffer + litSize; + dctx->litBufferLocation = ZSTD_not_in_dst; + } +} /* Hidden declaration for fullbench */ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, - const void* src, size_t srcSize); + const void* src, size_t srcSize, + void* dst, size_t dstCapacity, const streaming_operation streaming); /*! ZSTD_decodeLiteralsBlock() : + * Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored + * in the dstBuffer. 
If there is room to do so, it will be stored in full in the excess dst space after where the current + * block will be output. Otherwise it will be stored at the end of the current dst blockspace, with a small portion being + * stored in dctx->litExtraBuffer to help keep it "ahead" of the current output write. + * * @return : nb of bytes read from src (< srcSize ) * note : symbol not declared but exposed for fullbench */ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, - const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */ + const void* src, size_t srcSize, /* note : srcSize < BLOCKSIZE */ + void* dst, size_t dstCapacity, const streaming_operation streaming) { DEBUGLOG(5, "ZSTD_decodeLiteralsBlock"); RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, ""); @@ -99,6 +140,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, U32 const lhlCode = (istart[0] >> 2) & 3; U32 const lhc = MEM_readLE32(istart); size_t hufSuccess; + size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity); switch(lhlCode) { case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */ @@ -121,8 +163,11 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, litCSize = (lhc >> 22) + ((size_t)istart[4] << 10); break; } + RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled"); RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, ""); + RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, ""); + ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0); /* prefetch huffman table if cold */ if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) { @@ -133,11 +178,11 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, if (singleStream) { hufSuccess = HUF_decompress1X_usingDTable_bmi2( dctx->litBuffer, litSize, istart+lhSize, litCSize, - dctx->HUFptr, dctx->bmi2); + dctx->HUFptr, 
ZSTD_DCtx_get_bmi2(dctx)); } else { hufSuccess = HUF_decompress4X_usingDTable_bmi2( dctx->litBuffer, litSize, istart+lhSize, litCSize, - dctx->HUFptr, dctx->bmi2); + dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx)); } } else { if (singleStream) { @@ -150,15 +195,22 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2( dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->workspace, - sizeof(dctx->workspace), dctx->bmi2); + sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx)); #endif } else { hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2( dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->workspace, - sizeof(dctx->workspace), dctx->bmi2); + sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx)); } } + if (dctx->litBufferLocation == ZSTD_split) + { + ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE); + ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE); + dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH; + dctx->litBufferEnd -= WILDCOPY_OVERLENGTH; + } RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, ""); @@ -166,13 +218,13 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, dctx->litSize = litSize; dctx->litEntropy = 1; if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable; - ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); return litCSize + lhSize; } case set_basic: { size_t litSize, lhSize; U32 const lhlCode = ((istart[0]) >> 2) & 3; + size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity); switch(lhlCode) { case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ @@ -189,23 +241,36 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, break; } + RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled"); + 
RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, ""); + ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1); if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */ RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, ""); - ZSTD_memcpy(dctx->litBuffer, istart+lhSize, litSize); + if (dctx->litBufferLocation == ZSTD_split) + { + ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize - ZSTD_LITBUFFEREXTRASIZE); + ZSTD_memcpy(dctx->litExtraBuffer, istart + lhSize + litSize - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE); + } + else + { + ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize); + } dctx->litPtr = dctx->litBuffer; dctx->litSize = litSize; - ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); return lhSize+litSize; } /* direct reference into compressed stream */ dctx->litPtr = istart+lhSize; dctx->litSize = litSize; + dctx->litBufferEnd = dctx->litPtr + litSize; + dctx->litBufferLocation = ZSTD_not_in_dst; return lhSize+litSize; } case set_rle: { U32 const lhlCode = ((istart[0]) >> 2) & 3; size_t litSize, lhSize; + size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity); switch(lhlCode) { case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ @@ -222,8 +287,19 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4"); break; } + RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled"); RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); - ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH); + RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, ""); + ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1); + if (dctx->litBufferLocation == ZSTD_split) + { + 
ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize - ZSTD_LITBUFFEREXTRASIZE); + ZSTD_memset(dctx->litExtraBuffer, istart[lhSize], ZSTD_LITBUFFEREXTRASIZE); + } + else + { + ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize); + } dctx->litPtr = dctx->litBuffer; dctx->litSize = litSize; return lhSize+1; @@ -343,7 +419,7 @@ static const ZSTD_seqSymbol ML_defaultDTable[(1<nbBits = 0; cell->nextState = 0; assert(nbAddBits < 255); - cell->nbAdditionalBits = (BYTE)nbAddBits; + cell->nbAdditionalBits = nbAddBits; cell->baseValue = baseValue; } @@ -367,7 +443,7 @@ static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddB FORCE_INLINE_TEMPLATE void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt, const short* normalizedCounter, unsigned maxSymbolValue, - const U32* baseValue, const U32* nbAdditionalBits, + const U32* baseValue, const U8* nbAdditionalBits, unsigned tableLog, void* wksp, size_t wkspSize) { ZSTD_seqSymbol* const tableDecode = dt+1; @@ -478,7 +554,7 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt, tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) ); tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize); assert(nbAdditionalBits[symbol] < 255); - tableDecode[u].nbAdditionalBits = (BYTE)nbAdditionalBits[symbol]; + tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol]; tableDecode[u].baseValue = baseValue[symbol]; } } @@ -487,7 +563,7 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt, /* Avoids the FORCE_INLINE of the _body() function. 
*/ static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt, const short* normalizedCounter, unsigned maxSymbolValue, - const U32* baseValue, const U32* nbAdditionalBits, + const U32* baseValue, const U8* nbAdditionalBits, unsigned tableLog, void* wksp, size_t wkspSize) { ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue, @@ -495,9 +571,9 @@ static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt, } #if DYNAMIC_BMI2 -TARGET_ATTRIBUTE("bmi2") static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt, +BMI2_TARGET_ATTRIBUTE static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt, const short* normalizedCounter, unsigned maxSymbolValue, - const U32* baseValue, const U32* nbAdditionalBits, + const U32* baseValue, const U8* nbAdditionalBits, unsigned tableLog, void* wksp, size_t wkspSize) { ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue, @@ -507,7 +583,7 @@ TARGET_ATTRIBUTE("bmi2") static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol void ZSTD_buildFSETable(ZSTD_seqSymbol* dt, const short* normalizedCounter, unsigned maxSymbolValue, - const U32* baseValue, const U32* nbAdditionalBits, + const U32* baseValue, const U8* nbAdditionalBits, unsigned tableLog, void* wksp, size_t wkspSize, int bmi2) { #if DYNAMIC_BMI2 @@ -529,7 +605,7 @@ void ZSTD_buildFSETable(ZSTD_seqSymbol* dt, static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr, symbolEncodingType_e type, unsigned max, U32 maxLog, const void* src, size_t srcSize, - const U32* baseValue, const U32* nbAdditionalBits, + const U32* baseValue, const U8* nbAdditionalBits, const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable, int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize, int bmi2) @@ -541,7 +617,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, ""); { U32 const symbol = *(const BYTE*)src; U32 const baseline = baseValue[symbol]; 
- U32 const nbBits = nbAdditionalBits[symbol]; + U8 const nbBits = nbAdditionalBits[symbol]; ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits); } *DTablePtr = DTableSpace; @@ -620,7 +696,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, LL_defaultDTable, dctx->fseEntropy, dctx->ddictIsCold, nbSeq, dctx->workspace, sizeof(dctx->workspace), - dctx->bmi2); + ZSTD_DCtx_get_bmi2(dctx)); RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed"); ip += llhSize; } @@ -632,7 +708,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, OF_defaultDTable, dctx->fseEntropy, dctx->ddictIsCold, nbSeq, dctx->workspace, sizeof(dctx->workspace), - dctx->bmi2); + ZSTD_DCtx_get_bmi2(dctx)); RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed"); ip += ofhSize; } @@ -644,7 +720,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, ML_defaultDTable, dctx->fseEntropy, dctx->ddictIsCold, nbSeq, dctx->workspace, sizeof(dctx->workspace), - dctx->bmi2); + ZSTD_DCtx_get_bmi2(dctx)); RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed"); ip += mlhSize; } @@ -658,7 +734,6 @@ typedef struct { size_t litLength; size_t matchLength; size_t offset; - const BYTE* match; } seq_t; typedef struct { @@ -672,9 +747,6 @@ typedef struct { ZSTD_fseState stateOffb; ZSTD_fseState stateML; size_t prevOffset[ZSTD_REP_NUM]; - const BYTE* prefixStart; - const BYTE* dictEnd; - size_t pos; } seqState_t; /*! ZSTD_overlapCopy8() : @@ -717,7 +789,7 @@ HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) { * - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart. * The src buffer must be before the dst buffer. 
*/ -static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) { +static void ZSTD_safecopy(BYTE* op, const BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) { ptrdiff_t const diff = op - ip; BYTE* const oend = op + length; @@ -733,6 +805,7 @@ static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_ /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */ assert(length >= 8); ZSTD_overlapCopy8(&op, &ip, diff); + length -= 8; assert(op - ip >= 8); assert(op <= oend); } @@ -747,12 +820,35 @@ static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_ assert(oend > oend_w); ZSTD_wildcopy(op, ip, oend_w - op, ovtype); ip += oend_w - op; - op = oend_w; + op += oend_w - op; } /* Handle the leftovers. */ while (op < oend) *op++ = *ip++; } +/* ZSTD_safecopyDstBeforeSrc(): + * This version allows overlap with dst before src, or handles the non-overlap case with dst after src + * Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */ +static void ZSTD_safecopyDstBeforeSrc(BYTE* op, BYTE const* ip, ptrdiff_t length) { + ptrdiff_t const diff = op - ip; + BYTE* const oend = op + length; + + if (length < 8 || diff > -8) { + /* Handle short lengths, close overlaps, and dst not before src. */ + while (op < oend) *op++ = *ip++; + return; + } + + if (op <= oend - WILDCOPY_OVERLENGTH && diff < -WILDCOPY_VECLEN) { + ZSTD_wildcopy(op, ip, oend - WILDCOPY_OVERLENGTH - op, ZSTD_no_overlap); + ip += oend - WILDCOPY_OVERLENGTH - op; + op += oend - WILDCOPY_OVERLENGTH - op; + } + + /* Handle the leftovers. */ + while (op < oend) *op++ = *ip++; +} + /* ZSTD_execSequenceEnd(): * This version handles cases that are near the end of the output buffer. It requires * more careful checks to make sure there is no overflow. 
By separating out these hard @@ -763,9 +859,9 @@ static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_ */ FORCE_NOINLINE size_t ZSTD_execSequenceEnd(BYTE* op, - BYTE* const oend, seq_t sequence, - const BYTE** litPtr, const BYTE* const litLimit, - const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) + BYTE* const oend, seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit, + const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) { BYTE* const oLitEnd = op + sequence.litLength; size_t const sequenceLength = sequence.litLength + sequence.matchLength; @@ -788,27 +884,76 @@ size_t ZSTD_execSequenceEnd(BYTE* op, if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { /* offset beyond prefix */ RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, ""); - match = dictEnd - (prefixStart-match); + match = dictEnd - (prefixStart - match); if (match + sequence.matchLength <= dictEnd) { ZSTD_memmove(oLitEnd, match, sequence.matchLength); return sequenceLength; } /* span extDict & currentPrefixSegment */ { size_t const length1 = dictEnd - match; - ZSTD_memmove(oLitEnd, match, length1); - op = oLitEnd + length1; - sequence.matchLength -= length1; - match = prefixStart; - } } + ZSTD_memmove(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = prefixStart; + } + } + ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst); + return sequenceLength; +} + +/* ZSTD_execSequenceEndSplitLitBuffer(): + * This version is intended to be used during instances where the litBuffer is still split. It is kept separate to avoid performance impact for the good case. 
+ */ +FORCE_NOINLINE +size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op, + BYTE* const oend, const BYTE* const oend_w, seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit, + const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) +{ + BYTE* const oLitEnd = op + sequence.litLength; + size_t const sequenceLength = sequence.litLength + sequence.matchLength; + const BYTE* const iLitEnd = *litPtr + sequence.litLength; + const BYTE* match = oLitEnd - sequence.offset; + + + /* bounds checks : careful of address space overflow in 32-bit mode */ + RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer"); + RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer"); + assert(op < op + sequenceLength); + assert(oLitEnd < op + sequenceLength); + + /* copy literals */ + RETURN_ERROR_IF(op > *litPtr && op < *litPtr + sequence.litLength, dstSize_tooSmall, "output should not catch up to and overwrite literal buffer"); + ZSTD_safecopyDstBeforeSrc(op, *litPtr, sequence.litLength); + op = oLitEnd; + *litPtr = iLitEnd; + + /* copy Match */ + if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { + /* offset beyond prefix */ + RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, ""); + match = dictEnd - (prefixStart - match); + if (match + sequence.matchLength <= dictEnd) { + ZSTD_memmove(oLitEnd, match, sequence.matchLength); + return sequenceLength; + } + /* span extDict & currentPrefixSegment */ + { size_t const length1 = dictEnd - match; + ZSTD_memmove(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = prefixStart; + } + } ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst); return sequenceLength; } HINT_INLINE size_t ZSTD_execSequence(BYTE* op, - BYTE* const oend, seq_t sequence, - const BYTE** 
litPtr, const BYTE* const litLimit, - const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) + BYTE* const oend, seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit, + const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) { BYTE* const oLitEnd = op + sequence.litLength; size_t const sequenceLength = sequence.litLength + sequence.matchLength; @@ -817,6 +962,98 @@ size_t ZSTD_execSequence(BYTE* op, const BYTE* const iLitEnd = *litPtr + sequence.litLength; const BYTE* match = oLitEnd - sequence.offset; + assert(op != NULL /* Precondition */); + assert(oend_w < oend /* No underflow */); + /* Handle edge cases in a slow path: + * - Read beyond end of literals + * - Match end is within WILDCOPY_OVERLIMIT of oend + * - 32-bit mode and the match length overflows + */ + if (UNLIKELY( + iLitEnd > litLimit || + oMatchEnd > oend_w || + (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH))) + return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd); + + /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */ + assert(op <= oLitEnd /* No overflow */); + assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */); + assert(oMatchEnd <= oend /* No underflow */); + assert(iLitEnd <= litLimit /* Literal length is in bounds */); + assert(oLitEnd <= oend_w /* Can wildcopy literals */); + assert(oMatchEnd <= oend_w /* Can wildcopy matches */); + + /* Copy Literals: + * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9. + * We likely don't need the full 32-byte wildcopy. 
+ */ + assert(WILDCOPY_OVERLENGTH >= 16); + ZSTD_copy16(op, (*litPtr)); + if (UNLIKELY(sequence.litLength > 16)) { + ZSTD_wildcopy(op + 16, (*litPtr) + 16, sequence.litLength - 16, ZSTD_no_overlap); + } + op = oLitEnd; + *litPtr = iLitEnd; /* update for next sequence */ + + /* Copy Match */ + if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { + /* offset beyond prefix -> go into extDict */ + RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, ""); + match = dictEnd + (match - prefixStart); + if (match + sequence.matchLength <= dictEnd) { + ZSTD_memmove(oLitEnd, match, sequence.matchLength); + return sequenceLength; + } + /* span extDict & currentPrefixSegment */ + { size_t const length1 = dictEnd - match; + ZSTD_memmove(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = prefixStart; + } + } + /* Match within prefix of 1 or more bytes */ + assert(op <= oMatchEnd); + assert(oMatchEnd <= oend_w); + assert(match >= prefixStart); + assert(sequence.matchLength >= 1); + + /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy + * without overlap checking. + */ + if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) { + /* We bet on a full wildcopy for matches, since we expect matches to be + * longer than literals (in general). In silesia, ~10% of matches are longer + * than 16 bytes. + */ + ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap); + return sequenceLength; + } + assert(sequence.offset < WILDCOPY_VECLEN); + + /* Copy 8 bytes and spread the offset to be >= 8. */ + ZSTD_overlapCopy8(&op, &match, sequence.offset); + + /* If the match length is > 8 bytes, then continue with the wildcopy. 
*/ + if (sequence.matchLength > 8) { + assert(op < oMatchEnd); + ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - 8, ZSTD_overlap_src_before_dst); + } + return sequenceLength; +} + +HINT_INLINE +size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op, + BYTE* const oend, const BYTE* const oend_w, seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit, + const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) +{ + BYTE* const oLitEnd = op + sequence.litLength; + size_t const sequenceLength = sequence.litLength + sequence.matchLength; + BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ + const BYTE* const iLitEnd = *litPtr + sequence.litLength; + const BYTE* match = oLitEnd - sequence.offset; + assert(op != NULL /* Precondition */); assert(oend_w < oend /* No underflow */); /* Handle edge cases in a slow path: @@ -828,7 +1065,7 @@ size_t ZSTD_execSequence(BYTE* op, iLitEnd > litLimit || oMatchEnd > oend_w || (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH))) - return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd); + return ZSTD_execSequenceEndSplitLitBuffer(op, oend, oend_w, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd); /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */ assert(op <= oLitEnd /* No overflow */); @@ -896,6 +1133,7 @@ size_t ZSTD_execSequence(BYTE* op, return sequenceLength; } + static void ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt) { @@ -909,20 +1147,10 @@ ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqS } FORCE_INLINE_TEMPLATE void -ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD) +ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16 nextState, U32 nbBits) { - ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state]; - U32 
const nbBits = DInfo.nbBits; size_t const lowBits = BIT_readBits(bitD, nbBits); - DStatePtr->state = DInfo.nextState + lowBits; -} - -FORCE_INLINE_TEMPLATE void -ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD_seqSymbol const DInfo) -{ - U32 const nbBits = DInfo.nbBits; - size_t const lowBits = BIT_readBits(bitD, nbBits); - DStatePtr->state = DInfo.nextState + lowBits; + DStatePtr->state = nextState + lowBits; } /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum @@ -936,116 +1164,105 @@ ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD : 0) typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e; -typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e; FORCE_INLINE_TEMPLATE seq_t -ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const ZSTD_prefetch_e prefetch) +ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets) { seq_t seq; - ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state]; - ZSTD_seqSymbol const mlDInfo = seqState->stateML.table[seqState->stateML.state]; - ZSTD_seqSymbol const ofDInfo = seqState->stateOffb.table[seqState->stateOffb.state]; - U32 const llBase = llDInfo.baseValue; - U32 const mlBase = mlDInfo.baseValue; - U32 const ofBase = ofDInfo.baseValue; - BYTE const llBits = llDInfo.nbAdditionalBits; - BYTE const mlBits = mlDInfo.nbAdditionalBits; - BYTE const ofBits = ofDInfo.nbAdditionalBits; - BYTE const totalBits = llBits+mlBits+ofBits; + const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state; + const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state; + const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state; + seq.matchLength = mlDInfo->baseValue; + seq.litLength = llDInfo->baseValue; + { U32 const ofBase = ofDInfo->baseValue; + BYTE 
const llBits = llDInfo->nbAdditionalBits; + BYTE const mlBits = mlDInfo->nbAdditionalBits; + BYTE const ofBits = ofDInfo->nbAdditionalBits; + BYTE const totalBits = llBits+mlBits+ofBits; - /* sequence */ - { size_t offset; - if (ofBits > 1) { - ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); - ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5); - assert(ofBits <= MaxOff); - if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) { - U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed); - offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); - BIT_reloadDStream(&seqState->DStream); - if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits); - assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */ - } else { - offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ - if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); - } - seqState->prevOffset[2] = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset; - } else { - U32 const ll0 = (llBase == 0); - if (LIKELY((ofBits == 0))) { - if (LIKELY(!ll0)) - offset = seqState->prevOffset[0]; - else { - offset = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset; + U16 const llNext = llDInfo->nextState; + U16 const mlNext = mlDInfo->nextState; + U16 const ofNext = ofDInfo->nextState; + U32 const llnbBits = llDInfo->nbBits; + U32 const mlnbBits = mlDInfo->nbBits; + U32 const ofnbBits = ofDInfo->nbBits; + /* + * As gcc has better branch and block analyzers, sometimes it is only + * valuable to mark likelyness for clang, it gives around 3-4% of + * performance. 
+ */ + + /* sequence */ + { size_t offset; + #if defined(__clang__) + if (LIKELY(ofBits > 1)) { + #else + if (ofBits > 1) { + #endif + ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); + ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5); + assert(ofBits <= MaxOff); + if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) { + U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed); + offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); + BIT_reloadDStream(&seqState->DStream); + if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits); + assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */ + } else { + offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); } + seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset; } else { - offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1); - { size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; - temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ - if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset = temp; - } } } - seq.offset = offset; - } - - seq.matchLength = mlBase; - if (mlBits > 0) - seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/); - - if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) - BIT_reloadDStream(&seqState->DStream); - if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog))) - BIT_reloadDStream(&seqState->DStream); - /* Ensure there are enough bits to read the rest of data in 64-bit mode. 
*/ - ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); - - seq.litLength = llBase; - if (llBits > 0) - seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/); - - if (MEM_32bits()) - BIT_reloadDStream(&seqState->DStream); - - DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u", - (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); - - if (prefetch == ZSTD_p_prefetch) { - size_t const pos = seqState->pos + seq.litLength; - const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart; - seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted. - * No consequence though : no memory access will occur, offset is only used for prefetching */ - seqState->pos = pos + seq.matchLength; - } - - /* ANS state update - * gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo(). - * clang-9.2.0 does 7% worse with ZSTD_updateFseState(). - * Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the - * better option, so it is the default for other compilers. But, if you - * measure that it is worse, please put up a pull request. 
- */ - { -#if !defined(__clang__) - const int kUseUpdateFseState = 1; -#else - const int kUseUpdateFseState = 0; -#endif - if (kUseUpdateFseState) { - ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ - ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ - if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ - ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ - } else { - ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llDInfo); /* <= 9 bits */ - ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlDInfo); /* <= 9 bits */ - if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ - ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofDInfo); /* <= 8 bits */ + U32 const ll0 = (llDInfo->baseValue == 0); + if (LIKELY((ofBits == 0))) { + offset = seqState->prevOffset[ll0]; + seqState->prevOffset[1] = seqState->prevOffset[!ll0]; + seqState->prevOffset[0] = offset; + } else { + offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1); + { size_t temp = (offset==3) ? 
seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; + temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ + if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset = temp; + } } } + seq.offset = offset; } + + #if defined(__clang__) + if (UNLIKELY(mlBits > 0)) + #else + if (mlBits > 0) + #endif + seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/); + + if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) + BIT_reloadDStream(&seqState->DStream); + if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog))) + BIT_reloadDStream(&seqState->DStream); + /* Ensure there are enough bits to read the rest of data in 64-bit mode. */ + ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); + + #if defined(__clang__) + if (UNLIKELY(llBits > 0)) + #else + if (llBits > 0) + #endif + seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/); + + if (MEM_32bits()) + BIT_reloadDStream(&seqState->DStream); + + DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u", + (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); + + ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits); /* <= 9 bits */ + ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits); /* <= 9 bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ + ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits); /* <= 8 bits */ } return seq; @@ -1098,9 +1315,11 @@ MEM_STATIC void ZSTD_assertValidSequence( #endif #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG + + FORCE_INLINE_TEMPLATE size_t DONT_VECTORIZE -ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, +ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, void* dst, size_t 
maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, const ZSTD_longOffset_e isLongOffset, @@ -1112,17 +1331,16 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, BYTE* const oend = ostart + maxDstSize; BYTE* op = ostart; const BYTE* litPtr = dctx->litPtr; - const BYTE* const litEnd = litPtr + dctx->litSize; + const BYTE* litBufferEnd = dctx->litBufferEnd; const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); const BYTE* const vBase = (const BYTE*) (dctx->virtualStart); const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); - DEBUGLOG(5, "ZSTD_decompressSequences_body"); + DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer"); (void)frame; /* Regen sequences */ if (nbSeq) { seqState_t seqState; - size_t error = 0; dctx->fseEntropy = 1; { U32 i; for (i=0; ientropy.rep[i]; } RETURN_ERROR_IF( @@ -1138,70 +1356,255 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, BIT_DStream_endOfBuffer < BIT_DStream_completed && BIT_DStream_completed < BIT_DStream_overflow); + /* decompress without overrunning litPtr begins */ + { + seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset); + /* Align the decompression loop to 32 + 16 bytes. + * + * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression + * speed swings based on the alignment of the decompression loop. This + * performance swing is caused by parts of the decompression loop falling + * out of the DSB. The entire decompression loop should fit in the DSB, + * when it can't we get much worse performance. You can measure if you've + * hit the good case or the bad case with this perf command for some + * compressed file test.zst: + * + * perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \ + * -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst + * + * If you see most cycles served out of the MITE you've hit the bad case. + * If you see most cycles served out of the DSB you've hit the good case. + * If it is pretty even then you may be in an okay case. 
+ * + * This issue has been reproduced on the following CPUs: + * - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9 + * Use Instruments->Counters to get DSB/MITE cycles. + * I never got performance swings, but I was able to + * go from the good case of mostly DSB to half of the + * cycles served from MITE. + * - Coffeelake: Intel i9-9900k + * - Coffeelake: Intel i7-9700k + * + * I haven't been able to reproduce the instability or DSB misses on any + * of the following CPUS: + * - Haswell + * - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH + * - Skylake + * + * Alignment is done for each of the three major decompression loops: + * - ZSTD_decompressSequences_bodySplitLitBuffer - presplit section of the literal buffer + * - ZSTD_decompressSequences_bodySplitLitBuffer - postsplit section of the literal buffer + * - ZSTD_decompressSequences_body + * Alignment choices are made to minimize large swings on bad cases and influence on performance + * from changes external to this code, rather than to overoptimize on the current commit. + * + * If you are seeing performance stability this script can help test. + * It tests on 4 commits in zstd where I saw performance change. + * + * https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4 + */ #if defined(__x86_64__) - /* Align the decompression loop to 32 + 16 bytes. - * - * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression - * speed swings based on the alignment of the decompression loop. This - * performance swing is caused by parts of the decompression loop falling - * out of the DSB. The entire decompression loop should fit in the DSB, - * when it can't we get much worse performance. 
You can measure if you've - * hit the good case or the bad case with this perf command for some - * compressed file test.zst: - * - * perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \ - * -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst - * - * If you see most cycles served out of the MITE you've hit the bad case. - * If you see most cycles served out of the DSB you've hit the good case. - * If it is pretty even then you may be in an okay case. - * - * I've been able to reproduce this issue on the following CPUs: - * - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9 - * Use Instruments->Counters to get DSB/MITE cycles. - * I never got performance swings, but I was able to - * go from the good case of mostly DSB to half of the - * cycles served from MITE. - * - Coffeelake: Intel i9-9900k - * - * I haven't been able to reproduce the instability or DSB misses on any - * of the following CPUS: - * - Haswell - * - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH - * - Skylake - * - * If you are seeing performance stability this script can help test. - * It tests on 4 commits in zstd where I saw performance change. 
- * - * https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4 - */ - __asm__(".p2align 5"); - __asm__("nop"); - __asm__(".p2align 4"); + __asm__(".p2align 6"); +# if __GNUC__ >= 7 + /* good for gcc-7, gcc-9, and gcc-11 */ + __asm__("nop"); + __asm__(".p2align 5"); + __asm__("nop"); + __asm__(".p2align 4"); +# if __GNUC__ == 8 || __GNUC__ == 10 + /* good for gcc-8 and gcc-10 */ + __asm__("nop"); + __asm__(".p2align 3"); +# endif +# endif #endif + + /* Handle the initial state where litBuffer is currently split between dst and litExtraBuffer */ + for (; litPtr + sequence.litLength <= dctx->litBufferEnd; ) { + size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); +#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) + assert(!ZSTD_isError(oneSeqSize)); + if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); +#endif + if (UNLIKELY(ZSTD_isError(oneSeqSize))) + return oneSeqSize; + DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); + op += oneSeqSize; + if (UNLIKELY(!--nbSeq)) + break; + BIT_reloadDStream(&(seqState.DStream)); + sequence = ZSTD_decodeSequence(&seqState, isLongOffset); + } + + /* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd */ + if (nbSeq > 0) { + const size_t leftoverLit = dctx->litBufferEnd - litPtr; + if (leftoverLit) + { + RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer"); + ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit); + sequence.litLength -= leftoverLit; + op += leftoverLit; + } + litPtr = dctx->litExtraBuffer; + litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; + dctx->litBufferLocation = ZSTD_not_in_dst; + { + size_t const oneSeqSize = 
ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); +#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) + assert(!ZSTD_isError(oneSeqSize)); + if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); +#endif + if (UNLIKELY(ZSTD_isError(oneSeqSize))) + return oneSeqSize; + DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); + op += oneSeqSize; + if (--nbSeq) + BIT_reloadDStream(&(seqState.DStream)); + } + } + } + + if (nbSeq > 0) /* there is remaining lit from extra buffer */ + { + +#if defined(__x86_64__) + __asm__(".p2align 6"); + __asm__("nop"); +# if __GNUC__ != 7 + /* worse for gcc-7 better for gcc-8, gcc-9, and gcc-10 and clang */ + __asm__(".p2align 4"); + __asm__("nop"); + __asm__(".p2align 3"); +# elif __GNUC__ >= 11 + __asm__(".p2align 3"); +# else + __asm__(".p2align 5"); + __asm__("nop"); + __asm__(".p2align 3"); +# endif +#endif + + for (; ; ) { + seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset); + size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); +#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) + assert(!ZSTD_isError(oneSeqSize)); + if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); +#endif + if (UNLIKELY(ZSTD_isError(oneSeqSize))) + return oneSeqSize; + DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); + op += oneSeqSize; + if (UNLIKELY(!--nbSeq)) + break; + BIT_reloadDStream(&(seqState.DStream)); + } + } + + /* check if reached exact end */ + DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq); + RETURN_ERROR_IF(nbSeq, corruption_detected, ""); + RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, ""); + /* save reps for next block */ + { U32 i; for 
(i=0; ientropy.rep[i] = (U32)(seqState.prevOffset[i]); } + } + + /* last literal segment */ + if (dctx->litBufferLocation == ZSTD_split) /* split hasn't been reached yet, first get dst then copy litExtraBuffer */ + { + size_t const lastLLSize = litBufferEnd - litPtr; + RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, ""); + if (op != NULL) { + ZSTD_memmove(op, litPtr, lastLLSize); + op += lastLLSize; + } + litPtr = dctx->litExtraBuffer; + litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; + dctx->litBufferLocation = ZSTD_not_in_dst; + } + { size_t const lastLLSize = litBufferEnd - litPtr; + RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); + if (op != NULL) { + ZSTD_memcpy(op, litPtr, lastLLSize); + op += lastLLSize; + } + } + + return op-ostart; +} + +FORCE_INLINE_TEMPLATE size_t +DONT_VECTORIZE +ZSTD_decompressSequences_body(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + const BYTE* ip = (const BYTE*)seqStart; + const BYTE* const iend = ip + seqSize; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? 
ostart + maxDstSize : dctx->litBuffer; + BYTE* op = ostart; + const BYTE* litPtr = dctx->litPtr; + const BYTE* const litEnd = litPtr + dctx->litSize; + const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart); + const BYTE* const vBase = (const BYTE*)(dctx->virtualStart); + const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd); + DEBUGLOG(5, "ZSTD_decompressSequences_body"); + (void)frame; + + /* Regen sequences */ + if (nbSeq) { + seqState_t seqState; + dctx->fseEntropy = 1; + { U32 i; for (i = 0; i < ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; } + RETURN_ERROR_IF( + ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend - ip)), + corruption_detected, ""); + ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); + ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); + ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); + assert(dst != NULL); + + ZSTD_STATIC_ASSERT( + BIT_DStream_unfinished < BIT_DStream_completed && + BIT_DStream_endOfBuffer < BIT_DStream_completed && + BIT_DStream_completed < BIT_DStream_overflow); + +#if defined(__x86_64__) + __asm__(".p2align 6"); + __asm__("nop"); +# if __GNUC__ >= 7 + __asm__(".p2align 5"); + __asm__("nop"); + __asm__(".p2align 3"); +# else + __asm__(".p2align 4"); + __asm__("nop"); + __asm__(".p2align 3"); +# endif +#endif + for ( ; ; ) { - seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_noPrefetch); + seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset); size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd); #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) assert(!ZSTD_isError(oneSeqSize)); if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); #endif + if (UNLIKELY(ZSTD_isError(oneSeqSize))) + return oneSeqSize; DEBUGLOG(6, "regenerated sequence size : %u", 
(U32)oneSeqSize); - BIT_reloadDStream(&(seqState.DStream)); op += oneSeqSize; - /* gcc and clang both don't like early returns in this loop. - * Instead break and check for an error at the end of the loop. - */ - if (UNLIKELY(ZSTD_isError(oneSeqSize))) { - error = oneSeqSize; + if (UNLIKELY(!--nbSeq)) break; - } - if (UNLIKELY(!--nbSeq)) break; + BIT_reloadDStream(&(seqState.DStream)); } /* check if reached exact end */ DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq); - if (ZSTD_isError(error)) return error; RETURN_ERROR_IF(nbSeq, corruption_detected, ""); RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, ""); /* save reps for next block */ @@ -1229,9 +1632,37 @@ ZSTD_decompressSequences_default(ZSTD_DCtx* dctx, { return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } + +static size_t +ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +} #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT + +FORCE_INLINE_TEMPLATE size_t +ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence, + const BYTE* const prefixStart, const BYTE* const dictEnd) +{ + prefetchPos += sequence.litLength; + { const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart; + const BYTE* const match = matchBase + prefetchPos - sequence.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted. 
+ * No consequence though : memory address is only used for prefetching, not for dereferencing */ + PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */ + } + return prefetchPos + sequence.matchLength; +} + +/* This decoding function employs prefetching + * to reduce latency impact of cache misses. + * It's generally employed when block contains a significant portion of long-distance matches + * or when coupled with a "cold" dictionary */ FORCE_INLINE_TEMPLATE size_t ZSTD_decompressSequencesLong_body( ZSTD_DCtx* dctx, @@ -1243,10 +1674,10 @@ ZSTD_decompressSequencesLong_body( const BYTE* ip = (const BYTE*)seqStart; const BYTE* const iend = ip + seqSize; BYTE* const ostart = (BYTE*)dst; - BYTE* const oend = ostart + maxDstSize; + BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ostart + maxDstSize; BYTE* op = ostart; const BYTE* litPtr = dctx->litPtr; - const BYTE* const litEnd = litPtr + dctx->litSize; + const BYTE* litBufferEnd = dctx->litBufferEnd; const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart); const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); @@ -1254,18 +1685,17 @@ ZSTD_decompressSequencesLong_body( /* Regen sequences */ if (nbSeq) { -#define STORED_SEQS 4 +#define STORED_SEQS 8 #define STORED_SEQS_MASK (STORED_SEQS-1) -#define ADVANCED_SEQS 4 +#define ADVANCED_SEQS STORED_SEQS seq_t sequences[STORED_SEQS]; int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS); seqState_t seqState; int seqNb; + size_t prefetchPos = (size_t)(op-prefixStart); /* track position relative to prefixStart */ + dctx->fseEntropy = 1; { int i; for (i=0; ientropy.rep[i]; } - seqState.prefixStart = prefixStart; - seqState.pos = (size_t)(op-prefixStart); - seqState.dictEnd = dictEnd; assert(dst != NULL); assert(iend >= ip); RETURN_ERROR_IF( @@ -1277,36 +1707,100 @@ 
ZSTD_decompressSequencesLong_body( /* prepare in advance */ for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNblitBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd) + { + /* lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer */ + const size_t leftoverLit = dctx->litBufferEnd - litPtr; + if (leftoverLit) + { + RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer"); + ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit); + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength -= leftoverLit; + op += leftoverLit; + } + litPtr = dctx->litExtraBuffer; + litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; + dctx->litBufferLocation = ZSTD_not_in_dst; + oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) - assert(!ZSTD_isError(oneSeqSize)); - if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart); + assert(!ZSTD_isError(oneSeqSize)); + if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart); #endif - if (ZSTD_isError(oneSeqSize)) return oneSeqSize; - PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */ - sequences[seqNb & STORED_SEQS_MASK] = sequence; - op += oneSeqSize; + if (ZSTD_isError(oneSeqSize)) return oneSeqSize; + + prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd); + sequences[seqNb & STORED_SEQS_MASK] = sequence; + op += oneSeqSize; + } + else + { 
+ /* lit buffer is either wholly contained in first or second split, or not split at all*/ + oneSeqSize = dctx->litBufferLocation == ZSTD_split ? + ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) : + ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); +#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) + assert(!ZSTD_isError(oneSeqSize)); + if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart); +#endif + if (ZSTD_isError(oneSeqSize)) return oneSeqSize; + + prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd); + sequences[seqNb & STORED_SEQS_MASK] = sequence; + op += oneSeqSize; + } } RETURN_ERROR_IF(seqNblitBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd) + { + const size_t leftoverLit = dctx->litBufferEnd - litPtr; + if (leftoverLit) + { + RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer"); + ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit); + sequence->litLength -= leftoverLit; + op += leftoverLit; + } + litPtr = dctx->litExtraBuffer; + litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; + dctx->litBufferLocation = ZSTD_not_in_dst; + { + size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) - assert(!ZSTD_isError(oneSeqSize)); - if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart); + 
assert(!ZSTD_isError(oneSeqSize)); + if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart); #endif - if (ZSTD_isError(oneSeqSize)) return oneSeqSize; - op += oneSeqSize; + if (ZSTD_isError(oneSeqSize)) return oneSeqSize; + op += oneSeqSize; + } + } + else + { + size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ? + ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence->litLength - WILDCOPY_OVERLENGTH, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) : + ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); +#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) + assert(!ZSTD_isError(oneSeqSize)); + if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart); +#endif + if (ZSTD_isError(oneSeqSize)) return oneSeqSize; + op += oneSeqSize; + } } /* save reps for next block */ @@ -1314,10 +1808,21 @@ ZSTD_decompressSequencesLong_body( } /* last literal segment */ - { size_t const lastLLSize = litEnd - litPtr; + if (dctx->litBufferLocation == ZSTD_split) /* first deplete literal buffer in dst, then copy litExtraBuffer */ + { + size_t const lastLLSize = litBufferEnd - litPtr; + RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, ""); + if (op != NULL) { + ZSTD_memmove(op, litPtr, lastLLSize); + op += lastLLSize; + } + litPtr = dctx->litExtraBuffer; + litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; + } + { size_t const lastLLSize = litBufferEnd - litPtr; RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); if (op != NULL) { - ZSTD_memcpy(op, litPtr, lastLLSize); + ZSTD_memmove(op, litPtr, lastLLSize); op += lastLLSize; } } @@ -1341,7 +1846,7 @@ ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx, #if DYNAMIC_BMI2 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG -static TARGET_ATTRIBUTE("bmi2") size_t 
+static BMI2_TARGET_ATTRIBUTE size_t DONT_VECTORIZE ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, @@ -1351,10 +1856,20 @@ ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx, { return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } +static BMI2_TARGET_ATTRIBUTE size_t +DONT_VECTORIZE +ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +} #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT -static TARGET_ATTRIBUTE("bmi2") size_t +static BMI2_TARGET_ATTRIBUTE size_t ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, @@ -1383,11 +1898,25 @@ ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, { DEBUGLOG(5, "ZSTD_decompressSequences"); #if DYNAMIC_BMI2 - if (dctx->bmi2) { + if (ZSTD_DCtx_get_bmi2(dctx)) { return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } #endif - return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); + return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +} +static size_t +ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + DEBUGLOG(5, "ZSTD_decompressSequencesSplitLitBuffer"); +#if DYNAMIC_BMI2 + if (ZSTD_DCtx_get_bmi2(dctx)) { + return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, 
frame); + } +#endif + return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ @@ -1407,7 +1936,7 @@ ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx, { DEBUGLOG(5, "ZSTD_decompressSequencesLong"); #if DYNAMIC_BMI2 - if (dctx->bmi2) { + if (ZSTD_DCtx_get_bmi2(dctx)) { return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } #endif @@ -1448,7 +1977,7 @@ ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable) size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, - const void* src, size_t srcSize, const int frame) + const void* src, size_t srcSize, const int frame, const streaming_operation streaming) { /* blockType == blockCompressed */ const BYTE* ip = (const BYTE*)src; /* isLongOffset must be true if there are long offsets. @@ -1463,7 +1992,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, ""); /* Decode literals section */ - { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); + { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming); DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize); if (ZSTD_isError(litCSize)) return litCSize; ip += litCSize; @@ -1511,7 +2040,10 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG /* else */ - return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); + if (dctx->litBufferLocation == ZSTD_split) + return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); + else + return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); #endif } } @@ -1534,7 +2066,7 @@ size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, { size_t dSize; 
ZSTD_checkContinuity(dctx, dst, dstCapacity); - dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0); + dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0, not_streaming); dctx->previousDstEnd = (char*)dst + dSize; return dSize; } diff --git a/lib/zstd/decompress/zstd_decompress_block.h b/lib/zstd/decompress/zstd_decompress_block.h index e7f5f6689459..3d2d57a5d25a 100644 --- a/lib/zstd/decompress/zstd_decompress_block.h +++ b/lib/zstd/decompress/zstd_decompress_block.h @@ -33,6 +33,12 @@ */ + /* Streaming state is used to inform allocation of the literal buffer */ +typedef enum { + not_streaming = 0, + is_streaming = 1 +} streaming_operation; + /* ZSTD_decompressBlock_internal() : * decompress block, starting at `src`, * into destination buffer `dst`. @@ -41,7 +47,7 @@ */ size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, - const void* src, size_t srcSize, const int frame); + const void* src, size_t srcSize, const int frame, const streaming_operation streaming); /* ZSTD_buildFSETable() : * generate FSE decoding table for one symbol (ll, ml or off) @@ -54,7 +60,7 @@ size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, */ void ZSTD_buildFSETable(ZSTD_seqSymbol* dt, const short* normalizedCounter, unsigned maxSymbolValue, - const U32* baseValue, const U32* nbAdditionalBits, + const U32* baseValue, const U8* nbAdditionalBits, unsigned tableLog, void* wksp, size_t wkspSize, int bmi2); diff --git a/lib/zstd/decompress/zstd_decompress_internal.h b/lib/zstd/decompress/zstd_decompress_internal.h index 4b9052f68755..98102edb6a83 100644 --- a/lib/zstd/decompress/zstd_decompress_internal.h +++ b/lib/zstd/decompress/zstd_decompress_internal.h @@ -20,7 +20,7 @@ * Dependencies *********************************************************/ #include "../common/mem.h" /* BYTE, U16, U32 */ -#include "../common/zstd_internal.h" /* ZSTD_seqSymbol */ +#include 
"../common/zstd_internal.h" /* constants : MaxLL, MaxML, MaxOff, LLFSELog, etc. */ @@ -40,7 +40,7 @@ static UNUSED_ATTR const U32 OF_base[MaxOff+1] = { 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD }; -static UNUSED_ATTR const U32 OF_bits[MaxOff+1] = { +static UNUSED_ATTR const U8 OF_bits[MaxOff+1] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, @@ -106,6 +106,22 @@ typedef struct { size_t ddictPtrCount; } ZSTD_DDictHashSet; +#ifndef ZSTD_DECODER_INTERNAL_BUFFER +# define ZSTD_DECODER_INTERNAL_BUFFER (1 << 16) +#endif + +#define ZSTD_LBMIN 64 +#define ZSTD_LBMAX (128 << 10) + +/* extra buffer, compensates when dst is not large enough to store litBuffer */ +#define ZSTD_LITBUFFEREXTRASIZE BOUNDED(ZSTD_LBMIN, ZSTD_DECODER_INTERNAL_BUFFER, ZSTD_LBMAX) + +typedef enum { + ZSTD_not_in_dst = 0, /* Stored entirely within litExtraBuffer */ + ZSTD_in_dst = 1, /* Stored entirely within dst (in memory after current output write) */ + ZSTD_split = 2 /* Split between litExtraBuffer and dst */ +} ZSTD_litLocation_e; + struct ZSTD_DCtx_s { const ZSTD_seqSymbol* LLTptr; @@ -136,7 +152,9 @@ struct ZSTD_DCtx_s size_t litSize; size_t rleSize; size_t staticSize; +#if DYNAMIC_BMI2 != 0 int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. 
*/ +#endif /* dictionary */ ZSTD_DDict* ddictLocal; @@ -158,16 +176,16 @@ struct ZSTD_DCtx_s size_t outStart; size_t outEnd; size_t lhSize; - void* legacyContext; - U32 previousLegacyVersion; - U32 legacyVersion; U32 hostageByte; int noForwardProgress; ZSTD_bufferMode_e outBufferMode; ZSTD_outBuffer expectedOutBuffer; /* workspace */ - BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH]; + BYTE* litBuffer; + const BYTE* litBufferEnd; + ZSTD_litLocation_e litBufferLocation; + BYTE litExtraBuffer[ZSTD_LITBUFFEREXTRASIZE + WILDCOPY_OVERLENGTH]; /* literal buffer can be split between storage within dst and within this scratch buffer */ BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; size_t oversizedDuration; @@ -180,6 +198,14 @@ struct ZSTD_DCtx_s /* Tracing */ }; /* typedef'd to ZSTD_DCtx within "zstd.h" */ +MEM_STATIC int ZSTD_DCtx_get_bmi2(const struct ZSTD_DCtx_s *dctx) { +#if DYNAMIC_BMI2 != 0 + return dctx->bmi2; +#else + (void)dctx; + return 0; +#endif +} /*-******************************************************* * Shared internal functions diff --git a/lib/zstd/decompress_sources.h b/lib/zstd/decompress_sources.h index 0fbec508f285..a06ca187aab5 100644 --- a/lib/zstd/decompress_sources.h +++ b/lib/zstd/decompress_sources.h @@ -16,6 +16,12 @@ * decompression. */ +/* + * Disable the ASM Huffman implementation because we need to + * include all the sources. 
+ */ +#define ZSTD_DISABLE_ASM 1 + #include "common/debug.c" #include "common/entropy_common.c" #include "common/error_private.c" diff --git a/lib/zstd/zstd_compress_module.c b/lib/zstd/zstd_compress_module.c index 65548a4bb934..04e1b5c01d9b 100644 --- a/lib/zstd/zstd_compress_module.c +++ b/lib/zstd/zstd_compress_module.c @@ -133,7 +133,11 @@ EXPORT_SYMBOL(zstd_init_cstream); size_t zstd_reset_cstream(zstd_cstream *cstream, unsigned long long pledged_src_size) { - return ZSTD_resetCStream(cstream, pledged_src_size); + if (pledged_src_size == 0) + pledged_src_size = ZSTD_CONTENTSIZE_UNKNOWN; + ZSTD_FORWARD_IF_ERR( ZSTD_CCtx_reset(cstream, ZSTD_reset_session_only) ); + ZSTD_FORWARD_IF_ERR( ZSTD_CCtx_setPledgedSrcSize(cstream, pledged_src_size) ); + return 0; } EXPORT_SYMBOL(zstd_reset_cstream); From a1ccd3d911382f68753033c6adcf69663c2a9fc5 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 19 Oct 2022 15:41:25 -0500 Subject: [PATCH 0389/4122] PCI/portdrv: Squash into portdrv.c Squash portdrv_core.c and portdrv_pci.c into portdrv.c to make it easier to find things. The whole thing is less than 1000 lines, and it's a pain to bounce back and forth between two files. Several portdrv_core.c functions were non-static because they were referenced from portdrv_pci.c. Make them static since they're now all in portdrv.c. No functional change intended. 
Link: https://lore.kernel.org/r/20221019204127.44463-2-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Signed-off-by: Bjorn Helgaas Reviewed-by: Christoph Hellwig Reviewed-by: Keith Busch --- drivers/pci/pcie/Makefile | 2 +- .../pci/pcie/{portdrv_core.c => portdrv.c} | 252 +++++++++++++++++- drivers/pci/pcie/portdrv.h | 10 - drivers/pci/pcie/portdrv_pci.c | 252 ------------------ 4 files changed, 244 insertions(+), 272 deletions(-) rename drivers/pci/pcie/{portdrv_core.c => portdrv.c} (70%) delete mode 100644 drivers/pci/pcie/portdrv_pci.c diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile index 5783a2f79e6a..8de4ed5f98f1 100644 --- a/drivers/pci/pcie/Makefile +++ b/drivers/pci/pcie/Makefile @@ -2,7 +2,7 @@ # # Makefile for PCI Express features and port driver -pcieportdrv-y := portdrv_core.o portdrv_pci.o rcec.o +pcieportdrv-y := portdrv.o rcec.o obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv.c similarity index 70% rename from drivers/pci/pcie/portdrv_core.c rename to drivers/pci/pcie/portdrv.c index 1ac7fec47d6f..0b4a1f9c2a6b 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv.c @@ -1,11 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Purpose: PCI Express Port Bus Driver's Core Functions + * Purpose: PCI Express Port Bus Driver * * Copyright (C) 2004 Intel * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com) */ +#include +#include #include #include #include @@ -308,7 +310,7 @@ static int pcie_device_init(struct pci_dev *pdev, int service, int irq) * Allocate the port extension structure and register services associated with * the port. 
*/ -int pcie_port_device_register(struct pci_dev *dev) +static int pcie_port_device_register(struct pci_dev *dev) { int status, capabilities, i, nr_service; int irqs[PCIE_PORT_DEVICE_MAXSERVICES]; @@ -362,7 +364,7 @@ error_disable: typedef int (*pcie_callback_t)(struct pcie_device *); -int pcie_port_device_iter(struct device *dev, void *data) +static int pcie_port_device_iter(struct device *dev, void *data) { struct pcie_port_service_driver *service_driver; size_t offset = *(size_t *)data; @@ -382,13 +384,13 @@ int pcie_port_device_iter(struct device *dev, void *data) * pcie_port_device_suspend - suspend port services associated with a PCIe port * @dev: PCI Express port to handle */ -int pcie_port_device_suspend(struct device *dev) +static int pcie_port_device_suspend(struct device *dev) { size_t off = offsetof(struct pcie_port_service_driver, suspend); return device_for_each_child(dev, &off, pcie_port_device_iter); } -int pcie_port_device_resume_noirq(struct device *dev) +static int pcie_port_device_resume_noirq(struct device *dev) { size_t off = offsetof(struct pcie_port_service_driver, resume_noirq); return device_for_each_child(dev, &off, pcie_port_device_iter); @@ -398,7 +400,7 @@ int pcie_port_device_resume_noirq(struct device *dev) * pcie_port_device_resume - resume port services associated with a PCIe port * @dev: PCI Express port to handle */ -int pcie_port_device_resume(struct device *dev) +static int pcie_port_device_resume(struct device *dev) { size_t off = offsetof(struct pcie_port_service_driver, resume); return device_for_each_child(dev, &off, pcie_port_device_iter); @@ -408,7 +410,7 @@ int pcie_port_device_resume(struct device *dev) * pcie_port_device_runtime_suspend - runtime suspend port services * @dev: PCI Express port to handle */ -int pcie_port_device_runtime_suspend(struct device *dev) +static int pcie_port_device_runtime_suspend(struct device *dev) { size_t off = offsetof(struct pcie_port_service_driver, runtime_suspend); return 
device_for_each_child(dev, &off, pcie_port_device_iter); @@ -418,7 +420,7 @@ int pcie_port_device_runtime_suspend(struct device *dev) * pcie_port_device_runtime_resume - runtime resume port services * @dev: PCI Express port to handle */ -int pcie_port_device_runtime_resume(struct device *dev) +static int pcie_port_device_runtime_resume(struct device *dev) { size_t off = offsetof(struct pcie_port_service_driver, runtime_resume); return device_for_each_child(dev, &off, pcie_port_device_iter); @@ -482,7 +484,7 @@ EXPORT_SYMBOL_GPL(pcie_port_find_device); * Remove PCI Express port service devices associated with given port and * disable MSI-X or MSI for the port. */ -void pcie_port_device_remove(struct pci_dev *dev) +static void pcie_port_device_remove(struct pci_dev *dev) { device_for_each_child(&dev->dev, NULL, remove_iter); pci_free_irq_vectors(dev); @@ -584,3 +586,235 @@ void pcie_port_service_unregister(struct pcie_port_service_driver *drv) driver_unregister(&drv->driver); } EXPORT_SYMBOL(pcie_port_service_unregister); + +/* If this switch is set, PCIe port native services should not be enabled. */ +bool pcie_ports_disabled; + +/* + * If the user specified "pcie_ports=native", use the PCIe services regardless + * of whether the platform has given us permission. On ACPI systems, this + * means we ignore _OSC. + */ +bool pcie_ports_native; + +/* + * If the user specified "pcie_ports=dpc-native", use the Linux DPC PCIe + * service even if the platform hasn't given us permission. 
+ */ +bool pcie_ports_dpc_native; + +static int __init pcie_port_setup(char *str) +{ + if (!strncmp(str, "compat", 6)) + pcie_ports_disabled = true; + else if (!strncmp(str, "native", 6)) + pcie_ports_native = true; + else if (!strncmp(str, "dpc-native", 10)) + pcie_ports_dpc_native = true; + + return 1; +} +__setup("pcie_ports=", pcie_port_setup); + +/* global data */ + +#ifdef CONFIG_PM +static int pcie_port_runtime_suspend(struct device *dev) +{ + if (!to_pci_dev(dev)->bridge_d3) + return -EBUSY; + + return pcie_port_device_runtime_suspend(dev); +} + +static int pcie_port_runtime_idle(struct device *dev) +{ + /* + * Assume the PCI core has set bridge_d3 whenever it thinks the port + * should be good to go to D3. Everything else, including moving + * the port to D3, is handled by the PCI core. + */ + return to_pci_dev(dev)->bridge_d3 ? 0 : -EBUSY; +} + +static const struct dev_pm_ops pcie_portdrv_pm_ops = { + .suspend = pcie_port_device_suspend, + .resume_noirq = pcie_port_device_resume_noirq, + .resume = pcie_port_device_resume, + .freeze = pcie_port_device_suspend, + .thaw = pcie_port_device_resume, + .poweroff = pcie_port_device_suspend, + .restore_noirq = pcie_port_device_resume_noirq, + .restore = pcie_port_device_resume, + .runtime_suspend = pcie_port_runtime_suspend, + .runtime_resume = pcie_port_device_runtime_resume, + .runtime_idle = pcie_port_runtime_idle, +}; + +#define PCIE_PORTDRV_PM_OPS (&pcie_portdrv_pm_ops) + +#else /* !PM */ + +#define PCIE_PORTDRV_PM_OPS NULL +#endif /* !PM */ + +/* + * pcie_portdrv_probe - Probe PCI-Express port devices + * @dev: PCI-Express port device being probed + * + * If detected invokes the pcie_port_device_register() method for + * this port device. 
+ * + */ +static int pcie_portdrv_probe(struct pci_dev *dev, + const struct pci_device_id *id) +{ + int type = pci_pcie_type(dev); + int status; + + if (!pci_is_pcie(dev) || + ((type != PCI_EXP_TYPE_ROOT_PORT) && + (type != PCI_EXP_TYPE_UPSTREAM) && + (type != PCI_EXP_TYPE_DOWNSTREAM) && + (type != PCI_EXP_TYPE_RC_EC))) + return -ENODEV; + + if (type == PCI_EXP_TYPE_RC_EC) + pcie_link_rcec(dev); + + status = pcie_port_device_register(dev); + if (status) + return status; + + pci_save_state(dev); + + dev_pm_set_driver_flags(&dev->dev, DPM_FLAG_NO_DIRECT_COMPLETE | + DPM_FLAG_SMART_SUSPEND); + + if (pci_bridge_d3_possible(dev)) { + /* + * Keep the port resumed 100ms to make sure things like + * config space accesses from userspace (lspci) will not + * cause the port to repeatedly suspend and resume. + */ + pm_runtime_set_autosuspend_delay(&dev->dev, 100); + pm_runtime_use_autosuspend(&dev->dev); + pm_runtime_mark_last_busy(&dev->dev); + pm_runtime_put_autosuspend(&dev->dev); + pm_runtime_allow(&dev->dev); + } + + return 0; +} + +static void pcie_portdrv_remove(struct pci_dev *dev) +{ + if (pci_bridge_d3_possible(dev)) { + pm_runtime_forbid(&dev->dev); + pm_runtime_get_noresume(&dev->dev); + pm_runtime_dont_use_autosuspend(&dev->dev); + } + + pcie_port_device_remove(dev); +} + +static pci_ers_result_t pcie_portdrv_error_detected(struct pci_dev *dev, + pci_channel_state_t error) +{ + if (error == pci_channel_io_frozen) + return PCI_ERS_RESULT_NEED_RESET; + return PCI_ERS_RESULT_CAN_RECOVER; +} + +static pci_ers_result_t pcie_portdrv_slot_reset(struct pci_dev *dev) +{ + size_t off = offsetof(struct pcie_port_service_driver, slot_reset); + device_for_each_child(&dev->dev, &off, pcie_port_device_iter); + + pci_restore_state(dev); + pci_save_state(dev); + return PCI_ERS_RESULT_RECOVERED; +} + +static pci_ers_result_t pcie_portdrv_mmio_enabled(struct pci_dev *dev) +{ + return PCI_ERS_RESULT_RECOVERED; +} + +/* + * LINUX Device Driver Model + */ +static const struct 
pci_device_id port_pci_ids[] = { + /* handle any PCI-Express port */ + { PCI_DEVICE_CLASS(PCI_CLASS_BRIDGE_PCI_NORMAL, ~0) }, + /* subtractive decode PCI-to-PCI bridge, class type is 060401h */ + { PCI_DEVICE_CLASS(PCI_CLASS_BRIDGE_PCI_SUBTRACTIVE, ~0) }, + /* handle any Root Complex Event Collector */ + { PCI_DEVICE_CLASS(((PCI_CLASS_SYSTEM_RCEC << 8) | 0x00), ~0) }, + { }, +}; + +static const struct pci_error_handlers pcie_portdrv_err_handler = { + .error_detected = pcie_portdrv_error_detected, + .slot_reset = pcie_portdrv_slot_reset, + .mmio_enabled = pcie_portdrv_mmio_enabled, +}; + +static struct pci_driver pcie_portdriver = { + .name = "pcieport", + .id_table = &port_pci_ids[0], + + .probe = pcie_portdrv_probe, + .remove = pcie_portdrv_remove, + .shutdown = pcie_portdrv_remove, + + .err_handler = &pcie_portdrv_err_handler, + + .driver_managed_dma = true, + + .driver.pm = PCIE_PORTDRV_PM_OPS, +}; + +static int __init dmi_pcie_pme_disable_msi(const struct dmi_system_id *d) +{ + pr_notice("%s detected: will not use MSI for PCIe PME signaling\n", + d->ident); + pcie_pme_disable_msi(); + return 0; +} + +static const struct dmi_system_id pcie_portdrv_dmi_table[] __initconst = { + /* + * Boxes that should not use MSI for PCIe PME signaling. 
+ */ + { + .callback = dmi_pcie_pme_disable_msi, + .ident = "MSI Wind U-100", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, + "MICRO-STAR INTERNATIONAL CO., LTD"), + DMI_MATCH(DMI_PRODUCT_NAME, "U-100"), + }, + }, + {} +}; + +static void __init pcie_init_services(void) +{ + pcie_aer_init(); + pcie_pme_init(); + pcie_dpc_init(); + pcie_hp_init(); +} + +static int __init pcie_portdrv_init(void) +{ + if (pcie_ports_disabled) + return -EACCES; + + pcie_init_services(); + dmi_check_system(pcie_portdrv_dmi_table); + + return pci_register_driver(&pcie_portdriver); +} +device_initcall(pcie_portdrv_init); diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index 0ef4bf5f811d..bf380bcea6a5 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -108,16 +108,6 @@ void pcie_port_service_unregister(struct pcie_port_service_driver *new); #define get_descriptor_id(type, service) (((type - 4) << 8) | service) extern struct bus_type pcie_port_bus_type; -int pcie_port_device_register(struct pci_dev *dev); -int pcie_port_device_iter(struct device *dev, void *data); -#ifdef CONFIG_PM -int pcie_port_device_suspend(struct device *dev); -int pcie_port_device_resume_noirq(struct device *dev); -int pcie_port_device_resume(struct device *dev); -int pcie_port_device_runtime_suspend(struct device *dev); -int pcie_port_device_runtime_resume(struct device *dev); -#endif -void pcie_port_device_remove(struct pci_dev *dev); struct pci_dev; diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c deleted file mode 100644 index 7f8788a970ae..000000000000 --- a/drivers/pci/pcie/portdrv_pci.c +++ /dev/null @@ -1,252 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Purpose: PCI Express Port Bus Driver - * Author: Tom Nguyen - * - * Copyright (C) 2004 Intel - * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com) - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "../pci.h" -#include "portdrv.h" - 
-/* If this switch is set, PCIe port native services should not be enabled. */ -bool pcie_ports_disabled; - -/* - * If the user specified "pcie_ports=native", use the PCIe services regardless - * of whether the platform has given us permission. On ACPI systems, this - * means we ignore _OSC. - */ -bool pcie_ports_native; - -/* - * If the user specified "pcie_ports=dpc-native", use the Linux DPC PCIe - * service even if the platform hasn't given us permission. - */ -bool pcie_ports_dpc_native; - -static int __init pcie_port_setup(char *str) -{ - if (!strncmp(str, "compat", 6)) - pcie_ports_disabled = true; - else if (!strncmp(str, "native", 6)) - pcie_ports_native = true; - else if (!strncmp(str, "dpc-native", 10)) - pcie_ports_dpc_native = true; - - return 1; -} -__setup("pcie_ports=", pcie_port_setup); - -/* global data */ - -#ifdef CONFIG_PM -static int pcie_port_runtime_suspend(struct device *dev) -{ - if (!to_pci_dev(dev)->bridge_d3) - return -EBUSY; - - return pcie_port_device_runtime_suspend(dev); -} - -static int pcie_port_runtime_idle(struct device *dev) -{ - /* - * Assume the PCI core has set bridge_d3 whenever it thinks the port - * should be good to go to D3. Everything else, including moving - * the port to D3, is handled by the PCI core. - */ - return to_pci_dev(dev)->bridge_d3 ? 
0 : -EBUSY; -} - -static const struct dev_pm_ops pcie_portdrv_pm_ops = { - .suspend = pcie_port_device_suspend, - .resume_noirq = pcie_port_device_resume_noirq, - .resume = pcie_port_device_resume, - .freeze = pcie_port_device_suspend, - .thaw = pcie_port_device_resume, - .poweroff = pcie_port_device_suspend, - .restore_noirq = pcie_port_device_resume_noirq, - .restore = pcie_port_device_resume, - .runtime_suspend = pcie_port_runtime_suspend, - .runtime_resume = pcie_port_device_runtime_resume, - .runtime_idle = pcie_port_runtime_idle, -}; - -#define PCIE_PORTDRV_PM_OPS (&pcie_portdrv_pm_ops) - -#else /* !PM */ - -#define PCIE_PORTDRV_PM_OPS NULL -#endif /* !PM */ - -/* - * pcie_portdrv_probe - Probe PCI-Express port devices - * @dev: PCI-Express port device being probed - * - * If detected invokes the pcie_port_device_register() method for - * this port device. - * - */ -static int pcie_portdrv_probe(struct pci_dev *dev, - const struct pci_device_id *id) -{ - int type = pci_pcie_type(dev); - int status; - - if (!pci_is_pcie(dev) || - ((type != PCI_EXP_TYPE_ROOT_PORT) && - (type != PCI_EXP_TYPE_UPSTREAM) && - (type != PCI_EXP_TYPE_DOWNSTREAM) && - (type != PCI_EXP_TYPE_RC_EC))) - return -ENODEV; - - if (type == PCI_EXP_TYPE_RC_EC) - pcie_link_rcec(dev); - - status = pcie_port_device_register(dev); - if (status) - return status; - - pci_save_state(dev); - - dev_pm_set_driver_flags(&dev->dev, DPM_FLAG_NO_DIRECT_COMPLETE | - DPM_FLAG_SMART_SUSPEND); - - if (pci_bridge_d3_possible(dev)) { - /* - * Keep the port resumed 100ms to make sure things like - * config space accesses from userspace (lspci) will not - * cause the port to repeatedly suspend and resume. 
- */ - pm_runtime_set_autosuspend_delay(&dev->dev, 100); - pm_runtime_use_autosuspend(&dev->dev); - pm_runtime_mark_last_busy(&dev->dev); - pm_runtime_put_autosuspend(&dev->dev); - pm_runtime_allow(&dev->dev); - } - - return 0; -} - -static void pcie_portdrv_remove(struct pci_dev *dev) -{ - if (pci_bridge_d3_possible(dev)) { - pm_runtime_forbid(&dev->dev); - pm_runtime_get_noresume(&dev->dev); - pm_runtime_dont_use_autosuspend(&dev->dev); - } - - pcie_port_device_remove(dev); -} - -static pci_ers_result_t pcie_portdrv_error_detected(struct pci_dev *dev, - pci_channel_state_t error) -{ - if (error == pci_channel_io_frozen) - return PCI_ERS_RESULT_NEED_RESET; - return PCI_ERS_RESULT_CAN_RECOVER; -} - -static pci_ers_result_t pcie_portdrv_slot_reset(struct pci_dev *dev) -{ - size_t off = offsetof(struct pcie_port_service_driver, slot_reset); - device_for_each_child(&dev->dev, &off, pcie_port_device_iter); - - pci_restore_state(dev); - pci_save_state(dev); - return PCI_ERS_RESULT_RECOVERED; -} - -static pci_ers_result_t pcie_portdrv_mmio_enabled(struct pci_dev *dev) -{ - return PCI_ERS_RESULT_RECOVERED; -} - -/* - * LINUX Device Driver Model - */ -static const struct pci_device_id port_pci_ids[] = { - /* handle any PCI-Express port */ - { PCI_DEVICE_CLASS(PCI_CLASS_BRIDGE_PCI_NORMAL, ~0) }, - /* subtractive decode PCI-to-PCI bridge, class type is 060401h */ - { PCI_DEVICE_CLASS(PCI_CLASS_BRIDGE_PCI_SUBTRACTIVE, ~0) }, - /* handle any Root Complex Event Collector */ - { PCI_DEVICE_CLASS(((PCI_CLASS_SYSTEM_RCEC << 8) | 0x00), ~0) }, - { }, -}; - -static const struct pci_error_handlers pcie_portdrv_err_handler = { - .error_detected = pcie_portdrv_error_detected, - .slot_reset = pcie_portdrv_slot_reset, - .mmio_enabled = pcie_portdrv_mmio_enabled, -}; - -static struct pci_driver pcie_portdriver = { - .name = "pcieport", - .id_table = &port_pci_ids[0], - - .probe = pcie_portdrv_probe, - .remove = pcie_portdrv_remove, - .shutdown = pcie_portdrv_remove, - - .err_handler = 
&pcie_portdrv_err_handler, - - .driver_managed_dma = true, - - .driver.pm = PCIE_PORTDRV_PM_OPS, -}; - -static int __init dmi_pcie_pme_disable_msi(const struct dmi_system_id *d) -{ - pr_notice("%s detected: will not use MSI for PCIe PME signaling\n", - d->ident); - pcie_pme_disable_msi(); - return 0; -} - -static const struct dmi_system_id pcie_portdrv_dmi_table[] __initconst = { - /* - * Boxes that should not use MSI for PCIe PME signaling. - */ - { - .callback = dmi_pcie_pme_disable_msi, - .ident = "MSI Wind U-100", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, - "MICRO-STAR INTERNATIONAL CO., LTD"), - DMI_MATCH(DMI_PRODUCT_NAME, "U-100"), - }, - }, - {} -}; - -static void __init pcie_init_services(void) -{ - pcie_aer_init(); - pcie_pme_init(); - pcie_dpc_init(); - pcie_hp_init(); -} - -static int __init pcie_portdrv_init(void) -{ - if (pcie_ports_disabled) - return -EACCES; - - pcie_init_services(); - dmi_check_system(pcie_portdrv_dmi_table); - - return pci_register_driver(&pcie_portdriver); -} -device_initcall(pcie_portdrv_init); From 29f193feeea3e3af1c4650870f08ca896b83e1db Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 19 Oct 2022 15:41:26 -0500 Subject: [PATCH 0390/4122] PCI/portdrv: Move private things to portdrv.c Previously several things used by portdrv_core.c and portdrv_pci.c were shared by defining them in portdrv.h. Now that portdrv_core.c and portdrv_pci.c have been squashed, move things that can be private into portdrv.c. No functional change intended. 
Link: https://lore.kernel.org/r/20221019204127.44463-3-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Signed-off-by: Bjorn Helgaas Reviewed-by: Christoph Hellwig Reviewed-by: Keith Busch --- drivers/pci/pcie/portdrv.c | 9 +++++++++ drivers/pci/pcie/portdrv.h | 9 --------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/pci/pcie/portdrv.c b/drivers/pci/pcie/portdrv.c index 0b4a1f9c2a6b..ae8da5b2e922 100644 --- a/drivers/pci/pcie/portdrv.c +++ b/drivers/pci/pcie/portdrv.c @@ -21,6 +21,15 @@ #include "../pci.h" #include "portdrv.h" +/* + * The PCIe Capability Interrupt Message Number (PCIe r3.1, sec 7.8.2) must + * be one of the first 32 MSI-X entries. Per PCI r3.0, sec 6.8.3.1, MSI + * supports a maximum of 32 vectors per function. + */ +#define PCIE_PORT_MAX_MSI_ENTRIES 32 + +#define get_descriptor_id(type, service) (((type - 4) << 8) | service) + struct portdrv_service_data { struct pcie_port_service_driver *drv; struct device *dev; diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index bf380bcea6a5..58a2b1a1cae4 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -98,15 +98,6 @@ struct pcie_port_service_driver { int pcie_port_service_register(struct pcie_port_service_driver *new); void pcie_port_service_unregister(struct pcie_port_service_driver *new); -/* - * The PCIe Capability Interrupt Message Number (PCIe r3.1, sec 7.8.2) must - * be one of the first 32 MSI-X entries. Per PCI r3.0, sec 6.8.3.1, MSI - * supports a maximum of 32 vectors per function. 
- */ -#define PCIE_PORT_MAX_MSI_ENTRIES 32 - -#define get_descriptor_id(type, service) (((type - 4) << 8) | service) - extern struct bus_type pcie_port_bus_type; struct pci_dev; From 461a65d7d1a4f56b97c9115eda3e8619516f40fb Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 19 Oct 2022 15:41:27 -0500 Subject: [PATCH 0391/4122] PCI/portdrv: Unexport pcie_port_service_register(), pcie_port_service_unregister() pcie_port_service_register() and pcie_port_service_unregister() are used only by the pciehp, aer, dpc, and pme PCIe port service drivers, none of which can be modules. Unexport pcie_port_service_register() and pcie_port_service_unregister(). No functional change intended. Link: https://lore.kernel.org/r/20221019204127.44463-4-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Signed-off-by: Bjorn Helgaas Reviewed-by: Christoph Hellwig Reviewed-by: Keith Busch --- drivers/pci/pcie/portdrv.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/pci/pcie/portdrv.c b/drivers/pci/pcie/portdrv.c index ae8da5b2e922..a6c4225505d5 100644 --- a/drivers/pci/pcie/portdrv.c +++ b/drivers/pci/pcie/portdrv.c @@ -584,7 +584,6 @@ int pcie_port_service_register(struct pcie_port_service_driver *new) return driver_register(&new->driver); } -EXPORT_SYMBOL(pcie_port_service_register); /** * pcie_port_service_unregister - unregister PCI Express port service driver @@ -594,7 +593,6 @@ void pcie_port_service_unregister(struct pcie_port_service_driver *drv) { driver_unregister(&drv->driver); } -EXPORT_SYMBOL(pcie_port_service_unregister); /* If this switch is set, PCIe port native services should not be enabled. */ bool pcie_ports_disabled; From 2f7a29debae2efef94b981377fa3622986cd57f5 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Mon, 26 Sep 2022 10:28:39 +0800 Subject: [PATCH 0392/4122] apparmor: remove useless static inline functions Remove the following useless static inline functions: 1. 
label_is_visible() is a static function in security/apparmor/label.c, and it's not used, aa_ns_visible() can do the same things as it, so it's redundant. 2. is_deleted() is a static function in security/apparmor/file.c, and it's not used since commit aebd873e8d3e ("apparmor: refactor path name lookup and permission checks around labels"), so it's redundant. They are redundant, so remove them. Signed-off-by: Gaosheng Cui Signed-off-by: John Johansen --- security/apparmor/file.c | 13 ------------- security/apparmor/label.c | 6 ------ 2 files changed, 19 deletions(-) diff --git a/security/apparmor/file.c b/security/apparmor/file.c index e7dc5ea38997..deb73480f0c6 100644 --- a/security/apparmor/file.c +++ b/security/apparmor/file.c @@ -141,19 +141,6 @@ int aa_audit_file(struct aa_profile *profile, struct aa_perms *perms, return aa_audit(type, profile, &sa, file_audit_cb); } -/** - * is_deleted - test if a file has been completely unlinked - * @dentry: dentry of file to test for deletion (NOT NULL) - * - * Returns: true if deleted else false - */ -static inline bool is_deleted(struct dentry *dentry) -{ - if (d_unlinked(dentry) && d_backing_inode(dentry)->i_nlink == 0) - return true; - return false; -} - static int path_name(const char *op, struct aa_label *label, const struct path *path, int flags, char *buffer, const char **name, struct path_cond *cond, u32 request) diff --git a/security/apparmor/label.c b/security/apparmor/label.c index aa4031628af5..8a2af96f4da5 100644 --- a/security/apparmor/label.c +++ b/security/apparmor/label.c @@ -1256,12 +1256,6 @@ out: return label; } -static inline bool label_is_visible(struct aa_profile *profile, - struct aa_label *label) -{ - return aa_ns_visible(profile->ns, labels_ns(label), true); -} - /* match a profile and its associated ns component if needed * Assumes visibility test has already been done. 
* If a subns profile is not to be matched should be prescreened with From 1f2bc06a8dbff73957f433b22c6fd35fccfb47a4 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Mon, 26 Sep 2022 19:48:38 +0800 Subject: [PATCH 0393/4122] apparmor: fix obsoleted comments for aa_getprocattr() and audit_resource() Update the comments for aa_getprocattr() and audit_resource(), the args of them have been changed since commit 76a1d263aba3 ("apparmor: switch getprocattr to using label_print fns()"). Signed-off-by: Gaosheng Cui Signed-off-by: John Johansen --- security/apparmor/procattr.c | 11 +++++------ security/apparmor/resource.c | 2 ++ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/security/apparmor/procattr.c b/security/apparmor/procattr.c index 86ad26ef72ed..197d41f9c32b 100644 --- a/security/apparmor/procattr.c +++ b/security/apparmor/procattr.c @@ -17,14 +17,13 @@ /** - * aa_getprocattr - Return the profile information for @profile - * @profile: the profile to print profile info about (NOT NULL) - * @string: Returns - string containing the profile info (NOT NULL) + * aa_getprocattr - Return the label information for @label + * @label: the label to print label info about (NOT NULL) + * @string: Returns - string containing the label info (NOT NULL) * - * Requires: profile != NULL + * Requires: label != NULL && string != NULL * - * Creates a string containing the namespace_name://profile_name for - * @profile. + * Creates a string containing the label information for @label.
* * Returns: size of string placed in @string else error code on failure */ diff --git a/security/apparmor/resource.c b/security/apparmor/resource.c index ed543f4edfd9..1b75d8343a8d 100644 --- a/security/apparmor/resource.c +++ b/security/apparmor/resource.c @@ -45,6 +45,8 @@ static void audit_cb(struct audit_buffer *ab, void *va) * @profile: profile being enforced (NOT NULL) * @resource: rlimit being auditing * @value: value being set + * @peer: aa_albel of the task being set + * @info: info being auditing * @error: error value * * Returns: 0 or sa->error else other error code on failure From 58f89ce58bb4f5cf5963b20a19aaa2431b0412d8 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Mon, 3 Oct 2022 02:48:24 -0700 Subject: [PATCH 0394/4122] apparmor: refactor code that alloc null profiles Both unconfined and learning profiles use the null profile as their base. Refactor so they share a common base routine. This doesn't save much atm but will be important when the feature set of the parent is inherited.
Signed-off-by: John Johansen --- security/apparmor/domain.c | 12 ++++---- security/apparmor/include/policy.h | 6 ++-- security/apparmor/policy.c | 47 ++++++++++++++++++++---------- security/apparmor/policy_ns.c | 6 +--- 4 files changed, 43 insertions(+), 28 deletions(-) diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index d4b09f061aee..b447bc13ea8e 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -681,8 +681,8 @@ static struct aa_label *profile_transition(struct aa_profile *profile, /* no exec permission - learning mode */ struct aa_profile *new_profile = NULL; - new_profile = aa_new_null_profile(profile, false, name, - GFP_KERNEL); + new_profile = aa_new_learning_profile(profile, false, name, + GFP_KERNEL); if (!new_profile) { error = -ENOMEM; info = "could not create null profile"; @@ -1009,8 +1009,8 @@ static struct aa_label *build_change_hat(struct aa_profile *profile, if (!hat) { error = -ENOENT; if (COMPLAIN_MODE(profile)) { - hat = aa_new_null_profile(profile, true, name, - GFP_KERNEL); + hat = aa_new_learning_profile(profile, true, name, + GFP_KERNEL); if (!hat) { info = "failed null profile create"; error = -ENOMEM; @@ -1361,8 +1361,8 @@ int aa_change_profile(const char *fqname, int flags) !COMPLAIN_MODE(labels_profile(label))) goto audit; /* released below */ - tprofile = aa_new_null_profile(labels_profile(label), false, - fqname, GFP_KERNEL); + tprofile = aa_new_learning_profile(labels_profile(label), false, + fqname, GFP_KERNEL); if (!tprofile) { info = "failed null profile create"; error = -ENOMEM; diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h index 5cadfb20df29..545f791cabda 100644 --- a/security/apparmor/include/policy.h +++ b/security/apparmor/include/policy.h @@ -234,8 +234,10 @@ void aa_free_proxy_kref(struct kref *kref); struct aa_ruleset *aa_alloc_ruleset(gfp_t gfp); struct aa_profile *aa_alloc_profile(const char *name, struct aa_proxy *proxy, gfp_t gfp); 
-struct aa_profile *aa_new_null_profile(struct aa_profile *parent, bool hat, - const char *base, gfp_t gfp); +struct aa_profile *aa_alloc_null(struct aa_profile *parent, const char *name, + gfp_t gfp); +struct aa_profile *aa_new_learning_profile(struct aa_profile *parent, bool hat, + const char *base, gfp_t gfp); void aa_free_profile(struct aa_profile *profile); void aa_free_profile_kref(struct kref *kref); struct aa_profile *aa_find_child(struct aa_profile *parent, const char *name); diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index 6f4cc8bfe03d..c17ccedd35f1 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -524,8 +524,36 @@ struct aa_profile *aa_fqlookupn_profile(struct aa_label *base, return profile; } + +struct aa_profile *aa_alloc_null(struct aa_profile *parent, const char *name, + gfp_t gfp) +{ + struct aa_profile *profile; + struct aa_ruleset *rules; + + profile = aa_alloc_profile(name, NULL, gfp); + if (!profile) + return NULL; + + /* TODO: ideally we should inherit abi from parent */ + profile->label.flags |= FLAG_NULL; + rules = list_first_entry(&profile->rules, typeof(*rules), list); + rules->file.dfa = aa_get_dfa(nulldfa); + rules->policy.dfa = aa_get_dfa(nulldfa); + + if (parent) { + profile->path_flags = parent->path_flags; + + /* released on free_profile */ + rcu_assign_pointer(profile->parent, aa_get_profile(parent)); + profile->ns = aa_get_ns(parent->ns); + } + + return profile; +} + /** - * aa_new_null_profile - create or find a null-X learning profile + * aa_new_learning_profile - create or find a null-X learning profile * @parent: profile that caused this profile to be created (NOT NULL) * @hat: true if the null- learning profile is a hat * @base: name to base the null profile off of @@ -542,10 +570,9 @@ struct aa_profile *aa_fqlookupn_profile(struct aa_label *base, * * Returns: new refcounted profile else NULL on failure */ -struct aa_profile *aa_new_null_profile(struct aa_profile *parent, 
bool hat, - const char *base, gfp_t gfp) +struct aa_profile *aa_new_learning_profile(struct aa_profile *parent, bool hat, + const char *base, gfp_t gfp) { - struct aa_ruleset *rules; struct aa_profile *p, *profile; const char *bname; char *name = NULL; @@ -575,22 +602,12 @@ name: if (profile) goto out; - profile = aa_alloc_profile(name, NULL, gfp); + profile = aa_alloc_null(parent, name, gfp); if (!profile) goto fail; - profile->mode = APPARMOR_COMPLAIN; - profile->label.flags |= FLAG_NULL; if (hat) profile->label.flags |= FLAG_HAT; - profile->path_flags = parent->path_flags; - - /* released on free_profile */ - rcu_assign_pointer(profile->parent, aa_get_profile(parent)); - profile->ns = aa_get_ns(parent->ns); - rules = list_first_entry(&profile->rules, typeof(*rules), list); - rules->file.dfa = aa_get_dfa(nulldfa); - rules->policy.dfa = aa_get_dfa(nulldfa); mutex_lock_nested(&profile->ns->lock, profile->ns->level); p = __find_child(&parent->base.profiles, bname); diff --git a/security/apparmor/policy_ns.c b/security/apparmor/policy_ns.c index 121aa79bccaa..5c38563a6dcf 100644 --- a/security/apparmor/policy_ns.c +++ b/security/apparmor/policy_ns.c @@ -83,18 +83,14 @@ const char *aa_ns_name(struct aa_ns *curr, struct aa_ns *view, bool subns) static struct aa_profile *alloc_unconfined(const char *name) { struct aa_profile *profile; - struct aa_ruleset *rules; - profile = aa_alloc_profile(name, NULL, GFP_KERNEL); + profile = aa_alloc_null(NULL, name, GFP_KERNEL); if (!profile) return NULL; profile->label.flags |= FLAG_IX_ON_NAME_ERROR | FLAG_IMMUTIBLE | FLAG_NS_COUNT | FLAG_UNCONFINED; profile->mode = APPARMOR_UNCONFINED; - rules = list_first_entry(&profile->rules, typeof(*rules), list); - rules->file.dfa = aa_get_dfa(nulldfa); - rules->policy.dfa = aa_get_dfa(nulldfa); return profile; } From 5ebc548f4f54fe971d741d80cd108f1a45c9e88d Mon Sep 17 00:00:00 2001 From: Daisuke Matsuda Date: Thu, 13 Oct 2022 10:47:23 +0900 Subject: [PATCH 0395/4122] RDMA/rxe: Make responder 
handle RDMA Read failures Currently, responder can reply packets with invalid payloads if it fails to copy messages to the packets. Add an error handling in read_reply() to inform a requesting node of the failure. Link: https://lore.kernel.org/r/20221013014724.3786212-1-matsuda-daisuke@fujitsu.com Suggested-by: Li Zhijian Signed-off-by: Daisuke Matsuda Reviewed-by: Li Zhijian Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/rxe/rxe_resp.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index ed5a09e86417..82b74e926e09 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -809,10 +809,14 @@ static enum resp_states read_reply(struct rxe_qp *qp, if (!skb) return RESPST_ERR_RNR; - rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt), - payload, RXE_FROM_MR_OBJ); + err = rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt), + payload, RXE_FROM_MR_OBJ); if (mr) rxe_put(mr); + if (err) { + kfree_skb(skb); + return RESPST_ERR_RKEY_VIOLATION; + } if (bth_pad(&ack_pkt)) { u8 *pad = payload_addr(&ack_pkt) + payload; From 5ac814e02ece516761d2e244cef93843df911ae0 Mon Sep 17 00:00:00 2001 From: Daisuke Matsuda Date: Thu, 13 Oct 2022 10:47:24 +0900 Subject: [PATCH 0396/4122] RDMA/rxe: Handle remote errors in the midst of a Read reply sequence Requesting nodes do not handle a reported error correctly if it is generated in the middle of multi-packet Read responses, and the node tries to resend the request endlessly. Let completer terminate the connection in that case. 
Link: https://lore.kernel.org/r/20221013014724.3786212-2-matsuda-daisuke@fujitsu.com Signed-off-by: Daisuke Matsuda Reviewed-by: Li Zhijian Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/rxe/rxe_comp.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index fb0c008af78c..c9170dd99f3a 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -200,6 +200,10 @@ static inline enum comp_state check_psn(struct rxe_qp *qp, */ if (pkt->psn == wqe->last_psn) return COMPST_COMP_ACK; + else if (pkt->opcode == IB_OPCODE_RC_ACKNOWLEDGE && + (qp->comp.opcode == IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST || + qp->comp.opcode == IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE)) + return COMPST_CHECK_ACK; else return COMPST_DONE; } else if ((diff > 0) && (wqe->mask & WR_ATOMIC_OR_READ_MASK)) { @@ -228,6 +232,10 @@ static inline enum comp_state check_ack(struct rxe_qp *qp, case IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST: case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE: + /* Check NAK code to handle a remote error */ + if (pkt->opcode == IB_OPCODE_RC_ACKNOWLEDGE) + break; + if (pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE && pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST) { /* read retries of partial data may restart from From 686d348476ee8006087cfcbef591e28f4f91bd8b Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Mon, 24 Oct 2022 03:31:54 +0000 Subject: [PATCH 0397/4122] RDMA/rxe: Remove unnecessary mr testing Before the testing, we already passed it to rxe_mr_copy() where mr could be dereferenced. so this checking is not needed. The only way that mr is NULL is when it reaches below line 780 with 'qp->resp.mr = NULL', which is not possible in Bob's explanation[1]. 
778 if (res->state == rdatm_res_state_new) { 779 if (!res->replay) { 780 mr = qp->resp.mr; 781 qp->resp.mr = NULL; 782 } else { [1] https://lore.kernel.org/lkml/30ff25c4-ce66-eac4-eaa2-64c0db203a19@gmail.com/ Link: https://lore.kernel.org/r/1666582315-2-1-git-send-email-lizhijian@fujitsu.com CC: Bob Pearson Signed-off-by: Li Zhijian Reviewed-by: Bob Pearson Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/rxe/rxe_resp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 82b74e926e09..95d372db934d 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -811,8 +811,7 @@ static enum resp_states read_reply(struct rxe_qp *qp, err = rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt), payload, RXE_FROM_MR_OBJ); - if (mr) - rxe_put(mr); + rxe_put(mr); if (err) { kfree_skb(skb); return RESPST_ERR_RKEY_VIOLATION; From 7872d4236f74ad6df4d2c9189b66f95157e24a9b Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 26 Sep 2022 22:17:29 +0800 Subject: [PATCH 0398/4122] fpga: zynq: Switch to use dev_err_probe() helper In the probe path, dev_err() can be replaced with dev_err_probe() which will check if error code is -EPROBE_DEFER and prints the error name. It also sets the defer probe reason which can be checked later through debugfs. It's more simple in error path. 
Signed-off-by: Yang Yingliang Acked-by: Xu Yilun Link: https://lore.kernel.org/r/20220926141729.2292311-1-yangyingliang@huawei.com Signed-off-by: Xu Yilun --- drivers/fpga/zynq-fpga.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/fpga/zynq-fpga.c b/drivers/fpga/zynq-fpga.c index 426aa34c6a0d..ae0da361e6c6 100644 --- a/drivers/fpga/zynq-fpga.c +++ b/drivers/fpga/zynq-fpga.c @@ -582,11 +582,9 @@ static int zynq_fpga_probe(struct platform_device *pdev) return priv->irq; priv->clk = devm_clk_get(dev, "ref_clk"); - if (IS_ERR(priv->clk)) { - if (PTR_ERR(priv->clk) != -EPROBE_DEFER) - dev_err(dev, "input clock not found\n"); - return PTR_ERR(priv->clk); - } + if (IS_ERR(priv->clk)) + return dev_err_probe(dev, PTR_ERR(priv->clk), + "input clock not found\n"); err = clk_prepare_enable(priv->clk); if (err) { From 463dd43bc976d1dfaba5cd46b2a10addf8d3b4de Mon Sep 17 00:00:00 2001 From: Ivan Bornyakov Date: Tue, 25 Oct 2022 08:39:46 +0300 Subject: [PATCH 0399/4122] fpga: lattice-sysconfig-spi: add Lattice sysCONFIG FPGA manager Add support to the FPGA manager for programming Lattice ECP5 FPGA over slave SPI sysCONFIG interface. sysCONFIG interface core functionality is separate from both ECP5 and SPI specifics, so support for other FPGAs with different port types can be added in the future. 
Signed-off-by: Ivan Bornyakov Acked-by: Xu Yilun Link: https://lore.kernel.org/r/20221025053947.2737-2-i.bornyakov@metrotek.ru [yilun.xu@intel.com: remove redundant blank line after kmemdup] Signed-off-by: Xu Yilun --- drivers/fpga/Kconfig | 11 + drivers/fpga/Makefile | 2 + drivers/fpga/lattice-sysconfig-spi.c | 152 ++++++++++ drivers/fpga/lattice-sysconfig.c | 397 +++++++++++++++++++++++++++ drivers/fpga/lattice-sysconfig.h | 39 +++ 5 files changed, 601 insertions(+) create mode 100644 drivers/fpga/lattice-sysconfig-spi.c create mode 100644 drivers/fpga/lattice-sysconfig.c create mode 100644 drivers/fpga/lattice-sysconfig.h diff --git a/drivers/fpga/Kconfig b/drivers/fpga/Kconfig index 6c416955da53..d1a8107fdcb3 100644 --- a/drivers/fpga/Kconfig +++ b/drivers/fpga/Kconfig @@ -263,4 +263,15 @@ config FPGA_MGR_MICROCHIP_SPI programming over slave SPI interface with .dat formatted bitstream image. +config FPGA_MGR_LATTICE_SYSCONFIG + tristate + +config FPGA_MGR_LATTICE_SYSCONFIG_SPI + tristate "Lattice sysCONFIG SPI FPGA manager" + depends on SPI + select FPGA_MGR_LATTICE_SYSCONFIG + help + FPGA manager driver support for Lattice FPGAs programming over slave + SPI sysCONFIG interface. 
+ endif # FPGA diff --git a/drivers/fpga/Makefile b/drivers/fpga/Makefile index 42ae8b58abce..72e554b4d2f7 100644 --- a/drivers/fpga/Makefile +++ b/drivers/fpga/Makefile @@ -20,6 +20,8 @@ obj-$(CONFIG_FPGA_MGR_ZYNQ_FPGA) += zynq-fpga.o obj-$(CONFIG_FPGA_MGR_ZYNQMP_FPGA) += zynqmp-fpga.o obj-$(CONFIG_FPGA_MGR_VERSAL_FPGA) += versal-fpga.o obj-$(CONFIG_FPGA_MGR_MICROCHIP_SPI) += microchip-spi.o +obj-$(CONFIG_FPGA_MGR_LATTICE_SYSCONFIG) += lattice-sysconfig.o +obj-$(CONFIG_FPGA_MGR_LATTICE_SYSCONFIG_SPI) += lattice-sysconfig-spi.o obj-$(CONFIG_ALTERA_PR_IP_CORE) += altera-pr-ip-core.o obj-$(CONFIG_ALTERA_PR_IP_CORE_PLAT) += altera-pr-ip-core-plat.o diff --git a/drivers/fpga/lattice-sysconfig-spi.c b/drivers/fpga/lattice-sysconfig-spi.c new file mode 100644 index 000000000000..2702b26b7f55 --- /dev/null +++ b/drivers/fpga/lattice-sysconfig-spi.c @@ -0,0 +1,152 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Lattice FPGA programming over slave SPI sysCONFIG interface. + */ + +#include + +#include "lattice-sysconfig.h" + +static const u32 ecp5_spi_max_speed_hz = 60000000; + +static int sysconfig_spi_cmd_transfer(struct sysconfig_priv *priv, + const void *tx_buf, size_t tx_len, + void *rx_buf, size_t rx_len) +{ + struct spi_device *spi = to_spi_device(priv->dev); + + return spi_write_then_read(spi, tx_buf, tx_len, rx_buf, rx_len); +} + +static int sysconfig_spi_bitstream_burst_init(struct sysconfig_priv *priv) +{ + const u8 lsc_bitstream_burst[] = SYSCONFIG_LSC_BITSTREAM_BURST; + struct spi_device *spi = to_spi_device(priv->dev); + struct spi_transfer xfer = {}; + struct spi_message msg; + size_t buf_len; + void *buf; + int ret; + + buf_len = sizeof(lsc_bitstream_burst); + + buf = kmemdup(lsc_bitstream_burst, buf_len, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + xfer.len = buf_len; + xfer.tx_buf = buf; + xfer.cs_change = 1; + + spi_message_init_with_transfers(&msg, &xfer, 1); + + /* + * Lock SPI bus for exclusive usage until FPGA programming is done. 
+ * SPI bus will be released in sysconfig_spi_bitstream_burst_complete(). + */ + spi_bus_lock(spi->controller); + + ret = spi_sync_locked(spi, &msg); + if (ret) + spi_bus_unlock(spi->controller); + + kfree(buf); + + return ret; +} + +static int sysconfig_spi_bitstream_burst_write(struct sysconfig_priv *priv, + const char *buf, size_t len) +{ + struct spi_device *spi = to_spi_device(priv->dev); + struct spi_transfer xfer = { + .tx_buf = buf, + .len = len, + .cs_change = 1, + }; + struct spi_message msg; + + spi_message_init_with_transfers(&msg, &xfer, 1); + + return spi_sync_locked(spi, &msg); +} + +static int sysconfig_spi_bitstream_burst_complete(struct sysconfig_priv *priv) +{ + struct spi_device *spi = to_spi_device(priv->dev); + + /* Bitstream burst write is done, release SPI bus */ + spi_bus_unlock(spi->controller); + + /* Toggle CS to finish bitstream write */ + return spi_write(spi, NULL, 0); +} + +static int sysconfig_spi_probe(struct spi_device *spi) +{ + const struct spi_device_id *dev_id; + struct device *dev = &spi->dev; + struct sysconfig_priv *priv; + const u32 *spi_max_speed; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + spi_max_speed = device_get_match_data(dev); + if (!spi_max_speed) { + dev_id = spi_get_device_id(spi); + if (!dev_id) + return -ENODEV; + + spi_max_speed = (const u32 *)dev_id->driver_data; + } + + if (!spi_max_speed) + return -EINVAL; + + if (spi->max_speed_hz > *spi_max_speed) { + dev_err(dev, "SPI speed %u is too high, maximum speed is %u\n", + spi->max_speed_hz, *spi_max_speed); + return -EINVAL; + } + + priv->dev = dev; + priv->command_transfer = sysconfig_spi_cmd_transfer; + priv->bitstream_burst_write_init = sysconfig_spi_bitstream_burst_init; + priv->bitstream_burst_write = sysconfig_spi_bitstream_burst_write; + priv->bitstream_burst_write_complete = sysconfig_spi_bitstream_burst_complete; + + return sysconfig_probe(priv); +} + +static const struct spi_device_id 
sysconfig_spi_ids[] = { + { + .name = "sysconfig-ecp5", + .driver_data = (kernel_ulong_t)&ecp5_spi_max_speed_hz, + }, {}, +}; +MODULE_DEVICE_TABLE(spi, sysconfig_spi_ids); + +#if IS_ENABLED(CONFIG_OF) +static const struct of_device_id sysconfig_of_ids[] = { + { + .compatible = "lattice,sysconfig-ecp5", + .data = &ecp5_spi_max_speed_hz, + }, {}, +}; +MODULE_DEVICE_TABLE(of, sysconfig_of_ids); +#endif /* IS_ENABLED(CONFIG_OF) */ + +static struct spi_driver lattice_sysconfig_driver = { + .probe = sysconfig_spi_probe, + .id_table = sysconfig_spi_ids, + .driver = { + .name = "lattice_sysconfig_spi_fpga_mgr", + .of_match_table = of_match_ptr(sysconfig_of_ids), + }, +}; +module_spi_driver(lattice_sysconfig_driver); + +MODULE_DESCRIPTION("Lattice sysCONFIG Slave SPI FPGA Manager"); +MODULE_LICENSE("GPL"); diff --git a/drivers/fpga/lattice-sysconfig.c b/drivers/fpga/lattice-sysconfig.c new file mode 100644 index 000000000000..ba51a60f672f --- /dev/null +++ b/drivers/fpga/lattice-sysconfig.c @@ -0,0 +1,397 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Lattice FPGA sysCONFIG interface functions independent of port type. + */ + +#include +#include +#include +#include + +#include "lattice-sysconfig.h" + +static int sysconfig_cmd_write(struct sysconfig_priv *priv, const void *buf, + size_t buf_len) +{ + return priv->command_transfer(priv, buf, buf_len, NULL, 0); +} + +static int sysconfig_cmd_read(struct sysconfig_priv *priv, const void *tx_buf, + size_t tx_len, void *rx_buf, size_t rx_len) +{ + return priv->command_transfer(priv, tx_buf, tx_len, rx_buf, rx_len); +} + +static int sysconfig_read_busy(struct sysconfig_priv *priv) +{ + const u8 lsc_check_busy[] = SYSCONFIG_LSC_CHECK_BUSY; + u8 busy; + int ret; + + ret = sysconfig_cmd_read(priv, lsc_check_busy, sizeof(lsc_check_busy), + &busy, sizeof(busy)); + + return ret ? 
: busy; +} + +static int sysconfig_poll_busy(struct sysconfig_priv *priv) +{ + int ret, busy; + + ret = read_poll_timeout(sysconfig_read_busy, busy, busy <= 0, + SYSCONFIG_POLL_INTERVAL_US, + SYSCONFIG_POLL_BUSY_TIMEOUT_US, false, priv); + + return ret ? : busy; +} + +static int sysconfig_read_status(struct sysconfig_priv *priv, u32 *status) +{ + const u8 lsc_read_status[] = SYSCONFIG_LSC_READ_STATUS; + __be32 device_status; + int ret; + + ret = sysconfig_cmd_read(priv, lsc_read_status, sizeof(lsc_read_status), + &device_status, sizeof(device_status)); + if (ret) + return ret; + + *status = be32_to_cpu(device_status); + + return 0; +} + +static int sysconfig_poll_status(struct sysconfig_priv *priv, u32 *status) +{ + int ret = sysconfig_poll_busy(priv); + + if (ret) + return ret; + + return sysconfig_read_status(priv, status); +} + +static int sysconfig_poll_gpio(struct gpio_desc *gpio, bool is_active) +{ + int ret, val; + + ret = read_poll_timeout(gpiod_get_value, val, + val < 0 || !!val == is_active, + SYSCONFIG_POLL_INTERVAL_US, + SYSCONFIG_POLL_GPIO_TIMEOUT_US, false, gpio); + + if (val < 0) + return val; + + return ret; +} + +static int sysconfig_gpio_refresh(struct sysconfig_priv *priv) +{ + struct gpio_desc *program = priv->program; + struct gpio_desc *init = priv->init; + struct gpio_desc *done = priv->done; + int ret; + + /* Enter init mode */ + gpiod_set_value(program, 1); + + ret = sysconfig_poll_gpio(init, true); + if (!ret) + ret = sysconfig_poll_gpio(done, false); + + if (ret) + return ret; + + /* Enter program mode */ + gpiod_set_value(program, 0); + + return sysconfig_poll_gpio(init, false); +} + +static int sysconfig_lsc_refresh(struct sysconfig_priv *priv) +{ + static const u8 lsc_refresh[] = SYSCONFIG_LSC_REFRESH; + int ret; + + ret = sysconfig_cmd_write(priv, lsc_refresh, sizeof(lsc_refresh)); + if (ret) + return ret; + + usleep_range(4000, 8000); + + return 0; +} + +static int sysconfig_refresh(struct sysconfig_priv *priv) +{ + struct gpio_desc 
*program = priv->program; + struct gpio_desc *init = priv->init; + struct gpio_desc *done = priv->done; + + if (program && init && done) + return sysconfig_gpio_refresh(priv); + + return sysconfig_lsc_refresh(priv); +} + +static int sysconfig_isc_enable(struct sysconfig_priv *priv) +{ + u8 isc_enable[] = SYSCONFIG_ISC_ENABLE; + u32 status; + int ret; + + ret = sysconfig_cmd_write(priv, isc_enable, sizeof(isc_enable)); + if (ret) + return ret; + + ret = sysconfig_poll_status(priv, &status); + if (ret) + return ret; + + if (status & SYSCONFIG_STATUS_FAIL) + return -EFAULT; + + return 0; +} + +static int sysconfig_isc_erase(struct sysconfig_priv *priv) +{ + u8 isc_erase[] = SYSCONFIG_ISC_ERASE; + u32 status; + int ret; + + ret = sysconfig_cmd_write(priv, isc_erase, sizeof(isc_erase)); + if (ret) + return ret; + + ret = sysconfig_poll_status(priv, &status); + if (ret) + return ret; + + if (status & SYSCONFIG_STATUS_FAIL) + return -EFAULT; + + return 0; +} + +static int sysconfig_isc_init(struct sysconfig_priv *priv) +{ + int ret = sysconfig_isc_enable(priv); + + if (ret) + return ret; + + return sysconfig_isc_erase(priv); +} + +static int sysconfig_lsc_init_addr(struct sysconfig_priv *priv) +{ + const u8 lsc_init_addr[] = SYSCONFIG_LSC_INIT_ADDR; + + return sysconfig_cmd_write(priv, lsc_init_addr, sizeof(lsc_init_addr)); +} + +static int sysconfig_burst_write_init(struct sysconfig_priv *priv) +{ + return priv->bitstream_burst_write_init(priv); +} + +static int sysconfig_burst_write_complete(struct sysconfig_priv *priv) +{ + return priv->bitstream_burst_write_complete(priv); +} + +static int sysconfig_bitstream_burst_write(struct sysconfig_priv *priv, + const char *buf, size_t count) +{ + int ret = priv->bitstream_burst_write(priv, buf, count); + + if (ret) + sysconfig_burst_write_complete(priv); + + return ret; +} + +static int sysconfig_isc_disable(struct sysconfig_priv *priv) +{ + const u8 isc_disable[] = SYSCONFIG_ISC_DISABLE; + + return sysconfig_cmd_write(priv, 
isc_disable, sizeof(isc_disable)); +} + +static void sysconfig_cleanup(struct sysconfig_priv *priv) +{ + sysconfig_isc_erase(priv); + sysconfig_refresh(priv); +} + +static int sysconfig_isc_finish(struct sysconfig_priv *priv) +{ + struct gpio_desc *done_gpio = priv->done; + u32 status; + int ret; + + if (done_gpio) { + ret = sysconfig_isc_disable(priv); + if (ret) + return ret; + + return sysconfig_poll_gpio(done_gpio, true); + } + + ret = sysconfig_poll_status(priv, &status); + if (ret) + return ret; + + if ((status & SYSCONFIG_STATUS_DONE) && + !(status & SYSCONFIG_STATUS_BUSY) && + !(status & SYSCONFIG_STATUS_ERR)) + return sysconfig_isc_disable(priv); + + return -EFAULT; +} + +static enum fpga_mgr_states sysconfig_ops_state(struct fpga_manager *mgr) +{ + struct sysconfig_priv *priv = mgr->priv; + struct gpio_desc *done = priv->done; + u32 status; + int ret; + + if (done && (gpiod_get_value(done) > 0)) + return FPGA_MGR_STATE_OPERATING; + + ret = sysconfig_read_status(priv, &status); + if (!ret && (status & SYSCONFIG_STATUS_DONE)) + return FPGA_MGR_STATE_OPERATING; + + return FPGA_MGR_STATE_UNKNOWN; +} + +static int sysconfig_ops_write_init(struct fpga_manager *mgr, + struct fpga_image_info *info, + const char *buf, size_t count) +{ + struct sysconfig_priv *priv = mgr->priv; + struct device *dev = &mgr->dev; + int ret; + + if (info->flags & FPGA_MGR_PARTIAL_RECONFIG) { + dev_err(dev, "Partial reconfiguration is not supported\n"); + return -EOPNOTSUPP; + } + + /* Enter program mode */ + ret = sysconfig_refresh(priv); + if (ret) { + dev_err(dev, "Failed to go to program mode\n"); + return ret; + } + + /* Enter ISC mode */ + ret = sysconfig_isc_init(priv); + if (ret) { + dev_err(dev, "Failed to go to ISC mode\n"); + return ret; + } + + /* Initialize the Address Shift Register */ + ret = sysconfig_lsc_init_addr(priv); + if (ret) { + dev_err(dev, + "Failed to initialize the Address Shift Register\n"); + return ret; + } + + /* Prepare for bitstream burst write */ + 
ret = sysconfig_burst_write_init(priv); + if (ret) + dev_err(dev, "Failed to prepare for bitstream burst write\n"); + + return ret; +} + +static int sysconfig_ops_write(struct fpga_manager *mgr, const char *buf, + size_t count) +{ + return sysconfig_bitstream_burst_write(mgr->priv, buf, count); +} + +static int sysconfig_ops_write_complete(struct fpga_manager *mgr, + struct fpga_image_info *info) +{ + struct sysconfig_priv *priv = mgr->priv; + struct device *dev = &mgr->dev; + int ret; + + ret = sysconfig_burst_write_complete(priv); + if (!ret) + ret = sysconfig_poll_busy(priv); + + if (ret) { + dev_err(dev, "Error while waiting bitstream write to finish\n"); + goto fail; + } + + ret = sysconfig_isc_finish(priv); + +fail: + if (ret) + sysconfig_cleanup(priv); + + return ret; +} + +static const struct fpga_manager_ops sysconfig_fpga_mgr_ops = { + .state = sysconfig_ops_state, + .write_init = sysconfig_ops_write_init, + .write = sysconfig_ops_write, + .write_complete = sysconfig_ops_write_complete, +}; + +int sysconfig_probe(struct sysconfig_priv *priv) +{ + struct gpio_desc *program, *init, *done; + struct device *dev = priv->dev; + struct fpga_manager *mgr; + + if (!dev) + return -ENODEV; + + if (!priv->command_transfer || + !priv->bitstream_burst_write_init || + !priv->bitstream_burst_write || + !priv->bitstream_burst_write_complete) { + dev_err(dev, "Essential callback is missing\n"); + return -EINVAL; + } + + program = devm_gpiod_get_optional(dev, "program", GPIOD_OUT_LOW); + if (IS_ERR(program)) + return dev_err_probe(dev, PTR_ERR(program), + "Failed to get PROGRAM GPIO\n"); + + init = devm_gpiod_get_optional(dev, "init", GPIOD_IN); + if (IS_ERR(init)) + return dev_err_probe(dev, PTR_ERR(init), + "Failed to get INIT GPIO\n"); + + done = devm_gpiod_get_optional(dev, "done", GPIOD_IN); + if (IS_ERR(done)) + return dev_err_probe(dev, PTR_ERR(done), + "Failed to get DONE GPIO\n"); + + priv->program = program; + priv->init = init; + priv->done = done; + + mgr = 
devm_fpga_mgr_register(dev, "Lattice sysCONFIG FPGA Manager", + &sysconfig_fpga_mgr_ops, priv); + + return PTR_ERR_OR_ZERO(mgr); +} +EXPORT_SYMBOL(sysconfig_probe); + +MODULE_DESCRIPTION("Lattice sysCONFIG FPGA Manager Core"); +MODULE_LICENSE("GPL"); diff --git a/drivers/fpga/lattice-sysconfig.h b/drivers/fpga/lattice-sysconfig.h new file mode 100644 index 000000000000..df47d9a524f6 --- /dev/null +++ b/drivers/fpga/lattice-sysconfig.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __LATTICE_SYSCONFIG_H +#define __LATTICE_SYSCONFIG_H + +#define SYSCONFIG_ISC_ENABLE {0xC6, 0x00, 0x00, 0x00} +#define SYSCONFIG_ISC_DISABLE {0x26, 0x00, 0x00, 0x00} +#define SYSCONFIG_ISC_ERASE {0x0E, 0x01, 0x00, 0x00} +#define SYSCONFIG_LSC_READ_STATUS {0x3C, 0x00, 0x00, 0x00} +#define SYSCONFIG_LSC_CHECK_BUSY {0xF0, 0x00, 0x00, 0x00} +#define SYSCONFIG_LSC_REFRESH {0x79, 0x00, 0x00, 0x00} +#define SYSCONFIG_LSC_INIT_ADDR {0x46, 0x00, 0x00, 0x00} +#define SYSCONFIG_LSC_BITSTREAM_BURST {0x7a, 0x00, 0x00, 0x00} + +#define SYSCONFIG_STATUS_DONE BIT(8) +#define SYSCONFIG_STATUS_BUSY BIT(12) +#define SYSCONFIG_STATUS_FAIL BIT(13) +#define SYSCONFIG_STATUS_ERR GENMASK(25, 23) + +#define SYSCONFIG_POLL_INTERVAL_US 30 +#define SYSCONFIG_POLL_BUSY_TIMEOUT_US 1000000 +#define SYSCONFIG_POLL_GPIO_TIMEOUT_US 100000 + +struct sysconfig_priv { + struct gpio_desc *program; + struct gpio_desc *init; + struct gpio_desc *done; + struct device *dev; + int (*command_transfer)(struct sysconfig_priv *priv, const void *tx_buf, + size_t tx_len, void *rx_buf, size_t rx_len); + int (*bitstream_burst_write_init)(struct sysconfig_priv *priv); + int (*bitstream_burst_write)(struct sysconfig_priv *priv, + const char *tx_buf, size_t tx_len); + int (*bitstream_burst_write_complete)(struct sysconfig_priv *priv); +}; + +int sysconfig_probe(struct sysconfig_priv *priv); + +#endif /* __LATTICE_SYSCONFIG_H */ From ee31d5038c06b56ea515f4fe490274628c0f80e1 Mon Sep 17 00:00:00 2001 From: Ivan Bornyakov 
Date: Tue, 25 Oct 2022 08:39:47 +0300 Subject: [PATCH 0400/4122] dt-bindings: fpga: document Lattice sysCONFIG FPGA manager Add Device Tree Binding doc for configuring Lattice ECP5 FPGA over Slave SPI sysCONFIG interface. Signed-off-by: Ivan Bornyakov Reviewed-by: Krzysztof Kozlowski Acked-by: Xu Yilun Link: https://lore.kernel.org/r/20221025053947.2737-3-i.bornyakov@metrotek.ru Signed-off-by: Xu Yilun --- .../bindings/fpga/lattice,sysconfig.yaml | 81 +++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 Documentation/devicetree/bindings/fpga/lattice,sysconfig.yaml diff --git a/Documentation/devicetree/bindings/fpga/lattice,sysconfig.yaml b/Documentation/devicetree/bindings/fpga/lattice,sysconfig.yaml new file mode 100644 index 000000000000..4fb05eb84e2a --- /dev/null +++ b/Documentation/devicetree/bindings/fpga/lattice,sysconfig.yaml @@ -0,0 +1,81 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/fpga/lattice,sysconfig.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Lattice Slave SPI sysCONFIG FPGA manager + +maintainers: + - Ivan Bornyakov + +description: | + Lattice sysCONFIG port, which is used for FPGA configuration, among others, + have Slave Serial Peripheral Interface. Only full reconfiguration is + supported. + + Programming of ECP5 is done by writing uncompressed bitstream image in .bit + format into FPGA's SRAM configuration memory. + +properties: + compatible: + enum: + - lattice,sysconfig-ecp5 + + reg: + maxItems: 1 + + program-gpios: + description: + A GPIO line connected to PROGRAMN (active low) pin of the device. + Initiates configuration sequence. + maxItems: 1 + + init-gpios: + description: + A GPIO line connected to INITN (active low) pin of the device. + Indicates that the FPGA is ready to be configured. + maxItems: 1 + + done-gpios: + description: + A GPIO line connected to DONE (active high) pin of the device. 
+ Indicates that the configuration sequence is complete. + maxItems: 1 + +required: + - compatible + - reg + +allOf: + - $ref: /schemas/spi/spi-peripheral-props.yaml + + - if: + properties: + compatible: + contains: + const: lattice,sysconfig-ecp5 + then: + properties: + spi-max-frequency: + maximum: 60000000 + +unevaluatedProperties: false + +examples: + - | + #include + + spi { + #address-cells = <1>; + #size-cells = <0>; + + fpga-mgr@0 { + compatible = "lattice,sysconfig-ecp5"; + reg = <0>; + spi-max-frequency = <20000000>; + program-gpios = <&gpio3 4 GPIO_ACTIVE_LOW>; + init-gpios = <&gpio3 3 GPIO_ACTIVE_LOW>; + done-gpios = <&gpio3 2 GPIO_ACTIVE_HIGH>; + }; + }; From 665b1856dc2399828d8ee07a18d4fd79868e729a Mon Sep 17 00:00:00 2001 From: John Johansen Date: Mon, 3 Oct 2022 06:06:26 -0700 Subject: [PATCH 0401/4122] apparmor: Fix loading of child before parent Unfortunately it is possible for some userspace's to load children profiles before the parent profile. This can even happen when the child and the parent are in different load sets. Fix this by creating a null place holder profile that grants no permissions and can be replaced by the parent once it is loaded. Signed-off-by: John Johansen --- security/apparmor/policy.c | 87 ++++++++++++++++++++++++++++++++++---- 1 file changed, 78 insertions(+), 9 deletions(-) diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index c17ccedd35f1..66034cf96f4c 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -423,6 +423,57 @@ static struct aa_policy *__lookup_parent(struct aa_ns *ns, return &profile->base; } +/** + * __create_missing_ancestors - create place holders for missing ancestores + * @ns: namespace to lookup profile in (NOT NULL) + * @hname: hierarchical profile name to find parent of (NOT NULL) + * @gfp: type of allocation. 
+ * + * Returns: NULL on error, parent profile on success + * + * Requires: ns mutex lock held + * + * Returns: unrefcounted parent policy or NULL if error creating + * place holder profiles. + */ +static struct aa_policy *__create_missing_ancestors(struct aa_ns *ns, + const char *hname, + gfp_t gfp) +{ + struct aa_policy *policy; + struct aa_profile *parent, *profile = NULL; + char *split; + + AA_BUG(!ns); + AA_BUG(!hname); + + policy = &ns->base; + + for (split = strstr(hname, "//"); split;) { + parent = profile; + profile = __strn_find_child(&policy->profiles, hname, + split - hname); + if (!profile) { + const char *name = kstrndup(hname, split - hname, + gfp); + if (!name) + return NULL; + profile = aa_alloc_null(parent, name, gfp); + kfree(name); + if (!profile) + return NULL; + if (!parent) + profile->ns = aa_get_ns(ns); + } + policy = &profile->base; + hname = split + 2; + split = strstr(hname, "//"); + } + if (!profile) + return &ns->base; + return &profile->base; +} + /** * __lookupn_profile - lookup the profile matching @hname * @base: base list to start looking up profile name from (NOT NULL) @@ -1032,6 +1083,7 @@ ssize_t aa_replace_profiles(struct aa_ns *policy_ns, struct aa_label *label, /* setup parent and ns info */ list_for_each_entry(ent, &lh, list) { struct aa_policy *policy; + struct aa_profile *p; if (aa_g_export_binary) ent->new->rawdata = aa_get_loaddata(udata); @@ -1056,21 +1108,38 @@ ssize_t aa_replace_profiles(struct aa_ns *policy_ns, struct aa_label *label, continue; /* no ref on policy only use inside lock */ + p = NULL; policy = __lookup_parent(ns, ent->new->base.hname); if (!policy) { - struct aa_profile *p; + /* first check for parent in the load set */ p = __list_lookup_parent(&lh, ent->new); if (!p) { - error = -ENOENT; - info = "parent does not exist"; - goto fail_lock; + /* + * fill in missing parent with null + * profile that doesn't have + * permissions. 
This allows for + * individual profile loading where + * the child is loaded before the + * parent, and outside of the current + * atomic set. This unfortunately can + * happen with some userspaces. The + * null profile will be replaced once + * the parent is loaded. + */ + policy = __create_missing_ancestors(ns, + ent->new->base.hname, + GFP_KERNEL); + if (!policy) { + error = -ENOENT; + info = "parent does not exist"; + goto fail_lock; + } } - rcu_assign_pointer(ent->new->parent, aa_get_profile(p)); - } else if (policy != &ns->base) { - /* released on profile replacement or free_profile */ - struct aa_profile *p = (struct aa_profile *) policy; - rcu_assign_pointer(ent->new->parent, aa_get_profile(p)); } + if (!p && policy != &ns->base) + /* released on profile replacement or free_profile */ + p = (struct aa_profile *) policy; + rcu_assign_pointer(ent->new->parent, aa_get_profile(p)); } /* create new fs entries for introspection if needed */ From 64a27ba984342d6c5cf5facc278de5c5df1fd3ff Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Sat, 8 Oct 2022 14:34:09 +0800 Subject: [PATCH 0402/4122] AppArmor: Fix kernel-doc security/apparmor/audit.c:93: warning: expecting prototype for audit_base(). Prototype was for audit_pre() instead. Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=2339 Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: John Johansen --- security/apparmor/audit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/apparmor/audit.c b/security/apparmor/audit.c index 8dfdda98fbf1..5a7978aa4b19 100644 --- a/security/apparmor/audit.c +++ b/security/apparmor/audit.c @@ -83,7 +83,7 @@ static const char *const aa_class_names[] = { */ /** - * audit_base - core AppArmor function. + * audit_pre() - core AppArmor function. 
* @ab: audit buffer to fill (NOT NULL) * @ca: audit structure containing data to audit (NOT NULL) * From 391f121150a5191c932e02775b6e29e59a3f5a94 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Sat, 8 Oct 2022 14:34:10 +0800 Subject: [PATCH 0403/4122] LSM: Fix kernel-doc security/apparmor/lsm.c:753: warning: expecting prototype for apparmor_bprm_committed_cred(). Prototype was for apparmor_bprm_committed_creds() instead. Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=2338 Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: John Johansen --- security/apparmor/lsm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 8e2b951c4988..ca4d190a737d 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -741,7 +741,7 @@ static void apparmor_bprm_committing_creds(struct linux_binprm *bprm) } /** - * apparmor_bprm_committed_cred - do cleanup after new creds committed + * apparmor_bprm_committed_creds() - do cleanup after new creds committed * @bprm: binprm for the exec (NOT NULL) */ static void apparmor_bprm_committed_creds(struct linux_binprm *bprm) From a2217387c3ec09117b3b6eaa5ec8a0d7d347d4ba Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Sat, 8 Oct 2022 14:34:11 +0800 Subject: [PATCH 0404/4122] AppArmor: Fix kernel-doc security/apparmor/ipc.c:53: warning: expecting prototype for audit_cb(). Prototype was for audit_signal_cb() instead. 
Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=2337 Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: John Johansen --- security/apparmor/ipc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/apparmor/ipc.c b/security/apparmor/ipc.c index 1d4099385bdf..5acde746775f 100644 --- a/security/apparmor/ipc.c +++ b/security/apparmor/ipc.c @@ -45,7 +45,7 @@ static const char *audit_signal_mask(u32 mask) } /** - * audit_cb - call back for signal specific audit fields + * audit_signal_cb() - call back for signal specific audit fields * @ab: audit_buffer (NOT NULL) * @va: audit struct to audit values of (NOT NULL) */ From 37923d4321b1e38170086da2c117f78f2b0f49c6 Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Fri, 21 Oct 2022 08:46:04 +0800 Subject: [PATCH 0405/4122] apparmor: Use pointer to struct aa_label for lbs_cred According to the implementations of cred_label() and set_cred_label(), we should use pointer to struct aa_label for lbs_cred instead of struct aa_task_ctx, this patch fixes it. Fixes: bbd3662a8348 ("Infrastructure management of the cred security blob") Signed-off-by: Xiu Jianfeng Signed-off-by: John Johansen --- security/apparmor/lsm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index ca4d190a737d..25114735bc11 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -1198,10 +1198,10 @@ static int apparmor_inet_conn_request(const struct sock *sk, struct sk_buff *skb #endif /* - * The cred blob is a pointer to, not an instance of, an aa_task_ctx. + * The cred blob is a pointer to, not an instance of, an aa_label. 
*/ struct lsm_blob_sizes apparmor_blob_sizes __lsm_ro_after_init = { - .lbs_cred = sizeof(struct aa_task_ctx *), + .lbs_cred = sizeof(struct aa_label *), .lbs_file = sizeof(struct aa_file_ctx), .lbs_task = sizeof(struct aa_task_ctx), }; From d44c692350d9a376abab46aa0d70587951971068 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Fri, 14 Oct 2022 16:42:55 +0800 Subject: [PATCH 0406/4122] apparmor: Fix spelling of function name in comment block 'resouce' -> 'resource' Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=2396 Reported-by: Abaci Robot Signed-off-by: Yang Li Signed-off-by: John Johansen --- security/apparmor/resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/apparmor/resource.c b/security/apparmor/resource.c index 1b75d8343a8d..e85948164896 100644 --- a/security/apparmor/resource.c +++ b/security/apparmor/resource.c @@ -68,7 +68,7 @@ static int audit_resource(struct aa_profile *profile, unsigned int resource, } /** - * aa_map_resouce - map compiled policy resource to internal # + * aa_map_resource - map compiled policy resource to internal # * @resource: flattened policy resource number * * Returns: resource # for the current architecture. From 7dd426e33e2f9275ac03a306efdc89aa86515a52 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Tue, 25 Oct 2022 11:59:30 +0800 Subject: [PATCH 0407/4122] apparmor: fix a memleak in free_ruleset() When the aa_profile is released, we will call free_ruleset to release aa_ruleset, but we don't free the memory of aa_ruleset, so there will be memleak, fix it. unreferenced object 0xffff8881475df800 (size 1024): comm "apparmor_parser", pid 883, jiffies 4294899650 (age 9114.088s) hex dump (first 32 bytes): 00 f8 5d 47 81 88 ff ff 00 f8 5d 47 81 88 ff ff ..]G......]G.... 00 00 00 00 00 00 00 00 00 dc 65 47 81 88 ff ff ..........eG.... 
backtrace: [<00000000370e658e>] __kmem_cache_alloc_node+0x182/0x700 [<00000000f2f5a6d2>] kmalloc_trace+0x2c/0x130 [<00000000c5c905b3>] aa_alloc_profile+0x1bc/0x5c0 [<00000000bc4fa72b>] unpack_profile+0x319/0x30c0 [<00000000eab791e9>] aa_unpack+0x307/0x1450 [<000000002c3a6ee1>] aa_replace_profiles+0x1b8/0x3790 [<00000000d0c3fd54>] policy_update+0x35a/0x890 [<00000000d04fed90>] profile_replace+0x1d1/0x260 [<00000000cba0c0a7>] vfs_write+0x283/0xd10 [<000000006bae64a5>] ksys_write+0x134/0x260 [<00000000b2fd8f31>] __x64_sys_write+0x78/0xb0 [<00000000f3c8a015>] do_syscall_64+0x5c/0x90 [<00000000a242b1db>] entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: 217af7e2f4de ("apparmor: refactor profile rules and attachments") Signed-off-by: Gaosheng Cui Signed-off-by: John Johansen --- security/apparmor/policy.c | 1 + 1 file changed, 1 insertion(+) diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index 66034cf96f4c..51e8184e0fec 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -215,6 +215,7 @@ static void free_ruleset(struct aa_ruleset *rules) for (i = 0; i < rules->secmark_count; i++) kfree_sensitive(rules->secmark[i].label); kfree_sensitive(rules->secmark); + kfree_sensitive(rules); } struct aa_ruleset *aa_alloc_ruleset(gfp_t gfp) From 3265949f7cd36a724a35020202c618094be1cf28 Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Fri, 21 Oct 2022 17:36:02 +0800 Subject: [PATCH 0408/4122] apparmor: Fix memleak issue in unpack_profile() Before aa_alloc_profile(), it has allocated string for @*ns_name if @tmpns is not NULL, so directly return -ENOMEM if aa_alloc_profile() failed will cause a memleak issue, and even if aa_alloc_profile() succeed, in the @fail_profile tag of aa_unpack(), it need to free @ns_name as well, this patch fixes them. 
Fixes: 736ec752d95e ("AppArmor: policy routines for loading and unpacking policy") Fixes: 04dc715e24d0 ("apparmor: audit policy ns specified in policy load") Signed-off-by: Xiu Jianfeng Signed-off-by: John Johansen --- security/apparmor/policy_unpack.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 2e028d540c6b..1bf8cfb8700a 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -858,8 +858,11 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) } profile = aa_alloc_profile(name, NULL, GFP_KERNEL); - if (!profile) - return ERR_PTR(-ENOMEM); + if (!profile) { + info = "out of memory"; + error = -ENOMEM; + goto fail; + } rules = list_first_entry(&profile->rules, typeof(*rules), list); /* profile renaming is optional */ @@ -1090,6 +1093,10 @@ fail: if (error == 0) /* default error covers most cases */ error = -EPROTO; + if (*ns_name) { + kfree(*ns_name); + *ns_name = NULL; + } if (profile) name = NULL; else if (!name) @@ -1392,6 +1399,7 @@ int aa_unpack(struct aa_loaddata *udata, struct list_head *lh, { struct aa_load_ent *tmp, *ent; struct aa_profile *profile = NULL; + char *ns_name = NULL; int error; struct aa_ext e = { .start = udata->data, @@ -1401,7 +1409,6 @@ int aa_unpack(struct aa_loaddata *udata, struct list_head *lh, *ns = NULL; while (e.pos < e.end) { - char *ns_name = NULL; void *start; error = verify_header(&e, e.pos == e.start, ns); if (error) @@ -1432,6 +1439,7 @@ int aa_unpack(struct aa_loaddata *udata, struct list_head *lh, ent->new = profile; ent->ns_name = ns_name; + ns_name = NULL; list_add_tail(&ent->list, lh); } udata->abi = e.version & K_ABI_MASK; @@ -1452,6 +1460,7 @@ int aa_unpack(struct aa_loaddata *udata, struct list_head *lh, return 0; fail_profile: + kfree(ns_name); aa_put_profile(profile); fail: From 6de0cb80e601df16f481a614daa0e84adbf0b552 Mon Sep 17 00:00:00 2001 
From: Jeremy Kerr Date: Mon, 24 Oct 2022 16:08:28 +0800 Subject: [PATCH 0409/4122] gpio: ftgpio010: use device name for gpiochip name & label Currently, we use just the fixed string "FTGPIO010" as the gpiochip name for ftgpio010 drivers. Because it's fixed, this means we cannot distinguish multiple ftgpio010 devices present on a single system. This change uses the dev_name() instead, which should be unique between multiple instances. Signed-off-by: Jeremy Kerr Acked-by: Linus Walleij Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-ftgpio010.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-ftgpio010.c b/drivers/gpio/gpio-ftgpio010.c index f77a965f5780..2728672ef9f8 100644 --- a/drivers/gpio/gpio-ftgpio010.c +++ b/drivers/gpio/gpio-ftgpio010.c @@ -277,7 +277,7 @@ static int ftgpio_gpio_probe(struct platform_device *pdev) dev_err(dev, "unable to init generic GPIO\n"); goto dis_clk; } - g->gc.label = "FTGPIO010"; + g->gc.label = dev_name(dev); g->gc.base = -1; g->gc.parent = dev; g->gc.owner = THIS_MODULE; From b9b1fc1ae1191243d3956888c65a280a9b2c847f Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Sun, 18 Sep 2022 12:50:43 -0400 Subject: [PATCH 0410/4122] gpio: idio-16: Introduce the ACCES IDIO-16 GPIO library module Exposes consumer library functions to facilitate communication with devices within the ACCES IDIO-16 family such as the 104-IDIO-16 and the PCI-IDIO-16. A CONFIG_GPIO_IDIO_16 Kconfig option is introduced by this patch. Modules wanting access to these idio-16 library functions should select this Kconfig option and import the GPIO_IDIO_16 symbol namespace. 
Cc: Andy Shevchenko Signed-off-by: William Breathitt Gray Signed-off-by: Bartosz Golaszewski --- MAINTAINERS | 7 ++ drivers/gpio/Kconfig | 9 +++ drivers/gpio/Makefile | 1 + drivers/gpio/gpio-idio-16.c | 146 ++++++++++++++++++++++++++++++++++++ drivers/gpio/gpio-idio-16.h | 71 ++++++++++++++++++ 5 files changed, 234 insertions(+) create mode 100644 drivers/gpio/gpio-idio-16.c create mode 100644 drivers/gpio/gpio-idio-16.h diff --git a/MAINTAINERS b/MAINTAINERS index cf0f18502372..74efa0492c43 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -312,6 +312,13 @@ L: linux-iio@vger.kernel.org S: Maintained F: drivers/counter/104-quad-8.c +ACCES IDIO-16 GPIO LIBRARY +M: William Breathitt Gray +L: linux-gpio@vger.kernel.org +S: Maintained +F: drivers/gpio/gpio-idio-16.c +F: drivers/gpio/gpio-idio-16.h + ACCES PCI-IDIO-16 GPIO DRIVER M: William Breathitt Gray L: linux-gpio@vger.kernel.org diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index e034f752e7ce..3f8cf6e2165e 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -109,6 +109,15 @@ config GPIO_REGMAP config GPIO_MAX730X tristate +config GPIO_IDIO_16 + tristate + help + Enables support for the idio-16 library functions. The idio-16 library + provides functions to facilitate communication with devices within the + ACCES IDIO-16 family such as the 104-IDIO-16 and the PCI-IDIO-16. + + If built as a module its name will be gpio-idio-16. 
+ menu "Memory mapped GPIO drivers" depends on HAS_IOMEM diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile index 84fae267e8eb..37a0b7ebda43 100644 --- a/drivers/gpio/Makefile +++ b/drivers/gpio/Makefile @@ -68,6 +68,7 @@ obj-$(CONFIG_GPIO_HLWD) += gpio-hlwd.o obj-$(CONFIG_HTC_EGPIO) += gpio-htc-egpio.o obj-$(CONFIG_GPIO_I8255) += gpio-i8255.o obj-$(CONFIG_GPIO_ICH) += gpio-ich.o +obj-$(CONFIG_GPIO_IDIO_16) += gpio-idio-16.o obj-$(CONFIG_GPIO_IDT3243X) += gpio-idt3243x.o obj-$(CONFIG_GPIO_IMX_SCU) += gpio-imx-scu.o obj-$(CONFIG_GPIO_IOP) += gpio-iop.o diff --git a/drivers/gpio/gpio-idio-16.c b/drivers/gpio/gpio-idio-16.c new file mode 100644 index 000000000000..13315242d220 --- /dev/null +++ b/drivers/gpio/gpio-idio-16.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * GPIO library for the ACCES IDIO-16 family + * Copyright (C) 2022 William Breathitt Gray + */ +#include +#include +#include +#include +#include +#include + +#include "gpio-idio-16.h" + +#define DEFAULT_SYMBOL_NAMESPACE GPIO_IDIO_16 + +/** + * idio_16_get - get signal value at signal offset + * @reg: ACCES IDIO-16 device registers + * @state: ACCES IDIO-16 device state + * @offset: offset of signal to get + * + * Returns the signal value (0=low, 1=high) for the signal at @offset. 
+ */ +int idio_16_get(struct idio_16 __iomem *const reg, + struct idio_16_state *const state, const unsigned long offset) +{ + const unsigned long mask = BIT(offset); + + if (offset < IDIO_16_NOUT) + return test_bit(offset, state->out_state); + + if (offset < 24) + return !!(ioread8(&reg->in0_7) & (mask >> IDIO_16_NOUT)); + + if (offset < 32) + return !!(ioread8(&reg->in8_15) & (mask >> 24)); + + return -EINVAL; +} +EXPORT_SYMBOL_GPL(idio_16_get); + +/** + * idio_16_get_multiple - get multiple signal values at multiple signal offsets + * @reg: ACCES IDIO-16 device registers + * @state: ACCES IDIO-16 device state + * @mask: mask of signals to get + * @bits: bitmap to store signal values + * + * Stores in @bits the values (0=low, 1=high) for the signals defined by @mask. + */ +void idio_16_get_multiple(struct idio_16 __iomem *const reg, + struct idio_16_state *const state, + const unsigned long *const mask, + unsigned long *const bits) +{ + unsigned long flags; + const unsigned long out_mask = GENMASK(IDIO_16_NOUT - 1, 0); + + spin_lock_irqsave(&state->lock, flags); + + bitmap_replace(bits, bits, state->out_state, &out_mask, IDIO_16_NOUT); + if (*mask & GENMASK(23, 16)) + bitmap_set_value8(bits, ioread8(&reg->in0_7), 16); + if (*mask & GENMASK(31, 24)) + bitmap_set_value8(bits, ioread8(&reg->in8_15), 24); + + spin_unlock_irqrestore(&state->lock, flags); +} +EXPORT_SYMBOL_GPL(idio_16_get_multiple); + +/** + * idio_16_set - set signal value at signal offset + * @reg: ACCES IDIO-16 device registers + * @state: ACCES IDIO-16 device state + * @offset: offset of signal to set + * @value: value of signal to set + * + * Assigns output @value for the signal at @offset.
+ */ +void idio_16_set(struct idio_16 __iomem *const reg, + struct idio_16_state *const state, const unsigned long offset, + const unsigned long value) +{ + unsigned long flags; + + if (offset >= IDIO_16_NOUT) + return; + + spin_lock_irqsave(&state->lock, flags); + + __assign_bit(offset, state->out_state, value); + if (offset < 8) + iowrite8(bitmap_get_value8(state->out_state, 0), &reg->out0_7); + else + iowrite8(bitmap_get_value8(state->out_state, 8), &reg->out8_15); + + spin_unlock_irqrestore(&state->lock, flags); +} +EXPORT_SYMBOL_GPL(idio_16_set); + +/** + * idio_16_set_multiple - set signal values at multiple signal offsets + * @reg: ACCES IDIO-16 device registers + * @state: ACCES IDIO-16 device state + * @mask: mask of signals to set + * @bits: bitmap of signal output values + * + * Assigns output values defined by @bits for the signals defined by @mask. + */ +void idio_16_set_multiple(struct idio_16 __iomem *const reg, + struct idio_16_state *const state, + const unsigned long *const mask, + const unsigned long *const bits) +{ + unsigned long flags; + + spin_lock_irqsave(&state->lock, flags); + + bitmap_replace(state->out_state, state->out_state, bits, mask, + IDIO_16_NOUT); + if (*mask & GENMASK(7, 0)) + iowrite8(bitmap_get_value8(state->out_state, 0), &reg->out0_7); + if (*mask & GENMASK(15, 8)) + iowrite8(bitmap_get_value8(state->out_state, 8), &reg->out8_15); + + spin_unlock_irqrestore(&state->lock, flags); +} +EXPORT_SYMBOL_GPL(idio_16_set_multiple); + +/** + * idio_16_state_init - initialize idio_16_state structure + * @state: ACCES IDIO-16 device state + * + * Initializes the ACCES IDIO-16 device @state for use in idio-16 library + * functions. 
+ */ +void idio_16_state_init(struct idio_16_state *const state) +{ + spin_lock_init(&state->lock); +} +EXPORT_SYMBOL_GPL(idio_16_state_init); + +MODULE_AUTHOR("William Breathitt Gray"); +MODULE_DESCRIPTION("ACCES IDIO-16 GPIO Library"); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpio/gpio-idio-16.h b/drivers/gpio/gpio-idio-16.h new file mode 100644 index 000000000000..928f8251a2bd --- /dev/null +++ b/drivers/gpio/gpio-idio-16.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright 2022 William Breathitt Gray */ +#ifndef _IDIO_16_H_ +#define _IDIO_16_H_ + +#include +#include + +/** + * struct idio_16 - IDIO-16 registers structure + * @out0_7: Read: FET Drive Outputs 0-7 + * Write: FET Drive Outputs 0-7 + * @in0_7: Read: Isolated Inputs 0-7 + * Write: Clear Interrupt + * @irq_ctl: Read: Enable IRQ + * Write: Disable IRQ + * @filter_ctl: Read: Activate Input Filters 0-15 + * Write: Deactivate Input Filters 0-15 + * @out8_15: Read: FET Drive Outputs 8-15 + * Write: FET Drive Outputs 8-15 + * @in8_15: Read: Isolated Inputs 8-15 + * Write: Unused + * @irq_status: Read: Interrupt status + * Write: Unused + */ +struct idio_16 { + u8 out0_7; + u8 in0_7; + u8 irq_ctl; + u8 filter_ctl; + u8 out8_15; + u8 in8_15; + u8 irq_status; +}; + +#define IDIO_16_NOUT 16 + +/** + * struct idio_16_state - IDIO-16 state structure + * @lock: synchronization lock for accessing device state + * @out_state: output signals state + */ +struct idio_16_state { + spinlock_t lock; + DECLARE_BITMAP(out_state, IDIO_16_NOUT); +}; + +/** + * idio_16_get_direction - get the I/O direction for a signal offset + * @offset: offset of signal to get direction + * + * Returns the signal direction (0=output, 1=input) for the signal at @offset. + */ +static inline int idio_16_get_direction(const unsigned long offset) +{ + return (offset >= IDIO_16_NOUT) ? 
1 : 0; +} + +int idio_16_get(struct idio_16 __iomem *reg, struct idio_16_state *state, + unsigned long offset); +void idio_16_get_multiple(struct idio_16 __iomem *reg, + struct idio_16_state *state, + const unsigned long *mask, unsigned long *bits); +void idio_16_set(struct idio_16 __iomem *reg, struct idio_16_state *state, + unsigned long offset, unsigned long value); +void idio_16_set_multiple(struct idio_16 __iomem *reg, + struct idio_16_state *state, + const unsigned long *mask, const unsigned long *bits); +void idio_16_state_init(struct idio_16_state *state); + +#endif /* _IDIO_16_H_ */ From c4ec384cf726379e600764c7f2f7ad487280890a Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Tue, 25 Oct 2022 09:57:57 +0200 Subject: [PATCH 0411/4122] gpio: 104-idio-16: Utilize the idio-16 GPIO library The ACCES 104-IDIO-16 device is part of the ACCES IDIO-16 family, so the idio-16 GPIO library module is selected and utilized to consolidate code. Signed-off-by: William Breathitt Gray Signed-off-by: Bartosz Golaszewski --- drivers/gpio/Kconfig | 1 + drivers/gpio/gpio-104-idio-16.c | 88 ++++++--------------------------- 2 files changed, 17 insertions(+), 72 deletions(-) diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 3f8cf6e2165e..3b0a030ce79b 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -858,6 +858,7 @@ config GPIO_104_IDIO_16 depends on PC104 select ISA_BUS_API select GPIOLIB_IRQCHIP + select GPIO_IDIO_16 help Enables GPIO support for the ACCES 104-IDIO-16 family (104-IDIO-16, 104-IDIO-16E, 104-IDO-16, 104-IDIO-8, 104-IDIO-8E, 104-IDO-8). The diff --git a/drivers/gpio/gpio-104-idio-16.c b/drivers/gpio/gpio-104-idio-16.c index 718bd54e2a25..098fbefdbe22 100644 --- a/drivers/gpio/gpio-104-idio-16.c +++ b/drivers/gpio/gpio-104-idio-16.c @@ -6,7 +6,7 @@ * This driver supports the following ACCES devices: 104-IDIO-16, * 104-IDIO-16E, 104-IDO-16, 104-IDIO-8, 104-IDIO-8E, and 104-IDO-8. 
*/ -#include +#include #include #include #include @@ -21,6 +21,8 @@ #include #include +#include "gpio-idio-16.h" + #define IDIO_16_EXTENT 8 #define MAX_NUM_IDIO_16 max_num_isa_dev(IDIO_16_EXTENT) @@ -34,49 +36,26 @@ static unsigned int num_irq; module_param_hw_array(irq, uint, irq, &num_irq, 0); MODULE_PARM_DESC(irq, "ACCES 104-IDIO-16 interrupt line numbers"); -/** - * struct idio_16_reg - device registers structure - * @out0_7: Read: N/A - * Write: FET Drive Outputs 0-7 - * @in0_7: Read: Isolated Inputs 0-7 - * Write: Clear Interrupt - * @irq_ctl: Read: Enable IRQ - * Write: Disable IRQ - * @unused: N/A - * @out8_15: Read: N/A - * Write: FET Drive Outputs 8-15 - * @in8_15: Read: Isolated Inputs 8-15 - * Write: N/A - */ -struct idio_16_reg { - u8 out0_7; - u8 in0_7; - u8 irq_ctl; - u8 unused; - u8 out8_15; - u8 in8_15; -}; - /** * struct idio_16_gpio - GPIO device private data structure * @chip: instance of the gpio_chip * @lock: synchronization lock to prevent I/O race conditions * @irq_mask: I/O bits affected by interrupts * @reg: I/O address offset for the device registers - * @out_state: output bits state + * @state: ACCES IDIO-16 device state */ struct idio_16_gpio { struct gpio_chip chip; raw_spinlock_t lock; unsigned long irq_mask; - struct idio_16_reg __iomem *reg; - unsigned int out_state; + struct idio_16 __iomem *reg; + struct idio_16_state state; }; static int idio_16_gpio_get_direction(struct gpio_chip *chip, unsigned int offset) { - if (offset > 15) + if (idio_16_get_direction(offset)) return GPIO_LINE_DIRECTION_IN; return GPIO_LINE_DIRECTION_OUT; @@ -98,15 +77,8 @@ static int idio_16_gpio_direction_output(struct gpio_chip *chip, static int idio_16_gpio_get(struct gpio_chip *chip, unsigned int offset) { struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip); - const unsigned int mask = BIT(offset-16); - if (offset < 16) - return -EINVAL; - - if (offset < 24) - return !!(ioread8(&idio16gpio->reg->in0_7) & mask); - - return 
!!(ioread8(&idio16gpio->reg->in8_15) & (mask>>8)); + return idio_16_get(idio16gpio->reg, &idio16gpio->state, offset); } static int idio_16_gpio_get_multiple(struct gpio_chip *chip, @@ -114,11 +86,7 @@ static int idio_16_gpio_get_multiple(struct gpio_chip *chip, { struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip); - *bits = 0; - if (*mask & GENMASK(23, 16)) - *bits |= (unsigned long)ioread8(&idio16gpio->reg->in0_7) << 16; - if (*mask & GENMASK(31, 24)) - *bits |= (unsigned long)ioread8(&idio16gpio->reg->in8_15) << 24; + idio_16_get_multiple(idio16gpio->reg, &idio16gpio->state, mask, bits); return 0; } @@ -127,44 +95,16 @@ static void idio_16_gpio_set(struct gpio_chip *chip, unsigned int offset, int value) { struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip); - const unsigned int mask = BIT(offset); - unsigned long flags; - if (offset > 15) - return; - - raw_spin_lock_irqsave(&idio16gpio->lock, flags); - - if (value) - idio16gpio->out_state |= mask; - else - idio16gpio->out_state &= ~mask; - - if (offset > 7) - iowrite8(idio16gpio->out_state >> 8, &idio16gpio->reg->out8_15); - else - iowrite8(idio16gpio->out_state, &idio16gpio->reg->out0_7); - - raw_spin_unlock_irqrestore(&idio16gpio->lock, flags); + idio_16_set(idio16gpio->reg, &idio16gpio->state, offset, value); } static void idio_16_gpio_set_multiple(struct gpio_chip *chip, unsigned long *mask, unsigned long *bits) { struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip); - unsigned long flags; - raw_spin_lock_irqsave(&idio16gpio->lock, flags); - - idio16gpio->out_state &= ~*mask; - idio16gpio->out_state |= *mask & *bits; - - if (*mask & 0xFF) - iowrite8(idio16gpio->out_state, &idio16gpio->reg->out0_7); - if ((*mask >> 8) & 0xFF) - iowrite8(idio16gpio->out_state >> 8, &idio16gpio->reg->out8_15); - - raw_spin_unlock_irqrestore(&idio16gpio->lock, flags); + idio_16_set_multiple(idio16gpio->reg, &idio16gpio->state, mask, bits); } static void idio_16_irq_ack(struct irq_data *data) @@ 
-301,7 +241,10 @@ static int idio_16_probe(struct device *dev, unsigned int id) idio16gpio->chip.get_multiple = idio_16_gpio_get_multiple; idio16gpio->chip.set = idio_16_gpio_set; idio16gpio->chip.set_multiple = idio_16_gpio_set_multiple; - idio16gpio->out_state = 0xFFFF; + + idio_16_state_init(&idio16gpio->state); + /* FET off states are represented by bit values of "1" */ + bitmap_fill(idio16gpio->state.out_state, IDIO_16_NOUT); girq = &idio16gpio->chip.irq; gpio_irq_chip_set_chip(girq, &idio_16_irqchip); @@ -343,3 +286,4 @@ module_isa_driver_with_irq(idio_16_driver, num_idio_16, num_irq); MODULE_AUTHOR("William Breathitt Gray "); MODULE_DESCRIPTION("ACCES 104-IDIO-16 GPIO driver"); MODULE_LICENSE("GPL v2"); +MODULE_IMPORT_NS(GPIO_IDIO_16); From e7f758fa9b7fda8b91f2e429b2be93ae0b88ac33 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Sun, 18 Sep 2022 12:50:45 -0400 Subject: [PATCH 0412/4122] gpio: pci-idio-16: Utilize the idio-16 GPIO library The ACCES PCI-IDIO-16 device is part of the ACCES IDIO-16 family, so the idio-16 GPIO library module is selected and utilized to consolidate code. Signed-off-by: William Breathitt Gray Signed-off-by: Bartosz Golaszewski --- drivers/gpio/Kconfig | 1 + drivers/gpio/gpio-pci-idio-16.c | 119 ++++---------------------------- 2 files changed, 14 insertions(+), 106 deletions(-) diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 3b0a030ce79b..5a04990f03cc 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -1563,6 +1563,7 @@ config GPIO_PCH config GPIO_PCI_IDIO_16 tristate "ACCES PCI-IDIO-16 GPIO support" select GPIOLIB_IRQCHIP + select GPIO_IDIO_16 help Enables GPIO support for the ACCES PCI-IDIO-16. 
An interrupt is generated when any of the inputs change state (low to high or high to diff --git a/drivers/gpio/gpio-pci-idio-16.c b/drivers/gpio/gpio-pci-idio-16.c index 71a13a394050..a86ce748384b 100644 --- a/drivers/gpio/gpio-pci-idio-16.c +++ b/drivers/gpio/gpio-pci-idio-16.c @@ -3,8 +3,7 @@ * GPIO driver for the ACCES PCI-IDIO-16 * Copyright (C) 2017 William Breathitt Gray */ -#include -#include +#include #include #include #include @@ -16,51 +15,28 @@ #include #include -/** - * struct idio_16_gpio_reg - GPIO device registers structure - * @out0_7: Read: FET Drive Outputs 0-7 - * Write: FET Drive Outputs 0-7 - * @in0_7: Read: Isolated Inputs 0-7 - * Write: Clear Interrupt - * @irq_ctl: Read: Enable IRQ - * Write: Disable IRQ - * @filter_ctl: Read: Activate Input Filters 0-15 - * Write: Deactivate Input Filters 0-15 - * @out8_15: Read: FET Drive Outputs 8-15 - * Write: FET Drive Outputs 8-15 - * @in8_15: Read: Isolated Inputs 8-15 - * Write: Unused - * @irq_status: Read: Interrupt status - * Write: Unused - */ -struct idio_16_gpio_reg { - u8 out0_7; - u8 in0_7; - u8 irq_ctl; - u8 filter_ctl; - u8 out8_15; - u8 in8_15; - u8 irq_status; -}; +#include "gpio-idio-16.h" /** * struct idio_16_gpio - GPIO device private data structure * @chip: instance of the gpio_chip * @lock: synchronization lock to prevent I/O race conditions * @reg: I/O address offset for the GPIO device registers + * @state: ACCES IDIO-16 device state * @irq_mask: I/O bits affected by interrupts */ struct idio_16_gpio { struct gpio_chip chip; raw_spinlock_t lock; - struct idio_16_gpio_reg __iomem *reg; + struct idio_16 __iomem *reg; + struct idio_16_state state; unsigned long irq_mask; }; static int idio_16_gpio_get_direction(struct gpio_chip *chip, unsigned int offset) { - if (offset > 15) + if (idio_16_get_direction(offset)) return GPIO_LINE_DIRECTION_IN; return GPIO_LINE_DIRECTION_OUT; @@ -82,43 +58,16 @@ static int idio_16_gpio_direction_output(struct gpio_chip *chip, static int 
idio_16_gpio_get(struct gpio_chip *chip, unsigned int offset) { struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip); - unsigned long mask = BIT(offset); - if (offset < 8) - return !!(ioread8(&idio16gpio->reg->out0_7) & mask); - - if (offset < 16) - return !!(ioread8(&idio16gpio->reg->out8_15) & (mask >> 8)); - - if (offset < 24) - return !!(ioread8(&idio16gpio->reg->in0_7) & (mask >> 16)); - - return !!(ioread8(&idio16gpio->reg->in8_15) & (mask >> 24)); + return idio_16_get(idio16gpio->reg, &idio16gpio->state, offset); } static int idio_16_gpio_get_multiple(struct gpio_chip *chip, unsigned long *mask, unsigned long *bits) { struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip); - unsigned long offset; - unsigned long gpio_mask; - void __iomem *ports[] = { - &idio16gpio->reg->out0_7, &idio16gpio->reg->out8_15, - &idio16gpio->reg->in0_7, &idio16gpio->reg->in8_15, - }; - void __iomem *port_addr; - unsigned long port_state; - - /* clear bits array to a clean slate */ - bitmap_zero(bits, chip->ngpio); - - for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) { - port_addr = ports[offset / 8]; - port_state = ioread8(port_addr) & gpio_mask; - - bitmap_set_value8(bits, port_state, offset); - } + idio_16_get_multiple(idio16gpio->reg, &idio16gpio->state, mask, bits); return 0; } @@ -126,61 +75,16 @@ static void idio_16_gpio_set(struct gpio_chip *chip, unsigned int offset, int value) { struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip); - unsigned int mask = BIT(offset); - void __iomem *base; - unsigned long flags; - unsigned int out_state; - if (offset > 15) - return; - - if (offset > 7) { - mask >>= 8; - base = &idio16gpio->reg->out8_15; - } else - base = &idio16gpio->reg->out0_7; - - raw_spin_lock_irqsave(&idio16gpio->lock, flags); - - if (value) - out_state = ioread8(base) | mask; - else - out_state = ioread8(base) & ~mask; - - iowrite8(out_state, base); - - raw_spin_unlock_irqrestore(&idio16gpio->lock, flags); + 
idio_16_set(idio16gpio->reg, &idio16gpio->state, offset, value); } static void idio_16_gpio_set_multiple(struct gpio_chip *chip, unsigned long *mask, unsigned long *bits) { struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip); - unsigned long offset; - unsigned long gpio_mask; - void __iomem *ports[] = { - &idio16gpio->reg->out0_7, &idio16gpio->reg->out8_15, - }; - size_t index; - void __iomem *port_addr; - unsigned long bitmask; - unsigned long flags; - unsigned long out_state; - for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) { - index = offset / 8; - port_addr = ports[index]; - - bitmask = bitmap_get_value8(bits, offset) & gpio_mask; - - raw_spin_lock_irqsave(&idio16gpio->lock, flags); - - out_state = ioread8(port_addr) & ~gpio_mask; - out_state |= bitmask; - iowrite8(out_state, port_addr); - - raw_spin_unlock_irqrestore(&idio16gpio->lock, flags); - } + idio_16_set_multiple(idio16gpio->reg, &idio16gpio->state, mask, bits); } static void idio_16_irq_ack(struct irq_data *data) @@ -335,6 +239,8 @@ static int idio_16_probe(struct pci_dev *pdev, const struct pci_device_id *id) idio16gpio->chip.set = idio_16_gpio_set; idio16gpio->chip.set_multiple = idio_16_gpio_set_multiple; + idio_16_state_init(&idio16gpio->state); + girq = &idio16gpio->chip.irq; girq->chip = &idio_16_irqchip; /* This will let us handle the parent IRQ in the driver */ @@ -379,3 +285,4 @@ module_pci_driver(idio_16_driver); MODULE_AUTHOR("William Breathitt Gray "); MODULE_DESCRIPTION("ACCES PCI-IDIO-16 GPIO driver"); MODULE_LICENSE("GPL v2"); +MODULE_IMPORT_NS(GPIO_IDIO_16); From 848dba781f1951636c966c9f3a6a41a5b2f8b572 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 24 Oct 2022 14:39:33 +0200 Subject: [PATCH 0413/4122] container_of: remove container_of_safe() It came in from a staging driver that has been long removed from the tree, and there are no in-kernel users of the macro, and it's very dubious if anyone should ever use this thing, so just remove it 
entirely. Reviewed-by: Sakari Ailus Acked-by: Rafael J. Wysocki Acked-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221024123933.3331116-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/container_of.h | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/include/linux/container_of.h b/include/linux/container_of.h index 2f4944b791b8..a6f242137b11 100644 --- a/include/linux/container_of.h +++ b/include/linux/container_of.h @@ -21,20 +21,4 @@ "pointer type mismatch in container_of()"); \ ((type *)(__mptr - offsetof(type, member))); }) -/** - * container_of_safe - cast a member of a structure out to the containing structure - * @ptr: the pointer to the member. - * @type: the type of the container struct this is embedded in. - * @member: the name of the member within the struct. - * - * If IS_ERR_OR_NULL(ptr), ptr is returned unchanged. - */ -#define container_of_safe(ptr, type, member) ({ \ - void *__mptr = (void *)(ptr); \ - static_assert(__same_type(*(ptr), ((type *)0)->member) || \ - __same_type(*(ptr), void), \ - "pointer type mismatch in container_of_safe()"); \ - IS_ERR_OR_NULL(__mptr) ? ERR_CAST(__mptr) : \ - ((type *)(__mptr - offsetof(type, member))); }) - #endif /* _LINUX_CONTAINER_OF_H */ From 7376e561fd2e017e9a53f975209777234b8b434e Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Mon, 24 Oct 2022 14:16:27 +0300 Subject: [PATCH 0414/4122] linux/container_of.h: Warn about loss of constness container_of() casts the original type to another which leads to the loss of the const qualifier if it is not specified in the caller-provided type. This easily leads to container_of() returning a non-const pointer to a const struct which the C compiler does not warn about. 
Acked-by: Andy Shevchenko Signed-off-by: Sakari Ailus Link: https://lore.kernel.org/r/20221024111627.75183-1-sakari.ailus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/container_of.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/container_of.h b/include/linux/container_of.h index a6f242137b11..2008e9f4058c 100644 --- a/include/linux/container_of.h +++ b/include/linux/container_of.h @@ -13,6 +13,7 @@ * @type: the type of the container struct this is embedded in. * @member: the name of the member within the struct. * + * WARNING: any const qualifier of @ptr is lost. */ #define container_of(ptr, type, member) ({ \ void *__mptr = (void *)(ptr); \ From 2ae18cc2269fc2d05d36bf44a8daa4404fa11dde Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 24 Oct 2022 10:48:46 +0300 Subject: [PATCH 0415/4122] thunderbolt: ACPI: Use the helper fwnode_find_reference() Replacing the direct fwnode_property_get_reference_args() call with this wrapper function. No functional changes intended. 
Signed-off-by: Heikki Krogerus Signed-off-by: Mika Westerberg --- drivers/thunderbolt/acpi.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/thunderbolt/acpi.c b/drivers/thunderbolt/acpi.c index 7a8adf5ad5a0..317e4f5fdb97 100644 --- a/drivers/thunderbolt/acpi.c +++ b/drivers/thunderbolt/acpi.c @@ -15,24 +15,20 @@ static acpi_status tb_acpi_add_link(acpi_handle handle, u32 level, void *data, void **return_value) { struct acpi_device *adev = acpi_fetch_acpi_dev(handle); - struct fwnode_reference_args args; struct fwnode_handle *fwnode; struct tb_nhi *nhi = data; struct pci_dev *pdev; struct device *dev; - int ret; if (!adev) return AE_OK; - fwnode = acpi_fwnode_handle(adev); - ret = fwnode_property_get_reference_args(fwnode, "usb4-host-interface", - NULL, 0, 0, &args); - if (ret) + fwnode = fwnode_find_reference(acpi_fwnode_handle(adev), "usb4-host-interface", 0); + if (IS_ERR(fwnode)) return AE_OK; /* It needs to reference this NHI */ - if (dev_fwnode(&nhi->pdev->dev) != args.fwnode) + if (dev_fwnode(&nhi->pdev->dev) != fwnode) goto out_put; /* @@ -100,7 +96,7 @@ static acpi_status tb_acpi_add_link(acpi_handle handle, u32 level, void *data, } out_put: - fwnode_handle_put(args.fwnode); + fwnode_handle_put(fwnode); return AE_OK; } From eac001bf4a5b7d857ec228cd18b4e3644a5ceeb9 Mon Sep 17 00:00:00 2001 From: Xiang Yang Date: Thu, 20 Oct 2022 09:44:26 +0800 Subject: [PATCH 0416/4122] gpiolib: acpi: Use METHOD_NAME__AEI macro for acpi_walk_resources Using the METHOD_NAME__AEI macro instead of using "_AEI" directly. 
Signed-off-by: Xiang Yang Signed-off-by: Andy Shevchenko --- drivers/gpio/gpiolib-acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index a7d2358736fe..064ba5150fd4 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -512,7 +512,7 @@ void acpi_gpiochip_request_interrupts(struct gpio_chip *chip) if (ACPI_FAILURE(status)) return; - acpi_walk_resources(handle, "_AEI", + acpi_walk_resources(handle, METHOD_NAME__AEI, acpi_gpiochip_alloc_event, acpi_gpio); mutex_lock(&acpi_gpio_deferred_req_irqs_lock); From 8d259847243d1e21a866e828c4ce90d759f3d17b Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 20 Oct 2022 18:39:14 +0300 Subject: [PATCH 0417/4122] gpiolib: cdev: Fix typo in kernel doc for struct line When eflags was renamed to edflags, the kernel doc change was missed. Update kernel doc accordingly. Fixes: b1a92e94560d ("gpiolib: cdev: consolidate edge detector configuration flags") Signed-off-by: Andy Shevchenko Reviewed-by: Kent Gibson --- drivers/gpio/gpiolib-cdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c index 0cb6b468f364..08606f32372c 100644 --- a/drivers/gpio/gpiolib-cdev.c +++ b/drivers/gpio/gpiolib-cdev.c @@ -410,7 +410,7 @@ out_free_lh: * @desc: the GPIO descriptor for this line. 
* @req: the corresponding line request * @irq: the interrupt triggered in response to events on this GPIO - * @eflags: the edge flags, GPIO_V2_LINE_FLAG_EDGE_RISING and/or + * @edflags: the edge flags, GPIO_V2_LINE_FLAG_EDGE_RISING and/or * GPIO_V2_LINE_FLAG_EDGE_FALLING, indicating the edge detection applied * @timestamp_ns: cache for the timestamp storing it between hardirq and * IRQ thread, used to bring the timestamp close to the actual event From 1662cea4623f75d8251adf07370bbaa958f0355d Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 25 Oct 2022 15:15:49 +0800 Subject: [PATCH 0418/4122] kset: fix memory leak when kset_register() returns error Inject fault while loading module, kset_register() may fail. If it fails, the kset.kobj.name allocated by kobject_set_name() which must be called before a call to kset_register() may be leaked, since refcount of kobj was set in kset_init(). To mitigate this, we free the name in kset_register() when an error is encountered, i.e. when kset_register() returns an error. A kset may be embedded in a larger structure which may be dynamically allocated in callers, it needs to be freed in ktype.release() or error path in callers, in this case, we can not call kset_put() in kset_register(), or it will cause double free, so just call kfree_const() to free the name and set it to NULL to avoid accessing bad pointer in callers. With this fix, the callers don't need care about freeing the name and may call kset_put() if kset_register() fails. Suggested-by: Luben Tuikov Signed-off-by: Yang Yingliang Reviewed-by: Link: https://lore.kernel.org/r/20221025071549.1280528-1-yangyingliang@huawei.com Signed-off-by: Greg Kroah-Hartman --- lib/kobject.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/kobject.c b/lib/kobject.c index 0380ec889a6a..ba1017cd67d1 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -834,6 +834,9 @@ EXPORT_SYMBOL_GPL(kobj_sysfs_ops); /** * kset_register() - Initialize and add a kset. 
* @k: kset. + * + * NOTE: On error, the kset.kobj.name allocated by kobject_set_name() + * is freed, it can not be used any more. */ int kset_register(struct kset *k) { @@ -844,8 +847,12 @@ int kset_register(struct kset *k) kset_init(k); err = kobject_add_internal(&k->kobj); - if (err) + if (err) { + kfree_const(k->kobj.name); + /* Set it to NULL to avoid accessing bad pointer in callers. */ + k->kobj.name = NULL; return err; + } kobject_uevent(&k->kobj, KOBJ_ADD); return 0; } From a12960f970d3d47d1aefd4293738b878a6e7d024 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Mon, 24 Oct 2022 10:14:01 +0200 Subject: [PATCH 0419/4122] staging: r8188eu: restructure mlme subfunction handling Move some code around in rtw_mlme_ext.c to make it simpler. mlme_sta_tbl is used only by mgt_dispatcher. Move the table inside the function. Move mgt_dispatcher behind the handler functions. We can then make the handler functions static. Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221024081417.66441-2-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 116 ++++++++++---------- 1 file changed, 57 insertions(+), 59 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index d146b94307b8..3435610ca411 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -12,24 +12,6 @@ #include "../include/rtl8188e_xmit.h" #include "../include/rtl8188e_dm.h" -/* response function for each management frame subtype, do not reorder */ -static mlme_handler mlme_sta_tbl[] = { - OnAssocReq, - OnAssocRsp, - OnAssocReq, - OnAssocRsp, - OnProbeReq, - OnProbeRsp, - NULL, - NULL, - OnBeacon, - NULL, - OnDisassoc, - OnAuthClient, - OnDeAuth, - OnAction, -}; - static u8 null_addr[ETH_ALEN] = {0, 0, 0, 0, 0, 0}; /************************************************** @@ -393,47 +375,6 @@ void 
free_mlme_ext_priv(struct mlme_ext_priv *pmlmeext) } } -void mgt_dispatcher(struct adapter *padapter, struct recv_frame *precv_frame) -{ - int index; - mlme_handler fct; - struct mlme_priv *pmlmepriv = &padapter->mlmepriv; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)precv_frame->rx_data; - struct sta_info *psta = rtw_get_stainfo(&padapter->stapriv, hdr->addr2); - - if (!ieee80211_is_mgmt(hdr->frame_control)) - return; - - /* receive the frames that ra(a1) is my address or ra(a1) is bc address. */ - if (memcmp(hdr->addr1, myid(&padapter->eeprompriv), ETH_ALEN) && - !is_broadcast_ether_addr(hdr->addr1)) - return; - - index = (le16_to_cpu(hdr->frame_control) & IEEE80211_FCTL_STYPE) >> 4; - if (index >= ARRAY_SIZE(mlme_sta_tbl)) - return; - fct = mlme_sta_tbl[index]; - - if (psta) { - if (ieee80211_has_retry(hdr->frame_control)) { - if (precv_frame->attrib.seq_num == psta->RxMgmtFrameSeqNum) - /* drop the duplicate management frame */ - return; - } - psta->RxMgmtFrameSeqNum = precv_frame->attrib.seq_num; - } - - if (ieee80211_is_auth(hdr->frame_control)) { - if (check_fwstate(pmlmepriv, WIFI_AP_STATE)) - fct = OnAuth; - else - fct = OnAuthClient; - } - - if (fct) - fct(padapter, precv_frame); -} - static u32 p2p_listen_state_process(struct adapter *padapter, unsigned char *da) { bool response = true; @@ -4008,6 +3949,63 @@ struct xmit_frame *alloc_mgtxmitframe(struct xmit_priv *pxmitpriv) return pmgntframe; } +void mgt_dispatcher(struct adapter *padapter, struct recv_frame *precv_frame) +{ + mlme_handler mlme_sta_tbl[] = { + OnAssocReq, + OnAssocRsp, + OnAssocReq, + OnAssocRsp, + OnProbeReq, + OnProbeRsp, + NULL, + NULL, + OnBeacon, + NULL, + OnDisassoc, + OnAuthClient, + OnDeAuth, + OnAction, + }; + int index; + mlme_handler fct; + struct mlme_priv *pmlmepriv = &padapter->mlmepriv; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)precv_frame->rx_data; + struct sta_info *psta = rtw_get_stainfo(&padapter->stapriv, hdr->addr2); + + if 
(!ieee80211_is_mgmt(hdr->frame_control)) + return; + + /* receive the frames that ra(a1) is my address or ra(a1) is bc address. */ + if (memcmp(hdr->addr1, myid(&padapter->eeprompriv), ETH_ALEN) && + !is_broadcast_ether_addr(hdr->addr1)) + return; + + index = (le16_to_cpu(hdr->frame_control) & IEEE80211_FCTL_STYPE) >> 4; + if (index >= ARRAY_SIZE(mlme_sta_tbl)) + return; + fct = mlme_sta_tbl[index]; + + if (psta) { + if (ieee80211_has_retry(hdr->frame_control)) { + if (precv_frame->attrib.seq_num == psta->RxMgmtFrameSeqNum) + /* drop the duplicate management frame */ + return; + } + psta->RxMgmtFrameSeqNum = precv_frame->attrib.seq_num; + } + + if (ieee80211_is_auth(hdr->frame_control)) { + if (check_fwstate(pmlmepriv, WIFI_AP_STATE)) + fct = OnAuth; + else + fct = OnAuthClient; + } + + if (fct) + fct(padapter, precv_frame); +} + /**************************************************************************** Following are some TX functions for WiFi MLME From b8d4f50557d52fff6356e8a83d50283232eedb98 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Mon, 24 Oct 2022 10:14:02 +0200 Subject: [PATCH 0420/4122] staging: r8188eu: make OnAssocReq static OnAssocReq is used only in rtw_mlme_ext.c. Make this function static. 
Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221024081417.66441-3-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 2 +- drivers/staging/r8188eu/include/rtw_mlme_ext.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index 3435610ca411..576f56ad4383 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -911,7 +911,7 @@ static void UpdateBrateTblForSoftAP(u8 *bssrateset, u32 bssratelen) } } -unsigned int OnAssocReq(struct adapter *padapter, struct recv_frame *precv_frame) +static unsigned int OnAssocReq(struct adapter *padapter, struct recv_frame *precv_frame) { u16 capab_info; struct rtw_ieee802_11_elems elems; diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h index 66aa8b497aa1..41e31cec9abe 100644 --- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h +++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h @@ -536,8 +536,6 @@ void start_clnt_auth(struct adapter *padapter); void start_clnt_join(struct adapter *padapter); void start_create_ibss(struct adapter *padapter); -unsigned int OnAssocReq(struct adapter *padapter, - struct recv_frame *precv_frame); unsigned int OnAssocRsp(struct adapter *padapter, struct recv_frame *precv_frame); unsigned int OnProbeReq(struct adapter *padapter, From 411c3890cb0d385bcd8ddef90abac82a32274153 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Mon, 24 Oct 2022 10:14:03 +0200 Subject: [PATCH 0421/4122] staging: r8188eu: make OnAssocRsp static OnAssocRsp is used only in rtw_mlme_ext.c. Make this function static. 
Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221024081417.66441-4-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 2 +- drivers/staging/r8188eu/include/rtw_mlme_ext.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index 576f56ad4383..a3cfab2627d5 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -1319,7 +1319,7 @@ OnAssocReqFail: return _FAIL; } -unsigned int OnAssocRsp(struct adapter *padapter, struct recv_frame *precv_frame) +static unsigned int OnAssocRsp(struct adapter *padapter, struct recv_frame *precv_frame) { struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)precv_frame->rx_data; uint i; diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h index 41e31cec9abe..c2b1aa2f378b 100644 --- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h +++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h @@ -536,8 +536,6 @@ void start_clnt_auth(struct adapter *padapter); void start_clnt_join(struct adapter *padapter); void start_create_ibss(struct adapter *padapter); -unsigned int OnAssocRsp(struct adapter *padapter, - struct recv_frame *precv_frame); unsigned int OnProbeReq(struct adapter *padapter, struct recv_frame *precv_frame); unsigned int OnProbeRsp(struct adapter *padapter, From 94941c42c24813da86128f47bec0949184798b62 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Mon, 24 Oct 2022 10:14:04 +0200 Subject: [PATCH 0422/4122] staging: r8188eu: make OnProbeReq static OnProbeReq is used only in rtw_mlme_ext.c. Make this function static. 
Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221024081417.66441-5-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 2 +- drivers/staging/r8188eu/include/rtw_mlme_ext.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index a3cfab2627d5..5e1e2970ccd9 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -447,7 +447,7 @@ Following are the callback functions for each subtype of the management frames *****************************************************************************/ -unsigned int OnProbeReq(struct adapter *padapter, struct recv_frame *precv_frame) +static unsigned int OnProbeReq(struct adapter *padapter, struct recv_frame *precv_frame) { unsigned int ielen; unsigned char *p; diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h index c2b1aa2f378b..b24aae8108b6 100644 --- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h +++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h @@ -536,8 +536,6 @@ void start_clnt_auth(struct adapter *padapter); void start_clnt_join(struct adapter *padapter); void start_create_ibss(struct adapter *padapter); -unsigned int OnProbeReq(struct adapter *padapter, - struct recv_frame *precv_frame); unsigned int OnProbeRsp(struct adapter *padapter, struct recv_frame *precv_frame); unsigned int OnBeacon(struct adapter *padapter, From bd0bd67c6bd8e2e5f1a3741f3d323e58ea3fc7ff Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Mon, 24 Oct 2022 10:14:05 +0200 Subject: [PATCH 0423/4122] staging: r8188eu: make OnProbeRsp static OnProbeRsp is used only in rtw_mlme_ext.c. Make this function static. 
Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221024081417.66441-6-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 2 +- drivers/staging/r8188eu/include/rtw_mlme_ext.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index 5e1e2970ccd9..4ccc6c6e45e6 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -515,7 +515,7 @@ _issue_probersp: return _SUCCESS; } -unsigned int OnProbeRsp(struct adapter *padapter, struct recv_frame *precv_frame) +static unsigned int OnProbeRsp(struct adapter *padapter, struct recv_frame *precv_frame) { struct mlme_ext_priv *pmlmeext = &padapter->mlmeextpriv; struct wifidirect_info *pwdinfo = &padapter->wdinfo; diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h index b24aae8108b6..52991ec7b3e8 100644 --- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h +++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h @@ -536,8 +536,6 @@ void start_clnt_auth(struct adapter *padapter); void start_clnt_join(struct adapter *padapter); void start_create_ibss(struct adapter *padapter); -unsigned int OnProbeRsp(struct adapter *padapter, - struct recv_frame *precv_frame); unsigned int OnBeacon(struct adapter *padapter, struct recv_frame *precv_frame); unsigned int OnDisassoc(struct adapter *padapter, From 1aad70df18457dfbd4244dbe620f6f12429e32d5 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Mon, 24 Oct 2022 10:14:06 +0200 Subject: [PATCH 0424/4122] staging: r8188eu: make OnBeacon static OnBeacon is used only in rtw_mlme_ext.c. Make this function static. 
Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221024081417.66441-7-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 2 +- drivers/staging/r8188eu/include/rtw_mlme_ext.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index 4ccc6c6e45e6..1a279c812f5f 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -561,7 +561,7 @@ static unsigned int OnProbeRsp(struct adapter *padapter, struct recv_frame *prec return _SUCCESS; } -unsigned int OnBeacon(struct adapter *padapter, struct recv_frame *precv_frame) +static unsigned int OnBeacon(struct adapter *padapter, struct recv_frame *precv_frame) { int cam_idx; struct sta_info *psta; diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h index 52991ec7b3e8..b4868598eba8 100644 --- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h +++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h @@ -536,8 +536,6 @@ void start_clnt_auth(struct adapter *padapter); void start_clnt_join(struct adapter *padapter); void start_create_ibss(struct adapter *padapter); -unsigned int OnBeacon(struct adapter *padapter, - struct recv_frame *precv_frame); unsigned int OnDisassoc(struct adapter *padapter, struct recv_frame *precv_frame); unsigned int OnAuth(struct adapter *padapter, From 8ef3cd4f527c76832fefa2bb9960b5afb5f0a1d0 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Mon, 24 Oct 2022 10:14:07 +0200 Subject: [PATCH 0425/4122] staging: r8188eu: make OnDisassoc static OnDisassoc is used only in rtw_mlme_ext.c. Make this function static. 
Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221024081417.66441-8-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 2 +- drivers/staging/r8188eu/include/rtw_mlme_ext.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index 1a279c812f5f..309376aba41f 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -1458,7 +1458,7 @@ unsigned int OnDeAuth(struct adapter *padapter, struct recv_frame *precv_frame) return _SUCCESS; } -unsigned int OnDisassoc(struct adapter *padapter, struct recv_frame *precv_frame) +static unsigned int OnDisassoc(struct adapter *padapter, struct recv_frame *precv_frame) { u16 reason; struct mlme_priv *pmlmepriv = &padapter->mlmepriv; diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h index b4868598eba8..c1bc554202a0 100644 --- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h +++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h @@ -536,8 +536,6 @@ void start_clnt_auth(struct adapter *padapter); void start_clnt_join(struct adapter *padapter); void start_create_ibss(struct adapter *padapter); -unsigned int OnDisassoc(struct adapter *padapter, - struct recv_frame *precv_frame); unsigned int OnAuth(struct adapter *padapter, struct recv_frame *precv_frame); unsigned int OnAuthClient(struct adapter *padapter, From de20e195c221c808f7d9e2babe6f4cd9e7c92818 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Mon, 24 Oct 2022 10:14:08 +0200 Subject: [PATCH 0426/4122] staging: r8188eu: make OnAuthClient static OnAuthClient is used only in rtw_mlme_ext.c. Make this function static. 
Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221024081417.66441-9-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 2 +- drivers/staging/r8188eu/include/rtw_mlme_ext.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index 309376aba41f..a63146ccc6b4 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -800,7 +800,7 @@ auth_fail: return _FAIL; } -unsigned int OnAuthClient(struct adapter *padapter, struct recv_frame *precv_frame) +static unsigned int OnAuthClient(struct adapter *padapter, struct recv_frame *precv_frame) { unsigned int seq, len, status, offset; unsigned char *p; diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h index c1bc554202a0..866d358f5f10 100644 --- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h +++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h @@ -538,8 +538,6 @@ void start_create_ibss(struct adapter *padapter); unsigned int OnAuth(struct adapter *padapter, struct recv_frame *precv_frame); -unsigned int OnAuthClient(struct adapter *padapter, - struct recv_frame *precv_frame); unsigned int OnDeAuth(struct adapter *padapter, struct recv_frame *precv_frame); unsigned int OnAction(struct adapter *padapter, From 51877bf3ab4ccd0279fedbf97736791234ed0cfe Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Mon, 24 Oct 2022 10:14:09 +0200 Subject: [PATCH 0427/4122] staging: r8188eu: make OnDeAuth static OnDeAuth is used only in rtw_mlme_ext.c. Make this function static. 
Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221024081417.66441-10-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 2 +- drivers/staging/r8188eu/include/rtw_mlme_ext.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index a63146ccc6b4..df817d7594a2 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -1396,7 +1396,7 @@ report_assoc_result: return _SUCCESS; } -unsigned int OnDeAuth(struct adapter *padapter, struct recv_frame *precv_frame) +static unsigned int OnDeAuth(struct adapter *padapter, struct recv_frame *precv_frame) { struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)precv_frame->rx_data; unsigned short reason; diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h index 866d358f5f10..14df83574478 100644 --- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h +++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h @@ -538,8 +538,6 @@ void start_create_ibss(struct adapter *padapter); unsigned int OnAuth(struct adapter *padapter, struct recv_frame *precv_frame); -unsigned int OnDeAuth(struct adapter *padapter, - struct recv_frame *precv_frame); unsigned int OnAction(struct adapter *padapter, struct recv_frame *precv_frame); From 37552ad1fa2ed3924364e54f99f31ee56d902bcb Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Mon, 24 Oct 2022 10:14:10 +0200 Subject: [PATCH 0428/4122] staging: r8188eu: make OnAction static OnAction is used only in rtw_mlme_ext.c. Make this function static. 
Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221024081417.66441-11-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 2 +- drivers/staging/r8188eu/include/rtw_mlme_ext.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index df817d7594a2..0eb7f586d755 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -3910,7 +3910,7 @@ unsigned int OnAction_p2p(struct adapter *padapter, struct recv_frame *precv_fra return _SUCCESS; } -unsigned int OnAction(struct adapter *padapter, struct recv_frame *precv_frame) +static unsigned int OnAction(struct adapter *padapter, struct recv_frame *precv_frame) { struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)precv_frame->rx_data; diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h index 14df83574478..f192733a5c53 100644 --- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h +++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h @@ -538,8 +538,6 @@ void start_create_ibss(struct adapter *padapter); unsigned int OnAuth(struct adapter *padapter, struct recv_frame *precv_frame); -unsigned int OnAction(struct adapter *padapter, - struct recv_frame *precv_frame); unsigned int OnAction_back(struct adapter *padapter, struct recv_frame *precv_frame); From 19adbd9a2f26a64be96702f488ebed77b5334f79 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Mon, 24 Oct 2022 10:14:11 +0200 Subject: [PATCH 0429/4122] staging: r8188eu: make OnAuth static OnAuth is used only in rtw_mlme_ext.c. Make this function static. 
Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221024081417.66441-12-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 2 +- drivers/staging/r8188eu/include/rtw_mlme_ext.h | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index 0eb7f586d755..be34c3b21bd7 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -651,7 +651,7 @@ _END_ONBEACON_: return _SUCCESS; } -unsigned int OnAuth(struct adapter *padapter, struct recv_frame *precv_frame) +static unsigned int OnAuth(struct adapter *padapter, struct recv_frame *precv_frame) { unsigned int auth_mode, ie_len; u16 seq; diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h index f192733a5c53..f77e6dc81831 100644 --- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h +++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h @@ -536,9 +536,6 @@ void start_clnt_auth(struct adapter *padapter); void start_clnt_join(struct adapter *padapter); void start_create_ibss(struct adapter *padapter); -unsigned int OnAuth(struct adapter *padapter, - struct recv_frame *precv_frame); - unsigned int OnAction_back(struct adapter *padapter, struct recv_frame *precv_frame); unsigned int on_action_public(struct adapter *padapter, From 05aa6bf9c2546379f8ba57e2ffe8f88082d50a52 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Mon, 24 Oct 2022 10:14:12 +0200 Subject: [PATCH 0430/4122] staging: r8188eu: change mlme handlers to void The mlme handlers that are called from mgt_dispatcher return an error code. mgt_dispatcher doesn't check this error code, we can remove it and change the handler functions to void. For now, make only the minimum changes to the handlers for removing the error codes. 
If handlers can be simplified, that'll be done in separate patches. Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221024081417.66441-13-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 93 ++++++++----------- .../staging/r8188eu/include/rtw_mlme_ext.h | 2 +- 2 files changed, 41 insertions(+), 54 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index be34c3b21bd7..d89f494d12af 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -447,7 +447,7 @@ Following are the callback functions for each subtype of the management frames *****************************************************************************/ -static unsigned int OnProbeReq(struct adapter *padapter, struct recv_frame *precv_frame) +static void OnProbeReq(struct adapter *padapter, struct recv_frame *precv_frame) { unsigned int ielen; unsigned char *p; @@ -481,17 +481,17 @@ static unsigned int OnProbeReq(struct adapter *padapter, struct recv_frame *prec report_survey_event(padapter, precv_frame); p2p_listen_state_process(padapter, get_sa(pframe)); - return _SUCCESS; + return; } } } if (check_fwstate(pmlmepriv, WIFI_STATION_STATE)) - return _SUCCESS; + return; if (!check_fwstate(pmlmepriv, _FW_LINKED) && !check_fwstate(pmlmepriv, WIFI_ADHOC_MASTER_STATE | WIFI_AP_STATE)) - return _SUCCESS; + return; p = rtw_get_ie(pframe + WLAN_HDR_A3_LEN + _PROBEREQ_IE_OFFSET_, _SSID_IE_, (int *)&ielen, len - WLAN_HDR_A3_LEN - _PROBEREQ_IE_OFFSET_); @@ -503,7 +503,7 @@ static unsigned int OnProbeReq(struct adapter *padapter, struct recv_frame *prec if ((ielen != 0 && memcmp((void *)(p + 2), (void *)cur->Ssid.Ssid, cur->Ssid.SsidLength)) || (ielen == 0 && pmlmeinfo->hidden_ssid_mode)) - return _SUCCESS; + return; _issue_probersp: @@ -512,10 +512,9 @@ _issue_probersp: check_fwstate(pmlmepriv, 
WIFI_ADHOC_MASTER_STATE))) issue_probersp(padapter, get_sa(pframe), is_valid_p2p_probereq); } - return _SUCCESS; } -static unsigned int OnProbeRsp(struct adapter *padapter, struct recv_frame *precv_frame) +static void OnProbeRsp(struct adapter *padapter, struct recv_frame *precv_frame) { struct mlme_ext_priv *pmlmeext = &padapter->mlmeextpriv; struct wifidirect_info *pwdinfo = &padapter->wdinfo; @@ -537,7 +536,7 @@ static unsigned int OnProbeRsp(struct adapter *padapter, struct recv_frame *prec } } } - return _SUCCESS; + return; } else if (rtw_p2p_chk_state(pwdinfo, P2P_STATE_GONEGO_ING)) { if (pwdinfo->nego_req_info.benable) { if (!memcmp(pwdinfo->nego_req_info.peerDevAddr, GetAddr2Ptr(pframe), ETH_ALEN)) { @@ -555,13 +554,11 @@ static unsigned int OnProbeRsp(struct adapter *padapter, struct recv_frame *prec } if (pmlmeext->sitesurvey_res.state == SCAN_PROCESS) { report_survey_event(padapter, precv_frame); - return _SUCCESS; + return; } - - return _SUCCESS; } -static unsigned int OnBeacon(struct adapter *padapter, struct recv_frame *precv_frame) +static void OnBeacon(struct adapter *padapter, struct recv_frame *precv_frame) { int cam_idx; struct sta_info *psta; @@ -576,7 +573,7 @@ static unsigned int OnBeacon(struct adapter *padapter, struct recv_frame *precv_ if (pmlmeext->sitesurvey_res.state == SCAN_PROCESS) { report_survey_event(padapter, precv_frame); - return _SUCCESS; + return; } if (!memcmp(GetAddr3Ptr(pframe), get_my_bssid(&pmlmeinfo->network), ETH_ALEN)) { @@ -600,7 +597,7 @@ static unsigned int OnBeacon(struct adapter *padapter, struct recv_frame *precv_ /* start auth */ start_clnt_auth(padapter); - return _SUCCESS; + return; } if (((pmlmeinfo->state & 0x03) == WIFI_FW_STATION_STATE) && (pmlmeinfo->state & WIFI_FW_ASSOC_SUCCESS)) { @@ -610,7 +607,7 @@ static unsigned int OnBeacon(struct adapter *padapter, struct recv_frame *precv_ if (!ret) { receive_disconnect(padapter, pmlmeinfo->network.MacAddress, 0); - return _SUCCESS; + return; } /* update WMM, 
ERP in the beacon */ /* todo: the timer is used instead of the number of the beacon received */ @@ -629,12 +626,12 @@ static unsigned int OnBeacon(struct adapter *padapter, struct recv_frame *precv_ /* allocate a new CAM entry for IBSS station */ cam_idx = allocate_fw_sta_entry(padapter); if (cam_idx == NUM_STA) - goto _END_ONBEACON_; + return; /* get supported rate */ if (update_sta_support_rate(padapter, (pframe + WLAN_HDR_A3_LEN + _BEACON_IE_OFFSET_), (len - WLAN_HDR_A3_LEN - _BEACON_IE_OFFSET_), cam_idx) == _FAIL) { pmlmeinfo->FW_sta_info[cam_idx].status = 0; - goto _END_ONBEACON_; + return; } /* update TSF Value */ @@ -645,13 +642,9 @@ static unsigned int OnBeacon(struct adapter *padapter, struct recv_frame *precv_ } } } - -_END_ONBEACON_: - - return _SUCCESS; } -static unsigned int OnAuth(struct adapter *padapter, struct recv_frame *precv_frame) +static void OnAuth(struct adapter *padapter, struct recv_frame *precv_frame) { unsigned int auth_mode, ie_len; u16 seq; @@ -668,7 +661,7 @@ static unsigned int OnAuth(struct adapter *padapter, struct recv_frame *precv_fr uint len = precv_frame->len; if ((pmlmeinfo->state & 0x03) != WIFI_FW_AP_STATE) - return _FAIL; + return; sa = GetAddr2Ptr(pframe); @@ -784,7 +777,7 @@ static unsigned int OnAuth(struct adapter *padapter, struct recv_frame *precv_fr if (pstat->state & WIFI_FW_AUTH_SUCCESS) pstat->auth_seq = 0; - return _SUCCESS; + return; auth_fail: @@ -797,10 +790,9 @@ auth_fail: memcpy(pstat->hwaddr, sa, 6); issue_auth(padapter, pstat, (unsigned short)status); - return _FAIL; } -static unsigned int OnAuthClient(struct adapter *padapter, struct recv_frame *precv_frame) +static void OnAuthClient(struct adapter *padapter, struct recv_frame *precv_frame) { unsigned int seq, len, status, offset; unsigned char *p; @@ -812,10 +804,10 @@ static unsigned int OnAuthClient(struct adapter *padapter, struct recv_frame *pr /* check A1 matches or not */ if (memcmp(myid(&padapter->eeprompriv), get_da(pframe), ETH_ALEN)) - return 
_SUCCESS; + return; if (!(pmlmeinfo->state & WIFI_FW_AUTH_STATE)) - return _SUCCESS; + return; offset = (GetPrivacy(pframe)) ? 4 : 0; @@ -848,7 +840,7 @@ static unsigned int OnAuthClient(struct adapter *padapter, struct recv_frame *pr issue_auth(padapter, NULL, 0); set_link_timer(pmlmeext, REAUTH_TO); - return _SUCCESS; + return; } else { /* open system */ go2asoc = 1; @@ -865,10 +857,10 @@ static unsigned int OnAuthClient(struct adapter *padapter, struct recv_frame *pr if (go2asoc) { start_clnt_assoc(padapter); - return _SUCCESS; + return; } authclnt_fail: - return _FAIL; + return; } static void UpdateBrateTbl(u8 *mbrate) @@ -911,7 +903,7 @@ static void UpdateBrateTblForSoftAP(u8 *bssrateset, u32 bssratelen) } } -static unsigned int OnAssocReq(struct adapter *padapter, struct recv_frame *precv_frame) +static void OnAssocReq(struct adapter *padapter, struct recv_frame *precv_frame) { u16 capab_info; struct rtw_ieee802_11_elems elems; @@ -937,7 +929,7 @@ static unsigned int OnAssocReq(struct adapter *padapter, struct recv_frame *prec u32 p2pielen = 0; if ((pmlmeinfo->state & 0x03) != WIFI_FW_AP_STATE) - return _FAIL; + return; frame_type = GetFrameSubType(pframe); if (frame_type == WIFI_ASSOCREQ) @@ -946,7 +938,7 @@ static unsigned int OnAssocReq(struct adapter *padapter, struct recv_frame *prec ie_offset = _REASOCREQ_IE_OFFSET_; if (pkt_len < IEEE80211_3ADDR_LEN + ie_offset) - return _FAIL; + return; pstat = rtw_get_stainfo(pstapriv, GetAddr2Ptr(pframe)); if (pstat == (struct sta_info *)NULL) { @@ -1300,13 +1292,13 @@ static unsigned int OnAssocReq(struct adapter *padapter, struct recv_frame *prec report_add_sta_event(padapter, pstat->hwaddr, pstat->aid); } - return _SUCCESS; + return; asoc_class2_error: issue_deauth(padapter, (void *)GetAddr2Ptr(pframe), status); - return _FAIL; + return; OnAssocReqFail: @@ -1316,10 +1308,10 @@ OnAssocReqFail: else issue_asocrsp(padapter, status, pstat, WIFI_REASSOCRSP); - return _FAIL; + return; } -static unsigned int 
OnAssocRsp(struct adapter *padapter, struct recv_frame *precv_frame) +static void OnAssocRsp(struct adapter *padapter, struct recv_frame *precv_frame) { struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)precv_frame->rx_data; uint i; @@ -1333,13 +1325,13 @@ static unsigned int OnAssocRsp(struct adapter *padapter, struct recv_frame *prec /* check A1 matches or not */ if (memcmp(myid(&padapter->eeprompriv), mgmt->da, ETH_ALEN)) - return _SUCCESS; + return; if (!(pmlmeinfo->state & (WIFI_FW_AUTH_SUCCESS | WIFI_FW_ASSOC_STATE))) - return _SUCCESS; + return; if (pmlmeinfo->state & WIFI_FW_ASSOC_SUCCESS) - return _SUCCESS; + return; _cancel_timer_ex(&pmlmeext->link_timer); @@ -1392,11 +1384,9 @@ static unsigned int OnAssocRsp(struct adapter *padapter, struct recv_frame *prec report_assoc_result: report_join_res(padapter, res); - - return _SUCCESS; } -static unsigned int OnDeAuth(struct adapter *padapter, struct recv_frame *precv_frame) +static void OnDeAuth(struct adapter *padapter, struct recv_frame *precv_frame) { struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)precv_frame->rx_data; unsigned short reason; @@ -1406,7 +1396,7 @@ static unsigned int OnDeAuth(struct adapter *padapter, struct recv_frame *precv_ struct wifidirect_info *pwdinfo = &padapter->wdinfo; if (memcmp(mgmt->bssid, get_my_bssid(&pmlmeinfo->network), ETH_ALEN)) - return _SUCCESS; + return; if (pwdinfo->rx_invitereq_info.scan_op_ch_only) { _cancel_timer_ex(&pwdinfo->reset_ch_sitesurvey); @@ -1422,7 +1412,7 @@ static unsigned int OnDeAuth(struct adapter *padapter, struct recv_frame *precv_ psta = rtw_get_stainfo(pstapriv, mgmt->sa); if (!psta) - return _SUCCESS; + return; spin_lock_bh(&pstapriv->asoc_list_lock); if (!list_empty(&psta->asoc_list)) { @@ -1455,10 +1445,9 @@ static unsigned int OnDeAuth(struct adapter *padapter, struct recv_frame *precv_ pmlmepriv->LinkDetectInfo.bBusyTraffic = false; } - return _SUCCESS; } -static unsigned int OnDisassoc(struct adapter *padapter, struct recv_frame 
*precv_frame) +static void OnDisassoc(struct adapter *padapter, struct recv_frame *precv_frame) { u16 reason; struct mlme_priv *pmlmepriv = &padapter->mlmepriv; @@ -1469,7 +1458,7 @@ static unsigned int OnDisassoc(struct adapter *padapter, struct recv_frame *prec /* check A3 */ if (!(!memcmp(GetAddr3Ptr(pframe), get_my_bssid(&pmlmeinfo->network), ETH_ALEN))) - return _SUCCESS; + return; if (pwdinfo->rx_invitereq_info.scan_op_ch_only) { _cancel_timer_ex(&pwdinfo->reset_ch_sitesurvey); @@ -1497,12 +1486,11 @@ static unsigned int OnDisassoc(struct adapter *padapter, struct recv_frame *prec associated_clients_update(padapter, updated); } - return _SUCCESS; + return; } else { receive_disconnect(padapter, GetAddr3Ptr(pframe), reason); } pmlmepriv->LinkDetectInfo.bBusyTraffic = false; - return _SUCCESS; } unsigned int OnAction_back(struct adapter *padapter, struct recv_frame *precv_frame) @@ -3910,7 +3898,7 @@ unsigned int OnAction_p2p(struct adapter *padapter, struct recv_frame *precv_fra return _SUCCESS; } -static unsigned int OnAction(struct adapter *padapter, struct recv_frame *precv_frame) +static void OnAction(struct adapter *padapter, struct recv_frame *precv_frame) { struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)precv_frame->rx_data; @@ -3925,7 +3913,6 @@ static unsigned int OnAction(struct adapter *padapter, struct recv_frame *precv_ OnAction_p2p(padapter, precv_frame); break; } - return _SUCCESS; } struct xmit_frame *alloc_mgtxmitframe(struct xmit_priv *pxmitpriv) diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h index f77e6dc81831..c8beaa927cba 100644 --- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h +++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h @@ -184,7 +184,7 @@ enum SCAN_STATE { SCAN_STATE_MAX, }; -typedef unsigned int (*mlme_handler)(struct adapter *adapt, struct recv_frame *frame); +typedef void (*mlme_handler)(struct adapter *adapt, struct recv_frame *frame); struct ss_res { 
int state; From 2d64ae536ef0c78924fdd99e0bb3018d7b6b3178 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Mon, 24 Oct 2022 10:14:13 +0200 Subject: [PATCH 0431/4122] staging: r8188eu: remove unnecessary label Remove a label on OnAuthClient that just calls return. We can return directly instead of jumping to this label. Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221024081417.66441-14-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index d89f494d12af..e2e7c35019f2 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -823,7 +823,7 @@ static void OnAuthClient(struct adapter *padapter, struct recv_frame *precv_fram } set_link_timer(pmlmeext, 1); - goto authclnt_fail; + return; } if (seq == 2) { @@ -833,7 +833,7 @@ static void OnAuthClient(struct adapter *padapter, struct recv_frame *precv_fram pkt_len - WLAN_HDR_A3_LEN - _AUTH_IE_OFFSET_); if (!p) - goto authclnt_fail; + return; memcpy((void *)(pmlmeinfo->chg_txt), (void *)(p + 2), len); pmlmeinfo->auth_seq = 3; @@ -849,18 +849,16 @@ static void OnAuthClient(struct adapter *padapter, struct recv_frame *precv_fram if (pmlmeinfo->auth_algo == dot11AuthAlgrthm_Shared) go2asoc = 1; else - goto authclnt_fail; + return; } else { /* this is also illegal */ - goto authclnt_fail; + return; } if (go2asoc) { start_clnt_assoc(padapter); return; } -authclnt_fail: - return; } static void UpdateBrateTbl(u8 *mbrate) From 3cf90ead512362a1b2b4b9a0d48ec47ca26744c2 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Mon, 24 Oct 2022 10:14:14 +0200 Subject: [PATCH 0432/4122] staging: r8188eu: remove unnecessary else branch Remove an else branch in OnAuthClient that is not needed. 
If we go into the else branch, go2asoc is 0. We can simply continue and the last if condition will be false. Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221024081417.66441-15-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index e2e7c35019f2..6ac3e41af992 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -850,9 +850,6 @@ static void OnAuthClient(struct adapter *padapter, struct recv_frame *precv_fram go2asoc = 1; else return; - } else { - /* this is also illegal */ - return; } if (go2asoc) { From cb2cff04d040eb02555c3c51a045fa31f4cc0e79 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Mon, 24 Oct 2022 10:14:15 +0200 Subject: [PATCH 0433/4122] staging: r8188eu: remove unnecessary return Remove a return statement at the end of a function. 
Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221024081417.66441-16-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index 6ac3e41af992..a93133a9ca56 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -852,10 +852,8 @@ static void OnAuthClient(struct adapter *padapter, struct recv_frame *precv_fram return; } - if (go2asoc) { + if (go2asoc) start_clnt_assoc(padapter); - return; - } } static void UpdateBrateTbl(u8 *mbrate) From d667d36d689eb3b0cd1bd0d09b36cbad9625f10b Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Mon, 24 Oct 2022 10:14:16 +0200 Subject: [PATCH 0434/4122] staging: r8188eu: remove an else branch If we go into this else branch, go2asoc is 0. We can continue to the end of the function. The final if condition will be false. 
Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221024081417.66441-17-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index a93133a9ca56..ba95d2391319 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -848,8 +848,6 @@ static void OnAuthClient(struct adapter *padapter, struct recv_frame *precv_fram } else if (seq == 4) { if (pmlmeinfo->auth_algo == dot11AuthAlgrthm_Shared) go2asoc = 1; - else - return; } if (go2asoc) From 8d5c6a1df1204280354bfb59316fd056e978a864 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Mon, 24 Oct 2022 10:14:17 +0200 Subject: [PATCH 0435/4122] staging: r8188eu: go2asoc is not needed Remove the go2asoc variable in OnAuthClient and call start_clnt_assoc directly. This makes the code a tiny bit shorter. 
Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221024081417.66441-18-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index ba95d2391319..254832a0177e 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -796,7 +796,6 @@ static void OnAuthClient(struct adapter *padapter, struct recv_frame *precv_fram { unsigned int seq, len, status, offset; unsigned char *p; - unsigned int go2asoc = 0; struct mlme_ext_priv *pmlmeext = &padapter->mlmeextpriv; struct mlme_ext_info *pmlmeinfo = &pmlmeext->mlmext_info; u8 *pframe = precv_frame->rx_data; @@ -843,15 +842,12 @@ static void OnAuthClient(struct adapter *padapter, struct recv_frame *precv_fram return; } else { /* open system */ - go2asoc = 1; + start_clnt_assoc(padapter); } } else if (seq == 4) { if (pmlmeinfo->auth_algo == dot11AuthAlgrthm_Shared) - go2asoc = 1; + start_clnt_assoc(padapter); } - - if (go2asoc) - start_clnt_assoc(padapter); } static void UpdateBrateTbl(u8 *mbrate) From ec6d91016437fe8e5fdcb8cc60c14887e588998f Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sun, 23 Oct 2022 19:08:04 +0200 Subject: [PATCH 0436/4122] staging: r8188eu: use standard multicast addr check Use is_multicast_ether_addr to check for a multicast address instead of reimplementing this check in the driver. 
Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221023170808.46233-2-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_sta_mgt.c | 2 +- drivers/staging/r8188eu/include/wifi.h | 5 ----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_sta_mgt.c b/drivers/staging/r8188eu/core/rtw_sta_mgt.c index bbde5c03f9ae..51324e708697 100644 --- a/drivers/staging/r8188eu/core/rtw_sta_mgt.c +++ b/drivers/staging/r8188eu/core/rtw_sta_mgt.c @@ -404,7 +404,7 @@ struct sta_info *rtw_get_stainfo(struct sta_priv *pstapriv, u8 *hwaddr) if (!hwaddr) return NULL; - if (IS_MCAST(hwaddr)) + if (is_multicast_ether_addr(hwaddr)) addr = bc_addr; else addr = hwaddr; diff --git a/drivers/staging/r8188eu/include/wifi.h b/drivers/staging/r8188eu/include/wifi.h index 381385a7e118..fdb7c1bf3573 100644 --- a/drivers/staging/r8188eu/include/wifi.h +++ b/drivers/staging/r8188eu/include/wifi.h @@ -236,11 +236,6 @@ enum WIFI_REG_DOMAIN { #define GetAddr4Ptr(pbuf) ((unsigned char *)((size_t)(pbuf) + 24)) -static inline bool IS_MCAST(unsigned char *da) -{ - return (*da) & 0x01; -} - static inline unsigned char *get_da(unsigned char *pframe) { unsigned char *da; From 2ce164e9b363ee845287b7e693a1f9e7429ecc08 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sun, 23 Oct 2022 19:08:05 +0200 Subject: [PATCH 0437/4122] staging: r8188eu: don't set pcmd_obj components to 0 pcmd_obj was allocated with kzalloc, its memory is filled with 0s. There's no need to set rsp and rspsz to NULL or 0 again. 
Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221023170808.46233-3-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index 254832a0177e..6c71fbfec32d 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -6725,9 +6725,6 @@ void report_join_res(struct adapter *padapter, int res) pcmd_obj->cmdsz = cmdsz; pcmd_obj->parmbuf = pevtcmd; - pcmd_obj->rsp = NULL; - pcmd_obj->rspsz = 0; - pc2h_evt_hdr = (struct C2HEvent_Header *)(pevtcmd); pc2h_evt_hdr->len = sizeof(struct joinbss_event); pc2h_evt_hdr->ID = GEN_EVT_CODE(_JoinBss); From 9e9e26190ee651ba868281ae16eab5dc1bcecf0e Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sun, 23 Oct 2022 19:08:06 +0200 Subject: [PATCH 0438/4122] staging: r8188eu: NetworkTypeInUse is not in use NetworkTypeInUse from struct wlan_bssid_ex is not used. Remove NetworkTypeInUse itself, the code to set it and the enum for its possible values. 
Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221023170808.46233-4-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme.c | 16 ---------------- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 3 --- drivers/staging/r8188eu/include/wlan_bssdef.h | 9 --------- 3 files changed, 28 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme.c b/drivers/staging/r8188eu/core/rtw_mlme.c index 1f69e5c57d5d..a47ae33454b3 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme.c +++ b/drivers/staging/r8188eu/core/rtw_mlme.c @@ -1819,22 +1819,6 @@ void rtw_update_registrypriv_dev_network(struct adapter *adapter) pdev_network->Rssi = 0; - switch (pregistrypriv->wireless_mode) { - case WIRELESS_11B: - pdev_network->NetworkTypeInUse = (Ndis802_11DS); - break; - case WIRELESS_11G: - case WIRELESS_11BG: - case WIRELESS_11_24N: - case WIRELESS_11G_24N: - case WIRELESS_11BG_24N: - pdev_network->NetworkTypeInUse = (Ndis802_11OFDM24); - break; - default: - /* TODO */ - break; - } - pdev_network->Configuration.DSConfig = (pregistrypriv->channel); if (cur_network->network.InfrastructureMode == Ndis802_11IBSS) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index 6c71fbfec32d..fc37d17f8c7e 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -6160,9 +6160,6 @@ u8 collect_bss_info(struct adapter *padapter, struct recv_frame *precv_frame, st memcpy(bssid->SupportedRates + i, (p + 2), len); } - /* todo: */ - bssid->NetworkTypeInUse = Ndis802_11OFDM24; - if (bssid->IELength < 12) return _FAIL; diff --git a/drivers/staging/r8188eu/include/wlan_bssdef.h b/drivers/staging/r8188eu/include/wlan_bssdef.h index 81bda91a4136..831c465df500 100644 --- a/drivers/staging/r8188eu/include/wlan_bssdef.h +++ b/drivers/staging/r8188eu/include/wlan_bssdef.h @@ -17,14 +17,6 @@ struct ndis_802_11_ssid { u8 
Ssid[32]; }; -enum NDIS_802_11_NETWORK_TYPE { - Ndis802_11FH, - Ndis802_11DS, - Ndis802_11OFDM5, - Ndis802_11OFDM24, - Ndis802_11NetworkTypeMax /* dummy upper bound */ -}; - struct ndis_802_11_config_fh { u32 Length; /* Length of structure */ u32 HopPattern; /* As defined by 802.11, MSB set */ @@ -233,7 +225,6 @@ struct wlan_bssid_ex { struct ndis_802_11_ssid Ssid; u32 Privacy; NDIS_802_11_RSSI Rssi;/* in dBM,raw data ,get from PHY) */ - enum NDIS_802_11_NETWORK_TYPE NetworkTypeInUse; struct ndis_802_11_config Configuration; enum ndis_802_11_network_infra InfrastructureMode; unsigned char SupportedRates[NDIS_802_11_LENGTH_RATES_EX]; From e329c18341b72c8386a1a25f8176ec541e66bf2b Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sun, 23 Oct 2022 19:08:07 +0200 Subject: [PATCH 0439/4122] staging: r8188eu: remove wait_ack param from _issue_probereq_p2p The only caller of _issue_probereq_p2p sets wait_ack = false. Remove the wait_ack parameter and the code that would run only if wait_ack == true. 
Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221023170808.46233-5-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index fc37d17f8c7e..ffb708f242e6 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -3207,7 +3207,7 @@ void issue_probersp_p2p(struct adapter *padapter, unsigned char *da) dump_mgntframe(padapter, pmgntframe); } -static int _issue_probereq_p2p(struct adapter *padapter, u8 *da, int wait_ack) +static int _issue_probereq_p2p(struct adapter *padapter, u8 *da) { int ret = _FAIL; struct xmit_frame *pmgntframe; @@ -3480,12 +3480,8 @@ static int _issue_probereq_p2p(struct adapter *padapter, u8 *da, int wait_ack) pattrib->last_txcmdsz = pattrib->pktlen; - if (wait_ack) { - ret = dump_mgntframe_and_wait_ack(padapter, pmgntframe); - } else { - dump_mgntframe(padapter, pmgntframe); - ret = _SUCCESS; - } + dump_mgntframe(padapter, pmgntframe); + ret = _SUCCESS; exit: return ret; @@ -3493,7 +3489,7 @@ exit: inline void issue_probereq_p2p(struct adapter *adapter, u8 *da) { - _issue_probereq_p2p(adapter, da, false); + _issue_probereq_p2p(adapter, da); } static s32 rtw_action_public_decache(struct recv_frame *recv_frame, s32 token) From 5229004f800219e849969eaa8197dff3b55f4b02 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sun, 23 Oct 2022 19:08:08 +0200 Subject: [PATCH 0440/4122] staging: r8188eu: bCardDisableWOHSM is write-only bCardDisableWOHSM in struct adapter is only written but never read. It can be removed. 
Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221023170808.46233-6-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/include/drv_types.h | 1 - drivers/staging/r8188eu/os_dep/os_intfs.c | 4 ---- 2 files changed, 5 deletions(-) diff --git a/drivers/staging/r8188eu/include/drv_types.h b/drivers/staging/r8188eu/include/drv_types.h index 1bd0c8f3a358..8fef5759c36a 100644 --- a/drivers/staging/r8188eu/include/drv_types.h +++ b/drivers/staging/r8188eu/include/drv_types.h @@ -167,7 +167,6 @@ struct adapter { s32 bDriverStopped; s32 bSurpriseRemoved; - s32 bCardDisableWOHSM; u8 hw_init_completed; s8 signal_strength; diff --git a/drivers/staging/r8188eu/os_dep/os_intfs.c b/drivers/staging/r8188eu/os_dep/os_intfs.c index 38e324754c8f..970f380bac96 100644 --- a/drivers/staging/r8188eu/os_dep/os_intfs.c +++ b/drivers/staging/r8188eu/os_dep/os_intfs.c @@ -620,7 +620,6 @@ static int _netdev_open(struct net_device *pnetdev) if (!padapter->bup) { padapter->bDriverStopped = false; padapter->bSurpriseRemoved = false; - padapter->bCardDisableWOHSM = false; status = rtw_hal_init(padapter); if (status == _FAIL) @@ -686,7 +685,6 @@ static int ips_netdrv_open(struct adapter *padapter) padapter->bDriverStopped = false; padapter->bSurpriseRemoved = false; - padapter->bCardDisableWOHSM = false; status = rtw_hal_init(padapter); if (status == _FAIL) @@ -718,13 +716,11 @@ int rtw_ips_pwr_up(struct adapter *padapter) void rtw_ips_pwr_down(struct adapter *padapter) { - padapter->bCardDisableWOHSM = true; padapter->net_closed = true; rtw_led_control(padapter, LED_CTL_POWER_OFF); rtw_ips_dev_unload(padapter); - padapter->bCardDisableWOHSM = false; } static void rtw_fifo_cleanup(struct adapter *adapter) From db213ea614e0b65d4be9c1cb78d14465a2c5b146 Mon Sep 17 00:00:00 2001 From: Aaron Lawrence Date: Mon, 24 Oct 2022 19:26:07 +0700 Subject: [PATCH 0441/4122] Staging: rtl8192e: rtllib_tx: fixed multiple blank lines 
Multiple blank lines are condensed into a single blank line as per the Linux kernel coding-style regulations. The issues were flagged by the checkpatch script. Signed-off-by: Aaron Lawrence Tested-by: Philipp Hortmann Link: https://lore.kernel.org/r/a3e044ff9e5b198af15ebc8b91fd048743881340.1666612946.git.t4rmin@zohomail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtllib_tx.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/staging/rtl8192e/rtllib_tx.c b/drivers/staging/rtl8192e/rtllib_tx.c index e307020580a0..abb5b57d142b 100644 --- a/drivers/staging/rtl8192e/rtllib_tx.c +++ b/drivers/staging/rtl8192e/rtllib_tx.c @@ -191,7 +191,6 @@ int rtllib_encrypt_fragment(struct rtllib_device *ieee, struct sk_buff *frag, return 0; } - void rtllib_txb_free(struct rtllib_txb *txb) { if (unlikely(!txb)) @@ -475,7 +474,6 @@ NO_PROTECTION: tcb_desc->bRTSBW = false; } - static void rtllib_txrate_selectmode(struct rtllib_device *ieee, struct cb_desc *tcb_desc) { @@ -582,7 +580,6 @@ static int rtllib_xmit_inter(struct sk_buff *skb, struct net_device *dev) goto success; } - if (likely(ieee->raw_tx == 0)) { if (unlikely(skb->len < SNAP_SIZE + sizeof(u16))) { netdev_warn(ieee->dev, "skb too small (%d).\n", @@ -895,7 +892,6 @@ static int rtllib_xmit_inter(struct sk_buff *skb, struct net_device *dev) tcb_desc->bTxDisableRateFallBack = 1; } - tcb_desc->RATRIndex = 7; tcb_desc->bTxUseDriverAssingedRate = 1; } else { @@ -920,7 +916,6 @@ static int rtllib_xmit_inter(struct sk_buff *skb, struct net_device *dev) tcb_desc->bTxDisableRateFallBack = 1; } - tcb_desc->RATRIndex = 7; tcb_desc->bTxUseDriverAssingedRate = 1; tcb_desc->bdhcp = 1; From 2de698578ff331f98021d53e626e46acc4a91b46 Mon Sep 17 00:00:00 2001 From: Aaron Lawrence Date: Mon, 24 Oct 2022 19:26:59 +0700 Subject: [PATCH 0442/4122] Staging: rtl8192e: rtllib_tx: fixed alignment matching open parenthesis Aligned multiple statements to match the open parenthesis on the line before it as per the Linux 
kernel coding-style regulations. The issues were flagged by the checkpatch script. Signed-off-by: Aaron Lawrence Tested-by: Philipp Hortmann Link: https://lore.kernel.org/r/b9a984cf481018a8ef61e4a579cb307eaf1bdc56.1666612946.git.t4rmin@zohomail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtllib_tx.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/staging/rtl8192e/rtllib_tx.c b/drivers/staging/rtl8192e/rtllib_tx.c index abb5b57d142b..38dfdcf99fec 100644 --- a/drivers/staging/rtl8192e/rtllib_tx.c +++ b/drivers/staging/rtl8192e/rtllib_tx.c @@ -290,7 +290,7 @@ static void rtllib_tx_query_agg_cap(struct rtllib_device *ieee, return; if (pHTInfo->bCurrentAMPDUEnable) { if (!GetTs(ieee, (struct ts_common_info **)(&pTxTs), hdr->addr1, - skb->priority, TX_DIR, true)) { + skb->priority, TX_DIR, true)) { netdev_info(ieee->dev, "%s: can't get TS\n", __func__); return; } @@ -306,7 +306,7 @@ static void rtllib_tx_query_agg_cap(struct rtllib_device *ieee, goto FORCED_AGG_SETTING; } else if (!pTxTs->bUsingBa) { if (SN_LESS(pTxTs->TxAdmittedBARecord.ba_start_seq_ctrl.field.seq_num, - (pTxTs->TxCurSeq+1)%4096)) + (pTxTs->TxCurSeq+1)%4096)) pTxTs->bUsingBa = true; else goto FORCED_AGG_SETTING; @@ -442,7 +442,7 @@ static void rtllib_query_protectionmode(struct rtllib_device *ieee, u8 HTOpMode = pHTInfo->current_op_mode; if ((pHTInfo->bCurBW40MHz && (HTOpMode == 2 || - HTOpMode == 3)) || + HTOpMode == 3)) || (!pHTInfo->bCurBW40MHz && HTOpMode == 3)) { tcb_desc->rts_rate = MGN_24M; tcb_desc->bRTSEnable = true; @@ -501,7 +501,7 @@ static u16 rtllib_query_seqnum(struct rtllib_device *ieee, struct sk_buff *skb, struct tx_ts_record *pTS = NULL; if (!GetTs(ieee, (struct ts_common_info **)(&pTS), dst, - skb->priority, TX_DIR, true)) + skb->priority, TX_DIR, true)) return 0; seqnum = pTS->TxCurSeq; pTS->TxCurSeq = (pTS->TxCurSeq+1)%4096; @@ -618,7 +618,7 @@ static int rtllib_xmit_inter(struct sk_buff *skb, struct net_device *dev) 
udp = (struct udphdr *)((u8 *)ip + (ip->ihl << 2)); if (((((u8 *)udp)[1] == 68) && - (((u8 *)udp)[3] == 67)) || + (((u8 *)udp)[3] == 67)) || ((((u8 *)udp)[1] == 67) && (((u8 *)udp)[3] == 68))) { bdhcp = true; @@ -712,11 +712,11 @@ static int rtllib_xmit_inter(struct sk_buff *skb, struct net_device *dev) /* in case we are a client verify acm is not set for this ac */ while (unlikely(ieee->wmm_acm & (0x01 << skb->priority))) { netdev_info(ieee->dev, "skb->priority = %x\n", - skb->priority); + skb->priority); if (wme_downgrade_ac(skb)) break; netdev_info(ieee->dev, "converted skb->priority = %x\n", - skb->priority); + skb->priority); } qos_ctl |= skb->priority; @@ -813,7 +813,7 @@ static int rtllib_xmit_inter(struct sk_buff *skb, struct net_device *dev) if ((qos_activated) && (!bIsMulticast)) { frag_hdr->seq_ctl = cpu_to_le16(rtllib_query_seqnum(ieee, skb_frag, - header.addr1)); + header.addr1)); frag_hdr->seq_ctl = cpu_to_le16(le16_to_cpu(frag_hdr->seq_ctl)<<4 | i); } else { From 5b773c5504205398ce7ebcf42c5092a7759b556a Mon Sep 17 00:00:00 2001 From: Aaron Lawrence Date: Mon, 24 Oct 2022 19:27:59 +0700 Subject: [PATCH 0443/4122] Staging: rtl8192e: rtllib_tx: added spaces around operators Added multiple spaces around operators which needs it as per the Linux kernel coding-style regulations. The issues were flagged by the checkpatch script. 
Signed-off-by: Aaron Lawrence Tested-by: Philipp Hortmann Link: https://lore.kernel.org/r/b8bf422436ff4b2d9b530c61ebf814582ab8836a.1666612946.git.t4rmin@zohomail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtllib_tx.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/staging/rtl8192e/rtllib_tx.c b/drivers/staging/rtl8192e/rtllib_tx.c index 38dfdcf99fec..fa998cd76917 100644 --- a/drivers/staging/rtl8192e/rtllib_tx.c +++ b/drivers/staging/rtl8192e/rtllib_tx.c @@ -306,7 +306,7 @@ static void rtllib_tx_query_agg_cap(struct rtllib_device *ieee, goto FORCED_AGG_SETTING; } else if (!pTxTs->bUsingBa) { if (SN_LESS(pTxTs->TxAdmittedBARecord.ba_start_seq_ctrl.field.seq_num, - (pTxTs->TxCurSeq+1)%4096)) + (pTxTs->TxCurSeq + 1) % 4096)) pTxTs->bUsingBa = true; else goto FORCED_AGG_SETTING; @@ -403,7 +403,7 @@ static void rtllib_query_protectionmode(struct rtllib_device *ieee, if (tcb_desc->bBroadcast || tcb_desc->bMulticast) return; - if (is_broadcast_ether_addr(skb->data+16)) + if (is_broadcast_ether_addr(skb->data + 16)) return; if (ieee->mode < IEEE_N_24G) { @@ -504,7 +504,7 @@ static u16 rtllib_query_seqnum(struct rtllib_device *ieee, struct sk_buff *skb, skb->priority, TX_DIR, true)) return 0; seqnum = pTS->TxCurSeq; - pTS->TxCurSeq = (pTS->TxCurSeq+1)%4096; + pTS->TxCurSeq = (pTS->TxCurSeq + 1) % 4096; return seqnum; } return 0; @@ -611,7 +611,7 @@ static int rtllib_xmit_inter(struct sk_buff *skb, struct net_device *dev) if (skb->len > 282) { if (ether_type == ETH_P_IP) { const struct iphdr *ip = (struct iphdr *) - ((u8 *)skb->data+14); + ((u8 *)skb->data + 14); if (ip->protocol == IPPROTO_UDP) { struct udphdr *udp; @@ -815,10 +815,10 @@ static int rtllib_xmit_inter(struct sk_buff *skb, struct net_device *dev) cpu_to_le16(rtllib_query_seqnum(ieee, skb_frag, header.addr1)); frag_hdr->seq_ctl = - cpu_to_le16(le16_to_cpu(frag_hdr->seq_ctl)<<4 | i); + cpu_to_le16(le16_to_cpu(frag_hdr->seq_ctl) << 4 | i); } else { 
frag_hdr->seq_ctl = - cpu_to_le16(ieee->seq_ctrl[0]<<4 | i); + cpu_to_le16(ieee->seq_ctrl[0] << 4 | i); } /* Put a SNAP header on the first fragment */ if (i == 0) { From 73fb5660fc300ecd9a611e1997c68bc81c8098db Mon Sep 17 00:00:00 2001 From: Aaron Lawrence Date: Mon, 24 Oct 2022 19:29:14 +0700 Subject: [PATCH 0444/4122] Staging: rtl8192e: rtllib_tx: fixed lines ending with an open parenthesis Fixed several lines that end with an open parenthesis to not end with it anymore, as per the Linux kernel coding-style regulations. The issues were flagged by the checkpatch script. Signed-off-by: Aaron Lawrence Tested-by: Philipp Hortmann Link: https://lore.kernel.org/r/a8e67d281b4e9ab1254e0b58294f566994ac7d9d.1666612946.git.t4rmin@zohomail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtllib_tx.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/staging/rtl8192e/rtllib_tx.c b/drivers/staging/rtl8192e/rtllib_tx.c index fa998cd76917..4647efb0f868 100644 --- a/drivers/staging/rtl8192e/rtllib_tx.c +++ b/drivers/staging/rtl8192e/rtllib_tx.c @@ -802,8 +802,8 @@ static int rtllib_xmit_inter(struct sk_buff *skb, struct net_device *dev) * MOREFRAGS bit to the frame control */ if (i != nr_frags - 1) { - frag_hdr->frame_ctl = cpu_to_le16( - fc | RTLLIB_FCTL_MOREFRAGS); + frag_hdr->frame_ctl = cpu_to_le16(fc | + RTLLIB_FCTL_MOREFRAGS); bytes = bytes_per_frag; } else { @@ -822,9 +822,9 @@ static int rtllib_xmit_inter(struct sk_buff *skb, struct net_device *dev) } /* Put a SNAP header on the first fragment */ if (i == 0) { - rtllib_put_snap( - skb_put(skb_frag, SNAP_SIZE + - sizeof(u16)), ether_type); + rtllib_put_snap(skb_put(skb_frag, + SNAP_SIZE + + sizeof(u16)), ether_type); bytes -= SNAP_SIZE + sizeof(u16); } From 438b4ade12582c358a6e6ab762408923f1af2d44 Mon Sep 17 00:00:00 2001 From: Aaron Lawrence Date: Mon, 24 Oct 2022 19:30:28 +0700 Subject: [PATCH 0445/4122] Staging: rtl8192e: rtllib_tx: removed unnecessary blank line 
before a close brace Removed a blank line before a closing brace as it is not necessary as per the Linux coding-style regulations. The issue was flagged by the checkpatch script. Signed-off-by: Aaron Lawrence Tested-by: Philipp Hortmann Link: https://lore.kernel.org/r/234a3e8d21080ae2a1f75127a5c1a0ed131db924.1666612946.git.t4rmin@zohomail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtllib_tx.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/staging/rtl8192e/rtllib_tx.c b/drivers/staging/rtl8192e/rtllib_tx.c index 4647efb0f868..101f44129145 100644 --- a/drivers/staging/rtl8192e/rtllib_tx.c +++ b/drivers/staging/rtl8192e/rtllib_tx.c @@ -954,7 +954,6 @@ static int rtllib_xmit_inter(struct sk_buff *skb, struct net_device *dev) netif_stop_queue(dev); stats->tx_errors++; return 1; - } netdev_tx_t rtllib_xmit(struct sk_buff *skb, struct net_device *dev) From 6e006508f6e20a0f09dd9e64bc23c0592d92cc2d Mon Sep 17 00:00:00 2001 From: Emily Peri Date: Mon, 24 Oct 2022 14:47:25 -0700 Subject: [PATCH 0446/4122] staging: rtl8723bs: Fix indentation in conditional statements Remove/add tabs in block statements in rtl8723bs/core to fix checkpatch warnings for suspect code indent for conditionals. 
Signed-off-by: Emily Peri Link: https://lore.kernel.org/r/Y1cH7br3mMcT4Dm5@marshmallow Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8723bs/core/rtw_mlme.c | 30 +++++++++---------- drivers/staging/rtl8723bs/core/rtw_mlme_ext.c | 2 +- drivers/staging/rtl8723bs/core/rtw_recv.c | 6 ++-- drivers/staging/rtl8723bs/core/rtw_sta_mgt.c | 2 +- 4 files changed, 19 insertions(+), 21 deletions(-) diff --git a/drivers/staging/rtl8723bs/core/rtw_mlme.c b/drivers/staging/rtl8723bs/core/rtw_mlme.c index 6498fd17e1d3..c6fd6cf741ef 100644 --- a/drivers/staging/rtl8723bs/core/rtw_mlme.c +++ b/drivers/staging/rtl8723bs/core/rtw_mlme.c @@ -389,7 +389,7 @@ int is_same_network(struct wlan_bssid_ex *src, struct wlan_bssid_ex *dst, u8 fea __le16 tmps, tmpd; if (rtw_bug_check(dst, src, &s_cap, &d_cap) == false) - return false; + return false; memcpy((u8 *)&tmps, rtw_get_capability_from_ie(src->ies), 2); memcpy((u8 *)&tmpd, rtw_get_capability_from_ie(dst->ies), 2); @@ -669,7 +669,7 @@ int rtw_is_desired_network(struct adapter *adapter, struct wlan_network *pnetwor uint ie_len = 0; if ((desired_encmode == Ndis802_11EncryptionDisabled) && (privacy != 0)) - bselected = false; + bselected = false; if (psecuritypriv->ndisauthtype == Ndis802_11AuthModeWPA2PSK) { p = rtw_get_ie(pnetwork->network.ies + _BEACON_IE_OFFSET_, WLAN_EID_RSN, &ie_len, (pnetwork->network.ie_length - _BEACON_IE_OFFSET_)); @@ -795,7 +795,7 @@ void rtw_surveydone_event_callback(struct adapter *adapter, u8 *pbuf) pmlmepriv->to_join = false; s_ret = rtw_select_and_join_from_scanned_queue(pmlmepriv); if (s_ret == _SUCCESS) { - _set_timer(&pmlmepriv->assoc_timer, MAX_JOIN_TIMEOUT); + _set_timer(&pmlmepriv->assoc_timer, MAX_JOIN_TIMEOUT); } else if (s_ret == 2) {/* there is no need to wait for join */ _clr_fwstate_(pmlmepriv, _FW_UNDER_LINKING); rtw_indicate_connect(adapter); @@ -2010,8 +2010,8 @@ int rtw_restruct_wmm_ie(struct adapter *adapter, u8 *in_ie, u8 *out_ie, uint in_ if (in_ie[i] == 0xDD && in_ie[i+2] == 0x00 
&& in_ie[i+3] == 0x50 && in_ie[i+4] == 0xF2 && in_ie[i+5] == 0x02 && i+5 < in_len) { /* WMM element ID and OUI */ for (j = i; j < i + 9; j++) { - out_ie[ielength] = in_ie[j]; - ielength++; + out_ie[ielength] = in_ie[j]; + ielength++; } out_ie[initial_out_len + 1] = 0x07; out_ie[initial_out_len + 6] = 0x00; @@ -2064,15 +2064,13 @@ static int rtw_append_pmkid(struct adapter *Adapter, int iEntry, u8 *ie, uint ie if (ie[13] <= 20) { /* The RSN IE didn't include the PMK ID, append the PMK information */ - ie[ie_len] = 1; - ie_len++; - ie[ie_len] = 0; /* PMKID count = 0x0100 */ - ie_len++; - memcpy(&ie[ie_len], &psecuritypriv->PMKIDList[iEntry].PMKID, 16); - - ie_len += 16; - ie[13] += 18;/* PMKID length = 2+16 */ - + ie[ie_len] = 1; + ie_len++; + ie[ie_len] = 0; /* PMKID count = 0x0100 */ + ie_len++; + memcpy(&ie[ie_len], &psecuritypriv->PMKIDList[iEntry].PMKID, 16); + ie_len += 16; + ie[13] += 18;/* PMKID length = 2+16 */ } return ie_len; } @@ -2091,9 +2089,9 @@ signed int rtw_restruct_sec_ie(struct adapter *adapter, u8 *in_ie, u8 *out_ie, u memcpy(out_ie, in_ie, 12); ielength = 12; if ((ndisauthmode == Ndis802_11AuthModeWPA) || (ndisauthmode == Ndis802_11AuthModeWPAPSK)) - authmode = WLAN_EID_VENDOR_SPECIFIC; + authmode = WLAN_EID_VENDOR_SPECIFIC; if ((ndisauthmode == Ndis802_11AuthModeWPA2) || (ndisauthmode == Ndis802_11AuthModeWPA2PSK)) - authmode = WLAN_EID_RSN; + authmode = WLAN_EID_RSN; if (check_fwstate(pmlmepriv, WIFI_UNDER_WPS)) { memcpy(out_ie+ielength, psecuritypriv->wps_ie, psecuritypriv->wps_ie_len); diff --git a/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c b/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c index 8e74b4f47b94..1148c9829890 100644 --- a/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c +++ b/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c @@ -5447,7 +5447,7 @@ u8 disconnect_hdl(struct adapter *padapter, unsigned char *pbuf) u8 val8; if (is_client_associated_to_ap(padapter)) - issue_deauth_ex(padapter, pnetwork->mac_address, 
WLAN_REASON_DEAUTH_LEAVING, param->deauth_timeout_ms/100, 100); + issue_deauth_ex(padapter, pnetwork->mac_address, WLAN_REASON_DEAUTH_LEAVING, param->deauth_timeout_ms/100, 100); if (((pmlmeinfo->state&0x03) == WIFI_FW_ADHOC_STATE) || ((pmlmeinfo->state&0x03) == WIFI_FW_AP_STATE)) { /* Stop BCN */ diff --git a/drivers/staging/rtl8723bs/core/rtw_recv.c b/drivers/staging/rtl8723bs/core/rtw_recv.c index 2825375bff94..7c7b6495965f 100644 --- a/drivers/staging/rtl8723bs/core/rtw_recv.c +++ b/drivers/staging/rtl8723bs/core/rtw_recv.c @@ -161,7 +161,7 @@ int rtw_free_recvframe(union recv_frame *precvframe, struct __queue *pfree_recv_ if (padapter) { if (pfree_recv_queue == &precvpriv->free_recv_queue) - precvpriv->free_recvframe_cnt++; + precvpriv->free_recvframe_cnt++; } spin_unlock_bh(&pfree_recv_queue->lock); return _SUCCESS; @@ -691,8 +691,8 @@ static signed int sta2sta_data_frame(struct adapter *adapter, union recv_frame * if (bmcast) { /* For AP mode, if DA == MCAST, then BSSID should be also MCAST */ if (!IS_MCAST(pattrib->bssid)) { - ret = _FAIL; - goto exit; + ret = _FAIL; + goto exit; } } else { /* not mc-frame */ /* For AP mode, if DA is non-MCAST, then it must be BSSID, and bssid == BSSID */ diff --git a/drivers/staging/rtl8723bs/core/rtw_sta_mgt.c b/drivers/staging/rtl8723bs/core/rtw_sta_mgt.c index beb11d89db18..c7de81f21bec 100644 --- a/drivers/staging/rtl8723bs/core/rtw_sta_mgt.c +++ b/drivers/staging/rtl8723bs/core/rtw_sta_mgt.c @@ -551,7 +551,7 @@ u8 rtw_access_ctrl(struct adapter *padapter, u8 *mac_addr) else if (pacl_list->mode == 2)/* deny unless in accept list */ res = match; else - res = true; + res = true; return res; } From 5f803b22bd2befdb43dfbee69d90932edd3cc1d0 Mon Sep 17 00:00:00 2001 From: Aaron Lawrence Date: Tue, 25 Oct 2022 17:44:02 +0700 Subject: [PATCH 0447/4122] staging: rtl8192e: rtllib_crypt_wep: multiple blank lines removal Removal of multiple unnecessary blank lines in accordance with the Linux kernel coding-style regulations. 
Said issues were detected on this file by the checkpatch script. Signed-off-by: Aaron Lawrence Link: https://lore.kernel.org/r/20221025104402.xvbfobi7sdnwlqs2@plymouth Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtllib_crypt_wep.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/staging/rtl8192e/rtllib_crypt_wep.c b/drivers/staging/rtl8192e/rtllib_crypt_wep.c index 7790271a6a40..062285e4d939 100644 --- a/drivers/staging/rtl8192e/rtllib_crypt_wep.c +++ b/drivers/staging/rtl8192e/rtllib_crypt_wep.c @@ -27,7 +27,6 @@ struct prism2_wep_data { struct arc4_ctx tx_ctx_arc4; }; - static void *prism2_wep_init(int keyidx) { struct prism2_wep_data *priv; @@ -46,7 +45,6 @@ static void *prism2_wep_init(int keyidx) return priv; } - static void prism2_wep_deinit(void *priv) { kfree_sensitive(priv); @@ -120,7 +118,6 @@ static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv) return 0; } - /* Perform WEP decryption on given struct buffer. Buffer includes whole WEP * part of the frame: IV (4 bytes), encrypted payload (including SNAP header), * ICV (4 bytes). len includes both IV and ICV.
@@ -180,7 +177,6 @@ static int prism2_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv) return 0; } - static int prism2_wep_set_key(void *key, int len, u8 *seq, void *priv) { struct prism2_wep_data *wep = priv; @@ -194,7 +190,6 @@ static int prism2_wep_set_key(void *key, int len, u8 *seq, void *priv) return 0; } - static int prism2_wep_get_key(void *key, int len, u8 *seq, void *priv) { struct prism2_wep_data *wep = priv; @@ -207,7 +202,6 @@ static int prism2_wep_get_key(void *key, int len, u8 *seq, void *priv) return wep->key_len; } - static void prism2_wep_print_stats(struct seq_file *m, void *priv) { struct prism2_wep_data *wep = priv; @@ -231,13 +225,11 @@ static struct lib80211_crypto_ops rtllib_crypt_wep = { .owner = THIS_MODULE, }; - static int __init rtllib_crypto_wep_init(void) { return lib80211_register_crypto_ops(&rtllib_crypt_wep); } - static void __exit rtllib_crypto_wep_exit(void) { lib80211_unregister_crypto_ops(&rtllib_crypt_wep); From 8cd75652300f957066b86e4ede5525c756fe7a42 Mon Sep 17 00:00:00 2001 From: Michael Straube Date: Tue, 25 Oct 2022 17:27:22 +0200 Subject: [PATCH 0448/4122] staging: r8188eu: remove unused macros from wifi.h There are some unused macros in wifi.h. Remove them. 
Signed-off-by: Michael Straube Link: https://lore.kernel.org/r/20221025152722.14926-1-straube.linux@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/include/wifi.h | 27 -------------------------- 1 file changed, 27 deletions(-) diff --git a/drivers/staging/r8188eu/include/wifi.h b/drivers/staging/r8188eu/include/wifi.h index fdb7c1bf3573..6b50089cea29 100644 --- a/drivers/staging/r8188eu/include/wifi.h +++ b/drivers/staging/r8188eu/include/wifi.h @@ -140,7 +140,6 @@ enum WIFI_REG_DOMAIN { #define _PWRMGT_ BIT(12) #define _MORE_DATA_ BIT(13) #define _PRIVACY_ BIT(14) -#define _ORDER_ BIT(15) #define SetToDs(pbuf) \ *(__le16 *)(pbuf) |= cpu_to_le16(_TO_DS_) @@ -187,17 +186,6 @@ enum WIFI_REG_DOMAIN { *(__le16 *)(pbuf) |= cpu_to_le16(type); \ } while (0) -#define GetTupleCache(pbuf) \ - (cpu_to_le16(*(unsigned short *)((size_t)(pbuf) + 22))) - -#define SetFragNum(pbuf, num) \ - do { \ - *(unsigned short *)((size_t)(pbuf) + 22) = \ - ((*(unsigned short *)((size_t)(pbuf) + 22)) & \ - le16_to_cpu(~(0x000f))) | \ - cpu_to_le16(0x0f & (num)); \ - } while (0) - #define SetSeqNum(pbuf, num) \ do { \ *(__le16 *)((size_t)(pbuf) + 22) = \ @@ -221,13 +209,6 @@ enum WIFI_REG_DOMAIN { #define GetAMsdu(pbuf) (((le16_to_cpu(*(__le16 *)pbuf)) >> 7) & 0x1) -#define SetAMsdu(pbuf, amsdu) \ - *(__le16 *)(pbuf) |= cpu_to_le16((amsdu & 1) << 7) - -#define GetTid(pbuf) (le16_to_cpu(*(__le16 *)((size_t)(pbuf) + \ - (((GetToDs(pbuf)<<1) | GetFrDs(pbuf)) == 3 ? 
\ - 30 : 24))) & 0x000f) - #define GetAddr1Ptr(pbuf) ((unsigned char *)((size_t)(pbuf) + 4)) #define GetAddr2Ptr(pbuf) ((unsigned char *)((size_t)(pbuf) + 10)) @@ -410,14 +391,6 @@ static inline unsigned char *get_hdr_bssid(unsigned char *pframe) Below is the definition for 802.11n ------------------------------------------------------------------------------*/ -#define SetOrderBit(pbuf) \ - do { \ - *(unsigned short *)(pbuf) |= cpu_to_le16(_ORDER_); \ - } while (0) - -#define GetOrderBit(pbuf) \ - (((*(unsigned short *)(pbuf)) & le16_to_cpu(_ORDER_)) != 0) - /** * struct rtw_ieee80211_bar - HT Block Ack Request * From c3db3c2fd9992c08f49aa93752d3c103c3a4f6aa Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Mon, 24 Oct 2022 19:30:12 +0200 Subject: [PATCH 0449/4122] f2fs: should put a page when checking the summary info The commit introduces another bug. Cc: stable@vger.kernel.org Fixes: c6ad7fd16657e ("f2fs: fix to do sanity check on summary info") Signed-off-by: Pavel Machek Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 4546e01b2ee0..dab794225cce 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1110,6 +1110,7 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, if (ofs_in_node >= max_addrs) { f2fs_err(sbi, "Inconsistent ofs_in_node:%u in summary, ino:%u, nid:%u, max:%u", ofs_in_node, dni->ino, dni->nid, max_addrs); + f2fs_put_page(node_page, 1); return false; } From 14dc00a0e2dbea4b685ab9723ff511fcfd223c18 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 17 Oct 2022 17:52:05 -0700 Subject: [PATCH 0450/4122] f2fs: let's avoid to get cp_rwsem twice by f2fs_evict_inode by d_invalidate f2fs_unlink -> f2fs_lock_op -> d_invalidate -> shrink_dentry_list -> iput_final -> f2fs_evict_inode -> f2fs_lock_op Reviewed-by: Chao Yu Tested-by: Yangtao Li Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index a389772fd212..e104409c3a0e 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -632,6 +632,8 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) goto fail; } f2fs_delete_entry(de, page, dir, inode); + f2fs_unlock_op(sbi); + #if IS_ENABLED(CONFIG_UNICODE) /* VFS negative dentries are incompatible with Encoding and * Case-insensitiveness. Eventually we'll want avoid @@ -642,8 +644,6 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) if (IS_CASEFOLDED(dir)) d_invalidate(dentry); #endif - f2fs_unlock_op(sbi); - if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); fail: From ae25e00ba84073450c07d8ffd2d74f914a027230 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 25 Oct 2022 18:32:49 +0300 Subject: [PATCH 0451/4122] x86/retpoline: Fix crash printing warning The first argument of WARN() is a condition, so this will use "addr" as the format string and possibly crash. Fixes: 3b6c1747da48 ("x86/retpoline: Add SKL retthunk retpolines") Signed-off-by: Dan Carpenter Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/Y1gBoUZrRK5N%2FlCB@kili/ --- arch/x86/kernel/alternative.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 19221d77dc27..b4ac4e58c010 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -418,7 +418,7 @@ clang_jcc: break; default: - WARN("%pS %px %*ph\n", addr, addr, 6, addr); + WARN(1, "%pS %px %*ph\n", addr, addr, 6, addr); return -1; } From d233ab3c5c5ed4b3d2201bddb71dab5a2946c31b Mon Sep 17 00:00:00 2001 From: Heinrich Schuchardt Date: Sun, 25 Sep 2022 02:47:57 +0200 Subject: [PATCH 0452/4122] riscv/vdso: typo therefor The adverbs 'therefor' and 'therefore' have different meaning. As the meaning here is 'consequently' the spelling should be 'therefore'. 
Signed-off-by: Heinrich Schuchardt Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20220925004757.9089-1-heinrich.schuchardt@canonical.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/vdso.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h index af981426fe0f..a7644f46d0e5 100644 --- a/arch/riscv/include/asm/vdso.h +++ b/arch/riscv/include/asm/vdso.h @@ -10,7 +10,7 @@ /* * All systems with an MMU have a VDSO, but systems without an MMU don't - * support shared libraries and therefor don't have one. + * support shared libraries and therefore don't have one. */ #ifdef CONFIG_MMU From 079f0c21ef6d79f80b19b64f5e0218d5a328c4cd Mon Sep 17 00:00:00 2001 From: Nico Boehr Date: Thu, 20 Oct 2022 16:31:55 +0200 Subject: [PATCH 0453/4122] s390/mm: gmap: sort out physical vs virtual pointers usage Fix virtual vs physical address confusion (which currently are the same). Signed-off-by: Nico Boehr Reviewed-by: Pierre Morel Reviewed-by: Claudio Imbrenda Link: https://lore.kernel.org/r/20221020143159.294605-2-nrb@linux.ibm.com Message-Id: <20221020143159.294605-2-nrb@linux.ibm.com> Signed-off-by: Janosch Frank --- arch/s390/mm/gmap.c | 147 +++++++++++++++++++++++--------------------- 1 file changed, 76 insertions(+), 71 deletions(-) diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 02d15c8dc92e..2ccfcc8a3863 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -72,7 +72,7 @@ static struct gmap *gmap_alloc(unsigned long limit) goto out_free; page->index = 0; list_add(&page->lru, &gmap->crst_list); - table = (unsigned long *) page_to_phys(page); + table = page_to_virt(page); crst_table_init(table, etype); gmap->table = table; gmap->asce = atype | _ASCE_TABLE_LENGTH | @@ -311,12 +311,12 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); if (!page) return -ENOMEM; - new = (unsigned long *) 
page_to_phys(page); + new = page_to_virt(page); crst_table_init(new, init); spin_lock(&gmap->guest_table_lock); if (*table & _REGION_ENTRY_INVALID) { list_add(&page->lru, &gmap->crst_list); - *table = (unsigned long) new | _REGION_ENTRY_LENGTH | + *table = __pa(new) | _REGION_ENTRY_LENGTH | (*table & _REGION_ENTRY_TYPE_MASK); page->index = gaddr; page = NULL; @@ -557,7 +557,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY, gaddr & _REGION1_MASK)) return -ENOMEM; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = __va(*table & _REGION_ENTRY_ORIGIN); } if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) { table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT; @@ -565,7 +565,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY, gaddr & _REGION2_MASK)) return -ENOMEM; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = __va(*table & _REGION_ENTRY_ORIGIN); } if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) { table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT; @@ -573,7 +573,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY, gaddr & _REGION3_MASK)) return -ENOMEM; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = __va(*table & _REGION_ENTRY_ORIGIN); } table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT; /* Walk the parent mm page table */ @@ -813,7 +813,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap, break; if (*table & _REGION_ENTRY_INVALID) return NULL; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = __va(*table & _REGION_ENTRY_ORIGIN); fallthrough; case _ASCE_TYPE_REGION2: table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT; @@ -821,7 +821,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap, 
break; if (*table & _REGION_ENTRY_INVALID) return NULL; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = __va(*table & _REGION_ENTRY_ORIGIN); fallthrough; case _ASCE_TYPE_REGION3: table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT; @@ -829,7 +829,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap, break; if (*table & _REGION_ENTRY_INVALID) return NULL; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = __va(*table & _REGION_ENTRY_ORIGIN); fallthrough; case _ASCE_TYPE_SEGMENT: table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT; @@ -837,7 +837,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap, break; if (*table & _REGION_ENTRY_INVALID) return NULL; - table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN); + table = __va(*table & _SEGMENT_ENTRY_ORIGIN); table += (gaddr & _PAGE_INDEX) >> _PAGE_SHIFT; } return table; @@ -1150,7 +1150,7 @@ int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val) if (pte_present(pte) && (pte_val(pte) & _PAGE_READ)) { address = pte_val(pte) & PAGE_MASK; address += gaddr & ~PAGE_MASK; - *val = *(unsigned long *) address; + *val = *(unsigned long *)__va(address); set_pte(ptep, set_pte_bit(*ptep, __pgprot(_PAGE_YOUNG))); /* Do *NOT* clear the _PAGE_INVALID bit! 
*/ rc = 0; @@ -1335,7 +1335,8 @@ static void __gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr, */ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr) { - unsigned long sto, *ste, *pgt; + unsigned long *ste; + phys_addr_t sto, pgt; struct page *page; BUG_ON(!gmap_is_shadow(sg)); @@ -1343,13 +1344,13 @@ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr) if (!ste || !(*ste & _SEGMENT_ENTRY_ORIGIN)) return; gmap_call_notifier(sg, raddr, raddr + _SEGMENT_SIZE - 1); - sto = (unsigned long) (ste - ((raddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT)); + sto = __pa(ste - ((raddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT)); gmap_idte_one(sto | _ASCE_TYPE_SEGMENT, raddr); - pgt = (unsigned long *)(*ste & _SEGMENT_ENTRY_ORIGIN); + pgt = *ste & _SEGMENT_ENTRY_ORIGIN; *ste = _SEGMENT_ENTRY_EMPTY; - __gmap_unshadow_pgt(sg, raddr, pgt); + __gmap_unshadow_pgt(sg, raddr, __va(pgt)); /* Free page table */ - page = pfn_to_page(__pa(pgt) >> PAGE_SHIFT); + page = phys_to_page(pgt); list_del(&page->lru); page_table_free_pgste(page); } @@ -1365,19 +1366,19 @@ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr) static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr, unsigned long *sgt) { - unsigned long *pgt; struct page *page; + phys_addr_t pgt; int i; BUG_ON(!gmap_is_shadow(sg)); for (i = 0; i < _CRST_ENTRIES; i++, raddr += _SEGMENT_SIZE) { if (!(sgt[i] & _SEGMENT_ENTRY_ORIGIN)) continue; - pgt = (unsigned long *)(sgt[i] & _REGION_ENTRY_ORIGIN); + pgt = sgt[i] & _REGION_ENTRY_ORIGIN; sgt[i] = _SEGMENT_ENTRY_EMPTY; - __gmap_unshadow_pgt(sg, raddr, pgt); + __gmap_unshadow_pgt(sg, raddr, __va(pgt)); /* Free page table */ - page = pfn_to_page(__pa(pgt) >> PAGE_SHIFT); + page = phys_to_page(pgt); list_del(&page->lru); page_table_free_pgste(page); } @@ -1392,7 +1393,8 @@ static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr, */ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr) { - unsigned long r3o, 
*r3e, *sgt; + unsigned long r3o, *r3e; + phys_addr_t sgt; struct page *page; BUG_ON(!gmap_is_shadow(sg)); @@ -1401,12 +1403,12 @@ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr) return; gmap_call_notifier(sg, raddr, raddr + _REGION3_SIZE - 1); r3o = (unsigned long) (r3e - ((raddr & _REGION3_INDEX) >> _REGION3_SHIFT)); - gmap_idte_one(r3o | _ASCE_TYPE_REGION3, raddr); - sgt = (unsigned long *)(*r3e & _REGION_ENTRY_ORIGIN); + gmap_idte_one(__pa(r3o) | _ASCE_TYPE_REGION3, raddr); + sgt = *r3e & _REGION_ENTRY_ORIGIN; *r3e = _REGION3_ENTRY_EMPTY; - __gmap_unshadow_sgt(sg, raddr, sgt); + __gmap_unshadow_sgt(sg, raddr, __va(sgt)); /* Free segment table */ - page = pfn_to_page(__pa(sgt) >> PAGE_SHIFT); + page = phys_to_page(sgt); list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } @@ -1422,19 +1424,19 @@ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr) static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr, unsigned long *r3t) { - unsigned long *sgt; struct page *page; + phys_addr_t sgt; int i; BUG_ON(!gmap_is_shadow(sg)); for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION3_SIZE) { if (!(r3t[i] & _REGION_ENTRY_ORIGIN)) continue; - sgt = (unsigned long *)(r3t[i] & _REGION_ENTRY_ORIGIN); + sgt = r3t[i] & _REGION_ENTRY_ORIGIN; r3t[i] = _REGION3_ENTRY_EMPTY; - __gmap_unshadow_sgt(sg, raddr, sgt); + __gmap_unshadow_sgt(sg, raddr, __va(sgt)); /* Free segment table */ - page = pfn_to_page(__pa(sgt) >> PAGE_SHIFT); + page = phys_to_page(sgt); list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } @@ -1449,7 +1451,8 @@ static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr, */ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr) { - unsigned long r2o, *r2e, *r3t; + unsigned long r2o, *r2e; + phys_addr_t r3t; struct page *page; BUG_ON(!gmap_is_shadow(sg)); @@ -1458,12 +1461,12 @@ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr) return; gmap_call_notifier(sg, raddr, 
raddr + _REGION2_SIZE - 1); r2o = (unsigned long) (r2e - ((raddr & _REGION2_INDEX) >> _REGION2_SHIFT)); - gmap_idte_one(r2o | _ASCE_TYPE_REGION2, raddr); - r3t = (unsigned long *)(*r2e & _REGION_ENTRY_ORIGIN); + gmap_idte_one(__pa(r2o) | _ASCE_TYPE_REGION2, raddr); + r3t = *r2e & _REGION_ENTRY_ORIGIN; *r2e = _REGION2_ENTRY_EMPTY; - __gmap_unshadow_r3t(sg, raddr, r3t); + __gmap_unshadow_r3t(sg, raddr, __va(r3t)); /* Free region 3 table */ - page = pfn_to_page(__pa(r3t) >> PAGE_SHIFT); + page = phys_to_page(r3t); list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } @@ -1479,7 +1482,7 @@ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr) static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr, unsigned long *r2t) { - unsigned long *r3t; + phys_addr_t r3t; struct page *page; int i; @@ -1487,11 +1490,11 @@ static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr, for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION2_SIZE) { if (!(r2t[i] & _REGION_ENTRY_ORIGIN)) continue; - r3t = (unsigned long *)(r2t[i] & _REGION_ENTRY_ORIGIN); + r3t = r2t[i] & _REGION_ENTRY_ORIGIN; r2t[i] = _REGION2_ENTRY_EMPTY; - __gmap_unshadow_r3t(sg, raddr, r3t); + __gmap_unshadow_r3t(sg, raddr, __va(r3t)); /* Free region 3 table */ - page = pfn_to_page(__pa(r3t) >> PAGE_SHIFT); + page = phys_to_page(r3t); list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } @@ -1506,8 +1509,9 @@ static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr, */ static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr) { - unsigned long r1o, *r1e, *r2t; + unsigned long r1o, *r1e; struct page *page; + phys_addr_t r2t; BUG_ON(!gmap_is_shadow(sg)); r1e = gmap_table_walk(sg, raddr, 4); /* get region-1 pointer */ @@ -1515,12 +1519,12 @@ static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr) return; gmap_call_notifier(sg, raddr, raddr + _REGION1_SIZE - 1); r1o = (unsigned long) (r1e - ((raddr & _REGION1_INDEX) >> _REGION1_SHIFT)); - 
gmap_idte_one(r1o | _ASCE_TYPE_REGION1, raddr); - r2t = (unsigned long *)(*r1e & _REGION_ENTRY_ORIGIN); + gmap_idte_one(__pa(r1o) | _ASCE_TYPE_REGION1, raddr); + r2t = *r1e & _REGION_ENTRY_ORIGIN; *r1e = _REGION1_ENTRY_EMPTY; - __gmap_unshadow_r2t(sg, raddr, r2t); + __gmap_unshadow_r2t(sg, raddr, __va(r2t)); /* Free region 2 table */ - page = pfn_to_page(__pa(r2t) >> PAGE_SHIFT); + page = phys_to_page(r2t); list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } @@ -1536,22 +1540,23 @@ static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr) static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr, unsigned long *r1t) { - unsigned long asce, *r2t; + unsigned long asce; struct page *page; + phys_addr_t r2t; int i; BUG_ON(!gmap_is_shadow(sg)); - asce = (unsigned long) r1t | _ASCE_TYPE_REGION1; + asce = __pa(r1t) | _ASCE_TYPE_REGION1; for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION1_SIZE) { if (!(r1t[i] & _REGION_ENTRY_ORIGIN)) continue; - r2t = (unsigned long *)(r1t[i] & _REGION_ENTRY_ORIGIN); - __gmap_unshadow_r2t(sg, raddr, r2t); + r2t = r1t[i] & _REGION_ENTRY_ORIGIN; + __gmap_unshadow_r2t(sg, raddr, __va(r2t)); /* Clear entry and flush translation r1t -> r2t */ gmap_idte_one(asce, raddr); r1t[i] = _REGION1_ENTRY_EMPTY; /* Free region 2 table */ - page = pfn_to_page(__pa(r2t) >> PAGE_SHIFT); + page = phys_to_page(r2t); list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } @@ -1573,7 +1578,7 @@ static void gmap_unshadow(struct gmap *sg) sg->removed = 1; gmap_call_notifier(sg, 0, -1UL); gmap_flush_tlb(sg); - table = (unsigned long *)(sg->asce & _ASCE_ORIGIN); + table = __va(sg->asce & _ASCE_ORIGIN); switch (sg->asce & _ASCE_TYPE_MASK) { case _ASCE_TYPE_REGION1: __gmap_unshadow_r1t(sg, 0, table); @@ -1748,7 +1753,8 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, int fake) { unsigned long raddr, origin, offset, len; - unsigned long *s_r2t, *table; + unsigned long *table; + phys_addr_t s_r2t; 
struct page *page; int rc; @@ -1760,7 +1766,7 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, page->index = r2t & _REGION_ENTRY_ORIGIN; if (fake) page->index |= GMAP_SHADOW_FAKE_TABLE; - s_r2t = (unsigned long *) page_to_phys(page); + s_r2t = page_to_phys(page); /* Install shadow region second table */ spin_lock(&sg->guest_table_lock); table = gmap_table_walk(sg, saddr, 4); /* get region-1 pointer */ @@ -1775,9 +1781,9 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, rc = -EAGAIN; /* Race with shadow */ goto out_free; } - crst_table_init(s_r2t, _REGION2_ENTRY_EMPTY); + crst_table_init(__va(s_r2t), _REGION2_ENTRY_EMPTY); /* mark as invalid as long as the parent table is not protected */ - *table = (unsigned long) s_r2t | _REGION_ENTRY_LENGTH | + *table = s_r2t | _REGION_ENTRY_LENGTH | _REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID; if (sg->edat_level >= 1) *table |= (r2t & _REGION_ENTRY_PROTECT); @@ -1798,8 +1804,7 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, spin_lock(&sg->guest_table_lock); if (!rc) { table = gmap_table_walk(sg, saddr, 4); - if (!table || (*table & _REGION_ENTRY_ORIGIN) != - (unsigned long) s_r2t) + if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_r2t) rc = -EAGAIN; /* Race with unshadow */ else *table &= ~_REGION_ENTRY_INVALID; @@ -1832,7 +1837,8 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, int fake) { unsigned long raddr, origin, offset, len; - unsigned long *s_r3t, *table; + unsigned long *table; + phys_addr_t s_r3t; struct page *page; int rc; @@ -1844,7 +1850,7 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, page->index = r3t & _REGION_ENTRY_ORIGIN; if (fake) page->index |= GMAP_SHADOW_FAKE_TABLE; - s_r3t = (unsigned long *) page_to_phys(page); + s_r3t = page_to_phys(page); /* Install shadow region second table */ spin_lock(&sg->guest_table_lock); table = gmap_table_walk(sg, 
saddr, 3); /* get region-2 pointer */ @@ -1859,9 +1865,9 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, rc = -EAGAIN; /* Race with shadow */ goto out_free; } - crst_table_init(s_r3t, _REGION3_ENTRY_EMPTY); + crst_table_init(__va(s_r3t), _REGION3_ENTRY_EMPTY); /* mark as invalid as long as the parent table is not protected */ - *table = (unsigned long) s_r3t | _REGION_ENTRY_LENGTH | + *table = s_r3t | _REGION_ENTRY_LENGTH | _REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID; if (sg->edat_level >= 1) *table |= (r3t & _REGION_ENTRY_PROTECT); @@ -1882,8 +1888,7 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, spin_lock(&sg->guest_table_lock); if (!rc) { table = gmap_table_walk(sg, saddr, 3); - if (!table || (*table & _REGION_ENTRY_ORIGIN) != - (unsigned long) s_r3t) + if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_r3t) rc = -EAGAIN; /* Race with unshadow */ else *table &= ~_REGION_ENTRY_INVALID; @@ -1916,7 +1921,8 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, int fake) { unsigned long raddr, origin, offset, len; - unsigned long *s_sgt, *table; + unsigned long *table; + phys_addr_t s_sgt; struct page *page; int rc; @@ -1928,7 +1934,7 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, page->index = sgt & _REGION_ENTRY_ORIGIN; if (fake) page->index |= GMAP_SHADOW_FAKE_TABLE; - s_sgt = (unsigned long *) page_to_phys(page); + s_sgt = page_to_phys(page); /* Install shadow region second table */ spin_lock(&sg->guest_table_lock); table = gmap_table_walk(sg, saddr, 2); /* get region-3 pointer */ @@ -1943,9 +1949,9 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, rc = -EAGAIN; /* Race with shadow */ goto out_free; } - crst_table_init(s_sgt, _SEGMENT_ENTRY_EMPTY); + crst_table_init(__va(s_sgt), _SEGMENT_ENTRY_EMPTY); /* mark as invalid as long as the parent table is not protected */ - *table = (unsigned long) s_sgt | 
_REGION_ENTRY_LENGTH | + *table = s_sgt | _REGION_ENTRY_LENGTH | _REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID; if (sg->edat_level >= 1) *table |= sgt & _REGION_ENTRY_PROTECT; @@ -1966,8 +1972,7 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, spin_lock(&sg->guest_table_lock); if (!rc) { table = gmap_table_walk(sg, saddr, 2); - if (!table || (*table & _REGION_ENTRY_ORIGIN) != - (unsigned long) s_sgt) + if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_sgt) rc = -EAGAIN; /* Race with unshadow */ else *table &= ~_REGION_ENTRY_INVALID; @@ -2040,8 +2045,9 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt, int fake) { unsigned long raddr, origin; - unsigned long *s_pgt, *table; + unsigned long *table; struct page *page; + phys_addr_t s_pgt; int rc; BUG_ON(!gmap_is_shadow(sg) || (pgt & _SEGMENT_ENTRY_LARGE)); @@ -2052,7 +2058,7 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt, page->index = pgt & _SEGMENT_ENTRY_ORIGIN; if (fake) page->index |= GMAP_SHADOW_FAKE_TABLE; - s_pgt = (unsigned long *) page_to_phys(page); + s_pgt = page_to_phys(page); /* Install shadow page table */ spin_lock(&sg->guest_table_lock); table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */ @@ -2085,8 +2091,7 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt, spin_lock(&sg->guest_table_lock); if (!rc) { table = gmap_table_walk(sg, saddr, 1); - if (!table || (*table & _SEGMENT_ENTRY_ORIGIN) != - (unsigned long) s_pgt) + if (!table || (*table & _SEGMENT_ENTRY_ORIGIN) != s_pgt) rc = -EAGAIN; /* Race with unshadow */ else *table &= ~_SEGMENT_ENTRY_INVALID; From 6b33e68ab30949f9657e2acc59766977ae63e1cc Mon Sep 17 00:00:00 2001 From: Nico Boehr Date: Thu, 20 Oct 2022 16:31:56 +0200 Subject: [PATCH 0454/4122] s390/entry: sort out physical vs virtual pointers usage in sie64a Fix virtual vs physical address confusion (which currently are the same). 
sie_block is accessed in entry.S and passed to hardware, which is why both its physical and virtual addresses are needed. To avoid every caller having to do the virtual-physical conversion, add a new function sie64a() which converts the virtual address to physical. Signed-off-by: Nico Boehr Reviewed-by: Alexander Gordeev Reviewed-by: Claudio Imbrenda Link: https://lore.kernel.org/r/20221020143159.294605-3-nrb@linux.ibm.com Message-Id: <20221020143159.294605-3-nrb@linux.ibm.com> Signed-off-by: Janosch Frank --- arch/s390/include/asm/kvm_host.h | 8 +++++++- arch/s390/include/asm/stacktrace.h | 1 + arch/s390/kernel/asm-offsets.c | 1 + arch/s390/kernel/entry.S | 26 +++++++++++++++----------- 4 files changed, 24 insertions(+), 12 deletions(-) diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index b1e98a9ed152..9a31d00e99b3 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -1017,7 +1017,13 @@ void kvm_arch_crypto_clear_masks(struct kvm *kvm); void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm, unsigned long *aqm, unsigned long *adm); -extern int sie64a(struct kvm_s390_sie_block *, u64 *); +int __sie64a(phys_addr_t sie_block_phys, struct kvm_s390_sie_block *sie_block, u64 *rsa); + +static inline int sie64a(struct kvm_s390_sie_block *sie_block, u64 *rsa) +{ + return __sie64a(virt_to_phys(sie_block), sie_block, rsa); +} + extern char sie_exit; extern int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc); diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h index b23c658dce77..1802be5abb5d 100644 --- a/arch/s390/include/asm/stacktrace.h +++ b/arch/s390/include/asm/stacktrace.h @@ -46,6 +46,7 @@ struct stack_frame { unsigned long sie_savearea; unsigned long sie_reason; unsigned long sie_flags; + unsigned long sie_control_block_phys; }; }; unsigned long gprs[10]; diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index
d8ce965c0a97..3f8e760298c2 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -62,6 +62,7 @@ int main(void) OFFSET(__SF_SIE_SAVEAREA, stack_frame, sie_savearea); OFFSET(__SF_SIE_REASON, stack_frame, sie_reason); OFFSET(__SF_SIE_FLAGS, stack_frame, sie_flags); + OFFSET(__SF_SIE_CONTROL_PHYS, stack_frame, sie_control_block_phys); DEFINE(STACK_FRAME_OVERHEAD, sizeof(struct stack_frame)); BLANK(); /* idle data offsets */ diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index d2a1f2f4f5b8..12e1773a94a4 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -225,18 +225,20 @@ ENDPROC(__switch_to) #if IS_ENABLED(CONFIG_KVM) /* - * sie64a calling convention: - * %r2 pointer to sie control block - * %r3 guest register save area + * __sie64a calling convention: + * %r2 pointer to sie control block phys + * %r3 pointer to sie control block virt + * %r4 guest register save area */ -ENTRY(sie64a) +ENTRY(__sie64a) stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers lg %r12,__LC_CURRENT - stg %r2,__SF_SIE_CONTROL(%r15) # save control block pointer - stg %r3,__SF_SIE_SAVEAREA(%r15) # save guest register save area + stg %r2,__SF_SIE_CONTROL_PHYS(%r15) # save sie block physical.. 
+ stg %r3,__SF_SIE_CONTROL(%r15) # ...and virtual addresses + stg %r4,__SF_SIE_SAVEAREA(%r15) # save guest register save area xc __SF_SIE_REASON(8,%r15),__SF_SIE_REASON(%r15) # reason code = 0 mvc __SF_SIE_FLAGS(8,%r15),__TI_flags(%r12) # copy thread flags - lmg %r0,%r13,0(%r3) # load guest gprs 0-13 + lmg %r0,%r13,0(%r4) # load guest gprs 0-13 lg %r14,__LC_GMAP # get gmap pointer ltgr %r14,%r14 jz .Lsie_gmap @@ -248,6 +250,7 @@ ENTRY(sie64a) jnz .Lsie_skip TSTMSK __LC_CPU_FLAGS,_CIF_FPU jo .Lsie_skip # exit if fp/vx regs changed + lg %r14,__SF_SIE_CONTROL_PHYS(%r15) # get sie block phys addr BPEXIT __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) .Lsie_entry: sie 0(%r14) @@ -258,13 +261,14 @@ ENTRY(sie64a) BPOFF BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) .Lsie_skip: + lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce .Lsie_done: # some program checks are suppressing. C code (e.g. do_protection_exception) # will rewind the PSW by the ILC, which is often 4 bytes in case of SIE. There # are some corner cases (e.g. runtime instrumentation) where ILC is unpredictable. -# Other instructions between sie64a and .Lsie_done should not cause program +# Other instructions between __sie64a and .Lsie_done should not cause program # interrupts. So lets use 3 nops as a landing pad for all possible rewinds. 
.Lrewind_pad6: nopr 7 @@ -293,8 +297,8 @@ sie_exit: EX_TABLE(.Lrewind_pad4,.Lsie_fault) EX_TABLE(.Lrewind_pad2,.Lsie_fault) EX_TABLE(sie_exit,.Lsie_fault) -ENDPROC(sie64a) -EXPORT_SYMBOL(sie64a) +ENDPROC(__sie64a) +EXPORT_SYMBOL(__sie64a) EXPORT_SYMBOL(sie_exit) #endif @@ -373,7 +377,7 @@ ENTRY(pgm_check_handler) j 3f # -> fault in user space .Lpgm_skip_asce: #if IS_ENABLED(CONFIG_KVM) - # cleanup critical section for program checks in sie64a + # cleanup critical section for program checks in __sie64a OUTSIDE %r9,.Lsie_gmap,.Lsie_done,1f SIEEXIT lghi %r10,_PIF_GUEST_FAULT From fe0ef00304639cae82df7c9ad6a15286bd5f876e Mon Sep 17 00:00:00 2001 From: Nico Boehr Date: Thu, 20 Oct 2022 16:31:57 +0200 Subject: [PATCH 0455/4122] KVM: s390: sort out physical vs virtual pointers usage Fix virtual vs physical address confusion (which currently are the same). Signed-off-by: Nico Boehr Reviewed-by: Claudio Imbrenda Link: https://lore.kernel.org/r/20221020143159.294605-4-nrb@linux.ibm.com Message-Id: <20221020143159.294605-4-nrb@linux.ibm.com> Signed-off-by: Janosch Frank --- arch/s390/include/asm/kvm_host.h | 1 + arch/s390/kvm/intercept.c | 2 +- arch/s390/kvm/kvm-s390.c | 44 ++++++++++++++++++-------------- arch/s390/kvm/kvm-s390.h | 5 ++-- 4 files changed, 30 insertions(+), 22 deletions(-) diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 9a31d00e99b3..931f97875899 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -276,6 +276,7 @@ struct kvm_s390_sie_block { #define ECB3_AES 0x04 #define ECB3_RI 0x01 __u8 ecb3; /* 0x0063 */ +#define ESCA_SCAOL_MASK ~0x3fU __u32 scaol; /* 0x0064 */ __u8 sdf; /* 0x0068 */ __u8 epdx; /* 0x0069 */ diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 88112065d941..b703b5202f25 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -217,7 +217,7 @@ static int handle_itdb(struct kvm_vcpu *vcpu) return 0; if (current->thread.per_flags & 
PER_FLAG_NO_TE) return 0; - itdb = (struct kvm_s390_itdb *)vcpu->arch.sie_block->itdba; + itdb = phys_to_virt(vcpu->arch.sie_block->itdba); rc = write_guest_lc(vcpu, __LC_PGM_TDB, itdb, sizeof(*itdb)); if (rc) return rc; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 45d4b8182b07..0f7ff0c9019f 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -3329,28 +3329,30 @@ static void sca_del_vcpu(struct kvm_vcpu *vcpu) static void sca_add_vcpu(struct kvm_vcpu *vcpu) { if (!kvm_s390_use_sca_entries()) { - struct bsca_block *sca = vcpu->kvm->arch.sca; + phys_addr_t sca_phys = virt_to_phys(vcpu->kvm->arch.sca); /* we still need the basic sca for the ipte control */ - vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); - vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; + vcpu->arch.sie_block->scaoh = sca_phys >> 32; + vcpu->arch.sie_block->scaol = sca_phys; return; } read_lock(&vcpu->kvm->arch.sca_lock); if (vcpu->kvm->arch.use_esca) { struct esca_block *sca = vcpu->kvm->arch.sca; + phys_addr_t sca_phys = virt_to_phys(sca); - sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; - vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); - vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU; + sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block); + vcpu->arch.sie_block->scaoh = sca_phys >> 32; + vcpu->arch.sie_block->scaol = sca_phys & ESCA_SCAOL_MASK; vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); } else { struct bsca_block *sca = vcpu->kvm->arch.sca; + phys_addr_t sca_phys = virt_to_phys(sca); - sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; - vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); - vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; + sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block); + vcpu->arch.sie_block->scaoh = sca_phys >> 32; + vcpu->arch.sie_block->scaol = sca_phys; set_bit_inv(vcpu->vcpu_id, 
(unsigned long *) &sca->mcn); } read_unlock(&vcpu->kvm->arch.sca_lock); @@ -3381,6 +3383,7 @@ static int sca_switch_to_extended(struct kvm *kvm) struct kvm_vcpu *vcpu; unsigned long vcpu_idx; u32 scaol, scaoh; + phys_addr_t new_sca_phys; if (kvm->arch.use_esca) return 0; @@ -3389,8 +3392,9 @@ static int sca_switch_to_extended(struct kvm *kvm) if (!new_sca) return -ENOMEM; - scaoh = (u32)((u64)(new_sca) >> 32); - scaol = (u32)(u64)(new_sca) & ~0x3fU; + new_sca_phys = virt_to_phys(new_sca); + scaoh = new_sca_phys >> 32; + scaol = new_sca_phys & ESCA_SCAOL_MASK; kvm_s390_vcpu_block_all(kvm); write_lock(&kvm->arch.sca_lock); @@ -3610,15 +3614,18 @@ static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu) { - free_page(vcpu->arch.sie_block->cbrlo); + free_page((unsigned long)phys_to_virt(vcpu->arch.sie_block->cbrlo)); vcpu->arch.sie_block->cbrlo = 0; } int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu) { - vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT); - if (!vcpu->arch.sie_block->cbrlo) + void *cbrlo_page = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); + + if (!cbrlo_page) return -ENOMEM; + + vcpu->arch.sie_block->cbrlo = virt_to_phys(cbrlo_page); return 0; } @@ -3628,7 +3635,7 @@ static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->ibc = model->ibc; if (test_kvm_facility(vcpu->kvm, 7)) - vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list; + vcpu->arch.sie_block->fac = virt_to_phys(model->fac_list); } static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu) @@ -3685,9 +3692,8 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu) VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u", vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id); } - vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx) - | SDNXC; - vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb; + vcpu->arch.sie_block->sdnxo = 
virt_to_phys(&vcpu->run->s.regs.sdnx) | SDNXC; + vcpu->arch.sie_block->riccbd = virt_to_phys(&vcpu->run->s.regs.riccb); if (sclp.has_kss) kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS); @@ -3737,7 +3743,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) return -ENOMEM; vcpu->arch.sie_block = &sie_page->sie_block; - vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; + vcpu->arch.sie_block->itdba = virt_to_phys(&sie_page->itdb); /* the real guest size will always be smaller than msl */ vcpu->arch.sie_block->mso = 0; diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index f6fd668f887e..a60d1e5c44cd 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -23,7 +23,8 @@ /* Transactional Memory Execution related macros */ #define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & ECB_TE)) #define TDB_FORMAT1 1 -#define IS_ITDB_VALID(vcpu) ((*(char *)vcpu->arch.sie_block->itdba == TDB_FORMAT1)) +#define IS_ITDB_VALID(vcpu) \ + ((*(char *)phys_to_virt((vcpu)->arch.sie_block->itdba) == TDB_FORMAT1)) extern debug_info_t *kvm_s390_dbf; extern debug_info_t *kvm_s390_dbf_uv; @@ -233,7 +234,7 @@ static inline unsigned long kvm_s390_get_gfn_end(struct kvm_memslots *slots) static inline u32 kvm_s390_get_gisa_desc(struct kvm *kvm) { - u32 gd = (u32)(u64)kvm->arch.gisa_int.origin; + u32 gd = virt_to_phys(kvm->arch.gisa_int.origin); if (gd && sclp.has_gisaf) gd |= GISA_FORMAT1; From b99f4512197acc10f63b5fb462c088c2f62b5120 Mon Sep 17 00:00:00 2001 From: Nico Boehr Date: Thu, 20 Oct 2022 16:31:58 +0200 Subject: [PATCH 0456/4122] KVM: s390: sida: sort out physical vs virtual pointers usage All callers of the sida_origin() macro actually expected a virtual address, so rename it to sida_addr() and hand out a virtual address. At some places, the macro wasn't used, potentially creating problems if the sida size ever becomes nonzero (not currently the case), so let's start using it everywhere now while at it. 
Signed-off-by: Nico Boehr Reviewed-by: Claudio Imbrenda Link: https://lore.kernel.org/r/20221020143159.294605-5-nrb@linux.ibm.com Message-Id: <20221020143159.294605-5-nrb@linux.ibm.com> Signed-off-by: Janosch Frank --- arch/s390/include/asm/kvm_host.h | 3 +-- arch/s390/kvm/intercept.c | 7 +++---- arch/s390/kvm/kvm-s390.c | 9 +++++---- arch/s390/kvm/priv.c | 3 +-- arch/s390/kvm/pv.c | 8 +++++--- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 931f97875899..21f1339a4197 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -142,8 +142,7 @@ struct mcck_volatile_info { CR14_EXTERNAL_DAMAGE_SUBMASK) #define SIDAD_SIZE_MASK 0xff -#define sida_origin(sie_block) \ - ((sie_block)->sidad & PAGE_MASK) +#define sida_addr(sie_block) phys_to_virt((sie_block)->sidad & PAGE_MASK) #define sida_size(sie_block) \ ((((sie_block)->sidad & SIDAD_SIZE_MASK) + 1) * PAGE_SIZE) diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index b703b5202f25..0ee02dae14b2 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -409,8 +409,7 @@ int handle_sthyi(struct kvm_vcpu *vcpu) out: if (!cc) { if (kvm_s390_pv_cpu_is_protected(vcpu)) { - memcpy((void *)(sida_origin(vcpu->arch.sie_block)), - sctns, PAGE_SIZE); + memcpy(sida_addr(vcpu->arch.sie_block), sctns, PAGE_SIZE); } else { r = write_guest(vcpu, addr, reg2, sctns, PAGE_SIZE); if (r) { @@ -464,7 +463,7 @@ static int handle_operexc(struct kvm_vcpu *vcpu) static int handle_pv_spx(struct kvm_vcpu *vcpu) { - u32 pref = *(u32 *)vcpu->arch.sie_block->sidad; + u32 pref = *(u32 *)sida_addr(vcpu->arch.sie_block); kvm_s390_set_prefix(vcpu, pref); trace_kvm_s390_handle_prefix(vcpu, 1, pref); @@ -497,7 +496,7 @@ static int handle_pv_sclp(struct kvm_vcpu *vcpu) static int handle_pv_uvc(struct kvm_vcpu *vcpu) { - struct uv_cb_share *guest_uvcb = (void *)vcpu->arch.sie_block->sidad; + struct uv_cb_share 
*guest_uvcb = sida_addr(vcpu->arch.sie_block); struct uv_cb_cts uvcb = { .header.cmd = UVC_CMD_UNPIN_PAGE_SHARED, .header.len = sizeof(uvcb), diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 0f7ff0c9019f..bd6e0201bfe5 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -5167,6 +5167,7 @@ static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu, struct kvm_s390_mem_op *mop) { void __user *uaddr = (void __user *)mop->buf; + void *sida_addr; int r = 0; if (mop->flags || !mop->size) @@ -5178,16 +5179,16 @@ static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu, if (!kvm_s390_pv_cpu_is_protected(vcpu)) return -EINVAL; + sida_addr = (char *)sida_addr(vcpu->arch.sie_block) + mop->sida_offset; + switch (mop->op) { case KVM_S390_MEMOP_SIDA_READ: - if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) + - mop->sida_offset), mop->size)) + if (copy_to_user(uaddr, sida_addr, mop->size)) r = -EFAULT; break; case KVM_S390_MEMOP_SIDA_WRITE: - if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) + - mop->sida_offset), uaddr, mop->size)) + if (copy_from_user(sida_addr, uaddr, mop->size)) r = -EFAULT; break; } diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 3335fa09b6f1..9f8a192bd750 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -924,8 +924,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu) return -EREMOTE; } if (kvm_s390_pv_cpu_is_protected(vcpu)) { - memcpy((void *)sida_origin(vcpu->arch.sie_block), (void *)mem, - PAGE_SIZE); + memcpy(sida_addr(vcpu->arch.sie_block), (void *)mem, PAGE_SIZE); rc = 0; } else { rc = write_guest(vcpu, operand2, ar, (void *)mem, PAGE_SIZE); diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index 7cb7799a0acb..c7435c37cdfe 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -44,7 +44,7 @@ int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc) free_pages(vcpu->arch.pv.stor_base, get_order(uv_info.guest_cpu_stor_len)); - 
free_page(sida_origin(vcpu->arch.sie_block)); + free_page((unsigned long)sida_addr(vcpu->arch.sie_block)); vcpu->arch.sie_block->pv_handle_cpu = 0; vcpu->arch.sie_block->pv_handle_config = 0; memset(&vcpu->arch.pv, 0, sizeof(vcpu->arch.pv)); @@ -66,6 +66,7 @@ int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc) .header.cmd = UVC_CMD_CREATE_SEC_CPU, .header.len = sizeof(uvcb), }; + void *sida_addr; int cc; if (kvm_s390_pv_cpu_get_handle(vcpu)) @@ -83,12 +84,13 @@ int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc) uvcb.stor_origin = (u64)vcpu->arch.pv.stor_base; /* Alloc Secure Instruction Data Area Designation */ - vcpu->arch.sie_block->sidad = __get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); - if (!vcpu->arch.sie_block->sidad) { + sida_addr = (void *)__get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); + if (!sida_addr) { free_pages(vcpu->arch.pv.stor_base, get_order(uv_info.guest_cpu_stor_len)); return -ENOMEM; } + vcpu->arch.sie_block->sidad = virt_to_phys(sida_addr); cc = uv_call(0, (u64)&uvcb); *rc = uvcb.header.rc; From 4435b79a366495a5cb43b792d9e7d69d489428cd Mon Sep 17 00:00:00 2001 From: Nico Boehr Date: Thu, 20 Oct 2022 16:31:59 +0200 Subject: [PATCH 0457/4122] KVM: s390: pv: sort out physical vs virtual pointers usage Fix virtual vs physical address confusion (which currently are the same). 
Signed-off-by: Nico Boehr Reviewed-by: Claudio Imbrenda Link: https://lore.kernel.org/r/20221020143159.294605-6-nrb@linux.ibm.com Message-Id: <20221020143159.294605-6-nrb@linux.ibm.com> Signed-off-by: Janosch Frank --- arch/s390/kvm/pv.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index c7435c37cdfe..48c4f57d5d76 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -80,8 +80,8 @@ int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc) /* Input */ uvcb.guest_handle = kvm_s390_pv_get_handle(vcpu->kvm); uvcb.num = vcpu->arch.sie_block->icpua; - uvcb.state_origin = (u64)vcpu->arch.sie_block; - uvcb.stor_origin = (u64)vcpu->arch.pv.stor_base; + uvcb.state_origin = virt_to_phys(vcpu->arch.sie_block); + uvcb.stor_origin = virt_to_phys((void *)vcpu->arch.pv.stor_base); /* Alloc Secure Instruction Data Area Designation */ sida_addr = (void *)__get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); @@ -228,8 +228,9 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc) uvcb.guest_stor_origin = 0; /* MSO is 0 for KVM */ uvcb.guest_stor_len = kvm->arch.pv.guest_len; uvcb.guest_asce = kvm->arch.gmap->asce; - uvcb.guest_sca = (unsigned long)kvm->arch.sca; - uvcb.conf_base_stor_origin = (u64)kvm->arch.pv.stor_base; + uvcb.guest_sca = virt_to_phys(kvm->arch.sca); + uvcb.conf_base_stor_origin = + virt_to_phys((void *)kvm->arch.pv.stor_base); uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var; cc = uv_call_sched(0, (u64)&uvcb); From 77b533411595668659ce5aaade4ca36c7aa2c488 Mon Sep 17 00:00:00 2001 From: Nico Boehr Date: Tue, 25 Oct 2022 10:20:39 +0200 Subject: [PATCH 0458/4122] KVM: s390: VSIE: sort out virtual/physical address in pin_guest_page pin_guest_page() used page_to_virt() to calculate the hpa of the pinned page. This currently works, because virtual and physical addresses are the same. Use page_to_phys() instead to resolve the virtual-real address confusion. 
One caller of pin_guest_page() actually expected the hpa to be a hva, so add the missing phys_to_virt() conversion here. Signed-off-by: Nico Boehr Reviewed-by: Claudio Imbrenda Acked-by: David Hildenbrand Reviewed-by: Christian Borntraeger Link: https://lore.kernel.org/r/20221025082039.117372-2-nrb@linux.ibm.com Message-Id: <20221025082039.117372-2-nrb@linux.ibm.com> Signed-off-by: Janosch Frank --- arch/s390/kvm/vsie.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 94138f8f0c1c..0e9d020d7093 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -654,7 +654,7 @@ static int pin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t *hpa) page = gfn_to_page(kvm, gpa_to_gfn(gpa)); if (is_error_page(page)) return -EINVAL; - *hpa = (hpa_t) page_to_virt(page) + (gpa & ~PAGE_MASK); + *hpa = (hpa_t)page_to_phys(page) + (gpa & ~PAGE_MASK); return 0; } @@ -869,7 +869,7 @@ static int pin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, WARN_ON_ONCE(rc); return 1; } - vsie_page->scb_o = (struct kvm_s390_sie_block *) hpa; + vsie_page->scb_o = phys_to_virt(hpa); return 0; } From 2a903ca922d007a0b40ca425ce55b5f0a0e01956 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Fri, 7 Oct 2022 13:46:47 +0200 Subject: [PATCH 0459/4122] dt-bindings: gpio: Add gpio-latch binding document This adds a binding for a GPIO multiplexer driver based on latches connected to other GPIOs. 
Signed-off-by: Sascha Hauer Reviewed-by: Rob Herring Reviewed-by: Serge Semin Reviewed-by: Linus Walleij Signed-off-by: Bartosz Golaszewski --- .../devicetree/bindings/gpio/gpio-latch.yaml | 94 +++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 Documentation/devicetree/bindings/gpio/gpio-latch.yaml diff --git a/Documentation/devicetree/bindings/gpio/gpio-latch.yaml b/Documentation/devicetree/bindings/gpio/gpio-latch.yaml new file mode 100644 index 000000000000..1ed82a2cebda --- /dev/null +++ b/Documentation/devicetree/bindings/gpio/gpio-latch.yaml @@ -0,0 +1,94 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/gpio/gpio-latch.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: GPIO latch controller + +maintainers: + - Sascha Hauer + +description: | + This binding describes a GPIO multiplexer based on latches connected to + other GPIOs, like this: + + CLK0 ----------------------. ,--------. + CLK1 -------------------. `--------|> #0 | + | | | + OUT0 ----------------+--|-----------|D0 Q0|-----|< + OUT1 --------------+-|--|-----------|D1 Q1|-----|< + OUT2 ------------+-|-|--|-----------|D2 Q2|-----|< + OUT3 ----------+-|-|-|--|-----------|D3 Q3|-----|< + OUT4 --------+-|-|-|-|--|-----------|D4 Q4|-----|< + OUT5 ------+-|-|-|-|-|--|-----------|D5 Q5|-----|< + OUT6 ----+-|-|-|-|-|-|--|-----------|D6 Q6|-----|< + OUT7 --+-|-|-|-|-|-|-|--|-----------|D7 Q7|-----|< + | | | | | | | | | `--------' + | | | | | | | | | + | | | | | | | | | ,--------. 
+ | | | | | | | | `-----------|> #1 | + | | | | | | | | | | + | | | | | | | `--------------|D0 Q0|-----|< + | | | | | | `----------------|D1 Q1|-----|< + | | | | | `------------------|D2 Q2|-----|< + | | | | `--------------------|D3 Q3|-----|< + | | | `----------------------|D4 Q4|-----|< + | | `------------------------|D5 Q5|-----|< + | `--------------------------|D6 Q6|-----|< + `----------------------------|D7 Q7|-----|< + `--------' + + The number of clk-gpios and latched-gpios is not fixed. The actual number + of latches and the number of inputs per latch is derived from + the number of GPIOs given in the corresponding device tree properties. + +properties: + compatible: + const: gpio-latch + "#gpio-cells": + const: 2 + + clk-gpios: + description: Array of GPIOs to be used to clock a latch + + latched-gpios: + description: Array of GPIOs to be used as inputs per latch + + setup-duration-ns: + description: Delay in nanoseconds to wait after the latch inputs have been + set up + + clock-duration-ns: + description: Delay in nanoseconds to wait between clock output changes + + gpio-controller: true + + gpio-line-names: true + +required: + - compatible + - "#gpio-cells" + - gpio-controller + - clk-gpios + - latched-gpios + +additionalProperties: false + +examples: + - | + gpio-latch { + #gpio-cells = <2>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_di_do_leds>; + compatible = "gpio-latch"; + gpio-controller; + setup-duration-ns = <100>; + clock-duration-ns = <100>; + + clk-gpios = <&gpio3 7 0>, <&gpio3 8 0>; + latched-gpios = <&gpio3 21 0>, <&gpio3 22 0>, + <&gpio3 23 0>, <&gpio3 24 0>, + <&gpio3 25 0>, <&gpio3 26 0>, + <&gpio3 27 0>, <&gpio3 28 0>; + }; From 1454a928b637bd169d99fc91a46b3b36cea76f9f Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Fri, 7 Oct 2022 13:46:46 +0200 Subject: [PATCH 0460/4122] gpio: Add gpio latch driver This driver implements a GPIO multiplexer based on latches connected to other GPIOs. 
A set of data GPIOs is connected to the data input of multiple latches. The clock input of each latch is driven by another set of GPIOs. With two 8-bit latches 10 GPIOs can be multiplexed into 16 GPIOs. GPOs might be a better term as in fact the multiplexed pins are output only. Signed-off-by: Sascha Hauer Reviewed-by: Serge Semin Reviewed-by: Linus Walleij [Bartosz: fixed the strange of_device_id formatting] Signed-off-by: Bartosz Golaszewski --- drivers/gpio/Kconfig | 6 ++ drivers/gpio/Makefile | 1 + drivers/gpio/gpio-latch.c | 219 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 226 insertions(+) create mode 100644 drivers/gpio/gpio-latch.c diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 5a04990f03cc..8c756cb29214 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -1684,6 +1684,12 @@ config GPIO_AGGREGATOR industrial control context, to be operated from userspace using the GPIO chardev interface. +config GPIO_LATCH + tristate "GPIO latch driver" + help + Say yes here to enable a driver for GPIO multiplexers based on latches + connected to other GPIOs. 
+ config GPIO_MOCKUP tristate "GPIO Testing Driver" select IRQ_SIM diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile index 37a0b7ebda43..8629e9eaf79e 100644 --- a/drivers/gpio/Makefile +++ b/drivers/gpio/Makefile @@ -76,6 +76,7 @@ obj-$(CONFIG_GPIO_IT87) += gpio-it87.o obj-$(CONFIG_GPIO_IXP4XX) += gpio-ixp4xx.o obj-$(CONFIG_GPIO_JANZ_TTL) += gpio-janz-ttl.o obj-$(CONFIG_GPIO_KEMPLD) += gpio-kempld.o +obj-$(CONFIG_GPIO_LATCH) += gpio-latch.o obj-$(CONFIG_GPIO_LOGICVC) += gpio-logicvc.o obj-$(CONFIG_GPIO_LOONGSON1) += gpio-loongson1.o obj-$(CONFIG_GPIO_LOONGSON) += gpio-loongson.o diff --git a/drivers/gpio/gpio-latch.c b/drivers/gpio/gpio-latch.c new file mode 100644 index 000000000000..d7c3b20c8482 --- /dev/null +++ b/drivers/gpio/gpio-latch.c @@ -0,0 +1,219 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * GPIO latch driver + * + * Copyright (C) 2022 Sascha Hauer + * + * This driver implements a GPIO (or better GPO as there is no input) + * multiplexer based on latches like this: + * + * CLK0 ----------------------. ,--------. + * CLK1 -------------------. `--------|> #0 | + * | | | + * OUT0 ----------------+--|-----------|D0 Q0|-----|< + * OUT1 --------------+-|--|-----------|D1 Q1|-----|< + * OUT2 ------------+-|-|--|-----------|D2 Q2|-----|< + * OUT3 ----------+-|-|-|--|-----------|D3 Q3|-----|< + * OUT4 --------+-|-|-|-|--|-----------|D4 Q4|-----|< + * OUT5 ------+-|-|-|-|-|--|-----------|D5 Q5|-----|< + * OUT6 ----+-|-|-|-|-|-|--|-----------|D6 Q6|-----|< + * OUT7 --+-|-|-|-|-|-|-|--|-----------|D7 Q7|-----|< + * | | | | | | | | | `--------' + * | | | | | | | | | + * | | | | | | | | | ,--------. 
+ * | | | | | | | | `-----------|> #1 | + * | | | | | | | | | | + * | | | | | | | `--------------|D0 Q0|-----|< + * | | | | | | `----------------|D1 Q1|-----|< + * | | | | | `------------------|D2 Q2|-----|< + * | | | | `--------------------|D3 Q3|-----|< + * | | | `----------------------|D4 Q4|-----|< + * | | `------------------------|D5 Q5|-----|< + * | `--------------------------|D6 Q6|-----|< + * `----------------------------|D7 Q7|-----|< + * `--------' + * + * The above is just an example. The actual number of latches and + * the number of inputs per latch is derived from the number of GPIOs given + * in the corresponding device tree properties. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "gpiolib.h" + +struct gpio_latch_priv { + struct gpio_chip gc; + struct gpio_descs *clk_gpios; + struct gpio_descs *latched_gpios; + int n_latched_gpios; + unsigned int setup_duration_ns; + unsigned int clock_duration_ns; + unsigned long *shadow; + /* + * Depending on whether any of the underlying GPIOs may sleep we either + * use a mutex or a spinlock to protect our shadow map. 
+ */ + union { + struct mutex mutex; /* protects @shadow */ + spinlock_t spinlock; /* protects @shadow */ + }; +}; + +static int gpio_latch_get_direction(struct gpio_chip *gc, unsigned int offset) +{ + return GPIO_LINE_DIRECTION_OUT; +} + +static void gpio_latch_set_unlocked(struct gpio_latch_priv *priv, + void (*set)(struct gpio_desc *desc, int value), + unsigned int offset, bool val) +{ + int latch = offset / priv->n_latched_gpios; + int i; + + assign_bit(offset, priv->shadow, val); + + for (i = 0; i < priv->n_latched_gpios; i++) + set(priv->latched_gpios->desc[i], + test_bit(latch * priv->n_latched_gpios + i, priv->shadow)); + + ndelay(priv->setup_duration_ns); + set(priv->clk_gpios->desc[latch], 1); + ndelay(priv->clock_duration_ns); + set(priv->clk_gpios->desc[latch], 0); +} + +static void gpio_latch_set(struct gpio_chip *gc, unsigned int offset, int val) +{ + struct gpio_latch_priv *priv = gpiochip_get_data(gc); + unsigned long flags; + + spin_lock_irqsave(&priv->spinlock, flags); + + gpio_latch_set_unlocked(priv, gpiod_set_value, offset, val); + + spin_unlock_irqrestore(&priv->spinlock, flags); +} + +static void gpio_latch_set_can_sleep(struct gpio_chip *gc, unsigned int offset, int val) +{ + struct gpio_latch_priv *priv = gpiochip_get_data(gc); + + mutex_lock(&priv->mutex); + + gpio_latch_set_unlocked(priv, gpiod_set_value_cansleep, offset, val); + + mutex_unlock(&priv->mutex); +} + +static bool gpio_latch_can_sleep(struct gpio_latch_priv *priv, unsigned int n_latches) +{ + int i; + + for (i = 0; i < n_latches; i++) + if (gpiod_cansleep(priv->clk_gpios->desc[i])) + return true; + + for (i = 0; i < priv->n_latched_gpios; i++) + if (gpiod_cansleep(priv->latched_gpios->desc[i])) + return true; + + return false; +} + +/* + * Some value which is still acceptable to delay in atomic context. 
+ * If we need to go higher we might have to switch to usleep_range(), + * but that cannot be used in atomic context and the driver would have + * to be adjusted to support that. + */ +#define DURATION_NS_MAX 5000 + +static int gpio_latch_probe(struct platform_device *pdev) +{ + struct gpio_latch_priv *priv; + unsigned int n_latches; + struct device_node *np = pdev->dev.of_node; + + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->clk_gpios = devm_gpiod_get_array(&pdev->dev, "clk", GPIOD_OUT_LOW); + if (IS_ERR(priv->clk_gpios)) + return PTR_ERR(priv->clk_gpios); + + priv->latched_gpios = devm_gpiod_get_array(&pdev->dev, "latched", GPIOD_OUT_LOW); + if (IS_ERR(priv->latched_gpios)) + return PTR_ERR(priv->latched_gpios); + + n_latches = priv->clk_gpios->ndescs; + priv->n_latched_gpios = priv->latched_gpios->ndescs; + + priv->shadow = devm_bitmap_zalloc(&pdev->dev, n_latches * priv->n_latched_gpios, + GFP_KERNEL); + if (!priv->shadow) + return -ENOMEM; + + if (gpio_latch_can_sleep(priv, n_latches)) { + priv->gc.can_sleep = true; + priv->gc.set = gpio_latch_set_can_sleep; + mutex_init(&priv->mutex); + } else { + priv->gc.can_sleep = false; + priv->gc.set = gpio_latch_set; + spin_lock_init(&priv->spinlock); + } + + of_property_read_u32(np, "setup-duration-ns", &priv->setup_duration_ns); + if (priv->setup_duration_ns > DURATION_NS_MAX) { + dev_warn(&pdev->dev, "setup-duration-ns too high, limit to %d\n", + DURATION_NS_MAX); + priv->setup_duration_ns = DURATION_NS_MAX; + } + + of_property_read_u32(np, "clock-duration-ns", &priv->clock_duration_ns); + if (priv->clock_duration_ns > DURATION_NS_MAX) { + dev_warn(&pdev->dev, "clock-duration-ns too high, limit to %d\n", + DURATION_NS_MAX); + priv->clock_duration_ns = DURATION_NS_MAX; + } + + priv->gc.get_direction = gpio_latch_get_direction; + priv->gc.ngpio = n_latches * priv->n_latched_gpios; + priv->gc.owner = THIS_MODULE; + priv->gc.base = -1; + priv->gc.parent = 
&pdev->dev; + + platform_set_drvdata(pdev, priv); + + return devm_gpiochip_add_data(&pdev->dev, &priv->gc, priv); +} + +static const struct of_device_id gpio_latch_ids[] = { + { + .compatible = "gpio-latch", + }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, gpio_latch_ids); + +static struct platform_driver gpio_latch_driver = { + .driver = { + .name = "gpio-latch", + .of_match_table = gpio_latch_ids, + }, + .probe = gpio_latch_probe, +}; +module_platform_driver(gpio_latch_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Sascha Hauer "); +MODULE_DESCRIPTION("GPIO latch driver"); From b4e83d369015e3045418ca86984c3cd8dcf5a365 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 17 Oct 2022 20:06:00 +0300 Subject: [PATCH 0461/4122] gpio: exar: Allow IO port access It's possible that PCI device can provide an IO port resource for the device. regmap MMIO currently uses MMIO by default. With an additional flag we enable support for IO port accesses. Signed-off-by: Andy Shevchenko Acked-by: Linus Walleij Acked-by: William Breathitt Gray Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-exar.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-exar.c b/drivers/gpio/gpio-exar.c index 482f678c893e..df1bdaae441c 100644 --- a/drivers/gpio/gpio-exar.c +++ b/drivers/gpio/gpio-exar.c @@ -141,6 +141,7 @@ static const struct regmap_config exar_regmap_config = { .name = "exar-gpio", .reg_bits = 16, .val_bits = 8, + .io_port = true, }; static int gpio_exar_probe(struct platform_device *pdev) From 94e9f9a23fe4b093cd5a8b292165fad840242b79 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 25 Oct 2022 15:38:45 -0500 Subject: [PATCH 0462/4122] agp/efficeon: Convert to generic power management Convert agpgart-efficeon from legacy PCI power management to the generic power management framework. 
Previously agpgart-efficeon used legacy PCI power management, which means agp_efficeon_suspend() and agp_efficeon_resume() were responsible for both device-specific things and generic PCI things like saving and restoring config space and managing power state. In this case, agp_efficeon_suspend() was empty, and agp_efficeon_resume() already did only device-specific things, so simply convert it to take a struct device * instead of a struct pci_dev *. Based on 0aeddbd0cb07 ("via-agp: convert to generic power management") by Vaibhav Gupta . Link: https://lore.kernel.org/r/20221025203852.681822-2-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Acked-by: Dave Airlie --- drivers/char/agp/efficeon-agp.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/char/agp/efficeon-agp.c b/drivers/char/agp/efficeon-agp.c index c53f0f9ef5b0..f28d42319269 100644 --- a/drivers/char/agp/efficeon-agp.c +++ b/drivers/char/agp/efficeon-agp.c @@ -412,18 +412,11 @@ static void agp_efficeon_remove(struct pci_dev *pdev) agp_put_bridge(bridge); } -#ifdef CONFIG_PM -static int agp_efficeon_suspend(struct pci_dev *dev, pm_message_t state) -{ - return 0; -} - -static int agp_efficeon_resume(struct pci_dev *pdev) +static int agp_efficeon_resume(struct device *dev) { printk(KERN_DEBUG PFX "agp_efficeon_resume()\n"); return efficeon_configure(); } -#endif static const struct pci_device_id agp_efficeon_pci_table[] = { { @@ -437,6 +430,8 @@ static const struct pci_device_id agp_efficeon_pci_table[] = { { } }; +static DEFINE_SIMPLE_DEV_PM_OPS(agp_efficeon_pm_ops, NULL, agp_efficeon_resume); + MODULE_DEVICE_TABLE(pci, agp_efficeon_pci_table); static struct pci_driver agp_efficeon_pci_driver = { @@ -444,10 +439,7 @@ static struct pci_driver agp_efficeon_pci_driver = { .id_table = agp_efficeon_pci_table, .probe = agp_efficeon_probe, .remove = agp_efficeon_remove, -#ifdef CONFIG_PM - .suspend = agp_efficeon_suspend, - .resume = agp_efficeon_resume, -#endif + 
.driver.pm = &agp_efficeon_pm_ops, }; static int __init agp_efficeon_init(void) From 7f142022e6bfd2dd5ed998f7165e396bf5966513 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 25 Oct 2022 15:38:46 -0500 Subject: [PATCH 0463/4122] agp/intel: Convert to generic power management Convert agpgart-intel from legacy PCI power management to the generic power management framework. Previously agpgart-intel used legacy PCI power management, and agp_intel_resume() was responsible for both device-specific things and generic PCI things like saving and restoring config space and managing power state. In this case, agp_intel_suspend() was empty, and agp_intel_resume() already did only device-specific things, so simply convert it to take a struct device * instead of a struct pci_dev *. Based on 0aeddbd0cb07 ("via-agp: convert to generic power management") by Vaibhav Gupta . Link: https://lore.kernel.org/r/20221025203852.681822-3-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Acked-by: Dave Airlie --- drivers/char/agp/intel-agp.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c index 9e4f27a6cb5a..c518b3a9db04 100644 --- a/drivers/char/agp/intel-agp.c +++ b/drivers/char/agp/intel-agp.c @@ -817,16 +817,15 @@ static void agp_intel_remove(struct pci_dev *pdev) agp_put_bridge(bridge); } -#ifdef CONFIG_PM -static int agp_intel_resume(struct pci_dev *pdev) +static int agp_intel_resume(struct device *dev) { + struct pci_dev *pdev = to_pci_dev(dev); struct agp_bridge_data *bridge = pci_get_drvdata(pdev); bridge->driver->configure(); return 0; } -#endif static const struct pci_device_id agp_intel_pci_table[] = { #define ID(x) \ @@ -895,14 +894,14 @@ static const struct pci_device_id agp_intel_pci_table[] = { MODULE_DEVICE_TABLE(pci, agp_intel_pci_table); +static DEFINE_SIMPLE_DEV_PM_OPS(agp_intel_pm_ops, NULL, agp_intel_resume); + static struct pci_driver agp_intel_pci_driver = { .name = 
"agpgart-intel", .id_table = agp_intel_pci_table, .probe = agp_intel_probe, .remove = agp_intel_remove, -#ifdef CONFIG_PM - .resume = agp_intel_resume, -#endif + .driver.pm = &agp_intel_pm_ops, }; static int __init agp_intel_init(void) From c78679d1fe43f9165b11c5ccd3f79c7108b066fe Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 25 Oct 2022 15:38:47 -0500 Subject: [PATCH 0464/4122] agp/amd-k7: Convert to generic power management Convert agpgart-amdk7 from legacy PCI power management to the generic power management framework. Previously agpgart-amdk7 used legacy PCI power management, and agp_amdk7_suspend() and agp_amdk7_resume() were responsible for both device-specific things and generic PCI things like saving and restoring config space and managing power state: agp_amdk7_suspend pci_save_state <-- generic PCI pci_set_power_state <-- generic PCI agp_amdk7_resume pci_set_power_state(PCI_D0) <-- generic PCI pci_restore_state <-- generic PCI amd_irongate_driver.configure <-- device-specific Convert to generic power management where the PCI bus PM methods do the generic PCI things, and the driver needs only the device-specific part, i.e., suspend_devices_and_enter dpm_suspend_start(PMSG_SUSPEND) pci_pm_suspend # PCI bus .suspend() method agp_amdk7_suspend <-- not needed at all; removed suspend_enter dpm_suspend_noirq(PMSG_SUSPEND) pci_pm_suspend_noirq # PCI bus .suspend_noirq() method pci_save_state <-- generic PCI pci_prepare_to_sleep <-- generic PCI pci_set_power_state ... dpm_resume_end(PMSG_RESUME) pci_pm_resume # PCI bus .resume() method pci_restore_standard_config pci_set_power_state(PCI_D0) <-- generic PCI pci_restore_state <-- generic PCI agp_amdk7_resume # driver->pm->resume amd_irongate_driver.configure <-- device-specific Based on 0aeddbd0cb07 ("via-agp: convert to generic power management") by Vaibhav Gupta . 
Link: https://lore.kernel.org/r/20221025203852.681822-4-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Acked-by: Dave Airlie --- drivers/char/agp/amd-k7-agp.c | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/drivers/char/agp/amd-k7-agp.c b/drivers/char/agp/amd-k7-agp.c index 2b2095542816..55397ba765d2 100644 --- a/drivers/char/agp/amd-k7-agp.c +++ b/drivers/char/agp/amd-k7-agp.c @@ -488,26 +488,11 @@ static void agp_amdk7_remove(struct pci_dev *pdev) agp_put_bridge(bridge); } -#ifdef CONFIG_PM - -static int agp_amdk7_suspend(struct pci_dev *pdev, pm_message_t state) +static int agp_amdk7_resume(struct device *dev) { - pci_save_state(pdev); - pci_set_power_state(pdev, pci_choose_state(pdev, state)); - - return 0; -} - -static int agp_amdk7_resume(struct pci_dev *pdev) -{ - pci_set_power_state(pdev, PCI_D0); - pci_restore_state(pdev); - return amd_irongate_driver.configure(); } -#endif /* CONFIG_PM */ - /* must be the same order as name table above */ static const struct pci_device_id agp_amdk7_pci_table[] = { { @@ -539,15 +524,14 @@ static const struct pci_device_id agp_amdk7_pci_table[] = { MODULE_DEVICE_TABLE(pci, agp_amdk7_pci_table); +static DEFINE_SIMPLE_DEV_PM_OPS(agp_amdk7_pm_ops, NULL, agp_amdk7_resume); + static struct pci_driver agp_amdk7_pci_driver = { .name = "agpgart-amdk7", .id_table = agp_amdk7_pci_table, .probe = agp_amdk7_probe, .remove = agp_amdk7_remove, -#ifdef CONFIG_PM - .suspend = agp_amdk7_suspend, - .resume = agp_amdk7_resume, -#endif + .driver.pm = &agp_amdk7_pm_ops, }; static int __init agp_amdk7_init(void) From 6a1274ea0e5dfb2eca85b0175820d7b5183c9cae Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 25 Oct 2022 15:38:48 -0500 Subject: [PATCH 0465/4122] agp/ati: Convert to generic power management Convert agpgart-ati from legacy PCI power management to the generic power management framework. 
Previously agpgart-ati used legacy PCI power management, and agp_ati_suspend() and agp_ati_resume() were responsible for both device-specific things and generic PCI things like saving and restoring config space and managing power state: agp_ati_suspend pci_save_state <-- generic PCI pci_set_power_state(PCI_D3hot) <-- generic PCI agp_ati_resume pci_set_power_state(PCI_D0) <-- generic PCI pci_restore_state <-- generic PCI ati_configure <-- device-specific With generic power management, the PCI bus PM methods do the generic PCI things, and the driver needs only the device-specific part, i.e., suspend_devices_and_enter dpm_suspend_start(PMSG_SUSPEND) pci_pm_suspend # PCI bus .suspend() method agp_ati_suspend <-- not needed at all; removed suspend_enter dpm_suspend_noirq(PMSG_SUSPEND) pci_pm_suspend_noirq # PCI bus .suspend_noirq() method pci_save_state <-- generic PCI pci_prepare_to_sleep <-- generic PCI pci_set_power_state ... dpm_resume_end(PMSG_RESUME) pci_pm_resume # PCI bus .resume() method pci_restore_standard_config pci_set_power_state(PCI_D0) <-- generic PCI pci_restore_state <-- generic PCI agp_ati_resume # driver->pm->resume ati_configure <-- device-specific Based on 0aeddbd0cb07 ("via-agp: convert to generic power management") by Vaibhav Gupta . 
Link: https://lore.kernel.org/r/20221025203852.681822-5-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Acked-by: Dave Airlie --- drivers/char/agp/ati-agp.c | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/drivers/char/agp/ati-agp.c b/drivers/char/agp/ati-agp.c index 6f5530482d83..3c1fce48aabe 100644 --- a/drivers/char/agp/ati-agp.c +++ b/drivers/char/agp/ati-agp.c @@ -238,23 +238,10 @@ static int ati_configure(void) } -#ifdef CONFIG_PM -static int agp_ati_suspend(struct pci_dev *dev, pm_message_t state) +static int agp_ati_resume(struct device *dev) { - pci_save_state(dev); - pci_set_power_state(dev, PCI_D3hot); - - return 0; -} - -static int agp_ati_resume(struct pci_dev *dev) -{ - pci_set_power_state(dev, PCI_D0); - pci_restore_state(dev); - return ati_configure(); } -#endif /* *Since we don't need contiguous memory we just try @@ -559,15 +546,14 @@ static const struct pci_device_id agp_ati_pci_table[] = { MODULE_DEVICE_TABLE(pci, agp_ati_pci_table); +static DEFINE_SIMPLE_DEV_PM_OPS(agp_ati_pm_ops, NULL, agp_ati_resume); + static struct pci_driver agp_ati_pci_driver = { .name = "agpgart-ati", .id_table = agp_ati_pci_table, .probe = agp_ati_probe, .remove = agp_ati_remove, -#ifdef CONFIG_PM - .suspend = agp_ati_suspend, - .resume = agp_ati_resume, -#endif + .driver.pm = &agp_ati_pm_ops, }; static int __init agp_ati_init(void) From 11a8d8774e68e07385a5b10d9546598f57ace7da Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 25 Oct 2022 15:38:49 -0500 Subject: [PATCH 0466/4122] agp/nvidia: Convert to generic power management Convert agpgart-nvidia from legacy PCI power management to the generic power management framework. 
Previously agpgart-nvidia used legacy PCI power management, and agp_nvidia_suspend() and agp_nvidia_resume() were responsible for both device-specific things and generic PCI things: agp_nvidia_suspend pci_save_state <-- generic PCI pci_set_power_state(PCI_D3hot) <-- generic PCI agp_nvidia_resume pci_set_power_state(PCI_D0) <-- generic PCI pci_restore_state <-- generic PCI nvidia_configure <-- device-specific Convert to generic power management where the PCI bus PM methods do the generic PCI things, and the driver needs only the device-specific part, i.e., suspend_devices_and_enter dpm_suspend_start(PMSG_SUSPEND) pci_pm_suspend # PCI bus .suspend() method agp_nvidia_suspend <-- not needed at all; removed suspend_enter dpm_suspend_noirq(PMSG_SUSPEND) pci_pm_suspend_noirq # PCI bus .suspend_noirq() method pci_save_state <-- generic PCI pci_prepare_to_sleep <-- generic PCI pci_set_power_state ... dpm_resume_end(PMSG_RESUME) pci_pm_resume # PCI bus .resume() method pci_restore_standard_config pci_set_power_state(PCI_D0) <-- generic PCI pci_restore_state <-- generic PCI agp_nvidia_resume # driver->pm->resume nvidia_configure <-- device-specific Based on 0aeddbd0cb07 ("via-agp: convert to generic power management") by Vaibhav Gupta . 
Link: https://lore.kernel.org/r/20221025203852.681822-6-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Acked-by: Dave Airlie --- drivers/char/agp/nvidia-agp.c | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/drivers/char/agp/nvidia-agp.c b/drivers/char/agp/nvidia-agp.c index 826dbd06f6bb..dbcbc06cc202 100644 --- a/drivers/char/agp/nvidia-agp.c +++ b/drivers/char/agp/nvidia-agp.c @@ -404,28 +404,13 @@ static void agp_nvidia_remove(struct pci_dev *pdev) agp_put_bridge(bridge); } -#ifdef CONFIG_PM -static int agp_nvidia_suspend(struct pci_dev *pdev, pm_message_t state) +static int agp_nvidia_resume(struct device *dev) { - pci_save_state(pdev); - pci_set_power_state(pdev, PCI_D3hot); - - return 0; -} - -static int agp_nvidia_resume(struct pci_dev *pdev) -{ - /* set power state 0 and restore PCI space */ - pci_set_power_state(pdev, PCI_D0); - pci_restore_state(pdev); - /* reconfigure AGP hardware again */ nvidia_configure(); return 0; } -#endif - static const struct pci_device_id agp_nvidia_pci_table[] = { { @@ -449,15 +434,14 @@ static const struct pci_device_id agp_nvidia_pci_table[] = { MODULE_DEVICE_TABLE(pci, agp_nvidia_pci_table); +static DEFINE_SIMPLE_DEV_PM_OPS(agp_nvidia_pm_ops, NULL, agp_nvidia_resume); + static struct pci_driver agp_nvidia_pci_driver = { .name = "agpgart-nvidia", .id_table = agp_nvidia_pci_table, .probe = agp_nvidia_probe, .remove = agp_nvidia_remove, -#ifdef CONFIG_PM - .suspend = agp_nvidia_suspend, - .resume = agp_nvidia_resume, -#endif + .driver.pm = &agp_nvidia_pm_ops, }; static int __init agp_nvidia_init(void) From 8c1f82c710f18c5f51c3b43402cd8175d6655369 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 25 Oct 2022 15:38:50 -0500 Subject: [PATCH 0467/4122] agp/amd64: Update to DEFINE_SIMPLE_DEV_PM_OPS() As of 1a3c7bb08826 ("PM: core: Add new *_PM_OPS macros, deprecate old ones"), SIMPLE_DEV_PM_OPS() is deprecated in favor of DEFINE_SIMPLE_DEV_PM_OPS(), which has the advantage that the 
PM callbacks don't need to be wrapped with #ifdef CONFIG_PM or tagged with __maybe_unused. Convert to DEFINE_SIMPLE_DEV_PM_OPS(). No functional change intended. Link: https://lore.kernel.org/r/20221025203852.681822-7-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Acked-by: Dave Airlie --- drivers/char/agp/amd64-agp.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c index 84a4aa9312cf..ce8651436609 100644 --- a/drivers/char/agp/amd64-agp.c +++ b/drivers/char/agp/amd64-agp.c @@ -588,9 +588,7 @@ static void agp_amd64_remove(struct pci_dev *pdev) agp_bridges_found--; } -#define agp_amd64_suspend NULL - -static int __maybe_unused agp_amd64_resume(struct device *dev) +static int agp_amd64_resume(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); @@ -727,7 +725,7 @@ static const struct pci_device_id agp_amd64_pci_promisc_table[] = { { } }; -static SIMPLE_DEV_PM_OPS(agp_amd64_pm_ops, agp_amd64_suspend, agp_amd64_resume); +static DEFINE_SIMPLE_DEV_PM_OPS(agp_amd64_pm_ops, NULL, agp_amd64_resume); static struct pci_driver agp_amd64_pci_driver = { .name = "agpgart-amd64", From 746e926b9fe327ace4be187144913abd7cfc2f4a Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 25 Oct 2022 15:38:51 -0500 Subject: [PATCH 0468/4122] agp/sis: Update to DEFINE_SIMPLE_DEV_PM_OPS() As of 1a3c7bb08826 ("PM: core: Add new *_PM_OPS macros, deprecate old ones"), SIMPLE_DEV_PM_OPS() is deprecated in favor of DEFINE_SIMPLE_DEV_PM_OPS(), which has the advantage that the PM callbacks don't need to be wrapped with #ifdef CONFIG_PM or tagged with __maybe_unused. Convert to DEFINE_SIMPLE_DEV_PM_OPS(). No functional change intended. 
Link: https://lore.kernel.org/r/20221025203852.681822-8-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Acked-by: Dave Airlie --- drivers/char/agp/sis-agp.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/char/agp/sis-agp.c b/drivers/char/agp/sis-agp.c index f8a02f4bef1b..484bb101c53b 100644 --- a/drivers/char/agp/sis-agp.c +++ b/drivers/char/agp/sis-agp.c @@ -217,10 +217,7 @@ static void agp_sis_remove(struct pci_dev *pdev) agp_put_bridge(bridge); } -#define agp_sis_suspend NULL - -static int __maybe_unused agp_sis_resume( - __attribute__((unused)) struct device *dev) +static int agp_sis_resume(__attribute__((unused)) struct device *dev) { return sis_driver.configure(); } @@ -407,7 +404,7 @@ static const struct pci_device_id agp_sis_pci_table[] = { MODULE_DEVICE_TABLE(pci, agp_sis_pci_table); -static SIMPLE_DEV_PM_OPS(agp_sis_pm_ops, agp_sis_suspend, agp_sis_resume); +static DEFINE_SIMPLE_DEV_PM_OPS(agp_sis_pm_ops, NULL, agp_sis_resume); static struct pci_driver agp_sis_pci_driver = { .name = "agpgart-sis", From 73fcd4520edb430684246448d096f8f17f107c97 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 25 Oct 2022 15:38:52 -0500 Subject: [PATCH 0469/4122] agp/via: Update to DEFINE_SIMPLE_DEV_PM_OPS() As of 1a3c7bb08826 ("PM: core: Add new *_PM_OPS macros, deprecate old ones"), SIMPLE_DEV_PM_OPS() is deprecated in favor of DEFINE_SIMPLE_DEV_PM_OPS(), which has the advantage that the PM callbacks don't need to be wrapped with #ifdef CONFIG_PM or tagged with __maybe_unused. Convert to DEFINE_SIMPLE_DEV_PM_OPS(). No functional change intended. 
Link: https://lore.kernel.org/r/20221025203852.681822-9-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Acked-by: Dave Airlie --- drivers/char/agp/via-agp.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/char/agp/via-agp.c b/drivers/char/agp/via-agp.c index b2f484f527fb..bc5140af2dcb 100644 --- a/drivers/char/agp/via-agp.c +++ b/drivers/char/agp/via-agp.c @@ -489,9 +489,7 @@ static void agp_via_remove(struct pci_dev *pdev) agp_put_bridge(bridge); } -#define agp_via_suspend NULL - -static int __maybe_unused agp_via_resume(struct device *dev) +static int agp_via_resume(struct device *dev) { struct agp_bridge_data *bridge = dev_get_drvdata(dev); @@ -551,7 +549,7 @@ static const struct pci_device_id agp_via_pci_table[] = { MODULE_DEVICE_TABLE(pci, agp_via_pci_table); -static SIMPLE_DEV_PM_OPS(agp_via_pm_ops, agp_via_suspend, agp_via_resume); +static DEFINE_SIMPLE_DEV_PM_OPS(agp_via_pm_ops, NULL, agp_via_resume); static struct pci_driver agp_via_pci_driver = { .name = "agpgart-via", From 5984de0b41bf8f261e41b45c4fe64a32236e0c42 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 25 Oct 2022 14:35:02 -0500 Subject: [PATCH 0470/4122] PCI/PM: Remove unused 'state' parameter to pci_legacy_suspend_late() 1a1daf097e21 ("PCI/PM: Remove unused pci_driver.suspend_late() hook") removed the legacy .suspend_late() hook, which was the only user of the "state" parameter to pci_legacy_suspend_late(), but it neglected to remove the parameter. Remove the unused "state" parameter to pci_legacy_suspend_late(). Link: https://lore.kernel.org/r/20221025193502.669091-1-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Acked-by: Rafael J. 
Wysocki --- drivers/pci/pci-driver.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 107d77f3c846..a2ceeacc33eb 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -646,7 +646,7 @@ static int pci_legacy_suspend(struct device *dev, pm_message_t state) return 0; } -static int pci_legacy_suspend_late(struct device *dev, pm_message_t state) +static int pci_legacy_suspend_late(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); @@ -848,7 +848,7 @@ static int pci_pm_suspend_noirq(struct device *dev) return 0; if (pci_has_legacy_pm_support(pci_dev)) - return pci_legacy_suspend_late(dev, PMSG_SUSPEND); + return pci_legacy_suspend_late(dev); if (!pm) { pci_save_state(pci_dev); @@ -1060,7 +1060,7 @@ static int pci_pm_freeze_noirq(struct device *dev) const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; if (pci_has_legacy_pm_support(pci_dev)) - return pci_legacy_suspend_late(dev, PMSG_FREEZE); + return pci_legacy_suspend_late(dev); if (pm && pm->freeze_noirq) { int error; @@ -1179,7 +1179,7 @@ static int pci_pm_poweroff_noirq(struct device *dev) return 0; if (pci_has_legacy_pm_support(pci_dev)) - return pci_legacy_suspend_late(dev, PMSG_HIBERNATE); + return pci_legacy_suspend_late(dev); if (!pm) { pci_fixup_device(pci_fixup_suspend_late, pci_dev); From b9b8782f8966a7f219ec2e2db3ffe5eeb23943ab Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Tue, 6 Sep 2022 13:34:16 +0300 Subject: [PATCH 0471/4122] scsi: target: core: Add support for RSOC command Add support for REPORT SUPPORTED OPERATION CODES command according to SPC4. Reviewed-by: Roman Bolshakov Signed-off-by: Dmitry Bogdanov Link: https://lore.kernel.org/r/20220906103421.22348-2-d.bogdanov@yadro.com Reviewed-by: Mike Christie Signed-off-by: Martin K. 
Petersen --- drivers/target/target_core_spc.c | 200 ++++++++++++++++++++++++++++++ include/scsi/scsi_proto.h | 7 ++ include/target/target_core_base.h | 12 ++ 3 files changed, 219 insertions(+) diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c index 7cca3b15472b..afd5ea0344f3 100644 --- a/drivers/target/target_core_spc.c +++ b/drivers/target/target_core_spc.c @@ -1314,6 +1314,202 @@ spc_emulate_testunitready(struct se_cmd *cmd) return 0; } + +static struct target_opcode_descriptor *tcm_supported_opcodes[] = { +}; + +static int +spc_rsoc_encode_command_timeouts_descriptor(unsigned char *buf, u8 ctdp, + struct target_opcode_descriptor *descr) +{ + if (!ctdp) + return 0; + + put_unaligned_be16(0xa, buf); + buf[3] = descr->specific_timeout; + put_unaligned_be32(descr->nominal_timeout, &buf[4]); + put_unaligned_be32(descr->recommended_timeout, &buf[8]); + + return 12; +} + +static int +spc_rsoc_encode_command_descriptor(unsigned char *buf, u8 ctdp, + struct target_opcode_descriptor *descr) +{ + int td_size = 0; + + buf[0] = descr->opcode; + + put_unaligned_be16(descr->service_action, &buf[2]); + + buf[5] = (ctdp << 1) | descr->serv_action_valid; + put_unaligned_be16(descr->cdb_size, &buf[6]); + + td_size = spc_rsoc_encode_command_timeouts_descriptor(&buf[8], ctdp, + descr); + + return 8 + td_size; +} + +static int +spc_rsoc_encode_one_command_descriptor(unsigned char *buf, u8 ctdp, + struct target_opcode_descriptor *descr) +{ + int td_size = 0; + + if (!descr) { + buf[1] = (ctdp << 7) | SCSI_SUPPORT_NOT_SUPPORTED; + return 2; + } + + buf[1] = (ctdp << 7) | SCSI_SUPPORT_FULL; + put_unaligned_be16(descr->cdb_size, &buf[2]); + memcpy(&buf[4], descr->usage_bits, descr->cdb_size); + + td_size = spc_rsoc_encode_command_timeouts_descriptor( + &buf[4 + descr->cdb_size], ctdp, descr); + + return 4 + descr->cdb_size + td_size; +} + +static sense_reason_t +spc_rsoc_get_descr(struct se_cmd *cmd, struct target_opcode_descriptor **opcode) +{ + struct 
target_opcode_descriptor *descr; + struct se_session *sess = cmd->se_sess; + unsigned char *cdb = cmd->t_task_cdb; + u8 opts = cdb[2] & 0x3; + u8 requested_opcode; + u16 requested_sa; + int i; + + requested_opcode = cdb[3]; + requested_sa = ((u16)cdb[4]) << 8 | cdb[5]; + *opcode = NULL; + + if (opts > 3) { + pr_debug("TARGET_CORE[%s]: Invalid REPORT SUPPORTED OPERATION CODES" + " with unsupported REPORTING OPTIONS %#x for 0x%08llx from %s\n", + cmd->se_tfo->fabric_name, opts, + cmd->se_lun->unpacked_lun, + sess->se_node_acl->initiatorname); + return TCM_INVALID_CDB_FIELD; + } + + for (i = 0; i < ARRAY_SIZE(tcm_supported_opcodes); i++) { + descr = tcm_supported_opcodes[i]; + if (descr->opcode != requested_opcode) + continue; + + switch (opts) { + case 0x1: + /* + * If the REQUESTED OPERATION CODE field specifies an + * operation code for which the device server implements + * service actions, then the device server shall + * terminate the command with CHECK CONDITION status, + * with the sense key set to ILLEGAL REQUEST, and the + * additional sense code set to INVALID FIELD IN CDB + */ + if (descr->serv_action_valid) + return TCM_INVALID_CDB_FIELD; + *opcode = descr; + break; + case 0x2: + /* + * If the REQUESTED OPERATION CODE field specifies an + * operation code for which the device server does not + * implement service actions, then the device server + * shall terminate the command with CHECK CONDITION + * status, with the sense key set to ILLEGAL REQUEST, + * and the additional sense code set to INVALID FIELD IN CDB. + */ + if (descr->serv_action_valid && + descr->service_action == requested_sa) + *opcode = descr; + else if (!descr->serv_action_valid) + return TCM_INVALID_CDB_FIELD; + break; + case 0x3: + /* + * The command support data for the operation code and + * service action a specified in the REQUESTED OPERATION + * CODE field and REQUESTED SERVICE ACTION field shall + * be returned in the one_command parameter data format. 
+ */ + if (descr->service_action == requested_sa) + *opcode = descr; + break; + } + } + return 0; +} + +static sense_reason_t +spc_emulate_report_supp_op_codes(struct se_cmd *cmd) +{ + int descr_num = ARRAY_SIZE(tcm_supported_opcodes); + struct target_opcode_descriptor *descr = NULL; + unsigned char *cdb = cmd->t_task_cdb; + u8 rctd = (cdb[2] >> 7) & 0x1; + unsigned char *buf = NULL; + int response_length = 0; + u8 opts = cdb[2] & 0x3; + unsigned char *rbuf; + sense_reason_t ret = 0; + int i; + + rbuf = transport_kmap_data_sg(cmd); + if (cmd->data_length && !rbuf) { + ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; + goto out; + } + + if (opts == 0) + response_length = 4 + (8 + rctd * 12) * descr_num; + else { + ret = spc_rsoc_get_descr(cmd, &descr); + if (ret) + goto out; + + if (descr) + response_length = 4 + descr->cdb_size + rctd * 12; + else + response_length = 2; + } + + buf = kzalloc(response_length, GFP_KERNEL); + if (!buf) { + ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; + goto out; + } + response_length = 0; + + if (opts == 0) { + response_length += 4; + + for (i = 0; i < ARRAY_SIZE(tcm_supported_opcodes); i++) { + descr = tcm_supported_opcodes[i]; + response_length += spc_rsoc_encode_command_descriptor( + &buf[response_length], rctd, descr); + } + put_unaligned_be32(response_length - 3, buf); + } else { + response_length = spc_rsoc_encode_one_command_descriptor( + &buf[response_length], rctd, descr); + } + + memcpy(rbuf, buf, min_t(u32, response_length, cmd->data_length)); +out: + kfree(buf); + transport_kunmap_data_sg(cmd); + + if (!ret) + target_complete_cmd_with_length(cmd, SAM_STAT_GOOD, response_length); + return ret; +} + sense_reason_t spc_parse_cdb(struct se_cmd *cmd, unsigned int *size) { @@ -1439,6 +1635,10 @@ spc_parse_cdb(struct se_cmd *cmd, unsigned int *size) cmd->execute_cmd = target_emulate_report_target_port_groups; } + if ((cdb[1] & 0x1f) == + MI_REPORT_SUPPORTED_OPERATION_CODES) + cmd->execute_cmd = + spc_emulate_report_supp_op_codes; 
*size = get_unaligned_be32(&cdb[6]); } else { /* diff --git a/include/scsi/scsi_proto.h b/include/scsi/scsi_proto.h index c03e35fc382c..651b5183451c 100644 --- a/include/scsi/scsi_proto.h +++ b/include/scsi/scsi_proto.h @@ -342,4 +342,11 @@ enum scsi_version_descriptor { SCSI_VERSION_DESCRIPTOR_SRP = 0x0940 }; +enum scsi_support_opcode { + SCSI_SUPPORT_NO_INFO = 0, + SCSI_SUPPORT_NOT_SUPPORTED = 1, + SCSI_SUPPORT_FULL = 3, + SCSI_SUPPORT_VENDOR = 5, +}; + #endif /* _SCSI_PROTO_H_ */ diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 8c920456edd9..02a2d48d20b6 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -867,6 +867,18 @@ struct se_device { struct se_device_queue *queues; }; +struct target_opcode_descriptor { + u8 support:3; + u8 serv_action_valid:1; + u8 opcode; + u16 service_action; + u32 cdb_size; + u8 specific_timeout; + u16 nominal_timeout; + u16 recommended_timeout; + u8 usage_bits[]; +}; + struct se_hba { u16 hba_tpgt; u32 hba_id; From 0016e820716ff863a76e960cb91bd72373ac2e74 Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Tue, 6 Sep 2022 13:34:17 +0300 Subject: [PATCH 0472/4122] scsi: target: core: Add list of opcodes for RSOC Fill the structures for supported opcodes and usage bits that are reported in REPORT SUPPORTED OPERATION CODES command response. Reviewed-by: Roman Bolshakov Signed-off-by: Dmitry Bogdanov Link: https://lore.kernel.org/r/20220906103421.22348-3-d.bogdanov@yadro.com Reviewed-by: Mike Christie Signed-off-by: Martin K. 
Petersen --- drivers/target/target_core_spc.c | 568 +++++++++++++++++++++++++++++++ include/scsi/scsi_proto.h | 3 + 2 files changed, 571 insertions(+) diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c index afd5ea0344f3..31cd6f31f6b1 100644 --- a/drivers/target/target_core_spc.c +++ b/drivers/target/target_core_spc.c @@ -1314,8 +1314,576 @@ spc_emulate_testunitready(struct se_cmd *cmd) return 0; } +static struct target_opcode_descriptor tcm_opcode_read6 = { + .support = SCSI_SUPPORT_FULL, + .opcode = READ_6, + .cdb_size = 6, + .usage_bits = {READ_6, 0x1f, 0xff, 0xff, + 0xff, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_read10 = { + .support = SCSI_SUPPORT_FULL, + .opcode = READ_10, + .cdb_size = 10, + .usage_bits = {READ_10, 0xf8, 0xff, 0xff, + 0xff, 0xff, SCSI_GROUP_NUMBER_MASK, 0xff, + 0xff, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_read12 = { + .support = SCSI_SUPPORT_FULL, + .opcode = READ_12, + .cdb_size = 12, + .usage_bits = {READ_12, 0xf8, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, SCSI_GROUP_NUMBER_MASK, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_read16 = { + .support = SCSI_SUPPORT_FULL, + .opcode = READ_16, + .cdb_size = 16, + .usage_bits = {READ_16, 0xf8, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, SCSI_GROUP_NUMBER_MASK, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_write6 = { + .support = SCSI_SUPPORT_FULL, + .opcode = WRITE_6, + .cdb_size = 6, + .usage_bits = {WRITE_6, 0x1f, 0xff, 0xff, + 0xff, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_write10 = { + .support = SCSI_SUPPORT_FULL, + .opcode = WRITE_10, + .cdb_size = 10, + .usage_bits = {WRITE_10, 0xf8, 0xff, 0xff, + 0xff, 0xff, SCSI_GROUP_NUMBER_MASK, 0xff, + 0xff, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_write_verify10 = { + 
.support = SCSI_SUPPORT_FULL, + .opcode = WRITE_VERIFY, + .cdb_size = 10, + .usage_bits = {WRITE_VERIFY, 0xf0, 0xff, 0xff, + 0xff, 0xff, SCSI_GROUP_NUMBER_MASK, 0xff, + 0xff, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_write12 = { + .support = SCSI_SUPPORT_FULL, + .opcode = WRITE_12, + .cdb_size = 12, + .usage_bits = {WRITE_12, 0xf8, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, SCSI_GROUP_NUMBER_MASK, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_write16 = { + .support = SCSI_SUPPORT_FULL, + .opcode = WRITE_16, + .cdb_size = 16, + .usage_bits = {WRITE_16, 0xf8, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, SCSI_GROUP_NUMBER_MASK, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_write_verify16 = { + .support = SCSI_SUPPORT_FULL, + .opcode = WRITE_VERIFY_16, + .cdb_size = 16, + .usage_bits = {WRITE_VERIFY_16, 0xf0, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, SCSI_GROUP_NUMBER_MASK, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_write_same32 = { + .support = SCSI_SUPPORT_FULL, + .serv_action_valid = 1, + .opcode = VARIABLE_LENGTH_CMD, + .service_action = WRITE_SAME_32, + .cdb_size = 32, + .usage_bits = {VARIABLE_LENGTH_CMD, SCSI_CONTROL_MASK, 0x00, 0x00, + 0x00, 0x00, SCSI_GROUP_NUMBER_MASK, 0x18, + 0x00, WRITE_SAME_32, 0xe8, 0x00, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff}, +}; + +static struct target_opcode_descriptor tcm_opcode_compare_write = { + .support = SCSI_SUPPORT_FULL, + .opcode = COMPARE_AND_WRITE, + .cdb_size = 16, + .usage_bits = {COMPARE_AND_WRITE, 0x18, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0x00, 0x00, + 0x00, 0xff, SCSI_GROUP_NUMBER_MASK, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_read_capacity = { + .support = 
SCSI_SUPPORT_FULL, + .opcode = READ_CAPACITY, + .cdb_size = 10, + .usage_bits = {READ_CAPACITY, 0x00, 0xff, 0xff, + 0xff, 0xff, 0x00, 0x00, + 0x01, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_read_capacity16 = { + .support = SCSI_SUPPORT_FULL, + .serv_action_valid = 1, + .opcode = SERVICE_ACTION_IN_16, + .service_action = SAI_READ_CAPACITY_16, + .cdb_size = 16, + .usage_bits = {SERVICE_ACTION_IN_16, SAI_READ_CAPACITY_16, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xff, 0xff, + 0xff, 0xff, 0x00, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_read_report_refferals = { + .support = SCSI_SUPPORT_FULL, + .serv_action_valid = 1, + .opcode = SERVICE_ACTION_IN_16, + .service_action = SAI_REPORT_REFERRALS, + .cdb_size = 16, + .usage_bits = {SERVICE_ACTION_IN_16, SAI_REPORT_REFERRALS, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xff, 0xff, + 0xff, 0xff, 0x00, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_sync_cache = { + .support = SCSI_SUPPORT_FULL, + .opcode = SYNCHRONIZE_CACHE, + .cdb_size = 10, + .usage_bits = {SYNCHRONIZE_CACHE, 0x02, 0xff, 0xff, + 0xff, 0xff, SCSI_GROUP_NUMBER_MASK, 0xff, + 0xff, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_sync_cache16 = { + .support = SCSI_SUPPORT_FULL, + .opcode = SYNCHRONIZE_CACHE_16, + .cdb_size = 16, + .usage_bits = {SYNCHRONIZE_CACHE_16, 0x02, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, SCSI_GROUP_NUMBER_MASK, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_unmap = { + .support = SCSI_SUPPORT_FULL, + .opcode = UNMAP, + .cdb_size = 10, + .usage_bits = {UNMAP, 0x00, 0x00, 0x00, + 0x00, 0x00, SCSI_GROUP_NUMBER_MASK, 0xff, + 0xff, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_write_same = { + .support = SCSI_SUPPORT_FULL, + .opcode = WRITE_SAME, + .cdb_size = 10, + .usage_bits = {WRITE_SAME, 0xe8, 
0xff, 0xff, + 0xff, 0xff, SCSI_GROUP_NUMBER_MASK, 0xff, + 0xff, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_write_same16 = { + .support = SCSI_SUPPORT_FULL, + .opcode = WRITE_SAME_16, + .cdb_size = 16, + .usage_bits = {WRITE_SAME_16, 0xe8, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, SCSI_GROUP_NUMBER_MASK, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_verify = { + .support = SCSI_SUPPORT_FULL, + .opcode = VERIFY, + .cdb_size = 10, + .usage_bits = {VERIFY, 0x00, 0xff, 0xff, + 0xff, 0xff, SCSI_GROUP_NUMBER_MASK, 0xff, + 0xff, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_verify16 = { + .support = SCSI_SUPPORT_FULL, + .opcode = VERIFY_16, + .cdb_size = 16, + .usage_bits = {VERIFY_16, 0x00, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, SCSI_GROUP_NUMBER_MASK, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_start_stop = { + .support = SCSI_SUPPORT_FULL, + .opcode = START_STOP, + .cdb_size = 6, + .usage_bits = {START_STOP, 0x01, 0x00, 0x00, + 0x01, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_mode_select = { + .support = SCSI_SUPPORT_FULL, + .opcode = MODE_SELECT, + .cdb_size = 6, + .usage_bits = {MODE_SELECT, 0x10, 0x00, 0x00, + 0xff, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_mode_select10 = { + .support = SCSI_SUPPORT_FULL, + .opcode = MODE_SELECT_10, + .cdb_size = 10, + .usage_bits = {MODE_SELECT_10, 0x10, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xff, + 0xff, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_mode_sense = { + .support = SCSI_SUPPORT_FULL, + .opcode = MODE_SENSE, + .cdb_size = 6, + .usage_bits = {MODE_SENSE, 0x08, 0xff, 0xff, + 0xff, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_mode_sense10 = { + .support = SCSI_SUPPORT_FULL, + .opcode = 
MODE_SENSE_10, + .cdb_size = 10, + .usage_bits = {MODE_SENSE_10, 0x18, 0xff, 0xff, + 0x00, 0x00, 0x00, 0xff, + 0xff, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_pri_read_keys = { + .support = SCSI_SUPPORT_FULL, + .serv_action_valid = 1, + .opcode = PERSISTENT_RESERVE_IN, + .service_action = PRI_READ_KEYS, + .cdb_size = 10, + .usage_bits = {PERSISTENT_RESERVE_IN, PRI_READ_KEYS, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xff, + 0xff, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_pri_read_resrv = { + .support = SCSI_SUPPORT_FULL, + .serv_action_valid = 1, + .opcode = PERSISTENT_RESERVE_IN, + .service_action = PRI_READ_RESERVATION, + .cdb_size = 10, + .usage_bits = {PERSISTENT_RESERVE_IN, PRI_READ_RESERVATION, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xff, + 0xff, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_pri_read_caps = { + .support = SCSI_SUPPORT_FULL, + .serv_action_valid = 1, + .opcode = PERSISTENT_RESERVE_IN, + .service_action = PRI_REPORT_CAPABILITIES, + .cdb_size = 10, + .usage_bits = {PERSISTENT_RESERVE_IN, PRI_REPORT_CAPABILITIES, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xff, + 0xff, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_pri_read_full_status = { + .support = SCSI_SUPPORT_FULL, + .serv_action_valid = 1, + .opcode = PERSISTENT_RESERVE_IN, + .service_action = PRI_READ_FULL_STATUS, + .cdb_size = 10, + .usage_bits = {PERSISTENT_RESERVE_IN, PRI_READ_FULL_STATUS, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xff, + 0xff, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_pro_register = { + .support = SCSI_SUPPORT_FULL, + .serv_action_valid = 1, + .opcode = PERSISTENT_RESERVE_OUT, + .service_action = PRO_REGISTER, + .cdb_size = 10, + .usage_bits = {PERSISTENT_RESERVE_OUT, PRO_REGISTER, 0xff, 0x00, + 0x00, 0xff, 0xff, 0xff, + 0xff, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_pro_reserve = { + .support = 
SCSI_SUPPORT_FULL, + .serv_action_valid = 1, + .opcode = PERSISTENT_RESERVE_OUT, + .service_action = PRO_RESERVE, + .cdb_size = 10, + .usage_bits = {PERSISTENT_RESERVE_OUT, PRO_RESERVE, 0xff, 0x00, + 0x00, 0xff, 0xff, 0xff, + 0xff, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_pro_release = { + .support = SCSI_SUPPORT_FULL, + .serv_action_valid = 1, + .opcode = PERSISTENT_RESERVE_OUT, + .service_action = PRO_RELEASE, + .cdb_size = 10, + .usage_bits = {PERSISTENT_RESERVE_OUT, PRO_RELEASE, 0xff, 0x00, + 0x00, 0xff, 0xff, 0xff, + 0xff, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_pro_clear = { + .support = SCSI_SUPPORT_FULL, + .serv_action_valid = 1, + .opcode = PERSISTENT_RESERVE_OUT, + .service_action = PRO_CLEAR, + .cdb_size = 10, + .usage_bits = {PERSISTENT_RESERVE_OUT, PRO_CLEAR, 0xff, 0x00, + 0x00, 0xff, 0xff, 0xff, + 0xff, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_pro_preempt = { + .support = SCSI_SUPPORT_FULL, + .serv_action_valid = 1, + .opcode = PERSISTENT_RESERVE_OUT, + .service_action = PRO_PREEMPT, + .cdb_size = 10, + .usage_bits = {PERSISTENT_RESERVE_OUT, PRO_PREEMPT, 0xff, 0x00, + 0x00, 0xff, 0xff, 0xff, + 0xff, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_pro_preempt_abort = { + .support = SCSI_SUPPORT_FULL, + .serv_action_valid = 1, + .opcode = PERSISTENT_RESERVE_OUT, + .service_action = PRO_PREEMPT_AND_ABORT, + .cdb_size = 10, + .usage_bits = {PERSISTENT_RESERVE_OUT, PRO_PREEMPT_AND_ABORT, 0xff, 0x00, + 0x00, 0xff, 0xff, 0xff, + 0xff, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_pro_reg_ign_exist = { + .support = SCSI_SUPPORT_FULL, + .serv_action_valid = 1, + .opcode = PERSISTENT_RESERVE_OUT, + .service_action = PRO_REGISTER_AND_IGNORE_EXISTING_KEY, + .cdb_size = 10, + .usage_bits = { + PERSISTENT_RESERVE_OUT, PRO_REGISTER_AND_IGNORE_EXISTING_KEY, + 0xff, 0x00, + 0x00, 0xff, 0xff, 0xff, + 
0xff, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_pro_register_move = { + .support = SCSI_SUPPORT_FULL, + .serv_action_valid = 1, + .opcode = PERSISTENT_RESERVE_OUT, + .service_action = PRO_REGISTER_AND_MOVE, + .cdb_size = 10, + .usage_bits = {PERSISTENT_RESERVE_OUT, PRO_REGISTER_AND_MOVE, 0xff, 0x00, + 0x00, 0xff, 0xff, 0xff, + 0xff, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_release = { + .support = SCSI_SUPPORT_FULL, + .opcode = RELEASE, + .cdb_size = 6, + .usage_bits = {RELEASE, 0x00, 0x00, 0x00, + 0x00, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_release10 = { + .support = SCSI_SUPPORT_FULL, + .opcode = RELEASE_10, + .cdb_size = 10, + .usage_bits = {RELEASE_10, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xff, + 0xff, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_reserve = { + .support = SCSI_SUPPORT_FULL, + .opcode = RESERVE, + .cdb_size = 6, + .usage_bits = {RESERVE, 0x00, 0x00, 0x00, + 0x00, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_reserve10 = { + .support = SCSI_SUPPORT_FULL, + .opcode = RESERVE_10, + .cdb_size = 10, + .usage_bits = {RESERVE_10, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xff, + 0xff, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_request_sense = { + .support = SCSI_SUPPORT_FULL, + .opcode = REQUEST_SENSE, + .cdb_size = 6, + .usage_bits = {REQUEST_SENSE, 0x00, 0x00, 0x00, + 0xff, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_inquiry = { + .support = SCSI_SUPPORT_FULL, + .opcode = INQUIRY, + .cdb_size = 6, + .usage_bits = {INQUIRY, 0x01, 0xff, 0xff, + 0xff, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_extended_copy_lid1 = { + .support = SCSI_SUPPORT_FULL, + .serv_action_valid = 1, + .opcode = EXTENDED_COPY, + .cdb_size = 16, + .usage_bits = {EXTENDED_COPY, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 
0x00, + 0x00, 0x00, 0xff, 0xff, + 0xff, 0xff, 0x00, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_rcv_copy_res_op_params = { + .support = SCSI_SUPPORT_FULL, + .serv_action_valid = 1, + .opcode = RECEIVE_COPY_RESULTS, + .service_action = RCR_SA_OPERATING_PARAMETERS, + .cdb_size = 16, + .usage_bits = {RECEIVE_COPY_RESULTS, RCR_SA_OPERATING_PARAMETERS, + 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xff, 0xff, + 0xff, 0xff, 0x00, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_report_luns = { + .support = SCSI_SUPPORT_FULL, + .opcode = REPORT_LUNS, + .cdb_size = 12, + .usage_bits = {REPORT_LUNS, 0x00, 0xff, 0x00, + 0x00, 0x00, 0xff, 0xff, + 0xff, 0xff, 0x00, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_test_unit_ready = { + .support = SCSI_SUPPORT_FULL, + .opcode = TEST_UNIT_READY, + .cdb_size = 6, + .usage_bits = {TEST_UNIT_READY, 0x00, 0x00, 0x00, + 0x00, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_report_target_pgs = { + .support = SCSI_SUPPORT_FULL, + .serv_action_valid = 1, + .opcode = MAINTENANCE_IN, + .service_action = MI_REPORT_TARGET_PGS, + .cdb_size = 12, + .usage_bits = {MAINTENANCE_IN, 0xE0 | MI_REPORT_TARGET_PGS, 0x00, 0x00, + 0x00, 0x00, 0xff, 0xff, + 0xff, 0xff, 0x00, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_report_supp_opcodes = { + .support = SCSI_SUPPORT_FULL, + .serv_action_valid = 1, + .opcode = MAINTENANCE_IN, + .service_action = MI_REPORT_SUPPORTED_OPERATION_CODES, + .cdb_size = 12, + .usage_bits = {MAINTENANCE_IN, MI_REPORT_SUPPORTED_OPERATION_CODES, + 0x87, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0x00, SCSI_CONTROL_MASK}, +}; + +static struct target_opcode_descriptor tcm_opcode_set_tpg = { + .support = SCSI_SUPPORT_FULL, + .serv_action_valid = 1, + .opcode = MAINTENANCE_OUT, + .service_action = MO_SET_TARGET_PGS, + .cdb_size = 12, + .usage_bits = {MAINTENANCE_OUT, 
MO_SET_TARGET_PGS, 0x00, 0x00, + 0x00, 0x00, 0xff, 0xff, + 0xff, 0xff, 0x00, SCSI_CONTROL_MASK}, +}; static struct target_opcode_descriptor *tcm_supported_opcodes[] = { + &tcm_opcode_read6, + &tcm_opcode_read10, + &tcm_opcode_read12, + &tcm_opcode_read16, + &tcm_opcode_write6, + &tcm_opcode_write10, + &tcm_opcode_write_verify10, + &tcm_opcode_write12, + &tcm_opcode_write16, + &tcm_opcode_write_verify16, + &tcm_opcode_write_same32, + &tcm_opcode_compare_write, + &tcm_opcode_read_capacity, + &tcm_opcode_read_capacity16, + &tcm_opcode_read_report_refferals, + &tcm_opcode_sync_cache, + &tcm_opcode_sync_cache16, + &tcm_opcode_unmap, + &tcm_opcode_write_same, + &tcm_opcode_write_same16, + &tcm_opcode_verify, + &tcm_opcode_verify16, + &tcm_opcode_start_stop, + &tcm_opcode_mode_select, + &tcm_opcode_mode_select10, + &tcm_opcode_mode_sense, + &tcm_opcode_mode_sense10, + &tcm_opcode_pri_read_keys, + &tcm_opcode_pri_read_resrv, + &tcm_opcode_pri_read_caps, + &tcm_opcode_pri_read_full_status, + &tcm_opcode_pro_register, + &tcm_opcode_pro_reserve, + &tcm_opcode_pro_release, + &tcm_opcode_pro_clear, + &tcm_opcode_pro_preempt, + &tcm_opcode_pro_preempt_abort, + &tcm_opcode_pro_reg_ign_exist, + &tcm_opcode_pro_register_move, + &tcm_opcode_release, + &tcm_opcode_release10, + &tcm_opcode_reserve, + &tcm_opcode_reserve10, + &tcm_opcode_request_sense, + &tcm_opcode_inquiry, + &tcm_opcode_extended_copy_lid1, + &tcm_opcode_rcv_copy_res_op_params, + &tcm_opcode_report_luns, + &tcm_opcode_test_unit_ready, + &tcm_opcode_report_target_pgs, + &tcm_opcode_report_supp_opcodes, + &tcm_opcode_set_tpg, }; static int diff --git a/include/scsi/scsi_proto.h b/include/scsi/scsi_proto.h index 651b5183451c..cb722225b3bc 100644 --- a/include/scsi/scsi_proto.h +++ b/include/scsi/scsi_proto.h @@ -349,4 +349,7 @@ enum scsi_support_opcode { SCSI_SUPPORT_VENDOR = 5, }; +#define SCSI_CONTROL_MASK 0 +#define SCSI_GROUP_NUMBER_MASK 0 + #endif /* _SCSI_PROTO_H_ */ From 553b08d9b3a78aa602f818c0c94705774f018df0 
Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Tue, 6 Sep 2022 13:34:18 +0300 Subject: [PATCH 0473/4122] scsi: target: core: Dynamic opcode support in RSOC Report supported opcodes depending on a dynamic device configuration. Reviewed-by: Roman Bolshakov Signed-off-by: Dmitry Bogdanov Link: https://lore.kernel.org/r/20220906103421.22348-4-d.bogdanov@yadro.com Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/target/target_core_spc.c | 120 ++++++++++++++++++++++++++++-- include/target/target_core_base.h | 1 + 2 files changed, 116 insertions(+), 5 deletions(-) diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c index 31cd6f31f6b1..e1cf9c352fd3 100644 --- a/drivers/target/target_core_spc.c +++ b/drivers/target/target_core_spc.c @@ -1405,6 +1405,15 @@ static struct target_opcode_descriptor tcm_opcode_write_verify16 = { 0xff, 0xff, SCSI_GROUP_NUMBER_MASK, SCSI_CONTROL_MASK}, }; +static bool tcm_is_ws_enabled(struct se_cmd *cmd) +{ + struct sbc_ops *ops = cmd->protocol_data; + struct se_device *dev = cmd->se_dev; + + return (dev->dev_attrib.emulate_tpws && !!ops->execute_unmap) || + !!ops->execute_write_same; +} + static struct target_opcode_descriptor tcm_opcode_write_same32 = { .support = SCSI_SUPPORT_FULL, .serv_action_valid = 1, @@ -1419,8 +1428,16 @@ static struct target_opcode_descriptor tcm_opcode_write_same32 = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff}, + .enabled = tcm_is_ws_enabled, }; +static bool tcm_is_caw_enabled(struct se_cmd *cmd) +{ + struct se_device *dev = cmd->se_dev; + + return dev->dev_attrib.emulate_caw; +} + static struct target_opcode_descriptor tcm_opcode_compare_write = { .support = SCSI_SUPPORT_FULL, .opcode = COMPARE_AND_WRITE, @@ -1429,6 +1446,7 @@ static struct target_opcode_descriptor tcm_opcode_compare_write = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0xff, SCSI_GROUP_NUMBER_MASK, SCSI_CONTROL_MASK}, + .enabled = tcm_is_caw_enabled, }; 
static struct target_opcode_descriptor tcm_opcode_read_capacity = { @@ -1452,6 +1470,20 @@ static struct target_opcode_descriptor tcm_opcode_read_capacity16 = { 0xff, 0xff, 0x00, SCSI_CONTROL_MASK}, }; +static bool tcm_is_rep_ref_enabled(struct se_cmd *cmd) +{ + struct se_device *dev = cmd->se_dev; + + spin_lock(&dev->t10_alua.lba_map_lock); + if (list_empty(&dev->t10_alua.lba_map_list)) { + spin_unlock(&dev->t10_alua.lba_map_lock); + return false; + } + spin_unlock(&dev->t10_alua.lba_map_lock); + return true; + +} + static struct target_opcode_descriptor tcm_opcode_read_report_refferals = { .support = SCSI_SUPPORT_FULL, .serv_action_valid = 1, @@ -1462,6 +1494,7 @@ static struct target_opcode_descriptor tcm_opcode_read_report_refferals = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x00, SCSI_CONTROL_MASK}, + .enabled = tcm_is_rep_ref_enabled, }; static struct target_opcode_descriptor tcm_opcode_sync_cache = { @@ -1483,6 +1516,14 @@ static struct target_opcode_descriptor tcm_opcode_sync_cache16 = { 0xff, 0xff, SCSI_GROUP_NUMBER_MASK, SCSI_CONTROL_MASK}, }; +static bool tcm_is_unmap_enabled(struct se_cmd *cmd) +{ + struct sbc_ops *ops = cmd->protocol_data; + struct se_device *dev = cmd->se_dev; + + return ops->execute_unmap && dev->dev_attrib.emulate_tpu; +} + static struct target_opcode_descriptor tcm_opcode_unmap = { .support = SCSI_SUPPORT_FULL, .opcode = UNMAP, @@ -1490,6 +1531,7 @@ static struct target_opcode_descriptor tcm_opcode_unmap = { .usage_bits = {UNMAP, 0x00, 0x00, 0x00, 0x00, 0x00, SCSI_GROUP_NUMBER_MASK, 0xff, 0xff, SCSI_CONTROL_MASK}, + .enabled = tcm_is_unmap_enabled, }; static struct target_opcode_descriptor tcm_opcode_write_same = { @@ -1499,6 +1541,7 @@ static struct target_opcode_descriptor tcm_opcode_write_same = { .usage_bits = {WRITE_SAME, 0xe8, 0xff, 0xff, 0xff, 0xff, SCSI_GROUP_NUMBER_MASK, 0xff, 0xff, SCSI_CONTROL_MASK}, + .enabled = tcm_is_ws_enabled, }; static struct target_opcode_descriptor tcm_opcode_write_same16 = { 
@@ -1509,6 +1552,7 @@ static struct target_opcode_descriptor tcm_opcode_write_same16 = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, SCSI_GROUP_NUMBER_MASK, SCSI_CONTROL_MASK}, + .enabled = tcm_is_ws_enabled, }; static struct target_opcode_descriptor tcm_opcode_verify = { @@ -1594,6 +1638,13 @@ static struct target_opcode_descriptor tcm_opcode_pri_read_resrv = { 0xff, SCSI_CONTROL_MASK}, }; +static bool tcm_is_pr_enabled(struct se_cmd *cmd) +{ + struct se_device *dev = cmd->se_dev; + + return dev->dev_attrib.emulate_pr; +} + static struct target_opcode_descriptor tcm_opcode_pri_read_caps = { .support = SCSI_SUPPORT_FULL, .serv_action_valid = 1, @@ -1603,6 +1654,7 @@ static struct target_opcode_descriptor tcm_opcode_pri_read_caps = { .usage_bits = {PERSISTENT_RESERVE_IN, PRI_REPORT_CAPABILITIES, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, SCSI_CONTROL_MASK}, + .enabled = tcm_is_pr_enabled, }; static struct target_opcode_descriptor tcm_opcode_pri_read_full_status = { @@ -1614,6 +1666,7 @@ static struct target_opcode_descriptor tcm_opcode_pri_read_full_status = { .usage_bits = {PERSISTENT_RESERVE_IN, PRI_READ_FULL_STATUS, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, SCSI_CONTROL_MASK}, + .enabled = tcm_is_pr_enabled, }; static struct target_opcode_descriptor tcm_opcode_pro_register = { @@ -1625,6 +1678,7 @@ static struct target_opcode_descriptor tcm_opcode_pro_register = { .usage_bits = {PERSISTENT_RESERVE_OUT, PRO_REGISTER, 0xff, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, SCSI_CONTROL_MASK}, + .enabled = tcm_is_pr_enabled, }; static struct target_opcode_descriptor tcm_opcode_pro_reserve = { @@ -1636,6 +1690,7 @@ static struct target_opcode_descriptor tcm_opcode_pro_reserve = { .usage_bits = {PERSISTENT_RESERVE_OUT, PRO_RESERVE, 0xff, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, SCSI_CONTROL_MASK}, + .enabled = tcm_is_pr_enabled, }; static struct target_opcode_descriptor tcm_opcode_pro_release = { @@ -1647,6 +1702,7 @@ static struct target_opcode_descriptor 
tcm_opcode_pro_release = { .usage_bits = {PERSISTENT_RESERVE_OUT, PRO_RELEASE, 0xff, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, SCSI_CONTROL_MASK}, + .enabled = tcm_is_pr_enabled, }; static struct target_opcode_descriptor tcm_opcode_pro_clear = { @@ -1658,6 +1714,7 @@ static struct target_opcode_descriptor tcm_opcode_pro_clear = { .usage_bits = {PERSISTENT_RESERVE_OUT, PRO_CLEAR, 0xff, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, SCSI_CONTROL_MASK}, + .enabled = tcm_is_pr_enabled, }; static struct target_opcode_descriptor tcm_opcode_pro_preempt = { @@ -1669,6 +1726,7 @@ static struct target_opcode_descriptor tcm_opcode_pro_preempt = { .usage_bits = {PERSISTENT_RESERVE_OUT, PRO_PREEMPT, 0xff, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, SCSI_CONTROL_MASK}, + .enabled = tcm_is_pr_enabled, }; static struct target_opcode_descriptor tcm_opcode_pro_preempt_abort = { @@ -1680,6 +1738,7 @@ static struct target_opcode_descriptor tcm_opcode_pro_preempt_abort = { .usage_bits = {PERSISTENT_RESERVE_OUT, PRO_PREEMPT_AND_ABORT, 0xff, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, SCSI_CONTROL_MASK}, + .enabled = tcm_is_pr_enabled, }; static struct target_opcode_descriptor tcm_opcode_pro_reg_ign_exist = { @@ -1693,6 +1752,7 @@ static struct target_opcode_descriptor tcm_opcode_pro_reg_ign_exist = { 0xff, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, SCSI_CONTROL_MASK}, + .enabled = tcm_is_pr_enabled, }; static struct target_opcode_descriptor tcm_opcode_pro_register_move = { @@ -1704,14 +1764,23 @@ static struct target_opcode_descriptor tcm_opcode_pro_register_move = { .usage_bits = {PERSISTENT_RESERVE_OUT, PRO_REGISTER_AND_MOVE, 0xff, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, SCSI_CONTROL_MASK}, + .enabled = tcm_is_pr_enabled, }; +static bool tcm_is_scsi2_reservations_enabled(struct se_cmd *cmd) +{ + struct se_device *dev = cmd->se_dev; + + return dev->dev_attrib.emulate_pr; +} + static struct target_opcode_descriptor tcm_opcode_release = { .support = SCSI_SUPPORT_FULL, .opcode = RELEASE, .cdb_size = 6, .usage_bits = {RELEASE, 0x00, 
0x00, 0x00, 0x00, SCSI_CONTROL_MASK}, + .enabled = tcm_is_scsi2_reservations_enabled, }; static struct target_opcode_descriptor tcm_opcode_release10 = { @@ -1721,6 +1790,7 @@ static struct target_opcode_descriptor tcm_opcode_release10 = { .usage_bits = {RELEASE_10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, SCSI_CONTROL_MASK}, + .enabled = tcm_is_scsi2_reservations_enabled, }; static struct target_opcode_descriptor tcm_opcode_reserve = { @@ -1729,6 +1799,7 @@ static struct target_opcode_descriptor tcm_opcode_reserve = { .cdb_size = 6, .usage_bits = {RESERVE, 0x00, 0x00, 0x00, 0x00, SCSI_CONTROL_MASK}, + .enabled = tcm_is_scsi2_reservations_enabled, }; static struct target_opcode_descriptor tcm_opcode_reserve10 = { @@ -1738,6 +1809,7 @@ static struct target_opcode_descriptor tcm_opcode_reserve10 = { .usage_bits = {RESERVE_10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, SCSI_CONTROL_MASK}, + .enabled = tcm_is_scsi2_reservations_enabled, }; static struct target_opcode_descriptor tcm_opcode_request_sense = { @@ -1756,6 +1828,13 @@ static struct target_opcode_descriptor tcm_opcode_inquiry = { 0xff, SCSI_CONTROL_MASK}, }; +static bool tcm_is_3pc_enabled(struct se_cmd *cmd) +{ + struct se_device *dev = cmd->se_dev; + + return dev->dev_attrib.emulate_3pc; +} + static struct target_opcode_descriptor tcm_opcode_extended_copy_lid1 = { .support = SCSI_SUPPORT_FULL, .serv_action_valid = 1, @@ -1765,6 +1844,7 @@ static struct target_opcode_descriptor tcm_opcode_extended_copy_lid1 = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x00, SCSI_CONTROL_MASK}, + .enabled = tcm_is_3pc_enabled, }; static struct target_opcode_descriptor tcm_opcode_rcv_copy_res_op_params = { @@ -1778,6 +1858,7 @@ static struct target_opcode_descriptor tcm_opcode_rcv_copy_res_op_params = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x00, SCSI_CONTROL_MASK}, + .enabled = tcm_is_3pc_enabled, }; static struct target_opcode_descriptor tcm_opcode_report_luns = { @@ 
-1820,6 +1901,26 @@ static struct target_opcode_descriptor tcm_opcode_report_supp_opcodes = { 0xff, 0xff, 0x00, SCSI_CONTROL_MASK}, }; +static bool tcm_is_set_tpg_enabled(struct se_cmd *cmd) +{ + struct t10_alua_tg_pt_gp *l_tg_pt_gp; + struct se_lun *l_lun = cmd->se_lun; + + rcu_read_lock(); + l_tg_pt_gp = rcu_dereference(l_lun->lun_tg_pt_gp); + if (!l_tg_pt_gp) { + rcu_read_unlock(); + return false; + } + if (!(l_tg_pt_gp->tg_pt_gp_alua_access_type & TPGS_EXPLICIT_ALUA)) { + rcu_read_unlock(); + return false; + } + rcu_read_unlock(); + + return true; +} + static struct target_opcode_descriptor tcm_opcode_set_tpg = { .support = SCSI_SUPPORT_FULL, .serv_action_valid = 1, @@ -1829,6 +1930,7 @@ static struct target_opcode_descriptor tcm_opcode_set_tpg = { .usage_bits = {MAINTENANCE_OUT, MO_SET_TARGET_PGS, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x00, SCSI_CONTROL_MASK}, + .enabled = tcm_is_set_tpg_enabled, }; static struct target_opcode_descriptor *tcm_supported_opcodes[] = { @@ -1982,7 +2084,9 @@ spc_rsoc_get_descr(struct se_cmd *cmd, struct target_opcode_descriptor **opcode) */ if (descr->serv_action_valid) return TCM_INVALID_CDB_FIELD; - *opcode = descr; + + if (!descr->enabled || descr->enabled(cmd)) + *opcode = descr; break; case 0x2: /* @@ -1994,9 +2098,10 @@ spc_rsoc_get_descr(struct se_cmd *cmd, struct target_opcode_descriptor **opcode) * and the additional sense code set to INVALID FIELD IN CDB. */ if (descr->serv_action_valid && - descr->service_action == requested_sa) - *opcode = descr; - else if (!descr->serv_action_valid) + descr->service_action == requested_sa) { + if (!descr->enabled || descr->enabled(cmd)) + *opcode = descr; + } else if (!descr->serv_action_valid) return TCM_INVALID_CDB_FIELD; break; case 0x3: @@ -2007,10 +2112,12 @@ spc_rsoc_get_descr(struct se_cmd *cmd, struct target_opcode_descriptor **opcode) * be returned in the one_command parameter data format. 
*/ if (descr->service_action == requested_sa) - *opcode = descr; + if (!descr->enabled || descr->enabled(cmd)) + *opcode = descr; break; } } + return 0; } @@ -2059,6 +2166,9 @@ spc_emulate_report_supp_op_codes(struct se_cmd *cmd) for (i = 0; i < ARRAY_SIZE(tcm_supported_opcodes); i++) { descr = tcm_supported_opcodes[i]; + if (descr->enabled && !descr->enabled(cmd)) + continue; + response_length += spc_rsoc_encode_command_descriptor( &buf[response_length], rctd, descr); } diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 02a2d48d20b6..7542a8de8fb5 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -876,6 +876,7 @@ struct target_opcode_descriptor { u8 specific_timeout; u16 nominal_timeout; u16 recommended_timeout; + bool (*enabled)(struct se_cmd *cmd); u8 usage_bits[]; }; From bd217b8c3a1f705f2d92d30974412fbd5f43271a Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Tue, 6 Sep 2022 13:34:19 +0300 Subject: [PATCH 0474/4122] scsi: target: core: Add emulate_rsoc attribute Allow support for RSOC to be turned off via the emulate_rsoc attribute. This is just for testing purposes. Reviewed-by: Roman Bolshakov Signed-off-by: Dmitry Bogdanov Link: https://lore.kernel.org/r/20220906103421.22348-5-d.bogdanov@yadro.com Reviewed-by: Mike Christie Signed-off-by: Martin K. 
Petersen --- drivers/target/target_core_configfs.c | 20 ++++++++++++++++++++ drivers/target/target_core_device.c | 1 + drivers/target/target_core_spc.c | 12 ++++++++++++ include/target/target_core_base.h | 3 +++ 4 files changed, 36 insertions(+) diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 416514c5c7ac..533524299ed6 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -547,6 +547,7 @@ DEF_CONFIGFS_ATTRIB_SHOW(unmap_granularity); DEF_CONFIGFS_ATTRIB_SHOW(unmap_granularity_alignment); DEF_CONFIGFS_ATTRIB_SHOW(unmap_zeroes_data); DEF_CONFIGFS_ATTRIB_SHOW(max_write_same_len); +DEF_CONFIGFS_ATTRIB_SHOW(emulate_rsoc); #define DEF_CONFIGFS_ATTRIB_STORE_U32(_name) \ static ssize_t _name##_store(struct config_item *item, const char *page,\ @@ -1186,6 +1187,23 @@ static ssize_t pgr_support_store(struct config_item *item, return count; } +static ssize_t emulate_rsoc_store(struct config_item *item, + const char *page, size_t count) +{ + struct se_dev_attrib *da = to_attrib(item); + bool flag; + int ret; + + ret = strtobool(page, &flag); + if (ret < 0) + return ret; + + da->emulate_rsoc = flag; + pr_debug("dev[%p]: SE Device REPORT_SUPPORTED_OPERATION_CODES_EMULATION flag: %d\n", + da->da_dev, flag); + return count; +} + CONFIGFS_ATTR(, emulate_model_alias); CONFIGFS_ATTR(, emulate_dpo); CONFIGFS_ATTR(, emulate_fua_write); @@ -1198,6 +1216,7 @@ CONFIGFS_ATTR(, emulate_tpws); CONFIGFS_ATTR(, emulate_caw); CONFIGFS_ATTR(, emulate_3pc); CONFIGFS_ATTR(, emulate_pr); +CONFIGFS_ATTR(, emulate_rsoc); CONFIGFS_ATTR(, pi_prot_type); CONFIGFS_ATTR_RO(, hw_pi_prot_type); CONFIGFS_ATTR(, pi_prot_format); @@ -1261,6 +1280,7 @@ struct configfs_attribute *sbc_attrib_attrs[] = { &attr_max_write_same_len, &attr_alua_support, &attr_pgr_support, + &attr_emulate_rsoc, NULL, }; EXPORT_SYMBOL(sbc_attrib_attrs); diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 
b7f16ee8aa0e..e7d202b57405 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -785,6 +785,7 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) dev->dev_attrib.emulate_caw = DA_EMULATE_CAW; dev->dev_attrib.emulate_3pc = DA_EMULATE_3PC; dev->dev_attrib.emulate_pr = DA_EMULATE_PR; + dev->dev_attrib.emulate_rsoc = DA_EMULATE_RSOC; dev->dev_attrib.pi_prot_type = TARGET_DIF_TYPE0_PROT; dev->dev_attrib.enforce_pr_isids = DA_ENFORCE_PR_ISIDS; dev->dev_attrib.force_pr_aptpl = DA_FORCE_PR_APTPL; diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c index e1cf9c352fd3..91f03312a5ea 100644 --- a/drivers/target/target_core_spc.c +++ b/drivers/target/target_core_spc.c @@ -1889,6 +1889,14 @@ static struct target_opcode_descriptor tcm_opcode_report_target_pgs = { 0xff, 0xff, 0x00, SCSI_CONTROL_MASK}, }; + +static bool spc_rsoc_enabled(struct se_cmd *cmd) +{ + struct se_device *dev = cmd->se_dev; + + return dev->dev_attrib.emulate_rsoc; +} + static struct target_opcode_descriptor tcm_opcode_report_supp_opcodes = { .support = SCSI_SUPPORT_FULL, .serv_action_valid = 1, @@ -1899,6 +1907,7 @@ static struct target_opcode_descriptor tcm_opcode_report_supp_opcodes = { 0x87, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, SCSI_CONTROL_MASK}, + .enabled = spc_rsoc_enabled, }; static bool tcm_is_set_tpg_enabled(struct se_cmd *cmd) @@ -2135,6 +2144,9 @@ spc_emulate_report_supp_op_codes(struct se_cmd *cmd) sense_reason_t ret = 0; int i; + if (!cmd->se_dev->dev_attrib.emulate_rsoc) + return TCM_UNSUPPORTED_SCSI_OPCODE; + rbuf = transport_kmap_data_sg(cmd); if (cmd->data_length && !rbuf) { ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 7542a8de8fb5..062ee8b6c433 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -91,6 +91,8 @@ #define DA_EMULATE_ALUA 0 /* Emulate SCSI2 
RESERVE/RELEASE and Persistent Reservations by default */ #define DA_EMULATE_PR 1 +/* Emulation for REPORT SUPPORTED OPERATION CODES */ +#define DA_EMULATE_RSOC 1 /* Enforce SCSI Initiator Port TransportID with 'ISID' for PR */ #define DA_ENFORCE_PR_ISIDS 1 /* Force SPC-3 PR Activate Persistence across Target Power Loss */ @@ -690,6 +692,7 @@ struct se_dev_attrib { bool emulate_caw; bool emulate_3pc; bool emulate_pr; + bool emulate_rsoc; enum target_prot_type pi_prot_type; enum target_prot_type hw_pi_prot_type; bool pi_prot_verify; From b8908e5e1d1de66e6905fbec7cdfbbf8ac3ecf9a Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Tue, 6 Sep 2022 13:34:20 +0300 Subject: [PATCH 0475/4122] scsi: target: core: Check emulate_3pc for RECEIVE COPY RECEIVE COPY RESULTS is an opcode from 3rd party copy command set and shall be rejected if emulate_3pc attribute is off like EXTENDED COPY. Reviewed-by: Roman Bolshakov Signed-off-by: Dmitry Bogdanov Link: https://lore.kernel.org/r/20220906103421.22348-6-d.bogdanov@yadro.com Reviewed-by: Mike Christie Signed-off-by: Martin K. 
Petersen --- drivers/target/target_core_xcopy.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index 8713cda0c2fb..edf522208285 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -1009,8 +1009,14 @@ sense_reason_t target_do_receive_copy_results(struct se_cmd *se_cmd) { unsigned char *cdb = &se_cmd->t_task_cdb[0]; int sa = (cdb[1] & 0x1f), list_id = cdb[2]; + struct se_device *dev = se_cmd->se_dev; sense_reason_t rc = TCM_NO_SENSE; + if (!dev->dev_attrib.emulate_3pc) { + pr_debug("Third-party copy operations explicitly disabled\n"); + return TCM_UNSUPPORTED_SCSI_OPCODE; + } + pr_debug("Entering target_do_receive_copy_results: SA: 0x%02x, List ID:" " 0x%02x, AL: %u\n", sa, list_id, se_cmd->data_length); From 415d82b4401150c32687e1b7cc68de621ad24663 Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Tue, 6 Sep 2022 13:34:21 +0300 Subject: [PATCH 0476/4122] scsi: target: core: Dynamically set DPO and FUA in usage_bits libiscsi tests check the support of DPO & FUA bits in usage bits of RSOC response. This patch adds support for dynamic usage bits for each opcode. Set support of DPO & FUA bits in usage_bits of RSOC response depending on support DPOFUA in the backstore device. Reviewed-by: Roman Bolshakov Reviewed-by: Konstantin Shelekhin Signed-off-by: Dmitry Bogdanov Link: https://lore.kernel.org/r/20220906103421.22348-7-d.bogdanov@yadro.com Reviewed-by: Mike Christie Signed-off-by: Martin K. 
Petersen --- drivers/target/target_core_spc.c | 34 +++++++++++++++++++++++++++++-- include/target/target_core_base.h | 2 ++ 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c index 91f03312a5ea..ffe02e195733 100644 --- a/drivers/target/target_core_spc.c +++ b/drivers/target/target_core_spc.c @@ -1314,6 +1314,22 @@ spc_emulate_testunitready(struct se_cmd *cmd) return 0; } +static void set_dpofua_usage_bits(u8 *usage_bits, struct se_device *dev) +{ + if (!target_check_fua(dev)) + usage_bits[1] &= ~0x18; + else + usage_bits[1] |= 0x18; +} + +static void set_dpofua_usage_bits32(u8 *usage_bits, struct se_device *dev) +{ + if (!target_check_fua(dev)) + usage_bits[10] &= ~0x18; + else + usage_bits[10] |= 0x18; +} + static struct target_opcode_descriptor tcm_opcode_read6 = { .support = SCSI_SUPPORT_FULL, .opcode = READ_6, @@ -1329,6 +1345,7 @@ static struct target_opcode_descriptor tcm_opcode_read10 = { .usage_bits = {READ_10, 0xf8, 0xff, 0xff, 0xff, 0xff, SCSI_GROUP_NUMBER_MASK, 0xff, 0xff, SCSI_CONTROL_MASK}, + .update_usage_bits = set_dpofua_usage_bits, }; static struct target_opcode_descriptor tcm_opcode_read12 = { @@ -1338,6 +1355,7 @@ static struct target_opcode_descriptor tcm_opcode_read12 = { .usage_bits = {READ_12, 0xf8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, SCSI_GROUP_NUMBER_MASK, SCSI_CONTROL_MASK}, + .update_usage_bits = set_dpofua_usage_bits, }; static struct target_opcode_descriptor tcm_opcode_read16 = { @@ -1348,6 +1366,7 @@ static struct target_opcode_descriptor tcm_opcode_read16 = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, SCSI_GROUP_NUMBER_MASK, SCSI_CONTROL_MASK}, + .update_usage_bits = set_dpofua_usage_bits, }; static struct target_opcode_descriptor tcm_opcode_write6 = { @@ -1365,6 +1384,7 @@ static struct target_opcode_descriptor tcm_opcode_write10 = { .usage_bits = {WRITE_10, 0xf8, 0xff, 0xff, 0xff, 0xff, SCSI_GROUP_NUMBER_MASK, 0xff, 0xff, 
SCSI_CONTROL_MASK}, + .update_usage_bits = set_dpofua_usage_bits, }; static struct target_opcode_descriptor tcm_opcode_write_verify10 = { @@ -1374,6 +1394,7 @@ static struct target_opcode_descriptor tcm_opcode_write_verify10 = { .usage_bits = {WRITE_VERIFY, 0xf0, 0xff, 0xff, 0xff, 0xff, SCSI_GROUP_NUMBER_MASK, 0xff, 0xff, SCSI_CONTROL_MASK}, + .update_usage_bits = set_dpofua_usage_bits, }; static struct target_opcode_descriptor tcm_opcode_write12 = { @@ -1383,6 +1404,7 @@ static struct target_opcode_descriptor tcm_opcode_write12 = { .usage_bits = {WRITE_12, 0xf8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, SCSI_GROUP_NUMBER_MASK, SCSI_CONTROL_MASK}, + .update_usage_bits = set_dpofua_usage_bits, }; static struct target_opcode_descriptor tcm_opcode_write16 = { @@ -1393,6 +1415,7 @@ static struct target_opcode_descriptor tcm_opcode_write16 = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, SCSI_GROUP_NUMBER_MASK, SCSI_CONTROL_MASK}, + .update_usage_bits = set_dpofua_usage_bits, }; static struct target_opcode_descriptor tcm_opcode_write_verify16 = { @@ -1403,6 +1426,7 @@ static struct target_opcode_descriptor tcm_opcode_write_verify16 = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, SCSI_GROUP_NUMBER_MASK, SCSI_CONTROL_MASK}, + .update_usage_bits = set_dpofua_usage_bits, }; static bool tcm_is_ws_enabled(struct se_cmd *cmd) @@ -1429,6 +1453,7 @@ static struct target_opcode_descriptor tcm_opcode_write_same32 = { 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff}, .enabled = tcm_is_ws_enabled, + .update_usage_bits = set_dpofua_usage_bits32, }; static bool tcm_is_caw_enabled(struct se_cmd *cmd) @@ -1447,6 +1472,7 @@ static struct target_opcode_descriptor tcm_opcode_compare_write = { 0xff, 0xff, 0x00, 0x00, 0x00, 0xff, SCSI_GROUP_NUMBER_MASK, SCSI_CONTROL_MASK}, .enabled = tcm_is_caw_enabled, + .update_usage_bits = set_dpofua_usage_bits, }; static struct target_opcode_descriptor tcm_opcode_read_capacity = { @@ -2033,7 +2059,8 @@ 
spc_rsoc_encode_command_descriptor(unsigned char *buf, u8 ctdp, static int spc_rsoc_encode_one_command_descriptor(unsigned char *buf, u8 ctdp, - struct target_opcode_descriptor *descr) + struct target_opcode_descriptor *descr, + struct se_device *dev) { int td_size = 0; @@ -2045,6 +2072,8 @@ spc_rsoc_encode_one_command_descriptor(unsigned char *buf, u8 ctdp, buf[1] = (ctdp << 7) | SCSI_SUPPORT_FULL; put_unaligned_be16(descr->cdb_size, &buf[2]); memcpy(&buf[4], descr->usage_bits, descr->cdb_size); + if (descr->update_usage_bits) + descr->update_usage_bits(&buf[4], dev); td_size = spc_rsoc_encode_command_timeouts_descriptor( &buf[4 + descr->cdb_size], ctdp, descr); @@ -2187,7 +2216,8 @@ spc_emulate_report_supp_op_codes(struct se_cmd *cmd) put_unaligned_be32(response_length - 3, buf); } else { response_length = spc_rsoc_encode_one_command_descriptor( - &buf[response_length], rctd, descr); + &buf[response_length], rctd, descr, + cmd->se_dev); } memcpy(rbuf, buf, min_t(u32, response_length, cmd->data_length)); diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 062ee8b6c433..0c1e43980985 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -880,6 +880,8 @@ struct target_opcode_descriptor { u16 nominal_timeout; u16 recommended_timeout; bool (*enabled)(struct se_cmd *cmd); + void (*update_usage_bits)(u8 *usage_bits, + struct se_device *dev); u8 usage_bits[]; }; From a301d487d7bde62de43671a1642f8f5a2e2cceef Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Mon, 24 Oct 2022 20:06:02 +0800 Subject: [PATCH 0477/4122] scsi: ufs: core: Print events for WLUN suspend and resume failures WLUN suspend and resume events are currently not handled by ufshcd_print_evt_hist(). Add the missing events. Signed-off-by: Peter Wang Link: https://lore.kernel.org/r/20221024120602.30019-1-peter.wang@mediatek.com Reviewed-by: Stanley Chu Reviewed-by: Asutosh Das Signed-off-by: Martin K. 
Petersen --- drivers/ufs/core/ufshcd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index b2203dd79e8c..008fc60392fc 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -486,6 +486,9 @@ static void ufshcd_print_evt_hist(struct ufs_hba *hba) ufshcd_print_evt(hba, UFS_EVT_RESUME_ERR, "resume_fail"); ufshcd_print_evt(hba, UFS_EVT_SUSPEND_ERR, "suspend_fail"); + ufshcd_print_evt(hba, UFS_EVT_WL_RES_ERR, "wlun resume_fail"); + ufshcd_print_evt(hba, UFS_EVT_WL_SUSP_ERR, + "wlun suspend_fail"); ufshcd_print_evt(hba, UFS_EVT_DEV_RESET, "dev_reset"); ufshcd_print_evt(hba, UFS_EVT_HOST_RESET, "host_reset"); ufshcd_print_evt(hba, UFS_EVT_ABORT, "task_abort"); From 25ad6f63e77eeafc3a9f17c92aadd66c56599fdc Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 24 Oct 2022 15:11:01 +0100 Subject: [PATCH 0478/4122] scsi: pcmcia: nsp_cs: Remove unused variable i Variable i is just being incremented and it's never used anywhere else. The variable and the increment are redundant so remove it. Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20221024141101.2161167-1-colin.i.king@gmail.com Signed-off-by: Martin K. 
Petersen --- drivers/scsi/pcmcia/nsp_cs.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/scsi/pcmcia/nsp_cs.c b/drivers/scsi/pcmcia/nsp_cs.c index 48acab03a8a0..a5a1406a2bde 100644 --- a/drivers/scsi/pcmcia/nsp_cs.c +++ b/drivers/scsi/pcmcia/nsp_cs.c @@ -450,8 +450,6 @@ static int nsp_analyze_sdtr(struct scsi_cmnd *SCpnt) sync_data *sync = &(data->Sync[target]); struct nsp_sync_table *sync_table; unsigned int period, offset; - int i; - nsp_dbg(NSP_DEBUG_SYNC, "in"); @@ -466,7 +464,7 @@ static int nsp_analyze_sdtr(struct scsi_cmnd *SCpnt) sync_table = nsp_sync_table_40M; } - for ( i = 0; sync_table->max_period != 0; i++, sync_table++) { + for (; sync_table->max_period != 0; sync_table++) { if ( period >= sync_table->min_period && period <= sync_table->max_period ) { break; From b43678ea5bbd92388339ecae47ed44955474f53b Mon Sep 17 00:00:00 2001 From: Bean Huo Date: Wed, 26 Oct 2022 00:24:28 +0200 Subject: [PATCH 0479/4122] scsi: ufs: core: Revert "WB is only available on LUN #0 to #7" Commit d3d9c4570285 ("scsi: ufs: Fix memory corruption by ufshcd_read_desc_param()") has properly fixed the stack overflow issue. As a result, commit a2fca52ee640 ("scsi: ufs: WB is only available on LUN #0 to #7") is no longer required. Revert it. Cc: Jaegeuk Kim Signed-off-by: Bean Huo Link: https://lore.kernel.org/r/20221025222430.277768-2-beanhuo@iokpp.de Reviewed-by: Arthur Simchaev Reviewed-by: Bart Van Assche Signed-off-by: Martin K. 
Petersen --- drivers/ufs/core/ufs-sysfs.c | 3 +-- drivers/ufs/core/ufshcd-priv.h | 6 +----- drivers/ufs/core/ufshcd.c | 2 +- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/ufs/core/ufs-sysfs.c b/drivers/ufs/core/ufs-sysfs.c index 53aea56d1de1..eb6b278c4e79 100644 --- a/drivers/ufs/core/ufs-sysfs.c +++ b/drivers/ufs/core/ufs-sysfs.c @@ -1234,8 +1234,7 @@ static ssize_t _pname##_show(struct device *dev, \ struct scsi_device *sdev = to_scsi_device(dev); \ struct ufs_hba *hba = shost_priv(sdev->host); \ u8 lun = ufshcd_scsi_to_upiu_lun(sdev->lun); \ - if (!ufs_is_valid_unit_desc_lun(&hba->dev_info, lun, \ - _duname##_DESC_PARAM##_puname)) \ + if (!ufs_is_valid_unit_desc_lun(&hba->dev_info, lun)) \ return -EINVAL; \ return ufs_sysfs_read_desc_param(hba, QUERY_DESC_IDN_##_duname, \ lun, _duname##_DESC_PARAM##_puname, buf, _size); \ diff --git a/drivers/ufs/core/ufshcd-priv.h b/drivers/ufs/core/ufshcd-priv.h index f68ca33f6ac7..a9e8e1f5afe7 100644 --- a/drivers/ufs/core/ufshcd-priv.h +++ b/drivers/ufs/core/ufshcd-priv.h @@ -293,16 +293,12 @@ static inline int ufshcd_rpm_put(struct ufs_hba *hba) * @lun: LU number to check * @return: true if the lun has a matching unit descriptor, false otherwise */ -static inline bool ufs_is_valid_unit_desc_lun(struct ufs_dev_info *dev_info, - u8 lun, u8 param_offset) +static inline bool ufs_is_valid_unit_desc_lun(struct ufs_dev_info *dev_info, u8 lun) { if (!dev_info || !dev_info->max_lu_supported) { pr_err("Max General LU supported by UFS isn't initialized\n"); return false; } - /* WB is available only for the logical unit from 0 to 7 */ - if (param_offset == UNIT_DESC_PARAM_WB_BUF_ALLOC_UNITS) - return lun < UFS_UPIU_MAX_WB_LUN_ID; return lun == UFS_UPIU_RPMB_WLUN || (lun < dev_info->max_lu_supported); } diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 008fc60392fc..94ae0fb25358 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -3608,7 +3608,7 @@ static inline int 
ufshcd_read_unit_desc_param(struct ufs_hba *hba, * Unit descriptors are only available for general purpose LUs (LUN id * from 0 to 7) and RPMB Well known LU. */ - if (!ufs_is_valid_unit_desc_lun(&hba->dev_info, lun, param_offset)) + if (!ufs_is_valid_unit_desc_lun(&hba->dev_info, lun)) return -EOPNOTSUPP; return ufshcd_read_desc_param(hba, QUERY_DESC_IDN_UNIT, lun, From dca899bc02231214e25cffd3014cc77018dae942 Mon Sep 17 00:00:00 2001 From: Bean Huo Date: Wed, 26 Oct 2022 00:24:29 +0200 Subject: [PATCH 0480/4122] scsi: ufs: core: Clean up ufshcd_slave_alloc() Combine ufshcd_get_lu_power_on_wp_status() and ufshcd_set_queue_depth() into one single ufshcd_lu_init(), so that we only need to read the LUN descriptor once. Signed-off-by: Bean Huo Link: https://lore.kernel.org/r/20221025222430.277768-3-beanhuo@iokpp.de Reviewed-by: Arthur Simchaev Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 150 ++++++++++++++------------------------ 1 file changed, 53 insertions(+), 97 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 94ae0fb25358..ee73d7036133 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -4861,100 +4861,6 @@ static int ufshcd_verify_dev_init(struct ufs_hba *hba) return err; } -/** - * ufshcd_set_queue_depth - set lun queue depth - * @sdev: pointer to SCSI device - * - * Read bLUQueueDepth value and activate scsi tagged command - * queueing. For WLUN, queue depth is set to 1. For best-effort - * cases (bLUQueueDepth = 0) the queue depth is set to a maximum - * value that host can queue. 
- */ -static void ufshcd_set_queue_depth(struct scsi_device *sdev) -{ - int ret = 0; - u8 lun_qdepth; - struct ufs_hba *hba; - - hba = shost_priv(sdev->host); - - lun_qdepth = hba->nutrs; - ret = ufshcd_read_unit_desc_param(hba, - ufshcd_scsi_to_upiu_lun(sdev->lun), - UNIT_DESC_PARAM_LU_Q_DEPTH, - &lun_qdepth, - sizeof(lun_qdepth)); - - /* Some WLUN doesn't support unit descriptor */ - if (ret == -EOPNOTSUPP) - lun_qdepth = 1; - else if (!lun_qdepth) - /* eventually, we can figure out the real queue depth */ - lun_qdepth = hba->nutrs; - else - lun_qdepth = min_t(int, lun_qdepth, hba->nutrs); - - dev_dbg(hba->dev, "%s: activate tcq with queue depth %d\n", - __func__, lun_qdepth); - scsi_change_queue_depth(sdev, lun_qdepth); -} - -/* - * ufshcd_get_lu_wp - returns the "b_lu_write_protect" from UNIT DESCRIPTOR - * @hba: per-adapter instance - * @lun: UFS device lun id - * @b_lu_write_protect: pointer to buffer to hold the LU's write protect info - * - * Returns 0 in case of success and b_lu_write_protect status would be returned - * @b_lu_write_protect parameter. - * Returns -ENOTSUPP if reading b_lu_write_protect is not supported. - * Returns -EINVAL in case of invalid parameters passed to this function. - */ -static int ufshcd_get_lu_wp(struct ufs_hba *hba, - u8 lun, - u8 *b_lu_write_protect) -{ - int ret; - - if (!b_lu_write_protect) - ret = -EINVAL; - /* - * According to UFS device spec, RPMB LU can't be write - * protected so skip reading bLUWriteProtect parameter for - * it. For other W-LUs, UNIT DESCRIPTOR is not available. 
- */ - else if (lun >= hba->dev_info.max_lu_supported) - ret = -ENOTSUPP; - else - ret = ufshcd_read_unit_desc_param(hba, - lun, - UNIT_DESC_PARAM_LU_WR_PROTECT, - b_lu_write_protect, - sizeof(*b_lu_write_protect)); - return ret; -} - -/** - * ufshcd_get_lu_power_on_wp_status - get LU's power on write protect - * status - * @hba: per-adapter instance - * @sdev: pointer to SCSI device - * - */ -static inline void ufshcd_get_lu_power_on_wp_status(struct ufs_hba *hba, - const struct scsi_device *sdev) -{ - if (hba->dev_info.f_power_on_wp_en && - !hba->dev_info.is_lu_power_on_wp) { - u8 b_lu_write_protect; - - if (!ufshcd_get_lu_wp(hba, ufshcd_scsi_to_upiu_lun(sdev->lun), - &b_lu_write_protect) && - (b_lu_write_protect == UFS_LU_POWER_ON_WP)) - hba->dev_info.is_lu_power_on_wp = true; - } -} - /** * ufshcd_setup_links - associate link b/w device wlun and other luns * @sdev: pointer to SCSI device @@ -4992,6 +4898,58 @@ static void ufshcd_setup_links(struct ufs_hba *hba, struct scsi_device *sdev) } } +/** + * ufshcd_lu_init - Initialize the relevant parameters of the LU + * @hba: per-adapter instance + * @sdev: pointer to SCSI device + */ +static void ufshcd_lu_init(struct ufs_hba *hba, struct scsi_device *sdev) +{ + int len = hba->desc_size[QUERY_DESC_IDN_UNIT]; + u8 lun = ufshcd_scsi_to_upiu_lun(sdev->lun); + u8 lun_qdepth = hba->nutrs; + u8 *desc_buf; + int ret; + + desc_buf = kzalloc(len, GFP_KERNEL); + if (!desc_buf) + goto set_qdepth; + + ret = ufshcd_read_unit_desc_param(hba, lun, 0, desc_buf, len); + if (ret < 0) { + if (ret == -EOPNOTSUPP) + /* If LU doesn't support unit descriptor, its queue depth is set to 1 */ + lun_qdepth = 1; + kfree(desc_buf); + goto set_qdepth; + } + + if (desc_buf[UNIT_DESC_PARAM_LU_Q_DEPTH]) { + /* + * In per-LU queueing architecture, bLUQueueDepth will not be 0, then we will + * use the smaller between UFSHCI CAP.NUTRS and UFS LU bLUQueueDepth + */ + lun_qdepth = min_t(int, desc_buf[UNIT_DESC_PARAM_LU_Q_DEPTH], hba->nutrs); + } + /* + 
* According to UFS device specification, the write protection mode is only supported by + * normal LU, not supported by WLUN. + */ + if (hba->dev_info.f_power_on_wp_en && lun < hba->dev_info.max_lu_supported && + !hba->dev_info.is_lu_power_on_wp && + desc_buf[UNIT_DESC_PARAM_LU_WR_PROTECT] == UFS_LU_POWER_ON_WP) + hba->dev_info.is_lu_power_on_wp = true; + + kfree(desc_buf); +set_qdepth: + /* + * For WLUNs that don't support unit descriptor, queue depth is set to 1. For LUs whose + * bLUQueueDepth == 0, the queue depth is set to a maximum value that host can queue. + */ + dev_dbg(hba->dev, "Set LU %x queue depth %d\n", lun, lun_qdepth); + scsi_change_queue_depth(sdev, lun_qdepth); +} + /** * ufshcd_slave_alloc - handle initial SCSI device configurations * @sdev: pointer to SCSI device @@ -5019,9 +4977,7 @@ static int ufshcd_slave_alloc(struct scsi_device *sdev) /* WRITE_SAME command is not supported */ sdev->no_write_same = 1; - ufshcd_set_queue_depth(sdev); - - ufshcd_get_lu_power_on_wp_status(hba, sdev); + ufshcd_lu_init(hba, sdev); ufshcd_setup_links(hba, sdev); From 9d266e792b0fb4c25448dc240a808667e0932ef2 Mon Sep 17 00:00:00 2001 From: Bean Huo Date: Wed, 26 Oct 2022 00:24:30 +0200 Subject: [PATCH 0481/4122] scsi: ufs: core: Use is_visible to control UFS unit descriptor sysfs nodes UFS Boot and Device W-LUs do not have unit descriptors and RPMB does not support WB. Use is_visible() to control which nodes are visible and which are not. Signed-off-by: Bean Huo Link: https://lore.kernel.org/r/20221025222430.277768-4-beanhuo@iokpp.de Reviewed-by: Bart Van Assche Reviewed-by: Arthur Simchaev Signed-off-by: Martin K. 
Petersen --- drivers/ufs/core/ufs-sysfs.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/ufs/core/ufs-sysfs.c b/drivers/ufs/core/ufs-sysfs.c index eb6b278c4e79..883f0e44b54e 100644 --- a/drivers/ufs/core/ufs-sysfs.c +++ b/drivers/ufs/core/ufs-sysfs.c @@ -1285,9 +1285,27 @@ static struct attribute *ufs_sysfs_unit_descriptor[] = { NULL, }; +static umode_t ufs_unit_descriptor_is_visible(struct kobject *kobj, struct attribute *attr, int n) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct scsi_device *sdev = to_scsi_device(dev); + u8 lun = ufshcd_scsi_to_upiu_lun(sdev->lun); + umode_t mode = attr->mode; + + if (lun == UFS_UPIU_BOOT_WLUN || lun == UFS_UPIU_UFS_DEVICE_WLUN) + /* Boot and device WLUN have no unit descriptors */ + mode = 0; + if (lun == UFS_UPIU_RPMB_WLUN && attr == &dev_attr_wb_buf_alloc_units.attr) + mode = 0; + + return mode; +} + + const struct attribute_group ufs_sysfs_unit_descriptor_group = { .name = "unit_descriptor", .attrs = ufs_sysfs_unit_descriptor, + .is_visible = ufs_unit_descriptor_is_visible, }; static ssize_t dyn_cap_needed_attribute_show(struct device *dev, From 4481bdc677c1aa9b0138ee9234c8c2d14142b42e Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 26 Oct 2022 18:56:04 +0800 Subject: [PATCH 0482/4122] scsi: pm8001: Drop !task check in pm8001_abort_task() In commit 0b639decf651 ("scsi: pm8001: Modify task abort handling for SATA task"), code was introduced to dereference "task" pointer in pm8001_abort_task(). However there was a pre-existing later check for "!task", which spooked the kernel test robot. Function pm8001_abort_task() should never be passed NULL for "task" pointer, so remove that check. Also remove the "unlikely" hint, as this is not fastpath code. 
Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: John Garry Link: https://lore.kernel.org/r/1666781764-123090-1-git-send-email-john.garry@huawei.com Reviewed-by: Damien Le Moal Acked-by: Jack Wang Signed-off-by: Martin K. Petersen --- drivers/scsi/pm8001/pm8001_sas.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c index 2359e827c9e6..e5673c774f66 100644 --- a/drivers/scsi/pm8001/pm8001_sas.c +++ b/drivers/scsi/pm8001/pm8001_sas.c @@ -979,7 +979,7 @@ int pm8001_abort_task(struct sas_task *task) u32 phy_id, port_id; struct sas_task_slow slow_task; - if (unlikely(!task || !task->lldd_task || !task->dev)) + if (!task->lldd_task || !task->dev) return TMF_RESP_FUNC_FAILED; dev = task->dev; From e6629dcb00adeebcfeaee45b1c987a84eb3ce1ba Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 26 Oct 2022 19:33:28 +0800 Subject: [PATCH 0483/4122] scsi: MAINTAINERS: Make Xiang Chen HiSilicon SAS controller driver maintainer I am soon leaving Huawei, so will no longer maintain this driver. However I will stay active in upstream Linux storage domain. Xiang Chen has worked on the driver for as long as I have and has good knowledge of the driver, so should do a good job. Signed-off-by: John Garry Link: https://lore.kernel.org/r/1666784008-125519-1-git-send-email-john.garry@huawei.com Signed-off-by: Martin K. 
Petersen --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index cf0f18502372..174de5e8d4c8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9290,7 +9290,7 @@ F: Documentation/devicetree/bindings/infiniband/hisilicon-hns-roce.txt F: drivers/infiniband/hw/hns/ HISILICON SAS Controller -M: John Garry +M: Xiang Chen S: Supported W: http://www.hisilicon.com F: Documentation/devicetree/bindings/scsi/hisilicon-sas.txt From f7ec74c14f24d78f054efd4ddbda0b4a174cf39f Mon Sep 17 00:00:00 2001 From: Shubhrajyoti Datta Date: Wed, 26 Oct 2022 20:45:41 +0530 Subject: [PATCH 0484/4122] dt-bindings: gpio: pca9570: Add compatible for slg7xl45106 This patch adds compatible string for the SLG7XL45106, I2C GPO expander. Acked-by: Krzysztof Kozlowski Signed-off-by: Shubhrajyoti Datta Signed-off-by: Bartosz Golaszewski --- Documentation/devicetree/bindings/gpio/gpio-pca9570.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/gpio/gpio-pca9570.yaml b/Documentation/devicetree/bindings/gpio/gpio-pca9570.yaml index 1acaa0a3d35a..48bf414aa50e 100644 --- a/Documentation/devicetree/bindings/gpio/gpio-pca9570.yaml +++ b/Documentation/devicetree/bindings/gpio/gpio-pca9570.yaml @@ -12,6 +12,7 @@ maintainers: properties: compatible: enum: + - dlg,slg7xl45106 - nxp,pca9570 - nxp,pca9571 From b8a34582c7f7f22f82852f9d3cc192e050f892fd Mon Sep 17 00:00:00 2001 From: Shubhrajyoti Datta Date: Wed, 26 Oct 2022 20:45:42 +0530 Subject: [PATCH 0485/4122] gpio: pca9570: add a platform data structure Add struct pca9570_platform_data for adding the platform data structure. 
Also modify the existing structs for pca9570 and pca9571 Signed-off-by: Shubhrajyoti Datta Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-pca9570.c | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/drivers/gpio/gpio-pca9570.c b/drivers/gpio/gpio-pca9570.c index ab2a652964ec..e8c2ddb1bcd8 100644 --- a/drivers/gpio/gpio-pca9570.c +++ b/drivers/gpio/gpio-pca9570.c @@ -15,14 +15,26 @@ #include #include +/** + * struct pca9570_platform_data - GPIO platformdata + * @ngpio: no of gpios + * @command: Command to be sent + */ +struct pca9570_platform_data { + u16 ngpio; + u32 command; +}; + /** * struct pca9570 - GPIO driver data * @chip: GPIO controller chip + * @p_data: GPIO controller platform data * @lock: Protects write sequences * @out: Buffer for device register */ struct pca9570 { struct gpio_chip chip; + const struct pca9570_platform_data *p_data; struct mutex lock; u8 out; }; @@ -106,7 +118,8 @@ static int pca9570_probe(struct i2c_client *client) gpio->chip.get = pca9570_get; gpio->chip.set = pca9570_set; gpio->chip.base = -1; - gpio->chip.ngpio = (uintptr_t)device_get_match_data(&client->dev); + gpio->p_data = device_get_match_data(&client->dev); + gpio->chip.ngpio = gpio->p_data->ngpio; gpio->chip.can_sleep = true; mutex_init(&gpio->lock); @@ -119,16 +132,24 @@ static int pca9570_probe(struct i2c_client *client) return devm_gpiochip_add_data(&client->dev, &gpio->chip, gpio); } +static const struct pca9570_platform_data pca9570_gpio = { + .ngpio = 4, +}; + +static const struct pca9570_platform_data pca9571_gpio = { + .ngpio = 8, +}; + static const struct i2c_device_id pca9570_id_table[] = { - { "pca9570", 4 }, - { "pca9571", 8 }, + { "pca9570", (kernel_ulong_t)&pca9570_gpio}, + { "pca9571", (kernel_ulong_t)&pca9571_gpio }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(i2c, pca9570_id_table); static const struct of_device_id pca9570_of_match_table[] = { - { .compatible = "nxp,pca9570", .data = (void *)4 }, - { 
.compatible = "nxp,pca9571", .data = (void *)8 }, + { .compatible = "nxp,pca9570", .data = &pca9570_gpio }, + { .compatible = "nxp,pca9571", .data = &pca9571_gpio }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, pca9570_of_match_table); From fbb19fe17eaef7b6ba2e68dbf0600a97060f2909 Mon Sep 17 00:00:00 2001 From: Shubhrajyoti Datta Date: Wed, 26 Oct 2022 20:45:43 +0530 Subject: [PATCH 0486/4122] gpio: pca9570: add slg7xl45106 support Dialog semiconductors SLG7XL45106 is an 8-bit I2C GPO expander. The output port is controlled by a data byte with register address. Add a compatible string for the same. Also update the driver to write and read from it. Reviewed-by: Linus Walleij Signed-off-by: Shubhrajyoti Datta Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-pca9570.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-pca9570.c b/drivers/gpio/gpio-pca9570.c index e8c2ddb1bcd8..6c07a8811a7a 100644 --- a/drivers/gpio/gpio-pca9570.c +++ b/drivers/gpio/gpio-pca9570.c @@ -15,6 +15,8 @@ #include #include +#define SLG7XL45106_GPO_REG 0xDB + /** * struct pca9570_platform_data - GPIO platformdata * @ngpio: no of gpios @@ -44,7 +46,11 @@ static int pca9570_read(struct pca9570 *gpio, u8 *value) struct i2c_client *client = to_i2c_client(gpio->chip.parent); int ret; - ret = i2c_smbus_read_byte(client); + if (gpio->p_data->command != 0) + ret = i2c_smbus_read_byte_data(client, gpio->p_data->command); + else + ret = i2c_smbus_read_byte(client); + if (ret < 0) return ret; @@ -56,6 +62,9 @@ static int pca9570_write(struct pca9570 *gpio, u8 value) { struct i2c_client *client = to_i2c_client(gpio->chip.parent); + if (gpio->p_data->command != 0) + return i2c_smbus_write_byte_data(client, gpio->p_data->command, value); + return i2c_smbus_write_byte(client, value); } @@ -140,14 +149,21 @@ static const struct pca9570_platform_data pca9571_gpio = { .ngpio = 8, }; +static const struct pca9570_platform_data slg7xl45106_gpio = { + 
.ngpio = 8, + .command = SLG7XL45106_GPO_REG, +}; + static const struct i2c_device_id pca9570_id_table[] = { { "pca9570", (kernel_ulong_t)&pca9570_gpio}, { "pca9571", (kernel_ulong_t)&pca9571_gpio }, + { "slg7xl45106", (kernel_ulong_t)&slg7xl45106_gpio }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(i2c, pca9570_id_table); static const struct of_device_id pca9570_of_match_table[] = { + { .compatible = "dlg,slg7xl45106", .data = &slg7xl45106_gpio}, { .compatible = "nxp,pca9570", .data = &pca9570_gpio }, { .compatible = "nxp,pca9571", .data = &pca9571_gpio }, { /* sentinel */ } From ba4ff1cb6cac8acca928ea41588cf84b18ffdedb Mon Sep 17 00:00:00 2001 From: Matt Ranostay Date: Tue, 25 Oct 2022 01:19:08 -0700 Subject: [PATCH 0487/4122] dt-bindings: PCI: ti,j721e-pci-host: add interrupt controller definition Add missing 'interrupt-controller' property and related subnodes to resolve the following warning: arch/arm64/boot/dts/ti/k3-j721s2-common-proc-board.dtb: pcie@2910000: Unevaluated properties are not allowed ('interrupt-controller' was unexpected) From schema: Documentation/devicetree/bindings/pci/ti,j721e-pci-host.yaml Link: https://lore.kernel.org/r/20221025081909.404107-2-mranostay@ti.com Signed-off-by: Matt Ranostay Signed-off-by: Lorenzo Pieralisi Acked-by: Rob Herring --- .../devicetree/bindings/pci/ti,j721e-pci-host.yaml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/Documentation/devicetree/bindings/pci/ti,j721e-pci-host.yaml b/Documentation/devicetree/bindings/pci/ti,j721e-pci-host.yaml index 2115d5a3f0e1..0f5914a22c14 100644 --- a/Documentation/devicetree/bindings/pci/ti,j721e-pci-host.yaml +++ b/Documentation/devicetree/bindings/pci/ti,j721e-pci-host.yaml @@ -76,6 +76,19 @@ properties: msi-map: true + interrupt-controller: + type: object + additionalProperties: false + + properties: + interrupt-controller: true + + '#interrupt-cells': + const: 1 + + interrupts: + maxItems: 1 + required: - compatible - reg From 
598418e6035622c0dc735764f0f1b7293c0c7d48 Mon Sep 17 00:00:00 2001 From: Matt Ranostay Date: Tue, 25 Oct 2022 01:19:09 -0700 Subject: [PATCH 0488/4122] dt-bindings: PCI: ti,j721e-pci-*: Add missing interrupt properties Both interrupts, and interrupt names weren't defined in both EP and host yaml. Also define the only possible interrupt-name as link_state, and maxItems of interrupts to one. This patch resolves the following warning: arch/arm64/boot/dts/ti/k3-j721s2-common-proc-board.dtb: pcie-ep@2910000: Unevaluated properties are not allowed ('interrupt-names', 'interrupts' were unexpected) From schema Documentation/devicetree/bindings/pci/ti,j721e-pci-ep.yaml Link: https://lore.kernel.org/r/20221025081909.404107-3-mranostay@ti.com Signed-off-by: Matt Ranostay Signed-off-by: Lorenzo Pieralisi Acked-by: Rob Herring --- Documentation/devicetree/bindings/pci/ti,j721e-pci-ep.yaml | 7 +++++++ .../devicetree/bindings/pci/ti,j721e-pci-host.yaml | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/Documentation/devicetree/bindings/pci/ti,j721e-pci-ep.yaml b/Documentation/devicetree/bindings/pci/ti,j721e-pci-ep.yaml index aed437dac363..10e6eabdff53 100644 --- a/Documentation/devicetree/bindings/pci/ti,j721e-pci-ep.yaml +++ b/Documentation/devicetree/bindings/pci/ti,j721e-pci-ep.yaml @@ -58,6 +58,13 @@ properties: dma-coherent: description: Indicates that the PCIe IP block can ensure the coherency + interrupts: + maxItems: 1 + + interrupt-names: + items: + - const: link_state + required: - compatible - reg diff --git a/Documentation/devicetree/bindings/pci/ti,j721e-pci-host.yaml b/Documentation/devicetree/bindings/pci/ti,j721e-pci-host.yaml index 0f5914a22c14..d9df7cd922f1 100644 --- a/Documentation/devicetree/bindings/pci/ti,j721e-pci-host.yaml +++ b/Documentation/devicetree/bindings/pci/ti,j721e-pci-host.yaml @@ -76,6 +76,13 @@ properties: msi-map: true + interrupts: + maxItems: 1 + + interrupt-names: + items: + - const: link_state + interrupt-controller: type: object 
additionalProperties: false From 66110361281b2f7da0c8bd51eaf1f152f4236035 Mon Sep 17 00:00:00 2001 From: Vidya Sagar Date: Mon, 26 Sep 2022 16:49:23 +0530 Subject: [PATCH 0489/4122] PCI: dwc: Fix n_fts[] array overrun commit aeaa0bfe89654 ("PCI: dwc: Move N_FTS setup to common setup") incorrectly uses pci->link_gen in deriving the index to the n_fts[] array also introducing the issue of accessing beyond the boundaries of array for greater than Gen-2 speeds. This change fixes that issue. Link: https://lore.kernel.org/r/20220926111923.22487-1-vidyas@nvidia.com Fixes: aeaa0bfe8965 ("PCI: dwc: Move N_FTS setup to common setup") Signed-off-by: Vidya Sagar Signed-off-by: Lorenzo Pieralisi Reviewed-by: Rob Herring Acked-by: Jingoo Han --- drivers/pci/controller/dwc/pcie-designware.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/controller/dwc/pcie-designware.c b/drivers/pci/controller/dwc/pcie-designware.c index c6725c519a47..9e4d96e5a3f5 100644 --- a/drivers/pci/controller/dwc/pcie-designware.c +++ b/drivers/pci/controller/dwc/pcie-designware.c @@ -641,7 +641,7 @@ void dw_pcie_setup(struct dw_pcie *pci) if (pci->n_fts[1]) { val = dw_pcie_readl_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL); val &= ~PORT_LOGIC_N_FTS_MASK; - val |= pci->n_fts[pci->link_gen - 1]; + val |= pci->n_fts[1]; dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, val); } From 4508d32ccced24c972bc4592104513e1ff8439b5 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 25 Oct 2022 10:37:13 +0300 Subject: [PATCH 0490/4122] RDMA/core: Fix order of nldev_exit call Create symmetrical exit flow by calling to nldev_exit() after call to rdma_nl_unregister(RDMA_NL_LS). 
Fixes: 6c80b41abe22 ("RDMA/netlink: Add nldev initialization flows") Signed-off-by: Leon Romanovsky Link: https://lore.kernel.org/r/64e676774a53a406f4cde265d5a4cfd6b8e97df9.1666683334.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index ae60c73babcc..3409c55ea88b 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2843,8 +2843,8 @@ err: static void __exit ib_core_cleanup(void) { roce_gid_mgmt_cleanup(); - nldev_exit(); rdma_nl_unregister(RDMA_NL_LS); + nldev_exit(); unregister_pernet_device(&rdma_dev_net_ops); unregister_blocking_lsm_notifier(&ibdev_lsm_nb); ib_sa_cleanup(); From e32e1e26c4098d8a866ce09fd26d8004da4ddf9e Mon Sep 17 00:00:00 2001 From: Vidya Sagar Date: Mon, 19 Sep 2022 20:03:39 +0530 Subject: [PATCH 0491/4122] PCI: Add PCI_PTM_CAP_RES macro Add macro defining Responder capable bit in Precision Time Measurement capability register. 
Link: https://lore.kernel.org/r/20220919143340.4527-2-vidyas@nvidia.com Signed-off-by: Vidya Sagar Signed-off-by: Lorenzo Pieralisi Reviewed-by: Jingoo Han --- include/uapi/linux/pci_regs.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 57b8e2ffb1dd..1c3591c8e09e 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -1058,6 +1058,7 @@ /* Precision Time Measurement */ #define PCI_PTM_CAP 0x04 /* PTM Capability */ #define PCI_PTM_CAP_REQ 0x00000001 /* Requester capable */ +#define PCI_PTM_CAP_RES 0x00000002 /* Responder capable */ #define PCI_PTM_CAP_ROOT 0x00000004 /* Root capable */ #define PCI_PTM_GRANULARITY_MASK 0x0000FF00 /* Clock granularity */ #define PCI_PTM_CTRL 0x08 /* PTM Control */ From 442ae919e6ca77354551a7b8717746b44272e274 Mon Sep 17 00:00:00 2001 From: Vidya Sagar Date: Mon, 19 Sep 2022 20:03:40 +0530 Subject: [PATCH 0492/4122] PCI: designware-ep: Disable PTM capabilities for EP mode Dual mode DesignWare PCIe IP has PTM capability enabled (if supported) even in the EP mode. The PCIe compliance for the EP mode expects PTM capabilities (ROOT_CAPABLE, RES_CAPABLE, CLK_GRAN) be disabled. Hence disable PTM for the EP mode. 
Link: https://lore.kernel.org/r/20220919143340.4527-3-vidyas@nvidia.com Signed-off-by: Vidya Sagar Signed-off-by: Lorenzo Pieralisi Acked-by: Jingoo Han --- .../pci/controller/dwc/pcie-designware-ep.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c index 83ddb190292e..efc6c6360e28 100644 --- a/drivers/pci/controller/dwc/pcie-designware-ep.c +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c @@ -643,7 +643,7 @@ static unsigned int dw_pcie_ep_find_ext_capability(struct dw_pcie *pci, int cap) int dw_pcie_ep_init_complete(struct dw_pcie_ep *ep) { struct dw_pcie *pci = to_dw_pcie_from_ep(ep); - unsigned int offset; + unsigned int offset, ptm_cap_base; unsigned int nbars; u8 hdr_type; u32 reg; @@ -659,6 +659,7 @@ int dw_pcie_ep_init_complete(struct dw_pcie_ep *ep) } offset = dw_pcie_ep_find_ext_capability(pci, PCI_EXT_CAP_ID_REBAR); + ptm_cap_base = dw_pcie_ep_find_ext_capability(pci, PCI_EXT_CAP_ID_PTM); dw_pcie_dbi_ro_wr_en(pci); @@ -671,6 +672,22 @@ int dw_pcie_ep_init_complete(struct dw_pcie_ep *ep) dw_pcie_writel_dbi(pci, offset + PCI_REBAR_CAP, 0x0); } + /* + * PTM responder capability can be disabled only after disabling + * PTM root capability. 
+ */ + if (ptm_cap_base) { + dw_pcie_dbi_ro_wr_en(pci); + reg = dw_pcie_readl_dbi(pci, ptm_cap_base + PCI_PTM_CAP); + reg &= ~PCI_PTM_CAP_ROOT; + dw_pcie_writel_dbi(pci, ptm_cap_base + PCI_PTM_CAP, reg); + + reg = dw_pcie_readl_dbi(pci, ptm_cap_base + PCI_PTM_CAP); + reg &= ~(PCI_PTM_CAP_RES | PCI_PTM_GRANULARITY_MASK); + dw_pcie_writel_dbi(pci, ptm_cap_base + PCI_PTM_CAP, reg); + dw_pcie_dbi_ro_wr_dis(pci); + } + dw_pcie_setup(pci); dw_pcie_dbi_ro_wr_dis(pci); From 7711cbb4862aa00909a248f011ba3fa578bd1cf3 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Thu, 23 Jun 2022 09:38:17 +0900 Subject: [PATCH 0493/4122] PCI: endpoint: Fix WARN() when an endpoint driver is removed Since there is no release callback defined for the PCI EPC device, the below warning is thrown by driver core when a PCI endpoint driver is removed: Device 'e65d0000.pcie-ep' does not have a release() function, it is broken and must be fixed. See Documentation/core-api/kobject.rst. WARNING: CPU: 0 PID: 139 at drivers/base/core.c:2232 device_release+0x78/0x8c Hence, add the release callback and also move the kfree(epc) from pci_epc_destroy() so that the epc memory is freed when all references are dropped. 
Link: https://lore.kernel.org/r/20220623003817.298173-1-yoshihiro.shimoda.uh@renesas.com Tested-by: Vidya Sagar Signed-off-by: Yoshihiro Shimoda Signed-off-by: Lorenzo Pieralisi Reviewed-by: Manivannan Sadhasivam --- drivers/pci/endpoint/pci-epc-core.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/pci/endpoint/pci-epc-core.c b/drivers/pci/endpoint/pci-epc-core.c index 3bc9273d0a08..2542196e8c3d 100644 --- a/drivers/pci/endpoint/pci-epc-core.c +++ b/drivers/pci/endpoint/pci-epc-core.c @@ -724,7 +724,6 @@ void pci_epc_destroy(struct pci_epc *epc) { pci_ep_cfs_remove_epc_group(epc->group); device_unregister(&epc->dev); - kfree(epc); } EXPORT_SYMBOL_GPL(pci_epc_destroy); @@ -746,6 +745,11 @@ void devm_pci_epc_destroy(struct device *dev, struct pci_epc *epc) } EXPORT_SYMBOL_GPL(devm_pci_epc_destroy); +static void pci_epc_release(struct device *dev) +{ + kfree(to_pci_epc(dev)); +} + /** * __pci_epc_create() - create a new endpoint controller (EPC) device * @dev: device that is creating the new EPC @@ -779,6 +783,7 @@ __pci_epc_create(struct device *dev, const struct pci_epc_ops *ops, device_initialize(&epc->dev); epc->dev.class = pci_epc_class; epc->dev.parent = dev; + epc->dev.release = pci_epc_release; epc->ops = ops; ret = dev_set_name(&epc->dev, "%s", dev_name(dev)); From 16e3f40779659ff525364e5d9df369953fa7192b Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sun, 4 Sep 2022 23:30:53 -0700 Subject: [PATCH 0494/4122] PCI: tegra: Switch to using devm_fwnode_gpiod_get [devm_]gpiod_get_from_of_node in drivers usage should be limited so that gpiolib can be cleaned up; let's switch to the generic device property API. It may even help with handling secondary fwnodes when gpiolib is taught to handle gpios described by swnodes. 
Link: https://lore.kernel.org/r/20220903-gpiod_get_from_of_node-remove-v1-1-b29adfb27a6c@gmail.com Signed-off-by: Dmitry Torokhov [lpieralisi@kernel.org: commit log] Signed-off-by: Lorenzo Pieralisi Reviewed-by: Linus Walleij --- drivers/pci/controller/pci-tegra.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/pci/controller/pci-tegra.c b/drivers/pci/controller/pci-tegra.c index 24478ae5a345..b6f77b102709 100644 --- a/drivers/pci/controller/pci-tegra.c +++ b/drivers/pci/controller/pci-tegra.c @@ -2197,10 +2197,11 @@ static int tegra_pcie_parse_dt(struct tegra_pcie *pcie) * and in this case fall back to using AFI per port register * to toggle PERST# SFIO line. */ - rp->reset_gpio = devm_gpiod_get_from_of_node(dev, port, - "reset-gpios", 0, - GPIOD_OUT_LOW, - label); + rp->reset_gpio = devm_fwnode_gpiod_get(dev, + of_fwnode_handle(port), + "reset", + GPIOD_OUT_LOW, + label); if (IS_ERR(rp->reset_gpio)) { if (PTR_ERR(rp->reset_gpio) == -ENOENT) { rp->reset_gpio = NULL; From 6acd25cc98ce0c9ee4fefdaf44fc8bca534b26e5 Mon Sep 17 00:00:00 2001 From: Kunihiko Hayashi Date: Thu, 25 Aug 2022 18:01:01 +0900 Subject: [PATCH 0495/4122] PCI: pci-epf-test: Register notifier if only core_init_notifier is enabled The pci_epf_test_notifier function should be installed also if only core_init_notifier is enabled. Fix the current logic. 
Link: https://lore.kernel.org/r/20220825090101.20474-1-hayashi.kunihiko@socionext.com Fixes: 5e50ee27d4a5 ("PCI: pci-epf-test: Add support to defer core initialization") Signed-off-by: Kunihiko Hayashi Signed-off-by: Lorenzo Pieralisi Acked-by: Om Prakash Singh Acked-by: Kishon Vijay Abraham I --- drivers/pci/endpoint/functions/pci-epf-test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/endpoint/functions/pci-epf-test.c b/drivers/pci/endpoint/functions/pci-epf-test.c index 36b1801a061b..55283d2379a6 100644 --- a/drivers/pci/endpoint/functions/pci-epf-test.c +++ b/drivers/pci/endpoint/functions/pci-epf-test.c @@ -979,7 +979,7 @@ static int pci_epf_test_bind(struct pci_epf *epf) if (ret) epf_test->dma_supported = false; - if (linkup_notifier) { + if (linkup_notifier || core_init_notifier) { epf->nb.notifier_call = pci_epf_test_notifier; pci_epc_register_notifier(epc, &epf->nb); } else { From 25c2e5911705c69d7b01c35ea0b32cbe6ada67cd Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 15 Aug 2022 16:22:38 +0300 Subject: [PATCH 0496/4122] perf tools riscv: Add support for get_cpuid_str function The get_cpuid_str function returns the string that contains values of MVENDORID, MARCHID and MIMPID in hex format separated by dashes. The values themselves are taken from the first CPU entry in "/proc/cpuinfo" that contains "mvendorid", "marchid" and "mimpid".
Signed-off-by: Nikita Shubin Tested-by: Kautuk Consul Acked-by: Palmer Dabbelt Cc: Albert Ou Cc: Alexander Shishkin Cc: Anup Patel Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Peter Zijlstra Cc: linux-riscv@lists.infradead.org Cc: linux@yadro.com Link: https://lore.kernel.org/r/20220815132251.25702-2-nikita.shubin@maquefel.me Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/riscv/util/Build | 1 + tools/perf/arch/riscv/util/header.c | 104 ++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+) create mode 100644 tools/perf/arch/riscv/util/header.c diff --git a/tools/perf/arch/riscv/util/Build b/tools/perf/arch/riscv/util/Build index 7d3050134ae0..603dbb5ae4dc 100644 --- a/tools/perf/arch/riscv/util/Build +++ b/tools/perf/arch/riscv/util/Build @@ -1,4 +1,5 @@ perf-y += perf_regs.o +perf-y += header.o perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/riscv/util/header.c b/tools/perf/arch/riscv/util/header.c new file mode 100644 index 000000000000..4a41856938a8 --- /dev/null +++ b/tools/perf/arch/riscv/util/header.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Implementation of get_cpuid(). 
+ * + * Author: Nikita Shubin + */ + +#include +#include +#include +#include +#include "../../util/debug.h" +#include "../../util/header.h" + +#define CPUINFO_MVEN "mvendorid" +#define CPUINFO_MARCH "marchid" +#define CPUINFO_MIMP "mimpid" +#define CPUINFO "/proc/cpuinfo" + +static char *_get_field(const char *line) +{ + char *line2, *nl; + + line2 = strrchr(line, ' '); + if (!line2) + return NULL; + + line2++; + nl = strrchr(line, '\n'); + if (!nl) + return NULL; + + return strndup(line2, nl - line2); +} + +static char *_get_cpuid(void) +{ + char *line = NULL; + char *mvendorid = NULL; + char *marchid = NULL; + char *mimpid = NULL; + char *cpuid = NULL; + int read; + unsigned long line_sz; + FILE *cpuinfo; + + cpuinfo = fopen(CPUINFO, "r"); + if (cpuinfo == NULL) + return cpuid; + + while ((read = getline(&line, &line_sz, cpuinfo)) != -1) { + if (!strncmp(line, CPUINFO_MVEN, strlen(CPUINFO_MVEN))) { + mvendorid = _get_field(line); + if (!mvendorid) + goto free; + } else if (!strncmp(line, CPUINFO_MARCH, strlen(CPUINFO_MARCH))) { + marchid = _get_field(line); + if (!marchid) + goto free; + } else if (!strncmp(line, CPUINFO_MIMP, strlen(CPUINFO_MIMP))) { + mimpid = _get_field(line); + if (!mimpid) + goto free; + + break; + } + } + + if (!mvendorid || !marchid || !mimpid) + goto free; + + if (asprintf(&cpuid, "%s-%s-%s", mvendorid, marchid, mimpid) < 0) + cpuid = NULL; + +free: + fclose(cpuinfo); + free(mvendorid); + free(marchid); + free(mimpid); + + return cpuid; +} + +int get_cpuid(char *buffer, size_t sz) +{ + char *cpuid = _get_cpuid(); + int ret = 0; + + if (sz < strlen(cpuid)) { + ret = -EINVAL; + goto free; + } + + scnprintf(buffer, sz, "%s", cpuid); +free: + free(cpuid); + return ret; +} + +char * +get_cpuid_str(struct perf_pmu *pmu __maybe_unused) +{ + return _get_cpuid(); +} From 8f0dcb4e7364af3e5b156e6a3fdac0860733eb86 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 15 Aug 2022 16:22:39 +0300 Subject: [PATCH 0497/4122] perf arch events: riscv sbi 
firmware std event files Firmware events are defined by "RISC-V Supervisor Binary Interface Specification", which means they should be always available as long as firmware supports >= 0.3.0 SBI. Expose them to arch std events, so they can be reused by particular PMU bindings. Signed-off-by: Nikita Shubin Tested-by: Kautuk Consul Acked-by: Palmer Dabbelt Cc: Albert Ou Cc: Alexander Shishkin Cc: Anup Patel Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Peter Zijlstra Cc: linux-riscv@lists.infradead.org Cc: linux@yadro.com Link: https://lore.kernel.org/r/20220815132251.25702-3-nikita.shubin@maquefel.me Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/riscv/riscv-sbi-firmware.json | 134 ++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json diff --git a/tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json b/tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json new file mode 100644 index 000000000000..a9939823b14b --- /dev/null +++ b/tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json @@ -0,0 +1,134 @@ +[ + { + "PublicDescription": "Misaligned load trap", + "ConfigCode": "0x8000000000000000", + "EventName": "FW_MISALIGNED_LOAD", + "BriefDescription": "Misaligned load trap event" + }, + { + "PublicDescription": "Misaligned store trap", + "ConfigCode": "0x8000000000000001", + "EventName": "FW_MISALIGNED_STORE", + "BriefDescription": "Misaligned store trap event" + }, + { + "PublicDescription": "Load access trap", + "ConfigCode": "0x8000000000000002", + "EventName": "FW_ACCESS_LOAD", + "BriefDescription": "Load access trap event" + }, + { + "PublicDescription": "Store access trap", + "ConfigCode": "0x8000000000000003", + "EventName": "FW_ACCESS_STORE", + "BriefDescription": "Store access trap event" + }, + { + "PublicDescription": "Illegal instruction trap", + "ConfigCode": "0x8000000000000004", + "EventName":
"FW_ILLEGAL_INSN", + "BriefDescription": "Illegal instruction trap event" + }, + { + "PublicDescription": "Set timer event", + "ConfigCode": "0x8000000000000005", + "EventName": "FW_SET_TIMER", + "BriefDescription": "Set timer event" + }, + { + "PublicDescription": "Sent IPI to other HART event", + "ConfigCode": "0x8000000000000006", + "EventName": "FW_IPI_SENT", + "BriefDescription": "Sent IPI to other HART event" + }, + { + "PublicDescription": "Received IPI from other HART event", + "ConfigCode": "0x8000000000000007", + "EventName": "FW_IPI_RECEIVED", + "BriefDescription": "Received IPI from other HART event" + }, + { + "PublicDescription": "Sent FENCE.I request to other HART event", + "ConfigCode": "0x8000000000000008", + "EventName": "FW_FENCE_I_SENT", + "BriefDescription": "Sent FENCE.I request to other HART event" + }, + { + "PublicDescription": "Received FENCE.I request from other HART event", + "ConfigCode": "0x8000000000000009", + "EventName": "FW_FENCE_I_RECEIVED", + "BriefDescription": "Received FENCE.I request from other HART event" + }, + { + "PublicDescription": "Sent SFENCE.VMA request to other HART event", + "ConfigCode": "0x800000000000000a", + "EventName": "FW_SFENCE_VMA_SENT", + "BriefDescription": "Sent SFENCE.VMA request to other HART event" + }, + { + "PublicDescription": "Received SFENCE.VMA request from other HART event", + "ConfigCode": "0x800000000000000b", + "EventName": "FW_SFENCE_VMA_RECEIVED", + "BriefDescription": "Received SFENCE.VMA request from other HART event" + }, + { + "PublicDescription": "Sent SFENCE.VMA with ASID request to other HART event", + "ConfigCode": "0x800000000000000c", + "EventName": "FW_SFENCE_VMA_ASID_SENT", + "BriefDescription": "Sent SFENCE.VMA with ASID request to other HART event" + }, + { + "PublicDescription": "Received SFENCE.VMA with ASID request from other HART event", + "ConfigCode": "0x800000000000000d", + "EventName": "FW_SFENCE_VMA_ASID_RECEIVED", + "BriefDescription": "Received SFENCE.VMA with ASID
request from other HART event" + }, + { + "PublicDescription": "Sent HFENCE.GVMA request to other HART event", + "ConfigCode": "0x800000000000000e", + "EventName": "FW_HFENCE_GVMA_SENT", + "BriefDescription": "Sent HFENCE.GVMA request to other HART event" + }, + { + "PublicDescription": "Received HFENCE.GVMA request from other HART event", + "ConfigCode": "0x800000000000000f", + "EventName": "FW_HFENCE_GVMA_RECEIVED", + "BriefDescription": "Received HFENCE.GVMA request from other HART event" + }, + { + "PublicDescription": "Sent HFENCE.GVMA with VMID request to other HART event", + "ConfigCode": "0x8000000000000010", + "EventName": "FW_HFENCE_GVMA_VMID_SENT", + "BriefDescription": "Sent HFENCE.GVMA with VMID request to other HART event" + }, + { + "PublicDescription": "Received HFENCE.GVMA with VMID request from other HART event", + "ConfigCode": "0x8000000000000011", + "EventName": "FW_HFENCE_GVMA_VMID_RECEIVED", + "BriefDescription": "Received HFENCE.GVMA with VMID request from other HART event" + }, + { + "PublicDescription": "Sent HFENCE.VVMA request to other HART event", + "ConfigCode": "0x8000000000000012", + "EventName": "FW_HFENCE_VVMA_SENT", + "BriefDescription": "Sent HFENCE.VVMA request to other HART event" + }, + { + "PublicDescription": "Received HFENCE.VVMA request from other HART event", + "ConfigCode": "0x8000000000000013", + "EventName": "FW_HFENCE_VVMA_RECEIVED", + "BriefDescription": "Received HFENCE.VVMA request from other HART event" + }, + { + "PublicDescription": "Sent HFENCE.VVMA with ASID request to other HART event", + "ConfigCode": "0x8000000000000014", + "EventName": "FW_HFENCE_VVMA_ASID_SENT", + "BriefDescription": "Sent HFENCE.VVMA with ASID request to other HART event" + }, + { + "PublicDescription": "Received HFENCE.VVMA with ASID request from other HART event", + "ConfigCode": "0x8000000000000015", + "EventName": "FW_HFENCE_VVMA_ASID_RECEIVED", + "BriefDescription": "Received HFENCE.VVMA with ASID request from other HART event" + } 
+] From c4f769d4093d114f3c374ba06d6eef1fb763b56c Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 15 Aug 2022 16:22:40 +0300 Subject: [PATCH 0498/4122] perf vendor events riscv: add Sifive U74 JSON file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch add the Sifive U74 JSON file. Link: https://sifive.cdn.prismic.io/sifive/ad5577a0-9a00-45c9-a5d0-424a3d586060_u74_core_complex_manual_21G3.pdf Derived-from-code-by: João Mário Domingos Signed-off-by: Nikita Shubin Tested-by: Kautuk Consul Acked-by: Palmer Dabbelt Cc: Albert Ou Cc: Alexander Shishkin Cc: Anup Patel Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Peter Zijlstra Cc: linux-riscv@lists.infradead.org Cc: linux@yadro.com Link: https://lore.kernel.org/r/20220815132251.25702-4-nikita.shubin@maquefel.me Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/riscv/mapfile.csv | 17 ++++ .../arch/riscv/sifive/u74/firmware.json | 68 ++++++++++++++ .../arch/riscv/sifive/u74/instructions.json | 92 +++++++++++++++++++ .../arch/riscv/sifive/u74/memory.json | 32 +++++++ .../arch/riscv/sifive/u74/microarch.json | 57 ++++++++++++ 5 files changed, 266 insertions(+) create mode 100644 tools/perf/pmu-events/arch/riscv/mapfile.csv create mode 100644 tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json create mode 100644 tools/perf/pmu-events/arch/riscv/sifive/u74/instructions.json create mode 100644 tools/perf/pmu-events/arch/riscv/sifive/u74/memory.json create mode 100644 tools/perf/pmu-events/arch/riscv/sifive/u74/microarch.json diff --git a/tools/perf/pmu-events/arch/riscv/mapfile.csv b/tools/perf/pmu-events/arch/riscv/mapfile.csv new file mode 100644 index 000000000000..c61b3d6ef616 --- /dev/null +++ b/tools/perf/pmu-events/arch/riscv/mapfile.csv @@ -0,0 +1,17 @@ +# Format: +# MVENDORID-MARCHID-MIMPID,Version,JSON/file/pathname,Type +# +# where +# MVENDORID JEDEC code of the core 
provider +# MARCHID base microarchitecture of the hart +# MIMPID unique encoding of the version +# of the processor implementation +# Version could be used to track version of JSON file +# but currently unused. +# JSON/file/pathname is the path to JSON file, relative +# to tools/perf/pmu-events/arch/riscv/. +# Type is core, uncore etc +# +# +#MVENDORID-MARCHID-MIMPID,Version,Filename,EventType +0x489-0x8000000000000007-0x[[:xdigit:]]+,v1,sifive/u74,core diff --git a/tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json b/tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json new file mode 100644 index 000000000000..9b4a032186a7 --- /dev/null +++ b/tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json @@ -0,0 +1,68 @@ +[ + { + "ArchStdEvent": "FW_MISALIGNED_LOAD" + }, + { + "ArchStdEvent": "FW_MISALIGNED_STORE" + }, + { + "ArchStdEvent": "FW_ACCESS_LOAD" + }, + { + "ArchStdEvent": "FW_ACCESS_STORE" + }, + { + "ArchStdEvent": "FW_ILLEGAL_INSN" + }, + { + "ArchStdEvent": "FW_SET_TIMER" + }, + { + "ArchStdEvent": "FW_IPI_SENT" + }, + { + "ArchStdEvent": "FW_IPI_RECEIVED" + }, + { + "ArchStdEvent": "FW_FENCE_I_SENT" + }, + { + "ArchStdEvent": "FW_FENCE_I_RECEIVED" + }, + { + "ArchStdEvent": "FW_SFENCE_VMA_SENT" + }, + { + "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" + }, + { + "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" + }, + { + "ArchStdEvent": "FW_SFENCE_VMA_ASID_RECEIVED" + }, + { + "ArchStdEvent": "FW_HFENCE_GVMA_SENT" + }, + { + "ArchStdEvent": "FW_HFENCE_GVMA_RECEIVED" + }, + { + "ArchStdEvent": "FW_HFENCE_GVMA_VMID_SENT" + }, + { + "ArchStdEvent": "FW_HFENCE_GVMA_VMID_RECEIVED" + }, + { + "ArchStdEvent": "FW_HFENCE_VVMA_SENT" + }, + { + "ArchStdEvent": "FW_HFENCE_VVMA_RECEIVED" + }, + { + "ArchStdEvent": "FW_HFENCE_VVMA_ASID_SENT" + }, + { + "ArchStdEvent": "FW_HFENCE_VVMA_ASID_RECEIVED" + } +] diff --git a/tools/perf/pmu-events/arch/riscv/sifive/u74/instructions.json b/tools/perf/pmu-events/arch/riscv/sifive/u74/instructions.json new file mode 100644 index 
000000000000..5eab718c9256 --- /dev/null +++ b/tools/perf/pmu-events/arch/riscv/sifive/u74/instructions.json @@ -0,0 +1,92 @@ +[ + { + "EventName": "EXCEPTION_TAKEN", + "EventCode": "0x0000100", + "BriefDescription": "Exception taken" + }, + { + "EventName": "INTEGER_LOAD_RETIRED", + "EventCode": "0x0000200", + "BriefDescription": "Integer load instruction retired" + }, + { + "EventName": "INTEGER_STORE_RETIRED", + "EventCode": "0x0000400", + "BriefDescription": "Integer store instruction retired" + }, + { + "EventName": "ATOMIC_MEMORY_RETIRED", + "EventCode": "0x0000800", + "BriefDescription": "Atomic memory operation retired" + }, + { + "EventName": "SYSTEM_INSTRUCTION_RETIRED", + "EventCode": "0x0001000", + "BriefDescription": "System instruction retired" + }, + { + "EventName": "INTEGER_ARITHMETIC_RETIRED", + "EventCode": "0x0002000", + "BriefDescription": "Integer arithmetic instruction retired" + }, + { + "EventName": "CONDITIONAL_BRANCH_RETIRED", + "EventCode": "0x0004000", + "BriefDescription": "Conditional branch retired" + }, + { + "EventName": "JAL_INSTRUCTION_RETIRED", + "EventCode": "0x0008000", + "BriefDescription": "JAL instruction retired" + }, + { + "EventName": "JALR_INSTRUCTION_RETIRED", + "EventCode": "0x0010000", + "BriefDescription": "JALR instruction retired" + }, + { + "EventName": "INTEGER_MULTIPLICATION_RETIRED", + "EventCode": "0x0020000", + "BriefDescription": "Integer multiplication instruction retired" + }, + { + "EventName": "INTEGER_DIVISION_RETIRED", + "EventCode": "0x0040000", + "BriefDescription": "Integer division instruction retired" + }, + { + "EventName": "FP_LOAD_RETIRED", + "EventCode": "0x0080000", + "BriefDescription": "Floating-point load instruction retired" + }, + { + "EventName": "FP_STORE_RETIRED", + "EventCode": "0x0100000", + "BriefDescription": "Floating-point store instruction retired" + }, + { + "EventName": "FP_ADDITION_RETIRED", + "EventCode": "0x0200000", + "BriefDescription": "Floating-point addition retired" 
+ }, + { + "EventName": "FP_MULTIPLICATION_RETIRED", + "EventCode": "0x0400000", + "BriefDescription": "Floating-point multiplication retired" + }, + { + "EventName": "FP_FUSEDMADD_RETIRED", + "EventCode": "0x0800000", + "BriefDescription": "Floating-point fused multiply-add retired" + }, + { + "EventName": "FP_DIV_SQRT_RETIRED", + "EventCode": "0x1000000", + "BriefDescription": "Floating-point division or square-root retired" + }, + { + "EventName": "OTHER_FP_RETIRED", + "EventCode": "0x2000000", + "BriefDescription": "Other floating-point instruction retired" + } +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/riscv/sifive/u74/memory.json b/tools/perf/pmu-events/arch/riscv/sifive/u74/memory.json new file mode 100644 index 000000000000..be1a46312ac3 --- /dev/null +++ b/tools/perf/pmu-events/arch/riscv/sifive/u74/memory.json @@ -0,0 +1,32 @@ +[ + { + "EventName": "ICACHE_RETIRED", + "EventCode": "0x0000102", + "BriefDescription": "Instruction cache miss" + }, + { + "EventName": "DCACHE_MISS_MMIO_ACCESSES", + "EventCode": "0x0000202", + "BriefDescription": "Data cache miss or memory-mapped I/O access" + }, + { + "EventName": "DCACHE_WRITEBACK", + "EventCode": "0x0000402", + "BriefDescription": "Data cache write-back" + }, + { + "EventName": "INST_TLB_MISS", + "EventCode": "0x0000802", + "BriefDescription": "Instruction TLB miss" + }, + { + "EventName": "DATA_TLB_MISS", + "EventCode": "0x0001002", + "BriefDescription": "Data TLB miss" + }, + { + "EventName": "UTLB_MISS", + "EventCode": "0x0002002", + "BriefDescription": "UTLB miss" + } +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/riscv/sifive/u74/microarch.json b/tools/perf/pmu-events/arch/riscv/sifive/u74/microarch.json new file mode 100644 index 000000000000..50ffa55418cb --- /dev/null +++ b/tools/perf/pmu-events/arch/riscv/sifive/u74/microarch.json @@ -0,0 +1,57 @@ +[ + { + "EventName": "ADDRESSGEN_INTERLOCK", + "EventCode": "0x0000101", + "BriefDescription": 
"Address-generation interlock" + }, + { + "EventName": "LONGLAT_INTERLOCK", + "EventCode": "0x0000201", + "BriefDescription": "Long-latency interlock" + }, + { + "EventName": "CSR_READ_INTERLOCK", + "EventCode": "0x0000401", + "BriefDescription": "CSR read interlock" + }, + { + "EventName": "ICACHE_ITIM_BUSY", + "EventCode": "0x0000801", + "BriefDescription": "Instruction cache/ITIM busy" + }, + { + "EventName": "DCACHE_DTIM_BUSY", + "EventCode": "0x0001001", + "BriefDescription": "Data cache/DTIM busy" + }, + { + "EventName": "BRANCH_DIRECTION_MISPREDICTION", + "EventCode": "0x0002001", + "BriefDescription": "Branch direction misprediction" + }, + { + "EventName": "BRANCH_TARGET_MISPREDICTION", + "EventCode": "0x0004001", + "BriefDescription": "Branch/jump target misprediction" + }, + { + "EventName": "PIPE_FLUSH_CSR_WRITE", + "EventCode": "0x0008001", + "BriefDescription": "Pipeline flush from CSR write" + }, + { + "EventName": "PIPE_FLUSH_OTHER_EVENT", + "EventCode": "0x0010001", + "BriefDescription": "Pipeline flush from other event" + }, + { + "EventName": "INTEGER_MULTIPLICATION_INTERLOCK", + "EventCode": "0x0020001", + "BriefDescription": "Integer multiplication interlock" + }, + { + "EventName": "FP_INTERLOCK", + "EventCode": "0x0040001", + "BriefDescription": "Floating-point interlock" + } +] \ No newline at end of file From f7400262ea2192fc79b6f5a68242b3495b016981 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Oct 2022 19:02:08 -0700 Subject: [PATCH 0499/4122] perf tools: Save evsel->pmu in parse_events() Now evsel has a pmu pointer, let's save the info and use it like in evsel__find_pmu(). The missing feature check needs to be changed as the pmu pointer can be set from the beginning. 
Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Andi Kleen Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Michael Petlan Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221018020227.85905-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 20 ++++++++++---------- tools/perf/util/parse-events.c | 1 + tools/perf/util/pmu.c | 4 ++++ 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 76605fde3507..b7140beca970 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -467,6 +467,7 @@ struct evsel *evsel__clone(struct evsel *orig) evsel->collect_stat = orig->collect_stat; evsel->weak_group = orig->weak_group; evsel->use_config_name = orig->use_config_name; + evsel->pmu = orig->pmu; if (evsel__copy_config_terms(evsel, orig) < 0) goto out_err; @@ -1966,17 +1967,16 @@ bool evsel__detect_missing_features(struct evsel *evsel) perf_missing_features.mmap2 = true; pr_debug2_peo("switching off mmap2\n"); return true; - } else if ((evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host) && - (evsel->pmu == NULL || evsel->pmu->missing_features.exclude_guest)) { - if (evsel->pmu == NULL) { + } else if (evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host) { + if (evsel->pmu == NULL) evsel->pmu = evsel__find_pmu(evsel); - if (evsel->pmu) - evsel->pmu->missing_features.exclude_guest = true; - else { - /* we cannot find PMU, disable attrs now */ - evsel->core.attr.exclude_host = false; - evsel->core.attr.exclude_guest = false; - } + + if (evsel->pmu) + evsel->pmu->missing_features.exclude_guest = true; + else { + /* we cannot find PMU, disable attrs now */ + evsel->core.attr.exclude_host = false; + evsel->core.attr.exclude_guest = false; } if (evsel->exclude_GH) { diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 
5973f46c2375..6502cd679f57 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -266,6 +266,7 @@ __add_event(struct list_head *list, int *idx, evsel->core.own_cpus = perf_cpu_map__get(cpus); evsel->core.requires_cpu = pmu ? pmu->is_uncore : false; evsel->auto_merge_stats = auto_merge_stats; + evsel->pmu = pmu; if (name) evsel->name = strdup(name); diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 03284059175f..6a86e6af0903 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1065,11 +1065,15 @@ struct perf_pmu *evsel__find_pmu(struct evsel *evsel) { struct perf_pmu *pmu = NULL; + if (evsel->pmu) + return evsel->pmu; + while ((pmu = perf_pmu__scan(pmu)) != NULL) { if (pmu->type == evsel->core.attr.type) break; } + evsel->pmu = pmu; return pmu; } From b86ac6796b6c1dea83b744812d36922c21f43323 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Oct 2022 19:02:09 -0700 Subject: [PATCH 0500/4122] perf tools: Use pmu info in evsel__is_hybrid() If evsel has pmu, it can use pmu->is_hybrid directly. 
Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Andi Kleen Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Michael Petlan Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221018020227.85905-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index b7140beca970..c9fef26d0702 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -3131,6 +3131,9 @@ void evsel__zero_per_pkg(struct evsel *evsel) bool evsel__is_hybrid(struct evsel *evsel) { + if (evsel->pmu) + return evsel->pmu->is_hybrid; + return evsel->pmu_name && perf_pmu__is_hybrid(evsel->pmu_name); } From 93d5e700156e03e66eb1bf2158ba3b8a8b354c71 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Oct 2022 19:02:10 -0700 Subject: [PATCH 0501/4122] perf stat: Use evsel__is_hybrid() more In the stat-display code, it needs to check if the current evsel is hybrid but it uses perf_pmu__has_hybrid() which can return true for non-hybrid event too. I think it's better to use evsel__is_hybrid(). Also remove a NULL check for the 'config' parameter in the hybrid_merge() since it's called after config->no_merge check. 
Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Andi Kleen Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Michael Petlan Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221018020227.85905-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 5c47ee9963a7..4113aa86772f 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -704,7 +704,7 @@ static void uniquify_event_name(struct evsel *counter) counter->name = new_name; } } else { - if (perf_pmu__has_hybrid()) { + if (evsel__is_hybrid(counter)) { ret = asprintf(&new_name, "%s/%s/", counter->pmu_name, counter->name); } else { @@ -744,26 +744,14 @@ static void collect_all_aliases(struct perf_stat_config *config, struct evsel *c } } -static bool is_uncore(struct evsel *evsel) -{ - struct perf_pmu *pmu = evsel__find_pmu(evsel); - - return pmu && pmu->is_uncore; -} - -static bool hybrid_uniquify(struct evsel *evsel) -{ - return perf_pmu__has_hybrid() && !is_uncore(evsel); -} - static bool hybrid_merge(struct evsel *counter, struct perf_stat_config *config, bool check) { - if (hybrid_uniquify(counter)) { + if (evsel__is_hybrid(counter)) { if (check) - return config && config->hybrid_merge; + return config->hybrid_merge; else - return config && !config->hybrid_merge; + return !config->hybrid_merge; } return false; From 375369abcdb774abadbc1c82d2e6c24f0f1f49a1 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Oct 2022 19:02:11 -0700 Subject: [PATCH 0502/4122] perf stat: Add aggr id for global mode To make the code simpler, I'd like to use the same aggregation code for the global mode. We can simply add an id function to return cpu 0 and use print_aggr(). No functional change intended. 
Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Andi Kleen Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Michael Petlan Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221018020227.85905-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 36 ++++++++++++++++++++++++++++++++++-- tools/perf/util/cpumap.c | 10 ++++++++++ tools/perf/util/cpumap.h | 6 +++++- 3 files changed, 49 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 265b05157972..75d16e9705a4 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1330,6 +1330,12 @@ static struct aggr_cpu_id perf_stat__get_node(struct perf_stat_config *config __ return aggr_cpu_id__node(cpu, /*data=*/NULL); } +static struct aggr_cpu_id perf_stat__get_global(struct perf_stat_config *config __maybe_unused, + struct perf_cpu cpu) +{ + return aggr_cpu_id__global(cpu, /*data=*/NULL); +} + static struct aggr_cpu_id perf_stat__get_aggr(struct perf_stat_config *config, aggr_get_id_t get_id, struct perf_cpu cpu) { @@ -1366,6 +1372,12 @@ static struct aggr_cpu_id perf_stat__get_node_cached(struct perf_stat_config *co return perf_stat__get_aggr(config, perf_stat__get_node, cpu); } +static struct aggr_cpu_id perf_stat__get_global_cached(struct perf_stat_config *config, + struct perf_cpu cpu) +{ + return perf_stat__get_aggr(config, perf_stat__get_global, cpu); +} + static bool term_percore_set(void) { struct evsel *counter; @@ -1395,6 +1407,7 @@ static aggr_cpu_id_get_t aggr_mode__get_aggr(enum aggr_mode aggr_mode) return NULL; case AGGR_GLOBAL: + return aggr_cpu_id__global; case AGGR_THREAD: case AGGR_UNSET: case AGGR_MAX: @@ -1420,6 +1433,7 @@ static aggr_get_id_t aggr_mode__get_id(enum aggr_mode aggr_mode) } return NULL; case AGGR_GLOBAL: + return perf_stat__get_global_cached; case AGGR_THREAD: case AGGR_UNSET: case AGGR_MAX: @@ -1535,6 
+1549,16 @@ static struct aggr_cpu_id perf_env__get_node_aggr_by_cpu(struct perf_cpu cpu, vo return id; } +static struct aggr_cpu_id perf_env__get_global_aggr_by_cpu(struct perf_cpu cpu __maybe_unused, + void *data __maybe_unused) +{ + struct aggr_cpu_id id = aggr_cpu_id__empty(); + + /* it always aggregates to the cpu 0 */ + id.cpu = (struct perf_cpu){ .cpu = 0 }; + return id; +} + static struct aggr_cpu_id perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused, struct perf_cpu cpu) { @@ -1558,6 +1582,12 @@ static struct aggr_cpu_id perf_stat__get_node_file(struct perf_stat_config *conf return perf_env__get_node_aggr_by_cpu(cpu, &perf_stat.session->header.env); } +static struct aggr_cpu_id perf_stat__get_global_file(struct perf_stat_config *config __maybe_unused, + struct perf_cpu cpu) +{ + return perf_env__get_global_aggr_by_cpu(cpu, &perf_stat.session->header.env); +} + static aggr_cpu_id_get_t aggr_mode__get_aggr_file(enum aggr_mode aggr_mode) { switch (aggr_mode) { @@ -1569,8 +1599,9 @@ static aggr_cpu_id_get_t aggr_mode__get_aggr_file(enum aggr_mode aggr_mode) return perf_env__get_core_aggr_by_cpu; case AGGR_NODE: return perf_env__get_node_aggr_by_cpu; - case AGGR_NONE: case AGGR_GLOBAL: + return perf_env__get_global_aggr_by_cpu; + case AGGR_NONE: case AGGR_THREAD: case AGGR_UNSET: case AGGR_MAX: @@ -1590,8 +1621,9 @@ static aggr_get_id_t aggr_mode__get_id_file(enum aggr_mode aggr_mode) return perf_stat__get_core_file; case AGGR_NODE: return perf_stat__get_node_file; - case AGGR_NONE: case AGGR_GLOBAL: + return perf_stat__get_global_file; + case AGGR_NONE: case AGGR_THREAD: case AGGR_UNSET: case AGGR_MAX: diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 8486ca3bec75..60209fe87456 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -354,6 +354,16 @@ struct aggr_cpu_id aggr_cpu_id__node(struct perf_cpu cpu, void *data __maybe_unu return id; } +struct aggr_cpu_id aggr_cpu_id__global(struct perf_cpu 
cpu, void *data __maybe_unused) +{ + struct aggr_cpu_id id = aggr_cpu_id__empty(); + + /* it always aggregates to the cpu 0 */ + cpu.cpu = 0; + id.cpu = cpu; + return id; +} + /* setup simple routines to easily access node numbers given a cpu number */ static int get_max_num(char *path, int *max) { diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 4a6d029576ee..b2ff648bc417 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -133,5 +133,9 @@ struct aggr_cpu_id aggr_cpu_id__cpu(struct perf_cpu cpu, void *data); * cpu. The function signature is compatible with aggr_cpu_id_get_t. */ struct aggr_cpu_id aggr_cpu_id__node(struct perf_cpu cpu, void *data); - +/** + * aggr_cpu_id__global - Create an aggr_cpu_id for global aggregation. + * The function signature is compatible with aggr_cpu_id_get_t. + */ +struct aggr_cpu_id aggr_cpu_id__global(struct perf_cpu cpu, void *data); #endif /* __PERF_CPUMAP_H */ From 8938cfa7480282fb3c75548958b239444affcc50 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Oct 2022 19:02:12 -0700 Subject: [PATCH 0503/4122] perf stat: Add cpu aggr id for no aggregation mode Likewise, add an aggr_id for cpu for the no-aggregation mode. This is not actually used yet, but later code will use it to unify the aggregation code. No functional change intended.
Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Andi Kleen Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Michael Petlan Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221018020227.85905-6-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 48 +++++++++++++++++++++++++++++++++++---- 1 file changed, 43 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 75d16e9705a4..b03b530fe9a6 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1336,6 +1336,12 @@ static struct aggr_cpu_id perf_stat__get_global(struct perf_stat_config *config return aggr_cpu_id__global(cpu, /*data=*/NULL); } +static struct aggr_cpu_id perf_stat__get_cpu(struct perf_stat_config *config __maybe_unused, + struct perf_cpu cpu) +{ + return aggr_cpu_id__cpu(cpu, /*data=*/NULL); +} + static struct aggr_cpu_id perf_stat__get_aggr(struct perf_stat_config *config, aggr_get_id_t get_id, struct perf_cpu cpu) { @@ -1378,6 +1384,12 @@ static struct aggr_cpu_id perf_stat__get_global_cached(struct perf_stat_config * return perf_stat__get_aggr(config, perf_stat__get_global, cpu); } +static struct aggr_cpu_id perf_stat__get_cpu_cached(struct perf_stat_config *config, + struct perf_cpu cpu) +{ + return perf_stat__get_aggr(config, perf_stat__get_cpu, cpu); +} + static bool term_percore_set(void) { struct evsel *counter; @@ -1404,8 +1416,7 @@ static aggr_cpu_id_get_t aggr_mode__get_aggr(enum aggr_mode aggr_mode) case AGGR_NONE: if (term_percore_set()) return aggr_cpu_id__core; - - return NULL; + return aggr_cpu_id__cpu; case AGGR_GLOBAL: return aggr_cpu_id__global; case AGGR_THREAD: @@ -1428,10 +1439,9 @@ static aggr_get_id_t aggr_mode__get_id(enum aggr_mode aggr_mode) case AGGR_NODE: return perf_stat__get_node_cached; case AGGR_NONE: - if (term_percore_set()) { + if (term_percore_set()) return 
perf_stat__get_core_cached; - } - return NULL; + return perf_stat__get_cpu_cached; case AGGR_GLOBAL: return perf_stat__get_global_cached; case AGGR_THREAD: @@ -1541,6 +1551,26 @@ static struct aggr_cpu_id perf_env__get_core_aggr_by_cpu(struct perf_cpu cpu, vo return id; } +static struct aggr_cpu_id perf_env__get_cpu_aggr_by_cpu(struct perf_cpu cpu, void *data) +{ + struct perf_env *env = data; + struct aggr_cpu_id id = aggr_cpu_id__empty(); + + if (cpu.cpu != -1) { + /* + * core_id is relative to socket and die, + * we need a global id. So we set + * socket, die id and core id + */ + id.socket = env->cpu[cpu.cpu].socket_id; + id.die = env->cpu[cpu.cpu].die_id; + id.core = env->cpu[cpu.cpu].core_id; + id.cpu = cpu; + } + + return id; +} + static struct aggr_cpu_id perf_env__get_node_aggr_by_cpu(struct perf_cpu cpu, void *data) { struct aggr_cpu_id id = aggr_cpu_id__empty(); @@ -1576,6 +1606,12 @@ static struct aggr_cpu_id perf_stat__get_core_file(struct perf_stat_config *conf return perf_env__get_core_aggr_by_cpu(cpu, &perf_stat.session->header.env); } +static struct aggr_cpu_id perf_stat__get_cpu_file(struct perf_stat_config *config __maybe_unused, + struct perf_cpu cpu) +{ + return perf_env__get_cpu_aggr_by_cpu(cpu, &perf_stat.session->header.env); +} + static struct aggr_cpu_id perf_stat__get_node_file(struct perf_stat_config *config __maybe_unused, struct perf_cpu cpu) { @@ -1602,6 +1638,7 @@ static aggr_cpu_id_get_t aggr_mode__get_aggr_file(enum aggr_mode aggr_mode) case AGGR_GLOBAL: return perf_env__get_global_aggr_by_cpu; case AGGR_NONE: + return perf_env__get_cpu_aggr_by_cpu; case AGGR_THREAD: case AGGR_UNSET: case AGGR_MAX: @@ -1624,6 +1661,7 @@ static aggr_get_id_t aggr_mode__get_id_file(enum aggr_mode aggr_mode) case AGGR_GLOBAL: return perf_stat__get_global_file; case AGGR_NONE: + return perf_stat__get_cpu_file; case AGGR_THREAD: case AGGR_UNSET: case AGGR_MAX: From 505ac48ba759cd88d9fd40ec5354cda28e17377b Mon Sep 17 00:00:00 2001 From: Namhyung Kim 
Date: Mon, 17 Oct 2022 19:02:13 -0700 Subject: [PATCH 0504/4122] perf stat: Add 'needs_sort' argument to cpu_aggr_map__new() In case of no aggregation, it needs to keep the original (cpu) ordering in the aggr_map so that it can be in sync with the cpu map. This will make the code easier to handle AGGR_NONE similar to others. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Andi Kleen Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Michael Petlan Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221018020227.85905-7-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 7 +++++-- tools/perf/util/cpumap.c | 6 ++++-- tools/perf/util/cpumap.h | 2 +- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index b03b530fe9a6..9053fd4d15a7 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1458,8 +1458,9 @@ static int perf_stat_init_aggr_mode(void) aggr_cpu_id_get_t get_id = aggr_mode__get_aggr(stat_config.aggr_mode); if (get_id) { + bool needs_sort = stat_config.aggr_mode != AGGR_NONE; stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.user_requested_cpus, - get_id, /*data=*/NULL); + get_id, /*data=*/NULL, needs_sort); if (!stat_config.aggr_map) { pr_err("cannot build %s map", aggr_mode__string[stat_config.aggr_mode]); return -1; @@ -1674,11 +1675,13 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st) { struct perf_env *env = &st->session->header.env; aggr_cpu_id_get_t get_id = aggr_mode__get_aggr_file(stat_config.aggr_mode); + bool needs_sort = stat_config.aggr_mode != AGGR_NONE; if (!get_id) return 0; - stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.user_requested_cpus, get_id, env); + stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.user_requested_cpus, + get_id, env, needs_sort); if (!stat_config.aggr_map) { pr_err("cannot 
build %s map", aggr_mode__string[stat_config.aggr_mode]); return -1; diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 60209fe87456..6e3fcf523de9 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -234,7 +234,7 @@ static int aggr_cpu_id__cmp(const void *a_pointer, const void *b_pointer) struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus, aggr_cpu_id_get_t get_id, - void *data) + void *data, bool needs_sort) { int idx; struct perf_cpu cpu; @@ -270,8 +270,10 @@ struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus, if (trimmed_c) c = trimmed_c; } + /* ensure we process id in increasing order */ - qsort(c->map, c->nr, sizeof(struct aggr_cpu_id), aggr_cpu_id__cmp); + if (needs_sort) + qsort(c->map, c->nr, sizeof(struct aggr_cpu_id), aggr_cpu_id__cmp); return c; diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index b2ff648bc417..da28b3146ef9 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -97,7 +97,7 @@ typedef struct aggr_cpu_id (*aggr_cpu_id_get_t)(struct perf_cpu cpu, void *data) */ struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus, aggr_cpu_id_get_t get_id, - void *data); + void *data, bool needs_sort); bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b); bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a); From ca68b374d0409bea5cacd4c5a8e0fbb407922d38 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Oct 2022 19:02:14 -0700 Subject: [PATCH 0505/4122] perf stat: Add struct perf_stat_aggr to perf_stat_evsel The perf_stat_aggr struct is to keep aggregated counter values and the states according to the aggregation mode. The number of entries depends on the mode and this is a preparation for later use.
Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Andi Kleen Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Michael Petlan Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221018020227.85905-8-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat.c | 34 +++++++++++++++++++++++++++------- tools/perf/util/stat.h | 19 +++++++++++++++++++ 2 files changed, 46 insertions(+), 7 deletions(-) diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 8ec8bb4a9912..c9d5aa295b54 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -133,15 +133,33 @@ static void perf_stat_evsel_id_init(struct evsel *evsel) static void evsel__reset_stat_priv(struct evsel *evsel) { struct perf_stat_evsel *ps = evsel->stats; + struct perf_stat_aggr *aggr = ps->aggr; init_stats(&ps->res_stats); + + if (aggr) + memset(aggr, 0, sizeof(*aggr) * ps->nr_aggr); } -static int evsel__alloc_stat_priv(struct evsel *evsel) + +static int evsel__alloc_stat_priv(struct evsel *evsel, int nr_aggr) { - evsel->stats = zalloc(sizeof(struct perf_stat_evsel)); - if (evsel->stats == NULL) + struct perf_stat_evsel *ps; + + ps = zalloc(sizeof(*ps)); + if (ps == NULL) return -ENOMEM; + + if (nr_aggr) { + ps->nr_aggr = nr_aggr; + ps->aggr = calloc(nr_aggr, sizeof(*ps->aggr)); + if (ps->aggr == NULL) { + free(ps); + return -ENOMEM; + } + } + + evsel->stats = ps; perf_stat_evsel_id_init(evsel); evsel__reset_stat_priv(evsel); return 0; @@ -151,8 +169,10 @@ static void evsel__free_stat_priv(struct evsel *evsel) { struct perf_stat_evsel *ps = evsel->stats; - if (ps) + if (ps) { + zfree(&ps->aggr); zfree(&ps->group_data); + } zfree(&evsel->stats); } @@ -181,9 +201,9 @@ static void evsel__reset_prev_raw_counts(struct evsel *evsel) perf_counts__reset(evsel->prev_raw_counts); } -static int evsel__alloc_stats(struct evsel *evsel, bool alloc_raw) +static int evsel__alloc_stats(struct evsel *evsel, 
int nr_aggr, bool alloc_raw) { - if (evsel__alloc_stat_priv(evsel) < 0 || + if (evsel__alloc_stat_priv(evsel, nr_aggr) < 0 || evsel__alloc_counts(evsel) < 0 || (alloc_raw && evsel__alloc_prev_raw_counts(evsel) < 0)) return -ENOMEM; @@ -196,7 +216,7 @@ int evlist__alloc_stats(struct evlist *evlist, bool alloc_raw) struct evsel *evsel; evlist__for_each_entry(evlist, evsel) { - if (evsel__alloc_stats(evsel, alloc_raw)) + if (evsel__alloc_stats(evsel, 0, alloc_raw)) goto out_free; } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index b0899c6e002f..42453513ffea 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -8,6 +8,7 @@ #include #include "cpumap.h" #include "rblist.h" +#include "counts.h" struct perf_cpu_map; struct perf_stat_config; @@ -42,9 +43,27 @@ enum perf_stat_evsel_id { PERF_STAT_EVSEL_ID__MAX, }; +/* hold aggregated event info */ +struct perf_stat_aggr { + /* aggregated values */ + struct perf_counts_values counts; + /* number of entries (CPUs) aggregated */ + int nr; + /* whether any entry has failed to read/process event */ + bool failed; +}; + +/* per-evsel event stats */ struct perf_stat_evsel { + /* used for repeated runs */ struct stats res_stats; + /* evsel id for quick check */ enum perf_stat_evsel_id id; + /* number of allocated 'aggr' */ + int nr_aggr; + /* aggregated event values */ + struct perf_stat_aggr *aggr; + /* used for group read */ u64 *group_data; }; From 1f297a6eb2bd90663518cbb6e9e2a3b2add34b73 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Oct 2022 19:02:15 -0700 Subject: [PATCH 0506/4122] perf stat: Allocate evsel->stats->aggr properly The perf_stat_config.aggr_map should have a correct size of the aggregation map. Use it to allocate aggr_counts. Also AGGR_NONE with per-core events can be tricky because it doesn't aggreate basically but it needs to do so for per-core events only. So only per-core evsels will have stats->aggr data. 
Note that other callers of evlist__alloc_stats() might not have stat_config or aggr_map.
@@ -2568,10 +2568,10 @@ int cmd_stat(int argc, const char **argv) goto out; } - if (evlist__alloc_stats(evsel_list, interval)) + if (perf_stat_init_aggr_mode()) goto out; - if (perf_stat_init_aggr_mode()) + if (evlist__alloc_stats(&stat_config, evsel_list, interval)) goto out; /* diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c index 68f5a2a03242..21b7ac00d798 100644 --- a/tools/perf/tests/parse-metric.c +++ b/tools/perf/tests/parse-metric.c @@ -103,7 +103,7 @@ static int __compute_metric(const char *name, struct value *vals, if (err) goto out; - err = evlist__alloc_stats(evlist, false); + err = evlist__alloc_stats(/*config=*/NULL, evlist, /*alloc_raw=*/false); if (err) goto out; diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index 097e05c796ab..5d0d3b239a68 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -889,7 +889,7 @@ static int test__parsing_callback(const struct pmu_event *pe, const struct pmu_e goto out_err; } - err = evlist__alloc_stats(evlist, false); + err = evlist__alloc_stats(/*config=*/NULL, evlist, /*alloc_raw=*/false); if (err) goto out_err; /* diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index c9d5aa295b54..374149628507 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -211,12 +211,17 @@ static int evsel__alloc_stats(struct evsel *evsel, int nr_aggr, bool alloc_raw) return 0; } -int evlist__alloc_stats(struct evlist *evlist, bool alloc_raw) +int evlist__alloc_stats(struct perf_stat_config *config, + struct evlist *evlist, bool alloc_raw) { struct evsel *evsel; + int nr_aggr = 0; + + if (config && config->aggr_map) + nr_aggr = config->aggr_map->nr; evlist__for_each_entry(evlist, evsel) { - if (evsel__alloc_stats(evsel, 0, alloc_raw)) + if (evsel__alloc_stats(evsel, nr_aggr, alloc_raw)) goto out_free; } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 42453513ffea..0980875b9be1 100644 --- 
a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -267,7 +267,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, struct runtime_stat *st); void perf_stat__collect_metric_expr(struct evlist *); -int evlist__alloc_stats(struct evlist *evlist, bool alloc_raw); +int evlist__alloc_stats(struct perf_stat_config *config, + struct evlist *evlist, bool alloc_raw); void evlist__free_stats(struct evlist *evlist); void evlist__reset_stats(struct evlist *evlist); void evlist__reset_prev_raw_counts(struct evlist *evlist); From f976bc6b6bfc9b14eeaf9a8859191c8f85c253dc Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Oct 2022 19:02:16 -0700 Subject: [PATCH 0507/4122] perf stat: Aggregate events using evsel->stats->aggr Add a logic to aggregate counter values to the new evsel->stats->aggr. This is not used yet so shadow stats are not updated. But later patch will convert the existing code to use it. With that, we don't need to handle AGGR_GLOBAL specially anymore. It can use the same logic with counts, prev_counts and aggr_counts. 
Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Andi Kleen Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Michael Petlan Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221018020227.85905-10-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 3 -- tools/perf/util/evsel.c | 9 +--- .../scripting-engines/trace-event-python.c | 6 --- tools/perf/util/stat.c | 46 ++++++++++++++++--- 4 files changed, 41 insertions(+), 23 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 92a8e4512f98..abede56d79b6 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -963,9 +963,6 @@ try_again_reset: init_stats(&walltime_nsecs_stats); update_stats(&walltime_nsecs_stats, t1 - t0); - if (stat_config.aggr_mode == AGGR_GLOBAL) - evlist__save_aggr_prev_raw_counts(evsel_list); - evlist__copy_prev_raw_counts(evsel_list); evlist__reset_prev_raw_counts(evsel_list); perf_stat__reset_shadow_per_stat(&rt_stat); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index c9fef26d0702..cdde5b5f8ad2 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1526,13 +1526,8 @@ void evsel__compute_deltas(struct evsel *evsel, int cpu_map_idx, int thread, if (!evsel->prev_raw_counts) return; - if (cpu_map_idx == -1) { - tmp = evsel->prev_raw_counts->aggr; - evsel->prev_raw_counts->aggr = *count; - } else { - tmp = *perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread); - *perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread) = *count; - } + tmp = *perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread); + *perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread) = *count; count->val = count->val - tmp.val; count->ena = count->ena - tmp.ena; diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 1f2040f36d4e..7bc8559dce6a 
100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -1653,12 +1653,6 @@ static void python_process_stat(struct perf_stat_config *config, struct perf_cpu_map *cpus = counter->core.cpus; int cpu, thread; - if (config->aggr_mode == AGGR_GLOBAL) { - process_stat(counter, (struct perf_cpu){ .cpu = -1 }, -1, tstamp, - &counter->counts->aggr); - return; - } - for (thread = 0; thread < threads->nr; thread++) { for (cpu = 0; cpu < perf_cpu_map__nr(cpus); cpu++) { process_stat(counter, perf_cpu_map__cpu(cpus, cpu), diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 374149628507..99874254809d 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -387,6 +387,7 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel, struct perf_counts_values *count) { struct perf_counts_values *aggr = &evsel->counts->aggr; + struct perf_stat_evsel *ps = evsel->stats; static struct perf_counts_values zero; bool skip = false; @@ -398,6 +399,44 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel, if (skip) count = &zero; + if (!evsel->snapshot) + evsel__compute_deltas(evsel, cpu_map_idx, thread, count); + perf_counts_values__scale(count, config->scale, NULL); + + if (ps->aggr) { + struct perf_cpu cpu = perf_cpu_map__cpu(evsel->core.cpus, cpu_map_idx); + struct aggr_cpu_id aggr_id = config->aggr_get_id(config, cpu); + struct perf_stat_aggr *ps_aggr; + int i; + + for (i = 0; i < ps->nr_aggr; i++) { + if (!aggr_cpu_id__equal(&aggr_id, &config->aggr_map->map[i])) + continue; + + ps_aggr = &ps->aggr[i]; + ps_aggr->nr++; + + /* + * When any result is bad, make them all to give + * consistent output in interval mode. 
+ */ + if (count->ena == 0 || count->run == 0 || + evsel->counts->scaled == -1) { + ps_aggr->counts.val = 0; + ps_aggr->counts.ena = 0; + ps_aggr->counts.run = 0; + ps_aggr->failed = true; + } + + if (!ps_aggr->failed) { + ps_aggr->counts.val += count->val; + ps_aggr->counts.ena += count->ena; + ps_aggr->counts.run += count->run; + } + break; + } + } + switch (config->aggr_mode) { case AGGR_THREAD: case AGGR_CORE: @@ -405,9 +444,6 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel, case AGGR_SOCKET: case AGGR_NODE: case AGGR_NONE: - if (!evsel->snapshot) - evsel__compute_deltas(evsel, cpu_map_idx, thread, count); - perf_counts_values__scale(count, config->scale, NULL); if ((config->aggr_mode == AGGR_NONE) && (!evsel->percore)) { perf_stat__update_shadow_stats(evsel, count->val, cpu_map_idx, &rt_stat); @@ -469,10 +505,6 @@ int perf_stat_process_counter(struct perf_stat_config *config, if (config->aggr_mode != AGGR_GLOBAL) return 0; - if (!counter->snapshot) - evsel__compute_deltas(counter, -1, -1, aggr); - perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled); - update_stats(&ps->res_stats, *count); if (verbose > 0) { From 049aba09e2156dd2ff1a61bf1b8738b0fc864c9d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Oct 2022 19:02:17 -0700 Subject: [PATCH 0508/4122] perf stat: Factor out evsel__count_has_error() It's possible to have 0 enabled/running time for some per-task or per-cgroup events since it's not scheduled on any CPU. Treating the whole event as failed would not work in this case. Thinking again, the code only existed when any CPU-level aggregation is enabled (like per-socket, per-core, ...). To make it clearer, factor out the condition check into the new evsel__count_has_error() function and add some comments. 
Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Andi Kleen Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Michael Petlan Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221018020227.85905-11-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 99874254809d..dc075d5a0f72 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -381,6 +381,25 @@ static int check_per_pkg(struct evsel *counter, struct perf_counts_values *vals, return ret; } +static bool evsel__count_has_error(struct evsel *evsel, + struct perf_counts_values *count, + struct perf_stat_config *config) +{ + /* the evsel was failed already */ + if (evsel->err || evsel->counts->scaled == -1) + return true; + + /* this is meaningful for CPU aggregation modes only */ + if (config->aggr_mode == AGGR_GLOBAL) + return false; + + /* it's considered ok when it actually ran */ + if (count->ena != 0 && count->run != 0) + return false; + + return true; +} + static int process_counter_values(struct perf_stat_config *config, struct evsel *evsel, int cpu_map_idx, int thread, @@ -420,8 +439,7 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel, * When any result is bad, make them all to give * consistent output in interval mode. 
*/ - if (count->ena == 0 || count->run == 0 || - evsel->counts->scaled == -1) { + if (evsel__count_has_error(evsel, count, config) && !ps_aggr->failed) { ps_aggr->counts.val = 0; ps_aggr->counts.ena = 0; ps_aggr->counts.run = 0; From 050059e1b1affc080ede9fe691768e2383eb6367 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Oct 2022 19:02:18 -0700 Subject: [PATCH 0509/4122] perf stat: Aggregate per-thread stats using evsel->stats->aggr Per-thread aggregation doesn't use the CPU numbers but the logic should be the same. Initialize cpu_aggr_map separately for AGGR_THREAD and use thread map idx to aggregate counter values. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Andi Kleen Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Michael Petlan Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221018020227.85905-12-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 31 +++++++++++++++++++++++++++++++ tools/perf/util/stat.c | 24 ++++++++++++++++++++++-- 2 files changed, 53 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index abede56d79b6..6777fef0d56c 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1465,6 +1465,21 @@ static int perf_stat_init_aggr_mode(void) stat_config.aggr_get_id = aggr_mode__get_id(stat_config.aggr_mode); } + if (stat_config.aggr_mode == AGGR_THREAD) { + nr = perf_thread_map__nr(evsel_list->core.threads); + stat_config.aggr_map = cpu_aggr_map__empty_new(nr); + if (stat_config.aggr_map == NULL) + return -ENOMEM; + + for (int s = 0; s < nr; s++) { + struct aggr_cpu_id id = aggr_cpu_id__empty(); + + id.thread_idx = s; + stat_config.aggr_map->map[s] = id; + } + return 0; + } + /* * The evsel_list->cpus is the base we operate on, * taking the highest cpu number to be the size of @@ -1674,6 +1689,22 @@ static int 
perf_stat_init_aggr_mode_file(struct perf_stat *st) aggr_cpu_id_get_t get_id = aggr_mode__get_aggr_file(stat_config.aggr_mode); bool needs_sort = stat_config.aggr_mode != AGGR_NONE; + if (stat_config.aggr_mode == AGGR_THREAD) { + int nr = perf_thread_map__nr(evsel_list->core.threads); + + stat_config.aggr_map = cpu_aggr_map__empty_new(nr); + if (stat_config.aggr_map == NULL) + return -ENOMEM; + + for (int s = 0; s < nr; s++) { + struct aggr_cpu_id id = aggr_cpu_id__empty(); + + id.thread_idx = s; + stat_config.aggr_map->map[s] = id; + } + return 0; + } + if (!get_id) return 0; diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index dc075d5a0f72..5b04c9d16156 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -422,6 +422,24 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel, evsel__compute_deltas(evsel, cpu_map_idx, thread, count); perf_counts_values__scale(count, config->scale, NULL); + if (config->aggr_mode == AGGR_THREAD) { + struct perf_counts_values *aggr_counts = &ps->aggr[thread].counts; + + /* + * Skip value 0 when enabling --per-thread globally, + * otherwise too many 0 output. + */ + if (count->val == 0 && config->system_wide) + return 0; + + ps->aggr[thread].nr++; + + aggr_counts->val += count->val; + aggr_counts->ena += count->ena; + aggr_counts->run += count->run; + goto update; + } + if (ps->aggr) { struct perf_cpu cpu = perf_cpu_map__cpu(evsel->core.cpus, cpu_map_idx); struct aggr_cpu_id aggr_id = config->aggr_get_id(config, cpu); @@ -436,8 +454,9 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel, ps_aggr->nr++; /* - * When any result is bad, make them all to give - * consistent output in interval mode. + * When any result is bad, make them all to give consistent output + * in interval mode. But per-task counters can have 0 enabled time + * when some tasks are idle. 
*/ if (evsel__count_has_error(evsel, count, config) && !ps_aggr->failed) { ps_aggr->counts.val = 0; @@ -455,6 +474,7 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel, } } +update: switch (config->aggr_mode) { case AGGR_THREAD: case AGGR_CORE: From ae7e6492ee54e18cc3d6ed4bb5ca857726a7e9c3 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Oct 2022 19:02:19 -0700 Subject: [PATCH 0510/4122] perf stat: Allocate aggr counts for recorded data In the process_stat_config_event() it sets the aggr_mode that means the earlier evlist__alloc_stats() cannot allocate the aggr counts due to the missing aggr_mode. Do it after setting the aggr_map using evlist__alloc_aggr_stats(). Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Andi Kleen Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Michael Petlan Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221018020227.85905-13-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 20 +++++++++++++++----- tools/perf/util/stat.c | 39 +++++++++++++++++++++++++++++++-------- tools/perf/util/stat.h | 2 ++ 3 files changed, 48 insertions(+), 13 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 6777fef0d56c..2a6a5d0c5563 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1342,7 +1342,11 @@ static struct aggr_cpu_id perf_stat__get_cpu(struct perf_stat_config *config __m static struct aggr_cpu_id perf_stat__get_aggr(struct perf_stat_config *config, aggr_get_id_t get_id, struct perf_cpu cpu) { - struct aggr_cpu_id id = aggr_cpu_id__empty(); + struct aggr_cpu_id id; + + /* per-process mode - should use global aggr mode */ + if (cpu.cpu == -1) + return get_id(config, cpu); if (aggr_cpu_id__is_empty(&config->cpus_aggr_map->map[cpu.cpu])) config->cpus_aggr_map->map[cpu.cpu] = get_id(config, cpu); @@ -2125,17 +2129,23 @@ int 
process_stat_config_event(struct perf_session *session, if (perf_cpu_map__empty(st->cpus)) { if (st->aggr_mode != AGGR_UNSET) pr_warning("warning: processing task data, aggregation mode not set\n"); - return 0; - } - - if (st->aggr_mode != AGGR_UNSET) + } else if (st->aggr_mode != AGGR_UNSET) { stat_config.aggr_mode = st->aggr_mode; + } if (perf_stat.data.is_pipe) perf_stat_init_aggr_mode(); else perf_stat_init_aggr_mode_file(st); + if (stat_config.aggr_map) { + int nr_aggr = stat_config.aggr_map->nr; + + if (evlist__alloc_aggr_stats(session->evlist, nr_aggr) < 0) { + pr_err("cannot allocate aggr counts\n"); + return -1; + } + } return 0; } diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 5b04c9d16156..1b9048115a18 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -141,6 +141,31 @@ static void evsel__reset_stat_priv(struct evsel *evsel) memset(aggr, 0, sizeof(*aggr) * ps->nr_aggr); } +static int evsel__alloc_aggr_stats(struct evsel *evsel, int nr_aggr) +{ + struct perf_stat_evsel *ps = evsel->stats; + + if (ps == NULL) + return 0; + + ps->nr_aggr = nr_aggr; + ps->aggr = calloc(nr_aggr, sizeof(*ps->aggr)); + if (ps->aggr == NULL) + return -ENOMEM; + + return 0; +} + +int evlist__alloc_aggr_stats(struct evlist *evlist, int nr_aggr) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + if (evsel__alloc_aggr_stats(evsel, nr_aggr) < 0) + return -1; + } + return 0; +} static int evsel__alloc_stat_priv(struct evsel *evsel, int nr_aggr) { @@ -150,16 +175,14 @@ static int evsel__alloc_stat_priv(struct evsel *evsel, int nr_aggr) if (ps == NULL) return -ENOMEM; - if (nr_aggr) { - ps->nr_aggr = nr_aggr; - ps->aggr = calloc(nr_aggr, sizeof(*ps->aggr)); - if (ps->aggr == NULL) { - free(ps); - return -ENOMEM; - } + evsel->stats = ps; + + if (nr_aggr && evsel__alloc_aggr_stats(evsel, nr_aggr) < 0) { + evsel->stats = NULL; + free(ps); + return -ENOMEM; } - evsel->stats = ps; perf_stat_evsel_id_init(evsel); 
evsel__reset_stat_priv(evsel); return 0; diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 0980875b9be1..4c00f814bd79 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -275,6 +275,8 @@ void evlist__reset_prev_raw_counts(struct evlist *evlist); void evlist__copy_prev_raw_counts(struct evlist *evlist); void evlist__save_aggr_prev_raw_counts(struct evlist *evlist); +int evlist__alloc_aggr_stats(struct evlist *evlist, int nr_aggr); + int perf_stat_process_counter(struct perf_stat_config *config, struct evsel *counter); struct perf_tool; From 8f97963e09761c239522649921d7b1c57ff2debb Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Oct 2022 19:02:20 -0700 Subject: [PATCH 0511/4122] perf stat: Reset aggr counts for each interval The evsel->stats->aggr->count should be reset for interval processing since we want to use the values directly for display. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Andi Kleen Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Michael Petlan Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221018020227.85905-14-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 3 +++ tools/perf/util/stat.c | 20 +++++++++++++++++--- tools/perf/util/stat.h | 1 + 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 2a6a5d0c5563..bff28a199dfd 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -492,6 +492,8 @@ static void process_interval(void) diff_timespec(&rs, &ts, &ref_time); perf_stat__reset_shadow_per_stat(&rt_stat); + evlist__reset_aggr_stats(evsel_list); + read_counters(&rs); if (STAT_RECORD) { @@ -965,6 +967,7 @@ try_again_reset: evlist__copy_prev_raw_counts(evsel_list); evlist__reset_prev_raw_counts(evsel_list); + evlist__reset_aggr_stats(evsel_list); perf_stat__reset_shadow_per_stat(&rt_stat); } 
else { update_stats(&walltime_nsecs_stats, t1 - t0); diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 1b9048115a18..a4066f0d3637 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -130,17 +130,23 @@ static void perf_stat_evsel_id_init(struct evsel *evsel) } } -static void evsel__reset_stat_priv(struct evsel *evsel) +static void evsel__reset_aggr_stats(struct evsel *evsel) { struct perf_stat_evsel *ps = evsel->stats; struct perf_stat_aggr *aggr = ps->aggr; - init_stats(&ps->res_stats); - if (aggr) memset(aggr, 0, sizeof(*aggr) * ps->nr_aggr); } +static void evsel__reset_stat_priv(struct evsel *evsel) +{ + struct perf_stat_evsel *ps = evsel->stats; + + init_stats(&ps->res_stats); + evsel__reset_aggr_stats(evsel); +} + static int evsel__alloc_aggr_stats(struct evsel *evsel, int nr_aggr) { struct perf_stat_evsel *ps = evsel->stats; @@ -276,6 +282,14 @@ void evlist__reset_stats(struct evlist *evlist) } } +void evlist__reset_aggr_stats(struct evlist *evlist) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) + evsel__reset_aggr_stats(evsel); +} + void evlist__reset_prev_raw_counts(struct evlist *evlist) { struct evsel *evsel; diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 4c00f814bd79..809f9f0aff0c 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -276,6 +276,7 @@ void evlist__copy_prev_raw_counts(struct evlist *evlist); void evlist__save_aggr_prev_raw_counts(struct evlist *evlist); int evlist__alloc_aggr_stats(struct evlist *evlist, int nr_aggr); +void evlist__reset_aggr_stats(struct evlist *evlist); int perf_stat_process_counter(struct perf_stat_config *config, struct evsel *counter); From 8962cbec5a0672b3966e9c48ac22e207d56e13ca Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Oct 2022 19:02:21 -0700 Subject: [PATCH 0512/4122] perf stat: Split process_counters() to share it with process_stat_round_event() It'd do more processing with aggregation. 
Let's split the function so that it can be shared with process_stat_round_event() too.
*/ - read_counters(&(struct timespec) { .tv_nsec = t1-t0 }); + if (read_counters(&(struct timespec) { .tv_nsec = t1-t0 }) == 0) + process_counters(); /* * We need to keep evsel_list alive, because it's processed @@ -2099,13 +2105,11 @@ static int process_stat_round_event(struct perf_session *session, union perf_event *event) { struct perf_record_stat_round *stat_round = &event->stat_round; - struct evsel *counter; struct timespec tsh, *ts = NULL; const char **argv = session->header.env.cmdline_argv; int argc = session->header.env.nr_cmdline; - evlist__for_each_entry(evsel_list, counter) - perf_stat_process_counter(&stat_config, counter); + process_counters(); if (stat_round->type == PERF_STAT_ROUND_TYPE__FINAL) update_stats(&walltime_nsecs_stats, stat_round->time); From 942c5593393d9418bf521e77fa1eab47599efc4d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Oct 2022 19:02:22 -0700 Subject: [PATCH 0513/4122] perf stat: Add perf_stat_merge_counters() The perf_stat_merge_counters() is to aggregate the same events in different PMUs like in case of uncore or hybrid. The same logic is in the stat-display routines but I think it should be handled when it processes the event counters. As it works on the aggr_counters, it doesn't change the output yet. 
Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Andi Kleen Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Michael Petlan Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221018020227.85905-16-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 2 + tools/perf/util/stat.c | 96 +++++++++++++++++++++++++++++++++++++++ tools/perf/util/stat.h | 2 + 3 files changed, 100 insertions(+) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 838d29590bed..371d6e896942 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -486,6 +486,8 @@ static void process_counters(void) pr_warning("failed to process counter %s\n", counter->name); counter->err = 0; } + + perf_stat_merge_counters(&stat_config, evsel_list); } static void process_interval(void) diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index a4066f0d3637..aff1e7390585 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -595,6 +595,102 @@ int perf_stat_process_counter(struct perf_stat_config *config, return 0; } +static int evsel__merge_aggr_counters(struct evsel *evsel, struct evsel *alias) +{ + struct perf_stat_evsel *ps_a = evsel->stats; + struct perf_stat_evsel *ps_b = alias->stats; + int i; + + if (ps_a->aggr == NULL && ps_b->aggr == NULL) + return 0; + + if (ps_a->nr_aggr != ps_b->nr_aggr) { + pr_err("Unmatched aggregation mode between aliases\n"); + return -1; + } + + for (i = 0; i < ps_a->nr_aggr; i++) { + struct perf_counts_values *aggr_counts_a = &ps_a->aggr[i].counts; + struct perf_counts_values *aggr_counts_b = &ps_b->aggr[i].counts; + + /* NB: don't increase aggr.nr for aliases */ + + aggr_counts_a->val += aggr_counts_b->val; + aggr_counts_a->ena += aggr_counts_b->ena; + aggr_counts_a->run += aggr_counts_b->run; + } + + return 0; +} +/* events should have the same name, scale, unit, cgroup but on different PMUs */ 
+static bool evsel__is_alias(struct evsel *evsel_a, struct evsel *evsel_b) +{ + if (strcmp(evsel__name(evsel_a), evsel__name(evsel_b))) + return false; + + if (evsel_a->scale != evsel_b->scale) + return false; + + if (evsel_a->cgrp != evsel_b->cgrp) + return false; + + if (strcmp(evsel_a->unit, evsel_b->unit)) + return false; + + if (evsel__is_clock(evsel_a) != evsel__is_clock(evsel_b)) + return false; + + return !!strcmp(evsel_a->pmu_name, evsel_b->pmu_name); +} + +static void evsel__merge_aliases(struct evsel *evsel) +{ + struct evlist *evlist = evsel->evlist; + struct evsel *alias; + + alias = list_prepare_entry(evsel, &(evlist->core.entries), core.node); + list_for_each_entry_continue(alias, &evlist->core.entries, core.node) { + /* Merge the same events on different PMUs. */ + if (evsel__is_alias(evsel, alias)) { + evsel__merge_aggr_counters(evsel, alias); + alias->merged_stat = true; + } + } +} + +static bool evsel__should_merge_hybrid(struct evsel *evsel, struct perf_stat_config *config) +{ + struct perf_pmu *pmu; + + if (!config->hybrid_merge) + return false; + + pmu = evsel__find_pmu(evsel); + return pmu && pmu->is_hybrid; +} + +static void evsel__merge_stats(struct evsel *evsel, struct perf_stat_config *config) +{ + /* this evsel is already merged */ + if (evsel->merged_stat) + return; + + if (evsel->auto_merge_stats || evsel__should_merge_hybrid(evsel, config)) + evsel__merge_aliases(evsel); +} + +/* merge the same uncore and hybrid events if requested */ +void perf_stat_merge_counters(struct perf_stat_config *config, struct evlist *evlist) +{ + struct evsel *evsel; + + if (config->no_merge) + return; + + evlist__for_each_entry(evlist, evsel) + evsel__merge_stats(evsel, config); +} + int perf_event__process_stat_event(struct perf_session *session, union perf_event *event) { diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 809f9f0aff0c..728bbc823b0d 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -280,6 +280,8 @@ void 
evlist__reset_aggr_stats(struct evlist *evlist); int perf_stat_process_counter(struct perf_stat_config *config, struct evsel *counter); +void perf_stat_merge_counters(struct perf_stat_config *config, struct evlist *evlist); + struct perf_tool; union perf_event; struct perf_session; From 1d6d2bea5b97359ff09a8d793674aab3e5f79023 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Oct 2022 19:02:23 -0700 Subject: [PATCH 0514/4122] perf stat: Add perf_stat_process_percore() perf_stat_process_percore() aggregates counts for an event per-core even if the aggr_mode is AGGR_NONE. This is enabled when the user requests it on the command line. To handle that, it first keeps the per-cpu counts. Then it aggregates the counts that have the same core id in aggr->counts and writes the aggregated values back for each cpu. Later, per-core events will skip one of the CPUs unless the percore-show-thread option is given. In that case, it can simply print all cpu stats with the updated (per-core) values. 
Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Andi Kleen Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Michael Petlan Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221018020227.85905-17-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 1 + tools/perf/util/stat.c | 71 +++++++++++++++++++++++++++++++++++++++ tools/perf/util/stat.h | 3 ++ 3 files changed, 75 insertions(+) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 371d6e896942..d6a006e41da0 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -488,6 +488,7 @@ static void process_counters(void) } perf_stat_merge_counters(&stat_config, evsel_list); + perf_stat_process_percore(&stat_config, evsel_list); } static void process_interval(void) diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index aff1e7390585..26c48ef7ca92 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -691,6 +691,77 @@ void perf_stat_merge_counters(struct perf_stat_config *config, struct evlist *ev evsel__merge_stats(evsel, config); } +static void evsel__update_percore_stats(struct evsel *evsel, struct aggr_cpu_id *core_id) +{ + struct perf_stat_evsel *ps = evsel->stats; + struct perf_counts_values counts = { 0, }; + struct aggr_cpu_id id; + struct perf_cpu cpu; + int idx; + + /* collect per-core counts */ + perf_cpu_map__for_each_cpu(cpu, idx, evsel->core.cpus) { + struct perf_stat_aggr *aggr = &ps->aggr[idx]; + + id = aggr_cpu_id__core(cpu, NULL); + if (!aggr_cpu_id__equal(core_id, &id)) + continue; + + counts.val += aggr->counts.val; + counts.ena += aggr->counts.ena; + counts.run += aggr->counts.run; + } + + /* update aggregated per-core counts for each CPU */ + perf_cpu_map__for_each_cpu(cpu, idx, evsel->core.cpus) { + struct perf_stat_aggr *aggr = &ps->aggr[idx]; + + id = aggr_cpu_id__core(cpu, NULL); + if (!aggr_cpu_id__equal(core_id, 
&id)) + continue; + + aggr->counts.val = counts.val; + aggr->counts.ena = counts.ena; + aggr->counts.run = counts.run; + + aggr->used = true; + } +} + +/* we have an aggr_map for cpu, but want to aggregate the counters per-core */ +static void evsel__process_percore(struct evsel *evsel) +{ + struct perf_stat_evsel *ps = evsel->stats; + struct aggr_cpu_id core_id; + struct perf_cpu cpu; + int idx; + + if (!evsel->percore) + return; + + perf_cpu_map__for_each_cpu(cpu, idx, evsel->core.cpus) { + struct perf_stat_aggr *aggr = &ps->aggr[idx]; + + if (aggr->used) + continue; + + core_id = aggr_cpu_id__core(cpu, NULL); + evsel__update_percore_stats(evsel, &core_id); + } +} + +/* process cpu stats on per-core events */ +void perf_stat_process_percore(struct perf_stat_config *config, struct evlist *evlist) +{ + struct evsel *evsel; + + if (config->aggr_mode != AGGR_NONE) + return; + + evlist__for_each_entry(evlist, evsel) + evsel__process_percore(evsel); +} + int perf_event__process_stat_event(struct perf_session *session, union perf_event *event) { diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 728bbc823b0d..d23f8743e442 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -51,6 +51,8 @@ struct perf_stat_aggr { int nr; /* whether any entry has failed to read/process event */ bool failed; + /* to mark this data is processed already */ + bool used; }; /* per-evsel event stats */ @@ -281,6 +283,7 @@ void evlist__reset_aggr_stats(struct evlist *evlist); int perf_stat_process_counter(struct perf_stat_config *config, struct evsel *counter); void perf_stat_merge_counters(struct perf_stat_config *config, struct evlist *evlist); +void perf_stat_process_percore(struct perf_stat_config *config, struct evlist *evlist); struct perf_tool; union perf_event; From 88f1d3512c947ad8e396e50acfce5ee55133df0a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Oct 2022 19:02:24 -0700 Subject: [PATCH 0515/4122] perf stat: Add 
perf_stat_process_shadow_stats() This function updates the shadow stats uniformly from the aggregated counts, since aggr_counts is used for every aggr mode. For now the shadow stats are updated twice for each item, because the display routines update them once again. That is fine because they show average values, and the duplication will go away eventually. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Andi Kleen Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Michael Petlan Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221018020227.85905-18-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 1 + tools/perf/util/stat.c | 50 ++++++++++++++++++++------------------- tools/perf/util/stat.h | 1 + 3 files changed, 28 insertions(+), 24 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index d6a006e41da0..d7c52cef70a3 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -489,6 +489,7 @@ static void process_counters(void) perf_stat_merge_counters(&stat_config, evsel_list); perf_stat_process_percore(&stat_config, evsel_list); + perf_stat_process_shadow_stats(&stat_config, evsel_list); } static void process_interval(void) diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 26c48ef7ca92..c0955a0427ab 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -474,7 +474,7 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel, aggr_counts->val += count->val; aggr_counts->ena += count->ena; aggr_counts->run += count->run; - goto update; + return 0; } if (ps->aggr) { @@ -511,32 +511,10 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel, } } -update: - switch (config->aggr_mode) { - case AGGR_THREAD: - case AGGR_CORE: - case AGGR_DIE: - case AGGR_SOCKET: - case AGGR_NODE: - case AGGR_NONE: - if ((config->aggr_mode == 
AGGR_NONE) && (!evsel->percore)) { - perf_stat__update_shadow_stats(evsel, count->val, - cpu_map_idx, &rt_stat); - } - - if (config->aggr_mode == AGGR_THREAD) { - perf_stat__update_shadow_stats(evsel, count->val, - thread, &rt_stat); - } - break; - case AGGR_GLOBAL: + if (config->aggr_mode == AGGR_GLOBAL) { aggr->val += count->val; aggr->ena += count->ena; aggr->run += count->run; - case AGGR_UNSET: - case AGGR_MAX: - default: - break; } return 0; @@ -762,6 +740,30 @@ void perf_stat_process_percore(struct perf_stat_config *config, struct evlist *e evsel__process_percore(evsel); } +static void evsel__update_shadow_stats(struct evsel *evsel) +{ + struct perf_stat_evsel *ps = evsel->stats; + int i; + + if (ps->aggr == NULL) + return; + + for (i = 0; i < ps->nr_aggr; i++) { + struct perf_counts_values *aggr_counts = &ps->aggr[i].counts; + + perf_stat__update_shadow_stats(evsel, aggr_counts->val, i, &rt_stat); + } +} + +void perf_stat_process_shadow_stats(struct perf_stat_config *config __maybe_unused, + struct evlist *evlist) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) + evsel__update_shadow_stats(evsel); +} + int perf_event__process_stat_event(struct perf_session *session, union perf_event *event) { diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index d23f8743e442..3d413ba8c68a 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -284,6 +284,7 @@ int perf_stat_process_counter(struct perf_stat_config *config, struct evsel *counter); void perf_stat_merge_counters(struct perf_stat_config *config, struct evlist *evlist); void perf_stat_process_percore(struct perf_stat_config *config, struct evlist *evlist); +void perf_stat_process_shadow_stats(struct perf_stat_config *config, struct evlist *evlist); struct perf_tool; union perf_event; From 91f85f98da7ab8c32105f42dd03884c01ec4498f Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Oct 2022 19:02:25 -0700 Subject: [PATCH 0516/4122] perf stat: Display event stats 
using aggr counts Now aggr counts are ready for use. Convert the display routines to use the aggr counts and update the shadow stat with them. It doesn't need to aggregate counts or collect aliases anymore during the display. Get rid of now unused struct perf_aggr_thread_value. Note that there's a difference in the display order among the aggr mode. For per-core/die/socket/node aggregation, it shows relevant events in the same unit together, whereas global/thread/no aggregation it shows the same events for different units together. So it still uses separate codes to display them due to the ordering. One more thing to note is that it breaks per-core event display for now. The next patch will fix it to have identical output as of now. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Andi Kleen Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Michael Petlan Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221018020227.85905-19-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 421 ++++----------------------------- tools/perf/util/stat.c | 5 - tools/perf/util/stat.h | 9 - 3 files changed, 49 insertions(+), 386 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 4113aa86772f..bfae2784609c 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -442,31 +442,6 @@ static void print_metric_header(struct perf_stat_config *config, fprintf(os->fh, "%*s ", config->metric_only_len, unit); } -static int first_shadow_map_idx(struct perf_stat_config *config, - struct evsel *evsel, const struct aggr_cpu_id *id) -{ - struct perf_cpu_map *cpus = evsel__cpus(evsel); - struct perf_cpu cpu; - int idx; - - if (config->aggr_mode == AGGR_NONE) - return perf_cpu_map__idx(cpus, id->cpu); - - if (config->aggr_mode == AGGR_THREAD) - return id->thread_idx; - - if (!config->aggr_get_id) - 
return 0; - - perf_cpu_map__for_each_cpu(cpu, idx, cpus) { - struct aggr_cpu_id cpu_id = config->aggr_get_id(config, cpu); - - if (aggr_cpu_id__equal(&cpu_id, id)) - return idx; - } - return 0; -} - static void abs_printout(struct perf_stat_config *config, struct aggr_cpu_id id, int nr, struct evsel *evsel, double avg) { @@ -537,7 +512,7 @@ static bool is_mixed_hw_group(struct evsel *counter) static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int nr, struct evsel *counter, double uval, char *prefix, u64 run, u64 ena, double noise, - struct runtime_stat *st) + struct runtime_stat *st, int map_idx) { struct perf_stat_output_ctx out; struct outstate os = { @@ -648,8 +623,7 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int print_running(config, run, ena); } - perf_stat__print_shadow_stats(config, counter, uval, - first_shadow_map_idx(config, counter, &id), + perf_stat__print_shadow_stats(config, counter, uval, map_idx, &out, &config->metric_events, st); if (!config->csv_output && !config->metric_only && !config->json_output) { print_noise(config, counter, noise); @@ -657,34 +631,6 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int } } -static void aggr_update_shadow(struct perf_stat_config *config, - struct evlist *evlist) -{ - int idx, s; - struct perf_cpu cpu; - struct aggr_cpu_id s2, id; - u64 val; - struct evsel *counter; - struct perf_cpu_map *cpus; - - for (s = 0; s < config->aggr_map->nr; s++) { - id = config->aggr_map->map[s]; - evlist__for_each_entry(evlist, counter) { - cpus = evsel__cpus(counter); - val = 0; - perf_cpu_map__for_each_cpu(cpu, idx, cpus) { - s2 = config->aggr_get_id(config, cpu); - if (!aggr_cpu_id__equal(&s2, &id)) - continue; - val += perf_counts(counter->counts, idx, 0)->val; - } - perf_stat__update_shadow_stats(counter, val, - first_shadow_map_idx(config, counter, &id), - &rt_stat); - } - } -} - static void uniquify_event_name(struct evsel 
*counter) { char *new_name; @@ -721,137 +667,51 @@ static void uniquify_event_name(struct evsel *counter) counter->uniquified_name = true; } -static void collect_all_aliases(struct perf_stat_config *config, struct evsel *counter, - void (*cb)(struct perf_stat_config *config, struct evsel *counter, void *data, - bool first), - void *data) +static bool hybrid_uniquify(struct evsel *evsel, struct perf_stat_config *config) { - struct evlist *evlist = counter->evlist; - struct evsel *alias; - - alias = list_prepare_entry(counter, &(evlist->core.entries), core.node); - list_for_each_entry_continue (alias, &evlist->core.entries, core.node) { - /* Merge events with the same name, etc. but on different PMUs. */ - if (!strcmp(evsel__name(alias), evsel__name(counter)) && - alias->scale == counter->scale && - alias->cgrp == counter->cgrp && - !strcmp(alias->unit, counter->unit) && - evsel__is_clock(alias) == evsel__is_clock(counter) && - strcmp(alias->pmu_name, counter->pmu_name)) { - alias->merged_stat = true; - cb(config, alias, data, false); - } - } + return evsel__is_hybrid(evsel) && !config->hybrid_merge; } -static bool hybrid_merge(struct evsel *counter, struct perf_stat_config *config, - bool check) +static void uniquify_counter(struct perf_stat_config *config, struct evsel *counter) { - if (evsel__is_hybrid(counter)) { - if (check) - return config->hybrid_merge; - else - return !config->hybrid_merge; - } - - return false; -} - -static bool collect_data(struct perf_stat_config *config, struct evsel *counter, - void (*cb)(struct perf_stat_config *config, struct evsel *counter, void *data, - bool first), - void *data) -{ - if (counter->merged_stat) - return false; - cb(config, counter, data, true); - if (config->no_merge || hybrid_merge(counter, config, false)) + if (config->no_merge || hybrid_uniquify(counter, config)) uniquify_event_name(counter); - else if (counter->auto_merge_stats || hybrid_merge(counter, config, true)) - collect_all_aliases(config, counter, cb, 
data); - return true; -} - -struct aggr_data { - u64 ena, run, val; - struct aggr_cpu_id id; - int nr; - int cpu_map_idx; -}; - -static void aggr_cb(struct perf_stat_config *config, - struct evsel *counter, void *data, bool first) -{ - struct aggr_data *ad = data; - int idx; - struct perf_cpu cpu; - struct perf_cpu_map *cpus; - struct aggr_cpu_id s2; - - cpus = evsel__cpus(counter); - perf_cpu_map__for_each_cpu(cpu, idx, cpus) { - struct perf_counts_values *counts; - - s2 = config->aggr_get_id(config, cpu); - if (!aggr_cpu_id__equal(&s2, &ad->id)) - continue; - if (first) - ad->nr++; - counts = perf_counts(counter->counts, idx, 0); - /* - * When any result is bad, make them all to give - * consistent output in interval mode. - */ - if (counts->ena == 0 || counts->run == 0 || - counter->counts->scaled == -1) { - ad->ena = 0; - ad->run = 0; - break; - } - ad->val += counts->val; - ad->ena += counts->ena; - ad->run += counts->run; - } } static void print_counter_aggrdata(struct perf_stat_config *config, struct evsel *counter, int s, char *prefix, bool metric_only, - bool *first, struct perf_cpu cpu) + bool *first) { - struct aggr_data ad; FILE *output = config->output; u64 ena, run, val; - int nr; - struct aggr_cpu_id id; double uval; + struct perf_stat_evsel *ps = counter->stats; + struct perf_stat_aggr *aggr = &ps->aggr[s]; + struct aggr_cpu_id id = config->aggr_map->map[s]; + double avg = aggr->counts.val; - ad.id = id = config->aggr_map->map[s]; - ad.val = ad.ena = ad.run = 0; - ad.nr = 0; - if (!collect_data(config, counter, aggr_cb, &ad)) + if (counter->supported && aggr->nr == 0) return; - if (perf_pmu__has_hybrid() && ad.ena == 0) - return; + uniquify_counter(config, counter); + + val = aggr->counts.val; + ena = aggr->counts.ena; + run = aggr->counts.run; - nr = ad.nr; - ena = ad.ena; - run = ad.run; - val = ad.val; if (*first && metric_only) { *first = false; - aggr_printout(config, counter, id, nr); + aggr_printout(config, counter, id, aggr->nr); } if 
(prefix && !metric_only) fprintf(output, "%s", prefix); uval = val * counter->scale; - if (cpu.cpu != -1) - id = aggr_cpu_id__cpu(cpu, /*data=*/NULL); - printout(config, id, nr, counter, uval, - prefix, run, ena, 1.0, &rt_stat); + printout(config, id, aggr->nr, counter, uval, + prefix, run, ena, avg, &rt_stat, s); + if (!metric_only) fputc('\n', output); } @@ -869,8 +729,6 @@ static void print_aggr(struct perf_stat_config *config, if (!config->aggr_map || !config->aggr_get_id) return; - aggr_update_shadow(config, evlist); - /* * With metric_only everything is on a single line. * Without each counter has its own line. @@ -881,188 +739,36 @@ static void print_aggr(struct perf_stat_config *config, first = true; evlist__for_each_entry(evlist, counter) { + if (counter->merged_stat) + continue; + print_counter_aggrdata(config, counter, s, - prefix, metric_only, - &first, (struct perf_cpu){ .cpu = -1 }); + prefix, metric_only, + &first); } if (metric_only) fputc('\n', output); } } -static int cmp_val(const void *a, const void *b) -{ - return ((struct perf_aggr_thread_value *)b)->val - - ((struct perf_aggr_thread_value *)a)->val; -} - -static struct perf_aggr_thread_value *sort_aggr_thread( - struct evsel *counter, - int *ret, - struct target *_target) -{ - int nthreads = perf_thread_map__nr(counter->core.threads); - int i = 0; - double uval; - struct perf_aggr_thread_value *buf; - - buf = calloc(nthreads, sizeof(struct perf_aggr_thread_value)); - if (!buf) - return NULL; - - for (int thread = 0; thread < nthreads; thread++) { - int idx; - u64 ena = 0, run = 0, val = 0; - - perf_cpu_map__for_each_idx(idx, evsel__cpus(counter)) { - struct perf_counts_values *counts = - perf_counts(counter->counts, idx, thread); - - val += counts->val; - ena += counts->ena; - run += counts->run; - } - - uval = val * counter->scale; - - /* - * Skip value 0 when enabling --per-thread globally, - * otherwise too many 0 output. 
- */ - if (uval == 0.0 && target__has_per_thread(_target)) - continue; - - buf[i].counter = counter; - buf[i].id = aggr_cpu_id__empty(); - buf[i].id.thread_idx = thread; - buf[i].uval = uval; - buf[i].val = val; - buf[i].run = run; - buf[i].ena = ena; - i++; - } - - qsort(buf, i, sizeof(struct perf_aggr_thread_value), cmp_val); - - if (ret) - *ret = i; - - return buf; -} - -static void print_aggr_thread(struct perf_stat_config *config, - struct target *_target, - struct evsel *counter, char *prefix) -{ - FILE *output = config->output; - int thread, sorted_threads; - struct aggr_cpu_id id; - struct perf_aggr_thread_value *buf; - - buf = sort_aggr_thread(counter, &sorted_threads, _target); - if (!buf) { - perror("cannot sort aggr thread"); - return; - } - - for (thread = 0; thread < sorted_threads; thread++) { - if (prefix) - fprintf(output, "%s", prefix); - - id = buf[thread].id; - printout(config, id, 0, buf[thread].counter, buf[thread].uval, - prefix, buf[thread].run, buf[thread].ena, 1.0, - &rt_stat); - fputc('\n', output); - } - - free(buf); -} - -struct caggr_data { - double avg, avg_enabled, avg_running; -}; - -static void counter_aggr_cb(struct perf_stat_config *config __maybe_unused, - struct evsel *counter, void *data, - bool first __maybe_unused) -{ - struct caggr_data *cd = data; - struct perf_counts_values *aggr = &counter->counts->aggr; - - cd->avg += aggr->val; - cd->avg_enabled += aggr->ena; - cd->avg_running += aggr->run; -} - -/* - * Print out the results of a single counter: - * aggregated counts in system-wide mode - */ -static void print_counter_aggr(struct perf_stat_config *config, - struct evsel *counter, char *prefix) -{ - bool metric_only = config->metric_only; - FILE *output = config->output; - double uval; - struct caggr_data cd = { .avg = 0.0 }; - - if (!collect_data(config, counter, counter_aggr_cb, &cd)) - return; - - if (prefix && !metric_only) - fprintf(output, "%s", prefix); - - uval = cd.avg * counter->scale; - printout(config, 
aggr_cpu_id__empty(), 0, counter, uval, prefix, cd.avg_running, - cd.avg_enabled, cd.avg, &rt_stat); - if (!metric_only) - fprintf(output, "\n"); -} - -static void counter_cb(struct perf_stat_config *config __maybe_unused, - struct evsel *counter, void *data, - bool first __maybe_unused) -{ - struct aggr_data *ad = data; - - ad->val += perf_counts(counter->counts, ad->cpu_map_idx, 0)->val; - ad->ena += perf_counts(counter->counts, ad->cpu_map_idx, 0)->ena; - ad->run += perf_counts(counter->counts, ad->cpu_map_idx, 0)->run; -} - -/* - * Print out the results of a single counter: - * does not use aggregated count in system-wide - */ static void print_counter(struct perf_stat_config *config, struct evsel *counter, char *prefix) { - FILE *output = config->output; - u64 ena, run, val; - double uval; - int idx; - struct perf_cpu cpu; - struct aggr_cpu_id id; + bool metric_only = config->metric_only; + bool first = false; + int s; - perf_cpu_map__for_each_cpu(cpu, idx, evsel__cpus(counter)) { - struct aggr_data ad = { .cpu_map_idx = idx }; + /* AGGR_THREAD doesn't have config->aggr_get_id */ + if (!config->aggr_map) + return; - if (!collect_data(config, counter, counter_cb, &ad)) - return; - val = ad.val; - ena = ad.ena; - run = ad.run; + if (counter->merged_stat) + return; - if (prefix) - fprintf(output, "%s", prefix); - - uval = val * counter->scale; - id = aggr_cpu_id__cpu(cpu, /*data=*/NULL); - printout(config, id, 0, counter, uval, prefix, - run, ena, 1.0, &rt_stat); - - fputc('\n', output); + for (s = 0; s < config->aggr_map->nr; s++) { + print_counter_aggrdata(config, counter, s, + prefix, metric_only, + &first); } } @@ -1081,6 +787,7 @@ static void print_no_aggr_metric(struct perf_stat_config *config, u64 ena, run, val; double uval; struct aggr_cpu_id id; + struct perf_stat_evsel *ps = counter->stats; int counter_idx = perf_cpu_map__idx(evsel__cpus(counter), cpu); if (counter_idx < 0) @@ -1093,13 +800,13 @@ static void print_no_aggr_metric(struct perf_stat_config 
*config, aggr_printout(config, counter, id, 0); first = false; } - val = perf_counts(counter->counts, counter_idx, 0)->val; - ena = perf_counts(counter->counts, counter_idx, 0)->ena; - run = perf_counts(counter->counts, counter_idx, 0)->run; + val = ps->aggr[counter_idx].counts.val; + ena = ps->aggr[counter_idx].counts.ena; + run = ps->aggr[counter_idx].counts.run; uval = val * counter->scale; printout(config, id, 0, counter, uval, prefix, - run, ena, 1.0, &rt_stat); + run, ena, 1.0, &rt_stat, counter_idx); } if (!first) fputc('\n', config->output); @@ -1135,8 +842,8 @@ static void print_metric_headers(struct perf_stat_config *config, }; bool first = true; - if (config->json_output && !config->interval) - fprintf(config->output, "{"); + if (config->json_output && !config->interval) + fprintf(config->output, "{"); if (prefix && !config->json_output) fprintf(config->output, "%s", prefix); @@ -1379,31 +1086,6 @@ static void print_footer(struct perf_stat_config *config) "the same PMU. Try reorganizing the group.\n"); } -static void print_percore_thread(struct perf_stat_config *config, - struct evsel *counter, char *prefix) -{ - int s; - struct aggr_cpu_id s2, id; - struct perf_cpu_map *cpus; - bool first = true; - int idx; - struct perf_cpu cpu; - - cpus = evsel__cpus(counter); - perf_cpu_map__for_each_cpu(cpu, idx, cpus) { - s2 = config->aggr_get_id(config, cpu); - for (s = 0; s < config->aggr_map->nr; s++) { - id = config->aggr_map->map[s]; - if (aggr_cpu_id__equal(&s2, &id)) - break; - } - - print_counter_aggrdata(config, counter, s, - prefix, false, - &first, cpu); - } -} - static void print_percore(struct perf_stat_config *config, struct evsel *counter, char *prefix) { @@ -1416,15 +1098,14 @@ static void print_percore(struct perf_stat_config *config, return; if (config->percore_show_thread) - return print_percore_thread(config, counter, prefix); + return print_counter(config, counter, prefix); for (s = 0; s < config->aggr_map->nr; s++) { if (prefix && metric_only) 
fprintf(output, "%s", prefix); print_counter_aggrdata(config, counter, s, - prefix, metric_only, - &first, (struct perf_cpu){ .cpu = -1 }); + prefix, metric_only, &first); } if (metric_only) @@ -1469,17 +1150,13 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf print_aggr(config, evlist, prefix); break; case AGGR_THREAD: - evlist__for_each_entry(evlist, counter) { - print_aggr_thread(config, _target, counter, prefix); - } - break; case AGGR_GLOBAL: if (config->iostat_run) iostat_print_counters(evlist, config, ts, prefix = buf, - print_counter_aggr); + print_counter); else { evlist__for_each_entry(evlist, counter) { - print_counter_aggr(config, counter, prefix); + print_counter(config, counter, prefix); } if (metric_only) fputc('\n', config->output); diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index c0955a0427ab..0316557adce9 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -565,11 +565,6 @@ int perf_stat_process_counter(struct perf_stat_config *config, evsel__name(counter), count[0], count[1], count[2]); } - /* - * Save the full runtime - to allow normalization during printout: - */ - perf_stat__update_shadow_stats(counter, *count, 0, &rt_stat); - return 0; } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 3d413ba8c68a..382a1ab92ce1 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -224,15 +224,6 @@ static inline void update_rusage_stats(struct rusage_stats *ru_stats, struct rus struct evsel; struct evlist; -struct perf_aggr_thread_value { - struct evsel *counter; - struct aggr_cpu_id id; - double uval; - u64 val; - u64 run; - u64 ena; -}; - bool __perf_stat_evsel__is(struct evsel *evsel, enum perf_stat_evsel_id id); #define perf_stat_evsel__is(evsel, id) \ From cec94d69636a023302d484ec036158b792001d3c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Oct 2022 19:02:26 -0700 Subject: [PATCH 0517/4122] perf stat: Display percore events properly The 
recent change in perf stat broke the percore event display. Note that the aggr counts are already processed so that every sibling thread in the same core gets the per-core counter values. Check for percore evsels and skip the sibling threads in the display. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Andi Kleen Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Michael Petlan Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221018020227.85905-20-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 16 ---------------- tools/perf/util/stat-display.c | 27 +++++++++++++++++++++++++-- 2 files changed, 25 insertions(+), 18 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index d7c52cef70a3..9d35a3338976 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1404,18 +1404,6 @@ static struct aggr_cpu_id perf_stat__get_cpu_cached(struct perf_stat_config *con return perf_stat__get_aggr(config, perf_stat__get_cpu, cpu); } -static bool term_percore_set(void) -{ - struct evsel *counter; - - evlist__for_each_entry(evsel_list, counter) { - if (counter->percore) - return true; - } - - return false; -} - static aggr_cpu_id_get_t aggr_mode__get_aggr(enum aggr_mode aggr_mode) { switch (aggr_mode) { @@ -1428,8 +1416,6 @@ static aggr_cpu_id_get_t aggr_mode__get_aggr(enum aggr_mode aggr_mode) case AGGR_NODE: return aggr_cpu_id__node; case AGGR_NONE: - if (term_percore_set()) - return aggr_cpu_id__core; return aggr_cpu_id__cpu; case AGGR_GLOBAL: return aggr_cpu_id__global; @@ -1453,8 +1439,6 @@ static aggr_get_id_t aggr_mode__get_id(enum aggr_mode aggr_mode) case AGGR_NODE: return perf_stat__get_node_cached; case AGGR_NONE: - if (term_percore_set()) - return perf_stat__get_core_cached; return perf_stat__get_cpu_cached; case AGGR_GLOBAL: return perf_stat__get_global_cached; diff --git 
a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index bfae2784609c..657434cd29ee 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -1091,7 +1091,8 @@ static void print_percore(struct perf_stat_config *config, { bool metric_only = config->metric_only; FILE *output = config->output; - int s; + struct cpu_aggr_map *core_map; + int s, c, i; bool first = true; if (!config->aggr_map || !config->aggr_get_id) @@ -1100,13 +1101,35 @@ static void print_percore(struct perf_stat_config *config, if (config->percore_show_thread) return print_counter(config, counter, prefix); - for (s = 0; s < config->aggr_map->nr; s++) { + core_map = cpu_aggr_map__empty_new(config->aggr_map->nr); + if (core_map == NULL) { + fprintf(output, "Cannot allocate per-core aggr map for display\n"); + return; + } + + for (s = 0, c = 0; s < config->aggr_map->nr; s++) { + struct perf_cpu curr_cpu = config->aggr_map->map[s].cpu; + struct aggr_cpu_id core_id = aggr_cpu_id__core(curr_cpu, NULL); + bool found = false; + + for (i = 0; i < c; i++) { + if (aggr_cpu_id__equal(&core_map->map[i], &core_id)) { + found = true; + break; + } + } + if (found) + continue; + if (prefix && metric_only) fprintf(output, "%s", prefix); print_counter_aggrdata(config, counter, s, prefix, metric_only, &first); + + core_map->map[c++] = core_id; } + free(core_map); if (metric_only) fputc('\n', output); From 8b76a3188b85724f345ea5d03ff206cc9bbe6d72 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Oct 2022 19:02:27 -0700 Subject: [PATCH 0518/4122] perf stat: Remove unused perf_counts.aggr field The aggr field in the struct perf_counts is to keep the aggregated value in the AGGR_GLOBAL for the old code. But it's not used anymore. 
Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Andi Kleen Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Michael Petlan Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221018020227.85905-21-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/counts.c | 1 - tools/perf/util/counts.h | 1 - tools/perf/util/stat.c | 39 ++++++--------------------------------- 3 files changed, 6 insertions(+), 35 deletions(-) diff --git a/tools/perf/util/counts.c b/tools/perf/util/counts.c index 7a447d918458..11cd85b278a6 100644 --- a/tools/perf/util/counts.c +++ b/tools/perf/util/counts.c @@ -48,7 +48,6 @@ void perf_counts__reset(struct perf_counts *counts) { xyarray__reset(counts->loaded); xyarray__reset(counts->values); - memset(&counts->aggr, 0, sizeof(struct perf_counts_values)); } void evsel__reset_counts(struct evsel *evsel) diff --git a/tools/perf/util/counts.h b/tools/perf/util/counts.h index 5de275194f2b..42760242e0df 100644 --- a/tools/perf/util/counts.h +++ b/tools/perf/util/counts.h @@ -11,7 +11,6 @@ struct evsel; struct perf_counts { s8 scaled; - struct perf_counts_values aggr; struct xyarray *values; struct xyarray *loaded; }; diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 0316557adce9..3a432a949d46 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -308,8 +308,6 @@ static void evsel__copy_prev_raw_counts(struct evsel *evsel) *perf_counts(evsel->prev_raw_counts, idx, thread); } } - - evsel->counts->aggr = evsel->prev_raw_counts->aggr; } void evlist__copy_prev_raw_counts(struct evlist *evlist) @@ -320,26 +318,6 @@ void evlist__copy_prev_raw_counts(struct evlist *evlist) evsel__copy_prev_raw_counts(evsel); } -void evlist__save_aggr_prev_raw_counts(struct evlist *evlist) -{ - struct evsel *evsel; - - /* - * To collect the overall statistics for interval mode, - * we copy the counts from evsel->prev_raw_counts to - * 
evsel->counts. The perf_stat_process_counter creates - * aggr values from per cpu values, but the per cpu values - * are 0 for AGGR_GLOBAL. So we use a trick that saves the - * previous aggr value to the first member of perf_counts, - * then aggr calculation in process_counter_values can work - * correctly. - */ - evlist__for_each_entry(evlist, evsel) { - *perf_counts(evsel->prev_raw_counts, 0, 0) = - evsel->prev_raw_counts->aggr; - } -} - static size_t pkg_id_hash(const void *__key, void *ctx __maybe_unused) { uint64_t *key = (uint64_t *) __key; @@ -442,7 +420,6 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel, int cpu_map_idx, int thread, struct perf_counts_values *count) { - struct perf_counts_values *aggr = &evsel->counts->aggr; struct perf_stat_evsel *ps = evsel->stats; static struct perf_counts_values zero; bool skip = false; @@ -511,12 +488,6 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel, } } - if (config->aggr_mode == AGGR_GLOBAL) { - aggr->val += count->val; - aggr->ena += count->ena; - aggr->run += count->run; - } - return 0; } @@ -541,13 +512,10 @@ static int process_counter_maps(struct perf_stat_config *config, int perf_stat_process_counter(struct perf_stat_config *config, struct evsel *counter) { - struct perf_counts_values *aggr = &counter->counts->aggr; struct perf_stat_evsel *ps = counter->stats; - u64 *count = counter->counts->aggr.values; + u64 *count; int ret; - aggr->val = aggr->ena = aggr->run = 0; - if (counter->per_pkg) evsel__zero_per_pkg(counter); @@ -558,6 +526,11 @@ int perf_stat_process_counter(struct perf_stat_config *config, if (config->aggr_mode != AGGR_GLOBAL) return 0; + /* + * GLOBAL aggregation mode only has a single aggr counts, + * so we can use ps->aggr[0] as the actual output. 
+ */ + count = ps->aggr[0].counts.values; update_stats(&ps->res_stats, *count); if (verbose > 0) { From a87edbec35725ced484f6b8275f1246ed7194329 Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Sat, 22 Oct 2022 17:27:34 +0800 Subject: [PATCH 0519/4122] perf daemon: Complete list of supported subcommand in help message perf daemon supports start, signal, stop and ping subcommands, complete it Before: # perf daemon -h Usage: perf daemon start [] or: perf daemon [] -v, --verbose be more verbose -x, --field-separator[=] print counts with custom separator --base base directory --config config file path After: # perf daemon -h Usage: perf daemon {start|signal|stop|ping} [] or: perf daemon [] -v, --verbose be more verbose -x, --field-separator[=] print counts with custom separator --base base directory --config config file path Signed-off-by: Yang Jihong Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221022092735.114967-3-yangjihong1@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-daemon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c index 6cb3f6cc36d0..3ce0c960ccc9 100644 --- a/tools/perf/builtin-daemon.c +++ b/tools/perf/builtin-daemon.c @@ -100,7 +100,7 @@ static struct daemon __daemon = { }; static const char * const daemon_usage[] = { - "perf daemon start []", + "perf daemon {start|signal|stop|ping} []", "perf daemon []", NULL }; From 0cef66a98420f1210be697ed47bd7417ed428847 Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Sat, 22 Oct 2022 17:27:35 +0800 Subject: [PATCH 0520/4122] perf config: Add missing newline on pr_warning() call in home_perfconfig() Add missing newline on pr_warning() call in home_perfconfig(). 
Before: # perf record File /home/yangjihong/.perfconfig not owned by current user or root, ignoring it.Couldn't synthesize bpf events. After: # perf record File /home/yangjihong/.perfconfig not owned by current user or root, ignoring it. Couldn't synthesize bpf events. Signed-off-by: Yang Jihong Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221022092735.114967-4-yangjihong1@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/config.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 3f2ae19a1dd4..658170b8dcef 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -556,7 +556,7 @@ static char *home_perfconfig(void) config = strdup(mkpath("%s/.perfconfig", home)); if (config == NULL) { - pr_warning("Not enough memory to process %s/.perfconfig, ignoring it.", home); + pr_warning("Not enough memory to process %s/.perfconfig, ignoring it.\n", home); return NULL; } @@ -564,7 +564,7 @@ static char *home_perfconfig(void) goto out_free; if (st.st_uid && (st.st_uid != geteuid())) { - pr_warning("File %s not owned by current user or root, ignoring it.", config); + pr_warning("File %s not owned by current user or root, ignoring it.\n", config); goto out_free; } From 743ef218c2fbe63502615a2044977041ee068322 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 25 Oct 2022 17:25:18 -0300 Subject: [PATCH 0521/4122] perf unwind arm64: Remove needless event.h & thread.h includes To reduce compile time and header dependency chains just add forward declarations for pointer types and include linux/types.h for u64. 
Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/arm64-frame-pointer-unwind-support.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/arm64-frame-pointer-unwind-support.h b/tools/perf/util/arm64-frame-pointer-unwind-support.h index 32af9ce94398..42d3a45490f5 100644 --- a/tools/perf/util/arm64-frame-pointer-unwind-support.h +++ b/tools/perf/util/arm64-frame-pointer-unwind-support.h @@ -2,8 +2,10 @@ #ifndef __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H #define __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H -#include "event.h" -#include "thread.h" +#include <linux/types.h> + +struct perf_sample; +struct thread; u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thread, int user_idx); From 6bc13cab5798bd9b049694983ae5702666d24e83 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 25 Oct 2022 17:32:19 -0300 Subject: [PATCH 0522/4122] perf arch x86: Add missing stdlib.h to get free() prototype It was getting it indirectly, by luck; add it.
Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/event.c | 1 + tools/perf/arch/x86/util/tsc.c | 1 + 2 files changed, 2 insertions(+) diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c index e670f3547581..55ff6aec10fd 100644 --- a/tools/perf/arch/x86/util/event.c +++ b/tools/perf/arch/x86/util/event.c @@ -2,6 +2,7 @@ #include #include #include +#include <stdlib.h> #include "../../../util/event.h" #include "../../../util/synthetic-events.h" diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c index eb2b5195bd02..9b99f48b923c 100644 --- a/tools/perf/arch/x86/util/tsc.c +++ b/tools/perf/arch/x86/util/tsc.c @@ -2,6 +2,7 @@ #include #include #include +#include <stdlib.h> #include "../../../util/debug.h" #include "../../../util/tsc.h" From 06bf28cbc63287c69fe834b527127a56b65de2d7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 25 Oct 2022 17:33:04 -0300 Subject: [PATCH 0523/4122] perf scripting python: Add missing util/perf_regs.h include to get perf_reg_name() prototype It was getting it via event.h, that doesn't need that include anymore and will drop it.
Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/scripting-engines/trace-event-python.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 7bc8559dce6a..1985d1a42a22 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -52,6 +52,7 @@ #include "print_binary.h" #include "stat.h" #include "mem-events.h" +#include "util/perf_regs.h" #if PY_MAJOR_VERSION < 3 #define _PyUnicode_FromString(arg) \ From b15cf900d11d4db2c2dac544a27f3e1217bdd0d4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 25 Oct 2022 17:34:44 -0300 Subject: [PATCH 0524/4122] perf event: Drop perf_regs.h include, not needed anymore Since commit c897899752478d4c ("perf tools: Prevent out-of-bounds access to registers") the util/event.h header doesn't use anything from util/perf_regs.h, so drop it to untangle the header dependency tree a bit, speeding up compilation. Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 12eae6917022..65495f6945b4 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -12,8 +12,6 @@ #include #include -#include "perf_regs.h" - struct dso; struct machine; struct perf_event_attr; From ad7ad6b5ddf63b436a5344fb686887f2d8b7cf3d Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 20 Oct 2022 18:25:09 +0300 Subject: [PATCH 0525/4122] perf scripts python: intel-pt-events.py: Add ability to interleave output Intel PT timestamps are not provided for every branch, let alone every instruction, so there can be many samples with the same timestamp. With per-cpu contexts, decoding is done for each CPU in turn, which can make it difficult to see what is happening on different CPUs at the same time.
Currently the interleaving from perf script --itrace=i0ns is quite coarse grained. There are often long stretches executing on one CPU and nothing on another. Some people are interested in seeing what happened on multiple CPUs before a crash to debug races etc. To improve perf script interleaving for parallel execution, the intel-pt-events.py script has been enhanced to enable interleaving the output with the same timestamp from different CPUs. It is understood that interleaving is not perfect or causal. Add parameter --interleave [<n>] to interleave sample output for the same timestamp so that no more than n samples for a CPU are displayed in a row. 'n' defaults to 4. Note this only affects the order of output, and only when the timestamp is the same. Example: $ perf script intel-pt-events.py --insn-trace --interleave 3 ... bash 2267/2267 [004] 9323.692625625 563caa3c86f0 jz 0x563caa3c89c7 run_pending_traps+0x30 (/usr/bin/bash) IPC: 1.52 (38/25) bash 2267/2267 [004] 9323.692625625 563caa3c89c7 movq 0x118(%rsp), %rax run_pending_traps+0x307 (/usr/bin/bash) bash 2267/2267 [004] 9323.692625625 563caa3c89cf subq %fs:0x28, %rax run_pending_traps+0x30f (/usr/bin/bash) bash 2270/2270 [007] 9323.692625625 55dc58cabf02 jz 0x55dc58cabf48 unquoted_glob_pattern_p+0x102 (/usr/bin/bash) IPC: 1.56 (25/16) bash 2270/2270 [007] 9323.692625625 55dc58cabf04 cmp $0x5d, %al unquoted_glob_pattern_p+0x104 (/usr/bin/bash) bash 2270/2270 [007] 9323.692625625 55dc58cabf06 jnz 0x55dc58cabf10 unquoted_glob_pattern_p+0x106 (/usr/bin/bash) bash 2264/2264 [001] 9323.692625625 7fd556a4376c jbe 0x7fd556a43ac8 round_and_return+0x3fc (/usr/lib/x86_64-linux-gnu/libc.so.6) IPC: 4.30 (43/10) bash 2264/2264 [001] 9323.692625625 7fd556a43772 and $0x8, %edx round_and_return+0x402 (/usr/lib/x86_64-linux-gnu/libc.so.6) bash 2264/2264 [001] 9323.692625625 7fd556a43775 jnz 0x7fd556a43ac8 round_and_return+0x405 (/usr/lib/x86_64-linux-gnu/libc.so.6) bash 2267/2267 [004] 9323.692625625 563caa3c89d8 jnz
0x563caa3c8b11 run_pending_traps+0x318 (/usr/bin/bash) bash 2267/2267 [004] 9323.692625625 563caa3c89de add $0x128, %rsp run_pending_traps+0x31e (/usr/bin/bash) bash 2267/2267 [004] 9323.692625625 563caa3c89e5 popq %rbx run_pending_traps+0x325 (/usr/bin/bash) ... Signed-off-by: Adrian Hunter Cc: Andi Kleen Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/r/20221020152509.5298-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-intel-pt.txt | 12 +++- tools/perf/scripts/python/intel-pt-events.py | 65 +++++++++++++++++++- 2 files changed, 74 insertions(+), 3 deletions(-) diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt index 92464a5d7eaf..7b6ccd2fa3bf 100644 --- a/tools/perf/Documentation/perf-intel-pt.txt +++ b/tools/perf/Documentation/perf-intel-pt.txt @@ -189,8 +189,16 @@ There is also script intel-pt-events.py which provides an example of how to unpack the raw data for power events and PTWRITE. The script also displays branches, and supports 2 additional modes selected by option: - --insn-trace - instruction trace - --src-trace - source trace + - --insn-trace - instruction trace + - --src-trace - source trace + +The intel-pt-events.py script also has options: + + - --all-switch-events - display all switch events, not only the last consecutive. + - --interleave [<n>] - interleave sample output for the same timestamp so that + no more than n samples for a CPU are displayed in a row. 'n' defaults to 4. + Note this only affects the order of output, and only when the timestamp is the + same. As mentioned above, it is easy to capture too much data. One way to limit the data captured is to use 'snapshot' mode which is explained further below.
diff --git a/tools/perf/scripts/python/intel-pt-events.py b/tools/perf/scripts/python/intel-pt-events.py index 6be7fd8fd615..08862a2582f4 100644 --- a/tools/perf/scripts/python/intel-pt-events.py +++ b/tools/perf/scripts/python/intel-pt-events.py @@ -13,10 +13,12 @@ from __future__ import print_function +import io import os import sys import struct import argparse +import contextlib from libxed import LibXED from ctypes import create_string_buffer, addressof @@ -39,6 +41,11 @@ glb_src = False glb_source_file_name = None glb_line_number = None glb_dso = None +glb_stash_dict = {} +glb_output = None +glb_output_pos = 0 +glb_cpu = -1 +glb_time = 0 def get_optional_null(perf_dict, field): if field in perf_dict: @@ -70,6 +77,7 @@ def trace_begin(): ap.add_argument("--insn-trace", action='store_true') ap.add_argument("--src-trace", action='store_true') ap.add_argument("--all-switch-events", action='store_true') + ap.add_argument("--interleave", type=int, nargs='?', const=4, default=0) global glb_args global glb_insn global glb_src @@ -94,11 +102,39 @@ def trace_begin(): perf_set_itrace_options(perf_script_context, itrace) def trace_end(): + if glb_args.interleave: + flush_stashed_output() print("End") def trace_unhandled(event_name, context, event_fields_dict): print(' '.join(['%s=%s'%(k,str(v))for k,v in sorted(event_fields_dict.items())])) +def stash_output(): + global glb_stash_dict + global glb_output_pos + output_str = glb_output.getvalue()[glb_output_pos:] + n = len(output_str) + if n: + glb_output_pos += n + if glb_cpu not in glb_stash_dict: + glb_stash_dict[glb_cpu] = [] + glb_stash_dict[glb_cpu].append(output_str) + +def flush_stashed_output(): + global glb_stash_dict + while glb_stash_dict: + cpus = list(glb_stash_dict.keys()) + # Output at most glb_args.interleave output strings per cpu + for cpu in cpus: + items = glb_stash_dict[cpu] + countdown = glb_args.interleave + while len(items) and countdown: + sys.stdout.write(items[0]) + del items[0] + countdown -= 1 
+ if not items: + del glb_stash_dict[cpu] + def print_ptwrite(raw_buf): data = struct.unpack_from("= 2 and x[0]: machine_pid = x[0] vcpu = x[1] @@ -403,6 +464,8 @@ def auxtrace_error(typ, code, cpu, pid, tid, ip, ts, msg, cpumode, *x): sys.exit(1) def context_switch(ts, cpu, pid, tid, np_pid, np_tid, machine_pid, out, out_preempt, *x): + if glb_args.interleave: + flush_stashed_output() if out: out_str = "Switch out " else: From 439dbef2a94e825241160e1bbd050d01e5728608 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 20 Oct 2022 10:26:36 -0700 Subject: [PATCH 0526/4122] perf test: Do not use instructions:u explicitly I think it's to support non-root user tests. But perf record can handle the case and fall back to a software event (cpu-clock). Practically this would affect when it's run on a VM, but it seems no reason to prevent running the test in the guest. Reviewed-by: Adrian Hunter Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221020172643.3458767-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/record.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh index 301f95427159..747c33a1ec45 100755 --- a/tools/perf/tests/shell/record.sh +++ b/tools/perf/tests/shell/record.sh @@ -21,18 +21,18 @@ trap trap_cleanup exit term int test_per_thread() { echo "Basic --per-thread mode test" - if ! perf record -e instructions:u -o ${perfdata} --quiet true 2> /dev/null + if ! perf record -o /dev/null --quiet true 2> /dev/null then - echo "Per-thread record [Skipped instructions:u not supported]" + echo "Per-thread record [Skipped event not supported]" if [ $err -ne 1 ] then err=2 fi return fi - if ! perf record -e instructions:u --per-thread -o ${perfdata} true 2> /dev/null + if ! 
perf record --per-thread -o ${perfdata} true 2> /dev/null then - echo "Per-thread record of instructions:u [Failed]" + echo "Per-thread record [Failed record]" err=1 return fi @@ -49,7 +49,7 @@ test_register_capture() { echo "Register capture test" if ! perf list | egrep -q 'br_inst_retired.near_call' then - echo "Register capture test [Skipped missing instruction]" + echo "Register capture test [Skipped missing event]" if [ $err -ne 1 ] then err=2 From 9e455f4f29e3aeab29b6c1f7d086ddc43fc1a502 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 20 Oct 2022 10:26:37 -0700 Subject: [PATCH 0527/4122] perf test: Fix shellcheck issues in the record test Basically there are 3 issues: 1. quote shell expansion 2. do not use egrep 3. use upper case letters for signal names Reviewed-by: Adrian Hunter Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221020172643.3458767-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/record.sh | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh index 747c33a1ec45..464071462809 100755 --- a/tools/perf/tests/shell/record.sh +++ b/tools/perf/tests/shell/record.sh @@ -8,16 +8,16 @@ err=0 perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) cleanup() { - rm -f ${perfdata} - rm -f ${perfdata}.old - trap - exit term int + rm -f "${perfdata}" + rm -f "${perfdata}".old + trap - EXIT TERM INT } trap_cleanup() { cleanup exit 1 } -trap trap_cleanup exit term int +trap trap_cleanup EXIT TERM INT test_per_thread() { echo "Basic --per-thread mode test" @@ -30,13 +30,13 @@ test_per_thread() { fi return fi - if ! perf record --per-thread -o ${perfdata} true 2> /dev/null + if ! perf record --per-thread -o "${perfdata}" true 2> /dev/null then echo "Per-thread record [Failed record]" err=1 return fi - if ! 
perf report -i ${perfdata} -q | egrep -q true + if ! perf report -i "${perfdata}" -q | grep -q true then echo "Per-thread record [Failed missing output]" err=1 @@ -47,7 +47,7 @@ test_per_thread() { test_register_capture() { echo "Register capture test" - if ! perf list | egrep -q 'br_inst_retired.near_call' + if ! perf list | grep -q 'br_inst_retired.near_call' then echo "Register capture test [Skipped missing event]" if [ $err -ne 1 ] @@ -56,7 +56,7 @@ test_register_capture() { fi return fi - if ! perf record --intr-regs=\? 2>&1 | egrep -q 'available registers: AX BX CX DX SI DI BP SP IP FLAGS CS SS R8 R9 R10 R11 R12 R13 R14 R15' + if ! perf record --intr-regs=\? 2>&1 | grep -q 'available registers: AX BX CX DX SI DI BP SP IP FLAGS CS SS R8 R9 R10 R11 R12 R13 R14 R15' then echo "Register capture test [Skipped missing registers]" return @@ -64,7 +64,7 @@ test_register_capture() { if ! perf record -o - --intr-regs=di,r8,dx,cx -e br_inst_retired.near_call:p \ -c 1000 --per-thread true 2> /dev/null \ | perf script -F ip,sym,iregs -i - 2> /dev/null \ - | egrep -q "DI:" + | grep -q "DI:" then echo "Register capture test [Failed missing output]" err=1 From 4321ad4ee98b7325d6133e1d5b7fa25bcbdeb57e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 20 Oct 2022 10:26:38 -0700 Subject: [PATCH 0528/4122] perf test: Use a test program in 'perf record' tests If the system has cc it could build a test program with two threads and then use it for more detailed testing. Also it accepts an option to run a thread forever to ensure multi-thread runs. If cc is not found, it falls back to use the default value 'true'. 
Reviewed-by: Adrian Hunter Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221020172643.3458767-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/record.sh | 64 ++++++++++++++++++++++++++++++-- 1 file changed, 60 insertions(+), 4 deletions(-) diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh index 464071462809..952981481239 100755 --- a/tools/perf/tests/shell/record.sh +++ b/tools/perf/tests/shell/record.sh @@ -6,10 +6,17 @@ set -e err=0 perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) +testprog=$(mktemp /tmp/__perf_test.prog.XXXXXX) +testsym="test_loop" cleanup() { rm -f "${perfdata}" rm -f "${perfdata}".old + + if [ "${testprog}" != "true" ]; then + rm -f "${testprog}" + fi + trap - EXIT TERM INT } @@ -19,9 +26,56 @@ trap_cleanup() { } trap trap_cleanup EXIT TERM INT +build_test_program() { + if ! [ -x "$(command -v cc)" ]; then + # No CC found. Fall back to 'true' + testprog=true + testsym=true + return + fi + + echo "Build a test program" + cat < +#include +#include + +void test_loop(void) { + volatile int count = 1000000; + + while (count--) + continue; +} + +void *thfunc(void *arg) { + int forever = *(int *)arg; + + do { + test_loop(); + } while (forever); + + return NULL; +} + +int main(int argc, char *argv[]) { + pthread_t th; + int forever = 0; + + if (argc > 1) + forever = atoi(argv[1]); + + pthread_create(&th, NULL, thfunc, &forever); + test_loop(); + pthread_join(th, NULL); + + return 0; +} +EOF +} + test_per_thread() { echo "Basic --per-thread mode test" - if ! perf record -o /dev/null --quiet true 2> /dev/null + if ! perf record -o /dev/null --quiet ${testprog} 2> /dev/null then echo "Per-thread record [Skipped event not supported]" if [ $err -ne 1 ] @@ -30,13 +84,13 @@ test_per_thread() { fi return fi - if ! perf record --per-thread -o "${perfdata}" true 2> /dev/null + if ! 
perf record --per-thread -o "${perfdata}" ${testprog} 2> /dev/null then echo "Per-thread record [Failed record]" err=1 return fi - if ! perf report -i "${perfdata}" -q | grep -q true + if ! perf report -i "${perfdata}" -q | grep -q "${testsym}" then echo "Per-thread record [Failed missing output]" err=1 @@ -62,7 +116,7 @@ test_register_capture() { return fi if ! perf record -o - --intr-regs=di,r8,dx,cx -e br_inst_retired.near_call:p \ - -c 1000 --per-thread true 2> /dev/null \ + -c 1000 --per-thread ${testprog} 2> /dev/null \ | perf script -F ip,sym,iregs -i - 2> /dev/null \ | grep -q "DI:" then @@ -73,6 +127,8 @@ test_register_capture() { echo "Register capture test [Success]" } +build_test_program + test_per_thread test_register_capture From 6b7e02ab1262141cfebd05b68410fa297f557961 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 20 Oct 2022 10:26:39 -0700 Subject: [PATCH 0529/4122] perf test: Wait for a new thread when testing --per-thread record Just running the target program is not enough to test multi-thread target because it'd be racy perf vs target startup. I used the initial delay but it cannot guarantee for perf to see the thread. Instead, use wait_for_threads helper from shell/lib/waiting.sh to make sure it starts the sibling thread first. Then perf record can use -p option to profile the target process. Reviewed-by: Adrian Hunter Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221020172643.3458767-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/record.sh | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh index 952981481239..d1640d1daf2e 100755 --- a/tools/perf/tests/shell/record.sh +++ b/tools/perf/tests/shell/record.sh @@ -4,6 +4,9 @@ set -e +shelldir=$(dirname "$0") +. 
"${shelldir}"/lib/waiting.sh + err=0 perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) testprog=$(mktemp /tmp/__perf_test.prog.XXXXXX) @@ -96,6 +99,30 @@ test_per_thread() { err=1 return fi + + # run the test program in background (forever) + ${testprog} 1 & + TESTPID=$! + + rm -f "${perfdata}" + + wait_for_threads ${TESTPID} 2 + perf record -p "${TESTPID}" --per-thread -o "${perfdata}" sleep 1 2> /dev/null + kill ${TESTPID} + + if [ ! -e "${perfdata}" ] + then + echo "Per-thread record [Failed record -p]" + err=1 + return + fi + if ! perf report -i "${perfdata}" -q | grep -q "${testsym}" + then + echo "Per-thread record [Failed -p missing output]" + err=1 + return + fi + echo "Basic --per-thread mode test [Success]" } From 2cadf2c7b99a980455500c34571a540300c49c72 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 20 Oct 2022 10:26:40 -0700 Subject: [PATCH 0530/4122] perf test: Add system-wide mode in 'perf record' tests Add system wide recording test with the same pattern. It'd skip the test when it fails to run 'perf record'. For system-wide mode, it needs to avoid build-id collection and synthesis because the test only cares about the test program and kernel would generate the necessary events as the process starts. Reviewed-by: Adrian Hunter Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221020172643.3458767-6-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/record.sh | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh index d1640d1daf2e..345764afb745 100755 --- a/tools/perf/tests/shell/record.sh +++ b/tools/perf/tests/shell/record.sh @@ -154,10 +154,31 @@ test_register_capture() { echo "Register capture test [Success]" } +test_system_wide() { + echo "Basic --system-wide mode test" + if ! 
perf record -aB --synth=no -o "${perfdata}" ${testprog} 2> /dev/null + then + echo "System-wide record [Skipped not supported]" + if [ $err -ne 1 ] + then + err=2 + fi + return + fi + if ! perf report -i "${perfdata}" -q | grep -q "${testsym}" + then + echo "System-wide record [Failed missing output]" + err=1 + return + fi + echo "Basic --system-wide mode test [Success]" +} + build_test_program test_per_thread test_register_capture +test_system_wide cleanup exit $err From c8c935677487ba6f7dd18d48e39ab30bbb4cb5d9 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 20 Oct 2022 10:26:41 -0700 Subject: [PATCH 0531/4122] perf test: Add target workload test in 'perf record' tests Add a subtest which profiles the given workload on the command line. As it's a minimal requirement, the test should run ok so it doesn't skip the test even if it failed to run the 'perf record' command. Reviewed-by: Adrian Hunter Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221020172643.3458767-7-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/record.sh | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh index 345764afb745..c59d1459c960 100755 --- a/tools/perf/tests/shell/record.sh +++ b/tools/perf/tests/shell/record.sh @@ -174,11 +174,29 @@ test_system_wide() { echo "Basic --system-wide mode test [Success]" } +test_workload() { + echo "Basic target workload test" + if ! perf record -o "${perfdata}" ${testprog} 2> /dev/null + then + echo "Workload record [Failed record]" + err=1 + return + fi + if ! 
perf report -i "${perfdata}" -q | grep -q "${testsym}" + then + echo "Workload record [Failed missing output]" + err=1 + return + fi + echo "Basic target workload test [Success]" +} + build_test_program test_per_thread test_register_capture test_system_wide +test_workload cleanup exit $err From 7f4ed3f0b1fc951bcb91a78bf68ac45aac13ec0b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 20 Oct 2022 10:26:42 -0700 Subject: [PATCH 0532/4122] perf test: Test record with --threads option The --threads option changed the 'perf record' behavior significantly, so it'd be nice if we test it separately. Add --threads options with different argument in each test supported and check the result. Also update the cleanup routine because threads recording produces data in a directory. Reviewed-by: Adrian Hunter Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221020172643.3458767-8-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/record.sh | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh index c59d1459c960..01aa9531b369 100755 --- a/tools/perf/tests/shell/record.sh +++ b/tools/perf/tests/shell/record.sh @@ -13,8 +13,8 @@ testprog=$(mktemp /tmp/__perf_test.prog.XXXXXX) testsym="test_loop" cleanup() { - rm -f "${perfdata}" - rm -f "${perfdata}".old + rm -rf "${perfdata}" + rm -rf "${perfdata}".old if [ "${testprog}" != "true" ]; then rm -f "${testprog}" @@ -171,6 +171,19 @@ test_system_wide() { err=1 return fi + if ! perf record -aB --synth=no -e cpu-clock,cs --threads=cpu \ + -o "${perfdata}" ${testprog} 2> /dev/null + then + echo "System-wide record [Failed record --threads option]" + err=1 + return + fi + if ! 
perf report -i "${perfdata}" -q | grep -q "${testsym}" + then + echo "System-wide record [Failed --threads missing output]" + err=1 + return + fi echo "Basic --system-wide mode test [Success]" } @@ -188,6 +201,19 @@ test_workload() { err=1 return fi + if ! perf record -e cpu-clock,cs --threads=package \ + -o "${perfdata}" ${testprog} 2> /dev/null + then + echo "Workload record [Failed record --threads option]" + err=1 + return + fi + if ! perf report -i "${perfdata}" -q | grep -q "${testsym}" + then + echo "Workload record [Failed --threads missing output]" + err=1 + return + fi echo "Basic target workload test [Success]" } From 8b380e6afd124d18cd51a43d2505e4eb05e2ba09 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 20 Oct 2022 10:26:43 -0700 Subject: [PATCH 0533/4122] perf test: Do not set TEST_SKIP for record subtests It now has 4 sub tests and at least one of them should run. But once the TEST_SKIP (= 2) return value is set, it won't be overwritten unless there's a failure. I think we should return success when one or more tests are skipped but the remaining subtests are passed. So update the test code not to set the err variable when it skips the test. Reviewed-by: Adrian Hunter Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221020172643.3458767-9-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/record.sh | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh index 01aa9531b369..e93b3a8871fe 100755 --- a/tools/perf/tests/shell/record.sh +++ b/tools/perf/tests/shell/record.sh @@ -81,10 +81,6 @@ test_per_thread() { if ! perf record -o /dev/null --quiet ${testprog} 2> /dev/null then echo "Per-thread record [Skipped event not supported]" - if [ $err -ne 1 ] - then - err=2 - fi return fi if ! 
perf record --per-thread -o "${perfdata}" ${testprog} 2> /dev/null @@ -131,10 +127,6 @@ test_register_capture() { if ! perf list | grep -q 'br_inst_retired.near_call' then echo "Register capture test [Skipped missing event]" - if [ $err -ne 1 ] - then - err=2 - fi return fi if ! perf record --intr-regs=\? 2>&1 | grep -q 'available registers: AX BX CX DX SI DI BP SP IP FLAGS CS SS R8 R9 R10 R11 R12 R13 R14 R15' @@ -159,10 +151,6 @@ test_system_wide() { if ! perf record -aB --synth=no -o "${perfdata}" ${testprog} 2> /dev/null then echo "System-wide record [Skipped not supported]" - if [ $err -ne 1 ] - then - err=2 - fi return fi if ! perf report -i "${perfdata}" -q | grep -q "${testsym}" From 65319890c32db29fb56b41f84265a2c7029943f4 Mon Sep 17 00:00:00 2001 From: James Clark Date: Tue, 18 Oct 2022 10:41:35 +0100 Subject: [PATCH 0534/4122] perf tools: Fix "kernel lock contention analysis" test by not printing warnings in quiet mode Especially when CONFIG_LOCKDEP and other debug configs are enabled, Perf can print the following warning when running the "kernel lock contention analysis" test: Warning: Processed 1378918 events and lost 4 chunks! Check IO/CPU overload! Warning: Processed 4593325 samples and lost 70.00%! The test already supplies -q to run in quiet mode, so extend quiet mode to perf_stdio__warning() and also ui__warning() for consistency. This fixes the following failure due to the extra lines counted: perf test "lock cont" -vvv 82: kernel lock contention analysis test : --- start --- test child forked, pid 3125 Testing perf lock record and perf lock contention [Fail] Recorded result count is not 1: 9 test child finished with -1 ---- end ---- kernel lock contention analysis test: FAILED! 
Fixes: ec685de25b6718f8 ("perf test: Add kernel lock contention test") Signed-off-by: James Clark Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221018094137.783081-2-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/util.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/perf/ui/util.c b/tools/perf/ui/util.c index 689b27c34246..1d38ddf01b60 100644 --- a/tools/perf/ui/util.c +++ b/tools/perf/ui/util.c @@ -15,6 +15,9 @@ static int perf_stdio__error(const char *format, va_list args) static int perf_stdio__warning(const char *format, va_list args) { + if (quiet) + return 0; + fprintf(stderr, "Warning:\n"); vfprintf(stderr, format, args); return 0; @@ -45,6 +48,8 @@ int ui__warning(const char *format, ...) { int ret; va_list args; + if (quiet) + return 0; va_start(args, format); ret = perf_eops->warning(format, args); From a527c2c1e2d43e9f145f5d0c5d6ac0bdf5220e22 Mon Sep 17 00:00:00 2001 From: James Clark Date: Tue, 18 Oct 2022 10:41:36 +0100 Subject: [PATCH 0535/4122] perf tools: Make quiet mode consistent between tools Use the global quiet variable everywhere so that all tools hide warnings in quiet mode and update the documentation to reflect this. 'perf probe' claimed that errors are not printed in quiet mode but I don't see this so remove it from the docs. 
Signed-off-by: James Clark Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221018094137.783081-3-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-annotate.txt | 2 +- tools/perf/Documentation/perf-diff.txt | 2 +- tools/perf/Documentation/perf-lock.txt | 2 +- tools/perf/Documentation/perf-probe.txt | 2 +- tools/perf/Documentation/perf-record.txt | 2 +- tools/perf/Documentation/perf-report.txt | 2 +- tools/perf/Documentation/perf-stat.txt | 4 ++-- tools/perf/bench/numa.c | 9 +++++---- tools/perf/builtin-annotate.c | 2 +- tools/perf/builtin-diff.c | 2 +- tools/perf/builtin-lock.c | 2 +- tools/perf/builtin-probe.c | 7 +++---- tools/perf/builtin-record.c | 2 +- tools/perf/builtin-report.c | 2 +- tools/perf/builtin-stat.c | 8 ++++---- tools/perf/util/stat.h | 1 - 16 files changed, 25 insertions(+), 26 deletions(-) diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt index 18fcc52809fb..980fe2c29275 100644 --- a/tools/perf/Documentation/perf-annotate.txt +++ b/tools/perf/Documentation/perf-annotate.txt @@ -41,7 +41,7 @@ OPTIONS -q:: --quiet:: - Do not show any message. (Suppress -v) + Do not show any warnings or messages. (Suppress -v) -n:: --show-nr-samples:: diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt index be65bd55ab2a..f3067a4af294 100644 --- a/tools/perf/Documentation/perf-diff.txt +++ b/tools/perf/Documentation/perf-diff.txt @@ -75,7 +75,7 @@ OPTIONS -q:: --quiet:: - Do not show any message. (Suppress -v) + Do not show any warnings or messages. 
(Suppress -v) -f:: --force:: diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt index 3b1e16563b79..4958a1ffa1cc 100644 --- a/tools/perf/Documentation/perf-lock.txt +++ b/tools/perf/Documentation/perf-lock.txt @@ -42,7 +42,7 @@ COMMON OPTIONS -q:: --quiet:: - Do not show any message. (Suppress -v) + Do not show any warnings or messages. (Suppress -v) -D:: --dump-raw-trace:: diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index 080981d38d7b..7f8e8ba3a787 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -57,7 +57,7 @@ OPTIONS -q:: --quiet:: - Be quiet (do not show any messages including errors). + Do not show any warnings or messages. Can not use with -v. -a:: diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index e41ae950fdc3..9ea6d44aca58 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -282,7 +282,7 @@ OPTIONS -q:: --quiet:: - Don't print any message, useful for scripting. + Don't print any warnings or messages, useful for scripting. -v:: --verbose:: diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 4533db2ee56b..4fa509b15948 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -27,7 +27,7 @@ OPTIONS -q:: --quiet:: - Do not show any message. (Suppress -v) + Do not show any warnings or messages. (Suppress -v) -n:: --show-nr-samples:: diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index d7ff1867feda..18abdc1dce05 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -354,8 +354,8 @@ forbids the event merging logic from sharing events between groups and may be used to increase accuracy in this case. --quiet:: -Don't print output. 
This is useful with perf stat record below to only -write data to the perf.data file. +Don't print output, warnings or messages. This is useful with perf stat +record below to only write data to the perf.data file. STAT RECORD ----------- diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index e78dedf9e682..9717c6c17433 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -116,7 +117,6 @@ struct params { long bytes_thread; int nr_tasks; - bool show_quiet; bool show_convergence; bool measure_convergence; @@ -197,7 +197,8 @@ static const struct option options[] = { OPT_BOOLEAN('c', "show_convergence", &p0.show_convergence, "show convergence details, " "convergence is reached when each process (all its threads) is running on a single NUMA node."), OPT_BOOLEAN('m', "measure_convergence", &p0.measure_convergence, "measure convergence latency"), - OPT_BOOLEAN('q', "quiet" , &p0.show_quiet, "quiet mode"), + OPT_BOOLEAN('q', "quiet" , &quiet, + "quiet mode (do not show any warnings or messages)"), OPT_BOOLEAN('S', "serialize-startup", &p0.serialize_startup,"serialize thread startup"), /* Special option string parsing callbacks: */ @@ -1474,7 +1475,7 @@ static int init(void) /* char array in count_process_nodes(): */ BUG_ON(g->p.nr_nodes < 0); - if (g->p.show_quiet && !g->p.show_details) + if (quiet && !g->p.show_details) g->p.show_details = -1; /* Some memory should be specified: */ @@ -1553,7 +1554,7 @@ static void print_res(const char *name, double val, if (!name) name = "main,"; - if (!g->p.show_quiet) + if (!quiet) printf(" %-30s %15.3f, %-15s %s\n", name, val, txt_unit, txt_short); else printf(" %14.3f %s\n", val, txt_long); diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index f839e69492e8..517d928c00e3 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -525,7 +525,7 @@ int cmd_annotate(int argc, 
const char **argv) OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), - OPT_BOOLEAN('q', "quiet", &quiet, "do now show any message"), + OPT_BOOLEAN('q', "quiet", &quiet, "do now show any warnings or messages"), OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), #ifdef HAVE_GTK2_SUPPORT diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index d925096dd7f0..ed07cc6cca56 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -1260,7 +1260,7 @@ static const char * const diff_usage[] = { static const struct option options[] = { OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), - OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any message"), + OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any warnings or messages"), OPT_BOOLEAN('b', "baseline-only", &show_baseline_only, "Show only items with match in baseline"), OPT_CALLBACK('c', "compute", &compute, diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 9722d4ab2e55..66520712a167 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -1869,7 +1869,7 @@ int cmd_lock(int argc, const char **argv) "file", "vmlinux pathname"), OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file", "kallsyms pathname"), - OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any message"), + OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any warnings or messages"), OPT_END() }; diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index f62298f5db3b..2ae50fc9e597 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -40,7 +40,6 @@ static struct { int command; /* Command short_name */ bool list_events; bool uprobes; - bool quiet; bool target_used; int nevents; struct perf_probe_event events[MAX_PROBES]; @@ -514,8 +513,8 @@ __cmd_probe(int argc, const char **argv) struct option options[] 
= { OPT_INCR('v', "verbose", &verbose, "be more verbose (show parsed arguments, etc)"), - OPT_BOOLEAN('q', "quiet", &params.quiet, - "be quiet (do not show any messages)"), + OPT_BOOLEAN('q', "quiet", &quiet, + "be quiet (do not show any warnings or messages)"), OPT_CALLBACK_DEFAULT('l', "list", NULL, "[GROUP:]EVENT", "list up probe events", opt_set_filter_with_command, DEFAULT_LIST_FILTER), @@ -634,7 +633,7 @@ __cmd_probe(int argc, const char **argv) if (ret) return ret; - if (params.quiet) { + if (quiet) { if (verbose != 0) { pr_err(" Error: -v and -q are exclusive.\n"); return -EINVAL; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index e128b855ddde..59f3d98a0196 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -3388,7 +3388,7 @@ static struct option __record_options[] = { &record_parse_callchain_opt), OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), - OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"), + OPT_BOOLEAN('q', "quiet", &quiet, "don't print any warnings or messages"), OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, "per thread counts"), OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 8361890176c2..b6d77d3da64f 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1222,7 +1222,7 @@ int cmd_report(int argc, const char **argv) "input file name"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), - OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any message"), + OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any warnings or messages"), OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), OPT_BOOLEAN(0, "stats", &report.stats_mode, "Display event stats"), diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 9d35a3338976..e52601a54b26 100644 ---
a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1033,7 +1033,7 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) /* Do not print anything if we record to the pipe. */ if (STAT_RECORD && perf_stat.data.is_pipe) return; - if (stat_config.quiet) + if (quiet) return; evlist__print_counters(evsel_list, &stat_config, &target, ts, argc, argv); @@ -1283,8 +1283,8 @@ static struct option stat_options[] = { "print summary for interval mode"), OPT_BOOLEAN(0, "no-csv-summary", &stat_config.no_csv_summary, "don't print 'summary' for CSV summary output"), - OPT_BOOLEAN(0, "quiet", &stat_config.quiet, - "don't print output (useful with record)"), + OPT_BOOLEAN(0, "quiet", &quiet, + "don't print any output, messages or warnings (useful with record)"), OPT_CALLBACK(0, "cputype", &evsel_list, "hybrid cpu type", "Only enable events on applying cpu with this type " "for hybrid platform (e.g. core or atom)", @@ -2383,7 +2383,7 @@ int cmd_stat(int argc, const char **argv) goto out; } - if (!output && !stat_config.quiet) { + if (!output && !quiet) { struct timespec tm; mode = append_file ? 
"a" : "w"; diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 382a1ab92ce1..499c3bf81333 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -160,7 +160,6 @@ struct perf_stat_config { bool metric_no_group; bool metric_no_merge; bool stop_read_counter; - bool quiet; bool iostat_run; char *user_requested_cpu_list; bool system_wide; From cff624146450bd25a1acea0439b7654167e0f722 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 26 Oct 2022 13:50:17 -0300 Subject: [PATCH 0536/4122] perf bpf: No need to include compiler.h when HAVE_LIBBPF_SUPPORT is true Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf_map.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/bpf_map.h b/tools/perf/util/bpf_map.h index d6abd5e47af8..c2f7c13cba23 100644 --- a/tools/perf/util/bpf_map.h +++ b/tools/perf/util/bpf_map.h @@ -3,7 +3,6 @@ #define __PERF_BPF_MAP_H 1 #include -#include struct bpf_map; #ifdef HAVE_LIBBPF_SUPPORT @@ -12,6 +11,8 @@ int bpf_map__fprintf(struct bpf_map *map, FILE *fp); #else +#include + static inline int bpf_map__fprintf(struct bpf_map *map __maybe_unused, FILE *fp __maybe_unused) { return 0; From 5e9c68ea777594a2d63fa44c0509782e90821707 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Wed, 12 Oct 2022 01:18:40 +0200 Subject: [PATCH 0537/4122] RISC-V: Cache SBI vendor values sbi_get_mvendorid(), sbi_get_marchid() and sbi_get_mimpid() might get called multiple times, though the values of these CSRs should not change during the runtime of a specific machine. Though the values can be different depending on which hart of the system they get called. So hook into the newly introduced cpuinfo struct to allow retrieving these cached values via new functions. Also use arch_initcall for the cpuinfo setup instead, as that now clearly is "architecture specific initialization" and also makes these information available slightly earlier. 
[caching vendor ids] Suggested-by: Atish Patra [using cpuinfo struct as cache] Suggested-by: Anup Patel Link: https://lore.kernel.org/all/20221011231841.2951264-2-heiko@sntech.de/ Signed-off-by: Heiko Stuebner Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/sbi.h | 5 +++++ arch/riscv/kernel/cpu.c | 30 +++++++++++++++++++++++++++--- 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index 2a0ef738695e..4ca7fbacff42 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -327,4 +327,9 @@ int sbi_err_map_linux_errno(int err); static inline int sbi_remote_fence_i(const struct cpumask *cpu_mask) { return -1; } static inline void sbi_init(void) {} #endif /* CONFIG_RISCV_SBI */ + +unsigned long riscv_cached_mvendorid(unsigned int cpu_id); +unsigned long riscv_cached_marchid(unsigned int cpu_id); +unsigned long riscv_cached_mimpid(unsigned int cpu_id); + #endif /* _ASM_RISCV_SBI_H */ diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index fa427bdcf773..bf9dd6764bad 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -70,8 +70,6 @@ int riscv_of_parent_hartid(struct device_node *node, unsigned long *hartid) return -1; } -#ifdef CONFIG_PROC_FS - struct riscv_cpuinfo { unsigned long mvendorid; unsigned long marchid; @@ -79,6 +77,30 @@ struct riscv_cpuinfo { }; static DEFINE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo); +unsigned long riscv_cached_mvendorid(unsigned int cpu_id) +{ + struct riscv_cpuinfo *ci = per_cpu_ptr(&riscv_cpuinfo, cpu_id); + + return ci->mvendorid; +} +EXPORT_SYMBOL(riscv_cached_mvendorid); + +unsigned long riscv_cached_marchid(unsigned int cpu_id) +{ + struct riscv_cpuinfo *ci = per_cpu_ptr(&riscv_cpuinfo, cpu_id); + + return ci->marchid; +} +EXPORT_SYMBOL(riscv_cached_marchid); + +unsigned long riscv_cached_mimpid(unsigned int cpu_id) +{ + struct riscv_cpuinfo *ci = per_cpu_ptr(&riscv_cpuinfo, cpu_id); + + return 
ci->mimpid; +} +EXPORT_SYMBOL(riscv_cached_mimpid); + static int riscv_cpuinfo_starting(unsigned int cpu) { struct riscv_cpuinfo *ci = this_cpu_ptr(&riscv_cpuinfo); @@ -113,7 +135,9 @@ static int __init riscv_cpuinfo_init(void) return 0; } -device_initcall(riscv_cpuinfo_init); +arch_initcall(riscv_cpuinfo_init); + +#ifdef CONFIG_PROC_FS #define __RISCV_ISA_EXT_DATA(UPROP, EXTID) \ { \ From 65e9fb081877a18c432c6ff344937b7277c044b5 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Wed, 12 Oct 2022 01:18:41 +0200 Subject: [PATCH 0538/4122] drivers/perf: riscv_pmu_sbi: add support for PMU variant on T-Head C9xx cores With the T-HEAD C9XX cores being designed before or during the ratification of the SSCOFPMF extension, it implements functionality very similar but not equal to it. It implements overflow handling and also some privilege-mode filtering. While SSCOFPMF supports this for all modes, the C9XX only implements the filtering for M-mode and S-mode but not user-mode. So add some adaptions to allow the C9XX to still handle its PMU through the regular SBI PMU interface instead of defining new interfaces or drivers. To work properly, this requires a matching change in SBI, though the actual interface between kernel and SBI does not change. The main differences are the overflow CSR and irq number. As the reading of the overflow-csr is in the hot-path during irq handling, use an errata and alternatives to not introduce new conditionals there.
Reviewed-by: Andrew Jones Reviewed-by: Conor Dooley Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/all/20221011231841.2951264-2-heiko@sntech.de/ Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig.erratas | 13 +++++++++++ arch/riscv/errata/thead/errata.c | 19 ++++++++++++++++ arch/riscv/include/asm/errata_list.h | 16 ++++++++++++- drivers/perf/riscv_pmu_sbi.c | 34 ++++++++++++++++++++-------- 4 files changed, 71 insertions(+), 11 deletions(-) diff --git a/arch/riscv/Kconfig.erratas b/arch/riscv/Kconfig.erratas index f3623df23b5f..69621ae6d647 100644 --- a/arch/riscv/Kconfig.erratas +++ b/arch/riscv/Kconfig.erratas @@ -66,4 +66,17 @@ config ERRATA_THEAD_CMO If you don't know what to do here, say "Y". +config ERRATA_THEAD_PMU + bool "Apply T-Head PMU errata" + depends on ERRATA_THEAD && RISCV_PMU_SBI + default y + help + The T-Head C9xx cores implement a PMU overflow extension very + similar to the core SSCOFPMF extension. + + This will apply the overflow errata to handle the non-standard + behaviour via the regular SBI PMU driver and interface. + + If you don't know what to do here, say "Y". 
+ endmenu # "CPU errata selection" diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c index 21546937db39..fac5742d1c1e 100644 --- a/arch/riscv/errata/thead/errata.c +++ b/arch/riscv/errata/thead/errata.c @@ -47,6 +47,22 @@ static bool errata_probe_cmo(unsigned int stage, return true; } +static bool errata_probe_pmu(unsigned int stage, + unsigned long arch_id, unsigned long impid) +{ + if (!IS_ENABLED(CONFIG_ERRATA_THEAD_PMU)) + return false; + + /* target-c9xx cores report arch_id and impid as 0 */ + if (arch_id != 0 || impid != 0) + return false; + + if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) + return false; + + return true; +} + static u32 thead_errata_probe(unsigned int stage, unsigned long archid, unsigned long impid) { @@ -58,6 +74,9 @@ static u32 thead_errata_probe(unsigned int stage, if (errata_probe_cmo(stage, archid, impid)) cpu_req_errata |= BIT(ERRATA_THEAD_CMO); + if (errata_probe_pmu(stage, archid, impid)) + cpu_req_errata |= BIT(ERRATA_THEAD_PMU); + return cpu_req_errata; } diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h index 19a771085781..4180312d2a70 100644 --- a/arch/riscv/include/asm/errata_list.h +++ b/arch/riscv/include/asm/errata_list.h @@ -6,6 +6,7 @@ #define ASM_ERRATA_LIST_H #include +#include #include #ifdef CONFIG_ERRATA_SIFIVE @@ -17,7 +18,8 @@ #ifdef CONFIG_ERRATA_THEAD #define ERRATA_THEAD_PBMT 0 #define ERRATA_THEAD_CMO 1 -#define ERRATA_THEAD_NUMBER 2 +#define ERRATA_THEAD_PMU 2 +#define ERRATA_THEAD_NUMBER 3 #endif #define CPUFEATURE_SVPBMT 0 @@ -142,6 +144,18 @@ asm volatile(ALTERNATIVE_2( \ "r"((unsigned long)(_start) + (_size)) \ : "a0") +#define THEAD_C9XX_RV_IRQ_PMU 17 +#define THEAD_C9XX_CSR_SCOUNTEROF 0x5c5 + +#define ALT_SBI_PMU_OVERFLOW(__ovl) \ +asm volatile(ALTERNATIVE( \ + "csrr %0, " __stringify(CSR_SSCOUNTOVF), \ + "csrr %0, " __stringify(THEAD_C9XX_CSR_SCOUNTEROF), \ + THEAD_VENDOR_ID, ERRATA_THEAD_PMU, \ + CONFIG_ERRATA_THEAD_PMU) \ + : "=r" 
(__ovl) : \ + : "memory") + #endif /* __ASSEMBLY__ */ #endif diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c index 3852c18362f5..f6507efe2a58 100644 --- a/drivers/perf/riscv_pmu_sbi.c +++ b/drivers/perf/riscv_pmu_sbi.c @@ -20,6 +20,7 @@ #include #include +#include #include #include @@ -47,6 +48,8 @@ static const struct attribute_group *riscv_pmu_attr_groups[] = { * per_cpu in case of harts with different pmu counters */ static union sbi_pmu_ctr_info *pmu_ctr_list; +static bool riscv_pmu_use_irq; +static unsigned int riscv_pmu_irq_num; static unsigned int riscv_pmu_irq; struct sbi_pmu_event_data { @@ -580,7 +583,7 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev) fidx = find_first_bit(cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS); event = cpu_hw_evt->events[fidx]; if (!event) { - csr_clear(CSR_SIP, SIP_LCOFIP); + csr_clear(CSR_SIP, BIT(riscv_pmu_irq_num)); return IRQ_NONE; } @@ -588,13 +591,13 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev) pmu_sbi_stop_hw_ctrs(pmu); /* Overflow status register should only be read after counter are stopped */ - overflow = csr_read(CSR_SSCOUNTOVF); + ALT_SBI_PMU_OVERFLOW(overflow); /* * Overflow interrupt pending bit should only be cleared after stopping * all the counters to avoid any race condition. 
*/ - csr_clear(CSR_SIP, SIP_LCOFIP); + csr_clear(CSR_SIP, BIT(riscv_pmu_irq_num)); /* No overflow bit is set */ if (!overflow) @@ -661,10 +664,10 @@ static int pmu_sbi_starting_cpu(unsigned int cpu, struct hlist_node *node) /* Stop all the counters so that they can be enabled from perf */ pmu_sbi_stop_all(pmu); - if (riscv_isa_extension_available(NULL, SSCOFPMF)) { + if (riscv_pmu_use_irq) { cpu_hw_evt->irq = riscv_pmu_irq; - csr_clear(CSR_IP, BIT(RV_IRQ_PMU)); - csr_set(CSR_IE, BIT(RV_IRQ_PMU)); + csr_clear(CSR_IP, BIT(riscv_pmu_irq_num)); + csr_set(CSR_IE, BIT(riscv_pmu_irq_num)); enable_percpu_irq(riscv_pmu_irq, IRQ_TYPE_NONE); } @@ -673,9 +676,9 @@ static int pmu_sbi_starting_cpu(unsigned int cpu, struct hlist_node *node) static int pmu_sbi_dying_cpu(unsigned int cpu, struct hlist_node *node) { - if (riscv_isa_extension_available(NULL, SSCOFPMF)) { + if (riscv_pmu_use_irq) { disable_percpu_irq(riscv_pmu_irq); - csr_clear(CSR_IE, BIT(RV_IRQ_PMU)); + csr_clear(CSR_IE, BIT(riscv_pmu_irq_num)); } /* Disable all counters access for user mode now */ @@ -691,7 +694,18 @@ static int pmu_sbi_setup_irqs(struct riscv_pmu *pmu, struct platform_device *pde struct device_node *cpu, *child; struct irq_domain *domain = NULL; - if (!riscv_isa_extension_available(NULL, SSCOFPMF)) + if (riscv_isa_extension_available(NULL, SSCOFPMF)) { + riscv_pmu_irq_num = RV_IRQ_PMU; + riscv_pmu_use_irq = true; + } else if (IS_ENABLED(CONFIG_ERRATA_THEAD_PMU) && + riscv_cached_mvendorid(0) == THEAD_VENDOR_ID && + riscv_cached_marchid(0) == 0 && + riscv_cached_mimpid(0) == 0) { + riscv_pmu_irq_num = THEAD_C9XX_RV_IRQ_PMU; + riscv_pmu_use_irq = true; + } + + if (!riscv_pmu_use_irq) return -EOPNOTSUPP; for_each_of_cpu_node(cpu) { @@ -713,7 +727,7 @@ static int pmu_sbi_setup_irqs(struct riscv_pmu *pmu, struct platform_device *pde return -ENODEV; } - riscv_pmu_irq = irq_create_mapping(domain, RV_IRQ_PMU); + riscv_pmu_irq = irq_create_mapping(domain, riscv_pmu_irq_num); if (!riscv_pmu_irq) { 
pr_err("Failed to map PMU interrupt for node\n"); return -ENODEV; From 2348e6bf44213c5f447ff698e43c089185241ed7 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Tue, 18 Oct 2022 22:12:00 +0800 Subject: [PATCH 0539/4122] riscv: remove special treatment for the link order of head.o arch/riscv/kernel/head.o does not need any special treatment - the only requirement is the ".head.text" section must be placed before the normal ".text" section. The linker script already does the right thing. The build system does not need to manipulate the link order of head.o. Signed-off-by: Jisheng Zhang Link: https://lore.kernel.org/r/20221018141200.1040-1-jszhang@kernel.org Signed-off-by: Palmer Dabbelt --- scripts/head-object-list.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/head-object-list.txt b/scripts/head-object-list.txt index b16326a92c45..105ea7ac4751 100644 --- a/scripts/head-object-list.txt +++ b/scripts/head-object-list.txt @@ -39,7 +39,6 @@ arch/powerpc/kernel/entry_64.o arch/powerpc/kernel/fpu.o arch/powerpc/kernel/vector.o arch/powerpc/kernel/prom_init.o -arch/riscv/kernel/head.o arch/s390/kernel/head64.o arch/sh/kernel/head_32.o arch/sparc/kernel/head_32.o From 28fc4e9077ce59ab28c89c20dc6be5154473218f Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Tue, 18 Oct 2022 10:45:32 +0800 Subject: [PATCH 0540/4122] f2fs: Fix the race condition of resize flag between resizefs Because setting/clearing the SBI_IS_RESIZEFS flag is not protected by any lock, the flag can be lost in the following case: thread1 thread2 ->ioctl(resizefs) ->set RESIZEFS flag ->ioctl(resizefs) ... ->set RESIZEFS flag ->clear RESIZEFS flag ->resizefs stream # No RESIZEFS flag in the stream Also, before freeze_super, resizefs has not started yet, so we should not set the SBI_IS_RESIZEFS flag. So move the set/clear SBI_IS_RESIZEFS flag between the cp_mutex and gc_lock.
Fixes: b4b10061ef98 ("f2fs: refactor resize_fs to avoid meta updates in progress") Signed-off-by: Zhang Xiaoxu Signed-off-by: Zhang Qilong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index dab794225cce..7b4be412cec0 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -2134,8 +2134,6 @@ out_unlock: if (err) return err; - set_sbi_flag(sbi, SBI_IS_RESIZEFS); - freeze_super(sbi->sb); f2fs_down_write(&sbi->gc_lock); f2fs_down_write(&sbi->cp_global_sem); @@ -2151,6 +2149,7 @@ out_unlock: if (err) goto out_err; + set_sbi_flag(sbi, SBI_IS_RESIZEFS); err = free_segment_range(sbi, secs, false); if (err) goto recover_out; @@ -2174,6 +2173,7 @@ out_unlock: f2fs_commit_super(sbi, false); } recover_out: + clear_sbi_flag(sbi, SBI_IS_RESIZEFS); if (err) { set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_err(sbi, "resize_fs failed, should run fsck to repair!"); @@ -2186,6 +2186,5 @@ out_err: f2fs_up_write(&sbi->cp_global_sem); f2fs_up_write(&sbi->gc_lock); thaw_super(sbi->sb); - clear_sbi_flag(sbi, SBI_IS_RESIZEFS); return err; } From 299c481fa5c121f892420d97f1123a853b7f1079 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Tue, 27 Sep 2022 07:54:39 +0000 Subject: [PATCH 0541/4122] crypto: rockchip - use dev_err for error message about interrupt Interrupt is mandatory so the message should be printed as error. 
Reviewed-by: John Keeping Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/rockchip/rk3288_crypto.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c index 35d73061d156..45cc5f766788 100644 --- a/drivers/crypto/rockchip/rk3288_crypto.c +++ b/drivers/crypto/rockchip/rk3288_crypto.c @@ -371,8 +371,7 @@ static int rk_crypto_probe(struct platform_device *pdev) crypto_info->irq = platform_get_irq(pdev, 0); if (crypto_info->irq < 0) { - dev_warn(crypto_info->dev, - "control Interrupt is not available.\n"); + dev_err(&pdev->dev, "control Interrupt is not available.\n"); err = crypto_info->irq; goto err_crypto; } From 8ccd9c8cd1d1618f5e073c86ffcfe15f292eefe6 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Tue, 27 Sep 2022 07:54:40 +0000 Subject: [PATCH 0542/4122] crypto: rockchip - do not use uninitialized variable crypto_info->dev is not yet set, so use pdev->dev instead. 
Reviewed-by: John Keeping Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/rockchip/rk3288_crypto.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c index 45cc5f766788..21d3f1458584 100644 --- a/drivers/crypto/rockchip/rk3288_crypto.c +++ b/drivers/crypto/rockchip/rk3288_crypto.c @@ -381,7 +381,7 @@ static int rk_crypto_probe(struct platform_device *pdev) "rk-crypto", pdev); if (err) { - dev_err(crypto_info->dev, "irq request failed.\n"); + dev_err(&pdev->dev, "irq request failed.\n"); goto err_crypto; } From c50ef1411c8cbad0c7db100c477126076b6e3348 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Tue, 27 Sep 2022 07:54:41 +0000 Subject: [PATCH 0543/4122] crypto: rockchip - do not do custom power management The clock enable/disable at tfm init/exit is fragile: if 2 tfms are initialized at the same time and one is removed just after, it will leave the hardware unclocked even if a user remains. Instead simply enable clocks at probe time. We will do PM later.
Fixes: ce0183cb6464b ("crypto: rockchip - switch to skcipher API") Reviewed-by: John Keeping Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/rockchip/rk3288_crypto.c | 4 ++-- drivers/crypto/rockchip/rk3288_crypto.h | 2 -- drivers/crypto/rockchip/rk3288_crypto_ahash.c | 3 +-- drivers/crypto/rockchip/rk3288_crypto_skcipher.c | 5 +++-- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c index 21d3f1458584..4cff49b82983 100644 --- a/drivers/crypto/rockchip/rk3288_crypto.c +++ b/drivers/crypto/rockchip/rk3288_crypto.c @@ -394,8 +394,7 @@ static int rk_crypto_probe(struct platform_device *pdev) rk_crypto_done_task_cb, (unsigned long)crypto_info); crypto_init_queue(&crypto_info->queue, 50); - crypto_info->enable_clk = rk_crypto_enable_clk; - crypto_info->disable_clk = rk_crypto_disable_clk; + rk_crypto_enable_clk(crypto_info); crypto_info->load_data = rk_load_data; crypto_info->unload_data = rk_unload_data; crypto_info->enqueue = rk_crypto_enqueue; @@ -422,6 +421,7 @@ static int rk_crypto_remove(struct platform_device *pdev) struct rk_crypto_info *crypto_tmp = platform_get_drvdata(pdev); rk_crypto_unregister(); + rk_crypto_disable_clk(crypto_tmp); tasklet_kill(&crypto_tmp->done_task); tasklet_kill(&crypto_tmp->queue_task); return 0; diff --git a/drivers/crypto/rockchip/rk3288_crypto.h b/drivers/crypto/rockchip/rk3288_crypto.h index 97278c2574ff..2fa7131e4060 100644 --- a/drivers/crypto/rockchip/rk3288_crypto.h +++ b/drivers/crypto/rockchip/rk3288_crypto.h @@ -220,8 +220,6 @@ struct rk_crypto_info { int (*start)(struct rk_crypto_info *dev); int (*update)(struct rk_crypto_info *dev); void (*complete)(struct crypto_async_request *base, int err); - int (*enable_clk)(struct rk_crypto_info *dev); - void (*disable_clk)(struct rk_crypto_info *dev); int (*load_data)(struct rk_crypto_info *dev, struct scatterlist *sg_src, struct scatterlist *sg_dst); diff --git 
a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c index ed03058497bc..49017d1fb510 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c +++ b/drivers/crypto/rockchip/rk3288_crypto_ahash.c @@ -301,7 +301,7 @@ static int rk_cra_hash_init(struct crypto_tfm *tfm) sizeof(struct rk_ahash_rctx) + crypto_ahash_reqsize(tctx->fallback_tfm)); - return tctx->dev->enable_clk(tctx->dev); + return 0; } static void rk_cra_hash_exit(struct crypto_tfm *tfm) @@ -309,7 +309,6 @@ static void rk_cra_hash_exit(struct crypto_tfm *tfm) struct rk_ahash_ctx *tctx = crypto_tfm_ctx(tfm); free_page((unsigned long)tctx->dev->addr_vir); - return tctx->dev->disable_clk(tctx->dev); } struct rk_crypto_tmp rk_ahash_sha1 = { diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c index 5bbf0d2722e1..8c44a19eab75 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c +++ b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c @@ -388,8 +388,10 @@ static int rk_ablk_init_tfm(struct crypto_skcipher *tfm) ctx->dev->update = rk_ablk_rx; ctx->dev->complete = rk_crypto_complete; ctx->dev->addr_vir = (char *)__get_free_page(GFP_KERNEL); + if (!ctx->dev->addr_vir) + return -ENOMEM; - return ctx->dev->addr_vir ? ctx->dev->enable_clk(ctx->dev) : -ENOMEM; + return 0; } static void rk_ablk_exit_tfm(struct crypto_skcipher *tfm) @@ -397,7 +399,6 @@ static void rk_ablk_exit_tfm(struct crypto_skcipher *tfm) struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); free_page((unsigned long)ctx->dev->addr_vir); - ctx->dev->disable_clk(ctx->dev); } struct rk_crypto_tmp rk_ecb_aes_alg = { From 6d11c9387865723fd779be00ae37a4588e60133d Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Tue, 27 Sep 2022 07:54:42 +0000 Subject: [PATCH 0544/4122] crypto: rockchip - fix privete/private typo This fixes a simple typo in the word "private". 
Reviewed-by: John Keeping Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/rockchip/rk3288_crypto.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/rockchip/rk3288_crypto.h b/drivers/crypto/rockchip/rk3288_crypto.h index 2fa7131e4060..656d6795d400 100644 --- a/drivers/crypto/rockchip/rk3288_crypto.h +++ b/drivers/crypto/rockchip/rk3288_crypto.h @@ -235,7 +235,7 @@ struct rk_ahash_ctx { struct crypto_ahash *fallback_tfm; }; -/* the privete variable of hash for fallback */ +/* the private variable of hash for fallback */ struct rk_ahash_rctx { struct ahash_request fallback_req; u32 mode; From 87e356c4966444866186f68f05832fdcc0f351a3 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Tue, 27 Sep 2022 07:54:43 +0000 Subject: [PATCH 0545/4122] crypto: rockchip - do not store mode globally Storing the mode globally does not work if 2 requests are handled in the same time. We should store it in a request context. Fixes: ce0183cb6464b ("crypto: rockchip - switch to skcipher API") Reviewed-by: John Keeping Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/rockchip/rk3288_crypto.h | 5 +- .../crypto/rockchip/rk3288_crypto_skcipher.c | 58 ++++++++++++------- 2 files changed, 41 insertions(+), 22 deletions(-) diff --git a/drivers/crypto/rockchip/rk3288_crypto.h b/drivers/crypto/rockchip/rk3288_crypto.h index 656d6795d400..c919d9a43a08 100644 --- a/drivers/crypto/rockchip/rk3288_crypto.h +++ b/drivers/crypto/rockchip/rk3288_crypto.h @@ -245,10 +245,13 @@ struct rk_ahash_rctx { struct rk_cipher_ctx { struct rk_crypto_info *dev; unsigned int keylen; - u32 mode; u8 iv[AES_BLOCK_SIZE]; }; +struct rk_cipher_rctx { + u32 mode; +}; + enum alg_type { ALG_TYPE_HASH, ALG_TYPE_CIPHER, diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c index 8c44a19eab75..bbd0bf52bf07 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c +++ 
b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c @@ -76,9 +76,10 @@ static int rk_aes_ecb_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); + struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); struct rk_crypto_info *dev = ctx->dev; - ctx->mode = RK_CRYPTO_AES_ECB_MODE; + rctx->mode = RK_CRYPTO_AES_ECB_MODE; return rk_handle_req(dev, req); } @@ -86,9 +87,10 @@ static int rk_aes_ecb_decrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); + struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); struct rk_crypto_info *dev = ctx->dev; - ctx->mode = RK_CRYPTO_AES_ECB_MODE | RK_CRYPTO_DEC; + rctx->mode = RK_CRYPTO_AES_ECB_MODE | RK_CRYPTO_DEC; return rk_handle_req(dev, req); } @@ -96,9 +98,10 @@ static int rk_aes_cbc_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); + struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); struct rk_crypto_info *dev = ctx->dev; - ctx->mode = RK_CRYPTO_AES_CBC_MODE; + rctx->mode = RK_CRYPTO_AES_CBC_MODE; return rk_handle_req(dev, req); } @@ -106,9 +109,10 @@ static int rk_aes_cbc_decrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); + struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); struct rk_crypto_info *dev = ctx->dev; - ctx->mode = RK_CRYPTO_AES_CBC_MODE | RK_CRYPTO_DEC; + rctx->mode = RK_CRYPTO_AES_CBC_MODE | RK_CRYPTO_DEC; return rk_handle_req(dev, req); } @@ -116,9 +120,10 @@ static int rk_des_ecb_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); + struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); struct 
rk_crypto_info *dev = ctx->dev; - ctx->mode = 0; + rctx->mode = 0; return rk_handle_req(dev, req); } @@ -126,9 +131,10 @@ static int rk_des_ecb_decrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); + struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); struct rk_crypto_info *dev = ctx->dev; - ctx->mode = RK_CRYPTO_DEC; + rctx->mode = RK_CRYPTO_DEC; return rk_handle_req(dev, req); } @@ -136,9 +142,10 @@ static int rk_des_cbc_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); + struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); struct rk_crypto_info *dev = ctx->dev; - ctx->mode = RK_CRYPTO_TDES_CHAINMODE_CBC; + rctx->mode = RK_CRYPTO_TDES_CHAINMODE_CBC; return rk_handle_req(dev, req); } @@ -146,9 +153,10 @@ static int rk_des_cbc_decrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); + struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); struct rk_crypto_info *dev = ctx->dev; - ctx->mode = RK_CRYPTO_TDES_CHAINMODE_CBC | RK_CRYPTO_DEC; + rctx->mode = RK_CRYPTO_TDES_CHAINMODE_CBC | RK_CRYPTO_DEC; return rk_handle_req(dev, req); } @@ -156,9 +164,10 @@ static int rk_des3_ede_ecb_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); + struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); struct rk_crypto_info *dev = ctx->dev; - ctx->mode = RK_CRYPTO_TDES_SELECT; + rctx->mode = RK_CRYPTO_TDES_SELECT; return rk_handle_req(dev, req); } @@ -166,9 +175,10 @@ static int rk_des3_ede_ecb_decrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); + struct rk_cipher_rctx *rctx = 
skcipher_request_ctx(req); struct rk_crypto_info *dev = ctx->dev; - ctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_DEC; + rctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_DEC; return rk_handle_req(dev, req); } @@ -176,9 +186,10 @@ static int rk_des3_ede_cbc_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); + struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); struct rk_crypto_info *dev = ctx->dev; - ctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_TDES_CHAINMODE_CBC; + rctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_TDES_CHAINMODE_CBC; return rk_handle_req(dev, req); } @@ -186,9 +197,10 @@ static int rk_des3_ede_cbc_decrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); + struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); struct rk_crypto_info *dev = ctx->dev; - ctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_TDES_CHAINMODE_CBC | + rctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_TDES_CHAINMODE_CBC | RK_CRYPTO_DEC; return rk_handle_req(dev, req); } @@ -199,6 +211,7 @@ static void rk_ablk_hw_init(struct rk_crypto_info *dev) skcipher_request_cast(dev->async_req); struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req); struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher); + struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(cipher); u32 ivsize, block, conf_reg = 0; @@ -206,22 +219,22 @@ static void rk_ablk_hw_init(struct rk_crypto_info *dev) ivsize = crypto_skcipher_ivsize(cipher); if (block == DES_BLOCK_SIZE) { - ctx->mode |= RK_CRYPTO_TDES_FIFO_MODE | + rctx->mode |= RK_CRYPTO_TDES_FIFO_MODE | RK_CRYPTO_TDES_BYTESWAP_KEY | RK_CRYPTO_TDES_BYTESWAP_IV; - CRYPTO_WRITE(dev, RK_CRYPTO_TDES_CTRL, ctx->mode); + CRYPTO_WRITE(dev, RK_CRYPTO_TDES_CTRL, rctx->mode); memcpy_toio(dev->reg + RK_CRYPTO_TDES_IV_0, req->iv, 
ivsize); conf_reg = RK_CRYPTO_DESSEL; } else { - ctx->mode |= RK_CRYPTO_AES_FIFO_MODE | + rctx->mode |= RK_CRYPTO_AES_FIFO_MODE | RK_CRYPTO_AES_KEY_CHANGE | RK_CRYPTO_AES_BYTESWAP_KEY | RK_CRYPTO_AES_BYTESWAP_IV; if (ctx->keylen == AES_KEYSIZE_192) - ctx->mode |= RK_CRYPTO_AES_192BIT_key; + rctx->mode |= RK_CRYPTO_AES_192BIT_key; else if (ctx->keylen == AES_KEYSIZE_256) - ctx->mode |= RK_CRYPTO_AES_256BIT_key; - CRYPTO_WRITE(dev, RK_CRYPTO_AES_CTRL, ctx->mode); + rctx->mode |= RK_CRYPTO_AES_256BIT_key; + CRYPTO_WRITE(dev, RK_CRYPTO_AES_CTRL, rctx->mode); memcpy_toio(dev->reg + RK_CRYPTO_AES_IV_0, req->iv, ivsize); } conf_reg |= RK_CRYPTO_BYTESWAP_BTFIFO | @@ -246,6 +259,7 @@ static int rk_set_data_start(struct rk_crypto_info *dev) struct skcipher_request *req = skcipher_request_cast(dev->async_req); struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); u32 ivsize = crypto_skcipher_ivsize(tfm); u8 *src_last_blk = page_address(sg_page(dev->sg_src)) + @@ -254,7 +268,7 @@ static int rk_set_data_start(struct rk_crypto_info *dev) /* Store the iv that need to be updated in chain mode. * And update the IV buffer to contain the next IV for decryption mode. */ - if (ctx->mode & RK_CRYPTO_DEC) { + if (rctx->mode & RK_CRYPTO_DEC) { memcpy(ctx->iv, src_last_blk, ivsize); sg_pcopy_to_buffer(dev->first, dev->src_nents, req->iv, ivsize, dev->total - ivsize); @@ -294,11 +308,12 @@ static void rk_iv_copyback(struct rk_crypto_info *dev) struct skcipher_request *req = skcipher_request_cast(dev->async_req); struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); u32 ivsize = crypto_skcipher_ivsize(tfm); /* Update the IV buffer to contain the next IV for encryption mode. 
*/ - if (!(ctx->mode & RK_CRYPTO_DEC)) { + if (!(rctx->mode & RK_CRYPTO_DEC)) { if (dev->aligned) { memcpy(req->iv, sg_virt(dev->sg_dst) + dev->sg_dst->length - ivsize, ivsize); @@ -314,11 +329,12 @@ static void rk_update_iv(struct rk_crypto_info *dev) struct skcipher_request *req = skcipher_request_cast(dev->async_req); struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); u32 ivsize = crypto_skcipher_ivsize(tfm); u8 *new_iv = NULL; - if (ctx->mode & RK_CRYPTO_DEC) { + if (rctx->mode & RK_CRYPTO_DEC) { new_iv = ctx->iv; } else { new_iv = page_address(sg_page(dev->sg_dst)) + From 68ef8af09a1a912a5ed2cfaa4cca7606f52cef90 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Tue, 27 Sep 2022 07:54:44 +0000 Subject: [PATCH 0546/4122] crypto: rockchip - add fallback for cipher The hardware does not handle 0 size length request, let's add a fallback. Furthermore fallback will be used for all unaligned case the hardware cannot handle. 
Fixes: ce0183cb6464b ("crypto: rockchip - switch to skcipher API") Reviewed-by: John Keeping Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/Kconfig | 4 + drivers/crypto/rockchip/rk3288_crypto.h | 2 + .../crypto/rockchip/rk3288_crypto_skcipher.c | 97 ++++++++++++++++--- 3 files changed, 90 insertions(+), 13 deletions(-) diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 55e75fbb658e..113b35f69598 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -669,6 +669,10 @@ config CRYPTO_DEV_IMGTEC_HASH config CRYPTO_DEV_ROCKCHIP tristate "Rockchip's Cryptographic Engine driver" depends on OF && ARCH_ROCKCHIP + depends on PM + select CRYPTO_ECB + select CRYPTO_CBC + select CRYPTO_DES select CRYPTO_AES select CRYPTO_LIB_DES select CRYPTO_MD5 diff --git a/drivers/crypto/rockchip/rk3288_crypto.h b/drivers/crypto/rockchip/rk3288_crypto.h index c919d9a43a08..8b1e15d8ddc6 100644 --- a/drivers/crypto/rockchip/rk3288_crypto.h +++ b/drivers/crypto/rockchip/rk3288_crypto.h @@ -246,10 +246,12 @@ struct rk_cipher_ctx { struct rk_crypto_info *dev; unsigned int keylen; u8 iv[AES_BLOCK_SIZE]; + struct crypto_skcipher *fallback_tfm; }; struct rk_cipher_rctx { u32 mode; + struct skcipher_request fallback_req; // keep at the end }; enum alg_type { diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c index bbd0bf52bf07..eac5bba66e25 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c +++ b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c @@ -13,6 +13,63 @@ #define RK_CRYPTO_DEC BIT(0) +static int rk_cipher_need_fallback(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + unsigned int bs = crypto_skcipher_blocksize(tfm); + struct scatterlist *sgs, *sgd; + unsigned int stodo, dtodo, len; + + if (!req->cryptlen) + return true; + + len = req->cryptlen; + sgs = req->src; + sgd = req->dst; + while (sgs && sgd) { + if 
(!IS_ALIGNED(sgs->offset, sizeof(u32))) { + return true; + } + if (!IS_ALIGNED(sgd->offset, sizeof(u32))) { + return true; + } + stodo = min(len, sgs->length); + if (stodo % bs) { + return true; + } + dtodo = min(len, sgd->length); + if (dtodo % bs) { + return true; + } + if (stodo != dtodo) { + return true; + } + len -= stodo; + sgs = sg_next(sgs); + sgd = sg_next(sgd); + } + return false; +} + +static int rk_cipher_fallback(struct skcipher_request *areq) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq); + struct rk_cipher_ctx *op = crypto_skcipher_ctx(tfm); + struct rk_cipher_rctx *rctx = skcipher_request_ctx(areq); + int err; + + skcipher_request_set_tfm(&rctx->fallback_req, op->fallback_tfm); + skcipher_request_set_callback(&rctx->fallback_req, areq->base.flags, + areq->base.complete, areq->base.data); + skcipher_request_set_crypt(&rctx->fallback_req, areq->src, areq->dst, + areq->cryptlen, areq->iv); + if (rctx->mode & RK_CRYPTO_DEC) + err = crypto_skcipher_decrypt(&rctx->fallback_req); + else + err = crypto_skcipher_encrypt(&rctx->fallback_req); + return err; +} + static void rk_crypto_complete(struct crypto_async_request *base, int err) { if (base->complete) @@ -22,10 +79,10 @@ static void rk_crypto_complete(struct crypto_async_request *base, int err) static int rk_handle_req(struct rk_crypto_info *dev, struct skcipher_request *req) { - if (!IS_ALIGNED(req->cryptlen, dev->align_size)) - return -EINVAL; - else - return dev->enqueue(dev, &req->base); + if (rk_cipher_need_fallback(req)) + return rk_cipher_fallback(req); + + return dev->enqueue(dev, &req->base); } static int rk_aes_setkey(struct crypto_skcipher *cipher, @@ -39,7 +96,8 @@ static int rk_aes_setkey(struct crypto_skcipher *cipher, return -EINVAL; ctx->keylen = keylen; memcpy_toio(ctx->dev->reg + RK_CRYPTO_AES_KEY_0, key, keylen); - return 0; + + return crypto_skcipher_setkey(ctx->fallback_tfm, key, keylen); } static int rk_des_setkey(struct crypto_skcipher *cipher, @@ -54,7 +112,8 @@ 
static int rk_des_setkey(struct crypto_skcipher *cipher, ctx->keylen = keylen; memcpy_toio(ctx->dev->reg + RK_CRYPTO_TDES_KEY1_0, key, keylen); - return 0; + + return crypto_skcipher_setkey(ctx->fallback_tfm, key, keylen); } static int rk_tdes_setkey(struct crypto_skcipher *cipher, @@ -69,7 +128,7 @@ static int rk_tdes_setkey(struct crypto_skcipher *cipher, ctx->keylen = keylen; memcpy_toio(ctx->dev->reg + RK_CRYPTO_TDES_KEY1_0, key, keylen); - return 0; + return crypto_skcipher_setkey(ctx->fallback_tfm, key, keylen); } static int rk_aes_ecb_encrypt(struct skcipher_request *req) @@ -394,6 +453,7 @@ static int rk_ablk_init_tfm(struct crypto_skcipher *tfm) { struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); struct skcipher_alg *alg = crypto_skcipher_alg(tfm); + const char *name = crypto_tfm_alg_name(&tfm->base); struct rk_crypto_tmp *algt; algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher); @@ -407,6 +467,16 @@ static int rk_ablk_init_tfm(struct crypto_skcipher *tfm) if (!ctx->dev->addr_vir) return -ENOMEM; + ctx->fallback_tfm = crypto_alloc_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(ctx->fallback_tfm)) { + dev_err(ctx->dev->dev, "ERROR: Cannot allocate fallback for %s %ld\n", + name, PTR_ERR(ctx->fallback_tfm)); + return PTR_ERR(ctx->fallback_tfm); + } + + tfm->reqsize = sizeof(struct rk_cipher_rctx) + + crypto_skcipher_reqsize(ctx->fallback_tfm); + return 0; } @@ -415,6 +485,7 @@ static void rk_ablk_exit_tfm(struct crypto_skcipher *tfm) struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); free_page((unsigned long)ctx->dev->addr_vir); + crypto_free_skcipher(ctx->fallback_tfm); } struct rk_crypto_tmp rk_ecb_aes_alg = { @@ -423,7 +494,7 @@ struct rk_crypto_tmp rk_ecb_aes_alg = { .base.cra_name = "ecb(aes)", .base.cra_driver_name = "ecb-aes-rk", .base.cra_priority = 300, - .base.cra_flags = CRYPTO_ALG_ASYNC, + .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, .base.cra_blocksize = AES_BLOCK_SIZE, .base.cra_ctxsize = 
sizeof(struct rk_cipher_ctx), .base.cra_alignmask = 0x0f, @@ -445,7 +516,7 @@ struct rk_crypto_tmp rk_cbc_aes_alg = { .base.cra_name = "cbc(aes)", .base.cra_driver_name = "cbc-aes-rk", .base.cra_priority = 300, - .base.cra_flags = CRYPTO_ALG_ASYNC, + .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, .base.cra_blocksize = AES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct rk_cipher_ctx), .base.cra_alignmask = 0x0f, @@ -468,7 +539,7 @@ struct rk_crypto_tmp rk_ecb_des_alg = { .base.cra_name = "ecb(des)", .base.cra_driver_name = "ecb-des-rk", .base.cra_priority = 300, - .base.cra_flags = CRYPTO_ALG_ASYNC, + .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, .base.cra_blocksize = DES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct rk_cipher_ctx), .base.cra_alignmask = 0x07, @@ -490,7 +561,7 @@ struct rk_crypto_tmp rk_cbc_des_alg = { .base.cra_name = "cbc(des)", .base.cra_driver_name = "cbc-des-rk", .base.cra_priority = 300, - .base.cra_flags = CRYPTO_ALG_ASYNC, + .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, .base.cra_blocksize = DES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct rk_cipher_ctx), .base.cra_alignmask = 0x07, @@ -513,7 +584,7 @@ struct rk_crypto_tmp rk_ecb_des3_ede_alg = { .base.cra_name = "ecb(des3_ede)", .base.cra_driver_name = "ecb-des3-ede-rk", .base.cra_priority = 300, - .base.cra_flags = CRYPTO_ALG_ASYNC, + .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, .base.cra_blocksize = DES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct rk_cipher_ctx), .base.cra_alignmask = 0x07, @@ -535,7 +606,7 @@ struct rk_crypto_tmp rk_cbc_des3_ede_alg = { .base.cra_name = "cbc(des3_ede)", .base.cra_driver_name = "cbc-des3-ede-rk", .base.cra_priority = 300, - .base.cra_flags = CRYPTO_ALG_ASYNC, + .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, .base.cra_blocksize = DES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct rk_cipher_ctx), .base.cra_alignmask = 0x07, From 816600485cb597b3ff7d6806a95a78512839f775 Mon 
Sep 17 00:00:00 2001 From: Corentin Labbe Date: Tue, 27 Sep 2022 07:54:45 +0000 Subject: [PATCH 0547/4122] crypto: rockchip - add fallback for ahash Adds a fallback for all case hardware cannot handle. Fixes: ce0183cb6464b ("crypto: rockchip - switch to skcipher API") Reviewed-by: John Keeping Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/rockchip/rk3288_crypto_ahash.c | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c index 49017d1fb510..16009bb0bf16 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c +++ b/drivers/crypto/rockchip/rk3288_crypto_ahash.c @@ -16,6 +16,40 @@ * so we put the fixed hash out when met zero message. */ +static bool rk_ahash_need_fallback(struct ahash_request *req) +{ + struct scatterlist *sg; + + sg = req->src; + while (sg) { + if (!IS_ALIGNED(sg->offset, sizeof(u32))) { + return true; + } + if (sg->length % 4) { + return true; + } + sg = sg_next(sg); + } + return false; +} + +static int rk_ahash_digest_fb(struct ahash_request *areq) +{ + struct rk_ahash_rctx *rctx = ahash_request_ctx(areq); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); + struct rk_ahash_ctx *tfmctx = crypto_ahash_ctx(tfm); + + ahash_request_set_tfm(&rctx->fallback_req, tfmctx->fallback_tfm); + rctx->fallback_req.base.flags = areq->base.flags & + CRYPTO_TFM_REQ_MAY_SLEEP; + + rctx->fallback_req.nbytes = areq->nbytes; + rctx->fallback_req.src = areq->src; + rctx->fallback_req.result = areq->result; + + return crypto_ahash_digest(&rctx->fallback_req); +} + static int zero_message_process(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); @@ -167,6 +201,9 @@ static int rk_ahash_digest(struct ahash_request *req) struct rk_ahash_ctx *tctx = crypto_tfm_ctx(req->base.tfm); struct rk_crypto_info *dev = tctx->dev; + if (rk_ahash_need_fallback(req)) + return rk_ahash_digest_fb(req); + if 
(!req->nbytes) return zero_message_process(req); else @@ -309,6 +346,7 @@ static void rk_cra_hash_exit(struct crypto_tfm *tfm) struct rk_ahash_ctx *tctx = crypto_tfm_ctx(tfm); free_page((unsigned long)tctx->dev->addr_vir); + crypto_free_ahash(tctx->fallback_tfm); } struct rk_crypto_tmp rk_ahash_sha1 = { From d6b23ccef82816050c2fd458c9dabfa0e0af09b9 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Tue, 27 Sep 2022 07:54:46 +0000 Subject: [PATCH 0548/4122] crypto: rockchip - better handle cipher key The key should not be set in hardware too far in advance: this will fail if 2 TFMs with different keys generate alternating requests. The key should be stored and used just before doing cipher operations. Fixes: ce0183cb6464b ("crypto: rockchip - switch to skcipher API") Reviewed-by: John Keeping Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/rockchip/rk3288_crypto.h | 1 + drivers/crypto/rockchip/rk3288_crypto_skcipher.c | 10 +++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/rockchip/rk3288_crypto.h b/drivers/crypto/rockchip/rk3288_crypto.h index 8b1e15d8ddc6..540b81a14b9b 100644 --- a/drivers/crypto/rockchip/rk3288_crypto.h +++ b/drivers/crypto/rockchip/rk3288_crypto.h @@ -245,6 +245,7 @@ struct rk_ahash_rctx { struct rk_cipher_ctx { struct rk_crypto_info *dev; unsigned int keylen; + u8 key[AES_MAX_KEY_SIZE]; u8 iv[AES_BLOCK_SIZE]; struct crypto_skcipher *fallback_tfm; }; diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c index eac5bba66e25..1ef94f8db2c5 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c +++ b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c @@ -95,7 +95,7 @@ static int rk_aes_setkey(struct crypto_skcipher *cipher, keylen != AES_KEYSIZE_256) return -EINVAL; ctx->keylen = keylen; - memcpy_toio(ctx->dev->reg + RK_CRYPTO_AES_KEY_0, key, keylen); + memcpy(ctx->key, key, keylen); return 
crypto_skcipher_setkey(ctx->fallback_tfm, key, keylen); } @@ -111,7 +111,7 @@ static int rk_des_setkey(struct crypto_skcipher *cipher, return err; ctx->keylen = keylen; - memcpy_toio(ctx->dev->reg + RK_CRYPTO_TDES_KEY1_0, key, keylen); + memcpy(ctx->key, key, keylen); return crypto_skcipher_setkey(ctx->fallback_tfm, key, keylen); } @@ -127,7 +127,8 @@ static int rk_tdes_setkey(struct crypto_skcipher *cipher, return err; ctx->keylen = keylen; - memcpy_toio(ctx->dev->reg + RK_CRYPTO_TDES_KEY1_0, key, keylen); + memcpy(ctx->key, key, keylen); + return crypto_skcipher_setkey(ctx->fallback_tfm, key, keylen); } @@ -283,6 +284,7 @@ static void rk_ablk_hw_init(struct rk_crypto_info *dev) RK_CRYPTO_TDES_BYTESWAP_IV; CRYPTO_WRITE(dev, RK_CRYPTO_TDES_CTRL, rctx->mode); memcpy_toio(dev->reg + RK_CRYPTO_TDES_IV_0, req->iv, ivsize); + memcpy_toio(ctx->dev->reg + RK_CRYPTO_TDES_KEY1_0, ctx->key, ctx->keylen); conf_reg = RK_CRYPTO_DESSEL; } else { rctx->mode |= RK_CRYPTO_AES_FIFO_MODE | @@ -295,6 +297,7 @@ static void rk_ablk_hw_init(struct rk_crypto_info *dev) rctx->mode |= RK_CRYPTO_AES_256BIT_key; CRYPTO_WRITE(dev, RK_CRYPTO_AES_CTRL, rctx->mode); memcpy_toio(dev->reg + RK_CRYPTO_AES_IV_0, req->iv, ivsize); + memcpy_toio(ctx->dev->reg + RK_CRYPTO_AES_KEY_0, ctx->key, ctx->keylen); } conf_reg |= RK_CRYPTO_BYTESWAP_BTFIFO | RK_CRYPTO_BYTESWAP_BRFIFO; @@ -484,6 +487,7 @@ static void rk_ablk_exit_tfm(struct crypto_skcipher *tfm) { struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); + memzero_explicit(ctx->key, ctx->keylen); free_page((unsigned long)ctx->dev->addr_vir); crypto_free_skcipher(ctx->fallback_tfm); } From bb3c7b73363c9a149b12b74c44ae94b73a8fddf8 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Tue, 27 Sep 2022 07:54:47 +0000 Subject: [PATCH 0549/4122] crypto: rockchip - remove non-aligned handling Now that the driver has a fallback for unaligned cases, remove all code handling those cases. 
Fixes: ce0183cb6464b ("crypto: rockchip - switch to skcipher API") Reviewed-by: John Keeping Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/rockchip/rk3288_crypto.c | 67 +++++-------------- drivers/crypto/rockchip/rk3288_crypto.h | 4 -- drivers/crypto/rockchip/rk3288_crypto_ahash.c | 22 ++---- .../crypto/rockchip/rk3288_crypto_skcipher.c | 39 +++-------- 4 files changed, 30 insertions(+), 102 deletions(-) diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c index 4cff49b82983..b3db096e2ec2 100644 --- a/drivers/crypto/rockchip/rk3288_crypto.c +++ b/drivers/crypto/rockchip/rk3288_crypto.c @@ -88,63 +88,26 @@ static int rk_load_data(struct rk_crypto_info *dev, { unsigned int count; - dev->aligned = dev->aligned ? - check_alignment(sg_src, sg_dst, dev->align_size) : - dev->aligned; - if (dev->aligned) { - count = min(dev->left_bytes, sg_src->length); - dev->left_bytes -= count; + count = min(dev->left_bytes, sg_src->length); + dev->left_bytes -= count; - if (!dma_map_sg(dev->dev, sg_src, 1, DMA_TO_DEVICE)) { - dev_err(dev->dev, "[%s:%d] dma_map_sg(src) error\n", + if (!dma_map_sg(dev->dev, sg_src, 1, DMA_TO_DEVICE)) { + dev_err(dev->dev, "[%s:%d] dma_map_sg(src) error\n", __func__, __LINE__); - return -EINVAL; - } - dev->addr_in = sg_dma_address(sg_src); + return -EINVAL; + } + dev->addr_in = sg_dma_address(sg_src); - if (sg_dst) { - if (!dma_map_sg(dev->dev, sg_dst, 1, DMA_FROM_DEVICE)) { - dev_err(dev->dev, + if (sg_dst) { + if (!dma_map_sg(dev->dev, sg_dst, 1, DMA_FROM_DEVICE)) { + dev_err(dev->dev, "[%s:%d] dma_map_sg(dst) error\n", __func__, __LINE__); - dma_unmap_sg(dev->dev, sg_src, 1, - DMA_TO_DEVICE); - return -EINVAL; - } - dev->addr_out = sg_dma_address(sg_dst); - } - } else { - count = (dev->left_bytes > PAGE_SIZE) ? 
- PAGE_SIZE : dev->left_bytes; - - if (!sg_pcopy_to_buffer(dev->first, dev->src_nents, - dev->addr_vir, count, - dev->total - dev->left_bytes)) { - dev_err(dev->dev, "[%s:%d] pcopy err\n", - __func__, __LINE__); + dma_unmap_sg(dev->dev, sg_src, 1, + DMA_TO_DEVICE); return -EINVAL; } - dev->left_bytes -= count; - sg_init_one(&dev->sg_tmp, dev->addr_vir, count); - if (!dma_map_sg(dev->dev, &dev->sg_tmp, 1, DMA_TO_DEVICE)) { - dev_err(dev->dev, "[%s:%d] dma_map_sg(sg_tmp) error\n", - __func__, __LINE__); - return -ENOMEM; - } - dev->addr_in = sg_dma_address(&dev->sg_tmp); - - if (sg_dst) { - if (!dma_map_sg(dev->dev, &dev->sg_tmp, 1, - DMA_FROM_DEVICE)) { - dev_err(dev->dev, - "[%s:%d] dma_map_sg(sg_tmp) error\n", - __func__, __LINE__); - dma_unmap_sg(dev->dev, &dev->sg_tmp, 1, - DMA_TO_DEVICE); - return -ENOMEM; - } - dev->addr_out = sg_dma_address(&dev->sg_tmp); - } + dev->addr_out = sg_dma_address(sg_dst); } dev->count = count; return 0; @@ -154,11 +117,11 @@ static void rk_unload_data(struct rk_crypto_info *dev) { struct scatterlist *sg_in, *sg_out; - sg_in = dev->aligned ? dev->sg_src : &dev->sg_tmp; + sg_in = dev->sg_src; dma_unmap_sg(dev->dev, sg_in, 1, DMA_TO_DEVICE); if (dev->sg_dst) { - sg_out = dev->aligned ? 
dev->sg_dst : &dev->sg_tmp; + sg_out = dev->sg_dst; dma_unmap_sg(dev->dev, sg_out, 1, DMA_FROM_DEVICE); } } diff --git a/drivers/crypto/rockchip/rk3288_crypto.h b/drivers/crypto/rockchip/rk3288_crypto.h index 540b81a14b9b..a7de5738f6dc 100644 --- a/drivers/crypto/rockchip/rk3288_crypto.h +++ b/drivers/crypto/rockchip/rk3288_crypto.h @@ -204,12 +204,8 @@ struct rk_crypto_info { /* the public variable */ struct scatterlist *sg_src; struct scatterlist *sg_dst; - struct scatterlist sg_tmp; struct scatterlist *first; unsigned int left_bytes; - void *addr_vir; - int aligned; - int align_size; size_t src_nents; size_t dst_nents; unsigned int total; diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c index 16009bb0bf16..c762e462eb57 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c +++ b/drivers/crypto/rockchip/rk3288_crypto_ahash.c @@ -236,8 +236,6 @@ static int rk_ahash_start(struct rk_crypto_info *dev) dev->total = req->nbytes; dev->left_bytes = req->nbytes; - dev->aligned = 0; - dev->align_size = 4; dev->sg_dst = NULL; dev->sg_src = req->src; dev->first = req->src; @@ -272,15 +270,13 @@ static int rk_ahash_crypto_rx(struct rk_crypto_info *dev) dev->unload_data(dev); if (dev->left_bytes) { - if (dev->aligned) { - if (sg_is_last(dev->sg_src)) { - dev_warn(dev->dev, "[%s:%d], Lack of data\n", - __func__, __LINE__); - err = -ENOMEM; - goto out_rx; - } - dev->sg_src = sg_next(dev->sg_src); + if (sg_is_last(dev->sg_src)) { + dev_warn(dev->dev, "[%s:%d], Lack of data\n", + __func__, __LINE__); + err = -ENOMEM; + goto out_rx; } + dev->sg_src = sg_next(dev->sg_src); err = rk_ahash_set_data_start(dev); } else { /* @@ -318,11 +314,6 @@ static int rk_cra_hash_init(struct crypto_tfm *tfm) algt = container_of(alg, struct rk_crypto_tmp, alg.hash); tctx->dev = algt->dev; - tctx->dev->addr_vir = (void *)__get_free_page(GFP_KERNEL); - if (!tctx->dev->addr_vir) { - dev_err(tctx->dev->dev, "failed to kmalloc for addr_vir\n"); 
- return -ENOMEM; - } tctx->dev->start = rk_ahash_start; tctx->dev->update = rk_ahash_crypto_rx; tctx->dev->complete = rk_ahash_crypto_complete; @@ -345,7 +336,6 @@ static void rk_cra_hash_exit(struct crypto_tfm *tfm) { struct rk_ahash_ctx *tctx = crypto_tfm_ctx(tfm); - free_page((unsigned long)tctx->dev->addr_vir); crypto_free_ahash(tctx->fallback_tfm); } diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c index 1ef94f8db2c5..d067b7f09165 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c +++ b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c @@ -356,7 +356,6 @@ static int rk_ablk_start(struct rk_crypto_info *dev) dev->src_nents = sg_nents(req->src); dev->sg_dst = req->dst; dev->dst_nents = sg_nents(req->dst); - dev->aligned = 1; spin_lock_irqsave(&dev->lock, flags); rk_ablk_hw_init(dev); @@ -376,13 +375,9 @@ static void rk_iv_copyback(struct rk_crypto_info *dev) /* Update the IV buffer to contain the next IV for encryption mode. 
*/ if (!(rctx->mode & RK_CRYPTO_DEC)) { - if (dev->aligned) { - memcpy(req->iv, sg_virt(dev->sg_dst) + - dev->sg_dst->length - ivsize, ivsize); - } else { - memcpy(req->iv, dev->addr_vir + - dev->count - ivsize, ivsize); - } + memcpy(req->iv, + sg_virt(dev->sg_dst) + dev->sg_dst->length - ivsize, + ivsize); } } @@ -420,27 +415,16 @@ static int rk_ablk_rx(struct rk_crypto_info *dev) skcipher_request_cast(dev->async_req); dev->unload_data(dev); - if (!dev->aligned) { - if (!sg_pcopy_from_buffer(req->dst, dev->dst_nents, - dev->addr_vir, dev->count, - dev->total - dev->left_bytes - - dev->count)) { - err = -EINVAL; - goto out_rx; - } - } if (dev->left_bytes) { rk_update_iv(dev); - if (dev->aligned) { - if (sg_is_last(dev->sg_src)) { - dev_err(dev->dev, "[%s:%d] Lack of data\n", + if (sg_is_last(dev->sg_src)) { + dev_err(dev->dev, "[%s:%d] Lack of data\n", __func__, __LINE__); - err = -ENOMEM; - goto out_rx; - } - dev->sg_src = sg_next(dev->sg_src); - dev->sg_dst = sg_next(dev->sg_dst); + err = -ENOMEM; + goto out_rx; } + dev->sg_src = sg_next(dev->sg_src); + dev->sg_dst = sg_next(dev->sg_dst); err = rk_set_data_start(dev); } else { rk_iv_copyback(dev); @@ -462,13 +446,9 @@ static int rk_ablk_init_tfm(struct crypto_skcipher *tfm) algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher); ctx->dev = algt->dev; - ctx->dev->align_size = crypto_tfm_alg_alignmask(crypto_skcipher_tfm(tfm)) + 1; ctx->dev->start = rk_ablk_start; ctx->dev->update = rk_ablk_rx; ctx->dev->complete = rk_crypto_complete; - ctx->dev->addr_vir = (char *)__get_free_page(GFP_KERNEL); - if (!ctx->dev->addr_vir) - return -ENOMEM; ctx->fallback_tfm = crypto_alloc_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(ctx->fallback_tfm)) { @@ -488,7 +468,6 @@ static void rk_ablk_exit_tfm(struct crypto_skcipher *tfm) struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); memzero_explicit(ctx->key, ctx->keylen); - free_page((unsigned long)ctx->dev->addr_vir); crypto_free_skcipher(ctx->fallback_tfm); } 
From 57d67c6e8219b2a034c16d6149e30fb40fd39935 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Tue, 27 Sep 2022 07:54:48 +0000 Subject: [PATCH 0550/4122] crypto: rockchip - rework by using crypto_engine Instead of doing manual queue management, let's use the crypto/engine for that. At the same time, rework the requests handling to be easier to understand (and fix all bugs related to them). Fixes: ce0183cb6464b ("crypto: rockchip - switch to skcipher API") Reviewed-by: John Keeping Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/Kconfig | 1 + drivers/crypto/rockchip/rk3288_crypto.c | 152 +--------- drivers/crypto/rockchip/rk3288_crypto.h | 37 +-- drivers/crypto/rockchip/rk3288_crypto_ahash.c | 140 +++++----- .../crypto/rockchip/rk3288_crypto_skcipher.c | 262 +++++++++--------- 5 files changed, 224 insertions(+), 368 deletions(-) diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 113b35f69598..c30b5a39c2ac 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -674,6 +674,7 @@ config CRYPTO_DEV_ROCKCHIP select CRYPTO_CBC select CRYPTO_DES select CRYPTO_AES + select CRYPTO_ENGINE select CRYPTO_LIB_DES select CRYPTO_MD5 select CRYPTO_SHA1 diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c index b3db096e2ec2..1afb65eee6c9 100644 --- a/drivers/crypto/rockchip/rk3288_crypto.c +++ b/drivers/crypto/rockchip/rk3288_crypto.c @@ -65,149 +65,24 @@ static void rk_crypto_disable_clk(struct rk_crypto_info *dev) clk_disable_unprepare(dev->sclk); } -static int check_alignment(struct scatterlist *sg_src, - struct scatterlist *sg_dst, - int align_mask) -{ - int in, out, align; - - in = IS_ALIGNED((uint32_t)sg_src->offset, 4) && - IS_ALIGNED((uint32_t)sg_src->length, align_mask); - if (!sg_dst) - return in; - out = IS_ALIGNED((uint32_t)sg_dst->offset, 4) && - IS_ALIGNED((uint32_t)sg_dst->length, align_mask); - align = in && out; - - return (align && (sg_src->length == 
sg_dst->length)); -} - -static int rk_load_data(struct rk_crypto_info *dev, - struct scatterlist *sg_src, - struct scatterlist *sg_dst) -{ - unsigned int count; - - count = min(dev->left_bytes, sg_src->length); - dev->left_bytes -= count; - - if (!dma_map_sg(dev->dev, sg_src, 1, DMA_TO_DEVICE)) { - dev_err(dev->dev, "[%s:%d] dma_map_sg(src) error\n", - __func__, __LINE__); - return -EINVAL; - } - dev->addr_in = sg_dma_address(sg_src); - - if (sg_dst) { - if (!dma_map_sg(dev->dev, sg_dst, 1, DMA_FROM_DEVICE)) { - dev_err(dev->dev, - "[%s:%d] dma_map_sg(dst) error\n", - __func__, __LINE__); - dma_unmap_sg(dev->dev, sg_src, 1, - DMA_TO_DEVICE); - return -EINVAL; - } - dev->addr_out = sg_dma_address(sg_dst); - } - dev->count = count; - return 0; -} - -static void rk_unload_data(struct rk_crypto_info *dev) -{ - struct scatterlist *sg_in, *sg_out; - - sg_in = dev->sg_src; - dma_unmap_sg(dev->dev, sg_in, 1, DMA_TO_DEVICE); - - if (dev->sg_dst) { - sg_out = dev->sg_dst; - dma_unmap_sg(dev->dev, sg_out, 1, DMA_FROM_DEVICE); - } -} - static irqreturn_t rk_crypto_irq_handle(int irq, void *dev_id) { struct rk_crypto_info *dev = platform_get_drvdata(dev_id); u32 interrupt_status; - spin_lock(&dev->lock); interrupt_status = CRYPTO_READ(dev, RK_CRYPTO_INTSTS); CRYPTO_WRITE(dev, RK_CRYPTO_INTSTS, interrupt_status); + dev->status = 1; if (interrupt_status & 0x0a) { dev_warn(dev->dev, "DMA Error\n"); - dev->err = -EFAULT; + dev->status = 0; } - tasklet_schedule(&dev->done_task); + complete(&dev->complete); - spin_unlock(&dev->lock); return IRQ_HANDLED; } -static int rk_crypto_enqueue(struct rk_crypto_info *dev, - struct crypto_async_request *async_req) -{ - unsigned long flags; - int ret; - - spin_lock_irqsave(&dev->lock, flags); - ret = crypto_enqueue_request(&dev->queue, async_req); - if (dev->busy) { - spin_unlock_irqrestore(&dev->lock, flags); - return ret; - } - dev->busy = true; - spin_unlock_irqrestore(&dev->lock, flags); - tasklet_schedule(&dev->queue_task); - - return ret; 
-} - -static void rk_crypto_queue_task_cb(unsigned long data) -{ - struct rk_crypto_info *dev = (struct rk_crypto_info *)data; - struct crypto_async_request *async_req, *backlog; - unsigned long flags; - int err = 0; - - dev->err = 0; - spin_lock_irqsave(&dev->lock, flags); - backlog = crypto_get_backlog(&dev->queue); - async_req = crypto_dequeue_request(&dev->queue); - - if (!async_req) { - dev->busy = false; - spin_unlock_irqrestore(&dev->lock, flags); - return; - } - spin_unlock_irqrestore(&dev->lock, flags); - - if (backlog) { - backlog->complete(backlog, -EINPROGRESS); - backlog = NULL; - } - - dev->async_req = async_req; - err = dev->start(dev); - if (err) - dev->complete(dev->async_req, err); -} - -static void rk_crypto_done_task_cb(unsigned long data) -{ - struct rk_crypto_info *dev = (struct rk_crypto_info *)data; - - if (dev->err) { - dev->complete(dev->async_req, dev->err); - return; - } - - dev->err = dev->update(dev); - if (dev->err) - dev->complete(dev->async_req, dev->err); -} - static struct rk_crypto_tmp *rk_cipher_algs[] = { &rk_ecb_aes_alg, &rk_cbc_aes_alg, @@ -300,8 +175,6 @@ static int rk_crypto_probe(struct platform_device *pdev) if (err) goto err_crypto; - spin_lock_init(&crypto_info->lock); - crypto_info->reg = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(crypto_info->reg)) { err = PTR_ERR(crypto_info->reg); @@ -351,17 +224,11 @@ static int rk_crypto_probe(struct platform_device *pdev) crypto_info->dev = &pdev->dev; platform_set_drvdata(pdev, crypto_info); - tasklet_init(&crypto_info->queue_task, - rk_crypto_queue_task_cb, (unsigned long)crypto_info); - tasklet_init(&crypto_info->done_task, - rk_crypto_done_task_cb, (unsigned long)crypto_info); - crypto_init_queue(&crypto_info->queue, 50); + crypto_info->engine = crypto_engine_alloc_init(&pdev->dev, true); + crypto_engine_start(crypto_info->engine); + init_completion(&crypto_info->complete); rk_crypto_enable_clk(crypto_info); - crypto_info->load_data = rk_load_data; - 
crypto_info->unload_data = rk_unload_data; - crypto_info->enqueue = rk_crypto_enqueue; - crypto_info->busy = false; err = rk_crypto_register(crypto_info); if (err) { @@ -373,9 +240,9 @@ static int rk_crypto_probe(struct platform_device *pdev) return 0; err_register_alg: - tasklet_kill(&crypto_info->queue_task); - tasklet_kill(&crypto_info->done_task); + crypto_engine_exit(crypto_info->engine); err_crypto: + dev_err(dev, "Crypto Accelerator not successfully registered\n"); return err; } @@ -385,8 +252,7 @@ static int rk_crypto_remove(struct platform_device *pdev) rk_crypto_unregister(); rk_crypto_disable_clk(crypto_tmp); - tasklet_kill(&crypto_tmp->done_task); - tasklet_kill(&crypto_tmp->queue_task); + crypto_engine_exit(crypto_tmp->engine); return 0; } diff --git a/drivers/crypto/rockchip/rk3288_crypto.h b/drivers/crypto/rockchip/rk3288_crypto.h index a7de5738f6dc..65ed645e0168 100644 --- a/drivers/crypto/rockchip/rk3288_crypto.h +++ b/drivers/crypto/rockchip/rk3288_crypto.h @@ -5,9 +5,11 @@ #include #include #include +#include #include #include #include +#include #include #include @@ -193,39 +195,15 @@ struct rk_crypto_info { struct reset_control *rst; void __iomem *reg; int irq; - struct crypto_queue queue; - struct tasklet_struct queue_task; - struct tasklet_struct done_task; - struct crypto_async_request *async_req; - int err; - /* device lock */ - spinlock_t lock; - /* the public variable */ - struct scatterlist *sg_src; - struct scatterlist *sg_dst; - struct scatterlist *first; - unsigned int left_bytes; - size_t src_nents; - size_t dst_nents; - unsigned int total; - unsigned int count; - dma_addr_t addr_in; - dma_addr_t addr_out; - bool busy; - int (*start)(struct rk_crypto_info *dev); - int (*update)(struct rk_crypto_info *dev); - void (*complete)(struct crypto_async_request *base, int err); - int (*load_data)(struct rk_crypto_info *dev, - struct scatterlist *sg_src, - struct scatterlist *sg_dst); - void (*unload_data)(struct rk_crypto_info *dev); - int 
(*enqueue)(struct rk_crypto_info *dev, - struct crypto_async_request *async_req); + struct crypto_engine *engine; + struct completion complete; + int status; }; /* the private variable of hash */ struct rk_ahash_ctx { + struct crypto_engine_ctx enginectx; struct rk_crypto_info *dev; /* for fallback */ struct crypto_ahash *fallback_tfm; @@ -235,10 +213,12 @@ struct rk_ahash_ctx { struct rk_ahash_rctx { struct ahash_request fallback_req; u32 mode; + int nrsg; }; /* the private variable of cipher */ struct rk_cipher_ctx { + struct crypto_engine_ctx enginectx; struct rk_crypto_info *dev; unsigned int keylen; u8 key[AES_MAX_KEY_SIZE]; @@ -247,6 +227,7 @@ struct rk_cipher_ctx { }; struct rk_cipher_rctx { + u8 backup_iv[AES_BLOCK_SIZE]; u32 mode; struct skcipher_request fallback_req; // keep at the end }; diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c index c762e462eb57..edd40e16a3f0 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c +++ b/drivers/crypto/rockchip/rk3288_crypto_ahash.c @@ -9,6 +9,7 @@ * Some ideas are from marvell/cesa.c and s5p-sss.c driver. 
*/ #include +#include #include "rk3288_crypto.h" /* @@ -72,16 +73,12 @@ static int zero_message_process(struct ahash_request *req) return 0; } -static void rk_ahash_crypto_complete(struct crypto_async_request *base, int err) +static void rk_ahash_reg_init(struct ahash_request *req) { - if (base->complete) - base->complete(base, err); -} - -static void rk_ahash_reg_init(struct rk_crypto_info *dev) -{ - struct ahash_request *req = ahash_request_cast(dev->async_req); struct rk_ahash_rctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct rk_ahash_ctx *tctx = crypto_ahash_ctx(tfm); + struct rk_crypto_info *dev = tctx->dev; int reg_status; reg_status = CRYPTO_READ(dev, RK_CRYPTO_CTRL) | @@ -108,7 +105,7 @@ static void rk_ahash_reg_init(struct rk_crypto_info *dev) RK_CRYPTO_BYTESWAP_BRFIFO | RK_CRYPTO_BYTESWAP_BTFIFO); - CRYPTO_WRITE(dev, RK_CRYPTO_HASH_MSG_LEN, dev->total); + CRYPTO_WRITE(dev, RK_CRYPTO_HASH_MSG_LEN, req->nbytes); } static int rk_ahash_init(struct ahash_request *req) @@ -206,44 +203,59 @@ static int rk_ahash_digest(struct ahash_request *req) if (!req->nbytes) return zero_message_process(req); - else - return dev->enqueue(dev, &req->base); + + return crypto_transfer_hash_request_to_engine(dev->engine, req); } -static void crypto_ahash_dma_start(struct rk_crypto_info *dev) +static void crypto_ahash_dma_start(struct rk_crypto_info *dev, struct scatterlist *sg) { - CRYPTO_WRITE(dev, RK_CRYPTO_HRDMAS, dev->addr_in); - CRYPTO_WRITE(dev, RK_CRYPTO_HRDMAL, (dev->count + 3) / 4); + CRYPTO_WRITE(dev, RK_CRYPTO_HRDMAS, sg_dma_address(sg)); + CRYPTO_WRITE(dev, RK_CRYPTO_HRDMAL, sg_dma_len(sg) / 4); CRYPTO_WRITE(dev, RK_CRYPTO_CTRL, RK_CRYPTO_HASH_START | (RK_CRYPTO_HASH_START << 16)); } -static int rk_ahash_set_data_start(struct rk_crypto_info *dev) +static int rk_hash_prepare(struct crypto_engine *engine, void *breq) { - int err; + struct ahash_request *areq = container_of(breq, struct ahash_request, base); + struct 
crypto_ahash *tfm = crypto_ahash_reqtfm(areq); + struct rk_ahash_rctx *rctx = ahash_request_ctx(areq); + struct rk_ahash_ctx *tctx = crypto_ahash_ctx(tfm); + int ret; - err = dev->load_data(dev, dev->sg_src, NULL); - if (!err) - crypto_ahash_dma_start(dev); - return err; + ret = dma_map_sg(tctx->dev->dev, areq->src, sg_nents(areq->src), DMA_TO_DEVICE); + if (ret <= 0) + return -EINVAL; + + rctx->nrsg = ret; + + return 0; } -static int rk_ahash_start(struct rk_crypto_info *dev) +static int rk_hash_unprepare(struct crypto_engine *engine, void *breq) { - struct ahash_request *req = ahash_request_cast(dev->async_req); - struct crypto_ahash *tfm; - struct rk_ahash_rctx *rctx; + struct ahash_request *areq = container_of(breq, struct ahash_request, base); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); + struct rk_ahash_rctx *rctx = ahash_request_ctx(areq); + struct rk_ahash_ctx *tctx = crypto_ahash_ctx(tfm); + + dma_unmap_sg(tctx->dev->dev, areq->src, rctx->nrsg, DMA_TO_DEVICE); + return 0; +} + +static int rk_hash_run(struct crypto_engine *engine, void *breq) +{ + struct ahash_request *areq = container_of(breq, struct ahash_request, base); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); + struct rk_ahash_rctx *rctx = ahash_request_ctx(areq); + struct rk_ahash_ctx *tctx = crypto_ahash_ctx(tfm); + struct scatterlist *sg = areq->src; + int err = 0; + int i; + u32 v; - dev->total = req->nbytes; - dev->left_bytes = req->nbytes; - dev->sg_dst = NULL; - dev->sg_src = req->src; - dev->first = req->src; - dev->src_nents = sg_nents(req->src); - rctx = ahash_request_ctx(req); rctx->mode = 0; - tfm = crypto_ahash_reqtfm(req); switch (crypto_ahash_digestsize(tfm)) { case SHA1_DIGEST_SIZE: rctx->mode = RK_CRYPTO_HASH_SHA1; @@ -255,30 +267,26 @@ static int rk_ahash_start(struct rk_crypto_info *dev) rctx->mode = RK_CRYPTO_HASH_MD5; break; default: - return -EINVAL; + err = -EINVAL; + goto theend; } - rk_ahash_reg_init(dev); - return rk_ahash_set_data_start(dev); -} + 
rk_ahash_reg_init(areq); -static int rk_ahash_crypto_rx(struct rk_crypto_info *dev) -{ - int err = 0; - struct ahash_request *req = ahash_request_cast(dev->async_req); - struct crypto_ahash *tfm; - - dev->unload_data(dev); - if (dev->left_bytes) { - if (sg_is_last(dev->sg_src)) { - dev_warn(dev->dev, "[%s:%d], Lack of data\n", - __func__, __LINE__); - err = -ENOMEM; - goto out_rx; + while (sg) { + reinit_completion(&tctx->dev->complete); + tctx->dev->status = 0; + crypto_ahash_dma_start(tctx->dev, sg); + wait_for_completion_interruptible_timeout(&tctx->dev->complete, + msecs_to_jiffies(2000)); + if (!tctx->dev->status) { + dev_err(tctx->dev->dev, "DMA timeout\n"); + err = -EFAULT; + goto theend; } - dev->sg_src = sg_next(dev->sg_src); - err = rk_ahash_set_data_start(dev); - } else { + sg = sg_next(sg); + } + /* * it will take some time to process date after last dma * transmission. @@ -289,18 +297,20 @@ static int rk_ahash_crypto_rx(struct rk_crypto_info *dev) * efficiency, and make it response quickly when dma * complete. 
*/ - while (!CRYPTO_READ(dev, RK_CRYPTO_HASH_STS)) - udelay(10); + while (!CRYPTO_READ(tctx->dev, RK_CRYPTO_HASH_STS)) + udelay(10); - tfm = crypto_ahash_reqtfm(req); - memcpy_fromio(req->result, dev->reg + RK_CRYPTO_HASH_DOUT_0, - crypto_ahash_digestsize(tfm)); - dev->complete(dev->async_req, 0); - tasklet_schedule(&dev->queue_task); + for (i = 0; i < crypto_ahash_digestsize(tfm) / 4; i++) { + v = readl(tctx->dev->reg + RK_CRYPTO_HASH_DOUT_0 + i * 4); + put_unaligned_le32(v, areq->result + i * 4); } -out_rx: - return err; +theend: + local_bh_disable(); + crypto_finalize_hash_request(engine, breq, err); + local_bh_enable(); + + return 0; } static int rk_cra_hash_init(struct crypto_tfm *tfm) @@ -314,9 +324,6 @@ static int rk_cra_hash_init(struct crypto_tfm *tfm) algt = container_of(alg, struct rk_crypto_tmp, alg.hash); tctx->dev = algt->dev; - tctx->dev->start = rk_ahash_start; - tctx->dev->update = rk_ahash_crypto_rx; - tctx->dev->complete = rk_ahash_crypto_complete; /* for fallback */ tctx->fallback_tfm = crypto_alloc_ahash(alg_name, 0, @@ -325,10 +332,15 @@ static int rk_cra_hash_init(struct crypto_tfm *tfm) dev_err(tctx->dev->dev, "Could not load fallback driver.\n"); return PTR_ERR(tctx->fallback_tfm); } + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), sizeof(struct rk_ahash_rctx) + crypto_ahash_reqsize(tctx->fallback_tfm)); + tctx->enginectx.op.do_one_request = rk_hash_run; + tctx->enginectx.op.prepare_request = rk_hash_prepare; + tctx->enginectx.op.unprepare_request = rk_hash_unprepare; + return 0; } diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c index d067b7f09165..67a7e05d5ae3 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c +++ b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c @@ -9,6 +9,7 @@ * Some ideas are from marvell-cesa.c and s5p-sss.c driver. 
*/ #include +#include #include "rk3288_crypto.h" #define RK_CRYPTO_DEC BIT(0) @@ -70,19 +71,15 @@ static int rk_cipher_fallback(struct skcipher_request *areq) return err; } -static void rk_crypto_complete(struct crypto_async_request *base, int err) -{ - if (base->complete) - base->complete(base, err); -} - static int rk_handle_req(struct rk_crypto_info *dev, struct skcipher_request *req) { + struct crypto_engine *engine = dev->engine; + if (rk_cipher_need_fallback(req)) return rk_cipher_fallback(req); - return dev->enqueue(dev, &req->base); + return crypto_transfer_skcipher_request_to_engine(engine, req); } static int rk_aes_setkey(struct crypto_skcipher *cipher, @@ -265,25 +262,21 @@ static int rk_des3_ede_cbc_decrypt(struct skcipher_request *req) return rk_handle_req(dev, req); } -static void rk_ablk_hw_init(struct rk_crypto_info *dev) +static void rk_ablk_hw_init(struct rk_crypto_info *dev, struct skcipher_request *req) { - struct skcipher_request *req = - skcipher_request_cast(dev->async_req); struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req); struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher); struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(cipher); - u32 ivsize, block, conf_reg = 0; + u32 block, conf_reg = 0; block = crypto_tfm_alg_blocksize(tfm); - ivsize = crypto_skcipher_ivsize(cipher); if (block == DES_BLOCK_SIZE) { rctx->mode |= RK_CRYPTO_TDES_FIFO_MODE | RK_CRYPTO_TDES_BYTESWAP_KEY | RK_CRYPTO_TDES_BYTESWAP_IV; CRYPTO_WRITE(dev, RK_CRYPTO_TDES_CTRL, rctx->mode); - memcpy_toio(dev->reg + RK_CRYPTO_TDES_IV_0, req->iv, ivsize); memcpy_toio(ctx->dev->reg + RK_CRYPTO_TDES_KEY1_0, ctx->key, ctx->keylen); conf_reg = RK_CRYPTO_DESSEL; } else { @@ -296,7 +289,6 @@ static void rk_ablk_hw_init(struct rk_crypto_info *dev) else if (ctx->keylen == AES_KEYSIZE_256) rctx->mode |= RK_CRYPTO_AES_256BIT_key; CRYPTO_WRITE(dev, RK_CRYPTO_AES_CTRL, rctx->mode); - memcpy_toio(dev->reg + RK_CRYPTO_AES_IV_0, 
req->iv, ivsize); memcpy_toio(ctx->dev->reg + RK_CRYPTO_AES_KEY_0, ctx->key, ctx->keylen); } conf_reg |= RK_CRYPTO_BYTESWAP_BTFIFO | @@ -306,133 +298,138 @@ static void rk_ablk_hw_init(struct rk_crypto_info *dev) RK_CRYPTO_BCDMA_ERR_ENA | RK_CRYPTO_BCDMA_DONE_ENA); } -static void crypto_dma_start(struct rk_crypto_info *dev) +static void crypto_dma_start(struct rk_crypto_info *dev, + struct scatterlist *sgs, + struct scatterlist *sgd, unsigned int todo) { - CRYPTO_WRITE(dev, RK_CRYPTO_BRDMAS, dev->addr_in); - CRYPTO_WRITE(dev, RK_CRYPTO_BRDMAL, dev->count / 4); - CRYPTO_WRITE(dev, RK_CRYPTO_BTDMAS, dev->addr_out); + CRYPTO_WRITE(dev, RK_CRYPTO_BRDMAS, sg_dma_address(sgs)); + CRYPTO_WRITE(dev, RK_CRYPTO_BRDMAL, todo); + CRYPTO_WRITE(dev, RK_CRYPTO_BTDMAS, sg_dma_address(sgd)); CRYPTO_WRITE(dev, RK_CRYPTO_CTRL, RK_CRYPTO_BLOCK_START | _SBF(RK_CRYPTO_BLOCK_START, 16)); } -static int rk_set_data_start(struct rk_crypto_info *dev) +static int rk_cipher_run(struct crypto_engine *engine, void *async_req) { - int err; - struct skcipher_request *req = - skcipher_request_cast(dev->async_req); - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); + struct skcipher_request *areq = container_of(async_req, struct skcipher_request, base); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq); struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); - u32 ivsize = crypto_skcipher_ivsize(tfm); - u8 *src_last_blk = page_address(sg_page(dev->sg_src)) + - dev->sg_src->offset + dev->sg_src->length - ivsize; - - /* Store the iv that need to be updated in chain mode. - * And update the IV buffer to contain the next IV for decryption mode. 
- */ - if (rctx->mode & RK_CRYPTO_DEC) { - memcpy(ctx->iv, src_last_blk, ivsize); - sg_pcopy_to_buffer(dev->first, dev->src_nents, req->iv, - ivsize, dev->total - ivsize); - } - - err = dev->load_data(dev, dev->sg_src, dev->sg_dst); - if (!err) - crypto_dma_start(dev); - return err; -} - -static int rk_ablk_start(struct rk_crypto_info *dev) -{ - struct skcipher_request *req = - skcipher_request_cast(dev->async_req); - unsigned long flags; + struct rk_cipher_rctx *rctx = skcipher_request_ctx(areq); + struct scatterlist *sgs, *sgd; int err = 0; + int ivsize = crypto_skcipher_ivsize(tfm); + int offset; + u8 iv[AES_BLOCK_SIZE]; + u8 biv[AES_BLOCK_SIZE]; + u8 *ivtouse = areq->iv; + unsigned int len = areq->cryptlen; + unsigned int todo; - dev->left_bytes = req->cryptlen; - dev->total = req->cryptlen; - dev->sg_src = req->src; - dev->first = req->src; - dev->src_nents = sg_nents(req->src); - dev->sg_dst = req->dst; - dev->dst_nents = sg_nents(req->dst); - - spin_lock_irqsave(&dev->lock, flags); - rk_ablk_hw_init(dev); - err = rk_set_data_start(dev); - spin_unlock_irqrestore(&dev->lock, flags); - return err; -} - -static void rk_iv_copyback(struct rk_crypto_info *dev) -{ - struct skcipher_request *req = - skcipher_request_cast(dev->async_req); - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); - struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); - u32 ivsize = crypto_skcipher_ivsize(tfm); - - /* Update the IV buffer to contain the next IV for encryption mode. 
*/ - if (!(rctx->mode & RK_CRYPTO_DEC)) { - memcpy(req->iv, - sg_virt(dev->sg_dst) + dev->sg_dst->length - ivsize, - ivsize); - } -} - -static void rk_update_iv(struct rk_crypto_info *dev) -{ - struct skcipher_request *req = - skcipher_request_cast(dev->async_req); - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); - struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); - u32 ivsize = crypto_skcipher_ivsize(tfm); - u8 *new_iv = NULL; - - if (rctx->mode & RK_CRYPTO_DEC) { - new_iv = ctx->iv; - } else { - new_iv = page_address(sg_page(dev->sg_dst)) + - dev->sg_dst->offset + dev->sg_dst->length - ivsize; - } - - if (ivsize == DES_BLOCK_SIZE) - memcpy_toio(dev->reg + RK_CRYPTO_TDES_IV_0, new_iv, ivsize); - else if (ivsize == AES_BLOCK_SIZE) - memcpy_toio(dev->reg + RK_CRYPTO_AES_IV_0, new_iv, ivsize); -} - -/* return: - * true some err was occurred - * fault no err, continue - */ -static int rk_ablk_rx(struct rk_crypto_info *dev) -{ - int err = 0; - struct skcipher_request *req = - skcipher_request_cast(dev->async_req); - - dev->unload_data(dev); - if (dev->left_bytes) { - rk_update_iv(dev); - if (sg_is_last(dev->sg_src)) { - dev_err(dev->dev, "[%s:%d] Lack of data\n", - __func__, __LINE__); - err = -ENOMEM; - goto out_rx; + ivsize = crypto_skcipher_ivsize(tfm); + if (areq->iv && crypto_skcipher_ivsize(tfm) > 0) { + if (rctx->mode & RK_CRYPTO_DEC) { + offset = areq->cryptlen - ivsize; + scatterwalk_map_and_copy(rctx->backup_iv, areq->src, + offset, ivsize, 0); } - dev->sg_src = sg_next(dev->sg_src); - dev->sg_dst = sg_next(dev->sg_dst); - err = rk_set_data_start(dev); - } else { - rk_iv_copyback(dev); - /* here show the calculation is over without any err */ - dev->complete(dev->async_req, 0); - tasklet_schedule(&dev->queue_task); } -out_rx: + + sgs = areq->src; + sgd = areq->dst; + + while (sgs && sgd && len) { + if (!sgs->length) { + sgs = sg_next(sgs); + sgd = sg_next(sgd); + continue; + } + if 
(rctx->mode & RK_CRYPTO_DEC) { + /* we backup last block of source to be used as IV at next step */ + offset = sgs->length - ivsize; + scatterwalk_map_and_copy(biv, sgs, offset, ivsize, 0); + } + if (sgs == sgd) { + err = dma_map_sg(ctx->dev->dev, sgs, 1, DMA_BIDIRECTIONAL); + if (err <= 0) { + err = -EINVAL; + goto theend_iv; + } + } else { + err = dma_map_sg(ctx->dev->dev, sgs, 1, DMA_TO_DEVICE); + if (err <= 0) { + err = -EINVAL; + goto theend_iv; + } + err = dma_map_sg(ctx->dev->dev, sgd, 1, DMA_FROM_DEVICE); + if (err <= 0) { + err = -EINVAL; + goto theend_sgs; + } + } + err = 0; + rk_ablk_hw_init(ctx->dev, areq); + if (ivsize) { + if (ivsize == DES_BLOCK_SIZE) + memcpy_toio(ctx->dev->reg + RK_CRYPTO_TDES_IV_0, ivtouse, ivsize); + else + memcpy_toio(ctx->dev->reg + RK_CRYPTO_AES_IV_0, ivtouse, ivsize); + } + reinit_completion(&ctx->dev->complete); + ctx->dev->status = 0; + + todo = min(sg_dma_len(sgs), len); + len -= todo; + crypto_dma_start(ctx->dev, sgs, sgd, todo / 4); + wait_for_completion_interruptible_timeout(&ctx->dev->complete, + msecs_to_jiffies(2000)); + if (!ctx->dev->status) { + dev_err(ctx->dev->dev, "DMA timeout\n"); + err = -EFAULT; + goto theend; + } + if (sgs == sgd) { + dma_unmap_sg(ctx->dev->dev, sgs, 1, DMA_BIDIRECTIONAL); + } else { + dma_unmap_sg(ctx->dev->dev, sgs, 1, DMA_TO_DEVICE); + dma_unmap_sg(ctx->dev->dev, sgd, 1, DMA_FROM_DEVICE); + } + if (rctx->mode & RK_CRYPTO_DEC) { + memcpy(iv, biv, ivsize); + ivtouse = iv; + } else { + offset = sgd->length - ivsize; + scatterwalk_map_and_copy(iv, sgd, offset, ivsize, 0); + ivtouse = iv; + } + sgs = sg_next(sgs); + sgd = sg_next(sgd); + } + + if (areq->iv && ivsize > 0) { + offset = areq->cryptlen - ivsize; + if (rctx->mode & RK_CRYPTO_DEC) { + memcpy(areq->iv, rctx->backup_iv, ivsize); + memzero_explicit(rctx->backup_iv, ivsize); + } else { + scatterwalk_map_and_copy(areq->iv, areq->dst, offset, + ivsize, 0); + } + } + +theend: + local_bh_disable(); + 
crypto_finalize_skcipher_request(engine, areq, err); + local_bh_enable(); + return 0; + +theend_sgs: + if (sgs == sgd) { + dma_unmap_sg(ctx->dev->dev, sgs, 1, DMA_BIDIRECTIONAL); + } else { + dma_unmap_sg(ctx->dev->dev, sgs, 1, DMA_TO_DEVICE); + dma_unmap_sg(ctx->dev->dev, sgd, 1, DMA_FROM_DEVICE); + } +theend_iv: return err; } @@ -446,9 +443,6 @@ static int rk_ablk_init_tfm(struct crypto_skcipher *tfm) algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher); ctx->dev = algt->dev; - ctx->dev->start = rk_ablk_start; - ctx->dev->update = rk_ablk_rx; - ctx->dev->complete = rk_crypto_complete; ctx->fallback_tfm = crypto_alloc_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(ctx->fallback_tfm)) { @@ -460,6 +454,8 @@ static int rk_ablk_init_tfm(struct crypto_skcipher *tfm) tfm->reqsize = sizeof(struct rk_cipher_rctx) + crypto_skcipher_reqsize(ctx->fallback_tfm); + ctx->enginectx.op.do_one_request = rk_cipher_run; + return 0; } From 6d55c4a206d29006c733b5083ba5da8391abbdbd Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Tue, 27 Sep 2022 07:54:49 +0000 Subject: [PATCH 0551/4122] crypto: rockchip - rewrite type Instead of using a custom type to classify algorithms, let's just use already defined ones. And let's make it a bit more verbose about what is registered. 
Reviewed-by: John Keeping Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/rockchip/rk3288_crypto.c | 26 +++++++++++++------ drivers/crypto/rockchip/rk3288_crypto.h | 7 +---- drivers/crypto/rockchip/rk3288_crypto_ahash.c | 6 ++--- .../crypto/rockchip/rk3288_crypto_skcipher.c | 12 ++++----- 4 files changed, 28 insertions(+), 23 deletions(-) diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c index 1afb65eee6c9..8f9664acc78d 100644 --- a/drivers/crypto/rockchip/rk3288_crypto.c +++ b/drivers/crypto/rockchip/rk3288_crypto.c @@ -102,12 +102,22 @@ static int rk_crypto_register(struct rk_crypto_info *crypto_info) for (i = 0; i < ARRAY_SIZE(rk_cipher_algs); i++) { rk_cipher_algs[i]->dev = crypto_info; - if (rk_cipher_algs[i]->type == ALG_TYPE_CIPHER) - err = crypto_register_skcipher( - &rk_cipher_algs[i]->alg.skcipher); - else - err = crypto_register_ahash( - &rk_cipher_algs[i]->alg.hash); + switch (rk_cipher_algs[i]->type) { + case CRYPTO_ALG_TYPE_SKCIPHER: + dev_info(crypto_info->dev, "Register %s as %s\n", + rk_cipher_algs[i]->alg.skcipher.base.cra_name, + rk_cipher_algs[i]->alg.skcipher.base.cra_driver_name); + err = crypto_register_skcipher(&rk_cipher_algs[i]->alg.skcipher); + break; + case CRYPTO_ALG_TYPE_AHASH: + dev_info(crypto_info->dev, "Register %s as %s\n", + rk_cipher_algs[i]->alg.hash.halg.base.cra_name, + rk_cipher_algs[i]->alg.hash.halg.base.cra_driver_name); + err = crypto_register_ahash(&rk_cipher_algs[i]->alg.hash); + break; + default: + dev_err(crypto_info->dev, "unknown algorithm\n"); + } if (err) goto err_cipher_algs; } @@ -115,7 +125,7 @@ static int rk_crypto_register(struct rk_crypto_info *crypto_info) err_cipher_algs: for (k = 0; k < i; k++) { - if (rk_cipher_algs[i]->type == ALG_TYPE_CIPHER) + if (rk_cipher_algs[i]->type == CRYPTO_ALG_TYPE_SKCIPHER) crypto_unregister_skcipher(&rk_cipher_algs[k]->alg.skcipher); else crypto_unregister_ahash(&rk_cipher_algs[i]->alg.hash); @@ -128,7 
+138,7 @@ static void rk_crypto_unregister(void) unsigned int i; for (i = 0; i < ARRAY_SIZE(rk_cipher_algs); i++) { - if (rk_cipher_algs[i]->type == ALG_TYPE_CIPHER) + if (rk_cipher_algs[i]->type == CRYPTO_ALG_TYPE_SKCIPHER) crypto_unregister_skcipher(&rk_cipher_algs[i]->alg.skcipher); else crypto_unregister_ahash(&rk_cipher_algs[i]->alg.hash); diff --git a/drivers/crypto/rockchip/rk3288_crypto.h b/drivers/crypto/rockchip/rk3288_crypto.h index 65ed645e0168..d924ea17402a 100644 --- a/drivers/crypto/rockchip/rk3288_crypto.h +++ b/drivers/crypto/rockchip/rk3288_crypto.h @@ -232,18 +232,13 @@ struct rk_cipher_rctx { struct skcipher_request fallback_req; // keep at the end }; -enum alg_type { - ALG_TYPE_HASH, - ALG_TYPE_CIPHER, -}; - struct rk_crypto_tmp { + u32 type; struct rk_crypto_info *dev; union { struct skcipher_alg skcipher; struct ahash_alg hash; } alg; - enum alg_type type; }; extern struct rk_crypto_tmp rk_ecb_aes_alg; diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c index edd40e16a3f0..d08e2438d356 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c +++ b/drivers/crypto/rockchip/rk3288_crypto_ahash.c @@ -352,7 +352,7 @@ static void rk_cra_hash_exit(struct crypto_tfm *tfm) } struct rk_crypto_tmp rk_ahash_sha1 = { - .type = ALG_TYPE_HASH, + .type = CRYPTO_ALG_TYPE_AHASH, .alg.hash = { .init = rk_ahash_init, .update = rk_ahash_update, @@ -382,7 +382,7 @@ struct rk_crypto_tmp rk_ahash_sha1 = { }; struct rk_crypto_tmp rk_ahash_sha256 = { - .type = ALG_TYPE_HASH, + .type = CRYPTO_ALG_TYPE_AHASH, .alg.hash = { .init = rk_ahash_init, .update = rk_ahash_update, @@ -412,7 +412,7 @@ struct rk_crypto_tmp rk_ahash_sha256 = { }; struct rk_crypto_tmp rk_ahash_md5 = { - .type = ALG_TYPE_HASH, + .type = CRYPTO_ALG_TYPE_AHASH, .alg.hash = { .init = rk_ahash_init, .update = rk_ahash_update, diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c index 
67a7e05d5ae3..1ed297f5d809 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c +++ b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c @@ -468,7 +468,7 @@ static void rk_ablk_exit_tfm(struct crypto_skcipher *tfm) } struct rk_crypto_tmp rk_ecb_aes_alg = { - .type = ALG_TYPE_CIPHER, + .type = CRYPTO_ALG_TYPE_SKCIPHER, .alg.skcipher = { .base.cra_name = "ecb(aes)", .base.cra_driver_name = "ecb-aes-rk", @@ -490,7 +490,7 @@ struct rk_crypto_tmp rk_ecb_aes_alg = { }; struct rk_crypto_tmp rk_cbc_aes_alg = { - .type = ALG_TYPE_CIPHER, + .type = CRYPTO_ALG_TYPE_SKCIPHER, .alg.skcipher = { .base.cra_name = "cbc(aes)", .base.cra_driver_name = "cbc-aes-rk", @@ -513,7 +513,7 @@ struct rk_crypto_tmp rk_cbc_aes_alg = { }; struct rk_crypto_tmp rk_ecb_des_alg = { - .type = ALG_TYPE_CIPHER, + .type = CRYPTO_ALG_TYPE_SKCIPHER, .alg.skcipher = { .base.cra_name = "ecb(des)", .base.cra_driver_name = "ecb-des-rk", @@ -535,7 +535,7 @@ struct rk_crypto_tmp rk_ecb_des_alg = { }; struct rk_crypto_tmp rk_cbc_des_alg = { - .type = ALG_TYPE_CIPHER, + .type = CRYPTO_ALG_TYPE_SKCIPHER, .alg.skcipher = { .base.cra_name = "cbc(des)", .base.cra_driver_name = "cbc-des-rk", @@ -558,7 +558,7 @@ struct rk_crypto_tmp rk_cbc_des_alg = { }; struct rk_crypto_tmp rk_ecb_des3_ede_alg = { - .type = ALG_TYPE_CIPHER, + .type = CRYPTO_ALG_TYPE_SKCIPHER, .alg.skcipher = { .base.cra_name = "ecb(des3_ede)", .base.cra_driver_name = "ecb-des3-ede-rk", @@ -580,7 +580,7 @@ struct rk_crypto_tmp rk_ecb_des3_ede_alg = { }; struct rk_crypto_tmp rk_cbc_des3_ede_alg = { - .type = ALG_TYPE_CIPHER, + .type = CRYPTO_ALG_TYPE_SKCIPHER, .alg.skcipher = { .base.cra_name = "cbc(des3_ede)", .base.cra_driver_name = "cbc-des3-ede-rk", From 48d904d428b68080abd9161148ca2ab1331124a4 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Tue, 27 Sep 2022 07:54:50 +0000 Subject: [PATCH 0552/4122] crypto: rockchip - add debugfs This patch enable to access usage stats for each algorithm. 
Reviewed-by: John Keeping Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/Kconfig | 10 ++++ drivers/crypto/rockchip/rk3288_crypto.c | 47 +++++++++++++++++++ drivers/crypto/rockchip/rk3288_crypto.h | 11 +++++ drivers/crypto/rockchip/rk3288_crypto_ahash.c | 8 ++++ .../crypto/rockchip/rk3288_crypto_skcipher.c | 15 ++++++ 5 files changed, 91 insertions(+) diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index c30b5a39c2ac..2947888d3b82 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -686,6 +686,16 @@ config CRYPTO_DEV_ROCKCHIP This driver interfaces with the hardware crypto accelerator. Supporting cbc/ecb chainmode, and aes/des/des3_ede cipher mode. +config CRYPTO_DEV_ROCKCHIP_DEBUG + bool "Enable Rockchip crypto stats" + depends on CRYPTO_DEV_ROCKCHIP + depends on DEBUG_FS + help + Say y to enable Rockchip crypto debug stats. + This will create /sys/kernel/debug/rk3288_crypto/stats for displaying + the number of requests per algorithm and other internal stats. 
+ + config CRYPTO_DEV_ZYNQMP_AES tristate "Support for Xilinx ZynqMP AES hw accelerator" depends on ZYNQMP_FIRMWARE || COMPILE_TEST diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c index 8f9664acc78d..3e1b4f3b2422 100644 --- a/drivers/crypto/rockchip/rk3288_crypto.c +++ b/drivers/crypto/rockchip/rk3288_crypto.c @@ -95,6 +95,41 @@ static struct rk_crypto_tmp *rk_cipher_algs[] = { &rk_ahash_md5, }; +#ifdef CONFIG_CRYPTO_DEV_ROCKCHIP_DEBUG +static int rk_crypto_debugfs_show(struct seq_file *seq, void *v) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(rk_cipher_algs); i++) { + if (!rk_cipher_algs[i]->dev) + continue; + switch (rk_cipher_algs[i]->type) { + case CRYPTO_ALG_TYPE_SKCIPHER: + seq_printf(seq, "%s %s reqs=%lu fallback=%lu\n", + rk_cipher_algs[i]->alg.skcipher.base.cra_driver_name, + rk_cipher_algs[i]->alg.skcipher.base.cra_name, + rk_cipher_algs[i]->stat_req, rk_cipher_algs[i]->stat_fb); + seq_printf(seq, "\tfallback due to length: %lu\n", + rk_cipher_algs[i]->stat_fb_len); + seq_printf(seq, "\tfallback due to alignment: %lu\n", + rk_cipher_algs[i]->stat_fb_align); + seq_printf(seq, "\tfallback due to SGs: %lu\n", + rk_cipher_algs[i]->stat_fb_sgdiff); + break; + case CRYPTO_ALG_TYPE_AHASH: + seq_printf(seq, "%s %s reqs=%lu fallback=%lu\n", + rk_cipher_algs[i]->alg.hash.halg.base.cra_driver_name, + rk_cipher_algs[i]->alg.hash.halg.base.cra_name, + rk_cipher_algs[i]->stat_req, rk_cipher_algs[i]->stat_fb); + break; + } + } + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(rk_crypto_debugfs); +#endif + static int rk_crypto_register(struct rk_crypto_info *crypto_info) { unsigned int i, k; @@ -246,6 +281,15 @@ static int rk_crypto_probe(struct platform_device *pdev) goto err_register_alg; } +#ifdef CONFIG_CRYPTO_DEV_ROCKCHIP_DEBUG + /* Ignore error of debugfs */ + crypto_info->dbgfs_dir = debugfs_create_dir("rk3288_crypto", NULL); + crypto_info->dbgfs_stats = debugfs_create_file("stats", 0444, + crypto_info->dbgfs_dir, + 
crypto_info, + &rk_crypto_debugfs_fops); +#endif + dev_info(dev, "Crypto Accelerator successfully registered\n"); return 0; @@ -260,6 +304,9 @@ static int rk_crypto_remove(struct platform_device *pdev) { struct rk_crypto_info *crypto_tmp = platform_get_drvdata(pdev); +#ifdef CONFIG_CRYPTO_DEV_ROCKCHIP_DEBUG + debugfs_remove_recursive(crypto_tmp->dbgfs_dir); +#endif rk_crypto_unregister(); rk_crypto_disable_clk(crypto_tmp); crypto_engine_exit(crypto_tmp->engine); diff --git a/drivers/crypto/rockchip/rk3288_crypto.h b/drivers/crypto/rockchip/rk3288_crypto.h index d924ea17402a..945a8184bbad 100644 --- a/drivers/crypto/rockchip/rk3288_crypto.h +++ b/drivers/crypto/rockchip/rk3288_crypto.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -199,6 +200,10 @@ struct rk_crypto_info { struct crypto_engine *engine; struct completion complete; int status; +#ifdef CONFIG_CRYPTO_DEV_ROCKCHIP_DEBUG + struct dentry *dbgfs_dir; + struct dentry *dbgfs_stats; +#endif }; /* the private variable of hash */ @@ -239,6 +244,12 @@ struct rk_crypto_tmp { struct skcipher_alg skcipher; struct ahash_alg hash; } alg; + unsigned long stat_req; + unsigned long stat_fb; + unsigned long stat_fb_len; + unsigned long stat_fb_sglen; + unsigned long stat_fb_align; + unsigned long stat_fb_sgdiff; }; extern struct rk_crypto_tmp rk_ecb_aes_alg; diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c index d08e2438d356..8856c6226be6 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c +++ b/drivers/crypto/rockchip/rk3288_crypto_ahash.c @@ -39,6 +39,10 @@ static int rk_ahash_digest_fb(struct ahash_request *areq) struct rk_ahash_rctx *rctx = ahash_request_ctx(areq); struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); struct rk_ahash_ctx *tfmctx = crypto_ahash_ctx(tfm); + struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg); + struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.hash); + 
+ algt->stat_fb++; ahash_request_set_tfm(&rctx->fallback_req, tfmctx->fallback_tfm); rctx->fallback_req.base.flags = areq->base.flags & @@ -249,6 +253,8 @@ static int rk_hash_run(struct crypto_engine *engine, void *breq) struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); struct rk_ahash_rctx *rctx = ahash_request_ctx(areq); struct rk_ahash_ctx *tctx = crypto_ahash_ctx(tfm); + struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg); + struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.hash); struct scatterlist *sg = areq->src; int err = 0; int i; @@ -256,6 +262,8 @@ static int rk_hash_run(struct crypto_engine *engine, void *breq) rctx->mode = 0; + algt->stat_req++; + switch (crypto_ahash_digestsize(tfm)) { case SHA1_DIGEST_SIZE: rctx->mode = RK_CRYPTO_HASH_SHA1; diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c index 1ed297f5d809..91b8a4c574da 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c +++ b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c @@ -18,6 +18,8 @@ static int rk_cipher_need_fallback(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); unsigned int bs = crypto_skcipher_blocksize(tfm); + struct skcipher_alg *alg = crypto_skcipher_alg(tfm); + struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher); struct scatterlist *sgs, *sgd; unsigned int stodo, dtodo, len; @@ -29,20 +31,25 @@ static int rk_cipher_need_fallback(struct skcipher_request *req) sgd = req->dst; while (sgs && sgd) { if (!IS_ALIGNED(sgs->offset, sizeof(u32))) { + algt->stat_fb_align++; return true; } if (!IS_ALIGNED(sgd->offset, sizeof(u32))) { + algt->stat_fb_align++; return true; } stodo = min(len, sgs->length); if (stodo % bs) { + algt->stat_fb_len++; return true; } dtodo = min(len, sgd->length); if (dtodo % bs) { + algt->stat_fb_len++; return true; } if (stodo != dtodo) { + algt->stat_fb_sgdiff++; return true; } len 
-= stodo; @@ -57,8 +64,12 @@ static int rk_cipher_fallback(struct skcipher_request *areq) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq); struct rk_cipher_ctx *op = crypto_skcipher_ctx(tfm); struct rk_cipher_rctx *rctx = skcipher_request_ctx(areq); + struct skcipher_alg *alg = crypto_skcipher_alg(tfm); + struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher); int err; + algt->stat_fb++; + skcipher_request_set_tfm(&rctx->fallback_req, op->fallback_tfm); skcipher_request_set_callback(&rctx->fallback_req, areq->base.flags, areq->base.complete, areq->base.data); @@ -324,6 +335,10 @@ static int rk_cipher_run(struct crypto_engine *engine, void *async_req) u8 *ivtouse = areq->iv; unsigned int len = areq->cryptlen; unsigned int todo; + struct skcipher_alg *alg = crypto_skcipher_alg(tfm); + struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher); + + algt->stat_req++; ivsize = crypto_skcipher_ivsize(tfm); if (areq->iv && crypto_skcipher_ivsize(tfm) > 0) { From a216be3964c15661579005012b1f0d7d20a1f265 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Tue, 27 Sep 2022 07:54:51 +0000 Subject: [PATCH 0553/4122] crypto: rockchip - introduce PM Add runtime PM support for rockchip crypto. 
Reviewed-by: John Keeping Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/rockchip/rk3288_crypto.c | 51 ++++++++++++++++++- drivers/crypto/rockchip/rk3288_crypto.h | 1 + drivers/crypto/rockchip/rk3288_crypto_ahash.c | 10 ++++ .../crypto/rockchip/rk3288_crypto_skcipher.c | 9 ++++ 4 files changed, 69 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c index 3e1b4f3b2422..d9258b9e71b3 100644 --- a/drivers/crypto/rockchip/rk3288_crypto.c +++ b/drivers/crypto/rockchip/rk3288_crypto.c @@ -65,6 +65,48 @@ static void rk_crypto_disable_clk(struct rk_crypto_info *dev) clk_disable_unprepare(dev->sclk); } +/* + * Power management strategy: The device is suspended unless a TFM exists for + * one of the algorithms proposed by this driver. + */ +static int rk_crypto_pm_suspend(struct device *dev) +{ + struct rk_crypto_info *rkdev = dev_get_drvdata(dev); + + rk_crypto_disable_clk(rkdev); + return 0; +} + +static int rk_crypto_pm_resume(struct device *dev) +{ + struct rk_crypto_info *rkdev = dev_get_drvdata(dev); + + return rk_crypto_enable_clk(rkdev); +} + +static const struct dev_pm_ops rk_crypto_pm_ops = { + SET_RUNTIME_PM_OPS(rk_crypto_pm_suspend, rk_crypto_pm_resume, NULL) +}; + +static int rk_crypto_pm_init(struct rk_crypto_info *rkdev) +{ + int err; + + pm_runtime_use_autosuspend(rkdev->dev); + pm_runtime_set_autosuspend_delay(rkdev->dev, 2000); + + err = pm_runtime_set_suspended(rkdev->dev); + if (err) + return err; + pm_runtime_enable(rkdev->dev); + return err; +} + +static void rk_crypto_pm_exit(struct rk_crypto_info *rkdev) +{ + pm_runtime_disable(rkdev->dev); +} + static irqreturn_t rk_crypto_irq_handle(int irq, void *dev_id) { struct rk_crypto_info *dev = platform_get_drvdata(dev_id); @@ -273,7 +315,9 @@ static int rk_crypto_probe(struct platform_device *pdev) crypto_engine_start(crypto_info->engine); init_completion(&crypto_info->complete); - 
rk_crypto_enable_clk(crypto_info); + err = rk_crypto_pm_init(crypto_info); + if (err) + goto err_pm; err = rk_crypto_register(crypto_info); if (err) { @@ -294,6 +338,8 @@ static int rk_crypto_probe(struct platform_device *pdev) return 0; err_register_alg: + rk_crypto_pm_exit(crypto_info); +err_pm: crypto_engine_exit(crypto_info->engine); err_crypto: dev_err(dev, "Crypto Accelerator not successfully registered\n"); @@ -308,7 +354,7 @@ static int rk_crypto_remove(struct platform_device *pdev) debugfs_remove_recursive(crypto_tmp->dbgfs_dir); #endif rk_crypto_unregister(); - rk_crypto_disable_clk(crypto_tmp); + rk_crypto_pm_exit(crypto_tmp); crypto_engine_exit(crypto_tmp->engine); return 0; } @@ -318,6 +364,7 @@ static struct platform_driver crypto_driver = { .remove = rk_crypto_remove, .driver = { .name = "rk3288-crypto", + .pm = &rk_crypto_pm_ops, .of_match_table = crypto_of_id_table, }, }; diff --git a/drivers/crypto/rockchip/rk3288_crypto.h b/drivers/crypto/rockchip/rk3288_crypto.h index 945a8184bbad..ddbb9246ce16 100644 --- a/drivers/crypto/rockchip/rk3288_crypto.h +++ b/drivers/crypto/rockchip/rk3288_crypto.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c index 8856c6226be6..137013bd4410 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c +++ b/drivers/crypto/rockchip/rk3288_crypto_ahash.c @@ -328,6 +328,7 @@ static int rk_cra_hash_init(struct crypto_tfm *tfm) struct ahash_alg *alg = __crypto_ahash_alg(tfm->__crt_alg); const char *alg_name = crypto_tfm_alg_name(tfm); + int err; algt = container_of(alg, struct rk_crypto_tmp, alg.hash); @@ -349,7 +350,15 @@ static int rk_cra_hash_init(struct crypto_tfm *tfm) tctx->enginectx.op.prepare_request = rk_hash_prepare; tctx->enginectx.op.unprepare_request = rk_hash_unprepare; + err = pm_runtime_resume_and_get(tctx->dev->dev); + if (err < 0) + goto error_pm; + return 0; 
+error_pm: + crypto_free_ahash(tctx->fallback_tfm); + + return err; } static void rk_cra_hash_exit(struct crypto_tfm *tfm) @@ -357,6 +366,7 @@ static void rk_cra_hash_exit(struct crypto_tfm *tfm) struct rk_ahash_ctx *tctx = crypto_tfm_ctx(tfm); crypto_free_ahash(tctx->fallback_tfm); + pm_runtime_put_autosuspend(tctx->dev->dev); } struct rk_crypto_tmp rk_ahash_sha1 = { diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c index 91b8a4c574da..3bdb304aa794 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c +++ b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c @@ -454,6 +454,7 @@ static int rk_ablk_init_tfm(struct crypto_skcipher *tfm) struct skcipher_alg *alg = crypto_skcipher_alg(tfm); const char *name = crypto_tfm_alg_name(&tfm->base); struct rk_crypto_tmp *algt; + int err; algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher); @@ -471,7 +472,14 @@ static int rk_ablk_init_tfm(struct crypto_skcipher *tfm) ctx->enginectx.op.do_one_request = rk_cipher_run; + err = pm_runtime_resume_and_get(ctx->dev->dev); + if (err < 0) + goto error_pm; + return 0; +error_pm: + crypto_free_skcipher(ctx->fallback_tfm); + return err; } static void rk_ablk_exit_tfm(struct crypto_skcipher *tfm) @@ -480,6 +488,7 @@ static void rk_ablk_exit_tfm(struct crypto_skcipher *tfm) memzero_explicit(ctx->key, ctx->keylen); crypto_free_skcipher(ctx->fallback_tfm); + pm_runtime_put_autosuspend(ctx->dev->dev); } struct rk_crypto_tmp rk_ecb_aes_alg = { From 6f61192549d0214f8d9d1e1d3152e450658ed1e9 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Tue, 27 Sep 2022 07:54:52 +0000 Subject: [PATCH 0554/4122] crypto: rockchip - handle reset also in PM reset could be handled by PM functions. We keep the initial reset pulse to be sure the hw is in a known device state after probe.
Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/rockchip/rk3288_crypto.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c index d9258b9e71b3..399829ef92e0 100644 --- a/drivers/crypto/rockchip/rk3288_crypto.c +++ b/drivers/crypto/rockchip/rk3288_crypto.c @@ -74,14 +74,23 @@ static int rk_crypto_pm_suspend(struct device *dev) struct rk_crypto_info *rkdev = dev_get_drvdata(dev); rk_crypto_disable_clk(rkdev); + reset_control_assert(rkdev->rst); + return 0; } static int rk_crypto_pm_resume(struct device *dev) { struct rk_crypto_info *rkdev = dev_get_drvdata(dev); + int ret; + + ret = rk_crypto_enable_clk(rkdev); + if (ret) + return ret; + + reset_control_deassert(rkdev->rst); + return 0; - return rk_crypto_enable_clk(rkdev); } static const struct dev_pm_ops rk_crypto_pm_ops = { @@ -222,13 +231,6 @@ static void rk_crypto_unregister(void) } } -static void rk_crypto_action(void *data) -{ - struct rk_crypto_info *crypto_info = data; - - reset_control_assert(crypto_info->rst); -} - static const struct of_device_id crypto_of_id_table[] = { { .compatible = "rockchip,rk3288-crypto" }, {} @@ -258,10 +260,6 @@ static int rk_crypto_probe(struct platform_device *pdev) usleep_range(10, 20); reset_control_deassert(crypto_info->rst); - err = devm_add_action_or_reset(dev, rk_crypto_action, crypto_info); - if (err) - goto err_crypto; - crypto_info->reg = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(crypto_info->reg)) { err = PTR_ERR(crypto_info->reg); From 3a6fd464f48ad35d8cf15d81fd92094132dc862a Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Tue, 27 Sep 2022 07:54:53 +0000 Subject: [PATCH 0555/4122] crypto: rockchip - use clk_bulk to simplify clock management rk3328 does not have the same clock names as rk3288; instead of using complex clock management, let's use clk_bulk to simplify their handling.
Reviewed-by: John Keeping Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/rockchip/rk3288_crypto.c | 66 ++++--------------------- drivers/crypto/rockchip/rk3288_crypto.h | 6 +-- 2 files changed, 11 insertions(+), 61 deletions(-) diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c index 399829ef92e0..a635029ac71d 100644 --- a/drivers/crypto/rockchip/rk3288_crypto.c +++ b/drivers/crypto/rockchip/rk3288_crypto.c @@ -22,47 +22,16 @@ static int rk_crypto_enable_clk(struct rk_crypto_info *dev) { int err; - err = clk_prepare_enable(dev->sclk); - if (err) { - dev_err(dev->dev, "[%s:%d], Couldn't enable clock sclk\n", - __func__, __LINE__); - goto err_return; - } - err = clk_prepare_enable(dev->aclk); - if (err) { - dev_err(dev->dev, "[%s:%d], Couldn't enable clock aclk\n", - __func__, __LINE__); - goto err_aclk; - } - err = clk_prepare_enable(dev->hclk); - if (err) { - dev_err(dev->dev, "[%s:%d], Couldn't enable clock hclk\n", - __func__, __LINE__); - goto err_hclk; - } - err = clk_prepare_enable(dev->dmaclk); - if (err) { - dev_err(dev->dev, "[%s:%d], Couldn't enable clock dmaclk\n", - __func__, __LINE__); - goto err_dmaclk; - } - return err; -err_dmaclk: - clk_disable_unprepare(dev->hclk); -err_hclk: - clk_disable_unprepare(dev->aclk); -err_aclk: - clk_disable_unprepare(dev->sclk); -err_return: + err = clk_bulk_prepare_enable(dev->num_clks, dev->clks); + if (err) + dev_err(dev->dev, "Could not enable clock clks\n"); + return err; } static void rk_crypto_disable_clk(struct rk_crypto_info *dev) { - clk_disable_unprepare(dev->dmaclk); - clk_disable_unprepare(dev->hclk); - clk_disable_unprepare(dev->aclk); - clk_disable_unprepare(dev->sclk); + clk_bulk_disable_unprepare(dev->num_clks, dev->clks); } /* @@ -266,27 +235,10 @@ static int rk_crypto_probe(struct platform_device *pdev) goto err_crypto; } - crypto_info->aclk = devm_clk_get(&pdev->dev, "aclk"); - if (IS_ERR(crypto_info->aclk)) { - err = 
PTR_ERR(crypto_info->aclk); - goto err_crypto; - } - - crypto_info->hclk = devm_clk_get(&pdev->dev, "hclk"); - if (IS_ERR(crypto_info->hclk)) { - err = PTR_ERR(crypto_info->hclk); - goto err_crypto; - } - - crypto_info->sclk = devm_clk_get(&pdev->dev, "sclk"); - if (IS_ERR(crypto_info->sclk)) { - err = PTR_ERR(crypto_info->sclk); - goto err_crypto; - } - - crypto_info->dmaclk = devm_clk_get(&pdev->dev, "apb_pclk"); - if (IS_ERR(crypto_info->dmaclk)) { - err = PTR_ERR(crypto_info->dmaclk); + crypto_info->num_clks = devm_clk_bulk_get_all(&pdev->dev, + &crypto_info->clks); + if (crypto_info->num_clks < 3) { + err = -EINVAL; goto err_crypto; } diff --git a/drivers/crypto/rockchip/rk3288_crypto.h b/drivers/crypto/rockchip/rk3288_crypto.h index ddbb9246ce16..28bf09fe1c1d 100644 --- a/drivers/crypto/rockchip/rk3288_crypto.h +++ b/drivers/crypto/rockchip/rk3288_crypto.h @@ -190,10 +190,8 @@ struct rk_crypto_info { struct device *dev; - struct clk *aclk; - struct clk *hclk; - struct clk *sclk; - struct clk *dmaclk; + struct clk_bulk_data *clks; + int num_clks; struct reset_control *rst; void __iomem *reg; int irq; From e803188400d32d28ecfbef0878c289e3c7026723 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Tue, 27 Sep 2022 07:54:54 +0000 Subject: [PATCH 0556/4122] crypto: rockchip - add myself as maintainer Nobody is set as maintainer of rockchip crypto; I propose to do it as I have already reworked a lot of this code.
Reviewed-by: John Keeping Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- MAINTAINERS | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index cf0f18502372..3489126acd1f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17753,6 +17753,13 @@ F: Documentation/ABI/*/sysfs-driver-hid-roccat* F: drivers/hid/hid-roccat* F: include/linux/hid-roccat* +ROCKCHIP CRYPTO DRIVERS +M: Corentin Labbe +L: linux-crypto@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/crypto/rockchip,rk3288-crypto.yaml +F: drivers/crypto/rockchip/ + ROCKCHIP I2S TDM DRIVER M: Nicolas Frattaroli L: linux-rockchip@lists.infradead.org From 37bc22159c456ad43fb852fc6ed60f4081df25df Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Tue, 27 Sep 2022 07:54:55 +0000 Subject: [PATCH 0557/4122] crypto: rockchip - use read_poll_timeout Use read_poll_timeout instead of open coding it. At the same time, fix the indentation of the related comment. Reviewed-by: John Keeping Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/rockchip/rk3288_crypto_ahash.c | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c index 137013bd4410..1fbab86c9238 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c +++ b/drivers/crypto/rockchip/rk3288_crypto_ahash.c @@ -10,6 +10,7 @@ */ #include #include +#include #include "rk3288_crypto.h" /* @@ -295,18 +296,17 @@ static int rk_hash_run(struct crypto_engine *engine, void *breq) sg = sg_next(sg); } - /* - * it will take some time to process date after last dma - * transmission. - * - * waiting time is relative with the last date len, - * so cannot set a fixed time here. - * 10us makes system not call here frequently wasting - * efficiency, and make it response quickly when dma - * complete.
- */ - while (!CRYPTO_READ(tctx->dev, RK_CRYPTO_HASH_STS)) - udelay(10); + /* + * it will take some time to process date after last dma + * transmission. + * + * waiting time is relative with the last date len, + * so cannot set a fixed time here. + * 10us makes system not call here frequently wasting + * efficiency, and make it response quickly when dma + * complete. + */ + readl_poll_timeout(tctx->dev->reg + RK_CRYPTO_HASH_STS, v, v == 0, 10, 1000); for (i = 0; i < crypto_ahash_digestsize(tfm) / 4; i++) { v = readl(tctx->dev->reg + RK_CRYPTO_HASH_DOUT_0 + i * 4); From 456698746b40008eb0924eb7e9ec908330948b2d Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Tue, 27 Sep 2022 07:54:56 +0000 Subject: [PATCH 0558/4122] crypto: rockchip - fix style issue This patch fixes some warnings reported by checkpatch. Reviewed-by: John Keeping Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/rockchip/rk3288_crypto_ahash.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c index 1fbab86c9238..fae779d73c84 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c +++ b/drivers/crypto/rockchip/rk3288_crypto_ahash.c @@ -336,7 +336,7 @@ static int rk_cra_hash_init(struct crypto_tfm *tfm) /* for fallback */ tctx->fallback_tfm = crypto_alloc_ahash(alg_name, 0, - CRYPTO_ALG_NEED_FALLBACK); + CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(tctx->fallback_tfm)) { dev_err(tctx->dev->dev, "Could not load fallback driver.\n"); return PTR_ERR(tctx->fallback_tfm); @@ -394,8 +394,8 @@ struct rk_crypto_tmp rk_ahash_sha1 = { .cra_init = rk_cra_hash_init, .cra_exit = rk_cra_hash_exit, .cra_module = THIS_MODULE, - } - } + } + } } }; @@ -424,8 +424,8 @@ struct rk_crypto_tmp rk_ahash_sha256 = { .cra_init = rk_cra_hash_init, .cra_exit = rk_cra_hash_exit, .cra_module = THIS_MODULE, - } - } + } + } } }; @@ -454,7 +454,7 @@ struct rk_crypto_tmp rk_ahash_md5 = {
.cra_init = rk_cra_hash_init, .cra_exit = rk_cra_hash_exit, .cra_module = THIS_MODULE, - } } + } } }; From e65e90101329de0fe304e2df057f68c5f0fa4748 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Tue, 27 Sep 2022 07:54:57 +0000 Subject: [PATCH 0559/4122] crypto: rockchip - add support for rk3328 The rk3328 could be used as-is by the rockchip driver. Reviewed-by: John Keeping Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/rockchip/rk3288_crypto.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c index a635029ac71d..c92559b83f7d 100644 --- a/drivers/crypto/rockchip/rk3288_crypto.c +++ b/drivers/crypto/rockchip/rk3288_crypto.c @@ -202,6 +202,7 @@ static void rk_crypto_unregister(void) static const struct of_device_id crypto_of_id_table[] = { { .compatible = "rockchip,rk3288-crypto" }, + { .compatible = "rockchip,rk3328-crypto" }, {} }; MODULE_DEVICE_TABLE(of, crypto_of_id_table); From a7fa0644dd0b91fab97398de7ea4672a6526261f Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Tue, 27 Sep 2022 07:54:58 +0000 Subject: [PATCH 0560/4122] crypto: rockchip - rename ablk functions to cipher Some functions still have ablk in their name even though they are not handling ablk_cipher anymore. So let's rename them.
Reviewed-by: John Keeping Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- .../crypto/rockchip/rk3288_crypto_skcipher.c | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c index 3bdb304aa794..d60c206e717d 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c +++ b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c @@ -273,7 +273,7 @@ static int rk_des3_ede_cbc_decrypt(struct skcipher_request *req) return rk_handle_req(dev, req); } -static void rk_ablk_hw_init(struct rk_crypto_info *dev, struct skcipher_request *req) +static void rk_cipher_hw_init(struct rk_crypto_info *dev, struct skcipher_request *req) { struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req); struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher); @@ -382,7 +382,7 @@ static int rk_cipher_run(struct crypto_engine *engine, void *async_req) } } err = 0; - rk_ablk_hw_init(ctx->dev, areq); + rk_cipher_hw_init(ctx->dev, areq); if (ivsize) { if (ivsize == DES_BLOCK_SIZE) memcpy_toio(ctx->dev->reg + RK_CRYPTO_TDES_IV_0, ivtouse, ivsize); @@ -448,7 +448,7 @@ theend_iv: return err; } -static int rk_ablk_init_tfm(struct crypto_skcipher *tfm) +static int rk_cipher_tfm_init(struct crypto_skcipher *tfm) { struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); struct skcipher_alg *alg = crypto_skcipher_alg(tfm); @@ -482,7 +482,7 @@ error_pm: return err; } -static void rk_ablk_exit_tfm(struct crypto_skcipher *tfm) +static void rk_cipher_tfm_exit(struct crypto_skcipher *tfm) { struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); @@ -503,8 +503,8 @@ struct rk_crypto_tmp rk_ecb_aes_alg = { .base.cra_alignmask = 0x0f, .base.cra_module = THIS_MODULE, - .init = rk_ablk_init_tfm, - .exit = rk_ablk_exit_tfm, + .init = rk_cipher_tfm_init, + .exit = rk_cipher_tfm_exit, .min_keysize = AES_MIN_KEY_SIZE, .max_keysize = AES_MAX_KEY_SIZE, .setkey = rk_aes_setkey, @@ -525,8 
+525,8 @@ struct rk_crypto_tmp rk_cbc_aes_alg = { .base.cra_alignmask = 0x0f, .base.cra_module = THIS_MODULE, - .init = rk_ablk_init_tfm, - .exit = rk_ablk_exit_tfm, + .init = rk_cipher_tfm_init, + .exit = rk_cipher_tfm_exit, .min_keysize = AES_MIN_KEY_SIZE, .max_keysize = AES_MAX_KEY_SIZE, .ivsize = AES_BLOCK_SIZE, @@ -548,8 +548,8 @@ struct rk_crypto_tmp rk_ecb_des_alg = { .base.cra_alignmask = 0x07, .base.cra_module = THIS_MODULE, - .init = rk_ablk_init_tfm, - .exit = rk_ablk_exit_tfm, + .init = rk_cipher_tfm_init, + .exit = rk_cipher_tfm_exit, .min_keysize = DES_KEY_SIZE, .max_keysize = DES_KEY_SIZE, .setkey = rk_des_setkey, @@ -570,8 +570,8 @@ struct rk_crypto_tmp rk_cbc_des_alg = { .base.cra_alignmask = 0x07, .base.cra_module = THIS_MODULE, - .init = rk_ablk_init_tfm, - .exit = rk_ablk_exit_tfm, + .init = rk_cipher_tfm_init, + .exit = rk_cipher_tfm_exit, .min_keysize = DES_KEY_SIZE, .max_keysize = DES_KEY_SIZE, .ivsize = DES_BLOCK_SIZE, @@ -593,8 +593,8 @@ struct rk_crypto_tmp rk_ecb_des3_ede_alg = { .base.cra_alignmask = 0x07, .base.cra_module = THIS_MODULE, - .init = rk_ablk_init_tfm, - .exit = rk_ablk_exit_tfm, + .init = rk_cipher_tfm_init, + .exit = rk_cipher_tfm_exit, .min_keysize = DES3_EDE_KEY_SIZE, .max_keysize = DES3_EDE_KEY_SIZE, .setkey = rk_tdes_setkey, @@ -615,8 +615,8 @@ struct rk_crypto_tmp rk_cbc_des3_ede_alg = { .base.cra_alignmask = 0x07, .base.cra_module = THIS_MODULE, - .init = rk_ablk_init_tfm, - .exit = rk_ablk_exit_tfm, + .init = rk_cipher_tfm_init, + .exit = rk_cipher_tfm_exit, .min_keysize = DES3_EDE_KEY_SIZE, .max_keysize = DES3_EDE_KEY_SIZE, .ivsize = DES_BLOCK_SIZE, From 2e3b149578c30275db9c3501c1d9dec36d16622a Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Tue, 27 Sep 2022 07:54:59 +0000 Subject: [PATCH 0561/4122] crypto: rockchip - rework rk_handle_req function This patch reworks rk_handle_req(), simply removing the rk_crypto_info parameter.
Reviewed-by: John Keeping Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- .../crypto/rockchip/rk3288_crypto_skcipher.c | 68 +++++-------------- 1 file changed, 17 insertions(+), 51 deletions(-) diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c index d60c206e717d..3187869c4c68 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c +++ b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c @@ -82,10 +82,12 @@ static int rk_cipher_fallback(struct skcipher_request *areq) return err; } -static int rk_handle_req(struct rk_crypto_info *dev, - struct skcipher_request *req) +static int rk_cipher_handle_req(struct skcipher_request *req) { - struct crypto_engine *engine = dev->engine; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct rk_cipher_ctx *tctx = crypto_skcipher_ctx(tfm); + struct rk_crypto_info *rkc = tctx->dev; + struct crypto_engine *engine = rkc->engine; if (rk_cipher_need_fallback(req)) return rk_cipher_fallback(req); @@ -142,135 +144,99 @@ static int rk_tdes_setkey(struct crypto_skcipher *cipher, static int rk_aes_ecb_encrypt(struct skcipher_request *req) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); - struct rk_crypto_info *dev = ctx->dev; rctx->mode = RK_CRYPTO_AES_ECB_MODE; - return rk_handle_req(dev, req); + return rk_cipher_handle_req(req); } static int rk_aes_ecb_decrypt(struct skcipher_request *req) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); - struct rk_crypto_info *dev = ctx->dev; rctx->mode = RK_CRYPTO_AES_ECB_MODE | RK_CRYPTO_DEC; - return rk_handle_req(dev, req); + return rk_cipher_handle_req(req); } static int rk_aes_cbc_encrypt(struct skcipher_request *req) { - struct crypto_skcipher *tfm = 
crypto_skcipher_reqtfm(req); - struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); - struct rk_crypto_info *dev = ctx->dev; rctx->mode = RK_CRYPTO_AES_CBC_MODE; - return rk_handle_req(dev, req); + return rk_cipher_handle_req(req); } static int rk_aes_cbc_decrypt(struct skcipher_request *req) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); - struct rk_crypto_info *dev = ctx->dev; rctx->mode = RK_CRYPTO_AES_CBC_MODE | RK_CRYPTO_DEC; - return rk_handle_req(dev, req); + return rk_cipher_handle_req(req); } static int rk_des_ecb_encrypt(struct skcipher_request *req) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); - struct rk_crypto_info *dev = ctx->dev; rctx->mode = 0; - return rk_handle_req(dev, req); + return rk_cipher_handle_req(req); } static int rk_des_ecb_decrypt(struct skcipher_request *req) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); - struct rk_crypto_info *dev = ctx->dev; rctx->mode = RK_CRYPTO_DEC; - return rk_handle_req(dev, req); + return rk_cipher_handle_req(req); } static int rk_des_cbc_encrypt(struct skcipher_request *req) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); - struct rk_crypto_info *dev = ctx->dev; rctx->mode = RK_CRYPTO_TDES_CHAINMODE_CBC; - return rk_handle_req(dev, req); + return rk_cipher_handle_req(req); } static int rk_des_cbc_decrypt(struct skcipher_request *req) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct rk_cipher_ctx *ctx = 
crypto_skcipher_ctx(tfm); struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); - struct rk_crypto_info *dev = ctx->dev; rctx->mode = RK_CRYPTO_TDES_CHAINMODE_CBC | RK_CRYPTO_DEC; - return rk_handle_req(dev, req); + return rk_cipher_handle_req(req); } static int rk_des3_ede_ecb_encrypt(struct skcipher_request *req) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); - struct rk_crypto_info *dev = ctx->dev; rctx->mode = RK_CRYPTO_TDES_SELECT; - return rk_handle_req(dev, req); + return rk_cipher_handle_req(req); } static int rk_des3_ede_ecb_decrypt(struct skcipher_request *req) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); - struct rk_crypto_info *dev = ctx->dev; rctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_DEC; - return rk_handle_req(dev, req); + return rk_cipher_handle_req(req); } static int rk_des3_ede_cbc_encrypt(struct skcipher_request *req) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); - struct rk_crypto_info *dev = ctx->dev; rctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_TDES_CHAINMODE_CBC; - return rk_handle_req(dev, req); + return rk_cipher_handle_req(req); } static int rk_des3_ede_cbc_decrypt(struct skcipher_request *req) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); - struct rk_crypto_info *dev = ctx->dev; rctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_TDES_CHAINMODE_CBC | RK_CRYPTO_DEC; - return rk_handle_req(dev, req); + return rk_cipher_handle_req(req); } static void rk_cipher_hw_init(struct rk_crypto_info *dev, struct skcipher_request *req) 
From c018c7a9dd198ce965ca4d10c7b083849bc533be Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Tue, 27 Sep 2022 07:55:00 +0000 Subject: [PATCH 0562/4122] crypto: rockchip - use a rk_crypto_info variable instead of lot of indirection Instead of using lot of ctx->dev->xx indirections, use an intermediate variable for rk_crypto_info. This will help later, when 2 different rk_crypto_info would be used. Reviewed-by: John Keeping Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/rockchip/rk3288_crypto_ahash.c | 23 +++++++----- .../crypto/rockchip/rk3288_crypto_skcipher.c | 37 ++++++++++--------- 2 files changed, 32 insertions(+), 28 deletions(-) diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c index fae779d73c84..636dbcde0ca3 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c +++ b/drivers/crypto/rockchip/rk3288_crypto_ahash.c @@ -226,9 +226,10 @@ static int rk_hash_prepare(struct crypto_engine *engine, void *breq) struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); struct rk_ahash_rctx *rctx = ahash_request_ctx(areq); struct rk_ahash_ctx *tctx = crypto_ahash_ctx(tfm); + struct rk_crypto_info *rkc = tctx->dev; int ret; - ret = dma_map_sg(tctx->dev->dev, areq->src, sg_nents(areq->src), DMA_TO_DEVICE); + ret = dma_map_sg(rkc->dev, areq->src, sg_nents(areq->src), DMA_TO_DEVICE); if (ret <= 0) return -EINVAL; @@ -243,8 +244,9 @@ static int rk_hash_unprepare(struct crypto_engine *engine, void *breq) struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); struct rk_ahash_rctx *rctx = ahash_request_ctx(areq); struct rk_ahash_ctx *tctx = crypto_ahash_ctx(tfm); + struct rk_crypto_info *rkc = tctx->dev; - dma_unmap_sg(tctx->dev->dev, areq->src, rctx->nrsg, DMA_TO_DEVICE); + dma_unmap_sg(rkc->dev, areq->src, rctx->nrsg, DMA_TO_DEVICE); return 0; } @@ -257,6 +259,7 @@ static int rk_hash_run(struct crypto_engine *engine, void *breq) struct ahash_alg *alg = 
__crypto_ahash_alg(tfm->base.__crt_alg); struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.hash); struct scatterlist *sg = areq->src; + struct rk_crypto_info *rkc = tctx->dev; int err = 0; int i; u32 v; @@ -283,13 +286,13 @@ static int rk_hash_run(struct crypto_engine *engine, void *breq) rk_ahash_reg_init(areq); while (sg) { - reinit_completion(&tctx->dev->complete); - tctx->dev->status = 0; - crypto_ahash_dma_start(tctx->dev, sg); - wait_for_completion_interruptible_timeout(&tctx->dev->complete, + reinit_completion(&rkc->complete); + rkc->status = 0; + crypto_ahash_dma_start(rkc, sg); + wait_for_completion_interruptible_timeout(&rkc->complete, msecs_to_jiffies(2000)); - if (!tctx->dev->status) { - dev_err(tctx->dev->dev, "DMA timeout\n"); + if (!rkc->status) { + dev_err(rkc->dev, "DMA timeout\n"); err = -EFAULT; goto theend; } @@ -306,10 +309,10 @@ static int rk_hash_run(struct crypto_engine *engine, void *breq) * efficiency, and make it response quickly when dma * complete. 
*/ - readl_poll_timeout(tctx->dev->reg + RK_CRYPTO_HASH_STS, v, v == 0, 10, 1000); + readl_poll_timeout(rkc->reg + RK_CRYPTO_HASH_STS, v, v == 0, 10, 1000); for (i = 0; i < crypto_ahash_digestsize(tfm) / 4; i++) { - v = readl(tctx->dev->reg + RK_CRYPTO_HASH_DOUT_0 + i * 4); + v = readl(rkc->reg + RK_CRYPTO_HASH_DOUT_0 + i * 4); put_unaligned_le32(v, areq->result + i * 4); } diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c index 3187869c4c68..6a1bea98fded 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c +++ b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c @@ -303,6 +303,7 @@ static int rk_cipher_run(struct crypto_engine *engine, void *async_req) unsigned int todo; struct skcipher_alg *alg = crypto_skcipher_alg(tfm); struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher); + struct rk_crypto_info *rkc = ctx->dev; algt->stat_req++; @@ -330,49 +331,49 @@ static int rk_cipher_run(struct crypto_engine *engine, void *async_req) scatterwalk_map_and_copy(biv, sgs, offset, ivsize, 0); } if (sgs == sgd) { - err = dma_map_sg(ctx->dev->dev, sgs, 1, DMA_BIDIRECTIONAL); + err = dma_map_sg(rkc->dev, sgs, 1, DMA_BIDIRECTIONAL); if (err <= 0) { err = -EINVAL; goto theend_iv; } } else { - err = dma_map_sg(ctx->dev->dev, sgs, 1, DMA_TO_DEVICE); + err = dma_map_sg(rkc->dev, sgs, 1, DMA_TO_DEVICE); if (err <= 0) { err = -EINVAL; goto theend_iv; } - err = dma_map_sg(ctx->dev->dev, sgd, 1, DMA_FROM_DEVICE); + err = dma_map_sg(rkc->dev, sgd, 1, DMA_FROM_DEVICE); if (err <= 0) { err = -EINVAL; goto theend_sgs; } } err = 0; - rk_cipher_hw_init(ctx->dev, areq); + rk_cipher_hw_init(rkc, areq); if (ivsize) { if (ivsize == DES_BLOCK_SIZE) - memcpy_toio(ctx->dev->reg + RK_CRYPTO_TDES_IV_0, ivtouse, ivsize); + memcpy_toio(rkc->reg + RK_CRYPTO_TDES_IV_0, ivtouse, ivsize); else - memcpy_toio(ctx->dev->reg + RK_CRYPTO_AES_IV_0, ivtouse, ivsize); + memcpy_toio(rkc->reg + RK_CRYPTO_AES_IV_0, 
ivtouse, ivsize); } - reinit_completion(&ctx->dev->complete); - ctx->dev->status = 0; + reinit_completion(&rkc->complete); + rkc->status = 0; todo = min(sg_dma_len(sgs), len); len -= todo; - crypto_dma_start(ctx->dev, sgs, sgd, todo / 4); - wait_for_completion_interruptible_timeout(&ctx->dev->complete, + crypto_dma_start(rkc, sgs, sgd, todo / 4); + wait_for_completion_interruptible_timeout(&rkc->complete, msecs_to_jiffies(2000)); - if (!ctx->dev->status) { - dev_err(ctx->dev->dev, "DMA timeout\n"); + if (!rkc->status) { + dev_err(rkc->dev, "DMA timeout\n"); err = -EFAULT; goto theend; } if (sgs == sgd) { - dma_unmap_sg(ctx->dev->dev, sgs, 1, DMA_BIDIRECTIONAL); + dma_unmap_sg(rkc->dev, sgs, 1, DMA_BIDIRECTIONAL); } else { - dma_unmap_sg(ctx->dev->dev, sgs, 1, DMA_TO_DEVICE); - dma_unmap_sg(ctx->dev->dev, sgd, 1, DMA_FROM_DEVICE); + dma_unmap_sg(rkc->dev, sgs, 1, DMA_TO_DEVICE); + dma_unmap_sg(rkc->dev, sgd, 1, DMA_FROM_DEVICE); } if (rctx->mode & RK_CRYPTO_DEC) { memcpy(iv, biv, ivsize); @@ -405,10 +406,10 @@ theend: theend_sgs: if (sgs == sgd) { - dma_unmap_sg(ctx->dev->dev, sgs, 1, DMA_BIDIRECTIONAL); + dma_unmap_sg(rkc->dev, sgs, 1, DMA_BIDIRECTIONAL); } else { - dma_unmap_sg(ctx->dev->dev, sgs, 1, DMA_TO_DEVICE); - dma_unmap_sg(ctx->dev->dev, sgd, 1, DMA_FROM_DEVICE); + dma_unmap_sg(rkc->dev, sgs, 1, DMA_TO_DEVICE); + dma_unmap_sg(rkc->dev, sgd, 1, DMA_FROM_DEVICE); } theend_iv: return err; From ea389be9857721252367fd2cf81bc8068e060693 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Tue, 27 Sep 2022 07:55:01 +0000 Subject: [PATCH 0563/4122] crypto: rockchip - use the rk_crypto_info given as parameter Instead of using the crypto_info from TFM ctx, use the one given as parameter. 
Reviewed-by: John Keeping Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/rockchip/rk3288_crypto_skcipher.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c index 6a1bea98fded..cf0dfb6029d8 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c +++ b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c @@ -254,7 +254,7 @@ static void rk_cipher_hw_init(struct rk_crypto_info *dev, struct skcipher_reques RK_CRYPTO_TDES_BYTESWAP_KEY | RK_CRYPTO_TDES_BYTESWAP_IV; CRYPTO_WRITE(dev, RK_CRYPTO_TDES_CTRL, rctx->mode); - memcpy_toio(ctx->dev->reg + RK_CRYPTO_TDES_KEY1_0, ctx->key, ctx->keylen); + memcpy_toio(dev->reg + RK_CRYPTO_TDES_KEY1_0, ctx->key, ctx->keylen); conf_reg = RK_CRYPTO_DESSEL; } else { rctx->mode |= RK_CRYPTO_AES_FIFO_MODE | @@ -266,7 +266,7 @@ static void rk_cipher_hw_init(struct rk_crypto_info *dev, struct skcipher_reques else if (ctx->keylen == AES_KEYSIZE_256) rctx->mode |= RK_CRYPTO_AES_256BIT_key; CRYPTO_WRITE(dev, RK_CRYPTO_AES_CTRL, rctx->mode); - memcpy_toio(ctx->dev->reg + RK_CRYPTO_AES_KEY_0, ctx->key, ctx->keylen); + memcpy_toio(dev->reg + RK_CRYPTO_AES_KEY_0, ctx->key, ctx->keylen); } conf_reg |= RK_CRYPTO_BYTESWAP_BTFIFO | RK_CRYPTO_BYTESWAP_BRFIFO; From 81aaf680e85207d6521b250b2a80ba7c91cc9cbe Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Tue, 27 Sep 2022 07:55:02 +0000 Subject: [PATCH 0564/4122] dt-bindings: crypto: convert rockchip-crypto to YAML Convert rockchip-crypto to YAML. 
Reviewed-by: John Keeping Reviewed-by: Krzysztof Kozlowski Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- .../crypto/rockchip,rk3288-crypto.yaml | 64 +++++++++++++++++++ .../bindings/crypto/rockchip-crypto.txt | 28 -------- 2 files changed, 64 insertions(+), 28 deletions(-) create mode 100644 Documentation/devicetree/bindings/crypto/rockchip,rk3288-crypto.yaml delete mode 100644 Documentation/devicetree/bindings/crypto/rockchip-crypto.txt diff --git a/Documentation/devicetree/bindings/crypto/rockchip,rk3288-crypto.yaml b/Documentation/devicetree/bindings/crypto/rockchip,rk3288-crypto.yaml new file mode 100644 index 000000000000..8a219d439d02 --- /dev/null +++ b/Documentation/devicetree/bindings/crypto/rockchip,rk3288-crypto.yaml @@ -0,0 +1,64 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/crypto/rockchip,rk3288-crypto.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Rockchip Electronics Security Accelerator + +maintainers: + - Heiko Stuebner + +properties: + compatible: + enum: + - rockchip,rk3288-crypto + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + maxItems: 4 + + clock-names: + items: + - const: aclk + - const: hclk + - const: sclk + - const: apb_pclk + + resets: + maxItems: 1 + + reset-names: + items: + - const: crypto-rst + +required: + - compatible + - reg + - interrupts + - clocks + - clock-names + - resets + - reset-names + +additionalProperties: false + +examples: + - | + #include + #include + crypto@ff8a0000 { + compatible = "rockchip,rk3288-crypto"; + reg = <0xff8a0000 0x4000>; + interrupts = ; + clocks = <&cru ACLK_CRYPTO>, <&cru HCLK_CRYPTO>, + <&cru SCLK_CRYPTO>, <&cru ACLK_DMAC1>; + clock-names = "aclk", "hclk", "sclk", "apb_pclk"; + resets = <&cru SRST_CRYPTO>; + reset-names = "crypto-rst"; + }; diff --git a/Documentation/devicetree/bindings/crypto/rockchip-crypto.txt 
b/Documentation/devicetree/bindings/crypto/rockchip-crypto.txt deleted file mode 100644 index 5e2ba385b8c9..000000000000 --- a/Documentation/devicetree/bindings/crypto/rockchip-crypto.txt +++ /dev/null @@ -1,28 +0,0 @@ -Rockchip Electronics And Security Accelerator - -Required properties: -- compatible: Should be "rockchip,rk3288-crypto" -- reg: Base physical address of the engine and length of memory mapped - region -- interrupts: Interrupt number -- clocks: Reference to the clocks about crypto -- clock-names: "aclk" used to clock data - "hclk" used to clock data - "sclk" used to clock crypto accelerator - "apb_pclk" used to clock dma -- resets: Must contain an entry for each entry in reset-names. - See ../reset/reset.txt for details. -- reset-names: Must include the name "crypto-rst". - -Examples: - - crypto: cypto-controller@ff8a0000 { - compatible = "rockchip,rk3288-crypto"; - reg = <0xff8a0000 0x4000>; - interrupts = ; - clocks = <&cru ACLK_CRYPTO>, <&cru HCLK_CRYPTO>, - <&cru SCLK_CRYPTO>, <&cru ACLK_DMAC1>; - clock-names = "aclk", "hclk", "sclk", "apb_pclk"; - resets = <&cru SRST_CRYPTO>; - reset-names = "crypto-rst"; - }; From d1b5749687618d969c0be6428174a18a7e94ebd2 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Tue, 27 Sep 2022 07:55:03 +0000 Subject: [PATCH 0565/4122] dt-bindings: crypto: rockchip: add new compatible Since driver support new compatible, we need to update the driver bindings. 
Signed-off-by: Corentin Labbe Reviewed-by: Rob Herring Signed-off-by: Herbert Xu --- .../crypto/rockchip,rk3288-crypto.yaml | 79 +++++++++++++++++-- 1 file changed, 71 insertions(+), 8 deletions(-) diff --git a/Documentation/devicetree/bindings/crypto/rockchip,rk3288-crypto.yaml b/Documentation/devicetree/bindings/crypto/rockchip,rk3288-crypto.yaml index 8a219d439d02..f1a9da8bff7a 100644 --- a/Documentation/devicetree/bindings/crypto/rockchip,rk3288-crypto.yaml +++ b/Documentation/devicetree/bindings/crypto/rockchip,rk3288-crypto.yaml @@ -13,6 +13,8 @@ properties: compatible: enum: - rockchip,rk3288-crypto + - rockchip,rk3328-crypto + - rockchip,rk3399-crypto reg: maxItems: 1 @@ -21,21 +23,82 @@ properties: maxItems: 1 clocks: + minItems: 3 maxItems: 4 clock-names: - items: - - const: aclk - - const: hclk - - const: sclk - - const: apb_pclk + minItems: 3 + maxItems: 4 resets: - maxItems: 1 + minItems: 1 + maxItems: 3 reset-names: - items: - - const: crypto-rst + minItems: 1 + maxItems: 3 + +allOf: + - if: + properties: + compatible: + contains: + const: rockchip,rk3288-crypto + then: + properties: + clocks: + minItems: 4 + clock-names: + items: + - const: aclk + - const: hclk + - const: sclk + - const: apb_pclk + resets: + maxItems: 1 + reset-names: + items: + - const: crypto-rst + - if: + properties: + compatible: + contains: + const: rockchip,rk3328-crypto + then: + properties: + clocks: + maxItems: 3 + clock-names: + items: + - const: hclk_master + - const: hclk_slave + - const: sclk + resets: + maxItems: 1 + reset-names: + items: + - const: crypto-rst + - if: + properties: + compatible: + contains: + const: rockchip,rk3399-crypto + then: + properties: + clocks: + maxItems: 3 + clock-names: + items: + - const: hclk_master + - const: hclk_slave + - const: sclk + resets: + minItems: 3 + reset-names: + items: + - const: master + - const: slave + - const: crypto-rst required: - compatible From 2d3c756adcd7a7ee15b6a55cf01b363e3f134e79 Mon Sep 17 00:00:00 2001 From:
Corentin Labbe Date: Tue, 27 Sep 2022 07:55:07 +0000 Subject: [PATCH 0566/4122] crypto: rockchip - store crypto_info in request context The crypto_info to use must be stored in the request context. This will help when 2 crypto_info will be available on rk3399. Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/rockchip/rk3288_crypto.h | 2 ++ drivers/crypto/rockchip/rk3288_crypto_ahash.c | 14 ++++++-------- drivers/crypto/rockchip/rk3288_crypto_skcipher.c | 6 ++++-- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/crypto/rockchip/rk3288_crypto.h b/drivers/crypto/rockchip/rk3288_crypto.h index 28bf09fe1c1d..ff9fc25972eb 100644 --- a/drivers/crypto/rockchip/rk3288_crypto.h +++ b/drivers/crypto/rockchip/rk3288_crypto.h @@ -215,6 +215,7 @@ struct rk_ahash_ctx { /* the private variable of hash for fallback */ struct rk_ahash_rctx { + struct rk_crypto_info *dev; struct ahash_request fallback_req; u32 mode; int nrsg; @@ -231,6 +232,7 @@ struct rk_cipher_ctx { }; struct rk_cipher_rctx { + struct rk_crypto_info *dev; u8 backup_iv[AES_BLOCK_SIZE]; u32 mode; struct skcipher_request fallback_req; // keep at the end diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c index 636dbcde0ca3..d1bf68cb390d 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c +++ b/drivers/crypto/rockchip/rk3288_crypto_ahash.c @@ -200,6 +200,7 @@ static int rk_ahash_export(struct ahash_request *req, void *out) static int rk_ahash_digest(struct ahash_request *req) { + struct rk_ahash_rctx *rctx = ahash_request_ctx(req); struct rk_ahash_ctx *tctx = crypto_tfm_ctx(req->base.tfm); struct rk_crypto_info *dev = tctx->dev; @@ -209,6 +210,8 @@ static int rk_ahash_digest(struct ahash_request *req) if (!req->nbytes) return zero_message_process(req); + rctx->dev = dev; + return crypto_transfer_hash_request_to_engine(dev->engine, req); } @@ -223,10 +226,8 @@ static void crypto_ahash_dma_start(struct 
rk_crypto_info *dev, struct scatterlis static int rk_hash_prepare(struct crypto_engine *engine, void *breq) { struct ahash_request *areq = container_of(breq, struct ahash_request, base); - struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); struct rk_ahash_rctx *rctx = ahash_request_ctx(areq); - struct rk_ahash_ctx *tctx = crypto_ahash_ctx(tfm); - struct rk_crypto_info *rkc = tctx->dev; + struct rk_crypto_info *rkc = rctx->dev; int ret; ret = dma_map_sg(rkc->dev, areq->src, sg_nents(areq->src), DMA_TO_DEVICE); @@ -241,10 +242,8 @@ static int rk_hash_prepare(struct crypto_engine *engine, void *breq) static int rk_hash_unprepare(struct crypto_engine *engine, void *breq) { struct ahash_request *areq = container_of(breq, struct ahash_request, base); - struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); struct rk_ahash_rctx *rctx = ahash_request_ctx(areq); - struct rk_ahash_ctx *tctx = crypto_ahash_ctx(tfm); - struct rk_crypto_info *rkc = tctx->dev; + struct rk_crypto_info *rkc = rctx->dev; dma_unmap_sg(rkc->dev, areq->src, rctx->nrsg, DMA_TO_DEVICE); return 0; @@ -255,11 +254,10 @@ static int rk_hash_run(struct crypto_engine *engine, void *breq) struct ahash_request *areq = container_of(breq, struct ahash_request, base); struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); struct rk_ahash_rctx *rctx = ahash_request_ctx(areq); - struct rk_ahash_ctx *tctx = crypto_ahash_ctx(tfm); struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg); struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.hash); struct scatterlist *sg = areq->src; - struct rk_crypto_info *rkc = tctx->dev; + struct rk_crypto_info *rkc = rctx->dev; int err = 0; int i; u32 v; diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c index cf0dfb6029d8..0b1c90ababb7 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c +++ b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c @@ -86,12 +86,15 @@ static int 
rk_cipher_handle_req(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct rk_cipher_ctx *tctx = crypto_skcipher_ctx(tfm); + struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); struct rk_crypto_info *rkc = tctx->dev; struct crypto_engine *engine = rkc->engine; if (rk_cipher_need_fallback(req)) return rk_cipher_fallback(req); + rctx->dev = rkc; + return crypto_transfer_skcipher_request_to_engine(engine, req); } @@ -290,7 +293,6 @@ static int rk_cipher_run(struct crypto_engine *engine, void *async_req) { struct skcipher_request *areq = container_of(async_req, struct skcipher_request, base); struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq); - struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); struct rk_cipher_rctx *rctx = skcipher_request_ctx(areq); struct scatterlist *sgs, *sgd; int err = 0; @@ -303,7 +305,7 @@ static int rk_cipher_run(struct crypto_engine *engine, void *async_req) unsigned int todo; struct skcipher_alg *alg = crypto_skcipher_alg(tfm); struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher); - struct rk_crypto_info *rkc = ctx->dev; + struct rk_crypto_info *rkc = rctx->dev; algt->stat_req++; From e220e6719438f7a99fe0a73e6e126481380202fa Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Tue, 27 Sep 2022 07:55:08 +0000 Subject: [PATCH 0567/4122] crypto: rockchip - Check for clocks numbers and their frequencies Add the number of clocks needed for each compatible. Rockchip's datasheet gives maximum frequencies for some clocks, so add checks for verifying they are within limits. Let's start with rk3288 for clock frequency check, others will come later.
Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/rockchip/rk3288_crypto.c | 75 +++++++++++++++++++++---- drivers/crypto/rockchip/rk3288_crypto.h | 16 +++++- 2 files changed, 79 insertions(+), 12 deletions(-) diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c index c92559b83f7d..232dc625d6e5 100644 --- a/drivers/crypto/rockchip/rk3288_crypto.c +++ b/drivers/crypto/rockchip/rk3288_crypto.c @@ -14,10 +14,58 @@ #include #include #include +#include #include #include #include +static const struct rk_variant rk3288_variant = { + .num_clks = 4, + .rkclks = { + { "sclk", 150000000}, + } +}; + +static const struct rk_variant rk3328_variant = { + .num_clks = 3, +}; + +static int rk_crypto_get_clks(struct rk_crypto_info *dev) +{ + int i, j, err; + unsigned long cr; + + dev->num_clks = devm_clk_bulk_get_all(dev->dev, &dev->clks); + if (dev->num_clks < dev->variant->num_clks) { + dev_err(dev->dev, "Missing clocks, got %d instead of %d\n", + dev->num_clks, dev->variant->num_clks); + return -EINVAL; + } + + for (i = 0; i < dev->num_clks; i++) { + cr = clk_get_rate(dev->clks[i].clk); + for (j = 0; j < ARRAY_SIZE(dev->variant->rkclks); j++) { + if (dev->variant->rkclks[j].max == 0) + continue; + if (strcmp(dev->variant->rkclks[j].name, dev->clks[i].id)) + continue; + if (cr > dev->variant->rkclks[j].max) { + err = clk_set_rate(dev->clks[i].clk, + dev->variant->rkclks[j].max); + if (err) + dev_err(dev->dev, "Fail downclocking %s from %lu to %lu\n", + dev->variant->rkclks[j].name, cr, + dev->variant->rkclks[j].max); + else + dev_info(dev->dev, "Downclocking %s from %lu to %lu\n", + dev->variant->rkclks[j].name, cr, + dev->variant->rkclks[j].max); + } + } + } + return 0; +} + static int rk_crypto_enable_clk(struct rk_crypto_info *dev) { int err; @@ -201,8 +249,12 @@ static void rk_crypto_unregister(void) } static const struct of_device_id crypto_of_id_table[] = { - { .compatible = "rockchip,rk3288-crypto" }, - { 
.compatible = "rockchip,rk3328-crypto" }, + { .compatible = "rockchip,rk3288-crypto", + .data = &rk3288_variant, + }, + { .compatible = "rockchip,rk3328-crypto", + .data = &rk3328_variant, + }, {} }; MODULE_DEVICE_TABLE(of, crypto_of_id_table); @@ -220,6 +272,15 @@ static int rk_crypto_probe(struct platform_device *pdev) goto err_crypto; } + crypto_info->dev = &pdev->dev; + platform_set_drvdata(pdev, crypto_info); + + crypto_info->variant = of_device_get_match_data(&pdev->dev); + if (!crypto_info->variant) { + dev_err(&pdev->dev, "Missing variant\n"); + return -EINVAL; + } + crypto_info->rst = devm_reset_control_get(dev, "crypto-rst"); if (IS_ERR(crypto_info->rst)) { err = PTR_ERR(crypto_info->rst); @@ -236,12 +297,9 @@ static int rk_crypto_probe(struct platform_device *pdev) goto err_crypto; } - crypto_info->num_clks = devm_clk_bulk_get_all(&pdev->dev, - &crypto_info->clks); - if (crypto_info->num_clks < 3) { - err = -EINVAL; + err = rk_crypto_get_clks(crypto_info); + if (err) goto err_crypto; - } crypto_info->irq = platform_get_irq(pdev, 0); if (crypto_info->irq < 0) { @@ -259,9 +317,6 @@ static int rk_crypto_probe(struct platform_device *pdev) goto err_crypto; } - crypto_info->dev = &pdev->dev; - platform_set_drvdata(pdev, crypto_info); - crypto_info->engine = crypto_engine_alloc_init(&pdev->dev, true); crypto_engine_start(crypto_info->engine); init_completion(&crypto_info->complete); diff --git a/drivers/crypto/rockchip/rk3288_crypto.h b/drivers/crypto/rockchip/rk3288_crypto.h index ff9fc25972eb..ac979d67ced9 100644 --- a/drivers/crypto/rockchip/rk3288_crypto.h +++ b/drivers/crypto/rockchip/rk3288_crypto.h @@ -188,14 +188,26 @@ #define CRYPTO_WRITE(dev, offset, val) \ writel_relaxed((val), ((dev)->reg + (offset))) +#define RK_MAX_CLKS 4 + +struct rk_clks { + const char *name; + unsigned long max; +}; + +struct rk_variant { + int num_clks; + struct rk_clks rkclks[RK_MAX_CLKS]; +}; + struct rk_crypto_info { struct device *dev; struct clk_bulk_data *clks; - int 
num_clks; + int num_clks; struct reset_control *rst; void __iomem *reg; int irq; - + const struct rk_variant *variant; struct crypto_engine *engine; struct completion complete; int status; From 0d31b14c9e4178a129a1aa5e491e4da1489c07de Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Tue, 27 Sep 2022 07:55:09 +0000 Subject: [PATCH 0568/4122] crypto: rockchip - rk_ahash_reg_init use crypto_info from parameter rk_ahash_reg_init() use crypto_info from TFM context, since we will remove it, let's take if from parameters. Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/rockchip/rk3288_crypto_ahash.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c index d1bf68cb390d..30f78256c955 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c +++ b/drivers/crypto/rockchip/rk3288_crypto_ahash.c @@ -78,12 +78,10 @@ static int zero_message_process(struct ahash_request *req) return 0; } -static void rk_ahash_reg_init(struct ahash_request *req) +static void rk_ahash_reg_init(struct ahash_request *req, + struct rk_crypto_info *dev) { struct rk_ahash_rctx *rctx = ahash_request_ctx(req); - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct rk_ahash_ctx *tctx = crypto_ahash_ctx(tfm); - struct rk_crypto_info *dev = tctx->dev; int reg_status; reg_status = CRYPTO_READ(dev, RK_CRYPTO_CTRL) | @@ -281,7 +279,7 @@ static int rk_hash_run(struct crypto_engine *engine, void *breq) goto theend; } - rk_ahash_reg_init(areq); + rk_ahash_reg_init(areq, rkc); while (sg) { reinit_completion(&rkc->complete); From c5a1e104c35e5134b6048f1e03960a6ac9c42935 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Tue, 27 Sep 2022 07:55:10 +0000 Subject: [PATCH 0569/4122] crypto: rockchip - permit to have more than one reset The RK3399 has 3 resets, so the driver to handle multiple resets. This is done by using devm_reset_control_array_get_exclusive(). 
Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/rockchip/rk3288_crypto.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c index 232dc625d6e5..d96f375423d5 100644 --- a/drivers/crypto/rockchip/rk3288_crypto.c +++ b/drivers/crypto/rockchip/rk3288_crypto.c @@ -281,7 +281,7 @@ static int rk_crypto_probe(struct platform_device *pdev) return -EINVAL; } - crypto_info->rst = devm_reset_control_get(dev, "crypto-rst"); + crypto_info->rst = devm_reset_control_array_get_exclusive(dev); if (IS_ERR(crypto_info->rst)) { err = PTR_ERR(crypto_info->rst); goto err_crypto; From 9dcd71c863a6f6476378d076d3e9189c854d49fd Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Tue, 27 Sep 2022 07:55:11 +0000 Subject: [PATCH 0570/4122] crypto: rockchip - Add support for RK3399 The RK3399 has 2 rk3288 compatible crypto device named crypto0 and crypto1. The only difference is lack of RSA in crypto1. We need to add driver support for 2 parallel instance as only one need to register crypto algorithms. Then the driver will round robin each request on each device. For avoiding complexity (device bringup after a TFM is created), PM is modified to be handled per request. 
Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/rockchip/rk3288_crypto.c | 92 +++++++++++++++---- drivers/crypto/rockchip/rk3288_crypto.h | 25 +++-- drivers/crypto/rockchip/rk3288_crypto_ahash.c | 39 ++++---- .../crypto/rockchip/rk3288_crypto_skcipher.c | 37 ++++---- 4 files changed, 124 insertions(+), 69 deletions(-) diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c index d96f375423d5..6217e73ba4c4 100644 --- a/drivers/crypto/rockchip/rk3288_crypto.c +++ b/drivers/crypto/rockchip/rk3288_crypto.c @@ -19,6 +19,23 @@ #include #include +static struct rockchip_ip rocklist = { + .dev_list = LIST_HEAD_INIT(rocklist.dev_list), + .lock = __SPIN_LOCK_UNLOCKED(rocklist.lock), +}; + +struct rk_crypto_info *get_rk_crypto(void) +{ + struct rk_crypto_info *first; + + spin_lock(&rocklist.lock); + first = list_first_entry_or_null(&rocklist.dev_list, + struct rk_crypto_info, list); + list_rotate_left(&rocklist.dev_list); + spin_unlock(&rocklist.lock); + return first; +} + static const struct rk_variant rk3288_variant = { .num_clks = 4, .rkclks = { @@ -30,6 +47,10 @@ static const struct rk_variant rk3328_variant = { .num_clks = 3, }; +static const struct rk_variant rk3399_variant = { + .num_clks = 3, +}; + static int rk_crypto_get_clks(struct rk_crypto_info *dev) { int i, j, err; @@ -83,8 +104,8 @@ static void rk_crypto_disable_clk(struct rk_crypto_info *dev) } /* - * Power management strategy: The device is suspended unless a TFM exists for - * one of the algorithms proposed by this driver. + * Power management strategy: The device is suspended until a request + * is handled. For avoiding suspend/resume yoyo, the autosuspend is set to 2s. 
*/ static int rk_crypto_pm_suspend(struct device *dev) { @@ -166,8 +187,17 @@ static struct rk_crypto_tmp *rk_cipher_algs[] = { #ifdef CONFIG_CRYPTO_DEV_ROCKCHIP_DEBUG static int rk_crypto_debugfs_show(struct seq_file *seq, void *v) { + struct rk_crypto_info *dd; unsigned int i; + spin_lock(&rocklist.lock); + list_for_each_entry(dd, &rocklist.dev_list, list) { + seq_printf(seq, "%s %s requests: %lu\n", + dev_driver_string(dd->dev), dev_name(dd->dev), + dd->nreq); + } + spin_unlock(&rocklist.lock); + for (i = 0; i < ARRAY_SIZE(rk_cipher_algs); i++) { if (!rk_cipher_algs[i]->dev) continue; @@ -198,6 +228,18 @@ static int rk_crypto_debugfs_show(struct seq_file *seq, void *v) DEFINE_SHOW_ATTRIBUTE(rk_crypto_debugfs); #endif +static void register_debugfs(struct rk_crypto_info *crypto_info) +{ +#ifdef CONFIG_CRYPTO_DEV_ROCKCHIP_DEBUG + /* Ignore error of debugfs */ + rocklist.dbgfs_dir = debugfs_create_dir("rk3288_crypto", NULL); + rocklist.dbgfs_stats = debugfs_create_file("stats", 0444, + rocklist.dbgfs_dir, + &rocklist, + &rk_crypto_debugfs_fops); +#endif +} + static int rk_crypto_register(struct rk_crypto_info *crypto_info) { unsigned int i, k; @@ -255,6 +297,9 @@ static const struct of_device_id crypto_of_id_table[] = { { .compatible = "rockchip,rk3328-crypto", .data = &rk3328_variant, }, + { .compatible = "rockchip,rk3399-crypto", + .data = &rk3399_variant, + }, {} }; MODULE_DEVICE_TABLE(of, crypto_of_id_table); @@ -262,7 +307,7 @@ MODULE_DEVICE_TABLE(of, crypto_of_id_table); static int rk_crypto_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct rk_crypto_info *crypto_info; + struct rk_crypto_info *crypto_info, *first; int err = 0; crypto_info = devm_kzalloc(&pdev->dev, @@ -325,22 +370,22 @@ static int rk_crypto_probe(struct platform_device *pdev) if (err) goto err_pm; - err = rk_crypto_register(crypto_info); - if (err) { - dev_err(dev, "err in register alg"); - goto err_register_alg; + spin_lock(&rocklist.lock); + first = 
list_first_entry_or_null(&rocklist.dev_list, + struct rk_crypto_info, list); + list_add_tail(&crypto_info->list, &rocklist.dev_list); + spin_unlock(&rocklist.lock); + + if (!first) { + err = rk_crypto_register(crypto_info); + if (err) { + dev_err(dev, "Fail to register crypto algorithms"); + goto err_register_alg; + } + + register_debugfs(crypto_info); } -#ifdef CONFIG_CRYPTO_DEV_ROCKCHIP_DEBUG - /* Ignore error of debugfs */ - crypto_info->dbgfs_dir = debugfs_create_dir("rk3288_crypto", NULL); - crypto_info->dbgfs_stats = debugfs_create_file("stats", 0444, - crypto_info->dbgfs_dir, - crypto_info, - &rk_crypto_debugfs_fops); -#endif - - dev_info(dev, "Crypto Accelerator successfully registered\n"); return 0; err_register_alg: @@ -355,11 +400,20 @@ err_crypto: static int rk_crypto_remove(struct platform_device *pdev) { struct rk_crypto_info *crypto_tmp = platform_get_drvdata(pdev); + struct rk_crypto_info *first; + spin_lock_bh(&rocklist.lock); + list_del(&crypto_tmp->list); + first = list_first_entry_or_null(&rocklist.dev_list, + struct rk_crypto_info, list); + spin_unlock_bh(&rocklist.lock); + + if (!first) { #ifdef CONFIG_CRYPTO_DEV_ROCKCHIP_DEBUG - debugfs_remove_recursive(crypto_tmp->dbgfs_dir); + debugfs_remove_recursive(rocklist.dbgfs_dir); #endif - rk_crypto_unregister(); + rk_crypto_unregister(); + } rk_crypto_pm_exit(crypto_tmp); crypto_engine_exit(crypto_tmp->engine); return 0; diff --git a/drivers/crypto/rockchip/rk3288_crypto.h b/drivers/crypto/rockchip/rk3288_crypto.h index ac979d67ced9..b2695258cade 100644 --- a/drivers/crypto/rockchip/rk3288_crypto.h +++ b/drivers/crypto/rockchip/rk3288_crypto.h @@ -190,6 +190,20 @@ #define RK_MAX_CLKS 4 +/* + * struct rockchip_ip - struct for managing a list of RK crypto instance + * @dev_list: Used for doing a list of rk_crypto_info + * @lock: Control access to dev_list + * @dbgfs_dir: Debugfs dentry for statistic directory + * @dbgfs_stats: Debugfs dentry for statistic counters + */ +struct rockchip_ip { + struct 
list_head dev_list; + spinlock_t lock; /* Control access to dev_list */ + struct dentry *dbgfs_dir; + struct dentry *dbgfs_stats; +}; + struct rk_clks { const char *name; unsigned long max; @@ -201,6 +215,7 @@ struct rk_variant { }; struct rk_crypto_info { + struct list_head list; struct device *dev; struct clk_bulk_data *clks; int num_clks; @@ -208,19 +223,15 @@ struct rk_crypto_info { void __iomem *reg; int irq; const struct rk_variant *variant; + unsigned long nreq; struct crypto_engine *engine; struct completion complete; int status; -#ifdef CONFIG_CRYPTO_DEV_ROCKCHIP_DEBUG - struct dentry *dbgfs_dir; - struct dentry *dbgfs_stats; -#endif }; /* the private variable of hash */ struct rk_ahash_ctx { struct crypto_engine_ctx enginectx; - struct rk_crypto_info *dev; /* for fallback */ struct crypto_ahash *fallback_tfm; }; @@ -236,7 +247,6 @@ struct rk_ahash_rctx { /* the private variable of cipher */ struct rk_cipher_ctx { struct crypto_engine_ctx enginectx; - struct rk_crypto_info *dev; unsigned int keylen; u8 key[AES_MAX_KEY_SIZE]; u8 iv[AES_BLOCK_SIZE]; @@ -252,7 +262,7 @@ struct rk_cipher_rctx { struct rk_crypto_tmp { u32 type; - struct rk_crypto_info *dev; + struct rk_crypto_info *dev; union { struct skcipher_alg skcipher; struct ahash_alg hash; @@ -276,4 +286,5 @@ extern struct rk_crypto_tmp rk_ahash_sha1; extern struct rk_crypto_tmp rk_ahash_sha256; extern struct rk_crypto_tmp rk_ahash_md5; +struct rk_crypto_info *get_rk_crypto(void); #endif diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c index 30f78256c955..a78ff3dcd0b1 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c +++ b/drivers/crypto/rockchip/rk3288_crypto_ahash.c @@ -199,8 +199,8 @@ static int rk_ahash_export(struct ahash_request *req, void *out) static int rk_ahash_digest(struct ahash_request *req) { struct rk_ahash_rctx *rctx = ahash_request_ctx(req); - struct rk_ahash_ctx *tctx = crypto_tfm_ctx(req->base.tfm); - struct 
rk_crypto_info *dev = tctx->dev; + struct rk_crypto_info *dev; + struct crypto_engine *engine; if (rk_ahash_need_fallback(req)) return rk_ahash_digest_fb(req); @@ -208,9 +208,12 @@ static int rk_ahash_digest(struct ahash_request *req) if (!req->nbytes) return zero_message_process(req); - rctx->dev = dev; + dev = get_rk_crypto(); - return crypto_transfer_hash_request_to_engine(dev->engine, req); + rctx->dev = dev; + engine = dev->engine; + + return crypto_transfer_hash_request_to_engine(engine, req); } static void crypto_ahash_dma_start(struct rk_crypto_info *dev, struct scatterlist *sg) @@ -260,9 +263,14 @@ static int rk_hash_run(struct crypto_engine *engine, void *breq) int i; u32 v; + err = pm_runtime_resume_and_get(rkc->dev); + if (err) + return err; + rctx->mode = 0; algt->stat_req++; + rkc->nreq++; switch (crypto_ahash_digestsize(tfm)) { case SHA1_DIGEST_SIZE: @@ -313,6 +321,8 @@ static int rk_hash_run(struct crypto_engine *engine, void *breq) } theend: + pm_runtime_put_autosuspend(rkc->dev); + local_bh_disable(); crypto_finalize_hash_request(engine, breq, err); local_bh_enable(); @@ -323,21 +333,15 @@ theend: static int rk_cra_hash_init(struct crypto_tfm *tfm) { struct rk_ahash_ctx *tctx = crypto_tfm_ctx(tfm); - struct rk_crypto_tmp *algt; - struct ahash_alg *alg = __crypto_ahash_alg(tfm->__crt_alg); - const char *alg_name = crypto_tfm_alg_name(tfm); - int err; - - algt = container_of(alg, struct rk_crypto_tmp, alg.hash); - - tctx->dev = algt->dev; + struct ahash_alg *alg = __crypto_ahash_alg(tfm->__crt_alg); + struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.hash); /* for fallback */ tctx->fallback_tfm = crypto_alloc_ahash(alg_name, 0, CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(tctx->fallback_tfm)) { - dev_err(tctx->dev->dev, "Could not load fallback driver.\n"); + dev_err(algt->dev->dev, "Could not load fallback driver.\n"); return PTR_ERR(tctx->fallback_tfm); } @@ -349,15 +353,7 @@ static int rk_cra_hash_init(struct crypto_tfm *tfm) 
tctx->enginectx.op.prepare_request = rk_hash_prepare; tctx->enginectx.op.unprepare_request = rk_hash_unprepare; - err = pm_runtime_resume_and_get(tctx->dev->dev); - if (err < 0) - goto error_pm; - return 0; -error_pm: - crypto_free_ahash(tctx->fallback_tfm); - - return err; } static void rk_cra_hash_exit(struct crypto_tfm *tfm) @@ -365,7 +361,6 @@ static void rk_cra_hash_exit(struct crypto_tfm *tfm) struct rk_ahash_ctx *tctx = crypto_tfm_ctx(tfm); crypto_free_ahash(tctx->fallback_tfm); - pm_runtime_put_autosuspend(tctx->dev->dev); } struct rk_crypto_tmp rk_ahash_sha1 = { diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c index 0b1c90ababb7..59069457582b 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c +++ b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c @@ -17,11 +17,11 @@ static int rk_cipher_need_fallback(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - unsigned int bs = crypto_skcipher_blocksize(tfm); struct skcipher_alg *alg = crypto_skcipher_alg(tfm); struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher); struct scatterlist *sgs, *sgd; unsigned int stodo, dtodo, len; + unsigned int bs = crypto_skcipher_blocksize(tfm); if (!req->cryptlen) return true; @@ -84,15 +84,16 @@ static int rk_cipher_fallback(struct skcipher_request *areq) static int rk_cipher_handle_req(struct skcipher_request *req) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct rk_cipher_ctx *tctx = crypto_skcipher_ctx(tfm); struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); - struct rk_crypto_info *rkc = tctx->dev; - struct crypto_engine *engine = rkc->engine; + struct rk_crypto_info *rkc; + struct crypto_engine *engine; if (rk_cipher_need_fallback(req)) return rk_cipher_fallback(req); + rkc = get_rk_crypto(); + + engine = rkc->engine; rctx->dev = rkc; return crypto_transfer_skcipher_request_to_engine(engine, req); @@ 
-307,7 +308,12 @@ static int rk_cipher_run(struct crypto_engine *engine, void *async_req) struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher); struct rk_crypto_info *rkc = rctx->dev; + err = pm_runtime_resume_and_get(rkc->dev); + if (err) + return err; + algt->stat_req++; + rkc->nreq++; ivsize = crypto_skcipher_ivsize(tfm); if (areq->iv && crypto_skcipher_ivsize(tfm) > 0) { @@ -401,6 +407,8 @@ static int rk_cipher_run(struct crypto_engine *engine, void *async_req) } theend: + pm_runtime_put_autosuspend(rkc->dev); + local_bh_disable(); crypto_finalize_skcipher_request(engine, areq, err); local_bh_enable(); @@ -420,18 +428,13 @@ theend_iv: static int rk_cipher_tfm_init(struct crypto_skcipher *tfm) { struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_alg *alg = crypto_skcipher_alg(tfm); const char *name = crypto_tfm_alg_name(&tfm->base); - struct rk_crypto_tmp *algt; - int err; - - algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher); - - ctx->dev = algt->dev; + struct skcipher_alg *alg = crypto_skcipher_alg(tfm); + struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher); ctx->fallback_tfm = crypto_alloc_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(ctx->fallback_tfm)) { - dev_err(ctx->dev->dev, "ERROR: Cannot allocate fallback for %s %ld\n", + dev_err(algt->dev->dev, "ERROR: Cannot allocate fallback for %s %ld\n", name, PTR_ERR(ctx->fallback_tfm)); return PTR_ERR(ctx->fallback_tfm); } @@ -441,14 +444,7 @@ static int rk_cipher_tfm_init(struct crypto_skcipher *tfm) ctx->enginectx.op.do_one_request = rk_cipher_run; - err = pm_runtime_resume_and_get(ctx->dev->dev); - if (err < 0) - goto error_pm; - return 0; -error_pm: - crypto_free_skcipher(ctx->fallback_tfm); - return err; } static void rk_cipher_tfm_exit(struct crypto_skcipher *tfm) @@ -457,7 +453,6 @@ static void rk_cipher_tfm_exit(struct crypto_skcipher *tfm) memzero_explicit(ctx->key, ctx->keylen); 
crypto_free_skcipher(ctx->fallback_tfm); - pm_runtime_put_autosuspend(ctx->dev->dev); } struct rk_crypto_tmp rk_ecb_aes_alg = { From 7984ceb134bf31aa9a597f10ed52d831d5aede14 Mon Sep 17 00:00:00 2001 From: Frederick Lawler Date: Mon, 17 Oct 2022 14:25:00 -0500 Subject: [PATCH 0571/4122] crypto: af_alg - Support symmetric encryption via keyring keys We want to leverage keyring to store sensitive keys, and then use those keys for symmetric encryption via the crypto API. Among the key types we wish to support are: user, logon, encrypted, and trusted. User key types are already able to have their data copied to user space, but logon does not support this. Further, trusted and encrypted keys will return their encrypted data back to user space on read, which does not make them ideal for symmetric encryption. To support symmetric encryption for these key types, add a new ALG_SET_KEY_BY_KEY_SERIAL setsockopt() option to the crypto API. This allows users to pass a key_serial_t to the crypto API to perform symmetric encryption. The behavior is the same as ALG_SET_KEY, but the crypto key data is copied in kernel space from a keyring key, which allows for the support of logon, encrypted, and trusted key types. Keyring keys must have the KEY_(POS|USR|GRP|OTH)_SEARCH permission set to leverage this feature. This follows the asymmetric_key type where key lookup calls eventually lead to keyring_search_rcu() without the KEYRING_SEARCH_NO_CHECK_PERM flag set. Signed-off-by: Frederick Lawler Signed-off-by: Herbert Xu --- Documentation/crypto/userspace-if.rst | 15 ++- crypto/af_alg.c | 135 +++++++++++++++++++++++++- include/uapi/linux/if_alg.h | 1 + 3 files changed, 147 insertions(+), 4 deletions(-) diff --git a/Documentation/crypto/userspace-if.rst b/Documentation/crypto/userspace-if.rst index b45dabbf69d6..f80f243e227e 100644 --- a/Documentation/crypto/userspace-if.rst +++ b/Documentation/crypto/userspace-if.rst @@ -131,9 +131,9 @@ from the kernel crypto API. 
If the buffer is too small for the message digest, the flag MSG_TRUNC is set by the kernel. In order to set a message digest key, the calling application must use -the setsockopt() option of ALG_SET_KEY. If the key is not set the HMAC -operation is performed without the initial HMAC state change caused by -the key. +the setsockopt() option of ALG_SET_KEY or ALG_SET_KEY_BY_KEY_SERIAL. If the +key is not set the HMAC operation is performed without the initial HMAC state +change caused by the key. Symmetric Cipher API -------------------- @@ -382,6 +382,15 @@ mentioned optname: - the RNG cipher type to provide the seed +- ALG_SET_KEY_BY_KEY_SERIAL -- Setting the key via keyring key_serial_t. + This operation behaves the same as ALG_SET_KEY. The decrypted + data is copied from a keyring key, and uses that data as the + key for symmetric encryption. + + The passed in key_serial_t must have the KEY_(POS|USR|GRP|OTH)_SEARCH + permission set, otherwise -EPERM is returned. Supports key types: user, + logon, encrypted, and trusted. + - ALG_SET_AEAD_AUTHSIZE -- Setting the authentication tag size for AEAD ciphers. For a encryption operation, the authentication tag of the given size will be generated. 
For a decryption operation, the diff --git a/crypto/af_alg.c b/crypto/af_alg.c index e893c0f6c879..0a4fa2a429e2 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include #include #include @@ -19,6 +21,10 @@ #include #include #include +#include +#include +#include +#include struct alg_type_list { const struct af_alg_type *type; @@ -222,6 +228,129 @@ out: return err; } +#ifdef CONFIG_KEYS + +static const u8 *key_data_ptr_user(const struct key *key, + unsigned int *datalen) +{ + const struct user_key_payload *ukp; + + ukp = user_key_payload_locked(key); + if (IS_ERR_OR_NULL(ukp)) + return ERR_PTR(-EKEYREVOKED); + + *datalen = key->datalen; + + return ukp->data; +} + +static const u8 *key_data_ptr_encrypted(const struct key *key, + unsigned int *datalen) +{ + const struct encrypted_key_payload *ekp; + + ekp = dereference_key_locked(key); + if (IS_ERR_OR_NULL(ekp)) + return ERR_PTR(-EKEYREVOKED); + + *datalen = ekp->decrypted_datalen; + + return ekp->decrypted_data; +} + +static const u8 *key_data_ptr_trusted(const struct key *key, + unsigned int *datalen) +{ + const struct trusted_key_payload *tkp; + + tkp = dereference_key_locked(key); + if (IS_ERR_OR_NULL(tkp)) + return ERR_PTR(-EKEYREVOKED); + + *datalen = tkp->key_len; + + return tkp->key; +} + +static struct key *lookup_key(key_serial_t serial) +{ + key_ref_t key_ref; + + key_ref = lookup_user_key(serial, 0, KEY_NEED_SEARCH); + if (IS_ERR(key_ref)) + return ERR_CAST(key_ref); + + return key_ref_to_ptr(key_ref); +} + +static int alg_setkey_by_key_serial(struct alg_sock *ask, sockptr_t optval, + unsigned int optlen) +{ + const struct af_alg_type *type = ask->type; + u8 *key_data = NULL; + unsigned int key_datalen; + key_serial_t serial; + struct key *key; + const u8 *ret; + int err; + + if (optlen != sizeof(serial)) + return -EINVAL; + + if (copy_from_sockptr(&serial, optval, optlen)) + return -EFAULT; + + key = lookup_key(serial); + if (IS_ERR(key)) 
+ return PTR_ERR(key); + + down_read(&key->sem); + + ret = ERR_PTR(-ENOPROTOOPT); + if (!strcmp(key->type->name, "user") || + !strcmp(key->type->name, "logon")) { + ret = key_data_ptr_user(key, &key_datalen); + } else if (IS_REACHABLE(CONFIG_ENCRYPTED_KEYS) && + !strcmp(key->type->name, "encrypted")) { + ret = key_data_ptr_encrypted(key, &key_datalen); + } else if (IS_REACHABLE(CONFIG_TRUSTED_KEYS) && + !strcmp(key->type->name, "trusted")) { + ret = key_data_ptr_trusted(key, &key_datalen); + } + + if (IS_ERR(ret)) { + up_read(&key->sem); + return PTR_ERR(ret); + } + + key_data = sock_kmalloc(&ask->sk, key_datalen, GFP_KERNEL); + if (!key_data) { + up_read(&key->sem); + return -ENOMEM; + } + + memcpy(key_data, ret, key_datalen); + + up_read(&key->sem); + + err = type->setkey(ask->private, key_data, key_datalen); + + sock_kzfree_s(&ask->sk, key_data, key_datalen); + + return err; +} + +#else + +static inline int alg_setkey_by_key_serial(struct alg_sock *ask, + sockptr_t optval, + unsigned int optlen) +{ + return -ENOPROTOOPT; +} + +#endif + static int alg_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { @@ -242,12 +371,16 @@ static int alg_setsockopt(struct socket *sock, int level, int optname, switch (optname) { case ALG_SET_KEY: + case ALG_SET_KEY_BY_KEY_SERIAL: if (sock->state == SS_CONNECTED) goto unlock; if (!type->setkey) goto unlock; - err = alg_setkey(sk, optval, optlen); + if (optname == ALG_SET_KEY_BY_KEY_SERIAL) + err = alg_setkey_by_key_serial(ask, optval, optlen); + else + err = alg_setkey(sk, optval, optlen); break; case ALG_SET_AEAD_AUTHSIZE: if (sock->state == SS_CONNECTED) diff --git a/include/uapi/linux/if_alg.h b/include/uapi/linux/if_alg.h index 578b18aab821..0824fbc026a1 100644 --- a/include/uapi/linux/if_alg.h +++ b/include/uapi/linux/if_alg.h @@ -52,6 +52,7 @@ struct af_alg_iv { #define ALG_SET_AEAD_ASSOCLEN 4 #define ALG_SET_AEAD_AUTHSIZE 5 #define ALG_SET_DRBG_ENTROPY 6 +#define 
ALG_SET_KEY_BY_KEY_SERIAL 7 /* Operations */ #define ALG_OP_DECRYPT 0 From 3efe90af4c0c46c58dba1b306de142827153d9c0 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Sat, 22 Oct 2022 01:17:44 +0000 Subject: [PATCH 0572/4122] crypto: hisilicon/qm - increase the memory of local variables Increase the size of the value buffer to prevent a stack overflow found by fuzz testing. The maximum length of the qos configuration buffer is 256 bytes, but 'val_buf' is currently only 32 bytes. sscanf() does not check the length of the destination buffer, so 'val_buf' may overflow the stack. Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index e3edb176d976..5d79e9f0e7e1 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -250,7 +250,6 @@ #define QM_QOS_MIN_CIR_B 100 #define QM_QOS_MAX_CIR_U 6 #define QM_QOS_MAX_CIR_S 11 -#define QM_QOS_VAL_MAX_LEN 32 #define QM_DFX_BASE 0x0100000 #define QM_DFX_STATE1 0x0104000 #define QM_DFX_STATE2 0x01040C8 @@ -4612,7 +4611,7 @@ static ssize_t qm_get_qos_value(struct hisi_qm *qm, const char *buf, unsigned int *fun_index) { char tbuf_bdf[QM_DBG_READ_LEN] = {0}; - char val_buf[QM_QOS_VAL_MAX_LEN] = {0}; + char val_buf[QM_DBG_READ_LEN] = {0}; u32 tmp1, device, function; int ret, bus; From 22d7a6c39cabab811f42cb2daed2343c87b0aca5 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Sat, 22 Oct 2022 01:17:45 +0000 Subject: [PATCH 0573/4122] crypto: hisilicon/qm - add pci bdf number check Add a check of the PCI BDF number written for qos, using the PCI API. Get the devfn directly from the pci_dev, which removes some redundant code, and use kstrtoul() instead of sscanf() to simplify the code. 
Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 37 ++++++++++++----------------------- 1 file changed, 12 insertions(+), 25 deletions(-) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 5d79e9f0e7e1..80eeb966cf89 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -4589,49 +4589,36 @@ err_put_dfx_access: return ret; } -static ssize_t qm_qos_value_init(const char *buf, unsigned long *val) -{ - int buflen = strlen(buf); - int ret, i; - - for (i = 0; i < buflen; i++) { - if (!isdigit(buf[i])) - return -EINVAL; - } - - ret = sscanf(buf, "%lu", val); - if (ret != QM_QOS_VAL_NUM) - return -EINVAL; - - return 0; -} - static ssize_t qm_get_qos_value(struct hisi_qm *qm, const char *buf, unsigned long *val, unsigned int *fun_index) { + struct bus_type *bus_type = qm->pdev->dev.bus; char tbuf_bdf[QM_DBG_READ_LEN] = {0}; char val_buf[QM_DBG_READ_LEN] = {0}; - u32 tmp1, device, function; - int ret, bus; + struct pci_dev *pdev; + struct device *dev; + int ret; ret = sscanf(buf, "%s %s", tbuf_bdf, val_buf); if (ret != QM_QOS_PARAM_NUM) return -EINVAL; - ret = qm_qos_value_init(val_buf, val); + ret = kstrtoul(val_buf, 10, val); if (ret || *val == 0 || *val > QM_QOS_MAX_VAL) { pci_err(qm->pdev, "input qos value is error, please set 1~1000!\n"); return -EINVAL; } - ret = sscanf(tbuf_bdf, "%u:%x:%u.%u", &tmp1, &bus, &device, &function); - if (ret != QM_QOS_BDF_PARAM_NUM) { - pci_err(qm->pdev, "input pci bdf value is error!\n"); - return -EINVAL; + dev = bus_find_device_by_name(bus_type, NULL, tbuf_bdf); + if (!dev) { + pci_err(qm->pdev, "input pci bdf number is error!\n"); + return -ENODEV; } - *fun_index = PCI_DEVFN(device, function); + pdev = container_of(dev, struct pci_dev, dev); + + *fun_index = pdev->devfn; return 0; } From 8f82f4ae8946d665f1e38da8e2b39b929d2435b1 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Sat, 22 Oct 2022 01:17:46 +0000 Subject: [PATCH 0574/4122] crypto: 
hisilicon/qm - delete redundancy check The permission on the VF debugfs file is "0444", so the VF function check in the qos write API is redundant. Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 80eeb966cf89..363a02810a16 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -4632,9 +4632,6 @@ static ssize_t qm_algqos_write(struct file *filp, const char __user *buf, unsigned long val; int len, ret; - if (qm->fun_type == QM_HW_VF) - return -EINVAL; - if (*pos != 0) return 0; From d6e9aa6e1ea872d1bbdf08ac78245cf8efeda19c Mon Sep 17 00:00:00 2001 From: wangjianli Date: Sat, 22 Oct 2022 13:38:02 +0800 Subject: [PATCH 0575/4122] crypto: octeontx - fix repeated words in comments Delete the redundant word 'the'. Signed-off-by: wangjianli Signed-off-by: Herbert Xu --- drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h b/drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h index 205eacac4a34..f8aedafdfdc5 100644 --- a/drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h +++ b/drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h @@ -534,7 +534,7 @@ union otx_cptx_vqx_misc_ena_w1s { * Word0 * reserved_20_63:44 [63:20] Reserved. * dbell_cnt:20 [19:0](R/W/H) Number of instruction queue 64-bit words to add - * to the CPT instruction doorbell count. Readback value is the the + * to the CPT instruction doorbell count. Readback value is the * current number of pending doorbell requests. If counter overflows * CPT()_VQ()_MISC_INT[DBELL_DOVF] is set. 
To reset the count back to * zero, write one to clear CPT()_VQ()_MISC_INT_ENA_W1C[DBELL_DOVF], From b39301ee1f268c89bd2a8eae257b7d2f50308598 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Wed, 26 Oct 2022 12:02:07 +0100 Subject: [PATCH 0576/4122] soundwire: qcom: remove unused SWRM_SPECIAL_CMD_ID Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221026110210.6575-4-srinivas.kandagatla@linaro.org Signed-off-by: Vinod Koul --- drivers/soundwire/qcom.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/soundwire/qcom.c b/drivers/soundwire/qcom.c index b33d5db494a5..185d75ee81ee 100644 --- a/drivers/soundwire/qcom.c +++ b/drivers/soundwire/qcom.c @@ -104,7 +104,6 @@ #define SWRM_REG_VAL_PACK(data, dev, id, reg) \ ((reg) | ((id) << 16) | ((dev) << 20) | ((data) << 24)) -#define SWRM_SPECIAL_CMD_ID 0xF #define MAX_FREQ_NUM 1 #define TIMEOUT_MS 100 #define QCOM_SWRM_MAX_RD_LEN 0x1 From 1cdbfd4c9dc95d9b1e6bcbeba71cfdc70732b50e Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Wed, 26 Oct 2022 12:02:08 +0100 Subject: [PATCH 0577/4122] soundwire: qcom: make reset optional for v1.6 controller On some Qualcomm SoCs, such as sc8280xp, which use the v1.6 SoundWire controller, the reset is not mandatory, so make it optional. 
Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221026110210.6575-5-srinivas.kandagatla@linaro.org Signed-off-by: Vinod Koul --- drivers/soundwire/qcom.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/soundwire/qcom.c b/drivers/soundwire/qcom.c index 185d75ee81ee..49e6d745e0fa 100644 --- a/drivers/soundwire/qcom.c +++ b/drivers/soundwire/qcom.c @@ -1321,8 +1321,8 @@ static int qcom_swrm_probe(struct platform_device *pdev) } if (data->sw_clk_gate_required) { - ctrl->audio_cgcr = devm_reset_control_get_exclusive(dev, "swr_audio_cgcr"); - if (IS_ERR_OR_NULL(ctrl->audio_cgcr)) { + ctrl->audio_cgcr = devm_reset_control_get_optional_exclusive(dev, "swr_audio_cgcr"); + if (IS_ERR(ctrl->audio_cgcr)) { dev_err(dev, "Failed to get cgcr reset ctrl required for SW gating\n"); ret = PTR_ERR(ctrl->audio_cgcr); goto err_init; From df73f66c7dd4474a05e07f911427043bc32cff31 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Wed, 26 Oct 2022 12:02:09 +0100 Subject: [PATCH 0578/4122] dt-bindings: soundwire: qcom: add v1.7.0 support Signed-off-by: Srinivas Kandagatla Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221026110210.6575-6-srinivas.kandagatla@linaro.org Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/soundwire/qcom,sdw.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/soundwire/qcom,sdw.txt b/Documentation/devicetree/bindings/soundwire/qcom,sdw.txt index c85c25779e3f..e0faed8dceac 100644 --- a/Documentation/devicetree/bindings/soundwire/qcom,sdw.txt +++ b/Documentation/devicetree/bindings/soundwire/qcom,sdw.txt @@ -13,6 +13,7 @@ board specific bus parameters. 
"qcom,soundwire-v1.5.0" "qcom,soundwire-v1.5.1" "qcom,soundwire-v1.6.0" + "qcom,soundwire-v1.7.0" - reg: Usage: required Value type: From cf43cd33b67a291fadcd16b1ad2f435bd2e60749 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Wed, 26 Oct 2022 12:02:10 +0100 Subject: [PATCH 0579/4122] soundwire: qcom: add support for v1.7 Soundwire Controller This patch add support for v1.7 SoundWire Controller which has support for Multi-EE (Execution Environment), resulting in a new register and extending field in BUS_CTRL register. With these updates v1.7.0 is fully supported. Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221026110210.6575-7-srinivas.kandagatla@linaro.org Signed-off-by: Vinod Koul --- drivers/soundwire/qcom.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/soundwire/qcom.c b/drivers/soundwire/qcom.c index 49e6d745e0fa..ce00a5cf980c 100644 --- a/drivers/soundwire/qcom.c +++ b/drivers/soundwire/qcom.c @@ -25,6 +25,8 @@ #define SWRM_COMP_SW_RESET 0x008 #define SWRM_COMP_STATUS 0x014 +#define SWRM_LINK_MANAGER_EE 0x018 +#define SWRM_EE_CPU 1 #define SWRM_FRM_GEN_ENABLED BIT(0) #define SWRM_COMP_HW_VERSION 0x00 #define SWRM_COMP_CFG_ADDR 0x04 @@ -684,7 +686,14 @@ static int qcom_swrm_init(struct qcom_swrm_ctrl *ctrl) u32p_replace_bits(&val, SWRM_DEF_CMD_NO_PINGS, SWRM_MCP_CFG_MAX_NUM_OF_CMD_NO_PINGS_BMSK); ctrl->reg_write(ctrl, SWRM_MCP_CFG_ADDR, val); - ctrl->reg_write(ctrl, SWRM_MCP_BUS_CTRL, SWRM_MCP_BUS_CLK_START); + if (ctrl->version >= 0x01070000) { + ctrl->reg_write(ctrl, SWRM_LINK_MANAGER_EE, SWRM_EE_CPU); + ctrl->reg_write(ctrl, SWRM_MCP_BUS_CTRL, + SWRM_MCP_BUS_CLK_START << SWRM_EE_CPU); + } else { + ctrl->reg_write(ctrl, SWRM_MCP_BUS_CTRL, SWRM_MCP_BUS_CLK_START); + } + /* Configure number of retries of a read/write cmd */ if (ctrl->version > 0x01050001) { /* Only for versions >= 1.5.1 */ @@ -1509,7 +1518,13 @@ static int __maybe_unused swrm_runtime_resume(struct device *dev) } 
else { reset_control_reset(ctrl->audio_cgcr); - ctrl->reg_write(ctrl, SWRM_MCP_BUS_CTRL, SWRM_MCP_BUS_CLK_START); + if (ctrl->version >= 0x01070000) { + ctrl->reg_write(ctrl, SWRM_LINK_MANAGER_EE, SWRM_EE_CPU); + ctrl->reg_write(ctrl, SWRM_MCP_BUS_CTRL, + SWRM_MCP_BUS_CLK_START << SWRM_EE_CPU); + } else { + ctrl->reg_write(ctrl, SWRM_MCP_BUS_CTRL, SWRM_MCP_BUS_CLK_START); + } ctrl->reg_write(ctrl, SWRM_INTERRUPT_CLEAR, SWRM_INTERRUPT_STATUS_MASTER_CLASH_DET); @@ -1573,6 +1588,7 @@ static const struct of_device_id qcom_swrm_of_match[] = { { .compatible = "qcom,soundwire-v1.3.0", .data = &swrm_v1_3_data }, { .compatible = "qcom,soundwire-v1.5.1", .data = &swrm_v1_5_data }, { .compatible = "qcom,soundwire-v1.6.0", .data = &swrm_v1_6_data }, + { .compatible = "qcom,soundwire-v1.7.0", .data = &swrm_v1_5_data }, {/* sentinel */}, }; From 28d74fc36a3e667b51a437fbf6c45264a0c8f2db Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:12:28 +0200 Subject: [PATCH 0580/4122] phy: qcom-qmp: drop regulator error message Regulator core already logs an error message in case requesting a regulator fails so drop the mostly redundant error message from probe. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221012081241.18273-2-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 3 +-- drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c | 3 +-- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 3 +-- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 3 +-- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 3 +-- 5 files changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 7b434e2ee640..998c8f80ccd8 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -2816,8 +2816,7 @@ static int qmp_combo_probe(struct platform_device *pdev) ret = qmp_combo_vreg_init(dev, cfg); if (ret) - return dev_err_probe(dev, ret, - "failed to get regulator supplies\n"); + return ret; num = of_get_available_child_count(dev->of_node); /* do we have a rogue child node ? */ diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c index 5fdd85a1dc3e..45c0e2958bf6 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c @@ -869,8 +869,7 @@ static int qmp_pcie_msm8996_probe(struct platform_device *pdev) ret = qmp_pcie_msm8996_vreg_init(dev, cfg); if (ret) - return dev_err_probe(dev, ret, - "failed to get regulator supplies\n"); + return ret; num = of_get_available_child_count(dev->of_node); /* do we have a rogue child node ? 
*/ diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index 30838ae8f027..dc7f8ba413b9 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -2445,8 +2445,7 @@ static int qmp_pcie_probe(struct platform_device *pdev) ret = qmp_pcie_vreg_init(dev, cfg); if (ret) - return dev_err_probe(dev, ret, - "failed to get regulator supplies\n"); + return ret; num = of_get_available_child_count(dev->of_node); /* do we have a rogue child node ? */ diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index e28c45ab74ea..566365fbfe1a 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -1210,8 +1210,7 @@ static int qmp_ufs_probe(struct platform_device *pdev) ret = qmp_ufs_vreg_init(dev, cfg); if (ret) - return dev_err_probe(dev, ret, - "failed to get regulator supplies\n"); + return ret; num = of_get_available_child_count(dev->of_node); /* do we have a rogue child node ? */ diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index b0b13fb6cb59..a0b97fd5d0a5 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -2746,8 +2746,7 @@ static int qmp_usb_probe(struct platform_device *pdev) ret = qmp_usb_vreg_init(dev, cfg); if (ret) - return dev_err_probe(dev, ret, - "failed to get regulator supplies\n"); + return ret; num = of_get_available_child_count(dev->of_node); /* do we have a rogue child node ? */ From 17302d3630030db09947241451f3d984bc0d3144 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:12:29 +0200 Subject: [PATCH 0581/4122] phy: qcom-qmp: drop superfluous comments Drop some unnecessary or incorrect comments. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221012081241.18273-3-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 4 ---- drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c | 3 --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 3 --- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 3 --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 5 ----- 5 files changed, 18 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 998c8f80ccd8..3889dcf73c59 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -1949,7 +1949,6 @@ static int qmp_combo_com_init(struct qmp_phy *qphy) return 0; } - /* turn on regulator supplies */ ret = regulator_bulk_enable(cfg->num_vregs, qmp->vregs); if (ret) { dev_err(qmp->dev, "failed to enable regulators, err=%d\n", ret); @@ -2779,7 +2778,6 @@ static int qmp_combo_probe(struct platform_device *pdev) qmp->dev = dev; dev_set_drvdata(dev, qmp); - /* Get the specific init parameters of QMP phy */ combo_cfg = of_device_get_match_data(dev); if (!combo_cfg) return -EINVAL; @@ -2787,7 +2785,6 @@ static int qmp_combo_probe(struct platform_device *pdev) usb_cfg = combo_cfg->usb_cfg; cfg = usb_cfg; /* Setup clks and regulators */ - /* per PHY serdes; usually located at base address */ usb_serdes = serdes = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(serdes)) return PTR_ERR(serdes); @@ -2796,7 +2793,6 @@ static int qmp_combo_probe(struct platform_device *pdev) if (IS_ERR(qmp->dp_com)) return PTR_ERR(qmp->dp_com); - /* Only two serdes for combo PHY */ dp_serdes = devm_platform_ioremap_resource(pdev, 2); if (IS_ERR(dp_serdes)) return PTR_ERR(dp_serdes); diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c index 45c0e2958bf6..8b74948eb467 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c +++ 
b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c @@ -420,7 +420,6 @@ static int qmp_pcie_msm8996_com_init(struct qmp_phy *qphy) return 0; } - /* turn on regulator supplies */ ret = regulator_bulk_enable(cfg->num_vregs, qmp->vregs); if (ret) { dev_err(qmp->dev, "failed to enable regulators, err=%d\n", ret); @@ -845,12 +844,10 @@ static int qmp_pcie_msm8996_probe(struct platform_device *pdev) qmp->dev = dev; dev_set_drvdata(dev, qmp); - /* Get the specific init parameters of QMP phy */ cfg = of_device_get_match_data(dev); if (!cfg) return -EINVAL; - /* per PHY serdes; usually located at base address */ serdes = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(serdes)) return PTR_ERR(serdes); diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index dc7f8ba413b9..de04d8dd5350 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -1978,7 +1978,6 @@ static int qmp_pcie_init(struct phy *phy) const struct qmp_phy_cfg *cfg = qphy->cfg; int ret; - /* turn on regulator supplies */ ret = regulator_bulk_enable(cfg->num_vregs, qmp->vregs); if (ret) { dev_err(qmp->dev, "failed to enable regulators, err=%d\n", ret); @@ -2425,12 +2424,10 @@ static int qmp_pcie_probe(struct platform_device *pdev) qmp->dev = dev; dev_set_drvdata(dev, qmp); - /* Get the specific init parameters of QMP phy */ cfg = of_device_get_match_data(dev); if (!cfg) return -EINVAL; - /* per PHY serdes; usually located at base address */ serdes = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(serdes)) return PTR_ERR(serdes); diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index 566365fbfe1a..ab69f648ee38 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -849,7 +849,6 @@ static int qmp_ufs_com_init(struct qmp_phy *qphy) void __iomem *pcs = qphy->pcs; int ret; - /* turn on regulator supplies */ ret = 
regulator_bulk_enable(cfg->num_vregs, qmp->vregs); if (ret) { dev_err(qmp->dev, "failed to enable regulators, err=%d\n", ret); @@ -1194,12 +1193,10 @@ static int qmp_ufs_probe(struct platform_device *pdev) qmp->dev = dev; dev_set_drvdata(dev, qmp); - /* Get the specific init parameters of QMP phy */ cfg = of_device_get_match_data(dev); if (!cfg) return -EINVAL; - /* per PHY serdes; usually located at base address */ serdes = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(serdes)) return PTR_ERR(serdes); diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index a0b97fd5d0a5..2c5e4041bcf9 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -2120,7 +2120,6 @@ static int qmp_usb_init(struct phy *phy) void __iomem *dp_com = qmp->dp_com; int ret; - /* turn on regulator supplies */ ret = regulator_bulk_enable(cfg->num_vregs, qmp->vregs); if (ret) { dev_err(qmp->dev, "failed to enable regulators, err=%d\n", ret); @@ -2229,7 +2228,6 @@ static int qmp_usb_power_on(struct phy *phy) cfg->rx_tbl, cfg->rx_tbl_num, 2); } - /* Configure link rate, swing, etc. 
*/ qmp_usb_configure(pcs, cfg->regs, cfg->pcs_tbl, cfg->pcs_tbl_num); if (cfg->has_pwrdn_delay) @@ -2719,17 +2717,14 @@ static int qmp_usb_probe(struct platform_device *pdev) qmp->dev = dev; dev_set_drvdata(dev, qmp); - /* Get the specific init parameters of QMP phy */ cfg = of_device_get_match_data(dev); if (!cfg) return -EINVAL; - /* per PHY serdes; usually located at base address */ serdes = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(serdes)) return PTR_ERR(serdes); - /* per PHY dp_com; if PHY has dp_com control block */ if (cfg->has_phy_dp_com_ctrl) { qmp->dp_com = devm_platform_ioremap_resource(pdev, 1); if (IS_ERR(qmp->dp_com)) From ccf6f83b1b0bbdcde1ec7c0a35dde014f7101507 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:12:30 +0200 Subject: [PATCH 0582/4122] phy: qcom-qmp-combo: drop unused in-layout configuration The QMP combo PHY driver does not use the "in-layout" configuration macro to configure registers that are typically accessed using "regs_layout" arrays (e.g. QPHY_START_CTRL) so drop this unused feature. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221012081241.18273-4-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 54 ++++++----------------- 1 file changed, 14 insertions(+), 40 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 3889dcf73c59..84380852ba5b 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -69,11 +69,6 @@ struct qmp_phy_init_tbl { unsigned int offset; unsigned int val; - /* - * register part of layout ? 
- * if yes, then offset gives index in the reg-layout - */ - bool in_layout; /* * mask of lanes for which this register is written * for cases when second lane needs different values @@ -88,14 +83,6 @@ struct qmp_phy_init_tbl { .lane_mask = 0xff, \ } -#define QMP_PHY_INIT_CFG_L(o, v) \ - { \ - .offset = o, \ - .val = v, \ - .in_layout = true, \ - .lane_mask = 0xff, \ - } - #define QMP_PHY_INIT_CFG_LANE(o, v, l) \ { \ .offset = o, \ @@ -1346,7 +1333,6 @@ static const struct qmp_phy_combo_cfg sm8250_usb3dpphy_cfg = { }; static void qmp_combo_configure_lane(void __iomem *base, - const unsigned int *regs, const struct qmp_phy_init_tbl tbl[], int num, u8 lane_mask) @@ -1361,19 +1347,15 @@ static void qmp_combo_configure_lane(void __iomem *base, if (!(t->lane_mask & lane_mask)) continue; - if (t->in_layout) - writel(t->val, base + regs[t->offset]); - else - writel(t->val, base + t->offset); + writel(t->val, base + t->offset); } } static void qmp_combo_configure(void __iomem *base, - const unsigned int *regs, const struct qmp_phy_init_tbl tbl[], int num) { - qmp_combo_configure_lane(base, regs, tbl, num, 0xff); + qmp_combo_configure_lane(base, tbl, num, 0xff); } static int qmp_combo_serdes_init(struct qmp_phy *qphy) @@ -1384,28 +1366,24 @@ static int qmp_combo_serdes_init(struct qmp_phy *qphy) const struct qmp_phy_init_tbl *serdes_tbl = cfg->serdes_tbl; int serdes_tbl_num = cfg->serdes_tbl_num; - qmp_combo_configure(serdes, cfg->regs, serdes_tbl, serdes_tbl_num); + qmp_combo_configure(serdes, serdes_tbl, serdes_tbl_num); if (cfg->type == PHY_TYPE_DP) { switch (dp_opts->link_rate) { case 1620: - qmp_combo_configure(serdes, cfg->regs, - cfg->serdes_tbl_rbr, + qmp_combo_configure(serdes, cfg->serdes_tbl_rbr, cfg->serdes_tbl_rbr_num); break; case 2700: - qmp_combo_configure(serdes, cfg->regs, - cfg->serdes_tbl_hbr, + qmp_combo_configure(serdes, cfg->serdes_tbl_hbr, cfg->serdes_tbl_hbr_num); break; case 5400: - qmp_combo_configure(serdes, cfg->regs, - cfg->serdes_tbl_hbr2, + 
qmp_combo_configure(serdes, cfg->serdes_tbl_hbr2, cfg->serdes_tbl_hbr2_num); break; case 8100: - qmp_combo_configure(serdes, cfg->regs, - cfg->serdes_tbl_hbr3, + qmp_combo_configure(serdes, cfg->serdes_tbl_hbr3, cfg->serdes_tbl_hbr3_num); break; default: @@ -2069,29 +2047,25 @@ static int qmp_combo_power_on(struct phy *phy) } /* Tx, Rx, and PCS configurations */ - qmp_combo_configure_lane(tx, cfg->regs, cfg->tx_tbl, cfg->tx_tbl_num, 1); + qmp_combo_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1); - if (cfg->lanes >= 2) { - qmp_combo_configure_lane(qphy->tx2, cfg->regs, cfg->tx_tbl, - cfg->tx_tbl_num, 2); - } + if (cfg->lanes >= 2) + qmp_combo_configure_lane(qphy->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2); /* Configure special DP tx tunings */ if (cfg->type == PHY_TYPE_DP) cfg->configure_dp_tx(qphy); - qmp_combo_configure_lane(rx, cfg->regs, cfg->rx_tbl, cfg->rx_tbl_num, 1); + qmp_combo_configure_lane(rx, cfg->rx_tbl, cfg->rx_tbl_num, 1); - if (cfg->lanes >= 2) { - qmp_combo_configure_lane(qphy->rx2, cfg->regs, cfg->rx_tbl, - cfg->rx_tbl_num, 2); - } + if (cfg->lanes >= 2) + qmp_combo_configure_lane(qphy->rx2, cfg->rx_tbl, cfg->rx_tbl_num, 2); /* Configure link rate, swing, etc. */ if (cfg->type == PHY_TYPE_DP) cfg->configure_dp_phy(qphy); else - qmp_combo_configure(pcs, cfg->regs, cfg->pcs_tbl, cfg->pcs_tbl_num); + qmp_combo_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num); if (cfg->has_pwrdn_delay) usleep_range(cfg->pwrdn_delay_min, cfg->pwrdn_delay_max); From f2175762b4ed90048b739b32a739a2df790d4e13 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:12:31 +0200 Subject: [PATCH 0583/4122] phy: qcom-qmp-pcie: drop redundant ipq8074 power on The PCS initialisation table for IPQ8074 includes updates of the reset and start-control registers which is already handled explicitly by the driver during power on. Drop the redundant register write from the IPQ8074 configuration table and along with it the now unused "in-layout" configuration macro and code. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221012081241.18273-5-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 42 +++++------------------- 1 file changed, 9 insertions(+), 33 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index de04d8dd5350..fa8bc6aeedf1 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -42,11 +42,6 @@ struct qmp_phy_init_tbl { unsigned int offset; unsigned int val; - /* - * register part of layout ? - * if yes, then offset gives index in the reg-layout - */ - bool in_layout; /* * mask of lanes for which this register is written * for cases when second lane needs different values @@ -61,14 +56,6 @@ struct qmp_phy_init_tbl { .lane_mask = 0xff, \ } -#define QMP_PHY_INIT_CFG_L(o, v) \ - { \ - .offset = o, \ - .val = v, \ - .in_layout = true, \ - .lane_mask = 0xff, \ - } - #define QMP_PHY_INIT_CFG_LANE(o, v, l) \ { \ .offset = o, \ @@ -388,8 +375,6 @@ static const struct qmp_phy_init_tbl ipq8074_pcie_pcs_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V2_PCS_RX_SIGDET_LVL, 0x99), QMP_PHY_INIT_CFG(QPHY_V2_PCS_TXDEEMPH_M6DB_V0, 0x15), QMP_PHY_INIT_CFG(QPHY_V2_PCS_TXDEEMPH_M3P5DB_V0, 0xe), - QMP_PHY_INIT_CFG_L(QPHY_SW_RESET, 0x0), - QMP_PHY_INIT_CFG_L(QPHY_START_CTRL, 0x3), }; static const struct qmp_phy_init_tbl ipq8074_pcie_gen3_serdes_tbl[] = { @@ -1896,7 +1881,6 @@ static const struct qmp_phy_cfg sm8450_qmp_gen4x2_pciephy_cfg = { }; static void qmp_pcie_configure_lane(void __iomem *base, - const unsigned int *regs, const struct qmp_phy_init_tbl tbl[], int num, u8 lane_mask) @@ -1911,30 +1895,25 @@ static void qmp_pcie_configure_lane(void __iomem *base, if (!(t->lane_mask & lane_mask)) continue; - if (t->in_layout) - writel(t->val, base + regs[t->offset]); - else - writel(t->val, base + t->offset); + writel(t->val, base + t->offset); } } static void 
qmp_pcie_configure(void __iomem *base, - const unsigned int *regs, const struct qmp_phy_init_tbl tbl[], int num) { - qmp_pcie_configure_lane(base, regs, tbl, num, 0xff); + qmp_pcie_configure_lane(base, tbl, num, 0xff); } static void qmp_pcie_serdes_init(struct qmp_phy *qphy, const struct qmp_phy_cfg_tables *tables) { - const struct qmp_phy_cfg *cfg = qphy->cfg; void __iomem *serdes = qphy->serdes; if (!tables) return; - qmp_pcie_configure(serdes, cfg->regs, tables->serdes, tables->serdes_num); + qmp_pcie_configure(serdes, tables->serdes, tables->serdes_num); } static void qmp_pcie_lanes_init(struct qmp_phy *qphy, const struct qmp_phy_cfg_tables *tables) @@ -1946,29 +1925,26 @@ static void qmp_pcie_lanes_init(struct qmp_phy *qphy, const struct qmp_phy_cfg_t if (!tables) return; - qmp_pcie_configure_lane(tx, cfg->regs, tables->tx, tables->tx_num, 1); + qmp_pcie_configure_lane(tx, tables->tx, tables->tx_num, 1); if (cfg->lanes >= 2) - qmp_pcie_configure_lane(qphy->tx2, cfg->regs, tables->tx, tables->tx_num, 2); + qmp_pcie_configure_lane(qphy->tx2, tables->tx, tables->tx_num, 2); - qmp_pcie_configure_lane(rx, cfg->regs, tables->rx, tables->rx_num, 1); + qmp_pcie_configure_lane(rx, tables->rx, tables->rx_num, 1); if (cfg->lanes >= 2) - qmp_pcie_configure_lane(qphy->rx2, cfg->regs, tables->rx, tables->rx_num, 2); + qmp_pcie_configure_lane(qphy->rx2, tables->rx, tables->rx_num, 2); } static void qmp_pcie_pcs_init(struct qmp_phy *qphy, const struct qmp_phy_cfg_tables *tables) { - const struct qmp_phy_cfg *cfg = qphy->cfg; void __iomem *pcs = qphy->pcs; void __iomem *pcs_misc = qphy->pcs_misc; if (!tables) return; - qmp_pcie_configure(pcs, cfg->regs, - tables->pcs, tables->pcs_num); - qmp_pcie_configure(pcs_misc, cfg->regs, - tables->pcs_misc, tables->pcs_misc_num); + qmp_pcie_configure(pcs, tables->pcs, tables->pcs_num); + qmp_pcie_configure(pcs_misc, tables->pcs_misc, tables->pcs_misc_num); } static int qmp_pcie_init(struct phy *phy) From 
d3ef88635e318a7cc7e2fc26a58b4e8b56c9fb9b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:12:32 +0200 Subject: [PATCH 0584/4122] phy: qcom-qmp-pcie-msm8996: drop unused in-layout configuration The MSM8996 QMP PCIe PHY driver no longer uses the "in-layout" configuration macro to configure registers that are typically accessed using "regs_layout" arrays (e.g. QPHY_START_CTRL) so drop this unused feature. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221012081241.18273-6-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- .../phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c | 34 ++++--------------- 1 file changed, 6 insertions(+), 28 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c index 8b74948eb467..31ac405d3785 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c @@ -46,11 +46,6 @@ struct qmp_phy_init_tbl { unsigned int offset; unsigned int val; - /* - * register part of layout ? 
- * if yes, then offset gives index in the reg-layout - */ - bool in_layout; /* * mask of lanes for which this register is written * for cases when second lane needs different values @@ -65,14 +60,6 @@ struct qmp_phy_init_tbl { .lane_mask = 0xff, \ } -#define QMP_PHY_INIT_CFG_L(o, v) \ - { \ - .offset = o, \ - .val = v, \ - .in_layout = true, \ - .lane_mask = 0xff, \ - } - #define QMP_PHY_INIT_CFG_LANE(o, v, l) \ { \ .offset = o, \ @@ -346,7 +333,6 @@ static const struct qmp_phy_cfg msm8996_pciephy_cfg = { }; static void qmp_pcie_msm8996_configure_lane(void __iomem *base, - const unsigned int *regs, const struct qmp_phy_init_tbl tbl[], int num, u8 lane_mask) @@ -361,19 +347,15 @@ static void qmp_pcie_msm8996_configure_lane(void __iomem *base, if (!(t->lane_mask & lane_mask)) continue; - if (t->in_layout) - writel(t->val, base + regs[t->offset]); - else - writel(t->val, base + t->offset); + writel(t->val, base + t->offset); } } static void qmp_pcie_msm8996_configure(void __iomem *base, - const unsigned int *regs, const struct qmp_phy_init_tbl tbl[], int num) { - qmp_pcie_msm8996_configure_lane(base, regs, tbl, num, 0xff); + qmp_pcie_msm8996_configure_lane(base, tbl, num, 0xff); } static int qmp_pcie_msm8996_serdes_init(struct qmp_phy *qphy) @@ -387,7 +369,7 @@ static int qmp_pcie_msm8996_serdes_init(struct qmp_phy *qphy) unsigned int mask, val; int ret; - qmp_pcie_msm8996_configure(serdes, cfg->regs, serdes_tbl, serdes_tbl_num); + qmp_pcie_msm8996_configure(serdes, serdes_tbl, serdes_tbl_num); qphy_clrbits(serdes, cfg->regs[QPHY_COM_SW_RESET], SW_RESET); qphy_setbits(serdes, cfg->regs[QPHY_COM_START_CONTROL], @@ -531,13 +513,9 @@ static int qmp_pcie_msm8996_power_on(struct phy *phy) } /* Tx, Rx, and PCS configurations */ - qmp_pcie_msm8996_configure_lane(tx, cfg->regs, cfg->tx_tbl, - cfg->tx_tbl_num, 1); - - qmp_pcie_msm8996_configure_lane(rx, cfg->regs, cfg->rx_tbl, - cfg->rx_tbl_num, 1); - - qmp_pcie_msm8996_configure(pcs, cfg->regs, cfg->pcs_tbl, 
cfg->pcs_tbl_num); + qmp_pcie_msm8996_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1); + qmp_pcie_msm8996_configure_lane(rx, cfg->rx_tbl, cfg->rx_tbl_num, 1); + qmp_pcie_msm8996_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num); /* * Pull out PHY from POWER DOWN state. From 91496846a9e863f7caa2db4a828844746b6f6b32 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:12:33 +0200 Subject: [PATCH 0585/4122] phy: qcom-qmp-ufs: drop unused in-layout configuration The QMP UFS PHY driver does not use the "in-layout" configuration macro to configure registers that are typically accessed using "regs_layout" arrays (e.g. QPHY_START_CTRL) so drop this unused feature. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221012081241.18273-7-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 42 ++++++------------------- 1 file changed, 10 insertions(+), 32 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index ab69f648ee38..02931b82132f 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -41,11 +41,6 @@ struct qmp_phy_init_tbl { unsigned int offset; unsigned int val; - /* - * register part of layout ? 
- * if yes, then offset gives index in the reg-layout - */ - bool in_layout; /* * mask of lanes for which this register is written * for cases when second lane needs different values @@ -60,14 +55,6 @@ struct qmp_phy_init_tbl { .lane_mask = 0xff, \ } -#define QMP_PHY_INIT_CFG_L(o, v) \ - { \ - .offset = o, \ - .val = v, \ - .in_layout = true, \ - .lane_mask = 0xff, \ - } - #define QMP_PHY_INIT_CFG_LANE(o, v, l) \ { \ .offset = o, \ @@ -800,7 +787,6 @@ static const struct qmp_phy_cfg sm8450_ufsphy_cfg = { }; static void qmp_ufs_configure_lane(void __iomem *base, - const unsigned int *regs, const struct qmp_phy_init_tbl tbl[], int num, u8 lane_mask) @@ -815,19 +801,15 @@ static void qmp_ufs_configure_lane(void __iomem *base, if (!(t->lane_mask & lane_mask)) continue; - if (t->in_layout) - writel(t->val, base + regs[t->offset]); - else - writel(t->val, base + t->offset); + writel(t->val, base + t->offset); } } static void qmp_ufs_configure(void __iomem *base, - const unsigned int *regs, const struct qmp_phy_init_tbl tbl[], int num) { - qmp_ufs_configure_lane(base, regs, tbl, num, 0xff); + qmp_ufs_configure_lane(base, tbl, num, 0xff); } static int qmp_ufs_serdes_init(struct qmp_phy *qphy) @@ -837,7 +819,7 @@ static int qmp_ufs_serdes_init(struct qmp_phy *qphy) const struct qmp_phy_init_tbl *serdes_tbl = cfg->serdes_tbl; int serdes_tbl_num = cfg->serdes_tbl_num; - qmp_ufs_configure(serdes, cfg->regs, serdes_tbl, serdes_tbl_num); + qmp_ufs_configure(serdes, serdes_tbl, serdes_tbl_num); return 0; } @@ -941,21 +923,17 @@ static int qmp_ufs_power_on(struct phy *phy) qmp_ufs_serdes_init(qphy); /* Tx, Rx, and PCS configurations */ - qmp_ufs_configure_lane(tx, cfg->regs, cfg->tx_tbl, cfg->tx_tbl_num, 1); + qmp_ufs_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1); - if (cfg->lanes >= 2) { - qmp_ufs_configure_lane(qphy->tx2, cfg->regs, - cfg->tx_tbl, cfg->tx_tbl_num, 2); - } + if (cfg->lanes >= 2) + qmp_ufs_configure_lane(qphy->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2); - 
qmp_ufs_configure_lane(rx, cfg->regs, cfg->rx_tbl, cfg->rx_tbl_num, 1); + qmp_ufs_configure_lane(rx, cfg->rx_tbl, cfg->rx_tbl_num, 1); - if (cfg->lanes >= 2) { - qmp_ufs_configure_lane(qphy->rx2, cfg->regs, - cfg->rx_tbl, cfg->rx_tbl_num, 2); - } + if (cfg->lanes >= 2) + qmp_ufs_configure_lane(qphy->rx2, cfg->rx_tbl, cfg->rx_tbl_num, 2); - qmp_ufs_configure(pcs, cfg->regs, cfg->pcs_tbl, cfg->pcs_tbl_num); + qmp_ufs_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num); ret = reset_control_deassert(qmp->ufs_reset); if (ret) From 9d452c3ac257d36740580e5ce2b899bfca99fd62 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:12:34 +0200 Subject: [PATCH 0586/4122] phy: qcom-qmp-usb: drop unused in-layout configuration The QMP USB PHY driver does not use the "in-layout" configuration macro to configure registers that are typically accessed using "regs_layout" arrays (e.g. QPHY_START_CTRL) so drop this unused feature. Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221012081241.18273-8-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 42 ++++++------------------- 1 file changed, 10 insertions(+), 32 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 2c5e4041bcf9..3aab9ea90078 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -69,11 +69,6 @@ struct qmp_phy_init_tbl { unsigned int offset; unsigned int val; - /* - * register part of layout ? 
- * if yes, then offset gives index in the reg-layout - */ - bool in_layout; /* * mask of lanes for which this register is written * for cases when second lane needs different values @@ -88,14 +83,6 @@ struct qmp_phy_init_tbl { .lane_mask = 0xff, \ } -#define QMP_PHY_INIT_CFG_L(o, v) \ - { \ - .offset = o, \ - .val = v, \ - .in_layout = true, \ - .lane_mask = 0xff, \ - } - #define QMP_PHY_INIT_CFG_LANE(o, v, l) \ { \ .offset = o, \ @@ -2069,7 +2056,6 @@ static const struct qmp_phy_cfg qcm2290_usb3phy_cfg = { }; static void qmp_usb_configure_lane(void __iomem *base, - const unsigned int *regs, const struct qmp_phy_init_tbl tbl[], int num, u8 lane_mask) @@ -2084,19 +2070,15 @@ static void qmp_usb_configure_lane(void __iomem *base, if (!(t->lane_mask & lane_mask)) continue; - if (t->in_layout) - writel(t->val, base + regs[t->offset]); - else - writel(t->val, base + t->offset); + writel(t->val, base + t->offset); } } static void qmp_usb_configure(void __iomem *base, - const unsigned int *regs, const struct qmp_phy_init_tbl tbl[], int num) { - qmp_usb_configure_lane(base, regs, tbl, num, 0xff); + qmp_usb_configure_lane(base, tbl, num, 0xff); } static int qmp_usb_serdes_init(struct qmp_phy *qphy) @@ -2106,7 +2088,7 @@ static int qmp_usb_serdes_init(struct qmp_phy *qphy) const struct qmp_phy_init_tbl *serdes_tbl = cfg->serdes_tbl; int serdes_tbl_num = cfg->serdes_tbl_num; - qmp_usb_configure(serdes, cfg->regs, serdes_tbl, serdes_tbl_num); + qmp_usb_configure(serdes, serdes_tbl, serdes_tbl_num); return 0; } @@ -2214,21 +2196,17 @@ static int qmp_usb_power_on(struct phy *phy) } /* Tx, Rx, and PCS configurations */ - qmp_usb_configure_lane(tx, cfg->regs, cfg->tx_tbl, cfg->tx_tbl_num, 1); + qmp_usb_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1); - if (cfg->lanes >= 2) { - qmp_usb_configure_lane(qphy->tx2, cfg->regs, - cfg->tx_tbl, cfg->tx_tbl_num, 2); - } + if (cfg->lanes >= 2) + qmp_usb_configure_lane(qphy->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2); - 
qmp_usb_configure_lane(rx, cfg->regs, cfg->rx_tbl, cfg->rx_tbl_num, 1); + qmp_usb_configure_lane(rx, cfg->rx_tbl, cfg->rx_tbl_num, 1); - if (cfg->lanes >= 2) { - qmp_usb_configure_lane(qphy->rx2, cfg->regs, - cfg->rx_tbl, cfg->rx_tbl_num, 2); - } + if (cfg->lanes >= 2) + qmp_usb_configure_lane(qphy->rx2, cfg->rx_tbl, cfg->rx_tbl_num, 2); - qmp_usb_configure(pcs, cfg->regs, cfg->pcs_tbl, cfg->pcs_tbl_num); + qmp_usb_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num); if (cfg->has_pwrdn_delay) usleep_range(cfg->pwrdn_delay_min, cfg->pwrdn_delay_max); From e71906144b432135b483e228d65be59fbb44c310 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:12:35 +0200 Subject: [PATCH 0587/4122] phy: qcom-qmp-pcie: drop power-down delay config The power-down delay was included in the first version of the QMP driver as an optional delay after powering on the PHY (using POWER_DOWN_CONTROL) and just before starting it. Later changes modified this sequence by powering on before initialising the PHY, but the optional delay stayed where it was (i.e. before starting the PHY). The vendor driver does not use a delay before starting the PHY and this is likely not needed on any platform unless there is a corresponding delay in the vendor kernel init sequence tables (i.e. in devicetree). Let's keep the delay for now, but drop the redundant delay period configuration while increasing the unnecessarily low timer slack somewhat. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221012081241.18273-9-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 27 +----------------------- 1 file changed, 1 insertion(+), 26 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index fa8bc6aeedf1..315de484f875 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -1362,9 +1362,6 @@ struct qmp_phy_cfg { /* true, if PHY needs delay after POWER_DOWN */ bool has_pwrdn_delay; - /* power_down delay in usec */ - int pwrdn_delay_min; - int pwrdn_delay_max; /* QMP PHY pipe clock interface rate */ unsigned long pipe_clock_rate; @@ -1500,8 +1497,6 @@ static const struct qmp_phy_cfg ipq8074_pciephy_cfg = { .phy_status = PHYSTATUS, .has_pwrdn_delay = true, - .pwrdn_delay_min = 995, /* us */ - .pwrdn_delay_max = 1005, /* us */ }; static const struct qmp_phy_cfg ipq8074_pciephy_gen3_cfg = { @@ -1529,8 +1524,6 @@ static const struct qmp_phy_cfg ipq8074_pciephy_gen3_cfg = { .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .has_pwrdn_delay = true, - .pwrdn_delay_min = 995, /* us */ - .pwrdn_delay_max = 1005, /* us */ .pipe_clock_rate = 250000000, }; @@ -1562,8 +1555,6 @@ static const struct qmp_phy_cfg ipq6018_pciephy_cfg = { .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .has_pwrdn_delay = true, - .pwrdn_delay_min = 995, /* us */ - .pwrdn_delay_max = 1005, /* us */ }; static const struct qmp_phy_cfg sdm845_qmp_pciephy_cfg = { @@ -1594,8 +1585,6 @@ static const struct qmp_phy_cfg sdm845_qmp_pciephy_cfg = { .phy_status = PHYSTATUS, .has_pwrdn_delay = true, - .pwrdn_delay_min = 995, /* us */ - .pwrdn_delay_max = 1005, /* us */ }; static const struct qmp_phy_cfg sdm845_qhp_pciephy_cfg = { @@ -1624,8 +1613,6 @@ static const struct qmp_phy_cfg sdm845_qhp_pciephy_cfg = { .phy_status = PHYSTATUS, .has_pwrdn_delay = true, - .pwrdn_delay_min = 995, /* 
us */ - .pwrdn_delay_max = 1005, /* us */ }; static const struct qmp_phy_cfg sm8250_qmp_gen3x1_pciephy_cfg = { @@ -1666,8 +1653,6 @@ static const struct qmp_phy_cfg sm8250_qmp_gen3x1_pciephy_cfg = { .phy_status = PHYSTATUS, .has_pwrdn_delay = true, - .pwrdn_delay_min = 995, /* us */ - .pwrdn_delay_max = 1005, /* us */ }; static const struct qmp_phy_cfg sm8250_qmp_gen3x2_pciephy_cfg = { @@ -1708,8 +1693,6 @@ static const struct qmp_phy_cfg sm8250_qmp_gen3x2_pciephy_cfg = { .phy_status = PHYSTATUS, .has_pwrdn_delay = true, - .pwrdn_delay_min = 995, /* us */ - .pwrdn_delay_max = 1005, /* us */ }; static const struct qmp_phy_cfg msm8998_pciephy_cfg = { @@ -1765,8 +1748,6 @@ static const struct qmp_phy_cfg sc8180x_pciephy_cfg = { .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .has_pwrdn_delay = true, - .pwrdn_delay_min = 995, /* us */ - .pwrdn_delay_max = 1005, /* us */ }; static const struct qmp_phy_cfg sdx55_qmp_pciephy_cfg = { @@ -1797,8 +1778,6 @@ static const struct qmp_phy_cfg sdx55_qmp_pciephy_cfg = { .phy_status = PHYSTATUS_4_20, .has_pwrdn_delay = true, - .pwrdn_delay_min = 995, /* us */ - .pwrdn_delay_max = 1005, /* us */ }; static const struct qmp_phy_cfg sm8450_qmp_gen3x1_pciephy_cfg = { @@ -1829,8 +1808,6 @@ static const struct qmp_phy_cfg sm8450_qmp_gen3x1_pciephy_cfg = { .phy_status = PHYSTATUS, .has_pwrdn_delay = true, - .pwrdn_delay_min = 995, /* us */ - .pwrdn_delay_max = 1005, /* us */ }; static const struct qmp_phy_cfg sm8450_qmp_gen4x2_pciephy_cfg = { @@ -1876,8 +1853,6 @@ static const struct qmp_phy_cfg sm8450_qmp_gen4x2_pciephy_cfg = { .phy_status = PHYSTATUS_4_20, .has_pwrdn_delay = true, - .pwrdn_delay_min = 995, /* us */ - .pwrdn_delay_max = 1005, /* us */ }; static void qmp_pcie_configure_lane(void __iomem *base, @@ -2037,7 +2012,7 @@ static int qmp_pcie_power_on(struct phy *phy) qmp_pcie_pcs_init(qphy, mode_tables); if (cfg->has_pwrdn_delay) - usleep_range(cfg->pwrdn_delay_min, cfg->pwrdn_delay_max); + usleep_range(1000, 1200); /* Pull PHY out of 
reset state */ qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); From 51bd33069f80705aba5f4725287bc5688ca6d92a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:12:36 +0200 Subject: [PATCH 0588/4122] phy: qcom-qmp-pcie: replace power-down delay The power-down delay was included in the first version of the QMP driver as an optional delay after powering on the PHY (using POWER_DOWN_CONTROL) and just before starting it. Later changes modified this sequence by powering on before initialising the PHY, but the optional delay stayed where it was (i.e. before starting the PHY). The vendor driver does not use a delay before starting the PHY and this is likely not needed on any platform unless there is a corresponding delay in the vendor kernel init sequence tables (i.e. in devicetree). But as the vendor kernel does have a 1 ms delay *after* starting the PHY and before starting to poll the status it is possible that later contributors have simply not noticed that the mainline power-down delay is not equivalent. As the current delay before even starting the PHY is pretty much pointless and likely a mistake, move the delay after starting the PHY which avoids a few iterations of polling and speeds up startup by 1 ms (the poll loop otherwise takes about 1.8 ms). Note that MSM8998 has never used a power-down delay so add a flag to skip the delay in case starting the PHY is faster on MSM8998. This can be removed after someone takes a measurement.
Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221012081241.18273-10-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 33 +++++------------------- 1 file changed, 6 insertions(+), 27 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index 315de484f875..151baa63e8e8 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -1360,8 +1360,7 @@ struct qmp_phy_cfg { /* bit offset of PHYSTATUS in QPHY_PCS_STATUS register */ unsigned int phy_status; - /* true, if PHY needs delay after POWER_DOWN */ - bool has_pwrdn_delay; + bool skip_start_delay; /* QMP PHY pipe clock interface rate */ unsigned long pipe_clock_rate; @@ -1495,8 +1494,6 @@ static const struct qmp_phy_cfg ipq8074_pciephy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .phy_status = PHYSTATUS, - - .has_pwrdn_delay = true, }; static const struct qmp_phy_cfg ipq8074_pciephy_gen3_cfg = { @@ -1523,8 +1520,6 @@ static const struct qmp_phy_cfg ipq8074_pciephy_gen3_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, - .has_pwrdn_delay = true, - .pipe_clock_rate = 250000000, }; @@ -1553,8 +1548,6 @@ static const struct qmp_phy_cfg ipq6018_pciephy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, - - .has_pwrdn_delay = true, }; static const struct qmp_phy_cfg sdm845_qmp_pciephy_cfg = { @@ -1583,8 +1576,6 @@ static const struct qmp_phy_cfg sdm845_qmp_pciephy_cfg = { .start_ctrl = PCS_START | SERDES_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .phy_status = PHYSTATUS, - - .has_pwrdn_delay = true, }; static const struct qmp_phy_cfg sdm845_qhp_pciephy_cfg = { @@ -1611,8 +1602,6 @@ static const struct qmp_phy_cfg sdm845_qhp_pciephy_cfg = { .start_ctrl = PCS_START | SERDES_START, .pwrdn_ctrl = SW_PWRDN | 
REFCLK_DRV_DSBL, .phy_status = PHYSTATUS, - - .has_pwrdn_delay = true, }; static const struct qmp_phy_cfg sm8250_qmp_gen3x1_pciephy_cfg = { @@ -1651,8 +1640,6 @@ static const struct qmp_phy_cfg sm8250_qmp_gen3x1_pciephy_cfg = { .start_ctrl = PCS_START | SERDES_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .phy_status = PHYSTATUS, - - .has_pwrdn_delay = true, }; static const struct qmp_phy_cfg sm8250_qmp_gen3x2_pciephy_cfg = { @@ -1691,8 +1678,6 @@ static const struct qmp_phy_cfg sm8250_qmp_gen3x2_pciephy_cfg = { .start_ctrl = PCS_START | SERDES_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .phy_status = PHYSTATUS, - - .has_pwrdn_delay = true, }; static const struct qmp_phy_cfg msm8998_pciephy_cfg = { @@ -1719,6 +1704,8 @@ static const struct qmp_phy_cfg msm8998_pciephy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .phy_status = PHYSTATUS, + + .skip_start_delay = true, }; static const struct qmp_phy_cfg sc8180x_pciephy_cfg = { @@ -1746,8 +1733,6 @@ static const struct qmp_phy_cfg sc8180x_pciephy_cfg = { .start_ctrl = PCS_START | SERDES_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, - - .has_pwrdn_delay = true, }; static const struct qmp_phy_cfg sdx55_qmp_pciephy_cfg = { @@ -1776,8 +1761,6 @@ static const struct qmp_phy_cfg sdx55_qmp_pciephy_cfg = { .start_ctrl = PCS_START | SERDES_START, .pwrdn_ctrl = SW_PWRDN, .phy_status = PHYSTATUS_4_20, - - .has_pwrdn_delay = true, }; static const struct qmp_phy_cfg sm8450_qmp_gen3x1_pciephy_cfg = { @@ -1806,8 +1789,6 @@ static const struct qmp_phy_cfg sm8450_qmp_gen3x1_pciephy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .phy_status = PHYSTATUS, - - .has_pwrdn_delay = true, }; static const struct qmp_phy_cfg sm8450_qmp_gen4x2_pciephy_cfg = { @@ -1851,8 +1832,6 @@ static const struct qmp_phy_cfg sm8450_qmp_gen4x2_pciephy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .phy_status = 
PHYSTATUS_4_20, - - .has_pwrdn_delay = true, }; static void qmp_pcie_configure_lane(void __iomem *base, @@ -2011,15 +1990,15 @@ static int qmp_pcie_power_on(struct phy *phy) qmp_pcie_pcs_init(qphy, &cfg->tables); qmp_pcie_pcs_init(qphy, mode_tables); - if (cfg->has_pwrdn_delay) - usleep_range(1000, 1200); - /* Pull PHY out of reset state */ qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); /* start SerDes and Phy-Coding-Sublayer */ qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); + if (!cfg->skip_start_delay) + usleep_range(1000, 1200); + status = pcs + cfg->regs[QPHY_PCS_STATUS]; mask = cfg->phy_status; ready = 0; From abc0841666b9ab6568229e6b9816505c987d8a59 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:12:37 +0200 Subject: [PATCH 0589/4122] phy: qcom-qmp-pcie-msm8996: drop power-down delay config The power-down delay was included in the first version of the QMP driver for MSM8996 as an optional delay after powering on the PHY (using POWER_DOWN_CONTROL) and just before starting it. Later changes modified this sequence by powering on before initialising the PHY, but the optional delay stayed where it was (i.e. before starting the PHY). The vendor driver does not use a delay before starting the PHY and this is likely not needed on any platform unless there is a corresponding delay in the vendor kernel init sequence tables (i.e. in devicetree). Let's keep the delay for now, but drop the redundant configuration options while increasing the unnecessarily low timer slack somewhat. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221012081241.18273-11-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c index 31ac405d3785..899be7bd4d92 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c @@ -41,7 +41,7 @@ #define PHY_INIT_COMPLETE_TIMEOUT 10000 #define POWER_DOWN_DELAY_US_MIN 10 -#define POWER_DOWN_DELAY_US_MAX 11 +#define POWER_DOWN_DELAY_US_MAX 20 struct qmp_phy_init_tbl { unsigned int offset; @@ -203,12 +203,6 @@ struct qmp_phy_cfg { unsigned int mask_com_pcs_ready; /* bit offset of PHYSTATUS in QPHY_PCS_STATUS register */ unsigned int phy_status; - - /* true, if PHY needs delay after POWER_DOWN */ - bool has_pwrdn_delay; - /* power_down delay in usec */ - int pwrdn_delay_min; - int pwrdn_delay_max; }; /** @@ -326,10 +320,6 @@ static const struct qmp_phy_cfg msm8996_pciephy_cfg = { .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .mask_com_pcs_ready = PCS_READY, .phy_status = PHYSTATUS, - - .has_pwrdn_delay = true, - .pwrdn_delay_min = POWER_DOWN_DELAY_US_MIN, - .pwrdn_delay_max = POWER_DOWN_DELAY_US_MAX, }; static void qmp_pcie_msm8996_configure_lane(void __iomem *base, @@ -523,8 +513,7 @@ static int qmp_pcie_msm8996_power_on(struct phy *phy) */ qphy_setbits(pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL, cfg->pwrdn_ctrl); - if (cfg->has_pwrdn_delay) - usleep_range(cfg->pwrdn_delay_min, cfg->pwrdn_delay_max); + usleep_range(POWER_DOWN_DELAY_US_MIN, POWER_DOWN_DELAY_US_MAX); /* Pull PHY out of reset state */ qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); From d71eb7083e5eea8ddddab52e9b57a9783603a95f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:12:38 +0200 Subject: [PATCH 0590/4122] phy: 
qcom-qmp-combo: drop sc8280xp power-down delay The SC8280XP combo PHY does not need a delay before starting the USB PHY (which is what the has_pwrdn_delay config option really controls) so drop the unnecessary delay. Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221012081241.18273-12-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 84380852ba5b..a8e09333072e 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -1210,10 +1210,6 @@ static const struct qmp_phy_cfg sc8280xp_usb43dp_usb_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, .phy_status = PHYSTATUS, - - .has_pwrdn_delay = true, - .pwrdn_delay_min = POWER_DOWN_DELAY_US_MIN, - .pwrdn_delay_max = POWER_DOWN_DELAY_US_MAX, }; static const struct qmp_phy_cfg sc8280xp_usb43dp_dp_cfg = { From acfee73b635bca04e8a942a162bae0c0cd84b796 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:12:39 +0200 Subject: [PATCH 0591/4122] phy: qcom-qmp-combo: drop power-down delay config The power-down delay was included in the first version of the QMP driver as an optional delay after powering on the PHY (using POWER_DOWN_CONTROL) and just before starting it. Later changes modified this sequence by powering on before initialising the PHY, but the optional delay stayed where it was (i.e. before starting the PHY). The vendor driver does not use a delay before starting the PHY and this is likely not needed on any platform unless there is a corresponding delay in the vendor kernel init sequence tables (i.e. in devicetree). Let's keep the delay for now, but drop the redundant delay period configuration while increasing the unnecessarily low timer slack somewhat. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221012081241.18273-13-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index a8e09333072e..82055d3a3536 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -63,8 +63,6 @@ #define CLAMP_EN BIT(0) /* enables i/o clamp_n */ #define PHY_INIT_COMPLETE_TIMEOUT 10000 -#define POWER_DOWN_DELAY_US_MIN 10 -#define POWER_DOWN_DELAY_US_MAX 11 struct qmp_phy_init_tbl { unsigned int offset; @@ -860,9 +858,6 @@ struct qmp_phy_cfg { /* true, if PHY needs delay after POWER_DOWN */ bool has_pwrdn_delay; - /* power_down delay in usec */ - int pwrdn_delay_min; - int pwrdn_delay_max; /* Offset from PCS to PCS_USB region */ unsigned int pcs_usb_offset; @@ -1031,8 +1026,6 @@ static const struct qmp_phy_cfg sc7180_usb3phy_cfg = { .phy_status = PHYSTATUS, .has_pwrdn_delay = true, - .pwrdn_delay_min = POWER_DOWN_DELAY_US_MIN, - .pwrdn_delay_max = POWER_DOWN_DELAY_US_MAX, }; static const struct qmp_phy_cfg sc7180_dpphy_cfg = { @@ -1102,8 +1095,6 @@ static const struct qmp_phy_cfg sdm845_usb3phy_cfg = { .phy_status = PHYSTATUS, .has_pwrdn_delay = true, - .pwrdn_delay_min = POWER_DOWN_DELAY_US_MIN, - .pwrdn_delay_max = POWER_DOWN_DELAY_US_MAX, }; static const struct qmp_phy_combo_cfg sdm845_usb3dpphy_cfg = { @@ -1138,10 +1129,7 @@ static const struct qmp_phy_cfg sm8150_usb3phy_cfg = { .pwrdn_ctrl = SW_PWRDN, .phy_status = PHYSTATUS, - .has_pwrdn_delay = true, - .pwrdn_delay_min = POWER_DOWN_DELAY_US_MIN, - .pwrdn_delay_max = POWER_DOWN_DELAY_US_MAX, }; static const struct qmp_phy_cfg sc8180x_dpphy_cfg = { @@ -1282,8 +1270,6 @@ static const struct qmp_phy_cfg sm8250_usb3phy_cfg = { .phy_status = PHYSTATUS, .has_pwrdn_delay = true, - 
.pwrdn_delay_min = POWER_DOWN_DELAY_US_MIN, - .pwrdn_delay_max = POWER_DOWN_DELAY_US_MAX, }; static const struct qmp_phy_cfg sm8250_dpphy_cfg = { @@ -2064,7 +2050,7 @@ static int qmp_combo_power_on(struct phy *phy) qmp_combo_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num); if (cfg->has_pwrdn_delay) - usleep_range(cfg->pwrdn_delay_min, cfg->pwrdn_delay_max); + usleep_range(10, 20); if (cfg->type != PHY_TYPE_DP) { /* Pull PHY out of reset state */ From 898ab85d6b1e8f6271d180c47ef8a024dea9e357 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:12:40 +0200 Subject: [PATCH 0592/4122] phy: qcom-qmp-usb: drop sc8280xp power-down delay The SC8280XP PHY does not need a delay before starting the PHY (which is what the has_pwrdn_delay config option really controls) so drop the unnecessary delay. Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221012081241.18273-14-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 3aab9ea90078..57dda1ecefe6 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -1718,10 +1718,6 @@ static const struct qmp_phy_cfg sc8280xp_usb3_uniphy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, .phy_status = PHYSTATUS, - - .has_pwrdn_delay = true, - .pwrdn_delay_min = POWER_DOWN_DELAY_US_MIN, - .pwrdn_delay_max = POWER_DOWN_DELAY_US_MAX, }; static const struct qmp_phy_cfg qmp_v3_usb3_uniphy_cfg = { From 38cd167d1fc6b5bf038229b1fa02bb1f551a564f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:12:41 +0200 Subject: [PATCH 0593/4122] phy: qcom-qmp-usb: drop power-down delay config The power-down delay was included in the first version of the QMP driver as an optional delay after powering on the PHY (using POWER_DOWN_CONTROL) and 
just before starting it. Later changes modified this sequence by powering on before initialising the PHY, but the optional delay stayed where it was (i.e. before starting the PHY). The vendor driver does not use a delay before starting the PHY and this is likely not needed on any platform unless there is a corresponding delay in the vendor kernel init sequence tables (i.e. in devicetree). Let's keep the delay for now, but drop the redundant delay period configuration while increasing the unnecessarily low timer slack somewhat. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221012081241.18273-15-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 35 +------------------------ 1 file changed, 1 insertion(+), 34 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 57dda1ecefe6..751f628710eb 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -63,8 +63,6 @@ #define CLAMP_EN BIT(0) /* enables i/o clamp_n */ #define PHY_INIT_COMPLETE_TIMEOUT 10000 -#define POWER_DOWN_DELAY_US_MIN 10 -#define POWER_DOWN_DELAY_US_MAX 11 struct qmp_phy_init_tbl { unsigned int offset; @@ -1452,9 +1450,6 @@ struct qmp_phy_cfg { /* true, if PHY needs delay after POWER_DOWN */ bool has_pwrdn_delay; - /* power_down delay in usec */ - int pwrdn_delay_min; - int pwrdn_delay_max; /* true, if PHY has a separate DP_COM control block */ bool has_phy_dp_com_ctrl; @@ -1660,9 +1655,6 @@ static const struct qmp_phy_cfg qmp_v3_usb3phy_cfg = { .phy_status = PHYSTATUS, .has_pwrdn_delay = true, - .pwrdn_delay_min = POWER_DOWN_DELAY_US_MIN, - .pwrdn_delay_max = POWER_DOWN_DELAY_US_MAX, - .has_phy_dp_com_ctrl = true, }; @@ -1690,9 +1682,6 @@ static const struct qmp_phy_cfg sc7180_usb3phy_cfg = { .phy_status = PHYSTATUS, .has_pwrdn_delay = true, - .pwrdn_delay_min = POWER_DOWN_DELAY_US_MIN, - .pwrdn_delay_max = 
POWER_DOWN_DELAY_US_MAX, - .has_phy_dp_com_ctrl = true, }; @@ -1744,8 +1733,6 @@ static const struct qmp_phy_cfg qmp_v3_usb3_uniphy_cfg = { .phy_status = PHYSTATUS, .has_pwrdn_delay = true, - .pwrdn_delay_min = POWER_DOWN_DELAY_US_MIN, - .pwrdn_delay_max = POWER_DOWN_DELAY_US_MAX, }; static const struct qmp_phy_cfg msm8998_usb3phy_cfg = { @@ -1798,11 +1785,7 @@ static const struct qmp_phy_cfg sm8150_usb3phy_cfg = { .pwrdn_ctrl = SW_PWRDN, .phy_status = PHYSTATUS, - .has_pwrdn_delay = true, - .pwrdn_delay_min = POWER_DOWN_DELAY_US_MIN, - .pwrdn_delay_max = POWER_DOWN_DELAY_US_MAX, - .has_phy_dp_com_ctrl = true, }; @@ -1833,8 +1816,6 @@ static const struct qmp_phy_cfg sm8150_usb3_uniphy_cfg = { .phy_status = PHYSTATUS, .has_pwrdn_delay = true, - .pwrdn_delay_min = POWER_DOWN_DELAY_US_MIN, - .pwrdn_delay_max = POWER_DOWN_DELAY_US_MAX, }; static const struct qmp_phy_cfg sm8250_usb3phy_cfg = { @@ -1864,9 +1845,6 @@ static const struct qmp_phy_cfg sm8250_usb3phy_cfg = { .phy_status = PHYSTATUS, .has_pwrdn_delay = true, - .pwrdn_delay_min = POWER_DOWN_DELAY_US_MIN, - .pwrdn_delay_max = POWER_DOWN_DELAY_US_MAX, - .has_phy_dp_com_ctrl = true, }; @@ -1897,8 +1875,6 @@ static const struct qmp_phy_cfg sm8250_usb3_uniphy_cfg = { .phy_status = PHYSTATUS, .has_pwrdn_delay = true, - .pwrdn_delay_min = POWER_DOWN_DELAY_US_MIN, - .pwrdn_delay_max = POWER_DOWN_DELAY_US_MAX, }; static const struct qmp_phy_cfg sdx55_usb3_uniphy_cfg = { @@ -1928,8 +1904,6 @@ static const struct qmp_phy_cfg sdx55_usb3_uniphy_cfg = { .phy_status = PHYSTATUS, .has_pwrdn_delay = true, - .pwrdn_delay_min = POWER_DOWN_DELAY_US_MIN, - .pwrdn_delay_max = POWER_DOWN_DELAY_US_MAX, }; static const struct qmp_phy_cfg sdx65_usb3_uniphy_cfg = { @@ -1959,8 +1933,6 @@ static const struct qmp_phy_cfg sdx65_usb3_uniphy_cfg = { .phy_status = PHYSTATUS, .has_pwrdn_delay = true, - .pwrdn_delay_min = POWER_DOWN_DELAY_US_MIN, - .pwrdn_delay_max = POWER_DOWN_DELAY_US_MAX, }; static const struct qmp_phy_cfg sm8350_usb3phy_cfg = 
{ @@ -1990,9 +1962,6 @@ static const struct qmp_phy_cfg sm8350_usb3phy_cfg = { .phy_status = PHYSTATUS, .has_pwrdn_delay = true, - .pwrdn_delay_min = POWER_DOWN_DELAY_US_MIN, - .pwrdn_delay_max = POWER_DOWN_DELAY_US_MAX, - .has_phy_dp_com_ctrl = true, }; @@ -2023,8 +1992,6 @@ static const struct qmp_phy_cfg sm8350_usb3_uniphy_cfg = { .phy_status = PHYSTATUS, .has_pwrdn_delay = true, - .pwrdn_delay_min = POWER_DOWN_DELAY_US_MIN, - .pwrdn_delay_max = POWER_DOWN_DELAY_US_MAX, }; static const struct qmp_phy_cfg qcm2290_usb3phy_cfg = { @@ -2205,7 +2172,7 @@ static int qmp_usb_power_on(struct phy *phy) qmp_usb_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num); if (cfg->has_pwrdn_delay) - usleep_range(cfg->pwrdn_delay_min, cfg->pwrdn_delay_max); + usleep_range(10, 20); /* Pull PHY out of reset state */ qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); From 0983529d7513e5417a5010f70582e1040c404551 Mon Sep 17 00:00:00 2001 From: Vidya Sagar Date: Fri, 14 Oct 2022 00:08:41 +0530 Subject: [PATCH 0594/4122] phy: tegra: p2u: Set ENABLE_L2_EXIT_RATE_CHANGE in calibration Set ENABLE_L2_EXIT_RATE_CHANGE register bit to request UPHY PLL rate change to Gen1 during initialization. This helps in the below surprise link down cases, - Surprise link down happens at Gen3/Gen4 link speed. - Surprise link down happens and external REFCLK is cut off, which causes UPHY PLL rate to deviate to an invalid rate. 
Signed-off-by: Vidya Sagar Link: https://lore.kernel.org/r/20221013183854.21087-9-vidyas@nvidia.com Signed-off-by: Vinod Koul --- drivers/phy/tegra/phy-tegra194-p2u.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/phy/tegra/phy-tegra194-p2u.c b/drivers/phy/tegra/phy-tegra194-p2u.c index 1415ca71de38..633e6b747275 100644 --- a/drivers/phy/tegra/phy-tegra194-p2u.c +++ b/drivers/phy/tegra/phy-tegra194-p2u.c @@ -15,6 +15,7 @@ #include #define P2U_CONTROL_CMN 0x74 +#define P2U_CONTROL_CMN_ENABLE_L2_EXIT_RATE_CHANGE BIT(13) #define P2U_CONTROL_CMN_SKP_SIZE_PROTECTION_EN BIT(20) #define P2U_PERIODIC_EQ_CTRL_GEN3 0xc0 @@ -85,8 +86,21 @@ static int tegra_p2u_power_on(struct phy *x) return 0; } +static int tegra_p2u_calibrate(struct phy *x) +{ + struct tegra_p2u *phy = phy_get_drvdata(x); + u32 val; + + val = p2u_readl(phy, P2U_CONTROL_CMN); + val |= P2U_CONTROL_CMN_ENABLE_L2_EXIT_RATE_CHANGE; + p2u_writel(phy, val, P2U_CONTROL_CMN); + + return 0; +} + static const struct phy_ops ops = { .power_on = tegra_p2u_power_on, + .calibrate = tegra_p2u_calibrate, .owner = THIS_MODULE, }; From 4a9eac5ae2200f1b208dd33738777f89f93dc0fe Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:49:43 +0200 Subject: [PATCH 0595/4122] phy: qcom-qmp-pcie: fix sc8180x initialisation The phy_status mask was never set for SC8180X which meant that the driver would not wait for the PHY to be initialised during power-on and would never detect PHY initialisation timeouts. 
Fixes: f839f14e24f2 ("phy: qcom-qmp: Add sc8180x PCIe support") Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221012085002.24099-1-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index 151baa63e8e8..a7677b61f96e 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -1733,6 +1733,7 @@ static const struct qmp_phy_cfg sc8180x_pciephy_cfg = { .start_ctrl = PCS_START | SERDES_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, + .phy_status = PHYSTATUS, }; static const struct qmp_phy_cfg sdx55_qmp_pciephy_cfg = { From 94b7288eadf6e2c09e6280c65a9d07cca01bf434 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:49:44 +0200 Subject: [PATCH 0596/4122] phy: qcom-qmp-pcie: fix ipq8074-gen3 initialisation The phy_status mask was never set for IPQ8074 (gen3) which meant that the driver would not wait for the PHY to be initialised during power-on and would never detect PHY initialisation timeouts. 
Fixes: 334fad185415 ("phy: qcom-qmp-pcie: add IPQ8074 PCIe Gen3 QMP PHY support") Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221012085002.24099-2-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index a7677b61f96e..f1e94b879d31 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -1519,6 +1519,7 @@ static const struct qmp_phy_cfg ipq8074_pciephy_gen3_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, + .phy_status = PHYSTATUS, .pipe_clock_rate = 250000000, }; From 30518b19895789aa9101474af2ee0f62cd882d5e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:49:45 +0200 Subject: [PATCH 0597/4122] phy: qcom-qmp-pcie: fix ipq6018 initialisation The phy_status mask was never set for IPQ6018 which meant that the driver would not wait for the PHY to be initialised during power-on and would never detect PHY initialisation timeouts. 
Fixes: 520264db3bf9 ("phy: qcom-qmp: add QMP V2 PCIe PHY support for ipq60xx") Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221012085002.24099-3-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index f1e94b879d31..9b866ed19ddc 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -1549,6 +1549,7 @@ static const struct qmp_phy_cfg ipq6018_pciephy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, + .phy_status = PHYSTATUS, }; static const struct qmp_phy_cfg sdm845_qmp_pciephy_cfg = { From 2577ba8c39dafe4320e1eb206b732e08bf871c83 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:49:46 +0200 Subject: [PATCH 0598/4122] phy: qcom-qmp-pcie: clean up status polling Clean up the status polling by dropping the ready bit mask which is no longer needed since the QMP driver split. 
Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221012085002.24099-4-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index 9b866ed19ddc..4af6b9e50d16 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -1966,7 +1966,7 @@ static int qmp_pcie_power_on(struct phy *phy) const struct qmp_phy_cfg_tables *mode_tables; void __iomem *pcs = qphy->pcs; void __iomem *status; - unsigned int mask, val, ready; + unsigned int mask, val; int ret; qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], @@ -2004,9 +2004,7 @@ static int qmp_pcie_power_on(struct phy *phy) status = pcs + cfg->regs[QPHY_PCS_STATUS]; mask = cfg->phy_status; - ready = 0; - - ret = readl_poll_timeout(status, val, (val & mask) == ready, 10, + ret = readl_poll_timeout(status, val, !(val & mask), 10, PHY_INIT_COMPLETE_TIMEOUT); if (ret) { dev_err(qmp->dev, "phy initialization timed-out\n"); From 5cbeb75a36aa28353e51f5e6926e19449a4f3389 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:49:47 +0200 Subject: [PATCH 0599/4122] phy: qcom-qmp-pcie: increase status polling period It typically takes between one and two milliseconds for the PHY to become ready after starting it. Increase the tight 3--10 us polling period to the more reasonable 51--200 us. 
Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221012085002.24099-5-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index 4af6b9e50d16..d3e7e673114f 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -2004,7 +2004,7 @@ static int qmp_pcie_power_on(struct phy *phy) status = pcs + cfg->regs[QPHY_PCS_STATUS]; mask = cfg->phy_status; - ret = readl_poll_timeout(status, val, !(val & mask), 10, + ret = readl_poll_timeout(status, val, !(val & mask), 200, PHY_INIT_COMPLETE_TIMEOUT); if (ret) { dev_err(qmp->dev, "phy initialization timed-out\n"); From 1a3ae97c2490b2217810a17ab4f47552e9f6f70f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:49:48 +0200 Subject: [PATCH 0600/4122] phy: qcom-qmp-pcie-msm8996: clean up ready and status polling Clean up the PHY ready and status polling by dropping the configuration masks which are no longer needed since the QMP driver split. 
Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221012085002.24099-6-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- .../phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c index 899be7bd4d92..b9260c8746bf 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c @@ -35,7 +35,6 @@ #define PLL_READY_GATE_EN BIT(3) /* QPHY_PCS_STATUS bit */ #define PHYSTATUS BIT(6) -#define PHYSTATUS_4_20 BIT(7) /* QPHY_COM_PCS_READY_STATUS bit */ #define PCS_READY BIT(0) @@ -200,9 +199,6 @@ struct qmp_phy_cfg { unsigned int start_ctrl; unsigned int pwrdn_ctrl; - unsigned int mask_com_pcs_ready; - /* bit offset of PHYSTATUS in QPHY_PCS_STATUS register */ - unsigned int phy_status; }; /** @@ -318,8 +314,6 @@ static const struct qmp_phy_cfg msm8996_pciephy_cfg = { .start_ctrl = PCS_START | PLL_READY_GATE_EN, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, - .mask_com_pcs_ready = PCS_READY, - .phy_status = PHYSTATUS, }; static void qmp_pcie_msm8996_configure_lane(void __iomem *base, @@ -356,7 +350,7 @@ static int qmp_pcie_msm8996_serdes_init(struct qmp_phy *qphy) const struct qmp_phy_init_tbl *serdes_tbl = cfg->serdes_tbl; int serdes_tbl_num = cfg->serdes_tbl_num; void __iomem *status; - unsigned int mask, val; + unsigned int val; int ret; qmp_pcie_msm8996_configure(serdes, serdes_tbl, serdes_tbl_num); @@ -366,9 +360,7 @@ static int qmp_pcie_msm8996_serdes_init(struct qmp_phy *qphy) SERDES_START | PCS_START); status = serdes + cfg->regs[QPHY_COM_PCS_READY_STATUS]; - mask = cfg->mask_com_pcs_ready; - - ret = readl_poll_timeout(status, val, (val & mask), 10, + ret = readl_poll_timeout(status, val, (val & PCS_READY), 10, PHY_INIT_COMPLETE_TIMEOUT); if (ret) { dev_err(qmp->dev, @@ -484,7 +476,7 @@ static int 
qmp_pcie_msm8996_power_on(struct phy *phy) void __iomem *rx = qphy->rx; void __iomem *pcs = qphy->pcs; void __iomem *status; - unsigned int mask, val, ready; + unsigned int val; int ret; qmp_pcie_msm8996_serdes_init(qphy); @@ -522,10 +514,7 @@ static int qmp_pcie_msm8996_power_on(struct phy *phy) qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); status = pcs + cfg->regs[QPHY_PCS_STATUS]; - mask = cfg->phy_status; - ready = 0; - - ret = readl_poll_timeout(status, val, (val & mask) == ready, 10, + ret = readl_poll_timeout(status, val, !(val & PHYSTATUS), 10, PHY_INIT_COMPLETE_TIMEOUT); if (ret) { dev_err(qmp->dev, "phy initialization timed-out\n"); From 3894f6d03c4e96665232fbe2e04589f1228cbb0c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:49:49 +0200 Subject: [PATCH 0601/4122] phy: qcom-qmp-pcie-msm8996: increase status polling period It typically takes between one and two milliseconds for the PHY to become ready after starting it. Increase the tight 3--10 us polling period to the more reasonable 51--200 us. 
Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221012085002.24099-7-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c index b9260c8746bf..cd8fafe4c295 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c @@ -360,7 +360,7 @@ static int qmp_pcie_msm8996_serdes_init(struct qmp_phy *qphy) SERDES_START | PCS_START); status = serdes + cfg->regs[QPHY_COM_PCS_READY_STATUS]; - ret = readl_poll_timeout(status, val, (val & PCS_READY), 10, + ret = readl_poll_timeout(status, val, (val & PCS_READY), 200, PHY_INIT_COMPLETE_TIMEOUT); if (ret) { dev_err(qmp->dev, @@ -514,7 +514,7 @@ static int qmp_pcie_msm8996_power_on(struct phy *phy) qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); status = pcs + cfg->regs[QPHY_PCS_STATUS]; - ret = readl_poll_timeout(status, val, !(val & PHYSTATUS), 10, + ret = readl_poll_timeout(status, val, !(val & PHYSTATUS), 200, PHY_INIT_COMPLETE_TIMEOUT); if (ret) { dev_err(qmp->dev, "phy initialization timed-out\n"); From f7075f4905e79e340b0e5f0f097c8ce896be8bb3 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:49:50 +0200 Subject: [PATCH 0602/4122] phy: qcom-qmp-combo: clean up status polling Clean up the PHY status polling by dropping the configuration mask which is no longer needed since the QMP driver split. 
Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221012085002.24099-8-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 82055d3a3536..e312cad6d9c2 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -853,8 +853,6 @@ struct qmp_phy_cfg { unsigned int start_ctrl; unsigned int pwrdn_ctrl; - /* bit offset of PHYSTATUS in QPHY_PCS_STATUS register */ - unsigned int phy_status; /* true, if PHY needs delay after POWER_DOWN */ bool has_pwrdn_delay; @@ -1023,7 +1021,6 @@ static const struct qmp_phy_cfg sc7180_usb3phy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, .has_pwrdn_delay = true, }; @@ -1092,7 +1089,6 @@ static const struct qmp_phy_cfg sdm845_usb3phy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, .has_pwrdn_delay = true, }; @@ -1127,7 +1123,6 @@ static const struct qmp_phy_cfg sm8150_usb3phy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, .has_pwrdn_delay = true, }; @@ -1197,7 +1192,6 @@ static const struct qmp_phy_cfg sc8280xp_usb43dp_usb_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, }; static const struct qmp_phy_cfg sc8280xp_usb43dp_dp_cfg = { @@ -1267,7 +1261,6 @@ static const struct qmp_phy_cfg sm8250_usb3phy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, .has_pwrdn_delay = true, }; @@ -2017,7 +2010,7 @@ static int qmp_combo_power_on(struct phy *phy) void __iomem *rx = qphy->rx; void __iomem *pcs = qphy->pcs; void __iomem *status; - unsigned int mask, val, ready; + unsigned int val; int ret; 
qmp_combo_serdes_init(qphy); @@ -2059,10 +2052,7 @@ static int qmp_combo_power_on(struct phy *phy) qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); status = pcs + cfg->regs[QPHY_PCS_STATUS]; - mask = cfg->phy_status; - ready = 0; - - ret = readl_poll_timeout(status, val, (val & mask) == ready, 10, + ret = readl_poll_timeout(status, val, !(val & PHYSTATUS), 10, PHY_INIT_COMPLETE_TIMEOUT); if (ret) { dev_err(qmp->dev, "phy initialization timed-out\n"); From 0c1154d69511a030b8ebb2b873095a7fa851e189 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:49:51 +0200 Subject: [PATCH 0603/4122] phy: qcom-qmp-combo: increase status polling period It typically takes between one and two milliseconds for the PHY to become ready after starting it. Increase the tight 3--10 us polling period to the more reasonable 51--200 us. Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221012085002.24099-9-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index e312cad6d9c2..0071c73ac1c8 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -2052,7 +2052,7 @@ static int qmp_combo_power_on(struct phy *phy) qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); status = pcs + cfg->regs[QPHY_PCS_STATUS]; - ret = readl_poll_timeout(status, val, !(val & PHYSTATUS), 10, + ret = readl_poll_timeout(status, val, !(val & PHYSTATUS), 200, PHY_INIT_COMPLETE_TIMEOUT); if (ret) { dev_err(qmp->dev, "phy initialization timed-out\n"); From c8f5c188156b87c115f27d44004428ede2e262f8 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:49:52 +0200 Subject: [PATCH 0604/4122] phy: qcom-qmp-ufs: drop unused phy-status config Drop the unused phy-status configuration mask which has never been used for 
UFS PHYs. Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221012085002.24099-10-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index 02931b82132f..1c7d8fc9b94a 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -31,8 +31,6 @@ /* QPHY_START_CONTROL bits */ #define SERDES_START BIT(0) #define PCS_START BIT(1) -/* QPHY_PCS_STATUS bit */ -#define PHYSTATUS BIT(6) /* QPHY_PCS_READY_STATUS bit */ #define PCS_READY BIT(0) @@ -548,8 +546,6 @@ struct qmp_phy_cfg { unsigned int start_ctrl; unsigned int pwrdn_ctrl; - /* bit offset of PHYSTATUS in QPHY_PCS_STATUS register */ - unsigned int phy_status; /* true, if PCS block has no separate SW_RESET register */ bool no_pcs_sw_reset; @@ -668,7 +664,6 @@ static const struct qmp_phy_cfg msm8996_ufs_cfg = { .start_ctrl = SERDES_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, .no_pcs_sw_reset = true, }; @@ -692,7 +687,6 @@ static const struct qmp_phy_cfg sdm845_ufsphy_cfg = { .start_ctrl = SERDES_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, .no_pcs_sw_reset = true, }; @@ -739,7 +733,6 @@ static const struct qmp_phy_cfg sm8150_ufsphy_cfg = { .start_ctrl = SERDES_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, }; static const struct qmp_phy_cfg sm8350_ufsphy_cfg = { @@ -761,7 +754,6 @@ static const struct qmp_phy_cfg sm8350_ufsphy_cfg = { .start_ctrl = SERDES_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, }; static const struct qmp_phy_cfg sm8450_ufsphy_cfg = { @@ -783,7 +775,6 @@ static const struct qmp_phy_cfg sm8450_ufsphy_cfg = { .start_ctrl = SERDES_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, }; static void qmp_ufs_configure_lane(void __iomem *base, From 2f561b687cf47e289b7e068881ad87530c1f1435 
Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:49:53 +0200 Subject: [PATCH 0605/4122] phy: qcom-qmp-ufs: clean up ready polling Clean up the PHY ready polling by dropping the mask variables which are no longer needed since the QMP driver split. Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221012085002.24099-11-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index 1c7d8fc9b94a..8380904cf26c 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -908,7 +908,7 @@ static int qmp_ufs_power_on(struct phy *phy) void __iomem *rx = qphy->rx; void __iomem *pcs = qphy->pcs; void __iomem *status; - unsigned int mask, val, ready; + unsigned int val; int ret; qmp_ufs_serdes_init(qphy); @@ -937,10 +937,7 @@ static int qmp_ufs_power_on(struct phy *phy) qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); status = pcs + cfg->regs[QPHY_PCS_READY_STATUS]; - mask = PCS_READY; - ready = PCS_READY; - - ret = readl_poll_timeout(status, val, (val & mask) == ready, 10, + ret = readl_poll_timeout(status, val, (val & PCS_READY), 10, PHY_INIT_COMPLETE_TIMEOUT); if (ret) { dev_err(qmp->dev, "phy initialization timed-out\n"); From 7516edbfaf708e5b987f1b9f23aa7336dd4a812d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:49:54 +0200 Subject: [PATCH 0606/4122] phy: qcom-qmp-ufs: increase ready polling period It typically takes between one and two milliseconds for the PHY to become ready after starting it. Increase the tight 3--10 us polling period to the more reasonable 51--200 us. 
Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221012085002.24099-12-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index 8380904cf26c..1a51f803928b 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -937,7 +937,7 @@ static int qmp_ufs_power_on(struct phy *phy) qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); status = pcs + cfg->regs[QPHY_PCS_READY_STATUS]; - ret = readl_poll_timeout(status, val, (val & PCS_READY), 10, + ret = readl_poll_timeout(status, val, (val & PCS_READY), 200, PHY_INIT_COMPLETE_TIMEOUT); if (ret) { dev_err(qmp->dev, "phy initialization timed-out\n"); From f5ef85adece529a6cd1e7563081c41038923a9ed Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:49:55 +0200 Subject: [PATCH 0607/4122] phy: qcom-qmp-usb: clean up status polling Clean up the PHY status polling by dropping the configuration mask which is no longer needed since the QMP driver split. 
Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221012085002.24099-13-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 25 ++----------------------- 1 file changed, 2 insertions(+), 23 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 751f628710eb..840b67167581 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -1445,8 +1445,6 @@ struct qmp_phy_cfg { unsigned int start_ctrl; unsigned int pwrdn_ctrl; - /* bit offset of PHYSTATUS in QPHY_PCS_STATUS register */ - unsigned int phy_status; /* true, if PHY needs delay after POWER_DOWN */ bool has_pwrdn_delay; @@ -1604,7 +1602,6 @@ static const struct qmp_phy_cfg ipq8074_usb3phy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, }; static const struct qmp_phy_cfg msm8996_usb3phy_cfg = { @@ -1628,7 +1625,6 @@ static const struct qmp_phy_cfg msm8996_usb3phy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, }; static const struct qmp_phy_cfg qmp_v3_usb3phy_cfg = { @@ -1652,7 +1648,6 @@ static const struct qmp_phy_cfg qmp_v3_usb3phy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, .has_pwrdn_delay = true, .has_phy_dp_com_ctrl = true, @@ -1679,7 +1674,6 @@ static const struct qmp_phy_cfg sc7180_usb3phy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, .has_pwrdn_delay = true, .has_phy_dp_com_ctrl = true, @@ -1706,7 +1700,6 @@ static const struct qmp_phy_cfg sc8280xp_usb3_uniphy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, }; static const struct qmp_phy_cfg qmp_v3_usb3_uniphy_cfg = { @@ -1730,7 +1723,6 @@ static const struct qmp_phy_cfg qmp_v3_usb3_uniphy_cfg = { .start_ctrl = 
SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, .has_pwrdn_delay = true, }; @@ -1756,7 +1748,6 @@ static const struct qmp_phy_cfg msm8998_usb3phy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, }; static const struct qmp_phy_cfg sm8150_usb3phy_cfg = { @@ -1783,7 +1774,6 @@ static const struct qmp_phy_cfg sm8150_usb3phy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, .has_pwrdn_delay = true, .has_phy_dp_com_ctrl = true, @@ -1813,7 +1803,6 @@ static const struct qmp_phy_cfg sm8150_usb3_uniphy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, .has_pwrdn_delay = true, }; @@ -1842,7 +1831,6 @@ static const struct qmp_phy_cfg sm8250_usb3phy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, .has_pwrdn_delay = true, .has_phy_dp_com_ctrl = true, @@ -1872,7 +1860,6 @@ static const struct qmp_phy_cfg sm8250_usb3_uniphy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, .has_pwrdn_delay = true, }; @@ -1901,7 +1888,6 @@ static const struct qmp_phy_cfg sdx55_usb3_uniphy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, .has_pwrdn_delay = true, }; @@ -1930,7 +1916,6 @@ static const struct qmp_phy_cfg sdx65_usb3_uniphy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, .has_pwrdn_delay = true, }; @@ -1959,7 +1944,6 @@ static const struct qmp_phy_cfg sm8350_usb3phy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, .has_pwrdn_delay = true, .has_phy_dp_com_ctrl = true, @@ -1989,7 +1973,6 @@ static const struct qmp_phy_cfg sm8350_usb3_uniphy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, .has_pwrdn_delay = true, }; @@ 
-2015,7 +1998,6 @@ static const struct qmp_phy_cfg qcm2290_usb3phy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, }; static void qmp_usb_configure_lane(void __iomem *base, @@ -2147,7 +2129,7 @@ static int qmp_usb_power_on(struct phy *phy) void __iomem *rx = qphy->rx; void __iomem *pcs = qphy->pcs; void __iomem *status; - unsigned int mask, val, ready; + unsigned int val; int ret; qmp_usb_serdes_init(qphy); @@ -2181,10 +2163,7 @@ static int qmp_usb_power_on(struct phy *phy) qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); status = pcs + cfg->regs[QPHY_PCS_STATUS]; - mask = cfg->phy_status; - ready = 0; - - ret = readl_poll_timeout(status, val, (val & mask) == ready, 10, + ret = readl_poll_timeout(status, val, !(val & PHYSTATUS), 10, PHY_INIT_COMPLETE_TIMEOUT); if (ret) { dev_err(qmp->dev, "phy initialization timed-out\n"); From 7612890b9df8f3f4f9b4fd39d988a5afd97aa3e7 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:49:56 +0200 Subject: [PATCH 0608/4122] phy: qcom-qmp-usb: increase status polling period It typically takes between one and two milliseconds for the PHY to become ready after starting it. Increase the tight 3--10 us polling period to the more reasonable 51--200 us. 
Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221012085002.24099-14-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 840b67167581..0bd9291e6a7b 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -2163,7 +2163,7 @@ static int qmp_usb_power_on(struct phy *phy) qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); status = pcs + cfg->regs[QPHY_PCS_STATUS]; - ret = readl_poll_timeout(status, val, !(val & PHYSTATUS), 10, + ret = readl_poll_timeout(status, val, !(val & PHYSTATUS), 200, PHY_INIT_COMPLETE_TIMEOUT); if (ret) { dev_err(qmp->dev, "phy initialization timed-out\n"); From d4b81490fe44429203ae6e55df8a556e5b77c88e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:49:57 +0200 Subject: [PATCH 0609/4122] phy: qcom-qmp-combo: drop start and pwrdn-ctrl abstraction All USB PHYs need to start and stop the SerDes and PCS so drop the start-ctrl abstraction which is no longer needed since the QMP driver split. Similarly, drop the pwrdn-ctrl abstraction which also is not needed since the split. 
Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221012085002.24099-15-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 29 +++++------------------ 1 file changed, 6 insertions(+), 23 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 0071c73ac1c8..2fab8d5ec0f1 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -851,9 +851,6 @@ struct qmp_phy_cfg { /* array of registers with different offsets */ const unsigned int *regs; - unsigned int start_ctrl; - unsigned int pwrdn_ctrl; - /* true, if PHY needs delay after POWER_DOWN */ bool has_pwrdn_delay; @@ -1019,9 +1016,6 @@ static const struct qmp_phy_cfg sc7180_usb3phy_cfg = { .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = qmp_v3_usb3phy_regs_layout, - .start_ctrl = SERDES_START | PCS_START, - .pwrdn_ctrl = SW_PWRDN, - .has_pwrdn_delay = true, }; @@ -1087,9 +1081,6 @@ static const struct qmp_phy_cfg sdm845_usb3phy_cfg = { .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = qmp_v3_usb3phy_regs_layout, - .start_ctrl = SERDES_START | PCS_START, - .pwrdn_ctrl = SW_PWRDN, - .has_pwrdn_delay = true, }; @@ -1121,9 +1112,6 @@ static const struct qmp_phy_cfg sm8150_usb3phy_cfg = { .regs = qmp_v4_usb3phy_regs_layout, .pcs_usb_offset = 0x300, - .start_ctrl = SERDES_START | PCS_START, - .pwrdn_ctrl = SW_PWRDN, - .has_pwrdn_delay = true, }; @@ -1189,9 +1177,6 @@ static const struct qmp_phy_cfg sc8280xp_usb43dp_usb_cfg = { .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = qmp_v4_usb3phy_regs_layout, .pcs_usb_offset = 0x300, - - .start_ctrl = SERDES_START | PCS_START, - .pwrdn_ctrl = SW_PWRDN, }; static const struct qmp_phy_cfg sc8280xp_usb43dp_dp_cfg = { @@ -1259,9 +1244,6 @@ static const struct qmp_phy_cfg sm8250_usb3phy_cfg = { .regs = qmp_v4_usb3phy_regs_layout, .pcs_usb_offset = 0x300, - .start_ctrl = SERDES_START | 
PCS_START, - .pwrdn_ctrl = SW_PWRDN, - .has_pwrdn_delay = true, }; @@ -1944,8 +1926,7 @@ static int qmp_combo_com_init(struct qmp_phy *qphy) qphy_clrbits(dp_com, QPHY_V3_DP_COM_SWI_CTRL, 0x03); qphy_clrbits(dp_com, QPHY_V3_DP_COM_SW_RESET, SW_RESET); - qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], - cfg->pwrdn_ctrl); + qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], SW_PWRDN); mutex_unlock(&qmp->phy_mutex); @@ -2049,7 +2030,8 @@ static int qmp_combo_power_on(struct phy *phy) /* Pull PHY out of reset state */ qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); /* start SerDes and Phy-Coding-Sublayer */ - qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); + qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], + SERDES_START | PCS_START); status = pcs + cfg->regs[QPHY_PCS_STATUS]; ret = readl_poll_timeout(status, val, !(val & PHYSTATUS), 200, @@ -2082,11 +2064,12 @@ static int qmp_combo_power_off(struct phy *phy) qphy_setbits(qphy->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); /* stop SerDes and Phy-Coding-Sublayer */ - qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); + qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], + SERDES_START | PCS_START); /* Put PHY into POWER DOWN state: active low */ qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], - cfg->pwrdn_ctrl); + SW_PWRDN); } return 0; From 5806b87dea8fc1b65a542ef93cbe5f6114157a74 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:49:58 +0200 Subject: [PATCH 0610/4122] phy: qcom-qmp-pcie: drop start-ctrl abstraction All PCIe PHYs need to start and stop the SerDes and PCS so drop the start-ctrl abstraction which is no longer needed since the QMP driver split. 
Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221012085002.24099-16-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index d3e7e673114f..5534a4ad0243 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -1355,7 +1355,6 @@ struct qmp_phy_cfg { /* array of registers with different offsets */ const unsigned int *regs; - unsigned int start_ctrl; unsigned int pwrdn_ctrl; /* bit offset of PHYSTATUS in QPHY_PCS_STATUS register */ unsigned int phy_status; @@ -1491,7 +1490,6 @@ static const struct qmp_phy_cfg ipq8074_pciephy_cfg = { .num_vregs = 0, .regs = pciephy_regs_layout, - .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .phy_status = PHYSTATUS, }; @@ -1517,7 +1515,6 @@ static const struct qmp_phy_cfg ipq8074_pciephy_gen3_cfg = { .num_vregs = 0, .regs = ipq_pciephy_gen3_regs_layout, - .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .phy_status = PHYSTATUS, @@ -1547,7 +1544,6 @@ static const struct qmp_phy_cfg ipq6018_pciephy_cfg = { .num_vregs = 0, .regs = ipq_pciephy_gen3_regs_layout, - .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .phy_status = PHYSTATUS, }; @@ -1575,7 +1571,6 @@ static const struct qmp_phy_cfg sdm845_qmp_pciephy_cfg = { .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = sdm845_qmp_pciephy_regs_layout, - .start_ctrl = PCS_START | SERDES_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .phy_status = PHYSTATUS, }; @@ -1601,7 +1596,6 @@ static const struct qmp_phy_cfg sdm845_qhp_pciephy_cfg = { .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = sdm845_qhp_pciephy_regs_layout, - .start_ctrl = PCS_START | SERDES_START, .pwrdn_ctrl = SW_PWRDN | 
REFCLK_DRV_DSBL, .phy_status = PHYSTATUS, }; @@ -1639,7 +1633,6 @@ static const struct qmp_phy_cfg sm8250_qmp_gen3x1_pciephy_cfg = { .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = sm8250_pcie_regs_layout, - .start_ctrl = PCS_START | SERDES_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .phy_status = PHYSTATUS, }; @@ -1677,7 +1670,6 @@ static const struct qmp_phy_cfg sm8250_qmp_gen3x2_pciephy_cfg = { .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = sm8250_pcie_regs_layout, - .start_ctrl = PCS_START | SERDES_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .phy_status = PHYSTATUS, }; @@ -1703,7 +1695,6 @@ static const struct qmp_phy_cfg msm8998_pciephy_cfg = { .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = pciephy_regs_layout, - .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .phy_status = PHYSTATUS, @@ -1733,7 +1724,6 @@ static const struct qmp_phy_cfg sc8180x_pciephy_cfg = { .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = sm8250_pcie_regs_layout, - .start_ctrl = PCS_START | SERDES_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .phy_status = PHYSTATUS, }; @@ -1761,7 +1751,6 @@ static const struct qmp_phy_cfg sdx55_qmp_pciephy_cfg = { .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = sm8250_pcie_regs_layout, - .start_ctrl = PCS_START | SERDES_START, .pwrdn_ctrl = SW_PWRDN, .phy_status = PHYSTATUS_4_20, }; @@ -1789,7 +1778,6 @@ static const struct qmp_phy_cfg sm8450_qmp_gen3x1_pciephy_cfg = { .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = sm8250_pcie_regs_layout, - .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .phy_status = PHYSTATUS, }; @@ -1832,7 +1820,6 @@ static const struct qmp_phy_cfg sm8450_qmp_gen4x2_pciephy_cfg = { .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = sm8250_pcie_regs_layout, - .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .phy_status = PHYSTATUS_4_20, }; @@ -1997,7 +1984,7 @@ static int qmp_pcie_power_on(struct phy *phy) 
qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); /* start SerDes and Phy-Coding-Sublayer */ - qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); + qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], SERDES_START | PCS_START); if (!cfg->skip_start_delay) usleep_range(1000, 1200); @@ -2030,7 +2017,8 @@ static int qmp_pcie_power_off(struct phy *phy) qphy_setbits(qphy->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); /* stop SerDes and Phy-Coding-Sublayer */ - qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); + qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], + SERDES_START | PCS_START); /* Put PHY into POWER DOWN state: active low */ qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], From 73ad6a9dd51799afd104edc2bf2016a347a717fe Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:49:59 +0200 Subject: [PATCH 0611/4122] phy: qcom-qmp-pcie: add config sanity checks The driver expects every configuration to set the pwrdn_ctrl and phy_status masks. Add some WARN_ON_ONCE() checks to probe to catch any new driver support that fails to provide them. 
Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221012085002.24099-17-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index 5534a4ad0243..7c81667dd968 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -2347,6 +2347,9 @@ static int qmp_pcie_probe(struct platform_device *pdev) if (!cfg) return -EINVAL; + WARN_ON_ONCE(!cfg->pwrdn_ctrl); + WARN_ON_ONCE(!cfg->phy_status); + serdes = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(serdes)) return PTR_ERR(serdes); From 3d3db6f024e70255899a32323e20561c8c6f5850 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:50:00 +0200 Subject: [PATCH 0612/4122] phy: qcom-qmp-pcie-msm8996: drop start and pwrdn-ctrl abstraction Drop the start and pwrdn-ctrl abstractions which are no longer needed since the QMP driver split. 
Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221012085002.24099-18-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- .../phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c index cd8fafe4c295..ff198d846fd2 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c @@ -196,9 +196,6 @@ struct qmp_phy_cfg { /* array of registers with different offsets */ const unsigned int *regs; - - unsigned int start_ctrl; - unsigned int pwrdn_ctrl; }; /** @@ -311,9 +308,6 @@ static const struct qmp_phy_cfg msm8996_pciephy_cfg = { .vreg_list = qmp_phy_vreg_l, .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = pciephy_regs_layout, - - .start_ctrl = PCS_START | PLL_READY_GATE_EN, - .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, }; static void qmp_pcie_msm8996_configure_lane(void __iomem *base, @@ -503,7 +497,8 @@ static int qmp_pcie_msm8996_power_on(struct phy *phy) * Pull out PHY from POWER DOWN state. * This is active low enable signal to power-down PHY. 
*/ - qphy_setbits(pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL, cfg->pwrdn_ctrl); + qphy_setbits(pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL, + SW_PWRDN | REFCLK_DRV_DSBL); usleep_range(POWER_DOWN_DELAY_US_MIN, POWER_DOWN_DELAY_US_MAX); @@ -511,7 +506,8 @@ static int qmp_pcie_msm8996_power_on(struct phy *phy) qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); /* start SerDes and Phy-Coding-Sublayer */ - qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); + qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], + PCS_START | PLL_READY_GATE_EN); status = pcs + cfg->regs[QPHY_PCS_STATUS]; ret = readl_poll_timeout(status, val, !(val & PHYSTATUS), 200, @@ -542,11 +538,12 @@ static int qmp_pcie_msm8996_power_off(struct phy *phy) qphy_setbits(qphy->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); /* stop SerDes and Phy-Coding-Sublayer */ - qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); + qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], + SERDES_START | PCS_START); /* Put PHY into POWER DOWN state: active low */ qphy_clrbits(qphy->pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL, - cfg->pwrdn_ctrl); + SW_PWRDN | REFCLK_DRV_DSBL); return 0; } From cb4a982fa94a106c3e5d7d9f596375ae442a71ba Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:50:01 +0200 Subject: [PATCH 0613/4122] phy: qcom-qmp-ufs: drop start and pwrdn-ctrl abstraction Drop the start and pwrdn-ctrl abstractions which are no longer needed since the QMP driver split. 
Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221012085002.24099-19-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 35 +++++-------------------- 1 file changed, 7 insertions(+), 28 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index 1a51f803928b..9b58d742af3b 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -544,9 +544,6 @@ struct qmp_phy_cfg { /* array of registers with different offsets */ const unsigned int *regs; - unsigned int start_ctrl; - unsigned int pwrdn_ctrl; - /* true, if PCS block has no separate SW_RESET register */ bool no_pcs_sw_reset; }; @@ -662,9 +659,6 @@ static const struct qmp_phy_cfg msm8996_ufs_cfg = { .regs = msm8996_ufsphy_regs_layout, - .start_ctrl = SERDES_START, - .pwrdn_ctrl = SW_PWRDN, - .no_pcs_sw_reset = true, }; @@ -685,9 +679,6 @@ static const struct qmp_phy_cfg sdm845_ufsphy_cfg = { .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = sdm845_ufsphy_regs_layout, - .start_ctrl = SERDES_START, - .pwrdn_ctrl = SW_PWRDN, - .no_pcs_sw_reset = true, }; @@ -708,9 +699,6 @@ static const struct qmp_phy_cfg sm6115_ufsphy_cfg = { .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = sm6115_ufsphy_regs_layout, - .start_ctrl = SERDES_START, - .pwrdn_ctrl = SW_PWRDN, - .no_pcs_sw_reset = true, }; @@ -730,9 +718,6 @@ static const struct qmp_phy_cfg sm8150_ufsphy_cfg = { .vreg_list = qmp_phy_vreg_l, .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = sm8150_ufsphy_regs_layout, - - .start_ctrl = SERDES_START, - .pwrdn_ctrl = SW_PWRDN, }; static const struct qmp_phy_cfg sm8350_ufsphy_cfg = { @@ -751,9 +736,6 @@ static const struct qmp_phy_cfg sm8350_ufsphy_cfg = { .vreg_list = qmp_phy_vreg_l, .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = sm8150_ufsphy_regs_layout, - - .start_ctrl = SERDES_START, - .pwrdn_ctrl = SW_PWRDN, }; static const struct 
qmp_phy_cfg sm8450_ufsphy_cfg = { @@ -772,9 +754,6 @@ static const struct qmp_phy_cfg sm8450_ufsphy_cfg = { .vreg_list = qmp_phy_vreg_l, .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = sm8150_ufsphy_regs_layout, - - .start_ctrl = SERDES_START, - .pwrdn_ctrl = SW_PWRDN, }; static void qmp_ufs_configure_lane(void __iomem *base, @@ -832,8 +811,7 @@ static int qmp_ufs_com_init(struct qmp_phy *qphy) if (ret) goto err_disable_regulators; - qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], - cfg->pwrdn_ctrl); + qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], SW_PWRDN); return 0; @@ -933,8 +911,9 @@ static int qmp_ufs_power_on(struct phy *phy) /* Pull PHY out of reset state */ if (!cfg->no_pcs_sw_reset) qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); - /* start SerDes and Phy-Coding-Sublayer */ - qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); + + /* start SerDes */ + qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], SERDES_START); status = pcs + cfg->regs[QPHY_PCS_READY_STATUS]; ret = readl_poll_timeout(status, val, (val & PCS_READY), 200, @@ -956,12 +935,12 @@ static int qmp_ufs_power_off(struct phy *phy) if (!cfg->no_pcs_sw_reset) qphy_setbits(qphy->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); - /* stop SerDes and Phy-Coding-Sublayer */ - qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); + /* stop SerDes */ + qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], SERDES_START); /* Put PHY into POWER DOWN state: active low */ qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], - cfg->pwrdn_ctrl); + SW_PWRDN); return 0; } From 47b009db545ae90f0b50149029a6b8137685f524 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:50:02 +0200 Subject: [PATCH 0614/4122] phy: qcom-qmp-usb: drop start and pwrdn-ctrl abstraction Drop the start and pwrdn-ctrl abstractions which are no longer needed since the QMP driver split. 
Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221012085002.24099-20-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 61 ++----------------------- 1 file changed, 5 insertions(+), 56 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 0bd9291e6a7b..d0c433197080 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -1443,9 +1443,6 @@ struct qmp_phy_cfg { /* array of registers with different offsets */ const unsigned int *regs; - unsigned int start_ctrl; - unsigned int pwrdn_ctrl; - /* true, if PHY needs delay after POWER_DOWN */ bool has_pwrdn_delay; @@ -1599,9 +1596,6 @@ static const struct qmp_phy_cfg ipq8074_usb3phy_cfg = { .vreg_list = qmp_phy_vreg_l, .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = usb3phy_regs_layout, - - .start_ctrl = SERDES_START | PCS_START, - .pwrdn_ctrl = SW_PWRDN, }; static const struct qmp_phy_cfg msm8996_usb3phy_cfg = { @@ -1622,9 +1616,6 @@ static const struct qmp_phy_cfg msm8996_usb3phy_cfg = { .vreg_list = qmp_phy_vreg_l, .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = usb3phy_regs_layout, - - .start_ctrl = SERDES_START | PCS_START, - .pwrdn_ctrl = SW_PWRDN, }; static const struct qmp_phy_cfg qmp_v3_usb3phy_cfg = { @@ -1646,9 +1637,6 @@ static const struct qmp_phy_cfg qmp_v3_usb3phy_cfg = { .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = qmp_v3_usb3phy_regs_layout, - .start_ctrl = SERDES_START | PCS_START, - .pwrdn_ctrl = SW_PWRDN, - .has_pwrdn_delay = true, .has_phy_dp_com_ctrl = true, }; @@ -1672,9 +1660,6 @@ static const struct qmp_phy_cfg sc7180_usb3phy_cfg = { .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = qmp_v3_usb3phy_regs_layout, - .start_ctrl = SERDES_START | PCS_START, - .pwrdn_ctrl = SW_PWRDN, - .has_pwrdn_delay = true, .has_phy_dp_com_ctrl = true, }; @@ -1697,9 +1682,6 @@ static const struct qmp_phy_cfg 
sc8280xp_usb3_uniphy_cfg = { .vreg_list = qmp_phy_vreg_l, .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = qmp_v4_usb3phy_regs_layout, - - .start_ctrl = SERDES_START | PCS_START, - .pwrdn_ctrl = SW_PWRDN, }; static const struct qmp_phy_cfg qmp_v3_usb3_uniphy_cfg = { @@ -1721,9 +1703,6 @@ static const struct qmp_phy_cfg qmp_v3_usb3_uniphy_cfg = { .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = qmp_v3_usb3phy_regs_layout, - .start_ctrl = SERDES_START | PCS_START, - .pwrdn_ctrl = SW_PWRDN, - .has_pwrdn_delay = true, }; @@ -1745,9 +1724,6 @@ static const struct qmp_phy_cfg msm8998_usb3phy_cfg = { .vreg_list = qmp_phy_vreg_l, .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = qmp_v3_usb3phy_regs_layout, - - .start_ctrl = SERDES_START | PCS_START, - .pwrdn_ctrl = SW_PWRDN, }; static const struct qmp_phy_cfg sm8150_usb3phy_cfg = { @@ -1772,9 +1748,6 @@ static const struct qmp_phy_cfg sm8150_usb3phy_cfg = { .regs = qmp_v4_usb3phy_regs_layout, .pcs_usb_offset = 0x300, - .start_ctrl = SERDES_START | PCS_START, - .pwrdn_ctrl = SW_PWRDN, - .has_pwrdn_delay = true, .has_phy_dp_com_ctrl = true, }; @@ -1801,9 +1774,6 @@ static const struct qmp_phy_cfg sm8150_usb3_uniphy_cfg = { .regs = qmp_v4_usb3phy_regs_layout, .pcs_usb_offset = 0x600, - .start_ctrl = SERDES_START | PCS_START, - .pwrdn_ctrl = SW_PWRDN, - .has_pwrdn_delay = true, }; @@ -1829,9 +1799,6 @@ static const struct qmp_phy_cfg sm8250_usb3phy_cfg = { .regs = qmp_v4_usb3phy_regs_layout, .pcs_usb_offset = 0x300, - .start_ctrl = SERDES_START | PCS_START, - .pwrdn_ctrl = SW_PWRDN, - .has_pwrdn_delay = true, .has_phy_dp_com_ctrl = true, }; @@ -1858,9 +1825,6 @@ static const struct qmp_phy_cfg sm8250_usb3_uniphy_cfg = { .regs = qmp_v4_usb3phy_regs_layout, .pcs_usb_offset = 0x600, - .start_ctrl = SERDES_START | PCS_START, - .pwrdn_ctrl = SW_PWRDN, - .has_pwrdn_delay = true, }; @@ -1886,9 +1850,6 @@ static const struct qmp_phy_cfg sdx55_usb3_uniphy_cfg = { .regs = qmp_v4_usb3phy_regs_layout, .pcs_usb_offset = 0x600, - .start_ctrl 
= SERDES_START | PCS_START, - .pwrdn_ctrl = SW_PWRDN, - .has_pwrdn_delay = true, }; @@ -1914,9 +1875,6 @@ static const struct qmp_phy_cfg sdx65_usb3_uniphy_cfg = { .regs = qmp_v4_usb3phy_regs_layout, .pcs_usb_offset = 0x1000, - .start_ctrl = SERDES_START | PCS_START, - .pwrdn_ctrl = SW_PWRDN, - .has_pwrdn_delay = true, }; @@ -1942,9 +1900,6 @@ static const struct qmp_phy_cfg sm8350_usb3phy_cfg = { .regs = qmp_v4_usb3phy_regs_layout, .pcs_usb_offset = 0x300, - .start_ctrl = SERDES_START | PCS_START, - .pwrdn_ctrl = SW_PWRDN, - .has_pwrdn_delay = true, .has_phy_dp_com_ctrl = true, }; @@ -1971,9 +1926,6 @@ static const struct qmp_phy_cfg sm8350_usb3_uniphy_cfg = { .regs = qmp_v4_usb3phy_regs_layout, .pcs_usb_offset = 0x1000, - .start_ctrl = SERDES_START | PCS_START, - .pwrdn_ctrl = SW_PWRDN, - .has_pwrdn_delay = true, }; @@ -1995,9 +1947,6 @@ static const struct qmp_phy_cfg qcm2290_usb3phy_cfg = { .vreg_list = qmp_phy_vreg_l, .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = qcm2290_usb3phy_regs_layout, - - .start_ctrl = SERDES_START | PCS_START, - .pwrdn_ctrl = SW_PWRDN, }; static void qmp_usb_configure_lane(void __iomem *base, @@ -2092,8 +2041,7 @@ static int qmp_usb_init(struct phy *phy) qphy_clrbits(dp_com, QPHY_V3_DP_COM_SW_RESET, SW_RESET); } - qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], - cfg->pwrdn_ctrl); + qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], SW_PWRDN); return 0; @@ -2160,7 +2108,7 @@ static int qmp_usb_power_on(struct phy *phy) qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); /* start SerDes and Phy-Coding-Sublayer */ - qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); + qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], SERDES_START | PCS_START); status = pcs + cfg->regs[QPHY_PCS_STATUS]; ret = readl_poll_timeout(status, val, !(val & PHYSTATUS), 200, @@ -2189,11 +2137,12 @@ static int qmp_usb_power_off(struct phy *phy) qphy_setbits(qphy->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); /* stop SerDes and 
Phy-Coding-Sublayer */ - qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); + qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], + SERDES_START | PCS_START); /* Put PHY into POWER DOWN state: active low */ qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], - cfg->pwrdn_ctrl); + SW_PWRDN); return 0; } From 922adfd59efd337059f8445a8d8968552b06ed4e Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 29 Sep 2022 22:00:17 +0300 Subject: [PATCH 0615/4122] phy: qcom-qmp-usb: correct registers layout for IPQ8074 USB3 PHY According to the kernel 4.4 sources from NHSS.QSDK.9.0.2 and according to hardware docs, the PHY registers layout used for IPQ8074 USB3 PHY is incorrect. This platform uses offset 0x174 for the PCS_STATUS register, 0xd8 for PCS_AUTONOMOUS_MODE_CTRL, etc. Correct the PHY registers layout. Fixes: 94a407cc17a4 ("phy: qcom-qmp: create copies of QMP PHY driver") Fixes: 507156f5a99f ("phy: qcom-qmp: Add USB QMP PHY support for IPQ8074") Signed-off-by: Dmitry Baryshkov Reviewed-by: Kathiravan T Link: https://lore.kernel.org/r/20220929190017.529207-1-dmitry.baryshkov@linaro.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index d0c433197080..cd167508f528 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -1595,7 +1595,7 @@ static const struct qmp_phy_cfg ipq8074_usb3phy_cfg = { .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), .vreg_list = qmp_phy_vreg_l, .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), - .regs = usb3phy_regs_layout, + .regs = qmp_v3_usb3phy_regs_layout, }; static const struct qmp_phy_cfg msm8996_usb3phy_cfg = { From d907774ed5aab5a33ef4106ea3830e673196313b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 24 Oct 2022 11:00:32 +0200 Subject: [PATCH 0616/4122] phy: qcom-qmp-ufs: move 
device-id table Move the device-id table below probe() and next to the driver structure to keep the driver callback functions grouped together. Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221024090041.19574-2-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 78 ++++++++++++------------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index 9b58d742af3b..acb8efa1d758 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -1081,45 +1081,6 @@ static int qmp_ufs_create(struct device *dev, struct device_node *np, int id, return 0; } -static const struct of_device_id qmp_ufs_of_match_table[] = { - { - .compatible = "qcom,msm8996-qmp-ufs-phy", - .data = &msm8996_ufs_cfg, - }, { - .compatible = "qcom,msm8998-qmp-ufs-phy", - .data = &sdm845_ufsphy_cfg, - }, { - .compatible = "qcom,sc8180x-qmp-ufs-phy", - .data = &sm8150_ufsphy_cfg, - }, { - .compatible = "qcom,sc8280xp-qmp-ufs-phy", - .data = &sm8350_ufsphy_cfg, - }, { - .compatible = "qcom,sdm845-qmp-ufs-phy", - .data = &sdm845_ufsphy_cfg, - }, { - .compatible = "qcom,sm6115-qmp-ufs-phy", - .data = &sm6115_ufsphy_cfg, - }, { - .compatible = "qcom,sm6350-qmp-ufs-phy", - .data = &sdm845_ufsphy_cfg, - }, { - .compatible = "qcom,sm8150-qmp-ufs-phy", - .data = &sm8150_ufsphy_cfg, - }, { - .compatible = "qcom,sm8250-qmp-ufs-phy", - .data = &sm8150_ufsphy_cfg, - }, { - .compatible = "qcom,sm8350-qmp-ufs-phy", - .data = &sm8350_ufsphy_cfg, - }, { - .compatible = "qcom,sm8450-qmp-ufs-phy", - .data = &sm8450_ufsphy_cfg, - }, - { }, -}; -MODULE_DEVICE_TABLE(of, qmp_ufs_of_match_table); - static int qmp_ufs_probe(struct platform_device *pdev) { struct qcom_qmp *qmp; @@ -1185,6 +1146,45 @@ err_node_put: return ret; } +static const struct of_device_id qmp_ufs_of_match_table[] = { + { + .compatible = 
"qcom,msm8996-qmp-ufs-phy", + .data = &msm8996_ufs_cfg, + }, { + .compatible = "qcom,msm8998-qmp-ufs-phy", + .data = &sdm845_ufsphy_cfg, + }, { + .compatible = "qcom,sc8180x-qmp-ufs-phy", + .data = &sm8150_ufsphy_cfg, + }, { + .compatible = "qcom,sc8280xp-qmp-ufs-phy", + .data = &sm8350_ufsphy_cfg, + }, { + .compatible = "qcom,sdm845-qmp-ufs-phy", + .data = &sdm845_ufsphy_cfg, + }, { + .compatible = "qcom,sm6115-qmp-ufs-phy", + .data = &sm6115_ufsphy_cfg, + }, { + .compatible = "qcom,sm6350-qmp-ufs-phy", + .data = &sdm845_ufsphy_cfg, + }, { + .compatible = "qcom,sm8150-qmp-ufs-phy", + .data = &sm8150_ufsphy_cfg, + }, { + .compatible = "qcom,sm8250-qmp-ufs-phy", + .data = &sm8150_ufsphy_cfg, + }, { + .compatible = "qcom,sm8350-qmp-ufs-phy", + .data = &sm8350_ufsphy_cfg, + }, { + .compatible = "qcom,sm8450-qmp-ufs-phy", + .data = &sm8450_ufsphy_cfg, + }, + { }, +}; +MODULE_DEVICE_TABLE(of, qmp_ufs_of_match_table); + static struct platform_driver qmp_ufs_driver = { .probe = qmp_ufs_probe, .driver = { From a36032db30deb2235bc18a8f1088c9a801bf66b0 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 24 Oct 2022 11:00:33 +0200 Subject: [PATCH 0617/4122] phy: qcom-qmp-ufs: merge driver data The UFS QMP PHY driver only manages a single PHY so merge the old qcom_qmp and qmp_phy structures and drop the PHY array. 
Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221024090041.19574-3-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 171 +++++++++--------------- 1 file changed, 63 insertions(+), 108 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index acb8efa1d758..b4c3b3d97f52 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -548,54 +548,24 @@ struct qmp_phy_cfg { bool no_pcs_sw_reset; }; -/** - * struct qmp_phy - per-lane phy descriptor - * - * @phy: generic phy - * @cfg: phy specific configuration - * @serdes: iomapped memory space for phy's serdes (i.e. PLL) - * @tx: iomapped memory space for lane's tx - * @rx: iomapped memory space for lane's rx - * @pcs: iomapped memory space for lane's pcs - * @tx2: iomapped memory space for second lane's tx (in dual lane PHYs) - * @rx2: iomapped memory space for second lane's rx (in dual lane PHYs) - * @pcs_misc: iomapped memory space for lane's pcs_misc - * @qmp: QMP phy to which this lane belongs - */ -struct qmp_phy { - struct phy *phy; +struct qmp_ufs { + struct device *dev; + const struct qmp_phy_cfg *cfg; + void __iomem *serdes; + void __iomem *pcs; + void __iomem *pcs_misc; void __iomem *tx; void __iomem *rx; - void __iomem *pcs; void __iomem *tx2; void __iomem *rx2; - void __iomem *pcs_misc; - struct qcom_qmp *qmp; -}; - -/** - * struct qcom_qmp - structure holding QMP phy block attributes - * - * @dev: device - * - * @clks: array of clocks required by phy - * @resets: array of resets required by phy - * @vregs: regulator supplies bulk data - * - * @phys: array of per-lane phy descriptors - * @ufs_reset: optional UFS PHY reset handle - */ -struct qcom_qmp { - struct device *dev; struct clk_bulk_data *clks; struct regulator_bulk_data *vregs; - - struct qmp_phy **phys; - struct reset_control *ufs_reset; + + struct phy *phy; }; 
static inline void qphy_setbits(void __iomem *base, u32 offset, u32 val) @@ -782,10 +752,10 @@ static void qmp_ufs_configure(void __iomem *base, qmp_ufs_configure_lane(base, tbl, num, 0xff); } -static int qmp_ufs_serdes_init(struct qmp_phy *qphy) +static int qmp_ufs_serdes_init(struct qmp_ufs *qmp) { - const struct qmp_phy_cfg *cfg = qphy->cfg; - void __iomem *serdes = qphy->serdes; + const struct qmp_phy_cfg *cfg = qmp->cfg; + void __iomem *serdes = qmp->serdes; const struct qmp_phy_init_tbl *serdes_tbl = cfg->serdes_tbl; int serdes_tbl_num = cfg->serdes_tbl_num; @@ -794,11 +764,10 @@ static int qmp_ufs_serdes_init(struct qmp_phy *qphy) return 0; } -static int qmp_ufs_com_init(struct qmp_phy *qphy) +static int qmp_ufs_com_init(struct qmp_ufs *qmp) { - struct qcom_qmp *qmp = qphy->qmp; - const struct qmp_phy_cfg *cfg = qphy->cfg; - void __iomem *pcs = qphy->pcs; + const struct qmp_phy_cfg *cfg = qmp->cfg; + void __iomem *pcs = qmp->pcs; int ret; ret = regulator_bulk_enable(cfg->num_vregs, qmp->vregs); @@ -821,10 +790,9 @@ err_disable_regulators: return ret; } -static int qmp_ufs_com_exit(struct qmp_phy *qphy) +static int qmp_ufs_com_exit(struct qmp_ufs *qmp) { - struct qcom_qmp *qmp = qphy->qmp; - const struct qmp_phy_cfg *cfg = qphy->cfg; + const struct qmp_phy_cfg *cfg = qmp->cfg; reset_control_assert(qmp->ufs_reset); @@ -837,9 +805,8 @@ static int qmp_ufs_com_exit(struct qmp_phy *qphy) static int qmp_ufs_init(struct phy *phy) { - struct qmp_phy *qphy = phy_get_drvdata(phy); - struct qcom_qmp *qmp = qphy->qmp; - const struct qmp_phy_cfg *cfg = qphy->cfg; + struct qmp_ufs *qmp = phy_get_drvdata(phy); + const struct qmp_phy_cfg *cfg = qmp->cfg; int ret; dev_vdbg(qmp->dev, "Initializing QMP phy\n"); @@ -870,7 +837,7 @@ static int qmp_ufs_init(struct phy *phy) return ret; } - ret = qmp_ufs_com_init(qphy); + ret = qmp_ufs_com_init(qmp); if (ret) return ret; @@ -879,28 +846,27 @@ static int qmp_ufs_init(struct phy *phy) static int qmp_ufs_power_on(struct phy *phy) { - 
struct qmp_phy *qphy = phy_get_drvdata(phy); - struct qcom_qmp *qmp = qphy->qmp; - const struct qmp_phy_cfg *cfg = qphy->cfg; - void __iomem *tx = qphy->tx; - void __iomem *rx = qphy->rx; - void __iomem *pcs = qphy->pcs; + struct qmp_ufs *qmp = phy_get_drvdata(phy); + const struct qmp_phy_cfg *cfg = qmp->cfg; + void __iomem *tx = qmp->tx; + void __iomem *rx = qmp->rx; + void __iomem *pcs = qmp->pcs; void __iomem *status; unsigned int val; int ret; - qmp_ufs_serdes_init(qphy); + qmp_ufs_serdes_init(qmp); /* Tx, Rx, and PCS configurations */ qmp_ufs_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1); if (cfg->lanes >= 2) - qmp_ufs_configure_lane(qphy->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2); + qmp_ufs_configure_lane(qmp->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2); qmp_ufs_configure_lane(rx, cfg->rx_tbl, cfg->rx_tbl_num, 1); if (cfg->lanes >= 2) - qmp_ufs_configure_lane(qphy->rx2, cfg->rx_tbl, cfg->rx_tbl_num, 2); + qmp_ufs_configure_lane(qmp->rx2, cfg->rx_tbl, cfg->rx_tbl_num, 2); qmp_ufs_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num); @@ -928,18 +894,18 @@ static int qmp_ufs_power_on(struct phy *phy) static int qmp_ufs_power_off(struct phy *phy) { - struct qmp_phy *qphy = phy_get_drvdata(phy); - const struct qmp_phy_cfg *cfg = qphy->cfg; + struct qmp_ufs *qmp = phy_get_drvdata(phy); + const struct qmp_phy_cfg *cfg = qmp->cfg; /* PHY reset */ if (!cfg->no_pcs_sw_reset) - qphy_setbits(qphy->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); + qphy_setbits(qmp->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); /* stop SerDes */ - qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], SERDES_START); + qphy_clrbits(qmp->pcs, cfg->regs[QPHY_START_CTRL], SERDES_START); /* Put PHY into POWER DOWN state: active low */ - qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], + qphy_clrbits(qmp->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], SW_PWRDN); return 0; @@ -947,9 +913,9 @@ static int qmp_ufs_power_off(struct phy *phy) static int qmp_ufs_exit(struct phy *phy) { - struct qmp_phy *qphy = 
phy_get_drvdata(phy); + struct qmp_ufs *qmp = phy_get_drvdata(phy); - qmp_ufs_com_exit(qphy); + qmp_ufs_com_exit(qmp); return 0; } @@ -981,7 +947,7 @@ static int qmp_ufs_disable(struct phy *phy) static int qmp_ufs_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg) { - struct qcom_qmp *qmp = dev_get_drvdata(dev); + struct qmp_ufs *qmp = dev_get_drvdata(dev); int num = cfg->num_vregs; int i; @@ -997,7 +963,7 @@ static int qmp_ufs_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg) static int qmp_ufs_clk_init(struct device *dev, const struct qmp_phy_cfg *cfg) { - struct qcom_qmp *qmp = dev_get_drvdata(dev); + struct qmp_ufs *qmp = dev_get_drvdata(dev); int num = cfg->num_clks; int i; @@ -1017,78 +983,71 @@ static const struct phy_ops qcom_qmp_ufs_ops = { .owner = THIS_MODULE, }; -static int qmp_ufs_create(struct device *dev, struct device_node *np, int id, +static int qmp_ufs_create(struct device *dev, struct device_node *np, void __iomem *serdes, const struct qmp_phy_cfg *cfg) { - struct qcom_qmp *qmp = dev_get_drvdata(dev); + struct qmp_ufs *qmp = dev_get_drvdata(dev); struct phy *generic_phy; - struct qmp_phy *qphy; int ret; - qphy = devm_kzalloc(dev, sizeof(*qphy), GFP_KERNEL); - if (!qphy) - return -ENOMEM; - - qphy->cfg = cfg; - qphy->serdes = serdes; + qmp->cfg = cfg; + qmp->serdes = serdes; /* * Get memory resources for the PHY: * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2. * For dual lane PHYs: tx2 -> 3, rx2 -> 4, pcs_misc (optional) -> 5 * For single lane PHYs: pcs_misc (optional) -> 3. 
*/ - qphy->tx = devm_of_iomap(dev, np, 0, NULL); - if (IS_ERR(qphy->tx)) - return PTR_ERR(qphy->tx); + qmp->tx = devm_of_iomap(dev, np, 0, NULL); + if (IS_ERR(qmp->tx)) + return PTR_ERR(qmp->tx); - qphy->rx = devm_of_iomap(dev, np, 1, NULL); - if (IS_ERR(qphy->rx)) - return PTR_ERR(qphy->rx); + qmp->rx = devm_of_iomap(dev, np, 1, NULL); + if (IS_ERR(qmp->rx)) + return PTR_ERR(qmp->rx); - qphy->pcs = devm_of_iomap(dev, np, 2, NULL); - if (IS_ERR(qphy->pcs)) - return PTR_ERR(qphy->pcs); + qmp->pcs = devm_of_iomap(dev, np, 2, NULL); + if (IS_ERR(qmp->pcs)) + return PTR_ERR(qmp->pcs); if (cfg->lanes >= 2) { - qphy->tx2 = devm_of_iomap(dev, np, 3, NULL); - if (IS_ERR(qphy->tx2)) - return PTR_ERR(qphy->tx2); + qmp->tx2 = devm_of_iomap(dev, np, 3, NULL); + if (IS_ERR(qmp->tx2)) + return PTR_ERR(qmp->tx2); - qphy->rx2 = devm_of_iomap(dev, np, 4, NULL); - if (IS_ERR(qphy->rx2)) - return PTR_ERR(qphy->rx2); + qmp->rx2 = devm_of_iomap(dev, np, 4, NULL); + if (IS_ERR(qmp->rx2)) + return PTR_ERR(qmp->rx2); - qphy->pcs_misc = devm_of_iomap(dev, np, 5, NULL); + qmp->pcs_misc = devm_of_iomap(dev, np, 5, NULL); } else { - qphy->pcs_misc = devm_of_iomap(dev, np, 3, NULL); + qmp->pcs_misc = devm_of_iomap(dev, np, 3, NULL); } - if (IS_ERR(qphy->pcs_misc)) + if (IS_ERR(qmp->pcs_misc)) dev_vdbg(dev, "PHY pcs_misc-reg not used\n"); generic_phy = devm_phy_create(dev, np, &qcom_qmp_ufs_ops); if (IS_ERR(generic_phy)) { ret = PTR_ERR(generic_phy); - dev_err(dev, "failed to create qphy %d\n", ret); + dev_err(dev, "failed to create PHY: %d\n", ret); return ret; } - qphy->phy = generic_phy; - qphy->qmp = qmp; - qmp->phys[id] = qphy; - phy_set_drvdata(generic_phy, qphy); + qmp->phy = generic_phy; + phy_set_drvdata(generic_phy, qmp); return 0; } static int qmp_ufs_probe(struct platform_device *pdev) { - struct qcom_qmp *qmp; struct device *dev = &pdev->dev; struct device_node *child; struct phy_provider *phy_provider; void __iomem *serdes; const struct qmp_phy_cfg *cfg = NULL; + struct qmp_ufs 
*qmp; int num, id; int ret; @@ -1120,14 +1079,10 @@ static int qmp_ufs_probe(struct platform_device *pdev) if (num > 1) return -EINVAL; - qmp->phys = devm_kcalloc(dev, num, sizeof(*qmp->phys), GFP_KERNEL); - if (!qmp->phys) - return -ENOMEM; - id = 0; for_each_available_child_of_node(dev->of_node, child) { /* Create per-lane phy */ - ret = qmp_ufs_create(dev, child, id, serdes, cfg); + ret = qmp_ufs_create(dev, child, serdes, cfg); if (ret) { dev_err(dev, "failed to create lane%d phy, %d\n", id, ret); From cb2c3d2ee46fe56144b74a22504e023aa59835aa Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 24 Oct 2022 11:00:34 +0200 Subject: [PATCH 0618/4122] phy: qcom-qmp-ufs: clean up device-tree parsing Since the QMP driver split there will be at most a single child node so drop the obsolete iteration construct. While at it, drop the verbose error logging that would have been printed also on probe deferrals. Note that there's no need to check if there are additional child nodes (the kernel is not a devicetree validator), but let's return an error if there are no child nodes at all for now. 
Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221024090041.19574-4-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index b4c3b3d97f52..25744b3576f3 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -1048,7 +1048,6 @@ static int qmp_ufs_probe(struct platform_device *pdev) void __iomem *serdes; const struct qmp_phy_cfg *cfg = NULL; struct qmp_ufs *qmp; - int num, id; int ret; qmp = devm_kzalloc(dev, sizeof(*qmp), GFP_KERNEL); @@ -1074,23 +1073,15 @@ static int qmp_ufs_probe(struct platform_device *pdev) if (ret) return ret; - num = of_get_available_child_count(dev->of_node); - /* do we have a rogue child node ? */ - if (num > 1) + child = of_get_next_available_child(dev->of_node, NULL); + if (!child) return -EINVAL; - id = 0; - for_each_available_child_of_node(dev->of_node, child) { - /* Create per-lane phy */ - ret = qmp_ufs_create(dev, child, serdes, cfg); - if (ret) { - dev_err(dev, "failed to create lane%d phy, %d\n", - id, ret); - goto err_node_put; - } + ret = qmp_ufs_create(dev, child, serdes, cfg); + if (ret) + goto err_node_put; - id++; - } + of_node_put(child); phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); From 018dfc99aef2f487f95e07fcbd600e02d290ba18 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 24 Oct 2022 11:00:35 +0200 Subject: [PATCH 0619/4122] phy: qcom-qmp-ufs: clean up probe initialisation Stop abusing the driver data pointer and instead pass the driver state structure directly to the initialisation helpers during probe. 
Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221024090041.19574-5-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 37 ++++++++++++------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index 25744b3576f3..057f9a3fdd6d 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -945,9 +945,10 @@ static int qmp_ufs_disable(struct phy *phy) return qmp_ufs_exit(phy); } -static int qmp_ufs_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg) +static int qmp_ufs_vreg_init(struct qmp_ufs *qmp) { - struct qmp_ufs *qmp = dev_get_drvdata(dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; + struct device *dev = qmp->dev; int num = cfg->num_vregs; int i; @@ -961,9 +962,10 @@ static int qmp_ufs_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg) return devm_regulator_bulk_get(dev, num, qmp->vregs); } -static int qmp_ufs_clk_init(struct device *dev, const struct qmp_phy_cfg *cfg) +static int qmp_ufs_clk_init(struct qmp_ufs *qmp) { - struct qmp_ufs *qmp = dev_get_drvdata(dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; + struct device *dev = qmp->dev; int num = cfg->num_clks; int i; @@ -983,15 +985,13 @@ static const struct phy_ops qcom_qmp_ufs_ops = { .owner = THIS_MODULE, }; -static int qmp_ufs_create(struct device *dev, struct device_node *np, - void __iomem *serdes, const struct qmp_phy_cfg *cfg) +static int qmp_ufs_create(struct qmp_ufs *qmp, struct device_node *np) { - struct qmp_ufs *qmp = dev_get_drvdata(dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; + struct device *dev = qmp->dev; struct phy *generic_phy; int ret; - qmp->cfg = cfg; - qmp->serdes = serdes; /* * Get memory resources for the PHY: * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2. 
@@ -1045,8 +1045,6 @@ static int qmp_ufs_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct device_node *child; struct phy_provider *phy_provider; - void __iomem *serdes; - const struct qmp_phy_cfg *cfg = NULL; struct qmp_ufs *qmp; int ret; @@ -1055,21 +1053,20 @@ static int qmp_ufs_probe(struct platform_device *pdev) return -ENOMEM; qmp->dev = dev; - dev_set_drvdata(dev, qmp); - cfg = of_device_get_match_data(dev); - if (!cfg) + qmp->cfg = of_device_get_match_data(dev); + if (!qmp->cfg) return -EINVAL; - serdes = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(serdes)) - return PTR_ERR(serdes); + qmp->serdes = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(qmp->serdes)) + return PTR_ERR(qmp->serdes); - ret = qmp_ufs_clk_init(dev, cfg); + ret = qmp_ufs_clk_init(qmp); if (ret) return ret; - ret = qmp_ufs_vreg_init(dev, cfg); + ret = qmp_ufs_vreg_init(qmp); if (ret) return ret; @@ -1077,7 +1074,7 @@ static int qmp_ufs_probe(struct platform_device *pdev) if (!child) return -EINVAL; - ret = qmp_ufs_create(dev, child, serdes, cfg); + ret = qmp_ufs_create(qmp, child); if (ret) goto err_node_put; From b98e44e608bcb4a2c235b50cc48144e7043595b5 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 24 Oct 2022 11:00:36 +0200 Subject: [PATCH 0620/4122] phy: qcom-qmp-ufs: rename PHY ops structure Rename the PHY operation structure so that it has a "phy_ops" suffix and move it next to the implementation. 
Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221024090041.19574-6-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index 057f9a3fdd6d..90583b87efd1 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -945,6 +945,12 @@ static int qmp_ufs_disable(struct phy *phy) return qmp_ufs_exit(phy); } +static const struct phy_ops qcom_qmp_ufs_phy_ops = { + .power_on = qmp_ufs_enable, + .power_off = qmp_ufs_disable, + .owner = THIS_MODULE, +}; + static int qmp_ufs_vreg_init(struct qmp_ufs *qmp) { const struct qmp_phy_cfg *cfg = qmp->cfg; @@ -979,12 +985,6 @@ static int qmp_ufs_clk_init(struct qmp_ufs *qmp) return devm_clk_bulk_get(dev, num, qmp->clks); } -static const struct phy_ops qcom_qmp_ufs_ops = { - .power_on = qmp_ufs_enable, - .power_off = qmp_ufs_disable, - .owner = THIS_MODULE, -}; - static int qmp_ufs_create(struct qmp_ufs *qmp, struct device_node *np) { const struct qmp_phy_cfg *cfg = qmp->cfg; @@ -1027,7 +1027,7 @@ static int qmp_ufs_create(struct qmp_ufs *qmp, struct device_node *np) if (IS_ERR(qmp->pcs_misc)) dev_vdbg(dev, "PHY pcs_misc-reg not used\n"); - generic_phy = devm_phy_create(dev, np, &qcom_qmp_ufs_ops); + generic_phy = devm_phy_create(dev, np, &qcom_qmp_ufs_phy_ops); if (IS_ERR(generic_phy)) { ret = PTR_ERR(generic_phy); dev_err(dev, "failed to create PHY: %d\n", ret); From e0a0c761d2203955585b2c93126d6a712726c368 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 24 Oct 2022 11:00:37 +0200 Subject: [PATCH 0621/4122] phy: qcom-qmp-ufs: clean up PHY init Clean up the PHY initialisation somewhat programming both tx and rx for the second lane after the first lane. 
Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221024090041.19574-7-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index 90583b87efd1..fa703e856eb7 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -859,14 +859,12 @@ static int qmp_ufs_power_on(struct phy *phy) /* Tx, Rx, and PCS configurations */ qmp_ufs_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1); - - if (cfg->lanes >= 2) - qmp_ufs_configure_lane(qmp->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2); - qmp_ufs_configure_lane(rx, cfg->rx_tbl, cfg->rx_tbl_num, 1); - if (cfg->lanes >= 2) + if (cfg->lanes >= 2) { + qmp_ufs_configure_lane(qmp->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2); qmp_ufs_configure_lane(qmp->rx2, cfg->rx_tbl, cfg->rx_tbl_num, 2); + } qmp_ufs_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num); From 54293c08f2c01efff4a8c1c61290e5f8e34df2df Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 24 Oct 2022 11:00:38 +0200 Subject: [PATCH 0622/4122] dt-bindings: phy: qcom,qmp-ufs: rename current bindings The current QMP UFS PHY bindings are based on the original MSM8996 PCIe PHY binding which provided multiple PHYs per IP block and these in turn were described by child nodes. The QMP UFS PHY block only provides a single PHY and the remnant child node does not really reflect the hardware. The original MSM8996 binding also ended up describing the individual register blocks as belonging to either the wrapper node or the PHY child nodes. This is an unnecessary level of detail which has led to problems when later IP blocks using different register layouts have been forced to fit the original mould rather than updating the binding. 
The bindings are arguably also incomplete as they only describe the register blocks used by the current Linux drivers. In preparation for adding new bindings for SC8280XP which further bindings can be based on, rename the current bindings after MSM8996 and add a reference to the SC8280XP bindings. Signed-off-by: Johan Hovold Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221024090041.19574-8-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- ...qcom,qmp-ufs-phy.yaml => qcom,msm8996-qmp-ufs-phy.yaml} | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) rename Documentation/devicetree/bindings/phy/{qcom,qmp-ufs-phy.yaml => qcom,msm8996-qmp-ufs-phy.yaml} (95%) diff --git a/Documentation/devicetree/bindings/phy/qcom,qmp-ufs-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-ufs-phy.yaml similarity index 95% rename from Documentation/devicetree/bindings/phy/qcom,qmp-ufs-phy.yaml rename to Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-ufs-phy.yaml index 815c375d0f7b..438f9606414a 100644 --- a/Documentation/devicetree/bindings/phy/qcom,qmp-ufs-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-ufs-phy.yaml @@ -1,10 +1,10 @@ # SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) %YAML 1.2 --- -$id: http://devicetree.org/schemas/phy/qcom,qmp-ufs-phy.yaml# +$id: http://devicetree.org/schemas/phy/qcom,msm8996-qmp-ufs-phy.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Qualcomm QMP PHY controller (UFS) +title: Qualcomm QMP PHY controller (UFS, MSM8996) maintainers: - Vinod Koul @@ -13,6 +13,9 @@ description: QMP PHY controller supports physical layer functionality for a number of controllers on Qualcomm chipsets, such as, PCIe, UFS, and USB. + Note that these bindings are for SoCs up to SC8180X. For newer SoCs, see + qcom,sc8280xp-qmp-ufs-phy.yaml. 
+ properties: compatible: enum: From 7741f31ae44568f9c32046aaf4c6c41a51359f6d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 24 Oct 2022 11:00:39 +0200 Subject: [PATCH 0623/4122] dt-bindings: phy: qcom,qmp-ufs: fix sc8280xp binding The current QMP UFS PHY bindings are based on the original MSM8996 PCIe PHY binding which provided multiple PHYs per IP block and these in turn were described by child nodes. The QMP UFS PHY block only provides a single PHY and the remnant child node does not really reflect the hardware. The original MSM8996 binding also ended up describing the individual register blocks as belonging to either the wrapper node or the PHY child nodes. This is an unnecessary level of detail which has led to problems when later IP blocks using different register layouts have been forced to fit the original mould rather than updating the binding. The bindings are arguably also incomplete as they only describe the register blocks used by the current Linux drivers. Add a new binding for the UFS QMP PHYs found on SC8280XP which further bindings can be based on. Note that the current binding is simply removed instead of being deprecated as it was only recently merged and support for SC8280XP is still under development.
Signed-off-by: Johan Hovold Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221024090041.19574-9-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- .../phy/qcom,msm8996-qmp-ufs-phy.yaml | 10 +-- .../phy/qcom,sc8280xp-qmp-ufs-phy.yaml | 83 +++++++++++++++++++ 2 files changed, 87 insertions(+), 6 deletions(-) create mode 100644 Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-ufs-phy.yaml diff --git a/Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-ufs-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-ufs-phy.yaml index 438f9606414a..be41acbd3b6c 100644 --- a/Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-ufs-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-ufs-phy.yaml @@ -22,7 +22,6 @@ properties: - qcom,msm8996-qmp-ufs-phy - qcom,msm8998-qmp-ufs-phy - qcom,sc8180x-qmp-ufs-phy - - qcom,sc8280xp-qmp-ufs-phy - qcom,sdm845-qmp-ufs-phy - qcom,sm6115-qmp-ufs-phy - qcom,sm6350-qmp-ufs-phy @@ -122,7 +121,6 @@ allOf: enum: - qcom,msm8998-qmp-ufs-phy - qcom,sc8180x-qmp-ufs-phy - - qcom,sc8280xp-qmp-ufs-phy - qcom,sdm845-qmp-ufs-phy - qcom,sm6115-qmp-ufs-phy - qcom,sm6350-qmp-ufs-phy @@ -159,7 +157,6 @@ allOf: contains: enum: - qcom,msm8998-qmp-ufs-phy - - qcom,sc8280xp-qmp-ufs-phy - qcom,sdm845-qmp-ufs-phy - qcom,sm6350-qmp-ufs-phy - qcom,sm8150-qmp-ufs-phy @@ -214,11 +211,12 @@ allOf: examples: - | - #include + #include #include + phy-wrapper@1d87000 { - compatible = "qcom,sc8280xp-qmp-ufs-phy"; - reg = <0x01d87000 0xe10>; + compatible = "qcom,sm8250-qmp-ufs-phy"; + reg = <0x01d87000 0x1c0>; #address-cells = <1>; #size-cells = <1>; ranges = <0x0 0x01d87000 0x1000>; diff --git a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-ufs-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-ufs-phy.yaml new file mode 100644 index 000000000000..dde86a19f792 --- /dev/null +++ b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-ufs-phy.yaml @@ -0,0 +1,83 @@ +# 
SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/phy/qcom,sc8280xp-qmp-ufs-phy.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm QMP PHY controller (UFS, SC8280XP) + +maintainers: + - Vinod Koul + +description: + The QMP PHY controller supports physical layer functionality for a number of + controllers on Qualcomm chipsets, such as, PCIe, UFS, and USB. + +properties: + compatible: + enum: + - qcom,sc8280xp-qmp-ufs-phy + + reg: + maxItems: 1 + + clocks: + maxItems: 2 + + clock-names: + items: + - const: ref + - const: ref_aux + + power-domains: + maxItems: 1 + + resets: + maxItems: 1 + + reset-names: + items: + - const: ufsphy + + vdda-phy-supply: true + + vdda-pll-supply: true + + "#phy-cells": + const: 0 + +required: + - compatible + - reg + - clocks + - clock-names + - power-domains + - resets + - reset-names + - vdda-phy-supply + - vdda-pll-supply + - "#phy-cells" + +additionalProperties: false + +examples: + - | + #include + + ufs_mem_phy: phy@1d87000 { + compatible = "qcom,sc8280xp-qmp-ufs-phy"; + reg = <0x01d87000 0x1000>; + + clocks = <&gcc GCC_UFS_REF_CLKREF_CLK>, <&gcc GCC_UFS_PHY_PHY_AUX_CLK>; + clock-names = "ref", "ref_aux"; + + power-domains = <&gcc UFS_PHY_GDSC>; + + resets = <&ufs_mem_hc 0>; + reset-names = "ufsphy"; + + vdda-phy-supply = <&vreg_l6b>; + vdda-pll-supply = <&vreg_l3b>; + + #phy-cells = <0>; + }; From c64d39b403d8dc751ea6a56f97962f93f811fed4 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 24 Oct 2022 11:00:40 +0200 Subject: [PATCH 0624/4122] phy: qcom-qmp-ufs: restructure PHY creation In preparation for supporting devicetree bindings which do not use a child node, move the PHY creation to probe() proper and parse the serdes resource in what is now the legacy devicetree helper. 
Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221024090041.19574-10-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 34 ++++++++++++------------- 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index fa703e856eb7..bf5c1a6b9ca4 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -983,12 +983,15 @@ static int qmp_ufs_clk_init(struct qmp_ufs *qmp) return devm_clk_bulk_get(dev, num, qmp->clks); } -static int qmp_ufs_create(struct qmp_ufs *qmp, struct device_node *np) +static int qmp_ufs_parse_dt_legacy(struct qmp_ufs *qmp, struct device_node *np) { + struct platform_device *pdev = to_platform_device(qmp->dev); const struct qmp_phy_cfg *cfg = qmp->cfg; struct device *dev = qmp->dev; - struct phy *generic_phy; - int ret; + + qmp->serdes = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(qmp->serdes)) + return PTR_ERR(qmp->serdes); /* * Get memory resources for the PHY: @@ -1025,16 +1028,6 @@ static int qmp_ufs_create(struct qmp_ufs *qmp, struct device_node *np) if (IS_ERR(qmp->pcs_misc)) dev_vdbg(dev, "PHY pcs_misc-reg not used\n"); - generic_phy = devm_phy_create(dev, np, &qcom_qmp_ufs_phy_ops); - if (IS_ERR(generic_phy)) { - ret = PTR_ERR(generic_phy); - dev_err(dev, "failed to create PHY: %d\n", ret); - return ret; - } - - qmp->phy = generic_phy; - phy_set_drvdata(generic_phy, qmp); - return 0; } @@ -1056,10 +1049,6 @@ static int qmp_ufs_probe(struct platform_device *pdev) if (!qmp->cfg) return -EINVAL; - qmp->serdes = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(qmp->serdes)) - return PTR_ERR(qmp->serdes); - ret = qmp_ufs_clk_init(qmp); if (ret) return ret; @@ -1072,10 +1061,19 @@ static int qmp_ufs_probe(struct platform_device *pdev) if (!child) return -EINVAL; - ret = qmp_ufs_create(qmp, child); + ret = 
qmp_ufs_parse_dt_legacy(qmp, child); if (ret) goto err_node_put; + qmp->phy = devm_phy_create(dev, child, &qcom_qmp_ufs_phy_ops); + if (IS_ERR(qmp->phy)) { + ret = PTR_ERR(qmp->phy); + dev_err(dev, "failed to create PHY: %d\n", ret); + goto err_node_put; + } + + phy_set_drvdata(qmp->phy, qmp); + of_node_put(child); phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); From 0e089bb8b31f7651d364723122af7ba7be7b98a9 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 24 Oct 2022 11:00:41 +0200 Subject: [PATCH 0625/4122] phy: qcom-qmp-ufs: add support for updated sc8280xp binding Add support for the new SC8280XP binding. Note that the binding does not try to describe every register subregion and instead the driver holds the corresponding offsets. Also note that the driver will continue to accept the old binding, at least for the time being. Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221024090041.19574-11-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 90 ++++++++++++++++++++++--- 1 file changed, 80 insertions(+), 10 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index bf5c1a6b9ca4..189103d1bd18 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -520,10 +520,21 @@ static const struct qmp_phy_init_tbl sm8350_ufsphy_pcs_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V5_PCS_UFS_MULTI_LANE_CTRL1, 0x02), }; +struct qmp_ufs_offsets { + u16 serdes; + u16 pcs; + u16 tx; + u16 rx; + u16 tx2; + u16 rx2; +}; + /* struct qmp_phy_cfg - per-PHY initialization config */ struct qmp_phy_cfg { int lanes; + const struct qmp_ufs_offsets *offsets; + /* Init sequence for PHY blocks - serdes, tx, rx, pcs */ const struct qmp_phy_init_tbl *serdes_tbl; int serdes_tbl_num; @@ -611,6 +622,15 @@ static const char * const qmp_phy_vreg_l[] = { "vdda-phy", "vdda-pll", }; +static const 
struct qmp_ufs_offsets qmp_ufs_offsets_v5 = { + .serdes = 0, + .pcs = 0xc00, + .tx = 0x400, + .rx = 0x600, + .tx2 = 0x800, + .rx2 = 0xa00, +}; + static const struct qmp_phy_cfg msm8996_ufs_cfg = { .lanes = 1, @@ -632,6 +652,26 @@ static const struct qmp_phy_cfg msm8996_ufs_cfg = { .no_pcs_sw_reset = true, }; +static const struct qmp_phy_cfg sc8280xp_ufsphy_cfg = { + .lanes = 2, + + .offsets = &qmp_ufs_offsets_v5, + + .serdes_tbl = sm8350_ufsphy_serdes_tbl, + .serdes_tbl_num = ARRAY_SIZE(sm8350_ufsphy_serdes_tbl), + .tx_tbl = sm8350_ufsphy_tx_tbl, + .tx_tbl_num = ARRAY_SIZE(sm8350_ufsphy_tx_tbl), + .rx_tbl = sm8350_ufsphy_rx_tbl, + .rx_tbl_num = ARRAY_SIZE(sm8350_ufsphy_rx_tbl), + .pcs_tbl = sm8350_ufsphy_pcs_tbl, + .pcs_tbl_num = ARRAY_SIZE(sm8350_ufsphy_pcs_tbl), + .clk_list = sdm845_ufs_phy_clk_l, + .num_clks = ARRAY_SIZE(sdm845_ufs_phy_clk_l), + .vreg_list = qmp_phy_vreg_l, + .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), + .regs = sm8150_ufsphy_regs_layout, +}; + static const struct qmp_phy_cfg sdm845_ufsphy_cfg = { .lanes = 2, @@ -1031,11 +1071,38 @@ static int qmp_ufs_parse_dt_legacy(struct qmp_ufs *qmp, struct device_node *np) return 0; } +static int qmp_ufs_parse_dt(struct qmp_ufs *qmp) +{ + struct platform_device *pdev = to_platform_device(qmp->dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; + const struct qmp_ufs_offsets *offs = cfg->offsets; + void __iomem *base; + + if (!offs) + return -EINVAL; + + base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(base)) + return PTR_ERR(base); + + qmp->serdes = base + offs->serdes; + qmp->pcs = base + offs->pcs; + qmp->tx = base + offs->tx; + qmp->rx = base + offs->rx; + + if (cfg->lanes >= 2) { + qmp->tx2 = base + offs->tx2; + qmp->rx2 = base + offs->rx2; + } + + return 0; +} + static int qmp_ufs_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct device_node *child; struct phy_provider *phy_provider; + struct device_node *np; struct qmp_ufs *qmp; int ret; @@ -1057,15 +1124,18 @@ 
static int qmp_ufs_probe(struct platform_device *pdev) if (ret) return ret; - child = of_get_next_available_child(dev->of_node, NULL); - if (!child) - return -EINVAL; - - ret = qmp_ufs_parse_dt_legacy(qmp, child); + /* Check for legacy binding with child node. */ + np = of_get_next_available_child(dev->of_node, NULL); + if (np) { + ret = qmp_ufs_parse_dt_legacy(qmp, np); + } else { + np = of_node_get(dev->of_node); + ret = qmp_ufs_parse_dt(qmp); + } if (ret) goto err_node_put; - qmp->phy = devm_phy_create(dev, child, &qcom_qmp_ufs_phy_ops); + qmp->phy = devm_phy_create(dev, np, &qcom_qmp_ufs_phy_ops); if (IS_ERR(qmp->phy)) { ret = PTR_ERR(qmp->phy); dev_err(dev, "failed to create PHY: %d\n", ret); @@ -1074,14 +1144,14 @@ static int qmp_ufs_probe(struct platform_device *pdev) phy_set_drvdata(qmp->phy, qmp); - of_node_put(child); + of_node_put(np); phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); return PTR_ERR_OR_ZERO(phy_provider); err_node_put: - of_node_put(child); + of_node_put(np); return ret; } @@ -1097,7 +1167,7 @@ static const struct of_device_id qmp_ufs_of_match_table[] = { .data = &sm8150_ufsphy_cfg, }, { .compatible = "qcom,sc8280xp-qmp-ufs-phy", - .data = &sm8350_ufsphy_cfg, + .data = &sc8280xp_ufsphy_cfg, }, { .compatible = "qcom,sdm845-qmp-ufs-phy", .data = &sdm845_ufsphy_cfg, From 2e5632aef677cc2e22cbc76704526b51e29b3a7b Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 5 Oct 2022 17:13:48 +0100 Subject: [PATCH 0626/4122] power: supply: lp8788: make const array name static Don't populate the read-only array name on the stack but instead make it static. Since the data and the pointers don't change also add in a missing const. Also makes the object code a little smaller. 
Signed-off-by: Colin Ian King Signed-off-by: Sebastian Reichel --- drivers/power/supply/lp8788-charger.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/supply/lp8788-charger.c b/drivers/power/supply/lp8788-charger.c index 56c57529c228..f5f47a0aa1e3 100644 --- a/drivers/power/supply/lp8788-charger.c +++ b/drivers/power/supply/lp8788-charger.c @@ -520,7 +520,7 @@ err_free_irq: static int lp8788_irq_register(struct platform_device *pdev, struct lp8788_charger *pchg) { - const char *name[] = { + static const char * const name[] = { LP8788_CHG_IRQ, LP8788_PRSW_IRQ, LP8788_BATT_IRQ }; int i; From 3f5b93f4077b37d576b6cdbcb636f443aa09eb03 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 23 Sep 2022 22:36:03 +0200 Subject: [PATCH 0627/4122] dt-bindings: power: reset: restart-handler: add common schema Add common schema for restart and shutdown handlers, so they all use same meaning of "priority" field. The Linux drivers already have this property and some systems want to customize it per-board in DTS. Signed-off-by: Krzysztof Kozlowski Reviewed-by: Rob Herring Signed-off-by: Sebastian Reichel --- .../bindings/power/reset/gpio-restart.yaml | 13 ++------ .../bindings/power/reset/restart-handler.yaml | 30 +++++++++++++++++++ 2 files changed, 33 insertions(+), 10 deletions(-) create mode 100644 Documentation/devicetree/bindings/power/reset/restart-handler.yaml diff --git a/Documentation/devicetree/bindings/power/reset/gpio-restart.yaml b/Documentation/devicetree/bindings/power/reset/gpio-restart.yaml index a72d5c721516..d3d18e0f5db3 100644 --- a/Documentation/devicetree/bindings/power/reset/gpio-restart.yaml +++ b/Documentation/devicetree/bindings/power/reset/gpio-restart.yaml @@ -25,6 +25,9 @@ description: > inactive-delay, the GPIO is driven active again. After a delay specified by wait-delay, the restart handler completes allowing other restart handlers to be attempted. 
+allOf: + - $ref: restart-handler.yaml# + properties: compatible: const: gpio-restart @@ -41,16 +44,6 @@ properties: in its inactive state. priority: - $ref: /schemas/types.yaml#/definitions/uint32 - description: | - A priority ranging from 0 to 255 (default 129) according to the following guidelines: - - 0: Restart handler of last resort, with limited restart capabilities. - 128: Default restart handler; use if no other restart handler is expected to be available, - and/or if restart functionality is sufficient to restart the entire system. - 255: Highest priority restart handler, will preempt all other restart handlers. - minimum: 0 - maximum: 255 default: 129 active-delay: diff --git a/Documentation/devicetree/bindings/power/reset/restart-handler.yaml b/Documentation/devicetree/bindings/power/reset/restart-handler.yaml new file mode 100644 index 000000000000..1f9a2aac53c0 --- /dev/null +++ b/Documentation/devicetree/bindings/power/reset/restart-handler.yaml @@ -0,0 +1,30 @@ +# SPDX-License-Identifier: GPL-2.0-only or BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/power/reset/restart-handler.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Restart and shutdown handler generic binding + +maintainers: + - Sebastian Reichel + +description: + Restart and shutdown handler device is responsible for powering off the + system, e.g. my cutting off the power. System might have several restart + handlers, which usually are tried from most precise to last resort. + +properties: + priority: + $ref: /schemas/types.yaml#/definitions/uint32 + description: | + A priority ranging from 0 to 255 according to the following guidelines:: + 0:: Restart handler of last resort, with limited restart capabilities. + 128:: Typical, default restart handler; use if no other restart handler + is expected to be available, and/or if restart functionality is + sufficient to restart the entire system. 
+ 255:: Highest priority restart handler, will preempt all other restart handlers. + minimum: 0 + maximum: 255 + +additionalProperties: true From 03b33d4ac41e4d0bb3e4654f21e06caba4ccb400 Mon Sep 17 00:00:00 2001 From: wangjianli Date: Sat, 22 Oct 2022 13:45:44 +0800 Subject: [PATCH 0628/4122] power/supply: fix repeated words in comments Delete the redundant word 'the'. Signed-off-by: wangjianli Signed-off-by: Sebastian Reichel --- drivers/power/supply/ab8500_charger.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/supply/ab8500_charger.c b/drivers/power/supply/ab8500_charger.c index c19c50442761..cf45a2c26b7f 100644 --- a/drivers/power/supply/ab8500_charger.c +++ b/drivers/power/supply/ab8500_charger.c @@ -1940,7 +1940,7 @@ static int ab8500_charger_get_ext_psy_data(struct device *dev, void *data) * * Due to a asic bug it is necessary to lower the input current to the vbus * charger when charging with at some specific levels. This issue is only valid - * for below a certain battery voltage. This function makes sure that the + * for below a certain battery voltage. This function makes sure that * the allowed current limit isn't exceeded. */ static void ab8500_charger_check_vbat_work(struct work_struct *work) From 17c13c724b143c835fe3a9109daab524dff3d06f Mon Sep 17 00:00:00 2001 From: Maarten Zanders Date: Fri, 28 Oct 2022 12:56:43 +0200 Subject: [PATCH 0629/4122] leds: lp5523: fix out-of-bounds bug in lp5523_selftest() When not all LED channels of the led chip are configured, the sysfs selftest functionality gives erroneous results and tries to test all channels of the chip. There is a potential for LED overcurrent conditions since the test current will be set to values from out-of-bound regions. It is wrong to use pdata->led_config[i].led_current to skip absent channels as led_config[] only contains the configured LED channels. 
Instead of iterating over all the physical channels of the device, loop over the available LED configurations and use led->chan_nr to access the correct i2c registers. Keep the zero-check for the LED current as existing users might depend on this to disable a channel. Reported-by: Arne Staessen Signed-off-by: Maarten Zanders Signed-off-by: Pavel Machek --- drivers/leds/leds-lp5523.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/leds/leds-lp5523.c b/drivers/leds/leds-lp5523.c index 369d40b0b65b..e08e3de1428d 100644 --- a/drivers/leds/leds-lp5523.c +++ b/drivers/leds/leds-lp5523.c @@ -581,8 +581,8 @@ static ssize_t lp5523_selftest(struct device *dev, struct lp55xx_led *led = i2c_get_clientdata(to_i2c_client(dev)); struct lp55xx_chip *chip = led->chip; struct lp55xx_platform_data *pdata = chip->pdata; - int i, ret, pos = 0; - u8 status, adc, vdd; + int ret, pos = 0; + u8 status, adc, vdd, i; mutex_lock(&chip->lock); @@ -612,20 +612,21 @@ static ssize_t lp5523_selftest(struct device *dev, vdd--; /* There may be some fluctuation in measurement */ - for (i = 0; i < LP5523_MAX_LEDS; i++) { - /* Skip non-existing channels */ + for (i = 0; i < pdata->num_channels; i++) { + /* Skip disabled channels */ if (pdata->led_config[i].led_current == 0) continue; /* Set default current */ - lp55xx_write(chip, LP5523_REG_LED_CURRENT_BASE + i, + lp55xx_write(chip, LP5523_REG_LED_CURRENT_BASE + led->chan_nr, pdata->led_config[i].led_current); - lp55xx_write(chip, LP5523_REG_LED_PWM_BASE + i, 0xff); + lp55xx_write(chip, LP5523_REG_LED_PWM_BASE + led->chan_nr, + 0xff); /* let current stabilize 2 - 4ms before measurements start */ usleep_range(2000, 4000); lp55xx_write(chip, LP5523_REG_LED_TEST_CTRL, - LP5523_EN_LEDTEST | i); + LP5523_EN_LEDTEST | led->chan_nr); /* ADC conversion time is 2.7 ms typically */ usleep_range(3000, 6000); ret = lp55xx_read(chip, LP5523_REG_STATUS, &status); @@ -633,20 +634,22 @@ static ssize_t 
lp5523_selftest(struct device *dev, goto fail; if (!(status & LP5523_LEDTEST_DONE)) - usleep_range(3000, 6000);/* Was not ready. Wait. */ + usleep_range(3000, 6000); /* Was not ready. Wait. */ ret = lp55xx_read(chip, LP5523_REG_LED_TEST_ADC, &adc); if (ret < 0) goto fail; if (adc >= vdd || adc < LP5523_ADC_SHORTCIRC_LIM) - pos += sprintf(buf + pos, "LED %d FAIL\n", i); + pos += sprintf(buf + pos, "LED %d FAIL\n", + led->chan_nr); - lp55xx_write(chip, LP5523_REG_LED_PWM_BASE + i, 0x00); + lp55xx_write(chip, LP5523_REG_LED_PWM_BASE + led->chan_nr, + 0x00); /* Restore current */ - lp55xx_write(chip, LP5523_REG_LED_CURRENT_BASE + i, - led->led_current); + lp55xx_write(chip, LP5523_REG_LED_CURRENT_BASE + led->chan_nr, + led->led_current); led++; } if (pos == 0) From 05e88ebb9ecfe9631ccc6483a79b0eabf554da60 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Fri, 21 Oct 2022 15:01:02 -0500 Subject: [PATCH 0630/4122] RDMA/rxe: Remove redundant header files Remove unneeded include files. Link: https://lore.kernel.org/r/20221021200118.2163-2-rpearsonhpe@gmail.com Signed-off-by: Ian Ziemba Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_task.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c index ec2b7de1c497..3fbaba9eec39 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.c +++ b/drivers/infiniband/sw/rxe/rxe_task.c @@ -4,10 +4,6 @@ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. */ -#include -#include -#include - #include "rxe.h" int __rxe_do_task(struct rxe_task *task) From 98a54f170617746b5d09b18b23b295efc7a42a5e Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Fri, 21 Oct 2022 15:01:03 -0500 Subject: [PATCH 0631/4122] RDMA/rxe: Remove init of task locks from rxe_qp.c The calls to spin_lock_init() for the tasklet spinlocks in rxe_qp_init_misc() are redundant since they are initialized in rxe_init_task(). This patch removes them.
Link: https://lore.kernel.org/r/20221021200118.2163-3-rpearsonhpe@gmail.com Signed-off-by: Ian Ziemba Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_qp.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index a62bab88415c..57c3f05ad15b 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -172,10 +172,6 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp, spin_lock_init(&qp->state_lock); - spin_lock_init(&qp->req.task.state_lock); - spin_lock_init(&qp->resp.task.state_lock); - spin_lock_init(&qp->comp.task.state_lock); - spin_lock_init(&qp->sq.sq_lock); spin_lock_init(&qp->rq.producer_lock); spin_lock_init(&qp->rq.consumer_lock); From de669ae8af49ceed0eed44f5b3d51dc62affc5e4 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Fri, 21 Oct 2022 15:01:04 -0500 Subject: [PATCH 0632/4122] RDMA/rxe: Removed unused name from rxe_task struct The name field in struct rxe_task is never used. This patch removes it. 
Link: https://lore.kernel.org/r/20221021200118.2163-4-rpearsonhpe@gmail.com Signed-off-by: Ian Ziemba Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_qp.c | 9 +++------ drivers/infiniband/sw/rxe/rxe_task.c | 4 +--- drivers/infiniband/sw/rxe/rxe_task.h | 4 +--- 3 files changed, 5 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 57c3f05ad15b..03bd9f3e9956 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -238,10 +238,8 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, skb_queue_head_init(&qp->req_pkts); - rxe_init_task(&qp->req.task, qp, - rxe_requester, "req"); - rxe_init_task(&qp->comp.task, qp, - rxe_completer, "comp"); + rxe_init_task(&qp->req.task, qp, rxe_requester); + rxe_init_task(&qp->comp.task, qp, rxe_completer); qp->qp_timeout_jiffies = 0; /* Can't be set for UD/UC in modify_qp */ if (init->qp_type == IB_QPT_RC) { @@ -288,8 +286,7 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, skb_queue_head_init(&qp->resp_pkts); - rxe_init_task(&qp->resp.task, qp, - rxe_responder, "resp"); + rxe_init_task(&qp->resp.task, qp, rxe_responder); qp->resp.opcode = OPCODE_NONE; qp->resp.msn = 0; diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c index 3fbaba9eec39..0cbba455fefd 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.c +++ b/drivers/infiniband/sw/rxe/rxe_task.c @@ -90,12 +90,10 @@ void rxe_do_task(struct tasklet_struct *t) task->ret = ret; } -int rxe_init_task(struct rxe_task *task, - void *arg, int (*func)(void *), char *name) +int rxe_init_task(struct rxe_task *task, void *arg, int (*func)(void *)) { task->arg = arg; task->func = func; - snprintf(task->name, sizeof(task->name), "%s", name); task->destroyed = false; tasklet_setup(&task->tasklet, rxe_do_task); diff --git a/drivers/infiniband/sw/rxe/rxe_task.h 
b/drivers/infiniband/sw/rxe/rxe_task.h index 7f612a1c68a7..b3dfd970d1dc 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.h +++ b/drivers/infiniband/sw/rxe/rxe_task.h @@ -25,7 +25,6 @@ struct rxe_task { void *arg; int (*func)(void *arg); int ret; - char name[16]; bool destroyed; }; @@ -34,8 +33,7 @@ struct rxe_task { * arg => parameter to pass to fcn * func => function to call until it returns != 0 */ -int rxe_init_task(struct rxe_task *task, - void *arg, int (*func)(void *), char *name); +int rxe_init_task(struct rxe_task *task, void *arg, int (*func)(void *)); /* cleanup task */ void rxe_cleanup_task(struct rxe_task *task); From dccb23f6c312e4480fe32ccbc2afac1a5cac7e5e Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Fri, 21 Oct 2022 15:01:05 -0500 Subject: [PATCH 0633/4122] RDMA/rxe: Split rxe_run_task() into two subroutines Split rxe_run_task(task, sched) into rxe_run_task(task) and rxe_sched_task(task). Link: https://lore.kernel.org/r/20221021200118.2163-5-rpearsonhpe@gmail.com Signed-off-by: Ian Ziemba Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_comp.c | 19 +++++++++++-------- drivers/infiniband/sw/rxe/rxe_net.c | 4 ++-- drivers/infiniband/sw/rxe/rxe_qp.c | 10 +++++----- drivers/infiniband/sw/rxe/rxe_req.c | 10 +++++----- drivers/infiniband/sw/rxe/rxe_resp.c | 5 ++++- drivers/infiniband/sw/rxe/rxe_task.c | 15 ++++++++++----- drivers/infiniband/sw/rxe/rxe_task.h | 7 +++---- drivers/infiniband/sw/rxe/rxe_verbs.c | 8 ++++---- 8 files changed, 44 insertions(+), 34 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index c9170dd99f3a..66f392810c86 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -118,7 +118,7 @@ void retransmit_timer(struct timer_list *t) if (qp->valid) { qp->comp.timeout = 1; - rxe_run_task(&qp->comp.task, 1); + rxe_sched_task(&qp->comp.task); } } @@ -132,7 +132,10 @@ void rxe_comp_queue_pkt(struct rxe_qp 
*qp, struct sk_buff *skb) if (must_sched != 0) rxe_counter_inc(SKB_TO_PKT(skb)->rxe, RXE_CNT_COMPLETER_SCHED); - rxe_run_task(&qp->comp.task, must_sched); + if (must_sched) + rxe_sched_task(&qp->comp.task); + else + rxe_run_task(&qp->comp.task); } static inline enum comp_state get_wqe(struct rxe_qp *qp, @@ -313,7 +316,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp, qp->comp.psn = pkt->psn; if (qp->req.wait_psn) { qp->req.wait_psn = 0; - rxe_run_task(&qp->req.task, 0); + rxe_run_task(&qp->req.task); } } return COMPST_ERROR_RETRY; @@ -460,7 +463,7 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe) */ if (qp->req.wait_fence) { qp->req.wait_fence = 0; - rxe_run_task(&qp->req.task, 0); + rxe_run_task(&qp->req.task); } } @@ -474,7 +477,7 @@ static inline enum comp_state complete_ack(struct rxe_qp *qp, if (qp->req.need_rd_atomic) { qp->comp.timeout_retry = 0; qp->req.need_rd_atomic = 0; - rxe_run_task(&qp->req.task, 0); + rxe_run_task(&qp->req.task); } } @@ -520,7 +523,7 @@ static inline enum comp_state complete_wqe(struct rxe_qp *qp, if (qp->req.wait_psn) { qp->req.wait_psn = 0; - rxe_run_task(&qp->req.task, 1); + rxe_sched_task(&qp->req.task); } } @@ -654,7 +657,7 @@ int rxe_completer(void *arg) if (qp->req.wait_psn) { qp->req.wait_psn = 0; - rxe_run_task(&qp->req.task, 1); + rxe_sched_task(&qp->req.task); } state = COMPST_DONE; @@ -722,7 +725,7 @@ int rxe_completer(void *arg) RXE_CNT_COMP_RETRY); qp->req.need_retry = 1; qp->comp.started_retry = 1; - rxe_run_task(&qp->req.task, 0); + rxe_run_task(&qp->req.task); } goto done; diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 35f327b9d4b8..c36cad9c7a66 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -345,7 +345,7 @@ static void rxe_skb_tx_dtor(struct sk_buff *skb) if (unlikely(qp->need_req_skb && skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW)) - rxe_run_task(&qp->req.task, 1); + 
rxe_sched_task(&qp->req.task); rxe_put(qp); } @@ -429,7 +429,7 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt, if ((qp_type(qp) != IB_QPT_RC) && (pkt->mask & RXE_END_MASK)) { pkt->wqe->state = wqe_state_done; - rxe_run_task(&qp->comp.task, 1); + rxe_sched_task(&qp->comp.task); } rxe_counter_inc(rxe, RXE_CNT_SENT_PKTS); diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 03bd9f3e9956..3f6d62a80bea 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -536,10 +536,10 @@ static void rxe_qp_drain(struct rxe_qp *qp) if (qp->req.state != QP_STATE_DRAINED) { qp->req.state = QP_STATE_DRAIN; if (qp_type(qp) == IB_QPT_RC) - rxe_run_task(&qp->comp.task, 1); + rxe_sched_task(&qp->comp.task); else __rxe_do_task(&qp->comp.task); - rxe_run_task(&qp->req.task, 1); + rxe_sched_task(&qp->req.task); } } } @@ -553,13 +553,13 @@ void rxe_qp_error(struct rxe_qp *qp) qp->attr.qp_state = IB_QPS_ERR; /* drain work and packet queues */ - rxe_run_task(&qp->resp.task, 1); + rxe_sched_task(&qp->resp.task); if (qp_type(qp) == IB_QPT_RC) - rxe_run_task(&qp->comp.task, 1); + rxe_sched_task(&qp->comp.task); else __rxe_do_task(&qp->comp.task); - rxe_run_task(&qp->req.task, 1); + rxe_sched_task(&qp->req.task); } /* called by the modify qp verb */ diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index f63771207970..41f1d84f0acb 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -105,7 +105,7 @@ void rnr_nak_timer(struct timer_list *t) /* request a send queue retry */ qp->req.need_retry = 1; qp->req.wait_for_rnr_timer = 0; - rxe_run_task(&qp->req.task, 1); + rxe_sched_task(&qp->req.task); } static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp) @@ -608,7 +608,7 @@ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe) * which can lead to a deadlock. So go ahead and complete * it now. 
*/ - rxe_run_task(&qp->comp.task, 1); + rxe_sched_task(&qp->comp.task); return 0; } @@ -733,7 +733,7 @@ int rxe_requester(void *arg) qp->req.wqe_index); wqe->state = wqe_state_done; wqe->status = IB_WC_SUCCESS; - rxe_run_task(&qp->comp.task, 0); + rxe_run_task(&qp->comp.task); goto done; } payload = mtu; @@ -795,7 +795,7 @@ int rxe_requester(void *arg) rollback_state(wqe, qp, &rollback_wqe, rollback_psn); if (err == -EAGAIN) { - rxe_run_task(&qp->req.task, 1); + rxe_sched_task(&qp->req.task); goto exit; } @@ -817,7 +817,7 @@ err: qp->req.wqe_index = queue_next_index(qp->sq.queue, qp->req.wqe_index); wqe->state = wqe_state_error; qp->req.state = QP_STATE_ERROR; - rxe_run_task(&qp->comp.task, 0); + rxe_run_task(&qp->comp.task); exit: ret = -EAGAIN; out: diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 95d372db934d..c32bc12cc82f 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -91,7 +91,10 @@ void rxe_resp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb) must_sched = (pkt->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST) || (skb_queue_len(&qp->req_pkts) > 1); - rxe_run_task(&qp->resp.task, must_sched); + if (must_sched) + rxe_sched_task(&qp->resp.task); + else + rxe_run_task(&qp->resp.task); } static inline enum resp_states get_req(struct rxe_qp *qp, diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c index 0cbba455fefd..442b7348acdc 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.c +++ b/drivers/infiniband/sw/rxe/rxe_task.c @@ -123,15 +123,20 @@ void rxe_cleanup_task(struct rxe_task *task) tasklet_kill(&task->tasklet); } -void rxe_run_task(struct rxe_task *task, int sched) +void rxe_run_task(struct rxe_task *task) { if (task->destroyed) return; - if (sched) - tasklet_schedule(&task->tasklet); - else - rxe_do_task(&task->tasklet); + rxe_do_task(&task->tasklet); +} + +void rxe_sched_task(struct rxe_task *task) +{ + if (task->destroyed) + return; + 
+ tasklet_schedule(&task->tasklet); } void rxe_disable_task(struct rxe_task *task) diff --git a/drivers/infiniband/sw/rxe/rxe_task.h b/drivers/infiniband/sw/rxe/rxe_task.h index b3dfd970d1dc..590b1c1d7e7c 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.h +++ b/drivers/infiniband/sw/rxe/rxe_task.h @@ -52,10 +52,9 @@ int __rxe_do_task(struct rxe_task *task); */ void rxe_do_task(struct tasklet_struct *t); -/* run a task, else schedule it to run as a tasklet, The decision - * to run or schedule tasklet is based on the parameter sched. - */ -void rxe_run_task(struct rxe_task *task, int sched); +void rxe_run_task(struct rxe_task *task); + +void rxe_sched_task(struct rxe_task *task); /* keep a task from scheduling */ void rxe_disable_task(struct rxe_task *task); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 88825edc7dce..f2f82efbaf6d 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -695,9 +695,9 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr, wr = next; } - rxe_run_task(&qp->req.task, 1); + rxe_sched_task(&qp->req.task); if (unlikely(qp->req.state == QP_STATE_ERROR)) - rxe_run_task(&qp->comp.task, 1); + rxe_sched_task(&qp->comp.task); return err; } @@ -719,7 +719,7 @@ static int rxe_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, if (qp->is_user) { /* Utilize process context to do protocol processing */ - rxe_run_task(&qp->req.task, 0); + rxe_run_task(&qp->req.task); return 0; } else return rxe_post_send_kernel(qp, wr, bad_wr); @@ -759,7 +759,7 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, spin_unlock_irqrestore(&rq->producer_lock, flags); if (qp->resp.state == QP_STATE_ERROR) - rxe_run_task(&qp->resp.task, 1); + rxe_sched_task(&qp->resp.task); err1: return err; From dcef28528cce82a82134abd393aa0f38f2edf77e Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Fri, 21 Oct 2022 15:01:06 -0500 Subject: 
[PATCH 0634/4122] RDMA/rxe: Make rxe_do_task static The subroutine rxe_do_task() is only called in rxe_task.c. This patch makes it static and renames it do_task(). Link: https://lore.kernel.org/r/20221021200118.2163-6-rpearsonhpe@gmail.com Signed-off-by: Ian Ziemba Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_task.c | 6 +++--- drivers/infiniband/sw/rxe/rxe_task.h | 8 -------- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c index 442b7348acdc..fb953f5195b8 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.c +++ b/drivers/infiniband/sw/rxe/rxe_task.c @@ -24,7 +24,7 @@ int __rxe_do_task(struct rxe_task *task) * a second caller finds the task already running * but looks just after the last call to func */ -void rxe_do_task(struct tasklet_struct *t) +static void do_task(struct tasklet_struct *t) { int cont; int ret; @@ -96,7 +96,7 @@ int rxe_init_task(struct rxe_task *task, void *arg, int (*func)(void *)) task->func = func; task->destroyed = false; - tasklet_setup(&task->tasklet, rxe_do_task); + tasklet_setup(&task->tasklet, do_task); task->state = TASK_STATE_START; spin_lock_init(&task->state_lock); @@ -128,7 +128,7 @@ void rxe_run_task(struct rxe_task *task) if (task->destroyed) return; - rxe_do_task(&task->tasklet); + do_task(&task->tasklet); } void rxe_sched_task(struct rxe_task *task) diff --git a/drivers/infiniband/sw/rxe/rxe_task.h b/drivers/infiniband/sw/rxe/rxe_task.h index 590b1c1d7e7c..99e0173e5c46 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.h +++ b/drivers/infiniband/sw/rxe/rxe_task.h @@ -44,14 +44,6 @@ void rxe_cleanup_task(struct rxe_task *task); */ int __rxe_do_task(struct rxe_task *task); -/* - * common function called by any of the main tasklets - * If there is any chance that there is additional - * work to do someone must reschedule the task before - * leaving - */ -void rxe_do_task(struct tasklet_struct *t); - 
void rxe_run_task(struct rxe_task *task); void rxe_sched_task(struct rxe_task *task); From 63a18baef2653f59a7c5b990283628bd54d062fd Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Fri, 21 Oct 2022 15:01:07 -0500 Subject: [PATCH 0635/4122] RDMA/rxe: Rename task->state_lock to task->lock Rename task-state_lock to task->lock Link: https://lore.kernel.org/r/20221021200118.2163-7-rpearsonhpe@gmail.com Signed-off-by: Ian Ziemba Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_task.c | 18 +++++++++--------- drivers/infiniband/sw/rxe/rxe_task.h | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c index fb953f5195b8..0208d833a41b 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.c +++ b/drivers/infiniband/sw/rxe/rxe_task.c @@ -31,22 +31,22 @@ static void do_task(struct tasklet_struct *t) struct rxe_task *task = from_tasklet(task, t, tasklet); unsigned int iterations = RXE_MAX_ITERATIONS; - spin_lock_bh(&task->state_lock); + spin_lock_bh(&task->lock); switch (task->state) { case TASK_STATE_START: task->state = TASK_STATE_BUSY; - spin_unlock_bh(&task->state_lock); + spin_unlock_bh(&task->lock); break; case TASK_STATE_BUSY: task->state = TASK_STATE_ARMED; fallthrough; case TASK_STATE_ARMED: - spin_unlock_bh(&task->state_lock); + spin_unlock_bh(&task->lock); return; default: - spin_unlock_bh(&task->state_lock); + spin_unlock_bh(&task->lock); pr_warn("%s failed with bad state %d\n", __func__, task->state); return; } @@ -55,7 +55,7 @@ static void do_task(struct tasklet_struct *t) cont = 0; ret = task->func(task->arg); - spin_lock_bh(&task->state_lock); + spin_lock_bh(&task->lock); switch (task->state) { case TASK_STATE_BUSY: if (ret) { @@ -84,7 +84,7 @@ static void do_task(struct tasklet_struct *t) pr_warn("%s failed with bad state %d\n", __func__, task->state); } - spin_unlock_bh(&task->state_lock); + spin_unlock_bh(&task->lock); } while (cont); 
task->ret = ret; @@ -99,7 +99,7 @@ int rxe_init_task(struct rxe_task *task, void *arg, int (*func)(void *)) tasklet_setup(&task->tasklet, do_task); task->state = TASK_STATE_START; - spin_lock_init(&task->state_lock); + spin_lock_init(&task->lock); return 0; } @@ -115,9 +115,9 @@ void rxe_cleanup_task(struct rxe_task *task) task->destroyed = true; do { - spin_lock_bh(&task->state_lock); + spin_lock_bh(&task->lock); idle = (task->state == TASK_STATE_START); - spin_unlock_bh(&task->state_lock); + spin_unlock_bh(&task->lock); } while (!idle); tasklet_kill(&task->tasklet); diff --git a/drivers/infiniband/sw/rxe/rxe_task.h b/drivers/infiniband/sw/rxe/rxe_task.h index 99e0173e5c46..7b88129702ac 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.h +++ b/drivers/infiniband/sw/rxe/rxe_task.h @@ -21,7 +21,7 @@ enum { struct rxe_task { struct tasklet_struct tasklet; int state; - spinlock_t state_lock; /* spinlock for task state */ + spinlock_t lock; void *arg; int (*func)(void *arg); int ret; From 46db0ba12be6cfd043df1d09b5831cb84ce2307f Mon Sep 17 00:00:00 2001 From: Qiang Yu Date: Mon, 10 Oct 2022 12:32:04 +0800 Subject: [PATCH 0636/4122] bus: mhi: host: Use mhi_soc_reset() API in place of register write Currently, a direct register write is used when ramdump collection in panic path occurs. Replace that with new mhi_soc_reset() API such that a controller defined reset() function is exercised if one is present and the regular SOC reset is done if it is not. 
Signed-off-by: Qiang Yu Reviewed-by: Loic Poulain Reviewed-by: Jeffrey Hugo Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/1665376324-34258-1-git-send-email-quic_qianyu@quicinc.com Signed-off-by: Manivannan Sadhasivam --- drivers/bus/mhi/host/boot.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/bus/mhi/host/boot.c b/drivers/bus/mhi/host/boot.c index 26d0eddb1477..1c69feee1703 100644 --- a/drivers/bus/mhi/host/boot.c +++ b/drivers/bus/mhi/host/boot.c @@ -118,9 +118,7 @@ static int __mhi_download_rddm_in_panic(struct mhi_controller *mhi_cntrl) /* Hardware reset so force device to enter RDDM */ dev_dbg(dev, "Did not enter RDDM, do a host req reset\n"); - mhi_write_reg(mhi_cntrl, mhi_cntrl->regs, - MHI_SOC_RESET_REQ_OFFSET, - MHI_SOC_RESET_REQ); + mhi_soc_reset(mhi_cntrl); udelay(delayus); } From 869a99907faea6d1835b0bd0d0422ae3519c6ea9 Mon Sep 17 00:00:00 2001 From: Qiang Yu Date: Sun, 16 Oct 2022 11:05:32 +0800 Subject: [PATCH 0637/4122] bus: mhi: host: Fix race between channel preparation and M0 event There is a race condition where mhi_prepare_channel() updates the read and write pointers as the base address and in parallel, if an M0 transition occurs, the tasklet goes ahead and rings doorbells for all channels with a delta in TRE rings assuming they are already enabled. This causes a null pointer access. Fix it by adding a channel enabled check before ringing channel doorbells. 
Cc: stable@vger.kernel.org # 5.19 Fixes: a6e2e3522f29 "bus: mhi: core: Add support for PM state transitions" Signed-off-by: Qiang Yu Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/1665889532-13634-1-git-send-email-quic_qianyu@quicinc.com [mani: CCed stable list] Signed-off-by: Manivannan Sadhasivam --- drivers/bus/mhi/host/pm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/bus/mhi/host/pm.c b/drivers/bus/mhi/host/pm.c index 4a42186ff111..083459028a4b 100644 --- a/drivers/bus/mhi/host/pm.c +++ b/drivers/bus/mhi/host/pm.c @@ -301,7 +301,8 @@ int mhi_pm_m0_transition(struct mhi_controller *mhi_cntrl) read_lock_irq(&mhi_chan->lock); /* Only ring DB if ring is not empty */ - if (tre_ring->base && tre_ring->wp != tre_ring->rp) + if (tre_ring->base && tre_ring->wp != tre_ring->rp && + mhi_chan->ch_state == MHI_CH_STATE_ENABLED) mhi_ring_chan_db(mhi_cntrl, mhi_chan); read_unlock_irq(&mhi_chan->lock); } From 2d5253a096c6057bbf7caa5520856dcdf7eca8bb Mon Sep 17 00:00:00 2001 From: Fabio Porcedda Date: Fri, 16 Sep 2022 16:43:29 +0200 Subject: [PATCH 0638/4122] bus: mhi: host: pci_generic: Add a secondary AT port to Telit FN990 Add a secondary AT port using one of OEM reserved channel. 
Signed-off-by: Fabio Porcedda Reviewed-by: Loic Poulain Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20220916144329.243368-3-fabio.porcedda@gmail.com Signed-off-by: Manivannan Sadhasivam --- drivers/bus/mhi/host/pci_generic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/bus/mhi/host/pci_generic.c b/drivers/bus/mhi/host/pci_generic.c index caa4ce28cf9e..c4259cb2d289 100644 --- a/drivers/bus/mhi/host/pci_generic.c +++ b/drivers/bus/mhi/host/pci_generic.c @@ -340,6 +340,8 @@ static const struct mhi_channel_config mhi_foxconn_sdx55_channels[] = { MHI_CHANNEL_CONFIG_DL(13, "MBIM", 32, 0), MHI_CHANNEL_CONFIG_UL(32, "DUN", 32, 0), MHI_CHANNEL_CONFIG_DL(33, "DUN", 32, 0), + MHI_CHANNEL_CONFIG_UL(92, "DUN2", 32, 1), + MHI_CHANNEL_CONFIG_DL(93, "DUN2", 32, 1), MHI_CHANNEL_CONFIG_HW_UL(100, "IP_HW0_MBIM", 128, 2), MHI_CHANNEL_CONFIG_HW_DL(101, "IP_HW0_MBIM", 128, 3), }; From 875ab4a8d9a7e559c4aaad28f5886d39923301b7 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Tue, 27 Sep 2022 13:53:27 +0800 Subject: [PATCH 0639/4122] RDMA/rxe: Make sure requested access is a subset of {mr,mw}->access We should reject the requests with access flags that is not registered by MR/MW. For example, lookup_mr() should return NULL when requested access is 0x03 and mr->access is 0x01. 
Link: https://lore.kernel.org/r/20220927055337.22630-2-lizhijian@fujitsu.com Signed-off-by: Li Zhijian Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_mr.c | 2 +- drivers/infiniband/sw/rxe/rxe_mw.c | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index d4f10c2d1aa7..014c27bba049 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -511,7 +511,7 @@ struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key, if (unlikely((type == RXE_LOOKUP_LOCAL && mr->lkey != key) || (type == RXE_LOOKUP_REMOTE && mr->rkey != key) || - mr_pd(mr) != pd || (access && !(access & mr->access)) || + mr_pd(mr) != pd || ((access & mr->access) != access) || mr->state != RXE_MR_STATE_VALID)) { rxe_put(mr); mr = NULL; diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c index 902b7df7aaed..8df1c9066ed8 100644 --- a/drivers/infiniband/sw/rxe/rxe_mw.c +++ b/drivers/infiniband/sw/rxe/rxe_mw.c @@ -293,8 +293,7 @@ struct rxe_mw *rxe_lookup_mw(struct rxe_qp *qp, int access, u32 rkey) if (unlikely((mw->rkey != rkey) || rxe_mw_pd(mw) != pd || (mw->ibmw.type == IB_MW_TYPE_2 && mw->qp != qp) || - (mw->length == 0) || - (access && !(access & mw->access)) || + (mw->length == 0) || ((access & mw->access) != access) || mw->state != RXE_MW_STATE_VALID)) { rxe_put(mw); return NULL; From b071850ef62e36b2fc2ec81863f07be857151409 Mon Sep 17 00:00:00 2001 From: Xiao Yang Date: Thu, 27 Oct 2022 07:31:33 +0000 Subject: [PATCH 0640/4122] RDMA/rxe: Remove the duplicate assignment of mr->map_shift mr->map_shift is set to ilog2(RXE_BUF_PER_MAP) in both rxe_mr_init() and rxe_mr_alloc() so remove the duplicate one in rxe_mr_init(). 
Link: https://lore.kernel.org/r/1666855893-145-1-git-send-email-yangx.jy@fujitsu.com Signed-off-by: Xiao Yang Reviewed-by: Li Zhijian Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_mr.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index 014c27bba049..bc081002bddc 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -62,7 +62,6 @@ static void rxe_mr_init(int access, struct rxe_mr *mr) mr->rkey = mr->ibmr.rkey = rkey; mr->state = RXE_MR_STATE_INVALID; - mr->map_shift = ilog2(RXE_BUF_PER_MAP); } static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf) From 692373d186205dfb1b56f35f22702412d94d9420 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Fri, 28 Oct 2022 15:50:53 +0800 Subject: [PATCH 0641/4122] RDMA/rxe: cleanup some error handling in rxe_verbs.c Instead of 'goto and return', just return directly to simplify the error handling, and avoid some unnecessary return value check. 
Link: https://lore.kernel.org/r/20221028075053.3990467-1-xuhaoyue1@hisilicon.com Signed-off-by: Yunsheng Lin Signed-off-by: Haoyue Xu Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_verbs.c | 80 ++++++++------------------- 1 file changed, 23 insertions(+), 57 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index f2f82efbaf6d..bcdfdadaebbc 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -238,7 +238,6 @@ static int rxe_destroy_ah(struct ib_ah *ibah, u32 flags) static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr) { - int err; int i; u32 length; struct rxe_recv_wqe *recv_wqe; @@ -246,15 +245,11 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr) int full; full = queue_full(rq->queue, QUEUE_TYPE_TO_DRIVER); - if (unlikely(full)) { - err = -ENOMEM; - goto err1; - } + if (unlikely(full)) + return -ENOMEM; - if (unlikely(num_sge > rq->max_sge)) { - err = -EINVAL; - goto err1; - } + if (unlikely(num_sge > rq->max_sge)) + return -EINVAL; length = 0; for (i = 0; i < num_sge; i++) @@ -275,9 +270,6 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr) queue_advance_producer(rq->queue, QUEUE_TYPE_TO_DRIVER); return 0; - -err1: - return err; } static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init, @@ -343,10 +335,7 @@ static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, if (err) return err; - err = rxe_srq_from_attr(rxe, srq, attr, mask, &ucmd, udata); - if (err) - return err; - return 0; + return rxe_srq_from_attr(rxe, srq, attr, mask, &ucmd, udata); } static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) @@ -453,11 +442,11 @@ static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, err = rxe_qp_chk_attr(rxe, qp, attr, mask); if (err) - goto err1; + return err; err = rxe_qp_from_attr(qp, attr, mask, udata); if 
(err) - goto err1; + return err; if ((mask & IB_QP_AV) && (attr->ah_attr.ah_flags & IB_AH_GRH)) qp->src_port = rdma_get_udp_sport(attr->ah_attr.grh.flow_label, @@ -465,9 +454,6 @@ static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, qp->attr.dest_qp_num); return 0; - -err1: - return err; } static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, @@ -501,24 +487,21 @@ static int validate_send_wr(struct rxe_qp *qp, const struct ib_send_wr *ibwr, struct rxe_sq *sq = &qp->sq; if (unlikely(num_sge > sq->max_sge)) - goto err1; + return -EINVAL; if (unlikely(mask & WR_ATOMIC_MASK)) { if (length < 8) - goto err1; + return -EINVAL; if (atomic_wr(ibwr)->remote_addr & 0x7) - goto err1; + return -EINVAL; } if (unlikely((ibwr->send_flags & IB_SEND_INLINE) && (length > sq->max_inline))) - goto err1; + return -EINVAL; return 0; - -err1: - return -EINVAL; } static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr, @@ -735,14 +718,12 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, if (unlikely((qp_state(qp) < IB_QPS_INIT) || !qp->valid)) { *bad_wr = wr; - err = -EINVAL; - goto err1; + return -EINVAL; } if (unlikely(qp->srq)) { *bad_wr = wr; - err = -EINVAL; - goto err1; + return -EINVAL; } spin_lock_irqsave(&rq->producer_lock, flags); @@ -761,7 +742,6 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, if (qp->resp.state == QP_STATE_ERROR) rxe_sched_task(&qp->resp.task); -err1: return err; } @@ -826,16 +806,9 @@ static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) err = rxe_cq_chk_attr(rxe, cq, cqe, 0); if (err) - goto err1; + return err; - err = rxe_cq_resize_queue(cq, cqe, uresp, udata); - if (err) - goto err1; - - return 0; - -err1: - return err; + return rxe_cq_resize_queue(cq, cqe, uresp, udata); } static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) @@ -921,26 +894,22 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd, struct 
rxe_mr *mr; mr = rxe_alloc(&rxe->mr_pool); - if (!mr) { - err = -ENOMEM; - goto err2; - } - + if (!mr) + return ERR_PTR(-ENOMEM); rxe_get(pd); mr->ibmr.pd = ibpd; err = rxe_mr_init_user(rxe, start, length, iova, access, mr); if (err) - goto err3; + goto err1; rxe_finalize(mr); return &mr->ibmr; -err3: +err1: rxe_cleanup(mr); -err2: return ERR_PTR(err); } @@ -956,25 +925,22 @@ static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, return ERR_PTR(-EINVAL); mr = rxe_alloc(&rxe->mr_pool); - if (!mr) { - err = -ENOMEM; - goto err1; - } + if (!mr) + return ERR_PTR(-ENOMEM); rxe_get(pd); mr->ibmr.pd = ibpd; err = rxe_mr_init_fast(max_num_sg, mr); if (err) - goto err2; + goto err1; rxe_finalize(mr); return &mr->ibmr; -err2: - rxe_cleanup(mr); err1: + rxe_cleanup(mr); return ERR_PTR(err); } From 1b52861f0e04da43013f88dd56464b5719a974e3 Mon Sep 17 00:00:00 2001 From: Jinyu Tang Date: Sun, 9 Oct 2022 21:45:03 +0800 Subject: [PATCH 0642/4122] riscv: support update_mmu_tlb() Add macro definition to support update_mmu_tlb() for riscv, this function is from commit:7df676974359 ("mm/memory.c:Update local TLB if PTE entry exists"). update_mmu_tlb() is used when a thread notice that other cpu thread has handled the fault and changed the PTE. For MIPS, it's worth to do that,this cpu thread will trap in tlb fault again otherwise. For RISCV, it's also better to flush local tlb than do nothing in update_mmu_tlb(). There are two kinds of page fault that have update_mmu_tlb() inside: 1.page fault which PTE is NOT none, only protection check error, like write protection fault. If updata_mmu_tlb() is empty, after finsh page fault this time and re-execute, cpu will find address but protection checked error in tlb again. So this will cause another page fault. PTE in memory is good now,so update_mmu_cache() in handle_pte_fault() will be executed. 
If update_mmu_tlb() is not empty and flushes the local tlb, cpu won't find this address in tlb next time, and get entry in physical memory, so it won't cause another page fault. 2.page fault which PTE is none or swapped. For this case, this cpu thread won't cause another page fault, cpu will have tlb miss when re-execute, and get entry in memory directly. But "set pte in physical memory and flush local tlb" is the practice in Linux, it's better to flush local tlb if it finds that the entry in physical memory has changed. Maybe it's same for other ARCH which can't detect PTE changed and update it in local tlb automatically. Signed-off-by: Jinyu Tang Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20221009134503.18783-1-tjytimi@163.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/pgtable.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 7ec936910a96..c61ae83aadee 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -418,6 +418,9 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, local_flush_tlb_page(address); } +#define __HAVE_ARCH_UPDATE_MMU_TLB +#define update_mmu_tlb update_mmu_cache + static inline void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { From 3558927fc2b2fd0af309648f4071035e08719866 Mon Sep 17 00:00:00 2001 From: Cleo John Date: Mon, 10 Oct 2022 20:28:48 +0200 Subject: [PATCH 0643/4122] riscv: fix styling in ucontext header Change the two comments in ucontext.h by getting them up to the coding style proposed by torvalds.
Signed-off-by: Cleo John Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20221010182848.GA28029@watet-ms7b87 Signed-off-by: Palmer Dabbelt --- arch/riscv/include/uapi/asm/ucontext.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/riscv/include/uapi/asm/ucontext.h b/arch/riscv/include/uapi/asm/ucontext.h index 44eb993950e5..516bd0bb0da5 100644 --- a/arch/riscv/include/uapi/asm/ucontext.h +++ b/arch/riscv/include/uapi/asm/ucontext.h @@ -15,19 +15,23 @@ struct ucontext { struct ucontext *uc_link; stack_t uc_stack; sigset_t uc_sigmask; - /* There's some padding here to allow sigset_t to be expanded in the + /* + * There's some padding here to allow sigset_t to be expanded in the * future. Though this is unlikely, other architectures put uc_sigmask * at the end of this structure and explicitly state it can be - * expanded, so we didn't want to box ourselves in here. */ + * expanded, so we didn't want to box ourselves in here. + */ __u8 __unused[1024 / 8 - sizeof(sigset_t)]; - /* We can't put uc_sigmask at the end of this structure because we need + /* + * We can't put uc_sigmask at the end of this structure because we need * to be able to expand sigcontext in the future. For example, the * vector ISA extension will almost certainly add ISA state. We want * to ensure all user-visible ISA state can be saved and restored via a * ucontext, so we're putting this at the end in order to allow for * infinite extensibility. Since we know this will be extended and we * assume sigset_t won't be extended an extreme amount, we're - * prioritizing this. */ + * prioritizing this. 
+ */ struct sigcontext uc_mcontext; }; From 03699f271de1f4df6369cd379506539cd7d590d3 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 2 Sep 2022 14:33:44 -0700 Subject: [PATCH 0644/4122] string: Rewrite and add more kern-doc for the str*() functions While there were varying degrees of kern-doc for various str*()-family functions, many needed updating and clarification, or to just be entirely written. Update (and relocate) existing kern-doc and add missing functions, sadly shaking my head at how many times I have written "Do not use this function". Include the results in the core kernel API doc. Cc: Bagas Sanjaya Cc: Andy Shevchenko Cc: Rasmus Villemoes Cc: Andrew Morton Cc: linux-hardening@vger.kernel.org Tested-by: Akira Yokosawa Link: https://lore.kernel.org/lkml/9b0cf584-01b3-3013-b800-1ef59fe82476@gmail.com Signed-off-by: Kees Cook --- Documentation/core-api/kernel-api.rst | 3 + include/linux/fortify-string.h | 133 ++++++++++++++++++++++++-- lib/string.c | 82 ---------------- scripts/kernel-doc | 6 +- 4 files changed, 131 insertions(+), 93 deletions(-) diff --git a/Documentation/core-api/kernel-api.rst b/Documentation/core-api/kernel-api.rst index 06f4ab122697..0d0c4f87057c 100644 --- a/Documentation/core-api/kernel-api.rst +++ b/Documentation/core-api/kernel-api.rst @@ -36,6 +36,9 @@ String Conversions String Manipulation ------------------- +.. kernel-doc:: include/linux/fortify-string.h + :internal: + .. 
kernel-doc:: lib/string.c :export: diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index 0f00a551939a..e5b39b1cc2fc 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -106,13 +106,13 @@ extern char *__underlying_strncpy(char *p, const char *q, __kernel_size_t size) * Instead, please choose an alternative, so that the expectation * of @p's contents is unambiguous: * - * +--------------------+-----------------+------------+ - * | @p needs to be: | padded to @size | not padded | - * +====================+=================+============+ - * | NUL-terminated | strscpy_pad() | strscpy() | - * +--------------------+-----------------+------------+ - * | not NUL-terminated | strtomem_pad() | strtomem() | - * +--------------------+-----------------+------------+ + * +--------------------+--------------------+------------+ + * | **p** needs to be: | padded to **size** | not padded | + * +====================+====================+============+ + * | NUL-terminated | strscpy_pad() | strscpy() | + * +--------------------+--------------------+------------+ + * | not NUL-terminated | strtomem_pad() | strtomem() | + * +--------------------+--------------------+------------+ * * Note strscpy*()'s differing return values for detecting truncation, * and strtomem*()'s expectation that the destination is marked with @@ -131,6 +131,21 @@ char *strncpy(char * const POS p, const char *q, __kernel_size_t size) return __underlying_strncpy(p, q, size); } +/** + * strcat - Append a string to an existing string + * + * @p: pointer to NUL-terminated string to append to + * @q: pointer to NUL-terminated source string to append from + * + * Do not use this function. While FORTIFY_SOURCE tries to avoid + * read and write overflows, this is only possible when the + * destination buffer size is known to the compiler. Prefer + * building the string with formatting, via scnprintf() or similar. + * At the very least, use strncat(). 
+ * + * Returns @p. + * + */ __FORTIFY_INLINE __diagnose_as(__builtin_strcat, 1, 2) char *strcat(char * const POS p, const char *q) { @@ -144,6 +159,16 @@ char *strcat(char * const POS p, const char *q) } extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(strnlen); +/** + * strnlen - Return bounded count of characters in a NUL-terminated string + * + * @p: pointer to NUL-terminated string to count. + * @maxlen: maximum number of characters to count. + * + * Returns number of characters in @p (NOT including the final NUL), or + * @maxlen, if no NUL has been found up to there. + * + */ __FORTIFY_INLINE __kernel_size_t strnlen(const char * const POS p, __kernel_size_t maxlen) { size_t p_size = __member_size(p); @@ -169,6 +194,19 @@ __FORTIFY_INLINE __kernel_size_t strnlen(const char * const POS p, __kernel_size * possible for strlen() to be used on compile-time strings for use in * static initializers (i.e. as a constant expression). */ +/** + * strlen - Return count of characters in a NUL-terminated string + * + * @p: pointer to NUL-terminated string to count. + * + * Do not use this function unless the string length is known at + * compile-time. When @p is unterminated, this function may crash + * or return unexpected counts that could lead to memory content + * exposures. Prefer strnlen(). + * + * Returns number of characters in @p (NOT including the final NUL). + * + */ #define strlen(p) \ __builtin_choose_expr(__is_constexpr(__builtin_strlen(p)), \ __builtin_strlen(p), __fortify_strlen(p)) @@ -187,8 +225,26 @@ __kernel_size_t __fortify_strlen(const char * const POS p) return ret; } -/* defined after fortified strlen to reuse it */ +/* Defined after fortified strlen() to reuse it. 
*/ extern size_t __real_strlcpy(char *, const char *, size_t) __RENAME(strlcpy); +/** + * strlcpy - Copy a string into another string buffer + * + * @p: pointer to destination of copy + * @q: pointer to NUL-terminated source string to copy + * @size: maximum number of bytes to write at @p + * + * If strlen(@q) >= @size, the copy of @q will be truncated at + * @size - 1 bytes. @p will always be NUL-terminated. + * + * Do not use this function. While FORTIFY_SOURCE tries to avoid + * over-reads when calculating strlen(@q), it is still possible. + * Prefer strscpy(), though note its different return values for + * detecting truncation. + * + * Returns total number of bytes written to @p, including terminating NUL. + * + */ __FORTIFY_INLINE size_t strlcpy(char * const POS p, const char * const POS q, size_t size) { size_t p_size = __member_size(p); @@ -214,8 +270,32 @@ __FORTIFY_INLINE size_t strlcpy(char * const POS p, const char * const POS q, si return q_len; } -/* defined after fortified strnlen to reuse it */ +/* Defined after fortified strnlen() to reuse it. */ extern ssize_t __real_strscpy(char *, const char *, size_t) __RENAME(strscpy); +/** + * strscpy - Copy a C-string into a sized buffer + * + * @p: Where to copy the string to + * @q: Where to copy the string from + * @size: Size of destination buffer + * + * Copy the source string @q, or as much of it as fits, into the destination + * @p buffer. The behavior is undefined if the string buffers overlap. The + * destination @p buffer is always NUL terminated, unless it's zero-sized. + * + * Preferred to strlcpy() since the API doesn't require reading memory + * from the source @q string beyond the specified @size bytes, and since + * the return value is easier to error-check than strlcpy()'s. + * In addition, the implementation is robust to the string changing out + * from underneath it, unlike the current strlcpy() implementation.
+ * + * Preferred to strncpy() since it always returns a valid string, and + * doesn't unnecessarily force the tail of the destination buffer to be + * zero padded. If padding is desired please use strscpy_pad(). + * + * Returns the number of characters copied in @p (not including the + * trailing %NUL) or -E2BIG if @size is 0 or the copy of @q was truncated. + */ __FORTIFY_INLINE ssize_t strscpy(char * const POS p, const char * const POS q, size_t size) { size_t len; @@ -261,7 +341,26 @@ __FORTIFY_INLINE ssize_t strscpy(char * const POS p, const char * const POS q, s return __real_strscpy(p, q, len); } -/* defined after fortified strlen and strnlen to reuse them */ +/** + * strncat - Append a string to an existing string + * + * @p: pointer to NUL-terminated string to append to + * @q: pointer to source string to append from + * @count: Maximum bytes to read from @q + * + * Appends at most @count bytes from @q (stopping at the first + * NUL byte) after the NUL-terminated string at @p. @p will be + * NUL-terminated. + * + * Do not use this function. While FORTIFY_SOURCE tries to avoid + * read and write overflows, this is only possible when the sizes + * of @p and @q are known to the compiler. Prefer building the + * string with formatting, via scnprintf() or similar. + * + * Returns @p. + * + */ +/* Defined after fortified strlen() and strnlen() to reuse them. */ __FORTIFY_INLINE __diagnose_as(__builtin_strncat, 1, 2, 3) char *strncat(char * const POS p, const char * const POS q, __kernel_size_t count) { @@ -572,6 +671,20 @@ __FORTIFY_INLINE void *kmemdup(const void * const POS0 p, size_t size, gfp_t gfp return __real_kmemdup(p, size, gfp); } +/** + * strcpy - Copy a string into another string buffer + * + * @p: pointer to destination of copy + * @q: pointer to NUL-terminated source string to copy + * + * Do not use this function. While FORTIFY_SOURCE tries to avoid + * overflows, this is only possible when the sizes of @q and @p are + * known to the compiler. 
Prefer strscpy(), though note its different + * return values for detecting truncation. + * + * Returns @p. + * + */ /* Defined after fortified strlen to reuse it. */ __FORTIFY_INLINE __diagnose_as(__builtin_strcpy, 1, 2) char *strcpy(char * const POS p, const char * const POS q) diff --git a/lib/string.c b/lib/string.c index 3371d26a0e39..4fb566ea610f 100644 --- a/lib/string.c +++ b/lib/string.c @@ -76,11 +76,6 @@ EXPORT_SYMBOL(strcasecmp); #endif #ifndef __HAVE_ARCH_STRCPY -/** - * strcpy - Copy a %NUL terminated string - * @dest: Where to copy the string to - * @src: Where to copy the string from - */ char *strcpy(char *dest, const char *src) { char *tmp = dest; @@ -93,19 +88,6 @@ EXPORT_SYMBOL(strcpy); #endif #ifndef __HAVE_ARCH_STRNCPY -/** - * strncpy - Copy a length-limited, C-string - * @dest: Where to copy the string to - * @src: Where to copy the string from - * @count: The maximum number of bytes to copy - * - * The result is not %NUL-terminated if the source exceeds - * @count bytes. - * - * In the case where the length of @src is less than that of - * count, the remainder of @dest will be padded with %NUL. - * - */ char *strncpy(char *dest, const char *src, size_t count) { char *tmp = dest; @@ -122,17 +104,6 @@ EXPORT_SYMBOL(strncpy); #endif #ifndef __HAVE_ARCH_STRLCPY -/** - * strlcpy - Copy a C-string into a sized buffer - * @dest: Where to copy the string to - * @src: Where to copy the string from - * @size: size of destination buffer - * - * Compatible with ``*BSD``: the result is always a valid - * NUL-terminated string that fits in the buffer (unless, - * of course, the buffer size is zero). It does not pad - * out the result like strncpy() does. 
- */ size_t strlcpy(char *dest, const char *src, size_t size) { size_t ret = strlen(src); @@ -148,30 +119,6 @@ EXPORT_SYMBOL(strlcpy); #endif #ifndef __HAVE_ARCH_STRSCPY -/** - * strscpy - Copy a C-string into a sized buffer - * @dest: Where to copy the string to - * @src: Where to copy the string from - * @count: Size of destination buffer - * - * Copy the string, or as much of it as fits, into the dest buffer. The - * behavior is undefined if the string buffers overlap. The destination - * buffer is always NUL terminated, unless it's zero-sized. - * - * Preferred to strlcpy() since the API doesn't require reading memory - * from the src string beyond the specified "count" bytes, and since - * the return value is easier to error-check than strlcpy()'s. - * In addition, the implementation is robust to the string changing out - * from underneath it, unlike the current strlcpy() implementation. - * - * Preferred to strncpy() since it always returns a valid string, and - * doesn't unnecessarily force the tail of the destination buffer to be - * zeroed. If zeroing is desired please use strscpy_pad(). - * - * Returns: - * * The number of characters copied (not including the trailing %NUL) - * * -E2BIG if count is 0 or @src was truncated. 
- */ ssize_t strscpy(char *dest, const char *src, size_t count) { const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS; @@ -266,11 +213,6 @@ char *stpcpy(char *__restrict__ dest, const char *__restrict__ src) EXPORT_SYMBOL(stpcpy); #ifndef __HAVE_ARCH_STRCAT -/** - * strcat - Append one %NUL-terminated string to another - * @dest: The string to be appended to - * @src: The string to append to it - */ char *strcat(char *dest, const char *src) { char *tmp = dest; @@ -285,15 +227,6 @@ EXPORT_SYMBOL(strcat); #endif #ifndef __HAVE_ARCH_STRNCAT -/** - * strncat - Append a length-limited, C-string to another - * @dest: The string to be appended to - * @src: The string to append to it - * @count: The maximum numbers of bytes to copy - * - * Note that in contrast to strncpy(), strncat() ensures the result is - * terminated. - */ char *strncat(char *dest, const char *src, size_t count) { char *tmp = dest; @@ -314,12 +247,6 @@ EXPORT_SYMBOL(strncat); #endif #ifndef __HAVE_ARCH_STRLCAT -/** - * strlcat - Append a length-limited, C-string to another - * @dest: The string to be appended to - * @src: The string to append to it - * @count: The size of the destination buffer. 
- */ size_t strlcat(char *dest, const char *src, size_t count) { size_t dsize = strlen(dest); @@ -484,10 +411,6 @@ EXPORT_SYMBOL(strnchr); #endif #ifndef __HAVE_ARCH_STRLEN -/** - * strlen - Find the length of a string - * @s: The string to be sized - */ size_t strlen(const char *s) { const char *sc; @@ -500,11 +423,6 @@ EXPORT_SYMBOL(strlen); #endif #ifndef __HAVE_ARCH_STRNLEN -/** - * strnlen - Find the length of a length-limited string - * @s: The string to be sized - * @count: The maximum number of bytes to search - */ size_t strnlen(const char *s, size_t count) { const char *sc; diff --git a/scripts/kernel-doc b/scripts/kernel-doc index aea04365bc69..adbc4d307770 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -1448,6 +1448,8 @@ sub create_parameterlist($$$$) { foreach my $arg (split($splitter, $args)) { # strip comments $arg =~ s/\/\*.*\*\///; + # ignore argument attributes + $arg =~ s/\sPOS0?\s/ /; # strip leading/trailing spaces $arg =~ s/^\s*//; $arg =~ s/\s*$//; @@ -1657,6 +1659,7 @@ sub dump_function($$) { $prototype =~ s/^__inline +//; $prototype =~ s/^__always_inline +//; $prototype =~ s/^noinline +//; + $prototype =~ s/^__FORTIFY_INLINE +//; $prototype =~ s/__init +//; $prototype =~ s/__init_or_module +//; $prototype =~ s/__deprecated +//; @@ -1666,7 +1669,8 @@ sub dump_function($$) { $prototype =~ s/__weak +//; $prototype =~ s/__sched +//; $prototype =~ s/__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +//; - $prototype =~ s/__alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +//; + $prototype =~ s/__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +//; + $prototype =~ s/__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +//; my $define = $prototype =~ s/^#\s*define\s+//; #ak added $prototype =~ s/__attribute_const__ +//; $prototype =~ s/__attribute__\s*\(\( From 96fce387d58fa8eae6e8d9b1ecdfbc18292d7a68 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 28 Sep 2022 14:17:05 -0700 Subject: [PATCH 0645/4122] kunit/memcpy: Add dynamic size and window tests The "side 
effects" memmove() test accidentally found[1] a corner case in the recent refactoring of the i386 assembly memmove(), but missed another corner case. Instead of hoping to get lucky next time, implement much more complete tests of memcpy() and memmove() -- especially the moving window overlap for memmove() -- which catches all the issues encountered and should catch anything new. [1] https://lore.kernel.org/lkml/CAKwvOdkaKTa2aiA90VzFrChNQM6O_ro+b7VWs=op70jx-DKaXA@mail.gmail.com Cc: Nick Desaulniers Tested-by: Nick Desaulniers Signed-off-by: Kees Cook --- MAINTAINERS | 1 + lib/memcpy_kunit.c | 205 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 206 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index cf0f18502372..9dd8d74c4df0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8044,6 +8044,7 @@ S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/hardening F: include/linux/fortify-string.h F: lib/fortify_kunit.c +F: lib/memcpy_kunit.c F: lib/test_fortify/* F: scripts/test_fortify.sh K: \b__NO_FORTIFY\b diff --git a/lib/memcpy_kunit.c b/lib/memcpy_kunit.c index 2b5cc70ac53f..c4a7107edd43 100644 --- a/lib/memcpy_kunit.c +++ b/lib/memcpy_kunit.c @@ -270,6 +270,208 @@ static void memset_test(struct kunit *test) #undef TEST_OP } +static u8 large_src[1024]; +static u8 large_dst[2048]; +static const u8 large_zero[2048]; + +static void set_random_nonzero(struct kunit *test, u8 *byte) +{ + int failed_rng = 0; + + while (*byte == 0) { + get_random_bytes(byte, 1); + KUNIT_ASSERT_LT_MSG(test, failed_rng++, 100, + "Is the RNG broken?"); + } +} + +static void init_large(struct kunit *test) +{ + + /* Get many bit patterns. */ + get_random_bytes(large_src, ARRAY_SIZE(large_src)); + + /* Make sure we have non-zero edges. */ + set_random_nonzero(test, &large_src[0]); + set_random_nonzero(test, &large_src[ARRAY_SIZE(large_src) - 1]); + + /* Explicitly zero the entire destination. 
*/ + memset(large_dst, 0, ARRAY_SIZE(large_dst)); +} + +/* + * Instead of an indirect function call for "copy" or a giant macro, + * use a bool to pick memcpy or memmove. + */ +static void copy_large_test(struct kunit *test, bool use_memmove) +{ + init_large(test); + + /* Copy a growing number of non-overlapping bytes ... */ + for (int bytes = 1; bytes <= ARRAY_SIZE(large_src); bytes++) { + /* Over a shifting destination window ... */ + for (int offset = 0; offset < ARRAY_SIZE(large_src); offset++) { + int right_zero_pos = offset + bytes; + int right_zero_size = ARRAY_SIZE(large_dst) - right_zero_pos; + + /* Copy! */ + if (use_memmove) + memmove(large_dst + offset, large_src, bytes); + else + memcpy(large_dst + offset, large_src, bytes); + + /* Did we touch anything before the copy area? */ + KUNIT_ASSERT_EQ_MSG(test, + memcmp(large_dst, large_zero, offset), 0, + "with size %d at offset %d", bytes, offset); + /* Did we touch anything after the copy area? */ + KUNIT_ASSERT_EQ_MSG(test, + memcmp(&large_dst[right_zero_pos], large_zero, right_zero_size), 0, + "with size %d at offset %d", bytes, offset); + + /* Are we byte-for-byte exact across the copy? */ + KUNIT_ASSERT_EQ_MSG(test, + memcmp(large_dst + offset, large_src, bytes), 0, + "with size %d at offset %d", bytes, offset); + + /* Zero out what we copied for the next cycle. */ + memset(large_dst + offset, 0, bytes); + } + /* Avoid stall warnings if this loop gets slow. */ + cond_resched(); + } +} + +static void memcpy_large_test(struct kunit *test) +{ + copy_large_test(test, false); +} + +static void memmove_large_test(struct kunit *test) +{ + copy_large_test(test, true); +} + +/* + * On the assumption that boundary conditions are going to be the most + * sensitive, instead of taking a full step (inc) each iteration, + * take single index steps for at least the first "inc"-many indexes + * from the "start" and at least the last "inc"-many indexes before + * the "end". 
When in the middle, take full "inc"-wide steps. For + * example, calling next_step(idx, 1, 15, 3) with idx starting at 0 + * would see the following pattern: 1 2 3 4 7 10 11 12 13 14 15. + */ +static int next_step(int idx, int start, int end, int inc) +{ + start += inc; + end -= inc; + + if (idx < start || idx + inc > end) + inc = 1; + return idx + inc; +} + +static void inner_loop(struct kunit *test, int bytes, int d_off, int s_off) +{ + int left_zero_pos, left_zero_size; + int right_zero_pos, right_zero_size; + int src_pos, src_orig_pos, src_size; + int pos; + + /* Place the source in the destination buffer. */ + memcpy(&large_dst[s_off], large_src, bytes); + + /* Copy to destination offset. */ + memmove(&large_dst[d_off], &large_dst[s_off], bytes); + + /* Make sure destination entirely matches. */ + KUNIT_ASSERT_EQ_MSG(test, memcmp(&large_dst[d_off], large_src, bytes), 0, + "with size %d at src offset %d and dest offset %d", + bytes, s_off, d_off); + + /* Calculate the expected zero spans. */ + if (s_off < d_off) { + left_zero_pos = 0; + left_zero_size = s_off; + + right_zero_pos = d_off + bytes; + right_zero_size = ARRAY_SIZE(large_dst) - right_zero_pos; + + src_pos = s_off; + src_orig_pos = 0; + src_size = d_off - s_off; + } else { + left_zero_pos = 0; + left_zero_size = d_off; + + right_zero_pos = s_off + bytes; + right_zero_size = ARRAY_SIZE(large_dst) - right_zero_pos; + + src_pos = d_off + bytes; + src_orig_pos = src_pos - s_off; + src_size = right_zero_pos - src_pos; + } + + /* Check non-overlapping source is unchanged.*/ + KUNIT_ASSERT_EQ_MSG(test, + memcmp(&large_dst[src_pos], &large_src[src_orig_pos], src_size), 0, + "with size %d at src offset %d and dest offset %d", + bytes, s_off, d_off); + + /* Check leading buffer contents are zero. 
*/ + KUNIT_ASSERT_EQ_MSG(test, + memcmp(&large_dst[left_zero_pos], large_zero, left_zero_size), 0, + "with size %d at src offset %d and dest offset %d", + bytes, s_off, d_off); + /* Check trailing buffer contents are zero. */ + KUNIT_ASSERT_EQ_MSG(test, + memcmp(&large_dst[right_zero_pos], large_zero, right_zero_size), 0, + "with size %d at src offset %d and dest offset %d", + bytes, s_off, d_off); + + /* Zero out everything not already zeroed.*/ + pos = left_zero_pos + left_zero_size; + memset(&large_dst[pos], 0, right_zero_pos - pos); +} + +static void memmove_overlap_test(struct kunit *test) +{ + /* + * Running all possible offset and overlap combinations takes a + * very long time. Instead, only check up to 128 bytes offset + * into the destination buffer (which should result in crossing + * cachelines), with a step size of 1 through 7 to try to skip some + * redundancy. + */ + static const int offset_max = 128; /* less than ARRAY_SIZE(large_src); */ + static const int bytes_step = 7; + static const int window_step = 7; + + static const int bytes_start = 1; + static const int bytes_end = ARRAY_SIZE(large_src) + 1; + + init_large(test); + + /* Copy a growing number of overlapping bytes ... */ + for (int bytes = bytes_start; bytes < bytes_end; + bytes = next_step(bytes, bytes_start, bytes_end, bytes_step)) { + + /* Over a shifting destination window ... */ + for (int d_off = 0; d_off < offset_max; d_off++) { + int s_start = max(d_off - bytes, 0); + int s_end = min_t(int, d_off + bytes, ARRAY_SIZE(large_src)); + + /* Over a shifting source window ... */ + for (int s_off = s_start; s_off < s_end; + s_off = next_step(s_off, s_start, s_end, window_step)) + inner_loop(test, bytes, d_off, s_off); + + /* Avoid stall warnings. 
*/ + cond_resched(); + } + } +} + static void strtomem_test(struct kunit *test) { static const char input[sizeof(unsigned long)] = "hi"; @@ -325,7 +527,10 @@ static void strtomem_test(struct kunit *test) static struct kunit_case memcpy_test_cases[] = { KUNIT_CASE(memset_test), KUNIT_CASE(memcpy_test), + KUNIT_CASE(memcpy_large_test), KUNIT_CASE(memmove_test), + KUNIT_CASE(memmove_large_test), + KUNIT_CASE(memmove_overlap_test), KUNIT_CASE(strtomem_test), {} }; From ef1ca2102e9c546a507ed43994f5dd022f7a80d3 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Fri, 14 Oct 2022 19:24:21 +0200 Subject: [PATCH 0646/4122] power: supply: bq25890: Document POWER_SUPPLY_PROP_CURRENT_NOW Document that POWER_SUPPLY_PROP_CURRENT_NOW really does refer to ADC-sampled immediate battery charge current I_BAT , since the meaning is not clear with all the currents which might be measured by charger chips. Reviewed-by: Hans de Goede Signed-off-by: Marek Vasut Signed-off-by: Sebastian Reichel --- drivers/power/supply/bq25890_charger.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/power/supply/bq25890_charger.c b/drivers/power/supply/bq25890_charger.c index 6020b58c641d..1298d5720aa4 100644 --- a/drivers/power/supply/bq25890_charger.c +++ b/drivers/power/supply/bq25890_charger.c @@ -588,7 +588,14 @@ static int bq25890_power_supply_get_property(struct power_supply *psy, val->intval = 2304000 + ret * 20000; break; - case POWER_SUPPLY_PROP_CURRENT_NOW: + case POWER_SUPPLY_PROP_CURRENT_NOW: /* I_BAT now */ + /* + * This is ADC-sampled immediate charge current supplied + * from charger to battery. 
The property name is confusing, + * for clarification refer to: + * Documentation/ABI/testing/sysfs-class-power + * /sys/class/power_supply/<supply_name>/current_now + */ ret = bq25890_field_read(bq, F_ICHGR); /* read measured value */ if (ret < 0) return ret; From 8327a8abd4d7c842d81108b8361c199d0631d173 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Fri, 14 Oct 2022 19:24:22 +0200 Subject: [PATCH 0647/4122] power: supply: bq25890: Clean up POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT Clean up misuse of POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT and POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX and document what exactly each value means. The POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT content is newly read back from hardware, while POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX is reported as the maximum value set in DT. Reviewed-by: Hans de Goede Signed-off-by: Marek Vasut Signed-off-by: Sebastian Reichel --- drivers/power/supply/bq25890_charger.c | 57 ++++++++++++++++++-------- 1 file changed, 41 insertions(+), 16 deletions(-) diff --git a/drivers/power/supply/bq25890_charger.c b/drivers/power/supply/bq25890_charger.c index 1298d5720aa4..5924b036b158 100644 --- a/drivers/power/supply/bq25890_charger.c +++ b/drivers/power/supply/bq25890_charger.c @@ -529,22 +529,6 @@ static int bq25890_power_supply_get_property(struct power_supply *psy, val->intval = POWER_SUPPLY_HEALTH_UNSPEC_FAILURE; break; - case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX: - val->intval = bq25890_find_val(bq->init_data.ichg, TBL_ICHG); - - /* When temperature is too low, charge current is decreased */ - if (bq->state.ntc_fault == NTC_FAULT_COOL) { - ret = bq25890_field_read(bq, F_JEITA_ISET); - if (ret < 0) - return ret; - - if (ret) - val->intval /= 5; - else - val->intval /= 2; - } - break; - case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE: if (!state.online) { val->intval = 0; @@ -604,6 +588,46 @@ static int bq25890_power_supply_get_property(struct power_supply *psy, val->intval = ret * -50000; break; +
case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT: /* I_BAT user limit */ + /* + * This is user-configured constant charge current supplied + * from charger to battery in first phase of charging, when + * battery voltage is below constant charge voltage. + * + * This value reflects the current hardware setting. + * + * The POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX is the + * maximum value of this property. + */ + ret = bq25890_field_read(bq, F_ICHG); + if (ret < 0) + return ret; + val->intval = bq25890_find_val(ret, TBL_ICHG); + + /* When temperature is too low, charge current is decreased */ + if (bq->state.ntc_fault == NTC_FAULT_COOL) { + ret = bq25890_field_read(bq, F_JEITA_ISET); + if (ret < 0) + return ret; + + if (ret) + val->intval /= 5; + else + val->intval /= 2; + } + break; + + case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX: /* I_BAT max */ + /* + * This is maximum allowed constant charge current supplied + * from charger to battery in first phase of charging, when + * battery voltage is below constant charge voltage. + * + * This value is constant for each battery and set from DT. 
+ */ + val->intval = bq25890_find_val(bq->init_data.ichg, TBL_ICHG); + break; + case POWER_SUPPLY_PROP_TEMP: ret = bq25890_field_read(bq, F_TSPCT); if (ret < 0) @@ -887,6 +911,7 @@ static const enum power_supply_property bq25890_power_supply_props[] = { POWER_SUPPLY_PROP_CHARGE_TYPE, POWER_SUPPLY_PROP_ONLINE, POWER_SUPPLY_PROP_HEALTH, + POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT, POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX, POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE, POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE_MAX, From 7c85237519738ca4808b4fa79c2d448770f8d33f Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Fri, 14 Oct 2022 19:24:23 +0200 Subject: [PATCH 0648/4122] power: supply: bq25890: Clean up POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE Clean up misuse of POWER_SUPPLY_PROP_VOLTAGE, POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE_MAX and POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE and document what exactly each value means. The POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE content is newly read back from hardware, while POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE_MAX is reported as the maximum value set in DT. The POWER_SUPPLY_PROP_VOLTAGE is newly used to report immediate value of battery voltage V_BAT, which is what this property was intended to report and which has been thus far misused to report the charger chip output voltage V_SYS. The V_SYS is no longer reported as there is currently no suitable property to report V_SYS. V_SYS reporting will be reinstated in subsequent patch. 
Signed-off-by: Marek Vasut Reviewed-by: Hans de Goede Signed-off-by: Sebastian Reichel --- drivers/power/supply/bq25890_charger.c | 72 ++++++++++++++++---------- 1 file changed, 45 insertions(+), 27 deletions(-) diff --git a/drivers/power/supply/bq25890_charger.c b/drivers/power/supply/bq25890_charger.c index 5924b036b158..050eef2571e8 100644 --- a/drivers/power/supply/bq25890_charger.c +++ b/drivers/power/supply/bq25890_charger.c @@ -529,24 +529,6 @@ static int bq25890_power_supply_get_property(struct power_supply *psy, val->intval = POWER_SUPPLY_HEALTH_UNSPEC_FAILURE; break; - case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE: - if (!state.online) { - val->intval = 0; - break; - } - - ret = bq25890_field_read(bq, F_BATV); /* read measured value */ - if (ret < 0) - return ret; - - /* converted_val = 2.304V + ADC_val * 20mV (table 10.3.15) */ - val->intval = 2304000 + ret * 20000; - break; - - case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE_MAX: - val->intval = bq25890_find_val(bq->init_data.vreg, TBL_VREG); - break; - case POWER_SUPPLY_PROP_PRECHARGE_CURRENT: val->intval = bq25890_find_val(bq->init_data.iprechg, TBL_ITERM); break; @@ -563,15 +545,6 @@ static int bq25890_power_supply_get_property(struct power_supply *psy, val->intval = bq25890_find_val(ret, TBL_IINLIM); break; - case POWER_SUPPLY_PROP_VOLTAGE_NOW: - ret = bq25890_field_read(bq, F_SYSV); /* read measured value */ - if (ret < 0) - return ret; - - /* converted_val = 2.304V + ADC_val * 20mV (table 10.3.15) */ - val->intval = 2304000 + ret * 20000; - break; - case POWER_SUPPLY_PROP_CURRENT_NOW: /* I_BAT now */ /* * This is ADC-sampled immediate charge current supplied @@ -628,6 +601,51 @@ static int bq25890_power_supply_get_property(struct power_supply *psy, val->intval = bq25890_find_val(bq->init_data.ichg, TBL_ICHG); break; + case POWER_SUPPLY_PROP_VOLTAGE_NOW: /* V_BAT now */ + /* + * This is ADC-sampled immediate charge voltage supplied + * from charger to battery. 
The property name is confusing, + * for clarification refer to: + * Documentation/ABI/testing/sysfs-class-power + * /sys/class/power_supply/<supply_name>/voltage_now + */ + ret = bq25890_field_read(bq, F_BATV); /* read measured value */ + if (ret < 0) + return ret; + + /* converted_val = 2.304V + ADC_val * 20mV (table 10.3.15) */ + val->intval = 2304000 + ret * 20000; + break; + + case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE: /* V_BAT user limit */ + /* + * This is user-configured constant charge voltage supplied + * from charger to battery in second phase of charging, when + * battery voltage reached constant charge voltage. + * + * This value reflects the current hardware setting. + * + * The POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE_MAX is the + * maximum value of this property. + */ + ret = bq25890_field_read(bq, F_VREG); + if (ret < 0) + return ret; + + val->intval = bq25890_find_val(ret, TBL_VREG); + break; + + case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE_MAX: /* V_BAT max */ + /* + * This is maximum allowed constant charge voltage supplied + * from charger to battery in second phase of charging, when + * battery voltage reached constant charge voltage. + * + * This value is constant for each battery and set from DT. + */ + val->intval = bq25890_find_val(bq->init_data.vreg, TBL_VREG); + break; + case POWER_SUPPLY_PROP_TEMP: ret = bq25890_field_read(bq, F_TSPCT); if (ret < 0) From b63e60ebb3fb071a96667307b1e129ccac76ce6b Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Fri, 14 Oct 2022 19:24:24 +0200 Subject: [PATCH 0649/4122] power: supply: bq25890: Add support for setting user charge current and voltage limit Let user set battery charge current and voltage limit via sysfs. This is useful in case the user space needs to reduce charge current to keep the system within thermal limits.
The maximum charge current and voltage are still limited to "ti,charge-current" and "ti,battery-regulation-voltage" values to avoid damaging the hardware in case too high values are set by user space. Reviewed-by: Hans de Goede Signed-off-by: Marek Vasut Signed-off-by: Sebastian Reichel --- drivers/power/supply/bq25890_charger.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/power/supply/bq25890_charger.c b/drivers/power/supply/bq25890_charger.c index 050eef2571e8..95803157ac4a 100644 --- a/drivers/power/supply/bq25890_charger.c +++ b/drivers/power/supply/bq25890_charger.c @@ -667,9 +667,18 @@ static int bq25890_power_supply_set_property(struct power_supply *psy, const union power_supply_propval *val) { struct bq25890_device *bq = power_supply_get_drvdata(psy); + int maxval; u8 lval; switch (psp) { + case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT: + maxval = bq25890_find_val(bq->init_data.ichg, TBL_ICHG); + lval = bq25890_find_idx(min(val->intval, maxval), TBL_ICHG); + return bq25890_field_write(bq, F_ICHG, lval); + case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE: + maxval = bq25890_find_val(bq->init_data.vreg, TBL_VREG); + lval = bq25890_find_idx(min(val->intval, maxval), TBL_VREG); + return bq25890_field_write(bq, F_VREG, lval); case POWER_SUPPLY_PROP_INPUT_CURRENT_LIMIT: lval = bq25890_find_idx(val->intval, TBL_IINLIM); return bq25890_field_write(bq, F_IINLIM, lval); @@ -682,6 +691,8 @@ static int bq25890_power_supply_property_is_writeable(struct power_supply *psy, enum power_supply_property psp) { switch (psp) { + case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT: + case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE: case POWER_SUPPLY_PROP_INPUT_CURRENT_LIMIT: return true; default: From 5f5c10ecaf3fdeba9b2b0af5301977420c2c4df0 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Fri, 14 Oct 2022 19:24:25 +0200 Subject: [PATCH 0650/4122] power: supply: bq25890: Factor out regulator registration code Pull the regulator registration code into separate 
function, so it can be extended to register more regulators later. Currently this is only moving ifdeffery into one place and other preparatory changes. The dev_err_probe() output string is changed to explicitly list vbus regulator failure, so that once more regulators are registered, it would be clear which one failed. Reviewed-by: Hans de Goede Signed-off-by: Marek Vasut Signed-off-by: Sebastian Reichel --- drivers/power/supply/bq25890_charger.c | 51 ++++++++++++++++++-------- 1 file changed, 35 insertions(+), 16 deletions(-) diff --git a/drivers/power/supply/bq25890_charger.c b/drivers/power/supply/bq25890_charger.c index 95803157ac4a..dad98b782a2f 100644 --- a/drivers/power/supply/bq25890_charger.c +++ b/drivers/power/supply/bq25890_charger.c @@ -1110,6 +1110,36 @@ static const struct regulator_desc bq25890_vbus_desc = { .fixed_uV = 5000000, .n_voltages = 1, }; + +static int bq25890_register_regulator(struct bq25890_device *bq) +{ + struct bq25890_platform_data *pdata = dev_get_platdata(bq->dev); + struct regulator_config cfg = { + .dev = bq->dev, + .driver_data = bq, + }; + struct regulator_dev *reg; + + if (!IS_ERR_OR_NULL(bq->usb_phy)) + return 0; + + if (pdata) + cfg.init_data = pdata->regulator_init_data; + + reg = devm_regulator_register(bq->dev, &bq25890_vbus_desc, &cfg); + if (IS_ERR(reg)) { + return dev_err_probe(bq->dev, PTR_ERR(reg), + "registering vbus regulator"); + } + + return 0; +} +#else +static inline int +bq25890_register_regulator(struct bq25890_device *bq) +{ + return 0; +} #endif static int bq25890_get_chip_version(struct bq25890_device *bq) @@ -1305,27 +1335,16 @@ static int bq25890_probe(struct i2c_client *client, /* OTG reporting */ bq->usb_phy = devm_usb_get_phy(dev, USB_PHY_TYPE_USB2); + + ret = bq25890_register_regulator(bq); + if (ret) + return ret; + if (!IS_ERR_OR_NULL(bq->usb_phy)) { INIT_WORK(&bq->usb_work, bq25890_usb_work); bq->usb_nb.notifier_call = bq25890_usb_notifier; usb_register_notifier(bq->usb_phy, &bq->usb_nb); } 
-#ifdef CONFIG_REGULATOR - else { - struct bq25890_platform_data *pdata = dev_get_platdata(dev); - struct regulator_config cfg = { }; - struct regulator_dev *reg; - - cfg.dev = dev; - cfg.driver_data = bq; - if (pdata) - cfg.init_data = pdata->regulator_init_data; - - reg = devm_regulator_register(dev, &bq25890_vbus_desc, &cfg); - if (IS_ERR(reg)) - return dev_err_probe(dev, PTR_ERR(reg), "registering regulator"); - } -#endif ret = bq25890_power_supply_init(bq); if (ret < 0) { From 85052e90007bd9e11123bd691a8131089178a4f8 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Fri, 14 Oct 2022 19:24:26 +0200 Subject: [PATCH 0651/4122] power: supply: bq25890: Add get_voltage support to Vbus regulator The chip is capable of reporting Vbus voltage, add .get_voltage implementation to Vbus regulator to report current Vbus voltage. This requires for the Vbus regulator to be registered always instead of the current state where the regulator is registered only in case USB PHY is not found. Do not provide Vbus regulator enable/disable ops in case USB PHY is present, as they would race with USB PHY notifier which is also used to toggle OTG boost mode. 
Signed-off-by: Marek Vasut Reviewed-by: Hans de Goede Signed-off-by: Sebastian Reichel --- drivers/power/supply/bq25890_charger.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/power/supply/bq25890_charger.c b/drivers/power/supply/bq25890_charger.c index dad98b782a2f..ad5811304f88 100644 --- a/drivers/power/supply/bq25890_charger.c +++ b/drivers/power/supply/bq25890_charger.c @@ -1095,10 +1095,18 @@ static int bq25890_vbus_is_enabled(struct regulator_dev *rdev) return bq25890_field_read(bq, F_OTG_CFG); } +static int bq25890_vbus_get_voltage(struct regulator_dev *rdev) +{ + struct bq25890_device *bq = rdev_get_drvdata(rdev); + + return bq25890_get_vbus_voltage(bq); +} + static const struct regulator_ops bq25890_vbus_ops = { .enable = bq25890_vbus_enable, .disable = bq25890_vbus_disable, .is_enabled = bq25890_vbus_is_enabled, + .get_voltage = bq25890_vbus_get_voltage, }; static const struct regulator_desc bq25890_vbus_desc = { @@ -1107,8 +1115,6 @@ static const struct regulator_desc bq25890_vbus_desc = { .type = REGULATOR_VOLTAGE, .owner = THIS_MODULE, .ops = &bq25890_vbus_ops, - .fixed_uV = 5000000, - .n_voltages = 1, }; static int bq25890_register_regulator(struct bq25890_device *bq) @@ -1120,9 +1126,6 @@ static int bq25890_register_regulator(struct bq25890_device *bq) }; struct regulator_dev *reg; - if (!IS_ERR_OR_NULL(bq->usb_phy)) - return 0; - if (pdata) cfg.init_data = pdata->regulator_init_data; From 14a3d159abf8f6013d40723856283705253e7e9a Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Fri, 14 Oct 2022 19:24:27 +0200 Subject: [PATCH 0652/4122] power: supply: bq25890: Add Vsys regulator The chip is capable of reporting Vsys voltage supplied to the system. Add regulator which represents the Vsys supply. This can be used e.g. as a supply for system PMIC input. 
Reviewed-by: Hans de Goede Signed-off-by: Marek Vasut Signed-off-by: Sebastian Reichel --- drivers/power/supply/bq25890_charger.c | 32 ++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/drivers/power/supply/bq25890_charger.c b/drivers/power/supply/bq25890_charger.c index ad5811304f88..f0362dcb935e 100644 --- a/drivers/power/supply/bq25890_charger.c +++ b/drivers/power/supply/bq25890_charger.c @@ -1102,6 +1102,20 @@ static int bq25890_vbus_get_voltage(struct regulator_dev *rdev) return bq25890_get_vbus_voltage(bq); } +static int bq25890_vsys_get_voltage(struct regulator_dev *rdev) +{ + struct bq25890_device *bq = rdev_get_drvdata(rdev); + int ret; + + /* Should be some output voltage ? */ + ret = bq25890_field_read(bq, F_SYSV); /* read measured value */ + if (ret < 0) + return ret; + + /* converted_val = 2.304V + ADC_val * 20mV (table 10.3.15) */ + return 2304000 + ret * 20000; +} + static const struct regulator_ops bq25890_vbus_ops = { .enable = bq25890_vbus_enable, .disable = bq25890_vbus_disable, @@ -1117,6 +1131,18 @@ static const struct regulator_desc bq25890_vbus_desc = { .ops = &bq25890_vbus_ops, }; +static const struct regulator_ops bq25890_vsys_ops = { + .get_voltage = bq25890_vsys_get_voltage, +}; + +static const struct regulator_desc bq25890_vsys_desc = { + .name = "vsys", + .of_match = "vsys", + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + .ops = &bq25890_vsys_ops, +}; + static int bq25890_register_regulator(struct bq25890_device *bq) { struct bq25890_platform_data *pdata = dev_get_platdata(bq->dev); @@ -1135,6 +1161,12 @@ static int bq25890_register_regulator(struct bq25890_device *bq) "registering vbus regulator"); } + reg = devm_regulator_register(bq->dev, &bq25890_vsys_desc, &cfg); + if (IS_ERR(reg)) { + return dev_err_probe(bq->dev, PTR_ERR(reg), + "registering vsys regulator"); + } + return 0; } #else From 310f541a027b1d5dc68f44f176cde618e6ee9691 Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Wed, 12 Oct 2022 20:00:37 
+0800 Subject: [PATCH 0653/4122] riscv: Enable HAVE_ARCH_HUGE_VMAP for 64BIT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This sets the HAVE_ARCH_HUGE_VMAP option, and defines the required page table functions. With this feature, ioremap area will be mapped with huge page granularity according to its actual size. This feature can be disabled by kernel parameter "nohugeiomap". Signed-off-by: Liu Shixin Reviewed-by: Björn Töpel Tested-by: Björn Töpel Link: https://lore.kernel.org/r/20221012120038.1034354-2-liushixin2@huawei.com [Palmer: minor formatting] Signed-off-by: Palmer Dabbelt --- .../features/vm/huge-vmap/arch-support.txt | 2 +- arch/riscv/Kconfig | 1 + arch/riscv/include/asm/vmalloc.h | 18 ++++ arch/riscv/mm/Makefile | 1 + arch/riscv/mm/pgtable.c | 83 +++++++++++++++++++ 5 files changed, 104 insertions(+), 1 deletion(-) create mode 100644 arch/riscv/mm/pgtable.c diff --git a/Documentation/features/vm/huge-vmap/arch-support.txt b/Documentation/features/vm/huge-vmap/arch-support.txt index 13b4940e0c3a..7274a4b15bcc 100644 --- a/Documentation/features/vm/huge-vmap/arch-support.txt +++ b/Documentation/features/vm/huge-vmap/arch-support.txt @@ -21,7 +21,7 @@ | openrisc: | TODO | | parisc: | TODO | | powerpc: | ok | - | riscv: | TODO | + | riscv: | ok | | s390: | TODO | | sh: | TODO | | sparc: | TODO | diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 6b48a3ae9843..db2082dd456d 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -72,6 +72,7 @@ config RISCV select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO select HARDIRQS_SW_RESEND select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_HUGE_VMAP if MMU && 64BIT && !XIP_KERNEL select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL select HAVE_ARCH_JUMP_LABEL_RELATIVE if !XIP_KERNEL select HAVE_ARCH_KASAN if MMU && 64BIT diff --git a/arch/riscv/include/asm/vmalloc.h b/arch/riscv/include/asm/vmalloc.h index ff9abc00d139..48da5371f1e9 100644 --- 
a/arch/riscv/include/asm/vmalloc.h +++ b/arch/riscv/include/asm/vmalloc.h @@ -1,4 +1,22 @@ #ifndef _ASM_RISCV_VMALLOC_H #define _ASM_RISCV_VMALLOC_H +#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP + +#define IOREMAP_MAX_ORDER (PUD_SHIFT) + +#define arch_vmap_pud_supported arch_vmap_pud_supported +static inline bool arch_vmap_pud_supported(pgprot_t prot) +{ + return true; +} + +#define arch_vmap_pmd_supported arch_vmap_pmd_supported +static inline bool arch_vmap_pmd_supported(pgprot_t prot) +{ + return true; +} + +#endif + #endif /* _ASM_RISCV_VMALLOC_H */ diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index d76aabf4b94d..ce7f121ad2dc 100644 --- a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -13,6 +13,7 @@ obj-y += extable.o obj-$(CONFIG_MMU) += fault.o pageattr.o obj-y += cacheflush.o obj-y += context.o +obj-y += pgtable.o ifeq ($(CONFIG_MMU),y) obj-$(CONFIG_SMP) += tlbflush.o diff --git a/arch/riscv/mm/pgtable.c b/arch/riscv/mm/pgtable.c new file mode 100644 index 000000000000..6645ead1a7c1 --- /dev/null +++ b/arch/riscv/mm/pgtable.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include + +#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP +int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot) +{ + return 0; +} + +void p4d_clear_huge(p4d_t *p4d) +{ +} + +int pud_set_huge(pud_t *pud, phys_addr_t phys, pgprot_t prot) +{ + pud_t new_pud = pfn_pud(__phys_to_pfn(phys), prot); + + set_pud(pud, new_pud); + return 1; +} + +int pud_clear_huge(pud_t *pud) +{ + if (!pud_leaf(READ_ONCE(*pud))) + return 0; + pud_clear(pud); + return 1; +} + +int pud_free_pmd_page(pud_t *pud, unsigned long addr) +{ + pmd_t *pmd = pud_pgtable(*pud); + int i; + + pud_clear(pud); + + flush_tlb_kernel_range(addr, addr + PUD_SIZE); + + for (i = 0; i < PTRS_PER_PMD; i++) { + if (!pmd_none(pmd[i])) { + pte_t *pte = (pte_t *)pmd_page_vaddr(pmd[i]); + + pte_free_kernel(NULL, pte); + } + } + + pmd_free(NULL, pmd); + + return 1; +} + +int pmd_set_huge(pmd_t 
*pmd, phys_addr_t phys, pgprot_t prot) +{ + pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), prot); + + set_pmd(pmd, new_pmd); + return 1; +} + +int pmd_clear_huge(pmd_t *pmd) +{ + if (!pmd_leaf(READ_ONCE(*pmd))) + return 0; + pmd_clear(pmd); + return 1; +} + +int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) +{ + pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd); + + pmd_clear(pmd); + + flush_tlb_kernel_range(addr, addr + PMD_SIZE); + pte_free_kernel(NULL, pte); + return 1; +} + +#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ From be79afc740b5a1b2048cd67580cdb9d76d7e6cc2 Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Wed, 12 Oct 2022 20:00:38 +0800 Subject: [PATCH 0654/4122] riscv: Enable HAVE_ARCH_HUGE_VMALLOC for 64BIT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After we support HAVE_ARCH_HUGE_VMAP, we can now enable HAVE_ARCH_HUGE_VMALLOC too. This feature has been used in kvmalloc and alloc_large_system_hash for now. This feature can be disabled by kernel parameters "nohugevmalloc". 
Signed-off-by: Liu Shixin Reviewed-by: Björn Töpel Tested-by: Björn Töpel Link: https://lore.kernel.org/r/20221012120038.1034354-3-liushixin2@huawei.com [Palmer: minor formatting] Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index db2082dd456d..7cd981f96f48 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -72,6 +72,7 @@ config RISCV select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO select HARDIRQS_SW_RESEND select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP select HAVE_ARCH_HUGE_VMAP if MMU && 64BIT && !XIP_KERNEL select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL select HAVE_ARCH_JUMP_LABEL_RELATIVE if !XIP_KERNEL From 1d96c542b29129abbee819f355a1facbee07626e Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Fri, 28 Oct 2022 23:09:32 +0530 Subject: [PATCH 0655/4122] MAINTAINERS: Remove Hemant from MHI bus Hemant moved out of Qualcomm and expressed his wish to not continue doing any reviews for MHI patches. So let's remove him from MAINTAINERS file. Reviewed-by: Jeffrey Hugo Signed-off-by: Manivannan Sadhasivam --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index cf0f18502372..ad9279218885 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13395,7 +13395,6 @@ F: arch/arm64/boot/dts/marvell/armada-3720-uDPU.dts MHI BUS M: Manivannan Sadhasivam -R: Hemant Kumar L: mhi@lists.linux.dev L: linux-arm-msm@vger.kernel.org S: Maintained From 2fe5dc3dcc2920669217e3696ec15fec6630a92f Mon Sep 17 00:00:00 2001 From: Emily Peri Date: Fri, 28 Oct 2022 11:39:19 -0700 Subject: [PATCH 0656/4122] staging: rtl8723bs: replace ternary statement with min_t macro Ternary statements that pick the min of two values can be replaced by the macro min_t(). This improves readability, since it's quicker to understand min_t(type, x, y) than x < y ? x : y. Issue found by coccicheck.
Signed-off-by: Emily Peri Link: https://lore.kernel.org/r/Y1wh1zYMAbbKSrGB@marshmallow Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c index 6aeb169c6ebf..54004f846cf0 100644 --- a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c +++ b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c @@ -1551,7 +1551,7 @@ static int rtw_cfg80211_set_wpa_ie(struct adapter *padapter, u8 *pie, size_t iel wps_ie = rtw_get_wps_ie(buf, ielen, NULL, &wps_ielen); if (wps_ie && wps_ielen > 0) { - padapter->securitypriv.wps_ie_len = wps_ielen < MAX_WPS_IE_LEN ? wps_ielen : MAX_WPS_IE_LEN; + padapter->securitypriv.wps_ie_len = min_t(uint, wps_ielen, MAX_WPS_IE_LEN); memcpy(padapter->securitypriv.wps_ie, wps_ie, padapter->securitypriv.wps_ie_len); set_fwstate(&padapter->mlmepriv, WIFI_UNDER_WPS); } else { From 737143dc87dfd3cb600a46092387c9caa814ce88 Mon Sep 17 00:00:00 2001 From: Michael Straube Date: Sun, 30 Oct 2022 09:10:20 +0100 Subject: [PATCH 0657/4122] staging: r8188eu: use min() instead of ternary operator Replace a ternary operator usage with the min() macro to improve readability. 
Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Michael Straube Link: https://lore.kernel.org/r/20221030081020.8533-1-straube.linux@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/os_dep/ioctl_linux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/r8188eu/os_dep/ioctl_linux.c b/drivers/staging/r8188eu/os_dep/ioctl_linux.c index 8516e253bb03..22aab3f986b4 100644 --- a/drivers/staging/r8188eu/os_dep/ioctl_linux.c +++ b/drivers/staging/r8188eu/os_dep/ioctl_linux.c @@ -3568,7 +3568,7 @@ static int rtw_wx_set_priv(struct net_device *dev, if ((_VENDOR_SPECIFIC_IE_ == probereq_wpsie[0]) && (!memcmp(&probereq_wpsie[2], wps_oui, 4))) { - cp_sz = probereq_wpsie_len > MAX_WPS_IE_LEN ? MAX_WPS_IE_LEN : probereq_wpsie_len; + cp_sz = min(probereq_wpsie_len, MAX_WPS_IE_LEN); pmlmepriv->wps_probe_req_ie_len = 0; kfree(pmlmepriv->wps_probe_req_ie); From cbcab58a7cbf51e0e1e8e5dec0bd44be48e966db Mon Sep 17 00:00:00 2001 From: Kang Minchul Date: Wed, 26 Oct 2022 02:06:18 +0900 Subject: [PATCH 0658/4122] staging: r8188eu: remove unnecessary variable in ioctl_linux Returning value 0 directly instead of storing it in variable ret. This commit can prevent cocci warning as follows: Unneeded variable: "ret". 
Return "0" on line 3030 Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Kang Minchul Link: https://lore.kernel.org/r/20221025170621.271903-2-tegongkang@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/os_dep/ioctl_linux.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/staging/r8188eu/os_dep/ioctl_linux.c b/drivers/staging/r8188eu/os_dep/ioctl_linux.c index 22aab3f986b4..f3b3d7468539 100644 --- a/drivers/staging/r8188eu/os_dep/ioctl_linux.c +++ b/drivers/staging/r8188eu/os_dep/ioctl_linux.c @@ -2979,8 +2979,6 @@ static int rtw_p2p_set(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - int ret = 0; - if (!memcmp(extra, "enable =", 7)) { rtw_wext_p2p_enable(dev, info, wrqu, &extra[7]); } else if (!memcmp(extra, "setDN =", 6)) { @@ -3027,7 +3025,7 @@ static int rtw_p2p_set(struct net_device *dev, rtw_p2p_set_persistent(dev, info, wrqu, &extra[11]); } - return ret; + return 0; } static int rtw_p2p_get2(struct net_device *dev, From f67469fe658a484f8c000bfcf66a191417bd6e30 Mon Sep 17 00:00:00 2001 From: Kang Minchul Date: Wed, 26 Oct 2022 02:06:19 +0900 Subject: [PATCH 0659/4122] staging: r8188eu: remove unnecessary variable in rtw_recv Return _SUCCESS directly instead of storing it in a variable. This can prevent cocci warning as follows: Unneeded variable: "ret".
Return "_SUCCESS" on line 1516 Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Kang Minchul Link: https://lore.kernel.org/r/20221025170621.271903-3-tegongkang@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_recv.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_recv.c b/drivers/staging/r8188eu/core/rtw_recv.c index 4b68a543f68b..94f85cd7038d 100644 --- a/drivers/staging/r8188eu/core/rtw_recv.c +++ b/drivers/staging/r8188eu/core/rtw_recv.c @@ -1415,7 +1415,6 @@ static int amsdu_to_msdu(struct adapter *padapter, struct recv_frame *prframe) struct recv_priv *precvpriv = &padapter->recvpriv; struct __queue *pfree_recv_queue = &precvpriv->free_recv_queue; - int ret = _SUCCESS; nr_subframes = 0; @@ -1513,7 +1512,7 @@ exit: prframe->len = 0; rtw_free_recvframe(prframe, pfree_recv_queue);/* free this recv_frame */ - return ret; + return _SUCCESS; } static bool check_indicate_seq(struct recv_reorder_ctrl *preorder_ctrl, u16 seq_num) From 0a7bf6a948b2a772ebed3915b6c47189d63dc246 Mon Sep 17 00:00:00 2001 From: Kang Minchul Date: Wed, 26 Oct 2022 02:06:20 +0900 Subject: [PATCH 0660/4122] staging: r8188eu: remove unnecessary variable in rtl8188eu_xmit Return 0 directly instead of storing it in a variable. This can prevent cocci warning as follows: Unneeded variable: "pull". 
Return "0" on line 298 Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Kang Minchul Link: https://lore.kernel.org/r/20221025170621.271903-4-tegongkang@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/hal/rtl8188eu_xmit.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/staging/r8188eu/hal/rtl8188eu_xmit.c b/drivers/staging/r8188eu/hal/rtl8188eu_xmit.c index 8e4a5acc0b18..6d1f56d1f9d7 100644 --- a/drivers/staging/r8188eu/hal/rtl8188eu_xmit.c +++ b/drivers/staging/r8188eu/hal/rtl8188eu_xmit.c @@ -149,7 +149,6 @@ static void fill_txdesc_phy(struct pkt_attrib *pattrib, __le32 *pdw) static s32 update_txdesc(struct xmit_frame *pxmitframe, u8 *pmem, s32 sz, u8 bagg_pkt) { - int pull = 0; uint qsel; u8 data_rate, pwr_status, offset; struct adapter *adapt = pxmitframe->padapter; @@ -295,7 +294,7 @@ static s32 update_txdesc(struct xmit_frame *pxmitframe, u8 *pmem, s32 sz, u8 bag ODM_SetTxAntByTxInfo_88E(&haldata->odmpriv, pmem, pattrib->mac_id); rtl8188eu_cal_txdesc_chksum(ptxdesc); - return pull; + return 0; } /* for non-agg data frame or management frame */ From 95571b8db71e586c8071670fde0c6db5e4ced353 Mon Sep 17 00:00:00 2001 From: Kang Minchul Date: Wed, 26 Oct 2022 02:06:21 +0900 Subject: [PATCH 0661/4122] staging: r8188eu: make rtw_sta_flush to void Make function rtw_sta_flush to void in order to prevent cocci warning as follows: Unneeded variable: "ret". 
Return "0" on line 1031 Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Kang Minchul Link: https://lore.kernel.org/r/20221025170621.271903-5-tegongkang@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_ap.c | 7 ++----- drivers/staging/r8188eu/include/rtw_ap.h | 2 +- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_ap.c b/drivers/staging/r8188eu/core/rtw_ap.c index 24eb8dce9bfe..e0ca4b6e17cc 100644 --- a/drivers/staging/r8188eu/core/rtw_ap.c +++ b/drivers/staging/r8188eu/core/rtw_ap.c @@ -1017,10 +1017,9 @@ u8 ap_free_sta(struct adapter *padapter, struct sta_info *psta, return beacon_updated; } -int rtw_sta_flush(struct adapter *padapter) +void rtw_sta_flush(struct adapter *padapter) { struct list_head *phead, *plist; - int ret = 0; struct sta_info *psta = NULL; struct sta_priv *pstapriv = &padapter->stapriv; struct mlme_ext_priv *pmlmeext = &padapter->mlmeextpriv; @@ -1028,7 +1027,7 @@ int rtw_sta_flush(struct adapter *padapter) u8 bc_addr[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; if ((pmlmeinfo->state & 0x03) != WIFI_FW_AP_STATE) - return ret; + return; spin_lock_bh(&pstapriv->asoc_list_lock); phead = &pstapriv->asoc_list; @@ -1050,8 +1049,6 @@ int rtw_sta_flush(struct adapter *padapter) issue_deauth(padapter, bc_addr, WLAN_REASON_DEAUTH_LEAVING); associated_clients_update(padapter, true); - - return ret; } /* called > TSR LEVEL for USB or SDIO Interface*/ diff --git a/drivers/staging/r8188eu/include/rtw_ap.h b/drivers/staging/r8188eu/include/rtw_ap.h index 8b4134eb3095..89b02c97e041 100644 --- a/drivers/staging/r8188eu/include/rtw_ap.h +++ b/drivers/staging/r8188eu/include/rtw_ap.h @@ -26,7 +26,7 @@ u8 bss_cap_update_on_sta_leave(struct adapter *padapter, struct sta_info *psta); void sta_info_update(struct adapter *padapter, struct sta_info *psta); u8 ap_free_sta(struct adapter *padapter, struct sta_info *psta, bool active, u16 reason); -int rtw_sta_flush(struct adapter 
*padapter); +void rtw_sta_flush(struct adapter *padapter); void start_ap_mode(struct adapter *padapter); void stop_ap_mode(struct adapter *padapter); void update_bmc_sta(struct adapter *padapter); From f369953d9cd99a3977859a4bbad58897bc3da153 Mon Sep 17 00:00:00 2001 From: Aaron Lawrence Date: Thu, 27 Oct 2022 20:56:27 +0700 Subject: [PATCH 0662/4122] staging: rtl8192e: rtllib_module: remove unnecessary parentheses This patch is intended to remove unnecessary parentheses in the rtllib_module.c file following the Linux kernel coding-style regulations. The modification is recommended by the checkpatch script. Signed-off-by: Aaron Lawrence Link: https://lore.kernel.org/r/20221027135627.vzc3woeuhrivozqz@plymouth Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtllib_module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rtl8192e/rtllib_module.c b/drivers/staging/rtl8192e/rtllib_module.c index 41697ef55dbd..ce8b73f437a3 100644 --- a/drivers/staging/rtl8192e/rtllib_module.c +++ b/drivers/staging/rtl8192e/rtllib_module.c @@ -107,7 +107,7 @@ struct net_device *alloc_rtllib(int sizeof_priv) spin_lock_init(&ieee->lock); spin_lock_init(&ieee->wpax_suitlist_lock); spin_lock_init(&ieee->reorder_spinlock); - atomic_set(&(ieee->atm_swbw), 0); + atomic_set(&ieee->atm_swbw, 0); /* SAM FIXME */ lib80211_crypt_info_init(&ieee->crypt_info, "RTLLIB", &ieee->lock); From 607732b42ce6aad76bbca3de8ea853c03e374958 Mon Sep 17 00:00:00 2001 From: Yogesh Hegde Date: Sun, 30 Oct 2022 19:02:04 +0530 Subject: [PATCH 0663/4122] staging: rtl8192e: Rename variables rateIndex and rateBitmap to avoid CamelCase Rename variables * rateIndex to rate_index * rateBitmap to rate_bitmap to avoid CamelCase which is not accepted by checkpatch.pl . 
Signed-off-by: Yogesh Hegde Link: https://lore.kernel.org/r/20221030133204.GA416592@zephyrus Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl8192e/rtl_dm.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c b/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c index c9e495538e2c..767c746fc73d 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c @@ -2121,8 +2121,8 @@ static void _rtl92e_dm_end_sw_fsync(struct net_device *dev) static void _rtl92e_dm_start_sw_fsync(struct net_device *dev) { struct r8192_priv *priv = rtllib_priv(dev); - u32 rateIndex; - u32 rateBitmap; + u32 rate_index; + u32 rate_bitmap; priv->rate_record = 0; priv->ContinueDiffCount = 0; @@ -2136,12 +2136,12 @@ static void _rtl92e_dm_start_sw_fsync(struct net_device *dev) priv->rtllib->fsync_firstdiff_ratethreshold = 200; priv->rtllib->fsync_seconddiff_ratethreshold = 200; } - for (rateIndex = 0; rateIndex <= 27; rateIndex++) { - rateBitmap = 1 << rateIndex; - if (priv->rtllib->fsync_rate_bitmap & rateBitmap) + for (rate_index = 0; rate_index <= 27; rate_index++) { + rate_bitmap = 1 << rate_index; + if (priv->rtllib->fsync_rate_bitmap & rate_bitmap) priv->rate_record += priv->stats.received_rate_histogram[1] - [rateIndex]; + [rate_index]; } if (timer_pending(&priv->fsync_timer)) del_timer_sync(&priv->fsync_timer); From 130d7c481008703b4b103c9322e5fdfac130cc55 Mon Sep 17 00:00:00 2001 From: Aaron Lawrence Date: Wed, 26 Oct 2022 19:39:28 +0700 Subject: [PATCH 0664/4122] staging: rtl8192e: rtllib_crypt_tkip: multiple blank lines removal Removed multiple unnecessary blank lines in accordance with the Linux kernel coding-style regulations. The issues were reported by the checkpatch script. 
Tested-by: Philipp Hortmann Signed-off-by: Aaron Lawrence Link: https://lore.kernel.org/r/f2103758c23f37b61fcbe14f8ed0da8d6b31f5c6.1666787061.git.t4rmin@zohomail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtllib_crypt_tkip.c | 24 -------------------- 1 file changed, 24 deletions(-) diff --git a/drivers/staging/rtl8192e/rtllib_crypt_tkip.c b/drivers/staging/rtl8192e/rtllib_crypt_tkip.c index 8bc95651e384..468f28f48eba 100644 --- a/drivers/staging/rtl8192e/rtllib_crypt_tkip.c +++ b/drivers/staging/rtl8192e/rtllib_crypt_tkip.c @@ -91,7 +91,6 @@ fail: return NULL; } - static void rtllib_tkip_deinit(void *priv) { struct rtllib_tkip_data *_priv = priv; @@ -103,49 +102,41 @@ static void rtllib_tkip_deinit(void *priv) kfree_sensitive(priv); } - static inline u16 RotR1(u16 val) { return (val >> 1) | (val << 15); } - static inline u8 Lo8(u16 val) { return val & 0xff; } - static inline u8 Hi8(u16 val) { return val >> 8; } - static inline u16 Lo16(u32 val) { return val & 0xffff; } - static inline u16 Hi16(u32 val) { return val >> 16; } - static inline u16 Mk16(u8 hi, u8 lo) { return lo | (hi << 8); } - static inline u16 Mk16_le(u16 *v) { return *v; } - static const u16 Sbox[256] = { 0xC6A5, 0xF884, 0xEE99, 0xF68D, 0xFF0D, 0xD6BD, 0xDEB1, 0x9154, 0x6050, 0x0203, 0xCEA9, 0x567D, 0xE719, 0xB562, 0x4DE6, 0xEC9A, @@ -181,17 +172,14 @@ static const u16 Sbox[256] = { 0x82C3, 0x29B0, 0x5A77, 0x1E11, 0x7BCB, 0xA8FC, 0x6DD6, 0x2C3A, }; - static inline u16 _S_(u16 v) { u16 t = Sbox[Hi8(v)]; return Sbox[Lo8(v)] ^ ((t << 8) | (t >> 8)); } - #define PHASE1_LOOP_COUNT 8 - static void tkip_mixing_phase1(u16 *TTAK, const u8 *TK, const u8 *TA, u32 IV32) { int i, j; @@ -213,7 +201,6 @@ static void tkip_mixing_phase1(u16 *TTAK, const u8 *TK, const u8 *TA, u32 IV32) } } - static void tkip_mixing_phase2(u8 *WEPSeed, const u8 *TK, const u16 *TTAK, u16 IV16) { @@ -263,7 +250,6 @@ static void tkip_mixing_phase2(u8 *WEPSeed, const u8 *TK, const u16 *TTAK, #endif } - static int 
rtllib_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv) { struct rtllib_tkip_data *tkey = priv; @@ -293,7 +279,6 @@ static int rtllib_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv) } else tkey->tx_phase1_done = 1; - len = skb->len - hdr_len; pos = skb_push(skb, 8); memmove(pos, pos + 8, hdr_len); @@ -337,7 +322,6 @@ static int rtllib_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv) return ret; return 0; - } static int rtllib_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) @@ -453,7 +437,6 @@ static int rtllib_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) return keyidx; } - static int michael_mic(struct crypto_shash *tfm_michael, u8 *key, u8 *hdr, u8 *data, size_t data_len, u8 *mic) { @@ -511,7 +494,6 @@ static void michael_mic_hdr(struct sk_buff *skb, u8 *hdr) hdr[13] = hdr[14] = hdr[15] = 0; /* reserved */ } - static int rtllib_michael_mic_add(struct sk_buff *skb, int hdr_len, void *priv) { struct rtllib_tkip_data *tkey = priv; @@ -539,7 +521,6 @@ static int rtllib_michael_mic_add(struct sk_buff *skb, int hdr_len, void *priv) return 0; } - static void rtllib_michael_mic_failure(struct net_device *dev, struct rtllib_hdr_4addr *hdr, int keyidx) @@ -609,7 +590,6 @@ static int rtllib_michael_mic_verify(struct sk_buff *skb, int keyidx, return 0; } - static int rtllib_tkip_set_key(void *key, int len, u8 *seq, void *priv) { struct rtllib_tkip_data *tkey = priv; @@ -640,7 +620,6 @@ static int rtllib_tkip_set_key(void *key, int len, u8 *seq, void *priv) return 0; } - static int rtllib_tkip_get_key(void *key, int len, u8 *seq, void *priv) { struct rtllib_tkip_data *tkey = priv; @@ -671,7 +650,6 @@ static int rtllib_tkip_get_key(void *key, int len, u8 *seq, void *priv) return TKIP_KEY_LEN; } - static void rtllib_tkip_print_stats(struct seq_file *m, void *priv) { struct rtllib_tkip_data *tkip = priv; @@ -713,13 +691,11 @@ static struct lib80211_crypto_ops rtllib_crypt_tkip = { .owner = THIS_MODULE, }; - static int __init 
rtllib_crypto_tkip_init(void) { return lib80211_register_crypto_ops(&rtllib_crypt_tkip); } - static void __exit rtllib_crypto_tkip_exit(void) { lib80211_unregister_crypto_ops(&rtllib_crypt_tkip); From 6360fc223387179bb943d8865b853d0bb273dcf4 Mon Sep 17 00:00:00 2001 From: Aaron Lawrence Date: Wed, 26 Oct 2022 19:40:15 +0700 Subject: [PATCH 0665/4122] staging: rtl8192e: rtllib_crypt_tkip: blank line before close brace removal Removed multiple blank lines that are not necessary before a closing brace. The issues were found with the checkpatch script and were dealt with in accordance with the Linux kernel coding-style guidelines. Tested-by: Philipp Hortmann Signed-off-by: Aaron Lawrence Link: https://lore.kernel.org/r/1ba11754f1b1e39b1525a837b8493ba8434d5e3b.1666787061.git.t4rmin@zohomail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtllib_crypt_tkip.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/staging/rtl8192e/rtllib_crypt_tkip.c b/drivers/staging/rtl8192e/rtllib_crypt_tkip.c index 468f28f48eba..40930c9b0017 100644 --- a/drivers/staging/rtl8192e/rtllib_crypt_tkip.c +++ b/drivers/staging/rtl8192e/rtllib_crypt_tkip.c @@ -321,7 +321,6 @@ static int rtllib_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv) if (!tcb_desc->bHwSec) return ret; return 0; - } static int rtllib_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) @@ -420,7 +419,6 @@ static int rtllib_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) tkey->dot11RSNAStatsTKIPICVErrors++; return -5; } - } /* Update real counters only after Michael MIC verification has From dcbdcfca138ae185d7013e25ff02d9defd392884 Mon Sep 17 00:00:00 2001 From: Aaron Lawrence Date: Wed, 26 Oct 2022 19:41:08 +0700 Subject: [PATCH 0666/4122] staging: rtl8192e: rtllib_crypt_tkip: fixed alignment matching open parentheses Aligned multiple lines to be at the same indentation of open parentheses before it in accordance with the Linux kernel coding-style regulations. 
The issues were found by running the checkpatch script on the file. Tested-by: Philipp Hortmann Signed-off-by: Aaron Lawrence Link: https://lore.kernel.org/r/4253b94d6b7d94713afb02fa63d0a98686e77cc1.1666787061.git.t4rmin@zohomail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtllib_crypt_tkip.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/staging/rtl8192e/rtllib_crypt_tkip.c b/drivers/staging/rtl8192e/rtllib_crypt_tkip.c index 40930c9b0017..6c98f43f967e 100644 --- a/drivers/staging/rtl8192e/rtllib_crypt_tkip.c +++ b/drivers/staging/rtl8192e/rtllib_crypt_tkip.c @@ -271,7 +271,7 @@ static int rtllib_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv) if (!tcb_desc->bHwSec) { if (!tkey->tx_phase1_done) { tkip_mixing_phase1(tkey->tx_ttak, tkey->key, hdr->addr2, - tkey->tx_iv32); + tkey->tx_iv32); tkey->tx_phase1_done = 1; } tkip_mixing_phase2(rc4key, tkey->key, tkey->tx_ttak, @@ -372,8 +372,8 @@ static int rtllib_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) if (!tcb_desc->bHwSec || (skb->cb[0] == 1)) { if ((iv32 < tkey->rx_iv32 || - (iv32 == tkey->rx_iv32 && iv16 <= tkey->rx_iv16)) && - tkey->initialized) { + (iv32 == tkey->rx_iv32 && iv16 <= tkey->rx_iv16)) && + tkey->initialized) { if (net_ratelimit()) { netdev_dbg(skb->dev, "Replay detected: STA= %pM previous TSC %08x%04x received TSC %08x%04x\n", @@ -513,7 +513,7 @@ static int rtllib_michael_mic_add(struct sk_buff *skb, int hdr_len, void *priv) tkey->tx_hdr[12] = *(skb->data + hdr_len - 2) & 0x07; pos = skb_put(skb, 8); if (michael_mic(tkey->tx_tfm_michael, &tkey->key[16], tkey->tx_hdr, - skb->data + hdr_len, skb->len - 8 - hdr_len, pos)) + skb->data + hdr_len, skb->len - 8 - hdr_len, pos)) return -1; return 0; From 8d1dcc729ada8e604442190e2887ff85d37ea6ca Mon Sep 17 00:00:00 2001 From: Aaron Lawrence Date: Wed, 26 Oct 2022 19:41:56 +0700 Subject: [PATCH 0667/4122] staging: rtl8192e: rtllib_crypt_tkip: fixes on unbalanced braces Added 
braces around needed arms of statements that need them in accordance with the Linux kernel coding-style regulations. The issues were found with the help of the checkpatch script. Tested-by: Philipp Hortmann Signed-off-by: Aaron Lawrence Link: https://lore.kernel.org/r/641d8360c5f86b54efc96d7f8ef70be1371db480.1666787061.git.t4rmin@zohomail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtllib_crypt_tkip.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/staging/rtl8192e/rtllib_crypt_tkip.c b/drivers/staging/rtl8192e/rtllib_crypt_tkip.c index 6c98f43f967e..d1b86de76eb2 100644 --- a/drivers/staging/rtl8192e/rtllib_crypt_tkip.c +++ b/drivers/staging/rtl8192e/rtllib_crypt_tkip.c @@ -276,8 +276,9 @@ static int rtllib_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv) } tkip_mixing_phase2(rc4key, tkey->key, tkey->tx_ttak, tkey->tx_iv16); - } else + } else { tkey->tx_phase1_done = 1; + } len = skb->len - hdr_len; pos = skb_push(skb, 8); @@ -610,10 +611,11 @@ static int rtllib_tkip_set_key(void *key, int len, u8 *seq, void *priv) (seq[3] << 8) | seq[2]; tkey->rx_iv16 = (seq[1] << 8) | seq[0]; } - } else if (len == 0) + } else if (len == 0) { tkey->key_set = 0; - else + } else { return -1; + } return 0; } From 1f610736f7f40efbcac07a1c100ef703c74d30c1 Mon Sep 17 00:00:00 2001 From: Aaron Lawrence Date: Wed, 26 Oct 2022 19:42:55 +0700 Subject: [PATCH 0668/4122] staging: rtl8192e: rtllib_crypt_tkip: split multiple assignments Split a multiple-assignment statement into individual assignments on different lines in accordance with the Linux kernel coding-style regulations. Also repositioned comments on it and the statement before for increased legibility. The multiple assignments issue was found by the checkpatch script, while the comment legibility issue was found through direct observation.
Tested-by: Philipp Hortmann Signed-off-by: Aaron Lawrence Link: https://lore.kernel.org/r/a9ab257d0042afd3b3231eefe4f58c0c3ac7649f.1666787061.git.t4rmin@zohomail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtllib_crypt_tkip.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/staging/rtl8192e/rtllib_crypt_tkip.c b/drivers/staging/rtl8192e/rtllib_crypt_tkip.c index d1b86de76eb2..b8a52b9f9d07 100644 --- a/drivers/staging/rtl8192e/rtllib_crypt_tkip.c +++ b/drivers/staging/rtl8192e/rtllib_crypt_tkip.c @@ -488,9 +488,13 @@ static void michael_mic_hdr(struct sk_buff *skb, u8 *hdr) break; } - hdr[12] = 0; /* priority */ + /* priority */ + hdr[12] = 0; - hdr[13] = hdr[14] = hdr[15] = 0; /* reserved */ + /* reserved */ + hdr[13] = 0; + hdr[14] = 0; + hdr[15] = 0; } static int rtllib_michael_mic_add(struct sk_buff *skb, int hdr_len, void *priv) From 1838742b1d4e6d89d9448fba2f3340b0aaaede73 Mon Sep 17 00:00:00 2001 From: Aaron Lawrence Date: Wed, 26 Oct 2022 19:43:51 +0700 Subject: [PATCH 0669/4122] staging: rtl8192e: rtllib_crypt_tkip: rewritten comparison to NULL Rewritten a comparison to NULL with a negation operator in accordance with the Linux kernel coding-style regulations. The fix was directly recommended by the checkpatch script. 
Tested-by: Philipp Hortmann Signed-off-by: Aaron Lawrence Link: https://lore.kernel.org/r/ca33296630627020694f4b653580f689a8a3d1c7.1666787061.git.t4rmin@zohomail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtllib_crypt_tkip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rtl8192e/rtllib_crypt_tkip.c b/drivers/staging/rtl8192e/rtllib_crypt_tkip.c index b8a52b9f9d07..9fdfcc017ee6 100644 --- a/drivers/staging/rtl8192e/rtllib_crypt_tkip.c +++ b/drivers/staging/rtl8192e/rtllib_crypt_tkip.c @@ -62,7 +62,7 @@ static void *rtllib_tkip_init(int key_idx) return NULL; priv = kzalloc(sizeof(*priv), GFP_ATOMIC); - if (priv == NULL) + if (!priv) goto fail; priv->key_idx = key_idx; From 1a048cde49046f37f91d340d25d7622f8f87a994 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sat, 29 Oct 2022 19:10:10 +0200 Subject: [PATCH 0670/4122] staging: r8188eu: replace get_da with ieee80211_get_DA Replace a call to the driver-specific get_da function with ieee80211_get_DA from ieee80211.h. 
Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221029171011.1572091-2-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index ffb708f242e6..127dbc4e8b9a 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -798,11 +798,12 @@ static void OnAuthClient(struct adapter *padapter, struct recv_frame *precv_fram unsigned char *p; struct mlme_ext_priv *pmlmeext = &padapter->mlmeextpriv; struct mlme_ext_info *pmlmeinfo = &pmlmeext->mlmext_info; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)precv_frame->rx_data; u8 *pframe = precv_frame->rx_data; uint pkt_len = precv_frame->len; /* check A1 matches or not */ - if (memcmp(myid(&padapter->eeprompriv), get_da(pframe), ETH_ALEN)) + if (memcmp(myid(&padapter->eeprompriv), ieee80211_get_DA(hdr), ETH_ALEN)) return; if (!(pmlmeinfo->state & WIFI_FW_AUTH_STATE)) From dbc97f832aa798c16453eeb4eb5fe74b5c998223 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sat, 29 Oct 2022 19:10:11 +0200 Subject: [PATCH 0671/4122] staging: r8188eu: remove get_da Replace the last get_da call with ieee80211_get_DA and remove the get_da function. 
Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221029171011.1572091-3-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c | 3 ++- drivers/staging/r8188eu/include/wifi.h | 22 ------------------- 2 files changed, 2 insertions(+), 23 deletions(-) diff --git a/drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c b/drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c index dff0cba751df..f01ae71bcdb1 100644 --- a/drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c +++ b/drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c @@ -108,6 +108,7 @@ void update_recvframe_attrib_88e(struct recv_frame *precvframe, struct recv_stat */ void update_recvframe_phyinfo_88e(struct recv_frame *precvframe, struct phy_stat *pphy_status) { + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)precvframe->rx_data; struct adapter *padapter = precvframe->adapter; struct rx_pkt_attrib *pattrib = &precvframe->attrib; struct hal_data_8188e *pHalData = &padapter->haldata; @@ -125,7 +126,7 @@ void update_recvframe_phyinfo_88e(struct recv_frame *precvframe, struct phy_stat get_bssid(&padapter->mlmepriv), ETH_ALEN)); pkt_info.bPacketToSelf = pkt_info.bPacketMatchBSSID && - (!memcmp(get_da(wlanhdr), + (!memcmp(ieee80211_get_DA(hdr), myid(&padapter->eeprompriv), ETH_ALEN)); pkt_info.bPacketBeacon = pkt_info.bPacketMatchBSSID && ieee80211_is_beacon(fc); diff --git a/drivers/staging/r8188eu/include/wifi.h b/drivers/staging/r8188eu/include/wifi.h index 6b50089cea29..92a584a8b6c0 100644 --- a/drivers/staging/r8188eu/include/wifi.h +++ b/drivers/staging/r8188eu/include/wifi.h @@ -217,28 +217,6 @@ enum WIFI_REG_DOMAIN { #define GetAddr4Ptr(pbuf) ((unsigned char *)((size_t)(pbuf) + 24)) -static inline unsigned char *get_da(unsigned char *pframe) -{ - unsigned char *da; - unsigned int to_fr_ds = (GetToDs(pframe) << 1) | GetFrDs(pframe); - - switch (to_fr_ds) { - case 0x00: /* ToDs=0, FromDs=0 */ - da = GetAddr1Ptr(pframe); - break; 
- case 0x01: /* ToDs=0, FromDs=1 */ - da = GetAddr1Ptr(pframe); - break; - case 0x02: /* ToDs=1, FromDs=0 */ - da = GetAddr3Ptr(pframe); - break; - default: /* ToDs=1, FromDs=1 */ - da = GetAddr3Ptr(pframe); - break; - } - return da; -} - static inline unsigned char *get_sa(unsigned char *pframe) { unsigned char *sa; From c5a7eecdcdb6c9d3a907ee66db2b5cffc8bdb4e1 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Tue, 25 Oct 2022 11:12:23 +0200 Subject: [PATCH 0672/4122] Revert "staging: r8712u: Tracking kmemleak false positives." This reverts commit 5d3da4a20a271e3cf5496a50cbb8118aa019374f. This commit annotated false positive for kmemleak. The reasoning is that the buffers are freed when the driver is unloaded. However, there is actually potential memory leak when probe fails. Tested-by: Philipp Hortmann Signed-off-by: Nam Cao Link: https://lore.kernel.org/r/26ce206b2c40c7db48c146aa6105789db9dfcc1a.1666688642.git.namcaov@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8712/rtl871x_recv.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/staging/rtl8712/rtl871x_recv.c b/drivers/staging/rtl8712/rtl871x_recv.c index de9a568eaffa..4db7eed64a03 100644 --- a/drivers/staging/rtl8712/rtl871x_recv.c +++ b/drivers/staging/rtl8712/rtl871x_recv.c @@ -17,9 +17,7 @@ #define _RTL871X_RECV_C_ #include -#include #include -#include #include #include #include @@ -61,7 +59,6 @@ void _r8712_init_recv_priv(struct recv_priv *precvpriv, GFP_ATOMIC); if (!precvpriv->pallocated_frame_buf) return; - kmemleak_not_leak(precvpriv->pallocated_frame_buf); precvpriv->precv_frame_buf = precvpriv->pallocated_frame_buf + RXFRAME_ALIGN_SZ - ((addr_t)(precvpriv->pallocated_frame_buf) & From 63b5e50571ce4d87cd6c1c4d99de74c9d86fd0d8 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Tue, 25 Oct 2022 11:12:24 +0200 Subject: [PATCH 0673/4122] staging: rtl8712: check for alloc fail in _r8712_init_recv_priv() The function _r8712_init_recv_priv() and also r8712_init_recv_priv() just returns 
silently if they fail to allocate memory. Change their return type to int and add necessary checks and handling if they return -ENOMEM Tested-by: Philipp Hortmann Signed-off-by: Nam Cao Link: https://lore.kernel.org/r/506ac35a667e511db568b06b86834fd0ceeba453.1666688642.git.namcaov@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8712/os_intfs.c | 4 +++- drivers/staging/rtl8712/recv_osdep.h | 8 ++++---- drivers/staging/rtl8712/rtl8712_recv.c | 7 ++++--- drivers/staging/rtl8712/rtl871x_recv.c | 13 +++++++++---- 4 files changed, 20 insertions(+), 12 deletions(-) diff --git a/drivers/staging/rtl8712/os_intfs.c b/drivers/staging/rtl8712/os_intfs.c index 003e97205124..12adb470d216 100644 --- a/drivers/staging/rtl8712/os_intfs.c +++ b/drivers/staging/rtl8712/os_intfs.c @@ -309,7 +309,9 @@ int r8712_init_drv_sw(struct _adapter *padapter) if (ret) return ret; _r8712_init_xmit_priv(&padapter->xmitpriv, padapter); - _r8712_init_recv_priv(&padapter->recvpriv, padapter); + ret = _r8712_init_recv_priv(&padapter->recvpriv, padapter); + if (ret) + return ret; memset((unsigned char *)&padapter->securitypriv, 0, sizeof(struct security_priv)); timer_setup(&padapter->securitypriv.tkip_timer, diff --git a/drivers/staging/rtl8712/recv_osdep.h b/drivers/staging/rtl8712/recv_osdep.h index d8c1fa74f544..fbe3f2868506 100644 --- a/drivers/staging/rtl8712/recv_osdep.h +++ b/drivers/staging/rtl8712/recv_osdep.h @@ -18,15 +18,15 @@ #include "drv_types.h" #include -void _r8712_init_recv_priv(struct recv_priv *precvpriv, - struct _adapter *padapter); +int _r8712_init_recv_priv(struct recv_priv *precvpriv, + struct _adapter *padapter); void _r8712_free_recv_priv(struct recv_priv *precvpriv); void r8712_recv_entry(union recv_frame *precv_frame); void r8712_recv_indicatepkt(struct _adapter *adapter, union recv_frame *precv_frame); void r8712_handle_tkip_mic_err(struct _adapter *padapter, u8 bgroup); -void r8712_init_recv_priv(struct recv_priv *precvpriv, - struct _adapter 
*padapter); +int r8712_init_recv_priv(struct recv_priv *precvpriv, + struct _adapter *padapter); void r8712_free_recv_priv(struct recv_priv *precvpriv); void r8712_os_recv_resource_alloc(struct _adapter *padapter, union recv_frame *precvframe); diff --git a/drivers/staging/rtl8712/rtl8712_recv.c b/drivers/staging/rtl8712/rtl8712_recv.c index 7f1fdd058551..7da014ab0723 100644 --- a/drivers/staging/rtl8712/rtl8712_recv.c +++ b/drivers/staging/rtl8712/rtl8712_recv.c @@ -30,8 +30,8 @@ static void recv_tasklet(struct tasklet_struct *t); -void r8712_init_recv_priv(struct recv_priv *precvpriv, - struct _adapter *padapter) +int r8712_init_recv_priv(struct recv_priv *precvpriv, + struct _adapter *padapter) { int i; struct recv_buf *precvbuf; @@ -44,7 +44,7 @@ void r8712_init_recv_priv(struct recv_priv *precvpriv, precvpriv->pallocated_recv_buf = kzalloc(NR_RECVBUFF * sizeof(struct recv_buf) + 4, GFP_ATOMIC); if (!precvpriv->pallocated_recv_buf) - return; + return -ENOMEM; precvpriv->precv_buf = precvpriv->pallocated_recv_buf + 4 - ((addr_t)(precvpriv->pallocated_recv_buf) & 3); precvbuf = (struct recv_buf *)precvpriv->precv_buf; @@ -75,6 +75,7 @@ void r8712_init_recv_priv(struct recv_priv *precvpriv, } pskb = NULL; } + return 0; } void r8712_free_recv_priv(struct recv_priv *precvpriv) diff --git a/drivers/staging/rtl8712/rtl871x_recv.c b/drivers/staging/rtl8712/rtl871x_recv.c index 4db7eed64a03..8a3566214af7 100644 --- a/drivers/staging/rtl8712/rtl871x_recv.c +++ b/drivers/staging/rtl8712/rtl871x_recv.c @@ -42,9 +42,10 @@ void _r8712_init_sta_recv_priv(struct sta_recv_priv *psta_recvpriv) _init_queue(&psta_recvpriv->defrag_q); } -void _r8712_init_recv_priv(struct recv_priv *precvpriv, - struct _adapter *padapter) +int _r8712_init_recv_priv(struct recv_priv *precvpriv, + struct _adapter *padapter) { + int ret; sint i; union recv_frame *precvframe; @@ -58,7 +59,7 @@ void _r8712_init_recv_priv(struct recv_priv *precvpriv, sizeof(union recv_frame) + RXFRAME_ALIGN_SZ, 
GFP_ATOMIC); if (!precvpriv->pallocated_frame_buf) - return; + return -ENOMEM; precvpriv->precv_frame_buf = precvpriv->pallocated_frame_buf + RXFRAME_ALIGN_SZ - ((addr_t)(precvpriv->pallocated_frame_buf) & @@ -73,7 +74,11 @@ void _r8712_init_recv_priv(struct recv_priv *precvpriv, precvframe++; } precvpriv->rx_pending_cnt = 1; - r8712_init_recv_priv(precvpriv, padapter); + ret = r8712_init_recv_priv(precvpriv, padapter); + if (ret) + kfree(precvpriv->pallocated_frame_buf); + + return ret; } void _r8712_free_recv_priv(struct recv_priv *precvpriv) From 242443430dd8ef932a56cf483080a46f2948dd65 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Tue, 25 Oct 2022 11:12:25 +0200 Subject: [PATCH 0674/4122] staging: rtl8712: check for return value of _r8712_init_xmit_priv() The return value of _r8712_init_xmit_priv() is never checked and the driver always continues execution as if all is well. This will cause problems if, for example, buffers cannot be allocated and the driver continues and uses those buffers. Check the return value of _r8712_init_xmit_priv() and return the error (if any) during probing.
Tested-by: Philipp Hortmann Signed-off-by: Nam Cao Link: https://lore.kernel.org/r/b550803561acf26af71f2377215c28b94435a644.1666688642.git.namcaov@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8712/os_intfs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/staging/rtl8712/os_intfs.c b/drivers/staging/rtl8712/os_intfs.c index 12adb470d216..205b7d66a40a 100644 --- a/drivers/staging/rtl8712/os_intfs.c +++ b/drivers/staging/rtl8712/os_intfs.c @@ -308,7 +308,9 @@ int r8712_init_drv_sw(struct _adapter *padapter) ret = r8712_init_mlme_priv(padapter); if (ret) return ret; - _r8712_init_xmit_priv(&padapter->xmitpriv, padapter); + ret = _r8712_init_xmit_priv(&padapter->xmitpriv, padapter); + if (ret) + return ret; ret = _r8712_init_recv_priv(&padapter->recvpriv, padapter); if (ret) return ret; From 336ccc31cda111c830332d4b56def49c9c1d48e6 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Tue, 25 Oct 2022 11:12:26 +0200 Subject: [PATCH 0675/4122] staging: rtl8712: fix potential memory leak In r8712_init_drv_sw(), whenever any function call returns an error, it is returned immediately without properly cleaning up the other successfully executed functions. This can cause a memory leak. Instead of returning immediately, free all the allocated buffers first.
Tested-by: Philipp Hortmann Signed-off-by: Nam Cao Link: https://lore.kernel.org/r/0a3414b12031f6cdcba81a8725e91eb9567ff34f.1666688642.git.namcaov@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8712/os_intfs.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/staging/rtl8712/os_intfs.c b/drivers/staging/rtl8712/os_intfs.c index 205b7d66a40a..a2f3645be0cc 100644 --- a/drivers/staging/rtl8712/os_intfs.c +++ b/drivers/staging/rtl8712/os_intfs.c @@ -304,29 +304,42 @@ int r8712_init_drv_sw(struct _adapter *padapter) padapter->cmdpriv.padapter = padapter; ret = r8712_init_evt_priv(&padapter->evtpriv); if (ret) - return ret; + goto free_cmd; ret = r8712_init_mlme_priv(padapter); if (ret) - return ret; + goto free_evt; ret = _r8712_init_xmit_priv(&padapter->xmitpriv, padapter); if (ret) - return ret; + goto free_mlme; ret = _r8712_init_recv_priv(&padapter->recvpriv, padapter); if (ret) - return ret; + goto free_xmit; memset((unsigned char *)&padapter->securitypriv, 0, sizeof(struct security_priv)); timer_setup(&padapter->securitypriv.tkip_timer, r8712_use_tkipkey_handler, 0); ret = _r8712_init_sta_priv(&padapter->stapriv); if (ret) - return ret; + goto free_recv; padapter->stapriv.padapter = padapter; r8712_init_bcmc_stainfo(padapter); r8712_init_pwrctrl_priv(padapter); mp871xinit(padapter); init_default_value(padapter); r8712_InitSwLeds(padapter); + + return 0; + +free_recv: + _r8712_free_recv_priv(&padapter->recvpriv); +free_xmit: + _free_xmit_priv(&padapter->xmitpriv); +free_mlme: + r8712_free_mlme_priv(&padapter->mlmepriv); +free_evt: + r8712_free_evt_priv(&padapter->evtpriv); +free_cmd: + r8712_free_cmd_priv(&padapter->cmdpriv); return ret; } From 5c4fb46e9116bd7cf87bd76b417a865626e13c30 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sun, 30 Oct 2022 18:33:14 +0100 Subject: [PATCH 0676/4122] staging: r8188eu: replace a GetAddr1Ptr call Define a struct ieee80211_mgmt and use it to read the 
destination address. This replaces one call to the driver-specific GetAddr1Ptr function, which should eventually be removed. Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221030173326.1588647-2-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index 127dbc4e8b9a..5a366688a3f7 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -3815,13 +3815,14 @@ exit: unsigned int on_action_public(struct adapter *padapter, struct recv_frame *precv_frame) { + struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)precv_frame->rx_data; unsigned int ret = _FAIL; u8 *pframe = precv_frame->rx_data; u8 *frame_body = pframe + sizeof(struct ieee80211_hdr_3addr); u8 category, action; /* check RA matches or not */ - if (memcmp(myid(&padapter->eeprompriv), GetAddr1Ptr(pframe), ETH_ALEN)) + if (memcmp(myid(&padapter->eeprompriv), mgmt->da, ETH_ALEN)) goto exit; category = frame_body[0]; From 5998e3192274da5fd9521b1019cc62b5ea2283b7 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sun, 30 Oct 2022 18:33:15 +0100 Subject: [PATCH 0677/4122] staging: r8188eu: remove duplicate category check The caller of on_action_public has already checked the action category. We can remove the check in on_action_public. 
Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221030173326.1588647-3-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index 5a366688a3f7..7d4f208d161b 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -3819,16 +3819,12 @@ unsigned int on_action_public(struct adapter *padapter, struct recv_frame *precv unsigned int ret = _FAIL; u8 *pframe = precv_frame->rx_data; u8 *frame_body = pframe + sizeof(struct ieee80211_hdr_3addr); - u8 category, action; + u8 action; /* check RA matches or not */ if (memcmp(myid(&padapter->eeprompriv), mgmt->da, ETH_ALEN)) goto exit; - category = frame_body[0]; - if (category != WLAN_CATEGORY_PUBLIC) - goto exit; - action = frame_body[1]; switch (action) { case ACT_PUBLIC_VENDOR: From 721d7f496a0602d061bd127bd88de7e70181f521 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sun, 30 Oct 2022 18:33:16 +0100 Subject: [PATCH 0678/4122] staging: r8188eu: make on_action_public static void The on_action_public function is called only by OnAction. This function also lives in rtw_mlme_ext.c and does not check the return value from on_action_public. We can make on_action_public a static void function. The ret variable is no longer needed if we don't return a value. It can be removed. 
Reported-by: kernel test robot Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221030173326.1588647-4-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 12 ++++-------- drivers/staging/r8188eu/include/rtw_mlme_ext.h | 2 -- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index 7d4f208d161b..88600f62ffb4 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -3813,30 +3813,26 @@ exit: return ret; } -unsigned int on_action_public(struct adapter *padapter, struct recv_frame *precv_frame) +static void on_action_public(struct adapter *padapter, struct recv_frame *precv_frame) { struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)precv_frame->rx_data; - unsigned int ret = _FAIL; u8 *pframe = precv_frame->rx_data; u8 *frame_body = pframe + sizeof(struct ieee80211_hdr_3addr); u8 action; /* check RA matches or not */ if (memcmp(myid(&padapter->eeprompriv), mgmt->da, ETH_ALEN)) - goto exit; + return; action = frame_body[1]; switch (action) { case ACT_PUBLIC_VENDOR: - ret = on_action_public_vendor(precv_frame); + on_action_public_vendor(precv_frame); break; default: - ret = on_action_public_default(precv_frame); + on_action_public_default(precv_frame); break; } - -exit: - return ret; } unsigned int OnAction_p2p(struct adapter *padapter, struct recv_frame *precv_frame) diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h index c8beaa927cba..ec2e9352011b 100644 --- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h +++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h @@ -538,8 +538,6 @@ void start_create_ibss(struct adapter *padapter); unsigned int OnAction_back(struct adapter *padapter, struct recv_frame *precv_frame); -unsigned int on_action_public(struct 
adapter *padapter, - struct recv_frame *precv_frame); unsigned int OnAction_p2p(struct adapter *padapter, struct recv_frame *precv_frame); From b22b8618ab3cf011a5536e30e3fb99b32c911ecb Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sun, 30 Oct 2022 18:33:17 +0100 Subject: [PATCH 0679/4122] staging: r8188eu: make OnAction_back static void OnAction_back is called only by OnAction, its return value is not checked. We can make it a static void function. Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221030173326.1588647-5-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 13 +++++-------- drivers/staging/r8188eu/include/rtw_mlme_ext.h | 2 -- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index 88600f62ffb4..779c022b1c50 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -1481,7 +1481,7 @@ static void OnDisassoc(struct adapter *padapter, struct recv_frame *precv_frame) pmlmepriv->LinkDetectInfo.bBusyTraffic = false; } -unsigned int OnAction_back(struct adapter *padapter, struct recv_frame *precv_frame) +static void OnAction_back(struct adapter *padapter, struct recv_frame *precv_frame) { struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)precv_frame->rx_data; struct sta_info *psta = NULL; @@ -1494,21 +1494,20 @@ unsigned int OnAction_back(struct adapter *padapter, struct recv_frame *precv_fr struct sta_priv *pstapriv = &padapter->stapriv; /* check RA matches or not */ if (memcmp(myid(&padapter->eeprompriv), mgmt->da, ETH_ALEN))/* for if1, sta/ap mode */ - return _SUCCESS; + return; if ((pmlmeinfo->state & 0x03) != WIFI_FW_AP_STATE) if (!(pmlmeinfo->state & WIFI_FW_ASSOC_SUCCESS)) - return _SUCCESS; + return; psta = rtw_get_stainfo(pstapriv, mgmt->sa); - if (!psta) - return _SUCCESS; + return; 
frame_body = (unsigned char *)(pframe + sizeof(struct ieee80211_hdr_3addr)); if (!pmlmeinfo->HT_enable) - return _SUCCESS; + return; /* All union members start with an action code, it's ok to use addba_req. */ switch (mgmt->u.action.u.addba_req.action_code) { case WLAN_ACTION_ADDBA_REQ: @@ -1550,8 +1549,6 @@ unsigned int OnAction_back(struct adapter *padapter, struct recv_frame *precv_fr default: break; } - - return _SUCCESS; } static int get_reg_classes_full_count(struct p2p_channels *channel_list) diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h index ec2e9352011b..4ccdce1ad9be 100644 --- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h +++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h @@ -536,8 +536,6 @@ void start_clnt_auth(struct adapter *padapter); void start_clnt_join(struct adapter *padapter); void start_create_ibss(struct adapter *padapter); -unsigned int OnAction_back(struct adapter *padapter, - struct recv_frame *precv_frame); unsigned int OnAction_p2p(struct adapter *padapter, struct recv_frame *precv_frame); From aa415931f9682fd4ac71ac34ffa5c157cef2b680 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sun, 30 Oct 2022 18:33:18 +0100 Subject: [PATCH 0680/4122] staging: r8188eu: make OnAction_p2p static void OnAction_p2p is called only by OnAction, its return value is not checked. We can make it a static void function. 
Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221030173326.1588647-6-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 9 ++++----- drivers/staging/r8188eu/include/rtw_mlme_ext.h | 3 --- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index 779c022b1c50..f1054192bfb1 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -3832,7 +3832,7 @@ static void on_action_public(struct adapter *padapter, struct recv_frame *precv_ } } -unsigned int OnAction_p2p(struct adapter *padapter, struct recv_frame *precv_frame) +static void OnAction_p2p(struct adapter *padapter, struct recv_frame *precv_frame) { u8 *frame_body; u8 category, OUI_Subtype; @@ -3842,16 +3842,16 @@ unsigned int OnAction_p2p(struct adapter *padapter, struct recv_frame *precv_fra /* check RA matches or not */ if (memcmp(myid(&padapter->eeprompriv), GetAddr1Ptr(pframe), ETH_ALEN))/* for if1, sta/ap mode */ - return _SUCCESS; + return; frame_body = (unsigned char *)(pframe + sizeof(struct ieee80211_hdr_3addr)); category = frame_body[0]; if (category != RTW_WLAN_CATEGORY_P2P) - return _SUCCESS; + return; if (be32_to_cpu(*((__be32 *)(frame_body + 1))) != P2POUI) - return _SUCCESS; + return; len -= sizeof(struct ieee80211_hdr_3addr); OUI_Subtype = frame_body[5]; @@ -3869,7 +3869,6 @@ unsigned int OnAction_p2p(struct adapter *padapter, struct recv_frame *precv_fra default: break; } - return _SUCCESS; } static void OnAction(struct adapter *padapter, struct recv_frame *precv_frame) diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h index 4ccdce1ad9be..ce5b57e23e53 100644 --- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h +++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h @@ -536,9 +536,6 @@ 
void start_clnt_auth(struct adapter *padapter); void start_clnt_join(struct adapter *padapter); void start_create_ibss(struct adapter *padapter); -unsigned int OnAction_p2p(struct adapter *padapter, - struct recv_frame *precv_frame); - void mlmeext_joinbss_event_callback(struct adapter *padapter, int join_res); void mlmeext_sta_del_event_callback(struct adapter *padapter); void mlmeext_sta_add_event_callback(struct adapter *padapter, From b31b29788f103c2720898af2d499d8b53c1eb980 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sun, 30 Oct 2022 18:33:19 +0100 Subject: [PATCH 0681/4122] staging: r8188eu: remove category check in OnAction_p2p The caller of OnAction_p2p has already checked the action category. We can remove the check in OnAction_p2p. Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221030173326.1588647-7-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index f1054192bfb1..efcb2f3b6d3f 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -3835,7 +3835,7 @@ static void on_action_public(struct adapter *padapter, struct recv_frame *precv_ static void OnAction_p2p(struct adapter *padapter, struct recv_frame *precv_frame) { u8 *frame_body; - u8 category, OUI_Subtype; + u8 OUI_Subtype; u8 *pframe = precv_frame->rx_data; uint len = precv_frame->len; struct wifidirect_info *pwdinfo = &padapter->wdinfo; @@ -3846,10 +3846,6 @@ static void OnAction_p2p(struct adapter *padapter, struct recv_frame *precv_fram frame_body = (unsigned char *)(pframe + sizeof(struct ieee80211_hdr_3addr)); - category = frame_body[0]; - if (category != RTW_WLAN_CATEGORY_P2P) - return; - if (be32_to_cpu(*((__be32 *)(frame_body + 1))) != P2POUI) return; From 
e246bf42ef82ac25d8a7ad45ddd07fbf161f7e7f Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sun, 30 Oct 2022 18:33:20 +0100 Subject: [PATCH 0682/4122] staging: r8188eu: replace switch-case with if OnAction_p2p has a switch-case statement where only a single case is handled. Use if instead, this makes the code shorter and easier to read. Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221030173326.1588647-8-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index efcb2f3b6d3f..64d01da9c814 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -3852,19 +3852,8 @@ static void OnAction_p2p(struct adapter *padapter, struct recv_frame *precv_fram len -= sizeof(struct ieee80211_hdr_3addr); OUI_Subtype = frame_body[5]; - switch (OUI_Subtype) { - case P2P_NOTICE_OF_ABSENCE: - break; - case P2P_PRESENCE_REQUEST: + if (OUI_Subtype == P2P_PRESENCE_REQUEST) process_p2p_presence_req(pwdinfo, pframe, len); - break; - case P2P_PRESENCE_RESPONSE: - break; - case P2P_GO_DISC_REQUEST: - break; - default: - break; - } } static void OnAction(struct adapter *padapter, struct recv_frame *precv_frame) From fc47cb05cf19017c3eb87da9b82e20f57b595ad3 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sun, 30 Oct 2022 18:33:21 +0100 Subject: [PATCH 0683/4122] staging: r8188eu: replace GetAddr1Ptr call in OnAction_p2p Define a struct ieee80211_mgmt in OnAction_p2p and use it to check the destination address. This replaces a call to the driver-specific GetAddr1Ptr function. 
Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221030173326.1588647-9-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index 64d01da9c814..6d95d3bc23e6 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -3834,6 +3834,7 @@ static void on_action_public(struct adapter *padapter, struct recv_frame *precv_ static void OnAction_p2p(struct adapter *padapter, struct recv_frame *precv_frame) { + struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)precv_frame->rx_data; u8 *frame_body; u8 OUI_Subtype; u8 *pframe = precv_frame->rx_data; @@ -3841,7 +3842,7 @@ static void OnAction_p2p(struct adapter *padapter, struct recv_frame *precv_fram struct wifidirect_info *pwdinfo = &padapter->wdinfo; /* check RA matches or not */ - if (memcmp(myid(&padapter->eeprompriv), GetAddr1Ptr(pframe), ETH_ALEN))/* for if1, sta/ap mode */ + if (memcmp(myid(&padapter->eeprompriv), mgmt->da, ETH_ALEN))/* for if1, sta/ap mode */ return; frame_body = (unsigned char *)(pframe + sizeof(struct ieee80211_hdr_3addr)); From a05159da5a4e49fd4b34cb00fb33badeb4dc9ec3 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sun, 30 Oct 2022 18:33:22 +0100 Subject: [PATCH 0684/4122] staging: r8188eu: clean up on_action_public Use the struct mgmt to read the action_code. This is much simpler than parsing the message ourselves. Add a comment about reading the action code. All members of the action enum start with an action_code byte. It does not matter which member we use. 
Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221030173326.1588647-10-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index 6d95d3bc23e6..b3cef3504ad3 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -3813,23 +3813,16 @@ exit: static void on_action_public(struct adapter *padapter, struct recv_frame *precv_frame) { struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)precv_frame->rx_data; - u8 *pframe = precv_frame->rx_data; - u8 *frame_body = pframe + sizeof(struct ieee80211_hdr_3addr); - u8 action; /* check RA matches or not */ if (memcmp(myid(&padapter->eeprompriv), mgmt->da, ETH_ALEN)) return; - action = frame_body[1]; - switch (action) { - case ACT_PUBLIC_VENDOR: + /* All members of the action enum start with action_code. */ + if (mgmt->u.action.u.s1g.action_code == WLAN_PUB_ACTION_VENDOR_SPECIFIC) on_action_public_vendor(precv_frame); - break; - default: + else on_action_public_default(precv_frame); - break; - } } static void OnAction_p2p(struct adapter *padapter, struct recv_frame *precv_frame) From a399a3b05209dc759b20c6d6e4aa331f2e60bc8a Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sun, 30 Oct 2022 18:33:23 +0100 Subject: [PATCH 0685/4122] staging: r8188eu: remove return value from on_action_public_vendor The only caller of on_action_public_vendor does not check the return value. We can make it a void function. 
Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221030173326.1588647-11-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index b3cef3504ad3..b395457a6a60 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -3779,17 +3779,13 @@ static unsigned int on_action_public_p2p(struct recv_frame *precv_frame) return _SUCCESS; } -static unsigned int on_action_public_vendor(struct recv_frame *precv_frame) +static void on_action_public_vendor(struct recv_frame *precv_frame) { - unsigned int ret = _FAIL; u8 *pframe = precv_frame->rx_data; u8 *frame_body = pframe + sizeof(struct ieee80211_hdr_3addr); - if (!memcmp(frame_body + 2, P2P_OUI, 4)) { - ret = on_action_public_p2p(precv_frame); - } - - return ret; + if (!memcmp(frame_body + 2, P2P_OUI, 4)) + on_action_public_p2p(precv_frame); } static unsigned int on_action_public_default(struct recv_frame *precv_frame) From 8161a8335821bb6944eef2de8b750544d59c49c2 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sun, 30 Oct 2022 18:33:24 +0100 Subject: [PATCH 0686/4122] staging: r8188eu: remove return value from on_action_public_default The only caller of on_action_public_default does not check the return value. We can make it a void function. 
Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221030173326.1588647-12-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index b395457a6a60..f5923792f067 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -3788,22 +3788,15 @@ static void on_action_public_vendor(struct recv_frame *precv_frame) on_action_public_p2p(precv_frame); } -static unsigned int on_action_public_default(struct recv_frame *precv_frame) +static void on_action_public_default(struct recv_frame *precv_frame) { - unsigned int ret = _FAIL; u8 *pframe = precv_frame->rx_data; u8 *frame_body = pframe + sizeof(struct ieee80211_hdr_3addr); u8 token; token = frame_body[2]; - if (rtw_action_public_decache(precv_frame, token) == _FAIL) - goto exit; - - ret = _SUCCESS; - -exit: - return ret; + rtw_action_public_decache(precv_frame, token); } static void on_action_public(struct adapter *padapter, struct recv_frame *precv_frame) From 9001c5029dded946e7862e5785278f7c1d9dfe55 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sun, 30 Oct 2022 18:33:25 +0100 Subject: [PATCH 0687/4122] staging: r8188eu: rtw_action_public_decache's token is a u8 Both callers of rtw_action_public_decache pass a u8 value for the token parameter. We can change token from s32 to u8 and remove the code for token < 0. 
Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221030173326.1588647-13-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index f5923792f067..93f3d387e92d 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -3490,7 +3490,7 @@ inline void issue_probereq_p2p(struct adapter *adapter, u8 *da) _issue_probereq_p2p(adapter, da); } -static s32 rtw_action_public_decache(struct recv_frame *recv_frame, s32 token) +static s32 rtw_action_public_decache(struct recv_frame *recv_frame, u8 token) { struct adapter *adapter = recv_frame->adapter; struct mlme_ext_priv *mlmeext = &adapter->mlmeextpriv; @@ -3499,21 +3499,13 @@ static s32 rtw_action_public_decache(struct recv_frame *recv_frame, s32 token) (recv_frame->attrib.frag_num & 0xf); if (GetRetry(frame)) { - if (token >= 0) { - if ((seq_ctrl == mlmeext->action_public_rxseq) && - (token == mlmeext->action_public_dialog_token)) - return _FAIL; - } else { - if (seq_ctrl == mlmeext->action_public_rxseq) - return _FAIL; - } + if ((seq_ctrl == mlmeext->action_public_rxseq) && + (token == mlmeext->action_public_dialog_token)) + return _FAIL; } mlmeext->action_public_rxseq = seq_ctrl; - - if (token >= 0) - mlmeext->action_public_dialog_token = token; - + mlmeext->action_public_dialog_token = token; return _SUCCESS; } From ae85931f8f483ed58f7c90c99c75afd8482e0399 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sun, 30 Oct 2022 18:33:26 +0100 Subject: [PATCH 0688/4122] staging: r8188eu: check destination address in OnAction All subfunctions of OnAction check if the destination address matches the local interface's address. It's simpler to move this check to OnAction. 
Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221030173326.1588647-14-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index 93f3d387e92d..e985fc5fc575 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -1492,9 +1492,6 @@ static void OnAction_back(struct adapter *padapter, struct recv_frame *precv_fra struct mlme_ext_info *pmlmeinfo = &pmlmeext->mlmext_info; u8 *pframe = precv_frame->rx_data; struct sta_priv *pstapriv = &padapter->stapriv; - /* check RA matches or not */ - if (memcmp(myid(&padapter->eeprompriv), mgmt->da, ETH_ALEN))/* for if1, sta/ap mode */ - return; if ((pmlmeinfo->state & 0x03) != WIFI_FW_AP_STATE) if (!(pmlmeinfo->state & WIFI_FW_ASSOC_SUCCESS)) @@ -3795,10 +3792,6 @@ static void on_action_public(struct adapter *padapter, struct recv_frame *precv_ { struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)precv_frame->rx_data; - /* check RA matches or not */ - if (memcmp(myid(&padapter->eeprompriv), mgmt->da, ETH_ALEN)) - return; - /* All members of the action enum start with action_code. 
*/ if (mgmt->u.action.u.s1g.action_code == WLAN_PUB_ACTION_VENDOR_SPECIFIC) on_action_public_vendor(precv_frame); @@ -3808,17 +3801,12 @@ static void on_action_public(struct adapter *padapter, struct recv_frame *precv_ static void OnAction_p2p(struct adapter *padapter, struct recv_frame *precv_frame) { - struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)precv_frame->rx_data; u8 *frame_body; u8 OUI_Subtype; u8 *pframe = precv_frame->rx_data; uint len = precv_frame->len; struct wifidirect_info *pwdinfo = &padapter->wdinfo; - /* check RA matches or not */ - if (memcmp(myid(&padapter->eeprompriv), mgmt->da, ETH_ALEN))/* for if1, sta/ap mode */ - return; - frame_body = (unsigned char *)(pframe + sizeof(struct ieee80211_hdr_3addr)); if (be32_to_cpu(*((__be32 *)(frame_body + 1))) != P2POUI) @@ -3835,6 +3823,9 @@ static void OnAction(struct adapter *padapter, struct recv_frame *precv_frame) { struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)precv_frame->rx_data; + if (memcmp(myid(&padapter->eeprompriv), mgmt->da, ETH_ALEN)) + return; + switch (mgmt->u.action.category) { case WLAN_CATEGORY_BACK: OnAction_back(padapter, precv_frame); From 085bdaa6eb1476ec054164bdc4001bc3916ff5cb Mon Sep 17 00:00:00 2001 From: Shaoqin Huang Date: Tue, 11 Oct 2022 14:21:20 +0800 Subject: [PATCH 0689/4122] memblock test: Add test to memblock_add() 129th region Add a 129th region to memblock. This triggers memblock_double_array(), which needs valid memory regions. So use dummy_physical_memory_init() to allocate a large enough memory region and split it: one block large enough to be chosen by memblock_double_array(), with the remaining memory split into small regions that are added to memblock. This makes sure that memblock_double_array() always chooses the valid memory region allocated by dummy_physical_memory_init(), so memblock_double_array() must succeed. 
Another thing that should be done is to restore memory.regions after memblock_double_array(), because memory.regions now points to a memory region allocated by dummy_physical_memory_init(). This will affect the subsequent tests if we don't restore it. So simply record the original region, and restore it after the test. Signed-off-by: Shaoqin Huang Signed-off-by: Mike Rapoport Link: https://lore.kernel.org/r/20221011062128.49359-2-shaoqin.huang@intel.com --- tools/testing/memblock/tests/basic_api.c | 93 ++++++++++++++++++++++++ tools/testing/memblock/tests/common.c | 7 +- tools/testing/memblock/tests/common.h | 6 +- 3 files changed, 103 insertions(+), 3 deletions(-) diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c index a13a57ba0815..4d61a4b474be 100644 --- a/tools/testing/memblock/tests/basic_api.c +++ b/tools/testing/memblock/tests/basic_api.c @@ -423,6 +423,98 @@ static int memblock_add_near_max_check(void) return 0; } +/* + * A test that trying to add the 129th memory block. + * Expect to trigger memblock_double_array() to double the + * memblock.memory.max, find a new valid memory as + * memory.regions. + */ +static int memblock_add_many_check(void) +{ + int i; + void *orig_region; + struct region r = { + .base = SZ_16K, + .size = SZ_16K, + }; + phys_addr_t new_memory_regions_size; + phys_addr_t base, size = SZ_64; + phys_addr_t gap_size = SZ_64; + + PREFIX_PUSH(); + + reset_memblock_regions(); + memblock_allow_resize(); + + dummy_physical_memory_init(); + /* + * We allocated enough memory by using dummy_physical_memory_init(), and + * split it into small block. First we split a large enough memory block + * as the memory region which will be choosed by memblock_double_array(). 
+ */ + base = PAGE_ALIGN(dummy_physical_memory_base()); + new_memory_regions_size = PAGE_ALIGN(INIT_MEMBLOCK_REGIONS * 2 * + sizeof(struct memblock_region)); + memblock_add(base, new_memory_regions_size); + + /* This is the base of small memory block. */ + base += new_memory_regions_size + gap_size; + + orig_region = memblock.memory.regions; + + for (i = 0; i < INIT_MEMBLOCK_REGIONS; i++) { + /* + * Add these small block to fulfill the memblock. We keep a + * gap between the nearby memory to avoid being merged. + */ + memblock_add(base, size); + base += size + gap_size; + + ASSERT_EQ(memblock.memory.cnt, i + 2); + ASSERT_EQ(memblock.memory.total_size, new_memory_regions_size + + (i + 1) * size); + } + + /* + * At there, memblock_double_array() has been succeed, check if it + * update the memory.max. + */ + ASSERT_EQ(memblock.memory.max, INIT_MEMBLOCK_REGIONS * 2); + + /* memblock_double_array() will reserve the memory it used. Check it. */ + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, new_memory_regions_size); + + /* + * Now memblock_double_array() works fine. Let's check after the + * double_array(), the memblock_add() still works as normal. + */ + memblock_add(r.base, r.size); + ASSERT_EQ(memblock.memory.regions[0].base, r.base); + ASSERT_EQ(memblock.memory.regions[0].size, r.size); + + ASSERT_EQ(memblock.memory.cnt, INIT_MEMBLOCK_REGIONS + 2); + ASSERT_EQ(memblock.memory.total_size, INIT_MEMBLOCK_REGIONS * size + + new_memory_regions_size + + r.size); + ASSERT_EQ(memblock.memory.max, INIT_MEMBLOCK_REGIONS * 2); + + dummy_physical_memory_cleanup(); + + /* + * The current memory.regions is occupying a range of memory that + * allocated from dummy_physical_memory_init(). After free the memory, + * we must not use it. So restore the origin memory region to make sure + * the tests can run as normal and not affected by the double array. 
+ */ + memblock.memory.regions = orig_region; + memblock.memory.cnt = INIT_MEMBLOCK_REGIONS; + + test_pass_pop(); + + return 0; +} + static int memblock_add_checks(void) { prefix_reset(); @@ -438,6 +530,7 @@ static int memblock_add_checks(void) memblock_add_twice_check(); memblock_add_between_check(); memblock_add_near_max_check(); + memblock_add_many_check(); prefix_pop(); diff --git a/tools/testing/memblock/tests/common.c b/tools/testing/memblock/tests/common.c index 3f795047bbe1..f43b6f414983 100644 --- a/tools/testing/memblock/tests/common.c +++ b/tools/testing/memblock/tests/common.c @@ -5,8 +5,6 @@ #include #include -#define INIT_MEMBLOCK_REGIONS 128 -#define INIT_MEMBLOCK_RESERVED_REGIONS INIT_MEMBLOCK_REGIONS #define PREFIXES_MAX 15 #define DELIM ": " #define BASIS 10000 @@ -115,6 +113,11 @@ void dummy_physical_memory_cleanup(void) free(memory_block.base); } +phys_addr_t dummy_physical_memory_base(void) +{ + return (phys_addr_t)memory_block.base; +} + static void usage(const char *prog) { BUILD_BUG_ON(ARRAY_SIZE(help_opts) != ARRAY_SIZE(long_opts) - 1); diff --git a/tools/testing/memblock/tests/common.h b/tools/testing/memblock/tests/common.h index d6bbbe63bfc3..cc82b85151b6 100644 --- a/tools/testing/memblock/tests/common.h +++ b/tools/testing/memblock/tests/common.h @@ -10,9 +10,12 @@ #include #include <../selftests/kselftest.h> -#define MEM_SIZE SZ_16K +#define MEM_SIZE SZ_32K #define NUMA_NODES 8 +#define INIT_MEMBLOCK_REGIONS 128 +#define INIT_MEMBLOCK_RESERVED_REGIONS INIT_MEMBLOCK_REGIONS + enum test_flags { /* No special request. 
*/ TEST_F_NONE = 0x0, @@ -124,6 +127,7 @@ void setup_memblock(void); void setup_numa_memblock(const unsigned int node_fracs[]); void dummy_physical_memory_init(void); void dummy_physical_memory_cleanup(void); +phys_addr_t dummy_physical_memory_base(void); void parse_args(int argc, char **argv); void test_fail(void); From 5b27dd7968b9c916da4af48d0310f94152744f8e Mon Sep 17 00:00:00 2001 From: Shaoqin Huang Date: Tue, 11 Oct 2022 14:21:21 +0800 Subject: [PATCH 0690/4122] memblock test: Add test to memblock_reserve() 129th region Reserve a 129th region in memblock. This triggers memblock_double_array(), which needs valid memory regions. So use dummy_physical_memory_init() to allocate a valid memory region. At the same time, reserve 128 fake memory regions, and make sure these reserved regions do not intersect with the valid memory region. That way memblock_double_array() will choose the valid memory region and succeed. We also need to restore reserved.regions after memblock_double_array(), to make sure the subsequent tests can run as normal. Signed-off-by: Shaoqin Huang Signed-off-by: Mike Rapoport Link: https://lore.kernel.org/r/20221011062128.49359-3-shaoqin.huang@intel.com --- tools/testing/memblock/tests/basic_api.c | 91 ++++++++++++++++++++++++ 1 file changed, 91 insertions(+) diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c index 4d61a4b474be..411647094cc3 100644 --- a/tools/testing/memblock/tests/basic_api.c +++ b/tools/testing/memblock/tests/basic_api.c @@ -892,6 +892,96 @@ static int memblock_reserve_near_max_check(void) return 0; } +/* + * A test that trying to reserve the 129th memory block. + * Expect to trigger memblock_double_array() to double the + * memblock.memory.max, find a new valid memory as + * reserved.regions. 
+ */ +static int memblock_reserve_many_check(void) +{ + int i; + void *orig_region; + struct region r = { + .base = SZ_16K, + .size = SZ_16K, + }; + phys_addr_t memory_base = SZ_128K; + phys_addr_t new_reserved_regions_size; + + PREFIX_PUSH(); + + reset_memblock_regions(); + memblock_allow_resize(); + + /* Add a valid memory region used by double_array(). */ + dummy_physical_memory_init(); + memblock_add(dummy_physical_memory_base(), MEM_SIZE); + + for (i = 0; i < INIT_MEMBLOCK_REGIONS; i++) { + /* Reserve some fakes memory region to fulfill the memblock. */ + memblock_reserve(memory_base, MEM_SIZE); + + ASSERT_EQ(memblock.reserved.cnt, i + 1); + ASSERT_EQ(memblock.reserved.total_size, (i + 1) * MEM_SIZE); + + /* Keep the gap so these memory region will not be merged. */ + memory_base += MEM_SIZE * 2; + } + + orig_region = memblock.reserved.regions; + + /* This reserve the 129 memory_region, and makes it double array. */ + memblock_reserve(memory_base, MEM_SIZE); + + /* + * This is the memory region size used by the doubled reserved.regions, + * and it has been reserved due to it has been used. The size is used to + * calculate the total_size that the memblock.reserved have now. + */ + new_reserved_regions_size = PAGE_ALIGN((INIT_MEMBLOCK_REGIONS * 2) * + sizeof(struct memblock_region)); + /* + * The double_array() will find a free memory region as the new + * reserved.regions, and the used memory region will be reserved, so + * there will be one more region exist in the reserved memblock. And the + * one more reserved region's size is new_reserved_regions_size. + */ + ASSERT_EQ(memblock.reserved.cnt, INIT_MEMBLOCK_REGIONS + 2); + ASSERT_EQ(memblock.reserved.total_size, (INIT_MEMBLOCK_REGIONS + 1) * MEM_SIZE + + new_reserved_regions_size); + ASSERT_EQ(memblock.reserved.max, INIT_MEMBLOCK_REGIONS * 2); + + /* + * Now memblock_double_array() works fine. Let's check after the + * double_array(), the memblock_reserve() still works as normal. 
+ */ + memblock_reserve(r.base, r.size); + ASSERT_EQ(memblock.reserved.regions[0].base, r.base); + ASSERT_EQ(memblock.reserved.regions[0].size, r.size); + + ASSERT_EQ(memblock.reserved.cnt, INIT_MEMBLOCK_REGIONS + 3); + ASSERT_EQ(memblock.reserved.total_size, (INIT_MEMBLOCK_REGIONS + 1) * MEM_SIZE + + new_reserved_regions_size + + r.size); + ASSERT_EQ(memblock.reserved.max, INIT_MEMBLOCK_REGIONS * 2); + + dummy_physical_memory_cleanup(); + + /* + * The current reserved.regions is occupying a range of memory that + * allocated from dummy_physical_memory_init(). After free the memory, + * we must not use it. So restore the origin memory region to make sure + * the tests can run as normal and not affected by the double array. + */ + memblock.reserved.regions = orig_region; + memblock.reserved.cnt = INIT_MEMBLOCK_RESERVED_REGIONS; + + test_pass_pop(); + + return 0; +} + static int memblock_reserve_checks(void) { prefix_reset(); @@ -906,6 +996,7 @@ static int memblock_reserve_checks(void) memblock_reserve_twice_check(); memblock_reserve_between_check(); memblock_reserve_near_max_check(); + memblock_reserve_many_check(); prefix_pop(); From 62a56c540797681a5b50a4c06bf638f79b6013bc Mon Sep 17 00:00:00 2001 From: Shaoqin Huang Date: Tue, 11 Oct 2022 14:21:22 +0800 Subject: [PATCH 0691/4122] memblock test: Update TODO list Remove the completed items from TODO list. Signed-off-by: Shaoqin Huang Signed-off-by: Mike Rapoport Link: https://lore.kernel.org/r/20221011062128.49359-4-shaoqin.huang@intel.com --- tools/testing/memblock/TODO | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/tools/testing/memblock/TODO b/tools/testing/memblock/TODO index 33044c634ea7..503cc96fcdc3 100644 --- a/tools/testing/memblock/TODO +++ b/tools/testing/memblock/TODO @@ -1,17 +1,10 @@ TODO ===== -1. Add tests trying to memblock_add() or memblock_reserve() 129th region. - This will trigger memblock_double_array(), make sure it succeeds. 
- *Important:* These tests require valid memory ranges, use dummy physical - memory block from common.c to implement them. It is also very - likely that the current MEM_SIZE won't be enough for these - test cases. Use realloc to adjust the size accordingly. - -2. Add test cases using this functions (implement them for both directions): +1. Add test cases using this functions (implement them for both directions): + memblock_alloc_raw() + memblock_alloc_exact_nid_raw() + memblock_alloc_try_nid_raw() -3. Add tests for memblock_alloc_node() to check if the correct NUMA node is set +2. Add tests for memblock_alloc_node() to check if the correct NUMA node is set for the new region From 3c728e079d83f581a1f8b7755f6e26087b15c4fb Mon Sep 17 00:00:00 2001 From: Tamas Zsoldos Date: Tue, 5 Jul 2022 16:59:35 +0200 Subject: [PATCH 0692/4122] coresight: etm4x: add CPU hotplug support for probing etm4x devices cannot be successfully probed when their CPU is offline. For example, when booting with maxcpus=n, ETM probing will fail on CPUs >n, and the probing won't be reattempted once the CPUs come online. This will leave those CPUs unable to make use of ETM. This change adds a mechanism to delay the probing if the corresponding CPU is offline, and to try it again when the CPU comes online. 
Signed-off-by: Tamas Zsoldos Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20220705145935.24679-1-tamas.zsoldos@arm.com --- .../coresight/coresight-etm4x-core.c | 153 +++++++++++++----- 1 file changed, 113 insertions(+), 40 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c index 80fefaba58ee..9dc0d7f57565 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c @@ -66,10 +66,13 @@ static enum cpuhp_state hp_online; struct etm4_init_arg { unsigned int pid; - struct etmv4_drvdata *drvdata; + struct device *dev; struct csdev_access *csa; }; +static DEFINE_PER_CPU(struct etm4_init_arg *, delayed_probe); +static int etm4_probe_cpu(unsigned int cpu); + /* * Check if TRCSSPCICRn(i) is implemented for a given instance. * @@ -1085,7 +1088,7 @@ static void etm4_init_arch_data(void *info) struct csdev_access *csa; int i; - drvdata = init_arg->drvdata; + drvdata = dev_get_drvdata(init_arg->dev); csa = init_arg->csa; /* @@ -1528,7 +1531,7 @@ void etm4_config_trace_mode(struct etmv4_config *config) static int etm4_online_cpu(unsigned int cpu) { if (!etmdrvdata[cpu]) - return 0; + return etm4_probe_cpu(cpu); if (etmdrvdata[cpu]->boot_enable && !etmdrvdata[cpu]->sticky_enable) coresight_enable(etmdrvdata[cpu]->csdev); @@ -1904,48 +1907,20 @@ static void etm4_pm_clear(void) } } -static int etm4_probe(struct device *dev, void __iomem *base, u32 etm_pid) +static int etm4_add_coresight_dev(struct etm4_init_arg *init_arg) { int ret; struct coresight_platform_data *pdata = NULL; - struct etmv4_drvdata *drvdata; + struct device *dev = init_arg->dev; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev); struct coresight_desc desc = { 0 }; - struct etm4_init_arg init_arg = { 0 }; u8 major, minor; char *type_name; - drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL); if (!drvdata) - return -ENOMEM; + return -EINVAL; - 
dev_set_drvdata(dev, drvdata); - - if (pm_save_enable == PARAM_PM_SAVE_FIRMWARE) - pm_save_enable = coresight_loses_context_with_cpu(dev) ? - PARAM_PM_SAVE_SELF_HOSTED : PARAM_PM_SAVE_NEVER; - - if (pm_save_enable != PARAM_PM_SAVE_NEVER) { - drvdata->save_state = devm_kmalloc(dev, - sizeof(struct etmv4_save_state), GFP_KERNEL); - if (!drvdata->save_state) - return -ENOMEM; - } - - drvdata->base = base; - - spin_lock_init(&drvdata->spinlock); - - drvdata->cpu = coresight_get_cpu(dev); - if (drvdata->cpu < 0) - return drvdata->cpu; - - init_arg.drvdata = drvdata; - init_arg.csa = &desc.access; - init_arg.pid = etm_pid; - - if (smp_call_function_single(drvdata->cpu, - etm4_init_arch_data, &init_arg, 1)) - dev_err(dev, "ETM arch init failed\n"); + desc.access = *init_arg->csa; if (!drvdata->arch) return -EINVAL; @@ -2016,6 +1991,68 @@ static int etm4_probe(struct device *dev, void __iomem *base, u32 etm_pid) return 0; } +static int etm4_probe(struct device *dev, void __iomem *base, u32 etm_pid) +{ + struct etmv4_drvdata *drvdata; + struct csdev_access access = { 0 }; + struct etm4_init_arg init_arg = { 0 }; + struct etm4_init_arg *delayed; + + drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL); + if (!drvdata) + return -ENOMEM; + + dev_set_drvdata(dev, drvdata); + + if (pm_save_enable == PARAM_PM_SAVE_FIRMWARE) + pm_save_enable = coresight_loses_context_with_cpu(dev) ? + PARAM_PM_SAVE_SELF_HOSTED : PARAM_PM_SAVE_NEVER; + + if (pm_save_enable != PARAM_PM_SAVE_NEVER) { + drvdata->save_state = devm_kmalloc(dev, + sizeof(struct etmv4_save_state), GFP_KERNEL); + if (!drvdata->save_state) + return -ENOMEM; + } + + drvdata->base = base; + + spin_lock_init(&drvdata->spinlock); + + drvdata->cpu = coresight_get_cpu(dev); + if (drvdata->cpu < 0) + return drvdata->cpu; + + init_arg.dev = dev; + init_arg.csa = &access; + init_arg.pid = etm_pid; + + /* + * Serialize against CPUHP callbacks to avoid race condition + * between the smp call and saving the delayed probe. 
+ */ + cpus_read_lock(); + if (smp_call_function_single(drvdata->cpu, + etm4_init_arch_data, &init_arg, 1)) { + /* The CPU was offline, try again once it comes online. */ + delayed = devm_kmalloc(dev, sizeof(*delayed), GFP_KERNEL); + if (!delayed) { + cpus_read_unlock(); + return -ENOMEM; + } + + *delayed = init_arg; + + per_cpu(delayed_probe, drvdata->cpu) = delayed; + + cpus_read_unlock(); + return 0; + } + cpus_read_unlock(); + + return etm4_add_coresight_dev(&init_arg); +} + static int etm4_probe_amba(struct amba_device *adev, const struct amba_id *id) { void __iomem *base; @@ -2054,6 +2091,35 @@ static int etm4_probe_platform_dev(struct platform_device *pdev) return ret; } +static int etm4_probe_cpu(unsigned int cpu) +{ + int ret; + struct etm4_init_arg init_arg; + struct csdev_access access = { 0 }; + struct etm4_init_arg *iap = *this_cpu_ptr(&delayed_probe); + + if (!iap) + return 0; + + init_arg = *iap; + devm_kfree(init_arg.dev, iap); + *this_cpu_ptr(&delayed_probe) = NULL; + + ret = pm_runtime_resume_and_get(init_arg.dev); + if (ret < 0) { + dev_err(init_arg.dev, "Failed to get PM runtime!\n"); + return 0; + } + + init_arg.csa = &access; + etm4_init_arch_data(&init_arg); + + etm4_add_coresight_dev(&init_arg); + + pm_runtime_put(init_arg.dev); + return 0; +} + static struct amba_cs_uci_id uci_id_etm4[] = { { /* ETMv4 UCI data */ @@ -2068,16 +2134,20 @@ static void clear_etmdrvdata(void *info) int cpu = *(int *)info; etmdrvdata[cpu] = NULL; + per_cpu(delayed_probe, cpu) = NULL; } static int __exit etm4_remove_dev(struct etmv4_drvdata *drvdata) { - etm_perf_symlink(drvdata->csdev, false); + bool had_delayed_probe; /* * Taking hotplug lock here to avoid racing between etm4_remove_dev() * and CPU hotplug call backs. */ cpus_read_lock(); + + had_delayed_probe = per_cpu(delayed_probe, drvdata->cpu); + /* * The readers for etmdrvdata[] are CPU hotplug call backs * and PM notification call backs. 
Change etmdrvdata[i] on @@ -2085,12 +2155,15 @@ static int __exit etm4_remove_dev(struct etmv4_drvdata *drvdata) * inside one call back function. */ if (smp_call_function_single(drvdata->cpu, clear_etmdrvdata, &drvdata->cpu, 1)) - etmdrvdata[drvdata->cpu] = NULL; + clear_etmdrvdata(&drvdata->cpu); cpus_read_unlock(); - cscfg_unregister_csdev(drvdata->csdev); - coresight_unregister(drvdata->csdev); + if (!had_delayed_probe) { + etm_perf_symlink(drvdata->csdev, false); + cscfg_unregister_csdev(drvdata->csdev); + coresight_unregister(drvdata->csdev); + } return 0; } From 8d0d129e94d4518fd17c13b4991ff10b7f4cd85a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 26 Oct 2022 13:52:17 -0300 Subject: [PATCH 0693/4122] perf bpf: No need to include headers just use forward declarations In the bpf-prologue.h header we are just using pointers, so no need to include headers for that, just provide forward declarations for those types. Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf-prologue.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/bpf-prologue.h b/tools/perf/util/bpf-prologue.h index c50c7358009f..66dcf751ef65 100644 --- a/tools/perf/util/bpf-prologue.h +++ b/tools/perf/util/bpf-prologue.h @@ -6,9 +6,8 @@ #ifndef __BPF_PROLOGUE_H #define __BPF_PROLOGUE_H -#include -#include -#include "probe-event.h" +struct probe_trace_arg; +struct bpf_insn; #define BPF_PROLOGUE_MAX_ARGS 3 #define BPF_PROLOGUE_START_ARG_REG BPF_REG_3 @@ -19,6 +18,7 @@ int bpf__gen_prologue(struct probe_trace_arg *args, int nargs, struct bpf_insn *new_prog, size_t *new_cnt, size_t cnt_space); #else +#include #include static inline int From 08043330167f1e21abe60ff7e124ed87d4fd029d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 26 Oct 2022 17:02:23 -0300 Subject: [PATCH 0694/4122] perf branch: Remove some needless headers, add a needed one map_symbol.h is needed because we have structs that contains 'struct 
addr_map_symbol', so add it, remove the others. Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/branch.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h index f838b23db180..94f36a187fe7 100644 --- a/tools/perf/util/branch.h +++ b/tools/perf/util/branch.h @@ -7,11 +7,9 @@ * detected in at least musl libc, used in Alpine Linux. -acme */ #include -#include -#include -#include #include #include +#include "util/map_symbol.h" #include "event.h" struct branch_flags { From 9823147da6c893d9295949e5ed982a8630deb6db Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 26 Oct 2022 17:24:27 -0300 Subject: [PATCH 0695/4122] perf tools: Move 'struct perf_sample' to a separate header file to disentangle headers Some places were including event.h just to get 'struct perf_sample', move it to a separate place so that we speed up a bit the build. Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/util/unwind-libdw.c | 2 +- tools/perf/arch/arm64/util/machine.c | 1 + tools/perf/arch/arm64/util/unwind-libdw.c | 2 +- tools/perf/arch/powerpc/util/event.c | 1 + tools/perf/arch/powerpc/util/unwind-libdw.c | 2 +- tools/perf/arch/s390/util/unwind-libdw.c | 1 + tools/perf/arch/x86/tests/sample-parsing.c | 1 + tools/perf/arch/x86/util/event.c | 1 + tools/perf/arch/x86/util/unwind-libdw.c | 2 +- tools/perf/bench/inject-buildid.c | 2 +- tools/perf/builtin-mem.c | 1 + tools/perf/tests/dlfilter-test.c | 1 + tools/perf/tests/mmap-basic.c | 1 + tools/perf/tests/openat-syscall-tp-fields.c | 1 + tools/perf/tests/parse-no-sample-id-all.c | 1 + tools/perf/tests/perf-record.c | 1 + tools/perf/tests/perf-time-to-tsc.c | 1 + tools/perf/tests/sw-clock.c | 1 + tools/perf/tests/switch-tracking.c | 1 + tools/perf/util/amd-sample-raw.c | 1 + tools/perf/util/auxtrace.c | 1 + tools/perf/util/branch.h | 2 +- tools/perf/util/data-convert-bt.c | 1 + tools/perf/util/event.h | 111 +---------------- 
tools/perf/util/evlist.c | 1 + .../intel-pt-decoder/intel-pt-insn-decoder.c | 1 + tools/perf/util/perf_regs.c | 2 +- tools/perf/util/s390-cpumsf.c | 1 + tools/perf/util/s390-sample-raw.c | 1 + tools/perf/util/sample.h | 117 ++++++++++++++++++ tools/perf/util/trace-event-scripting.c | 2 +- 31 files changed, 147 insertions(+), 118 deletions(-) create mode 100644 tools/perf/util/sample.h diff --git a/tools/perf/arch/arm/util/unwind-libdw.c b/tools/perf/arch/arm/util/unwind-libdw.c index b7692cb0c733..1834a0cd9ce3 100644 --- a/tools/perf/arch/arm/util/unwind-libdw.c +++ b/tools/perf/arch/arm/util/unwind-libdw.c @@ -2,7 +2,7 @@ #include #include "../../../util/unwind-libdw.h" #include "../../../util/perf_regs.h" -#include "../../../util/event.h" +#include "../../../util/sample.h" bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg) { diff --git a/tools/perf/arch/arm64/util/machine.c b/tools/perf/arch/arm64/util/machine.c index 41c1596e5207..235a0a1e1ec7 100644 --- a/tools/perf/arch/arm64/util/machine.c +++ b/tools/perf/arch/arm64/util/machine.c @@ -7,6 +7,7 @@ #include "symbol.h" #include "callchain.h" #include "record.h" +#include "util/perf_regs.h" void arch__add_leaf_frame_record_opts(struct record_opts *opts) { diff --git a/tools/perf/arch/arm64/util/unwind-libdw.c b/tools/perf/arch/arm64/util/unwind-libdw.c index a50941629649..09385081bb03 100644 --- a/tools/perf/arch/arm64/util/unwind-libdw.c +++ b/tools/perf/arch/arm64/util/unwind-libdw.c @@ -2,7 +2,7 @@ #include #include "../../../util/unwind-libdw.h" #include "../../../util/perf_regs.h" -#include "../../../util/event.h" +#include "../../../util/sample.h" bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg) { diff --git a/tools/perf/arch/powerpc/util/event.c b/tools/perf/arch/powerpc/util/event.c index cf430a4c55b9..77d8cc2b5691 100644 --- a/tools/perf/arch/powerpc/util/event.c +++ b/tools/perf/arch/powerpc/util/event.c @@ -9,6 +9,7 @@ #include "../../../util/tool.h" #include 
"../../../util/map.h" #include "../../../util/debug.h" +#include "../../../util/sample.h" void arch_perf_parse_sample_weight(struct perf_sample *data, const __u64 *array, u64 type) diff --git a/tools/perf/arch/powerpc/util/unwind-libdw.c b/tools/perf/arch/powerpc/util/unwind-libdw.c index 7b2d96ec28e3..e616642c754c 100644 --- a/tools/perf/arch/powerpc/util/unwind-libdw.c +++ b/tools/perf/arch/powerpc/util/unwind-libdw.c @@ -3,7 +3,7 @@ #include #include "../../../util/unwind-libdw.h" #include "../../../util/perf_regs.h" -#include "../../../util/event.h" +#include "../../../util/sample.h" /* See backends/ppc_initreg.c and backends/ppc_regs.c in elfutils. */ static const int special_regs[3][2] = { diff --git a/tools/perf/arch/s390/util/unwind-libdw.c b/tools/perf/arch/s390/util/unwind-libdw.c index 387c698cdd1b..7d92452d5287 100644 --- a/tools/perf/arch/s390/util/unwind-libdw.c +++ b/tools/perf/arch/s390/util/unwind-libdw.c @@ -3,6 +3,7 @@ #include "../../util/unwind-libdw.h" #include "../../util/perf_regs.h" #include "../../util/event.h" +#include "../../util/sample.h" #include "dwarf-regs-table.h" diff --git a/tools/perf/arch/x86/tests/sample-parsing.c b/tools/perf/arch/x86/tests/sample-parsing.c index bfbd3662b69e..690c7c07e90d 100644 --- a/tools/perf/arch/x86/tests/sample-parsing.c +++ b/tools/perf/arch/x86/tests/sample-parsing.c @@ -10,6 +10,7 @@ #include "event.h" #include "evsel.h" #include "debug.h" +#include "util/sample.h" #include "util/synthetic-events.h" #include "tests/tests.h" diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c index 55ff6aec10fd..a3acefe6d0c6 100644 --- a/tools/perf/arch/x86/util/event.c +++ b/tools/perf/arch/x86/util/event.c @@ -10,6 +10,7 @@ #include "../../../util/tool.h" #include "../../../util/map.h" #include "../../../util/debug.h" +#include "util/sample.h" #if defined(__x86_64__) diff --git a/tools/perf/arch/x86/util/unwind-libdw.c b/tools/perf/arch/x86/util/unwind-libdw.c index 
eea2bf87232b..ef71e8bf80bf 100644 --- a/tools/perf/arch/x86/util/unwind-libdw.c +++ b/tools/perf/arch/x86/util/unwind-libdw.c @@ -2,7 +2,7 @@ #include #include "../../../util/unwind-libdw.h" #include "../../../util/perf_regs.h" -#include "../../../util/event.h" +#include "util/sample.h" bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg) { diff --git a/tools/perf/bench/inject-buildid.c b/tools/perf/bench/inject-buildid.c index 17672790f123..4561bda0ce6a 100644 --- a/tools/perf/bench/inject-buildid.c +++ b/tools/perf/bench/inject-buildid.c @@ -19,10 +19,10 @@ #include "util/data.h" #include "util/stat.h" #include "util/debug.h" -#include "util/event.h" #include "util/symbol.h" #include "util/session.h" #include "util/build-id.h" +#include "util/sample.h" #include "util/synthetic-events.h" #define MMAP_DEV_MAJOR 8 diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 923fb8316fda..dedd612eae5e 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -20,6 +20,7 @@ #include "util/symbol.h" #include "util/pmu.h" #include "util/pmu-hybrid.h" +#include "util/sample.h" #include "util/string2.h" #include diff --git a/tools/perf/tests/dlfilter-test.c b/tools/perf/tests/dlfilter-test.c index 84352d55347d..99aa72e425e4 100644 --- a/tools/perf/tests/dlfilter-test.c +++ b/tools/perf/tests/dlfilter-test.c @@ -33,6 +33,7 @@ #include "archinsn.h" #include "dlfilter.h" #include "tests.h" +#include "util/sample.h" #define MAP_START 0x400000 diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 8322fc2295fa..6377906c1318 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -10,6 +10,7 @@ #include "thread_map.h" #include "tests.h" #include "util/mmap.h" +#include "util/sample.h" #include #include #include diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index a7b2800652e4..888df8eca981 100644 --- 
a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -14,6 +14,7 @@ #include "util/mmap.h" #include #include +#include "util/sample.h" #ifndef O_DIRECTORY #define O_DIRECTORY 00200000 diff --git a/tools/perf/tests/parse-no-sample-id-all.c b/tools/perf/tests/parse-no-sample-id-all.c index d62e31595ab2..202f0a9a6796 100644 --- a/tools/perf/tests/parse-no-sample-id-all.c +++ b/tools/perf/tests/parse-no-sample-id-all.c @@ -8,6 +8,7 @@ #include "evlist.h" #include "header.h" #include "debug.h" +#include "util/sample.h" static int process_event(struct evlist **pevlist, union perf_event *event) { diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 7aa946aa886d..d82539e2ae64 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -11,6 +11,7 @@ #include "record.h" #include "tests.h" #include "util/mmap.h" +#include "util/sample.h" static int sched__get_first_possible_cpu(pid_t pid, cpu_set_t *maskp) { diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c index c3aaa1ddff29..efcd71c2738a 100644 --- a/tools/perf/tests/perf-time-to-tsc.c +++ b/tools/perf/tests/perf-time-to-tsc.c @@ -20,6 +20,7 @@ #include "tsc.h" #include "mmap.h" #include "tests.h" +#include "util/sample.h" /* * Except x86_64/i386 and Arm64, other archs don't support TSC in perf. 
Just diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index 9cd6fec375ee..4d7493fa0105 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -13,6 +13,7 @@ #include "util/evlist.h" #include "util/cpumap.h" #include "util/mmap.h" +#include "util/sample.h" #include "util/thread_map.h" #include #include diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index 87f565c7f650..b3bd14b025a8 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -19,6 +19,7 @@ #include "record.h" #include "tests.h" #include "util/mmap.h" +#include "util/sample.h" #include "pmu.h" static int spin_sleep(void) diff --git a/tools/perf/util/amd-sample-raw.c b/tools/perf/util/amd-sample-raw.c index 238305868644..b0e70ce9d87a 100644 --- a/tools/perf/util/amd-sample-raw.c +++ b/tools/perf/util/amd-sample-raw.c @@ -16,6 +16,7 @@ #include "evlist.h" #include "sample-raw.h" #include "pmu-events/pmu-events.h" +#include "util/sample.h" static u32 cpu_family, cpu_model, ibs_fetch_type, ibs_op_type; static bool zen4_ibs_extensions; diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 46ada5ec3f9a..265d20cc126b 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -59,6 +59,7 @@ #include #include "symbol/kallsyms.h" #include +#include "util/sample.h" /* * Make a group from 'leader' to 'last', requiring that the events were not diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h index 94f36a187fe7..d6017c9b1872 100644 --- a/tools/perf/util/branch.h +++ b/tools/perf/util/branch.h @@ -10,7 +10,7 @@ #include #include #include "util/map_symbol.h" -#include "event.h" +#include "util/sample.h" struct branch_flags { union { diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 9e0aee276df8..c65cdaf6975e 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -34,6 +34,7 @@ 
#include #include "util.h" #include "clockid.h" +#include "util/sample.h" #define pr_N(n, fmt, ...) \ eprintf(n, debug_data_convert, fmt, ##__VA_ARGS__) diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 65495f6945b4..8b71ac1af81b 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -15,6 +15,7 @@ struct dso; struct machine; struct perf_event_attr; +struct perf_sample; #ifdef __LP64__ /* @@ -42,61 +43,6 @@ struct perf_event_attr; /* perf sample has 16 bits size limit */ #define PERF_SAMPLE_MAX_SIZE (1 << 16) -/* number of register is bound by the number of bits in regs_dump::mask (64) */ -#define PERF_SAMPLE_REGS_CACHE_SIZE (8 * sizeof(u64)) - -struct regs_dump { - u64 abi; - u64 mask; - u64 *regs; - - /* Cached values/mask filled by first register access. */ - u64 cache_regs[PERF_SAMPLE_REGS_CACHE_SIZE]; - u64 cache_mask; -}; - -struct stack_dump { - u16 offset; - u64 size; - char *data; -}; - -struct sample_read_value { - u64 value; - u64 id; /* only if PERF_FORMAT_ID */ - u64 lost; /* only if PERF_FORMAT_LOST */ -}; - -struct sample_read { - u64 time_enabled; - u64 time_running; - union { - struct { - u64 nr; - struct sample_read_value *values; - } group; - struct sample_read_value one; - }; -}; - -static inline size_t sample_read_value_size(u64 read_format) -{ - /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */ - if (read_format & PERF_FORMAT_LOST) - return sizeof(struct sample_read_value); - else - return offsetof(struct sample_read_value, lost); -} - -static inline struct sample_read_value * -next_sample_read_value(struct sample_read_value *v, u64 read_format) -{ - return (void *)v + sample_read_value_size(read_format); -} - -#define sample_read_group__for_each(v, nr, rf) \ - for (int __i = 0; __i < (int)nr; v = next_sample_read_value(v, rf), __i++) - struct ip_callchain { u64 nr; u64 ips[]; @@ -138,52 +84,6 @@ enum { PERF_IP_FLAG_VMENTRY |\ PERF_IP_FLAG_VMEXIT) -#define MAX_INSN 16 - -struct aux_sample { - u64 size; - 
void *data; -}; - -struct perf_sample { - u64 ip; - u32 pid, tid; - u64 time; - u64 addr; - u64 id; - u64 stream_id; - u64 period; - u64 weight; - u64 transaction; - u64 insn_cnt; - u64 cyc_cnt; - u32 cpu; - u32 raw_size; - u64 data_src; - u64 phys_addr; - u64 data_page_size; - u64 code_page_size; - u64 cgroup; - u32 flags; - u32 machine_pid; - u32 vcpu; - u16 insn_len; - u8 cpumode; - u16 misc; - u16 ins_lat; - u16 p_stage_cyc; - bool no_hw_idx; /* No hw_idx collected in branch_stack */ - char insn[MAX_INSN]; - void *raw_data; - struct ip_callchain *callchain; - struct branch_stack *branch_stack; - struct regs_dump user_regs; - struct regs_dump intr_regs; - struct stack_dump user_stack; - struct sample_read read; - struct aux_sample aux_sample; -}; - #define PERF_MEM_DATA_SRC_NONE \ (PERF_MEM_S(OP, NA) |\ PERF_MEM_S(LVL, NA) |\ @@ -342,15 +242,6 @@ struct perf_synth_intel_iflag_chg { u64 branch_ip; /* If via_branch */ }; -/* - * raw_data is always 4 bytes from an 8-byte boundary, so subtract 4 to get - * 8-byte alignment. 
- */ -static inline void *perf_sample__synth_ptr(struct perf_sample *sample) -{ - return sample->raw_data - 4; -} - static inline void *perf_synth__raw_data(void *p) { return p + 4; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 6612b00949e7..112850d629cb 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -29,6 +29,7 @@ #include "util/evsel_fprintf.h" #include "util/evlist-hybrid.h" #include "util/pmu.h" +#include "util/sample.h" #include #include #include diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c index 1376077183f7..22308dd93010 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c @@ -18,6 +18,7 @@ #include "intel-pt-insn-decoder.h" #include "dump-insn.h" +#include "util/sample.h" #if INTEL_PT_INSN_BUF_SZ < MAX_INSN_SIZE || INTEL_PT_INSN_BUF_SZ > MAX_INSN #error Instruction buffer size too small diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c index 872dd3d38782..57a567ee2cea 100644 --- a/tools/perf/util/perf_regs.c +++ b/tools/perf/util/perf_regs.c @@ -2,7 +2,7 @@ #include #include #include "perf_regs.h" -#include "event.h" +#include "util/sample.h" int __weak arch_sdt_arg_parse_op(char *old_op __maybe_unused, char **new_op __maybe_unused) diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c index f3fdad28a852..6fe478b0b61b 100644 --- a/tools/perf/util/s390-cpumsf.c +++ b/tools/perf/util/s390-cpumsf.c @@ -163,6 +163,7 @@ #include "s390-cpumsf-kernel.h" #include "s390-cpumcf-kernel.h" #include "config.h" +#include "util/sample.h" struct s390_cpumsf { struct auxtrace auxtrace; diff --git a/tools/perf/util/s390-sample-raw.c b/tools/perf/util/s390-sample-raw.c index 9a631d97471c..c10b891dbad6 100644 --- a/tools/perf/util/s390-sample-raw.c +++ b/tools/perf/util/s390-sample-raw.c @@ -28,6 +28,7 @@ #include 
"sample-raw.h" #include "s390-cpumcf-kernel.h" #include "pmu-events/pmu-events.h" +#include "util/sample.h" static size_t ctrset_size(struct cf_ctrset_entry *set) { diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h new file mode 100644 index 000000000000..60ec79d4eea4 --- /dev/null +++ b/tools/perf/util/sample.h @@ -0,0 +1,117 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_SAMPLE_H +#define __PERF_SAMPLE_H + +#include +#include + +/* number of register is bound by the number of bits in regs_dump::mask (64) */ +#define PERF_SAMPLE_REGS_CACHE_SIZE (8 * sizeof(u64)) + +struct regs_dump { + u64 abi; + u64 mask; + u64 *regs; + + /* Cached values/mask filled by first register access. */ + u64 cache_regs[PERF_SAMPLE_REGS_CACHE_SIZE]; + u64 cache_mask; +}; + +struct stack_dump { + u16 offset; + u64 size; + char *data; +}; + +struct sample_read_value { + u64 value; + u64 id; /* only if PERF_FORMAT_ID */ + u64 lost; /* only if PERF_FORMAT_LOST */ +}; + +struct sample_read { + u64 time_enabled; + u64 time_running; + union { + struct { + u64 nr; + struct sample_read_value *values; + } group; + struct sample_read_value one; + }; +}; + +static inline size_t sample_read_value_size(u64 read_format) +{ + /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */ + if (read_format & PERF_FORMAT_LOST) + return sizeof(struct sample_read_value); + else + return offsetof(struct sample_read_value, lost); +} + +static inline struct sample_read_value *next_sample_read_value(struct sample_read_value *v, u64 read_format) +{ + return (void *)v + sample_read_value_size(read_format); +} + +#define sample_read_group__for_each(v, nr, rf) \ + for (int __i = 0; __i < (int)nr; v = next_sample_read_value(v, rf), __i++) + +#define MAX_INSN 16 + +struct aux_sample { + u64 size; + void *data; +}; + +struct perf_sample { + u64 ip; + u32 pid, tid; + u64 time; + u64 addr; + u64 id; + u64 stream_id; + u64 period; + u64 weight; + u64 transaction; + u64 insn_cnt; + u64 cyc_cnt; + u32 cpu; 
+ u32 raw_size; + u64 data_src; + u64 phys_addr; + u64 data_page_size; + u64 code_page_size; + u64 cgroup; + u32 flags; + u32 machine_pid; + u32 vcpu; + u16 insn_len; + u8 cpumode; + u16 misc; + u16 ins_lat; + u16 p_stage_cyc; + bool no_hw_idx; /* No hw_idx collected in branch_stack */ + char insn[MAX_INSN]; + void *raw_data; + struct ip_callchain *callchain; + struct branch_stack *branch_stack; + struct regs_dump user_regs; + struct regs_dump intr_regs; + struct stack_dump user_stack; + struct sample_read read; + struct aux_sample aux_sample; +}; + +/* + * raw_data is always 4 bytes from an 8-byte boundary, so subtract 4 to get + * 8-byte alignment. + */ +static inline void *perf_sample__synth_ptr(struct perf_sample *sample) +{ + return sample->raw_data - 4; +} + +#endif /* __PERF_SAMPLE_H */ diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c index 7172ca05265f..636a010d929b 100644 --- a/tools/perf/util/trace-event-scripting.c +++ b/tools/perf/util/trace-event-scripting.c @@ -12,9 +12,9 @@ #include "debug.h" #include "trace-event.h" -#include "event.h" #include "evsel.h" #include +#include "util/sample.h" struct scripting_context *scripting_context; From 628d69995e66343266475e6afc76192f5878b605 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 27 Oct 2022 16:49:58 -0300 Subject: [PATCH 0696/4122] perf kwork: Remove includes not needed in kwork.h Leave just some forward declarations for pointers, move the includes to where they are really needed. 
Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kwork.c | 6 ++++++ tools/perf/util/bpf_kwork.c | 3 +++ tools/perf/util/kwork.h | 12 ++++++------ 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/tools/perf/builtin-kwork.c b/tools/perf/builtin-kwork.c index fb8c63656ad8..4ffbf5908070 100644 --- a/tools/perf/builtin-kwork.c +++ b/tools/perf/builtin-kwork.c @@ -6,10 +6,15 @@ */ #include "builtin.h" +#include "perf.h" #include "util/data.h" +#include "util/evlist.h" +#include "util/evsel.h" +#include "util/header.h" #include "util/kwork.h" #include "util/debug.h" +#include "util/session.h" #include "util/symbol.h" #include "util/thread.h" #include "util/string2.h" @@ -21,6 +26,7 @@ #include #include +#include #include #include #include diff --git a/tools/perf/util/bpf_kwork.c b/tools/perf/util/bpf_kwork.c index b629dd679d3f..6eb2c78fd7f4 100644 --- a/tools/perf/util/bpf_kwork.c +++ b/tools/perf/util/bpf_kwork.c @@ -7,15 +7,18 @@ #include #include +#include #include #include #include #include "util/debug.h" +#include "util/evsel.h" #include "util/kwork.h" #include +#include #include "util/bpf_skel/kwork_trace.skel.h" diff --git a/tools/perf/util/kwork.h b/tools/perf/util/kwork.h index 320c0a6d2e08..53b7327550b8 100644 --- a/tools/perf/util/kwork.h +++ b/tools/perf/util/kwork.h @@ -1,16 +1,16 @@ #ifndef PERF_UTIL_KWORK_H #define PERF_UTIL_KWORK_H -#include "perf.h" - #include "util/tool.h" -#include "util/event.h" -#include "util/evlist.h" -#include "util/session.h" #include "util/time-utils.h" -#include #include +#include +#include +#include + +struct perf_sample; +struct perf_session; enum kwork_class_type { KWORK_CLASS_IRQ, From 7e5c6f2c1aa2daa0d8aca657377450529f381fe6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 27 Oct 2022 16:54:46 -0300 Subject: [PATCH 0697/4122] perf machine: Move machine__resolve() from event.h It's a machine method, so move it to machine.h, this way some places that were using event.h just to get
this prototype may stop doing so and speed up building and disentangle the header dependency graph. Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.h | 3 --- tools/perf/util/machine.h | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 8b71ac1af81b..ea5bd1f62b0f 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -337,9 +337,6 @@ int perf_event__process(struct perf_tool *tool, struct addr_location; -int machine__resolve(struct machine *machine, struct addr_location *al, - struct perf_sample *sample); - void addr_location__put(struct addr_location *al); struct thread; diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 74935dfaa937..6267c1d6f232 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -305,4 +305,7 @@ int machine__create_extra_kernel_map(struct machine *machine, int machine__map_x86_64_entry_trampolines(struct machine *machine, struct dso *kernel); +int machine__resolve(struct machine *machine, struct addr_location *al, + struct perf_sample *sample); + #endif /* __PERF_MACHINE_H */ From d1e633e4cdc0d06cec82d4772c025f13c3b25a6c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 27 Oct 2022 16:54:46 -0300 Subject: [PATCH 0698/4122] perf symbol: Move addr_location__put() from event.h It's an addr_location method, so move it to symbol.h, where 'struct addr_location' is, this way some places that were using event.h just to get this prototype may stop doing so and speed up building and disentangle the header dependency graph.
Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.h | 3 --- tools/perf/util/symbol.h | 2 ++ 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index ea5bd1f62b0f..bc6c1e2206cf 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -336,9 +336,6 @@ int perf_event__process(struct perf_tool *tool, struct machine *machine); struct addr_location; - -void addr_location__put(struct addr_location *al); - struct thread; bool is_bts_event(struct perf_event_attr *attr); diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 0b893dcc8ea6..e297de14184c 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -132,6 +132,8 @@ struct addr_location { s32 socket; }; +void addr_location__put(struct addr_location *al); + int dso__load(struct dso *dso, struct map *map); int dso__load_vmlinux(struct dso *dso, struct map *map, const char *vmlinux, bool vmlinux_allocated); From cde5671268faf6419026ebd5e0f1783b3a84cf39 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 27 Oct 2022 16:54:46 -0300 Subject: [PATCH 0699/4122] perf thread: Move thread__resolve() from event.h It's a thread method, so move it to thread.h, this way some places that were using event.h just to get this prototype may stop doing so and speed up building and disentangle the header dependency graph.
Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.h | 5 ----- tools/perf/util/thread.h | 3 +++ 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index bc6c1e2206cf..6663a676eadc 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -335,13 +335,8 @@ int perf_event__process(struct perf_tool *tool, struct perf_sample *sample, struct machine *machine); -struct addr_location; -struct thread; - bool is_bts_event(struct perf_event_attr *attr); bool sample_addr_correlates_sym(struct perf_event_attr *attr); -void thread__resolve(struct thread *thread, struct addr_location *al, - struct perf_sample *sample); const char *perf_event__name(unsigned int id); diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 241f300d7d6e..395c626699a9 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -158,4 +158,7 @@ static inline bool thread__is_filtered(struct thread *thread) void thread__free_stitch_list(struct thread *thread); +void thread__resolve(struct thread *thread, struct addr_location *al, + struct perf_sample *sample); + #endif /* __PERF_THREAD_H */ From fd8d5a3b076c033f5589186ac49d76e74b39f97f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 27 Oct 2022 17:02:57 -0300 Subject: [PATCH 0700/4122] perf tests: Add missing event.h include It uses things like perf_event__name() but were not including event.h, where its prototype lives, fix it. 
Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/mmap-basic.c | 1 + tools/perf/tests/perf-record.c | 1 + 2 files changed, 2 insertions(+) diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 6377906c1318..e68ca6229756 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -5,6 +5,7 @@ #include #include "debug.h" +#include "event.h" #include "evlist.h" #include "evsel.h" #include "thread_map.h" diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index d82539e2ae64..1c4feec1adff 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -5,6 +5,7 @@ #include #include +#include "event.h" #include "evlist.h" #include "evsel.h" #include "debug.h" From 606f70ab7f5ace535514a5fa6f9ad62ae6e515eb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 27 Oct 2022 17:42:52 -0300 Subject: [PATCH 0701/4122] perf mmap: Remove several unneeded includes from util/mmap.h Those headers are not needed in util/mmap.h, remove them. Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/mmap.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index cd4ccec7f361..f944c3cd5efa 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -2,18 +2,13 @@ #define __PERF_MMAP_H 1 #include -#include -#include #include -#include #include #include -#include #ifdef HAVE_AIO_SUPPORT #include #endif #include "auxtrace.h" -#include "event.h" #include "util/compress.h" struct aiocb; From 2e5a738abc69601a4ea4a3544ec29cab9189eaae Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 27 Oct 2022 17:44:07 -0300 Subject: [PATCH 0702/4122] perf evlist: Add missing util/event.h header Needed to get the event_attr_init() and perf_event_paranoid() prototypes that were being obtained indirectly, by sheer luck. 
Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 112850d629cb..fbf3192bced9 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -24,6 +24,7 @@ #include "../perf.h" #include "asm/bug.h" #include "bpf-event.h" +#include "util/event.h" #include "util/string2.h" #include "util/perf_api_probe.h" #include "util/evsel_fprintf.h" From 146edff3d7ed135269dae8abef3219083c45b21e Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 12 Oct 2022 18:12:05 -0700 Subject: [PATCH 0703/4122] perf test: Parse events workaround for dash/minus Skip an event configuration for event names with a dash/minus in them. Events with a dash/minus in their name cause parsing issues as legacy encoding of events would use a dash/minus as a separator. The parser separates events with dashes into prefixes and suffixes and then recombines them. Unfortunately if an event has part of its name that matches a legacy token then the recombining fails. This is seen for branch-brs where branch is a legacy token. 
branch-brs was introduced to sysfs in: https://lore.kernel.org/all/20220322221517.2510440-5-eranian@google.com/ The failure is shown below as well as the workaround to use a config where the dash/minus isn't treated specially: ``` $ perf stat -e branch-brs true event syntax error: 'branch-brs' \___ parser error $ perf stat -e cpu/branch-brs/ true Performance counter stats for 'true': 46,179 cpu/branch-brs/ ``` Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20221013011205.3151391-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/parse-events.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 459afdb256a1..3440dd2616b0 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -2237,6 +2237,19 @@ static int test__pmu_events(struct test_suite *test __maybe_unused, int subtest pr_debug("Test PMU event failed for '%s'", name); ret = combine_test_results(ret, test_ret); } + /* + * Names containing '-' are recognized as prefixes and suffixes + * due to '-' being a legacy PMU separator. This fails when the + * prefix or suffix collides with an existing legacy token. For + * example, branch-brs has a prefix (branch) that collides with + * a PE_NAME_CACHE_TYPE token causing a parse error as a suffix + * isn't expected after this. As event names in the config + * slashes are allowed a '-' in the name we check this works + * above. 
+ */ + if (strchr(ent->d_name, '-')) + continue; + snprintf(name, sizeof(name), "%s:u,cpu/event=%s/u", ent->d_name, ent->d_name); e.name = name; e.check = test__checkevent_pmu_events_mix; From 005ef2b41b119caf65ac611109ce7ae68d857b46 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 28 Oct 2022 11:01:25 -0700 Subject: [PATCH 0704/4122] perf lock contention: Fix memory sanitizer issue The msan reported a use-of-uninitialized-value warning for the struct lock_contention_data in lock_contention_read(). While it'd be filled by bpf_map_lookup_elem(), let's just initialize it to silence the warning. ==12524==WARNING: MemorySanitizer: use-of-uninitialized-value #0 0x562b0f16b1cd in lock_contention_read util/bpf_lock_contention.c:139:7 #1 0x562b0ef65ec6 in __cmd_contention builtin-lock.c:1737:3 #2 0x562b0ef65ec6 in cmd_lock builtin-lock.c:1992:8 #3 0x562b0ee7f50b in run_builtin perf.c:322:11 #4 0x562b0ee7efc1 in handle_internal_command perf.c:376:8 #5 0x562b0ee7e1e9 in run_argv perf.c:420:2 #6 0x562b0ee7e1e9 in main perf.c:550:3 #7 0x7f065f10e632 in __libc_start_main (/usr/lib64/libc.so.6+0x61632) #8 0x562b0edf2fa9 in _start (perf+0xfa9) SUMMARY: MemorySanitizer: use-of-uninitialized-value (perf+0xe15160) in lock_contention_read Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221028180128.3311491-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf_lock_contention.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c index fc4d613cb979..06466da792e4 100644 --- a/tools/perf/util/bpf_lock_contention.c +++ b/tools/perf/util/bpf_lock_contention.c @@ -110,7 +110,7 @@ int lock_contention_read(struct lock_contention *con) { int fd, stack; s32 prev_key, key; - struct lock_contention_data data; + struct lock_contention_data data = {}; struct lock_stat *st; 
struct machine *machine = con->machine; u64 stack_trace[con->max_stack]; From 0a277b6226703812ba90543d5bb49476e03f816e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 28 Oct 2022 11:01:26 -0700 Subject: [PATCH 0705/4122] perf lock contention: Check --max-stack option The --max-stack option is used to allocate the BPF stack map and stack trace array in the userspace. Check the value properly before using. Practically it cannot be greater than the sysctl_perf_event_max_stack. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221028180128.3311491-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-lock.c | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 66520712a167..6f79175365a8 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -24,6 +24,7 @@ #include "util/data.h" #include "util/string2.h" #include "util/map.h" +#include "util/util.h" #include #include @@ -1858,6 +1859,29 @@ static int parse_map_entry(const struct option *opt, const char *str, return 0; } +static int parse_max_stack(const struct option *opt, const char *str, + int unset __maybe_unused) +{ + unsigned long *len = (unsigned long *)opt->value; + long val; + char *endptr; + + errno = 0; + val = strtol(str, &endptr, 0); + if (*endptr != '\0' || errno != 0) { + pr_err("invalid max stack depth: %s\n", str); + return -1; + } + + if (val < 0 || val > sysctl__max_stack()) { + pr_err("invalid max stack depth: %ld\n", val); + return -1; + } + + *len = val; + return 0; +} + int cmd_lock(int argc, const char **argv) { const struct option lock_options[] = { @@ -1913,9 +1937,9 @@ int cmd_lock(int argc, const char **argv) "Trace on existing thread id (exclusive to --pid)"), OPT_CALLBACK(0, "map-nr-entries", &bpf_map_entries, "num", "Max number of BPF map 
entries", parse_map_entry), - OPT_INTEGER(0, "max-stack", &max_stack_depth, - "Set the maximum stack depth when collecting lock contention, " - "Default: " __stringify(CONTENTION_STACK_DEPTH)), + OPT_CALLBACK(0, "max-stack", &max_stack_depth, "num", + "Set the maximum stack depth when collecting lopck contention, " + "Default: " __stringify(CONTENTION_STACK_DEPTH), parse_max_stack), OPT_INTEGER(0, "stack-skip", &stack_skip, "Set the number of stack depth to skip when finding a lock caller, " "Default: " __stringify(CONTENTION_STACK_SKIP)), From 9e9c5f3cf912c3e068b6c24d77f6f07ac242395b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 28 Oct 2022 11:01:27 -0700 Subject: [PATCH 0706/4122] perf lock contention: Avoid variable length arrays The msan also warns about the use of VLA for stack_trace variable. We can dynamically allocate instead. While at it, simplify the error handle a bit (and fix bugs). Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221028180128.3311491-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf_lock_contention.c | 41 ++++++++++++++++++--------- 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c index 06466da792e4..0deec1178778 100644 --- a/tools/perf/util/bpf_lock_contention.c +++ b/tools/perf/util/bpf_lock_contention.c @@ -108,28 +108,36 @@ int lock_contention_stop(void) int lock_contention_read(struct lock_contention *con) { - int fd, stack; + int fd, stack, err = 0; s32 prev_key, key; struct lock_contention_data data = {}; - struct lock_stat *st; + struct lock_stat *st = NULL; struct machine *machine = con->machine; - u64 stack_trace[con->max_stack]; + u64 *stack_trace; + size_t stack_size = con->max_stack * sizeof(*stack_trace); fd = bpf_map__fd(skel->maps.lock_stat); stack = bpf_map__fd(skel->maps.stacks); con->lost 
= skel->bss->lost; + stack_trace = zalloc(stack_size); + if (stack_trace == NULL) + return -1; + prev_key = 0; while (!bpf_map_get_next_key(fd, &prev_key, &key)) { struct map *kmap; struct symbol *sym; int idx = 0; + /* to handle errors in the loop body */ + err = -1; + bpf_map_lookup_elem(fd, &key, &data); st = zalloc(sizeof(*st)); if (st == NULL) - return -1; + break; st->nr_contended = data.count; st->wait_time_total = data.total_time; @@ -163,25 +171,32 @@ int lock_contention_read(struct lock_contention *con) st->name = strdup(sym->name); if (ret < 0 || st->name == NULL) - return -1; + break; } else if (asprintf(&st->name, "%#lx", (unsigned long)st->addr) < 0) { - free(st); - return -1; + break; } if (verbose) { - st->callstack = memdup(stack_trace, sizeof(stack_trace)); - if (st->callstack == NULL) { - free(st); - return -1; - } + st->callstack = memdup(stack_trace, stack_size); + if (st->callstack == NULL) + break; } hlist_add_head(&st->hash_entry, con->result); prev_key = key; + + /* we're fine now, reset the values */ + st = NULL; + err = 0; } - return 0; + free(stack_trace); + if (st) { + free(st->name); + free(st); + } + + return err; } int lock_contention_finish(void) From c940fa276b5a4210255e0cb9766f06e38443303a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 28 Oct 2022 11:01:28 -0700 Subject: [PATCH 0707/4122] perf lock contention: Increase default stack skip to 4 In most configurations, it works well with skipping 4 entries by default. If some systems still have 3 BPF internal stack frames, the next frame should be in a lock function which will be skipped later when it tries to find a caller. So increasing to 4 won't affect such systems too. 
With --stack-skip=0, I can see something like this: 24 49.84 us 7.41 us 2.08 us mutex bpf_prog_e1b85959d520446c_contention_begin+0x12e 0xffffffffc045040e bpf_prog_e1b85959d520446c_contention_begin+0x12e 0xffffffffc045040e bpf_prog_e1b85959d520446c_contention_begin+0x12e 0xffffffff82ea2071 bpf_trace_run2+0x51 0xffffffff82de775b __bpf_trace_contention_begin+0xb 0xffffffff82c02045 __mutex_lock+0x245 0xffffffff82c019e3 __mutex_lock_slowpath+0x13 0xffffffff82c019c0 mutex_lock+0x20 0xffffffff830a083c kernfs_iop_permission+0x2c Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Song Liu Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20221028180128.3311491-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/lock-contention.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/lock-contention.h b/tools/perf/util/lock-contention.h index b8cb8830b7bc..e3c061b1795b 100644 --- a/tools/perf/util/lock-contention.h +++ b/tools/perf/util/lock-contention.h @@ -91,7 +91,7 @@ struct thread_stat { * Number of stack trace entries to skip when finding callers. * The first few entries belong to the locking implementation itself. */ -#define CONTENTION_STACK_SKIP 3 +#define CONTENTION_STACK_SKIP 4 /* * flags for lock:contention_begin From 98c148cd5c73a4cf1d78319e309a79eef50b4220 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Mon, 31 Oct 2022 16:18:08 +0100 Subject: [PATCH 0708/4122] power: supply: core: repair kernel-doc for power_supply_vbat2ri() The function power_supply_vbat2ri() does not have a parameter called table, despite it being mentioned in the kernel-doc comment. The table is actually obtained from the info parameter, the battery information container. Hence, ./scripts/kernel-doc -none drivers/power/supply/power_supply_core.c warns about this excess function parameter. 
Adjust the kernel-doc comment for power_supply_vbat2ri() for make W=1 happiness. Fixes: e9e7d165b4b0 ("power: supply: Support VBAT-to-Ri lookup tables") Signed-off-by: Lukas Bulwahn Signed-off-by: Sebastian Reichel --- drivers/power/supply/power_supply_core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c index 4b5fb172fa99..9035e349bf53 100644 --- a/drivers/power/supply/power_supply_core.c +++ b/drivers/power/supply/power_supply_core.c @@ -870,7 +870,6 @@ EXPORT_SYMBOL_GPL(power_supply_temp2resist_simple); * power_supply_vbat2ri() - find the battery internal resistance * from the battery voltage * @info: The battery information container - * @table: Pointer to battery resistance temperature table * @vbat_uv: The battery voltage in microvolt * @charging: If we are charging (true) or not (false) * From 9a265e04b99dd2e9c2fac26875e120c777f14f43 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Mon, 31 Oct 2022 09:36:21 +0200 Subject: [PATCH 0709/4122] power: supply: bd99954: Use LINEAR_RANGE() Do minor clean-up by using newly introduced LINEAR_RANGE() initialization macro. Signed-off-by: Matti Vaittinen Signed-off-by: Sebastian Reichel --- drivers/power/supply/bd99954-charger.c | 84 ++++---------------------- 1 file changed, 13 insertions(+), 71 deletions(-) diff --git a/drivers/power/supply/bd99954-charger.c b/drivers/power/supply/bd99954-charger.c index 96e93e1b8094..250362e15c98 100644 --- a/drivers/power/supply/bd99954-charger.c +++ b/drivers/power/supply/bd99954-charger.c @@ -768,27 +768,13 @@ static const struct power_supply_desc bd9995x_power_supply_desc = { * Describe the setting in linear_range table.
*/ static const struct linear_range input_current_limit_ranges[] = { - { - .min = 0, - .step = 32000, - .min_sel = 0x0, - .max_sel = 0x1ff, - }, + LINEAR_RANGE(0, 0x0, 0x1ff, 32000), }; /* Possible trickle, pre-charging and termination current values */ static const struct linear_range charging_current_ranges[] = { - { - .min = 0, - .step = 64000, - .min_sel = 0x0, - .max_sel = 0x10, - }, { - .min = 1024000, - .step = 0, - .min_sel = 0x11, - .max_sel = 0x1f, - }, + LINEAR_RANGE(0, 0x0, 0x10, 64000), + LINEAR_RANGE(1024000, 0x11, 0x1f, 0), }; /* @@ -796,72 +782,28 @@ static const struct linear_range charging_current_ranges[] = { * and battery over voltage protection have same possible values */ static const struct linear_range charge_voltage_regulation_ranges[] = { - { - .min = 2560000, - .step = 0, - .min_sel = 0, - .max_sel = 0xA0, - }, { - .min = 2560000, - .step = 16000, - .min_sel = 0xA0, - .max_sel = 0x4B0, - }, { - .min = 19200000, - .step = 0, - .min_sel = 0x4B0, - .max_sel = 0x7FF, - }, + LINEAR_RANGE(2560000, 0, 0xA0, 0), + LINEAR_RANGE(2560000, 0xA0, 0x4B0, 16000), + LINEAR_RANGE(19200000, 0x4B0, 0x7FF, 0), }; /* Possible VSYS voltage regulation values */ static const struct linear_range vsys_voltage_regulation_ranges[] = { - { - .min = 2560000, - .step = 0, - .min_sel = 0, - .max_sel = 0x28, - }, { - .min = 2560000, - .step = 64000, - .min_sel = 0x28, - .max_sel = 0x12C, - }, { - .min = 19200000, - .step = 0, - .min_sel = 0x12C, - .max_sel = 0x1FF, - }, + LINEAR_RANGE(2560000, 0, 0x28, 0), + LINEAR_RANGE(2560000, 0x28, 0x12C, 64000), + LINEAR_RANGE(19200000, 0x12C, 0x1FF, 0), }; /* Possible settings for switching from trickle to pre-charging limits */ static const struct linear_range trickle_to_pre_threshold_ranges[] = { - { - .min = 2048000, - .step = 0, - .min_sel = 0, - .max_sel = 0x20, - }, { - .min = 2048000, - .step = 64000, - .min_sel = 0x20, - .max_sel = 0x12C, - }, { - .min = 19200000, - .step = 0, - .min_sel = 0x12C, - .max_sel = 0x1FF - } + 
LINEAR_RANGE(2048000, 0, 0x20, 0), + LINEAR_RANGE(2048000, 0x20, 0x12C, 64000), + LINEAR_RANGE(19200000, 0x12C, 0x1FF, 0), }; /* Possible current values for fast-charging constant current phase */ static const struct linear_range fast_charge_current_ranges[] = { - { - .min = 0, - .step = 64000, - .min_sel = 0, - .max_sel = 0xFF, - } + LINEAR_RANGE(0, 0, 0xFF, 64000), }; struct battery_init { From eae063f64714c2ab92eda900031c5e34a5597201 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Mon, 31 Oct 2022 11:25:00 +0200 Subject: [PATCH 0710/4122] power: supply: mt6360: Use LINEAR_RANGE_IDX() Do minor clean-up by using the newly introduced LINEAR_RANGE_IDX() initialization macro. Signed-off-by: Matti Vaittinen Signed-off-by: Sebastian Reichel --- drivers/power/supply/mt6360_charger.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/power/supply/mt6360_charger.c b/drivers/power/supply/mt6360_charger.c index 3abaa72e0668..92e48e3a4853 100644 --- a/drivers/power/supply/mt6360_charger.c +++ b/drivers/power/supply/mt6360_charger.c @@ -113,16 +113,13 @@ enum { MT6360_RANGE_MAX, }; -#define MT6360_LINEAR_RANGE(idx, _min, _min_sel, _max_sel, _step) \ - [idx] = REGULATOR_LINEAR_RANGE(_min, _min_sel, _max_sel, _step) - static const struct linear_range mt6360_chg_range[MT6360_RANGE_MAX] = { - MT6360_LINEAR_RANGE(MT6360_RANGE_VMIVR, 3900000, 0, 0x5F, 100000), - MT6360_LINEAR_RANGE(MT6360_RANGE_ICHG, 100000, 0, 0x31, 100000), - MT6360_LINEAR_RANGE(MT6360_RANGE_VOREG, 3900000, 0, 0x51, 10000), - MT6360_LINEAR_RANGE(MT6360_RANGE_AICR, 100000, 0, 0x3F, 50000), - MT6360_LINEAR_RANGE(MT6360_RANGE_IPREC, 100000, 0, 0x0F, 50000), - MT6360_LINEAR_RANGE(MT6360_RANGE_IEOC, 100000, 0, 0x0F, 50000), + LINEAR_RANGE_IDX(MT6360_RANGE_VMIVR, 3900000, 0, 0x5F, 100000), + LINEAR_RANGE_IDX(MT6360_RANGE_ICHG, 100000, 0, 0x31, 100000), + LINEAR_RANGE_IDX(MT6360_RANGE_VOREG, 3900000, 0, 0x51, 10000), + LINEAR_RANGE_IDX(MT6360_RANGE_AICR, 100000, 0, 0x3F,
50000), + LINEAR_RANGE_IDX(MT6360_RANGE_IPREC, 100000, 0, 0x0F, 50000), + LINEAR_RANGE_IDX(MT6360_RANGE_IEOC, 100000, 0, 0x0F, 50000), }; struct mt6360_chg_info { From e9e6fa49dbab6d84c676666f3fe7d360497fd65b Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Fri, 28 Oct 2022 20:33:20 +0800 Subject: [PATCH 0711/4122] apparmor: Fix memleak in alloc_ns() After changes in commit a1bd627b46d1 ("apparmor: share profile name on replacement"), the hname member of struct aa_policy is not valid slab object, but a subset of that, it can not be freed by kfree_sensitive(), use aa_policy_destroy() to fix it. Fixes: a1bd627b46d1 ("apparmor: share profile name on replacement") Signed-off-by: Xiu Jianfeng Signed-off-by: John Johansen --- security/apparmor/policy_ns.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/apparmor/policy_ns.c b/security/apparmor/policy_ns.c index 5c38563a6dcf..fd5b7afbcb48 100644 --- a/security/apparmor/policy_ns.c +++ b/security/apparmor/policy_ns.c @@ -132,7 +132,7 @@ static struct aa_ns *alloc_ns(const char *prefix, const char *name) return ns; fail_unconfined: - kfree_sensitive(ns->base.hname); + aa_policy_destroy(&ns->base); fail_ns: kfree_sensitive(ns); return NULL; From 969864efae78eb51b0baa1d14b2dfe08151b5874 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Tue, 25 Oct 2022 23:41:24 +0530 Subject: [PATCH 0712/4122] i2c: amd-mp2: use msix/msi if the hardware supports Use msix or msi interrupts if the hardware supports it. Else, fallback to legacy interrupts. 
Co-developed-by: Basavaraj Natikar Signed-off-by: Basavaraj Natikar Signed-off-by: Raju Rangoju Acked-by: Shyam Sundar S K Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-amd-mp2-pci.c | 30 +++++++++++++++++++--------- drivers/i2c/busses/i2c-amd-mp2.h | 1 + 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/drivers/i2c/busses/i2c-amd-mp2-pci.c b/drivers/i2c/busses/i2c-amd-mp2-pci.c index f57077a7448d..143165300949 100644 --- a/drivers/i2c/busses/i2c-amd-mp2-pci.c +++ b/drivers/i2c/busses/i2c-amd-mp2-pci.c @@ -288,7 +288,7 @@ static void amd_mp2_clear_reg(struct amd_mp2_dev *privdata) static int amd_mp2_pci_init(struct amd_mp2_dev *privdata, struct pci_dev *pci_dev) { - int rc; + int irq_flag = 0, rc; pci_set_drvdata(pci_dev, privdata); @@ -311,17 +311,29 @@ static int amd_mp2_pci_init(struct amd_mp2_dev *privdata, if (rc) goto err_dma_mask; - /* Set up intx irq */ + /* request and enable interrupt */ writel(0, privdata->mmio + AMD_P2C_MSG_INTEN); - pci_intx(pci_dev, 1); - rc = devm_request_irq(&pci_dev->dev, pci_dev->irq, amd_mp2_irq_isr, - IRQF_SHARED, dev_name(&pci_dev->dev), privdata); - if (rc) - pci_err(pci_dev, "Failure requesting irq %i: %d\n", - pci_dev->irq, rc); + rc = pci_alloc_irq_vectors(pci_dev, 1, 1, PCI_IRQ_ALL_TYPES); + if (rc < 0) { + dev_err(&pci_dev->dev, "Failed to allocate single IRQ err=%d\n", rc); + goto err_dma_mask; + } + + privdata->dev_irq = pci_irq_vector(pci_dev, 0); + if (!pci_dev->msix_enabled && !pci_dev->msi_enabled) + irq_flag = IRQF_SHARED; + + rc = devm_request_irq(&pci_dev->dev, privdata->dev_irq, + amd_mp2_irq_isr, irq_flag, dev_name(&pci_dev->dev), privdata); + if (rc) { + pci_err(pci_dev, "Failure requesting irq %i: %d\n", privdata->dev_irq, rc); + goto free_irq_vectors; + } return rc; +free_irq_vectors: + free_irq(privdata->dev_irq, privdata); err_dma_mask: pci_clear_master(pci_dev); err_pci_enable: @@ -364,7 +376,7 @@ static void amd_mp2_pci_remove(struct pci_dev *pci_dev) 
pm_runtime_forbid(&pci_dev->dev); pm_runtime_get_noresume(&pci_dev->dev); - pci_intx(pci_dev, 0); + free_irq(privdata->dev_irq, privdata); pci_clear_master(pci_dev); amd_mp2_clear_reg(privdata); diff --git a/drivers/i2c/busses/i2c-amd-mp2.h b/drivers/i2c/busses/i2c-amd-mp2.h index ddecd0c88656..018a42de8b1e 100644 --- a/drivers/i2c/busses/i2c-amd-mp2.h +++ b/drivers/i2c/busses/i2c-amd-mp2.h @@ -183,6 +183,7 @@ struct amd_mp2_dev { struct mutex c2p_lock; u8 c2p_lock_busid; unsigned int probed; + int dev_irq; }; /* PCIe communication driver */ From bb2617f0f2abbd8c622b2401e5e4984a4eef895b Mon Sep 17 00:00:00 2001 From: Frank Wunderlich Date: Sun, 9 Oct 2022 12:16:30 +0200 Subject: [PATCH 0713/4122] dt-bindings: i2c: update bindings for mt7986 soc Add i2c compatible for MT7986 SOC. Signed-off-by: Frank Wunderlich Reviewed-by: AngeloGioacchino Del Regno Acked-by: Rob Herring Signed-off-by: Wolfram Sang --- Documentation/devicetree/bindings/i2c/i2c-mt65xx.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/i2c/i2c-mt65xx.yaml b/Documentation/devicetree/bindings/i2c/i2c-mt65xx.yaml index 4e730fb7be56..421563bf576c 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-mt65xx.yaml +++ b/Documentation/devicetree/bindings/i2c/i2c-mt65xx.yaml @@ -23,6 +23,7 @@ properties: - const: mediatek,mt6577-i2c - const: mediatek,mt6589-i2c - const: mediatek,mt7622-i2c + - const: mediatek,mt7986-i2c - const: mediatek,mt8168-i2c - const: mediatek,mt8173-i2c - const: mediatek,mt8183-i2c From e0b7afc0eba88e8270860a573fe4ea28fe2d467c Mon Sep 17 00:00:00 2001 From: Frank Wunderlich Date: Sun, 9 Oct 2022 12:16:31 +0200 Subject: [PATCH 0714/4122] i2c: mediatek: add mt7986 support Add i2c support for MT7986 SoC. 
Signed-off-by: Frank Wunderlich Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-mt65xx.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c index fc7bfd98156b..d80e59340d97 100644 --- a/drivers/i2c/busses/i2c-mt65xx.c +++ b/drivers/i2c/busses/i2c-mt65xx.c @@ -431,6 +431,19 @@ static const struct mtk_i2c_compatible mt8168_compat = { .max_dma_support = 33, }; +static const struct mtk_i2c_compatible mt7986_compat = { + .quirks = &mt7622_i2c_quirks, + .regs = mt_i2c_regs_v1, + .pmic_i2c = 0, + .dcm = 1, + .auto_restart = 1, + .aux_len_reg = 1, + .timing_adjust = 0, + .dma_sync = 1, + .ltiming_adjust = 0, + .max_dma_support = 32, +}; + static const struct mtk_i2c_compatible mt8173_compat = { .regs = mt_i2c_regs_v1, .pmic_i2c = 0, @@ -503,6 +516,7 @@ static const struct of_device_id mtk_i2c_of_match[] = { { .compatible = "mediatek,mt6577-i2c", .data = &mt6577_compat }, { .compatible = "mediatek,mt6589-i2c", .data = &mt6589_compat }, { .compatible = "mediatek,mt7622-i2c", .data = &mt7622_compat }, + { .compatible = "mediatek,mt7986-i2c", .data = &mt7986_compat }, { .compatible = "mediatek,mt8168-i2c", .data = &mt8168_compat }, { .compatible = "mediatek,mt8173-i2c", .data = &mt8173_compat }, { .compatible = "mediatek,mt8183-i2c", .data = &mt8183_compat }, From a826b6e9e467ed378a2c50c4a03cb863ab681198 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Sat, 8 Oct 2022 14:59:23 +0200 Subject: [PATCH 0715/4122] i2c: npcm7xx: Group bank 0/1 registers together for readability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The unlabelled registers NPCM_I2CCTL4 to NPCM_I2CSCLHT overlap with the bank 1 registers below, and they are accessed after selecting bank 0, so they clearly belong to bank 0. 
Move them together with the other bank 0 registers, and move the unrelated definition of npcm_i2caddr down to keep the banked registers in one piece. Signed-off-by: Jonathan Neuschäfer Reviewed-by: Tali Perry Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-npcm7xx.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/drivers/i2c/busses/i2c-npcm7xx.c b/drivers/i2c/busses/i2c-npcm7xx.c index 0c365b57d957..9a7a2d0bf576 100644 --- a/drivers/i2c/busses/i2c-npcm7xx.c +++ b/drivers/i2c/busses/i2c-npcm7xx.c @@ -106,7 +106,7 @@ enum i2c_addr { #define NPCM_I2CCST3 0x19 #define I2C_VER 0x1F -/*BANK0 regs*/ +/* BANK 0 regs */ #define NPCM_I2CADDR3 0x10 #define NPCM_I2CADDR7 0x11 #define NPCM_I2CADDR4 0x12 @@ -115,6 +115,20 @@ enum i2c_addr { #define NPCM_I2CADDR9 0x15 #define NPCM_I2CADDR6 0x16 #define NPCM_I2CADDR10 0x17 +#define NPCM_I2CCTL4 0x1A +#define NPCM_I2CCTL5 0x1B +#define NPCM_I2CSCLLT 0x1C /* SCL Low Time */ +#define NPCM_I2CFIF_CTL 0x1D /* FIFO Control */ +#define NPCM_I2CSCLHT 0x1E /* SCL High Time */ + +/* BANK 1 regs */ +#define NPCM_I2CFIF_CTS 0x10 /* Both FIFOs Control and Status */ +#define NPCM_I2CTXF_CTL 0x12 /* Tx-FIFO Control */ +#define NPCM_I2CT_OUT 0x14 /* Bus T.O. */ +#define NPCM_I2CPEC 0x16 /* PEC Data */ +#define NPCM_I2CTXF_STS 0x1A /* Tx-FIFO Status */ +#define NPCM_I2CRXF_STS 0x1C /* Rx-FIFO Status */ +#define NPCM_I2CRXF_CTL 0x1E /* Rx-FIFO Control */ #if IS_ENABLED(CONFIG_I2C_SLAVE) /* @@ -131,21 +145,6 @@ static const int npcm_i2caddr[I2C_NUM_OWN_ADDR] = { }; #endif -#define NPCM_I2CCTL4 0x1A -#define NPCM_I2CCTL5 0x1B -#define NPCM_I2CSCLLT 0x1C /* SCL Low Time */ -#define NPCM_I2CFIF_CTL 0x1D /* FIFO Control */ -#define NPCM_I2CSCLHT 0x1E /* SCL High Time */ - -/* BANK 1 regs */ -#define NPCM_I2CFIF_CTS 0x10 /* Both FIFOs Control and Status */ -#define NPCM_I2CTXF_CTL 0x12 /* Tx-FIFO Control */ -#define NPCM_I2CT_OUT 0x14 /* Bus T.O. 
*/ -#define NPCM_I2CPEC 0x16 /* PEC Data */ -#define NPCM_I2CTXF_STS 0x1A /* Tx-FIFO Status */ -#define NPCM_I2CRXF_STS 0x1C /* Rx-FIFO Status */ -#define NPCM_I2CRXF_CTL 0x1E /* Rx-FIFO Control */ - /* NPCM_I2CST reg fields */ #define NPCM_I2CST_XMIT BIT(0) #define NPCM_I2CST_MASTER BIT(1) From 3ca8217dc450f7a50f90d2ad97787e38088af8e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Sat, 8 Oct 2022 14:59:24 +0200 Subject: [PATCH 0716/4122] i2c: npcm7xx: Annotate register field definitions with longer names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To make the code easier to understand, add longer names to the definitions of register fields. These longer names are based on source code published by DELL/AESS for WPCM450, but should apply just as well to NPCM7xx and NPCM8xx. Signed-off-by: Jonathan Neuschäfer Reviewed-by: Tali Perry Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-npcm7xx.c | 56 ++++++++++++++++---------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/drivers/i2c/busses/i2c-npcm7xx.c b/drivers/i2c/busses/i2c-npcm7xx.c index 9a7a2d0bf576..bbc7359e67f7 100644 --- a/drivers/i2c/busses/i2c-npcm7xx.c +++ b/drivers/i2c/busses/i2c-npcm7xx.c @@ -146,50 +146,50 @@ static const int npcm_i2caddr[I2C_NUM_OWN_ADDR] = { #endif /* NPCM_I2CST reg fields */ -#define NPCM_I2CST_XMIT BIT(0) -#define NPCM_I2CST_MASTER BIT(1) -#define NPCM_I2CST_NMATCH BIT(2) -#define NPCM_I2CST_STASTR BIT(3) -#define NPCM_I2CST_NEGACK BIT(4) -#define NPCM_I2CST_BER BIT(5) -#define NPCM_I2CST_SDAST BIT(6) -#define NPCM_I2CST_SLVSTP BIT(7) +#define NPCM_I2CST_XMIT BIT(0) /* Transmit mode */ +#define NPCM_I2CST_MASTER BIT(1) /* Master mode */ +#define NPCM_I2CST_NMATCH BIT(2) /* New match */ +#define NPCM_I2CST_STASTR BIT(3) /* Stall after start */ +#define NPCM_I2CST_NEGACK BIT(4) /* Negative ACK */ +#define NPCM_I2CST_BER BIT(5) /* Bus error */ +#define NPCM_I2CST_SDAST BIT(6) /* SDA 
status */ +#define NPCM_I2CST_SLVSTP BIT(7) /* Slave stop */ /* NPCM_I2CCST reg fields */ -#define NPCM_I2CCST_BUSY BIT(0) -#define NPCM_I2CCST_BB BIT(1) -#define NPCM_I2CCST_MATCH BIT(2) -#define NPCM_I2CCST_GCMATCH BIT(3) -#define NPCM_I2CCST_TSDA BIT(4) -#define NPCM_I2CCST_TGSCL BIT(5) -#define NPCM_I2CCST_MATCHAF BIT(6) -#define NPCM_I2CCST_ARPMATCH BIT(7) +#define NPCM_I2CCST_BUSY BIT(0) /* Busy */ +#define NPCM_I2CCST_BB BIT(1) /* Bus busy */ +#define NPCM_I2CCST_MATCH BIT(2) /* Address match */ +#define NPCM_I2CCST_GCMATCH BIT(3) /* Global call match */ +#define NPCM_I2CCST_TSDA BIT(4) /* Test SDA line */ +#define NPCM_I2CCST_TGSCL BIT(5) /* Toggle SCL line */ +#define NPCM_I2CCST_MATCHAF BIT(6) /* Match address field */ +#define NPCM_I2CCST_ARPMATCH BIT(7) /* ARP address match */ /* NPCM_I2CCTL1 reg fields */ -#define NPCM_I2CCTL1_START BIT(0) -#define NPCM_I2CCTL1_STOP BIT(1) -#define NPCM_I2CCTL1_INTEN BIT(2) +#define NPCM_I2CCTL1_START BIT(0) /* Generate start condition */ +#define NPCM_I2CCTL1_STOP BIT(1) /* Generate stop condition */ +#define NPCM_I2CCTL1_INTEN BIT(2) /* Interrupt enable */ #define NPCM_I2CCTL1_EOBINTE BIT(3) #define NPCM_I2CCTL1_ACK BIT(4) -#define NPCM_I2CCTL1_GCMEN BIT(5) -#define NPCM_I2CCTL1_NMINTE BIT(6) -#define NPCM_I2CCTL1_STASTRE BIT(7) +#define NPCM_I2CCTL1_GCMEN BIT(5) /* Global call match enable */ +#define NPCM_I2CCTL1_NMINTE BIT(6) /* New match interrupt enable */ +#define NPCM_I2CCTL1_STASTRE BIT(7) /* Stall after start enable */ /* RW1S fields (inside a RW reg): */ #define NPCM_I2CCTL1_RWS \ (NPCM_I2CCTL1_START | NPCM_I2CCTL1_STOP | NPCM_I2CCTL1_ACK) /* npcm_i2caddr reg fields */ -#define NPCM_I2CADDR_A GENMASK(6, 0) -#define NPCM_I2CADDR_SAEN BIT(7) +#define NPCM_I2CADDR_A GENMASK(6, 0) /* Address */ +#define NPCM_I2CADDR_SAEN BIT(7) /* Slave address enable */ /* NPCM_I2CCTL2 reg fields */ -#define I2CCTL2_ENABLE BIT(0) -#define I2CCTL2_SCLFRQ6_0 GENMASK(7, 1) +#define I2CCTL2_ENABLE BIT(0) /* Module enable */ 
+#define I2CCTL2_SCLFRQ6_0 GENMASK(7, 1) /* Bits 0:6 of frequency divisor */ /* NPCM_I2CCTL3 reg fields */ -#define I2CCTL3_SCLFRQ8_7 GENMASK(1, 0) -#define I2CCTL3_ARPMEN BIT(2) +#define I2CCTL3_SCLFRQ8_7 GENMASK(1, 0) /* Bits 7:8 of frequency divisor */ +#define I2CCTL3_ARPMEN BIT(2) /* ARP match enable */ #define I2CCTL3_IDL_START BIT(3) #define I2CCTL3_400K_MODE BIT(4) #define I2CCTL3_BNK_SEL BIT(5) From b1f37ef655cf372f96015bf54abdb76a91aff27e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 31 Oct 2022 11:10:56 +0100 Subject: [PATCH 0717/4122] x86: Unconfuse CONFIG_ and X86_FEATURE_ namespaces Lukas reported someone fat fingered the CONFIG_ symbol; fix er up. Fixes: 5d8213864ade ("x86/retbleed: Add SKL return thunk") Reported-by: Lukas Bulwahn Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/Y1+fL4qQEIGZEEKB@hirez.programming.kicks-ass.net --- arch/x86/include/asm/nospec-branch.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 82580adbca4b..3ab90f23e7f7 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -285,7 +285,7 @@ */ .macro UNTRAIN_RET #if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \ - defined(CONFIG_X86_FEATURE_CALL_DEPTH) + defined(CONFIG_CALL_DEPTH_TRACKING) ANNOTATE_UNRET_END ALTERNATIVE_3 "", \ CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \ @@ -296,7 +296,7 @@ .macro UNTRAIN_RET_FROM_CALL #if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \ - defined(CONFIG_X86_FEATURE_CALL_DEPTH) + defined(CONFIG_CALL_DEPTH_TRACKING) ANNOTATE_UNRET_END ALTERNATIVE_3 "", \ CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \ From 5ebddd7c4951c50142bcb239d4c6a82eff15759e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 28 Oct 2022 15:26:51 +0200 Subject: [PATCH 0718/4122] kallsyms: Revert "Take callthunks into account" This is a full revert of 
commit: f1389181622a ("kallsyms: Take callthunks into account") The commit assumes a number of things that are not quite right. Notably it assumes every symbol has PADDING_BYTES in front of it that are not claimed by another symbol. This is not true; even when compiled with: -fpatchable-function-entry=${PADDING_BYTES},${PADDING_BYTES} Notably things like .cold subfunctions do not need to adhere to this change in ABI. It is also not true when built with CFI_CLANG, which claims these PADDING_BYTES in the __cfi_##name symbol. Once the prefix bytes are not consistent and/or otherwise claimed the approach this patch takes goes out the window and kallsym resolution will report invalid symbol names. Therefore revert this to make room for another approach. Reported-by: kernel test robot Signed-off-by: Peter Zijlstra (Intel) Tested-by: Yujie Liu Link: https://lore.kernel.org/r/202210241614.2ae4c1f5-yujie.liu@intel.com Link: https://lkml.kernel.org/r/20221028194453.330970755@infradead.org --- kernel/kallsyms.c | 45 +++++---------------------------------------- 1 file changed, 5 insertions(+), 40 deletions(-) diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index cc244c02b4cf..60c20f301a6b 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -293,12 +293,6 @@ static unsigned long get_symbol_pos(unsigned long addr, return low; } -#ifdef CONFIG_FUNCTION_PADDING_BYTES -#define PADDING_BYTES CONFIG_FUNCTION_PADDING_BYTES -#else -#define PADDING_BYTES 0 -#endif - /* * Lookup an address but don't bother to find any names.
*/ @@ -306,25 +300,13 @@ int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize, unsigned long *offset) { char namebuf[KSYM_NAME_LEN]; - int ret; - - addr += PADDING_BYTES; if (is_ksym_addr(addr)) { get_symbol_pos(addr, symbolsize, offset); - ret = 1; - goto found; + return 1; } - - ret = !!module_address_lookup(addr, symbolsize, offset, NULL, NULL, namebuf); - if (!ret) { - ret = !!__bpf_address_lookup(addr, symbolsize, - offset, namebuf); - } -found: - if (ret && offset) - *offset -= PADDING_BYTES; - return ret; + return !!module_address_lookup(addr, symbolsize, offset, NULL, NULL, namebuf) || + !!__bpf_address_lookup(addr, symbolsize, offset, namebuf); } static const char *kallsyms_lookup_buildid(unsigned long addr, @@ -337,8 +319,6 @@ static const char *kallsyms_lookup_buildid(unsigned long addr, namebuf[KSYM_NAME_LEN - 1] = 0; namebuf[0] = 0; - addr += PADDING_BYTES; - if (is_ksym_addr(addr)) { unsigned long pos; @@ -368,8 +348,6 @@ static const char *kallsyms_lookup_buildid(unsigned long addr, found: cleanup_symbol_name(namebuf); - if (ret && offset) - *offset -= PADDING_BYTES; return ret; } @@ -396,8 +374,6 @@ int lookup_symbol_name(unsigned long addr, char *symname) symname[0] = '\0'; symname[KSYM_NAME_LEN - 1] = '\0'; - addr += PADDING_BYTES; - if (is_ksym_addr(addr)) { unsigned long pos; @@ -425,8 +401,6 @@ int lookup_symbol_attrs(unsigned long addr, unsigned long *size, name[0] = '\0'; name[KSYM_NAME_LEN - 1] = '\0'; - addr += PADDING_BYTES; - if (is_ksym_addr(addr)) { unsigned long pos; @@ -443,8 +417,6 @@ int lookup_symbol_attrs(unsigned long addr, unsigned long *size, return res; found: - if (offset) - *offset -= PADDING_BYTES; cleanup_symbol_name(name); return 0; } @@ -470,15 +442,8 @@ static int __sprint_symbol(char *buffer, unsigned long address, len = strlen(buffer); offset -= symbol_offset; - if (add_offset) { - char s = '+'; - - if ((long)offset < 0) { - s = '-'; - offset = 0UL - offset; - } - len += sprintf(buffer + len, 
"%c%#lx/%#lx", s, offset, size); - } + if (add_offset) + len += sprintf(buffer + len, "+%#lx/%#lx", offset, size); if (modname) { len += sprintf(buffer + len, " [%s", modname); From 4c91be8e926c6b3734d59b9348e305431484d42b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 28 Oct 2022 15:49:26 +0200 Subject: [PATCH 0719/4122] objtool: Slice up elf_create_section_symbol() In order to facilitate creation of more symbol types, slice up elf_create_section_symbol() to extract a generic helper that deals with adding ELF symbols. Signed-off-by: Peter Zijlstra (Intel) Tested-by: Yujie Liu Link: https://lkml.kernel.org/r/20221028194453.396634875@infradead.org --- tools/objtool/elf.c | 56 ++++++++++++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 21 deletions(-) diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 89b37cd4ab1d..3ad89d963e59 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -717,11 +717,11 @@ static int elf_update_symbol(struct elf *elf, struct section *symtab, } static struct symbol * -elf_create_section_symbol(struct elf *elf, struct section *sec) +__elf_create_symbol(struct elf *elf, struct symbol *sym) { struct section *symtab, *symtab_shndx; Elf32_Word first_non_local, new_idx; - struct symbol *sym, *old; + struct symbol *old; symtab = find_section_by_name(elf, ".symtab"); if (symtab) { @@ -731,27 +731,16 @@ elf_create_section_symbol(struct elf *elf, struct section *sec) return NULL; } - sym = calloc(1, sizeof(*sym)); - if (!sym) { - perror("malloc"); - return NULL; - } + new_idx = symtab->sh.sh_size / symtab->sh.sh_entsize; - sym->name = sec->name; - sym->sec = sec; - - // st_name 0 - sym->sym.st_info = GELF_ST_INFO(STB_LOCAL, STT_SECTION); - // st_other 0 - // st_value 0 - // st_size 0 + if (GELF_ST_BIND(sym->sym.st_info) != STB_LOCAL) + goto non_local; /* * Move the first global symbol, as per sh_info, into a new, higher * symbol index. This fees up a spot for a new local symbol. 
*/ first_non_local = symtab->sh.sh_info; - new_idx = symtab->sh.sh_size / symtab->sh.sh_entsize; old = find_symbol_by_index(elf, first_non_local); if (old) { old->idx = new_idx; @@ -769,18 +758,43 @@ elf_create_section_symbol(struct elf *elf, struct section *sec) new_idx = first_non_local; } + /* + * Either way, we will add a LOCAL symbol. + */ + symtab->sh.sh_info += 1; + +non_local: sym->idx = new_idx; if (elf_update_symbol(elf, symtab, symtab_shndx, sym)) { WARN("elf_update_symbol"); return NULL; } - /* - * Either way, we added a LOCAL symbol. - */ - symtab->sh.sh_info += 1; + return sym; +} - elf_add_symbol(elf, sym); +static struct symbol * +elf_create_section_symbol(struct elf *elf, struct section *sec) +{ + struct symbol *sym = calloc(1, sizeof(*sym)); + + if (!sym) { + perror("malloc"); + return NULL; + } + + sym->name = sec->name; + sym->sec = sec; + + // st_name 0 + sym->sym.st_info = GELF_ST_INFO(STB_LOCAL, STT_SECTION); + // st_other 0 + // st_value 0 + // st_size 0 + + sym = __elf_create_symbol(elf, sym); + if (sym) + elf_add_symbol(elf, sym); return sym; } From 13f60e80e15dd0657c90bcca372ba045630ed9de Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 28 Oct 2022 20:29:51 +0200 Subject: [PATCH 0720/4122] objtool: Avoid O(bloody terrible) behaviour -- an ode to libelf Due to how gelf_update_sym*() requires an Elf_Data pointer, and how libelf keeps Elf_Data in a linked list per section, elf_update_symbol() ends up having to iterate this list on each update to find the correct Elf_Data for the index'ed symbol. By allocating one Elf_Data per new symbol, the list grows per new symbol, giving an effective O(n^2) insertion time. This is obviously bloody terrible. Therefore over-allocate the Elf_Data when an extension is needed. Except it turns out libelf disregards Elf_Scn::sh_size in favour of the sum of Elf_Data::d_size.
IOW it will happily write out all the unused space and fill it with: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND entries (aka zeros). Which obviously violates the STB_LOCAL placement rule, and is a general pain in the backside for not being the desired behaviour. Manually fix-up the Elf_Data size to avoid this problem before calling elf_update(). This significantly improves performance when adding a significant number of symbols. Signed-off-by: Peter Zijlstra (Intel) Tested-by: Yujie Liu Link: https://lkml.kernel.org/r/20221028194453.461658986@infradead.org --- tools/objtool/elf.c | 89 +++++++++++++++++++++++++++-- tools/objtool/include/objtool/elf.h | 2 +- 2 files changed, 84 insertions(+), 7 deletions(-) diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 3ad89d963e59..36dc78796f58 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -634,6 +634,12 @@ static int elf_update_symbol(struct elf *elf, struct section *symtab, /* end-of-list */ if (!symtab_data) { + /* + * Over-allocate to avoid O(n^2) symbol creation + * behaviour. The down side is that libelf doesn't + * like this; see elf_truncate_section() for the fixup. 
+ */ + int num = max(1U, sym->idx/3); void *buf; if (idx) { @@ -647,28 +653,34 @@ static int elf_update_symbol(struct elf *elf, struct section *symtab, if (t) shndx_data = elf_newdata(t); - buf = calloc(1, entsize); + buf = calloc(num, entsize); if (!buf) { WARN("malloc"); return -1; } symtab_data->d_buf = buf; - symtab_data->d_size = entsize; + symtab_data->d_size = num * entsize; symtab_data->d_align = 1; symtab_data->d_type = ELF_T_SYM; - symtab->sh.sh_size += entsize; symtab->changed = true; + symtab->truncate = true; if (t) { - shndx_data->d_buf = &sym->sec->idx; - shndx_data->d_size = sizeof(Elf32_Word); + buf = calloc(num, sizeof(Elf32_Word)); + if (!buf) { + WARN("malloc"); + return -1; + } + + shndx_data->d_buf = buf; + shndx_data->d_size = num * sizeof(Elf32_Word); shndx_data->d_align = sizeof(Elf32_Word); shndx_data->d_type = ELF_T_WORD; - symtab_shndx->sh.sh_size += sizeof(Elf32_Word); symtab_shndx->changed = true; + symtab_shndx->truncate = true; } break; @@ -770,6 +782,14 @@ non_local: return NULL; } + symtab->sh.sh_size += symtab->sh.sh_entsize; + symtab->changed = true; + + if (symtab_shndx) { + symtab_shndx->sh.sh_size += sizeof(Elf32_Word); + symtab_shndx->changed = true; + } + return sym; } @@ -1286,6 +1306,60 @@ int elf_write_reloc(struct elf *elf, struct reloc *reloc) return 0; } +/* + * When Elf_Scn::sh_size is smaller than the combined Elf_Data::d_size + * do you: + * + * A) adhere to the section header and truncate the data, or + * B) ignore the section header and write out all the data you've got? + * + * Yes, libelf sucks and we need to manually truncate if we over-allocate data. 
+ */ +static int elf_truncate_section(struct elf *elf, struct section *sec) +{ + u64 size = sec->sh.sh_size; + bool truncated = false; + Elf_Data *data = NULL; + Elf_Scn *s; + + s = elf_getscn(elf->elf, sec->idx); + if (!s) { + WARN_ELF("elf_getscn"); + return -1; + } + + for (;;) { + /* get next data descriptor for the relevant section */ + data = elf_getdata(s, data); + + if (!data) { + if (size) { + WARN("end of section data but non-zero size left\n"); + return -1; + } + return 0; + } + + if (truncated) { + /* when we remove symbols */ + WARN("truncated; but more data\n"); + return -1; + } + + if (!data->d_size) { + WARN("zero size data"); + return -1; + } + + if (data->d_size > size) { + truncated = true; + data->d_size = size; + } + + size -= data->d_size; + } +} + int elf_write(struct elf *elf) { struct section *sec; @@ -1296,6 +1370,9 @@ int elf_write(struct elf *elf) /* Update changed relocation sections and section headers: */ list_for_each_entry(sec, &elf->sections, list) { + if (sec->truncate) + elf_truncate_section(elf, sec); + if (sec->changed) { s = elf_getscn(elf->elf, sec->idx); if (!s) { diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index d28533106b78..9e96a613c50f 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -38,7 +38,7 @@ struct section { Elf_Data *data; char *name; int idx; - bool changed, text, rodata, noinstr, init; + bool changed, text, rodata, noinstr, init, truncate; }; struct symbol { From 9f2899fe36a623885d8576604cb582328ad32b3c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 28 Oct 2022 15:50:42 +0200 Subject: [PATCH 0721/4122] objtool: Add option to generate prefix symbols When code is compiled with: -fpatchable-function-entry=${PADDING_BYTES},${PADDING_BYTES} functions will have PADDING_BYTES of NOP in front of them. Unwinders and other things that symbolize code locations will typically attribute these bytes to the preceding function. 
Given that these bytes nominally belong to the following symbol this mis-attribution is confusing. Inspired by the fact that CFI_CLANG emits __cfi_##name symbols to claim these bytes, allow objtool to emit __pfx_##name symbols to do the same. Therefore add the objtool --prefix=N argument, to conditionally place a __pfx_##name symbol at N bytes ahead of symbol 'name' when: all these preceding bytes are NOP and name-N is an instruction boundary. Signed-off-by: Peter Zijlstra (Intel) Tested-by: Yujie Liu Link: https://lkml.kernel.org/r/20221028194453.526899822@infradead.org --- tools/objtool/builtin-check.c | 1 + tools/objtool/check.c | 33 ++++++++++++++++++++++++- tools/objtool/elf.c | 31 +++++++++++++++++++++++ tools/objtool/include/objtool/builtin.h | 1 + tools/objtool/include/objtool/elf.h | 2 ++ 5 files changed, 67 insertions(+), 1 deletion(-) diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 0a04f8ea4432..95fcecee60ce 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -75,6 +75,7 @@ const struct option check_options[] = { OPT_BOOLEAN('r', "retpoline", &opts.retpoline, "validate and annotate retpoline usage"), OPT_BOOLEAN(0, "rethunk", &opts.rethunk, "validate and annotate rethunk usage"), OPT_BOOLEAN(0, "unret", &opts.unret, "validate entry unret placement"), + OPT_INTEGER(0, "prefix", &opts.prefix, "generate prefix symbols"), OPT_BOOLEAN('l', "sls", &opts.sls, "validate straight-line-speculation mitigations"), OPT_BOOLEAN('s', "stackval", &opts.stackval, "validate frame pointer rules"), OPT_BOOLEAN('t', "static-call", &opts.static_call, "annotate static calls"), diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 7936312e10c7..27f35f5f831a 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -3417,7 +3417,8 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, if (func && insn_func(insn) && func != insn_func(insn)->pfunc) { /* Ignore KCFI type 
preambles, which always fall through */ - if (!strncmp(func->name, "__cfi_", 6)) + if (!strncmp(func->name, "__cfi_", 6) || + !strncmp(func->name, "__pfx_", 6)) return 0; WARN("%s() falls through to next function %s()", @@ -3972,6 +3973,34 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio return false; } +static int add_prefix_symbol(struct objtool_file *file, struct symbol *func, + struct instruction *insn) +{ + if (!opts.prefix) + return 0; + + for (;;) { + struct instruction *prev = list_prev_entry(insn, list); + u64 offset; + + if (&prev->list == &file->insn_list) + break; + + if (prev->type != INSN_NOP) + break; + + offset = func->offset - prev->offset; + if (offset >= opts.prefix) { + if (offset == opts.prefix) + elf_create_prefix_symbol(file->elf, func, opts.prefix); + break; + } + insn = prev; + } + + return 0; +} + static int validate_symbol(struct objtool_file *file, struct section *sec, struct symbol *sym, struct insn_state *state) { @@ -3990,6 +4019,8 @@ static int validate_symbol(struct objtool_file *file, struct section *sec, if (!insn || insn->ignore || insn->visited) return 0; + add_prefix_symbol(file, sym, insn); + state->uaccess = sym->uaccess_safe; ret = validate_branch(file, insn_func(insn), insn, *state); diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 36dc78796f58..3d636d12d679 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -819,6 +819,37 @@ elf_create_section_symbol(struct elf *elf, struct section *sec) return sym; } +static int elf_add_string(struct elf *elf, struct section *strtab, char *str); + +struct symbol * +elf_create_prefix_symbol(struct elf *elf, struct symbol *orig, long size) +{ + struct symbol *sym = calloc(1, sizeof(*sym)); + size_t namelen = strlen(orig->name) + sizeof("__pfx_"); + char *name = malloc(namelen); + + if (!sym || !name) { + perror("malloc"); + return NULL; + } + + snprintf(name, namelen, "__pfx_%s", orig->name); + + sym->name = name; + sym->sec = 
orig->sec; + + sym->sym.st_name = elf_add_string(elf, NULL, name); + sym->sym.st_info = orig->sym.st_info; + sym->sym.st_value = orig->sym.st_value - size; + sym->sym.st_size = size; + + sym = __elf_create_symbol(elf, sym); + if (sym) + elf_add_symbol(elf, sym); + + return sym; +} + int elf_add_reloc_to_insn(struct elf *elf, struct section *sec, unsigned long offset, unsigned int type, struct section *insn_sec, unsigned long insn_off) diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h index 22092a9f3cf6..f341b620dead 100644 --- a/tools/objtool/include/objtool/builtin.h +++ b/tools/objtool/include/objtool/builtin.h @@ -26,6 +26,7 @@ struct opts { bool stackval; bool static_call; bool uaccess; + int prefix; /* options: */ bool backtrace; diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index 9e96a613c50f..b6974e3173aa 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -146,6 +146,8 @@ static inline bool has_multiple_files(struct elf *elf) struct elf *elf_open_read(const char *name, int flags); struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr); +struct symbol *elf_create_prefix_symbol(struct elf *elf, struct symbol *orig, long size); + int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset, unsigned int type, struct symbol *sym, s64 addend); int elf_add_reloc_to_insn(struct elf *elf, struct section *sec, From b341b20d648bb7e9a3307c33163e7399f0913e66 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 28 Oct 2022 21:08:19 +0200 Subject: [PATCH 0722/4122] x86: Add prefix symbols for function padding When code is compiled with: -fpatchable-function-entry=${PADDING_BYTES},${PADDING_BYTES} functions will have PADDING_BYTES of NOP in front of them. 
Unwinders and other things that symbolize code locations will typically attribute these bytes to the preceding function. Given that these bytes nominally belong to the following symbol this mis-attribution is confusing. Inspired by the fact that CFI_CLANG emits __cfi_##name symbols to claim these bytes, use objtool to emit __pfx_##name symbols to do the same when CFI_CLANG is not used. This then shows the callthunk for symbol 'name' as: __pfx_##name+0x6/0x10 Signed-off-by: Peter Zijlstra (Intel) Tested-by: Yujie Liu Link: https://lkml.kernel.org/r/20221028194453.592512209@infradead.org --- arch/x86/Kconfig | 4 ++++ scripts/Makefile.lib | 1 + 2 files changed, 5 insertions(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b52ad13f0f44..32818aa1dca4 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2471,6 +2471,10 @@ config CALL_THUNKS def_bool n select FUNCTION_ALIGNMENT_16B +config PREFIX_SYMBOLS + def_bool y + depends on CALL_THUNKS && !CFI_CLANG + menuconfig SPECULATION_MITIGATIONS bool "Mitigations for speculative execution vulnerabilities" default y diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 85f02756dc9c..2e03bcbf2b9b 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -265,6 +265,7 @@ objtool-args-$(CONFIG_STACK_VALIDATION) += --stackval objtool-args-$(CONFIG_HAVE_STATIC_CALL_INLINE) += --static-call objtool-args-$(CONFIG_HAVE_UACCESS_VALIDATION) += --uaccess objtool-args-$(CONFIG_GCOV_KERNEL) += --no-unreachable +objtool-args-$(CONFIG_PREFIX_SYMBOLS) += --prefix=$(CONFIG_FUNCTION_PADDING_BYTES) objtool-args = $(objtool-args-y) \ $(if $(delay-objtool), --link) \ From 9a479f766be1dd777e12e3e57b6ee4c3028a40a5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 27 Oct 2022 11:28:13 +0200 Subject: [PATCH 0723/4122] objtool: Add --cfi to generate the .cfi_sites section Add the location of all __cfi_##name symbols (as generated by kCFI) to a section such that we might re-write things at kernel boot. 
Notably; boot time re-hashing and FineIBT are the intended use of this. Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20221027092842.568039454@infradead.org --- tools/objtool/builtin-check.c | 1 + tools/objtool/check.c | 69 +++++++++++++++++++++++++ tools/objtool/include/objtool/builtin.h | 1 + 3 files changed, 71 insertions(+) diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 95fcecee60ce..868e3e363786 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -80,6 +80,7 @@ const struct option check_options[] = { OPT_BOOLEAN('s', "stackval", &opts.stackval, "validate frame pointer rules"), OPT_BOOLEAN('t', "static-call", &opts.static_call, "annotate static calls"), OPT_BOOLEAN('u', "uaccess", &opts.uaccess, "validate uaccess rules for SMAP"), + OPT_BOOLEAN(0 , "cfi", &opts.cfi, "annotate kernel control flow integrity (kCFI) function preambles"), OPT_CALLBACK_OPTARG(0, "dump", NULL, NULL, "orc", "dump metadata", parse_dump), OPT_GROUP("Options:"), diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 27f35f5f831a..55066c493570 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -861,6 +861,68 @@ static int create_ibt_endbr_seal_sections(struct objtool_file *file) return 0; } +static int create_cfi_sections(struct objtool_file *file) +{ + struct section *sec, *s; + struct symbol *sym; + unsigned int *loc; + int idx; + + sec = find_section_by_name(file->elf, ".cfi_sites"); + if (sec) { + INIT_LIST_HEAD(&file->call_list); + WARN("file already has .cfi_sites section, skipping"); + return 0; + } + + idx = 0; + for_each_sec(file, s) { + if (!s->text) + continue; + + list_for_each_entry(sym, &s->symbol_list, list) { + if (sym->type != STT_FUNC) + continue; + + if (strncmp(sym->name, "__cfi_", 6)) + continue; + + idx++; + } + } + + sec = elf_create_section(file->elf, ".cfi_sites", 0, sizeof(unsigned int), idx); + if (!sec) + return -1; + + idx = 0; + for_each_sec(file, 
s) { + if (!s->text) + continue; + + list_for_each_entry(sym, &s->symbol_list, list) { + if (sym->type != STT_FUNC) + continue; + + if (strncmp(sym->name, "__cfi_", 6)) + continue; + + loc = (unsigned int *)sec->data->d_buf + idx; + memset(loc, 0, sizeof(unsigned int)); + + if (elf_add_reloc_to_insn(file->elf, sec, + idx * sizeof(unsigned int), + R_X86_64_PC32, + s, sym->offset)) + return -1; + + idx++; + } + } + + return 0; +} + static int create_mcount_loc_sections(struct objtool_file *file) { struct section *sec; @@ -4430,6 +4492,13 @@ int check(struct objtool_file *file) warnings += ret; } + if (opts.cfi) { + ret = create_cfi_sections(file); + if (ret < 0) + goto out; + warnings += ret; + } + if (opts.rethunk) { ret = create_return_sites_sections(file); if (ret < 0) diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h index f341b620dead..c44ff39df80c 100644 --- a/tools/objtool/include/objtool/builtin.h +++ b/tools/objtool/include/objtool/builtin.h @@ -27,6 +27,7 @@ struct opts { bool static_call; bool uaccess; int prefix; + bool cfi; /* options: */ bool backtrace; From 931ab63664f02b17d2213ef36b83e1e50190a0aa Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 27 Oct 2022 11:28:14 +0200 Subject: [PATCH 0724/4122] x86/ibt: Implement FineIBT Implement an alternative CFI scheme that both retains the fine-grained nature of kCFI and takes full advantage of the coarse-grained hardware CFI as provided by IBT. To contrast: kCFI is a pure software CFI scheme and relies on being able to read text -- specifically the instruction *before* the target symbol, and does the hash validation *before* doing the call (otherwise control flow is compromised already). FineIBT is a software and hardware hybrid scheme; by ensuring every branch target starts with a hash validation it is possible to place the hash validation after the branch. This has several advantages: o the (hash) load is avoided; no memop; no RX requirement.
o IBT WAIT-FOR-ENDBR state is a speculation stop; by placing the hash validation in the immediate instruction after the branch target there is a minimal speculation window and the whole is a viable defence against SpectreBHB. o Kees feels obliged to mention it is slightly more vulnerable when the attacker can write code. Obviously this patch relies on kCFI, but additionally it also relies on the padding from the call-depth-tracking patches. It uses this padding to place the hash-validation while the call-sites are re-written to modify the indirect target to be 16 bytes in front of the original target, thus hitting this new preamble. Notably, there is no hardware that needs call-depth-tracking (Skylake) and supports IBT (Tigerlake and onwards). Suggested-by: Joao Moreira (Intel) Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20221027092842.634714496@infradead.org --- arch/um/kernel/um_arch.c | 5 + arch/x86/Kconfig | 14 +- arch/x86/Makefile | 2 +- arch/x86/include/asm/alternative.h | 2 + arch/x86/include/asm/linkage.h | 6 +- arch/x86/kernel/alternative.c | 257 +++++++++++++++++++++++++++-- arch/x86/kernel/cpu/common.c | 1 + arch/x86/kernel/module.c | 20 ++- arch/x86/kernel/vmlinux.lds.S | 9 + include/linux/bpf.h | 2 +- scripts/Makefile.lib | 1 + 11 files changed, 296 insertions(+), 23 deletions(-) diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 8adf8e89b255..786b44dc20c9 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -444,6 +444,11 @@ void apply_returns(s32 *start, s32 *end) { } +void apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, + s32 *start_cfi, s32 *end_cfi) +{ +} + void apply_alternatives(struct alt_instr *start, struct alt_instr *end) { } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 32818aa1dca4..479ee63898f5 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2463,17 +2463,27 @@ config FUNCTION_PADDING_BYTES default FUNCTION_PADDING_CFI if 
CFI_CLANG default FUNCTION_ALIGNMENT +config CALL_PADDING + def_bool n + depends on CC_HAS_ENTRY_PADDING && OBJTOOL + select FUNCTION_ALIGNMENT_16B + +config FINEIBT + def_bool y + depends on X86_KERNEL_IBT && CFI_CLANG && RETPOLINE + select CALL_PADDING + config HAVE_CALL_THUNKS def_bool y depends on CC_HAS_ENTRY_PADDING && RETHUNK && OBJTOOL config CALL_THUNKS def_bool n - select FUNCTION_ALIGNMENT_16B + select CALL_PADDING config PREFIX_SYMBOLS def_bool y - depends on CALL_THUNKS && !CFI_CLANG + depends on CALL_PADDING && !CFI_CLANG menuconfig SPECULATION_MITIGATIONS bool "Mitigations for speculative execution vulnerabilities" diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 1640e005092b..a3a07df8a609 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -208,7 +208,7 @@ ifdef CONFIG_SLS KBUILD_CFLAGS += -mharden-sls=all endif -ifdef CONFIG_CALL_THUNKS +ifdef CONFIG_CALL_PADDING PADDING_CFLAGS := -fpatchable-function-entry=$(CONFIG_FUNCTION_PADDING_BYTES),$(CONFIG_FUNCTION_PADDING_BYTES) KBUILD_CFLAGS += $(PADDING_CFLAGS) export PADDING_CFLAGS diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 664c0779375c..7659217f4d49 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -78,6 +78,8 @@ extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); extern void apply_retpolines(s32 *start, s32 *end); extern void apply_returns(s32 *start, s32 *end); extern void apply_ibt_endbr(s32 *start, s32 *end); +extern void apply_fineibt(s32 *start_retpoline, s32 *end_retpoine, + s32 *start_cfi, s32 *end_cfi); struct module; struct paravirt_patch_site; diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 45e0df850645..dd9b8118f784 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -15,7 +15,7 @@ #define __ALIGN .balign CONFIG_FUNCTION_ALIGNMENT, 0x90; #define __ALIGN_STR __stringify(__ALIGN) -#if 
defined(CONFIG_CALL_THUNKS) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) +#if defined(CONFIG_CALL_PADDING) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) #define FUNCTION_PADDING .skip CONFIG_FUNCTION_ALIGNMENT, 0x90; #else #define FUNCTION_PADDING @@ -57,7 +57,7 @@ #endif /* __ASSEMBLY__ */ /* - * Depending on -fpatchable-function-entry=N,N usage (CONFIG_CALL_THUNKS) the + * Depending on -fpatchable-function-entry=N,N usage (CONFIG_CALL_PADDING) the * CFI symbol layout changes. * * Without CALL_THUNKS: @@ -81,7 +81,7 @@ * In both cases the whole thing is FUNCTION_ALIGNMENT aligned and sized. */ -#ifdef CONFIG_CALL_THUNKS +#ifdef CONFIG_CALL_PADDING #define CFI_PRE_PADDING #define CFI_POST_PADDING .skip CONFIG_FUNCTION_PADDING_BYTES, 0x90; #else diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index b4ac4e58c010..91b0e63a6238 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -116,6 +116,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len) extern s32 __retpoline_sites[], __retpoline_sites_end[]; extern s32 __return_sites[], __return_sites_end[]; +extern s32 __cfi_sites[], __cfi_sites_end[]; extern s32 __ibt_endbr_seal[], __ibt_endbr_seal_end[]; extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; extern s32 __smp_locks[], __smp_locks_end[]; @@ -656,6 +657,28 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end) { } #ifdef CONFIG_X86_KERNEL_IBT +static void poison_endbr(void *addr, bool warn) +{ + u32 endbr, poison = gen_endbr_poison(); + + if (WARN_ON_ONCE(get_kernel_nofault(endbr, addr))) + return; + + if (!is_endbr(endbr)) { + WARN_ON_ONCE(warn); + return; + } + + DPRINTK("ENDBR at: %pS (%px)", addr, addr); + + /* + * When we have IBT, the lack of ENDBR will trigger #CP + */ + DUMP_BYTES(((u8*)addr), 4, "%px: orig: ", addr); + DUMP_BYTES(((u8*)&poison), 4, "%px: repl: ", addr); + text_poke_early(addr, &poison, 4); +} + /* * Generated 
by: objtool --ibt */ @@ -664,23 +687,11 @@ void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end) s32 *s; for (s = start; s < end; s++) { - u32 endbr, poison = gen_endbr_poison(); void *addr = (void *)s + *s; - if (WARN_ON_ONCE(get_kernel_nofault(endbr, addr))) - continue; - - if (WARN_ON_ONCE(!is_endbr(endbr))) - continue; - - DPRINTK("ENDBR at: %pS (%px)", addr, addr); - - /* - * When we have IBT, the lack of ENDBR will trigger #CP - */ - DUMP_BYTES(((u8*)addr), 4, "%px: orig: ", addr); - DUMP_BYTES(((u8*)&poison), 4, "%px: repl: ", addr); - text_poke_early(addr, &poison, 4); + poison_endbr(addr, true); + if (IS_ENABLED(CONFIG_FINEIBT)) + poison_endbr(addr - 16, false); } } @@ -690,6 +701,219 @@ void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end) { } #endif /* CONFIG_X86_KERNEL_IBT */ +#ifdef CONFIG_FINEIBT +/* + * kCFI FineIBT + * + * __cfi_\func: __cfi_\func: + * movl $0x12345678,%eax // 5 endbr64 // 4 + * nop subl $0x12345678,%r10d // 7 + * nop jz 1f // 2 + * nop ud2 // 2 + * nop 1: nop // 1 + * nop + * nop + * nop + * nop + * nop + * nop + * nop + * + * + * caller: caller: + * movl $(-0x12345678),%r10d // 6 movl $0x12345678,%r10d // 6 + * addl $-15(%r11),%r10d // 4 sub $16,%r11 // 4 + * je 1f // 2 nop4 // 4 + * ud2 // 2 + * 1: call __x86_indirect_thunk_r11 // 5 call *%r11; nop2; // 5 + * + */ + +asm( ".pushsection .rodata \n" + "fineibt_preamble_start: \n" + " endbr64 \n" + " subl $0x12345678, %r10d \n" + " je fineibt_preamble_end \n" + " ud2 \n" + " nop \n" + "fineibt_preamble_end: \n" + ".popsection\n" +); + +extern u8 fineibt_preamble_start[]; +extern u8 fineibt_preamble_end[]; + +#define fineibt_preamble_size (fineibt_preamble_end - fineibt_preamble_start) +#define fineibt_preamble_hash 7 + +asm( ".pushsection .rodata \n" + "fineibt_caller_start: \n" + " movl $0x12345678, %r10d \n" + " sub $16, %r11 \n" + ASM_NOP4 + "fineibt_caller_end: \n" + ".popsection \n" +); + +extern u8 fineibt_caller_start[]; +extern u8 
fineibt_caller_end[]; + +#define fineibt_caller_size (fineibt_caller_end - fineibt_caller_start) +#define fineibt_caller_hash 2 + +#define fineibt_caller_jmp (fineibt_caller_size - 2) + +static u32 decode_preamble_hash(void *addr) +{ + u8 *p = addr; + + /* b8 78 56 34 12 mov $0x12345678,%eax */ + if (p[0] == 0xb8) + return *(u32 *)(addr + 1); + + return 0; /* invalid hash value */ +} + +static u32 decode_caller_hash(void *addr) +{ + u8 *p = addr; + + /* 41 ba 78 56 34 12 mov $0x12345678,%r10d */ + if (p[0] == 0x41 && p[1] == 0xba) + return -*(u32 *)(addr + 2); + + /* eb 0c 78 56 34 12 jmp.d8 +12 */ + if (p[0] == JMP8_INSN_OPCODE && p[1] == fineibt_caller_jmp) + return -*(u32 *)(addr + 2); + + return 0; /* invalid hash value */ +} + +/* .retpoline_sites */ +static int cfi_disable_callers(s32 *start, s32 *end) +{ + /* + * Disable kCFI by patching in a JMP.d8, this leaves the hash immediate + * intact for later usage. Also see decode_caller_hash() and + * cfi_rewrite_callers(). + */ + const u8 jmp[] = { JMP8_INSN_OPCODE, fineibt_caller_jmp }; + s32 *s; + + for (s = start; s < end; s++) { + void *addr = (void *)s + *s; + u32 hash; + + addr -= fineibt_caller_size; + hash = decode_caller_hash(addr); + if (!hash) /* nocfi callers */ + continue; + + text_poke_early(addr, jmp, 2); + } + + return 0; +} + +/* .cfi_sites */ +static int cfi_rewrite_preamble(s32 *start, s32 *end) +{ + s32 *s; + + for (s = start; s < end; s++) { + void *addr = (void *)s + *s; + u32 hash; + + hash = decode_preamble_hash(addr); + if (WARN(!hash, "no CFI hash found at: %pS %px %*ph\n", + addr, addr, 5, addr)) + return -EINVAL; + + text_poke_early(addr, fineibt_preamble_start, fineibt_preamble_size); + WARN_ON(*(u32 *)(addr + fineibt_preamble_hash) != 0x12345678); + text_poke_early(addr + fineibt_preamble_hash, &hash, 4); + } + + return 0; +} + +/* .retpoline_sites */ +static int cfi_rewrite_callers(s32 *start, s32 *end) +{ + s32 *s; + + for (s = start; s < end; s++) { + void *addr = (void *)s + *s; 
+ u32 hash; + + addr -= fineibt_caller_size; + hash = decode_caller_hash(addr); + if (hash) { + text_poke_early(addr, fineibt_caller_start, fineibt_caller_size); + WARN_ON(*(u32 *)(addr + fineibt_caller_hash) != 0x12345678); + text_poke_early(addr + fineibt_caller_hash, &hash, 4); + } + /* rely on apply_retpolines() */ + } + + return 0; +} + +static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, + s32 *start_cfi, s32 *end_cfi, bool builtin) +{ + int ret; + + if (WARN_ONCE(fineibt_preamble_size != 16, + "FineIBT preamble wrong size: %ld", fineibt_preamble_size)) + return; + + if (!HAS_KERNEL_IBT || !cpu_feature_enabled(X86_FEATURE_IBT)) + return; + + /* + * Rewrite the callers to not use the __cfi_ stubs, such that we might + * rewrite them. This disables all CFI. If this succeeds but any of the + * later stages fails, we're without CFI. + */ + ret = cfi_disable_callers(start_retpoline, end_retpoline); + if (ret) + goto err; + + ret = cfi_rewrite_preamble(start_cfi, end_cfi); + if (ret) + goto err; + + ret = cfi_rewrite_callers(start_retpoline, end_retpoline); + if (ret) + goto err; + + if (builtin) + pr_info("Using FineIBT CFI\n"); + + return; + +err: + pr_err("Something went horribly wrong trying to rewrite the CFI implementation.\n"); +} + +#else + +static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, + s32 *start_cfi, s32 *end_cfi, bool builtin) +{ +} + +#endif + +void apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, + s32 *start_cfi, s32 *end_cfi) +{ + return __apply_fineibt(start_retpoline, end_retpoline, + start_cfi, end_cfi, + /* .builtin = */ false); +} + #ifdef CONFIG_SMP static void alternatives_smp_lock(const s32 *start, const s32 *end, u8 *text, u8 *text_end) @@ -996,6 +1220,9 @@ void __init alternative_instructions(void) */ apply_paravirt(__parainstructions, __parainstructions_end); + __apply_fineibt(__retpoline_sites, __retpoline_sites_end, + __cfi_sites, __cfi_sites_end, true); + /* * Rewrite the retpolines, 
must be done before alternatives since * those can rewrite the retpoline thunks. diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 2bec4b4b2c50..423a760fa9de 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -609,6 +609,7 @@ static __always_inline void setup_cet(struct cpuinfo_x86 *c) if (!ibt_selftest()) { pr_err("IBT selftest: Failed!\n"); + wrmsrl(MSR_IA32_S_CET, 0); setup_clear_cpu_cap(X86_FEATURE_IBT); return; } diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 2fb9de2cef40..0142982e94c5 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -255,7 +255,7 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, *para = NULL, *orc = NULL, *orc_ip = NULL, *retpolines = NULL, *returns = NULL, *ibt_endbr = NULL, - *calls = NULL; + *calls = NULL, *cfi = NULL; char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { @@ -277,6 +277,8 @@ int module_finalize(const Elf_Ehdr *hdr, returns = s; if (!strcmp(".call_sites", secstrings + s->sh_name)) calls = s; + if (!strcmp(".cfi_sites", secstrings + s->sh_name)) + cfi = s; if (!strcmp(".ibt_endbr_seal", secstrings + s->sh_name)) ibt_endbr = s; } @@ -289,6 +291,22 @@ int module_finalize(const Elf_Ehdr *hdr, void *pseg = (void *)para->sh_addr; apply_paravirt(pseg, pseg + para->sh_size); } + if (retpolines || cfi) { + void *rseg = NULL, *cseg = NULL; + unsigned int rsize = 0, csize = 0; + + if (retpolines) { + rseg = (void *)retpolines->sh_addr; + rsize = retpolines->sh_size; + } + + if (cfi) { + cseg = (void *)cfi->sh_addr; + csize = cfi->sh_size; + } + + apply_fineibt(rseg, rseg + rsize, cseg, cseg + csize); + } if (retpolines) { void *rseg = (void *)retpolines->sh_addr; apply_retpolines(rseg, rseg + retpolines->sh_size); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 
49f3f86433c7..2e0ee14229bf 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -309,6 +309,15 @@ SECTIONS } #endif +#ifdef CONFIG_FINEIBT + . = ALIGN(8); + .cfi_sites : AT(ADDR(.cfi_sites) - LOAD_OFFSET) { + __cfi_sites = .; + *(.cfi_sites) + __cfi_sites_end = .; + } +#endif + /* * struct alt_inst entries. From the header (alternative.h): * "Alternative instructions for different CPU types or capabilities" diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5296aea9b5b4..923a3d508047 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -984,7 +984,7 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func } #ifdef CONFIG_X86_64 -#ifdef CONFIG_CALL_THUNKS +#ifdef CONFIG_CALL_PADDING #define BPF_DISPATCHER_ATTRIBUTES __attribute__((patchable_function_entry(5+CONFIG_FUNCTION_PADDING_BYTES,CONFIG_FUNCTION_PADDING_BYTES))) #else #define BPF_DISPATCHER_ATTRIBUTES __attribute__((patchable_function_entry(5))) diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 2e03bcbf2b9b..2b2fab705a63 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -256,6 +256,7 @@ objtool-args-$(CONFIG_HAVE_JUMP_LABEL_HACK) += --hacks=jump_label objtool-args-$(CONFIG_HAVE_NOINSTR_HACK) += --hacks=noinstr objtool-args-$(CONFIG_CALL_DEPTH_TRACKING) += --hacks=skylake objtool-args-$(CONFIG_X86_KERNEL_IBT) += --ibt +objtool-args-$(CONFIG_FINEIBT) += --cfi objtool-args-$(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL) += --mcount objtool-args-$(CONFIG_UNWINDER_ORC) += --orc objtool-args-$(CONFIG_RETPOLINE) += --retpoline From 082c4c815252ea333b0f3a51e336df60c2314fe2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 27 Oct 2022 11:28:15 +0200 Subject: [PATCH 0725/4122] x86/cfi: Boot time selection of CFI scheme Add the "cfi=" boot parameter to allow people to select a CFI scheme at boot time. Mostly useful for development / debugging. 
Requested-by: Kees Cook Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20221027092842.699804264@infradead.org --- arch/x86/kernel/alternative.c | 99 ++++++++++++++++++++++++++++------- 1 file changed, 81 insertions(+), 18 deletions(-) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 91b0e63a6238..9d3b58748ca5 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -702,6 +702,47 @@ void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end) { } #endif /* CONFIG_X86_KERNEL_IBT */ #ifdef CONFIG_FINEIBT + +enum cfi_mode { + CFI_DEFAULT, + CFI_OFF, + CFI_KCFI, + CFI_FINEIBT, +}; + +static enum cfi_mode cfi_mode __ro_after_init = CFI_DEFAULT; + +static __init int cfi_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + while (str) { + char *next = strchr(str, ','); + if (next) { + *next = 0; + next++; + } + + if (!strcmp(str, "auto")) { + cfi_mode = CFI_DEFAULT; + } else if (!strcmp(str, "off")) { + cfi_mode = CFI_OFF; + } else if (!strcmp(str, "kcfi")) { + cfi_mode = CFI_KCFI; + } else if (!strcmp(str, "fineibt")) { + cfi_mode = CFI_FINEIBT; + } else { + pr_err("Ignoring unknown cfi option (%s).", str); + } + + str = next; + } + + return 0; +} +early_param("cfi", cfi_parse_cmdline); + /* * kCFI FineIBT * @@ -868,30 +909,52 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, "FineIBT preamble wrong size: %ld", fineibt_preamble_size)) return; - if (!HAS_KERNEL_IBT || !cpu_feature_enabled(X86_FEATURE_IBT)) + if (cfi_mode == CFI_DEFAULT) { + cfi_mode = CFI_KCFI; + if (HAS_KERNEL_IBT && cpu_feature_enabled(X86_FEATURE_IBT)) + cfi_mode = CFI_FINEIBT; + } + + switch (cfi_mode) { + case CFI_OFF: + ret = cfi_disable_callers(start_retpoline, end_retpoline); + if (ret) + goto err; + + if (builtin) + pr_info("Disabling CFI\n"); return; - /* - * Rewrite the callers to not use the __cfi_ stubs, such that we might - * rewrite them. 
This disables all CFI. If this succeeds but any of the - * later stages fails, we're without CFI. - */ - ret = cfi_disable_callers(start_retpoline, end_retpoline); - if (ret) - goto err; + case CFI_KCFI: + if (builtin) + pr_info("Using kCFI\n"); + return; - ret = cfi_rewrite_preamble(start_cfi, end_cfi); - if (ret) - goto err; + case CFI_FINEIBT: + /* + * Rewrite the callers to not use the __cfi_ stubs, such that we might + * rewrite them. This disables all CFI. If this succeeds but any of the + * later stages fails, we're without CFI. + */ + ret = cfi_disable_callers(start_retpoline, end_retpoline); + if (ret) + goto err; - ret = cfi_rewrite_callers(start_retpoline, end_retpoline); - if (ret) - goto err; + ret = cfi_rewrite_preamble(start_cfi, end_cfi); + if (ret) + goto err; - if (builtin) - pr_info("Using FineIBT CFI\n"); + ret = cfi_rewrite_callers(start_retpoline, end_retpoline); + if (ret) + goto err; - return; + if (builtin) + pr_info("Using FineIBT CFI\n"); + return; + + default: + break; + } err: pr_err("Something went horribly wrong trying to rewrite the CFI implementation.\n"); From 0c3e806ec0f9771fa1f34c60499097d9260a8bb7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 27 Oct 2022 11:28:16 +0200 Subject: [PATCH 0726/4122] x86/cfi: Add boot time hash randomization In order to avoid known hashes (from knowing the boot image), randomize the CFI hashes with a per-boot random seed. 
Suggested-by: Kees Cook Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20221027092842.765195516@infradead.org --- arch/x86/kernel/alternative.c | 120 ++++++++++++++++++++++++++++++---- 1 file changed, 108 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 9d3b58748ca5..aa7f791585c5 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -711,6 +711,24 @@ enum cfi_mode { }; static enum cfi_mode cfi_mode __ro_after_init = CFI_DEFAULT; +static bool cfi_rand __ro_after_init = true; +static u32 cfi_seed __ro_after_init; + +/* + * Re-hash the CFI hash with a boot-time seed while making sure the result is + * not a valid ENDBR instruction. + */ +static u32 cfi_rehash(u32 hash) +{ + hash ^= cfi_seed; + while (unlikely(is_endbr(hash) || is_endbr(-hash))) { + bool lsb = hash & 1; + hash >>= 1; + if (lsb) + hash ^= 0x80200003; + } + return hash; +} static __init int cfi_parse_cmdline(char *str) { @@ -728,10 +746,13 @@ static __init int cfi_parse_cmdline(char *str) cfi_mode = CFI_DEFAULT; } else if (!strcmp(str, "off")) { cfi_mode = CFI_OFF; + cfi_rand = false; } else if (!strcmp(str, "kcfi")) { cfi_mode = CFI_KCFI; } else if (!strcmp(str, "fineibt")) { cfi_mode = CFI_FINEIBT; + } else if (!strcmp(str, "norand")) { + cfi_rand = false; } else { pr_err("Ignoring unknown cfi option (%s).", str); } @@ -856,7 +877,50 @@ static int cfi_disable_callers(s32 *start, s32 *end) return 0; } +static int cfi_enable_callers(s32 *start, s32 *end) +{ + /* + * Re-enable kCFI, undo what cfi_disable_callers() did. 
+ */ + const u8 mov[] = { 0x41, 0xba }; + s32 *s; + + for (s = start; s < end; s++) { + void *addr = (void *)s + *s; + u32 hash; + + addr -= fineibt_caller_size; + hash = decode_caller_hash(addr); + if (!hash) /* nocfi callers */ + continue; + + text_poke_early(addr, mov, 2); + } + + return 0; +} + /* .cfi_sites */ +static int cfi_rand_preamble(s32 *start, s32 *end) +{ + s32 *s; + + for (s = start; s < end; s++) { + void *addr = (void *)s + *s; + u32 hash; + + hash = decode_preamble_hash(addr); + if (WARN(!hash, "no CFI hash found at: %pS %px %*ph\n", + addr, addr, 5, addr)) + return -EINVAL; + + hash = cfi_rehash(hash); + text_poke_early(addr + 1, &hash, 4); + } + + return 0; +} + static int cfi_rewrite_preamble(s32 *start, s32 *end) { s32 *s; @@ -879,6 +943,25 @@ static int cfi_rewrite_preamble(s32 *start, s32 *end) } /* .retpoline_sites */ +static int cfi_rand_callers(s32 *start, s32 *end) +{ + s32 *s; + + for (s = start; s < end; s++) { + void *addr = (void *)s + *s; + u32 hash; + + addr -= fineibt_caller_size; + hash = decode_caller_hash(addr); + if (hash) { + hash = -cfi_rehash(hash); + text_poke_early(addr + 2, &hash, 4); + } + } + + return 0; +} + static int cfi_rewrite_callers(s32 *start, s32 *end) { s32 *s; @@ -915,31 +998,44 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, cfi_mode = CFI_FINEIBT; } - switch (cfi_mode) { - case CFI_OFF: - ret = cfi_disable_callers(start_retpoline, end_retpoline); + /* + * Rewrite the callers to not use the __cfi_ stubs, such that we might + * rewrite them. This disables all CFI. If this succeeds but any of the + * later stages fails, we're without CFI. 
+ */ + ret = cfi_disable_callers(start_retpoline, end_retpoline); + if (ret) + goto err; + + if (cfi_rand) { + if (builtin) + cfi_seed = get_random_u32(); + + ret = cfi_rand_preamble(start_cfi, end_cfi); if (ret) goto err; + ret = cfi_rand_callers(start_retpoline, end_retpoline); + if (ret) + goto err; + } + + switch (cfi_mode) { + case CFI_OFF: if (builtin) pr_info("Disabling CFI\n"); return; case CFI_KCFI: + ret = cfi_enable_callers(start_retpoline, end_retpoline); + if (ret) + goto err; + if (builtin) pr_info("Using kCFI\n"); return; case CFI_FINEIBT: - /* - * Rewrite the callers to not use the __cfi_ stubs, such that we might - * rewrite them. This disables all CFI. If this succeeds but any of the - * later stages fails, we're without CFI. - */ - ret = cfi_disable_callers(start_retpoline, end_retpoline); - if (ret) - goto err; - ret = cfi_rewrite_preamble(start_cfi, end_cfi); if (ret) goto err; From 9e4a617757273a86b560c1ece40c48e4940a3c79 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 29 Sep 2022 02:24:53 -0700 Subject: [PATCH 0727/4122] string: Add __realloc_size hint to kmemdup() Add __realloc_size() hint to kmemdup() so the compiler can reason about the length of the returned buffer. (These must not use __alloc_size, since those include __malloc which says the contents aren't defined[1]). 
[1] https://lore.kernel.org/linux-hardening/d199c2af-06af-8a50-a6a1-00eefa0b67b4@prevas.dk/ Cc: Rasmus Villemoes Cc: Guenter Roeck Cc: Andy Shevchenko Cc: Paolo Abeni Cc: Geert Uytterhoeven Signed-off-by: Kees Cook --- include/linux/fortify-string.h | 3 ++- include/linux/string.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index e5b39b1cc2fc..49782f63f015 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -659,7 +659,8 @@ __FORTIFY_INLINE void *memchr_inv(const void * const POS0 p, int c, size_t size) return __real_memchr_inv(p, c, size); } -extern void *__real_kmemdup(const void *src, size_t len, gfp_t gfp) __RENAME(kmemdup); +extern void *__real_kmemdup(const void *src, size_t len, gfp_t gfp) __RENAME(kmemdup) + __realloc_size(2); __FORTIFY_INLINE void *kmemdup(const void * const POS0 p, size_t size, gfp_t gfp) { size_t p_size = __struct_size(p); diff --git a/include/linux/string.h b/include/linux/string.h index cf7607b32102..db28802ab0a6 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -176,7 +176,7 @@ extern void kfree_const(const void *x); extern char *kstrdup(const char *s, gfp_t gfp) __malloc; extern const char *kstrdup_const(const char *s, gfp_t gfp); extern char *kstrndup(const char *s, size_t len, gfp_t gfp); -extern void *kmemdup(const void *src, size_t len, gfp_t gfp); +extern void *kmemdup(const void *src, size_t len, gfp_t gfp) __realloc_size(2); extern char *kmemdup_nul(const char *s, size_t len, gfp_t gfp); extern char **argv_split(gfp_t gfp, const char *str, int *argcp); From 41eefc46a3a4682976afb5f8c4b9734ed6bfd406 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sun, 2 Oct 2022 09:51:46 -0700 Subject: [PATCH 0728/4122] string: Convert strscpy() self-test to KUnit Convert the strscpy() self-test to a KUnit test. Cc: David Gow Cc: Tobin C. 
Harding Tested-by: Nathan Chancellor Link: https://lore.kernel.org/lkml/Y072ZMk/hNkfwqMv@dev-arch.thelio-3990X Signed-off-by: Kees Cook --- MAINTAINERS | 1 + lib/Kconfig.debug | 8 ++- lib/Makefile | 2 +- lib/strscpy_kunit.c | 129 +++++++++++++++++++++++++++++++++++++ lib/test_strscpy.c | 150 -------------------------------------------- 5 files changed, 136 insertions(+), 154 deletions(-) create mode 100644 lib/strscpy_kunit.c delete mode 100644 lib/test_strscpy.c diff --git a/MAINTAINERS b/MAINTAINERS index 9dd8d74c4df0..232d78340d79 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8045,6 +8045,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/har F: include/linux/fortify-string.h F: lib/fortify_kunit.c F: lib/memcpy_kunit.c +F: lib/strscpy_kunit.c F: lib/test_fortify/* F: scripts/test_fortify.sh K: \b__NO_FORTIFY\b diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 3fc7abffc7aa..e0a4d52e434c 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2215,9 +2215,6 @@ config STRING_SELFTEST config TEST_STRING_HELPERS tristate "Test functions located in the string_helpers module at runtime" -config TEST_STRSCPY - tristate "Test strscpy*() family of functions at runtime" - config TEST_KSTRTOX tristate "Test kstrto*() family of functions at runtime" @@ -2583,6 +2580,11 @@ config HW_BREAKPOINT_KUNIT_TEST If unsure, say N. 
+config STRSCPY_KUNIT_TEST + tristate "Test strscpy*() family of functions at runtime" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + config TEST_UDELAY tristate "udelay test driver" help diff --git a/lib/Makefile b/lib/Makefile index 161d6a724ff7..1905e5c26849 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -82,7 +82,6 @@ obj-$(CONFIG_TEST_DYNAMIC_DEBUG) += test_dynamic_debug.o obj-$(CONFIG_TEST_PRINTF) += test_printf.o obj-$(CONFIG_TEST_SCANF) += test_scanf.o obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o -obj-$(CONFIG_TEST_STRSCPY) += test_strscpy.o obj-$(CONFIG_TEST_UUID) += test_uuid.o obj-$(CONFIG_TEST_XARRAY) += test_xarray.o obj-$(CONFIG_TEST_PARMAN) += test_parman.o @@ -380,6 +379,7 @@ obj-$(CONFIG_OVERFLOW_KUNIT_TEST) += overflow_kunit.o CFLAGS_stackinit_kunit.o += $(call cc-disable-warning, switch-unreachable) obj-$(CONFIG_STACKINIT_KUNIT_TEST) += stackinit_kunit.o obj-$(CONFIG_FORTIFY_KUNIT_TEST) += fortify_kunit.o +obj-$(CONFIG_STRSCPY_KUNIT_TEST) += strscpy_kunit.o obj-$(CONFIG_GENERIC_LIB_DEVMEM_IS_ALLOWED) += devmem_is_allowed.o diff --git a/lib/strscpy_kunit.c b/lib/strscpy_kunit.c new file mode 100644 index 000000000000..98523f828d3a --- /dev/null +++ b/lib/strscpy_kunit.c @@ -0,0 +1,129 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Kernel module for testing 'strscpy' family of functions. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <kunit/test.h> +#include <linux/string.h> + +/* + * tc() - Run a specific test case. + * @src: Source string, argument to strscpy_pad() + * @count: Size of destination buffer, argument to strscpy_pad() + * @expected: Expected return value from call to strscpy_pad() + * @terminator: 1 if there should be a terminating null byte 0 otherwise. + * @chars: Number of characters from the src string expected to be + * written to the dst buffer. + * @pad: Number of pad characters expected (in the tail of dst buffer). + * (@pad does not include the null terminator byte.)
+ * + * Calls strscpy_pad() and verifies the return value and state of the + * destination buffer after the call returns. + */ +static void tc(struct kunit *test, char *src, int count, int expected, + int chars, int terminator, int pad) +{ + int nr_bytes_poison; + int max_expected; + int max_count; + int written; + char buf[6]; + int index, i; + const char POISON = 'z'; + + KUNIT_ASSERT_TRUE_MSG(test, src != NULL, + "null source string not supported"); + + memset(buf, POISON, sizeof(buf)); + /* Future proofing test suite, validate args */ + max_count = sizeof(buf) - 2; /* Space for null and to verify overflow */ + max_expected = count - 1; /* Space for the null */ + + KUNIT_ASSERT_LE_MSG(test, count, max_count, + "count (%d) is too big (%d) ... aborting", count, max_count); + KUNIT_EXPECT_LE_MSG(test, expected, max_expected, + "expected (%d) is bigger than can possibly be returned (%d)", + expected, max_expected); + + written = strscpy_pad(buf, src, count); + KUNIT_ASSERT_EQ(test, written, expected); + + if (count && written == -E2BIG) { + KUNIT_ASSERT_EQ_MSG(test, 0, strncmp(buf, src, count - 1), + "buffer state invalid for -E2BIG"); + KUNIT_ASSERT_EQ_MSG(test, buf[count - 1], '\0', + "too big string is not null terminated correctly"); + } + + for (i = 0; i < chars; i++) + KUNIT_ASSERT_EQ_MSG(test, buf[i], src[i], + "buf[i]==%c != src[i]==%c", buf[i], src[i]); + + if (terminator) + KUNIT_ASSERT_EQ_MSG(test, buf[count - 1], '\0', + "string is not null terminated correctly"); + + for (i = 0; i < pad; i++) { + index = chars + terminator + i; + KUNIT_ASSERT_EQ_MSG(test, buf[index], '\0', + "padding missing at index: %d", i); + } + + nr_bytes_poison = sizeof(buf) - chars - terminator - pad; + for (i = 0; i < nr_bytes_poison; i++) { + index = sizeof(buf) - 1 - i; /* Check from the end back */ + KUNIT_ASSERT_EQ_MSG(test, buf[index], POISON, + "poison value missing at index: %d", i); + } +} + +static void strscpy_test(struct kunit *test) +{ + /* + * tc() uses a 
destination buffer of size 6 and needs at + * least 2 characters spare (one for null and one to check for + * overflow). This means we should only call tc() with + * strings up to a maximum of 4 characters long and 'count' + * should not exceed 4. To test with longer strings increase + * the buffer size in tc(). + */ + + /* tc(test, src, count, expected, chars, terminator, pad) */ + tc(test, "a", 0, -E2BIG, 0, 0, 0); + tc(test, "", 0, -E2BIG, 0, 0, 0); + + tc(test, "a", 1, -E2BIG, 0, 1, 0); + tc(test, "", 1, 0, 0, 1, 0); + + tc(test, "ab", 2, -E2BIG, 1, 1, 0); + tc(test, "a", 2, 1, 1, 1, 0); + tc(test, "", 2, 0, 0, 1, 1); + + tc(test, "abc", 3, -E2BIG, 2, 1, 0); + tc(test, "ab", 3, 2, 2, 1, 0); + tc(test, "a", 3, 1, 1, 1, 1); + tc(test, "", 3, 0, 0, 1, 2); + + tc(test, "abcd", 4, -E2BIG, 3, 1, 0); + tc(test, "abc", 4, 3, 3, 1, 0); + tc(test, "ab", 4, 2, 2, 1, 1); + tc(test, "a", 4, 1, 1, 1, 2); + tc(test, "", 4, 0, 0, 1, 3); +} + +static struct kunit_case strscpy_test_cases[] = { + KUNIT_CASE(strscpy_test), + {} +}; + +static struct kunit_suite strscpy_test_suite = { + .name = "strscpy", + .test_cases = strscpy_test_cases, +}; + +kunit_test_suite(strscpy_test_suite); + +MODULE_AUTHOR("Tobin C. Harding "); +MODULE_LICENSE("GPL"); diff --git a/lib/test_strscpy.c b/lib/test_strscpy.c deleted file mode 100644 index a827f94601f5..000000000000 --- a/lib/test_strscpy.c +++ /dev/null @@ -1,150 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/string.h> - -#include "../tools/testing/selftests/kselftest_module.h" - -/* - * Kernel module for testing 'strscpy' family of functions. - */ - -KSTM_MODULE_GLOBALS(); - -/* - * tc() - Run a specific test case. - * @src: Source string, argument to strscpy_pad() - * @count: Size of destination buffer, argument to strscpy_pad() - * @expected: Expected return value from call to strscpy_pad() - * @terminator: 1 if there should be a terminating null byte 0 otherwise.
- * @chars: Number of characters from the src string expected to be - * written to the dst buffer. - * @pad: Number of pad characters expected (in the tail of dst buffer). - * (@pad does not include the null terminator byte.) - * - * Calls strscpy_pad() and verifies the return value and state of the - * destination buffer after the call returns. - */ -static int __init tc(char *src, int count, int expected, - int chars, int terminator, int pad) -{ - int nr_bytes_poison; - int max_expected; - int max_count; - int written; - char buf[6]; - int index, i; - const char POISON = 'z'; - - total_tests++; - - if (!src) { - pr_err("null source string not supported\n"); - return -1; - } - - memset(buf, POISON, sizeof(buf)); - /* Future proofing test suite, validate args */ - max_count = sizeof(buf) - 2; /* Space for null and to verify overflow */ - max_expected = count - 1; /* Space for the null */ - if (count > max_count) { - pr_err("count (%d) is too big (%d) ... aborting", count, max_count); - return -1; - } - if (expected > max_expected) { - pr_warn("expected (%d) is bigger than can possibly be returned (%d)", - expected, max_expected); - } - - written = strscpy_pad(buf, src, count); - if ((written) != (expected)) { - pr_err("%d != %d (written, expected)\n", written, expected); - goto fail; - } - - if (count && written == -E2BIG) { - if (strncmp(buf, src, count - 1) != 0) { - pr_err("buffer state invalid for -E2BIG\n"); - goto fail; - } - if (buf[count - 1] != '\0') { - pr_err("too big string is not null terminated correctly\n"); - goto fail; - } - } - - for (i = 0; i < chars; i++) { - if (buf[i] != src[i]) { - pr_err("buf[i]==%c != src[i]==%c\n", buf[i], src[i]); - goto fail; - } - } - - if (terminator) { - if (buf[count - 1] != '\0') { - pr_err("string is not null terminated correctly\n"); - goto fail; - } - } - - for (i = 0; i < pad; i++) { - index = chars + terminator + i; - if (buf[index] != '\0') { - pr_err("padding missing at index: %d\n", i); - goto fail; - } - } 
- - nr_bytes_poison = sizeof(buf) - chars - terminator - pad; - for (i = 0; i < nr_bytes_poison; i++) { - index = sizeof(buf) - 1 - i; /* Check from the end back */ - if (buf[index] != POISON) { - pr_err("poison value missing at index: %d\n", i); - goto fail; - } - } - - return 0; -fail: - failed_tests++; - return -1; -} - -static void __init selftest(void) -{ - /* - * tc() uses a destination buffer of size 6 and needs at - * least 2 characters spare (one for null and one to check for - * overflow). This means we should only call tc() with - * strings up to a maximum of 4 characters long and 'count' - * should not exceed 4. To test with longer strings increase - * the buffer size in tc(). - */ - - /* tc(src, count, expected, chars, terminator, pad) */ - KSTM_CHECK_ZERO(tc("a", 0, -E2BIG, 0, 0, 0)); - KSTM_CHECK_ZERO(tc("", 0, -E2BIG, 0, 0, 0)); - - KSTM_CHECK_ZERO(tc("a", 1, -E2BIG, 0, 1, 0)); - KSTM_CHECK_ZERO(tc("", 1, 0, 0, 1, 0)); - - KSTM_CHECK_ZERO(tc("ab", 2, -E2BIG, 1, 1, 0)); - KSTM_CHECK_ZERO(tc("a", 2, 1, 1, 1, 0)); - KSTM_CHECK_ZERO(tc("", 2, 0, 0, 1, 1)); - - KSTM_CHECK_ZERO(tc("abc", 3, -E2BIG, 2, 1, 0)); - KSTM_CHECK_ZERO(tc("ab", 3, 2, 2, 1, 0)); - KSTM_CHECK_ZERO(tc("a", 3, 1, 1, 1, 1)); - KSTM_CHECK_ZERO(tc("", 3, 0, 0, 1, 2)); - - KSTM_CHECK_ZERO(tc("abcd", 4, -E2BIG, 3, 1, 0)); - KSTM_CHECK_ZERO(tc("abc", 4, 3, 3, 1, 0)); - KSTM_CHECK_ZERO(tc("ab", 4, 2, 2, 1, 1)); - KSTM_CHECK_ZERO(tc("a", 4, 1, 1, 1, 2)); - KSTM_CHECK_ZERO(tc("", 4, 0, 0, 1, 3)); -} - -KSTM_MODULE_LOADERS(test_strscpy); -MODULE_AUTHOR("Tobin C. Harding "); -MODULE_LICENSE("GPL"); From 62e1cbfc5d795381a0f237ae7ee229a92d51cf9e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sun, 2 Oct 2022 09:17:03 -0700 Subject: [PATCH 0729/4122] fortify: Short-circuit known-safe calls to strscpy() Replacing compile-time safe calls of strcpy()-related functions with strscpy() was always calling the full strscpy() logic when a builtin would be better. 
For example: char buf[16]; strcpy(buf, "yes"); would reduce to __builtin_memcpy(buf, "yes", 4), but not if it was: strscpy(buf, yes, sizeof(buf)); Fix this by checking if all sizes are known at compile-time. Cc: linux-hardening@vger.kernel.org Tested-by: Nathan Chancellor Signed-off-by: Kees Cook --- include/linux/fortify-string.h | 10 ++++++++++ lib/strscpy_kunit.c | 13 +++++++++++++ 2 files changed, 23 insertions(+) diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index 49782f63f015..32a66d4b30ca 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -314,6 +314,16 @@ __FORTIFY_INLINE ssize_t strscpy(char * const POS p, const char * const POS q, s if (__compiletime_lessthan(p_size, size)) __write_overflow(); + /* Short-circuit for compile-time known-safe lengths. */ + if (__compiletime_lessthan(p_size, SIZE_MAX)) { + len = __compiletime_strlen(q); + + if (len < SIZE_MAX && __compiletime_lessthan(len, size)) { + __underlying_memcpy(p, q, len + 1); + return len; + } + } + /* * This call protects from read overflow, because len will default to q * length if it smaller than size. diff --git a/lib/strscpy_kunit.c b/lib/strscpy_kunit.c index 98523f828d3a..a6b6344354ed 100644 --- a/lib/strscpy_kunit.c +++ b/lib/strscpy_kunit.c @@ -81,6 +81,8 @@ static void tc(struct kunit *test, char *src, int count, int expected, static void strscpy_test(struct kunit *test) { + char dest[8]; + /* * tc() uses a destination buffer of size 6 and needs at * least 2 characters spare (one for null and one to check for @@ -111,6 +113,17 @@ static void strscpy_test(struct kunit *test) tc(test, "ab", 4, 2, 2, 1, 1); tc(test, "a", 4, 1, 1, 1, 2); tc(test, "", 4, 0, 0, 1, 3); + + /* Compile-time-known source strings. 
*/ + KUNIT_EXPECT_EQ(test, strscpy(dest, "", ARRAY_SIZE(dest)), 0); + KUNIT_EXPECT_EQ(test, strscpy(dest, "", 3), 0); + KUNIT_EXPECT_EQ(test, strscpy(dest, "", 1), 0); + KUNIT_EXPECT_EQ(test, strscpy(dest, "", 0), -E2BIG); + KUNIT_EXPECT_EQ(test, strscpy(dest, "Fixed", ARRAY_SIZE(dest)), 5); + KUNIT_EXPECT_EQ(test, strscpy(dest, "Fixed", 3), -E2BIG); + KUNIT_EXPECT_EQ(test, strscpy(dest, "Fixed", 1), -E2BIG); + KUNIT_EXPECT_EQ(test, strscpy(dest, "Fixed", 0), -E2BIG); + KUNIT_EXPECT_EQ(test, strscpy(dest, "This is too long", ARRAY_SIZE(dest)), -E2BIG); } static struct kunit_case strscpy_test_cases[] = { From fb3d88ab354b3b07e805aba9d67cbb43d23dc70e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sun, 2 Oct 2022 19:45:23 -0700 Subject: [PATCH 0730/4122] siphash: Convert selftest to KUnit Convert the siphash self-test to KUnit so it will be included in "all KUnit tests" coverage, and can be run individually still: $ ./tools/testing/kunit/kunit.py run siphash ... [02:58:45] Starting KUnit Kernel (1/1)... [02:58:45] ============================================================ [02:58:45] =================== siphash (1 subtest) ==================== [02:58:45] [PASSED] siphash_test [02:58:45] ===================== [PASSED] siphash ===================== [02:58:45] ============================================================ [02:58:45] Testing complete. Ran 1 tests: passed: 1 [02:58:45] Elapsed time: 21.421s total, 4.306s configuring, 16.947s building, 0.148s running Cc: Vlastimil Babka Cc: "Steven Rostedt (Google)" Cc: Yury Norov Cc: Sander Vanheule Acked-by: "Jason A. 
Donenfeld" Link: https://lore.kernel.org/lkml/CAHmME9r+9MPH6zk3Vn=buEMSbQiWMFryqqzerKarmjYk+tHLJA@mail.gmail.com Tested-by: David Gow Signed-off-by: Kees Cook --- MAINTAINERS | 2 +- lib/Kconfig.debug | 20 +-- lib/Makefile | 2 +- lib/{test_siphash.c => siphash_kunit.c} | 165 ++++++++++-------------- 4 files changed, 83 insertions(+), 106 deletions(-) rename lib/{test_siphash.c => siphash_kunit.c} (60%) diff --git a/MAINTAINERS b/MAINTAINERS index 232d78340d79..1cd80c113721 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18864,7 +18864,7 @@ M: Jason A. Donenfeld S: Maintained F: include/linux/siphash.h F: lib/siphash.c -F: lib/test_siphash.c +F: lib/siphash_kunit.c SIS 190 ETHERNET DRIVER M: Francois Romieu diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index e0a4d52e434c..50cc1c4efadd 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2244,15 +2244,6 @@ config TEST_RHASHTABLE If unsure, say N. -config TEST_SIPHASH - tristate "Perform selftest on siphash functions" - help - Enable this option to test the kernel's siphash (<linux/siphash.h>) hash - functions on boot (or module load). - - This is intended to help people writing architecture-specific - optimized versions. If unsure, say N. - config TEST_IDA tristate "Perform selftest on IDA functions" @@ -2585,6 +2576,17 @@ config STRSCPY_KUNIT_TEST depends on KUNIT default KUNIT_ALL_TESTS +config SIPHASH_KUNIT_TEST + tristate "Perform selftest on siphash functions" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + help + Enable this option to test the kernel's siphash (<linux/siphash.h>) hash + functions on boot (or module load). + + This is intended to help people writing architecture-specific + optimized versions. If unsure, say N.
+ config TEST_UDELAY tristate "udelay test driver" help diff --git a/lib/Makefile b/lib/Makefile index 1905e5c26849..77c7951c8cf0 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -62,7 +62,6 @@ obj-$(CONFIG_TEST_BITOPS) += test_bitops.o CFLAGS_test_bitops.o += -Werror obj-$(CONFIG_CPUMASK_KUNIT_TEST) += cpumask_kunit.o obj-$(CONFIG_TEST_SYSCTL) += test_sysctl.o -obj-$(CONFIG_TEST_SIPHASH) += test_siphash.o obj-$(CONFIG_HASH_KUNIT_TEST) += test_hash.o obj-$(CONFIG_TEST_IDA) += test_ida.o obj-$(CONFIG_TEST_UBSAN) += test_ubsan.o @@ -380,6 +379,7 @@ CFLAGS_stackinit_kunit.o += $(call cc-disable-warning, switch-unreachable) obj-$(CONFIG_STACKINIT_KUNIT_TEST) += stackinit_kunit.o obj-$(CONFIG_FORTIFY_KUNIT_TEST) += fortify_kunit.o obj-$(CONFIG_STRSCPY_KUNIT_TEST) += strscpy_kunit.o +obj-$(CONFIG_SIPHASH_KUNIT_TEST) += siphash_kunit.o obj-$(CONFIG_GENERIC_LIB_DEVMEM_IS_ALLOWED) += devmem_is_allowed.o diff --git a/lib/test_siphash.c b/lib/siphash_kunit.c similarity index 60% rename from lib/test_siphash.c rename to lib/siphash_kunit.c index a96788d0141d..a3c697e8be35 100644 --- a/lib/test_siphash.c +++ b/lib/siphash_kunit.c @@ -13,6 +13,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <kunit/test.h> #include <linux/siphash.h> #include <linux/kernel.h> #include <linux/string.h> @@ -109,114 +110,88 @@ static const u32 test_vectors_hsiphash[64] = { }; #endif -static int __init siphash_test_init(void) +#define chk(hash, vector, fmt...)
\ + KUNIT_EXPECT_EQ_MSG(test, hash, vector, fmt) + +static void siphash_test(struct kunit *test) { u8 in[64] __aligned(SIPHASH_ALIGNMENT); u8 in_unaligned[65] __aligned(SIPHASH_ALIGNMENT); u8 i; - int ret = 0; for (i = 0; i < 64; ++i) { in[i] = i; in_unaligned[i + 1] = i; - if (siphash(in, i, &test_key_siphash) != - test_vectors_siphash[i]) { - pr_info("siphash self-test aligned %u: FAIL\n", i + 1); - ret = -EINVAL; - } - if (siphash(in_unaligned + 1, i, &test_key_siphash) != - test_vectors_siphash[i]) { - pr_info("siphash self-test unaligned %u: FAIL\n", i + 1); - ret = -EINVAL; - } - if (hsiphash(in, i, &test_key_hsiphash) != - test_vectors_hsiphash[i]) { - pr_info("hsiphash self-test aligned %u: FAIL\n", i + 1); - ret = -EINVAL; - } - if (hsiphash(in_unaligned + 1, i, &test_key_hsiphash) != - test_vectors_hsiphash[i]) { - pr_info("hsiphash self-test unaligned %u: FAIL\n", i + 1); - ret = -EINVAL; - } + chk(siphash(in, i, &test_key_siphash), + test_vectors_siphash[i], + "siphash self-test aligned %u: FAIL", i + 1); + chk(siphash(in_unaligned + 1, i, &test_key_siphash), + test_vectors_siphash[i], + "siphash self-test unaligned %u: FAIL", i + 1); + chk(hsiphash(in, i, &test_key_hsiphash), + test_vectors_hsiphash[i], + "hsiphash self-test aligned %u: FAIL", i + 1); + chk(hsiphash(in_unaligned + 1, i, &test_key_hsiphash), + test_vectors_hsiphash[i], + "hsiphash self-test unaligned %u: FAIL", i + 1); } - if (siphash_1u64(0x0706050403020100ULL, &test_key_siphash) != - test_vectors_siphash[8]) { - pr_info("siphash self-test 1u64: FAIL\n"); - ret = -EINVAL; - } - if (siphash_2u64(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL, - &test_key_siphash) != test_vectors_siphash[16]) { - pr_info("siphash self-test 2u64: FAIL\n"); - ret = -EINVAL; - } - if (siphash_3u64(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL, - 0x1716151413121110ULL, &test_key_siphash) != - test_vectors_siphash[24]) { - pr_info("siphash self-test 3u64: FAIL\n"); - ret = -EINVAL; - } - if 
(siphash_4u64(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL, + chk(siphash_1u64(0x0706050403020100ULL, &test_key_siphash), + test_vectors_siphash[8], + "siphash self-test 1u64: FAIL"); + chk(siphash_2u64(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL, + &test_key_siphash), + test_vectors_siphash[16], + "siphash self-test 2u64: FAIL"); + chk(siphash_3u64(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL, + 0x1716151413121110ULL, &test_key_siphash), + test_vectors_siphash[24], + "siphash self-test 3u64: FAIL"); + chk(siphash_4u64(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL, 0x1716151413121110ULL, 0x1f1e1d1c1b1a1918ULL, - &test_key_siphash) != test_vectors_siphash[32]) { - pr_info("siphash self-test 4u64: FAIL\n"); - ret = -EINVAL; - } - if (siphash_1u32(0x03020100U, &test_key_siphash) != - test_vectors_siphash[4]) { - pr_info("siphash self-test 1u32: FAIL\n"); - ret = -EINVAL; - } - if (siphash_2u32(0x03020100U, 0x07060504U, &test_key_siphash) != - test_vectors_siphash[8]) { - pr_info("siphash self-test 2u32: FAIL\n"); - ret = -EINVAL; - } - if (siphash_3u32(0x03020100U, 0x07060504U, - 0x0b0a0908U, &test_key_siphash) != - test_vectors_siphash[12]) { - pr_info("siphash self-test 3u32: FAIL\n"); - ret = -EINVAL; - } - if (siphash_4u32(0x03020100U, 0x07060504U, - 0x0b0a0908U, 0x0f0e0d0cU, &test_key_siphash) != - test_vectors_siphash[16]) { - pr_info("siphash self-test 4u32: FAIL\n"); - ret = -EINVAL; - } - if (hsiphash_1u32(0x03020100U, &test_key_hsiphash) != - test_vectors_hsiphash[4]) { - pr_info("hsiphash self-test 1u32: FAIL\n"); - ret = -EINVAL; - } - if (hsiphash_2u32(0x03020100U, 0x07060504U, &test_key_hsiphash) != - test_vectors_hsiphash[8]) { - pr_info("hsiphash self-test 2u32: FAIL\n"); - ret = -EINVAL; - } - if (hsiphash_3u32(0x03020100U, 0x07060504U, - 0x0b0a0908U, &test_key_hsiphash) != - test_vectors_hsiphash[12]) { - pr_info("hsiphash self-test 3u32: FAIL\n"); - ret = -EINVAL; - } - if (hsiphash_4u32(0x03020100U, 0x07060504U, - 0x0b0a0908U, 0x0f0e0d0cU, 
&test_key_hsiphash) != - test_vectors_hsiphash[16]) { - pr_info("hsiphash self-test 4u32: FAIL\n"); - ret = -EINVAL; - } - if (!ret) - pr_info("self-tests: pass\n"); - return ret; + &test_key_siphash), + test_vectors_siphash[32], + "siphash self-test 4u64: FAIL"); + chk(siphash_1u32(0x03020100U, &test_key_siphash), + test_vectors_siphash[4], + "siphash self-test 1u32: FAIL"); + chk(siphash_2u32(0x03020100U, 0x07060504U, &test_key_siphash), + test_vectors_siphash[8], + "siphash self-test 2u32: FAIL"); + chk(siphash_3u32(0x03020100U, 0x07060504U, + 0x0b0a0908U, &test_key_siphash), + test_vectors_siphash[12], + "siphash self-test 3u32: FAIL"); + chk(siphash_4u32(0x03020100U, 0x07060504U, + 0x0b0a0908U, 0x0f0e0d0cU, &test_key_siphash), + test_vectors_siphash[16], + "siphash self-test 4u32: FAIL"); + chk(hsiphash_1u32(0x03020100U, &test_key_hsiphash), + test_vectors_hsiphash[4], + "hsiphash self-test 1u32: FAIL"); + chk(hsiphash_2u32(0x03020100U, 0x07060504U, &test_key_hsiphash), + test_vectors_hsiphash[8], + "hsiphash self-test 2u32: FAIL"); + chk(hsiphash_3u32(0x03020100U, 0x07060504U, + 0x0b0a0908U, &test_key_hsiphash), + test_vectors_hsiphash[12], + "hsiphash self-test 3u32: FAIL"); + chk(hsiphash_4u32(0x03020100U, 0x07060504U, + 0x0b0a0908U, 0x0f0e0d0cU, &test_key_hsiphash), + test_vectors_hsiphash[16], + "hsiphash self-test 4u32: FAIL"); } -static void __exit siphash_test_exit(void) -{ -} +static struct kunit_case siphash_test_cases[] = { + KUNIT_CASE(siphash_test), + {} +}; -module_init(siphash_test_init); -module_exit(siphash_test_exit); +static struct kunit_suite siphash_test_suite = { + .name = "siphash", + .test_cases = siphash_test_cases, +}; + +kunit_test_suite(siphash_test_suite); MODULE_AUTHOR("Jason A. 
Donenfeld "); MODULE_LICENSE("Dual BSD/GPL"); From e9a40e1585d792751d3a122392695e5a53032809 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 25 Oct 2022 16:05:18 -0700 Subject: [PATCH 0731/4122] fortify: Do not cast to "unsigned char" Do not cast to "unsigned char", as this needlessly creates type problems when attempting builds without -Wno-pointer-sign[1]. The intent of the cast is to drop possible "const" types. [1] https://lore.kernel.org/lkml/CAHk-=wgz3Uba8w7kdXhsqR1qvfemYL+OFQdefJnkeqXG8qZ_pA@mail.gmail.com/ Suggested-by: Linus Torvalds Fixes: 3009f891bb9f ("fortify: Allow strlen() and strnlen() to pass compile-time known lengths") Cc: linux-hardening@vger.kernel.org Signed-off-by: Kees Cook --- include/linux/fortify-string.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index 32a66d4b30ca..aa31f54f8b57 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -18,7 +18,7 @@ void __write_overflow_field(size_t avail, size_t wanted) __compiletime_warning(" #define __compiletime_strlen(p) \ ({ \ - unsigned char *__p = (unsigned char *)(p); \ + char *__p = (char *)(p); \ size_t __ret = SIZE_MAX; \ size_t __p_size = __member_size(p); \ if (__p_size != SIZE_MAX && \ From 5a17f040fa332e71a45ca9ff02d6979d9176a423 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 26 Oct 2022 16:31:11 -0700 Subject: [PATCH 0732/4122] cred: Do not default to init_cred in prepare_kernel_cred() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A common exploit pattern for ROP attacks is to abuse prepare_kernel_cred() in order to construct escalated privileges[1]. Instead of providing a short-hand argument (NULL) to the "daemon" argument to indicate using init_cred as the base cred, require that "daemon" is always set to an actual task. Replace all existing callers that were passing NULL with &init_task. 
Future attacks will need to have sufficiently powerful read/write primitives to have found an appropriately privileged task and written it to the ROP stack as an argument to succeed, which is similarly difficult to the prior effort needed to escalate privileges before struct cred existed: locate the current cred and overwrite the uid member. This has the added benefit of meaning that prepare_kernel_cred() can no longer exceed the privileges of the init task, which may have changed from the original init_cred (e.g. dropping capabilities from the bounding set). [1] https://google.com/search?q=commit_creds(prepare_kernel_cred(0)) Cc: "Eric W. Biederman" Cc: David Howells Cc: "Rafael J. Wysocki" Cc: Steve French Cc: Ronnie Sahlberg Cc: Shyam Prasad N Cc: Tom Talpey Cc: Namjae Jeon Cc: Trond Myklebust Cc: Anna Schumaker Cc: Chuck Lever Cc: Jeff Layton Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: "Michal Koutný" Cc: Peter Zijlstra Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Cc: linux-nfs@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Acked-by: Luis Chamberlain Reviewed-by: Sergey Senozhatsky Acked-by: Russ Weight Acked-by: Greg Kroah-Hartman Acked-by: Paulo Alcantara (SUSE) Link: https://lore.kernel.org/r/20221026232943.never.775-kees@kernel.org --- drivers/base/firmware_loader/main.c | 2 +- fs/cifs/cifs_spnego.c | 2 +- fs/cifs/cifsacl.c | 2 +- fs/ksmbd/smb_common.c | 2 +- fs/nfs/flexfilelayout/flexfilelayout.c | 4 ++-- fs/nfs/nfs4idmap.c | 2 +- fs/nfsd/nfs4callback.c | 2 +- kernel/cred.c | 15 +++++++-------- net/dns_resolver/dns_key.c | 2 +- 9 files changed, 16 insertions(+), 17 deletions(-) diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c index 7c3590fd97c2..017c4cdb219e 100644 --- a/drivers/base/firmware_loader/main.c +++ b/drivers/base/firmware_loader/main.c @@ -821,7 +821,7 @@ _request_firmware(const struct firmware **firmware_p, const char *name, * 
called by a driver when serving an unrelated request from userland, we use * the kernel credentials to read the file. */ - kern_cred = prepare_kernel_cred(NULL); + kern_cred = prepare_kernel_cred(&init_task); if (!kern_cred) { ret = -ENOMEM; goto out; diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c index 342717bf1dc2..6f3285f1dfee 100644 --- a/fs/cifs/cifs_spnego.c +++ b/fs/cifs/cifs_spnego.c @@ -189,7 +189,7 @@ init_cifs_spnego(void) * spnego upcalls. */ - cred = prepare_kernel_cred(NULL); + cred = prepare_kernel_cred(&init_task); if (!cred) return -ENOMEM; diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index fa480d62f313..574de2b225ae 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -465,7 +465,7 @@ init_cifs_idmap(void) * this is used to prevent malicious redirections from being installed * with add_key(). */ - cred = prepare_kernel_cred(NULL); + cred = prepare_kernel_cred(&init_task); if (!cred) return -ENOMEM; diff --git a/fs/ksmbd/smb_common.c b/fs/ksmbd/smb_common.c index d96da872d70a..2a4fbbd55b91 100644 --- a/fs/ksmbd/smb_common.c +++ b/fs/ksmbd/smb_common.c @@ -623,7 +623,7 @@ int ksmbd_override_fsids(struct ksmbd_work *work) if (share->force_gid != KSMBD_SHARE_INVALID_GID) gid = share->force_gid; - cred = prepare_kernel_cred(NULL); + cred = prepare_kernel_cred(&init_task); if (!cred) return -ENOMEM; diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 1ec79ccf89ad..7deb3cd76abe 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -493,10 +493,10 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, gid = make_kgid(&init_user_ns, id); if (gfp_flags & __GFP_FS) - kcred = prepare_kernel_cred(NULL); + kcred = prepare_kernel_cred(&init_task); else { unsigned int nofs_flags = memalloc_nofs_save(); - kcred = prepare_kernel_cred(NULL); + kcred = prepare_kernel_cred(&init_task); memalloc_nofs_restore(nofs_flags); } rc = -ENOMEM; diff --git 
a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index e3fdd2f45b01..25a7c771cfd8 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -203,7 +203,7 @@ int nfs_idmap_init(void) printk(KERN_NOTICE "NFS: Registering the %s key type\n", key_type_id_resolver.name); - cred = prepare_kernel_cred(NULL); + cred = prepare_kernel_cred(&init_task); if (!cred) return -ENOMEM; diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index f0e69edf5f0f..4a9e8d17e56a 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -870,7 +870,7 @@ static const struct cred *get_backchannel_cred(struct nfs4_client *clp, struct r } else { struct cred *kcred; - kcred = prepare_kernel_cred(NULL); + kcred = prepare_kernel_cred(&init_task); if (!kcred) return NULL; diff --git a/kernel/cred.c b/kernel/cred.c index e10c15f51c1f..811ad654abd1 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -701,9 +701,9 @@ void __init cred_init(void) * override a task's own credentials so that work can be done on behalf of that * task that requires a different subjective context. * - * @daemon is used to provide a base for the security record, but can be NULL. - * If @daemon is supplied, then the security data will be derived from that; - * otherwise they'll be set to 0 and no groups, full capabilities and no keys. + * @daemon is used to provide a base cred, with the security data derived from + * that; if this is "&init_task", they'll be set to 0, no groups, full + * capabilities, and no keys. * * The caller may change these controls afterwards if desired. 
* @@ -714,17 +714,16 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) const struct cred *old; struct cred *new; + if (WARN_ON_ONCE(!daemon)) + return NULL; + new = kmem_cache_alloc(cred_jar, GFP_KERNEL); if (!new) return NULL; kdebug("prepare_kernel_cred() alloc %p", new); - if (daemon) - old = get_task_cred(daemon); - else - old = get_cred(&init_cred); - + old = get_task_cred(daemon); validate_creds(old); *new = *old; diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index 3aced951d5ab..01e54b46ae0b 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -337,7 +337,7 @@ static int __init init_dns_resolver(void) * this is used to prevent malicious redirections from being installed * with add_key(). */ - cred = prepare_kernel_cred(NULL); + cred = prepare_kernel_cred(&init_task); if (!cred) return -ENOMEM; From e1789d7c752ed001cf1a4bbbd624f70a7dd3c6db Mon Sep 17 00:00:00 2001 From: Xin Li Date: Tue, 25 Oct 2022 00:30:23 -0700 Subject: [PATCH 0733/4122] kbuild: upgrade the orphan section warning to an error if CONFIG_WERROR is set Andrew Cooper suggested upgrading the orphan section warning to a hard link error. However Nathan Chancellor said outright turning the warning into an error with no escape hatch might be too aggressive, as we have had these warnings triggered by new compiler generated sections, and suggested turning orphan sections into an error only if CONFIG_WERROR is set. Kees Cook echoed and emphasized that the mandate from Linus is that we should avoid breaking builds. It wrecks bisection, it causes problems across compiler versions, etc. Thus upgrade the orphan section warning to a hard link error only if CONFIG_WERROR is set. 
Suggested-by: Andrew Cooper Suggested-by: Nathan Chancellor Signed-off-by: Xin Li Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221025073023.16137-2-xin3.li@intel.com --- Makefile | 2 +- arch/arm/boot/compressed/Makefile | 2 +- arch/arm64/kernel/vdso/Makefile | 2 +- arch/arm64/kernel/vdso32/Makefile | 2 +- arch/x86/boot/compressed/Makefile | 2 +- init/Kconfig | 15 ++++++++++++--- 6 files changed, 17 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile index f41ec8c8426b..9a496bef3170 100644 --- a/Makefile +++ b/Makefile @@ -1118,7 +1118,7 @@ endif # We never want expected sections to be placed heuristically by the # linker. All sections should be explicitly named in the linker script. ifdef CONFIG_LD_ORPHAN_WARN -LDFLAGS_vmlinux += --orphan-handling=warn +LDFLAGS_vmlinux += --orphan-handling=$(CONFIG_LD_ORPHAN_WARN_LEVEL) endif # Align the bit size of userspace programs with the kernel diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 41bcbb460fac..53cadc3aaff1 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -123,7 +123,7 @@ LDFLAGS_vmlinux += --no-undefined LDFLAGS_vmlinux += -X # Report orphan sections ifdef CONFIG_LD_ORPHAN_WARN -LDFLAGS_vmlinux += --orphan-handling=warn +LDFLAGS_vmlinux += --orphan-handling=$(CONFIG_LD_ORPHAN_WARN_LEVEL) endif # Next argument is a linker script LDFLAGS_vmlinux += -T diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index 619e2dc7ee14..beaf9586338f 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -27,7 +27,7 @@ ldflags-y := -shared -soname=linux-vdso.so.1 --hash-style=sysv \ -Bsymbolic --build-id=sha1 -n $(btildflags-y) ifdef CONFIG_LD_ORPHAN_WARN - ldflags-y += --orphan-handling=warn + ldflags-y += --orphan-handling=$(CONFIG_LD_ORPHAN_WARN_LEVEL) endif ldflags-y += -T diff --git 
a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 36c8f66cad25..f59bd1a4ead6 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -104,7 +104,7 @@ VDSO_AFLAGS += -D__ASSEMBLY__ VDSO_LDFLAGS += -Bsymbolic --no-undefined -soname=linux-vdso.so.1 VDSO_LDFLAGS += -z max-page-size=4096 -z common-page-size=4096 VDSO_LDFLAGS += -shared --hash-style=sysv --build-id=sha1 -VDSO_LDFLAGS += --orphan-handling=warn +VDSO_LDFLAGS += --orphan-handling=$(CONFIG_LD_ORPHAN_WARN_LEVEL) # Borrow vdsomunge.c from the arm vDSO diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 3a261abb6d15..66b8a8cb5a0f 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -68,7 +68,7 @@ KBUILD_LDFLAGS += $(call ld-option,--no-ld-generated-unwind-info) # address by the bootloader. LDFLAGS_vmlinux := -pie $(call ld-option, --no-dynamic-linker) ifdef CONFIG_LD_ORPHAN_WARN -LDFLAGS_vmlinux += --orphan-handling=warn +LDFLAGS_vmlinux += --orphan-handling=$(CONFIG_LD_ORPHAN_WARN_LEVEL) endif LDFLAGS_vmlinux += -z noexecstack ifeq ($(CONFIG_LD_IS_BFD),y) diff --git a/init/Kconfig b/init/Kconfig index 694f7c160c9c..bb1225ef04e7 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -159,10 +159,12 @@ config WERROR help A kernel build should not cause any compiler warnings, and this enables the '-Werror' (for C) and '-Dwarnings' (for Rust) flags - to enforce that rule by default. + to enforce that rule by default. Certain warnings from other tools + such as the linker may be upgraded to errors with this option as + well. - However, if you have a new (or very old) compiler with odd and - unusual warnings, or you have some architecture with problems, + However, if you have a new (or very old) compiler or linker with odd + and unusual warnings, or you have some architecture with problems, you may need to disable this config option in order to successfully build the kernel. 
@@ -1454,6 +1456,13 @@ config LD_ORPHAN_WARN def_bool y depends on ARCH_WANT_LD_ORPHAN_WARN depends on $(ld-option,--orphan-handling=warn) + depends on $(ld-option,--orphan-handling=error) + +config LD_ORPHAN_WARN_LEVEL + string + depends on LD_ORPHAN_WARN + default "error" if WERROR + default "warn" config SYSCTL bool From cd536db050993f7c220a6cfb01de5356032b6f8e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 18 Oct 2022 02:10:11 -0700 Subject: [PATCH 0734/4122] dma-buf: Proactively round up to kmalloc bucket size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of discovering the kmalloc bucket size _after_ allocation, round up proactively so the allocation is explicitly made for the full size, allowing the compiler to correctly reason about the resulting size of the buffer through the existing __alloc_size() hint. Cc: Sumit Semwal Cc: linux-media@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: linaro-mm-sig@lists.linaro.org Reviewed-by: Christian König Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221018090858.never.941-kees@kernel.org --- drivers/dma-buf/dma-resv.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index e3885c90a3ac..1c76aed8e262 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -98,12 +98,17 @@ static void dma_resv_list_set(struct dma_resv_list *list, static struct dma_resv_list *dma_resv_list_alloc(unsigned int max_fences) { struct dma_resv_list *list; + size_t size; - list = kmalloc(struct_size(list, table, max_fences), GFP_KERNEL); + /* Round up to the next kmalloc bucket size. */ + size = kmalloc_size_roundup(struct_size(list, table, max_fences)); + + list = kmalloc(size, GFP_KERNEL); if (!list) return NULL; - list->max_fences = (ksize(list) - offsetof(typeof(*list), table)) / + /* Given the resulting bucket size, recalculated max_fences. 
*/ + list->max_fences = (size - offsetof(typeof(*list), table)) / sizeof(*list->table); return list; From 905889bc6c842d18f369bf2834cf7219f32709ae Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 23 Sep 2022 13:28:13 -0700 Subject: [PATCH 0735/4122] btrfs: send: Proactively round up to kmalloc bucket size Instead of discovering the kmalloc bucket size _after_ allocation, round up proactively so the allocation is explicitly made for the full size, allowing the compiler to correctly reason about the resulting size of the buffer through the existing __alloc_size() hint. Cc: Chris Mason Cc: Josef Bacik Cc: linux-btrfs@vger.kernel.org Acked-by: David Sterba Link: https://lore.kernel.org/lkml/20220922133014.GI32411@suse.cz Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220923202822.2667581-8-keescook@chromium.org --- fs/btrfs/send.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 4ef4167072b8..f53e8049473d 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -438,6 +438,11 @@ static int fs_path_ensure_buf(struct fs_path *p, int len) path_len = p->end - p->start; old_buf_len = p->buf_len; + /* + * Allocate to the next largest kmalloc bucket size, to let + * the fast path happen most of the time. 
+ */ + len = kmalloc_size_roundup(len); /* * First time the inline_buf does not suffice */ @@ -451,11 +456,7 @@ static int fs_path_ensure_buf(struct fs_path *p, int len) if (!tmp_buf) return -ENOMEM; p->buf = tmp_buf; - /* - * The real size of the buffer is bigger, this will let the fast path - * happen most of the time - */ - p->buf_len = ksize(p->buf); + p->buf_len = len; if (p->reversed) { tmp_buf = p->buf + old_buf_len - path_len - 1; From 6dd142d9013ca82155d0c069434c60a0d5755ec0 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 20 Sep 2022 14:13:05 -0700 Subject: [PATCH 0736/4122] coredump: Proactively round up to kmalloc bucket size Instead of discovering the kmalloc bucket size _after_ allocation, round up proactively so the allocation is explicitly made for the full size, allowing the compiler to correctly reason about the resulting size of the buffer through the existing __alloc_size() hint. Cc: Alexander Viro Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Kees Cook --- fs/coredump.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/coredump.c b/fs/coredump.c index 7bad7785e8e6..97eaee325251 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -68,7 +68,10 @@ struct core_name { static int expand_corename(struct core_name *cn, int size) { - char *corename = krealloc(cn->corename, size, GFP_KERNEL); + char *corename; + + size = kmalloc_size_roundup(size); + corename = krealloc(cn->corename, size, GFP_KERNEL); if (!corename) return -ENOMEM; @@ -76,7 +79,7 @@ static int expand_corename(struct core_name *cn, int size) if (size > core_name_size) /* racy but harmless */ core_name_size = size; - cn->size = ksize(corename); + cn->size = size; cn->corename = corename; return 0; } From 79218fd0b38bb05e8dcb80a49342836274046432 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Mon, 17 Oct 2022 16:00:45 -0700 Subject: [PATCH 0737/4122] iommu/amd: Drop unnecessary checks in amd_iommu_attach_device() The same checks are done in 
amd_iommu_probe_device(). If any of them fails there, then the device won't get a group, so there's no way for it to even reach amd_iommu_attach_device anymore. Link: https://lore.kernel.org/r/c054654a81f2b675c73108fe4bf10e45335a721a.1666042872.git.nicolinc@nvidia.com Suggested-by: Robin Murphy Cc: Joerg Roedel Cc: Suravee Suthikulpanit Reviewed-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/amd/iommu.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index d3b39d0416fa..45299eb7e8e3 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2155,21 +2155,13 @@ static void amd_iommu_detach_device(struct iommu_domain *dom, static int amd_iommu_attach_device(struct iommu_domain *dom, struct device *dev) { + struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev); struct protection_domain *domain = to_pdomain(dom); - struct iommu_dev_data *dev_data; - struct amd_iommu *iommu; + struct amd_iommu *iommu = rlookup_amd_iommu(dev); int ret; - if (!check_device(dev)) - return -EINVAL; - - dev_data = dev_iommu_priv_get(dev); dev_data->defer_attach = false; - iommu = rlookup_amd_iommu(dev); - if (!iommu) - return -EINVAL; - if (dev_data->domain) detach_device(dev); From 00208852d351ca6e4a8b9ff0c5376fa3a8ed8eaa Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Mon, 17 Oct 2022 16:01:22 -0700 Subject: [PATCH 0738/4122] iommu: Add return value rules to attach_dev op and APIs Cases like VFIO wish to attach a device to an existing domain that was not allocated specifically from the device. This raises a condition where the IOMMU driver can fail the domain attach because the domain and device are incompatible with each other. This is a soft failure that can be resolved by using a different domain. 
Provide a dedicated errno EINVAL from the IOMMU driver during attach to indicate that the attach failed because of domain incompatibility. VFIO can use this to know that the attach is a soft failure and it should continue searching. Otherwise, the attach will be a hard failure and VFIO will return the code to userspace. Update kdocs to add rules of return value to the attach_dev op and APIs. Link: https://lore.kernel.org/r/bd56d93c18621104a0fa1b0de31e9b760b81b769.1666042872.git.nicolinc@nvidia.com Suggested-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Reviewed-by: Lu Baolu Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommu.c | 24 ++++++++++++++++++++++++ include/linux/iommu.h | 12 ++++++++++++ 2 files changed, 36 insertions(+) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 65a3b3d886dc..972731f0b328 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1949,6 +1949,18 @@ static int __iommu_attach_device(struct iommu_domain *domain, return ret; } +/** + * iommu_attach_device - Attach an IOMMU domain to a device + * @domain: IOMMU domain to attach + * @dev: Device that will be attached + * + * Returns 0 on success and error code on failure + * + * Note that EINVAL can be treated as a soft failure, indicating + * that certain configuration of the domain is incompatible with + * the device. In this case attaching a different domain to the + * device may succeed.
+ */ int iommu_attach_device(struct iommu_domain *domain, struct device *dev) { struct iommu_group *group; @@ -2075,6 +2087,18 @@ static int __iommu_attach_group(struct iommu_domain *domain, return ret; } +/** + * iommu_attach_group - Attach an IOMMU domain to an IOMMU group + * @domain: IOMMU domain to attach + * @group: IOMMU group that will be attached + * + * Returns 0 on success and error code on failure + * + * Note that EINVAL can be treated as a soft failure, indicating + * that certain configuration of the domain is incompatible with + * the group. In this case attaching a different domain to the + * group may succeed. + */ int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group) { int ret; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 3c9da1f8979e..857898d102b3 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -266,6 +266,18 @@ struct iommu_ops { /** * struct iommu_domain_ops - domain specific operations * @attach_dev: attach an iommu domain to a device + * Return: + * * 0 - success + * * EINVAL - can indicate that device and domain are incompatible due to + * some previous configuration of the domain, in which case the + * driver shouldn't log an error, since it is legitimate for a + * caller to test reuse of existing domains. Otherwise, it may + * still represent some other fundamental problem + * * ENOMEM - out of memory + * * ENOSPC - non-ENOMEM type of resource allocation failures + * * EBUSY - device is attached to a domain and cannot be changed + * * ENODEV - device specific errors, not able to be attached + * * <others> - treated as ENODEV by the caller.
Use is discouraged * @detach_dev: detach an iommu domain from a device * @map: map a physically contiguous memory region to an iommu domain * @map_pages: map a physically contiguous set of pages of the same size to From bd7ebb7719356d750b1b4d671535922bae43fb3b Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Mon, 17 Oct 2022 16:02:13 -0700 Subject: [PATCH 0739/4122] iommu: Regulate EINVAL in ->attach_dev callback functions Following the new rules in include/linux/iommu.h kdocs, EINVAL now can be used to indicate that domain and device are incompatible by a caller that treats it as a soft failure and tries attaching to another domain. On the other hand, there are ->attach_dev callback functions returning it for obvious device-specific errors. They will result in some inefficiency in the caller handling routine. Update these places to corresponding errnos following the new rules. Link: https://lore.kernel.org/r/5924c03bea637f05feb2a20d624bae086b555ec5.1666042872.git.nicolinc@nvidia.com Reviewed-by: Jean-Philippe Brucker Reviewed-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/fsl_pamu.c | 2 +- drivers/iommu/fsl_pamu_domain.c | 4 ++-- drivers/iommu/intel/pasid.c | 6 ++++-- drivers/iommu/mtk_iommu.c | 2 +- drivers/iommu/omap-iommu.c | 4 ++-- drivers/iommu/virtio-iommu.c | 2 +- 6 files changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c index 0d03f837a5d4..2eb3211c8167 100644 --- a/drivers/iommu/fsl_pamu.c +++ b/drivers/iommu/fsl_pamu.c @@ -211,7 +211,7 @@ int pamu_config_ppaace(int liodn, u32 omi, u32 stashid, int prot) ppaace->op_encode.index_ot.omi = omi; } else if (~omi != 0) { pr_debug("bad operation mapping index: %d\n", omi); - return -EINVAL; + return -ENODEV; } /* configure stash id */ diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index fa20f4b03e12..4408ac3c49b6 100644 --- 
a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -258,7 +258,7 @@ static int fsl_pamu_attach_device(struct iommu_domain *domain, liodn = of_get_property(dev->of_node, "fsl,liodn", &len); if (!liodn) { pr_debug("missing fsl,liodn property at %pOF\n", dev->of_node); - return -EINVAL; + return -ENODEV; } spin_lock_irqsave(&dma_domain->domain_lock, flags); @@ -267,7 +267,7 @@ static int fsl_pamu_attach_device(struct iommu_domain *domain, if (liodn[i] >= PAACE_NUMBER_ENTRIES) { pr_debug("Invalid liodn %d, attach device failed for %pOF\n", liodn[i], dev->of_node); - ret = -EINVAL; + ret = -ENODEV; break; } diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index c30ddac40ee5..95d73f19ab61 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -101,8 +101,10 @@ int intel_pasid_alloc_table(struct device *dev) might_sleep(); info = dev_iommu_priv_get(dev); - if (WARN_ON(!info || !dev_is_pci(dev) || info->pasid_table)) - return -EINVAL; + if (WARN_ON(!info || !dev_is_pci(dev))) + return -ENODEV; + if (WARN_ON(info->pasid_table)) + return -EEXIST; pasid_table = kzalloc(sizeof(*pasid_table), GFP_KERNEL); if (!pasid_table) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 2ab2ecfe01f8..eda441d0c6b6 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -609,7 +609,7 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom, dom->iop = alloc_io_pgtable_ops(ARM_V7S, &dom->cfg, data); if (!dom->iop) { dev_err(data->dev, "Failed to alloc io pgtable\n"); - return -EINVAL; + return -ENOMEM; } /* Update our support page sizes bitmap */ diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 07ee2600113c..3f153f9e0ac5 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -1414,7 +1414,7 @@ static int omap_iommu_attach_init(struct device *dev, odomain->num_iommus = omap_iommu_count(dev); if (!odomain->num_iommus) - return 
-EINVAL; + return -ENODEV; odomain->iommus = kcalloc(odomain->num_iommus, sizeof(*iommu), GFP_ATOMIC); @@ -1464,7 +1464,7 @@ omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) if (!arch_data || !arch_data->iommu_dev) { dev_err(dev, "device doesn't have an associated iommu\n"); - return -EINVAL; + return -ENODEV; } spin_lock(&omap_domain->lock); diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index 8b1b5c270e50..0b64e7f64e68 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -670,7 +670,7 @@ static int viommu_domain_finalise(struct viommu_endpoint *vdev, dev_err(vdev->dev, "granule 0x%lx larger than system page size 0x%lx\n", viommu_page_size, PAGE_SIZE); - return -EINVAL; + return -ENODEV; } ret = ida_alloc_range(&viommu->domain_ids, viommu->first_domain, From f4a14773579302e5f0c4bf80b03f0db7ce67f2ce Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Mon, 17 Oct 2022 16:02:21 -0700 Subject: [PATCH 0740/4122] iommu: Use EINVAL for incompatible device/domain in ->attach_dev Following the new rules in include/linux/iommu.h kdocs, update all drivers ->attach_dev callback functions to return EINVAL in the failure paths that are related to domain incompatibility. Also, drop adjacent error prints to prevent a kernel log spam. 
Link: https://lore.kernel.org/r/f52a07f7320da94afe575c9631340d0019a203a7.1666042873.git.nicolinc@nvidia.com Reviewed-by: Jean-Philippe Brucker Reviewed-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 11 +---------- drivers/iommu/arm/arm-smmu/arm-smmu.c | 3 --- drivers/iommu/arm/arm-smmu/qcom_iommu.c | 7 +------ drivers/iommu/intel/iommu.c | 10 +++------- drivers/iommu/ipmmu-vmsa.c | 2 -- drivers/iommu/omap-iommu.c | 2 +- drivers/iommu/sprd-iommu.c | 4 +--- drivers/iommu/tegra-gart.c | 2 +- drivers/iommu/virtio-iommu.c | 3 +-- 9 files changed, 9 insertions(+), 35 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 6d5df91c5c46..8b0a1e476d44 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2430,23 +2430,14 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) goto out_unlock; } } else if (smmu_domain->smmu != smmu) { - dev_err(dev, - "cannot attach to SMMU %s (upstream of %s)\n", - dev_name(smmu_domain->smmu->dev), - dev_name(smmu->dev)); - ret = -ENXIO; + ret = -EINVAL; goto out_unlock; } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 && master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) { - dev_err(dev, - "cannot attach to incompatible domain (%u SSID bits != %u)\n", - smmu_domain->s1_cfg.s1cdmax, master->ssid_bits); ret = -EINVAL; goto out_unlock; } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 && smmu_domain->stall_enabled != master->stall_enabled) { - dev_err(dev, "cannot attach to stall-%s domain\n", - smmu_domain->stall_enabled ? 
"enabled" : "disabled"); ret = -EINVAL; goto out_unlock; } diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 30dab1418e3f..719fbca1fe52 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -1150,9 +1150,6 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) * different SMMUs. */ if (smmu_domain->smmu != smmu) { - dev_err(dev, - "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n", - dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev)); ret = -EINVAL; goto rpm_put; } diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c index 3869c3ecda8c..bfd7b51eb5db 100644 --- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c +++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c @@ -381,13 +381,8 @@ static int qcom_iommu_attach_dev(struct iommu_domain *domain, struct device *dev * Sanity check the domain. We don't support domains across * different IOMMUs. 
*/ - if (qcom_domain->iommu != qcom_iommu) { - dev_err(dev, "cannot attach to IOMMU %s while already " - "attached to domain on IOMMU %s\n", - dev_name(qcom_domain->iommu->dev), - dev_name(qcom_iommu->dev)); + if (qcom_domain->iommu != qcom_iommu) return -EINVAL; - } return 0; } diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 48cdcd0a5cf3..6f1a59206d2e 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4194,19 +4194,15 @@ static int prepare_domain_attach_device(struct iommu_domain *domain, return -ENODEV; if (dmar_domain->force_snooping && !ecap_sc_support(iommu->ecap)) - return -EOPNOTSUPP; + return -EINVAL; /* check if this iommu agaw is sufficient for max mapped address */ addr_width = agaw_to_width(iommu->agaw); if (addr_width > cap_mgaw(iommu->cap)) addr_width = cap_mgaw(iommu->cap); - if (dmar_domain->max_addr > (1LL << addr_width)) { - dev_err(dev, "%s: iommu width (%d) is not " - "sufficient for the mapped address (%llx)\n", - __func__, addr_width, dmar_domain->max_addr); - return -EFAULT; - } + if (dmar_domain->max_addr > (1LL << addr_width)) + return -EINVAL; dmar_domain->gaw = addr_width; /* diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 3b30c0752274..22230cc15dcd 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -628,8 +628,6 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain, * Something is wrong, we can't attach two devices using * different IOMMUs to the same domain. 
*/ - dev_err(dev, "Can't attach IPMMU %s to domain on IPMMU %s\n", - dev_name(mmu->dev), dev_name(domain->mmu->dev)); ret = -EINVAL; } else dev_info(dev, "Reusing IPMMU context %u\n", domain->context_id); diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 3f153f9e0ac5..2fd7702c6709 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -1472,7 +1472,7 @@ omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) /* only a single client device can be attached to a domain */ if (omap_domain->dev) { dev_err(dev, "iommu domain is already attached\n"); - ret = -EBUSY; + ret = -EINVAL; goto out; } diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c index fadd2c907222..e02793375598 100644 --- a/drivers/iommu/sprd-iommu.c +++ b/drivers/iommu/sprd-iommu.c @@ -237,10 +237,8 @@ static int sprd_iommu_attach_device(struct iommu_domain *domain, struct sprd_iommu_domain *dom = to_sprd_domain(domain); size_t pgt_size = sprd_iommu_pgt_size(domain); - if (dom->sdev) { - pr_err("There's already a device attached to this domain.\n"); + if (dom->sdev) return -EINVAL; - } dom->pgt_va = dma_alloc_coherent(sdev->dev, pgt_size, &dom->pgt_pa, GFP_KERNEL); if (!dom->pgt_va) diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index e5ca3cf1a949..ed53279d1106 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -112,7 +112,7 @@ static int gart_iommu_attach_dev(struct iommu_domain *domain, spin_lock(&gart->dom_lock); if (gart->active_domain && gart->active_domain != domain) { - ret = -EBUSY; + ret = -EINVAL; } else if (dev_iommu_priv_get(dev) != domain) { dev_iommu_priv_set(dev, domain); gart->active_domain = domain; diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index 0b64e7f64e68..8226b4da4350 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -734,8 +734,7 @@ static int viommu_attach_dev(struct iommu_domain *domain, struct 
device *dev) */ ret = viommu_domain_finalise(vdev, domain); } else if (vdomain->viommu != vdev->viommu) { - dev_err(dev, "cannot attach to foreign vIOMMU\n"); - ret = -EXDEV; + ret = -EINVAL; } mutex_unlock(&vdomain->mutex); From 04cee82e04d2aff3d177ef0021ecdff228daf7b8 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Mon, 17 Oct 2022 16:02:36 -0700 Subject: [PATCH 0741/4122] iommu: Propagate return value in ->attach_dev callback functions The mtk_iommu and virtio drivers have places in the ->attach_dev callback functions that return hardcoded errnos instead of the returned values, but callers of these ->attach_dev callback functions may care. Propagate them directly without the extra conversions. Link: https://lore.kernel.org/r/ca8c5a447b87002334f83325f28823008b4ce420.1666042873.git.nicolinc@nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Jean-Philippe Brucker Reviewed-by: Jason Gunthorpe Reviewed-by: Yong Wu Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/mtk_iommu.c | 2 +- drivers/iommu/virtio-iommu.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index eda441d0c6b6..b383c8327f9c 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -668,7 +668,7 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, ret = mtk_iommu_domain_finalise(dom, frstdata, region_id); if (ret) { mutex_unlock(&dom->mutex); - return -ENODEV; + return ret; } dom->bank = &data->bank[bankid]; } diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index 8226b4da4350..5b8fe9bfa9a5 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -697,7 +697,7 @@ static int viommu_domain_finalise(struct viommu_endpoint *vdev, if (ret) { ida_free(&viommu->domain_ids, vdomain->id); vdomain->viommu = NULL; - return -EOPNOTSUPP; + return ret; } } From 91586ce0d39a05f88795aa8814fb99b1387236b3 Mon Sep 17 00:00:00 2001 From: Chao Yu
Date: Fri, 21 Oct 2022 10:34:22 +0800 Subject: [PATCH 0742/4122] f2fs: fix to invalidate dcc->f2fs_issue_discard in error path Syzbot reports a NULL pointer dereference issue as below: __refcount_add include/linux/refcount.h:193 [inline] __refcount_inc include/linux/refcount.h:250 [inline] refcount_inc include/linux/refcount.h:267 [inline] get_task_struct include/linux/sched/task.h:110 [inline] kthread_stop+0x34/0x1c0 kernel/kthread.c:703 f2fs_stop_discard_thread+0x3c/0x5c fs/f2fs/segment.c:1638 kill_f2fs_super+0x5c/0x194 fs/f2fs/super.c:4522 deactivate_locked_super+0x70/0xe8 fs/super.c:332 deactivate_super+0xd0/0xd4 fs/super.c:363 cleanup_mnt+0x1f8/0x234 fs/namespace.c:1186 __cleanup_mnt+0x20/0x30 fs/namespace.c:1193 task_work_run+0xc4/0x14c kernel/task_work.c:177 exit_task_work include/linux/task_work.h:38 [inline] do_exit+0x26c/0xbe0 kernel/exit.c:795 do_group_exit+0x60/0xe8 kernel/exit.c:925 __do_sys_exit_group kernel/exit.c:936 [inline] __se_sys_exit_group kernel/exit.c:934 [inline] __wake_up_parent+0x0/0x40 kernel/exit.c:934 __invoke_syscall arch/arm64/kernel/syscall.c:38 [inline] invoke_syscall arch/arm64/kernel/syscall.c:52 [inline] el0_svc_common+0x138/0x220 arch/arm64/kernel/syscall.c:142 do_el0_svc+0x48/0x164 arch/arm64/kernel/syscall.c:206 el0_svc+0x58/0x150 arch/arm64/kernel/entry-common.c:636 el0t_64_sync_handler+0x84/0xf0 arch/arm64/kernel/entry-common.c:654 el0t_64_sync+0x18c/0x190 arch/arm64/kernel/entry.S:581 The root cause of this issue is in error path of f2fs_start_discard_thread(), it missed to invalidate dcc->f2fs_issue_discard, later kthread_stop() may access invalid pointer. 
Fixes: 4d67490498ac ("f2fs: Don't create discard thread when device doesn't support realtime discard") Reported-by: syzbot+035a381ea1afb63f098d@syzkaller.appspotmail.com Reported-by: syzbot+729c925c2d9fc495ddee@syzkaller.appspotmail.com Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index acf3d3fa4363..7a4f7c88b8b9 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2025,8 +2025,10 @@ int f2fs_start_discard_thread(struct f2fs_sb_info *sbi) dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi, "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev)); - if (IS_ERR(dcc->f2fs_issue_discard)) + if (IS_ERR(dcc->f2fs_issue_discard)) { err = PTR_ERR(dcc->f2fs_issue_discard); + dcc->f2fs_issue_discard = NULL; + } return err; } From 18792e64c86dd7e34ba28e4f61faba472b7bf5fc Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 6 Oct 2022 23:09:28 +0800 Subject: [PATCH 0743/4122] f2fs: support fault injection for f2fs_is_valid_blkaddr() This patch supports injecting faults into f2fs_is_valid_blkaddr() to simulate accessing inconsistent data/meta block addresses from caller.
Usage: a) echo 262144 > /sys/fs/f2fs//inject_type or b) mount -o fault_type=262144 Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.rst | 1 + fs/f2fs/checkpoint.c | 5 +++++ fs/f2fs/f2fs.h | 1 + fs/f2fs/super.c | 1 + 4 files changed, 8 insertions(+) diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst index 17df9a02ccff..b797e8ec96ed 100644 --- a/Documentation/filesystems/f2fs.rst +++ b/Documentation/filesystems/f2fs.rst @@ -199,6 +199,7 @@ fault_type=%d Support configuring fault injection type, should be FAULT_SLAB_ALLOC 0x000008000 FAULT_DQUOT_INIT 0x000010000 FAULT_LOCK_OP 0x000020000 + FAULT_BLKADDR 0x000040000 =================== =========== mode=%s Control block allocation mode which supports "adaptive" and "lfs". In "lfs" mode, there should be no random diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 0c82dae082aa..c00694a50222 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -171,6 +171,11 @@ static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr, bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) { + if (time_to_inject(sbi, FAULT_BLKADDR)) { + f2fs_show_injection_info(sbi, FAULT_BLKADDR); + return false; + } + switch (type) { case META_NAT: break; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e6355a5683b7..f57cb49dc383 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -60,6 +60,7 @@ enum { FAULT_SLAB_ALLOC, FAULT_DQUOT_INIT, FAULT_LOCK_OP, + FAULT_BLKADDR, FAULT_MAX, }; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 3834ead04620..df26fbe2bf58 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -61,6 +61,7 @@ const char *f2fs_fault_name[FAULT_MAX] = { [FAULT_SLAB_ALLOC] = "slab alloc", [FAULT_DQUOT_INIT] = "dquot initialize", [FAULT_LOCK_OP] = "lock_op", + [FAULT_BLKADDR] = "invalid blkaddr", }; void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate, From 
3688cbe39b7a9ef3feb73234fb351de33fd1da52 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Tue, 25 Oct 2022 11:08:31 +0800 Subject: [PATCH 0744/4122] f2fs: remove batched_trim_sections node commit 377224c47118("f2fs: don't split checkpoint in fstrim") obsolete batch mode and related sysfs entry. Since this testing sysfs node has been deprecated for a long time, let's remove it. Signed-off-by: Yangtao Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 3 --- fs/f2fs/sysfs.c | 5 ----- 2 files changed, 8 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f57cb49dc383..e990870bdab9 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1063,9 +1063,6 @@ struct f2fs_sm_info { /* a threshold to reclaim prefree segments */ unsigned int rec_prefree_segments; - /* for batched trimming */ - unsigned int trim_sections; /* # of sections to trim */ - struct list_head sit_entry_set; /* sit entry set list */ unsigned int ipu_policy; /* in-place-update policy */ diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index df27afd71ef4..926b7a844362 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -488,9 +488,6 @@ out: return -EINVAL; } - if (!strcmp(a->attr.name, "trim_sections")) - return -EINVAL; - if (!strcmp(a->attr.name, "gc_urgent")) { if (t == 0) { sbi->gc_mode = GC_NORMAL; @@ -790,7 +787,6 @@ F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, mid_discard_issue_time, mid_discard_ F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_discard_issue_time, max_discard_issue_time); F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_granularity, discard_granularity); F2FS_RW_ATTR(RESERVED_BLOCKS, f2fs_sb_info, reserved_blocks, reserved_blocks); -F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks); @@ -919,7 +915,6 @@ static struct attribute 
*f2fs_attrs[] = { ATTR_LIST(max_discard_issue_time), ATTR_LIST(discard_granularity), ATTR_LIST(pending_discard), - ATTR_LIST(batched_trim_sections), ATTR_LIST(ipu_policy), ATTR_LIST(min_ipu_util), ATTR_LIST(min_fsync_blocks), From 6359a1aaca527311b7145ec6eb16890a5ddf5214 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Tue, 25 Oct 2022 14:50:24 +0800 Subject: [PATCH 0745/4122] f2fs: fix gc mode when gc_urgent_high_remaining is 1 Under the current logic, when gc_urgent_high_remaining is set to 1, the mode will be switched to normal at the beginning, instead of running in gc_urgent mode. Let's switch the gc mode back to normal when the gc ends. Fixes: 265576181b4a ("f2fs: remove gc_urgent_high_limited for cleanup") Signed-off-by: Yangtao Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 7b4be412cec0..d2e9c280773f 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -96,16 +96,6 @@ static int gc_thread_func(void *data) * invalidated soon after by user update or deletion. * So, I'd like to wait some time to collect dirty segments. 
*/ - if (sbi->gc_mode == GC_URGENT_HIGH) { - spin_lock(&sbi->gc_urgent_high_lock); - if (sbi->gc_urgent_high_remaining) { - sbi->gc_urgent_high_remaining--; - if (!sbi->gc_urgent_high_remaining) - sbi->gc_mode = GC_NORMAL; - } - spin_unlock(&sbi->gc_urgent_high_lock); - } - if (sbi->gc_mode == GC_URGENT_HIGH || sbi->gc_mode == GC_URGENT_MID) { wait_ms = gc_th->urgent_sleep_time; @@ -162,6 +152,15 @@ do_gc: /* balancing f2fs's metadata periodically */ f2fs_balance_fs_bg(sbi, true); next: + if (sbi->gc_mode == GC_URGENT_HIGH) { + spin_lock(&sbi->gc_urgent_high_lock); + if (sbi->gc_urgent_high_remaining) { + sbi->gc_urgent_high_remaining--; + if (!sbi->gc_urgent_high_remaining) + sbi->gc_mode = GC_NORMAL; + } + spin_unlock(&sbi->gc_urgent_high_lock); + } sb_end_write(sbi->sb); } while (!kthread_should_stop()); From 44b9d01f2ee32884a7de270394b0fb7f75f87dba Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Tue, 25 Oct 2022 16:05:26 +0800 Subject: [PATCH 0746/4122] f2fs: cleanup in f2fs_create_flush_cmd_control() Just a cleanup for readability; no functional changes.
Suggested-by: Chao Yu Signed-off-by: Yangtao Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 7a4f7c88b8b9..0df47ad80efb 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -620,12 +620,12 @@ int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi) { dev_t dev = sbi->sb->s_bdev->bd_dev; struct flush_cmd_control *fcc; - int err = 0; + int err; if (SM_I(sbi)->fcc_info) { fcc = SM_I(sbi)->fcc_info; if (fcc->f2fs_issue_flush) - return err; + return 0; goto init_thread; } @@ -638,7 +638,7 @@ int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi) init_llist_head(&fcc->issue_list); SM_I(sbi)->fcc_info = fcc; if (!test_opt(sbi, FLUSH_MERGE)) - return err; + return 0; init_thread: fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, @@ -650,7 +650,7 @@ init_thread: return err; } - return err; + return 0; } void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free) From b5f1a218ae5e4339130d6e733f0e63d623e09a2c Mon Sep 17 00:00:00 2001 From: Dongdong Zhang Date: Tue, 25 Oct 2022 17:40:36 +0800 Subject: [PATCH 0747/4122] f2fs: fix normal discard process In the DPOLICY_BG mode, there is a conflict between the two conditions "i + 1 < dpolicy->granularity" and "i < DEFAULT_DISCARD_GRANULARITY". If i = 15, the first condition is false, it will enter the second condition and dispatch all small granularity discards in function __issue_discard_cmd_orderly. The restrictive effect of the first condition to small discards will be invalidated. These two conditions should align. 
Fixes: 20ee4382322c ("f2fs: issue small discard by LBA order") Signed-off-by: Dongdong Zhang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 0df47ad80efb..38f6a2bcb158 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1448,7 +1448,7 @@ retry: if (i + 1 < dpolicy->granularity) break; - if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered) + if (i + 1 < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered) return __issue_discard_cmd_orderly(sbi, dpolicy); pend_list = &dcc->pend_list[i]; From 6047de5482c33d5f912cdc907336fde9ebc5714e Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Tue, 25 Oct 2022 01:54:01 +0800 Subject: [PATCH 0748/4122] f2fs: add barrier mount option This patch adds a mount option, barrier, in f2fs. The barrier option is the opposite of nobarrier. If this option is set, cache_flush commands are allowed to be issued. Signed-off-by: Yangtao Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.rst | 2 ++ fs/f2fs/super.c | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst index b797e8ec96ed..6e67c5e6c7c3 100644 --- a/Documentation/filesystems/f2fs.rst +++ b/Documentation/filesystems/f2fs.rst @@ -154,6 +154,8 @@ nobarrier This option can be used if underlying storage guarantees If this option is set, no cache_flush commands are issued but f2fs still guarantees the write ordering of all the data writes. +barrier If this option is set, cache_flush commands are allowed to be + issued. fastboot This option is used when a system wants to reduce mount time as much as possible, even though normal performance can be sacrificed. 
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index df26fbe2bf58..a247027711d8 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -111,6 +111,7 @@ enum { Opt_noinline_dentry, Opt_flush_merge, Opt_noflush_merge, + Opt_barrier, Opt_nobarrier, Opt_fastboot, Opt_extent_cache, @@ -187,6 +188,7 @@ static match_table_t f2fs_tokens = { {Opt_noinline_dentry, "noinline_dentry"}, {Opt_flush_merge, "flush_merge"}, {Opt_noflush_merge, "noflush_merge"}, + {Opt_barrier, "barrier"}, {Opt_nobarrier, "nobarrier"}, {Opt_fastboot, "fastboot"}, {Opt_extent_cache, "extent_cache"}, @@ -807,6 +809,9 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) case Opt_nobarrier: set_opt(sbi, NOBARRIER); break; + case Opt_barrier: + clear_opt(sbi, NOBARRIER); + break; case Opt_fastboot: set_opt(sbi, FASTBOOT); break; @@ -1940,6 +1945,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",flush_merge"); if (test_opt(sbi, NOBARRIER)) seq_puts(seq, ",nobarrier"); + else + seq_puts(seq, ",barrier"); if (test_opt(sbi, FASTBOOT)) seq_puts(seq, ",fastboot"); if (test_opt(sbi, EXTENT_CACHE)) From a995627e6dd81d4485d40ce64880017a080d71e6 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 24 Oct 2022 16:00:35 -0700 Subject: [PATCH 0749/4122] f2fs: allow to set compression for inlined file The below commit disallows to set compression on empty created file which has a inline_data. Let's fix it. 
Fixes: 7165841d578e ("f2fs: fix to check inline_data during compressed inode conversion") Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 82cda1258227..f96bbfa8b399 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1915,6 +1915,10 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) if (!f2fs_disable_compressed_file(inode)) return -EINVAL; } else { + /* try to convert inline_data to support compression */ + int err = f2fs_convert_inline_inode(inode); + if (err) + return err; if (!f2fs_may_compress(inode)) return -EINVAL; if (S_ISREG(inode->i_mode) && F2FS_HAS_BLOCKS(inode)) From c46867e9b9b8e0cdd6a5212c2b5ae616583a3bfd Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Tue, 25 Oct 2022 16:32:26 +0800 Subject: [PATCH 0750/4122] f2fs: introduce max_ordered_discard sysfs node The current max_ordered_discard is a fixed value, change it to be configurable through the sys node. Signed-off-by: Yangtao Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 6 ++++++ fs/f2fs/f2fs.h | 3 +++ fs/f2fs/segment.c | 3 ++- fs/f2fs/sysfs.c | 11 +++++++++++ 4 files changed, 22 insertions(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 483639fb727b..53f70eadec96 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -99,6 +99,12 @@ Description: Controls the issue rate of discard commands that consist of small checkpoint is triggered, and issued during the checkpoint. By default, it is disabled with 0. +What: /sys/fs/f2fs//max_ordered_discard +Date: October 2022 +Contact: "Yangtao Li" +Description: Controls the maximum ordered discard, the unit size is one block(4KB). + Set it to 16 by default. 
+ What: /sys/fs/f2fs//max_discard_request Date: December 2021 Contact: "Konstantin Vyshetsky" diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e990870bdab9..fa8dc00dfb2b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -331,6 +331,8 @@ struct discard_entry { /* default discard granularity of inner discard thread, unit: block count */ #define DEFAULT_DISCARD_GRANULARITY 16 +/* default maximum discard granularity of ordered discard, unit: block count */ +#define DEFAULT_MAX_ORDERED_DISCARD_GRANULARITY 16 /* max discard pend list number */ #define MAX_PLIST_NUM 512 @@ -410,6 +412,7 @@ struct discard_cmd_control { unsigned int mid_discard_issue_time; /* mid. interval between discard issue */ unsigned int max_discard_issue_time; /* max. interval between discard issue */ unsigned int discard_granularity; /* discard granularity */ + unsigned int max_ordered_discard; /* maximum discard granularity issued by lba order */ unsigned int undiscard_blks; /* # of undiscard blocks */ unsigned int next_pos; /* next discard position */ atomic_t issued_discard; /* # of issued discard */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 38f6a2bcb158..c470b443615f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1448,7 +1448,7 @@ retry: if (i + 1 < dpolicy->granularity) break; - if (i + 1 < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered) + if (i + 1 < dcc->max_ordered_discard && dpolicy->ordered) return __issue_discard_cmd_orderly(sbi, dpolicy); pend_list = &dcc->pend_list[i]; @@ -2048,6 +2048,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) return -ENOMEM; dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY; + dcc->max_ordered_discard = DEFAULT_MAX_ORDERED_DISCARD_GRANULARITY; if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SEGMENT) dcc->discard_granularity = sbi->blocks_per_seg; else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 926b7a844362..8095345ebdad 100644 --- 
a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -483,6 +483,15 @@ out: return count; } + if (!strcmp(a->attr.name, "max_ordered_discard")) { + if (t == 0 || t > MAX_PLIST_NUM) + return -EINVAL; + if (!f2fs_block_unit_discard(sbi)) + return -EINVAL; + *ui = t; + return count; + } + if (!strcmp(a->attr.name, "migration_granularity")) { if (t == 0 || t > sbi->segs_per_sec) return -EINVAL; @@ -786,6 +795,7 @@ F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, min_discard_issue_time, min_discard_ F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, mid_discard_issue_time, mid_discard_issue_time); F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_discard_issue_time, max_discard_issue_time); F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_granularity, discard_granularity); +F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_ordered_discard, max_ordered_discard); F2FS_RW_ATTR(RESERVED_BLOCKS, f2fs_sb_info, reserved_blocks, reserved_blocks); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); @@ -914,6 +924,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(mid_discard_issue_time), ATTR_LIST(max_discard_issue_time), ATTR_LIST(discard_granularity), + ATTR_LIST(max_ordered_discard), ATTR_LIST(pending_discard), ATTR_LIST(ipu_policy), ATTR_LIST(min_ipu_util), From a5029a57a2f3f2e2711f4ac2d876c3c83d1758fe Mon Sep 17 00:00:00 2001 From: Keoseong Park Date: Thu, 27 Oct 2022 20:01:05 +0900 Subject: [PATCH 0751/4122] f2fs: Fix typo in comments Change "truncateion" to "truncation". 
Signed-off-by: Keoseong Park Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index f96bbfa8b399..c605a4f2bce2 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -571,7 +571,7 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) raw_node = F2FS_NODE(dn->node_page); addr = blkaddr_in_node(raw_node) + base + ofs; - /* Assumption: truncateion starts with cluster */ + /* Assumption: truncation starts with cluster */ for (; count > 0; count--, addr++, dn->ofs_in_node++, cluster_index++) { block_t blkaddr = le32_to_cpu(*addr); From 0db18eec0d9a7ee525209e31e3ac2f673545b12f Mon Sep 17 00:00:00 2001 From: Mukesh Ojha Date: Thu, 27 Oct 2022 14:42:40 +0530 Subject: [PATCH 0752/4122] f2fs: fix the assign logic of iocb commit 18ae8d12991b ("f2fs: show more DIO information in tracepoint") introduces the iocb field in the 'f2fs_direct_IO_enter' trace event. It only assigns the pointer, and later accesses its fields when the trace log is printed.
Unable to handle kernel paging request at virtual address ffffffc04cef3d30 Mem abort info: ESR = 0x96000007 EC = 0x25: DABT (current EL), IL = 32 bits pc : trace_raw_output_f2fs_direct_IO_enter+0x54/0xa4 lr : trace_raw_output_f2fs_direct_IO_enter+0x2c/0xa4 sp : ffffffc0443cbbd0 x29: ffffffc0443cbbf0 x28: ffffff8935b120d0 x27: ffffff8935b12108 x26: ffffff8935b120f0 x25: ffffff8935b12100 x24: ffffff8935b110c0 x23: ffffff8935b10000 x22: ffffff88859a936c x21: ffffff88859a936c x20: ffffff8935b110c0 x19: ffffff8935b10000 x18: ffffffc03b195060 x17: ffffff8935b11e76 x16: 00000000000000cc x15: ffffffef855c4f2c x14: 0000000000000001 x13: 000000000000004e x12: ffff0000ffffff00 x11: ffffffef86c350d0 x10: 00000000000010c0 x9 : 000000000fe0002c x8 : ffffffc04cef3d28 x7 : 7f7f7f7f7f7f7f7f x6 : 0000000002000000 x5 : ffffff8935b11e9a x4 : 0000000000006250 x3 : ffff0a00ffffff04 x2 : 0000000000000002 x1 : ffffffef86a0a31f x0 : ffffff8935b10000 Call trace: trace_raw_output_f2fs_direct_IO_enter+0x54/0xa4 print_trace_fmt+0x9c/0x138 print_trace_line+0x154/0x254 tracing_read_pipe+0x21c/0x380 vfs_read+0x108/0x3ac ksys_read+0x7c/0xec __arm64_sys_read+0x20/0x30 invoke_syscall+0x60/0x150 el0_svc_common.llvm.1237943816091755067+0xb8/0xf8 do_el0_svc+0x28/0xa0 Fix it by copying the required variables for printing and while at it fix the similar issue at some other places in the same file. 
Fixes: bd984c03097b ("f2fs: show more DIO information in tracepoint") Signed-off-by: Mukesh Ojha Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/trace/events/f2fs.h | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index c6b372401c27..ff57e7f9914c 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -322,7 +322,7 @@ TRACE_EVENT(f2fs_unlink_enter, __field(ino_t, ino) __field(loff_t, size) __field(blkcnt_t, blocks) - __field(const char *, name) + __string(name, dentry->d_name.name) ), TP_fast_assign( @@ -330,7 +330,7 @@ TRACE_EVENT(f2fs_unlink_enter, __entry->ino = dir->i_ino; __entry->size = dir->i_size; __entry->blocks = dir->i_blocks; - __entry->name = dentry->d_name.name; + __assign_str(name, dentry->d_name.name); ), TP_printk("dev = (%d,%d), dir ino = %lu, i_size = %lld, " @@ -338,7 +338,7 @@ TRACE_EVENT(f2fs_unlink_enter, show_dev_ino(__entry), __entry->size, (unsigned long long)__entry->blocks, - __entry->name) + __get_str(name)) ); DEFINE_EVENT(f2fs__inode_exit, f2fs_unlink_exit, @@ -940,25 +940,29 @@ TRACE_EVENT(f2fs_direct_IO_enter, TP_STRUCT__entry( __field(dev_t, dev) __field(ino_t, ino) - __field(struct kiocb *, iocb) + __field(loff_t, ki_pos) + __field(int, ki_flags) + __field(u16, ki_ioprio) __field(unsigned long, len) __field(int, rw) ), TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->iocb = iocb; - __entry->len = len; - __entry->rw = rw; + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->ki_pos = iocb->ki_pos; + __entry->ki_flags = iocb->ki_flags; + __entry->ki_ioprio = iocb->ki_ioprio; + __entry->len = len; + __entry->rw = rw; ), TP_printk("dev = (%d,%d), ino = %lu pos = %lld len = %lu ki_flags = %x ki_ioprio = %x rw = %d", show_dev_ino(__entry), - __entry->iocb->ki_pos, + __entry->ki_pos, __entry->len, - __entry->iocb->ki_flags, 
- __entry->iocb->ki_ioprio, + __entry->ki_flags, + __entry->ki_ioprio, __entry->rw) ); @@ -1407,19 +1411,19 @@ TRACE_EVENT(f2fs_write_checkpoint, TP_STRUCT__entry( __field(dev_t, dev) __field(int, reason) - __field(char *, msg) + __string(dest_msg, msg) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->reason = reason; - __entry->msg = msg; + __assign_str(dest_msg, msg); ), TP_printk("dev = (%d,%d), checkpoint for %s, state = %s", show_dev(__entry->dev), show_cpreason(__entry->reason), - __entry->msg) + __get_str(dest_msg)) ); DECLARE_EVENT_CLASS(f2fs_discard, From 195623f2d8e9361eaddec071ad298998ec0590ba Mon Sep 17 00:00:00 2001 From: Mukesh Ojha Date: Thu, 27 Oct 2022 14:42:41 +0530 Subject: [PATCH 0753/4122] f2fs: fix the msg data type Data type of msg in f2fs_write_checkpoint trace should be const char * instead of char *. Signed-off-by: Mukesh Ojha Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/trace/events/f2fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index ff57e7f9914c..7fbfce498472 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -1404,7 +1404,7 @@ TRACE_EVENT(f2fs_readpages, TRACE_EVENT(f2fs_write_checkpoint, - TP_PROTO(struct super_block *sb, int reason, char *msg), + TP_PROTO(struct super_block *sb, int reason, const char *msg), TP_ARGS(sb, reason, msg), From 146dbcbf17a6d07169e75224d949cc2670de2e20 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Thu, 27 Oct 2022 18:24:46 +0800 Subject: [PATCH 0754/4122] f2fs: fix return val in f2fs_start_ckpt_thread() Return PTR_ERR(cprc->f2fs_issue_ckpt) instead of -ENOMEM; Signed-off-by: Yangtao Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 4 +++- fs/f2fs/gc.c | 15 +++++++-------- fs/f2fs/segment.c | 4 ++-- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index c00694a50222..56f7d0d6a8b2 100644 --- 
a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1902,8 +1902,10 @@ int f2fs_start_ckpt_thread(struct f2fs_sb_info *sbi) cprc->f2fs_issue_ckpt = kthread_run(issue_checkpoint_thread, sbi, "f2fs_ckpt-%u:%u", MAJOR(dev), MINOR(dev)); if (IS_ERR(cprc->f2fs_issue_ckpt)) { + int err = PTR_ERR(cprc->f2fs_issue_ckpt); + cprc->f2fs_issue_ckpt = NULL; - return -ENOMEM; + return err; } set_task_ioprio(cprc->f2fs_issue_ckpt, cprc->ckpt_thread_ioprio); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index d2e9c280773f..15f56859966c 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -171,13 +171,10 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi) { struct f2fs_gc_kthread *gc_th; dev_t dev = sbi->sb->s_bdev->bd_dev; - int err = 0; gc_th = f2fs_kmalloc(sbi, sizeof(struct f2fs_gc_kthread), GFP_KERNEL); - if (!gc_th) { - err = -ENOMEM; - goto out; - } + if (!gc_th) + return -ENOMEM; gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME; gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME; @@ -192,12 +189,14 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi) sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi, "f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev)); if (IS_ERR(gc_th->f2fs_gc_task)) { - err = PTR_ERR(gc_th->f2fs_gc_task); + int err = PTR_ERR(gc_th->f2fs_gc_task); + kfree(gc_th); sbi->gc_thread = NULL; + return err; } -out: - return err; + + return 0; } void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c470b443615f..c4270cd6eaab 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -620,7 +620,6 @@ int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi) { dev_t dev = sbi->sb->s_bdev->bd_dev; struct flush_cmd_control *fcc; - int err; if (SM_I(sbi)->fcc_info) { fcc = SM_I(sbi)->fcc_info; @@ -644,7 +643,8 @@ init_thread: fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); if (IS_ERR(fcc->f2fs_issue_flush)) { - err = PTR_ERR(fcc->f2fs_issue_flush); + 
int err = PTR_ERR(fcc->f2fs_issue_flush); + kfree(fcc); SM_I(sbi)->fcc_info = NULL; return err; From 7b02b2201893a71b881026cf574902019ab00db5 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 28 Oct 2022 17:30:26 +0800 Subject: [PATCH 0755/4122] f2fs: fix to destroy sbi->post_read_wq in error path of f2fs_fill_super() In error path of f2fs_fill_super(), this patch fixes to call f2fs_destroy_post_read_wq() once if we fail in f2fs_start_ckpt_thread(). Fixes: 261eeb9c1585 ("f2fs: introduce checkpoint_merge mount option") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index a247027711d8..e6365f040171 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -4531,9 +4531,9 @@ free_nm: f2fs_destroy_node_manager(sbi); free_sm: f2fs_destroy_segment_manager(sbi); - f2fs_destroy_post_read_wq(sbi); stop_ckpt_thread: f2fs_stop_ckpt_thread(sbi); + f2fs_destroy_post_read_wq(sbi); free_devices: destroy_device_list(sbi); kvfree(sbi->ckpt); From a3951cd199a5d26138532d4e55af41262237632e Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Tue, 25 Oct 2022 11:32:16 +0800 Subject: [PATCH 0756/4122] f2fs: introduce gc_mode sysfs node Revert "f2fs: make gc_urgent and gc_segment_mode sysfs node readable". Add a gc_mode sysfs node to show the current gc_mode as a string. Signed-off-by: Yangtao Li Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 6 ++++++ fs/f2fs/f2fs.h | 1 + fs/f2fs/sysfs.c | 15 +++++++++------ 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 53f70eadec96..ef2b3572ba18 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -640,3 +640,9 @@ Date: July 2022 Contact: "Daeho Jeong" Description: Show the accumulated total revoked atomic write block count after boot. 
If you write "0" here, you can initialize to "0". + +What: /sys/fs/f2fs//gc_mode +Date: October 2022 +Contact: "Yangtao Li" +Description: Show the current gc_mode as a string. + This is a read-only entry. diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index fa8dc00dfb2b..662b27c19de1 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1319,6 +1319,7 @@ enum { MAX_TIME, }; +/* Note that you need to keep synchronization with this gc_mode_names array */ enum { GC_NORMAL, GC_IDLE_CB, diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 8095345ebdad..1fbd41c48328 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -143,6 +143,12 @@ static ssize_t pending_discard_show(struct f2fs_attr *a, &SM_I(sbi)->dcc_info->discard_cmd_cnt)); } +static ssize_t gc_mode_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + return sysfs_emit(buf, "%s\n", gc_mode_names[sbi->gc_mode]); +} + static ssize_t features_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { @@ -332,13 +338,8 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a, return sysfs_emit(buf, "%u\n", sbi->compr_new_inode); #endif - if (!strcmp(a->attr.name, "gc_urgent")) - return sysfs_emit(buf, "%s\n", - gc_mode_names[sbi->gc_mode]); - if (!strcmp(a->attr.name, "gc_segment_mode")) - return sysfs_emit(buf, "%s\n", - gc_mode_names[sbi->gc_segment_mode]); + return sysfs_emit(buf, "%u\n", sbi->gc_segment_mode); if (!strcmp(a->attr.name, "gc_reclaimed_segments")) { return sysfs_emit(buf, "%u\n", @@ -844,6 +845,7 @@ F2FS_GENERAL_RO_ATTR(encoding); F2FS_GENERAL_RO_ATTR(mounted_time_sec); F2FS_GENERAL_RO_ATTR(main_blkaddr); F2FS_GENERAL_RO_ATTR(pending_discard); +F2FS_GENERAL_RO_ATTR(gc_mode); #ifdef CONFIG_F2FS_STAT_FS F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, cp_foreground_calls, cp_count); F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, cp_background_calls, bg_cp_count); @@ -926,6 +928,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(discard_granularity), ATTR_LIST(max_ordered_discard), 
ATTR_LIST(pending_discard), + ATTR_LIST(gc_mode), ATTR_LIST(ipu_policy), ATTR_LIST(min_ipu_util), ATTR_LIST(min_fsync_blocks), From 23ddc81b087c8bd9a73272afa076e204dc2e5410 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 28 Oct 2022 09:49:53 -0700 Subject: [PATCH 0757/4122] f2fs: use sysfs_emit instead of sprintf Let's use sysfs_emit. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/sysfs.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 1fbd41c48328..af23ed6121b0 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -95,28 +95,28 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type) static ssize_t dirty_segments_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { - return sprintf(buf, "%llu\n", + return sysfs_emit(buf, "%llu\n", (unsigned long long)(dirty_segments(sbi))); } static ssize_t free_segments_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { - return sprintf(buf, "%llu\n", + return sysfs_emit(buf, "%llu\n", (unsigned long long)(free_segments(sbi))); } static ssize_t ovp_segments_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { - return sprintf(buf, "%llu\n", + return sysfs_emit(buf, "%llu\n", (unsigned long long)(overprovision_segments(sbi))); } static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { - return sprintf(buf, "%llu\n", + return sysfs_emit(buf, "%llu\n", (unsigned long long)(sbi->kbytes_written + ((f2fs_get_sectors_written(sbi) - sbi->sectors_written_start) >> 1))); @@ -125,13 +125,13 @@ static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a, static ssize_t sb_status_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { - return sprintf(buf, "%lx\n", sbi->s_flag); + return sysfs_emit(buf, "%lx\n", sbi->s_flag); } static ssize_t cp_status_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char 
*buf) { - return sprintf(buf, "%x\n", le32_to_cpu(F2FS_CKPT(sbi)->ckpt_flags)); + return sysfs_emit(buf, "%x\n", le32_to_cpu(F2FS_CKPT(sbi)->ckpt_flags)); } static ssize_t pending_discard_show(struct f2fs_attr *a, @@ -139,7 +139,7 @@ static ssize_t pending_discard_show(struct f2fs_attr *a, { if (!SM_I(sbi)->dcc_info) return -EINVAL; - return sprintf(buf, "%llu\n", (unsigned long long)atomic_read( + return sysfs_emit(buf, "%llu\n", (unsigned long long)atomic_read( &SM_I(sbi)->dcc_info->discard_cmd_cnt)); } @@ -205,7 +205,7 @@ static ssize_t features_show(struct f2fs_attr *a, static ssize_t current_reserved_blocks_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { - return sprintf(buf, "%u\n", sbi->current_reserved_blocks); + return sysfs_emit(buf, "%u\n", sbi->current_reserved_blocks); } static ssize_t unusable_show(struct f2fs_attr *a, @@ -217,7 +217,7 @@ static ssize_t unusable_show(struct f2fs_attr *a, unusable = sbi->unusable_block_count; else unusable = f2fs_get_unusable_blocks(sbi); - return sprintf(buf, "%llu\n", (unsigned long long)unusable); + return sysfs_emit(buf, "%llu\n", (unsigned long long)unusable); } static ssize_t encoding_show(struct f2fs_attr *a, @@ -232,13 +232,13 @@ static ssize_t encoding_show(struct f2fs_attr *a, (sb->s_encoding->version >> 8) & 0xff, sb->s_encoding->version & 0xff); #endif - return sprintf(buf, "(none)"); + return sysfs_emit(buf, "(none)"); } static ssize_t mounted_time_sec_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { - return sprintf(buf, "%llu", SIT_I(sbi)->mounted_time); + return sysfs_emit(buf, "%llu", SIT_I(sbi)->mounted_time); } #ifdef CONFIG_F2FS_STAT_FS @@ -247,7 +247,7 @@ static ssize_t moved_blocks_foreground_show(struct f2fs_attr *a, { struct f2fs_stat_info *si = F2FS_STAT(sbi); - return sprintf(buf, "%llu\n", + return sysfs_emit(buf, "%llu\n", (unsigned long long)(si->tot_blks - (si->bg_data_blks + si->bg_node_blks))); } @@ -257,7 +257,7 @@ static ssize_t 
moved_blocks_background_show(struct f2fs_attr *a, { struct f2fs_stat_info *si = F2FS_STAT(sbi); - return sprintf(buf, "%llu\n", + return sysfs_emit(buf, "%llu\n", (unsigned long long)(si->bg_data_blks + si->bg_node_blks)); } @@ -268,7 +268,7 @@ static ssize_t avg_vblocks_show(struct f2fs_attr *a, si->dirty_count = dirty_segments(sbi); f2fs_update_sit_info(sbi); - return sprintf(buf, "%llu\n", (unsigned long long)(si->avg_vblocks)); + return sysfs_emit(buf, "%llu\n", (unsigned long long)(si->avg_vblocks)); } #endif @@ -363,7 +363,7 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a, ui = (unsigned int *)(ptr + a->offset); - return sprintf(buf, "%u\n", *ui); + return sysfs_emit(buf, "%u\n", *ui); } static ssize_t __sbi_store(struct f2fs_attr *a, @@ -728,7 +728,7 @@ static void f2fs_sb_release(struct kobject *kobj) static ssize_t f2fs_feature_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { - return sprintf(buf, "supported\n"); + return sysfs_emit(buf, "supported\n"); } #define F2FS_FEATURE_RO_ATTR(_name) \ @@ -741,8 +741,8 @@ static ssize_t f2fs_sb_feature_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { if (F2FS_HAS_FEATURE(sbi, a->id)) - return sprintf(buf, "supported\n"); - return sprintf(buf, "unsupported\n"); + return sysfs_emit(buf, "supported\n"); + return sysfs_emit(buf, "unsupported\n"); } #define F2FS_SB_FEATURE_RO_ATTR(_name, _feat) \ From eebd36a408bb6fc5d7adbb4b8c6db993d0a850f8 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 28 Oct 2022 10:07:13 -0700 Subject: [PATCH 0758/4122] f2fs: add missing bracket in doc Let's add missing <>. 
Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index ef2b3572ba18..a6a60268dcc5 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -241,7 +241,7 @@ Description: Shows total written kbytes issued to disk. What: /sys/fs/f2fs/<disk>/features Date: July 2017 Contact: "Jaegeuk Kim" -Description: <deprecated: should use /sys/fs/f2fs/<disk>/feature_list/ +Description: <deprecated: should use /sys/fs/f2fs/<disk>/feature_list/> Shows all enabled features in current device. Supported features: encryption, blkzoned, extra_attr, projquota, inode_checksum, From e5a0db6a9e2eafe50e3ebc73a8285ae561e7d850 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Tue, 25 Oct 2022 14:50:25 +0800 Subject: [PATCH 0759/4122] f2fs: replace gc_urgent_high_remaining with gc_remaining_trials The user can set the trial count limit for GC urgent and idle mode with gc_remaining_trials, which replaces gc_urgent_high_remaining. If GC thread gets to the limit, the mode will turn back to GC normal mode finally. It was applied only to GC_URGENT, while this patch expands it for GC_IDLE. Signed-off-by: Yangtao Li Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 8 ++++---- fs/f2fs/f2fs.h | 5 +++-- fs/f2fs/gc.c | 12 ++++++------ fs/f2fs/super.c | 2 +- fs/f2fs/sysfs.c | 12 ++++++------ 5 files changed, 20 insertions(+), 19 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index a6a60268dcc5..24e7cb77f265 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -598,10 +598,10 @@ Description: With "mode=fragment:block" mount options, we can scatter block allo in the length of 1..<max_fragment_hole> by turns. This value can be set between 1..512 and the default value is 4. 
-What: /sys/fs/f2fs//gc_urgent_high_remaining -Date: December 2021 -Contact: "Daeho Jeong" -Description: You can set the trial count limit for GC urgent high mode with this value. +What: /sys/fs/f2fs//gc_remaining_trials +Date: October 2022 +Contact: "Yangtao Li" +Description: You can set the trial count limit for GC urgent and idle mode with this value. If GC thread gets to the limit, the mode will turn back to GC normal mode. By default, the value is zero, which means there is no limit like before. diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 662b27c19de1..04ef4cce3d7f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1736,8 +1736,9 @@ struct f2fs_sb_info { unsigned int cur_victim_sec; /* current victim section num */ unsigned int gc_mode; /* current GC state */ unsigned int next_victim_seg[2]; /* next segment in victim section */ - spinlock_t gc_urgent_high_lock; - unsigned int gc_urgent_high_remaining; /* remaining trial count for GC_URGENT_HIGH */ + spinlock_t gc_remaining_trials_lock; + /* remaining trial count for GC_URGENT_* and GC_IDLE_* */ + unsigned int gc_remaining_trials; /* for skip statistic */ unsigned long long skipped_gc_rwsem; /* FG_GC only */ diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 15f56859966c..6466db75af5d 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -152,14 +152,14 @@ do_gc: /* balancing f2fs's metadata periodically */ f2fs_balance_fs_bg(sbi, true); next: - if (sbi->gc_mode == GC_URGENT_HIGH) { - spin_lock(&sbi->gc_urgent_high_lock); - if (sbi->gc_urgent_high_remaining) { - sbi->gc_urgent_high_remaining--; - if (!sbi->gc_urgent_high_remaining) + if (sbi->gc_mode != GC_NORMAL) { + spin_lock(&sbi->gc_remaining_trials_lock); + if (sbi->gc_remaining_trials) { + sbi->gc_remaining_trials--; + if (!sbi->gc_remaining_trials) sbi->gc_mode = GC_NORMAL; } - spin_unlock(&sbi->gc_urgent_high_lock); + spin_unlock(&sbi->gc_remaining_trials_lock); } sb_end_write(sbi->sb); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 
e6365f040171..a43d8a46a6e5 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3624,7 +3624,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->seq_file_ra_mul = MIN_RA_MUL; sbi->max_fragment_chunk = DEF_FRAGMENT_SIZE; sbi->max_fragment_hole = DEF_FRAGMENT_SIZE; - spin_lock_init(&sbi->gc_urgent_high_lock); + spin_lock_init(&sbi->gc_remaining_trials_lock); atomic64_set(&sbi->current_atomic_write, 0); sbi->dir_level = DEF_DIR_LEVEL; diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index af23ed6121b0..032c03e09580 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -538,10 +538,10 @@ out: return count; } - if (!strcmp(a->attr.name, "gc_urgent_high_remaining")) { - spin_lock(&sbi->gc_urgent_high_lock); - sbi->gc_urgent_high_remaining = t; - spin_unlock(&sbi->gc_urgent_high_lock); + if (!strcmp(a->attr.name, "gc_remaining_trials")) { + spin_lock(&sbi->gc_remaining_trials_lock); + sbi->gc_remaining_trials = t; + spin_unlock(&sbi->gc_remaining_trials_lock); return count; } @@ -832,7 +832,7 @@ F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type); #endif F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, data_io_flag, data_io_flag); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, node_io_flag, node_io_flag); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_urgent_high_remaining, gc_urgent_high_remaining); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_remaining_trials, gc_remaining_trials); F2FS_RW_ATTR(CPRC_INFO, ckpt_req_control, ckpt_thread_ioprio, ckpt_thread_ioprio); F2FS_GENERAL_RO_ATTR(dirty_segments); F2FS_GENERAL_RO_ATTR(free_segments); @@ -961,7 +961,7 @@ static struct attribute *f2fs_attrs[] = { #endif ATTR_LIST(data_io_flag), ATTR_LIST(node_io_flag), - ATTR_LIST(gc_urgent_high_remaining), + ATTR_LIST(gc_remaining_trials), ATTR_LIST(ckpt_thread_ioprio), ATTR_LIST(dirty_segments), ATTR_LIST(free_segments), From 3b21b794b5797d35f4fad930b53b1cd881c12dd3 Mon Sep 17 00:00:00 2001 From: "wangkailong@jari.cn" Date: Sat, 29 Oct 2022 22:49:30 +0800 Subject: [PATCH 0760/4122] 
f2fs: replace ternary operator with max() Fix the following coccicheck warning: ./fs/f2fs/segment.c:877:24-25: WARNING opportunity for max() Signed-off-by: KaiLong Wang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c4270cd6eaab..aa4be7f25963 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -856,7 +856,7 @@ block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi) } mutex_unlock(&dirty_i->seglist_lock); - unusable = holes[DATA] > holes[NODE] ? holes[DATA] : holes[NODE]; + unusable = max(holes[DATA], holes[NODE]); if (unusable > ovp_holes) return unusable - ovp_holes; return 0; From f6c64dc32ab91b4c37fa2a255d2270f4ff0b95ba Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Sat, 29 Oct 2022 09:25:05 +0800 Subject: [PATCH 0761/4122] apparmor: Add __init annotation to aa_{setup/teardown}_dfa_engine() The aa_setup_dfa_engine() and aa_teardown_dfa_engine() functions are only called in apparmor_init(), so let us add the __init annotation to them. 
Fixes: 11c236b89d7c ("apparmor: add a default null dfa") Signed-off-by: Xiu Jianfeng Signed-off-by: John Johansen --- security/apparmor/match.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/security/apparmor/match.c b/security/apparmor/match.c index 5095c26ca683..b97ef5e1db73 100644 --- a/security/apparmor/match.c +++ b/security/apparmor/match.c @@ -31,7 +31,7 @@ static char stacksplitdfa_src[] = { }; struct aa_dfa *stacksplitdfa; -int aa_setup_dfa_engine(void) +int __init aa_setup_dfa_engine(void) { int error; @@ -59,7 +59,7 @@ int aa_setup_dfa_engine(void) return 0; } -void aa_teardown_dfa_engine(void) +void __init aa_teardown_dfa_engine(void) { aa_put_dfa(stacksplitdfa); aa_put_dfa(nulldfa); From 4295c60bbe9e63e35d330546eeaa1d2b62dae303 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Tue, 1 Nov 2022 05:40:40 -0700 Subject: [PATCH 0762/4122] apparmor: Fix uninitialized symbol 'array_size' in policy_unpack_test.c Make sure array_size is initialized in the kunit test to get rid of compiler warnings. This will also make sure the following tests fail consistently if the first test fails. 
Reported-by: kernel test robot Signed-off-by: John Johansen --- security/apparmor/policy_unpack_test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/security/apparmor/policy_unpack_test.c b/security/apparmor/policy_unpack_test.c index b214f6ea8a72..7465da42492d 100644 --- a/security/apparmor/policy_unpack_test.c +++ b/security/apparmor/policy_unpack_test.c @@ -140,7 +140,7 @@ static void policy_unpack_test_inbounds_when_out_of_bounds(struct kunit *test) static void policy_unpack_test_unpack_array_with_null_name(struct kunit *test) { struct policy_unpack_fixture *puf = test->priv; - u16 array_size; + u16 array_size = 0; puf->e->pos += TEST_ARRAY_BUF_OFFSET; @@ -155,7 +155,7 @@ static void policy_unpack_test_unpack_array_with_name(struct kunit *test) { struct policy_unpack_fixture *puf = test->priv; const char name[] = TEST_ARRAY_NAME; - u16 array_size; + u16 array_size = 0; puf->e->pos += TEST_NAMED_ARRAY_BUF_OFFSET; From d780a47c2de9a761bb3d754a24e76495b3f0d55f Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 5 Oct 2022 18:13:45 +0100 Subject: [PATCH 0763/4122] serial: sifive: select by default if SOC_SIFIVE With the aim of dropping direct selects of drivers from Kconfig.socs, default the SiFive serial drivers to the value of SOC_SIFIVE. 
Signed-off-by: Conor Dooley Link: https://lore.kernel.org/r/20221005171348.167476-3-conor@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig index 434f83168546..94457b54125b 100644 --- a/drivers/tty/serial/Kconfig +++ b/drivers/tty/serial/Kconfig @@ -958,6 +958,7 @@ config SERIAL_OMAP_CONSOLE config SERIAL_SIFIVE tristate "SiFive UART support" depends on OF + default SOC_SIFIVE select SERIAL_CORE help Select this option if you are building a kernel for a device that @@ -967,6 +968,7 @@ config SERIAL_SIFIVE config SERIAL_SIFIVE_CONSOLE bool "Console on SiFive UART" depends on SERIAL_SIFIVE=y + default SOC_SIFIVE select SERIAL_CORE_CONSOLE select SERIAL_EARLYCON help From f58a16043a2154661aafe8287cea24e6ae51a9d9 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 5 Oct 2022 18:13:46 +0100 Subject: [PATCH 0764/4122] serial: sifive: select by default if SOC_CANAAN With the aim of dropping direct selects of drivers from Kconfig.socs, default the SiFive serial drivers to enabled if SOC_CANAAN. 
Signed-off-by: Conor Dooley Link: https://lore.kernel.org/r/20221005171348.167476-4-conor@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig index 94457b54125b..c55b947f3cdb 100644 --- a/drivers/tty/serial/Kconfig +++ b/drivers/tty/serial/Kconfig @@ -958,7 +958,7 @@ config SERIAL_OMAP_CONSOLE config SERIAL_SIFIVE tristate "SiFive UART support" depends on OF - default SOC_SIFIVE + default SOC_SIFIVE || SOC_CANAAN select SERIAL_CORE help Select this option if you are building a kernel for a device that @@ -968,7 +968,7 @@ config SERIAL_SIFIVE config SERIAL_SIFIVE_CONSOLE bool "Console on SiFive UART" depends on SERIAL_SIFIVE=y - default SOC_SIFIVE + default SOC_SIFIVE || SOC_CANAAN select SERIAL_CORE_CONSOLE select SERIAL_EARLYCON help From 72da688b457d738b943016dabc603efb1be5f4e1 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 6 Oct 2022 07:20:52 +0200 Subject: [PATCH 0765/4122] tty: evh_bytechan: Replace NO_IRQ by 0 NO_IRQ is used to check the return of irq_of_parse_and_map(). On some architectures NO_IRQ is 0, on other architectures it is -1. irq_of_parse_and_map() returns 0 on error, independent of NO_IRQ. So use 0 instead of using NO_IRQ. 
Reviewed-by: Jiri Slaby Acked-by: Laurentiu Tudor Signed-off-by: Christophe Leroy Link: https://lore.kernel.org/r/23f608ca57e7e19bc7060d3e563de383e0b2b337.1665033575.git.christophe.leroy@csgroup.eu Signed-off-by: Greg Kroah-Hartman --- drivers/tty/ehv_bytechan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/ehv_bytechan.c b/drivers/tty/ehv_bytechan.c index 19d32cb6af84..8595483f4697 100644 --- a/drivers/tty/ehv_bytechan.c +++ b/drivers/tty/ehv_bytechan.c @@ -118,7 +118,7 @@ static int find_console_handle(void) return 0; stdout_irq = irq_of_parse_and_map(np, 0); - if (stdout_irq == NO_IRQ) { + if (!stdout_irq) { pr_err("ehv-bc: no 'interrupts' property in %pOF node\n", np); return 0; } @@ -696,7 +696,7 @@ static int ehv_bc_tty_probe(struct platform_device *pdev) bc->rx_irq = irq_of_parse_and_map(np, 0); bc->tx_irq = irq_of_parse_and_map(np, 1); - if ((bc->rx_irq == NO_IRQ) || (bc->tx_irq == NO_IRQ)) { + if (!bc->rx_irq || !bc->tx_irq) { dev_err(&pdev->dev, "no 'interrupts' property in %pOFn node\n", np); ret = -ENODEV; From e5c3ddd394a409021377b5454a5bd1efa1ba8447 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Mon, 31 Oct 2022 21:54:10 +0100 Subject: [PATCH 0766/4122] staging: r8188eu: use ether_addr_equal for address comparison We can use ether_addr_equal instead of memcmp in update_recvframe_phyinfo_88e for comparing the incoming frame's destination address with our local address. Both struct ieee80211_hdr and struct eeprom_priv's mac_addr component are 2-byte aligned. 
Suggested-by: Joe Perches Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221031205412.124871-2-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c b/drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c index f01ae71bcdb1..10bb2e602984 100644 --- a/drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c +++ b/drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c @@ -126,8 +126,8 @@ void update_recvframe_phyinfo_88e(struct recv_frame *precvframe, struct phy_stat get_bssid(&padapter->mlmepriv), ETH_ALEN)); pkt_info.bPacketToSelf = pkt_info.bPacketMatchBSSID && - (!memcmp(ieee80211_get_DA(hdr), - myid(&padapter->eeprompriv), ETH_ALEN)); + ether_addr_equal(ieee80211_get_DA(hdr), + myid(&padapter->eeprompriv)); pkt_info.bPacketBeacon = pkt_info.bPacketMatchBSSID && ieee80211_is_beacon(fc); if (pkt_info.bPacketBeacon) { From 97cc476fc6fbfabb989d508df09208c4a598e149 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Mon, 31 Oct 2022 21:54:11 +0100 Subject: [PATCH 0767/4122] staging: r8188eu: use hdr->frame_control instead of fc We can remove the fc variable in update_recvframe_phyinfo_88e and use hdr->frame_control instead. 
Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221031205412.124871-3-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c b/drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c index 10bb2e602984..4e9d8c8285c4 100644 --- a/drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c +++ b/drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c @@ -114,13 +114,12 @@ void update_recvframe_phyinfo_88e(struct recv_frame *precvframe, struct phy_stat struct hal_data_8188e *pHalData = &padapter->haldata; struct phy_info *pPHYInfo = &pattrib->phy_info; u8 *wlanhdr = precvframe->rx_data; - __le16 fc = *(__le16 *)wlanhdr; struct odm_per_pkt_info pkt_info; u8 *sa = NULL; struct sta_priv *pstapriv; struct sta_info *psta; - pkt_info.bPacketMatchBSSID = ((!ieee80211_is_ctl(fc)) && + pkt_info.bPacketMatchBSSID = ((!ieee80211_is_ctl(hdr->frame_control)) && !pattrib->icv_err && !pattrib->crc_err && !memcmp(get_hdr_bssid(wlanhdr), get_bssid(&padapter->mlmepriv), ETH_ALEN)); @@ -129,7 +128,8 @@ void update_recvframe_phyinfo_88e(struct recv_frame *precvframe, struct phy_stat ether_addr_equal(ieee80211_get_DA(hdr), myid(&padapter->eeprompriv)); - pkt_info.bPacketBeacon = pkt_info.bPacketMatchBSSID && ieee80211_is_beacon(fc); + pkt_info.bPacketBeacon = pkt_info.bPacketMatchBSSID && + ieee80211_is_beacon(hdr->frame_control); if (pkt_info.bPacketBeacon) { if (check_fwstate(&padapter->mlmepriv, WIFI_STATION_STATE)) sa = padapter->mlmepriv.cur_network.network.MacAddress; From aa69ca7d6d6cd2ed2209b4b6c102f2fec3e97012 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Mon, 31 Oct 2022 21:54:12 +0100 Subject: [PATCH 0768/4122] staging: r8188eu: use ieee80211_get_SA Use ieee80211_get_SA in update_recvframe_phyinfo_88e instead of the driver-specific get_sa function. 
Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221031205412.124871-4-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c b/drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c index 4e9d8c8285c4..9a61eef8550b 100644 --- a/drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c +++ b/drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c @@ -135,7 +135,7 @@ void update_recvframe_phyinfo_88e(struct recv_frame *precvframe, struct phy_stat sa = padapter->mlmepriv.cur_network.network.MacAddress; /* to do Ad-hoc */ } else { - sa = get_sa(wlanhdr); + sa = ieee80211_get_SA(hdr); } pstapriv = &padapter->stapriv; From 3032eb4690d5d5bac28d0e92cc501962a42ca4be Mon Sep 17 00:00:00 2001 From: Michael Straube Date: Mon, 31 Oct 2022 16:37:43 +0100 Subject: [PATCH 0769/4122] staging: r8188eu: replace ternary operator with min, max, abs macros Replace some ternary operators with the min(), max() or abs() macros to improve readability. Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Michael Straube Link: https://lore.kernel.org/r/20221031153743.8801-1-straube.linux@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_xmit.c | 3 +-- drivers/staging/r8188eu/hal/HalPhyRf_8188e.c | 2 +- drivers/staging/r8188eu/hal/odm_RTL8188E.c | 4 ++-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_xmit.c b/drivers/staging/r8188eu/core/rtw_xmit.c index 4f8220428328..bd6c1a401c59 100644 --- a/drivers/staging/r8188eu/core/rtw_xmit.c +++ b/drivers/staging/r8188eu/core/rtw_xmit.c @@ -476,8 +476,7 @@ static uint rtw_pktfile_read(struct pkt_file *pfile, u8 *rmem, uint rlen) { uint len; - len = rtw_remainder_len(pfile); - len = (rlen > len) ? 
len : rlen; + len = min(rtw_remainder_len(pfile), rlen); if (rmem) skb_copy_bits(pfile->pkt, pfile->buf_len - pfile->pkt_len, rmem, len); diff --git a/drivers/staging/r8188eu/hal/HalPhyRf_8188e.c b/drivers/staging/r8188eu/hal/HalPhyRf_8188e.c index 60cdfcf80daa..622f95d3f2ed 100644 --- a/drivers/staging/r8188eu/hal/HalPhyRf_8188e.c +++ b/drivers/staging/r8188eu/hal/HalPhyRf_8188e.c @@ -583,7 +583,7 @@ static bool phy_SimularityCompare_8188E( tmp2 = resulta[c2][i]; } - diff = (tmp1 > tmp2) ? (tmp1 - tmp2) : (tmp2 - tmp1); + diff = abs(tmp1 - tmp2); if (diff > MAX_TOLERANCE) { if ((i == 2 || i == 6) && !sim_bitmap) { diff --git a/drivers/staging/r8188eu/hal/odm_RTL8188E.c b/drivers/staging/r8188eu/hal/odm_RTL8188E.c index dd9c8291f025..f3f4074d4316 100644 --- a/drivers/staging/r8188eu/hal/odm_RTL8188E.c +++ b/drivers/staging/r8188eu/hal/odm_RTL8188E.c @@ -199,7 +199,7 @@ static void odm_HWAntDiv(struct odm_dm_struct *dm_odm) Aux_RSSI = (dm_fat_tbl->AuxAnt_Cnt[i] != 0) ? (dm_fat_tbl->AuxAnt_Sum[i] / dm_fat_tbl->AuxAnt_Cnt[i]) : 0; TargetAnt = (Main_RSSI >= Aux_RSSI) ? MAIN_ANT : AUX_ANT; /* 2 Select MaxRSSI for DIG */ - LocalMaxRSSI = (Main_RSSI > Aux_RSSI) ? Main_RSSI : Aux_RSSI; + LocalMaxRSSI = max(Main_RSSI, Aux_RSSI); if ((LocalMaxRSSI > AntDivMaxRSSI) && (LocalMaxRSSI < 40)) AntDivMaxRSSI = LocalMaxRSSI; if (LocalMaxRSSI > MaxRSSI) @@ -211,7 +211,7 @@ static void odm_HWAntDiv(struct odm_dm_struct *dm_odm) else if ((dm_fat_tbl->RxIdleAnt == AUX_ANT) && (Aux_RSSI == 0)) Aux_RSSI = Main_RSSI; - LocalMinRSSI = (Main_RSSI > Aux_RSSI) ? Aux_RSSI : Main_RSSI; + LocalMinRSSI = min(Main_RSSI, Aux_RSSI); if (LocalMinRSSI < MinRSSI) { MinRSSI = LocalMinRSSI; RxIdleAnt = TargetAnt; From a653e32aaddb11cc4f159558c4cf7dfa2c5a0222 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Mon, 31 Oct 2022 21:51:41 +0100 Subject: [PATCH 0770/4122] staging: r8188eu: use ether_addr_equal in OnAction Use ether_addr_equal to compare two mac addresses in OnAction. 
Both struct ieee80211_mgmt and struct eeprom_priv's mac_addr component are 2-byte aligned. Suggested-by: Joe Perches Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221031205140.124682-1-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index e985fc5fc575..6679d4037d6b 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -3823,7 +3823,7 @@ static void OnAction(struct adapter *padapter, struct recv_frame *precv_frame) { struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)precv_frame->rx_data; - if (memcmp(myid(&padapter->eeprompriv), mgmt->da, ETH_ALEN)) + if (!ether_addr_equal(myid(&padapter->eeprompriv), mgmt->da)) return; switch (mgmt->u.action.category) { From 3b45e2e139c1e71b17a97ae887ba1caadd8e8ad2 Mon Sep 17 00:00:00 2001 From: Tanjuate Brunostar Date: Tue, 1 Nov 2022 09:55:33 +0000 Subject: [PATCH 0771/4122] staging: vt6655: change variable name wTimeStampOff Remove the use of Hungarian notation, which is not used in the Linux kernel. 
Signed-off-by: Tanjuate Brunostar Link: https://lore.kernel.org/r/Y2DtFRdhCiyNF2kF@elroy-temp-vm.gaiao0uenmiufjlowqgp5yxwdh.gvxx.internal.cloudapp.net Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vt6655/rxtx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/vt6655/rxtx.c b/drivers/staging/vt6655/rxtx.c index 5bdb5176772c..baa3f6f1508f 100644 --- a/drivers/staging/vt6655/rxtx.c +++ b/drivers/staging/vt6655/rxtx.c @@ -54,7 +54,7 @@ */ #define CRITICAL_PACKET_LEN 256 -static const unsigned short wTimeStampOff[2][MAX_RATE] = { +static const unsigned short time_stamp_off[2][MAX_RATE] = { {384, 288, 226, 209, 54, 43, 37, 31, 28, 25, 24, 23}, /* Long Preamble */ {384, 192, 130, 113, 54, 43, 37, 31, 28, 25, 24, 23}, /* Short Preamble */ }; @@ -142,7 +142,7 @@ s_uFillDataHead( static __le16 vnt_time_stamp_off(struct vnt_private *priv, u16 rate) { - return cpu_to_le16(wTimeStampOff[priv->preamble_type % 2] + return cpu_to_le16(time_stamp_off[priv->preamble_type % 2] [rate % MAX_RATE]); } From 3cbb8d0d6d42958c146624a102d1a0e59a820b65 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 1 Nov 2022 09:01:11 +0100 Subject: [PATCH 0772/4122] staging: octeon: cvmx_ptr_to_phys() should return physaddr_t MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On 32-bit without physical address extensions (e.g. sh-allmodconfig): drivers/staging/octeon/ethernet-mem.c: In function ‘cvm_oct_free_hw_memory’: ./arch/sh/include/asm/io.h:239:32: error: cast to pointer from integer of different size [-Werror=int-to-pointer-cast] 239 | #define phys_to_virt(address) ((void *)(address)) | ^ drivers/staging/octeon/ethernet-mem.c:123:18: note: in expansion of macro ‘phys_to_virt’ 123 | fpa = (char *)phys_to_virt(cvmx_ptr_to_phys(fpa)); | ^~~~~~~~~~~~ Fix this by making cvmx_ptr_to_phys() return phys_addr_t instead of uint64_t. 
Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20221101080111.750748-1-geert@linux-m68k.org Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/octeon/cvmx.h | 4 ++-- drivers/staging/octeon/octeon-stubs.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/mips/include/asm/octeon/cvmx.h b/arch/mips/include/asm/octeon/cvmx.h index 25854abc95f8..72e775bf31e6 100644 --- a/arch/mips/include/asm/octeon/cvmx.h +++ b/arch/mips/include/asm/octeon/cvmx.h @@ -154,13 +154,13 @@ static inline uint64_t cvmx_build_bits(uint64_t high_bit, /** * Convert a memory pointer (void*) into a hardware compatible - * memory address (uint64_t). Octeon hardware widgets don't + * memory address (phys_addr_t). Octeon hardware widgets don't * understand logical addresses. * * @ptr: C style memory pointer * Returns Hardware physical address */ -static inline uint64_t cvmx_ptr_to_phys(void *ptr) +static inline phys_addr_t cvmx_ptr_to_phys(void *ptr) { if (sizeof(void *) == 8) { /* diff --git a/drivers/staging/octeon/octeon-stubs.h b/drivers/staging/octeon/octeon-stubs.h index 3f8e5713b8a8..7a02e59e283f 100644 --- a/drivers/staging/octeon/octeon-stubs.h +++ b/drivers/staging/octeon/octeon-stubs.h @@ -1212,7 +1212,7 @@ static inline void *cvmx_phys_to_ptr(uint64_t physical_address) return (void *)(uintptr_t)(physical_address); } -static inline uint64_t cvmx_ptr_to_phys(void *ptr) +static inline phys_addr_t cvmx_ptr_to_phys(void *ptr) { return (unsigned long)ptr; } From dba2628c8ec5c2821103090a6ba946eab9da0c1f Mon Sep 17 00:00:00 2001 From: Gabhyun Kim Date: Tue, 1 Nov 2022 15:26:36 +0900 Subject: [PATCH 0773/4122] staging: rtl8192e: Remove line breaks to match coding style Remove redundant line break in function definition to correct coding style. 
Signed-off-by: Gabhyun Kim Link: https://lore.kernel.org/r/20221101062636.GA3257@ubuntu Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtllib_softmac.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/staging/rtl8192e/rtllib_softmac.c b/drivers/staging/rtl8192e/rtllib_softmac.c index 1a3ca3e57623..1253de481805 100644 --- a/drivers/staging/rtl8192e/rtllib_softmac.c +++ b/drivers/staging/rtl8192e/rtllib_softmac.c @@ -148,8 +148,7 @@ static void init_mgmt_queue(struct rtllib_device *ieee) } -u8 -MgntQuery_TxRateExcludeCCKRates(struct rtllib_device *ieee) +u8 MgntQuery_TxRateExcludeCCKRates(struct rtllib_device *ieee) { u16 i; u8 QueryRate = 0; From 8b550eb637b483866f8d4662fffc9518ad9e7eca Mon Sep 17 00:00:00 2001 From: Deepak R Varma Date: Mon, 31 Oct 2022 23:25:39 +0530 Subject: [PATCH 0774/4122] staging: rtl8192u: remove unnecessary function implementation This driver is a single standalone driver and does not have any loading dependencies on any other associated drivers. The build results in one single .ko object. The current implementation of the function ieee80211_tkip_null simply returns back to the caller without any useful instruction executions. It does not lead to auto-loading of any other associated modules, as the initial design appears to have intended. Hence the call to ieee80211_tkip_null() and its implementation are unnecessary and should be removed.
Signed-off-by: Deepak R Varma Link: https://lore.kernel.org/r/6f5a4313fc21365bf733c25385aef79554ffb253.1667237959.git.drv@mailo.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192u/ieee80211/ieee80211.h | 3 --- drivers/staging/rtl8192u/ieee80211/ieee80211_crypt_tkip.c | 6 ------ drivers/staging/rtl8192u/ieee80211/ieee80211_module.c | 3 --- 3 files changed, 12 deletions(-) diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211.h b/drivers/staging/rtl8192u/ieee80211/ieee80211.h index 9cd4b1896745..00c07455cbb3 100644 --- a/drivers/staging/rtl8192u/ieee80211/ieee80211.h +++ b/drivers/staging/rtl8192u/ieee80211/ieee80211.h @@ -232,8 +232,6 @@ struct cb_desc { #define ieee80211_ccmp_null ieee80211_ccmp_null_rsl -#define ieee80211_tkip_null ieee80211_tkip_null_rsl - #define free_ieee80211 free_ieee80211_rsl #define alloc_ieee80211 alloc_ieee80211_rsl @@ -2256,7 +2254,6 @@ void ieee80211_ps_tx_ack(struct ieee80211_device *ieee, short success); void softmac_mgmt_xmit(struct sk_buff *skb, struct ieee80211_device *ieee); /* ieee80211_crypt_ccmp&tkip&wep.c */ -void ieee80211_tkip_null(void); int ieee80211_crypto_init(void); void ieee80211_crypto_deinit(void); diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211_crypt_tkip.c b/drivers/staging/rtl8192u/ieee80211/ieee80211_crypt_tkip.c index 7b120b8cb982..9bfd24ad46b6 100644 --- a/drivers/staging/rtl8192u/ieee80211/ieee80211_crypt_tkip.c +++ b/drivers/staging/rtl8192u/ieee80211/ieee80211_crypt_tkip.c @@ -716,9 +716,3 @@ void ieee80211_crypto_tkip_exit(void) { ieee80211_unregister_crypto_ops(&ieee80211_crypt_tkip); } - -void ieee80211_tkip_null(void) -{ -// printk("============>%s()\n", __func__); - return; -} diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211_module.c b/drivers/staging/rtl8192u/ieee80211/ieee80211_module.c index b94fe9b449b6..3f93939bc4ee 100644 --- a/drivers/staging/rtl8192u/ieee80211/ieee80211_module.c +++ b/drivers/staging/rtl8192u/ieee80211/ieee80211_module.c @@ -159,9 +159,6 @@ 
struct net_device *alloc_ieee80211(int sizeof_priv) ieee->last_packet_time[i] = 0; } -/* These function were added to load crypte module autoly */ - ieee80211_tkip_null(); - return dev; failed: From 1a6d64701922b2ff05b240dc0d69f3feda5e2690 Mon Sep 17 00:00:00 2001 From: Michael Straube Date: Tue, 1 Nov 2022 20:14:57 +0100 Subject: [PATCH 0775/4122] staging: r8188eu: remove extern from function prototypes Declaring function prototypes extern is redundant and triggers checkpatch warnings. Remove all extern from function prototypes. Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Michael Straube Link: https://lore.kernel.org/r/20221101191458.8619-2-straube.linux@gmail.com Signed-off-by: Greg Kroah-Hartman --- .../staging/r8188eu/include/osdep_service.h | 2 +- drivers/staging/r8188eu/include/rtw_mlme_ext.h | 4 ++-- drivers/staging/r8188eu/include/sta_info.h | 18 +++++++++--------- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/staging/r8188eu/include/osdep_service.h b/drivers/staging/r8188eu/include/osdep_service.h index ec2631455f08..f8ed04f32cae 100644 --- a/drivers/staging/r8188eu/include/osdep_service.h +++ b/drivers/staging/r8188eu/include/osdep_service.h @@ -66,7 +66,7 @@ static inline int rtw_netif_queue_stopped(struct net_device *pnetdev) netif_tx_queue_stopped(netdev_get_tx_queue(pnetdev, 3)); } -extern int RTW_STATUS_CODE(int error_code); +int RTW_STATUS_CODE(int error_code); void *rtw_malloc2d(int h, int w, int size); diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h index ce5b57e23e53..e234a3b9af6f 100644 --- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h +++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h @@ -388,7 +388,7 @@ struct mlme_ext_priv { void init_mlme_ext_priv(struct adapter *adapter); int init_hw_mlme_ext(struct adapter *padapter); void free_mlme_ext_priv (struct mlme_ext_priv *pmlmeext); -extern struct xmit_frame *alloc_mgtxmitframe(struct xmit_priv 
*pxmitpriv); +struct xmit_frame *alloc_mgtxmitframe(struct xmit_priv *pxmitpriv); unsigned char networktype_to_raid(unsigned char network_type); u8 judge_network_type(struct adapter *padapter, unsigned char *rate, int len); @@ -483,7 +483,7 @@ void report_add_sta_event(struct adapter *padapter, unsigned char *addr, int cam_idx); void beacon_timing_control(struct adapter *padapter); -extern u8 set_tx_beacon_cmd(struct adapter *padapter); +u8 set_tx_beacon_cmd(struct adapter *padapter); unsigned int setup_beacon_frame(struct adapter *padapter, unsigned char *beacon_frame); void update_mgnt_tx_rate(struct adapter *padapter, u8 rate); diff --git a/drivers/staging/r8188eu/include/sta_info.h b/drivers/staging/r8188eu/include/sta_info.h index f76e086b5701..55ea5edc2914 100644 --- a/drivers/staging/r8188eu/include/sta_info.h +++ b/drivers/staging/r8188eu/include/sta_info.h @@ -295,19 +295,19 @@ static inline u32 wifi_mac_hash(u8 *mac) return x; } -extern int _rtw_init_sta_priv(struct sta_priv *pstapriv); -extern void _rtw_free_sta_priv(struct sta_priv *pstapriv); +int _rtw_init_sta_priv(struct sta_priv *pstapriv); +void _rtw_free_sta_priv(struct sta_priv *pstapriv); #define stainfo_offset_valid(offset) (offset < NUM_STA && offset >= 0) int rtw_stainfo_offset(struct sta_priv *stapriv, struct sta_info *sta); struct sta_info *rtw_get_stainfo_by_offset(struct sta_priv *stapriv, int off); -extern struct sta_info *rtw_alloc_stainfo(struct sta_priv *stapriv, u8 *hwaddr); -extern u32 rtw_free_stainfo(struct adapter *adapt, struct sta_info *psta); -extern void rtw_free_all_stainfo(struct adapter *adapt); -extern struct sta_info *rtw_get_stainfo(struct sta_priv *stapriv, u8 *hwaddr); -extern u32 rtw_init_bcmc_stainfo(struct adapter *adapt); -extern struct sta_info *rtw_get_bcmc_stainfo(struct adapter *padapter); -extern u8 rtw_access_ctrl(struct adapter *padapter, u8 *mac_addr); +struct sta_info *rtw_alloc_stainfo(struct sta_priv *stapriv, u8 *hwaddr); +u32 rtw_free_stainfo(struct 
adapter *adapt, struct sta_info *psta); +void rtw_free_all_stainfo(struct adapter *adapt); +struct sta_info *rtw_get_stainfo(struct sta_priv *stapriv, u8 *hwaddr); +u32 rtw_init_bcmc_stainfo(struct adapter *adapt); +struct sta_info *rtw_get_bcmc_stainfo(struct adapter *padapter); +u8 rtw_access_ctrl(struct adapter *padapter, u8 *mac_addr); #endif /* _STA_INFO_H_ */ From 29626f3c07d09b79d6510d1915c106e51ba85022 Mon Sep 17 00:00:00 2001 From: Michael Straube Date: Tue, 1 Nov 2022 20:14:58 +0100 Subject: [PATCH 0776/4122] staging: r8188eu: convert rtw_free_stainfo() to void The function rtw_free_stainfo() returns always _SUCCESS and none of its callers uses the return value. We can safely make the function void. Yet another tiny step towards getting rid of _FAIL / _SUCCESS. Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Michael Straube Link: https://lore.kernel.org/r/20221101191458.8619-3-straube.linux@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_sta_mgt.c | 8 ++------ drivers/staging/r8188eu/include/sta_info.h | 2 +- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_sta_mgt.c b/drivers/staging/r8188eu/core/rtw_sta_mgt.c index 51324e708697..b4aee8623099 100644 --- a/drivers/staging/r8188eu/core/rtw_sta_mgt.c +++ b/drivers/staging/r8188eu/core/rtw_sta_mgt.c @@ -242,7 +242,7 @@ exit: } /* using pstapriv->sta_hash_lock to protect */ -u32 rtw_free_stainfo(struct adapter *padapter, struct sta_info *psta) +void rtw_free_stainfo(struct adapter *padapter, struct sta_info *psta) { int i; struct __queue *pfree_sta_queue; @@ -252,7 +252,7 @@ u32 rtw_free_stainfo(struct adapter *padapter, struct sta_info *psta) struct sta_priv *pstapriv = &padapter->stapriv; if (!psta) - goto exit; + return; pfree_sta_queue = &pstapriv->free_sta_queue; @@ -356,10 +356,6 @@ u32 rtw_free_stainfo(struct adapter *padapter, struct sta_info *psta) spin_lock_bh(&pfree_sta_queue->lock); list_add_tail(&psta->list, 
get_list_head(pfree_sta_queue)); spin_unlock_bh(&pfree_sta_queue->lock); - -exit: - - return _SUCCESS; } /* free all stainfo which in sta_hash[all] */ diff --git a/drivers/staging/r8188eu/include/sta_info.h b/drivers/staging/r8188eu/include/sta_info.h index 55ea5edc2914..e42f4b4c6e24 100644 --- a/drivers/staging/r8188eu/include/sta_info.h +++ b/drivers/staging/r8188eu/include/sta_info.h @@ -303,7 +303,7 @@ int rtw_stainfo_offset(struct sta_priv *stapriv, struct sta_info *sta); struct sta_info *rtw_get_stainfo_by_offset(struct sta_priv *stapriv, int off); struct sta_info *rtw_alloc_stainfo(struct sta_priv *stapriv, u8 *hwaddr); -u32 rtw_free_stainfo(struct adapter *adapt, struct sta_info *psta); +void rtw_free_stainfo(struct adapter *adapt, struct sta_info *psta); void rtw_free_all_stainfo(struct adapter *adapt); struct sta_info *rtw_get_stainfo(struct sta_priv *stapriv, u8 *hwaddr); u32 rtw_init_bcmc_stainfo(struct adapter *adapt); From 72cdc5aec2913bef5dd62eb75cadad8bff27fd8e Mon Sep 17 00:00:00 2001 From: Deepak R Varma Date: Tue, 1 Nov 2022 19:18:10 +0530 Subject: [PATCH 0777/4122] staging: wlan-ng: Remove unused struct wlan_ie_ssid references Pointer reference to struct wlan_ie_ssid is added as a member variable to 5 different structures. However, these references are never used. Remove such unused struct references. The cleanup also renders the struct useless; so remove it as well. Issue identified as part of coccicheck based code analysis. 
Suggested-by: Dan Carpenter Suggested-by: Pavel Skripkin Signed-off-by: Deepak R Varma Link: https://lore.kernel.org/r/6e39ef59d01d65a1e179f6aecfbb0d68b81fa257.1667308828.git.drv@mailo.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/wlan-ng/p80211mgmt.h | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/staging/wlan-ng/p80211mgmt.h b/drivers/staging/wlan-ng/p80211mgmt.h index 1ef30d3f3159..dcff56d18498 100644 --- a/drivers/staging/wlan-ng/p80211mgmt.h +++ b/drivers/staging/wlan-ng/p80211mgmt.h @@ -225,13 +225,6 @@ struct wlan_ie { u8 len; } __packed; -/*-- Service Set Identity (SSID) -----------------*/ -struct wlan_ie_ssid { - u8 eid; - u8 len; - u8 ssid[1]; /* may be zero, ptrs may overlap */ -} __packed; - /*-- Supported Rates -----------------------------*/ struct wlan_ie_supp_rates { u8 eid; @@ -319,7 +312,6 @@ struct wlan_fr_beacon { u16 *bcn_int; u16 *cap_info; /*-- info elements ----------*/ - struct wlan_ie_ssid *ssid; struct wlan_ie_supp_rates *supp_rates; struct wlan_ie_fh_parms *fh_parms; struct wlan_ie_ds_parms *ds_parms; @@ -372,7 +364,6 @@ struct wlan_fr_assocreq { u16 *cap_info; u16 *listen_int; /*-- info elements ----------*/ - struct wlan_ie_ssid *ssid; struct wlan_ie_supp_rates *supp_rates; }; @@ -407,7 +398,6 @@ struct wlan_fr_reassocreq { u16 *listen_int; u8 *curr_ap; /*-- info elements ----------*/ - struct wlan_ie_ssid *ssid; struct wlan_ie_supp_rates *supp_rates; }; @@ -439,7 +429,6 @@ struct wlan_fr_probereq { void *priv; /*-- fixed fields -----------*/ /*-- info elements ----------*/ - struct wlan_ie_ssid *ssid; struct wlan_ie_supp_rates *supp_rates; }; @@ -457,7 +446,6 @@ struct wlan_fr_proberesp { u16 *bcn_int; u16 *cap_info; /*-- info elements ----------*/ - struct wlan_ie_ssid *ssid; struct wlan_ie_supp_rates *supp_rates; struct wlan_ie_fh_parms *fh_parms; struct wlan_ie_ds_parms *ds_parms; From 2a899064b60ad1ede8a3990bd512e859b15896d5 Mon Sep 17 00:00:00 2001 From: Deepak R Varma Date: Tue, 1 Nov 2022 
19:18:35 +0530 Subject: [PATCH 0778/4122] staging: wlan-ng: Remove unused struct wlan_ie_supp_rates references Pointer reference to struct wlan_ie_supp_rates is added as a member variable to 7 different structures. However, these references are never used. Remove such unused struct references. The cleanup also renders the struct useless; so remove it as well. Issue identified as part of coccicheck based code analysis. Suggested-by: Dan Carpenter Suggested-by: Pavel Skripkin Signed-off-by: Deepak R Varma Link: https://lore.kernel.org/r/370feb1f300896af66fa1c443d3ad19dc8934be3.1667308828.git.drv@mailo.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/wlan-ng/p80211mgmt.h | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/drivers/staging/wlan-ng/p80211mgmt.h b/drivers/staging/wlan-ng/p80211mgmt.h index dcff56d18498..536794bdd1f0 100644 --- a/drivers/staging/wlan-ng/p80211mgmt.h +++ b/drivers/staging/wlan-ng/p80211mgmt.h @@ -225,13 +225,6 @@ struct wlan_ie { u8 len; } __packed; -/*-- Supported Rates -----------------------------*/ -struct wlan_ie_supp_rates { - u8 eid; - u8 len; - u8 rates[1]; /* had better be at LEAST one! 
*/ -} __packed; - /*-- FH Parameter Set ----------------------------*/ struct wlan_ie_fh_parms { u8 eid; @@ -312,7 +305,6 @@ struct wlan_fr_beacon { u16 *bcn_int; u16 *cap_info; /*-- info elements ----------*/ - struct wlan_ie_supp_rates *supp_rates; struct wlan_ie_fh_parms *fh_parms; struct wlan_ie_ds_parms *ds_parms; struct wlan_ie_cf_parms *cf_parms; @@ -364,7 +356,6 @@ struct wlan_fr_assocreq { u16 *cap_info; u16 *listen_int; /*-- info elements ----------*/ - struct wlan_ie_supp_rates *supp_rates; }; @@ -381,7 +372,6 @@ struct wlan_fr_assocresp { u16 *status; u16 *aid; /*-- info elements ----------*/ - struct wlan_ie_supp_rates *supp_rates; }; @@ -398,7 +388,6 @@ struct wlan_fr_reassocreq { u16 *listen_int; u8 *curr_ap; /*-- info elements ----------*/ - struct wlan_ie_supp_rates *supp_rates; }; @@ -415,7 +404,6 @@ struct wlan_fr_reassocresp { u16 *status; u16 *aid; /*-- info elements ----------*/ - struct wlan_ie_supp_rates *supp_rates; }; @@ -429,7 +417,6 @@ struct wlan_fr_probereq { void *priv; /*-- fixed fields -----------*/ /*-- info elements ----------*/ - struct wlan_ie_supp_rates *supp_rates; }; @@ -446,7 +433,6 @@ struct wlan_fr_proberesp { u16 *bcn_int; u16 *cap_info; /*-- info elements ----------*/ - struct wlan_ie_supp_rates *supp_rates; struct wlan_ie_fh_parms *fh_parms; struct wlan_ie_ds_parms *ds_parms; struct wlan_ie_cf_parms *cf_parms; From a5f994fc062971aa801eda9e8af214e785e32aff Mon Sep 17 00:00:00 2001 From: Deepak R Varma Date: Tue, 1 Nov 2022 19:19:05 +0530 Subject: [PATCH 0779/4122] staging: wlan-ng: Remove unused struct wlan_ie_tim references Pointer reference to struct wlan_ie_tim is added as a member variable of a structure; however, this reference is never used. Remove such unused struct reference. The cleanup also renders the struct useless; so remove it as well. Issue identified as part of coccicheck based code analysis.
Suggested-by: Dan Carpenter Suggested-by: Pavel Skripkin Signed-off-by: Deepak R Varma Link: https://lore.kernel.org/r/36834007587a2e0ef7a782f5919f3a4c756b7840.1667308828.git.drv@mailo.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/wlan-ng/p80211mgmt.h | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/staging/wlan-ng/p80211mgmt.h b/drivers/staging/wlan-ng/p80211mgmt.h index 536794bdd1f0..d95ffe0097e9 100644 --- a/drivers/staging/wlan-ng/p80211mgmt.h +++ b/drivers/staging/wlan-ng/p80211mgmt.h @@ -253,16 +253,6 @@ struct wlan_ie_cf_parms { u16 cfp_durremaining; } __packed; -/*-- TIM ------------------------------------------*/ -struct wlan_ie_tim { - u8 eid; - u8 len; - u8 dtim_cnt; - u8 dtim_period; - u8 bitmap_ctl; - u8 virt_bm[1]; -} __packed; - /*-- IBSS Parameter Set ---------------------------*/ struct wlan_ie_ibss_parms { u8 eid; @@ -309,8 +299,6 @@ struct wlan_fr_beacon { struct wlan_ie_ds_parms *ds_parms; struct wlan_ie_cf_parms *cf_parms; struct wlan_ie_ibss_parms *ibss_parms; - struct wlan_ie_tim *tim; - }; /*-- IBSS ATIM ------------------------------------*/ From a2c896ab66f2c65cf6eeba140b97200a47d0450e Mon Sep 17 00:00:00 2001 From: Deepak R Varma Date: Tue, 1 Nov 2022 19:19:30 +0530 Subject: [PATCH 0780/4122] staging: wlan-ng: Remove unused struct wlan_ie_ibss_parms references Pointer reference to struct wlan_ie_ibss_parms is added as a member variable to 2 structures; However, these references are never used. Remove such unused struct references. The cleanup also renders the struct useless; so remove it as well. Issue identified as part of coccicheck based code analysis. 
Suggested-by: Dan Carpenter Suggested-by: Pavel Skripkin Signed-off-by: Deepak R Varma Link: https://lore.kernel.org/r/e53084cdd3ca3ffea6a32393f6f28c7b7650d1a4.1667308828.git.drv@mailo.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/wlan-ng/p80211mgmt.h | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/staging/wlan-ng/p80211mgmt.h b/drivers/staging/wlan-ng/p80211mgmt.h index d95ffe0097e9..816b25641f5a 100644 --- a/drivers/staging/wlan-ng/p80211mgmt.h +++ b/drivers/staging/wlan-ng/p80211mgmt.h @@ -253,13 +253,6 @@ struct wlan_ie_cf_parms { u16 cfp_durremaining; } __packed; -/*-- IBSS Parameter Set ---------------------------*/ -struct wlan_ie_ibss_parms { - u8 eid; - u8 len; - u16 atim_win; -} __packed; - /*-- Challenge Text ------------------------------*/ struct wlan_ie_challenge { u8 eid; @@ -298,7 +291,6 @@ struct wlan_fr_beacon { struct wlan_ie_fh_parms *fh_parms; struct wlan_ie_ds_parms *ds_parms; struct wlan_ie_cf_parms *cf_parms; - struct wlan_ie_ibss_parms *ibss_parms; }; /*-- IBSS ATIM ------------------------------------*/ @@ -424,7 +416,6 @@ struct wlan_fr_proberesp { struct wlan_ie_fh_parms *fh_parms; struct wlan_ie_ds_parms *ds_parms; struct wlan_ie_cf_parms *cf_parms; - struct wlan_ie_ibss_parms *ibss_parms; }; /*-- Authentication -------------------------------*/ From 5d2366684f9331dc91449b7b866f172d7641ecf3 Mon Sep 17 00:00:00 2001 From: Deepak R Varma Date: Tue, 1 Nov 2022 19:19:54 +0530 Subject: [PATCH 0781/4122] staging: wlan-ng: Remove unused struct p80211macarray definition struct p80211macarray is defined but is never used. Remove the unused struct declaration. Issue identified as part of coccicheck based code analysis. 
Suggested-by: Dan Carpenter Suggested-by: Pavel Skripkin Signed-off-by: Deepak R Varma Link: https://lore.kernel.org/r/b42eff9b8f73542bc9015948e5e4684416982e1b.1667308828.git.drv@mailo.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/wlan-ng/p80211types.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/staging/wlan-ng/p80211types.h b/drivers/staging/wlan-ng/p80211types.h index 6486612a8f31..b2ed96960413 100644 --- a/drivers/staging/wlan-ng/p80211types.h +++ b/drivers/staging/wlan-ng/p80211types.h @@ -231,12 +231,6 @@ struct p80211pstr32 { u8 data[MAXLEN_PSTR32]; } __packed; -/* MAC address array */ -struct p80211macarray { - u32 cnt; - u8 data[1][MAXLEN_PSTR6]; -} __packed; - /* prototype template */ struct p80211item { u32 did; From 49af5dd7209ea2242a3c55e0b180f7ff7913fa3b Mon Sep 17 00:00:00 2001 From: Deepak R Varma Date: Tue, 1 Nov 2022 19:20:13 +0530 Subject: [PATCH 0782/4122] staging: wlan-ng: Remove unused function declarations Several functions are declared but are not implemented or used in any part of the code. Remove such unimplemented function declarations. 
Signed-off-by: Deepak R Varma Link: https://lore.kernel.org/r/09e2af8d702e33c15ed9f655b0a1190b4e8bec86.1667308828.git.drv@mailo.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/wlan-ng/p80211mgmt.h | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/drivers/staging/wlan-ng/p80211mgmt.h b/drivers/staging/wlan-ng/p80211mgmt.h index 816b25641f5a..da824bc36365 100644 --- a/drivers/staging/wlan-ng/p80211mgmt.h +++ b/drivers/staging/wlan-ng/p80211mgmt.h @@ -449,26 +449,4 @@ struct wlan_fr_deauthen { /*-- info elements ----------*/ }; - -void wlan_mgmt_encode_beacon(struct wlan_fr_beacon *f); -void wlan_mgmt_decode_beacon(struct wlan_fr_beacon *f); -void wlan_mgmt_encode_disassoc(struct wlan_fr_disassoc *f); -void wlan_mgmt_decode_disassoc(struct wlan_fr_disassoc *f); -void wlan_mgmt_encode_assocreq(struct wlan_fr_assocreq *f); -void wlan_mgmt_decode_assocreq(struct wlan_fr_assocreq *f); -void wlan_mgmt_encode_assocresp(struct wlan_fr_assocresp *f); -void wlan_mgmt_decode_assocresp(struct wlan_fr_assocresp *f); -void wlan_mgmt_encode_reassocreq(struct wlan_fr_reassocreq *f); -void wlan_mgmt_decode_reassocreq(struct wlan_fr_reassocreq *f); -void wlan_mgmt_encode_reassocresp(struct wlan_fr_reassocresp *f); -void wlan_mgmt_decode_reassocresp(struct wlan_fr_reassocresp *f); -void wlan_mgmt_encode_probereq(struct wlan_fr_probereq *f); -void wlan_mgmt_decode_probereq(struct wlan_fr_probereq *f); -void wlan_mgmt_encode_proberesp(struct wlan_fr_proberesp *f); -void wlan_mgmt_decode_proberesp(struct wlan_fr_proberesp *f); -void wlan_mgmt_encode_authen(struct wlan_fr_authen *f); -void wlan_mgmt_decode_authen(struct wlan_fr_authen *f); -void wlan_mgmt_encode_deauthen(struct wlan_fr_deauthen *f); -void wlan_mgmt_decode_deauthen(struct wlan_fr_deauthen *f); - #endif /* _P80211MGMT_H */ From d911a624cfb40e356b98f135d52914b30214fd7f Mon Sep 17 00:00:00 2001 From: Deepak R Varma Date: Tue, 1 Nov 2022 19:20:40 +0530 Subject: [PATCH 0783/4122] staging: 
wlan-ng: Remove unused structure definitions Remove structure definitions that are never used in the code. Signed-off-by: Deepak R Varma Link: https://lore.kernel.org/r/74a79ea400ec26624e445692f3353424fb6fc29e.1667308828.git.drv@mailo.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/wlan-ng/p80211mgmt.h | 232 --------------------------- 1 file changed, 232 deletions(-) diff --git a/drivers/staging/wlan-ng/p80211mgmt.h b/drivers/staging/wlan-ng/p80211mgmt.h index da824bc36365..fc23fae5651b 100644 --- a/drivers/staging/wlan-ng/p80211mgmt.h +++ b/drivers/staging/wlan-ng/p80211mgmt.h @@ -217,236 +217,4 @@ #define WLAN_SET_MGMT_CAP_INFO_PBCC(n) ((n) << 6) #define WLAN_SET_MGMT_CAP_INFO_AGILITY(n) ((n) << 7) -/*-- Information Element Types --------------------*/ -/* prototype structure, all IEs start with these members */ - -struct wlan_ie { - u8 eid; - u8 len; -} __packed; - -/*-- FH Parameter Set ----------------------------*/ -struct wlan_ie_fh_parms { - u8 eid; - u8 len; - u16 dwell; - u8 hopset; - u8 hoppattern; - u8 hopindex; -} __packed; - -/*-- DS Parameter Set ----------------------------*/ -struct wlan_ie_ds_parms { - u8 eid; - u8 len; - u8 curr_ch; -} __packed; - -/*-- CF Parameter Set ----------------------------*/ - -struct wlan_ie_cf_parms { - u8 eid; - u8 len; - u8 cfp_cnt; - u8 cfp_period; - u16 cfp_maxdur; - u16 cfp_durremaining; -} __packed; - -/*-- Challenge Text ------------------------------*/ -struct wlan_ie_challenge { - u8 eid; - u8 len; - u8 challenge[1]; -} __packed; - -/*-------------------------------------------------*/ -/* Frame Types */ - -/* prototype structure, all mgmt frame types will start with these members */ -struct wlan_fr_mgmt { - u16 type; - u16 len; /* DOES NOT include CRC !!!! 
*/ - u8 *buf; - struct p80211_hdr *hdr; - /* used for target specific data, skb in Linux */ - void *priv; - /*-- fixed fields -----------*/ - /*-- info elements ----------*/ -}; - -/*-- Beacon ---------------------------------------*/ -struct wlan_fr_beacon { - u16 type; - u16 len; - u8 *buf; - struct p80211_hdr *hdr; - /* used for target specific data, skb in Linux */ - void *priv; - /*-- fixed fields -----------*/ - u64 *ts; - u16 *bcn_int; - u16 *cap_info; - /*-- info elements ----------*/ - struct wlan_ie_fh_parms *fh_parms; - struct wlan_ie_ds_parms *ds_parms; - struct wlan_ie_cf_parms *cf_parms; -}; - -/*-- IBSS ATIM ------------------------------------*/ -struct wlan_fr_ibssatim { - u16 type; - u16 len; - u8 *buf; - struct p80211_hdr *hdr; - /* used for target specific data, skb in Linux */ - void *priv; - - /*-- fixed fields -----------*/ - /*-- info elements ----------*/ - - /* this frame type has a null body */ - -}; - -/*-- Disassociation -------------------------------*/ -struct wlan_fr_disassoc { - u16 type; - u16 len; - u8 *buf; - struct p80211_hdr *hdr; - /* used for target specific data, skb in Linux */ - void *priv; - /*-- fixed fields -----------*/ - u16 *reason; - - /*-- info elements ----------*/ - -}; - -/*-- Association Request --------------------------*/ -struct wlan_fr_assocreq { - u16 type; - u16 len; - u8 *buf; - struct p80211_hdr *hdr; - /* used for target specific data, skb in Linux */ - void *priv; - /*-- fixed fields -----------*/ - u16 *cap_info; - u16 *listen_int; - /*-- info elements ----------*/ - -}; - -/*-- Association Response -------------------------*/ -struct wlan_fr_assocresp { - u16 type; - u16 len; - u8 *buf; - struct p80211_hdr *hdr; - /* used for target specific data, skb in Linux */ - void *priv; - /*-- fixed fields -----------*/ - u16 *cap_info; - u16 *status; - u16 *aid; - /*-- info elements ----------*/ - -}; - -/*-- Reassociation Request ------------------------*/ -struct wlan_fr_reassocreq { - u16 type; - u16 len; 
- u8 *buf; - struct p80211_hdr *hdr; - /* used for target specific data, skb in Linux */ - void *priv; - /*-- fixed fields -----------*/ - u16 *cap_info; - u16 *listen_int; - u8 *curr_ap; - /*-- info elements ----------*/ - -}; - -/*-- Reassociation Response -----------------------*/ -struct wlan_fr_reassocresp { - u16 type; - u16 len; - u8 *buf; - struct p80211_hdr *hdr; - /* used for target specific data, skb in Linux */ - void *priv; - /*-- fixed fields -----------*/ - u16 *cap_info; - u16 *status; - u16 *aid; - /*-- info elements ----------*/ - -}; - -/*-- Probe Request --------------------------------*/ -struct wlan_fr_probereq { - u16 type; - u16 len; - u8 *buf; - struct p80211_hdr *hdr; - /* used for target specific data, skb in Linux */ - void *priv; - /*-- fixed fields -----------*/ - /*-- info elements ----------*/ - -}; - -/*-- Probe Response -------------------------------*/ -struct wlan_fr_proberesp { - u16 type; - u16 len; - u8 *buf; - struct p80211_hdr *hdr; - /* used for target specific data, skb in Linux */ - void *priv; - /*-- fixed fields -----------*/ - u64 *ts; - u16 *bcn_int; - u16 *cap_info; - /*-- info elements ----------*/ - struct wlan_ie_fh_parms *fh_parms; - struct wlan_ie_ds_parms *ds_parms; - struct wlan_ie_cf_parms *cf_parms; -}; - -/*-- Authentication -------------------------------*/ -struct wlan_fr_authen { - u16 type; - u16 len; - u8 *buf; - struct p80211_hdr *hdr; - /* used for target specific data, skb in Linux */ - void *priv; - /*-- fixed fields -----------*/ - u16 *auth_alg; - u16 *auth_seq; - u16 *status; - /*-- info elements ----------*/ - struct wlan_ie_challenge *challenge; - -}; - -/*-- Deauthenication -----------------------------*/ -struct wlan_fr_deauthen { - u16 type; - u16 len; - u8 *buf; - struct p80211_hdr *hdr; - /* used for target specific data, skb in Linux */ - void *priv; - /*-- fixed fields -----------*/ - u16 *reason; - - /*-- info elements ----------*/ - -}; #endif /* _P80211MGMT_H */ From 
a37068715956111af1d49105d9e41b4cc8f69ea0 Mon Sep 17 00:00:00 2001 From: Phillip Potter Date: Wed, 2 Nov 2022 00:36:13 +0000 Subject: [PATCH 0784/4122] staging: r8188eu: convert rtw_setdatarate_cmd to correct error semantics Convert rtw_setdatarate_cmd function to use proper error return codes rather than _SUCCESS and _FAIL, and a simpler 'return 0;' style. For now, wrap rtw_enqueue_cmd call and return -EPERM if it fails, as converting this function makes more sense later on due to its large number of callers. Also change rtw_wx_set_rate function to pass through the proper error code rather than just 0 or -1. Signed-off-by: Phillip Potter Link: https://lore.kernel.org/r/20221102003613.971-1-phil@philpotter.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_cmd.c | 18 +++++++----------- drivers/staging/r8188eu/include/rtw_cmd.h | 2 +- drivers/staging/r8188eu/os_dep/ioctl_linux.c | 7 ++----- 3 files changed, 10 insertions(+), 17 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_cmd.c b/drivers/staging/r8188eu/core/rtw_cmd.c index fdc0c71e28a4..19b2f73d481d 100644 --- a/drivers/staging/r8188eu/core/rtw_cmd.c +++ b/drivers/staging/r8188eu/core/rtw_cmd.c @@ -340,33 +340,29 @@ u8 rtw_sitesurvey_cmd(struct adapter *padapter, struct ndis_802_11_ssid *ssid, return res; } -u8 rtw_setdatarate_cmd(struct adapter *padapter, u8 *rateset) +int rtw_setdatarate_cmd(struct adapter *padapter, u8 *rateset) { struct cmd_obj *ph2c; struct setdatarate_parm *pbsetdataratepara; struct cmd_priv *pcmdpriv = &padapter->cmdpriv; - u8 res = _SUCCESS; ph2c = kzalloc(sizeof(*ph2c), GFP_ATOMIC); - if (!ph2c) { - res = _FAIL; - goto exit; - } + if (!ph2c) + return -ENOMEM; pbsetdataratepara = kzalloc(sizeof(*pbsetdataratepara), GFP_ATOMIC); if (!pbsetdataratepara) { kfree(ph2c); - res = _FAIL; - goto exit; + return -ENOMEM; } init_h2fwcmd_w_parm_no_rsp(ph2c, pbsetdataratepara, GEN_CMD_CODE(_SetDataRate)); pbsetdataratepara->mac_id = 5; 
memcpy(pbsetdataratepara->datarates, rateset, NumRates); - res = rtw_enqueue_cmd(pcmdpriv, ph2c); -exit: + if (rtw_enqueue_cmd(pcmdpriv, ph2c) == _FAIL) + return -EPERM; - return res; + return 0; } void rtw_getbbrfreg_cmdrsp_callback(struct adapter *padapter, struct cmd_obj *pcmd) diff --git a/drivers/staging/r8188eu/include/rtw_cmd.h b/drivers/staging/r8188eu/include/rtw_cmd.h index a740a9a101d8..c330a4435b31 100644 --- a/drivers/staging/r8188eu/include/rtw_cmd.h +++ b/drivers/staging/r8188eu/include/rtw_cmd.h @@ -727,7 +727,7 @@ u8 rtw_clearstakey_cmd(struct adapter *padapter, u8 *psta, u8 entry, u8 enqueue) u8 rtw_joinbss_cmd(struct adapter *padapter, struct wlan_network *pnetwork); u8 rtw_disassoc_cmd(struct adapter *padapter, u32 deauth_timeout_ms, bool enqueue); u8 rtw_setopmode_cmd(struct adapter *padapter, enum ndis_802_11_network_infra networktype); -u8 rtw_setdatarate_cmd(struct adapter *padapter, u8 *rateset); +int rtw_setdatarate_cmd(struct adapter *padapter, u8 *rateset); u8 rtw_setrfintfs_cmd(struct adapter *padapter, u8 mode); u8 rtw_gettssi_cmd(struct adapter *padapter, u8 offset, u8 *pval); diff --git a/drivers/staging/r8188eu/os_dep/ioctl_linux.c b/drivers/staging/r8188eu/os_dep/ioctl_linux.c index f3b3d7468539..dda48a2a6d0c 100644 --- a/drivers/staging/r8188eu/os_dep/ioctl_linux.c +++ b/drivers/staging/r8188eu/os_dep/ioctl_linux.c @@ -1340,7 +1340,7 @@ static int rtw_wx_set_rate(struct net_device *dev, struct iw_request_info *a, union iwreq_data *wrqu, char *extra) { - int i, ret = 0; + int i; struct adapter *padapter = (struct adapter *)rtw_netdev_priv(dev); u8 datarates[NumRates]; u32 target_rate = wrqu->bitrate.value; @@ -1408,10 +1408,7 @@ set_rate: } } - if (rtw_setdatarate_cmd(padapter, datarates) != _SUCCESS) - ret = -1; - - return ret; + return rtw_setdatarate_cmd(padapter, datarates); } static int rtw_wx_get_rate(struct net_device *dev, From 4b21d25bf519c9487935a664886956bb18f04f6d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 24 
Oct 2022 23:11:25 +0300 Subject: [PATCH 0785/4122] overflow: Introduce overflows_type() and castable_to_type() Implement a robust overflows_type() macro to test if a variable or constant value would overflow another variable or type. This can be used as a constant expression for static_assert() (which requires a constant expression[1][2]) when used on constant values. This must be constructed manually, since __builtin_add_overflow() does not produce a constant expression[3]. Additionally adds castable_to_type(), similar to __same_type(), but for checking if a constant value would overflow if cast to a given type. Add unit tests for overflows_type(), __same_type(), and castable_to_type() to the existing KUnit "overflow" test: [16:03:33] ================== overflow (21 subtests) ================== ... [16:03:33] [PASSED] overflows_type_test [16:03:33] [PASSED] same_type_test [16:03:33] [PASSED] castable_to_type_test [16:03:33] ==================== [PASSED] overflow ===================== [16:03:33] ============================================================ [16:03:33] Testing complete. Ran 21 tests: passed: 21 [16:03:33] Elapsed time: 24.022s total, 0.002s configuring, 22.598s building, 0.767s running [1] https://en.cppreference.com/w/c/language/_Static_assert [2] C11 standard (ISO/IEC 9899:2011): 6.7.10 Static assertions [3] https://gcc.gnu.org/onlinedocs/gcc/Integer-Overflow-Builtins.html 6.56 Built-in Functions to Perform Arithmetic with Overflow Checking Built-in Function: bool __builtin_add_overflow (type1 a, type2 b, Cc: Luc Van Oostenryck Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Tom Rix Cc: Daniel Latypov Cc: Vitor Massaru Iha Cc: "Gustavo A. R. 
Silva" Cc: Jani Nikula Cc: Mauro Carvalho Chehab Cc: linux-hardening@vger.kernel.org Cc: llvm@lists.linux.dev Co-developed-by: Gwan-gyeong Mun Signed-off-by: Gwan-gyeong Mun Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221024201125.1416422-1-gwan-gyeong.mun@intel.com --- drivers/gpu/drm/i915/i915_user_extensions.c | 2 +- drivers/gpu/drm/i915/i915_utils.h | 4 - include/linux/compiler.h | 1 + include/linux/overflow.h | 47 +++ lib/Makefile | 1 + lib/overflow_kunit.c | 381 ++++++++++++++++++++ 6 files changed, 431 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_user_extensions.c b/drivers/gpu/drm/i915/i915_user_extensions.c index c822d0aafd2d..e3f808372c47 100644 --- a/drivers/gpu/drm/i915/i915_user_extensions.c +++ b/drivers/gpu/drm/i915/i915_user_extensions.c @@ -51,7 +51,7 @@ int i915_user_extensions(struct i915_user_extension __user *ext, return err; if (get_user(next, &ext->next_extension) || - overflows_type(next, ext)) + overflows_type(next, uintptr_t)) return -EFAULT; ext = u64_to_user_ptr(next); diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 6c14d13364bf..67a66d4d5c70 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -111,10 +111,6 @@ bool i915_error_injected(void); #define range_overflows_end_t(type, start, size, max) \ range_overflows_end((type)(start), (type)(size), (type)(max)) -/* Note we don't consider signbits :| */ -#define overflows_type(x, T) \ - (sizeof(x) > sizeof(T) && (x) >> BITS_PER_TYPE(T)) - #define ptr_mask_bits(ptr, n) ({ \ unsigned long __v = (unsigned long)(ptr); \ (typeof(ptr))(__v & -BIT(n)); \ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 973a1bfd7ef5..947a60b801db 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -236,6 +236,7 @@ static inline void *offset_to_ptr(const int *off) * bool and also pointer types. 
*/ #define is_signed_type(type) (((type)(-1)) < (__force type)1) +#define is_unsigned_type(type) (!is_signed_type(type)) /* * This is needed in functions which generate the stack canary, see diff --git a/include/linux/overflow.h b/include/linux/overflow.h index 1d3be1a2204c..0e33b5cbdb9f 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -128,6 +128,53 @@ static inline bool __must_check __must_check_overflow(bool overflow) (*_d >> _to_shift) != _a); \ })) +#define __overflows_type_constexpr(x, T) ( \ + is_unsigned_type(typeof(x)) ? \ + (x) > type_max(typeof(T)) : \ + is_unsigned_type(typeof(T)) ? \ + (x) < 0 || (x) > type_max(typeof(T)) : \ + (x) < type_min(typeof(T)) || (x) > type_max(typeof(T))) + +#define __overflows_type(x, T) ({ \ + typeof(T) v = 0; \ + check_add_overflow((x), v, &v); \ +}) + +/** + * overflows_type - helper for checking the overflows between value, variables, + * or data type + * + * @n: source constant value or variable to be checked + * @T: destination variable or data type proposed to store @n + * + * Compares the @n expression for whether or not it can safely fit in + * the storage of the type in @T. @n and @T can have different types. + * If @n is a constant expression, this will also resolve to a constant + * expression. + * + * Returns: true if overflow can occur, false otherwise. + */ +#define overflows_type(n, T) \ + __builtin_choose_expr(__is_constexpr(n), \ + __overflows_type_constexpr(n, T), \ + __overflows_type(n, T)) + +/** + * castable_to_type - like __same_type(), but also allows for casted literals + * + * @n: variable or constant value + * @T: variable or data type + * + * Unlike the __same_type() macro, this allows a constant value as the + * first argument. If this value would not overflow into an assignment + * of the second argument's type, it returns true. Otherwise, this falls + * back to __same_type().
+ */ +#define castable_to_type(n, T) \ + __builtin_choose_expr(__is_constexpr(n), \ + !__overflows_type_constexpr(n, T), \ + __same_type(n, T)) + /** * size_mul() - Calculate size_t multiplication with saturation at SIZE_MAX * @factor1: first factor diff --git a/lib/Makefile b/lib/Makefile index 77c7951c8cf0..322178b9f7fb 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -374,6 +374,7 @@ obj-$(CONFIG_CMDLINE_KUNIT_TEST) += cmdline_kunit.o obj-$(CONFIG_SLUB_KUNIT_TEST) += slub_kunit.o obj-$(CONFIG_MEMCPY_KUNIT_TEST) += memcpy_kunit.o obj-$(CONFIG_IS_SIGNED_TYPE_KUNIT_TEST) += is_signed_type_kunit.o +CFLAGS_overflow_kunit.o = $(call cc-disable-warning, tautological-constant-out-of-range-compare) obj-$(CONFIG_OVERFLOW_KUNIT_TEST) += overflow_kunit.o CFLAGS_stackinit_kunit.o += $(call cc-disable-warning, switch-unreachable) obj-$(CONFIG_STACKINIT_KUNIT_TEST) += stackinit_kunit.o diff --git a/lib/overflow_kunit.c b/lib/overflow_kunit.c index b8556a2e7bb1..dcd3ba102db6 100644 --- a/lib/overflow_kunit.c +++ b/lib/overflow_kunit.c @@ -736,6 +736,384 @@ static void overflow_size_helpers_test(struct kunit *test) #undef check_one_size_helper } +static void overflows_type_test(struct kunit *test) +{ + int count = 0; + unsigned int var; + +#define __TEST_OVERFLOWS_TYPE(func, arg1, arg2, of) do { \ + bool __of = func(arg1, arg2); \ + KUNIT_EXPECT_EQ_MSG(test, __of, of, \ + "expected " #func "(" #arg1 ", " #arg2 " to%s overflow\n",\ + of ? 
"" : " not"); \ + count++; \ +} while (0) + +/* Args are: first type, second type, value, overflow expected */ +#define TEST_OVERFLOWS_TYPE(__t1, __t2, v, of) do { \ + __t1 t1 = (v); \ + __t2 t2; \ + __TEST_OVERFLOWS_TYPE(__overflows_type, t1, t2, of); \ + __TEST_OVERFLOWS_TYPE(__overflows_type, t1, __t2, of); \ + __TEST_OVERFLOWS_TYPE(__overflows_type_constexpr, t1, t2, of); \ + __TEST_OVERFLOWS_TYPE(__overflows_type_constexpr, t1, __t2, of);\ +} while (0) + + TEST_OVERFLOWS_TYPE(u8, u8, U8_MAX, false); + TEST_OVERFLOWS_TYPE(u8, u16, U8_MAX, false); + TEST_OVERFLOWS_TYPE(u8, s8, U8_MAX, true); + TEST_OVERFLOWS_TYPE(u8, s8, S8_MAX, false); + TEST_OVERFLOWS_TYPE(u8, s8, (u8)S8_MAX + 1, true); + TEST_OVERFLOWS_TYPE(u8, s16, U8_MAX, false); + TEST_OVERFLOWS_TYPE(s8, u8, S8_MAX, false); + TEST_OVERFLOWS_TYPE(s8, u8, -1, true); + TEST_OVERFLOWS_TYPE(s8, u8, S8_MIN, true); + TEST_OVERFLOWS_TYPE(s8, u16, S8_MAX, false); + TEST_OVERFLOWS_TYPE(s8, u16, -1, true); + TEST_OVERFLOWS_TYPE(s8, u16, S8_MIN, true); + TEST_OVERFLOWS_TYPE(s8, u32, S8_MAX, false); + TEST_OVERFLOWS_TYPE(s8, u32, -1, true); + TEST_OVERFLOWS_TYPE(s8, u32, S8_MIN, true); +#if BITS_PER_LONG == 64 + TEST_OVERFLOWS_TYPE(s8, u64, S8_MAX, false); + TEST_OVERFLOWS_TYPE(s8, u64, -1, true); + TEST_OVERFLOWS_TYPE(s8, u64, S8_MIN, true); +#endif + TEST_OVERFLOWS_TYPE(s8, s8, S8_MAX, false); + TEST_OVERFLOWS_TYPE(s8, s8, S8_MIN, false); + TEST_OVERFLOWS_TYPE(s8, s16, S8_MAX, false); + TEST_OVERFLOWS_TYPE(s8, s16, S8_MIN, false); + TEST_OVERFLOWS_TYPE(u16, u8, U8_MAX, false); + TEST_OVERFLOWS_TYPE(u16, u8, (u16)U8_MAX + 1, true); + TEST_OVERFLOWS_TYPE(u16, u8, U16_MAX, true); + TEST_OVERFLOWS_TYPE(u16, s8, S8_MAX, false); + TEST_OVERFLOWS_TYPE(u16, s8, (u16)S8_MAX + 1, true); + TEST_OVERFLOWS_TYPE(u16, s8, U16_MAX, true); + TEST_OVERFLOWS_TYPE(u16, s16, S16_MAX, false); + TEST_OVERFLOWS_TYPE(u16, s16, (u16)S16_MAX + 1, true); + TEST_OVERFLOWS_TYPE(u16, s16, U16_MAX, true); + TEST_OVERFLOWS_TYPE(u16, u32, U16_MAX, 
false); + TEST_OVERFLOWS_TYPE(u16, s32, U16_MAX, false); + TEST_OVERFLOWS_TYPE(s16, u8, U8_MAX, false); + TEST_OVERFLOWS_TYPE(s16, u8, (s16)U8_MAX + 1, true); + TEST_OVERFLOWS_TYPE(s16, u8, -1, true); + TEST_OVERFLOWS_TYPE(s16, u8, S16_MIN, true); + TEST_OVERFLOWS_TYPE(s16, u16, S16_MAX, false); + TEST_OVERFLOWS_TYPE(s16, u16, -1, true); + TEST_OVERFLOWS_TYPE(s16, u16, S16_MIN, true); + TEST_OVERFLOWS_TYPE(s16, u32, S16_MAX, false); + TEST_OVERFLOWS_TYPE(s16, u32, -1, true); + TEST_OVERFLOWS_TYPE(s16, u32, S16_MIN, true); +#if BITS_PER_LONG == 64 + TEST_OVERFLOWS_TYPE(s16, u64, S16_MAX, false); + TEST_OVERFLOWS_TYPE(s16, u64, -1, true); + TEST_OVERFLOWS_TYPE(s16, u64, S16_MIN, true); +#endif + TEST_OVERFLOWS_TYPE(s16, s8, S8_MAX, false); + TEST_OVERFLOWS_TYPE(s16, s8, S8_MIN, false); + TEST_OVERFLOWS_TYPE(s16, s8, (s16)S8_MAX + 1, true); + TEST_OVERFLOWS_TYPE(s16, s8, (s16)S8_MIN - 1, true); + TEST_OVERFLOWS_TYPE(s16, s8, S16_MAX, true); + TEST_OVERFLOWS_TYPE(s16, s8, S16_MIN, true); + TEST_OVERFLOWS_TYPE(s16, s16, S16_MAX, false); + TEST_OVERFLOWS_TYPE(s16, s16, S16_MIN, false); + TEST_OVERFLOWS_TYPE(s16, s32, S16_MAX, false); + TEST_OVERFLOWS_TYPE(s16, s32, S16_MIN, false); + TEST_OVERFLOWS_TYPE(u32, u8, U8_MAX, false); + TEST_OVERFLOWS_TYPE(u32, u8, (u32)U8_MAX + 1, true); + TEST_OVERFLOWS_TYPE(u32, u8, U32_MAX, true); + TEST_OVERFLOWS_TYPE(u32, s8, S8_MAX, false); + TEST_OVERFLOWS_TYPE(u32, s8, (u32)S8_MAX + 1, true); + TEST_OVERFLOWS_TYPE(u32, s8, U32_MAX, true); + TEST_OVERFLOWS_TYPE(u32, u16, U16_MAX, false); + TEST_OVERFLOWS_TYPE(u32, u16, U16_MAX + 1, true); + TEST_OVERFLOWS_TYPE(u32, u16, U32_MAX, true); + TEST_OVERFLOWS_TYPE(u32, s16, S16_MAX, false); + TEST_OVERFLOWS_TYPE(u32, s16, (u32)S16_MAX + 1, true); + TEST_OVERFLOWS_TYPE(u32, s16, U32_MAX, true); + TEST_OVERFLOWS_TYPE(u32, u32, U32_MAX, false); + TEST_OVERFLOWS_TYPE(u32, s32, S32_MAX, false); + TEST_OVERFLOWS_TYPE(u32, s32, U32_MAX, true); + TEST_OVERFLOWS_TYPE(u32, s32, (u32)S32_MAX + 1, true); 
+#if BITS_PER_LONG == 64 + TEST_OVERFLOWS_TYPE(u32, u64, U32_MAX, false); + TEST_OVERFLOWS_TYPE(u32, s64, U32_MAX, false); +#endif + TEST_OVERFLOWS_TYPE(s32, u8, U8_MAX, false); + TEST_OVERFLOWS_TYPE(s32, u8, (s32)U8_MAX + 1, true); + TEST_OVERFLOWS_TYPE(s32, u16, S32_MAX, true); + TEST_OVERFLOWS_TYPE(s32, u8, -1, true); + TEST_OVERFLOWS_TYPE(s32, u8, S32_MIN, true); + TEST_OVERFLOWS_TYPE(s32, u16, U16_MAX, false); + TEST_OVERFLOWS_TYPE(s32, u16, (s32)U16_MAX + 1, true); + TEST_OVERFLOWS_TYPE(s32, u16, S32_MAX, true); + TEST_OVERFLOWS_TYPE(s32, u16, -1, true); + TEST_OVERFLOWS_TYPE(s32, u16, S32_MIN, true); + TEST_OVERFLOWS_TYPE(s32, u32, S32_MAX, false); + TEST_OVERFLOWS_TYPE(s32, u32, -1, true); + TEST_OVERFLOWS_TYPE(s32, u32, S32_MIN, true); +#if BITS_PER_LONG == 64 + TEST_OVERFLOWS_TYPE(s32, u64, S32_MAX, false); + TEST_OVERFLOWS_TYPE(s32, u64, -1, true); + TEST_OVERFLOWS_TYPE(s32, u64, S32_MIN, true); +#endif + TEST_OVERFLOWS_TYPE(s32, s8, S8_MAX, false); + TEST_OVERFLOWS_TYPE(s32, s8, S8_MIN, false); + TEST_OVERFLOWS_TYPE(s32, s8, (s32)S8_MAX + 1, true); + TEST_OVERFLOWS_TYPE(s32, s8, (s32)S8_MIN - 1, true); + TEST_OVERFLOWS_TYPE(s32, s8, S32_MAX, true); + TEST_OVERFLOWS_TYPE(s32, s8, S32_MIN, true); + TEST_OVERFLOWS_TYPE(s32, s16, S16_MAX, false); + TEST_OVERFLOWS_TYPE(s32, s16, S16_MIN, false); + TEST_OVERFLOWS_TYPE(s32, s16, (s32)S16_MAX + 1, true); + TEST_OVERFLOWS_TYPE(s32, s16, (s32)S16_MIN - 1, true); + TEST_OVERFLOWS_TYPE(s32, s16, S32_MAX, true); + TEST_OVERFLOWS_TYPE(s32, s16, S32_MIN, true); + TEST_OVERFLOWS_TYPE(s32, s32, S32_MAX, false); + TEST_OVERFLOWS_TYPE(s32, s32, S32_MIN, false); +#if BITS_PER_LONG == 64 + TEST_OVERFLOWS_TYPE(s32, s64, S32_MAX, false); + TEST_OVERFLOWS_TYPE(s32, s64, S32_MIN, false); + TEST_OVERFLOWS_TYPE(u64, u8, U64_MAX, true); + TEST_OVERFLOWS_TYPE(u64, u8, U8_MAX, false); + TEST_OVERFLOWS_TYPE(u64, u8, (u64)U8_MAX + 1, true); + TEST_OVERFLOWS_TYPE(u64, u16, U64_MAX, true); + TEST_OVERFLOWS_TYPE(u64, u16, U16_MAX, 
false); + TEST_OVERFLOWS_TYPE(u64, u16, (u64)U16_MAX + 1, true); + TEST_OVERFLOWS_TYPE(u64, u32, U64_MAX, true); + TEST_OVERFLOWS_TYPE(u64, u32, U32_MAX, false); + TEST_OVERFLOWS_TYPE(u64, u32, (u64)U32_MAX + 1, true); + TEST_OVERFLOWS_TYPE(u64, u64, U64_MAX, false); + TEST_OVERFLOWS_TYPE(u64, s8, S8_MAX, false); + TEST_OVERFLOWS_TYPE(u64, s8, (u64)S8_MAX + 1, true); + TEST_OVERFLOWS_TYPE(u64, s8, U64_MAX, true); + TEST_OVERFLOWS_TYPE(u64, s16, S16_MAX, false); + TEST_OVERFLOWS_TYPE(u64, s16, (u64)S16_MAX + 1, true); + TEST_OVERFLOWS_TYPE(u64, s16, U64_MAX, true); + TEST_OVERFLOWS_TYPE(u64, s32, S32_MAX, false); + TEST_OVERFLOWS_TYPE(u64, s32, (u64)S32_MAX + 1, true); + TEST_OVERFLOWS_TYPE(u64, s32, U64_MAX, true); + TEST_OVERFLOWS_TYPE(u64, s64, S64_MAX, false); + TEST_OVERFLOWS_TYPE(u64, s64, U64_MAX, true); + TEST_OVERFLOWS_TYPE(u64, s64, (u64)S64_MAX + 1, true); + TEST_OVERFLOWS_TYPE(s64, u8, S64_MAX, true); + TEST_OVERFLOWS_TYPE(s64, u8, S64_MIN, true); + TEST_OVERFLOWS_TYPE(s64, u8, -1, true); + TEST_OVERFLOWS_TYPE(s64, u8, U8_MAX, false); + TEST_OVERFLOWS_TYPE(s64, u8, (s64)U8_MAX + 1, true); + TEST_OVERFLOWS_TYPE(s64, u16, S64_MAX, true); + TEST_OVERFLOWS_TYPE(s64, u16, S64_MIN, true); + TEST_OVERFLOWS_TYPE(s64, u16, -1, true); + TEST_OVERFLOWS_TYPE(s64, u16, U16_MAX, false); + TEST_OVERFLOWS_TYPE(s64, u16, (s64)U16_MAX + 1, true); + TEST_OVERFLOWS_TYPE(s64, u32, S64_MAX, true); + TEST_OVERFLOWS_TYPE(s64, u32, S64_MIN, true); + TEST_OVERFLOWS_TYPE(s64, u32, -1, true); + TEST_OVERFLOWS_TYPE(s64, u32, U32_MAX, false); + TEST_OVERFLOWS_TYPE(s64, u32, (s64)U32_MAX + 1, true); + TEST_OVERFLOWS_TYPE(s64, u64, S64_MAX, false); + TEST_OVERFLOWS_TYPE(s64, u64, S64_MIN, true); + TEST_OVERFLOWS_TYPE(s64, u64, -1, true); + TEST_OVERFLOWS_TYPE(s64, s8, S8_MAX, false); + TEST_OVERFLOWS_TYPE(s64, s8, S8_MIN, false); + TEST_OVERFLOWS_TYPE(s64, s8, (s64)S8_MAX + 1, true); + TEST_OVERFLOWS_TYPE(s64, s8, (s64)S8_MIN - 1, true); + TEST_OVERFLOWS_TYPE(s64, s8, S64_MAX, true); + 
TEST_OVERFLOWS_TYPE(s64, s16, S16_MAX, false); + TEST_OVERFLOWS_TYPE(s64, s16, S16_MIN, false); + TEST_OVERFLOWS_TYPE(s64, s16, (s64)S16_MAX + 1, true); + TEST_OVERFLOWS_TYPE(s64, s16, (s64)S16_MIN - 1, true); + TEST_OVERFLOWS_TYPE(s64, s16, S64_MAX, true); + TEST_OVERFLOWS_TYPE(s64, s32, S32_MAX, false); + TEST_OVERFLOWS_TYPE(s64, s32, S32_MIN, false); + TEST_OVERFLOWS_TYPE(s64, s32, (s64)S32_MAX + 1, true); + TEST_OVERFLOWS_TYPE(s64, s32, (s64)S32_MIN - 1, true); + TEST_OVERFLOWS_TYPE(s64, s32, S64_MAX, true); + TEST_OVERFLOWS_TYPE(s64, s64, S64_MAX, false); + TEST_OVERFLOWS_TYPE(s64, s64, S64_MIN, false); +#endif + + /* Check for macro side-effects. */ + var = INT_MAX - 1; + __TEST_OVERFLOWS_TYPE(__overflows_type, var++, int, false); + __TEST_OVERFLOWS_TYPE(__overflows_type, var++, int, false); + __TEST_OVERFLOWS_TYPE(__overflows_type, var++, int, true); + var = INT_MAX - 1; + __TEST_OVERFLOWS_TYPE(overflows_type, var++, int, false); + __TEST_OVERFLOWS_TYPE(overflows_type, var++, int, false); + __TEST_OVERFLOWS_TYPE(overflows_type, var++, int, true); + + kunit_info(test, "%d overflows_type() tests finished\n", count); +#undef TEST_OVERFLOWS_TYPE +#undef __TEST_OVERFLOWS_TYPE +} + +static void same_type_test(struct kunit *test) +{ + int count = 0; + int var; + +#define TEST_SAME_TYPE(t1, t2, same) do { \ + typeof(t1) __t1h = type_max(t1); \ + typeof(t1) __t1l = type_min(t1); \ + typeof(t2) __t2h = type_max(t2); \ + typeof(t2) __t2l = type_min(t2); \ + KUNIT_EXPECT_EQ(test, true, __same_type(t1, __t1h)); \ + KUNIT_EXPECT_EQ(test, true, __same_type(t1, __t1l)); \ + KUNIT_EXPECT_EQ(test, true, __same_type(__t1h, t1)); \ + KUNIT_EXPECT_EQ(test, true, __same_type(__t1l, t1)); \ + KUNIT_EXPECT_EQ(test, true, __same_type(t2, __t2h)); \ + KUNIT_EXPECT_EQ(test, true, __same_type(t2, __t2l)); \ + KUNIT_EXPECT_EQ(test, true, __same_type(__t2h, t2)); \ + KUNIT_EXPECT_EQ(test, true, __same_type(__t2l, t2)); \ + KUNIT_EXPECT_EQ(test, same, __same_type(t1, t2)); \ + 
KUNIT_EXPECT_EQ(test, same, __same_type(t2, __t1h)); \ + KUNIT_EXPECT_EQ(test, same, __same_type(t2, __t1l)); \ + KUNIT_EXPECT_EQ(test, same, __same_type(__t1h, t2)); \ + KUNIT_EXPECT_EQ(test, same, __same_type(__t1l, t2)); \ + KUNIT_EXPECT_EQ(test, same, __same_type(t1, __t2h)); \ + KUNIT_EXPECT_EQ(test, same, __same_type(t1, __t2l)); \ + KUNIT_EXPECT_EQ(test, same, __same_type(__t2h, t1)); \ + KUNIT_EXPECT_EQ(test, same, __same_type(__t2l, t1)); \ +} while (0) + +#if BITS_PER_LONG == 64 +# define TEST_SAME_TYPE64(base, t, m) TEST_SAME_TYPE(base, t, m) +#else +# define TEST_SAME_TYPE64(base, t, m) do { } while (0) +#endif + +#define TEST_TYPE_SETS(base, mu8, mu16, mu32, ms8, ms16, ms32, mu64, ms64) \ +do { \ + TEST_SAME_TYPE(base, u8, mu8); \ + TEST_SAME_TYPE(base, u16, mu16); \ + TEST_SAME_TYPE(base, u32, mu32); \ + TEST_SAME_TYPE(base, s8, ms8); \ + TEST_SAME_TYPE(base, s16, ms16); \ + TEST_SAME_TYPE(base, s32, ms32); \ + TEST_SAME_TYPE64(base, u64, mu64); \ + TEST_SAME_TYPE64(base, s64, ms64); \ +} while (0) + + TEST_TYPE_SETS(u8, true, false, false, false, false, false, false, false); + TEST_TYPE_SETS(u16, false, true, false, false, false, false, false, false); + TEST_TYPE_SETS(u32, false, false, true, false, false, false, false, false); + TEST_TYPE_SETS(s8, false, false, false, true, false, false, false, false); + TEST_TYPE_SETS(s16, false, false, false, false, true, false, false, false); + TEST_TYPE_SETS(s32, false, false, false, false, false, true, false, false); +#if BITS_PER_LONG == 64 + TEST_TYPE_SETS(u64, false, false, false, false, false, false, true, false); + TEST_TYPE_SETS(s64, false, false, false, false, false, false, false, true); +#endif + + /* Check for macro side-effects. 
*/ + var = 4; + KUNIT_EXPECT_EQ(test, var, 4); + KUNIT_EXPECT_TRUE(test, __same_type(var++, int)); + KUNIT_EXPECT_EQ(test, var, 4); + KUNIT_EXPECT_TRUE(test, __same_type(int, var++)); + KUNIT_EXPECT_EQ(test, var, 4); + KUNIT_EXPECT_TRUE(test, __same_type(var++, var++)); + KUNIT_EXPECT_EQ(test, var, 4); + + kunit_info(test, "%d __same_type() tests finished\n", count); + +#undef TEST_TYPE_SETS +#undef TEST_SAME_TYPE64 +#undef TEST_SAME_TYPE +} + +static void castable_to_type_test(struct kunit *test) +{ + int count = 0; + +#define TEST_CASTABLE_TO_TYPE(arg1, arg2, pass) do { \ + bool __pass = castable_to_type(arg1, arg2); \ + KUNIT_EXPECT_EQ_MSG(test, __pass, pass, \ + "expected castable_to_type(" #arg1 ", " #arg2 ") to%s pass\n",\ + pass ? "" : " not"); \ + count++; \ +} while (0) + + TEST_CASTABLE_TO_TYPE(16, u8, true); + TEST_CASTABLE_TO_TYPE(16, u16, true); + TEST_CASTABLE_TO_TYPE(16, u32, true); + TEST_CASTABLE_TO_TYPE(16, s8, true); + TEST_CASTABLE_TO_TYPE(16, s16, true); + TEST_CASTABLE_TO_TYPE(16, s32, true); + TEST_CASTABLE_TO_TYPE(-16, s8, true); + TEST_CASTABLE_TO_TYPE(-16, s16, true); + TEST_CASTABLE_TO_TYPE(-16, s32, true); +#if BITS_PER_LONG == 64 + TEST_CASTABLE_TO_TYPE(16, u64, true); + TEST_CASTABLE_TO_TYPE(-16, s64, true); +#endif + +#define TEST_CASTABLE_TO_TYPE_VAR(width) do { \ + u ## width u ## width ## var = 0; \ + s ## width s ## width ## var = 0; \ + \ + /* Constant expressions that fit types. 
*/ \ + TEST_CASTABLE_TO_TYPE(type_max(u ## width), u ## width, true); \ + TEST_CASTABLE_TO_TYPE(type_min(u ## width), u ## width, true); \ + TEST_CASTABLE_TO_TYPE(type_max(u ## width), u ## width ## var, true); \ + TEST_CASTABLE_TO_TYPE(type_min(u ## width), u ## width ## var, true); \ + TEST_CASTABLE_TO_TYPE(type_max(s ## width), s ## width, true); \ + TEST_CASTABLE_TO_TYPE(type_min(s ## width), s ## width, true); \ + TEST_CASTABLE_TO_TYPE(type_max(s ## width), s ## width ## var, true); \ + TEST_CASTABLE_TO_TYPE(type_min(u ## width), s ## width ## var, true); \ + /* Constant expressions that do not fit types. */ \ + TEST_CASTABLE_TO_TYPE(type_max(u ## width), s ## width, false); \ + TEST_CASTABLE_TO_TYPE(type_max(u ## width), s ## width ## var, false); \ + TEST_CASTABLE_TO_TYPE(type_min(s ## width), u ## width, false); \ + TEST_CASTABLE_TO_TYPE(type_min(s ## width), u ## width ## var, false); \ + /* Non-constant expression with mismatched type. */ \ + TEST_CASTABLE_TO_TYPE(s ## width ## var, u ## width, false); \ + TEST_CASTABLE_TO_TYPE(u ## width ## var, s ## width, false); \ +} while (0) + +#define TEST_CASTABLE_TO_TYPE_RANGE(width) do { \ + unsigned long big = U ## width ## _MAX; \ + signed long small = S ## width ## _MIN; \ + u ## width u ## width ## var = 0; \ + s ## width s ## width ## var = 0; \ + \ + /* Constant expression in range. */ \ + TEST_CASTABLE_TO_TYPE(U ## width ## _MAX, u ## width, true); \ + TEST_CASTABLE_TO_TYPE(U ## width ## _MAX, u ## width ## var, true); \ + TEST_CASTABLE_TO_TYPE(S ## width ## _MIN, s ## width, true); \ + TEST_CASTABLE_TO_TYPE(S ## width ## _MIN, s ## width ## var, true); \ + /* Constant expression out of range. 
*/ \ + TEST_CASTABLE_TO_TYPE((unsigned long)U ## width ## _MAX + 1, u ## width, false); \ + TEST_CASTABLE_TO_TYPE((unsigned long)U ## width ## _MAX + 1, u ## width ## var, false); \ + TEST_CASTABLE_TO_TYPE((signed long)S ## width ## _MIN - 1, s ## width, false); \ + TEST_CASTABLE_TO_TYPE((signed long)S ## width ## _MIN - 1, s ## width ## var, false); \ + /* Non-constant expression with mismatched type. */ \ + TEST_CASTABLE_TO_TYPE(big, u ## width, false); \ + TEST_CASTABLE_TO_TYPE(big, u ## width ## var, false); \ + TEST_CASTABLE_TO_TYPE(small, s ## width, false); \ + TEST_CASTABLE_TO_TYPE(small, s ## width ## var, false); \ +} while (0) + + TEST_CASTABLE_TO_TYPE_VAR(8); + TEST_CASTABLE_TO_TYPE_VAR(16); + TEST_CASTABLE_TO_TYPE_VAR(32); +#if BITS_PER_LONG == 64 + TEST_CASTABLE_TO_TYPE_VAR(64); +#endif + + TEST_CASTABLE_TO_TYPE_RANGE(8); + TEST_CASTABLE_TO_TYPE_RANGE(16); +#if BITS_PER_LONG == 64 + TEST_CASTABLE_TO_TYPE_RANGE(32); +#endif + kunit_info(test, "%d castable_to_type() tests finished\n", count); + +#undef TEST_CASTABLE_TO_TYPE_RANGE +#undef TEST_CASTABLE_TO_TYPE_VAR +#undef TEST_CASTABLE_TO_TYPE +} + static struct kunit_case overflow_test_cases[] = { KUNIT_CASE(u8_u8__u8_overflow_test), KUNIT_CASE(s8_s8__s8_overflow_test), @@ -755,6 +1133,9 @@ static struct kunit_case overflow_test_cases[] = { KUNIT_CASE(shift_nonsense_test), KUNIT_CASE(overflow_allocation_test), KUNIT_CASE(overflow_size_helpers_test), + KUNIT_CASE(overflows_type_test), + KUNIT_CASE(same_type_test), + KUNIT_CASE(castable_to_type_test), {} }; From a072f249b1b3a457196c83df622e3aa376b1f8df Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Tue, 1 Nov 2022 14:16:52 +0000 Subject: [PATCH 0786/4122] dt-bindings: i2c: mv64xxx: Add F1C100s compatible string The I2C controller IP used in the Allwinner F1C100s series of SoCs is compatible with the ones used in the other Allwinner SoCs. Add an F1C100s specific compatible string to the list of existing names. 
Signed-off-by: Andre Przywara Acked-by: Rob Herring Signed-off-by: Wolfram Sang --- Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml b/Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml index 93c164aa00da..984fc1ed3ec6 100644 --- a/Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml +++ b/Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml @@ -19,6 +19,7 @@ properties: - const: allwinner,sun6i-a31-i2c - items: - enum: + - allwinner,suniv-f1c100s-i2c - allwinner,sun8i-a23-i2c - allwinner,sun8i-a83t-i2c - allwinner,sun8i-v536-i2c From 52951ea193ad3b77c433497425a1049520fd6f22 Mon Sep 17 00:00:00 2001 From: Weilong Chen Date: Tue, 1 Nov 2022 16:07:27 +0800 Subject: [PATCH 0787/4122] i2c: hisi: Add initial device tree support The HiSilicon I2C controller can be used on embedded platforms, which boot from devicetree. Signed-off-by: Weilong Chen Acked-by: Yicong Yang Reviewed-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/Kconfig | 2 +- drivers/i2c/busses/i2c-hisi.c | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index e50f9603d189..a7bfddf08fa7 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -673,7 +673,7 @@ config I2C_HIGHLANDER config I2C_HISI tristate "HiSilicon I2C controller" - depends on (ARM64 && ACPI) || COMPILE_TEST + depends on ARM64 || COMPILE_TEST help Say Y here if you want to have Hisilicon I2C controller support available on the Kunpeng Server.
diff --git a/drivers/i2c/busses/i2c-hisi.c b/drivers/i2c/busses/i2c-hisi.c index 76c3d8f6fc3c..bcc97e4fcb65 100644 --- a/drivers/i2c/busses/i2c-hisi.c +++ b/drivers/i2c/busses/i2c-hisi.c @@ -489,11 +489,18 @@ static const struct acpi_device_id hisi_i2c_acpi_ids[] = { }; MODULE_DEVICE_TABLE(acpi, hisi_i2c_acpi_ids); +static const struct of_device_id hisi_i2c_dts_ids[] = { + { .compatible = "hisilicon,ascend910-i2c", }, + { } +}; +MODULE_DEVICE_TABLE(of, hisi_i2c_dts_ids); + static struct platform_driver hisi_i2c_driver = { .probe = hisi_i2c_probe, .driver = { .name = "hisi-i2c", .acpi_match_table = hisi_i2c_acpi_ids, + .of_match_table = hisi_i2c_dts_ids, }, }; module_platform_driver(hisi_i2c_driver); From e77f7ba726cc0c9b1c62b295d2aac42c3a18ebd1 Mon Sep 17 00:00:00 2001 From: Weilong Chen Date: Tue, 1 Nov 2022 16:07:28 +0800 Subject: [PATCH 0788/4122] dt-bindings: i2c: add entry for hisilicon,ascend910-i2c Add the new compatible for HiSilicon i2c. Signed-off-by: Weilong Chen Reviewed-by: Rob Herring Reviewed-by: Yicong Yang Signed-off-by: Wolfram Sang --- .../bindings/i2c/hisilicon,ascend910-i2c.yaml | 73 +++++++++++++++++++ MAINTAINERS | 1 + 2 files changed, 74 insertions(+) create mode 100644 Documentation/devicetree/bindings/i2c/hisilicon,ascend910-i2c.yaml diff --git a/Documentation/devicetree/bindings/i2c/hisilicon,ascend910-i2c.yaml b/Documentation/devicetree/bindings/i2c/hisilicon,ascend910-i2c.yaml new file mode 100644 index 000000000000..7d7a8de7bcd8 --- /dev/null +++ b/Documentation/devicetree/bindings/i2c/hisilicon,ascend910-i2c.yaml @@ -0,0 +1,73 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/i2c/hisilicon,ascend910-i2c.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: HiSilicon common I2C controller + +maintainers: + - Yicong Yang + +description: + The HiSilicon common I2C controller can be used for many different + types of SoC such as Huawei Ascend AI series chips. 
+ +allOf: + - $ref: /schemas/i2c/i2c-controller.yaml# + +properties: + compatible: + const: hisilicon,ascend910-i2c + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + maxItems: 1 + + clock-frequency: + default: 400000 + + i2c-sda-falling-time-ns: + default: 343 + + i2c-scl-falling-time-ns: + default: 203 + + i2c-sda-hold-time-ns: + default: 830 + + i2c-scl-rising-time-ns: + default: 365 + + i2c-digital-filter-width-ns: + default: 0 + +required: + - compatible + - reg + - interrupts + +unevaluatedProperties: false + +examples: + - | + #include + + i2c@38b0000 { + compatible = "hisilicon,ascend910-i2c"; + reg = <0x38b0000 0x10000>; + interrupts = ; + i2c-sda-falling-time-ns = <56>; + i2c-scl-falling-time-ns = <56>; + i2c-sda-hold-time-ns = <56>; + i2c-scl-rising-time-ns = <56>; + i2c-digital-filter; + i2c-digital-filter-width-ns = <0x0>; + clocks = <&alg_clk>; + clock-frequency = <400000>; + }; diff --git a/MAINTAINERS b/MAINTAINERS index 379945f82a64..1d09b36b9b9c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9214,6 +9214,7 @@ M: Yicong Yang L: linux-i2c@vger.kernel.org S: Maintained W: https://www.hisilicon.com +F: Documentation/devicetree/bindings/i2c/hisilicon,ascend910-i2c.yaml F: drivers/i2c/busses/i2c-hisi.c HISILICON LPC BUS DRIVER From 5fd8c2d3de3dd3cc6d36a0c7a08e44cd5bf173e6 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 22 Oct 2022 11:29:48 -0700 Subject: [PATCH 0789/4122] tty: Move sysctl setup into "core" tty logic In preparation for adding another sysctl to the tty subsystem, move the tty setup code into the "core" tty code, which contains tty_init() itself. 
Cc: Greg Kroah-Hartman Cc: Jiri Slaby Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221022182949.2684794-1-keescook@chromium.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty.h | 2 +- drivers/tty/tty_io.c | 34 ++++++++++++++++++++++++++++++++-- drivers/tty/tty_ldisc.c | 38 +------------------------------------- 3 files changed, 34 insertions(+), 40 deletions(-) diff --git a/drivers/tty/tty.h b/drivers/tty/tty.h index 1c08c9b67b16..f45cd683c02e 100644 --- a/drivers/tty/tty.h +++ b/drivers/tty/tty.h @@ -93,7 +93,7 @@ void tty_ldisc_release(struct tty_struct *tty); int __must_check tty_ldisc_init(struct tty_struct *tty); void tty_ldisc_deinit(struct tty_struct *tty); -void tty_sysctl_init(void); +extern int tty_ldisc_autoload; /* tty_audit.c */ #ifdef CONFIG_AUDIT diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index de06c3c2ff70..fe77a3d41326 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -3572,13 +3572,44 @@ void console_sysfs_notify(void) sysfs_notify(&consdev->kobj, NULL, "active"); } +static struct ctl_table tty_table[] = { + { + .procname = "ldisc_autoload", + .data = &tty_ldisc_autoload, + .maxlen = sizeof(tty_ldisc_autoload), + .mode = 0644, + .proc_handler = proc_dointvec, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { } +}; + +static struct ctl_table tty_dir_table[] = { + { + .procname = "tty", + .mode = 0555, + .child = tty_table, + }, + { } +}; + +static struct ctl_table tty_root_table[] = { + { + .procname = "dev", + .mode = 0555, + .child = tty_dir_table, + }, + { } +}; + /* * Ok, now we can initialize the rest of the tty devices and can count * on memory allocations, interrupts etc.. 
*/ int __init tty_init(void) { - tty_sysctl_init(); + register_sysctl_table(tty_root_table); cdev_init(&tty_cdev, &tty_fops); if (cdev_add(&tty_cdev, MKDEV(TTYAUX_MAJOR, 0), 1) || register_chrdev_region(MKDEV(TTYAUX_MAJOR, 0), 1, "/dev/tty") < 0) @@ -3600,4 +3631,3 @@ int __init tty_init(void) #endif return 0; } - diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c index 776d8a62f77c..e758f44729e7 100644 --- a/drivers/tty/tty_ldisc.c +++ b/drivers/tty/tty_ldisc.c @@ -117,7 +117,7 @@ static void put_ldops(struct tty_ldisc_ops *ldops) raw_spin_unlock_irqrestore(&tty_ldiscs_lock, flags); } -static int tty_ldisc_autoload = IS_BUILTIN(CONFIG_LDISC_AUTOLOAD); +int tty_ldisc_autoload = IS_BUILTIN(CONFIG_LDISC_AUTOLOAD); /** * tty_ldisc_get - take a reference to an ldisc @@ -817,39 +817,3 @@ void tty_ldisc_deinit(struct tty_struct *tty) tty_ldisc_put(tty->ldisc); tty->ldisc = NULL; } - -static struct ctl_table tty_table[] = { - { - .procname = "ldisc_autoload", - .data = &tty_ldisc_autoload, - .maxlen = sizeof(tty_ldisc_autoload), - .mode = 0644, - .proc_handler = proc_dointvec, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, - { } -}; - -static struct ctl_table tty_dir_table[] = { - { - .procname = "tty", - .mode = 0555, - .child = tty_table, - }, - { } -}; - -static struct ctl_table tty_root_table[] = { - { - .procname = "dev", - .mode = 0555, - .child = tty_dir_table, - }, - { } -}; - -void tty_sysctl_init(void) -{ - register_sysctl_table(tty_root_table); -} From 83efeeeb3d04b22aaed1df99bc70a48fe9d22c4d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 22 Oct 2022 11:29:49 -0700 Subject: [PATCH 0790/4122] tty: Allow TIOCSTI to be disabled TIOCSTI continues its long history of being used in privilege escalation attacks[1]. Prior attempts to provide a mechanism to disable this have devolved into discussions around creating full-blown LSMs to provide arbitrary ioctl filtering, which is hugely over-engineered -- only TIOCSTI is being used this way. 
3 years ago OpenBSD entirely removed TIOCSTI[2], Android has had it filtered for longer[3], and the tools that had historically used TIOCSTI either do not need it, are not commonly built with it, or have had its use removed. Provide a simple CONFIG and global sysctl to disable this for the system builders who have wanted this functionality for literally decades now, much like the ldisc_autoload CONFIG and sysctl. [1] https://lore.kernel.org/linux-hardening/Y0m9l52AKmw6Yxi1@hostpad [2] https://undeadly.org/cgi?action=article;sid=20170701132619 [3] https://lore.kernel.org/lkml/CAFJ0LnFGRuEEn1tCLhoki8ZyWrKfktbF+rwwN7WzyC_kBFoQVA@mail.gmail.com/ Cc: Greg Kroah-Hartman Cc: Jiri Slaby Cc: Simon Brand Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221022182949.2684794-2-keescook@chromium.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/Kconfig | 19 +++++++++++++++++++ drivers/tty/tty_io.c | 11 +++++++++++ 2 files changed, 30 insertions(+) diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig index cc30ff93e2e4..d35fc068da74 100644 --- a/drivers/tty/Kconfig +++ b/drivers/tty/Kconfig @@ -149,6 +149,25 @@ config LEGACY_PTY_COUNT When not in use, each legacy PTY occupies 12 bytes on 32-bit architectures and 24 bytes on 64-bit architectures. +config LEGACY_TIOCSTI + bool "Allow legacy TIOCSTI usage" + default y + help + Historically the kernel has allowed TIOCSTI, which will push + characters into a controlling TTY. This continues to be used + as a malicious privilege escalation mechanism, and provides no + meaningful real-world utility any more. Its use is considered + a dangerous legacy operation, and can be disabled on most + systems. + + Say 'Y' here only if you have confirmed that your system's + userspace depends on this functionality to continue operating + normally. + + This functionality can be changed at runtime with the + dev.tty.legacy_tiocsti sysctl. This configuration option sets + the default value of the sysctl. 
+ config LDISC_AUTOLOAD bool "Automatically load TTY Line Disciplines" default y diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index fe77a3d41326..a6a16cf986b7 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -2268,11 +2268,15 @@ static int tty_fasync(int fd, struct file *filp, int on) * * Called functions take tty_ldiscs_lock * * current->signal->tty check is safe without locks */ +static bool tty_legacy_tiocsti __read_mostly = IS_ENABLED(CONFIG_LEGACY_TIOCSTI); static int tiocsti(struct tty_struct *tty, char __user *p) { char ch, mbz = 0; struct tty_ldisc *ld; + if (!tty_legacy_tiocsti) + return -EIO; + if ((current->signal->tty != tty) && !capable(CAP_SYS_ADMIN)) return -EPERM; if (get_user(ch, p)) @@ -3573,6 +3577,13 @@ void console_sysfs_notify(void) } static struct ctl_table tty_table[] = { + { + .procname = "legacy_tiocsti", + .data = &tty_legacy_tiocsti, + .maxlen = sizeof(tty_legacy_tiocsti), + .mode = 0644, + .proc_handler = proc_dobool, + }, { .procname = "ldisc_autoload", .data = &tty_ldisc_autoload, From 8275b48b278096edc1e3ea5aa9cf946a10022f79 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Tue, 4 Oct 2022 12:49:25 +0200 Subject: [PATCH 0791/4122] tty: serial: introduce transmit helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Many serial drivers do the same thing: * send x_char if set * keep sending from the xmit circular buffer until either - the loop reaches the end of the xmit buffer - TX is stopped - HW fifo is full * check for pending characters and: - wake up tty writers to fill for more data into xmit buffer - stop TX if there is nothing in the xmit buffer The only differences are: * how to write the character to the HW fifo * the check of the end condition: - is the HW fifo full? - is limit of the written characters reached? 
So unify the above into two helpers: * uart_port_tx_limited() -- it performs the above taking the written characters limit into account, and * uart_port_tx() -- the same as above, except it only checks the HW readiness, not the characters limit. The HW specific operations (as stated as "differences" above) are passed as arguments to the macros. They are: * tx_ready -- returns true if HW can accept more data. * put_char -- write a character to the device. * tx_done -- when the write loop is done, perform arbitrary action before potential invocation of ops->stop_tx() happens. Note that the above are macros. This means the code is generated in place and the above 3 arguments are "inlined". I.e. no added penalty by generating call instructions for every single character. Nor any indirect calls. (As in some previous versions of this patchset.) Reviewed-by: Ilpo Järvinen Signed-off-by: Jiri Slaby (SUSE) Link: https://lore.kernel.org/r/20221004104927.14361-2-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- Documentation/driver-api/serial/driver.rst | 3 + include/linux/serial_core.h | 80 ++++++++++++++++++++++ 2 files changed, 83 insertions(+) diff --git a/Documentation/driver-api/serial/driver.rst b/Documentation/driver-api/serial/driver.rst index 23c6b956cd90..98d268555dcc 100644 --- a/Documentation/driver-api/serial/driver.rst +++ b/Documentation/driver-api/serial/driver.rst @@ -78,6 +78,9 @@ Other functions uart_get_lsr_info uart_handle_dcd_change uart_handle_cts_change uart_try_toggle_sysrq uart_get_console +.. 
kernel-doc:: include/linux/serial_core.h + :identifiers: uart_port_tx_limited uart_port_tx + Other notes ----------- diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index d657f2a42a7b..dbbc4408bb19 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -664,6 +664,86 @@ struct uart_driver { void uart_write_wakeup(struct uart_port *port); +#define __uart_port_tx(uport, ch, tx_ready, put_char, tx_done, for_test, \ + for_post) \ +({ \ + struct uart_port *__port = (uport); \ + struct circ_buf *xmit = &__port->state->xmit; \ + unsigned int pending; \ + \ + for (; (for_test) && (tx_ready); (for_post), __port->icount.tx++) { \ + if (__port->x_char) { \ + (ch) = __port->x_char; \ + (put_char); \ + __port->x_char = 0; \ + continue; \ + } \ + \ + if (uart_circ_empty(xmit) || uart_tx_stopped(__port)) \ + break; \ + \ + (ch) = xmit->buf[xmit->tail]; \ + (put_char); \ + xmit->tail = (xmit->tail + 1) % UART_XMIT_SIZE; \ + } \ + \ + (tx_done); \ + \ + pending = uart_circ_chars_pending(xmit); \ + if (pending < WAKEUP_CHARS) { \ + uart_write_wakeup(__port); \ + \ + if (pending == 0) \ + __port->ops->stop_tx(__port); \ + } \ + \ + pending; \ +}) + +/** + * uart_port_tx_limited -- transmit helper for uart_port with count limiting + * @port: uart port + * @ch: variable to store a character to be written to the HW + * @count: a limit of characters to send + * @tx_ready: can HW accept more data function + * @put_char: function to write a character + * @tx_done: function to call after the loop is done + * + * This helper transmits characters from the xmit buffer to the hardware using + * @put_char(). It does so until @count characters are sent and while @tx_ready + * evaluates to true. + * + * Returns: the number of characters in the xmit buffer when done. + * + * The expression in macro parameters shall be designed as follows: + * * **tx_ready:** should evaluate to true if the HW can accept more data to + * be sent. 
This parameter can be %true, which means the HW is always ready. + * * **put_char:** shall write @ch to the device of @port. + * * **tx_done:** when the write loop is done, this can perform arbitrary + * action before potential invocation of ops->stop_tx() happens. If the + * driver does not need to do anything, use e.g. ({}). + * + * For all of them, @port->lock is held, interrupts are locally disabled and + * the expressions must not sleep. + */ +#define uart_port_tx_limited(port, ch, count, tx_ready, put_char, tx_done) ({ \ + unsigned int __count = (count); \ + __uart_port_tx(port, ch, tx_ready, put_char, tx_done, __count, \ + __count--); \ +}) + +/** + * uart_port_tx -- transmit helper for uart_port + * @port: uart port + * @ch: variable to store a character to be written to the HW + * @tx_ready: can HW accept more data function + * @put_char: function to write a character + * + * See uart_port_tx_limited() for more details. + */ +#define uart_port_tx(port, ch, tx_ready, put_char) \ + __uart_port_tx(port, ch, tx_ready, put_char, ({}), true, ({})) + /* * Baud rate helpers. */ From 2d141e683e9ac7041c0350bb7b5e31f5f02ddbe3 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Tue, 4 Oct 2022 12:49:26 +0200 Subject: [PATCH 0792/4122] tty: serial: use uart_port_tx() helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit uart_port_tx() is a new helper to send characters to the device. Use it in these drivers. 
Cc: Tobias Klauser Cc: Richard Genoud Cc: Nicolas Ferre Cc: Alexandre Belloni Cc: Claudiu Beznea Cc: Vladimir Zapolskiy Cc: Liviu Dudau Cc: Sudeep Holla Cc: Lorenzo Pieralisi Cc: Shawn Guo Cc: Sascha Hauer Cc: Pengutronix Kernel Team Cc: Fabio Estevam Cc: NXP Linux Team Cc: "Andreas Färber" Cc: Manivannan Sadhasivam Reviewed-by: Ilpo Järvinen Signed-off-by: Jiri Slaby (SUSE) Link: https://lore.kernel.org/r/20221004104927.14361-3-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/altera_uart.c | 29 ++++------------------ drivers/tty/serial/atmel_serial.c | 28 +++++---------------- drivers/tty/serial/fsl_lpuart.c | 30 ++++------------------- drivers/tty/serial/lantiq.c | 36 +++------------------------ drivers/tty/serial/lpc32xx_hs.c | 33 +++---------------------- drivers/tty/serial/mcf.c | 34 ++++++-------------------- drivers/tty/serial/mpc52xx_uart.c | 39 +++--------------------------- drivers/tty/serial/mps2-uart.c | 26 +++----------------- drivers/tty/serial/mxs-auart.c | 32 ++++++------------------ drivers/tty/serial/owl-uart.c | 32 +++--------------------- drivers/tty/serial/sa1100.c | 34 +++----------------------- drivers/tty/serial/vt8500_serial.c | 30 +++-------------------- 12 files changed, 60 insertions(+), 323 deletions(-) diff --git a/drivers/tty/serial/altera_uart.c b/drivers/tty/serial/altera_uart.c index 82f2790de28d..316074bb23e9 100644 --- a/drivers/tty/serial/altera_uart.c +++ b/drivers/tty/serial/altera_uart.c @@ -247,31 +247,12 @@ static void altera_uart_rx_chars(struct uart_port *port) static void altera_uart_tx_chars(struct uart_port *port) { - struct circ_buf *xmit = &port->state->xmit; + u8 ch; - if (port->x_char) { - /* Send special char - probably flow control */ - altera_uart_writel(port, port->x_char, ALTERA_UART_TXDATA_REG); - port->x_char = 0; - port->icount.tx++; - return; - } - - while (altera_uart_readl(port, ALTERA_UART_STATUS_REG) & - ALTERA_UART_STATUS_TRDY_MSK) { - if (xmit->head == xmit->tail) - 
break; - altera_uart_writel(port, xmit->buf[xmit->tail], - ALTERA_UART_TXDATA_REG); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - port->icount.tx++; - } - - if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) - uart_write_wakeup(port); - - if (uart_circ_empty(xmit)) - altera_uart_stop_tx(port); + uart_port_tx(port, ch, + altera_uart_readl(port, ALTERA_UART_STATUS_REG) & + ALTERA_UART_STATUS_TRDY_MSK, + altera_uart_writel(port, ch, ALTERA_UART_TXDATA_REG)); } static irqreturn_t altera_uart_interrupt(int irq, void *data) diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index bd07f79a2df9..a6b4d30c5888 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -824,30 +824,14 @@ static void atmel_rx_chars(struct uart_port *port) */ static void atmel_tx_chars(struct uart_port *port) { - struct circ_buf *xmit = &port->state->xmit; struct atmel_uart_port *atmel_port = to_atmel_uart_port(port); + bool pending; + u8 ch; - if (port->x_char && - (atmel_uart_readl(port, ATMEL_US_CSR) & ATMEL_US_TXRDY)) { - atmel_uart_write_char(port, port->x_char); - port->icount.tx++; - port->x_char = 0; - } - if (uart_circ_empty(xmit) || uart_tx_stopped(port)) - return; - - while (atmel_uart_readl(port, ATMEL_US_CSR) & ATMEL_US_TXRDY) { - atmel_uart_write_char(port, xmit->buf[xmit->tail]); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - port->icount.tx++; - if (uart_circ_empty(xmit)) - break; - } - - if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) - uart_write_wakeup(port); - - if (!uart_circ_empty(xmit)) { + pending = uart_port_tx(port, ch, + atmel_uart_readl(port, ATMEL_US_CSR) & ATMEL_US_TXRDY, + atmel_uart_write_char(port, ch)); + if (pending) { /* we still have characters to transmit, so we should continue * transmitting them when TX is ready, regardless of * mode or duplexity diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index 67fa113f77d4..d811eda1844e 100644 --- 
a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -742,32 +742,12 @@ static int lpuart32_poll_get_char(struct uart_port *port) static inline void lpuart_transmit_buffer(struct lpuart_port *sport) { - struct circ_buf *xmit = &sport->port.state->xmit; + struct uart_port *port = &sport->port; + u8 ch; - if (sport->port.x_char) { - writeb(sport->port.x_char, sport->port.membase + UARTDR); - sport->port.icount.tx++; - sport->port.x_char = 0; - return; - } - - if (lpuart_stopped_or_empty(&sport->port)) { - lpuart_stop_tx(&sport->port); - return; - } - - while (!uart_circ_empty(xmit) && - (readb(sport->port.membase + UARTTCFIFO) < sport->txfifo_size)) { - writeb(xmit->buf[xmit->tail], sport->port.membase + UARTDR); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - sport->port.icount.tx++; - } - - if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) - uart_write_wakeup(&sport->port); - - if (uart_circ_empty(xmit)) - lpuart_stop_tx(&sport->port); + uart_port_tx(port, ch, + readb(port->membase + UARTTCFIFO) < sport->txfifo_size, + writeb(ch, port->membase + UARTDR)); } static inline void lpuart32_transmit_buffer(struct lpuart_port *sport) diff --git a/drivers/tty/serial/lantiq.c b/drivers/tty/serial/lantiq.c index c892f3c7d1ab..a58e9277dfad 100644 --- a/drivers/tty/serial/lantiq.c +++ b/drivers/tty/serial/lantiq.c @@ -95,7 +95,6 @@ #define ASCFSTAT_TXFFLMASK 0x3F00 #define ASCFSTAT_TXFREEMASK 0x3F000000 -static void lqasc_tx_chars(struct uart_port *port); static struct ltq_uart_port *lqasc_port[MAXPORTS]; static struct uart_driver lqasc_reg; @@ -151,9 +150,12 @@ lqasc_start_tx(struct uart_port *port) { unsigned long flags; struct ltq_uart_port *ltq_port = to_ltq_uart_port(port); + u8 ch; spin_lock_irqsave(<q_port->lock, flags); - lqasc_tx_chars(port); + uart_port_tx(port, ch, + lqasc_tx_ready(port), + writeb(ch, port->membase + LTQ_ASC_TBUF)); spin_unlock_irqrestore(<q_port->lock, flags); return; } @@ -226,36 +228,6 @@ lqasc_rx_chars(struct 
uart_port *port) return 0; } -static void -lqasc_tx_chars(struct uart_port *port) -{ - struct circ_buf *xmit = &port->state->xmit; - if (uart_tx_stopped(port)) { - lqasc_stop_tx(port); - return; - } - - while (lqasc_tx_ready(port)) { - if (port->x_char) { - writeb(port->x_char, port->membase + LTQ_ASC_TBUF); - port->icount.tx++; - port->x_char = 0; - continue; - } - - if (uart_circ_empty(xmit)) - break; - - writeb(port->state->xmit.buf[port->state->xmit.tail], - port->membase + LTQ_ASC_TBUF); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - port->icount.tx++; - } - - if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) - uart_write_wakeup(port); -} - static irqreturn_t lqasc_tx_int(int irq, void *_port) { diff --git a/drivers/tty/serial/lpc32xx_hs.c b/drivers/tty/serial/lpc32xx_hs.c index ed47f4768338..b38fe4728c26 100644 --- a/drivers/tty/serial/lpc32xx_hs.c +++ b/drivers/tty/serial/lpc32xx_hs.c @@ -276,8 +276,6 @@ static void __serial_lpc32xx_rx(struct uart_port *port) tty_flip_buffer_push(tport); } -static void serial_lpc32xx_stop_tx(struct uart_port *port); - static bool serial_lpc32xx_tx_ready(struct uart_port *port) { u32 level = readl(LPC32XX_HSUART_LEVEL(port->membase)); @@ -287,34 +285,11 @@ static bool serial_lpc32xx_tx_ready(struct uart_port *port) static void __serial_lpc32xx_tx(struct uart_port *port) { - struct circ_buf *xmit = &port->state->xmit; + u8 ch; - if (port->x_char) { - writel((u32)port->x_char, LPC32XX_HSUART_FIFO(port->membase)); - port->icount.tx++; - port->x_char = 0; - return; - } - - if (uart_circ_empty(xmit) || uart_tx_stopped(port)) - goto exit_tx; - - /* Transfer data */ - while (serial_lpc32xx_tx_ready(port)) { - writel((u32) xmit->buf[xmit->tail], - LPC32XX_HSUART_FIFO(port->membase)); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - port->icount.tx++; - if (uart_circ_empty(xmit)) - break; - } - - if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) - uart_write_wakeup(port); - -exit_tx: - if (uart_circ_empty(xmit)) - 
serial_lpc32xx_stop_tx(port); + uart_port_tx(port, ch, + serial_lpc32xx_tx_ready(port), + writel(ch, LPC32XX_HSUART_FIFO(port->membase))); } static irqreturn_t serial_lpc32xx_interrupt(int irq, void *dev_id) diff --git a/drivers/tty/serial/mcf.c b/drivers/tty/serial/mcf.c index b1cd9a76dd93..3239babe12a4 100644 --- a/drivers/tty/serial/mcf.c +++ b/drivers/tty/serial/mcf.c @@ -327,34 +327,16 @@ static void mcf_rx_chars(struct mcf_uart *pp) static void mcf_tx_chars(struct mcf_uart *pp) { struct uart_port *port = &pp->port; - struct circ_buf *xmit = &port->state->xmit; + bool pending; + u8 ch; - if (port->x_char) { - /* Send special char - probably flow control */ - writeb(port->x_char, port->membase + MCFUART_UTB); - port->x_char = 0; - port->icount.tx++; - return; - } + pending = uart_port_tx(port, ch, + readb(port->membase + MCFUART_USR) & MCFUART_USR_TXREADY, + writeb(ch, port->membase + MCFUART_UTB)); - while (readb(port->membase + MCFUART_USR) & MCFUART_USR_TXREADY) { - if (uart_circ_empty(xmit)) - break; - writeb(xmit->buf[xmit->tail], port->membase + MCFUART_UTB); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE -1); - port->icount.tx++; - } - - if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) - uart_write_wakeup(port); - - if (uart_circ_empty(xmit)) { - mcf_stop_tx(port); - /* Disable TX to negate RTS automatically */ - if (port->rs485.flags & SER_RS485_ENABLED) - writeb(MCFUART_UCR_TXDISABLE, - port->membase + MCFUART_UCR); - } + /* Disable TX to negate RTS automatically */ + if (!pending && (port->rs485.flags & SER_RS485_ENABLED)) + writeb(MCFUART_UCR_TXDISABLE, port->membase + MCFUART_UCR); } /****************************************************************************/ diff --git a/drivers/tty/serial/mpc52xx_uart.c b/drivers/tty/serial/mpc52xx_uart.c index 73362d4bc45d..384ca195e3d5 100644 --- a/drivers/tty/serial/mpc52xx_uart.c +++ b/drivers/tty/serial/mpc52xx_uart.c @@ -1428,42 +1428,11 @@ mpc52xx_uart_int_rx_chars(struct uart_port *port) static 
inline bool mpc52xx_uart_int_tx_chars(struct uart_port *port) { - struct circ_buf *xmit = &port->state->xmit; + u8 ch; - /* Process out of band chars */ - if (port->x_char) { - psc_ops->write_char(port, port->x_char); - port->icount.tx++; - port->x_char = 0; - return true; - } - - /* Nothing to do ? */ - if (uart_circ_empty(xmit) || uart_tx_stopped(port)) { - mpc52xx_uart_stop_tx(port); - return false; - } - - /* Send chars */ - while (psc_ops->raw_tx_rdy(port)) { - psc_ops->write_char(port, xmit->buf[xmit->tail]); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - port->icount.tx++; - if (uart_circ_empty(xmit)) - break; - } - - /* Wake up */ - if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) - uart_write_wakeup(port); - - /* Maybe we're done after all */ - if (uart_circ_empty(xmit)) { - mpc52xx_uart_stop_tx(port); - return false; - } - - return true; + return uart_port_tx(port, ch, + psc_ops->raw_tx_rdy(port), + psc_ops->write_char(port, ch)); } static irqreturn_t diff --git a/drivers/tty/serial/mps2-uart.c b/drivers/tty/serial/mps2-uart.c index 2e3e6cf16817..860d161fa594 100644 --- a/drivers/tty/serial/mps2-uart.c +++ b/drivers/tty/serial/mps2-uart.c @@ -129,29 +129,11 @@ static void mps2_uart_stop_tx(struct uart_port *port) static void mps2_uart_tx_chars(struct uart_port *port) { - struct circ_buf *xmit = &port->state->xmit; + u8 ch; - while (!(mps2_uart_read8(port, UARTn_STATE) & UARTn_STATE_TX_FULL)) { - if (port->x_char) { - mps2_uart_write8(port, port->x_char, UARTn_DATA); - port->x_char = 0; - port->icount.tx++; - continue; - } - - if (uart_circ_empty(xmit) || uart_tx_stopped(port)) - break; - - mps2_uart_write8(port, xmit->buf[xmit->tail], UARTn_DATA); - xmit->tail = (xmit->tail + 1) % UART_XMIT_SIZE; - port->icount.tx++; - } - - if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) - uart_write_wakeup(port); - - if (uart_circ_empty(xmit)) - mps2_uart_stop_tx(port); + uart_port_tx(port, ch, + mps2_uart_tx_empty(port), + mps2_uart_write8(port, ch, 
UARTn_DATA)); } static void mps2_uart_start_tx(struct uart_port *port) diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c index d21a4f3ef2fe..ef6e7bb6105c 100644 --- a/drivers/tty/serial/mxs-auart.c +++ b/drivers/tty/serial/mxs-auart.c @@ -569,6 +569,8 @@ static int mxs_auart_dma_tx(struct mxs_auart_port *s, int size) static void mxs_auart_tx_chars(struct mxs_auart_port *s) { struct circ_buf *xmit = &s->port.state->xmit; + bool pending; + u8 ch; if (auart_dma_enabled(s)) { u32 i = 0; @@ -603,31 +605,13 @@ static void mxs_auart_tx_chars(struct mxs_auart_port *s) return; } - - while (!(mxs_read(s, REG_STAT) & AUART_STAT_TXFF)) { - if (s->port.x_char) { - s->port.icount.tx++; - mxs_write(s->port.x_char, s, REG_DATA); - s->port.x_char = 0; - continue; - } - if (!uart_circ_empty(xmit) && !uart_tx_stopped(&s->port)) { - s->port.icount.tx++; - mxs_write(xmit->buf[xmit->tail], s, REG_DATA); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - } else - break; - } - if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) - uart_write_wakeup(&s->port); - - if (uart_circ_empty(&(s->port.state->xmit))) - mxs_clr(AUART_INTR_TXIEN, s, REG_INTR); - else + pending = uart_port_tx(&s->port, ch, + !(mxs_read(s, REG_STAT) & AUART_STAT_TXFF), + mxs_write(ch, s, REG_DATA)); + if (pending) mxs_set(AUART_INTR_TXIEN, s, REG_INTR); - - if (uart_tx_stopped(&s->port)) - mxs_auart_stop_tx(&s->port); + else + mxs_clr(AUART_INTR_TXIEN, s, REG_INTR); } static void mxs_auart_rx_char(struct mxs_auart_port *s) diff --git a/drivers/tty/serial/owl-uart.c b/drivers/tty/serial/owl-uart.c index fde39cc1145d..e99970a9437f 100644 --- a/drivers/tty/serial/owl-uart.c +++ b/drivers/tty/serial/owl-uart.c @@ -181,35 +181,11 @@ static void owl_uart_start_tx(struct uart_port *port) static void owl_uart_send_chars(struct uart_port *port) { - struct circ_buf *xmit = &port->state->xmit; - unsigned int ch; + u8 ch; - if (port->x_char) { - while (!(owl_uart_read(port, OWL_UART_STAT) & 
OWL_UART_STAT_TFFU)) - cpu_relax(); - owl_uart_write(port, port->x_char, OWL_UART_TXDAT); - port->icount.tx++; - port->x_char = 0; - } - - if (uart_tx_stopped(port)) - return; - - while (!(owl_uart_read(port, OWL_UART_STAT) & OWL_UART_STAT_TFFU)) { - if (uart_circ_empty(xmit)) - break; - - ch = xmit->buf[xmit->tail]; - owl_uart_write(port, ch, OWL_UART_TXDAT); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - port->icount.tx++; - } - - if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) - uart_write_wakeup(port); - - if (uart_circ_empty(xmit)) - owl_uart_stop_tx(port); + uart_port_tx(port, ch, + !(owl_uart_read(port, OWL_UART_STAT) & OWL_UART_STAT_TFFU), + owl_uart_write(port, ch, OWL_UART_TXDAT)); } static void owl_uart_receive_chars(struct uart_port *port) diff --git a/drivers/tty/serial/sa1100.c b/drivers/tty/serial/sa1100.c index dd9e3253cab4..55107bbc00ce 100644 --- a/drivers/tty/serial/sa1100.c +++ b/drivers/tty/serial/sa1100.c @@ -228,14 +228,7 @@ sa1100_rx_chars(struct sa1100_port *sport) static void sa1100_tx_chars(struct sa1100_port *sport) { - struct circ_buf *xmit = &sport->port.state->xmit; - - if (sport->port.x_char) { - UART_PUT_CHAR(sport, sport->port.x_char); - sport->port.icount.tx++; - sport->port.x_char = 0; - return; - } + u8 ch; /* * Check the modem control lines before @@ -243,28 +236,9 @@ static void sa1100_tx_chars(struct sa1100_port *sport) */ sa1100_mctrl_check(sport); - if (uart_circ_empty(xmit) || uart_tx_stopped(&sport->port)) { - sa1100_stop_tx(&sport->port); - return; - } - - /* - * Tried using FIFO (not checking TNF) for fifo fill: - * still had the '4 bytes repeated' problem. 
- */ - while (UART_GET_UTSR1(sport) & UTSR1_TNF) { - UART_PUT_CHAR(sport, xmit->buf[xmit->tail]); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - sport->port.icount.tx++; - if (uart_circ_empty(xmit)) - break; - } - - if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) - uart_write_wakeup(&sport->port); - - if (uart_circ_empty(xmit)) - sa1100_stop_tx(&sport->port); + uart_port_tx(&sport->port, ch, + UART_GET_UTSR1(sport) & UTSR1_TNF, + UART_PUT_CHAR(sport, ch)); } static irqreturn_t sa1100_int(int irq, void *dev_id) diff --git a/drivers/tty/serial/vt8500_serial.c b/drivers/tty/serial/vt8500_serial.c index 10fbdb09965f..deedb6513160 100644 --- a/drivers/tty/serial/vt8500_serial.c +++ b/drivers/tty/serial/vt8500_serial.c @@ -196,33 +196,11 @@ static unsigned int vt8500_tx_empty(struct uart_port *port) static void handle_tx(struct uart_port *port) { - struct circ_buf *xmit = &port->state->xmit; + u8 ch; - if (port->x_char) { - writeb(port->x_char, port->membase + VT8500_TXFIFO); - port->icount.tx++; - port->x_char = 0; - } - if (uart_circ_empty(xmit) || uart_tx_stopped(port)) { - vt8500_stop_tx(port); - return; - } - - while (vt8500_tx_empty(port)) { - if (uart_circ_empty(xmit)) - break; - - writeb(xmit->buf[xmit->tail], port->membase + VT8500_TXFIFO); - - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - port->icount.tx++; - } - - if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) - uart_write_wakeup(port); - - if (uart_circ_empty(xmit)) - vt8500_stop_tx(port); + uart_port_tx(port, ch, + vt8500_tx_empty(port), + writeb(ch, port->membase + VT8500_TXFIFO)); } static void vt8500_start_tx(struct uart_port *port) From d11cc8c3c4b65e00e01f20a920c5fa412415204a Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Tue, 4 Oct 2022 12:49:27 +0200 Subject: [PATCH 0793/4122] tty: serial: use uart_port_tx_limited() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit uart_port_tx_limited() is a new helper to send characters to 
the device. Use it in these drivers. mux.c also needs to define tx_done(). But I'm not sure if the driver really wants to wait for all the characters to drain from the HW fifo at this code point. Hence I marked this as FIXME. Cc: Russell King Cc: Florian Fainelli Cc: bcm-kernel-feedback-list@broadcom.com Cc: "Pali Rohár" Cc: Kevin Cernekee Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Orson Zhai Cc: Baolin Wang Cc: Chunyan Zhang Cc: Patrice Chotard Cc: linux-riscv@lists.infradead.org Reviewed-by: Ilpo Järvinen Signed-off-by: Jiri Slaby (SUSE) Link: https://lore.kernel.org/r/20221004104927.14361-4-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/21285.c | 32 +++---------------- drivers/tty/serial/altera_jtaguart.c | 36 ++++----------------- drivers/tty/serial/amba-pl010.c | 32 +++---------------- drivers/tty/serial/apbuart.c | 34 +++----------------- drivers/tty/serial/bcm63xx_uart.c | 47 ++++++--------------------- drivers/tty/serial/mux.c | 45 ++++++++------------------ drivers/tty/serial/mvebu-uart.c | 38 +++------------------- drivers/tty/serial/omap-serial.c | 32 +++---------------- drivers/tty/serial/pxa.c | 33 +++---------------- drivers/tty/serial/rp2.c | 31 ++++-------------- drivers/tty/serial/serial_txx9.c | 32 +++---------------- drivers/tty/serial/sifive.c | 31 +++--------------- drivers/tty/serial/sprd_serial.c | 33 +++---------------- drivers/tty/serial/st-asc.c | 48 +++------------------------- 14 files changed, 85 insertions(+), 419 deletions(-) diff --git a/drivers/tty/serial/21285.c b/drivers/tty/serial/21285.c index c7d34823f715..185462fd959c 100644 --- a/drivers/tty/serial/21285.c +++ b/drivers/tty/serial/21285.c @@ -154,35 +154,13 @@ static irqreturn_t serial21285_rx_chars(int irq, void *dev_id) static irqreturn_t serial21285_tx_chars(int irq, void *dev_id) { struct uart_port *port = dev_id; - struct circ_buf *xmit = &port->state->xmit; - int count = 256; + u8 ch; - if (port->x_char) { - *CSR_UARTDR = port->x_char; - 
port->icount.tx++; - port->x_char = 0; - goto out; - } - if (uart_circ_empty(xmit) || uart_tx_stopped(port)) { - serial21285_stop_tx(port); - goto out; - } + uart_port_tx_limited(port, ch, 256, + !(*CSR_UARTFLG & 0x20), + *CSR_UARTDR = ch, + ({})); - do { - *CSR_UARTDR = xmit->buf[xmit->tail]; - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - port->icount.tx++; - if (uart_circ_empty(xmit)) - break; - } while (--count > 0 && !(*CSR_UARTFLG & 0x20)); - - if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) - uart_write_wakeup(port); - - if (uart_circ_empty(xmit)) - serial21285_stop_tx(port); - - out: return IRQ_HANDLED; } diff --git a/drivers/tty/serial/altera_jtaguart.c b/drivers/tty/serial/altera_jtaguart.c index c2d154d78e54..aa49553fac58 100644 --- a/drivers/tty/serial/altera_jtaguart.c +++ b/drivers/tty/serial/altera_jtaguart.c @@ -146,37 +146,15 @@ static void altera_jtaguart_rx_chars(struct altera_jtaguart *pp) static void altera_jtaguart_tx_chars(struct altera_jtaguart *pp) { struct uart_port *port = &pp->port; - struct circ_buf *xmit = &port->state->xmit; - unsigned int pending, count; + unsigned int count; + u8 ch; - if (port->x_char) { - /* Send special char - probably flow control */ - writel(port->x_char, port->membase + ALTERA_JTAGUART_DATA_REG); - port->x_char = 0; - port->icount.tx++; - return; - } + count = altera_jtaguart_tx_space(port, NULL); - pending = uart_circ_chars_pending(xmit); - if (pending > 0) { - count = altera_jtaguart_tx_space(port, NULL); - if (count > pending) - count = pending; - if (count > 0) { - pending -= count; - while (count--) { - writel(xmit->buf[xmit->tail], - port->membase + ALTERA_JTAGUART_DATA_REG); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - port->icount.tx++; - } - if (pending < WAKEUP_CHARS) - uart_write_wakeup(port); - } - } - - if (pending == 0) - altera_jtaguart_stop_tx(port); + uart_port_tx_limited(port, ch, count, + true, + writel(ch, port->membase + ALTERA_JTAGUART_DATA_REG), + ({})); } static 
irqreturn_t altera_jtaguart_interrupt(int irq, void *data) diff --git a/drivers/tty/serial/amba-pl010.c b/drivers/tty/serial/amba-pl010.c index af27fb8ec145..a98fae2ca422 100644 --- a/drivers/tty/serial/amba-pl010.c +++ b/drivers/tty/serial/amba-pl010.c @@ -164,34 +164,12 @@ static void pl010_rx_chars(struct uart_port *port) static void pl010_tx_chars(struct uart_port *port) { - struct circ_buf *xmit = &port->state->xmit; - int count; + u8 ch; - if (port->x_char) { - writel(port->x_char, port->membase + UART01x_DR); - port->icount.tx++; - port->x_char = 0; - return; - } - if (uart_circ_empty(xmit) || uart_tx_stopped(port)) { - pl010_stop_tx(port); - return; - } - - count = port->fifosize >> 1; - do { - writel(xmit->buf[xmit->tail], port->membase + UART01x_DR); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - port->icount.tx++; - if (uart_circ_empty(xmit)) - break; - } while (--count > 0); - - if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) - uart_write_wakeup(port); - - if (uart_circ_empty(xmit)) - pl010_stop_tx(port); + uart_port_tx_limited(port, ch, port->fifosize >> 1, + true, + writel(ch, port->membase + UART01x_DR), + ({})); } static void pl010_modem_status(struct uart_amba_port *uap) diff --git a/drivers/tty/serial/apbuart.c b/drivers/tty/serial/apbuart.c index 450f4edfda0f..915ee4b0d594 100644 --- a/drivers/tty/serial/apbuart.c +++ b/drivers/tty/serial/apbuart.c @@ -122,36 +122,12 @@ static void apbuart_rx_chars(struct uart_port *port) static void apbuart_tx_chars(struct uart_port *port) { - struct circ_buf *xmit = &port->state->xmit; - int count; + u8 ch; - if (port->x_char) { - UART_PUT_CHAR(port, port->x_char); - port->icount.tx++; - port->x_char = 0; - return; - } - - if (uart_circ_empty(xmit) || uart_tx_stopped(port)) { - apbuart_stop_tx(port); - return; - } - - /* amba: fill FIFO */ - count = port->fifosize >> 1; - do { - UART_PUT_CHAR(port, xmit->buf[xmit->tail]); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - port->icount.tx++; 
- if (uart_circ_empty(xmit)) - break; - } while (--count > 0); - - if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) - uart_write_wakeup(port); - - if (uart_circ_empty(xmit)) - apbuart_stop_tx(port); + uart_port_tx_limited(port, ch, port->fifosize >> 1, + true, + UART_PUT_CHAR(port, ch), + ({})); } static irqreturn_t apbuart_int(int irq, void *dev_id) diff --git a/drivers/tty/serial/bcm63xx_uart.c b/drivers/tty/serial/bcm63xx_uart.c index 5d9737c2d1f2..62bc7244dc67 100644 --- a/drivers/tty/serial/bcm63xx_uart.c +++ b/drivers/tty/serial/bcm63xx_uart.c @@ -303,53 +303,24 @@ static void bcm_uart_do_rx(struct uart_port *port) */ static void bcm_uart_do_tx(struct uart_port *port) { - struct circ_buf *xmit; - unsigned int val, max_count; - - if (port->x_char) { - bcm_uart_writel(port, port->x_char, UART_FIFO_REG); - port->icount.tx++; - port->x_char = 0; - return; - } - - if (uart_tx_stopped(port)) { - bcm_uart_stop_tx(port); - return; - } - - xmit = &port->state->xmit; - if (uart_circ_empty(xmit)) - goto txq_empty; + unsigned int val; + bool pending; + u8 ch; val = bcm_uart_readl(port, UART_MCTL_REG); val = (val & UART_MCTL_TXFIFOFILL_MASK) >> UART_MCTL_TXFIFOFILL_SHIFT; - max_count = port->fifosize - val; - while (max_count--) { - unsigned int c; + pending = uart_port_tx_limited(port, ch, port->fifosize - val, + true, + bcm_uart_writel(port, ch, UART_FIFO_REG), + ({})); + if (pending) + return; - c = xmit->buf[xmit->tail]; - bcm_uart_writel(port, c, UART_FIFO_REG); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - port->icount.tx++; - if (uart_circ_empty(xmit)) - break; - } - - if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) - uart_write_wakeup(port); - - if (uart_circ_empty(xmit)) - goto txq_empty; - return; - -txq_empty: /* nothing to send, disable transmit interrupt */ val = bcm_uart_readl(port, UART_IR_REG); val &= ~UART_TX_INT_MASK; bcm_uart_writel(port, val, UART_IR_REG); - return; } /* diff --git a/drivers/tty/serial/mux.c b/drivers/tty/serial/mux.c 
index ed0e763f622a..85ce1e9af44a 100644 --- a/drivers/tty/serial/mux.c +++ b/drivers/tty/serial/mux.c @@ -171,6 +171,13 @@ static void mux_break_ctl(struct uart_port *port, int break_state) { } +static void mux_tx_done(struct uart_port *port) +{ + /* FIXME js: really needs to wait? */ + while (UART_GET_FIFO_CNT(port)) + udelay(1); +} + /** * mux_write - Write chars to the mux fifo. * @port: Ptr to the uart_port. @@ -180,39 +187,13 @@ static void mux_break_ctl(struct uart_port *port, int break_state) */ static void mux_write(struct uart_port *port) { - int count; - struct circ_buf *xmit = &port->state->xmit; + u8 ch; - if(port->x_char) { - UART_PUT_CHAR(port, port->x_char); - port->icount.tx++; - port->x_char = 0; - return; - } - - if(uart_circ_empty(xmit) || uart_tx_stopped(port)) { - mux_stop_tx(port); - return; - } - - count = (port->fifosize) - UART_GET_FIFO_CNT(port); - do { - UART_PUT_CHAR(port, xmit->buf[xmit->tail]); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - port->icount.tx++; - if(uart_circ_empty(xmit)) - break; - - } while(--count > 0); - - while(UART_GET_FIFO_CNT(port)) - udelay(1); - - if(uart_circ_chars_pending(xmit) < WAKEUP_CHARS) - uart_write_wakeup(port); - - if (uart_circ_empty(xmit)) - mux_stop_tx(port); + uart_port_tx_limited(port, ch, + port->fifosize - UART_GET_FIFO_CNT(port), + true, + UART_PUT_CHAR(port, ch), + mux_tx_done(port)); } /** diff --git a/drivers/tty/serial/mvebu-uart.c b/drivers/tty/serial/mvebu-uart.c index ba16e1da6bd3..7b566404cb33 100644 --- a/drivers/tty/serial/mvebu-uart.c +++ b/drivers/tty/serial/mvebu-uart.c @@ -335,40 +335,12 @@ ignore_char: static void mvebu_uart_tx_chars(struct uart_port *port, unsigned int status) { - struct circ_buf *xmit = &port->state->xmit; - unsigned int count; - unsigned int st; + u8 ch; - if (port->x_char) { - writel(port->x_char, port->membase + UART_TSH(port)); - port->icount.tx++; - port->x_char = 0; - return; - } - - if (uart_circ_empty(xmit) || uart_tx_stopped(port)) { - 
mvebu_uart_stop_tx(port); - return; - } - - for (count = 0; count < port->fifosize; count++) { - writel(xmit->buf[xmit->tail], port->membase + UART_TSH(port)); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - port->icount.tx++; - - if (uart_circ_empty(xmit)) - break; - - st = readl(port->membase + UART_STAT); - if (st & STAT_TX_FIFO_FUL) - break; - } - - if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) - uart_write_wakeup(port); - - if (uart_circ_empty(xmit)) - mvebu_uart_stop_tx(port); + uart_port_tx_limited(port, ch, port->fifosize, + !(readl(port->membase + UART_STAT) & STAT_TX_FIFO_FUL), + writel(ch, port->membase + UART_TSH(port)), + ({})); } static irqreturn_t mvebu_uart_isr(int irq, void *dev_id) diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c index 7d0d2718ef59..82d35dbbfa6c 100644 --- a/drivers/tty/serial/omap-serial.c +++ b/drivers/tty/serial/omap-serial.c @@ -347,34 +347,12 @@ static void serial_omap_put_char(struct uart_omap_port *up, unsigned char ch) static void transmit_chars(struct uart_omap_port *up, unsigned int lsr) { - struct circ_buf *xmit = &up->port.state->xmit; - int count; + u8 ch; - if (up->port.x_char) { - serial_omap_put_char(up, up->port.x_char); - up->port.icount.tx++; - up->port.x_char = 0; - return; - } - if (uart_circ_empty(xmit) || uart_tx_stopped(&up->port)) { - serial_omap_stop_tx(&up->port); - return; - } - count = up->port.fifosize / 4; - do { - serial_omap_put_char(up, xmit->buf[xmit->tail]); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - up->port.icount.tx++; - - if (uart_circ_empty(xmit)) - break; - } while (--count > 0); - - if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) - uart_write_wakeup(&up->port); - - if (uart_circ_empty(xmit)) - serial_omap_stop_tx(&up->port); + uart_port_tx_limited(&up->port, ch, up->port.fifosize / 4, + true, + serial_omap_put_char(up, ch), + ({})); } static inline void serial_omap_enable_ier_thri(struct uart_omap_port *up) diff --git 
a/drivers/tty/serial/pxa.c b/drivers/tty/serial/pxa.c index 2d25231fad84..444fa4b654ac 100644 --- a/drivers/tty/serial/pxa.c +++ b/drivers/tty/serial/pxa.c @@ -174,35 +174,12 @@ static inline void receive_chars(struct uart_pxa_port *up, int *status) static void transmit_chars(struct uart_pxa_port *up) { - struct circ_buf *xmit = &up->port.state->xmit; - int count; + u8 ch; - if (up->port.x_char) { - serial_out(up, UART_TX, up->port.x_char); - up->port.icount.tx++; - up->port.x_char = 0; - return; - } - if (uart_circ_empty(xmit) || uart_tx_stopped(&up->port)) { - serial_pxa_stop_tx(&up->port); - return; - } - - count = up->port.fifosize / 2; - do { - serial_out(up, UART_TX, xmit->buf[xmit->tail]); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - up->port.icount.tx++; - if (uart_circ_empty(xmit)) - break; - } while (--count > 0); - - if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) - uart_write_wakeup(&up->port); - - - if (uart_circ_empty(xmit)) - serial_pxa_stop_tx(&up->port); + uart_port_tx_limited(&up->port, ch, up->port.fifosize / 2, + true, + serial_out(up, UART_TX, ch), + ({})); } static void serial_pxa_start_tx(struct uart_port *port) diff --git a/drivers/tty/serial/rp2.c b/drivers/tty/serial/rp2.c index b81afb06f1f4..749b873a5d99 100644 --- a/drivers/tty/serial/rp2.c +++ b/drivers/tty/serial/rp2.c @@ -427,32 +427,13 @@ static void rp2_rx_chars(struct rp2_uart_port *up) static void rp2_tx_chars(struct rp2_uart_port *up) { - u16 max_tx = FIFO_SIZE - readw(up->base + RP2_TX_FIFO_COUNT); - struct circ_buf *xmit = &up->port.state->xmit; + u8 ch; - if (uart_tx_stopped(&up->port)) { - rp2_uart_stop_tx(&up->port); - return; - } - - for (; max_tx != 0; max_tx--) { - if (up->port.x_char) { - writeb(up->port.x_char, up->base + RP2_DATA_BYTE); - up->port.x_char = 0; - up->port.icount.tx++; - continue; - } - if (uart_circ_empty(xmit)) { - rp2_uart_stop_tx(&up->port); - break; - } - writeb(xmit->buf[xmit->tail], up->base + RP2_DATA_BYTE); - xmit->tail = 
(xmit->tail + 1) & (UART_XMIT_SIZE - 1); - up->port.icount.tx++; - } - - if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) - uart_write_wakeup(&up->port); + uart_port_tx_limited(&up->port, ch, + FIFO_SIZE - readw(up->base + RP2_TX_FIFO_COUNT), + true, + writeb(ch, up->base + RP2_DATA_BYTE), + ({})); } static void rp2_ch_interrupt(struct rp2_uart_port *up) diff --git a/drivers/tty/serial/serial_txx9.c b/drivers/tty/serial/serial_txx9.c index e12f1dc18c38..eab387b01e36 100644 --- a/drivers/tty/serial/serial_txx9.c +++ b/drivers/tty/serial/serial_txx9.c @@ -321,34 +321,12 @@ receive_chars(struct uart_port *up, unsigned int *status) static inline void transmit_chars(struct uart_port *up) { - struct circ_buf *xmit = &up->state->xmit; - int count; + u8 ch; - if (up->x_char) { - sio_out(up, TXX9_SITFIFO, up->x_char); - up->icount.tx++; - up->x_char = 0; - return; - } - if (uart_circ_empty(xmit) || uart_tx_stopped(up)) { - serial_txx9_stop_tx(up); - return; - } - - count = TXX9_SIO_TX_FIFO; - do { - sio_out(up, TXX9_SITFIFO, xmit->buf[xmit->tail]); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - up->icount.tx++; - if (uart_circ_empty(xmit)) - break; - } while (--count > 0); - - if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) - uart_write_wakeup(up); - - if (uart_circ_empty(xmit)) - serial_txx9_stop_tx(up); + uart_port_tx_limited(up, ch, TXX9_SIO_TX_FIFO, + true, + sio_out(up, TXX9_SITFIFO, ch), + ({})); } static irqreturn_t serial_txx9_interrupt(int irq, void *dev_id) diff --git a/drivers/tty/serial/sifive.c b/drivers/tty/serial/sifive.c index 7fb6760b5c37..1f565a216e74 100644 --- a/drivers/tty/serial/sifive.c +++ b/drivers/tty/serial/sifive.c @@ -288,33 +288,12 @@ static void __ssp_transmit_char(struct sifive_serial_port *ssp, int ch) */ static void __ssp_transmit_chars(struct sifive_serial_port *ssp) { - struct circ_buf *xmit = &ssp->port.state->xmit; - int count; + u8 ch; - if (ssp->port.x_char) { - __ssp_transmit_char(ssp, ssp->port.x_char); - 
ssp->port.icount.tx++; - ssp->port.x_char = 0; - return; - } - if (uart_circ_empty(xmit) || uart_tx_stopped(&ssp->port)) { - sifive_serial_stop_tx(&ssp->port); - return; - } - count = SIFIVE_TX_FIFO_DEPTH; - do { - __ssp_transmit_char(ssp, xmit->buf[xmit->tail]); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - ssp->port.icount.tx++; - if (uart_circ_empty(xmit)) - break; - } while (--count > 0); - - if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) - uart_write_wakeup(&ssp->port); - - if (uart_circ_empty(xmit)) - sifive_serial_stop_tx(&ssp->port); + uart_port_tx_limited(&ssp->port, ch, SIFIVE_TX_FIFO_DEPTH, + true, + __ssp_transmit_char(ssp, ch), + ({})); } /** diff --git a/drivers/tty/serial/sprd_serial.c b/drivers/tty/serial/sprd_serial.c index 342a87967631..3f34f7bb7700 100644 --- a/drivers/tty/serial/sprd_serial.c +++ b/drivers/tty/serial/sprd_serial.c @@ -626,35 +626,12 @@ static inline void sprd_rx(struct uart_port *port) static inline void sprd_tx(struct uart_port *port) { - struct circ_buf *xmit = &port->state->xmit; - int count; + u8 ch; - if (port->x_char) { - serial_out(port, SPRD_TXD, port->x_char); - port->icount.tx++; - port->x_char = 0; - return; - } - - if (uart_circ_empty(xmit) || uart_tx_stopped(port)) { - sprd_stop_tx(port); - return; - } - - count = THLD_TX_EMPTY; - do { - serial_out(port, SPRD_TXD, xmit->buf[xmit->tail]); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - port->icount.tx++; - if (uart_circ_empty(xmit)) - break; - } while (--count > 0); - - if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) - uart_write_wakeup(port); - - if (uart_circ_empty(xmit)) - sprd_stop_tx(port); + uart_port_tx_limited(port, ch, THLD_TX_EMPTY, + true, + serial_out(port, SPRD_TXD, ch), + ({})); } /* this handles the interrupt from one port */ diff --git a/drivers/tty/serial/st-asc.c b/drivers/tty/serial/st-asc.c index fcecea689a0d..5215e6910f68 100644 --- a/drivers/tty/serial/st-asc.c +++ b/drivers/tty/serial/st-asc.c @@ -237,50 +237,12 @@ 
static inline unsigned asc_hw_txroom(struct uart_port *port) */ static void asc_transmit_chars(struct uart_port *port) { - struct circ_buf *xmit = &port->state->xmit; - int txroom; - unsigned char c; + u8 ch; - txroom = asc_hw_txroom(port); - - if ((txroom != 0) && port->x_char) { - c = port->x_char; - port->x_char = 0; - asc_out(port, ASC_TXBUF, c); - port->icount.tx++; - txroom = asc_hw_txroom(port); - } - - if (uart_tx_stopped(port)) { - /* - * We should try and stop the hardware here, but I - * don't think the ASC has any way to do that. - */ - asc_disable_tx_interrupts(port); - return; - } - - if (uart_circ_empty(xmit)) { - asc_disable_tx_interrupts(port); - return; - } - - if (txroom == 0) - return; - - do { - c = xmit->buf[xmit->tail]; - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - asc_out(port, ASC_TXBUF, c); - port->icount.tx++; - txroom--; - } while ((txroom > 0) && (!uart_circ_empty(xmit))); - - if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) - uart_write_wakeup(port); - - if (uart_circ_empty(xmit)) - asc_disable_tx_interrupts(port); + uart_port_tx_limited(port, ch, asc_hw_txroom(port), + true, + asc_out(port, ASC_TXBUF, ch), + ({})); } static void asc_receive_chars(struct uart_port *port) From 35781d8356a2eecaa6074ceeb80ee22e252fcdae Mon Sep 17 00:00:00 2001 From: Aniket Randive Date: Fri, 7 Oct 2022 11:53:00 +0530 Subject: [PATCH 0794/4122] tty: serial: qcom-geni-serial: Add support for Hibernation feature Added changes to support the hibernation feature for serial UART. Added support for freeze, restore and thaw callbacks to put the device into hibernation. 
Signed-off-by: Aniket Randive Link: https://lore.kernel.org/r/1665123780-20557-1-git-send-email-quic_arandive@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/qcom_geni_serial.c | 35 +++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c index 83b66b73303a..b487823f0e61 100644 --- a/drivers/tty/serial/qcom_geni_serial.c +++ b/drivers/tty/serial/qcom_geni_serial.c @@ -924,6 +924,7 @@ static int qcom_geni_serial_port_setup(struct uart_port *uport) false, true, true); geni_se_init(&port->se, UART_RX_WM, port->rx_fifo_depth - 2); geni_se_select_mode(&port->se, GENI_SE_FIFO); + qcom_geni_serial_start_rx(uport); port->setup = true; return 0; @@ -1547,9 +1548,43 @@ static int __maybe_unused qcom_geni_serial_sys_resume(struct device *dev) return ret; } +static int qcom_geni_serial_sys_hib_resume(struct device *dev) +{ + int ret = 0; + struct uart_port *uport; + struct qcom_geni_private_data *private_data; + struct qcom_geni_serial_port *port = dev_get_drvdata(dev); + + uport = &port->uport; + private_data = uport->private_data; + + if (uart_console(uport)) { + geni_icc_set_tag(&port->se, 0x7); + geni_icc_set_bw(&port->se); + ret = uart_resume_port(private_data->drv, uport); + /* + * For hibernation usecase clients for + * console UART won't call port setup during restore, + * hence call port setup for console uart. + */ + qcom_geni_serial_port_setup(uport); + } else { + /* + * Peripheral register settings are lost during hibernation. + * Update setup flag such that port setup happens again + * during next session. Clients of HS-UART will close and + * open the port during hibernation. 
+ */ + port->setup = false; + } + return ret; +} + static const struct dev_pm_ops qcom_geni_serial_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(qcom_geni_serial_sys_suspend, qcom_geni_serial_sys_resume) + .restore = qcom_geni_serial_sys_hib_resume, + .thaw = qcom_geni_serial_sys_hib_resume, }; static const struct of_device_id qcom_geni_serial_match_table[] = { From 801954d1210a89b767176e1e34cf5976f41ca6d3 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sun, 16 Oct 2022 10:02:00 +0200 Subject: [PATCH 0795/4122] serial: 8250: 8250_omap: Support native RS485 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recent TI Sitara SoCs such as AM64/AM65 have gained the ability to automatically assert RTS when data is transmitted, obviating the need to emulate this functionality in software. The feature is controlled through new DIR_EN and DIR_POL bits in the Mode Definition Register 3. For details see page 8783 and 8890 of the AM65 TRM: https://www.ti.com/lit/ug/spruid7e/spruid7e.pdf Cc: Ilpo Järvinen Cc: Su Bao Cheng Cc: Vignesh Raghavendra Cc: Jan Kiszka Cc: Bin Liu Tested-by: Zeng Chao Reviewed-by: Ilpo Järvinen Signed-off-by: Lukas Wunner Link: https://lore.kernel.org/r/e9f25f5c9200a35d3162973c2b45d6b892cc9bf2.1665906869.git.lukas@wunner.de Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_omap.c | 85 ++++++++++++++++++++++++++++- 1 file changed, 82 insertions(+), 3 deletions(-) diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index 41b8c6b27136..1c8a48fdc8f2 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -44,6 +44,7 @@ #define UART_HAS_EFR2 BIT(4) #define UART_HAS_RHR_IT_DIS BIT(5) #define UART_RX_TIMEOUT_QUIRK BIT(6) +#define UART_HAS_NATIVE_RS485 BIT(7) #define OMAP_UART_FCR_RX_TRIG 6 #define OMAP_UART_FCR_TX_TRIG 4 @@ -101,6 +102,11 @@ #define UART_OMAP_IER2 0x1B #define UART_OMAP_IER2_RHR_IT_DIS BIT(2) +/* Mode Definition Register 3 
*/ +#define UART_OMAP_MDR3 0x20 +#define UART_OMAP_MDR3_DIR_POL BIT(3) +#define UART_OMAP_MDR3_DIR_EN BIT(4) + /* Enhanced features register 2 */ #define UART_OMAP_EFR2 0x23 #define UART_OMAP_EFR2_TIMEOUT_BEHAVE BIT(6) @@ -112,6 +118,7 @@ struct omap8250_priv { int line; u8 habit; u8 mdr1; + u8 mdr3; u8 efr; u8 scr; u8 wer; @@ -343,7 +350,10 @@ static void omap8250_restore_regs(struct uart_8250_port *up) up->port.ops->set_mctrl(&up->port, up->port.mctrl); - if (up->port.rs485.flags & SER_RS485_ENABLED) + serial_out(up, UART_OMAP_MDR3, priv->mdr3); + + if (up->port.rs485.flags & SER_RS485_ENABLED && + up->port.rs485_config == serial8250_em485_config) serial8250_em485_stop_tx(up); } @@ -792,6 +802,74 @@ static void omap_8250_unthrottle(struct uart_port *port) pm_runtime_put_autosuspend(port->dev); } +static int omap8250_rs485_config(struct uart_port *port, + struct ktermios *termios, + struct serial_rs485 *rs485) +{ + struct omap8250_priv *priv = port->private_data; + struct uart_8250_port *up = up_to_u8250p(port); + u32 fixed_delay_rts_before_send = 0; + u32 fixed_delay_rts_after_send = 0; + unsigned int baud; + + /* + * There is a fixed delay of 3 bit clock cycles after the TX shift + * register is going empty to allow time for the stop bit to transition + * through the transceiver before direction is changed to receive. 
+ * + * Additionally there appears to be a 1 bit clock delay between writing + * to the THR register and transmission of the start bit, per page 8783 + * of the AM65 TRM: https://www.ti.com/lit/ug/spruid7e/spruid7e.pdf + */ + if (priv->quot) { + if (priv->mdr1 & UART_OMAP_MDR1_16X_MODE) + baud = port->uartclk / (16 * priv->quot); + else + baud = port->uartclk / (13 * priv->quot); + + fixed_delay_rts_after_send = 3 * MSEC_PER_SEC / baud; + fixed_delay_rts_before_send = 1 * MSEC_PER_SEC / baud; + } + + /* + * Fall back to RS485 software emulation if the UART is missing + * hardware support, if the device tree specifies an mctrl_gpio + * (indicates that RTS is unavailable due to a pinmux conflict) + * or if the requested delays exceed the fixed hardware delays. + */ + if (!(priv->habit & UART_HAS_NATIVE_RS485) || + mctrl_gpio_to_gpiod(up->gpios, UART_GPIO_RTS) || + rs485->delay_rts_after_send > fixed_delay_rts_after_send || + rs485->delay_rts_before_send > fixed_delay_rts_before_send) { + priv->mdr3 &= ~UART_OMAP_MDR3_DIR_EN; + serial_out(up, UART_OMAP_MDR3, priv->mdr3); + + port->rs485_config = serial8250_em485_config; + return serial8250_em485_config(port, termios, rs485); + } + + rs485->delay_rts_after_send = fixed_delay_rts_after_send; + rs485->delay_rts_before_send = fixed_delay_rts_before_send; + + if (rs485->flags & SER_RS485_ENABLED) + priv->mdr3 |= UART_OMAP_MDR3_DIR_EN; + else + priv->mdr3 &= ~UART_OMAP_MDR3_DIR_EN; + + /* + * Retain same polarity semantics as RS485 software emulation, + * i.e. SER_RS485_RTS_ON_SEND means driving RTS low on send. 
+ */ + if (rs485->flags & SER_RS485_RTS_ON_SEND) + priv->mdr3 &= ~UART_OMAP_MDR3_DIR_POL; + else + priv->mdr3 |= UART_OMAP_MDR3_DIR_POL; + + serial_out(up, UART_OMAP_MDR3, priv->mdr3); + + return 0; +} + #ifdef CONFIG_SERIAL_8250_DMA static int omap_8250_rx_dma(struct uart_8250_port *p); @@ -1241,7 +1319,7 @@ static struct omap8250_dma_params am33xx_dma = { static struct omap8250_platdata am654_platdata = { .dma_params = &am654_dma, .habit = UART_HAS_EFR2 | UART_HAS_RHR_IT_DIS | - UART_RX_TIMEOUT_QUIRK, + UART_RX_TIMEOUT_QUIRK | UART_HAS_NATIVE_RS485, }; static struct omap8250_platdata am33xx_platdata = { @@ -1334,7 +1412,8 @@ static int omap8250_probe(struct platform_device *pdev) up.port.shutdown = omap_8250_shutdown; up.port.throttle = omap_8250_throttle; up.port.unthrottle = omap_8250_unthrottle; - up.port.rs485_config = serial8250_em485_config; + up.port.rs485_config = omap8250_rs485_config; + /* same rs485_supported for software emulation and native RS485 */ up.port.rs485_supported = serial8250_em485_supported; up.rs485_start_tx = serial8250_em485_start_tx; up.rs485_stop_tx = serial8250_em485_stop_tx; From f4000a06f40f6008c9cd8092c30ed3ffb62a1587 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:08 +0300 Subject: [PATCH 0796/4122] serial: dz: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() helper. 
Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-2-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/dz.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/tty/serial/dz.c b/drivers/tty/serial/dz.c index 829b452daee9..6b7ed7f2f3ca 100644 --- a/drivers/tty/serial/dz.c +++ b/drivers/tty/serial/dz.c @@ -279,9 +279,8 @@ static inline void dz_transmit_chars(struct dz_mux *mux) * so we go one char at a time) :-< */ tmp = xmit->buf[xmit->tail]; - xmit->tail = (xmit->tail + 1) & (DZ_XMIT_SIZE - 1); dz_out(dport, DZ_TDR, tmp); - dport->port.icount.tx++; + uart_xmit_advance(&dport->port, 1); if (uart_circ_chars_pending(xmit) < DZ_WAKEUP_CHARS) uart_write_wakeup(&dport->port); From 20b01af85291b8e25133ab22399f2de6bbad861e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:09 +0300 Subject: [PATCH 0797/4122] serial: men_z135_uart: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() helper. 
Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-3-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/men_z135_uart.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/tty/serial/men_z135_uart.c b/drivers/tty/serial/men_z135_uart.c index 3690f5cf0f43..d2502aaa3e8c 100644 --- a/drivers/tty/serial/men_z135_uart.c +++ b/drivers/tty/serial/men_z135_uart.c @@ -352,11 +352,8 @@ static void men_z135_handle_tx(struct men_z135_port *uart) n = min(n, s); memcpy_toio(port->membase + MEN_Z135_TX_RAM, &xmit->buf[xmit->tail], n); - xmit->tail = (xmit->tail + n) & (UART_XMIT_SIZE - 1); - iowrite32(n & 0x3ff, port->membase + MEN_Z135_TX_CTRL); - - port->icount.tx += n; + uart_xmit_advance(port, n); if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) uart_write_wakeup(port); From 1fcff75f3932bdc5ef923bbe8e973d1f3ed59096 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:10 +0300 Subject: [PATCH 0798/4122] serial: msm: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() helper. 
Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-4-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/msm_serial.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/tty/serial/msm_serial.c b/drivers/tty/serial/msm_serial.c index 7dd19a281579..2b2e0f74b75a 100644 --- a/drivers/tty/serial/msm_serial.c +++ b/drivers/tty/serial/msm_serial.c @@ -464,12 +464,9 @@ static void msm_complete_tx_dma(void *args) } count = dma->count - state.residue; - port->icount.tx += count; + uart_xmit_advance(port, count); dma->count = 0; - xmit->tail += count; - xmit->tail &= UART_XMIT_SIZE - 1; - /* Restore "Tx FIFO below watermark" interrupt */ msm_port->imr |= MSM_UART_IMR_TXLEV; msm_write(port, msm_port->imr, MSM_UART_IMR); @@ -866,13 +863,11 @@ static void msm_handle_tx_pio(struct uart_port *port, unsigned int tx_count) else num_chars = 1; - for (i = 0; i < num_chars; i++) { + for (i = 0; i < num_chars; i++) buf[i] = xmit->buf[xmit->tail + i]; - port->icount.tx++; - } iowrite32_rep(tf, buf, 1); - xmit->tail = (xmit->tail + num_chars) & (UART_XMIT_SIZE - 1); + uart_xmit_advance(port, num_chars); tf_pointer += num_chars; } From a5c9611ddc51b846dd699c3ce2a6bcbfa544ef20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:11 +0300 Subject: [PATCH 0799/4122] serial: pch_uart: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() helper. 
Reviewed-by: Andy Shevchenko Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-5-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/pch_uart.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/tty/serial/pch_uart.c b/drivers/tty/serial/pch_uart.c index c59ce7886579..c76719c0f453 100644 --- a/drivers/tty/serial/pch_uart.c +++ b/drivers/tty/serial/pch_uart.c @@ -738,15 +738,12 @@ static void pch_dma_tx_complete(void *arg) { struct eg20t_port *priv = arg; struct uart_port *port = &priv->port; - struct circ_buf *xmit = &port->state->xmit; struct scatterlist *sg = priv->sg_tx_p; int i; - for (i = 0; i < priv->nent; i++, sg++) { - xmit->tail += sg_dma_len(sg); - port->icount.tx += sg_dma_len(sg); - } - xmit->tail &= UART_XMIT_SIZE - 1; + for (i = 0; i < priv->nent; i++, sg++) + uart_xmit_advance(port, sg_dma_len(sg)); + async_tx_ack(priv->desc_tx); dma_unmap_sg(port->dev, sg, priv->orig_nent, DMA_TO_DEVICE); priv->tx_dma_use = 0; @@ -843,8 +840,7 @@ static unsigned int handle_tx(struct eg20t_port *priv) while (!uart_tx_stopped(port) && !uart_circ_empty(xmit) && fifo_size) { iowrite8(xmit->buf[xmit->tail], priv->membase + PCH_UART_THR); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - port->icount.tx++; + uart_xmit_advance(port, 1); fifo_size--; tx_empty = 0; } From a2a74303b3085109b27160a49e61d4d067acfae7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:12 +0300 Subject: [PATCH 0800/4122] serial: sc16is7xx: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() helper. 
Reviewed-by: Andy Shevchenko Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-6-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sc16is7xx.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index 524921360ca7..39f92eb1e698 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -686,13 +686,10 @@ static void sc16is7xx_handle_tx(struct uart_port *port) } to_send = (to_send > txlen) ? txlen : to_send; - /* Add data to send */ - port->icount.tx += to_send; - /* Convert to linear buffer */ for (i = 0; i < to_send; ++i) { s->buf[i] = xmit->buf[xmit->tail]; - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); + uart_xmit_advance(port, 1); } sc16is7xx_fifo_write(port, to_send); From fc59f80b087447a198274e8b3adf2b8ddcecb561 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:13 +0300 Subject: [PATCH 0801/4122] serial: 8250_bcm7271: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() helper. 
Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-7-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_bcm7271.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/tty/serial/8250/8250_bcm7271.c b/drivers/tty/serial/8250/8250_bcm7271.c index fa8ccf204d86..062177b64d21 100644 --- a/drivers/tty/serial/8250/8250_bcm7271.c +++ b/drivers/tty/serial/8250/8250_bcm7271.c @@ -425,9 +425,7 @@ static int brcmuart_tx_dma(struct uart_8250_port *p) priv->dma.tx_err = 0; memcpy(priv->tx_buf, &xmit->buf[xmit->tail], tx_size); - xmit->tail += tx_size; - xmit->tail &= UART_XMIT_SIZE - 1; - p->port.icount.tx += tx_size; + uart_xmit_advance(&p->port, tx_size); if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) uart_write_wakeup(&p->port); From 051ef7c8d81ff25b843afb661776e0a77a4f9374 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:14 +0300 Subject: [PATCH 0802/4122] serial: 8250: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() helper. 
Reviewed-by: Andy Shevchenko Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-8-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_port.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index fe8662cd9402..b94e60e75326 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -1842,8 +1842,7 @@ void serial8250_tx_chars(struct uart_8250_port *up) */ serial_in(up, UART_SCR); } - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - port->icount.tx++; + uart_xmit_advance(port, 1); if (uart_circ_empty(xmit)) break; if ((up->capabilities & UART_CAP_HFIFO) && From 71a67573d0ede98f9c5f6466da6f6a7d7663498b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:15 +0300 Subject: [PATCH 0803/4122] serial: pl011: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() helper. Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-9-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/amba-pl011.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index 5cdced39eafd..6d8552506091 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -677,8 +677,7 @@ static int pl011_dma_tx_refill(struct uart_amba_port *uap) * Now we know that DMA will fire, so advance the ring buffer * with the stuff we just dispatched. 
*/ - xmit->tail = (xmit->tail + count) & (UART_XMIT_SIZE - 1); - uap->port.icount.tx += count; + uart_xmit_advance(&uap->port, count); if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) uart_write_wakeup(&uap->port); From d29d947c14d1704d567db3025a293c4ee54cd90c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:16 +0300 Subject: [PATCH 0804/4122] serial: ar933x: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() helper. Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-10-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/ar933x_uart.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/tty/serial/ar933x_uart.c b/drivers/tty/serial/ar933x_uart.c index 925484a42c82..4c3d04c6826a 100644 --- a/drivers/tty/serial/ar933x_uart.c +++ b/drivers/tty/serial/ar933x_uart.c @@ -425,8 +425,7 @@ static void ar933x_uart_tx_chars(struct ar933x_uart_port *up) ar933x_uart_putc(up, xmit->buf[xmit->tail]); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - up->port.icount.tx++; + uart_xmit_advance(&up->port, 1); } while (--count > 0); if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) From 8a8dee2cdbb3d147430684d408127e5c9e910fc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:17 +0300 Subject: [PATCH 0805/4122] serial: arc: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() helper. 
Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-11-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/arc_uart.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/tty/serial/arc_uart.c b/drivers/tty/serial/arc_uart.c index 2a65ea2660e1..748e8b1cf4f7 100644 --- a/drivers/tty/serial/arc_uart.c +++ b/drivers/tty/serial/arc_uart.c @@ -166,8 +166,7 @@ static void arc_serial_tx_chars(struct uart_port *port) sent = 1; } else if (!uart_circ_empty(xmit)) { ch = xmit->buf[xmit->tail]; - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - port->icount.tx++; + uart_xmit_advance(port, 1); while (!(UART_GET_STATUS(port) & TXEMPTY)) cpu_relax(); UART_SET_DATA(port, ch); From add147a4591e20732c3f51ed63414d9e89757e5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:18 +0300 Subject: [PATCH 0806/4122] serial: atmel: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() helper. 
Reviewed-by: Claudiu Beznea Acked-by: Richard GENOUD Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-12-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/atmel_serial.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index a6b4d30c5888..4ca04676c406 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -859,10 +859,7 @@ static void atmel_complete_tx_dma(void *arg) if (chan) dmaengine_terminate_all(chan); - xmit->tail += atmel_port->tx_len; - xmit->tail &= UART_XMIT_SIZE - 1; - - port->icount.tx += atmel_port->tx_len; + uart_xmit_advance(port, atmel_port->tx_len); spin_lock_irq(&atmel_port->lock_tx); async_tx_ack(atmel_port->desc_tx); @@ -1455,11 +1452,7 @@ static void atmel_tx_pdc(struct uart_port *port) /* nothing left to transmit? */ if (atmel_uart_readl(port, ATMEL_PDC_TCR)) return; - - xmit->tail += pdc->ofs; - xmit->tail &= UART_XMIT_SIZE - 1; - - port->icount.tx += pdc->ofs; + uart_xmit_advance(port, pdc->ofs); pdc->ofs = 0; /* more to transmit - setup next transfer */ From 4146765cae90bac4643e2225a49670da3749311f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:19 +0300 Subject: [PATCH 0807/4122] serial: clps711x: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() helper. 
Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-13-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/clps711x.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/tty/serial/clps711x.c b/drivers/tty/serial/clps711x.c index 404b43a5ae33..e190dce58f46 100644 --- a/drivers/tty/serial/clps711x.c +++ b/drivers/tty/serial/clps711x.c @@ -166,8 +166,7 @@ static irqreturn_t uart_clps711x_int_tx(int irq, void *dev_id) u32 sysflg = 0; writew(xmit->buf[xmit->tail], port->membase + UARTDR_OFFSET); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - port->icount.tx++; + uart_xmit_advance(port, 1); regmap_read(s->syscon, SYSFLG_OFFSET, &sysflg); if (sysflg & SYSFLG_UTXFF) From f8097f0caaf2188e8e4e552116cbd8d5f7746f8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:20 +0300 Subject: [PATCH 0808/4122] serial: cpm_uart: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() helper. 
Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-14-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/cpm_uart/cpm_uart_core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/tty/serial/cpm_uart/cpm_uart_core.c b/drivers/tty/serial/cpm_uart/cpm_uart_core.c index b4369ed45ae2..5565f302cb21 100644 --- a/drivers/tty/serial/cpm_uart/cpm_uart_core.c +++ b/drivers/tty/serial/cpm_uart/cpm_uart_core.c @@ -684,8 +684,7 @@ static int cpm_uart_tx_pump(struct uart_port *port) p = cpm2cpu_addr(in_be32(&bdp->cbd_bufaddr), pinfo); while (count < pinfo->tx_fifosize) { *p++ = xmit->buf[xmit->tail]; - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - port->icount.tx++; + uart_xmit_advance(port, 1); count++; if (xmit->head == xmit->tail) break; From cb867f542e2a895e9f2bdcd846f1241b51e3cc2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:21 +0300 Subject: [PATCH 0809/4122] serial: digicolor: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() helper. 
Acked-by: Baruch Siach Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-15-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/digicolor-usart.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/tty/serial/digicolor-usart.c b/drivers/tty/serial/digicolor-usart.c index 0c0a62346f23..ed197705f7ee 100644 --- a/drivers/tty/serial/digicolor-usart.c +++ b/drivers/tty/serial/digicolor-usart.c @@ -202,8 +202,7 @@ static void digicolor_uart_tx(struct uart_port *port) while (!uart_circ_empty(xmit)) { writeb(xmit->buf[xmit->tail], port->membase + UA_EMI_REC); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - port->icount.tx++; + uart_xmit_advance(port, 1); if (digicolor_uart_tx_full(port)) break; From 7840a92a3e7d7f78bdc16b75c62101bc240bfa93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:22 +0300 Subject: [PATCH 0810/4122] serial: linflexuart: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() helper. 
Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-16-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/fsl_linflexuart.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/tty/serial/fsl_linflexuart.c b/drivers/tty/serial/fsl_linflexuart.c index 84e8153e5420..6fc21b6684e6 100644 --- a/drivers/tty/serial/fsl_linflexuart.c +++ b/drivers/tty/serial/fsl_linflexuart.c @@ -178,8 +178,7 @@ static inline void linflex_transmit_buffer(struct uart_port *sport) while (!uart_circ_empty(xmit)) { linflex_put_char(sport, xmit->buf[xmit->tail]); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - sport->icount.tx++; + uart_xmit_advance(sport, 1); } if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) From cacf7f689b9b783d1efaf4b545d8038ee6edc73e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:23 +0300 Subject: [PATCH 0811/4122] serial: fsl_lpuart: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() helper. 
Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-17-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/fsl_lpuart.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index d811eda1844e..849ef313b824 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -535,9 +535,7 @@ static void lpuart_dma_tx_complete(void *arg) dma_unmap_sg(chan->device->dev, sgl, sport->dma_tx_nents, DMA_TO_DEVICE); - xmit->tail = (xmit->tail + sport->dma_tx_bytes) & (UART_XMIT_SIZE - 1); - - sport->port.icount.tx += sport->dma_tx_bytes; + uart_xmit_advance(&sport->port, sport->dma_tx_bytes); sport->dma_tx_in_progress = false; spin_unlock_irqrestore(&sport->port.lock, flags); @@ -772,8 +770,7 @@ static inline void lpuart32_transmit_buffer(struct lpuart_port *sport) txcnt &= UARTWATER_COUNT_MASK; while (!uart_circ_empty(xmit) && (txcnt < sport->txfifo_size)) { lpuart32_write(&sport->port, xmit->buf[xmit->tail], UARTDATA); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - sport->port.icount.tx++; + uart_xmit_advance(&sport->port, 1); txcnt = lpuart32_read(&sport->port, UARTWATER); txcnt = txcnt >> UARTWATER_TXCNT_OFF; txcnt &= UARTWATER_COUNT_MASK; From 26e8f1d9a88144b35b35ce3b9df2d437d8cb2ed8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:24 +0300 Subject: [PATCH 0812/4122] serial: imx: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() helper. 
Reviewed-by: Uwe Kleine-König Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-18-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/imx.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index 05b432dc7a85..a7548d0a1aee 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -563,8 +563,7 @@ static inline void imx_uart_transmit_buffer(struct imx_port *sport) /* send xmit->buf[xmit->tail] * out the port here */ imx_uart_writel(sport, xmit->buf[xmit->tail], URTX0); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - sport->port.icount.tx++; + uart_xmit_advance(&sport->port, 1); } if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) @@ -590,9 +589,7 @@ static void imx_uart_dma_tx_callback(void *data) ucr1 &= ~UCR1_TXDMAEN; imx_uart_writel(sport, ucr1, UCR1); - /* update the stat */ - xmit->tail = (xmit->tail + sport->tx_bytes) & (UART_XMIT_SIZE - 1); - sport->port.icount.tx += sport->tx_bytes; + uart_xmit_advance(&sport->port, sport->tx_bytes); dev_dbg(sport->port.dev, "we finish the TX DMA.\n"); From daf63432f4626e4cf97dac0cf12c7969617ff1a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:25 +0300 Subject: [PATCH 0813/4122] serial: ip22zilog: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() helper. 
Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-19-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/ip22zilog.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/ip22zilog.c b/drivers/tty/serial/ip22zilog.c index dd0a8915ce4f..b1f27e168135 100644 --- a/drivers/tty/serial/ip22zilog.c +++ b/drivers/tty/serial/ip22zilog.c @@ -409,8 +409,7 @@ static void ip22zilog_transmit_chars(struct uart_ip22zilog_port *up, ZSDELAY(); ZS_WSYNC(channel); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - up->port.icount.tx++; + uart_xmit_advance(&up->port, 1); if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) uart_write_wakeup(&up->port); @@ -609,8 +608,7 @@ static void ip22zilog_start_tx(struct uart_port *port) ZSDELAY(); ZS_WSYNC(channel); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - port->icount.tx++; + uart_xmit_advance(port, 1); if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) uart_write_wakeup(&up->port); From 53c3d62f46872fd7bb177a9181291e58edcc3cf4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:26 +0300 Subject: [PATCH 0814/4122] serial: liteuart: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() helper. 
Acked-by: Gabriel Somlo Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-20-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/liteuart.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/tty/serial/liteuart.c b/drivers/tty/serial/liteuart.c index 4c0604325ee9..062812fe1b09 100644 --- a/drivers/tty/serial/liteuart.c +++ b/drivers/tty/serial/liteuart.c @@ -136,8 +136,7 @@ static void liteuart_start_tx(struct uart_port *port) } else if (!uart_circ_empty(xmit)) { while (xmit->head != xmit->tail) { ch = xmit->buf[xmit->tail]; - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - port->icount.tx++; + uart_xmit_advance(port, 1); liteuart_putchar(port, ch); } } From 502b13cc5aa2c9b2b2a802a7d473734439dc09b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:27 +0300 Subject: [PATCH 0815/4122] serial: max3100: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() helper. 
Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-21-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/max3100.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/tty/serial/max3100.c b/drivers/tty/serial/max3100.c index c69602f356fd..bb74f23251fe 100644 --- a/drivers/tty/serial/max3100.c +++ b/drivers/tty/serial/max3100.c @@ -292,9 +292,7 @@ static void max3100_work(struct work_struct *w) } else if (!uart_circ_empty(xmit) && !uart_tx_stopped(&s->port)) { tx = xmit->buf[xmit->tail]; - xmit->tail = (xmit->tail + 1) & - (UART_XMIT_SIZE - 1); - s->port.icount.tx++; + uart_xmit_advance(&s->port, 1); } if (tx != 0xffff) { max3100_calc_parity(s, &tx); From d41727dbdfcbb25bf6e30ddcbe330bb9e7b9fcbd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:28 +0300 Subject: [PATCH 0816/4122] serial: max310x: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() helper. 
Reviewed-by: Andy Shevchenko Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-22-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/max310x.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index fbf6e2b3161c..4eb24e3407f8 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -787,10 +787,7 @@ static void max310x_handle_tx(struct uart_port *port) } else { max310x_batch_write(port, xmit->buf + xmit->tail, to_send); } - - /* Add data to send */ - port->icount.tx += to_send; - xmit->tail = (xmit->tail + to_send) & (UART_XMIT_SIZE - 1); + uart_xmit_advance(port, to_send); } if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) From 681ef4219bee73139c713226d90bd068d0bd9b89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:29 +0300 Subject: [PATCH 0817/4122] serial: meson: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() helper. 
Acked-by: Martin Blumenstingl Acked-by: Neil Armstrong Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-23-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/meson_uart.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/tty/serial/meson_uart.c b/drivers/tty/serial/meson_uart.c index 056243c12836..74110017988a 100644 --- a/drivers/tty/serial/meson_uart.c +++ b/drivers/tty/serial/meson_uart.c @@ -162,8 +162,7 @@ static void meson_uart_start_tx(struct uart_port *port) ch = xmit->buf[xmit->tail]; writel(ch, port->membase + AML_UART_WFIFO); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - port->icount.tx++; + uart_xmit_advance(port, 1); } if (!uart_circ_empty(xmit)) { From 5c664457a9373394eab65f1459ad55c36e147c94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:30 +0300 Subject: [PATCH 0818/4122] serial: milbeaut_usio: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() helper. 
Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-24-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/milbeaut_usio.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/tty/serial/milbeaut_usio.c b/drivers/tty/serial/milbeaut_usio.c index c15e0d84dc7e..44988a2941b8 100644 --- a/drivers/tty/serial/milbeaut_usio.c +++ b/drivers/tty/serial/milbeaut_usio.c @@ -98,8 +98,7 @@ static void mlb_usio_tx_chars(struct uart_port *port) do { writew(xmit->buf[xmit->tail], port->membase + MLB_USIO_REG_DR); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - port->icount.tx++; + uart_xmit_advance(port, 1); if (uart_circ_empty(xmit)) break; From 269599fa886f63dc425857f4672e1d0c8df2a5f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:31 +0300 Subject: [PATCH 0819/4122] serial: mvebu-uart: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() helper. 
Reviewed-by: Pali Rohár Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-25-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/mvebu-uart.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/tty/serial/mvebu-uart.c b/drivers/tty/serial/mvebu-uart.c index 7b566404cb33..31f739c7a08b 100644 --- a/drivers/tty/serial/mvebu-uart.c +++ b/drivers/tty/serial/mvebu-uart.c @@ -223,8 +223,7 @@ static void mvebu_uart_start_tx(struct uart_port *port) if (IS_EXTENDED(port) && !uart_circ_empty(xmit)) { writel(xmit->buf[xmit->tail], port->membase + UART_TSH(port)); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - port->icount.tx++; + uart_xmit_advance(port, 1); } ctl = readl(port->membase + UART_INTR(port)); From 98fdebeebbad98393d691aaa092e3c7889e5cbbd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:32 +0300 Subject: [PATCH 0820/4122] serial: pic32: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() helper. 
Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-26-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/pic32_uart.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/tty/serial/pic32_uart.c b/drivers/tty/serial/pic32_uart.c index 2beada66c824..52d026865a32 100644 --- a/drivers/tty/serial/pic32_uart.c +++ b/drivers/tty/serial/pic32_uart.c @@ -376,8 +376,7 @@ static void pic32_uart_do_tx(struct uart_port *port) pic32_uart_writel(sport, PIC32_UART_TX, c); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - port->icount.tx++; + uart_xmit_advance(port, 1); if (uart_circ_empty(xmit)) break; if (--max_count == 0) From b31b07a7d21a10f5068b92563c8037a8c2b15f1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:33 +0300 Subject: [PATCH 0821/4122] serial: pmac_zilog: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() helper. 
Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-27-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/pmac_zilog.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/pmac_zilog.c b/drivers/tty/serial/pmac_zilog.c index fe2e4ec423f7..13668ffdb1e7 100644 --- a/drivers/tty/serial/pmac_zilog.c +++ b/drivers/tty/serial/pmac_zilog.c @@ -410,8 +410,7 @@ static void pmz_transmit_chars(struct uart_pmac_port *uap) write_zsdata(uap, xmit->buf[xmit->tail]); zssync(uap); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - uap->port.icount.tx++; + uart_xmit_advance(&uap->port, 1); if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) uart_write_wakeup(&uap->port); @@ -627,8 +626,7 @@ static void pmz_start_tx(struct uart_port *port) return; write_zsdata(uap, xmit->buf[xmit->tail]); zssync(uap); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - port->icount.tx++; + uart_xmit_advance(port, 1); if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) uart_write_wakeup(&uap->port); From 3d4d838423a56a3c18174d194508e728ce9fe6af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:34 +0300 Subject: [PATCH 0822/4122] serial: rda: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() helper. 
Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-28-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/rda-uart.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/tty/serial/rda-uart.c b/drivers/tty/serial/rda-uart.c index 0e387e2144fa..be5c842b5ba9 100644 --- a/drivers/tty/serial/rda-uart.c +++ b/drivers/tty/serial/rda-uart.c @@ -353,8 +353,7 @@ static void rda_uart_send_chars(struct uart_port *port) ch = xmit->buf[xmit->tail]; rda_uart_write(port, ch, RDA_UART_RXTX_BUFFER); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - port->icount.tx++; + uart_xmit_advance(port, 1); } if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) From ec04d75fe4386c7e5eafb72367e962f56694d490 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:35 +0300 Subject: [PATCH 0823/4122] serial: samsung_tty: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() helper. 
Reviewed-by: Sam Protsenko Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-29-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/samsung_tty.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/tty/serial/samsung_tty.c b/drivers/tty/serial/samsung_tty.c index 77d1363029f5..7e34361a1085 100644 --- a/drivers/tty/serial/samsung_tty.c +++ b/drivers/tty/serial/samsung_tty.c @@ -288,7 +288,6 @@ static void s3c24xx_serial_stop_tx(struct uart_port *port) { struct s3c24xx_uart_port *ourport = to_ourport(port); struct s3c24xx_uart_dma *dma = ourport->dma; - struct circ_buf *xmit = &port->state->xmit; struct dma_tx_state state; int count; @@ -316,8 +315,7 @@ static void s3c24xx_serial_stop_tx(struct uart_port *port) DMA_TO_DEVICE); async_tx_ack(dma->tx_desc); count = dma->tx_bytes_requested - state.residue; - xmit->tail = (xmit->tail + count) & (UART_XMIT_SIZE - 1); - port->icount.tx += count; + uart_xmit_advance(port, count); } ourport->tx_enabled = 0; @@ -351,8 +349,7 @@ static void s3c24xx_serial_tx_dma_complete(void *args) spin_lock_irqsave(&port->lock, flags); - xmit->tail = (xmit->tail + count) & (UART_XMIT_SIZE - 1); - port->icount.tx += count; + uart_xmit_advance(port, count); ourport->tx_in_progress = 0; if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) @@ -916,8 +913,7 @@ static void s3c24xx_serial_tx_chars(struct s3c24xx_uart_port *ourport) break; wr_reg(port, S3C2410_UTXH, xmit->buf[xmit->tail]); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - port->icount.tx++; + uart_xmit_advance(port, 1); count--; } From 10b459d2c3da954286e1fcc3ce33ec42d0ea5126 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:36 +0300 Subject: [PATCH 0824/4122] serial: sb1250-duart: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() 
helper. Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-30-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sb1250-duart.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/tty/serial/sb1250-duart.c b/drivers/tty/serial/sb1250-duart.c index c5d2b6cdcb4a..de56f383964e 100644 --- a/drivers/tty/serial/sb1250-duart.c +++ b/drivers/tty/serial/sb1250-duart.c @@ -399,8 +399,7 @@ static void sbd_transmit_chars(struct sbd_port *sport) /* Send char. */ if (!stop_tx) { write_sbdchn(sport, R_DUART_TX_HOLD, xmit->buf[xmit->tail]); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - sport->port.icount.tx++; + uart_xmit_advance(&sport->port, 1); if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) uart_write_wakeup(&sport->port); From 3ea03c021dcceac88bbb38f7b67bc08d23c80c84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:37 +0300 Subject: [PATCH 0825/4122] serial: sccnxp: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() helper. 
Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-31-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sccnxp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/tty/serial/sccnxp.c b/drivers/tty/serial/sccnxp.c index dd98509f52e5..7df687822634 100644 --- a/drivers/tty/serial/sccnxp.c +++ b/drivers/tty/serial/sccnxp.c @@ -468,8 +468,7 @@ static void sccnxp_handle_tx(struct uart_port *port) break; sccnxp_port_write(port, SCCNXP_THR_REG, xmit->buf[xmit->tail]); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - port->icount.tx++; + uart_xmit_advance(port, 1); } if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) From b7e2647671a2e7eb8d5dd1cb73464d7d760f6139 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:38 +0300 Subject: [PATCH 0826/4122] serial: tegra: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() helper. 
Reviewed-by: Thierry Reding Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-32-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial-tegra.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/tty/serial/serial-tegra.c b/drivers/tty/serial/serial-tegra.c index b7170cb9a544..4304e3f9307d 100644 --- a/drivers/tty/serial/serial-tegra.c +++ b/drivers/tty/serial/serial-tegra.c @@ -496,8 +496,7 @@ static void tegra_uart_fill_tx_fifo(struct tegra_uart_port *tup, int max_bytes) break; } tegra_uart_write(tup, xmit->buf[xmit->tail], UART_TX); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - tup->uport.icount.tx++; + uart_xmit_advance(&tup->uport, 1); } } From e234ef0ef1dec33384968c695f0c2c751b0621ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:39 +0300 Subject: [PATCH 0827/4122] serial: sh-sci: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() helper. 
Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-33-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sh-sci.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index 62f773286d44..a92a89780357 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -1181,10 +1181,7 @@ static void sci_dma_tx_complete(void *arg) spin_lock_irqsave(&port->lock, flags); - xmit->tail += s->tx_dma_len; - xmit->tail &= UART_XMIT_SIZE - 1; - - port->icount.tx += s->tx_dma_len; + uart_xmit_advance(port, s->tx_dma_len); if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) uart_write_wakeup(port); From b92df54ccf7355256e4eebe2f36dc2b83808e9ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:40 +0300 Subject: [PATCH 0828/4122] serial: sprd: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() helper. 
Reviewed-by: Baolin Wang Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-34-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sprd_serial.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/tty/serial/sprd_serial.c b/drivers/tty/serial/sprd_serial.c index 3f34f7bb7700..492a3bdab5ba 100644 --- a/drivers/tty/serial/sprd_serial.c +++ b/drivers/tty/serial/sprd_serial.c @@ -206,7 +206,6 @@ static void sprd_stop_tx_dma(struct uart_port *port) { struct sprd_uart_port *sp = container_of(port, struct sprd_uart_port, port); - struct circ_buf *xmit = &port->state->xmit; struct dma_tx_state state; u32 trans_len; @@ -215,8 +214,7 @@ static void sprd_stop_tx_dma(struct uart_port *port) dmaengine_tx_status(sp->tx_dma.chn, sp->tx_dma.cookie, &state); if (state.residue) { trans_len = state.residue - sp->tx_dma.phys_addr; - xmit->tail = (xmit->tail + trans_len) & (UART_XMIT_SIZE - 1); - port->icount.tx += trans_len; + uart_xmit_advance(port, trans_len); dma_unmap_single(port->dev, sp->tx_dma.phys_addr, sp->tx_dma.trans_len, DMA_TO_DEVICE); } @@ -253,8 +251,7 @@ static void sprd_complete_tx_dma(void *data) dma_unmap_single(port->dev, sp->tx_dma.phys_addr, sp->tx_dma.trans_len, DMA_TO_DEVICE); - xmit->tail = (xmit->tail + sp->tx_dma.trans_len) & (UART_XMIT_SIZE - 1); - port->icount.tx += sp->tx_dma.trans_len; + uart_xmit_advance(port, sp->tx_dma.trans_len); if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) uart_write_wakeup(port); From 29d8c07b49578cc58f48fdea361c1f419515076c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:41 +0300 Subject: [PATCH 0829/4122] serial: stm32: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() helper. 
Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-35-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/stm32-usart.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c index dfdbcf092fac..24def72b2565 100644 --- a/drivers/tty/serial/stm32-usart.c +++ b/drivers/tty/serial/stm32-usart.c @@ -596,8 +596,7 @@ static void stm32_usart_transmit_chars_pio(struct uart_port *port) if (!(readl_relaxed(port->membase + ofs->isr) & USART_SR_TXE)) break; writel_relaxed(xmit->buf[xmit->tail], port->membase + ofs->tdr); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - port->icount.tx++; + uart_xmit_advance(port, 1); } /* rely on TXE irq (mask or unmask) for sending remaining data */ @@ -673,8 +672,8 @@ static void stm32_usart_transmit_chars_dma(struct uart_port *port) stm32_usart_set_bits(port, ofs->cr3, USART_CR3_DMAT); - xmit->tail = (xmit->tail + count) & (UART_XMIT_SIZE - 1); - port->icount.tx += count; + uart_xmit_advance(port, count); + return; fallback_err: From c5fd4b7d7e58e20b05b5d24eddab70567ec7a724 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:42 +0300 Subject: [PATCH 0830/4122] serial: sunhv: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() helper. 
Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-36-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sunhv.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/sunhv.c b/drivers/tty/serial/sunhv.c index 1938ba5e98c0..16c746a63258 100644 --- a/drivers/tty/serial/sunhv.c +++ b/drivers/tty/serial/sunhv.c @@ -47,8 +47,7 @@ static void transmit_chars_putchar(struct uart_port *port, struct circ_buf *xmit if (status != HV_EOK) break; - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - port->icount.tx++; + uart_xmit_advance(port, 1); } } @@ -63,8 +62,7 @@ static void transmit_chars_write(struct uart_port *port, struct circ_buf *xmit) status = sun4v_con_write(ra, len, &sent); if (status != HV_EOK) break; - xmit->tail = (xmit->tail + sent) & (UART_XMIT_SIZE - 1); - port->icount.tx += sent; + uart_xmit_advance(port, sent); } } From 54ffabbe2203817d1d70ceb97407a4382633d2e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:43 +0300 Subject: [PATCH 0831/4122] serial: sunplus-uart: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() helper. 
Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-37-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sunplus-uart.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/tty/serial/sunplus-uart.c b/drivers/tty/serial/sunplus-uart.c index 7afe61a0e72e..727942c43c45 100644 --- a/drivers/tty/serial/sunplus-uart.c +++ b/drivers/tty/serial/sunplus-uart.c @@ -216,9 +216,7 @@ static void transmit_chars(struct uart_port *port) do { sp_uart_put_char(port, xmit->buf[xmit->tail]); - xmit->tail = (xmit->tail + 1) % UART_XMIT_SIZE; - port->icount.tx++; - + uart_xmit_advance(port, 1); if (uart_circ_empty(xmit)) break; } while (sunplus_tx_buf_not_full(port)); From 5aaae464d6de0913a962d4f9412986a528b0acf7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:44 +0300 Subject: [PATCH 0832/4122] serial: sunsab: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() helper. 
Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-38-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sunsab.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/sunsab.c b/drivers/tty/serial/sunsab.c index 99608b2a2b74..94db67f21abf 100644 --- a/drivers/tty/serial/sunsab.c +++ b/drivers/tty/serial/sunsab.c @@ -266,8 +266,7 @@ static void transmit_chars(struct uart_sunsab_port *up, for (i = 0; i < up->port.fifosize; i++) { writeb(xmit->buf[xmit->tail], &up->regs->w.xfifo[i]); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - up->port.icount.tx++; + uart_xmit_advance(&up->port, 1); if (uart_circ_empty(xmit)) break; } @@ -453,8 +452,7 @@ static void sunsab_start_tx(struct uart_port *port) for (i = 0; i < up->port.fifosize; i++) { writeb(xmit->buf[xmit->tail], &up->regs->w.xfifo[i]); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - up->port.icount.tx++; + uart_xmit_advance(&up->port, 1); if (uart_circ_empty(xmit)) break; } From 7f20ab70940e121c9af3f4ca013f38c762ef19e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:45 +0300 Subject: [PATCH 0833/4122] serial: sunsu: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() helper. 
Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-39-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sunsu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/tty/serial/sunsu.c b/drivers/tty/serial/sunsu.c index 9ea7e567540d..fed052a0b931 100644 --- a/drivers/tty/serial/sunsu.c +++ b/drivers/tty/serial/sunsu.c @@ -417,8 +417,7 @@ static void transmit_chars(struct uart_sunsu_port *up) count = up->port.fifosize; do { serial_out(up, UART_TX, xmit->buf[xmit->tail]); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - up->port.icount.tx++; + uart_xmit_advance(&up->port, 1); if (uart_circ_empty(xmit)) break; } while (--count > 0); From 81eb6227afea551fbfcb7958bdfcd70b6e5419f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:46 +0300 Subject: [PATCH 0834/4122] serial: sunzilog: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() helper. 
Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-40-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sunzilog.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/sunzilog.c b/drivers/tty/serial/sunzilog.c index 87425290687d..ccb809216e94 100644 --- a/drivers/tty/serial/sunzilog.c +++ b/drivers/tty/serial/sunzilog.c @@ -508,8 +508,7 @@ static void sunzilog_transmit_chars(struct uart_sunzilog_port *up, ZSDELAY(); ZS_WSYNC(channel); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - up->port.icount.tx++; + uart_xmit_advance(&up->port, 1); if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) uart_write_wakeup(&up->port); @@ -709,8 +708,7 @@ static void sunzilog_start_tx(struct uart_port *port) ZSDELAY(); ZS_WSYNC(channel); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - port->icount.tx++; + uart_xmit_advance(port, 1); if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) uart_write_wakeup(&up->port); From b421cbb2f33c7692aa209ce2781647919a1265b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:47 +0300 Subject: [PATCH 0835/4122] serial: timbuart: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() helper. 
Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-41-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/timbuart.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/tty/serial/timbuart.c b/drivers/tty/serial/timbuart.c index bb19ed012def..0859394a78cd 100644 --- a/drivers/tty/serial/timbuart.c +++ b/drivers/tty/serial/timbuart.c @@ -101,8 +101,7 @@ static void timbuart_tx_chars(struct uart_port *port) !uart_circ_empty(xmit)) { iowrite8(xmit->buf[xmit->tail], port->membase + TIMBUART_TXFIFO); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - port->icount.tx++; + uart_xmit_advance(port, 1); } dev_dbg(port->dev, From 852322ff4f2be566b0165a1f9c2adfcc0a86f8b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:48 +0300 Subject: [PATCH 0836/4122] serial: uartlite: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() helper. 
Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-42-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/uartlite.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/tty/serial/uartlite.c b/drivers/tty/serial/uartlite.c index eca41ac5477c..94584e54ebbe 100644 --- a/drivers/tty/serial/uartlite.c +++ b/drivers/tty/serial/uartlite.c @@ -203,8 +203,7 @@ static int ulite_transmit(struct uart_port *port, int stat) return 0; uart_out32(xmit->buf[xmit->tail], ULITE_TX, port); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE-1); - port->icount.tx++; + uart_xmit_advance(port, 1); /* wake up */ if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) From 41e804c4dec667e64c9204e13862cf634df794a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:49 +0300 Subject: [PATCH 0837/4122] serial: ucc_uart: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() helper. 
Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-43-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/ucc_uart.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/tty/serial/ucc_uart.c b/drivers/tty/serial/ucc_uart.c index 82cf14dd3d43..b09b6496ee3e 100644 --- a/drivers/tty/serial/ucc_uart.c +++ b/drivers/tty/serial/ucc_uart.c @@ -372,8 +372,7 @@ static int qe_uart_tx_pump(struct uart_qe_port *qe_port) p = qe2cpu_addr(be32_to_cpu(bdp->buf), qe_port); while (count < qe_port->tx_fifosize) { *p++ = xmit->buf[xmit->tail]; - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - port->icount.tx++; + uart_xmit_advance(port, 1); count++; if (xmit->head == xmit->tail) break; From edc62b17ed9f6db3de3521793e4689e15aad4625 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:50 +0300 Subject: [PATCH 0838/4122] serial: xuartps: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() helper. 
Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-44-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/xilinx_uartps.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c index 2eff7cff57c4..01d8027e64fd 100644 --- a/drivers/tty/serial/xilinx_uartps.c +++ b/drivers/tty/serial/xilinx_uartps.c @@ -326,9 +326,7 @@ static void cdns_uart_handle_tx(void *dev_id) !(readl(port->membase + CDNS_UART_SR) & CDNS_UART_SR_TXFULL)) { writel(xmit->buf[xmit->tail], port->membase + CDNS_UART_FIFO); - - port->icount.tx++; - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); + uart_xmit_advance(port, 1); numbytes--; } From c2087b37d10404220d06f4503ef165ab7835b246 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:11:51 +0300 Subject: [PATCH 0839/4122] serial: zs: Use uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take advantage of the new uart_xmit_advance() helper. Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019091151.6692-45-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/zs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/tty/serial/zs.c b/drivers/tty/serial/zs.c index 688db7d8b748..730c648e32ff 100644 --- a/drivers/tty/serial/zs.c +++ b/drivers/tty/serial/zs.c @@ -623,8 +623,7 @@ static void zs_raw_transmit_chars(struct zs_port *zport) /* Send char. 
*/ write_zsdata(zport, xmit->buf[xmit->tail]); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - zport->port.icount.tx++; + uart_xmit_advance(&zport->port, 1); if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) uart_write_wakeup(&zport->port); From 109a951a9f1fd8a34ebd1896cbbd5d5cede880a7 Mon Sep 17 00:00:00 2001 From: Kartik Date: Tue, 18 Oct 2022 20:28:06 +0530 Subject: [PATCH 0840/4122] serial: tegra: Read DMA status before terminating MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Read the DMA status before terminating the DMA, as doing so deletes the DMA desc. Also, to get the correct transfer status information, pause the DMA using dmaengine_pause() before reading the DMA status. Fixes: e9ea096dd225 ("serial: tegra: add serial driver") Reviewed-by: Jon Hunter Reviewed-by: Ilpo Järvinen Acked-by: Thierry Reding Signed-off-by: Akhil R Signed-off-by: Kartik Link: https://lore.kernel.org/r/1666105086-17326-1-git-send-email-kkartik@nvidia.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial-tegra.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/serial-tegra.c b/drivers/tty/serial/serial-tegra.c index 4304e3f9307d..e5b9773db5e3 100644 --- a/drivers/tty/serial/serial-tegra.c +++ b/drivers/tty/serial/serial-tegra.c @@ -618,8 +618,9 @@ static void tegra_uart_stop_tx(struct uart_port *u) if (tup->tx_in_progress != TEGRA_UART_TX_DMA) return; - dmaengine_terminate_all(tup->tx_dma_chan); + dmaengine_pause(tup->tx_dma_chan); dmaengine_tx_status(tup->tx_dma_chan, tup->tx_cookie, &state); + dmaengine_terminate_all(tup->tx_dma_chan); count = tup->tx_bytes_requested - state.residue; async_tx_ack(tup->tx_dma_desc); uart_xmit_advance(&tup->uport, count); @@ -762,8 +763,9 @@ static void tegra_uart_terminate_rx_dma(struct tegra_uart_port *tup) return; } - dmaengine_terminate_all(tup->rx_dma_chan); + dmaengine_pause(tup->rx_dma_chan); dmaengine_tx_status(tup->rx_dma_chan, 
tup->rx_cookie, &state); + dmaengine_terminate_all(tup->rx_dma_chan); tegra_uart_rx_buffer_push(tup, state.residue); tup->rx_dma_active = false; From 6dd07781b4cdd38103c81ddcc88fa4e8a31ebf71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:33:39 +0300 Subject: [PATCH 0841/4122] serial: Convert serial_rs485 to kernel doc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert struct serial_rs485 comments to kernel doc format and include it into documentation. Suggested-by: Andy Shevchenko Reviewed-by: Andy Shevchenko Reviewed-by: Bagas Sanjaya Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019093343.9546-2-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- .../driver-api/serial/serial-rs485.rst | 13 +++-- include/uapi/linux/serial.h | 55 ++++++++++++------- 2 files changed, 43 insertions(+), 25 deletions(-) diff --git a/Documentation/driver-api/serial/serial-rs485.rst b/Documentation/driver-api/serial/serial-rs485.rst index 6ebad75c74ed..264e4b753713 100644 --- a/Documentation/driver-api/serial/serial-rs485.rst +++ b/Documentation/driver-api/serial/serial-rs485.rst @@ -29,11 +29,11 @@ RS485 Serial Communications 3. Data Structures Already Available in the Kernel ================================================== - The Linux kernel provides the serial_rs485 structure (see [1]) to handle - RS485 communications. This data structure is used to set and configure RS485 + The Linux kernel provides the serial_rs485 structure to handle RS485 + communications. This data structure is used to set and configure RS485 parameters in the platform data and in ioctls. - The device tree can also provide RS485 boot time parameters (see [2] + The device tree can also provide RS485 boot time parameters (see [1] for bindings). The driver is in charge of filling this data structure from the values given by the device tree. 
@@ -47,6 +47,9 @@ RS485 Serial Communications for the uart_port. TIOCGRS485 ioctl can be used to read back the serial_rs485 structure matching to the current configuration. +.. kernel-doc:: include/uapi/linux/serial.h + :identifiers: serial_rs485 + 4. Usage from user-level ======================== @@ -126,6 +129,4 @@ RS485 Serial Communications 6. References ============= - [1] include/uapi/linux/serial.h - - [2] Documentation/devicetree/bindings/serial/rs485.txt + [1] Documentation/devicetree/bindings/serial/rs485.txt diff --git a/include/uapi/linux/serial.h b/include/uapi/linux/serial.h index cea06924b295..53bc1af67a41 100644 --- a/include/uapi/linux/serial.h +++ b/include/uapi/linux/serial.h @@ -107,33 +107,50 @@ struct serial_icounter_struct { int reserved[9]; }; -/* +/** + * struct serial_rs485 - serial interface for controlling RS485 settings. + * @flags: RS485 feature flags. + * @delay_rts_before_send: Delay before send (milliseconds). + * @delay_rts_after_send: Delay after send (milliseconds). + * @addr_recv: Receive filter for RS485 addressing mode + * (used only when %SER_RS485_ADDR_RECV is set). + * @addr_dest: Destination address for RS485 addressing mode + * (used only when %SER_RS485_ADDR_DEST is set). + * @padding0: Padding (set to zero). + * @padding1: Padding (set to zero). + * @padding: Deprecated, use @padding0 and @padding1 instead. + * Do not use with @addr_recv and @addr_dest (due to + * overlap). + * * Serial interface for controlling RS485 settings on chips with suitable * support. Set with TIOCSRS485 and get with TIOCGRS485 if supported by your * platform. The set function returns the new state, with any unsupported bits * reverted appropriately. + * + * The flag bits are: + * + * * %SER_RS485_ENABLED - RS485 enabled. + * * %SER_RS485_RTS_ON_SEND - Logical level for RTS pin when sending. + * * %SER_RS485_RTS_AFTER_SEND - Logical level for RTS pin after sent. + * * %SER_RS485_RX_DURING_TX - Full-duplex RS485 line. 
+ * * %SER_RS485_TERMINATE_BUS - Enable bus termination (if supported). + * * %SER_RS485_ADDRB - Enable RS485 addressing mode. + * * %SER_RS485_ADDR_RECV - Receive address filter (enables @addr_recv). Requires %SER_RS485_ADDRB. + * * %SER_RS485_ADDR_DEST - Destination address (enables @addr_dest). Requires %SER_RS485_ADDRB. */ - struct serial_rs485 { - __u32 flags; /* RS485 feature flags */ -#define SER_RS485_ENABLED (1 << 0) /* If enabled */ -#define SER_RS485_RTS_ON_SEND (1 << 1) /* Logical level for - RTS pin when - sending */ -#define SER_RS485_RTS_AFTER_SEND (1 << 2) /* Logical level for - RTS pin after sent*/ + __u32 flags; +#define SER_RS485_ENABLED (1 << 0) +#define SER_RS485_RTS_ON_SEND (1 << 1) +#define SER_RS485_RTS_AFTER_SEND (1 << 2) #define SER_RS485_RX_DURING_TX (1 << 4) -#define SER_RS485_TERMINATE_BUS (1 << 5) /* Enable bus - termination - (if supported) */ +#define SER_RS485_TERMINATE_BUS (1 << 5) +#define SER_RS485_ADDRB (1 << 6) +#define SER_RS485_ADDR_RECV (1 << 7) +#define SER_RS485_ADDR_DEST (1 << 8) -/* RS-485 addressing mode */ -#define SER_RS485_ADDRB (1 << 6) /* Enable addressing mode */ -#define SER_RS485_ADDR_RECV (1 << 7) /* Receive address filter */ -#define SER_RS485_ADDR_DEST (1 << 8) /* Destination address */ - - __u32 delay_rts_before_send; /* Delay before send (milliseconds) */ - __u32 delay_rts_after_send; /* Delay after send (milliseconds) */ + __u32 delay_rts_before_send; + __u32 delay_rts_after_send; /* The fields below are defined by flags */ union { From 851453abc8e9cfe92bbf8fdf07ee024ec372f09d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:33:40 +0300 Subject: [PATCH 0842/4122] Documentation: rs485: Link reference properly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Link DT bindings reference properly. 
Reviewed-by: Andy Shevchenko Reviewed-by: Bagas Sanjaya Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019093343.9546-3-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- Documentation/driver-api/serial/serial-rs485.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/driver-api/serial/serial-rs485.rst b/Documentation/driver-api/serial/serial-rs485.rst index 264e4b753713..513758a702a6 100644 --- a/Documentation/driver-api/serial/serial-rs485.rst +++ b/Documentation/driver-api/serial/serial-rs485.rst @@ -33,9 +33,9 @@ RS485 Serial Communications communications. This data structure is used to set and configure RS485 parameters in the platform data and in ioctls. - The device tree can also provide RS485 boot time parameters (see [1] - for bindings). The driver is in charge of filling this data structure from - the values given by the device tree. + The device tree can also provide RS485 boot time parameters + [#DT-bindings]_. The driver is in charge of filling this data structure + from the values given by the device tree. Any driver for devices capable of working both as RS232 and RS485 should implement the rs485_config callback and provide rs485_supported in the @@ -129,4 +129,4 @@ RS485 Serial Communications 6. References ============= - [1] Documentation/devicetree/bindings/serial/rs485.txt +.. [#DT-bindings] Documentation/devicetree/bindings/serial/rs485.txt From 0f4648a1a6d36487e4134a4fd75d8cdae0cd6bf4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:33:41 +0300 Subject: [PATCH 0843/4122] Documentation: rs485: Mention uart_get_rs485_mode() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add to rs485 documentation that serial core prepares the struct serial_rs485 when uart_get_rs485_mode() is called. Remove the wrong claim that the driver must fill it by itself. 
Reviewed-by: Andy Shevchenko Reviewed-by: Bagas Sanjaya Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019093343.9546-4-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- Documentation/driver-api/serial/serial-rs485.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Documentation/driver-api/serial/serial-rs485.rst b/Documentation/driver-api/serial/serial-rs485.rst index 513758a702a6..ee34c8e1e56e 100644 --- a/Documentation/driver-api/serial/serial-rs485.rst +++ b/Documentation/driver-api/serial/serial-rs485.rst @@ -34,8 +34,9 @@ RS485 Serial Communications parameters in the platform data and in ioctls. The device tree can also provide RS485 boot time parameters - [#DT-bindings]_. The driver is in charge of filling this data structure - from the values given by the device tree. + [#DT-bindings]_. The serial core fills the struct serial_rs485 from the + values given by the device tree when the driver calls + uart_get_rs485_mode(). Any driver for devices capable of working both as RS232 and RS485 should implement the rs485_config callback and provide rs485_supported in the @@ -48,7 +49,7 @@ RS485 Serial Communications serial_rs485 structure matching to the current configuration. .. kernel-doc:: include/uapi/linux/serial.h - :identifiers: serial_rs485 + :identifiers: serial_rs485 uart_get_rs485_mode 4. Usage from user-level ======================== From 891e999394f56b52ed81b9ebcb7fe0bf050a3157 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:33:42 +0300 Subject: [PATCH 0844/4122] Documentation: rs485: Fix struct referencing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use "struct serial_rs485" to get the references properly recognized. 
Reviewed-by: Andy Shevchenko Reviewed-by: Bagas Sanjaya Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019093343.9546-5-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- .../driver-api/serial/serial-rs485.rst | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/Documentation/driver-api/serial/serial-rs485.rst b/Documentation/driver-api/serial/serial-rs485.rst index ee34c8e1e56e..e53aa291bcd7 100644 --- a/Documentation/driver-api/serial/serial-rs485.rst +++ b/Documentation/driver-api/serial/serial-rs485.rst @@ -29,7 +29,7 @@ RS485 Serial Communications 3. Data Structures Already Available in the Kernel ================================================== - The Linux kernel provides the serial_rs485 structure to handle RS485 + The Linux kernel provides the struct serial_rs485 to handle RS485 communications. This data structure is used to set and configure RS485 parameters in the platform data and in ioctls. @@ -40,13 +40,14 @@ RS485 Serial Communications Any driver for devices capable of working both as RS232 and RS485 should implement the rs485_config callback and provide rs485_supported in the - uart_port structure. The serial core calls rs485_config to do the device - specific part in response to TIOCSRS485 ioctl (see below). The rs485_config - callback receives a pointer to a sanitizated serial_rs485 structure. The - serial_rs485 userspace provides is sanitized before calling rs485_config - using rs485_supported that indicates what RS485 features the driver supports - for the uart_port. TIOCGRS485 ioctl can be used to read back the - serial_rs485 structure matching to the current configuration. + struct uart_port. The serial core calls rs485_config to do the device + specific part in response to TIOCSRS485 ioctl (see below). The + rs485_config callback receives a pointer to a sanitizated struct + serial_rs485. 
The struct serial_rs485 userspace provides is sanitized + before calling rs485_config using rs485_supported that indicates what + RS485 features the driver supports for the struct uart_port. TIOCGRS485 + ioctl can be used to read back the struct serial_rs485 matching to the + current configuration. .. kernel-doc:: include/uapi/linux/serial.h :identifiers: serial_rs485 uart_get_rs485_mode @@ -108,8 +109,8 @@ RS485 Serial Communications The Linux kernel provides addressing mode for multipoint RS-485 serial communications line. The addressing mode is enabled with SER_RS485_ADDRB - flag in serial_rs485. Struct serial_rs485 has two additional flags and - fields for enabling receive and destination addresses. + flag in struct serial_rs485. The struct serial_rs485 has two additional + flags and fields for enabling receive and destination addresses. Address mode flags: - SER_RS485_ADDRB: Enabled addressing mode (sets also ADDRB in termios). From 7b1c56e08a45693ce6f708a4ee499e44c4fb9ca5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:33:43 +0300 Subject: [PATCH 0845/4122] Documentation: Make formatting consistent for rs485 docs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tweak styling of names that come directly from the code. Suggested-by: Bagas Sanjaya Reviewed-by: Bagas Sanjaya Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019093343.9546-6-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- .../driver-api/serial/serial-rs485.rst | 37 ++++++++++--------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/Documentation/driver-api/serial/serial-rs485.rst b/Documentation/driver-api/serial/serial-rs485.rst index e53aa291bcd7..dce061ef7647 100644 --- a/Documentation/driver-api/serial/serial-rs485.rst +++ b/Documentation/driver-api/serial/serial-rs485.rst @@ -39,15 +39,15 @@ RS485 Serial Communications uart_get_rs485_mode(). 
Any driver for devices capable of working both as RS232 and RS485 should - implement the rs485_config callback and provide rs485_supported in the - struct uart_port. The serial core calls rs485_config to do the device - specific part in response to TIOCSRS485 ioctl (see below). The - rs485_config callback receives a pointer to a sanitizated struct + implement the ``rs485_config`` callback and provide ``rs485_supported`` + in the ``struct uart_port``. The serial core calls ``rs485_config`` to do + the device specific part in response to TIOCSRS485 ioctl (see below). The + ``rs485_config`` callback receives a pointer to a sanitizated struct serial_rs485. The struct serial_rs485 userspace provides is sanitized - before calling rs485_config using rs485_supported that indicates what - RS485 features the driver supports for the struct uart_port. TIOCGRS485 - ioctl can be used to read back the struct serial_rs485 matching to the - current configuration. + before calling ``rs485_config`` using ``rs485_supported`` that indicates + what RS485 features the driver supports for the ``struct uart_port``. + TIOCGRS485 ioctl can be used to read back the struct serial_rs485 + matching to the current configuration. .. kernel-doc:: include/uapi/linux/serial.h :identifiers: serial_rs485 uart_get_rs485_mode @@ -108,23 +108,24 @@ RS485 Serial Communications ======================== The Linux kernel provides addressing mode for multipoint RS-485 serial - communications line. The addressing mode is enabled with SER_RS485_ADDRB - flag in struct serial_rs485. The struct serial_rs485 has two additional - flags and fields for enabling receive and destination addresses. + communications line. The addressing mode is enabled with + ``SER_RS485_ADDRB`` flag in struct serial_rs485. The struct serial_rs485 + has two additional flags and fields for enabling receive and destination + addresses. Address mode flags: - - SER_RS485_ADDRB: Enabled addressing mode (sets also ADDRB in termios). 
- - SER_RS485_ADDR_RECV: Receive (filter) address enabled. - - SER_RS485_ADDR_DEST: Set destination address. + - ``SER_RS485_ADDRB``: Enabled addressing mode (sets also ADDRB in termios). + - ``SER_RS485_ADDR_RECV``: Receive (filter) address enabled. + - ``SER_RS485_ADDR_DEST``: Set destination address. - Address fields (enabled with corresponding SER_RS485_ADDR_* flag): - - addr_recv: Receive address. - - addr_dest: Destination address. + Address fields (enabled with corresponding ``SER_RS485_ADDR_*`` flag): + - ``addr_recv``: Receive address. + - ``addr_dest``: Destination address. Once a receive address is set, the communication can occur only with the particular device and other peers are filtered out. It is left up to the receiver side to enforce the filtering. Receive address will be cleared - if SER_RS485_ADDR_RECV is not set. + if ``SER_RS485_ADDR_RECV`` is not set. Note: not all devices supporting RS485 support multipoint addressing. From 0c3c184c5218d9209fcb8ee151074bc629bd4024 Mon Sep 17 00:00:00 2001 From: Siarhei Volkau Date: Mon, 31 Oct 2022 21:40:39 +0300 Subject: [PATCH 0846/4122] dt-bindings: serial: ingenic: Add support for the JZ4750/55 SoCs These SoCs UART block are the same as JZ4725b' one, the difference is outside of the block - it is in the clock generation unit (CGU). The difference requires to make a quirk for early console init. 
Acked-by: Krzysztof Kozlowski Signed-off-by: Siarhei Volkau Link: https://lore.kernel.org/r/20221031184041.1338129-2-lis8215@gmail.com Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/serial/ingenic,uart.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/serial/ingenic,uart.yaml b/Documentation/devicetree/bindings/serial/ingenic,uart.yaml index 9ca7a18ecd8b..315ceb722e19 100644 --- a/Documentation/devicetree/bindings/serial/ingenic,uart.yaml +++ b/Documentation/devicetree/bindings/serial/ingenic,uart.yaml @@ -20,6 +20,7 @@ properties: oneOf: - enum: - ingenic,jz4740-uart + - ingenic,jz4750-uart - ingenic,jz4760-uart - ingenic,jz4780-uart - ingenic,x1000-uart @@ -31,6 +32,9 @@ properties: - items: - const: ingenic,jz4725b-uart - const: ingenic,jz4740-uart + - items: + - const: ingenic,jz4755-uart + - const: ingenic,jz4750-uart reg: maxItems: 1 From e9c29d80278c0f5c6198ac741b10a534672042ca Mon Sep 17 00:00:00 2001 From: Siarhei Volkau Date: Mon, 31 Oct 2022 21:40:40 +0300 Subject: [PATCH 0847/4122] serial: 8250/ingenic: Add support for the JZ4750/JZ4755 JZ4750/55/60 (but not JZ4760b) have an optional /2 divider between the EXT oscillator and some peripherals including UART, which will be enabled if using a 24 MHz oscillator, and disabled when using a 12 MHz oscillator. This behavior relies on hardware differences: most boards (if not all) with those SoCs have 12 or 24 MHz oscillators but many peripherals want 12 MHz to operate properly (AIC and USB-PHY at least). The 16 MHz threshold looks arbitrary, but it is what the vendor's bootloader code uses to enable the divider. The patch doesn't affect JZ4760's behavior, as that is the subject of another patchset that reclassifies all supported Ingenic UARTs.
Link: https://github.com/carlos-wong/uboot_jz4755/blob/master/cpu/mips/jz_serial.c#L158 Signed-off-by: Siarhei Volkau Link: https://lore.kernel.org/r/20221031184041.1338129-3-lis8215@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_ingenic.c | 32 +++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/drivers/tty/serial/8250/8250_ingenic.c b/drivers/tty/serial/8250/8250_ingenic.c index 2b2f5d8d24b9..617b8ce60d6b 100644 --- a/drivers/tty/serial/8250/8250_ingenic.c +++ b/drivers/tty/serial/8250/8250_ingenic.c @@ -87,7 +87,7 @@ static void __init ingenic_early_console_setup_clock(struct earlycon_device *dev dev->port.uartclk = be32_to_cpup(prop); } -static int __init ingenic_early_console_setup(struct earlycon_device *dev, +static int __init ingenic_earlycon_setup_tail(struct earlycon_device *dev, const char *opt) { struct uart_port *port = &dev->port; @@ -103,8 +103,6 @@ static int __init ingenic_early_console_setup(struct earlycon_device *dev, uart_parse_options(opt, &baud, &parity, &bits, &flow); } - ingenic_early_console_setup_clock(dev); - if (dev->baud) baud = dev->baud; divisor = DIV_ROUND_CLOSEST(port->uartclk, 16 * baud); @@ -129,9 +127,36 @@ static int __init ingenic_early_console_setup(struct earlycon_device *dev, return 0; } +static int __init ingenic_early_console_setup(struct earlycon_device *dev, + const char *opt) +{ + ingenic_early_console_setup_clock(dev); + + return ingenic_earlycon_setup_tail(dev, opt); +} + +static int __init jz4750_early_console_setup(struct earlycon_device *dev, + const char *opt) +{ + /* + * JZ4750/55/60 have an optional /2 divider between the EXT + * oscillator and some peripherals including UART, which will + * be enabled if using a 24 MHz oscillator, and disabled when + * using a 12 MHz oscillator. 
+ */ + ingenic_early_console_setup_clock(dev); + if (dev->port.uartclk >= 16000000) + dev->port.uartclk /= 2; + + return ingenic_earlycon_setup_tail(dev, opt); +} + OF_EARLYCON_DECLARE(jz4740_uart, "ingenic,jz4740-uart", ingenic_early_console_setup); +OF_EARLYCON_DECLARE(jz4750_uart, "ingenic,jz4750-uart", + jz4750_early_console_setup); + OF_EARLYCON_DECLARE(jz4770_uart, "ingenic,jz4770-uart", ingenic_early_console_setup); @@ -328,6 +353,7 @@ static const struct ingenic_uart_config x1000_uart_config = { static const struct of_device_id of_match[] = { { .compatible = "ingenic,jz4740-uart", .data = &jz4740_uart_config }, + { .compatible = "ingenic,jz4750-uart", .data = &jz4760_uart_config }, { .compatible = "ingenic,jz4760-uart", .data = &jz4760_uart_config }, { .compatible = "ingenic,jz4770-uart", .data = &jz4760_uart_config }, { .compatible = "ingenic,jz4775-uart", .data = &jz4760_uart_config }, From 79d0224f6bf296d04cd843cfc49921b19c97bb09 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 29 Sep 2022 16:44:00 +0200 Subject: [PATCH 0848/4122] tty: serial: imx: Handle RS485 DE signal active high The default polarity of RS485 DE signal is active high. This driver does not handle such case properly. Currently, when a pin is multiplexed as a UART CTS_B on boot, this pin is pulled HIGH by the i.MX UART CTS circuit, which activates DE signal on the RS485 transceiver and thus behaves as if the RS485 was transmitting data, so the system blocks the RS485 bus when it starts and until user application takes over. This behavior is not OK. The problem consists of two separate parts. First, the i.MX UART IP requires UCR1 UARTEN and UCR2 RXEN to be set for UCR2 CTSC and CTS bits to have any effect. The UCR2 CTSC bit permits the driver to set CTS (RTS_B or RS485 DE signal) to either level synchronous to the internal UART IP clock.
Compared to other options, like GPIO CTS control, this has the benefit of being synchronous to the UART IP clock and thus without glitches or bus delays. The reason for the CTS design is likely because when the Receiver is disabled, the UART IP can never indicate that it is ready to receive data by asserting CTS signal, so the CTS is always pulled HIGH by default. When the port is closed by user space, imx_uart_stop_rx() clears UCR2 RXEN bit, and imx_uart_shutdown() clears UCR1 UARTEN bit. This disables UART Receiver and UART itself, and forces CTS signal HIGH, which leads to the RS485 bus being blocked because RS485 DE is incorrectly active. The proposed solution for this problem is to keep the Receiver running even after the port is closed, but in loopback mode. This disconnects the RX FIFO input from the RXD external signal, and since UCR2 TXEN is cleared, the UART Transmitter is disabled, so nothing can feed data in the RX FIFO. Because the Receiver is still enabled, the UCR2 CTSC and CTS bits still have effect and the CTS (RS485 DE) control is retained. Note that in case of RS485 DE signal active low, there is no problem and no special handling is necessary. The CTS signal defaults to HIGH, thus the RS485 is by default set to Receive and the bus is not blocked. Note that while there is the possibility to control CTS using GPIO with either CTS polarity, this has the downside of not being synchronous to the UART IP clock and thus glitchy and susceptible to slow DE switching. Second, on boot, before the UART driver probe callback is called, the driver core triggers pinctrl_init_done() and configures the IOMUXC to default state. At this point, UCR1 UARTEN and UCR2 RXEN are both still cleared, but UART CTS_B (RS485 DE) is configured as CTS function, thus the RTS signal is pulled HIGH by the UART IP CTS circuit.
One part of the solution here is to enable UCR1 UARTEN and UCR2 RXEN and UTS loopback in this driver probe callback, thus unblocking the CTSC and CTS control early on. But this is still too late, since the pin control is already configured and CTS has been pulled HIGH for a short period of time. When Linux kernel boots and this driver is bound, the pin control is set to special "init" state if the state is available, and driver can switch the "default" state afterward when ready. This state can be used to set the CTS line as a GPIO in DT temporarily, and a GPIO hog can force such GPIO to LOW, thus keeping the RS485 DE line LOW early on boot. Once the driver takes over and UCR1 UARTEN and UCR2 RXEN and UTS loopback are all enabled, the driver can switch to "default" pin control state and control the CTS line as function instead. DT binding example is below: " &gpio6 { rts-init-hog { gpio-hog; gpios = <5 0>; output-low; line-name = "rs485-de"; }; }; &uart5 { /* DHCOM UART2 */ pinctrl-0 = <&pinctrl_uart5>; pinctrl-1 = <&pinctrl_uart5_init>; pinctrl-names = "default", "init"; ... }; pinctrl_uart5_init: uart5-init-grp { fsl,pins = < ... MX6QDL_PAD_CSI0_DAT19__GPIO6_IO05 0x30b1 >; }; pinctrl_uart5: uart5-grp { fsl,pins = < ... 
MX6QDL_PAD_CSI0_DAT19__UART5_CTS_B 0x30b1 >; }; " Tested-by: Christoph Niedermaier Reviewed-by: Fabio Estevam Signed-off-by: Marek Vasut Link: https://lore.kernel.org/r/20220929144400.13571-1-marex@denx.de Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/imx.c | 64 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 58 insertions(+), 6 deletions(-) diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index a7548d0a1aee..11ac805663ae 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -489,7 +489,7 @@ static void imx_uart_stop_tx(struct uart_port *port) static void imx_uart_stop_rx(struct uart_port *port) { struct imx_port *sport = (struct imx_port *)port; - u32 ucr1, ucr2, ucr4; + u32 ucr1, ucr2, ucr4, uts; ucr1 = imx_uart_readl(sport, UCR1); ucr2 = imx_uart_readl(sport, UCR2); @@ -505,7 +505,18 @@ static void imx_uart_stop_rx(struct uart_port *port) imx_uart_writel(sport, ucr1, UCR1); imx_uart_writel(sport, ucr4, UCR4); - ucr2 &= ~UCR2_RXEN; + /* See SER_RS485_ENABLED/UTS_LOOP comment in imx_uart_probe() */ + if (port->rs485.flags & SER_RS485_ENABLED && + port->rs485.flags & SER_RS485_RTS_ON_SEND && + sport->have_rtscts && !sport->have_rtsgpio) { + uts = imx_uart_readl(sport, imx_uart_uts_reg(sport)); + uts |= UTS_LOOP; + imx_uart_writel(sport, uts, imx_uart_uts_reg(sport)); + ucr2 |= UCR2_RXEN; + } else { + ucr2 &= ~UCR2_RXEN; + } + imx_uart_writel(sport, ucr2, UCR2); } @@ -1390,7 +1401,7 @@ static int imx_uart_startup(struct uart_port *port) int retval, i; unsigned long flags; int dma_is_inited = 0; - u32 ucr1, ucr2, ucr3, ucr4; + u32 ucr1, ucr2, ucr3, ucr4, uts; retval = clk_prepare_enable(sport->clk_per); if (retval) @@ -1495,6 +1506,11 @@ static int imx_uart_startup(struct uart_port *port) imx_uart_writel(sport, ucr2, UCR2); } + /* See SER_RS485_ENABLED/UTS_LOOP comment in imx_uart_probe() */ + uts = imx_uart_readl(sport, imx_uart_uts_reg(sport)); + uts &= ~UTS_LOOP; + imx_uart_writel(sport, uts, 
imx_uart_uts_reg(sport)); + spin_unlock_irqrestore(&sport->port.lock, flags); return 0; @@ -1504,7 +1520,7 @@ static void imx_uart_shutdown(struct uart_port *port) { struct imx_port *sport = (struct imx_port *)port; unsigned long flags; - u32 ucr1, ucr2, ucr4; + u32 ucr1, ucr2, ucr4, uts; if (sport->dma_is_enabled) { dmaengine_terminate_sync(sport->dma_chan_tx); @@ -1548,7 +1564,18 @@ static void imx_uart_shutdown(struct uart_port *port) spin_lock_irqsave(&sport->port.lock, flags); ucr1 = imx_uart_readl(sport, UCR1); - ucr1 &= ~(UCR1_TRDYEN | UCR1_RRDYEN | UCR1_RTSDEN | UCR1_UARTEN | UCR1_RXDMAEN | UCR1_ATDMAEN); + ucr1 &= ~(UCR1_TRDYEN | UCR1_RRDYEN | UCR1_RTSDEN | UCR1_RXDMAEN | UCR1_ATDMAEN); + /* See SER_RS485_ENABLED/UTS_LOOP comment in imx_uart_probe() */ + if (port->rs485.flags & SER_RS485_ENABLED && + port->rs485.flags & SER_RS485_RTS_ON_SEND && + sport->have_rtscts && !sport->have_rtsgpio) { + uts = imx_uart_readl(sport, imx_uart_uts_reg(sport)); + uts |= UTS_LOOP; + imx_uart_writel(sport, uts, imx_uart_uts_reg(sport)); + ucr1 |= UCR1_UARTEN; + } else { + ucr1 &= ~UCR1_UARTEN; + } imx_uart_writel(sport, ucr1, UCR1); ucr4 = imx_uart_readl(sport, UCR4); @@ -2210,7 +2237,7 @@ static int imx_uart_probe(struct platform_device *pdev) void __iomem *base; u32 dma_buf_conf[2]; int ret = 0; - u32 ucr1; + u32 ucr1, ucr2, uts; struct resource *res; int txirq, rxirq, rtsirq; @@ -2347,6 +2374,31 @@ static int imx_uart_probe(struct platform_device *pdev) ucr1 &= ~(UCR1_ADEN | UCR1_TRDYEN | UCR1_IDEN | UCR1_RRDYEN | UCR1_RTSDEN); imx_uart_writel(sport, ucr1, UCR1); + /* + * In case RS485 is enabled without GPIO RTS control, the UART IP + * is used to control CTS signal. Keep both the UART and Receiver + * enabled, otherwise the UART IP pulls CTS signal always HIGH no + * matter how the UCR2 CTSC and CTS bits are set. 
To prevent any + * data from being fed into the RX FIFO, enable loopback mode in + * UTS register, which disconnects the RX path from external RXD + * pin and connects it to the Transceiver, which is disabled, so + * no data can be fed to the RX FIFO that way. + */ + if (sport->port.rs485.flags & SER_RS485_ENABLED && + sport->have_rtscts && !sport->have_rtsgpio) { + uts = imx_uart_readl(sport, imx_uart_uts_reg(sport)); + uts |= UTS_LOOP; + imx_uart_writel(sport, uts, imx_uart_uts_reg(sport)); + + ucr1 = imx_uart_readl(sport, UCR1); + ucr1 |= UCR1_UARTEN; + imx_uart_writel(sport, ucr1, UCR1); + + ucr2 = imx_uart_readl(sport, UCR2); + ucr2 |= UCR2_RXEN; + imx_uart_writel(sport, ucr2, UCR2); + } + if (!imx_uart_is_imx1(sport) && sport->dte_mode) { /* * The DCEDTE bit changes the direction of DSR, DCD, DTR and RI From 2cfc64f3f0e1c1136b1a8247e53dc24c54f0bf93 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 17 Oct 2022 20:16:33 +0300 Subject: [PATCH 0849/4122] serial: 8250_core: Use str_enabled_disabled() helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use str_enabled_disabled() helper instead of open coding the same. Reviewed-by: Ilpo Järvinen Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221017171633.65275-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index 94fbf0add2ce..80a2fc2fbd4d 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #ifdef CONFIG_SPARC @@ -1175,8 +1176,8 @@ static int __init serial8250_init(void) serial8250_isa_init_ports(); - pr_info("Serial: 8250/16550 driver, %d ports, IRQ sharing %sabled\n", - nr_uarts, share_irqs ? 
"en" : "dis"); + pr_info("Serial: 8250/16550 driver, %d ports, IRQ sharing %s\n", + nr_uarts, str_enabled_disabled(share_irqs)); #ifdef CONFIG_SPARC ret = sunserial_register_minors(&serial8250_reg, UART_NR); From cc72a1eea5e3f712ab4d59615bf1d4479cee16fc Mon Sep 17 00:00:00 2001 From: ruanjinjie Date: Wed, 19 Oct 2022 14:44:12 +0800 Subject: [PATCH 0850/4122] tty: hvc: make hvc_rtas_dev static The symbol is not used outside of the file, so mark it static. Fixes the following warning: drivers/tty/hvc/hvc_rtas.c:29:19: warning: symbol 'hvc_rtas_dev' was not declared. Should it be static? Reviewed-by: Jiri Slaby Signed-off-by: ruanjinjie Link: https://lore.kernel.org/r/20221019064412.3759874-1-ruanjinjie@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/hvc/hvc_rtas.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/hvc/hvc_rtas.c b/drivers/tty/hvc/hvc_rtas.c index e8b8c645482b..184d325abeed 100644 --- a/drivers/tty/hvc/hvc_rtas.c +++ b/drivers/tty/hvc/hvc_rtas.c @@ -26,7 +26,7 @@ #include "hvc_console.h" #define hvc_rtas_cookie 0x67781e15 -struct hvc_struct *hvc_rtas_dev; +static struct hvc_struct *hvc_rtas_dev; static int rtascons_put_char_token = RTAS_UNKNOWN_SERVICE; static int rtascons_get_char_token = RTAS_UNKNOWN_SERVICE; From fa31528a214701e3afb3997f1c2c2b7290e1d05c Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 21 Oct 2022 16:02:16 +0200 Subject: [PATCH 0851/4122] dt-bindings: serial: renesas,scif: Document r8a779g0 support Document support for the Serial Communication Interface with FIFO (SCIF) in the Renesas R-Car V4H (R8A779G0) SoC. 
Reviewed-by: Wolfram Sang Acked-by: Krzysztof Kozlowski Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/8a2d04651f04a97d652395b4d933af5c3c8d5b5b.1666360789.git.geert+renesas@glider.be Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/serial/renesas,scif.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/serial/renesas,scif.yaml b/Documentation/devicetree/bindings/serial/renesas,scif.yaml index f930e7f1349f..f81f2d67a1ed 100644 --- a/Documentation/devicetree/bindings/serial/renesas,scif.yaml +++ b/Documentation/devicetree/bindings/serial/renesas,scif.yaml @@ -67,6 +67,7 @@ properties: - enum: - renesas,scif-r8a779a0 # R-Car V3U - renesas,scif-r8a779f0 # R-Car S4-8 + - renesas,scif-r8a779g0 # R-Car V4H - const: renesas,rcar-gen4-scif # R-Car Gen4 - const: renesas,scif # generic SCIF compatible UART From 15730dc45dc7432713c7af9ee5abad76872f6405 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 13:55:04 +0300 Subject: [PATCH 0852/4122] tty: Cleanup tty buffer align mask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don't use decimal for mask. Don't use literal for aligning. Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019105504.16800-2-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_buffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c index 5e287dedce01..3f057805560f 100644 --- a/drivers/tty/tty_buffer.c +++ b/drivers/tty/tty_buffer.c @@ -21,7 +21,7 @@ #include "tty.h" #define MIN_TTYB_SIZE 256 -#define TTYB_ALIGN_MASK 255 +#define TTYB_ALIGN_MASK 0xff /* * Byte threshold to limit memory consumption for flip buffers. @@ -37,7 +37,7 @@ * logic this must match. 
*/ -#define TTY_BUFFER_PAGE (((PAGE_SIZE - sizeof(struct tty_buffer)) / 2) & ~0xFF) +#define TTY_BUFFER_PAGE (((PAGE_SIZE - sizeof(struct tty_buffer)) / 2) & ~TTYB_ALIGN_MASK) /** * tty_buffer_lock_exclusive - gain exclusive access to buffer From 98e4c68ddcaf3721df9bef809775a8e3562cb6f9 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Tue, 1 Nov 2022 22:48:30 +0900 Subject: [PATCH 0853/4122] perf probe: Fix to avoid crashing if DW_AT_decl_file is NULL Since clang generates DWARF5 which sets DW_AT_decl_file as 0, dwarf_decl_file() thinks that is invalid and returns NULL. In that case 'perf probe' SIGSEGVs because it doesn't expect a NULL decl_file. This adds a dwarf_decl_file() return value check to avoid such SEGV with clang generated DWARF5 info. Without this, 'perf probe' crashes: $ perf probe -k $BIN_PATH/vmlinux -s $SRC_PATH -L vfs_read:10 Segmentation fault $ With this, it just warns about it: $ perf probe -k $BIN_PATH/vmlinux -s $SRC_PATH -L vfs_read:10 Debuginfo analysis failed. Error: Failed to show lines. $ Signed-off-by: Masami Hiramatsu Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Link: https://lore.kernel.org/r/166731051077.2100653.15626653369345128302.stgit@devnote3 Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dwarf-aux.c | 7 ++++++- tools/perf/util/probe-finder.c | 29 +++++++++++++++++++++-------- 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 609ca1671501..406b7bdc851a 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -137,7 +137,7 @@ int cu_find_lineinfo(Dwarf_Die *cu_die, Dwarf_Addr addr, } out: - return *lineno ?: -ENOENT; + return (*lineno && *fname) ? 
*lineno : -ENOENT; } static int __die_find_inline_cb(Dwarf_Die *die_mem, void *data); @@ -874,6 +874,11 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data) cu_die = dwarf_diecu(rt_die, &die_mem, NULL, NULL); dwarf_decl_line(rt_die, &decl); decf = dwarf_decl_file(rt_die); + if (!decf) { + pr_debug2("Failed to get the declared file name of %s\n", + dwarf_diename(rt_die)); + return -EINVAL; + } } else cu_die = rt_die; if (!cu_die) { diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 50d861a80f57..1aa8fcc41c76 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -1063,6 +1063,7 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data) struct dwarf_callback_param *param = data; struct probe_finder *pf = param->data; struct perf_probe_point *pp = &pf->pev->point; + const char *fname; /* Check tag and diename */ if (!die_is_func_def(sp_die) || @@ -1070,12 +1071,17 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data) return DWARF_CB_OK; /* Check declared file */ - if (pp->file && strtailcmp(pp->file, dwarf_decl_file(sp_die))) + fname = dwarf_decl_file(sp_die); + if (!fname) { + pr_warning("A function DIE doesn't have decl_line. 
Maybe broken DWARF?\n"); + return DWARF_CB_OK; + } + if (pp->file && fname && strtailcmp(pp->file, fname)) return DWARF_CB_OK; pr_debug("Matched function: %s [%lx]\n", dwarf_diename(sp_die), (unsigned long)dwarf_dieoffset(sp_die)); - pf->fname = dwarf_decl_file(sp_die); + pf->fname = fname; if (pp->line) { /* Function relative line */ dwarf_decl_line(sp_die, &pf->lno); pf->lno += pp->line; @@ -1134,6 +1140,7 @@ struct pubname_callback_param { static int pubname_search_cb(Dwarf *dbg, Dwarf_Global *gl, void *data) { struct pubname_callback_param *param = data; + const char *fname; if (dwarf_offdie(dbg, gl->die_offset, param->sp_die)) { if (dwarf_tag(param->sp_die) != DW_TAG_subprogram) @@ -1143,9 +1150,11 @@ static int pubname_search_cb(Dwarf *dbg, Dwarf_Global *gl, void *data) if (!dwarf_offdie(dbg, gl->cu_offset, param->cu_die)) return DWARF_CB_OK; - if (param->file && - strtailcmp(param->file, dwarf_decl_file(param->sp_die))) - return DWARF_CB_OK; + if (param->file) { + fname = dwarf_decl_file(param->sp_die); + if (!fname || strtailcmp(param->file, fname)) + return DWARF_CB_OK; + } param->found = 1; return DWARF_CB_ABORT; @@ -1779,7 +1788,7 @@ int debuginfo__find_probe_point(struct debuginfo *dbg, u64 addr, } /* Verify the lineno and baseline are in a same file */ tmp = dwarf_decl_file(&spdie); - if (!tmp || strcmp(tmp, fname) != 0) + if (!tmp || (fname && strcmp(tmp, fname) != 0)) lineno = 0; } @@ -1889,10 +1898,14 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data) struct dwarf_callback_param *param = data; struct line_finder *lf = param->data; struct line_range *lr = lf->lr; + const char *fname; /* Check declared file */ - if (lr->file && strtailcmp(lr->file, dwarf_decl_file(sp_die))) - return DWARF_CB_OK; + if (lr->file) { + fname = dwarf_decl_file(sp_die); + if (!fname || strtailcmp(lr->file, fname)) + return DWARF_CB_OK; + } if (die_match_name(sp_die, lr->function) && die_is_func_def(sp_die)) { lf->fname = dwarf_decl_file(sp_die); From 
f828929ab7f0dc3353e4a617f94f297fa8f3dec3 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Tue, 1 Nov 2022 22:48:39 +0900 Subject: [PATCH 0854/4122] perf probe: Use dwarf_attr_integrate as generic DWARF attr accessor Use dwarf_attr_integrate() instead of dwarf_attr() for generic attribute accessor functions, so that it can find the specified attribute from abstract origin DIE etc. Signed-off-by: Masami Hiramatsu Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Link: https://lore.kernel.org/r/166731051988.2100653.13595339994343449770.stgit@devnote3 Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dwarf-aux.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 406b7bdc851a..216fc3d959e8 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -308,7 +308,7 @@ static int die_get_attr_udata(Dwarf_Die *tp_die, unsigned int attr_name, { Dwarf_Attribute attr; - if (dwarf_attr(tp_die, attr_name, &attr) == NULL || + if (dwarf_attr_integrate(tp_die, attr_name, &attr) == NULL || dwarf_formudata(&attr, result) != 0) return -ENOENT; @@ -321,7 +321,7 @@ static int die_get_attr_sdata(Dwarf_Die *tp_die, unsigned int attr_name, { Dwarf_Attribute attr; - if (dwarf_attr(tp_die, attr_name, &attr) == NULL || + if (dwarf_attr_integrate(tp_die, attr_name, &attr) == NULL || dwarf_formsdata(&attr, result) != 0) return -ENOENT; From dc9a5d2ccd5c823cc05cafe75fcf19b682d8152c Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Tue, 1 Nov 2022 22:48:49 +0900 Subject: [PATCH 0855/4122] perf probe: Fix to get declared file name from clang DWARF5 Fix to get the declared file name even if it uses file index 0 in DWARF5, using custom die_get_decl_file() function.
Actually, the DWARF5 standard says file index 0 of the DW_AT_decl_file is invalid [1], but there is a discussion and maybe this will be updated [2]. Anyway, clang generates such DWARF5 files for the linux kernel. Thus it must be handled. Without this, 'perf probe' returns an error: $ ./perf probe -k $BIN_PATH/vmlinux -s $SRC_PATH -L vfs_read:10 Debuginfo analysis failed. Error: Failed to show lines. With this, it can handle the case correctly: $ ./perf probe -k $BIN_PATH/vmlinux -s $SRC_PATH -L vfs_read:10 11 ret = rw_verify_area(READ, file, pos, count); 12 if (ret) return ret; [1] DWARF5 specification 2.14 says "The value 0 indicates that no source file has been specified." [2] http://wiki.dwarfstd.org/index.php?title=DWARF5_Line_Table_File_Numbers Signed-off-by: Masami Hiramatsu Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Link: https://lore.kernel.org/r/166731052936.2100653.13380621874859467731.stgit@devnote3 Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dwarf-aux.c | 47 +++++++++++++++++++++++----------- tools/perf/util/dwarf-aux.h | 3 +++ tools/perf/util/probe-finder.c | 14 +++++----- 3 files changed, 42 insertions(+), 22 deletions(-) diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 216fc3d959e8..30b36b525681 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -123,7 +123,7 @@ int cu_find_lineinfo(Dwarf_Die *cu_die, Dwarf_Addr addr, if (die_find_realfunc(cu_die, addr, &die_mem) && die_entrypc(&die_mem, &faddr) == 0 && faddr == addr) { - *fname = dwarf_decl_file(&die_mem); + *fname = die_get_decl_file(&die_mem); dwarf_decl_line(&die_mem, lineno); goto out; } @@ -486,6 +486,19 @@ static int die_get_decl_fileno(Dwarf_Die *pdie) return -ENOENT; } +/* Return the file name by index */ +static const char *die_get_file_name(Dwarf_Die *dw_die, int idx) +{ + Dwarf_Die cu_die; + Dwarf_Files *files; + + if (idx
< 0 || !dwarf_diecu(dw_die, &cu_die, NULL, NULL) || + dwarf_getsrcfiles(&cu_die, &files, NULL) != 0) + return NULL; + + return dwarf_filesrc(files, idx, NULL, NULL); +} + /** * die_get_call_file - Get callsite file name of inlined function instance * @in_die: a DIE of an inlined function instance @@ -495,18 +508,22 @@ static int die_get_decl_fileno(Dwarf_Die *pdie) */ const char *die_get_call_file(Dwarf_Die *in_die) { - Dwarf_Die cu_die; - Dwarf_Files *files; - int idx; - - idx = die_get_call_fileno(in_die); - if (idx < 0 || !dwarf_diecu(in_die, &cu_die, NULL, NULL) || - dwarf_getsrcfiles(&cu_die, &files, NULL) != 0) - return NULL; - - return dwarf_filesrc(files, idx, NULL, NULL); + return die_get_file_name(in_die, die_get_call_fileno(in_die)); } +/** + * die_get_decl_file - Find the declared file name of this DIE + * @dw_die: a DIE for something declared. + * + * Get declared file name of @dw_die. + * NOTE: Since some version of clang DWARF5 implementation incorrectly uses + * file index 0 for DW_AT_decl_file, dwarf_decl_file() will return NULL for + * such cases. Use this function instead.
+ */ +const char *die_get_decl_file(Dwarf_Die *dw_die) +{ + return die_get_file_name(dw_die, die_get_decl_fileno(dw_die)); +} /** * die_find_child - Generic DIE search function in DIE tree @@ -790,7 +807,7 @@ static int __die_walk_funclines_cb(Dwarf_Die *in_die, void *data) } if (addr) { - fname = dwarf_decl_file(in_die); + fname = die_get_decl_file(in_die); if (fname && dwarf_decl_line(in_die, &lineno) == 0) { lw->retval = lw->callback(fname, lineno, addr, lw->data); if (lw->retval != 0) @@ -818,7 +835,7 @@ static int __die_walk_funclines(Dwarf_Die *sp_die, bool recursive, int lineno; /* Handle function declaration line */ - fname = dwarf_decl_file(sp_die); + fname = die_get_decl_file(sp_die); if (fname && dwarf_decl_line(sp_die, &lineno) == 0 && die_entrypc(sp_die, &addr) == 0) { lw.retval = callback(fname, lineno, addr, data); @@ -873,7 +890,7 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data) if (dwarf_tag(rt_die) != DW_TAG_compile_unit) { cu_die = dwarf_diecu(rt_die, &die_mem, NULL, NULL); dwarf_decl_line(rt_die, &decl); - decf = dwarf_decl_file(rt_die); + decf = die_get_decl_file(rt_die); if (!decf) { pr_debug2("Failed to get the declared file name of %s\n", dwarf_diename(rt_die)); @@ -928,7 +945,7 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data) dwarf_decl_line(&die_mem, &inl); if (inl != decl || - decf != dwarf_decl_file(&die_mem)) + decf != die_get_decl_file(&die_mem)) continue; } } diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h index 7ee0fa19b5c4..7ec8bc1083bb 100644 --- a/tools/perf/util/dwarf-aux.h +++ b/tools/perf/util/dwarf-aux.h @@ -50,6 +50,9 @@ int die_get_call_lineno(Dwarf_Die *in_die); /* Get callsite file name of inlined function instance */ const char *die_get_call_file(Dwarf_Die *in_die); +/* Get declared file name of a DIE */ +const char *die_get_decl_file(Dwarf_Die *dw_die); + /* Get type die */ Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die 
*die_mem); diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 1aa8fcc41c76..54b49ce85c9f 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -763,7 +763,7 @@ static int find_best_scope_cb(Dwarf_Die *fn_die, void *data) /* Skip if declared file name does not match */ if (fsp->file) { - file = dwarf_decl_file(fn_die); + file = die_get_decl_file(fn_die); if (!file || strcmp(fsp->file, file) != 0) return 0; } @@ -1071,7 +1071,7 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data) return DWARF_CB_OK; /* Check declared file */ - fname = dwarf_decl_file(sp_die); + fname = die_get_decl_file(sp_die); if (!fname) { pr_warning("A function DIE doesn't have decl_line. Maybe broken DWARF?\n"); return DWARF_CB_OK; @@ -1151,7 +1151,7 @@ static int pubname_search_cb(Dwarf *dbg, Dwarf_Global *gl, void *data) return DWARF_CB_OK; if (param->file) { - fname = dwarf_decl_file(param->sp_die); + fname = die_get_decl_file(param->sp_die); if (!fname || strtailcmp(param->file, fname)) return DWARF_CB_OK; } @@ -1750,7 +1750,7 @@ int debuginfo__find_probe_point(struct debuginfo *dbg, u64 addr, goto post; } - fname = dwarf_decl_file(&spdie); + fname = die_get_decl_file(&spdie); if (addr == baseaddr) { /* Function entry - Relative line number is 0 */ lineno = baseline; @@ -1787,7 +1787,7 @@ int debuginfo__find_probe_point(struct debuginfo *dbg, u64 addr, } } /* Verify the lineno and baseline are in a same file */ - tmp = dwarf_decl_file(&spdie); + tmp = die_get_decl_file(&spdie); if (!tmp || (fname && strcmp(tmp, fname) != 0)) lineno = 0; } @@ -1902,13 +1902,13 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data) /* Check declared file */ if (lr->file) { - fname = dwarf_decl_file(sp_die); + fname = die_get_decl_file(sp_die); if (!fname || strtailcmp(lr->file, fname)) return DWARF_CB_OK; } if (die_match_name(sp_die, lr->function) && die_is_func_def(sp_die)) { - lf->fname = dwarf_decl_file(sp_die); + 
lf->fname = die_get_decl_file(sp_die); dwarf_decl_line(sp_die, &lr->offset); pr_debug("fname: %s, lineno:%d\n", lf->fname, lr->offset); lf->lno_s = lr->offset + lr->start; From f3c9bd4e16a503cb14891963428d388a4f70ffb8 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 24 Oct 2022 11:19:06 -0700 Subject: [PATCH 0856/4122] perf build: Update to C standard to gnu11 C11 has become the standard for mainstream kernel development [1], allowing it in the perf build enables libraries like stdatomic.h to be assumed to be present. This came up in the context of [2]. [1] https://lore.kernel.org/lkml/CAHk-=whWbENRz-vLY6vpESDLj6kGUTKO3khGtVfipHqwewh2HQ@mail.gmail.com/ [2] https://lore.kernel.org/lkml/20221024011024.462518-1-irogers@google.com/ Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Bayduraev Cc: German Gomez Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20221024181913.630986-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 898226ea8cad..d3d3c13a9f25 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -307,7 +307,7 @@ CORE_CFLAGS += -ggdb3 CORE_CFLAGS += -funwind-tables CORE_CFLAGS += -Wall CORE_CFLAGS += -Wextra -CORE_CFLAGS += -std=gnu99 +CORE_CFLAGS += -std=gnu11 CXXFLAGS += -std=gnu++14 -fno-exceptions -fno-rtti CXXFLAGS += -Wall From 8ed28c2b56b78442989ef1afee2b968e9d51a65c Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 24 Oct 2022 11:19:07 -0700 Subject: [PATCH 0857/4122] perf record: Use sig_atomic_t for signal handlers This removes undefined behavior as described in: https://wiki.sei.cmu.edu/confluence/display/c/SIG31-C.+Do+not+access+shared+objects+in+signal+handlers Suggested-by: Leo Yan Signed-off-by: Ian Rogers Cc: Adrian Hunter 
Cc: Alexander Shishkin Cc: Alexey Bayduraev Cc: German Gomez Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20221024181913.630986-3-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 59f3d98a0196..bd462a3f2bbd 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -646,10 +646,10 @@ static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size) return record__write(rec, map, bf, size); } -static volatile int signr = -1; -static volatile int child_finished; +static volatile sig_atomic_t signr = -1; +static volatile sig_atomic_t child_finished; #ifdef HAVE_EVENTFD_SUPPORT -static volatile int done_fd = -1; +static volatile sig_atomic_t done_fd = -1; #endif static void sig_handler(int sig) @@ -1926,7 +1926,7 @@ static void record__read_lost_samples(struct record *rec) } -static volatile int workload_exec_errno; +static volatile sig_atomic_t workload_exec_errno; /* * evlist__prepare_workload will send a SIGUSR1 From 7f3374299f9762ba7946138bf0d5cfbd50da111b Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 24 Oct 2022 11:19:08 -0700 Subject: [PATCH 0858/4122] perf daemon: Use sig_atomic_t to avoid UB Use sig_atomic_t for a variable written to in a signal handler and read elsewhere. 
This is undefined behavior as per: https://wiki.sei.cmu.edu/confluence/display/c/SIG31-C.+Do+not+access+shared+objects+in+signal+handlers Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Bayduraev Cc: German Gomez Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20221024181913.630986-4-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-daemon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c index 3ce0c960ccc9..7036ec92d47d 100644 --- a/tools/perf/builtin-daemon.c +++ b/tools/perf/builtin-daemon.c @@ -105,7 +105,7 @@ static const char * const daemon_usage[] = { NULL }; -static bool done; +static volatile sig_atomic_t done; static void sig_handler(int sig __maybe_unused) { From 853596fb71f7c2f7ff0de7b13f09d6598d4d56cd Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 24 Oct 2022 11:19:09 -0700 Subject: [PATCH 0859/4122] perf ftrace: Use sig_atomic_t to avoid UB Use sig_atomic_t for a variable written to in a signal handler and read elsewhere. 
This is undefined behavior as per: https://wiki.sei.cmu.edu/confluence/display/c/SIG31-C.+Do+not+access+shared+objects+in+signal+handlers Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Bayduraev Cc: German Gomez Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20221024181913.630986-5-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-ftrace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 7de07bb16d23..d7fe00f66b83 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -36,8 +36,8 @@ #define DEFAULT_TRACER "function_graph" -static volatile int workload_exec_errno; -static bool done; +static volatile sig_atomic_t workload_exec_errno; +static volatile sig_atomic_t done; static void sig_handler(int sig __maybe_unused) { From 057929f9d083e80c9b30c324add69d2054ca6d82 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 24 Oct 2022 11:19:10 -0700 Subject: [PATCH 0860/4122] perf session: Change type to avoid undefined behaviour in a signal handler The 'session_done' variable is written to inside the signal handler of 'perf report' and 'perf script'. Switch its type to avoid undefined behavior. 
Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Bayduraev Cc: German Gomez Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20221024181913.630986-6-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 1a4f10de29ff..0e1a3d6bacb9 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include #include #include @@ -2022,7 +2023,7 @@ static int perf_session__flush_thread_stacks(struct perf_session *session) NULL); } -volatile int session_done; +volatile sig_atomic_t session_done; static int __perf_session__process_decomp_events(struct perf_session *session); From 01513fdc18f395dbcc924bc5e9962b12f86f947a Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 24 Oct 2022 11:19:11 -0700 Subject: [PATCH 0861/4122] perf stat: Use sig_atomic_t to avoid undefined behaviour in a signal handler Use sig_atomic_t for variables written/accessed in signal handlers. 
This is undefined behavior as per: https://wiki.sei.cmu.edu/confluence/display/c/SIG31-C.+Do+not+access+shared+objects+in+signal+handlers Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Bayduraev Cc: German Gomez Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20221024181913.630986-7-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index e52601a54b26..d5e1670bca20 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -173,7 +173,7 @@ static struct target target = { #define METRIC_ONLY_LEN 20 -static volatile pid_t child_pid = -1; +static volatile sig_atomic_t child_pid = -1; static int detailed_run = 0; static bool transaction_run; static bool topdown_run = false; @@ -208,7 +208,7 @@ struct perf_stat { static struct perf_stat perf_stat; #define STAT_RECORD perf_stat.record -static volatile int done = 0; +static volatile sig_atomic_t done = 0; static struct perf_stat_config stat_config = { .aggr_mode = AGGR_GLOBAL, @@ -580,7 +580,7 @@ static void disable_counters(void) } } -static volatile int workload_exec_errno; +static volatile sig_atomic_t workload_exec_errno; /* * evlist__prepare_workload will send a SIGUSR1 @@ -1039,7 +1039,7 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) evlist__print_counters(evsel_list, &stat_config, &target, ts, argc, argv); } -static volatile int signr = -1; +static volatile sig_atomic_t signr = -1; static void skip_signal(int signo) { From 691768968f2a13eba8d52e8475dca7feb288d4f2 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 24 Oct 2022 11:19:12 -0700 Subject: [PATCH 0862/4122] perf top: Use sig_atomic_t to avoid undefined behaviour in a signal handler Use sig_atomic_t for variables 
written/accessed in signal handlers. This is undefined behavior as per: https://wiki.sei.cmu.edu/confluence/display/c/SIG31-C.+Do+not+access+shared+objects+in+signal+handlers Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Bayduraev Cc: German Gomez Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20221024181913.630986-8-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 4b3ff7687236..bb5bd241246b 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -87,8 +87,8 @@ #include #include -static volatile int done; -static volatile int resize; +static volatile sig_atomic_t done; +static volatile sig_atomic_t resize; #define HEADER_LINE_NR 5 From 8330b9ebf9ef10156b01d40176b9fff1ce2a374c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 6 Oct 2022 07:24:03 +0200 Subject: [PATCH 0863/4122] iommu/fsl_pamu: Replace NO_IRQ by 0 NO_IRQ is used to check the return of irq_of_parse_and_map(). On some architecture NO_IRQ is 0, on other architectures it is -1. irq_of_parse_and_map() returns 0 on error, independent of NO_IRQ. So use 0 instead of using NO_IRQ. 
Signed-off-by: Christophe Leroy Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/2a2570a8d12c80a7d36837b6c586daa708ca09d7.1665033732.git.christophe.leroy@csgroup.eu Signed-off-by: Joerg Roedel --- drivers/iommu/fsl_pamu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c index 0d03f837a5d4..1b53d2da2c19 100644 --- a/drivers/iommu/fsl_pamu.c +++ b/drivers/iommu/fsl_pamu.c @@ -779,7 +779,7 @@ static int fsl_pamu_probe(struct platform_device *pdev) of_get_address(dev->of_node, 0, &size, NULL); irq = irq_of_parse_and_map(dev->of_node, 0); - if (irq == NO_IRQ) { + if (!irq) { dev_warn(dev, "no interrupts listed in PAMU node\n"); goto error; } @@ -903,7 +903,7 @@ static int fsl_pamu_probe(struct platform_device *pdev) return 0; error: - if (irq != NO_IRQ) + if (irq) free_irq(irq, data); kfree_sensitive(data); From 2a48b15972a3b2a2622e6e537e1d53f457670395 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 24 Oct 2022 14:43:01 +0100 Subject: [PATCH 0864/4122] iommu/amd: Remove variable cnt in iommu_poll_ga_log() Variable cnt is just being incremented and it's never used anywhere else. The variable and the increment are redundant so remove it. 
Signed-off-by: Colin Ian King Reviewed-by: Vasant Hegde Reviewed-by: Jerry Snitselaar Link: https://lore.kernel.org/r/20221024134301.2158939-1-colin.i.king@gmail.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index d3b39d0416fa..3847f3bdc568 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -767,7 +767,7 @@ EXPORT_SYMBOL(amd_iommu_register_ga_log_notifier); static void iommu_poll_ga_log(struct amd_iommu *iommu) { - u32 head, tail, cnt = 0; + u32 head, tail; if (iommu->ga_log == NULL) return; @@ -780,7 +780,6 @@ static void iommu_poll_ga_log(struct amd_iommu *iommu) u64 log_entry; raw = (u64 *)(iommu->ga_log + head); - cnt++; /* Avoid memcpy function-call overhead */ log_entry = *raw; From bf8d2dd2ed0825a58f31cc510245a1eb46f8a87e Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Tue, 25 Oct 2022 13:56:52 +0200 Subject: [PATCH 0865/4122] iommu/s390: Fix duplicate domain attachments Since commit fa7e9ecc5e1c ("iommu/s390: Tolerate repeat attach_dev calls") we can end up with duplicates in the list of devices attached to a domain. This is inefficient and confusing since only one domain can actually be in control of the IOMMU translations for a device. Fix this by detaching the device from the previous domain, if any, on attach. Add a WARN_ON() in case we still have attached devices on freeing the domain. While here remove the re-attach on failure dance as it was determined to be unlikely to help and may confuse debug and recovery. 
Fixes: fa7e9ecc5e1c ("iommu/s390: Tolerate repeat attach_dev calls") Reviewed-by: Matthew Rosato Reviewed-by: Jason Gunthorpe Signed-off-by: Niklas Schnelle Link: https://lore.kernel.org/r/20221025115657.1666860-2-schnelle@linux.ibm.com Signed-off-by: Joerg Roedel --- drivers/iommu/s390-iommu.c | 106 ++++++++++++++++--------------------- 1 file changed, 45 insertions(+), 61 deletions(-) diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index 3c071782f6f1..c2e5e81d609e 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -79,10 +79,36 @@ static void s390_domain_free(struct iommu_domain *domain) { struct s390_domain *s390_domain = to_s390_domain(domain); + WARN_ON(!list_empty(&s390_domain->devices)); dma_cleanup_tables(s390_domain->dma_table); kfree(s390_domain); } +static void __s390_iommu_detach_device(struct zpci_dev *zdev) +{ + struct s390_domain *s390_domain = zdev->s390_domain; + struct s390_domain_device *domain_device, *tmp; + unsigned long flags; + + if (!s390_domain) + return; + + spin_lock_irqsave(&s390_domain->list_lock, flags); + list_for_each_entry_safe(domain_device, tmp, &s390_domain->devices, + list) { + if (domain_device->zdev == zdev) { + list_del(&domain_device->list); + kfree(domain_device); + break; + } + } + spin_unlock_irqrestore(&s390_domain->list_lock, flags); + + zpci_unregister_ioat(zdev, 0); + zdev->s390_domain = NULL; + zdev->dma_table = NULL; +} + static int s390_iommu_attach_device(struct iommu_domain *domain, struct device *dev) { @@ -90,7 +116,7 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, struct zpci_dev *zdev = to_zpci_dev(dev); struct s390_domain_device *domain_device; unsigned long flags; - int cc, rc; + int cc, rc = 0; if (!zdev) return -ENODEV; @@ -99,24 +125,18 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, if (!domain_device) return -ENOMEM; - if (zdev->dma_table && !zdev->s390_domain) { - cc = zpci_dma_exit_device(zdev); - if (cc) { - rc = 
-EIO; - goto out_free; - } - } - if (zdev->s390_domain) - zpci_unregister_ioat(zdev, 0); + __s390_iommu_detach_device(zdev); + else if (zdev->dma_table) + zpci_dma_exit_device(zdev); - zdev->dma_table = s390_domain->dma_table; cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(zdev->dma_table)); + virt_to_phys(s390_domain->dma_table)); if (cc) { rc = -EIO; - goto out_restore; + goto out_free; } + zdev->dma_table = s390_domain->dma_table; spin_lock_irqsave(&s390_domain->list_lock, flags); /* First device defines the DMA range limits */ @@ -127,9 +147,9 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, /* Allow only devices with identical DMA range limits */ } else if (domain->geometry.aperture_start != zdev->start_dma || domain->geometry.aperture_end != zdev->end_dma) { - rc = -EINVAL; spin_unlock_irqrestore(&s390_domain->list_lock, flags); - goto out_restore; + rc = -EINVAL; + goto out_unregister; } domain_device->zdev = zdev; zdev->s390_domain = s390_domain; @@ -138,14 +158,9 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, return 0; -out_restore: - if (!zdev->s390_domain) { - zpci_dma_init_device(zdev); - } else { - zdev->dma_table = zdev->s390_domain->dma_table; - zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(zdev->dma_table)); - } +out_unregister: + zpci_unregister_ioat(zdev, 0); + zdev->dma_table = NULL; out_free: kfree(domain_device); @@ -155,32 +170,12 @@ out_free: static void s390_iommu_detach_device(struct iommu_domain *domain, struct device *dev) { - struct s390_domain *s390_domain = to_s390_domain(domain); struct zpci_dev *zdev = to_zpci_dev(dev); - struct s390_domain_device *domain_device, *tmp; - unsigned long flags; - int found = 0; - if (!zdev) - return; + WARN_ON(zdev->s390_domain != to_s390_domain(domain)); - spin_lock_irqsave(&s390_domain->list_lock, flags); - list_for_each_entry_safe(domain_device, tmp, &s390_domain->devices, - list) { - if 
(domain_device->zdev == zdev) { - list_del(&domain_device->list); - kfree(domain_device); - found = 1; - break; - } - } - spin_unlock_irqrestore(&s390_domain->list_lock, flags); - - if (found && (zdev->s390_domain == s390_domain)) { - zdev->s390_domain = NULL; - zpci_unregister_ioat(zdev, 0); - zpci_dma_init_device(zdev); - } + __s390_iommu_detach_device(zdev); + zpci_dma_init_device(zdev); } static struct iommu_device *s390_iommu_probe_device(struct device *dev) @@ -198,24 +193,13 @@ static struct iommu_device *s390_iommu_probe_device(struct device *dev) static void s390_iommu_release_device(struct device *dev) { struct zpci_dev *zdev = to_zpci_dev(dev); - struct iommu_domain *domain; /* - * This is a workaround for a scenario where the IOMMU API common code - * "forgets" to call the detach_dev callback: After binding a device - * to vfio-pci and completing the VFIO_SET_IOMMU ioctl (which triggers - * the attach_dev), removing the device via - * "echo 1 > /sys/bus/pci/devices/.../remove" won't trigger detach_dev, - * only release_device will be called via the BUS_NOTIFY_REMOVED_DEVICE - * notifier. - * - * So let's call detach_dev from here if it hasn't been called before. + * release_device is expected to detach any domain currently attached + * to the device, but keep it attached to other devices in the group. */ - if (zdev && zdev->s390_domain) { - domain = iommu_get_domain_for_dev(dev); - if (domain) - s390_iommu_detach_device(domain, dev); - } + if (zdev) + __s390_iommu_detach_device(zdev); } static int s390_iommu_update_trans(struct s390_domain *s390_domain, From 1a3a7d64bbce3179401f4e691522ff992aa1b8a1 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Tue, 25 Oct 2022 13:56:53 +0200 Subject: [PATCH 0866/4122] iommu/s390: Get rid of s390_domain_device The struct s390_domain_device serves the sole purpose as list entry for the devices list of a struct s390_domain. 
As it contains no additional information besides a list_head and a pointer to the struct zpci_dev we can simplify things and just thread the device list through struct zpci_dev directly. This removes the need to allocate during domain attach and gets rid of one level of indirection during mapping operations. Reviewed-by: Matthew Rosato Reviewed-by: Jason Gunthorpe Signed-off-by: Niklas Schnelle Link: https://lore.kernel.org/r/20221025115657.1666860-3-schnelle@linux.ibm.com Signed-off-by: Joerg Roedel --- arch/s390/include/asm/pci.h | 1 + drivers/iommu/s390-iommu.c | 37 +++++++------------------------------ 2 files changed, 8 insertions(+), 30 deletions(-) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 108e732d7b14..15f8714ca9b7 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -117,6 +117,7 @@ struct zpci_bus { struct zpci_dev { struct zpci_bus *zbus; struct list_head entry; /* list of all zpci_devices, needed for hotplug, etc. */ + struct list_head iommu_list; struct kref kref; struct hotplug_slot hotplug_slot; diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index c2e5e81d609e..af83ccde16a4 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -29,11 +29,6 @@ struct s390_domain { spinlock_t list_lock; }; -struct s390_domain_device { - struct list_head list; - struct zpci_dev *zdev; -}; - static struct s390_domain *to_s390_domain(struct iommu_domain *dom) { return container_of(dom, struct s390_domain, domain); @@ -87,21 +82,13 @@ static void s390_domain_free(struct iommu_domain *domain) static void __s390_iommu_detach_device(struct zpci_dev *zdev) { struct s390_domain *s390_domain = zdev->s390_domain; - struct s390_domain_device *domain_device, *tmp; unsigned long flags; if (!s390_domain) return; spin_lock_irqsave(&s390_domain->list_lock, flags); - list_for_each_entry_safe(domain_device, tmp, &s390_domain->devices, - list) { - if (domain_device->zdev == zdev) { - 
list_del(&domain_device->list); - kfree(domain_device); - break; - } - } + list_del_init(&zdev->iommu_list); spin_unlock_irqrestore(&s390_domain->list_lock, flags); zpci_unregister_ioat(zdev, 0); @@ -114,17 +101,12 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, { struct s390_domain *s390_domain = to_s390_domain(domain); struct zpci_dev *zdev = to_zpci_dev(dev); - struct s390_domain_device *domain_device; unsigned long flags; int cc, rc = 0; if (!zdev) return -ENODEV; - domain_device = kzalloc(sizeof(*domain_device), GFP_KERNEL); - if (!domain_device) - return -ENOMEM; - if (zdev->s390_domain) __s390_iommu_detach_device(zdev); else if (zdev->dma_table) @@ -132,10 +114,8 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, virt_to_phys(s390_domain->dma_table)); - if (cc) { - rc = -EIO; - goto out_free; - } + if (cc) + return -EIO; zdev->dma_table = s390_domain->dma_table; spin_lock_irqsave(&s390_domain->list_lock, flags); @@ -151,9 +131,8 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, rc = -EINVAL; goto out_unregister; } - domain_device->zdev = zdev; zdev->s390_domain = s390_domain; - list_add(&domain_device->list, &s390_domain->devices); + list_add(&zdev->iommu_list, &s390_domain->devices); spin_unlock_irqrestore(&s390_domain->list_lock, flags); return 0; @@ -161,8 +140,6 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, out_unregister: zpci_unregister_ioat(zdev, 0); zdev->dma_table = NULL; -out_free: - kfree(domain_device); return rc; } @@ -206,10 +183,10 @@ static int s390_iommu_update_trans(struct s390_domain *s390_domain, phys_addr_t pa, dma_addr_t dma_addr, size_t size, int flags) { - struct s390_domain_device *domain_device; phys_addr_t page_addr = pa & PAGE_MASK; dma_addr_t start_dma_addr = dma_addr; unsigned long irq_flags, nr_pages, i; + struct zpci_dev *zdev; unsigned long *entry; int rc = 0; @@ -234,8 +211,8 @@ static 
int s390_iommu_update_trans(struct s390_domain *s390_domain, } spin_lock(&s390_domain->list_lock); - list_for_each_entry(domain_device, &s390_domain->devices, list) { - rc = zpci_refresh_trans((u64) domain_device->zdev->fh << 32, + list_for_each_entry(zdev, &s390_domain->devices, iommu_list) { + rc = zpci_refresh_trans((u64)zdev->fh << 32, start_dma_addr, nr_pages * PAGE_SIZE); if (rc) break; From cbf7827bc5dcfa4301aaea6f57eba9a94dbee7b1 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Tue, 25 Oct 2022 13:56:54 +0200 Subject: [PATCH 0867/4122] iommu/s390: Fix potential s390_domain aperture shrinking The s390 IOMMU driver currently sets the IOMMU domain's aperture to match the device specific DMA address range of the device that is first attached. This is not ideal. For one if the domain has no device attached in the meantime the aperture could be shrunk allowing translations outside the aperture to exist in the translation tables. Also this is a bit of a misuse of the aperture which really should describe what addresses can be translated and not some device specific limitations. Instead of misusing the aperture like this we can instead create reserved ranges for the ranges inaccessible to the attached devices allowing devices with overlapping ranges to still share an IOMMU domain. This also significantly simplifies s390_iommu_attach_device() allowing us to move the aperture check to the beginning of the function and removing the need to hold the device list's lock to check the aperture. As we then use the same aperture for all domains and it only depends on the table properties we can already check zdev->start_dma/end_dma at probe time and turn the check on attach into a WARN_ON(). 
Suggested-by: Jason Gunthorpe Reviewed-by: Matthew Rosato Reviewed-by: Jason Gunthorpe Signed-off-by: Niklas Schnelle Link: https://lore.kernel.org/r/20221025115657.1666860-4-schnelle@linux.ibm.com Signed-off-by: Joerg Roedel --- drivers/iommu/s390-iommu.c | 63 ++++++++++++++++++++++++++------------ 1 file changed, 43 insertions(+), 20 deletions(-) diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index af83ccde16a4..9b3adc61005c 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -62,6 +62,9 @@ static struct iommu_domain *s390_domain_alloc(unsigned domain_type) kfree(s390_domain); return NULL; } + s390_domain->domain.geometry.force_aperture = true; + s390_domain->domain.geometry.aperture_start = 0; + s390_domain->domain.geometry.aperture_end = ZPCI_TABLE_SIZE_RT - 1; spin_lock_init(&s390_domain->dma_table_lock); spin_lock_init(&s390_domain->list_lock); @@ -102,11 +105,15 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, struct s390_domain *s390_domain = to_s390_domain(domain); struct zpci_dev *zdev = to_zpci_dev(dev); unsigned long flags; - int cc, rc = 0; + int cc; if (!zdev) return -ENODEV; + if (WARN_ON(domain->geometry.aperture_start > zdev->end_dma || + domain->geometry.aperture_end < zdev->start_dma)) + return -EINVAL; + if (zdev->s390_domain) __s390_iommu_detach_device(zdev); else if (zdev->dma_table) @@ -118,30 +125,14 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, return -EIO; zdev->dma_table = s390_domain->dma_table; - spin_lock_irqsave(&s390_domain->list_lock, flags); - /* First device defines the DMA range limits */ - if (list_empty(&s390_domain->devices)) { - domain->geometry.aperture_start = zdev->start_dma; - domain->geometry.aperture_end = zdev->end_dma; - domain->geometry.force_aperture = true; - /* Allow only devices with identical DMA range limits */ - } else if (domain->geometry.aperture_start != zdev->start_dma || - domain->geometry.aperture_end != zdev->end_dma) 
{ - spin_unlock_irqrestore(&s390_domain->list_lock, flags); - rc = -EINVAL; - goto out_unregister; - } + zdev->dma_table = s390_domain->dma_table; zdev->s390_domain = s390_domain; + + spin_lock_irqsave(&s390_domain->list_lock, flags); list_add(&zdev->iommu_list, &s390_domain->devices); spin_unlock_irqrestore(&s390_domain->list_lock, flags); return 0; - -out_unregister: - zpci_unregister_ioat(zdev, 0); - zdev->dma_table = NULL; - - return rc; } static void s390_iommu_detach_device(struct iommu_domain *domain, @@ -155,6 +146,30 @@ static void s390_iommu_detach_device(struct iommu_domain *domain, zpci_dma_init_device(zdev); } +static void s390_iommu_get_resv_regions(struct device *dev, + struct list_head *list) +{ + struct zpci_dev *zdev = to_zpci_dev(dev); + struct iommu_resv_region *region; + + if (zdev->start_dma) { + region = iommu_alloc_resv_region(0, zdev->start_dma, 0, + IOMMU_RESV_RESERVED, GFP_KERNEL); + if (!region) + return; + list_add_tail(&region->list, list); + } + + if (zdev->end_dma < ZPCI_TABLE_SIZE_RT - 1) { + region = iommu_alloc_resv_region(zdev->end_dma + 1, + ZPCI_TABLE_SIZE_RT - zdev->end_dma - 1, + 0, IOMMU_RESV_RESERVED, GFP_KERNEL); + if (!region) + return; + list_add_tail(&region->list, list); + } +} + static struct iommu_device *s390_iommu_probe_device(struct device *dev) { struct zpci_dev *zdev; @@ -164,6 +179,13 @@ static struct iommu_device *s390_iommu_probe_device(struct device *dev) zdev = to_zpci_dev(dev); + if (zdev->start_dma > zdev->end_dma || + zdev->start_dma > ZPCI_TABLE_SIZE_RT - 1) + return ERR_PTR(-EINVAL); + + if (zdev->end_dma > ZPCI_TABLE_SIZE_RT - 1) + zdev->end_dma = ZPCI_TABLE_SIZE_RT - 1; + return &zdev->iommu_dev; } @@ -342,6 +364,7 @@ static const struct iommu_ops s390_iommu_ops = { .release_device = s390_iommu_release_device, .device_group = generic_device_group, .pgsize_bitmap = S390_IOMMU_PGSIZES, + .get_resv_regions = s390_iommu_get_resv_regions, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = 
s390_iommu_attach_device, .detach_dev = s390_iommu_detach_device, From a4d996c2c4b55a42b21d0f7026b2bd6f7396f666 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Tue, 25 Oct 2022 13:56:55 +0200 Subject: [PATCH 0868/4122] iommu/s390: Fix incorrect aperture check The domain->geometry.aperture_end specifies the last valid address; treat it as such when checking if a DMA address is valid. Reviewed-by: Pierre Morel Reviewed-by: Matthew Rosato Reviewed-by: Jason Gunthorpe Signed-off-by: Niklas Schnelle Link: https://lore.kernel.org/r/20221025115657.1666860-5-schnelle@linux.ibm.com Signed-off-by: Joerg Roedel --- drivers/iommu/s390-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index 9b3adc61005c..3e601ca6ee0f 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -213,7 +213,7 @@ static int s390_iommu_update_trans(struct s390_domain *s390_domain, int rc = 0; if (dma_addr < s390_domain->domain.geometry.aperture_start || - dma_addr + size > s390_domain->domain.geometry.aperture_end) + (dma_addr + size - 1) > s390_domain->domain.geometry.aperture_end) return -EINVAL; nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; From b4d8ae0e907b096583491101ddfc5143b7c08918 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Tue, 25 Oct 2022 13:56:56 +0200 Subject: [PATCH 0869/4122] iommu/s390: Fix incorrect pgsize_bitmap The .pgsize_bitmap property of struct iommu_ops is not a page mask but rather has a bit set for each size of pages the IOMMU supports. As the comment correctly pointed out, at this moment the code only supports 4K pages, so simply use SZ_4K here. 
Reviewed-by: Matthew Rosato Reviewed-by: Jason Gunthorpe Signed-off-by: Niklas Schnelle Link: https://lore.kernel.org/r/20221025115657.1666860-6-schnelle@linux.ibm.com Signed-off-by: Joerg Roedel --- drivers/iommu/s390-iommu.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index 3e601ca6ee0f..104dfbec1037 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -12,13 +12,6 @@ #include #include -/* - * Physically contiguous memory regions can be mapped with 4 KiB alignment, - * we allow all page sizes that are an order of 4KiB (no special large page - * support so far). - */ -#define S390_IOMMU_PGSIZES (~0xFFFUL) - static const struct iommu_ops s390_iommu_ops; struct s390_domain { @@ -363,7 +356,7 @@ static const struct iommu_ops s390_iommu_ops = { .probe_device = s390_iommu_probe_device, .release_device = s390_iommu_release_device, .device_group = generic_device_group, - .pgsize_bitmap = S390_IOMMU_PGSIZES, + .pgsize_bitmap = SZ_4K, .get_resv_regions = s390_iommu_get_resv_regions, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = s390_iommu_attach_device, From f3cc4f874efa8d5b10ebd9dc8702cd25b9e536a3 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Tue, 25 Oct 2022 13:56:57 +0200 Subject: [PATCH 0870/4122] iommu/s390: Implement map_pages()/unmap_pages() instead of map()/unmap() While s390-iommu currently implements the map_page()/unmap_page() operations which only map/unmap a single page at a time the internal s390_iommu_update_trans() API already supports mapping/unmapping a range of pages at once. Take advantage of this by implementing the map_pages()/unmap_pages() operations instead thus allowing users of the IOMMU drivers to map multiple pages in a single call followed by a single I/O TLB flush if needed. 
Reviewed-by: Matthew Rosato Reviewed-by: Jason Gunthorpe Signed-off-by: Niklas Schnelle Link: https://lore.kernel.org/r/20221025115657.1666860-7-schnelle@linux.ibm.com Signed-off-by: Joerg Roedel --- drivers/iommu/s390-iommu.c | 48 +++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index 104dfbec1037..7fb512bece9a 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -196,20 +196,15 @@ static void s390_iommu_release_device(struct device *dev) static int s390_iommu_update_trans(struct s390_domain *s390_domain, phys_addr_t pa, dma_addr_t dma_addr, - size_t size, int flags) + unsigned long nr_pages, int flags) { phys_addr_t page_addr = pa & PAGE_MASK; dma_addr_t start_dma_addr = dma_addr; - unsigned long irq_flags, nr_pages, i; + unsigned long irq_flags, i; struct zpci_dev *zdev; unsigned long *entry; int rc = 0; - if (dma_addr < s390_domain->domain.geometry.aperture_start || - (dma_addr + size - 1) > s390_domain->domain.geometry.aperture_end) - return -EINVAL; - - nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; if (!nr_pages) return 0; @@ -252,11 +247,24 @@ undo_cpu_trans: return rc; } -static int s390_iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp) +static int s390_iommu_map_pages(struct iommu_domain *domain, + unsigned long iova, phys_addr_t paddr, + size_t pgsize, size_t pgcount, + int prot, gfp_t gfp, size_t *mapped) { struct s390_domain *s390_domain = to_s390_domain(domain); int flags = ZPCI_PTE_VALID, rc = 0; + size_t size = pgcount << __ffs(pgsize); + + if (pgsize != SZ_4K) + return -EINVAL; + + if (iova < s390_domain->domain.geometry.aperture_start || + (iova + size - 1) > s390_domain->domain.geometry.aperture_end) + return -EINVAL; + + if (!IS_ALIGNED(iova | paddr, pgsize)) + return -EINVAL; if (!(prot & IOMMU_READ)) return -EINVAL; @@ -265,7 +273,9 @@ static int 
s390_iommu_map(struct iommu_domain *domain, unsigned long iova, flags |= ZPCI_TABLE_PROTECTED; rc = s390_iommu_update_trans(s390_domain, paddr, iova, - size, flags); + pgcount, flags); + if (!rc) + *mapped = size; return rc; } @@ -301,21 +311,27 @@ static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain, return phys; } -static size_t s390_iommu_unmap(struct iommu_domain *domain, - unsigned long iova, size_t size, - struct iommu_iotlb_gather *gather) +static size_t s390_iommu_unmap_pages(struct iommu_domain *domain, + unsigned long iova, + size_t pgsize, size_t pgcount, + struct iommu_iotlb_gather *gather) { struct s390_domain *s390_domain = to_s390_domain(domain); + size_t size = pgcount << __ffs(pgsize); int flags = ZPCI_PTE_INVALID; phys_addr_t paddr; int rc; + if (WARN_ON(iova < s390_domain->domain.geometry.aperture_start || + (iova + size - 1) > s390_domain->domain.geometry.aperture_end)) + return 0; + paddr = s390_iommu_iova_to_phys(domain, iova); if (!paddr) return 0; rc = s390_iommu_update_trans(s390_domain, paddr, iova, - size, flags); + pgcount, flags); if (rc) return 0; @@ -361,8 +377,8 @@ static const struct iommu_ops s390_iommu_ops = { .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = s390_iommu_attach_device, .detach_dev = s390_iommu_detach_device, - .map = s390_iommu_map, - .unmap = s390_iommu_unmap, + .map_pages = s390_iommu_map_pages, + .unmap_pages = s390_iommu_unmap_pages, .iova_to_phys = s390_iommu_iova_to_phys, .free = s390_domain_free, } From 92ea0720ba9cf7f09589a711245c2da145125958 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 24 Oct 2022 11:19:13 -0700 Subject: [PATCH 0871/4122] perf trace: Use sig_atomic_t to avoid undefined behaviour in a signal handler Use sig_atomic_t for variables written/accessed in signal handlers. 
This is undefined behavior as per: https://wiki.sei.cmu.edu/confluence/display/c/SIG31-C.+Do+not+access+shared+objects+in+signal+handlers Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Bayduraev Cc: German Gomez Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20221024181913.630986-9-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index d3c757769b96..72991528687e 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1535,8 +1535,8 @@ static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) } static pid_t workload_pid = -1; -static bool done = false; -static bool interrupted = false; +static volatile sig_atomic_t done = false; +static volatile sig_atomic_t interrupted = false; static void sighandler_interrupt(int sig __maybe_unused) { From 9ad0c1252e84dbc664f0462707182245ed603237 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Tue, 25 Oct 2022 18:54:11 +0200 Subject: [PATCH 0872/4122] iommu/sun50i: Fix reset release Reset signal is asserted by writing 0 to the corresponding locations of masters we want to reset. So in order to deassert all reset signals, we should write 1's to all locations. Current code writes 1's to locations of masters which were just reset which is good. However, at the same time it also writes 0's to other locations and thus asserts reset signals of remaining masters. Fix code by writing all 1's when we want to deassert all reset signals. This bug was discovered when working with Cedrus (video decoder). When it faulted, display went blank due to reset signal assertion. 
Fixes: 4100b8c229b3 ("iommu: Add Allwinner H6 IOMMU driver") Signed-off-by: Jernej Skrabec Link: https://lore.kernel.org/r/20221025165415.307591-2-jernej.skrabec@gmail.com Signed-off-by: Joerg Roedel --- drivers/iommu/sun50i-iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c index cd9b74ee24de..270204a6ec4a 100644 --- a/drivers/iommu/sun50i-iommu.c +++ b/drivers/iommu/sun50i-iommu.c @@ -27,6 +27,7 @@ #include #define IOMMU_RESET_REG 0x010 +#define IOMMU_RESET_RELEASE_ALL 0xffffffff #define IOMMU_ENABLE_REG 0x020 #define IOMMU_ENABLE_ENABLE BIT(0) @@ -893,7 +894,7 @@ static irqreturn_t sun50i_iommu_irq(int irq, void *dev_id) iommu_write(iommu, IOMMU_INT_CLR_REG, status); iommu_write(iommu, IOMMU_RESET_REG, ~status); - iommu_write(iommu, IOMMU_RESET_REG, status); + iommu_write(iommu, IOMMU_RESET_REG, IOMMU_RESET_RELEASE_ALL); spin_unlock(&iommu->iommu_lock); From cef20703e2b2276aaa402ec5a65ec9a09963b83e Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Tue, 25 Oct 2022 18:54:12 +0200 Subject: [PATCH 0873/4122] iommu/sun50i: Consider all fault sources for reset We have to reset masters for all faults - permissions, L1 fault or L2 fault. Currently it's done only for permissions. If other type of fault happens, master is in locked up state. Fix that by really considering all fault sources. 
Fixes: 4100b8c229b3 ("iommu: Add Allwinner H6 IOMMU driver") Signed-off-by: Jernej Skrabec Link: https://lore.kernel.org/r/20221025165415.307591-3-jernej.skrabec@gmail.com Signed-off-by: Joerg Roedel --- drivers/iommu/sun50i-iommu.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c index 270204a6ec4a..bbc269500800 100644 --- a/drivers/iommu/sun50i-iommu.c +++ b/drivers/iommu/sun50i-iommu.c @@ -869,8 +869,8 @@ static phys_addr_t sun50i_iommu_handle_perm_irq(struct sun50i_iommu *iommu) static irqreturn_t sun50i_iommu_irq(int irq, void *dev_id) { + u32 status, l1_status, l2_status, resets; struct sun50i_iommu *iommu = dev_id; - u32 status; spin_lock(&iommu->iommu_lock); @@ -880,6 +880,9 @@ static irqreturn_t sun50i_iommu_irq(int irq, void *dev_id) return IRQ_NONE; } + l1_status = iommu_read(iommu, IOMMU_L1PG_INT_REG); + l2_status = iommu_read(iommu, IOMMU_L2PG_INT_REG); + if (status & IOMMU_INT_INVALID_L2PG) sun50i_iommu_handle_pt_irq(iommu, IOMMU_INT_ERR_ADDR_L2_REG, @@ -893,7 +896,8 @@ static irqreturn_t sun50i_iommu_irq(int irq, void *dev_id) iommu_write(iommu, IOMMU_INT_CLR_REG, status); - iommu_write(iommu, IOMMU_RESET_REG, ~status); + resets = (status | l1_status | l2_status) & IOMMU_INT_MASTER_MASK; + iommu_write(iommu, IOMMU_RESET_REG, ~resets); iommu_write(iommu, IOMMU_RESET_REG, IOMMU_RESET_RELEASE_ALL); spin_unlock(&iommu->iommu_lock); From eac0104dc69be50bed86926d6f32e82b44f8c921 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Tue, 25 Oct 2022 18:54:13 +0200 Subject: [PATCH 0874/4122] iommu/sun50i: Fix R/W permission check Because driver has enum type permissions and iommu subsystem has bitmap type, we have to be careful how check for combined read and write permissions is done. In such case, we have to mask both permissions and check that both are set at the same time. Current code just masks both flags but doesn't check that both are set. 
In short, it always sets R/W permission, regardless of whether the requested permissions were RO, WO or RW. Fix that. Fixes: 4100b8c229b3 ("iommu: Add Allwinner H6 IOMMU driver") Signed-off-by: Jernej Skrabec Link: https://lore.kernel.org/r/20221025165415.307591-4-jernej.skrabec@gmail.com Signed-off-by: Joerg Roedel --- drivers/iommu/sun50i-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c index bbc269500800..df871af04bcb 100644 --- a/drivers/iommu/sun50i-iommu.c +++ b/drivers/iommu/sun50i-iommu.c @@ -271,7 +271,7 @@ static u32 sun50i_mk_pte(phys_addr_t page, int prot) enum sun50i_iommu_aci aci; u32 flags = 0; - if (prot & (IOMMU_READ | IOMMU_WRITE)) + if ((prot & (IOMMU_READ | IOMMU_WRITE)) == (IOMMU_READ | IOMMU_WRITE)) aci = SUN50I_IOMMU_ACI_RD_WR; else if (prot & IOMMU_READ) aci = SUN50I_IOMMU_ACI_RD; From 67a8a67f9eceb72e4c73d1d09ed9ab04f4b8e12d Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Tue, 25 Oct 2022 18:54:14 +0200 Subject: [PATCH 0875/4122] iommu/sun50i: Fix flush size Function sun50i_table_flush() takes number of entries as an argument, not number of bytes. Fix that mistake in sun50i_dte_get_page_table().
Fixes: 4100b8c229b3 ("iommu: Add Allwinner H6 IOMMU driver") Signed-off-by: Jernej Skrabec Link: https://lore.kernel.org/r/20221025165415.307591-5-jernej.skrabec@gmail.com Signed-off-by: Joerg Roedel --- drivers/iommu/sun50i-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c index df871af04bcb..e62e245060ac 100644 --- a/drivers/iommu/sun50i-iommu.c +++ b/drivers/iommu/sun50i-iommu.c @@ -512,7 +512,7 @@ static u32 *sun50i_dte_get_page_table(struct sun50i_iommu_domain *sun50i_domain, sun50i_iommu_free_page_table(iommu, drop_pt); } - sun50i_table_flush(sun50i_domain, page_table, PT_SIZE); + sun50i_table_flush(sun50i_domain, page_table, NUM_PT_ENTRIES); sun50i_table_flush(sun50i_domain, dte_addr, 1); return page_table; From e563cc0c787c85a4d9def0a77078dc5d3f445e3d Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Tue, 25 Oct 2022 18:54:15 +0200 Subject: [PATCH 0876/4122] iommu/sun50i: Implement .iotlb_sync_map Allocated iova ranges need to be invalidated immediately or otherwise they might or might not work when used by master or CPU. This was discovered when running video decoder conformity test with Cedrus. Some videos were now and then decoded incorrectly and generated page faults. According to vendor driver, it's enough to invalidate just start and end TLB and PTW cache lines. Documentation says that neighbouring lines must be invalidated too. Finally, when page fault occurs, that iova must be invalidated the same way, according to documentation. 
Fixes: 4100b8c229b3 ("iommu: Add Allwinner H6 IOMMU driver") Signed-off-by: Jernej Skrabec Link: https://lore.kernel.org/r/20221025165415.307591-6-jernej.skrabec@gmail.com Signed-off-by: Joerg Roedel --- drivers/iommu/sun50i-iommu.c | 73 ++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c index e62e245060ac..5cb2d44dfb92 100644 --- a/drivers/iommu/sun50i-iommu.c +++ b/drivers/iommu/sun50i-iommu.c @@ -93,6 +93,8 @@ #define NUM_PT_ENTRIES 256 #define PT_SIZE (NUM_PT_ENTRIES * PT_ENTRY_SIZE) +#define SPAGE_SIZE 4096 + struct sun50i_iommu { struct iommu_device iommu; @@ -295,6 +297,62 @@ static void sun50i_table_flush(struct sun50i_iommu_domain *sun50i_domain, dma_sync_single_for_device(iommu->dev, dma, size, DMA_TO_DEVICE); } +static void sun50i_iommu_zap_iova(struct sun50i_iommu *iommu, + unsigned long iova) +{ + u32 reg; + int ret; + + iommu_write(iommu, IOMMU_TLB_IVLD_ADDR_REG, iova); + iommu_write(iommu, IOMMU_TLB_IVLD_ADDR_MASK_REG, GENMASK(31, 12)); + iommu_write(iommu, IOMMU_TLB_IVLD_ENABLE_REG, + IOMMU_TLB_IVLD_ENABLE_ENABLE); + + ret = readl_poll_timeout_atomic(iommu->base + IOMMU_TLB_IVLD_ENABLE_REG, + reg, !reg, 1, 2000); + if (ret) + dev_warn(iommu->dev, "TLB invalidation timed out!\n"); +} + +static void sun50i_iommu_zap_ptw_cache(struct sun50i_iommu *iommu, + unsigned long iova) +{ + u32 reg; + int ret; + + iommu_write(iommu, IOMMU_PC_IVLD_ADDR_REG, iova); + iommu_write(iommu, IOMMU_PC_IVLD_ENABLE_REG, + IOMMU_PC_IVLD_ENABLE_ENABLE); + + ret = readl_poll_timeout_atomic(iommu->base + IOMMU_PC_IVLD_ENABLE_REG, + reg, !reg, 1, 2000); + if (ret) + dev_warn(iommu->dev, "PTW cache invalidation timed out!\n"); +} + +static void sun50i_iommu_zap_range(struct sun50i_iommu *iommu, + unsigned long iova, size_t size) +{ + assert_spin_locked(&iommu->iommu_lock); + + iommu_write(iommu, IOMMU_AUTO_GATING_REG, 0); + + sun50i_iommu_zap_iova(iommu, iova); + 
sun50i_iommu_zap_iova(iommu, iova + SPAGE_SIZE); + if (size > SPAGE_SIZE) { + sun50i_iommu_zap_iova(iommu, iova + size); + sun50i_iommu_zap_iova(iommu, iova + size + SPAGE_SIZE); + } + sun50i_iommu_zap_ptw_cache(iommu, iova); + sun50i_iommu_zap_ptw_cache(iommu, iova + SZ_1M); + if (size > SZ_1M) { + sun50i_iommu_zap_ptw_cache(iommu, iova + size); + sun50i_iommu_zap_ptw_cache(iommu, iova + size + SZ_1M); + } + + iommu_write(iommu, IOMMU_AUTO_GATING_REG, IOMMU_AUTO_GATING_ENABLE); +} + static int sun50i_iommu_flush_all_tlb(struct sun50i_iommu *iommu) { u32 reg; @@ -344,6 +402,18 @@ static void sun50i_iommu_flush_iotlb_all(struct iommu_domain *domain) spin_unlock_irqrestore(&iommu->iommu_lock, flags); } +static void sun50i_iommu_iotlb_sync_map(struct iommu_domain *domain, + unsigned long iova, size_t size) +{ + struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain); + struct sun50i_iommu *iommu = sun50i_domain->iommu; + unsigned long flags; + + spin_lock_irqsave(&iommu->iommu_lock, flags); + sun50i_iommu_zap_range(iommu, iova, size); + spin_unlock_irqrestore(&iommu->iommu_lock, flags); +} + static void sun50i_iommu_iotlb_sync(struct iommu_domain *domain, struct iommu_iotlb_gather *gather) { @@ -767,6 +837,7 @@ static const struct iommu_ops sun50i_iommu_ops = { .attach_dev = sun50i_iommu_attach_device, .detach_dev = sun50i_iommu_detach_device, .flush_iotlb_all = sun50i_iommu_flush_iotlb_all, + .iotlb_sync_map = sun50i_iommu_iotlb_sync_map, .iotlb_sync = sun50i_iommu_iotlb_sync, .iova_to_phys = sun50i_iommu_iova_to_phys, .map = sun50i_iommu_map, @@ -786,6 +857,8 @@ static void sun50i_iommu_report_fault(struct sun50i_iommu *iommu, report_iommu_fault(iommu->domain, iommu->dev, iova, prot); else dev_err(iommu->dev, "Page fault while iommu not attached to any domain?\n"); + + sun50i_iommu_zap_range(iommu, iova, SPAGE_SIZE); } static phys_addr_t sun50i_iommu_handle_pt_irq(struct sun50i_iommu *iommu, From 7a09c1269702db8eccb6f718da2b00173e1e0034 Mon Sep 17 
00:00:00 2001 From: Alan Stern Date: Wed, 2 Nov 2022 14:13:19 -0400 Subject: [PATCH 0877/4122] USB: core: Change configuration warnings to notices It has been pointed out that the kernel log messages warning about problems in USB configuration and related descriptors are vexing for users. The warning log level has a fairly high priority, but the user can do nothing to fix the underlying errors in the device's firmware. To reduce the amount of useless information produced by tools that filter high-priority log messages, we can change these warnings to notices, i.e., change dev_warn() to dev_notice(). The same holds for a few messages that currently use dev_err(): Unless they indicate a failure that might make a device unusable (such as inability to transfer a config descriptor), change them to dev_notice() also. Link: https://bugzilla.kernel.org/show_bug.cgi?id=216630 Suggested-by: Artem S. Tashkinov Signed-off-by: Alan Stern Link: https://lore.kernel.org/r/Y2KzPx0h6z1jXCuN@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 82 +++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 48bc8a4814ac..725b8dbcfe5f 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -61,7 +61,7 @@ static void usb_parse_ssp_isoc_endpoint_companion(struct device *ddev, desc = (struct usb_ssp_isoc_ep_comp_descriptor *) buffer; if (desc->bDescriptorType != USB_DT_SSP_ISOC_ENDPOINT_COMP || size < USB_DT_SSP_ISOC_EP_COMP_SIZE) { - dev_warn(ddev, "Invalid SuperSpeedPlus isoc endpoint companion" + dev_notice(ddev, "Invalid SuperSpeedPlus isoc endpoint companion" "for config %d interface %d altsetting %d ep %d.\n", cfgno, inum, asnum, ep->desc.bEndpointAddress); return; @@ -83,7 +83,7 @@ static void usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno, if (desc->bDescriptorType != USB_DT_SS_ENDPOINT_COMP || size < 
USB_DT_SS_EP_COMP_SIZE) { - dev_warn(ddev, "No SuperSpeed endpoint companion for config %d " + dev_notice(ddev, "No SuperSpeed endpoint companion for config %d " " interface %d altsetting %d ep %d: " "using minimum values\n", cfgno, inum, asnum, ep->desc.bEndpointAddress); @@ -109,13 +109,13 @@ static void usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno, /* Check the various values */ if (usb_endpoint_xfer_control(&ep->desc) && desc->bMaxBurst != 0) { - dev_warn(ddev, "Control endpoint with bMaxBurst = %d in " + dev_notice(ddev, "Control endpoint with bMaxBurst = %d in " "config %d interface %d altsetting %d ep %d: " "setting to zero\n", desc->bMaxBurst, cfgno, inum, asnum, ep->desc.bEndpointAddress); ep->ss_ep_comp.bMaxBurst = 0; } else if (desc->bMaxBurst > 15) { - dev_warn(ddev, "Endpoint with bMaxBurst = %d in " + dev_notice(ddev, "Endpoint with bMaxBurst = %d in " "config %d interface %d altsetting %d ep %d: " "setting to 15\n", desc->bMaxBurst, cfgno, inum, asnum, ep->desc.bEndpointAddress); @@ -125,7 +125,7 @@ static void usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno, if ((usb_endpoint_xfer_control(&ep->desc) || usb_endpoint_xfer_int(&ep->desc)) && desc->bmAttributes != 0) { - dev_warn(ddev, "%s endpoint with bmAttributes = %d in " + dev_notice(ddev, "%s endpoint with bmAttributes = %d in " "config %d interface %d altsetting %d ep %d: " "setting to zero\n", usb_endpoint_xfer_control(&ep->desc) ? 
"Control" : "Bulk", @@ -134,7 +134,7 @@ static void usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno, ep->ss_ep_comp.bmAttributes = 0; } else if (usb_endpoint_xfer_bulk(&ep->desc) && desc->bmAttributes > 16) { - dev_warn(ddev, "Bulk endpoint with more than 65536 streams in " + dev_notice(ddev, "Bulk endpoint with more than 65536 streams in " "config %d interface %d altsetting %d ep %d: " "setting to max\n", cfgno, inum, asnum, ep->desc.bEndpointAddress); @@ -142,7 +142,7 @@ static void usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno, } else if (usb_endpoint_xfer_isoc(&ep->desc) && !USB_SS_SSP_ISOC_COMP(desc->bmAttributes) && USB_SS_MULT(desc->bmAttributes) > 3) { - dev_warn(ddev, "Isoc endpoint has Mult of %d in " + dev_notice(ddev, "Isoc endpoint has Mult of %d in " "config %d interface %d altsetting %d ep %d: " "setting to 3\n", USB_SS_MULT(desc->bmAttributes), @@ -160,7 +160,7 @@ static void usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno, else max_tx = 999999; if (le16_to_cpu(desc->wBytesPerInterval) > max_tx) { - dev_warn(ddev, "%s endpoint with wBytesPerInterval of %d in " + dev_notice(ddev, "%s endpoint with wBytesPerInterval of %d in " "config %d interface %d altsetting %d ep %d: " "setting to %d\n", usb_endpoint_xfer_isoc(&ep->desc) ? 
"Isoc" : "Int", @@ -273,7 +273,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, else if (d->bLength >= USB_DT_ENDPOINT_SIZE) n = USB_DT_ENDPOINT_SIZE; else { - dev_warn(ddev, "config %d interface %d altsetting %d has an " + dev_notice(ddev, "config %d interface %d altsetting %d has an " "invalid endpoint descriptor of length %d, skipping\n", cfgno, inum, asnum, d->bLength); goto skip_to_next_endpoint_or_interface_descriptor; @@ -281,7 +281,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, i = d->bEndpointAddress & ~USB_ENDPOINT_DIR_MASK; if (i >= 16 || i == 0) { - dev_warn(ddev, "config %d interface %d altsetting %d has an " + dev_notice(ddev, "config %d interface %d altsetting %d has an " "invalid endpoint with address 0x%X, skipping\n", cfgno, inum, asnum, d->bEndpointAddress); goto skip_to_next_endpoint_or_interface_descriptor; @@ -293,7 +293,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, /* Check for duplicate endpoint addresses */ if (config_endpoint_is_duplicate(config, inum, asnum, d)) { - dev_warn(ddev, "config %d interface %d altsetting %d has a duplicate endpoint with address 0x%X, skipping\n", + dev_notice(ddev, "config %d interface %d altsetting %d has a duplicate endpoint with address 0x%X, skipping\n", cfgno, inum, asnum, d->bEndpointAddress); goto skip_to_next_endpoint_or_interface_descriptor; } @@ -301,7 +301,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, /* Ignore some endpoints */ if (udev->quirks & USB_QUIRK_ENDPOINT_IGNORE) { if (usb_endpoint_is_ignored(udev, ifp, d)) { - dev_warn(ddev, "config %d interface %d altsetting %d has an ignored endpoint with address 0x%X, skipping\n", + dev_notice(ddev, "config %d interface %d altsetting %d has an ignored endpoint with address 0x%X, skipping\n", cfgno, inum, asnum, d->bEndpointAddress); goto skip_to_next_endpoint_or_interface_descriptor; @@ -378,7 +378,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, } } if 
(d->bInterval < i || d->bInterval > j) { - dev_warn(ddev, "config %d interface %d altsetting %d " + dev_notice(ddev, "config %d interface %d altsetting %d " "endpoint 0x%X has an invalid bInterval %d, " "changing to %d\n", cfgno, inum, asnum, @@ -391,7 +391,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, * them usable, we will try treating them as Interrupt endpoints. */ if (udev->speed == USB_SPEED_LOW && usb_endpoint_xfer_bulk(d)) { - dev_warn(ddev, "config %d interface %d altsetting %d " + dev_notice(ddev, "config %d interface %d altsetting %d " "endpoint 0x%X is Bulk; changing to Interrupt\n", cfgno, inum, asnum, d->bEndpointAddress); endpoint->desc.bmAttributes = USB_ENDPOINT_XFER_INT; @@ -408,7 +408,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, */ maxp = le16_to_cpu(endpoint->desc.wMaxPacketSize); if (maxp == 0 && !(usb_endpoint_xfer_isoc(d) && asnum == 0)) { - dev_warn(ddev, "config %d interface %d altsetting %d endpoint 0x%X has invalid wMaxPacketSize 0\n", + dev_notice(ddev, "config %d interface %d altsetting %d endpoint 0x%X has invalid wMaxPacketSize 0\n", cfgno, inum, asnum, d->bEndpointAddress); } @@ -439,7 +439,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, j = maxpacket_maxes[usb_endpoint_type(&endpoint->desc)]; if (maxp > j) { - dev_warn(ddev, "config %d interface %d altsetting %d endpoint 0x%X has invalid maxpacket %d, setting to %d\n", + dev_notice(ddev, "config %d interface %d altsetting %d endpoint 0x%X has invalid maxpacket %d, setting to %d\n", cfgno, inum, asnum, d->bEndpointAddress, maxp, j); maxp = j; endpoint->desc.wMaxPacketSize = cpu_to_le16(i | maxp); @@ -452,7 +452,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, */ if (udev->speed == USB_SPEED_HIGH && usb_endpoint_xfer_bulk(d)) { if (maxp != 512) - dev_warn(ddev, "config %d interface %d altsetting %d " + dev_notice(ddev, "config %d interface %d altsetting %d " "bulk endpoint 0x%X has invalid maxpacket %d\n", 
cfgno, inum, asnum, d->bEndpointAddress, maxp); @@ -533,7 +533,7 @@ static int usb_parse_interface(struct device *ddev, int cfgno, i < intfc->num_altsetting; (++i, ++alt)) { if (alt->desc.bAlternateSetting == asnum) { - dev_warn(ddev, "Duplicate descriptor for config %d " + dev_notice(ddev, "Duplicate descriptor for config %d " "interface %d altsetting %d, skipping\n", cfgno, inum, asnum); goto skip_to_next_interface_descriptor; @@ -559,7 +559,7 @@ static int usb_parse_interface(struct device *ddev, int cfgno, num_ep = num_ep_orig = alt->desc.bNumEndpoints; alt->desc.bNumEndpoints = 0; /* Use as a counter */ if (num_ep > USB_MAXENDPOINTS) { - dev_warn(ddev, "too many endpoints for config %d interface %d " + dev_notice(ddev, "too many endpoints for config %d interface %d " "altsetting %d: %d, using maximum allowed: %d\n", cfgno, inum, asnum, num_ep, USB_MAXENDPOINTS); num_ep = USB_MAXENDPOINTS; @@ -590,7 +590,7 @@ static int usb_parse_interface(struct device *ddev, int cfgno, } if (n != num_ep_orig) - dev_warn(ddev, "config %d interface %d altsetting %d has %d " + dev_notice(ddev, "config %d interface %d altsetting %d has %d " "endpoint descriptor%s, different from the interface " "descriptor's value: %d\n", cfgno, inum, asnum, n, plural(n), num_ep_orig); @@ -625,7 +625,7 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx, if (config->desc.bDescriptorType != USB_DT_CONFIG || config->desc.bLength < USB_DT_CONFIG_SIZE || config->desc.bLength > size) { - dev_err(ddev, "invalid descriptor for config index %d: " + dev_notice(ddev, "invalid descriptor for config index %d: " "type = 0x%X, length = %d\n", cfgidx, config->desc.bDescriptorType, config->desc.bLength); return -EINVAL; @@ -636,7 +636,7 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx, size -= config->desc.bLength; if (nintf > USB_MAXINTERFACES) { - dev_warn(ddev, "config %d has too many interfaces: %d, " + dev_notice(ddev, "config %d has too many interfaces: %d, " 
"using maximum allowed: %d\n", cfgno, nintf, USB_MAXINTERFACES); nintf = USB_MAXINTERFACES; @@ -650,7 +650,7 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx, (buffer2 += header->bLength, size2 -= header->bLength)) { if (size2 < sizeof(struct usb_descriptor_header)) { - dev_warn(ddev, "config %d descriptor has %d excess " + dev_notice(ddev, "config %d descriptor has %d excess " "byte%s, ignoring\n", cfgno, size2, plural(size2)); break; @@ -658,7 +658,7 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx, header = (struct usb_descriptor_header *) buffer2; if ((header->bLength > size2) || (header->bLength < 2)) { - dev_warn(ddev, "config %d has an invalid descriptor " + dev_notice(ddev, "config %d has an invalid descriptor " "of length %d, skipping remainder of the config\n", cfgno, header->bLength); break; @@ -670,7 +670,7 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx, d = (struct usb_interface_descriptor *) header; if (d->bLength < USB_DT_INTERFACE_SIZE) { - dev_warn(ddev, "config %d has an invalid " + dev_notice(ddev, "config %d has an invalid " "interface descriptor of length %d, " "skipping\n", cfgno, d->bLength); continue; @@ -680,7 +680,7 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx, if ((dev->quirks & USB_QUIRK_HONOR_BNUMINTERFACES) && n >= nintf_orig) { - dev_warn(ddev, "config %d has more interface " + dev_notice(ddev, "config %d has more interface " "descriptors, than it declares in " "bNumInterfaces, ignoring interface " "number: %d\n", cfgno, inum); @@ -688,7 +688,7 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx, } if (inum >= nintf_orig) - dev_warn(ddev, "config %d has an invalid " + dev_notice(ddev, "config %d has an invalid " "interface number: %d but max is %d\n", cfgno, inum, nintf_orig - 1); @@ -713,14 +713,14 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx, d = (struct usb_interface_assoc_descriptor 
*)header; if (d->bLength < USB_DT_INTERFACE_ASSOCIATION_SIZE) { - dev_warn(ddev, + dev_notice(ddev, "config %d has an invalid interface association descriptor of length %d, skipping\n", cfgno, d->bLength); continue; } if (iad_num == USB_MAXIADS) { - dev_warn(ddev, "found more Interface " + dev_notice(ddev, "found more Interface " "Association Descriptors " "than allocated for in " "configuration %d\n", cfgno); @@ -731,7 +731,7 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx, } else if (header->bDescriptorType == USB_DT_DEVICE || header->bDescriptorType == USB_DT_CONFIG) - dev_warn(ddev, "config %d contains an unexpected " + dev_notice(ddev, "config %d contains an unexpected " "descriptor of type 0x%X, skipping\n", cfgno, header->bDescriptorType); @@ -740,11 +740,11 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx, config->desc.wTotalLength = cpu_to_le16(buffer2 - buffer0); if (n != nintf) - dev_warn(ddev, "config %d has %d interface%s, different from " + dev_notice(ddev, "config %d has %d interface%s, different from " "the descriptor's value: %d\n", cfgno, n, plural(n), nintf_orig); else if (n == 0) - dev_warn(ddev, "config %d has no interfaces?\n", cfgno); + dev_notice(ddev, "config %d has no interfaces?\n", cfgno); config->desc.bNumInterfaces = nintf = n; /* Check for missing interface numbers */ @@ -754,7 +754,7 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx, break; } if (j >= nintf) - dev_warn(ddev, "config %d has no interface number " + dev_notice(ddev, "config %d has no interface number " "%d\n", cfgno, i); } @@ -762,7 +762,7 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx, for (i = 0; i < nintf; ++i) { j = nalts[i]; if (j > USB_MAXALTSETTING) { - dev_warn(ddev, "too many alternate settings for " + dev_notice(ddev, "too many alternate settings for " "config %d interface %d: %d, " "using maximum allowed: %d\n", cfgno, inums[i], j, USB_MAXALTSETTING); @@ -811,7 
+811,7 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx, break; } if (n >= intfc->num_altsetting) - dev_warn(ddev, "config %d interface %d has no " + dev_notice(ddev, "config %d interface %d has no " "altsetting %d\n", cfgno, inums[i], j); } } @@ -868,7 +868,7 @@ int usb_get_configuration(struct usb_device *dev) int result; if (ncfg > USB_MAXCONFIG) { - dev_warn(ddev, "too many configurations: %d, " + dev_notice(ddev, "too many configurations: %d, " "using maximum allowed: %d\n", ncfg, USB_MAXCONFIG); dev->descriptor.bNumConfigurations = ncfg = USB_MAXCONFIG; } @@ -902,7 +902,7 @@ int usb_get_configuration(struct usb_device *dev) "descriptor/%s: %d\n", cfgno, "start", result); if (result != -EPIPE) goto err; - dev_err(ddev, "chopping to %d config(s)\n", cfgno); + dev_notice(ddev, "chopping to %d config(s)\n", cfgno); dev->descriptor.bNumConfigurations = cfgno; break; } else if (result < 4) { @@ -934,7 +934,7 @@ int usb_get_configuration(struct usb_device *dev) goto err; } if (result < length) { - dev_warn(ddev, "config index %d descriptor too short " + dev_notice(ddev, "config index %d descriptor too short " "(expected %i, got %i)\n", cfgno, length, result); length = result; } @@ -993,7 +993,7 @@ int usb_get_bos_descriptor(struct usb_device *dev) /* Get BOS descriptor */ ret = usb_get_descriptor(dev, USB_DT_BOS, 0, bos, USB_DT_BOS_SIZE); if (ret < USB_DT_BOS_SIZE || bos->bLength < USB_DT_BOS_SIZE) { - dev_err(ddev, "unable to get BOS descriptor or descriptor too short\n"); + dev_notice(ddev, "unable to get BOS descriptor or descriptor too short\n"); if (ret >= 0) ret = -ENOMSG; kfree(bos); @@ -1021,7 +1021,7 @@ int usb_get_bos_descriptor(struct usb_device *dev) ret = usb_get_descriptor(dev, USB_DT_BOS, 0, buffer, total_len); if (ret < total_len) { - dev_err(ddev, "unable to get BOS descriptor set\n"); + dev_notice(ddev, "unable to get BOS descriptor set\n"); if (ret >= 0) ret = -ENOMSG; goto err; @@ -1046,7 +1046,7 @@ int 
usb_get_bos_descriptor(struct usb_device *dev) } if (cap->bDescriptorType != USB_DT_DEVICE_CAPABILITY) { - dev_warn(ddev, "descriptor type invalid, skip\n"); + dev_notice(ddev, "descriptor type invalid, skip\n"); continue; } From 372488c6936f4e7734e4ff5613c504affb49ff68 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 1 Nov 2022 22:13:55 +0100 Subject: [PATCH 0878/4122] usb: core: Use kstrtobool() instead of strtobool() strtobool() is the same as kstrtobool(). However, the latter is more used within the kernel. In order to remove strtobool() and slightly simplify kstrtox.h, switch to the other function name. While at it, include the corresponding header file (<linux/kstrtox.h>) Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/f01ef2ddaf12a6412127611617786adc1234e0b4.1667336095.git.christophe.jaillet@wanadoo.fr Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/port.c | 3 ++- drivers/usb/core/sysfs.c | 7 ++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c index 38c1a4f4fdea..015204fc67a1 100644 --- a/drivers/usb/core/port.c +++ b/drivers/usb/core/port.c @@ -7,6 +7,7 @@ * Author: Lan Tianyu */ +#include #include #include #include @@ -63,7 +64,7 @@ static ssize_t disable_store(struct device *dev, struct device_attribute *attr, bool disabled; int rc; - rc = strtobool(buf, &disabled); + rc = kstrtobool(buf, &disabled); if (rc) return rc; diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c index 631574718d8a..8217032dfb85 100644 --- a/drivers/usb/core/sysfs.c +++ b/drivers/usb/core/sysfs.c @@ -13,6 +13,7 @@ #include +#include #include #include #include @@ -505,7 +506,7 @@ static ssize_t usb2_hardware_lpm_store(struct device *dev, if (ret < 0) return -EINTR; - ret = strtobool(buf, &value); + ret = kstrtobool(buf, &value); if (!ret) { udev->usb2_hw_lpm_allowed = value; @@ -975,7 +976,7 @@ static ssize_t interface_authorized_default_store(struct device *dev, int rc = count; bool
val; - if (strtobool(buf, &val) != 0) + if (kstrtobool(buf, &val) != 0) return -EINVAL; if (val) @@ -1176,7 +1177,7 @@ static ssize_t interface_authorized_store(struct device *dev, struct usb_interface *intf = to_usb_interface(dev); bool val; - if (strtobool(buf, &val) != 0) + if (kstrtobool(buf, &val) != 0) return -EINVAL; if (val) From a8bc8cc193c69e41df5e757d1a592346526e136d Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 1 Nov 2022 22:13:56 +0100 Subject: [PATCH 0879/4122] usb: gadget: Use kstrtobool() instead of strtobool() strtobool() is the same as kstrtobool(). However, the latter is more used within the kernel. In order to remove strtobool() and slightly simplify kstrtox.h, switch to the other function name. While at it, include the corresponding header file (<linux/kstrtox.h>) Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/09bc980d8432a4b5f7d88388ec0df5b085583139.1667336095.git.christophe.jaillet@wanadoo.fr Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/configfs.c | 3 ++- drivers/usb/gadget/function/f_mass_storage.c | 3 ++- drivers/usb/gadget/function/storage_common.c | 9 +++++---- drivers/usb/gadget/function/u_serial.c | 3 ++- drivers/usb/gadget/legacy/serial.c | 3 ++- 5 files changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index 3a6b4926193e..96121d1c8df4 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -800,7 +801,7 @@ static ssize_t os_desc_use_store(struct config_item *item, const char *page, bool use; mutex_lock(&gi->lock); - ret = strtobool(page, &use); + ret = kstrtobool(page, &use); if (!ret) { gi->use_os_desc = use; ret = len; diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c index 3abf7f586e2a..3a30feb47073 100644 --- a/drivers/usb/gadget/function/f_mass_storage.c +++
b/drivers/usb/gadget/function/f_mass_storage.c @@ -176,6 +176,7 @@ #include #include #include +#include #include #include #include @@ -3387,7 +3388,7 @@ static ssize_t fsg_opts_stall_store(struct config_item *item, const char *page, return -EBUSY; } - ret = strtobool(page, &stall); + ret = kstrtobool(page, &stall); if (!ret) { opts->common->can_stall = stall; ret = len; diff --git a/drivers/usb/gadget/function/storage_common.c b/drivers/usb/gadget/function/storage_common.c index 208c6a92780a..2a4163b0f6fe 100644 --- a/drivers/usb/gadget/function/storage_common.c +++ b/drivers/usb/gadget/function/storage_common.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include "storage_common.h" @@ -396,7 +397,7 @@ ssize_t fsg_store_ro(struct fsg_lun *curlun, struct rw_semaphore *filesem, ssize_t rc; bool ro; - rc = strtobool(buf, &ro); + rc = kstrtobool(buf, &ro); if (rc) return rc; @@ -419,7 +420,7 @@ ssize_t fsg_store_nofua(struct fsg_lun *curlun, const char *buf, size_t count) bool nofua; int ret; - ret = strtobool(buf, &nofua); + ret = kstrtobool(buf, &nofua); if (ret) return ret; @@ -470,7 +471,7 @@ ssize_t fsg_store_cdrom(struct fsg_lun *curlun, struct rw_semaphore *filesem, bool cdrom; int ret; - ret = strtobool(buf, &cdrom); + ret = kstrtobool(buf, &cdrom); if (ret) return ret; @@ -493,7 +494,7 @@ ssize_t fsg_store_removable(struct fsg_lun *curlun, const char *buf, bool removable; int ret; - ret = strtobool(buf, &removable); + ret = kstrtobool(buf, &removable); if (ret) return ret; diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c index 7538279f9817..840626e064e1 100644 --- a/drivers/usb/gadget/function/u_serial.c +++ b/drivers/usb/gadget/function/u_serial.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -1070,7 +1071,7 @@ ssize_t gserial_set_console(unsigned char port_num, const char *page, size_t cou bool enable; int ret; - ret = strtobool(page, &enable); + ret = 
kstrtobool(page, &enable); if (ret) return ret; diff --git a/drivers/usb/gadget/legacy/serial.c b/drivers/usb/gadget/legacy/serial.c index dcd3a6603d90..4974bee6049a 100644 --- a/drivers/usb/gadget/legacy/serial.c +++ b/drivers/usb/gadget/legacy/serial.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -109,7 +110,7 @@ static int enable_set(const char *s, const struct kernel_param *kp) if (!s) /* called for no-arg enable == default */ return 0; - ret = strtobool(s, &do_enable); + ret = kstrtobool(s, &do_enable); if (ret || enable == do_enable) return ret; From 1adf3cc20d693569ebee90fd91fa34b0570fcd6f Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 31 Oct 2022 08:59:05 +0800 Subject: [PATCH 0880/4122] iommu: Add max_pasids field in struct iommu_device Use this field to keep the number of supported PASIDs that an IOMMU hardware is able to support. This is a generic attribute of an IOMMU and lifting it into the per-IOMMU device structure makes it possible to allocate a PASID for device without calls into the IOMMU drivers. Any iommu driver that supports PASID related features should set this field before enabling them on the devices. In the Intel IOMMU driver, intel_iommu_sm is moved to CONFIG_INTEL_IOMMU enclave so that the pasid_supported() helper could be used in dmar.c without compilation errors. 
Signed-off-by: Lu Baolu Reviewed-by: Jean-Philippe Brucker Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Reviewed-by: Yi Liu Tested-by: Zhangfei Gao Tested-by: Tony Zhu Link: https://lore.kernel.org/r/20221031005917.45690-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 1 + drivers/iommu/intel/dmar.c | 7 +++++++ drivers/iommu/intel/iommu.h | 4 ++-- include/linux/iommu.h | 2 ++ 4 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 6d5df91c5c46..21cb13da122c 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3543,6 +3543,7 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) /* SID/SSID sizes */ smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg); smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg); + smmu->iommu.max_pasids = 1UL << smmu->ssid_bits; /* * If the SMMU supports fewer bits than would fill a single L2 stream diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 5a8f780e7ffd..3528058d253e 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -1104,6 +1104,13 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) raw_spin_lock_init(&iommu->register_lock); + /* + * A value of N in PSS field of eCap register indicates hardware + * supports PASID field of N+1 bits. + */ + if (pasid_supported(iommu)) + iommu->iommu.max_pasids = 2UL << ecap_pss(iommu->ecap); + /* * This is only for hotplug; at boot time intel_iommu_enabled won't * be set yet. 
When intel_iommu_init() runs, it registers the units diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 92023dff9513..cce0598f4109 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -480,8 +480,6 @@ enum { #define VTD_FLAG_IRQ_REMAP_PRE_ENABLED (1 << 1) #define VTD_FLAG_SVM_CAPABLE (1 << 2) -extern int intel_iommu_sm; - #define sm_supported(iommu) (intel_iommu_sm && ecap_smts((iommu)->ecap)) #define pasid_supported(iommu) (sm_supported(iommu) && \ ecap_pasid((iommu)->ecap)) @@ -795,6 +793,7 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, extern const struct iommu_ops intel_iommu_ops; #ifdef CONFIG_INTEL_IOMMU +extern int intel_iommu_sm; extern int iommu_calculate_agaw(struct intel_iommu *iommu); extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu); extern int dmar_disabled; @@ -810,6 +809,7 @@ static inline int iommu_calculate_max_sagaw(struct intel_iommu *iommu) } #define dmar_disabled (1) #define intel_iommu_enabled (0) +#define intel_iommu_sm (0) #endif static inline const char *decode_prq_descriptor(char *str, size_t size, diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 3c9da1f8979e..e3af4f46e6e0 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -322,12 +322,14 @@ struct iommu_domain_ops { * @list: Used by the iommu-core to keep a list of registered iommus * @ops: iommu-ops for talking to this iommu * @dev: struct device for sysfs handling + * @max_pasids: number of supported PASIDs */ struct iommu_device { struct list_head list; const struct iommu_ops *ops; struct fwnode_handle *fwnode; struct device *dev; + u32 max_pasids; }; /** From 22d2c7afb3697a68c7fc05c935ef662dee06dc60 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 31 Oct 2022 08:59:06 +0800 Subject: [PATCH 0881/4122] iommu: Add max_pasids field in struct dev_iommu Use this field to save the number of PASIDs that a device is able to consume. 
It is a generic attribute of a device and lifting it into the per-device dev_iommu struct could help to avoid the boilerplate code in various IOMMU drivers. Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Reviewed-by: Yi Liu Tested-by: Zhangfei Gao Tested-by: Tony Zhu Link: https://lore.kernel.org/r/20221031005917.45690-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 20 ++++++++++++++++++++ include/linux/iommu.h | 2 ++ 2 files changed, 22 insertions(+) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 65a3b3d886dc..297ac79bc21c 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -278,6 +279,24 @@ static void dev_iommu_free(struct device *dev) kfree(param); } +static u32 dev_iommu_get_max_pasids(struct device *dev) +{ + u32 max_pasids = 0, bits = 0; + int ret; + + if (dev_is_pci(dev)) { + ret = pci_max_pasids(to_pci_dev(dev)); + if (ret > 0) + max_pasids = ret; + } else { + ret = device_property_read_u32(dev, "pasid-num-bits", &bits); + if (!ret) + max_pasids = 1UL << bits; + } + + return min_t(u32, max_pasids, dev->iommu->iommu_dev->max_pasids); +} + static int __iommu_probe_device(struct device *dev, struct list_head *group_list) { const struct iommu_ops *ops = dev->bus->iommu_ops; @@ -303,6 +322,7 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list } dev->iommu->iommu_dev = iommu_dev; + dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev); group = iommu_group_get_for_dev(dev); if (IS_ERR(group)) { diff --git a/include/linux/iommu.h b/include/linux/iommu.h index e3af4f46e6e0..ac3f6c6dcc6d 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -368,6 +368,7 @@ struct iommu_fault_param { * @fwspec: IOMMU fwspec data * @iommu_dev: IOMMU device this device is linked to * @priv: IOMMU Driver private data + * @max_pasids: number of PASIDs this device can 
consume * * TODO: migrate other per device data pointers under iommu_dev_data, e.g. * struct iommu_group *iommu_group; @@ -379,6 +380,7 @@ struct dev_iommu { struct iommu_fwspec *fwspec; struct iommu_device *iommu_dev; void *priv; + u32 max_pasids; }; int iommu_device_register(struct iommu_device *iommu, From 942fd5435dccb273f90176b046ae6bbba60cfbd8 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 31 Oct 2022 08:59:07 +0800 Subject: [PATCH 0882/4122] iommu: Remove SVM_FLAG_SUPERVISOR_MODE support The current kernel DMA with PASID support is based on the SVA with a flag SVM_FLAG_SUPERVISOR_MODE. The IOMMU driver binds the kernel memory address space to a PASID of the device. The device driver programs the device with kernel virtual address (KVA) for DMA access. There have been security and functional issues with this approach: - The lack of IOTLB synchronization upon kernel page table updates. (vmalloc, module/BPF loading, CONFIG_DEBUG_PAGEALLOC etc.) - Other than slightly more protection, using kernel virtual address (KVA) has little advantage over physical address. There are also no use cases yet where DMA engines need kernel virtual addresses for in-kernel DMA. This removes SVM_FLAG_SUPERVISOR_MODE support from the IOMMU interface. The device drivers are suggested to handle kernel DMA with PASID through the kernel DMA APIs. The drvdata parameter in iommu_sva_bind_device() and all callbacks is not needed anymore. Clean them up as well. 
Link: https://lore.kernel.org/linux-iommu/20210511194726.GP1002214@nvidia.com/ Signed-off-by: Jacob Pan Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Jean-Philippe Brucker Reviewed-by: Kevin Tian Reviewed-by: Fenghua Yu Tested-by: Zhangfei Gao Tested-by: Tony Zhu Link: https://lore.kernel.org/r/20221031005917.45690-4-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/dma/idxd/cdev.c | 3 +- drivers/dma/idxd/init.c | 25 +-------- .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 3 +- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 5 +- drivers/iommu/intel/iommu.h | 3 +- drivers/iommu/intel/svm.c | 55 +++++-------------- drivers/iommu/iommu.c | 5 +- drivers/misc/uacce/uacce.c | 2 +- include/linux/intel-svm.h | 13 ----- include/linux/iommu.h | 8 +-- 10 files changed, 25 insertions(+), 97 deletions(-) diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index c2808fd081d6..66720001ba1c 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -100,7 +99,7 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp) filp->private_data = ctx; if (device_user_pasid_enabled(idxd)) { - sva = iommu_sva_bind_device(dev, current->mm, NULL); + sva = iommu_sva_bind_device(dev, current->mm); if (IS_ERR(sva)) { rc = PTR_ERR(sva); dev_err(dev, "pasid allocation failed: %d\n", rc); diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 2b18d512cbfc..2c0fcfdc75c7 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -502,29 +501,7 @@ static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_d static int idxd_enable_system_pasid(struct idxd_device *idxd) { - int flags; - unsigned int pasid; - struct iommu_sva *sva; - - flags = SVM_FLAG_SUPERVISOR_MODE; - - sva = iommu_sva_bind_device(&idxd->pdev->dev, NULL, &flags); - 
if (IS_ERR(sva)) { - dev_warn(&idxd->pdev->dev, - "iommu sva bind failed: %ld\n", PTR_ERR(sva)); - return PTR_ERR(sva); - } - - pasid = iommu_sva_get_pasid(sva); - if (pasid == IOMMU_PASID_INVALID) { - iommu_sva_unbind_device(sva); - return -ENODEV; - } - - idxd->sva = sva; - idxd->pasid = pasid; - dev_dbg(&idxd->pdev->dev, "system pasid: %u\n", pasid); - return 0; + return -EOPNOTSUPP; } static void idxd_disable_system_pasid(struct idxd_device *idxd) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index 5968a568aae2..8fcf0df4bd0e 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -367,8 +367,7 @@ err_free_bond: return ERR_PTR(ret); } -struct iommu_sva * -arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, void *drvdata) +struct iommu_sva *arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm) { struct iommu_sva *handle; struct iommu_domain *domain = iommu_get_domain_for_dev(dev); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index cd48590ada30..d2ba86470c42 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -754,8 +754,7 @@ bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master); int arm_smmu_master_enable_sva(struct arm_smmu_master *master); int arm_smmu_master_disable_sva(struct arm_smmu_master *master); bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master); -struct iommu_sva *arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, - void *drvdata); +struct iommu_sva *arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm); void arm_smmu_sva_unbind(struct iommu_sva *handle); u32 arm_smmu_sva_get_pasid(struct iommu_sva *handle); void arm_smmu_sva_notifier_synchronize(void); @@ -791,7 +790,7 @@ static inline bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master } 
static inline struct iommu_sva * -arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, void *drvdata) +arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm) { return ERR_PTR(-ENODEV); } diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index cce0598f4109..33e5bcaf2a6c 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -748,8 +748,7 @@ struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn); extern void intel_svm_check(struct intel_iommu *iommu); extern int intel_svm_enable_prq(struct intel_iommu *iommu); extern int intel_svm_finish_prq(struct intel_iommu *iommu); -struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, - void *drvdata); +struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm); void intel_svm_unbind(struct iommu_sva *handle); u32 intel_svm_get_pasid(struct iommu_sva *handle); int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt, diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 7d08eb034f2d..94bc47b68c93 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -296,8 +296,7 @@ out: return 0; } -static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm, - unsigned int flags) +static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm) { ioasid_t max_pasid = dev_is_pci(dev) ? 
pci_max_pasids(to_pci_dev(dev)) : intel_pasid_max_id; @@ -307,8 +306,7 @@ static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm, static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, struct device *dev, - struct mm_struct *mm, - unsigned int flags) + struct mm_struct *mm) { struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_svm_dev *sdev; @@ -324,22 +322,18 @@ static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, svm->pasid = mm->pasid; svm->mm = mm; - svm->flags = flags; INIT_LIST_HEAD_RCU(&svm->devs); - if (!(flags & SVM_FLAG_SUPERVISOR_MODE)) { - svm->notifier.ops = &intel_mmuops; - ret = mmu_notifier_register(&svm->notifier, mm); - if (ret) { - kfree(svm); - return ERR_PTR(ret); - } + svm->notifier.ops = &intel_mmuops; + ret = mmu_notifier_register(&svm->notifier, mm); + if (ret) { + kfree(svm); + return ERR_PTR(ret); } ret = pasid_private_add(svm->pasid, svm); if (ret) { - if (svm->notifier.ops) - mmu_notifier_unregister(&svm->notifier, mm); + mmu_notifier_unregister(&svm->notifier, mm); kfree(svm); return ERR_PTR(ret); } @@ -374,9 +368,7 @@ static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, } /* Setup the pasid table: */ - sflags = (flags & SVM_FLAG_SUPERVISOR_MODE) ? - PASID_FLAG_SUPERVISOR_MODE : 0; - sflags |= cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0; + sflags = cpu_feature_enabled(X86_FEATURE_LA57) ? 
PASID_FLAG_FL5LP : 0; ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, mm->pasid, FLPT_DEFAULT_DID, sflags); if (ret) @@ -390,8 +382,7 @@ free_sdev: kfree(sdev); free_svm: if (list_empty(&svm->devs)) { - if (svm->notifier.ops) - mmu_notifier_unregister(&svm->notifier, mm); + mmu_notifier_unregister(&svm->notifier, mm); pasid_private_remove(mm->pasid); kfree(svm); } @@ -780,40 +771,20 @@ prq_advance: return IRQ_RETVAL(handled); } -struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata) +struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm) { struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); - unsigned int flags = 0; struct iommu_sva *sva; int ret; - if (drvdata) - flags = *(unsigned int *)drvdata; - - if (flags & SVM_FLAG_SUPERVISOR_MODE) { - if (!ecap_srs(iommu->ecap)) { - dev_err(dev, "%s: Supervisor PASID not supported\n", - iommu->name); - return ERR_PTR(-EOPNOTSUPP); - } - - if (mm) { - dev_err(dev, "%s: Supervisor PASID with user provided mm\n", - iommu->name); - return ERR_PTR(-EINVAL); - } - - mm = &init_mm; - } - mutex_lock(&pasid_mutex); - ret = intel_svm_alloc_pasid(dev, mm, flags); + ret = intel_svm_alloc_pasid(dev, mm); if (ret) { mutex_unlock(&pasid_mutex); return ERR_PTR(ret); } - sva = intel_svm_bind_mm(iommu, dev, mm, flags); + sva = intel_svm_bind_mm(iommu, dev, mm); mutex_unlock(&pasid_mutex); return sva; diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 297ac79bc21c..a94ec648c88b 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2750,7 +2750,6 @@ EXPORT_SYMBOL_GPL(iommu_dev_disable_feature); * iommu_sva_bind_device() - Bind a process address space to a device * @dev: the device * @mm: the mm to bind, caller must hold a reference to it - * @drvdata: opaque data pointer to pass to bind callback * * Create a bond between device and address space, allowing the device to access * the mm using the returned PASID. 
If a bond already exists between @device and @@ -2763,7 +2762,7 @@ EXPORT_SYMBOL_GPL(iommu_dev_disable_feature); * On error, returns an ERR_PTR value. */ struct iommu_sva * -iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata) +iommu_sva_bind_device(struct device *dev, struct mm_struct *mm) { struct iommu_group *group; struct iommu_sva *handle = ERR_PTR(-EINVAL); @@ -2788,7 +2787,7 @@ iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata) if (iommu_group_device_count(group) != 1) goto out_unlock; - handle = ops->sva_bind(dev, mm, drvdata); + handle = ops->sva_bind(dev, mm); out_unlock: mutex_unlock(&group->mutex); diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c index b70a013139c7..905eff1f840e 100644 --- a/drivers/misc/uacce/uacce.c +++ b/drivers/misc/uacce/uacce.c @@ -108,7 +108,7 @@ static int uacce_bind_queue(struct uacce_device *uacce, struct uacce_queue *q) if (!(uacce->flags & UACCE_DEV_SVA)) return 0; - handle = iommu_sva_bind_device(uacce->parent, current->mm, NULL); + handle = iommu_sva_bind_device(uacce->parent, current->mm); if (IS_ERR(handle)) return PTR_ERR(handle); diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 207ef06ba3e1..f9a0d44f6fdb 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -13,17 +13,4 @@ #define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x20) #define PRQ_DEPTH ((0x1000 << PRQ_ORDER) >> 5) -/* - * The SVM_FLAG_SUPERVISOR_MODE flag requests a PASID which can be used only - * for access to kernel addresses. No IOTLB flushes are automatically done - * for kernel mappings; it is valid only for access to the kernel's static - * 1:1 mapping of physical memory — not to vmalloc or even module mappings. - * A future API addition may permit the use of such ranges, by means of an - * explicit IOTLB flush call (akin to the DMA API's unmap method). 
- * - * It is unlikely that we will ever hook into flush_tlb_kernel_range() to - * do such IOTLB flushes automatically. - */ -#define SVM_FLAG_SUPERVISOR_MODE BIT(0) - #endif /* __INTEL_SVM_H__ */ diff --git a/include/linux/iommu.h b/include/linux/iommu.h index ac3f6c6dcc6d..72bb0531aa76 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -247,8 +247,7 @@ struct iommu_ops { int (*dev_enable_feat)(struct device *dev, enum iommu_dev_features f); int (*dev_disable_feat)(struct device *dev, enum iommu_dev_features f); - struct iommu_sva *(*sva_bind)(struct device *dev, struct mm_struct *mm, - void *drvdata); + struct iommu_sva *(*sva_bind)(struct device *dev, struct mm_struct *mm); void (*sva_unbind)(struct iommu_sva *handle); u32 (*sva_get_pasid)(struct iommu_sva *handle); @@ -668,8 +667,7 @@ int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f); int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features f); struct iommu_sva *iommu_sva_bind_device(struct device *dev, - struct mm_struct *mm, - void *drvdata); + struct mm_struct *mm); void iommu_sva_unbind_device(struct iommu_sva *handle); u32 iommu_sva_get_pasid(struct iommu_sva *handle); @@ -1000,7 +998,7 @@ iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) } static inline struct iommu_sva * -iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata) +iommu_sva_bind_device(struct device *dev, struct mm_struct *mm) { return NULL; } From 201007ef707a8bb5592cd07dd46fc9222c48e0b9 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 31 Oct 2022 08:59:08 +0800 Subject: [PATCH 0883/4122] PCI: Enable PASID only when ACS RR & UF enabled on upstream path The Requester ID/Process Address Space ID (PASID) combination identifies an address space distinct from the PCI bus address space, e.g., an address space defined by an IOMMU. 
But the PCIe fabric routes Memory Requests based on the TLP address, ignoring any PASID (PCIe r6.0, sec 2.2.10.4), so a TLP with PASID that SHOULD go upstream to the IOMMU may instead be routed as a P2P Request if its address falls in a bridge window. To ensure that all Memory Requests with PASID are routed upstream, only enable PASID if ACS P2P Request Redirect and Upstream Forwarding are enabled for the path leading to the device. Suggested-by: Jason Gunthorpe Suggested-by: Kevin Tian Signed-off-by: Lu Baolu Acked-by: Bjorn Helgaas Reviewed-by: Jason Gunthorpe Tested-by: Tony Zhu Link: https://lore.kernel.org/r/20221031005917.45690-5-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/pci/ats.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c index c967ad6e2626..f9cc2e10b676 100644 --- a/drivers/pci/ats.c +++ b/drivers/pci/ats.c @@ -382,6 +382,9 @@ int pci_enable_pasid(struct pci_dev *pdev, int features) if (!pasid) return -EINVAL; + if (!pci_acs_path_enabled(pdev, NULL, PCI_ACS_RR | PCI_ACS_UF)) + return -EINVAL; + pci_read_config_word(pdev, pasid + PCI_PASID_CAP, &supported); supported &= PCI_PASID_CAP_EXEC | PCI_PASID_CAP_PRIV; From 16603704559c7a68718059c4f75287886c01b20f Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 31 Oct 2022 08:59:09 +0800 Subject: [PATCH 0884/4122] iommu: Add attach/detach_dev_pasid iommu interfaces Attaching an IOMMU domain to a PASID of a device is a generic operation for modern IOMMU drivers which support PASID-granular DMA address translation. Currently visible usage scenarios include (but not limited): - SVA (Shared Virtual Address) - kernel DMA with PASID - hardware-assist mediated device This adds the set_dev_pasid domain ops for setting the domain onto a PASID of a device and remove_dev_pasid iommu ops for removing any setup on a PASID of device. This also adds interfaces for device drivers to attach/detach/retrieve a domain for a PASID of a device. 
If multiple devices share a single group, it's fine as long as the fabric always routes every TLP marked with a PASID to the host bridge and only the host bridge. For example, ACS achieves this universally and has been checked when pci_enable_pasid() is called. As we can't reliably tell the source apart in a group, all the devices in a group have to be considered as the same source, and mapped to the same PASID table. The DMA ownership is about the whole device (more precisely, iommu group), including the RID and PASIDs. When the ownership is converted, the pasid array must be empty. This also adds necessary checks in the DMA ownership interfaces. Signed-off-by: Lu Baolu Reviewed-by: Jean-Philippe Brucker Reviewed-by: Kevin Tian Reviewed-by: Yi Liu Reviewed-by: Jason Gunthorpe Tested-by: Zhangfei Gao Tested-by: Tony Zhu Link: https://lore.kernel.org/r/20221031005917.45690-6-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 141 ++++++++++++++++++++++++++++++++++++++++-- include/linux/iommu.h | 32 ++++++++++ 2 files changed, 169 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index a94ec648c88b..bf22992beb98 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -43,6 +43,7 @@ struct iommu_group { struct kobject kobj; struct kobject *devices_kobj; struct list_head devices; + struct xarray pasid_array; struct mutex mutex; void *iommu_data; void (*iommu_data_release)(void *iommu_data); @@ -723,6 +724,7 @@ struct iommu_group *iommu_group_alloc(void) mutex_init(&group->mutex); INIT_LIST_HEAD(&group->devices); INIT_LIST_HEAD(&group->entry); + xa_init(&group->pasid_array); ret = ida_alloc(&iommu_group_ida, GFP_KERNEL); if (ret < 0) { @@ -3106,7 +3108,8 @@ int iommu_device_use_default_domain(struct device *dev) mutex_lock(&group->mutex); if (group->owner_cnt) { - if (group->owner || !iommu_is_default_domain(group)) { + if (group->owner || !iommu_is_default_domain(group) || + 
!xa_empty(&group->pasid_array)) { ret = -EBUSY; goto unlock_out; } @@ -3137,7 +3140,7 @@ void iommu_device_unuse_default_domain(struct device *dev) return; mutex_lock(&group->mutex); - if (!WARN_ON(!group->owner_cnt)) + if (!WARN_ON(!group->owner_cnt || !xa_empty(&group->pasid_array))) group->owner_cnt--; mutex_unlock(&group->mutex); @@ -3185,7 +3188,8 @@ int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner) ret = -EPERM; goto unlock_out; } else { - if (group->domain && group->domain != group->default_domain) { + if ((group->domain && group->domain != group->default_domain) || + !xa_empty(&group->pasid_array)) { ret = -EBUSY; goto unlock_out; } @@ -3219,7 +3223,8 @@ void iommu_group_release_dma_owner(struct iommu_group *group) int ret; mutex_lock(&group->mutex); - if (WARN_ON(!group->owner_cnt || !group->owner)) + if (WARN_ON(!group->owner_cnt || !group->owner || + !xa_empty(&group->pasid_array))) goto unlock_out; group->owner_cnt = 0; @@ -3250,3 +3255,131 @@ bool iommu_group_dma_owner_claimed(struct iommu_group *group) return user; } EXPORT_SYMBOL_GPL(iommu_group_dma_owner_claimed); + +static int __iommu_set_group_pasid(struct iommu_domain *domain, + struct iommu_group *group, ioasid_t pasid) +{ + struct group_device *device; + int ret = 0; + + list_for_each_entry(device, &group->devices, list) { + ret = domain->ops->set_dev_pasid(domain, device->dev, pasid); + if (ret) + break; + } + + return ret; +} + +static void __iommu_remove_group_pasid(struct iommu_group *group, + ioasid_t pasid) +{ + struct group_device *device; + const struct iommu_ops *ops; + + list_for_each_entry(device, &group->devices, list) { + ops = dev_iommu_ops(device->dev); + ops->remove_dev_pasid(device->dev, pasid); + } +} + +/* + * iommu_attach_device_pasid() - Attach a domain to pasid of device + * @domain: the iommu domain. + * @dev: the attached device. + * @pasid: the pasid of the device. + * + * Return: 0 on success, or an error. 
+ */ +int iommu_attach_device_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid) +{ + struct iommu_group *group; + void *curr; + int ret; + + if (!domain->ops->set_dev_pasid) + return -EOPNOTSUPP; + + group = iommu_group_get(dev); + if (!group) + return -ENODEV; + + mutex_lock(&group->mutex); + curr = xa_cmpxchg(&group->pasid_array, pasid, NULL, domain, GFP_KERNEL); + if (curr) { + ret = xa_err(curr) ? : -EBUSY; + goto out_unlock; + } + + ret = __iommu_set_group_pasid(domain, group, pasid); + if (ret) { + __iommu_remove_group_pasid(group, pasid); + xa_erase(&group->pasid_array, pasid); + } +out_unlock: + mutex_unlock(&group->mutex); + iommu_group_put(group); + + return ret; +} +EXPORT_SYMBOL_GPL(iommu_attach_device_pasid); + +/* + * iommu_detach_device_pasid() - Detach the domain from pasid of device + * @domain: the iommu domain. + * @dev: the attached device. + * @pasid: the pasid of the device. + * + * The @domain must have been attached to @pasid of the @dev with + * iommu_attach_device_pasid(). + */ +void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev, + ioasid_t pasid) +{ + struct iommu_group *group = iommu_group_get(dev); + + mutex_lock(&group->mutex); + __iommu_remove_group_pasid(group, pasid); + WARN_ON(xa_erase(&group->pasid_array, pasid) != domain); + mutex_unlock(&group->mutex); + + iommu_group_put(group); +} +EXPORT_SYMBOL_GPL(iommu_detach_device_pasid); + +/* + * iommu_get_domain_for_dev_pasid() - Retrieve domain for @pasid of @dev + * @dev: the queried device + * @pasid: the pasid of the device + * @type: matched domain type, 0 for any match + * + * This is a variant of iommu_get_domain_for_dev(). It returns the existing + * domain attached to pasid of a device. Callers must hold a lock around this + * function, and both iommu_attach/detach_dev_pasid() whenever a domain of + * type is being manipulated. This API does not internally resolve races with + * attach/detach. 
+ * + * Return: attached domain on success, NULL otherwise. + */ +struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev, + ioasid_t pasid, + unsigned int type) +{ + struct iommu_domain *domain; + struct iommu_group *group; + + group = iommu_group_get(dev); + if (!group) + return NULL; + + xa_lock(&group->pasid_array); + domain = xa_load(&group->pasid_array, pasid); + if (type && domain && domain->type != type) + domain = ERR_PTR(-EBUSY); + xa_unlock(&group->pasid_array); + iommu_group_put(group); + + return domain; +} +EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev_pasid); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 72bb0531aa76..5d2b78ac5416 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -223,6 +223,9 @@ struct iommu_iotlb_gather { * - IOMMU_DOMAIN_DMA: must use a dma domain * - 0: use the default setting * @default_domain_ops: the default ops for domains + * @remove_dev_pasid: Remove any translation configurations of a specific + * pasid, so that any DMA transactions with this pasid + * will be blocked by the hardware. * @pgsize_bitmap: bitmap of all possible supported page sizes * @owner: Driver module providing these ops */ @@ -256,6 +259,7 @@ struct iommu_ops { struct iommu_page_response *msg); int (*def_domain_type)(struct device *dev); + void (*remove_dev_pasid)(struct device *dev, ioasid_t pasid); const struct iommu_domain_ops *default_domain_ops; unsigned long pgsize_bitmap; @@ -266,6 +270,7 @@ struct iommu_ops { * struct iommu_domain_ops - domain specific operations * @attach_dev: attach an iommu domain to a device * @detach_dev: detach an iommu domain from a device + * @set_dev_pasid: set an iommu domain to a pasid of device * @map: map a physically contiguous memory region to an iommu domain * @map_pages: map a physically contiguous set of pages of the same size to * an iommu domain. 
@@ -286,6 +291,8 @@ struct iommu_ops { struct iommu_domain_ops { int (*attach_dev)(struct iommu_domain *domain, struct device *dev); void (*detach_dev)(struct iommu_domain *domain, struct device *dev); + int (*set_dev_pasid)(struct iommu_domain *domain, struct device *dev, + ioasid_t pasid); int (*map)(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot, gfp_t gfp); @@ -678,6 +685,13 @@ int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner); void iommu_group_release_dma_owner(struct iommu_group *group); bool iommu_group_dma_owner_claimed(struct iommu_group *group); +int iommu_attach_device_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid); +void iommu_detach_device_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid); +struct iommu_domain * +iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid, + unsigned int type); #else /* CONFIG_IOMMU_API */ struct iommu_ops {}; @@ -1040,6 +1054,24 @@ static inline bool iommu_group_dma_owner_claimed(struct iommu_group *group) { return false; } + +static inline int iommu_attach_device_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid) +{ + return -ENODEV; +} + +static inline void iommu_detach_device_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid) +{ +} + +static inline struct iommu_domain * +iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid, + unsigned int type) +{ + return NULL; +} #endif /* CONFIG_IOMMU_API */ /** From 136467962e49931dbc6240aea8197fab7e407ba4 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 31 Oct 2022 08:59:10 +0800 Subject: [PATCH 0885/4122] iommu: Add IOMMU SVA domain support The SVA iommu_domain represents a hardware pagetable that the IOMMU hardware could use for SVA translation. This adds some infrastructures to support SVA domain in the iommu core. 
It includes: - Extend the iommu_domain to support a new IOMMU_DOMAIN_SVA domain type. The IOMMU drivers that support allocation of the SVA domain should provide their own SVA-domain-specific iommu_domain_ops. - Add a helper to allocate an SVA domain. The iommu_domain_free() is still used to free an SVA domain. The report_iommu_fault() should be replaced by the new iommu_report_device_fault(). The existing fault handler is left in place for its existing users; it shares a union with the newly added SVA members, so the two are mutually exclusive. Suggested-by: Jean-Philippe Brucker Suggested-by: Jason Gunthorpe Signed-off-by: Lu Baolu Reviewed-by: Jean-Philippe Brucker Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Reviewed-by: Yi Liu Tested-by: Zhangfei Gao Tested-by: Tony Zhu Link: https://lore.kernel.org/r/20221031005917.45690-7-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 20 ++++++++++++++++++++ include/linux/iommu.h | 25 +++++++++++++++++++++++-- 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index bf22992beb98..6a1cd2018e30 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -29,6 +29,7 @@ #include #include #include +#include #include "dma-iommu.h" @@ -1934,6 +1935,8 @@ EXPORT_SYMBOL_GPL(iommu_domain_alloc); void iommu_domain_free(struct iommu_domain *domain) { + if (domain->type == IOMMU_DOMAIN_SVA) + mmdrop(domain->mm); iommu_put_dma_cookie(domain); domain->ops->free(domain); } @@ -3383,3 +3386,20 @@ struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev, return domain; } EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev_pasid); + +struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, + struct mm_struct *mm) +{ + const struct iommu_ops *ops = dev_iommu_ops(dev); + struct iommu_domain *domain; + + domain = ops->domain_alloc(IOMMU_DOMAIN_SVA); + if (!domain) + return NULL; + + domain->type = IOMMU_DOMAIN_SVA; + mmgrab(mm); + domain->mm = mm; + + return domain; +} diff --git
a/include/linux/iommu.h b/include/linux/iommu.h index 5d2b78ac5416..776baa375967 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -64,6 +64,8 @@ struct iommu_domain_geometry { #define __IOMMU_DOMAIN_PT (1U << 2) /* Domain is identity mapped */ #define __IOMMU_DOMAIN_DMA_FQ (1U << 3) /* DMA-API uses flush queue */ +#define __IOMMU_DOMAIN_SVA (1U << 4) /* Shared process address space */ + /* * This are the possible domain-types * @@ -77,6 +79,8 @@ struct iommu_domain_geometry { * certain optimizations for these domains * IOMMU_DOMAIN_DMA_FQ - As above, but definitely using batched TLB * invalidation. + * IOMMU_DOMAIN_SVA - DMA addresses are shared process addresses + * represented by mm_struct's. */ #define IOMMU_DOMAIN_BLOCKED (0U) #define IOMMU_DOMAIN_IDENTITY (__IOMMU_DOMAIN_PT) @@ -86,15 +90,24 @@ struct iommu_domain_geometry { #define IOMMU_DOMAIN_DMA_FQ (__IOMMU_DOMAIN_PAGING | \ __IOMMU_DOMAIN_DMA_API | \ __IOMMU_DOMAIN_DMA_FQ) +#define IOMMU_DOMAIN_SVA (__IOMMU_DOMAIN_SVA) struct iommu_domain { unsigned type; const struct iommu_domain_ops *ops; unsigned long pgsize_bitmap; /* Bitmap of page sizes in use */ - iommu_fault_handler_t handler; - void *handler_token; struct iommu_domain_geometry geometry; struct iommu_dma_cookie *iova_cookie; + union { + struct { + iommu_fault_handler_t handler; + void *handler_token; + }; + struct { /* IOMMU_DOMAIN_SVA */ + struct mm_struct *mm; + int users; + }; + }; }; static inline bool iommu_is_dma_domain(struct iommu_domain *domain) @@ -685,6 +698,8 @@ int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner); void iommu_group_release_dma_owner(struct iommu_group *group); bool iommu_group_dma_owner_claimed(struct iommu_group *group); +struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, + struct mm_struct *mm); int iommu_attach_device_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid); void iommu_detach_device_pasid(struct iommu_domain *domain, @@ -1055,6 +1070,12 
@@ static inline bool iommu_group_dma_owner_claimed(struct iommu_group *group) return false; } +static inline struct iommu_domain * +iommu_sva_domain_alloc(struct device *dev, struct mm_struct *mm) +{ + return NULL; +} + static inline int iommu_attach_device_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid) { From eaca8889a1ef50783bcaad143668b735d136fe46 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 31 Oct 2022 08:59:11 +0800 Subject: [PATCH 0886/4122] iommu/vt-d: Add SVA domain support Add support for SVA domain allocation and provide an SVA-specific iommu_domain_ops. This implementation is based on the existing SVA code. Possible cleanup and refactoring are left for incremental changes later. The VT-d driver will also need to support setting a DMA domain to a PASID of a device. The current SVA implementation uses different data structures to track the domain and device PASID relationship. That's the reason why we need to check the domain type in the remove_dev_pasid callback. Eventually we'll consolidate the data structures and remove the need for the domain type check.
Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Reviewed-by: Yi Liu Tested-by: Tony Zhu Link: https://lore.kernel.org/r/20221031005917.45690-8-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 25 ++++++++++++++++++++ drivers/iommu/intel/iommu.h | 10 ++++++++ drivers/iommu/intel/svm.c | 47 +++++++++++++++++++++++++++++++++++++ 3 files changed, 82 insertions(+) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 48cdcd0a5cf3..7b67e431dd36 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4169,6 +4169,8 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type) return domain; case IOMMU_DOMAIN_IDENTITY: return &si_domain->domain; + case IOMMU_DOMAIN_SVA: + return intel_svm_domain_alloc(); default: return NULL; } @@ -4712,6 +4714,28 @@ static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain, __mapping_notify_one(info->iommu, dmar_domain, pfn, pages); } +static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid) +{ + struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); + struct iommu_domain *domain; + + /* Domain type specific cleanup: */ + domain = iommu_get_domain_for_dev_pasid(dev, pasid, 0); + if (domain) { + switch (domain->type) { + case IOMMU_DOMAIN_SVA: + intel_svm_remove_dev_pasid(dev, pasid); + break; + default: + /* should never reach here */ + WARN_ON(1); + break; + } + } + + intel_pasid_tear_down_entry(iommu, dev, pasid, false); +} + const struct iommu_ops intel_iommu_ops = { .capable = intel_iommu_capable, .domain_alloc = intel_iommu_domain_alloc, @@ -4724,6 +4748,7 @@ const struct iommu_ops intel_iommu_ops = { .dev_disable_feat = intel_iommu_dev_disable_feat, .is_attach_deferred = intel_iommu_is_attach_deferred, .def_domain_type = device_def_domain_type, + .remove_dev_pasid = intel_iommu_remove_dev_pasid, .pgsize_bitmap = SZ_4K, #ifdef CONFIG_INTEL_IOMMU_SVM .sva_bind = intel_svm_bind, 
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 33e5bcaf2a6c..252fa344f88a 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -753,6 +753,8 @@ void intel_svm_unbind(struct iommu_sva *handle); u32 intel_svm_get_pasid(struct iommu_sva *handle); int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt, struct iommu_page_response *msg); +struct iommu_domain *intel_svm_domain_alloc(void); +void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid); struct intel_svm_dev { struct list_head list; @@ -777,6 +779,14 @@ struct intel_svm { }; #else static inline void intel_svm_check(struct intel_iommu *iommu) {} +static inline struct iommu_domain *intel_svm_domain_alloc(void) +{ + return NULL; +} + +static inline void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid) +{ +} #endif #ifdef CONFIG_INTEL_IOMMU_DEBUGFS diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 94bc47b68c93..86c8ea0d9635 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -882,3 +882,50 @@ int intel_svm_page_response(struct device *dev, out: return ret; } + +void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid) +{ + mutex_lock(&pasid_mutex); + intel_svm_unbind_mm(dev, pasid); + mutex_unlock(&pasid_mutex); +} + +static int intel_svm_set_dev_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + struct mm_struct *mm = domain->mm; + struct iommu_sva *sva; + int ret = 0; + + mutex_lock(&pasid_mutex); + sva = intel_svm_bind_mm(iommu, dev, mm); + if (IS_ERR(sva)) + ret = PTR_ERR(sva); + mutex_unlock(&pasid_mutex); + + return ret; +} + +static void intel_svm_domain_free(struct iommu_domain *domain) +{ + kfree(to_dmar_domain(domain)); +} + +static const struct iommu_domain_ops intel_svm_domain_ops = { + .set_dev_pasid = 
intel_svm_set_dev_pasid, + .free = intel_svm_domain_free +}; + +struct iommu_domain *intel_svm_domain_alloc(void) +{ + struct dmar_domain *domain; + + domain = kzalloc(sizeof(*domain), GFP_KERNEL); + if (!domain) + return NULL; + domain->domain.ops = &intel_svm_domain_ops; + + return &domain->domain; +} From 386fa64fd52baadb849ed60c78b024fd1618278e Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 31 Oct 2022 08:59:12 +0800 Subject: [PATCH 0887/4122] arm-smmu-v3/sva: Add SVA domain support Add support for SVA domain allocation and provide an SVA-specific iommu_domain_ops. This implementation is based on the existing SVA code. Possible cleanup and refactoring are left for incremental changes later. Signed-off-by: Lu Baolu Reviewed-by: Jean-Philippe Brucker Reviewed-by: Jason Gunthorpe Tested-by: Zhangfei Gao Link: https://lore.kernel.org/r/20221031005917.45690-9-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 61 +++++++++++++++++++ drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 15 +++++ drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 14 +++++ 3 files changed, 90 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index 8fcf0df4bd0e..2d188d12419e 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -549,3 +549,64 @@ void arm_smmu_sva_notifier_synchronize(void) */ mmu_notifier_synchronize(); } + +void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t id) +{ + struct mm_struct *mm = domain->mm; + struct arm_smmu_bond *bond = NULL, *t; + struct arm_smmu_master *master = dev_iommu_priv_get(dev); + + mutex_lock(&sva_lock); + list_for_each_entry(t, &master->bonds, list) { + if (t->mm == mm) { + bond = t; + break; + } + } + + if (!WARN_ON(!bond) && refcount_dec_and_test(&bond->refs)) { + list_del(&bond->list); + 
arm_smmu_mmu_notifier_put(bond->smmu_mn); + kfree(bond); + } + mutex_unlock(&sva_lock); +} + +static int arm_smmu_sva_set_dev_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t id) +{ + int ret = 0; + struct iommu_sva *handle; + struct mm_struct *mm = domain->mm; + + mutex_lock(&sva_lock); + handle = __arm_smmu_sva_bind(dev, mm); + if (IS_ERR(handle)) + ret = PTR_ERR(handle); + mutex_unlock(&sva_lock); + + return ret; +} + +static void arm_smmu_sva_domain_free(struct iommu_domain *domain) +{ + kfree(domain); +} + +static const struct iommu_domain_ops arm_smmu_sva_domain_ops = { + .set_dev_pasid = arm_smmu_sva_set_dev_pasid, + .free = arm_smmu_sva_domain_free +}; + +struct iommu_domain *arm_smmu_sva_domain_alloc(void) +{ + struct iommu_domain *domain; + + domain = kzalloc(sizeof(*domain), GFP_KERNEL); + if (!domain) + return NULL; + domain->ops = &arm_smmu_sva_domain_ops; + + return domain; +} diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 21cb13da122c..eed2eb8effa3 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2009,6 +2009,9 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) { struct arm_smmu_domain *smmu_domain; + if (type == IOMMU_DOMAIN_SVA) + return arm_smmu_sva_domain_alloc(); + if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA && type != IOMMU_DOMAIN_DMA_FQ && @@ -2838,6 +2841,17 @@ static int arm_smmu_def_domain_type(struct device *dev) return 0; } +static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid) +{ + struct iommu_domain *domain; + + domain = iommu_get_domain_for_dev_pasid(dev, pasid, IOMMU_DOMAIN_SVA); + if (WARN_ON(IS_ERR(domain)) || !domain) + return; + + arm_smmu_sva_remove_dev_pasid(domain, dev, pasid); +} + static struct iommu_ops arm_smmu_ops = { .capable = arm_smmu_capable, .domain_alloc = arm_smmu_domain_alloc, @@ -2846,6 +2860,7 @@ static struct 
iommu_ops arm_smmu_ops = { .device_group = arm_smmu_device_group, .of_xlate = arm_smmu_of_xlate, .get_resv_regions = arm_smmu_get_resv_regions, + .remove_dev_pasid = arm_smmu_remove_dev_pasid, .dev_enable_feat = arm_smmu_dev_enable_feature, .dev_disable_feat = arm_smmu_dev_disable_feature, .sva_bind = arm_smmu_sva_bind, diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index d2ba86470c42..5aa853e98d38 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -758,6 +758,9 @@ struct iommu_sva *arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm); void arm_smmu_sva_unbind(struct iommu_sva *handle); u32 arm_smmu_sva_get_pasid(struct iommu_sva *handle); void arm_smmu_sva_notifier_synchronize(void); +struct iommu_domain *arm_smmu_sva_domain_alloc(void); +void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t id); #else /* CONFIG_ARM_SMMU_V3_SVA */ static inline bool arm_smmu_sva_supported(struct arm_smmu_device *smmu) { @@ -803,5 +806,16 @@ static inline u32 arm_smmu_sva_get_pasid(struct iommu_sva *handle) } static inline void arm_smmu_sva_notifier_synchronize(void) {} + +static inline struct iommu_domain *arm_smmu_sva_domain_alloc(void) +{ + return NULL; +} + +static inline void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain, + struct device *dev, + ioasid_t id) +{ +} #endif /* CONFIG_ARM_SMMU_V3_SVA */ #endif /* _ARM_SMMU_V3_H */ From be51b1d6bbff48c7d1943a8ff1e5a55777807f6e Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 31 Oct 2022 08:59:13 +0800 Subject: [PATCH 0888/4122] iommu/sva: Refactoring iommu_sva_bind/unbind_device() The existing iommu SVA interfaces are implemented by calling the SVA specific iommu ops provided by the IOMMU drivers. There's no need for any SVA specific ops in iommu_ops vector anymore as we can achieve this through the generic attach/detach_dev_pasid domain ops. 
This refactors the IOMMU SVA interfaces implementation by using the iommu_attach/detach_device_pasid interfaces and align them with the concept of the SVA iommu domain. Put the new SVA code in the SVA related file in order to make it self-contained. Signed-off-by: Lu Baolu Reviewed-by: Jean-Philippe Brucker Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Tested-by: Zhangfei Gao Tested-by: Tony Zhu Link: https://lore.kernel.org/r/20221031005917.45690-10-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu-sva-lib.c | 111 ++++++++++++++++++++++++++++++++++ drivers/iommu/iommu.c | 91 ---------------------------- include/linux/iommu.h | 43 +++++++------ 3 files changed, 134 insertions(+), 111 deletions(-) diff --git a/drivers/iommu/iommu-sva-lib.c b/drivers/iommu/iommu-sva-lib.c index 106506143896..e425573a1787 100644 --- a/drivers/iommu/iommu-sva-lib.c +++ b/drivers/iommu/iommu-sva-lib.c @@ -4,6 +4,7 @@ */ #include #include +#include #include "iommu-sva-lib.h" @@ -69,3 +70,113 @@ struct mm_struct *iommu_sva_find(ioasid_t pasid) return ioasid_find(&iommu_sva_pasid, pasid, __mmget_not_zero); } EXPORT_SYMBOL_GPL(iommu_sva_find); + +/** + * iommu_sva_bind_device() - Bind a process address space to a device + * @dev: the device + * @mm: the mm to bind, caller must hold a reference to mm_users + * + * Create a bond between device and address space, allowing the device to + * access the mm using the PASID returned by iommu_sva_get_pasid(). If a + * bond already exists between @device and @mm, an additional internal + * reference is taken. Caller must call iommu_sva_unbind_device() + * to release each reference. + * + * iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA) must be called first, to + * initialize the required SVA features. + * + * On error, returns an ERR_PTR value. 
+ */ +struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm) +{ + struct iommu_domain *domain; + struct iommu_sva *handle; + ioasid_t max_pasids; + int ret; + + max_pasids = dev->iommu->max_pasids; + if (!max_pasids) + return ERR_PTR(-EOPNOTSUPP); + + /* Allocate mm->pasid if necessary. */ + ret = iommu_sva_alloc_pasid(mm, 1, max_pasids - 1); + if (ret) + return ERR_PTR(ret); + + handle = kzalloc(sizeof(*handle), GFP_KERNEL); + if (!handle) + return ERR_PTR(-ENOMEM); + + mutex_lock(&iommu_sva_lock); + /* Search for an existing domain. */ + domain = iommu_get_domain_for_dev_pasid(dev, mm->pasid, + IOMMU_DOMAIN_SVA); + if (IS_ERR(domain)) { + ret = PTR_ERR(domain); + goto out_unlock; + } + + if (domain) { + domain->users++; + goto out; + } + + /* Allocate a new domain and set it on device pasid. */ + domain = iommu_sva_domain_alloc(dev, mm); + if (!domain) { + ret = -ENOMEM; + goto out_unlock; + } + + ret = iommu_attach_device_pasid(domain, dev, mm->pasid); + if (ret) + goto out_free_domain; + domain->users = 1; +out: + mutex_unlock(&iommu_sva_lock); + handle->dev = dev; + handle->domain = domain; + + return handle; + +out_free_domain: + iommu_domain_free(domain); +out_unlock: + mutex_unlock(&iommu_sva_lock); + kfree(handle); + + return ERR_PTR(ret); +} +EXPORT_SYMBOL_GPL(iommu_sva_bind_device); + +/** + * iommu_sva_unbind_device() - Remove a bond created with iommu_sva_bind_device + * @handle: the handle returned by iommu_sva_bind_device() + * + * Put reference to a bond between device and address space. The device should + * not be issuing any more transaction for this PASID. All outstanding page + * requests for this PASID must have been flushed to the IOMMU. 
+ */ +void iommu_sva_unbind_device(struct iommu_sva *handle) +{ + struct iommu_domain *domain = handle->domain; + ioasid_t pasid = domain->mm->pasid; + struct device *dev = handle->dev; + + mutex_lock(&iommu_sva_lock); + if (--domain->users == 0) { + iommu_detach_device_pasid(domain, dev, pasid); + iommu_domain_free(domain); + } + mutex_unlock(&iommu_sva_lock); + kfree(handle); +} +EXPORT_SYMBOL_GPL(iommu_sva_unbind_device); + +u32 iommu_sva_get_pasid(struct iommu_sva *handle) +{ + struct iommu_domain *domain = handle->domain; + + return domain->mm->pasid; +} +EXPORT_SYMBOL_GPL(iommu_sva_get_pasid); diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 6a1cd2018e30..c9da0a1bb3b8 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2751,97 +2751,6 @@ int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) } EXPORT_SYMBOL_GPL(iommu_dev_disable_feature); -/** - * iommu_sva_bind_device() - Bind a process address space to a device - * @dev: the device - * @mm: the mm to bind, caller must hold a reference to it - * - * Create a bond between device and address space, allowing the device to access - * the mm using the returned PASID. If a bond already exists between @device and - * @mm, it is returned and an additional reference is taken. Caller must call - * iommu_sva_unbind_device() to release each reference. - * - * iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA) must be called first, to - * initialize the required SVA features. - * - * On error, returns an ERR_PTR value. 
- */ -struct iommu_sva * -iommu_sva_bind_device(struct device *dev, struct mm_struct *mm) -{ - struct iommu_group *group; - struct iommu_sva *handle = ERR_PTR(-EINVAL); - const struct iommu_ops *ops = dev_iommu_ops(dev); - - if (!ops->sva_bind) - return ERR_PTR(-ENODEV); - - group = iommu_group_get(dev); - if (!group) - return ERR_PTR(-ENODEV); - - /* Ensure device count and domain don't change while we're binding */ - mutex_lock(&group->mutex); - - /* - * To keep things simple, SVA currently doesn't support IOMMU groups - * with more than one device. Existing SVA-capable systems are not - * affected by the problems that required IOMMU groups (lack of ACS - * isolation, device ID aliasing and other hardware issues). - */ - if (iommu_group_device_count(group) != 1) - goto out_unlock; - - handle = ops->sva_bind(dev, mm); - -out_unlock: - mutex_unlock(&group->mutex); - iommu_group_put(group); - - return handle; -} -EXPORT_SYMBOL_GPL(iommu_sva_bind_device); - -/** - * iommu_sva_unbind_device() - Remove a bond created with iommu_sva_bind_device - * @handle: the handle returned by iommu_sva_bind_device() - * - * Put reference to a bond between device and address space. The device should - * not be issuing any more transaction for this PASID. All outstanding page - * requests for this PASID must have been flushed to the IOMMU. 
- */ -void iommu_sva_unbind_device(struct iommu_sva *handle) -{ - struct iommu_group *group; - struct device *dev = handle->dev; - const struct iommu_ops *ops = dev_iommu_ops(dev); - - if (!ops->sva_unbind) - return; - - group = iommu_group_get(dev); - if (!group) - return; - - mutex_lock(&group->mutex); - ops->sva_unbind(handle); - mutex_unlock(&group->mutex); - - iommu_group_put(group); -} -EXPORT_SYMBOL_GPL(iommu_sva_unbind_device); - -u32 iommu_sva_get_pasid(struct iommu_sva *handle) -{ - const struct iommu_ops *ops = dev_iommu_ops(handle->dev); - - if (!ops->sva_get_pasid) - return IOMMU_PASID_INVALID; - - return ops->sva_get_pasid(handle); -} -EXPORT_SYMBOL_GPL(iommu_sva_get_pasid); - /* * Changes the default domain of an iommu group that has *only* one device * diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 776baa375967..bee5659d07eb 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -645,6 +645,7 @@ struct iommu_fwspec { */ struct iommu_sva { struct device *dev; + struct iommu_domain *domain; }; int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, @@ -686,11 +687,6 @@ void iommu_release_device(struct device *dev); int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f); int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features f); -struct iommu_sva *iommu_sva_bind_device(struct device *dev, - struct mm_struct *mm); -void iommu_sva_unbind_device(struct iommu_sva *handle); -u32 iommu_sva_get_pasid(struct iommu_sva *handle); - int iommu_device_use_default_domain(struct device *dev); void iommu_device_unuse_default_domain(struct device *dev); @@ -1026,21 +1022,6 @@ iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) return -ENODEV; } -static inline struct iommu_sva * -iommu_sva_bind_device(struct device *dev, struct mm_struct *mm) -{ - return NULL; -} - -static inline void iommu_sva_unbind_device(struct iommu_sva *handle) -{ -} - -static inline 
u32 iommu_sva_get_pasid(struct iommu_sva *handle) -{ - return IOMMU_PASID_INVALID; -} - static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev) { return NULL; @@ -1154,4 +1135,26 @@ static inline void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_m #endif /* CONFIG_IOMMU_DMA */ +#ifdef CONFIG_IOMMU_SVA +struct iommu_sva *iommu_sva_bind_device(struct device *dev, + struct mm_struct *mm); +void iommu_sva_unbind_device(struct iommu_sva *handle); +u32 iommu_sva_get_pasid(struct iommu_sva *handle); +#else +static inline struct iommu_sva * +iommu_sva_bind_device(struct device *dev, struct mm_struct *mm) +{ + return NULL; +} + +static inline void iommu_sva_unbind_device(struct iommu_sva *handle) +{ +} + +static inline u32 iommu_sva_get_pasid(struct iommu_sva *handle) +{ + return IOMMU_PASID_INVALID; +} +#endif /* CONFIG_IOMMU_SVA */ + #endif /* __LINUX_IOMMU_H */ From 1c263576f4735e063e234fa5f43fd3046d36b5b3 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 31 Oct 2022 08:59:14 +0800 Subject: [PATCH 0889/4122] iommu: Remove SVA related callbacks from iommu ops These ops have been deprecated. There's no need for them anymore. Remove them to avoid dead code.
Signed-off-by: Lu Baolu Reviewed-by: Jean-Philippe Brucker Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Reviewed-by: Yi Liu Tested-by: Zhangfei Gao Tested-by: Tony Zhu Link: https://lore.kernel.org/r/20221031005917.45690-11-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 40 --------------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 3 -- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 16 ------ drivers/iommu/intel/iommu.c | 3 -- drivers/iommu/intel/iommu.h | 3 -- drivers/iommu/intel/svm.c | 49 ------------------- include/linux/iommu.h | 7 --- 7 files changed, 121 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index 2d188d12419e..9541afbba73c 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -344,11 +344,6 @@ __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm) if (!bond) return ERR_PTR(-ENOMEM); - /* Allocate a PASID for this mm if necessary */ - ret = iommu_sva_alloc_pasid(mm, 1, (1U << master->ssid_bits) - 1); - if (ret) - goto err_free_bond; - bond->mm = mm; bond->sva.dev = dev; refcount_set(&bond->refs, 1); @@ -367,41 +362,6 @@ err_free_bond: return ERR_PTR(ret); } -struct iommu_sva *arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm) -{ - struct iommu_sva *handle; - struct iommu_domain *domain = iommu_get_domain_for_dev(dev); - struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - - if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1) - return ERR_PTR(-EINVAL); - - mutex_lock(&sva_lock); - handle = __arm_smmu_sva_bind(dev, mm); - mutex_unlock(&sva_lock); - return handle; -} - -void arm_smmu_sva_unbind(struct iommu_sva *handle) -{ - struct arm_smmu_bond *bond = sva_to_bond(handle); - - mutex_lock(&sva_lock); - if (refcount_dec_and_test(&bond->refs)) { - list_del(&bond->list); - arm_smmu_mmu_notifier_put(bond->smmu_mn); - kfree(bond); - } 
- mutex_unlock(&sva_lock); -} - -u32 arm_smmu_sva_get_pasid(struct iommu_sva *handle) -{ - struct arm_smmu_bond *bond = sva_to_bond(handle); - - return bond->mm->pasid; -} - bool arm_smmu_sva_supported(struct arm_smmu_device *smmu) { unsigned long reg, fld; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index eed2eb8effa3..891e87ea54db 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2863,9 +2863,6 @@ static struct iommu_ops arm_smmu_ops = { .remove_dev_pasid = arm_smmu_remove_dev_pasid, .dev_enable_feat = arm_smmu_dev_enable_feature, .dev_disable_feat = arm_smmu_dev_disable_feature, - .sva_bind = arm_smmu_sva_bind, - .sva_unbind = arm_smmu_sva_unbind, - .sva_get_pasid = arm_smmu_sva_get_pasid, .page_response = arm_smmu_page_response, .def_domain_type = arm_smmu_def_domain_type, .pgsize_bitmap = -1UL, /* Restricted during device attach */ diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 5aa853e98d38..8d772ea8a583 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -754,9 +754,6 @@ bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master); int arm_smmu_master_enable_sva(struct arm_smmu_master *master); int arm_smmu_master_disable_sva(struct arm_smmu_master *master); bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master); -struct iommu_sva *arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm); -void arm_smmu_sva_unbind(struct iommu_sva *handle); -u32 arm_smmu_sva_get_pasid(struct iommu_sva *handle); void arm_smmu_sva_notifier_synchronize(void); struct iommu_domain *arm_smmu_sva_domain_alloc(void); void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain, @@ -792,19 +789,6 @@ static inline bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master return false; } -static inline struct iommu_sva * 
-arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm) -{ - return ERR_PTR(-ENODEV); -} - -static inline void arm_smmu_sva_unbind(struct iommu_sva *handle) {} - -static inline u32 arm_smmu_sva_get_pasid(struct iommu_sva *handle) -{ - return IOMMU_PASID_INVALID; -} - static inline void arm_smmu_sva_notifier_synchronize(void) {} static inline struct iommu_domain *arm_smmu_sva_domain_alloc(void) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 7b67e431dd36..5a41b10593b7 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4751,9 +4751,6 @@ const struct iommu_ops intel_iommu_ops = { .remove_dev_pasid = intel_iommu_remove_dev_pasid, .pgsize_bitmap = SZ_4K, #ifdef CONFIG_INTEL_IOMMU_SVM - .sva_bind = intel_svm_bind, - .sva_unbind = intel_svm_unbind, - .sva_get_pasid = intel_svm_get_pasid, .page_response = intel_svm_page_response, #endif .default_domain_ops = &(const struct iommu_domain_ops) { diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 252fa344f88a..251a609fdce3 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -748,9 +748,6 @@ struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn); extern void intel_svm_check(struct intel_iommu *iommu); extern int intel_svm_enable_prq(struct intel_iommu *iommu); extern int intel_svm_finish_prq(struct intel_iommu *iommu); -struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm); -void intel_svm_unbind(struct iommu_sva *handle); -u32 intel_svm_get_pasid(struct iommu_sva *handle); int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt, struct iommu_page_response *msg); struct iommu_domain *intel_svm_domain_alloc(void); diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 86c8ea0d9635..fceae9387018 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -296,14 +296,6 @@ out: return 0; } -static int 
intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm) -{ - ioasid_t max_pasid = dev_is_pci(dev) ? - pci_max_pasids(to_pci_dev(dev)) : intel_pasid_max_id; - - return iommu_sva_alloc_pasid(mm, PASID_MIN, max_pasid - 1); -} - static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, struct device *dev, struct mm_struct *mm) @@ -771,47 +763,6 @@ prq_advance: return IRQ_RETVAL(handled); } -struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm) -{ - struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); - struct iommu_sva *sva; - int ret; - - mutex_lock(&pasid_mutex); - ret = intel_svm_alloc_pasid(dev, mm); - if (ret) { - mutex_unlock(&pasid_mutex); - return ERR_PTR(ret); - } - - sva = intel_svm_bind_mm(iommu, dev, mm); - mutex_unlock(&pasid_mutex); - - return sva; -} - -void intel_svm_unbind(struct iommu_sva *sva) -{ - struct intel_svm_dev *sdev = to_intel_svm_dev(sva); - - mutex_lock(&pasid_mutex); - intel_svm_unbind_mm(sdev->dev, sdev->pasid); - mutex_unlock(&pasid_mutex); -} - -u32 intel_svm_get_pasid(struct iommu_sva *sva) -{ - struct intel_svm_dev *sdev; - u32 pasid; - - mutex_lock(&pasid_mutex); - sdev = to_intel_svm_dev(sva); - pasid = sdev->pasid; - mutex_unlock(&pasid_mutex); - - return pasid; -} - int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt, struct iommu_page_response *msg) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index bee5659d07eb..c337ef1c97bc 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -227,9 +227,6 @@ struct iommu_iotlb_gather { * driver init to device driver init (default no) * @dev_enable/disable_feat: per device entries to enable/disable * iommu specific features. 
- * @sva_bind: Bind process address space to device - * @sva_unbind: Unbind process address space from device - * @sva_get_pasid: Get PASID associated to a SVA handle * @page_response: handle page request response * @def_domain_type: device default domain type, return value: * - IOMMU_DOMAIN_IDENTITY: must use an identity domain @@ -263,10 +260,6 @@ struct iommu_ops { int (*dev_enable_feat)(struct device *dev, enum iommu_dev_features f); int (*dev_disable_feat)(struct device *dev, enum iommu_dev_features f); - struct iommu_sva *(*sva_bind)(struct device *dev, struct mm_struct *mm); - void (*sva_unbind)(struct iommu_sva *handle); - u32 (*sva_get_pasid)(struct iommu_sva *handle); - int (*page_response)(struct device *dev, struct iommu_fault_event *evt, struct iommu_page_response *msg); From 8cc93159f91960b4812ea48887e9e7501babc95a Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 31 Oct 2022 08:59:15 +0800 Subject: [PATCH 0890/4122] iommu: Prepare IOMMU domain for IOPF This adds some mechanisms around the iommu_domain so that the I/O page fault handling framework could route a page fault to the domain and call the fault handler from it. Add pointers to the page fault handler and its private data in struct iommu_domain. The fault handler will be called with the private data as a parameter once a page fault is routed to the domain. Any kernel component which owns an iommu domain could install a handler and its private parameter so that the page fault could be further routed and handled. This also prepares the SVA implementation to be the first consumer of the per-domain page fault handling model. The I/O page fault handler for SVA is copied to the SVA file with mmget_not_zero() added before mmap_read_lock().
Suggested-by: Jean-Philippe Brucker Signed-off-by: Lu Baolu Reviewed-by: Jean-Philippe Brucker Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Tested-by: Zhangfei Gao Tested-by: Tony Zhu Link: https://lore.kernel.org/r/20221031005917.45690-12-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/io-pgfault.c | 7 +++++ drivers/iommu/iommu-sva-lib.c | 58 +++++++++++++++++++++++++++++++++++ drivers/iommu/iommu-sva-lib.h | 8 +++++ drivers/iommu/iommu.c | 4 +++ include/linux/iommu.h | 3 ++ 5 files changed, 80 insertions(+) diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c index 1df8c1dcae77..aee9e033012f 100644 --- a/drivers/iommu/io-pgfault.c +++ b/drivers/iommu/io-pgfault.c @@ -181,6 +181,13 @@ static void iopf_handle_group(struct work_struct *work) * request completes, outstanding faults will have been dealt with by the time * the PASID is freed. * + * Any valid page fault will be eventually routed to an iommu domain and the + * page fault handler installed there will get called. The users of this + * handling framework should guarantee that the iommu domain could only be + * freed after the device has stopped generating page faults (or the iommu + * hardware has been set to block the page faults) and the pending page faults + * have been flushed. + * * Return: 0 on success and <0 on error. 
*/ int iommu_queue_iopf(struct iommu_fault *fault, void *cookie) diff --git a/drivers/iommu/iommu-sva-lib.c b/drivers/iommu/iommu-sva-lib.c index e425573a1787..089fd61ff453 100644 --- a/drivers/iommu/iommu-sva-lib.c +++ b/drivers/iommu/iommu-sva-lib.c @@ -180,3 +180,61 @@ u32 iommu_sva_get_pasid(struct iommu_sva *handle) return domain->mm->pasid; } EXPORT_SYMBOL_GPL(iommu_sva_get_pasid); + +/* + * I/O page fault handler for SVA + */ +enum iommu_page_response_code +iommu_sva_handle_iopf(struct iommu_fault *fault, void *data) +{ + vm_fault_t ret; + struct vm_area_struct *vma; + struct mm_struct *mm = data; + unsigned int access_flags = 0; + unsigned int fault_flags = FAULT_FLAG_REMOTE; + struct iommu_fault_page_request *prm = &fault->prm; + enum iommu_page_response_code status = IOMMU_PAGE_RESP_INVALID; + + if (!(prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID)) + return status; + + if (!mmget_not_zero(mm)) + return status; + + mmap_read_lock(mm); + + vma = find_extend_vma(mm, prm->addr); + if (!vma) + /* Unmapped area */ + goto out_put_mm; + + if (prm->perm & IOMMU_FAULT_PERM_READ) + access_flags |= VM_READ; + + if (prm->perm & IOMMU_FAULT_PERM_WRITE) { + access_flags |= VM_WRITE; + fault_flags |= FAULT_FLAG_WRITE; + } + + if (prm->perm & IOMMU_FAULT_PERM_EXEC) { + access_flags |= VM_EXEC; + fault_flags |= FAULT_FLAG_INSTRUCTION; + } + + if (!(prm->perm & IOMMU_FAULT_PERM_PRIV)) + fault_flags |= FAULT_FLAG_USER; + + if (access_flags & ~vma->vm_flags) + /* Access fault */ + goto out_put_mm; + + ret = handle_mm_fault(vma, prm->addr, fault_flags, NULL); + status = ret & VM_FAULT_ERROR ? 
IOMMU_PAGE_RESP_INVALID : + IOMMU_PAGE_RESP_SUCCESS; + +out_put_mm: + mmap_read_unlock(mm); + mmput(mm); + + return status; +} diff --git a/drivers/iommu/iommu-sva-lib.h b/drivers/iommu/iommu-sva-lib.h index 8909ea1094e3..1b3ace4b5863 100644 --- a/drivers/iommu/iommu-sva-lib.h +++ b/drivers/iommu/iommu-sva-lib.h @@ -26,6 +26,8 @@ int iopf_queue_flush_dev(struct device *dev); struct iopf_queue *iopf_queue_alloc(const char *name); void iopf_queue_free(struct iopf_queue *queue); int iopf_queue_discard_partial(struct iopf_queue *queue); +enum iommu_page_response_code +iommu_sva_handle_iopf(struct iommu_fault *fault, void *data); #else /* CONFIG_IOMMU_SVA */ static inline int iommu_queue_iopf(struct iommu_fault *fault, void *cookie) @@ -63,5 +65,11 @@ static inline int iopf_queue_discard_partial(struct iopf_queue *queue) { return -ENODEV; } + +static inline enum iommu_page_response_code +iommu_sva_handle_iopf(struct iommu_fault *fault, void *data) +{ + return IOMMU_PAGE_RESP_INVALID; +} #endif /* CONFIG_IOMMU_SVA */ #endif /* _IOMMU_SVA_LIB_H */ diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index c9da0a1bb3b8..9e0fb18e1b34 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -33,6 +33,8 @@ #include "dma-iommu.h" +#include "iommu-sva-lib.h" + static struct kset *iommu_group_kset; static DEFINE_IDA(iommu_group_ida); @@ -3309,6 +3311,8 @@ struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, domain->type = IOMMU_DOMAIN_SVA; mmgrab(mm); domain->mm = mm; + domain->iopf_handler = iommu_sva_handle_iopf; + domain->fault_data = mm; return domain; } diff --git a/include/linux/iommu.h b/include/linux/iommu.h index c337ef1c97bc..7d2648058e43 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -98,6 +98,9 @@ struct iommu_domain { unsigned long pgsize_bitmap; /* Bitmap of page sizes in use */ struct iommu_domain_geometry geometry; struct iommu_dma_cookie *iova_cookie; + enum iommu_page_response_code (*iopf_handler)(struct iommu_fault 
*fault, + void *data); + void *fault_data; union { struct { iommu_fault_handler_t handler; From 4bb4211e48fbfb392bb07168b75b1a92832b62f5 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 31 Oct 2022 08:59:16 +0800 Subject: [PATCH 0891/4122] iommu: Per-domain I/O page fault handling Tweak the I/O page fault handling framework to route the page faults to the domain and call the page fault handler retrieved from the domain. This makes it possible for the I/O page fault handling framework to serve more usage scenarios as long as they have an IOMMU domain and install a page fault handler in it. Some unused functions are also removed to avoid dead code. The iommu_get_domain_for_dev_pasid() which retrieves the attached domain for a {device, PASID} pair is used. It will be used by the page fault handling framework which knows {device, PASID} reported from the iommu driver. We have a guarantee that the SVA domain doesn't go away during IOPF handling, because unbind() won't free the domain until all the pending page requests have been flushed from the pipeline. The drivers either call iopf_queue_flush_dev() explicitly, or, in the stall case, the device driver is required to flush all DMAs including stalled transactions before calling unbind(). This also renames iopf_handle_group() to iopf_handler() to avoid confusion.
Signed-off-by: Lu Baolu Reviewed-by: Jean-Philippe Brucker Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Tested-by: Zhangfei Gao Tested-by: Tony Zhu Link: https://lore.kernel.org/r/20221031005917.45690-13-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/io-pgfault.c | 68 +++++--------------------------------- 1 file changed, 9 insertions(+), 59 deletions(-) diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c index aee9e033012f..d046d89cec55 100644 --- a/drivers/iommu/io-pgfault.c +++ b/drivers/iommu/io-pgfault.c @@ -69,69 +69,18 @@ static int iopf_complete_group(struct device *dev, struct iopf_fault *iopf, return iommu_page_response(dev, &resp); } -static enum iommu_page_response_code -iopf_handle_single(struct iopf_fault *iopf) -{ - vm_fault_t ret; - struct mm_struct *mm; - struct vm_area_struct *vma; - unsigned int access_flags = 0; - unsigned int fault_flags = FAULT_FLAG_REMOTE; - struct iommu_fault_page_request *prm = &iopf->fault.prm; - enum iommu_page_response_code status = IOMMU_PAGE_RESP_INVALID; - - if (!(prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID)) - return status; - - mm = iommu_sva_find(prm->pasid); - if (IS_ERR_OR_NULL(mm)) - return status; - - mmap_read_lock(mm); - - vma = find_extend_vma(mm, prm->addr); - if (!vma) - /* Unmapped area */ - goto out_put_mm; - - if (prm->perm & IOMMU_FAULT_PERM_READ) - access_flags |= VM_READ; - - if (prm->perm & IOMMU_FAULT_PERM_WRITE) { - access_flags |= VM_WRITE; - fault_flags |= FAULT_FLAG_WRITE; - } - - if (prm->perm & IOMMU_FAULT_PERM_EXEC) { - access_flags |= VM_EXEC; - fault_flags |= FAULT_FLAG_INSTRUCTION; - } - - if (!(prm->perm & IOMMU_FAULT_PERM_PRIV)) - fault_flags |= FAULT_FLAG_USER; - - if (access_flags & ~vma->vm_flags) - /* Access fault */ - goto out_put_mm; - - ret = handle_mm_fault(vma, prm->addr, fault_flags, NULL); - status = ret & VM_FAULT_ERROR ? 
IOMMU_PAGE_RESP_INVALID : - IOMMU_PAGE_RESP_SUCCESS; - -out_put_mm: - mmap_read_unlock(mm); - mmput(mm); - - return status; -} - -static void iopf_handle_group(struct work_struct *work) +static void iopf_handler(struct work_struct *work) { struct iopf_group *group; + struct iommu_domain *domain; struct iopf_fault *iopf, *next; enum iommu_page_response_code status = IOMMU_PAGE_RESP_SUCCESS; group = container_of(work, struct iopf_group, work); + domain = iommu_get_domain_for_dev_pasid(group->dev, + group->last_fault.fault.prm.pasid, 0); + if (!domain || !domain->iopf_handler) + status = IOMMU_PAGE_RESP_INVALID; list_for_each_entry_safe(iopf, next, &group->faults, list) { /* @@ -139,7 +88,8 @@ static void iopf_handle_group(struct work_struct *work) * faults in the group if there is an error. */ if (status == IOMMU_PAGE_RESP_SUCCESS) - status = iopf_handle_single(iopf); + status = domain->iopf_handler(&iopf->fault, + domain->fault_data); if (!(iopf->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) @@ -242,7 +192,7 @@ int iommu_queue_iopf(struct iommu_fault *fault, void *cookie) group->last_fault.fault = *fault; INIT_LIST_HEAD(&group->faults); list_add(&group->last_fault.list, &group->faults); - INIT_WORK(&group->work, iopf_handle_group); + INIT_WORK(&group->work, iopf_handler); /* See if we have partial faults for this group */ list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) { From 757636ed2607a3269cd2764e3e4a0480384c6c26 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 31 Oct 2022 08:59:17 +0800 Subject: [PATCH 0892/4122] iommu: Rename iommu-sva-lib.{c,h} Rename iommu-sva-lib.c[h] to iommu-sva.c[h] as it contains all code for SVA implementation in iommu core. 
Signed-off-by: Lu Baolu Reviewed-by: Jean-Philippe Brucker Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Tested-by: Zhangfei Gao Tested-by: Tony Zhu Link: https://lore.kernel.org/r/20221031005917.45690-14-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/Makefile | 2 +- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 2 +- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 2 +- drivers/iommu/intel/iommu.c | 2 +- drivers/iommu/intel/svm.c | 2 +- drivers/iommu/io-pgfault.c | 2 +- drivers/iommu/{iommu-sva-lib.c => iommu-sva.c} | 2 +- drivers/iommu/{iommu-sva-lib.h => iommu-sva.h} | 6 +++--- drivers/iommu/iommu.c | 2 +- 9 files changed, 11 insertions(+), 11 deletions(-) rename drivers/iommu/{iommu-sva-lib.c => iommu-sva.c} (99%) rename drivers/iommu/{iommu-sva-lib.h => iommu-sva.h} (95%) diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index cc9f381013c3..7fbf6a337662 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -28,6 +28,6 @@ obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o obj-$(CONFIG_S390_IOMMU) += s390-iommu.o obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o -obj-$(CONFIG_IOMMU_SVA) += iommu-sva-lib.o io-pgfault.o +obj-$(CONFIG_IOMMU_SVA) += iommu-sva.o io-pgfault.o obj-$(CONFIG_SPRD_IOMMU) += sprd-iommu.o obj-$(CONFIG_APPLE_DART) += apple-dart.o diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index 9541afbba73c..a5a63b1c947e 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -10,7 +10,7 @@ #include #include "arm-smmu-v3.h" -#include "../../iommu-sva-lib.h" +#include "../../iommu-sva.h" #include "../../io-pgtable-arm.h" struct arm_smmu_mmu_notifier { diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 891e87ea54db..94a2e53368af 100644 --- 
a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -29,7 +29,7 @@ #include "arm-smmu-v3.h" #include "../../dma-iommu.h" -#include "../../iommu-sva-lib.h" +#include "../../iommu-sva.h" static bool disable_bypass = true; module_param(disable_bypass, bool, 0444); diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 5a41b10593b7..a934a46bb9e6 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -27,7 +27,7 @@ #include "iommu.h" #include "../dma-iommu.h" #include "../irq_remapping.h" -#include "../iommu-sva-lib.h" +#include "../iommu-sva.h" #include "pasid.h" #include "cap_audit.h" diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index fceae9387018..f32de15da61a 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -24,7 +24,7 @@ #include "iommu.h" #include "pasid.h" #include "perf.h" -#include "../iommu-sva-lib.h" +#include "../iommu-sva.h" #include "trace.h" static irqreturn_t prq_event_thread(int irq, void *d); diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c index d046d89cec55..e5b8b9110c13 100644 --- a/drivers/iommu/io-pgfault.c +++ b/drivers/iommu/io-pgfault.c @@ -11,7 +11,7 @@ #include #include -#include "iommu-sva-lib.h" +#include "iommu-sva.h" /** * struct iopf_queue - IO Page Fault queue diff --git a/drivers/iommu/iommu-sva-lib.c b/drivers/iommu/iommu-sva.c similarity index 99% rename from drivers/iommu/iommu-sva-lib.c rename to drivers/iommu/iommu-sva.c index 089fd61ff453..24bf9b2b58aa 100644 --- a/drivers/iommu/iommu-sva-lib.c +++ b/drivers/iommu/iommu-sva.c @@ -6,7 +6,7 @@ #include #include -#include "iommu-sva-lib.h" +#include "iommu-sva.h" static DEFINE_MUTEX(iommu_sva_lock); static DECLARE_IOASID_SET(iommu_sva_pasid); diff --git a/drivers/iommu/iommu-sva-lib.h b/drivers/iommu/iommu-sva.h similarity index 95% rename from drivers/iommu/iommu-sva-lib.h rename to drivers/iommu/iommu-sva.h index 
1b3ace4b5863..7215a761b962 100644 --- a/drivers/iommu/iommu-sva-lib.h +++ b/drivers/iommu/iommu-sva.h @@ -2,8 +2,8 @@ /* * SVA library for IOMMU drivers */ -#ifndef _IOMMU_SVA_LIB_H -#define _IOMMU_SVA_LIB_H +#ifndef _IOMMU_SVA_H +#define _IOMMU_SVA_H #include #include @@ -72,4 +72,4 @@ iommu_sva_handle_iopf(struct iommu_fault *fault, void *data) return IOMMU_PAGE_RESP_INVALID; } #endif /* CONFIG_IOMMU_SVA */ -#endif /* _IOMMU_SVA_LIB_H */ +#endif /* _IOMMU_SVA_H */ diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 9e0fb18e1b34..c50f68b2b656 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -33,7 +33,7 @@ #include "dma-iommu.h" -#include "iommu-sva-lib.h" +#include "iommu-sva.h" static struct kset *iommu_group_kset; static DEFINE_IDA(iommu_group_ida); From 7eb99841f340b80be0d0973b0deb592d75fb8928 Mon Sep 17 00:00:00 2001 From: Michael Riesch Date: Wed, 2 Nov 2022 07:35:53 +0100 Subject: [PATCH 0893/4122] iommu/rockchip: fix permission bits in page table entries v2 As pointed out in the corresponding downstream fix [0], the permission bits of the page table entries are compatible between v1 and v2 of the IOMMU. This is in contrast to the current mainline code that incorrectly assumes that the read and write permission bits are switched. Fix the permission bits by reusing the v1 bit defines. 
[0] https://github.com/rockchip-linux/kernel/commit/e3bc123a2260145e34b57454da3db0edd117eb8e Fixes: c55356c534aa ("iommu: rockchip: Add support for iommu v2") Signed-off-by: Michael Riesch Reviewed-by: Heiko Stuebner Link: https://lore.kernel.org/r/20221102063553.2464161-1-michael.riesch@wolfvision.net Signed-off-by: Joerg Roedel --- drivers/iommu/rockchip-iommu.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index a3fc59b814ab..a68eadd64f38 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -280,19 +280,17 @@ static u32 rk_mk_pte(phys_addr_t page, int prot) * 11:9 - Page address bit 34:32 * 8:4 - Page address bit 39:35 * 3 - Security - * 2 - Readable - * 1 - Writable + * 2 - Writable + * 1 - Readable * 0 - 1 if Page @ Page address is valid */ -#define RK_PTE_PAGE_READABLE_V2 BIT(2) -#define RK_PTE_PAGE_WRITABLE_V2 BIT(1) static u32 rk_mk_pte_v2(phys_addr_t page, int prot) { u32 flags = 0; - flags |= (prot & IOMMU_READ) ? RK_PTE_PAGE_READABLE_V2 : 0; - flags |= (prot & IOMMU_WRITE) ? RK_PTE_PAGE_WRITABLE_V2 : 0; + flags |= (prot & IOMMU_READ) ? RK_PTE_PAGE_READABLE : 0; + flags |= (prot & IOMMU_WRITE) ? RK_PTE_PAGE_WRITABLE : 0; return rk_mk_dte_v2(page) | flags; } From fdaeb224e2bf747942653e4f70226fcfe60fbf73 Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Wed, 26 Oct 2022 12:16:13 -0700 Subject: [PATCH 0894/4122] crypto: tcrypt - Use pr_cont to print test results For some test cases, a line break gets inserted between the test banner and the results. For example, with mode=211 this is the output: [...] testing speed of rfc4106(gcm(aes)) (rfc4106-gcm-aesni) encryption [...] test 0 (160 bit key, 16 byte blocks): [...] 1 operation in 2373 cycles (16 bytes) --snip-- [...] testing speed of gcm(aes) (generic-gcm-aesni) encryption [...] test 0 (128 bit key, 16 byte blocks): [...] 
1 operation in 2338 cycles (16 bytes) Similar behavior is seen in the following cases as well: modprobe tcrypt mode=212 modprobe tcrypt mode=213 modprobe tcrypt mode=221 modprobe tcrypt mode=300 sec=1 modprobe tcrypt mode=400 sec=1 This doesn't happen with mode=215: [...] tcrypt: testing speed of multibuffer rfc4106(gcm(aes)) (rfc4106-gcm-aesni) encryption [...] tcrypt: test 0 (160 bit key, 16 byte blocks): 1 operation in 2215 cycles (16 bytes) --snip-- [...] tcrypt: testing speed of multibuffer gcm(aes) (generic-gcm-aesni) encryption [...] tcrypt: test 0 (128 bit key, 16 byte blocks): 1 operation in 2191 cycles (16 bytes) This print inconsistency is because printk() is used instead of pr_cont() in a few places. Change these to be pr_cont(). checkpatch warns that pr_cont() shouldn't be used. This can be ignored in this context as tcrypt already uses pr_cont(). Signed-off-by: Anirudh Venkataramanan Signed-off-by: Herbert Xu --- crypto/tcrypt.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 3f7dc94a63e0..2822405a0d45 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -506,8 +506,8 @@ static int test_aead_cycles(struct aead_request *req, int enc, int blen) out: if (ret == 0) - printk("1 operation in %lu cycles (%d bytes)\n", - (cycles + 4) / 8, blen); + pr_cont("1 operation in %lu cycles (%d bytes)\n", + (cycles + 4) / 8, blen); return ret; } @@ -727,8 +727,8 @@ static int test_ahash_jiffies_digest(struct ahash_request *req, int blen, return ret; } - printk("%6u opers/sec, %9lu bytes/sec\n", - bcount / secs, ((long)bcount * blen) / secs); + pr_cont("%6u opers/sec, %9lu bytes/sec\n", + bcount / secs, ((long)bcount * blen) / secs); return 0; } From 837a99f59043c3505d69761575172da1d09220b5 Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Wed, 26 Oct 2022 12:16:14 -0700 Subject: [PATCH 0895/4122] crypto: tcrypt - Use pr_info/pr_err Currently, there's mixed use of printk() and 
pr_info()/pr_err(). The latter prints the module name (because pr_fmt() is defined so) but the former does not. As a result there's inconsistency in the printed output. For example: modprobe mode=211: [...] test 0 (160 bit key, 16 byte blocks): 1 operation in 2320 cycles (16 bytes) [...] test 1 (160 bit key, 64 byte blocks): 1 operation in 2336 cycles (64 bytes) modprobe mode=215: [...] tcrypt: test 0 (160 bit key, 16 byte blocks): 1 operation in 2173 cycles (16 bytes) [...] tcrypt: test 1 (160 bit key, 64 byte blocks): 1 operation in 2241 cycles (64 bytes) Replace all instances of printk() with pr_info()/pr_err() so that the module name is printed consistently. Signed-off-by: Anirudh Venkataramanan Signed-off-by: Herbert Xu --- crypto/tcrypt.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 2822405a0d45..78b236bc9cd8 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -575,8 +575,8 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs, } crypto_init_wait(&wait); - printk(KERN_INFO "\ntesting speed of %s (%s) %s\n", algo, - get_driver_name(crypto_aead, tfm), e); + pr_info("\ntesting speed of %s (%s) %s\n", algo, + get_driver_name(crypto_aead, tfm), e); req = aead_request_alloc(tfm, GFP_KERNEL); if (!req) { @@ -624,8 +624,8 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs, memset(iv, 0xff, iv_len); crypto_aead_clear_flags(tfm, ~0); - printk(KERN_INFO "test %u (%d bit key, %d byte blocks): ", - i, *keysize * 8, bs); + pr_info("test %u (%d bit key, %d byte blocks): ", + i, *keysize * 8, bs); memset(tvmem[0], 0xff, PAGE_SIZE); @@ -877,8 +877,8 @@ static void test_ahash_speed_common(const char *algo, unsigned int secs, return; } - printk(KERN_INFO "\ntesting speed of async %s (%s)\n", algo, - get_driver_name(crypto_ahash, tfm)); + pr_info("\ntesting speed of async %s (%s)\n", algo, + get_driver_name(crypto_ahash, tfm)); if 
(crypto_ahash_digestsize(tfm) > MAX_DIGEST_SIZE) { pr_err("digestsize(%u) > %d\n", crypto_ahash_digestsize(tfm), @@ -2885,7 +2885,7 @@ static int __init tcrypt_mod_init(void) err = do_test(alg, type, mask, mode, num_mb); if (err) { - printk(KERN_ERR "tcrypt: one or more tests failed!\n"); + pr_err("one or more tests failed!\n"); goto err_free_tv; } else { pr_debug("all tests passed\n"); From a2ef563000aff04352a4aded7b2d2bf19a1674bf Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Wed, 26 Oct 2022 12:16:15 -0700 Subject: [PATCH 0896/4122] crypto: tcrypt - Drop module name from print string The pr_fmt() define includes KBUILD_MODNAME, and so there's no need for pr_err() to also print it. Drop module name from the print string. Signed-off-by: Anirudh Venkataramanan Signed-off-by: Herbert Xu --- crypto/tcrypt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 78b236bc9cd8..40e72ec0f537 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -1329,8 +1329,7 @@ static void test_skcipher_speed(const char *algo, int enc, unsigned int secs, req = skcipher_request_alloc(tfm, GFP_KERNEL); if (!req) { - pr_err("tcrypt: skcipher: Failed to allocate request for %s\n", - algo); + pr_err("skcipher: Failed to allocate request for %s\n", algo); goto out; } From 3513828cb8f6db714ee48b1559e6253a57ecf1f6 Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Wed, 26 Oct 2022 12:16:16 -0700 Subject: [PATCH 0897/4122] crypto: tcrypt - Drop leading newlines from prints The top level print banners have a leading newline. It's not entirely clear why this exists, but it makes it harder to parse tcrypt test output using a script. Drop said newlines. tcrypt output before this patch: [...] testing speed of rfc4106(gcm(aes)) (rfc4106-gcm-aesni) encryption [...] test 0 (160 bit key, 16 byte blocks): 1 operation in 2320 cycles (16 bytes) tcrypt output with this patch: [...] 
testing speed of rfc4106(gcm(aes)) (rfc4106-gcm-aesni) encryption [...] test 0 (160 bit key, 16 byte blocks): 1 operation in 2320 cycles (16 bytes) Signed-off-by: Anirudh Venkataramanan Signed-off-by: Herbert Xu --- crypto/tcrypt.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 40e72ec0f537..b096ae901aa8 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -324,7 +324,7 @@ static void test_mb_aead_speed(const char *algo, int enc, int secs, crypto_req_done, &data[i].wait); } - pr_info("\ntesting speed of multibuffer %s (%s) %s\n", algo, + pr_info("testing speed of multibuffer %s (%s) %s\n", algo, get_driver_name(crypto_aead, tfm), e); i = 0; @@ -575,7 +575,7 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs, } crypto_init_wait(&wait); - pr_info("\ntesting speed of %s (%s) %s\n", algo, + pr_info("testing speed of %s (%s) %s\n", algo, get_driver_name(crypto_aead, tfm), e); req = aead_request_alloc(tfm, GFP_KERNEL); @@ -877,7 +877,7 @@ static void test_ahash_speed_common(const char *algo, unsigned int secs, return; } - pr_info("\ntesting speed of async %s (%s)\n", algo, + pr_info("testing speed of async %s (%s)\n", algo, get_driver_name(crypto_ahash, tfm)); if (crypto_ahash_digestsize(tfm) > MAX_DIGEST_SIZE) { @@ -1117,7 +1117,7 @@ static void test_mb_skcipher_speed(const char *algo, int enc, int secs, crypto_init_wait(&data[i].wait); } - pr_info("\ntesting speed of multibuffer %s (%s) %s\n", algo, + pr_info("testing speed of multibuffer %s (%s) %s\n", algo, get_driver_name(crypto_skcipher, tfm), e); i = 0; @@ -1324,7 +1324,7 @@ static void test_skcipher_speed(const char *algo, int enc, unsigned int secs, return; } - pr_info("\ntesting speed of %s %s (%s) %s\n", async ? "async" : "sync", + pr_info("testing speed of %s %s (%s) %s\n", async ? 
"async" : "sync", algo, get_driver_name(crypto_skcipher, tfm), e); req = skcipher_request_alloc(tfm, GFP_KERNEL); From e1fa51aa2b04830495c1fc9d3d5dee4a4419b70d Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Thu, 27 Oct 2022 14:54:53 +0800 Subject: [PATCH 0898/4122] crypto: arm64/sm3 - raise the priority of the CE implementation Raise the priority of the sm3-ce algorithm from 200 to 400, this is to make room for the implementation of sm3-neon. Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu --- arch/arm64/crypto/sm3-ce-glue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/crypto/sm3-ce-glue.c b/arch/arm64/crypto/sm3-ce-glue.c index ee98954ae8ca..54bf6ebcfffb 100644 --- a/arch/arm64/crypto/sm3-ce-glue.c +++ b/arch/arm64/crypto/sm3-ce-glue.c @@ -84,7 +84,7 @@ static struct shash_alg sm3_alg = { .base.cra_driver_name = "sm3-ce", .base.cra_blocksize = SM3_BLOCK_SIZE, .base.cra_module = THIS_MODULE, - .base.cra_priority = 200, + .base.cra_priority = 400, }; static int __init sm3_ce_mod_init(void) From a41b2129461f6c88e087ca9a6e2fde34cb6deb48 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Thu, 27 Oct 2022 14:54:54 +0800 Subject: [PATCH 0899/4122] crypto: arm64/sm3 - add NEON assembly implementation This patch adds the NEON acceleration implementation of the SM3 hash algorithm. The main algorithm is based on SM3 NEON accelerated work of the libgcrypt project. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 326 mode of tcrypt, and compares the performance data of sm3-generic and sm3-ce. The abscissas are blocks of different lengths. 
The data is tabulated and the unit is Mb/s: update-size | 16 64 256 1024 2048 4096 8192 ---------------+-------------------------------------------------------- sm3-generic | 185.24 221.28 301.26 307.43 300.83 308.82 308.91 sm3-neon | 171.81 220.20 322.94 339.28 334.09 343.61 343.87 sm3-ce | 227.48 333.48 502.62 527.87 520.45 534.91 535.40 Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu --- arch/arm64/crypto/Kconfig | 11 + arch/arm64/crypto/Makefile | 3 + arch/arm64/crypto/sm3-neon-core.S | 600 ++++++++++++++++++++++++++++++ arch/arm64/crypto/sm3-neon-glue.c | 103 +++++ 4 files changed, 717 insertions(+) create mode 100644 arch/arm64/crypto/sm3-neon-core.S create mode 100644 arch/arm64/crypto/sm3-neon-glue.c diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 8bd80508a710..4b121dc0cfba 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -96,6 +96,17 @@ config CRYPTO_SHA3_ARM64 Architecture: arm64 using: - ARMv8.2 Crypto Extensions +config CRYPTO_SM3_NEON + tristate "Hash functions: SM3 (NEON)" + depends on KERNEL_MODE_NEON + select CRYPTO_HASH + select CRYPTO_SM3 + help + SM3 (ShangMi 3) secure hash function (OSCCA GM/T 0004-2012) + + Architecture: arm64 using: + - NEON (Advanced SIMD) extensions + config CRYPTO_SM3_ARM64_CE tristate "Hash functions: SM3 (ARMv8.2 Crypto Extensions)" depends on KERNEL_MODE_NEON diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index 24bb0c4610de..087f1625e775 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -17,6 +17,9 @@ sha512-ce-y := sha512-ce-glue.o sha512-ce-core.o obj-$(CONFIG_CRYPTO_SHA3_ARM64) += sha3-ce.o sha3-ce-y := sha3-ce-glue.o sha3-ce-core.o +obj-$(CONFIG_CRYPTO_SM3_NEON) += sm3-neon.o +sm3-neon-y := sm3-neon-glue.o sm3-neon-core.o + obj-$(CONFIG_CRYPTO_SM3_ARM64_CE) += sm3-ce.o sm3-ce-y := sm3-ce-glue.o sm3-ce-core.o diff --git a/arch/arm64/crypto/sm3-neon-core.S b/arch/arm64/crypto/sm3-neon-core.S new file mode 100644 
index 000000000000..3e3b4e5c736f --- /dev/null +++ b/arch/arm64/crypto/sm3-neon-core.S @@ -0,0 +1,600 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * sm3-neon-core.S - SM3 secure hash using NEON instructions + * + * Linux/arm64 port of the libgcrypt SM3 implementation for AArch64 + * + * Copyright (C) 2021 Jussi Kivilinna + * Copyright (c) 2022 Tianjia Zhang + */ + +#include +#include + +/* Context structure */ + +#define state_h0 0 +#define state_h1 4 +#define state_h2 8 +#define state_h3 12 +#define state_h4 16 +#define state_h5 20 +#define state_h6 24 +#define state_h7 28 + +/* Stack structure */ + +#define STACK_W_SIZE (32 * 2 * 3) + +#define STACK_W (0) +#define STACK_SIZE (STACK_W + STACK_W_SIZE) + +/* Register macros */ + +#define RSTATE x0 +#define RDATA x1 +#define RNBLKS x2 +#define RKPTR x28 +#define RFRAME x29 + +#define ra w3 +#define rb w4 +#define rc w5 +#define rd w6 +#define re w7 +#define rf w8 +#define rg w9 +#define rh w10 + +#define t0 w11 +#define t1 w12 +#define t2 w13 +#define t3 w14 +#define t4 w15 +#define t5 w16 +#define t6 w17 + +#define k_even w19 +#define k_odd w20 + +#define addr0 x21 +#define addr1 x22 + +#define s0 w23 +#define s1 w24 +#define s2 w25 +#define s3 w26 + +#define W0 v0 +#define W1 v1 +#define W2 v2 +#define W3 v3 +#define W4 v4 +#define W5 v5 + +#define XTMP0 v6 +#define XTMP1 v7 +#define XTMP2 v16 +#define XTMP3 v17 +#define XTMP4 v18 +#define XTMP5 v19 +#define XTMP6 v20 + +/* Helper macros. */ + +#define _(...) /*_*/ + +#define clear_vec(x) \ + movi x.8h, #0; + +#define rolw(o, a, n) \ + ror o, a, #(32 - n); + +/* Round function macros. 
*/ + +#define GG1_1(x, y, z, o, t) \ + eor o, x, y; +#define GG1_2(x, y, z, o, t) \ + eor o, o, z; +#define GG1_3(x, y, z, o, t) + +#define FF1_1(x, y, z, o, t) GG1_1(x, y, z, o, t) +#define FF1_2(x, y, z, o, t) +#define FF1_3(x, y, z, o, t) GG1_2(x, y, z, o, t) + +#define GG2_1(x, y, z, o, t) \ + bic o, z, x; +#define GG2_2(x, y, z, o, t) \ + and t, y, x; +#define GG2_3(x, y, z, o, t) \ + eor o, o, t; + +#define FF2_1(x, y, z, o, t) \ + eor o, x, y; +#define FF2_2(x, y, z, o, t) \ + and t, x, y; \ + and o, o, z; +#define FF2_3(x, y, z, o, t) \ + eor o, o, t; + +#define R(i, a, b, c, d, e, f, g, h, k, K_LOAD, round, widx, wtype, IOP, iop_param) \ + K_LOAD(round); \ + ldr t5, [sp, #(wtype##_W1_ADDR(round, widx))]; \ + rolw(t0, a, 12); /* rol(a, 12) => t0 */ \ + IOP(1, iop_param); \ + FF##i##_1(a, b, c, t1, t2); \ + ldr t6, [sp, #(wtype##_W1W2_ADDR(round, widx))]; \ + add k, k, e; \ + IOP(2, iop_param); \ + GG##i##_1(e, f, g, t3, t4); \ + FF##i##_2(a, b, c, t1, t2); \ + IOP(3, iop_param); \ + add k, k, t0; \ + add h, h, t5; \ + add d, d, t6; /* w1w2 + d => d */ \ + IOP(4, iop_param); \ + rolw(k, k, 7); /* rol((t0 + e + t), 7) => k */ \ + GG##i##_2(e, f, g, t3, t4); \ + add h, h, k; /* h + w1 + k => h */ \ + IOP(5, iop_param); \ + FF##i##_3(a, b, c, t1, t2); \ + eor t0, t0, k; /* k ^ t0 => t0 */ \ + GG##i##_3(e, f, g, t3, t4); \ + add d, d, t1; /* FF(a,b,c) + d => d */ \ + IOP(6, iop_param); \ + add t3, t3, h; /* GG(e,f,g) + h => t3 */ \ + rolw(b, b, 9); /* rol(b, 9) => b */ \ + eor h, t3, t3, ror #(32-9); \ + IOP(7, iop_param); \ + add d, d, t0; /* t0 + d => d */ \ + rolw(f, f, 19); /* rol(f, 19) => f */ \ + IOP(8, iop_param); \ + eor h, h, t3, ror #(32-17); /* P0(t3) => h */ + +#define R1(a, b, c, d, e, f, g, h, k, K_LOAD, round, widx, wtype, IOP, iop_param) \ + R(1, ##a, ##b, ##c, ##d, ##e, ##f, ##g, ##h, ##k, K_LOAD, round, widx, wtype, IOP, iop_param) + +#define R2(a, b, c, d, e, f, g, h, k, K_LOAD, round, widx, wtype, IOP, iop_param) \ + R(2, ##a, ##b, ##c, ##d, 
##e, ##f, ##g, ##h, ##k, K_LOAD, round, widx, wtype, IOP, iop_param) + +#define KL(round) \ + ldp k_even, k_odd, [RKPTR, #(4*(round))]; + +/* Input expansion macros. */ + +/* Byte-swapped input address. */ +#define IW_W_ADDR(round, widx, offs) \ + (STACK_W + ((round) / 4) * 64 + (offs) + ((widx) * 4)) + +/* Expanded input address. */ +#define XW_W_ADDR(round, widx, offs) \ + (STACK_W + ((((round) / 3) - 4) % 2) * 64 + (offs) + ((widx) * 4)) + +/* Rounds 1-12, byte-swapped input block addresses. */ +#define IW_W1_ADDR(round, widx) IW_W_ADDR(round, widx, 32) +#define IW_W1W2_ADDR(round, widx) IW_W_ADDR(round, widx, 48) + +/* Rounds 13-64, expanded input block addresses. */ +#define XW_W1_ADDR(round, widx) XW_W_ADDR(round, widx, 0) +#define XW_W1W2_ADDR(round, widx) XW_W_ADDR(round, widx, 16) + +/* Input block loading. + * Interleaving within round function needed for in-order CPUs. */ +#define LOAD_W_VEC_1_1() \ + add addr0, sp, #IW_W1_ADDR(0, 0); +#define LOAD_W_VEC_1_2() \ + add addr1, sp, #IW_W1_ADDR(4, 0); +#define LOAD_W_VEC_1_3() \ + ld1 {W0.16b}, [RDATA], #16; +#define LOAD_W_VEC_1_4() \ + ld1 {W1.16b}, [RDATA], #16; +#define LOAD_W_VEC_1_5() \ + ld1 {W2.16b}, [RDATA], #16; +#define LOAD_W_VEC_1_6() \ + ld1 {W3.16b}, [RDATA], #16; +#define LOAD_W_VEC_1_7() \ + rev32 XTMP0.16b, W0.16b; +#define LOAD_W_VEC_1_8() \ + rev32 XTMP1.16b, W1.16b; +#define LOAD_W_VEC_2_1() \ + rev32 XTMP2.16b, W2.16b; +#define LOAD_W_VEC_2_2() \ + rev32 XTMP3.16b, W3.16b; +#define LOAD_W_VEC_2_3() \ + eor XTMP4.16b, XTMP1.16b, XTMP0.16b; +#define LOAD_W_VEC_2_4() \ + eor XTMP5.16b, XTMP2.16b, XTMP1.16b; +#define LOAD_W_VEC_2_5() \ + st1 {XTMP0.16b}, [addr0], #16; +#define LOAD_W_VEC_2_6() \ + st1 {XTMP4.16b}, [addr0]; \ + add addr0, sp, #IW_W1_ADDR(8, 0); +#define LOAD_W_VEC_2_7() \ + eor XTMP6.16b, XTMP3.16b, XTMP2.16b; +#define LOAD_W_VEC_2_8() \ + ext W0.16b, XTMP0.16b, XTMP0.16b, #8; /* W0: xx, w0, xx, xx */ +#define LOAD_W_VEC_3_1() \ + mov W2.16b, XTMP1.16b; /* W2: xx, w6, w5, w4 
*/ +#define LOAD_W_VEC_3_2() \ + st1 {XTMP1.16b}, [addr1], #16; +#define LOAD_W_VEC_3_3() \ + st1 {XTMP5.16b}, [addr1]; \ + ext W1.16b, XTMP0.16b, XTMP0.16b, #4; /* W1: xx, w3, w2, w1 */ +#define LOAD_W_VEC_3_4() \ + ext W3.16b, XTMP1.16b, XTMP2.16b, #12; /* W3: xx, w9, w8, w7 */ +#define LOAD_W_VEC_3_5() \ + ext W4.16b, XTMP2.16b, XTMP3.16b, #8; /* W4: xx, w12, w11, w10 */ +#define LOAD_W_VEC_3_6() \ + st1 {XTMP2.16b}, [addr0], #16; +#define LOAD_W_VEC_3_7() \ + st1 {XTMP6.16b}, [addr0]; +#define LOAD_W_VEC_3_8() \ + ext W5.16b, XTMP3.16b, XTMP3.16b, #4; /* W5: xx, w15, w14, w13 */ + +#define LOAD_W_VEC_1(iop_num, ...) \ + LOAD_W_VEC_1_##iop_num() +#define LOAD_W_VEC_2(iop_num, ...) \ + LOAD_W_VEC_2_##iop_num() +#define LOAD_W_VEC_3(iop_num, ...) \ + LOAD_W_VEC_3_##iop_num() + +/* Message scheduling. Note: 3 words per vector register. + * Interleaving within round function needed for in-order CPUs. */ +#define SCHED_W_1_1(round, w0, w1, w2, w3, w4, w5) \ + /* Load (w[i - 16]) => XTMP0 */ \ + /* Load (w[i - 13]) => XTMP5 */ \ + ext XTMP0.16b, w0.16b, w0.16b, #12; /* XTMP0: w0, xx, xx, xx */ +#define SCHED_W_1_2(round, w0, w1, w2, w3, w4, w5) \ + ext XTMP5.16b, w1.16b, w1.16b, #12; +#define SCHED_W_1_3(round, w0, w1, w2, w3, w4, w5) \ + ext XTMP0.16b, XTMP0.16b, w1.16b, #12; /* XTMP0: xx, w2, w1, w0 */ +#define SCHED_W_1_4(round, w0, w1, w2, w3, w4, w5) \ + ext XTMP5.16b, XTMP5.16b, w2.16b, #12; +#define SCHED_W_1_5(round, w0, w1, w2, w3, w4, w5) \ + /* w[i - 9] == w3 */ \ + /* W3 ^ XTMP0 => XTMP0 */ \ + eor XTMP0.16b, XTMP0.16b, w3.16b; +#define SCHED_W_1_6(round, w0, w1, w2, w3, w4, w5) \ + /* w[i - 3] == w5 */ \ + /* rol(w5, 15) ^ XTMP0 => XTMP0 */ \ + /* rol(XTMP5, 7) => XTMP1 */ \ + add addr0, sp, #XW_W1_ADDR((round), 0); \ + shl XTMP2.4s, w5.4s, #15; +#define SCHED_W_1_7(round, w0, w1, w2, w3, w4, w5) \ + shl XTMP1.4s, XTMP5.4s, #7; +#define SCHED_W_1_8(round, w0, w1, w2, w3, w4, w5) \ + sri XTMP2.4s, w5.4s, #(32-15); +#define SCHED_W_2_1(round, w0, w1, w2, 
w3, w4, w5) \ + sri XTMP1.4s, XTMP5.4s, #(32-7); +#define SCHED_W_2_2(round, w0, w1, w2, w3, w4, w5) \ + eor XTMP0.16b, XTMP0.16b, XTMP2.16b; +#define SCHED_W_2_3(round, w0, w1, w2, w3, w4, w5) \ + /* w[i - 6] == W4 */ \ + /* W4 ^ XTMP1 => XTMP1 */ \ + eor XTMP1.16b, XTMP1.16b, w4.16b; +#define SCHED_W_2_4(round, w0, w1, w2, w3, w4, w5) \ + /* P1(XTMP0) ^ XTMP1 => W0 */ \ + shl XTMP3.4s, XTMP0.4s, #15; +#define SCHED_W_2_5(round, w0, w1, w2, w3, w4, w5) \ + shl XTMP4.4s, XTMP0.4s, #23; +#define SCHED_W_2_6(round, w0, w1, w2, w3, w4, w5) \ + eor w0.16b, XTMP1.16b, XTMP0.16b; +#define SCHED_W_2_7(round, w0, w1, w2, w3, w4, w5) \ + sri XTMP3.4s, XTMP0.4s, #(32-15); +#define SCHED_W_2_8(round, w0, w1, w2, w3, w4, w5) \ + sri XTMP4.4s, XTMP0.4s, #(32-23); +#define SCHED_W_3_1(round, w0, w1, w2, w3, w4, w5) \ + eor w0.16b, w0.16b, XTMP3.16b; +#define SCHED_W_3_2(round, w0, w1, w2, w3, w4, w5) \ + /* Load (w[i - 3]) => XTMP2 */ \ + ext XTMP2.16b, w4.16b, w4.16b, #12; +#define SCHED_W_3_3(round, w0, w1, w2, w3, w4, w5) \ + eor w0.16b, w0.16b, XTMP4.16b; +#define SCHED_W_3_4(round, w0, w1, w2, w3, w4, w5) \ + ext XTMP2.16b, XTMP2.16b, w5.16b, #12; +#define SCHED_W_3_5(round, w0, w1, w2, w3, w4, w5) \ + /* W1 ^ W2 => XTMP3 */ \ + eor XTMP3.16b, XTMP2.16b, w0.16b; +#define SCHED_W_3_6(round, w0, w1, w2, w3, w4, w5) +#define SCHED_W_3_7(round, w0, w1, w2, w3, w4, w5) \ + st1 {XTMP2.16b-XTMP3.16b}, [addr0]; +#define SCHED_W_3_8(round, w0, w1, w2, w3, w4, w5) + +#define SCHED_W_W0W1W2W3W4W5_1(iop_num, round) \ + SCHED_W_1_##iop_num(round, W0, W1, W2, W3, W4, W5) +#define SCHED_W_W0W1W2W3W4W5_2(iop_num, round) \ + SCHED_W_2_##iop_num(round, W0, W1, W2, W3, W4, W5) +#define SCHED_W_W0W1W2W3W4W5_3(iop_num, round) \ + SCHED_W_3_##iop_num(round, W0, W1, W2, W3, W4, W5) + +#define SCHED_W_W1W2W3W4W5W0_1(iop_num, round) \ + SCHED_W_1_##iop_num(round, W1, W2, W3, W4, W5, W0) +#define SCHED_W_W1W2W3W4W5W0_2(iop_num, round) \ + SCHED_W_2_##iop_num(round, W1, W2, W3, W4, W5, W0) +#define 
SCHED_W_W1W2W3W4W5W0_3(iop_num, round) \ + SCHED_W_3_##iop_num(round, W1, W2, W3, W4, W5, W0) + +#define SCHED_W_W2W3W4W5W0W1_1(iop_num, round) \ + SCHED_W_1_##iop_num(round, W2, W3, W4, W5, W0, W1) +#define SCHED_W_W2W3W4W5W0W1_2(iop_num, round) \ + SCHED_W_2_##iop_num(round, W2, W3, W4, W5, W0, W1) +#define SCHED_W_W2W3W4W5W0W1_3(iop_num, round) \ + SCHED_W_3_##iop_num(round, W2, W3, W4, W5, W0, W1) + +#define SCHED_W_W3W4W5W0W1W2_1(iop_num, round) \ + SCHED_W_1_##iop_num(round, W3, W4, W5, W0, W1, W2) +#define SCHED_W_W3W4W5W0W1W2_2(iop_num, round) \ + SCHED_W_2_##iop_num(round, W3, W4, W5, W0, W1, W2) +#define SCHED_W_W3W4W5W0W1W2_3(iop_num, round) \ + SCHED_W_3_##iop_num(round, W3, W4, W5, W0, W1, W2) + +#define SCHED_W_W4W5W0W1W2W3_1(iop_num, round) \ + SCHED_W_1_##iop_num(round, W4, W5, W0, W1, W2, W3) +#define SCHED_W_W4W5W0W1W2W3_2(iop_num, round) \ + SCHED_W_2_##iop_num(round, W4, W5, W0, W1, W2, W3) +#define SCHED_W_W4W5W0W1W2W3_3(iop_num, round) \ + SCHED_W_3_##iop_num(round, W4, W5, W0, W1, W2, W3) + +#define SCHED_W_W5W0W1W2W3W4_1(iop_num, round) \ + SCHED_W_1_##iop_num(round, W5, W0, W1, W2, W3, W4) +#define SCHED_W_W5W0W1W2W3W4_2(iop_num, round) \ + SCHED_W_2_##iop_num(round, W5, W0, W1, W2, W3, W4) +#define SCHED_W_W5W0W1W2W3W4_3(iop_num, round) \ + SCHED_W_3_##iop_num(round, W5, W0, W1, W2, W3, W4) + + + /* + * Transform blocks*64 bytes (blocks*16 32-bit words) at 'src'. + * + * void sm3_neon_transform(struct sm3_state *sst, u8 const *src, + * int blocks) + */ + .text +.align 3 +SYM_FUNC_START(sm3_neon_transform) + ldp ra, rb, [RSTATE, #0] + ldp rc, rd, [RSTATE, #8] + ldp re, rf, [RSTATE, #16] + ldp rg, rh, [RSTATE, #24] + + stp x28, x29, [sp, #-16]! + stp x19, x20, [sp, #-16]! + stp x21, x22, [sp, #-16]! + stp x23, x24, [sp, #-16]! + stp x25, x26, [sp, #-16]! + mov RFRAME, sp + + sub addr0, sp, #STACK_SIZE + adr_l RKPTR, .LKtable + and sp, addr0, #(~63) + + /* Preload first block. 
*/ + LOAD_W_VEC_1(1, 0) + LOAD_W_VEC_1(2, 0) + LOAD_W_VEC_1(3, 0) + LOAD_W_VEC_1(4, 0) + LOAD_W_VEC_1(5, 0) + LOAD_W_VEC_1(6, 0) + LOAD_W_VEC_1(7, 0) + LOAD_W_VEC_1(8, 0) + LOAD_W_VEC_2(1, 0) + LOAD_W_VEC_2(2, 0) + LOAD_W_VEC_2(3, 0) + LOAD_W_VEC_2(4, 0) + LOAD_W_VEC_2(5, 0) + LOAD_W_VEC_2(6, 0) + LOAD_W_VEC_2(7, 0) + LOAD_W_VEC_2(8, 0) + LOAD_W_VEC_3(1, 0) + LOAD_W_VEC_3(2, 0) + LOAD_W_VEC_3(3, 0) + LOAD_W_VEC_3(4, 0) + LOAD_W_VEC_3(5, 0) + LOAD_W_VEC_3(6, 0) + LOAD_W_VEC_3(7, 0) + LOAD_W_VEC_3(8, 0) + +.balign 16 +.Loop: + /* Transform 0-3 */ + R1(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 0, 0, IW, _, 0) + R1(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 1, 1, IW, _, 0) + R1(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 2, 2, IW, _, 0) + R1(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 3, 3, IW, _, 0) + + /* Transform 4-7 + Precalc 12-14 */ + R1(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 4, 0, IW, _, 0) + R1(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 5, 1, IW, _, 0) + R1(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 6, 2, IW, SCHED_W_W0W1W2W3W4W5_1, 12) + R1(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 7, 3, IW, SCHED_W_W0W1W2W3W4W5_2, 12) + + /* Transform 8-11 + Precalc 12-17 */ + R1(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 8, 0, IW, SCHED_W_W0W1W2W3W4W5_3, 12) + R1(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 9, 1, IW, SCHED_W_W1W2W3W4W5W0_1, 15) + R1(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 10, 2, IW, SCHED_W_W1W2W3W4W5W0_2, 15) + R1(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 11, 3, IW, SCHED_W_W1W2W3W4W5W0_3, 15) + + /* Transform 12-14 + Precalc 18-20 */ + R1(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 12, 0, XW, SCHED_W_W2W3W4W5W0W1_1, 18) + R1(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 13, 1, XW, SCHED_W_W2W3W4W5W0W1_2, 18) + R1(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 14, 2, XW, SCHED_W_W2W3W4W5W0W1_3, 18) + + /* Transform 15-17 + Precalc 21-23 */ + R1(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 15, 0, XW, SCHED_W_W3W4W5W0W1W2_1, 21) + R2(ra, rb, rc, rd, re, rf, rg, 
rh, k_even, KL, 16, 1, XW, SCHED_W_W3W4W5W0W1W2_2, 21) + R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 17, 2, XW, SCHED_W_W3W4W5W0W1W2_3, 21) + + /* Transform 18-20 + Precalc 24-26 */ + R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 18, 0, XW, SCHED_W_W4W5W0W1W2W3_1, 24) + R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 19, 1, XW, SCHED_W_W4W5W0W1W2W3_2, 24) + R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 20, 2, XW, SCHED_W_W4W5W0W1W2W3_3, 24) + + /* Transform 21-23 + Precalc 27-29 */ + R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 21, 0, XW, SCHED_W_W5W0W1W2W3W4_1, 27) + R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 22, 1, XW, SCHED_W_W5W0W1W2W3W4_2, 27) + R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 23, 2, XW, SCHED_W_W5W0W1W2W3W4_3, 27) + + /* Transform 24-26 + Precalc 30-32 */ + R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 24, 0, XW, SCHED_W_W0W1W2W3W4W5_1, 30) + R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 25, 1, XW, SCHED_W_W0W1W2W3W4W5_2, 30) + R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 26, 2, XW, SCHED_W_W0W1W2W3W4W5_3, 30) + + /* Transform 27-29 + Precalc 33-35 */ + R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 27, 0, XW, SCHED_W_W1W2W3W4W5W0_1, 33) + R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 28, 1, XW, SCHED_W_W1W2W3W4W5W0_2, 33) + R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 29, 2, XW, SCHED_W_W1W2W3W4W5W0_3, 33) + + /* Transform 30-32 + Precalc 36-38 */ + R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 30, 0, XW, SCHED_W_W2W3W4W5W0W1_1, 36) + R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 31, 1, XW, SCHED_W_W2W3W4W5W0W1_2, 36) + R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 32, 2, XW, SCHED_W_W2W3W4W5W0W1_3, 36) + + /* Transform 33-35 + Precalc 39-41 */ + R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 33, 0, XW, SCHED_W_W3W4W5W0W1W2_1, 39) + R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 34, 1, XW, SCHED_W_W3W4W5W0W1W2_2, 39) + R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 35, 2, XW, SCHED_W_W3W4W5W0W1W2_3, 39) + + /* Transform 36-38 + Precalc 42-44 
*/ + R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 36, 0, XW, SCHED_W_W4W5W0W1W2W3_1, 42) + R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 37, 1, XW, SCHED_W_W4W5W0W1W2W3_2, 42) + R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 38, 2, XW, SCHED_W_W4W5W0W1W2W3_3, 42) + + /* Transform 39-41 + Precalc 45-47 */ + R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 39, 0, XW, SCHED_W_W5W0W1W2W3W4_1, 45) + R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 40, 1, XW, SCHED_W_W5W0W1W2W3W4_2, 45) + R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 41, 2, XW, SCHED_W_W5W0W1W2W3W4_3, 45) + + /* Transform 42-44 + Precalc 48-50 */ + R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 42, 0, XW, SCHED_W_W0W1W2W3W4W5_1, 48) + R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 43, 1, XW, SCHED_W_W0W1W2W3W4W5_2, 48) + R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 44, 2, XW, SCHED_W_W0W1W2W3W4W5_3, 48) + + /* Transform 45-47 + Precalc 51-53 */ + R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 45, 0, XW, SCHED_W_W1W2W3W4W5W0_1, 51) + R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 46, 1, XW, SCHED_W_W1W2W3W4W5W0_2, 51) + R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 47, 2, XW, SCHED_W_W1W2W3W4W5W0_3, 51) + + /* Transform 48-50 + Precalc 54-56 */ + R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 48, 0, XW, SCHED_W_W2W3W4W5W0W1_1, 54) + R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 49, 1, XW, SCHED_W_W2W3W4W5W0W1_2, 54) + R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 50, 2, XW, SCHED_W_W2W3W4W5W0W1_3, 54) + + /* Transform 51-53 + Precalc 57-59 */ + R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 51, 0, XW, SCHED_W_W3W4W5W0W1W2_1, 57) + R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 52, 1, XW, SCHED_W_W3W4W5W0W1W2_2, 57) + R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 53, 2, XW, SCHED_W_W3W4W5W0W1W2_3, 57) + + /* Transform 54-56 + Precalc 60-62 */ + R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 54, 0, XW, SCHED_W_W4W5W0W1W2W3_1, 60) + R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 55, 1, XW, SCHED_W_W4W5W0W1W2W3_2, 60) + 
R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 56, 2, XW, SCHED_W_W4W5W0W1W2W3_3, 60) + + /* Transform 57-59 + Precalc 63 */ + R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 57, 0, XW, SCHED_W_W5W0W1W2W3W4_1, 63) + R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 58, 1, XW, SCHED_W_W5W0W1W2W3W4_2, 63) + R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 59, 2, XW, SCHED_W_W5W0W1W2W3W4_3, 63) + + /* Transform 60 */ + R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 60, 0, XW, _, _) + subs RNBLKS, RNBLKS, #1 + b.eq .Lend + + /* Transform 61-63 + Preload next block */ + R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 61, 1, XW, LOAD_W_VEC_1, _) + ldp s0, s1, [RSTATE, #0] + R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 62, 2, XW, LOAD_W_VEC_2, _) + ldp s2, s3, [RSTATE, #8] + R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 63, 0, XW, LOAD_W_VEC_3, _) + + /* Update the chaining variables. */ + eor ra, ra, s0 + eor rb, rb, s1 + ldp s0, s1, [RSTATE, #16] + eor rc, rc, s2 + ldp k_even, k_odd, [RSTATE, #24] + eor rd, rd, s3 + eor re, re, s0 + stp ra, rb, [RSTATE, #0] + eor rf, rf, s1 + stp rc, rd, [RSTATE, #8] + eor rg, rg, k_even + stp re, rf, [RSTATE, #16] + eor rh, rh, k_odd + stp rg, rh, [RSTATE, #24] + b .Loop + +.Lend: + /* Transform 61-63 */ + R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 61, 1, XW, _, _) + ldp s0, s1, [RSTATE, #0] + R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 62, 2, XW, _, _) + ldp s2, s3, [RSTATE, #8] + R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 63, 0, XW, _, _) + + /* Update the chaining variables. 
*/ + eor ra, ra, s0 + clear_vec(W0) + eor rb, rb, s1 + clear_vec(W1) + ldp s0, s1, [RSTATE, #16] + clear_vec(W2) + eor rc, rc, s2 + clear_vec(W3) + ldp k_even, k_odd, [RSTATE, #24] + clear_vec(W4) + eor rd, rd, s3 + clear_vec(W5) + eor re, re, s0 + clear_vec(XTMP0) + stp ra, rb, [RSTATE, #0] + clear_vec(XTMP1) + eor rf, rf, s1 + clear_vec(XTMP2) + stp rc, rd, [RSTATE, #8] + clear_vec(XTMP3) + eor rg, rg, k_even + clear_vec(XTMP4) + stp re, rf, [RSTATE, #16] + clear_vec(XTMP5) + eor rh, rh, k_odd + clear_vec(XTMP6) + stp rg, rh, [RSTATE, #24] + + /* Clear message expansion area */ + add addr0, sp, #STACK_W + st1 {W0.16b-W3.16b}, [addr0], #64 + st1 {W0.16b-W3.16b}, [addr0], #64 + st1 {W0.16b-W3.16b}, [addr0] + + mov sp, RFRAME + + ldp x25, x26, [sp], #16 + ldp x23, x24, [sp], #16 + ldp x21, x22, [sp], #16 + ldp x19, x20, [sp], #16 + ldp x28, x29, [sp], #16 + + ret +SYM_FUNC_END(sm3_neon_transform) + + + .section ".rodata", "a" + + .align 4 +.LKtable: + .long 0x79cc4519, 0xf3988a32, 0xe7311465, 0xce6228cb + .long 0x9cc45197, 0x3988a32f, 0x7311465e, 0xe6228cbc + .long 0xcc451979, 0x988a32f3, 0x311465e7, 0x6228cbce + .long 0xc451979c, 0x88a32f39, 0x11465e73, 0x228cbce6 + .long 0x9d8a7a87, 0x3b14f50f, 0x7629ea1e, 0xec53d43c + .long 0xd8a7a879, 0xb14f50f3, 0x629ea1e7, 0xc53d43ce + .long 0x8a7a879d, 0x14f50f3b, 0x29ea1e76, 0x53d43cec + .long 0xa7a879d8, 0x4f50f3b1, 0x9ea1e762, 0x3d43cec5 + .long 0x7a879d8a, 0xf50f3b14, 0xea1e7629, 0xd43cec53 + .long 0xa879d8a7, 0x50f3b14f, 0xa1e7629e, 0x43cec53d + .long 0x879d8a7a, 0x0f3b14f5, 0x1e7629ea, 0x3cec53d4 + .long 0x79d8a7a8, 0xf3b14f50, 0xe7629ea1, 0xcec53d43 + .long 0x9d8a7a87, 0x3b14f50f, 0x7629ea1e, 0xec53d43c + .long 0xd8a7a879, 0xb14f50f3, 0x629ea1e7, 0xc53d43ce + .long 0x8a7a879d, 0x14f50f3b, 0x29ea1e76, 0x53d43cec + .long 0xa7a879d8, 0x4f50f3b1, 0x9ea1e762, 0x3d43cec5 diff --git a/arch/arm64/crypto/sm3-neon-glue.c b/arch/arm64/crypto/sm3-neon-glue.c new file mode 100644 index 000000000000..7182ee683f14 --- /dev/null +++ 
b/arch/arm64/crypto/sm3-neon-glue.c @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * sm3-neon-glue.c - SM3 secure hash using NEON instructions + * + * Copyright (C) 2022 Tianjia Zhang + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +asmlinkage void sm3_neon_transform(struct sm3_state *sst, u8 const *src, + int blocks); + +static int sm3_neon_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + if (!crypto_simd_usable()) { + sm3_update(shash_desc_ctx(desc), data, len); + return 0; + } + + kernel_neon_begin(); + sm3_base_do_update(desc, data, len, sm3_neon_transform); + kernel_neon_end(); + + return 0; +} + +static int sm3_neon_final(struct shash_desc *desc, u8 *out) +{ + if (!crypto_simd_usable()) { + sm3_final(shash_desc_ctx(desc), out); + return 0; + } + + kernel_neon_begin(); + sm3_base_do_finalize(desc, sm3_neon_transform); + kernel_neon_end(); + + return sm3_base_finish(desc, out); +} + +static int sm3_neon_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + if (!crypto_simd_usable()) { + struct sm3_state *sctx = shash_desc_ctx(desc); + + if (len) + sm3_update(sctx, data, len); + sm3_final(sctx, out); + return 0; + } + + kernel_neon_begin(); + if (len) + sm3_base_do_update(desc, data, len, sm3_neon_transform); + sm3_base_do_finalize(desc, sm3_neon_transform); + kernel_neon_end(); + + return sm3_base_finish(desc, out); +} + +static struct shash_alg sm3_alg = { + .digestsize = SM3_DIGEST_SIZE, + .init = sm3_base_init, + .update = sm3_neon_update, + .final = sm3_neon_final, + .finup = sm3_neon_finup, + .descsize = sizeof(struct sm3_state), + .base.cra_name = "sm3", + .base.cra_driver_name = "sm3-neon", + .base.cra_blocksize = SM3_BLOCK_SIZE, + .base.cra_module = THIS_MODULE, + .base.cra_priority = 200, +}; + +static int __init sm3_neon_init(void) +{ + return crypto_register_shash(&sm3_alg); +} + +static void __exit 
sm3_neon_fini(void) +{ + crypto_unregister_shash(&sm3_alg); +} + +module_init(sm3_neon_init); +module_exit(sm3_neon_fini); + +MODULE_DESCRIPTION("SM3 secure hash using NEON instructions"); +MODULE_AUTHOR("Jussi Kivilinna "); +MODULE_AUTHOR("Tianjia Zhang "); +MODULE_LICENSE("GPL v2"); From 62508017a264133a62987fe40f70c68af9a36572 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Thu, 27 Oct 2022 14:54:55 +0800 Subject: [PATCH 0900/4122] crypto: arm64/sm4 - refactor and simplify NEON implementation This patch does not add new features. The main work is to refactor and simplify the implementation of SM4 NEON, which is reflected in the following aspects: The accelerated implementation supports the arbitrary number of blocks, not just multiples of 8, which simplifies the implementation and brings some optimization acceleration for data that is not aligned by 8 blocks. When loading the input data, use the ld4 instruction to replace the original ld1 instruction as much as possible, which will save the cost of matrix transposition of the input data. Use 8-block parallelism whenever possible to speed up matrix transpose and rotation operations, instead of up to 4-block parallelism. Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu --- arch/arm64/crypto/sm4-neon-core.S | 604 ++++++++++++++++++++---------- arch/arm64/crypto/sm4-neon-glue.c | 180 +++------ 2 files changed, 447 insertions(+), 337 deletions(-) diff --git a/arch/arm64/crypto/sm4-neon-core.S b/arch/arm64/crypto/sm4-neon-core.S index 3d5256b354d2..f295b4b7d70a 100644 --- a/arch/arm64/crypto/sm4-neon-core.S +++ b/arch/arm64/crypto/sm4-neon-core.S @@ -18,6 +18,11 @@ #define RTMP2 v10 #define RTMP3 v11 +#define RTMP4 v12 +#define RTMP5 v13 +#define RTMP6 v14 +#define RTMP7 v15 + #define RX0 v12 #define RX1 v13 #define RKEY v14 @@ -25,7 +30,7 @@ /* Helper macros. 
*/ -#define PREPARE \ +#define SM4_PREPARE() \ adr_l x5, crypto_sm4_sbox; \ ld1 {v16.16b-v19.16b}, [x5], #64; \ ld1 {v20.16b-v23.16b}, [x5], #64; \ @@ -42,7 +47,25 @@ zip1 s2.2d, RTMP2.2d, RTMP3.2d; \ zip2 s3.2d, RTMP2.2d, RTMP3.2d; -#define rotate_clockwise_90(s0, s1, s2, s3) \ +#define transpose_4x4_2x(s0, s1, s2, s3, s4, s5, s6, s7) \ + zip1 RTMP0.4s, s0.4s, s1.4s; \ + zip1 RTMP1.4s, s2.4s, s3.4s; \ + zip2 RTMP2.4s, s0.4s, s1.4s; \ + zip2 RTMP3.4s, s2.4s, s3.4s; \ + zip1 RTMP4.4s, s4.4s, s5.4s; \ + zip1 RTMP5.4s, s6.4s, s7.4s; \ + zip2 RTMP6.4s, s4.4s, s5.4s; \ + zip2 RTMP7.4s, s6.4s, s7.4s; \ + zip1 s0.2d, RTMP0.2d, RTMP1.2d; \ + zip2 s1.2d, RTMP0.2d, RTMP1.2d; \ + zip1 s2.2d, RTMP2.2d, RTMP3.2d; \ + zip2 s3.2d, RTMP2.2d, RTMP3.2d; \ + zip1 s4.2d, RTMP4.2d, RTMP5.2d; \ + zip2 s5.2d, RTMP4.2d, RTMP5.2d; \ + zip1 s6.2d, RTMP6.2d, RTMP7.2d; \ + zip2 s7.2d, RTMP6.2d, RTMP7.2d; + +#define rotate_clockwise_4x4(s0, s1, s2, s3) \ zip1 RTMP0.4s, s1.4s, s0.4s; \ zip2 RTMP1.4s, s1.4s, s0.4s; \ zip1 RTMP2.4s, s3.4s, s2.4s; \ @@ -52,6 +75,24 @@ zip1 s2.2d, RTMP3.2d, RTMP1.2d; \ zip2 s3.2d, RTMP3.2d, RTMP1.2d; +#define rotate_clockwise_4x4_2x(s0, s1, s2, s3, s4, s5, s6, s7) \ + zip1 RTMP0.4s, s1.4s, s0.4s; \ + zip1 RTMP2.4s, s3.4s, s2.4s; \ + zip2 RTMP1.4s, s1.4s, s0.4s; \ + zip2 RTMP3.4s, s3.4s, s2.4s; \ + zip1 RTMP4.4s, s5.4s, s4.4s; \ + zip1 RTMP6.4s, s7.4s, s6.4s; \ + zip2 RTMP5.4s, s5.4s, s4.4s; \ + zip2 RTMP7.4s, s7.4s, s6.4s; \ + zip1 s0.2d, RTMP2.2d, RTMP0.2d; \ + zip2 s1.2d, RTMP2.2d, RTMP0.2d; \ + zip1 s2.2d, RTMP3.2d, RTMP1.2d; \ + zip2 s3.2d, RTMP3.2d, RTMP1.2d; \ + zip1 s4.2d, RTMP6.2d, RTMP4.2d; \ + zip2 s5.2d, RTMP6.2d, RTMP4.2d; \ + zip1 s6.2d, RTMP7.2d, RTMP5.2d; \ + zip2 s7.2d, RTMP7.2d, RTMP5.2d; + #define ROUND4(round, s0, s1, s2, s3) \ dup RX0.4s, RKEY.s[round]; \ /* rk ^ s1 ^ s2 ^ s3 */ \ @@ -87,14 +128,7 @@ /* s0 ^= RTMP3 */ \ eor s0.16b, s0.16b, RTMP3.16b; -#define SM4_CRYPT_BLK4(b0, b1, b2, b3) \ - rev32 b0.16b, b0.16b; \ - rev32 b1.16b, b1.16b; \ - 
rev32 b2.16b, b2.16b; \ - rev32 b3.16b, b3.16b; \ - \ - transpose_4x4(b0, b1, b2, b3); \ - \ +#define SM4_CRYPT_BLK4_BE(b0, b1, b2, b3) \ mov x6, 8; \ 4: \ ld1 {RKEY.4s}, [x0], #16; \ @@ -107,15 +141,23 @@ \ bne 4b; \ \ - rotate_clockwise_90(b0, b1, b2, b3); \ rev32 b0.16b, b0.16b; \ rev32 b1.16b, b1.16b; \ rev32 b2.16b, b2.16b; \ rev32 b3.16b, b3.16b; \ \ + rotate_clockwise_4x4(b0, b1, b2, b3); \ + \ /* repoint to rkey */ \ sub x0, x0, #128; +#define SM4_CRYPT_BLK4(b0, b1, b2, b3) \ + rev32 b0.16b, b0.16b; \ + rev32 b1.16b, b1.16b; \ + rev32 b2.16b, b2.16b; \ + rev32 b3.16b, b3.16b; \ + SM4_CRYPT_BLK4_BE(b0, b1, b2, b3); + #define ROUND8(round, s0, s1, s2, s3, t0, t1, t2, t3) \ /* rk ^ s1 ^ s2 ^ s3 */ \ dup RX0.4s, RKEY.s[round]; \ @@ -175,7 +217,7 @@ eor s0.16b, s0.16b, RTMP0.16b; \ eor t0.16b, t0.16b, RTMP1.16b; -#define SM4_CRYPT_BLK8(b0, b1, b2, b3, b4, b5, b6, b7) \ +#define SM4_CRYPT_BLK8_norotate(b0, b1, b2, b3, b4, b5, b6, b7) \ rev32 b0.16b, b0.16b; \ rev32 b1.16b, b1.16b; \ rev32 b2.16b, b2.16b; \ @@ -185,9 +227,6 @@ rev32 b6.16b, b6.16b; \ rev32 b7.16b, b7.16b; \ \ - transpose_4x4(b0, b1, b2, b3); \ - transpose_4x4(b4, b5, b6, b7); \ - \ mov x6, 8; \ 8: \ ld1 {RKEY.4s}, [x0], #16; \ @@ -200,8 +239,6 @@ \ bne 8b; \ \ - rotate_clockwise_90(b0, b1, b2, b3); \ - rotate_clockwise_90(b4, b5, b6, b7); \ rev32 b0.16b, b0.16b; \ rev32 b1.16b, b1.16b; \ rev32 b2.16b, b2.16b; \ @@ -214,274 +251,429 @@ /* repoint to rkey */ \ sub x0, x0, #128; +#define SM4_CRYPT_BLK8(b0, b1, b2, b3, b4, b5, b6, b7) \ + SM4_CRYPT_BLK8_norotate(b0, b1, b2, b3, b4, b5, b6, b7); \ + rotate_clockwise_4x4_2x(b0, b1, b2, b3, b4, b5, b6, b7); \ + .align 3 -SYM_FUNC_START_LOCAL(__sm4_neon_crypt_blk1_4) +SYM_FUNC_START(sm4_neon_crypt) /* input: * x0: round key array, CTX * x1: dst * x2: src - * w3: num blocks (1..4) + * w3: nblocks */ - PREPARE; + SM4_PREPARE() - ld1 {v0.16b}, [x2], #16; - mov v1.16b, v0.16b; - mov v2.16b, v0.16b; - mov v3.16b, v0.16b; - cmp w3, #2; - blt 
.Lblk4_load_input_done; - ld1 {v1.16b}, [x2], #16; - beq .Lblk4_load_input_done; - ld1 {v2.16b}, [x2], #16; - cmp w3, #3; - beq .Lblk4_load_input_done; - ld1 {v3.16b}, [x2]; +.Lcrypt_loop_8x: + sub w3, w3, #8 + tbnz w3, #31, .Lcrypt_4x -.Lblk4_load_input_done: - SM4_CRYPT_BLK4(v0, v1, v2, v3); + ld4 {v0.4s-v3.4s}, [x2], #64 + ld4 {v4.4s-v7.4s}, [x2], #64 - st1 {v0.16b}, [x1], #16; - cmp w3, #2; - blt .Lblk4_store_output_done; - st1 {v1.16b}, [x1], #16; - beq .Lblk4_store_output_done; - st1 {v2.16b}, [x1], #16; - cmp w3, #3; - beq .Lblk4_store_output_done; - st1 {v3.16b}, [x1]; + SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7) -.Lblk4_store_output_done: - ret; -SYM_FUNC_END(__sm4_neon_crypt_blk1_4) + st1 {v0.16b-v3.16b}, [x1], #64 + st1 {v4.16b-v7.16b}, [x1], #64 -.align 3 -SYM_FUNC_START(sm4_neon_crypt_blk1_8) - /* input: - * x0: round key array, CTX - * x1: dst - * x2: src - * w3: num blocks (1..8) - */ - cmp w3, #5; - blt __sm4_neon_crypt_blk1_4; + cbz w3, .Lcrypt_end + b .Lcrypt_loop_8x - PREPARE; +.Lcrypt_4x: + add w3, w3, #8 + cmp w3, #4 + blt .Lcrypt_tail - ld1 {v0.16b-v3.16b}, [x2], #64; - ld1 {v4.16b}, [x2], #16; - mov v5.16b, v4.16b; - mov v6.16b, v4.16b; - mov v7.16b, v4.16b; - beq .Lblk8_load_input_done; - ld1 {v5.16b}, [x2], #16; - cmp w3, #7; - blt .Lblk8_load_input_done; - ld1 {v6.16b}, [x2], #16; - beq .Lblk8_load_input_done; - ld1 {v7.16b}, [x2]; + sub w3, w3, #4 -.Lblk8_load_input_done: - SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7); + ld4 {v0.4s-v3.4s}, [x2], #64 - cmp w3, #6; - st1 {v0.16b-v3.16b}, [x1], #64; - st1 {v4.16b}, [x1], #16; - blt .Lblk8_store_output_done; - st1 {v5.16b}, [x1], #16; - beq .Lblk8_store_output_done; - st1 {v6.16b}, [x1], #16; - cmp w3, #7; - beq .Lblk8_store_output_done; - st1 {v7.16b}, [x1]; + SM4_CRYPT_BLK4(v0, v1, v2, v3) -.Lblk8_store_output_done: - ret; -SYM_FUNC_END(sm4_neon_crypt_blk1_8) + st1 {v0.16b-v3.16b}, [x1], #64 -.align 3 -SYM_FUNC_START(sm4_neon_crypt_blk8) - /* input: - * x0: round key array, CTX - * 
x1: dst - * x2: src - * w3: nblocks (multiples of 8) - */ - PREPARE; + cbz w3, .Lcrypt_end -.Lcrypt_loop_blk: - subs w3, w3, #8; - bmi .Lcrypt_end; +.Lcrypt_tail: + cmp w3, #2 + ld1 {v0.16b}, [x2], #16 + blt .Lcrypt_tail_load_done + ld1 {v1.16b}, [x2], #16 + beq .Lcrypt_tail_load_done + ld1 {v2.16b}, [x2], #16 - ld1 {v0.16b-v3.16b}, [x2], #64; - ld1 {v4.16b-v7.16b}, [x2], #64; +.Lcrypt_tail_load_done: + transpose_4x4(v0, v1, v2, v3) - SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7); + SM4_CRYPT_BLK4(v0, v1, v2, v3) - st1 {v0.16b-v3.16b}, [x1], #64; - st1 {v4.16b-v7.16b}, [x1], #64; - - b .Lcrypt_loop_blk; + cmp w3, #2 + st1 {v0.16b}, [x1], #16 + blt .Lcrypt_end + st1 {v1.16b}, [x1], #16 + beq .Lcrypt_end + st1 {v2.16b}, [x1], #16 .Lcrypt_end: - ret; -SYM_FUNC_END(sm4_neon_crypt_blk8) + ret +SYM_FUNC_END(sm4_neon_crypt) .align 3 -SYM_FUNC_START(sm4_neon_cbc_dec_blk8) +SYM_FUNC_START(sm4_neon_cbc_dec) /* input: * x0: round key array, CTX * x1: dst * x2: src * x3: iv (big endian, 128 bit) - * w4: nblocks (multiples of 8) + * w4: nblocks */ - PREPARE; + SM4_PREPARE() - ld1 {RIV.16b}, [x3]; + ld1 {RIV.16b}, [x3] -.Lcbc_loop_blk: - subs w4, w4, #8; - bmi .Lcbc_end; +.Lcbc_dec_loop_8x: + sub w4, w4, #8 + tbnz w4, #31, .Lcbc_dec_4x - ld1 {v0.16b-v3.16b}, [x2], #64; - ld1 {v4.16b-v7.16b}, [x2]; + ld4 {v0.4s-v3.4s}, [x2], #64 + ld4 {v4.4s-v7.4s}, [x2] - SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7); + SM4_CRYPT_BLK8_norotate(v0, v1, v2, v3, v4, v5, v6, v7) - sub x2, x2, #64; - eor v0.16b, v0.16b, RIV.16b; - ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; - eor v1.16b, v1.16b, RTMP0.16b; - eor v2.16b, v2.16b, RTMP1.16b; - eor v3.16b, v3.16b, RTMP2.16b; - st1 {v0.16b-v3.16b}, [x1], #64; + /* Avoid overwriting the RIV register */ + rotate_clockwise_4x4(v0, v1, v2, v3) + rotate_clockwise_4x4(v4, v5, v6, v7) - eor v4.16b, v4.16b, RTMP3.16b; - ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; - eor v5.16b, v5.16b, RTMP0.16b; - eor v6.16b, v6.16b, RTMP1.16b; - eor v7.16b, v7.16b, RTMP2.16b; + sub x2, 
x2, #64 - mov RIV.16b, RTMP3.16b; - st1 {v4.16b-v7.16b}, [x1], #64; + eor v0.16b, v0.16b, RIV.16b - b .Lcbc_loop_blk; + ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64 + ld1 {RTMP4.16b-RTMP7.16b}, [x2], #64 -.Lcbc_end: + eor v1.16b, v1.16b, RTMP0.16b + eor v2.16b, v2.16b, RTMP1.16b + eor v3.16b, v3.16b, RTMP2.16b + eor v4.16b, v4.16b, RTMP3.16b + eor v5.16b, v5.16b, RTMP4.16b + eor v6.16b, v6.16b, RTMP5.16b + eor v7.16b, v7.16b, RTMP6.16b + + mov RIV.16b, RTMP7.16b + + st1 {v0.16b-v3.16b}, [x1], #64 + st1 {v4.16b-v7.16b}, [x1], #64 + + cbz w4, .Lcbc_dec_end + b .Lcbc_dec_loop_8x + +.Lcbc_dec_4x: + add w4, w4, #8 + cmp w4, #4 + blt .Lcbc_dec_tail + + sub w4, w4, #4 + + ld1 {v0.16b-v3.16b}, [x2], #64 + + rev32 v4.16b, v0.16b + rev32 v5.16b, v1.16b + rev32 v6.16b, v2.16b + rev32 v7.16b, v3.16b + + transpose_4x4(v4, v5, v6, v7) + + SM4_CRYPT_BLK4_BE(v4, v5, v6, v7) + + eor v4.16b, v4.16b, RIV.16b + eor v5.16b, v5.16b, v0.16b + eor v6.16b, v6.16b, v1.16b + eor v7.16b, v7.16b, v2.16b + + mov RIV.16b, v3.16b + + st1 {v4.16b-v7.16b}, [x1], #64 + + cbz w4, .Lcbc_dec_end + +.Lcbc_dec_tail: + cmp w4, #2 + ld1 {v0.16b}, [x2], #16 + blt .Lcbc_dec_tail_load_done + ld1 {v1.16b}, [x2], #16 + beq .Lcbc_dec_tail_load_done + ld1 {v2.16b}, [x2], #16 + +.Lcbc_dec_tail_load_done: + rev32 v4.16b, v0.16b + rev32 v5.16b, v1.16b + rev32 v6.16b, v2.16b + + transpose_4x4(v4, v5, v6, v7) + + SM4_CRYPT_BLK4_BE(v4, v5, v6, v7) + + cmp w4, #2 + eor v4.16b, v4.16b, RIV.16b + mov RIV.16b, v0.16b + st1 {v4.16b}, [x1], #16 + blt .Lcbc_dec_end + + eor v5.16b, v5.16b, v0.16b + mov RIV.16b, v1.16b + st1 {v5.16b}, [x1], #16 + beq .Lcbc_dec_end + + eor v6.16b, v6.16b, v1.16b + mov RIV.16b, v2.16b + st1 {v6.16b}, [x1], #16 + +.Lcbc_dec_end: /* store new IV */ - st1 {RIV.16b}, [x3]; + st1 {RIV.16b}, [x3] - ret; -SYM_FUNC_END(sm4_neon_cbc_dec_blk8) + ret +SYM_FUNC_END(sm4_neon_cbc_dec) .align 3 -SYM_FUNC_START(sm4_neon_cfb_dec_blk8) +SYM_FUNC_START(sm4_neon_cfb_dec) /* input: * x0: round key array, CTX * x1: dst * x2: 
src * x3: iv (big endian, 128 bit) - * w4: nblocks (multiples of 8) + * w4: nblocks */ - PREPARE; + SM4_PREPARE() - ld1 {v0.16b}, [x3]; + ld1 {v0.16b}, [x3] -.Lcfb_loop_blk: - subs w4, w4, #8; - bmi .Lcfb_end; +.Lcfb_dec_loop_8x: + sub w4, w4, #8 + tbnz w4, #31, .Lcfb_dec_4x - ld1 {v1.16b, v2.16b, v3.16b}, [x2], #48; - ld1 {v4.16b-v7.16b}, [x2]; + ld1 {v1.16b-v3.16b}, [x2], #48 + ld4 {v4.4s-v7.4s}, [x2] - SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7); + transpose_4x4(v0, v1, v2, v3) - sub x2, x2, #48; - ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; - eor v0.16b, v0.16b, RTMP0.16b; - eor v1.16b, v1.16b, RTMP1.16b; - eor v2.16b, v2.16b, RTMP2.16b; - eor v3.16b, v3.16b, RTMP3.16b; - st1 {v0.16b-v3.16b}, [x1], #64; + SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7) - ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; - eor v4.16b, v4.16b, RTMP0.16b; - eor v5.16b, v5.16b, RTMP1.16b; - eor v6.16b, v6.16b, RTMP2.16b; - eor v7.16b, v7.16b, RTMP3.16b; - st1 {v4.16b-v7.16b}, [x1], #64; + sub x2, x2, #48 + ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64 + ld1 {RTMP4.16b-RTMP7.16b}, [x2], #64 - mov v0.16b, RTMP3.16b; + eor v0.16b, v0.16b, RTMP0.16b + eor v1.16b, v1.16b, RTMP1.16b + eor v2.16b, v2.16b, RTMP2.16b + eor v3.16b, v3.16b, RTMP3.16b + eor v4.16b, v4.16b, RTMP4.16b + eor v5.16b, v5.16b, RTMP5.16b + eor v6.16b, v6.16b, RTMP6.16b + eor v7.16b, v7.16b, RTMP7.16b - b .Lcfb_loop_blk; + st1 {v0.16b-v3.16b}, [x1], #64 + st1 {v4.16b-v7.16b}, [x1], #64 -.Lcfb_end: + mov v0.16b, RTMP7.16b + + cbz w4, .Lcfb_dec_end + b .Lcfb_dec_loop_8x + +.Lcfb_dec_4x: + add w4, w4, #8 + cmp w4, #4 + blt .Lcfb_dec_tail + + sub w4, w4, #4 + + ld1 {v4.16b-v7.16b}, [x2], #64 + + rev32 v0.16b, v0.16b /* v0 is IV register */ + rev32 v1.16b, v4.16b + rev32 v2.16b, v5.16b + rev32 v3.16b, v6.16b + + transpose_4x4(v0, v1, v2, v3) + + SM4_CRYPT_BLK4_BE(v0, v1, v2, v3) + + eor v0.16b, v0.16b, v4.16b + eor v1.16b, v1.16b, v5.16b + eor v2.16b, v2.16b, v6.16b + eor v3.16b, v3.16b, v7.16b + + st1 {v0.16b-v3.16b}, [x1], #64 + + mov v0.16b, 
v7.16b + + cbz w4, .Lcfb_dec_end + +.Lcfb_dec_tail: + cmp w4, #2 + ld1 {v4.16b}, [x2], #16 + blt .Lcfb_dec_tail_load_done + ld1 {v5.16b}, [x2], #16 + beq .Lcfb_dec_tail_load_done + ld1 {v6.16b}, [x2], #16 + +.Lcfb_dec_tail_load_done: + rev32 v0.16b, v0.16b /* v0 is IV register */ + rev32 v1.16b, v4.16b + rev32 v2.16b, v5.16b + + transpose_4x4(v0, v1, v2, v3) + + SM4_CRYPT_BLK4_BE(v0, v1, v2, v3) + + cmp w4, #2 + eor v0.16b, v0.16b, v4.16b + st1 {v0.16b}, [x1], #16 + mov v0.16b, v4.16b + blt .Lcfb_dec_end + + eor v1.16b, v1.16b, v5.16b + st1 {v1.16b}, [x1], #16 + mov v0.16b, v5.16b + beq .Lcfb_dec_end + + eor v2.16b, v2.16b, v6.16b + st1 {v2.16b}, [x1], #16 + mov v0.16b, v6.16b + +.Lcfb_dec_end: /* store new IV */ - st1 {v0.16b}, [x3]; + st1 {v0.16b}, [x3] - ret; -SYM_FUNC_END(sm4_neon_cfb_dec_blk8) + ret +SYM_FUNC_END(sm4_neon_cfb_dec) .align 3 -SYM_FUNC_START(sm4_neon_ctr_enc_blk8) +SYM_FUNC_START(sm4_neon_ctr_crypt) /* input: * x0: round key array, CTX * x1: dst * x2: src * x3: ctr (big endian, 128 bit) - * w4: nblocks (multiples of 8) + * w4: nblocks */ - PREPARE; + SM4_PREPARE() - ldp x7, x8, [x3]; - rev x7, x7; - rev x8, x8; + ldp x7, x8, [x3] + rev x7, x7 + rev x8, x8 -.Lctr_loop_blk: - subs w4, w4, #8; - bmi .Lctr_end; +.Lctr_crypt_loop_8x: + sub w4, w4, #8 + tbnz w4, #31, .Lctr_crypt_4x -#define inc_le128(vctr) \ - mov vctr.d[1], x8; \ - mov vctr.d[0], x7; \ - adds x8, x8, #1; \ - adc x7, x7, xzr; \ - rev64 vctr.16b, vctr.16b; +#define inc_le128(vctr) \ + mov vctr.d[1], x8; \ + mov vctr.d[0], x7; \ + adds x8, x8, #1; \ + rev64 vctr.16b, vctr.16b; \ + adc x7, x7, xzr; /* construct CTRs */ - inc_le128(v0); /* +0 */ - inc_le128(v1); /* +1 */ - inc_le128(v2); /* +2 */ - inc_le128(v3); /* +3 */ - inc_le128(v4); /* +4 */ - inc_le128(v5); /* +5 */ - inc_le128(v6); /* +6 */ - inc_le128(v7); /* +7 */ + inc_le128(v0) /* +0 */ + inc_le128(v1) /* +1 */ + inc_le128(v2) /* +2 */ + inc_le128(v3) /* +3 */ + inc_le128(v4) /* +4 */ + inc_le128(v5) /* +5 */ + inc_le128(v6) /* 
+6 */ + inc_le128(v7) /* +7 */ - SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7); + transpose_4x4_2x(v0, v1, v2, v3, v4, v5, v6, v7) - ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; - eor v0.16b, v0.16b, RTMP0.16b; - eor v1.16b, v1.16b, RTMP1.16b; - eor v2.16b, v2.16b, RTMP2.16b; - eor v3.16b, v3.16b, RTMP3.16b; - st1 {v0.16b-v3.16b}, [x1], #64; + SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7) - ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; - eor v4.16b, v4.16b, RTMP0.16b; - eor v5.16b, v5.16b, RTMP1.16b; - eor v6.16b, v6.16b, RTMP2.16b; - eor v7.16b, v7.16b, RTMP3.16b; - st1 {v4.16b-v7.16b}, [x1], #64; + ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64 + ld1 {RTMP4.16b-RTMP7.16b}, [x2], #64 - b .Lctr_loop_blk; + eor v0.16b, v0.16b, RTMP0.16b + eor v1.16b, v1.16b, RTMP1.16b + eor v2.16b, v2.16b, RTMP2.16b + eor v3.16b, v3.16b, RTMP3.16b + eor v4.16b, v4.16b, RTMP4.16b + eor v5.16b, v5.16b, RTMP5.16b + eor v6.16b, v6.16b, RTMP6.16b + eor v7.16b, v7.16b, RTMP7.16b -.Lctr_end: + st1 {v0.16b-v3.16b}, [x1], #64 + st1 {v4.16b-v7.16b}, [x1], #64 + + cbz w4, .Lctr_crypt_end + b .Lctr_crypt_loop_8x + +.Lctr_crypt_4x: + add w4, w4, #8 + cmp w4, #4 + blt .Lctr_crypt_tail + + sub w4, w4, #4 + + /* construct CTRs */ + inc_le128(v0) /* +0 */ + inc_le128(v1) /* +1 */ + inc_le128(v2) /* +2 */ + inc_le128(v3) /* +3 */ + + ld1 {v4.16b-v7.16b}, [x2], #64 + + transpose_4x4(v0, v1, v2, v3) + + SM4_CRYPT_BLK4(v0, v1, v2, v3) + + eor v0.16b, v0.16b, v4.16b + eor v1.16b, v1.16b, v5.16b + eor v2.16b, v2.16b, v6.16b + eor v3.16b, v3.16b, v7.16b + + st1 {v0.16b-v3.16b}, [x1], #64 + + cbz w4, .Lctr_crypt_end + +.Lctr_crypt_tail: + /* inc_le128 will change the sign bit */ + ld1 {v4.16b}, [x2], #16 + inc_le128(v0) + cmp w4, #2 + blt .Lctr_crypt_tail_load_done + + ld1 {v5.16b}, [x2], #16 + inc_le128(v1) + cmp w4, #2 + beq .Lctr_crypt_tail_load_done + + ld1 {v6.16b}, [x2], #16 + inc_le128(v2) + +.Lctr_crypt_tail_load_done: + transpose_4x4(v0, v1, v2, v3) + + SM4_CRYPT_BLK4(v0, v1, v2, v3) + + cmp w4, #2 + + eor v0.16b, 
v0.16b, v4.16b + st1 {v0.16b}, [x1], #16 + blt .Lctr_crypt_end + + eor v1.16b, v1.16b, v5.16b + st1 {v1.16b}, [x1], #16 + beq .Lctr_crypt_end + + eor v2.16b, v2.16b, v6.16b + st1 {v2.16b}, [x1], #16 + +.Lctr_crypt_end: /* store new CTR */ - rev x7, x7; - rev x8, x8; - stp x7, x8, [x3]; + rev x7, x7 + rev x8, x8 + stp x7, x8, [x3] - ret; -SYM_FUNC_END(sm4_neon_ctr_enc_blk8) + ret +SYM_FUNC_END(sm4_neon_ctr_crypt) diff --git a/arch/arm64/crypto/sm4-neon-glue.c b/arch/arm64/crypto/sm4-neon-glue.c index 03a6a6866a31..7b19accf5c03 100644 --- a/arch/arm64/crypto/sm4-neon-glue.c +++ b/arch/arm64/crypto/sm4-neon-glue.c @@ -18,19 +18,14 @@ #include #include -#define BYTES2BLKS(nbytes) ((nbytes) >> 4) -#define BYTES2BLK8(nbytes) (((nbytes) >> 4) & ~(8 - 1)) - -asmlinkage void sm4_neon_crypt_blk1_8(const u32 *rkey, u8 *dst, const u8 *src, - unsigned int nblks); -asmlinkage void sm4_neon_crypt_blk8(const u32 *rkey, u8 *dst, const u8 *src, - unsigned int nblks); -asmlinkage void sm4_neon_cbc_dec_blk8(const u32 *rkey, u8 *dst, const u8 *src, - u8 *iv, unsigned int nblks); -asmlinkage void sm4_neon_cfb_dec_blk8(const u32 *rkey, u8 *dst, const u8 *src, - u8 *iv, unsigned int nblks); -asmlinkage void sm4_neon_ctr_enc_blk8(const u32 *rkey, u8 *dst, const u8 *src, - u8 *iv, unsigned int nblks); +asmlinkage void sm4_neon_crypt(const u32 *rkey, u8 *dst, const u8 *src, + unsigned int nblocks); +asmlinkage void sm4_neon_cbc_dec(const u32 *rkey_dec, u8 *dst, const u8 *src, + u8 *iv, unsigned int nblocks); +asmlinkage void sm4_neon_cfb_dec(const u32 *rkey_enc, u8 *dst, const u8 *src, + u8 *iv, unsigned int nblocks); +asmlinkage void sm4_neon_ctr_crypt(const u32 *rkey_enc, u8 *dst, const u8 *src, + u8 *iv, unsigned int nblocks); static int sm4_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int key_len) @@ -51,27 +46,18 @@ static int sm4_ecb_do_crypt(struct skcipher_request *req, const u32 *rkey) while ((nbytes = walk.nbytes) > 0) { const u8 *src = walk.src.virt.addr; u8 *dst = 
walk.dst.virt.addr; - unsigned int nblks; + unsigned int nblocks; - kernel_neon_begin(); + nblocks = nbytes / SM4_BLOCK_SIZE; + if (nblocks) { + kernel_neon_begin(); - nblks = BYTES2BLK8(nbytes); - if (nblks) { - sm4_neon_crypt_blk8(rkey, dst, src, nblks); - dst += nblks * SM4_BLOCK_SIZE; - src += nblks * SM4_BLOCK_SIZE; - nbytes -= nblks * SM4_BLOCK_SIZE; + sm4_neon_crypt(rkey, dst, src, nblocks); + + kernel_neon_end(); } - nblks = BYTES2BLKS(nbytes); - if (nblks) { - sm4_neon_crypt_blk1_8(rkey, dst, src, nblks); - nbytes -= nblks * SM4_BLOCK_SIZE; - } - - kernel_neon_end(); - - err = skcipher_walk_done(&walk, nbytes); + err = skcipher_walk_done(&walk, nbytes % SM4_BLOCK_SIZE); } return err; @@ -138,48 +124,19 @@ static int sm4_cbc_decrypt(struct skcipher_request *req) while ((nbytes = walk.nbytes) > 0) { const u8 *src = walk.src.virt.addr; u8 *dst = walk.dst.virt.addr; - unsigned int nblks; + unsigned int nblocks; - kernel_neon_begin(); + nblocks = nbytes / SM4_BLOCK_SIZE; + if (nblocks) { + kernel_neon_begin(); - nblks = BYTES2BLK8(nbytes); - if (nblks) { - sm4_neon_cbc_dec_blk8(ctx->rkey_dec, dst, src, - walk.iv, nblks); - dst += nblks * SM4_BLOCK_SIZE; - src += nblks * SM4_BLOCK_SIZE; - nbytes -= nblks * SM4_BLOCK_SIZE; + sm4_neon_cbc_dec(ctx->rkey_dec, dst, src, + walk.iv, nblocks); + + kernel_neon_end(); } - nblks = BYTES2BLKS(nbytes); - if (nblks) { - u8 keystream[SM4_BLOCK_SIZE * 8]; - u8 iv[SM4_BLOCK_SIZE]; - int i; - - sm4_neon_crypt_blk1_8(ctx->rkey_dec, keystream, - src, nblks); - - src += ((int)nblks - 2) * SM4_BLOCK_SIZE; - dst += (nblks - 1) * SM4_BLOCK_SIZE; - memcpy(iv, src + SM4_BLOCK_SIZE, SM4_BLOCK_SIZE); - - for (i = nblks - 1; i > 0; i--) { - crypto_xor_cpy(dst, src, - &keystream[i * SM4_BLOCK_SIZE], - SM4_BLOCK_SIZE); - src -= SM4_BLOCK_SIZE; - dst -= SM4_BLOCK_SIZE; - } - crypto_xor_cpy(dst, walk.iv, - keystream, SM4_BLOCK_SIZE); - memcpy(walk.iv, iv, SM4_BLOCK_SIZE); - nbytes -= nblks * SM4_BLOCK_SIZE; - } - - kernel_neon_end(); - - err = 
skcipher_walk_done(&walk, nbytes); + err = skcipher_walk_done(&walk, nbytes % SM4_BLOCK_SIZE); } return err; @@ -238,42 +195,22 @@ static int sm4_cfb_decrypt(struct skcipher_request *req) while ((nbytes = walk.nbytes) > 0) { const u8 *src = walk.src.virt.addr; u8 *dst = walk.dst.virt.addr; - unsigned int nblks; + unsigned int nblocks; - kernel_neon_begin(); + nblocks = nbytes / SM4_BLOCK_SIZE; + if (nblocks) { + kernel_neon_begin(); - nblks = BYTES2BLK8(nbytes); - if (nblks) { - sm4_neon_cfb_dec_blk8(ctx->rkey_enc, dst, src, - walk.iv, nblks); - dst += nblks * SM4_BLOCK_SIZE; - src += nblks * SM4_BLOCK_SIZE; - nbytes -= nblks * SM4_BLOCK_SIZE; + sm4_neon_cfb_dec(ctx->rkey_enc, dst, src, + walk.iv, nblocks); + + kernel_neon_end(); + + dst += nblocks * SM4_BLOCK_SIZE; + src += nblocks * SM4_BLOCK_SIZE; + nbytes -= nblocks * SM4_BLOCK_SIZE; } - nblks = BYTES2BLKS(nbytes); - if (nblks) { - u8 keystream[SM4_BLOCK_SIZE * 8]; - - memcpy(keystream, walk.iv, SM4_BLOCK_SIZE); - if (nblks > 1) - memcpy(&keystream[SM4_BLOCK_SIZE], src, - (nblks - 1) * SM4_BLOCK_SIZE); - memcpy(walk.iv, src + (nblks - 1) * SM4_BLOCK_SIZE, - SM4_BLOCK_SIZE); - - sm4_neon_crypt_blk1_8(ctx->rkey_enc, keystream, - keystream, nblks); - - crypto_xor_cpy(dst, src, keystream, - nblks * SM4_BLOCK_SIZE); - dst += nblks * SM4_BLOCK_SIZE; - src += nblks * SM4_BLOCK_SIZE; - nbytes -= nblks * SM4_BLOCK_SIZE; - } - - kernel_neon_end(); - /* tail */ if (walk.nbytes == walk.total && nbytes > 0) { u8 keystream[SM4_BLOCK_SIZE]; @@ -302,41 +239,22 @@ static int sm4_ctr_crypt(struct skcipher_request *req) while ((nbytes = walk.nbytes) > 0) { const u8 *src = walk.src.virt.addr; u8 *dst = walk.dst.virt.addr; - unsigned int nblks; + unsigned int nblocks; - kernel_neon_begin(); + nblocks = nbytes / SM4_BLOCK_SIZE; + if (nblocks) { + kernel_neon_begin(); - nblks = BYTES2BLK8(nbytes); - if (nblks) { - sm4_neon_ctr_enc_blk8(ctx->rkey_enc, dst, src, - walk.iv, nblks); - dst += nblks * SM4_BLOCK_SIZE; - src += nblks * 
SM4_BLOCK_SIZE; - nbytes -= nblks * SM4_BLOCK_SIZE; + sm4_neon_ctr_crypt(ctx->rkey_enc, dst, src, + walk.iv, nblocks); + + kernel_neon_end(); + + dst += nblocks * SM4_BLOCK_SIZE; + src += nblocks * SM4_BLOCK_SIZE; + nbytes -= nblocks * SM4_BLOCK_SIZE; } - nblks = BYTES2BLKS(nbytes); - if (nblks) { - u8 keystream[SM4_BLOCK_SIZE * 8]; - int i; - - for (i = 0; i < nblks; i++) { - memcpy(&keystream[i * SM4_BLOCK_SIZE], - walk.iv, SM4_BLOCK_SIZE); - crypto_inc(walk.iv, SM4_BLOCK_SIZE); - } - sm4_neon_crypt_blk1_8(ctx->rkey_enc, keystream, - keystream, nblks); - - crypto_xor_cpy(dst, src, keystream, - nblks * SM4_BLOCK_SIZE); - dst += nblks * SM4_BLOCK_SIZE; - src += nblks * SM4_BLOCK_SIZE; - nbytes -= nblks * SM4_BLOCK_SIZE; - } - - kernel_neon_end(); - /* tail */ if (walk.nbytes == walk.total && nbytes > 0) { u8 keystream[SM4_BLOCK_SIZE]; From c24ee936c79d7c381750f6c23bbef1257850279f Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Thu, 27 Oct 2022 14:54:56 +0800 Subject: [PATCH 0901/4122] crypto: testmgr - add SM4 cts-cbc/xts/xcbc test vectors This patch newly adds the test vectors of CTS-CBC/XTS/XCBC modes of the SM4 algorithm, and also added some test vectors for SM4 GCM/CCM. 
Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu --- crypto/testmgr.c | 19 + crypto/testmgr.h | 977 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 996 insertions(+) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index bcd059caa1c8..e2806ef044fd 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -4712,6 +4712,12 @@ static const struct alg_test_desc alg_test_descs[] = { .alg = "cts(cbc(paes))", .test = alg_test_null, .fips_allowed = 1, + }, { + .alg = "cts(cbc(sm4))", + .test = alg_test_skcipher, + .suite = { + .cipher = __VECS(sm4_cts_tv_template) + } }, { .alg = "curve25519", .test = alg_test_kpp, @@ -5586,6 +5592,12 @@ static const struct alg_test_desc alg_test_descs[] = { .suite = { .hash = __VECS(aes_xcbc128_tv_template) } + }, { + .alg = "xcbc(sm4)", + .test = alg_test_hash, + .suite = { + .hash = __VECS(sm4_xcbc128_tv_template) + } }, { .alg = "xchacha12", .test = alg_test_skcipher, @@ -5640,6 +5652,13 @@ static const struct alg_test_desc alg_test_descs[] = { .suite = { .cipher = __VECS(serpent_xts_tv_template) } + }, { + .alg = "xts(sm4)", + .generic_driver = "xts(ecb(sm4-generic))", + .test = alg_test_skcipher, + .suite = { + .cipher = __VECS(sm4_xts_tv_template) + } }, { .alg = "xts(twofish)", .generic_driver = "xts(ecb(twofish-generic))", diff --git a/crypto/testmgr.h b/crypto/testmgr.h index d6088e26f326..f10bfb9d9973 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -14882,6 +14882,353 @@ static const struct cipher_testvec sm4_cfb_tv_template[] = { } }; +static const struct cipher_testvec sm4_cts_tv_template[] = { + /* Generated from AES-CTS test vectors */ + { + .klen = 16, + .key = "\x63\x68\x69\x63\x6b\x65\x6e\x20" + "\x74\x65\x72\x69\x79\x61\x6b\x69", + .ptext = "\x49\x20\x77\x6f\x75\x6c\x64\x20" + "\x6c\x69\x6b\x65\x20\x74\x68\x65" + "\x20", + .len = 17, + .ctext = "\x05\xfe\x23\xee\x17\xa2\x89\x98" + "\xbc\x97\x0a\x0b\x54\x67\xca\xd7" + "\xd6", + }, { + .klen = 16, + .key = "\x63\x68\x69\x63\x6b\x65\x6e\x20" + 
"\x74\x65\x72\x69\x79\x61\x6b\x69", + .ptext = "\x49\x20\x77\x6f\x75\x6c\x64\x20" + "\x6c\x69\x6b\x65\x20\x74\x68\x65" + "\x20\x47\x65\x6e\x65\x72\x61\x6c" + "\x20\x47\x61\x75\x27\x73\x20", + .len = 31, + .ctext = "\x15\x46\xe4\x95\xa4\xec\xf0\xb8" + "\x49\xd6\x6a\x9d\x89\xc7\xfd\x70" + "\xd6\x71\xc8\xc0\x4d\x52\x7c\x66" + "\x93\xf7\x70\xbb\xa8\x3f\xa3", + }, { + .klen = 16, + .key = "\x63\x68\x69\x63\x6b\x65\x6e\x20" + "\x74\x65\x72\x69\x79\x61\x6b\x69", + .ptext = "\x49\x20\x77\x6f\x75\x6c\x64\x20" + "\x6c\x69\x6b\x65\x20\x74\x68\x65" + "\x20\x47\x65\x6e\x65\x72\x61\x6c" + "\x20\x47\x61\x75\x27\x73\x20\x43", + .len = 32, + .ctext = "\x89\xc7\x99\x3f\x87\x69\x5c\xd3" + "\x01\x6a\xbf\xd4\x3f\x79\x02\xa3" + "\xd6\x71\xc8\xc0\x4d\x52\x7c\x66" + "\x93\xf7\x70\xbb\xa8\x3f\xa3\xcf", + }, { + .klen = 16, + .key = "\x63\x68\x69\x63\x6b\x65\x6e\x20" + "\x74\x65\x72\x69\x79\x61\x6b\x69", + .ptext = "\x49\x20\x77\x6f\x75\x6c\x64\x20" + "\x6c\x69\x6b\x65\x20\x74\x68\x65" + "\x20\x47\x65\x6e\x65\x72\x61\x6c" + "\x20\x47\x61\x75\x27\x73\x20\x43" + "\x68\x69\x63\x6b\x65\x6e\x2c\x20" + "\x70\x6c\x65\x61\x73\x65\x2c", + .len = 47, + .ctext = "\xd6\x71\xc8\xc0\x4d\x52\x7c\x66" + "\x93\xf7\x70\xbb\xa8\x3f\xa3\xcf" + "\xd3\xe1\xdc\xeb\xfa\x04\x11\x99" + "\xde\xcf\x6f\x4d\x7b\x09\x92\x7f" + "\x89\xc7\x99\x3f\x87\x69\x5c\xd3" + "\x01\x6a\xbf\xd4\x3f\x79\x02", + }, { + .klen = 16, + .key = "\x63\x68\x69\x63\x6b\x65\x6e\x20" + "\x74\x65\x72\x69\x79\x61\x6b\x69", + .ptext = "\x49\x20\x77\x6f\x75\x6c\x64\x20" + "\x6c\x69\x6b\x65\x20\x74\x68\x65" + "\x20\x47\x65\x6e\x65\x72\x61\x6c" + "\x20\x47\x61\x75\x27\x73\x20\x43" + "\x68\x69\x63\x6b\x65\x6e\x2c\x20" + "\x70\x6c\x65\x61\x73\x65\x2c\x20", + .len = 48, + .ctext = "\xd6\x71\xc8\xc0\x4d\x52\x7c\x66" + "\x93\xf7\x70\xbb\xa8\x3f\xa3\xcf" + "\x9a\xbd\x7b\xfe\x82\xab\xcc\x7f" + "\xbd\x99\x21\x0c\x5e\x4d\xed\x20" + "\x89\xc7\x99\x3f\x87\x69\x5c\xd3" + "\x01\x6a\xbf\xd4\x3f\x79\x02\xa3", + }, { + .klen = 16, + .key = 
"\x63\x68\x69\x63\x6b\x65\x6e\x20" + "\x74\x65\x72\x69\x79\x61\x6b\x69", + .ptext = "\x49\x20\x77\x6f\x75\x6c\x64\x20" + "\x6c\x69\x6b\x65\x20\x74\x68\x65" + "\x20\x47\x65\x6e\x65\x72\x61\x6c" + "\x20\x47\x61\x75\x27\x73\x20\x43" + "\x68\x69\x63\x6b\x65\x6e\x2c\x20" + "\x70\x6c\x65\x61\x73\x65\x2c\x20" + "\x61\x6e\x64\x20\x77\x6f\x6e\x74" + "\x6f\x6e\x20\x73\x6f\x75\x70\x2e", + .len = 64, + .ctext = "\xd6\x71\xc8\xc0\x4d\x52\x7c\x66" + "\x93\xf7\x70\xbb\xa8\x3f\xa3\xcf" + "\x89\xc7\x99\x3f\x87\x69\x5c\xd3" + "\x01\x6a\xbf\xd4\x3f\x79\x02\xa3" + "\x58\x19\xa4\x8f\xa9\x68\x5e\x6b" + "\x2c\x0f\x81\x60\x15\x98\x27\x4f" + "\x9a\xbd\x7b\xfe\x82\xab\xcc\x7f" + "\xbd\x99\x21\x0c\x5e\x4d\xed\x20", + } +}; + +static const struct cipher_testvec sm4_xts_tv_template[] = { + /* Generated from AES-XTS test vectors */ + { + .key = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .klen = 32, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .ptext = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .ctext = "\xd9\xb4\x21\xf7\x31\xc8\x94\xfd" + "\xc3\x5b\x77\x29\x1f\xe4\xe3\xb0" + "\x2a\x1f\xb7\x66\x98\xd5\x9f\x0e" + "\x51\x37\x6c\x4a\xda\x5b\xc7\x5d", + .len = 32, + }, { + .key = "\x11\x11\x11\x11\x11\x11\x11\x11" + "\x11\x11\x11\x11\x11\x11\x11\x11" + "\x22\x22\x22\x22\x22\x22\x22\x22" + "\x22\x22\x22\x22\x22\x22\x22\x22", + .klen = 32, + .iv = "\x33\x33\x33\x33\x33\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .ptext = "\x44\x44\x44\x44\x44\x44\x44\x44" + "\x44\x44\x44\x44\x44\x44\x44\x44" + "\x44\x44\x44\x44\x44\x44\x44\x44" + "\x44\x44\x44\x44\x44\x44\x44\x44", + .ctext = "\xa7\x4d\x72\x6c\x11\x19\x6a\x32" + "\xbe\x04\xe0\x01\xff\x29\xd0\xc7" + "\x93\x2f\x9f\x3e\xc2\x9b\xfc\xb6" + "\x4d\xd1\x7f\x63\xcb\xd3\xea\x31", + .len = 32, + }, { + .key 
= "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8" + "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0" + "\x22\x22\x22\x22\x22\x22\x22\x22" + "\x22\x22\x22\x22\x22\x22\x22\x22", + .klen = 32, + .iv = "\x33\x33\x33\x33\x33\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .ptext = "\x44\x44\x44\x44\x44\x44\x44\x44" + "\x44\x44\x44\x44\x44\x44\x44\x44" + "\x44\x44\x44\x44\x44\x44\x44\x44" + "\x44\x44\x44\x44\x44\x44\x44\x44", + .ctext = "\x7f\x76\x08\x8e\xff\xad\xf7\x0c" + "\x02\xea\x9f\x95\xda\x06\x28\xd3" + "\x51\xbf\xcb\x9e\xac\x05\x63\xbc" + "\xf1\x7b\x71\x0d\xab\x0a\x98\x26", + .len = 32, + }, { + .key = "\x27\x18\x28\x18\x28\x45\x90\x45" + "\x23\x53\x60\x28\x74\x71\x35\x26" + "\x31\x41\x59\x26\x53\x58\x97\x93" + "\x23\x84\x62\x64\x33\x83\x27\x95", + .klen = 32, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .ptext = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + "\x10\x11\x12\x13\x14\x15\x16\x17" + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + "\x20\x21\x22\x23\x24\x25\x26\x27" + "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" + "\x30\x31\x32\x33\x34\x35\x36\x37" + "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" + "\x40\x41\x42\x43\x44\x45\x46\x47" + "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" + "\x50\x51\x52\x53\x54\x55\x56\x57" + "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" + "\x60\x61\x62\x63\x64\x65\x66\x67" + "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" + "\x70\x71\x72\x73\x74\x75\x76\x77" + "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" + "\x80\x81\x82\x83\x84\x85\x86\x87" + "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" + "\x90\x91\x92\x93\x94\x95\x96\x97" + "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" + "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" + "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" + "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" + "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" + "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" + "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" + "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" + "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" + "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7" + "\xe8\xe9\xea\xeb\xec\xed\xee\xef" + "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" + 
"\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff" + "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + "\x10\x11\x12\x13\x14\x15\x16\x17" + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + "\x20\x21\x22\x23\x24\x25\x26\x27" + "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" + "\x30\x31\x32\x33\x34\x35\x36\x37" + "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" + "\x40\x41\x42\x43\x44\x45\x46\x47" + "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" + "\x50\x51\x52\x53\x54\x55\x56\x57" + "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" + "\x60\x61\x62\x63\x64\x65\x66\x67" + "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" + "\x70\x71\x72\x73\x74\x75\x76\x77" + "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" + "\x80\x81\x82\x83\x84\x85\x86\x87" + "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" + "\x90\x91\x92\x93\x94\x95\x96\x97" + "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" + "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" + "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" + "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" + "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" + "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" + "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" + "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" + "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" + "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7" + "\xe8\xe9\xea\xeb\xec\xed\xee\xef" + "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" + "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff", + .ctext = "\x54\xdd\x65\xb6\x32\x6f\xae\xa8" + "\xfa\xd1\xa8\x3c\x63\x61\x4a\xf3" + "\x9f\x72\x1d\x8d\xfe\x17\x7a\x30" + "\xb6\x6a\xbf\x6a\x44\x99\x80\xe1" + "\xcd\xbe\x06\xaf\xb7\x33\x36\xf3" + "\x7a\x4d\x39\xde\x96\x4a\x30\xd7" + "\xd0\x4a\x37\x99\x16\x9c\x60\x25" + "\x8f\x6b\x74\x8a\x61\x86\x1a\xa5" + "\xec\x92\xa2\xc1\x5b\x2b\x7c\x61" + "\x5a\x42\xab\xa4\x99\xbb\xd6\xb7" + "\x1d\xb9\xc7\x89\xb2\x18\x20\x89" + "\xa2\x5d\xd3\xdf\x80\x0e\xd1\x86" + "\x4d\x19\xf7\xed\x45\xfd\x17\xa9" + "\x48\x0b\x0f\xb8\x2d\x9b\x7f\xc3" + "\xed\x57\xe9\xa1\x14\x0e\xaa\x77" + "\x8d\xd2\xdd\x67\x9e\x3e\xdc\x3d" + "\xc4\xd5\x5c\x95\x0e\xbc\x53\x1d" + "\x95\x92\xf7\xc4\x63\x82\x56\xd5" + "\x65\x18\x29\x2a\x20\xaf\x98\xfd" + "\xd3\xa6\x36\x00\x35\x0a\x70\xab" + 
"\x5a\x40\xf4\xc2\x85\x03\x7c\xa0" + "\x1f\x25\x1f\x19\xec\xae\x03\x29" + "\xff\x77\xad\x88\xcd\x5a\x4c\xde" + "\xa2\xae\xab\xc2\x21\x48\xff\xbd" + "\x23\x9b\xd1\x05\x15\xbd\xe1\x13" + "\x1d\xec\x84\x04\xe4\x43\xdc\x76" + "\x31\x40\xd5\xf2\x2b\xf3\x3e\x0c" + "\x68\x72\xd6\xb8\x1d\x63\x0f\x6f" + "\x00\xcd\xd0\x58\xfe\x80\xf9\xcb" + "\xfb\x77\x70\x7f\x93\xce\xe2\xca" + "\x92\xb9\x15\xb8\x30\x40\x27\xc1" + "\x90\xa8\x4e\x2d\x65\xe0\x18\xcc" + "\x6a\x38\x7d\x37\x66\xac\xdb\x28" + "\x25\x32\x84\xe8\xdb\x9a\xcf\x8f" + "\x52\x28\x0d\xdc\x6d\x00\x33\xd2" + "\xcc\xaa\xa4\xf9\xae\xff\x12\x36" + "\x69\xbc\x02\x4f\xd6\x76\x8e\xdf" + "\x8b\xc1\xf8\xd6\x22\xc1\x9c\x60" + "\x9e\xf9\x7f\x60\x91\x90\xcd\x11" + "\x02\x41\xe7\xfb\x08\x4e\xd8\x94" + "\x2d\xa1\xf9\xb9\xcf\x1b\x51\x4b" + "\x61\xa3\x88\xb3\x0e\xa6\x1a\x4a" + "\x74\x5b\x38\x1e\xe7\xad\x6c\x4d" + "\xb1\x27\x54\x53\xb8\x41\x3f\x98" + "\xdf\x6e\x4a\x40\x98\x6e\xe4\xb5" + "\x9a\xf5\xdf\xae\xcd\x30\x12\x65" + "\x17\x90\x67\xa0\x0d\x7c\xa3\x5a" + "\xb9\x5a\xbd\x61\x7a\xde\xa2\x8e" + "\xc1\xc2\x6a\x97\xde\x28\xb8\xbf" + "\xe3\x01\x20\xd6\xae\xfb\xd2\x58" + "\xc5\x9e\x42\xd1\x61\xe8\x06\x5a" + "\x78\x10\x6b\xdc\xa5\xcd\x90\xfb" + "\x3a\xac\x4e\x93\x86\x6c\x8a\x7f" + "\x96\x76\x86\x0a\x79\x14\x5b\xd9" + "\x2e\x02\xe8\x19\xa9\x0b\xe0\xb9" + "\x7c\xc5\x22\xb3\x21\x06\x85\x6f" + "\xdf\x0e\x54\xd8\x8e\x46\x24\x15" + "\x5a\x2f\x1c\x14\xea\xea\xa1\x63" + "\xf8\x58\xe9\x9a\x80\x6e\x79\x1a" + "\xcd\x82\xf1\xb0\xe2\x9f\x00\x28" + "\xa4\xc3\x8e\x97\x6f\x57\x1a\x93" + "\xf4\xfd\x57\xd7\x87\xc2\x4d\xb0" + "\xe0\x1c\xa3\x04\xe5\xa5\xc4\xdd" + "\x50\xcf\x8b\xdb\xf4\x91\xe5\x7c", + .len = 512, + }, { + .key = "\x62\x49\x77\x57\x24\x70\x93\x69" + "\x99\x59\x57\x49\x66\x96\x76\x27" + "\x02\x88\x41\x97\x16\x93\x99\x37" + "\x51\x05\x82\x09\x74\x94\x45\x92", + .klen = 32, + .iv = "\xff\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .ptext = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + 
"\x10\x11\x12\x13\x14\x15\x16\x17" + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + "\x20\x21\x22\x23\x24\x25\x26\x27" + "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" + "\x30\x31\x32\x33\x34\x35\x36\x37" + "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" + "\x40\x41\x42\x43\x44\x45\x46\x47" + "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" + "\x50\x51\x52\x53\x54\x55\x56\x57" + "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" + "\x60\x61\x62\x63\x64\x65\x66\x67" + "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" + "\x70\x71\x72\x73\x74\x75\x76\x77" + "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" + "\x80\x81\x82\x83\x84\x85\x86\x87" + "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" + "\x90\x91\x92\x93\x94\x95\x96\x97" + "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" + "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" + "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" + "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" + "\xf8\xf9\xfa\xfb\xfc", + .ctext = "\xa2\x9f\x9e\x4e\x71\xdb\x28\x3c" + "\x80\x0e\xf6\xb7\x8e\x57\x1c\xba" + "\x90\xda\x3b\x6c\x22\x00\x68\x30" + "\x1d\x63\x0d\x9e\x6a\xad\x37\x55" + "\xbc\x77\x1e\xc9\xad\x83\x30\xd5" + "\x27\xb2\x66\x77\x18\x3c\xa6\x39" + "\x9c\x0a\xaa\x1f\x02\xe1\xd5\x65" + "\x9b\x8d\xc5\x97\x3d\xc5\x04\x53" + "\x78\x00\xe3\xb0\x1a\x43\x4e\xb7" + "\xc4\x9f\x38\xc5\x7b\xa4\x70\x64" + "\x78\xe6\x32\xd9\x65\x44\xc5\x64" + "\xb8\x42\x35\x99\xff\x66\x75\xb0" + "\x22\xd3\x9b\x6e\x8d\xcf\x6a\x24" + "\xfd\x92\xb7\x1b\x04\x28\x2a\x61" + "\xdc\x96\x2a\x20\x7a\x2c\xf1\xf9" + "\x12\x15\xf0\x4d\xcf\x2b\xde\x33" + "\x41\xbc\xe7\x85\x87\x22\xb7\x16" + "\x02\x1c\xd8\xa2\x0f\x1f\xa3\xe9" + "\xd8\x45\x48\xe7\xbe\x08\x4e\x4e" + "\x23\x79\x84\xdb\x40\x76\xf5\x13" + "\x78\x92\x4a\x2f\xf9\x1b\xf2\x80" + "\x25\x74\x51\x45\x9a\x77\x78\x97" + "\xd3\xe0\xc7\xc4\x35\x67\x2a\xe6" + "\xb3\x0d\x62\x9f\x8b", + .len = 189, + }, +}; + static const struct aead_testvec sm4_gcm_tv_template[] = { { /* From https://datatracker.ietf.org/doc/html/rfc8998#appendix-A.1 */ .key = "\x01\x23\x45\x67\x89\xAB\xCD\xEF" @@ -14913,6 +15260,298 @@ static const struct aead_testvec sm4_gcm_tv_template[] = { 
"\x83\xDE\x35\x41\xE4\xC2\xB5\x81" "\x77\xE0\x65\xA9\xBF\x7B\x62\xEC", .clen = 80, + }, { /* Generated from AES-GCM test vectors */ + .key = zeroed_string, + .klen = 16, + .ctext = "\x23\x2f\x0c\xfe\x30\x8b\x49\xea" + "\x6f\xc8\x82\x29\xb5\xdc\x85\x8d", + .clen = 16, + }, { + .key = zeroed_string, + .klen = 16, + .ptext = zeroed_string, + .plen = 16, + .ctext = "\x7d\xe2\xaa\x7f\x11\x10\x18\x82" + "\x18\x06\x3b\xe1\xbf\xeb\x6d\x89" + "\xb8\x51\xb5\xf3\x94\x93\x75\x2b" + "\xe5\x08\xf1\xbb\x44\x82\xc5\x57", + .clen = 32, + }, { + .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c" + "\x6d\x6a\x8f\x94\x67\x30\x83\x08", + .klen = 16, + .iv = "\xca\xfe\xba\xbe\xfa\xce\xdb\xad" + "\xde\xca\xf8\x88", + .ptext = "\xd9\x31\x32\x25\xf8\x84\x06\xe5" + "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a" + "\x86\xa7\xa9\x53\x15\x34\xf7\xda" + "\x2e\x4c\x30\x3d\x8a\x31\x8a\x72" + "\x1c\x3c\x0c\x95\x95\x68\x09\x53" + "\x2f\xcf\x0e\x24\x49\xa6\xb5\x25" + "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57" + "\xba\x63\x7b\x39\x1a\xaf\xd2\x55", + .plen = 64, + .ctext = "\xe4\x11\x0f\xf1\xc1\x41\x97\xe6" + "\x76\x21\x6a\x33\x83\x10\x41\xeb" + "\x09\x58\x00\x11\x7b\xdc\x3f\x75" + "\x1a\x49\x6e\xfc\xf2\xbb\xdf\xdb" + "\x3a\x2e\x13\xfd\xc5\xc1\x9d\x07" + "\x1a\xe5\x48\x3f\xed\xde\x98\x5d" + "\x3f\x2d\x5b\x4e\xee\x0b\xb6\xdf" + "\xe3\x63\x36\x83\x23\xf7\x5b\x80" + "\x7d\xfe\x77\xef\x71\xb1\x5e\xc9" + "\x52\x6b\x09\xab\x84\x28\x4b\x8a", + .clen = 80, + }, { + .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c" + "\x6d\x6a\x8f\x94\x67\x30\x83\x08", + .klen = 16, + .iv = "\xca\xfe\xba\xbe\xfa\xce\xdb\xad" + "\xde\xca\xf8\x88", + .ptext = "\xd9\x31\x32\x25\xf8\x84\x06\xe5" + "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a" + "\x86\xa7\xa9\x53\x15\x34\xf7\xda" + "\x2e\x4c\x30\x3d\x8a\x31\x8a\x72" + "\x1c\x3c\x0c\x95\x95\x68\x09\x53" + "\x2f\xcf\x0e\x24\x49\xa6\xb5\x25" + "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57" + "\xba\x63\x7b\x39", + .plen = 60, + .assoc = "\xfe\xed\xfa\xce\xde\xad\xbe\xef" + "\xfe\xed\xfa\xce\xde\xad\xbe\xef" + "\xab\xad\xda\xd2", + .alen = 20, + 
.ctext = "\xe4\x11\x0f\xf1\xc1\x41\x97\xe6" + "\x76\x21\x6a\x33\x83\x10\x41\xeb" + "\x09\x58\x00\x11\x7b\xdc\x3f\x75" + "\x1a\x49\x6e\xfc\xf2\xbb\xdf\xdb" + "\x3a\x2e\x13\xfd\xc5\xc1\x9d\x07" + "\x1a\xe5\x48\x3f\xed\xde\x98\x5d" + "\x3f\x2d\x5b\x4e\xee\x0b\xb6\xdf" + "\xe3\x63\x36\x83" + "\x89\xf6\xba\x35\xb8\x18\xd3\xcc" + "\x38\x6c\x05\xb3\x8a\xcb\xc9\xde", + .clen = 76, + }, { + .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c" + "\xfe\xff\xe9\x92\x86\x65\x73\x1c", + .klen = 16, + .iv = "\xca\xfe\xba\xbe\xfa\xce\xdb\xad" + "\xde\xca\xf8\x88", + .ptext = "\xd9\x31\x32\x25\xf8\x84\x06\xe5" + "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a" + "\x86\xa7\xa9\x53\x15\x34\xf7\xda" + "\x2e\x4c\x30\x3d\x8a\x31\x8a\x72" + "\x1c\x3c\x0c\x95\x95\x68\x09\x53" + "\x2f\xcf\x0e\x24\x49\xa6\xb5\x25" + "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57" + "\xba\x63\x7b\x39", + .plen = 60, + .assoc = "\xfe\xed\xfa\xce\xde\xad\xbe\xef" + "\xfe\xed\xfa\xce\xde\xad\xbe\xef" + "\xab\xad\xda\xd2", + .alen = 20, + .ctext = "\xc1\x11\x44\x51\xd9\x25\x87\x5b" + "\x0f\xd9\x06\xf3\x33\x44\xbb\x87" + "\x8b\xa3\x77\xd2\x0c\x60\xfa\xcc" + "\x85\x50\x6f\x96\x0c\x54\x54\xc1" + "\x58\x04\x88\x6e\xf4\x26\x35\x7e" + "\x94\x80\x48\x6c\xf2\xf4\x88\x1f" + "\x19\x63\xea\xae\xba\x81\x1a\x5d" + "\x0e\x6f\x59\x08" + "\x33\xac\x5b\xa8\x19\x60\xdb\x1d" + "\xdd\x2e\x22\x2e\xe0\x87\x51\x5d", + .clen = 76, + }, { + .key = "\x8b\x32\xcf\xe7\x44\xed\x13\x59" + "\x04\x38\x77\xb0\xb9\xad\xb4\x38", + .klen = 16, + .iv = "\x00\xff\xff\xff\xff\x00\x00\xff" + "\xff\xff\x00\xff", + .ptext = "\x42\xc1\xcc\x08\x48\x6f\x41\x3f" + "\x2f\x11\x66\x8b\x2a\x16\xf0\xe0" + "\x58\x83\xf0\xc3\x70\x14\xc0\x5b" + "\x3f\xec\x1d\x25\x3c\x51\xd2\x03" + "\xcf\x59\x74\x1f\xb2\x85\xb4\x07" + "\xc6\x6a\x63\x39\x8a\x5b\xde\xcb" + "\xaf\x08\x44\xbd\x6f\x91\x15\xe1" + "\xf5\x7a\x6e\x18\xbd\xdd\x61\x50" + "\x59\xa9\x97\xab\xbb\x0e\x74\x5c" + "\x00\xa4\x43\x54\x04\x54\x9b\x3b" + "\x77\xec\xfd\x5c\xa6\xe8\x7b\x08" + "\xae\xe6\x10\x3f\x32\x65\xd1\xfc" + 
"\xa4\x1d\x2c\x31\xfb\x33\x7a\xb3" + "\x35\x23\xf4\x20\x41\xd4\xad\x82" + "\x8b\xa4\xad\x96\x1c\x20\x53\xbe" + "\x0e\xa6\xf4\xdc\x78\x49\x3e\x72" + "\xb1\xa9\xb5\x83\xcb\x08\x54\xb7" + "\xad\x49\x3a\xae\x98\xce\xa6\x66" + "\x10\x30\x90\x8c\x55\x83\xd7\x7c" + "\x8b\xe6\x53\xde\xd2\x6e\x18\x21" + "\x01\x52\xd1\x9f\x9d\xbb\x9c\x73" + "\x57\xcc\x89\x09\x75\x9b\x78\x70" + "\xed\x26\x97\x4d\xb4\xe4\x0c\xa5" + "\xfa\x70\x04\x70\xc6\x96\x1c\x7d" + "\x54\x41\x77\xa8\xe3\xb0\x7e\x96" + "\x82\xd9\xec\xa2\x87\x68\x55\xf9" + "\x8f\x9e\x73\x43\x47\x6a\x08\x36" + "\x93\x67\xa8\x2d\xde\xac\x41\xa9" + "\x5c\x4d\x73\x97\x0f\x70\x68\xfa" + "\x56\x4d\x00\xc2\x3b\x1f\xc8\xb9" + "\x78\x1f\x51\x07\xe3\x9a\x13\x4e" + "\xed\x2b\x2e\xa3\xf7\x44\xb2\xe7" + "\xab\x19\x37\xd9\xba\x76\x5e\xd2" + "\xf2\x53\x15\x17\x4c\x6b\x16\x9f" + "\x02\x66\x49\xca\x7c\x91\x05\xf2" + "\x45\x36\x1e\xf5\x77\xad\x1f\x46" + "\xa8\x13\xfb\x63\xb6\x08\x99\x63" + "\x82\xa2\xed\xb3\xac\xdf\x43\x19" + "\x45\xea\x78\x73\xd9\xb7\x39\x11" + "\xa3\x13\x7c\xf8\x3f\xf7\xad\x81" + "\x48\x2f\xa9\x5c\x5f\xa0\xf0\x79" + "\xa4\x47\x7d\x80\x20\x26\xfd\x63" + "\x0a\xc7\x7e\x6d\x75\x47\xff\x76" + "\x66\x2e\x8a\x6c\x81\x35\xaf\x0b" + "\x2e\x6a\x49\x60\xc1\x10\xe1\xe1" + "\x54\x03\xa4\x09\x0c\x37\x7a\x15" + "\x23\x27\x5b\x8b\x4b\xa5\x64\x97" + "\xae\x4a\x50\x73\x1f\x66\x1c\x5c" + "\x03\x25\x3c\x8d\x48\x58\x71\x34" + "\x0e\xec\x4e\x55\x1a\x03\x6a\xe5" + "\xb6\x19\x2b\x84\x2a\x20\xd1\xea" + "\x80\x6f\x96\x0e\x05\x62\xc7\x78" + "\x87\x79\x60\x38\x46\xb4\x25\x57" + "\x6e\x16\x63\xf8\xad\x6e\xd7\x42" + "\x69\xe1\x88\xef\x6e\xd5\xb4\x9a" + "\x3c\x78\x6c\x3b\xe5\xa0\x1d\x22" + "\x86\x5c\x74\x3a\xeb\x24\x26\xc7" + "\x09\xfc\x91\x96\x47\x87\x4f\x1a" + "\xd6\x6b\x2c\x18\x47\xc0\xb8\x24" + "\xa8\x5a\x4a\x9e\xcb\x03\xe7\x2a" + "\x09\xe6\x4d\x9c\x6d\x86\x60\xf5" + "\x2f\x48\x69\x37\x9f\xf2\xd2\xcb" + "\x0e\x5a\xdd\x6e\x8a\xfb\x6a\xfe" + "\x0b\x63\xde\x87\x42\x79\x8a\x68" + "\x51\x28\x9b\x7a\xeb\xaf\xb8\x2f" + "\x9d\xd1\xc7\x45\x90\x08\xc9\x83" + 
"\xe9\x83\x84\xcb\x28\x69\x09\x69" + "\xce\x99\x46\x00\x54\xcb\xd8\x38" + "\xf9\x53\x4a\xbf\x31\xce\x57\x15" + "\x33\xfa\x96\x04\x33\x42\xe3\xc0" + "\xb7\x54\x4a\x65\x7a\x7c\x02\xe6" + "\x19\x95\xd0\x0e\x82\x07\x63\xf9" + "\xe1\x2b\x2a\xfc\x55\x92\x52\xc9" + "\xb5\x9f\x23\x28\x60\xe7\x20\x51" + "\x10\xd3\xed\x6d\x9b\xab\xb8\xe2" + "\x5d\x9a\x34\xb3\xbe\x9c\x64\xcb" + "\x78\xc6\x91\x22\x40\x91\x80\xbe" + "\xd7\x78\x5c\x0e\x0a\xdc\x08\xe9" + "\x67\x10\xa4\x83\x98\x79\x23\xe7" + "\x92\xda\xa9\x22\x16\xb1\xe7\x78" + "\xa3\x1c\x6c\x8f\x35\x7c\x4d\x37" + "\x2f\x6e\x0b\x50\x5c\x34\xb9\xf9" + "\xe6\x3d\x91\x0d\x32\x95\xaa\x3d" + "\x48\x11\x06\xbb\x2d\xf2\x63\x88" + "\x3f\x73\x09\xe2\x45\x56\x31\x51" + "\xfa\x5e\x4e\x62\xf7\x90\xf9\xa9" + "\x7d\x7b\x1b\xb1\xc8\x26\x6e\x66" + "\xf6\x90\x9a\x7f\xf2\x57\xcc\x23" + "\x59\xfa\xfa\xaa\x44\x04\x01\xa7" + "\xa4\x78\xdb\x74\x3d\x8b\xb5", + .plen = 719, + .ctext = "\xdc\xb1\x0f\x2a\xe8\x2d\x1c\x57" + "\xc4\x82\xfa\xd6\x87\xe6\x2f\x50" + "\xbd\x9e\x0a\x42\x31\xf2\xc7\xbb" + "\x21\x63\xa7\x05\x43\x33\xef\x33" + "\x5c\xd3\x47\x55\xce\x5c\xe4\xd4" + "\xe5\x07\x62\x22\xac\x01\xa8\x35" + "\x9c\x59\x34\x30\x8e\xff\x9f\xb4" + "\xd2\x4e\x74\x90\x64\xf2\x78\x5e" + "\x63\xb7\xc5\x08\x1b\x37\xa5\x9e" + "\xc0\xde\xff\xa9\x7f\x0b\xd3\x02" + "\x83\x6e\x33\xfa\x43\x11\xd3\xda" + "\x02\xcf\xcd\x4a\xc0\x78\x1f\x39" + "\x62\xcb\xa3\x95\x7e\x13\x92\x28" + "\xb2\xc4\x7a\xba\xd1\xc6\xf6\x1f" + "\xda\x0b\xf1\xd1\x99\x54\xd8\x3b" + "\x16\xf8\xe6\x97\x1e\xa7\xcf\x49" + "\x69\x84\x01\x4c\xdc\x7a\x34\xff" + "\x01\x08\xa3\x0b\x39\xac\x21\x37" + "\xd8\xb4\x04\x19\x8b\x7a\x7d\x17" + "\x44\xd1\x18\xaf\x1f\xa9\x29\xfe" + "\xfa\x77\xe0\x40\x42\x0c\x79\xb7" + "\xc3\x15\x1b\xd9\x0c\x82\xfc\x16" + "\x70\xd6\x2a\xe9\x94\x72\xc5\xa5" + "\x8a\x58\xbc\xfa\xe0\x88\x39\x4a" + "\x80\xe8\xec\xaf\x60\xac\xe7\xf8" + "\x9c\xf0\xfc\x61\x39\x07\x98\x6b" + "\x88\xe3\x98\x22\x28\x18\x4a\x2d" + "\x25\xef\x10\xe3\x83\x66\x3f\xfd" + "\xc7\x0b\xa3\xfd\x97\xa9\xf4\xbd" + 
"\xd8\x2a\xee\x4a\x50\xad\xcc\xb5" + "\xc7\xab\xb8\x79\x9c\xd1\xf1\x27" + "\x08\xf5\xf5\xe8\x1b\x66\xce\x41" + "\x56\x60\x94\x86\xf0\x78\xc2\xfa" + "\x5b\x63\x40\xb1\xd1\x1a\x38\x69" + "\x0b\x8c\xb2\xf5\xa2\xbe\x90\x9d" + "\x46\x23\x79\x8b\x3b\x4a\xf4\xbb" + "\x55\xf7\x58\x9d\xaf\x59\xff\x74" + "\xf3\xb9\xc4\x26\xb1\xf8\xe1\x28" + "\x8b\x5e\x8f\x6d\x64\xe7\xe8\x63" + "\xd2\x9e\xcb\xee\xae\x19\x04\x1d" + "\x05\xf0\x9d\x99\x7b\x33\x33\xae" + "\x6e\xe5\x09\xdd\x67\x51\xc4\xc8" + "\x6a\xc7\x36\x35\xc9\x93\x76\xa1" + "\xa8\x1c\xfa\x75\x92\x34\x0e\x7d" + "\x3d\x1d\xef\x00\xfd\xa5\x25\x12" + "\x7c\x91\x21\x41\xcc\x50\x47\xa9" + "\x22\x50\x24\x96\x34\x79\x3d\xe8" + "\x3f\xa0\x56\xaf\x98\x53\x55\xc3" + "\x46\x1b\x17\x54\xb8\xb0\xb7\xe0" + "\xe0\xab\x47\x6f\x06\xda\xcc\x75" + "\xa7\x96\xb7\x92\xf3\xa0\x5f\xe6" + "\xba\x97\xe3\x2f\x97\x05\xb2\x99" + "\xa0\x09\x10\x98\x9c\xd3\x2e\xd1" + "\x7e\x2a\x30\x54\x3c\xb9\x33\xe3" + "\xf2\xaf\xd3\xa5\xee\xd0\x0b\x8a" + "\x19\x54\x0f\x02\x51\x1f\x91\xdf" + "\x71\x9c\xad\x77\x35\x28\x55\x6d" + "\xcd\x7a\xd9\xa3\x41\x98\x6b\x37" + "\x19\x0f\xbe\xae\x69\xb2\x25\x01" + "\xee\x0e\x51\x4b\x53\xea\x0f\x5f" + "\x85\x74\x79\x36\x32\x0a\x2a\x40" + "\xad\x6b\x78\x41\x54\x99\xe9\xc1" + "\x2b\x6c\x9b\x42\x21\xef\xe2\x50" + "\x56\x8d\x78\xdf\x58\xbe\x0a\x0f" + "\xfc\xfc\x0d\x2e\xd0\xcb\xa6\x0a" + "\xa8\xd9\x1e\xa9\xd4\x7c\x99\x88" + "\xcf\x11\xad\x1c\xd3\x04\x63\x55" + "\xef\x85\x0b\x69\xa1\x40\xf1\x75" + "\x24\xf4\xe5\x2c\xd4\x7a\x24\x50" + "\x8f\xa2\x71\xc9\x92\x20\xcd\xcf" + "\xda\x40\xbe\xf6\xfe\x1a\xca\xc7" + "\x4a\x80\x45\x55\xcb\xdd\xb7\x01" + "\xb0\x8d\xcb\xd2\xae\xbd\xa4\xd0" + "\x5c\x10\x05\x66\x7b\xd4\xff\xd9" + "\xc4\x23\x9d\x8d\x6b\x24\xf8\x3f" + "\x73\x4d\x5c\x2b\x33\x4c\x5e\x63" + "\x74\x6d\x03\xa1\x7a\x35\x65\x17" + "\x38\x7f\x3b\xc1\x69\xcf\x61\x34" + "\x30\x21\xaf\x97\x47\x12\x3f\xa1" + "\xa7\x50\xc5\x87\xfb\x3f\x70\x32" + "\x86\x17\x5f\x25\xe4\x74\xc6\xd0" + "\x9b\x39\xe6\xe1\x5a\xec\x8f\x40" + "\xce\xcc\x37\x3b\xd8\x72\x1c\x31" + 
"\x75\xa4\xa6\x89\x8c\xdd\xd6\xd2" + "\x32\x3d\xe8\xc3\x54\xab\x1f\x35" + "\x52\xb4\x94\x81\xb0\x37\x3a\x03" + "\xbb\xb1\x99\x30\xa5\xf8\x21\xcd" + "\x93\x5d\xa7\x13\xed\xc7\x49\x09" + "\x70\xda\x08\x39\xaa\x15\x9e\x45" + "\x35\x2b\x0f\x5c\x8c\x8b\xc9" + "\xa8\xb8\x9f\xfd\x37\x36\x31\x7e" + "\x34\x4f\xc1\xc0\xca\x8a\x22\xfd", + .clen = 735, } }; @@ -14947,6 +15586,282 @@ static const struct aead_testvec sm4_ccm_tv_template[] = { "\x16\x84\x2D\x4F\xA1\x86\xF5\x6A" "\xB3\x32\x56\x97\x1F\xA1\x10\xF4", .clen = 80, + }, { /* Generated from AES-CCM test vectors */ + .key = "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" + "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf", + .klen = 16, + .iv = "\x01\x00\x00\x00\x03\x02\x01\x00" + "\xa0\xa1\xa2\xa3\xa4\xa5\x00\x00", + .assoc = "\x00\x01\x02\x03\x04\x05\x06\x07", + .alen = 8, + .ptext = "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + "\x10\x11\x12\x13\x14\x15\x16\x17" + "\x18\x19\x1a\x1b\x1c\x1d\x1e", + .plen = 23, + .ctext = "\x7b\xff\x4a\x15\xf5\x73\xce\x82" + "\x6e\xc2\x31\x1d\xe2\x53\x02\xac" + "\xa4\x48\xf9\xe4\xf5\x1f\x81\x70" + "\x18\xbc\xb6\x84\x01\xb8\xae", + .clen = 31, + }, { + .key = "\xf4\x6b\xc2\x75\x62\xfe\xb4\xe1" + "\x53\x14\x73\x66\x8d\x88\xf6\x80", + .klen = 16, + .iv = "\x03\xa0\x20\x35\x26\xf2\x21\x8d" + "\x50\x20\xda\xe2\x00\x00\x00\x00", + .assoc = "\x5b\x9e\x13\x67\x02\x5e\xef\xc1" + "\x6c\xf9\xd7\x1e\x52\x8f\x7a\x47" + "\xe9\xd4\xcf\x20\x14\x6e\xf0\x2d" + "\xd8\x9e\x2b\x56\x10\x23\x56\xe7", + .alen = 32, + .ctext = "\x23\x58\xce\xdc\x40\xb1\xcd\x92" + "\x47\x96\x59\xfc\x8a\x26\x4f\xcf", + .clen = 16, + }, { + .key = "\xab\x2f\x8a\x74\xb7\x1c\xd2\xb1" + "\xff\x80\x2e\x48\x7d\x82\xf8\xb9", + .klen = 16, + .iv = "\x03\xaf\x94\x87\x78\x35\x82\x81" + "\x7f\x88\x94\x68\x00\x00\x00\x00", + .alen = 0, + .ptext = "\x00", + .plen = 0, + .ctext = "\x72\x7e\xf5\xd6\x39\x7a\x2b\x43", + .clen = 8, + }, { + .key = "\x39\xbb\xa7\xbe\x59\x97\x9e\x73" + "\xa4\x48\x93\x39\x26\x71\x4a\xc6", + .klen = 16, + .iv = "\x03\xee\x49\x83\xe9\xa9\xff\xe9" + 
"\x57\xba\xfd\x9e\x00\x00\x00\x00", + .assoc = "\x44\xa6\x2c\x05\xe9\xe1\x43\xb1" + "\x58\x7c\xf2\x5c\x6d\x39\x0a\x64" + "\xa4\xf0\x13\x05\xd1\x77\x99\x67" + "\x11\xc4\xc6\xdb\x00\x56\x36\x61", + .alen = 32, + .ptext = "\x00", + .plen = 0, + .ctext = "\xb0\x9d\xc6\xfb\x7d\xb5\xa1\x0e", + .clen = 8, + }, { + .key = "\x58\x5d\xa0\x96\x65\x1a\x04\xd7" + "\x0d\x1a\x53\x3b\xb5\xe3\xf8\x8b", + .klen = 16, + .iv = "\x03\xcf\x76\x3f\xd9\x95\x75\x8f" + "\x44\x89\x40\x7b\x00\x00\x00\x00", + .assoc = "\x8f\x86\x6c\x4d\x1d\xc5\x39\x88" + "\xc8\xf3\x5c\x52\x10\x63\x6f\x2b" + "\x8a\x2a\xc5\x6f\x30\x23\x58\x7b" + "\xfb\x36\x03\x11\xb4\xd9\xf2\xfe", + .alen = 32, + .ptext = "\xc2\x54\xc8\xde\x78\x87\x77\x40" + "\x49\x71\xe4\xb7\xe7\xcb\x76\x61" + "\x0a\x41\xb9\xe9\xc0\x76\x54\xab" + "\x04\x49\x3b\x19\x93\x57\x25\x5d", + .plen = 32, + .ctext = "\xc9\xae\xef\x1d\xf3\x2c\xd3\x38" + "\xc9\x7f\x7e\x28\xe8\xaa\xb3\x60" + "\x49\xdc\x66\xca\x7b\x3d\xe0\x3c" + "\xcb\x45\x9c\x1b\xb2\xbe\x07\x90" + "\x87\xa6\x6b\x89\x0d\x0f\x90\xaa" + "\x7d\xf6\x5a\x9a\x68\x2b\x81\x92", + .clen = 48, + }, { + .key = "\x8b\x32\xcf\xe7\x44\xed\x13\x59" + "\x04\x38\x77\xb0\xb9\xad\xb4\x38", + .klen = 16, + .iv = "\x02\xff\xff\xff\xff\x00\x00\xff" + "\xff\xff\x00\xff\xff\x00\x00\x00", + .assoc = "\x8f\x86\x6c\x4d\x1d\xc5\x39\x88" + "\xc8\xf3\x5c\x52\x10\x63\x6f\x2b" + "\x8a\x2a\xc5\x6f\x30\x23\x58\x7b" + "\xfb\x36\x03\x11\xb4\xd9\xf2\xfe" + "\xc8\xf3\x5c\x52\x10\x63", + .alen = 38, + .ptext = "\x42\xc1\xcc\x08\x48\x6f\x41\x3f" + "\x2f\x11\x66\x8b\x2a\x16\xf0\xe0" + "\x58\x83\xf0\xc3\x70\x14\xc0\x5b" + "\x3f\xec\x1d\x25\x3c\x51\xd2\x03" + "\xcf\x59\x74\x1f\xb2\x85\xb4\x07" + "\xc6\x6a\x63\x39\x8a\x5b\xde\xcb" + "\xaf\x08\x44\xbd\x6f\x91\x15\xe1" + "\xf5\x7a\x6e\x18\xbd\xdd\x61\x50" + "\x59\xa9\x97\xab\xbb\x0e\x74\x5c" + "\x00\xa4\x43\x54\x04\x54\x9b\x3b" + "\x77\xec\xfd\x5c\xa6\xe8\x7b\x08" + "\xae\xe6\x10\x3f\x32\x65\xd1\xfc" + "\xa4\x1d\x2c\x31\xfb\x33\x7a\xb3" + "\x35\x23\xf4\x20\x41\xd4\xad\x82" + 
"\x8b\xa4\xad\x96\x1c\x20\x53\xbe" + "\x0e\xa6\xf4\xdc\x78\x49\x3e\x72" + "\xb1\xa9\xb5\x83\xcb\x08\x54\xb7" + "\xad\x49\x3a\xae\x98\xce\xa6\x66" + "\x10\x30\x90\x8c\x55\x83\xd7\x7c" + "\x8b\xe6\x53\xde\xd2\x6e\x18\x21" + "\x01\x52\xd1\x9f\x9d\xbb\x9c\x73" + "\x57\xcc\x89\x09\x75\x9b\x78\x70" + "\xed\x26\x97\x4d\xb4\xe4\x0c\xa5" + "\xfa\x70\x04\x70\xc6\x96\x1c\x7d" + "\x54\x41\x77\xa8\xe3\xb0\x7e\x96" + "\x82\xd9\xec\xa2\x87\x68\x55\xf9" + "\x8f\x9e\x73\x43\x47\x6a\x08\x36" + "\x93\x67\xa8\x2d\xde\xac\x41\xa9" + "\x5c\x4d\x73\x97\x0f\x70\x68\xfa" + "\x56\x4d\x00\xc2\x3b\x1f\xc8\xb9" + "\x78\x1f\x51\x07\xe3\x9a\x13\x4e" + "\xed\x2b\x2e\xa3\xf7\x44\xb2\xe7" + "\xab\x19\x37\xd9\xba\x76\x5e\xd2" + "\xf2\x53\x15\x17\x4c\x6b\x16\x9f" + "\x02\x66\x49\xca\x7c\x91\x05\xf2" + "\x45\x36\x1e\xf5\x77\xad\x1f\x46" + "\xa8\x13\xfb\x63\xb6\x08\x99\x63" + "\x82\xa2\xed\xb3\xac\xdf\x43\x19" + "\x45\xea\x78\x73\xd9\xb7\x39\x11" + "\xa3\x13\x7c\xf8\x3f\xf7\xad\x81" + "\x48\x2f\xa9\x5c\x5f\xa0\xf0\x79" + "\xa4\x47\x7d\x80\x20\x26\xfd\x63" + "\x0a\xc7\x7e\x6d\x75\x47\xff\x76" + "\x66\x2e\x8a\x6c\x81\x35\xaf\x0b" + "\x2e\x6a\x49\x60\xc1\x10\xe1\xe1" + "\x54\x03\xa4\x09\x0c\x37\x7a\x15" + "\x23\x27\x5b\x8b\x4b\xa5\x64\x97" + "\xae\x4a\x50\x73\x1f\x66\x1c\x5c" + "\x03\x25\x3c\x8d\x48\x58\x71\x34" + "\x0e\xec\x4e\x55\x1a\x03\x6a\xe5" + "\xb6\x19\x2b\x84\x2a\x20\xd1\xea" + "\x80\x6f\x96\x0e\x05\x62\xc7\x78" + "\x87\x79\x60\x38\x46\xb4\x25\x57" + "\x6e\x16\x63\xf8\xad\x6e\xd7\x42" + "\x69\xe1\x88\xef\x6e\xd5\xb4\x9a" + "\x3c\x78\x6c\x3b\xe5\xa0\x1d\x22" + "\x86\x5c\x74\x3a\xeb\x24\x26\xc7" + "\x09\xfc\x91\x96\x47\x87\x4f\x1a" + "\xd6\x6b\x2c\x18\x47\xc0\xb8\x24" + "\xa8\x5a\x4a\x9e\xcb\x03\xe7\x2a" + "\x09\xe6\x4d\x9c\x6d\x86\x60\xf5" + "\x2f\x48\x69\x37\x9f\xf2\xd2\xcb" + "\x0e\x5a\xdd\x6e\x8a\xfb\x6a\xfe" + "\x0b\x63\xde\x87\x42\x79\x8a\x68" + "\x51\x28\x9b\x7a\xeb\xaf\xb8\x2f" + "\x9d\xd1\xc7\x45\x90\x08\xc9\x83" + "\xe9\x83\x84\xcb\x28\x69\x09\x69" + "\xce\x99\x46\x00\x54\xcb\xd8\x38" + 
"\xf9\x53\x4a\xbf\x31\xce\x57\x15" + "\x33\xfa\x96\x04\x33\x42\xe3\xc0" + "\xb7\x54\x4a\x65\x7a\x7c\x02\xe6" + "\x19\x95\xd0\x0e\x82\x07\x63\xf9" + "\xe1\x2b\x2a\xfc\x55\x92\x52\xc9" + "\xb5\x9f\x23\x28\x60\xe7\x20\x51" + "\x10\xd3\xed\x6d\x9b\xab\xb8\xe2" + "\x5d\x9a\x34\xb3\xbe\x9c\x64\xcb" + "\x78\xc6\x91\x22\x40\x91\x80\xbe" + "\xd7\x78\x5c\x0e\x0a\xdc\x08\xe9" + "\x67\x10\xa4\x83\x98\x79\x23\xe7" + "\x92\xda\xa9\x22\x16\xb1\xe7\x78" + "\xa3\x1c\x6c\x8f\x35\x7c\x4d\x37" + "\x2f\x6e\x0b\x50\x5c\x34\xb9\xf9" + "\xe6\x3d\x91\x0d\x32\x95\xaa\x3d" + "\x48\x11\x06\xbb\x2d\xf2\x63\x88" + "\x3f\x73\x09\xe2\x45\x56\x31\x51" + "\xfa\x5e\x4e\x62\xf7\x90\xf9\xa9" + "\x7d\x7b\x1b\xb1\xc8\x26\x6e\x66" + "\xf6\x90\x9a\x7f\xf2\x57\xcc\x23" + "\x59\xfa\xfa\xaa\x44\x04\x01\xa7" + "\xa4\x78\xdb\x74\x3d\x8b\xb5", + .plen = 719, + .ctext = "\xc5\x50\x85\x02\x72\xa8\xb3\x62" + "\xf9\xcd\x77\x7b\x43\xa5\x04\x70" + "\x68\x40\x57\x21\x1c\xfe\xef\x05" + "\x4d\xb8\x44\xba\x59\xea\x62\x32" + "\xcb\x6b\x6a\x39\x9b\xf3\xe5\xa4" + "\x36\x38\xde\x7d\xcf\xb6\xcd\xe3" + "\x89\xbf\x37\xc9\x96\x3c\x70\x10" + "\x92\x47\xcc\xac\x6f\xf8\x55\x9a" + "\x26\x43\x34\xb4\x92\x7d\x68\xfc" + "\x60\x37\x74\x2a\x55\xba\xc7\xd7" + "\x98\x69\xb7\xcf\x42\xfd\xb2\x10" + "\xa0\x59\xe1\x2c\x73\x66\x12\x97" + "\x85\x8b\x28\xcc\x29\x02\x15\x89" + "\x23\xd3\x32\x92\x87\x57\x09\x13" + "\x04\x7e\x8b\x6c\x3a\xc1\x4e\x6c" + "\xe1\x9f\xc8\xcc\x47\x9c\xd8\x10" + "\xf4\xb7\x5c\x30\x7a\x8b\x0f\x01" + "\x52\x38\x02\x92\x99\xac\x03\x90" + "\x18\x32\x2d\x21\x6a\x0a\x2a\xe7" + "\xc2\xcc\x15\x84\x4e\x2b\x0b\x3a" + "\x4c\xdc\xb0\x6b\x10\xd1\x27\x10" + "\xf0\x4a\x5c\x43\xa0\x34\x34\x59" + "\x47\x43\x48\xcb\x69\xa7\xff\x52" + "\xb8\xca\x23\x09\x07\xd7\xc5\xe4" + "\x2a\x4f\x99\xd5\x83\x36\x2a\x2d" + "\x59\xd0\xca\xb0\xfa\x40\x8c\xab" + "\xdf\x69\x08\xd9\x79\x1d\xde\xa8" + "\x0b\x34\x74\x4d\xf5\xa0\x4c\x81" + "\x7f\x93\x06\x40\x24\xfe\x7d\xcd" + "\xe4\xfe\xf8\xf8\x30\xce\xd0\x5d" + "\x70\xfd\x0d\x5a\x78\x85\x74\x2d" + 
"\xe4\xb5\x40\x18\x99\x11\xe4\x6a" + "\xdf\xfa\x4f\x25\x2c\xde\x15\xb7" + "\x12\xd8\xc6\x90\x0d\x0f\xc9\xfb" + "\x21\xf1\xed\xfe\x98\xe1\x03\xe2" + "\x5c\xef\xb6\xc7\x87\x77\x0e\xcd" + "\xff\x78\x94\xc9\xbe\xd3\x47\xf7" + "\x8d\x37\x48\x01\x42\xe2\x17\x96" + "\xfc\xc0\xcb\x7b\x7b\x57\xaf\x3b" + "\xc9\xd0\x94\xce\x5e\x1b\xa9\x47" + "\x02\x4d\x74\xcc\x45\x1d\xd3\x2d" + "\x5f\x4f\x7f\xf2\x4b\xf9\x59\xee" + "\x9e\x9e\xb9\x95\x29\x19\xd1\x5f" + "\x72\xab\x8d\xf1\x28\xd1\x1c\xae" + "\xc2\xba\xf7\x22\x84\x2c\x83\x51" + "\x03\xad\xa3\xef\x81\xa7\xdc\xf1" + "\x44\x51\x50\x96\x70\xd1\xe5\x47" + "\x57\xf9\x30\x90\xe4\xbf\xfc\x75" + "\x14\xaa\x4d\xb7\xb1\xe7\x79\x33" + "\x43\xc2\x5c\xc1\xbc\x09\x92\x0f" + "\xa7\xaf\x68\x51\x51\xec\x0b\xc3" + "\x3d\x2b\x94\x30\x45\x29\x1b\x9e" + "\x70\x56\xf8\xd6\x67\x2d\x39\x3b" + "\x3c\xd2\xd0\xd3\xdc\x7d\x84\xe9" + "\x06\x31\x98\xa6\x5c\xbf\x10\x58" + "\xce\xbb\xa7\xe1\x65\x7e\x51\x87" + "\x70\x46\xb4\x7f\xf9\xec\x92\x1c" + "\x9b\x24\x49\xc1\x04\xbe\x1c\x5f" + "\xcc\xb3\x33\x8c\xad\xe7\xdc\x32" + "\x54\xa2\x0d\x83\x0f\x3c\x12\x5d" + "\x71\xe3\x9c\xae\x71\xa3\x2a\x10" + "\xc5\x91\xb4\x73\x96\x60\xdb\x5d" + "\x1f\xd5\x9a\xd2\x69\xc3\xd7\x4b" + "\xa2\x66\x81\x96\x4a\xaa\x02\xd6" + "\xd5\x44\x9b\x42\x3a\x15\x5f\xe7" + "\x4d\x7c\xf6\x71\x4a\xea\xe8\x43" + "\xd7\x68\xe4\xbc\x05\x87\x49\x05" + "\x3b\x47\xb2\x6d\x5f\xd1\x11\xa6" + "\x58\xd4\xa2\x45\xec\xb5\x54\x55" + "\xd3\xd6\xd2\x6a\x8b\x21\x9e\x2c" + "\xf1\x27\x4b\x5b\xe3\xff\xe0\xfd" + "\x4b\xf1\xe7\xe2\x84\xf2\x17\x37" + "\x11\x68\xc4\x92\x4b\x6b\xef\x8e" + "\x75\xf5\xc2\x7d\x5c\xe9\x7c\xfc" + "\x2b\x00\x33\x0e\x7d\x69\xd8\xd4" + "\x9b\xa8\x38\x54\x7e\x6d\x23\x51" + "\x2c\xd6\xc4\x58\x23\x1c\x22\x2a" + "\x59\xc5\x9b\xec\x9d\xbf\x03\x0f" + "\xb3\xdd\xba\x02\x22\xa0\x34\x37" + "\x19\x56\xc2\x5b\x32\x1d\x1e\x66" + "\x68\xf4\x47\x05\x04\x18\xa7\x28" + "\x80\xf2\xc7\x99\xed\x1e\x72\x48" + "\x8f\x97\x5d\xb3\x74\x42\xfd\x0c" + "\x0f\x5f\x29\x0c\xf1\x35\x22\x90" + "\xd6\x7c\xb8\xa3\x2a\x89\x38\x71" + 
"\xe9\x7a\x55\x3c\x3b\xf2\x6e\x1a" + "\x22\x8f\x07\x81\xc1\xe1\xf1\x76" + "\x2a\x75\xab\x86\xc4\xcc\x52\x59" + "\x83\x19\x5e\xb3\x53\xe2\x81\xdf" + "\xe6\x15\xb3\xba\x0c\x0e\xba" + "\xa9\x2c\xed\x51\xd5\x06\xc8\xc6" + "\x4b\x9f\x5d\x1b\x61\x31\xad\xf4", + .clen = 735, } }; @@ -15030,6 +15945,68 @@ static const struct hash_testvec sm4_cmac128_tv_template[] = { } }; +static const struct hash_testvec sm4_xcbc128_tv_template[] = { + { /* Generated from AES-XCBC128 test vectors */ + .key = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + .plaintext = zeroed_string, + .digest = "\xa9\x9a\x5c\x44\xe2\x34\xee\x2c" + "\x9b\xe4\x9d\xca\x64\xb0\xa5\xc4", + .psize = 0, + .ksize = 16, + }, { + .key = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + .plaintext = "\x00\x01\x02", + .digest = "\x17\x27\x62\xf3\x8b\x88\x1d\xc0" + "\x97\x35\x9c\x3e\x9f\x27\xb7\x83", + .psize = 3, + .ksize = 16, + } , { + .key = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + .plaintext = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + .digest = "\xda\x45\xd1\xac\xec\x4d\xab\x46" + "\xdd\x59\xe0\x44\xff\x59\xd5\xfc", + .psize = 16, + .ksize = 16, + }, { + .key = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + .plaintext = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + "\x10\x11\x12\x13", + .digest = "\xbe\x24\x5d\x81\x8c\x8a\x10\xa4" + "\x8e\xc2\x16\xfa\xa4\x83\xc9\x2a", + .psize = 20, + .ksize = 16, + }, { + .key = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + .plaintext = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + "\x10\x11\x12\x13\x14\x15\x16\x17" + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", + .digest = "\x91\x82\x31\x56\xd5\x77\xa4\xc5" + "\x88\x2d\xce\x3a\x87\x5e\xbd\xba", + .psize = 32, + .ksize = 16, + }, { + .key = "\x00\x01\x02\x03\x04\x05\x06\x07" + 
"\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + .plaintext = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + "\x10\x11\x12\x13\x14\x15\x16\x17" + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + "\x20\x21", + .digest = "\x2a\xae\xa5\x24\x0c\x12\x9f\x5f" + "\x55\xfb\xae\x35\x13\x0d\x22\x2d", + .psize = 34, + .ksize = 16, + } +}; + /* Cast6 test vectors from RFC 2612 */ static const struct cipher_testvec cast6_tv_template[] = { { From 3c3836378dd578a0d510420c0f67818e8dc49f0e Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Thu, 27 Oct 2022 14:54:57 +0800 Subject: [PATCH 0902/4122] crypto: tcrypt - add SM4 cts-cbc/xts/xcbc test Added CTS-CBC/XTS/XCBC tests for SM4 algorithms, as well as corresponding speed tests, this is to test performance-optimized implementations of these modes. Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu --- crypto/tcrypt.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index b096ae901aa8..0f101897e90f 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -1710,6 +1710,10 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) ret = min(ret, tcrypt_test("gcm(aria)")); break; + case 59: + ret = min(ret, tcrypt_test("cts(cbc(sm4))")); + break; + case 100: ret = min(ret, tcrypt_test("hmac(md5)")); break; @@ -1810,6 +1814,10 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) ret = min(ret, tcrypt_test("cmac(sm4)")); break; + case 160: + ret = min(ret, tcrypt_test("xcbc(sm4)")); + break; + case 181: ret = min(ret, tcrypt_test("authenc(hmac(sha1),cbc(des))")); break; @@ -1845,6 +1853,7 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) ret = min(ret, tcrypt_test("cbc(sm4)")); ret = min(ret, tcrypt_test("cfb(sm4)")); ret = min(ret, tcrypt_test("ctr(sm4)")); + ret = min(ret, tcrypt_test("xts(sm4)")); break; case 192: ret = min(ret, tcrypt_test("ecb(aria)")); @@ -2108,6 +2117,10 @@ static int do_test(const 
char *alg, u32 type, u32 mask, int m, u32 num_mb) speed_template_16); test_cipher_speed("cbc(sm4)", DECRYPT, sec, NULL, 0, speed_template_16); + test_cipher_speed("cts(cbc(sm4))", ENCRYPT, sec, NULL, 0, + speed_template_16); + test_cipher_speed("cts(cbc(sm4))", DECRYPT, sec, NULL, 0, + speed_template_16); test_cipher_speed("cfb(sm4)", ENCRYPT, sec, NULL, 0, speed_template_16); test_cipher_speed("cfb(sm4)", DECRYPT, sec, NULL, 0, @@ -2116,6 +2129,10 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) speed_template_16); test_cipher_speed("ctr(sm4)", DECRYPT, sec, NULL, 0, speed_template_16); + test_cipher_speed("xts(sm4)", ENCRYPT, sec, NULL, 0, + speed_template_32); + test_cipher_speed("xts(sm4)", DECRYPT, sec, NULL, 0, + speed_template_32); break; case 219: @@ -2629,6 +2646,10 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) speed_template_16); test_acipher_speed("ctr(sm4)", DECRYPT, sec, NULL, 0, speed_template_16); + test_acipher_speed("xts(sm4)", ENCRYPT, sec, NULL, 0, + speed_template_32); + test_acipher_speed("xts(sm4)", DECRYPT, sec, NULL, 0, + speed_template_32); break; case 519: From ce41fefd2443c25166458f24621b53a28fff989f Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Thu, 27 Oct 2022 14:54:58 +0800 Subject: [PATCH 0903/4122] crypto: arm64/sm4 - refactor and simplify CE implementation This patch does not add new features, but only refactors and simplifies the implementation of the Crypto Extension acceleration of the SM4 algorithm: Extract the macro optimized by SM4 Crypto Extension for reuse in the subsequent optimization of CCM/GCM modes. Encryption in CBC and CFB modes processes four blocks at a time instead of one, allowing the ld1 instruction to load 64 bytes of data at a time, which reduces unnecessary memory accesses. CBC/CFB/CTR makes full use of free registers to reduce redundant memory accesses, and rearranges some instructions to improve out-of-order execution capabilities.
Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu --- arch/arm64/crypto/sm4-ce-asm.h | 209 +++++++++++ arch/arm64/crypto/sm4-ce-core.S | 636 ++++++++++++++------------------ arch/arm64/crypto/sm4-ce-glue.c | 66 ++-- 3 files changed, 515 insertions(+), 396 deletions(-) create mode 100644 arch/arm64/crypto/sm4-ce-asm.h diff --git a/arch/arm64/crypto/sm4-ce-asm.h b/arch/arm64/crypto/sm4-ce-asm.h new file mode 100644 index 000000000000..7ea98e42e779 --- /dev/null +++ b/arch/arm64/crypto/sm4-ce-asm.h @@ -0,0 +1,209 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM4 helper macros for Crypto Extensions + * Copyright (C) 2022 Tianjia Zhang + */ + +#define SM4_PREPARE(ptr) \ + ld1 {v24.16b-v27.16b}, [ptr], #64; \ + ld1 {v28.16b-v31.16b}, [ptr]; + +#define SM4_CRYPT_BLK_BE(b0) \ + sm4e b0.4s, v24.4s; \ + sm4e b0.4s, v25.4s; \ + sm4e b0.4s, v26.4s; \ + sm4e b0.4s, v27.4s; \ + sm4e b0.4s, v28.4s; \ + sm4e b0.4s, v29.4s; \ + sm4e b0.4s, v30.4s; \ + sm4e b0.4s, v31.4s; \ + rev64 b0.4s, b0.4s; \ + ext b0.16b, b0.16b, b0.16b, #8; \ + rev32 b0.16b, b0.16b; + +#define SM4_CRYPT_BLK(b0) \ + rev32 b0.16b, b0.16b; \ + SM4_CRYPT_BLK_BE(b0); + +#define SM4_CRYPT_BLK2_BE(b0, b1) \ + sm4e b0.4s, v24.4s; \ + sm4e b1.4s, v24.4s; \ + sm4e b0.4s, v25.4s; \ + sm4e b1.4s, v25.4s; \ + sm4e b0.4s, v26.4s; \ + sm4e b1.4s, v26.4s; \ + sm4e b0.4s, v27.4s; \ + sm4e b1.4s, v27.4s; \ + sm4e b0.4s, v28.4s; \ + sm4e b1.4s, v28.4s; \ + sm4e b0.4s, v29.4s; \ + sm4e b1.4s, v29.4s; \ + sm4e b0.4s, v30.4s; \ + sm4e b1.4s, v30.4s; \ + sm4e b0.4s, v31.4s; \ + sm4e b1.4s, v31.4s; \ + rev64 b0.4s, b0.4s; \ + rev64 b1.4s, b1.4s; \ + ext b0.16b, b0.16b, b0.16b, #8; \ + ext b1.16b, b1.16b, b1.16b, #8; \ + rev32 b0.16b, b0.16b; \ + rev32 b1.16b, b1.16b; \ + +#define SM4_CRYPT_BLK2(b0, b1) \ + rev32 b0.16b, b0.16b; \ + rev32 b1.16b, b1.16b; \ + SM4_CRYPT_BLK2_BE(b0, b1); + +#define SM4_CRYPT_BLK4_BE(b0, b1, b2, b3) \ + sm4e b0.4s, v24.4s; \ + sm4e b1.4s, v24.4s; \ + sm4e b2.4s, v24.4s; \ + sm4e b3.4s, 
v24.4s; \ + sm4e b0.4s, v25.4s; \ + sm4e b1.4s, v25.4s; \ + sm4e b2.4s, v25.4s; \ + sm4e b3.4s, v25.4s; \ + sm4e b0.4s, v26.4s; \ + sm4e b1.4s, v26.4s; \ + sm4e b2.4s, v26.4s; \ + sm4e b3.4s, v26.4s; \ + sm4e b0.4s, v27.4s; \ + sm4e b1.4s, v27.4s; \ + sm4e b2.4s, v27.4s; \ + sm4e b3.4s, v27.4s; \ + sm4e b0.4s, v28.4s; \ + sm4e b1.4s, v28.4s; \ + sm4e b2.4s, v28.4s; \ + sm4e b3.4s, v28.4s; \ + sm4e b0.4s, v29.4s; \ + sm4e b1.4s, v29.4s; \ + sm4e b2.4s, v29.4s; \ + sm4e b3.4s, v29.4s; \ + sm4e b0.4s, v30.4s; \ + sm4e b1.4s, v30.4s; \ + sm4e b2.4s, v30.4s; \ + sm4e b3.4s, v30.4s; \ + sm4e b0.4s, v31.4s; \ + sm4e b1.4s, v31.4s; \ + sm4e b2.4s, v31.4s; \ + sm4e b3.4s, v31.4s; \ + rev64 b0.4s, b0.4s; \ + rev64 b1.4s, b1.4s; \ + rev64 b2.4s, b2.4s; \ + rev64 b3.4s, b3.4s; \ + ext b0.16b, b0.16b, b0.16b, #8; \ + ext b1.16b, b1.16b, b1.16b, #8; \ + ext b2.16b, b2.16b, b2.16b, #8; \ + ext b3.16b, b3.16b, b3.16b, #8; \ + rev32 b0.16b, b0.16b; \ + rev32 b1.16b, b1.16b; \ + rev32 b2.16b, b2.16b; \ + rev32 b3.16b, b3.16b; + +#define SM4_CRYPT_BLK4(b0, b1, b2, b3) \ + rev32 b0.16b, b0.16b; \ + rev32 b1.16b, b1.16b; \ + rev32 b2.16b, b2.16b; \ + rev32 b3.16b, b3.16b; \ + SM4_CRYPT_BLK4_BE(b0, b1, b2, b3); + +#define SM4_CRYPT_BLK8_BE(b0, b1, b2, b3, b4, b5, b6, b7) \ + sm4e b0.4s, v24.4s; \ + sm4e b1.4s, v24.4s; \ + sm4e b2.4s, v24.4s; \ + sm4e b3.4s, v24.4s; \ + sm4e b4.4s, v24.4s; \ + sm4e b5.4s, v24.4s; \ + sm4e b6.4s, v24.4s; \ + sm4e b7.4s, v24.4s; \ + sm4e b0.4s, v25.4s; \ + sm4e b1.4s, v25.4s; \ + sm4e b2.4s, v25.4s; \ + sm4e b3.4s, v25.4s; \ + sm4e b4.4s, v25.4s; \ + sm4e b5.4s, v25.4s; \ + sm4e b6.4s, v25.4s; \ + sm4e b7.4s, v25.4s; \ + sm4e b0.4s, v26.4s; \ + sm4e b1.4s, v26.4s; \ + sm4e b2.4s, v26.4s; \ + sm4e b3.4s, v26.4s; \ + sm4e b4.4s, v26.4s; \ + sm4e b5.4s, v26.4s; \ + sm4e b6.4s, v26.4s; \ + sm4e b7.4s, v26.4s; \ + sm4e b0.4s, v27.4s; \ + sm4e b1.4s, v27.4s; \ + sm4e b2.4s, v27.4s; \ + sm4e b3.4s, v27.4s; \ + sm4e b4.4s, v27.4s; \ + sm4e b5.4s, v27.4s; \ + sm4e 
b6.4s, v27.4s; \ + sm4e b7.4s, v27.4s; \ + sm4e b0.4s, v28.4s; \ + sm4e b1.4s, v28.4s; \ + sm4e b2.4s, v28.4s; \ + sm4e b3.4s, v28.4s; \ + sm4e b4.4s, v28.4s; \ + sm4e b5.4s, v28.4s; \ + sm4e b6.4s, v28.4s; \ + sm4e b7.4s, v28.4s; \ + sm4e b0.4s, v29.4s; \ + sm4e b1.4s, v29.4s; \ + sm4e b2.4s, v29.4s; \ + sm4e b3.4s, v29.4s; \ + sm4e b4.4s, v29.4s; \ + sm4e b5.4s, v29.4s; \ + sm4e b6.4s, v29.4s; \ + sm4e b7.4s, v29.4s; \ + sm4e b0.4s, v30.4s; \ + sm4e b1.4s, v30.4s; \ + sm4e b2.4s, v30.4s; \ + sm4e b3.4s, v30.4s; \ + sm4e b4.4s, v30.4s; \ + sm4e b5.4s, v30.4s; \ + sm4e b6.4s, v30.4s; \ + sm4e b7.4s, v30.4s; \ + sm4e b0.4s, v31.4s; \ + sm4e b1.4s, v31.4s; \ + sm4e b2.4s, v31.4s; \ + sm4e b3.4s, v31.4s; \ + sm4e b4.4s, v31.4s; \ + sm4e b5.4s, v31.4s; \ + sm4e b6.4s, v31.4s; \ + sm4e b7.4s, v31.4s; \ + rev64 b0.4s, b0.4s; \ + rev64 b1.4s, b1.4s; \ + rev64 b2.4s, b2.4s; \ + rev64 b3.4s, b3.4s; \ + rev64 b4.4s, b4.4s; \ + rev64 b5.4s, b5.4s; \ + rev64 b6.4s, b6.4s; \ + rev64 b7.4s, b7.4s; \ + ext b0.16b, b0.16b, b0.16b, #8; \ + ext b1.16b, b1.16b, b1.16b, #8; \ + ext b2.16b, b2.16b, b2.16b, #8; \ + ext b3.16b, b3.16b, b3.16b, #8; \ + ext b4.16b, b4.16b, b4.16b, #8; \ + ext b5.16b, b5.16b, b5.16b, #8; \ + ext b6.16b, b6.16b, b6.16b, #8; \ + ext b7.16b, b7.16b, b7.16b, #8; \ + rev32 b0.16b, b0.16b; \ + rev32 b1.16b, b1.16b; \ + rev32 b2.16b, b2.16b; \ + rev32 b3.16b, b3.16b; \ + rev32 b4.16b, b4.16b; \ + rev32 b5.16b, b5.16b; \ + rev32 b6.16b, b6.16b; \ + rev32 b7.16b, b7.16b; + +#define SM4_CRYPT_BLK8(b0, b1, b2, b3, b4, b5, b6, b7) \ + rev32 b0.16b, b0.16b; \ + rev32 b1.16b, b1.16b; \ + rev32 b2.16b, b2.16b; \ + rev32 b3.16b, b3.16b; \ + rev32 b4.16b, b4.16b; \ + rev32 b5.16b, b5.16b; \ + rev32 b6.16b, b6.16b; \ + rev32 b7.16b, b7.16b; \ + SM4_CRYPT_BLK8_BE(b0, b1, b2, b3, b4, b5, b6, b7); diff --git a/arch/arm64/crypto/sm4-ce-core.S b/arch/arm64/crypto/sm4-ce-core.S index 934e0f093279..41fc745a8528 100644 --- a/arch/arm64/crypto/sm4-ce-core.S +++ 
b/arch/arm64/crypto/sm4-ce-core.S @@ -10,10 +10,12 @@ #include #include +#include "sm4-ce-asm.h" .arch armv8-a+crypto -.irp b, 0, 1, 2, 3, 4, 5, 6, 7, 16, 20, 24, 25, 26, 27, 28, 29, 30, 31 +.irp b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, \ + 20, 24, 25, 26, 27, 28, 29, 30, 31 .set .Lv\b\().4s, \b .endr @@ -34,174 +36,6 @@ #define RIV v20 -/* Helper macros. */ - -#define PREPARE \ - ld1 {v24.16b-v27.16b}, [x0], #64; \ - ld1 {v28.16b-v31.16b}, [x0]; - -#define SM4_CRYPT_BLK(b0) \ - rev32 b0.16b, b0.16b; \ - sm4e b0.4s, v24.4s; \ - sm4e b0.4s, v25.4s; \ - sm4e b0.4s, v26.4s; \ - sm4e b0.4s, v27.4s; \ - sm4e b0.4s, v28.4s; \ - sm4e b0.4s, v29.4s; \ - sm4e b0.4s, v30.4s; \ - sm4e b0.4s, v31.4s; \ - rev64 b0.4s, b0.4s; \ - ext b0.16b, b0.16b, b0.16b, #8; \ - rev32 b0.16b, b0.16b; - -#define SM4_CRYPT_BLK4(b0, b1, b2, b3) \ - rev32 b0.16b, b0.16b; \ - rev32 b1.16b, b1.16b; \ - rev32 b2.16b, b2.16b; \ - rev32 b3.16b, b3.16b; \ - sm4e b0.4s, v24.4s; \ - sm4e b1.4s, v24.4s; \ - sm4e b2.4s, v24.4s; \ - sm4e b3.4s, v24.4s; \ - sm4e b0.4s, v25.4s; \ - sm4e b1.4s, v25.4s; \ - sm4e b2.4s, v25.4s; \ - sm4e b3.4s, v25.4s; \ - sm4e b0.4s, v26.4s; \ - sm4e b1.4s, v26.4s; \ - sm4e b2.4s, v26.4s; \ - sm4e b3.4s, v26.4s; \ - sm4e b0.4s, v27.4s; \ - sm4e b1.4s, v27.4s; \ - sm4e b2.4s, v27.4s; \ - sm4e b3.4s, v27.4s; \ - sm4e b0.4s, v28.4s; \ - sm4e b1.4s, v28.4s; \ - sm4e b2.4s, v28.4s; \ - sm4e b3.4s, v28.4s; \ - sm4e b0.4s, v29.4s; \ - sm4e b1.4s, v29.4s; \ - sm4e b2.4s, v29.4s; \ - sm4e b3.4s, v29.4s; \ - sm4e b0.4s, v30.4s; \ - sm4e b1.4s, v30.4s; \ - sm4e b2.4s, v30.4s; \ - sm4e b3.4s, v30.4s; \ - sm4e b0.4s, v31.4s; \ - sm4e b1.4s, v31.4s; \ - sm4e b2.4s, v31.4s; \ - sm4e b3.4s, v31.4s; \ - rev64 b0.4s, b0.4s; \ - rev64 b1.4s, b1.4s; \ - rev64 b2.4s, b2.4s; \ - rev64 b3.4s, b3.4s; \ - ext b0.16b, b0.16b, b0.16b, #8; \ - ext b1.16b, b1.16b, b1.16b, #8; \ - ext b2.16b, b2.16b, b2.16b, #8; \ - ext b3.16b, b3.16b, b3.16b, #8; \ - rev32 b0.16b, b0.16b; \ - rev32 b1.16b, 
b1.16b; \ - rev32 b2.16b, b2.16b; \ - rev32 b3.16b, b3.16b; - -#define SM4_CRYPT_BLK8(b0, b1, b2, b3, b4, b5, b6, b7) \ - rev32 b0.16b, b0.16b; \ - rev32 b1.16b, b1.16b; \ - rev32 b2.16b, b2.16b; \ - rev32 b3.16b, b3.16b; \ - rev32 b4.16b, b4.16b; \ - rev32 b5.16b, b5.16b; \ - rev32 b6.16b, b6.16b; \ - rev32 b7.16b, b7.16b; \ - sm4e b0.4s, v24.4s; \ - sm4e b1.4s, v24.4s; \ - sm4e b2.4s, v24.4s; \ - sm4e b3.4s, v24.4s; \ - sm4e b4.4s, v24.4s; \ - sm4e b5.4s, v24.4s; \ - sm4e b6.4s, v24.4s; \ - sm4e b7.4s, v24.4s; \ - sm4e b0.4s, v25.4s; \ - sm4e b1.4s, v25.4s; \ - sm4e b2.4s, v25.4s; \ - sm4e b3.4s, v25.4s; \ - sm4e b4.4s, v25.4s; \ - sm4e b5.4s, v25.4s; \ - sm4e b6.4s, v25.4s; \ - sm4e b7.4s, v25.4s; \ - sm4e b0.4s, v26.4s; \ - sm4e b1.4s, v26.4s; \ - sm4e b2.4s, v26.4s; \ - sm4e b3.4s, v26.4s; \ - sm4e b4.4s, v26.4s; \ - sm4e b5.4s, v26.4s; \ - sm4e b6.4s, v26.4s; \ - sm4e b7.4s, v26.4s; \ - sm4e b0.4s, v27.4s; \ - sm4e b1.4s, v27.4s; \ - sm4e b2.4s, v27.4s; \ - sm4e b3.4s, v27.4s; \ - sm4e b4.4s, v27.4s; \ - sm4e b5.4s, v27.4s; \ - sm4e b6.4s, v27.4s; \ - sm4e b7.4s, v27.4s; \ - sm4e b0.4s, v28.4s; \ - sm4e b1.4s, v28.4s; \ - sm4e b2.4s, v28.4s; \ - sm4e b3.4s, v28.4s; \ - sm4e b4.4s, v28.4s; \ - sm4e b5.4s, v28.4s; \ - sm4e b6.4s, v28.4s; \ - sm4e b7.4s, v28.4s; \ - sm4e b0.4s, v29.4s; \ - sm4e b1.4s, v29.4s; \ - sm4e b2.4s, v29.4s; \ - sm4e b3.4s, v29.4s; \ - sm4e b4.4s, v29.4s; \ - sm4e b5.4s, v29.4s; \ - sm4e b6.4s, v29.4s; \ - sm4e b7.4s, v29.4s; \ - sm4e b0.4s, v30.4s; \ - sm4e b1.4s, v30.4s; \ - sm4e b2.4s, v30.4s; \ - sm4e b3.4s, v30.4s; \ - sm4e b4.4s, v30.4s; \ - sm4e b5.4s, v30.4s; \ - sm4e b6.4s, v30.4s; \ - sm4e b7.4s, v30.4s; \ - sm4e b0.4s, v31.4s; \ - sm4e b1.4s, v31.4s; \ - sm4e b2.4s, v31.4s; \ - sm4e b3.4s, v31.4s; \ - sm4e b4.4s, v31.4s; \ - sm4e b5.4s, v31.4s; \ - sm4e b6.4s, v31.4s; \ - sm4e b7.4s, v31.4s; \ - rev64 b0.4s, b0.4s; \ - rev64 b1.4s, b1.4s; \ - rev64 b2.4s, b2.4s; \ - rev64 b3.4s, b3.4s; \ - rev64 b4.4s, b4.4s; \ - rev64 b5.4s, 
b5.4s; \ - rev64 b6.4s, b6.4s; \ - rev64 b7.4s, b7.4s; \ - ext b0.16b, b0.16b, b0.16b, #8; \ - ext b1.16b, b1.16b, b1.16b, #8; \ - ext b2.16b, b2.16b, b2.16b, #8; \ - ext b3.16b, b3.16b, b3.16b, #8; \ - ext b4.16b, b4.16b, b4.16b, #8; \ - ext b5.16b, b5.16b, b5.16b, #8; \ - ext b6.16b, b6.16b, b6.16b, #8; \ - ext b7.16b, b7.16b, b7.16b, #8; \ - rev32 b0.16b, b0.16b; \ - rev32 b1.16b, b1.16b; \ - rev32 b2.16b, b2.16b; \ - rev32 b3.16b, b3.16b; \ - rev32 b4.16b, b4.16b; \ - rev32 b5.16b, b5.16b; \ - rev32 b6.16b, b6.16b; \ - rev32 b7.16b, b7.16b; - .align 3 SYM_FUNC_START(sm4_ce_expand_key) @@ -268,7 +102,7 @@ SYM_FUNC_START(sm4_ce_crypt_block) * x1: dst * x2: src */ - PREPARE; + SM4_PREPARE(x0) ld1 {v0.16b}, [x2]; SM4_CRYPT_BLK(v0); @@ -285,7 +119,7 @@ SYM_FUNC_START(sm4_ce_crypt) * x2: src * w3: nblocks */ - PREPARE; + SM4_PREPARE(x0) .Lcrypt_loop_blk: sub w3, w3, #8; @@ -337,26 +171,50 @@ SYM_FUNC_START(sm4_ce_cbc_enc) * x3: iv (big endian, 128 bit) * w4: nblocks */ - PREPARE; + SM4_PREPARE(x0) - ld1 {RIV.16b}, [x3]; + ld1 {RIV.16b}, [x3] -.Lcbc_enc_loop: - sub w4, w4, #1; +.Lcbc_enc_loop_4x: + cmp w4, #4 + blt .Lcbc_enc_loop_1x - ld1 {RTMP0.16b}, [x2], #16; - eor RIV.16b, RIV.16b, RTMP0.16b; + sub w4, w4, #4 - SM4_CRYPT_BLK(RIV); + ld1 {v0.16b-v3.16b}, [x2], #64 - st1 {RIV.16b}, [x1], #16; + eor v0.16b, v0.16b, RIV.16b + SM4_CRYPT_BLK(v0) + eor v1.16b, v1.16b, v0.16b + SM4_CRYPT_BLK(v1) + eor v2.16b, v2.16b, v1.16b + SM4_CRYPT_BLK(v2) + eor v3.16b, v3.16b, v2.16b + SM4_CRYPT_BLK(v3) - cbnz w4, .Lcbc_enc_loop; + st1 {v0.16b-v3.16b}, [x1], #64 + mov RIV.16b, v3.16b + cbz w4, .Lcbc_enc_end + b .Lcbc_enc_loop_4x + +.Lcbc_enc_loop_1x: + sub w4, w4, #1 + + ld1 {v0.16b}, [x2], #16 + + eor RIV.16b, RIV.16b, v0.16b + SM4_CRYPT_BLK(RIV) + + st1 {RIV.16b}, [x1], #16 + + cbnz w4, .Lcbc_enc_loop_1x + +.Lcbc_enc_end: /* store new IV */ - st1 {RIV.16b}, [x3]; + st1 {RIV.16b}, [x3] - ret; + ret SYM_FUNC_END(sm4_ce_cbc_enc) .align 3 @@ -368,79 +226,93 @@ 
SYM_FUNC_START(sm4_ce_cbc_dec) * x3: iv (big endian, 128 bit) * w4: nblocks */ - PREPARE; + SM4_PREPARE(x0) - ld1 {RIV.16b}, [x3]; + ld1 {RIV.16b}, [x3] -.Lcbc_loop_blk: - sub w4, w4, #8; - tbnz w4, #31, .Lcbc_tail8; +.Lcbc_dec_loop_8x: + sub w4, w4, #8 + tbnz w4, #31, .Lcbc_dec_4x - ld1 {v0.16b-v3.16b}, [x2], #64; - ld1 {v4.16b-v7.16b}, [x2]; + ld1 {v0.16b-v3.16b}, [x2], #64 + ld1 {v4.16b-v7.16b}, [x2], #64 - SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7); + rev32 v8.16b, v0.16b + rev32 v9.16b, v1.16b + rev32 v10.16b, v2.16b + rev32 v11.16b, v3.16b + rev32 v12.16b, v4.16b + rev32 v13.16b, v5.16b + rev32 v14.16b, v6.16b + rev32 v15.16b, v7.16b - sub x2, x2, #64; - eor v0.16b, v0.16b, RIV.16b; - ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; - eor v1.16b, v1.16b, RTMP0.16b; - eor v2.16b, v2.16b, RTMP1.16b; - eor v3.16b, v3.16b, RTMP2.16b; - st1 {v0.16b-v3.16b}, [x1], #64; + SM4_CRYPT_BLK8_BE(v8, v9, v10, v11, v12, v13, v14, v15) - eor v4.16b, v4.16b, RTMP3.16b; - ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; - eor v5.16b, v5.16b, RTMP0.16b; - eor v6.16b, v6.16b, RTMP1.16b; - eor v7.16b, v7.16b, RTMP2.16b; + eor v8.16b, v8.16b, RIV.16b + eor v9.16b, v9.16b, v0.16b + eor v10.16b, v10.16b, v1.16b + eor v11.16b, v11.16b, v2.16b + eor v12.16b, v12.16b, v3.16b + eor v13.16b, v13.16b, v4.16b + eor v14.16b, v14.16b, v5.16b + eor v15.16b, v15.16b, v6.16b - mov RIV.16b, RTMP3.16b; - st1 {v4.16b-v7.16b}, [x1], #64; + st1 {v8.16b-v11.16b}, [x1], #64 + st1 {v12.16b-v15.16b}, [x1], #64 - cbz w4, .Lcbc_end; - b .Lcbc_loop_blk; + mov RIV.16b, v7.16b -.Lcbc_tail8: - add w4, w4, #8; - cmp w4, #4; - blt .Lcbc_tail4; + cbz w4, .Lcbc_dec_end + b .Lcbc_dec_loop_8x - sub w4, w4, #4; +.Lcbc_dec_4x: + add w4, w4, #8 + cmp w4, #4 + blt .Lcbc_dec_loop_1x - ld1 {v0.16b-v3.16b}, [x2]; + sub w4, w4, #4 - SM4_CRYPT_BLK4(v0, v1, v2, v3); + ld1 {v0.16b-v3.16b}, [x2], #64 - eor v0.16b, v0.16b, RIV.16b; - ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; - eor v1.16b, v1.16b, RTMP0.16b; - eor v2.16b, v2.16b, RTMP1.16b; - eor 
v3.16b, v3.16b, RTMP2.16b; + rev32 v8.16b, v0.16b + rev32 v9.16b, v1.16b + rev32 v10.16b, v2.16b + rev32 v11.16b, v3.16b - mov RIV.16b, RTMP3.16b; - st1 {v0.16b-v3.16b}, [x1], #64; + SM4_CRYPT_BLK4_BE(v8, v9, v10, v11) - cbz w4, .Lcbc_end; + eor v8.16b, v8.16b, RIV.16b + eor v9.16b, v9.16b, v0.16b + eor v10.16b, v10.16b, v1.16b + eor v11.16b, v11.16b, v2.16b -.Lcbc_tail4: - sub w4, w4, #1; + st1 {v8.16b-v11.16b}, [x1], #64 - ld1 {v0.16b}, [x2]; + mov RIV.16b, v3.16b - SM4_CRYPT_BLK(v0); + cbz w4, .Lcbc_dec_end - eor v0.16b, v0.16b, RIV.16b; - ld1 {RIV.16b}, [x2], #16; - st1 {v0.16b}, [x1], #16; +.Lcbc_dec_loop_1x: + sub w4, w4, #1 - cbnz w4, .Lcbc_tail4; + ld1 {v0.16b}, [x2], #16 -.Lcbc_end: + rev32 v8.16b, v0.16b + + SM4_CRYPT_BLK_BE(v8) + + eor v8.16b, v8.16b, RIV.16b + st1 {v8.16b}, [x1], #16 + + mov RIV.16b, v0.16b + + cbnz w4, .Lcbc_dec_loop_1x + +.Lcbc_dec_end: /* store new IV */ - st1 {RIV.16b}, [x3]; + st1 {RIV.16b}, [x3] - ret; + ret SYM_FUNC_END(sm4_ce_cbc_dec) .align 3 @@ -452,25 +324,57 @@ SYM_FUNC_START(sm4_ce_cfb_enc) * x3: iv (big endian, 128 bit) * w4: nblocks */ - PREPARE; + SM4_PREPARE(x0) - ld1 {RIV.16b}, [x3]; + ld1 {RIV.16b}, [x3] -.Lcfb_enc_loop: - sub w4, w4, #1; +.Lcfb_enc_loop_4x: + cmp w4, #4 + blt .Lcfb_enc_loop_1x - SM4_CRYPT_BLK(RIV); + sub w4, w4, #4 - ld1 {RTMP0.16b}, [x2], #16; - eor RIV.16b, RIV.16b, RTMP0.16b; - st1 {RIV.16b}, [x1], #16; + ld1 {v0.16b-v3.16b}, [x2], #64 - cbnz w4, .Lcfb_enc_loop; + rev32 v8.16b, RIV.16b + SM4_CRYPT_BLK_BE(v8) + eor v0.16b, v0.16b, v8.16b + rev32 v8.16b, v0.16b + SM4_CRYPT_BLK_BE(v8) + eor v1.16b, v1.16b, v8.16b + + rev32 v8.16b, v1.16b + SM4_CRYPT_BLK_BE(v8) + eor v2.16b, v2.16b, v8.16b + + rev32 v8.16b, v2.16b + SM4_CRYPT_BLK_BE(v8) + eor v3.16b, v3.16b, v8.16b + + st1 {v0.16b-v3.16b}, [x1], #64 + mov RIV.16b, v3.16b + + cbz w4, .Lcfb_enc_end + b .Lcfb_enc_loop_4x + +.Lcfb_enc_loop_1x: + sub w4, w4, #1 + + ld1 {v0.16b}, [x2], #16 + + SM4_CRYPT_BLK(RIV) + eor RIV.16b, RIV.16b, v0.16b + + st1 
{RIV.16b}, [x1], #16 + + cbnz w4, .Lcfb_enc_loop_1x + +.Lcfb_enc_end: /* store new IV */ - st1 {RIV.16b}, [x3]; + st1 {RIV.16b}, [x3] - ret; + ret SYM_FUNC_END(sm4_ce_cfb_enc) .align 3 @@ -482,79 +386,91 @@ SYM_FUNC_START(sm4_ce_cfb_dec) * x3: iv (big endian, 128 bit) * w4: nblocks */ - PREPARE; + SM4_PREPARE(x0) - ld1 {v0.16b}, [x3]; + ld1 {RIV.16b}, [x3] -.Lcfb_loop_blk: - sub w4, w4, #8; - tbnz w4, #31, .Lcfb_tail8; +.Lcfb_dec_loop_8x: + sub w4, w4, #8 + tbnz w4, #31, .Lcfb_dec_4x - ld1 {v1.16b, v2.16b, v3.16b}, [x2], #48; - ld1 {v4.16b-v7.16b}, [x2]; + ld1 {v0.16b-v3.16b}, [x2], #64 + ld1 {v4.16b-v7.16b}, [x2], #64 - SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7); + rev32 v8.16b, RIV.16b + rev32 v9.16b, v0.16b + rev32 v10.16b, v1.16b + rev32 v11.16b, v2.16b + rev32 v12.16b, v3.16b + rev32 v13.16b, v4.16b + rev32 v14.16b, v5.16b + rev32 v15.16b, v6.16b - sub x2, x2, #48; - ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; - eor v0.16b, v0.16b, RTMP0.16b; - eor v1.16b, v1.16b, RTMP1.16b; - eor v2.16b, v2.16b, RTMP2.16b; - eor v3.16b, v3.16b, RTMP3.16b; - st1 {v0.16b-v3.16b}, [x1], #64; + SM4_CRYPT_BLK8_BE(v8, v9, v10, v11, v12, v13, v14, v15) - ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; - eor v4.16b, v4.16b, RTMP0.16b; - eor v5.16b, v5.16b, RTMP1.16b; - eor v6.16b, v6.16b, RTMP2.16b; - eor v7.16b, v7.16b, RTMP3.16b; - st1 {v4.16b-v7.16b}, [x1], #64; + mov RIV.16b, v7.16b - mov v0.16b, RTMP3.16b; + eor v0.16b, v0.16b, v8.16b + eor v1.16b, v1.16b, v9.16b + eor v2.16b, v2.16b, v10.16b + eor v3.16b, v3.16b, v11.16b + eor v4.16b, v4.16b, v12.16b + eor v5.16b, v5.16b, v13.16b + eor v6.16b, v6.16b, v14.16b + eor v7.16b, v7.16b, v15.16b - cbz w4, .Lcfb_end; - b .Lcfb_loop_blk; + st1 {v0.16b-v3.16b}, [x1], #64 + st1 {v4.16b-v7.16b}, [x1], #64 -.Lcfb_tail8: - add w4, w4, #8; - cmp w4, #4; - blt .Lcfb_tail4; + cbz w4, .Lcfb_dec_end + b .Lcfb_dec_loop_8x - sub w4, w4, #4; +.Lcfb_dec_4x: + add w4, w4, #8 + cmp w4, #4 + blt .Lcfb_dec_loop_1x - ld1 {v1.16b, v2.16b, v3.16b}, [x2]; + sub w4, w4, 
#4 - SM4_CRYPT_BLK4(v0, v1, v2, v3); + ld1 {v0.16b-v3.16b}, [x2], #64 - ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; - eor v0.16b, v0.16b, RTMP0.16b; - eor v1.16b, v1.16b, RTMP1.16b; - eor v2.16b, v2.16b, RTMP2.16b; - eor v3.16b, v3.16b, RTMP3.16b; - st1 {v0.16b-v3.16b}, [x1], #64; + rev32 v8.16b, RIV.16b + rev32 v9.16b, v0.16b + rev32 v10.16b, v1.16b + rev32 v11.16b, v2.16b - mov v0.16b, RTMP3.16b; + SM4_CRYPT_BLK4_BE(v8, v9, v10, v11) - cbz w4, .Lcfb_end; + mov RIV.16b, v3.16b -.Lcfb_tail4: - sub w4, w4, #1; + eor v0.16b, v0.16b, v8.16b + eor v1.16b, v1.16b, v9.16b + eor v2.16b, v2.16b, v10.16b + eor v3.16b, v3.16b, v11.16b - SM4_CRYPT_BLK(v0); + st1 {v0.16b-v3.16b}, [x1], #64 - ld1 {RTMP0.16b}, [x2], #16; - eor v0.16b, v0.16b, RTMP0.16b; - st1 {v0.16b}, [x1], #16; + cbz w4, .Lcfb_dec_end - mov v0.16b, RTMP0.16b; +.Lcfb_dec_loop_1x: + sub w4, w4, #1 - cbnz w4, .Lcfb_tail4; + ld1 {v0.16b}, [x2], #16 -.Lcfb_end: + SM4_CRYPT_BLK(RIV) + + eor RIV.16b, RIV.16b, v0.16b + st1 {RIV.16b}, [x1], #16 + + mov RIV.16b, v0.16b + + cbnz w4, .Lcfb_dec_loop_1x + +.Lcfb_dec_end: /* store new IV */ - st1 {v0.16b}, [x3]; + st1 {RIV.16b}, [x3] - ret; + ret SYM_FUNC_END(sm4_ce_cfb_dec) .align 3 @@ -566,95 +482,99 @@ SYM_FUNC_START(sm4_ce_ctr_enc) * x3: ctr (big endian, 128 bit) * w4: nblocks */ - PREPARE; + SM4_PREPARE(x0) - ldp x7, x8, [x3]; - rev x7, x7; - rev x8, x8; + ldp x7, x8, [x3] + rev x7, x7 + rev x8, x8 -.Lctr_loop_blk: - sub w4, w4, #8; - tbnz w4, #31, .Lctr_tail8; +.Lctr_loop_8x: + sub w4, w4, #8 + tbnz w4, #31, .Lctr_4x -#define inc_le128(vctr) \ - mov vctr.d[1], x8; \ - mov vctr.d[0], x7; \ - adds x8, x8, #1; \ - adc x7, x7, xzr; \ - rev64 vctr.16b, vctr.16b; +#define inc_le128(vctr) \ + mov vctr.d[1], x8; \ + mov vctr.d[0], x7; \ + adds x8, x8, #1; \ + rev64 vctr.16b, vctr.16b; \ + adc x7, x7, xzr; /* construct CTRs */ - inc_le128(v0); /* +0 */ - inc_le128(v1); /* +1 */ - inc_le128(v2); /* +2 */ - inc_le128(v3); /* +3 */ - inc_le128(v4); /* +4 */ - inc_le128(v5); /* +5 */ - 
inc_le128(v6); /* +6 */ - inc_le128(v7); /* +7 */ + inc_le128(v0) /* +0 */ + inc_le128(v1) /* +1 */ + inc_le128(v2) /* +2 */ + inc_le128(v3) /* +3 */ + inc_le128(v4) /* +4 */ + inc_le128(v5) /* +5 */ + inc_le128(v6) /* +6 */ + inc_le128(v7) /* +7 */ - SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7); + ld1 {v8.16b-v11.16b}, [x2], #64 + ld1 {v12.16b-v15.16b}, [x2], #64 - ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; - eor v0.16b, v0.16b, RTMP0.16b; - eor v1.16b, v1.16b, RTMP1.16b; - eor v2.16b, v2.16b, RTMP2.16b; - eor v3.16b, v3.16b, RTMP3.16b; - st1 {v0.16b-v3.16b}, [x1], #64; + SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7) - ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; - eor v4.16b, v4.16b, RTMP0.16b; - eor v5.16b, v5.16b, RTMP1.16b; - eor v6.16b, v6.16b, RTMP2.16b; - eor v7.16b, v7.16b, RTMP3.16b; - st1 {v4.16b-v7.16b}, [x1], #64; + eor v0.16b, v0.16b, v8.16b + eor v1.16b, v1.16b, v9.16b + eor v2.16b, v2.16b, v10.16b + eor v3.16b, v3.16b, v11.16b + eor v4.16b, v4.16b, v12.16b + eor v5.16b, v5.16b, v13.16b + eor v6.16b, v6.16b, v14.16b + eor v7.16b, v7.16b, v15.16b - cbz w4, .Lctr_end; - b .Lctr_loop_blk; + st1 {v0.16b-v3.16b}, [x1], #64 + st1 {v4.16b-v7.16b}, [x1], #64 -.Lctr_tail8: - add w4, w4, #8; - cmp w4, #4; - blt .Lctr_tail4; + cbz w4, .Lctr_end + b .Lctr_loop_8x - sub w4, w4, #4; +.Lctr_4x: + add w4, w4, #8 + cmp w4, #4 + blt .Lctr_loop_1x + + sub w4, w4, #4 /* construct CTRs */ - inc_le128(v0); /* +0 */ - inc_le128(v1); /* +1 */ - inc_le128(v2); /* +2 */ - inc_le128(v3); /* +3 */ + inc_le128(v0) /* +0 */ + inc_le128(v1) /* +1 */ + inc_le128(v2) /* +2 */ + inc_le128(v3) /* +3 */ - SM4_CRYPT_BLK4(v0, v1, v2, v3); + ld1 {v8.16b-v11.16b}, [x2], #64 - ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; - eor v0.16b, v0.16b, RTMP0.16b; - eor v1.16b, v1.16b, RTMP1.16b; - eor v2.16b, v2.16b, RTMP2.16b; - eor v3.16b, v3.16b, RTMP3.16b; - st1 {v0.16b-v3.16b}, [x1], #64; + SM4_CRYPT_BLK4(v0, v1, v2, v3) - cbz w4, .Lctr_end; + eor v0.16b, v0.16b, v8.16b + eor v1.16b, v1.16b, v9.16b + eor 
v2.16b, v2.16b, v10.16b + eor v3.16b, v3.16b, v11.16b -.Lctr_tail4: - sub w4, w4, #1; + st1 {v0.16b-v3.16b}, [x1], #64 + + cbz w4, .Lctr_end + +.Lctr_loop_1x: + sub w4, w4, #1 /* construct CTRs */ - inc_le128(v0); + inc_le128(v0) - SM4_CRYPT_BLK(v0); + ld1 {v8.16b}, [x2], #16 - ld1 {RTMP0.16b}, [x2], #16; - eor v0.16b, v0.16b, RTMP0.16b; - st1 {v0.16b}, [x1], #16; + SM4_CRYPT_BLK(v0) - cbnz w4, .Lctr_tail4; + eor v0.16b, v0.16b, v8.16b + st1 {v0.16b}, [x1], #16 + + cbnz w4, .Lctr_loop_1x .Lctr_end: /* store new CTR */ - rev x7, x7; - rev x8, x8; - stp x7, x8, [x3]; + rev x7, x7 + rev x8, x8 + stp x7, x8, [x3] - ret; + ret SYM_FUNC_END(sm4_ce_ctr_enc) diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-glue.c index 496d55c0d01a..e56e81b1f35f 100644 --- a/arch/arm64/crypto/sm4-ce-glue.c +++ b/arch/arm64/crypto/sm4-ce-glue.c @@ -26,9 +26,9 @@ asmlinkage void sm4_ce_crypt_block(const u32 *rkey, u8 *dst, const u8 *src); asmlinkage void sm4_ce_crypt(const u32 *rkey, u8 *dst, const u8 *src, unsigned int nblks); asmlinkage void sm4_ce_cbc_enc(const u32 *rkey, u8 *dst, const u8 *src, - u8 *iv, unsigned int nblks); + u8 *iv, unsigned int nblocks); asmlinkage void sm4_ce_cbc_dec(const u32 *rkey, u8 *dst, const u8 *src, - u8 *iv, unsigned int nblks); + u8 *iv, unsigned int nblocks); asmlinkage void sm4_ce_cfb_enc(const u32 *rkey, u8 *dst, const u8 *src, u8 *iv, unsigned int nblks); asmlinkage void sm4_ce_cfb_dec(const u32 *rkey, u8 *dst, const u8 *src, @@ -94,66 +94,56 @@ static int sm4_ecb_decrypt(struct skcipher_request *req) return sm4_ecb_do_crypt(req, ctx->rkey_dec); } -static int sm4_cbc_encrypt(struct skcipher_request *req) +static int sm4_cbc_crypt(struct skcipher_request *req, + struct sm4_ctx *ctx, bool encrypt) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); struct skcipher_walk walk; unsigned int nbytes; int err; err = skcipher_walk_virt(&walk, req, false); + if (err) + return err; 
while ((nbytes = walk.nbytes) > 0) { const u8 *src = walk.src.virt.addr; u8 *dst = walk.dst.virt.addr; - unsigned int nblks; + unsigned int nblocks; - kernel_neon_begin(); + nblocks = nbytes / SM4_BLOCK_SIZE; + if (nblocks) { + kernel_neon_begin(); - nblks = BYTES2BLKS(nbytes); - if (nblks) { - sm4_ce_cbc_enc(ctx->rkey_enc, dst, src, walk.iv, nblks); - nbytes -= nblks * SM4_BLOCK_SIZE; + if (encrypt) + sm4_ce_cbc_enc(ctx->rkey_enc, dst, src, + walk.iv, nblocks); + else + sm4_ce_cbc_dec(ctx->rkey_dec, dst, src, + walk.iv, nblocks); + + kernel_neon_end(); } - kernel_neon_end(); - - err = skcipher_walk_done(&walk, nbytes); + err = skcipher_walk_done(&walk, nbytes % SM4_BLOCK_SIZE); } return err; } +static int sm4_cbc_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + + return sm4_cbc_crypt(req, ctx, true); +} + static int sm4_cbc_decrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - unsigned int nbytes; - int err; - err = skcipher_walk_virt(&walk, req, false); - - while ((nbytes = walk.nbytes) > 0) { - const u8 *src = walk.src.virt.addr; - u8 *dst = walk.dst.virt.addr; - unsigned int nblks; - - kernel_neon_begin(); - - nblks = BYTES2BLKS(nbytes); - if (nblks) { - sm4_ce_cbc_dec(ctx->rkey_dec, dst, src, walk.iv, nblks); - nbytes -= nblks * SM4_BLOCK_SIZE; - } - - kernel_neon_end(); - - err = skcipher_walk_done(&walk, nbytes); - } - - return err; + return sm4_cbc_crypt(req, ctx, false); } static int sm4_cfb_encrypt(struct skcipher_request *req) From cb9ba02b07d18172c6a6dcc69410c56482903230 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Thu, 27 Oct 2022 14:54:59 +0800 Subject: [PATCH 0904/4122] crypto: arm64/sm4 - simplify sm4_ce_expand_key() of CE implementation Use a 128-bit swap mask and tbl instruction to simplify the implementation for 
generating SM4 rkey_dec. Also fixed the issue of not being wrapped by kernel_neon_begin/end() when using the sm4_ce_expand_key() function. Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu --- arch/arm64/crypto/sm4-ce-core.S | 46 ++++++++++++++++----------------- arch/arm64/crypto/sm4-ce-glue.c | 2 ++ 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/arch/arm64/crypto/sm4-ce-core.S b/arch/arm64/crypto/sm4-ce-core.S index 41fc745a8528..9e4b4f01cdf3 100644 --- a/arch/arm64/crypto/sm4-ce-core.S +++ b/arch/arm64/crypto/sm4-ce-core.S @@ -65,32 +65,23 @@ SYM_FUNC_START(sm4_ce_expand_key) sm4ekey v6.4s, v5.4s, v30.4s; sm4ekey v7.4s, v6.4s, v31.4s; + adr_l x5, .Lbswap128_mask + ld1 {v24.16b}, [x5] + st1 {v0.16b-v3.16b}, [x1], #64; st1 {v4.16b-v7.16b}, [x1]; - rev64 v7.4s, v7.4s; - rev64 v6.4s, v6.4s; - rev64 v5.4s, v5.4s; - rev64 v4.4s, v4.4s; - rev64 v3.4s, v3.4s; - rev64 v2.4s, v2.4s; - rev64 v1.4s, v1.4s; - rev64 v0.4s, v0.4s; - ext v7.16b, v7.16b, v7.16b, #8; - ext v6.16b, v6.16b, v6.16b, #8; - ext v5.16b, v5.16b, v5.16b, #8; - ext v4.16b, v4.16b, v4.16b, #8; - ext v3.16b, v3.16b, v3.16b, #8; - ext v2.16b, v2.16b, v2.16b, #8; - ext v1.16b, v1.16b, v1.16b, #8; - ext v0.16b, v0.16b, v0.16b, #8; - st1 {v7.16b}, [x2], #16; - st1 {v6.16b}, [x2], #16; - st1 {v5.16b}, [x2], #16; - st1 {v4.16b}, [x2], #16; - st1 {v3.16b}, [x2], #16; - st1 {v2.16b}, [x2], #16; - st1 {v1.16b}, [x2], #16; - st1 {v0.16b}, [x2]; + + tbl v16.16b, {v7.16b}, v24.16b + tbl v17.16b, {v6.16b}, v24.16b + tbl v18.16b, {v5.16b}, v24.16b + tbl v19.16b, {v4.16b}, v24.16b + tbl v20.16b, {v3.16b}, v24.16b + tbl v21.16b, {v2.16b}, v24.16b + tbl v22.16b, {v1.16b}, v24.16b + tbl v23.16b, {v0.16b}, v24.16b + + st1 {v16.16b-v19.16b}, [x2], #64 + st1 {v20.16b-v23.16b}, [x2] ret; SYM_FUNC_END(sm4_ce_expand_key) @@ -578,3 +569,10 @@ SYM_FUNC_START(sm4_ce_ctr_enc) ret SYM_FUNC_END(sm4_ce_ctr_enc) + + + .section ".rodata", "a" + .align 4 +.Lbswap128_mask: + .byte 0x0c, 0x0d, 0x0e, 0x0f, 0x08, 0x09, 
0x0a, 0x0b + .byte 0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03 diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-glue.c index e56e81b1f35f..ff2d8442d473 100644 --- a/arch/arm64/crypto/sm4-ce-glue.c +++ b/arch/arm64/crypto/sm4-ce-glue.c @@ -44,8 +44,10 @@ static int sm4_setkey(struct crypto_skcipher *tfm, const u8 *key, if (key_len != SM4_KEY_SIZE) return -EINVAL; + kernel_neon_begin(); sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec, crypto_sm4_fk, crypto_sm4_ck); + kernel_neon_end(); return 0; } From 45089dbe5952e9afbe2a3b3054105f2a694930f1 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Thu, 27 Oct 2022 14:55:00 +0800 Subject: [PATCH 0905/4122] crypto: arm64/sm4 - export reusable CE acceleration functions In the accelerated implementation of the SM4 algorithm using the Crypto Extension instructions, there are some functions that can be reused in the upcoming accelerated implementation of the GCM/CCM mode, and the CBC/CFB encryption is reused in the optimized implementation of SVESM4. 
Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu --- arch/arm64/crypto/sm4-ce-glue.c | 5 +++++ arch/arm64/crypto/sm4-ce.h | 16 ++++++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 arch/arm64/crypto/sm4-ce.h diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-glue.c index ff2d8442d473..63abcadc684b 100644 --- a/arch/arm64/crypto/sm4-ce-glue.c +++ b/arch/arm64/crypto/sm4-ce-glue.c @@ -36,6 +36,11 @@ asmlinkage void sm4_ce_cfb_dec(const u32 *rkey, u8 *dst, const u8 *src, asmlinkage void sm4_ce_ctr_enc(const u32 *rkey, u8 *dst, const u8 *src, u8 *iv, unsigned int nblks); +EXPORT_SYMBOL(sm4_ce_expand_key); +EXPORT_SYMBOL(sm4_ce_crypt_block); +EXPORT_SYMBOL(sm4_ce_cbc_enc); +EXPORT_SYMBOL(sm4_ce_cfb_enc); + static int sm4_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int key_len) { diff --git a/arch/arm64/crypto/sm4-ce.h b/arch/arm64/crypto/sm4-ce.h new file mode 100644 index 000000000000..109c21b37590 --- /dev/null +++ b/arch/arm64/crypto/sm4-ce.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM4 common functions for Crypto Extensions + * Copyright (C) 2022 Tianjia Zhang + */ + +void sm4_ce_expand_key(const u8 *key, u32 *rkey_enc, u32 *rkey_dec, + const u32 *fk, const u32 *ck); + +void sm4_ce_crypt_block(const u32 *rkey, u8 *dst, const u8 *src); + +void sm4_ce_cbc_enc(const u32 *rkey_enc, u8 *dst, const u8 *src, + u8 *iv, unsigned int nblocks); + +void sm4_ce_cfb_enc(const u32 *rkey_enc, u8 *dst, const u8 *src, + u8 *iv, unsigned int nblocks); From b1863fd0742f8da21f6f994e14e820db5831bd74 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Thu, 27 Oct 2022 14:55:01 +0800 Subject: [PATCH 0906/4122] crypto: arm64/sm4 - add CE implementation for CTS-CBC mode This patch is a CE-optimized assembly implementation for CTS-CBC mode. 
Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt, and compared the performance before and after this patch (the driver used before this patch is cts(cbc-sm4-ce)). The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: Before: cts(cbc-sm4-ce) | 16 64 128 256 1024 1420 4096 ----------------+-------------------------------------------------------------- CTS-CBC enc | 286.09 297.17 457.97 627.75 868.58 900.80 957.69 CTS-CBC dec | 286.67 285.63 538.35 947.08 2241.03 2577.32 3391.14 After: cts-cbc-sm4-ce | 16 64 128 256 1024 1420 4096 ----------------+-------------------------------------------------------------- CTS-CBC enc | 288.19 428.80 593.57 741.04 911.73 931.80 950.00 CTS-CBC dec | 292.22 468.99 838.23 1380.76 2741.17 3036.42 3409.62 Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu --- arch/arm64/crypto/sm4-ce-core.S | 102 ++++++++++++++++++++++++++++++++ arch/arm64/crypto/sm4-ce-glue.c | 94 +++++++++++++++++++++++++++++ 2 files changed, 196 insertions(+) diff --git a/arch/arm64/crypto/sm4-ce-core.S b/arch/arm64/crypto/sm4-ce-core.S index 9e4b4f01cdf3..414d29f8110b 100644 --- a/arch/arm64/crypto/sm4-ce-core.S +++ b/arch/arm64/crypto/sm4-ce-core.S @@ -306,6 +306,100 @@ SYM_FUNC_START(sm4_ce_cbc_dec) ret SYM_FUNC_END(sm4_ce_cbc_dec) +.align 3 +SYM_FUNC_START(sm4_ce_cbc_cts_enc) + /* input: + * x0: round key array, CTX + * x1: dst + * x2: src + * x3: iv (big endian, 128 bit) + * w4: nbytes + */ + SM4_PREPARE(x0) + + sub w5, w4, #16 + uxtw x5, w5 + + ld1 {RIV.16b}, [x3] + + ld1 {v0.16b}, [x2] + eor RIV.16b, RIV.16b, v0.16b + SM4_CRYPT_BLK(RIV) + + /* load permute table */ + adr_l x6, .Lcts_permute_table + add x7, x6, #32 + add x6, x6, x5 + sub x7, x7, x5 + ld1 {v3.16b}, [x6] + ld1 {v4.16b}, [x7] + + /* overlapping loads */ + add x2, x2, x5 + ld1 {v1.16b}, [x2] + + /* create Cn from En-1 */ + tbl v0.16b, {RIV.16b}, v3.16b + /* padding Pn with zeros */ + tbl v1.16b, {v1.16b}, v4.16b + + eor 
v1.16b, v1.16b, RIV.16b + SM4_CRYPT_BLK(v1) + + /* overlapping stores */ + add x5, x1, x5 + st1 {v0.16b}, [x5] + st1 {v1.16b}, [x1] + + ret +SYM_FUNC_END(sm4_ce_cbc_cts_enc) + +.align 3 +SYM_FUNC_START(sm4_ce_cbc_cts_dec) + /* input: + * x0: round key array, CTX + * x1: dst + * x2: src + * x3: iv (big endian, 128 bit) + * w4: nbytes + */ + SM4_PREPARE(x0) + + sub w5, w4, #16 + uxtw x5, w5 + + ld1 {RIV.16b}, [x3] + + /* load permute table */ + adr_l x6, .Lcts_permute_table + add x7, x6, #32 + add x6, x6, x5 + sub x7, x7, x5 + ld1 {v3.16b}, [x6] + ld1 {v4.16b}, [x7] + + /* overlapping loads */ + ld1 {v0.16b}, [x2], x5 + ld1 {v1.16b}, [x2] + + SM4_CRYPT_BLK(v0) + /* select the first Ln bytes of Xn to create Pn */ + tbl v2.16b, {v0.16b}, v3.16b + eor v2.16b, v2.16b, v1.16b + + /* overwrite the first Ln bytes with Cn to create En-1 */ + tbx v0.16b, {v1.16b}, v4.16b + SM4_CRYPT_BLK(v0) + eor v0.16b, v0.16b, RIV.16b + + /* overlapping stores */ + add x5, x1, x5 + st1 {v2.16b}, [x5] + st1 {v0.16b}, [x1] + + ret +SYM_FUNC_END(sm4_ce_cbc_cts_dec) + .align 3 SYM_FUNC_START(sm4_ce_cfb_enc) /* input: @@ -576,3 +670,11 @@ SYM_FUNC_END(sm4_ce_ctr_enc) .Lbswap128_mask: .byte 0x0c, 0x0d, 0x0e, 0x0f, 0x08, 0x09, 0x0a, 0x0b .byte 0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03 + +.Lcts_permute_table: + .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 + .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf + .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-glue.c index 63abcadc684b..4d4072c7bfa2 100644 --- a/arch/arm64/crypto/sm4-ce-glue.c +++ b/arch/arm64/crypto/sm4-ce-glue.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #define BYTES2BLKS(nbytes) ((nbytes) >> 4) @@ -29,6 +30,10 @@ asmlinkage void sm4_ce_cbc_enc(const u32 *rkey, u8 *dst, const 
u8 *src, u8 *iv, unsigned int nblocks); asmlinkage void sm4_ce_cbc_dec(const u32 *rkey, u8 *dst, const u8 *src, u8 *iv, unsigned int nblocks); +asmlinkage void sm4_ce_cbc_cts_enc(const u32 *rkey, u8 *dst, const u8 *src, + u8 *iv, unsigned int nbytes); +asmlinkage void sm4_ce_cbc_cts_dec(const u32 *rkey, u8 *dst, const u8 *src, + u8 *iv, unsigned int nbytes); asmlinkage void sm4_ce_cfb_enc(const u32 *rkey, u8 *dst, const u8 *src, u8 *iv, unsigned int nblks); asmlinkage void sm4_ce_cfb_dec(const u32 *rkey, u8 *dst, const u8 *src, @@ -153,6 +158,78 @@ static int sm4_cbc_decrypt(struct skcipher_request *req) return sm4_cbc_crypt(req, ctx, false); } +static int sm4_cbc_cts_crypt(struct skcipher_request *req, bool encrypt) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct scatterlist *src = req->src; + struct scatterlist *dst = req->dst; + struct scatterlist sg_src[2], sg_dst[2]; + struct skcipher_request subreq; + struct skcipher_walk walk; + int cbc_blocks; + int err; + + if (req->cryptlen < SM4_BLOCK_SIZE) + return -EINVAL; + + if (req->cryptlen == SM4_BLOCK_SIZE) + return sm4_cbc_crypt(req, ctx, encrypt); + + skcipher_request_set_tfm(&subreq, tfm); + skcipher_request_set_callback(&subreq, skcipher_request_flags(req), + NULL, NULL); + + /* handle the CBC cryption part */ + cbc_blocks = DIV_ROUND_UP(req->cryptlen, SM4_BLOCK_SIZE) - 2; + if (cbc_blocks) { + skcipher_request_set_crypt(&subreq, src, dst, + cbc_blocks * SM4_BLOCK_SIZE, + req->iv); + + err = sm4_cbc_crypt(&subreq, ctx, encrypt); + if (err) + return err; + + dst = src = scatterwalk_ffwd(sg_src, src, subreq.cryptlen); + if (req->dst != req->src) + dst = scatterwalk_ffwd(sg_dst, req->dst, + subreq.cryptlen); + } + + /* handle ciphertext stealing */ + skcipher_request_set_crypt(&subreq, src, dst, + req->cryptlen - cbc_blocks * SM4_BLOCK_SIZE, + req->iv); + + err = skcipher_walk_virt(&walk, &subreq, false); + if (err) + return err; + + 
kernel_neon_begin(); + + if (encrypt) + sm4_ce_cbc_cts_enc(ctx->rkey_enc, walk.dst.virt.addr, + walk.src.virt.addr, walk.iv, walk.nbytes); + else + sm4_ce_cbc_cts_dec(ctx->rkey_dec, walk.dst.virt.addr, + walk.src.virt.addr, walk.iv, walk.nbytes); + + kernel_neon_end(); + + return skcipher_walk_done(&walk, 0); +} + +static int sm4_cbc_cts_encrypt(struct skcipher_request *req) +{ + return sm4_cbc_cts_crypt(req, true); +} + +static int sm4_cbc_cts_decrypt(struct skcipher_request *req) +{ + return sm4_cbc_cts_crypt(req, false); +} + static int sm4_cfb_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); @@ -342,6 +419,22 @@ static struct skcipher_alg sm4_algs[] = { .setkey = sm4_setkey, .encrypt = sm4_ctr_crypt, .decrypt = sm4_ctr_crypt, + }, { + .base = { + .cra_name = "cts(cbc(sm4))", + .cra_driver_name = "cts-cbc-sm4-ce", + .cra_priority = 400, + .cra_blocksize = SM4_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .walksize = SM4_BLOCK_SIZE * 2, + .setkey = sm4_setkey, + .encrypt = sm4_cbc_cts_encrypt, + .decrypt = sm4_cbc_cts_decrypt, } }; @@ -365,5 +458,6 @@ MODULE_ALIAS_CRYPTO("ecb(sm4)"); MODULE_ALIAS_CRYPTO("cbc(sm4)"); MODULE_ALIAS_CRYPTO("cfb(sm4)"); MODULE_ALIAS_CRYPTO("ctr(sm4)"); +MODULE_ALIAS_CRYPTO("cts(cbc(sm4))"); MODULE_AUTHOR("Tianjia Zhang "); MODULE_LICENSE("GPL v2"); From 01f633113b19534ab4f4e9cf72d8e72fb3568901 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Thu, 27 Oct 2022 14:55:02 +0800 Subject: [PATCH 0907/4122] crypto: arm64/sm4 - add CE implementation for XTS mode This patch is a CE-optimized assembly implementation for XTS mode. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt, and compared the performance before and after this patch (the driver used before this patch is xts(ecb-sm4-ce)). 
The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: Before: xts(ecb-sm4-ce) | 16 64 128 256 1024 1420 4096 ----------------+-------------------------------------------------------------- XTS enc | 117.17 430.56 732.92 1134.98 2007.03 2136.23 2347.20 XTS dec | 116.89 429.02 733.40 1132.96 2006.13 2130.50 2347.92 After: xts-sm4-ce | 16 64 128 256 1024 1420 4096 ----------------+-------------------------------------------------------------- XTS enc | 224.68 798.91 1248.08 1714.60 2413.73 2467.84 2612.62 XTS dec | 229.85 791.34 1237.79 1720.00 2413.30 2473.84 2611.95 Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu --- arch/arm64/crypto/Kconfig | 4 +- arch/arm64/crypto/sm4-ce-core.S | 343 ++++++++++++++++++++++++++++++++ arch/arm64/crypto/sm4-ce-glue.c | 159 ++++++++++++++- 3 files changed, 504 insertions(+), 2 deletions(-) diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 4b121dc0cfba..8939f5ae9214 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -231,7 +231,7 @@ config CRYPTO_SM4_ARM64_CE - NEON (Advanced SIMD) extensions config CRYPTO_SM4_ARM64_CE_BLK - tristate "Ciphers: SM4, modes: ECB/CBC/CFB/CTR (ARMv8 Crypto Extensions)" + tristate "Ciphers: SM4, modes: ECB/CBC/CFB/CTR/XTS (ARMv8 Crypto Extensions)" depends on KERNEL_MODE_NEON select CRYPTO_SKCIPHER select CRYPTO_SM4 @@ -242,6 +242,8 @@ config CRYPTO_SM4_ARM64_CE_BLK - CBC (Cipher Block Chaining) mode (NIST SP800-38A) - CFB (Cipher Feedback) mode (NIST SP800-38A) - CTR (Counter) mode (NIST SP800-38A) + - XTS (XOR Encrypt XOR with ciphertext stealing) mode (NIST SP800-38E + and IEEE 1619) Architecture: arm64 using: - ARMv8 Crypto Extensions diff --git a/arch/arm64/crypto/sm4-ce-core.S b/arch/arm64/crypto/sm4-ce-core.S index 414d29f8110b..ddd15ec09d38 100644 --- a/arch/arm64/crypto/sm4-ce-core.S +++ b/arch/arm64/crypto/sm4-ce-core.S @@ -35,6 +35,7 @@ #define RTMP3 v19 #define RIV v20 +#define RMASK v21 .align 3 @@ -665,6 
+666,348 @@ SYM_FUNC_START(sm4_ce_ctr_enc) SYM_FUNC_END(sm4_ce_ctr_enc) +#define tweak_next(vt, vin, RTMP) \ + sshr RTMP.2d, vin.2d, #63; \ + and RTMP.16b, RTMP.16b, RMASK.16b; \ + add vt.2d, vin.2d, vin.2d; \ + ext RTMP.16b, RTMP.16b, RTMP.16b, #8; \ + eor vt.16b, vt.16b, RTMP.16b; + +.align 3 +SYM_FUNC_START(sm4_ce_xts_enc) + /* input: + * x0: round key array, CTX + * x1: dst + * x2: src + * x3: tweak (big endian, 128 bit) + * w4: nbytes + * x5: round key array for IV + */ + ld1 {v8.16b}, [x3] + + cbz x5, .Lxts_enc_nofirst + + SM4_PREPARE(x5) + + /* Generate first tweak */ + SM4_CRYPT_BLK(v8) + +.Lxts_enc_nofirst: + SM4_PREPARE(x0) + + ands w5, w4, #15 + lsr w4, w4, #4 + sub w6, w4, #1 + csel w4, w4, w6, eq + uxtw x5, w5 + + movi RMASK.2s, #0x1 + movi RTMP0.2s, #0x87 + uzp1 RMASK.4s, RMASK.4s, RTMP0.4s + + cbz w4, .Lxts_enc_cts + +.Lxts_enc_loop_8x: + sub w4, w4, #8 + tbnz w4, #31, .Lxts_enc_4x + + tweak_next( v9, v8, RTMP0) + tweak_next(v10, v9, RTMP1) + tweak_next(v11, v10, RTMP2) + tweak_next(v12, v11, RTMP3) + tweak_next(v13, v12, RTMP0) + tweak_next(v14, v13, RTMP1) + tweak_next(v15, v14, RTMP2) + + ld1 {v0.16b-v3.16b}, [x2], #64 + ld1 {v4.16b-v7.16b}, [x2], #64 + eor v0.16b, v0.16b, v8.16b + eor v1.16b, v1.16b, v9.16b + eor v2.16b, v2.16b, v10.16b + eor v3.16b, v3.16b, v11.16b + eor v4.16b, v4.16b, v12.16b + eor v5.16b, v5.16b, v13.16b + eor v6.16b, v6.16b, v14.16b + eor v7.16b, v7.16b, v15.16b + + SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7) + + eor v0.16b, v0.16b, v8.16b + eor v1.16b, v1.16b, v9.16b + eor v2.16b, v2.16b, v10.16b + eor v3.16b, v3.16b, v11.16b + eor v4.16b, v4.16b, v12.16b + eor v5.16b, v5.16b, v13.16b + eor v6.16b, v6.16b, v14.16b + eor v7.16b, v7.16b, v15.16b + st1 {v0.16b-v3.16b}, [x1], #64 + st1 {v4.16b-v7.16b}, [x1], #64 + + tweak_next(v8, v15, RTMP3) + + cbz w4, .Lxts_enc_cts + b .Lxts_enc_loop_8x + +.Lxts_enc_4x: + add w4, w4, #8 + cmp w4, #4 + blt .Lxts_enc_loop_1x + + sub w4, w4, #4 + + tweak_next( v9, v8, RTMP0) + 
tweak_next(v10, v9, RTMP1) + tweak_next(v11, v10, RTMP2) + + ld1 {v0.16b-v3.16b}, [x2], #64 + eor v0.16b, v0.16b, v8.16b + eor v1.16b, v1.16b, v9.16b + eor v2.16b, v2.16b, v10.16b + eor v3.16b, v3.16b, v11.16b + + SM4_CRYPT_BLK4(v0, v1, v2, v3) + + eor v0.16b, v0.16b, v8.16b + eor v1.16b, v1.16b, v9.16b + eor v2.16b, v2.16b, v10.16b + eor v3.16b, v3.16b, v11.16b + st1 {v0.16b-v3.16b}, [x1], #64 + + tweak_next(v8, v11, RTMP3) + + cbz w4, .Lxts_enc_cts + +.Lxts_enc_loop_1x: + sub w4, w4, #1 + + ld1 {v0.16b}, [x2], #16 + eor v0.16b, v0.16b, v8.16b + + SM4_CRYPT_BLK(v0) + + eor v0.16b, v0.16b, v8.16b + st1 {v0.16b}, [x1], #16 + + tweak_next(v8, v8, RTMP0) + + cbnz w4, .Lxts_enc_loop_1x + +.Lxts_enc_cts: + cbz x5, .Lxts_enc_end + + /* cipher text stealing */ + + tweak_next(v9, v8, RTMP0) + ld1 {v0.16b}, [x2] + eor v0.16b, v0.16b, v8.16b + SM4_CRYPT_BLK(v0) + eor v0.16b, v0.16b, v8.16b + + /* load permute table */ + adr_l x6, .Lcts_permute_table + add x7, x6, #32 + add x6, x6, x5 + sub x7, x7, x5 + ld1 {v3.16b}, [x6] + ld1 {v4.16b}, [x7] + + /* overlapping loads */ + add x2, x2, x5 + ld1 {v1.16b}, [x2] + + /* create Cn from En-1 */ + tbl v2.16b, {v0.16b}, v3.16b + /* padding Pn with En-1 at the end */ + tbx v0.16b, {v1.16b}, v4.16b + + eor v0.16b, v0.16b, v9.16b + SM4_CRYPT_BLK(v0) + eor v0.16b, v0.16b, v9.16b + + + /* overlapping stores */ + add x5, x1, x5 + st1 {v2.16b}, [x5] + st1 {v0.16b}, [x1] + + b .Lxts_enc_ret + +.Lxts_enc_end: + /* store new tweak */ + st1 {v8.16b}, [x3] + +.Lxts_enc_ret: + ret +SYM_FUNC_END(sm4_ce_xts_enc) + +.align 3 +SYM_FUNC_START(sm4_ce_xts_dec) + /* input: + * x0: round key array, CTX + * x1: dst + * x2: src + * x3: tweak (big endian, 128 bit) + * w4: nbytes + * x5: round key array for IV + */ + ld1 {v8.16b}, [x3] + + cbz x5, .Lxts_dec_nofirst + + SM4_PREPARE(x5) + + /* Generate first tweak */ + SM4_CRYPT_BLK(v8) + +.Lxts_dec_nofirst: + SM4_PREPARE(x0) + + ands w5, w4, #15 + lsr w4, w4, #4 + sub w6, w4, #1 + csel w4, w4, w6, eq + uxtw x5, 
w5 + + movi RMASK.2s, #0x1 + movi RTMP0.2s, #0x87 + uzp1 RMASK.4s, RMASK.4s, RTMP0.4s + + cbz w4, .Lxts_dec_cts + +.Lxts_dec_loop_8x: + sub w4, w4, #8 + tbnz w4, #31, .Lxts_dec_4x + + tweak_next( v9, v8, RTMP0) + tweak_next(v10, v9, RTMP1) + tweak_next(v11, v10, RTMP2) + tweak_next(v12, v11, RTMP3) + tweak_next(v13, v12, RTMP0) + tweak_next(v14, v13, RTMP1) + tweak_next(v15, v14, RTMP2) + + ld1 {v0.16b-v3.16b}, [x2], #64 + ld1 {v4.16b-v7.16b}, [x2], #64 + eor v0.16b, v0.16b, v8.16b + eor v1.16b, v1.16b, v9.16b + eor v2.16b, v2.16b, v10.16b + eor v3.16b, v3.16b, v11.16b + eor v4.16b, v4.16b, v12.16b + eor v5.16b, v5.16b, v13.16b + eor v6.16b, v6.16b, v14.16b + eor v7.16b, v7.16b, v15.16b + + SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7) + + eor v0.16b, v0.16b, v8.16b + eor v1.16b, v1.16b, v9.16b + eor v2.16b, v2.16b, v10.16b + eor v3.16b, v3.16b, v11.16b + eor v4.16b, v4.16b, v12.16b + eor v5.16b, v5.16b, v13.16b + eor v6.16b, v6.16b, v14.16b + eor v7.16b, v7.16b, v15.16b + st1 {v0.16b-v3.16b}, [x1], #64 + st1 {v4.16b-v7.16b}, [x1], #64 + + tweak_next(v8, v15, RTMP3) + + cbz w4, .Lxts_dec_cts + b .Lxts_dec_loop_8x + +.Lxts_dec_4x: + add w4, w4, #8 + cmp w4, #4 + blt .Lxts_dec_loop_1x + + sub w4, w4, #4 + + tweak_next( v9, v8, RTMP0) + tweak_next(v10, v9, RTMP1) + tweak_next(v11, v10, RTMP2) + + ld1 {v0.16b-v3.16b}, [x2], #64 + eor v0.16b, v0.16b, v8.16b + eor v1.16b, v1.16b, v9.16b + eor v2.16b, v2.16b, v10.16b + eor v3.16b, v3.16b, v11.16b + + SM4_CRYPT_BLK4(v0, v1, v2, v3) + + eor v0.16b, v0.16b, v8.16b + eor v1.16b, v1.16b, v9.16b + eor v2.16b, v2.16b, v10.16b + eor v3.16b, v3.16b, v11.16b + st1 {v0.16b-v3.16b}, [x1], #64 + + tweak_next(v8, v11, RTMP3) + + cbz w4, .Lxts_dec_cts + +.Lxts_dec_loop_1x: + sub w4, w4, #1 + + ld1 {v0.16b}, [x2], #16 + eor v0.16b, v0.16b, v8.16b + + SM4_CRYPT_BLK(v0) + + eor v0.16b, v0.16b, v8.16b + st1 {v0.16b}, [x1], #16 + + tweak_next(v8, v8, RTMP0) + + cbnz w4, .Lxts_dec_loop_1x + +.Lxts_dec_cts: + cbz x5, .Lxts_dec_end + + /* 
cipher text stealing */ + + tweak_next(v9, v8, RTMP0) + ld1 {v0.16b}, [x2] + eor v0.16b, v0.16b, v9.16b + SM4_CRYPT_BLK(v0) + eor v0.16b, v0.16b, v9.16b + + /* load permute table */ + adr_l x6, .Lcts_permute_table + add x7, x6, #32 + add x6, x6, x5 + sub x7, x7, x5 + ld1 {v3.16b}, [x6] + ld1 {v4.16b}, [x7] + + /* overlapping loads */ + add x2, x2, x5 + ld1 {v1.16b}, [x2] + + /* create Cn from En-1 */ + tbl v2.16b, {v0.16b}, v3.16b + /* padding Pn with En-1 at the end */ + tbx v0.16b, {v1.16b}, v4.16b + + eor v0.16b, v0.16b, v8.16b + SM4_CRYPT_BLK(v0) + eor v0.16b, v0.16b, v8.16b + + + /* overlapping stores */ + add x5, x1, x5 + st1 {v2.16b}, [x5] + st1 {v0.16b}, [x1] + + b .Lxts_dec_ret + +.Lxts_dec_end: + /* store new tweak */ + st1 {v8.16b}, [x3] + +.Lxts_dec_ret: + ret +SYM_FUNC_END(sm4_ce_xts_dec) + + .section ".rodata", "a" .align 4 .Lbswap128_mask: diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-glue.c index 4d4072c7bfa2..8222766f712a 100644 --- a/arch/arm64/crypto/sm4-ce-glue.c +++ b/arch/arm64/crypto/sm4-ce-glue.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #define BYTES2BLKS(nbytes) ((nbytes) >> 4) @@ -40,12 +41,23 @@ asmlinkage void sm4_ce_cfb_dec(const u32 *rkey, u8 *dst, const u8 *src, u8 *iv, unsigned int nblks); asmlinkage void sm4_ce_ctr_enc(const u32 *rkey, u8 *dst, const u8 *src, u8 *iv, unsigned int nblks); +asmlinkage void sm4_ce_xts_enc(const u32 *rkey1, u8 *dst, const u8 *src, + u8 *tweak, unsigned int nbytes, + const u32 *rkey2_enc); +asmlinkage void sm4_ce_xts_dec(const u32 *rkey1, u8 *dst, const u8 *src, + u8 *tweak, unsigned int nbytes, + const u32 *rkey2_enc); EXPORT_SYMBOL(sm4_ce_expand_key); EXPORT_SYMBOL(sm4_ce_crypt_block); EXPORT_SYMBOL(sm4_ce_cbc_enc); EXPORT_SYMBOL(sm4_ce_cfb_enc); +struct sm4_xts_ctx { + struct sm4_ctx key1; + struct sm4_ctx key2; +}; + static int sm4_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int key_len) { @@ -61,6 +73,29 @@ static int sm4_setkey(struct 
crypto_skcipher *tfm, const u8 *key, return 0; } +static int sm4_xts_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int key_len) +{ + struct sm4_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + int ret; + + if (key_len != SM4_KEY_SIZE * 2) + return -EINVAL; + + ret = xts_verify_key(tfm, key, key_len); + if (ret) + return ret; + + kernel_neon_begin(); + sm4_ce_expand_key(key, ctx->key1.rkey_enc, + ctx->key1.rkey_dec, crypto_sm4_fk, crypto_sm4_ck); + sm4_ce_expand_key(&key[SM4_KEY_SIZE], ctx->key2.rkey_enc, + ctx->key2.rkey_dec, crypto_sm4_fk, crypto_sm4_ck); + kernel_neon_end(); + + return 0; +} + static int sm4_ecb_do_crypt(struct skcipher_request *req, const u32 *rkey) { struct skcipher_walk walk; @@ -357,6 +392,111 @@ static int sm4_ctr_crypt(struct skcipher_request *req) return err; } +static int sm4_xts_crypt(struct skcipher_request *req, bool encrypt) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + int tail = req->cryptlen % SM4_BLOCK_SIZE; + const u32 *rkey2_enc = ctx->key2.rkey_enc; + struct scatterlist sg_src[2], sg_dst[2]; + struct skcipher_request subreq; + struct scatterlist *src, *dst; + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + if (req->cryptlen < SM4_BLOCK_SIZE) + return -EINVAL; + + err = skcipher_walk_virt(&walk, req, false); + if (err) + return err; + + if (unlikely(tail > 0 && walk.nbytes < walk.total)) { + int nblocks = DIV_ROUND_UP(req->cryptlen, SM4_BLOCK_SIZE) - 2; + + skcipher_walk_abort(&walk); + + skcipher_request_set_tfm(&subreq, tfm); + skcipher_request_set_callback(&subreq, + skcipher_request_flags(req), + NULL, NULL); + skcipher_request_set_crypt(&subreq, req->src, req->dst, + nblocks * SM4_BLOCK_SIZE, req->iv); + + err = skcipher_walk_virt(&walk, &subreq, false); + if (err) + return err; + } else { + tail = 0; + } + + while ((nbytes = walk.nbytes) >= SM4_BLOCK_SIZE) { + if (nbytes < walk.total) + nbytes &= ~(SM4_BLOCK_SIZE - 1); + + 
kernel_neon_begin(); + + if (encrypt) + sm4_ce_xts_enc(ctx->key1.rkey_enc, walk.dst.virt.addr, + walk.src.virt.addr, walk.iv, nbytes, + rkey2_enc); + else + sm4_ce_xts_dec(ctx->key1.rkey_dec, walk.dst.virt.addr, + walk.src.virt.addr, walk.iv, nbytes, + rkey2_enc); + + kernel_neon_end(); + + rkey2_enc = NULL; + + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + if (err) + return err; + } + + if (likely(tail == 0)) + return 0; + + /* handle ciphertext stealing */ + + dst = src = scatterwalk_ffwd(sg_src, req->src, subreq.cryptlen); + if (req->dst != req->src) + dst = scatterwalk_ffwd(sg_dst, req->dst, subreq.cryptlen); + + skcipher_request_set_crypt(&subreq, src, dst, SM4_BLOCK_SIZE + tail, + req->iv); + + err = skcipher_walk_virt(&walk, &subreq, false); + if (err) + return err; + + kernel_neon_begin(); + + if (encrypt) + sm4_ce_xts_enc(ctx->key1.rkey_enc, walk.dst.virt.addr, + walk.src.virt.addr, walk.iv, walk.nbytes, + rkey2_enc); + else + sm4_ce_xts_dec(ctx->key1.rkey_dec, walk.dst.virt.addr, + walk.src.virt.addr, walk.iv, walk.nbytes, + rkey2_enc); + + kernel_neon_end(); + + return skcipher_walk_done(&walk, 0); +} + +static int sm4_xts_encrypt(struct skcipher_request *req) +{ + return sm4_xts_crypt(req, true); +} + +static int sm4_xts_decrypt(struct skcipher_request *req) +{ + return sm4_xts_crypt(req, false); +} + static struct skcipher_alg sm4_algs[] = { { .base = { @@ -435,6 +575,22 @@ static struct skcipher_alg sm4_algs[] = { .setkey = sm4_setkey, .encrypt = sm4_cbc_cts_encrypt, .decrypt = sm4_cbc_cts_decrypt, + }, { + .base = { + .cra_name = "xts(sm4)", + .cra_driver_name = "xts-sm4-ce", + .cra_priority = 400, + .cra_blocksize = SM4_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct sm4_xts_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE * 2, + .max_keysize = SM4_KEY_SIZE * 2, + .ivsize = SM4_BLOCK_SIZE, + .walksize = SM4_BLOCK_SIZE * 2, + .setkey = sm4_xts_setkey, + .encrypt = sm4_xts_encrypt, + .decrypt = sm4_xts_decrypt, } }; @@ 
-451,7 +607,7 @@ static void __exit sm4_exit(void) module_cpu_feature_match(SM4, sm4_init); module_exit(sm4_exit); -MODULE_DESCRIPTION("SM4 ECB/CBC/CFB/CTR using ARMv8 Crypto Extensions"); +MODULE_DESCRIPTION("SM4 ECB/CBC/CFB/CTR/XTS using ARMv8 Crypto Extensions"); MODULE_ALIAS_CRYPTO("sm4-ce"); MODULE_ALIAS_CRYPTO("sm4"); MODULE_ALIAS_CRYPTO("ecb(sm4)"); @@ -459,5 +615,6 @@ MODULE_ALIAS_CRYPTO("cbc(sm4)"); MODULE_ALIAS_CRYPTO("cfb(sm4)"); MODULE_ALIAS_CRYPTO("ctr(sm4)"); MODULE_ALIAS_CRYPTO("cts(cbc(sm4))"); +MODULE_ALIAS_CRYPTO("xts(sm4)"); MODULE_AUTHOR("Tianjia Zhang "); MODULE_LICENSE("GPL v2"); From 6b5360a5e0ad357b73776d092437715ba4a77865 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Thu, 27 Oct 2022 14:55:03 +0800 Subject: [PATCH 0908/4122] crypto: arm64/sm4 - add CE implementation for cmac/xcbc/cbcmac This patch is a CE-optimized assembly implementation for cmac/xcbc/cbcmac. Benchmarked on a T-Head Yitian-710 at 2.75 GHz; the data comes from mode 300 of tcrypt and compares the performance before and after this patch (the driver used before this patch is XXXmac(sm4-ce)). The abscissas are blocks of different lengths. 
The data is tabulated and the unit is Mb/s: Before: update-size | 16 64 256 1024 2048 4096 8192 ---------------+-------------------------------------------------------- cmac(sm4-ce) | 293.33 403.69 503.76 527.78 531.10 535.46 535.81 xcbc(sm4-ce) | 292.83 402.50 504.02 529.08 529.87 536.55 538.24 cbcmac(sm4-ce) | 318.42 415.79 497.12 515.05 523.15 521.19 523.01 After: update-size | 16 64 256 1024 2048 4096 8192 ---------------+-------------------------------------------------------- cmac-sm4-ce | 371.99 675.28 903.56 971.65 980.57 990.40 991.04 xcbc-sm4-ce | 372.11 674.55 903.47 971.61 980.96 990.42 991.10 cbcmac-sm4-ce | 371.63 675.33 903.23 972.07 981.42 990.93 991.45 Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu --- arch/arm64/crypto/sm4-ce-core.S | 70 +++++++++ arch/arm64/crypto/sm4-ce-glue.c | 267 +++++++++++++++++++++++++++++++- 2 files changed, 336 insertions(+), 1 deletion(-) diff --git a/arch/arm64/crypto/sm4-ce-core.S b/arch/arm64/crypto/sm4-ce-core.S index ddd15ec09d38..877b80c54a0d 100644 --- a/arch/arm64/crypto/sm4-ce-core.S +++ b/arch/arm64/crypto/sm4-ce-core.S @@ -35,6 +35,7 @@ #define RTMP3 v19 #define RIV v20 +#define RMAC v20 #define RMASK v21 @@ -1007,6 +1008,75 @@ SYM_FUNC_START(sm4_ce_xts_dec) ret SYM_FUNC_END(sm4_ce_xts_dec) +.align 3 +SYM_FUNC_START(sm4_ce_mac_update) + /* input: + * x0: round key array, CTX + * x1: digest + * x2: src + * w3: nblocks + * w4: enc_before + * w5: enc_after + */ + SM4_PREPARE(x0) + + ld1 {RMAC.16b}, [x1] + + cbz w4, .Lmac_update + + SM4_CRYPT_BLK(RMAC) + +.Lmac_update: + cbz w3, .Lmac_ret + + sub w6, w3, #1 + cmp w5, wzr + csel w3, w3, w6, ne + + cbz w3, .Lmac_end + +.Lmac_loop_4x: + cmp w3, #4 + blt .Lmac_loop_1x + + sub w3, w3, #4 + + ld1 {v0.16b-v3.16b}, [x2], #64 + + eor RMAC.16b, RMAC.16b, v0.16b + SM4_CRYPT_BLK(RMAC) + eor RMAC.16b, RMAC.16b, v1.16b + SM4_CRYPT_BLK(RMAC) + eor RMAC.16b, RMAC.16b, v2.16b + SM4_CRYPT_BLK(RMAC) + eor RMAC.16b, RMAC.16b, v3.16b + SM4_CRYPT_BLK(RMAC) + + cbz w3, 
.Lmac_end + b .Lmac_loop_4x + +.Lmac_loop_1x: + sub w3, w3, #1 + + ld1 {v0.16b}, [x2], #16 + + eor RMAC.16b, RMAC.16b, v0.16b + SM4_CRYPT_BLK(RMAC) + + cbnz w3, .Lmac_loop_1x + + +.Lmac_end: + cbnz w5, .Lmac_ret + + ld1 {v0.16b}, [x2], #16 + eor RMAC.16b, RMAC.16b, v0.16b + +.Lmac_ret: + st1 {RMAC.16b}, [x1] + ret +SYM_FUNC_END(sm4_ce_mac_update) + .section ".rodata", "a" .align 4 diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-glue.c index 8222766f712a..0a2d32ed3bde 100644 --- a/arch/arm64/crypto/sm4-ce-glue.c +++ b/arch/arm64/crypto/sm4-ce-glue.c @@ -14,8 +14,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -47,6 +49,9 @@ asmlinkage void sm4_ce_xts_enc(const u32 *rkey1, u8 *dst, const u8 *src, asmlinkage void sm4_ce_xts_dec(const u32 *rkey1, u8 *dst, const u8 *src, u8 *tweak, unsigned int nbytes, const u32 *rkey2_enc); +asmlinkage void sm4_ce_mac_update(const u32 *rkey_enc, u8 *digest, + const u8 *src, unsigned int nblocks, + bool enc_before, bool enc_after); EXPORT_SYMBOL(sm4_ce_expand_key); EXPORT_SYMBOL(sm4_ce_crypt_block); @@ -58,6 +63,16 @@ struct sm4_xts_ctx { struct sm4_ctx key2; }; +struct sm4_mac_tfm_ctx { + struct sm4_ctx key; + u8 __aligned(8) consts[]; +}; + +struct sm4_mac_desc_ctx { + unsigned int len; + u8 digest[SM4_BLOCK_SIZE]; +}; + static int sm4_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int key_len) { @@ -594,13 +609,260 @@ static struct skcipher_alg sm4_algs[] = { } }; +static int sm4_cbcmac_setkey(struct crypto_shash *tfm, const u8 *key, + unsigned int key_len) +{ + struct sm4_mac_tfm_ctx *ctx = crypto_shash_ctx(tfm); + + if (key_len != SM4_KEY_SIZE) + return -EINVAL; + + kernel_neon_begin(); + sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec, + crypto_sm4_fk, crypto_sm4_ck); + kernel_neon_end(); + + return 0; +} + +static int sm4_cmac_setkey(struct crypto_shash *tfm, const u8 *key, + unsigned int key_len) +{ + struct sm4_mac_tfm_ctx *ctx 
= crypto_shash_ctx(tfm); + be128 *consts = (be128 *)ctx->consts; + u64 a, b; + + if (key_len != SM4_KEY_SIZE) + return -EINVAL; + + memset(consts, 0, SM4_BLOCK_SIZE); + + kernel_neon_begin(); + + sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec, + crypto_sm4_fk, crypto_sm4_ck); + + /* encrypt the zero block */ + sm4_ce_crypt_block(ctx->key.rkey_enc, (u8 *)consts, (const u8 *)consts); + + kernel_neon_end(); + + /* gf(2^128) multiply zero-ciphertext with u and u^2 */ + a = be64_to_cpu(consts[0].a); + b = be64_to_cpu(consts[0].b); + consts[0].a = cpu_to_be64((a << 1) | (b >> 63)); + consts[0].b = cpu_to_be64((b << 1) ^ ((a >> 63) ? 0x87 : 0)); + + a = be64_to_cpu(consts[0].a); + b = be64_to_cpu(consts[0].b); + consts[1].a = cpu_to_be64((a << 1) | (b >> 63)); + consts[1].b = cpu_to_be64((b << 1) ^ ((a >> 63) ? 0x87 : 0)); + + return 0; +} + +static int sm4_xcbc_setkey(struct crypto_shash *tfm, const u8 *key, + unsigned int key_len) +{ + struct sm4_mac_tfm_ctx *ctx = crypto_shash_ctx(tfm); + u8 __aligned(8) key2[SM4_BLOCK_SIZE]; + static u8 const ks[3][SM4_BLOCK_SIZE] = { + { [0 ... SM4_BLOCK_SIZE - 1] = 0x1}, + { [0 ... SM4_BLOCK_SIZE - 1] = 0x2}, + { [0 ... 
SM4_BLOCK_SIZE - 1] = 0x3}, + }; + + if (key_len != SM4_KEY_SIZE) + return -EINVAL; + + kernel_neon_begin(); + + sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec, + crypto_sm4_fk, crypto_sm4_ck); + + sm4_ce_crypt_block(ctx->key.rkey_enc, key2, ks[0]); + sm4_ce_crypt(ctx->key.rkey_enc, ctx->consts, ks[1], 2); + + sm4_ce_expand_key(key2, ctx->key.rkey_enc, ctx->key.rkey_dec, + crypto_sm4_fk, crypto_sm4_ck); + + kernel_neon_end(); + + return 0; +} + +static int sm4_mac_init(struct shash_desc *desc) +{ + struct sm4_mac_desc_ctx *ctx = shash_desc_ctx(desc); + + memset(ctx->digest, 0, SM4_BLOCK_SIZE); + ctx->len = 0; + + return 0; +} + +static int sm4_mac_update(struct shash_desc *desc, const u8 *p, + unsigned int len) +{ + struct sm4_mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); + struct sm4_mac_desc_ctx *ctx = shash_desc_ctx(desc); + unsigned int l, nblocks; + + if (len == 0) + return 0; + + if (ctx->len || ctx->len + len < SM4_BLOCK_SIZE) { + l = min(len, SM4_BLOCK_SIZE - ctx->len); + + crypto_xor(ctx->digest + ctx->len, p, l); + ctx->len += l; + len -= l; + p += l; + } + + if (len && (ctx->len % SM4_BLOCK_SIZE) == 0) { + kernel_neon_begin(); + + if (len < SM4_BLOCK_SIZE && ctx->len == SM4_BLOCK_SIZE) { + sm4_ce_crypt_block(tctx->key.rkey_enc, + ctx->digest, ctx->digest); + ctx->len = 0; + } else { + nblocks = len / SM4_BLOCK_SIZE; + len %= SM4_BLOCK_SIZE; + + sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, p, + nblocks, (ctx->len == SM4_BLOCK_SIZE), + (len != 0)); + + p += nblocks * SM4_BLOCK_SIZE; + + if (len == 0) + ctx->len = SM4_BLOCK_SIZE; + } + + kernel_neon_end(); + + if (len) { + crypto_xor(ctx->digest, p, len); + ctx->len = len; + } + } + + return 0; +} + +static int sm4_cmac_final(struct shash_desc *desc, u8 *out) +{ + struct sm4_mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); + struct sm4_mac_desc_ctx *ctx = shash_desc_ctx(desc); + const u8 *consts = tctx->consts; + + if (ctx->len != SM4_BLOCK_SIZE) { + ctx->digest[ctx->len] ^= 0x80; + 
consts += SM4_BLOCK_SIZE; + } + + kernel_neon_begin(); + sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, consts, 1, + false, true); + kernel_neon_end(); + + memcpy(out, ctx->digest, SM4_BLOCK_SIZE); + + return 0; +} + +static int sm4_cbcmac_final(struct shash_desc *desc, u8 *out) +{ + struct sm4_mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); + struct sm4_mac_desc_ctx *ctx = shash_desc_ctx(desc); + + if (ctx->len) { + kernel_neon_begin(); + sm4_ce_crypt_block(tctx->key.rkey_enc, ctx->digest, + ctx->digest); + kernel_neon_end(); + } + + memcpy(out, ctx->digest, SM4_BLOCK_SIZE); + + return 0; +} + +static struct shash_alg sm4_mac_algs[] = { + { + .base = { + .cra_name = "cmac(sm4)", + .cra_driver_name = "cmac-sm4-ce", + .cra_priority = 400, + .cra_blocksize = SM4_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct sm4_mac_tfm_ctx) + + SM4_BLOCK_SIZE * 2, + .cra_module = THIS_MODULE, + }, + .digestsize = SM4_BLOCK_SIZE, + .init = sm4_mac_init, + .update = sm4_mac_update, + .final = sm4_cmac_final, + .setkey = sm4_cmac_setkey, + .descsize = sizeof(struct sm4_mac_desc_ctx), + }, { + .base = { + .cra_name = "xcbc(sm4)", + .cra_driver_name = "xcbc-sm4-ce", + .cra_priority = 400, + .cra_blocksize = SM4_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct sm4_mac_tfm_ctx) + + SM4_BLOCK_SIZE * 2, + .cra_module = THIS_MODULE, + }, + .digestsize = SM4_BLOCK_SIZE, + .init = sm4_mac_init, + .update = sm4_mac_update, + .final = sm4_cmac_final, + .setkey = sm4_xcbc_setkey, + .descsize = sizeof(struct sm4_mac_desc_ctx), + }, { + .base = { + .cra_name = "cbcmac(sm4)", + .cra_driver_name = "cbcmac-sm4-ce", + .cra_priority = 400, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct sm4_mac_tfm_ctx), + .cra_module = THIS_MODULE, + }, + .digestsize = SM4_BLOCK_SIZE, + .init = sm4_mac_init, + .update = sm4_mac_update, + .final = sm4_cbcmac_final, + .setkey = sm4_cbcmac_setkey, + .descsize = sizeof(struct sm4_mac_desc_ctx), + } +}; + static int __init sm4_init(void) { - return 
crypto_register_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs)); + int err; + + err = crypto_register_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs)); + if (err) + return err; + + err = crypto_register_shashes(sm4_mac_algs, ARRAY_SIZE(sm4_mac_algs)); + if (err) + goto out_err; + + return 0; + +out_err: + crypto_unregister_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs)); + return err; } static void __exit sm4_exit(void) { + crypto_unregister_shashes(sm4_mac_algs, ARRAY_SIZE(sm4_mac_algs)); crypto_unregister_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs)); } @@ -616,5 +878,8 @@ MODULE_ALIAS_CRYPTO("cfb(sm4)"); MODULE_ALIAS_CRYPTO("ctr(sm4)"); MODULE_ALIAS_CRYPTO("cts(cbc(sm4))"); MODULE_ALIAS_CRYPTO("xts(sm4)"); +MODULE_ALIAS_CRYPTO("cmac(sm4)"); +MODULE_ALIAS_CRYPTO("xcbc(sm4)"); +MODULE_ALIAS_CRYPTO("cbcmac(sm4)"); MODULE_AUTHOR("Tianjia Zhang "); MODULE_LICENSE("GPL v2"); From 67fa3a7fdf80c80ee737840dfdd225260e5c1044 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Thu, 27 Oct 2022 14:55:04 +0800 Subject: [PATCH 0909/4122] crypto: arm64/sm4 - add CE implementation for CCM mode This patch is a CE-optimized assembly implementation for CCM mode. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 223 and 225 modes of tcrypt, and compared the performance before and after this patch (the driver used before this patch is ccm_base(ctr-sm4-ce,cbcmac-sm4-ce)). The abscissas are blocks of different lengths. 
The data is tabulated and the unit is Mb/s: Before (rfc4309(ccm_base(ctr-sm4-ce,cbcmac-sm4-ce))): ccm(sm4) | 16 64 256 512 1024 1420 4096 8192 -------------+--------------------------------------------------------------- CCM enc | 35.07 125.40 336.47 468.17 581.97 619.18 712.56 736.01 CCM dec | 34.87 124.40 335.08 466.75 581.04 618.81 712.25 735.89 CCM mb enc | 34.71 123.96 333.92 465.39 579.91 617.49 711.45 734.92 CCM mb dec | 34.42 122.80 331.02 462.81 578.28 616.42 709.88 734.19 After (rfc4309(ccm-sm4-ce)): ccm-sm4-ce | 16 64 256 512 1024 1420 4096 8192 -------------+--------------------------------------------------------------- CCM enc | 77.12 249.82 569.94 725.17 839.27 867.71 952.87 969.89 CCM dec | 75.90 247.26 566.29 722.12 836.90 865.95 951.74 968.57 CCM mb enc | 75.98 245.25 562.91 718.99 834.76 864.70 950.17 967.90 CCM mb dec | 75.06 243.78 560.58 717.13 833.68 862.70 949.35 967.11 Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu --- arch/arm64/crypto/Kconfig | 16 ++ arch/arm64/crypto/Makefile | 3 + arch/arm64/crypto/sm4-ce-ccm-core.S | 328 ++++++++++++++++++++++++++++ arch/arm64/crypto/sm4-ce-ccm-glue.c | 303 +++++++++++++++++++++++++ 4 files changed, 650 insertions(+) create mode 100644 arch/arm64/crypto/sm4-ce-ccm-core.S create mode 100644 arch/arm64/crypto/sm4-ce-ccm-glue.c diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 8939f5ae9214..2611036a3e3f 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -281,6 +281,22 @@ config CRYPTO_AES_ARM64_CE_CCM - ARMv8 Crypto Extensions - NEON (Advanced SIMD) extensions +config CRYPTO_SM4_ARM64_CE_CCM + tristate "AEAD cipher: SM4 in CCM mode (ARMv8 Crypto Extensions)" + depends on KERNEL_MODE_NEON + select CRYPTO_ALGAPI + select CRYPTO_AEAD + select CRYPTO_SM4 + select CRYPTO_SM4_ARM64_CE_BLK + help + AEAD cipher: SM4 cipher algorithms (OSCCA GB/T 32907-2016) with + CCM (Counter with Cipher Block Chaining-Message Authentication Code) + authenticated encryption 
mode (NIST SP800-38C) + + Architecture: arm64 using: + - ARMv8 Crypto Extensions + - NEON (Advanced SIMD) extensions + config CRYPTO_CRCT10DIF_ARM64_CE tristate "CRCT10DIF (PMULL)" depends on KERNEL_MODE_NEON && CRC_T10DIF diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index 087f1625e775..843ea5266965 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -29,6 +29,9 @@ sm4-ce-cipher-y := sm4-ce-cipher-glue.o sm4-ce-cipher-core.o obj-$(CONFIG_CRYPTO_SM4_ARM64_CE_BLK) += sm4-ce.o sm4-ce-y := sm4-ce-glue.o sm4-ce-core.o +obj-$(CONFIG_CRYPTO_SM4_ARM64_CE_CCM) += sm4-ce-ccm.o +sm4-ce-ccm-y := sm4-ce-ccm-glue.o sm4-ce-ccm-core.o + obj-$(CONFIG_CRYPTO_SM4_ARM64_NEON_BLK) += sm4-neon.o sm4-neon-y := sm4-neon-glue.o sm4-neon-core.o diff --git a/arch/arm64/crypto/sm4-ce-ccm-core.S b/arch/arm64/crypto/sm4-ce-ccm-core.S new file mode 100644 index 000000000000..028207c4afd0 --- /dev/null +++ b/arch/arm64/crypto/sm4-ce-ccm-core.S @@ -0,0 +1,328 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM4-CCM AEAD Algorithm using ARMv8 Crypto Extensions + * as specified in rfc8998 + * https://datatracker.ietf.org/doc/html/rfc8998 + * + * Copyright (C) 2022 Tianjia Zhang + */ + +#include +#include +#include "sm4-ce-asm.h" + +.arch armv8-a+crypto + +.irp b, 0, 1, 8, 9, 10, 11, 12, 13, 14, 15, 16, 24, 25, 26, 27, 28, 29, 30, 31 + .set .Lv\b\().4s, \b +.endr + +.macro sm4e, vd, vn + .inst 0xcec08400 | (.L\vn << 5) | .L\vd +.endm + +/* Register macros */ + +#define RMAC v16 + +/* Helper macros. 
*/ + +#define inc_le128(vctr) \ + mov vctr.d[1], x8; \ + mov vctr.d[0], x7; \ + adds x8, x8, #1; \ + rev64 vctr.16b, vctr.16b; \ + adc x7, x7, xzr; + + +.align 3 +SYM_FUNC_START(sm4_ce_cbcmac_update) + /* input: + * x0: round key array, CTX + * x1: mac + * x2: src + * w3: nblocks + */ + SM4_PREPARE(x0) + + ld1 {RMAC.16b}, [x1] + +.Lcbcmac_loop_4x: + cmp w3, #4 + blt .Lcbcmac_loop_1x + + sub w3, w3, #4 + + ld1 {v0.16b-v3.16b}, [x2], #64 + + SM4_CRYPT_BLK(RMAC) + eor RMAC.16b, RMAC.16b, v0.16b + SM4_CRYPT_BLK(RMAC) + eor RMAC.16b, RMAC.16b, v1.16b + SM4_CRYPT_BLK(RMAC) + eor RMAC.16b, RMAC.16b, v2.16b + SM4_CRYPT_BLK(RMAC) + eor RMAC.16b, RMAC.16b, v3.16b + + cbz w3, .Lcbcmac_end + b .Lcbcmac_loop_4x + +.Lcbcmac_loop_1x: + sub w3, w3, #1 + + ld1 {v0.16b}, [x2], #16 + + SM4_CRYPT_BLK(RMAC) + eor RMAC.16b, RMAC.16b, v0.16b + + cbnz w3, .Lcbcmac_loop_1x + +.Lcbcmac_end: + st1 {RMAC.16b}, [x1] + ret +SYM_FUNC_END(sm4_ce_cbcmac_update) + +.align 3 +SYM_FUNC_START(sm4_ce_ccm_final) + /* input: + * x0: round key array, CTX + * x1: ctr0 (big endian, 128 bit) + * x2: mac + */ + SM4_PREPARE(x0) + + ld1 {RMAC.16b}, [x2] + ld1 {v0.16b}, [x1] + + SM4_CRYPT_BLK2(RMAC, v0) + + /* en-/decrypt the mac with ctr0 */ + eor RMAC.16b, RMAC.16b, v0.16b + st1 {RMAC.16b}, [x2] + + ret +SYM_FUNC_END(sm4_ce_ccm_final) + +.align 3 +SYM_FUNC_START(sm4_ce_ccm_enc) + /* input: + * x0: round key array, CTX + * x1: dst + * x2: src + * x3: ctr (big endian, 128 bit) + * w4: nbytes + * x5: mac + */ + SM4_PREPARE(x0) + + ldp x7, x8, [x3] + rev x7, x7 + rev x8, x8 + + ld1 {RMAC.16b}, [x5] + +.Lccm_enc_loop_4x: + cmp w4, #(4 * 16) + blt .Lccm_enc_loop_1x + + sub w4, w4, #(4 * 16) + + /* construct CTRs */ + inc_le128(v8) /* +0 */ + inc_le128(v9) /* +1 */ + inc_le128(v10) /* +2 */ + inc_le128(v11) /* +3 */ + + ld1 {v0.16b-v3.16b}, [x2], #64 + + SM4_CRYPT_BLK2(v8, RMAC) + eor v8.16b, v8.16b, v0.16b + eor RMAC.16b, RMAC.16b, v0.16b + SM4_CRYPT_BLK2(v9, RMAC) + eor v9.16b, v9.16b, v1.16b + eor RMAC.16b, 
RMAC.16b, v1.16b + SM4_CRYPT_BLK2(v10, RMAC) + eor v10.16b, v10.16b, v2.16b + eor RMAC.16b, RMAC.16b, v2.16b + SM4_CRYPT_BLK2(v11, RMAC) + eor v11.16b, v11.16b, v3.16b + eor RMAC.16b, RMAC.16b, v3.16b + + st1 {v8.16b-v11.16b}, [x1], #64 + + cbz w4, .Lccm_enc_end + b .Lccm_enc_loop_4x + +.Lccm_enc_loop_1x: + cmp w4, #16 + blt .Lccm_enc_tail + + sub w4, w4, #16 + + /* construct CTRs */ + inc_le128(v8) + + ld1 {v0.16b}, [x2], #16 + + SM4_CRYPT_BLK2(v8, RMAC) + eor v8.16b, v8.16b, v0.16b + eor RMAC.16b, RMAC.16b, v0.16b + + st1 {v8.16b}, [x1], #16 + + cbz w4, .Lccm_enc_end + b .Lccm_enc_loop_1x + +.Lccm_enc_tail: + /* construct CTRs */ + inc_le128(v8) + + SM4_CRYPT_BLK2(RMAC, v8) + + /* store new MAC */ + st1 {RMAC.16b}, [x5] + +.Lccm_enc_tail_loop: + ldrb w0, [x2], #1 /* get 1 byte from input */ + umov w9, v8.b[0] /* get top crypted CTR byte */ + umov w6, RMAC.b[0] /* get top MAC byte */ + + eor w9, w9, w0 /* w9 = CTR ^ input */ + eor w6, w6, w0 /* w6 = MAC ^ input */ + + strb w9, [x1], #1 /* store out byte */ + strb w6, [x5], #1 /* store MAC byte */ + + subs w4, w4, #1 + beq .Lccm_enc_ret + + /* shift out one byte */ + ext RMAC.16b, RMAC.16b, RMAC.16b, #1 + ext v8.16b, v8.16b, v8.16b, #1 + + b .Lccm_enc_tail_loop + +.Lccm_enc_end: + /* store new MAC */ + st1 {RMAC.16b}, [x5] + + /* store new CTR */ + rev x7, x7 + rev x8, x8 + stp x7, x8, [x3] + +.Lccm_enc_ret: + ret +SYM_FUNC_END(sm4_ce_ccm_enc) + +.align 3 +SYM_FUNC_START(sm4_ce_ccm_dec) + /* input: + * x0: round key array, CTX + * x1: dst + * x2: src + * x3: ctr (big endian, 128 bit) + * w4: nbytes + * x5: mac + */ + SM4_PREPARE(x0) + + ldp x7, x8, [x3] + rev x7, x7 + rev x8, x8 + + ld1 {RMAC.16b}, [x5] + +.Lccm_dec_loop_4x: + cmp w4, #(4 * 16) + blt .Lccm_dec_loop_1x + + sub w4, w4, #(4 * 16) + + /* construct CTRs */ + inc_le128(v8) /* +0 */ + inc_le128(v9) /* +1 */ + inc_le128(v10) /* +2 */ + inc_le128(v11) /* +3 */ + + ld1 {v0.16b-v3.16b}, [x2], #64 + + SM4_CRYPT_BLK2(v8, RMAC) + eor v8.16b, v8.16b, v0.16b + eor 
RMAC.16b, RMAC.16b, v8.16b + SM4_CRYPT_BLK2(v9, RMAC) + eor v9.16b, v9.16b, v1.16b + eor RMAC.16b, RMAC.16b, v9.16b + SM4_CRYPT_BLK2(v10, RMAC) + eor v10.16b, v10.16b, v2.16b + eor RMAC.16b, RMAC.16b, v10.16b + SM4_CRYPT_BLK2(v11, RMAC) + eor v11.16b, v11.16b, v3.16b + eor RMAC.16b, RMAC.16b, v11.16b + + st1 {v8.16b-v11.16b}, [x1], #64 + + cbz w4, .Lccm_dec_end + b .Lccm_dec_loop_4x + +.Lccm_dec_loop_1x: + cmp w4, #16 + blt .Lccm_dec_tail + + sub w4, w4, #16 + + /* construct CTRs */ + inc_le128(v8) + + ld1 {v0.16b}, [x2], #16 + + SM4_CRYPT_BLK2(v8, RMAC) + eor v8.16b, v8.16b, v0.16b + eor RMAC.16b, RMAC.16b, v8.16b + + st1 {v8.16b}, [x1], #16 + + cbz w4, .Lccm_dec_end + b .Lccm_dec_loop_1x + +.Lccm_dec_tail: + /* construct CTRs */ + inc_le128(v8) + + SM4_CRYPT_BLK2(RMAC, v8) + + /* store new MAC */ + st1 {RMAC.16b}, [x5] + +.Lccm_dec_tail_loop: + ldrb w0, [x2], #1 /* get 1 byte from input */ + umov w9, v8.b[0] /* get top crypted CTR byte */ + umov w6, RMAC.b[0] /* get top MAC byte */ + + eor w9, w9, w0 /* w9 = CTR ^ input */ + eor w6, w6, w9 /* w6 = MAC ^ output */ + + strb w9, [x1], #1 /* store out byte */ + strb w6, [x5], #1 /* store MAC byte */ + + subs w4, w4, #1 + beq .Lccm_dec_ret + + /* shift out one byte */ + ext RMAC.16b, RMAC.16b, RMAC.16b, #1 + ext v8.16b, v8.16b, v8.16b, #1 + + b .Lccm_dec_tail_loop + +.Lccm_dec_end: + /* store new MAC */ + st1 {RMAC.16b}, [x5] + + /* store new CTR */ + rev x7, x7 + rev x8, x8 + stp x7, x8, [x3] + +.Lccm_dec_ret: + ret +SYM_FUNC_END(sm4_ce_ccm_dec) diff --git a/arch/arm64/crypto/sm4-ce-ccm-glue.c b/arch/arm64/crypto/sm4-ce-ccm-glue.c new file mode 100644 index 000000000000..f2cec7b52efc --- /dev/null +++ b/arch/arm64/crypto/sm4-ce-ccm-glue.c @@ -0,0 +1,303 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM4-CCM AEAD Algorithm using ARMv8 Crypto Extensions + * as specified in rfc8998 + * https://datatracker.ietf.org/doc/html/rfc8998 + * + * Copyright (C) 2022 Tianjia Zhang + */ + +#include +#include +#include 
+#include +#include +#include +#include +#include +#include +#include "sm4-ce.h" + +asmlinkage void sm4_ce_cbcmac_update(const u32 *rkey_enc, u8 *mac, + const u8 *src, unsigned int nblocks); +asmlinkage void sm4_ce_ccm_enc(const u32 *rkey_enc, u8 *dst, const u8 *src, + u8 *iv, unsigned int nbytes, u8 *mac); +asmlinkage void sm4_ce_ccm_dec(const u32 *rkey_enc, u8 *dst, const u8 *src, + u8 *iv, unsigned int nbytes, u8 *mac); +asmlinkage void sm4_ce_ccm_final(const u32 *rkey_enc, u8 *iv, u8 *mac); + + +static int ccm_setkey(struct crypto_aead *tfm, const u8 *key, + unsigned int key_len) +{ + struct sm4_ctx *ctx = crypto_aead_ctx(tfm); + + if (key_len != SM4_KEY_SIZE) + return -EINVAL; + + kernel_neon_begin(); + sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec, + crypto_sm4_fk, crypto_sm4_ck); + kernel_neon_end(); + + return 0; +} + +static int ccm_setauthsize(struct crypto_aead *tfm, unsigned int authsize) +{ + if ((authsize & 1) || authsize < 4) + return -EINVAL; + return 0; +} + +static int ccm_format_input(u8 info[], struct aead_request *req, + unsigned int msglen) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + unsigned int l = req->iv[0] + 1; + unsigned int m; + __be32 len; + + /* verify that CCM dimension 'L': 2 <= L <= 8 */ + if (l < 2 || l > 8) + return -EINVAL; + if (l < 4 && msglen >> (8 * l)) + return -EOVERFLOW; + + memset(&req->iv[SM4_BLOCK_SIZE - l], 0, l); + + memcpy(info, req->iv, SM4_BLOCK_SIZE); + + m = crypto_aead_authsize(aead); + + /* format flags field per RFC 3610/NIST 800-38C */ + *info |= ((m - 2) / 2) << 3; + if (req->assoclen) + *info |= (1 << 6); + + /* + * format message length field, + * Linux uses a u32 type to represent msglen + */ + if (l >= 4) + l = 4; + + len = cpu_to_be32(msglen); + memcpy(&info[SM4_BLOCK_SIZE - l], (u8 *)&len + 4 - l, l); + + return 0; +} + +static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[]) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct sm4_ctx *ctx = 
crypto_aead_ctx(aead); + struct __packed { __be16 l; __be32 h; } aadlen; + u32 assoclen = req->assoclen; + struct scatter_walk walk; + unsigned int len; + + if (assoclen < 0xff00) { + aadlen.l = cpu_to_be16(assoclen); + len = 2; + } else { + aadlen.l = cpu_to_be16(0xfffe); + put_unaligned_be32(assoclen, &aadlen.h); + len = 6; + } + + sm4_ce_crypt_block(ctx->rkey_enc, mac, mac); + crypto_xor(mac, (const u8 *)&aadlen, len); + + scatterwalk_start(&walk, req->src); + + do { + u32 n = scatterwalk_clamp(&walk, assoclen); + u8 *p, *ptr; + + if (!n) { + scatterwalk_start(&walk, sg_next(walk.sg)); + n = scatterwalk_clamp(&walk, assoclen); + } + + p = ptr = scatterwalk_map(&walk); + assoclen -= n; + scatterwalk_advance(&walk, n); + + while (n > 0) { + unsigned int l, nblocks; + + if (len == SM4_BLOCK_SIZE) { + if (n < SM4_BLOCK_SIZE) { + sm4_ce_crypt_block(ctx->rkey_enc, + mac, mac); + + len = 0; + } else { + nblocks = n / SM4_BLOCK_SIZE; + sm4_ce_cbcmac_update(ctx->rkey_enc, + mac, ptr, nblocks); + + ptr += nblocks * SM4_BLOCK_SIZE; + n %= SM4_BLOCK_SIZE; + + continue; + } + } + + l = min(n, SM4_BLOCK_SIZE - len); + if (l) { + crypto_xor(mac + len, ptr, l); + len += l; + ptr += l; + n -= l; + } + } + + scatterwalk_unmap(p); + scatterwalk_done(&walk, 0, assoclen); + } while (assoclen); +} + +static int ccm_crypt(struct aead_request *req, struct skcipher_walk *walk, + u32 *rkey_enc, u8 mac[], + void (*sm4_ce_ccm_crypt)(const u32 *rkey_enc, u8 *dst, + const u8 *src, u8 *iv, + unsigned int nbytes, u8 *mac)) +{ + u8 __aligned(8) ctr0[SM4_BLOCK_SIZE]; + int err; + + /* preserve the initial ctr0 for the TAG */ + memcpy(ctr0, walk->iv, SM4_BLOCK_SIZE); + crypto_inc(walk->iv, SM4_BLOCK_SIZE); + + kernel_neon_begin(); + + if (req->assoclen) + ccm_calculate_auth_mac(req, mac); + + do { + unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE; + const u8 *src = walk->src.virt.addr; + u8 *dst = walk->dst.virt.addr; + + if (walk->nbytes == walk->total) + tail = 0; + + if (walk->nbytes - tail) 
+ sm4_ce_ccm_crypt(rkey_enc, dst, src, walk->iv, + walk->nbytes - tail, mac); + + if (walk->nbytes == walk->total) + sm4_ce_ccm_final(rkey_enc, ctr0, mac); + + kernel_neon_end(); + + if (walk->nbytes) { + err = skcipher_walk_done(walk, tail); + if (err) + return err; + if (walk->nbytes) + kernel_neon_begin(); + } + } while (walk->nbytes > 0); + + return 0; +} + +static int ccm_encrypt(struct aead_request *req) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct sm4_ctx *ctx = crypto_aead_ctx(aead); + u8 __aligned(8) mac[SM4_BLOCK_SIZE]; + struct skcipher_walk walk; + int err; + + err = ccm_format_input(mac, req, req->cryptlen); + if (err) + return err; + + err = skcipher_walk_aead_encrypt(&walk, req, false); + if (err) + return err; + + err = ccm_crypt(req, &walk, ctx->rkey_enc, mac, sm4_ce_ccm_enc); + if (err) + return err; + + /* copy authtag to end of dst */ + scatterwalk_map_and_copy(mac, req->dst, req->assoclen + req->cryptlen, + crypto_aead_authsize(aead), 1); + + return 0; +} + +static int ccm_decrypt(struct aead_request *req) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + unsigned int authsize = crypto_aead_authsize(aead); + struct sm4_ctx *ctx = crypto_aead_ctx(aead); + u8 __aligned(8) mac[SM4_BLOCK_SIZE]; + u8 authtag[SM4_BLOCK_SIZE]; + struct skcipher_walk walk; + int err; + + err = ccm_format_input(mac, req, req->cryptlen - authsize); + if (err) + return err; + + err = skcipher_walk_aead_decrypt(&walk, req, false); + if (err) + return err; + + err = ccm_crypt(req, &walk, ctx->rkey_enc, mac, sm4_ce_ccm_dec); + if (err) + return err; + + /* compare calculated auth tag with the stored one */ + scatterwalk_map_and_copy(authtag, req->src, + req->assoclen + req->cryptlen - authsize, + authsize, 0); + + if (crypto_memneq(authtag, mac, authsize)) + return -EBADMSG; + + return 0; +} + +static struct aead_alg sm4_ccm_alg = { + .base = { + .cra_name = "ccm(sm4)", + .cra_driver_name = "ccm-sm4-ce", + .cra_priority = 400, + .cra_blocksize = 
1, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .ivsize = SM4_BLOCK_SIZE, + .chunksize = SM4_BLOCK_SIZE, + .maxauthsize = SM4_BLOCK_SIZE, + .setkey = ccm_setkey, + .setauthsize = ccm_setauthsize, + .encrypt = ccm_encrypt, + .decrypt = ccm_decrypt, +}; + +static int __init sm4_ce_ccm_init(void) +{ + return crypto_register_aead(&sm4_ccm_alg); +} + +static void __exit sm4_ce_ccm_exit(void) +{ + crypto_unregister_aead(&sm4_ccm_alg); +} + +module_cpu_feature_match(SM4, sm4_ce_ccm_init); +module_exit(sm4_ce_ccm_exit); + +MODULE_DESCRIPTION("Synchronous SM4 in CCM mode using ARMv8 Crypto Extensions"); +MODULE_ALIAS_CRYPTO("ccm(sm4)"); +MODULE_AUTHOR("Tianjia Zhang "); +MODULE_LICENSE("GPL v2"); From ae1b83c7d572101b3b5cfbf40415c4cc5d469bde Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Thu, 27 Oct 2022 14:55:05 +0800 Subject: [PATCH 0910/4122] crypto: arm64/sm4 - add CE implementation for GCM mode This patch is a CE-optimized assembly implementation for GCM mode. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 224 and 224 modes of tcrypt, and compared the performance before and after this patch (the driver used before this patch is gcm_base(ctr-sm4-ce,ghash-generic)). The abscissas are blocks of different lengths. 
The data is tabulated and the unit is Mb/s: Before (gcm_base(ctr-sm4-ce,ghash-generic)): gcm(sm4) | 16 64 256 512 1024 1420 4096 8192 -------------+--------------------------------------------------------------------- GCM enc | 25.24 64.65 104.66 116.69 123.81 125.12 129.67 130.62 GCM dec | 25.40 64.80 104.74 116.70 123.81 125.21 129.68 130.59 GCM mb enc | 24.95 64.06 104.20 116.38 123.55 124.97 129.63 130.61 GCM mb dec | 24.92 64.00 104.13 116.34 123.55 124.98 129.56 130.48 After: gcm-sm4-ce | 16 64 256 512 1024 1420 4096 8192 -------------+--------------------------------------------------------------------- GCM enc | 108.62 397.18 971.60 1283.92 1522.77 1513.39 1777.00 1806.96 GCM dec | 116.36 398.14 1004.27 1319.11 1624.21 1635.43 1932.54 1974.20 GCM mb enc | 107.13 391.79 962.05 1274.94 1514.76 1508.57 1769.07 1801.58 GCM mb dec | 113.40 389.36 988.51 1307.68 1619.10 1631.55 1931.70 1970.86 Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu --- arch/arm64/crypto/Kconfig | 16 + arch/arm64/crypto/Makefile | 3 + arch/arm64/crypto/sm4-ce-gcm-core.S | 741 ++++++++++++++++++++++++++++ arch/arm64/crypto/sm4-ce-gcm-glue.c | 286 +++++++++++ 4 files changed, 1046 insertions(+) create mode 100644 arch/arm64/crypto/sm4-ce-gcm-core.S create mode 100644 arch/arm64/crypto/sm4-ce-gcm-glue.c diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 2611036a3e3f..6793d5bc3ee5 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -297,6 +297,22 @@ config CRYPTO_SM4_ARM64_CE_CCM - ARMv8 Crypto Extensions - NEON (Advanced SIMD) extensions +config CRYPTO_SM4_ARM64_CE_GCM + tristate "AEAD cipher: SM4 in GCM mode (ARMv8 Crypto Extensions)" + depends on KERNEL_MODE_NEON + select CRYPTO_ALGAPI + select CRYPTO_AEAD + select CRYPTO_SM4 + select CRYPTO_SM4_ARM64_CE_BLK + help + AEAD cipher: SM4 cipher algorithms (OSCCA GB/T 32907-2016) with + GCM (Galois/Counter Mode) authenticated encryption mode (NIST SP800-38D) + + Architecture: arm64 using: + - 
ARMv8 Crypto Extensions + - PMULL (Polynomial Multiply Long) instructions + - NEON (Advanced SIMD) extensions + config CRYPTO_CRCT10DIF_ARM64_CE tristate "CRCT10DIF (PMULL)" depends on KERNEL_MODE_NEON && CRC_T10DIF diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index 843ea5266965..4818e204c2ac 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -32,6 +32,9 @@ sm4-ce-y := sm4-ce-glue.o sm4-ce-core.o obj-$(CONFIG_CRYPTO_SM4_ARM64_CE_CCM) += sm4-ce-ccm.o sm4-ce-ccm-y := sm4-ce-ccm-glue.o sm4-ce-ccm-core.o +obj-$(CONFIG_CRYPTO_SM4_ARM64_CE_GCM) += sm4-ce-gcm.o +sm4-ce-gcm-y := sm4-ce-gcm-glue.o sm4-ce-gcm-core.o + obj-$(CONFIG_CRYPTO_SM4_ARM64_NEON_BLK) += sm4-neon.o sm4-neon-y := sm4-neon-glue.o sm4-neon-core.o diff --git a/arch/arm64/crypto/sm4-ce-gcm-core.S b/arch/arm64/crypto/sm4-ce-gcm-core.S new file mode 100644 index 000000000000..7aa3ec18a289 --- /dev/null +++ b/arch/arm64/crypto/sm4-ce-gcm-core.S @@ -0,0 +1,741 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM4-GCM AEAD Algorithm using ARMv8 Crypto Extensions + * as specified in rfc8998 + * https://datatracker.ietf.org/doc/html/rfc8998 + * + * Copyright (C) 2016 Jussi Kivilinna + * Copyright (C) 2022 Tianjia Zhang + */ + +#include +#include +#include "sm4-ce-asm.h" + +.arch armv8-a+crypto + +.irp b, 0, 1, 2, 3, 24, 25, 26, 27, 28, 29, 30, 31 + .set .Lv\b\().4s, \b +.endr + +.macro sm4e, vd, vn + .inst 0xcec08400 | (.L\vn << 5) | .L\vd +.endm + +/* Register macros */ + +/* Used for both encryption and decryption */ +#define RHASH v21 +#define RRCONST v22 +#define RZERO v23 + +/* Helper macros. 
*/ + +/* + * input: m0, m1 + * output: r0:r1 (low 128-bits in r0, high in r1) + */ +#define PMUL_128x128(r0, r1, m0, m1, T0, T1) \ + ext T0.16b, m1.16b, m1.16b, #8; \ + pmull r0.1q, m0.1d, m1.1d; \ + pmull T1.1q, m0.1d, T0.1d; \ + pmull2 T0.1q, m0.2d, T0.2d; \ + pmull2 r1.1q, m0.2d, m1.2d; \ + eor T0.16b, T0.16b, T1.16b; \ + ext T1.16b, RZERO.16b, T0.16b, #8; \ + ext T0.16b, T0.16b, RZERO.16b, #8; \ + eor r0.16b, r0.16b, T1.16b; \ + eor r1.16b, r1.16b, T0.16b; + +#define PMUL_128x128_4x(r0, r1, m0, m1, T0, T1, \ + r2, r3, m2, m3, T2, T3, \ + r4, r5, m4, m5, T4, T5, \ + r6, r7, m6, m7, T6, T7) \ + ext T0.16b, m1.16b, m1.16b, #8; \ + ext T2.16b, m3.16b, m3.16b, #8; \ + ext T4.16b, m5.16b, m5.16b, #8; \ + ext T6.16b, m7.16b, m7.16b, #8; \ + pmull r0.1q, m0.1d, m1.1d; \ + pmull r2.1q, m2.1d, m3.1d; \ + pmull r4.1q, m4.1d, m5.1d; \ + pmull r6.1q, m6.1d, m7.1d; \ + pmull T1.1q, m0.1d, T0.1d; \ + pmull T3.1q, m2.1d, T2.1d; \ + pmull T5.1q, m4.1d, T4.1d; \ + pmull T7.1q, m6.1d, T6.1d; \ + pmull2 T0.1q, m0.2d, T0.2d; \ + pmull2 T2.1q, m2.2d, T2.2d; \ + pmull2 T4.1q, m4.2d, T4.2d; \ + pmull2 T6.1q, m6.2d, T6.2d; \ + pmull2 r1.1q, m0.2d, m1.2d; \ + pmull2 r3.1q, m2.2d, m3.2d; \ + pmull2 r5.1q, m4.2d, m5.2d; \ + pmull2 r7.1q, m6.2d, m7.2d; \ + eor T0.16b, T0.16b, T1.16b; \ + eor T2.16b, T2.16b, T3.16b; \ + eor T4.16b, T4.16b, T5.16b; \ + eor T6.16b, T6.16b, T7.16b; \ + ext T1.16b, RZERO.16b, T0.16b, #8; \ + ext T3.16b, RZERO.16b, T2.16b, #8; \ + ext T5.16b, RZERO.16b, T4.16b, #8; \ + ext T7.16b, RZERO.16b, T6.16b, #8; \ + ext T0.16b, T0.16b, RZERO.16b, #8; \ + ext T2.16b, T2.16b, RZERO.16b, #8; \ + ext T4.16b, T4.16b, RZERO.16b, #8; \ + ext T6.16b, T6.16b, RZERO.16b, #8; \ + eor r0.16b, r0.16b, T1.16b; \ + eor r2.16b, r2.16b, T3.16b; \ + eor r4.16b, r4.16b, T5.16b; \ + eor r6.16b, r6.16b, T7.16b; \ + eor r1.16b, r1.16b, T0.16b; \ + eor r3.16b, r3.16b, T2.16b; \ + eor r5.16b, r5.16b, T4.16b; \ + eor r7.16b, r7.16b, T6.16b; + +/* + * input: r0:r1 (low 128-bits in r0, high in r1) 
+ * output: a + */ +#define REDUCTION(a, r0, r1, rconst, T0, T1) \ + pmull2 T0.1q, r1.2d, rconst.2d; \ + ext T1.16b, T0.16b, RZERO.16b, #8; \ + ext T0.16b, RZERO.16b, T0.16b, #8; \ + eor r1.16b, r1.16b, T1.16b; \ + eor r0.16b, r0.16b, T0.16b; \ + pmull T0.1q, r1.1d, rconst.1d; \ + eor a.16b, r0.16b, T0.16b; + +#define SM4_CRYPT_PMUL_128x128_BLK(b0, r0, r1, m0, m1, T0, T1) \ + rev32 b0.16b, b0.16b; \ + ext T0.16b, m1.16b, m1.16b, #8; \ + sm4e b0.4s, v24.4s; \ + pmull r0.1q, m0.1d, m1.1d; \ + sm4e b0.4s, v25.4s; \ + pmull T1.1q, m0.1d, T0.1d; \ + sm4e b0.4s, v26.4s; \ + pmull2 T0.1q, m0.2d, T0.2d; \ + sm4e b0.4s, v27.4s; \ + pmull2 r1.1q, m0.2d, m1.2d; \ + sm4e b0.4s, v28.4s; \ + eor T0.16b, T0.16b, T1.16b; \ + sm4e b0.4s, v29.4s; \ + ext T1.16b, RZERO.16b, T0.16b, #8; \ + sm4e b0.4s, v30.4s; \ + ext T0.16b, T0.16b, RZERO.16b, #8; \ + sm4e b0.4s, v31.4s; \ + eor r0.16b, r0.16b, T1.16b; \ + rev64 b0.4s, b0.4s; \ + eor r1.16b, r1.16b, T0.16b; \ + ext b0.16b, b0.16b, b0.16b, #8; \ + rev32 b0.16b, b0.16b; + +#define SM4_CRYPT_PMUL_128x128_BLK3(b0, b1, b2, \ + r0, r1, m0, m1, T0, T1, \ + r2, r3, m2, m3, T2, T3, \ + r4, r5, m4, m5, T4, T5) \ + rev32 b0.16b, b0.16b; \ + rev32 b1.16b, b1.16b; \ + rev32 b2.16b, b2.16b; \ + ext T0.16b, m1.16b, m1.16b, #8; \ + ext T2.16b, m3.16b, m3.16b, #8; \ + ext T4.16b, m5.16b, m5.16b, #8; \ + sm4e b0.4s, v24.4s; \ + sm4e b1.4s, v24.4s; \ + sm4e b2.4s, v24.4s; \ + pmull r0.1q, m0.1d, m1.1d; \ + pmull r2.1q, m2.1d, m3.1d; \ + pmull r4.1q, m4.1d, m5.1d; \ + sm4e b0.4s, v25.4s; \ + sm4e b1.4s, v25.4s; \ + sm4e b2.4s, v25.4s; \ + pmull T1.1q, m0.1d, T0.1d; \ + pmull T3.1q, m2.1d, T2.1d; \ + pmull T5.1q, m4.1d, T4.1d; \ + sm4e b0.4s, v26.4s; \ + sm4e b1.4s, v26.4s; \ + sm4e b2.4s, v26.4s; \ + pmull2 T0.1q, m0.2d, T0.2d; \ + pmull2 T2.1q, m2.2d, T2.2d; \ + pmull2 T4.1q, m4.2d, T4.2d; \ + sm4e b0.4s, v27.4s; \ + sm4e b1.4s, v27.4s; \ + sm4e b2.4s, v27.4s; \ + pmull2 r1.1q, m0.2d, m1.2d; \ + pmull2 r3.1q, m2.2d, m3.2d; \ + pmull2 r5.1q, m4.2d, 
m5.2d; \ + sm4e b0.4s, v28.4s; \ + sm4e b1.4s, v28.4s; \ + sm4e b2.4s, v28.4s; \ + eor T0.16b, T0.16b, T1.16b; \ + eor T2.16b, T2.16b, T3.16b; \ + eor T4.16b, T4.16b, T5.16b; \ + sm4e b0.4s, v29.4s; \ + sm4e b1.4s, v29.4s; \ + sm4e b2.4s, v29.4s; \ + ext T1.16b, RZERO.16b, T0.16b, #8; \ + ext T3.16b, RZERO.16b, T2.16b, #8; \ + ext T5.16b, RZERO.16b, T4.16b, #8; \ + sm4e b0.4s, v30.4s; \ + sm4e b1.4s, v30.4s; \ + sm4e b2.4s, v30.4s; \ + ext T0.16b, T0.16b, RZERO.16b, #8; \ + ext T2.16b, T2.16b, RZERO.16b, #8; \ + ext T4.16b, T4.16b, RZERO.16b, #8; \ + sm4e b0.4s, v31.4s; \ + sm4e b1.4s, v31.4s; \ + sm4e b2.4s, v31.4s; \ + eor r0.16b, r0.16b, T1.16b; \ + eor r2.16b, r2.16b, T3.16b; \ + eor r4.16b, r4.16b, T5.16b; \ + rev64 b0.4s, b0.4s; \ + rev64 b1.4s, b1.4s; \ + rev64 b2.4s, b2.4s; \ + eor r1.16b, r1.16b, T0.16b; \ + eor r3.16b, r3.16b, T2.16b; \ + eor r5.16b, r5.16b, T4.16b; \ + ext b0.16b, b0.16b, b0.16b, #8; \ + ext b1.16b, b1.16b, b1.16b, #8; \ + ext b2.16b, b2.16b, b2.16b, #8; \ + eor r0.16b, r0.16b, r2.16b; \ + eor r1.16b, r1.16b, r3.16b; \ + rev32 b0.16b, b0.16b; \ + rev32 b1.16b, b1.16b; \ + rev32 b2.16b, b2.16b; \ + eor r0.16b, r0.16b, r4.16b; \ + eor r1.16b, r1.16b, r5.16b; + +#define inc32_le128(vctr) \ + mov vctr.d[1], x9; \ + add w6, w9, #1; \ + mov vctr.d[0], x8; \ + bfi x9, x6, #0, #32; \ + rev64 vctr.16b, vctr.16b; + +#define GTAG_HASH_LENGTHS(vctr0, vlen) \ + ld1 {vlen.16b}, [x7]; \ + /* construct CTR0 */ \ + /* the lower 32-bits of initial IV is always be32(1) */ \ + mov x6, #0x1; \ + bfi x9, x6, #0, #32; \ + mov vctr0.d[0], x8; \ + mov vctr0.d[1], x9; \ + rbit vlen.16b, vlen.16b; \ + rev64 vctr0.16b, vctr0.16b; \ + /* authtag = GCTR(CTR0, GHASH) */ \ + eor RHASH.16b, RHASH.16b, vlen.16b; \ + SM4_CRYPT_PMUL_128x128_BLK(vctr0, RR0, RR1, RHASH, RH1, \ + RTMP0, RTMP1); \ + REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP2, RTMP3); \ + rbit RHASH.16b, RHASH.16b; \ + eor RHASH.16b, RHASH.16b, vctr0.16b; + + +/* Register macros for encrypt and ghash */ + +/* 
can be the same as input v0-v3 */ +#define RR1 v0 +#define RR3 v1 +#define RR5 v2 +#define RR7 v3 + +#define RR0 v4 +#define RR2 v5 +#define RR4 v6 +#define RR6 v7 + +#define RTMP0 v8 +#define RTMP1 v9 +#define RTMP2 v10 +#define RTMP3 v11 +#define RTMP4 v12 +#define RTMP5 v13 +#define RTMP6 v14 +#define RTMP7 v15 + +#define RH1 v16 +#define RH2 v17 +#define RH3 v18 +#define RH4 v19 + +.align 3 +SYM_FUNC_START(sm4_ce_pmull_ghash_setup) + /* input: + * x0: round key array, CTX + * x1: ghash table + */ + SM4_PREPARE(x0) + + adr_l x2, .Lghash_rconst + ld1r {RRCONST.2d}, [x2] + + eor RZERO.16b, RZERO.16b, RZERO.16b + + /* H = E(K, 0^128) */ + rev32 v0.16b, RZERO.16b + SM4_CRYPT_BLK_BE(v0) + + /* H ^ 1 */ + rbit RH1.16b, v0.16b + + /* H ^ 2 */ + PMUL_128x128(RR0, RR1, RH1, RH1, RTMP0, RTMP1) + REDUCTION(RH2, RR0, RR1, RRCONST, RTMP2, RTMP3) + + /* H ^ 3 */ + PMUL_128x128(RR0, RR1, RH2, RH1, RTMP0, RTMP1) + REDUCTION(RH3, RR0, RR1, RRCONST, RTMP2, RTMP3) + + /* H ^ 4 */ + PMUL_128x128(RR0, RR1, RH2, RH2, RTMP0, RTMP1) + REDUCTION(RH4, RR0, RR1, RRCONST, RTMP2, RTMP3) + + st1 {RH1.16b-RH4.16b}, [x1] + + ret +SYM_FUNC_END(sm4_ce_pmull_ghash_setup) + +.align 3 +SYM_FUNC_START(pmull_ghash_update) + /* input: + * x0: ghash table + * x1: ghash result + * x2: src + * w3: nblocks + */ + ld1 {RH1.16b-RH4.16b}, [x0] + + ld1 {RHASH.16b}, [x1] + rbit RHASH.16b, RHASH.16b + + adr_l x4, .Lghash_rconst + ld1r {RRCONST.2d}, [x4] + + eor RZERO.16b, RZERO.16b, RZERO.16b + +.Lghash_loop_4x: + cmp w3, #4 + blt .Lghash_loop_1x + + sub w3, w3, #4 + + ld1 {v0.16b-v3.16b}, [x2], #64 + + rbit v0.16b, v0.16b + rbit v1.16b, v1.16b + rbit v2.16b, v2.16b + rbit v3.16b, v3.16b + + /* + * (in0 ^ HASH) * H^4 => rr0:rr1 + * (in1) * H^3 => rr2:rr3 + * (in2) * H^2 => rr4:rr5 + * (in3) * H^1 => rr6:rr7 + */ + eor RHASH.16b, RHASH.16b, v0.16b + + PMUL_128x128_4x(RR0, RR1, RHASH, RH4, RTMP0, RTMP1, + RR2, RR3, v1, RH3, RTMP2, RTMP3, + RR4, RR5, v2, RH2, RTMP4, RTMP5, + RR6, RR7, v3, RH1, RTMP6, RTMP7) + + 
eor RR0.16b, RR0.16b, RR2.16b + eor RR1.16b, RR1.16b, RR3.16b + eor RR0.16b, RR0.16b, RR4.16b + eor RR1.16b, RR1.16b, RR5.16b + eor RR0.16b, RR0.16b, RR6.16b + eor RR1.16b, RR1.16b, RR7.16b + + REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP0, RTMP1) + + cbz w3, .Lghash_end + b .Lghash_loop_4x + +.Lghash_loop_1x: + sub w3, w3, #1 + + ld1 {v0.16b}, [x2], #16 + rbit v0.16b, v0.16b + eor RHASH.16b, RHASH.16b, v0.16b + + PMUL_128x128(RR0, RR1, RHASH, RH1, RTMP0, RTMP1) + REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP2, RTMP3) + + cbnz w3, .Lghash_loop_1x + +.Lghash_end: + rbit RHASH.16b, RHASH.16b + st1 {RHASH.2d}, [x1] + + ret +SYM_FUNC_END(pmull_ghash_update) + +.align 3 +SYM_FUNC_START(sm4_ce_pmull_gcm_enc) + /* input: + * x0: round key array, CTX + * x1: dst + * x2: src + * x3: ctr (big endian, 128 bit) + * w4: nbytes + * x5: ghash result + * x6: ghash table + * x7: lengths (only for last block) + */ + SM4_PREPARE(x0) + + ldp x8, x9, [x3] + rev x8, x8 + rev x9, x9 + + ld1 {RH1.16b-RH4.16b}, [x6] + + ld1 {RHASH.16b}, [x5] + rbit RHASH.16b, RHASH.16b + + adr_l x6, .Lghash_rconst + ld1r {RRCONST.2d}, [x6] + + eor RZERO.16b, RZERO.16b, RZERO.16b + + cbz w4, .Lgcm_enc_hash_len + +.Lgcm_enc_loop_4x: + cmp w4, #(4 * 16) + blt .Lgcm_enc_loop_1x + + sub w4, w4, #(4 * 16) + + /* construct CTRs */ + inc32_le128(v0) /* +0 */ + inc32_le128(v1) /* +1 */ + inc32_le128(v2) /* +2 */ + inc32_le128(v3) /* +3 */ + + ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64 + + SM4_CRYPT_BLK4(v0, v1, v2, v3) + + eor v0.16b, v0.16b, RTMP0.16b + eor v1.16b, v1.16b, RTMP1.16b + eor v2.16b, v2.16b, RTMP2.16b + eor v3.16b, v3.16b, RTMP3.16b + st1 {v0.16b-v3.16b}, [x1], #64 + + /* ghash update */ + + rbit v0.16b, v0.16b + rbit v1.16b, v1.16b + rbit v2.16b, v2.16b + rbit v3.16b, v3.16b + + /* + * (in0 ^ HASH) * H^4 => rr0:rr1 + * (in1) * H^3 => rr2:rr3 + * (in2) * H^2 => rr4:rr5 + * (in3) * H^1 => rr6:rr7 + */ + eor RHASH.16b, RHASH.16b, v0.16b + + PMUL_128x128_4x(RR0, RR1, RHASH, RH4, RTMP0, RTMP1, + RR2, RR3, v1, RH3, 
RTMP2, RTMP3, + RR4, RR5, v2, RH2, RTMP4, RTMP5, + RR6, RR7, v3, RH1, RTMP6, RTMP7) + + eor RR0.16b, RR0.16b, RR2.16b + eor RR1.16b, RR1.16b, RR3.16b + eor RR0.16b, RR0.16b, RR4.16b + eor RR1.16b, RR1.16b, RR5.16b + eor RR0.16b, RR0.16b, RR6.16b + eor RR1.16b, RR1.16b, RR7.16b + + REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP0, RTMP1) + + cbz w4, .Lgcm_enc_hash_len + b .Lgcm_enc_loop_4x + +.Lgcm_enc_loop_1x: + cmp w4, #16 + blt .Lgcm_enc_tail + + sub w4, w4, #16 + + /* construct CTRs */ + inc32_le128(v0) + + ld1 {RTMP0.16b}, [x2], #16 + + SM4_CRYPT_BLK(v0) + + eor v0.16b, v0.16b, RTMP0.16b + st1 {v0.16b}, [x1], #16 + + /* ghash update */ + rbit v0.16b, v0.16b + eor RHASH.16b, RHASH.16b, v0.16b + PMUL_128x128(RR0, RR1, RHASH, RH1, RTMP0, RTMP1) + REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP2, RTMP3) + + cbz w4, .Lgcm_enc_hash_len + b .Lgcm_enc_loop_1x + +.Lgcm_enc_tail: + /* construct CTRs */ + inc32_le128(v0) + SM4_CRYPT_BLK(v0) + + /* load permute table */ + adr_l x0, .Lcts_permute_table + add x0, x0, #32 + sub x0, x0, w4, uxtw + ld1 {v3.16b}, [x0] + +.Lgcm_enc_tail_loop: + /* do encrypt */ + ldrb w0, [x2], #1 /* get 1 byte from input */ + umov w6, v0.b[0] /* get top crypted byte */ + eor w6, w6, w0 /* w6 = CTR ^ input */ + strb w6, [x1], #1 /* store out byte */ + + /* shift right out one byte */ + ext v0.16b, v0.16b, v0.16b, #1 + /* the last ciphertext is placed in high bytes */ + ins v0.b[15], w6 + + subs w4, w4, #1 + bne .Lgcm_enc_tail_loop + + /* padding last block with zeros */ + tbl v0.16b, {v0.16b}, v3.16b + + /* ghash update */ + rbit v0.16b, v0.16b + eor RHASH.16b, RHASH.16b, v0.16b + PMUL_128x128(RR0, RR1, RHASH, RH1, RTMP0, RTMP1) + REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP2, RTMP3) + +.Lgcm_enc_hash_len: + cbz x7, .Lgcm_enc_end + + GTAG_HASH_LENGTHS(v1, v3) + + b .Lgcm_enc_ret + +.Lgcm_enc_end: + /* store new CTR */ + rev x8, x8 + rev x9, x9 + stp x8, x9, [x3] + + rbit RHASH.16b, RHASH.16b + +.Lgcm_enc_ret: + /* store new MAC */ + st1 {RHASH.2d}, [x5] + + ret 
+SYM_FUNC_END(sm4_ce_pmull_gcm_enc) + +#undef RR1 +#undef RR3 +#undef RR5 +#undef RR7 +#undef RR0 +#undef RR2 +#undef RR4 +#undef RR6 +#undef RTMP0 +#undef RTMP1 +#undef RTMP2 +#undef RTMP3 +#undef RTMP4 +#undef RTMP5 +#undef RTMP6 +#undef RTMP7 +#undef RH1 +#undef RH2 +#undef RH3 +#undef RH4 + + +/* Register macros for decrypt */ + +/* v0-v2 for building CTRs, v3-v5 for saving inputs */ + +#define RR1 v6 +#define RR3 v7 +#define RR5 v8 + +#define RR0 v9 +#define RR2 v10 +#define RR4 v11 + +#define RTMP0 v12 +#define RTMP1 v13 +#define RTMP2 v14 +#define RTMP3 v15 +#define RTMP4 v16 +#define RTMP5 v17 + +#define RH1 v18 +#define RH2 v19 +#define RH3 v20 + +.align 3 +SYM_FUNC_START(sm4_ce_pmull_gcm_dec) + /* input: + * x0: round key array, CTX + * x1: dst + * x2: src + * x3: ctr (big endian, 128 bit) + * w4: nbytes + * x5: ghash result + * x6: ghash table + * x7: lengths (only for last block) + */ + SM4_PREPARE(x0) + + ldp x8, x9, [x3] + rev x8, x8 + rev x9, x9 + + ld1 {RH1.16b-RH3.16b}, [x6] + + ld1 {RHASH.16b}, [x5] + rbit RHASH.16b, RHASH.16b + + adr_l x6, .Lghash_rconst + ld1r {RRCONST.2d}, [x6] + + eor RZERO.16b, RZERO.16b, RZERO.16b + + cbz w4, .Lgcm_dec_hash_len + +.Lgcm_dec_loop_3x: + cmp w4, #(3 * 16) + blt .Lgcm_dec_loop_1x + + sub w4, w4, #(3 * 16) + + ld1 {v3.16b-v5.16b}, [x2], #(3 * 16) + + /* construct CTRs */ + inc32_le128(v0) /* +0 */ + rbit v6.16b, v3.16b + inc32_le128(v1) /* +1 */ + rbit v7.16b, v4.16b + inc32_le128(v2) /* +2 */ + rbit v8.16b, v5.16b + + eor RHASH.16b, RHASH.16b, v6.16b + + /* decrypt & ghash update */ + SM4_CRYPT_PMUL_128x128_BLK3(v0, v1, v2, + RR0, RR1, RHASH, RH3, RTMP0, RTMP1, + RR2, RR3, v7, RH2, RTMP2, RTMP3, + RR4, RR5, v8, RH1, RTMP4, RTMP5) + + eor v0.16b, v0.16b, v3.16b + eor v1.16b, v1.16b, v4.16b + eor v2.16b, v2.16b, v5.16b + + REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP0, RTMP1) + + st1 {v0.16b-v2.16b}, [x1], #(3 * 16) + + cbz w4, .Lgcm_dec_hash_len + b .Lgcm_dec_loop_3x + +.Lgcm_dec_loop_1x: + cmp w4, #16 + blt 
.Lgcm_dec_tail + + sub w4, w4, #16 + + ld1 {v3.16b}, [x2], #16 + + /* construct CTRs */ + inc32_le128(v0) + rbit v6.16b, v3.16b + + eor RHASH.16b, RHASH.16b, v6.16b + + SM4_CRYPT_PMUL_128x128_BLK(v0, RR0, RR1, RHASH, RH1, RTMP0, RTMP1) + + eor v0.16b, v0.16b, v3.16b + + REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP2, RTMP3) + + st1 {v0.16b}, [x1], #16 + + cbz w4, .Lgcm_dec_hash_len + b .Lgcm_dec_loop_1x + +.Lgcm_dec_tail: + /* construct CTRs */ + inc32_le128(v0) + SM4_CRYPT_BLK(v0) + + /* load permute table */ + adr_l x0, .Lcts_permute_table + add x0, x0, #32 + sub x0, x0, w4, uxtw + ld1 {v3.16b}, [x0] + +.Lgcm_dec_tail_loop: + /* do decrypt */ + ldrb w0, [x2], #1 /* get 1 byte from input */ + umov w6, v0.b[0] /* get top crypted byte */ + eor w6, w6, w0 /* w6 = CTR ^ input */ + strb w6, [x1], #1 /* store out byte */ + + /* shift right out one byte */ + ext v0.16b, v0.16b, v0.16b, #1 + /* the last ciphertext is placed in high bytes */ + ins v0.b[15], w0 + + subs w4, w4, #1 + bne .Lgcm_dec_tail_loop + + /* padding last block with zeros */ + tbl v0.16b, {v0.16b}, v3.16b + + /* ghash update */ + rbit v0.16b, v0.16b + eor RHASH.16b, RHASH.16b, v0.16b + PMUL_128x128(RR0, RR1, RHASH, RH1, RTMP0, RTMP1) + REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP2, RTMP3) + +.Lgcm_dec_hash_len: + cbz x7, .Lgcm_dec_end + + GTAG_HASH_LENGTHS(v1, v3) + + b .Lgcm_dec_ret + +.Lgcm_dec_end: + /* store new CTR */ + rev x8, x8 + rev x9, x9 + stp x8, x9, [x3] + + rbit RHASH.16b, RHASH.16b + +.Lgcm_dec_ret: + /* store new MAC */ + st1 {RHASH.2d}, [x5] + + ret +SYM_FUNC_END(sm4_ce_pmull_gcm_dec) + + .section ".rodata", "a" + .align 4 +.Lcts_permute_table: + .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 + .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf + .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + +.Lghash_rconst: + .quad 0x87 diff --git 
a/arch/arm64/crypto/sm4-ce-gcm-glue.c b/arch/arm64/crypto/sm4-ce-gcm-glue.c new file mode 100644 index 000000000000..e90ea0f17beb --- /dev/null +++ b/arch/arm64/crypto/sm4-ce-gcm-glue.c @@ -0,0 +1,286 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM4-GCM AEAD Algorithm using ARMv8 Crypto Extensions + * as specified in rfc8998 + * https://datatracker.ietf.org/doc/html/rfc8998 + * + * Copyright (C) 2022 Tianjia Zhang + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "sm4-ce.h" + +asmlinkage void sm4_ce_pmull_ghash_setup(const u32 *rkey_enc, u8 *ghash_table); +asmlinkage void pmull_ghash_update(const u8 *ghash_table, u8 *ghash, + const u8 *src, unsigned int nblocks); +asmlinkage void sm4_ce_pmull_gcm_enc(const u32 *rkey_enc, u8 *dst, + const u8 *src, u8 *iv, + unsigned int nbytes, u8 *ghash, + const u8 *ghash_table, const u8 *lengths); +asmlinkage void sm4_ce_pmull_gcm_dec(const u32 *rkey_enc, u8 *dst, + const u8 *src, u8 *iv, + unsigned int nbytes, u8 *ghash, + const u8 *ghash_table, const u8 *lengths); + +#define GHASH_BLOCK_SIZE 16 +#define GCM_IV_SIZE 12 + +struct sm4_gcm_ctx { + struct sm4_ctx key; + u8 ghash_table[16 * 4]; +}; + + +static int gcm_setkey(struct crypto_aead *tfm, const u8 *key, + unsigned int key_len) +{ + struct sm4_gcm_ctx *ctx = crypto_aead_ctx(tfm); + + if (key_len != SM4_KEY_SIZE) + return -EINVAL; + + kernel_neon_begin(); + + sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec, + crypto_sm4_fk, crypto_sm4_ck); + sm4_ce_pmull_ghash_setup(ctx->key.rkey_enc, ctx->ghash_table); + + kernel_neon_end(); + return 0; +} + +static int gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize) +{ + switch (authsize) { + case 4: + case 8: + case 12 ... 
16: + return 0; + default: + return -EINVAL; + } +} + +static void gcm_calculate_auth_mac(struct aead_request *req, u8 ghash[]) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct sm4_gcm_ctx *ctx = crypto_aead_ctx(aead); + u8 __aligned(8) buffer[GHASH_BLOCK_SIZE]; + u32 assoclen = req->assoclen; + struct scatter_walk walk; + unsigned int buflen = 0; + + scatterwalk_start(&walk, req->src); + + do { + u32 n = scatterwalk_clamp(&walk, assoclen); + u8 *p, *ptr; + + if (!n) { + scatterwalk_start(&walk, sg_next(walk.sg)); + n = scatterwalk_clamp(&walk, assoclen); + } + + p = ptr = scatterwalk_map(&walk); + assoclen -= n; + scatterwalk_advance(&walk, n); + + if (n + buflen < GHASH_BLOCK_SIZE) { + memcpy(&buffer[buflen], ptr, n); + buflen += n; + } else { + unsigned int nblocks; + + if (buflen) { + unsigned int l = GHASH_BLOCK_SIZE - buflen; + + memcpy(&buffer[buflen], ptr, l); + ptr += l; + n -= l; + + pmull_ghash_update(ctx->ghash_table, ghash, + buffer, 1); + } + + nblocks = n / GHASH_BLOCK_SIZE; + if (nblocks) { + pmull_ghash_update(ctx->ghash_table, ghash, + ptr, nblocks); + ptr += nblocks * GHASH_BLOCK_SIZE; + } + + buflen = n % GHASH_BLOCK_SIZE; + if (buflen) + memcpy(&buffer[0], ptr, buflen); + } + + scatterwalk_unmap(p); + scatterwalk_done(&walk, 0, assoclen); + } while (assoclen); + + /* padding with '0' */ + if (buflen) { + memset(&buffer[buflen], 0, GHASH_BLOCK_SIZE - buflen); + pmull_ghash_update(ctx->ghash_table, ghash, buffer, 1); + } +} + +static int gcm_crypt(struct aead_request *req, struct skcipher_walk *walk, + struct sm4_gcm_ctx *ctx, u8 ghash[], + void (*sm4_ce_pmull_gcm_crypt)(const u32 *rkey_enc, + u8 *dst, const u8 *src, u8 *iv, + unsigned int nbytes, u8 *ghash, + const u8 *ghash_table, const u8 *lengths)) +{ + u8 __aligned(8) iv[SM4_BLOCK_SIZE]; + be128 __aligned(8) lengths; + int err; + + memset(ghash, 0, SM4_BLOCK_SIZE); + + lengths.a = cpu_to_be64(req->assoclen * 8); + lengths.b = cpu_to_be64(walk->total * 8); + + memcpy(iv, 
walk->iv, GCM_IV_SIZE); + put_unaligned_be32(2, iv + GCM_IV_SIZE); + + kernel_neon_begin(); + + if (req->assoclen) + gcm_calculate_auth_mac(req, ghash); + + do { + unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE; + const u8 *src = walk->src.virt.addr; + u8 *dst = walk->dst.virt.addr; + + if (walk->nbytes == walk->total) { + tail = 0; + + sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, dst, src, iv, + walk->nbytes, ghash, + ctx->ghash_table, + (const u8 *)&lengths); + } else if (walk->nbytes - tail) { + sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, dst, src, iv, + walk->nbytes - tail, ghash, + ctx->ghash_table, NULL); + } + + kernel_neon_end(); + + err = skcipher_walk_done(walk, tail); + if (err) + return err; + if (walk->nbytes) + kernel_neon_begin(); + } while (walk->nbytes > 0); + + return 0; +} + +static int gcm_encrypt(struct aead_request *req) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct sm4_gcm_ctx *ctx = crypto_aead_ctx(aead); + u8 __aligned(8) ghash[SM4_BLOCK_SIZE]; + struct skcipher_walk walk; + int err; + + err = skcipher_walk_aead_encrypt(&walk, req, false); + if (err) + return err; + + err = gcm_crypt(req, &walk, ctx, ghash, sm4_ce_pmull_gcm_enc); + if (err) + return err; + + /* copy authtag to end of dst */ + scatterwalk_map_and_copy(ghash, req->dst, req->assoclen + req->cryptlen, + crypto_aead_authsize(aead), 1); + + return 0; +} + +static int gcm_decrypt(struct aead_request *req) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + unsigned int authsize = crypto_aead_authsize(aead); + struct sm4_gcm_ctx *ctx = crypto_aead_ctx(aead); + u8 __aligned(8) ghash[SM4_BLOCK_SIZE]; + u8 authtag[SM4_BLOCK_SIZE]; + struct skcipher_walk walk; + int err; + + err = skcipher_walk_aead_decrypt(&walk, req, false); + if (err) + return err; + + err = gcm_crypt(req, &walk, ctx, ghash, sm4_ce_pmull_gcm_dec); + if (err) + return err; + + /* compare calculated auth tag with the stored one */ + scatterwalk_map_and_copy(authtag, req->src, + req->assoclen + 
req->cryptlen - authsize, + authsize, 0); + + if (crypto_memneq(authtag, ghash, authsize)) + return -EBADMSG; + + return 0; +} + +static struct aead_alg sm4_gcm_alg = { + .base = { + .cra_name = "gcm(sm4)", + .cra_driver_name = "gcm-sm4-ce", + .cra_priority = 400, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct sm4_gcm_ctx), + .cra_module = THIS_MODULE, + }, + .ivsize = GCM_IV_SIZE, + .chunksize = SM4_BLOCK_SIZE, + .maxauthsize = SM4_BLOCK_SIZE, + .setkey = gcm_setkey, + .setauthsize = gcm_setauthsize, + .encrypt = gcm_encrypt, + .decrypt = gcm_decrypt, +}; + +static int __init sm4_ce_gcm_init(void) +{ + if (!cpu_have_named_feature(PMULL)) + return -ENODEV; + + return crypto_register_aead(&sm4_gcm_alg); +} + +static void __exit sm4_ce_gcm_exit(void) +{ + crypto_unregister_aead(&sm4_gcm_alg); +} + +static const struct cpu_feature sm4_ce_gcm_cpu_feature[] = { + { cpu_feature(PMULL) }, + {} +}; +MODULE_DEVICE_TABLE(cpu, sm4_ce_gcm_cpu_feature); + +module_cpu_feature_match(SM4, sm4_ce_gcm_init); +module_exit(sm4_ce_gcm_exit); + +MODULE_DESCRIPTION("Synchronous SM4 in GCM mode using ARMv8 Crypto Extensions"); +MODULE_ALIAS_CRYPTO("gcm(sm4)"); +MODULE_AUTHOR("Tianjia Zhang "); +MODULE_LICENSE("GPL v2"); From 329cfa42e5280decfc9247598b9996e13b28c380 Mon Sep 17 00:00:00 2001 From: Ralph Siemsen Date: Thu, 27 Oct 2022 15:35:44 -0400 Subject: [PATCH 0911/4122] crypto: doc - use correct function name The hashing API does not have a function called .finish() Signed-off-by: Ralph Siemsen Signed-off-by: Herbert Xu --- Documentation/crypto/devel-algos.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/crypto/devel-algos.rst b/Documentation/crypto/devel-algos.rst index f225a953ab4b..3506899ef83e 100644 --- a/Documentation/crypto/devel-algos.rst +++ b/Documentation/crypto/devel-algos.rst @@ -172,7 +172,7 @@ Here are schematics of how these functions are called when operated from other part of the kernel. 
Note that the .setkey() call might happen before or after any of these schematics happen, but must not happen during any of these are in-flight. Please note that calling .init() -followed immediately by .finish() is also a perfectly valid +followed immediately by .final() is also a perfectly valid transformation. :: From 80629af0f3e438b94532cf88f3fb53c711f95138 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Thu, 20 Oct 2022 10:21:03 +0200 Subject: [PATCH 0912/4122] MAINTAINERS: remove section INTEL IOP-ADMA DMA DRIVER Commit cd0ab43ec91a ("dmaengine: remove iop-adma driver") removes the driver's source code, but misses to remove the MAINTAINERS section. Hence, ./scripts/get_maintainer.pl --self-test=patterns complains about a broken file pattern. Remove the INTEL IOP-ADMA DMA DRIVER section pointing to the removed driver. Signed-off-by: Lukas Bulwahn Acked-by: Dan Williams Link: https://lore.kernel.org/r/20221020082103.29218-1-lukas.bulwahn@gmail.com Signed-off-by: Vinod Koul --- MAINTAINERS | 5 ----- 1 file changed, 5 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index cf0f18502372..cd1264d24db8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10332,11 +10332,6 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git F: drivers/iommu/intel/ F: include/linux/intel-svm.h -INTEL IOP-ADMA DMA DRIVER -R: Dan Williams -S: Odd fixes -F: drivers/dma/iop-adma.c - INTEL IPU3 CSI-2 CIO2 DRIVER M: Yong Zhi M: Sakari Ailus From cccc46ae362398e43c6b8be38fdb7e39def9e21b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 21 Oct 2022 22:27:47 +0200 Subject: [PATCH 0913/4122] dmaengine: remove s3c24xx driver The s3c24xx platform was removed and this driver is no longer needed. 
Signed-off-by: Arnd Bergmann Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221021203329.4143397-14-arnd@kernel.org Signed-off-by: Vinod Koul --- drivers/dma/Kconfig | 12 - drivers/dma/Makefile | 1 - drivers/dma/s3c24xx-dma.c | 1428 --------------------- include/linux/platform_data/dma-s3c24xx.h | 48 - 4 files changed, 1489 deletions(-) delete mode 100644 drivers/dma/s3c24xx-dma.c delete mode 100644 include/linux/platform_data/dma-s3c24xx.h diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index b73fc89ba877..ea81d825575f 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -609,18 +609,6 @@ config SPRD_DMA help Enable support for the on-chip DMA controller on Spreadtrum platform. -config S3C24XX_DMAC - bool "Samsung S3C24XX DMA support" - depends on ARCH_S3C24XX || COMPILE_TEST - select DMA_ENGINE - select DMA_VIRTUAL_CHANNELS - help - Support for the Samsung S3C24XX DMA controller driver. The - DMA controller is having multiple DMA channels which can be - configured for different peripherals like audio, UART, SPI. - The DMA controller can transfer data from memory to peripheral, - periphal to memory, periphal to periphal and memory to memory. 
- config TXX9_DMAC tristate "Toshiba TXx9 SoC DMA support" depends on MACH_TX49XX diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index 5b55ada052a7..a4fd1ce29510 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -70,7 +70,6 @@ obj-$(CONFIG_STM32_DMA) += stm32-dma.o obj-$(CONFIG_STM32_DMAMUX) += stm32-dmamux.o obj-$(CONFIG_STM32_MDMA) += stm32-mdma.o obj-$(CONFIG_SPRD_DMA) += sprd-dma.o -obj-$(CONFIG_S3C24XX_DMAC) += s3c24xx-dma.o obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o obj-$(CONFIG_TEGRA186_GPC_DMA) += tegra186-gpc-dma.o obj-$(CONFIG_TEGRA20_APB_DMA) += tegra20-apb-dma.o diff --git a/drivers/dma/s3c24xx-dma.c b/drivers/dma/s3c24xx-dma.c deleted file mode 100644 index a09eeb545f7d..000000000000 --- a/drivers/dma/s3c24xx-dma.c +++ /dev/null @@ -1,1428 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * S3C24XX DMA handling - * - * Copyright (c) 2013 Heiko Stuebner - * - * based on amba-pl08x.c - * - * Copyright (c) 2006 ARM Ltd. - * Copyright (c) 2010 ST-Ericsson SA - * - * Author: Peter Pearse - * Author: Linus Walleij - * - * The DMA controllers in S3C24XX SoCs have a varying number of DMA signals - * that can be routed to any of the 4 to 8 hardware-channels. - * - * Therefore on these DMA controllers the number of channels - * and the number of incoming DMA signals are two totally different things. - * It is usually not possible to theoretically handle all physical signals, - * so a multiplexing scheme with possible denial of use is necessary. 
- * - * Open items: - * - bursts - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "dmaengine.h" -#include "virt-dma.h" - -#define MAX_DMA_CHANNELS 8 - -#define S3C24XX_DISRC 0x00 -#define S3C24XX_DISRCC 0x04 -#define S3C24XX_DISRCC_INC_INCREMENT 0 -#define S3C24XX_DISRCC_INC_FIXED BIT(0) -#define S3C24XX_DISRCC_LOC_AHB 0 -#define S3C24XX_DISRCC_LOC_APB BIT(1) - -#define S3C24XX_DIDST 0x08 -#define S3C24XX_DIDSTC 0x0c -#define S3C24XX_DIDSTC_INC_INCREMENT 0 -#define S3C24XX_DIDSTC_INC_FIXED BIT(0) -#define S3C24XX_DIDSTC_LOC_AHB 0 -#define S3C24XX_DIDSTC_LOC_APB BIT(1) -#define S3C24XX_DIDSTC_INT_TC0 0 -#define S3C24XX_DIDSTC_INT_RELOAD BIT(2) - -#define S3C24XX_DCON 0x10 - -#define S3C24XX_DCON_TC_MASK 0xfffff -#define S3C24XX_DCON_DSZ_BYTE (0 << 20) -#define S3C24XX_DCON_DSZ_HALFWORD (1 << 20) -#define S3C24XX_DCON_DSZ_WORD (2 << 20) -#define S3C24XX_DCON_DSZ_MASK (3 << 20) -#define S3C24XX_DCON_DSZ_SHIFT 20 -#define S3C24XX_DCON_AUTORELOAD 0 -#define S3C24XX_DCON_NORELOAD BIT(22) -#define S3C24XX_DCON_HWTRIG BIT(23) -#define S3C24XX_DCON_HWSRC_SHIFT 24 -#define S3C24XX_DCON_SERV_SINGLE 0 -#define S3C24XX_DCON_SERV_WHOLE BIT(27) -#define S3C24XX_DCON_TSZ_UNIT 0 -#define S3C24XX_DCON_TSZ_BURST4 BIT(28) -#define S3C24XX_DCON_INT BIT(29) -#define S3C24XX_DCON_SYNC_PCLK 0 -#define S3C24XX_DCON_SYNC_HCLK BIT(30) -#define S3C24XX_DCON_DEMAND 0 -#define S3C24XX_DCON_HANDSHAKE BIT(31) - -#define S3C24XX_DSTAT 0x14 -#define S3C24XX_DSTAT_STAT_BUSY BIT(20) -#define S3C24XX_DSTAT_CURRTC_MASK 0xfffff - -#define S3C24XX_DMASKTRIG 0x20 -#define S3C24XX_DMASKTRIG_SWTRIG BIT(0) -#define S3C24XX_DMASKTRIG_ON BIT(1) -#define S3C24XX_DMASKTRIG_STOP BIT(2) - -#define S3C24XX_DMAREQSEL 0x24 -#define S3C24XX_DMAREQSEL_HW BIT(0) - -/* - * S3C2410, S3C2440 and S3C2442 SoCs cannot select any physical channel - * for a DMA source. Instead only specific channels are valid. 
- * All of these SoCs have 4 physical channels and the number of request - * source bits is 3. Additionally we also need 1 bit to mark the channel - * as valid. - * Therefore we separate the chansel element of the channel data into 4 - * parts of 4 bits each, to hold the information if the channel is valid - * and the hw request source to use. - * - * Example: - * SDI is valid on channels 0, 2 and 3 - with varying hw request sources. - * For it the chansel field would look like - * - * ((BIT(3) | 1) << 3 * 4) | // channel 3, with request source 1 - * ((BIT(3) | 2) << 2 * 4) | // channel 2, with request source 2 - * ((BIT(3) | 2) << 0 * 4) // channel 0, with request source 2 - */ -#define S3C24XX_CHANSEL_WIDTH 4 -#define S3C24XX_CHANSEL_VALID BIT(3) -#define S3C24XX_CHANSEL_REQ_MASK 7 - -/* - * struct soc_data - vendor-specific config parameters for individual SoCs - * @stride: spacing between the registers of each channel - * @has_reqsel: does the controller use the newer requestselection mechanism - * @has_clocks: are controllable dma-clocks present - */ -struct soc_data { - int stride; - bool has_reqsel; - bool has_clocks; -}; - -/* - * enum s3c24xx_dma_chan_state - holds the virtual channel states - * @S3C24XX_DMA_CHAN_IDLE: the channel is idle - * @S3C24XX_DMA_CHAN_RUNNING: the channel has allocated a physical transport - * channel and is running a transfer on it - * @S3C24XX_DMA_CHAN_WAITING: the channel is waiting for a physical transport - * channel to become available (only pertains to memcpy channels) - */ -enum s3c24xx_dma_chan_state { - S3C24XX_DMA_CHAN_IDLE, - S3C24XX_DMA_CHAN_RUNNING, - S3C24XX_DMA_CHAN_WAITING, -}; - -/* - * struct s3c24xx_sg - structure containing data per sg - * @src_addr: src address of sg - * @dst_addr: dst address of sg - * @len: transfer len in bytes - * @node: node for txd's dsg_list - */ -struct s3c24xx_sg { - dma_addr_t src_addr; - dma_addr_t dst_addr; - size_t len; - struct list_head node; -}; - -/* - * struct s3c24xx_txd - 
wrapper for struct dma_async_tx_descriptor - * @vd: virtual DMA descriptor - * @dsg_list: list of children sg's - * @at: sg currently being transfered - * @width: transfer width - * @disrcc: value for source control register - * @didstc: value for destination control register - * @dcon: base value for dcon register - * @cyclic: indicate cyclic transfer - */ -struct s3c24xx_txd { - struct virt_dma_desc vd; - struct list_head dsg_list; - struct list_head *at; - u8 width; - u32 disrcc; - u32 didstc; - u32 dcon; - bool cyclic; -}; - -struct s3c24xx_dma_chan; - -/* - * struct s3c24xx_dma_phy - holder for the physical channels - * @id: physical index to this channel - * @valid: does the channel have all required elements - * @base: virtual memory base (remapped) for the this channel - * @irq: interrupt for this channel - * @clk: clock for this channel - * @lock: a lock to use when altering an instance of this struct - * @serving: virtual channel currently being served by this physicalchannel - * @host: a pointer to the host (internal use) - */ -struct s3c24xx_dma_phy { - unsigned int id; - bool valid; - void __iomem *base; - int irq; - struct clk *clk; - spinlock_t lock; - struct s3c24xx_dma_chan *serving; - struct s3c24xx_dma_engine *host; -}; - -/* - * struct s3c24xx_dma_chan - this structure wraps a DMA ENGINE channel - * @id: the id of the channel - * @name: name of the channel - * @vc: wrapped virtual channel - * @phy: the physical channel utilized by this channel, if there is one - * @runtime_addr: address for RX/TX according to the runtime config - * @at: active transaction on this channel - * @lock: a lock for this channel data - * @host: a pointer to the host (internal use) - * @state: whether the channel is idle, running etc - * @slave: whether this channel is a device (slave) or for memcpy - */ -struct s3c24xx_dma_chan { - int id; - const char *name; - struct virt_dma_chan vc; - struct s3c24xx_dma_phy *phy; - struct dma_slave_config cfg; - struct s3c24xx_txd 
*at; - struct s3c24xx_dma_engine *host; - enum s3c24xx_dma_chan_state state; - bool slave; -}; - -/* - * struct s3c24xx_dma_engine - the local state holder for the S3C24XX - * @pdev: the corresponding platform device - * @pdata: platform data passed in from the platform/machine - * @base: virtual memory base (remapped) - * @slave: slave engine for this instance - * @memcpy: memcpy engine for this instance - * @phy_chans: array of data for the physical channels - */ -struct s3c24xx_dma_engine { - struct platform_device *pdev; - const struct s3c24xx_dma_platdata *pdata; - struct soc_data *sdata; - void __iomem *base; - struct dma_device slave; - struct dma_device memcpy; - struct s3c24xx_dma_phy *phy_chans; -}; - -/* - * Physical channel handling - */ - -/* - * Check whether a certain channel is busy or not. - */ -static int s3c24xx_dma_phy_busy(struct s3c24xx_dma_phy *phy) -{ - unsigned int val = readl(phy->base + S3C24XX_DSTAT); - return val & S3C24XX_DSTAT_STAT_BUSY; -} - -static bool s3c24xx_dma_phy_valid(struct s3c24xx_dma_chan *s3cchan, - struct s3c24xx_dma_phy *phy) -{ - struct s3c24xx_dma_engine *s3cdma = s3cchan->host; - const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata; - struct s3c24xx_dma_channel *cdata = &pdata->channels[s3cchan->id]; - int phyvalid; - - /* every phy is valid for memcopy channels */ - if (!s3cchan->slave) - return true; - - /* On newer variants all phys can be used for all virtual channels */ - if (s3cdma->sdata->has_reqsel) - return true; - - phyvalid = (cdata->chansel >> (phy->id * S3C24XX_CHANSEL_WIDTH)); - return (phyvalid & S3C24XX_CHANSEL_VALID) ? true : false; -} - -/* - * Allocate a physical channel for a virtual channel - * - * Try to locate a physical channel to be used for this transfer. If all - * are taken return NULL and the requester will have to cope by using - * some fallback PIO mode or retrying later. 
- */ -static -struct s3c24xx_dma_phy *s3c24xx_dma_get_phy(struct s3c24xx_dma_chan *s3cchan) -{ - struct s3c24xx_dma_engine *s3cdma = s3cchan->host; - struct s3c24xx_dma_phy *phy = NULL; - unsigned long flags; - int i; - int ret; - - for (i = 0; i < s3cdma->pdata->num_phy_channels; i++) { - phy = &s3cdma->phy_chans[i]; - - if (!phy->valid) - continue; - - if (!s3c24xx_dma_phy_valid(s3cchan, phy)) - continue; - - spin_lock_irqsave(&phy->lock, flags); - - if (!phy->serving) { - phy->serving = s3cchan; - spin_unlock_irqrestore(&phy->lock, flags); - break; - } - - spin_unlock_irqrestore(&phy->lock, flags); - } - - /* No physical channel available, cope with it */ - if (i == s3cdma->pdata->num_phy_channels) { - dev_warn(&s3cdma->pdev->dev, "no phy channel available\n"); - return NULL; - } - - /* start the phy clock */ - if (s3cdma->sdata->has_clocks) { - ret = clk_enable(phy->clk); - if (ret) { - dev_err(&s3cdma->pdev->dev, "could not enable clock for channel %d, err %d\n", - phy->id, ret); - phy->serving = NULL; - return NULL; - } - } - - return phy; -} - -/* - * Mark the physical channel as free. - * - * This drops the link between the physical and virtual channel. - */ -static inline void s3c24xx_dma_put_phy(struct s3c24xx_dma_phy *phy) -{ - struct s3c24xx_dma_engine *s3cdma = phy->host; - - if (s3cdma->sdata->has_clocks) - clk_disable(phy->clk); - - phy->serving = NULL; -} - -/* - * Stops the channel by writing the stop bit. - * This should not be used for an on-going transfer, but as a method of - * shutting down a channel (eg, when it's no longer used) or terminating a - * transfer. 
- */ -static void s3c24xx_dma_terminate_phy(struct s3c24xx_dma_phy *phy) -{ - writel(S3C24XX_DMASKTRIG_STOP, phy->base + S3C24XX_DMASKTRIG); -} - -/* - * Virtual channel handling - */ - -static inline -struct s3c24xx_dma_chan *to_s3c24xx_dma_chan(struct dma_chan *chan) -{ - return container_of(chan, struct s3c24xx_dma_chan, vc.chan); -} - -static u32 s3c24xx_dma_getbytes_chan(struct s3c24xx_dma_chan *s3cchan) -{ - struct s3c24xx_dma_phy *phy = s3cchan->phy; - struct s3c24xx_txd *txd = s3cchan->at; - u32 tc = readl(phy->base + S3C24XX_DSTAT) & S3C24XX_DSTAT_CURRTC_MASK; - - return tc * txd->width; -} - -static int s3c24xx_dma_set_runtime_config(struct dma_chan *chan, - struct dma_slave_config *config) -{ - struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan); - unsigned long flags; - int ret = 0; - - /* Reject definitely invalid configurations */ - if (config->src_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES || - config->dst_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES) - return -EINVAL; - - spin_lock_irqsave(&s3cchan->vc.lock, flags); - - if (!s3cchan->slave) { - ret = -EINVAL; - goto out; - } - - s3cchan->cfg = *config; - -out: - spin_unlock_irqrestore(&s3cchan->vc.lock, flags); - return ret; -} - -/* - * Transfer handling - */ - -static inline -struct s3c24xx_txd *to_s3c24xx_txd(struct dma_async_tx_descriptor *tx) -{ - return container_of(tx, struct s3c24xx_txd, vd.tx); -} - -static struct s3c24xx_txd *s3c24xx_dma_get_txd(void) -{ - struct s3c24xx_txd *txd = kzalloc(sizeof(*txd), GFP_NOWAIT); - - if (txd) { - INIT_LIST_HEAD(&txd->dsg_list); - txd->dcon = S3C24XX_DCON_INT | S3C24XX_DCON_NORELOAD; - } - - return txd; -} - -static void s3c24xx_dma_free_txd(struct s3c24xx_txd *txd) -{ - struct s3c24xx_sg *dsg, *_dsg; - - list_for_each_entry_safe(dsg, _dsg, &txd->dsg_list, node) { - list_del(&dsg->node); - kfree(dsg); - } - - kfree(txd); -} - -static void s3c24xx_dma_start_next_sg(struct s3c24xx_dma_chan *s3cchan, - struct s3c24xx_txd *txd) -{ - struct 
s3c24xx_dma_engine *s3cdma = s3cchan->host; - struct s3c24xx_dma_phy *phy = s3cchan->phy; - const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata; - struct s3c24xx_sg *dsg = list_entry(txd->at, struct s3c24xx_sg, node); - u32 dcon = txd->dcon; - u32 val; - - /* transfer-size and -count from len and width */ - switch (txd->width) { - case 1: - dcon |= S3C24XX_DCON_DSZ_BYTE | dsg->len; - break; - case 2: - dcon |= S3C24XX_DCON_DSZ_HALFWORD | (dsg->len / 2); - break; - case 4: - dcon |= S3C24XX_DCON_DSZ_WORD | (dsg->len / 4); - break; - } - - if (s3cchan->slave) { - struct s3c24xx_dma_channel *cdata = - &pdata->channels[s3cchan->id]; - - if (s3cdma->sdata->has_reqsel) { - writel_relaxed((cdata->chansel << 1) | - S3C24XX_DMAREQSEL_HW, - phy->base + S3C24XX_DMAREQSEL); - } else { - int csel = cdata->chansel >> (phy->id * - S3C24XX_CHANSEL_WIDTH); - - csel &= S3C24XX_CHANSEL_REQ_MASK; - dcon |= csel << S3C24XX_DCON_HWSRC_SHIFT; - dcon |= S3C24XX_DCON_HWTRIG; - } - } else { - if (s3cdma->sdata->has_reqsel) - writel_relaxed(0, phy->base + S3C24XX_DMAREQSEL); - } - - writel_relaxed(dsg->src_addr, phy->base + S3C24XX_DISRC); - writel_relaxed(txd->disrcc, phy->base + S3C24XX_DISRCC); - writel_relaxed(dsg->dst_addr, phy->base + S3C24XX_DIDST); - writel_relaxed(txd->didstc, phy->base + S3C24XX_DIDSTC); - writel_relaxed(dcon, phy->base + S3C24XX_DCON); - - val = readl_relaxed(phy->base + S3C24XX_DMASKTRIG); - val &= ~S3C24XX_DMASKTRIG_STOP; - val |= S3C24XX_DMASKTRIG_ON; - - /* trigger the dma operation for memcpy transfers */ - if (!s3cchan->slave) - val |= S3C24XX_DMASKTRIG_SWTRIG; - - writel(val, phy->base + S3C24XX_DMASKTRIG); -} - -/* - * Set the initial DMA register values and start first sg. 
- */ -static void s3c24xx_dma_start_next_txd(struct s3c24xx_dma_chan *s3cchan) -{ - struct s3c24xx_dma_phy *phy = s3cchan->phy; - struct virt_dma_desc *vd = vchan_next_desc(&s3cchan->vc); - struct s3c24xx_txd *txd = to_s3c24xx_txd(&vd->tx); - - list_del(&txd->vd.node); - - s3cchan->at = txd; - - /* Wait for channel inactive */ - while (s3c24xx_dma_phy_busy(phy)) - cpu_relax(); - - /* point to the first element of the sg list */ - txd->at = txd->dsg_list.next; - s3c24xx_dma_start_next_sg(s3cchan, txd); -} - -/* - * Try to allocate a physical channel. When successful, assign it to - * this virtual channel, and initiate the next descriptor. The - * virtual channel lock must be held at this point. - */ -static void s3c24xx_dma_phy_alloc_and_start(struct s3c24xx_dma_chan *s3cchan) -{ - struct s3c24xx_dma_engine *s3cdma = s3cchan->host; - struct s3c24xx_dma_phy *phy; - - phy = s3c24xx_dma_get_phy(s3cchan); - if (!phy) { - dev_dbg(&s3cdma->pdev->dev, "no physical channel available for xfer on %s\n", - s3cchan->name); - s3cchan->state = S3C24XX_DMA_CHAN_WAITING; - return; - } - - dev_dbg(&s3cdma->pdev->dev, "allocated physical channel %d for xfer on %s\n", - phy->id, s3cchan->name); - - s3cchan->phy = phy; - s3cchan->state = S3C24XX_DMA_CHAN_RUNNING; - - s3c24xx_dma_start_next_txd(s3cchan); -} - -static void s3c24xx_dma_phy_reassign_start(struct s3c24xx_dma_phy *phy, - struct s3c24xx_dma_chan *s3cchan) -{ - struct s3c24xx_dma_engine *s3cdma = s3cchan->host; - - dev_dbg(&s3cdma->pdev->dev, "reassigned physical channel %d for xfer on %s\n", - phy->id, s3cchan->name); - - /* - * We do this without taking the lock; we're really only concerned - * about whether this pointer is NULL or not, and we're guaranteed - * that this will only be called when it _already_ is non-NULL. 
- */ - phy->serving = s3cchan; - s3cchan->phy = phy; - s3cchan->state = S3C24XX_DMA_CHAN_RUNNING; - s3c24xx_dma_start_next_txd(s3cchan); -} - -/* - * Free a physical DMA channel, potentially reallocating it to another - * virtual channel if we have any pending. - */ -static void s3c24xx_dma_phy_free(struct s3c24xx_dma_chan *s3cchan) -{ - struct s3c24xx_dma_engine *s3cdma = s3cchan->host; - struct s3c24xx_dma_chan *p, *next; - -retry: - next = NULL; - - /* Find a waiting virtual channel for the next transfer. */ - list_for_each_entry(p, &s3cdma->memcpy.channels, vc.chan.device_node) - if (p->state == S3C24XX_DMA_CHAN_WAITING) { - next = p; - break; - } - - if (!next) { - list_for_each_entry(p, &s3cdma->slave.channels, - vc.chan.device_node) - if (p->state == S3C24XX_DMA_CHAN_WAITING && - s3c24xx_dma_phy_valid(p, s3cchan->phy)) { - next = p; - break; - } - } - - /* Ensure that the physical channel is stopped */ - s3c24xx_dma_terminate_phy(s3cchan->phy); - - if (next) { - bool success; - - /* - * Eww. We know this isn't going to deadlock - * but lockdep probably doesn't. 
- */ - spin_lock(&next->vc.lock); - /* Re-check the state now that we have the lock */ - success = next->state == S3C24XX_DMA_CHAN_WAITING; - if (success) - s3c24xx_dma_phy_reassign_start(s3cchan->phy, next); - spin_unlock(&next->vc.lock); - - /* If the state changed, try to find another channel */ - if (!success) - goto retry; - } else { - /* No more jobs, so free up the physical channel */ - s3c24xx_dma_put_phy(s3cchan->phy); - } - - s3cchan->phy = NULL; - s3cchan->state = S3C24XX_DMA_CHAN_IDLE; -} - -static void s3c24xx_dma_desc_free(struct virt_dma_desc *vd) -{ - struct s3c24xx_txd *txd = to_s3c24xx_txd(&vd->tx); - struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(vd->tx.chan); - - if (!s3cchan->slave) - dma_descriptor_unmap(&vd->tx); - - s3c24xx_dma_free_txd(txd); -} - -static irqreturn_t s3c24xx_dma_irq(int irq, void *data) -{ - struct s3c24xx_dma_phy *phy = data; - struct s3c24xx_dma_chan *s3cchan = phy->serving; - struct s3c24xx_txd *txd; - - dev_dbg(&phy->host->pdev->dev, "interrupt on channel %d\n", phy->id); - - /* - * Interrupts happen to notify the completion of a transfer and the - * channel should have moved into its stop state already on its own. - * Therefore interrupts on channels not bound to a virtual channel - * should never happen. Nevertheless send a terminate command to the - * channel if the unlikely case happens. 
- */ - if (unlikely(!s3cchan)) { - dev_err(&phy->host->pdev->dev, "interrupt on unused channel %d\n", - phy->id); - - s3c24xx_dma_terminate_phy(phy); - - return IRQ_HANDLED; - } - - spin_lock(&s3cchan->vc.lock); - txd = s3cchan->at; - if (txd) { - /* when more sg's are in this txd, start the next one */ - if (!list_is_last(txd->at, &txd->dsg_list)) { - txd->at = txd->at->next; - if (txd->cyclic) - vchan_cyclic_callback(&txd->vd); - s3c24xx_dma_start_next_sg(s3cchan, txd); - } else if (!txd->cyclic) { - s3cchan->at = NULL; - vchan_cookie_complete(&txd->vd); - - /* - * And start the next descriptor (if any), - * otherwise free this channel. - */ - if (vchan_next_desc(&s3cchan->vc)) - s3c24xx_dma_start_next_txd(s3cchan); - else - s3c24xx_dma_phy_free(s3cchan); - } else { - vchan_cyclic_callback(&txd->vd); - - /* Cyclic: reset at beginning */ - txd->at = txd->dsg_list.next; - s3c24xx_dma_start_next_sg(s3cchan, txd); - } - } - spin_unlock(&s3cchan->vc.lock); - - return IRQ_HANDLED; -} - -/* - * The DMA ENGINE API - */ - -static int s3c24xx_dma_terminate_all(struct dma_chan *chan) -{ - struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan); - struct s3c24xx_dma_engine *s3cdma = s3cchan->host; - LIST_HEAD(head); - unsigned long flags; - int ret; - - spin_lock_irqsave(&s3cchan->vc.lock, flags); - - if (!s3cchan->phy && !s3cchan->at) { - dev_err(&s3cdma->pdev->dev, "trying to terminate already stopped channel %d\n", - s3cchan->id); - ret = -EINVAL; - goto unlock; - } - - s3cchan->state = S3C24XX_DMA_CHAN_IDLE; - - /* Mark physical channel as free */ - if (s3cchan->phy) - s3c24xx_dma_phy_free(s3cchan); - - /* Dequeue current job */ - if (s3cchan->at) { - vchan_terminate_vdesc(&s3cchan->at->vd); - s3cchan->at = NULL; - } - - /* Dequeue jobs not yet fired as well */ - - vchan_get_all_descriptors(&s3cchan->vc, &head); - - spin_unlock_irqrestore(&s3cchan->vc.lock, flags); - - vchan_dma_desc_free_list(&s3cchan->vc, &head); - - return 0; - -unlock: - 
spin_unlock_irqrestore(&s3cchan->vc.lock, flags); - - return ret; -} - -static void s3c24xx_dma_synchronize(struct dma_chan *chan) -{ - struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan); - - vchan_synchronize(&s3cchan->vc); -} - -static void s3c24xx_dma_free_chan_resources(struct dma_chan *chan) -{ - /* Ensure all queued descriptors are freed */ - vchan_free_chan_resources(to_virt_chan(chan)); -} - -static enum dma_status s3c24xx_dma_tx_status(struct dma_chan *chan, - dma_cookie_t cookie, struct dma_tx_state *txstate) -{ - struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan); - struct s3c24xx_txd *txd; - struct s3c24xx_sg *dsg; - struct virt_dma_desc *vd; - unsigned long flags; - enum dma_status ret; - size_t bytes = 0; - - spin_lock_irqsave(&s3cchan->vc.lock, flags); - ret = dma_cookie_status(chan, cookie, txstate); - - /* - * There's no point calculating the residue if there's - * no txstate to store the value. - */ - if (ret == DMA_COMPLETE || !txstate) { - spin_unlock_irqrestore(&s3cchan->vc.lock, flags); - return ret; - } - - vd = vchan_find_desc(&s3cchan->vc, cookie); - if (vd) { - /* On the issued list, so hasn't been processed yet */ - txd = to_s3c24xx_txd(&vd->tx); - - list_for_each_entry(dsg, &txd->dsg_list, node) - bytes += dsg->len; - } else { - /* - * Currently running, so sum over the pending sg's and - * the currently active one. 
- */ - txd = s3cchan->at; - - dsg = list_entry(txd->at, struct s3c24xx_sg, node); - list_for_each_entry_from(dsg, &txd->dsg_list, node) - bytes += dsg->len; - - bytes += s3c24xx_dma_getbytes_chan(s3cchan); - } - spin_unlock_irqrestore(&s3cchan->vc.lock, flags); - - /* - * This cookie not complete yet - * Get number of bytes left in the active transactions and queue - */ - dma_set_residue(txstate, bytes); - - /* Whether waiting or running, we're in progress */ - return ret; -} - -/* - * Initialize a descriptor to be used by memcpy submit - */ -static struct dma_async_tx_descriptor *s3c24xx_dma_prep_memcpy( - struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, - size_t len, unsigned long flags) -{ - struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan); - struct s3c24xx_dma_engine *s3cdma = s3cchan->host; - struct s3c24xx_txd *txd; - struct s3c24xx_sg *dsg; - int src_mod, dest_mod; - - dev_dbg(&s3cdma->pdev->dev, "prepare memcpy of %zu bytes from %s\n", - len, s3cchan->name); - - if ((len & S3C24XX_DCON_TC_MASK) != len) { - dev_err(&s3cdma->pdev->dev, "memcpy size %zu to large\n", len); - return NULL; - } - - txd = s3c24xx_dma_get_txd(); - if (!txd) - return NULL; - - dsg = kzalloc(sizeof(*dsg), GFP_NOWAIT); - if (!dsg) { - s3c24xx_dma_free_txd(txd); - return NULL; - } - list_add_tail(&dsg->node, &txd->dsg_list); - - dsg->src_addr = src; - dsg->dst_addr = dest; - dsg->len = len; - - /* - * Determine a suitable transfer width. - * The DMA controller cannot fetch/store information which is not - * naturally aligned on the bus, i.e., a 4 byte fetch must start at - * an address divisible by 4 - more generally addr % width must be 0. - */ - src_mod = src % 4; - dest_mod = dest % 4; - switch (len % 4) { - case 0: - txd->width = (src_mod == 0 && dest_mod == 0) ? 4 : 1; - break; - case 2: - txd->width = ((src_mod == 2 || src_mod == 0) && - (dest_mod == 2 || dest_mod == 0)) ? 
2 : 1; - break; - default: - txd->width = 1; - break; - } - - txd->disrcc = S3C24XX_DISRCC_LOC_AHB | S3C24XX_DISRCC_INC_INCREMENT; - txd->didstc = S3C24XX_DIDSTC_LOC_AHB | S3C24XX_DIDSTC_INC_INCREMENT; - txd->dcon |= S3C24XX_DCON_DEMAND | S3C24XX_DCON_SYNC_HCLK | - S3C24XX_DCON_SERV_WHOLE; - - return vchan_tx_prep(&s3cchan->vc, &txd->vd, flags); -} - -static struct dma_async_tx_descriptor *s3c24xx_dma_prep_dma_cyclic( - struct dma_chan *chan, dma_addr_t addr, size_t size, size_t period, - enum dma_transfer_direction direction, unsigned long flags) -{ - struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan); - struct s3c24xx_dma_engine *s3cdma = s3cchan->host; - const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata; - struct s3c24xx_dma_channel *cdata = &pdata->channels[s3cchan->id]; - struct s3c24xx_txd *txd; - struct s3c24xx_sg *dsg; - unsigned sg_len; - dma_addr_t slave_addr; - u32 hwcfg = 0; - int i; - - dev_dbg(&s3cdma->pdev->dev, - "prepare cyclic transaction of %zu bytes with period %zu from %s\n", - size, period, s3cchan->name); - - if (!is_slave_direction(direction)) { - dev_err(&s3cdma->pdev->dev, - "direction %d unsupported\n", direction); - return NULL; - } - - txd = s3c24xx_dma_get_txd(); - if (!txd) - return NULL; - - txd->cyclic = 1; - - if (cdata->handshake) - txd->dcon |= S3C24XX_DCON_HANDSHAKE; - - switch (cdata->bus) { - case S3C24XX_DMA_APB: - txd->dcon |= S3C24XX_DCON_SYNC_PCLK; - hwcfg |= S3C24XX_DISRCC_LOC_APB; - break; - case S3C24XX_DMA_AHB: - txd->dcon |= S3C24XX_DCON_SYNC_HCLK; - hwcfg |= S3C24XX_DISRCC_LOC_AHB; - break; - } - - /* - * Always assume our peripheral desintation is a fixed - * address in memory. - */ - hwcfg |= S3C24XX_DISRCC_INC_FIXED; - - /* - * Individual dma operations are requested by the slave, - * so serve only single atomic operations (S3C24XX_DCON_SERV_SINGLE). 
- */ - txd->dcon |= S3C24XX_DCON_SERV_SINGLE; - - if (direction == DMA_MEM_TO_DEV) { - txd->disrcc = S3C24XX_DISRCC_LOC_AHB | - S3C24XX_DISRCC_INC_INCREMENT; - txd->didstc = hwcfg; - slave_addr = s3cchan->cfg.dst_addr; - txd->width = s3cchan->cfg.dst_addr_width; - } else { - txd->disrcc = hwcfg; - txd->didstc = S3C24XX_DIDSTC_LOC_AHB | - S3C24XX_DIDSTC_INC_INCREMENT; - slave_addr = s3cchan->cfg.src_addr; - txd->width = s3cchan->cfg.src_addr_width; - } - - sg_len = size / period; - - for (i = 0; i < sg_len; i++) { - dsg = kzalloc(sizeof(*dsg), GFP_NOWAIT); - if (!dsg) { - s3c24xx_dma_free_txd(txd); - return NULL; - } - list_add_tail(&dsg->node, &txd->dsg_list); - - dsg->len = period; - /* Check last period length */ - if (i == sg_len - 1) - dsg->len = size - period * i; - if (direction == DMA_MEM_TO_DEV) { - dsg->src_addr = addr + period * i; - dsg->dst_addr = slave_addr; - } else { /* DMA_DEV_TO_MEM */ - dsg->src_addr = slave_addr; - dsg->dst_addr = addr + period * i; - } - } - - return vchan_tx_prep(&s3cchan->vc, &txd->vd, flags); -} - -static struct dma_async_tx_descriptor *s3c24xx_dma_prep_slave_sg( - struct dma_chan *chan, struct scatterlist *sgl, - unsigned int sg_len, enum dma_transfer_direction direction, - unsigned long flags, void *context) -{ - struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan); - struct s3c24xx_dma_engine *s3cdma = s3cchan->host; - const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata; - struct s3c24xx_dma_channel *cdata = &pdata->channels[s3cchan->id]; - struct s3c24xx_txd *txd; - struct s3c24xx_sg *dsg; - struct scatterlist *sg; - dma_addr_t slave_addr; - u32 hwcfg = 0; - int tmp; - - dev_dbg(&s3cdma->pdev->dev, "prepare transaction of %d bytes from %s\n", - sg_dma_len(sgl), s3cchan->name); - - txd = s3c24xx_dma_get_txd(); - if (!txd) - return NULL; - - if (cdata->handshake) - txd->dcon |= S3C24XX_DCON_HANDSHAKE; - - switch (cdata->bus) { - case S3C24XX_DMA_APB: - txd->dcon |= S3C24XX_DCON_SYNC_PCLK; - hwcfg |= 
S3C24XX_DISRCC_LOC_APB; - break; - case S3C24XX_DMA_AHB: - txd->dcon |= S3C24XX_DCON_SYNC_HCLK; - hwcfg |= S3C24XX_DISRCC_LOC_AHB; - break; - } - - /* - * Always assume our peripheral desintation is a fixed - * address in memory. - */ - hwcfg |= S3C24XX_DISRCC_INC_FIXED; - - /* - * Individual dma operations are requested by the slave, - * so serve only single atomic operations (S3C24XX_DCON_SERV_SINGLE). - */ - txd->dcon |= S3C24XX_DCON_SERV_SINGLE; - - if (direction == DMA_MEM_TO_DEV) { - txd->disrcc = S3C24XX_DISRCC_LOC_AHB | - S3C24XX_DISRCC_INC_INCREMENT; - txd->didstc = hwcfg; - slave_addr = s3cchan->cfg.dst_addr; - txd->width = s3cchan->cfg.dst_addr_width; - } else if (direction == DMA_DEV_TO_MEM) { - txd->disrcc = hwcfg; - txd->didstc = S3C24XX_DIDSTC_LOC_AHB | - S3C24XX_DIDSTC_INC_INCREMENT; - slave_addr = s3cchan->cfg.src_addr; - txd->width = s3cchan->cfg.src_addr_width; - } else { - s3c24xx_dma_free_txd(txd); - dev_err(&s3cdma->pdev->dev, - "direction %d unsupported\n", direction); - return NULL; - } - - for_each_sg(sgl, sg, sg_len, tmp) { - dsg = kzalloc(sizeof(*dsg), GFP_NOWAIT); - if (!dsg) { - s3c24xx_dma_free_txd(txd); - return NULL; - } - list_add_tail(&dsg->node, &txd->dsg_list); - - dsg->len = sg_dma_len(sg); - if (direction == DMA_MEM_TO_DEV) { - dsg->src_addr = sg_dma_address(sg); - dsg->dst_addr = slave_addr; - } else { /* DMA_DEV_TO_MEM */ - dsg->src_addr = slave_addr; - dsg->dst_addr = sg_dma_address(sg); - } - } - - return vchan_tx_prep(&s3cchan->vc, &txd->vd, flags); -} - -/* - * Slave transactions callback to the slave device to allow - * synchronization of slave DMA signals with the DMAC enable - */ -static void s3c24xx_dma_issue_pending(struct dma_chan *chan) -{ - struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan); - unsigned long flags; - - spin_lock_irqsave(&s3cchan->vc.lock, flags); - if (vchan_issue_pending(&s3cchan->vc)) { - if (!s3cchan->phy && s3cchan->state != S3C24XX_DMA_CHAN_WAITING) - 
s3c24xx_dma_phy_alloc_and_start(s3cchan); - } - spin_unlock_irqrestore(&s3cchan->vc.lock, flags); -} - -/* - * Bringup and teardown - */ - -/* - * Initialise the DMAC memcpy/slave channels. - * Make a local wrapper to hold required data - */ -static int s3c24xx_dma_init_virtual_channels(struct s3c24xx_dma_engine *s3cdma, - struct dma_device *dmadev, unsigned int channels, bool slave) -{ - struct s3c24xx_dma_chan *chan; - int i; - - INIT_LIST_HEAD(&dmadev->channels); - - /* - * Register as many memcpy as we have physical channels, - * we won't always be able to use all but the code will have - * to cope with that situation. - */ - for (i = 0; i < channels; i++) { - chan = devm_kzalloc(dmadev->dev, sizeof(*chan), GFP_KERNEL); - if (!chan) - return -ENOMEM; - - chan->id = i; - chan->host = s3cdma; - chan->state = S3C24XX_DMA_CHAN_IDLE; - - if (slave) { - chan->slave = true; - chan->name = kasprintf(GFP_KERNEL, "slave%d", i); - if (!chan->name) - return -ENOMEM; - } else { - chan->name = kasprintf(GFP_KERNEL, "memcpy%d", i); - if (!chan->name) - return -ENOMEM; - } - dev_dbg(dmadev->dev, - "initialize virtual channel \"%s\"\n", - chan->name); - - chan->vc.desc_free = s3c24xx_dma_desc_free; - vchan_init(&chan->vc, dmadev); - } - dev_info(dmadev->dev, "initialized %d virtual %s channels\n", - i, slave ? 
"slave" : "memcpy"); - return i; -} - -static void s3c24xx_dma_free_virtual_channels(struct dma_device *dmadev) -{ - struct s3c24xx_dma_chan *chan = NULL; - struct s3c24xx_dma_chan *next; - - list_for_each_entry_safe(chan, - next, &dmadev->channels, vc.chan.device_node) { - list_del(&chan->vc.chan.device_node); - tasklet_kill(&chan->vc.task); - } -} - -/* s3c2410, s3c2440 and s3c2442 have a 0x40 stride without separate clocks */ -static struct soc_data soc_s3c2410 = { - .stride = 0x40, - .has_reqsel = false, - .has_clocks = false, -}; - -/* s3c2412 and s3c2413 have a 0x40 stride and dmareqsel mechanism */ -static struct soc_data soc_s3c2412 = { - .stride = 0x40, - .has_reqsel = true, - .has_clocks = true, -}; - -/* s3c2443 and following have a 0x100 stride and dmareqsel mechanism */ -static struct soc_data soc_s3c2443 = { - .stride = 0x100, - .has_reqsel = true, - .has_clocks = true, -}; - -static const struct platform_device_id s3c24xx_dma_driver_ids[] = { - { - .name = "s3c2410-dma", - .driver_data = (kernel_ulong_t)&soc_s3c2410, - }, { - .name = "s3c2412-dma", - .driver_data = (kernel_ulong_t)&soc_s3c2412, - }, { - .name = "s3c2443-dma", - .driver_data = (kernel_ulong_t)&soc_s3c2443, - }, - { }, -}; - -static struct soc_data *s3c24xx_dma_get_soc_data(struct platform_device *pdev) -{ - return (struct soc_data *) - platform_get_device_id(pdev)->driver_data; -} - -static int s3c24xx_dma_probe(struct platform_device *pdev) -{ - const struct s3c24xx_dma_platdata *pdata = dev_get_platdata(&pdev->dev); - struct s3c24xx_dma_engine *s3cdma; - struct soc_data *sdata; - struct resource *res; - int ret; - int i; - - if (!pdata) { - dev_err(&pdev->dev, "platform data missing\n"); - return -ENODEV; - } - - /* Basic sanity check */ - if (pdata->num_phy_channels > MAX_DMA_CHANNELS) { - dev_err(&pdev->dev, "too many dma channels %d, max %d\n", - pdata->num_phy_channels, MAX_DMA_CHANNELS); - return -EINVAL; - } - - sdata = s3c24xx_dma_get_soc_data(pdev); - if (!sdata) - return 
-EINVAL; - - s3cdma = devm_kzalloc(&pdev->dev, sizeof(*s3cdma), GFP_KERNEL); - if (!s3cdma) - return -ENOMEM; - - s3cdma->pdev = pdev; - s3cdma->pdata = pdata; - s3cdma->sdata = sdata; - - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - s3cdma->base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(s3cdma->base)) - return PTR_ERR(s3cdma->base); - - s3cdma->phy_chans = devm_kcalloc(&pdev->dev, - pdata->num_phy_channels, - sizeof(struct s3c24xx_dma_phy), - GFP_KERNEL); - if (!s3cdma->phy_chans) - return -ENOMEM; - - /* acquire irqs and clocks for all physical channels */ - for (i = 0; i < pdata->num_phy_channels; i++) { - struct s3c24xx_dma_phy *phy = &s3cdma->phy_chans[i]; - char clk_name[6]; - - phy->id = i; - phy->base = s3cdma->base + (i * sdata->stride); - phy->host = s3cdma; - - phy->irq = platform_get_irq(pdev, i); - if (phy->irq < 0) - continue; - - ret = devm_request_irq(&pdev->dev, phy->irq, s3c24xx_dma_irq, - 0, pdev->name, phy); - if (ret) { - dev_err(&pdev->dev, "Unable to request irq for channel %d, error %d\n", - i, ret); - continue; - } - - if (sdata->has_clocks) { - sprintf(clk_name, "dma.%d", i); - phy->clk = devm_clk_get(&pdev->dev, clk_name); - if (IS_ERR(phy->clk) && sdata->has_clocks) { - dev_err(&pdev->dev, "unable to acquire clock for channel %d, error %lu\n", - i, PTR_ERR(phy->clk)); - continue; - } - - ret = clk_prepare(phy->clk); - if (ret) { - dev_err(&pdev->dev, "clock for phy %d failed, error %d\n", - i, ret); - continue; - } - } - - spin_lock_init(&phy->lock); - phy->valid = true; - - dev_dbg(&pdev->dev, "physical channel %d is %s\n", - i, s3c24xx_dma_phy_busy(phy) ? 
"BUSY" : "FREE"); - } - - /* Initialize memcpy engine */ - dma_cap_set(DMA_MEMCPY, s3cdma->memcpy.cap_mask); - dma_cap_set(DMA_PRIVATE, s3cdma->memcpy.cap_mask); - s3cdma->memcpy.dev = &pdev->dev; - s3cdma->memcpy.device_free_chan_resources = - s3c24xx_dma_free_chan_resources; - s3cdma->memcpy.device_prep_dma_memcpy = s3c24xx_dma_prep_memcpy; - s3cdma->memcpy.device_tx_status = s3c24xx_dma_tx_status; - s3cdma->memcpy.device_issue_pending = s3c24xx_dma_issue_pending; - s3cdma->memcpy.device_config = s3c24xx_dma_set_runtime_config; - s3cdma->memcpy.device_terminate_all = s3c24xx_dma_terminate_all; - s3cdma->memcpy.device_synchronize = s3c24xx_dma_synchronize; - - /* Initialize slave engine for SoC internal dedicated peripherals */ - dma_cap_set(DMA_SLAVE, s3cdma->slave.cap_mask); - dma_cap_set(DMA_CYCLIC, s3cdma->slave.cap_mask); - dma_cap_set(DMA_PRIVATE, s3cdma->slave.cap_mask); - s3cdma->slave.dev = &pdev->dev; - s3cdma->slave.device_free_chan_resources = - s3c24xx_dma_free_chan_resources; - s3cdma->slave.device_tx_status = s3c24xx_dma_tx_status; - s3cdma->slave.device_issue_pending = s3c24xx_dma_issue_pending; - s3cdma->slave.device_prep_slave_sg = s3c24xx_dma_prep_slave_sg; - s3cdma->slave.device_prep_dma_cyclic = s3c24xx_dma_prep_dma_cyclic; - s3cdma->slave.device_config = s3c24xx_dma_set_runtime_config; - s3cdma->slave.device_terminate_all = s3c24xx_dma_terminate_all; - s3cdma->slave.device_synchronize = s3c24xx_dma_synchronize; - s3cdma->slave.filter.map = pdata->slave_map; - s3cdma->slave.filter.mapcnt = pdata->slavecnt; - s3cdma->slave.filter.fn = s3c24xx_dma_filter; - - /* Register as many memcpy channels as there are physical channels */ - ret = s3c24xx_dma_init_virtual_channels(s3cdma, &s3cdma->memcpy, - pdata->num_phy_channels, false); - if (ret <= 0) { - dev_warn(&pdev->dev, - "%s failed to enumerate memcpy channels - %d\n", - __func__, ret); - goto err_memcpy; - } - - /* Register slave channels */ - ret = s3c24xx_dma_init_virtual_channels(s3cdma, 
&s3cdma->slave, - pdata->num_channels, true); - if (ret <= 0) { - dev_warn(&pdev->dev, - "%s failed to enumerate slave channels - %d\n", - __func__, ret); - goto err_slave; - } - - ret = dma_async_device_register(&s3cdma->memcpy); - if (ret) { - dev_warn(&pdev->dev, - "%s failed to register memcpy as an async device - %d\n", - __func__, ret); - goto err_memcpy_reg; - } - - ret = dma_async_device_register(&s3cdma->slave); - if (ret) { - dev_warn(&pdev->dev, - "%s failed to register slave as an async device - %d\n", - __func__, ret); - goto err_slave_reg; - } - - platform_set_drvdata(pdev, s3cdma); - dev_info(&pdev->dev, "Loaded dma driver with %d physical channels\n", - pdata->num_phy_channels); - - return 0; - -err_slave_reg: - dma_async_device_unregister(&s3cdma->memcpy); -err_memcpy_reg: - s3c24xx_dma_free_virtual_channels(&s3cdma->slave); -err_slave: - s3c24xx_dma_free_virtual_channels(&s3cdma->memcpy); -err_memcpy: - if (sdata->has_clocks) - for (i = 0; i < pdata->num_phy_channels; i++) { - struct s3c24xx_dma_phy *phy = &s3cdma->phy_chans[i]; - if (phy->valid) - clk_unprepare(phy->clk); - } - - return ret; -} - -static void s3c24xx_dma_free_irq(struct platform_device *pdev, - struct s3c24xx_dma_engine *s3cdma) -{ - int i; - - for (i = 0; i < s3cdma->pdata->num_phy_channels; i++) { - struct s3c24xx_dma_phy *phy = &s3cdma->phy_chans[i]; - - devm_free_irq(&pdev->dev, phy->irq, phy); - } -} - -static int s3c24xx_dma_remove(struct platform_device *pdev) -{ - const struct s3c24xx_dma_platdata *pdata = dev_get_platdata(&pdev->dev); - struct s3c24xx_dma_engine *s3cdma = platform_get_drvdata(pdev); - struct soc_data *sdata = s3c24xx_dma_get_soc_data(pdev); - int i; - - dma_async_device_unregister(&s3cdma->slave); - dma_async_device_unregister(&s3cdma->memcpy); - - s3c24xx_dma_free_irq(pdev, s3cdma); - - s3c24xx_dma_free_virtual_channels(&s3cdma->slave); - s3c24xx_dma_free_virtual_channels(&s3cdma->memcpy); - - if (sdata->has_clocks) - for (i = 0; i < 
pdata->num_phy_channels; i++) { - struct s3c24xx_dma_phy *phy = &s3cdma->phy_chans[i]; - if (phy->valid) - clk_unprepare(phy->clk); - } - - return 0; -} - -static struct platform_driver s3c24xx_dma_driver = { - .driver = { - .name = "s3c24xx-dma", - }, - .id_table = s3c24xx_dma_driver_ids, - .probe = s3c24xx_dma_probe, - .remove = s3c24xx_dma_remove, -}; - -module_platform_driver(s3c24xx_dma_driver); - -bool s3c24xx_dma_filter(struct dma_chan *chan, void *param) -{ - struct s3c24xx_dma_chan *s3cchan; - - if (chan->device->dev->driver != &s3c24xx_dma_driver.driver) - return false; - - s3cchan = to_s3c24xx_dma_chan(chan); - - return s3cchan->id == (uintptr_t)param; -} -EXPORT_SYMBOL(s3c24xx_dma_filter); - -MODULE_DESCRIPTION("S3C24XX DMA Driver"); -MODULE_AUTHOR("Heiko Stuebner"); -MODULE_LICENSE("GPL v2"); diff --git a/include/linux/platform_data/dma-s3c24xx.h b/include/linux/platform_data/dma-s3c24xx.h deleted file mode 100644 index 96d02dbeea67..000000000000 --- a/include/linux/platform_data/dma-s3c24xx.h +++ /dev/null @@ -1,48 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * S3C24XX DMA handling - * - * Copyright (c) 2013 Heiko Stuebner - */ - -/* Helper to encode the source selection constraints for early s3c socs. */ -#define S3C24XX_DMA_CHANREQ(src, chan) ((BIT(3) | src) << chan * 4) - -enum s3c24xx_dma_bus { - S3C24XX_DMA_APB, - S3C24XX_DMA_AHB, -}; - -/** - * @bus: on which bus does the peripheral reside - AHB or APB. 
- * @handshake: is a handshake with the peripheral necessary - * @chansel: channel selection information, depending on variant; reqsel for - * s3c2443 and later and channel-selection map for earlier SoCs - * see CHANSEL doc in s3c2443-dma.c - */ -struct s3c24xx_dma_channel { - enum s3c24xx_dma_bus bus; - bool handshake; - u16 chansel; -}; - -struct dma_slave_map; - -/** - * struct s3c24xx_dma_platdata - platform specific settings - * @num_phy_channels: number of physical channels - * @channels: array of virtual channel descriptions - * @num_channels: number of virtual channels - * @slave_map: dma slave map matching table - * @slavecnt: number of elements in slave_map - */ -struct s3c24xx_dma_platdata { - int num_phy_channels; - struct s3c24xx_dma_channel *channels; - int num_channels; - const struct dma_slave_map *slave_map; - int slavecnt; -}; - -struct dma_chan; -bool s3c24xx_dma_filter(struct dma_chan *chan, void *param); From a92b744f28e86e05514e6bbd889131f6693b6eed Mon Sep 17 00:00:00 2001 From: Radhey Shyam Pandey Date: Tue, 25 Oct 2022 13:12:16 +0530 Subject: [PATCH 0914/4122] dmaengine: xilinx_dma: fix xilinx_dma_child_probe() return documentation Modify xilinx_dma_child_probe() return documentation to be inline with implementation i.e. can also return failure value on error. Signed-off-by: Radhey Shyam Pandey Link: https://lore.kernel.org/r/1666683737-7668-2-git-send-email-radhey.shyam.pandey@amd.com Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xilinx_dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index 8cd4e69dc7b4..3b8cfeccf2da 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -2924,7 +2924,7 @@ static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev, * @xdev: Driver specific device structure * @node: Device node * - * Return: 0 always. + * Return: '0' on success and failure value on error. 
*/ static int xilinx_dma_child_probe(struct xilinx_dma_device *xdev, struct device_node *node) From 73f11324738a4c23159cf22b08225b6642232982 Mon Sep 17 00:00:00 2001 From: Radhey Shyam Pandey Date: Tue, 25 Oct 2022 13:12:17 +0530 Subject: [PATCH 0915/4122] dmaengine: xilinx_dma : add xilinx_dma_device_config() return documentation document xilinx_dma_device_config() return value. Fixes below kernel-doc warning. xilinx_dma.c:1665: warning: No description found for return value of 'xilinx_dma_device_config' Signed-off-by: Radhey Shyam Pandey Link: https://lore.kernel.org/r/1666683737-7668-3-git-send-email-radhey.shyam.pandey@amd.com Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xilinx_dma.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index 3b8cfeccf2da..a8d23cdf883e 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -1659,6 +1659,8 @@ static void xilinx_dma_issue_pending(struct dma_chan *dchan) * xilinx_dma_device_config - Configure the DMA channel * @dchan: DMA channel * @config: channel configuration + * + * Return: 0 always. */ static int xilinx_dma_device_config(struct dma_chan *dchan, struct dma_slave_config *config) From 14e4b9f4289aed2c8d4858cd750748041b6c434f Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 2 Nov 2022 21:54:31 -0700 Subject: [PATCH 0916/4122] perf trace: Raw augmented syscalls fix libbpf 1.0+ compatibility Don't use deprecated and now broken map style. Avoid use of tools/perf/include/bpf/bpf.h and use the more regular BPF headers. Committer notes: Add /usr/include to the include path so that bpf/bpf_helpers.h can be found, remove sys/socket.h, adding the sockaddr_storage definition, also remove stdbool.h, both were preventing building the augmented_raw_syscalls.c file with clang, revisit later. 
Testing it: Asking for syscalls that have string arguments: # perf trace -e ~acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.c,string --max-events 10 0.000 thermald/1144 openat(dfd: CWD, filename: "/sys/class/powercap/intel-rapl/intel-rapl:0/intel-rapl:0:2/energy_uj", flags: RDONLY) = 13 0.158 thermald/1144 openat(dfd: CWD, filename: "/sys/class/powercap/intel-rapl/intel-rapl:0/energy_uj", flags: RDONLY) = 13 0.215 thermald/1144 openat(dfd: CWD, filename: "/sys/class/thermal/thermal_zone3/temp", flags: RDONLY) = 13 16.448 cgroupify/36478 openat(dfd: 4, filename: ".", flags: RDONLY|CLOEXEC|DIRECTORY|NONBLOCK) = 5 16.468 cgroupify/36478 newfstatat(dfd: 5, filename: "", statbuf: 0x7fffca5b4130, flag: 4096) = 0 16.473 systemd-oomd/972 openat(dfd: CWD, filename: "/proc/meminfo", flags: RDONLY|CLOEXEC) = 12 16.499 systemd-oomd/972 newfstatat(dfd: 12, filename: "", statbuf: 0x7ffd2bc73cc0, flag: 4096) = 0 16.516 abrt-dump-jour/1370 openat(dfd: CWD, filename: "/var/log/journal/d6a97235307247e09f13f326fb607e3c/system.journal", flags: RDONLY|CLOEXEC|NONBLOCK) = 21 16.538 abrt-dump-jour/1370 newfstatat(dfd: 21, filename: "", statbuf: 0x7ffc651b8980, flag: 4096) = 0 16.540 abrt-dump-jour/1371 openat(dfd: CWD, filename: "/var/log/journal/d6a97235307247e09f13f326fb607e3c/system.journal", flags: RDONLY|CLOEXEC|NONBLOCK) = 21 # Networking syscalls: # perf trace -e ~acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.c,sendto*,connect* --max-events 10 0.000 isc-net-0005/1206 connect(fd: 512, uservaddr: { .family: INET, port: 53, addr: 23.211.132.65 }, addrlen: 16) = 0 0.070 isc-net-0002/1203 connect(fd: 515, uservaddr: { .family: INET6, port: 53, addr: 2600:1401:2::43 }, addrlen: 28) = -1 ENETUNREACH (Network is unreachable) 0.031 isc-net-0006/1207 connect(fd: 513, uservaddr: { .family: INET6, port: 53, addr: 2600:1401:2::43 }, addrlen: 28) = -1 ENETUNREACH (Network is unreachable) 0.079 isc-net-0006/1207 sendto(fd: 3, buff: 0x7f73a40611b0, len: 106, flags: 
NOSIGNAL, addr: { .family: UNSPEC }, addr_len: NULL) = 106 0.180 isc-net-0006/1207 connect(fd: 519, uservaddr: { .family: INET6, port: 53, addr: 2600:1401:1::43 }, addrlen: 28) = -1 ENETUNREACH (Network is unreachable) 0.211 isc-net-0006/1207 sendto(fd: 3, buff: 0x7f73a4061230, len: 106, flags: NOSIGNAL, addr: { .family: UNSPEC }, addr_len: NULL) = 106 0.298 isc-net-0006/1207 connect(fd: 515, uservaddr: { .family: INET, port: 53, addr: 96.7.49.67 }, addrlen: 16) = 0 0.109 isc-net-0004/1205 connect(fd: 518, uservaddr: { .family: INET6, port: 53, addr: 2600:1401:2::43 }, addrlen: 28) = -1 ENETUNREACH (Network is unreachable) 0.164 isc-net-0002/1203 sendto(fd: 3, buff: 0x7f73ac064300, len: 107, flags: NOSIGNAL, addr: { .family: UNSPEC }, addr_len: NULL) = 107 0.247 isc-net-0002/1203 connect(fd: 522, uservaddr: { .family: INET6, port: 53, addr: 2600:1401:1::43 }, addrlen: 28) = -1 ENETUNREACH (Network is unreachable) # Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20221103045437.163510-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 3 +- .../examples/bpf/augmented_raw_syscalls.c | 103 +++++++++++++++--- tools/perf/util/llvm-utils.c | 2 +- 3 files changed, 89 insertions(+), 19 deletions(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index d3d3c13a9f25..59420676dee8 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -1239,7 +1239,8 @@ includedir = $(abspath $(prefix)/$(includedir_relative)) mandir = share/man infodir = share/info perfexecdir = libexec/perf-core -perf_include_dir = lib/perf/include +# FIXME: system's libbpf header directory, where we expect to find bpf/bpf_helpers.h, for instance +perf_include_dir = /usr/include perf_examples_dir = lib/perf/examples sharedir = $(prefix)/share template_dir = 
share/perf-core/templates diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c index a262dcd020f4..926238efd7d8 100644 --- a/tools/perf/examples/bpf/augmented_raw_syscalls.c +++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c @@ -14,13 +14,21 @@ * code that will combine entry/exit in a strace like way. */ -#include +#include +#include #include -#include -#include + +// FIXME: These should come from system headers +typedef char bool; +typedef int pid_t; /* bpf-output associated map */ -bpf_map(__augmented_syscalls__, PERF_EVENT_ARRAY, int, u32, __NR_CPUS__); +struct __augmented_syscalls__ { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __type(key, int); + __type(value, __u32); + __uint(max_entries, __NR_CPUS__); +} __augmented_syscalls__ SEC(".maps"); /* * string_args_len: one per syscall arg, 0 means not a string or don't copy it, @@ -29,24 +37,39 @@ bpf_map(__augmented_syscalls__, PERF_EVENT_ARRAY, int, u32, __NR_CPUS__); */ struct syscall { bool enabled; - u16 string_args_len[6]; + __u16 string_args_len[6]; }; -bpf_map(syscalls, ARRAY, int, struct syscall, 512); +struct syscalls { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct syscall); + __uint(max_entries, 512); +} syscalls SEC(".maps"); /* * What to augment at entry? * * Pointer arg payloads (filenames, etc) passed from userspace to the kernel */ -bpf_map(syscalls_sys_enter, PROG_ARRAY, u32, u32, 512); +struct syscalls_sys_enter { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __type(key, __u32); + __type(value, __u32); + __uint(max_entries, 512); +} syscalls_sys_enter SEC(".maps"); /* * What to augment at exit? * * Pointer arg payloads returned from the kernel (struct stat, etc) to userspace. 
*/ -bpf_map(syscalls_sys_exit, PROG_ARRAY, u32, u32, 512); +struct syscalls_sys_exit { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __type(key, __u32); + __type(value, __u32); + __uint(max_entries, 512); +} syscalls_sys_exit SEC(".maps"); struct syscall_enter_args { unsigned long long common_tp_fields; @@ -66,7 +89,38 @@ struct augmented_arg { char value[PATH_MAX]; }; -pid_filter(pids_filtered); +struct pids_filtered { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, pid_t); + __type(value, bool); + __uint(max_entries, 64); +} pids_filtered SEC(".maps"); + +/* + * Desired design of maximum size and alignment (see RFC2553) + */ +#define SS_MAXSIZE 128 /* Implementation specific max size */ + +typedef unsigned short sa_family_t; + +/* + * FIXME: Should come from system headers + * + * The definition uses anonymous union and struct in order to control the + * default alignment. + */ +struct sockaddr_storage { + union { + struct { + sa_family_t ss_family; /* address family */ + /* Following field(s) are implementation specific */ + char __data[SS_MAXSIZE - sizeof(unsigned short)]; + /* space to achieve desired size, */ + /* _SS_MAXSIZE value minus size of ss_family */ + }; + void *__align; /* implementation specific desired alignment */ + }; +}; struct augmented_args_payload { struct syscall_enter_args args; @@ -79,7 +133,12 @@ struct augmented_args_payload { }; // We need more tmp space than the BPF stack can give us -bpf_map(augmented_args_tmp, PERCPU_ARRAY, int, struct augmented_args_payload, 1); +struct augmented_args_tmp { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __type(key, int); + __type(value, struct augmented_args_payload); + __uint(max_entries, 1); +} augmented_args_tmp SEC(".maps"); static inline struct augmented_args_payload *augmented_args_payload(void) { @@ -90,14 +149,14 @@ static inline struct augmented_args_payload *augmented_args_payload(void) static inline int augmented__output(void *ctx, struct augmented_args_payload *args, int len) { /* If 
perf_event_output fails, return non-zero so that it gets recorded unaugmented */ - return perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, args, len); + return bpf_perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, args, len); } static inline unsigned int augmented_arg__read_str(struct augmented_arg *augmented_arg, const void *arg, unsigned int arg_len) { unsigned int augmented_len = sizeof(*augmented_arg); - int string_len = probe_read_str(&augmented_arg->value, arg_len, arg); + int string_len = bpf_probe_read_str(&augmented_arg->value, arg_len, arg); augmented_arg->size = augmented_arg->err = 0; /* @@ -146,7 +205,7 @@ int sys_enter_connect(struct syscall_enter_args *args) if (socklen > sizeof(augmented_args->saddr)) socklen = sizeof(augmented_args->saddr); - probe_read(&augmented_args->saddr, socklen, sockaddr_arg); + bpf_probe_read(&augmented_args->saddr, socklen, sockaddr_arg); return augmented__output(args, augmented_args, len + socklen); } @@ -165,7 +224,7 @@ int sys_enter_sendto(struct syscall_enter_args *args) if (socklen > sizeof(augmented_args->saddr)) socklen = sizeof(augmented_args->saddr); - probe_read(&augmented_args->saddr, socklen, sockaddr_arg); + bpf_probe_read(&augmented_args->saddr, socklen, sockaddr_arg); return augmented__output(args, augmented_args, len + socklen); } @@ -234,6 +293,16 @@ int sys_enter_renameat(struct syscall_enter_args *args) return augmented__output(args, augmented_args, len); } +static pid_t getpid(void) +{ + return bpf_get_current_pid_tgid(); +} + +static bool pid_filter__has(struct pids_filtered *pids, pid_t pid) +{ + return bpf_map_lookup_elem(pids, &pid) != NULL; +} + SEC("raw_syscalls:sys_enter") int sys_enter(struct syscall_enter_args *args) { @@ -257,7 +326,7 @@ int sys_enter(struct syscall_enter_args *args) if (augmented_args == NULL) return 1; - probe_read(&augmented_args->args, sizeof(augmented_args->args), args); + bpf_probe_read(&augmented_args->args, 
sizeof(augmented_args->args), args); /* * Jump to syscall specific augmenter, even if the default one, @@ -278,7 +347,7 @@ int sys_exit(struct syscall_exit_args *args) if (pid_filter__has(&pids_filtered, getpid())) return 0; - probe_read(&exit_args, sizeof(exit_args), args); + bpf_probe_read(&exit_args, sizeof(exit_args), args); /* * Jump to syscall specific return augmenter, even if the default one, * "!raw_syscalls:unaugmented" that will just return 1 to return the @@ -291,4 +360,4 @@ int sys_exit(struct syscall_exit_args *args) return 0; } -license(GPL); +char _license[] SEC("license") = "GPL"; diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c index 2dc797007419..a5cac85783d8 100644 --- a/tools/perf/util/llvm-utils.c +++ b/tools/perf/util/llvm-utils.c @@ -495,7 +495,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, snprintf(linux_version_code_str, sizeof(linux_version_code_str), "0x%x", kernel_version); - if (asprintf(&perf_bpf_include_opts, "-I%s/bpf", perf_include_dir) < 0) + if (asprintf(&perf_bpf_include_opts, "-I%s/", perf_include_dir) < 0) goto errout; force_set_env("NR_CPUS", nr_cpus_avail_str); force_set_env("LINUX_VERSION_CODE", linux_version_code_str); From 514607e3c0f0e381aa4f6fe866b70b1fa9bfae74 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 2 Nov 2022 21:54:34 -0700 Subject: [PATCH 0917/4122] perf trace: hello fix libbpf 1.0+ compatibility Don't use deprecated and now broken map style. Avoid use of tools/perf/include/bpf/bpf.h and use the more regular BPF headers. Switch to raw_syscalls:sys_enter to avoid the evlist being empty and fixing generating output. 
Committer testing: # perf trace -e ~acme/git/perf/tools/perf/examples/bpf/hello.c --call-graph=dwarf --max-events 5 0.000 perf/206852 __bpf_stdout__(Hello, world) syscall_trace_enter.constprop.0 ([kernel.kallsyms]) syscall_trace_enter.constprop.0 ([kernel.kallsyms]) do_syscall_64 ([kernel.kallsyms]) entry_SYSCALL_64_after_hwframe ([kernel.kallsyms]) __GI___sched_setaffinity_new (/usr/lib64/libc.so.6) 8.561 pipewire/2290 __bpf_stdout__(Hello, world) syscall_trace_enter.constprop.0 ([kernel.kallsyms]) syscall_trace_enter.constprop.0 ([kernel.kallsyms]) do_syscall_64 ([kernel.kallsyms]) entry_SYSCALL_64_after_hwframe ([kernel.kallsyms]) __libc_read (/usr/lib64/libc.so.6) 8.571 pipewire/2290 __bpf_stdout__(Hello, world) syscall_trace_enter.constprop.0 ([kernel.kallsyms]) syscall_trace_enter.constprop.0 ([kernel.kallsyms]) do_syscall_64 ([kernel.kallsyms]) entry_SYSCALL_64_after_hwframe ([kernel.kallsyms]) __GI___ioctl (/usr/lib64/libc.so.6) 8.586 pipewire/2290 __bpf_stdout__(Hello, world) syscall_trace_enter.constprop.0 ([kernel.kallsyms]) syscall_trace_enter.constprop.0 ([kernel.kallsyms]) do_syscall_64 ([kernel.kallsyms]) entry_SYSCALL_64_after_hwframe ([kernel.kallsyms]) __GI___write (/usr/lib64/libc.so.6) 8.592 pipewire/2290 __bpf_stdout__(Hello, world) syscall_trace_enter.constprop.0 ([kernel.kallsyms]) syscall_trace_enter.constprop.0 ([kernel.kallsyms]) do_syscall_64 ([kernel.kallsyms]) entry_SYSCALL_64_after_hwframe ([kernel.kallsyms]) __timerfd_settime (/usr/lib64/libc.so.6) # Signed-off-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20221103045437.163510-5-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/examples/bpf/hello.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/tools/perf/examples/bpf/hello.c 
b/tools/perf/examples/bpf/hello.c index cf3c2fdc7f79..e9080b0df158 100644 --- a/tools/perf/examples/bpf/hello.c +++ b/tools/perf/examples/bpf/hello.c @@ -1,9 +1,27 @@ -#include +// SPDX-License-Identifier: GPL-2.0 +#include +#include -int syscall_enter(openat)(void *args) +struct __bpf_stdout__ { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __type(key, int); + __type(value, __u32); + __uint(max_entries, __NR_CPUS__); +} __bpf_stdout__ SEC(".maps"); + +#define puts(from) \ + ({ const int __len = sizeof(from); \ + char __from[sizeof(from)] = from; \ + bpf_perf_event_output(args, &__bpf_stdout__, BPF_F_CURRENT_CPU, \ + &__from, __len & (sizeof(from) - 1)); }) + +struct syscall_enter_args; + +SEC("raw_syscalls:sys_enter") +int sys_enter(struct syscall_enter_args *args) { puts("Hello, world\n"); return 0; } -license(GPL); +char _license[] SEC("license") = "GPL"; From baddab891a21e5870723d182020fec445b0874b3 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 2 Nov 2022 21:54:35 -0700 Subject: [PATCH 0918/4122] perf trace: empty fix libbpf 1.0+ compatibility Avoid use of tools/perf/include/bpf/bpf.h and use the more regular BPF headers. Add raw_syscalls:sys_enter to avoid the evlist being empty. Committer testing: # time perf trace -e ~acme/git/perf/tools/perf/examples/bpf/empty.c sleep 5 real 0m5.697s user 0m0.217s sys 0m0.453s # I.e. it sets up everything successfully (use -v to see the details) and filters out all syscalls, then exits when the workload (sleep 5) finishes. 
Signed-off-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20221103045437.163510-6-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/examples/bpf/empty.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/tools/perf/examples/bpf/empty.c b/tools/perf/examples/bpf/empty.c index 7d7fb0c9fe76..3e296c0c53d7 100644 --- a/tools/perf/examples/bpf/empty.c +++ b/tools/perf/examples/bpf/empty.c @@ -1,3 +1,12 @@ -#include +// SPDX-License-Identifier: GPL-2.0 +#include +#include -license(GPL); +struct syscall_enter_args; + +SEC("raw_syscalls:sys_enter") +int sys_enter(struct syscall_enter_args *args) +{ + return 0; +} +char _license[] SEC("license") = "GPL"; From 71811e8c77e974a0ab978c86a2b32b3f2c82f455 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 2 Nov 2022 21:54:36 -0700 Subject: [PATCH 0919/4122] perf trace: 5sec fix libbpf 1.0+ compatibility Avoid use of tools/perf/include/bpf/bpf.h and use the more regular BPF headers. 
Committer testing: # perf trace -e ~acme/git/perf/tools/perf/examples/bpf/5sec.c sleep 5 0.000 perf_bpf_probe:hrtimer_nanosleep(__probe_ip: -1474734416, rqtp: 5000000000) # perf trace -e ~acme/git/perf/tools/perf/examples/bpf/5sec.c/max-stack=7/ sleep 5 0.000 perf_bpf_probe:hrtimer_nanosleep(__probe_ip: -1474734416, rqtp: 5000000000) hrtimer_nanosleep ([kernel.kallsyms]) common_nsleep ([kernel.kallsyms]) __x64_sys_clock_nanosleep ([kernel.kallsyms]) do_syscall_64 ([kernel.kallsyms]) entry_SYSCALL_64_after_hwframe ([kernel.kallsyms]) __GI___clock_nanosleep (/usr/lib64/libc.so.6) [0] ([unknown]) # Signed-off-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20221103045437.163510-7-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/examples/bpf/5sec.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/perf/examples/bpf/5sec.c b/tools/perf/examples/bpf/5sec.c index e6b6181c6dc6..3bd7fc17631f 100644 --- a/tools/perf/examples/bpf/5sec.c +++ b/tools/perf/examples/bpf/5sec.c @@ -39,13 +39,15 @@ Copyright (C) 2018 Red Hat, Inc., Arnaldo Carvalho de Melo */ -#include +#include +#include #define NSEC_PER_SEC 1000000000L -int probe(hrtimer_nanosleep, rqtp)(void *ctx, int err, long long sec) +SEC("hrtimer_nanosleep=hrtimer_nanosleep rqtp") +int hrtimer_nanosleep(void *ctx, int err, long long sec) { return sec / NSEC_PER_SEC == 5ULL; } -license(GPL); +char _license[] SEC("license") = "GPL"; From cfddf0d4a5571bcc94760c27729a60daefda38bb Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 2 Nov 2022 21:54:37 -0700 Subject: [PATCH 0920/4122] perf bpf: Remove now unused BPF headers Example code has migrated to use standard BPF header files, remove unnecessary perf equivalents. Update install step to not try to copy these. 
Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20221103045437.163510-8-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 5 -- tools/perf/include/bpf/bpf.h | 70 --------------------------- tools/perf/include/bpf/linux/socket.h | 24 --------- tools/perf/include/bpf/pid_filter.h | 21 -------- tools/perf/include/bpf/stdio.h | 16 ------ tools/perf/include/bpf/unistd.h | 10 ---- 6 files changed, 146 deletions(-) delete mode 100644 tools/perf/include/bpf/bpf.h delete mode 100644 tools/perf/include/bpf/linux/socket.h delete mode 100644 tools/perf/include/bpf/pid_filter.h delete mode 100644 tools/perf/include/bpf/stdio.h delete mode 100644 tools/perf/include/bpf/unistd.h diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index a432e59afc42..67819f905611 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -960,11 +960,6 @@ endif $(call QUIET_INSTALL, libexec) \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' ifndef NO_LIBBPF - $(call QUIET_INSTALL, bpf-headers) \ - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf'; \ - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf/linux'; \ - $(INSTALL) include/bpf/*.h -m 644 -t '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf'; \ - $(INSTALL) include/bpf/linux/*.h -m 644 -t '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf/linux' $(call QUIET_INSTALL, bpf-examples) \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf'; \ $(INSTALL) examples/bpf/*.c -m 644 -t '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf' diff --git a/tools/perf/include/bpf/bpf.h b/tools/perf/include/bpf/bpf.h deleted file mode 100644 index b422aeef5339..000000000000 --- a/tools/perf/include/bpf/bpf.h +++ /dev/null @@ -1,70 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#ifndef _PERF_BPF_H -#define 
_PERF_BPF_H - -#include - -/* - * A helper structure used by eBPF C program to describe map attributes to - * elf_bpf loader, taken from tools/testing/selftests/bpf/bpf_helpers.h: - */ -struct bpf_map { - unsigned int type; - unsigned int key_size; - unsigned int value_size; - unsigned int max_entries; - unsigned int map_flags; - unsigned int inner_map_idx; - unsigned int numa_node; -}; - -#define bpf_map(name, _type, type_key, type_val, _max_entries) \ -struct bpf_map SEC("maps") name = { \ - .type = BPF_MAP_TYPE_##_type, \ - .key_size = sizeof(type_key), \ - .value_size = sizeof(type_val), \ - .max_entries = _max_entries, \ -}; \ -struct ____btf_map_##name { \ - type_key key; \ - type_val value; \ -}; \ -struct ____btf_map_##name __attribute__((section(".maps." #name), used)) \ - ____btf_map_##name = { } - -/* - * FIXME: this should receive .max_entries as a parameter, as careful - * tuning of these limits is needed to avoid hitting limits that - * prevents other BPF constructs, such as tracepoint handlers, - * to get installed, with cryptic messages from libbpf, etc. - * For the current need, 'perf trace --filter-pids', 64 should - * be good enough, but this surely needs to be revisited. 
- */ -#define pid_map(name, value_type) bpf_map(name, HASH, pid_t, value_type, 64) - -static int (*bpf_map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags) = (void *)BPF_FUNC_map_update_elem; -static void *(*bpf_map_lookup_elem)(struct bpf_map *map, void *key) = (void *)BPF_FUNC_map_lookup_elem; - -static void (*bpf_tail_call)(void *ctx, void *map, int index) = (void *)BPF_FUNC_tail_call; - -#define SEC(NAME) __attribute__((section(NAME), used)) - -#define probe(function, vars) \ - SEC(#function "=" #function " " #vars) function - -#define syscall_enter(name) \ - SEC("syscalls:sys_enter_" #name) syscall_enter_ ## name - -#define syscall_exit(name) \ - SEC("syscalls:sys_exit_" #name) syscall_exit_ ## name - -#define license(name) \ -char _license[] SEC("license") = #name; \ -int _version SEC("version") = LINUX_VERSION_CODE; - -static int (*probe_read)(void *dst, int size, const void *unsafe_addr) = (void *)BPF_FUNC_probe_read; -static int (*probe_read_str)(void *dst, int size, const void *unsafe_addr) = (void *)BPF_FUNC_probe_read_str; - -static int (*perf_event_output)(void *, struct bpf_map *, int, void *, unsigned long) = (void *)BPF_FUNC_perf_event_output; - -#endif /* _PERF_BPF_H */ diff --git a/tools/perf/include/bpf/linux/socket.h b/tools/perf/include/bpf/linux/socket.h deleted file mode 100644 index 7f844568dab8..000000000000 --- a/tools/perf/include/bpf/linux/socket.h +++ /dev/null @@ -1,24 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _UAPI_LINUX_SOCKET_H -#define _UAPI_LINUX_SOCKET_H - -/* - * Desired design of maximum size and alignment (see RFC2553) - */ -#define _K_SS_MAXSIZE 128 /* Implementation specific max size */ -#define _K_SS_ALIGNSIZE (__alignof__ (struct sockaddr *)) - /* Implementation specific desired alignment */ - -typedef unsigned short __kernel_sa_family_t; - -struct __kernel_sockaddr_storage { - __kernel_sa_family_t ss_family; /* address family */ - /* Following field(s) are 
implementation specific */ - char __data[_K_SS_MAXSIZE - sizeof(unsigned short)]; - /* space to achieve desired size, */ - /* _SS_MAXSIZE value minus size of ss_family */ -} __attribute__ ((aligned(_K_SS_ALIGNSIZE))); /* force desired alignment */ - -#define sockaddr_storage __kernel_sockaddr_storage - -#endif /* _UAPI_LINUX_SOCKET_H */ diff --git a/tools/perf/include/bpf/pid_filter.h b/tools/perf/include/bpf/pid_filter.h deleted file mode 100644 index 6e61c4bdf548..000000000000 --- a/tools/perf/include/bpf/pid_filter.h +++ /dev/null @@ -1,21 +0,0 @@ -// SPDX-License-Identifier: LGPL-2.1 - -#ifndef _PERF_BPF_PID_FILTER_ -#define _PERF_BPF_PID_FILTER_ - -#include - -#define pid_filter(name) pid_map(name, bool) - -static int pid_filter__add(struct bpf_map *pids, pid_t pid) -{ - bool value = true; - return bpf_map_update_elem(pids, &pid, &value, BPF_NOEXIST); -} - -static bool pid_filter__has(struct bpf_map *pids, pid_t pid) -{ - return bpf_map_lookup_elem(pids, &pid) != NULL; -} - -#endif // _PERF_BPF_PID_FILTER_ diff --git a/tools/perf/include/bpf/stdio.h b/tools/perf/include/bpf/stdio.h deleted file mode 100644 index 316af5b2ff35..000000000000 --- a/tools/perf/include/bpf/stdio.h +++ /dev/null @@ -1,16 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#include - -struct bpf_map SEC("maps") __bpf_stdout__ = { - .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(u32), - .max_entries = __NR_CPUS__, -}; - -#define puts(from) \ - ({ const int __len = sizeof(from); \ - char __from[__len] = from; \ - perf_event_output(args, &__bpf_stdout__, BPF_F_CURRENT_CPU, \ - &__from, __len & (sizeof(from) - 1)); }) diff --git a/tools/perf/include/bpf/unistd.h b/tools/perf/include/bpf/unistd.h deleted file mode 100644 index ca7877f9a976..000000000000 --- a/tools/perf/include/bpf/unistd.h +++ /dev/null @@ -1,10 +0,0 @@ -// SPDX-License-Identifier: LGPL-2.1 - -#include - -static int (*bpf_get_current_pid_tgid)(void) = (void 
*)BPF_FUNC_get_current_pid_tgid; - -static pid_t getpid(void) -{ - return bpf_get_current_pid_tgid(); -} From 3cd65616f607cd3769647e2e0490e4048c0f289e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 4 Nov 2022 11:15:51 -0300 Subject: [PATCH 0921/4122] perf examples bpf: Remove augmented_syscalls.c, the raw_syscalls one should be used instead The attempt at using BPF to copy syscall pointer arguments to show them like strace does started with sys_{enter,exit}_SYSCALL_NAME tracepoints, in tools/perf/examples/bpf/augmented_syscalls.c, but then achieving this result using raw_syscalls:{enter,exit} and BPF tail calls was deemed more flexible. The 'perf trace' codebase was adapted to using it while trying to continue supporting the old style per-syscall tracepoints, which at some point became too unwieldy and now isn't working properly. So let's scale back and concentrate on the augmented_raw_syscalls.c model on the way to using BPF skeletons. For the same reason remove the etcsnoop.c example, that used the old style per-tp syscalls just for the 'open' and 'openat' syscalls, looking at the pathnames starting with "/etc/", we should be able to do this later using filters, after we move to BPF skels. 
The augmented_raw_syscalls.c one continues to work, now with libbpf 1.0, after Ian work on using the libbpf map style: # perf trace -e ~acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.c,open* --max-events 4 0.000 ping/194815 openat(dfd: CWD, filename: "/etc/hosts", flags: RDONLY|CLOEXEC) = 5 20.225 systemd-oomd/972 openat(dfd: CWD, filename: "/proc/meminfo", flags: RDONLY|CLOEXEC) = 12 20.285 abrt-dump-jour/1371 openat(dfd: CWD, filename: "/var/log/journal/d6a97235307247e09f13f326fb607e3c/system.journal", flags: RDONLY|CLOEXEC|NONBLOCK) = 21 20.301 abrt-dump-jour/1370 openat(dfd: CWD, filename: "/var/log/journal/d6a97235307247e09f13f326fb607e3c/system.journal", flags: RDONLY|CLOEXEC|NONBLOCK) = 21 # This is using this: # cat ~/.perfconfig [trace] show_zeros = yes show_duration = no no_inherit = yes args_alignment = 40 Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/examples/bpf/augmented_syscalls.c | 169 ------------------- tools/perf/examples/bpf/etcsnoop.c | 76 --------- 2 files changed, 245 deletions(-) delete mode 100644 tools/perf/examples/bpf/augmented_syscalls.c delete mode 100644 tools/perf/examples/bpf/etcsnoop.c diff --git a/tools/perf/examples/bpf/augmented_syscalls.c b/tools/perf/examples/bpf/augmented_syscalls.c deleted file mode 100644 index 524fdb8534b3..000000000000 --- a/tools/perf/examples/bpf/augmented_syscalls.c +++ /dev/null @@ -1,169 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Augment syscalls with the contents of the pointer arguments. - * - * Test it with: - * - * perf trace -e tools/perf/examples/bpf/augmented_syscalls.c cat /etc/passwd > /dev/null - * - * It'll catch some openat syscalls related to the dynamic linked and - * the last one should be the one for '/etc/passwd'. 
- * - * This matches what is marshalled into the raw_syscall:sys_enter payload - * expected by the 'perf trace' beautifiers, and can be used by them, that will - * check if perf_sample->raw_data is more than what is expected for each - * syscalls:sys_{enter,exit}_SYSCALL tracepoint, uing the extra data as the - * contents of pointer arguments. - */ - -#include -#include - -/* bpf-output associated map */ -bpf_map(__augmented_syscalls__, PERF_EVENT_ARRAY, int, u32, __NR_CPUS__); - -struct syscall_exit_args { - unsigned long long common_tp_fields; - long syscall_nr; - long ret; -}; - -struct augmented_filename { - unsigned int size; - int reserved; - char value[256]; -}; - -#define augmented_filename_syscall(syscall) \ -struct augmented_enter_##syscall##_args { \ - struct syscall_enter_##syscall##_args args; \ - struct augmented_filename filename; \ -}; \ -int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ -{ \ - struct augmented_enter_##syscall##_args augmented_args = { .filename.reserved = 0, }; \ - unsigned int len = sizeof(augmented_args); \ - probe_read(&augmented_args.args, sizeof(augmented_args.args), args); \ - augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, \ - sizeof(augmented_args.filename.value), \ - args->filename_ptr); \ - if (augmented_args.filename.size < sizeof(augmented_args.filename.value)) { \ - len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size; \ - len &= sizeof(augmented_args.filename.value) - 1; \ - } \ - /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ \ - return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ - &augmented_args, len); \ -} \ -int syscall_exit(syscall)(struct syscall_exit_args *args) \ -{ \ - return 1; /* 0 as soon as we start copying data returned by the kernel, e.g. 
'read' */ \ -} - -struct syscall_enter_openat_args { - unsigned long long common_tp_fields; - long syscall_nr; - long dfd; - char *filename_ptr; - long flags; - long mode; -}; - -augmented_filename_syscall(openat); - -struct syscall_enter_open_args { - unsigned long long common_tp_fields; - long syscall_nr; - char *filename_ptr; - long flags; - long mode; -}; - -augmented_filename_syscall(open); - -struct syscall_enter_inotify_add_watch_args { - unsigned long long common_tp_fields; - long syscall_nr; - long fd; - char *filename_ptr; - long mask; -}; - -augmented_filename_syscall(inotify_add_watch); - -struct statbuf; - -struct syscall_enter_newstat_args { - unsigned long long common_tp_fields; - long syscall_nr; - char *filename_ptr; - struct stat *statbuf; -}; - -augmented_filename_syscall(newstat); - -#ifndef _K_SS_MAXSIZE -#define _K_SS_MAXSIZE 128 -#endif - -#define augmented_sockaddr_syscall(syscall) \ -struct augmented_enter_##syscall##_args { \ - struct syscall_enter_##syscall##_args args; \ - struct sockaddr_storage addr; \ -}; \ -int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ -{ \ - struct augmented_enter_##syscall##_args augmented_args; \ - unsigned long addrlen = sizeof(augmented_args.addr); \ - probe_read(&augmented_args.args, sizeof(augmented_args.args), args); \ -/* FIXME_CLANG_OPTIMIZATION_THAT_ACCESSES_USER_CONTROLLED_ADDRLEN_DESPITE_THIS_CHECK */ \ -/* if (addrlen > augmented_args.args.addrlen) */ \ -/* addrlen = augmented_args.args.addrlen; */ \ -/* */ \ - probe_read(&augmented_args.addr, addrlen, args->addr_ptr); \ - /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ \ - return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ - &augmented_args, \ - sizeof(augmented_args) - sizeof(augmented_args.addr) + addrlen);\ -} \ -int syscall_exit(syscall)(struct syscall_exit_args *args) \ -{ \ - return 1; /* 0 as soon as we start copying data returned by the kernel, e.g. 
'read' */ \ -} - -struct sockaddr; - -struct syscall_enter_bind_args { - unsigned long long common_tp_fields; - long syscall_nr; - long fd; - struct sockaddr *addr_ptr; - unsigned long addrlen; -}; - -augmented_sockaddr_syscall(bind); - -struct syscall_enter_connect_args { - unsigned long long common_tp_fields; - long syscall_nr; - long fd; - struct sockaddr *addr_ptr; - unsigned long addrlen; -}; - -augmented_sockaddr_syscall(connect); - -struct syscall_enter_sendto_args { - unsigned long long common_tp_fields; - long syscall_nr; - long fd; - void *buff; - long len; - unsigned long flags; - struct sockaddr *addr_ptr; - long addr_len; -}; - -augmented_sockaddr_syscall(sendto); - -license(GPL); diff --git a/tools/perf/examples/bpf/etcsnoop.c b/tools/perf/examples/bpf/etcsnoop.c deleted file mode 100644 index e81b535346c0..000000000000 --- a/tools/perf/examples/bpf/etcsnoop.c +++ /dev/null @@ -1,76 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Augment the filename syscalls with the contents of the filename pointer argument - * filtering only those that do not start with /etc/. - * - * Test it with: - * - * perf trace -e tools/perf/examples/bpf/augmented_syscalls.c cat /etc/passwd > /dev/null - * - * It'll catch some openat syscalls related to the dynamic linked and - * the last one should be the one for '/etc/passwd'. - * - * This matches what is marshalled into the raw_syscall:sys_enter payload - * expected by the 'perf trace' beautifiers, and can be used by them unmodified, - * which will be done as that feature is implemented in the next csets, for now - * it will appear in a dump done by the default tracepoint handler in 'perf trace', - * that uses bpf_output__fprintf() to just dump those contents, as done with - * the bpf-output event associated with the __bpf_output__ map declared in - * tools/perf/include/bpf/stdio.h. 
- */ - -#include - -/* bpf-output associated map */ -bpf_map(__augmented_syscalls__, PERF_EVENT_ARRAY, int, u32, __NR_CPUS__); - -struct augmented_filename { - int size; - int reserved; - char value[64]; -}; - -#define augmented_filename_syscall_enter(syscall) \ -struct augmented_enter_##syscall##_args { \ - struct syscall_enter_##syscall##_args args; \ - struct augmented_filename filename; \ -}; \ -int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ -{ \ - char etc[6] = "/etc/"; \ - struct augmented_enter_##syscall##_args augmented_args = { .filename.reserved = 0, }; \ - probe_read(&augmented_args.args, sizeof(augmented_args.args), args); \ - augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, \ - sizeof(augmented_args.filename.value), \ - args->filename_ptr); \ - if (__builtin_memcmp(augmented_args.filename.value, etc, 4) != 0) \ - return 0; \ - /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ \ - return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ - &augmented_args, \ - (sizeof(augmented_args) - sizeof(augmented_args.filename.value) + \ - augmented_args.filename.size)); \ -} - -struct syscall_enter_openat_args { - unsigned long long common_tp_fields; - long syscall_nr; - long dfd; - char *filename_ptr; - long flags; - long mode; -}; - -augmented_filename_syscall_enter(openat); - -struct syscall_enter_open_args { - unsigned long long common_tp_fields; - long syscall_nr; - char *filename_ptr; - long flags; - long mode; -}; - -augmented_filename_syscall_enter(open); - -license(GPL); From 3e98b9bd8469d0b78975be9b36e423b30b0badbe Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Fri, 28 Oct 2022 13:53:36 +0200 Subject: [PATCH 0922/4122] dmaengine: sh: Remove unused shdma-arm.h shdma-arm.h was introduced with commit 1e69653d40f1 ("DMA: shdma: add r8a73a4 DMAC data to the device ID table"), and its sole user was removed with commit a19788612f51 ("dmaengine: 
sh: Remove R-Mobile APE6 support"). The latter mentions r8a73a4.dtsi but shdma support was removed from that with commit cfda82037780 ("ARM: dts: r8a73a4: Remove non-functional DMA support"), so it seems this is safe to remove. Signed-off-by: Stephen Kitt Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20221028115336.1052782-1-steve@sk2.org Signed-off-by: Vinod Koul --- drivers/dma/sh/shdma-arm.h | 48 -------------------------------------- 1 file changed, 48 deletions(-) delete mode 100644 drivers/dma/sh/shdma-arm.h diff --git a/drivers/dma/sh/shdma-arm.h b/drivers/dma/sh/shdma-arm.h deleted file mode 100644 index 7459f9a13b5b..000000000000 --- a/drivers/dma/sh/shdma-arm.h +++ /dev/null @@ -1,48 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Renesas SuperH DMA Engine support - * - * Copyright (C) 2013 Renesas Electronics, Inc. - */ - -#ifndef SHDMA_ARM_H -#define SHDMA_ARM_H - -#include "shdma.h" - -/* Transmit sizes and respective CHCR register values */ -enum { - XMIT_SZ_8BIT = 0, - XMIT_SZ_16BIT = 1, - XMIT_SZ_32BIT = 2, - XMIT_SZ_64BIT = 7, - XMIT_SZ_128BIT = 3, - XMIT_SZ_256BIT = 4, - XMIT_SZ_512BIT = 5, -}; - -/* log2(size / 8) - used to calculate number of transfers */ -#define SH_DMAE_TS_SHIFT { \ - [XMIT_SZ_8BIT] = 0, \ - [XMIT_SZ_16BIT] = 1, \ - [XMIT_SZ_32BIT] = 2, \ - [XMIT_SZ_64BIT] = 3, \ - [XMIT_SZ_128BIT] = 4, \ - [XMIT_SZ_256BIT] = 5, \ - [XMIT_SZ_512BIT] = 6, \ -} - -#define TS_LOW_BIT 0x3 /* --xx */ -#define TS_HI_BIT 0xc /* xx-- */ - -#define TS_LOW_SHIFT (3) -#define TS_HI_SHIFT (20 - 2) /* 2 bits for shifted low TS */ - -#define TS_INDEX2VAL(i) \ - ((((i) & TS_LOW_BIT) << TS_LOW_SHIFT) |\ - (((i) & TS_HI_BIT) << TS_HI_SHIFT)) - -#define CHCR_TX(xmit_sz) (DM_FIX | SM_INC | RS_ERS | TS_INDEX2VAL((xmit_sz))) -#define CHCR_RX(xmit_sz) (DM_INC | SM_FIX | RS_ERS | TS_INDEX2VAL((xmit_sz))) - -#endif From 9a8ddb35a9d5d3ad76784a012459b256a9d7de7e Mon Sep 17 00:00:00 2001 From: Xiaochen Shen Date: Sat, 22 Oct 2022 15:49:49 +0800 
Subject: [PATCH 0923/4122] dmaengine: idxd: Make read buffer sysfs attributes invisible for Intel IAA In current code, the following sysfs attributes are exposed to user to show or update the values: max_read_buffers (max_tokens) read_buffer_limit (token_limit) group/read_buffers_allowed (group/tokens_allowed) group/read_buffers_reserved (group/tokens_reserved) group/use_read_buffer_limit (group/use_token_limit) >From Intel IAA spec [1], Intel IAA does not support Read Buffer allocation control. So these sysfs attributes should not be supported on IAA device. Fix this issue by making these sysfs attributes invisible through is_visible() filter when the device is IAA. Add description in the ABI documentation to mention that these attributes are not visible when the device does not support Read Buffer allocation control. [1]: https://cdrdv2.intel.com/v1/dl/getContent/721858 Fixes: fde212e44f45 ("dmaengine: idxd: deprecate token sysfs attributes for read buffers") Fixes: c52ca478233c ("dmaengine: idxd: add configuration component of driver") Signed-off-by: Xiaochen Shen Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20221022074949.11719-1-xiaochen.shen@intel.com Signed-off-by: Vinod Koul --- .../ABI/stable/sysfs-driver-dma-idxd | 10 ++++++ drivers/dma/idxd/sysfs.c | 36 +++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd index 69e2d9155e0d..3becc9a82bdf 100644 --- a/Documentation/ABI/stable/sysfs-driver-dma-idxd +++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd @@ -50,6 +50,8 @@ Description: The total number of read buffers supported by this device. The read buffers represent resources within the DSA implementation, and these resources are allocated by engines to support operations. See DSA spec v1.2 9.2.4 Total Read Buffers. + It's not visible when the device does not support Read Buffer + allocation control. 
What: /sys/bus/dsa/devices/dsa/max_transfer_size Date: Oct 25, 2019 @@ -123,6 +125,8 @@ Contact: dmaengine@vger.kernel.org Description: The maximum number of read buffers that may be in use at one time by operations that access low bandwidth memory in the device. See DSA spec v1.2 9.2.8 GENCFG on Global Read Buffer Limit. + It's not visible when the device does not support Read Buffer + allocation control. What: /sys/bus/dsa/devices/dsa/cmd_status Date: Aug 28, 2020 @@ -252,6 +256,8 @@ KernelVersion: 5.17.0 Contact: dmaengine@vger.kernel.org Description: Enable the use of global read buffer limit for the group. See DSA spec v1.2 9.2.18 GRPCFG Use Global Read Buffer Limit. + It's not visible when the device does not support Read Buffer + allocation control. What: /sys/bus/dsa/devices/group./read_buffers_allowed Date: Dec 10, 2021 @@ -260,6 +266,8 @@ Contact: dmaengine@vger.kernel.org Description: Indicates max number of read buffers that may be in use at one time by all engines in the group. See DSA spec v1.2 9.2.18 GRPCFG Read Buffers Allowed. + It's not visible when the device does not support Read Buffer + allocation control. What: /sys/bus/dsa/devices/group./read_buffers_reserved Date: Dec 10, 2021 @@ -268,6 +276,8 @@ Contact: dmaengine@vger.kernel.org Description: Indicates the number of Read Buffers reserved for the use of engines in the group. See DSA spec v1.2 9.2.18 GRPCFG Read Buffers Reserved. + It's not visible when the device does not support Read Buffer + allocation control. 
What: /sys/bus/dsa/devices/group./desc_progress_limit Date: Sept 14, 2022 diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index f30aad90537b..7c7ec8323cb7 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -528,6 +528,22 @@ static bool idxd_group_attr_progress_limit_invisible(struct attribute *attr, !idxd->hw.group_cap.progress_limit; } +static bool idxd_group_attr_read_buffers_invisible(struct attribute *attr, + struct idxd_device *idxd) +{ + /* + * Intel IAA does not support Read Buffer allocation control, + * make these attributes invisible. + */ + return (attr == &dev_attr_group_use_token_limit.attr || + attr == &dev_attr_group_use_read_buffer_limit.attr || + attr == &dev_attr_group_tokens_allowed.attr || + attr == &dev_attr_group_read_buffers_allowed.attr || + attr == &dev_attr_group_tokens_reserved.attr || + attr == &dev_attr_group_read_buffers_reserved.attr) && + idxd->data->type == IDXD_TYPE_IAX; +} + static umode_t idxd_group_attr_visible(struct kobject *kobj, struct attribute *attr, int n) { @@ -538,6 +554,9 @@ static umode_t idxd_group_attr_visible(struct kobject *kobj, if (idxd_group_attr_progress_limit_invisible(attr, idxd)) return 0; + if (idxd_group_attr_read_buffers_invisible(attr, idxd)) + return 0; + return attr->mode; } @@ -1552,6 +1571,20 @@ static bool idxd_device_attr_max_batch_size_invisible(struct attribute *attr, idxd->data->type == IDXD_TYPE_IAX; } +static bool idxd_device_attr_read_buffers_invisible(struct attribute *attr, + struct idxd_device *idxd) +{ + /* + * Intel IAA does not support Read Buffer allocation control, + * make these attributes invisible. 
+ */ + return (attr == &dev_attr_max_tokens.attr || + attr == &dev_attr_max_read_buffers.attr || + attr == &dev_attr_token_limit.attr || + attr == &dev_attr_read_buffer_limit.attr) && + idxd->data->type == IDXD_TYPE_IAX; +} + static umode_t idxd_device_attr_visible(struct kobject *kobj, struct attribute *attr, int n) { @@ -1561,6 +1594,9 @@ static umode_t idxd_device_attr_visible(struct kobject *kobj, if (idxd_device_attr_max_batch_size_invisible(attr, idxd)) return 0; + if (idxd_device_attr_read_buffers_invisible(attr, idxd)) + return 0; + return attr->mode; } From b018899e620b8ee4529f43bd02e9e8e43043e33e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 4 Nov 2022 11:45:36 -0300 Subject: [PATCH 0924/4122] perf bpf: Rename perf_include_dir to libbpf_include_dir As this is where we expect to find bpf/bpf_helpers.h, etc. This needs more work to make it follow LIBBPF_DYNAMIC=1 usage, i.e. when not using the system libbpf it should use the headers in the in-kernel sources libbpf in tools/lib/bpf. We need to do that anyway to avoid this mixup system libbpf and in-kernel files, so we'll get this sorted out that way. And this also may become moot as we move to using BPF skels for this feature. 
Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 12 ++++++------ tools/perf/util/Build | 2 +- tools/perf/util/llvm-utils.c | 8 ++++---- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 59420676dee8..a7f6c0669fae 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -1216,7 +1216,7 @@ endif # Among the variables below, these: # perfexecdir -# perf_include_dir +# libbpf_include_dir # perf_examples_dir # template_dir # mandir @@ -1240,7 +1240,7 @@ mandir = share/man infodir = share/info perfexecdir = libexec/perf-core # FIXME: system's libbpf header directory, where we expect to find bpf/bpf_helpers.h, for instance -perf_include_dir = /usr/include +libbpf_include_dir = /usr/include perf_examples_dir = lib/perf/examples sharedir = $(prefix)/share template_dir = share/perf-core/templates @@ -1273,7 +1273,7 @@ includedir_SQ = $(subst ','\'',$(includedir)) mandir_SQ = $(subst ','\'',$(mandir)) infodir_SQ = $(subst ','\'',$(infodir)) perfexecdir_SQ = $(subst ','\'',$(perfexecdir)) -perf_include_dir_SQ = $(subst ','\'',$(perf_include_dir)) +libbpf_include_dir_SQ = $(subst ','\'',$(libbpf_include_dir)) perf_examples_dir_SQ = $(subst ','\'',$(perf_examples_dir)) template_dir_SQ = $(subst ','\'',$(template_dir)) htmldir_SQ = $(subst ','\'',$(htmldir)) @@ -1285,13 +1285,13 @@ srcdir_SQ = $(subst ','\'',$(srcdir)) ifneq ($(filter /%,$(firstword $(perfexecdir))),) perfexec_instdir = $(perfexecdir) -perf_include_instdir = $(perf_include_dir) +perf_include_instdir = $(libbpf_include_dir) perf_examples_instdir = $(perf_examples_dir) STRACE_GROUPS_INSTDIR = $(STRACE_GROUPS_DIR) tip_instdir = $(tipdir) else perfexec_instdir = $(prefix)/$(perfexecdir) -perf_include_instdir = $(prefix)/$(perf_include_dir) +perf_include_instdir = $(prefix)/$(libbpf_include_dir) perf_examples_instdir = 
$(prefix)/$(perf_examples_dir) STRACE_GROUPS_INSTDIR = $(prefix)/$(STRACE_GROUPS_DIR) tip_instdir = $(prefix)/$(tipdir) @@ -1353,7 +1353,7 @@ $(call detected_var,ETC_PERFCONFIG_SQ) $(call detected_var,STRACE_GROUPS_DIR_SQ) $(call detected_var,prefix_SQ) $(call detected_var,perfexecdir_SQ) -$(call detected_var,perf_include_dir_SQ) +$(call detected_var,libbpf_include_dir_SQ) $(call detected_var,perf_examples_dir_SQ) $(call detected_var,tipdir_SQ) $(call detected_var,srcdir_SQ) diff --git a/tools/perf/util/Build b/tools/perf/util/Build index e315ecaec323..47a7db3ad0a1 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -220,7 +220,7 @@ perf-$(CONFIG_CXX) += c++/ perf-$(CONFIG_LIBPFM4) += pfm.o CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" -CFLAGS_llvm-utils.o += -DPERF_INCLUDE_DIR="BUILD_STR($(perf_include_dir_SQ))" +CFLAGS_llvm-utils.o += -DLIBBPF_INCLUDE_DIR="BUILD_STR($(libbpf_include_dir_SQ))" # avoid compiler warnings in 32-bit mode CFLAGS_genelf_debug.o += -Wno-packed diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c index a5cac85783d8..650ffe336f3a 100644 --- a/tools/perf/util/llvm-utils.c +++ b/tools/perf/util/llvm-utils.c @@ -463,7 +463,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, char *pipe_template = NULL; const char *opts = llvm_param.opts; char *command_echo = NULL, *command_out; - char *perf_include_dir = system_path(PERF_INCLUDE_DIR); + char *libbpf_include_dir = system_path(LIBBPF_INCLUDE_DIR); if (path[0] != '-' && realpath(path, abspath) == NULL) { err = errno; @@ -495,7 +495,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, snprintf(linux_version_code_str, sizeof(linux_version_code_str), "0x%x", kernel_version); - if (asprintf(&perf_bpf_include_opts, "-I%s/", perf_include_dir) < 0) + if (asprintf(&perf_bpf_include_opts, "-I%s/", libbpf_include_dir) < 0) goto errout; force_set_env("NR_CPUS", nr_cpus_avail_str); force_set_env("LINUX_VERSION_CODE", 
linux_version_code_str); @@ -556,7 +556,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, free(kbuild_dir); free(kbuild_include_opts); free(perf_bpf_include_opts); - free(perf_include_dir); + free(libbpf_include_dir); if (!p_obj_buf) free(obj_buf); @@ -572,7 +572,7 @@ errout: free(kbuild_include_opts); free(obj_buf); free(perf_bpf_include_opts); - free(perf_include_dir); + free(libbpf_include_dir); free(pipe_template); if (p_obj_buf) *p_obj_buf = NULL; From 17a36713babe882928b869b427729c85deeb1267 Mon Sep 17 00:00:00 2001 From: Adam Skladowski Date: Sun, 30 Oct 2022 11:42:53 +0200 Subject: [PATCH 0925/4122] dt-bindings: dmaengine: qcom: gpi: add compatible for SM6115 Document the compatible for GPI DMA controller on SM6115 SoC. Signed-off-by: Adam Skladowski Signed-off-by: Iskren Chernev Acked-by: Rob Herring Link: https://lore.kernel.org/r/20221030094258.486428-4-iskren.chernev@gmail.com Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/qcom,gpi.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml index 0c2894498845..232895fa1d8d 100644 --- a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml +++ b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml @@ -25,6 +25,7 @@ properties: - items: - enum: - qcom,sc7280-gpi-dma + - qcom,sm6115-gpi-dma - qcom,sm8350-gpi-dma - qcom,sm8450-gpi-dma - const: qcom,sm6350-gpi-dma From 739153a6ae6891ff42ed9dbbd8e72dd99e6c8ba5 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Wed, 2 Nov 2022 10:27:01 +0800 Subject: [PATCH 0926/4122] Documentation: devres: add missing devm_acpi_dma_controller_free() helper Add missing devm_acpi_dma_controller_free() to devres.rst, it's introduced by commit 1b2e98bc1e35 ("dma: acpi-dma: introduce ACPI DMA helpers"). Fixes: 1b2e98bc1e35 ("dma: acpi-dma: introduce ACPI DMA helpers") Cc: Andy Shevchenko Cc: Mika Westerberg Cc: Rafael J. 
Wysocki Cc: Vinod Koul Cc: Jonathan Corbet Signed-off-by: Yang Yingliang Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221102022701.1407289-1-yangyingliang@huawei.com Signed-off-by: Vinod Koul --- Documentation/driver-api/driver-model/devres.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/driver-api/driver-model/devres.rst b/Documentation/driver-api/driver-model/devres.rst index 687adb58048e..99a989ebee87 100644 --- a/Documentation/driver-api/driver-model/devres.rst +++ b/Documentation/driver-api/driver-model/devres.rst @@ -437,6 +437,7 @@ SERDEV SLAVE DMA ENGINE devm_acpi_dma_controller_register() + devm_acpi_dma_controller_free() SPI devm_spi_alloc_master() From d59fdbc7164a6b2a0ed45c13387deac8efeed5a2 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 3 Nov 2022 22:30:05 +0200 Subject: [PATCH 0927/4122] gpiolib: of: Make use of device_match_of_node() Make use of device_match_of_node() instead of open coding its functionality. Signed-off-by: Andy Shevchenko Reviewed-by: Dmitry Torokhov Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-of.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index 52616848a37c..4b91e19366a8 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -85,7 +85,7 @@ static int of_gpiochip_match_node_and_xlate(struct gpio_chip *chip, void *data) { struct of_phandle_args *gpiospec = data; - return chip->gpiodev->dev.of_node == gpiospec->np && + return device_match_of_node(&chip->gpiodev->dev, gpiospec->np) && chip->of_xlate && chip->of_xlate(chip, gpiospec, NULL) >= 0; } From 34cb9352b62366038fd2d5b9d9f393f35d0be1e0 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sat, 29 Oct 2022 21:40:46 -0700 Subject: [PATCH 0928/4122] gpiolib: of: factor out quirk setting polarity via separate property Several legacy bindings use a separate property to specify polarity of GPIOs instead of specifying it 
directly in the GPIO property. Factor out this code to make it easier to add more such cases. Signed-off-by: Dmitry Torokhov Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-of.c | 98 +++++++++++++++++++++------------------ 1 file changed, 53 insertions(+), 45 deletions(-) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index 4b91e19366a8..e72b56a87b86 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -186,33 +186,68 @@ static void of_gpio_try_fixup_polarity(const struct device_node *np, } } -static void of_gpio_flags_quirks(const struct device_node *np, - const char *propname, - enum of_gpio_flags *flags, - int index) +static void of_gpio_set_polarity_by_property(const struct device_node *np, + const char *propname, + enum of_gpio_flags *flags) { - of_gpio_try_fixup_polarity(np, propname, flags); + static const struct { + const char *compatible; + const char *gpio_propname; + const char *polarity_propname; + } gpios[] = { +#if IS_ENABLED(CONFIG_FEC) + /* Freescale Fast Ethernet Controller */ + { "fsl,imx25-fec", "phy-reset-gpios", "phy-reset-active-high" }, + { "fsl,imx27-fec", "phy-reset-gpios", "phy-reset-active-high" }, + { "fsl,imx28-fec", "phy-reset-gpios", "phy-reset-active-high" }, + { "fsl,imx6q-fec", "phy-reset-gpios", "phy-reset-active-high" }, + { "fsl,mvf600-fec", "phy-reset-gpios", "phy-reset-active-high" }, + { "fsl,imx6sx-fec", "phy-reset-gpios", "phy-reset-active-high" }, + { "fsl,imx6ul-fec", "phy-reset-gpios", "phy-reset-active-high" }, + { "fsl,imx8mq-fec", "phy-reset-gpios", "phy-reset-active-high" }, + { "fsl,imx8qm-fec", "phy-reset-gpios", "phy-reset-active-high" }, + { "fsl,s32v234-fec", "phy-reset-gpios", "phy-reset-active-high" }, +#endif - /* - * Some GPIO fixed regulator quirks. - * Note that active low is the default. 
- */ - if (IS_ENABLED(CONFIG_REGULATOR) && - (of_device_is_compatible(np, "regulator-fixed") || - of_device_is_compatible(np, "reg-fixed-voltage") || - (!(strcmp(propname, "enable-gpio") && - strcmp(propname, "enable-gpios")) && - of_device_is_compatible(np, "regulator-gpio")))) { - bool active_high = of_property_read_bool(np, - "enable-active-high"); /* * The regulator GPIO handles are specified such that the * presence or absence of "enable-active-high" solely controls * the polarity of the GPIO line. Any phandle flags must * be actively ignored. */ - of_gpio_quirk_polarity(np, active_high, flags); +#if IS_ENABLED(CONFIG_REGULATOR_FIXED_VOLTAGE) + { "regulator-fixed", "gpios", "enable-active-high" }, + { "regulator-fixed", "gpio", "enable-active-high" }, + { "reg-fixed-voltage", "gpios", "enable-active-high" }, + { "reg-fixed-voltage", "gpio", "enable-active-high" }, +#endif +#if IS_ENABLED(CONFIG_REGULATOR_GPIO) + { "regulator-gpio", "enable-gpio", "enable-active-high" }, + { "regulator-gpio", "enable-gpios", "enable-active-high" }, +#endif + }; + unsigned int i; + bool active_high; + + for (i = 0; i < ARRAY_SIZE(gpios); i++) { + if (of_device_is_compatible(np, gpios[i].compatible) && + !strcmp(propname, gpios[i].gpio_propname)) { + active_high = of_property_read_bool(np, + gpios[i].polarity_propname); + of_gpio_quirk_polarity(np, active_high, flags); + break; + } } +} + +static void of_gpio_flags_quirks(const struct device_node *np, + const char *propname, + enum of_gpio_flags *flags, + int index) +{ + of_gpio_try_fixup_polarity(np, propname, flags); + of_gpio_set_polarity_by_property(np, propname, flags); + /* * Legacy open drain handling for fixed voltage regulators. 
*/ @@ -267,33 +302,6 @@ static void of_gpio_flags_quirks(const struct device_node *np, !strcmp(propname, "snps,reset-gpio") && of_property_read_bool(np, "snps,reset-active-low")) *flags |= OF_GPIO_ACTIVE_LOW; - - /* - * Freescale Fast Ethernet Controller uses a separate property to - * describe polarity of the phy reset line. - */ - if (IS_ENABLED(CONFIG_FEC)) { - static const char * const fec_devices[] = { - "fsl,imx25-fec", - "fsl,imx27-fec", - "fsl,imx28-fec", - "fsl,imx6q-fec", - "fsl,mvf600-fec", - "fsl,imx6sx-fec", - "fsl,imx6ul-fec", - "fsl,imx8mq-fec", - "fsl,imx8qm-fec", - "fsl,s32v234-fec", - NULL - }; - - if (!strcmp(propname, "phy-reset-gpios") && - of_device_compatible_match(np, fec_devices)) { - bool active_high = of_property_read_bool(np, - "phy-reset-active-high"); - of_gpio_quirk_polarity(np, active_high, flags); - } - } } /** From b8b80348c57b360019071e17380298619c5d8066 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sat, 29 Oct 2022 21:40:47 -0700 Subject: [PATCH 0929/4122] gpiolib: of: add polarity quirk for Freescale PCIe controller Bindings for Freescale PCIe controller use a separate property called "reset-gpio-active-high" to control polarity of its reset line, add it to the list of quirks in gpiolib so that gpiod API can be used in the driver. 
Signed-off-by: Dmitry Torokhov Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-of.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index e72b56a87b86..be9c34cca322 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -208,6 +208,15 @@ static void of_gpio_set_polarity_by_property(const struct device_node *np, { "fsl,imx8qm-fec", "phy-reset-gpios", "phy-reset-active-high" }, { "fsl,s32v234-fec", "phy-reset-gpios", "phy-reset-active-high" }, #endif +#if IS_ENABLED(CONFIG_PCI_IMX6) + { "fsl,imx6q-pcie", "reset-gpio", "reset-gpio-active-high" }, + { "fsl,imx6sx-pcie", "reset-gpio", "reset-gpio-active-high" }, + { "fsl,imx6qp-pcie", "reset-gpio", "reset-gpio-active-high" }, + { "fsl,imx7d-pcie", "reset-gpio", "reset-gpio-active-high" }, + { "fsl,imx8mq-pcie", "reset-gpio", "reset-gpio-active-high" }, + { "fsl,imx8mm-pcie", "reset-gpio", "reset-gpio-active-high" }, + { "fsl,imx8mp-pcie", "reset-gpio", "reset-gpio-active-high" }, +#endif /* * The regulator GPIO handles are specified such that the From 503fa23614dc95f96af883a8e2e873d5c6cd53d8 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Sat, 17 Sep 2022 13:03:09 +0100 Subject: [PATCH 0930/4122] PCI: Access Link 2 registers only for devices with Links PCIe r2.0, sec 7.8 added Link Capabilities/Status/Control 2 registers to the PCIe Capability with Capability Version 2. Previously we assumed these registers were implemented for all PCIe Capabilities of version 2 or greater, but in fact they are only implemented for devices with Links. Update pcie_capability_reg_implemented() to check whether the device has a Link. [bhelgaas: commit log, squash export] Link: https://lore.kernel.org/r/alpine.DEB.2.21.2209100057070.2275@angie.orcam.me.uk Link: https://lore.kernel.org/r/alpine.DEB.2.21.2209100057300.2275@angie.orcam.me.uk Signed-off-by: Maciej W. 
Rozycki Signed-off-by: Bjorn Helgaas --- drivers/pci/access.c | 8 +++++++- drivers/pci/pci.h | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/pci/access.c b/drivers/pci/access.c index 708c7529647f..3c230ca3de58 100644 --- a/drivers/pci/access.c +++ b/drivers/pci/access.c @@ -350,6 +350,11 @@ bool pcie_cap_has_lnkctl(const struct pci_dev *dev) type == PCI_EXP_TYPE_PCIE_BRIDGE; } +bool pcie_cap_has_lnkctl2(const struct pci_dev *dev) +{ + return pcie_cap_has_lnkctl(dev) && pcie_cap_version(dev) > 1; +} + static inline bool pcie_cap_has_sltctl(const struct pci_dev *dev) { return pcie_downstream_port(dev) && @@ -390,10 +395,11 @@ static bool pcie_capability_reg_implemented(struct pci_dev *dev, int pos) return pcie_cap_has_rtctl(dev); case PCI_EXP_DEVCAP2: case PCI_EXP_DEVCTL2: + return pcie_cap_version(dev) > 1; case PCI_EXP_LNKCAP2: case PCI_EXP_LNKCTL2: case PCI_EXP_LNKSTA2: - return pcie_cap_version(dev) > 1; + return pcie_cap_has_lnkctl2(dev); default: return false; } diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index b1ebb7ab8805..9ed3b5550043 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -15,6 +15,7 @@ extern const unsigned char pcie_link_speed[]; extern bool pci_early_dump; bool pcie_cap_has_lnkctl(const struct pci_dev *dev); +bool pcie_cap_has_lnkctl2(const struct pci_dev *dev); bool pcie_cap_has_rtctl(const struct pci_dev *dev); /* Functions internal to the PCI core code */ From 4c6b3af3906d0c59497d3bfb07760f3a082b4150 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 28 Oct 2022 18:04:23 +0200 Subject: [PATCH 0931/4122] phy: qcom-qmp-usb: fix sc8280xp PCS_USB offset The PCS_USB register block lives at an offset of 0x1000 from the PCS region on SC8280XP so add the missing offset to avoid corrupting unrelated registers on runtime suspend. Note that the current binding is broken as it does not describe the PCS_USB region and the PCS register size does not cover PCS_USB and the regions in between. 
As Linux currently maps full pages, simply adding the offset to the driver works until the binding has been fixed. Fixes: c0c7769cdae2 ("phy: qcom-qmp: Add SC8280XP USB3 UNI phy") Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221028160435.26948-2-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index cd167508f528..2e1f5dd7e76d 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -1682,6 +1682,7 @@ static const struct qmp_phy_cfg sc8280xp_usb3_uniphy_cfg = { .vreg_list = qmp_phy_vreg_l, .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = qmp_v4_usb3phy_regs_layout, + .pcs_usb_offset = 0x1000, }; static const struct qmp_phy_cfg qmp_v3_usb3_uniphy_cfg = { From 9fe6b4e87c225cfd8aad1be0e73be86611c35b01 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 28 Oct 2022 18:04:24 +0200 Subject: [PATCH 0932/4122] phy: qcom-qmp-usb: sort device-id table Sort the device-id table by compatible string to make it easier to find and add new entries.
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221028160435.26948-3-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 26 ++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 2e1f5dd7e76d..52b4bee867db 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -2503,14 +2503,20 @@ int qmp_usb_create(struct device *dev, struct device_node *np, int id, static const struct of_device_id qmp_usb_of_match_table[] = { { + .compatible = "qcom,ipq6018-qmp-usb3-phy", + .data = &ipq8074_usb3phy_cfg, + }, { .compatible = "qcom,ipq8074-qmp-usb3-phy", .data = &ipq8074_usb3phy_cfg, }, { .compatible = "qcom,msm8996-qmp-usb3-phy", .data = &msm8996_usb3phy_cfg, }, { - .compatible = "qcom,ipq6018-qmp-usb3-phy", - .data = &ipq8074_usb3phy_cfg, + .compatible = "qcom,msm8998-qmp-usb3-phy", + .data = &msm8998_usb3phy_cfg, + }, { + .compatible = "qcom,qcm2290-qmp-usb3-phy", + .data = &qcm2290_usb3phy_cfg, }, { .compatible = "qcom,sc7180-qmp-usb3-phy", .data = &sc7180_usb3phy_cfg, @@ -2527,8 +2533,11 @@ static const struct of_device_id qmp_usb_of_match_table[] = { .compatible = "qcom,sdm845-qmp-usb3-uni-phy", .data = &qmp_v3_usb3_uniphy_cfg, }, { - .compatible = "qcom,msm8998-qmp-usb3-phy", - .data = &msm8998_usb3phy_cfg, + .compatible = "qcom,sdx55-qmp-usb3-uni-phy", + .data = &sdx55_usb3_uniphy_cfg, + }, { + .compatible = "qcom,sdx65-qmp-usb3-uni-phy", + .data = &sdx65_usb3_uniphy_cfg, }, { .compatible = "qcom,sm8150-qmp-usb3-phy", .data = &sm8150_usb3phy_cfg, @@ -2541,12 +2550,6 @@ static const struct of_device_id qmp_usb_of_match_table[] = { }, { .compatible = "qcom,sm8250-qmp-usb3-uni-phy", .data = &sm8250_usb3_uniphy_cfg, - }, { - .compatible = "qcom,sdx55-qmp-usb3-uni-phy", - .data = &sdx55_usb3_uniphy_cfg, - }, { - .compatible = 
"qcom,sdx65-qmp-usb3-uni-phy", - .data = &sdx65_usb3_uniphy_cfg, }, { .compatible = "qcom,sm8350-qmp-usb3-phy", .data = &sm8350_usb3phy_cfg, @@ -2556,9 +2559,6 @@ static const struct of_device_id qmp_usb_of_match_table[] = { }, { .compatible = "qcom,sm8450-qmp-usb3-phy", .data = &sm8350_usb3phy_cfg, - }, { - .compatible = "qcom,qcm2290-qmp-usb3-phy", - .data = &qcm2290_usb3phy_cfg, }, { }, }; From 95dd63b8988cd914c8b5e805e5599cda2cad161e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 28 Oct 2022 18:04:25 +0200 Subject: [PATCH 0933/4122] phy: qcom-qmp-usb: move device-id table Move the device-id table below probe() and next to the driver structure to keep the driver callback functions grouped together. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221028160435.26948-4-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 126 ++++++++++++------------ 1 file changed, 63 insertions(+), 63 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 52b4bee867db..ead136c7bd01 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -2501,69 +2501,6 @@ int qmp_usb_create(struct device *dev, struct device_node *np, int id, return 0; } -static const struct of_device_id qmp_usb_of_match_table[] = { - { - .compatible = "qcom,ipq6018-qmp-usb3-phy", - .data = &ipq8074_usb3phy_cfg, - }, { - .compatible = "qcom,ipq8074-qmp-usb3-phy", - .data = &ipq8074_usb3phy_cfg, - }, { - .compatible = "qcom,msm8996-qmp-usb3-phy", - .data = &msm8996_usb3phy_cfg, - }, { - .compatible = "qcom,msm8998-qmp-usb3-phy", - .data = &msm8998_usb3phy_cfg, - }, { - .compatible = "qcom,qcm2290-qmp-usb3-phy", - .data = &qcm2290_usb3phy_cfg, - }, { - .compatible = "qcom,sc7180-qmp-usb3-phy", - .data = &sc7180_usb3phy_cfg, - }, { - .compatible = "qcom,sc8180x-qmp-usb3-phy", - .data = &sm8150_usb3phy_cfg, - }, { - 
.compatible = "qcom,sc8280xp-qmp-usb3-uni-phy", - .data = &sc8280xp_usb3_uniphy_cfg, - }, { - .compatible = "qcom,sdm845-qmp-usb3-phy", - .data = &qmp_v3_usb3phy_cfg, - }, { - .compatible = "qcom,sdm845-qmp-usb3-uni-phy", - .data = &qmp_v3_usb3_uniphy_cfg, - }, { - .compatible = "qcom,sdx55-qmp-usb3-uni-phy", - .data = &sdx55_usb3_uniphy_cfg, - }, { - .compatible = "qcom,sdx65-qmp-usb3-uni-phy", - .data = &sdx65_usb3_uniphy_cfg, - }, { - .compatible = "qcom,sm8150-qmp-usb3-phy", - .data = &sm8150_usb3phy_cfg, - }, { - .compatible = "qcom,sm8150-qmp-usb3-uni-phy", - .data = &sm8150_usb3_uniphy_cfg, - }, { - .compatible = "qcom,sm8250-qmp-usb3-phy", - .data = &sm8250_usb3phy_cfg, - }, { - .compatible = "qcom,sm8250-qmp-usb3-uni-phy", - .data = &sm8250_usb3_uniphy_cfg, - }, { - .compatible = "qcom,sm8350-qmp-usb3-phy", - .data = &sm8350_usb3phy_cfg, - }, { - .compatible = "qcom,sm8350-qmp-usb3-uni-phy", - .data = &sm8350_usb3_uniphy_cfg, - }, { - .compatible = "qcom,sm8450-qmp-usb3-phy", - .data = &sm8350_usb3phy_cfg, - }, - { }, -}; -MODULE_DEVICE_TABLE(of, qmp_usb_of_match_table); - static const struct dev_pm_ops qmp_usb_pm_ops = { SET_RUNTIME_PM_OPS(qmp_usb_runtime_suspend, qmp_usb_runtime_resume, NULL) @@ -2665,6 +2602,69 @@ err_node_put: return ret; } +static const struct of_device_id qmp_usb_of_match_table[] = { + { + .compatible = "qcom,ipq6018-qmp-usb3-phy", + .data = &ipq8074_usb3phy_cfg, + }, { + .compatible = "qcom,ipq8074-qmp-usb3-phy", + .data = &ipq8074_usb3phy_cfg, + }, { + .compatible = "qcom,msm8996-qmp-usb3-phy", + .data = &msm8996_usb3phy_cfg, + }, { + .compatible = "qcom,msm8998-qmp-usb3-phy", + .data = &msm8998_usb3phy_cfg, + }, { + .compatible = "qcom,qcm2290-qmp-usb3-phy", + .data = &qcm2290_usb3phy_cfg, + }, { + .compatible = "qcom,sc7180-qmp-usb3-phy", + .data = &sc7180_usb3phy_cfg, + }, { + .compatible = "qcom,sc8180x-qmp-usb3-phy", + .data = &sm8150_usb3phy_cfg, + }, { + .compatible = "qcom,sc8280xp-qmp-usb3-uni-phy", + .data = 
&sc8280xp_usb3_uniphy_cfg, + }, { + .compatible = "qcom,sdm845-qmp-usb3-phy", + .data = &qmp_v3_usb3phy_cfg, + }, { + .compatible = "qcom,sdm845-qmp-usb3-uni-phy", + .data = &qmp_v3_usb3_uniphy_cfg, + }, { + .compatible = "qcom,sdx55-qmp-usb3-uni-phy", + .data = &sdx55_usb3_uniphy_cfg, + }, { + .compatible = "qcom,sdx65-qmp-usb3-uni-phy", + .data = &sdx65_usb3_uniphy_cfg, + }, { + .compatible = "qcom,sm8150-qmp-usb3-phy", + .data = &sm8150_usb3phy_cfg, + }, { + .compatible = "qcom,sm8150-qmp-usb3-uni-phy", + .data = &sm8150_usb3_uniphy_cfg, + }, { + .compatible = "qcom,sm8250-qmp-usb3-phy", + .data = &sm8250_usb3phy_cfg, + }, { + .compatible = "qcom,sm8250-qmp-usb3-uni-phy", + .data = &sm8250_usb3_uniphy_cfg, + }, { + .compatible = "qcom,sm8350-qmp-usb3-phy", + .data = &sm8350_usb3phy_cfg, + }, { + .compatible = "qcom,sm8350-qmp-usb3-uni-phy", + .data = &sm8350_usb3_uniphy_cfg, + }, { + .compatible = "qcom,sm8450-qmp-usb3-phy", + .data = &sm8350_usb3phy_cfg, + }, + { }, +}; +MODULE_DEVICE_TABLE(of, qmp_usb_of_match_table); + static struct platform_driver qmp_usb_driver = { .probe = qmp_usb_probe, .driver = { From 9c9beef111a763513545a0ac0c60220fea64b063 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 28 Oct 2022 18:04:26 +0200 Subject: [PATCH 0934/4122] phy: qcom-qmp-usb: move pm ops Move the PM ops structure next to the implementation to keep the driver callbacks grouped. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221028160435.26948-5-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index ead136c7bd01..2a4535494d38 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -2280,6 +2280,11 @@ static int __maybe_unused qmp_usb_runtime_resume(struct device *dev) return 0; } +static const struct dev_pm_ops qmp_usb_pm_ops = { + SET_RUNTIME_PM_OPS(qmp_usb_runtime_suspend, + qmp_usb_runtime_resume, NULL) +}; + static int qmp_usb_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg) { struct qcom_qmp *qmp = dev_get_drvdata(dev); @@ -2501,11 +2506,6 @@ int qmp_usb_create(struct device *dev, struct device_node *np, int id, return 0; } -static const struct dev_pm_ops qmp_usb_pm_ops = { - SET_RUNTIME_PM_OPS(qmp_usb_runtime_suspend, - qmp_usb_runtime_resume, NULL) -}; - static int qmp_usb_probe(struct platform_device *pdev) { struct qcom_qmp *qmp; From 2a55ec4f0a048e0aa12022b52009d8d8667ee3d3 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 28 Oct 2022 18:04:27 +0200 Subject: [PATCH 0935/4122] phy: qcom-qmp-usb: merge driver data The USB QMP PHY driver only manages a single PHY so merge the old qcom_qmp and qmp_phy structures and drop the PHY array. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221028160435.26948-6-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 241 ++++++++++-------------- 1 file changed, 97 insertions(+), 144 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 2a4535494d38..4a1c7ac3f784 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -1453,60 +1453,30 @@ struct qmp_phy_cfg { unsigned int pcs_usb_offset; }; -/** - * struct qmp_phy - per-lane phy descriptor - * - * @phy: generic phy - * @cfg: phy specific configuration - * @serdes: iomapped memory space for phy's serdes (i.e. PLL) - * @tx: iomapped memory space for lane's tx - * @rx: iomapped memory space for lane's rx - * @pcs: iomapped memory space for lane's pcs - * @tx2: iomapped memory space for second lane's tx (in dual lane PHYs) - * @rx2: iomapped memory space for second lane's rx (in dual lane PHYs) - * @pcs_misc: iomapped memory space for lane's pcs_misc - * @pcs_usb: iomapped memory space for lane's pcs_usb - * @pipe_clk: pipe clock - * @qmp: QMP phy to which this lane belongs - * @mode: current PHY mode - */ -struct qmp_phy { - struct phy *phy; +struct qmp_usb { + struct device *dev; + const struct qmp_phy_cfg *cfg; + void __iomem *serdes; - void __iomem *tx; - void __iomem *rx; void __iomem *pcs; - void __iomem *tx2; - void __iomem *rx2; void __iomem *pcs_misc; void __iomem *pcs_usb; - struct clk *pipe_clk; - struct qcom_qmp *qmp; - enum phy_mode mode; -}; + void __iomem *tx; + void __iomem *rx; + void __iomem *tx2; + void __iomem *rx2; -/** - * struct qcom_qmp - structure holding QMP phy block attributes - * - * @dev: device - * @dp_com: iomapped memory space for phy's dp_com control block - * - * @clks: array of clocks required by phy - * @resets: array of resets required by phy - * @vregs: regulator supplies bulk data - * 
- * @phys: array of per-lane phy descriptors - */ -struct qcom_qmp { - struct device *dev; void __iomem *dp_com; + struct clk *pipe_clk; struct clk_bulk_data *clks; struct reset_control_bulk_data *resets; struct regulator_bulk_data *vregs; - struct qmp_phy **phys; + enum phy_mode mode; + + struct phy *phy; }; static inline void qphy_setbits(void __iomem *base, u32 offset, u32 val) @@ -1976,10 +1946,10 @@ static void qmp_usb_configure(void __iomem *base, qmp_usb_configure_lane(base, tbl, num, 0xff); } -static int qmp_usb_serdes_init(struct qmp_phy *qphy) +static int qmp_usb_serdes_init(struct qmp_usb *qmp) { - const struct qmp_phy_cfg *cfg = qphy->cfg; - void __iomem *serdes = qphy->serdes; + const struct qmp_phy_cfg *cfg = qmp->cfg; + void __iomem *serdes = qmp->serdes; const struct qmp_phy_init_tbl *serdes_tbl = cfg->serdes_tbl; int serdes_tbl_num = cfg->serdes_tbl_num; @@ -1990,10 +1960,9 @@ static int qmp_usb_serdes_init(struct qmp_phy *qphy) static int qmp_usb_init(struct phy *phy) { - struct qmp_phy *qphy = phy_get_drvdata(phy); - struct qcom_qmp *qmp = qphy->qmp; - const struct qmp_phy_cfg *cfg = qphy->cfg; - void __iomem *pcs = qphy->pcs; + struct qmp_usb *qmp = phy_get_drvdata(phy); + const struct qmp_phy_cfg *cfg = qmp->cfg; + void __iomem *pcs = qmp->pcs; void __iomem *dp_com = qmp->dp_com; int ret; @@ -2056,9 +2025,8 @@ err_disable_regulators: static int qmp_usb_exit(struct phy *phy) { - struct qmp_phy *qphy = phy_get_drvdata(phy); - struct qcom_qmp *qmp = qphy->qmp; - const struct qmp_phy_cfg *cfg = qphy->cfg; + struct qmp_usb *qmp = phy_get_drvdata(phy); + const struct qmp_phy_cfg *cfg = qmp->cfg; reset_control_bulk_assert(cfg->num_resets, qmp->resets); @@ -2071,19 +2039,18 @@ static int qmp_usb_exit(struct phy *phy) static int qmp_usb_power_on(struct phy *phy) { - struct qmp_phy *qphy = phy_get_drvdata(phy); - struct qcom_qmp *qmp = qphy->qmp; - const struct qmp_phy_cfg *cfg = qphy->cfg; - void __iomem *tx = qphy->tx; - void __iomem *rx = qphy->rx; - 
void __iomem *pcs = qphy->pcs; + struct qmp_usb *qmp = phy_get_drvdata(phy); + const struct qmp_phy_cfg *cfg = qmp->cfg; + void __iomem *tx = qmp->tx; + void __iomem *rx = qmp->rx; + void __iomem *pcs = qmp->pcs; void __iomem *status; unsigned int val; int ret; - qmp_usb_serdes_init(qphy); + qmp_usb_serdes_init(qmp); - ret = clk_prepare_enable(qphy->pipe_clk); + ret = clk_prepare_enable(qmp->pipe_clk); if (ret) { dev_err(qmp->dev, "pipe_clk enable failed err=%d\n", ret); return ret; @@ -2093,12 +2060,12 @@ static int qmp_usb_power_on(struct phy *phy) qmp_usb_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1); if (cfg->lanes >= 2) - qmp_usb_configure_lane(qphy->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2); + qmp_usb_configure_lane(qmp->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2); qmp_usb_configure_lane(rx, cfg->rx_tbl, cfg->rx_tbl_num, 1); if (cfg->lanes >= 2) - qmp_usb_configure_lane(qphy->rx2, cfg->rx_tbl, cfg->rx_tbl_num, 2); + qmp_usb_configure_lane(qmp->rx2, cfg->rx_tbl, cfg->rx_tbl_num, 2); qmp_usb_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num); @@ -2122,27 +2089,27 @@ static int qmp_usb_power_on(struct phy *phy) return 0; err_disable_pipe_clk: - clk_disable_unprepare(qphy->pipe_clk); + clk_disable_unprepare(qmp->pipe_clk); return ret; } static int qmp_usb_power_off(struct phy *phy) { - struct qmp_phy *qphy = phy_get_drvdata(phy); - const struct qmp_phy_cfg *cfg = qphy->cfg; + struct qmp_usb *qmp = phy_get_drvdata(phy); + const struct qmp_phy_cfg *cfg = qmp->cfg; - clk_disable_unprepare(qphy->pipe_clk); + clk_disable_unprepare(qmp->pipe_clk); /* PHY reset */ - qphy_setbits(qphy->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); + qphy_setbits(qmp->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); /* stop SerDes and Phy-Coding-Sublayer */ - qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], + qphy_clrbits(qmp->pcs, cfg->regs[QPHY_START_CTRL], SERDES_START | PCS_START); /* Put PHY into POWER DOWN state: active low */ - qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], + 
qphy_clrbits(qmp->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], SW_PWRDN); return 0; @@ -2175,22 +2142,22 @@ static int qmp_usb_disable(struct phy *phy) static int qmp_usb_set_mode(struct phy *phy, enum phy_mode mode, int submode) { - struct qmp_phy *qphy = phy_get_drvdata(phy); + struct qmp_usb *qmp = phy_get_drvdata(phy); - qphy->mode = mode; + qmp->mode = mode; return 0; } -static void qmp_usb_enable_autonomous_mode(struct qmp_phy *qphy) +static void qmp_usb_enable_autonomous_mode(struct qmp_usb *qmp) { - const struct qmp_phy_cfg *cfg = qphy->cfg; - void __iomem *pcs_usb = qphy->pcs_usb ?: qphy->pcs; - void __iomem *pcs_misc = qphy->pcs_misc; + const struct qmp_phy_cfg *cfg = qmp->cfg; + void __iomem *pcs_usb = qmp->pcs_usb ?: qmp->pcs; + void __iomem *pcs_misc = qmp->pcs_misc; u32 intr_mask; - if (qphy->mode == PHY_MODE_USB_HOST_SS || - qphy->mode == PHY_MODE_USB_DEVICE_SS) + if (qmp->mode == PHY_MODE_USB_HOST_SS || + qmp->mode == PHY_MODE_USB_DEVICE_SS) intr_mask = ARCVR_DTCT_EN | ALFPS_DTCT_EN; else intr_mask = ARCVR_DTCT_EN | ARCVR_DTCT_EVENT_SEL; @@ -2211,11 +2178,11 @@ static void qmp_usb_enable_autonomous_mode(struct qmp_phy *qphy) qphy_clrbits(pcs_misc, QPHY_V3_PCS_MISC_CLAMP_ENABLE, CLAMP_EN); } -static void qmp_usb_disable_autonomous_mode(struct qmp_phy *qphy) +static void qmp_usb_disable_autonomous_mode(struct qmp_usb *qmp) { - const struct qmp_phy_cfg *cfg = qphy->cfg; - void __iomem *pcs_usb = qphy->pcs_usb ?: qphy->pcs; - void __iomem *pcs_misc = qphy->pcs_misc; + const struct qmp_phy_cfg *cfg = qmp->cfg; + void __iomem *pcs_usb = qmp->pcs_usb ?: qmp->pcs; + void __iomem *pcs_misc = qmp->pcs_misc; /* Disable i/o clamp_n on resume for normal mode */ if (pcs_misc) @@ -2231,20 +2198,19 @@ static void qmp_usb_disable_autonomous_mode(struct qmp_phy *qphy) static int __maybe_unused qmp_usb_runtime_suspend(struct device *dev) { - struct qcom_qmp *qmp = dev_get_drvdata(dev); - struct qmp_phy *qphy = qmp->phys[0]; - const struct qmp_phy_cfg *cfg = qphy->cfg; 
+ struct qmp_usb *qmp = dev_get_drvdata(dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; - dev_vdbg(dev, "Suspending QMP phy, mode:%d\n", qphy->mode); + dev_vdbg(dev, "Suspending QMP phy, mode:%d\n", qmp->mode); - if (!qphy->phy->init_count) { + if (!qmp->phy->init_count) { dev_vdbg(dev, "PHY not initialized, bailing out\n"); return 0; } - qmp_usb_enable_autonomous_mode(qphy); + qmp_usb_enable_autonomous_mode(qmp); - clk_disable_unprepare(qphy->pipe_clk); + clk_disable_unprepare(qmp->pipe_clk); clk_bulk_disable_unprepare(cfg->num_clks, qmp->clks); return 0; @@ -2252,14 +2218,13 @@ static int __maybe_unused qmp_usb_runtime_suspend(struct device *dev) static int __maybe_unused qmp_usb_runtime_resume(struct device *dev) { - struct qcom_qmp *qmp = dev_get_drvdata(dev); - struct qmp_phy *qphy = qmp->phys[0]; - const struct qmp_phy_cfg *cfg = qphy->cfg; + struct qmp_usb *qmp = dev_get_drvdata(dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; int ret = 0; - dev_vdbg(dev, "Resuming QMP phy, mode:%d\n", qphy->mode); + dev_vdbg(dev, "Resuming QMP phy, mode:%d\n", qmp->mode); - if (!qphy->phy->init_count) { + if (!qmp->phy->init_count) { dev_vdbg(dev, "PHY not initialized, bailing out\n"); return 0; } @@ -2268,14 +2233,14 @@ static int __maybe_unused qmp_usb_runtime_resume(struct device *dev) if (ret) return ret; - ret = clk_prepare_enable(qphy->pipe_clk); + ret = clk_prepare_enable(qmp->pipe_clk); if (ret) { dev_err(dev, "pipe_clk enable failed, err=%d\n", ret); clk_bulk_disable_unprepare(cfg->num_clks, qmp->clks); return ret; } - qmp_usb_disable_autonomous_mode(qphy); + qmp_usb_disable_autonomous_mode(qmp); return 0; } @@ -2287,7 +2252,7 @@ static const struct dev_pm_ops qmp_usb_pm_ops = { static int qmp_usb_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg) { - struct qcom_qmp *qmp = dev_get_drvdata(dev); + struct qmp_usb *qmp = dev_get_drvdata(dev); int num = cfg->num_vregs; int i; @@ -2303,7 +2268,7 @@ static int qmp_usb_vreg_init(struct device *dev, const 
struct qmp_phy_cfg *cfg) static int qmp_usb_reset_init(struct device *dev, const struct qmp_phy_cfg *cfg) { - struct qcom_qmp *qmp = dev_get_drvdata(dev); + struct qmp_usb *qmp = dev_get_drvdata(dev); int i; int ret; @@ -2324,7 +2289,7 @@ static int qmp_usb_reset_init(struct device *dev, const struct qmp_phy_cfg *cfg) static int qmp_usb_clk_init(struct device *dev, const struct qmp_phy_cfg *cfg) { - struct qcom_qmp *qmp = dev_get_drvdata(dev); + struct qmp_usb *qmp = dev_get_drvdata(dev); int num = cfg->num_clks; int i; @@ -2361,7 +2326,7 @@ static void phy_clk_release_provider(void *res) * clk | +-------+ | +-----+ * +---------------+ */ -static int phy_pipe_clk_register(struct qcom_qmp *qmp, struct device_node *np) +static int phy_pipe_clk_register(struct qmp_usb *qmp, struct device_node *np) { struct clk_fixed_rate *fixed; struct clk_init_data init = { }; @@ -2420,13 +2385,11 @@ static void __iomem *qmp_usb_iomap(struct device *dev, struct device_node *np, return devm_of_iomap(dev, np, index, NULL); } -static -int qmp_usb_create(struct device *dev, struct device_node *np, int id, +static int qmp_usb_create(struct device *dev, struct device_node *np, void __iomem *serdes, const struct qmp_phy_cfg *cfg) { - struct qcom_qmp *qmp = dev_get_drvdata(dev); + struct qmp_usb *qmp = dev_get_drvdata(dev); struct phy *generic_phy; - struct qmp_phy *qphy; bool exclusive = true; int ret; @@ -2439,81 +2402,75 @@ int qmp_usb_create(struct device *dev, struct device_node *np, int id, if (of_device_is_compatible(dev->of_node, "qcom,sm8350-qmp-usb3-uni-phy")) exclusive = false; - qphy = devm_kzalloc(dev, sizeof(*qphy), GFP_KERNEL); - if (!qphy) - return -ENOMEM; - - qphy->cfg = cfg; - qphy->serdes = serdes; + qmp->cfg = cfg; + qmp->serdes = serdes; /* * Get memory resources for the PHY: * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2. * For dual lane PHYs: tx2 -> 3, rx2 -> 4, pcs_misc (optional) -> 5 * For single lane PHYs: pcs_misc (optional) -> 3. 
*/ - qphy->tx = devm_of_iomap(dev, np, 0, NULL); - if (IS_ERR(qphy->tx)) - return PTR_ERR(qphy->tx); + qmp->tx = devm_of_iomap(dev, np, 0, NULL); + if (IS_ERR(qmp->tx)) + return PTR_ERR(qmp->tx); - qphy->rx = devm_of_iomap(dev, np, 1, NULL); - if (IS_ERR(qphy->rx)) - return PTR_ERR(qphy->rx); + qmp->rx = devm_of_iomap(dev, np, 1, NULL); + if (IS_ERR(qmp->rx)) + return PTR_ERR(qmp->rx); - qphy->pcs = qmp_usb_iomap(dev, np, 2, exclusive); - if (IS_ERR(qphy->pcs)) - return PTR_ERR(qphy->pcs); + qmp->pcs = qmp_usb_iomap(dev, np, 2, exclusive); + if (IS_ERR(qmp->pcs)) + return PTR_ERR(qmp->pcs); if (cfg->pcs_usb_offset) - qphy->pcs_usb = qphy->pcs + cfg->pcs_usb_offset; + qmp->pcs_usb = qmp->pcs + cfg->pcs_usb_offset; if (cfg->lanes >= 2) { - qphy->tx2 = devm_of_iomap(dev, np, 3, NULL); - if (IS_ERR(qphy->tx2)) - return PTR_ERR(qphy->tx2); + qmp->tx2 = devm_of_iomap(dev, np, 3, NULL); + if (IS_ERR(qmp->tx2)) + return PTR_ERR(qmp->tx2); - qphy->rx2 = devm_of_iomap(dev, np, 4, NULL); - if (IS_ERR(qphy->rx2)) - return PTR_ERR(qphy->rx2); + qmp->rx2 = devm_of_iomap(dev, np, 4, NULL); + if (IS_ERR(qmp->rx2)) + return PTR_ERR(qmp->rx2); - qphy->pcs_misc = devm_of_iomap(dev, np, 5, NULL); + qmp->pcs_misc = devm_of_iomap(dev, np, 5, NULL); } else { - qphy->pcs_misc = devm_of_iomap(dev, np, 3, NULL); + qmp->pcs_misc = devm_of_iomap(dev, np, 3, NULL); } - if (IS_ERR(qphy->pcs_misc)) { + if (IS_ERR(qmp->pcs_misc)) { dev_vdbg(dev, "PHY pcs_misc-reg not used\n"); - qphy->pcs_misc = NULL; + qmp->pcs_misc = NULL; } - qphy->pipe_clk = devm_get_clk_from_child(dev, np, NULL); - if (IS_ERR(qphy->pipe_clk)) { - return dev_err_probe(dev, PTR_ERR(qphy->pipe_clk), - "failed to get lane%d pipe clock\n", id); + qmp->pipe_clk = devm_get_clk_from_child(dev, np, NULL); + if (IS_ERR(qmp->pipe_clk)) { + return dev_err_probe(dev, PTR_ERR(qmp->pipe_clk), + "failed to get pipe clock\n"); } generic_phy = devm_phy_create(dev, np, &qmp_usb_ops); if (IS_ERR(generic_phy)) { ret = PTR_ERR(generic_phy); - 
dev_err(dev, "failed to create qphy %d\n", ret); + dev_err(dev, "failed to create PHY: %d\n", ret); return ret; } - qphy->phy = generic_phy; - qphy->qmp = qmp; - qmp->phys[id] = qphy; - phy_set_drvdata(generic_phy, qphy); + qmp->phy = generic_phy; + phy_set_drvdata(generic_phy, qmp); return 0; } static int qmp_usb_probe(struct platform_device *pdev) { - struct qcom_qmp *qmp; struct device *dev = &pdev->dev; struct device_node *child; struct phy_provider *phy_provider; void __iomem *serdes; const struct qmp_phy_cfg *cfg = NULL; + struct qmp_usb *qmp; int num, id; int ret; @@ -2555,10 +2512,6 @@ static int qmp_usb_probe(struct platform_device *pdev) if (num > 1) return -EINVAL; - qmp->phys = devm_kcalloc(dev, num, sizeof(*qmp->phys), GFP_KERNEL); - if (!qmp->phys) - return -ENOMEM; - pm_runtime_set_active(dev); ret = devm_pm_runtime_enable(dev); if (ret) @@ -2572,7 +2525,7 @@ static int qmp_usb_probe(struct platform_device *pdev) id = 0; for_each_available_child_of_node(dev->of_node, child) { /* Create per-lane phy */ - ret = qmp_usb_create(dev, child, id, serdes, cfg); + ret = qmp_usb_create(dev, child, serdes, cfg); if (ret) { dev_err(dev, "failed to create lane%d phy, %d\n", id, ret); From 8ec02ba8493639c721fc63ed05c06891061ef9b5 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 28 Oct 2022 18:04:28 +0200 Subject: [PATCH 0936/4122] phy: qcom-qmp-usb: clean up device-tree parsing Since the QMP driver split there will be at most a single child node so drop the obsolete iteration construct. While at it, drop the verbose error logging that would have been printed also on probe deferrals. Note that there's no need to check if there are additional child nodes (the kernel is not a devicetree validator), but let's return an error if there are no child nodes at all for now. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221028160435.26948-7-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 36 +++++++------------------ 1 file changed, 10 insertions(+), 26 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 4a1c7ac3f784..05ceab23258a 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -2471,7 +2471,6 @@ static int qmp_usb_probe(struct platform_device *pdev) void __iomem *serdes; const struct qmp_phy_cfg *cfg = NULL; struct qmp_usb *qmp; - int num, id; int ret; qmp = devm_kzalloc(dev, sizeof(*qmp), GFP_KERNEL); @@ -2507,44 +2506,29 @@ static int qmp_usb_probe(struct platform_device *pdev) if (ret) return ret; - num = of_get_available_child_count(dev->of_node); - /* do we have a rogue child node ? */ - if (num > 1) + child = of_get_next_available_child(dev->of_node, NULL); + if (!child) return -EINVAL; pm_runtime_set_active(dev); ret = devm_pm_runtime_enable(dev); if (ret) - return ret; + goto err_node_put; /* * Prevent runtime pm from being ON by default. Users can enable * it using power/control in sysfs. */ pm_runtime_forbid(dev); - id = 0; - for_each_available_child_of_node(dev->of_node, child) { - /* Create per-lane phy */ - ret = qmp_usb_create(dev, child, serdes, cfg); - if (ret) { - dev_err(dev, "failed to create lane%d phy, %d\n", - id, ret); - goto err_node_put; - } + ret = qmp_usb_create(dev, child, serdes, cfg); + if (ret) + goto err_node_put; - /* - * Register the pipe clock provided by phy. - * See function description to see details of this pipe clock. 
- */ - ret = phy_pipe_clk_register(qmp, child); - if (ret) { - dev_err(qmp->dev, - "failed to register pipe clock source\n"); - goto err_node_put; - } + ret = phy_pipe_clk_register(qmp, child); + if (ret) + goto err_node_put; - id++; - } + of_node_put(child); phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); From 413db06c05e729639e9b64cf7ab5d918b8182006 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 28 Oct 2022 18:04:29 +0200 Subject: [PATCH 0937/4122] phy: qcom-qmp-usb: clean up probe initialisation Stop abusing the driver data pointer and instead pass the driver state structure directly to the initialisation helpers during probe. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221028160435.26948-8-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 46 ++++++++++++------------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 05ceab23258a..d3c0b994b939 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -2250,9 +2250,10 @@ static const struct dev_pm_ops qmp_usb_pm_ops = { qmp_usb_runtime_resume, NULL) }; -static int qmp_usb_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg) +static int qmp_usb_vreg_init(struct qmp_usb *qmp) { - struct qmp_usb *qmp = dev_get_drvdata(dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; + struct device *dev = qmp->dev; int num = cfg->num_vregs; int i; @@ -2266,9 +2267,10 @@ static int qmp_usb_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg) return devm_regulator_bulk_get(dev, num, qmp->vregs); } -static int qmp_usb_reset_init(struct device *dev, const struct qmp_phy_cfg *cfg) +static int qmp_usb_reset_init(struct qmp_usb *qmp) { - struct qmp_usb *qmp = dev_get_drvdata(dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; + struct device *dev = qmp->dev; 
int i; int ret; @@ -2287,9 +2289,10 @@ static int qmp_usb_reset_init(struct device *dev, const struct qmp_phy_cfg *cfg) return 0; } -static int qmp_usb_clk_init(struct device *dev, const struct qmp_phy_cfg *cfg) +static int qmp_usb_clk_init(struct qmp_usb *qmp) { - struct qmp_usb *qmp = dev_get_drvdata(dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; + struct device *dev = qmp->dev; int num = cfg->num_clks; int i; @@ -2385,10 +2388,10 @@ static void __iomem *qmp_usb_iomap(struct device *dev, struct device_node *np, return devm_of_iomap(dev, np, index, NULL); } -static int qmp_usb_create(struct device *dev, struct device_node *np, - void __iomem *serdes, const struct qmp_phy_cfg *cfg) +static int qmp_usb_create(struct qmp_usb *qmp, struct device_node *np) { - struct qmp_usb *qmp = dev_get_drvdata(dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; + struct device *dev = qmp->dev; struct phy *generic_phy; bool exclusive = true; int ret; @@ -2402,8 +2405,6 @@ static int qmp_usb_create(struct device *dev, struct device_node *np, if (of_device_is_compatible(dev->of_node, "qcom,sm8350-qmp-usb3-uni-phy")) exclusive = false; - qmp->cfg = cfg; - qmp->serdes = serdes; /* * Get memory resources for the PHY: * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2. 
@@ -2468,8 +2469,6 @@ static int qmp_usb_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct device_node *child; struct phy_provider *phy_provider; - void __iomem *serdes; - const struct qmp_phy_cfg *cfg = NULL; struct qmp_usb *qmp; int ret; @@ -2478,31 +2477,30 @@ static int qmp_usb_probe(struct platform_device *pdev) return -ENOMEM; qmp->dev = dev; - dev_set_drvdata(dev, qmp); - cfg = of_device_get_match_data(dev); - if (!cfg) + qmp->cfg = of_device_get_match_data(dev); + if (!qmp->cfg) return -EINVAL; - serdes = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(serdes)) - return PTR_ERR(serdes); + qmp->serdes = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(qmp->serdes)) + return PTR_ERR(qmp->serdes); - if (cfg->has_phy_dp_com_ctrl) { + if (qmp->cfg->has_phy_dp_com_ctrl) { qmp->dp_com = devm_platform_ioremap_resource(pdev, 1); if (IS_ERR(qmp->dp_com)) return PTR_ERR(qmp->dp_com); } - ret = qmp_usb_clk_init(dev, cfg); + ret = qmp_usb_clk_init(qmp); if (ret) return ret; - ret = qmp_usb_reset_init(dev, cfg); + ret = qmp_usb_reset_init(qmp); if (ret) return ret; - ret = qmp_usb_vreg_init(dev, cfg); + ret = qmp_usb_vreg_init(qmp); if (ret) return ret; @@ -2520,7 +2518,7 @@ static int qmp_usb_probe(struct platform_device *pdev) */ pm_runtime_forbid(dev); - ret = qmp_usb_create(dev, child, serdes, cfg); + ret = qmp_usb_create(qmp, child); if (ret) goto err_node_put; From 8fe2b2b745a123640ead94743cf28c380fec9b58 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 28 Oct 2022 18:04:30 +0200 Subject: [PATCH 0938/4122] phy: qcom-qmp-usb: rename PHY ops structure Rename the PHY operation structure so that it has a "phy_ops" suffix and move it next to the implementation. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221028160435.26948-9-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index d3c0b994b939..0158399920b8 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -2149,6 +2149,13 @@ static int qmp_usb_set_mode(struct phy *phy, enum phy_mode mode, int submode) return 0; } +static const struct phy_ops qmp_usb_phy_ops = { + .init = qmp_usb_enable, + .exit = qmp_usb_disable, + .set_mode = qmp_usb_set_mode, + .owner = THIS_MODULE, +}; + static void qmp_usb_enable_autonomous_mode(struct qmp_usb *qmp) { const struct qmp_phy_cfg *cfg = qmp->cfg; @@ -2366,13 +2373,6 @@ static int phy_pipe_clk_register(struct qmp_usb *qmp, struct device_node *np) return devm_add_action_or_reset(qmp->dev, phy_clk_release_provider, np); } -static const struct phy_ops qmp_usb_ops = { - .init = qmp_usb_enable, - .exit = qmp_usb_disable, - .set_mode = qmp_usb_set_mode, - .owner = THIS_MODULE, -}; - static void __iomem *qmp_usb_iomap(struct device *dev, struct device_node *np, int index, bool exclusive) { @@ -2451,7 +2451,7 @@ static int qmp_usb_create(struct qmp_usb *qmp, struct device_node *np) "failed to get pipe clock\n"); } - generic_phy = devm_phy_create(dev, np, &qmp_usb_ops); + generic_phy = devm_phy_create(dev, np, &qmp_usb_phy_ops); if (IS_ERR(generic_phy)) { ret = PTR_ERR(generic_phy); dev_err(dev, "failed to create PHY: %d\n", ret); From 876420fb7b98934a4f78d8976c7ff095a13c90b5 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 28 Oct 2022 18:04:31 +0200 Subject: [PATCH 0939/4122] phy: qcom-qmp-usb: clean up PHY init Clean up the PHY initialisation somewhat by programming both tx and rx for the second lane after the first lane. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221028160435.26948-10-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 0158399920b8..cf1e04e9daf7 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -2058,14 +2058,12 @@ static int qmp_usb_power_on(struct phy *phy) /* Tx, Rx, and PCS configurations */ qmp_usb_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1); - - if (cfg->lanes >= 2) - qmp_usb_configure_lane(qmp->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2); - qmp_usb_configure_lane(rx, cfg->rx_tbl, cfg->rx_tbl_num, 1); - if (cfg->lanes >= 2) + if (cfg->lanes >= 2) { + qmp_usb_configure_lane(qmp->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2); qmp_usb_configure_lane(qmp->rx2, cfg->rx_tbl, cfg->rx_tbl_num, 2); + } qmp_usb_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num); From 500e9d37fb9ead52498af1b035933c62a487efe8 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 28 Oct 2022 18:04:32 +0200 Subject: [PATCH 0940/4122] dt-bindings: phy: qcom,qmp-usb: rename current bindings The current QMP USB PHY bindings are based on the original MSM8996 binding which provided multiple PHYs per IP block and these in turn were described by child nodes. Later QMP USB PHY blocks only provide a single PHY and the remnant child node does not really reflect the hardware. The original MSM8996 binding also ended up describing the individual register blocks as belonging to either the wrapper node or the PHY child nodes. This is an unnecessary level of detail which has led to problems when later IP blocks using different register layouts have been forced to fit the original mould rather than updating the binding.
The bindings are arguably also incomplete as they only describe the register blocks used by the current Linux drivers (e.g. does not include the per lane PCS registers). In preparation for adding new bindings for SC8280XP which further bindings can be based on, rename the current bindings after MSM8996 and add a reference to the SC8280XP bindings. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221028160435.26948-11-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- ...com,qmp-usb-phy.yaml => qcom,msm8996-qmp-usb3-phy.yaml} | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) rename Documentation/devicetree/bindings/phy/{qcom,qmp-usb-phy.yaml => qcom,msm8996-qmp-usb3-phy.yaml} (97%) diff --git a/Documentation/devicetree/bindings/phy/qcom,qmp-usb-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-usb3-phy.yaml similarity index 97% rename from Documentation/devicetree/bindings/phy/qcom,qmp-usb-phy.yaml rename to Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-usb3-phy.yaml index 7acb4b7de7f9..58ac84de8eee 100644 --- a/Documentation/devicetree/bindings/phy/qcom,qmp-usb-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-usb3-phy.yaml @@ -1,10 +1,10 @@ # SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) %YAML 1.2 --- -$id: http://devicetree.org/schemas/phy/qcom,qmp-usb-phy.yaml# +$id: http://devicetree.org/schemas/phy/qcom,msm8996-qmp-usb3-phy.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Qualcomm QMP PHY controller (USB) +title: Qualcomm QMP PHY controller (USB, MSM8996) maintainers: - Vinod Koul @@ -13,6 +13,9 @@ description: QMP PHY controller supports physical layer functionality for a number of controllers on Qualcomm chipsets, such as, PCIe, UFS, and USB. + Note that these bindings are for SoCs up to SC8180X. For newer SoCs, see + qcom,sc8280xp-qmp-usb3-uni-phy.yaml.
+ properties: compatible: enum: From e8e58e29a0c9310a917448d0c4a1857f0dbfd917 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 28 Oct 2022 18:04:33 +0200 Subject: [PATCH 0941/4122] dt-bindings: phy: qcom,qmp-usb: fix sc8280xp binding The current QMP USB PHY bindings are based on the original MSM8996 PCIe PHY binding which provided multiple PHYs per IP block and these in turn were described by child nodes. The QMP USB PHY block only provides a single PHY and the remnant child node does not really reflect the hardware. The original MSM8996 binding also ended up describing the individual register blocks as belonging to either the wrapper node or the PHY child nodes. This is an unnecessary level of detail which has led to problems when later IP blocks using different register layouts have been forced to fit the original mould rather than updating the binding. The bindings are arguably also incomplete as they only describe the register blocks used by the current Linux drivers (e.g. does not include the per lane PCS registers). Note that PCS_USB region is also not described by the current bindings despite being used by the driver and this has led to people increasing the size of the PCS region in the devicetree so that it includes PCS_USB registers even though other regions like TX and RX may lie in between. Add a new binding for the QMP USB PHYs found on SC8280XP which further bindings can be based on. Note that this also fixes the SC8280XP "phy_phy" reset name. Also note that the current binding is simply removed instead of being deprecated as it was only recently merged and support for SC8280XP is still under development. And, specifically, there is no support in mainline for the multiport controller that uses these PHYs.
Signed-off-by: Johan Hovold Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20221028160435.26948-12-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- .../phy/qcom,msm8996-qmp-usb3-phy.yaml | 13 --- .../phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml | 105 ++++++++++++++++++ 2 files changed, 105 insertions(+), 13 deletions(-) create mode 100644 Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml diff --git a/Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-usb3-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-usb3-phy.yaml index 58ac84de8eee..0c6b3ba7346b 100644 --- a/Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-usb3-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-usb3-phy.yaml @@ -26,7 +26,6 @@ properties: - qcom,qcm2290-qmp-usb3-phy - qcom,sc7180-qmp-usb3-phy - qcom,sc8180x-qmp-usb3-phy - - qcom,sc8280xp-qmp-usb3-uni-phy - qcom,sdm845-qmp-usb3-phy - qcom,sdm845-qmp-usb3-uni-phy - qcom,sdx55-qmp-usb3-uni-phy @@ -204,7 +203,6 @@ allOf: compatible: contains: enum: - - qcom,sc8280xp-qmp-usb3-uni-phy - qcom,sm8150-qmp-usb3-phy - qcom,sm8150-qmp-usb3-uni-phy - qcom,sm8250-qmp-usb3-uni-phy @@ -271,16 +269,6 @@ allOf: - const: phy_phy - const: phy - - if: - properties: - compatible: - contains: - enum: - - qcom,sc8280xp-qmp-usb3-uni-phy - then: - required: - - power-domains - - if: properties: compatible: @@ -352,7 +340,6 @@ allOf: contains: enum: - qcom,msm8996-qmp-usb3-phy - - qcom,sc8280xp-qmp-usb3-uni-phy - qcom,sm8250-qmp-usb3-uni-phy - qcom,sm8350-qmp-usb3-uni-phy then: diff --git a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml new file mode 100644 index 000000000000..ef080509747a --- /dev/null +++ b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml @@ -0,0 +1,105 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: 
http://devicetree.org/schemas/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm QMP PHY controller (USB, SC8280XP) + +maintainers: + - Vinod Koul + +description: + The QMP PHY controller supports physical layer functionality for a number of + controllers on Qualcomm chipsets, such as, PCIe, UFS, and USB. + +properties: + compatible: + enum: + - qcom,sc8280xp-qmp-usb3-uni-phy + + reg: + maxItems: 1 + + clocks: + maxItems: 5 + + clock-names: + items: + - const: aux + - const: ref_clk_src + - const: ref + - const: com_aux + - const: pipe + + power-domains: + maxItems: 1 + + resets: + maxItems: 2 + + reset-names: + items: + - const: phy + - const: phy_phy + + vdda-phy-supply: true + + vdda-pll-supply: true + + "#clock-cells": + const: 0 + + clock-output-names: + maxItems: 1 + + "#phy-cells": + const: 0 + +required: + - compatible + - reg + - clocks + - clock-names + - power-domains + - resets + - reset-names + - vdda-phy-supply + - vdda-pll-supply + - "#clock-cells" + - clock-output-names + - "#phy-cells" + +additionalProperties: false + +examples: + - | + #include + #include + + phy@88ef000 { + compatible = "qcom,sc8280xp-qmp-usb3-uni-phy"; + reg = <0x088ef000 0x2000>; + + clocks = <&gcc GCC_USB3_MP_PHY_AUX_CLK>, + <&rpmhcc RPMH_CXO_CLK>, + <&gcc GCC_USB3_MP0_CLKREF_CLK>, + <&gcc GCC_USB3_MP_PHY_COM_AUX_CLK>, + <&gcc GCC_USB3_MP_PHY_PIPE_0_CLK>; + clock-names = "aux", "ref_clk_src", "ref", "com_aux", + "pipe"; + + power-domains = <&gcc USB30_MP_GDSC>; + + resets = <&gcc GCC_USB3_UNIPHY_MP0_BCR>, + <&gcc GCC_USB3UNIPHY_PHY_MP0_BCR>; + reset-names = "phy", "phy_phy"; + + vdda-phy-supply = <&vreg_l3a>; + vdda-pll-supply = <&vreg_l5a>; + + #clock-cells = <0>; + clock-output-names = "usb2_phy0_pipe_clk"; + + #phy-cells = <0>; + }; From 183462e8c92cce5cbfabecc1719fb61c61b70833 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 28 Oct 2022 18:04:34 +0200 Subject: [PATCH 0942/4122] phy: qcom-qmp-usb: 
restructure PHY creation In preparation for supporting devicetree bindings which do not use a child node, move the PHY creation to probe() proper and parse the serdes and dp_com resources in what is now the legacy devicetree helper. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221028160435.26948-13-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 46 ++++++++++++------------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index cf1e04e9daf7..fa87e90a821e 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -2386,13 +2386,22 @@ static void __iomem *qmp_usb_iomap(struct device *dev, struct device_node *np, return devm_of_iomap(dev, np, index, NULL); } -static int qmp_usb_create(struct qmp_usb *qmp, struct device_node *np) +static int qmp_usb_parse_dt_legacy(struct qmp_usb *qmp, struct device_node *np) { + struct platform_device *pdev = to_platform_device(qmp->dev); const struct qmp_phy_cfg *cfg = qmp->cfg; struct device *dev = qmp->dev; - struct phy *generic_phy; bool exclusive = true; - int ret; + + qmp->serdes = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(qmp->serdes)) + return PTR_ERR(qmp->serdes); + + if (cfg->has_phy_dp_com_ctrl) { + qmp->dp_com = devm_platform_ioremap_resource(pdev, 1); + if (IS_ERR(qmp->dp_com)) + return PTR_ERR(qmp->dp_com); + } /* * FIXME: These bindings should be fixed to not rely on overlapping @@ -2449,16 +2458,6 @@ static int qmp_usb_create(struct qmp_usb *qmp, struct device_node *np) "failed to get pipe clock\n"); } - generic_phy = devm_phy_create(dev, np, &qmp_usb_phy_ops); - if (IS_ERR(generic_phy)) { - ret = PTR_ERR(generic_phy); - dev_err(dev, "failed to create PHY: %d\n", ret); - return ret; - } - - qmp->phy = generic_phy; - phy_set_drvdata(generic_phy, qmp); - return 0; } @@ -2480,16 
+2479,6 @@ static int qmp_usb_probe(struct platform_device *pdev) if (!qmp->cfg) return -EINVAL; - qmp->serdes = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(qmp->serdes)) - return PTR_ERR(qmp->serdes); - - if (qmp->cfg->has_phy_dp_com_ctrl) { - qmp->dp_com = devm_platform_ioremap_resource(pdev, 1); - if (IS_ERR(qmp->dp_com)) - return PTR_ERR(qmp->dp_com); - } - ret = qmp_usb_clk_init(qmp); if (ret) return ret; @@ -2516,7 +2505,7 @@ static int qmp_usb_probe(struct platform_device *pdev) */ pm_runtime_forbid(dev); - ret = qmp_usb_create(qmp, child); + ret = qmp_usb_parse_dt_legacy(qmp, child); if (ret) goto err_node_put; @@ -2524,6 +2513,15 @@ static int qmp_usb_probe(struct platform_device *pdev) if (ret) goto err_node_put; + qmp->phy = devm_phy_create(dev, child, &qmp_usb_phy_ops); + if (IS_ERR(qmp->phy)) { + ret = PTR_ERR(qmp->phy); + dev_err(dev, "failed to create PHY: %d\n", ret); + goto err_node_put; + } + + phy_set_drvdata(qmp->phy, qmp); + of_node_put(child); phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); From c0a6c25283672facaa57cb3daad71c6586736312 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 28 Oct 2022 18:04:35 +0200 Subject: [PATCH 0943/4122] phy: qcom-qmp-usb: add support for updated sc8280xp binding Add support for the new SC8280XP binding. Note that the binding does not try to describe every register subregion and instead the driver holds the corresponding offsets. This includes the PCS_USB region which was initially overlooked. Note that the driver will no longer accept the old binding due to the fixed "phy_phy" reset name. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221028160435.26948-14-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 82 ++++++++++++++++++++----- 1 file changed, 67 insertions(+), 15 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index fa87e90a821e..55029ea63f73 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -1414,10 +1414,20 @@ static const struct qmp_phy_init_tbl sc8280xp_usb3_uniphy_pcs_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V5_PCS_REFGEN_REQ_CONFIG1, 0x21), }; +struct qmp_usb_offsets { + u16 serdes; + u16 pcs; + u16 pcs_usb; + u16 tx; + u16 rx; +}; + /* struct qmp_phy_cfg - per-PHY initialization config */ struct qmp_phy_cfg { int lanes; + const struct qmp_usb_offsets *offsets; + /* Init sequence for PHY blocks - serdes, tx, rx, pcs */ const struct qmp_phy_init_tbl *serdes_tbl; int serdes_tbl_num; @@ -1548,6 +1558,14 @@ static const char * const qmp_phy_vreg_l[] = { "vdda-phy", "vdda-pll", }; +static const struct qmp_usb_offsets qmp_usb_offsets_v5 = { + .serdes = 0, + .pcs = 0x0200, + .pcs_usb = 0x1200, + .tx = 0x0e00, + .rx = 0x1000, +}; + static const struct qmp_phy_cfg ipq8074_usb3phy_cfg = { .lanes = 1, @@ -1637,6 +1655,8 @@ static const struct qmp_phy_cfg sc7180_usb3phy_cfg = { static const struct qmp_phy_cfg sc8280xp_usb3_uniphy_cfg = { .lanes = 1, + .offsets = &qmp_usb_offsets_v5, + .serdes_tbl = sc8280xp_usb3_uniphy_serdes_tbl, .serdes_tbl_num = ARRAY_SIZE(sc8280xp_usb3_uniphy_serdes_tbl), .tx_tbl = sc8280xp_usb3_uniphy_tx_tbl, @@ -1647,12 +1667,11 @@ static const struct qmp_phy_cfg sc8280xp_usb3_uniphy_cfg = { .pcs_tbl_num = ARRAY_SIZE(sc8280xp_usb3_uniphy_pcs_tbl), .clk_list = qmp_v4_phy_clk_l, .num_clks = ARRAY_SIZE(qmp_v4_phy_clk_l), - .reset_list = msm8996_usb3phy_reset_l, - .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), + .reset_list = 
qcm2290_usb3phy_reset_l, + .num_resets = ARRAY_SIZE(qcm2290_usb3phy_reset_l), .vreg_list = qmp_phy_vreg_l, .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = qmp_v4_usb3phy_regs_layout, - .pcs_usb_offset = 0x1000, }; static const struct qmp_phy_cfg qmp_v3_usb3_uniphy_cfg = { @@ -2461,11 +2480,41 @@ static int qmp_usb_parse_dt_legacy(struct qmp_usb *qmp, struct device_node *np) return 0; } +static int qmp_usb_parse_dt(struct qmp_usb *qmp) +{ + struct platform_device *pdev = to_platform_device(qmp->dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; + const struct qmp_usb_offsets *offs = cfg->offsets; + struct device *dev = qmp->dev; + void __iomem *base; + + if (!offs) + return -EINVAL; + + base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(base)) + return PTR_ERR(base); + + qmp->serdes = base + offs->serdes; + qmp->pcs = base + offs->pcs; + qmp->pcs_usb = base + offs->pcs_usb; + qmp->tx = base + offs->tx; + qmp->rx = base + offs->rx; + + qmp->pipe_clk = devm_clk_get(dev, "pipe"); + if (IS_ERR(qmp->pipe_clk)) { + return dev_err_probe(dev, PTR_ERR(qmp->pipe_clk), + "failed to get pipe clock\n"); + } + + return 0; +} + static int qmp_usb_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct device_node *child; struct phy_provider *phy_provider; + struct device_node *np; struct qmp_usb *qmp; int ret; @@ -2491,9 +2540,16 @@ static int qmp_usb_probe(struct platform_device *pdev) if (ret) return ret; - child = of_get_next_available_child(dev->of_node, NULL); - if (!child) - return -EINVAL; + /* Check for legacy binding with child node. 
*/ + np = of_get_next_available_child(dev->of_node, NULL); + if (np) { + ret = qmp_usb_parse_dt_legacy(qmp, np); + } else { + np = of_node_get(dev->of_node); + ret = qmp_usb_parse_dt(qmp); + } + if (ret) + goto err_node_put; pm_runtime_set_active(dev); ret = devm_pm_runtime_enable(dev); @@ -2505,15 +2561,11 @@ static int qmp_usb_probe(struct platform_device *pdev) */ pm_runtime_forbid(dev); - ret = qmp_usb_parse_dt_legacy(qmp, child); + ret = phy_pipe_clk_register(qmp, np); if (ret) goto err_node_put; - ret = phy_pipe_clk_register(qmp, child); - if (ret) - goto err_node_put; - - qmp->phy = devm_phy_create(dev, child, &qmp_usb_phy_ops); + qmp->phy = devm_phy_create(dev, np, &qmp_usb_phy_ops); if (IS_ERR(qmp->phy)) { ret = PTR_ERR(qmp->phy); dev_err(dev, "failed to create PHY: %d\n", ret); @@ -2522,14 +2574,14 @@ static int qmp_usb_probe(struct platform_device *pdev) phy_set_drvdata(qmp->phy, qmp); - of_node_put(child); + of_node_put(np); phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); return PTR_ERR_OR_ZERO(phy_provider); err_node_put: - of_node_put(child); + of_node_put(np); return ret; } From 32fb07f35675c4c3311ae370471ee1ae6cc3e694 Mon Sep 17 00:00:00 2001 From: Justin Chen Date: Wed, 5 Oct 2022 14:30:13 -0700 Subject: [PATCH 0944/4122] phy: usb: Improve port mode selection Split port modes into two different variables. Supported port modes is what the hardware supports. While port mode is how the hardware is currently configured and can be dynamically changed through the sysfs. We initialize all supported port modes on init even though the port mode may not be selected because we cannot guarantee the downstream interface from the phy will be active or not. This also fixes an issue where port modes selected via sysfs were not being saved through suspend/resume. 
Signed-off-by: Justin Chen Acked-by: Florian Fainelli Link: https://lore.kernel.org/r/1665005418-15807-2-git-send-email-justinpopo6@gmail.com Signed-off-by: Vinod Koul --- drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c | 12 ++++++------ drivers/phy/broadcom/phy-brcm-usb-init.c | 10 +++++----- drivers/phy/broadcom/phy-brcm-usb-init.h | 10 +++++----- drivers/phy/broadcom/phy-brcm-usb.c | 14 +++++++++----- 4 files changed, 25 insertions(+), 21 deletions(-) diff --git a/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c b/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c index d2524b70ea16..430a8ae0cd24 100644 --- a/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c +++ b/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c @@ -195,10 +195,10 @@ static void usb_init_common(struct brcm_usb_init_params *params) if (USB_CTRL_MASK(USB_DEVICE_CTL1, PORT_MODE)) { reg = brcm_usb_readl(USB_CTRL_REG(ctrl, USB_DEVICE_CTL1)); reg &= ~USB_CTRL_MASK(USB_DEVICE_CTL1, PORT_MODE); - reg |= params->mode; + reg |= params->port_mode; brcm_usb_writel(reg, USB_CTRL_REG(ctrl, USB_DEVICE_CTL1)); } - switch (params->mode) { + switch (params->supported_port_modes) { case USB_CTLR_MODE_HOST: USB_CTRL_UNSET(ctrl, USB_PM, BDC_SOFT_RESETB); break; @@ -276,7 +276,7 @@ static void usb_init_common_7211b0(struct brcm_usb_init_params *params) /* Set the PHY_MODE */ reg = brcm_usb_readl(usb_phy + USB_PHY_UTMI_CTL_1); reg &= ~USB_PHY_UTMI_CTL_1_PHY_MODE_MASK; - reg |= params->mode << USB_PHY_UTMI_CTL_1_PHY_MODE_SHIFT; + reg |= params->supported_port_modes << USB_PHY_UTMI_CTL_1_PHY_MODE_SHIFT; brcm_usb_writel(reg, usb_phy + USB_PHY_UTMI_CTL_1); usb_init_common(params); @@ -286,7 +286,7 @@ static void usb_init_common_7211b0(struct brcm_usb_init_params *params) * the default "Read Transaction Size" of 6 (1024 bytes). * Set it to 4 (256 bytes). 
*/ - if ((params->mode != USB_CTLR_MODE_HOST) && bdc_ec) { + if ((params->supported_port_modes != USB_CTLR_MODE_HOST) && bdc_ec) { reg = brcm_usb_readl(bdc_ec + BDC_EC_AXIRDA); reg &= ~BDC_EC_AXIRDA_RTS_MASK; reg |= (0x4 << BDC_EC_AXIRDA_RTS_SHIFT); @@ -385,7 +385,7 @@ static int usb_get_dual_select(struct brcm_usb_init_params *params) return reg; } -static void usb_set_dual_select(struct brcm_usb_init_params *params, int mode) +static void usb_set_dual_select(struct brcm_usb_init_params *params) { void __iomem *ctrl = params->regs[BRCM_REGS_CTRL]; u32 reg; @@ -394,7 +394,7 @@ static void usb_set_dual_select(struct brcm_usb_init_params *params, int mode) reg = brcm_usb_readl(USB_CTRL_REG(ctrl, USB_DEVICE_CTL1)); reg &= ~USB_CTRL_MASK(USB_DEVICE_CTL1, PORT_MODE); - reg |= mode; + reg |= params->port_mode; brcm_usb_writel(reg, USB_CTRL_REG(ctrl, USB_DEVICE_CTL1)); } diff --git a/drivers/phy/broadcom/phy-brcm-usb-init.c b/drivers/phy/broadcom/phy-brcm-usb-init.c index dddcbd3cd5f3..a7f8b3d3264d 100644 --- a/drivers/phy/broadcom/phy-brcm-usb-init.c +++ b/drivers/phy/broadcom/phy-brcm-usb-init.c @@ -876,11 +876,11 @@ static void usb_init_common(struct brcm_usb_init_params *params) reg = brcm_usb_readl(USB_CTRL_REG(ctrl, USB_DEVICE_CTL1)); reg &= ~USB_CTRL_MASK_FAMILY(params, USB_DEVICE_CTL1, PORT_MODE); - reg |= params->mode; + reg |= params->port_mode; brcm_usb_writel(reg, USB_CTRL_REG(ctrl, USB_DEVICE_CTL1)); } if (USB_CTRL_MASK_FAMILY(params, USB_PM, BDC_SOFT_RESETB)) { - switch (params->mode) { + switch (params->supported_port_modes) { case USB_CTLR_MODE_HOST: USB_CTRL_UNSET_FAMILY(params, USB_PM, BDC_SOFT_RESETB); break; @@ -891,7 +891,7 @@ static void usb_init_common(struct brcm_usb_init_params *params) } } if (USB_CTRL_MASK_FAMILY(params, SETUP, CC_DRD_MODE_ENABLE)) { - if (params->mode == USB_CTLR_MODE_TYPEC_PD) + if (params->supported_port_modes == USB_CTLR_MODE_TYPEC_PD) USB_CTRL_SET_FAMILY(params, SETUP, CC_DRD_MODE_ENABLE); else USB_CTRL_UNSET_FAMILY(params, 
SETUP, @@ -1000,7 +1000,7 @@ static int usb_get_dual_select(struct brcm_usb_init_params *params) return reg; } -static void usb_set_dual_select(struct brcm_usb_init_params *params, int mode) +static void usb_set_dual_select(struct brcm_usb_init_params *params) { void __iomem *ctrl = params->regs[BRCM_REGS_CTRL]; u32 reg; @@ -1011,7 +1011,7 @@ static void usb_set_dual_select(struct brcm_usb_init_params *params, int mode) reg = brcm_usb_readl(USB_CTRL_REG(ctrl, USB_DEVICE_CTL1)); reg &= ~USB_CTRL_MASK_FAMILY(params, USB_DEVICE_CTL1, PORT_MODE); - reg |= mode; + reg |= params->port_mode; brcm_usb_writel(reg, USB_CTRL_REG(ctrl, USB_DEVICE_CTL1)); } } diff --git a/drivers/phy/broadcom/phy-brcm-usb-init.h b/drivers/phy/broadcom/phy-brcm-usb-init.h index 1ccb5ddab865..bedf2b8e2f19 100644 --- a/drivers/phy/broadcom/phy-brcm-usb-init.h +++ b/drivers/phy/broadcom/phy-brcm-usb-init.h @@ -45,14 +45,15 @@ struct brcm_usb_init_ops { void (*uninit_eohci)(struct brcm_usb_init_params *params); void (*uninit_xhci)(struct brcm_usb_init_params *params); int (*get_dual_select)(struct brcm_usb_init_params *params); - void (*set_dual_select)(struct brcm_usb_init_params *params, int mode); + void (*set_dual_select)(struct brcm_usb_init_params *params); }; struct brcm_usb_init_params { void __iomem *regs[BRCM_REGS_MAX]; int ioc; int ipp; - int mode; + int supported_port_modes; + int port_mode; u32 family_id; u32 product_id; int selected_family; @@ -153,11 +154,10 @@ static inline int brcm_usb_get_dual_select(struct brcm_usb_init_params *ini) return 0; } -static inline void brcm_usb_set_dual_select(struct brcm_usb_init_params *ini, - int mode) +static inline void brcm_usb_set_dual_select(struct brcm_usb_init_params *ini) { if (ini->ops->set_dual_select) - ini->ops->set_dual_select(ini, mode); + ini->ops->set_dual_select(ini); } #endif /* _USB_BRCM_COMMON_INIT_H */ diff --git a/drivers/phy/broadcom/phy-brcm-usb.c b/drivers/phy/broadcom/phy-brcm-usb.c index 2cb3779fcdf8..99d4deabfd97 100644 
--- a/drivers/phy/broadcom/phy-brcm-usb.c +++ b/drivers/phy/broadcom/phy-brcm-usb.c @@ -233,7 +233,7 @@ static ssize_t dr_mode_show(struct device *dev, return sprintf(buf, "%s\n", value_to_name(&brcm_dr_mode_to_name[0], ARRAY_SIZE(brcm_dr_mode_to_name), - priv->ini.mode)); + priv->ini.supported_port_modes)); } static DEVICE_ATTR_RO(dr_mode); @@ -249,7 +249,8 @@ static ssize_t dual_select_store(struct device *dev, res = name_to_value(&brcm_dual_mode_to_name[0], ARRAY_SIZE(brcm_dual_mode_to_name), buf, &value); if (!res) { - brcm_usb_set_dual_select(&priv->ini, value); + priv->ini.port_mode = value; + brcm_usb_set_dual_select(&priv->ini); res = len; } mutex_unlock(&sysfs_lock); @@ -495,13 +496,16 @@ static int brcm_usb_phy_probe(struct platform_device *pdev) of_property_read_u32(dn, "brcm,ipp", &priv->ini.ipp); of_property_read_u32(dn, "brcm,ioc", &priv->ini.ioc); - priv->ini.mode = USB_CTLR_MODE_HOST; + priv->ini.supported_port_modes = USB_CTLR_MODE_HOST; err = of_property_read_string(dn, "dr_mode", &mode); if (err == 0) { name_to_value(&brcm_dr_mode_to_name[0], ARRAY_SIZE(brcm_dr_mode_to_name), - mode, &priv->ini.mode); + mode, &priv->ini.supported_port_modes); } + /* Default port_mode to supported port_modes */ + priv->ini.port_mode = priv->ini.supported_port_modes; + if (of_property_read_bool(dn, "brcm,has-xhci")) priv->has_xhci = true; if (of_property_read_bool(dn, "brcm,has-eohci")) @@ -539,7 +543,7 @@ static int brcm_usb_phy_probe(struct platform_device *pdev) * Create sysfs entries for mode. 
* Remove "dual_select" attribute if not in dual mode */ - if (priv->ini.mode != USB_CTLR_MODE_DRD) + if (priv->ini.supported_port_modes != USB_CTLR_MODE_DRD) brcm_usb_phy_attrs[1] = NULL; err = sysfs_create_group(&dev->kobj, &brcm_usb_phy_group); if (err) From 19526717f768bf2f89ca01bd2a595728ebe57540 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 2 Nov 2022 22:31:19 +0100 Subject: [PATCH 0945/4122] objtool: Optimize elf_dirty_reloc_sym() When moving a symbol in the symtab its index changes and any reloc referring to that symbol-table-index will need to be rewritten too. In order to facilitate this, objtool simply marks the whole reloc section 'changed' which will cause the whole section to be re-generated. However, finding the relocs that use any given symbol is implemented rather crudely -- a full iteration of all sections and their relocs. Given that some builds have over 20k sections (kallsyms etc..) iterating all that for *each* symbol moved takes a bit of time. Instead have each symbol keep a list of relocs that reference it. This *vastly* improves build times for certain configs.
Reported-by: Borislav Petkov Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/Y2LlRA7x+8UsE1xf@hirez.programming.kicks-ass.net --- tools/objtool/elf.c | 27 ++++++++++----------------- tools/objtool/include/objtool/elf.h | 2 ++ 2 files changed, 12 insertions(+), 17 deletions(-) diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 3d636d12d679..8cd7f018002c 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -356,6 +356,7 @@ static void elf_add_symbol(struct elf *elf, struct symbol *sym) struct rb_node *pnode; struct symbol *iter; + INIT_LIST_HEAD(&sym->reloc_list); INIT_LIST_HEAD(&sym->pv_target); sym->alias = sym; @@ -557,6 +558,7 @@ int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset, reloc->sym = sym; reloc->addend = addend; + list_add_tail(&reloc->sym_reloc_entry, &sym->reloc_list); list_add_tail(&reloc->list, &sec->reloc->reloc_list); elf_hash_add(reloc, &reloc->hash, reloc_hash(reloc)); @@ -573,21 +575,10 @@ int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset, */ static void elf_dirty_reloc_sym(struct elf *elf, struct symbol *sym) { - struct section *sec; + struct reloc *reloc; - list_for_each_entry(sec, &elf->sections, list) { - struct reloc *reloc; - - if (sec->changed) - continue; - - list_for_each_entry(reloc, &sec->reloc_list, list) { - if (reloc->sym == sym) { - sec->changed = true; - break; - } - } - } + list_for_each_entry(reloc, &sym->reloc_list, sym_reloc_entry) + reloc->sec->changed = true; } /* @@ -902,11 +893,12 @@ static int read_rela_reloc(struct section *sec, int i, struct reloc *reloc, unsi static int read_relocs(struct elf *elf) { + unsigned long nr_reloc, max_reloc = 0, tot_reloc = 0; struct section *sec; struct reloc *reloc; - int i; unsigned int symndx; - unsigned long nr_reloc, max_reloc = 0, tot_reloc = 0; + struct symbol *sym; + int i; if (!elf_alloc_hash(reloc, elf->text_size / 16)) return -1; @@ -947,13 +939,14 @@ static int read_relocs(struct elf 
*elf) reloc->sec = sec; reloc->idx = i; - reloc->sym = find_symbol_by_index(elf, symndx); + reloc->sym = sym = find_symbol_by_index(elf, symndx); if (!reloc->sym) { WARN("can't find reloc entry symbol %d for %s", symndx, sec->name); return -1; } + list_add_tail(&reloc->sym_reloc_entry, &sym->reloc_list); list_add_tail(&reloc->list, &sec->reloc_list); elf_hash_add(reloc, &reloc->hash, reloc_hash(reloc)); diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index b6974e3173aa..bca719b2104b 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -62,6 +62,7 @@ struct symbol { u8 fentry : 1; u8 profiling_func : 1; struct list_head pv_target; + struct list_head reloc_list; }; struct reloc { @@ -73,6 +74,7 @@ struct reloc { }; struct section *sec; struct symbol *sym; + struct list_head sym_reloc_entry; unsigned long offset; unsigned int type; s64 addend; From 023f2340f053537cce170c31c430b0886c6f07ca Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 3 Nov 2022 20:57:51 +0100 Subject: [PATCH 0946/4122] objtool: Fix weak hole vs prefix symbol Boris (and the robot) reported that objtool grew a new complaint about unreachable instructions. Upon inspection it was immediately clear the __weak zombie instructions struck again. For the unwary, the linker will simply remove the symbol for overridden __weak symbols but leave the instructions in place, creating unreachable instructions -- and objtool likes to report these. Commit 4adb23686795 ("objtool: Ignore extra-symbol code") was supposed to have dealt with that, but the new commit 9f2899fe36a6 ("objtool: Add option to generate prefix symbols") subtly broke that logic by creating unvisited symbols. 
Fixes: 9f2899fe36a6 ("objtool: Add option to generate prefix symbols") Reported-by: Borislav Petkov Reported-by: kernel test robot Signed-off-by: Peter Zijlstra (Intel) --- tools/objtool/check.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 55066c493570..4f1a7384426b 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -4053,8 +4053,28 @@ static int add_prefix_symbol(struct objtool_file *file, struct symbol *func, offset = func->offset - prev->offset; if (offset >= opts.prefix) { - if (offset == opts.prefix) + if (offset == opts.prefix) { + /* + * Since the sec->symbol_list is ordered by + * offset (see elf_add_symbol()) the added + * symbol will not be seen by the iteration in + * validate_section(). + * + * Hence the lack of list_for_each_entry_safe() + * there. + * + * The direct consequence is that prefix symbols + * don't get visited (because pointless), except + * for the logic in ignore_unreachable_insn() + * that needs the terminating insn to be visited + * otherwise it will report the hole. + * + * Hence mark the first instruction of the + * prefix symbol as visited. 
+ */ + prev->visited |= VISITED_BRANCH; elf_create_prefix_symbol(file->elf, func, opts.prefix); + } break; } insn = prev; From b32fd8a60f5d855758208c2b5b49cba8087f03c4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 3 Nov 2022 21:17:03 +0100 Subject: [PATCH 0947/4122] x86,pm: Force out-of-line memcpy() GCC fancies inlining memcpy(), and because it cannot prove the destination is page-aligned (it is) it ends up generating atrocious code like: 19e: 48 8b 15 00 00 00 00 mov 0x0(%rip),%rdx # 1a5 1a1: R_X86_64_PC32 core_restore_code-0x4 1a5: 48 8d 78 08 lea 0x8(%rax),%rdi 1a9: 48 89 c1 mov %rax,%rcx 1ac: 48 c7 c6 00 00 00 00 mov $0x0,%rsi 1af: R_X86_64_32S core_restore_code 1b3: 48 83 e7 f8 and $0xfffffffffffffff8,%rdi 1b7: 48 29 f9 sub %rdi,%rcx 1ba: 48 89 10 mov %rdx,(%rax) 1bd: 48 8b 15 00 00 00 00 mov 0x0(%rip),%rdx # 1c4 1c0: R_X86_64_PC32 core_restore_code+0xff4 1c4: 48 29 ce sub %rcx,%rsi 1c7: 81 c1 00 10 00 00 add $0x1000,%ecx 1cd: 48 89 90 f8 0f 00 00 mov %rdx,0xff8(%rax) 1d4: c1 e9 03 shr $0x3,%ecx 1d7: f3 48 a5 rep movsq %ds:(%rsi),%es:(%rdi) Notably the alignment code generates a text reference to core_restore_code+0xff8, for which objtool raises the objection: vmlinux.o: warning: objtool: relocate_restore_code+0x3d: relocation to !ENDBR: next_arg+0x18 Applying some __assume_aligned(PAGE_SIZE) improves code-gen to: 19e: 48 89 c7 mov %rax,%rdi 1a1: 48 c7 c6 00 00 00 00 mov $0x0,%rsi 1a4: R_X86_64_32S core_restore_code 1a8: b9 00 02 00 00 mov $0x200,%ecx 1ad: f3 48 a5 rep movsq %ds:(%rsi),%es:(%rdi) And resolves the problem, however, none of this is important code and a much simpler solution still is to force a memcpy() call: 1a1: ba 00 10 00 00 mov $0x1000,%edx 1a6: 48 c7 c6 00 00 00 00 mov $0x0,%rsi 1a9: R_X86_64_32S core_restore_code 1ad: e8 00 00 00 00 call 1b2 1ae: R_X86_64_PLT32 __memcpy-0x4 Signed-off-by: Peter Zijlstra (Intel) --- arch/x86/power/hibernate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/arch/x86/power/hibernate.c b/arch/x86/power/hibernate.c index e94e0050a583..6f955eb1e163 100644 --- a/arch/x86/power/hibernate.c +++ b/arch/x86/power/hibernate.c @@ -159,7 +159,7 @@ int relocate_restore_code(void) if (!relocated_restore_code) return -ENOMEM; - memcpy((void *)relocated_restore_code, core_restore_code, PAGE_SIZE); + __memcpy((void *)relocated_restore_code, core_restore_code, PAGE_SIZE); /* Make the page containing the relocated code executable */ pgd = (pgd_t *)__va(read_cr3_pa()) + From 4fd5f70ce14da230c6a29648c3d51a48ee0b4bfd Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 1 Nov 2022 10:25:07 -0700 Subject: [PATCH 0948/4122] x86/Kconfig: Enable kernel IBT by default The kernel IBT defense strongly mitigates the common "first step" of ROP attacks, by eliminating arbitrary stack pivots (that appear either at the end of a function or in immediate values), which cannot be reached if indirect calls must be to marked function entry addresses. IBT is also required to be enabled to gain the FineIBT feature when built with Kernel Control Flow Integrity. Additionally, given that this feature is runtime enabled via CPU ID, it clearly should be built in by default; it will only be enabled if the CPU supports it. The build takes 2 seconds longer, which seems a small price to pay for gaining this coverage by default. 
Suggested-by: Sami Tolvanen Signed-off-by: Kees Cook Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221101172503.gonna.094-kees@kernel.org --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 479ee63898f5..aaf1f0f46161 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1856,7 +1856,7 @@ config CC_HAS_IBT config X86_KERNEL_IBT prompt "Indirect Branch Tracking" - bool + def_bool y depends on X86_64 && CC_HAS_IBT && HAVE_OBJTOOL # https://github.com/llvm/llvm-project/commit/9d7001eba9c4cb311e03cd8cdc231f9e579f2d0f depends on !LD_IS_LLD || LLD_VERSION >= 140000 From c7d47d51663c2cbb9f07ac827f9dffa98d6cab21 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 2 Nov 2022 14:44:59 -0400 Subject: [PATCH 0949/4122] dt-bindings: power: supply: define monitored-battery in common place Define the type of monitored-battery in power-supply.yaml common schema. Reference the schema where applicable to enforce the above in bindings which have monitored-battery property. 
Signed-off-by: Krzysztof Kozlowski Reviewed-by: Rob Herring Signed-off-by: Sebastian Reichel --- Documentation/devicetree/bindings/mfd/ene-kb930.yaml | 6 +++--- Documentation/devicetree/bindings/mfd/rockchip,rk817.yaml | 2 ++ Documentation/devicetree/bindings/power/supply/bq27xxx.yaml | 2 -- .../devicetree/bindings/power/supply/ingenic,battery.yaml | 4 ++-- .../devicetree/bindings/power/supply/power-supply.yaml | 6 ++++++ .../devicetree/bindings/power/supply/rohm,bd99954.yaml | 1 + .../devicetree/bindings/power/supply/sc2731-charger.yaml | 1 - 7 files changed, 14 insertions(+), 8 deletions(-) diff --git a/Documentation/devicetree/bindings/mfd/ene-kb930.yaml b/Documentation/devicetree/bindings/mfd/ene-kb930.yaml index 06ed9ec8f4bb..7c0a42390f18 100644 --- a/Documentation/devicetree/bindings/mfd/ene-kb930.yaml +++ b/Documentation/devicetree/bindings/mfd/ene-kb930.yaml @@ -13,6 +13,8 @@ description: | maintainers: - Dmitry Osipenko +$ref: /schemas/power/supply/power-supply.yaml + properties: compatible: items: @@ -22,15 +24,13 @@ properties: reg: maxItems: 1 - monitored-battery: true - power-supplies: true system-power-controller: true required: - compatible - reg -additionalProperties: false +unevaluatedProperties: false examples: - | diff --git a/Documentation/devicetree/bindings/mfd/rockchip,rk817.yaml b/Documentation/devicetree/bindings/mfd/rockchip,rk817.yaml index 935e17099213..269fb85b2027 100644 --- a/Documentation/devicetree/bindings/mfd/rockchip,rk817.yaml +++ b/Documentation/devicetree/bindings/mfd/rockchip,rk817.yaml @@ -124,6 +124,8 @@ properties: The child node for the charger to hold additional properties. If a battery is not in use, this node can be omitted. 
type: object + $ref: /schemas/power/supply/power-supply.yaml + properties: monitored-battery: description: | diff --git a/Documentation/devicetree/bindings/power/supply/bq27xxx.yaml b/Documentation/devicetree/bindings/power/supply/bq27xxx.yaml index 65fc6049efc1..347d4433adc5 100644 --- a/Documentation/devicetree/bindings/power/supply/bq27xxx.yaml +++ b/Documentation/devicetree/bindings/power/supply/bq27xxx.yaml @@ -60,13 +60,11 @@ properties: monitored-battery: description: | - phandle of battery characteristics node. The fuel gauge uses the following battery properties: - energy-full-design-microwatt-hours - charge-full-design-microamp-hours - voltage-min-design-microvolt Both or neither of the *-full-design-*-hours properties must be set. - See Documentation/devicetree/bindings/power/supply/battery.yaml power-supplies: true diff --git a/Documentation/devicetree/bindings/power/supply/ingenic,battery.yaml b/Documentation/devicetree/bindings/power/supply/ingenic,battery.yaml index 46527038bf22..42fcfc026972 100644 --- a/Documentation/devicetree/bindings/power/supply/ingenic,battery.yaml +++ b/Documentation/devicetree/bindings/power/supply/ingenic,battery.yaml @@ -10,6 +10,8 @@ title: Ingenic JZ47xx battery bindings maintainers: - Artur Rojek +$ref: power-supply.yaml# + properties: compatible: oneOf: @@ -28,8 +30,6 @@ properties: monitored-battery: description: > - phandle to a "simple-battery" compatible node. 
- This property must be a phandle to a node using the format described in battery.yaml, with the following properties being required: - voltage-min-design-microvolt: drained battery voltage, diff --git a/Documentation/devicetree/bindings/power/supply/power-supply.yaml b/Documentation/devicetree/bindings/power/supply/power-supply.yaml index 2f672e6e8d72..4e54c937973e 100644 --- a/Documentation/devicetree/bindings/power/supply/power-supply.yaml +++ b/Documentation/devicetree/bindings/power/supply/power-supply.yaml @@ -18,4 +18,10 @@ properties: This property is added to a supply in order to list the devices which supply it power, referenced by their phandles. + monitored-battery: + $ref: /schemas/types.yaml#/definitions/phandle + description: + The battery (with "simple-battery" compatible) being monitored by this + power supply. + additionalProperties: true diff --git a/Documentation/devicetree/bindings/power/supply/rohm,bd99954.yaml b/Documentation/devicetree/bindings/power/supply/rohm,bd99954.yaml index 24b06957b4ca..14d9b42eda27 100644 --- a/Documentation/devicetree/bindings/power/supply/rohm,bd99954.yaml +++ b/Documentation/devicetree/bindings/power/supply/rohm,bd99954.yaml @@ -18,6 +18,7 @@ description: | provides a Dual-source Battery Charger, two port BC1.2 detection and a Battery Monitor. +$ref: power-supply.yaml# properties: compatible: diff --git a/Documentation/devicetree/bindings/power/supply/sc2731-charger.yaml b/Documentation/devicetree/bindings/power/supply/sc2731-charger.yaml index eeb043f9bb4f..735f7d372ae1 100644 --- a/Documentation/devicetree/bindings/power/supply/sc2731-charger.yaml +++ b/Documentation/devicetree/bindings/power/supply/sc2731-charger.yaml @@ -28,7 +28,6 @@ properties: The charger uses the following battery properties - charge-term-current-microamp: current for charge termination phase. - constant-charge-voltage-max-microvolt: maximum constant input voltage. 
- See Documentation/devicetree/bindings/power/supply/battery.yaml additionalProperties: false From 8c9e038a2a36af49526cd509dbd8a1b34284cac6 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 2 Nov 2022 14:45:00 -0400 Subject: [PATCH 0950/4122] dt-bindings: power: supply: bq25890: allow power-supply fields The BQ25890 schema references common power-supply.yaml, so allow all its properties to fix warnings like: arch/arm64/boot/dts/freescale/imx8mq-librem5-r2.dtb: charger@6a: 'monitored-battery', 'power-supplies', ... do not match any of the regexes: 'pinctrl-[0-9]+' Signed-off-by: Krzysztof Kozlowski Reviewed-by: Rob Herring Signed-off-by: Sebastian Reichel --- Documentation/devicetree/bindings/power/supply/bq25890.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/power/supply/bq25890.yaml b/Documentation/devicetree/bindings/power/supply/bq25890.yaml index 204c0147188f..fd01ff10b4e0 100644 --- a/Documentation/devicetree/bindings/power/supply/bq25890.yaml +++ b/Documentation/devicetree/bindings/power/supply/bq25890.yaml @@ -93,7 +93,7 @@ required: - ti,boost-voltage - ti,boost-max-current -additionalProperties: false +unevaluatedProperties: false examples: - | From 70547f34deedcb039c8f92c603b70bc8d8992432 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 2 Nov 2022 14:45:01 -0400 Subject: [PATCH 0951/4122] dt-bindings: power: supply: maxim,max17042: allow power-supplies MAX17042 Fuel Gauge uses power-supplies supplies: arch/arm64/boot/dts/freescale/imx8mq-librem5-r2.dtb: fuel-gauge@36: 'power-supplies' does not match any of the regexes: 'pinctrl-[0-9]+' Signed-off-by: Krzysztof Kozlowski Reviewed-by: Rob Herring Signed-off-by: Sebastian Reichel --- .../devicetree/bindings/power/supply/maxim,max17042.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/power/supply/maxim,max17042.yaml b/Documentation/devicetree/bindings/power/supply/maxim,max17042.yaml 
index aff5d0792e0f..64a0edb7bc47 100644 --- a/Documentation/devicetree/bindings/power/supply/maxim,max17042.yaml +++ b/Documentation/devicetree/bindings/power/supply/maxim,max17042.yaml @@ -59,6 +59,8 @@ properties: Voltage threshold to report battery as over voltage (in mV). Default is not to report over-voltage events. + power-supplies: true + required: - compatible - reg From 09b327c991d21f7b432ba7b88b2368a6970c3916 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 2 Nov 2022 14:20:11 -0400 Subject: [PATCH 0952/4122] dt-bindings: power: supply: bq25890: use one fallback compatible BQ2589[0256] seem compatible between each other in major aspects and remaining features are auto-detectable (by reading device revision ID register). Existing DTS already uses the compatibles with a fallback, so adjust the bindings to reflect this and fix dtbs_check warning: arch/arm64/boot/dts/freescale/imx8mq-librem5-r2.dtb: charger@6a: compatible: ['ti,bq25895', 'ti,bq25890'] is too long Signed-off-by: Krzysztof Kozlowski Reviewed-by: Rob Herring Signed-off-by: Sebastian Reichel --- .../devicetree/bindings/power/supply/bq25890.yaml | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/Documentation/devicetree/bindings/power/supply/bq25890.yaml b/Documentation/devicetree/bindings/power/supply/bq25890.yaml index fd01ff10b4e0..ee51b6335e72 100644 --- a/Documentation/devicetree/bindings/power/supply/bq25890.yaml +++ b/Documentation/devicetree/bindings/power/supply/bq25890.yaml @@ -15,11 +15,15 @@ allOf: properties: compatible: - enum: - - ti,bq25890 - - ti,bq25892 - - ti,bq25895 - - ti,bq25896 + oneOf: + - enum: + - ti,bq25890 + - items: + - enum: + - ti,bq25892 + - ti,bq25895 + - ti,bq25896 + - const: ti,bq25890 reg: maxItems: 1 From b1599915f09157e98f59556e1b2eafe473603347 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 6 Nov 2022 09:55:56 +0100 Subject: [PATCH 0953/4122] x86/cpufeatures: Move X86_FEATURE_CALL_DEPTH from bit 18 to bit 19 of 
word 11, to leave space for WIP X86_FEATURE_SGX_EDECCSSA bit Reallocate a soft-cpufeatures bit allocated for call-depth tracking code, which clashes with this recent KVM/SGX patch being worked on: KVM/VMX: Allow exposing EDECCSSA user leaf function to KVM guest Instead of reallocating cpufeatures bits in evil merges, make the allocation explicit. Acked-by: Peter Zijlstra Cc: Thomas Gleixner Cc: Borislav Petkov Cc: Dave Hansen Cc: x86@kernel.org Cc: Linus Torvalds Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeatures.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index aefd0816a333..864c9b0dda68 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -304,7 +304,8 @@ #define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */ #define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */ #define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */ -#define X86_FEATURE_CALL_DEPTH (11*32+18) /* "" Call depth tracking for RSB stuffing */ + /* Hole left for X86_FEATURE_SGX_EDECCSSA */ +#define X86_FEATURE_CALL_DEPTH (11*32+19) /* "" Call depth tracking for RSB stuffing */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ From abef378c434e6f5abd46fd536e9972374fb74e98 Mon Sep 17 00:00:00 2001 From: Arumugam Kolappan Date: Tue, 1 Nov 2022 00:27:44 -0700 Subject: [PATCH 0954/4122] RDMA/mlx5: Change debug log level for remote access error syndromes The mlx5 driver dumps the entire CQE buffer by default for few syndromes. Some syndromes are expected due to the application behavior [ex: MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR, MLX5_CQE_SYNDROME_REMOTE_OP_ERR and MLX5_CQE_SYNDROME_LOCAL_PROT_ERR]. 
Hence, for these syndromes, the patch converts the log level from KERN_WARNING to KERN_DEBUG. This enables the application to get the CQE buffer dump by changing to KERN_DEBUG level as and when needed. Suggested-by: Leon Romanovsky Signed-off-by: Arumugam Kolappan Link: https://lore.kernel.org/r/1667287664-19377-1-git-send-email-aru.kolappan@oracle.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/cq.c | 27 +++++++++++++++------------ drivers/infiniband/hw/mlx5/mlx5_ib.h | 4 ++++ 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index be189e0525de..efc9e4a6df04 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -267,17 +267,20 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE; } -static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe) +static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe, + struct ib_wc *wc, const char *level) { - mlx5_ib_warn(dev, "dump error cqe\n"); - mlx5_dump_err_cqe(dev->mdev, cqe); + mlx5_ib_log(level, dev, "WC error: %d, Message: %s\n", wc->status, + ib_wc_status_msg(wc->status)); + print_hex_dump(level, "cqe_dump: ", DUMP_PREFIX_OFFSET, 16, 1, + cqe, sizeof(*cqe), false); } static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe, struct ib_wc *wc) { - int dump = 1; + const char *dump = KERN_WARNING; switch (cqe->syndrome) { case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR: @@ -287,10 +290,11 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev, wc->status = IB_WC_LOC_QP_OP_ERR; break; case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR: + dump = KERN_DEBUG; wc->status = IB_WC_LOC_PROT_ERR; break; case MLX5_CQE_SYNDROME_WR_FLUSH_ERR: - dump = 0; + dump = NULL; wc->status = IB_WC_WR_FLUSH_ERR; break; case MLX5_CQE_SYNDROME_MW_BIND_ERR: @@ -306,18 +310,20 @@ static void mlx5_handle_error_cqe(struct 
mlx5_ib_dev *dev, wc->status = IB_WC_REM_INV_REQ_ERR; break; case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR: + dump = KERN_DEBUG; wc->status = IB_WC_REM_ACCESS_ERR; break; case MLX5_CQE_SYNDROME_REMOTE_OP_ERR: + dump = KERN_DEBUG; wc->status = IB_WC_REM_OP_ERR; break; case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR: + dump = NULL; wc->status = IB_WC_RETRY_EXC_ERR; - dump = 0; break; case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR: + dump = NULL; wc->status = IB_WC_RNR_RETRY_EXC_ERR; - dump = 0; break; case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR: wc->status = IB_WC_REM_ABORT_ERR; @@ -328,11 +334,8 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev, } wc->vendor_err = cqe->vendor_err_synd; - if (dump) { - mlx5_ib_warn(dev, "WC error: %d, Message: %s\n", wc->status, - ib_wc_status_msg(wc->status)); - dump_cqe(dev, cqe); - } + if (dump) + dump_cqe(dev, cqe, wc, dump); } static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64, diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 4a7f7064bd0e..8b91babdd4c0 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -38,6 +38,10 @@ dev_warn(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__, \ __LINE__, current->pid, ##arg) +#define mlx5_ib_log(lvl, _dev, format, arg...) \ + dev_printk(lvl, &(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, ##arg) + #define MLX5_IB_DEFAULT_UIDX 0xffffff #define MLX5_USER_ASSIGNED_UIDX_MASK __mlx5_mask(qpc, user_index) From 118e021b4b66f758f8e8f21dc0e5e0a4c721e69e Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 7 Nov 2022 10:09:11 +1100 Subject: [PATCH 0955/4122] xfs: write page faults in iomap are not buffered writes When we reserve a delalloc region in xfs_buffered_write_iomap_begin, we mark the iomap as IOMAP_F_NEW so that the write context understands that it allocated the delalloc region. 
If we then fail that buffered write, xfs_buffered_write_iomap_end() checks for the IOMAP_F_NEW flag and if it is set, it punches out the unused delalloc region that was allocated for the write. The assumption this code makes is that all buffered write operations that can allocate space are run under an exclusive lock (i_rwsem). This is an invalid assumption: page faults in mmap()d regions call through this same function pair to map the file range being faulted and this runs only holding the inode->i_mapping->invalidate_lock in shared mode. IOWs, we can have races between page faults and write() calls that fail the nested page cache write operation that result in data loss. That is, the failing iomap_end call will punch out the data that the other racing iomap iteration brought into the page cache. This can be reproduced with generic/34[46] if we arbitrarily fail page cache copy-in operations from write() syscalls. Code analysis tells us that the iomap_page_mkwrite() function holds the already instantiated and uptodate folio locked across the iomap mapping iterations. Hence the folio cannot be removed from memory whilst we are mapping the range it covers, and as such we do not care if the mapping changes state underneath the iomap iteration loop: 1. if the folio is not already dirty, there are no writeback races possible. 2. if we allocated the mapping (delalloc or unwritten), the folio cannot already be dirty. See #1. 3. If the folio is already dirty, it must be up to date. As we hold it locked, it cannot be reclaimed from memory. Hence we always have valid data in the page cache while iterating the mapping. 4. Valid data in the page cache can exist when the underlying mapping is DELALLOC, UNWRITTEN or WRITTEN. Having the mapping change from DELALLOC->UNWRITTEN or UNWRITTEN->WRITTEN does not change the data in the page - it only affects actions if we are initialising a new page. 
Hence #3 applies and we don't care about these extent map transitions racing with iomap_page_mkwrite(). 5. iomap_page_mkwrite() checks for page invalidation races (truncate, hole punch, etc) after it locks the folio. We also hold the mapping->invalidate_lock here, and hence the mapping cannot change due to extent removal operations while we are iterating the folio. As such, filesystems that don't use bufferheads will never fail the iomap_folio_mkwrite_iter() operation on the current mapping, regardless of whether the iomap should be considered stale. Further, the range we are asked to iterate is limited to the range inside EOF that the folio spans. Hence, for XFS, we will only map the exact range we are asked for, and we will only do speculative preallocation with delalloc if we are mapping a hole at the EOF page. The iterator will consume the entire range of the folio that is within EOF, and anything beyond the EOF block cannot be accessed. We never need to truncate this post-EOF speculative prealloc away in the context of the iomap_page_mkwrite() iterator because if it remains unused we'll remove it when the last reference to the inode goes away. Hence we don't actually need an .iomap_end() cleanup/error handling path at all for iomap_page_mkwrite() for XFS. This means we can separate the page fault processing from the complexity of the .iomap_end() processing in the buffered write path. This also means that the buffered write path will also be able to take the mapping->invalidate_lock as necessary. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. 
Wong --- fs/xfs/xfs_file.c | 2 +- fs/xfs/xfs_iomap.c | 9 +++++++++ fs/xfs/xfs_iomap.h | 1 + 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index e462d39c840e..595a5bcf46b9 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1325,7 +1325,7 @@ __xfs_filemap_fault( if (write_fault) { xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); ret = iomap_page_mkwrite(vmf, - &xfs_buffered_write_iomap_ops); + &xfs_page_mkwrite_iomap_ops); xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); } else { ret = filemap_fault(vmf); diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 07da03976ec1..5cea069a38b4 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1187,6 +1187,15 @@ const struct iomap_ops xfs_buffered_write_iomap_ops = { .iomap_end = xfs_buffered_write_iomap_end, }; +/* + * iomap_page_mkwrite() will never fail in a way that requires delalloc extents + * that it allocated to be revoked. Hence we do not need an .iomap_end method + * for this operation. + */ +const struct iomap_ops xfs_page_mkwrite_iomap_ops = { + .iomap_begin = xfs_buffered_write_iomap_begin, +}; + static int xfs_read_iomap_begin( struct inode *inode, diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index c782e8c0479c..0f62ab633040 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -47,6 +47,7 @@ xfs_aligned_fsb_count( } extern const struct iomap_ops xfs_buffered_write_iomap_ops; +extern const struct iomap_ops xfs_page_mkwrite_iomap_ops; extern const struct iomap_ops xfs_direct_write_iomap_ops; extern const struct iomap_ops xfs_read_iomap_ops; extern const struct iomap_ops xfs_seek_iomap_ops; From f7fc5b7090372fc4dd7798c874635ca41b8ba733 Mon Sep 17 00:00:00 2001 From: Al Cooper Date: Wed, 5 Oct 2022 14:30:14 -0700 Subject: [PATCH 0956/4122] phy: usb: s2 WoL wakeup_count not incremented for USB->Eth devices The PHY's "wakeup_count" is not incrementing when waking from WoL. 
The wakeup count can be found in sysfs at: /sys/bus/platform/devices/rdb/*.usb-phy/power/wakeup_count. The problem is that the system wakeup event handler was being passed the wrong "device" by the PHY driver. Fixes: f1c0db40a3ad ("phy: usb: Add "wake on" functionality") Signed-off-by: Al Cooper Signed-off-by: Justin Chen Acked-by: Florian Fainelli Link: https://lore.kernel.org/r/1665005418-15807-3-git-send-email-justinpopo6@gmail.com Signed-off-by: Vinod Koul --- drivers/phy/broadcom/phy-brcm-usb.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/phy/broadcom/phy-brcm-usb.c b/drivers/phy/broadcom/phy-brcm-usb.c index 99d4deabfd97..d97fa58cd63a 100644 --- a/drivers/phy/broadcom/phy-brcm-usb.c +++ b/drivers/phy/broadcom/phy-brcm-usb.c @@ -102,9 +102,9 @@ static int brcm_pm_notifier(struct notifier_block *notifier, static irqreturn_t brcm_usb_phy_wake_isr(int irq, void *dev_id) { - struct phy *gphy = dev_id; + struct device *dev = dev_id; - pm_wakeup_event(&gphy->dev, 0); + pm_wakeup_event(dev, 0); return IRQ_HANDLED; } @@ -452,7 +452,7 @@ static int brcm_usb_phy_dvr_init(struct platform_device *pdev, if (priv->wake_irq >= 0) { err = devm_request_irq(dev, priv->wake_irq, brcm_usb_phy_wake_isr, 0, - dev_name(dev), gphy); + dev_name(dev), dev); if (err < 0) return err; device_set_wakeup_capable(dev, 1); From 7e81153d0f16dd7e6f571bd168bc3d8b46f9f5b7 Mon Sep 17 00:00:00 2001 From: Justin Chen Date: Wed, 5 Oct 2022 14:30:15 -0700 Subject: [PATCH 0957/4122] phy: usb: Migrate to BIT and BITMASK macros Using BIT and BITMASK macros makes it much easier to read and make modifications. Also reordered some constants to be in numerical order. 
Signed-off-by: Justin Chen Acked-by: Florian Fainelli Link: https://lore.kernel.org/r/1665005418-15807-4-git-send-email-justinpopo6@gmail.com Signed-off-by: Vinod Koul --- .../phy/broadcom/phy-brcm-usb-init-synopsys.c | 68 ++++++++-------- drivers/phy/broadcom/phy-brcm-usb-init.c | 80 +++++++++---------- 2 files changed, 74 insertions(+), 74 deletions(-) diff --git a/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c b/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c index 430a8ae0cd24..26e9585eca60 100644 --- a/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c +++ b/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c @@ -18,14 +18,14 @@ #define PIARBCTL_CAM 0x00 #define PIARBCTL_SPLITTER 0x04 #define PIARBCTL_MISC 0x08 -#define PIARBCTL_MISC_SECURE_MASK 0x80000000 -#define PIARBCTL_MISC_USB_SELECT_MASK 0x40000000 -#define PIARBCTL_MISC_USB_4G_SDRAM_MASK 0x20000000 -#define PIARBCTL_MISC_USB_PRIORITY_MASK 0x000f0000 -#define PIARBCTL_MISC_USB_MEM_PAGE_MASK 0x0000f000 -#define PIARBCTL_MISC_CAM1_MEM_PAGE_MASK 0x00000f00 -#define PIARBCTL_MISC_CAM0_MEM_PAGE_MASK 0x000000f0 -#define PIARBCTL_MISC_SATA_PRIORITY_MASK 0x0000000f +#define PIARBCTL_MISC_SATA_PRIORITY_MASK GENMASK(3, 0) +#define PIARBCTL_MISC_CAM0_MEM_PAGE_MASK GENMASK(7, 4) +#define PIARBCTL_MISC_CAM1_MEM_PAGE_MASK GENMASK(11, 8) +#define PIARBCTL_MISC_USB_MEM_PAGE_MASK GENMASK(15, 12) +#define PIARBCTL_MISC_USB_PRIORITY_MASK GENMASK(19, 16) +#define PIARBCTL_MISC_USB_4G_SDRAM_MASK BIT(29) +#define PIARBCTL_MISC_USB_SELECT_MASK BIT(30) +#define PIARBCTL_MISC_SECURE_MASK BIT(31) #define PIARBCTL_MISC_USB_ONLY_MASK \ (PIARBCTL_MISC_USB_SELECT_MASK | \ @@ -35,46 +35,46 @@ /* Register definitions for the USB CTRL block */ #define USB_CTRL_SETUP 0x00 -#define USB_CTRL_SETUP_STRAP_IPP_SEL_MASK 0x02000000 -#define USB_CTRL_SETUP_SCB2_EN_MASK 0x00008000 -#define USB_CTRL_SETUP_tca_drv_sel_MASK 0x01000000 -#define USB_CTRL_SETUP_SCB1_EN_MASK 0x00004000 -#define USB_CTRL_SETUP_SOFT_SHUTDOWN_MASK 0x00000200 -#define 
USB_CTRL_SETUP_IPP_MASK 0x00000020 -#define USB_CTRL_SETUP_IOC_MASK 0x00000010 +#define USB_CTRL_SETUP_IOC_MASK BIT(4) +#define USB_CTRL_SETUP_IPP_MASK BIT(5) +#define USB_CTRL_SETUP_SOFT_SHUTDOWN_MASK BIT(9) +#define USB_CTRL_SETUP_SCB1_EN_MASK BIT(14) +#define USB_CTRL_SETUP_SCB2_EN_MASK BIT(15) +#define USB_CTRL_SETUP_tca_drv_sel_MASK BIT(24) +#define USB_CTRL_SETUP_STRAP_IPP_SEL_MASK BIT(25) #define USB_CTRL_USB_PM 0x04 -#define USB_CTRL_USB_PM_USB_PWRDN_MASK 0x80000000 -#define USB_CTRL_USB_PM_SOFT_RESET_MASK 0x40000000 -#define USB_CTRL_USB_PM_BDC_SOFT_RESETB_MASK 0x00800000 -#define USB_CTRL_USB_PM_XHC_SOFT_RESETB_MASK 0x00400000 -#define USB_CTRL_USB_PM_XHC_PME_EN_MASK 0x00000010 -#define USB_CTRL_USB_PM_XHC_S2_CLK_SWITCH_EN_MASK 0x00000008 +#define USB_CTRL_USB_PM_XHC_S2_CLK_SWITCH_EN_MASK BIT(3) +#define USB_CTRL_USB_PM_XHC_PME_EN_MASK BIT(4) +#define USB_CTRL_USB_PM_XHC_SOFT_RESETB_MASK BIT(22) +#define USB_CTRL_USB_PM_BDC_SOFT_RESETB_MASK BIT(23) +#define USB_CTRL_USB_PM_SOFT_RESET_MASK BIT(30) +#define USB_CTRL_USB_PM_USB_PWRDN_MASK BIT(31) #define USB_CTRL_USB_PM_STATUS 0x08 #define USB_CTRL_USB_DEVICE_CTL1 0x10 -#define USB_CTRL_USB_DEVICE_CTL1_PORT_MODE_MASK 0x00000003 +#define USB_CTRL_USB_DEVICE_CTL1_PORT_MODE_MASK GENMASK(1, 0) #define USB_CTRL_TEST_PORT_CTL 0x30 -#define USB_CTRL_TEST_PORT_CTL_TPOUT_SEL_MASK 0x000000ff +#define USB_CTRL_TEST_PORT_CTL_TPOUT_SEL_MASK GENMASK(7, 0) #define USB_CTRL_TEST_PORT_CTL_TPOUT_SEL_PME_GEN_MASK 0x0000002e #define USB_CTRL_TP_DIAG1 0x34 -#define USB_CTLR_TP_DIAG1_wake_MASK 0x00000002 +#define USB_CTLR_TP_DIAG1_wake_MASK BIT(1) #define USB_CTRL_CTLR_CSHCR 0x50 -#define USB_CTRL_CTLR_CSHCR_ctl_pme_en_MASK 0x00040000 +#define USB_CTRL_CTLR_CSHCR_ctl_pme_en_MASK BIT(18) /* Register definitions for the USB_PHY block in 7211b0 */ #define USB_PHY_PLL_CTL 0x00 -#define USB_PHY_PLL_CTL_PLL_RESETB_MASK 0x40000000 +#define USB_PHY_PLL_CTL_PLL_RESETB_MASK BIT(30) #define USB_PHY_PLL_LDO_CTL 0x08 -#define 
USB_PHY_PLL_LDO_CTL_AFE_CORERDY_MASK 0x00000004 -#define USB_PHY_PLL_LDO_CTL_AFE_LDO_PWRDWNB_MASK 0x00000002 -#define USB_PHY_PLL_LDO_CTL_AFE_BG_PWRDWNB_MASK 0x00000001 +#define USB_PHY_PLL_LDO_CTL_AFE_BG_PWRDWNB_MASK BIT(0) +#define USB_PHY_PLL_LDO_CTL_AFE_LDO_PWRDWNB_MASK BIT(1) +#define USB_PHY_PLL_LDO_CTL_AFE_CORERDY_MASK BIT(2) #define USB_PHY_UTMI_CTL_1 0x04 -#define USB_PHY_UTMI_CTL_1_POWER_UP_FSM_EN_MASK 0x00000800 -#define USB_PHY_UTMI_CTL_1_PHY_MODE_MASK 0x0000000c +#define USB_PHY_UTMI_CTL_1_PHY_MODE_MASK GENMASK(3, 2) #define USB_PHY_UTMI_CTL_1_PHY_MODE_SHIFT 2 +#define USB_PHY_UTMI_CTL_1_POWER_UP_FSM_EN_MASK BIT(11) #define USB_PHY_IDDQ 0x1c -#define USB_PHY_IDDQ_phy_iddq_MASK 0x00000001 +#define USB_PHY_IDDQ_phy_iddq_MASK BIT(0) #define USB_PHY_STATUS 0x20 -#define USB_PHY_STATUS_pll_lock_MASK 0x00000001 +#define USB_PHY_STATUS_pll_lock_MASK BIT(0) /* Register definitions for the MDIO registers in the DWC2 block of * the 7211b0. @@ -86,7 +86,7 @@ /* Register definitions for the BDC EC block in 7211b0 */ #define BDC_EC_AXIRDA 0x0c -#define BDC_EC_AXIRDA_RTS_MASK 0xf0000000 +#define BDC_EC_AXIRDA_RTS_MASK GENMASK(31, 28) #define BDC_EC_AXIRDA_RTS_SHIFT 28 diff --git a/drivers/phy/broadcom/phy-brcm-usb-init.c b/drivers/phy/broadcom/phy-brcm-usb-init.c index a7f8b3d3264d..a1ca83308f98 100644 --- a/drivers/phy/broadcom/phy-brcm-usb-init.c +++ b/drivers/phy/broadcom/phy-brcm-usb-init.c @@ -21,57 +21,57 @@ /* Register definitions for the USB CTRL block */ #define USB_CTRL_SETUP 0x00 -#define USB_CTRL_SETUP_IOC_MASK 0x00000010 -#define USB_CTRL_SETUP_IPP_MASK 0x00000020 -#define USB_CTRL_SETUP_BABO_MASK 0x00000001 -#define USB_CTRL_SETUP_FNHW_MASK 0x00000002 -#define USB_CTRL_SETUP_FNBO_MASK 0x00000004 -#define USB_CTRL_SETUP_WABO_MASK 0x00000008 -#define USB_CTRL_SETUP_SCB_CLIENT_SWAP_MASK 0x00002000 /* option */ -#define USB_CTRL_SETUP_SCB1_EN_MASK 0x00004000 /* option */ -#define USB_CTRL_SETUP_SCB2_EN_MASK 0x00008000 /* option */ -#define 
USB_CTRL_SETUP_SS_EHCI64BIT_EN_MASK 0X00020000 /* option */ -#define USB_CTRL_SETUP_SS_EHCI64BIT_EN_VAR_MASK 0x00010000 /* option */ -#define USB_CTRL_SETUP_STRAP_IPP_SEL_MASK 0x02000000 /* option */ -#define USB_CTRL_SETUP_CC_DRD_MODE_ENABLE_MASK 0x04000000 /* option */ -#define USB_CTRL_SETUP_STRAP_CC_DRD_MODE_ENABLE_SEL_MASK 0x08000000 /* opt */ -#define USB_CTRL_SETUP_OC3_DISABLE_MASK 0xc0000000 /* option */ +#define USB_CTRL_SETUP_BABO_MASK BIT(0) +#define USB_CTRL_SETUP_FNHW_MASK BIT(1) +#define USB_CTRL_SETUP_FNBO_MASK BIT(2) +#define USB_CTRL_SETUP_WABO_MASK BIT(3) +#define USB_CTRL_SETUP_IOC_MASK BIT(4) +#define USB_CTRL_SETUP_IPP_MASK BIT(5) +#define USB_CTRL_SETUP_SCB_CLIENT_SWAP_MASK BIT(13) /* option */ +#define USB_CTRL_SETUP_SCB1_EN_MASK BIT(14) /* option */ +#define USB_CTRL_SETUP_SCB2_EN_MASK BIT(15) /* option */ +#define USB_CTRL_SETUP_SS_EHCI64BIT_EN_MASK BIT(17) /* option */ +#define USB_CTRL_SETUP_SS_EHCI64BIT_EN_VAR_MASK BIT(16) /* option */ +#define USB_CTRL_SETUP_STRAP_IPP_SEL_MASK BIT(25) /* option */ +#define USB_CTRL_SETUP_CC_DRD_MODE_ENABLE_MASK BIT(26) /* option */ +#define USB_CTRL_SETUP_STRAP_CC_DRD_MODE_ENABLE_SEL_MASK BIT(27) /* opt */ +#define USB_CTRL_SETUP_OC3_DISABLE_MASK GENMASK(31, 30) /* option */ #define USB_CTRL_PLL_CTL 0x04 -#define USB_CTRL_PLL_CTL_PLL_SUSPEND_EN_MASK 0x08000000 -#define USB_CTRL_PLL_CTL_PLL_RESETB_MASK 0x40000000 -#define USB_CTRL_PLL_CTL_PLL_IDDQ_PWRDN_MASK 0x80000000 /* option */ +#define USB_CTRL_PLL_CTL_PLL_SUSPEND_EN_MASK BIT(27) +#define USB_CTRL_PLL_CTL_PLL_RESETB_MASK BIT(30) +#define USB_CTRL_PLL_CTL_PLL_IDDQ_PWRDN_MASK BIT(31) /* option */ #define USB_CTRL_EBRIDGE 0x0c -#define USB_CTRL_EBRIDGE_ESTOP_SCB_REQ_MASK 0x00020000 /* option */ -#define USB_CTRL_EBRIDGE_EBR_SCB_SIZE_MASK 0x00000f80 /* option */ +#define USB_CTRL_EBRIDGE_EBR_SCB_SIZE_MASK GENMASK(11, 7) /* option */ +#define USB_CTRL_EBRIDGE_ESTOP_SCB_REQ_MASK BIT(17) /* option */ #define USB_CTRL_OBRIDGE 0x10 -#define 
USB_CTRL_OBRIDGE_LS_KEEP_ALIVE_MASK 0x08000000 +#define USB_CTRL_OBRIDGE_LS_KEEP_ALIVE_MASK BIT(27) #define USB_CTRL_MDIO 0x14 #define USB_CTRL_MDIO2 0x18 #define USB_CTRL_UTMI_CTL_1 0x2c -#define USB_CTRL_UTMI_CTL_1_POWER_UP_FSM_EN_MASK 0x00000800 -#define USB_CTRL_UTMI_CTL_1_POWER_UP_FSM_EN_P1_MASK 0x08000000 +#define USB_CTRL_UTMI_CTL_1_POWER_UP_FSM_EN_MASK BIT(11) +#define USB_CTRL_UTMI_CTL_1_POWER_UP_FSM_EN_P1_MASK BIT(27) #define USB_CTRL_USB_PM 0x34 -#define USB_CTRL_USB_PM_BDC_SOFT_RESETB_MASK 0x00800000 /* option */ -#define USB_CTRL_USB_PM_XHC_SOFT_RESETB_MASK 0x00400000 /* option */ -#define USB_CTRL_USB_PM_XHC_SOFT_RESETB_VAR_MASK 0x40000000 /* option */ -#define USB_CTRL_USB_PM_USB_PWRDN_MASK 0x80000000 /* option */ -#define USB_CTRL_USB_PM_SOFT_RESET_MASK 0x40000000 /* option */ -#define USB_CTRL_USB_PM_USB20_HC_RESETB_MASK 0x30000000 /* option */ -#define USB_CTRL_USB_PM_USB20_HC_RESETB_VAR_MASK 0x00300000 /* option */ -#define USB_CTRL_USB_PM_RMTWKUP_EN_MASK 0x00000001 +#define USB_CTRL_USB_PM_RMTWKUP_EN_MASK BIT(0) +#define USB_CTRL_USB_PM_USB20_HC_RESETB_VAR_MASK GENMASK(21, 20) /* option */ +#define USB_CTRL_USB_PM_XHC_SOFT_RESETB_MASK BIT(22) /* option */ +#define USB_CTRL_USB_PM_BDC_SOFT_RESETB_MASK BIT(23) /* option */ +#define USB_CTRL_USB_PM_USB20_HC_RESETB_MASK GENMASK(29, 28) /* option */ +#define USB_CTRL_USB_PM_XHC_SOFT_RESETB_VAR_MASK BIT(30) /* option */ +#define USB_CTRL_USB_PM_SOFT_RESET_MASK BIT(30) /* option */ +#define USB_CTRL_USB_PM_USB_PWRDN_MASK BIT(31) /* option */ #define USB_CTRL_USB_PM_STATUS 0x38 #define USB_CTRL_USB30_CTL1 0x60 -#define USB_CTRL_USB30_CTL1_PHY3_PLL_SEQ_START_MASK 0x00000010 -#define USB_CTRL_USB30_CTL1_PHY3_RESETB_MASK 0x00010000 -#define USB_CTRL_USB30_CTL1_XHC_SOFT_RESETB_MASK 0x00020000 /* option */ -#define USB_CTRL_USB30_CTL1_USB3_IOC_MASK 0x10000000 /* option */ -#define USB_CTRL_USB30_CTL1_USB3_IPP_MASK 0x20000000 /* option */ +#define USB_CTRL_USB30_CTL1_PHY3_PLL_SEQ_START_MASK BIT(4) +#define 
USB_CTRL_USB30_CTL1_PHY3_RESETB_MASK BIT(16) +#define USB_CTRL_USB30_CTL1_XHC_SOFT_RESETB_MASK BIT(17) /* option */ +#define USB_CTRL_USB30_CTL1_USB3_IOC_MASK BIT(28) /* option */ +#define USB_CTRL_USB30_CTL1_USB3_IPP_MASK BIT(29) /* option */ #define USB_CTRL_USB30_PCTL 0x70 -#define USB_CTRL_USB30_PCTL_PHY3_SOFT_RESETB_MASK 0x00000002 -#define USB_CTRL_USB30_PCTL_PHY3_IDDQ_OVERRIDE_MASK 0x00008000 -#define USB_CTRL_USB30_PCTL_PHY3_SOFT_RESETB_P1_MASK 0x00020000 +#define USB_CTRL_USB30_PCTL_PHY3_SOFT_RESETB_MASK BIT(1) +#define USB_CTRL_USB30_PCTL_PHY3_IDDQ_OVERRIDE_MASK BIT(15) +#define USB_CTRL_USB30_PCTL_PHY3_SOFT_RESETB_P1_MASK BIT(17) #define USB_CTRL_USB_DEVICE_CTL1 0x90 -#define USB_CTRL_USB_DEVICE_CTL1_PORT_MODE_MASK 0x00000003 /* option */ +#define USB_CTRL_USB_DEVICE_CTL1_PORT_MODE_MASK GENMASK(1, 0) /* option */ /* Register definitions for the XHCI EC block */ #define USB_XHCI_EC_IRAADR 0x658 From 833c173ebab420997b98a0d888fc5b55ee4a8a7e Mon Sep 17 00:00:00 2001 From: Justin Chen Date: Wed, 5 Oct 2022 14:30:16 -0700 Subject: [PATCH 0958/4122] phy: usb: Disable phy auto-suspend The BDC block requires the PLL lock in order to grab the PLL clock. The phy auto-suspend feature turns off the phy when nothing is attached leading to the PLL to not lock. This leads the BDC block to grab the AUX clock instead of the PLL clock. This is not ideal, so lets turn this feature off. 
Signed-off-by: Justin Chen Acked-by: Florian Fainelli Link: https://lore.kernel.org/r/1665005418-15807-5-git-send-email-justinpopo6@gmail.com Signed-off-by: Vinod Koul --- drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c b/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c index 26e9585eca60..6a4d47886e0e 100644 --- a/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c +++ b/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c @@ -62,6 +62,7 @@ /* Register definitions for the USB_PHY block in 7211b0 */ #define USB_PHY_PLL_CTL 0x00 +#define USB_PHY_PLL_CTL_PLL_SUSPEND_MASK BIT(27) #define USB_PHY_PLL_CTL_PLL_RESETB_MASK BIT(30) #define USB_PHY_PLL_LDO_CTL 0x08 #define USB_PHY_PLL_LDO_CTL_AFE_BG_PWRDWNB_MASK BIT(0) @@ -259,6 +260,11 @@ static void usb_init_common_7211b0(struct brcm_usb_init_params *params) brcm_usb_writel(reg, usb_phy + USB_PHY_UTMI_CTL_1); } + /* Disable PLL auto suspend */ + reg = brcm_usb_readl(usb_phy + USB_PHY_PLL_CTL); + reg |= USB_PHY_PLL_CTL_PLL_SUSPEND_MASK; + brcm_usb_writel(reg, usb_phy + USB_PHY_PLL_CTL); + /* Init the PHY */ reg = USB_PHY_PLL_LDO_CTL_AFE_CORERDY_MASK | USB_PHY_PLL_LDO_CTL_AFE_LDO_PWRDWNB_MASK | From 700c44b508020a3ea29d297c677f8d4ab14b7e6a Mon Sep 17 00:00:00 2001 From: Justin Chen Date: Wed, 5 Oct 2022 14:30:17 -0700 Subject: [PATCH 0959/4122] phy: usb: Use slow clock for wake enabled suspend The logic was incorrect when switching to slow clock. We want the slow clock if wake_enabled is set. 
Fixes: ae532b2b7aa5 ("phy: usb: Add "wake on" functionality for newer Synopsis XHCI controllers") Signed-off-by: Justin Chen Acked-by: Florian Fainelli Link: https://lore.kernel.org/r/1665005418-15807-6-git-send-email-justinpopo6@gmail.com Signed-off-by: Vinod Koul --- drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c b/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c index 6a4d47886e0e..f78cff24eec8 100644 --- a/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c +++ b/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c @@ -337,13 +337,12 @@ static void usb_uninit_common_7216(struct brcm_usb_init_params *params) pr_debug("%s\n", __func__); - if (!params->wake_enabled) { - USB_CTRL_SET(ctrl, USB_PM, USB_PWRDN); - + if (params->wake_enabled) { /* Switch to using slower clock during suspend to save power */ USB_CTRL_SET(ctrl, USB_PM, XHC_S2_CLK_SWITCH_EN); - } else { usb_wake_enable_7216(params, true); + } else { + USB_CTRL_SET(ctrl, USB_PM, USB_PWRDN); } } From 8484199c09347bdd5d81ee8a2bc530850f900797 Mon Sep 17 00:00:00 2001 From: Justin Chen Date: Wed, 5 Oct 2022 14:30:18 -0700 Subject: [PATCH 0960/4122] phy: usb: Fix clock imbalance for suspend/resume We should be disabling clocks when wake from USB is not needed. Since this wasn't done, we had a clock imbalance since clocks were always being enabled on resume. 
Fixes: ae532b2b7aa5 ("phy: usb: Add "wake on" functionality for newer Synopsis XHCI controllers") Fixes: b0c0b66c0b43 ("phy: usb: Add support for wake and USB low power mode for 7211 S2/S5") Signed-off-by: Justin Chen Acked-by: Florian Fainelli Link: https://lore.kernel.org/r/1665005418-15807-7-git-send-email-justinpopo6@gmail.com Signed-off-by: Vinod Koul --- drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c | 2 -- drivers/phy/broadcom/phy-brcm-usb-init.h | 1 - drivers/phy/broadcom/phy-brcm-usb.c | 8 +++++--- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c b/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c index f78cff24eec8..76cf4280d7ed 100644 --- a/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c +++ b/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c @@ -430,7 +430,6 @@ void brcm_usb_dvr_init_7216(struct brcm_usb_init_params *params) params->family_name = "7216"; params->ops = &bcm7216_ops; - params->suspend_with_clocks = true; } void brcm_usb_dvr_init_7211b0(struct brcm_usb_init_params *params) @@ -440,5 +439,4 @@ void brcm_usb_dvr_init_7211b0(struct brcm_usb_init_params *params) params->family_name = "7211"; params->ops = &bcm7211b0_ops; - params->suspend_with_clocks = true; } diff --git a/drivers/phy/broadcom/phy-brcm-usb-init.h b/drivers/phy/broadcom/phy-brcm-usb-init.h index bedf2b8e2f19..f9fbf8fb80e5 100644 --- a/drivers/phy/broadcom/phy-brcm-usb-init.h +++ b/drivers/phy/broadcom/phy-brcm-usb-init.h @@ -62,7 +62,6 @@ struct brcm_usb_init_params { const struct brcm_usb_init_ops *ops; struct regmap *syscon_piarbctl; bool wake_enabled; - bool suspend_with_clocks; }; void brcm_usb_dvr_init_4908(struct brcm_usb_init_params *params); diff --git a/drivers/phy/broadcom/phy-brcm-usb.c b/drivers/phy/broadcom/phy-brcm-usb.c index d97fa58cd63a..aafba4a04701 100644 --- a/drivers/phy/broadcom/phy-brcm-usb.c +++ b/drivers/phy/broadcom/phy-brcm-usb.c @@ -602,7 +602,7 @@ static int 
brcm_usb_phy_suspend(struct device *dev) * and newer XHCI->2.0-clks/3.0-clks. */ - if (!priv->ini.suspend_with_clocks) { + if (!priv->ini.wake_enabled) { if (priv->phys[BRCM_USB_PHY_3_0].inited) clk_disable_unprepare(priv->usb_30_clk); if (priv->phys[BRCM_USB_PHY_2_0].inited || @@ -619,8 +619,10 @@ static int brcm_usb_phy_resume(struct device *dev) { struct brcm_usb_phy_data *priv = dev_get_drvdata(dev); - clk_prepare_enable(priv->usb_20_clk); - clk_prepare_enable(priv->usb_30_clk); + if (!priv->ini.wake_enabled) { + clk_prepare_enable(priv->usb_20_clk); + clk_prepare_enable(priv->usb_30_clk); + } brcm_usb_init_ipp(&priv->ini); /* From 6964affe65066651eca21e97247d3b7cac5153dc Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Mon, 31 Oct 2022 11:13:53 +0000 Subject: [PATCH 0961/4122] dt-bindings: phy: Add special clock for Allwinner H616 PHY The USB PHY IP in the Allwinner H616 SoC requires a quirk that involves some resources from port 2's PHY and HCI IP. In particular the PMU clock for port 2 must be surely ungated before accessing the REG_HCI_PHY_CTL register of port 2. To allow each USB port to be controlled independently of port 2, we need a handle to that particular PMU clock in the *PHY* node, as the HCI and PHY part might be handled by separate drivers. Add that clock to the requirements of the H616 PHY binding, so that a PHY driver can apply the quirk in isolation, without requiring help from port 2's HCI driver. 
Signed-off-by: Andre Przywara Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20221031111358.3387297-3-andre.przywara@arm.com Signed-off-by: Vinod Koul --- .../phy/allwinner,sun8i-h3-usb-phy.yaml | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/Documentation/devicetree/bindings/phy/allwinner,sun8i-h3-usb-phy.yaml b/Documentation/devicetree/bindings/phy/allwinner,sun8i-h3-usb-phy.yaml index 77539b4601c2..2df012d13655 100644 --- a/Documentation/devicetree/bindings/phy/allwinner,sun8i-h3-usb-phy.yaml +++ b/Documentation/devicetree/bindings/phy/allwinner,sun8i-h3-usb-phy.yaml @@ -36,18 +36,22 @@ properties: - const: pmu3 clocks: + minItems: 4 items: - description: USB OTG PHY bus clock - description: USB Host 0 PHY bus clock - description: USB Host 1 PHY bus clock - description: USB Host 2 PHY bus clock + - description: PMU clock for host port 2 clock-names: + minItems: 4 items: - const: usb0_phy - const: usb1_phy - const: usb2_phy - const: usb3_phy + - const: pmu2_clk resets: items: @@ -96,6 +100,28 @@ required: - resets - reset-names +allOf: + - if: + properties: + compatible: + contains: + enum: + - allwinner,sun50i-h616-usb-phy + then: + properties: + clocks: + minItems: 5 + + clock-names: + minItems: 5 + else: + properties: + clocks: + maxItems: 4 + + clock-names: + maxItems: 4 + additionalProperties: false examples: From b45c6d80325bec2b78c716629a518b6442d8bdc6 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Mon, 31 Oct 2022 11:13:53 +0000 Subject: [PATCH 0962/4122] phy: sun4i-usb: Introduce port2 SIDDQ quirk At least the Allwinner H616 SoC requires a weird quirk to make most USB PHYs work: Only port2 works out of the box, but all other ports need some help from this port2 to work correctly: The CLK_BUS_PHY2 and RST_USB_PHY2 clock and reset need to be enabled, and the SIDDQ bit in the PMU PHY control register needs to be cleared. For this register to be accessible, CLK_BUS_EHCI2 needs to be ungated. Don't ask ....
Instead of disguising this as some generic feature, treat it more like a quirk (what it really is): If the quirk bit is set, and we initialise a PHY other than PHY2, ungate this one special clock, and clear the SIDDQ bit. We also pick the clock and reset from PHY2 and enable them as well. Signed-off-by: Andre Przywara Link: https://lore.kernel.org/r/20221031111358.3387297-4-andre.przywara@arm.com Signed-off-by: Vinod Koul --- drivers/phy/allwinner/phy-sun4i-usb.c | 59 +++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/drivers/phy/allwinner/phy-sun4i-usb.c b/drivers/phy/allwinner/phy-sun4i-usb.c index 3a3831f6059a..e39f5ad62cc1 100644 --- a/drivers/phy/allwinner/phy-sun4i-usb.c +++ b/drivers/phy/allwinner/phy-sun4i-usb.c @@ -120,6 +120,7 @@ struct sun4i_usb_phy_cfg { u8 phyctl_offset; bool dedicated_clocks; bool phy0_dual_route; + bool needs_phy2_siddq; int missing_phys; }; @@ -289,6 +290,50 @@ static int sun4i_usb_phy_init(struct phy *_phy) return ret; } + /* Some PHYs on some SoCs need the help of PHY2 to work. */ + if (data->cfg->needs_phy2_siddq && phy->index != 2) { + struct sun4i_usb_phy *phy2 = &data->phys[2]; + + ret = clk_prepare_enable(phy2->clk); + if (ret) { + reset_control_assert(phy->reset); + clk_disable_unprepare(phy->clk2); + clk_disable_unprepare(phy->clk); + return ret; + } + + ret = reset_control_deassert(phy2->reset); + if (ret) { + clk_disable_unprepare(phy2->clk); + reset_control_assert(phy->reset); + clk_disable_unprepare(phy->clk2); + clk_disable_unprepare(phy->clk); + return ret; + } + + /* + * This extra clock is just needed to access the + * REG_HCI_PHY_CTL PMU register for PHY2. 
+ */ + ret = clk_prepare_enable(phy2->clk2); + if (ret) { + reset_control_assert(phy2->reset); + clk_disable_unprepare(phy2->clk); + reset_control_assert(phy->reset); + clk_disable_unprepare(phy->clk2); + clk_disable_unprepare(phy->clk); + return ret; + } + + if (phy2->pmu && data->cfg->hci_phy_ctl_clear) { + val = readl(phy2->pmu + REG_HCI_PHY_CTL); + val &= ~data->cfg->hci_phy_ctl_clear; + writel(val, phy2->pmu + REG_HCI_PHY_CTL); + } + + clk_disable_unprepare(phy->clk2); + } + if (phy->pmu && data->cfg->hci_phy_ctl_clear) { val = readl(phy->pmu + REG_HCI_PHY_CTL); val &= ~data->cfg->hci_phy_ctl_clear; @@ -354,6 +399,13 @@ static int sun4i_usb_phy_exit(struct phy *_phy) data->phy0_init = false; } + if (data->cfg->needs_phy2_siddq && phy->index != 2) { + struct sun4i_usb_phy *phy2 = &data->phys[2]; + + clk_disable_unprepare(phy2->clk); + reset_control_assert(phy2->reset); + } + sun4i_usb_phy_passby(phy, 0); reset_control_assert(phy->reset); clk_disable_unprepare(phy->clk2); @@ -785,6 +837,13 @@ static int sun4i_usb_phy_probe(struct platform_device *pdev) dev_err(dev, "failed to get clock %s\n", name); return PTR_ERR(phy->clk2); } + } else { + snprintf(name, sizeof(name), "pmu%d_clk", i); + phy->clk2 = devm_clk_get_optional(dev, name); + if (IS_ERR(phy->clk2)) { + dev_err(dev, "failed to get clock %s\n", name); + return PTR_ERR(phy->clk2); + } } snprintf(name, sizeof(name), "usb%d_reset", i); From 0f607406525d25019dd9c498bcc0b42734fc59d5 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Mon, 31 Oct 2022 11:13:55 +0000 Subject: [PATCH 0963/4122] phy: sun4i-usb: Add support for the H616 USB PHY The USB PHY used in the Allwinner H616 SoC inherits some traits from its various predecessors: it has four full PHYs like the H3, needs some extra bits to be set like the H6, and puts SIDDQ on a different bit like the A100. Plus it needs this weird PHY2 quirk. Name all those properties in a new config struct and assign a new compatible name to it. 
Signed-off-by: Andre Przywara Reviewed-by: Samuel Holland Link: https://lore.kernel.org/r/20221031111358.3387297-5-andre.przywara@arm.com Signed-off-by: Vinod Koul --- drivers/phy/allwinner/phy-sun4i-usb.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/phy/allwinner/phy-sun4i-usb.c b/drivers/phy/allwinner/phy-sun4i-usb.c index e39f5ad62cc1..5472db9e87ef 100644 --- a/drivers/phy/allwinner/phy-sun4i-usb.c +++ b/drivers/phy/allwinner/phy-sun4i-usb.c @@ -1032,6 +1032,17 @@ static const struct sun4i_usb_phy_cfg sun50i_h6_cfg = { .missing_phys = BIT(1) | BIT(2), }; +static const struct sun4i_usb_phy_cfg sun50i_h616_cfg = { + .num_phys = 4, + .type = sun50i_h6_phy, + .disc_thresh = 3, + .phyctl_offset = REG_PHYCTL_A33, + .dedicated_clocks = true, + .phy0_dual_route = true, + .hci_phy_ctl_clear = PHY_CTL_SIDDQ, + .needs_phy2_siddq = true, +}; + static const struct of_device_id sun4i_usb_phy_of_match[] = { { .compatible = "allwinner,sun4i-a10-usb-phy", .data = &sun4i_a10_cfg }, { .compatible = "allwinner,sun5i-a13-usb-phy", .data = &sun5i_a13_cfg }, @@ -1047,6 +1058,7 @@ static const struct of_device_id sun4i_usb_phy_of_match[] = { { .compatible = "allwinner,sun50i-a64-usb-phy", .data = &sun50i_a64_cfg}, { .compatible = "allwinner,sun50i-h6-usb-phy", .data = &sun50i_h6_cfg }, + { .compatible = "allwinner,sun50i-h616-usb-phy", .data = &sun50i_h616_cfg }, { }, }; MODULE_DEVICE_TABLE(of, sun4i_usb_phy_of_match); From 8ca2a81bff096b359736bba9b7d06cf5bc04fa88 Mon Sep 17 00:00:00 2001 From: Siddharth Vadapalli Date: Wed, 26 Oct 2022 13:15:30 +0530 Subject: [PATCH 0964/4122] dt-bindings: phy: ti: phy-gmii-sel: Add bindings for J721e TI's J721e SoC supports additional PHY modes like QSGMII and SGMII that are not supported on earlier SoCs. Add a compatible for it. Extend ti,qsgmii-main-ports property to support selection of up to two main ports at once across the two QSGMII interfaces.
Signed-off-by: Siddharth Vadapalli Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20221026074532.109220-2-s-vadapalli@ti.com Signed-off-by: Vinod Koul --- .../bindings/phy/ti,phy-gmii-sel.yaml | 48 ++++++++++++++++--- 1 file changed, 42 insertions(+), 6 deletions(-) diff --git a/Documentation/devicetree/bindings/phy/ti,phy-gmii-sel.yaml b/Documentation/devicetree/bindings/phy/ti,phy-gmii-sel.yaml index da7cac537e15..3a6d686383cf 100644 --- a/Documentation/devicetree/bindings/phy/ti,phy-gmii-sel.yaml +++ b/Documentation/devicetree/bindings/phy/ti,phy-gmii-sel.yaml @@ -54,6 +54,7 @@ properties: - ti,dm814-phy-gmii-sel - ti,am654-phy-gmii-sel - ti,j7200-cpsw5g-phy-gmii-sel + - ti,j721e-cpsw9g-phy-gmii-sel reg: maxItems: 1 @@ -63,14 +64,17 @@ properties: ti,qsgmii-main-ports: $ref: /schemas/types.yaml#/definitions/uint32-array description: | - Required only for QSGMII mode. Array to select the port for - QSGMII main mode. Rest of the ports are selected as QSGMII_SUB - ports automatically. Any one of the 4 CPSW5G ports can act as the - main port with the rest of them being the QSGMII_SUB ports. - maxItems: 1 + Required only for QSGMII mode. Array to select the port/s for QSGMII + main mode. The size of the array corresponds to the number of QSGMII + interfaces and thus, the number of distinct QSGMII main ports, + supported by the device. If the device supports two QSGMII interfaces + but only one QSGMII interface is desired, repeat the QSGMII main port + value corresponding to the QSGMII interface in the array. 
+ minItems: 1 + maxItems: 2 items: minimum: 1 - maximum: 4 + maximum: 8 allOf: - if: @@ -81,12 +85,43 @@ allOf: - ti,dra7xx-phy-gmii-sel - ti,dm814-phy-gmii-sel - ti,am654-phy-gmii-sel + - ti,j7200-cpsw5g-phy-gmii-sel + - ti,j721e-cpsw9g-phy-gmii-sel then: properties: '#phy-cells': const: 1 description: CPSW port number (starting from 1) + - if: + properties: + compatible: + contains: + enum: + - ti,j7200-cpsw5g-phy-gmii-sel + then: + properties: + ti,qsgmii-main-ports: + maxItems: 1 + items: + minimum: 1 + maximum: 4 + + - if: + properties: + compatible: + contains: + enum: + - ti,j721e-cpsw9g-phy-gmii-sel + then: + properties: + ti,qsgmii-main-ports: + minItems: 2 + maxItems: 2 + items: + minimum: 1 + maximum: 8 + - if: not: properties: @@ -94,6 +129,7 @@ allOf: contains: enum: - ti,j7200-cpsw5g-phy-gmii-sel + - ti,j721e-cpsw9g-phy-gmii-sel then: properties: ti,qsgmii-main-ports: false From 3b66ab69c566e79d58cc38bd7c90a6b2b0b84a7d Mon Sep 17 00:00:00 2001 From: Siddharth Vadapalli Date: Wed, 26 Oct 2022 13:15:31 +0530 Subject: [PATCH 0965/4122] phy: ti: gmii-sel: Update methods for fetching and using qsgmii main port The number of QSGMII main ports are specific to the device. TI's J7200 for which the QSGMII main port property is fetched from the device-tree has only one QSGMII main port. However, devices like TI's J721e support up to two QSGMII main ports. Thus, the existing methods for fetching and using the QSGMII main port are not scalable. Update the existing methods for handling the QSGMII main ports and its associated requirements to make it scalable for future devices. 
Signed-off-by: Siddharth Vadapalli Reviewed-by: Roger Quadros Link: https://lore.kernel.org/r/20221026074532.109220-3-s-vadapalli@ti.com Signed-off-by: Vinod Koul --- drivers/phy/ti/phy-gmii-sel.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/drivers/phy/ti/phy-gmii-sel.c b/drivers/phy/ti/phy-gmii-sel.c index 0bcfd6d96b4d..c8f30d2e1f46 100644 --- a/drivers/phy/ti/phy-gmii-sel.c +++ b/drivers/phy/ti/phy-gmii-sel.c @@ -50,6 +50,7 @@ struct phy_gmii_sel_soc_data { const struct reg_field (*regfields)[PHY_GMII_SEL_LAST]; bool use_of_data; u64 extra_modes; + u32 num_qsgmii_main_ports; }; struct phy_gmii_sel_priv { @@ -213,6 +214,8 @@ struct phy_gmii_sel_soc_data phy_gmii_sel_cpsw5g_soc_j7200 = { .use_of_data = true, .regfields = phy_gmii_sel_fields_am654, .extra_modes = BIT(PHY_INTERFACE_MODE_QSGMII), + .num_ports = 4, + .num_qsgmii_main_ports = 1, }; static const struct of_device_id phy_gmii_sel_id_table[] = { @@ -378,11 +381,13 @@ static int phy_gmii_sel_init_ports(struct phy_gmii_sel_priv *priv) static int phy_gmii_sel_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; + const struct phy_gmii_sel_soc_data *soc_data; struct device_node *node = dev->of_node; const struct of_device_id *of_id; struct phy_gmii_sel_priv *priv; u32 main_ports = 1; int ret; + u32 i; of_id = of_match_node(phy_gmii_sel_id_table, pdev->dev.of_node); if (!of_id) @@ -394,16 +399,26 @@ static int phy_gmii_sel_probe(struct platform_device *pdev) priv->dev = &pdev->dev; priv->soc_data = of_id->data; + soc_data = priv->soc_data; priv->num_ports = priv->soc_data->num_ports; - of_property_read_u32(node, "ti,qsgmii-main-ports", &main_ports); + priv->qsgmii_main_ports = 0; + /* - * Ensure that main_ports is within bounds. If the property - * ti,qsgmii-main-ports is not mentioned, or the value mentioned - * is out of bounds, default to 1. 
+ * Based on the compatible, try to read the appropriate number of + * QSGMII main ports from the "ti,qsgmii-main-ports" property from + * the device-tree node. */ - if (main_ports < 1 || main_ports > 4) - main_ports = 1; - priv->qsgmii_main_ports = PHY_GMII_PORT(main_ports); + for (i = 0; i < soc_data->num_qsgmii_main_ports; i++) { + of_property_read_u32_index(node, "ti,qsgmii-main-ports", i, &main_ports); + /* + * Ensure that main_ports is within bounds. + */ + if (main_ports < 1 || main_ports > soc_data->num_ports) { + dev_err(dev, "Invalid qsgmii main port provided\n"); + return -EINVAL; + } + priv->qsgmii_main_ports |= PHY_GMII_PORT(main_ports); + } priv->regmap = syscon_node_to_regmap(node->parent); if (IS_ERR(priv->regmap)) { From 5bd78c00d753d4e80e151555565334c475a559d3 Mon Sep 17 00:00:00 2001 From: Siddharth Vadapalli Date: Wed, 26 Oct 2022 13:15:32 +0530 Subject: [PATCH 0966/4122] phy: ti: gmii-sel: Add support for CPSW9G GMII SEL in J721e Each of the CPSW9G ports in J721e support additional modes like QSGMII. Add a new compatible for J721e to support the additional modes. In TI's J721e, each of the CPSW9G ethernet interfaces can act as a QSGMII main or QSGMII-SUB port. The QSGMII main interface is responsible for performing auto-negotiation between the MAC and the PHY while the rest of the interfaces are designated as QSGMII-SUB interfaces, indicating that they will not be taking part in the auto-negotiation process. 
Signed-off-by: Siddharth Vadapalli Reviewed-by: Roger Quadros Link: https://lore.kernel.org/r/20221026074532.109220-4-s-vadapalli@ti.com Signed-off-by: Vinod Koul --- drivers/phy/ti/phy-gmii-sel.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/phy/ti/phy-gmii-sel.c b/drivers/phy/ti/phy-gmii-sel.c index c8f30d2e1f46..8c667819c39a 100644 --- a/drivers/phy/ti/phy-gmii-sel.c +++ b/drivers/phy/ti/phy-gmii-sel.c @@ -218,6 +218,15 @@ struct phy_gmii_sel_soc_data phy_gmii_sel_cpsw5g_soc_j7200 = { .num_qsgmii_main_ports = 1, }; +static const +struct phy_gmii_sel_soc_data phy_gmii_sel_cpsw9g_soc_j721e = { + .use_of_data = true, + .regfields = phy_gmii_sel_fields_am654, + .extra_modes = BIT(PHY_INTERFACE_MODE_QSGMII), + .num_ports = 8, + .num_qsgmii_main_ports = 2, +}; + static const struct of_device_id phy_gmii_sel_id_table[] = { { .compatible = "ti,am3352-phy-gmii-sel", @@ -243,6 +252,10 @@ static const struct of_device_id phy_gmii_sel_id_table[] = { .compatible = "ti,j7200-cpsw5g-phy-gmii-sel", .data = &phy_gmii_sel_cpsw5g_soc_j7200, }, + { + .compatible = "ti,j721e-cpsw9g-phy-gmii-sel", + .data = &phy_gmii_sel_cpsw9g_soc_j721e, + }, {} }; MODULE_DEVICE_TABLE(of, phy_gmii_sel_id_table); From 53bffe0055741440a6c91abb80bad1c62ea443e3 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 26 Oct 2022 15:44:49 -0700 Subject: [PATCH 0967/4122] phy: phy-brcm-usb: Utilize platform_get_irq_byname_optional() The wake-up interrupt lines are entirely optional, avoid printing messages that interrupts were not found by switching to the _optional variant. 
Signed-off-by: Florian Fainelli Acked-by: Justin Chen Link: https://lore.kernel.org/r/20221026224450.2958762-1-f.fainelli@gmail.com Signed-off-by: Vinod Koul --- drivers/phy/broadcom/phy-brcm-usb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/phy/broadcom/phy-brcm-usb.c b/drivers/phy/broadcom/phy-brcm-usb.c index aafba4a04701..4de39999f43d 100644 --- a/drivers/phy/broadcom/phy-brcm-usb.c +++ b/drivers/phy/broadcom/phy-brcm-usb.c @@ -446,9 +446,9 @@ static int brcm_usb_phy_dvr_init(struct platform_device *pdev, priv->suspend_clk = NULL; } - priv->wake_irq = platform_get_irq_byname(pdev, "wake"); + priv->wake_irq = platform_get_irq_byname_optional(pdev, "wake"); if (priv->wake_irq < 0) - priv->wake_irq = platform_get_irq_byname(pdev, "wakeup"); + priv->wake_irq = platform_get_irq_byname_optional(pdev, "wakeup"); if (priv->wake_irq >= 0) { err = devm_request_irq(dev, priv->wake_irq, brcm_usb_phy_wake_isr, 0, From 2428787f16155aa03aa63d5c130e83809a7df5cf Mon Sep 17 00:00:00 2001 From: Wayne Chang Date: Wed, 5 Oct 2022 16:40:31 +0800 Subject: [PATCH 0968/4122] phy: tegra: xusb: Remove usb3 supply Remove redundant codes for getting the vbus supply of usb3 ports because we get and control the vbus supply by the companion usb2 ports Signed-off-by: Wayne Chang Signed-off-by: Haotien Hsu Reviewed-by: Jon Hunter Link: https://lore.kernel.org/r/20221005084031.2154251-1-haotienh@nvidia.com Signed-off-by: Vinod Koul --- drivers/phy/tegra/xusb-tegra124.c | 1 - drivers/phy/tegra/xusb-tegra186.c | 1 - drivers/phy/tegra/xusb-tegra210.c | 1 - drivers/phy/tegra/xusb.c | 10 +--------- drivers/phy/tegra/xusb.h | 2 -- 5 files changed, 1 insertion(+), 14 deletions(-) diff --git a/drivers/phy/tegra/xusb-tegra124.c b/drivers/phy/tegra/xusb-tegra124.c index db56c7fbe60b..f4f75ea033b8 100644 --- a/drivers/phy/tegra/xusb-tegra124.c +++ b/drivers/phy/tegra/xusb-tegra124.c @@ -1652,7 +1652,6 @@ tegra124_usb3_port_map(struct tegra_xusb_port *port) static const 
struct tegra_xusb_port_ops tegra124_usb3_port_ops = { .release = tegra_xusb_usb3_port_release, - .remove = tegra_xusb_usb3_port_remove, .enable = tegra124_usb3_port_enable, .disable = tegra124_usb3_port_disable, .map = tegra124_usb3_port_map, diff --git a/drivers/phy/tegra/xusb-tegra186.c b/drivers/phy/tegra/xusb-tegra186.c index 0996ede63387..6a8bd87cfdbd 100644 --- a/drivers/phy/tegra/xusb-tegra186.c +++ b/drivers/phy/tegra/xusb-tegra186.c @@ -1185,7 +1185,6 @@ tegra186_usb3_port_map(struct tegra_xusb_port *port) static const struct tegra_xusb_port_ops tegra186_usb3_port_ops = { .release = tegra_xusb_usb3_port_release, - .remove = tegra_xusb_usb3_port_remove, .enable = tegra186_usb3_port_enable, .disable = tegra186_usb3_port_disable, .map = tegra186_usb3_port_map, diff --git a/drivers/phy/tegra/xusb-tegra210.c b/drivers/phy/tegra/xusb-tegra210.c index eedfc7c2cc05..ebc8a7e21a31 100644 --- a/drivers/phy/tegra/xusb-tegra210.c +++ b/drivers/phy/tegra/xusb-tegra210.c @@ -3078,7 +3078,6 @@ tegra210_usb3_port_map(struct tegra_xusb_port *port) static const struct tegra_xusb_port_ops tegra210_usb3_port_ops = { .release = tegra_xusb_usb3_port_release, - .remove = tegra_xusb_usb3_port_remove, .enable = tegra210_usb3_port_enable, .disable = tegra210_usb3_port_disable, .map = tegra210_usb3_port_map, diff --git a/drivers/phy/tegra/xusb.c b/drivers/phy/tegra/xusb.c index dce45fbbd699..ff4b930879f3 100644 --- a/drivers/phy/tegra/xusb.c +++ b/drivers/phy/tegra/xusb.c @@ -954,8 +954,7 @@ static int tegra_xusb_usb3_port_parse_dt(struct tegra_xusb_usb3_port *usb3) return -EINVAL; } - usb3->supply = regulator_get(&port->dev, "vbus"); - return PTR_ERR_OR_ZERO(usb3->supply); + return 0; } static int tegra_xusb_add_usb3_port(struct tegra_xusb_padctl *padctl, @@ -1012,13 +1011,6 @@ void tegra_xusb_usb3_port_release(struct tegra_xusb_port *port) kfree(usb3); } -void tegra_xusb_usb3_port_remove(struct tegra_xusb_port *port) -{ - struct tegra_xusb_usb3_port *usb3 = to_usb3_port(port); - - 
regulator_put(usb3->supply); -} - static void __tegra_xusb_remove_ports(struct tegra_xusb_padctl *padctl) { struct tegra_xusb_port *port, *tmp; diff --git a/drivers/phy/tegra/xusb.h b/drivers/phy/tegra/xusb.h index 8cfbbdbd6e0c..c384734a61c2 100644 --- a/drivers/phy/tegra/xusb.h +++ b/drivers/phy/tegra/xusb.h @@ -359,7 +359,6 @@ void tegra_xusb_hsic_port_release(struct tegra_xusb_port *port); struct tegra_xusb_usb3_port { struct tegra_xusb_port base; - struct regulator *supply; bool context_saved; unsigned int port; bool internal; @@ -381,7 +380,6 @@ struct tegra_xusb_usb3_port * tegra_xusb_find_usb3_port(struct tegra_xusb_padctl *padctl, unsigned int index); void tegra_xusb_usb3_port_release(struct tegra_xusb_port *port); -void tegra_xusb_usb3_port_remove(struct tegra_xusb_port *port); struct tegra_xusb_port_ops { void (*release)(struct tegra_xusb_port *port); From 846d479224537185768276dd4a84c1bda2bbcd4e Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Tue, 18 Oct 2022 13:58:41 -0400 Subject: [PATCH 0969/4122] doc: phy: Document typical order of API calls Document the typical order of API calls to be used by new drivers and controllers. Many existing controllers follow this order, but some do not. This is especially true for controllers designed to work with one particular PHY driver, which may not need a call to (for example) phy_init.
Signed-off-by: Sean Anderson Link: https://lore.kernel.org/r/20221018175841.1906611-1-sean.anderson@seco.com Signed-off-by: Vinod Koul --- Documentation/driver-api/phy/phy.rst | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/Documentation/driver-api/phy/phy.rst b/Documentation/driver-api/phy/phy.rst index 8fc1ce0bb905..8e8b3e8f9523 100644 --- a/Documentation/driver-api/phy/phy.rst +++ b/Documentation/driver-api/phy/phy.rst @@ -94,7 +94,8 @@ Inorder to dereference the private data (in phy_ops), the phy provider driver can use phy_set_drvdata() after creating the PHY and use phy_get_drvdata() in phy_ops to get back the private data. -4. Getting a reference to the PHY +Getting a reference to the PHY +============================== Before the controller can make use of the PHY, it has to get a reference to it. This framework provides the following APIs to get a reference to the PHY. @@ -130,6 +131,28 @@ the phy_init() and phy_exit() calls, and phy_power_on() and phy_power_off() calls are all NOP when applied to a NULL phy. The NULL phy is useful in devices for handling optional phy devices. +Order of API calls +================== + +The general order of calls should be:: + + [devm_][of_]phy_get() + phy_init() + phy_power_on() + [phy_set_mode[_ext]()] + ... + phy_power_off() + phy_exit() + [[of_]phy_put()] + +Some PHY drivers may not implement :c:func:`phy_init` or :c:func:`phy_power_on`, +but controllers should always call these functions to be compatible with other +PHYs. Some PHYs may require :c:func:`phy_set_mode `, while +others may use a default mode (typically configured via devicetree or other +firmware). For compatibility, you should always call this function if you know +what mode you will be using. Generally, this function should be called after +:c:func:`phy_power_on`, although some PHY drivers may allow it at any time. 
+ Releasing a reference to the PHY ================================ From 4eace75e0853273755b878ffa9cce6de84df975a Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Fri, 4 Nov 2022 18:49:57 -0500 Subject: [PATCH 0970/4122] RDMA/irdma: Report the correct link speed The active link speed is currently hard-coded in irdma_query_port due to which the port rate in ibstatus does not reflect the active link speed. Call ib_get_eth_speed in irdma_query_port to get the active link speed. Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Reported-by: Kamal Heib Signed-off-by: Shiraz Saleem Link: https://lore.kernel.org/r/20221104234957.1135-1-shiraz.saleem@intel.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/verbs.c | 35 +++-------------------------- 1 file changed, 3 insertions(+), 32 deletions(-) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index a22afbb25bc5..434241789f12 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -63,36 +63,6 @@ static int irdma_query_device(struct ib_device *ibdev, return 0; } -/** - * irdma_get_eth_speed_and_width - Get IB port speed and width from netdev speed - * @link_speed: netdev phy link speed - * @active_speed: IB port speed - * @active_width: IB port width - */ -static void irdma_get_eth_speed_and_width(u32 link_speed, u16 *active_speed, - u8 *active_width) -{ - if (link_speed <= SPEED_1000) { - *active_width = IB_WIDTH_1X; - *active_speed = IB_SPEED_SDR; - } else if (link_speed <= SPEED_10000) { - *active_width = IB_WIDTH_1X; - *active_speed = IB_SPEED_FDR10; - } else if (link_speed <= SPEED_20000) { - *active_width = IB_WIDTH_4X; - *active_speed = IB_SPEED_DDR; - } else if (link_speed <= SPEED_25000) { - *active_width = IB_WIDTH_1X; - *active_speed = IB_SPEED_EDR; - } else if (link_speed <= SPEED_40000) { - *active_width = IB_WIDTH_4X; - *active_speed = IB_SPEED_FDR10; - } else { - *active_width = IB_WIDTH_4X; -
*active_speed = IB_SPEED_EDR; - } -} - /** * irdma_query_port - get port attributes * @ibdev: device pointer from stack @@ -120,8 +90,9 @@ static int irdma_query_port(struct ib_device *ibdev, u32 port, props->state = IB_PORT_DOWN; props->phys_state = IB_PORT_PHYS_STATE_DISABLED; } - irdma_get_eth_speed_and_width(SPEED_100000, &props->active_speed, - &props->active_width); + + ib_get_eth_speed(ibdev, port, &props->active_speed, + &props->active_width); if (rdma_protocol_roce(ibdev, 1)) { props->gid_tbl_len = 32; From ece43fad220ba03c529cc0f6f302d796044e8476 Mon Sep 17 00:00:00 2001 From: Cheng Xu Date: Mon, 7 Nov 2022 10:18:43 +0800 Subject: [PATCH 0971/4122] RDMA/erdma: Extend access right field of FRMR and REG MR to support atomic To support atomic operations, IB_ACCESS_REMOTE_ATOMIC right should be passed to hardware for permission check. Since "access mode" field in FRMR SQE and RegMr command is never used by hw, we remove the "access mode" field, so that we can then have enough space to extend access fields. 
Signed-off-by: Cheng Xu Link: https://lore.kernel.org/r/20221107021845.44598-2-chengyou@linux.alibaba.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/erdma/erdma_hw.h | 6 ++---- drivers/infiniband/hw/erdma/erdma_qp.c | 3 +-- drivers/infiniband/hw/erdma/erdma_verbs.c | 3 +-- drivers/infiniband/hw/erdma/erdma_verbs.h | 12 +++++++----- 4 files changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/hw/erdma/erdma_hw.h b/drivers/infiniband/hw/erdma/erdma_hw.h index e788887732e1..2a9a4c73d52c 100644 --- a/drivers/infiniband/hw/erdma/erdma_hw.h +++ b/drivers/infiniband/hw/erdma/erdma_hw.h @@ -224,8 +224,7 @@ struct erdma_cmdq_create_cq_req { /* regmr cfg1 */ #define ERDMA_CMD_REGMR_PD_MASK GENMASK(31, 12) #define ERDMA_CMD_REGMR_TYPE_MASK GENMASK(7, 6) -#define ERDMA_CMD_REGMR_RIGHT_MASK GENMASK(5, 2) -#define ERDMA_CMD_REGMR_ACC_MODE_MASK GENMASK(1, 0) +#define ERDMA_CMD_REGMR_RIGHT_MASK GENMASK(5, 1) /* regmr cfg2 */ #define ERDMA_CMD_REGMR_PAGESIZE_MASK GENMASK(31, 27) @@ -370,8 +369,7 @@ struct erdma_rqe { #define ERDMA_SQE_HDR_WQEBB_INDEX_MASK GENMASK_ULL(15, 0) /* REG MR attrs */ -#define ERDMA_SQE_MR_MODE_MASK GENMASK(1, 0) -#define ERDMA_SQE_MR_ACCESS_MASK GENMASK(5, 2) +#define ERDMA_SQE_MR_ACCESS_MASK GENMASK(5, 1) #define ERDMA_SQE_MR_MTT_TYPE_MASK GENMASK(7, 6) #define ERDMA_SQE_MR_MTT_CNT_MASK GENMASK(31, 12) diff --git a/drivers/infiniband/hw/erdma/erdma_qp.c b/drivers/infiniband/hw/erdma/erdma_qp.c index 5fe1a339a435..c7f343173cb9 100644 --- a/drivers/infiniband/hw/erdma/erdma_qp.c +++ b/drivers/infiniband/hw/erdma/erdma_qp.c @@ -397,8 +397,7 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi, regmr_sge->addr = cpu_to_le64(mr->ibmr.iova); regmr_sge->length = cpu_to_le32(mr->ibmr.length); regmr_sge->stag = cpu_to_le32(reg_wr(send_wr)->key); - attrs = FIELD_PREP(ERDMA_SQE_MR_MODE_MASK, 0) | - FIELD_PREP(ERDMA_SQE_MR_ACCESS_MASK, mr->access) | + attrs = FIELD_PREP(ERDMA_SQE_MR_ACCESS_MASK, mr->access) | 
FIELD_PREP(ERDMA_SQE_MR_MTT_CNT_MASK, mr->mem.mtt_nents); diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c index 62be98e2b941..f3bf87f17527 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.c +++ b/drivers/infiniband/hw/erdma/erdma_verbs.c @@ -118,8 +118,7 @@ static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr) FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK, mr->ibmr.lkey >> 8); req.cfg1 = FIELD_PREP(ERDMA_CMD_REGMR_PD_MASK, pd->pdn) | FIELD_PREP(ERDMA_CMD_REGMR_TYPE_MASK, mr->type) | - FIELD_PREP(ERDMA_CMD_REGMR_RIGHT_MASK, mr->access) | - FIELD_PREP(ERDMA_CMD_REGMR_ACC_MODE_MASK, 0); + FIELD_PREP(ERDMA_CMD_REGMR_RIGHT_MASK, mr->access); req.cfg2 = FIELD_PREP(ERDMA_CMD_REGMR_PAGESIZE_MASK, ilog2(mr->mem.page_size)) | FIELD_PREP(ERDMA_CMD_REGMR_MTT_TYPE_MASK, mr->mem.mtt_type) | diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h index ab6380635e9e..a5574f0252bb 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.h +++ b/drivers/infiniband/hw/erdma/erdma_verbs.h @@ -71,16 +71,18 @@ struct erdma_pd { #define ERDMA_MR_INLINE_MTT 0 #define ERDMA_MR_INDIRECT_MTT 1 -#define ERDMA_MR_ACC_LR BIT(0) -#define ERDMA_MR_ACC_LW BIT(1) -#define ERDMA_MR_ACC_RR BIT(2) -#define ERDMA_MR_ACC_RW BIT(3) +#define ERDMA_MR_ACC_RA BIT(0) +#define ERDMA_MR_ACC_LR BIT(1) +#define ERDMA_MR_ACC_LW BIT(2) +#define ERDMA_MR_ACC_RR BIT(3) +#define ERDMA_MR_ACC_RW BIT(4) static inline u8 to_erdma_access_flags(int access) { return (access & IB_ACCESS_REMOTE_READ ? ERDMA_MR_ACC_RR : 0) | (access & IB_ACCESS_LOCAL_WRITE ? ERDMA_MR_ACC_LW : 0) | - (access & IB_ACCESS_REMOTE_WRITE ? ERDMA_MR_ACC_RW : 0); + (access & IB_ACCESS_REMOTE_WRITE ? ERDMA_MR_ACC_RW : 0) | + (access & IB_ACCESS_REMOTE_ATOMIC ? 
ERDMA_MR_ACC_RA : 0); } struct erdma_mem { From 71c6925f280ae8cb52eafee2404ae75c176c28ba Mon Sep 17 00:00:00 2001 From: Cheng Xu Date: Mon, 7 Nov 2022 10:18:44 +0800 Subject: [PATCH 0972/4122] RDMA/erdma: Report atomic capacity when hardware supports atomic feature Introduce "capacity flags" field at where hardware put all zeros originally in "query device" response. Using this field, hardware can report atomic feature if supports. Signed-off-by: Cheng Xu Link: https://lore.kernel.org/r/20221107021845.44598-3-chengyou@linux.alibaba.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/erdma/erdma.h | 1 + drivers/infiniband/hw/erdma/erdma_hw.h | 5 +++++ drivers/infiniband/hw/erdma/erdma_main.c | 1 + drivers/infiniband/hw/erdma/erdma_verbs.c | 4 ++++ 4 files changed, 11 insertions(+) diff --git a/drivers/infiniband/hw/erdma/erdma.h b/drivers/infiniband/hw/erdma/erdma.h index 730783fbc894..bb23d897c710 100644 --- a/drivers/infiniband/hw/erdma/erdma.h +++ b/drivers/infiniband/hw/erdma/erdma.h @@ -124,6 +124,7 @@ struct erdma_devattr { u32 fw_version; unsigned char peer_addr[ETH_ALEN]; + unsigned long cap_flags; int numa_node; enum erdma_cc_alg cc; diff --git a/drivers/infiniband/hw/erdma/erdma_hw.h b/drivers/infiniband/hw/erdma/erdma_hw.h index 2a9a4c73d52c..808e7ee56d93 100644 --- a/drivers/infiniband/hw/erdma/erdma_hw.h +++ b/drivers/infiniband/hw/erdma/erdma_hw.h @@ -303,6 +303,7 @@ struct erdma_cmdq_destroy_qp_req { /* cap qword 0 definition */ #define ERDMA_CMD_DEV_CAP_MAX_CQE_MASK GENMASK_ULL(47, 40) +#define ERDMA_CMD_DEV_CAP_FLAGS_MASK GENMASK_ULL(31, 24) #define ERDMA_CMD_DEV_CAP_MAX_RECV_WR_MASK GENMASK_ULL(23, 16) #define ERDMA_CMD_DEV_CAP_MAX_MR_SIZE_MASK GENMASK_ULL(7, 0) @@ -314,6 +315,10 @@ struct erdma_cmdq_destroy_qp_req { #define ERDMA_NQP_PER_QBLOCK 1024 +enum { + ERDMA_DEV_CAP_FLAGS_ATOMIC = 1 << 7, +}; + #define ERDMA_CMD_INFO0_FW_VER_MASK GENMASK_ULL(31, 0) /* CQE hdr */ diff --git a/drivers/infiniband/hw/erdma/erdma_main.c 
b/drivers/infiniband/hw/erdma/erdma_main.c index 49778bb294ae..e44b06fea595 100644 --- a/drivers/infiniband/hw/erdma/erdma_main.c +++ b/drivers/infiniband/hw/erdma/erdma_main.c @@ -374,6 +374,7 @@ static int erdma_dev_attrs_init(struct erdma_dev *dev) dev->attrs.max_qp = ERDMA_NQP_PER_QBLOCK * ERDMA_GET_CAP(QBLOCK, cap1); dev->attrs.max_mr = dev->attrs.max_qp << 1; dev->attrs.max_cq = dev->attrs.max_qp << 1; + dev->attrs.cap_flags = ERDMA_GET_CAP(FLAGS, cap0); dev->attrs.max_send_wr = ERDMA_MAX_SEND_WR; dev->attrs.max_ord = ERDMA_MAX_ORD; diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c index f3bf87f17527..d843ce1f35f3 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.c +++ b/drivers/infiniband/hw/erdma/erdma_verbs.c @@ -288,6 +288,10 @@ int erdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, attr->max_mw = dev->attrs.max_mw; attr->max_fast_reg_page_list_len = ERDMA_MAX_FRMR_PA; attr->page_size_cap = ERDMA_PAGE_SIZE_SUPPORT; + + if (dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_ATOMIC) + attr->atomic_cap = IB_ATOMIC_GLOB; + attr->fw_ver = dev->attrs.fw_version; if (dev->netdev) From 0ca9c2e2844aa285c3656a29d4803839cfa8bca9 Mon Sep 17 00:00:00 2001 From: Cheng Xu Date: Mon, 7 Nov 2022 10:18:45 +0800 Subject: [PATCH 0973/4122] RDMA/erdma: Implement atomic operations support Add atomic operations support in post_send and poll_cq implementation. Also, rename 'laddr' and 'lkey' in struct erdma_sge to 'addr' and 'key', because this structure is used for both local and remote SGEs. 
Signed-off-by: Cheng Xu Link: https://lore.kernel.org/r/20221107021845.44598-4-chengyou@linux.alibaba.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/erdma/erdma_cq.c | 2 ++ drivers/infiniband/hw/erdma/erdma_hw.h | 18 ++++++++++-- drivers/infiniband/hw/erdma/erdma_qp.c | 40 ++++++++++++++++++++++---- 3 files changed, 52 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/erdma/erdma_cq.c b/drivers/infiniband/hw/erdma/erdma_cq.c index 58e0dc5c75d1..cabd8678b355 100644 --- a/drivers/infiniband/hw/erdma/erdma_cq.c +++ b/drivers/infiniband/hw/erdma/erdma_cq.c @@ -64,6 +64,8 @@ static const enum ib_wc_opcode wc_mapping_table[ERDMA_NUM_OPCODES] = { [ERDMA_OP_REG_MR] = IB_WC_REG_MR, [ERDMA_OP_LOCAL_INV] = IB_WC_LOCAL_INV, [ERDMA_OP_READ_WITH_INV] = IB_WC_RDMA_READ, + [ERDMA_OP_ATOMIC_CAS] = IB_WC_COMP_SWAP, + [ERDMA_OP_ATOMIC_FAD] = IB_WC_FETCH_ADD, }; static const struct { diff --git a/drivers/infiniband/hw/erdma/erdma_hw.h b/drivers/infiniband/hw/erdma/erdma_hw.h index 808e7ee56d93..1b2e2b70678f 100644 --- a/drivers/infiniband/hw/erdma/erdma_hw.h +++ b/drivers/infiniband/hw/erdma/erdma_hw.h @@ -344,9 +344,9 @@ struct erdma_cqe { }; struct erdma_sge { - __aligned_le64 laddr; + __aligned_le64 addr; __le32 length; - __le32 lkey; + __le32 key; }; /* Receive Queue Element */ @@ -413,6 +413,16 @@ struct erdma_readreq_sqe { __le32 rsvd; }; +struct erdma_atomic_sqe { + __le64 hdr; + __le64 rsvd; + __le64 fetchadd_swap_data; + __le64 cmp_data; + + struct erdma_sge remote; + struct erdma_sge sgl; +}; + struct erdma_reg_mr_sqe { __le64 hdr; __le64 addr; @@ -472,7 +482,9 @@ enum erdma_opcode { ERDMA_OP_REG_MR = 14, ERDMA_OP_LOCAL_INV = 15, ERDMA_OP_READ_WITH_INV = 16, - ERDMA_NUM_OPCODES = 17, + ERDMA_OP_ATOMIC_CAS = 17, + ERDMA_OP_ATOMIC_FAD = 18, + ERDMA_NUM_OPCODES = 19, ERDMA_OP_INVALID = ERDMA_NUM_OPCODES + 1 }; diff --git a/drivers/infiniband/hw/erdma/erdma_qp.c b/drivers/infiniband/hw/erdma/erdma_qp.c index c7f343173cb9..521e97258de7 100644 --- 
a/drivers/infiniband/hw/erdma/erdma_qp.c +++ b/drivers/infiniband/hw/erdma/erdma_qp.c @@ -285,15 +285,16 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi, u32 wqe_size, wqebb_cnt, hw_op, flags, sgl_offset; u32 idx = *pi & (qp->attrs.sq_size - 1); enum ib_wr_opcode op = send_wr->opcode; + struct erdma_atomic_sqe *atomic_sqe; struct erdma_readreq_sqe *read_sqe; struct erdma_reg_mr_sqe *regmr_sge; struct erdma_write_sqe *write_sqe; struct erdma_send_sqe *send_sqe; struct ib_rdma_wr *rdma_wr; - struct erdma_mr *mr; + struct erdma_sge *sge; __le32 *length_field; + struct erdma_mr *mr; u64 wqe_hdr, *entry; - struct ib_sge *sge; u32 attrs; int ret; @@ -360,9 +361,9 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi, sge = get_queue_entry(qp->kern_qp.sq_buf, idx + 1, qp->attrs.sq_size, SQEBB_SHIFT); - sge->addr = rdma_wr->remote_addr; - sge->lkey = rdma_wr->rkey; - sge->length = send_wr->sg_list[0].length; + sge->addr = cpu_to_le64(rdma_wr->remote_addr); + sge->key = cpu_to_le32(rdma_wr->rkey); + sge->length = cpu_to_le32(send_wr->sg_list[0].length); wqe_size = sizeof(struct erdma_readreq_sqe) + send_wr->num_sge * sizeof(struct ib_sge); @@ -423,6 +424,35 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi, regmr_sge->stag = cpu_to_le32(send_wr->ex.invalidate_rkey); wqe_size = sizeof(struct erdma_reg_mr_sqe); goto out; + case IB_WR_ATOMIC_CMP_AND_SWP: + case IB_WR_ATOMIC_FETCH_AND_ADD: + atomic_sqe = (struct erdma_atomic_sqe *)entry; + if (op == IB_WR_ATOMIC_CMP_AND_SWP) { + wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, + ERDMA_OP_ATOMIC_CAS); + atomic_sqe->fetchadd_swap_data = + cpu_to_le64(atomic_wr(send_wr)->swap); + atomic_sqe->cmp_data = + cpu_to_le64(atomic_wr(send_wr)->compare_add); + } else { + wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, + ERDMA_OP_ATOMIC_FAD); + atomic_sqe->fetchadd_swap_data = + cpu_to_le64(atomic_wr(send_wr)->compare_add); + } + + sge = get_queue_entry(qp->kern_qp.sq_buf, idx + 1, + qp->attrs.sq_size, 
SQEBB_SHIFT); + sge->addr = cpu_to_le64(atomic_wr(send_wr)->remote_addr); + sge->key = cpu_to_le32(atomic_wr(send_wr)->rkey); + sge++; + + sge->addr = cpu_to_le64(send_wr->sg_list[0].addr); + sge->key = cpu_to_le32(send_wr->sg_list[0].lkey); + sge->length = cpu_to_le32(send_wr->sg_list[0].length); + + wqe_size = sizeof(*atomic_sqe); + goto out; default: return -EOPNOTSUPP; } From c8a51f03503633ec4a3f390aaadc3e8959fa44de Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 3 Nov 2022 22:28:17 +0200 Subject: [PATCH 0974/4122] gpio: Add Generic regmap GPIO conversion to the TODO list It's actually preferable to use Generic regmap GPIO over other simple approaches. Add a TODO item for that. Signed-off-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- drivers/gpio/TODO | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpio/TODO b/drivers/gpio/TODO index f87ff3fa8a53..76560744587a 100644 --- a/drivers/gpio/TODO +++ b/drivers/gpio/TODO @@ -124,6 +124,13 @@ Work items: this with dry-coding and sending to maintainers to test +Generic regmap GPIO + +In the very similar way to Generic MMIO GPIO convert the users which can +take advantage of using regmap over direct IO accessors. Note, even in +MMIO case the regmap MMIO with gpio-regmap.c is preferable over gpio-mmio.c. + + GPIOLIB irqchip The GPIOLIB irqchip is a helper irqchip for "simple cases" that should From 46af287cd5d7de316f1afd7966b27fc4369c6c35 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 4 Nov 2022 10:39:13 +0100 Subject: [PATCH 0975/4122] bus: mhi: host: pci_generic: add support for sc8280xp-crd SDX55 variant The SC8280XP Compute Reference Design (CRD) has an on-PCB SDX55 modem which uses MBIM. The exact channel configuration is not known but the Foxconn SDX55 configuration allows the modem to be used so reuse that one for now. 
Signed-off-by: Johan Hovold Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20221104093913.23347-1-johan+linaro@kernel.org [mani: modified the subject to format "bus: mhi: host"] Signed-off-by: Manivannan Sadhasivam --- drivers/bus/mhi/host/pci_generic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/bus/mhi/host/pci_generic.c b/drivers/bus/mhi/host/pci_generic.c index c4259cb2d289..fb3b050aed70 100644 --- a/drivers/bus/mhi/host/pci_generic.c +++ b/drivers/bus/mhi/host/pci_generic.c @@ -544,6 +544,8 @@ static const struct mhi_pci_dev_info mhi_telit_fn990_info = { static const struct pci_device_id mhi_pci_id_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_QCOM, 0x0304), .driver_data = (kernel_ulong_t) &mhi_qcom_sdx24_info }, + { PCI_DEVICE_SUB(PCI_VENDOR_ID_QCOM, 0x0306, PCI_VENDOR_ID_QCOM, 0x010c), + .driver_data = (kernel_ulong_t) &mhi_foxconn_sdx55_info }, /* EM919x (sdx55), use the same vid:pid as qcom-sdx55m */ { PCI_DEVICE_SUB(PCI_VENDOR_ID_QCOM, 0x0306, 0x18d7, 0x0200), .driver_data = (kernel_ulong_t) &mhi_sierra_em919x_info }, From a5cfc9d65879c0d377f732531a2e80ee3a9eebbc Mon Sep 17 00:00:00 2001 From: Rajat Khandelwal Date: Tue, 1 Nov 2022 17:20:42 +0530 Subject: [PATCH 0976/4122] thunderbolt: Add wake on connect/disconnect on USB4 ports Wake on connect/disconnect is only supported while runtime suspend for now, which is obviously necessary. It is also not inherently desired for the system to wakeup on Thunderbolt/USB4 hot plug events. However, we can still make user in control of waking up the system in the events of hot plug/unplug. This patch adds 'wakeup' attribute under 'usb4_portX/power' sysfs attribute and only enables wakes on connect/disconnect to the respective port when 'wakeup' is set to 'enabled'. The attribute is set to 'disabled' by default. 
Signed-off-by: Rajat Khandelwal Signed-off-by: Mika Westerberg --- drivers/thunderbolt/tb_regs.h | 2 ++ drivers/thunderbolt/usb4.c | 33 +++++++++++++++++++++++++-------- drivers/thunderbolt/usb4_port.c | 3 +++ 3 files changed, 30 insertions(+), 8 deletions(-) diff --git a/drivers/thunderbolt/tb_regs.h b/drivers/thunderbolt/tb_regs.h index 86319dca0f8c..3c38b0cb8f74 100644 --- a/drivers/thunderbolt/tb_regs.h +++ b/drivers/thunderbolt/tb_regs.h @@ -361,6 +361,8 @@ struct tb_regs_port_header { #define PORT_CS_18_BE BIT(8) #define PORT_CS_18_TCM BIT(9) #define PORT_CS_18_CPS BIT(10) +#define PORT_CS_18_WOCS BIT(16) +#define PORT_CS_18_WODS BIT(17) #define PORT_CS_18_WOU4S BIT(18) #define PORT_CS_19 0x13 #define PORT_CS_19_PC BIT(3) diff --git a/drivers/thunderbolt/usb4.c b/drivers/thunderbolt/usb4.c index f986854aa207..2ed50fcbcca7 100644 --- a/drivers/thunderbolt/usb4.c +++ b/drivers/thunderbolt/usb4.c @@ -155,6 +155,8 @@ static inline int usb4_switch_op_data(struct tb_switch *sw, u16 opcode, static void usb4_switch_check_wakes(struct tb_switch *sw) { + bool wakeup_usb4 = false; + struct usb4_port *usb4; struct tb_port *port; bool wakeup = false; u32 val; @@ -173,20 +175,31 @@ static void usb4_switch_check_wakes(struct tb_switch *sw) wakeup = val & (ROUTER_CS_6_WOPS | ROUTER_CS_6_WOUS); } - /* Check for any connected downstream ports for USB4 wake */ + /* + * Check for any downstream ports for USB4 wake, + * connection wake and disconnection wake. + */ tb_switch_for_each_port(sw, port) { - if (!tb_port_has_remote(port)) + if (!port->cap_usb4) continue; if (tb_port_read(port, &val, TB_CFG_PORT, port->cap_usb4 + PORT_CS_18, 1)) break; - tb_port_dbg(port, "USB4 wake: %s\n", - (val & PORT_CS_18_WOU4S) ? "yes" : "no"); + tb_port_dbg(port, "USB4 wake: %s, connection wake: %s, disconnection wake: %s\n", + (val & PORT_CS_18_WOU4S) ? "yes" : "no", + (val & PORT_CS_18_WOCS) ? "yes" : "no", + (val & PORT_CS_18_WODS) ? 
"yes" : "no"); - if (val & PORT_CS_18_WOU4S) - wakeup = true; + wakeup_usb4 = val & (PORT_CS_18_WOU4S | PORT_CS_18_WOCS | + PORT_CS_18_WODS); + + usb4 = port->usb4; + if (device_may_wakeup(&usb4->dev) && wakeup_usb4) + pm_wakeup_event(&usb4->dev, 0); + + wakeup |= wakeup_usb4; } if (wakeup) @@ -366,6 +379,7 @@ bool usb4_switch_lane_bonding_possible(struct tb_switch *sw) */ int usb4_switch_set_wake(struct tb_switch *sw, unsigned int flags) { + struct usb4_port *usb4; struct tb_port *port; u64 route = tb_route(sw); u32 val; @@ -395,10 +409,13 @@ int usb4_switch_set_wake(struct tb_switch *sw, unsigned int flags) val |= PORT_CS_19_WOU4; } else { bool configured = val & PORT_CS_19_PC; + usb4 = port->usb4; - if ((flags & TB_WAKE_ON_CONNECT) && !configured) + if (((flags & TB_WAKE_ON_CONNECT) | + device_may_wakeup(&usb4->dev)) && !configured) val |= PORT_CS_19_WOC; - if ((flags & TB_WAKE_ON_DISCONNECT) && configured) + if (((flags & TB_WAKE_ON_DISCONNECT) | + device_may_wakeup(&usb4->dev)) && configured) val |= PORT_CS_19_WOD; if ((flags & TB_WAKE_ON_USB4) && configured) val |= PORT_CS_19_WOU4; diff --git a/drivers/thunderbolt/usb4_port.c b/drivers/thunderbolt/usb4_port.c index 1a30c0a23286..e355bfd6343f 100644 --- a/drivers/thunderbolt/usb4_port.c +++ b/drivers/thunderbolt/usb4_port.c @@ -284,6 +284,9 @@ struct usb4_port *usb4_port_device_add(struct tb_port *port) } } + if (!tb_is_upstream_port(port)) + device_set_wakeup_capable(&usb4->dev, true); + pm_runtime_no_callbacks(&usb4->dev); pm_runtime_set_active(&usb4->dev); pm_runtime_enable(&usb4->dev); From 58635d6615f1e5a870548ae8999870fdfcdecec0 Mon Sep 17 00:00:00 2001 From: Nico Boehr Date: Mon, 7 Nov 2022 13:12:21 +0100 Subject: [PATCH 0977/4122] s390/mm: fix virtual-physical address confusion for swiotlb swiotlb passes virtual addresses to set_memory_encrypted() and set_memory_decrypted(), but uv_remove_shared() and uv_set_shared() expect physical addresses. 
This currently works, because virtual and physical addresses are the same. Add virt_to_phys() to resolve the virtual-physical confusion. Reported-by: Marc Hartmayer Signed-off-by: Nico Boehr Reviewed-by: Claudio Imbrenda Reviewed-by: Christian Borntraeger Link: https://lore.kernel.org/r/20221107121221.156274-2-nrb@linux.ibm.com Message-Id: <20221107121221.156274-2-nrb@linux.ibm.com> Signed-off-by: Janosch Frank --- arch/s390/include/asm/mem_encrypt.h | 4 ++-- arch/s390/mm/init.c | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/s390/include/asm/mem_encrypt.h b/arch/s390/include/asm/mem_encrypt.h index 08a8b96606d7..b85e13505a0f 100644 --- a/arch/s390/include/asm/mem_encrypt.h +++ b/arch/s390/include/asm/mem_encrypt.h @@ -4,8 +4,8 @@ #ifndef __ASSEMBLY__ -int set_memory_encrypted(unsigned long addr, int numpages); -int set_memory_decrypted(unsigned long addr, int numpages); +int set_memory_encrypted(unsigned long vaddr, int numpages); +int set_memory_decrypted(unsigned long vaddr, int numpages); #endif /* __ASSEMBLY__ */ diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 97d66a3e60fb..d509656c67d7 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -140,25 +140,25 @@ void mark_rodata_ro(void) debug_checkwx(); } -int set_memory_encrypted(unsigned long addr, int numpages) +int set_memory_encrypted(unsigned long vaddr, int numpages) { int i; /* make specified pages unshared, (swiotlb, dma_free) */ for (i = 0; i < numpages; ++i) { - uv_remove_shared(addr); - addr += PAGE_SIZE; + uv_remove_shared(virt_to_phys((void *)vaddr)); + vaddr += PAGE_SIZE; } return 0; } -int set_memory_decrypted(unsigned long addr, int numpages) +int set_memory_decrypted(unsigned long vaddr, int numpages) { int i; /* make specified pages shared (swiotlb, dma_alloca) */ for (i = 0; i < numpages; ++i) { - uv_set_shared(addr); - addr += PAGE_SIZE; + uv_set_shared(virt_to_phys((void *)vaddr)); + vaddr += PAGE_SIZE; } return 0; } From 
d8425a8c3a8419dd505016951dd6393f21fb394e Mon Sep 17 00:00:00 2001 From: Song Fuchang Date: Mon, 7 Nov 2022 19:18:35 +0530 Subject: [PATCH 0978/4122] bus: mhi: host: pci_generic: Add HP variant of T99W175 The Foxconn T99W175 modem has an HP variant, which has the following output from lspci: 01:00.0 Wireless controller [0d40]: Device 03f0:0a6c It also has some HP-specific serial numbers on the metal case. It works well with this driver, so add support for this to the pci_generic driver. Signed-off-by: Song Fuchang Reviewed-by: Manivannan Sadhasivam [mani: manually applied the patch] Signed-off-by: Manivannan Sadhasivam --- drivers/bus/mhi/host/pci_generic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/bus/mhi/host/pci_generic.c b/drivers/bus/mhi/host/pci_generic.c index fb3b050aed70..b58a30367896 100644 --- a/drivers/bus/mhi/host/pci_generic.c +++ b/drivers/bus/mhi/host/pci_generic.c @@ -598,6 +598,9 @@ static const struct pci_device_id mhi_pci_id_table[] = { /* MV32-WB (Cinterion) */ { PCI_DEVICE(0x1269, 0x00bb), .driver_data = (kernel_ulong_t) &mhi_mv32_info }, + /* T99W175 (sdx55), HP variant */ + { PCI_DEVICE(0x03f0, 0x0a6c), + .driver_data = (kernel_ulong_t) &mhi_foxconn_sdx55_info }, { } }; MODULE_DEVICE_TABLE(pci, mhi_pci_id_table); From a9cd6c6766857212894dd736d9f2bc29f1416f6a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 4 Nov 2022 16:44:52 -0300 Subject: [PATCH 0979/4122] perf trace: Add BPF augmenter to perf_event_open()'s 'struct perf_event_attr' arg Using BPF for that, doing a cleverish reuse of perf_event_attr__fprintf(), that really needs to be turned into __snprintf(), etc. But since the plan is to go the BTF way probably use libbpf's btf_dump__dump_type_data(). 
Example: [root@quaco ~]# perf trace -e ~acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.c,perf_event_open --max-events 10 perf stat --quiet sleep 0.001 fg 0.000 perf_event_open(attr_uptr: { type: 1, size: 128, config: 0x1, sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 258859 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 3 0.067 perf_event_open(attr_uptr: { type: 1, size: 128, config: 0x3, sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 258859 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 4 0.120 perf_event_open(attr_uptr: { type: 1, size: 128, config: 0x4, sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 258859 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 5 0.172 perf_event_open(attr_uptr: { type: 1, size: 128, config: 0x2, sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 258859 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 7 0.190 perf_event_open(attr_uptr: { size: 128, sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 258859 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 8 0.199 perf_event_open(attr_uptr: { size: 128, config: 0x1, sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 258859 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 9 0.204 perf_event_open(attr_uptr: { size: 128, config: 0x4, sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 258859 
(perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 10 0.210 perf_event_open(attr_uptr: { size: 128, config: 0x5, sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 258859 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 11 [root@quaco ~]# Suggested-by: Ian Rogers Tested-by: Ian Rogers Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/r/Y2V2Tpu+2vzJyon2@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 3 +- .../examples/bpf/augmented_raw_syscalls.c | 44 +++++++++++++++++++ tools/perf/trace/beauty/perf_event_open.c | 44 +++++++++++++++++++ 3 files changed, 90 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 72991528687e..5690c33c523b 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1053,7 +1053,8 @@ static struct syscall_fmt syscall_fmts[] = { .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, }, { .name = "perf_event_open", - .arg = { [2] = { .scnprintf = SCA_INT, /* cpu */ }, + .arg = { [0] = { .scnprintf = SCA_PERF_ATTR, /* attr */ }, + [2] = { .scnprintf = SCA_INT, /* cpu */ }, [3] = { .scnprintf = SCA_FD, /* group_fd */ }, [4] = { .scnprintf = SCA_PERF_FLAGS, /* flags */ }, }, }, { .name = "pipe2", diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c index 926238efd7d8..0599823e8ae1 100644 --- a/tools/perf/examples/bpf/augmented_raw_syscalls.c +++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c @@ -129,6 +129,7 @@ struct augmented_args_payload { struct augmented_arg arg, arg2; }; struct sockaddr_storage saddr; + char __data[sizeof(struct augmented_arg)]; }; }; @@ -293,6 +294,49 @@ int sys_enter_renameat(struct syscall_enter_args *args) return augmented__output(args, augmented_args, len); } +#define 
PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ + +// we need just the start, get the size to then copy it +struct perf_event_attr_size { + __u32 type; + /* + * Size of the attr structure, for fwd/bwd compat. + */ + __u32 size; +}; + +SEC("!syscalls:sys_enter_perf_event_open") +int sys_enter_perf_event_open(struct syscall_enter_args *args) +{ + struct augmented_args_payload *augmented_args = augmented_args_payload(); + const struct perf_event_attr_size *attr = (const struct perf_event_attr_size *)args->args[0], *attr_read; + unsigned int len = sizeof(augmented_args->args); + + if (augmented_args == NULL) + goto failure; + + if (bpf_probe_read(&augmented_args->__data, sizeof(*attr), attr) < 0) + goto failure; + + attr_read = (const struct perf_event_attr_size *)augmented_args->__data; + + __u32 size = attr_read->size; + + if (!size) + size = PERF_ATTR_SIZE_VER0; + + if (size > sizeof(augmented_args->__data)) + goto failure; + + // Now that we read attr->size and tested it against the size limits, read it completely + if (bpf_probe_read(&augmented_args->__data, size, attr) < 0) + goto failure; + + return augmented__output(args, augmented_args, len + size); +failure: + return 1; /* Failure: don't filter */ +} + static pid_t getpid(void) { return bpf_get_current_pid_tgid(); diff --git a/tools/perf/trace/beauty/perf_event_open.c b/tools/perf/trace/beauty/perf_event_open.c index 11d47dbe63bd..01ee15fe9d0c 100644 --- a/tools/perf/trace/beauty/perf_event_open.c +++ b/tools/perf/trace/beauty/perf_event_open.c @@ -44,3 +44,47 @@ static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size, } #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags + +struct attr_fprintf_args { + size_t size, printed; + char *bf; + bool first; +}; + +static int attr__fprintf(FILE *fp __maybe_unused, const char *name, const char *val, void *priv) +{ + struct attr_fprintf_args *args = priv; + size_t printed = scnprintf(args->bf + args->printed , args->size - args->printed, 
"%s%s: %s", args->first ? "" : ", ", name, val); + + args->first = false; + args->printed += printed; + return printed; +} + +static size_t perf_event_attr___scnprintf(struct perf_event_attr *attr, char *bf, size_t size, bool show_zeros __maybe_unused) +{ + struct attr_fprintf_args args = { + .printed = scnprintf(bf, size, "{ "), + .size = size, + .first = true, + .bf = bf, + }; + + perf_event_attr__fprintf(stdout, attr, attr__fprintf, &args); + return args.printed + scnprintf(bf + args.printed, size - args.printed, " }"); +} + +static size_t syscall_arg__scnprintf_augmented_perf_event_attr(struct syscall_arg *arg, char *bf, size_t size) +{ + return perf_event_attr___scnprintf((void *)arg->augmented.args, bf, size, arg->trace->show_zeros); +} + +static size_t syscall_arg__scnprintf_perf_event_attr(char *bf, size_t size, struct syscall_arg *arg) +{ + if (arg->augmented.args) + return syscall_arg__scnprintf_augmented_perf_event_attr(arg, bf, size); + + return scnprintf(bf, size, "%#lx", arg->val); +} + +#define SCA_PERF_ATTR syscall_arg__scnprintf_perf_event_attr From 95e7fc84c78adeef654acb919f04d98a87e6372d Mon Sep 17 00:00:00 2001 From: Weilong Chen Date: Tue, 1 Nov 2022 16:24:42 +0800 Subject: [PATCH 0980/4122] dt-bindings: gpio: add entry for hisilicon,ascend910-gpio Add the new compatible for HiSilicon gpio controller driver. 
Signed-off-by: Weilong Chen Reviewed-by: Rob Herring Reviewed-by: Yicong Yang Signed-off-by: Bartosz Golaszewski --- .../gpio/hisilicon,ascend910-gpio.yaml | 56 +++++++++++++++++++ MAINTAINERS | 1 + 2 files changed, 57 insertions(+) create mode 100644 Documentation/devicetree/bindings/gpio/hisilicon,ascend910-gpio.yaml diff --git a/Documentation/devicetree/bindings/gpio/hisilicon,ascend910-gpio.yaml b/Documentation/devicetree/bindings/gpio/hisilicon,ascend910-gpio.yaml new file mode 100644 index 000000000000..735d97d645a0 --- /dev/null +++ b/Documentation/devicetree/bindings/gpio/hisilicon,ascend910-gpio.yaml @@ -0,0 +1,56 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/gpio/hisilicon,ascend910-gpio.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: HiSilicon common GPIO controller + +maintainers: + - Jay Fang + +description: + The HiSilicon common GPIO controller can be used for many different + types of SoC such as Huawei Ascend AI series chips. 
+ +properties: + compatible: + const: hisilicon,ascend910-gpio + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + gpio-controller: true + + "#gpio-cells": + const: 2 + + ngpios: + minimum: 1 + maximum: 32 + +required: + - compatible + - reg + - interrupts + - gpio-controller + - "#gpio-cells" + - ngpios + +additionalProperties: false + +examples: + - | + #include + + gpio@840d0000 { + compatible = "hisilicon,ascend910-gpio"; + reg = <0x840d0000 0x1000>; + ngpios = <32>; + gpio-controller; + #gpio-cells = <2>; + interrupts = ; + }; diff --git a/MAINTAINERS b/MAINTAINERS index 74efa0492c43..02f333c1093e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9198,6 +9198,7 @@ HISILICON GPIO DRIVER M: Jay Fang L: linux-gpio@vger.kernel.org S: Maintained +F: Documentation/devicetree/bindings/gpio/hisilicon,ascend910-gpio.yaml F: drivers/gpio/gpio-hisi.c HISILICON HIGH PERFORMANCE RSA ENGINE DRIVER (HPRE) From 80280df758c1498485988b30cf6887fde7796056 Mon Sep 17 00:00:00 2001 From: Weilong Chen Date: Tue, 1 Nov 2022 16:24:41 +0800 Subject: [PATCH 0981/4122] gpio: hisi: Add initial device tree support Add support for HiSilicon GPIO controller in embedded platform, which boot from devicetree. 
Signed-off-by: Weilong Chen Acked-by: Jay Fang Reviewed-by: Yicong Yang Signed-off-by: Bartosz Golaszewski --- drivers/gpio/Kconfig | 2 +- drivers/gpio/gpio-hisi.c | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 8c756cb29214..4bfedb0109a7 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -319,7 +319,7 @@ config GPIO_GRGPIO config GPIO_HISI tristate "HiSilicon GPIO controller driver" - depends on (ARM64 && ACPI) || COMPILE_TEST + depends on ARM64 || COMPILE_TEST select GPIO_GENERIC select GPIOLIB_IRQCHIP help diff --git a/drivers/gpio/gpio-hisi.c b/drivers/gpio/gpio-hisi.c index 3caabef5c7a2..55bd69043bf4 100644 --- a/drivers/gpio/gpio-hisi.c +++ b/drivers/gpio/gpio-hisi.c @@ -221,6 +221,12 @@ static const struct acpi_device_id hisi_gpio_acpi_match[] = { }; MODULE_DEVICE_TABLE(acpi, hisi_gpio_acpi_match); +static const struct of_device_id hisi_gpio_dts_match[] = { + { .compatible = "hisilicon,ascend910-gpio", }, + { } +}; +MODULE_DEVICE_TABLE(of, hisi_gpio_dts_match); + static void hisi_gpio_get_pdata(struct device *dev, struct hisi_gpio *hisi_gpio) { @@ -311,6 +317,7 @@ static struct platform_driver hisi_gpio_driver = { .driver = { .name = HISI_GPIO_DRIVER_NAME, .acpi_match_table = hisi_gpio_acpi_match, + .of_match_table = hisi_gpio_dts_match, }, .probe = hisi_gpio_probe, }; From 9f0b4cc174c3c5ceb9322d01372369610f327c42 Mon Sep 17 00:00:00 2001 From: Yipeng Zou Date: Fri, 4 Nov 2022 11:24:30 +0800 Subject: [PATCH 0982/4122] PCI/ACPI: Use METHOD_NAME__UID instead of plain string Replace the string "_UID" with the METHOD_NAME__UID macro so instances are easier to find. 
Link: https://lore.kernel.org/r/20221104032430.186424-1-zouyipeng@huawei.com Signed-off-by: Yipeng Zou Signed-off-by: Bjorn Helgaas --- drivers/pci/pci-acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index a46fec776ad7..068d6745bf98 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -67,7 +67,7 @@ static acpi_status acpi_match_rc(acpi_handle handle, u32 lvl, void *context, unsigned long long uid; acpi_status status; - status = acpi_evaluate_integer(handle, "_UID", NULL, &uid); + status = acpi_evaluate_integer(handle, METHOD_NAME__UID, NULL, &uid); if (ACPI_FAILURE(status) || uid != *segment) return AE_CTRL_DEPTH; From 44e985938e85503d0a69ec538e15fd33c1a4df05 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 7 Nov 2022 15:31:08 -0600 Subject: [PATCH 0983/4122] Revert "PCI: Clear PCI_STATUS when setting up device" This reverts commit 6cd514e58f12b211d638dbf6f791fa18d854f09c. Christophe Fergeau reported that 6cd514e58f12 ("PCI: Clear PCI_STATUS when setting up device") causes boot failures when trying to start linux guests with Apple's virtualization framework (for example using https://developer.apple.com/documentation/virtualization/running_linux_in_a_virtual_machine?language=objc) 6cd514e58f12 only solved a cosmetic problem, so revert it to fix the boot failures. 
Link: https://bugzilla.redhat.com/show_bug.cgi?id=2137803 Signed-off-by: Bjorn Helgaas --- drivers/pci/probe.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index b66fa42c4b1f..1d6f7b502020 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1891,9 +1891,6 @@ int pci_setup_device(struct pci_dev *dev) dev->broken_intx_masking = pci_intx_mask_broken(dev); - /* Clear errors left from system firmware */ - pci_write_config_word(dev, PCI_STATUS, 0xffff); - switch (dev->hdr_type) { /* header type */ case PCI_HEADER_TYPE_NORMAL: /* standard header */ if (class == PCI_CLASS_BRIDGE_PCI) From bc77fb9ce40c276cedf889dca2bc6d1b1edc2763 Mon Sep 17 00:00:00 2001 From: Keoseong Park Date: Fri, 28 Oct 2022 16:35:53 +0900 Subject: [PATCH 0984/4122] scsi: ufs: core: Refactor ufshcd_hba_enable() Use "if error return" style in ufshcd_hba_enable(). No functional change. Cc: Bart Van Assche Cc: Alim Akhtar Signed-off-by: Keoseong Park Link: https://lore.kernel.org/r/20221028073553epcms2p6dc4f8bdbebdc8f96f43fc4197b3edd0c@epcms2p6 Reviewed-by: Bart Van Assche Signed-off-by: Martin K. 
Petersen --- drivers/ufs/core/ufshcd.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index ee73d7036133..0591d05c078a 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -4668,14 +4668,18 @@ int ufshcd_hba_enable(struct ufs_hba *hba) /* enable UIC related interrupts */ ufshcd_enable_intr(hba, UFSHCD_UIC_MASK); ret = ufshcd_dme_reset(hba); - if (!ret) { - ret = ufshcd_dme_enable(hba); - if (!ret) - ufshcd_vops_hce_enable_notify(hba, POST_CHANGE); - if (ret) - dev_err(hba->dev, - "Host controller enable failed with non-hce\n"); + if (ret) { + dev_err(hba->dev, "DME_RESET failed\n"); + return ret; } + + ret = ufshcd_dme_enable(hba); + if (ret) { + dev_err(hba->dev, "Enabling DME failed\n"); + return ret; + } + + ufshcd_vops_hce_enable_notify(hba, POST_CHANGE); } else { ret = ufshcd_hba_execute_hce(hba); } From e47c49219c1e20760cd66cef4411b35a3a86c0a2 Mon Sep 17 00:00:00 2001 From: Jilin Yuan Date: Fri, 28 Oct 2022 21:37:08 +0800 Subject: [PATCH 0985/4122] scsi: NCR5380: Fix repeated words in comment Delete the redundant word 'the'. Signed-off-by: Jilin Yuan Link: https://lore.kernel.org/r/20221028133708.60030-1-yuanjilin@cdjrlc.com Acked-by: Finn Thain Signed-off-by: Martin K. Petersen --- drivers/scsi/NCR5380.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/NCR5380.c b/drivers/scsi/NCR5380.c index dece7d9eb4d3..ca85bddb582b 100644 --- a/drivers/scsi/NCR5380.c +++ b/drivers/scsi/NCR5380.c @@ -858,7 +858,7 @@ static void NCR5380_dma_complete(struct Scsi_Host *instance) * latency, but a bus reset will reset chip logic. Checking for parity error * is unnecessary because that interrupt is never enabled. A Loss of BSY * condition will clear DMA Mode. We can tell when this occurs because the - * the Busy Monitor interrupt is enabled together with DMA Mode. + * Busy Monitor interrupt is enabled together with DMA Mode. 
*/ static irqreturn_t __maybe_unused NCR5380_intr(int irq, void *dev_id) From c7cbaab2d464484008b4f8ed66b6bb0ee99745c2 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 31 Oct 2022 15:45:38 +0000 Subject: [PATCH 0986/4122] scsi: message: fusion: Remove variable 'where' Variable 'where' is just being incremented and it's never used anywhere else. Remove it. Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20221031154538.870223-1-colin.i.king@gmail.com Signed-off-by: Martin K. Petersen --- drivers/message/fusion/mptctl.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/message/fusion/mptctl.c b/drivers/message/fusion/mptctl.c index 52c7020c9d19..1decd09a08d8 100644 --- a/drivers/message/fusion/mptctl.c +++ b/drivers/message/fusion/mptctl.c @@ -2879,7 +2879,6 @@ static struct mpt_pci_driver mptctl_driver = { static int __init mptctl_init(void) { int err; - int where = 1; show_mptmod_ver(my_NAME, my_VERSION); @@ -2898,7 +2897,6 @@ static int __init mptctl_init(void) /* * Install our handler */ - ++where; mptctl_id = mpt_register(mptctl_reply, MPTCTL_DRIVER, "mptctl_reply"); if (!mptctl_id || mptctl_id >= MPT_MAX_PROTOCOL_DRIVERS) { From 81cb3eb68af5d0bee61ea45a72a0e6e3862b246f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 31 Oct 2022 16:05:12 +0000 Subject: [PATCH 0987/4122] scsi: BusLogic: Remove variable 'adapter_count' Variable 'adapter_count' is just being incremented and it's never used anywhere else. Remove it. Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20221031160512.872153-1-colin.i.king@gmail.com Acked-by: Khalid Aziz Signed-off-by: Martin K. 
Petersen --- drivers/scsi/BusLogic.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/BusLogic.c b/drivers/scsi/BusLogic.c index f2abffce2659..f7b7ffda1161 100644 --- a/drivers/scsi/BusLogic.c +++ b/drivers/scsi/BusLogic.c @@ -2198,7 +2198,7 @@ static int blogic_slaveconfig(struct scsi_device *dev) static int __init blogic_init(void) { - int adapter_count = 0, drvr_optindex = 0, probeindex; + int drvr_optindex = 0, probeindex; struct blogic_adapter *adapter; int ret = 0; @@ -2368,10 +2368,8 @@ static int __init blogic_init(void) list_del(&myadapter->host_list); scsi_host_put(host); ret = -ENODEV; - } else { + } else scsi_scan_host(host); - adapter_count++; - } } } else { /* From b817e6ffbad7a1a0a5ca5bb7d4020823c3f4d9d0 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 31 Oct 2022 11:34:21 -0700 Subject: [PATCH 0988/4122] scsi: ufs: core: Introduce ufshcd_abort_all() Move the code for aborting all SCSI commands and TMFs into a new function. This patch makes the ufshcd_err_handler() easier to read. Except for adding more logging, this patch does not change any functionality. Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20221031183433.2443554-1-bvanassche@acm.org Reviewed-by: Adrian Hunter Signed-off-by: Martin K. 
Petersen --- drivers/ufs/core/ufshcd.c | 62 +++++++++++++++++++++------------------ 1 file changed, 34 insertions(+), 28 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 0591d05c078a..768cb49d269c 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -6161,6 +6161,38 @@ static bool ufshcd_is_pwr_mode_restore_needed(struct ufs_hba *hba) return false; } +static bool ufshcd_abort_all(struct ufs_hba *hba) +{ + bool needs_reset = false; + int tag, ret; + + /* Clear pending transfer requests */ + for_each_set_bit(tag, &hba->outstanding_reqs, hba->nutrs) { + ret = ufshcd_try_to_abort_task(hba, tag); + dev_err(hba->dev, "Aborting tag %d / CDB %#02x %s\n", tag, + hba->lrb[tag].cmd ? hba->lrb[tag].cmd->cmnd[0] : -1, + ret ? "failed" : "succeeded"); + if (ret) { + needs_reset = true; + goto out; + } + } + + /* Clear pending task management requests */ + for_each_set_bit(tag, &hba->outstanding_tasks, hba->nutmrs) { + if (ufshcd_clear_tm_cmd(hba, tag)) { + needs_reset = true; + goto out; + } + } + +out: + /* Complete the requests that are cleared by s/w */ + ufshcd_complete_requests(hba); + + return needs_reset; +} + /** * ufshcd_err_handler - handle UFS errors that require s/w attention * @work: pointer to work structure @@ -6172,10 +6204,7 @@ static void ufshcd_err_handler(struct work_struct *work) unsigned long flags; bool needs_restore; bool needs_reset; - bool err_xfer; - bool err_tm; int pmc_err; - int tag; hba = container_of(work, struct ufs_hba, eh_work); @@ -6204,8 +6233,6 @@ static void ufshcd_err_handler(struct work_struct *work) again: needs_restore = false; needs_reset = false; - err_xfer = false; - err_tm = false; if (hba->ufshcd_state != UFSHCD_STATE_ERROR) hba->ufshcd_state = UFSHCD_STATE_RESET; @@ -6274,34 +6301,13 @@ again: hba->silence_err_logs = true; /* release lock as clear command might sleep */ spin_unlock_irqrestore(hba->host->host_lock, flags); - /* Clear pending transfer requests */ - 
for_each_set_bit(tag, &hba->outstanding_reqs, hba->nutrs) { - if (ufshcd_try_to_abort_task(hba, tag)) { - err_xfer = true; - goto lock_skip_pending_xfer_clear; - } - dev_err(hba->dev, "Aborted tag %d / CDB %#02x\n", tag, - hba->lrb[tag].cmd ? hba->lrb[tag].cmd->cmnd[0] : -1); - } - /* Clear pending task management requests */ - for_each_set_bit(tag, &hba->outstanding_tasks, hba->nutmrs) { - if (ufshcd_clear_tm_cmd(hba, tag)) { - err_tm = true; - goto lock_skip_pending_xfer_clear; - } - } - -lock_skip_pending_xfer_clear: - /* Complete the requests that are cleared by s/w */ - ufshcd_complete_requests(hba); + needs_reset = ufshcd_abort_all(hba); spin_lock_irqsave(hba->host->host_lock, flags); hba->silence_err_logs = false; - if (err_xfer || err_tm) { - needs_reset = true; + if (needs_reset) goto do_reset; - } /* * After all reqs and tasks are cleared from doorbell, From 3d75e766b58a7410d4e835c534e1b4664a8f62d0 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 2 Nov 2022 09:19:06 -0700 Subject: [PATCH 0989/4122] scsi: elx: libefc: Fix second parameter type in state callbacks With clang's kernel control flow integrity (kCFI, CONFIG_CFI_CLANG), indirect call targets are validated against the expected function pointer prototype to make sure the call target is valid to help mitigate ROP attacks. If they are not identical, there is a failure at run time, which manifests as either a kernel panic or thread getting killed. 
A proposed warning in clang aims to catch these at compile time, which reveals: drivers/scsi/elx/libefc/efc_node.c:811:22: error: incompatible function pointer types assigning to 'void (*)(struct efc_sm_ctx *, u32, void *)' (aka 'void (*)(struct efc_sm_ctx *, unsigned int, void *)') from 'void (*)(struct efc_sm_ctx *, enum efc_sm_event, void *)' [-Werror,-Wincompatible-function-pointer-types-strict] ctx->current_state = state; ^ ~~~~~ drivers/scsi/elx/libefc/efc_node.c:878:21: error: incompatible function pointer types assigning to 'void (*)(struct efc_sm_ctx *, u32, void *)' (aka 'void (*)(struct efc_sm_ctx *, unsigned int, void *)') from 'void (*)(struct efc_sm_ctx *, enum efc_sm_event, void *)' [-Werror,-Wincompatible-function-pointer-types-strict] node->nodedb_state = state; ^ ~~~~~ drivers/scsi/elx/libefc/efc_node.c:905:6: error: incompatible function pointer types assigning to 'void (*)(struct efc_sm_ctx *, enum efc_sm_event, void *)' from 'void (*)(struct efc_sm_ctx *, u32, void *)' (aka 'void (*)(struct efc_sm_ctx *, unsigned int, void *)') [-Werror,-Wincompatible-function-pointer-types-strict] pf = node->nodedb_state; ^ ~~~~~~~~~~~~~~~~~~ drivers/scsi/elx/libefc/efc_device.c:455:22: error: incompatible function pointer types assigning to 'void (*)(struct efc_sm_ctx *, u32, void *)' (aka 'void (*)(struct efc_sm_ctx *, unsigned int, void *)') from 'void (struct efc_sm_ctx *, enum efc_sm_event, void *)' [-Werror,-Wincompatible-function-pointer-types-strict] node->nodedb_state = __efc_d_init; ^ ~~~~~~~~~~~~ drivers/scsi/elx/libefc/efc_sm.c:41:22: error: incompatible function pointer types assigning to 'void (*)(struct efc_sm_ctx *, u32, void *)' (aka 'void (*)(struct efc_sm_ctx *, unsigned int, void *)') from 'void (*)(struct efc_sm_ctx *, enum efc_sm_event, void *)' [-Werror,-Wincompatible-function-pointer-types-strict] ctx->current_state = state; ^ ~~~~~ The type of the second parameter in the prototypes of ->current_state() and ->nodedb_state() ('u32') does 
not match the implementations, which have a second parameter type of 'enum efc_sm_event'. Update the prototypes to have the correct second parameter type, clearing up all the warnings and CFI failures. Link: https://github.com/ClangBuiltLinux/linux/issues/1750 Reported-by: Sami Tolvanen Signed-off-by: Nathan Chancellor Link: https://lore.kernel.org/r/20221102161906.2781508-1-nathan@kernel.org Reviewed-by: Kees Cook Signed-off-by: Martin K. Petersen --- drivers/scsi/elx/libefc/efclib.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/elx/libefc/efclib.h b/drivers/scsi/elx/libefc/efclib.h index dde20891c2dd..57e338612812 100644 --- a/drivers/scsi/elx/libefc/efclib.h +++ b/drivers/scsi/elx/libefc/efclib.h @@ -58,10 +58,12 @@ enum efc_node_send_ls_acc { #define EFC_LINK_STATUS_UP 0 #define EFC_LINK_STATUS_DOWN 1 +enum efc_sm_event; + /* State machine context header */ struct efc_sm_ctx { void (*current_state)(struct efc_sm_ctx *ctx, - u32 evt, void *arg); + enum efc_sm_event evt, void *arg); const char *description; void *app; @@ -365,7 +367,7 @@ struct efc_node { int prev_evt; void (*nodedb_state)(struct efc_sm_ctx *ctx, - u32 evt, void *arg); + enum efc_sm_event evt, void *arg); struct timer_list gidpt_delay_timer; u64 time_last_gidpt_msec; From 3d6d7930928ace6b982258ebb81d585fe20e9f44 Mon Sep 17 00:00:00 2001 From: Keoseong Park Date: Thu, 3 Nov 2022 14:53:49 +0900 Subject: [PATCH 0990/4122] scsi: ufs: core: Remove check_upiu_size() from ufshcd.h Commit 68078d5cc1a5 ("[SCSI] ufs: Set fDeviceInit flag to initiate device initialization") added check_upiu_size(), but no caller. Cc: Dolev Raviv Link: https://lore.kernel.org/r/20221103055349epcms2p338f2550c2dd78d00231a83b24719a3d4@epcms2p3 Signed-off-by: Keoseong Park Reviewed-by: Avri Altman Signed-off-by: Martin K. 
Petersen --- include/ufs/ufshcd.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index 96538eb3a6c0..5cf81dff60aa 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -1072,12 +1072,6 @@ void ufshcd_update_evt_hist(struct ufs_hba *hba, u32 id, u32 val); void ufshcd_hba_stop(struct ufs_hba *hba); void ufshcd_schedule_eh_work(struct ufs_hba *hba); -static inline void check_upiu_size(void) -{ - BUILD_BUG_ON(ALIGNED_UPIU_SIZE < - GENERAL_UPIU_REQUEST_SIZE + QUERY_DESC_MAX_SIZE); -} - /** * ufshcd_set_variant - set variant specific data to the hba * @hba: per adapter instance From 0b25e17e9018a0ea68a9f0b4787672e8c68fa8d5 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 31 Oct 2022 15:47:25 -0700 Subject: [PATCH 0991/4122] scsi: alua: Move a scsi_device_put() call out of alua_check_vpd() Fix the following smatch warning: drivers/scsi/device_handler/scsi_dh_alua.c:1013 alua_rtpg_queue() warn: sleeping in atomic context alua_check_vpd() <- disables preempt -> alua_rtpg_queue() -> scsi_device_put() Cc: Hannes Reinecke Cc: Dan Carpenter Reported-by: Dan Carpenter Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20221031224728.2607760-2-bvanassche@acm.org Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. 
Petersen --- drivers/scsi/device_handler/scsi_dh_alua.c | 23 ++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index 610a51538f03..f7bc81cc59ab 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -324,6 +324,7 @@ static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h, struct alua_port_group *pg, *old_pg = NULL; bool pg_updated = false; unsigned long flags; + bool put_sdev; group_id = scsi_vpd_tpg_id(sdev, &rel_port); if (group_id < 0) { @@ -373,11 +374,14 @@ static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h, list_add_rcu(&h->node, &pg->dh_list); spin_unlock_irqrestore(&pg->lock, flags); - alua_rtpg_queue(rcu_dereference_protected(h->pg, + put_sdev = alua_rtpg_queue(rcu_dereference_protected(h->pg, lockdep_is_held(&h->pg_lock)), sdev, NULL, true); spin_unlock(&h->pg_lock); + if (put_sdev) + scsi_device_put(sdev); + if (old_pg) kref_put(&old_pg->kref, release_port_group); @@ -968,9 +972,10 @@ static void alua_rtpg_work(struct work_struct *work) * RTPG already has been scheduled. * * Returns true if and only if alua_rtpg_work() will be called asynchronously. - * That function is responsible for calling @qdata->fn(). + * That function is responsible for calling @qdata->fn(). If this function + * returns true, the caller is responsible for invoking scsi_device_put(@sdev). 
*/ -static bool alua_rtpg_queue(struct alua_port_group *pg, +static bool __must_check alua_rtpg_queue(struct alua_port_group *pg, struct scsi_device *sdev, struct alua_queue_data *qdata, bool force) { @@ -1009,8 +1014,6 @@ static bool alua_rtpg_queue(struct alua_port_group *pg, else kref_put(&pg->kref, release_port_group); } - if (sdev) - scsi_device_put(sdev); return true; } @@ -1117,10 +1120,12 @@ static int alua_activate(struct scsi_device *sdev, rcu_read_unlock(); mutex_unlock(&h->init_mutex); - if (alua_rtpg_queue(pg, sdev, qdata, true)) + if (alua_rtpg_queue(pg, sdev, qdata, true)) { + scsi_device_put(sdev); fn = NULL; - else + } else { err = SCSI_DH_DEV_OFFLINED; + } kref_put(&pg->kref, release_port_group); out: if (fn) @@ -1146,7 +1151,9 @@ static void alua_check(struct scsi_device *sdev, bool force) return; } rcu_read_unlock(); - alua_rtpg_queue(pg, sdev, NULL, force); + + if (alua_rtpg_queue(pg, sdev, NULL, force)) + scsi_device_put(sdev); kref_put(&pg->kref, release_port_group); } From 379e2554e3d10e87c0c0a728ef538f3c26d82a98 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 31 Oct 2022 15:47:26 -0700 Subject: [PATCH 0992/4122] scsi: alua: Move a scsi_device_put() call out of alua_rtpg_select_sdev() Move a scsi_device_put() call from alua_rtpg_select_sdev() to its callers. Fixes the following smatch complaint: drivers/scsi/device_handler/scsi_dh_alua.c:853 alua_rtpg_select_sdev() warn: sleeping in atomic context alua_rtpg_work() <- disables preempt -> alua_rtpg_select_sdev() -> scsi_device_put() Cc: Hannes Reinecke Cc: Dan Carpenter Reported-by: Dan Carpenter Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20221031224728.2607760-3-bvanassche@acm.org Signed-off-by: Martin K. 
Petersen --- drivers/scsi/device_handler/scsi_dh_alua.c | 38 ++++++++++++++-------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index f7bc81cc59ab..693cd827e138 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -815,14 +815,19 @@ static unsigned alua_stpg(struct scsi_device *sdev, struct alua_port_group *pg) return SCSI_DH_RETRY; } -static bool alua_rtpg_select_sdev(struct alua_port_group *pg) +/* + * The caller must call scsi_device_put() on the returned pointer if it is not + * NULL. + */ +static struct scsi_device * __must_check +alua_rtpg_select_sdev(struct alua_port_group *pg) { struct alua_dh_data *h; - struct scsi_device *sdev = NULL; + struct scsi_device *sdev = NULL, *prev_sdev; lockdep_assert_held(&pg->lock); if (WARN_ON(!pg->rtpg_sdev)) - return false; + return NULL; /* * RCU protection isn't necessary for dh_list here @@ -849,22 +854,22 @@ static bool alua_rtpg_select_sdev(struct alua_port_group *pg) pr_warn("%s: no device found for rtpg\n", (pg->device_id_len ? 
(char *)pg->device_id_str : "(nameless PG)")); - return false; + return NULL; } sdev_printk(KERN_INFO, sdev, "rtpg retry on different device\n"); - scsi_device_put(pg->rtpg_sdev); + prev_sdev = pg->rtpg_sdev; pg->rtpg_sdev = sdev; - return true; + return prev_sdev; } static void alua_rtpg_work(struct work_struct *work) { struct alua_port_group *pg = container_of(work, struct alua_port_group, rtpg_work.work); - struct scsi_device *sdev; + struct scsi_device *sdev, *prev_sdev = NULL; LIST_HEAD(qdata_list); int err = SCSI_DH_OK; struct alua_queue_data *qdata, *tmp; @@ -905,7 +910,7 @@ static void alua_rtpg_work(struct work_struct *work) /* If RTPG failed on the current device, try using another */ if (err == SCSI_DH_RES_TEMP_UNAVAIL && - alua_rtpg_select_sdev(pg)) + (prev_sdev = alua_rtpg_select_sdev(pg))) err = SCSI_DH_IMM_RETRY; if (err == SCSI_DH_RETRY || err == SCSI_DH_IMM_RETRY || @@ -917,9 +922,7 @@ static void alua_rtpg_work(struct work_struct *work) pg->interval = ALUA_RTPG_RETRY_DELAY; pg->flags |= ALUA_PG_RUN_RTPG; spin_unlock_irqrestore(&pg->lock, flags); - queue_delayed_work(kaluad_wq, &pg->rtpg_work, - pg->interval * HZ); - return; + goto queue_rtpg; } if (err != SCSI_DH_OK) pg->flags &= ~ALUA_PG_RUN_STPG; @@ -934,9 +937,7 @@ static void alua_rtpg_work(struct work_struct *work) pg->interval = 0; pg->flags &= ~ALUA_PG_RUNNING; spin_unlock_irqrestore(&pg->lock, flags); - queue_delayed_work(kaluad_wq, &pg->rtpg_work, - pg->interval * HZ); - return; + goto queue_rtpg; } } @@ -950,6 +951,9 @@ static void alua_rtpg_work(struct work_struct *work) pg->rtpg_sdev = NULL; spin_unlock_irqrestore(&pg->lock, flags); + if (prev_sdev) + scsi_device_put(prev_sdev); + list_for_each_entry_safe(qdata, tmp, &qdata_list, entry) { list_del(&qdata->entry); if (qdata->callback_fn) @@ -961,6 +965,12 @@ static void alua_rtpg_work(struct work_struct *work) spin_unlock_irqrestore(&pg->lock, flags); scsi_device_put(sdev); kref_put(&pg->kref, release_port_group); + return; + 
+queue_rtpg: + if (prev_sdev) + scsi_device_put(prev_sdev); + queue_delayed_work(kaluad_wq, &pg->rtpg_work, pg->interval * HZ); } /** From 2e5a6c3baccd31476ed00c3fbc413b48ddd87993 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 31 Oct 2022 15:47:27 -0700 Subject: [PATCH 0993/4122] scsi: bfa: Convert bfad_reset_sdev_bflags() from a macro into a function Before modifying bfad_reset_sdev_bflags(), convert it from a macro into a function. Cc: Anil Gurumurthy Cc: Sudarsana Kalluru Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20221031224728.2607760-4-bvanassche@acm.org Signed-off-by: Martin K. Petersen --- drivers/scsi/bfa/bfad_bsg.c | 27 +++++++++++++++++++++++++++ drivers/scsi/bfa/bfad_im.h | 26 -------------------------- 2 files changed, 27 insertions(+), 26 deletions(-) diff --git a/drivers/scsi/bfa/bfad_bsg.c b/drivers/scsi/bfa/bfad_bsg.c index be8dfbe13e90..73754032e25c 100644 --- a/drivers/scsi/bfa/bfad_bsg.c +++ b/drivers/scsi/bfa/bfad_bsg.c @@ -2540,6 +2540,33 @@ out: return 0; } +/* + * Set the SCSI device sdev_bflags - sdev_bflags are used by the + * SCSI mid-layer to choose LUN Scanning mode REPORT_LUNS vs. Sequential Scan + * + * Internally iterates over all the ITNIM's part of the im_port & sets the + * sdev_bflags for the scsi_device associated with LUN #0. 
+ */ +static void bfad_reset_sdev_bflags(struct bfad_im_port_s *im_port, + int lunmask_cfg) +{ + const u32 scan_flags = BLIST_NOREPORTLUN | BLIST_SPARSELUN; + struct bfad_itnim_s *itnim; + struct scsi_device *sdev; + + list_for_each_entry(itnim, &im_port->itnim_mapped_list, list_entry) { + sdev = scsi_device_lookup(im_port->shost, itnim->channel, + itnim->scsi_tgt_id, 0); + if (sdev) { + if (lunmask_cfg == BFA_TRUE) + sdev->sdev_bflags |= scan_flags; + else + sdev->sdev_bflags &= ~scan_flags; + scsi_device_put(sdev); + } + } +} + /* Function to reset the LUN SCAN mode */ static void bfad_iocmd_lunmask_reset_lunscan_mode(struct bfad_s *bfad, int lunmask_cfg) diff --git a/drivers/scsi/bfa/bfad_im.h b/drivers/scsi/bfa/bfad_im.h index c03b225ea1ba..4353feedf76a 100644 --- a/drivers/scsi/bfa/bfad_im.h +++ b/drivers/scsi/bfa/bfad_im.h @@ -198,30 +198,4 @@ irqreturn_t bfad_intx(int irq, void *dev_id); int bfad_im_bsg_request(struct bsg_job *job); int bfad_im_bsg_timeout(struct bsg_job *job); -/* - * Macro to set the SCSI device sdev_bflags - sdev_bflags are used by the - * SCSI mid-layer to choose LUN Scanning mode REPORT_LUNS vs. Sequential Scan - * - * Internally iterate's over all the ITNIM's part of the im_port & set's the - * sdev_bflags for the scsi_device associated with LUN #0. 
- */ -#define bfad_reset_sdev_bflags(__im_port, __lunmask_cfg) do { \ - struct scsi_device *__sdev = NULL; \ - struct bfad_itnim_s *__itnim = NULL; \ - u32 scan_flags = BLIST_NOREPORTLUN | BLIST_SPARSELUN; \ - list_for_each_entry(__itnim, &((__im_port)->itnim_mapped_list), \ - list_entry) { \ - __sdev = scsi_device_lookup((__im_port)->shost, \ - __itnim->channel, \ - __itnim->scsi_tgt_id, 0); \ - if (__sdev) { \ - if ((__lunmask_cfg) == BFA_TRUE) \ - __sdev->sdev_bflags |= scan_flags; \ - else \ - __sdev->sdev_bflags &= ~scan_flags; \ - scsi_device_put(__sdev); \ - } \ - } \ -} while (0) - #endif From 2e79cf37b15b1936f8630d9c5805d2c76bde213b Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 31 Oct 2022 15:47:28 -0700 Subject: [PATCH 0994/4122] scsi: bfa: Rework bfad_reset_sdev_bflags() Since commit f93ed747e2c7 ("scsi: core: Release SCSI devices synchronously") it is no longer allowed to call scsi_device_put() from atomic context. Rework bfad_reset_sdev_bflags() such that scsi_device_put() is no longer called. This fixes the following smatch warning: drivers/scsi/bfa/bfad_bsg.c:2551 bfad_iocmd_lunmask_reset_lunscan_mode() warn: sleeping in atomic context bfad_iocmd_lunmask() <- disables preempt -> bfad_iocmd_lunmask_reset_lunscan_mode() -> scsi_device_put() Cc: Anil Gurumurthy Cc: Sudarsana Kalluru Cc: Dan Carpenter Reported-by: Dan Carpenter Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20221031224728.2607760-5-bvanassche@acm.org Signed-off-by: Martin K. 
Petersen --- drivers/scsi/bfa/bfad_bsg.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/bfa/bfad_bsg.c b/drivers/scsi/bfa/bfad_bsg.c index 73754032e25c..79d4f7ee5bcb 100644 --- a/drivers/scsi/bfa/bfad_bsg.c +++ b/drivers/scsi/bfa/bfad_bsg.c @@ -2553,18 +2553,20 @@ static void bfad_reset_sdev_bflags(struct bfad_im_port_s *im_port, const u32 scan_flags = BLIST_NOREPORTLUN | BLIST_SPARSELUN; struct bfad_itnim_s *itnim; struct scsi_device *sdev; + unsigned long flags; + spin_lock_irqsave(im_port->shost->host_lock, flags); list_for_each_entry(itnim, &im_port->itnim_mapped_list, list_entry) { - sdev = scsi_device_lookup(im_port->shost, itnim->channel, - itnim->scsi_tgt_id, 0); + sdev = __scsi_device_lookup(im_port->shost, itnim->channel, + itnim->scsi_tgt_id, 0); if (sdev) { if (lunmask_cfg == BFA_TRUE) sdev->sdev_bflags |= scan_flags; else sdev->sdev_bflags &= ~scan_flags; - scsi_device_put(sdev); } } + spin_unlock_irqrestore(im_port->shost->host_lock, flags); } /* Function to reset the LUN SCAN mode */ From 4fb2169d66b837a2986f569f5d5b81f79e6e4a4c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 31 Oct 2022 15:48:18 -0700 Subject: [PATCH 0995/4122] scsi: qla2xxx: Fix set-but-not-used variable warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following two compiler warnings: drivers/scsi/qla2xxx/qla_init.c: In function ‘qla24xx_async_abort_cmd’: drivers/scsi/qla2xxx/qla_init.c:171:17: warning: variable ‘bail’ set but not used [-Wunused-but-set-variable] 171 | uint8_t bail; | ^~~~ drivers/scsi/qla2xxx/qla_init.c: In function ‘qla2x00_async_tm_cmd’: drivers/scsi/qla2xxx/qla_init.c:2023:17: warning: variable ‘bail’ set but not used [-Wunused-but-set-variable] 2023 | uint8_t bail; | ^~~~ Cc: Arun Easi Cc: Giridhar Malavali Fixes: feafb7b1714c ("[SCSI] qla2xxx: Fix vport delete issues") Signed-off-by: Bart Van Assche Link: 
https://lore.kernel.org/r/20221031224818.2607882-1-bvanassche@acm.org Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_def.h | 22 +++++++++++----------- drivers/scsi/qla2xxx/qla_init.c | 6 ++---- drivers/scsi/qla2xxx/qla_inline.h | 4 +--- drivers/scsi/qla2xxx/qla_os.c | 4 +--- 4 files changed, 15 insertions(+), 21 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 802eec6407d9..a26a373be9da 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -5136,17 +5136,17 @@ struct secure_flash_update_block_pk { (test_bit(ISP_ABORT_NEEDED, &ha->dpc_flags) || \ test_bit(LOOP_RESYNC_NEEDED, &ha->dpc_flags)) -#define QLA_VHA_MARK_BUSY(__vha, __bail) do { \ - atomic_inc(&__vha->vref_count); \ - mb(); \ - if (__vha->flags.delete_progress) { \ - atomic_dec(&__vha->vref_count); \ - wake_up(&__vha->vref_waitq); \ - __bail = 1; \ - } else { \ - __bail = 0; \ - } \ -} while (0) +static inline bool qla_vha_mark_busy(scsi_qla_host_t *vha) +{ + atomic_inc(&vha->vref_count); + mb(); + if (vha->flags.delete_progress) { + atomic_dec(&vha->vref_count); + wake_up(&vha->vref_waitq); + return true; + } + return false; +} #define QLA_VHA_MARK_NOT_BUSY(__vha) do { \ atomic_dec(&__vha->vref_count); \ diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index e12db95de688..631993504a76 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -168,7 +168,6 @@ int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait) struct srb_iocb *abt_iocb; srb_t *sp; int rval = QLA_FUNCTION_FAILED; - uint8_t bail; /* ref: INIT for ABTS command */ sp = qla2xxx_get_qpair_sp(cmd_sp->vha, cmd_sp->qpair, cmd_sp->fcport, @@ -176,7 +175,7 @@ int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait) if (!sp) return QLA_MEMORY_ALLOC_FAILED; - QLA_VHA_MARK_BUSY(vha, bail); + qla_vha_mark_busy(vha); abt_iocb = &sp->u.iocb_cmd; sp->type = SRB_ABT_CMD; sp->name = "abort"; @@ -2020,14 +2019,13 
@@ qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint32_t lun, struct srb_iocb *tm_iocb; srb_t *sp; int rval = QLA_FUNCTION_FAILED; - uint8_t bail; /* ref: INIT */ sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) goto done; - QLA_VHA_MARK_BUSY(vha, bail); + qla_vha_mark_busy(vha); sp->type = SRB_TM_CMD; sp->name = "tmf"; qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha), diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h index db17f7f410cd..5185dc5daf80 100644 --- a/drivers/scsi/qla2xxx/qla_inline.h +++ b/drivers/scsi/qla2xxx/qla_inline.h @@ -225,11 +225,9 @@ static inline srb_t * qla2x00_get_sp(scsi_qla_host_t *vha, fc_port_t *fcport, gfp_t flag) { srb_t *sp = NULL; - uint8_t bail; struct qla_qpair *qpair; - QLA_VHA_MARK_BUSY(vha, bail); - if (unlikely(bail)) + if (unlikely(qla_vha_mark_busy(vha))) return NULL; qpair = vha->hw->base_qpair; diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 2c85f3cce726..96ba1398f20c 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -5069,13 +5069,11 @@ struct qla_work_evt * qla2x00_alloc_work(struct scsi_qla_host *vha, enum qla_work_type type) { struct qla_work_evt *e; - uint8_t bail; if (test_bit(UNLOADING, &vha->dpc_flags)) return NULL; - QLA_VHA_MARK_BUSY(vha, bail); - if (bail) + if (qla_vha_mark_busy(vha)) return NULL; e = kzalloc(sizeof(struct qla_work_evt), GFP_ATOMIC); From e137b81d30e7ef8ec27a77c3b2cbbad52845872a Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 1 Nov 2022 10:47:33 +0000 Subject: [PATCH 0996/4122] scsi: qla2xxx: Remove unused variable 'found_devs' Variable 'found_devs' is just being incremented and it's never used anywhere else. Remove it. Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20221101104733.30363-1-colin.i.king@gmail.com Signed-off-by: Martin K. 
Petersen --- drivers/scsi/qla2xxx/qla_init.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 631993504a76..ce4c5d728407 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -5544,7 +5544,6 @@ static int qla2x00_configure_local_loop(scsi_qla_host_t *vha) { int rval, rval2; - int found_devs; int found; fc_port_t *fcport, *new_fcport; uint16_t index; @@ -5559,7 +5558,6 @@ qla2x00_configure_local_loop(scsi_qla_host_t *vha) if (N2N_TOPO(ha)) return qla2x00_configure_n2n_loop(vha); - found_devs = 0; new_fcport = NULL; entries = MAX_FIBRE_DEVICES_LOOP; @@ -5718,8 +5716,6 @@ qla2x00_configure_local_loop(scsi_qla_host_t *vha) /* Base iIDMA settings on HBA port speed. */ fcport->fp_speed = ha->link_data_rate; - - found_devs++; } list_for_each_entry(fcport, &vha->vp_fcports, list) { From 0aa46eba29553035d6af8384f19dfee2258d2a46 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 1 Nov 2022 12:32:35 +0000 Subject: [PATCH 0997/4122] scsi: csiostor: Remove unused variable 'n' Variable 'n' is just being incremented and it's never used anywhere else. Remove it. Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20221101123235.52152-1-colin.i.king@gmail.com Signed-off-by: Martin K. 
Petersen --- drivers/scsi/csiostor/csio_wr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/csiostor/csio_wr.c b/drivers/scsi/csiostor/csio_wr.c index fe0355c964bc..a516df019c22 100644 --- a/drivers/scsi/csiostor/csio_wr.c +++ b/drivers/scsi/csiostor/csio_wr.c @@ -1051,7 +1051,6 @@ csio_wr_process_fl(struct csio_hw *hw, struct csio_q *q, struct csio_fl_dma_buf flb; struct csio_dma_buf *buf, *fbuf; uint32_t bufsz, len, lastlen = 0; - int n; struct csio_q *flq = hw->wrm.q_arr[q->un.iq.flq_idx]; CSIO_DB_ASSERT(flq != NULL); @@ -1071,7 +1070,7 @@ csio_wr_process_fl(struct csio_hw *hw, struct csio_q *q, flb.totlen = len; /* Consume all freelist buffers used for len bytes */ - for (n = 0, fbuf = flb.flbufs; ; n++, fbuf++) { + for (fbuf = flb.flbufs; ; fbuf++) { buf = &flq->un.fl.bufs[flq->cidx]; bufsz = csio_wr_fl_bufsz(sge, buf); From bc81131813aaf6fe764d1cc6b942a35a8c0c5c36 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 1 Nov 2022 10:53:26 +0000 Subject: [PATCH 0998/4122] scsi: target: core: Remove unused variable 'unit_serial_len' Variable 'unit_serial_len' is just being assigned and it's never used anywhere else. Remove it. Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20221101105326.31037-1-colin.i.king@gmail.com Signed-off-by: Martin K. 
Petersen --- drivers/target/target_core_spc.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c index ffe02e195733..621a460ba234 100644 --- a/drivers/target/target_core_spc.c +++ b/drivers/target/target_core_spc.c @@ -227,7 +227,7 @@ spc_emulate_evpd_83(struct se_cmd *cmd, unsigned char *buf) struct t10_alua_tg_pt_gp *tg_pt_gp; unsigned char *prod = &dev->t10_wwn.model[0]; u32 prod_len; - u32 unit_serial_len, off = 0; + u32 off = 0; u16 len = 0, id_len; off = 4; @@ -272,13 +272,9 @@ check_t10_vend_desc: prod_len += strlen(prod); prod_len++; /* For : */ - if (dev->dev_flags & DF_EMULATED_VPD_UNIT_SERIAL) { - unit_serial_len = strlen(&dev->t10_wwn.unit_serial[0]); - unit_serial_len++; /* For NULL Terminator */ - + if (dev->dev_flags & DF_EMULATED_VPD_UNIT_SERIAL) id_len += sprintf(&buf[off+12], "%s:%s", prod, &dev->t10_wwn.unit_serial[0]); - } buf[off] = 0x2; /* ASCII */ buf[off+1] = 0x1; /* T10 Vendor ID */ buf[off+2] = 0x0; From e56ca6bcd2136207868516f5a304fbb82cc0cb82 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 1 Nov 2022 22:13:52 +0100 Subject: [PATCH 0999/4122] scsi: target: Use kstrtobool() instead of strtobool() strtobool() is the same as kstrtobool(). However, the latter is more used within the kernel. In order to remove strtobool() and slightly simplify kstrtox.h, switch to the other function name. While at it, include the corresponding header file (<linux/kstrtox.h>) Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/fcddc0a53b4fc6e3c2e93592d3f61c5c63121855.1667336095.git.christophe.jaillet@wanadoo.fr Reviewed-by: Chaitanya Kulkarni Signed-off-by: Martin K. 
Petersen --- drivers/target/target_core_configfs.c | 29 ++++++++++---------- drivers/target/target_core_fabric_configfs.c | 3 +- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 533524299ed6..b8a5c8d6cfde 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -12,6 +12,7 @@ * ****************************************************************************/ +#include #include #include #include @@ -578,7 +579,7 @@ static ssize_t _name##_store(struct config_item *item, const char *page, \ bool flag; \ int ret; \ \ - ret = strtobool(page, &flag); \ + ret = kstrtobool(page, &flag); \ if (ret < 0) \ return ret; \ da->_name = flag; \ @@ -638,7 +639,7 @@ static ssize_t emulate_model_alias_store(struct config_item *item, return -EINVAL; } - ret = strtobool(page, &flag); + ret = kstrtobool(page, &flag); if (ret < 0) return ret; @@ -660,7 +661,7 @@ static ssize_t emulate_write_cache_store(struct config_item *item, bool flag; int ret; - ret = strtobool(page, &flag); + ret = kstrtobool(page, &flag); if (ret < 0) return ret; @@ -712,7 +713,7 @@ static ssize_t emulate_tas_store(struct config_item *item, bool flag; int ret; - ret = strtobool(page, &flag); + ret = kstrtobool(page, &flag); if (ret < 0) return ret; @@ -737,7 +738,7 @@ static ssize_t emulate_tpu_store(struct config_item *item, bool flag; int ret; - ret = strtobool(page, &flag); + ret = kstrtobool(page, &flag); if (ret < 0) return ret; @@ -767,7 +768,7 @@ static ssize_t emulate_tpws_store(struct config_item *item, bool flag; int ret; - ret = strtobool(page, &flag); + ret = kstrtobool(page, &flag); if (ret < 0) return ret; @@ -866,7 +867,7 @@ static ssize_t pi_prot_format_store(struct config_item *item, bool flag; int ret; - ret = strtobool(page, &flag); + ret = kstrtobool(page, &flag); if (ret < 0) return ret; @@ -903,7 +904,7 @@ static ssize_t pi_prot_verify_store(struct config_item 
*item, bool flag; int ret; - ret = strtobool(page, &flag); + ret = kstrtobool(page, &flag); if (ret < 0) return ret; @@ -932,7 +933,7 @@ static ssize_t force_pr_aptpl_store(struct config_item *item, bool flag; int ret; - ret = strtobool(page, &flag); + ret = kstrtobool(page, &flag); if (ret < 0) return ret; if (da->da_dev->export_count) { @@ -954,7 +955,7 @@ static ssize_t emulate_rest_reord_store(struct config_item *item, bool flag; int ret; - ret = strtobool(page, &flag); + ret = kstrtobool(page, &flag); if (ret < 0) return ret; @@ -977,7 +978,7 @@ static ssize_t unmap_zeroes_data_store(struct config_item *item, bool flag; int ret; - ret = strtobool(page, &flag); + ret = kstrtobool(page, &flag); if (ret < 0) return ret; @@ -1126,7 +1127,7 @@ static ssize_t alua_support_store(struct config_item *item, bool flag, oldflag; int ret; - ret = strtobool(page, &flag); + ret = kstrtobool(page, &flag); if (ret < 0) return ret; @@ -1165,7 +1166,7 @@ static ssize_t pgr_support_store(struct config_item *item, bool flag, oldflag; int ret; - ret = strtobool(page, &flag); + ret = kstrtobool(page, &flag); if (ret < 0) return ret; @@ -1194,7 +1195,7 @@ static ssize_t emulate_rsoc_store(struct config_item *item, bool flag; int ret; - ret = strtobool(page, &flag); + ret = kstrtobool(page, &flag); if (ret < 0) return ret; diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c index 95a88f6224cd..67b18a67317a 100644 --- a/drivers/target/target_core_fabric_configfs.c +++ b/drivers/target/target_core_fabric_configfs.c @@ -11,6 +11,7 @@ * ****************************************************************************/ +#include #include #include #include @@ -829,7 +830,7 @@ static ssize_t target_fabric_tpg_base_enable_store(struct config_item *item, int ret; bool op; - ret = strtobool(page, &op); + ret = kstrtobool(page, &op); if (ret) return ret; From 61da03328a603d2d4a5b2e80cbe29bbf0122e6f8 Mon Sep 17 00:00:00 2001 From: Rebecca Mckeever 
Date: Mon, 7 Nov 2022 00:28:05 -0600 Subject: [PATCH 1000/4122] memblock tests: introduce range tests for memblock_alloc_exact_nid_raw Add TEST_F_EXACT flag, which specifies that tests should run memblock_alloc_exact_nid_raw(). Introduce range tests for memblock_alloc_exact_nid_raw() by using the TEST_F_EXACT flag to run the range tests in alloc_nid_api.c, since memblock_alloc_exact_nid_raw() and memblock_alloc_try_nid_raw() behave the same way when nid = NUMA_NO_NODE. Rename tests and other functions in alloc_nid_api.c by removing "_try". Since the test names will be displayed in verbose output, they need to be general enough to refer to any of the memblock functions that the tests may run. Acked-by: David Hildenbrand Signed-off-by: Rebecca Mckeever Signed-off-by: Mike Rapoport Link: https://lore.kernel.org/r/5a4b6d1b6130ab7375314e1c45a6d5813dfdabbd.1667802195.git.remckee0@gmail.com --- tools/testing/memblock/Makefile | 2 +- tools/testing/memblock/main.c | 2 + .../memblock/tests/alloc_exact_nid_api.c | 22 + .../memblock/tests/alloc_exact_nid_api.h | 9 + tools/testing/memblock/tests/alloc_nid_api.c | 544 +++++++++--------- tools/testing/memblock/tests/alloc_nid_api.h | 1 + tools/testing/memblock/tests/common.h | 2 + 7 files changed, 319 insertions(+), 263 deletions(-) create mode 100644 tools/testing/memblock/tests/alloc_exact_nid_api.c create mode 100644 tools/testing/memblock/tests/alloc_exact_nid_api.h diff --git a/tools/testing/memblock/Makefile b/tools/testing/memblock/Makefile index 246f7ac8489b..2310ac4d080e 100644 --- a/tools/testing/memblock/Makefile +++ b/tools/testing/memblock/Makefile @@ -7,7 +7,7 @@ CFLAGS += -I. 
-I../../include -Wall -O2 -fsanitize=address \ LDFLAGS += -fsanitize=address -fsanitize=undefined TARGETS = main TEST_OFILES = tests/alloc_nid_api.o tests/alloc_helpers_api.o tests/alloc_api.o \ - tests/basic_api.o tests/common.o + tests/basic_api.o tests/common.o tests/alloc_exact_nid_api.o DEP_OFILES = memblock.o lib/slab.o mmzone.o slab.o OFILES = main.o $(DEP_OFILES) $(TEST_OFILES) EXTR_SRC = ../../../mm/memblock.c diff --git a/tools/testing/memblock/main.c b/tools/testing/memblock/main.c index 4ca1024342b1..278f9dec5008 100644 --- a/tools/testing/memblock/main.c +++ b/tools/testing/memblock/main.c @@ -3,6 +3,7 @@ #include "tests/alloc_api.h" #include "tests/alloc_helpers_api.h" #include "tests/alloc_nid_api.h" +#include "tests/alloc_exact_nid_api.h" #include "tests/common.h" int main(int argc, char **argv) @@ -12,6 +13,7 @@ int main(int argc, char **argv) memblock_alloc_checks(); memblock_alloc_helpers_checks(); memblock_alloc_nid_checks(); + memblock_alloc_exact_nid_checks(); return 0; } diff --git a/tools/testing/memblock/tests/alloc_exact_nid_api.c b/tools/testing/memblock/tests/alloc_exact_nid_api.c new file mode 100644 index 000000000000..6406496623a0 --- /dev/null +++ b/tools/testing/memblock/tests/alloc_exact_nid_api.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include "alloc_exact_nid_api.h" +#include "alloc_nid_api.h" + +#define FUNC_NAME "memblock_alloc_exact_nid_raw" + +int memblock_alloc_exact_nid_checks(void) +{ + prefix_reset(); + prefix_push(FUNC_NAME); + + reset_memblock_attributes(); + dummy_physical_memory_init(); + + memblock_alloc_exact_nid_range_checks(); + + dummy_physical_memory_cleanup(); + + prefix_pop(); + + return 0; +} diff --git a/tools/testing/memblock/tests/alloc_exact_nid_api.h b/tools/testing/memblock/tests/alloc_exact_nid_api.h new file mode 100644 index 000000000000..4408719de3b9 --- /dev/null +++ b/tools/testing/memblock/tests/alloc_exact_nid_api.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: 
GPL-2.0-or-later */ +#ifndef _MEMBLOCK_ALLOC_EXACT_NID_H +#define _MEMBLOCK_ALLOC_EXACT_NID_H + +#include "common.h" + +int memblock_alloc_exact_nid_checks(void); + +#endif diff --git a/tools/testing/memblock/tests/alloc_nid_api.c b/tools/testing/memblock/tests/alloc_nid_api.c index 2c2d60f4e3e3..49ef68cccd6f 100644 --- a/tools/testing/memblock/tests/alloc_nid_api.c +++ b/tools/testing/memblock/tests/alloc_nid_api.c @@ -18,18 +18,29 @@ static const unsigned int node_fractions[] = { 625, /* 1/16 */ }; -static inline const char * const get_memblock_alloc_try_nid_name(int flags) +static inline const char * const get_memblock_alloc_nid_name(int flags) { + if (flags & TEST_F_EXACT) + return "memblock_alloc_exact_nid_raw"; if (flags & TEST_F_RAW) return "memblock_alloc_try_nid_raw"; return "memblock_alloc_try_nid"; } -static inline void *run_memblock_alloc_try_nid(phys_addr_t size, - phys_addr_t align, - phys_addr_t min_addr, - phys_addr_t max_addr, int nid) +static inline void *run_memblock_alloc_nid(phys_addr_t size, + phys_addr_t align, + phys_addr_t min_addr, + phys_addr_t max_addr, int nid) { + assert(!(alloc_nid_test_flags & TEST_F_EXACT) || + (alloc_nid_test_flags & TEST_F_RAW)); + /* + * TEST_F_EXACT should be checked before TEST_F_RAW since + * memblock_alloc_exact_nid_raw() performs raw allocations. + */ + if (alloc_nid_test_flags & TEST_F_EXACT) + return memblock_alloc_exact_nid_raw(size, align, min_addr, + max_addr, nid); if (alloc_nid_test_flags & TEST_F_RAW) return memblock_alloc_try_nid_raw(size, align, min_addr, max_addr, nid); @@ -50,7 +61,7 @@ static inline void *run_memblock_alloc_try_nid(phys_addr_t size, * * Expect to allocate a region that ends at max_addr. 
*/ -static int alloc_try_nid_top_down_simple_check(void) +static int alloc_nid_top_down_simple_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; @@ -65,9 +76,9 @@ static int alloc_try_nid_top_down_simple_check(void) min_addr = memblock_start_of_DRAM() + SMP_CACHE_BYTES * 2; max_addr = min_addr + SZ_512; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); rgn_end = rgn->base + rgn->size; ASSERT_NE(allocated_ptr, NULL); @@ -102,7 +113,7 @@ static int alloc_try_nid_top_down_simple_check(void) * * Expect to allocate an aligned region that ends before max_addr. */ -static int alloc_try_nid_top_down_end_misaligned_check(void) +static int alloc_nid_top_down_end_misaligned_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; @@ -118,9 +129,9 @@ static int alloc_try_nid_top_down_end_misaligned_check(void) min_addr = memblock_start_of_DRAM() + SMP_CACHE_BYTES * 2; max_addr = min_addr + SZ_512 + misalign; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); rgn_end = rgn->base + rgn->size; ASSERT_NE(allocated_ptr, NULL); @@ -153,7 +164,7 @@ static int alloc_try_nid_top_down_end_misaligned_check(void) * Expect to allocate a region that starts at min_addr and ends at * max_addr, given that min_addr is aligned. 
*/ -static int alloc_try_nid_exact_address_generic_check(void) +static int alloc_nid_exact_address_generic_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; @@ -168,9 +179,9 @@ static int alloc_try_nid_exact_address_generic_check(void) min_addr = memblock_start_of_DRAM() + SMP_CACHE_BYTES; max_addr = min_addr + size; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); rgn_end = rgn->base + rgn->size; ASSERT_NE(allocated_ptr, NULL); @@ -205,7 +216,7 @@ static int alloc_try_nid_exact_address_generic_check(void) * Expect to drop the lower limit and allocate a memory region which * ends at max_addr (if the address is aligned). */ -static int alloc_try_nid_top_down_narrow_range_check(void) +static int alloc_nid_top_down_narrow_range_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; @@ -219,9 +230,9 @@ static int alloc_try_nid_top_down_narrow_range_check(void) min_addr = memblock_start_of_DRAM() + SZ_512; max_addr = min_addr + SMP_CACHE_BYTES; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -257,7 +268,7 @@ static int alloc_try_nid_top_down_narrow_range_check(void) * * Expect no allocation to happen. 
*/ -static int alloc_try_nid_low_max_generic_check(void) +static int alloc_nid_low_max_generic_check(void) { void *allocated_ptr = NULL; phys_addr_t size = SZ_1K; @@ -270,9 +281,9 @@ static int alloc_try_nid_low_max_generic_check(void) min_addr = memblock_start_of_DRAM(); max_addr = min_addr + SMP_CACHE_BYTES; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_EQ(allocated_ptr, NULL); @@ -295,7 +306,7 @@ static int alloc_try_nid_low_max_generic_check(void) * * Expect a merge of both regions. Only the region size gets updated. */ -static int alloc_try_nid_min_reserved_generic_check(void) +static int alloc_nid_min_reserved_generic_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; @@ -315,9 +326,9 @@ static int alloc_try_nid_min_reserved_generic_check(void) memblock_reserve(reserved_base, r1_size); - allocated_ptr = run_memblock_alloc_try_nid(r2_size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(r2_size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, r2_size, alloc_nid_test_flags); @@ -347,7 +358,7 @@ static int alloc_try_nid_min_reserved_generic_check(void) * * Expect a merge of regions. Only the region size gets updated. 
*/ -static int alloc_try_nid_max_reserved_generic_check(void) +static int alloc_nid_max_reserved_generic_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; @@ -365,9 +376,9 @@ static int alloc_try_nid_max_reserved_generic_check(void) memblock_reserve(max_addr, r1_size); - allocated_ptr = run_memblock_alloc_try_nid(r2_size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(r2_size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, r2_size, alloc_nid_test_flags); @@ -400,7 +411,7 @@ static int alloc_try_nid_max_reserved_generic_check(void) * updated. The total size field gets updated. */ -static int alloc_try_nid_top_down_reserved_with_space_check(void) +static int alloc_nid_top_down_reserved_with_space_check(void) { struct memblock_region *rgn1 = &memblock.reserved.regions[1]; struct memblock_region *rgn2 = &memblock.reserved.regions[0]; @@ -428,9 +439,9 @@ static int alloc_try_nid_top_down_reserved_with_space_check(void) memblock_reserve(r1.base, r1.size); memblock_reserve(r2.base, r2.size); - allocated_ptr = run_memblock_alloc_try_nid(r3_size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, r3_size, alloc_nid_test_flags); @@ -465,7 +476,7 @@ static int alloc_try_nid_top_down_reserved_with_space_check(void) * Expect to merge all of the regions into one. The region counter and total * size fields get updated. 
*/ -static int alloc_try_nid_reserved_full_merge_generic_check(void) +static int alloc_nid_reserved_full_merge_generic_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; @@ -491,9 +502,9 @@ static int alloc_try_nid_reserved_full_merge_generic_check(void) memblock_reserve(r1.base, r1.size); memblock_reserve(r2.base, r2.size); - allocated_ptr = run_memblock_alloc_try_nid(r3_size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, r3_size, alloc_nid_test_flags); @@ -527,7 +538,7 @@ static int alloc_try_nid_reserved_full_merge_generic_check(void) * Expect to merge the new region with r2. The second region does not get * updated. The total size counter gets updated. */ -static int alloc_try_nid_top_down_reserved_no_space_check(void) +static int alloc_nid_top_down_reserved_no_space_check(void) { struct memblock_region *rgn1 = &memblock.reserved.regions[1]; struct memblock_region *rgn2 = &memblock.reserved.regions[0]; @@ -555,9 +566,9 @@ static int alloc_try_nid_top_down_reserved_no_space_check(void) memblock_reserve(r1.base, r1.size); memblock_reserve(r2.base, r2.size); - allocated_ptr = run_memblock_alloc_try_nid(r3_size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, r3_size, alloc_nid_test_flags); @@ -596,7 +607,7 @@ static int alloc_try_nid_top_down_reserved_no_space_check(void) * Expect no allocation to happen. 
*/ -static int alloc_try_nid_reserved_all_generic_check(void) +static int alloc_nid_reserved_all_generic_check(void) { void *allocated_ptr = NULL; struct region r1, r2; @@ -620,9 +631,9 @@ static int alloc_try_nid_reserved_all_generic_check(void) memblock_reserve(r1.base, r1.size); memblock_reserve(r2.base, r2.size); - allocated_ptr = run_memblock_alloc_try_nid(r3_size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_EQ(allocated_ptr, NULL); @@ -636,7 +647,7 @@ static int alloc_try_nid_reserved_all_generic_check(void) * bigger than the end address of the available memory. Expect to allocate * a region that ends before the end of the memory. */ -static int alloc_try_nid_top_down_cap_max_check(void) +static int alloc_nid_top_down_cap_max_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; @@ -650,9 +661,9 @@ static int alloc_try_nid_top_down_cap_max_check(void) min_addr = memblock_end_of_DRAM() - SZ_1K; max_addr = memblock_end_of_DRAM() + SZ_256; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -673,7 +684,7 @@ static int alloc_try_nid_top_down_cap_max_check(void) * smaller than the start address of the available memory. Expect to allocate * a region that ends before the end of the memory. 
*/ -static int alloc_try_nid_top_down_cap_min_check(void) +static int alloc_nid_top_down_cap_min_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; @@ -687,9 +698,9 @@ static int alloc_try_nid_top_down_cap_min_check(void) min_addr = memblock_start_of_DRAM() - SZ_256; max_addr = memblock_end_of_DRAM(); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -719,7 +730,7 @@ static int alloc_try_nid_top_down_cap_min_check(void) * * Expect to allocate a region that ends before max_addr. */ -static int alloc_try_nid_bottom_up_simple_check(void) +static int alloc_nid_bottom_up_simple_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; @@ -734,9 +745,9 @@ static int alloc_try_nid_bottom_up_simple_check(void) min_addr = memblock_start_of_DRAM() + SMP_CACHE_BYTES * 2; max_addr = min_addr + SZ_512; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); rgn_end = rgn->base + rgn->size; ASSERT_NE(allocated_ptr, NULL); @@ -771,7 +782,7 @@ static int alloc_try_nid_bottom_up_simple_check(void) * * Expect to allocate an aligned region that ends before max_addr. 
*/ -static int alloc_try_nid_bottom_up_start_misaligned_check(void) +static int alloc_nid_bottom_up_start_misaligned_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; @@ -787,9 +798,9 @@ static int alloc_try_nid_bottom_up_start_misaligned_check(void) min_addr = memblock_start_of_DRAM() + misalign; max_addr = min_addr + SZ_512; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); rgn_end = rgn->base + rgn->size; ASSERT_NE(allocated_ptr, NULL); @@ -824,7 +835,7 @@ static int alloc_try_nid_bottom_up_start_misaligned_check(void) * Expect to drop the lower limit and allocate a memory region which * starts at the beginning of the available memory. */ -static int alloc_try_nid_bottom_up_narrow_range_check(void) +static int alloc_nid_bottom_up_narrow_range_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; @@ -838,9 +849,9 @@ static int alloc_try_nid_bottom_up_narrow_range_check(void) min_addr = memblock_start_of_DRAM() + SZ_512; max_addr = min_addr + SMP_CACHE_BYTES; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -873,7 +884,7 @@ static int alloc_try_nid_bottom_up_narrow_range_check(void) * updated. The total size field gets updated. 
*/ -static int alloc_try_nid_bottom_up_reserved_with_space_check(void) +static int alloc_nid_bottom_up_reserved_with_space_check(void) { struct memblock_region *rgn1 = &memblock.reserved.regions[1]; struct memblock_region *rgn2 = &memblock.reserved.regions[0]; @@ -901,9 +912,9 @@ static int alloc_try_nid_bottom_up_reserved_with_space_check(void) memblock_reserve(r1.base, r1.size); memblock_reserve(r2.base, r2.size); - allocated_ptr = run_memblock_alloc_try_nid(r3_size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, r3_size, alloc_nid_test_flags); @@ -942,7 +953,7 @@ static int alloc_try_nid_bottom_up_reserved_with_space_check(void) * Other regions are not modified. */ -static int alloc_try_nid_bottom_up_reserved_no_space_check(void) +static int alloc_nid_bottom_up_reserved_no_space_check(void) { struct memblock_region *rgn1 = &memblock.reserved.regions[2]; struct memblock_region *rgn2 = &memblock.reserved.regions[1]; @@ -971,9 +982,9 @@ static int alloc_try_nid_bottom_up_reserved_no_space_check(void) memblock_reserve(r1.base, r1.size); memblock_reserve(r2.base, r2.size); - allocated_ptr = run_memblock_alloc_try_nid(r3_size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, r3_size, alloc_nid_test_flags); @@ -1000,7 +1011,7 @@ static int alloc_try_nid_bottom_up_reserved_no_space_check(void) * bigger than the end address of the available memory. Expect to allocate * a region that starts at the min_addr. 
*/ -static int alloc_try_nid_bottom_up_cap_max_check(void) +static int alloc_nid_bottom_up_cap_max_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; @@ -1014,9 +1025,9 @@ static int alloc_try_nid_bottom_up_cap_max_check(void) min_addr = memblock_start_of_DRAM() + SZ_1K; max_addr = memblock_end_of_DRAM() + SZ_256; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1037,7 +1048,7 @@ static int alloc_try_nid_bottom_up_cap_max_check(void) * smaller than the start address of the available memory. Expect to allocate * a region at the beginning of the available memory. */ -static int alloc_try_nid_bottom_up_cap_min_check(void) +static int alloc_nid_bottom_up_cap_min_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; @@ -1051,9 +1062,9 @@ static int alloc_try_nid_bottom_up_cap_min_check(void) min_addr = memblock_start_of_DRAM(); max_addr = memblock_end_of_DRAM() - SZ_256; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1070,133 +1081,133 @@ static int alloc_try_nid_bottom_up_cap_min_check(void) } /* Test case wrappers for range tests */ -static int alloc_try_nid_simple_check(void) +static int alloc_nid_simple_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_simple_check(); + alloc_nid_top_down_simple_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_simple_check(); + 
alloc_nid_bottom_up_simple_check(); return 0; } -static int alloc_try_nid_misaligned_check(void) +static int alloc_nid_misaligned_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_end_misaligned_check(); + alloc_nid_top_down_end_misaligned_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_start_misaligned_check(); + alloc_nid_bottom_up_start_misaligned_check(); return 0; } -static int alloc_try_nid_narrow_range_check(void) +static int alloc_nid_narrow_range_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_narrow_range_check(); + alloc_nid_top_down_narrow_range_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_narrow_range_check(); + alloc_nid_bottom_up_narrow_range_check(); return 0; } -static int alloc_try_nid_reserved_with_space_check(void) +static int alloc_nid_reserved_with_space_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_reserved_with_space_check(); + alloc_nid_top_down_reserved_with_space_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_reserved_with_space_check(); + alloc_nid_bottom_up_reserved_with_space_check(); return 0; } -static int alloc_try_nid_reserved_no_space_check(void) +static int alloc_nid_reserved_no_space_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_reserved_no_space_check(); + alloc_nid_top_down_reserved_no_space_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_reserved_no_space_check(); + alloc_nid_bottom_up_reserved_no_space_check(); return 0; } -static int alloc_try_nid_cap_max_check(void) +static int alloc_nid_cap_max_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_cap_max_check(); + alloc_nid_top_down_cap_max_check(); memblock_set_bottom_up(true); - 
alloc_try_nid_bottom_up_cap_max_check(); + alloc_nid_bottom_up_cap_max_check(); return 0; } -static int alloc_try_nid_cap_min_check(void) +static int alloc_nid_cap_min_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_cap_min_check(); + alloc_nid_top_down_cap_min_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_cap_min_check(); + alloc_nid_bottom_up_cap_min_check(); return 0; } -static int alloc_try_nid_min_reserved_check(void) +static int alloc_nid_min_reserved_check(void) { test_print("\tRunning %s...\n", __func__); - run_top_down(alloc_try_nid_min_reserved_generic_check); - run_bottom_up(alloc_try_nid_min_reserved_generic_check); + run_top_down(alloc_nid_min_reserved_generic_check); + run_bottom_up(alloc_nid_min_reserved_generic_check); return 0; } -static int alloc_try_nid_max_reserved_check(void) +static int alloc_nid_max_reserved_check(void) { test_print("\tRunning %s...\n", __func__); - run_top_down(alloc_try_nid_max_reserved_generic_check); - run_bottom_up(alloc_try_nid_max_reserved_generic_check); + run_top_down(alloc_nid_max_reserved_generic_check); + run_bottom_up(alloc_nid_max_reserved_generic_check); return 0; } -static int alloc_try_nid_exact_address_check(void) +static int alloc_nid_exact_address_check(void) { test_print("\tRunning %s...\n", __func__); - run_top_down(alloc_try_nid_exact_address_generic_check); - run_bottom_up(alloc_try_nid_exact_address_generic_check); + run_top_down(alloc_nid_exact_address_generic_check); + run_bottom_up(alloc_nid_exact_address_generic_check); return 0; } -static int alloc_try_nid_reserved_full_merge_check(void) +static int alloc_nid_reserved_full_merge_check(void) { test_print("\tRunning %s...\n", __func__); - run_top_down(alloc_try_nid_reserved_full_merge_generic_check); - run_bottom_up(alloc_try_nid_reserved_full_merge_generic_check); + run_top_down(alloc_nid_reserved_full_merge_generic_check); + 
run_bottom_up(alloc_nid_reserved_full_merge_generic_check); return 0; } -static int alloc_try_nid_reserved_all_check(void) +static int alloc_nid_reserved_all_check(void) { test_print("\tRunning %s...\n", __func__); - run_top_down(alloc_try_nid_reserved_all_generic_check); - run_bottom_up(alloc_try_nid_reserved_all_generic_check); + run_top_down(alloc_nid_reserved_all_generic_check); + run_bottom_up(alloc_nid_reserved_all_generic_check); return 0; } -static int alloc_try_nid_low_max_check(void) +static int alloc_nid_low_max_check(void) { test_print("\tRunning %s...\n", __func__); - run_top_down(alloc_try_nid_low_max_generic_check); - run_bottom_up(alloc_try_nid_low_max_generic_check); + run_top_down(alloc_nid_low_max_generic_check); + run_bottom_up(alloc_nid_low_max_generic_check); return 0; } @@ -1204,22 +1215,22 @@ static int alloc_try_nid_low_max_check(void) static int memblock_alloc_nid_range_checks(void) { test_print("Running %s range tests...\n", - get_memblock_alloc_try_nid_name(alloc_nid_test_flags)); + get_memblock_alloc_nid_name(alloc_nid_test_flags)); - alloc_try_nid_simple_check(); - alloc_try_nid_misaligned_check(); - alloc_try_nid_narrow_range_check(); - alloc_try_nid_reserved_with_space_check(); - alloc_try_nid_reserved_no_space_check(); - alloc_try_nid_cap_max_check(); - alloc_try_nid_cap_min_check(); + alloc_nid_simple_check(); + alloc_nid_misaligned_check(); + alloc_nid_narrow_range_check(); + alloc_nid_reserved_with_space_check(); + alloc_nid_reserved_no_space_check(); + alloc_nid_cap_max_check(); + alloc_nid_cap_min_check(); - alloc_try_nid_min_reserved_check(); - alloc_try_nid_max_reserved_check(); - alloc_try_nid_exact_address_check(); - alloc_try_nid_reserved_full_merge_check(); - alloc_try_nid_reserved_all_check(); - alloc_try_nid_low_max_check(); + alloc_nid_min_reserved_check(); + alloc_nid_max_reserved_check(); + alloc_nid_exact_address_check(); + alloc_nid_reserved_full_merge_check(); + alloc_nid_reserved_all_check(); + 
alloc_nid_low_max_check(); return 0; } @@ -1229,7 +1240,7 @@ static int memblock_alloc_nid_range_checks(void) * has enough memory to allocate a region of the requested size. * Expect to allocate an aligned region at the end of the requested node. */ -static int alloc_try_nid_top_down_numa_simple_check(void) +static int alloc_nid_top_down_numa_simple_check(void) { int nid_req = 3; struct memblock_region *new_rgn = &memblock.reserved.regions[0]; @@ -1247,8 +1258,8 @@ static int alloc_try_nid_top_down_numa_simple_check(void) min_addr = memblock_start_of_DRAM(); max_addr = memblock_end_of_DRAM(); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1280,7 +1291,7 @@ static int alloc_try_nid_top_down_numa_simple_check(void) * Expect to allocate an aligned region at the end of the last node that has * enough memory (in this case, nid = 6) after falling back to NUMA_NO_NODE. */ -static int alloc_try_nid_top_down_numa_small_node_check(void) +static int alloc_nid_top_down_numa_small_node_check(void) { int nid_req = 1; int nid_exp = 6; @@ -1299,8 +1310,8 @@ static int alloc_try_nid_top_down_numa_small_node_check(void) min_addr = memblock_start_of_DRAM(); max_addr = memblock_end_of_DRAM(); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1333,7 +1344,7 @@ static int alloc_try_nid_top_down_numa_small_node_check(void) * large enough and has enough unreserved memory (in this case, nid = 6) after * falling back to NUMA_NO_NODE. The region count and total size get updated. 
*/ -static int alloc_try_nid_top_down_numa_node_reserved_check(void) +static int alloc_nid_top_down_numa_node_reserved_check(void) { int nid_req = 2; int nid_exp = 6; @@ -1353,8 +1364,8 @@ static int alloc_try_nid_top_down_numa_node_reserved_check(void) max_addr = memblock_end_of_DRAM(); memblock_reserve(req_node->base, req_node->size); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1386,7 +1397,7 @@ static int alloc_try_nid_top_down_numa_node_reserved_check(void) * Expect to allocate an aligned region at the end of the requested node. The * region count and total size get updated. */ -static int alloc_try_nid_top_down_numa_part_reserved_check(void) +static int alloc_nid_top_down_numa_part_reserved_check(void) { int nid_req = 4; struct memblock_region *new_rgn = &memblock.reserved.regions[1]; @@ -1408,8 +1419,8 @@ static int alloc_try_nid_top_down_numa_part_reserved_check(void) max_addr = memblock_end_of_DRAM(); memblock_reserve(r1.base, r1.size); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1444,7 +1455,7 @@ static int alloc_try_nid_top_down_numa_part_reserved_check(void) * nid = NUMA_NODES - 1) after falling back to NUMA_NO_NODE. The region count * and total size get updated. 
*/ -static int alloc_try_nid_top_down_numa_part_reserved_fallback_check(void) +static int alloc_nid_top_down_numa_part_reserved_fallback_check(void) { int nid_req = 4; int nid_exp = NUMA_NODES - 1; @@ -1469,8 +1480,8 @@ static int alloc_try_nid_top_down_numa_part_reserved_fallback_check(void) max_addr = memblock_end_of_DRAM(); memblock_reserve(r1.base, r1.size); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1507,7 +1518,7 @@ static int alloc_try_nid_top_down_numa_part_reserved_fallback_check(void) * Expect to drop the lower limit and allocate a memory region that ends at * the end of the requested node. */ -static int alloc_try_nid_top_down_numa_split_range_low_check(void) +static int alloc_nid_top_down_numa_split_range_low_check(void) { int nid_req = 2; struct memblock_region *new_rgn = &memblock.reserved.regions[0]; @@ -1525,8 +1536,8 @@ static int alloc_try_nid_top_down_numa_split_range_low_check(void) min_addr = req_node_end - SZ_256; max_addr = min_addr + size; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1563,7 +1574,7 @@ static int alloc_try_nid_top_down_numa_split_range_low_check(void) * Expect to drop the lower limit and allocate a memory region that * ends at the end of the first node that overlaps with the range. 
*/ -static int alloc_try_nid_top_down_numa_split_range_high_check(void) +static int alloc_nid_top_down_numa_split_range_high_check(void) { int nid_req = 3; int nid_exp = nid_req - 1; @@ -1582,8 +1593,8 @@ static int alloc_try_nid_top_down_numa_split_range_high_check(void) min_addr = exp_node_end - SZ_256; max_addr = min_addr + size; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1620,7 +1631,7 @@ static int alloc_try_nid_top_down_numa_split_range_high_check(void) * Expect to drop the lower limit and allocate a memory region that ends at * the end of the requested node. */ -static int alloc_try_nid_top_down_numa_no_overlap_split_check(void) +static int alloc_nid_top_down_numa_no_overlap_split_check(void) { int nid_req = 2; struct memblock_region *new_rgn = &memblock.reserved.regions[0]; @@ -1638,8 +1649,8 @@ static int alloc_try_nid_top_down_numa_no_overlap_split_check(void) min_addr = node2->base - SZ_256; max_addr = min_addr + size; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1677,7 +1688,7 @@ static int alloc_try_nid_top_down_numa_no_overlap_split_check(void) * Expect to allocate a memory region at the end of the final node in * the range after falling back to NUMA_NO_NODE. 
*/ -static int alloc_try_nid_top_down_numa_no_overlap_low_check(void) +static int alloc_nid_top_down_numa_no_overlap_low_check(void) { int nid_req = 0; struct memblock_region *new_rgn = &memblock.reserved.regions[0]; @@ -1694,8 +1705,8 @@ static int alloc_try_nid_top_down_numa_no_overlap_low_check(void) min_addr = min_node->base; max_addr = region_end(max_node); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1733,7 +1744,7 @@ static int alloc_try_nid_top_down_numa_no_overlap_low_check(void) * Expect to allocate a memory region at the end of the final node in * the range after falling back to NUMA_NO_NODE. */ -static int alloc_try_nid_top_down_numa_no_overlap_high_check(void) +static int alloc_nid_top_down_numa_no_overlap_high_check(void) { int nid_req = 7; struct memblock_region *new_rgn = &memblock.reserved.regions[0]; @@ -1750,8 +1761,8 @@ static int alloc_try_nid_top_down_numa_no_overlap_high_check(void) min_addr = min_node->base; max_addr = region_end(max_node); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1773,7 +1784,7 @@ static int alloc_try_nid_top_down_numa_no_overlap_high_check(void) * has enough memory to allocate a region of the requested size. * Expect to allocate an aligned region at the beginning of the requested node. 
*/ -static int alloc_try_nid_bottom_up_numa_simple_check(void) +static int alloc_nid_bottom_up_numa_simple_check(void) { int nid_req = 3; struct memblock_region *new_rgn = &memblock.reserved.regions[0]; @@ -1791,8 +1802,8 @@ static int alloc_try_nid_bottom_up_numa_simple_check(void) min_addr = memblock_start_of_DRAM(); max_addr = memblock_end_of_DRAM(); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1824,7 +1835,7 @@ static int alloc_try_nid_bottom_up_numa_simple_check(void) * Expect to allocate an aligned region at the beginning of the first node that * has enough memory (in this case, nid = 0) after falling back to NUMA_NO_NODE. */ -static int alloc_try_nid_bottom_up_numa_small_node_check(void) +static int alloc_nid_bottom_up_numa_small_node_check(void) { int nid_req = 1; int nid_exp = 0; @@ -1843,8 +1854,8 @@ static int alloc_try_nid_bottom_up_numa_small_node_check(void) min_addr = memblock_start_of_DRAM(); max_addr = memblock_end_of_DRAM(); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1878,7 +1889,7 @@ static int alloc_try_nid_bottom_up_numa_small_node_check(void) * after falling back to NUMA_NO_NODE. The region count and total size get * updated. 
*/ -static int alloc_try_nid_bottom_up_numa_node_reserved_check(void) +static int alloc_nid_bottom_up_numa_node_reserved_check(void) { int nid_req = 2; int nid_exp = 0; @@ -1898,8 +1909,8 @@ static int alloc_try_nid_bottom_up_numa_node_reserved_check(void) max_addr = memblock_end_of_DRAM(); memblock_reserve(req_node->base, req_node->size); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1931,7 +1942,7 @@ static int alloc_try_nid_bottom_up_numa_node_reserved_check(void) * Expect to allocate an aligned region in the requested node that merges with * the existing reserved region. The total size gets updated. */ -static int alloc_try_nid_bottom_up_numa_part_reserved_check(void) +static int alloc_nid_bottom_up_numa_part_reserved_check(void) { int nid_req = 4; struct memblock_region *new_rgn = &memblock.reserved.regions[0]; @@ -1955,8 +1966,8 @@ static int alloc_try_nid_bottom_up_numa_part_reserved_check(void) total_size = size + r1.size; memblock_reserve(r1.base, r1.size); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1991,7 +2002,7 @@ static int alloc_try_nid_bottom_up_numa_part_reserved_check(void) * nid = 0) after falling back to NUMA_NO_NODE. The region count and total size * get updated. 
*/ -static int alloc_try_nid_bottom_up_numa_part_reserved_fallback_check(void) +static int alloc_nid_bottom_up_numa_part_reserved_fallback_check(void) { int nid_req = 4; int nid_exp = 0; @@ -2016,8 +2027,8 @@ static int alloc_try_nid_bottom_up_numa_part_reserved_fallback_check(void) max_addr = memblock_end_of_DRAM(); memblock_reserve(r1.base, r1.size); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -2054,7 +2065,7 @@ static int alloc_try_nid_bottom_up_numa_part_reserved_fallback_check(void) * Expect to drop the lower limit and allocate a memory region at the beginning * of the requested node. */ -static int alloc_try_nid_bottom_up_numa_split_range_low_check(void) +static int alloc_nid_bottom_up_numa_split_range_low_check(void) { int nid_req = 2; struct memblock_region *new_rgn = &memblock.reserved.regions[0]; @@ -2072,8 +2083,8 @@ static int alloc_try_nid_bottom_up_numa_split_range_low_check(void) min_addr = req_node_end - SZ_256; max_addr = min_addr + size; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -2110,7 +2121,7 @@ static int alloc_try_nid_bottom_up_numa_split_range_low_check(void) * Expect to drop the lower limit and allocate a memory region at the beginning * of the first node that has enough memory. 
*/ -static int alloc_try_nid_bottom_up_numa_split_range_high_check(void) +static int alloc_nid_bottom_up_numa_split_range_high_check(void) { int nid_req = 3; int nid_exp = 0; @@ -2130,8 +2141,8 @@ static int alloc_try_nid_bottom_up_numa_split_range_high_check(void) min_addr = req_node->base - SZ_256; max_addr = min_addr + size; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -2168,7 +2179,7 @@ static int alloc_try_nid_bottom_up_numa_split_range_high_check(void) * Expect to drop the lower limit and allocate a memory region that starts at * the beginning of the requested node. */ -static int alloc_try_nid_bottom_up_numa_no_overlap_split_check(void) +static int alloc_nid_bottom_up_numa_no_overlap_split_check(void) { int nid_req = 2; struct memblock_region *new_rgn = &memblock.reserved.regions[0]; @@ -2186,8 +2197,8 @@ static int alloc_try_nid_bottom_up_numa_no_overlap_split_check(void) min_addr = node2->base - SZ_256; max_addr = min_addr + size; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -2225,7 +2236,7 @@ static int alloc_try_nid_bottom_up_numa_no_overlap_split_check(void) * Expect to allocate a memory region at the beginning of the first node * in the range after falling back to NUMA_NO_NODE. 
*/ -static int alloc_try_nid_bottom_up_numa_no_overlap_low_check(void) +static int alloc_nid_bottom_up_numa_no_overlap_low_check(void) { int nid_req = 0; struct memblock_region *new_rgn = &memblock.reserved.regions[0]; @@ -2242,8 +2253,8 @@ static int alloc_try_nid_bottom_up_numa_no_overlap_low_check(void) min_addr = min_node->base; max_addr = region_end(max_node); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -2281,7 +2292,7 @@ static int alloc_try_nid_bottom_up_numa_no_overlap_low_check(void) * Expect to allocate a memory region at the beginning of the first node * in the range after falling back to NUMA_NO_NODE. */ -static int alloc_try_nid_bottom_up_numa_no_overlap_high_check(void) +static int alloc_nid_bottom_up_numa_no_overlap_high_check(void) { int nid_req = 7; struct memblock_region *new_rgn = &memblock.reserved.regions[0]; @@ -2298,8 +2309,8 @@ static int alloc_try_nid_bottom_up_numa_no_overlap_high_check(void) min_addr = min_node->base; max_addr = region_end(max_node); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -2330,7 +2341,7 @@ static int alloc_try_nid_bottom_up_numa_no_overlap_high_check(void) * * Expect no allocation to happen. 
*/ -static int alloc_try_nid_numa_large_region_generic_check(void) +static int alloc_nid_numa_large_region_generic_check(void) { int nid_req = 3; void *allocated_ptr = NULL; @@ -2344,8 +2355,8 @@ static int alloc_try_nid_numa_large_region_generic_check(void) min_addr = memblock_start_of_DRAM(); max_addr = memblock_end_of_DRAM(); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_EQ(allocated_ptr, NULL); test_pass_pop(); @@ -2374,7 +2385,7 @@ static int alloc_try_nid_numa_large_region_generic_check(void) * Expect to merge all of the regions into one. The region counter and total * size fields get updated. */ -static int alloc_try_nid_numa_reserved_full_merge_generic_check(void) +static int alloc_nid_numa_reserved_full_merge_generic_check(void) { int nid_req = 6; int nid_next = nid_req + 1; @@ -2404,8 +2415,8 @@ static int alloc_try_nid_numa_reserved_full_merge_generic_check(void) memblock_reserve(r1.base, r1.size); memblock_reserve(r2.base, r2.size); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -2448,7 +2459,7 @@ static int alloc_try_nid_numa_reserved_full_merge_generic_check(void) * * Expect no allocation to happen. 
*/ -static int alloc_try_nid_numa_split_all_reserved_generic_check(void) +static int alloc_nid_numa_split_all_reserved_generic_check(void) { void *allocated_ptr = NULL; struct memblock_region *next_node = &memblock.memory.regions[7]; @@ -2472,9 +2483,9 @@ static int alloc_try_nid_numa_split_all_reserved_generic_check(void) memblock_reserve(r1.base, r1.size); memblock_reserve(r2.base, r2.size); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_EQ(allocated_ptr, NULL); @@ -2484,139 +2495,139 @@ static int alloc_try_nid_numa_split_all_reserved_generic_check(void) } /* Test case wrappers for NUMA tests */ -static int alloc_try_nid_numa_simple_check(void) +static int alloc_nid_numa_simple_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_numa_simple_check(); + alloc_nid_top_down_numa_simple_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_numa_simple_check(); + alloc_nid_bottom_up_numa_simple_check(); return 0; } -static int alloc_try_nid_numa_small_node_check(void) +static int alloc_nid_numa_small_node_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_numa_small_node_check(); + alloc_nid_top_down_numa_small_node_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_numa_small_node_check(); + alloc_nid_bottom_up_numa_small_node_check(); return 0; } -static int alloc_try_nid_numa_node_reserved_check(void) +static int alloc_nid_numa_node_reserved_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_numa_node_reserved_check(); + alloc_nid_top_down_numa_node_reserved_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_numa_node_reserved_check(); + 
alloc_nid_bottom_up_numa_node_reserved_check(); return 0; } -static int alloc_try_nid_numa_part_reserved_check(void) +static int alloc_nid_numa_part_reserved_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_numa_part_reserved_check(); + alloc_nid_top_down_numa_part_reserved_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_numa_part_reserved_check(); + alloc_nid_bottom_up_numa_part_reserved_check(); return 0; } -static int alloc_try_nid_numa_part_reserved_fallback_check(void) +static int alloc_nid_numa_part_reserved_fallback_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_numa_part_reserved_fallback_check(); + alloc_nid_top_down_numa_part_reserved_fallback_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_numa_part_reserved_fallback_check(); + alloc_nid_bottom_up_numa_part_reserved_fallback_check(); return 0; } -static int alloc_try_nid_numa_split_range_low_check(void) +static int alloc_nid_numa_split_range_low_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_numa_split_range_low_check(); + alloc_nid_top_down_numa_split_range_low_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_numa_split_range_low_check(); + alloc_nid_bottom_up_numa_split_range_low_check(); return 0; } -static int alloc_try_nid_numa_split_range_high_check(void) +static int alloc_nid_numa_split_range_high_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_numa_split_range_high_check(); + alloc_nid_top_down_numa_split_range_high_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_numa_split_range_high_check(); + alloc_nid_bottom_up_numa_split_range_high_check(); return 0; } -static int alloc_try_nid_numa_no_overlap_split_check(void) +static int alloc_nid_numa_no_overlap_split_check(void) { 
test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_numa_no_overlap_split_check(); + alloc_nid_top_down_numa_no_overlap_split_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_numa_no_overlap_split_check(); + alloc_nid_bottom_up_numa_no_overlap_split_check(); return 0; } -static int alloc_try_nid_numa_no_overlap_low_check(void) +static int alloc_nid_numa_no_overlap_low_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_numa_no_overlap_low_check(); + alloc_nid_top_down_numa_no_overlap_low_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_numa_no_overlap_low_check(); + alloc_nid_bottom_up_numa_no_overlap_low_check(); return 0; } -static int alloc_try_nid_numa_no_overlap_high_check(void) +static int alloc_nid_numa_no_overlap_high_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_numa_no_overlap_high_check(); + alloc_nid_top_down_numa_no_overlap_high_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_numa_no_overlap_high_check(); + alloc_nid_bottom_up_numa_no_overlap_high_check(); return 0; } -static int alloc_try_nid_numa_large_region_check(void) +static int alloc_nid_numa_large_region_check(void) { test_print("\tRunning %s...\n", __func__); - run_top_down(alloc_try_nid_numa_large_region_generic_check); - run_bottom_up(alloc_try_nid_numa_large_region_generic_check); + run_top_down(alloc_nid_numa_large_region_generic_check); + run_bottom_up(alloc_nid_numa_large_region_generic_check); return 0; } -static int alloc_try_nid_numa_reserved_full_merge_check(void) +static int alloc_nid_numa_reserved_full_merge_check(void) { test_print("\tRunning %s...\n", __func__); - run_top_down(alloc_try_nid_numa_reserved_full_merge_generic_check); - run_bottom_up(alloc_try_nid_numa_reserved_full_merge_generic_check); + 
run_top_down(alloc_nid_numa_reserved_full_merge_generic_check); + run_bottom_up(alloc_nid_numa_reserved_full_merge_generic_check); return 0; } -static int alloc_try_nid_numa_split_all_reserved_check(void) +static int alloc_nid_numa_split_all_reserved_check(void) { test_print("\tRunning %s...\n", __func__); - run_top_down(alloc_try_nid_numa_split_all_reserved_generic_check); - run_bottom_up(alloc_try_nid_numa_split_all_reserved_generic_check); + run_top_down(alloc_nid_numa_split_all_reserved_generic_check); + run_bottom_up(alloc_nid_numa_split_all_reserved_generic_check); return 0; } @@ -2624,22 +2635,22 @@ static int alloc_try_nid_numa_split_all_reserved_check(void) int __memblock_alloc_nid_numa_checks(void) { test_print("Running %s NUMA tests...\n", - get_memblock_alloc_try_nid_name(alloc_nid_test_flags)); + get_memblock_alloc_nid_name(alloc_nid_test_flags)); - alloc_try_nid_numa_simple_check(); - alloc_try_nid_numa_small_node_check(); - alloc_try_nid_numa_node_reserved_check(); - alloc_try_nid_numa_part_reserved_check(); - alloc_try_nid_numa_part_reserved_fallback_check(); - alloc_try_nid_numa_split_range_low_check(); - alloc_try_nid_numa_split_range_high_check(); + alloc_nid_numa_simple_check(); + alloc_nid_numa_small_node_check(); + alloc_nid_numa_node_reserved_check(); + alloc_nid_numa_part_reserved_check(); + alloc_nid_numa_part_reserved_fallback_check(); + alloc_nid_numa_split_range_low_check(); + alloc_nid_numa_split_range_high_check(); - alloc_try_nid_numa_no_overlap_split_check(); - alloc_try_nid_numa_no_overlap_low_check(); - alloc_try_nid_numa_no_overlap_high_check(); - alloc_try_nid_numa_large_region_check(); - alloc_try_nid_numa_reserved_full_merge_check(); - alloc_try_nid_numa_split_all_reserved_check(); + alloc_nid_numa_no_overlap_split_check(); + alloc_nid_numa_no_overlap_low_check(); + alloc_nid_numa_no_overlap_high_check(); + alloc_nid_numa_large_region_check(); + alloc_nid_numa_reserved_full_merge_check(); + 
alloc_nid_numa_split_all_reserved_check(); return 0; } @@ -2649,7 +2660,7 @@ static int memblock_alloc_nid_checks_internal(int flags) alloc_nid_test_flags = flags; prefix_reset(); - prefix_push(get_memblock_alloc_try_nid_name(flags)); + prefix_push(get_memblock_alloc_nid_name(flags)); reset_memblock_attributes(); dummy_physical_memory_init(); @@ -2671,3 +2682,12 @@ int memblock_alloc_nid_checks(void) return 0; } + +int memblock_alloc_exact_nid_range_checks(void) +{ + alloc_nid_test_flags = (TEST_F_RAW | TEST_F_EXACT); + + memblock_alloc_nid_range_checks(); + + return 0; +} diff --git a/tools/testing/memblock/tests/alloc_nid_api.h b/tools/testing/memblock/tests/alloc_nid_api.h index 92d07d230e18..2b8cabacacb8 100644 --- a/tools/testing/memblock/tests/alloc_nid_api.h +++ b/tools/testing/memblock/tests/alloc_nid_api.h @@ -5,6 +5,7 @@ #include "common.h" int memblock_alloc_nid_checks(void); +int memblock_alloc_exact_nid_range_checks(void); int __memblock_alloc_nid_numa_checks(void); #ifdef CONFIG_NUMA diff --git a/tools/testing/memblock/tests/common.h b/tools/testing/memblock/tests/common.h index cc82b85151b6..4f23302ee677 100644 --- a/tools/testing/memblock/tests/common.h +++ b/tools/testing/memblock/tests/common.h @@ -21,6 +21,8 @@ enum test_flags { TEST_F_NONE = 0x0, /* Perform raw allocations (no zeroing of memory). */ TEST_F_RAW = 0x1, + /* Perform allocations on the exact node specified. */ + TEST_F_EXACT = 0x2 }; /** From bfc05a4ce3650a1e5a47ccdaf8c87f814829b4a7 Mon Sep 17 00:00:00 2001 From: Rebecca Mckeever Date: Mon, 7 Nov 2022 00:28:06 -0600 Subject: [PATCH 1001/4122] memblock tests: add top-down NUMA tests for memblock_alloc_exact_nid_raw Add tests for memblock_alloc_exact_nid_raw() where the simulated physical memory is set up with multiple NUMA nodes. Additionally, all of these tests set nid != NUMA_NO_NODE. These tests are run with a top-down allocation direction. 
The tested scenarios are: Range unrestricted: - region can be allocated in the specific node requested: + there are no previously reserved regions + the requested node is partially reserved but has enough space Range restricted: - region can be allocated in the specific node requested after dropping min_addr: + range partially overlaps with two different nodes, where the first node is the requested node + range partially overlaps with two different nodes, where the requested node ends before min_addr + range overlaps with multiple nodes along node boundaries, and the requested node ends before min_addr Acked-by: David Hildenbrand Signed-off-by: Rebecca Mckeever Signed-off-by: Mike Rapoport Link: https://lore.kernel.org/r/2cc0883243d68ddc3faf833d2d9e86f48534c1d7.1667802195.git.remckee0@gmail.com --- .../memblock/tests/alloc_exact_nid_api.c | 344 ++++++++++++++++++ .../memblock/tests/alloc_exact_nid_api.h | 16 + 2 files changed, 360 insertions(+) diff --git a/tools/testing/memblock/tests/alloc_exact_nid_api.c b/tools/testing/memblock/tests/alloc_exact_nid_api.c index 6406496623a0..79150784b373 100644 --- a/tools/testing/memblock/tests/alloc_exact_nid_api.c +++ b/tools/testing/memblock/tests/alloc_exact_nid_api.c @@ -4,6 +4,349 @@ #define FUNC_NAME "memblock_alloc_exact_nid_raw" +/* + * contains the fraction of MEM_SIZE contained in each node in basis point + * units (one hundredth of 1% or 1/10000) + */ +static const unsigned int node_fractions[] = { + 2500, /* 1/4 */ + 625, /* 1/16 */ + 1250, /* 1/8 */ + 1250, /* 1/8 */ + 625, /* 1/16 */ + 625, /* 1/16 */ + 2500, /* 1/4 */ + 625, /* 1/16 */ +}; + +/* + * A test that tries to allocate a memory region in a specific NUMA node that + * has enough memory to allocate a region of the requested size. + * Expect to allocate an aligned region at the end of the requested node. 
+ */ +static int alloc_exact_nid_top_down_numa_simple_check(void) +{ + int nid_req = 3; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + void *allocated_ptr = NULL; + phys_addr_t size; + phys_addr_t min_addr; + phys_addr_t max_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + ASSERT_LE(SZ_4, req_node->size); + size = req_node->size / SZ_4; + min_addr = memblock_start_of_DRAM(); + max_addr = memblock_end_of_DRAM(); + + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_NE(allocated_ptr, NULL); + ASSERT_MEM_NE(allocated_ptr, 0, size); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, region_end(req_node) - size); + ASSERT_LE(req_node->base, new_rgn->base); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region in a specific NUMA node that + * is partially reserved but has enough memory for the allocated region: + * + * | +---------------------------------------+ | + * | | requested | | + * +-----------+---------------------------------------+----------+ + * + * | +------------------+ +-----+ | + * | | reserved | | new | | + * +-----------+------------------+--------------+-----+----------+ + * + * Expect to allocate an aligned region at the end of the requested node. The + * region count and total size get updated. 
+ */ +static int alloc_exact_nid_top_down_numa_part_reserved_check(void) +{ + int nid_req = 4; + struct memblock_region *new_rgn = &memblock.reserved.regions[1]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + void *allocated_ptr = NULL; + struct region r1; + phys_addr_t size; + phys_addr_t min_addr; + phys_addr_t max_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + ASSERT_LE(SZ_8, req_node->size); + r1.base = req_node->base; + r1.size = req_node->size / SZ_2; + size = r1.size / SZ_4; + min_addr = memblock_start_of_DRAM(); + max_addr = memblock_end_of_DRAM(); + + memblock_reserve(r1.base, r1.size); + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_NE(allocated_ptr, NULL); + ASSERT_MEM_NE(allocated_ptr, 0, size); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, region_end(req_node) - size); + ASSERT_LE(req_node->base, new_rgn->base); + + ASSERT_EQ(memblock.reserved.cnt, 2); + ASSERT_EQ(memblock.reserved.total_size, size + r1.size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region that spans over the min_addr + * and max_addr range and overlaps with two different nodes, where the first + * node is the requested node: + * + * min_addr + * | max_addr + * | | + * v v + * | +-----------------------+-----------+ | + * | | requested | node3 | | + * +-----------+-----------------------+-----------+--------------+ + * + + + * | +-----------+ | + * | | rgn | | + * +-----------------------+-----------+--------------------------+ + * + * Expect to drop the lower limit and allocate a memory region that ends at + * the end of the requested node. 
+ */ +static int alloc_exact_nid_top_down_numa_split_range_low_check(void) +{ + int nid_req = 2; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + void *allocated_ptr = NULL; + phys_addr_t size = SZ_512; + phys_addr_t min_addr; + phys_addr_t max_addr; + phys_addr_t req_node_end; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + req_node_end = region_end(req_node); + min_addr = req_node_end - SZ_256; + max_addr = min_addr + size; + + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_NE(allocated_ptr, NULL); + ASSERT_MEM_NE(allocated_ptr, 0, size); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, req_node_end - size); + ASSERT_LE(req_node->base, new_rgn->base); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region that spans over the min_addr + * and max_addr range and overlaps with two different nodes, where the requested + * node ends before min_addr: + * + * min_addr + * | max_addr + * | | + * v v + * | +---------------+ +-------------+---------+ | + * | | requested | | node1 | node2 | | + * +----+---------------+--------+-------------+---------+----------+ + * + + + * | +---------+ | + * | | rgn | | + * +----------+---------+-------------------------------------------+ + * + * Expect to drop the lower limit and allocate a memory region that ends at + * the end of the requested node. 
+ */ +static int alloc_exact_nid_top_down_numa_no_overlap_split_check(void) +{ + int nid_req = 2; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + struct memblock_region *node2 = &memblock.memory.regions[6]; + void *allocated_ptr = NULL; + phys_addr_t size; + phys_addr_t min_addr; + phys_addr_t max_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + size = SZ_512; + min_addr = node2->base - SZ_256; + max_addr = min_addr + size; + + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_NE(allocated_ptr, NULL); + ASSERT_MEM_NE(allocated_ptr, 0, size); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, region_end(req_node) - size); + ASSERT_LE(req_node->base, new_rgn->base); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate memory within min_addr and max_addr range when + * the requested node and the range do not overlap, and requested node ends + * before min_addr. The range overlaps with multiple nodes along node + * boundaries: + * + * min_addr + * | max_addr + * | | + * v v + * |-----------+ +----------+----...----+----------+ | + * | requested | | min node | ... | max node | | + * +-----------+-----------+----------+----...----+----------+------+ + * + + + * | +-----+ | + * | | rgn | | + * +-----+-----+----------------------------------------------------+ + * + * Expect to drop the lower limit and allocate a memory region that ends at + * the end of the requested node.
+ */ +static int alloc_exact_nid_top_down_numa_no_overlap_low_check(void) +{ + int nid_req = 0; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + struct memblock_region *min_node = &memblock.memory.regions[2]; + struct memblock_region *max_node = &memblock.memory.regions[5]; + void *allocated_ptr = NULL; + phys_addr_t size = SZ_64; + phys_addr_t max_addr; + phys_addr_t min_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + min_addr = min_node->base; + max_addr = region_end(max_node); + + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_NE(allocated_ptr, NULL); + ASSERT_MEM_NE(allocated_ptr, 0, size); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, region_end(req_node) - size); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); + + test_pass_pop(); + + return 0; +} + +/* Test case wrappers for NUMA tests */ +static int alloc_exact_nid_numa_simple_check(void) +{ + test_print("\tRunning %s...\n", __func__); + memblock_set_bottom_up(false); + alloc_exact_nid_top_down_numa_simple_check(); + + return 0; +} + +static int alloc_exact_nid_numa_part_reserved_check(void) +{ + test_print("\tRunning %s...\n", __func__); + memblock_set_bottom_up(false); + alloc_exact_nid_top_down_numa_part_reserved_check(); + + return 0; +} + +static int alloc_exact_nid_numa_split_range_low_check(void) +{ + test_print("\tRunning %s...\n", __func__); + memblock_set_bottom_up(false); + alloc_exact_nid_top_down_numa_split_range_low_check(); + + return 0; +} + +static int alloc_exact_nid_numa_no_overlap_split_check(void) +{ + test_print("\tRunning %s...\n", __func__); + memblock_set_bottom_up(false); + alloc_exact_nid_top_down_numa_no_overlap_split_check(); + + return 0; +} + +static int alloc_exact_nid_numa_no_overlap_low_check(void) +{ + test_print("\tRunning %s...\n", 
__func__); + memblock_set_bottom_up(false); + alloc_exact_nid_top_down_numa_no_overlap_low_check(); + + return 0; +} + +int __memblock_alloc_exact_nid_numa_checks(void) +{ + test_print("Running %s NUMA tests...\n", FUNC_NAME); + + alloc_exact_nid_numa_simple_check(); + alloc_exact_nid_numa_part_reserved_check(); + alloc_exact_nid_numa_split_range_low_check(); + alloc_exact_nid_numa_no_overlap_split_check(); + alloc_exact_nid_numa_no_overlap_low_check(); + + return 0; +} + int memblock_alloc_exact_nid_checks(void) { prefix_reset(); @@ -13,6 +356,7 @@ int memblock_alloc_exact_nid_checks(void) dummy_physical_memory_init(); memblock_alloc_exact_nid_range_checks(); + memblock_alloc_exact_nid_numa_checks(); dummy_physical_memory_cleanup(); diff --git a/tools/testing/memblock/tests/alloc_exact_nid_api.h b/tools/testing/memblock/tests/alloc_exact_nid_api.h index 4408719de3b9..cef419d55d2a 100644 --- a/tools/testing/memblock/tests/alloc_exact_nid_api.h +++ b/tools/testing/memblock/tests/alloc_exact_nid_api.h @@ -5,5 +5,21 @@ #include "common.h" int memblock_alloc_exact_nid_checks(void); +int __memblock_alloc_exact_nid_numa_checks(void); + +#ifdef CONFIG_NUMA +static inline int memblock_alloc_exact_nid_numa_checks(void) +{ + __memblock_alloc_exact_nid_numa_checks(); + return 0; +} + +#else +static inline int memblock_alloc_exact_nid_numa_checks(void) +{ + return 0; +} + +#endif /* CONFIG_NUMA */ #endif From b6df23edb1ba65b0b46788a872ddc85dfe86ccf5 Mon Sep 17 00:00:00 2001 From: Rebecca Mckeever Date: Mon, 7 Nov 2022 00:28:07 -0600 Subject: [PATCH 1002/4122] memblock tests: add bottom-up NUMA tests for memblock_alloc_exact_nid_raw Add tests for memblock_alloc_exact_nid_raw() where the simulated physical memory is set up with multiple NUMA nodes. Additionally, all of these tests set nid != NUMA_NO_NODE. These tests are run with a bottom-up allocation direction. 
The tested scenarios are: Range unrestricted: - region can be allocated in the specific node requested: + there are no previously reserved regions + the requested node is partially reserved but has enough space Range restricted: - region can be allocated in the specific node requested after dropping min_addr: + range partially overlaps with two different nodes, where the first node is the requested node + range partially overlaps with two different nodes, where the requested node ends before min_addr + range overlaps with multiple nodes along node boundaries, and the requested node ends before min_addr Acked-by: David Hildenbrand Signed-off-by: Rebecca Mckeever Signed-off-by: Mike Rapoport Link: https://lore.kernel.org/r/935f0eed5e06fd44dc67d9f49b277923d7896bd3.1667802195.git.remckee0@gmail.com --- .../memblock/tests/alloc_exact_nid_api.c | 282 ++++++++++++++++++ 1 file changed, 282 insertions(+) diff --git a/tools/testing/memblock/tests/alloc_exact_nid_api.c b/tools/testing/memblock/tests/alloc_exact_nid_api.c index 79150784b373..b97b5c04de05 100644 --- a/tools/testing/memblock/tests/alloc_exact_nid_api.c +++ b/tools/testing/memblock/tests/alloc_exact_nid_api.c @@ -288,12 +288,286 @@ static int alloc_exact_nid_top_down_numa_no_overlap_low_check(void) return 0; } +/* + * A test that tries to allocate a memory region in a specific NUMA node that + * has enough memory to allocate a region of the requested size. + * Expect to allocate an aligned region at the beginning of the requested node. 
+ */ +static int alloc_exact_nid_bottom_up_numa_simple_check(void) +{ + int nid_req = 3; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + void *allocated_ptr = NULL; + phys_addr_t size; + phys_addr_t min_addr; + phys_addr_t max_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + ASSERT_LE(SZ_4, req_node->size); + size = req_node->size / SZ_4; + min_addr = memblock_start_of_DRAM(); + max_addr = memblock_end_of_DRAM(); + + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_NE(allocated_ptr, NULL); + ASSERT_MEM_NE(allocated_ptr, 0, size); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, req_node->base); + ASSERT_LE(region_end(new_rgn), region_end(req_node)); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region in a specific NUMA node that + * is partially reserved but has enough memory for the allocated region: + * + * | +---------------------------------------+ | + * | | requested | | + * +-----------+---------------------------------------+---------+ + * + * | +------------------+-----+ | + * | | reserved | new | | + * +-----------+------------------+-----+------------------------+ + * + * Expect to allocate an aligned region in the requested node that merges with + * the existing reserved region. The total size gets updated. 
+ */ +static int alloc_exact_nid_bottom_up_numa_part_reserved_check(void) +{ + int nid_req = 4; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + void *allocated_ptr = NULL; + struct region r1; + phys_addr_t size; + phys_addr_t min_addr; + phys_addr_t max_addr; + phys_addr_t total_size; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + ASSERT_LE(SZ_8, req_node->size); + r1.base = req_node->base; + r1.size = req_node->size / SZ_2; + size = r1.size / SZ_4; + min_addr = memblock_start_of_DRAM(); + max_addr = memblock_end_of_DRAM(); + total_size = size + r1.size; + + memblock_reserve(r1.base, r1.size); + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_NE(allocated_ptr, NULL); + ASSERT_MEM_NE(allocated_ptr, 0, size); + + ASSERT_EQ(new_rgn->size, total_size); + ASSERT_EQ(new_rgn->base, req_node->base); + ASSERT_LE(region_end(new_rgn), region_end(req_node)); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, total_size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region that spans over the min_addr + * and max_addr range and overlaps with two different nodes, where the first + * node is the requested node: + * + * min_addr + * | max_addr + * | | + * v v + * | +-----------------------+-----------+ | + * | | requested | node3 | | + * +-----------+-----------------------+-----------+--------------+ + * + + + * | +-----------+ | + * | | rgn | | + * +-----------+-----------+--------------------------------------+ + * + * Expect to drop the lower limit and allocate a memory region at the beginning + * of the requested node. 
+ */ +static int alloc_exact_nid_bottom_up_numa_split_range_low_check(void) +{ + int nid_req = 2; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + void *allocated_ptr = NULL; + phys_addr_t size = SZ_512; + phys_addr_t min_addr; + phys_addr_t max_addr; + phys_addr_t req_node_end; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + req_node_end = region_end(req_node); + min_addr = req_node_end - SZ_256; + max_addr = min_addr + size; + + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_NE(allocated_ptr, NULL); + ASSERT_MEM_NE(allocated_ptr, 0, size); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, req_node->base); + ASSERT_LE(region_end(new_rgn), req_node_end); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region that spans over the min_addr + * and max_addr range and overlaps with two different nodes, where the requested + * node ends before min_addr: + * + * min_addr + * | max_addr + * | | + * v v + * | +---------------+ +-------------+---------+ | + * | | requested | | node1 | node2 | | + * +----+---------------+--------+-------------+---------+---------+ + * + + + * | +---------+ | + * | | rgn | | + * +----+---------+------------------------------------------------+ + * + * Expect to drop the lower limit and allocate a memory region that starts at + * the beginning of the requested node. 
+ */ +static int alloc_exact_nid_bottom_up_numa_no_overlap_split_check(void) +{ + int nid_req = 2; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + struct memblock_region *node2 = &memblock.memory.regions[6]; + void *allocated_ptr = NULL; + phys_addr_t size; + phys_addr_t min_addr; + phys_addr_t max_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + size = SZ_512; + min_addr = node2->base - SZ_256; + max_addr = min_addr + size; + + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_NE(allocated_ptr, NULL); + ASSERT_MEM_NE(allocated_ptr, 0, size); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, req_node->base); + ASSERT_LE(region_end(new_rgn), region_end(req_node)); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate memory within min_addr and max_addr range when + * the requested node and the range do not overlap, and requested node ends + * before min_addr. The range overlaps with multiple nodes along node + * boundaries: + * + * min_addr + * | max_addr + * | | + * v v + * |-----------+ +----------+----...----+----------+ | + * | requested | | min node | ... | max node | | + * +-----------+-----------+----------+----...----+----------+------+ + * + + + * |-----+ | + * | rgn | | + * +-----+----------------------------------------------------------+ + * + * Expect to drop the lower limit and allocate a memory region that starts at + * the beginning of the requested node.
+ */ +static int alloc_exact_nid_bottom_up_numa_no_overlap_low_check(void) +{ + int nid_req = 0; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + struct memblock_region *min_node = &memblock.memory.regions[2]; + struct memblock_region *max_node = &memblock.memory.regions[5]; + void *allocated_ptr = NULL; + phys_addr_t size = SZ_64; + phys_addr_t max_addr; + phys_addr_t min_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + min_addr = min_node->base; + max_addr = region_end(max_node); + + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_NE(allocated_ptr, NULL); + ASSERT_MEM_NE(allocated_ptr, 0, size); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, req_node->base); + ASSERT_LE(region_end(new_rgn), region_end(req_node)); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); + + test_pass_pop(); + + return 0; +} + /* Test case wrappers for NUMA tests */ static int alloc_exact_nid_numa_simple_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); alloc_exact_nid_top_down_numa_simple_check(); + memblock_set_bottom_up(true); + alloc_exact_nid_bottom_up_numa_simple_check(); return 0; } @@ -303,6 +577,8 @@ static int alloc_exact_nid_numa_part_reserved_check(void) test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); alloc_exact_nid_top_down_numa_part_reserved_check(); + memblock_set_bottom_up(true); + alloc_exact_nid_bottom_up_numa_part_reserved_check(); return 0; } @@ -312,6 +588,8 @@ static int alloc_exact_nid_numa_split_range_low_check(void) test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); alloc_exact_nid_top_down_numa_split_range_low_check(); + memblock_set_bottom_up(true); + alloc_exact_nid_bottom_up_numa_split_range_low_check(); return 0; } @@ -321,6 +599,8 @@ static int 
alloc_exact_nid_numa_no_overlap_split_check(void) test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); alloc_exact_nid_top_down_numa_no_overlap_split_check(); + memblock_set_bottom_up(true); + alloc_exact_nid_bottom_up_numa_no_overlap_split_check(); return 0; } @@ -330,6 +610,8 @@ static int alloc_exact_nid_numa_no_overlap_low_check(void) test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); alloc_exact_nid_top_down_numa_no_overlap_low_check(); + memblock_set_bottom_up(true); + alloc_exact_nid_bottom_up_numa_no_overlap_low_check(); return 0; } From 62bdc99008b372a6c5f81e6d968f3b077a1e3667 Mon Sep 17 00:00:00 2001 From: Rebecca Mckeever Date: Mon, 7 Nov 2022 00:28:08 -0600 Subject: [PATCH 1003/4122] memblock tests: add generic NUMA tests for memblock_alloc_exact_nid_raw Add tests for memblock_alloc_exact_nid_raw() where the simulated physical memory is set up with multiple NUMA nodes. Additionally, all but one of these tests set nid != NUMA_NO_NODE. All tests are run for both top-down and bottom-up allocation directions. 
The tested scenarios are: Range unrestricted: - region cannot be allocated: + there are no previously reserved regions, but requested node is too small + the requested node is fully reserved + the requested node is partially reserved and does not have enough space + none of the nodes have enough memory to allocate the region Range restricted: - region can be allocated in the specific node requested without dropping min_addr: + the range fully overlaps with the node, and there are adjacent reserved regions - region cannot be allocated: + range partially overlaps with two different nodes, where the second node is the requested node + range overlaps with multiple nodes along node boundaries, and the requested node starts after max_addr + nid is set to NUMA_NO_NODE and the total range can fit the region, but the range is split between two nodes and everything else is reserved Acked-by: David Hildenbrand Signed-off-by: Rebecca Mckeever Signed-off-by: Mike Rapoport Link: https://lore.kernel.org/r/51b14da46e6591428df3aefc5acc7dca9341a541.1667802195.git.remckee0@gmail.com --- .../memblock/tests/alloc_exact_nid_api.c | 465 ++++++++++++++++++ 1 file changed, 465 insertions(+) diff --git a/tools/testing/memblock/tests/alloc_exact_nid_api.c b/tools/testing/memblock/tests/alloc_exact_nid_api.c index b97b5c04de05..6e14447da6e1 100644 --- a/tools/testing/memblock/tests/alloc_exact_nid_api.c +++ b/tools/testing/memblock/tests/alloc_exact_nid_api.c @@ -560,6 +560,390 @@ static int alloc_exact_nid_bottom_up_numa_no_overlap_low_check(void) return 0; } +/* + * A test that tries to allocate a memory region in a specific NUMA node that + * does not have enough memory to allocate a region of the requested size: + * + * | +-----+ | + * | | req | | + * +---+-----+----------------------------+ + * + * +---------+ + * | rgn | + * +---------+ + * + * Expect no allocation to happen. 
+ */ +static int alloc_exact_nid_numa_small_node_generic_check(void) +{ + int nid_req = 1; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + void *allocated_ptr = NULL; + phys_addr_t size; + phys_addr_t min_addr; + phys_addr_t max_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + size = SZ_2 * req_node->size; + min_addr = memblock_start_of_DRAM(); + max_addr = memblock_end_of_DRAM(); + + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_EQ(allocated_ptr, NULL); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region in a specific NUMA node that + * is fully reserved: + * + * | +---------+ | + * | |requested| | + * +--------------+---------+-------------+ + * + * | +---------+ | + * | | reserved| | + * +--------------+---------+-------------+ + * + * Expect no allocation to happen. + */ +static int alloc_exact_nid_numa_node_reserved_generic_check(void) +{ + int nid_req = 2; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + void *allocated_ptr = NULL; + phys_addr_t size; + phys_addr_t min_addr; + phys_addr_t max_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + size = req_node->size; + min_addr = memblock_start_of_DRAM(); + max_addr = memblock_end_of_DRAM(); + + memblock_reserve(req_node->base, req_node->size); + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_EQ(allocated_ptr, NULL); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region in a specific NUMA node that + * is partially reserved and does not have enough contiguous memory for the + * allocated region: + * + * | +-----------------------+ | + * | | requested | | + * +-----------+-----------------------+----+ + * + * | +----------+ | + * | | reserved | | + * +-----------------+----------+-----------+ + * + * Expect no 
allocation to happen. + */ +static int alloc_exact_nid_numa_part_reserved_fail_generic_check(void) +{ + int nid_req = 4; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + void *allocated_ptr = NULL; + struct region r1; + phys_addr_t size; + phys_addr_t min_addr; + phys_addr_t max_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + ASSERT_LE(SZ_4, req_node->size); + size = req_node->size / SZ_2; + r1.base = req_node->base + (size / SZ_2); + r1.size = size; + + min_addr = memblock_start_of_DRAM(); + max_addr = memblock_end_of_DRAM(); + + memblock_reserve(r1.base, r1.size); + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_EQ(allocated_ptr, NULL); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region that spans over the min_addr + * and max_addr range and overlaps with two different nodes, where the second + * node is the requested node: + * + * min_addr + * | max_addr + * | | + * v v + * | +--------------------------+---------+ | + * | | first node |requested| | + * +------+--------------------------+---------+----------------+ + * + * Expect no allocation to happen. + */ +static int alloc_exact_nid_numa_split_range_high_generic_check(void) +{ + int nid_req = 3; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + void *allocated_ptr = NULL; + phys_addr_t size = SZ_512; + phys_addr_t min_addr; + phys_addr_t max_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + min_addr = req_node->base - SZ_256; + max_addr = min_addr + size; + + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_EQ(allocated_ptr, NULL); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate memory within min_addr and max_add range when + * the requested node and the range do not overlap, and requested node starts + * after max_addr. 
The range overlaps with multiple nodes along node + * boundaries: + * + * min_addr + * | max_addr + * | | + * v v + * | +----------+----...----+----------+ +-----------+ | + * | | min node | ... | max node | | requested | | + * +-----+----------+----...----+----------+--------+-----------+---+ + * + * Expect no allocation to happen. + */ +static int alloc_exact_nid_numa_no_overlap_high_generic_check(void) +{ + int nid_req = 7; + struct memblock_region *min_node = &memblock.memory.regions[2]; + struct memblock_region *max_node = &memblock.memory.regions[5]; + void *allocated_ptr = NULL; + phys_addr_t size = SZ_64; + phys_addr_t max_addr; + phys_addr_t min_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + min_addr = min_node->base; + max_addr = region_end(max_node); + + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_EQ(allocated_ptr, NULL); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region in a specific NUMA node that + * does not have enough memory to allocate a region of the requested size. + * Additionally, none of the nodes have enough memory to allocate the region: + * + * +-----------------------------------+ + * | new | + * +-----------------------------------+ + * |-------+-------+-------+-------+-------+-------+-------+-------| + * | node0 | node1 | node2 | node3 | node4 | node5 | node6 | node7 | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * + * Expect no allocation to happen. 
+ */ +static int alloc_exact_nid_numa_large_region_generic_check(void) +{ + int nid_req = 3; + void *allocated_ptr = NULL; + phys_addr_t size = MEM_SIZE / SZ_2; + phys_addr_t min_addr; + phys_addr_t max_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + min_addr = memblock_start_of_DRAM(); + max_addr = memblock_end_of_DRAM(); + + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + ASSERT_EQ(allocated_ptr, NULL); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate memory within min_addr and max_addr range when + * there are two reserved regions at the borders. The requested node starts at + * min_addr and ends at max_addr and is the same size as the region to be + * allocated: + * + * min_addr + * | max_addr + * | | + * v v + * | +-----------+-----------------------+-----------------------| + * | | node5 | requested | node7 | + * +------+-----------+-----------------------+-----------------------+ + * + + + * | +----+-----------------------+----+ | + * | | r2 | new | r1 | | + * +-------------+----+-----------------------+----+------------------+ + * + * Expect to merge all of the regions into one. The region counter and total + * size fields get updated. 
+ */ +static int alloc_exact_nid_numa_reserved_full_merge_generic_check(void) +{ + int nid_req = 6; + int nid_next = nid_req + 1; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + struct memblock_region *next_node = &memblock.memory.regions[nid_next]; + void *allocated_ptr = NULL; + struct region r1, r2; + phys_addr_t size = req_node->size; + phys_addr_t total_size; + phys_addr_t max_addr; + phys_addr_t min_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + r1.base = next_node->base; + r1.size = SZ_128; + + r2.size = SZ_128; + r2.base = r1.base - (size + r2.size); + + total_size = r1.size + r2.size + size; + min_addr = r2.base + r2.size; + max_addr = r1.base; + + memblock_reserve(r1.base, r1.size); + memblock_reserve(r2.base, r2.size); + + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_NE(allocated_ptr, NULL); + ASSERT_MEM_NE(allocated_ptr, 0, size); + + ASSERT_EQ(new_rgn->size, total_size); + ASSERT_EQ(new_rgn->base, r2.base); + + ASSERT_LE(new_rgn->base, req_node->base); + ASSERT_LE(region_end(req_node), region_end(new_rgn)); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, total_size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate memory within min_addr and max_add range, + * where the total range can fit the region, but it is split between two nodes + * and everything else is reserved. 
Additionally, nid is set to NUMA_NO_NODE + * instead of requesting a specific node: + * + * +-----------+ + * | new | + * +-----------+ + * | +---------------------+-----------| + * | | prev node | next node | + * +------+---------------------+-----------+ + * + + + * |----------------------+ +-----| + * | r1 | | r2 | + * +----------------------+-----------+-----+ + * ^ ^ + * | | + * | max_addr + * | + * min_addr + * + * Expect no allocation to happen. + */ +static int alloc_exact_nid_numa_split_all_reserved_generic_check(void) +{ + void *allocated_ptr = NULL; + struct memblock_region *next_node = &memblock.memory.regions[7]; + struct region r1, r2; + phys_addr_t size = SZ_256; + phys_addr_t max_addr; + phys_addr_t min_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + r2.base = next_node->base + SZ_128; + r2.size = memblock_end_of_DRAM() - r2.base; + + r1.size = MEM_SIZE - (r2.size + size); + r1.base = memblock_start_of_DRAM(); + + min_addr = r1.base + r1.size; + max_addr = r2.base; + + memblock_reserve(r1.base, r1.size); + memblock_reserve(r2.base, r2.size); + + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); + + ASSERT_EQ(allocated_ptr, NULL); + + test_pass_pop(); + + return 0; +} + /* Test case wrappers for NUMA tests */ static int alloc_exact_nid_numa_simple_check(void) { @@ -616,6 +1000,78 @@ static int alloc_exact_nid_numa_no_overlap_low_check(void) return 0; } +static int alloc_exact_nid_numa_small_node_check(void) +{ + test_print("\tRunning %s...\n", __func__); + run_top_down(alloc_exact_nid_numa_small_node_generic_check); + run_bottom_up(alloc_exact_nid_numa_small_node_generic_check); + + return 0; +} + +static int alloc_exact_nid_numa_node_reserved_check(void) +{ + test_print("\tRunning %s...\n", __func__); + run_top_down(alloc_exact_nid_numa_node_reserved_generic_check); + run_bottom_up(alloc_exact_nid_numa_node_reserved_generic_check); + + return 0; +} + +static int 
alloc_exact_nid_numa_part_reserved_fail_check(void) +{ + test_print("\tRunning %s...\n", __func__); + run_top_down(alloc_exact_nid_numa_part_reserved_fail_generic_check); + run_bottom_up(alloc_exact_nid_numa_part_reserved_fail_generic_check); + + return 0; +} + +static int alloc_exact_nid_numa_split_range_high_check(void) +{ + test_print("\tRunning %s...\n", __func__); + run_top_down(alloc_exact_nid_numa_split_range_high_generic_check); + run_bottom_up(alloc_exact_nid_numa_split_range_high_generic_check); + + return 0; +} + +static int alloc_exact_nid_numa_no_overlap_high_check(void) +{ + test_print("\tRunning %s...\n", __func__); + run_top_down(alloc_exact_nid_numa_no_overlap_high_generic_check); + run_bottom_up(alloc_exact_nid_numa_no_overlap_high_generic_check); + + return 0; +} + +static int alloc_exact_nid_numa_large_region_check(void) +{ + test_print("\tRunning %s...\n", __func__); + run_top_down(alloc_exact_nid_numa_large_region_generic_check); + run_bottom_up(alloc_exact_nid_numa_large_region_generic_check); + + return 0; +} + +static int alloc_exact_nid_numa_reserved_full_merge_check(void) +{ + test_print("\tRunning %s...\n", __func__); + run_top_down(alloc_exact_nid_numa_reserved_full_merge_generic_check); + run_bottom_up(alloc_exact_nid_numa_reserved_full_merge_generic_check); + + return 0; +} + +static int alloc_exact_nid_numa_split_all_reserved_check(void) +{ + test_print("\tRunning %s...\n", __func__); + run_top_down(alloc_exact_nid_numa_split_all_reserved_generic_check); + run_bottom_up(alloc_exact_nid_numa_split_all_reserved_generic_check); + + return 0; +} + int __memblock_alloc_exact_nid_numa_checks(void) { test_print("Running %s NUMA tests...\n", FUNC_NAME); @@ -626,6 +1082,15 @@ int __memblock_alloc_exact_nid_numa_checks(void) alloc_exact_nid_numa_no_overlap_split_check(); alloc_exact_nid_numa_no_overlap_low_check(); + alloc_exact_nid_numa_small_node_check(); + alloc_exact_nid_numa_node_reserved_check(); + 
alloc_exact_nid_numa_part_reserved_fail_check(); + alloc_exact_nid_numa_split_range_high_check(); + alloc_exact_nid_numa_no_overlap_high_check(); + alloc_exact_nid_numa_large_region_check(); + alloc_exact_nid_numa_reserved_full_merge_check(); + alloc_exact_nid_numa_split_all_reserved_check(); + return 0; } From 80c2fe022ef5d29f3bafee90c37dbcff18cab57a Mon Sep 17 00:00:00 2001 From: Rebecca Mckeever Date: Mon, 7 Nov 2022 00:28:09 -0600 Subject: [PATCH 1004/4122] memblock tests: remove completed TODO item Remove completed item from TODO list. Reviewed-by: David Hildenbrand Signed-off-by: Rebecca Mckeever Signed-off-by: Mike Rapoport Link: https://lore.kernel.org/r/f2263abe45613b28f1583fbf04a4bffcf735bcf6.1667802195.git.remckee0@gmail.com --- tools/testing/memblock/TODO | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/tools/testing/memblock/TODO b/tools/testing/memblock/TODO index 503cc96fcdc3..e306c90c535f 100644 --- a/tools/testing/memblock/TODO +++ b/tools/testing/memblock/TODO @@ -1,10 +1,5 @@ TODO ===== -1. Add test cases using this functions (implement them for both directions): - + memblock_alloc_raw() - + memblock_alloc_exact_nid_raw() - + memblock_alloc_try_nid_raw() - -2. Add tests for memblock_alloc_node() to check if the correct NUMA node is set +1. Add tests for memblock_alloc_node() to check if the correct NUMA node is set for the new region From ad9f64cd2d4a17f0d5ecf04d38170fdc34f21c61 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Mon, 3 Oct 2022 09:58:48 -0700 Subject: [PATCH 1005/4122] LICENSES: Add the copyleft-next-0.3.1 license Add the full text of the copyleft-next-0.3.1 license to the kernel tree as well as the required tags for reference and tooling. The license text was copied directly from the copyleft-next project's git tree [0]. Discussion of using copyleft-next-0.3.1 on Linux started since June, 2016 [1]. 
In the end Linus' preference was to have drivers use MODULE_LICENSE("GPL") to make it clear that the GPL applies when it comes to Linux [2]. Additionally, even though copyleft-next-0.3.1 has been found to be GPLv2 compatible by three attorneys at SUSE and Redhat [3], to err on the side of caution we simply recommend to always use the "OR" language for this license [4]. Even though it has been a goal of the project to be GPL-v2 compatible, to be certain, in 2016 I asked for a clarification about what makes copyleft-next GPLv2 compatible and also asked for a summary of benefits. This prompted some small minor changes to make compatibility even further clear and as of copyleft 0.3.1 compatibility should be crystal clear [5]. The summary of why copyleft-next 0.3.1 is compatible with GPLv2 is explained as follows: Like GPLv2, copyleft-next requires distribution of derivative works ("Derived Works" in copyleft-next 0.3.x) to be under the same license. Ordinarily this would make the two licenses incompatible. However, copyleft-next 0.3.1 says: "If the Derived Work includes material licensed under the GPL, You may instead license the Derived Work under the GPL." "GPL" is defined to include GPLv2. In practice this means copyleft-next code in Linux may be licensed under the GPL2, however there are additional obvious gains for bringing contributions from Linux outbound where copyleft-next is preferred.
A summary of benefits why projects outside of Linux might prefer to use copyleft-next >= 0.3.1 over GPLv2: o It is much shorter and simpler o It has an explicit patent license grant, unlike GPLv2 o Its notice preservation conditions are clearer o More free software/open source licenses are compatible with it (via section 4) o The source code requirement triggered by binary distribution is much simpler in a procedural sense o Recipients potentially have a contract claim against distributors who are noncompliant with the source code requirement o There is a built-in inbound=outbound policy for upstream contributions (cf. Apache License 2.0 section 5) o There are disincentives to engage in the controversial practice of copyleft/ proprietary dual-licensing o In 15 years copyleft expires, which can be advantageous for legacy code o There are explicit disincentives to bringing patent infringement claims accusing the licensed work of infringement (see 10b) o There is a cure period for licensees who are not compliant with the license (there is no cure opportunity in GPLv2) o copyleft-next has a 'built-in or-later' provision The first driver submission to Linux under this dual strategy was lib/test_sysctl.c through commit 9308f2f9e7f05 ("test_sysctl: add dedicated proc sysctl test driver") merged in July 2017. Shortly after that I also added test_kmod through commit d9c6a72d6fa29 ("kmod: add test driver to stress test the module loader") in the same month. These two drivers went in just a few months before the SPDX license practice kicked in. In 2018 Kuno Woudt went through the process to get SPDX identifiers for copyleft-next [6] [7]. Although there are SPDX tags for copyleft-next-0.3.0, we only document use in Linux starting from copyleft-next-0.3.1 which makes GPLv2 compatibility crystal clear. 
This patch will let us update the two Linux selftest drivers in subsequent patches with their respective SPDX license identifiers and let us remove repetitive license boiler plate. [0] https://github.com/copyleft-next/copyleft-next/blob/master/Releases/copyleft-next-0.3.1 [1] https://lore.kernel.org/lkml/1465929311-13509-1-git-send-email-mcgrof@kernel.org/ [2] https://lore.kernel.org/lkml/CA+55aFyhxcvD+q7tp+-yrSFDKfR0mOHgyEAe=f_94aKLsOu0Og@mail.gmail.com/ [3] https://lore.kernel.org/lkml/20170516232702.GL17314@wotan.suse.de/ [4] https://lkml.kernel.org/r/1495234558.7848.122.camel@linux.intel.com [5] https://lists.fedorahosted.org/archives/list/copyleft-next@lists.fedorahosted.org/thread/JTGV56DDADWGKU7ZKTZA4DLXTGTLNJ57/#SQMDIKBRAVDOCT4UVNOOCRGBN2UJIKHZ [6] https://spdx.org/licenses/copyleft-next-0.3.0.html [7] https://spdx.org/licenses/copyleft-next-0.3.1.html Cc: Goldwyn Rodrigues Cc: Kuno Woudt Cc: Richard Fontana Cc: copyleft-next@lists.fedorahosted.org Cc: Ciaran Farrell Cc: Christopher De Nicolo Cc: Christoph Hellwig Cc: Greg Kroah-Hartman Cc: Thomas Gleixner Cc: Jonathan Corbet Cc: Thorsten Leemhuis Cc: Andrew Morton Reviewed-by: Kees Cook Signed-off-by: Luis Chamberlain Reviewed-by: Tim Bird Signed-off-by: Greg Kroah-Hartman --- LICENSES/dual/copyleft-next-0.3.1 | 236 ++++++++++++++++++++++++++++++ 1 file changed, 236 insertions(+) create mode 100644 LICENSES/dual/copyleft-next-0.3.1 diff --git a/LICENSES/dual/copyleft-next-0.3.1 b/LICENSES/dual/copyleft-next-0.3.1 new file mode 100644 index 000000000000..c81acf710657 --- /dev/null +++ b/LICENSES/dual/copyleft-next-0.3.1 @@ -0,0 +1,236 @@ +Valid-License-Identifier: copyleft-next-0.3.1 +SPDX-URL: https://spdx.org/licenses/copyleft-next-0.3.1 +Usage-Guide: + copyleft-next-0.3.1 is explicitly compatible with GPLv2 (or later) and + can therefore be used for kernel code. 
Though the best and recommended + practice is to express this in the SPDX license identifier by + licensing the code under both licenses expressed by the OR operator. + To use the copyleft-next-0.3.1 license put the following SPDX tag/value + pair into a comment according to the placement guidelines in the + licensing rules documentation: + SPDX-License-Identifier: GPL-2.0-only OR copyleft-next-0.3.1 + SPDX-License-Identifier: GPL-2.0-or-later OR copyleft-next-0.3.1 +License-Text: + +======================================================================= + + copyleft-next 0.3.1 ("this License") + Release date: 2016-04-29 + +1. License Grants; No Trademark License + + Subject to the terms of this License, I grant You: + + a) A non-exclusive, worldwide, perpetual, royalty-free, irrevocable + copyright license, to reproduce, Distribute, prepare derivative works + of, publicly perform and publicly display My Work. + + b) A non-exclusive, worldwide, perpetual, royalty-free, irrevocable + patent license under Licensed Patents to make, have made, use, sell, + offer for sale, and import Covered Works. + + This License does not grant any rights in My name, trademarks, service + marks, or logos. + +2. Distribution: General Conditions + + You may Distribute Covered Works, provided that You (i) inform + recipients how they can obtain a copy of this License; (ii) satisfy the + applicable conditions of sections 3 through 6; and (iii) preserve all + Legal Notices contained in My Work (to the extent they remain + pertinent). "Legal Notices" means copyright notices, license notices, + license texts, and author attributions, but does not include logos, + other graphical images, trademarks or trademark legends. + +3. Conditions for Distributing Derived Works; Outbound GPL Compatibility + + If You Distribute a Derived Work, You must license the entire Derived + Work as a whole under this License, with prominent notice of such + licensing.
This condition may not be avoided through such means as + separate Distribution of portions of the Derived Work. + + If the Derived Work includes material licensed under the GPL, You may + instead license the Derived Work under the GPL. + +4. Condition Against Further Restrictions; Inbound License Compatibility + + When Distributing a Covered Work, You may not impose further + restrictions on the exercise of rights in the Covered Work granted under + this License. This condition is not excused merely because such + restrictions result from Your compliance with conditions or obligations + extrinsic to this License (such as a court order or an agreement with a + third party). + + However, You may Distribute a Covered Work incorporating material + governed by a license that is both OSI-Approved and FSF-Free as of the + release date of this License, provided that compliance with such + other license would not conflict with any conditions stated in other + sections of this License. + +5. Conditions for Distributing Object Code + + You may Distribute an Object Code form of a Covered Work, provided that + you accompany the Object Code with a URL through which the Corresponding + Source is made available, at no charge, by some standard or customary + means of providing network access to source code. + + If you Distribute the Object Code in a physical product or tangible + storage medium ("Product"), the Corresponding Source must be available + through such URL for two years from the date of Your most recent + Distribution of the Object Code in the Product. However, if the Product + itself contains or is accompanied by the Corresponding Source (made + available in a customarily accessible manner), You need not also comply + with the first paragraph of this section. + + Each direct and indirect recipient of the Covered Work from You is an + intended third-party beneficiary of this License solely as to this + section 5, with the right to enforce its terms. + +6. 
Symmetrical Licensing Condition for Upstream Contributions + + If You Distribute a work to Me specifically for inclusion in or + modification of a Covered Work (a "Patch"), and no explicit licensing + terms apply to the Patch, You license the Patch under this License, to + the extent of Your copyright in the Patch. This condition does not + negate the other conditions of this License, if applicable to the Patch. + +7. Nullification of Copyleft/Proprietary Dual Licensing + + If I offer to license, for a fee, a Covered Work under terms other than + a license that is OSI-Approved or FSF-Free as of the release date of this + License or a numbered version of copyleft-next released by the + Copyleft-Next Project, then the license I grant You under section 1 is no + longer subject to the conditions in sections 3 through 5. + +8. Copyleft Sunset + + The conditions in sections 3 through 5 no longer apply once fifteen + years have elapsed from the date of My first Distribution of My Work + under this License. + +9. Pass-Through + + When You Distribute a Covered Work, the recipient automatically receives + a license to My Work from Me, subject to the terms of this License. + +10. Termination + + Your license grants under section 1 are automatically terminated if You + + a) fail to comply with the conditions of this License, unless You cure + such noncompliance within thirty days after becoming aware of it, or + + b) initiate a patent infringement litigation claim (excluding + declaratory judgment actions, counterclaims, and cross-claims) + alleging that any part of My Work directly or indirectly infringes + any patent. + + Termination of Your license grants extends to all copies of Covered + Works You subsequently obtain. Termination does not terminate the + rights of those who have received copies or rights from You subject to + this License. + + To the extent permission to make copies of a Covered Work is necessary + merely for running it, such permission is not terminable. 
+ +11. Later License Versions + + The Copyleft-Next Project may release new versions of copyleft-next, + designated by a distinguishing version number ("Later Versions"). + Unless I explicitly remove the option of Distributing Covered Works + under Later Versions, You may Distribute Covered Works under any Later + Version. + +** 12. No Warranty ** +** ** +** My Work is provided "as-is", without warranty. You bear the risk ** +** of using it. To the extent permitted by applicable law, each ** +** Distributor of My Work excludes the implied warranties of title, ** +** merchantability, fitness for a particular purpose and ** +** non-infringement. ** + +** 13. Limitation of Liability ** +** ** +** To the extent permitted by applicable law, in no event will any ** +** Distributor of My Work be liable to You for any damages ** +** whatsoever, whether direct, indirect, special, incidental, or ** +** consequential damages, whether arising under contract, tort ** +** (including negligence), or otherwise, even where the Distributor ** +** knew or should have known about the possibility of such damages. ** + +14. Severability + + The invalidity or unenforceability of any provision of this License + does not affect the validity or enforceability of the remainder of + this License. Such provision is to be reformed to the minimum extent + necessary to make it valid and enforceable. + +15. Definitions + + "Copyleft-Next Project" means the project that maintains the source + code repository at <https://github.com/copyleft-next/copyleft-next.git/> + as of the release date of this License.
+ + "Corresponding Source" of a Covered Work in Object Code form means (i) + the Source Code form of the Covered Work; (ii) all scripts, + instructions and similar information that are reasonably necessary for + a skilled developer to generate such Object Code from the Source Code + provided under (i); and (iii) a list clearly identifying all Separate + Works (other than those provided in compliance with (ii)) that were + specifically used in building and (if applicable) installing the + Covered Work (for example, a specified proprietary compiler including + its version number). Corresponding Source must be machine-readable. + + "Covered Work" means My Work or a Derived Work. + + "Derived Work" means a work of authorship that copies from, modifies, + adapts, is based on, is a derivative work of, transforms, translates or + contains all or part of My Work, such that copyright permission is + required. The following are not Derived Works: (i) Mere Aggregation; + (ii) a mere reproduction of My Work; and (iii) if My Work fails to + explicitly state an expectation otherwise, a work that merely makes + reference to My Work. + + "Distribute" means to distribute, transfer or make a copy available to + someone else, such that copyright permission is required. + + "Distributor" means Me and anyone else who Distributes a Covered Work. + + "FSF-Free" means classified as 'free' by the Free Software Foundation. + + "GPL" means a version of the GNU General Public License or the GNU + Affero General Public License. + + "I"/"Me"/"My" refers to the individual or legal entity that places My + Work under this License. "You"/"Your" refers to the individual or legal + entity exercising rights in My Work under this License. A legal entity + includes each entity that controls, is controlled by, or is under + common control with such legal entity. 
"Control" means (a) the power to + direct the actions of such legal entity, whether by contract or + otherwise, or (b) ownership of more than fifty percent of the + outstanding shares or beneficial ownership of such legal entity. + + "Licensed Patents" means all patent claims licensable royalty-free by + Me, now or in the future, that are necessarily infringed by making, + using, or selling My Work, and excludes claims that would be infringed + only as a consequence of further modification of My Work. + + "Mere Aggregation" means an aggregation of a Covered Work with a + Separate Work. + + "My Work" means the particular work of authorship I license to You + under this License. + + "Object Code" means any form of a work that is not Source Code. + + "OSI-Approved" means approved as 'Open Source' by the Open Source + Initiative. + + "Separate Work" means a work that is separate from and independent of a + particular Covered Work and is not by its nature an extension or + enhancement of the Covered Work, and/or a runtime library, standard + library or similar component that is used to generate an Object Code + form of a Covered Work. + + "Source Code" means the preferred form of a work for making + modifications to it. From 6cad1ecd4e3213d892b70afa999a81849d1f0206 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Mon, 3 Oct 2022 09:58:49 -0700 Subject: [PATCH 1006/4122] testing: use the copyleft-next-0.3.1 SPDX tag Two selftests drivers exist under the copyleft-next license. These drivers were added prior to SPDX practice taking full swing in the kernel. Now that we have an SPDX tag for copyleft-next-0.3.1 documented, embrace it and remove the boiler plate. 
Cc: Goldwyn Rodrigues Cc: Kuno Woudt Cc: Richard Fontana Cc: copyleft-next@lists.fedorahosted.org Cc: Ciaran Farrell Cc: Christopher De Nicolo Cc: Christoph Hellwig Cc: Greg Kroah-Hartman Cc: Thomas Gleixner Cc: Jonathan Corbet Cc: Thorsten Leemhuis Cc: Andrew Morton Reviewed-by: Kees Cook Signed-off-by: Luis Chamberlain Reviewed-by: Tim Bird Signed-off-by: Greg Kroah-Hartman --- lib/test_kmod.c | 12 +----------- lib/test_sysctl.c | 12 +----------- tools/testing/selftests/kmod/kmod.sh | 13 +------------ tools/testing/selftests/sysctl/sysctl.sh | 12 +----------- 4 files changed, 4 insertions(+), 45 deletions(-) diff --git a/lib/test_kmod.c b/lib/test_kmod.c index cb800b1d0d99..6423df9fa8dd 100644 --- a/lib/test_kmod.c +++ b/lib/test_kmod.c @@ -1,18 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later OR copyleft-next-0.3.1 /* * kmod stress test driver * * Copyright (C) 2017 Luis R. Rodriguez - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or at your option any - * later version; or, when distributed separately from the Linux kernel or - * when incorporated into other software packages, subject to the following - * license: - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of copyleft-next (version 0.3.1 or later) as published - * at http://copyleft-next.org/. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/lib/test_sysctl.c b/lib/test_sysctl.c index 9a564971f539..e2a816d85ea2 100644 --- a/lib/test_sysctl.c +++ b/lib/test_sysctl.c @@ -1,18 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later OR copyleft-next-0.3.1 /* * proc sysctl test driver * * Copyright (C) 2017 Luis R. 
Rodriguez - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or at your option any - * later version; or, when distributed separately from the Linux kernel or - * when incorporated into other software packages, subject to the following - * license: - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of copyleft-next (version 0.3.1 or later) as published - * at http://copyleft-next.org/. */ /* diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh index afd42387e8b2..7189715d7960 100755 --- a/tools/testing/selftests/kmod/kmod.sh +++ b/tools/testing/selftests/kmod/kmod.sh @@ -1,18 +1,7 @@ #!/bin/bash -# +# SPDX-License-Identifier: GPL-2.0-or-later OR copyleft-next-0.3.1 # Copyright (C) 2017 Luis R. Rodriguez # -# This program is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by the Free -# Software Foundation; either version 2 of the License, or at your option any -# later version; or, when distributed separately from the Linux kernel or -# when incorporated into other software packages, subject to the following -# license: -# -# This program is free software; you can redistribute it and/or modify it -# under the terms of copyleft-next (version 0.3.1 or later) as published -# at http://copyleft-next.org/. - # This is a stress test script for kmod, the kernel module loader. It uses # test_kmod which exposes a series of knobs for the API for us so we can # tweak each test in userspace rather than in kernelspace. 
diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh index f50778a3d744..bfc54b422f25 100755 --- a/tools/testing/selftests/sysctl/sysctl.sh +++ b/tools/testing/selftests/sysctl/sysctl.sh @@ -1,16 +1,6 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0-or-later OR copyleft-next-0.3.1 # Copyright (C) 2017 Luis R. Rodriguez -# -# This program is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by the Free -# Software Foundation; either version 2 of the License, or at your option any -# later version; or, when distributed separately from the Linux kernel or -# when incorporated into other software packages, subject to the following -# license: -# -# This program is free software; you can redistribute it and/or modify it -# under the terms of copyleft-next (version 0.3.1 or later) as published -# at http://copyleft-next.org/. # This performs a series tests against the proc sysctl interface. From 5c30f3e4a6e67c88c979ad30554bf4ef9b24fbd0 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sun, 6 Nov 2022 19:46:35 -0800 Subject: [PATCH 1007/4122] tty: Move TIOCSTI toggle variable before kerndoc The variable "tty_legacy_tiocsti" should be defined before the kerndoc for the tiocsti() function. 
The new variable was breaking the "htmldocs" build target: drivers/tty/tty_io.c:2271: warning: cannot understand function prototype: 'bool tty_legacy_tiocsti __read_mostly = IS_ENABLED(CONFIG_LEGACY_TIOCSTI); ' Fixes: 83efeeeb3d04 ("tty: Allow TIOCSTI to be disabled") Reported-by: Stephen Rothwell Link: https://lore.kernel.org/lkml/20221107143434.66f7be35@canb.auug.org.au Cc: Jiri Slaby Signed-off-by: Kees Cook Acked-by: Randy Dunlap Link: https://lore.kernel.org/r/20221107034631.never.637-kees@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index a6a16cf986b7..24ebcb495145 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -2255,6 +2255,7 @@ static int tty_fasync(int fd, struct file *filp, int on) return retval; } +static bool tty_legacy_tiocsti __read_mostly = IS_ENABLED(CONFIG_LEGACY_TIOCSTI); /** * tiocsti - fake input character * @tty: tty to fake input into @@ -2268,7 +2269,6 @@ static int tty_fasync(int fd, struct file *filp, int on) * * Called functions take tty_ldiscs_lock * * current->signal->tty check is safe without locks */ -static bool tty_legacy_tiocsti __read_mostly = IS_ENABLED(CONFIG_LEGACY_TIOCSTI); static int tiocsti(struct tty_struct *tty, char __user *p) { char ch, mbz = 0; From 96a53586ec095dcc426df5bc6a568108dcfa8a0a Mon Sep 17 00:00:00 2001 From: Tanjuate Brunostar Date: Thu, 3 Nov 2022 07:34:22 +0000 Subject: [PATCH 1008/4122] staging: vt6655: change 2 variable names wFB_Opt0 and wFB_Opt1 These variables are named using Hungarian notation, which is not used in the Linux kernel. 
Signed-off-by: Tanjuate Brunostar Tested-by: Philipp Hortmann Link: https://lore.kernel.org/r/Y2Nu/i957Ill7r+d@elroy-temp-vm.gaiao0uenmiufjlowqgp5yxwdh.gvxx.internal.cloudapp.net Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vt6655/rxtx.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/staging/vt6655/rxtx.c b/drivers/staging/vt6655/rxtx.c index baa3f6f1508f..522d34ca9b0f 100644 --- a/drivers/staging/vt6655/rxtx.c +++ b/drivers/staging/vt6655/rxtx.c @@ -59,12 +59,12 @@ static const unsigned short time_stamp_off[2][MAX_RATE] = { {384, 192, 130, 113, 54, 43, 37, 31, 28, 25, 24, 23}, /* Short Preamble */ }; -static const unsigned short wFB_Opt0[2][5] = { +static const unsigned short fb_opt0[2][5] = { {RATE_12M, RATE_18M, RATE_24M, RATE_36M, RATE_48M}, /* fallback_rate0 */ {RATE_12M, RATE_12M, RATE_18M, RATE_24M, RATE_36M}, /* fallback_rate1 */ }; -static const unsigned short wFB_Opt1[2][5] = { +static const unsigned short fb_opt1[2][5] = { {RATE_12M, RATE_18M, RATE_24M, RATE_24M, RATE_36M}, /* fallback_rate0 */ {RATE_6M, RATE_6M, RATE_12M, RATE_12M, RATE_18M}, /* fallback_rate1 */ }; @@ -310,9 +310,9 @@ s_uGetDataDuration( wRate -= RATE_18M; if (byFBOption == AUTO_FB_0) - wRate = wFB_Opt0[FB_RATE0][wRate]; + wRate = fb_opt0[FB_RATE0][wRate]; else - wRate = wFB_Opt1[FB_RATE0][wRate]; + wRate = fb_opt1[FB_RATE0][wRate]; uNextPktTime = s_uGetTxRsvTime(pDevice, byPktType, len, wRate, bNeedAck); @@ -365,52 +365,52 @@ s_uGetRTSCTSDuration( case RTSDUR_BA_F0: /* RTSDuration_ba_f0 */ uCTSTime = bb_get_frame_time(pDevice->preamble_type, byPktType, 14, pDevice->byTopCCKBasicRate); if ((byFBOption == AUTO_FB_0) && (wRate >= RATE_18M) && (wRate <= RATE_54M)) - uDurTime = uCTSTime + 2 * pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, wFB_Opt0[FB_RATE0][wRate - RATE_18M], bNeedAck); + uDurTime = uCTSTime + 2 * pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, 
fb_opt0[FB_RATE0][wRate - RATE_18M], bNeedAck); else if ((byFBOption == AUTO_FB_1) && (wRate >= RATE_18M) && (wRate <= RATE_54M)) - uDurTime = uCTSTime + 2 * pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, wFB_Opt1[FB_RATE0][wRate - RATE_18M], bNeedAck); + uDurTime = uCTSTime + 2 * pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, fb_opt1[FB_RATE0][wRate - RATE_18M], bNeedAck); break; case RTSDUR_AA_F0: /* RTSDuration_aa_f0 */ uCTSTime = bb_get_frame_time(pDevice->preamble_type, byPktType, 14, pDevice->byTopOFDMBasicRate); if ((byFBOption == AUTO_FB_0) && (wRate >= RATE_18M) && (wRate <= RATE_54M)) - uDurTime = uCTSTime + 2 * pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, wFB_Opt0[FB_RATE0][wRate - RATE_18M], bNeedAck); + uDurTime = uCTSTime + 2 * pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, fb_opt0[FB_RATE0][wRate - RATE_18M], bNeedAck); else if ((byFBOption == AUTO_FB_1) && (wRate >= RATE_18M) && (wRate <= RATE_54M)) - uDurTime = uCTSTime + 2 * pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, wFB_Opt1[FB_RATE0][wRate - RATE_18M], bNeedAck); + uDurTime = uCTSTime + 2 * pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, fb_opt1[FB_RATE0][wRate - RATE_18M], bNeedAck); break; case RTSDUR_BA_F1: /* RTSDuration_ba_f1 */ uCTSTime = bb_get_frame_time(pDevice->preamble_type, byPktType, 14, pDevice->byTopCCKBasicRate); if ((byFBOption == AUTO_FB_0) && (wRate >= RATE_18M) && (wRate <= RATE_54M)) - uDurTime = uCTSTime + 2 * pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, wFB_Opt0[FB_RATE1][wRate - RATE_18M], bNeedAck); + uDurTime = uCTSTime + 2 * pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, fb_opt0[FB_RATE1][wRate - RATE_18M], bNeedAck); else if ((byFBOption == AUTO_FB_1) && (wRate >= RATE_18M) && (wRate <= RATE_54M)) - uDurTime = uCTSTime + 2 * pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, 
wFB_Opt1[FB_RATE1][wRate - RATE_18M], bNeedAck); + uDurTime = uCTSTime + 2 * pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, fb_opt1[FB_RATE1][wRate - RATE_18M], bNeedAck); break; case RTSDUR_AA_F1: /* RTSDuration_aa_f1 */ uCTSTime = bb_get_frame_time(pDevice->preamble_type, byPktType, 14, pDevice->byTopOFDMBasicRate); if ((byFBOption == AUTO_FB_0) && (wRate >= RATE_18M) && (wRate <= RATE_54M)) - uDurTime = uCTSTime + 2 * pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, wFB_Opt0[FB_RATE1][wRate - RATE_18M], bNeedAck); + uDurTime = uCTSTime + 2 * pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, fb_opt0[FB_RATE1][wRate - RATE_18M], bNeedAck); else if ((byFBOption == AUTO_FB_1) && (wRate >= RATE_18M) && (wRate <= RATE_54M)) - uDurTime = uCTSTime + 2 * pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, wFB_Opt1[FB_RATE1][wRate - RATE_18M], bNeedAck); + uDurTime = uCTSTime + 2 * pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, fb_opt1[FB_RATE1][wRate - RATE_18M], bNeedAck); break; case CTSDUR_BA_F0: /* CTSDuration_ba_f0 */ if ((byFBOption == AUTO_FB_0) && (wRate >= RATE_18M) && (wRate <= RATE_54M)) - uDurTime = pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, wFB_Opt0[FB_RATE0][wRate - RATE_18M], bNeedAck); + uDurTime = pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, fb_opt0[FB_RATE0][wRate - RATE_18M], bNeedAck); else if ((byFBOption == AUTO_FB_1) && (wRate >= RATE_18M) && (wRate <= RATE_54M)) - uDurTime = pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, wFB_Opt1[FB_RATE0][wRate - RATE_18M], bNeedAck); + uDurTime = pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, fb_opt1[FB_RATE0][wRate - RATE_18M], bNeedAck); break; case CTSDUR_BA_F1: /* CTSDuration_ba_f1 */ if ((byFBOption == AUTO_FB_0) && (wRate >= RATE_18M) && (wRate <= RATE_54M)) - uDurTime = pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, 
cbFrameLength, wFB_Opt0[FB_RATE1][wRate - RATE_18M], bNeedAck); + uDurTime = pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, fb_opt0[FB_RATE1][wRate - RATE_18M], bNeedAck); else if ((byFBOption == AUTO_FB_1) && (wRate >= RATE_18M) && (wRate <= RATE_54M)) - uDurTime = pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, wFB_Opt1[FB_RATE1][wRate - RATE_18M], bNeedAck); + uDurTime = pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, fb_opt1[FB_RATE1][wRate - RATE_18M], bNeedAck); break; From f541335e830eb1abf3e4e260fcef98c429165aac Mon Sep 17 00:00:00 2001 From: Deepak R Varma Date: Thu, 3 Nov 2022 04:06:59 +0530 Subject: [PATCH 1009/4122] staging: rtl8192e: Use min_t/max_t macros for variable comparison Simplify code by using min_t and max_t helper macros in place of lengthy if/else block oriented logical evaluation and value assignment. This issue is identified by coccicheck using the minmax.cocci file. Use the *_t variants of min/max macros to avoid compiler warnings about data typecast. Also, use u32 as type for min_t macro to avoid any truncation of data associated with enum constant HT_AGG_SIZE_32K. 
Signed-off-by: Deepak R Varma Tested-by: Philipp Hortmann Link: https://lore.kernel.org/r/Y2LxC2kziM1TznhO@qemulion Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl819x_HTProc.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl819x_HTProc.c b/drivers/staging/rtl8192e/rtl819x_HTProc.c index 62aa8e893c34..ccb86660ab48 100644 --- a/drivers/staging/rtl8192e/rtl819x_HTProc.c +++ b/drivers/staging/rtl8192e/rtl819x_HTProc.c @@ -587,17 +587,12 @@ void HTOnAssocRsp(struct rtllib_device *ieee) else pHTInfo->CurrentAMPDUFactor = HT_AGG_SIZE_64K; } else { - if (pPeerHTCap->MaxRxAMPDUFactor < HT_AGG_SIZE_32K) - pHTInfo->CurrentAMPDUFactor = - pPeerHTCap->MaxRxAMPDUFactor; - else - pHTInfo->CurrentAMPDUFactor = HT_AGG_SIZE_32K; + pHTInfo->CurrentAMPDUFactor = min_t(u32, pPeerHTCap->MaxRxAMPDUFactor, + HT_AGG_SIZE_32K); } } - if (pHTInfo->MPDU_Density > pPeerHTCap->MPDUDensity) - pHTInfo->current_mpdu_density = pHTInfo->MPDU_Density; - else - pHTInfo->current_mpdu_density = pPeerHTCap->MPDUDensity; + pHTInfo->current_mpdu_density = max_t(u8, pHTInfo->MPDU_Density, + pPeerHTCap->MPDUDensity); if (pHTInfo->iot_action & HT_IOT_ACT_TX_USE_AMSDU_8K) { pHTInfo->bCurrentAMPDUEnable = false; pHTInfo->ForcedAMSDUMode = HT_AGG_FORCE_ENABLE; From 6c6ff293337b62c0c646d1ede2e9962f7501f9e4 Mon Sep 17 00:00:00 2001 From: Deepak R Varma Date: Thu, 3 Nov 2022 15:03:29 +0530 Subject: [PATCH 1010/4122] staging: rtl8723bs: Use min/max macros for variable comparison Simplify code by using min and max helper macros in place of lengthy if/else block oriented logical evaluation and value assignment. This issue is identified by coccicheck using the minmax.cocci file. 
Signed-off-by: Deepak R Varma Link: https://lore.kernel.org/r/Y2OK6fcIkH3S2/1f@qemulion Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8723bs/core/rtw_wlan_util.c | 12 ++++-------- drivers/staging/rtl8723bs/hal/odm_DIG.c | 5 +---- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/drivers/staging/rtl8723bs/core/rtw_wlan_util.c b/drivers/staging/rtl8723bs/core/rtw_wlan_util.c index 18ba846c0b7b..ba39c8b1a9ae 100644 --- a/drivers/staging/rtl8723bs/core/rtw_wlan_util.c +++ b/drivers/staging/rtl8723bs/core/rtw_wlan_util.c @@ -986,15 +986,11 @@ void HT_caps_handler(struct adapter *padapter, struct ndis_80211_var_ie *pIE) pmlmeinfo->HT_caps.u.HT_cap[i] &= (pIE->data[i]); } else { /* modify from fw by Thomas 2010/11/17 */ - if ((pmlmeinfo->HT_caps.u.HT_cap_element.AMPDU_para & 0x3) > (pIE->data[i] & 0x3)) - max_AMPDU_len = (pIE->data[i] & 0x3); - else - max_AMPDU_len = (pmlmeinfo->HT_caps.u.HT_cap_element.AMPDU_para & 0x3); + max_AMPDU_len = min(pmlmeinfo->HT_caps.u.HT_cap_element.AMPDU_para & 0x3, + pIE->data[i] & 0x3); - if ((pmlmeinfo->HT_caps.u.HT_cap_element.AMPDU_para & 0x1c) > (pIE->data[i] & 0x1c)) - min_MPDU_spacing = (pmlmeinfo->HT_caps.u.HT_cap_element.AMPDU_para & 0x1c); - else - min_MPDU_spacing = (pIE->data[i] & 0x1c); + min_MPDU_spacing = max(pmlmeinfo->HT_caps.u.HT_cap_element.AMPDU_para & 0x1c, + pIE->data[i] & 0x1c); pmlmeinfo->HT_caps.u.HT_cap_element.AMPDU_para = max_AMPDU_len | min_MPDU_spacing; } diff --git a/drivers/staging/rtl8723bs/hal/odm_DIG.c b/drivers/staging/rtl8723bs/hal/odm_DIG.c index 07edf74ccfe5..97a51546463a 100644 --- a/drivers/staging/rtl8723bs/hal/odm_DIG.c +++ b/drivers/staging/rtl8723bs/hal/odm_DIG.c @@ -598,10 +598,7 @@ void odm_DIGbyRSSI_LPS(void *pDM_VOID) /* Lower bound checking */ /* RSSI Lower bound check */ - if ((pDM_Odm->RSSI_Min-10) > DM_DIG_MIN_NIC) - RSSI_Lower = pDM_Odm->RSSI_Min-10; - else - RSSI_Lower = DM_DIG_MIN_NIC; + RSSI_Lower = max(pDM_Odm->RSSI_Min - 10, DM_DIG_MIN_NIC); /* Upper and 
Lower Bound checking */ if (CurrentIGI > DM_DIG_MAX_NIC) From 84415762be0736efe7ed1b26d72a65ef9e0c6e58 Mon Sep 17 00:00:00 2001 From: Deepak R Varma Date: Thu, 3 Nov 2022 22:58:06 +0530 Subject: [PATCH 1011/4122] staging: r8188eu: remove unused ijk_matrix_regs_set implementation Instance IQKMatrixRegSetting of struct ijk_matrix_regs_set is initialised and its member variables are assigned values, but it is not used anywhere. Remove the structure and its unused implementation. Suggested-by: Dan Carpenter Signed-off-by: Deepak R Varma Link: https://lore.kernel.org/r/Y2P6Jj+IcPss0wFd@qemulion Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/hal/HalPhyRf_8188e.c | 8 -------- drivers/staging/r8188eu/include/odm.h | 7 ------- 2 files changed, 15 deletions(-) diff --git a/drivers/staging/r8188eu/hal/HalPhyRf_8188e.c b/drivers/staging/r8188eu/hal/HalPhyRf_8188e.c index 622f95d3f2ed..26e710ef5134 100644 --- a/drivers/staging/r8188eu/hal/HalPhyRf_8188e.c +++ b/drivers/staging/r8188eu/hal/HalPhyRf_8188e.c @@ -882,14 +882,6 @@ void PHY_IQCalibrate_8188E(struct adapter *adapt, bool recovery) if (RegE94 != 0) patha_fill_iqk(adapt, pathaok, result, final_candidate, (RegEA4 == 0)); -/* To Fix BSOD when final_candidate is 0xff */ -/* by sherry 20120321 */ - if (final_candidate < 4) { - for (i = 0; i < IQK_Matrix_REG_NUM; i++) - dm_odm->RFCalibrateInfo.IQKMatrixRegSetting.Value[0][i] = result[final_candidate][i]; - dm_odm->RFCalibrateInfo.IQKMatrixRegSetting.bIQKDone = true; - } - _PHY_SaveADDARegisters(adapt, IQK_BB_REG_92C, dm_odm->RFCalibrateInfo.IQK_BB_backup_recover, 9); } diff --git a/drivers/staging/r8188eu/include/odm.h b/drivers/staging/r8188eu/include/odm.h index 89b01dd614ba..8cea166b7b73 100644 --- a/drivers/staging/r8188eu/include/odm.h +++ b/drivers/staging/r8188eu/include/odm.h @@ -80,7 +80,6 @@ struct odm_rate_adapt { #define HP_THERMAL_NUM 8 #define AVG_THERMAL_NUM 8 -#define IQK_Matrix_REG_NUM 8 struct odm_phy_dbg_info { /* ODM Write,debug info */ @@ 
-164,11 +163,6 @@ struct odm_ra_info { u8 PTSmoothFactor; }; -struct ijk_matrix_regs_set { - bool bIQKDone; - s32 Value[1][IQK_Matrix_REG_NUM]; -}; - struct odm_rf_cal { /* for tx power tracking */ u32 RegA24; /* for TempCCK */ @@ -206,7 +200,6 @@ struct odm_rf_cal { u8 ThermalValue_HP[HP_THERMAL_NUM]; u8 ThermalValue_HP_index; - struct ijk_matrix_regs_set IQKMatrixRegSetting; u8 Delta_IQK; u8 Delta_LCK; From 1d6290639bfce477f71392392436d2aa830bca2e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 3 Nov 2022 13:06:19 +0000 Subject: [PATCH 1012/4122] staging: rtl8192e: rtl819x_HTProc: make arrays const and one static Make two read-only arrays const. Make array EWC11NHTCap static const so it is not populated on the stack, makes the code smaller too. Signed-off-by: Colin Ian King Tested-by: Philipp Hortmann Link: https://lore.kernel.org/r/20221103130619.78413-1-colin.i.king@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl819x_HTProc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl819x_HTProc.c b/drivers/staging/rtl8192e/rtl819x_HTProc.c index ccb86660ab48..2c0a8d5c8f27 100644 --- a/drivers/staging/rtl8192e/rtl819x_HTProc.c +++ b/drivers/staging/rtl8192e/rtl819x_HTProc.c @@ -282,7 +282,7 @@ void HTConstructCapabilityElement(struct rtllib_device *ieee, u8 *posHTCap, memset(posHTCap, 0, *len); if ((bAssoc) && (pHT->ePeerHTSpecVer == HT_SPEC_VER_EWC)) { - u8 EWC11NHTCap[] = {0x00, 0x90, 0x4c, 0x33}; + static const u8 EWC11NHTCap[] = { 0x00, 0x90, 0x4c, 0x33 }; memcpy(posHTCap, EWC11NHTCap, sizeof(EWC11NHTCap)); pCapELE = (struct ht_capab_ele *)&posHTCap[4]; @@ -515,8 +515,8 @@ void HTOnAssocRsp(struct rtllib_device *ieee) u16 nMaxAMSDUSize = 0; u8 *pMcsFilter = NULL; - static u8 EWC11NHTCap[] = {0x00, 0x90, 0x4c, 0x33}; - static u8 EWC11NHTInfo[] = {0x00, 0x90, 0x4c, 0x34}; + static const u8 EWC11NHTCap[] = { 0x00, 0x90, 0x4c, 0x33 }; + static const u8 EWC11NHTInfo[] = { 0x00, 0x90, 
0x4c, 0x34 }; if (!pHTInfo->bCurrentHTSupport) { netdev_warn(ieee->dev, "%s(): HT_DISABLE\n", __func__); From dcb18f5140ab7b4ea23417598c601b9468141d99 Mon Sep 17 00:00:00 2001 From: Phillip Potter Date: Thu, 3 Nov 2022 23:06:32 +0000 Subject: [PATCH 1013/4122] staging: r8188eu: change return type of rtw_set_802_11_disassociate to void Change return type of rtw_set_802_11_disassociate to void. This function always returns 'true' no matter what, so there is no need to return a value, and no need to check for it in the two call sites within rtw_wx_set_mlme. Also, as we are no longer using ret in rtw_wx_set_mlme except as the return value, just remove it and return 0 directly. Suggested-by: Michael Straube Signed-off-by: Phillip Potter Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221103230632.6946-1-phil@philpotter.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_ioctl_set.c | 4 +--- drivers/staging/r8188eu/include/rtw_ioctl_set.h | 2 +- drivers/staging/r8188eu/os_dep/ioctl_linux.c | 9 +++------ 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_ioctl_set.c b/drivers/staging/r8188eu/core/rtw_ioctl_set.c index 786431826659..785c0dba508f 100644 --- a/drivers/staging/r8188eu/core/rtw_ioctl_set.c +++ b/drivers/staging/r8188eu/core/rtw_ioctl_set.c @@ -314,7 +314,7 @@ u8 rtw_set_802_11_infrastructure_mode(struct adapter *padapter, return true; } -u8 rtw_set_802_11_disassociate(struct adapter *padapter) +void rtw_set_802_11_disassociate(struct adapter *padapter) { struct mlme_priv *pmlmepriv = &padapter->mlmepriv; @@ -328,8 +328,6 @@ u8 rtw_set_802_11_disassociate(struct adapter *padapter) } spin_unlock_bh(&pmlmepriv->lock); - - return true; } u8 rtw_set_802_11_bssid_list_scan(struct adapter *padapter, struct ndis_802_11_ssid *pssid, int ssid_max_num) diff --git a/drivers/staging/r8188eu/include/rtw_ioctl_set.h b/drivers/staging/r8188eu/include/rtw_ioctl_set.h index 
abe460d6504d..c3eb2479f27b 100644 --- a/drivers/staging/r8188eu/include/rtw_ioctl_set.h +++ b/drivers/staging/r8188eu/include/rtw_ioctl_set.h @@ -12,7 +12,7 @@ u8 rtw_set_802_11_authentication_mode(struct adapter *adapt, enum ndis_802_11_auth_mode authmode); u8 rtw_set_802_11_bssid(struct adapter *adapter, u8 *bssid); u8 rtw_set_802_11_add_wep(struct adapter *adapter, struct ndis_802_11_wep *wep); -u8 rtw_set_802_11_disassociate(struct adapter *adapter); +void rtw_set_802_11_disassociate(struct adapter *adapter); u8 rtw_set_802_11_bssid_list_scan(struct adapter *adapter, struct ndis_802_11_ssid *pssid, int ssid_max_num); diff --git a/drivers/staging/r8188eu/os_dep/ioctl_linux.c b/drivers/staging/r8188eu/os_dep/ioctl_linux.c index dda48a2a6d0c..8e9b7b0664bc 100644 --- a/drivers/staging/r8188eu/os_dep/ioctl_linux.c +++ b/drivers/staging/r8188eu/os_dep/ioctl_linux.c @@ -1011,7 +1011,6 @@ static int rtw_wx_set_mlme(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - int ret = 0; struct adapter *padapter = (struct adapter *)rtw_netdev_priv(dev); struct iw_mlme *mlme = (struct iw_mlme *)extra; @@ -1020,17 +1019,15 @@ static int rtw_wx_set_mlme(struct net_device *dev, switch (mlme->cmd) { case IW_MLME_DEAUTH: - if (!rtw_set_802_11_disassociate(padapter)) - ret = -1; + rtw_set_802_11_disassociate(padapter); break; case IW_MLME_DISASSOC: - if (!rtw_set_802_11_disassociate(padapter)) - ret = -1; + rtw_set_802_11_disassociate(padapter); break; default: return -EOPNOTSUPP; } - return ret; + return 0; } static int rtw_wx_set_scan(struct net_device *dev, struct iw_request_info *a, From bb5b5e2104ca000888df89e35ff42c677df22671 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Thu, 3 Nov 2022 20:27:50 -0500 Subject: [PATCH 1014/4122] staging: rtl8192e: Fix divide fault when calculating beacon age When the configuration parameter CONFIG_HZ is less than 100, the compiler generates an error as follows: 
../drivers/staging/rtl8192e/rtllib_wx.c: In function 'rtl819x_translate_scan': ../drivers/staging/rtl8192e/rtllib_wx.c:220:57: warning: division by zero [-Wdiv-by-zero] 220 | (jiffies - network->last_scanned) / (HZ / 100)); | ^ In file included from ../include/linux/skbuff.h:45, from ../include/linux/if_ether.h:19, from ../include/linux/etherdevice.h:20, from ../drivers/staging/rtl8192e/rtllib_wx.c:18: ../drivers/staging/rtl8192e/rtllib_wx.c: In function 'rtllib_wx_get_scan': ../drivers/staging/rtl8192e/rtllib_wx.c:261:70: warning: division by zero [-Wdiv-by-zero] 261 | (jiffies - network->last_scanned) / | In fact, if HZ is not a multiple of 100, the calculation will be wrong, but it will compile correctly. The fix is to get rid of the (HZ / 100) portion. To decrease any round-off errors, the compiler is forced to perform the 100 * jiffies-difference before dividing by HZ. This patch is only compile tested. Reported-by: Randy Dunlap Acked-by: Randy Dunlap # build-tested Signed-off-by: Larry Finger Tested-by: Philipp Hortmann Link: https://lore.kernel.org/r/20221104012750.2076-1-Larry.Finger@lwfinger.net Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtllib_wx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/staging/rtl8192e/rtllib_wx.c b/drivers/staging/rtl8192e/rtllib_wx.c index da2c41c9b92f..217426ee2e92 100644 --- a/drivers/staging/rtl8192e/rtllib_wx.c +++ b/drivers/staging/rtl8192e/rtllib_wx.c @@ -217,7 +217,7 @@ static inline char *rtl819x_translate_scan(struct rtllib_device *ieee, p = custom; p += scnprintf(p, MAX_CUSTOM_LEN - (p - custom), " Last beacon: %lums ago", - (jiffies - network->last_scanned) / (HZ / 100)); + (100 * (jiffies - network->last_scanned)) / HZ); iwe.u.data.length = p - custom; if (iwe.u.data.length) start = iwe_stream_add_point_rsl(info, start, stop, @@ -258,8 +258,8 @@ int rtllib_wx_get_scan(struct rtllib_device *ieee, escape_essid(network->ssid, network->ssid_len), network->bssid, - 
(jiffies - network->last_scanned) / - (HZ / 100)); + (100 * (jiffies - network->last_scanned)) / + HZ); } spin_unlock_irqrestore(&ieee->lock, flags); From 11454ddeb2f68a1c03b90f0c2bba18570f1a3019 Mon Sep 17 00:00:00 2001 From: Deepak R Varma Date: Sat, 5 Nov 2022 16:45:52 +0530 Subject: [PATCH 1015/4122] staging: rtl8723bs: replace underutilized struct by array variable For structure iqk_matrix_regs_setting, only the "Value" member variable is utilized whereas the other struct members are only declared but not utilised. Replace the struct declaration and implementation by an equivalent variable similar to the only used struct member variable. While in there, update the macro mixed case names to uppercase style. The resultant code is simpler and is easy to maintain. Suggested-by: Dan Carpenter Signed-off-by: Deepak R Varma Link: https://lore.kernel.org/r/Y2ZF6O1KU3zZ6r3C@qemulion Signed-off-by: Greg Kroah-Hartman --- .../staging/rtl8723bs/hal/HalPhyRf_8723B.c | 21 +++++++++---------- drivers/staging/rtl8723bs/hal/odm.h | 12 +++-------- 2 files changed, 13 insertions(+), 20 deletions(-) diff --git a/drivers/staging/rtl8723bs/hal/HalPhyRf_8723B.c b/drivers/staging/rtl8723bs/hal/HalPhyRf_8723B.c index a52748f7b56e..22e33b97800d 100644 --- a/drivers/staging/rtl8723bs/hal/HalPhyRf_8723B.c +++ b/drivers/staging/rtl8723bs/hal/HalPhyRf_8723B.c @@ -244,8 +244,8 @@ void ODM_TxPwrTrackSetPwr_8723B( Final_CCK_Swing_Index = 0; setIqkMatrix_8723B(pDM_Odm, Final_OFDM_Swing_Index, RFPath, - pDM_Odm->RFCalibrateInfo.IQKMatrixRegSetting[ChannelMappedIndex].Value[0][0], - pDM_Odm->RFCalibrateInfo.IQKMatrixRegSetting[ChannelMappedIndex].Value[0][1]); + pDM_Odm->RFCalibrateInfo.iqk_matrix_regs_setting_value[ChannelMappedIndex][0], + pDM_Odm->RFCalibrateInfo.iqk_matrix_regs_setting_value[ChannelMappedIndex][1]); setCCKFilterCoefficient(pDM_Odm, Final_CCK_Swing_Index); @@ -257,8 +257,8 @@ void ODM_TxPwrTrackSetPwr_8723B( pDM_Odm->Remnant_OFDMSwingIdx[RFPath] = Final_OFDM_Swing_Index - 
PwrTrackingLimit_OFDM; setIqkMatrix_8723B(pDM_Odm, PwrTrackingLimit_OFDM, RFPath, - pDM_Odm->RFCalibrateInfo.IQKMatrixRegSetting[ChannelMappedIndex].Value[0][0], - pDM_Odm->RFCalibrateInfo.IQKMatrixRegSetting[ChannelMappedIndex].Value[0][1]); + pDM_Odm->RFCalibrateInfo.iqk_matrix_regs_setting_value[ChannelMappedIndex][0], + pDM_Odm->RFCalibrateInfo.iqk_matrix_regs_setting_value[ChannelMappedIndex][1]); pDM_Odm->Modify_TxAGC_Flag_PathA = true; PHY_SetTxPowerIndexByRateSection(Adapter, RFPath, pHalData->CurrentChannel, OFDM); @@ -267,16 +267,16 @@ void ODM_TxPwrTrackSetPwr_8723B( pDM_Odm->Remnant_OFDMSwingIdx[RFPath] = Final_OFDM_Swing_Index; setIqkMatrix_8723B(pDM_Odm, 0, RFPath, - pDM_Odm->RFCalibrateInfo.IQKMatrixRegSetting[ChannelMappedIndex].Value[0][0], - pDM_Odm->RFCalibrateInfo.IQKMatrixRegSetting[ChannelMappedIndex].Value[0][1]); + pDM_Odm->RFCalibrateInfo.iqk_matrix_regs_setting_value[ChannelMappedIndex][0], + pDM_Odm->RFCalibrateInfo.iqk_matrix_regs_setting_value[ChannelMappedIndex][1]); pDM_Odm->Modify_TxAGC_Flag_PathA = true; PHY_SetTxPowerIndexByRateSection(Adapter, RFPath, pHalData->CurrentChannel, OFDM); PHY_SetTxPowerIndexByRateSection(Adapter, RFPath, pHalData->CurrentChannel, HT_MCS0_MCS7); } else { setIqkMatrix_8723B(pDM_Odm, Final_OFDM_Swing_Index, RFPath, - pDM_Odm->RFCalibrateInfo.IQKMatrixRegSetting[ChannelMappedIndex].Value[0][0], - pDM_Odm->RFCalibrateInfo.IQKMatrixRegSetting[ChannelMappedIndex].Value[0][1]); + pDM_Odm->RFCalibrateInfo.iqk_matrix_regs_setting_value[ChannelMappedIndex][0], + pDM_Odm->RFCalibrateInfo.iqk_matrix_regs_setting_value[ChannelMappedIndex][1]); if (pDM_Odm->Modify_TxAGC_Flag_PathA) { /* If TxAGC has changed, reset TxAGC again */ pDM_Odm->Remnant_OFDMSwingIdx[RFPath] = 0; @@ -1759,9 +1759,8 @@ void PHY_IQCalibrate_8723B( /* To Fix BSOD when final_candidate is 0xff */ /* by sherry 20120321 */ if (final_candidate < 4) { - for (i = 0; i < IQK_Matrix_REG_NUM; i++) - 
pDM_Odm->RFCalibrateInfo.IQKMatrixRegSetting[0].Value[0][i] = result[final_candidate][i]; - pDM_Odm->RFCalibrateInfo.IQKMatrixRegSetting[0].bIQKDone = true; + for (i = 0; i < IQK_MATRIX_REG_NUM; i++) + pDM_Odm->RFCalibrateInfo.iqk_matrix_regs_setting_value[0][i] = result[final_candidate][i]; } _PHY_SaveADDARegisters8723B(padapter, IQK_BB_REG_92C, pDM_Odm->RFCalibrateInfo.IQK_BB_backup_recover, 9); diff --git a/drivers/staging/rtl8723bs/hal/odm.h b/drivers/staging/rtl8723bs/hal/odm.h index fe9782d2d4fd..f5c804a1b9d5 100644 --- a/drivers/staging/rtl8723bs/hal/odm.h +++ b/drivers/staging/rtl8723bs/hal/odm.h @@ -193,8 +193,8 @@ struct odm_rate_adaptive { #define HP_THERMAL_NUM 8 #define AVG_THERMAL_NUM 8 -#define IQK_Matrix_REG_NUM 8 -#define IQK_Matrix_Settings_NUM 14 /* Channels_2_4G_NUM */ +#define IQK_MATRIX_REG_NUM 8 +#define IQK_MATRIX_SETTINGS_NUM 14 /* Channels_2_4G_NUM */ #define DM_Type_ByFW 0 #define DM_Type_ByDriver 1 @@ -479,12 +479,6 @@ enum odm_type_alna_e { /* tag_ODM_TYPE_ALNA_Definition */ TYPE_ALNA3 = BIT(3)|BIT(2)|BIT(1)|BIT(0) }; -struct iqk_matrix_regs_setting { /* _IQK_MATRIX_REGS_SETTING */ - bool bIQKDone; - s32 Value[3][IQK_Matrix_REG_NUM]; - bool bBWIqkResultSaved[3]; -}; - /* Remove PATHDIV_PARA struct to odm_PathDiv.h */ struct odm_rf_cal_t { /* ODM_RF_Calibration_Structure */ @@ -530,7 +524,7 @@ struct odm_rf_cal_t { /* ODM_RF_Calibration_Structure */ u8 ThermalValue_HP[HP_THERMAL_NUM]; u8 ThermalValue_HP_index; - struct iqk_matrix_regs_setting IQKMatrixRegSetting[IQK_Matrix_Settings_NUM]; + s32 iqk_matrix_regs_setting_value[IQK_MATRIX_SETTINGS_NUM][IQK_MATRIX_REG_NUM]; bool bNeedIQK; bool bIQKInProgress; u8 Delta_IQK; From f6f213ba49107be5ea2e4aad9ffea9e844c58a2a Mon Sep 17 00:00:00 2001 From: Deepak R Varma Date: Sun, 6 Nov 2022 15:51:58 +0530 Subject: [PATCH 1016/4122] staging: emxx_udc: use min helper macro for variable comparison Simplify code by using min helper macros in place of lengthy if/else block oriented logical evaluation and 
value assignment. This issue is identified by coccicheck using the minmax.cocci file. Signed-off-by: Deepak R Varma Link: https://lore.kernel.org/r/Y2eKxoUWtbPY/88b@qemulion Signed-off-by: Greg Kroah-Hartman --- drivers/staging/emxx_udc/emxx_udc.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/staging/emxx_udc/emxx_udc.c b/drivers/staging/emxx_udc/emxx_udc.c index b6abd3770e81..b4e19174bef2 100644 --- a/drivers/staging/emxx_udc/emxx_udc.c +++ b/drivers/staging/emxx_udc/emxx_udc.c @@ -1004,10 +1004,7 @@ static int _nbu2ss_in_dma(struct nbu2ss_udc *udc, struct nbu2ss_ep *ep, /* MAX Packet Size */ mpkt = _nbu2ss_readl(&preg->EP_REGS[num].EP_PCKT_ADRS) & EPN_MPKT; - if ((DMA_MAX_COUNT * mpkt) < length) - i_write_length = DMA_MAX_COUNT * mpkt; - else - i_write_length = length; + i_write_length = min(DMA_MAX_COUNT * mpkt, length); /*------------------------------------------------------------*/ /* Number of transmission packets */ From 38aa1741c3b373fb021b2d2ab1f5f07bec03fbd3 Mon Sep 17 00:00:00 2001 From: Deepak R Varma Date: Thu, 3 Nov 2022 02:23:18 +0530 Subject: [PATCH 1017/4122] staging: r8188eu: remove unused struct declarations Some structures are only declared but have not been used anywhere in the code. Remove such unused structs. Issue identified as part of coccicheck report driven code investigation. Suggested-by: Pavel Skripkin Signed-off-by: Deepak R Varma Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/Y2LYvhr74ng+xFbz@qemulion Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/include/wlan_bssdef.h | 42 ------------------- 1 file changed, 42 deletions(-) diff --git a/drivers/staging/r8188eu/include/wlan_bssdef.h b/drivers/staging/r8188eu/include/wlan_bssdef.h index 831c465df500..ffeafa19ef26 100644 --- a/drivers/staging/r8188eu/include/wlan_bssdef.h +++ b/drivers/staging/r8188eu/include/wlan_bssdef.h @@ -177,20 +177,6 @@ struct ndis_802_11_status_ind { /* MIC check time, 60 seconds. 
*/ #define MIC_CHECK_TIME 60000000 -struct ndis_802_11_auth_evt { - struct ndis_802_11_status_ind Status; - struct ndis_802_11_auth_req Request[1]; -}; - -struct ndis_802_11_test { - u32 Length; - u32 Type; - union { - struct ndis_802_11_auth_evt AuthenticationEvent; - NDIS_802_11_RSSI RssiTrigger; - } tt; -}; - #ifndef Ndis802_11APMode #define Ndis802_11APMode (Ndis802_11InfrastructureMax+1) #endif @@ -279,34 +265,6 @@ enum UAPSD_MAX_SP { #define NUM_PRE_AUTH_KEY 16 #define NUM_PMKID_CACHE NUM_PRE_AUTH_KEY -/* -* WPA2 -*/ - -struct pmkid_candidate { - unsigned char BSSID[ETH_ALEN]; - u32 Flags; -}; - -struct ndis_802_11_pmkid_list { - u32 Version; /* Version of the structure */ - u32 NumCandidates; /* No. of pmkid candidates */ - struct pmkid_candidate CandidateList[1]; -}; - -struct ndis_802_11_auth_encrypt { - enum ndis_802_11_auth_mode AuthModeSupported; - enum ndis_802_11_wep_status EncryptStatusSupported; -}; - -struct ndis_802_11_cap { - u32 Length; - u32 Version; - u32 NoOfPMKIDs; - u32 NoOfAuthEncryptPairsSupported; - struct ndis_802_11_auth_encrypt AuthenticationEncryptionSupported[1]; -}; - u8 key_2char2num(u8 hch, u8 lch); u8 key_char2num(u8 ch); u8 str_2char2num(u8 hch, u8 lch); From 30534c72bac17a6c160017229ba38fcd0cc21ac6 Mon Sep 17 00:00:00 2001 From: Michael Straube Date: Sat, 5 Nov 2022 10:39:16 +0100 Subject: [PATCH 1018/4122] staging: r8188eu: convert three functions to bool The functions is_client_associated_to_ap() is_client_associated_to_ibss() is_IBSS_empty() return boolean values. Convert their return type to bool and replace _FAIL, which is defined as 0, with false. Another step to get rid of _SUCCESS / _FAIL. 
Signed-off-by: Michael Straube Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221105093916.8255-1-straube.linux@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_wlan_util.c | 18 +++++++++--------- drivers/staging/r8188eu/include/rtw_mlme_ext.h | 6 +++--- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_wlan_util.c b/drivers/staging/r8188eu/core/rtw_wlan_util.c index e50631848cab..c95438a12b59 100644 --- a/drivers/staging/r8188eu/core/rtw_wlan_util.c +++ b/drivers/staging/r8188eu/core/rtw_wlan_util.c @@ -331,35 +331,35 @@ u16 get_beacon_interval(struct wlan_bssid_ex *bss) return le16_to_cpu(val); } -int is_client_associated_to_ap(struct adapter *padapter) +bool is_client_associated_to_ap(struct adapter *padapter) { struct mlme_ext_priv *pmlmeext; struct mlme_ext_info *pmlmeinfo; if (!padapter) - return _FAIL; + return false; pmlmeext = &padapter->mlmeextpriv; pmlmeinfo = &pmlmeext->mlmext_info; if ((pmlmeinfo->state & WIFI_FW_ASSOC_SUCCESS) && ((pmlmeinfo->state & 0x03) == WIFI_FW_STATION_STATE)) return true; - else - return _FAIL; + + return false; } -int is_client_associated_to_ibss(struct adapter *padapter) +bool is_client_associated_to_ibss(struct adapter *padapter) { struct mlme_ext_priv *pmlmeext = &padapter->mlmeextpriv; struct mlme_ext_info *pmlmeinfo = &pmlmeext->mlmext_info; if ((pmlmeinfo->state & WIFI_FW_ASSOC_SUCCESS) && ((pmlmeinfo->state & 0x03) == WIFI_FW_ADHOC_STATE)) return true; - else - return _FAIL; + + return false; } -int is_IBSS_empty(struct adapter *padapter) +bool is_IBSS_empty(struct adapter *padapter) { unsigned int i; struct mlme_ext_priv *pmlmeext = &padapter->mlmeextpriv; @@ -367,7 +367,7 @@ int is_IBSS_empty(struct adapter *padapter) for (i = IBSS_START_MAC_ID; i < NUM_STA; i++) { if (pmlmeinfo->FW_sta_info[i].status == 1) - return _FAIL; + return false; } return true; } diff --git 
a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h index e234a3b9af6f..7652e72a03f4 100644 --- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h +++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h @@ -432,9 +432,9 @@ void update_network(struct wlan_bssid_ex *dst, struct wlan_bssid_ex *src, u8 *get_my_bssid(struct wlan_bssid_ex *pnetwork); u16 get_beacon_interval(struct wlan_bssid_ex *bss); -int is_client_associated_to_ap(struct adapter *padapter); -int is_client_associated_to_ibss(struct adapter *padapter); -int is_IBSS_empty(struct adapter *padapter); +bool is_client_associated_to_ap(struct adapter *padapter); +bool is_client_associated_to_ibss(struct adapter *padapter); +bool is_IBSS_empty(struct adapter *padapter); unsigned char check_assoc_AP(u8 *pframe, uint len); From cc027b3e4087e27d23e5c1d778e802c35f4c987d Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sun, 6 Nov 2022 13:48:53 +0100 Subject: [PATCH 1019/4122] staging: r8188eu: don't store addba request There's no need to store an incoming addba request in struct mlme_ext_info. We only need the addba request to copy some of its fields into our addba response. It's simpler to pass the incoming request's management frame to issue_action_BA as an additional parameter. issue_action_BA can then extract the required fields. If issue_action_BA prepares a request rather than a response, the caller sets the parameter for the incoming request to NULL. 
Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221106124901.720785-2-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 25 ++++++++----------- .../staging/r8188eu/include/rtw_mlme_ext.h | 4 +-- drivers/staging/r8188eu/include/wifi.h | 8 ------ 3 files changed, 13 insertions(+), 24 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index 6679d4037d6b..324757699716 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -1486,11 +1486,9 @@ static void OnAction_back(struct adapter *padapter, struct recv_frame *precv_fra struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)precv_frame->rx_data; struct sta_info *psta = NULL; struct recv_reorder_ctrl *preorder_ctrl; - unsigned char *frame_body; unsigned short tid; struct mlme_ext_priv *pmlmeext = &padapter->mlmeextpriv; struct mlme_ext_info *pmlmeinfo = &pmlmeext->mlmext_info; - u8 *pframe = precv_frame->rx_data; struct sta_priv *pstapriv = &padapter->stapriv; if ((pmlmeinfo->state & 0x03) != WIFI_FW_AP_STATE) @@ -1501,23 +1499,19 @@ static void OnAction_back(struct adapter *padapter, struct recv_frame *precv_fra if (!psta) return; - frame_body = (unsigned char *)(pframe + sizeof(struct ieee80211_hdr_3addr)); - if (!pmlmeinfo->HT_enable) return; /* All union members start with an action code, it's ok to use addba_req. */ switch (mgmt->u.action.u.addba_req.action_code) { case WLAN_ACTION_ADDBA_REQ: - memcpy(&pmlmeinfo->ADDBA_req, &frame_body[2], sizeof(struct ADDBA_request)); tid = u16_get_bits(le16_to_cpu(mgmt->u.action.u.addba_req.capab), IEEE80211_ADDBA_PARAM_TID_MASK); preorder_ctrl = &psta->recvreorder_ctrl[tid]; preorder_ctrl->indicate_seq = 0xffff; preorder_ctrl->enable = pmlmeinfo->bAcceptAddbaReq; - issue_action_BA(padapter, mgmt->sa, WLAN_ACTION_ADDBA_RESP, pmlmeinfo->bAcceptAddbaReq ? 
- WLAN_STATUS_SUCCESS : WLAN_STATUS_REQUEST_DECLINED); + WLAN_STATUS_SUCCESS : WLAN_STATUS_REQUEST_DECLINED, mgmt); break; case WLAN_ACTION_ADDBA_RESP: tid = u16_get_bits(le16_to_cpu(mgmt->u.action.u.addba_resp.capab), @@ -5377,7 +5371,8 @@ exit: return ret; } -void issue_action_BA(struct adapter *padapter, unsigned char *raddr, u8 action, u16 status) +void issue_action_BA(struct adapter *padapter, unsigned char *raddr, u8 action, + u16 status, struct ieee80211_mgmt *mgmt_req) { u16 start_seq; u16 BA_starting_seqctrl = 0; @@ -5446,13 +5441,13 @@ void issue_action_BA(struct adapter *padapter, unsigned char *raddr, u8 action, break; case WLAN_ACTION_ADDBA_RESP: mgmt->u.action.u.addba_resp.action_code = WLAN_ACTION_ADDBA_RESP; - mgmt->u.action.u.addba_resp.dialog_token = pmlmeinfo->ADDBA_req.dialog_token; + mgmt->u.action.u.addba_resp.dialog_token = mgmt_req->u.action.u.addba_req.dialog_token; mgmt->u.action.u.addba_resp.status = cpu_to_le16(status); - capab = le16_to_cpu(pmlmeinfo->ADDBA_req.BA_para_set) & 0x3f; + capab = le16_to_cpu(mgmt_req->u.action.u.addba_req.capab) & 0x3f; capab |= u16_encode_bits(64, IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK); capab |= u16_encode_bits(pregpriv->ampdu_amsdu, IEEE80211_ADDBA_PARAM_AMSDU_MASK); mgmt->u.action.u.addba_req.capab = cpu_to_le16(capab); - mgmt->u.action.u.addba_resp.timeout = pmlmeinfo->ADDBA_req.BA_timeout_value; + mgmt->u.action.u.addba_resp.timeout = mgmt_req->u.action.u.addba_req.timeout; pattrib->pktlen = offsetofend(struct ieee80211_mgmt, u.action.u.addba_resp.timeout); break; case WLAN_ACTION_DELBA: @@ -5620,7 +5615,8 @@ unsigned int send_delba(struct adapter *padapter, u8 initiator, u8 *addr) if (initiator == 0) { /* recipient */ for (tid = 0; tid < MAXTID; tid++) { if (psta->recvreorder_ctrl[tid].enable) { - issue_action_BA(padapter, addr, WLAN_ACTION_DELBA, (((tid << 1) | initiator) & 0x1F)); + issue_action_BA(padapter, addr, WLAN_ACTION_DELBA, + (((tid << 1) | initiator) & 0x1F), NULL); 
psta->recvreorder_ctrl[tid].enable = false; psta->recvreorder_ctrl[tid].indicate_seq = 0xffff; } @@ -5628,7 +5624,8 @@ unsigned int send_delba(struct adapter *padapter, u8 initiator, u8 *addr) } else if (initiator == 1) { /* originator */ for (tid = 0; tid < MAXTID; tid++) { if (psta->htpriv.agg_enable_bitmap & BIT(tid)) { - issue_action_BA(padapter, addr, WLAN_ACTION_DELBA, (((tid << 1) | initiator) & 0x1F)); + issue_action_BA(padapter, addr, WLAN_ACTION_DELBA, + (((tid << 1) | initiator) & 0x1F), NULL); psta->htpriv.agg_enable_bitmap &= ~BIT(tid); psta->htpriv.candidate_tid_bitmap &= ~BIT(tid); } @@ -7683,7 +7680,7 @@ u8 add_ba_hdl(struct adapter *padapter, unsigned char *pbuf) if (((pmlmeinfo->state & WIFI_FW_ASSOC_SUCCESS) && (pmlmeinfo->HT_enable)) || ((pmlmeinfo->state & 0x03) == WIFI_FW_AP_STATE)) { - issue_action_BA(padapter, pparm->addr, WLAN_ACTION_ADDBA_REQ, (u16)pparm->tid); + issue_action_BA(padapter, pparm->addr, WLAN_ACTION_ADDBA_REQ, (u16)pparm->tid, NULL); _set_timer(&psta->addba_retry_timer, ADDBA_TO); } else { psta->htpriv.candidate_tid_bitmap &= ~BIT(pparm->tid); diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h index 7652e72a03f4..9c0af4704607 100644 --- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h +++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h @@ -285,7 +285,6 @@ struct mlme_ext_info { u8 bwmode_updated; u8 hidden_ssid_mode; - struct ADDBA_request ADDBA_req; struct WMM_para_element WMM_param; struct HT_caps_element HT_caps; struct HT_info_element HT_info; @@ -523,7 +522,8 @@ int issue_deauth(struct adapter *padapter, unsigned char *da, unsigned short reason); int issue_deauth_ex(struct adapter *padapter, u8 *da, unsigned short reason, int try_cnt, int wait_ms); -void issue_action_BA(struct adapter *padapter, unsigned char *raddr, u8 action, u16 status); +void issue_action_BA(struct adapter *padapter, unsigned char *raddr, u8 action, + u16 status, struct ieee80211_mgmt 
*mgmt_req); unsigned int send_delba(struct adapter *padapter, u8 initiator, u8 *addr); unsigned int send_beacon(struct adapter *padapter); bool get_beacon_valid_bit(struct adapter *adapter); diff --git a/drivers/staging/r8188eu/include/wifi.h b/drivers/staging/r8188eu/include/wifi.h index 92a584a8b6c0..2381c519ceaf 100644 --- a/drivers/staging/r8188eu/include/wifi.h +++ b/drivers/staging/r8188eu/include/wifi.h @@ -430,14 +430,6 @@ struct WMM_para_element { struct AC_param ac_param[4]; } __packed; -struct ADDBA_request { - unsigned char action_code; - unsigned char dialog_token; - __le16 BA_para_set; - __le16 BA_timeout_value; - __le16 BA_starting_seqctrl; -} __packed; - #define MAX_AMPDU_FACTOR_64K 3 /* Spatial Multiplexing Power Save Modes */ From 4142c442f2597c806d6fdcaf253aa38bfea4233c Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sun, 6 Nov 2022 13:48:54 +0100 Subject: [PATCH 1020/4122] staging: r8188eu: remove some obsolete comments Remove obsolete comments in validate_recv_data_frame. There is no prxcmd variable (any more?). 
Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221106124901.720785-3-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_recv.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_recv.c b/drivers/staging/r8188eu/core/rtw_recv.c index 94f85cd7038d..cb0f35d7ab98 100644 --- a/drivers/staging/r8188eu/core/rtw_recv.c +++ b/drivers/staging/r8188eu/core/rtw_recv.c @@ -1065,8 +1065,6 @@ static int validate_recv_data_frame(struct adapter *adapter, if (!psta) return _FAIL; - /* psta->rssi = prxcmd->rssi; */ - /* psta->signal_quality = prxcmd->sq; */ precv_frame->psta = psta; pattrib->amsdu = 0; From 8554c4768a36da65ca35a397e4d9c6285b8daf5e Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sun, 6 Nov 2022 13:48:55 +0100 Subject: [PATCH 1021/4122] staging: r8188eu: reorder assignments, clarify the header format Reorder some of the assignments in update_recvframe_attrib_88e. This should make it a bit easier to understand the format of the header that is added by the chip's firmware. 
Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221106124901.720785-4-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c b/drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c index 9a61eef8550b..d1ac2960f1c4 100644 --- a/drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c +++ b/drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c @@ -66,28 +66,25 @@ void update_recvframe_attrib_88e(struct recv_frame *precvframe, struct recv_stat if (pattrib->pkt_rpt_type == NORMAL_RX) { pattrib->pkt_len = le32_to_cpu(prxstat->rxdw0) & 0x00003fff; + pattrib->icv_err = (le32_to_cpu(prxstat->rxdw0) >> 15) & 0x1; pattrib->drvinfo_sz = ((le32_to_cpu(prxstat->rxdw0) >> 16) & 0xf) * 8; - - pattrib->physt = (le32_to_cpu(prxstat->rxdw0) >> 26) & 0x1; - - pattrib->bdecrypted = (le32_to_cpu(prxstat->rxdw0) & BIT(27)) ? 0 : 1; - pattrib->encrypt = (le32_to_cpu(prxstat->rxdw0) >> 20) & 0x7; - + pattrib->encrypt = (u8)((le32_to_cpu(prxstat->rxdw0) >> 20) & 0x7); pattrib->qos = (le32_to_cpu(prxstat->rxdw0) >> 23) & 0x1; - pattrib->priority = (le32_to_cpu(prxstat->rxdw1) >> 8) & 0xf; + pattrib->shift_sz = (le32_to_cpu(prxstat->rxdw0) >> 24) & 0x3; + pattrib->physt = (le32_to_cpu(prxstat->rxdw0) >> 26) & 0x1; + pattrib->bdecrypted = (le32_to_cpu(prxstat->rxdw0) & BIT(27)) ? 
0 : 1; + pattrib->priority = (le32_to_cpu(prxstat->rxdw1) >> 8) & 0xf; pattrib->amsdu = (le32_to_cpu(prxstat->rxdw1) >> 13) & 0x1; + pattrib->mdata = (le32_to_cpu(prxstat->rxdw1) >> 26) & 0x1; + pattrib->mfrag = (le32_to_cpu(prxstat->rxdw1) >> 27) & 0x1; pattrib->seq_num = le32_to_cpu(prxstat->rxdw2) & 0x00000fff; pattrib->frag_num = (le32_to_cpu(prxstat->rxdw2) >> 12) & 0xf; - pattrib->mfrag = (le32_to_cpu(prxstat->rxdw1) >> 27) & 0x1; - pattrib->mdata = (le32_to_cpu(prxstat->rxdw1) >> 26) & 0x1; pattrib->mcs_rate = le32_to_cpu(prxstat->rxdw3) & 0x3f; pattrib->rxht = (le32_to_cpu(prxstat->rxdw3) >> 6) & 0x1; - pattrib->icv_err = (le32_to_cpu(prxstat->rxdw0) >> 15) & 0x1; - pattrib->shift_sz = (le32_to_cpu(prxstat->rxdw0) >> 24) & 0x3; } else if (pattrib->pkt_rpt_type == TX_REPORT1) { /* CCX */ pattrib->pkt_len = TX_RPT1_PKT_LEN; } else if (pattrib->pkt_rpt_type == TX_REPORT2) { From 2dbc324eaab66098ae485114b0862653355a0a2b Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sun, 6 Nov 2022 13:48:56 +0100 Subject: [PATCH 1022/4122] staging: r8188eu: reformat a function header The definition of Hal_EfuseParseIDCode88E can be a single line. 
Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221106124901.720785-5-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/hal/rtl8188e_hal_init.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/staging/r8188eu/hal/rtl8188e_hal_init.c b/drivers/staging/r8188eu/hal/rtl8188e_hal_init.c index cc29963f4b49..73855bca76fe 100644 --- a/drivers/staging/r8188eu/hal/rtl8188e_hal_init.c +++ b/drivers/staging/r8188eu/hal/rtl8188e_hal_init.c @@ -676,11 +676,7 @@ s32 InitLLTTable(struct adapter *padapter, u8 txpktbuf_bndy) return status; } -void -Hal_EfuseParseIDCode88E( - struct adapter *padapter, - u8 *hwinfo - ) +void Hal_EfuseParseIDCode88E(struct adapter *padapter, u8 *hwinfo) { struct eeprom_priv *pEEPROM = &padapter->eeprompriv; struct net_device *netdev = padapter->pnetdev; From d44fa4cb2e30fc22b2763fd425f2c48be958354f Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sun, 6 Nov 2022 13:48:57 +0100 Subject: [PATCH 1023/4122] staging: r8188eu: remove state checks in rtw_led_control There's no need to check for bSurpriseRemoved or bDriverStopped in the rtw_led_control function. This function schedules a delayed worker which calls SwLedOn or SwLedOff (or the function calls SwLedOff directly). SwLedOn and SwLedOff check bDriverStopped themselves or they initiate a USB control transfer via usb_write, where bSurpriseRemoved is checked. 
Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221106124901.720785-6-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_led.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_led.c b/drivers/staging/r8188eu/core/rtw_led.c index ce8de2eb7845..48725ce9d369 100644 --- a/drivers/staging/r8188eu/core/rtw_led.c +++ b/drivers/staging/r8188eu/core/rtw_led.c @@ -140,8 +140,7 @@ void rtw_led_control(struct adapter *padapter, enum LED_CTL_MODE LedAction) struct registry_priv *registry_par; struct mlme_priv *pmlmepriv = &padapter->mlmepriv; - if ((padapter->bSurpriseRemoved) || (padapter->bDriverStopped) || - (!padapter->hw_init_completed)) + if (!padapter->hw_init_completed) return; if (!pLed->bRegUseLed) From b69373787637e9b38f73ff9f8b7b03e9d9767839 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sun, 6 Nov 2022 13:48:58 +0100 Subject: [PATCH 1024/4122] staging: r8188eu: clean up rtw_hal_init Clean up the rtw_hal_init function. Remove the status variable. Exit immediately for errors. 
Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221106124901.720785-7-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/hal/hal_intf.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/drivers/staging/r8188eu/hal/hal_intf.c b/drivers/staging/r8188eu/hal/hal_intf.c index 37935aef71ea..13790e32f11c 100644 --- a/drivers/staging/r8188eu/hal/hal_intf.c +++ b/drivers/staging/r8188eu/hal/hal_intf.c @@ -6,24 +6,19 @@ #include "../include/drv_types.h" #include "../include/hal_intf.h" -uint rtw_hal_init(struct adapter *adapt) +uint rtw_hal_init(struct adapter *adapt) { - uint status = _SUCCESS; - adapt->hw_init_completed = false; - status = rtl8188eu_hal_init(adapt); + if (rtl8188eu_hal_init(adapt) != _SUCCESS) + return _FAIL; - if (status == _SUCCESS) { - adapt->hw_init_completed = true; + adapt->hw_init_completed = true; - if (adapt->registrypriv.notch_filter == 1) - hal_notch_filter_8188e(adapt, 1); - } else { - adapt->hw_init_completed = false; - } + if (adapt->registrypriv.notch_filter == 1) + hal_notch_filter_8188e(adapt, 1); - return status; + return _SUCCESS; } uint rtw_hal_deinit(struct adapter *adapt) From b78ec61d9478d438bc9e723214cf609ac669d132 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sun, 6 Nov 2022 13:48:59 +0100 Subject: [PATCH 1025/4122] staging: r8188eu: remove get_fwstate The get_fwstate function is not used. Remove it. 
Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221106124901.720785-8-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/include/rtw_mlme.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/staging/r8188eu/include/rtw_mlme.h b/drivers/staging/r8188eu/include/rtw_mlme.h index ca539c652f26..268f898b151b 100644 --- a/drivers/staging/r8188eu/include/rtw_mlme.h +++ b/drivers/staging/r8188eu/include/rtw_mlme.h @@ -443,11 +443,6 @@ static inline bool check_fwstate(struct mlme_priv *pmlmepriv, int state) return false; } -static inline int get_fwstate(struct mlme_priv *pmlmepriv) -{ - return pmlmepriv->fw_state; -} - /* * No Limit on the calling context, * therefore set it to be the critical section... From 32265aaf61c6bf399a7ed2c06d75a7d00dd5500e Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sun, 6 Nov 2022 13:49:00 +0100 Subject: [PATCH 1026/4122] staging: r8188eu: merge two rtw_free_network_nolock functions Remove the _rtw_free_network_nolock function and merge it into rtw_free_network_nolock, which is its only caller. 
Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221106124901.720785-9-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme.c | 23 ++++++++-------------- drivers/staging/r8188eu/include/rtw_mlme.h | 2 -- 2 files changed, 8 insertions(+), 17 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme.c b/drivers/staging/r8188eu/core/rtw_mlme.c index a47ae33454b3..b272123626ac 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme.c +++ b/drivers/staging/r8188eu/core/rtw_mlme.c @@ -76,19 +76,6 @@ void _rtw_free_network(struct mlme_priv *pmlmepriv, struct wlan_network *pnetwor spin_unlock_bh(&free_queue->lock); } -void _rtw_free_network_nolock(struct mlme_priv *pmlmepriv, struct wlan_network *pnetwork) -{ - struct __queue *free_queue = &pmlmepriv->free_bss_pool; - - if (!pnetwork) - return; - if (pnetwork->fixed) - return; - list_del_init(&pnetwork->list); - list_add_tail(&pnetwork->list, get_list_head(free_queue)); - pmlmepriv->num_of_scanned--; -} - /* return the wlan_network with the matching addr @@ -307,9 +294,15 @@ exit: static void rtw_free_network_nolock(struct mlme_priv *pmlmepriv, struct wlan_network *pnetwork) { + struct __queue *free_queue = &pmlmepriv->free_bss_pool; - _rtw_free_network_nolock(pmlmepriv, pnetwork); - + if (!pnetwork) + return; + if (pnetwork->fixed) + return; + list_del_init(&pnetwork->list); + list_add_tail(&pnetwork->list, get_list_head(free_queue)); + pmlmepriv->num_of_scanned--; } void rtw_free_network_queue(struct adapter *dev, u8 isfreeall) diff --git a/drivers/staging/r8188eu/include/rtw_mlme.h b/drivers/staging/r8188eu/include/rtw_mlme.h index 268f898b151b..3ff653ff1d81 100644 --- a/drivers/staging/r8188eu/include/rtw_mlme.h +++ b/drivers/staging/r8188eu/include/rtw_mlme.h @@ -539,8 +539,6 @@ struct wlan_network *rtw_alloc_network(struct mlme_priv *pmlmepriv); void _rtw_free_network(struct mlme_priv *pmlmepriv, struct wlan_network 
*pnetwork, u8 isfreeall); -void _rtw_free_network_nolock(struct mlme_priv *pmlmepriv, - struct wlan_network *pnetwork); struct wlan_network *_rtw_find_network(struct __queue *scanned_queue, u8 *addr); From c12b5b5614ac39c66d6dcf8f367620e614343b25 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sun, 6 Nov 2022 13:49:01 +0100 Subject: [PATCH 1027/4122] staging: r8188eu: remove checks in dump_mgntframe There's no need to check for bSurpriseRemoved and bDriverStopped in dump_mgntframe. The sequence of function calls is dump_mgntframe rtl8188eu_mgnt_xmit rtw_dump_xframe loop over all fragments For each fragment, rtw_write_port is called. This function checks bSurpriseRemoved and bDriverStopped. Signed-off-by: Martin Kaiser Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221106124901.720785-10-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index 324757699716..17803aca83c8 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -3959,9 +3959,6 @@ void update_mgntframe_attrib(struct adapter *padapter, struct pkt_attrib *pattri void dump_mgntframe(struct adapter *padapter, struct xmit_frame *pmgntframe) { - if (padapter->bSurpriseRemoved || padapter->bDriverStopped) - return; - rtl8188eu_mgnt_xmit(padapter, pmgntframe); } From 2b62e1b447c643de8bb063d64348e4d000f3c557 Mon Sep 17 00:00:00 2001 From: Michael Straube Date: Sun, 6 Nov 2022 14:34:43 +0100 Subject: [PATCH 1028/4122] staging: r8188eu: clean up error handling in rtw_start_drv_threads() Convert the error handling in the function rtw_start_drv_threads() to the common logic used in the kernel. Another step to get rid of _FAIL and _SUCCESS which uses inverted logic.
Signed-off-by: Michael Straube Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/20221106133443.8872-1-straube.linux@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/include/osdep_intf.h | 2 +- drivers/staging/r8188eu/os_dep/os_intfs.c | 17 +++++++---------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/staging/r8188eu/include/osdep_intf.h b/drivers/staging/r8188eu/include/osdep_intf.h index 0f7d74a3ff6d..6d66cb57225e 100644 --- a/drivers/staging/r8188eu/include/osdep_intf.h +++ b/drivers/staging/r8188eu/include/osdep_intf.h @@ -46,7 +46,7 @@ u8 rtw_init_drv_sw(struct adapter *padapter); void rtw_free_drv_sw(struct adapter *padapter); void rtw_reset_drv_sw(struct adapter *padapter); -u32 rtw_start_drv_threads(struct adapter *padapter); +int rtw_start_drv_threads(struct adapter *padapter); void rtw_stop_drv_threads (struct adapter *padapter); void rtw_cancel_all_timer(struct adapter *padapter); diff --git a/drivers/staging/r8188eu/os_dep/os_intfs.c b/drivers/staging/r8188eu/os_dep/os_intfs.c index 970f380bac96..66556e07ed93 100644 --- a/drivers/staging/r8188eu/os_dep/os_intfs.c +++ b/drivers/staging/r8188eu/os_dep/os_intfs.c @@ -363,18 +363,16 @@ struct net_device *rtw_init_netdev(struct adapter *old_padapter) return pnetdev; } -u32 rtw_start_drv_threads(struct adapter *padapter) +int rtw_start_drv_threads(struct adapter *padapter) { - u32 _status = _SUCCESS; - padapter->cmdThread = kthread_run(rtw_cmd_thread, padapter, "RTW_CMD_THREAD"); if (IS_ERR(padapter->cmdThread)) - _status = _FAIL; - else - /* wait for rtw_cmd_thread() to start running */ - wait_for_completion(&padapter->cmdpriv.start_cmd_thread); + return PTR_ERR(padapter->cmdThread); - return _status; + /* wait for rtw_cmd_thread() to start running */ + wait_for_completion(&padapter->cmdpriv.start_cmd_thread); + + return 0; } void rtw_stop_drv_threads(struct adapter *padapter) @@ -627,8 +625,7 @@ static int _netdev_open(struct 
net_device *pnetdev) netdev_dbg(pnetdev, "MAC Address = %pM\n", pnetdev->dev_addr); - status = rtw_start_drv_threads(padapter); - if (status == _FAIL) { + if (rtw_start_drv_threads(padapter)) { pr_info("Initialize driver software resource Failed!\n"); goto netdev_open_error; } From c1a84fe1f431cb3de6763aabddcf6073d5053f9d Mon Sep 17 00:00:00 2001 From: Deepak R Varma Date: Sun, 6 Nov 2022 23:25:37 +0530 Subject: [PATCH 1029/4122] staging: r8188eu: simplify complex pointer casting Pointers to structures udphdr and dhcpMessage are derived by casting adjacent pointers with size_t. Such typecast of pointer using size_t is not preferred. The code looks complex and delicate. Simplify such casting by utilizing generic "void *" casting. Suggested-by: Joe Perches Signed-off-by: Deepak R Varma Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/1428580d09a9916899209c9278dca40ee2d297d3.1667755987.git.drv@mailo.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_br_ext.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_br_ext.c b/drivers/staging/r8188eu/core/rtw_br_ext.c index a23f7df373ed..4deaa7e352a3 100644 --- a/drivers/staging/r8188eu/core/rtw_br_ext.c +++ b/drivers/staging/r8188eu/core/rtw_br_ext.c @@ -610,12 +610,11 @@ void dhcp_flag_bcast(struct adapter *priv, struct sk_buff *skb) struct iphdr *iph = (struct iphdr *)(skb->data + ETH_HLEN); if (iph->protocol == IPPROTO_UDP) { /* UDP */ - struct udphdr *udph = (struct udphdr *)((size_t)iph + (iph->ihl << 2)); + struct udphdr *udph = (void *)iph + (iph->ihl << 2); if ((udph->source == htons(CLIENT_PORT)) && (udph->dest == htons(SERVER_PORT))) { /* DHCP request */ - struct dhcpMessage *dhcph = - (struct dhcpMessage *)((size_t)udph + sizeof(struct udphdr)); + struct dhcpMessage *dhcph = (void *)udph + sizeof(struct udphdr); u32 cookie = be32_to_cpu((__be32)dhcph->cookie); if (cookie == DHCP_MAGIC) { /* match magic word */ 
From a57ef65a09a0214776675dcf931ace146724b63c Mon Sep 17 00:00:00 2001 From: Deepak R Varma Date: Sun, 6 Nov 2022 23:27:12 +0530 Subject: [PATCH 1030/4122] staging: r8188eu: remove unnecessary casting The dhcpMessage struct member variable "cookie" is already declared to be of type __be32. There is no need to cast it again as __be32. Signed-off-by: Deepak R Varma Tested-by: Philipp Hortmann # Edimax N150 Link: https://lore.kernel.org/r/c333e22349c5c347c740b425330b35830b969fa9.1667755987.git.drv@mailo.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_br_ext.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/r8188eu/core/rtw_br_ext.c b/drivers/staging/r8188eu/core/rtw_br_ext.c index 4deaa7e352a3..a7c67014dde0 100644 --- a/drivers/staging/r8188eu/core/rtw_br_ext.c +++ b/drivers/staging/r8188eu/core/rtw_br_ext.c @@ -615,7 +615,7 @@ void dhcp_flag_bcast(struct adapter *priv, struct sk_buff *skb) if ((udph->source == htons(CLIENT_PORT)) && (udph->dest == htons(SERVER_PORT))) { /* DHCP request */ struct dhcpMessage *dhcph = (void *)udph + sizeof(struct udphdr); - u32 cookie = be32_to_cpu((__be32)dhcph->cookie); + u32 cookie = be32_to_cpu(dhcph->cookie); if (cookie == DHCP_MAGIC) { /* match magic word */ if (!(dhcph->flags & htons(BROADCAST_FLAG))) { From d54b6ac14d3f1ac033fe88a7f9935ba9f1f47077 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Mon, 7 Nov 2022 21:28:22 +0100 Subject: [PATCH 1031/4122] staging: r8188eu: use a qos_hdr in validate_recv_data_frame Define a struct ieee80211_qos_hdr in the validate_recv_data_frame function. Use this struct to replace some numeric offsets and make the code easier to understand. 
Reported-by: Dan Carpenter Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221107202824.61431-2-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_recv.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_recv.c b/drivers/staging/r8188eu/core/rtw_recv.c index cb0f35d7ab98..5b0a66aebff1 100644 --- a/drivers/staging/r8188eu/core/rtw_recv.c +++ b/drivers/staging/r8188eu/core/rtw_recv.c @@ -1032,7 +1032,6 @@ static int validate_recv_data_frame(struct adapter *adapter, struct recv_frame *precv_frame) { struct sta_info *psta = NULL; - u8 *ptr = precv_frame->rx_data; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)precv_frame->rx_data; struct rx_pkt_attrib *pattrib = &precv_frame->attrib; struct security_priv *psecuritypriv = &adapter->securitypriv; @@ -1071,10 +1070,12 @@ static int validate_recv_data_frame(struct adapter *adapter, pattrib->ack_policy = 0; /* parsing QC field */ if (pattrib->qos) { + struct ieee80211_qos_hdr *qos_hdr = (struct ieee80211_qos_hdr *)hdr; + pattrib->priority = ieee80211_get_tid(hdr); - pattrib->ack_policy = GetAckpolicy((ptr + 24)); - pattrib->amsdu = GetAMsdu((ptr + 24)); - pattrib->hdrlen = 26; + pattrib->ack_policy = GetAckpolicy(&qos_hdr->qos_ctrl); + pattrib->amsdu = GetAMsdu(&qos_hdr->qos_ctrl); + pattrib->hdrlen = sizeof(*qos_hdr); if (pattrib->priority != 0 && pattrib->priority != 3) adapter->recvpriv.bIsAnyNonBEPkts = true; From fa2a8d4284225fbc0ee3db4760d6d7339586d61d Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Mon, 7 Nov 2022 21:28:23 +0100 Subject: [PATCH 1032/4122] staging: r8188eu: drop another removal/stop check There's no need to check bDriverStopped and bSurpriseRemoved in issue_probereq_ex. The code path looks like issue_probereq_ex _issue_probereq dump_mgntframe or dump_mgntframe_and_wait_ack All paths from dump_mgntframe check the two variables. dump_mgntframe_and_wait_ack contains a check as well. 
Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221107202824.61431-3-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index 17803aca83c8..bfd6afd7266e 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -4496,9 +4496,6 @@ int issue_probereq_ex(struct adapter *padapter, struct ndis_802_11_ssid *pssid, i++; - if (padapter->bDriverStopped || padapter->bSurpriseRemoved) - break; - if (i < try_cnt && wait_ms > 0 && ret == _FAIL) msleep(wait_ms); From a614e753e8e7f3322b560dcf6eaf44468ec22b3a Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Mon, 7 Nov 2022 21:28:24 +0100 Subject: [PATCH 1033/4122] staging: r8188eu: drop removal/stop check in dump_mgntframe_and_wait_ack We can remove the checks for bDriverStopped and bSurpriseRemoved in dump_mgntframe_and_wait_ack. The code path from this function looks like dump_mgntframe_and_wait_ack rtl8188eu_mgnt_xmit rtw_dump_xframe loop over all fragments rtw_write_port is called for each fragment. bSurpriseRemoved and bDriverStopped are checked in rtw_write_port. 
Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221107202824.61431-4-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index bfd6afd7266e..be33489d3dfd 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -3988,9 +3988,6 @@ s32 dump_mgntframe_and_wait_ack(struct adapter *padapter, struct xmit_frame *pmg u32 timeout_ms = 500;/* 500ms */ struct xmit_priv *pxmitpriv = &padapter->xmitpriv; - if (padapter->bSurpriseRemoved || padapter->bDriverStopped) - return -1; - mutex_lock(&pxmitpriv->ack_tx_mutex); pxmitpriv->ack_tx = true; From 0ce0f9d0785a7ba5637a22b63332cf747772da2a Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 7 Nov 2022 00:05:06 +0100 Subject: [PATCH 1034/4122] usb: phy: phy-gpio-vbus-usb: Add device tree probing Make it possible to probe the GPIO VBUS detection driver from the device tree compatible for GPIO USB B connectors. Since this driver is using the "gpio-usb-b-connector" compatible, it is important to discern it from the role switch connector driver (which does not provide a phy), so we add some Kconfig text and depend on !USB_CONN_GPIO. 
Cc: Rob Herring Cc: Prashant Malani Cc: Felipe Balbi Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20221106230506.1646101-1-linus.walleij@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/phy/Kconfig | 6 +++++- drivers/usb/phy/phy-gpio-vbus-usb.c | 12 ++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig index 2acbe41fbf7e..efdcafdbe46d 100644 --- a/drivers/usb/phy/Kconfig +++ b/drivers/usb/phy/Kconfig @@ -93,12 +93,16 @@ config USB_GPIO_VBUS tristate "GPIO based peripheral-only VBUS sensing 'transceiver'" depends on GPIOLIB || COMPILE_TEST depends on USB_GADGET || !USB_GADGET # if USB_GADGET=m, this can't be 'y' + depends on !USB_CONN_GPIO select USB_PHY help Provides simple GPIO VBUS sensing for controllers with an internal transceiver via the usb_phy interface, and optionally control of a D+ pullup GPIO as well as a VBUS - current limit regulator. + current limit regulator. This driver is for devices that do + NOT support role switch. OTG devices that can do role switch + (master/peripheral) shall use the USB based connection + detection driver USB_CONN_GPIO. config OMAP_OTG tristate "OMAP USB OTG controller driver" diff --git a/drivers/usb/phy/phy-gpio-vbus-usb.c b/drivers/usb/phy/phy-gpio-vbus-usb.c index f13f5530746c..12dfeff7de3d 100644 --- a/drivers/usb/phy/phy-gpio-vbus-usb.c +++ b/drivers/usb/phy/phy-gpio-vbus-usb.c @@ -366,12 +366,24 @@ static const struct dev_pm_ops gpio_vbus_dev_pm_ops = { MODULE_ALIAS("platform:gpio-vbus"); +/* + * NOTE: this driver matches against "gpio-usb-b-connector" for + * devices that do NOT support role switch. 
+ */ +static const struct of_device_id gpio_vbus_of_match[] = { + { + .compatible = "gpio-usb-b-connector", + }, + {}, +}; + static struct platform_driver gpio_vbus_driver = { .driver = { .name = "gpio-vbus", #ifdef CONFIG_PM .pm = &gpio_vbus_dev_pm_ops, #endif + .of_match_table = gpio_vbus_of_match, }, .probe = gpio_vbus_probe, .remove = gpio_vbus_remove, From 04914233561377fc0369b984c9d19ec1b6ce2845 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Mon, 7 Nov 2022 16:37:55 -0800 Subject: [PATCH 1035/4122] usb: dwc3: gadget: Reduce TRB IOC settings When the TRB ring is full, the dwc3 driver must make sure that there's at least 1 TRB with Interrupt On Completion (IOC) set to notify of available TRBs. The current logic just sets the TRB's IOC whenever we run out of TRBs, but it doesn't consider that there may be other TRBs with IOC/LST set already. This creates more events and unnecessary delay from interrupt handling. Only forcefully set IOC when we run out of TRBs and none of the TRBs in the TRB ring has had IOC set. 
Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/72a1fa448eb1201b152e65be7902a5d1c75b9f3a.1667867687.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 5fe2d136dff5..ecddb144871b 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1463,8 +1463,18 @@ static int dwc3_prepare_trbs_sg(struct dwc3_ep *dep, */ if (num_trbs_left == 1 || (needs_extra_trb && num_trbs_left <= 2 && - sg_dma_len(sg_next(s)) >= length)) - must_interrupt = true; + sg_dma_len(sg_next(s)) >= length)) { + struct dwc3_request *r; + + /* Check if previous requests already set IOC */ + list_for_each_entry(r, &dep->started_list, list) { + if (r != req && !r->request.no_interrupt) + break; + + if (r == req) + must_interrupt = true; + } + } dwc3_prepare_one_trb(dep, req, trb_length, 1, i, false, must_interrupt); From 430d57f53eb1cdbf9ba9bbd397317912b3cd2de5 Mon Sep 17 00:00:00 2001 From: Ray Chi Date: Mon, 7 Nov 2022 15:27:54 +0800 Subject: [PATCH 1036/4122] usb: core: stop USB enumeration if too many retries When a broken USB accessory connects to a USB host, usbcore might keep doing enumeration retries. If the host has a watchdog mechanism, the kernel panic will happen on the host. This patch provides an attribute early_stop to limit the numbers of retries for each port of a hub. If a port was marked with early_stop attribute, unsuccessful connection attempts will fail quickly. In addition, if an early_stop port has failed to initialize, it will ignore all future connection events until early_stop attribute is clear. 
Signed-off-by: Ray Chi Reviewed-by: Alan Stern Link: https://lore.kernel.org/r/20221107072754.3336357-1-raychi@google.com Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-bus-usb | 11 +++++ drivers/usb/core/hub.c | 60 +++++++++++++++++++++++++ drivers/usb/core/hub.h | 4 ++ drivers/usb/core/port.c | 27 +++++++++++ 4 files changed, 102 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-bus-usb b/Documentation/ABI/testing/sysfs-bus-usb index 568103d3376e..545c2dd97ed0 100644 --- a/Documentation/ABI/testing/sysfs-bus-usb +++ b/Documentation/ABI/testing/sysfs-bus-usb @@ -264,6 +264,17 @@ Description: attached to the port will not be detected, initialized, or enumerated. +What: /sys/bus/usb/devices/...//port/early_stop +Date: Sep 2022 +Contact: Ray Chi +Description: + Some USB hosts have some watchdog mechanisms so that the device + may enter ramdump if it takes a long time during port initialization. + This attribute allows each port just has two attempts so that the + port initialization will be failed quickly. In addition, if a port + which is marked with early_stop has failed to initialize, it will ignore + all future connections until this attribute is clear. + What: /sys/bus/usb/devices/.../power/usb2_lpm_l1_timeout Date: May 2013 Contact: Mathias Nyman diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index bbab424b0d55..77e73fc8d673 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -3081,6 +3081,48 @@ done: return status; } +/* + * hub_port_stop_enumerate - stop USB enumeration or ignore port events + * @hub: target hub + * @port1: port num of the port + * @retries: port retries number of hub_port_init() + * + * Return: + * true: ignore port actions/events or give up connection attempts. + * false: keep original behavior. + * + * This function will be based on retries to check whether the port which is + * marked with early_stop attribute would stop enumeration or ignore events. 
+ * + * Note: + * This function didn't change anything if early_stop is not set, and it will + * prevent all connection attempts when early_stop is set and the attempts of + * the port are more than 1. + */ +static bool hub_port_stop_enumerate(struct usb_hub *hub, int port1, int retries) +{ + struct usb_port *port_dev = hub->ports[port1 - 1]; + + if (port_dev->early_stop) { + if (port_dev->ignore_event) + return true; + + /* + * We want unsuccessful attempts to fail quickly. + * Since some devices may need one failure during + * port initialization, we allow two tries but no + * more. + */ + if (retries < 2) + return false; + + port_dev->ignore_event = 1; + } else + port_dev->ignore_event = 0; + + return port_dev->ignore_event; +} + /* Check if a port is power on */ int usb_port_is_power_on(struct usb_hub *hub, unsigned int portstatus) { @@ -4796,6 +4838,11 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1, do_new_scheme = use_new_scheme(udev, retry_counter, port_dev); for (retries = 0; retries < GET_DESCRIPTOR_TRIES; (++retries, msleep(100))) { + if (hub_port_stop_enumerate(hub, port1, retries)) { + retval = -ENODEV; + break; + } + if (do_new_scheme) { struct usb_device_descriptor *buf; int r = 0; @@ -5246,6 +5293,11 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus, status = 0; for (i = 0; i < PORT_INIT_TRIES; i++) { + if (hub_port_stop_enumerate(hub, port1, i)) { + status = -ENODEV; + break; + } + usb_lock_port(port_dev); mutex_lock(hcd->address0_mutex); retry_locked = true; @@ -5614,6 +5666,10 @@ static void port_event(struct usb_hub *hub, int port1) if (!pm_runtime_active(&port_dev->dev)) return; + /* skip port actions if ignore_event and early_stop are true */ + if (port_dev->ignore_event && port_dev->early_stop) + return; + if (hub_handle_remote_wakeup(hub, port1, portstatus, portchange)) connect_change = 1; @@ -5927,6 +5983,10 @@ static int usb_reset_and_verify_device(struct usb_device *udev) 
mutex_lock(hcd->address0_mutex); for (i = 0; i < PORT_INIT_TRIES; ++i) { + if (hub_port_stop_enumerate(parent_hub, port1, i)) { + ret = -ENODEV; + break; + } /* ep0 maxpacket size may change; let the HCD know about it. * Other endpoints will be handled by re-enumeration. */ diff --git a/drivers/usb/core/hub.h b/drivers/usb/core/hub.h index b2925856b4cb..e23833562e4f 100644 --- a/drivers/usb/core/hub.h +++ b/drivers/usb/core/hub.h @@ -90,6 +90,8 @@ struct usb_hub { * @is_superspeed cache super-speed status * @usb3_lpm_u1_permit: whether USB3 U1 LPM is permitted. * @usb3_lpm_u2_permit: whether USB3 U2 LPM is permitted. + * @early_stop: whether port initialization will be stopped earlier. + * @ignore_event: whether events of the port are ignored. */ struct usb_port { struct usb_device *child; @@ -103,6 +105,8 @@ struct usb_port { u32 over_current_count; u8 portnum; u32 quirks; + unsigned int early_stop:1; + unsigned int ignore_event:1; unsigned int is_superspeed:1; unsigned int usb3_lpm_u1_permit:1; unsigned int usb3_lpm_u2_permit:1; diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c index 015204fc67a1..06a8f1f84f6f 100644 --- a/drivers/usb/core/port.c +++ b/drivers/usb/core/port.c @@ -18,6 +18,32 @@ static int usb_port_block_power_off; static const struct attribute_group *port_dev_group[]; +static ssize_t early_stop_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct usb_port *port_dev = to_usb_port(dev); + + return sysfs_emit(buf, "%s\n", port_dev->early_stop ? 
"yes" : "no"); +} + +static ssize_t early_stop_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct usb_port *port_dev = to_usb_port(dev); + bool value; + + if (kstrtobool(buf, &value)) + return -EINVAL; + + if (value) + port_dev->early_stop = 1; + else + port_dev->early_stop = 0; + + return count; +} +static DEVICE_ATTR_RW(early_stop); + static ssize_t disable_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -237,6 +263,7 @@ static struct attribute *port_dev_attrs[] = { &dev_attr_quirks.attr, &dev_attr_over_current_count.attr, &dev_attr_disable.attr, + &dev_attr_early_stop.attr, NULL, }; From c14f7ccc9f5dcf9d06ddeec706f85405b2c80600 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Thu, 14 Jul 2022 20:41:30 +0200 Subject: [PATCH 1037/4122] PCI: Assign PCI domain IDs by ida_alloc() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace assignment of PCI domain IDs from atomic_inc_return() to ida_alloc(). Use two IDAs, one for static domain allocations (those which are defined in device tree) and second for dynamic allocations (all other). During removal of root bus / host bridge, also release the domain ID. The released ID can be reused again, for example when dynamically loading and unloading native PCI host bridge drivers. This change also allows to mix static device tree assignment and dynamic by kernel as all static allocations are reserved in dynamic pool. 
[bhelgaas: set "err" if "bus->domain_nr < 0"] Link: https://lore.kernel.org/r/20220714184130.5436-1-pali@kernel.org Signed-off-by: Pali Rohár Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 107 +++++++++++++++++++++++++------------------ drivers/pci/probe.c | 7 +++ drivers/pci/remove.c | 6 +++ include/linux/pci.h | 1 + 4 files changed, 76 insertions(+), 45 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 2127aba3550b..9f3cc829dfee 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -6743,60 +6743,70 @@ static void pci_no_domains(void) } #ifdef CONFIG_PCI_DOMAINS_GENERIC -static atomic_t __domain_nr = ATOMIC_INIT(-1); +static DEFINE_IDA(pci_domain_nr_static_ida); +static DEFINE_IDA(pci_domain_nr_dynamic_ida); -static int pci_get_new_domain_nr(void) +static void of_pci_reserve_static_domain_nr(void) { - return atomic_inc_return(&__domain_nr); + struct device_node *np; + int domain_nr; + + for_each_node_by_type(np, "pci") { + domain_nr = of_get_pci_domain_nr(np); + if (domain_nr < 0) + continue; + /* + * Permanently allocate domain_nr in dynamic_ida + * to prevent it from dynamic allocation. + */ + ida_alloc_range(&pci_domain_nr_dynamic_ida, + domain_nr, domain_nr, GFP_KERNEL); + } } static int of_pci_bus_find_domain_nr(struct device *parent) { - static int use_dt_domains = -1; - int domain = -1; + static bool static_domains_reserved = false; + int domain_nr; - if (parent) - domain = of_get_pci_domain_nr(parent->of_node); - - /* - * Check DT domain and use_dt_domains values. - * - * If DT domain property is valid (domain >= 0) and - * use_dt_domains != 0, the DT assignment is valid since this means - * we have not previously allocated a domain number by using - * pci_get_new_domain_nr(); we should also update use_dt_domains to - * 1, to indicate that we have just assigned a domain number from - * DT. 
- * - * If DT domain property value is not valid (ie domain < 0), and we - * have not previously assigned a domain number from DT - * (use_dt_domains != 1) we should assign a domain number by - * using the: - * - * pci_get_new_domain_nr() - * - * API and update the use_dt_domains value to keep track of method we - * are using to assign domain numbers (use_dt_domains = 0). - * - * All other combinations imply we have a platform that is trying - * to mix domain numbers obtained from DT and pci_get_new_domain_nr(), - * which is a recipe for domain mishandling and it is prevented by - * invalidating the domain value (domain = -1) and printing a - * corresponding error. - */ - if (domain >= 0 && use_dt_domains) { - use_dt_domains = 1; - } else if (domain < 0 && use_dt_domains != 1) { - use_dt_domains = 0; - domain = pci_get_new_domain_nr(); - } else { - if (parent) - pr_err("Node %pOF has ", parent->of_node); - pr_err("Inconsistent \"linux,pci-domain\" property in DT\n"); - domain = -1; + /* On the first call scan device tree for static allocations. */ + if (!static_domains_reserved) { + of_pci_reserve_static_domain_nr(); + static_domains_reserved = true; } - return domain; + if (parent) { + /* + * If domain is in DT, allocate it in static IDA. This + * prevents duplicate static allocations in case of errors + * in DT. + */ + domain_nr = of_get_pci_domain_nr(parent->of_node); + if (domain_nr >= 0) + return ida_alloc_range(&pci_domain_nr_static_ida, + domain_nr, domain_nr, + GFP_KERNEL); + } + + /* + * If domain was not specified in DT, choose a free ID from dynamic + * allocations. All domain numbers from DT are permanently in + * dynamic allocations to prevent assigning them to other DT nodes + * without static domain. + */ + return ida_alloc(&pci_domain_nr_dynamic_ida, GFP_KERNEL); +} + +static void of_pci_bus_release_domain_nr(struct pci_bus *bus, struct device *parent) +{ + if (bus->domain_nr < 0) + return; + + /* Release domain from IDA where it was allocated. 
*/ + if (of_get_pci_domain_nr(parent->of_node) == bus->domain_nr) + ida_free(&pci_domain_nr_static_ida, bus->domain_nr); + else + ida_free(&pci_domain_nr_dynamic_ida, bus->domain_nr); } int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent) @@ -6804,6 +6814,13 @@ int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent) return acpi_disabled ? of_pci_bus_find_domain_nr(parent) : acpi_pci_bus_find_domain_nr(bus); } + +void pci_bus_release_domain_nr(struct pci_bus *bus, struct device *parent) +{ + if (!acpi_disabled) + return; + of_pci_bus_release_domain_nr(bus, parent); +} #endif /** diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 1d6f7b502020..1e234189aff1 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -906,6 +906,10 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge) bus->domain_nr = pci_bus_find_domain_nr(bus, parent); else bus->domain_nr = bridge->domain_nr; + if (bus->domain_nr < 0) { + err = bus->domain_nr; + goto free; + } #endif b = pci_find_bus(pci_domain_nr(bus), bridge->busnr); @@ -1030,6 +1034,9 @@ unregister: device_del(&bridge->dev); free: +#ifdef CONFIG_PCI_DOMAINS_GENERIC + pci_bus_release_domain_nr(bus, parent); +#endif kfree(bus); return err; } diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c index 4c54c75050dc..0145aef1b930 100644 --- a/drivers/pci/remove.c +++ b/drivers/pci/remove.c @@ -160,6 +160,12 @@ void pci_remove_root_bus(struct pci_bus *bus) pci_remove_bus(bus); host_bridge->bus = NULL; +#ifdef CONFIG_PCI_DOMAINS_GENERIC + /* Release domain_nr if it was dynamically allocated */ + if (host_bridge->domain_nr == PCI_DOMAIN_NR_NOT_SET) + pci_bus_release_domain_nr(bus, host_bridge->dev.parent); +#endif + /* remove the host bridge */ device_del(&host_bridge->dev); } diff --git a/include/linux/pci.h b/include/linux/pci.h index 2bda4a4e47e8..28af4414f789 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1726,6 +1726,7 @@ static inline int 
acpi_pci_bus_find_domain_nr(struct pci_bus *bus) { return 0; } #endif int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent); +void pci_bus_release_domain_nr(struct pci_bus *bus, struct device *parent); #endif /* Some architectures require additional setup to direct VGA traffic */ From a9dfc46c67b52ad43b8e335e28f4cf8002c67793 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Sat, 5 Nov 2022 12:01:14 +0900 Subject: [PATCH 1038/4122] perf probe: Fix to get the DW_AT_decl_file and DW_AT_call_file as unsigned data DWARF version 5 standard Sec 2.14 says that Any debugging information entry representing the declaration of an object, module, subprogram or type may have DW_AT_decl_file, DW_AT_decl_line and DW_AT_decl_column attributes, each of whose value is an unsigned integer constant. So it should be an unsigned integer data. Also, even though the standard doesn't clearly say the DW_AT_call_file is signed or unsigned, the elfutils (eu-readelf) interprets it as unsigned integer data and it is natural to handle it as unsigned integer data as same as DW_AT_decl_file. This changes the DW_AT_call_file as unsigned integer data too.
Fixes: 3f4460a28fb2f73d ("perf probe: Filter out redundant inline-instances") Signed-off-by: Masami Hiramatsu Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: stable@vger.kernel.org Cc: Steven Rostedt (VMware) Link: https://lore.kernel.org/r/166761727445.480106.3738447577082071942.stgit@devnote3 Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dwarf-aux.c | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 30b36b525681..b07414409771 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -315,19 +315,6 @@ static int die_get_attr_udata(Dwarf_Die *tp_die, unsigned int attr_name, return 0; } -/* Get attribute and translate it as a sdata */ -static int die_get_attr_sdata(Dwarf_Die *tp_die, unsigned int attr_name, - Dwarf_Sword *result) -{ - Dwarf_Attribute attr; - - if (dwarf_attr_integrate(tp_die, attr_name, &attr) == NULL || - dwarf_formsdata(&attr, result) != 0) - return -ENOENT; - - return 0; -} - /** * die_is_signed_type - Check whether a type DIE is signed or not * @tp_die: a DIE of a type @@ -467,9 +454,9 @@ int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs) /* Get the call file index number in CU DIE */ static int die_get_call_fileno(Dwarf_Die *in_die) { - Dwarf_Sword idx; + Dwarf_Word idx; - if (die_get_attr_sdata(in_die, DW_AT_call_file, &idx) == 0) + if (die_get_attr_udata(in_die, DW_AT_call_file, &idx) == 0) return (int)idx; else return -ENOENT; @@ -478,9 +465,9 @@ static int die_get_call_fileno(Dwarf_Die *in_die) /* Get the declared file index number in CU DIE */ static int die_get_decl_fileno(Dwarf_Die *pdie) { - Dwarf_Sword idx; + Dwarf_Word idx; - if (die_get_attr_sdata(pdie, DW_AT_decl_file, &idx) == 0) + if (die_get_attr_udata(pdie, DW_AT_decl_file, &idx) == 0) return (int)idx; else return -ENOENT; From 
57a196a58421a4b0c45949ae7309f21829aaa77f Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Sun, 18 Sep 2022 19:13:48 -0700 Subject: [PATCH 1039/4122] hugetlb: simplify hugetlb handling in follow_page_mask During discussions of this series [1], it was suggested that hugetlb handling code in follow_page_mask could be simplified. At the beginning of follow_page_mask, there currently is a call to follow_huge_addr which 'may' handle hugetlb pages. ia64 is the only architecture which provides a follow_huge_addr routine that does not return error. Instead, at each level of the page table a check is made for a hugetlb entry. If a hugetlb entry is found, a call to a routine associated with that entry is made. Currently, there are two checks for hugetlb entries at each page table level. The first check is of the form: if (p?d_huge()) page = follow_huge_p?d(); the second check is of the form: if (is_hugepd()) page = follow_huge_pd(). We can replace these checks, as well as the special handling routines such as follow_huge_p?d() and follow_huge_pd() with a single routine to handle hugetlb vmas. A new routine hugetlb_follow_page_mask is called for hugetlb vmas at the beginning of follow_page_mask. hugetlb_follow_page_mask will use the existing routine huge_pte_offset to walk page tables looking for hugetlb entries. huge_pte_offset can be overwritten by architectures, and already handles special cases such as hugepd entries. 
[1] https://lore.kernel.org/linux-mm/cover.1661240170.git.baolin.wang@linux.alibaba.com/ [mike.kravetz@oracle.com: remove vma (pmd sharing) per Peter] Link: https://lkml.kernel.org/r/20221028181108.119432-1-mike.kravetz@oracle.com [mike.kravetz@oracle.com: remove left over hugetlb_vma_unlock_read()] Link: https://lkml.kernel.org/r/20221030225825.40872-1-mike.kravetz@oracle.com Link: https://lkml.kernel.org/r/20220919021348.22151-1-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Suggested-by: David Hildenbrand Reviewed-by: David Hildenbrand Reviewed-by: Baolin Wang Tested-by: Baolin Wang Cc: Aneesh Kumar K.V Cc: Christophe Leroy Cc: Michael Ellerman Cc: Muchun Song Cc: Naoya Horiguchi Signed-off-by: Andrew Morton --- arch/ia64/mm/hugetlbpage.c | 15 --- arch/powerpc/mm/hugetlbpage.c | 37 -------- include/linux/hugetlb.h | 50 ++-------- mm/gup.c | 80 +++------------- mm/hugetlb.c | 172 +++++++++++----------------------- 5 files changed, 76 insertions(+), 278 deletions(-) diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c index f993cb36c062..380d2f3966c9 100644 --- a/arch/ia64/mm/hugetlbpage.c +++ b/arch/ia64/mm/hugetlbpage.c @@ -91,21 +91,6 @@ int prepare_hugepage_range(struct file *file, return 0; } -struct page *follow_huge_addr(struct mm_struct *mm, unsigned long addr, int write) -{ - struct page *page; - pte_t *ptep; - - if (REGION_NUMBER(addr) != RGN_HPAGE) - return ERR_PTR(-EINVAL); - - ptep = huge_pte_offset(mm, addr, HPAGE_SIZE); - if (!ptep || pte_none(*ptep)) - return NULL; - page = pte_page(*ptep); - page += ((addr & ~HPAGE_MASK) >> PAGE_SHIFT); - return page; -} int pmd_huge(pmd_t pmd) { return 0; diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 5852a86d990d..f1ba8d1e8c1a 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -506,43 +506,6 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, } while (addr = next, addr != end); } -struct page *follow_huge_pd(struct 
vm_area_struct *vma, - unsigned long address, hugepd_t hpd, - int flags, int pdshift) -{ - pte_t *ptep; - spinlock_t *ptl; - struct page *page = NULL; - unsigned long mask; - int shift = hugepd_shift(hpd); - struct mm_struct *mm = vma->vm_mm; - -retry: - /* - * hugepage directory entries are protected by mm->page_table_lock - * Use this instead of huge_pte_lockptr - */ - ptl = &mm->page_table_lock; - spin_lock(ptl); - - ptep = hugepte_offset(hpd, address, pdshift); - if (pte_present(*ptep)) { - mask = (1UL << shift) - 1; - page = pte_page(*ptep); - page += ((address & mask) >> PAGE_SHIFT); - if (flags & FOLL_GET) - get_page(page); - } else { - if (is_hugetlb_entry_migration(*ptep)) { - spin_unlock(ptl); - __migration_entry_wait(mm, ptep, ptl); - goto retry; - } - } - spin_unlock(ptl); - return page; -} - bool __init arch_hugetlb_valid_size(unsigned long size) { int shift = __ffs(size); diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 8b4f93e84868..4a76c0fc6bbf 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -149,6 +149,8 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma, unsigned long len); int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *, struct vm_area_struct *); +struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma, + unsigned long address, unsigned int flags); long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, unsigned long *, long, unsigned int, @@ -209,17 +211,6 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, pte_t *ptep); void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, unsigned long *start, unsigned long *end); -struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, - int write); -struct page *follow_huge_pd(struct vm_area_struct *vma, - unsigned long address, hugepd_t hpd, - int flags, int 
pdshift); -struct page *follow_huge_pmd_pte(struct vm_area_struct *vma, unsigned long address, - int flags); -struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address, - pud_t *pud, int flags); -struct page *follow_huge_pgd(struct mm_struct *mm, unsigned long address, - pgd_t *pgd, int flags); void hugetlb_vma_lock_read(struct vm_area_struct *vma); void hugetlb_vma_unlock_read(struct vm_area_struct *vma); @@ -272,6 +263,12 @@ static inline void adjust_range_if_pmd_sharing_possible( { } +static inline struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma, + unsigned long address, unsigned int flags) +{ + BUILD_BUG(); /* should never be compiled in if !CONFIG_HUGETLB_PAGE*/ +} + static inline long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, @@ -282,12 +279,6 @@ static inline long follow_hugetlb_page(struct mm_struct *mm, return 0; } -static inline struct page *follow_huge_addr(struct mm_struct *mm, - unsigned long address, int write) -{ - return ERR_PTR(-EINVAL); -} - static inline int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *dst_vma, @@ -320,31 +311,6 @@ static inline void hugetlb_show_meminfo_node(int nid) { } -static inline struct page *follow_huge_pd(struct vm_area_struct *vma, - unsigned long address, hugepd_t hpd, int flags, - int pdshift) -{ - return NULL; -} - -static inline struct page *follow_huge_pmd_pte(struct vm_area_struct *vma, - unsigned long address, int flags) -{ - return NULL; -} - -static inline struct page *follow_huge_pud(struct mm_struct *mm, - unsigned long address, pud_t *pud, int flags) -{ - return NULL; -} - -static inline struct page *follow_huge_pgd(struct mm_struct *mm, - unsigned long address, pgd_t *pgd, int flags) -{ - return NULL; -} - static inline int prepare_hugepage_range(struct file *file, unsigned long addr, unsigned long len) { diff --git a/mm/gup.c 
b/mm/gup.c index fe195d47de74..6b16aecf5d2c 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -537,18 +537,6 @@ static struct page *follow_page_pte(struct vm_area_struct *vma, if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) == (FOLL_PIN | FOLL_GET))) return ERR_PTR(-EINVAL); - - /* - * Considering PTE level hugetlb, like continuous-PTE hugetlb on - * ARM64 architecture. - */ - if (is_vm_hugetlb_page(vma)) { - page = follow_huge_pmd_pte(vma, address, flags); - if (page) - return page; - return no_page_table(vma, flags); - } - retry: if (unlikely(pmd_bad(*pmd))) return no_page_table(vma, flags); @@ -680,20 +668,6 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma, pmdval = READ_ONCE(*pmd); if (pmd_none(pmdval)) return no_page_table(vma, flags); - if (pmd_huge(pmdval) && is_vm_hugetlb_page(vma)) { - page = follow_huge_pmd_pte(vma, address, flags); - if (page) - return page; - return no_page_table(vma, flags); - } - if (is_hugepd(__hugepd(pmd_val(pmdval)))) { - page = follow_huge_pd(vma, address, - __hugepd(pmd_val(pmdval)), flags, - PMD_SHIFT); - if (page) - return page; - return no_page_table(vma, flags); - } retry: if (!pmd_present(pmdval)) { /* @@ -783,20 +757,6 @@ static struct page *follow_pud_mask(struct vm_area_struct *vma, pud = pud_offset(p4dp, address); if (pud_none(*pud)) return no_page_table(vma, flags); - if (pud_huge(*pud) && is_vm_hugetlb_page(vma)) { - page = follow_huge_pud(mm, address, pud, flags); - if (page) - return page; - return no_page_table(vma, flags); - } - if (is_hugepd(__hugepd(pud_val(*pud)))) { - page = follow_huge_pd(vma, address, - __hugepd(pud_val(*pud)), flags, - PUD_SHIFT); - if (page) - return page; - return no_page_table(vma, flags); - } if (pud_devmap(*pud)) { ptl = pud_lock(mm, pud); page = follow_devmap_pud(vma, address, pud, flags, &ctx->pgmap); @@ -816,7 +776,6 @@ static struct page *follow_p4d_mask(struct vm_area_struct *vma, struct follow_page_context *ctx) { p4d_t *p4d; - struct page *page; p4d = p4d_offset(pgdp, 
address); if (p4d_none(*p4d)) @@ -825,14 +784,6 @@ static struct page *follow_p4d_mask(struct vm_area_struct *vma, if (unlikely(p4d_bad(*p4d))) return no_page_table(vma, flags); - if (is_hugepd(__hugepd(p4d_val(*p4d)))) { - page = follow_huge_pd(vma, address, - __hugepd(p4d_val(*p4d)), flags, - P4D_SHIFT); - if (page) - return page; - return no_page_table(vma, flags); - } return follow_pud_mask(vma, address, p4d, flags, ctx); } @@ -870,10 +821,18 @@ static struct page *follow_page_mask(struct vm_area_struct *vma, ctx->page_mask = 0; - /* make this handle hugepd */ - page = follow_huge_addr(mm, address, flags & FOLL_WRITE); - if (!IS_ERR(page)) { - WARN_ON_ONCE(flags & (FOLL_GET | FOLL_PIN)); + /* + * Call hugetlb_follow_page_mask for hugetlb vmas as it will use + * special hugetlb page table walking code. This eliminates the + * need to check for hugetlb entries in the general walking code. + * + * hugetlb_follow_page_mask is only for follow_page() handling here. + * Ordinary GUP uses follow_hugetlb_page for hugetlb processing. 
+ */ + if (is_vm_hugetlb_page(vma)) { + page = hugetlb_follow_page_mask(vma, address, flags); + if (!page) + page = no_page_table(vma, flags); return page; } @@ -882,21 +841,6 @@ static struct page *follow_page_mask(struct vm_area_struct *vma, if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) return no_page_table(vma, flags); - if (pgd_huge(*pgd)) { - page = follow_huge_pgd(mm, address, pgd, flags); - if (page) - return page; - return no_page_table(vma, flags); - } - if (is_hugepd(__hugepd(pgd_val(*pgd)))) { - page = follow_huge_pd(vma, address, - __hugepd(pgd_val(*pgd)), flags, - PGDIR_SHIFT); - if (page) - return page; - return no_page_table(vma, flags); - } - return follow_p4d_mask(vma, address, pgd, flags, ctx); } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 546df97c31e4..0af18c1e4b31 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -6209,6 +6209,62 @@ static inline bool __follow_hugetlb_must_fault(unsigned int flags, pte_t *pte, return false; } +struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma, + unsigned long address, unsigned int flags) +{ + struct hstate *h = hstate_vma(vma); + struct mm_struct *mm = vma->vm_mm; + unsigned long haddr = address & huge_page_mask(h); + struct page *page = NULL; + spinlock_t *ptl; + pte_t *pte, entry; + + /* + * FOLL_PIN is not supported for follow_page(). Ordinary GUP goes via + * follow_hugetlb_page(). + */ + if (WARN_ON_ONCE(flags & FOLL_PIN)) + return NULL; + +retry: + pte = huge_pte_offset(mm, haddr, huge_page_size(h)); + if (!pte) + return NULL; + + ptl = huge_pte_lock(h, mm, pte); + entry = huge_ptep_get(pte); + if (pte_present(entry)) { + page = pte_page(entry) + + ((address & ~huge_page_mask(h)) >> PAGE_SHIFT); + /* + * Note that page may be a sub-page, and with vmemmap + * optimizations the page struct may be read only. + * try_grab_page() will increase the ref count on the + * head page, so this will be OK. 
+ * + * try_grab_page() should always succeed here, because we hold + * the ptl lock and have verified pte_present(). + */ + if (WARN_ON_ONCE(!try_grab_page(page, flags))) { + page = NULL; + goto out; + } + } else { + if (is_hugetlb_entry_migration(entry)) { + spin_unlock(ptl); + __migration_entry_wait_huge(pte, ptl); + goto retry; + } + /* + * hwpoisoned entry is treated as no_page_table in + * follow_page_mask(). + */ + } +out: + spin_unlock(ptl); + return page; +} + long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, unsigned long *nr_pages, @@ -7201,122 +7257,6 @@ __weak unsigned long hugetlb_mask_last_page(struct hstate *h) * These functions are overwritable if your architecture needs its own * behavior. */ -struct page * __weak -follow_huge_addr(struct mm_struct *mm, unsigned long address, - int write) -{ - return ERR_PTR(-EINVAL); -} - -struct page * __weak -follow_huge_pd(struct vm_area_struct *vma, - unsigned long address, hugepd_t hpd, int flags, int pdshift) -{ - WARN(1, "hugepd follow called with no support for hugepage directory format\n"); - return NULL; -} - -struct page * __weak -follow_huge_pmd_pte(struct vm_area_struct *vma, unsigned long address, int flags) -{ - struct hstate *h = hstate_vma(vma); - struct mm_struct *mm = vma->vm_mm; - struct page *page = NULL; - spinlock_t *ptl; - pte_t *ptep, pte; - - /* - * FOLL_PIN is not supported for follow_page(). Ordinary GUP goes via - * follow_hugetlb_page(). 
- */ - if (WARN_ON_ONCE(flags & FOLL_PIN)) - return NULL; - -retry: - ptep = huge_pte_offset(mm, address, huge_page_size(h)); - if (!ptep) - return NULL; - - ptl = huge_pte_lock(h, mm, ptep); - pte = huge_ptep_get(ptep); - if (pte_present(pte)) { - page = pte_page(pte) + - ((address & ~huge_page_mask(h)) >> PAGE_SHIFT); - /* - * try_grab_page() should always succeed here, because: a) we - * hold the pmd (ptl) lock, and b) we've just checked that the - * huge pmd (head) page is present in the page tables. The ptl - * prevents the head page and tail pages from being rearranged - * in any way. So this page must be available at this point, - * unless the page refcount overflowed: - */ - if (WARN_ON_ONCE(!try_grab_page(page, flags))) { - page = NULL; - goto out; - } - } else { - if (is_hugetlb_entry_migration(pte)) { - spin_unlock(ptl); - __migration_entry_wait_huge(ptep, ptl); - goto retry; - } - /* - * hwpoisoned entry is treated as no_page_table in - * follow_page_mask(). - */ - } -out: - spin_unlock(ptl); - return page; -} - -struct page * __weak -follow_huge_pud(struct mm_struct *mm, unsigned long address, - pud_t *pud, int flags) -{ - struct page *page = NULL; - spinlock_t *ptl; - pte_t pte; - - if (WARN_ON_ONCE(flags & FOLL_PIN)) - return NULL; - -retry: - ptl = huge_pte_lock(hstate_sizelog(PUD_SHIFT), mm, (pte_t *)pud); - if (!pud_huge(*pud)) - goto out; - pte = huge_ptep_get((pte_t *)pud); - if (pte_present(pte)) { - page = pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT); - if (WARN_ON_ONCE(!try_grab_page(page, flags))) { - page = NULL; - goto out; - } - } else { - if (is_hugetlb_entry_migration(pte)) { - spin_unlock(ptl); - __migration_entry_wait(mm, (pte_t *)pud, ptl); - goto retry; - } - /* - * hwpoisoned entry is treated as no_page_table in - * follow_page_mask(). 
- */ - } -out: - spin_unlock(ptl); - return page; -} - -struct page * __weak -follow_huge_pgd(struct mm_struct *mm, unsigned long address, pgd_t *pgd, int flags) -{ - if (flags & (FOLL_GET | FOLL_PIN)) - return NULL; - - return pte_page(*(pte_t *)pgd) + ((address & ~PGDIR_MASK) >> PAGE_SHIFT); -} - int isolate_hugetlb(struct page *page, struct list_head *list) { int ret = 0; From 0538a82c39e94d49fa6985c6a0101ca819be11ee Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 13 Oct 2022 15:31:13 -0400 Subject: [PATCH 1040/4122] mm: vmscan: make rotations a secondary factor in balancing anon vs file We noticed a 2% webserver throughput regression after upgrading from 5.6. This could be tracked down to a shift in the anon/file reclaim balance (confirmed with swappiness) that resulted in worse reclaim efficiency and thus more kswapd activity for the same outcome. The change that exposed the problem is aae466b0052e ("mm/swap: implement workingset detection for anonymous LRU"). By qualifying swapins based on their refault distance, it lowered the cost of anon reclaim in this workload, in turn causing (much) more anon scanning than before. Scanning the anon list is more expensive due to the higher ratio of mmapped pages that may rotate during reclaim, and so the result was an increase in %sys time. Right now, rotations aren't considered a cost when balancing scan pressure between LRUs. We can end up with very few file refaults putting all the scan pressure on hot anon pages that are rotated en masse, don't get reclaimed, and never push back on the file LRU again. We still only reclaim file cache in that case, but we burn a lot of CPU rotating anon pages. It's "fair" from an LRU age POV, but doesn't reflect the real cost it imposes on the system. Consider rotations as a secondary factor in balancing the LRUs.
This doesn't attempt to make a precise comparison between IO cost and CPU cost, it just says: if reloads are about comparable between the lists, or rotations are overwhelmingly different, adjust for CPU work. This fixed the regression on our webservers. It has since been deployed to the entire Meta fleet and hasn't caused any problems. Link: https://lkml.kernel.org/r/20221013193113.726425-1-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Cc: Rik van Riel Signed-off-by: Andrew Morton --- include/linux/swap.h | 5 +++-- mm/swap.c | 22 +++++++++++++++++----- mm/vmscan.c | 4 +++- mm/workingset.c | 2 +- 4 files changed, 24 insertions(+), 9 deletions(-) diff --git a/include/linux/swap.h b/include/linux/swap.h index a18cf4b7c724..369d7799205d 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -384,8 +384,9 @@ extern unsigned long totalreserve_pages; /* linux/mm/swap.c */ -void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages); -void lru_note_cost_folio(struct folio *); +void lru_note_cost(struct lruvec *lruvec, bool file, + unsigned int nr_io, unsigned int nr_rotated); +void lru_note_cost_refault(struct folio *); void folio_add_lru(struct folio *); void folio_add_lru_vma(struct folio *, struct vm_area_struct *); void lru_cache_add(struct page *); diff --git a/mm/swap.c b/mm/swap.c index 955930f41d20..2f12a2ee1d3a 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -295,8 +295,20 @@ void folio_rotate_reclaimable(struct folio *folio) } } -void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages) +void lru_note_cost(struct lruvec *lruvec, bool file, + unsigned int nr_io, unsigned int nr_rotated) { + unsigned long cost; + + /* + * Reflect the relative cost of incurring IO and spending CPU + * time on rotations. This doesn't attempt to make a precise + * comparison, it just says: if reloads are about comparable + * between the LRU lists, or rotations are overwhelmingly + * different between them, adjust scan balance for CPU work. 
+ */ + cost = nr_io * SWAP_CLUSTER_MAX + nr_rotated; + do { unsigned long lrusize; @@ -310,9 +322,9 @@ void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages) spin_lock_irq(&lruvec->lru_lock); /* Record cost event */ if (file) - lruvec->file_cost += nr_pages; + lruvec->file_cost += cost; else - lruvec->anon_cost += nr_pages; + lruvec->anon_cost += cost; /* * Decay previous events @@ -335,10 +347,10 @@ void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages) } while ((lruvec = parent_lruvec(lruvec))); } -void lru_note_cost_folio(struct folio *folio) +void lru_note_cost_refault(struct folio *folio) { lru_note_cost(folio_lruvec(folio), folio_is_file_lru(folio), - folio_nr_pages(folio)); + folio_nr_pages(folio), 0); } static void folio_activate_fn(struct lruvec *lruvec, struct folio *folio) diff --git a/mm/vmscan.c b/mm/vmscan.c index 04d8b88e5216..ffe402e095d3 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2499,7 +2499,7 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan, __count_vm_events(PGSTEAL_ANON + file, nr_reclaimed); spin_unlock_irq(&lruvec->lru_lock); - lru_note_cost(lruvec, file, stat.nr_pageout); + lru_note_cost(lruvec, file, stat.nr_pageout, nr_scanned - nr_reclaimed); mem_cgroup_uncharge_list(&folio_list); free_unref_page_list(&folio_list); @@ -2639,6 +2639,8 @@ static void shrink_active_list(unsigned long nr_to_scan, __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken); spin_unlock_irq(&lruvec->lru_lock); + if (nr_rotated) + lru_note_cost(lruvec, file, 0, nr_rotated); mem_cgroup_uncharge_list(&l_active); free_unref_page_list(&l_active); trace_mm_vmscan_lru_shrink_active(pgdat->node_id, nr_taken, nr_activate, diff --git a/mm/workingset.c b/mm/workingset.c index ae7e984b23c6..d2d02978588c 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -493,7 +493,7 @@ void workingset_refault(struct folio *folio, void *shadow) if (workingset) { folio_set_workingset(folio); /* XXX: Move to 
lru_cache_add() when it supports new vs putback */ - lru_note_cost_folio(folio); + lru_note_cost_refault(folio); mod_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + file, nr); } out: From de2baa880de3d5eecf2e46ebdee0aaeb565f5917 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 14 Oct 2022 10:39:18 -0400 Subject: [PATCH 1041/4122] selftests/vm: use memfd for uffd hugetlb tests Patch series "selftests/vm: Drop hugetlb mntpoint in run_vmtests.sh", v2. Clean the code up so we can use the same memfd for both hugetlb and shmem which is cleaner. This patch (of 4): We already used memfd for shmem test, move it forward with hugetlb too so that we don't need user to specify the hugetlb file path explicitly when running hugetlb shared tests. Link: https://lkml.kernel.org/r/20221014143921.93887-1-peterx@redhat.com Link: https://lkml.kernel.org/r/20221014143921.93887-2-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Axel Rasmussen Cc: Axel Rasmussen Cc: Mike Kravetz Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/userfaultfd.c | 62 ++++++++---------------- 1 file changed, 21 insertions(+), 41 deletions(-) diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 297f250c1d95..7f22844ed704 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -93,10 +93,8 @@ static volatile bool test_uffdio_zeropage_eexist = true; static bool test_uffdio_wp = true; /* Whether to test uffd minor faults */ static bool test_uffdio_minor = false; - static bool map_shared; -static int shm_fd; -static int huge_fd; +static int mem_fd; static unsigned long long *count_verify; static int uffd = -1; static int uffd_flags, finished, *pipefd; @@ -143,7 +141,7 @@ const char *examples = "# Run hugetlb memory test on 256MiB region with 50 bounces:\n" "./userfaultfd hugetlb 256 50\n\n" "# Run the same hugetlb test but using shared file:\n" - "./userfaultfd hugetlb_shared 256 50 
/dev/hugepages/hugefile\n\n" + "./userfaultfd hugetlb_shared 256 50\n\n" "# 10MiB-~6GiB 999 bounces anonymous test, " "continue forever unless an error triggers\n" "while ./userfaultfd anon $[RANDOM % 6000 + 10] 999; do true; done\n\n"; @@ -260,35 +258,21 @@ static void hugetlb_release_pages(char *rel_area) static void hugetlb_allocate_area(void **alloc_area, bool is_src) { + off_t size = nr_pages * page_size; + off_t offset = is_src ? 0 : size; void *area_alias = NULL; char **alloc_area_alias; - if (!map_shared) - *alloc_area = mmap(NULL, - nr_pages * page_size, - PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | - (is_src ? 0 : MAP_NORESERVE), - -1, - 0); - else - *alloc_area = mmap(NULL, - nr_pages * page_size, - PROT_READ | PROT_WRITE, - MAP_SHARED | - (is_src ? 0 : MAP_NORESERVE), - huge_fd, - is_src ? 0 : nr_pages * page_size); + *alloc_area = mmap(NULL, size, PROT_READ | PROT_WRITE, + (map_shared ? MAP_SHARED : MAP_PRIVATE) | + (is_src ? 0 : MAP_NORESERVE), + mem_fd, offset); if (*alloc_area == MAP_FAILED) err("mmap of hugetlbfs file failed"); if (map_shared) { - area_alias = mmap(NULL, - nr_pages * page_size, - PROT_READ | PROT_WRITE, - MAP_SHARED, - huge_fd, - is_src ? 
0 : nr_pages * page_size); + area_alias = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_SHARED, mem_fd, offset); if (area_alias == MAP_FAILED) err("mmap of hugetlb file alias failed"); } @@ -334,14 +318,14 @@ static void shmem_allocate_area(void **alloc_area, bool is_src) } *alloc_area = mmap(p, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, - shm_fd, offset); + mem_fd, offset); if (*alloc_area == MAP_FAILED) err("mmap of memfd failed"); if (test_collapse && *alloc_area != p) err("mmap of memfd failed at %p", p); area_alias = mmap(p_alias, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, - shm_fd, offset); + mem_fd, offset); if (area_alias == MAP_FAILED) err("mmap of memfd alias failed"); if (test_collapse && area_alias != p_alias) @@ -1841,21 +1825,17 @@ int main(int argc, char **argv) } nr_pages = nr_pages_per_cpu * nr_cpus; - if (test_type == TEST_HUGETLB && map_shared) { - if (argc < 5) - usage(); - huge_fd = open(argv[4], O_CREAT | O_RDWR, 0755); - if (huge_fd < 0) - err("Open of %s failed", argv[4]); - if (ftruncate(huge_fd, 0)) - err("ftruncate %s to size 0 failed", argv[4]); - } else if (test_type == TEST_SHMEM) { - shm_fd = memfd_create(argv[0], 0); - if (shm_fd < 0) + if (test_type == TEST_SHMEM || test_type == TEST_HUGETLB) { + unsigned int memfd_flags = 0; + + if (test_type == TEST_HUGETLB) + memfd_flags = MFD_HUGETLB; + mem_fd = memfd_create(argv[0], memfd_flags); + if (mem_fd < 0) err("memfd_create"); - if (ftruncate(shm_fd, nr_pages * page_size * 2)) + if (ftruncate(mem_fd, nr_pages * page_size * 2)) err("ftruncate"); - if (fallocate(shm_fd, + if (fallocate(mem_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, nr_pages * page_size * 2)) err("fallocate"); From 62f33fa228003a419a9484eed5133447a280387c Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 14 Oct 2022 10:39:19 -0400 Subject: [PATCH 1042/4122] selftests/vm: use memfd for hugetlb-madvise test For dropping the hugetlb mountpoint in run_vmtests.sh. Since no parameter is needed, drop USAGE too. 
Link: https://lkml.kernel.org/r/20221014143921.93887-3-peterx@redhat.com Signed-off-by: Peter Xu Cc: Axel Rasmussen Cc: Mike Kravetz Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/hugetlb-madvise.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/vm/hugetlb-madvise.c b/tools/testing/selftests/vm/hugetlb-madvise.c index 3c9943131881..f96435b70986 100644 --- a/tools/testing/selftests/vm/hugetlb-madvise.c +++ b/tools/testing/selftests/vm/hugetlb-madvise.c @@ -12,6 +12,7 @@ * directory. */ +#define _GNU_SOURCE #include #include #include @@ -19,7 +20,6 @@ #define __USE_GNU #include -#define USAGE "USAGE: %s \n" #define MIN_FREE_PAGES 20 #define NR_HUGE_PAGES 10 /* common number of pages to map/allocate */ @@ -103,11 +103,6 @@ int main(int argc, char **argv) int fd; int ret; - if (argc != 2) { - printf(USAGE, argv[0]); - exit(1); - } - huge_page_size = default_huge_page_size(); if (!huge_page_size) { printf("Unable to determine huge page size, exiting!\n"); @@ -125,9 +120,9 @@ int main(int argc, char **argv) exit(1); } - fd = open(argv[1], O_CREAT | O_RDWR, 0755); + fd = memfd_create(argv[0], MFD_HUGETLB); if (fd < 0) { - perror("Open failed"); + perror("memfd_create() failed"); exit(1); } @@ -406,6 +401,5 @@ int main(int argc, char **argv) (void)munmap(addr2, NR_HUGE_PAGES * huge_page_size); close(fd); - unlink(argv[1]); return 0; } From 4705700d4fef3178e93230f53c2c528569744bb6 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 14 Oct 2022 10:40:13 -0400 Subject: [PATCH 1043/4122] selftests/vm: use memfd for hugepage-mremap test For dropping the hugetlb mountpoint in run_vmtests.sh. Cleaned it up a little bit around the changed codes. 
Link: https://lkml.kernel.org/r/20221014144013.94027-1-peterx@redhat.com Signed-off-by: Peter Xu Cc: Axel Rasmussen Cc: Mike Kravetz Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/hugepage-mremap.c | 21 +++++++++----------- 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/tools/testing/selftests/vm/hugepage-mremap.c b/tools/testing/selftests/vm/hugepage-mremap.c index e63a0214f639..e53b5eaa8fce 100644 --- a/tools/testing/selftests/vm/hugepage-mremap.c +++ b/tools/testing/selftests/vm/hugepage-mremap.c @@ -22,6 +22,7 @@ #include /* Definition of SYS_* constants */ #include #include +#include #define DEFAULT_LENGTH_MB 10UL #define MB_TO_BYTES(x) (x * 1024 * 1024) @@ -108,26 +109,23 @@ static void register_region_with_uffd(char *addr, size_t len) int main(int argc, char *argv[]) { size_t length = 0; + int ret = 0, fd; - if (argc != 2 && argc != 3) { - printf("Usage: %s [length_in_MB] \n", argv[0]); + if (argc >= 2 && !strcmp(argv[1], "-h")) { + printf("Usage: %s [length_in_MB]\n", argv[0]); exit(1); } /* Read memory length as the first arg if valid, otherwise fallback to * the default length. */ - if (argc == 3) - length = argc > 2 ? (size_t)atoi(argv[1]) : 0UL; + if (argc >= 2) + length = (size_t)atoi(argv[1]); + else + length = DEFAULT_LENGTH_MB; - length = length > 0 ? 
length : DEFAULT_LENGTH_MB; length = MB_TO_BYTES(length); - - int ret = 0; - - /* last arg is the hugetlb file name */ - int fd = open(argv[argc-1], O_CREAT | O_RDWR, 0755); - + fd = memfd_create(argv[0], MFD_HUGETLB); if (fd < 0) { perror("Open failed"); exit(1); @@ -185,7 +183,6 @@ int main(int argc, char *argv[]) } close(fd); - unlink(argv[argc-1]); return ret; } From 0796c7b8be84415994fa37e9a022e5595d915dd7 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 14 Oct 2022 10:40:15 -0400 Subject: [PATCH 1044/4122] selftests/vm: drop mnt point for hugetlb in run_vmtests.sh After converting all the three relevant testcases (uffd, madvise, mremap) to use memfd, no test will need the hugetlb mount point anymore. Drop the code. Link: https://lkml.kernel.org/r/20221014144015.94039-1-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Axel Rasmussen Cc: Mike Kravetz Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/run_vmtests.sh | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh index e780e76c26b8..0dc9f545a32d 100755 --- a/tools/testing/selftests/vm/run_vmtests.sh +++ b/tools/testing/selftests/vm/run_vmtests.sh @@ -5,7 +5,6 @@ # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 -mnt=./huge exitcode=0 #get huge pagesize and freepages from /proc/meminfo @@ -84,9 +83,6 @@ run_test() { fi } -mkdir "$mnt" -mount -t hugetlbfs none "$mnt" - run_test ./hugepage-mmap shmmax=$(cat /proc/sys/kernel/shmmax) @@ -98,14 +94,9 @@ echo "$shmmax" > /proc/sys/kernel/shmmax echo "$shmall" > /proc/sys/kernel/shmall run_test ./map_hugetlb - -run_test ./hugepage-mremap "$mnt"/huge_mremap -rm -f "$mnt"/huge_mremap - +run_test ./hugepage-mremap run_test ./hugepage-vmemmap - -run_test ./hugetlb-madvise "$mnt"/madvise-test -rm -f "$mnt"/madvise-test +run_test ./hugetlb-madvise echo "NOTE: The above hugetlb tests provide minimal coverage. 
Use" echo " https://github.com/libhugetlbfs/libhugetlbfs.git for" @@ -126,14 +117,11 @@ for mod in "${uffd_mods[@]}"; do # Hugetlb tests require source and destination huge pages. Pass in half # the size ($half_ufd_size_MB), which is used for *each*. run_test ./userfaultfd hugetlb${mod} "$half_ufd_size_MB" 32 - run_test ./userfaultfd hugetlb_shared${mod} "$half_ufd_size_MB" 32 "$mnt"/uffd-test - rm -f "$mnt"/uffd-test + run_test ./userfaultfd hugetlb_shared${mod} "$half_ufd_size_MB" 32 run_test ./userfaultfd shmem${mod} 20 16 done #cleanup -umount "$mnt" -rm -rf "$mnt" echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages run_test ./compaction_test From d03c376d9066532551dc56837c7c5490e4fcbbfe Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Thu, 22 Sep 2022 10:42:03 -0500 Subject: [PATCH 1045/4122] mm/hugetlb: add folio support to hugetlb specific flag macros Patch series "begin converting hugetlb code to folios", v4. This patch series starts the conversion of the hugetlb code to operate on struct folios rather than struct pages. This removes the ambiguity of whether functions are operating on head pages, tail pages of compound pages, or base pages. This series passes the linux test project hugetlb test cases. Patch 1 adds hugetlb specific page macros that can operate on folios. Patch 2 adds the private field of the first tail page to struct page. For 32-bit, _private_1 aligning with page[1].private was confirmed by using pahole. Patch 3 introduces hugetlb subpool helper functions which operate on struct folios. These patches were tested using the hugepage-mmap.c selftest along with the migratepages command. Patch 4 converts hugetlb_delete_from_page_cache() to use folios. Patch 5 adds a folio_hstate() function to get hstate information from a folio and adds a user of folio_hstate(). Bpftrace was used to track time spent in the free_huge_pages function during the ltp test cases as it is a caller of the hugetlb subpool functions.
From the histogram, the performance is similar before and after the patch series. Time spent in 'free_huge_page' 6.0.0-rc2.master.20220823 @nsecs: [256, 512) 14770 |@@@@@@@@@@@@@@@@@@@@@@@@@@@ |@@@@@@@@@@@@@@@@@@@@@@@@@ | [512, 1K) 155 | | [1K, 2K) 169 | | [2K, 4K) 50 | | [4K, 8K) 14 | | [8K, 16K) 3 | | [16K, 32K) 3 | | 6.0.0-rc2.master.20220823 + patch series @nsecs: [256, 512) 13678 |@@@@@@@@@@@@@@@@@@@@@@@@@@@ | |@@@@@@@@@@@@@@@@@@@@@@@@@ | [512, 1K) 142 | | [1K, 2K) 199 | | [2K, 4K) 44 | | [4K, 8K) 13 | | [8K, 16K) 4 | | [16K, 32K) 1 | | This patch (of 5): Allow the macros which test, set, and clear hugetlb specific page flags to take a hugetlb folio as an input. The macros are generated as folio_{test, set, clear}_hugetlb_{restore_reserve, migratable, temporary, freed, vmemmap_optimized, raw_hwp_unreliable}. Link: https://lkml.kernel.org/r/20220922154207.1575343-1-sidhartha.kumar@oracle.com Link: https://lkml.kernel.org/r/20220922154207.1575343-2-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Mike Kravetz Reviewed-by: Muchun Song Cc: Arnd Bergmann Cc: Colin Cross Cc: David Howells Cc: "Eric W .
Biederman" Cc: Hugh Dickins Cc: kernel test robot Cc: Matthew Wilcox Cc: Peter Xu Cc: Vlastimil Babka Cc: William Kucharski Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 4a76c0fc6bbf..3ff5d2dd3ca3 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -589,26 +589,50 @@ enum hugetlb_page_flags { */ #ifdef CONFIG_HUGETLB_PAGE #define TESTHPAGEFLAG(uname, flname) \ +static __always_inline \ +bool folio_test_hugetlb_##flname(struct folio *folio) \ + { void *private = &folio->private; \ + return test_bit(HPG_##flname, private); \ + } \ static inline int HPage##uname(struct page *page) \ { return test_bit(HPG_##flname, &(page->private)); } #define SETHPAGEFLAG(uname, flname) \ +static __always_inline \ +void folio_set_hugetlb_##flname(struct folio *folio) \ + { void *private = &folio->private; \ + set_bit(HPG_##flname, private); \ + } \ static inline void SetHPage##uname(struct page *page) \ { set_bit(HPG_##flname, &(page->private)); } #define CLEARHPAGEFLAG(uname, flname) \ +static __always_inline \ +void folio_clear_hugetlb_##flname(struct folio *folio) \ + { void *private = &folio->private; \ + clear_bit(HPG_##flname, private); \ + } \ static inline void ClearHPage##uname(struct page *page) \ { clear_bit(HPG_##flname, &(page->private)); } #else #define TESTHPAGEFLAG(uname, flname) \ +static inline bool \ +folio_test_hugetlb_##flname(struct folio *folio) \ + { return 0; } \ static inline int HPage##uname(struct page *page) \ { return 0; } #define SETHPAGEFLAG(uname, flname) \ +static inline void \ +folio_set_hugetlb_##flname(struct folio *folio) \ + { } \ static inline void SetHPage##uname(struct page *page) \ { } #define CLEARHPAGEFLAG(uname, flname) \ +static inline void \ +folio_clear_hugetlb_##flname(struct folio *folio) \ + { } \ static inline void ClearHPage##uname(struct page *page) \ { } #endif From 
d340625f4849ab5dbfebbc7d84709fbfcd39e52f Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Thu, 22 Sep 2022 10:42:04 -0500 Subject: [PATCH 1046/4122] mm: add private field of first tail to struct page and struct folio Allow struct folio to store hugetlb metadata that is contained in the private field of the first tail page. On 32-bit, _private_1 aligns with page[1].private. Link: https://lkml.kernel.org/r/20220922154207.1575343-3-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Acked-by: Mike Kravetz Cc: Arnd Bergmann Cc: Colin Cross Cc: David Howells Cc: "Eric W . Biederman" Cc: Hugh Dickins Cc: kernel test robot Cc: Matthew Wilcox Cc: Muchun Song Cc: Peter Xu Cc: Vlastimil Babka Cc: William Kucharski Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 500e536796ca..2d5b1575ffe0 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -144,6 +144,7 @@ struct page { atomic_t compound_pincount; #ifdef CONFIG_64BIT unsigned int compound_nr; /* 1 << compound_order */ + unsigned long _private_1; #endif }; struct { /* Second tail page of compound page */ @@ -264,6 +265,7 @@ struct page { * @_total_mapcount: Do not use directly, call folio_entire_mapcount(). * @_pincount: Do not use directly, call folio_maybe_dma_pinned(). * @_folio_nr_pages: Do not use directly, call folio_nr_pages(). + * @_private_1: Do not use directly, call folio_get_private_1(). * * A folio is a physically, virtually and logically contiguous set * of bytes. 
It is a power-of-two in size, and it is aligned to that @@ -311,6 +313,7 @@ struct folio { #ifdef CONFIG_64BIT unsigned int _folio_nr_pages; #endif + unsigned long _private_1; }; #define FOLIO_MATCH(pg, fl) \ @@ -338,6 +341,7 @@ FOLIO_MATCH(compound_mapcount, _total_mapcount); FOLIO_MATCH(compound_pincount, _pincount); #ifdef CONFIG_64BIT FOLIO_MATCH(compound_nr, _folio_nr_pages); +FOLIO_MATCH(_private_1, _private_1); #endif #undef FOLIO_MATCH @@ -383,6 +387,16 @@ static inline void *folio_get_private(struct folio *folio) return folio->private; } +static inline void folio_set_private_1(struct folio *folio, unsigned long private) +{ + folio->_private_1 = private; +} + +static inline unsigned long folio_get_private_1(struct folio *folio) +{ + return folio->_private_1; +} + struct page_frag_cache { void * va; #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) From 149562f7509404c382c32c3fa8a6ba356135e5cf Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Thu, 22 Sep 2022 10:42:05 -0500 Subject: [PATCH 1047/4122] mm/hugetlb: add hugetlb_folio_subpool() helpers Allow hugetlbfs_migrate_folio to check and read subpool information by passing in a folio. Link: https://lkml.kernel.org/r/20220922154207.1575343-4-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Mike Kravetz Cc: Arnd Bergmann Cc: Colin Cross Cc: David Howells Cc: "Eric W . 
Biederman" Cc: Hugh Dickins Cc: kernel test robot Cc: Matthew Wilcox Cc: Muchun Song Cc: Peter Xu Cc: Vlastimil Babka Cc: William Kucharski Signed-off-by: Andrew Morton --- fs/hugetlbfs/inode.c | 8 ++++---- include/linux/hugetlb.h | 15 +++++++++++++-- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index dd54f67e47fd..c5137607e523 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -1091,10 +1091,10 @@ static int hugetlbfs_migrate_folio(struct address_space *mapping, if (rc != MIGRATEPAGE_SUCCESS) return rc; - if (hugetlb_page_subpool(&src->page)) { - hugetlb_set_page_subpool(&dst->page, - hugetlb_page_subpool(&src->page)); - hugetlb_set_page_subpool(&src->page, NULL); + if (hugetlb_folio_subpool(src)) { + hugetlb_set_folio_subpool(dst, + hugetlb_folio_subpool(src)); + hugetlb_set_folio_subpool(src, NULL); } if (mode != MIGRATE_SYNC_NO_COPY) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 3ff5d2dd3ca3..496d02bdb997 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -718,18 +718,29 @@ extern unsigned int default_hstate_idx; #define default_hstate (hstates[default_hstate_idx]) +static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio) +{ + return (void *)folio_get_private_1(folio); +} + /* * hugetlb page subpool pointer located in hpage[1].private */ static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage) { - return (void *)page_private(hpage + SUBPAGE_INDEX_SUBPOOL); + return hugetlb_folio_subpool(page_folio(hpage)); +} + +static inline void hugetlb_set_folio_subpool(struct folio *folio, + struct hugepage_subpool *subpool) +{ + folio_set_private_1(folio, (unsigned long)subpool); } static inline void hugetlb_set_page_subpool(struct page *hpage, struct hugepage_subpool *subpool) { - set_page_private(hpage + SUBPAGE_INDEX_SUBPOOL, (unsigned long)subpool); + hugetlb_set_folio_subpool(page_folio(hpage), subpool); } 
static inline struct hstate *hstate_file(struct file *f) From ece62684dcfb714b7d8452056b4a33d426b16457 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Thu, 22 Sep 2022 10:42:06 -0500 Subject: [PATCH 1048/4122] hugetlbfs: convert hugetlb_delete_from_page_cache() to use folios Remove the last caller of delete_from_page_cache() by converting the code to its folio equivalent. Link: https://lkml.kernel.org/r/20220922154207.1575343-5-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Mike Kravetz Cc: Arnd Bergmann Cc: Colin Cross Cc: David Howells Cc: "Eric W . Biederman" Cc: Hugh Dickins Cc: Matthew Wilcox Cc: Muchun Song Cc: Peter Xu Cc: Vlastimil Babka Cc: William Kucharski Signed-off-by: Andrew Morton --- fs/hugetlbfs/inode.c | 12 ++++++------ include/linux/pagemap.h | 1 - mm/folio-compat.c | 5 ----- 3 files changed, 6 insertions(+), 12 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index c5137607e523..00495fc128c5 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -364,11 +364,11 @@ static int hugetlbfs_write_end(struct file *file, struct address_space *mapping, return -EINVAL; } -static void hugetlb_delete_from_page_cache(struct page *page) +static void hugetlb_delete_from_page_cache(struct folio *folio) { - ClearPageDirty(page); - ClearPageUptodate(page); - delete_from_page_cache(page); + folio_clear_dirty(folio); + folio_clear_uptodate(folio); + filemap_remove_folio(folio); } /* @@ -574,8 +574,8 @@ static bool remove_inode_single_folio(struct hstate *h, struct inode *inode, * map could fail. Correspondingly, the subpool and global * reserve usage count can need to be adjusted. 
*/ - VM_BUG_ON(HPageRestoreReserve(&folio->page)); - hugetlb_delete_from_page_cache(&folio->page); + VM_BUG_ON_FOLIO(folio_test_hugetlb_restore_reserve(folio), folio); + hugetlb_delete_from_page_cache(folio); ret = true; if (!truncate_op) { if (unlikely(hugetlb_unreserve_pages(inode, index, diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index bbccb4044222..060ee98474ef 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -1102,7 +1102,6 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping, int filemap_add_folio(struct address_space *mapping, struct folio *folio, pgoff_t index, gfp_t gfp); void filemap_remove_folio(struct folio *folio); -void delete_from_page_cache(struct page *page); void __filemap_remove_folio(struct folio *folio, void *shadow); void replace_page_cache_page(struct page *old, struct page *new); void delete_from_page_cache_batch(struct address_space *mapping, diff --git a/mm/folio-compat.c b/mm/folio-compat.c index e1e23b4947d7..8ae39c06da62 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -124,11 +124,6 @@ struct page *grab_cache_page_write_begin(struct address_space *mapping, } EXPORT_SYMBOL(grab_cache_page_write_begin); -void delete_from_page_cache(struct page *page) -{ - return filemap_remove_folio(page_folio(page)); -} - int try_to_release_page(struct page *page, gfp_t gfp) { return filemap_release_folio(page_folio(page), gfp); From e51da3a9b6c2f67879880259a25c51dbda01c462 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Thu, 22 Sep 2022 10:42:07 -0500 Subject: [PATCH 1049/4122] mm/hugetlb: add folio_hstate() Helper function to retrieve hstate information from a hugetlb folio. Link: https://lkml.kernel.org/r/20220922154207.1575343-6-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reported-by: kernel test robot Reviewed-by: Mike Kravetz Cc: Arnd Bergmann Cc: Colin Cross Cc: David Howells Cc: "Eric W . 
Biederman" Cc: Hugh Dickins Cc: Matthew Wilcox Cc: Muchun Song Cc: Peter Xu Cc: Vlastimil Babka Cc: William Kucharski Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 14 ++++++++++++-- mm/migrate.c | 2 +- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 496d02bdb997..20a0d5a08395 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -824,10 +824,15 @@ static inline pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, } #endif +static inline struct hstate *folio_hstate(struct folio *folio) +{ + VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio); + return size_to_hstate(folio_size(folio)); +} + static inline struct hstate *page_hstate(struct page *page) { - VM_BUG_ON_PAGE(!PageHuge(page), page); - return size_to_hstate(page_size(page)); + return folio_hstate(page_folio(page)); } static inline unsigned hstate_index_to_shift(unsigned index) @@ -1036,6 +1041,11 @@ static inline struct hstate *hstate_vma(struct vm_area_struct *vma) return NULL; } +static inline struct hstate *folio_hstate(struct folio *folio) +{ + return NULL; +} + static inline struct hstate *page_hstate(struct page *page) { return NULL; diff --git a/mm/migrate.c b/mm/migrate.c index dff333593a8a..556cb1c86e53 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1620,7 +1620,7 @@ struct page *alloc_migration_target(struct page *page, unsigned long private) nid = folio_nid(folio); if (folio_test_hugetlb(folio)) { - struct hstate *h = page_hstate(&folio->page); + struct hstate *h = folio_hstate(folio); gfp_mask = htlb_modify_alloc_mask(h, gfp_mask); return alloc_huge_page_nodemask(h, nid, mtc->nmask, gfp_mask); From d8e454eb44473b2270e2675fb44a9d79dee36097 Mon Sep 17 00:00:00 2001 From: Ma Wupeng Date: Fri, 14 Oct 2022 09:39:31 +0800 Subject: [PATCH 1050/4122] mm/rmap: fix comment in anon_vma_clone() Commit 2555283eb40d ("mm/rmap: Fix anon_vma->degree ambiguity leading to double-reuse") use num_children and 
num_active_vmas to replace the original degree to fix anon_vma UAF problem. Update the comment in anon_vma_clone to fit this change. Link: https://lkml.kernel.org/r/20221014013931.1565969-1-mawupeng1@huawei.com Signed-off-by: Ma Wupeng Signed-off-by: Andrew Morton --- mm/rmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/rmap.c b/mm/rmap.c index 2ec925e5fa6a..92ed6fe3d038 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -315,8 +315,8 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src) enomem_failure: /* - * dst->anon_vma is dropped here otherwise its degree can be incorrectly - * decremented in unlink_anon_vmas(). + * dst->anon_vma is dropped here otherwise its num_active_vmas can + * be incorrectly decremented in unlink_anon_vmas(). * We can safely do this because callers of anon_vma_clone() don't care * about dst->anon_vma if anon_vma_clone() failed. */ From 3392ca121872dd8c33015c7703d4981c78819be3 Mon Sep 17 00:00:00 2001 From: "Vishal Moola (Oracle)" Date: Mon, 17 Oct 2022 09:17:59 -0700 Subject: [PATCH 1051/4122] filemap: find_lock_entries() now updates start offset Patch series "Rework find_get_entries() and find_lock_entries()", v3. Originally the callers of find_get_entries() and find_lock_entries() were keeping track of the start index themselves as they traverse the search range. This resulted in hacky code such as in shmem_undo_range(): index = folio->index + folio_nr_pages(folio) - 1; where the - 1 is only present to stay in the right spot after incrementing index later. This sort of calculation was also being done on every folio despite not even using index later within that function. These patches change find_get_entries() and find_lock_entries() to calculate the new index instead of leaving it to the callers so we can avoid all these complications. This patch (of 2): Initially, find_lock_entries() was being passed in the start offset as a value. That left the calculation of the offset to the callers. 
This led to complexity in the callers trying to keep track of the index. Now find_lock_entries() takes in a pointer to the start offset and updates the value to be directly after the last entry found. If no entry is found, the offset is not changed. This gets rid of multiple hacky calculations that kept track of the start offset. Link: https://lkml.kernel.org/r/20221017161800.2003-1-vishal.moola@gmail.com Link: https://lkml.kernel.org/r/20221017161800.2003-2-vishal.moola@gmail.com Signed-off-by: Vishal Moola (Oracle) Cc: Hugh Dickins Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- mm/filemap.c | 15 ++++++++++++--- mm/internal.h | 2 +- mm/shmem.c | 8 ++------ mm/truncate.c | 11 +++-------- 4 files changed, 18 insertions(+), 18 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index 08341616ae7a..3a73b7b8c2a4 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2085,16 +2085,16 @@ unsigned find_get_entries(struct address_space *mapping, pgoff_t start, * * Return: The number of entries which were found. 
*/ -unsigned find_lock_entries(struct address_space *mapping, pgoff_t start, +unsigned find_lock_entries(struct address_space *mapping, pgoff_t *start, pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices) { - XA_STATE(xas, &mapping->i_pages, start); + XA_STATE(xas, &mapping->i_pages, *start); struct folio *folio; rcu_read_lock(); while ((folio = find_get_entry(&xas, end, XA_PRESENT))) { if (!xa_is_value(folio)) { - if (folio->index < start) + if (folio->index < *start) goto put; if (folio->index + folio_nr_pages(folio) - 1 > end) goto put; @@ -2117,6 +2117,15 @@ put: } rcu_read_unlock(); + if (folio_batch_count(fbatch)) { + unsigned long nr = 1; + int idx = folio_batch_count(fbatch) - 1; + + folio = fbatch->folios[idx]; + if (!xa_is_value(folio) && !folio_test_hugetlb(folio)) + nr = folio_nr_pages(folio); + *start = indices[idx] + nr; + } return folio_batch_count(fbatch); } diff --git a/mm/internal.h b/mm/internal.h index 6b7ef495b56d..c504ac7267e0 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -106,7 +106,7 @@ static inline void force_page_cache_readahead(struct address_space *mapping, force_page_cache_ra(&ractl, nr_to_read); } -unsigned find_lock_entries(struct address_space *mapping, pgoff_t start, +unsigned find_lock_entries(struct address_space *mapping, pgoff_t *start, pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices); unsigned find_get_entries(struct address_space *mapping, pgoff_t start, pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices); diff --git a/mm/shmem.c b/mm/shmem.c index c1d8b8a1aa3b..6b560c3915af 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -922,21 +922,18 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, folio_batch_init(&fbatch); index = start; - while (index < end && find_lock_entries(mapping, index, end - 1, + while (index < end && find_lock_entries(mapping, &index, end - 1, &fbatch, indices)) { for (i = 0; i < folio_batch_count(&fbatch); i++) { folio = fbatch.folios[i]; - index = 
indices[i]; - if (xa_is_value(folio)) { if (unfalloc) continue; nr_swaps_freed += !shmem_free_swap(mapping, - index, folio); + indices[i], folio); continue; } - index += folio_nr_pages(folio) - 1; if (!unfalloc || !folio_test_uptodate(folio)) truncate_inode_folio(mapping, folio); @@ -945,7 +942,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, folio_batch_remove_exceptionals(&fbatch); folio_batch_release(&fbatch); cond_resched(); - index++; } same_folio = (lstart >> PAGE_SHIFT) == (lend >> PAGE_SHIFT); diff --git a/mm/truncate.c b/mm/truncate.c index c0be77e5c008..b6065a494c71 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -361,9 +361,8 @@ void truncate_inode_pages_range(struct address_space *mapping, folio_batch_init(&fbatch); index = start; - while (index < end && find_lock_entries(mapping, index, end - 1, + while (index < end && find_lock_entries(mapping, &index, end - 1, &fbatch, indices)) { - index = indices[folio_batch_count(&fbatch) - 1] + 1; truncate_folio_batch_exceptionals(mapping, &fbatch, indices); for (i = 0; i < folio_batch_count(&fbatch); i++) truncate_cleanup_folio(fbatch.folios[i]); @@ -510,20 +509,17 @@ unsigned long invalidate_mapping_pagevec(struct address_space *mapping, int i; folio_batch_init(&fbatch); - while (find_lock_entries(mapping, index, end, &fbatch, indices)) { + while (find_lock_entries(mapping, &index, end, &fbatch, indices)) { for (i = 0; i < folio_batch_count(&fbatch); i++) { struct folio *folio = fbatch.folios[i]; /* We rely upon deletion not changing folio->index */ - index = indices[i]; if (xa_is_value(folio)) { count += invalidate_exceptional_entry(mapping, - index, - folio); + indices[i], folio); continue; } - index += folio_nr_pages(folio) - 1; ret = mapping_evict_folio(mapping, folio); folio_unlock(folio); @@ -542,7 +538,6 @@ unsigned long invalidate_mapping_pagevec(struct address_space *mapping, folio_batch_remove_exceptionals(&fbatch); folio_batch_release(&fbatch); cond_resched(); - 
index++; } return count; } From 9fb6beea79c6e7c959adf4fb7b94cf9a6028b941 Mon Sep 17 00:00:00 2001 From: "Vishal Moola (Oracle)" Date: Mon, 17 Oct 2022 09:18:00 -0700 Subject: [PATCH 1052/4122] filemap: find_get_entries() now updates start offset Initially, find_get_entries() was being passed in the start offset as a value. That left the calculation of the offset to the callers. This led to complexity in the callers trying to keep track of the index. Now find_get_entries() takes in a pointer to the start offset and updates the value to be directly after the last entry found. If no entry is found, the offset is not changed. This gets rid of multiple hacky calculations that kept track of the start offset. Link: https://lkml.kernel.org/r/20221017161800.2003-3-vishal.moola@gmail.com Signed-off-by: Vishal Moola (Oracle) Cc: Hugh Dickins Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- mm/filemap.c | 13 +++++++++++-- mm/internal.h | 2 +- mm/shmem.c | 11 ++++------- mm/truncate.c | 19 +++++++------------ 4 files changed, 23 insertions(+), 22 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index 3a73b7b8c2a4..65eee6ec1066 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2048,10 +2048,10 @@ reset: * * Return: The number of entries which were found. 
*/ -unsigned find_get_entries(struct address_space *mapping, pgoff_t start, +unsigned find_get_entries(struct address_space *mapping, pgoff_t *start, pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices) { - XA_STATE(xas, &mapping->i_pages, start); + XA_STATE(xas, &mapping->i_pages, *start); struct folio *folio; rcu_read_lock(); @@ -2062,6 +2062,15 @@ unsigned find_get_entries(struct address_space *mapping, pgoff_t start, } rcu_read_unlock(); + if (folio_batch_count(fbatch)) { + unsigned long nr = 1; + int idx = folio_batch_count(fbatch) - 1; + + folio = fbatch->folios[idx]; + if (!xa_is_value(folio) && !folio_test_hugetlb(folio)) + nr = folio_nr_pages(folio); + *start = indices[idx] + nr; + } return folio_batch_count(fbatch); } diff --git a/mm/internal.h b/mm/internal.h index c504ac7267e0..68afdbe7106e 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -108,7 +108,7 @@ static inline void force_page_cache_readahead(struct address_space *mapping, unsigned find_lock_entries(struct address_space *mapping, pgoff_t *start, pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices); -unsigned find_get_entries(struct address_space *mapping, pgoff_t start, +unsigned find_get_entries(struct address_space *mapping, pgoff_t *start, pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices); void filemap_free_folio(struct address_space *mapping, struct folio *folio); int truncate_inode_folio(struct address_space *mapping, struct folio *folio); diff --git a/mm/shmem.c b/mm/shmem.c index 6b560c3915af..9c897cf3fb99 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -973,7 +973,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, while (index < end) { cond_resched(); - if (!find_get_entries(mapping, index, end - 1, &fbatch, + if (!find_get_entries(mapping, &index, end - 1, &fbatch, indices)) { /* If all gone or hole-punch or unfalloc, we're done */ if (index == start || end != -1) @@ -985,13 +985,12 @@ static void shmem_undo_range(struct inode *inode, 
loff_t lstart, loff_t lend, for (i = 0; i < folio_batch_count(&fbatch); i++) { folio = fbatch.folios[i]; - index = indices[i]; if (xa_is_value(folio)) { if (unfalloc) continue; - if (shmem_free_swap(mapping, index, folio)) { + if (shmem_free_swap(mapping, indices[i], folio)) { /* Swap was replaced by page: retry */ - index--; + index = indices[i]; break; } nr_swaps_freed++; @@ -1004,19 +1003,17 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, if (folio_mapping(folio) != mapping) { /* Page was replaced by swap: retry */ folio_unlock(folio); - index--; + index = indices[i]; break; } VM_BUG_ON_FOLIO(folio_test_writeback(folio), folio); truncate_inode_folio(mapping, folio); } - index = folio->index + folio_nr_pages(folio) - 1; folio_unlock(folio); } folio_batch_remove_exceptionals(&fbatch); folio_batch_release(&fbatch); - index++; } spin_lock_irq(&info->lock); diff --git a/mm/truncate.c b/mm/truncate.c index b6065a494c71..c7bfd247a651 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -400,7 +400,7 @@ void truncate_inode_pages_range(struct address_space *mapping, index = start; while (index < end) { cond_resched(); - if (!find_get_entries(mapping, index, end - 1, &fbatch, + if (!find_get_entries(mapping, &index, end - 1, &fbatch, indices)) { /* If all gone from start onwards, we're done */ if (index == start) @@ -414,21 +414,18 @@ void truncate_inode_pages_range(struct address_space *mapping, struct folio *folio = fbatch.folios[i]; /* We rely upon deletion not changing page->index */ - index = indices[i]; if (xa_is_value(folio)) continue; folio_lock(folio); - VM_BUG_ON_FOLIO(!folio_contains(folio, index), folio); + VM_BUG_ON_FOLIO(!folio_contains(folio, indices[i]), folio); folio_wait_writeback(folio); truncate_inode_folio(mapping, folio); folio_unlock(folio); - index = folio_index(folio) + folio_nr_pages(folio) - 1; } truncate_folio_batch_exceptionals(mapping, &fbatch, indices); folio_batch_release(&fbatch); - index++; } } 
EXPORT_SYMBOL(truncate_inode_pages_range); @@ -636,16 +633,15 @@ int invalidate_inode_pages2_range(struct address_space *mapping, folio_batch_init(&fbatch); index = start; - while (find_get_entries(mapping, index, end, &fbatch, indices)) { + while (find_get_entries(mapping, &index, end, &fbatch, indices)) { for (i = 0; i < folio_batch_count(&fbatch); i++) { struct folio *folio = fbatch.folios[i]; /* We rely upon deletion not changing folio->index */ - index = indices[i]; if (xa_is_value(folio)) { if (!invalidate_exceptional_entry2(mapping, - index, folio)) + indices[i], folio)) ret = -EBUSY; continue; } @@ -655,13 +651,13 @@ int invalidate_inode_pages2_range(struct address_space *mapping, * If folio is mapped, before taking its lock, * zap the rest of the file in one hit. */ - unmap_mapping_pages(mapping, index, - (1 + end - index), false); + unmap_mapping_pages(mapping, indices[i], + (1 + end - indices[i]), false); did_range_unmap = 1; } folio_lock(folio); - VM_BUG_ON_FOLIO(!folio_contains(folio, index), folio); + VM_BUG_ON_FOLIO(!folio_contains(folio, indices[i]), folio); if (folio->mapping != mapping) { folio_unlock(folio); continue; @@ -684,7 +680,6 @@ int invalidate_inode_pages2_range(struct address_space *mapping, folio_batch_remove_exceptionals(&fbatch); folio_batch_release(&fbatch); cond_resched(); - index++; } /* * For DAX we invalidate page tables after invalidating page cache. We From 70ec04f3486103819807b061b50a99f6e1d2bf36 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Tue, 18 Oct 2022 16:51:54 +0200 Subject: [PATCH 1053/4122] zram: use try_cmpxchg in update_used_max Use try_cmpxchg instead of cmpxchg (*ptr, old, new) == old in update_used_max. x86 CMPXCHG instruction returns success in ZF flag, so this change saves a compare after cmpxchg (and related move instruction in front of cmpxchg). Also, reorder code a bit to remove additional compare and conditional jump from the assembly code. 
Together, these two changes save 15 bytes from the function when compiled for x86_64. No functional change intended. Link: https://lkml.kernel.org/r/20221018145154.3699-1-ubizjak@gmail.com Signed-off-by: Uros Bizjak Reviewed-by: Sergey Senozhatsky Cc: Minchan Kim Cc: Nitin Gupta Signed-off-by: Andrew Morton --- drivers/block/zram/zram_drv.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 966aab902d19..87711ddf4b54 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -188,16 +188,13 @@ static void update_position(u32 *index, int *offset, struct bio_vec *bvec) static inline void update_used_max(struct zram *zram, const unsigned long pages) { - unsigned long old_max, cur_max; - - old_max = atomic_long_read(&zram->stats.max_used_pages); + unsigned long cur_max = atomic_long_read(&zram->stats.max_used_pages); do { - cur_max = old_max; - if (pages > cur_max) - old_max = atomic_long_cmpxchg( - &zram->stats.max_used_pages, cur_max, pages); - } while (old_max != cur_max); + if (cur_max >= pages) + return; + } while (!atomic_long_try_cmpxchg(&zram->stats.max_used_pages, + &cur_max, pages)); } static inline void zram_fill_page(void *ptr, unsigned long len, From 3e0ee843427a573e3e1187a5331e4b7fb00a76f3 Mon Sep 17 00:00:00 2001 From: Rolf Eike Beer Date: Fri, 7 Oct 2022 13:37:41 +0200 Subject: [PATCH 1054/4122] mm: fix typo in struct vm_operations_struct comments There is no eprotect(), so I assume this is about mprotect(). 
Link: https://lkml.kernel.org/r/2385684.8vm7BOzihM@mobilepool36.emlix.com Signed-off-by: Rolf Eike Beer Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 8bbcccbc5565..f6d2d2d9e284 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -549,7 +549,7 @@ struct vm_operations_struct { /* * Called by mprotect() to make driver-specific permission * checks before mprotect() is finalised. The VMA must not - * be modified. Returns 0 if eprotect() can proceed. + * be modified. Returns 0 if mprotect() can proceed. */ int (*mprotect)(struct vm_area_struct *vma, unsigned long start, unsigned long end, unsigned long newflags); From 6fe7d712d798e9312e3dff69ec3f5f62f4d03a04 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Fri, 7 Oct 2022 10:50:27 +0200 Subject: [PATCH 1055/4122] mm/shmem: remove unneeded assignments in shmem_get_folio_gfp() After the rework of shmem_get_folio_gfp() to use a folio, the local variable hindex is only needed to be set once before passing it to shmem_add_to_page_cache(). Remove the unneeded initialization and assignments of the variable hindex before the actual effective assignment and first use. No functional change. No change in object code. 
Link: https://lkml.kernel.org/r/20221007085027.6309-1-lukas.bulwahn@gmail.com Signed-off-by: Lukas Bulwahn Cc: Hugh Dickins Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- mm/shmem.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 9c897cf3fb99..57d878b6391d 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1826,7 +1826,7 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index, struct shmem_sb_info *sbinfo; struct mm_struct *charge_mm; struct folio *folio; - pgoff_t hindex = index; + pgoff_t hindex; gfp_t huge_gfp; int error; int once = 0; @@ -1864,7 +1864,6 @@ repeat: } if (folio) { - hindex = folio->index; if (sgp == SGP_WRITE) folio_mark_accessed(folio); if (folio_test_uptodate(folio)) From 97955f6941f0e7dea64dea22711382daf1db2f76 Mon Sep 17 00:00:00 2001 From: Deming Wang Date: Thu, 6 Oct 2022 23:03:45 -0400 Subject: [PATCH 1056/4122] mm/mincore.c: use vma_lookup() instead of find_vma() Using vma_lookup() verifies the start address is contained in the found vma. This makes the code easier to read. 
Link: https://lkml.kernel.org/r/20221007030345.5029-1-wangdeming@inspur.com Signed-off-by: Deming Wang Signed-off-by: Andrew Morton --- mm/mincore.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/mincore.c b/mm/mincore.c index fa200c14185f..e7e046fe17d7 100644 --- a/mm/mincore.c +++ b/mm/mincore.c @@ -190,8 +190,8 @@ static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *v unsigned long end; int err; - vma = find_vma(current->mm, addr); - if (!vma || addr < vma->vm_start) + vma = vma_lookup(current->mm, addr); + if (!vma) return -ENOMEM; end = min(vma->vm_end, addr + (pages << PAGE_SHIFT)); if (!can_do_mincore(vma)) { From 7848ed6284ec4791eba22026e28edb2062790a3d Mon Sep 17 00:00:00 2001 From: Kamalesh Babulal Date: Fri, 30 Sep 2022 19:14:33 +0530 Subject: [PATCH 1057/4122] mm: memcontrol: use mem_cgroup_is_root() helper Replace the checks for memcg is root memcg, with mem_cgroup_is_root() helper. Link: https://lkml.kernel.org/r/20220930134433.338103-1-kamalesh.babulal@oracle.com Signed-off-by: Kamalesh Babulal Reviewed-by: Muchun Song Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Kamalesh Babulal Cc: Roman Gushchin Cc: Shakeel Butt Cc: Tom Hromatka Signed-off-by: Andrew Morton --- mm/memcontrol.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 2d8549ae1b30..f264a856ba86 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1219,7 +1219,7 @@ static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg) * cgroup root (root_mem_cgroup). So we have to handle * dead_memcg from cgroup root separately. 
*/ - if (last != root_mem_cgroup) + if (!mem_cgroup_is_root(last)) __invalidate_reclaim_iterators(root_mem_cgroup, dead_memcg); } @@ -1243,7 +1243,7 @@ int mem_cgroup_scan_tasks(struct mem_cgroup *memcg, struct mem_cgroup *iter; int ret = 0; - BUG_ON(memcg == root_mem_cgroup); + BUG_ON(mem_cgroup_is_root(memcg)); for_each_mem_cgroup_tree(iter, memcg) { struct css_task_iter it; @@ -1272,7 +1272,7 @@ void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio) memcg = folio_memcg(folio); if (!memcg) - VM_BUG_ON_FOLIO(lruvec_memcg(lruvec) != root_mem_cgroup, folio); + VM_BUG_ON_FOLIO(!mem_cgroup_is_root(lruvec_memcg(lruvec)), folio); else VM_BUG_ON_FOLIO(lruvec_memcg(lruvec) != memcg, folio); } @@ -2036,7 +2036,7 @@ struct mem_cgroup *mem_cgroup_get_oom_group(struct task_struct *victim, rcu_read_lock(); memcg = mem_cgroup_from_task(victim); - if (memcg == root_mem_cgroup) + if (mem_cgroup_is_root(memcg)) goto out; /* @@ -2995,7 +2995,7 @@ static struct obj_cgroup *__get_obj_cgroup_from_memcg(struct mem_cgroup *memcg) { struct obj_cgroup *objcg = NULL; - for (; memcg != root_mem_cgroup; memcg = parent_mem_cgroup(memcg)) { + for (; !mem_cgroup_is_root(memcg); memcg = parent_mem_cgroup(memcg)) { objcg = rcu_dereference(memcg->objcg); if (objcg && obj_cgroup_tryget(objcg)) break; @@ -7163,7 +7163,7 @@ void mem_cgroup_sk_alloc(struct sock *sk) rcu_read_lock(); memcg = mem_cgroup_from_task(current); - if (memcg == root_mem_cgroup) + if (mem_cgroup_is_root(memcg)) goto out; if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && !memcg->tcpmem_active) goto out; @@ -7298,7 +7298,7 @@ static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg) * The root cgroup cannot be destroyed, so it's refcount must * always be >= 1. 
*/ - if (WARN_ON_ONCE(memcg == root_mem_cgroup)) { + if (WARN_ON_ONCE(mem_cgroup_is_root(memcg))) { VM_BUG_ON(1); break; } @@ -7462,7 +7462,7 @@ long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg) if (mem_cgroup_disabled() || do_memsw_account()) return nr_swap_pages; - for (; memcg != root_mem_cgroup; memcg = parent_mem_cgroup(memcg)) + for (; !mem_cgroup_is_root(memcg); memcg = parent_mem_cgroup(memcg)) nr_swap_pages = min_t(long, nr_swap_pages, READ_ONCE(memcg->swap.max) - page_counter_read(&memcg->swap)); @@ -7484,7 +7484,7 @@ bool mem_cgroup_swap_full(struct folio *folio) if (!memcg) return false; - for (; memcg != root_mem_cgroup; memcg = parent_mem_cgroup(memcg)) { + for (; !mem_cgroup_is_root(memcg); memcg = parent_mem_cgroup(memcg)) { unsigned long usage = page_counter_read(&memcg->swap); if (usage * 2 >= READ_ONCE(memcg->swap.high) || @@ -7648,7 +7648,7 @@ bool obj_cgroup_may_zswap(struct obj_cgroup *objcg) return true; original_memcg = get_mem_cgroup_from_objcg(objcg); - for (memcg = original_memcg; memcg != root_mem_cgroup; + for (memcg = original_memcg; !mem_cgroup_is_root(memcg); memcg = parent_mem_cgroup(memcg)) { unsigned long max = READ_ONCE(memcg->zswap_max); unsigned long pages; From a5454f95246aa1d3527ef5e128cd3a10bc8371de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 28 Sep 2022 12:45:35 +0200 Subject: [PATCH 1058/4122] tmpfs: ensure O_LARGEFILE with generic_file_open() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without this check open() will open large files on tmpfs although O_LARGEFILE was not specified. This is inconsistent with other filesystems. Also it will later result in EOVERFLOW on stat() or EFBIG on write(). 
Link: https://lore.kernel.org/lkml/76bedae6-22ea-4abc-8c06-b424ceb39217@t-8ch.de/ Link: https://lkml.kernel.org/r/20220928104535.61186-1-linux@weissschuh.net Signed-off-by: Thomas Weißschuh Acked-by: Hugh Dickins Signed-off-by: Andrew Morton --- mm/shmem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/shmem.c b/mm/shmem.c index 57d878b6391d..0a7c4a748811 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -3902,6 +3902,7 @@ EXPORT_SYMBOL(shmem_aops); static const struct file_operations shmem_file_operations = { .mmap = shmem_mmap, + .open = generic_file_open, .get_unmapped_area = shmem_get_unmapped_area, #ifdef CONFIG_TMPFS .llseek = shmem_file_llseek, From 7ce0ea19d50e4e97a8da69f616ffa8afbb532a93 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 27 Sep 2022 19:09:09 +0200 Subject: [PATCH 1059/4122] kasan: switch kunit tests to console tracepoints Switch KUnit-compatible KASAN tests from using per-task KUnit resources to console tracepoints. This allows for two things: 1. Migrating tests that trigger a KASAN report in the context of a task other than current to KUnit framework. This is implemented in the patches that follow. 2. Parsing and matching the contents of KASAN reports. This is not yet implemented. 
Link: https://lkml.kernel.org/r/9345acdd11e953b207b0ed4724ff780e63afeb36.1664298455.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Marco Elver Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Dmitry Vyukov Signed-off-by: Andrew Morton --- lib/Kconfig.kasan | 2 +- mm/kasan/kasan.h | 8 ---- mm/kasan/kasan_test.c | 85 +++++++++++++++++++++++++++++++------------ mm/kasan/report.c | 31 ---------------- 4 files changed, 63 insertions(+), 63 deletions(-) diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index ca09b1cf8ee9..ba5b27962c34 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -181,7 +181,7 @@ config KASAN_VMALLOC config KASAN_KUNIT_TEST tristate "KUnit-compatible tests of KASAN bug detection capabilities" if !KUNIT_ALL_TESTS - depends on KASAN && KUNIT + depends on KASAN && KUNIT && TRACEPOINTS default KUNIT_ALL_TESTS help A KUnit-based KASAN test suite. Triggers different kinds of diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index abbcc1b0eec5..a84491bc4867 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -261,14 +261,6 @@ struct kasan_stack_ring { #endif /* CONFIG_KASAN_SW_TAGS || CONFIG_KASAN_HW_TAGS */ -#if IS_ENABLED(CONFIG_KASAN_KUNIT_TEST) -/* Used in KUnit-compatible KASAN tests. 
*/ -struct kunit_kasan_status { - bool report_found; - bool sync_fault; -}; -#endif - #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) static inline const void *kasan_shadow_to_mem(const void *shadow_addr) diff --git a/mm/kasan/kasan_test.c b/mm/kasan/kasan_test.c index 0d59098f0876..0ff20bfa3376 100644 --- a/mm/kasan/kasan_test.c +++ b/mm/kasan/kasan_test.c @@ -5,8 +5,12 @@ * Author: Andrey Ryabinin */ +#define pr_fmt(fmt) "kasan_test: " fmt + +#include #include #include +#include #include #include #include @@ -14,21 +18,28 @@ #include #include #include +#include #include #include +#include #include -#include #include -#include +#include #include -#include - #include "kasan.h" #define OOB_TAG_OFF (IS_ENABLED(CONFIG_KASAN_GENERIC) ? 0 : KASAN_GRANULE_SIZE) +static bool multishot; + +/* Fields set based on lines observed in the console. */ +static struct { + bool report_found; + bool async_fault; +} test_status; + /* * Some tests use these global variables to store return values from function * calls that could otherwise be eliminated by the compiler as dead code. @@ -36,35 +47,61 @@ void *kasan_ptr_result; int kasan_int_result; -static struct kunit_resource resource; -static struct kunit_kasan_status test_status; -static bool multishot; +/* Probe for console output: obtains test_status lines of interest. */ +static void probe_console(void *ignore, const char *buf, size_t len) +{ + if (strnstr(buf, "BUG: KASAN: ", len)) + WRITE_ONCE(test_status.report_found, true); + else if (strnstr(buf, "Asynchronous fault: ", len)) + WRITE_ONCE(test_status.async_fault, true); +} -/* - * Temporarily enable multi-shot mode. Otherwise, KASAN would only report the - * first detected bug and panic the kernel if panic_on_warn is enabled. For - * hardware tag-based KASAN also allow tag checking to be reenabled for each - * test, see the comment for KUNIT_EXPECT_KASAN_FAIL(). 
- */ -static int kasan_test_init(struct kunit *test) +static void register_tracepoints(struct tracepoint *tp, void *ignore) +{ + check_trace_callback_type_console(probe_console); + if (!strcmp(tp->name, "console")) + WARN_ON(tracepoint_probe_register(tp, probe_console, NULL)); +} + +static void unregister_tracepoints(struct tracepoint *tp, void *ignore) +{ + if (!strcmp(tp->name, "console")) + tracepoint_probe_unregister(tp, probe_console, NULL); +} + +static int kasan_suite_init(struct kunit_suite *suite) { if (!kasan_enabled()) { - kunit_err(test, "can't run KASAN tests with KASAN disabled"); + pr_err("Can't run KASAN tests with KASAN disabled"); return -1; } + /* + * Temporarily enable multi-shot mode. Otherwise, KASAN would only + * report the first detected bug and panic the kernel if panic_on_warn + * is enabled. + */ multishot = kasan_save_enable_multi_shot(); - test_status.report_found = false; - test_status.sync_fault = false; - kunit_add_named_resource(test, NULL, NULL, &resource, - "kasan_status", &test_status); + + /* + * Because we want to be able to build the test as a module, we need to + * iterate through all known tracepoints, since the static registration + * won't work here. 
+ */ + for_each_kernel_tracepoint(register_tracepoints, NULL); return 0; } +static void kasan_suite_exit(struct kunit_suite *suite) +{ + kasan_restore_multi_shot(multishot); + for_each_kernel_tracepoint(unregister_tracepoints, NULL); + tracepoint_synchronize_unregister(); +} + static void kasan_test_exit(struct kunit *test) { - kasan_restore_multi_shot(multishot); - KUNIT_EXPECT_FALSE(test, test_status.report_found); + KUNIT_EXPECT_FALSE(test, READ_ONCE(test_status.report_found)); } /** @@ -106,11 +143,12 @@ static void kasan_test_exit(struct kunit *test) if (IS_ENABLED(CONFIG_KASAN_HW_TAGS) && \ kasan_sync_fault_possible()) { \ if (READ_ONCE(test_status.report_found) && \ - READ_ONCE(test_status.sync_fault)) \ + !READ_ONCE(test_status.async_fault)) \ kasan_enable_tagging(); \ migrate_enable(); \ } \ WRITE_ONCE(test_status.report_found, false); \ + WRITE_ONCE(test_status.async_fault, false); \ } while (0) #define KASAN_TEST_NEEDS_CONFIG_ON(test, config) do { \ @@ -1447,9 +1485,10 @@ static struct kunit_case kasan_kunit_test_cases[] = { static struct kunit_suite kasan_kunit_test_suite = { .name = "kasan", - .init = kasan_test_init, .test_cases = kasan_kunit_test_cases, .exit = kasan_test_exit, + .suite_init = kasan_suite_init, + .suite_exit = kasan_suite_exit, }; kunit_test_suite(kasan_kunit_test_suite); diff --git a/mm/kasan/report.c b/mm/kasan/report.c index df3602062bfd..31355851a5ec 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -30,8 +30,6 @@ #include -#include - #include "kasan.h" #include "../slab.h" @@ -114,41 +112,12 @@ EXPORT_SYMBOL_GPL(kasan_restore_multi_shot); #endif -#if IS_ENABLED(CONFIG_KASAN_KUNIT_TEST) -static void update_kunit_status(bool sync) -{ - struct kunit *test; - struct kunit_resource *resource; - struct kunit_kasan_status *status; - - test = current->kunit_test; - if (!test) - return; - - resource = kunit_find_named_resource(test, "kasan_status"); - if (!resource) { - kunit_set_failure(test); - return; - } - - status = (struct 
kunit_kasan_status *)resource->data; - WRITE_ONCE(status->report_found, true); - WRITE_ONCE(status->sync_fault, sync); - - kunit_put_resource(resource); -} -#else -static void update_kunit_status(bool sync) { } -#endif - static DEFINE_SPINLOCK(report_lock); static void start_report(unsigned long *flags, bool sync) { /* Respect the /proc/sys/kernel/traceoff_on_warning interface. */ disable_trace_on_warning(); - /* Update status of the currently running KASAN test. */ - update_kunit_status(sync); /* Do not allow LOCKDEP mangling KASAN reports. */ lockdep_off(); /* Make sure we don't end up in loop. */ From 8516e837cab0b2c740b90603b66039aa7dcecda4 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 27 Sep 2022 19:09:10 +0200 Subject: [PATCH 1060/4122] kasan: migrate kasan_rcu_uaf test to kunit Migrate the kasan_rcu_uaf test to the KUnit framework. Changes to the implementation of the test: - Call rcu_barrier() after call_rcu() to make sure that the RCU callbacks get triggered before the test is over. - Cast pointer passed to rcu_dereference_protected as __rcu to get rid of the Sparse warning. - Check that KASAN prints a report via KUNIT_EXPECT_KASAN_FAIL. Initially, this test was intended to check that Generic KASAN prints auxiliary stack traces for RCU objects. Nevertheless, the test is enabled for all modes to make sure that KASAN reports bad accesses in RCU callbacks. The presence of auxiliary stack traces for the Generic mode needs to be inspected manually. 
Link: https://lkml.kernel.org/r/897ee08d6cd0ba7e8a4fbfd9d8502823a2f922e6.1664298455.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Marco Elver Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Dmitry Vyukov Signed-off-by: Andrew Morton --- mm/kasan/kasan_test.c | 37 ++++++++++++++++++++++++++++++++++++ mm/kasan/kasan_test_module.c | 30 ----------------------------- 2 files changed, 37 insertions(+), 30 deletions(-) diff --git a/mm/kasan/kasan_test.c b/mm/kasan/kasan_test.c index 0ff20bfa3376..38bf6ed61cb8 100644 --- a/mm/kasan/kasan_test.c +++ b/mm/kasan/kasan_test.c @@ -1141,6 +1141,42 @@ static void kmalloc_double_kzfree(struct kunit *test) KUNIT_EXPECT_KASAN_FAIL(test, kfree_sensitive(ptr)); } +static struct kasan_rcu_info { + int i; + struct rcu_head rcu; +} *global_rcu_ptr; + +static void rcu_uaf_reclaim(struct rcu_head *rp) +{ + struct kasan_rcu_info *fp = + container_of(rp, struct kasan_rcu_info, rcu); + + kfree(fp); + ((volatile struct kasan_rcu_info *)fp)->i; +} + +/* + * Check that Generic KASAN prints auxiliary stack traces for RCU callbacks. + * The report needs to be inspected manually. + * + * This test is still enabled for other KASAN modes to make sure that all modes + * report bad accesses in tested scenarios. 
+ */ +static void rcu_uaf(struct kunit *test) +{ + struct kasan_rcu_info *ptr; + + ptr = kmalloc(sizeof(struct kasan_rcu_info), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); + + global_rcu_ptr = rcu_dereference_protected( + (struct kasan_rcu_info __rcu *)ptr, NULL); + + KUNIT_EXPECT_KASAN_FAIL(test, + call_rcu(&global_rcu_ptr->rcu, rcu_uaf_reclaim); + rcu_barrier()); +} + static void vmalloc_helpers_tags(struct kunit *test) { void *ptr; @@ -1472,6 +1508,7 @@ static struct kunit_case kasan_kunit_test_cases[] = { KUNIT_CASE(kasan_bitops_generic), KUNIT_CASE(kasan_bitops_tags), KUNIT_CASE(kmalloc_double_kzfree), + KUNIT_CASE(rcu_uaf), KUNIT_CASE(vmalloc_helpers_tags), KUNIT_CASE(vmalloc_oob), KUNIT_CASE(vmap_tags), diff --git a/mm/kasan/kasan_test_module.c b/mm/kasan/kasan_test_module.c index e4ca82dc2c16..4688cbcd722d 100644 --- a/mm/kasan/kasan_test_module.c +++ b/mm/kasan/kasan_test_module.c @@ -62,35 +62,6 @@ static noinline void __init copy_user_test(void) kfree(kmem); } -static struct kasan_rcu_info { - int i; - struct rcu_head rcu; -} *global_rcu_ptr; - -static noinline void __init kasan_rcu_reclaim(struct rcu_head *rp) -{ - struct kasan_rcu_info *fp = container_of(rp, - struct kasan_rcu_info, rcu); - - kfree(fp); - ((volatile struct kasan_rcu_info *)fp)->i; -} - -static noinline void __init kasan_rcu_uaf(void) -{ - struct kasan_rcu_info *ptr; - - pr_info("use-after-free in kasan_rcu_reclaim\n"); - ptr = kmalloc(sizeof(struct kasan_rcu_info), GFP_KERNEL); - if (!ptr) { - pr_err("Allocation failed\n"); - return; - } - - global_rcu_ptr = rcu_dereference_protected(ptr, NULL); - call_rcu(&global_rcu_ptr->rcu, kasan_rcu_reclaim); -} - static noinline void __init kasan_workqueue_work(struct work_struct *work) { kfree(work); @@ -130,7 +101,6 @@ static int __init test_kasan_module_init(void) bool multishot = kasan_save_enable_multi_shot(); copy_user_test(); - kasan_rcu_uaf(); kasan_workqueue_uaf(); kasan_restore_multi_shot(multishot); From 
b2c5bd4c69ce28500ed2176d11002a4e9b30da36 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 27 Sep 2022 19:09:11 +0200 Subject: [PATCH 1061/4122] kasan: migrate workqueue_uaf test to kunit Migrate the workqueue_uaf test to the KUnit framework. Initially, this test was intended to check that Generic KASAN prints auxiliary stack traces for workqueues. Nevertheless, the test is enabled for all modes to make sure that KASAN reports bad accesses in the tested scenario. The presence of auxiliary stack traces for the Generic mode needs to be inspected manually. Link: https://lkml.kernel.org/r/1d81b6cc2a58985126283d1e0de8e663716dd930.1664298455.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Marco Elver Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Dmitry Vyukov Signed-off-by: Andrew Morton --- mm/kasan/kasan_test.c | 40 +++++++++++++++++++++++++++++------- mm/kasan/kasan_test_module.c | 30 --------------------------- 2 files changed, 33 insertions(+), 37 deletions(-) diff --git a/mm/kasan/kasan_test.c b/mm/kasan/kasan_test.c index 38bf6ed61cb8..e27591ef2777 100644 --- a/mm/kasan/kasan_test.c +++ b/mm/kasan/kasan_test.c @@ -1141,6 +1141,14 @@ static void kmalloc_double_kzfree(struct kunit *test) KUNIT_EXPECT_KASAN_FAIL(test, kfree_sensitive(ptr)); } +/* + * The two tests below check that Generic KASAN prints auxiliary stack traces + * for RCU callbacks and workqueues. The reports need to be inspected manually. + * + * These tests are still enabled for other KASAN modes to make sure that all + * modes report bad accesses in tested scenarios. + */ + static struct kasan_rcu_info { int i; struct rcu_head rcu; @@ -1155,13 +1163,6 @@ static void rcu_uaf_reclaim(struct rcu_head *rp) ((volatile struct kasan_rcu_info *)fp)->i; } -/* - * Check that Generic KASAN prints auxiliary stack traces for RCU callbacks. - * The report needs to be inspected manually. 
- * - * This test is still enabled for other KASAN modes to make sure that all modes - * report bad accesses in tested scenarios. - */ static void rcu_uaf(struct kunit *test) { struct kasan_rcu_info *ptr; @@ -1177,6 +1178,30 @@ static void rcu_uaf(struct kunit *test) rcu_barrier()); } +static void workqueue_uaf_work(struct work_struct *work) +{ + kfree(work); +} + +static void workqueue_uaf(struct kunit *test) +{ + struct workqueue_struct *workqueue; + struct work_struct *work; + + workqueue = create_workqueue("kasan_workqueue_test"); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, workqueue); + + work = kmalloc(sizeof(struct work_struct), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, work); + + INIT_WORK(work, workqueue_uaf_work); + queue_work(workqueue, work); + destroy_workqueue(workqueue); + + KUNIT_EXPECT_KASAN_FAIL(test, + ((volatile struct work_struct *)work)->data); +} + static void vmalloc_helpers_tags(struct kunit *test) { void *ptr; @@ -1509,6 +1534,7 @@ static struct kunit_case kasan_kunit_test_cases[] = { KUNIT_CASE(kasan_bitops_tags), KUNIT_CASE(kmalloc_double_kzfree), KUNIT_CASE(rcu_uaf), + KUNIT_CASE(workqueue_uaf), KUNIT_CASE(vmalloc_helpers_tags), KUNIT_CASE(vmalloc_oob), KUNIT_CASE(vmap_tags), diff --git a/mm/kasan/kasan_test_module.c b/mm/kasan/kasan_test_module.c index 4688cbcd722d..7be7bed456ef 100644 --- a/mm/kasan/kasan_test_module.c +++ b/mm/kasan/kasan_test_module.c @@ -62,35 +62,6 @@ static noinline void __init copy_user_test(void) kfree(kmem); } -static noinline void __init kasan_workqueue_work(struct work_struct *work) -{ - kfree(work); -} - -static noinline void __init kasan_workqueue_uaf(void) -{ - struct workqueue_struct *workqueue; - struct work_struct *work; - - workqueue = create_workqueue("kasan_wq_test"); - if (!workqueue) { - pr_err("Allocation failed\n"); - return; - } - work = kmalloc(sizeof(struct work_struct), GFP_KERNEL); - if (!work) { - pr_err("Allocation failed\n"); - return; - } - - INIT_WORK(work, kasan_workqueue_work); - 
queue_work(workqueue, work); - destroy_workqueue(workqueue); - - pr_info("use-after-free on workqueue\n"); - ((volatile struct work_struct *)work)->data; -} - static int __init test_kasan_module_init(void) { /* @@ -101,7 +72,6 @@ static int __init test_kasan_module_init(void) bool multishot = kasan_save_enable_multi_shot(); copy_user_test(); - kasan_workqueue_uaf(); kasan_restore_multi_shot(multishot); return -EAGAIN; From 69c66add566395eaf4c08cb5975b45dec70dbe85 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 27 Sep 2022 13:01:14 +0200 Subject: [PATCH 1062/4122] selftests/vm: anon_cow: test COW handling of anonymous memory Patch series "selftests/vm: test COW handling of anonymous memory". This is my current set of tests for testing COW handling of anonymous memory, especially when interacting with GUP. I developed these tests while working on PageAnonExclusive and managed to clean them up just now. On current upstream Linux, all tests pass except the hugetlb tests that rely on vmsplice -- these tests should pass as soon as vmsplice properly uses FOLL_PIN instead of FOLL_GET. I'm working on additional tests for COW handling in private mappings, focusing on long-term R/O pinning e.g., of the shared zeropage, pagecache pages and KSM pages. These tests, however, will go into a different file. So this is everything I have regarding tests for anonymous memory. This patch (of 7): Let's start adding tests for our COW handling of anonymous memory. We'll focus on basic tests that we can achieve without additional libraries or gup_test extensions. We'll add THP and hugetlb tests separately. 
[david@redhat.com: s/size_t/ssize_t/ on `cur', `total', `transferred';] Link: https://lkml.kernel.org/r/51302b9e-dc69-d709-3214-f23868028555@redhat.com Link: https://lkml.kernel.org/r/20220927110120.106906-1-david@redhat.com Link: https://lkml.kernel.org/r/20220927110120.106906-2-david@redhat.com Signed-off-by: David Hildenbrand Cc: Andrea Arcangeli Cc: Christoph von Recklinghausen Cc: Don Dutile Cc: Jason Gunthorpe Cc: John Hubbard Cc: Mike Rapoport Cc: Nadav Amit Cc: Peter Xu Cc: Shuah Khan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/.gitignore | 1 + tools/testing/selftests/vm/Makefile | 4 +- tools/testing/selftests/vm/anon_cow.c | 401 ++++++++++++++++++++++ tools/testing/selftests/vm/run_vmtests.sh | 3 + tools/testing/selftests/vm/vm_util.c | 7 + tools/testing/selftests/vm/vm_util.h | 1 + 6 files changed, 416 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/vm/anon_cow.c diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore index 7b9dc2426f18..8a536c731e3c 100644 --- a/tools/testing/selftests/vm/.gitignore +++ b/tools/testing/selftests/vm/.gitignore @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only +anon_cow hugepage-mmap hugepage-mremap hugepage-shm diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 163c2fde3cb3..ad07d7a84c3e 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -25,7 +25,8 @@ MAKEFLAGS += --no-builtin-rules CFLAGS = -Wall -I $(top_srcdir) -I $(top_srcdir)/usr/include $(EXTRA_CFLAGS) $(KHDR_INCLUDES) LDLIBS = -lrt -lpthread -TEST_GEN_FILES = compaction_test +TEST_GEN_FILES = anon_cow +TEST_GEN_FILES += compaction_test TEST_GEN_FILES += gup_test TEST_GEN_FILES += hmm-tests TEST_GEN_FILES += hugetlb-madvise @@ -95,6 +96,7 @@ TEST_FILES += va_128TBswitch.sh include ../lib.mk +$(OUTPUT)/anon_cow: vm_util.c $(OUTPUT)/khugepaged: vm_util.c $(OUTPUT)/madv_populate: 
vm_util.c $(OUTPUT)/soft-dirty: vm_util.c diff --git a/tools/testing/selftests/vm/anon_cow.c b/tools/testing/selftests/vm/anon_cow.c new file mode 100644 index 000000000000..4613294af758 --- /dev/null +++ b/tools/testing/selftests/vm/anon_cow.c @@ -0,0 +1,401 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * COW (Copy On Write) tests for anonymous memory. + * + * Copyright 2022, Red Hat, Inc. + * + * Author(s): David Hildenbrand + */ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../kselftest.h" +#include "vm_util.h" + +static size_t pagesize; +static int pagemap_fd; + +struct comm_pipes { + int child_ready[2]; + int parent_ready[2]; +}; + +static int setup_comm_pipes(struct comm_pipes *comm_pipes) +{ + if (pipe(comm_pipes->child_ready) < 0) + return -errno; + if (pipe(comm_pipes->parent_ready) < 0) { + close(comm_pipes->child_ready[0]); + close(comm_pipes->child_ready[1]); + return -errno; + } + + return 0; +} + +static void close_comm_pipes(struct comm_pipes *comm_pipes) +{ + close(comm_pipes->child_ready[0]); + close(comm_pipes->child_ready[1]); + close(comm_pipes->parent_ready[0]); + close(comm_pipes->parent_ready[1]); +} + +static int child_memcmp_fn(char *mem, size_t size, + struct comm_pipes *comm_pipes) +{ + char *old = malloc(size); + char buf; + + /* Backup the original content. */ + memcpy(old, mem, size); + + /* Wait until the parent modified the page. */ + write(comm_pipes->child_ready[1], "0", 1); + while (read(comm_pipes->parent_ready[0], &buf, 1) != 1) + ; + + /* See if we still read the old values. 
*/ + return memcmp(old, mem, size); +} + +static int child_vmsplice_memcmp_fn(char *mem, size_t size, + struct comm_pipes *comm_pipes) +{ + struct iovec iov = { + .iov_base = mem, + .iov_len = size, + }; + ssize_t cur, total, transferred; + char *old, *new; + int fds[2]; + char buf; + + old = malloc(size); + new = malloc(size); + + /* Backup the original content. */ + memcpy(old, mem, size); + + if (pipe(fds) < 0) + return -errno; + + /* Trigger a read-only pin. */ + transferred = vmsplice(fds[1], &iov, 1, 0); + if (transferred < 0) + return -errno; + if (transferred == 0) + return -EINVAL; + + /* Unmap it from our page tables. */ + if (munmap(mem, size) < 0) + return -errno; + + /* Wait until the parent modified it. */ + write(comm_pipes->child_ready[1], "0", 1); + while (read(comm_pipes->parent_ready[0], &buf, 1) != 1) + ; + + /* See if we still read the old values via the pipe. */ + for (total = 0; total < transferred; total += cur) { + cur = read(fds[0], new + total, transferred - total); + if (cur < 0) + return -errno; + } + + return memcmp(old, new, transferred); +} + +typedef int (*child_fn)(char *mem, size_t size, struct comm_pipes *comm_pipes); + +static void do_test_cow_in_parent(char *mem, size_t size, child_fn fn) +{ + struct comm_pipes comm_pipes; + char buf; + int ret; + + ret = setup_comm_pipes(&comm_pipes); + if (ret) { + ksft_test_result_fail("pipe() failed\n"); + return; + } + + ret = fork(); + if (ret < 0) { + ksft_test_result_fail("fork() failed\n"); + goto close_comm_pipes; + } else if (!ret) { + exit(fn(mem, size, &comm_pipes)); + } + + while (read(comm_pipes.child_ready[0], &buf, 1) != 1) + ; + /* Modify the page. 
*/ + memset(mem, 0xff, size); + write(comm_pipes.parent_ready[1], "0", 1); + + wait(&ret); + if (WIFEXITED(ret)) + ret = WEXITSTATUS(ret); + else + ret = -EINVAL; + + ksft_test_result(!ret, "No leak from parent into child\n"); +close_comm_pipes: + close_comm_pipes(&comm_pipes); +} + +static void test_cow_in_parent(char *mem, size_t size) +{ + do_test_cow_in_parent(mem, size, child_memcmp_fn); +} + +static void test_vmsplice_in_child(char *mem, size_t size) +{ + do_test_cow_in_parent(mem, size, child_vmsplice_memcmp_fn); +} + +static void do_test_vmsplice_in_parent(char *mem, size_t size, + bool before_fork) +{ + struct iovec iov = { + .iov_base = mem, + .iov_len = size, + }; + ssize_t cur, total, transferred; + struct comm_pipes comm_pipes; + char *old, *new; + int ret, fds[2]; + char buf; + + old = malloc(size); + new = malloc(size); + + memcpy(old, mem, size); + + ret = setup_comm_pipes(&comm_pipes); + if (ret) { + ksft_test_result_fail("pipe() failed\n"); + goto free; + } + + if (pipe(fds) < 0) { + ksft_test_result_fail("pipe() failed\n"); + goto close_comm_pipes; + } + + if (before_fork) { + transferred = vmsplice(fds[1], &iov, 1, 0); + if (transferred <= 0) { + ksft_test_result_fail("vmsplice() failed\n"); + goto close_pipe; + } + } + + ret = fork(); + if (ret < 0) { + ksft_test_result_fail("fork() failed\n"); + goto close_pipe; + } else if (!ret) { + write(comm_pipes.child_ready[1], "0", 1); + while (read(comm_pipes.parent_ready[0], &buf, 1) != 1) + ; + /* Modify page content in the child. 
*/ + memset(mem, 0xff, size); + exit(0); + } + + if (!before_fork) { + transferred = vmsplice(fds[1], &iov, 1, 0); + if (transferred <= 0) { + ksft_test_result_fail("vmsplice() failed\n"); + wait(&ret); + goto close_pipe; + } + } + + while (read(comm_pipes.child_ready[0], &buf, 1) != 1) + ; + if (munmap(mem, size) < 0) { + ksft_test_result_fail("munmap() failed\n"); + goto close_pipe; + } + write(comm_pipes.parent_ready[1], "0", 1); + + /* Wait until the child is done writing. */ + wait(&ret); + if (!WIFEXITED(ret)) { + ksft_test_result_fail("wait() failed\n"); + goto close_pipe; + } + + /* See if we still read the old values. */ + for (total = 0; total < transferred; total += cur) { + cur = read(fds[0], new + total, transferred - total); + if (cur < 0) { + ksft_test_result_fail("read() failed\n"); + goto close_pipe; + } + } + + ksft_test_result(!memcmp(old, new, transferred), + "No leak from child into parent\n"); +close_pipe: + close(fds[0]); + close(fds[1]); +close_comm_pipes: + close_comm_pipes(&comm_pipes); +free: + free(old); + free(new); +} + +static void test_vmsplice_before_fork(char *mem, size_t size) +{ + do_test_vmsplice_in_parent(mem, size, true); +} + +static void test_vmsplice_after_fork(char *mem, size_t size) +{ + do_test_vmsplice_in_parent(mem, size, false); +} + +typedef void (*test_fn)(char *mem, size_t size); + +static void do_run_with_base_page(test_fn fn, bool swapout) +{ + char *mem; + int ret; + + mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (mem == MAP_FAILED) { + ksft_test_result_fail("mmap() failed\n"); + return; + } + + ret = madvise(mem, pagesize, MADV_NOHUGEPAGE); + /* Ignore if not around on a kernel. */ + if (ret && errno != EINVAL) { + ksft_test_result_fail("MADV_NOHUGEPAGE failed\n"); + goto munmap; + } + + /* Populate a base page. 
*/ + memset(mem, 0, pagesize); + + if (swapout) { + madvise(mem, pagesize, MADV_PAGEOUT); + if (!pagemap_is_swapped(pagemap_fd, mem)) { + ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n"); + goto munmap; + } + } + + fn(mem, pagesize); +munmap: + munmap(mem, pagesize); +} + +static void run_with_base_page(test_fn fn, const char *desc) +{ + ksft_print_msg("[RUN] %s ... with base page\n", desc); + do_run_with_base_page(fn, false); +} + +static void run_with_base_page_swap(test_fn fn, const char *desc) +{ + ksft_print_msg("[RUN] %s ... with swapped out base page\n", desc); + do_run_with_base_page(fn, true); +} + +struct test_case { + const char *desc; + test_fn fn; +}; + +static const struct test_case test_cases[] = { + /* + * Basic COW tests for fork() without any GUP. If we miss to break COW, + * either the child can observe modifications by the parent or the + * other way around. + */ + { + "Basic COW after fork()", + test_cow_in_parent, + }, + /* + * vmsplice() [R/O GUP] + unmap in the child; modify in the parent. If + * we miss to break COW, the child observes modifications by the parent. + * This is CVE-2020-29374 reported by Jann Horn. + */ + { + "vmsplice() + unmap in child", + test_vmsplice_in_child + }, + /* + * vmsplice() [R/O GUP] in parent before fork(), unmap in parent after + * fork(); modify in the child. If we miss to break COW, the parent + * observes modifications by the child. + */ + { + "vmsplice() before fork(), unmap in parent after fork()", + test_vmsplice_before_fork, + }, + /* + * vmsplice() [R/O GUP] + unmap in parent after fork(); modify in the + * child. If we miss to break COW, the parent observes modifications by + * the child. 
+ */ + { + "vmsplice() + unmap in parent after fork()", + test_vmsplice_after_fork, + }, +}; + +static void run_test_case(struct test_case const *test_case) +{ + run_with_base_page(test_case->fn, test_case->desc); + run_with_base_page_swap(test_case->fn, test_case->desc); +} + +static void run_test_cases(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(test_cases); i++) + run_test_case(&test_cases[i]); +} + +int main(int argc, char **argv) +{ + int nr_test_cases = ARRAY_SIZE(test_cases); + int err; + + pagesize = getpagesize(); + + ksft_print_header(); + ksft_set_plan(nr_test_cases * 2); + + pagemap_fd = open("/proc/self/pagemap", O_RDONLY); + if (pagemap_fd < 0) + ksft_exit_fail_msg("opening pagemap failed\n"); + + run_test_cases(); + + err = ksft_get_fail_cnt(); + if (err) + ksft_exit_fail_msg("%d out of %d tests failed\n", + err, ksft_test_num()); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh index 0dc9f545a32d..1fa783732296 100755 --- a/tools/testing/selftests/vm/run_vmtests.sh +++ b/tools/testing/selftests/vm/run_vmtests.sh @@ -185,4 +185,7 @@ fi run_test ./soft-dirty +# COW tests for anonymous memory +run_test ./anon_cow + exit $exitcode diff --git a/tools/testing/selftests/vm/vm_util.c b/tools/testing/selftests/vm/vm_util.c index f11f8adda521..37dd230673ee 100644 --- a/tools/testing/selftests/vm/vm_util.c +++ b/tools/testing/selftests/vm/vm_util.c @@ -28,6 +28,13 @@ bool pagemap_is_softdirty(int fd, char *start) return entry & 0x0080000000000000ull; } +bool pagemap_is_swapped(int fd, char *start) +{ + uint64_t entry = pagemap_get_entry(fd, start); + + return entry & 0x4000000000000000ull; +} + void clear_softdirty(void) { int ret; diff --git a/tools/testing/selftests/vm/vm_util.h b/tools/testing/selftests/vm/vm_util.h index 5c35de454e08..833df81b2694 100644 --- a/tools/testing/selftests/vm/vm_util.h +++ b/tools/testing/selftests/vm/vm_util.h @@ -4,6 +4,7 @@ uint64_t 
pagemap_get_entry(int fd, char *start); bool pagemap_is_softdirty(int fd, char *start); +bool pagemap_is_swapped(int fd, char *start); void clear_softdirty(void); bool check_for_pattern(FILE *fp, const char *pattern, char *buf, size_t len); uint64_t read_pmd_pagesize(void); From a905e82ae44b22a25ea73415a3ec1228775eb9a9 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 27 Sep 2022 13:01:15 +0200 Subject: [PATCH 1063/4122] selftests/vm: factor out pagemap_is_populated() into vm_util We'll reuse it in the anon_cow test next. Link: https://lkml.kernel.org/r/20220927110120.106906-3-david@redhat.com Signed-off-by: David Hildenbrand Cc: Andrea Arcangeli Cc: Christoph von Recklinghausen Cc: Don Dutile Cc: Jason Gunthorpe Cc: John Hubbard Cc: Mike Rapoport Cc: Nadav Amit Cc: Peter Xu Cc: Shuah Khan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/madv_populate.c | 8 -------- tools/testing/selftests/vm/vm_util.c | 8 ++++++++ tools/testing/selftests/vm/vm_util.h | 1 + 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/vm/madv_populate.c b/tools/testing/selftests/vm/madv_populate.c index 715a42e8e2cd..60547245e479 100644 --- a/tools/testing/selftests/vm/madv_populate.c +++ b/tools/testing/selftests/vm/madv_populate.c @@ -27,14 +27,6 @@ static size_t pagesize; -static bool pagemap_is_populated(int fd, char *start) -{ - uint64_t entry = pagemap_get_entry(fd, start); - - /* Present or swapped. 
*/ - return entry & 0xc000000000000000ull; -} - static void sense_support(void) { char *addr; diff --git a/tools/testing/selftests/vm/vm_util.c b/tools/testing/selftests/vm/vm_util.c index 37dd230673ee..5bbf7641a0f0 100644 --- a/tools/testing/selftests/vm/vm_util.c +++ b/tools/testing/selftests/vm/vm_util.c @@ -35,6 +35,14 @@ bool pagemap_is_swapped(int fd, char *start) return entry & 0x4000000000000000ull; } +bool pagemap_is_populated(int fd, char *start) +{ + uint64_t entry = pagemap_get_entry(fd, start); + + /* Present or swapped. */ + return entry & 0xc000000000000000ull; +} + void clear_softdirty(void) { int ret; diff --git a/tools/testing/selftests/vm/vm_util.h b/tools/testing/selftests/vm/vm_util.h index 833df81b2694..80d5a6ad413b 100644 --- a/tools/testing/selftests/vm/vm_util.h +++ b/tools/testing/selftests/vm/vm_util.h @@ -5,6 +5,7 @@ uint64_t pagemap_get_entry(int fd, char *start); bool pagemap_is_softdirty(int fd, char *start); bool pagemap_is_swapped(int fd, char *start); +bool pagemap_is_populated(int fd, char *start); void clear_softdirty(void); bool check_for_pattern(FILE *fp, const char *pattern, char *buf, size_t len); uint64_t read_pmd_pagesize(void); From f4b5fd6946e244cdedc3bbb9a1f24c8133b2077a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 27 Sep 2022 13:01:16 +0200 Subject: [PATCH 1064/4122] selftests/vm: anon_cow: THP tests Let's add various THP variants that we'll run with our existing test cases. 
Link: https://lkml.kernel.org/r/20220927110120.106906-4-david@redhat.com Signed-off-by: David Hildenbrand Cc: Andrea Arcangeli Cc: Christoph von Recklinghausen Cc: Don Dutile Cc: Jason Gunthorpe Cc: John Hubbard Cc: Mike Rapoport Cc: Nadav Amit Cc: Peter Xu Cc: Shuah Khan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/anon_cow.c | 259 +++++++++++++++++++++++++- 1 file changed, 258 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/vm/anon_cow.c b/tools/testing/selftests/vm/anon_cow.c index 4613294af758..c1681c9d255f 100644 --- a/tools/testing/selftests/vm/anon_cow.c +++ b/tools/testing/selftests/vm/anon_cow.c @@ -24,6 +24,43 @@ static size_t pagesize; static int pagemap_fd; +static size_t thpsize; + +static void detect_thpsize(void) +{ + int fd = open("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", + O_RDONLY); + size_t size = 0; + char buf[15]; + int ret; + + if (fd < 0) + return; + + ret = pread(fd, buf, sizeof(buf), 0); + if (ret > 0 && ret < sizeof(buf)) { + buf[ret] = 0; + + size = strtoul(buf, NULL, 10); + if (size < pagesize) + size = 0; + if (size > 0) { + thpsize = size; + ksft_print_msg("[INFO] detected THP size: %zu KiB\n", + thpsize / 1024); + } + } + + close(fd); +} + +static bool range_is_swapped(void *addr, size_t size) +{ + for (; size; addr += pagesize, size -= pagesize) + if (!pagemap_is_swapped(pagemap_fd, addr)) + return false; + return true; +} struct comm_pipes { int child_ready[2]; @@ -319,6 +356,206 @@ static void run_with_base_page_swap(test_fn fn, const char *desc) do_run_with_base_page(fn, true); } +enum thp_run { + THP_RUN_PMD, + THP_RUN_PMD_SWAPOUT, + THP_RUN_PTE, + THP_RUN_PTE_SWAPOUT, + THP_RUN_SINGLE_PTE, + THP_RUN_SINGLE_PTE_SWAPOUT, + THP_RUN_PARTIAL_MREMAP, + THP_RUN_PARTIAL_SHARED, +}; + +static void do_run_with_thp(test_fn fn, enum thp_run thp_run) +{ + char *mem, *mmap_mem, *tmp, *mremap_mem = MAP_FAILED; + size_t size, mmap_size, mremap_size; + int ret; + + /* For alignment 
purposes, we need twice the thp size. */ + mmap_size = 2 * thpsize; + mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (mmap_mem == MAP_FAILED) { + ksft_test_result_fail("mmap() failed\n"); + return; + } + + /* We need a THP-aligned memory area. */ + mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1)); + + ret = madvise(mem, thpsize, MADV_HUGEPAGE); + if (ret) { + ksft_test_result_fail("MADV_HUGEPAGE failed\n"); + goto munmap; + } + + /* + * Try to populate a THP. Touch the first sub-page and test if we get + * another sub-page populated automatically. + */ + mem[0] = 0; + if (!pagemap_is_populated(pagemap_fd, mem + pagesize)) { + ksft_test_result_skip("Did not get a THP populated\n"); + goto munmap; + } + memset(mem, 0, thpsize); + + size = thpsize; + switch (thp_run) { + case THP_RUN_PMD: + case THP_RUN_PMD_SWAPOUT: + break; + case THP_RUN_PTE: + case THP_RUN_PTE_SWAPOUT: + /* + * Trigger PTE-mapping the THP by temporarily mapping a single + * subpage R/O. + */ + ret = mprotect(mem + pagesize, pagesize, PROT_READ); + if (ret) { + ksft_test_result_fail("mprotect() failed\n"); + goto munmap; + } + ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE); + if (ret) { + ksft_test_result_fail("mprotect() failed\n"); + goto munmap; + } + break; + case THP_RUN_SINGLE_PTE: + case THP_RUN_SINGLE_PTE_SWAPOUT: + /* + * Discard all but a single subpage of that PTE-mapped THP. What + * remains is a single PTE mapping a single subpage. + */ + ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTNEED); + if (ret) { + ksft_test_result_fail("MADV_DONTNEED failed\n"); + goto munmap; + } + size = pagesize; + break; + case THP_RUN_PARTIAL_MREMAP: + /* + * Remap half of the THP. We need some new memory location + * for that. 
+ */ + mremap_size = thpsize / 2; + mremap_mem = mmap(NULL, mremap_size, PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (mem == MAP_FAILED) { + ksft_test_result_fail("mmap() failed\n"); + goto munmap; + } + tmp = mremap(mem + mremap_size, mremap_size, mremap_size, + MREMAP_MAYMOVE | MREMAP_FIXED, mremap_mem); + if (tmp != mremap_mem) { + ksft_test_result_fail("mremap() failed\n"); + goto munmap; + } + size = mremap_size; + break; + case THP_RUN_PARTIAL_SHARED: + /* + * Share the first page of the THP with a child and quit the + * child. This will result in some parts of the THP never + * have been shared. + */ + ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTFORK); + if (ret) { + ksft_test_result_fail("MADV_DONTFORK failed\n"); + goto munmap; + } + ret = fork(); + if (ret < 0) { + ksft_test_result_fail("fork() failed\n"); + goto munmap; + } else if (!ret) { + exit(0); + } + wait(&ret); + /* Allow for sharing all pages again. */ + ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DOFORK); + if (ret) { + ksft_test_result_fail("MADV_DOFORK failed\n"); + goto munmap; + } + break; + default: + assert(false); + } + + switch (thp_run) { + case THP_RUN_PMD_SWAPOUT: + case THP_RUN_PTE_SWAPOUT: + case THP_RUN_SINGLE_PTE_SWAPOUT: + madvise(mem, size, MADV_PAGEOUT); + if (!range_is_swapped(mem, size)) { + ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n"); + goto munmap; + } + break; + default: + break; + } + + fn(mem, size); +munmap: + munmap(mmap_mem, mmap_size); + if (mremap_mem != MAP_FAILED) + munmap(mremap_mem, mremap_size); +} + +static void run_with_thp(test_fn fn, const char *desc) +{ + ksft_print_msg("[RUN] %s ... with THP\n", desc); + do_run_with_thp(fn, THP_RUN_PMD); +} + +static void run_with_thp_swap(test_fn fn, const char *desc) +{ + ksft_print_msg("[RUN] %s ... 
with swapped-out THP\n", desc); + do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT); +} + +static void run_with_pte_mapped_thp(test_fn fn, const char *desc) +{ + ksft_print_msg("[RUN] %s ... with PTE-mapped THP\n", desc); + do_run_with_thp(fn, THP_RUN_PTE); +} + +static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc) +{ + ksft_print_msg("[RUN] %s ... with swapped-out, PTE-mapped THP\n", desc); + do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT); +} + +static void run_with_single_pte_of_thp(test_fn fn, const char *desc) +{ + ksft_print_msg("[RUN] %s ... with single PTE of THP\n", desc); + do_run_with_thp(fn, THP_RUN_SINGLE_PTE); +} + +static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc) +{ + ksft_print_msg("[RUN] %s ... with single PTE of swapped-out THP\n", desc); + do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT); +} + +static void run_with_partial_mremap_thp(test_fn fn, const char *desc) +{ + ksft_print_msg("[RUN] %s ... with partially mremap()'ed THP\n", desc); + do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP); +} + +static void run_with_partial_shared_thp(test_fn fn, const char *desc) +{ + ksft_print_msg("[RUN] %s ... 
with partially shared THP\n", desc); + do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED); +} + struct test_case { const char *desc; test_fn fn; @@ -367,6 +604,16 @@ static void run_test_case(struct test_case const *test_case) { run_with_base_page(test_case->fn, test_case->desc); run_with_base_page_swap(test_case->fn, test_case->desc); + if (thpsize) { + run_with_thp(test_case->fn, test_case->desc); + run_with_thp_swap(test_case->fn, test_case->desc); + run_with_pte_mapped_thp(test_case->fn, test_case->desc); + run_with_pte_mapped_thp_swap(test_case->fn, test_case->desc); + run_with_single_pte_of_thp(test_case->fn, test_case->desc); + run_with_single_pte_of_thp_swap(test_case->fn, test_case->desc); + run_with_partial_mremap_thp(test_case->fn, test_case->desc); + run_with_partial_shared_thp(test_case->fn, test_case->desc); + } } static void run_test_cases(void) @@ -377,15 +624,25 @@ static void run_test_cases(void) run_test_case(&test_cases[i]); } +static int tests_per_test_case(void) +{ + int tests = 2; + + if (thpsize) + tests += 8; + return tests; +} + int main(int argc, char **argv) { int nr_test_cases = ARRAY_SIZE(test_cases); int err; pagesize = getpagesize(); + detect_thpsize(); ksft_print_header(); - ksft_set_plan(nr_test_cases * 2); + ksft_set_plan(nr_test_cases * tests_per_test_case()); pagemap_fd = open("/proc/self/pagemap", O_RDONLY); if (pagemap_fd < 0) From 7dad331be7816103eba8c12caeb88fbd3599c0b9 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 27 Sep 2022 13:01:17 +0200 Subject: [PATCH 1065/4122] selftests/vm: anon_cow: hugetlb tests Let's run all existing test cases with all hugetlb sizes we're able to detect. Note that some tests cases still fail. This will, for example, be fixed once vmsplice properly uses FOLL_PIN instead of FOLL_GET for pinning. With 2 MiB and 1 GiB hugetlb on x86_64, the expected failures are: # [RUN] vmsplice() + unmap in child ... 
with hugetlb (2048 kB) not ok 23 No leak from parent into child # [RUN] vmsplice() + unmap in child ... with hugetlb (1048576 kB) not ok 24 No leak from parent into child # [RUN] vmsplice() before fork(), unmap in parent after fork() ... with hugetlb (2048 kB) not ok 35 No leak from child into parent # [RUN] vmsplice() before fork(), unmap in parent after fork() ... with hugetlb (1048576 kB) not ok 36 No leak from child into parent # [RUN] vmsplice() + unmap in parent after fork() ... with hugetlb (2048 kB) not ok 47 No leak from child into parent # [RUN] vmsplice() + unmap in parent after fork() ... with hugetlb (1048576 kB) not ok 48 No leak from child into parent Link: https://lkml.kernel.org/r/20220927110120.106906-5-david@redhat.com Signed-off-by: David Hildenbrand Cc: Andrea Arcangeli Cc: Christoph von Recklinghausen Cc: Don Dutile Cc: Jason Gunthorpe Cc: John Hubbard Cc: Mike Rapoport Cc: Nadav Amit Cc: Peter Xu Cc: Shuah Khan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/anon_cow.c | 70 ++++++++++++++++++++++++++- 1 file changed, 69 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/vm/anon_cow.c b/tools/testing/selftests/vm/anon_cow.c index c1681c9d255f..78dfdd983380 100644 --- a/tools/testing/selftests/vm/anon_cow.c +++ b/tools/testing/selftests/vm/anon_cow.c @@ -25,6 +25,8 @@ static size_t pagesize; static int pagemap_fd; static size_t thpsize; +static int nr_hugetlbsizes; +static size_t hugetlbsizes[10]; static void detect_thpsize(void) { @@ -54,6 +56,31 @@ static void detect_thpsize(void) close(fd); } +static void detect_hugetlbsizes(void) +{ + DIR *dir = opendir("/sys/kernel/mm/hugepages/"); + + if (!dir) + return; + + while (nr_hugetlbsizes < ARRAY_SIZE(hugetlbsizes)) { + struct dirent *entry = readdir(dir); + size_t kb; + + if (!entry) + break; + if (entry->d_type != DT_DIR) + continue; + if (sscanf(entry->d_name, "hugepages-%zukB", &kb) != 1) + continue; + hugetlbsizes[nr_hugetlbsizes] = kb * 1024; + 
nr_hugetlbsizes++; + ksft_print_msg("[INFO] detected hugetlb size: %zu KiB\n", + kb); + } + closedir(dir); +} + static bool range_is_swapped(void *addr, size_t size) { for (; size; addr += pagesize, size -= pagesize) @@ -556,6 +583,41 @@ static void run_with_partial_shared_thp(test_fn fn, const char *desc) do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED); } +static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize) +{ + int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB; + char *mem, *dummy; + + ksft_print_msg("[RUN] %s ... with hugetlb (%zu kB)\n", desc, + hugetlbsize / 1024); + + flags |= __builtin_ctzll(hugetlbsize) << MAP_HUGE_SHIFT; + + mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0); + if (mem == MAP_FAILED) { + ksft_test_result_skip("need more free huge pages\n"); + return; + } + + /* Populate an huge page. */ + memset(mem, 0, hugetlbsize); + + /* + * We need a total of two hugetlb pages to handle COW/unsharing + * properly, otherwise we might get zapped by a SIGBUS. 
+ */ + dummy = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0); + if (dummy == MAP_FAILED) { + ksft_test_result_skip("need more free huge pages\n"); + goto munmap; + } + munmap(dummy, hugetlbsize); + + fn(mem, hugetlbsize); +munmap: + munmap(mem, hugetlbsize); +} + struct test_case { const char *desc; test_fn fn; @@ -602,6 +664,8 @@ static const struct test_case test_cases[] = { static void run_test_case(struct test_case const *test_case) { + int i; + run_with_base_page(test_case->fn, test_case->desc); run_with_base_page_swap(test_case->fn, test_case->desc); if (thpsize) { @@ -614,6 +678,9 @@ static void run_test_case(struct test_case const *test_case) run_with_partial_mremap_thp(test_case->fn, test_case->desc); run_with_partial_shared_thp(test_case->fn, test_case->desc); } + for (i = 0; i < nr_hugetlbsizes; i++) + run_with_hugetlb(test_case->fn, test_case->desc, + hugetlbsizes[i]); } static void run_test_cases(void) @@ -626,7 +693,7 @@ static void run_test_cases(void) static int tests_per_test_case(void) { - int tests = 2; + int tests = 2 + nr_hugetlbsizes; if (thpsize) tests += 8; @@ -640,6 +707,7 @@ int main(int argc, char **argv) pagesize = getpagesize(); detect_thpsize(); + detect_hugetlbsizes(); ksft_print_header(); ksft_set_plan(nr_test_cases * tests_per_test_case()); From e487ebbd12986facc7f77129d3ca80de84841170 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 27 Sep 2022 13:01:18 +0200 Subject: [PATCH 1066/4122] selftests/vm: anon_cow: add liburing test cases io_uring provides a simple mechanism to test long-term, R/W GUP pins -- via fixed buffers -- and can be used to verify that GUP pins stay in sync with the pages in the page table even if a page would temporarily get mapped R/O or concurrent fork() could accidentally end up sharing pinned pages with the child.
Note that this essentially re-introduces local_config support that was removed recently in commit 6f83d6c74ea5 ("Kselftests: remove support of libhugetlbfs from kselftests"). [david@redhat.com: s/size_t/ssize_t/ on `cur', `total'.] Link: https://lkml.kernel.org/r/445fe1ae-9e22-0d1d-4d09-272231d2f84a@redhat.com Link: https://lkml.kernel.org/r/20220927110120.106906-6-david@redhat.com Signed-off-by: David Hildenbrand Cc: Andrea Arcangeli Cc: Christoph von Recklinghausen Cc: Don Dutile Cc: Jason Gunthorpe Cc: John Hubbard Cc: Mike Rapoport Cc: Nadav Amit Cc: Peter Xu Cc: Shuah Khan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/Makefile | 21 ++- tools/testing/selftests/vm/anon_cow.c | 190 +++++++++++++++++++++ tools/testing/selftests/vm/check_config.sh | 31 ++++ 3 files changed, 241 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/vm/check_config.sh diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index ad07d7a84c3e..00920cb8b499 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -1,7 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for vm selftests -LOCAL_HDRS += $(top_srcdir)/mm/gup_test.h +LOCAL_HDRS += $(selfdir)/vm/local_config.h $(top_srcdir)/mm/gup_test.h + +include local_config.mk uname_M := $(shell uname -m 2>/dev/null || echo not) MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/' -e 's/ppc64.*/ppc64/') @@ -152,8 +154,25 @@ warn_32bit_failure: endif endif +# ANON_COW_EXTRA_LIBS may get set in local_config.mk, or it may be left empty. 
+$(OUTPUT)/anon_cow: LDLIBS += $(ANON_COW_EXTRA_LIBS) + $(OUTPUT)/mlock-random-test $(OUTPUT)/memfd_secret: LDLIBS += -lcap $(OUTPUT)/ksm_tests: LDLIBS += -lnuma $(OUTPUT)/migration: LDLIBS += -lnuma + +local_config.mk local_config.h: check_config.sh + /bin/sh ./check_config.sh $(CC) + +EXTRA_CLEAN += local_config.mk local_config.h + +ifeq ($(ANON_COW_EXTRA_LIBS),) +all: warn_missing_liburing + +warn_missing_liburing: + @echo ; \ + echo "Warning: missing liburing support. Some COW tests will be skipped." ; \ + echo +endif diff --git a/tools/testing/selftests/vm/anon_cow.c b/tools/testing/selftests/vm/anon_cow.c index 78dfdd983380..01417a604eda 100644 --- a/tools/testing/selftests/vm/anon_cow.c +++ b/tools/testing/selftests/vm/anon_cow.c @@ -19,6 +19,11 @@ #include #include +#include "local_config.h" +#ifdef LOCAL_CONFIG_HAVE_LIBURING +#include +#endif /* LOCAL_CONFIG_HAVE_LIBURING */ + #include "../kselftest.h" #include "vm_util.h" @@ -334,6 +339,170 @@ static void test_vmsplice_after_fork(char *mem, size_t size) do_test_vmsplice_in_parent(mem, size, false); } +#ifdef LOCAL_CONFIG_HAVE_LIBURING +static void do_test_iouring(char *mem, size_t size, bool use_fork) +{ + struct comm_pipes comm_pipes; + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + struct io_uring ring; + ssize_t cur, total; + struct iovec iov; + char *buf, *tmp; + int ret, fd; + FILE *file; + + ret = setup_comm_pipes(&comm_pipes); + if (ret) { + ksft_test_result_fail("pipe() failed\n"); + return; + } + + file = tmpfile(); + if (!file) { + ksft_test_result_fail("tmpfile() failed\n"); + goto close_comm_pipes; + } + fd = fileno(file); + assert(fd); + + tmp = malloc(size); + if (!tmp) { + ksft_test_result_fail("malloc() failed\n"); + goto close_file; + } + + /* Skip on errors, as we might just lack kernel support. 
*/ + ret = io_uring_queue_init(1, &ring, 0); + if (ret < 0) { + ksft_test_result_skip("io_uring_queue_init() failed\n"); + goto free_tmp; + } + + /* + * Register the range as a fixed buffer. This will FOLL_WRITE | FOLL_PIN + * | FOLL_LONGTERM the range. + * + * Skip on errors, as we might just lack kernel support or might not + * have sufficient MEMLOCK permissions. + */ + iov.iov_base = mem; + iov.iov_len = size; + ret = io_uring_register_buffers(&ring, &iov, 1); + if (ret) { + ksft_test_result_skip("io_uring_register_buffers() failed\n"); + goto queue_exit; + } + + if (use_fork) { + /* + * fork() and keep the child alive until we're done. Note that + * we expect the pinned page to not get shared with the child. + */ + ret = fork(); + if (ret < 0) { + ksft_test_result_fail("fork() failed\n"); + goto unregister_buffers; + } else if (!ret) { + write(comm_pipes.child_ready[1], "0", 1); + while (read(comm_pipes.parent_ready[0], &buf, 1) != 1) + ; + exit(0); + } + + while (read(comm_pipes.child_ready[0], &buf, 1) != 1) + ; + } else { + /* + * Map the page R/O into the page table. Enable softdirty + * tracking to stop the page from getting mapped R/W immediately + * again by mprotect() optimizations. Note that we don't have an + * easy way to test if that worked (the pagemap does not export + * if the page is mapped R/O vs. R/W). + */ + ret = mprotect(mem, size, PROT_READ); + clear_softdirty(); + ret |= mprotect(mem, size, PROT_READ | PROT_WRITE); + if (ret) { + ksft_test_result_fail("mprotect() failed\n"); + goto unregister_buffers; + } + } + + /* + * Modify the page and write page content as observed by the fixed + * buffer pin to the file so we can verify it. 
+ */ + memset(mem, 0xff, size); + sqe = io_uring_get_sqe(&ring); + if (!sqe) { + ksft_test_result_fail("io_uring_get_sqe() failed\n"); + goto quit_child; + } + io_uring_prep_write_fixed(sqe, fd, mem, size, 0, 0); + + ret = io_uring_submit(&ring); + if (ret < 0) { + ksft_test_result_fail("io_uring_submit() failed\n"); + goto quit_child; + } + + ret = io_uring_wait_cqe(&ring, &cqe); + if (ret < 0) { + ksft_test_result_fail("io_uring_wait_cqe() failed\n"); + goto quit_child; + } + + if (cqe->res != size) { + ksft_test_result_fail("write_fixed failed\n"); + goto quit_child; + } + io_uring_cqe_seen(&ring, cqe); + + /* Read back the file content to the temporary buffer. */ + total = 0; + while (total < size) { + cur = pread(fd, tmp + total, size - total, total); + if (cur < 0) { + ksft_test_result_fail("pread() failed\n"); + goto quit_child; + } + total += cur; + } + + /* Finally, check if we read what we expected. */ + ksft_test_result(!memcmp(mem, tmp, size), + "Longterm R/W pin is reliable\n"); + +quit_child: + if (use_fork) { + write(comm_pipes.parent_ready[1], "0", 1); + wait(&ret); + } +unregister_buffers: + io_uring_unregister_buffers(&ring); +queue_exit: + io_uring_queue_exit(&ring); +free_tmp: + free(tmp); +close_file: + fclose(file); +close_comm_pipes: + close_comm_pipes(&comm_pipes); +} + +static void test_iouring_ro(char *mem, size_t size) +{ + do_test_iouring(mem, size, false); +} + +static void test_iouring_fork(char *mem, size_t size) +{ + do_test_iouring(mem, size, true); +} + +#endif /* LOCAL_CONFIG_HAVE_LIBURING */ + typedef void (*test_fn)(char *mem, size_t size); static void do_run_with_base_page(test_fn fn, bool swapout) @@ -660,6 +829,27 @@ static const struct test_case test_cases[] = { "vmsplice() + unmap in parent after fork()", test_vmsplice_after_fork, }, +#ifdef LOCAL_CONFIG_HAVE_LIBURING + /* + * Take a R/W longterm pin and then map the page R/O into the page + * table to trigger a write fault on next access. 
When modifying the + * page, the page content must be visible via the pin. + */ + { + "R/O-mapping a page registered as iouring fixed buffer", + test_iouring_ro, + }, + /* + * Take a R/W longterm pin and then fork() a child. When modifying the + * page, the page content must be visible via the pin. We expect the + * pinned page to not get shared with the child. + */ + { + "fork() with an iouring fixed buffer", + test_iouring_fork, + }, + +#endif /* LOCAL_CONFIG_HAVE_LIBURING */ }; static void run_test_case(struct test_case const *test_case) diff --git a/tools/testing/selftests/vm/check_config.sh b/tools/testing/selftests/vm/check_config.sh new file mode 100644 index 000000000000..9a44c6520925 --- /dev/null +++ b/tools/testing/selftests/vm/check_config.sh @@ -0,0 +1,31 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Probe for libraries and create header files to record the results. Both C +# header files and Makefile include fragments are created. + +OUTPUT_H_FILE=local_config.h +OUTPUT_MKFILE=local_config.mk + +tmpname=$(mktemp) +tmpfile_c=${tmpname}.c +tmpfile_o=${tmpname}.o + +# liburing +echo "#include " > $tmpfile_c +echo "#include " >> $tmpfile_c +echo "int func(void) { return 0; }" >> $tmpfile_c + +CC=${1:?"Usage: $0 # example compiler: gcc"} +$CC -c $tmpfile_c -o $tmpfile_o >/dev/null 2>&1 + +if [ -f $tmpfile_o ]; then + echo "#define LOCAL_CONFIG_HAVE_LIBURING 1" > $OUTPUT_H_FILE + echo "ANON_COW_EXTRA_LIBS = -luring" > $OUTPUT_MKFILE +else + echo "// No liburing support found" > $OUTPUT_H_FILE + echo "# No liburing support found, so:" > $OUTPUT_MKFILE + echo "ANON_COW_EXTRA_LIBS = " >> $OUTPUT_MKFILE +fi + +rm ${tmpname}.* From c77369b437f983a862bb6741814670d4ad38478c Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 27 Sep 2022 13:01:19 +0200 Subject: [PATCH 1067/4122] mm/gup_test: start/stop/read functionality for PIN LONGTERM test We want an easy way to take a R/O or R/W longterm pin on a range and be able to observe the content of the 
pinned pages, so we can properly test how longterm pins interact with our COW logic. [david@redhat.com: silence a warning on 32-bit] Link: https://lkml.kernel.org/r/74adbb51-6e33-f636-8a9c-2ad87bd9007e@redhat.com [yang.lee@linux.alibaba.com: ./mm/gup_test.c:281:2-3: Unneeded semicolon] Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=2455 Link: https://lkml.kernel.org/r/20221020024035.113619-1-yang.lee@linux.alibaba.com Link: https://lkml.kernel.org/r/20220927110120.106906-7-david@redhat.com Signed-off-by: David Hildenbrand Signed-off-by: Yang Li Cc: Andrea Arcangeli Cc: Christoph von Recklinghausen Cc: Don Dutile Cc: Jason Gunthorpe Cc: John Hubbard Cc: Mike Rapoport Cc: Nadav Amit Cc: Peter Xu Cc: Shuah Khan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- mm/gup_test.c | 141 ++++++++++++++++++++++++++++++++++++++++++++++++++ mm/gup_test.h | 12 +++++ 2 files changed, 153 insertions(+) diff --git a/mm/gup_test.c b/mm/gup_test.c index 12b0a91767d3..0d76d9b4bb5a 100644 --- a/mm/gup_test.c +++ b/mm/gup_test.c @@ -203,6 +203,135 @@ free_pages: return ret; } +static DEFINE_MUTEX(pin_longterm_test_mutex); +static struct page **pin_longterm_test_pages; +static unsigned long pin_longterm_test_nr_pages; + +static inline void pin_longterm_test_stop(void) +{ + if (pin_longterm_test_pages) { + if (pin_longterm_test_nr_pages) + unpin_user_pages(pin_longterm_test_pages, + pin_longterm_test_nr_pages); + kfree(pin_longterm_test_pages); + pin_longterm_test_pages = NULL; + pin_longterm_test_nr_pages = 0; + } +} + +static inline int pin_longterm_test_start(unsigned long arg) +{ + long nr_pages, cur_pages, addr, remaining_pages; + int gup_flags = FOLL_LONGTERM; + struct pin_longterm_test args; + struct page **pages; + int ret = 0; + bool fast; + + if (pin_longterm_test_pages) + return -EINVAL; + + if (copy_from_user(&args, (void __user *)arg, sizeof(args))) + return -EFAULT; + + if (args.flags & + ~(PIN_LONGTERM_TEST_FLAG_USE_WRITE|PIN_LONGTERM_TEST_FLAG_USE_FAST)) + return
-EINVAL; + if (!IS_ALIGNED(args.addr | args.size, PAGE_SIZE)) + return -EINVAL; + if (args.size > LONG_MAX) + return -EINVAL; + nr_pages = args.size / PAGE_SIZE; + if (!nr_pages) + return -EINVAL; + + pages = kvcalloc(nr_pages, sizeof(void *), GFP_KERNEL); + if (!pages) + return -ENOMEM; + + if (args.flags & PIN_LONGTERM_TEST_FLAG_USE_WRITE) + gup_flags |= FOLL_WRITE; + fast = !!(args.flags & PIN_LONGTERM_TEST_FLAG_USE_FAST); + + if (!fast && mmap_read_lock_killable(current->mm)) { + kfree(pages); + return -EINTR; + } + + pin_longterm_test_pages = pages; + pin_longterm_test_nr_pages = 0; + + while (nr_pages - pin_longterm_test_nr_pages) { + remaining_pages = nr_pages - pin_longterm_test_nr_pages; + addr = args.addr + pin_longterm_test_nr_pages * PAGE_SIZE; + + if (fast) + cur_pages = pin_user_pages_fast(addr, remaining_pages, + gup_flags, pages); + else + cur_pages = pin_user_pages(addr, remaining_pages, + gup_flags, pages, NULL); + if (cur_pages < 0) { + pin_longterm_test_stop(); + ret = cur_pages; + break; + } + pin_longterm_test_nr_pages += cur_pages; + pages += cur_pages; + } + + if (!fast) + mmap_read_unlock(current->mm); + return ret; +} + +static inline int pin_longterm_test_read(unsigned long arg) +{ + __u64 user_addr; + unsigned long i; + + if (!pin_longterm_test_pages) + return -EINVAL; + + if (copy_from_user(&user_addr, (void __user *)arg, sizeof(user_addr))) + return -EFAULT; + + for (i = 0; i < pin_longterm_test_nr_pages; i++) { + void *addr = page_to_virt(pin_longterm_test_pages[i]); + + if (copy_to_user((void __user *)(unsigned long)user_addr, addr, + PAGE_SIZE)) + return -EFAULT; + user_addr += PAGE_SIZE; + } + return 0; +} + +static long pin_longterm_test_ioctl(struct file *filep, unsigned int cmd, + unsigned long arg) +{ + int ret = -EINVAL; + + if (mutex_lock_killable(&pin_longterm_test_mutex)) + return -EINTR; + + switch (cmd) { + case PIN_LONGTERM_TEST_START: + ret = pin_longterm_test_start(arg); + break; + case PIN_LONGTERM_TEST_STOP: + 
pin_longterm_test_stop(); + ret = 0; + break; + case PIN_LONGTERM_TEST_READ: + ret = pin_longterm_test_read(arg); + break; + } + + mutex_unlock(&pin_longterm_test_mutex); + return ret; +} + static long gup_test_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) { @@ -217,6 +346,10 @@ static long gup_test_ioctl(struct file *filep, unsigned int cmd, case PIN_BASIC_TEST: case DUMP_USER_PAGES_TEST: break; + case PIN_LONGTERM_TEST_START: + case PIN_LONGTERM_TEST_STOP: + case PIN_LONGTERM_TEST_READ: + return pin_longterm_test_ioctl(filep, cmd, arg); default: return -EINVAL; } @@ -234,9 +367,17 @@ static long gup_test_ioctl(struct file *filep, unsigned int cmd, return 0; } +static int gup_test_release(struct inode *inode, struct file *file) +{ + pin_longterm_test_stop(); + + return 0; +} + static const struct file_operations gup_test_fops = { .open = nonseekable_open, .unlocked_ioctl = gup_test_ioctl, + .release = gup_test_release, }; static int __init gup_test_init(void) diff --git a/mm/gup_test.h b/mm/gup_test.h index 887ac1d5f5bc..5b37b54e8bea 100644 --- a/mm/gup_test.h +++ b/mm/gup_test.h @@ -10,6 +10,9 @@ #define GUP_BASIC_TEST _IOWR('g', 4, struct gup_test) #define PIN_BASIC_TEST _IOWR('g', 5, struct gup_test) #define DUMP_USER_PAGES_TEST _IOWR('g', 6, struct gup_test) +#define PIN_LONGTERM_TEST_START _IOW('g', 7, struct pin_longterm_test) +#define PIN_LONGTERM_TEST_STOP _IO('g', 8) +#define PIN_LONGTERM_TEST_READ _IOW('g', 9, __u64) #define GUP_TEST_MAX_PAGES_TO_DUMP 8 @@ -30,4 +33,13 @@ struct gup_test { __u32 which_pages[GUP_TEST_MAX_PAGES_TO_DUMP]; }; +#define PIN_LONGTERM_TEST_FLAG_USE_WRITE 1 +#define PIN_LONGTERM_TEST_FLAG_USE_FAST 2 + +struct pin_longterm_test { + __u64 addr; + __u64 size; + __u32 flags; +}; + #endif /* __GUP_TEST_H */ From 6f1405efc61b6e686bdf6e2e09b41d2d3a5c14e2 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 27 Sep 2022 13:01:20 +0200 Subject: [PATCH 1068/4122] selftests/vm: anon_cow: add R/O longterm tests via 
gup_test Let's trigger a R/O longterm pin on three cases of R/O mapped anonymous pages: * exclusive (never shared) * shared (child still alive) * previously shared (child no longer alive) ... and make sure that the pin is reliable: whatever we write via the page tables has to be observable via the pin. Link: https://lkml.kernel.org/r/20220927110120.106906-8-david@redhat.com Signed-off-by: David Hildenbrand Cc: Andrea Arcangeli Cc: Christoph von Recklinghausen Cc: Don Dutile Cc: Jason Gunthorpe Cc: John Hubbard Cc: Mike Rapoport Cc: Nadav Amit Cc: Peter Xu Cc: Shuah Khan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/anon_cow.c | 210 ++++++++++++++++++++++++++ 1 file changed, 210 insertions(+) diff --git a/tools/testing/selftests/vm/anon_cow.c b/tools/testing/selftests/vm/anon_cow.c index 01417a604eda..705bd0b3db11 100644 --- a/tools/testing/selftests/vm/anon_cow.c +++ b/tools/testing/selftests/vm/anon_cow.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include "local_config.h" @@ -24,6 +25,7 @@ #include #endif /* LOCAL_CONFIG_HAVE_LIBURING */ +#include "../../../../mm/gup_test.h" #include "../kselftest.h" #include "vm_util.h" @@ -32,6 +34,7 @@ static int pagemap_fd; static size_t thpsize; static int nr_hugetlbsizes; static size_t hugetlbsizes[10]; +static int gup_fd; static void detect_thpsize(void) { @@ -503,6 +506,170 @@ static void test_iouring_fork(char *mem, size_t size) #endif /* LOCAL_CONFIG_HAVE_LIBURING */ +enum ro_pin_test { + RO_PIN_TEST_SHARED, + RO_PIN_TEST_PREVIOUSLY_SHARED, + RO_PIN_TEST_RO_EXCLUSIVE, +}; + +static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test, + bool fast) +{ + struct pin_longterm_test args; + struct comm_pipes comm_pipes; + char *tmp, buf; + __u64 tmp_val; + int ret; + + if (gup_fd < 0) { + ksft_test_result_skip("gup_test not available\n"); + return; + } + + tmp = malloc(size); + if (!tmp) { + ksft_test_result_fail("malloc() failed\n"); + return; + } + + ret = 
setup_comm_pipes(&comm_pipes); + if (ret) { + ksft_test_result_fail("pipe() failed\n"); + goto free_tmp; + } + + switch (test) { + case RO_PIN_TEST_SHARED: + case RO_PIN_TEST_PREVIOUSLY_SHARED: + /* + * Share the pages with our child. As the pages are not pinned, + * this should just work. + */ + ret = fork(); + if (ret < 0) { + ksft_test_result_fail("fork() failed\n"); + goto close_comm_pipes; + } else if (!ret) { + write(comm_pipes.child_ready[1], "0", 1); + while (read(comm_pipes.parent_ready[0], &buf, 1) != 1) + ; + exit(0); + } + + /* Wait until our child is ready. */ + while (read(comm_pipes.child_ready[0], &buf, 1) != 1) + ; + + if (test == RO_PIN_TEST_PREVIOUSLY_SHARED) { + /* + * Tell the child to quit now and wait until it quit. + * The pages should now be mapped R/O into our page + * tables, but they are no longer shared. + */ + write(comm_pipes.parent_ready[1], "0", 1); + wait(&ret); + if (!WIFEXITED(ret)) + ksft_print_msg("[INFO] wait() failed\n"); + } + break; + case RO_PIN_TEST_RO_EXCLUSIVE: + /* + * Map the page R/O into the page table. Enable softdirty + * tracking to stop the page from getting mapped R/W immediately + * again by mprotect() optimizations. Note that we don't have an + * easy way to test if that worked (the pagemap does not export + * if the page is mapped R/O vs. R/W). + */ + ret = mprotect(mem, size, PROT_READ); + clear_softdirty(); + ret |= mprotect(mem, size, PROT_READ | PROT_WRITE); + if (ret) { + ksft_test_result_fail("mprotect() failed\n"); + goto close_comm_pipes; + } + break; + default: + assert(false); + } + + /* Take a R/O pin. This should trigger unsharing. */ + args.addr = (__u64)mem; + args.size = size; + args.flags = fast ? PIN_LONGTERM_TEST_FLAG_USE_FAST : 0; + ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args); + if (ret) { + if (errno == EINVAL) + ksft_test_result_skip("PIN_LONGTERM_TEST_START failed\n"); + else + ksft_test_result_fail("PIN_LONGTERM_TEST_START failed\n"); + goto wait; + } + + /* Modify the page. 
*/ + memset(mem, 0xff, size); + + /* + * Read back the content via the pin to the temporary buffer and + * test if we observed the modification. + */ + tmp_val = (__u64)tmp; + ret = ioctl(gup_fd, PIN_LONGTERM_TEST_READ, &tmp_val); + if (ret) + ksft_test_result_fail("PIN_LONGTERM_TEST_READ failed\n"); + else + ksft_test_result(!memcmp(mem, tmp, size), + "Longterm R/O pin is reliable\n"); + + ret = ioctl(gup_fd, PIN_LONGTERM_TEST_STOP); + if (ret) + ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed\n"); +wait: + switch (test) { + case RO_PIN_TEST_SHARED: + write(comm_pipes.parent_ready[1], "0", 1); + wait(&ret); + if (!WIFEXITED(ret)) + ksft_print_msg("[INFO] wait() failed\n"); + break; + default: + break; + } +close_comm_pipes: + close_comm_pipes(&comm_pipes); +free_tmp: + free(tmp); +} + +static void test_ro_pin_on_shared(char *mem, size_t size) +{ + do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, false); +} + +static void test_ro_fast_pin_on_shared(char *mem, size_t size) +{ + do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, true); +} + +static void test_ro_pin_on_ro_previously_shared(char *mem, size_t size) +{ + do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, false); +} + +static void test_ro_fast_pin_on_ro_previously_shared(char *mem, size_t size) +{ + do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, true); +} + +static void test_ro_pin_on_ro_exclusive(char *mem, size_t size) +{ + do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, false); +} + +static void test_ro_fast_pin_on_ro_exclusive(char *mem, size_t size) +{ + do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, true); +} + typedef void (*test_fn)(char *mem, size_t size); static void do_run_with_base_page(test_fn fn, bool swapout) @@ -850,6 +1017,48 @@ static const struct test_case test_cases[] = { }, #endif /* LOCAL_CONFIG_HAVE_LIBURING */ + /* + * Take a R/O longterm pin on a R/O-mapped shared anonymous page. 
+ * When modifying the page via the page table, the page content change + * must be visible via the pin. + */ + { + "R/O GUP pin on R/O-mapped shared page", + test_ro_pin_on_shared, + }, + /* Same as above, but using GUP-fast. */ + { + "R/O GUP-fast pin on R/O-mapped shared page", + test_ro_fast_pin_on_shared, + }, + /* + * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page that + * was previously shared. When modifying the page via the page table, + * the page content change must be visible via the pin. + */ + { + "R/O GUP pin on R/O-mapped previously-shared page", + test_ro_pin_on_ro_previously_shared, + }, + /* Same as above, but using GUP-fast. */ + { + "R/O GUP-fast pin on R/O-mapped previously-shared page", + test_ro_fast_pin_on_ro_previously_shared, + }, + /* + * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page. + * When modifying the page via the page table, the page content change + * must be visible via the pin. + */ + { + "R/O GUP pin on R/O-mapped exclusive page", + test_ro_pin_on_ro_exclusive, + }, + /* Same as above, but using GUP-fast. */ + { + "R/O GUP-fast pin on R/O-mapped exclusive page", + test_ro_fast_pin_on_ro_exclusive, + }, }; static void run_test_case(struct test_case const *test_case) @@ -902,6 +1111,7 @@ int main(int argc, char **argv) ksft_print_header(); ksft_set_plan(nr_test_cases * tests_per_test_case()); + gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR); pagemap_fd = open("/proc/self/pagemap", O_RDONLY); if (pagemap_fd < 0) ksft_exit_fail_msg("opening pagemap failed\n"); From f3ad032c2d06be970d78384885c63917974a4af4 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Tue, 27 Sep 2022 14:38:26 +0800 Subject: [PATCH 1069/4122] mm: rmap: rename page_not_mapped() to folio_not_mapped() Since commit 2f031c6f042c ("mm/rmap: Convert rmap_walk() to take a folio"), page_not_mapped() takes folio as parameter, rename it to be consistent. 
Link: https://lkml.kernel.org/r/20220927063826.159590-1-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Reviewed-by: David Hildenbrand Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- mm/rmap.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/rmap.c b/mm/rmap.c index 92ed6fe3d038..9bba65b30e4d 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1801,7 +1801,7 @@ static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg) return vma_is_temporary_stack(vma); } -static int page_not_mapped(struct folio *folio) +static int folio_not_mapped(struct folio *folio) { return !folio_mapped(folio); } @@ -1822,7 +1822,7 @@ void try_to_unmap(struct folio *folio, enum ttu_flags flags) struct rmap_walk_control rwc = { .rmap_one = try_to_unmap_one, .arg = (void *)flags, - .done = page_not_mapped, + .done = folio_not_mapped, .anon_lock = folio_lock_anon_vma_read, }; @@ -2150,7 +2150,7 @@ void try_to_migrate(struct folio *folio, enum ttu_flags flags) struct rmap_walk_control rwc = { .rmap_one = try_to_migrate_one, .arg = (void *)flags, - .done = page_not_mapped, + .done = folio_not_mapped, .anon_lock = folio_lock_anon_vma_read, }; @@ -2297,7 +2297,7 @@ static bool folio_make_device_exclusive(struct folio *folio, }; struct rmap_walk_control rwc = { .rmap_one = page_make_device_exclusive_one, - .done = page_not_mapped, + .done = folio_not_mapped, .anon_lock = folio_lock_anon_vma_read, .arg = &args, }; From f9e60beceee5c85dc9d5e71c1090cfed97ab0897 Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Fri, 23 Sep 2022 11:33:40 +0800 Subject: [PATCH 1070/4122] cgroup/cpuset: use hotplug_memory_notifier() directly Patch series "mm: Use hotplug_memory_notifier() instead of register_hotmemory_notifier()", v4. 
Commit f02c69680088 ("include/linux/memory.h: implement register_hotmemory_notifier()") introduced register_hotmemory_notifier() to avoid a compile problem with gcc-4.4.4: When CONFIG_MEMORY_HOTPLUG=n, we don't want the memory-hotplug notifier handlers to be included in the .o files, for space reasons. The existing hotplug_memory_notifier() tries to handle this but testing with gcc-4.4.4 shows that it doesn't work - the hotplug functions are still present in the .o files. Since commit 76ae847497bc52 ("Documentation: raise minimum supported version of GCC to 5.1") has already updated the minimum gcc version to 5.1, the previous problem mentioned in f02c69680088 no longer exists. So we can now revert to using hotplug_memory_notifier() directly rather than register_hotmemory_notifier(). In the last patch, we move all hotplug memory notifier priorities to the same file for easy sorting. This patch (of 8): Commit 76ae847497bc52 ("Documentation: raise minimum supported version of GCC to 5.1") updated the minimum gcc version to 5.1. So the problem mentioned in f02c69680088 ("include/linux/memory.h: implement register_hotmemory_notifier()") no longer exists. So we can now switch to using hotplug_memory_notifier() directly rather than register_hotmemory_notifier(). 
Link: https://lkml.kernel.org/r/20220923033347.3935160-1-liushixin2@huawei.com Link: https://lkml.kernel.org/r/20220923033347.3935160-2-liushixin2@huawei.com Signed-off-by: Liu Shixin Reviewed-by: David Hildenbrand Cc: Christoph Lameter Cc: Kefeng Wang Cc: Waiman Long Cc: zefan li Signed-off-by: Andrew Morton --- kernel/cgroup/cpuset.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index b474289c15b8..0c6db6a4f427 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -3630,11 +3630,6 @@ static int cpuset_track_online_nodes(struct notifier_block *self, return NOTIFY_OK; } -static struct notifier_block cpuset_track_online_nodes_nb = { - .notifier_call = cpuset_track_online_nodes, - .priority = 10, /* ??! */ -}; - /** * cpuset_init_smp - initialize cpus_allowed * @@ -3652,7 +3647,7 @@ void __init cpuset_init_smp(void) cpumask_copy(top_cpuset.effective_cpus, cpu_active_mask); top_cpuset.effective_mems = node_states[N_MEMORY]; - register_hotmemory_notifier(&cpuset_track_online_nodes_nb); + hotplug_memory_notifier(cpuset_track_online_nodes, 10); cpuset_migrate_mm_wq = alloc_ordered_workqueue("cpuset_migrate_mm", 0); BUG_ON(!cpuset_migrate_mm_wq); From 5d89c224328bce791d051bf60aa92d90bae93c01 Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Fri, 23 Sep 2022 11:33:41 +0800 Subject: [PATCH 1071/4122] fs/proc/kcore.c: use hotplug_memory_notifier() directly Commit 76ae847497bc52 ("Documentation: raise minimum supported version of GCC to 5.1") updated the minimum gcc version to 5.1. So the problem mentioned in f02c69680088 ("include/linux/memory.h: implement register_hotmemory_notifier()") no longer exist. So we can now switch to use hotplug_memory_notifier() directly rather than register_hotmemory_notifier(). 
Link: https://lkml.kernel.org/r/20220923033347.3935160-3-liushixin2@huawei.com Signed-off-by: Liu Shixin Reviewed-by: David Hildenbrand Cc: Christoph Lameter Cc: Kefeng Wang Cc: Waiman Long Cc: zefan li Signed-off-by: Andrew Morton --- fs/proc/kcore.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index dff921f7ca33..7692a360972d 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -638,10 +637,6 @@ static int __meminit kcore_callback(struct notifier_block *self, return NOTIFY_OK; } -static struct notifier_block kcore_callback_nb __meminitdata = { - .notifier_call = kcore_callback, - .priority = 0, -}; static struct kcore_list kcore_vmalloc; @@ -694,7 +689,7 @@ static int __init proc_kcore_init(void) add_modules_range(); /* Store direct-map area from physical memory map */ kcore_update_ram(); - register_hotmemory_notifier(&kcore_callback_nb); + hotplug_memory_notifier(kcore_callback, 0); return 0; } From 946d5f9c9dcdbaedcd664fad08cea7910139d10f Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Fri, 23 Sep 2022 11:33:42 +0800 Subject: [PATCH 1072/4122] mm/slub.c: use hotplug_memory_notifier() directly Commit 76ae847497bc52 ("Documentation: raise minimum supported version of GCC to 5.1") updated the minimum gcc version to 5.1. So the problem mentioned in f02c69680088 ("include/linux/memory.h: implement register_hotmemory_notifier()") no longer exist. So we can now switch to use hotplug_memory_notifier() directly rather than register_hotmemory_notifier(). 
Link: https://lkml.kernel.org/r/20220923033347.3935160-4-liushixin2@huawei.com Signed-off-by: Liu Shixin Reviewed-by: David Hildenbrand Cc: Christoph Lameter Cc: Kefeng Wang Cc: Waiman Long Cc: zefan li Signed-off-by: Andrew Morton --- mm/slub.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 157527d7101b..f37e6a51e233 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4771,11 +4771,6 @@ static int slab_memory_callback(struct notifier_block *self, return ret; } -static struct notifier_block slab_memory_callback_nb = { - .notifier_call = slab_memory_callback, - .priority = SLAB_CALLBACK_PRI, -}; - /******************************************************************** * Basic setup of slabs *******************************************************************/ @@ -4841,7 +4836,7 @@ void __init kmem_cache_init(void) create_boot_cache(kmem_cache_node, "kmem_cache_node", sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN, 0, 0); - register_hotmemory_notifier(&slab_memory_callback_nb); + hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI); /* Able to allocate the per node structures */ slab_state = PARTIAL; From cddb8d09ff1e477de8236a061a5017b21bab3c14 Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Fri, 23 Sep 2022 11:33:43 +0800 Subject: [PATCH 1073/4122] mm/mmap: use hotplug_memory_notifier() directly Commit 76ae847497bc52 ("Documentation: raise minimum supported version of GCC to 5.1") updated the minimum gcc version to 5.1. So the problem mentioned in f02c69680088 ("include/linux/memory.h: implement register_hotmemory_notifier()") no longer exist. So we can now switch to use hotplug_memory_notifier() directly rather than register_hotmemory_notifier(). 
Link: https://lkml.kernel.org/r/20220923033347.3935160-5-liushixin2@huawei.com Signed-off-by: Liu Shixin Reviewed-by: David Hildenbrand Cc: Christoph Lameter Cc: Kefeng Wang Cc: Waiman Long Cc: zefan li Signed-off-by: Andrew Morton --- mm/mmap.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index 2def55555e05..3f47fd57d165 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -3749,13 +3749,9 @@ static int reserve_mem_notifier(struct notifier_block *nb, return NOTIFY_OK; } -static struct notifier_block reserve_mem_nb = { - .notifier_call = reserve_mem_notifier, -}; - static int __meminit init_reserve_notifier(void) { - if (register_hotmemory_notifier(&reserve_mem_nb)) + if (hotplug_memory_notifier(reserve_mem_notifier, 0)) pr_err("Failed registering memory add/remove notifier for admin reserve\n"); return 0; From d46722ef1c090541d56f706f3a90f3f2e84cdf0c Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Fri, 23 Sep 2022 11:33:44 +0800 Subject: [PATCH 1074/4122] mm/mm_init.c: use hotplug_memory_notifier() directly Commit 76ae847497bc52 ("Documentation: raise minimum supported version of GCC to 5.1") updated the minimum gcc version to 5.1. So the problem mentioned in f02c69680088 ("include/linux/memory.h: implement register_hotmemory_notifier()") no longer exist. So we can now switch to use hotplug_memory_notifier() directly rather than register_hotmemory_notifier(). 
Link: https://lkml.kernel.org/r/20220923033347.3935160-6-liushixin2@huawei.com Signed-off-by: Liu Shixin Reviewed-by: David Hildenbrand Cc: Christoph Lameter Cc: Kefeng Wang Cc: Waiman Long Cc: zefan li Signed-off-by: Andrew Morton --- mm/mm_init.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/mm/mm_init.c b/mm/mm_init.c index 0d7b2bd2454a..44aadc162d1f 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -178,16 +178,10 @@ static int __meminit mm_compute_batch_notifier(struct notifier_block *self, return NOTIFY_OK; } -static struct notifier_block compute_batch_nb __meminitdata = { - .notifier_call = mm_compute_batch_notifier, - .priority = IPC_CALLBACK_PRI, /* use lowest priority */ -}; - static int __init mm_compute_batch_init(void) { mm_compute_batch(sysctl_overcommit_memory); - register_hotmemory_notifier(&compute_batch_nb); - + hotplug_memory_notifier(mm_compute_batch_notifier, IPC_CALLBACK_PRI); return 0; } From 82f8661a7982efea3e4437777e9f914781fac640 Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Fri, 23 Sep 2022 11:33:45 +0800 Subject: [PATCH 1075/4122] ACPI: HMAT: use hotplug_memory_notifier() directly Commit 76ae847497bc52 ("Documentation: raise minimum supported version of GCC to 5.1") updated the minimum gcc version to 5.1. So the problem mentioned in f02c69680088 ("include/linux/memory.h: implement register_hotmemory_notifier()") no longer exist. So we can now switch to use hotplug_memory_notifier() directly rather than register_hotmemory_notifier(). 
Link: https://lkml.kernel.org/r/20220923033347.3935160-7-liushixin2@huawei.com Signed-off-by: Liu Shixin Reviewed-by: David Hildenbrand Cc: Christoph Lameter Cc: Kefeng Wang Cc: Waiman Long Cc: zefan li Signed-off-by: Andrew Morton --- drivers/acpi/numa/hmat.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c index 23f49a2f4d14..0ecefb604734 100644 --- a/drivers/acpi/numa/hmat.c +++ b/drivers/acpi/numa/hmat.c @@ -767,11 +767,6 @@ static int hmat_callback(struct notifier_block *self, return NOTIFY_OK; } -static struct notifier_block hmat_callback_nb = { - .notifier_call = hmat_callback, - .priority = 2, -}; - static __init void hmat_free_structures(void) { struct memory_target *target, *tnext; @@ -854,7 +849,7 @@ static __init int hmat_init(void) hmat_register_targets(); /* Keep the table and structures if the notifier may use them */ - if (!register_hotmemory_notifier(&hmat_callback_nb)) + if (!hotplug_memory_notifier(hmat_callback, 2)) return 0; out_put: hmat_free_structures(); From eafd296e0cc0cc03b4ae01c2b3b07273514d757c Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Fri, 23 Sep 2022 11:33:46 +0800 Subject: [PATCH 1076/4122] memory: remove unused register_hotmemory_notifier() Remove unused register_hotmemory_notifier(). 
Link: https://lkml.kernel.org/r/20220923033347.3935160-8-liushixin2@huawei.com Signed-off-by: Liu Shixin Reviewed-by: David Hildenbrand Cc: Christoph Lameter Cc: Kefeng Wang Cc: Waiman Long Cc: zefan li Signed-off-by: Andrew Morton --- include/linux/memory.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/include/linux/memory.h b/include/linux/memory.h index aa619464a1df..98d2a2ebcc10 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -19,7 +19,6 @@ #include #include #include -#include #define MIN_MEMORY_BLOCK_SIZE (1UL << SECTION_SIZE_BITS) @@ -136,9 +135,6 @@ static inline int hotplug_memory_notifier(notifier_fn_t fn, int pri) { return 0; } -/* These aren't inline functions due to a GCC bug. */ -#define register_hotmemory_notifier(nb) ({ (void)(nb); 0; }) -#define unregister_hotmemory_notifier(nb) ({ (void)(nb); }) #else /* CONFIG_MEMORY_HOTPLUG */ extern int register_memory_notifier(struct notifier_block *nb); extern void unregister_memory_notifier(struct notifier_block *nb); @@ -166,8 +162,6 @@ int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func, { .notifier_call = fn, .priority = pri };\ register_memory_notifier(&fn##_mem_nb); \ }) -#define register_hotmemory_notifier(nb) register_memory_notifier(nb) -#define unregister_hotmemory_notifier(nb) unregister_memory_notifier(nb) #ifdef CONFIG_NUMA void memory_block_add_nid(struct memory_block *mem, int nid, From 1eeaa4fd39b0b1b3e986f8eab6978e69b01e3c5e Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Fri, 23 Sep 2022 11:33:47 +0800 Subject: [PATCH 1077/4122] memory: move hotplug memory notifier priority to same file for easy sorting The priority of hotplug memory callback is defined in a different file. And there are some callers using numbers directly. Collect them together into include/linux/memory.h for easy reading. This allows us to sort their priorities more intuitively without additional comments. 
Link: https://lkml.kernel.org/r/20220923033347.3935160-9-liushixin2@huawei.com Signed-off-by: Liu Shixin Cc: Christoph Lameter Cc: David Hildenbrand Cc: Kefeng Wang Cc: Waiman Long Cc: zefan li Signed-off-by: Andrew Morton --- drivers/acpi/numa/hmat.c | 2 +- fs/proc/kcore.c | 2 +- include/linux/memory-tiers.h | 1 - include/linux/memory.h | 9 +++++++-- kernel/cgroup/cpuset.c | 2 +- mm/kasan/shadow.c | 2 +- mm/ksm.c | 2 +- mm/memory-tiers.c | 2 +- mm/mm_init.c | 2 +- mm/mmap.c | 2 +- mm/page_ext.c | 2 +- 11 files changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c index 0ecefb604734..139e3b41653e 100644 --- a/drivers/acpi/numa/hmat.c +++ b/drivers/acpi/numa/hmat.c @@ -849,7 +849,7 @@ static __init int hmat_init(void) hmat_register_targets(); /* Keep the table and structures if the notifier may use them */ - if (!hotplug_memory_notifier(hmat_callback, 2)) + if (!hotplug_memory_notifier(hmat_callback, HMAT_CALLBACK_PRI)) return 0; out_put: hmat_free_structures(); diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 7692a360972d..98f3289556e4 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -689,7 +689,7 @@ static int __init proc_kcore_init(void) add_modules_range(); /* Store direct-map area from physical memory map */ kcore_update_ram(); - hotplug_memory_notifier(kcore_callback, 0); + hotplug_memory_notifier(kcore_callback, DEFAULT_CALLBACK_PRI); return 0; } diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h index 965009aa01d7..fc9647b1b4f9 100644 --- a/include/linux/memory-tiers.h +++ b/include/linux/memory-tiers.h @@ -18,7 +18,6 @@ * the same memory tier. 
*/ #define MEMTIER_ADISTANCE_DRAM ((4 * MEMTIER_CHUNK_SIZE) + (MEMTIER_CHUNK_SIZE >> 1)) -#define MEMTIER_HOTPLUG_PRIO 100 struct memory_tier; struct memory_dev_type { diff --git a/include/linux/memory.h b/include/linux/memory.h index 98d2a2ebcc10..463662ef7614 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -112,8 +112,13 @@ struct mem_section; * Priorities for the hotplug memory callback routines (stored in decreasing * order in the callback chain) */ -#define SLAB_CALLBACK_PRI 1 -#define IPC_CALLBACK_PRI 10 +#define DEFAULT_CALLBACK_PRI 0 +#define SLAB_CALLBACK_PRI 1 +#define HMAT_CALLBACK_PRI 2 +#define MM_COMPUTE_BATCH_PRI 10 +#define CPUSET_CALLBACK_PRI 10 +#define MEMTIER_HOTPLUG_PRI 100 +#define KSM_CALLBACK_PRI 100 #ifndef CONFIG_MEMORY_HOTPLUG static inline void memory_dev_init(void) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 0c6db6a4f427..3ea2e836e93e 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -3647,7 +3647,7 @@ void __init cpuset_init_smp(void) cpumask_copy(top_cpuset.effective_cpus, cpu_active_mask); top_cpuset.effective_mems = node_states[N_MEMORY]; - hotplug_memory_notifier(cpuset_track_online_nodes, 10); + hotplug_memory_notifier(cpuset_track_online_nodes, CPUSET_CALLBACK_PRI); cpuset_migrate_mm_wq = alloc_ordered_workqueue("cpuset_migrate_mm", 0); BUG_ON(!cpuset_migrate_mm_wq); diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c index 0e3648b603a6..2fba1f51f042 100644 --- a/mm/kasan/shadow.c +++ b/mm/kasan/shadow.c @@ -244,7 +244,7 @@ static int __meminit kasan_mem_notifier(struct notifier_block *nb, static int __init kasan_memhotplug_init(void) { - hotplug_memory_notifier(kasan_mem_notifier, 0); + hotplug_memory_notifier(kasan_mem_notifier, DEFAULT_CALLBACK_PRI); return 0; } diff --git a/mm/ksm.c b/mm/ksm.c index c19fcca9bc03..7ba97f86d831 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -3211,7 +3211,7 @@ static int __init ksm_init(void) #ifdef CONFIG_MEMORY_HOTREMOVE /* There is no 
significance to this priority 100 */ - hotplug_memory_notifier(ksm_memory_callback, 100); + hotplug_memory_notifier(ksm_memory_callback, KSM_CALLBACK_PRI); #endif return 0; diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c index fa8c9d07f9ce..939e200c283b 100644 --- a/mm/memory-tiers.c +++ b/mm/memory-tiers.c @@ -664,7 +664,7 @@ static int __init memory_tier_init(void) establish_demotion_targets(); mutex_unlock(&memory_tier_lock); - hotplug_memory_notifier(memtier_hotplug_callback, MEMTIER_HOTPLUG_PRIO); + hotplug_memory_notifier(memtier_hotplug_callback, MEMTIER_HOTPLUG_PRI); return 0; } subsys_initcall(memory_tier_init); diff --git a/mm/mm_init.c b/mm/mm_init.c index 44aadc162d1f..c1883362e71d 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -181,7 +181,7 @@ static int __meminit mm_compute_batch_notifier(struct notifier_block *self, static int __init mm_compute_batch_init(void) { mm_compute_batch(sysctl_overcommit_memory); - hotplug_memory_notifier(mm_compute_batch_notifier, IPC_CALLBACK_PRI); + hotplug_memory_notifier(mm_compute_batch_notifier, MM_COMPUTE_BATCH_PRI); return 0; } diff --git a/mm/mmap.c b/mm/mmap.c index 3f47fd57d165..c697771d406b 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -3751,7 +3751,7 @@ static int reserve_mem_notifier(struct notifier_block *nb, static int __meminit init_reserve_notifier(void) { - if (hotplug_memory_notifier(reserve_mem_notifier, 0)) + if (hotplug_memory_notifier(reserve_mem_notifier, DEFAULT_CALLBACK_PRI)) pr_err("Failed registering memory add/remove notifier for admin reserve\n"); return 0; diff --git a/mm/page_ext.c b/mm/page_ext.c index affe80243b6d..b2ff5c9129f4 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c @@ -513,7 +513,7 @@ void __init page_ext_init(void) cond_resched(); } } - hotplug_memory_notifier(page_ext_callback, 0); + hotplug_memory_notifier(page_ext_callback, DEFAULT_CALLBACK_PRI); pr_info("allocated %ld bytes of page_ext\n", total_usage); invoke_init_callbacks(); return; From 
3c0c9bc9c9596d5cd69529da822526f88673365b Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Tue, 18 Oct 2022 20:10:47 +0200 Subject: [PATCH 1078/4122] mm: vmalloc: add alloc_vmap_area trace event Patch series "Add basic trace events for vmap/vmalloc (v2)", v2. This small series adds some basic trace events for the vmap/vmalloc code. Since currently we lack any, sometimes it is hard to start debugging vmap code if an issue is reported or occurred. For example https://lore.kernel.org/linux-mm/Y0p8BZIiDXLQbde%2F@pc636/T/ The final patch adds two reviewers for vmalloc code. This patch (of 7): It is for debug purposes and for validation of passed parameters. Link: https://lkml.kernel.org/r/20221018181053.434508-1-urezki@gmail.com Link: https://lkml.kernel.org/r/20221018181053.434508-2-urezki@gmail.com Signed-off-by: Uladzislau Rezki (Sony) Reviewed-by: Steven Rostedt (Google) Reviewed-by: Christoph Hellwig Cc: Matthew Wilcox (Oracle) Cc: Nicholas Piggin Cc: Oleksiy Avramchenko Signed-off-by: Andrew Morton --- include/trace/events/vmalloc.h | 56 ++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 include/trace/events/vmalloc.h diff --git a/include/trace/events/vmalloc.h b/include/trace/events/vmalloc.h new file mode 100644 index 000000000000..39fbd77c91e7 --- /dev/null +++ b/include/trace/events/vmalloc.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM vmalloc + +#if !defined(_TRACE_VMALLOC_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_VMALLOC_H + +#include + +/** + * alloc_vmap_area - called when a new vmap allocation occurs + * @addr: an allocated address + * @size: a requested size + * @align: a requested alignment + * @vstart: a requested start range + * @vend: a requested end range + * @failed: an allocation failed or not + * + * This event is used for a debug purpose, it can give an extra + * information for a developer about how often it occurs and which + * 
parameters are passed for further validation. + */ +TRACE_EVENT(alloc_vmap_area, + + TP_PROTO(unsigned long addr, unsigned long size, unsigned long align, + unsigned long vstart, unsigned long vend, int failed), + + TP_ARGS(addr, size, align, vstart, vend, failed), + + TP_STRUCT__entry( + __field(unsigned long, addr) + __field(unsigned long, size) + __field(unsigned long, align) + __field(unsigned long, vstart) + __field(unsigned long, vend) + __field(int, failed) + ), + + TP_fast_assign( + __entry->addr = addr; + __entry->size = size; + __entry->align = align; + __entry->vstart = vstart; + __entry->vend = vend; + __entry->failed = failed; + ), + + TP_printk("va_start: %lu size=%lu align=%lu vstart=0x%lx vend=0x%lx failed=%d", + __entry->addr, __entry->size, __entry->align, + __entry->vstart, __entry->vend, __entry->failed) +); + +#endif /* _TRACE_VMALLOC_H */ + +/* This part must be outside protection */ +#include From b3a5a7b099162e1b11db459f8128d4374f7d1c05 Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Tue, 18 Oct 2022 20:10:48 +0200 Subject: [PATCH 1079/4122] mm: vmalloc: add purge_vmap_area_lazy trace event It is for debug purposes to track number of freed vmap areas including a range it occurs on. 
Link: https://lkml.kernel.org/r/20221018181053.434508-3-urezki@gmail.com Signed-off-by: Uladzislau Rezki (Sony) Reviewed-by: Steven Rostedt (Google) Reviewed-by: Christoph Hellwig Cc: Matthew Wilcox (Oracle) Cc: Nicholas Piggin Cc: Oleksiy Avramchenko Signed-off-by: Andrew Morton --- include/trace/events/vmalloc.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/include/trace/events/vmalloc.h b/include/trace/events/vmalloc.h index 39fbd77c91e7..afeb8003a0f2 100644 --- a/include/trace/events/vmalloc.h +++ b/include/trace/events/vmalloc.h @@ -50,6 +50,39 @@ TRACE_EVENT(alloc_vmap_area, __entry->vstart, __entry->vend, __entry->failed) ); +/** + * purge_vmap_area_lazy - called when vmap areas were lazily freed + * @start: purging start address + * @end: purging end address + * @npurged: number of purged vmap areas + * + * This event is used for a debug purpose. It gives some + * indication about start:end range and how many objects + * are released. + */ +TRACE_EVENT(purge_vmap_area_lazy, + + TP_PROTO(unsigned long start, unsigned long end, + unsigned int npurged), + + TP_ARGS(start, end, npurged), + + TP_STRUCT__entry( + __field(unsigned long, start) + __field(unsigned long, end) + __field(unsigned int, npurged) + ), + + TP_fast_assign( + __entry->start = start; + __entry->end = end; + __entry->npurged = npurged; + ), + + TP_printk("start=0x%lx end=0x%lx num_purged=%u", + __entry->start, __entry->end, __entry->npurged) +); + #endif /* _TRACE_VMALLOC_H */ /* This part must be outside protection */ From fabc27f7649e070c4f6c742e436a51ff68c4a280 Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Tue, 18 Oct 2022 20:10:49 +0200 Subject: [PATCH 1080/4122] mm: vmalloc: add free_vmap_area_noflush trace event This event is used in order to validate/debug a start address of freed VA, number of currently outstanding and maximum allowed areas. 
Link: https://lkml.kernel.org/r/20221018181053.434508-4-urezki@gmail.com Signed-off-by: Uladzislau Rezki (Sony) Reviewed-by: Steven Rostedt (Google) Reviewed-by: Christoph Hellwig Cc: Matthew Wilcox (Oracle) Cc: Nicholas Piggin Cc: Oleksiy Avramchenko Signed-off-by: Andrew Morton --- include/trace/events/vmalloc.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/include/trace/events/vmalloc.h b/include/trace/events/vmalloc.h index afeb8003a0f2..ad4e02191f35 100644 --- a/include/trace/events/vmalloc.h +++ b/include/trace/events/vmalloc.h @@ -83,6 +83,40 @@ TRACE_EVENT(purge_vmap_area_lazy, __entry->start, __entry->end, __entry->npurged) ); +/** + * free_vmap_area_noflush - called when a vmap area is freed + * @va_start: a start address of VA + * @nr_lazy: number of current lazy pages + * @nr_lazy_max: number of maximum lazy pages + * + * This event is used for a debug purpose. It gives some + * indication about a VA that is released, number of current + * outstanding areas and a maximum allowed threshold before + * dropping all of them. 
+ */ +TRACE_EVENT(free_vmap_area_noflush, + + TP_PROTO(unsigned long va_start, unsigned long nr_lazy, + unsigned long nr_lazy_max), + + TP_ARGS(va_start, nr_lazy, nr_lazy_max), + + TP_STRUCT__entry( + __field(unsigned long, va_start) + __field(unsigned long, nr_lazy) + __field(unsigned long, nr_lazy_max) + ), + + TP_fast_assign( + __entry->va_start = va_start; + __entry->nr_lazy = nr_lazy; + __entry->nr_lazy_max = nr_lazy_max; + ), + + TP_printk("va_start=0x%lx nr_lazy=%lu nr_lazy_max=%lu", + __entry->va_start, __entry->nr_lazy, __entry->nr_lazy_max) +); + #endif /* _TRACE_VMALLOC_H */ /* This part must be outside protection */ From cf243da6ab3987b65b95357194926a31415095b8 Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Tue, 18 Oct 2022 20:10:50 +0200 Subject: [PATCH 1081/4122] mm: vmalloc: use trace_alloc_vmap_area event This is for debug purpose and is called when an allocation attempt occurs. This event gives some information about: - start address of allocated area; - size that is requested; - alignment that is required; - vstart/vend restriction; - if an allocation fails. Link: https://lkml.kernel.org/r/20221018181053.434508-5-urezki@gmail.com Signed-off-by: Uladzislau Rezki (Sony) Reviewed-by: Steven Rostedt (Google) Reviewed-by: Christoph Hellwig Cc: Matthew Wilcox (Oracle) Cc: Nicholas Piggin Cc: Oleksiy Avramchenko Signed-off-by: Andrew Morton --- mm/vmalloc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index ccaa461998f3..849563d334fb 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -43,6 +43,9 @@ #include #include +#define CREATE_TRACE_POINTS +#include + #include "internal.h" #include "pgalloc-track.h" @@ -1620,6 +1623,8 @@ retry: size, align, vstart, vend); spin_unlock(&free_vmap_area_lock); + trace_alloc_vmap_area(addr, size, align, vstart, vend, addr == vend); + /* * If an allocation fails, the "vend" address is * returned. Therefore trigger the overflow path. 
From 6030fd5fd1f7baaac3661a5301cc7838d4e3b7f6 Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Tue, 18 Oct 2022 20:10:51 +0200 Subject: [PATCH 1082/4122] mm: vmalloc: use trace_purge_vmap_area_lazy event This is for debug purposes and is called when all outstanding areas are removed back to the vmap space. It gives some extra information about: - a start:end range where a set of vmap areas were freed; - a number of purged areas which were backed off. [urezki@gmail.com: simplify return boolean expression] Link: https://lkml.kernel.org/r/20221020125247.5053-1-urezki@gmail.com Link: https://lkml.kernel.org/r/20221018181053.434508-6-urezki@gmail.com Signed-off-by: Uladzislau Rezki (Sony) Reviewed-by: Steven Rostedt (Google) Reviewed-by: Christoph Hellwig Cc: Matthew Wilcox (Oracle) Cc: Nicholas Piggin Cc: Oleksiy Avramchenko Signed-off-by: Andrew Morton --- mm/vmalloc.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 849563d334fb..1b1205ade1cf 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1730,6 +1730,7 @@ static void purge_fragmented_blocks_allcpus(void); static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end) { unsigned long resched_threshold; + unsigned int num_purged_areas = 0; struct list_head local_purge_list; struct vmap_area *va, *n_va; @@ -1741,7 +1742,7 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end) spin_unlock(&purge_vmap_area_lock); if (unlikely(list_empty(&local_purge_list))) - return false; + goto out; start = min(start, list_first_entry(&local_purge_list, @@ -1776,12 +1777,16 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end) va->va_start, va->va_end); atomic_long_sub(nr, &vmap_lazy_nr); + num_purged_areas++; if (atomic_long_read(&vmap_lazy_nr) < resched_threshold) cond_resched_lock(&free_vmap_area_lock); } spin_unlock(&free_vmap_area_lock); - return true; + +out: + trace_purge_vmap_area_lazy(start, end, 
num_purged_areas); + return num_purged_areas > 0; } /* From 8c4196fe810a6717a8f9e528083911703f6a5a51 Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Tue, 18 Oct 2022 20:10:52 +0200 Subject: [PATCH 1083/4122] mm: vmalloc: use trace_free_vmap_area_noflush event It is for debug purposes and is called when a vmap area gets freed. This event gives some indication about: - a start address of released area; - a current number of outstanding pages; - a maximum number of allowed outstanding pages. Link: https://lkml.kernel.org/r/20221018181053.434508-7-urezki@gmail.com Signed-off-by: Uladzislau Rezki (Sony) Reviewed-by: Steven Rostedt (Google) Reviewed-by: Christoph Hellwig Cc: Matthew Wilcox (Oracle) Cc: Nicholas Piggin Cc: Oleksiy Avramchenko Signed-off-by: Andrew Morton --- mm/vmalloc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 1b1205ade1cf..ca71de7c9d77 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1821,6 +1821,8 @@ static void drain_vmap_area_work(struct work_struct *work) */ static void free_vmap_area_noflush(struct vmap_area *va) { + unsigned long nr_lazy_max = lazy_max_pages(); + unsigned long va_start = va->va_start; unsigned long nr_lazy; spin_lock(&vmap_area_lock); @@ -1838,8 +1840,10 @@ static void free_vmap_area_noflush(struct vmap_area *va) &purge_vmap_area_root, &purge_vmap_area_list); spin_unlock(&purge_vmap_area_lock); + trace_free_vmap_area_noflush(va_start, nr_lazy, nr_lazy_max); + /* After this point, we may free va at any time */ - if (unlikely(nr_lazy > lazy_max_pages())) + if (unlikely(nr_lazy > nr_lazy_max)) schedule_work(&drain_vmap_work); } From 65f199b2b40d82e48c79af2f4b5e9fafb290b231 Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Tue, 18 Oct 2022 20:10:53 +0200 Subject: [PATCH 1084/4122] vmalloc: add reviewers for vmalloc code Add myself and Christoph Hellwig as reviewers for vmalloc. 
[akpm@linux-foundation.org: coding-style cleanups] Link: https://lkml.kernel.org/r/20221018181053.434508-8-urezki@gmail.com Signed-off-by: Uladzislau Rezki (Sony) Reviewed-by: Steven Rostedt (Google) Acked-by: Christoph Hellwig Cc: Matthew Wilcox (Oracle) Cc: Nicholas Piggin Cc: Oleksiy Avramchenko Signed-off-by: Andrew Morton --- MAINTAINERS | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 046ff06ff97f..631bd2e8c5c4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13294,10 +13294,20 @@ F: include/linux/memory_hotplug.h F: include/linux/mm.h F: include/linux/mmzone.h F: include/linux/pagewalk.h -F: include/linux/vmalloc.h F: mm/ F: tools/testing/selftests/vm/ +VMALLOC +M: Andrew Morton +R: Uladzislau Rezki +R: Christoph Hellwig +L: linux-mm@kvack.org +S: Maintained +W: http://www.linux-mm.org +T: git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm +F: include/linux/vmalloc.h +F: mm/vmalloc.c + MEMORY HOT(UN)PLUG M: David Hildenbrand M: Oscar Salvador From e025ab842ec35225b1a8e163d1f311beb9e38ce9 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Tue, 18 Oct 2022 15:40:14 +0800 Subject: [PATCH 1085/4122] mm: remove kern_addr_valid() completely Most architectures (except arm64/x86/sparc) simply return 1 for kern_addr_valid(), which is only used in read_kcore(), and it calls copy_from_kernel_nofault() which could check whether the address is a valid kernel address. So as there is no need for kern_addr_valid(), let's remove it. 
Link: https://lkml.kernel.org/r/20221018074014.185687-1-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Acked-by: Geert Uytterhoeven [m68k] Acked-by: Heiko Carstens [s390] Acked-by: Christoph Hellwig Acked-by: Helge Deller [parisc] Acked-by: Michael Ellerman [powerpc] Acked-by: Guo Ren [csky] Acked-by: Catalin Marinas [arm64] Cc: Alexander Gordeev Cc: Andy Lutomirski Cc: Anton Ivanov Cc: Cc: Borislav Petkov Cc: Christian Borntraeger Cc: Christophe Leroy Cc: Chris Zankel Cc: Dave Hansen Cc: David S. Miller Cc: Dinh Nguyen Cc: Greg Ungerer Cc: H. Peter Anvin Cc: Huacai Chen Cc: Ingo Molnar Cc: Ivan Kokshaysky Cc: James Bottomley Cc: Johannes Berg Cc: Jonas Bonn Cc: Matt Turner Cc: Max Filippov Cc: Michal Simek Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Peter Zijlstra Cc: Richard Henderson Cc: Richard Weinberger Cc: Rich Felker Cc: Russell King Cc: Stafford Horne Cc: Stefan Kristiansson Cc: Sven Schnelle Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Vineet Gupta Cc: Will Deacon Cc: Xuerui Wang Cc: Yoshinori Sato Signed-off-by: Andrew Morton --- arch/alpha/include/asm/pgtable.h | 2 - arch/arc/include/asm/pgtable-bits-arcv2.h | 2 - arch/arm/include/asm/pgtable-nommu.h | 2 - arch/arm/include/asm/pgtable.h | 4 -- arch/arm64/include/asm/pgtable.h | 2 - arch/arm64/mm/mmu.c | 47 ----------------------- arch/arm64/mm/pageattr.c | 3 +- arch/csky/include/asm/pgtable.h | 3 -- arch/hexagon/include/asm/page.h | 7 ---- arch/ia64/include/asm/pgtable.h | 16 -------- arch/loongarch/include/asm/pgtable.h | 2 - arch/m68k/include/asm/pgtable_mm.h | 2 - arch/m68k/include/asm/pgtable_no.h | 1 - arch/microblaze/include/asm/pgtable.h | 3 -- arch/mips/include/asm/pgtable.h | 2 - arch/nios2/include/asm/pgtable.h | 2 - arch/openrisc/include/asm/pgtable.h | 2 - arch/parisc/include/asm/pgtable.h | 15 -------- arch/powerpc/include/asm/pgtable.h | 7 ---- arch/riscv/include/asm/pgtable.h | 2 - arch/s390/include/asm/pgtable.h | 2 - 
arch/sh/include/asm/pgtable.h | 2 - arch/sparc/include/asm/pgtable_32.h | 6 --- arch/sparc/mm/init_32.c | 3 +- arch/sparc/mm/init_64.c | 1 - arch/um/include/asm/pgtable.h | 2 - arch/x86/include/asm/pgtable_32.h | 9 ----- arch/x86/include/asm/pgtable_64.h | 1 - arch/x86/mm/init_64.c | 41 -------------------- arch/xtensa/include/asm/pgtable.h | 2 - fs/proc/kcore.c | 26 +++++-------- 31 files changed, 11 insertions(+), 210 deletions(-) diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h index 3ea9661c09ff..9e45f6735d5d 100644 --- a/arch/alpha/include/asm/pgtable.h +++ b/arch/alpha/include/asm/pgtable.h @@ -313,8 +313,6 @@ extern inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -#define kern_addr_valid(addr) (1) - #define pte_ERROR(e) \ printk("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e)) #define pmd_ERROR(e) \ diff --git a/arch/arc/include/asm/pgtable-bits-arcv2.h b/arch/arc/include/asm/pgtable-bits-arcv2.h index b23be557403e..515e82db519f 100644 --- a/arch/arc/include/asm/pgtable-bits-arcv2.h +++ b/arch/arc/include/asm/pgtable-bits-arcv2.h @@ -120,8 +120,6 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -#define kern_addr_valid(addr) (1) - #ifdef CONFIG_TRANSPARENT_HUGEPAGE #include #endif diff --git a/arch/arm/include/asm/pgtable-nommu.h b/arch/arm/include/asm/pgtable-nommu.h index d16aba48fa0a..25d8c7bb07e0 100644 --- a/arch/arm/include/asm/pgtable-nommu.h +++ b/arch/arm/include/asm/pgtable-nommu.h @@ -21,8 +21,6 @@ #define pgd_none(pgd) (0) #define pgd_bad(pgd) (0) #define pgd_clear(pgdp) -#define kern_addr_valid(addr) (1) -/* FIXME */ /* * PMD_SHIFT determines the size of the area a second-level page table can map * PGDIR_SHIFT determines what a 
third-level page table entry can map diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 78a532068fec..00954ab1a039 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -298,10 +298,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) */ #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS) -/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */ -/* FIXME: this is not correct */ -#define kern_addr_valid(addr) (1) - /* * We provide our own arch_get_unmapped_area to cope with VIPT caches. */ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 71a1af42f0e8..4873c1d6e7d0 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -1021,8 +1021,6 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, */ #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS) -extern int kern_addr_valid(unsigned long addr); - #ifdef CONFIG_ARM64_MTE #define __HAVE_ARCH_PREPARE_TO_SWAP diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 9a7c38965154..556154d821bf 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -814,53 +814,6 @@ void __init paging_init(void) create_idmap(); } -/* - * Check whether a kernel address is valid (derived from arch/x86/). 
- */ -int kern_addr_valid(unsigned long addr) -{ - pgd_t *pgdp; - p4d_t *p4dp; - pud_t *pudp, pud; - pmd_t *pmdp, pmd; - pte_t *ptep, pte; - - addr = arch_kasan_reset_tag(addr); - if ((((long)addr) >> VA_BITS) != -1UL) - return 0; - - pgdp = pgd_offset_k(addr); - if (pgd_none(READ_ONCE(*pgdp))) - return 0; - - p4dp = p4d_offset(pgdp, addr); - if (p4d_none(READ_ONCE(*p4dp))) - return 0; - - pudp = pud_offset(p4dp, addr); - pud = READ_ONCE(*pudp); - if (pud_none(pud)) - return 0; - - if (pud_sect(pud)) - return pfn_valid(pud_pfn(pud)); - - pmdp = pmd_offset(pudp, addr); - pmd = READ_ONCE(*pmdp); - if (pmd_none(pmd)) - return 0; - - if (pmd_sect(pmd)) - return pfn_valid(pmd_pfn(pmd)); - - ptep = pte_offset_kernel(pmdp, addr); - pte = READ_ONCE(*ptep); - if (pte_none(pte)) - return 0; - - return pfn_valid(pte_pfn(pte)); -} - #ifdef CONFIG_MEMORY_HOTPLUG static void free_hotplug_page_range(struct page *page, size_t size, struct vmem_altmap *altmap) diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index d107c3d434e2..0a741a910a6a 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -201,8 +201,7 @@ void __kernel_map_pages(struct page *page, int numpages, int enable) /* * This function is used to determine if a linear map page has been marked as - * not-valid. Walk the page table and check the PTE_VALID bit. This is based - * on kern_addr_valid(), which almost does what we need. + * not-valid. Walk the page table and check the PTE_VALID bit. * * Because this is only called on the kernel linear map, p?d_sect() implies * p?d_present(). 
When debug_pagealloc is enabled, sections mappings are diff --git a/arch/csky/include/asm/pgtable.h b/arch/csky/include/asm/pgtable.h index c3d9b92cbe61..77bc6caff2d2 100644 --- a/arch/csky/include/asm/pgtable.h +++ b/arch/csky/include/asm/pgtable.h @@ -249,9 +249,6 @@ extern void paging_init(void); void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *pte); -/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */ -#define kern_addr_valid(addr) (1) - #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ remap_pfn_range(vma, vaddr, pfn, size, prot) diff --git a/arch/hexagon/include/asm/page.h b/arch/hexagon/include/asm/page.h index 7cbf719c578e..d7d4f9fca327 100644 --- a/arch/hexagon/include/asm/page.h +++ b/arch/hexagon/include/asm/page.h @@ -131,13 +131,6 @@ static inline void clear_page(void *page) #define page_to_virt(page) __va(page_to_phys(page)) -/* - * For port to Hexagon Virtual Machine, MAYBE we check for attempts - * to reference reserved HVM space, but in any case, the VM will be - * protected. - */ -#define kern_addr_valid(addr) (1) - #include #include /* XXX Todo: implement assembly-optimized version of getorder. */ diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h index 6925e28ae61d..01517a5e6778 100644 --- a/arch/ia64/include/asm/pgtable.h +++ b/arch/ia64/include/asm/pgtable.h @@ -181,22 +181,6 @@ ia64_phys_addr_valid (unsigned long addr) return (addr & (local_cpu_data->unimpl_pa_mask)) == 0; } -/* - * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel - * memory. For the return value to be meaningful, ADDR must be >= - * PAGE_OFFSET. This operation can be relatively expensive (e.g., - * require a hash-, or multi-level tree-lookup or something of that - * sort) but it guarantees to return TRUE only if accessing the page - * at that address does not cause an error. 
Note that there may be - * addresses for which kern_addr_valid() returns FALSE even though an - * access would not cause an error (e.g., this is typically true for - * memory mapped I/O regions. - * - * XXX Need to implement this for IA-64. - */ -#define kern_addr_valid(addr) (1) - - /* * Now come the defines and routines to manage and access the three-level * page table. diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index 946704bee599..fc70b7041b76 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -421,8 +421,6 @@ static inline void update_mmu_cache_pmd(struct vm_area_struct *vma, __update_tlb(vma, address, (pte_t *)pmdp); } -#define kern_addr_valid(addr) (1) - static inline unsigned long pmd_pfn(pmd_t pmd) { return (pmd_val(pmd) & _PFN_MASK) >> _PFN_SHIFT; diff --git a/arch/m68k/include/asm/pgtable_mm.h b/arch/m68k/include/asm/pgtable_mm.h index 9b4e2fe2ac82..b93c41fe2067 100644 --- a/arch/m68k/include/asm/pgtable_mm.h +++ b/arch/m68k/include/asm/pgtable_mm.h @@ -145,8 +145,6 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, #endif /* !__ASSEMBLY__ */ -#define kern_addr_valid(addr) (1) - /* MMU-specific headers */ #ifdef CONFIG_SUN3 diff --git a/arch/m68k/include/asm/pgtable_no.h b/arch/m68k/include/asm/pgtable_no.h index bce5ca56c388..fed58da3a6b6 100644 --- a/arch/m68k/include/asm/pgtable_no.h +++ b/arch/m68k/include/asm/pgtable_no.h @@ -20,7 +20,6 @@ #define pgd_none(pgd) (0) #define pgd_bad(pgd) (0) #define pgd_clear(pgdp) -#define kern_addr_valid(addr) (1) #define pmd_offset(a, b) ((void *)0) #define PAGE_NONE __pgprot(0) diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h index ba348e997dbb..42f5988e998b 100644 --- a/arch/microblaze/include/asm/pgtable.h +++ b/arch/microblaze/include/asm/pgtable.h @@ -416,9 +416,6 @@ extern unsigned long iopa(unsigned long addr); #define IOMAP_NOCACHE_NONSER 2 #define 
IOMAP_NO_COPYBACK 3 -/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */ -#define kern_addr_valid(addr) (1) - void do_page_fault(struct pt_regs *regs, unsigned long address, unsigned long error_code); diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index 6caec386ad2f..364a06033105 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -550,8 +550,6 @@ static inline void update_mmu_cache_pmd(struct vm_area_struct *vma, __update_tlb(vma, address, pte); } -#define kern_addr_valid(addr) (1) - /* * Allow physical addresses to be fixed up to help 36-bit peripherals. */ diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h index b3d45e815295..ab793bc517f5 100644 --- a/arch/nios2/include/asm/pgtable.h +++ b/arch/nios2/include/asm/pgtable.h @@ -249,8 +249,6 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) #define __swp_entry_to_pte(swp) ((pte_t) { (swp).val }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) -#define kern_addr_valid(addr) (1) - extern void __init paging_init(void); extern void __init mmu_init(void); diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h index dcae8aea132f..6477c17b3062 100644 --- a/arch/openrisc/include/asm/pgtable.h +++ b/arch/openrisc/include/asm/pgtable.h @@ -395,8 +395,6 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -#define kern_addr_valid(addr) (1) - typedef pte_t *pte_addr_t; #endif /* __ASSEMBLY__ */ diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index ecd028854469..bd09a44cfb2d 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -23,21 +23,6 @@ #include #include -/* - * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel - * memory. 
For the return value to be meaningful, ADDR must be >= - * PAGE_OFFSET. This operation can be relatively expensive (e.g., - * require a hash-, or multi-level tree-lookup or something of that - * sort) but it guarantees to return TRUE only if accessing the page - * at that address does not cause an error. Note that there may be - * addresses for which kern_addr_valid() returns FALSE even though an - * access would not cause an error (e.g., this is typically true for - * memory mapped I/O regions. - * - * XXX Need to implement this for parisc. - */ -#define kern_addr_valid(addr) (1) - /* This is for the serialization of PxTLB broadcasts. At least on the N class * systems, only one PxTLB inter processor broadcast can be active at any one * time on the Merced bus. */ diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 283f40d05a4d..9972626ddaf6 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -81,13 +81,6 @@ void poking_init(void); extern unsigned long ioremap_bot; extern const pgprot_t protection_map[16]; -/* - * kern_addr_valid is intended to indicate whether an address is a valid - * kernel address. Most 32-bit archs define it as always true (like this) - * but most 64-bit archs actually perform a test. What should we do here? 
- */ -#define kern_addr_valid(addr) (1) - #ifndef CONFIG_TRANSPARENT_HUGEPAGE #define pmd_large(pmd) 0 #endif diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 7ec936910a96..c7993bdf749f 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -801,8 +801,6 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, #endif /* !CONFIG_MMU */ -#define kern_addr_valid(addr) (1) /* FIXME */ - extern char _start[]; extern void *_dtb_early_va; extern uintptr_t _dtb_early_pa; diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index f1cb9391190d..e1db07211818 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1773,8 +1773,6 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -#define kern_addr_valid(addr) (1) - extern int vmem_add_mapping(unsigned long start, unsigned long size); extern void vmem_remove_mapping(unsigned long start, unsigned long size); extern int __vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot, bool alloc); diff --git a/arch/sh/include/asm/pgtable.h b/arch/sh/include/asm/pgtable.h index 6fb9ec54cf9b..3ce30becf6df 100644 --- a/arch/sh/include/asm/pgtable.h +++ b/arch/sh/include/asm/pgtable.h @@ -92,8 +92,6 @@ static inline unsigned long phys_addr_mask(void) typedef pte_t *pte_addr_t; -#define kern_addr_valid(addr) (1) - #define pte_pfn(x) ((unsigned long)(((x).pte_low >> PAGE_SHIFT))) struct vm_area_struct; diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h index 8ff549004fac..5acc05b572e6 100644 --- a/arch/sparc/include/asm/pgtable_32.h +++ b/arch/sparc/include/asm/pgtable_32.h @@ -368,12 +368,6 @@ __get_iospace (unsigned long addr) } } -extern unsigned long *sparc_valid_addr_bitmap; - -/* Needs to be defined here 
and not in linux/mm.h, as it is arch dependent */ -#define kern_addr_valid(addr) \ - (test_bit(__pa((unsigned long)(addr))>>20, sparc_valid_addr_bitmap)) - /* * For sparc32&64, the pfn in io_remap_pfn_range() carries in * its high 4 bits. These macros/functions put it there or get it from there. diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c index d88e774c8eb4..9c0ea457bdf0 100644 --- a/arch/sparc/mm/init_32.c +++ b/arch/sparc/mm/init_32.c @@ -37,8 +37,7 @@ #include "mm_32.h" -unsigned long *sparc_valid_addr_bitmap; -EXPORT_SYMBOL(sparc_valid_addr_bitmap); +static unsigned long *sparc_valid_addr_bitmap; unsigned long phys_base; EXPORT_SYMBOL(phys_base); diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index d6faee23c77d..04f9db0c3111 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -1667,7 +1667,6 @@ bool kern_addr_valid(unsigned long addr) return pfn_valid(pte_pfn(*pte)); } -EXPORT_SYMBOL(kern_addr_valid); static unsigned long __ref kernel_map_hugepud(unsigned long vstart, unsigned long vend, diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h index 66bc3f99d9be..4e3052f2671a 100644 --- a/arch/um/include/asm/pgtable.h +++ b/arch/um/include/asm/pgtable.h @@ -298,8 +298,6 @@ extern pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr); ((swp_entry_t) { pte_val(pte_mkuptodate(pte)) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -#define kern_addr_valid(addr) (1) - /* Clear a kernel PTE and flush it from the TLB */ #define kpte_clear_flush(ptep, vaddr) \ do { \ diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 7c9c968a42ef..7d4ad8907297 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -47,15 +47,6 @@ do { \ #endif /* !__ASSEMBLY__ */ -/* - * kern_addr_valid() is (1) for FLATMEM and (0) for SPARSEMEM - */ -#ifdef CONFIG_FLATMEM -#define kern_addr_valid(addr) (1) -#else -#define kern_addr_valid(kaddr) 
(0) -#endif - /* * This is used to calculate the .brk reservation for initial pagetables. * Enough space is reserved to allocate pagetables sufficient to cover all diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index e479491da8d5..7929327abe00 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -240,7 +240,6 @@ static inline void native_pgd_clear(pgd_t *pgd) #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) #define __swp_entry_to_pmd(x) ((pmd_t) { .pmd = (x).val }) -extern int kern_addr_valid(unsigned long addr); extern void cleanup_highmap(void); #define HAVE_ARCH_UNMAPPED_AREA diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 3f040c6e5d13..e8db4edd7cc9 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1416,47 +1416,6 @@ void mark_rodata_ro(void) debug_checkwx(); } -int kern_addr_valid(unsigned long addr) -{ - unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT; - pgd_t *pgd; - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - - if (above != 0 && above != -1UL) - return 0; - - pgd = pgd_offset_k(addr); - if (pgd_none(*pgd)) - return 0; - - p4d = p4d_offset(pgd, addr); - if (!p4d_present(*p4d)) - return 0; - - pud = pud_offset(p4d, addr); - if (!pud_present(*pud)) - return 0; - - if (pud_large(*pud)) - return pfn_valid(pud_pfn(*pud)); - - pmd = pmd_offset(pud, addr); - if (!pmd_present(*pmd)) - return 0; - - if (pmd_large(*pmd)) - return pfn_valid(pmd_pfn(*pmd)); - - pte = pte_offset_kernel(pmd, addr); - if (pte_none(*pte)) - return 0; - - return pfn_valid(pte_pfn(*pte)); -} - /* * Block size is the minimum amount of memory which can be hotplugged or * hotremoved. 
It must be power of two and must be equal or larger than diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h index 54f577c13afa..5b5484d707b2 100644 --- a/arch/xtensa/include/asm/pgtable.h +++ b/arch/xtensa/include/asm/pgtable.h @@ -386,8 +386,6 @@ ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) #else -#define kern_addr_valid(addr) (1) - extern void update_mmu_cache(struct vm_area_struct * vma, unsigned long address, pte_t *ptep); diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 98f3289556e4..71157ee35c1a 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -540,25 +540,17 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) fallthrough; case KCORE_VMEMMAP: case KCORE_TEXT: - if (kern_addr_valid(start)) { - /* - * Using bounce buffer to bypass the - * hardened user copy kernel text checks. - */ - if (copy_from_kernel_nofault(buf, (void *)start, - tsz)) { - if (clear_user(buffer, tsz)) { - ret = -EFAULT; - goto out; - } - } else { - if (copy_to_user(buffer, buf, tsz)) { - ret = -EFAULT; - goto out; - } + /* + * Using bounce buffer to bypass the + * hardened user copy kernel text checks. + */ + if (copy_from_kernel_nofault(buf, (void *)start, tsz)) { + if (clear_user(buffer, tsz)) { + ret = -EFAULT; + goto out; } } else { - if (clear_user(buffer, tsz)) { + if (copy_to_user(buffer, buf, tsz)) { ret = -EFAULT; goto out; } From 9ee2c086271639d82ad8f6e96b91fa7991800c0a Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 19 Oct 2022 19:33:29 +0100 Subject: [PATCH 1086/4122] mm/huge_memory: convert split_huge_pages_in_file() to use a folio Patch series "Remove FGP_HEAD flag". We have just two users left of the FGP_HEAD flag and both of them are better off; sometimes startlingly so as a result of conversion to use folios. This patch (of 4): Removes a number of calls to compound_head() and a call to pagecache_get_page(). 
Link: https://lkml.kernel.org/r/20221019183332.2802139-1-willy@infradead.org Link: https://lkml.kernel.org/r/20221019183332.2802139-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- mm/huge_memory.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 561a42567477..bc5dda3d2ad7 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -3066,28 +3066,28 @@ static int split_huge_pages_in_file(const char *file_path, pgoff_t off_start, mapping = candidate->f_mapping; for (index = off_start; index < off_end; index += nr_pages) { - struct page *fpage = pagecache_get_page(mapping, index, - FGP_ENTRY | FGP_HEAD, 0); + struct folio *folio = __filemap_get_folio(mapping, index, + FGP_ENTRY, 0); nr_pages = 1; - if (xa_is_value(fpage) || !fpage) + if (xa_is_value(folio) || !folio) continue; - if (!is_transparent_hugepage(fpage)) + if (!folio_test_large(folio)) goto next; total++; - nr_pages = thp_nr_pages(fpage); + nr_pages = folio_nr_pages(folio); - if (!trylock_page(fpage)) + if (!folio_trylock(folio)) goto next; - if (!split_huge_page(fpage)) + if (!split_folio(folio)) split++; - unlock_page(fpage); + folio_unlock(folio); next: - put_page(fpage); + folio_put(folio); cond_resched(); } From dd8095b15a6034165bc48da1eb6d0acc73c1558a Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 19 Oct 2022 19:33:30 +0100 Subject: [PATCH 1087/4122] mm/swap: convert find_get_incore_page to use folios Eliminates a use of FGP_HEAD and saves 35 bytes of text. 
Link: https://lkml.kernel.org/r/20221019183332.2802139-3-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- mm/swap_state.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/mm/swap_state.c b/mm/swap_state.c index 438d0676c5be..44e3530520e8 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -386,17 +386,14 @@ struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index) { swp_entry_t swp; struct swap_info_struct *si; - struct page *page = pagecache_get_page(mapping, index, - FGP_ENTRY | FGP_HEAD, 0); + struct folio *folio = __filemap_get_folio(mapping, index, FGP_ENTRY, 0); - if (!page) - return page; - if (!xa_is_value(page)) - return find_subpage(page, index); + if (!xa_is_value(folio)) + goto out; if (!shmem_mapping(mapping)) return NULL; - swp = radix_to_swp_entry(page); + swp = radix_to_swp_entry(folio); /* There might be swapin error entries in shmem mapping. */ if (non_swap_entry(swp)) return NULL; @@ -404,9 +401,13 @@ struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index) si = get_swap_device(swp); if (!si) return NULL; - page = find_get_page(swap_address_space(swp), swp_offset(swp)); + index = swp_offset(swp); + folio = filemap_get_folio(swap_address_space(swp), index); put_swap_device(si); - return page; +out: + if (!folio) + return NULL; + return folio_file_page(folio, index); } struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, From 524984ff66ee4b63264dffe568c3547a20b4136c Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 19 Oct 2022 19:33:31 +0100 Subject: [PATCH 1088/4122] mm: convert find_get_incore_page() to filemap_get_incore_folio() Return the containing folio instead of the precise page. One of the callers wants the folio and the other can do the folio->page conversion itself. Nets 442 bytes of text size reduction, 478 bytes removed and 36 bytes added. 
Link: https://lkml.kernel.org/r/20221019183332.2802139-4-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- mm/memcontrol.c | 12 +++++++++--- mm/mincore.c | 10 +++++----- mm/swap.h | 8 +++++--- mm/swap_state.c | 15 +++++++-------- 4 files changed, 26 insertions(+), 19 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index f264a856ba86..fd707dcd6d04 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5648,15 +5648,21 @@ static struct page *mc_handle_swap_pte(struct vm_area_struct *vma, static struct page *mc_handle_file_pte(struct vm_area_struct *vma, unsigned long addr, pte_t ptent) { + unsigned long index; + struct folio *folio; + if (!vma->vm_file) /* anonymous vma */ return NULL; if (!(mc.flags & MOVE_FILE)) return NULL; - /* page is moved even if it's not RSS of this task(page-faulted). */ + /* folio is moved even if it's not RSS of this task(page-faulted). */ /* shmem/tmpfs may report page out on swap: account for that too. */ - return find_get_incore_page(vma->vm_file->f_mapping, - linear_page_index(vma, addr)); + index = linear_page_index(vma, addr); + folio = filemap_get_incore_folio(vma->vm_file->f_mapping, index); + if (!folio) + return NULL; + return folio_file_page(folio, index); } /** diff --git a/mm/mincore.c b/mm/mincore.c index e7e046fe17d7..a085a2aeabd8 100644 --- a/mm/mincore.c +++ b/mm/mincore.c @@ -52,7 +52,7 @@ static int mincore_hugetlb(pte_t *pte, unsigned long hmask, unsigned long addr, static unsigned char mincore_page(struct address_space *mapping, pgoff_t index) { unsigned char present = 0; - struct page *page; + struct folio *folio; /* * When tmpfs swaps out a page from a file, any process mapping that @@ -60,10 +60,10 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t index) * any other file mapping (ie. marked !present and faulted in with * tmpfs's .fault). So swapped out tmpfs mappings are tested here. 
*/ - page = find_get_incore_page(mapping, index); - if (page) { - present = PageUptodate(page); - put_page(page); + folio = filemap_get_incore_folio(mapping, index); + if (folio) { + present = folio_test_uptodate(folio); + folio_put(folio); } return present; diff --git a/mm/swap.h b/mm/swap.h index cc08c459c619..f78065c8ef52 100644 --- a/mm/swap.h +++ b/mm/swap.h @@ -41,7 +41,8 @@ void clear_shadow_from_swap_cache(int type, unsigned long begin, unsigned long end); struct folio *swap_cache_get_folio(swp_entry_t entry, struct vm_area_struct *vma, unsigned long addr); -struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index); +struct folio *filemap_get_incore_folio(struct address_space *mapping, + pgoff_t index); struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, struct vm_area_struct *vma, @@ -105,9 +106,10 @@ static inline struct folio *swap_cache_get_folio(swp_entry_t entry, } static inline -struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index) +struct folio *filemap_get_incore_folio(struct address_space *mapping, + pgoff_t index) { - return find_get_page(mapping, index); + return filemap_get_folio(mapping, index); } static inline bool add_to_swap(struct folio *folio) diff --git a/mm/swap_state.c b/mm/swap_state.c index 44e3530520e8..40fe6f23e105 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -373,16 +373,17 @@ struct folio *swap_cache_get_folio(swp_entry_t entry, } /** - * find_get_incore_page - Find and get a page from the page or swap caches. + * filemap_get_incore_folio - Find and get a folio from the page or swap caches. * @mapping: The address_space to search. * @index: The page cache index. * - * This differs from find_get_page() in that it will also look for the - * page in the swap cache. + * This differs from filemap_get_folio() in that it will also look for the + * folio in the swap cache. * - * Return: The found page or %NULL. + * Return: The found folio or %NULL. 
*/ -struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index) +struct folio *filemap_get_incore_folio(struct address_space *mapping, + pgoff_t index) { swp_entry_t swp; struct swap_info_struct *si; @@ -405,9 +406,7 @@ struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index) folio = filemap_get_folio(swap_address_space(swp), index); put_swap_device(si); out: - if (!folio) - return NULL; - return folio_file_page(folio, index); + return folio; } struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, From c5255b421fd04ba6a405809b7216a3b6ebd5493a Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 19 Oct 2022 19:33:32 +0100 Subject: [PATCH 1089/4122] mm: remove FGP_HEAD This is no longer used; all callers have been converted to use folios instead. Somehow this manages to save 11 bytes of text. Link: https://lkml.kernel.org/r/20221019183332.2802139-5-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/pagemap.h | 5 ++--- mm/folio-compat.c | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 060ee98474ef..b33ab86d5dca 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -504,9 +504,8 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping, #define FGP_NOFS 0x00000010 #define FGP_NOWAIT 0x00000020 #define FGP_FOR_MMAP 0x00000040 -#define FGP_HEAD 0x00000080 -#define FGP_ENTRY 0x00000100 -#define FGP_STABLE 0x00000200 +#define FGP_ENTRY 0x00000080 +#define FGP_STABLE 0x00000100 struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index, int fgp_flags, gfp_t gfp); diff --git a/mm/folio-compat.c b/mm/folio-compat.c index 8ae39c06da62..bac2a366aada 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -108,7 +108,7 @@ struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index, struct folio *folio; folio = 
__filemap_get_folio(mapping, index, fgp_flags, gfp); - if ((fgp_flags & FGP_HEAD) || !folio || xa_is_value(folio)) + if (!folio || xa_is_value(folio)) return &folio->page; return folio_file_page(folio, index); } From 93d38b72e4c1a518f46fd6dfcb5ad7c5003fa372 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Wed, 19 Oct 2022 11:49:41 +0800 Subject: [PATCH 1090/4122] nios2: remove unused INIT_MMAP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "mm: cleanup with VM_ACCESS_FLAGS". This patch (of 5): It seems that INIT_MMAP is gone in 2.4.10, not sure, anyways, it is useless now, kill it. Link: https://lkml.kernel.org/r/20221019034945.93081-1-wangkefeng.wang@huawei.com Link: https://lkml.kernel.org/r/20221019034945.93081-2-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Cc: Dinh Nguyen Cc: Dave Hansen Cc: Jarkko Sakkinen Cc: Kefeng Wang Cc: Alex Deucher Cc: "Christian König" Cc: Daniel Vetter Cc: David Airlie Cc: "Pan, Xinhui" Signed-off-by: Andrew Morton --- arch/nios2/include/asm/processor.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/nios2/include/asm/processor.h b/arch/nios2/include/asm/processor.h index 8916d93d5c2d..eb44130364a9 100644 --- a/arch/nios2/include/asm/processor.h +++ b/arch/nios2/include/asm/processor.h @@ -50,9 +50,6 @@ struct thread_struct { unsigned long kpsr; }; -#define INIT_MMAP \ - { &init_mm, (0), (0), __pgprot(0x0), VM_READ | VM_WRITE | VM_EXEC } - # define INIT_THREAD { \ .kregs = NULL, \ .ksp = 0, \ From 4f20566f5c0f42c451f6a43be9bfa6f0b3d142df Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Wed, 19 Oct 2022 11:49:42 +0800 Subject: [PATCH 1091/4122] x86/sgx: use VM_ACCESS_FLAGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Simplify VM_READ|VM_WRITE|VM_EXEC with VM_ACCESS_FLAGS. 
Link: https://lkml.kernel.org/r/20221019034945.93081-3-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Cc: Jarkko Sakkinen Cc: Dave Hansen Cc: Alex Deucher Cc: "Christian König" Cc: Daniel Vetter Cc: David Airlie Cc: Dinh Nguyen Cc: "Pan, Xinhui" Signed-off-by: Andrew Morton --- arch/x86/kernel/cpu/sgx/encl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c index 1ec20807de1e..6225c525372d 100644 --- a/arch/x86/kernel/cpu/sgx/encl.c +++ b/arch/x86/kernel/cpu/sgx/encl.c @@ -268,7 +268,7 @@ static struct sgx_encl_page *sgx_encl_load_page_in_vma(struct sgx_encl *encl, unsigned long addr, unsigned long vm_flags) { - unsigned long vm_prot_bits = vm_flags & (VM_READ | VM_WRITE | VM_EXEC); + unsigned long vm_prot_bits = vm_flags & VM_ACCESS_FLAGS; struct sgx_encl_page *entry; entry = xa_load(&encl->page_array, PFN_DOWN(addr)); @@ -502,7 +502,7 @@ static void sgx_vma_open(struct vm_area_struct *vma) int sgx_encl_may_map(struct sgx_encl *encl, unsigned long start, unsigned long end, unsigned long vm_flags) { - unsigned long vm_prot_bits = vm_flags & (VM_READ | VM_WRITE | VM_EXEC); + unsigned long vm_prot_bits = vm_flags & VM_ACCESS_FLAGS; struct sgx_encl_page *page; unsigned long count = 0; int ret = 0; From e39ee675f42e993bbf1c04b1ad7526db820ccdce Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Wed, 19 Oct 2022 11:49:43 +0800 Subject: [PATCH 1092/4122] mm: mprotect: use VM_ACCESS_FLAGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Simplify VM_READ|VM_WRITE|VM_EXEC with VM_ACCESS_FLAGS. 
Link: https://lkml.kernel.org/r/20221019034945.93081-4-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Cc: Alex Deucher Cc: "Christian König" Cc: Daniel Vetter Cc: Dave Hansen Cc: David Airlie Cc: Dinh Nguyen Cc: Jarkko Sakkinen Cc: "Pan, Xinhui" Signed-off-by: Andrew Morton --- mm/mprotect.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mm/mprotect.c b/mm/mprotect.c index 668bfaa6ed2a..99762403cc8f 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -756,8 +756,7 @@ static int do_mprotect_pkey(unsigned long start, size_t len, * If a permission is not passed to mprotect(), it must be * cleared from the VMA. */ - mask_off_old_flags = VM_READ | VM_WRITE | VM_EXEC | - VM_FLAGS_CLEAR; + mask_off_old_flags = VM_ACCESS_FLAGS | VM_FLAGS_CLEAR; new_vma_pkey = arch_override_mprotect_pkey(vma, prot, pkey); newflags = calc_vm_prot_bits(prot, new_vma_pkey); From d7e679b6f9d9f7337f0fdc5011f2ecc9b16f821b Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Wed, 19 Oct 2022 11:49:44 +0800 Subject: [PATCH 1093/4122] mm: debug_vm_pgtable: use VM_ACCESS_FLAGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Directly use VM_ACCESS_FLAGS instead of VMFLAGS. Link: https://lkml.kernel.org/r/20221019034945.93081-5-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Cc: Alex Deucher Cc: "Christian König" Cc: Daniel Vetter Cc: Dave Hansen Cc: David Airlie Cc: Dinh Nguyen Cc: Jarkko Sakkinen Cc: "Pan, Xinhui" Signed-off-by: Andrew Morton --- mm/debug_vm_pgtable.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index dc7df1254f0a..2b61fde8c38c 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -38,11 +38,7 @@ * Please refer Documentation/mm/arch_pgtable_helpers.rst for the semantics * expectations that are being validated here. All future changes in here * or the documentation need to be in sync.
- */ - -#define VMFLAGS (VM_READ|VM_WRITE|VM_EXEC) - -/* + * * On s390 platform, the lower 4 bits are used to identify given page table * entry type. But these bits might affect the ability to clear entries with * pxx_clear() because of how dynamic page table folding works on s390. So @@ -1125,7 +1121,7 @@ static int __init init_args(struct pgtable_debug_args *args) */ memset(args, 0, sizeof(*args)); args->vaddr = get_random_vaddr(); - args->page_prot = vm_get_page_prot(VMFLAGS); + args->page_prot = vm_get_page_prot(VM_ACCESS_FLAGS); args->page_prot_none = vm_get_page_prot(VM_NONE); args->is_contiguous_page = false; args->pud_pfn = ULONG_MAX; From cc03817c0e8417419ede18a8e0749c5b9699b135 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Wed, 19 Oct 2022 11:49:45 +0800 Subject: [PATCH 1094/4122] amdgpu: use VM_ACCESS_FLAGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Simplify VM_READ|VM_WRITE|VM_EXEC with VM_ACCESS_FLAGS. Link: https://lkml.kernel.org/r/20221019034945.93081-6-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Cc: Alex Deucher Cc: "Christian König" Cc: "Pan, Xinhui" Cc: David Airlie Cc: Daniel Vetter Cc: Dave Hansen Cc: Dinh Nguyen Cc: Jarkko Sakkinen Signed-off-by: Andrew Morton --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 8ef31d687ef3..4728be161828 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -255,7 +255,7 @@ static int amdgpu_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_str * becoming writable and makes is_cow_mapping(vm_flags) false. 
*/ if (is_cow_mapping(vma->vm_flags) && - !(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))) + !(vma->vm_flags & VM_ACCESS_FLAGS)) vma->vm_flags &= ~VM_MAYWRITE; return drm_gem_ttm_mmap(obj, vma); From 4781593d5dbae50500d1c7975be03b590ae2b92a Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 20 Oct 2022 15:38:32 -0400 Subject: [PATCH 1095/4122] mm/hugetlb: unify clearing of RestoreReserve for private pages A trivial cleanup to move clearing of RestoreReserve into adding anon rmap of private hugetlb mappings. It matches with the shared mappings where we only clear the bit when adding into page cache, rather than spreading it around the code paths. Link: https://lkml.kernel.org/r/20221020193832.776173-1-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Mike Kravetz Cc: Muchun Song Signed-off-by: Andrew Morton --- mm/hugetlb.c | 14 ++++---------- mm/rmap.c | 2 +- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 0af18c1e4b31..d11e92117d4a 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4775,7 +4775,6 @@ hugetlb_install_page(struct vm_area_struct *vma, pte_t *ptep, unsigned long addr hugepage_add_new_anon_rmap(new_page, vma, addr); set_huge_pte_at(vma->vm_mm, addr, ptep, make_huge_pte(vma, new_page, 1)); hugetlb_count_add(pages_per_huge_page(hstate_vma(vma)), vma->vm_mm); - ClearHPageRestoreReserve(new_page); SetHPageMigratable(new_page); } @@ -5438,8 +5437,6 @@ retry_avoidcopy: spin_lock(ptl); ptep = huge_pte_offset(mm, haddr, huge_page_size(h)); if (likely(ptep && pte_same(huge_ptep_get(ptep), pte))) { - ClearHPageRestoreReserve(new_page); - /* Break COW or unshare */ huge_ptep_clear_flush(vma, haddr, ptep); mmu_notifier_invalidate_range(mm, range.start, range.end); @@ -5734,10 +5731,9 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, if (!pte_same(huge_ptep_get(ptep), old_pte)) goto backout; - if (anon_rmap) { - ClearHPageRestoreReserve(page); + if (anon_rmap) hugepage_add_new_anon_rmap(page, vma, 
haddr); - } else + else page_dup_file_rmap(page, true); new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE) && (vma->vm_flags & VM_SHARED))); @@ -6120,12 +6116,10 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, if (!huge_pte_none_mostly(huge_ptep_get(dst_pte))) goto out_release_unlock; - if (page_in_pagecache) { + if (page_in_pagecache) page_dup_file_rmap(page, true); - } else { - ClearHPageRestoreReserve(page); + else hugepage_add_new_anon_rmap(page, dst_vma, dst_addr); - } /* * For either: (1) CONTINUE on a non-shared VMA, or (2) UFFDIO_COPY diff --git a/mm/rmap.c b/mm/rmap.c index 9bba65b30e4d..3b2d18bbdc44 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -2571,7 +2571,7 @@ void hugepage_add_new_anon_rmap(struct page *page, BUG_ON(address < vma->vm_start || address >= vma->vm_end); atomic_set(compound_mapcount_ptr(page), 0); atomic_set(compound_pincount_ptr(page), 0); - + ClearHPageRestoreReserve(page); __page_set_anon_rmap(page, vma, address, 1); } #endif /* CONFIG_HUGETLB_PAGE */ From 6e2be1f2ebcea42ed6044432f72f32434e60b34d Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Fri, 21 Oct 2022 13:59:52 +0200 Subject: [PATCH 1096/4122] compiler-gcc: be consistent with underscores use for `no_sanitize` Patch series "compiler-gcc: be consistent with underscores use for `no_sanitize`". This patch (of 5): Other macros that define shorthands for attributes in e.g. `compiler_attributes.h` and elsewhere use underscores. 
Link: https://lkml.kernel.org/r/20221021115956.9947-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda Reviewed-by: Nathan Chancellor Cc: Marco Elver Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Arnd Bergmann Cc: Dan Li Cc: Kees Cook Cc: Kumar Kartikeya Dwivedi Cc: Nick Desaulniers Cc: Uros Bizjak Signed-off-by: Andrew Morton --- include/linux/compiler-gcc.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index f55a37efdb97..b9530d3515ac 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -83,25 +83,25 @@ #endif #if __has_attribute(__no_sanitize_address__) -#define __no_sanitize_address __attribute__((no_sanitize_address)) +#define __no_sanitize_address __attribute__((__no_sanitize_address__)) #else #define __no_sanitize_address #endif #if defined(__SANITIZE_THREAD__) && __has_attribute(__no_sanitize_thread__) -#define __no_sanitize_thread __attribute__((no_sanitize_thread)) +#define __no_sanitize_thread __attribute__((__no_sanitize_thread__)) #else #define __no_sanitize_thread #endif #if __has_attribute(__no_sanitize_undefined__) -#define __no_sanitize_undefined __attribute__((no_sanitize_undefined)) +#define __no_sanitize_undefined __attribute__((__no_sanitize_undefined__)) #else #define __no_sanitize_undefined #endif #if defined(CONFIG_KCOV) && __has_attribute(__no_sanitize_coverage__) -#define __no_sanitize_coverage __attribute__((no_sanitize_coverage)) +#define __no_sanitize_coverage __attribute__((__no_sanitize_coverage__)) #else #define __no_sanitize_coverage #endif From ae37a9a2c2d0960d643d782b426ea1aa9c05727a Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Fri, 21 Oct 2022 13:59:53 +0200 Subject: [PATCH 1097/4122] compiler-gcc: remove attribute support check for `__no_sanitize_address__` The attribute was added in GCC 4.8, while the minimum GCC version supported by the kernel is GCC 5.1. Therefore, remove the check. 
Link: https://godbolt.org/z/84v56vcn8 Link: https://lkml.kernel.org/r/20221021115956.9947-2-ojeda@kernel.org Signed-off-by: Miguel Ojeda Reviewed-by: Nathan Chancellor Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Arnd Bergmann Cc: Dan Li Cc: Kees Cook Cc: Kumar Kartikeya Dwivedi Cc: Marco Elver Cc: Nick Desaulniers Cc: Uros Bizjak Signed-off-by: Andrew Morton --- include/linux/compiler-gcc.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index b9530d3515ac..bfce7f4d0978 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -82,11 +82,7 @@ #define __noscs __attribute__((__no_sanitize__("shadow-call-stack"))) #endif -#if __has_attribute(__no_sanitize_address__) #define __no_sanitize_address __attribute__((__no_sanitize_address__)) -#else -#define __no_sanitize_address -#endif #if defined(__SANITIZE_THREAD__) && __has_attribute(__no_sanitize_thread__) #define __no_sanitize_thread __attribute__((__no_sanitize_thread__)) From 095ac0763ac507dd4e1a71ad9784f49f51498483 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Fri, 21 Oct 2022 13:59:54 +0200 Subject: [PATCH 1098/4122] compiler-gcc: remove attribute support check for `__no_sanitize_thread__` The attribute was added in GCC 5.1, which matches the minimum GCC version supported by the kernel. Therefore, remove the check. 
Link: https://godbolt.org/z/vbxKejxbx Link: https://lkml.kernel.org/r/20221021115956.9947-3-ojeda@kernel.org Signed-off-by: Miguel Ojeda Acked-by: Marco Elver Reviewed-by: Nathan Chancellor Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Arnd Bergmann Cc: Dan Li Cc: Kees Cook Cc: Kumar Kartikeya Dwivedi Cc: Nick Desaulniers Cc: Uros Bizjak Signed-off-by: Andrew Morton --- include/linux/compiler-gcc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index bfce7f4d0978..ba207deb77ca 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -84,7 +84,7 @@ #define __no_sanitize_address __attribute__((__no_sanitize_address__)) -#if defined(__SANITIZE_THREAD__) && __has_attribute(__no_sanitize_thread__) +#if defined(__SANITIZE_THREAD__) #define __no_sanitize_thread __attribute__((__no_sanitize_thread__)) #else #define __no_sanitize_thread From 689540cbda7f69594ae5e13fef4c8239519d8b66 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Fri, 21 Oct 2022 13:59:55 +0200 Subject: [PATCH 1099/4122] compiler-gcc: remove attribute support check for `__no_sanitize_undefined__` The attribute was added in GCC 4.9, while the minimum GCC version supported by the kernel is GCC 5.1. Therefore, remove the check. 
Link: https://godbolt.org/z/GrMeo6fYr Link: https://lkml.kernel.org/r/20221021115956.9947-4-ojeda@kernel.org Signed-off-by: Miguel Ojeda Reviewed-by: Nathan Chancellor Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Arnd Bergmann Cc: Dan Li Cc: Kees Cook Cc: Kumar Kartikeya Dwivedi Cc: Marco Elver Cc: Nick Desaulniers Cc: Uros Bizjak Signed-off-by: Andrew Morton --- include/linux/compiler-gcc.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index ba207deb77ca..7f2c2bb73815 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -90,11 +90,7 @@ #define __no_sanitize_thread #endif -#if __has_attribute(__no_sanitize_undefined__) #define __no_sanitize_undefined __attribute__((__no_sanitize_undefined__)) -#else -#define __no_sanitize_undefined -#endif #if defined(CONFIG_KCOV) && __has_attribute(__no_sanitize_coverage__) #define __no_sanitize_coverage __attribute__((__no_sanitize_coverage__)) From f39556bc2530c83a22bc11b73c7a46df9a340685 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Fri, 21 Oct 2022 13:59:56 +0200 Subject: [PATCH 1100/4122] compiler-gcc: document minimum version for `__no_sanitize_coverage__` The attribute was added in GCC 12.1. This will simplify future cleanups, and is closer to what we do in `compiler_attributes.h`. 
Link: https://godbolt.org/z/MGbT76j6G Link: https://lkml.kernel.org/r/20221021115956.9947-5-ojeda@kernel.org Signed-off-by: Miguel Ojeda Acked-by: Marco Elver Reviewed-by: Nathan Chancellor Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Arnd Bergmann Cc: Dan Li Cc: Kees Cook Cc: Kumar Kartikeya Dwivedi Cc: Nick Desaulniers Cc: Uros Bizjak Signed-off-by: Andrew Morton --- include/linux/compiler-gcc.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 7f2c2bb73815..7af9e34ec261 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -92,6 +92,9 @@ #define __no_sanitize_undefined __attribute__((__no_sanitize_undefined__)) +/* + * Only supported since gcc >= 12 + */ #if defined(CONFIG_KCOV) && __has_attribute(__no_sanitize_coverage__) #define __no_sanitize_coverage __attribute__((__no_sanitize_coverage__)) #else From b5f1fc98c62b6b75e9f7499e7519dc67684affd3 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Fri, 21 Oct 2022 16:46:09 +0800 Subject: [PATCH 1101/4122] mm: memory-failure: make put_ref_page() more useful Pass pfn/flags to put_ref_page(), then check MF_COUNT_INCREASED and drop refcount to make the code look cleaner. 
Link: https://lkml.kernel.org/r/20221021084611.53765-1-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Acked-by: Naoya Horiguchi Reviewed-by: Miaohe Lin Signed-off-by: Andrew Morton --- mm/memory-failure.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 145bb561ddb3..8c6a19b9790f 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1910,17 +1910,25 @@ static inline unsigned long free_raw_hwp_pages(struct page *hpage, bool flag) } #endif /* CONFIG_HUGETLB_PAGE */ +/* Drop the extra refcount in case we come from madvise() */ +static void put_ref_page(unsigned long pfn, int flags) +{ + struct page *page; + + if (!(flags & MF_COUNT_INCREASED)) + return; + + page = pfn_to_page(pfn); + if (page) + put_page(page); +} + static int memory_failure_dev_pagemap(unsigned long pfn, int flags, struct dev_pagemap *pgmap) { - struct page *page = pfn_to_page(pfn); int rc = -ENXIO; - if (flags & MF_COUNT_INCREASED) - /* - * Drop the extra refcount in case we come from madvise(). - */ - put_page(page); + put_ref_page(pfn, flags); /* device metadata space is not recoverable */ if (!pgmap_pfn_valid(pgmap, pfn)) @@ -2513,12 +2521,6 @@ static int soft_offline_in_use_page(struct page *page) return ret; } -static void put_ref_page(struct page *page) -{ - if (page) - put_page(page); -} - /** * soft_offline_page - Soft offline a page. * @pfn: pfn to soft-offline @@ -2547,19 +2549,17 @@ int soft_offline_page(unsigned long pfn, int flags) { int ret; bool try_again = true; - struct page *page, *ref_page = NULL; + struct page *page; WARN_ON_ONCE(!pfn_valid(pfn) && (flags & MF_COUNT_INCREASED)); if (!pfn_valid(pfn)) return -ENXIO; - if (flags & MF_COUNT_INCREASED) - ref_page = pfn_to_page(pfn); /* Only online pages can be soft-offlined (esp., not ZONE_DEVICE). 
*/ page = pfn_to_online_page(pfn); if (!page) { - put_ref_page(ref_page); + put_ref_page(pfn, flags); return -EIO; } @@ -2567,7 +2567,7 @@ int soft_offline_page(unsigned long pfn, int flags) if (PageHWPoison(page)) { pr_info("%s: %#lx page already poisoned\n", __func__, pfn); - put_ref_page(ref_page); + put_ref_page(pfn, flags); mutex_unlock(&mf_mutex); return 0; } From 183a7c5d15d3c56f49955662d3edd0092141df78 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Fri, 21 Oct 2022 16:46:10 +0800 Subject: [PATCH 1102/4122] mm: memory-failure: avoid pfn_valid() twice in soft_offline_page() Simplify WARN_ON_ONCE(flags & MF_COUNT_INCREASED) under !pfn_valid(). Link: https://lkml.kernel.org/r/20221021084611.53765-2-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Acked-by: Naoya Horiguchi Reviewed-by: Miaohe Lin Signed-off-by: Andrew Morton --- mm/memory-failure.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 8c6a19b9790f..b5e0dba02192 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -2551,10 +2551,10 @@ int soft_offline_page(unsigned long pfn, int flags) bool try_again = true; struct page *page; - WARN_ON_ONCE(!pfn_valid(pfn) && (flags & MF_COUNT_INCREASED)); - - if (!pfn_valid(pfn)) + if (!pfn_valid(pfn)) { + WARN_ON_ONCE(flags & MF_COUNT_INCREASED); return -ENXIO; + } /* Only online pages can be soft-offlined (esp., not ZONE_DEVICE). */ page = pfn_to_online_page(pfn); From b66d00dfebe79ebd0d5a0ec4ee4e26583432c381 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Fri, 21 Oct 2022 16:46:11 +0800 Subject: [PATCH 1103/4122] mm: memory-failure: make action_result() return int Check mf_result in action_result(), only return 0 when MF_RECOVERED, or return -EBUSY, which will simplify code a bit. 
[wangkefeng.wang@huawei.com: v2] Link: https://lkml.kernel.org/r/20221024035138.99119-1-wangkefeng.wang@huawei.com Link: https://lkml.kernel.org/r/20221021084611.53765-3-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Acked-by: Naoya Horiguchi Reviewed-by: Miaohe Lin Signed-off-by: Andrew Morton --- mm/memory-failure.c | 45 ++++++++++++++++----------------------------- 1 file changed, 16 insertions(+), 29 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index b5e0dba02192..13594556146c 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1179,14 +1179,16 @@ static struct page_state error_states[] = { * "Dirty/Clean" indication is not 100% accurate due to the possibility of * setting PG_dirty outside page lock. See also comment above set_page_dirty(). */ -static void action_result(unsigned long pfn, enum mf_action_page_type type, - enum mf_result result) +static int action_result(unsigned long pfn, enum mf_action_page_type type, + enum mf_result result) { trace_memory_failure_event(pfn, type, result); num_poisoned_pages_inc(); pr_err("%#lx: recovery action for %s: %s\n", pfn, action_page_types[type], action_name[result]); + + return (result == MF_RECOVERED || result == MF_DELAYED) ? 0 : -EBUSY; } static int page_action(struct page_state *ps, struct page *p, @@ -1197,14 +1199,12 @@ static int page_action(struct page_state *ps, struct page *p, /* page p should be unlocked after returning from ps->action(). */ result = ps->action(ps, p); - action_result(pfn, ps->type, result); - /* Could do more checks here if page looks ok */ /* * Could adjust zone counters here to correct for the missing page. */ - return (result == MF_RECOVERED || result == MF_DELAYED) ? 
0 : -EBUSY; + return action_result(pfn, ps->type, result); } static inline bool PageHWPoisonTakenOff(struct page *page) @@ -1853,8 +1853,7 @@ retry: flags |= MF_NO_RETRY; goto retry; } - action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED); - return res; + return action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED); } head = compound_head(p); @@ -1880,22 +1879,17 @@ retry: } else { res = MF_FAILED; } - action_result(pfn, MF_MSG_FREE_HUGE, res); - return res == MF_RECOVERED ? 0 : -EBUSY; + return action_result(pfn, MF_MSG_FREE_HUGE, res); } page_flags = head->flags; if (!hwpoison_user_mappings(p, pfn, flags, head)) { - action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED); - res = -EBUSY; - goto out; + unlock_page(head); + return action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED); } return identify_page_state(pfn, p, page_flags); -out: - unlock_page(head); - return res; } #else @@ -2060,16 +2054,13 @@ try_again: } res = MF_FAILED; } - action_result(pfn, MF_MSG_BUDDY, res); - res = res == MF_RECOVERED ? 
0 : -EBUSY; + res = action_result(pfn, MF_MSG_BUDDY, res); } else { - action_result(pfn, MF_MSG_KERNEL_HIGH_ORDER, MF_IGNORED); - res = -EBUSY; + res = action_result(pfn, MF_MSG_KERNEL_HIGH_ORDER, MF_IGNORED); } goto unlock_mutex; } else if (res < 0) { - action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED); - res = -EBUSY; + res = action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED); goto unlock_mutex; } } @@ -2090,8 +2081,7 @@ try_again: */ SetPageHasHWPoisoned(hpage); if (try_to_split_thp_page(p) < 0) { - action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED); - res = -EBUSY; + res = action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED); goto unlock_mutex; } VM_BUG_ON_PAGE(!page_count(p), p); @@ -2124,8 +2114,7 @@ try_again: retry = false; goto try_again; } - action_result(pfn, MF_MSG_DIFFERENT_COMPOUND, MF_IGNORED); - res = -EBUSY; + res = action_result(pfn, MF_MSG_DIFFERENT_COMPOUND, MF_IGNORED); goto unlock_page; } @@ -2165,8 +2154,7 @@ try_again: * Abort on fail: __filemap_remove_folio() assumes unmapped page. */ if (!hwpoison_user_mappings(p, pfn, flags, p)) { - action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED); - res = -EBUSY; + res = action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED); goto unlock_page; } @@ -2174,8 +2162,7 @@ try_again: * Torn down by someone else? */ if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) { - action_result(pfn, MF_MSG_TRUNCATED_LRU, MF_IGNORED); - res = -EBUSY; + res = action_result(pfn, MF_MSG_TRUNCATED_LRU, MF_IGNORED); goto unlock_page; } From 26215b7ee923b9251f7bb12c4e5f09dc465d35f2 Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Fri, 21 Oct 2022 07:16:08 +0800 Subject: [PATCH 1104/4122] hugetlbfs: fix null-ptr-deref in hugetlbfs_parse_param() Syzkaller reports a null-ptr-deref bug as follows: ====================================================== KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] RIP: 0010:hugetlbfs_parse_param+0x1dd/0x8e0 fs/hugetlbfs/inode.c:1380 [...] 
Call Trace: vfs_parse_fs_param fs/fs_context.c:148 [inline] vfs_parse_fs_param+0x1f9/0x3c0 fs/fs_context.c:129 vfs_parse_fs_string+0xdb/0x170 fs/fs_context.c:191 generic_parse_monolithic+0x16f/0x1f0 fs/fs_context.c:231 do_new_mount fs/namespace.c:3036 [inline] path_mount+0x12de/0x1e20 fs/namespace.c:3370 do_mount fs/namespace.c:3383 [inline] __do_sys_mount fs/namespace.c:3591 [inline] __se_sys_mount fs/namespace.c:3568 [inline] __x64_sys_mount+0x27f/0x300 fs/namespace.c:3568 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd [...] ====================================================== According to commit "vfs: parse: deal with zero length string value", the kernel will set param->string to a null pointer in vfs_parse_fs_string() if the fs string has zero length. Yet the problem is that hugetlbfs_parse_param() will dereference param->string without checking whether it is a null pointer. To be more specific, if hugetlbfs_parse_param() parses an illegal mount parameter, such as "size=,", the kernel will construct a struct fs_parameter with a null pointer in vfs_parse_fs_string(), then pass this struct fs_parameter to hugetlbfs_parse_param(), which triggers the above null-ptr-deref bug. This patch solves it by adding a sanity check on param->string in hugetlbfs_parse_param().
Link: https://lkml.kernel.org/r/20221020231609.4810-1-yin31149@gmail.com Reported-by: syzbot+a3e6acd85ded5c16a709@syzkaller.appspotmail.com Tested-by: syzbot+a3e6acd85ded5c16a709@syzkaller.appspotmail.com Link: https://lore.kernel.org/all/0000000000005ad00405eb7148c6@google.com/ Signed-off-by: Hawkins Jiawei Reviewed-by: Mike Kravetz Cc: Hawkins Jiawei Cc: Muchun Song Cc: Ian Kent Signed-off-by: Andrew Morton --- fs/hugetlbfs/inode.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 00495fc128c5..09e644f80a4a 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -1378,7 +1378,7 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par case Opt_size: /* memparse() will accept a K/M/G without a digit */ - if (!isdigit(param->string[0])) + if (!param->string || !isdigit(param->string[0])) goto bad_val; ctx->max_size_opt = memparse(param->string, &rest); ctx->max_val_type = SIZE_STD; @@ -1388,7 +1388,7 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par case Opt_nr_inodes: /* memparse() will accept a K/M/G without a digit */ - if (!isdigit(param->string[0])) + if (!param->string || !isdigit(param->string[0])) goto bad_val; ctx->nr_inodes = memparse(param->string, &rest); return 0; @@ -1404,7 +1404,7 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par case Opt_min_size: /* memparse() will accept a K/M/G without a digit */ - if (!isdigit(param->string[0])) + if (!param->string || !isdigit(param->string[0])) goto bad_val; ctx->min_size_opt = memparse(param->string, &rest); ctx->min_val_type = SIZE_STD; From b12fdbf15f92b6cf5fecdd8a1855afe8809e5c58 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 24 Oct 2022 15:33:36 -0400 Subject: [PATCH 1105/4122] Revert "mm/uffd: fix warning without PTE_MARKER_UFFD_WP compiled in" With " mm/uffd: Fix vma check on userfault for wp" to fix the registration, we'll be 
safe to remove the macro hacks now. Link: https://lkml.kernel.org/r/20221024193336.1233616-3-peterx@redhat.com Signed-off-by: Peter Xu Signed-off-by: Andrew Morton --- mm/hugetlb.c | 4 ---- mm/memory.c | 2 -- mm/mprotect.c | 2 -- 3 files changed, 8 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index d11e92117d4a..fc8908d715d6 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -5114,7 +5114,6 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct * unmapped and its refcount is dropped, so just clear pte here. */ if (unlikely(!pte_present(pte))) { -#ifdef CONFIG_PTE_MARKER_UFFD_WP /* * If the pte was wr-protected by uffd-wp in any of the * swap forms, meanwhile the caller does not want to @@ -5126,7 +5125,6 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct set_huge_pte_at(mm, address, ptep, make_pte_marker(PTE_MARKER_UFFD_WP)); else -#endif huge_pte_clear(mm, address, ptep, sz); spin_unlock(ptl); continue; @@ -5155,13 +5153,11 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct tlb_remove_huge_tlb_entry(h, tlb, ptep, address); if (huge_pte_dirty(pte)) set_page_dirty(page); -#ifdef CONFIG_PTE_MARKER_UFFD_WP /* Leave a uffd-wp pte marker if needed */ if (huge_pte_uffd_wp(pte) && !(zap_flags & ZAP_FLAG_DROP_MARKER)) set_huge_pte_at(mm, address, ptep, make_pte_marker(PTE_MARKER_UFFD_WP)); -#endif hugetlb_count_sub(pages_per_huge_page(h), mm); page_remove_rmap(page, vma, true); diff --git a/mm/memory.c b/mm/memory.c index f88c351aecd4..81cc75e71888 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1393,12 +1393,10 @@ zap_install_uffd_wp_if_needed(struct vm_area_struct *vma, unsigned long addr, pte_t *pte, struct zap_details *details, pte_t pteval) { -#ifdef CONFIG_PTE_MARKER_UFFD_WP if (zap_drop_file_uffd_wp(details)) return; pte_install_uffd_wp_if_needed(vma, addr, pte, pteval); -#endif } static unsigned long zap_pte_range(struct mmu_gather *tlb, diff --git a/mm/mprotect.c 
b/mm/mprotect.c index 99762403cc8f..8d770855b591 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -267,7 +267,6 @@ static unsigned long change_pte_range(struct mmu_gather *tlb, } else { /* It must be an none page, or what else?.. */ WARN_ON_ONCE(!pte_none(oldpte)); -#ifdef CONFIG_PTE_MARKER_UFFD_WP if (unlikely(uffd_wp && !vma_is_anonymous(vma))) { /* * For file-backed mem, we need to be able to @@ -279,7 +278,6 @@ static unsigned long change_pte_range(struct mmu_gather *tlb, make_pte_marker(PTE_MARKER_UFFD_WP)); pages++; } -#endif } } while (pte++, addr += PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); From fd4a7ac32918d3d7a2d17dc06c5520f45e36eb52 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Mon, 24 Oct 2022 16:34:22 +0800 Subject: [PATCH 1106/4122] mm: migrate: try again if THP split is failed due to page refcnt When creating a virtual machine, we will use memfd_create() to get a file descriptor which can be used to create shared memory mappings using the mmap function, meanwhile the mmap() will set the MAP_POPULATE flag to allocate physical pages for the virtual machine. When allocating physical pages for the guest, the host can fall back to allocating some CMA pages for the guest when over half of the zone's free memory is in the CMA area. In the guest OS, when the application wants to do some data transaction with DMA, our QEMU will call VFIO_IOMMU_MAP_DMA ioctl to do longterm-pin and create IOMMU mappings for the DMA pages. However, when calling VFIO_IOMMU_MAP_DMA ioctl to pin the physical pages, we found that the longterm-pin sometimes fails. After some investigation, we found the pages used to do DMA mapping can contain some CMA pages, and these CMA pages will cause a possible failure of the longterm-pin, due to a failure to migrate the CMA pages. The reason for the migration failure may be a temporary reference count or memory allocation failure. That causes the VFIO_IOMMU_MAP_DMA ioctl to return an error, which makes the application fail to start.
I observed one migration failure case (which is not easy to reproduce) is that, the 'thp_migration_fail' count is 1 and the 'thp_split_page_failed' count is also 1. That means when migrating a THP which is in CMA area, but can not allocate a new THP due to memory fragmentation, so it will split the THP. However THP split is also failed, probably the reason is temporary reference count of this THP. And the temporary reference count can be caused by dropping page caches (I observed the drop caches operation in the system), but we can not drop the shmem page caches due to they are already dirty at that time. Especially for THP split failure, which is caused by temporary reference count, we can try again to mitigate the failure of migration in this case according to previous discussion [1]. [1] https://lore.kernel.org/all/470dc638-a300-f261-94b4-e27250e42f96@redhat.com/ Link: https://lkml.kernel.org/r/6784730480a1df82e8f4cba1ed088e4ac767994b.1666599848.git.baolin.wang@linux.alibaba.com Signed-off-by: Baolin Wang Reviewed-by: "Huang, Ying" Cc: Alistair Popple Cc: David Hildenbrand Cc: Yang Shi Cc: Zi Yan Signed-off-by: Andrew Morton --- mm/huge_memory.c | 4 ++-- mm/migrate.c | 19 ++++++++++++++++--- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index bc5dda3d2ad7..a00e9c335e69 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2712,7 +2712,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) * split PMDs */ if (!can_split_folio(folio, &extra_pins)) { - ret = -EBUSY; + ret = -EAGAIN; goto out_unlock; } @@ -2762,7 +2762,7 @@ fail: xas_unlock(&xas); local_irq_enable(); remap_page(folio, folio_nr_pages(folio)); - ret = -EBUSY; + ret = -EAGAIN; } out_unlock: diff --git a/mm/migrate.c b/mm/migrate.c index 556cb1c86e53..f8c85b42e2bc 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1506,9 +1506,22 @@ thp_subpage_migration: if (is_thp) { nr_thp_failed++; /* THP NUMA faulting doesn't split THP to 
retry. */ - if (!nosplit && !try_split_thp(page, &thp_split_pages)) { - nr_thp_split++; - break; + if (!nosplit) { + int ret = try_split_thp(page, &thp_split_pages); + + if (!ret) { + nr_thp_split++; + break; + } else if (reason == MR_LONGTERM_PIN && + ret == -EAGAIN) { + /* + * Try again to split THP to mitigate + * the failure of longterm pinning. + */ + thp_retry++; + nr_retry_pages += nr_subpages; + break; + } } } else if (!no_subpage_counting) { nr_failed++; From e591ef7d96d6ea249916f351dc26a636e565c635 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Mon, 24 Oct 2022 15:20:09 +0900 Subject: [PATCH 1107/4122] mm,hwpoison,hugetlb,memory_hotplug: hotremove memory section with hwpoisoned hugepage Patch series "mm, hwpoison: improve handling workload related to hugetlb and memory_hotplug", v7. This patchset tries to solve the issue among memory_hotplug, hugetlb and hwpoison. In this patchset, memory hotplug handles hwpoison pages like below: - hwpoison pages should not prevent memory hotremove, - memory block with hwpoison pages should not be onlined. This patch (of 4): HWPoisoned page is not supposed to be accessed once marked, but currently such accesses can happen during memory hotremove because do_migrate_range() can be called before dissolve_free_huge_pages() is called. Clear HPageMigratable for hwpoisoned hugepages to prevent them from being migrated. This should be done in hugetlb_lock to avoid race against isolate_hugetlb(). get_hwpoison_huge_page() needs to have a flag to show it's called from unpoison to take refcount of hwpoisoned hugepages, so add it. 
[naoya.horiguchi@linux.dev: remove TestClearHPageMigratable and reduce to test and clear separately] Link: https://lkml.kernel.org/r/20221025053559.GA2104800@ik1-406-35019.vs.sakura.ne.jp Link: https://lkml.kernel.org/r/20221024062012.1520887-1-naoya.horiguchi@linux.dev Link: https://lkml.kernel.org/r/20221024062012.1520887-2-naoya.horiguchi@linux.dev Signed-off-by: Naoya Horiguchi Reported-by: Miaohe Lin Reviewed-by: Oscar Salvador Reviewed-by: Miaohe Lin Cc: David Hildenbrand Cc: Jane Chu Cc: Mike Kravetz Cc: Muchun Song Cc: Yang Shi Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 10 ++++++---- include/linux/mm.h | 6 ++++-- mm/hugetlb.c | 9 +++++---- mm/memory-failure.c | 21 +++++++++++++++++---- 4 files changed, 32 insertions(+), 14 deletions(-) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 20a0d5a08395..65ea34022aa2 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -183,8 +183,9 @@ bool hugetlb_reserve_pages(struct inode *inode, long from, long to, long hugetlb_unreserve_pages(struct inode *inode, long start, long end, long freed); int isolate_hugetlb(struct page *page, struct list_head *list); -int get_hwpoison_huge_page(struct page *page, bool *hugetlb); -int get_huge_page_for_hwpoison(unsigned long pfn, int flags); +int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison); +int get_huge_page_for_hwpoison(unsigned long pfn, int flags, + bool *migratable_cleared); void putback_active_hugepage(struct page *page); void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason); void free_huge_page(struct page *page); @@ -391,12 +392,13 @@ static inline int isolate_hugetlb(struct page *page, struct list_head *list) return -EBUSY; } -static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb) +static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison) { return 0; } -static inline int get_huge_page_for_hwpoison(unsigned long pfn, 
int flags) +static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags, + bool *migratable_cleared) { return 0; } diff --git a/include/linux/mm.h b/include/linux/mm.h index f6d2d2d9e284..e2ac6fff03a8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3277,9 +3277,11 @@ extern void shake_page(struct page *p); extern atomic_long_t num_poisoned_pages __read_mostly; extern int soft_offline_page(unsigned long pfn, int flags); #ifdef CONFIG_MEMORY_FAILURE -extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags); +extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags, + bool *migratable_cleared); #else -static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags) +static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags, + bool *migratable_cleared) { return 0; } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index fc8908d715d6..fdb36afea2b2 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -7265,7 +7265,7 @@ unlock: return ret; } -int get_hwpoison_huge_page(struct page *page, bool *hugetlb) +int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison) { int ret = 0; @@ -7275,7 +7275,7 @@ int get_hwpoison_huge_page(struct page *page, bool *hugetlb) *hugetlb = true; if (HPageFreed(page)) ret = 0; - else if (HPageMigratable(page)) + else if (HPageMigratable(page) || unpoison) ret = get_page_unless_zero(page); else ret = -EBUSY; @@ -7284,12 +7284,13 @@ int get_hwpoison_huge_page(struct page *page, bool *hugetlb) return ret; } -int get_huge_page_for_hwpoison(unsigned long pfn, int flags) +int get_huge_page_for_hwpoison(unsigned long pfn, int flags, + bool *migratable_cleared) { int ret; spin_lock_irq(&hugetlb_lock); - ret = __get_huge_page_for_hwpoison(pfn, flags); + ret = __get_huge_page_for_hwpoison(pfn, flags, migratable_cleared); spin_unlock_irq(&hugetlb_lock); return ret; } diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 13594556146c..4fff0b36c61d 100644 --- 
a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1244,7 +1244,7 @@ static int __get_hwpoison_page(struct page *page, unsigned long flags) int ret = 0; bool hugetlb = false; - ret = get_hwpoison_huge_page(head, &hugetlb); + ret = get_hwpoison_huge_page(head, &hugetlb, false); if (hugetlb) return ret; @@ -1334,7 +1334,7 @@ static int __get_unpoison_page(struct page *page) int ret = 0; bool hugetlb = false; - ret = get_hwpoison_huge_page(head, &hugetlb); + ret = get_hwpoison_huge_page(head, &hugetlb, true); if (hugetlb) return ret; @@ -1785,7 +1785,8 @@ void hugetlb_clear_page_hwpoison(struct page *hpage) * -EBUSY - the hugepage is busy (try to retry) * -EHWPOISON - the hugepage is already hwpoisoned */ -int __get_huge_page_for_hwpoison(unsigned long pfn, int flags) +int __get_huge_page_for_hwpoison(unsigned long pfn, int flags, + bool *migratable_cleared) { struct page *page = pfn_to_page(pfn); struct page *head = compound_head(page); @@ -1815,6 +1816,15 @@ int __get_huge_page_for_hwpoison(unsigned long pfn, int flags) goto out; } + /* + * Clearing HPageMigratable for hwpoisoned hugepages to prevent them + * from being migrated by memory hotremove. 
+ */ + if (count_increased && HPageMigratable(head)) { + ClearHPageMigratable(head); + *migratable_cleared = true; + } + return ret; out: if (count_increased) @@ -1834,10 +1844,11 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb struct page *p = pfn_to_page(pfn); struct page *head; unsigned long page_flags; + bool migratable_cleared = false; *hugetlb = 1; retry: - res = get_huge_page_for_hwpoison(pfn, flags); + res = get_huge_page_for_hwpoison(pfn, flags, &migratable_cleared); if (res == 2) { /* fallback to normal page handling */ *hugetlb = 0; return 0; @@ -1861,6 +1872,8 @@ retry: if (hwpoison_filter(p)) { hugetlb_clear_page_hwpoison(head); + if (migratable_cleared) + SetHPageMigratable(head); unlock_page(head); if (res == 1) put_page(head); From d027122d8363e58cd8bc2fa6a16917f7f69b85bb Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Mon, 24 Oct 2022 15:20:10 +0900 Subject: [PATCH 1108/4122] mm/hwpoison: move definitions of num_poisoned_pages_* to memory-failure.c These interfaces will be used by drivers/base/memory.c by later patch, so as a preparatory work move them to more common header file visible to the file. 
Link: https://lkml.kernel.org/r/20221024062012.1520887-3-naoya.horiguchi@linux.dev Signed-off-by: Naoya Horiguchi Reviewed-by: Miaohe Lin Cc: David Hildenbrand Cc: Jane Chu Cc: Mike Kravetz Cc: Muchun Song Cc: Oscar Salvador Cc: Yang Shi Signed-off-by: Andrew Morton --- arch/parisc/kernel/pdt.c | 3 +-- include/linux/mm.h | 5 +++++ include/linux/swapops.h | 24 ++---------------------- mm/memory-failure.c | 10 ++++++++++ 4 files changed, 18 insertions(+), 24 deletions(-) diff --git a/arch/parisc/kernel/pdt.c b/arch/parisc/kernel/pdt.c index e391b175f5ec..fdc880e2575a 100644 --- a/arch/parisc/kernel/pdt.c +++ b/arch/parisc/kernel/pdt.c @@ -18,8 +18,7 @@ #include #include #include -#include -#include +#include #include #include diff --git a/include/linux/mm.h b/include/linux/mm.h index e2ac6fff03a8..c667a6c4b657 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3279,12 +3279,17 @@ extern int soft_offline_page(unsigned long pfn, int flags); #ifdef CONFIG_MEMORY_FAILURE extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags, bool *migratable_cleared); +extern void num_poisoned_pages_inc(void); #else static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags, bool *migratable_cleared) { return 0; } + +static inline void num_poisoned_pages_inc(void) +{ +} #endif #ifndef arch_memory_failure diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 86b95ccb81bb..3ba9bf56899d 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -581,8 +581,6 @@ static inline int is_pmd_migration_entry(pmd_t pmd) #ifdef CONFIG_MEMORY_FAILURE -extern atomic_long_t num_poisoned_pages __read_mostly; - /* * Support for hardware poisoned pages */ @@ -597,17 +595,7 @@ static inline int is_hwpoison_entry(swp_entry_t entry) return swp_type(entry) == SWP_HWPOISON; } -static inline void num_poisoned_pages_inc(void) -{ - atomic_long_inc(&num_poisoned_pages); -} - -static inline void num_poisoned_pages_sub(long i) -{ - 
atomic_long_sub(i, &num_poisoned_pages); -} - -#else /* CONFIG_MEMORY_FAILURE */ +#else static inline swp_entry_t make_hwpoison_entry(struct page *page) { @@ -618,15 +606,7 @@ static inline int is_hwpoison_entry(swp_entry_t swp) { return 0; } - -static inline void num_poisoned_pages_inc(void) -{ -} - -static inline void num_poisoned_pages_sub(long i) -{ -} -#endif /* CONFIG_MEMORY_FAILURE */ +#endif static inline int non_swap_entry(swp_entry_t entry) { diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 4fff0b36c61d..44d7bf6ff214 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -74,6 +74,16 @@ atomic_long_t num_poisoned_pages __read_mostly = ATOMIC_LONG_INIT(0); static bool hw_memory_failure __read_mostly = false; +inline void num_poisoned_pages_inc(void) +{ + atomic_long_inc(&num_poisoned_pages); +} + +static inline void num_poisoned_pages_sub(long i) +{ + atomic_long_sub(i, &num_poisoned_pages); +} + /* * Return values: * 1: the page is dissolved (if needed) and taken off from buddy, From a46c9304b4bbf1b164154976cbb7e648980c7b5b Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Mon, 24 Oct 2022 15:20:11 +0900 Subject: [PATCH 1109/4122] mm/hwpoison: pass pfn to num_poisoned_pages_*() No functional change. 
Link: https://lkml.kernel.org/r/20221024062012.1520887-4-naoya.horiguchi@linux.dev Signed-off-by: Naoya Horiguchi Reviewed-by: Miaohe Lin Cc: David Hildenbrand Cc: Jane Chu Cc: Mike Kravetz Cc: Muchun Song Cc: Oscar Salvador Cc: Yang Shi Signed-off-by: Andrew Morton --- arch/parisc/kernel/pdt.c | 2 +- include/linux/mm.h | 4 ++-- mm/memory-failure.c | 14 +++++++------- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/parisc/kernel/pdt.c b/arch/parisc/kernel/pdt.c index fdc880e2575a..80943a00e245 100644 --- a/arch/parisc/kernel/pdt.c +++ b/arch/parisc/kernel/pdt.c @@ -231,7 +231,7 @@ void __init pdc_pdt_init(void) /* mark memory page bad */ memblock_reserve(pdt_entry[i] & PAGE_MASK, PAGE_SIZE); - num_poisoned_pages_inc(); + num_poisoned_pages_inc(addr >> PAGE_SHIFT); } } diff --git a/include/linux/mm.h b/include/linux/mm.h index c667a6c4b657..78ae2ee09a24 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3279,7 +3279,7 @@ extern int soft_offline_page(unsigned long pfn, int flags); #ifdef CONFIG_MEMORY_FAILURE extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags, bool *migratable_cleared); -extern void num_poisoned_pages_inc(void); +extern void num_poisoned_pages_inc(unsigned long pfn); #else static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags, bool *migratable_cleared) @@ -3287,7 +3287,7 @@ static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags, return 0; } -static inline void num_poisoned_pages_inc(void) +static inline void num_poisoned_pages_inc(unsigned long pfn) { } #endif diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 44d7bf6ff214..757a46e172de 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -74,12 +74,12 @@ atomic_long_t num_poisoned_pages __read_mostly = ATOMIC_LONG_INIT(0); static bool hw_memory_failure __read_mostly = false; -inline void num_poisoned_pages_inc(void) +inline void num_poisoned_pages_inc(unsigned long pfn) { 
atomic_long_inc(&num_poisoned_pages); } -static inline void num_poisoned_pages_sub(long i) +static inline void num_poisoned_pages_sub(unsigned long pfn, long i) { atomic_long_sub(i, &num_poisoned_pages); } @@ -125,7 +125,7 @@ static bool page_handle_poison(struct page *page, bool hugepage_or_freepage, boo if (release) put_page(page); page_ref_inc(page); - num_poisoned_pages_inc(); + num_poisoned_pages_inc(page_to_pfn(page)); return true; } @@ -1194,7 +1194,7 @@ static int action_result(unsigned long pfn, enum mf_action_page_type type, { trace_memory_failure_event(pfn, type, result); - num_poisoned_pages_inc(); + num_poisoned_pages_inc(pfn); pr_err("%#lx: recovery action for %s: %s\n", pfn, action_page_types[type], action_name[result]); @@ -1741,7 +1741,7 @@ static int hugetlb_set_page_hwpoison(struct page *hpage, struct page *page) llist_add(&raw_hwp->node, head); /* the first error event will be counted in action_result(). */ if (ret) - num_poisoned_pages_inc(); + num_poisoned_pages_inc(page_to_pfn(page)); } else { /* * Failed to save raw error info. We no longer trace all @@ -2414,7 +2414,7 @@ int unpoison_memory(unsigned long pfn) unlock_mutex: mutex_unlock(&mf_mutex); if (!ret || freeit) { - num_poisoned_pages_sub(count); + num_poisoned_pages_sub(pfn, count); unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n", page_to_pfn(p), &unpoison_rs); } @@ -2630,5 +2630,5 @@ void clear_hwpoisoned_pages(struct page *memmap, int nr_pages) } } if (total) - num_poisoned_pages_sub(total); + num_poisoned_pages_sub(0, total); } From 5033091de814ab4b5623faed2755f3064e19e2d2 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Mon, 24 Oct 2022 15:20:12 +0900 Subject: [PATCH 1110/4122] mm/hwpoison: introduce per-memory_block hwpoison counter Currently PageHWPoison flag does not behave well when experiencing memory hotremove/hotplug. 
Any data field in struct page is unreliable when the associated memory is offlined, and the current mechanism can't tell whether a memory block is onlined because a new memory device is installed or because previous failed offline operations are undone. Especially if there's hwpoisoned memory, it's unclear what the best option is. So introduce a new mechanism to make struct memory_block remember that a memory block has hwpoisoned memory inside it. And make any online event fail if the onlining memory block contains hwpoison. struct memory_block is freed and reallocated over ACPI-based hotremove/hotplug, but not over sysfs-based hotremove/hotplug. So the new counter can distinguish these cases. Link: https://lkml.kernel.org/r/20221024062012.1520887-5-naoya.horiguchi@linux.dev Signed-off-by: Naoya Horiguchi Reported-by: kernel test robot Reviewed-by: Miaohe Lin Cc: David Hildenbrand Cc: Jane Chu Cc: Mike Kravetz Cc: Muchun Song Cc: Oscar Salvador Cc: Yang Shi Signed-off-by: Andrew Morton --- drivers/base/memory.c | 38 ++++++++++++++++++++++++++++++++++++++ include/linux/memory.h | 3 +++ include/linux/mm.h | 20 +++++++++++++++++++- mm/internal.h | 8 -------- mm/memory-failure.c | 36 +++++++++++------------------------- mm/sparse.c | 2 -- 6 files changed, 71 insertions(+), 36 deletions(-) diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 9aa0da991cfb..fe98fb8d94e5 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -175,6 +175,15 @@ int memory_notify(unsigned long val, void *v) return blocking_notifier_call_chain(&memory_chain, val, v); } +#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG) +static unsigned long memblk_nr_poison(struct memory_block *mem); +#else +static inline unsigned long memblk_nr_poison(struct memory_block *mem) +{ + return 0; +} +#endif + static int memory_block_online(struct memory_block *mem) { unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr); @@ -183,6 +192,9 @@ static int
memory_block_online(struct memory_block *mem) struct zone *zone; int ret; + if (memblk_nr_poison(mem)) + return -EHWPOISON; + zone = zone_for_pfn_range(mem->online_type, mem->nid, mem->group, start_pfn, nr_pages); @@ -864,6 +876,7 @@ void remove_memory_block_devices(unsigned long start, unsigned long size) mem = find_memory_block_by_id(block_id); if (WARN_ON_ONCE(!mem)) continue; + num_poisoned_pages_sub(-1UL, memblk_nr_poison(mem)); unregister_memory_block_under_nodes(mem); remove_memory_block(mem); } @@ -1164,3 +1177,28 @@ int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func, } return ret; } + +#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG) +void memblk_nr_poison_inc(unsigned long pfn) +{ + const unsigned long block_id = pfn_to_block_id(pfn); + struct memory_block *mem = find_memory_block_by_id(block_id); + + if (mem) + atomic_long_inc(&mem->nr_hwpoison); +} + +void memblk_nr_poison_sub(unsigned long pfn, long i) +{ + const unsigned long block_id = pfn_to_block_id(pfn); + struct memory_block *mem = find_memory_block_by_id(block_id); + + if (mem) + atomic_long_sub(i, &mem->nr_hwpoison); +} + +static unsigned long memblk_nr_poison(struct memory_block *mem) +{ + return atomic_long_read(&mem->nr_hwpoison); +} +#endif diff --git a/include/linux/memory.h b/include/linux/memory.h index 463662ef7614..31343566c221 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -84,6 +84,9 @@ struct memory_block { unsigned long nr_vmemmap_pages; struct memory_group *group; /* group (if any) for this block */ struct list_head group_next; /* next block inside memory group */ +#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG) + atomic_long_t nr_hwpoison; +#endif }; int arch_get_memory_phys_device(unsigned long start_pfn); diff --git a/include/linux/mm.h b/include/linux/mm.h index 78ae2ee09a24..429ff89bfe06 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3279,7 +3279,8 @@ extern int 
soft_offline_page(unsigned long pfn, int flags); #ifdef CONFIG_MEMORY_FAILURE extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags, bool *migratable_cleared); -extern void num_poisoned_pages_inc(unsigned long pfn); +void num_poisoned_pages_inc(unsigned long pfn); +void num_poisoned_pages_sub(unsigned long pfn, long i); #else static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags, bool *migratable_cleared) @@ -3290,6 +3291,23 @@ static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags, static inline void num_poisoned_pages_inc(unsigned long pfn) { } + +static inline void num_poisoned_pages_sub(unsigned long pfn, long i) +{ +} +#endif + +#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG) +extern void memblk_nr_poison_inc(unsigned long pfn); +extern void memblk_nr_poison_sub(unsigned long pfn, long i); +#else +static inline void memblk_nr_poison_inc(unsigned long pfn) +{ +} + +static inline void memblk_nr_poison_sub(unsigned long pfn, long i) +{ +} #endif #ifndef arch_memory_failure diff --git a/mm/internal.h b/mm/internal.h index 68afdbe7106e..bcf75a8b032d 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -708,14 +708,6 @@ extern u64 hwpoison_filter_flags_value; extern u64 hwpoison_filter_memcg; extern u32 hwpoison_filter_enable; -#ifdef CONFIG_MEMORY_FAILURE -void clear_hwpoisoned_pages(struct page *memmap, int nr_pages); -#else -static inline void clear_hwpoisoned_pages(struct page *memmap, int nr_pages) -{ -} -#endif - extern unsigned long __must_check vm_mmap_pgoff(struct file *, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 757a46e172de..9b82402ec242 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -77,11 +77,14 @@ static bool hw_memory_failure __read_mostly = false; inline void num_poisoned_pages_inc(unsigned long pfn) { atomic_long_inc(&num_poisoned_pages); + memblk_nr_poison_inc(pfn); 
} -static inline void num_poisoned_pages_sub(unsigned long pfn, long i) +inline void num_poisoned_pages_sub(unsigned long pfn, long i) { atomic_long_sub(i, &num_poisoned_pages); + if (pfn != -1UL) + memblk_nr_poison_sub(pfn, i); } /* @@ -1706,6 +1709,8 @@ static unsigned long __free_raw_hwp_pages(struct page *hpage, bool move_flag) if (move_flag) SetPageHWPoison(p->page); + else + num_poisoned_pages_sub(page_to_pfn(p->page), 1); kfree(p); count++; } @@ -2332,6 +2337,7 @@ int unpoison_memory(unsigned long pfn) int ret = -EBUSY; int freeit = 0; unsigned long count = 1; + bool huge = false; static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); @@ -2380,6 +2386,7 @@ int unpoison_memory(unsigned long pfn) ret = get_hwpoison_page(p, MF_UNPOISON); if (!ret) { if (PageHuge(p)) { + huge = true; count = free_raw_hwp_pages(page, false); if (count == 0) { ret = -EBUSY; @@ -2395,6 +2402,7 @@ int unpoison_memory(unsigned long pfn) pfn, &unpoison_rs); } else { if (PageHuge(p)) { + huge = true; count = free_raw_hwp_pages(page, false); if (count == 0) { ret = -EBUSY; @@ -2414,7 +2422,8 @@ int unpoison_memory(unsigned long pfn) unlock_mutex: mutex_unlock(&mf_mutex); if (!ret || freeit) { - num_poisoned_pages_sub(pfn, count); + if (!huge) + num_poisoned_pages_sub(pfn, 1); unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n", page_to_pfn(p), &unpoison_rs); } @@ -2609,26 +2618,3 @@ retry: return ret; } - -void clear_hwpoisoned_pages(struct page *memmap, int nr_pages) -{ - int i, total = 0; - - /* - * A further optimization is to have per section refcounted - * num_poisoned_pages. But that would need more space per memmap, so - * for now just do a quick global check to speed up this routine in the - * absence of bad pages. 
- */ - if (atomic_long_read(&num_poisoned_pages) == 0) - return; - - for (i = 0; i < nr_pages; i++) { - if (PageHWPoison(&memmap[i])) { - total++; - ClearPageHWPoison(&memmap[i]); - } - } - if (total) - num_poisoned_pages_sub(0, total); -} diff --git a/mm/sparse.c b/mm/sparse.c index e5a8a3a0edd7..2779b419ef2a 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -926,8 +926,6 @@ void sparse_remove_section(struct mem_section *ms, unsigned long pfn, unsigned long nr_pages, unsigned long map_offset, struct vmem_altmap *altmap) { - clear_hwpoisoned_pages(pfn_to_page(pfn) + map_offset, - nr_pages - map_offset); section_deactivate(pfn, nr_pages, altmap); } #endif /* CONFIG_MEMORY_HOTPLUG */ From ea0ffd0c08d0fef1f6e93eb07badbeeabf6b43d6 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Mon, 24 Oct 2022 00:25:33 +0800 Subject: [PATCH 1111/4122] swap: add a limit for readahead page-cluster value Currently there is no upper limit for /proc/sys/vm/page-cluster, and it's a bit shift value, so it could result in overflow of the 32-bit integer. Add a reasonable upper limit for it, read-in at most 2**31 pages, which is a large enough value for readahead.
Link: https://lkml.kernel.org/r/20221023162533.81561-1-ryncsn@gmail.com Signed-off-by: Kairui Song Signed-off-by: Andrew Morton --- include/linux/mm.h | 1 + kernel/sysctl.c | 1 + mm/swap.c | 3 ++- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 429ff89bfe06..255931ebf2dc 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -74,6 +74,7 @@ static inline void totalram_pages_add(long count) extern void * high_memory; extern int page_cluster; +extern const int page_cluster_max; #ifdef CONFIG_SYSCTL extern int sysctl_legacy_va_layout; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 188c305aeb8b..71a4350ac601 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2125,6 +2125,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, + .extra2 = (void *)&page_cluster_max, }, { .procname = "dirtytime_expire_seconds", diff --git a/mm/swap.c b/mm/swap.c index 2f12a2ee1d3a..b9a6817e07ff 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -43,8 +43,9 @@ #define CREATE_TRACE_POINTS #include -/* How many pages do we try to swap or page in/out together? */ +/* How many pages do we try to swap or page in/out together? As a power of 2 */ int page_cluster; +const int page_cluster_max = 31; /* Protecting only lru_rotate.fbatch which requires disabling interrupts */ struct lru_rotate { From e0767e391079687081c5564b1390983c36b49cd1 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 1 Nov 2022 10:35:20 +0800 Subject: [PATCH 1112/4122] soundwire: cadence: rename sdw_cdns_dai_dma_data as sdw_cdns_dai_runtime MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The existing 'struct sdw_cdns_dma_data' has really nothing to do with DMAs. The information is stored in the dai->dma_data, but this is really private data that should be stored in a different context. 
Beyond the academic elegance discussion, using dma_data is a problem for new Intel hardware where the dma_data structure is already used for true DMA handling performed by other parts of the code. This patch prepares a transition away from the use of dma_data, for now with a rename-only change. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Péter Ujfalusi Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20221101023521.2384586-2-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/cadence_master.c | 30 +++++----- drivers/soundwire/cadence_master.h | 4 +- drivers/soundwire/intel.c | 96 +++++++++++++++--------------- 3 files changed, 65 insertions(+), 65 deletions(-) diff --git a/drivers/soundwire/cadence_master.c b/drivers/soundwire/cadence_master.c index 93929f19d083..235617b0542f 100644 --- a/drivers/soundwire/cadence_master.c +++ b/drivers/soundwire/cadence_master.c @@ -1707,40 +1707,40 @@ int cdns_set_sdw_stream(struct snd_soc_dai *dai, void *stream, int direction) { struct sdw_cdns *cdns = snd_soc_dai_get_drvdata(dai); - struct sdw_cdns_dma_data *dma; + struct sdw_cdns_dai_runtime *dai_runtime; if (stream) { /* first paranoia check */ if (direction == SNDRV_PCM_STREAM_PLAYBACK) - dma = dai->playback_dma_data; + dai_runtime = dai->playback_dma_data; else - dma = dai->capture_dma_data; + dai_runtime = dai->capture_dma_data; - if (dma) { + if (dai_runtime) { dev_err(dai->dev, - "dma_data already allocated for dai %s\n", + "dai_runtime already allocated for dai %s\n", dai->name); return -EINVAL; } - /* allocate and set dma info */ - dma = kzalloc(sizeof(*dma), GFP_KERNEL); - if (!dma) + /* allocate and set dai_runtime info */ + dai_runtime = kzalloc(sizeof(*dai_runtime), GFP_KERNEL); + if (!dai_runtime) return -ENOMEM; - dma->stream_type = SDW_STREAM_PCM; + dai_runtime->stream_type = SDW_STREAM_PCM; - dma->bus = &cdns->bus; - dma->link_id = cdns->instance; + dai_runtime->bus = &cdns->bus; + dai_runtime->link_id = cdns->instance; 
- dma->stream = stream; + dai_runtime->stream = stream; if (direction == SNDRV_PCM_STREAM_PLAYBACK) - dai->playback_dma_data = dma; + dai->playback_dma_data = dai_runtime; else - dai->capture_dma_data = dma; + dai->capture_dma_data = dai_runtime; } else { - /* for NULL stream we release allocated dma_data */ + /* for NULL stream we release allocated dai_runtime */ if (direction == SNDRV_PCM_STREAM_PLAYBACK) { kfree(dai->playback_dma_data); dai->playback_dma_data = NULL; diff --git a/drivers/soundwire/cadence_master.h b/drivers/soundwire/cadence_master.h index ca9e805bab88..93f23bd46e2c 100644 --- a/drivers/soundwire/cadence_master.h +++ b/drivers/soundwire/cadence_master.h @@ -70,7 +70,7 @@ struct sdw_cdns_stream_config { }; /** - * struct sdw_cdns_dma_data: Cadence DMA data + * struct sdw_cdns_dai_runtime: Cadence DAI runtime data * * @name: SoundWire stream name * @stream: stream runtime @@ -82,7 +82,7 @@ struct sdw_cdns_stream_config { * @suspended: status set when suspended, to be used in .prepare * @paused: status set in .trigger, to be used in suspend */ -struct sdw_cdns_dma_data { +struct sdw_cdns_dai_runtime { char *name; struct sdw_stream_runtime *stream; struct sdw_cdns_pdi *pdi; diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c index 244209358784..1e9c6df4b62c 100644 --- a/drivers/soundwire/intel.c +++ b/drivers/soundwire/intel.c @@ -824,15 +824,15 @@ static int intel_hw_params(struct snd_pcm_substream *substream, { struct sdw_cdns *cdns = snd_soc_dai_get_drvdata(dai); struct sdw_intel *sdw = cdns_to_intel(cdns); - struct sdw_cdns_dma_data *dma; + struct sdw_cdns_dai_runtime *dai_runtime; struct sdw_cdns_pdi *pdi; struct sdw_stream_config sconfig; struct sdw_port_config *pconfig; int ch, dir; int ret; - dma = snd_soc_dai_get_dma_data(dai, substream); - if (!dma) + dai_runtime = snd_soc_dai_get_dma_data(dai, substream); + if (!dai_runtime) return -EIO; ch = params_channels(params); @@ -854,10 +854,10 @@ static int intel_hw_params(struct 
snd_pcm_substream *substream, sdw_cdns_config_stream(cdns, ch, dir, pdi); /* store pdi and hw_params, may be needed in prepare step */ - dma->paused = false; - dma->suspended = false; - dma->pdi = pdi; - dma->hw_params = params; + dai_runtime->paused = false; + dai_runtime->suspended = false; + dai_runtime->pdi = pdi; + dai_runtime->hw_params = params; /* Inform DSP about PDI stream number */ ret = intel_params_stream(sdw, substream->stream, dai, params, @@ -869,7 +869,7 @@ static int intel_hw_params(struct snd_pcm_substream *substream, sconfig.direction = dir; sconfig.ch_count = ch; sconfig.frame_rate = params_rate(params); - sconfig.type = dma->stream_type; + sconfig.type = dai_runtime->stream_type; sconfig.bps = snd_pcm_format_width(params_format(params)); @@ -884,7 +884,7 @@ static int intel_hw_params(struct snd_pcm_substream *substream, pconfig->ch_mask = (1 << ch) - 1; ret = sdw_stream_add_master(&cdns->bus, &sconfig, - pconfig, 1, dma->stream); + pconfig, 1, dai_runtime->stream); if (ret) dev_err(cdns->dev, "add master to stream failed:%d\n", ret); @@ -898,19 +898,19 @@ static int intel_prepare(struct snd_pcm_substream *substream, { struct sdw_cdns *cdns = snd_soc_dai_get_drvdata(dai); struct sdw_intel *sdw = cdns_to_intel(cdns); - struct sdw_cdns_dma_data *dma; + struct sdw_cdns_dai_runtime *dai_runtime; int ch, dir; int ret = 0; - dma = snd_soc_dai_get_dma_data(dai, substream); - if (!dma) { - dev_err(dai->dev, "failed to get dma data in %s\n", + dai_runtime = snd_soc_dai_get_dma_data(dai, substream); + if (!dai_runtime) { + dev_err(dai->dev, "failed to get dai runtime in %s\n", __func__); return -EIO; } - if (dma->suspended) { - dma->suspended = false; + if (dai_runtime->suspended) { + dai_runtime->suspended = false; /* * .prepare() is called after system resume, where we @@ -921,21 +921,21 @@ static int intel_prepare(struct snd_pcm_substream *substream, */ /* configure stream */ - ch = params_channels(dma->hw_params); + ch = 
params_channels(dai_runtime->hw_params); if (substream->stream == SNDRV_PCM_STREAM_CAPTURE) dir = SDW_DATA_DIR_RX; else dir = SDW_DATA_DIR_TX; - intel_pdi_shim_configure(sdw, dma->pdi); - intel_pdi_alh_configure(sdw, dma->pdi); - sdw_cdns_config_stream(cdns, ch, dir, dma->pdi); + intel_pdi_shim_configure(sdw, dai_runtime->pdi); + intel_pdi_alh_configure(sdw, dai_runtime->pdi); + sdw_cdns_config_stream(cdns, ch, dir, dai_runtime->pdi); /* Inform DSP about PDI stream number */ ret = intel_params_stream(sdw, substream->stream, dai, - dma->hw_params, + dai_runtime->hw_params, sdw->instance, - dma->pdi->intel_alh_id); + dai_runtime->pdi->intel_alh_id); } return ret; @@ -946,11 +946,11 @@ intel_hw_free(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { struct sdw_cdns *cdns = snd_soc_dai_get_drvdata(dai); struct sdw_intel *sdw = cdns_to_intel(cdns); - struct sdw_cdns_dma_data *dma; + struct sdw_cdns_dai_runtime *dai_runtime; int ret; - dma = snd_soc_dai_get_dma_data(dai, substream); - if (!dma) + dai_runtime = snd_soc_dai_get_dma_data(dai, substream); + if (!dai_runtime) return -EIO; /* @@ -959,10 +959,10 @@ intel_hw_free(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) * DEPREPARED for the first cpu-dai and to RELEASED for the last * cpu-dai. 
*/ - ret = sdw_stream_remove_master(&cdns->bus, dma->stream); + ret = sdw_stream_remove_master(&cdns->bus, dai_runtime->stream); if (ret < 0) { dev_err(dai->dev, "remove master from stream %s failed: %d\n", - dma->stream->name, ret); + dai_runtime->stream->name, ret); return ret; } @@ -972,8 +972,8 @@ intel_hw_free(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) return ret; } - dma->hw_params = NULL; - dma->pdi = NULL; + dai_runtime->hw_params = NULL; + dai_runtime->pdi = NULL; return 0; } @@ -996,17 +996,17 @@ static int intel_pcm_set_sdw_stream(struct snd_soc_dai *dai, static void *intel_get_sdw_stream(struct snd_soc_dai *dai, int direction) { - struct sdw_cdns_dma_data *dma; + struct sdw_cdns_dai_runtime *dai_runtime; if (direction == SNDRV_PCM_STREAM_PLAYBACK) - dma = dai->playback_dma_data; + dai_runtime = dai->playback_dma_data; else - dma = dai->capture_dma_data; + dai_runtime = dai->capture_dma_data; - if (!dma) + if (!dai_runtime) return ERR_PTR(-EINVAL); - return dma->stream; + return dai_runtime->stream; } static int intel_trigger(struct snd_pcm_substream *substream, int cmd, struct snd_soc_dai *dai) @@ -1014,7 +1014,7 @@ static int intel_trigger(struct snd_pcm_substream *substream, int cmd, struct sn struct sdw_cdns *cdns = snd_soc_dai_get_drvdata(dai); struct sdw_intel *sdw = cdns_to_intel(cdns); struct sdw_intel_link_res *res = sdw->link_res; - struct sdw_cdns_dma_data *dma; + struct sdw_cdns_dai_runtime *dai_runtime; int ret = 0; /* @@ -1025,9 +1025,9 @@ static int intel_trigger(struct snd_pcm_substream *substream, int cmd, struct sn if (res->ops && res->ops->trigger) res->ops->trigger(dai, cmd, substream->stream); - dma = snd_soc_dai_get_dma_data(dai, substream); - if (!dma) { - dev_err(dai->dev, "failed to get dma data in %s\n", + dai_runtime = snd_soc_dai_get_dma_data(dai, substream); + if (!dai_runtime) { + dev_err(dai->dev, "failed to get dai runtime in %s\n", __func__); return -EIO; } @@ -1042,17 +1042,17 @@ static int 
intel_trigger(struct snd_pcm_substream *substream, int cmd, struct sn * the .trigger callback is used to track the suspend case only. */ - dma->suspended = true; + dai_runtime->suspended = true; ret = intel_free_stream(sdw, substream->stream, dai, sdw->instance); break; case SNDRV_PCM_TRIGGER_PAUSE_PUSH: - dma->paused = true; + dai_runtime->paused = true; break; case SNDRV_PCM_TRIGGER_STOP: case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: - dma->paused = false; + dai_runtime->paused = false; break; default: break; @@ -1091,25 +1091,25 @@ static int intel_component_dais_suspend(struct snd_soc_component *component) for_each_component_dais(component, dai) { struct sdw_cdns *cdns = snd_soc_dai_get_drvdata(dai); struct sdw_intel *sdw = cdns_to_intel(cdns); - struct sdw_cdns_dma_data *dma; + struct sdw_cdns_dai_runtime *dai_runtime; int stream; int ret; - dma = dai->playback_dma_data; + dai_runtime = dai->playback_dma_data; stream = SNDRV_PCM_STREAM_PLAYBACK; - if (!dma) { - dma = dai->capture_dma_data; + if (!dai_runtime) { + dai_runtime = dai->capture_dma_data; stream = SNDRV_PCM_STREAM_CAPTURE; } - if (!dma) + if (!dai_runtime) continue; - if (dma->suspended) + if (dai_runtime->suspended) continue; - if (dma->paused) { - dma->suspended = true; + if (dai_runtime->paused) { + dai_runtime->suspended = true; ret = intel_free_stream(sdw, stream, dai, sdw->instance); if (ret < 0) From 7dddead766c0826a998e7053e7d1c92b3422f8d6 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 1 Nov 2022 10:35:21 +0800 Subject: [PATCH 1113/4122] soundwire: cadence: use dai_runtime_array instead of dma_data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Simplify the code with a Cadence-specific dai_runtime_array, indexed with dai->id, instead of abusing dma_data. 
Signed-off-by: Pierre-Louis Bossart Reviewed-by: Péter Ujfalusi Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20221101023521.2384586-3-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/cadence_master.c | 30 +++++++++++++-------------- drivers/soundwire/cadence_master.h | 5 +++++ drivers/soundwire/intel.c | 33 +++++++++++++++--------------- 3 files changed, 35 insertions(+), 33 deletions(-) diff --git a/drivers/soundwire/cadence_master.c b/drivers/soundwire/cadence_master.c index 235617b0542f..a1de363eba3f 100644 --- a/drivers/soundwire/cadence_master.c +++ b/drivers/soundwire/cadence_master.c @@ -1709,13 +1709,10 @@ int cdns_set_sdw_stream(struct snd_soc_dai *dai, struct sdw_cdns *cdns = snd_soc_dai_get_drvdata(dai); struct sdw_cdns_dai_runtime *dai_runtime; + dai_runtime = cdns->dai_runtime_array[dai->id]; + if (stream) { /* first paranoia check */ - if (direction == SNDRV_PCM_STREAM_PLAYBACK) - dai_runtime = dai->playback_dma_data; - else - dai_runtime = dai->capture_dma_data; - if (dai_runtime) { dev_err(dai->dev, "dai_runtime already allocated for dai %s\n", @@ -1734,20 +1731,21 @@ int cdns_set_sdw_stream(struct snd_soc_dai *dai, dai_runtime->link_id = cdns->instance; dai_runtime->stream = stream; + dai_runtime->direction = direction; - if (direction == SNDRV_PCM_STREAM_PLAYBACK) - dai->playback_dma_data = dai_runtime; - else - dai->capture_dma_data = dai_runtime; + cdns->dai_runtime_array[dai->id] = dai_runtime; } else { - /* for NULL stream we release allocated dai_runtime */ - if (direction == SNDRV_PCM_STREAM_PLAYBACK) { - kfree(dai->playback_dma_data); - dai->playback_dma_data = NULL; - } else { - kfree(dai->capture_dma_data); - dai->capture_dma_data = NULL; + /* second paranoia check */ + if (!dai_runtime) { + dev_err(dai->dev, + "dai_runtime not allocated for dai %s\n", + dai->name); + return -EINVAL; } + + /* for NULL stream we release allocated dai_runtime */ + kfree(dai_runtime); + 
cdns->dai_runtime_array[dai->id] = NULL; } return 0; } diff --git a/drivers/soundwire/cadence_master.h b/drivers/soundwire/cadence_master.h index 93f23bd46e2c..0434d70d4b1f 100644 --- a/drivers/soundwire/cadence_master.h +++ b/drivers/soundwire/cadence_master.h @@ -81,6 +81,7 @@ struct sdw_cdns_stream_config { * @hw_params: hw_params to be applied in .prepare step * @suspended: status set when suspended, to be used in .prepare * @paused: status set in .trigger, to be used in suspend + * @direction: stream direction */ struct sdw_cdns_dai_runtime { char *name; @@ -92,6 +93,7 @@ struct sdw_cdns_dai_runtime { struct snd_pcm_hw_params *hw_params; bool suspended; bool paused; + int direction; }; /** @@ -108,6 +110,7 @@ struct sdw_cdns_dai_runtime { * @registers: Cadence registers * @link_up: Link status * @msg_count: Messages sent on bus + * @dai_runtime_array: runtime context for each allocated DAI. */ struct sdw_cdns { struct device *dev; @@ -135,6 +138,8 @@ struct sdw_cdns { struct work_struct work; struct list_head list; + + struct sdw_cdns_dai_runtime **dai_runtime_array; }; #define bus_to_cdns(_bus) container_of(_bus, struct sdw_cdns, bus) diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c index 1e9c6df4b62c..e8855a2115f6 100644 --- a/drivers/soundwire/intel.c +++ b/drivers/soundwire/intel.c @@ -831,7 +831,7 @@ static int intel_hw_params(struct snd_pcm_substream *substream, int ch, dir; int ret; - dai_runtime = snd_soc_dai_get_dma_data(dai, substream); + dai_runtime = cdns->dai_runtime_array[dai->id]; if (!dai_runtime) return -EIO; @@ -902,7 +902,7 @@ static int intel_prepare(struct snd_pcm_substream *substream, int ch, dir; int ret = 0; - dai_runtime = snd_soc_dai_get_dma_data(dai, substream); + dai_runtime = cdns->dai_runtime_array[dai->id]; if (!dai_runtime) { dev_err(dai->dev, "failed to get dai runtime in %s\n", __func__); @@ -949,7 +949,7 @@ intel_hw_free(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) struct 
sdw_cdns_dai_runtime *dai_runtime; int ret; - dai_runtime = snd_soc_dai_get_dma_data(dai, substream); + dai_runtime = cdns->dai_runtime_array[dai->id]; if (!dai_runtime) return -EIO; @@ -996,13 +996,10 @@ static int intel_pcm_set_sdw_stream(struct snd_soc_dai *dai, static void *intel_get_sdw_stream(struct snd_soc_dai *dai, int direction) { + struct sdw_cdns *cdns = snd_soc_dai_get_drvdata(dai); struct sdw_cdns_dai_runtime *dai_runtime; - if (direction == SNDRV_PCM_STREAM_PLAYBACK) - dai_runtime = dai->playback_dma_data; - else - dai_runtime = dai->capture_dma_data; - + dai_runtime = cdns->dai_runtime_array[dai->id]; if (!dai_runtime) return ERR_PTR(-EINVAL); @@ -1025,7 +1022,7 @@ static int intel_trigger(struct snd_pcm_substream *substream, int cmd, struct sn if (res->ops && res->ops->trigger) res->ops->trigger(dai, cmd, substream->stream); - dai_runtime = snd_soc_dai_get_dma_data(dai, substream); + dai_runtime = cdns->dai_runtime_array[dai->id]; if (!dai_runtime) { dev_err(dai->dev, "failed to get dai runtime in %s\n", __func__); @@ -1092,15 +1089,9 @@ static int intel_component_dais_suspend(struct snd_soc_component *component) struct sdw_cdns *cdns = snd_soc_dai_get_drvdata(dai); struct sdw_intel *sdw = cdns_to_intel(cdns); struct sdw_cdns_dai_runtime *dai_runtime; - int stream; int ret; - dai_runtime = dai->playback_dma_data; - stream = SNDRV_PCM_STREAM_PLAYBACK; - if (!dai_runtime) { - dai_runtime = dai->capture_dma_data; - stream = SNDRV_PCM_STREAM_CAPTURE; - } + dai_runtime = cdns->dai_runtime_array[dai->id]; if (!dai_runtime) continue; @@ -1111,7 +1102,7 @@ static int intel_component_dais_suspend(struct snd_soc_component *component) if (dai_runtime->paused) { dai_runtime->suspended = true; - ret = intel_free_stream(sdw, stream, dai, sdw->instance); + ret = intel_free_stream(sdw, dai_runtime->direction, dai, sdw->instance); if (ret < 0) return ret; } @@ -1178,6 +1169,7 @@ static int intel_create_dai(struct sdw_cdns *cdns, static int intel_register_dai(struct 
sdw_intel *sdw) { + struct sdw_cdns_dai_runtime **dai_runtime_array; struct sdw_cdns_stream_config config; struct sdw_cdns *cdns = &sdw->cdns; struct sdw_cdns_streams *stream; @@ -1195,6 +1187,13 @@ static int intel_register_dai(struct sdw_intel *sdw) /* DAIs are created based on total number of PDIs supported */ num_dai = cdns->pcm.num_pdi; + dai_runtime_array = devm_kcalloc(cdns->dev, num_dai, + sizeof(struct sdw_cdns_dai_runtime *), + GFP_KERNEL); + if (!dai_runtime_array) + return -ENOMEM; + cdns->dai_runtime_array = dai_runtime_array; + dais = devm_kcalloc(cdns->dev, num_dai, sizeof(*dais), GFP_KERNEL); if (!dais) return -ENOMEM; From febc50b82bc95089ef1d6f68a101c8a2b701e9ce Mon Sep 17 00:00:00 2001 From: Srinivasa Rao Mandadapu Date: Tue, 8 Nov 2022 20:16:03 +0530 Subject: [PATCH 1114/4122] dt-bindings: soundwire: Convert text bindings to DT Schema Convert soundwire text bindings to DT Schema format. Update interrupt property items as per device tree, as it is not appropriately described in text file. Update some of the properties description with minimum and maximum range. Update secondary node info which is used to describe slave devices. 
Signed-off-by: Srinivasa Rao Mandadapu Co-developed-by: Ratna Deepthi Kudaravalli Signed-off-by: Ratna Deepthi Kudaravalli Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/1667918763-32445-5-git-send-email-quic_srivasam@quicinc.com Signed-off-by: Vinod Koul --- .../bindings/soundwire/qcom,sdw.txt | 215 -------------- .../bindings/soundwire/qcom,soundwire.yaml | 270 ++++++++++++++++++ 2 files changed, 270 insertions(+), 215 deletions(-) delete mode 100644 Documentation/devicetree/bindings/soundwire/qcom,sdw.txt create mode 100644 Documentation/devicetree/bindings/soundwire/qcom,soundwire.yaml diff --git a/Documentation/devicetree/bindings/soundwire/qcom,sdw.txt b/Documentation/devicetree/bindings/soundwire/qcom,sdw.txt deleted file mode 100644 index e0faed8dceac..000000000000 --- a/Documentation/devicetree/bindings/soundwire/qcom,sdw.txt +++ /dev/null @@ -1,215 +0,0 @@ -Qualcomm SoundWire Controller Bindings - - -This binding describes the Qualcomm SoundWire Controller along with its -board specific bus parameters. - -- compatible: - Usage: required - Value type: - Definition: must be "qcom,soundwire-v..", - Example: - "qcom,soundwire-v1.3.0" - "qcom,soundwire-v1.5.0" - "qcom,soundwire-v1.5.1" - "qcom,soundwire-v1.6.0" - "qcom,soundwire-v1.7.0" -- reg: - Usage: required - Value type: - Definition: the base address and size of SoundWire controller - address space. - -- interrupts: - Usage: required - Value type: - Definition: should specify the SoundWire Controller core and optional - wake IRQ - -- interrupt-names: - Usage: Optional - Value type: boolean - Value type: - Definition: should be "core" for core and "wakeup" for wake interrupt. - -- wakeup-source: - Usage: Optional - Value type: boolean - Definition: should specify if SoundWire Controller is wake up capable. 
- -- clock-names: - Usage: required - Value type: - Definition: should be "iface" for SoundWire Controller interface clock - -- clocks: - Usage: required - Value type: - Definition: should specify the SoundWire Controller interface clock - -- #sound-dai-cells: - Usage: required - Value type: - Definition: must be 1 for digital audio interfaces on the controller. - -- qcom,dout-ports: - Usage: required - Value type: - Definition: must be count of data out ports - -- qcom,din-ports: - Usage: required - Value type: - Definition: must be count of data in ports - -- qcom,ports-offset1: - Usage: required - Value type: - Definition: should specify payload transport window offset1 of each - data port. Out ports followed by In ports. - Value of 0xFF indicates that this option is not implemented - or applicable for the respective data port. - More info in MIPI Alliance SoundWire 1.0 Specifications. - -- qcom,ports-offset2: - Usage: required - Value type: - Definition: should specify payload transport window offset2 of each - data port. Out ports followed by In ports. - Value of 0xFF indicates that this option is not implemented - or applicable for the respective data port. - More info in MIPI Alliance SoundWire 1.0 Specifications. - -- qcom,ports-sinterval-low: - Usage: required - Value type: - Definition: should be sample interval low of each data port. - Out ports followed by In ports. Used for Sample Interval - calculation. - Value of 0xFF indicates that this option is not implemented - or applicable for the respective data port. - More info in MIPI Alliance SoundWire 1.0 Specifications. - -- qcom,ports-word-length: - Usage: optional - Value type: - Definition: should be size of payload channel sample. - Value of 0xFF indicates that this option is not implemented - or applicable for the respective data port. - More info in MIPI Alliance SoundWire 1.0 Specifications. 
- -- qcom,ports-block-pack-mode: - Usage: optional - Value type: - Definition: should be 0 or 1 to indicate the block packing mode. - 0 to indicate Blocks are per Channel - 1 to indicate Blocks are per Port. - Out ports followed by In ports. - Value of 0xFF indicates that this option is not implemented - or applicable for the respective data port. - More info in MIPI Alliance SoundWire 1.0 Specifications. - -- qcom,ports-block-group-count: - Usage: optional - Value type: - Definition: should be in range 1 to 4 to indicate how many sample - intervals are combined into a payload. - Out ports followed by In ports. - Value of 0xFF indicates that this option is not implemented - or applicable for the respective data port. - More info in MIPI Alliance SoundWire 1.0 Specifications. - -- qcom,ports-lane-control: - Usage: optional - Value type: - Definition: should be in range 0 to 7 to identify which data lane - the data port uses. - Out ports followed by In ports. - Value of 0xFF indicates that this option is not implemented - or applicable for the respective data port. - More info in MIPI Alliance SoundWire 1.0 Specifications. - -- qcom,ports-hstart: - Usage: optional - Value type: - Definition: should be number identifying lowerst numbered coloum in - SoundWire Frame, i.e. left edge of the Transport sub-frame - for each port. Values between 0 and 15 are valid. - Out ports followed by In ports. - Value of 0xFF indicates that this option is not implemented - or applicable for the respective data port. - More info in MIPI Alliance SoundWire 1.0 Specifications. - -- qcom,ports-hstop: - Usage: optional - Value type: - Definition: should be number identifying highest numbered coloum in - SoundWire Frame, i.e. the right edge of the Transport - sub-frame for each port. Values between 0 and 15 are valid. - Out ports followed by In ports. - Value of 0xFF indicates that this option is not implemented - or applicable for the respective data port. 
- More info in MIPI Alliance SoundWire 1.0 Specifications. - -- qcom,dports-type: - Usage: optional - Value type: - Definition: should be one of the following types - 0 for reduced port - 1 for simple ports - 2 for full port - Out ports followed by In ports. - Value of 0xFF indicates that this option is not implemented - or applicable for the respective data port. - More info in MIPI Alliance SoundWire 1.0 Specifications. - -- reset: - Usage: optional - Value type: - Definition: Should specify the SoundWire audio CSR reset controller interface, - which is required for SoundWire version 1.6.0 and above. - -- reset-names: - Usage: optional - Value type: - Definition: should be "swr_audio_cgcr" for SoundWire audio CSR reset - controller interface. - -Note: - More Information on detail of encoding of these fields can be -found in MIPI Alliance SoundWire 1.0 Specifications. - -= SoundWire devices -Each subnode of the bus represents SoundWire device attached to it. -The properties of these nodes are defined by the individual bindings. - -= EXAMPLE -The following example represents a SoundWire controller on DB845c board -which has controller integrated inside WCD934x codec on SDM845 SoC. - -soundwire: soundwire@c85 { - compatible = "qcom,soundwire-v1.3.0"; - reg = <0xc85 0x20>; - interrupts = <20 IRQ_TYPE_EDGE_RISING>; - clocks = <&wcc>; - clock-names = "iface"; - resets = <&lpass_audiocc LPASS_AUDIO_SWR_TX_CGCR>; - reset-names = "swr_audio_cgcr"; - #sound-dai-cells = <1>; - qcom,dports-type = <0>; - qcom,dout-ports = <6>; - qcom,din-ports = <2>; - qcom,ports-sinterval-low = /bits/ 8 <0x07 0x1F 0x3F 0x7 0x1F 0x3F 0x0F 0x0F>; - qcom,ports-offset1 = /bits/ 8 <0x01 0x02 0x0C 0x6 0x12 0x0D 0x07 0x0A >; - qcom,ports-offset2 = /bits/ 8 <0x00 0x00 0x1F 0x00 0x00 0x1F 0x00 0x00>; - - /* Left Speaker */ - left{ - .... - }; - - /* Right Speaker */ - right{ - .... 
- }; -}; diff --git a/Documentation/devicetree/bindings/soundwire/qcom,soundwire.yaml b/Documentation/devicetree/bindings/soundwire/qcom,soundwire.yaml new file mode 100644 index 000000000000..bcbfa71536cd --- /dev/null +++ b/Documentation/devicetree/bindings/soundwire/qcom,soundwire.yaml @@ -0,0 +1,270 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/soundwire/qcom,soundwire.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm SoundWire Controller + +maintainers: + - Srinivas Kandagatla + - Srinivasa Rao Mandadapu + +description: + The Qualcomm SoundWire controller along with its board specific bus parameters. + +properties: + compatible: + enum: + - qcom,soundwire-v1.3.0 + - qcom,soundwire-v1.5.0 + - qcom,soundwire-v1.5.1 + - qcom,soundwire-v1.6.0 + - qcom,soundwire-v1.7.0 + + reg: + maxItems: 1 + + interrupts: + minItems: 1 + items: + - description: specify the SoundWire controller core. + - description: specify the Soundwire controller wake IRQ. + + interrupt-names: + minItems: 1 + items: + - const: core + - const: wakeup + + clocks: + items: + - description: iface clock + + clock-names: + items: + - const: iface + + resets: + items: + - description: SWR_AUDIO_CGCR RESET + + reset-names: + items: + - const: swr_audio_cgcr + + '#sound-dai-cells': + const: 1 + + '#address-cells': + const: 2 + + '#size-cells': + const: 0 + + wakeup-source: true + + qcom,din-ports: + $ref: /schemas/types.yaml#/definitions/uint32 + description: count of data in ports + + qcom,dout-ports: + $ref: /schemas/types.yaml#/definitions/uint32 + description: count of data out ports + + qcom,ports-word-length: + $ref: /schemas/types.yaml#/definitions/uint8-array + description: + Size of payload channel sample. + Value of 0xff indicates that this option is not implemented + or applicable for the respective data port. + More info in MIPI Alliance SoundWire 1.0 Specifications. 
+ minItems: 3 + maxItems: 5 + + qcom,ports-sinterval-low: + $ref: /schemas/types.yaml#/definitions/uint8-array + description: + Sample interval low of each data port. + Out ports followed by In ports. Used for Sample Interval calculation. + Value of 0xff indicates that this option is not implemented + or applicable for the respective data port. + More info in MIPI Alliance SoundWire 1.0 Specifications. + minItems: 3 + maxItems: 8 + + qcom,ports-offset1: + $ref: /schemas/types.yaml#/definitions/uint8-array + description: + Payload transport window offset1 of each data port. + Out ports followed by In ports. + Value of 0xff indicates that this option is not implemented + or applicable for the respective data port. + More info in MIPI Alliance SoundWire 1.0 Specifications. + minItems: 3 + maxItems: 8 + + qcom,ports-offset2: + $ref: /schemas/types.yaml#/definitions/uint8-array + description: + Payload transport window offset2 of each data port. + Out ports followed by In ports. + Value of 0xff indicates that this option is not implemented + or applicable for the respective data port. + More info in MIPI Alliance SoundWire 1.0 Specifications. + minItems: 3 + maxItems: 8 + + qcom,ports-lane-control: + $ref: /schemas/types.yaml#/definitions/uint8-array + description: + Identify which data lane the data port uses. + Out ports followed by In ports. + Value of 0xff indicates that this option is not implemented + or applicable for the respective data port. + More info in MIPI Alliance SoundWire 1.0 Specifications. + minItems: 3 + maxItems: 5 + + qcom,ports-block-pack-mode: + $ref: /schemas/types.yaml#/definitions/uint8-array + description: + Indicate the block packing mode. + 0 to indicate Blocks are per Channel + 1 to indicate Blocks are per Port. + Out ports followed by In ports. + Value of 0xff indicates that this option is not implemented + or applicable for the respective data port. + More info in MIPI Alliance SoundWire 1.0 Specifications. 
+ minItems: 3 + maxItems: 8 + items: + oneOf: + - minimum: 0 + maximum: 1 + - const: 0xff + + qcom,ports-hstart: + $ref: /schemas/types.yaml#/definitions/uint8-array + description: + Identifying lowest numbered column in SoundWire Frame, + i.e. left edge of the Transport sub-frame for each port. + Out ports followed by In ports. + Value of 0xff indicates that this option is not implemented + or applicable for the respective data port. + More info in MIPI Alliance SoundWire 1.0 Specifications. + minItems: 3 + maxItems: 5 + items: + oneOf: + - minimum: 0 + maximum: 15 + - const: 0xff + + qcom,ports-hstop: + $ref: /schemas/types.yaml#/definitions/uint8-array + description: + Identifying highest numbered column in SoundWire Frame, + i.e. the right edge of the Transport + sub-frame for each port. Out ports followed by In ports. + Value of 0xff indicates that this option is not implemented + or applicable for the respective data port. + More info in MIPI Alliance SoundWire 1.0 Specifications. + minItems: 3 + maxItems: 5 + items: + oneOf: + - minimum: 0 + maximum: 15 + - const: 0xff + + qcom,ports-block-group-count: + $ref: /schemas/types.yaml#/definitions/uint8-array + description: + In range 1 to 4 to indicate how many sample intervals are combined + into a payload. Out ports followed by In ports. + Value of 0xff indicates that this option is not implemented + or applicable for the respective data port. + More info in MIPI Alliance SoundWire 1.0 Specifications. + minItems: 3 + maxItems: 5 + items: + oneOf: + - minimum: 0 + maximum: 4 + - const: 0xff + + label: + maxItems: 1 + +patternProperties: + "^.*@[0-9a-f],[0-9a-f]$": + type: object + description: + Child nodes for a standalone audio codec or speaker amplifier IC. + It has RX and TX Soundwire secondary devices.
+ properties: + compatible: + pattern: "^sdw[0-9a-f]{1}[0-9a-f]{4}[0-9a-f]{4}[0-9a-f]{2}$" + +required: + - compatible + - reg + - interrupts + - clocks + - clock-names + - '#sound-dai-cells' + - '#address-cells' + - '#size-cells' + - qcom,dout-ports + - qcom,din-ports + - qcom,ports-sinterval-low + - qcom,ports-offset1 + - qcom,ports-offset2 + +additionalProperties: false + +examples: + - | + #include + #include + #include + + soundwire@3210000 { + compatible = "qcom,soundwire-v1.6.0"; + reg = <0x03210000 0x2000>; + + interrupts = , + <&pdc 130 IRQ_TYPE_LEVEL_HIGH>; + + interrupt-names = "core", "wakeup"; + + clocks = <&lpass_rx_macro>; + clock-names = "iface"; + + qcom,din-ports = <0>; + qcom,dout-ports = <5>; + + resets = <&lpass_audiocc LPASS_AUDIO_SWR_RX_CGCR>; + reset-names = "swr_audio_cgcr"; + + qcom,ports-word-length = /bits/ 8 <0x01 0x07 0x04 0xff 0xff>; + qcom,ports-sinterval-low = /bits/ 8 <0x03 0x3f 0x1f 0x03 0x03>; + qcom,ports-offset1 = /bits/ 8 <0x00 0x00 0x0b 0x01 0x01>; + qcom,ports-offset2 = /bits/ 8 <0x00 0x00 0x0b 0x00 0x00>; + qcom,ports-lane-control = /bits/ 8 <0x01 0x00 0x00 0x00 0x00>; + qcom,ports-block-pack-mode = /bits/ 8 <0xff 0x00 0x01 0xff 0xff>; + qcom,ports-hstart = /bits/ 8 <0xff 0x03 0xff 0xff 0xff>; + qcom,ports-hstop = /bits/ 8 <0xff 0x06 0xff 0xff 0xff>; + qcom,ports-block-group-count = /bits/ 8 <0xff 0xff 0xff 0xff 0x00>; + + #sound-dai-cells = <1>; + #address-cells = <2>; + #size-cells = <0>; + + codec@0,4 { + compatible = "sdw20217010d00"; + reg = <0 4>; + qcom,rx-port-mapping = <1 2 3 4 5>; + }; + }; From 0349fdab2ff0673cc3c3f300316522d4f2bb1af9 Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Fri, 4 Nov 2022 22:55:13 +0100 Subject: [PATCH 1115/4122] usb: gadget: at91-udc: simplify at91rm9200_udc_pullup callback Just simplify the use of is_on and get rid of superfluous condition. 
Cc: gregkh@linuxfoundation.org Cc: nicolas.ferre@microchip.com Cc: alexandre.belloni@bootlin.com Cc: linux-usb@vger.kernel.org Cc: kernel@pengutronix.de Reviewed-by: Claudiu Beznea Signed-off-by: Michael Grzeschik Link: https://lore.kernel.org/r/20221104215516.2874922-2-m.grzeschik@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/at91_udc.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/usb/gadget/udc/at91_udc.c b/drivers/usb/gadget/udc/at91_udc.c index a9a7b3fc60ec..922b4187004b 100644 --- a/drivers/usb/gadget/udc/at91_udc.c +++ b/drivers/usb/gadget/udc/at91_udc.c @@ -1628,10 +1628,7 @@ static int at91rm9200_udc_init(struct at91_udc *udc) static void at91rm9200_udc_pullup(struct at91_udc *udc, int is_on) { - if (is_on) - gpiod_set_value(udc->board.pullup_pin, 1); - else - gpiod_set_value(udc->board.pullup_pin, 0); + gpiod_set_value(udc->board.pullup_pin, is_on); } static const struct at91_udc_caps at91rm9200_udc_caps = { From afb21a5155a136a2549947c151b13507a34976ae Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 3 Nov 2022 12:59:23 +0100 Subject: [PATCH 1116/4122] dt-bindings: usb: usb-drd: Describe default dual-role mode The dual-role mode default, in the absence of the dr_mode property, is already documented to be OTG. Use the "default" property to mark it as such more explicitly. Signed-off-by: Thierry Reding Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221103115923.1467525-1-thierry.reding@gmail.com Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/usb/usb-drd.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/usb/usb-drd.yaml b/Documentation/devicetree/bindings/usb/usb-drd.yaml index 1567549b05ce..114fb5dc0498 100644 --- a/Documentation/devicetree/bindings/usb/usb-drd.yaml +++ b/Documentation/devicetree/bindings/usb/usb-drd.yaml @@ -27,6 +27,7 @@ properties: should default to OTG. 
$ref: /schemas/types.yaml#/definitions/string enum: [host, peripheral, otg] + default: otg hnp-disable: description: From fff61d4ccf3d1124bf7aa82fa996536833b8204a Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 7 Nov 2022 14:42:48 +0100 Subject: [PATCH 1117/4122] dt-bindings: usb: usb251xb: Convert to YAML schema Convert the usb251xb hub DT bindings from text to yaml schema so it is possible to validate DTs against the schema. Adjust the example to describe two different hubs at different I2C bus addresses, to avoid I2C address collision in the example. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Marek Vasut Link: https://lore.kernel.org/r/20221107134248.21899-1-marex@denx.de Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/usb/usb251xb.txt | 89 ------ .../devicetree/bindings/usb/usb251xb.yaml | 271 ++++++++++++++++++ 2 files changed, 271 insertions(+), 89 deletions(-) delete mode 100644 Documentation/devicetree/bindings/usb/usb251xb.txt create mode 100644 Documentation/devicetree/bindings/usb/usb251xb.yaml diff --git a/Documentation/devicetree/bindings/usb/usb251xb.txt b/Documentation/devicetree/bindings/usb/usb251xb.txt deleted file mode 100644 index 1a934eab175e..000000000000 --- a/Documentation/devicetree/bindings/usb/usb251xb.txt +++ /dev/null @@ -1,89 +0,0 @@ -Microchip USB 2.0 Hi-Speed Hub Controller - -The device node for the configuration of a Microchip USB251x/xBi USB 2.0 -Hi-Speed Controller.
- -Required properties : - - compatible : Should be "microchip,usb251xb" or one of the specific types: - "microchip,usb2512b", "microchip,usb2512bi", "microchip,usb2513b", - "microchip,usb2513bi", "microchip,usb2514b", "microchip,usb2514bi", - "microchip,usb2517", "microchip,usb2517i", "microchip,usb2422" - - reg : I2C address on the selected bus (default is <0x2C>) - -Optional properties : - - reset-gpios : Should specify the gpio for hub reset - - vdd-supply : Should specify the phandle to the regulator supplying vdd - - skip-config : Skip Hub configuration, but only send the USB-Attach command - - vendor-id : Set USB Vendor ID of the hub (16 bit, default is 0x0424) - - product-id : Set USB Product ID of the hub (16 bit, default depends on type) - - device-id : Set USB Device ID of the hub (16 bit, default is 0x0bb3) - - language-id : Set USB Language ID (16 bit, default is 0x0000) - - manufacturer : Set USB Manufacturer string (max 31 characters long) - - product : Set USB Product string (max 31 characters long) - - serial : Set USB Serial string (max 31 characters long) - - {bus,self}-powered : selects between self- and bus-powered operation - (boolean, default is self-powered) - - disable-hi-speed : disable USB Hi-Speed support (boolean) - - {multi,single}-tt : selects between multi- and single-transaction-translator - (boolean, default is multi-tt) - - disable-eop : disable End of Packet generation in full-speed mode (boolean) - - {ganged,individual}-sensing : select over-current sense type in self-powered - mode (boolean, default is individual) - - {ganged,individual}-port-switching : select port power switching mode - (boolean, default is individual) - - dynamic-power-switching : enable auto-switching from self- to bus-powered - operation if the local power source is removed or unavailable (boolean) - - oc-delay-us : Delay time (in microseconds) for filtering the over-current - sense inputs. Valid values are 100, 4000, 8000 (default) and 16000. 
If - an invalid value is given, the default is used instead. - - compound-device : indicate the hub is part of a compound device (boolean) - - port-mapping-mode : enable port mapping mode (boolean) - - led-{usb,speed}-mode : led usb/speed indication mode selection - (boolean, default is speed mode) - - string-support : enable string descriptor support (required for manufacturer, - product and serial string configuration) - - non-removable-ports : Should specify the ports which have a non-removable - device connected. - - sp-disabled-ports : Specifies the ports which will be self-power disabled - - bp-disabled-ports : Specifies the ports which will be bus-power disabled - - sp-max-total-current-microamp: Specifies max current consumed by the hub - from VBUS when operating in self-powered hub. It includes the hub - silicon along with all associated circuitry including a permanently - attached peripheral (range: 0 - 100000 uA, default 1000 uA) - - bp-max-total-current-microamp: Specifies max current consumed by the hub - from VBUS when operating in self-powered hub. It includes the hub - silicon along with all associated circuitry including a permanently - attached peripheral (range: 0 - 510000 uA, default 100000 uA) - - sp-max-removable-current-microamp: Specifies max current consumed by the hub - from VBUS when operating in self-powered hub. It includes the hub - silicon along with all associated circuitry excluding a permanently - attached peripheral (range: 0 - 100000 uA, default 1000 uA) - - bp-max-removable-current-microamp: Specifies max current consumed by the hub - from VBUS when operating in self-powered hub. It includes the hub - silicon along with all associated circuitry excluding a permanently - attached peripheral (range: 0 - 510000 uA, default 100000 uA) - - power-on-time-ms : Specifies the time it takes from the time the host - initiates the power-on sequence to a port until the port has adequate - power. 
The value is given in ms in a 0 - 510 range (default is 100ms). - - swap-dx-lanes : Specifies the ports which will swap the differential-pair - (D+/D-), default is not-swapped. - -Examples: - usb2512b@2c { - compatible = "microchip,usb2512b"; - reg = <0x2c>; - reset-gpios = <&gpio1 4 GPIO_ACTIVE_LOW>; - }; - - usb2514b@2c { - compatible = "microchip,usb2514b"; - reg = <0x2c>; - vendor-id = /bits/ 16 <0x0000>; - product-id = /bits/ 16 <0x0000>; - string-support; - manufacturer = "Foo"; - product = "Foo-Bar"; - serial = "1234567890A"; - /* correct misplaced usb connectors on port 1,2 */ - swap-dx-lanes = <1 2>; - }; diff --git a/Documentation/devicetree/bindings/usb/usb251xb.yaml b/Documentation/devicetree/bindings/usb/usb251xb.yaml new file mode 100644 index 000000000000..4d1530816817 --- /dev/null +++ b/Documentation/devicetree/bindings/usb/usb251xb.yaml @@ -0,0 +1,271 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/usb/usb251xb.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Microchip USB 2.0 Hi-Speed Hub Controller + +maintainers: + - Richard Leitner + +properties: + compatible: + enum: + - microchip,usb2422 + - microchip,usb2512b + - microchip,usb2512bi + - microchip,usb2513b + - microchip,usb2513bi + - microchip,usb2514b + - microchip,usb2514bi + - microchip,usb2517 + - microchip,usb2517i + - microchip,usb251xb + + reg: + maxItems: 1 + + reset-gpios: + description: | + Should specify the gpio for hub reset + + vdd-supply: + description: | + Should specify the phandle to the regulator supplying vdd + + skip-config: + $ref: /schemas/types.yaml#/definitions/flag + description: | + Skip Hub configuration, but only send the USB-Attach command + + vendor-id: + $ref: /schemas/types.yaml#/definitions/uint16 + default: 0x0424 + description: | + Set USB Vendor ID of the hub + + product-id: + $ref: /schemas/types.yaml#/definitions/uint16 + description: | + Set USB Product ID of the hub + + device-id: + 
$ref: /schemas/types.yaml#/definitions/uint16 + default: 0x0bb3 + description: | + Set USB Device ID of the hub + + language-id: + $ref: /schemas/types.yaml#/definitions/uint16 + default: 0x0000 + description: | + Set USB Language ID + + manufacturer: + $ref: /schemas/types.yaml#/definitions/string + description: | + Set USB Manufacturer string (max 31 characters long) + + product: + $ref: /schemas/types.yaml#/definitions/string + description: | + Set USB Product string (max 31 characters long) + + serial: + $ref: /schemas/types.yaml#/definitions/string + description: | + Set USB Serial string (max 31 characters long) + + bus-powered: + $ref: /schemas/types.yaml#/definitions/flag + description: | + selects between self- and bus-powered operation + (boolean, default is self-powered) + + self-powered: + $ref: /schemas/types.yaml#/definitions/flag + description: | + selects between self- and bus-powered operation + (boolean, default is self-powered) + + disable-hi-speed: + $ref: /schemas/types.yaml#/definitions/flag + description: | + disable USB Hi-Speed support (boolean) + + multi-tt: + $ref: /schemas/types.yaml#/definitions/flag + description: | + selects between multi- and single-transaction-translator + (boolean, default is multi-tt) + + single-tt: + $ref: /schemas/types.yaml#/definitions/flag + description: | + selects between multi- and single-transaction-translator + (boolean, default is multi-tt) + + disable-eop: + $ref: /schemas/types.yaml#/definitions/flag + description: | + disable End of Packet generation in full-speed mode (boolean) + + ganged-sensing: + $ref: /schemas/types.yaml#/definitions/flag + description: | + select over-current sense type in self-powered mode + (boolean, default is individual) + + individual-sensing: + $ref: /schemas/types.yaml#/definitions/flag + description: | + select over-current sense type in self-powered mode + (boolean, default is individual) + + ganged-port-switching: + $ref: /schemas/types.yaml#/definitions/flag + 
description: | + select port power switching mode (boolean, default is individual) + + individual-port-switching: + $ref: /schemas/types.yaml#/definitions/flag + description: | + select port power switching mode (boolean, default is individual) + + dynamic-power-switching: + $ref: /schemas/types.yaml#/definitions/flag + description: | + enable auto-switching from self- to bus-powered operation if the + local power source is removed or unavailable (boolean) + + oc-delay-us: + enum: [100, 4000, 8000, 16000] + default: 8000 + description: | + Delay time (in microseconds) for filtering the over-current sense + inputs. If an invalid value is given, the default is used instead. + + compound-device: + $ref: /schemas/types.yaml#/definitions/flag + description: | + indicate the hub is part of a compound device (boolean) + + port-mapping-mode: + $ref: /schemas/types.yaml#/definitions/flag + description: | + enable port mapping mode (boolean) + + led-usb-mode: + $ref: /schemas/types.yaml#/definitions/flag + description: | + led usb/speed indication mode selection (boolean, default is speed mode) + + led-speed-mode: + $ref: /schemas/types.yaml#/definitions/flag + description: | + led usb/speed indication mode selection (boolean, default is speed mode) + + string-support: + $ref: /schemas/types.yaml#/definitions/flag + description: | + enable string descriptor support (required for manufacturer, product + and serial string configuration) + + non-removable-ports: + $ref: /schemas/types.yaml#/definitions/uint8-array + description: | + Should specify the ports which have a non-removable device connected. 
+ + sp-disabled-ports: + $ref: /schemas/types.yaml#/definitions/uint8-array + description: | + Specifies the ports which will be self-power disabled + + bp-disabled-ports: + $ref: /schemas/types.yaml#/definitions/uint8-array + description: | + Specifies the ports which will be bus-power disabled + + sp-max-total-current-microamp: + maximum: 100000 + default: 1000 + description: | + Specifies max current consumed by the hub from VBUS when + operating in self-powered hub. It includes the hub silicon + along with all associated circuitry including a permanently + attached peripheral. + + bp-max-total-current-microamp: + maximum: 510000 + default: 100000 + description: | + Specifies max current consumed by the hub from VBUS when + operating in self-powered hub. It includes the hub silicon + along with all associated circuitry including a permanently + attached peripheral. + + sp-max-removable-current-microamp: + maximum: 100000 + default: 1000 + description: | + Specifies max current consumed by the hub from VBUS when + operating in self-powered hub. It includes the hub silicon + along with all associated circuitry excluding a permanently + attached peripheral. + + bp-max-removable-current-microamp: + maximum: 510000 + default: 100000 + description: | + Specifies max current consumed by the hub from VBUS when + operating in self-powered hub. It includes the hub silicon + along with all associated circuitry excluding a permanently + attached peripheral. + + power-on-time-ms: + maximum: 510 + default: 100 + description: | + Specifies the time it takes from the time the host initiates the + power-on sequence to a port until the port has adequate power. + + swap-dx-lanes: + $ref: /schemas/types.yaml#/definitions/uint8-array + description: | + Specifies the ports which will swap the differential-pair (D+/D-), + default is not-swapped. 
+ +additionalProperties: false + +required: + - compatible + - reg + +examples: + - | + #include + + i2c { + #address-cells = <1>; + #size-cells = <0>; + + usb-hub@2c { + compatible = "microchip,usb2512b"; + reg = <0x2c>; + reset-gpios = <&gpio1 4 GPIO_ACTIVE_LOW>; + }; + + usb-hub@2d { + compatible = "microchip,usb2514b"; + reg = <0x2d>; + vendor-id = /bits/ 16 <0x0000>; + product-id = /bits/ 16 <0x0000>; + string-support; + manufacturer = "Foo"; + product = "Foo-Bar"; + serial = "1234567890A"; + /* correct misplaced usb connectors on port 1,2 */ + swap-dx-lanes = <1 2>; + }; + }; From 434d806f077cad81d87a757adc631894bfa01ac2 Mon Sep 17 00:00:00 2001 From: Li Jun Date: Wed, 26 Oct 2022 14:12:21 +0800 Subject: [PATCH 1118/4122] dt-bindings: usb: usb-nop-xceiv: add wakeup-source property USB phy may be a system wakeup source, so add wakeup source property to keep its resource (e.g. power domain) active to make USB remote wakeup work. Signed-off-by: Li Jun Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/1666764742-4201-1-git-send-email-jun.li@nxp.com Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/usb/usb-nop-xceiv.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/devicetree/bindings/usb/usb-nop-xceiv.yaml b/Documentation/devicetree/bindings/usb/usb-nop-xceiv.yaml index 2824c17285ee..326131dcf14d 100644 --- a/Documentation/devicetree/bindings/usb/usb-nop-xceiv.yaml +++ b/Documentation/devicetree/bindings/usb/usb-nop-xceiv.yaml @@ -39,6 +39,11 @@ properties: the VBus line. $ref: /schemas/types.yaml#/definitions/phandle + wakeup-source: + description: + Specify if the USB phy can detect the remote wakeup signal + while the system sleep. 
+ required: - compatible - '#phy-cells' From 4567d1a97f5290cb895a564feff0a5c770d6c332 Mon Sep 17 00:00:00 2001 From: Li Jun Date: Wed, 26 Oct 2022 14:12:22 +0800 Subject: [PATCH 1119/4122] usb: phy: generic: Add wakeup capability In case USB phy is the wakeup source, enable its wakeup capability. Signed-off-by: Li Jun Link: https://lore.kernel.org/r/1666764742-4201-2-git-send-email-jun.li@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/phy/phy-generic.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/phy/phy-generic.c b/drivers/usb/phy/phy-generic.c index 8ed9327cc4a5..c1309ea24a52 100644 --- a/drivers/usb/phy/phy-generic.c +++ b/drivers/usb/phy/phy-generic.c @@ -286,6 +286,7 @@ EXPORT_SYMBOL_GPL(usb_phy_gen_create_phy); static int usb_phy_generic_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; + struct device_node *dn = dev->of_node; struct usb_phy_generic *nop; int err; @@ -323,6 +324,9 @@ static int usb_phy_generic_probe(struct platform_device *pdev) platform_set_drvdata(pdev, nop); + device_set_wakeup_capable(&pdev->dev, + of_property_read_bool(dn, "wakeup-source")); + return 0; } From ee9834636f9b07fe1dcf3fffbb325318cdb267d5 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Fri, 28 Oct 2022 18:45:34 -0700 Subject: [PATCH 1120/4122] usb: ehci-pci: Set PROBE_PREFER_ASYNCHRONOUS This driver often takes on the order of 8ms to start, but every little bit counts. It shouldn't have many cross-device dependencies to race with, nor racy access to shared state with other drivers, so this should be a relatively low risk change. This driver was pinpointed as part of a survey of top slowest initcalls (i.e., are built in, and probing synchronously) on a lab of ChromeOS systems. 
Signed-off-by: Brian Norris Acked-by: Alan Stern Link: https://lore.kernel.org/r/20221028184507.v2.1.I9a5353f81d1509f85f3a04f0cdc9099f6fe60811@changeid Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-pci.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/ehci-pci.c b/drivers/usb/host/ehci-pci.c index 17f8b6ea0c35..4b148fe5e43b 100644 --- a/drivers/usb/host/ehci-pci.c +++ b/drivers/usb/host/ehci-pci.c @@ -411,11 +411,12 @@ static struct pci_driver ehci_pci_driver = { .remove = ehci_pci_remove, .shutdown = usb_hcd_pci_shutdown, -#ifdef CONFIG_PM .driver = { - .pm = &usb_hcd_pci_pm_ops - }, +#ifdef CONFIG_PM + .pm = &usb_hcd_pci_pm_ops, #endif + .probe_type = PROBE_PREFER_ASYNCHRONOUS, + }, }; static int __init ehci_pci_init(void) From 4c2604a9a6899bab195edbee35fc8d64ce1444aa Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Fri, 28 Oct 2022 18:45:35 -0700 Subject: [PATCH 1121/4122] usb: xhci-pci: Set PROBE_PREFER_ASYNCHRONOUS This driver often takes on the order of 10ms to start, but in some cases takes more than 100ms. It shouldn't have many cross-device dependencies to race with, nor racy access to shared state with other drivers, so this should be a relatively low risk change. This driver was pinpointed as part of a survey of top slowest initcalls (i.e., are built in, and probing synchronously) on a lab of ChromeOS systems. 
Signed-off-by: Brian Norris Link: https://lore.kernel.org/r/20221028184507.v2.2.I5a309231785d3a4e37118a25e84f5caa0136a343@changeid Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 7bccbe50bab1..a29b681b562e 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -673,11 +673,12 @@ static struct pci_driver xhci_pci_driver = { /* suspend and resume implemented later */ .shutdown = usb_hcd_pci_shutdown, -#ifdef CONFIG_PM .driver = { - .pm = &usb_hcd_pci_pm_ops - }, +#ifdef CONFIG_PM + .pm = &usb_hcd_pci_pm_ops, #endif + .probe_type = PROBE_PREFER_ASYNCHRONOUS, + }, }; static int __init xhci_pci_init(void) From 9c3959bb4cbf2b45c5b53bf8a19426e5ddb5c56c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Fri, 4 Nov 2022 10:58:38 +0100 Subject: [PATCH 1122/4122] usb: chipidea: ci_hdrc_imx: Fix a typo ("regualator") MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change "regualator" to "regulator" in this comment. 
Signed-off-by: Jonathan Neuschäfer Reviewed-by: Mukesh Ojha Link: https://lore.kernel.org/r/20221104095838.2132945-1-j.neuschaefer@gmx.net Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/ci_hdrc_imx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c index 923f5c00a1d9..0dc482542d85 100644 --- a/drivers/usb/chipidea/ci_hdrc_imx.c +++ b/drivers/usb/chipidea/ci_hdrc_imx.c @@ -355,7 +355,7 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) data->hsic_pad_regulator = devm_regulator_get_optional(dev, "hsic"); if (PTR_ERR(data->hsic_pad_regulator) == -ENODEV) { - /* no pad regualator is needed */ + /* no pad regulator is needed */ data->hsic_pad_regulator = NULL; } else if (IS_ERR(data->hsic_pad_regulator)) return dev_err_probe(dev, PTR_ERR(data->hsic_pad_regulator), From 83045e19feae937c425248824d1dc0fc95583842 Mon Sep 17 00:00:00 2001 From: Henry Tian Date: Mon, 24 Oct 2022 09:48:53 +0000 Subject: [PATCH 1123/4122] usb: gadget: aspeed: fix buffer overflow In ast_vhub_epn_handle_ack() when the received data length exceeds the buffer, it does not check the case and just copies to req.buf and cause a buffer overflow, kernel oops on this case. This issue could be reproduced on a BMC with an OS that enables the lan over USB: 1. In OS, enable the usb eth dev, verify it pings the BMC OK; 2. In OS, set the usb dev mtu to 2000. (Default is 1500); 3. In OS, ping the BMC with `-s 2000` argument. The BMC kernel will get oops with below logs: skbuff: skb_over_panic: text:8058e098 len:2048 put:2048 head:84c678a0 data:84c678c2 tail:0x84c680c2 end:0x84c67f00 dev:usb0 ------------[ cut here ]------------ kernel BUG at net/core/skbuff.c:113! 
Internal error: Oops - BUG: 0 [#1] ARM CPU: 0 PID: 0 Comm: swapper Not tainted 5.15.69-c9fb275-dirty-d1e579a #1 Hardware name: Generic DT based system PC is at skb_panic+0x60/0x6c LR is at irq_work_queue+0x6c/0x94 Fix the issue by checking the length and set `-EOVERFLOW`. Tested: Verify the BMC kernel does not get oops in the above case, and the usb ethernet gets RX packets errors instead. Signed-off-by: Lei YU Signed-off-by: Henry Tian Reviewed-by: Neal Liu Acked-by: Benjamin Herrenschmidt Link: https://lore.kernel.org/r/20221024094853.2877441-1-yulei.sh@bytedance.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/aspeed-vhub/core.c | 2 +- drivers/usb/gadget/udc/aspeed-vhub/epn.c | 16 ++++++++++++---- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/usb/gadget/udc/aspeed-vhub/core.c b/drivers/usb/gadget/udc/aspeed-vhub/core.c index 7a635c499777..ac3ca24f8b04 100644 --- a/drivers/usb/gadget/udc/aspeed-vhub/core.c +++ b/drivers/usb/gadget/udc/aspeed-vhub/core.c @@ -37,7 +37,7 @@ void ast_vhub_done(struct ast_vhub_ep *ep, struct ast_vhub_req *req, list_del_init(&req->queue); - if (req->req.status == -EINPROGRESS) + if ((req->req.status == -EINPROGRESS) || (status == -EOVERFLOW)) req->req.status = status; if (req->req.dma) { diff --git a/drivers/usb/gadget/udc/aspeed-vhub/epn.c b/drivers/usb/gadget/udc/aspeed-vhub/epn.c index b5252880b389..56e55472daa1 100644 --- a/drivers/usb/gadget/udc/aspeed-vhub/epn.c +++ b/drivers/usb/gadget/udc/aspeed-vhub/epn.c @@ -84,6 +84,7 @@ static void ast_vhub_epn_handle_ack(struct ast_vhub_ep *ep) { struct ast_vhub_req *req; unsigned int len; + int status = 0; u32 stat; /* Read EP status */ @@ -119,9 +120,15 @@ static void ast_vhub_epn_handle_ack(struct ast_vhub_ep *ep) len = VHUB_EP_DMA_TX_SIZE(stat); /* If not using DMA, copy data out if needed */ - if (!req->req.dma && !ep->epn.is_in && len) - memcpy(req->req.buf + req->req.actual, ep->buf, len); - + if (!req->req.dma && !ep->epn.is_in && len) { + 
if (req->req.actual + len > req->req.length) { + req->last_desc = 1; + status = -EOVERFLOW; + goto done; + } else { + memcpy(req->req.buf + req->req.actual, ep->buf, len); + } + } /* Adjust size */ req->req.actual += len; @@ -129,9 +136,10 @@ static void ast_vhub_epn_handle_ack(struct ast_vhub_ep *ep) if (len < ep->ep.maxpacket) req->last_desc = 1; +done: /* That's it ? complete the request and pick a new one */ if (req->last_desc >= 0) { - ast_vhub_done(ep, req, 0); + ast_vhub_done(ep, req, status); req = list_first_entry_or_null(&ep->queue, struct ast_vhub_req, queue); From d119cd95c62ddf6a1d76a006be273f255fd6c5a8 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Mon, 31 Oct 2022 09:54:26 +0100 Subject: [PATCH 1124/4122] usb: musb: remove left-over after USB_TI_CPPI_DMA removal Commit 32fee1df5110 ("usb: musb: remove unused davinci support") removes the config USB_TI_CPPI_DMA, but misses some left-over references in drivers/usb/musb/musb_dma.h. Remove the left-over dependent on this removed config. 
Signed-off-by: Lukas Bulwahn Link: https://lore.kernel.org/r/20221031085426.17175-1-lukas.bulwahn@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_dma.h | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/usb/musb/musb_dma.h b/drivers/usb/musb/musb_dma.h index 7d67b69df0a0..e2445ca3356d 100644 --- a/drivers/usb/musb/musb_dma.h +++ b/drivers/usb/musb/musb_dma.h @@ -61,12 +61,6 @@ struct musb_hw_ep; #define musb_dma_cppi41(musb) 0 #endif -#ifdef CONFIG_USB_TI_CPPI_DMA -#define musb_dma_cppi(musb) (musb->ops->quirks & MUSB_DMA_CPPI) -#else -#define musb_dma_cppi(musb) 0 -#endif - #ifdef CONFIG_USB_TUSB_OMAP_DMA #define tusb_dma_omap(musb) (musb->ops->quirks & MUSB_DMA_TUSB_OMAP) #else @@ -79,11 +73,10 @@ struct musb_hw_ep; #define musb_dma_inventra(musb) 0 #endif -#if defined(CONFIG_USB_TI_CPPI_DMA) || defined(CONFIG_USB_TI_CPPI41_DMA) -#define is_cppi_enabled(musb) \ - (musb_dma_cppi(musb) || musb_dma_cppi41(musb)) +#if defined(CONFIG_USB_TI_CPPI41_DMA) +#define is_cppi_enabled(musb) musb_dma_cppi41(musb) #else -#define is_cppi_enabled(musb) 0 +#define is_cppi_enabled(musb) 0 #endif /* From 77ece8123fed2bef451ef31a34b8327849375d26 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 3 Nov 2022 10:06:25 +0800 Subject: [PATCH 1125/4122] Documentation: devres: add missing PHY helpers Add devm_usb_get_phy_by_phandle() to devres.rst. It's introduced by commit 5d3c28b5a42d ("usb: otg: add device tree support to otg library"). Add devm_usb_get_phy_by_node() to devres.rst. 
It's introduced by commit e842b84c8e72 ("usb: phy: Add interface to get phy give of device_node.") Fixes: 5d3c28b5a42d ("usb: otg: add device tree support to otg library") Fixes: e842b84c8e72 ("usb: phy: Add interface to get phy give of device_node.") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221103020625.1003759-1-yangyingliang@huawei.com Signed-off-by: Greg Kroah-Hartman --- Documentation/driver-api/driver-model/devres.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/driver-api/driver-model/devres.rst b/Documentation/driver-api/driver-model/devres.rst index 687adb58048e..6007ef0704e3 100644 --- a/Documentation/driver-api/driver-model/devres.rst +++ b/Documentation/driver-api/driver-model/devres.rst @@ -387,6 +387,8 @@ PCI PHY devm_usb_get_phy() + devm_usb_get_phy_by_node() + devm_usb_get_phy_by_phandle() devm_usb_put_phy() PINCTRL From dced88922c1179dfa2664690318d4cba57ebffb5 Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Wed, 26 Oct 2022 20:11:57 +0800 Subject: [PATCH 1126/4122] usb: chipidea: core: wrap ci_handle_power_lost() with CONFIG_PM_SLEEP If CONFIG_PM_SLEEP is not set, the following error shows up when building the kernel: error: 'ci_handle_power_lost' defined but not used. This will move ci_handle_power_lost() to an area wrapped by CONFIG_PM_SLEEP. 
Signed-off-by: Xu Yang Fixes: 74494b33211d ("usb: chipidea: core: add controller resume support when controller is powered off") Reported-by: Conor Dooley Tested-by: Conor Dooley Link: https://lore.kernel.org/r/20221026121157.1491302-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/core.c | 38 ++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c index 2b170b434d01..484b1cd23431 100644 --- a/drivers/usb/chipidea/core.c +++ b/drivers/usb/chipidea/core.c @@ -661,25 +661,6 @@ static enum ci_role ci_get_role(struct ci_hdrc *ci) return role; } -static void ci_handle_power_lost(struct ci_hdrc *ci) -{ - enum ci_role role; - - disable_irq_nosync(ci->irq); - if (!ci_otg_is_fsm_mode(ci)) { - role = ci_get_role(ci); - - if (ci->role != role) { - ci_handle_id_switch(ci); - } else if (role == CI_ROLE_GADGET) { - if (ci->is_otg && hw_read_otgsc(ci, OTGSC_BSV)) - usb_gadget_vbus_connect(&ci->gadget); - } - } - - enable_irq(ci->irq); -} - static struct usb_role_switch_desc ci_role_switch = { .set = ci_usb_role_switch_set, .get = ci_usb_role_switch_get, @@ -1400,6 +1381,25 @@ static int ci_suspend(struct device *dev) return 0; } +static void ci_handle_power_lost(struct ci_hdrc *ci) +{ + enum ci_role role; + + disable_irq_nosync(ci->irq); + if (!ci_otg_is_fsm_mode(ci)) { + role = ci_get_role(ci); + + if (ci->role != role) { + ci_handle_id_switch(ci); + } else if (role == CI_ROLE_GADGET) { + if (ci->is_otg && hw_read_otgsc(ci, OTGSC_BSV)) + usb_gadget_vbus_connect(&ci->gadget); + } + } + + enable_irq(ci->irq); +} + static int ci_resume(struct device *dev) { struct ci_hdrc *ci = dev_get_drvdata(dev); From c5edb757baa99f6d30180b1a4b4f81f7e7f92217 Mon Sep 17 00:00:00 2001 From: Bhupesh Sharma Date: Sat, 29 Oct 2022 21:43:12 +0530 Subject: [PATCH 1127/4122] tools: usb: ffs-aio-example: Fix build error with aarch64-*-gnu-gcc toolchain(s) The 
tools/usb/aio_simple.c file when cross-compiled with aarch64-*-gnu-gcc toolchain(s) leads to the following errors: aio_simple.c:30:10: fatal error: endian.h: No such file or directory 30 | #include | ^~~~~~~~~~ aio_simple.c:88:14: note: (near initialization for 'descriptors.fs_count') aio_simple.c:110:14: error: initializer element is not constant 110 | .hs_count = htole32(3), | ^~~~~~~ aio_simple.c:110:14: note: (near initialization for 'descriptors.hs_count') aio_simple.c:124:22: error: initializer element is not constant 124 | .wMaxPacketSize = htole16(512), | ^~~~~~~ aio_simple.c:124:22: note: (near initialization for 'descriptors.hs_descs.bulk_sink.wMaxPacketSize') Fix these compilation issues by: - Switching to _DEFAULT_SOURCE: _BSD_SOURCE is deprecated and gives a build warning. Let's use _DEFAULT_SOURCE instead. - Currently this file uses library htole16/32 function calls. Replace these with equivalent 'cpu_to_le16/32' calls. Cc: Felipe Balbi Cc: Greg Kroah-Hartman Signed-off-by: Bhupesh Sharma Link: https://lore.kernel.org/r/20221029161312.171165-1-bhupesh.sharma@linaro.org Signed-off-by: Greg Kroah-Hartman --- .../simple/device_app/aio_simple.c | 44 +++++++++++++------ 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/tools/usb/ffs-aio-example/simple/device_app/aio_simple.c b/tools/usb/ffs-aio-example/simple/device_app/aio_simple.c index 1f44a29818bf..96616eb4600b 100644 --- a/tools/usb/ffs-aio-example/simple/device_app/aio_simple.c +++ b/tools/usb/ffs-aio-example/simple/device_app/aio_simple.c @@ -25,7 +25,9 @@ * For more information, please refer to */ -#define _BSD_SOURCE /* for endian.h */ +/* $(CROSS_COMPILE)cc -g -o aio_simple aio_simple.c -laio */ + +#define _DEFAULT_SOURCE /* for endian.h */ #include #include @@ -49,6 +51,22 @@ #define BUF_LEN 8192 +/* + * cpu_to_le16/32 are used when initializing structures, a context where a + * function call is not allowed. 
To solve this, we code cpu_to_le16/32 in a way + * that allows them to be used when initializing structures. + */ + +#if BYTE_ORDER == __LITTLE_ENDIAN +#define cpu_to_le16(x) (x) +#define cpu_to_le32(x) (x) +#else +#define cpu_to_le16(x) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8)) +#define cpu_to_le32(x) \ + ((((x) & 0xff000000u) >> 24) | (((x) & 0x00ff0000u) >> 8) | \ + (((x) & 0x0000ff00u) << 8) | (((x) & 0x000000ffu) << 24)) +#endif + /******************** Descriptors and Strings *******************************/ static const struct { @@ -62,12 +80,12 @@ static const struct { } __attribute__ ((__packed__)) fs_descs, hs_descs; } __attribute__ ((__packed__)) descriptors = { .header = { - .magic = htole32(FUNCTIONFS_DESCRIPTORS_MAGIC_V2), - .flags = htole32(FUNCTIONFS_HAS_FS_DESC | + .magic = cpu_to_le32(FUNCTIONFS_DESCRIPTORS_MAGIC_V2), + .flags = cpu_to_le32(FUNCTIONFS_HAS_FS_DESC | FUNCTIONFS_HAS_HS_DESC), - .length = htole32(sizeof(descriptors)), + .length = cpu_to_le32(sizeof(descriptors)), }, - .fs_count = htole32(3), + .fs_count = cpu_to_le32(3), .fs_descs = { .intf = { .bLength = sizeof(descriptors.fs_descs.intf), @@ -89,7 +107,7 @@ static const struct { .bmAttributes = USB_ENDPOINT_XFER_BULK, }, }, - .hs_count = htole32(3), + .hs_count = cpu_to_le32(3), .hs_descs = { .intf = { .bLength = sizeof(descriptors.hs_descs.intf), @@ -103,14 +121,14 @@ static const struct { .bDescriptorType = USB_DT_ENDPOINT, .bEndpointAddress = 1 | USB_DIR_IN, .bmAttributes = USB_ENDPOINT_XFER_BULK, - .wMaxPacketSize = htole16(512), + .wMaxPacketSize = cpu_to_le16(512), }, .bulk_source = { .bLength = sizeof(descriptors.hs_descs.bulk_source), .bDescriptorType = USB_DT_ENDPOINT, .bEndpointAddress = 2 | USB_DIR_OUT, .bmAttributes = USB_ENDPOINT_XFER_BULK, - .wMaxPacketSize = htole16(512), + .wMaxPacketSize = cpu_to_le16(512), }, }, }; @@ -125,13 +143,13 @@ static const struct { } __attribute__ ((__packed__)) lang0; } __attribute__ ((__packed__)) strings = { .header = { - .magic = 
htole32(FUNCTIONFS_STRINGS_MAGIC), - .length = htole32(sizeof(strings)), - .str_count = htole32(1), - .lang_count = htole32(1), + .magic = cpu_to_le32(FUNCTIONFS_STRINGS_MAGIC), + .length = cpu_to_le32(sizeof(strings)), + .str_count = cpu_to_le32(1), + .lang_count = cpu_to_le32(1), }, .lang0 = { - htole16(0x0409), /* en-us */ + cpu_to_le16(0x0409), /* en-us */ STR_INTERFACE, }, }; From 1dd33a9f1b95ab59cd60f14a7a83fed14697867b Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 23 Oct 2022 16:47:06 +0200 Subject: [PATCH 1128/4122] usb: fotg210: Collect pieces of dual mode controller The Faraday FOTG210 is a dual-mode OTG USB controller that can act as host, peripheral or both. To be able to probe from one hardware description and to follow the pattern of other dual- mode controllers such as MUSB or MTU3 we need to collect the two, currently completely separate drivers in the same directory. After this, users need to select the main symbol USB_FOTG210 and then each respective subdriver. We pave the road to compile both drivers into the same kernel and select the one we want to use at probe() time, and possibly add OTG support in the end. This patch doesn't do much more than create the new symbol and collect the drivers in one place. We also add a comment for the section of dual-mode controllers in the Kconfig file so people can see what these selections are about. Also add myself as maintainer as there has been little response on my patches to these drivers. 
Cc: Fabian Vogt Cc: Yuan-Hsin Chen Cc: Felipe Balbi Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20221023144708.3596563-1-linus.walleij@linaro.org Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 6 ++++ drivers/usb/Kconfig | 4 +++ drivers/usb/Makefile | 2 ++ drivers/usb/fotg210/Kconfig | 36 +++++++++++++++++++ drivers/usb/fotg210/Makefile | 3 ++ drivers/usb/{host => fotg210}/fotg210-hcd.c | 2 +- .../{host/fotg210.h => fotg210/fotg210-hcd.h} | 0 .../usb/{gadget/udc => fotg210}/fotg210-udc.c | 2 +- .../udc/fotg210.h => fotg210/fotg210-udc.h} | 0 drivers/usb/gadget/udc/Kconfig | 11 ------ drivers/usb/gadget/udc/Makefile | 1 - drivers/usb/host/Kconfig | 11 ------ drivers/usb/host/Makefile | 1 - 13 files changed, 53 insertions(+), 26 deletions(-) create mode 100644 drivers/usb/fotg210/Kconfig create mode 100644 drivers/usb/fotg210/Makefile rename drivers/usb/{host => fotg210}/fotg210-hcd.c (99%) rename drivers/usb/{host/fotg210.h => fotg210/fotg210-hcd.h} (100%) rename drivers/usb/{gadget/udc => fotg210}/fotg210-udc.c (99%) rename drivers/usb/{gadget/udc/fotg210.h => fotg210/fotg210-udc.h} (100%) diff --git a/MAINTAINERS b/MAINTAINERS index 379945f82a64..52ddfc938ac9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7873,6 +7873,12 @@ F: fs/notify/fanotify/ F: include/linux/fanotify.h F: include/uapi/linux/fanotify.h +FARADAY FOTG210 USB2 DUAL-ROLE CONTROLLER +M: Linus Walleij +L: linux-usb@vger.kernel.org +S: Maintained +F: drivers/usb/fotg210/ + FARSYNC SYNCHRONOUS DRIVER M: Kevin Curtis S: Supported diff --git a/drivers/usb/Kconfig b/drivers/usb/Kconfig index 578a439e71b5..a871a988829d 100644 --- a/drivers/usb/Kconfig +++ b/drivers/usb/Kconfig @@ -111,8 +111,12 @@ source "drivers/usb/usbip/Kconfig" endif +comment "USB dual-mode controller drivers" + source "drivers/usb/cdns3/Kconfig" +source "drivers/usb/fotg210/Kconfig" + source "drivers/usb/mtu3/Kconfig" source "drivers/usb/musb/Kconfig" diff --git a/drivers/usb/Makefile b/drivers/usb/Makefile index 
643edf5fe18c..a81e6ef293af 100644 --- a/drivers/usb/Makefile +++ b/drivers/usb/Makefile @@ -17,6 +17,8 @@ obj-$(CONFIG_USB_CDNS_SUPPORT) += cdns3/ obj-$(CONFIG_USB_CDNS3) += cdns3/ obj-$(CONFIG_USB_CDNSP_PCI) += cdns3/ +obj-$(CONFIG_USB_FOTG210) += fotg210/ + obj-$(CONFIG_USB_MON) += mon/ obj-$(CONFIG_USB_MTU3) += mtu3/ diff --git a/drivers/usb/fotg210/Kconfig b/drivers/usb/fotg210/Kconfig new file mode 100644 index 000000000000..e7a106785f5d --- /dev/null +++ b/drivers/usb/fotg210/Kconfig @@ -0,0 +1,36 @@ +# SPDX-License-Identifier: GPL-2.0 + +config USB_FOTG210 + tristate "Faraday FOTG210 USB2 Dual Role controller" + depends on USB || USB_GADGET + depends on HAS_DMA && HAS_IOMEM + default ARCH_GEMINI + help + Faraday FOTG210 is a dual-mode USB controller that can act + in both host controller and peripheral controller mode. + +if USB_FOTG210 + +config USB_FOTG210_HCD + tristate "Faraday FOTG210 USB Host Controller support" + depends on USB + help + Faraday FOTG210 is an OTG controller which can be configured as + an USB2.0 host. It is designed to meet USB2.0 EHCI specification + with minor modification. + + To compile this driver as a module, choose M here: the + module will be called fotg210-hcd. + +config USB_FOTG210_UDC + depends on USB_GADGET + tristate "Faraday FOTG210 USB Peripheral Controller support" + help + Faraday USB2.0 OTG controller which can be configured as + high speed or full speed USB device. This driver suppports + Bulk Transfer so far. + + Say "y" to link the driver statically, or "m" to build a + dynamically linked module called "fotg210-udc". 
+ +endif diff --git a/drivers/usb/fotg210/Makefile b/drivers/usb/fotg210/Makefile new file mode 100644 index 000000000000..f4a26ca0e563 --- /dev/null +++ b/drivers/usb/fotg210/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_USB_FOTG210_HCD) += fotg210-hcd.o +obj-$(CONFIG_USB_FOTG210_UDC) += fotg210-udc.o diff --git a/drivers/usb/host/fotg210-hcd.c b/drivers/usb/fotg210/fotg210-hcd.c similarity index 99% rename from drivers/usb/host/fotg210-hcd.c rename to drivers/usb/fotg210/fotg210-hcd.c index 3d1dbcf4c073..8fbf63e76d7d 100644 --- a/drivers/usb/host/fotg210-hcd.c +++ b/drivers/usb/fotg210/fotg210-hcd.c @@ -77,7 +77,7 @@ MODULE_PARM_DESC(hird, "host initiated resume duration, +1 for each 75us"); #define INTR_MASK (STS_IAA | STS_FATAL | STS_PCD | STS_ERR | STS_INT) -#include "fotg210.h" +#include "fotg210-hcd.h" #define fotg210_dbg(fotg210, fmt, args...) \ dev_dbg(fotg210_to_hcd(fotg210)->self.controller, fmt, ## args) diff --git a/drivers/usb/host/fotg210.h b/drivers/usb/fotg210/fotg210-hcd.h similarity index 100% rename from drivers/usb/host/fotg210.h rename to drivers/usb/fotg210/fotg210-hcd.h diff --git a/drivers/usb/gadget/udc/fotg210-udc.c b/drivers/usb/fotg210/fotg210-udc.c similarity index 99% rename from drivers/usb/gadget/udc/fotg210-udc.c rename to drivers/usb/fotg210/fotg210-udc.c index fdca28e72a3b..01a4509775b2 100644 --- a/drivers/usb/gadget/udc/fotg210-udc.c +++ b/drivers/usb/fotg210/fotg210-udc.c @@ -16,7 +16,7 @@ #include #include -#include "fotg210.h" +#include "fotg210-udc.h" #define DRIVER_DESC "FOTG210 USB Device Controller Driver" #define DRIVER_VERSION "30-April-2013" diff --git a/drivers/usb/gadget/udc/fotg210.h b/drivers/usb/fotg210/fotg210-udc.h similarity index 100% rename from drivers/usb/gadget/udc/fotg210.h rename to drivers/usb/fotg210/fotg210-udc.h diff --git a/drivers/usb/gadget/udc/Kconfig b/drivers/usb/gadget/udc/Kconfig index 5756acb07b8d..16243964b1cd 100644 --- a/drivers/usb/gadget/udc/Kconfig +++ 
b/drivers/usb/gadget/udc/Kconfig @@ -108,17 +108,6 @@ config USB_FUSB300 help Faraday usb device controller FUSB300 driver -config USB_FOTG210_UDC - depends on HAS_DMA - tristate "Faraday FOTG210 USB Peripheral Controller" - help - Faraday USB2.0 OTG controller which can be configured as - high speed or full speed USB device. This driver supppors - Bulk Transfer so far. - - Say "y" to link the driver statically, or "m" to build a - dynamically linked module called "fotg210_udc". - config USB_GR_UDC tristate "Aeroflex Gaisler GRUSBDC USB Peripheral Controller Driver" depends on HAS_DMA diff --git a/drivers/usb/gadget/udc/Makefile b/drivers/usb/gadget/udc/Makefile index 12f9e4c9eb0c..39daf36a2baa 100644 --- a/drivers/usb/gadget/udc/Makefile +++ b/drivers/usb/gadget/udc/Makefile @@ -34,7 +34,6 @@ obj-$(CONFIG_USB_EG20T) += pch_udc.o obj-$(CONFIG_USB_MV_UDC) += mv_udc.o mv_udc-y := mv_udc_core.o obj-$(CONFIG_USB_FUSB300) += fusb300_udc.o -obj-$(CONFIG_USB_FOTG210_UDC) += fotg210-udc.o obj-$(CONFIG_USB_MV_U3D) += mv_u3d_core.o obj-$(CONFIG_USB_GR_UDC) += gr_udc.o obj-$(CONFIG_USB_GADGET_XILINX) += udc-xilinx.o diff --git a/drivers/usb/host/Kconfig b/drivers/usb/host/Kconfig index 8e8db71021a5..8d799d23c476 100644 --- a/drivers/usb/host/Kconfig +++ b/drivers/usb/host/Kconfig @@ -389,17 +389,6 @@ config USB_ISP1362_HCD To compile this driver as a module, choose M here: the module will be called isp1362-hcd. -config USB_FOTG210_HCD - tristate "FOTG210 HCD support" - depends on USB && HAS_DMA && HAS_IOMEM - help - Faraday FOTG210 is an OTG controller which can be configured as - an USB2.0 host. It is designed to meet USB2.0 EHCI specification - with minor modification. - - To compile this driver as a module, choose M here: the - module will be called fotg210-hcd. 
- config USB_MAX3421_HCD tristate "MAX3421 HCD (USB-over-SPI) support" depends on USB && SPI diff --git a/drivers/usb/host/Makefile b/drivers/usb/host/Makefile index 2c8a61be7e46..6d8ee264c9b2 100644 --- a/drivers/usb/host/Makefile +++ b/drivers/usb/host/Makefile @@ -84,6 +84,5 @@ obj-$(CONFIG_USB_EHCI_FSL) += ehci-fsl.o obj-$(CONFIG_USB_EHCI_MV) += ehci-mv.o obj-$(CONFIG_USB_HCD_BCMA) += bcma-hcd.o obj-$(CONFIG_USB_HCD_SSB) += ssb-hcd.o -obj-$(CONFIG_USB_FOTG210_HCD) += fotg210-hcd.o obj-$(CONFIG_USB_MAX3421_HCD) += max3421-hcd.o obj-$(CONFIG_USB_XEN_HCD) += xen-hcd.o From aeffd2c3b09f4f50438ec8960095129798bcb33a Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 23 Oct 2022 16:47:07 +0200 Subject: [PATCH 1129/4122] usb: fotg210: Compile into one module It is since ages perfectly possible to compile both of these modules into the same kernel, which makes no sense since it is one piece of hardware. Compile one module named "fotg210.ko" for both HCD and UDC drivers by collecting the init calls into a fotg210-core.c file and start to centralize things handling one and the same piece of hardware. Stub out the initcalls if one or the other part of the driver was not selected. Tested by compiling one or the other or both of the drivers into the kernel and as modules. 
Cc: Fabian Vogt Cc: Yuan-Hsin Chen Cc: Felipe Balbi Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20221023144708.3596563-2-linus.walleij@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/fotg210/Kconfig | 4 +- drivers/usb/fotg210/Makefile | 11 ++++- drivers/usb/fotg210/fotg210-core.c | 79 ++++++++++++++++++++++++++++++ drivers/usb/fotg210/fotg210-hcd.c | 49 +++--------------- drivers/usb/fotg210/fotg210-udc.c | 19 ++----- drivers/usb/fotg210/fotg210.h | 42 ++++++++++++++++ 6 files changed, 142 insertions(+), 62 deletions(-) create mode 100644 drivers/usb/fotg210/fotg210-core.c create mode 100644 drivers/usb/fotg210/fotg210.h diff --git a/drivers/usb/fotg210/Kconfig b/drivers/usb/fotg210/Kconfig index e7a106785f5d..933c513b5728 100644 --- a/drivers/usb/fotg210/Kconfig +++ b/drivers/usb/fotg210/Kconfig @@ -12,7 +12,7 @@ config USB_FOTG210 if USB_FOTG210 config USB_FOTG210_HCD - tristate "Faraday FOTG210 USB Host Controller support" + bool "Faraday FOTG210 USB Host Controller support" depends on USB help Faraday FOTG210 is an OTG controller which can be configured as @@ -24,7 +24,7 @@ config USB_FOTG210_HCD config USB_FOTG210_UDC depends on USB_GADGET - tristate "Faraday FOTG210 USB Peripheral Controller support" + bool "Faraday FOTG210 USB Peripheral Controller support" help Faraday USB2.0 OTG controller which can be configured as high speed or full speed USB device. This driver suppports diff --git a/drivers/usb/fotg210/Makefile b/drivers/usb/fotg210/Makefile index f4a26ca0e563..5aecff21f24b 100644 --- a/drivers/usb/fotg210/Makefile +++ b/drivers/usb/fotg210/Makefile @@ -1,3 +1,10 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_USB_FOTG210_HCD) += fotg210-hcd.o -obj-$(CONFIG_USB_FOTG210_UDC) += fotg210-udc.o + +# This setup links the different object files into one single +# module so we don't have to EXPORT() a lot of internal symbols +# or create unnecessary submodules. 
+fotg210-objs-y += fotg210-core.o +fotg210-objs-$(CONFIG_USB_FOTG210_HCD) += fotg210-hcd.o +fotg210-objs-$(CONFIG_USB_FOTG210_UDC) += fotg210-udc.o +fotg210-objs := $(fotg210-objs-y) +obj-$(CONFIG_USB_FOTG210) += fotg210.o diff --git a/drivers/usb/fotg210/fotg210-core.c b/drivers/usb/fotg210/fotg210-core.c new file mode 100644 index 000000000000..ab7b8974bc18 --- /dev/null +++ b/drivers/usb/fotg210/fotg210-core.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Central probing code for the FOTG210 dual role driver + * We register one driver for the hardware and then we decide + * whether to proceed with probing the host or the peripheral + * driver. + */ +#include +#include +#include +#include +#include + +#include "fotg210.h" + +static int fotg210_probe(struct platform_device *pdev) +{ + int ret; + + if (IS_ENABLED(CONFIG_USB_FOTG210_HCD)) { + ret = fotg210_hcd_probe(pdev); + if (ret) + return ret; + } + if (IS_ENABLED(CONFIG_USB_FOTG210_UDC)) + ret = fotg210_udc_probe(pdev); + + return ret; +} + +static int fotg210_remove(struct platform_device *pdev) +{ + if (IS_ENABLED(CONFIG_USB_FOTG210_HCD)) + fotg210_hcd_remove(pdev); + if (IS_ENABLED(CONFIG_USB_FOTG210_UDC)) + fotg210_udc_remove(pdev); + + return 0; +} + +#ifdef CONFIG_OF +static const struct of_device_id fotg210_of_match[] = { + { .compatible = "faraday,fotg210" }, + {}, +}; +MODULE_DEVICE_TABLE(of, fotg210_of_match); +#endif + +static struct platform_driver fotg210_driver = { + .driver = { + .name = "fotg210", + .of_match_table = of_match_ptr(fotg210_of_match), + }, + .probe = fotg210_probe, + .remove = fotg210_remove, +}; + +static int __init fotg210_init(void) +{ + if (usb_disabled()) + return -ENODEV; + + if (IS_ENABLED(CONFIG_USB_FOTG210_HCD)) + fotg210_hcd_init(); + return platform_driver_register(&fotg210_driver); +} +module_init(fotg210_init); + +static void __exit fotg210_cleanup(void) +{ + platform_driver_unregister(&fotg210_driver); + if (IS_ENABLED(CONFIG_USB_FOTG210_HCD)) + 
fotg210_hcd_cleanup(); +} +module_exit(fotg210_cleanup); + +MODULE_AUTHOR("Yuan-Hsin Chen, Feng-Hsin Chiang"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("FOTG210 Dual Role Controller Driver"); diff --git a/drivers/usb/fotg210/fotg210-hcd.c b/drivers/usb/fotg210/fotg210-hcd.c index 8fbf63e76d7d..51ac93a2eb98 100644 --- a/drivers/usb/fotg210/fotg210-hcd.c +++ b/drivers/usb/fotg210/fotg210-hcd.c @@ -39,8 +39,8 @@ #include #include -#define DRIVER_AUTHOR "Yuan-Hsin Chen" -#define DRIVER_DESC "FOTG210 Host Controller (EHCI) Driver" +#include "fotg210.h" + static const char hcd_name[] = "fotg210_hcd"; #undef FOTG210_URB_TRACE @@ -5490,9 +5490,6 @@ static int fotg210_get_frame(struct usb_hcd *hcd) * functions and in order to facilitate role switching we cannot * give the fotg210 driver exclusive access to those. */ -MODULE_DESCRIPTION(DRIVER_DESC); -MODULE_AUTHOR(DRIVER_AUTHOR); -MODULE_LICENSE("GPL"); static const struct hc_driver fotg210_fotg210_hc_driver = { .description = hcd_name, @@ -5560,7 +5557,7 @@ static void fotg210_init(struct fotg210_hcd *fotg210) * then invokes the start() method for the HCD associated with it * through the hotplug entry's driver_data. 
*/ -static int fotg210_hcd_probe(struct platform_device *pdev) +int fotg210_hcd_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct usb_hcd *hcd; @@ -5652,7 +5649,7 @@ fail_create_hcd: * @dev: USB Host Controller being removed * */ -static int fotg210_hcd_remove(struct platform_device *pdev) +int fotg210_hcd_remove(struct platform_device *pdev) { struct usb_hcd *hcd = platform_get_drvdata(pdev); struct fotg210_hcd *fotg210 = hcd_to_fotg210(hcd); @@ -5668,27 +5665,8 @@ static int fotg210_hcd_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_OF -static const struct of_device_id fotg210_of_match[] = { - { .compatible = "faraday,fotg210" }, - {}, -}; -MODULE_DEVICE_TABLE(of, fotg210_of_match); -#endif - -static struct platform_driver fotg210_hcd_driver = { - .driver = { - .name = "fotg210-hcd", - .of_match_table = of_match_ptr(fotg210_of_match), - }, - .probe = fotg210_hcd_probe, - .remove = fotg210_hcd_remove, -}; - -static int __init fotg210_hcd_init(void) +int __init fotg210_hcd_init(void) { - int retval = 0; - if (usb_disabled()) return -ENODEV; @@ -5704,24 +5682,11 @@ static int __init fotg210_hcd_init(void) fotg210_debug_root = debugfs_create_dir("fotg210", usb_debug_root); - retval = platform_driver_register(&fotg210_hcd_driver); - if (retval < 0) - goto clean; - return retval; - -clean: - debugfs_remove(fotg210_debug_root); - fotg210_debug_root = NULL; - - clear_bit(USB_EHCI_LOADED, &usb_hcds_loaded); - return retval; + return 0; } -module_init(fotg210_hcd_init); -static void __exit fotg210_hcd_cleanup(void) +void __exit fotg210_hcd_cleanup(void) { - platform_driver_unregister(&fotg210_hcd_driver); debugfs_remove(fotg210_debug_root); clear_bit(USB_EHCI_LOADED, &usb_hcds_loaded); } -module_exit(fotg210_hcd_cleanup); diff --git a/drivers/usb/fotg210/fotg210-udc.c b/drivers/usb/fotg210/fotg210-udc.c index 01a4509775b2..7757aaa11d6f 100644 --- a/drivers/usb/fotg210/fotg210-udc.c +++ b/drivers/usb/fotg210/fotg210-udc.c @@ 
-16,6 +16,7 @@ #include #include +#include "fotg210.h" #include "fotg210-udc.h" #define DRIVER_DESC "FOTG210 USB Device Controller Driver" @@ -1068,7 +1069,7 @@ static const struct usb_gadget_ops fotg210_gadget_ops = { .udc_stop = fotg210_udc_stop, }; -static int fotg210_udc_remove(struct platform_device *pdev) +int fotg210_udc_remove(struct platform_device *pdev) { struct fotg210_udc *fotg210 = platform_get_drvdata(pdev); int i; @@ -1085,7 +1086,7 @@ static int fotg210_udc_remove(struct platform_device *pdev) return 0; } -static int fotg210_udc_probe(struct platform_device *pdev) +int fotg210_udc_probe(struct platform_device *pdev) { struct resource *res, *ires; struct fotg210_udc *fotg210 = NULL; @@ -1208,17 +1209,3 @@ err_alloc: err: return ret; } - -static struct platform_driver fotg210_driver = { - .driver = { - .name = udc_name, - }, - .probe = fotg210_udc_probe, - .remove = fotg210_udc_remove, -}; - -module_platform_driver(fotg210_driver); - -MODULE_AUTHOR("Yuan-Hsin Chen, Feng-Hsin Chiang "); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/drivers/usb/fotg210/fotg210.h b/drivers/usb/fotg210/fotg210.h new file mode 100644 index 000000000000..ef79d8323d89 --- /dev/null +++ b/drivers/usb/fotg210/fotg210.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __FOTG210_H +#define __FOTG210_H + +#ifdef CONFIG_USB_FOTG210_HCD +int fotg210_hcd_probe(struct platform_device *pdev); +int fotg210_hcd_remove(struct platform_device *pdev); +int fotg210_hcd_init(void); +void fotg210_hcd_cleanup(void); +#else +static inline int fotg210_hcd_probe(struct platform_device *pdev) +{ + return 0; +} +static inline int fotg210_hcd_remove(struct platform_device *pdev) +{ + return 0; +} +static inline int fotg210_hcd_init(void) +{ + return 0; +} +static inline void fotg210_hcd_cleanup(void) +{ +} +#endif + +#ifdef CONFIG_USB_FOTG210_UDC +int fotg210_udc_probe(struct platform_device *pdev); +int fotg210_udc_remove(struct platform_device *pdev); 
+#else +static inline int fotg210_udc_probe(struct platform_device *pdev) +{ + return 0; +} +static inline int fotg210_udc_remove(struct platform_device *pdev) +{ + return 0; +} +#endif + +#endif /* __FOTG210_H */ From 1fac1c4da8a225ffbfae294ae36e18a3a65cb87e Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 23 Oct 2022 16:47:08 +0200 Subject: [PATCH 1130/4122] usb: fotg210: Select subdriver by mode Check which mode the hardware is in, and select the peripheral driver if the hardware is in explicit peripheral mode, otherwise select host mode. This should solve the immediate problem that both subdrivers can get probed. Cc: Fabian Vogt Cc: Yuan-Hsin Chen Cc: Felipe Balbi Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20221023144708.3596563-3-linus.walleij@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/fotg210/fotg210-core.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/drivers/usb/fotg210/fotg210-core.c b/drivers/usb/fotg210/fotg210-core.c index ab7b8974bc18..3d07ee46f6d1 100644 --- a/drivers/usb/fotg210/fotg210-core.c +++ b/drivers/usb/fotg210/fotg210-core.c @@ -10,30 +10,37 @@ #include #include #include +#include #include "fotg210.h" static int fotg210_probe(struct platform_device *pdev) { + struct device *dev = &pdev->dev; + enum usb_dr_mode mode; int ret; - if (IS_ENABLED(CONFIG_USB_FOTG210_HCD)) { - ret = fotg210_hcd_probe(pdev); - if (ret) - return ret; - } - if (IS_ENABLED(CONFIG_USB_FOTG210_UDC)) + mode = usb_get_dr_mode(dev); + + if (mode == USB_DR_MODE_PERIPHERAL) ret = fotg210_udc_probe(pdev); + else + ret = fotg210_hcd_probe(pdev); return ret; } static int fotg210_remove(struct platform_device *pdev) { - if (IS_ENABLED(CONFIG_USB_FOTG210_HCD)) - fotg210_hcd_remove(pdev); - if (IS_ENABLED(CONFIG_USB_FOTG210_UDC)) + struct device *dev = &pdev->dev; + enum usb_dr_mode mode; + + mode = usb_get_dr_mode(dev); + + if (mode == USB_DR_MODE_PERIPHERAL) fotg210_udc_remove(pdev); + else 
+ fotg210_hcd_remove(pdev); return 0; } From 21acc656a06e912341d9db66c67b58cc7ed071e7 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Wed, 26 Oct 2022 19:26:51 +0100 Subject: [PATCH 1131/4122] usb: musb: Add and use inline functions musb_{get,set}_state Instead of manipulating musb->xceiv->otg->state directly, use the newly introduced musb_get_state() and musb_set_state() inline functions. Later, these inline functions will be modified to get rid of the musb->xceiv dependency, which prevents the musb code from using the generic PHY subsystem. Signed-off-by: Paul Cercueil Link: https://lore.kernel.org/r/20221026182657.146630-2-paul@crapouillou.net Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_core.c | 62 ++++++++++++++++----------------- drivers/usb/musb/musb_core.h | 11 ++++++ drivers/usb/musb/musb_debugfs.c | 6 ++-- drivers/usb/musb/musb_gadget.c | 28 +++++++-------- drivers/usb/musb/musb_host.c | 6 ++-- drivers/usb/musb/musb_virthub.c | 18 +++++----- 6 files changed, 71 insertions(+), 60 deletions(-) diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index 03027c6fa3ab..a0fe2516870b 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -502,7 +502,7 @@ int musb_set_host(struct musb *musb) init_data: musb->is_active = 1; - musb->xceiv->otg->state = OTG_STATE_A_IDLE; + musb_set_state(musb, OTG_STATE_A_IDLE); MUSB_HST_MODE(musb); return error; @@ -549,7 +549,7 @@ int musb_set_peripheral(struct musb *musb) init_data: musb->is_active = 0; - musb->xceiv->otg->state = OTG_STATE_B_IDLE; + musb_set_state(musb, OTG_STATE_B_IDLE); MUSB_DEV_MODE(musb); return error; @@ -599,12 +599,12 @@ static void musb_otg_timer_func(struct timer_list *t) unsigned long flags; spin_lock_irqsave(&musb->lock, flags); - switch (musb->xceiv->otg->state) { + switch (musb_get_state(musb)) { case OTG_STATE_B_WAIT_ACON: musb_dbg(musb, "HNP: b_wait_acon timeout; back to b_peripheral"); musb_g_disconnect(musb); - musb->xceiv->otg->state 
= OTG_STATE_B_PERIPHERAL; + musb_set_state(musb, OTG_STATE_B_PERIPHERAL); musb->is_active = 0; break; case OTG_STATE_A_SUSPEND: @@ -612,7 +612,7 @@ static void musb_otg_timer_func(struct timer_list *t) musb_dbg(musb, "HNP: %s timeout", usb_otg_state_string(musb->xceiv->otg->state)); musb_platform_set_vbus(musb, 0); - musb->xceiv->otg->state = OTG_STATE_A_WAIT_VFALL; + musb_set_state(musb, OTG_STATE_A_WAIT_VFALL); break; default: musb_dbg(musb, "HNP: Unhandled mode %s", @@ -633,7 +633,7 @@ void musb_hnp_stop(struct musb *musb) musb_dbg(musb, "HNP: stop from %s", usb_otg_state_string(musb->xceiv->otg->state)); - switch (musb->xceiv->otg->state) { + switch (musb_get_state(musb)) { case OTG_STATE_A_PERIPHERAL: musb_g_disconnect(musb); musb_dbg(musb, "HNP: back to %s", @@ -643,7 +643,7 @@ void musb_hnp_stop(struct musb *musb) musb_dbg(musb, "HNP: Disabling HR"); if (hcd) hcd->self.is_b_host = 0; - musb->xceiv->otg->state = OTG_STATE_B_PERIPHERAL; + musb_set_state(musb, OTG_STATE_B_PERIPHERAL); MUSB_DEV_MODE(musb); reg = musb_readb(mbase, MUSB_POWER); reg |= MUSB_POWER_SUSPENDM; @@ -671,7 +671,7 @@ static void musb_handle_intr_resume(struct musb *musb, u8 devctl) usb_otg_state_string(musb->xceiv->otg->state)); if (devctl & MUSB_DEVCTL_HM) { - switch (musb->xceiv->otg->state) { + switch (musb_get_state(musb)) { case OTG_STATE_A_SUSPEND: /* remote wakeup? 
*/ musb->port1_status |= @@ -679,14 +679,14 @@ static void musb_handle_intr_resume(struct musb *musb, u8 devctl) | MUSB_PORT_STAT_RESUME; musb->rh_timer = jiffies + msecs_to_jiffies(USB_RESUME_TIMEOUT); - musb->xceiv->otg->state = OTG_STATE_A_HOST; + musb_set_state(musb, OTG_STATE_A_HOST); musb->is_active = 1; musb_host_resume_root_hub(musb); schedule_delayed_work(&musb->finish_resume_work, msecs_to_jiffies(USB_RESUME_TIMEOUT)); break; case OTG_STATE_B_WAIT_ACON: - musb->xceiv->otg->state = OTG_STATE_B_PERIPHERAL; + musb_set_state(musb, OTG_STATE_B_PERIPHERAL); musb->is_active = 1; MUSB_DEV_MODE(musb); break; @@ -696,10 +696,10 @@ static void musb_handle_intr_resume(struct musb *musb, u8 devctl) usb_otg_state_string(musb->xceiv->otg->state)); } } else { - switch (musb->xceiv->otg->state) { + switch (musb_get_state(musb)) { case OTG_STATE_A_SUSPEND: /* possibly DISCONNECT is upcoming */ - musb->xceiv->otg->state = OTG_STATE_A_HOST; + musb_set_state(musb, OTG_STATE_A_HOST); musb_host_resume_root_hub(musb); break; case OTG_STATE_B_WAIT_ACON: @@ -750,7 +750,7 @@ static irqreturn_t musb_handle_intr_sessreq(struct musb *musb, u8 devctl) */ musb_writeb(mbase, MUSB_DEVCTL, MUSB_DEVCTL_SESSION); musb->ep0_stage = MUSB_EP0_START; - musb->xceiv->otg->state = OTG_STATE_A_IDLE; + musb_set_state(musb, OTG_STATE_A_IDLE); MUSB_HST_MODE(musb); musb_platform_set_vbus(musb, 1); @@ -777,7 +777,7 @@ static void musb_handle_intr_vbuserr(struct musb *musb, u8 devctl) * REVISIT: do delays from lots of DEBUG_KERNEL checks * make trouble here, keeping VBUS < 4.4V ? 
*/ - switch (musb->xceiv->otg->state) { + switch (musb_get_state(musb)) { case OTG_STATE_A_HOST: /* recovery is dicey once we've gotten past the * initial stages of enumeration, but if VBUS @@ -833,7 +833,7 @@ static void musb_handle_intr_suspend(struct musb *musb, u8 devctl) musb_dbg(musb, "SUSPEND (%s) devctl %02x", usb_otg_state_string(musb->xceiv->otg->state), devctl); - switch (musb->xceiv->otg->state) { + switch (musb_get_state(musb)) { case OTG_STATE_A_PERIPHERAL: /* We also come here if the cable is removed, since * this silicon doesn't report ID-no-longer-grounded. @@ -858,7 +858,7 @@ static void musb_handle_intr_suspend(struct musb *musb, u8 devctl) musb_g_suspend(musb); musb->is_active = musb->g.b_hnp_enable; if (musb->is_active) { - musb->xceiv->otg->state = OTG_STATE_B_WAIT_ACON; + musb_set_state(musb, OTG_STATE_B_WAIT_ACON); musb_dbg(musb, "HNP: Setting timer for b_ase0_brst"); mod_timer(&musb->otg_timer, jiffies + msecs_to_jiffies( @@ -871,7 +871,7 @@ static void musb_handle_intr_suspend(struct musb *musb, u8 devctl) + msecs_to_jiffies(musb->a_wait_bcon)); break; case OTG_STATE_A_HOST: - musb->xceiv->otg->state = OTG_STATE_A_SUSPEND; + musb_set_state(musb, OTG_STATE_A_SUSPEND); musb->is_active = musb->hcd->self.b_hnp_enable; break; case OTG_STATE_B_HOST: @@ -909,7 +909,7 @@ static void musb_handle_intr_connect(struct musb *musb, u8 devctl, u8 int_usb) musb->port1_status |= USB_PORT_STAT_LOW_SPEED; /* indicate new connection to OTG machine */ - switch (musb->xceiv->otg->state) { + switch (musb_get_state(musb)) { case OTG_STATE_B_PERIPHERAL: if (int_usb & MUSB_INTR_SUSPEND) { musb_dbg(musb, "HNP: SUSPEND+CONNECT, now b_host"); @@ -921,7 +921,7 @@ static void musb_handle_intr_connect(struct musb *musb, u8 devctl, u8 int_usb) case OTG_STATE_B_WAIT_ACON: musb_dbg(musb, "HNP: CONNECT, now b_host"); b_host: - musb->xceiv->otg->state = OTG_STATE_B_HOST; + musb_set_state(musb, OTG_STATE_B_HOST); if (musb->hcd) musb->hcd->self.is_b_host = 1; 
del_timer(&musb->otg_timer); @@ -929,7 +929,7 @@ b_host: default: if ((devctl & MUSB_DEVCTL_VBUS) == (3 << MUSB_DEVCTL_VBUS_SHIFT)) { - musb->xceiv->otg->state = OTG_STATE_A_HOST; + musb_set_state(musb, OTG_STATE_A_HOST); if (hcd) hcd->self.is_b_host = 0; } @@ -948,7 +948,7 @@ static void musb_handle_intr_disconnect(struct musb *musb, u8 devctl) usb_otg_state_string(musb->xceiv->otg->state), MUSB_MODE(musb), devctl); - switch (musb->xceiv->otg->state) { + switch (musb_get_state(musb)) { case OTG_STATE_A_HOST: case OTG_STATE_A_SUSPEND: musb_host_resume_root_hub(musb); @@ -966,7 +966,7 @@ static void musb_handle_intr_disconnect(struct musb *musb, u8 devctl) musb_root_disconnect(musb); if (musb->hcd) musb->hcd->self.is_b_host = 0; - musb->xceiv->otg->state = OTG_STATE_B_PERIPHERAL; + musb_set_state(musb, OTG_STATE_B_PERIPHERAL); MUSB_DEV_MODE(musb); musb_g_disconnect(musb); break; @@ -1006,7 +1006,7 @@ static void musb_handle_intr_reset(struct musb *musb) } else { musb_dbg(musb, "BUS RESET as %s", usb_otg_state_string(musb->xceiv->otg->state)); - switch (musb->xceiv->otg->state) { + switch (musb_get_state(musb)) { case OTG_STATE_A_SUSPEND: musb_g_reset(musb); fallthrough; @@ -1025,11 +1025,11 @@ static void musb_handle_intr_reset(struct musb *musb) case OTG_STATE_B_WAIT_ACON: musb_dbg(musb, "HNP: RESET (%s), to b_peripheral", usb_otg_state_string(musb->xceiv->otg->state)); - musb->xceiv->otg->state = OTG_STATE_B_PERIPHERAL; + musb_set_state(musb, OTG_STATE_B_PERIPHERAL); musb_g_reset(musb); break; case OTG_STATE_B_IDLE: - musb->xceiv->otg->state = OTG_STATE_B_PERIPHERAL; + musb_set_state(musb, OTG_STATE_B_PERIPHERAL); fallthrough; case OTG_STATE_B_PERIPHERAL: musb_g_reset(musb); @@ -1216,8 +1216,8 @@ void musb_start(struct musb *musb) * (c) peripheral initiates, using SRP */ if (musb->port_mode != MUSB_HOST && - musb->xceiv->otg->state != OTG_STATE_A_WAIT_BCON && - (devctl & MUSB_DEVCTL_VBUS) == MUSB_DEVCTL_VBUS) { + musb_get_state(musb) != OTG_STATE_A_WAIT_BCON && + 
(devctl & MUSB_DEVCTL_VBUS) == MUSB_DEVCTL_VBUS) { musb->is_active = 1; } else { devctl |= MUSB_DEVCTL_SESSION; @@ -1908,7 +1908,7 @@ vbus_store(struct device *dev, struct device_attribute *attr, spin_lock_irqsave(&musb->lock, flags); /* force T(a_wait_bcon) to be zero/unlimited *OR* valid */ musb->a_wait_bcon = val ? max_t(int, val, OTG_TIME_A_WAIT_BCON) : 0 ; - if (musb->xceiv->otg->state == OTG_STATE_A_WAIT_BCON) + if (musb_get_state(musb) == OTG_STATE_A_WAIT_BCON) musb->is_active = 0; musb_platform_try_idle(musb, jiffies + msecs_to_jiffies(val)); spin_unlock_irqrestore(&musb->lock, flags); @@ -2089,8 +2089,8 @@ static void musb_irq_work(struct work_struct *data) musb_pm_runtime_check_session(musb); - if (musb->xceiv->otg->state != musb->xceiv_old_state) { - musb->xceiv_old_state = musb->xceiv->otg->state; + if (musb_get_state(musb) != musb->xceiv_old_state) { + musb->xceiv_old_state = musb_get_state(musb); sysfs_notify(&musb->controller->kobj, NULL, "mode"); } @@ -2532,7 +2532,7 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl) } MUSB_DEV_MODE(musb); - musb->xceiv->otg->state = OTG_STATE_B_IDLE; + musb_set_state(musb, OTG_STATE_B_IDLE); switch (musb->port_mode) { case MUSB_HOST: diff --git a/drivers/usb/musb/musb_core.h b/drivers/usb/musb/musb_core.h index a8a65effe68b..4a4d485d37bd 100644 --- a/drivers/usb/musb/musb_core.h +++ b/drivers/usb/musb/musb_core.h @@ -592,6 +592,17 @@ static inline void musb_platform_clear_ep_rxintr(struct musb *musb, int epnum) musb->ops->clear_ep_rxintr(musb, epnum); } +static inline void musb_set_state(struct musb *musb, + enum usb_otg_state otg_state) +{ + musb->xceiv->otg->state = otg_state; +} + +static inline enum usb_otg_state musb_get_state(struct musb *musb) +{ + return musb->xceiv->otg->state; +} + /* * gets the "dr_mode" property from DT and converts it into musb_mode * if the property is not found or not recognized returns MUSB_OTG diff --git a/drivers/usb/musb/musb_debugfs.c 
b/drivers/usb/musb/musb_debugfs.c index 30a89aa8a3e7..78c726a71b17 100644 --- a/drivers/usb/musb/musb_debugfs.c +++ b/drivers/usb/musb/musb_debugfs.c @@ -235,7 +235,7 @@ static int musb_softconnect_show(struct seq_file *s, void *unused) u8 reg; int connect; - switch (musb->xceiv->otg->state) { + switch (musb_get_state(musb)) { case OTG_STATE_A_HOST: case OTG_STATE_A_WAIT_BCON: pm_runtime_get_sync(musb->controller); @@ -275,7 +275,7 @@ static ssize_t musb_softconnect_write(struct file *file, pm_runtime_get_sync(musb->controller); if (!strncmp(buf, "0", 1)) { - switch (musb->xceiv->otg->state) { + switch (musb_get_state(musb)) { case OTG_STATE_A_HOST: musb_root_disconnect(musb); reg = musb_readb(musb->mregs, MUSB_DEVCTL); @@ -286,7 +286,7 @@ static ssize_t musb_softconnect_write(struct file *file, break; } } else if (!strncmp(buf, "1", 1)) { - switch (musb->xceiv->otg->state) { + switch (musb_get_state(musb)) { case OTG_STATE_A_WAIT_BCON: /* * musb_save_context() called in musb_runtime_suspend() diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c index 6704a62a1665..b5c7deb288d2 100644 --- a/drivers/usb/musb/musb_gadget.c +++ b/drivers/usb/musb/musb_gadget.c @@ -1523,7 +1523,7 @@ static int musb_gadget_wakeup(struct usb_gadget *gadget) spin_lock_irqsave(&musb->lock, flags); - switch (musb->xceiv->otg->state) { + switch (musb_get_state(musb)) { case OTG_STATE_B_PERIPHERAL: /* NOTE: OTG state machine doesn't include B_SUSPENDED; * that's part of the standard usb 1.1 state machine, and @@ -1787,7 +1787,7 @@ int musb_gadget_setup(struct musb *musb) musb->g.speed = USB_SPEED_UNKNOWN; MUSB_DEV_MODE(musb); - musb->xceiv->otg->state = OTG_STATE_B_IDLE; + musb_set_state(musb, OTG_STATE_B_IDLE); /* this "gadget" abstracts/virtualizes the controller */ musb->g.name = musb_driver_name; @@ -1852,7 +1852,7 @@ static int musb_gadget_start(struct usb_gadget *g, musb->is_active = 1; otg_set_peripheral(otg, &musb->g); - musb->xceiv->otg->state = 
OTG_STATE_B_IDLE; + musb_set_state(musb, OTG_STATE_B_IDLE); spin_unlock_irqrestore(&musb->lock, flags); musb_start(musb); @@ -1897,7 +1897,7 @@ static int musb_gadget_stop(struct usb_gadget *g) (void) musb_gadget_vbus_draw(&musb->g, 0); - musb->xceiv->otg->state = OTG_STATE_UNDEFINED; + musb_set_state(musb, OTG_STATE_UNDEFINED); musb_stop(musb); otg_set_peripheral(musb->xceiv->otg, NULL); @@ -1926,7 +1926,7 @@ static int musb_gadget_stop(struct usb_gadget *g) void musb_g_resume(struct musb *musb) { musb->is_suspended = 0; - switch (musb->xceiv->otg->state) { + switch (musb_get_state(musb)) { case OTG_STATE_B_IDLE: break; case OTG_STATE_B_WAIT_ACON: @@ -1952,10 +1952,10 @@ void musb_g_suspend(struct musb *musb) devctl = musb_readb(musb->mregs, MUSB_DEVCTL); musb_dbg(musb, "musb_g_suspend: devctl %02x", devctl); - switch (musb->xceiv->otg->state) { + switch (musb_get_state(musb)) { case OTG_STATE_B_IDLE: if ((devctl & MUSB_DEVCTL_VBUS) == MUSB_DEVCTL_VBUS) - musb->xceiv->otg->state = OTG_STATE_B_PERIPHERAL; + musb_set_state(musb, OTG_STATE_B_PERIPHERAL); break; case OTG_STATE_B_PERIPHERAL: musb->is_suspended = 1; @@ -2001,22 +2001,22 @@ void musb_g_disconnect(struct musb *musb) spin_lock(&musb->lock); } - switch (musb->xceiv->otg->state) { + switch (musb_get_state(musb)) { default: musb_dbg(musb, "Unhandled disconnect %s, setting a_idle", usb_otg_state_string(musb->xceiv->otg->state)); - musb->xceiv->otg->state = OTG_STATE_A_IDLE; + musb_set_state(musb, OTG_STATE_A_IDLE); MUSB_HST_MODE(musb); break; case OTG_STATE_A_PERIPHERAL: - musb->xceiv->otg->state = OTG_STATE_A_WAIT_BCON; + musb_set_state(musb, OTG_STATE_A_WAIT_BCON); MUSB_HST_MODE(musb); break; case OTG_STATE_B_WAIT_ACON: case OTG_STATE_B_HOST: case OTG_STATE_B_PERIPHERAL: case OTG_STATE_B_IDLE: - musb->xceiv->otg->state = OTG_STATE_B_IDLE; + musb_set_state(musb, OTG_STATE_B_IDLE); break; case OTG_STATE_B_SRP_INIT: break; @@ -2080,13 +2080,13 @@ __acquires(musb->lock) * In that case, do not rely on devctl for 
setting * peripheral mode. */ - musb->xceiv->otg->state = OTG_STATE_B_PERIPHERAL; + musb_set_state(musb, OTG_STATE_B_PERIPHERAL); musb->g.is_a_peripheral = 0; } else if (devctl & MUSB_DEVCTL_BDEVICE) { - musb->xceiv->otg->state = OTG_STATE_B_PERIPHERAL; + musb_set_state(musb, OTG_STATE_B_PERIPHERAL); musb->g.is_a_peripheral = 0; } else { - musb->xceiv->otg->state = OTG_STATE_A_PERIPHERAL; + musb_set_state(musb, OTG_STATE_A_PERIPHERAL); musb->g.is_a_peripheral = 1; } diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index 9ff7d891b4b7..ed631447a253 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -2501,7 +2501,7 @@ static int musb_bus_suspend(struct usb_hcd *hcd) if (!is_host_active(musb)) return 0; - switch (musb->xceiv->otg->state) { + switch (musb_get_state(musb)) { case OTG_STATE_A_SUSPEND: return 0; case OTG_STATE_A_WAIT_VRISE: @@ -2511,7 +2511,7 @@ static int musb_bus_suspend(struct usb_hcd *hcd) */ devctl = musb_readb(musb->mregs, MUSB_DEVCTL); if ((devctl & MUSB_DEVCTL_VBUS) == MUSB_DEVCTL_VBUS) - musb->xceiv->otg->state = OTG_STATE_A_WAIT_BCON; + musb_set_state(musb, OTG_STATE_A_WAIT_BCON); break; default: break; @@ -2720,7 +2720,7 @@ int musb_host_setup(struct musb *musb, int power_budget) if (musb->port_mode == MUSB_HOST) { MUSB_HST_MODE(musb); - musb->xceiv->otg->state = OTG_STATE_A_IDLE; + musb_set_state(musb, OTG_STATE_A_IDLE); } otg_set_host(musb->xceiv->otg, &hcd->self); /* don't support otg protocols */ diff --git a/drivers/usb/musb/musb_virthub.c b/drivers/usb/musb/musb_virthub.c index cafc69536e1d..d1cfd45d69e3 100644 --- a/drivers/usb/musb/musb_virthub.c +++ b/drivers/usb/musb/musb_virthub.c @@ -43,7 +43,7 @@ void musb_host_finish_resume(struct work_struct *work) musb->port1_status |= USB_PORT_STAT_C_SUSPEND << 16; usb_hcd_poll_rh_status(musb->hcd); /* NOTE: it might really be A_WAIT_BCON ... 
*/ - musb->xceiv->otg->state = OTG_STATE_A_HOST; + musb_set_state(musb, OTG_STATE_A_HOST); spin_unlock_irqrestore(&musb->lock, flags); } @@ -85,9 +85,9 @@ int musb_port_suspend(struct musb *musb, bool do_suspend) musb_dbg(musb, "Root port suspended, power %02x", power); musb->port1_status |= USB_PORT_STAT_SUSPEND; - switch (musb->xceiv->otg->state) { + switch (musb_get_state(musb)) { case OTG_STATE_A_HOST: - musb->xceiv->otg->state = OTG_STATE_A_SUSPEND; + musb_set_state(musb, OTG_STATE_A_SUSPEND); musb->is_active = otg->host->b_hnp_enable; if (musb->is_active) mod_timer(&musb->otg_timer, jiffies @@ -96,7 +96,7 @@ int musb_port_suspend(struct musb *musb, bool do_suspend) musb_platform_try_idle(musb, 0); break; case OTG_STATE_B_HOST: - musb->xceiv->otg->state = OTG_STATE_B_WAIT_ACON; + musb_set_state(musb, OTG_STATE_B_WAIT_ACON); musb->is_active = otg->host->b_hnp_enable; musb_platform_try_idle(musb, 0); break; @@ -123,7 +123,7 @@ void musb_port_reset(struct musb *musb, bool do_reset) u8 power; void __iomem *mbase = musb->mregs; - if (musb->xceiv->otg->state == OTG_STATE_B_IDLE) { + if (musb_get_state(musb) == OTG_STATE_B_IDLE) { musb_dbg(musb, "HNP: Returning from HNP; no hub reset from b_idle"); musb->port1_status &= ~USB_PORT_STAT_RESET; return; @@ -204,20 +204,20 @@ void musb_root_disconnect(struct musb *musb) usb_hcd_poll_rh_status(musb->hcd); musb->is_active = 0; - switch (musb->xceiv->otg->state) { + switch (musb_get_state(musb)) { case OTG_STATE_A_SUSPEND: if (otg->host->b_hnp_enable) { - musb->xceiv->otg->state = OTG_STATE_A_PERIPHERAL; + musb_set_state(musb, OTG_STATE_A_PERIPHERAL); musb->g.is_a_peripheral = 1; break; } fallthrough; case OTG_STATE_A_HOST: - musb->xceiv->otg->state = OTG_STATE_A_WAIT_BCON; + musb_set_state(musb, OTG_STATE_A_WAIT_BCON); musb->is_active = 0; break; case OTG_STATE_A_WAIT_VFALL: - musb->xceiv->otg->state = OTG_STATE_B_IDLE; + musb_set_state(musb, OTG_STATE_B_IDLE); break; default: musb_dbg(musb, "host disconnect (%s)", From 
285f28bfed89a56ed619054f21125b0bd2f0d4d6 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Wed, 26 Oct 2022 19:26:52 +0100 Subject: [PATCH 1132/4122] usb: musb: Add and use inline function musb_otg_state_string The musb_otg_state_string() simply calls usb_otg_state_string(). This will make it easier to get rid of the musb->xceiv dependency later. Signed-off-by: Paul Cercueil Link: https://lore.kernel.org/r/20221026182657.146630-3-paul@crapouillou.net Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_core.c | 43 +++++++++++++++------------------ drivers/usb/musb/musb_core.h | 5 ++++ drivers/usb/musb/musb_gadget.c | 8 +++--- drivers/usb/musb/musb_host.c | 2 +- drivers/usb/musb/musb_virthub.c | 4 +-- 5 files changed, 31 insertions(+), 31 deletions(-) diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index a0fe2516870b..9bf0ebaa3b7c 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -610,13 +610,13 @@ static void musb_otg_timer_func(struct timer_list *t) case OTG_STATE_A_SUSPEND: case OTG_STATE_A_WAIT_BCON: musb_dbg(musb, "HNP: %s timeout", - usb_otg_state_string(musb->xceiv->otg->state)); + musb_otg_state_string(musb)); musb_platform_set_vbus(musb, 0); musb_set_state(musb, OTG_STATE_A_WAIT_VFALL); break; default: musb_dbg(musb, "HNP: Unhandled mode %s", - usb_otg_state_string(musb->xceiv->otg->state)); + musb_otg_state_string(musb)); } spin_unlock_irqrestore(&musb->lock, flags); } @@ -630,14 +630,12 @@ void musb_hnp_stop(struct musb *musb) void __iomem *mbase = musb->mregs; u8 reg; - musb_dbg(musb, "HNP: stop from %s", - usb_otg_state_string(musb->xceiv->otg->state)); + musb_dbg(musb, "HNP: stop from %s", musb_otg_state_string(musb)); switch (musb_get_state(musb)) { case OTG_STATE_A_PERIPHERAL: musb_g_disconnect(musb); - musb_dbg(musb, "HNP: back to %s", - usb_otg_state_string(musb->xceiv->otg->state)); + musb_dbg(musb, "HNP: back to %s", musb_otg_state_string(musb)); break; case OTG_STATE_B_HOST: 
musb_dbg(musb, "HNP: Disabling HR"); @@ -652,7 +650,7 @@ void musb_hnp_stop(struct musb *musb) break; default: musb_dbg(musb, "HNP: Stopping in unknown state %s", - usb_otg_state_string(musb->xceiv->otg->state)); + musb_otg_state_string(musb)); } /* @@ -667,8 +665,7 @@ static void musb_recover_from_babble(struct musb *musb); static void musb_handle_intr_resume(struct musb *musb, u8 devctl) { - musb_dbg(musb, "RESUME (%s)", - usb_otg_state_string(musb->xceiv->otg->state)); + musb_dbg(musb, "RESUME (%s)", musb_otg_state_string(musb)); if (devctl & MUSB_DEVCTL_HM) { switch (musb_get_state(musb)) { @@ -693,7 +690,7 @@ static void musb_handle_intr_resume(struct musb *musb, u8 devctl) default: WARNING("bogus %s RESUME (%s)\n", "host", - usb_otg_state_string(musb->xceiv->otg->state)); + musb_otg_state_string(musb)); } } else { switch (musb_get_state(musb)) { @@ -722,7 +719,7 @@ static void musb_handle_intr_resume(struct musb *musb, u8 devctl) default: WARNING("bogus %s RESUME (%s)\n", "peripheral", - usb_otg_state_string(musb->xceiv->otg->state)); + musb_otg_state_string(musb)); } } } @@ -738,8 +735,7 @@ static irqreturn_t musb_handle_intr_sessreq(struct musb *musb, u8 devctl) return IRQ_HANDLED; } - musb_dbg(musb, "SESSION_REQUEST (%s)", - usb_otg_state_string(musb->xceiv->otg->state)); + musb_dbg(musb, "SESSION_REQUEST (%s)", musb_otg_state_string(musb)); /* IRQ arrives from ID pin sense or (later, if VBUS power * is removed) SRP. responses are time critical: @@ -806,7 +802,7 @@ static void musb_handle_intr_vbuserr(struct musb *musb, u8 devctl) dev_printk(ignore ? 
KERN_DEBUG : KERN_ERR, musb->controller, "VBUS_ERROR in %s (%02x, %s), retry #%d, port1 %08x\n", - usb_otg_state_string(musb->xceiv->otg->state), + musb_otg_state_string(musb), devctl, ({ char *s; switch (devctl & MUSB_DEVCTL_VBUS) { @@ -831,7 +827,7 @@ static void musb_handle_intr_vbuserr(struct musb *musb, u8 devctl) static void musb_handle_intr_suspend(struct musb *musb, u8 devctl) { musb_dbg(musb, "SUSPEND (%s) devctl %02x", - usb_otg_state_string(musb->xceiv->otg->state), devctl); + musb_otg_state_string(musb), devctl); switch (musb_get_state(musb)) { case OTG_STATE_A_PERIPHERAL: @@ -939,13 +935,13 @@ b_host: musb_host_poke_root_hub(musb); musb_dbg(musb, "CONNECT (%s) devctl %02x", - usb_otg_state_string(musb->xceiv->otg->state), devctl); + musb_otg_state_string(musb), devctl); } static void musb_handle_intr_disconnect(struct musb *musb, u8 devctl) { musb_dbg(musb, "DISCONNECT (%s) as %s, devctl %02x", - usb_otg_state_string(musb->xceiv->otg->state), + musb_otg_state_string(musb), MUSB_MODE(musb), devctl); switch (musb_get_state(musb)) { @@ -981,7 +977,7 @@ static void musb_handle_intr_disconnect(struct musb *musb, u8 devctl) break; default: WARNING("unhandled DISCONNECT transition (%s)\n", - usb_otg_state_string(musb->xceiv->otg->state)); + musb_otg_state_string(musb)); break; } } @@ -1004,8 +1000,7 @@ static void musb_handle_intr_reset(struct musb *musb) dev_err(musb->controller, "Babble\n"); musb_recover_from_babble(musb); } else { - musb_dbg(musb, "BUS RESET as %s", - usb_otg_state_string(musb->xceiv->otg->state)); + musb_dbg(musb, "BUS RESET as %s", musb_otg_state_string(musb)); switch (musb_get_state(musb)) { case OTG_STATE_A_SUSPEND: musb_g_reset(musb); @@ -1013,7 +1008,7 @@ static void musb_handle_intr_reset(struct musb *musb) case OTG_STATE_A_WAIT_BCON: /* OPT TD.4.7-900ms */ /* never use invalid T(a_wait_bcon) */ musb_dbg(musb, "HNP: in %s, %d msec timeout", - usb_otg_state_string(musb->xceiv->otg->state), + musb_otg_state_string(musb), 
TA_WAIT_BCON(musb)); mod_timer(&musb->otg_timer, jiffies + msecs_to_jiffies(TA_WAIT_BCON(musb))); @@ -1024,7 +1019,7 @@ static void musb_handle_intr_reset(struct musb *musb) break; case OTG_STATE_B_WAIT_ACON: musb_dbg(musb, "HNP: RESET (%s), to b_peripheral", - usb_otg_state_string(musb->xceiv->otg->state)); + musb_otg_state_string(musb)); musb_set_state(musb, OTG_STATE_B_PERIPHERAL); musb_g_reset(musb); break; @@ -1036,7 +1031,7 @@ static void musb_handle_intr_reset(struct musb *musb) break; default: musb_dbg(musb, "Unhandled BUS RESET as %s", - usb_otg_state_string(musb->xceiv->otg->state)); + musb_otg_state_string(musb)); } } } @@ -1863,7 +1858,7 @@ mode_show(struct device *dev, struct device_attribute *attr, char *buf) int ret; spin_lock_irqsave(&musb->lock, flags); - ret = sprintf(buf, "%s\n", usb_otg_state_string(musb->xceiv->otg->state)); + ret = sprintf(buf, "%s\n", musb_otg_state_string(musb)); spin_unlock_irqrestore(&musb->lock, flags); return ret; diff --git a/drivers/usb/musb/musb_core.h b/drivers/usb/musb/musb_core.h index 4a4d485d37bd..a497c44ab0da 100644 --- a/drivers/usb/musb/musb_core.h +++ b/drivers/usb/musb/musb_core.h @@ -603,6 +603,11 @@ static inline enum usb_otg_state musb_get_state(struct musb *musb) return musb->xceiv->otg->state; } +static inline const char *musb_otg_state_string(struct musb *musb) +{ + return usb_otg_state_string(musb_get_state(musb)); +} + /* * gets the "dr_mode" property from DT and converts it into musb_mode * if the property is not found or not recognized returns MUSB_OTG diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c index b5c7deb288d2..9f5c531de387 100644 --- a/drivers/usb/musb/musb_gadget.c +++ b/drivers/usb/musb/musb_gadget.c @@ -1564,7 +1564,7 @@ static int musb_gadget_wakeup(struct usb_gadget *gadget) goto done; default: musb_dbg(musb, "Unhandled wake: %s", - usb_otg_state_string(musb->xceiv->otg->state)); + musb_otg_state_string(musb)); goto done; } @@ -1940,7 +1940,7 @@ void 
musb_g_resume(struct musb *musb) break; default: WARNING("unhandled RESUME transition (%s)\n", - usb_otg_state_string(musb->xceiv->otg->state)); + musb_otg_state_string(musb)); } } @@ -1970,7 +1970,7 @@ void musb_g_suspend(struct musb *musb) * A_PERIPHERAL may need care too */ WARNING("unhandled SUSPEND transition (%s)", - usb_otg_state_string(musb->xceiv->otg->state)); + musb_otg_state_string(musb)); } } @@ -2004,7 +2004,7 @@ void musb_g_disconnect(struct musb *musb) switch (musb_get_state(musb)) { default: musb_dbg(musb, "Unhandled disconnect %s, setting a_idle", - usb_otg_state_string(musb->xceiv->otg->state)); + musb_otg_state_string(musb)); musb_set_state(musb, OTG_STATE_A_IDLE); MUSB_HST_MODE(musb); break; diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index ed631447a253..b7553da7f4bc 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -2519,7 +2519,7 @@ static int musb_bus_suspend(struct usb_hcd *hcd) if (musb->is_active) { WARNING("trying to suspend as %s while active\n", - usb_otg_state_string(musb->xceiv->otg->state)); + musb_otg_state_string(musb)); return -EBUSY; } else return 0; diff --git a/drivers/usb/musb/musb_virthub.c b/drivers/usb/musb/musb_virthub.c index d1cfd45d69e3..7eb929d75280 100644 --- a/drivers/usb/musb/musb_virthub.c +++ b/drivers/usb/musb/musb_virthub.c @@ -102,7 +102,7 @@ int musb_port_suspend(struct musb *musb, bool do_suspend) break; default: musb_dbg(musb, "bogus rh suspend? 
%s", - usb_otg_state_string(musb->xceiv->otg->state)); + musb_otg_state_string(musb)); } } else if (power & MUSB_POWER_SUSPENDM) { power &= ~MUSB_POWER_SUSPENDM; @@ -221,7 +221,7 @@ void musb_root_disconnect(struct musb *musb) break; default: musb_dbg(musb, "host disconnect (%s)", - usb_otg_state_string(musb->xceiv->otg->state)); + musb_otg_state_string(musb)); } } EXPORT_SYMBOL_GPL(musb_root_disconnect); From a6d45ea063f0a9272f62925c8150439af5640e74 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Wed, 26 Oct 2022 19:26:53 +0100 Subject: [PATCH 1133/4122] usb: musb: Allow running without CONFIG_USB_PHY Modify the core so that musb->xceiv is never dereferenced without being checked first. Signed-off-by: Paul Cercueil Link: https://lore.kernel.org/r/20221026182657.146630-4-paul@crapouillou.net Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_core.c | 2 +- drivers/usb/musb/musb_core.h | 12 ++++++++++-- drivers/usb/musb/musb_gadget.c | 21 +++++++++++++-------- drivers/usb/musb/musb_host.c | 8 ++++++-- drivers/usb/musb/musb_virthub.c | 11 +++++------ 5 files changed, 35 insertions(+), 19 deletions(-) diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index 9bf0ebaa3b7c..648bb6021c5e 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -2448,7 +2448,7 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl) else musb->io.set_toggle = musb_default_set_toggle; - if (!musb->xceiv->io_ops) { + if (IS_ENABLED(CONFIG_USB_PHY) && musb->xceiv && !musb->xceiv->io_ops) { musb->xceiv->io_dev = musb->controller; musb->xceiv->io_priv = musb->mregs; musb->xceiv->io_ops = &musb_ulpi_access; diff --git a/drivers/usb/musb/musb_core.h b/drivers/usb/musb/musb_core.h index a497c44ab0da..b7588d11cfc5 100644 --- a/drivers/usb/musb/musb_core.h +++ b/drivers/usb/musb/musb_core.h @@ -339,6 +339,8 @@ struct musb { struct usb_phy *xceiv; struct phy *phy; + enum usb_otg_state otg_state; + int nIrq; unsigned irq_wake:1; @@ 
-595,12 +597,18 @@ static inline void musb_platform_clear_ep_rxintr(struct musb *musb, int epnum) static inline void musb_set_state(struct musb *musb, enum usb_otg_state otg_state) { - musb->xceiv->otg->state = otg_state; + if (musb->xceiv) + musb->xceiv->otg->state = otg_state; + else + musb->otg_state = otg_state; } static inline enum usb_otg_state musb_get_state(struct musb *musb) { - return musb->xceiv->otg->state; + if (musb->xceiv) + return musb->xceiv->otg->state; + + return musb->otg_state; } static inline const char *musb_otg_state_string(struct musb *musb) diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c index 9f5c531de387..66c8b32b16bb 100644 --- a/drivers/usb/musb/musb_gadget.c +++ b/drivers/usb/musb/musb_gadget.c @@ -1552,9 +1552,11 @@ static int musb_gadget_wakeup(struct usb_gadget *gadget) break; } - spin_unlock_irqrestore(&musb->lock, flags); - otg_start_srp(musb->xceiv->otg); - spin_lock_irqsave(&musb->lock, flags); + if (musb->xceiv) { + spin_unlock_irqrestore(&musb->lock, flags); + otg_start_srp(musb->xceiv->otg); + spin_lock_irqsave(&musb->lock, flags); + } /* Block idling for at least 1s */ musb_platform_try_idle(musb, @@ -1628,7 +1630,7 @@ static int musb_gadget_vbus_draw(struct usb_gadget *gadget, unsigned mA) { struct musb *musb = gadget_to_musb(gadget); - if (!musb->xceiv->set_power) + if (!musb->xceiv || !musb->xceiv->set_power) return -EOPNOTSUPP; return usb_phy_set_power(musb->xceiv, mA); } @@ -1834,7 +1836,6 @@ static int musb_gadget_start(struct usb_gadget *g, struct usb_gadget_driver *driver) { struct musb *musb = gadget_to_musb(g); - struct usb_otg *otg = musb->xceiv->otg; unsigned long flags; int retval = 0; @@ -1851,7 +1852,9 @@ static int musb_gadget_start(struct usb_gadget *g, spin_lock_irqsave(&musb->lock, flags); musb->is_active = 1; - otg_set_peripheral(otg, &musb->g); + if (musb->xceiv) + otg_set_peripheral(musb->xceiv->otg, &musb->g); + musb_set_state(musb, OTG_STATE_B_IDLE); 
spin_unlock_irqrestore(&musb->lock, flags); @@ -1861,7 +1864,7 @@ static int musb_gadget_start(struct usb_gadget *g, * handles power budgeting ... this way also * ensures HdrcStart is indirectly called. */ - if (musb->xceiv->last_event == USB_EVENT_ID) + if (musb->xceiv && musb->xceiv->last_event == USB_EVENT_ID) musb_platform_set_vbus(musb, 1); pm_runtime_mark_last_busy(musb->controller); @@ -1899,7 +1902,9 @@ static int musb_gadget_stop(struct usb_gadget *g) musb_set_state(musb, OTG_STATE_UNDEFINED); musb_stop(musb); - otg_set_peripheral(musb->xceiv->otg, NULL); + + if (musb->xceiv) + otg_set_peripheral(musb->xceiv->otg, NULL); musb->is_active = 0; musb->gadget_driver = NULL; diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index b7553da7f4bc..8ad39ecd3b6f 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -2722,10 +2722,14 @@ int musb_host_setup(struct musb *musb, int power_budget) MUSB_HST_MODE(musb); musb_set_state(musb, OTG_STATE_A_IDLE); } - otg_set_host(musb->xceiv->otg, &hcd->self); + + if (musb->xceiv) { + otg_set_host(musb->xceiv->otg, &hcd->self); + musb->xceiv->otg->host = &hcd->self; + } + /* don't support otg protocols */ hcd->self.otg_port = 0; - musb->xceiv->otg->host = &hcd->self; hcd->power_budget = 2 * (power_budget ? 
: 250); hcd->skip_phy_initialization = 1; diff --git a/drivers/usb/musb/musb_virthub.c b/drivers/usb/musb/musb_virthub.c index 7eb929d75280..2b2164e028b3 100644 --- a/drivers/usb/musb/musb_virthub.c +++ b/drivers/usb/musb/musb_virthub.c @@ -50,7 +50,6 @@ void musb_host_finish_resume(struct work_struct *work) int musb_port_suspend(struct musb *musb, bool do_suspend) { - struct usb_otg *otg = musb->xceiv->otg; u8 power; void __iomem *mbase = musb->mregs; @@ -88,7 +87,8 @@ int musb_port_suspend(struct musb *musb, bool do_suspend) switch (musb_get_state(musb)) { case OTG_STATE_A_HOST: musb_set_state(musb, OTG_STATE_A_SUSPEND); - musb->is_active = otg->host->b_hnp_enable; + musb->is_active = musb->xceiv && + musb->xceiv->otg->host->b_hnp_enable; if (musb->is_active) mod_timer(&musb->otg_timer, jiffies + msecs_to_jiffies( @@ -97,7 +97,8 @@ int musb_port_suspend(struct musb *musb, bool do_suspend) break; case OTG_STATE_B_HOST: musb_set_state(musb, OTG_STATE_B_WAIT_ACON); - musb->is_active = otg->host->b_hnp_enable; + musb->is_active = musb->xceiv && + musb->xceiv->otg->host->b_hnp_enable; musb_platform_try_idle(musb, 0); break; default: @@ -196,8 +197,6 @@ void musb_port_reset(struct musb *musb, bool do_reset) void musb_root_disconnect(struct musb *musb) { - struct usb_otg *otg = musb->xceiv->otg; - musb->port1_status = USB_PORT_STAT_POWER | (USB_PORT_STAT_C_CONNECTION << 16); @@ -206,7 +205,7 @@ void musb_root_disconnect(struct musb *musb) switch (musb_get_state(musb)) { case OTG_STATE_A_SUSPEND: - if (otg->host->b_hnp_enable) { + if (musb->xceiv && musb->xceiv->otg->host->b_hnp_enable) { musb_set_state(musb, OTG_STATE_A_PERIPHERAL); musb->g.is_a_peripheral = 1; break; From 0afddf1e49d1172a87c7a73002e62aa66b6af677 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Wed, 26 Oct 2022 19:26:54 +0100 Subject: [PATCH 1134/4122] usb: musb: Support setting OTG mode using generic PHY When musb->xceiv is not provided but musb->phy is, support setting the OTG mode (host, 
peripheral) using the generic PHY framework. Signed-off-by: Paul Cercueil Link: https://lore.kernel.org/r/20221026182657.146630-5-paul@crapouillou.net Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_gadget.c | 4 ++++ drivers/usb/musb/musb_host.c | 2 ++ 2 files changed, 6 insertions(+) diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c index 66c8b32b16bb..6cb9514ef340 100644 --- a/drivers/usb/musb/musb_gadget.c +++ b/drivers/usb/musb/musb_gadget.c @@ -1854,6 +1854,8 @@ static int musb_gadget_start(struct usb_gadget *g, if (musb->xceiv) otg_set_peripheral(musb->xceiv->otg, &musb->g); + else + phy_set_mode(musb->phy, PHY_MODE_USB_DEVICE); musb_set_state(musb, OTG_STATE_B_IDLE); spin_unlock_irqrestore(&musb->lock, flags); @@ -1905,6 +1907,8 @@ static int musb_gadget_stop(struct usb_gadget *g) if (musb->xceiv) otg_set_peripheral(musb->xceiv->otg, NULL); + else + phy_set_mode(musb->phy, PHY_MODE_INVALID); musb->is_active = 0; musb->gadget_driver = NULL; diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index 8ad39ecd3b6f..a02c29216955 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -2726,6 +2726,8 @@ int musb_host_setup(struct musb *musb, int power_budget) if (musb->xceiv) { otg_set_host(musb->xceiv->otg, &hcd->self); musb->xceiv->otg->host = &hcd->self; + } else { + phy_set_mode(musb->phy, PHY_MODE_USB_HOST); } /* don't support otg protocols */ From d9b324307777404f978660b5752fb264ee344a22 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Wed, 26 Oct 2022 19:26:55 +0100 Subject: [PATCH 1135/4122] usb: musb: jz4740: Don't disable external hubs The jz4740-musb driver does not really support OTG, so it has no reason to disable external hubs, especially since it's a system-wide setting and we don't want external hubs to be disabled for other USB host controllers. 
Signed-off-by: Paul Cercueil Link: https://lore.kernel.org/r/20221026182657.146630-6-paul@crapouillou.net Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/usb/musb/Kconfig b/drivers/usb/musb/Kconfig index f9eec666103c..290df4d5d5ce 100644 --- a/drivers/usb/musb/Kconfig +++ b/drivers/usb/musb/Kconfig @@ -107,7 +107,6 @@ config USB_MUSB_JZ4740 depends on OF depends on MIPS || COMPILE_TEST depends on USB_MUSB_GADGET - depends on USB=n || USB_OTG_DISABLE_EXTERNAL_HUB select USB_ROLE_SWITCH config USB_MUSB_MEDIATEK From 9cd074798ef6bf9c361c78084abc2c30e551ecdc Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Wed, 26 Oct 2022 19:26:56 +0100 Subject: [PATCH 1136/4122] usb: musb: jz4740: Support the generic PHY framework Support PHYs implemented using the generic PHY framework instead of the deprecated USB-PHY framework. Signed-off-by: Paul Cercueil Link: https://lore.kernel.org/r/20221026182657.146630-7-paul@crapouillou.net Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/jz4740.c | 62 ++++++++++++++++++++++++++++++++++----- 1 file changed, 54 insertions(+), 8 deletions(-) diff --git a/drivers/usb/musb/jz4740.c b/drivers/usb/musb/jz4740.c index d1e4e0deb753..c7b1d2a394d9 100644 --- a/drivers/usb/musb/jz4740.c +++ b/drivers/usb/musb/jz4740.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -81,6 +82,9 @@ static int jz4740_musb_role_switch_set(struct usb_role_switch *sw, struct jz4740_glue *glue = usb_role_switch_get_drvdata(sw); struct usb_phy *phy = glue->musb->xceiv; + if (!phy) + return 0; + switch (role) { case USB_ROLE_NONE: atomic_notifier_call_chain(&phy->notifier, USB_EVENT_NONE, phy); @@ -105,21 +109,51 @@ static int jz4740_musb_init(struct musb *musb) .driver_data = glue, .fwnode = dev_fwnode(dev), }; + int err; glue->musb = musb; - if (dev->of_node) - musb->xceiv = devm_usb_get_phy_by_phandle(dev, "phys", 0); - else - musb->xceiv = 
devm_usb_get_phy(dev, USB_PHY_TYPE_USB2); - if (IS_ERR(musb->xceiv)) - return dev_err_probe(dev, PTR_ERR(musb->xceiv), - "No transceiver configured\n"); + if (IS_ENABLED(CONFIG_GENERIC_PHY)) { + musb->phy = devm_of_phy_get_by_index(dev, dev->of_node, 0); + if (IS_ERR(musb->phy)) { + err = PTR_ERR(musb->phy); + if (err != -ENODEV) { + dev_err(dev, "Unable to get PHY\n"); + return err; + } + + musb->phy = NULL; + } + } + + if (musb->phy) { + err = phy_init(musb->phy); + if (err) { + dev_err(dev, "Failed to init PHY\n"); + return err; + } + + err = phy_power_on(musb->phy); + if (err) { + dev_err(dev, "Unable to power on PHY\n"); + goto err_phy_shutdown; + } + } else { + if (dev->of_node) + musb->xceiv = devm_usb_get_phy_by_phandle(dev, "phys", 0); + else + musb->xceiv = devm_usb_get_phy(dev, USB_PHY_TYPE_USB2); + if (IS_ERR(musb->xceiv)) { + dev_err(dev, "No transceiver configured\n"); + return PTR_ERR(musb->xceiv); + } + } glue->role_sw = usb_role_switch_register(dev, &role_sw_desc); if (IS_ERR(glue->role_sw)) { dev_err(dev, "Failed to register USB role switch\n"); - return PTR_ERR(glue->role_sw); + err = PTR_ERR(glue->role_sw); + goto err_phy_power_down; } /* @@ -131,6 +165,14 @@ static int jz4740_musb_init(struct musb *musb) musb->isr = jz4740_musb_interrupt; return 0; + +err_phy_power_down: + if (musb->phy) + phy_power_off(musb->phy); +err_phy_shutdown: + if (musb->phy) + phy_exit(musb->phy); + return err; } static int jz4740_musb_exit(struct musb *musb) @@ -138,6 +180,10 @@ static int jz4740_musb_exit(struct musb *musb) struct jz4740_glue *glue = dev_get_drvdata(musb->controller->parent); usb_role_switch_unregister(glue->role_sw); + if (musb->phy) { + phy_power_off(musb->phy); + phy_exit(musb->phy); + } return 0; } From 3f2d1f2e40666d6536b663c5050a3a23ca5d9ce8 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Wed, 26 Oct 2022 19:26:57 +0100 Subject: [PATCH 1137/4122] usb: phy: jz4770: Remove driver This driver has been replaced by the Ingenic PHY driver that 
uses the generic PHY framework. Signed-off-by: Paul Cercueil Link: https://lore.kernel.org/r/20221026182657.146630-8-paul@crapouillou.net Signed-off-by: Greg Kroah-Hartman --- drivers/usb/phy/Kconfig | 8 - drivers/usb/phy/Makefile | 1 - drivers/usb/phy/phy-jz4770.c | 353 ----------------------------------- 3 files changed, 362 deletions(-) delete mode 100644 drivers/usb/phy/phy-jz4770.c diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig index efdcafdbe46d..915df5726a5c 100644 --- a/drivers/usb/phy/Kconfig +++ b/drivers/usb/phy/Kconfig @@ -189,12 +189,4 @@ config USB_ULPI_VIEWPORT Provides read/write operations to the ULPI phy register set for controllers with a viewport register (e.g. Chipidea/ARC controllers). -config JZ4770_PHY - tristate "Ingenic SoCs Transceiver Driver" - depends on MIPS || COMPILE_TEST - select USB_PHY - help - This driver provides PHY support for the USB controller found - on the JZ-series and X-series SoCs from Ingenic. - endmenu diff --git a/drivers/usb/phy/Makefile b/drivers/usb/phy/Makefile index b352bdbe8712..df1d99010079 100644 --- a/drivers/usb/phy/Makefile +++ b/drivers/usb/phy/Makefile @@ -24,4 +24,3 @@ obj-$(CONFIG_USB_MXS_PHY) += phy-mxs-usb.o obj-$(CONFIG_USB_ULPI) += phy-ulpi.o obj-$(CONFIG_USB_ULPI_VIEWPORT) += phy-ulpi-viewport.o obj-$(CONFIG_KEYSTONE_USB_PHY) += phy-keystone.o -obj-$(CONFIG_JZ4770_PHY) += phy-jz4770.o diff --git a/drivers/usb/phy/phy-jz4770.c b/drivers/usb/phy/phy-jz4770.c deleted file mode 100644 index f16adcacdce3..000000000000 --- a/drivers/usb/phy/phy-jz4770.c +++ /dev/null @@ -1,353 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Ingenic SoCs USB PHY driver - * Copyright (c) Paul Cercueil - * Copyright (c) 漆鹏振 (Qi Pengzhen) - * Copyright (c) 周琰杰 (Zhou Yanjie) - */ - -#include -#include -#include -#include -#include -#include -#include - -/* OTGPHY register offsets */ -#define REG_USBPCR_OFFSET 0x00 -#define REG_USBRDT_OFFSET 0x04 -#define REG_USBVBFIL_OFFSET 0x08 -#define 
REG_USBPCR1_OFFSET 0x0c - -/* bits within the USBPCR register */ -#define USBPCR_USB_MODE BIT(31) -#define USBPCR_AVLD_REG BIT(30) -#define USBPCR_COMMONONN BIT(25) -#define USBPCR_VBUSVLDEXT BIT(24) -#define USBPCR_VBUSVLDEXTSEL BIT(23) -#define USBPCR_POR BIT(22) -#define USBPCR_SIDDQ BIT(21) -#define USBPCR_OTG_DISABLE BIT(20) -#define USBPCR_TXPREEMPHTUNE BIT(6) - -#define USBPCR_IDPULLUP_LSB 28 -#define USBPCR_IDPULLUP_MASK GENMASK(29, USBPCR_IDPULLUP_LSB) -#define USBPCR_IDPULLUP_ALWAYS (0x2 << USBPCR_IDPULLUP_LSB) -#define USBPCR_IDPULLUP_SUSPEND (0x1 << USBPCR_IDPULLUP_LSB) -#define USBPCR_IDPULLUP_OTG (0x0 << USBPCR_IDPULLUP_LSB) - -#define USBPCR_COMPDISTUNE_LSB 17 -#define USBPCR_COMPDISTUNE_MASK GENMASK(19, USBPCR_COMPDISTUNE_LSB) -#define USBPCR_COMPDISTUNE_DFT (0x4 << USBPCR_COMPDISTUNE_LSB) - -#define USBPCR_OTGTUNE_LSB 14 -#define USBPCR_OTGTUNE_MASK GENMASK(16, USBPCR_OTGTUNE_LSB) -#define USBPCR_OTGTUNE_DFT (0x4 << USBPCR_OTGTUNE_LSB) - -#define USBPCR_SQRXTUNE_LSB 11 -#define USBPCR_SQRXTUNE_MASK GENMASK(13, USBPCR_SQRXTUNE_LSB) -#define USBPCR_SQRXTUNE_DCR_20PCT (0x7 << USBPCR_SQRXTUNE_LSB) -#define USBPCR_SQRXTUNE_DFT (0x3 << USBPCR_SQRXTUNE_LSB) - -#define USBPCR_TXFSLSTUNE_LSB 7 -#define USBPCR_TXFSLSTUNE_MASK GENMASK(10, USBPCR_TXFSLSTUNE_LSB) -#define USBPCR_TXFSLSTUNE_DCR_50PPT (0xf << USBPCR_TXFSLSTUNE_LSB) -#define USBPCR_TXFSLSTUNE_DCR_25PPT (0x7 << USBPCR_TXFSLSTUNE_LSB) -#define USBPCR_TXFSLSTUNE_DFT (0x3 << USBPCR_TXFSLSTUNE_LSB) -#define USBPCR_TXFSLSTUNE_INC_25PPT (0x1 << USBPCR_TXFSLSTUNE_LSB) -#define USBPCR_TXFSLSTUNE_INC_50PPT (0x0 << USBPCR_TXFSLSTUNE_LSB) - -#define USBPCR_TXHSXVTUNE_LSB 4 -#define USBPCR_TXHSXVTUNE_MASK GENMASK(5, USBPCR_TXHSXVTUNE_LSB) -#define USBPCR_TXHSXVTUNE_DFT (0x3 << USBPCR_TXHSXVTUNE_LSB) -#define USBPCR_TXHSXVTUNE_DCR_15MV (0x1 << USBPCR_TXHSXVTUNE_LSB) - -#define USBPCR_TXRISETUNE_LSB 4 -#define USBPCR_TXRISETUNE_MASK GENMASK(5, USBPCR_TXRISETUNE_LSB) -#define USBPCR_TXRISETUNE_DFT (0x3 << 
USBPCR_TXRISETUNE_LSB) - -#define USBPCR_TXVREFTUNE_LSB 0 -#define USBPCR_TXVREFTUNE_MASK GENMASK(3, USBPCR_TXVREFTUNE_LSB) -#define USBPCR_TXVREFTUNE_INC_25PPT (0x7 << USBPCR_TXVREFTUNE_LSB) -#define USBPCR_TXVREFTUNE_DFT (0x5 << USBPCR_TXVREFTUNE_LSB) - -/* bits within the USBRDTR register */ -#define USBRDT_UTMI_RST BIT(27) -#define USBRDT_HB_MASK BIT(26) -#define USBRDT_VBFIL_LD_EN BIT(25) -#define USBRDT_IDDIG_EN BIT(24) -#define USBRDT_IDDIG_REG BIT(23) -#define USBRDT_VBFIL_EN BIT(2) - -/* bits within the USBPCR1 register */ -#define USBPCR1_BVLD_REG BIT(31) -#define USBPCR1_DPPD BIT(29) -#define USBPCR1_DMPD BIT(28) -#define USBPCR1_USB_SEL BIT(28) -#define USBPCR1_WORD_IF_16BIT BIT(19) - -enum ingenic_usb_phy_version { - ID_JZ4770, - ID_JZ4780, - ID_X1000, - ID_X1830, -}; - -struct ingenic_soc_info { - enum ingenic_usb_phy_version version; - - void (*usb_phy_init)(struct usb_phy *phy); -}; - -struct jz4770_phy { - const struct ingenic_soc_info *soc_info; - - struct usb_phy phy; - struct usb_otg otg; - struct device *dev; - void __iomem *base; - struct clk *clk; - struct regulator *vcc_supply; -}; - -static inline struct jz4770_phy *otg_to_jz4770_phy(struct usb_otg *otg) -{ - return container_of(otg, struct jz4770_phy, otg); -} - -static inline struct jz4770_phy *phy_to_jz4770_phy(struct usb_phy *phy) -{ - return container_of(phy, struct jz4770_phy, phy); -} - -static int ingenic_usb_phy_set_peripheral(struct usb_otg *otg, - struct usb_gadget *gadget) -{ - struct jz4770_phy *priv = otg_to_jz4770_phy(otg); - u32 reg; - - if (priv->soc_info->version >= ID_X1000) { - reg = readl(priv->base + REG_USBPCR1_OFFSET); - reg |= USBPCR1_BVLD_REG; - writel(reg, priv->base + REG_USBPCR1_OFFSET); - } - - reg = readl(priv->base + REG_USBPCR_OFFSET); - reg &= ~USBPCR_USB_MODE; - reg |= USBPCR_VBUSVLDEXT | USBPCR_VBUSVLDEXTSEL | USBPCR_OTG_DISABLE; - writel(reg, priv->base + REG_USBPCR_OFFSET); - - return 0; -} - -static int ingenic_usb_phy_set_host(struct usb_otg *otg, 
struct usb_bus *host) -{ - struct jz4770_phy *priv = otg_to_jz4770_phy(otg); - u32 reg; - - reg = readl(priv->base + REG_USBPCR_OFFSET); - reg &= ~(USBPCR_VBUSVLDEXT | USBPCR_VBUSVLDEXTSEL | USBPCR_OTG_DISABLE); - reg |= USBPCR_USB_MODE; - writel(reg, priv->base + REG_USBPCR_OFFSET); - - return 0; -} - -static int ingenic_usb_phy_init(struct usb_phy *phy) -{ - struct jz4770_phy *priv = phy_to_jz4770_phy(phy); - int err; - u32 reg; - - err = regulator_enable(priv->vcc_supply); - if (err) { - dev_err(priv->dev, "Unable to enable VCC: %d\n", err); - return err; - } - - err = clk_prepare_enable(priv->clk); - if (err) { - dev_err(priv->dev, "Unable to start clock: %d\n", err); - return err; - } - - priv->soc_info->usb_phy_init(phy); - - /* Wait for PHY to reset */ - usleep_range(30, 300); - reg = readl(priv->base + REG_USBPCR_OFFSET); - writel(reg & ~USBPCR_POR, priv->base + REG_USBPCR_OFFSET); - usleep_range(300, 1000); - - return 0; -} - -static void ingenic_usb_phy_shutdown(struct usb_phy *phy) -{ - struct jz4770_phy *priv = phy_to_jz4770_phy(phy); - - clk_disable_unprepare(priv->clk); - regulator_disable(priv->vcc_supply); -} - -static void ingenic_usb_phy_remove(void *phy) -{ - usb_remove_phy(phy); -} - -static void jz4770_usb_phy_init(struct usb_phy *phy) -{ - struct jz4770_phy *priv = phy_to_jz4770_phy(phy); - u32 reg; - - reg = USBPCR_AVLD_REG | USBPCR_COMMONONN | USBPCR_IDPULLUP_ALWAYS | - USBPCR_COMPDISTUNE_DFT | USBPCR_OTGTUNE_DFT | USBPCR_SQRXTUNE_DFT | - USBPCR_TXFSLSTUNE_DFT | USBPCR_TXRISETUNE_DFT | USBPCR_TXVREFTUNE_DFT | - USBPCR_POR; - writel(reg, priv->base + REG_USBPCR_OFFSET); -} - -static void jz4780_usb_phy_init(struct usb_phy *phy) -{ - struct jz4770_phy *priv = phy_to_jz4770_phy(phy); - u32 reg; - - reg = readl(priv->base + REG_USBPCR1_OFFSET) | USBPCR1_USB_SEL | - USBPCR1_WORD_IF_16BIT; - writel(reg, priv->base + REG_USBPCR1_OFFSET); - - reg = USBPCR_TXPREEMPHTUNE | USBPCR_COMMONONN | USBPCR_POR; - writel(reg, priv->base + REG_USBPCR_OFFSET); 
-} - -static void x1000_usb_phy_init(struct usb_phy *phy) -{ - struct jz4770_phy *priv = phy_to_jz4770_phy(phy); - u32 reg; - - reg = readl(priv->base + REG_USBPCR1_OFFSET) | USBPCR1_WORD_IF_16BIT; - writel(reg, priv->base + REG_USBPCR1_OFFSET); - - reg = USBPCR_SQRXTUNE_DCR_20PCT | USBPCR_TXPREEMPHTUNE | - USBPCR_TXHSXVTUNE_DCR_15MV | USBPCR_TXVREFTUNE_INC_25PPT | - USBPCR_COMMONONN | USBPCR_POR; - writel(reg, priv->base + REG_USBPCR_OFFSET); -} - -static void x1830_usb_phy_init(struct usb_phy *phy) -{ - struct jz4770_phy *priv = phy_to_jz4770_phy(phy); - u32 reg; - - /* rdt */ - writel(USBRDT_VBFIL_EN | USBRDT_UTMI_RST, priv->base + REG_USBRDT_OFFSET); - - reg = readl(priv->base + REG_USBPCR1_OFFSET) | USBPCR1_WORD_IF_16BIT | - USBPCR1_DMPD | USBPCR1_DPPD; - writel(reg, priv->base + REG_USBPCR1_OFFSET); - - reg = USBPCR_IDPULLUP_OTG | USBPCR_VBUSVLDEXT | USBPCR_TXPREEMPHTUNE | - USBPCR_COMMONONN | USBPCR_POR; - writel(reg, priv->base + REG_USBPCR_OFFSET); -} - -static const struct ingenic_soc_info jz4770_soc_info = { - .version = ID_JZ4770, - - .usb_phy_init = jz4770_usb_phy_init, -}; - -static const struct ingenic_soc_info jz4780_soc_info = { - .version = ID_JZ4780, - - .usb_phy_init = jz4780_usb_phy_init, -}; - -static const struct ingenic_soc_info x1000_soc_info = { - .version = ID_X1000, - - .usb_phy_init = x1000_usb_phy_init, -}; - -static const struct ingenic_soc_info x1830_soc_info = { - .version = ID_X1830, - - .usb_phy_init = x1830_usb_phy_init, -}; - -static const struct of_device_id ingenic_usb_phy_of_matches[] = { - { .compatible = "ingenic,jz4770-phy", .data = &jz4770_soc_info }, - { .compatible = "ingenic,jz4780-phy", .data = &jz4780_soc_info }, - { .compatible = "ingenic,x1000-phy", .data = &x1000_soc_info }, - { .compatible = "ingenic,x1830-phy", .data = &x1830_soc_info }, - { /* sentinel */ } -}; -MODULE_DEVICE_TABLE(of, ingenic_usb_phy_of_matches); - -static int jz4770_phy_probe(struct platform_device *pdev) -{ - struct device *dev = &pdev->dev; 
- struct jz4770_phy *priv; - int err; - - priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); - if (!priv) - return -ENOMEM; - - priv->soc_info = device_get_match_data(&pdev->dev); - if (!priv->soc_info) { - dev_err(&pdev->dev, "Error: No device match found\n"); - return -ENODEV; - } - - platform_set_drvdata(pdev, priv); - priv->dev = dev; - priv->phy.dev = dev; - priv->phy.otg = &priv->otg; - priv->phy.label = "ingenic-usb-phy"; - priv->phy.init = ingenic_usb_phy_init; - priv->phy.shutdown = ingenic_usb_phy_shutdown; - - priv->otg.state = OTG_STATE_UNDEFINED; - priv->otg.usb_phy = &priv->phy; - priv->otg.set_host = ingenic_usb_phy_set_host; - priv->otg.set_peripheral = ingenic_usb_phy_set_peripheral; - - priv->base = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(priv->base)) { - dev_err(dev, "Failed to map registers\n"); - return PTR_ERR(priv->base); - } - - priv->clk = devm_clk_get(dev, NULL); - if (IS_ERR(priv->clk)) - return dev_err_probe(dev, PTR_ERR(priv->clk), - "Failed to get clock\n"); - - priv->vcc_supply = devm_regulator_get(dev, "vcc"); - if (IS_ERR(priv->vcc_supply)) - return dev_err_probe(dev, PTR_ERR(priv->vcc_supply), - "Failed to get regulator\n"); - - err = usb_add_phy(&priv->phy, USB_PHY_TYPE_USB2); - if (err) - return dev_err_probe(dev, err, "Unable to register PHY\n"); - - return devm_add_action_or_reset(dev, ingenic_usb_phy_remove, &priv->phy); -} - -static struct platform_driver ingenic_phy_driver = { - .probe = jz4770_phy_probe, - .driver = { - .name = "jz4770-phy", - .of_match_table = ingenic_usb_phy_of_matches, - }, -}; -module_platform_driver(ingenic_phy_driver); - -MODULE_AUTHOR("周琰杰 (Zhou Yanjie) "); -MODULE_AUTHOR("漆鹏振 (Qi Pengzhen) "); -MODULE_AUTHOR("Paul Cercueil "); -MODULE_DESCRIPTION("Ingenic SoCs USB PHY driver"); -MODULE_LICENSE("GPL"); From 321b59870f850a10dbb211ecd2bd87b41497ea6f Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Fri, 4 Nov 2022 14:10:30 +0100 Subject: [PATCH 1138/4122] usb: gadget: u_ether: Do not 
make UDC parent of the net device The UDC is not a suitable parent of the net device as the UDC can change or vanish during the lifecycle of the ethernet gadget. This can be illustrated with the following: mkdir -p /sys/kernel/config/usb_gadget/mygadget cd /sys/kernel/config/usb_gadget/mygadget mkdir -p configs/c.1/strings/0x409 echo "C1:Composite Device" > configs/c.1/strings/0x409/configuration mkdir -p functions/ecm.usb0 ln -s functions/ecm.usb0 configs/c.1/ echo "dummy_udc.0" > UDC rmmod dummy_hcd The 'rmmod' removes the UDC from the just created gadget, leaving the still existing net device with a no longer existing parent. Accessing the ethernet device with commands like: ip --details link show usb0 will result in a KASAN splat: ================================================================== BUG: KASAN: use-after-free in if_nlmsg_size+0x3e8/0x528 Read of size 4 at addr c5c84754 by task ip/357 CPU: 3 PID: 357 Comm: ip Not tainted 6.1.0-rc3-00013-gd14953726b24-dirty #324 Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree) unwind_backtrace from show_stack+0x10/0x14 show_stack from dump_stack_lvl+0x58/0x70 dump_stack_lvl from print_report+0x134/0x4d4 print_report from kasan_report+0x78/0x10c kasan_report from if_nlmsg_size+0x3e8/0x528 if_nlmsg_size from rtnl_getlink+0x2b4/0x4d0 rtnl_getlink from rtnetlink_rcv_msg+0x1f4/0x674 rtnetlink_rcv_msg from netlink_rcv_skb+0xb4/0x1f8 netlink_rcv_skb from netlink_unicast+0x294/0x478 netlink_unicast from netlink_sendmsg+0x328/0x640 netlink_sendmsg from ____sys_sendmsg+0x2a4/0x3b4 ____sys_sendmsg from ___sys_sendmsg+0xc8/0x12c ___sys_sendmsg from sys_sendmsg+0xa0/0x120 sys_sendmsg from ret_fast_syscall+0x0/0x1c Solve this by not setting the parent of the ethernet device. 
Signed-off-by: Sascha Hauer Link: https://lore.kernel.org/r/20221104131031.850850-2-s.hauer@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/u_ether.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c index e06022873df1..8f12f3f8f6ee 100644 --- a/drivers/usb/gadget/function/u_ether.c +++ b/drivers/usb/gadget/function/u_ether.c @@ -798,7 +798,6 @@ struct eth_dev *gether_setup_name(struct usb_gadget *g, net->max_mtu = GETHER_MAX_MTU_SIZE; dev->gadget = g; - SET_NETDEV_DEV(net, &g->dev); SET_NETDEV_DEVTYPE(net, &gadget_type); status = register_netdev(net); @@ -873,8 +872,6 @@ int gether_register_netdev(struct net_device *net) struct usb_gadget *g; int status; - if (!net->dev.parent) - return -EINVAL; dev = netdev_priv(net); g = dev->gadget; @@ -905,7 +902,6 @@ void gether_set_gadget(struct net_device *net, struct usb_gadget *g) dev = netdev_priv(net); dev->gadget = g; - SET_NETDEV_DEV(net, &g->dev); } EXPORT_SYMBOL_GPL(gether_set_gadget); From d65e6b6e884a38360fc1cadf8ff31858151da57f Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Fri, 4 Nov 2022 14:10:31 +0100 Subject: [PATCH 1139/4122] usb: gadget: f_ecm: Always set current gadget in ecm_bind() The gadget may change over bind/unbind cycles, so set it each time during bind, not only the first time. 
Without it we get a use-after-free with the following example: cd /sys/kernel/config/usb_gadget/; mkdir -p mygadget; cd mygadget mkdir -p configs/c.1/strings/0x409 echo "C1:Composite Device" > configs/c.1/strings/0x409/configuration mkdir -p functions/ecm.usb0 ln -s functions/ecm.usb0 configs/c.1/ rmmod dummy_hcd modprobe dummy_hcd KASAN will complain shortly after the 'modprobe': usb 2-1: New USB device found, idVendor=0000, idProduct=0000, bcdDevice= 6.01 usb 2-1: New USB device strings: Mfr=0, Product=0, SerialNumber=0 ================================================================== BUG: KASAN: use-after-free in gether_connect+0xb8/0x30c Read of size 4 at addr cbef170c by task swapper/3/0 CPU: 3 PID: 0 Comm: swapper/3 Not tainted 6.1.0-rc3-00014-g41ff012f50cb-dirty #322 Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree) unwind_backtrace from show_stack+0x10/0x14 show_stack from dump_stack_lvl+0x58/0x70 dump_stack_lvl from print_report+0x134/0x4d4 print_report from kasan_report+0x78/0x10c kasan_report from gether_connect+0xb8/0x30c gether_connect from ecm_set_alt+0x124/0x254 ecm_set_alt from composite_setup+0xb98/0x2b18 composite_setup from configfs_composite_setup+0x80/0x98 configfs_composite_setup from dummy_timer+0x8f0/0x14a0 [dummy_hcd] ... 
Signed-off-by: Sascha Hauer Link: https://lore.kernel.org/r/20221104131031.850850-3-s.hauer@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_ecm.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/drivers/usb/gadget/function/f_ecm.c b/drivers/usb/gadget/function/f_ecm.c index ffe2486fce71..a7ab30e603e2 100644 --- a/drivers/usb/gadget/function/f_ecm.c +++ b/drivers/usb/gadget/function/f_ecm.c @@ -685,7 +685,7 @@ ecm_bind(struct usb_configuration *c, struct usb_function *f) struct usb_composite_dev *cdev = c->cdev; struct f_ecm *ecm = func_to_ecm(f); struct usb_string *us; - int status; + int status = 0; struct usb_ep *ep; struct f_ecm_opts *ecm_opts; @@ -695,23 +695,19 @@ ecm_bind(struct usb_configuration *c, struct usb_function *f) ecm_opts = container_of(f->fi, struct f_ecm_opts, func_inst); - /* - * in drivers/usb/gadget/configfs.c:configfs_composite_bind() - * configurations are bound in sequence with list_for_each_entry, - * in each configuration its functions are bound in sequence - * with list_for_each_entry, so we assume no race condition - * with regard to ecm_opts->bound access - */ + mutex_lock(&ecm_opts->lock); + + gether_set_gadget(ecm_opts->net, cdev->gadget); + if (!ecm_opts->bound) { - mutex_lock(&ecm_opts->lock); - gether_set_gadget(ecm_opts->net, cdev->gadget); status = gether_register_netdev(ecm_opts->net); - mutex_unlock(&ecm_opts->lock); - if (status) - return status; ecm_opts->bound = true; } + mutex_unlock(&ecm_opts->lock); + if (status) + return status; + ecm_string_defs[1].s = ecm->ethaddr; us = usb_gstrings_attach(cdev, ecm_strings, From 2e2b4b896159f9d47d063ccf4ed0a7af9a40f1c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 13:55:03 +0300 Subject: [PATCH 1140/4122] tty: Convert tty_buffer flags to bool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The struct tty_buffer has flags which 
is only used for storing TTYB_NORMAL. There are also a few quite confusing operations for checking the presence of TTYB_NORMAL. Simplify things by converting flags to bool. Despite the name remaining the same, the meaning of "flags" is altered slightly by this change. Previously it referred to flags of the buffer (only TTYB_NORMAL being used as a flag). After this change, flags tell whether the buffer contains/should be allocated with flags array along with character data array. It is a much more suitable name than TTYB_NORMAL was for this purpose, thus the name remains. Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019105504.16800-1-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_buffer.c | 28 ++++++++++++++-------------- include/linux/tty_buffer.h | 5 +---- include/linux/tty_flip.h | 4 ++-- 3 files changed, 17 insertions(+), 20 deletions(-) diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c index 3f057805560f..2df86ed90574 100644 --- a/drivers/tty/tty_buffer.c +++ b/drivers/tty/tty_buffer.c @@ -107,7 +107,7 @@ static void tty_buffer_reset(struct tty_buffer *p, size_t size) p->commit = 0; p->lookahead = 0; p->read = 0; - p->flags = 0; + p->flags = true; } /** @@ -249,7 +249,7 @@ void tty_buffer_flush(struct tty_struct *tty, struct tty_ldisc *ld) * __tty_buffer_request_room - grow tty buffer if needed * @port: tty port * @size: size desired - * @flags: buffer flags if new buffer allocated (default = 0) + * @flags: buffer has to store flags along character data * * Make at least @size bytes of linear space available for the tty buffer. * @@ -260,19 +260,19 @@ void tty_buffer_flush(struct tty_struct *tty, struct tty_ldisc *ld) * Returns: the size we managed to find. 
*/ static int __tty_buffer_request_room(struct tty_port *port, size_t size, - int flags) + bool flags) { struct tty_bufhead *buf = &port->buf; struct tty_buffer *b, *n; int left, change; b = buf->tail; - if (b->flags & TTYB_NORMAL) + if (!b->flags) left = 2 * b->size - b->used; else left = b->size - b->used; - change = (b->flags & TTYB_NORMAL) && (~flags & TTYB_NORMAL); + change = !b->flags && flags; if (change || left < size) { /* This is the slow path - looking for new buffers to use */ n = tty_buffer_alloc(port, size); @@ -300,7 +300,7 @@ static int __tty_buffer_request_room(struct tty_port *port, size_t size, int tty_buffer_request_room(struct tty_port *port, size_t size) { - return __tty_buffer_request_room(port, size, 0); + return __tty_buffer_request_room(port, size, true); } EXPORT_SYMBOL_GPL(tty_buffer_request_room); @@ -320,17 +320,17 @@ int tty_insert_flip_string_fixed_flag(struct tty_port *port, const unsigned char *chars, char flag, size_t size) { int copied = 0; + bool flags = flag != TTY_NORMAL; do { int goal = min_t(size_t, size - copied, TTY_BUFFER_PAGE); - int flags = (flag == TTY_NORMAL) ? TTYB_NORMAL : 0; int space = __tty_buffer_request_room(port, goal, flags); struct tty_buffer *tb = port->buf.tail; if (unlikely(space == 0)) break; memcpy(char_buf_ptr(tb, tb->used), chars, space); - if (~tb->flags & TTYB_NORMAL) + if (tb->flags) memset(flag_buf_ptr(tb, tb->used), flag, space); tb->used += space; copied += space; @@ -393,13 +393,13 @@ EXPORT_SYMBOL(tty_insert_flip_string_flags); int __tty_insert_flip_char(struct tty_port *port, unsigned char ch, char flag) { struct tty_buffer *tb; - int flags = (flag == TTY_NORMAL) ? 
TTYB_NORMAL : 0; + bool flags = flag != TTY_NORMAL; if (!__tty_buffer_request_room(port, 1, flags)) return 0; tb = port->buf.tail; - if (~tb->flags & TTYB_NORMAL) + if (tb->flags) *flag_buf_ptr(tb, tb->used) = flag; *char_buf_ptr(tb, tb->used++) = ch; @@ -424,13 +424,13 @@ EXPORT_SYMBOL(__tty_insert_flip_char); int tty_prepare_flip_string(struct tty_port *port, unsigned char **chars, size_t size) { - int space = __tty_buffer_request_room(port, size, TTYB_NORMAL); + int space = __tty_buffer_request_room(port, size, false); if (likely(space)) { struct tty_buffer *tb = port->buf.tail; *chars = char_buf_ptr(tb, tb->used); - if (~tb->flags & TTYB_NORMAL) + if (tb->flags) memset(flag_buf_ptr(tb, tb->used), TTY_NORMAL, space); tb->used += space; } @@ -492,7 +492,7 @@ static void lookahead_bufs(struct tty_port *port, struct tty_buffer *head) unsigned char *p, *f = NULL; p = char_buf_ptr(head, head->lookahead); - if (~head->flags & TTYB_NORMAL) + if (head->flags) f = flag_buf_ptr(head, head->lookahead); port->client_ops->lookahead_buf(port, p, f, count); @@ -509,7 +509,7 @@ receive_buf(struct tty_port *port, struct tty_buffer *head, int count) const char *f = NULL; int n; - if (~head->flags & TTYB_NORMAL) + if (head->flags) f = flag_buf_ptr(head, head->read); n = port->client_ops->receive_buf(port, p, f, count); diff --git a/include/linux/tty_buffer.h b/include/linux/tty_buffer.h index 1796648c2907..6ceb2789e6c8 100644 --- a/include/linux/tty_buffer.h +++ b/include/linux/tty_buffer.h @@ -17,14 +17,11 @@ struct tty_buffer { int commit; int lookahead; /* Lazy update on recv, can become less than "read" */ int read; - int flags; + bool flags; /* Data points here */ unsigned long data[]; }; -/* Values for .flags field of tty_buffer */ -#define TTYB_NORMAL 1 /* buffer has no flags buffer */ - static inline unsigned char *char_buf_ptr(struct tty_buffer *b, int ofs) { return ((unsigned char *)b->data) + ofs; diff --git a/include/linux/tty_flip.h b/include/linux/tty_flip.h index 
483d41cbcbb7..bfaaeee61a05 100644 --- a/include/linux/tty_flip.h +++ b/include/linux/tty_flip.h @@ -25,9 +25,9 @@ static inline int tty_insert_flip_char(struct tty_port *port, struct tty_buffer *tb = port->buf.tail; int change; - change = (tb->flags & TTYB_NORMAL) && (flag != TTY_NORMAL); + change = !tb->flags && (flag != TTY_NORMAL); if (!change && tb->used < tb->size) { - if (~tb->flags & TTYB_NORMAL) + if (tb->flags) *flag_buf_ptr(tb, tb->used) = flag; *char_buf_ptr(tb, tb->used++) = ch; return 1; From c6d30576bd6ce33095d39fe66a51ea821e953ac6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Fri, 4 Nov 2022 11:37:19 +0100 Subject: [PATCH 1141/4122] serial: Fix a typo ("ignorning") MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the two instances of this typo present in the MSM and VT8500 serial drivers. Signed-off-by: Jonathan Neuschäfer Reviewed-by: Mukesh Ojha Acked-by: Konrad Dybcio Link: https://lore.kernel.org/r/20221104103719.2234098-1-j.neuschaefer@gmx.net Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/msm_serial.c | 2 +- drivers/tty/serial/vt8500_serial.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/msm_serial.c b/drivers/tty/serial/msm_serial.c index 2b2e0f74b75a..843798e63084 100644 --- a/drivers/tty/serial/msm_serial.c +++ b/drivers/tty/serial/msm_serial.c @@ -816,7 +816,7 @@ static void msm_handle_rx(struct uart_port *port) port->icount.rx++; } - /* Mask conditions we're ignorning. */ + /* Mask conditions we're ignoring. 
*/ sr &= port->read_status_mask; if (sr & MSM_UART_SR_RX_BREAK) diff --git a/drivers/tty/serial/vt8500_serial.c b/drivers/tty/serial/vt8500_serial.c index deedb6513160..cc9157df732f 100644 --- a/drivers/tty/serial/vt8500_serial.c +++ b/drivers/tty/serial/vt8500_serial.c @@ -168,7 +168,7 @@ static void handle_rx(struct uart_port *port) c = readw(port->membase + VT8500_RXFIFO) & 0x3ff; - /* Mask conditions we're ignorning. */ + /* Mask conditions we're ignoring. */ c &= ~port->read_status_mask; if (c & FER) { From 56dc5074cbec02a6922c4bbce11de9827640bb4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 7 Nov 2022 12:21:26 +0200 Subject: [PATCH 1142/4122] serial: 8250_dma: Rearm DMA Rx if more data is pending MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When DMA Rx completes, the current behavior is to just exit the DMA completion handler without future actions. If the transfer is still on-going, UART will trigger an interrupt and that eventually rearms the DMA Rx. The extra interrupt round-trip has an inherent latency cost that increases the risk of FIFO overrun. In such situations, the latency margin tends to already be less due to FIFO not being empty. Add check into DMA Rx completion handler to detect if LSR has DR (Data Ready) still set. DR indicates there will be more characters pending and DMA Rx can be rearmed right away to handle them. 
Cc: Gilles BULOZ Signed-off-by: Ilpo Järvinen Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221107102126.56481-1-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_dma.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/tty/serial/8250/8250_dma.c b/drivers/tty/serial/8250/8250_dma.c index b85c82616e8c..37d6af2ec427 100644 --- a/drivers/tty/serial/8250/8250_dma.c +++ b/drivers/tty/serial/8250/8250_dma.c @@ -38,9 +38,8 @@ static void __dma_tx_complete(void *param) spin_unlock_irqrestore(&p->port.lock, flags); } -static void __dma_rx_complete(void *param) +static void __dma_rx_complete(struct uart_8250_port *p) { - struct uart_8250_port *p = param; struct uart_8250_dma *dma = p->dma; struct tty_port *tty_port = &p->port.state->port; struct dma_tx_state state; @@ -57,6 +56,20 @@ static void __dma_rx_complete(void *param) tty_flip_buffer_push(tty_port); } +static void dma_rx_complete(void *param) +{ + struct uart_8250_port *p = param; + struct uart_8250_dma *dma = p->dma; + unsigned long flags; + + __dma_rx_complete(p); + + spin_lock_irqsave(&p->port.lock, flags); + if (!dma->rx_running && (serial_lsr_in(p) & UART_LSR_DR)) + p->dma->rx_dma(p); + spin_unlock_irqrestore(&p->port.lock, flags); +} + int serial8250_tx_dma(struct uart_8250_port *p) { struct uart_8250_dma *dma = p->dma; @@ -130,7 +143,7 @@ int serial8250_rx_dma(struct uart_8250_port *p) return -EBUSY; dma->rx_running = 1; - desc->callback = __dma_rx_complete; + desc->callback = dma_rx_complete; desc->callback_param = p; dma->rx_cookie = dmaengine_submit(desc); From 6a3ff858915fa8ca36c7eb02c87c9181ae2fc333 Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Wed, 9 Nov 2022 07:21:10 +0000 Subject: [PATCH 1143/4122] serial: 8250_bcm7271: Fix error handling in brcmuart_init() A problem about 8250_bcm7271 create debugfs failed is triggered with the following log given: [ 324.516635] debugfs: Directory 
'bcm7271-uart' with parent '/' already present! The reason is that brcmuart_init() returns platform_driver_register() directly without checking its return value, if platform_driver_register() failed, it returns without destroy the newly created debugfs, resulting the debugfs of 8250_bcm7271 can never be created later. brcmuart_init() debugfs_create_dir() # create debugfs directory platform_driver_register() driver_register() bus_add_driver() priv = kzalloc(...) # OOM happened # return without destroy debugfs directory Fix by removing debugfs when platform_driver_register() returns error. Fixes: 41a469482de2 ("serial: 8250: Add new 8250-core based Broadcom STB driver") Signed-off-by: Yuan Can Link: https://lore.kernel.org/r/20221109072110.117291-2-yuancan@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_bcm7271.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_bcm7271.c b/drivers/tty/serial/8250/8250_bcm7271.c index 062177b64d21..ed5a94747692 100644 --- a/drivers/tty/serial/8250/8250_bcm7271.c +++ b/drivers/tty/serial/8250/8250_bcm7271.c @@ -1210,9 +1210,17 @@ static struct platform_driver brcmuart_platform_driver = { static int __init brcmuart_init(void) { + int ret; + brcmuart_debugfs_root = debugfs_create_dir( brcmuart_platform_driver.driver.name, NULL); - return platform_driver_register(&brcmuart_platform_driver); + ret = platform_driver_register(&brcmuart_platform_driver); + if (ret) { + debugfs_remove_recursive(brcmuart_debugfs_root); + return ret; + } + + return 0; } module_init(brcmuart_init); From d85bf5105853d57ed27c6c21ac35424cc44a0fbb Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 9 Nov 2022 08:04:34 +0100 Subject: [PATCH 1144/4122] serial: 8250: 8250_omap: Fix calculation of RS485 delays MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 801954d1210a ("serial: 8250: 8250_omap: Support native RS485") calculates RS485 
delays from the baudrate. The baudrate is generated with either a 16x or 13x divisor. The divisor is set in the Mode Definition Register 1 (MDR1). The commit erroneously assumes that the register stores the divisor as a bitmask and uses a logical AND to differentiate between 16x and 13x divisors. However the divisor is really stored as a 3-bit mode (see lines 363ff in include/uapi/linux/serial_reg.h). The logical AND operation is performed with UART_OMAP_MDR1_16X_MODE, which is defined as 0x0 and hence yields false. So the commit always assumes a 13x divisor. Fix by using an equal comparison. This works because we never set any of the other 5 bits in the register. (They pertain to IrDA mode, which is not supported by the driver). Fixes: 801954d1210a ("serial: 8250: 8250_omap: Support native RS485") Link: https://lore.kernel.org/linux-serial/202211070440.8hWunFUN-lkp@intel.com/ Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Lukas Wunner Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/7d5b04da13d89b8708b9543a0b125f2b6062a77b.1667977259.git.lukas@wunner.de Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_omap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index 1c8a48fdc8f2..7bb9da7558a1 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -822,7 +822,7 @@ static int omap8250_rs485_config(struct uart_port *port, * of the AM65 TRM: https://www.ti.com/lit/ug/spruid7e/spruid7e.pdf */ if (priv->quot) { - if (priv->mdr1 & UART_OMAP_MDR1_16X_MODE) + if (priv->mdr1 == UART_OMAP_MDR1_16X_MODE) baud = port->uartclk / (16 * priv->quot); else baud = port->uartclk / (13 * priv->quot); From 7a1212475f5e313c11300272f34ffa32462bbeef Mon Sep 17 00:00:00 2001 From: Daniel Starke Date: Thu, 3 Nov 2022 10:17:41 +0100 Subject: [PATCH 1145/4122] tty: n_gsm: introduce macro for minimal unit size n_gsm 
has a minimal protocol overhead of 7 bytes. The current code already checks whether the configured MRU/MTU size is at least one byte more than this. Introduce the macro MIN_MTU to make this value more obvious. Signed-off-by: Daniel Starke Link: https://lore.kernel.org/r/20221103091743.2119-1-daniel.starke@siemens.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 5e516f5cac5a..570c40a3d78f 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -89,6 +89,7 @@ module_param(debug, int, 0600); */ #define MAX_MRU 1500 #define MAX_MTU 1500 +#define MIN_MTU (PROT_OVERHEAD + 1) /* SOF, ADDR, CTRL, LEN1, LEN2, ..., FCS, EOF */ #define PROT_OVERHEAD 7 #define GSM_NET_TX_TIMEOUT (HZ*10) @@ -2712,7 +2713,9 @@ static int gsm_config(struct gsm_mux *gsm, struct gsm_config *c) if ((c->adaption != 1 && c->adaption != 2) || c->k) return -EOPNOTSUPP; /* Check the MRU/MTU range looks sane */ - if (c->mru > MAX_MRU || c->mtu > MAX_MTU || c->mru < 8 || c->mtu < 8) + if (c->mru < MIN_MTU || c->mtu < MIN_MTU) + return -EINVAL; + if (c->mru > MAX_MRU || c->mtu > MAX_MTU) return -EINVAL; if (c->n2 > 255) return -EINVAL; @@ -3296,7 +3299,7 @@ static int gsm_create_network(struct gsm_dlci *dlci, struct gsm_netconfig *nc) return -ENOMEM; } net->mtu = dlci->gsm->mtu; - net->min_mtu = 8; + net->min_mtu = MIN_MTU; net->max_mtu = dlci->gsm->mtu; mux_net = netdev_priv(net); mux_net->dlci = dlci; From 2ec7a802a04c545ceea96bf67ee818d8bb0349e2 Mon Sep 17 00:00:00 2001 From: Daniel Starke Date: Thu, 3 Nov 2022 10:17:42 +0100 Subject: [PATCH 1146/4122] tty: n_gsm: add parameters used with parameter negotiation n_gsm is based on the 3GPP 07.010 and its newer version is the 3GPP 27.010. See https://portal.3gpp.org/desktopmodules/Specifications/SpecificationDetails.aspx?specificationId=1516 The changes from 07.010 to 27.010 are non-functional. 
Therefore, I refer to the newer 27.010 here. Chapter 5.4.6.3.1 describes the encoding of the parameter negotiation messages. Add the parameters used there to 'gsm_mux' and 'gsm_dlci' and initialize both according to the value ranges and recommended defaults defined in chapter 5.7. Replace the use of the DLC default values from the 'gsm_mux' fields with the DLC specific values from the 'gsm_dlci' fields where applicable. Signed-off-by: Daniel Starke Link: https://lore.kernel.org/r/20221103091743.2119-2-daniel.starke@siemens.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 58 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 45 insertions(+), 13 deletions(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 570c40a3d78f..c217013b3e16 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -75,7 +76,12 @@ module_param(debug, int, 0600); #define T1 10 /* 100mS */ #define T2 34 /* 333mS */ +#define T3 10 /* 10s */ #define N2 3 /* Retry 3 times */ +#define K 2 /* outstanding I frames */ + +#define MAX_T3 255 /* In seconds. */ +#define MAX_WINDOW_SIZE 7 /* Limit of K in error recovery mode. */ /* Use long timers for testing at low speed with debug on */ #ifdef DEBUG_TIMING @@ -160,7 +166,12 @@ struct gsm_dlci { int prev_adaption; u32 modem_rx; /* Our incoming virtual modem lines */ u32 modem_tx; /* Our outgoing modem lines */ + unsigned int mtu; bool dead; /* Refuse re-open */ + /* Configuration */ + u8 prio; /* Priority */ + u8 ftype; /* Frame type */ + u8 k; /* Window size */ /* Flow control */ bool throttled; /* Private copy of throttle state */ bool constipated; /* Throttle status for outgoing */ @@ -283,7 +294,9 @@ struct gsm_mux { int adaption; /* 1 or 2 supported */ u8 ftype; /* UI or UIH */ int t1, t2; /* Timers in 1/100th of a sec */ + unsigned int t3; /* Power wake-up timer in seconds. 
*/ int n2; /* Retry count */ + u8 k; /* Window size */ /* Statistics (not currently exposed) */ unsigned long bad_fcs; @@ -1075,12 +1088,12 @@ static int gsm_dlci_data_output(struct gsm_mux *gsm, struct gsm_dlci *dlci) return 0; /* MTU/MRU count only the data bits but watch adaption mode */ - if ((len + h) > gsm->mtu) - len = gsm->mtu - h; + if ((len + h) > dlci->mtu) + len = dlci->mtu - h; size = len + h; - msg = gsm_data_alloc(gsm, dlci->addr, size, gsm->ftype); + msg = gsm_data_alloc(gsm, dlci->addr, size, dlci->ftype); if (!msg) return -ENOMEM; dp = msg->data; @@ -1144,19 +1157,19 @@ static int gsm_dlci_data_output_framed(struct gsm_mux *gsm, len = dlci->skb->len + overhead; /* MTU/MRU count only the data bits */ - if (len > gsm->mtu) { + if (len > dlci->mtu) { if (dlci->adaption == 3) { /* Over long frame, bin it */ dev_kfree_skb_any(dlci->skb); dlci->skb = NULL; return 0; } - len = gsm->mtu; + len = dlci->mtu; } else last = 1; size = len + overhead; - msg = gsm_data_alloc(gsm, dlci->addr, size, gsm->ftype); + msg = gsm_data_alloc(gsm, dlci->addr, size, dlci->ftype); if (msg == NULL) { skb_queue_tail(&dlci->skb_list, dlci->skb); dlci->skb = NULL; @@ -1213,7 +1226,7 @@ static int gsm_dlci_modem_output(struct gsm_mux *gsm, struct gsm_dlci *dlci, return -EINVAL; } - msg = gsm_data_alloc(gsm, dlci->addr, size, gsm->ftype); + msg = gsm_data_alloc(gsm, dlci->addr, size, dlci->ftype); if (!msg) { pr_err("%s: gsm_data_alloc error", __func__); return -ENOMEM; @@ -1338,8 +1351,9 @@ static void gsm_dlci_data_kick(struct gsm_dlci *dlci) static int gsm_control_command(struct gsm_mux *gsm, int cmd, const u8 *data, int dlen) { - struct gsm_msg *msg = gsm_data_alloc(gsm, 0, dlen + 2, gsm->ftype); + struct gsm_msg *msg; + msg = gsm_data_alloc(gsm, 0, dlen + 2, gsm->dlci[0]->ftype); if (msg == NULL) return -ENOMEM; @@ -1365,7 +1379,8 @@ static void gsm_control_reply(struct gsm_mux *gsm, int cmd, const u8 *data, int dlen) { struct gsm_msg *msg; - msg = gsm_data_alloc(gsm, 0, 
dlen + 2, gsm->ftype); + + msg = gsm_data_alloc(gsm, 0, dlen + 2, gsm->dlci[0]->ftype); if (msg == NULL) return; msg->data[0] = (cmd & 0xFE) << 1 | EA; /* Clear C/R */ @@ -2075,6 +2090,13 @@ static struct gsm_dlci *gsm_dlci_alloc(struct gsm_mux *gsm, int addr) dlci->gsm = gsm; dlci->addr = addr; dlci->adaption = gsm->adaption; + dlci->mtu = gsm->mtu; + if (addr == 0) + dlci->prio = 0; + else + dlci->prio = roundup(addr + 1, 8) - 1; + dlci->ftype = gsm->ftype; + dlci->k = gsm->k; dlci->state = DLCI_CLOSED; if (addr) { dlci->data = gsm_dlci_data; @@ -2650,7 +2672,9 @@ static struct gsm_mux *gsm_alloc_mux(void) gsm->t1 = T1; gsm->t2 = T2; + gsm->t3 = T3; gsm->n2 = N2; + gsm->k = K; gsm->ftype = UIH; gsm->adaption = 1; gsm->encoding = GSM_ADV_OPT; @@ -2691,7 +2715,7 @@ static void gsm_copy_config_values(struct gsm_mux *gsm, c->initiator = gsm->initiator; c->t1 = gsm->t1; c->t2 = gsm->t2; - c->t3 = 0; /* Not supported */ + c->t3 = gsm->t3; c->n2 = gsm->n2; if (gsm->ftype == UIH) c->i = 1; @@ -2700,7 +2724,7 @@ static void gsm_copy_config_values(struct gsm_mux *gsm, pr_debug("Ftype %d i %d\n", gsm->ftype, c->i); c->mru = gsm->mru; c->mtu = gsm->mtu; - c->k = 0; + c->k = gsm->k; } static int gsm_config(struct gsm_mux *gsm, struct gsm_config *c) @@ -2717,12 +2741,16 @@ static int gsm_config(struct gsm_mux *gsm, struct gsm_config *c) return -EINVAL; if (c->mru > MAX_MRU || c->mtu > MAX_MTU) return -EINVAL; + if (c->t3 > MAX_T3) + return -EINVAL; if (c->n2 > 255) return -EINVAL; if (c->encapsulation > 1) /* Basic, advanced, no I */ return -EINVAL; if (c->initiator > 1) return -EINVAL; + if (c->k > MAX_WINDOW_SIZE) + return -EINVAL; if (c->i == 0 || c->i > 2) /* UIH and UI only */ return -EINVAL; /* @@ -2770,6 +2798,10 @@ static int gsm_config(struct gsm_mux *gsm, struct gsm_config *c) gsm->t1 = c->t1; if (c->t2) gsm->t2 = c->t2; + if (c->t3) + gsm->t3 = c->t3; + if (c->k) + gsm->k = c->k; /* * FIXME: We need to separate activation/deactivation from adding @@ -3298,9 +3330,9 
@@ static int gsm_create_network(struct gsm_dlci *dlci, struct gsm_netconfig *nc) pr_err("alloc_netdev failed\n"); return -ENOMEM; } - net->mtu = dlci->gsm->mtu; + net->mtu = dlci->mtu; net->min_mtu = MIN_MTU; - net->max_mtu = dlci->gsm->mtu; + net->max_mtu = dlci->mtu; mux_net = netdev_priv(net); mux_net->dlci = dlci; kref_init(&mux_net->ref); From 92f1f0c3290d994d753dde588698daf1ef91504b Mon Sep 17 00:00:00 2001 From: Daniel Starke Date: Thu, 3 Nov 2022 10:17:43 +0100 Subject: [PATCH 1147/4122] tty: n_gsm: add parameter negotiation support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit n_gsm is based on the 3GPP 07.010 and its newer version is the 3GPP 27.010. See https://portal.3gpp.org/desktopmodules/Specifications/SpecificationDetails.aspx?specificationId=1516 The changes from 07.010 to 27.010 are non-functional. Therefore, I refer to the newer 27.010 here. Chapter 5.1.8.1.1 describes the parameter negotiation messages and parameters. Chapter 5.4.1 states that the default parameters are to be used if no negotiation is performed. Chapter 5.4.6.3.1 describes the encoding of the parameter negotiation message. The meaning of the parameters and allowed value ranges can be found in chapter 5.7. Add parameter negotiation support accordingly. DLCI specific parameter configuration by the user requires additional ioctls. This is subject to another patch. 
Signed-off-by: Daniel Starke Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221103091743.2119-3-daniel.starke@siemens.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 335 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 327 insertions(+), 8 deletions(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index c217013b3e16..cde1e846c81e 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -127,6 +128,7 @@ struct gsm_msg { enum gsm_dlci_state { DLCI_CLOSED, + DLCI_CONFIGURE, /* Sending PN (for adaption > 1) */ DLCI_OPENING, /* Sending SABM not seen UA */ DLCI_OPEN, /* SABM/UA complete */ DLCI_CLOSING, /* Sending DISC not seen UA/DM */ @@ -184,6 +186,32 @@ struct gsm_dlci { struct net_device *net; /* network interface, if created */ }; +/* + * Parameter bits used for parameter negotiation according to 3GPP 27.010 + * chapter 5.4.6.3.1. + */ + +struct gsm_dlci_param_bits { + u8 d_bits; + u8 i_cl_bits; + u8 p_bits; + u8 t_bits; + __le16 n_bits; + u8 na_bits; + u8 k_bits; +}; + +static_assert(sizeof(struct gsm_dlci_param_bits) == 8); + +#define PN_D_FIELD_DLCI GENMASK(5, 0) +#define PN_I_CL_FIELD_FTYPE GENMASK(3, 0) +#define PN_I_CL_FIELD_ADAPTION GENMASK(7, 4) +#define PN_P_FIELD_PRIO GENMASK(5, 0) +#define PN_T_FIELD_T1 GENMASK(7, 0) +#define PN_N_FIELD_N1 GENMASK(15, 0) +#define PN_NA_FIELD_N2 GENMASK(7, 0) +#define PN_K_FIELD_K GENMASK(2, 0) + /* Total number of supported devices */ #define GSM_TTY_MINORS 256 @@ -411,6 +439,7 @@ static const u8 gsm_fcs8[256] = { #define INIT_FCS 0xFF #define GOOD_FCS 0xCF +static void gsm_dlci_close(struct gsm_dlci *dlci); static int gsmld_output(struct gsm_mux *gsm, u8 *data, int len); static int gsm_modem_update(struct gsm_dlci *dlci, u8 brk); static struct gsm_msg *gsm_data_alloc(struct gsm_mux *gsm, u8 addr, int len, @@ -533,6 +562,57 @@ static void gsm_hex_dump_bytes(const char *fname, const u8 
*data, kfree(prefix); } +/** + * gsm_encode_params - encode DLCI parameters + * @dlci: DLCI to encode from + * @params: buffer to fill with the encoded parameters + * + * Encodes the parameters according to GSM 07.10 section 5.4.6.3.1 + * table 3. + */ +static int gsm_encode_params(const struct gsm_dlci *dlci, + struct gsm_dlci_param_bits *params) +{ + const struct gsm_mux *gsm = dlci->gsm; + unsigned int i, cl; + + switch (dlci->ftype) { + case UIH: + i = 0; /* UIH */ + break; + case UI: + i = 1; /* UI */ + break; + default: + pr_debug("unsupported frame type %d\n", dlci->ftype); + return -EINVAL; + } + + switch (dlci->adaption) { + case 1: /* Unstructured */ + cl = 0; /* convergence layer type 1 */ + break; + case 2: /* Unstructured with modem bits. */ + cl = 1; /* convergence layer type 2 */ + break; + default: + pr_debug("unsupported adaption %d\n", dlci->adaption); + return -EINVAL; + } + + params->d_bits = FIELD_PREP(PN_D_FIELD_DLCI, dlci->addr); + /* UIH, convergence layer type 1 */ + params->i_cl_bits = FIELD_PREP(PN_I_CL_FIELD_FTYPE, i) | + FIELD_PREP(PN_I_CL_FIELD_ADAPTION, cl); + params->p_bits = FIELD_PREP(PN_P_FIELD_PRIO, dlci->prio); + params->t_bits = FIELD_PREP(PN_T_FIELD_T1, gsm->t1); + params->n_bits = cpu_to_le16(FIELD_PREP(PN_N_FIELD_N1, dlci->mtu)); + params->na_bits = FIELD_PREP(PN_NA_FIELD_N2, gsm->n2); + params->k_bits = FIELD_PREP(PN_K_FIELD_K, dlci->k); + + return 0; +} + /** * gsm_register_devices - register all tty devices for a given mux index * @@ -1450,6 +1530,116 @@ static void gsm_process_modem(struct tty_struct *tty, struct gsm_dlci *dlci, dlci->modem_rx = mlines; } +/** + * gsm_process_negotiation - process received parameters + * @gsm: GSM channel + * @addr: DLCI address + * @cr: command/response + * @params: encoded parameters from the parameter negotiation message + * + * Used when the response for our parameter negotiation command was + * received. 
+ */ +static int gsm_process_negotiation(struct gsm_mux *gsm, unsigned int addr, + unsigned int cr, + const struct gsm_dlci_param_bits *params) +{ + struct gsm_dlci *dlci = gsm->dlci[addr]; + unsigned int ftype, i, adaption, prio, n1, k; + + i = FIELD_GET(PN_I_CL_FIELD_FTYPE, params->i_cl_bits); + adaption = FIELD_GET(PN_I_CL_FIELD_ADAPTION, params->i_cl_bits) + 1; + prio = FIELD_GET(PN_P_FIELD_PRIO, params->p_bits); + n1 = FIELD_GET(PN_N_FIELD_N1, get_unaligned_le16(&params->n_bits)); + k = FIELD_GET(PN_K_FIELD_K, params->k_bits); + + if (n1 < MIN_MTU) { + if (debug & DBG_ERRORS) + pr_info("%s N1 out of range in PN\n", __func__); + return -EINVAL; + } + + switch (i) { + case 0x00: + ftype = UIH; + break; + case 0x01: + ftype = UI; + break; + case 0x02: /* I frames are not supported */ + if (debug & DBG_ERRORS) + pr_info("%s unsupported I frame request in PN\n", + __func__); + return -EINVAL; + default: + if (debug & DBG_ERRORS) + pr_info("%s i out of range in PN\n", __func__); + return -EINVAL; + } + + if (!cr && gsm->initiator) { + if (adaption != dlci->adaption) { + if (debug & DBG_ERRORS) + pr_info("%s invalid adaption %d in PN\n", + __func__, adaption); + return -EINVAL; + } + if (prio != dlci->prio) { + if (debug & DBG_ERRORS) + pr_info("%s invalid priority %d in PN", + __func__, prio); + return -EINVAL; + } + if (n1 > gsm->mru || n1 > dlci->mtu) { + /* We requested a frame size but the other party wants + * to send larger frames. The standard allows only a + * smaller response value than requested (5.4.6.3.1). 
+ */ + if (debug & DBG_ERRORS) + pr_info("%s invalid N1 %d in PN\n", __func__, + n1); + return -EINVAL; + } + dlci->mtu = n1; + if (ftype != dlci->ftype) { + if (debug & DBG_ERRORS) + pr_info("%s invalid i %d in PN\n", __func__, i); + return -EINVAL; + } + if (ftype != UI && ftype != UIH && k > dlci->k) { + if (debug & DBG_ERRORS) + pr_info("%s invalid k %d in PN\n", __func__, k); + return -EINVAL; + } + dlci->k = k; + } else if (cr && !gsm->initiator) { + /* Only convergence layer type 1 and 2 are supported. */ + if (adaption != 1 && adaption != 2) { + if (debug & DBG_ERRORS) + pr_info("%s invalid adaption %d in PN\n", + __func__, adaption); + return -EINVAL; + } + dlci->adaption = adaption; + if (n1 > gsm->mru) { + /* Propose a smaller value */ + dlci->mtu = gsm->mru; + } else if (n1 > MAX_MTU) { + /* Propose a smaller value */ + dlci->mtu = MAX_MTU; + } else { + dlci->mtu = n1; + } + dlci->prio = prio; + dlci->ftype = ftype; + dlci->k = k; + } else { + return -EINVAL; + } + + return 0; +} + /** * gsm_control_modem - modem status received * @gsm: GSM channel @@ -1503,6 +1693,65 @@ static void gsm_control_modem(struct gsm_mux *gsm, const u8 *data, int clen) gsm_control_reply(gsm, CMD_MSC, data, clen); } +/** + * gsm_control_negotiation - parameter negotiation received + * @gsm: GSM channel + * @cr: command/response flag + * @data: data following command + * @dlen: data length + * + * We have received a parameter negotiation message. This is used by + * the GSM mux protocol to configure protocol parameters for a new DLCI. + */ +static void gsm_control_negotiation(struct gsm_mux *gsm, unsigned int cr, + const u8 *data, unsigned int dlen) +{ + unsigned int addr; + struct gsm_dlci_param_bits pn_reply; + struct gsm_dlci *dlci; + struct gsm_dlci_param_bits *params; + + if (dlen < sizeof(struct gsm_dlci_param_bits)) + return; + + /* Invalid DLCI? 
*/ + params = (struct gsm_dlci_param_bits *)data; + addr = FIELD_GET(PN_D_FIELD_DLCI, params->d_bits); + if (addr == 0 || addr >= NUM_DLCI || !gsm->dlci[addr]) + return; + dlci = gsm->dlci[addr]; + + /* Too late for parameter negotiation? */ + if ((!cr && dlci->state == DLCI_OPENING) || dlci->state == DLCI_OPEN) + return; + + /* Process the received parameters */ + if (gsm_process_negotiation(gsm, addr, cr, params) != 0) { + /* Negotiation failed. Close the link. */ + if (debug & DBG_ERRORS) + pr_info("%s PN failed\n", __func__); + gsm_dlci_close(dlci); + return; + } + + if (cr) { + /* Reply command with accepted parameters. */ + if (gsm_encode_params(dlci, &pn_reply) == 0) + gsm_control_reply(gsm, CMD_PN, (const u8 *)&pn_reply, + sizeof(pn_reply)); + else if (debug & DBG_ERRORS) + pr_info("%s PN invalid\n", __func__); + } else if (dlci->state == DLCI_CONFIGURE) { + /* Proceed with link setup by sending SABM before UA */ + dlci->state = DLCI_OPENING; + gsm_command(gsm, dlci->addr, SABM|PF); + mod_timer(&dlci->t1, jiffies + gsm->t1 * HZ / 100); + } else { + if (debug & DBG_ERRORS) + pr_info("%s PN in invalid state\n", __func__); + } +} + /** * gsm_control_rls - remote line status * @gsm: GSM channel @@ -1612,8 +1861,12 @@ static void gsm_control_message(struct gsm_mux *gsm, unsigned int command, /* Modem wishes to enter power saving state */ gsm_control_reply(gsm, CMD_PSC, NULL, 0); break; + /* Optional commands */ + case CMD_PN: + /* Modem sends a parameter negotiation command */ + gsm_control_negotiation(gsm, 1, data, clen); + break; /* Optional unsupported commands */ - case CMD_PN: /* Parameter negotiation */ case CMD_RPN: /* Remote port negotiation */ case CMD_SNC: /* Service negotiation command */ default: @@ -1646,8 +1899,8 @@ static void gsm_control_response(struct gsm_mux *gsm, unsigned int command, spin_lock_irqsave(&gsm->control_lock, flags); ctrl = gsm->pending_cmd; - /* Does the reply match our command */ command |= 1; + /* Does the reply match our 
command */ if (ctrl != NULL && (command == ctrl->cmd || command == CMD_NSC)) { /* Our command was replied to, kill the retry timer */ del_timer(&gsm->t2_timer); @@ -1657,6 +1910,9 @@ static void gsm_control_response(struct gsm_mux *gsm, unsigned int command, ctrl->error = -EOPNOTSUPP; ctrl->done = 1; wake_up(&gsm->event); + /* Or did we receive the PN response to our PN command */ + } else if (command == CMD_PN) { + gsm_control_negotiation(gsm, 0, data, clen); } spin_unlock_irqrestore(&gsm->control_lock, flags); } @@ -1834,6 +2090,32 @@ static void gsm_dlci_open(struct gsm_dlci *dlci) wake_up(&dlci->gsm->event); } +/** + * gsm_dlci_negotiate - start parameter negotiation + * @dlci: DLCI to open + * + * Starts the parameter negotiation for the new DLCI. This needs to be done + * before the DLCI initialized the channel via SABM. + */ +static int gsm_dlci_negotiate(struct gsm_dlci *dlci) +{ + struct gsm_mux *gsm = dlci->gsm; + struct gsm_dlci_param_bits params; + int ret; + + ret = gsm_encode_params(dlci, &params); + if (ret != 0) + return ret; + + /* We cannot asynchronous wait for the command response with + * gsm_command() and gsm_control_wait() at this point. + */ + ret = gsm_control_command(gsm, CMD_PN, (const u8 *)&params, + sizeof(params)); + + return ret; +} + /** * gsm_dlci_t1 - T1 timer expiry * @t: timer contained in the DLCI that opened @@ -1855,6 +2137,14 @@ static void gsm_dlci_t1(struct timer_list *t) struct gsm_mux *gsm = dlci->gsm; switch (dlci->state) { + case DLCI_CONFIGURE: + if (dlci->retries && gsm_dlci_negotiate(dlci) == 0) { + dlci->retries--; + mod_timer(&dlci->t1, jiffies + gsm->t1 * HZ / 100); + } else { + gsm_dlci_begin_close(dlci); /* prevent half open link */ + } + break; case DLCI_OPENING: if (dlci->retries) { dlci->retries--; @@ -1893,17 +2183,46 @@ static void gsm_dlci_t1(struct timer_list *t) * to the modem which should then reply with a UA or ADM, at which point * we will move into open state. 
Opening is done asynchronously with retry * running off timers and the responses. + * Parameter negotiation is performed before SABM if required. */ static void gsm_dlci_begin_open(struct gsm_dlci *dlci) { - struct gsm_mux *gsm = dlci->gsm; - if (dlci->state == DLCI_OPEN || dlci->state == DLCI_OPENING) + struct gsm_mux *gsm = dlci ? dlci->gsm : NULL; + bool need_pn = false; + + if (!gsm) return; - dlci->retries = gsm->n2; - dlci->state = DLCI_OPENING; - gsm_command(dlci->gsm, dlci->addr, SABM|PF); - mod_timer(&dlci->t1, jiffies + gsm->t1 * HZ / 100); + + if (dlci->addr != 0) { + if (gsm->adaption != 1 || gsm->adaption != dlci->adaption) + need_pn = true; + if (dlci->prio != (roundup(dlci->addr + 1, 8) - 1)) + need_pn = true; + if (gsm->ftype != dlci->ftype) + need_pn = true; + } + + switch (dlci->state) { + case DLCI_CLOSED: + case DLCI_CLOSING: + dlci->retries = gsm->n2; + if (!need_pn) { + dlci->state = DLCI_OPENING; + gsm_command(gsm, dlci->addr, SABM|PF); + } else { + /* Configure DLCI before setup */ + dlci->state = DLCI_CONFIGURE; + if (gsm_dlci_negotiate(dlci) != 0) { + gsm_dlci_close(dlci); + return; + } + } + mod_timer(&dlci->t1, jiffies + gsm->t1 * HZ / 100); + break; + default: + break; + } } /** From a3be423f0657e603b45998ef2bb9e5d27dc226c3 Mon Sep 17 00:00:00 2001 From: Shaomin Deng Date: Sat, 5 Nov 2022 11:26:56 -0400 Subject: [PATCH 1148/4122] tty: n_gsm: Delete unneeded semicolon Delete the unneeded semicolon after curly braces. 
Signed-off-by: Shaomin Deng Link: https://lore.kernel.org/r/20221105152656.4638-1-dengshaomin@cdjrlc.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index cde1e846c81e..4ef88e6b1870 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -1379,7 +1379,7 @@ static int gsm_dlci_data_sweep(struct gsm_mux *gsm) } if (!sent) break; - }; + } return ret; } From a055204b063ade914e6dd6e270d3c2c0453a3cf5 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 2 Sep 2022 17:55:25 -0700 Subject: [PATCH 1149/4122] leds: gpio: switch to using devm_fwnode_gpiod_get() devm_fwnode_get_gpiod_from_child() is going away as the name is too unwieldy, let's switch to using the new devm_fwnode_gpiod_get(). Signed-off-by: Dmitry Torokhov Reviewed-by: Andy Shevchenko Reviewed-by: Linus Walleij Signed-off-by: Bartosz Golaszewski --- drivers/leds/leds-gpio.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/leds/leds-gpio.c b/drivers/leds/leds-gpio.c index 092eb59a7d32..ce4e79939731 100644 --- a/drivers/leds/leds-gpio.c +++ b/drivers/leds/leds-gpio.c @@ -151,9 +151,8 @@ static struct gpio_leds_priv *gpio_leds_create(struct platform_device *pdev) * will be updated after LED class device is registered, * Only then the final LED name is known. */ - led.gpiod = devm_fwnode_get_gpiod_from_child(dev, NULL, child, - GPIOD_ASIS, - NULL); + led.gpiod = devm_fwnode_gpiod_get(dev, child, NULL, GPIOD_ASIS, + NULL); if (IS_ERR(led.gpiod)) { fwnode_handle_put(child); return ERR_CAST(led.gpiod); From 17521f263fc0ee2c202d5a762679c0ff0cf24b80 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 2 Sep 2022 17:55:26 -0700 Subject: [PATCH 1150/4122] leds: lgm-sso: switch to using devm_fwnode_gpiod_get() devm_fwnode_get_gpiod_from_child() is going away as the name is too unwieldy, let's switch to using the new devm_fwnode_gpiod_get(). 
Signed-off-by: Dmitry Torokhov Reviewed-by: Andy Shevchenko Reviewed-by: Linus Walleij Signed-off-by: Bartosz Golaszewski --- drivers/leds/blink/leds-lgm-sso.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/leds/blink/leds-lgm-sso.c b/drivers/leds/blink/leds-lgm-sso.c index 6f270c0272fb..35c61311e7fd 100644 --- a/drivers/leds/blink/leds-lgm-sso.c +++ b/drivers/leds/blink/leds-lgm-sso.c @@ -635,9 +635,8 @@ __sso_led_dt_parse(struct sso_led_priv *priv, struct fwnode_handle *fw_ssoled) led->priv = priv; desc = &led->desc; - led->gpiod = devm_fwnode_get_gpiod_from_child(dev, NULL, - fwnode_child, - GPIOD_ASIS, NULL); + led->gpiod = devm_fwnode_gpiod_get(dev, fwnode_child, NULL, + GPIOD_ASIS, NULL); if (IS_ERR(led->gpiod)) { ret = dev_err_probe(dev, PTR_ERR(led->gpiod), "led: get gpio fail!\n"); goto __dt_err; From 2fe8e1dcf937272c5425e69947819894fcf077a6 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 2 Sep 2022 17:55:27 -0700 Subject: [PATCH 1151/4122] gpiolib: remove devm_fwnode_get_[index_]gpiod_from_child() Now that there are no more users of these APIs in the kernel we can remove them. 
Signed-off-by: Dmitry Torokhov Reviewed-by: Andy Shevchenko Reviewed-by: Linus Walleij Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/consumer.h | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 36460ced060b..45da8f137fe5 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -581,27 +581,6 @@ struct gpio_desc *devm_fwnode_gpiod_get(struct device *dev, flags, label); } -static inline -struct gpio_desc *devm_fwnode_get_index_gpiod_from_child(struct device *dev, - const char *con_id, int index, - struct fwnode_handle *child, - enum gpiod_flags flags, - const char *label) -{ - return devm_fwnode_gpiod_get_index(dev, child, con_id, index, - flags, label); -} - -static inline -struct gpio_desc *devm_fwnode_get_gpiod_from_child(struct device *dev, - const char *con_id, - struct fwnode_handle *child, - enum gpiod_flags flags, - const char *label) -{ - return devm_fwnode_gpiod_get_index(dev, child, con_id, 0, flags, label); -} - #if IS_ENABLED(CONFIG_GPIOLIB) && IS_ENABLED(CONFIG_OF_GPIO) struct device_node; From defbab270d45e32b068e7e73c3567232d745c60f Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Tue, 27 Sep 2022 14:52:56 -0700 Subject: [PATCH 1152/4122] include/uapi/linux/swab: Fix potentially missing __always_inline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit bc27fb68aaad ("include/uapi/linux/byteorder, swab: force inlining of some byteswap operations") added __always_inline to swab functions and commit 283d75737837 ("uapi/linux/stddef.h: Provide __always_inline to userspace headers") added a definition of __always_inline for use in exported headers when the kernel's compiler.h is not available. 
However, since swab.h does not include stddef.h, if the header soup does not indirectly include it, the definition of __always_inline is missing, resulting in a compilation failure, which was observed compiling the perf tool using exported headers containing this commit: In file included from /usr/include/linux/byteorder/little_endian.h:12:0, from /usr/include/asm/byteorder.h:14, from tools/include/uapi/linux/perf_event.h:20, from perf.h:8, from builtin-bench.c:18: /usr/include/linux/swab.h:160:8: error: unknown type name `__always_inline' static __always_inline __u16 __swab16p(const __u16 *p) Fix this by replacing the inclusion of linux/compiler.h with linux/stddef.h to ensure that we pick up that definition if required, without relying on its indirect inclusion. compiler.h is then included indirectly, via stddef.h. Fixes: 283d75737837 ("uapi/linux/stddef.h: Provide __always_inline to userspace headers") Signed-off-by: Matt Redfearn Signed-off-by: Florian Fainelli Signed-off-by: Arnd Bergmann Tested-by: Nathan Chancellor Reviewed-by: Petr Vaněk Signed-off-by: Arnd Bergmann --- include/uapi/linux/swab.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/swab.h b/include/uapi/linux/swab.h index 0723a9cce747..01717181339e 100644 --- a/include/uapi/linux/swab.h +++ b/include/uapi/linux/swab.h @@ -3,7 +3,7 @@ #define _UAPI_LINUX_SWAB_H #include -#include <linux/compiler.h> +#include <linux/stddef.h> #include #include From 8afe82550240640617abfb3d6ba2c7579261e7fa Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 8 Nov 2022 15:38:52 +0200 Subject: [PATCH 1153/4122] gpiolib: of: Prepare of_gpiochip_add() / of_gpiochip_remove() for fwnode GPIO library is getting rid of of_node, fwnode should be utilized instead. Prepare of_gpiochip_add() / of_gpiochip_remove() for fwnode. 
Signed-off-by: Andy Shevchenko Reviewed-by: Dmitry Torokhov Reviewed-by: Linus Walleij Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-of.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index be9c34cca322..000020eb78d8 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -1104,9 +1104,11 @@ static int of_gpiochip_add_pin_range(struct gpio_chip *chip) { return 0; } int of_gpiochip_add(struct gpio_chip *chip) { + struct device_node *np; int ret; - if (!chip->of_node) + np = to_of_node(chip->fwnode); + if (!np) return 0; if (!chip->of_xlate) { @@ -1123,18 +1125,18 @@ int of_gpiochip_add(struct gpio_chip *chip) if (ret) return ret; - of_node_get(chip->of_node); + fwnode_handle_get(chip->fwnode); ret = of_gpiochip_scan_gpios(chip); if (ret) - of_node_put(chip->of_node); + fwnode_handle_put(chip->fwnode); return ret; } void of_gpiochip_remove(struct gpio_chip *chip) { - of_node_put(chip->of_node); + fwnode_handle_put(chip->fwnode); } void of_gpio_dev_init(struct gpio_chip *gc, struct gpio_device *gdev) From 27043a7d500c4a3debb899c28bbf492492f64e58 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 8 Nov 2022 15:38:53 +0200 Subject: [PATCH 1154/4122] gpiolib: of: Integrate of_gpiochip_init_valid_mask() into gpiochip_init_valid_mask() In preparation to complete fwnode switch, integrate of_gpiochip_init_valid_mask() into gpiochip_init_valid_mask(). 
Signed-off-by: Andy Shevchenko Reviewed-by: Dmitry Torokhov Reviewed-by: Linus Walleij Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-of.c | 42 ------------------------------ drivers/gpio/gpiolib-of.h | 5 ---- drivers/gpio/gpiolib.c | 54 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 53 insertions(+), 48 deletions(-) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index 000020eb78d8..4be3c21aa718 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -112,24 +112,6 @@ static struct gpio_desc *of_xlate_and_get_gpiod_flags(struct gpio_chip *chip, return gpiochip_get_desc(chip, ret); } -/** - * of_gpio_need_valid_mask() - figure out if the OF GPIO driver needs - * to set the .valid_mask - * @gc: the target gpio_chip - * - * Return: true if the valid mask needs to be set - */ -bool of_gpio_need_valid_mask(const struct gpio_chip *gc) -{ - int size; - const struct device_node *np = gc->of_node; - - size = of_property_count_u32_elems(np, "gpio-reserved-ranges"); - if (size > 0 && size % 2 == 0) - return true; - return false; -} - /* * Overrides stated polarity of a gpio line and warns when there is a * discrepancy. 
@@ -989,28 +971,6 @@ void of_mm_gpiochip_remove(struct of_mm_gpio_chip *mm_gc) } EXPORT_SYMBOL_GPL(of_mm_gpiochip_remove); -static void of_gpiochip_init_valid_mask(struct gpio_chip *chip) -{ - int len, i; - u32 start, count; - struct device_node *np = chip->of_node; - - len = of_property_count_u32_elems(np, "gpio-reserved-ranges"); - if (len < 0 || len % 2 != 0) - return; - - for (i = 0; i < len; i += 2) { - of_property_read_u32_index(np, "gpio-reserved-ranges", - i, &start); - of_property_read_u32_index(np, "gpio-reserved-ranges", - i + 1, &count); - if (start >= chip->ngpio || start + count > chip->ngpio) - continue; - - bitmap_clear(chip->valid_mask, start, count); - } -}; - #ifdef CONFIG_PINCTRL static int of_gpiochip_add_pin_range(struct gpio_chip *chip) { @@ -1119,8 +1079,6 @@ int of_gpiochip_add(struct gpio_chip *chip) if (chip->of_gpio_n_cells > MAX_PHANDLE_ARGS) return -EINVAL; - of_gpiochip_init_valid_mask(chip); - ret = of_gpiochip_add_pin_range(chip); if (ret) return ret; diff --git a/drivers/gpio/gpiolib-of.h b/drivers/gpio/gpiolib-of.h index 8af2bc899aab..2c32a332ede5 100644 --- a/drivers/gpio/gpiolib-of.h +++ b/drivers/gpio/gpiolib-of.h @@ -14,7 +14,6 @@ struct gpio_desc *of_find_gpio(struct device *dev, int of_gpiochip_add(struct gpio_chip *gc); void of_gpiochip_remove(struct gpio_chip *gc); int of_gpio_get_count(struct device *dev, const char *con_id); -bool of_gpio_need_valid_mask(const struct gpio_chip *gc); void of_gpio_dev_init(struct gpio_chip *gc, struct gpio_device *gdev); #else static inline struct gpio_desc *of_find_gpio(struct device *dev, @@ -30,10 +29,6 @@ static inline int of_gpio_get_count(struct device *dev, const char *con_id) { return 0; } -static inline bool of_gpio_need_valid_mask(const struct gpio_chip *gc) -{ - return false; -} static inline void of_gpio_dev_init(struct gpio_chip *gc, struct gpio_device *gdev) { diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index e8faedca6b14..11fb7ec883e9 100644 --- 
a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -445,9 +445,21 @@ static unsigned long *gpiochip_allocate_mask(struct gpio_chip *gc) return p; } +static unsigned int gpiochip_count_reserved_ranges(struct gpio_chip *gc) +{ + int size; + + /* Format is "start, count, ..." */ + size = fwnode_property_count_u32(gc->fwnode, "gpio-reserved-ranges"); + if (size > 0 && size % 2 == 0) + return size; + + return 0; +} + static int gpiochip_alloc_valid_mask(struct gpio_chip *gc) { - if (!(of_gpio_need_valid_mask(gc) || gc->init_valid_mask)) + if (!(gpiochip_count_reserved_ranges(gc) || gc->init_valid_mask)) return 0; gc->valid_mask = gpiochip_allocate_mask(gc); @@ -457,8 +469,48 @@ static int gpiochip_alloc_valid_mask(struct gpio_chip *gc) return 0; } +static int gpiochip_apply_reserved_ranges(struct gpio_chip *gc) +{ + unsigned int size; + u32 *ranges; + int ret; + + size = gpiochip_count_reserved_ranges(gc); + if (size == 0) + return 0; + + ranges = kmalloc_array(size, sizeof(*ranges), GFP_KERNEL); + if (!ranges) + return -ENOMEM; + + ret = fwnode_property_read_u32_array(gc->fwnode, "gpio-reserved-ranges", ranges, size); + if (ret) { + kfree(ranges); + return ret; + } + + while (size) { + u32 count = ranges[--size]; + u32 start = ranges[--size]; + + if (start >= gc->ngpio || start + count > gc->ngpio) + continue; + + bitmap_clear(gc->valid_mask, start, count); + } + + kfree(ranges); + return 0; +} + static int gpiochip_init_valid_mask(struct gpio_chip *gc) { + int ret; + + ret = gpiochip_apply_reserved_ranges(gc); + if (ret) + return ret; + if (gc->init_valid_mask) return gc->init_valid_mask(gc, gc->valid_mask, From bdf1da5df9da680589a7f74448dd0a94dd3e1446 Mon Sep 17 00:00:00 2001 From: Bernard Metzler Date: Mon, 7 Nov 2022 15:50:57 +0100 Subject: [PATCH 1155/4122] RDMA/siw: Fix immediate work request flush to completion queue Correctly set send queue element opcode during immediate work request flushing in post sendqueue operation, if the QP is in ERROR state. 
An undefined opcode value results in out-of-bounds access to an array for mapping the opcode between siw internal and RDMA core representation in work completion generation. It resulted in a KASAN BUG report of type 'global-out-of-bounds' during NFSoRDMA testing. This patch further fixes a potential case of a malicious user which may write undefined values for completion queue elements status or opcode, if the CQ is memory mapped to user land. It avoids the same out-of-bounds access to arrays for status and opcode mapping as described above. Fixes: 303ae1cdfdf7 ("rdma/siw: application interface") Fixes: b0fff7317bb4 ("rdma/siw: completion queue methods") Reported-by: Olga Kornievskaia Reviewed-by: Tom Talpey Signed-off-by: Bernard Metzler Link: https://lore.kernel.org/r/20221107145057.895747-1-bmt@zurich.ibm.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/siw/siw_cq.c | 24 ++++++++++++++-- drivers/infiniband/sw/siw/siw_verbs.c | 40 ++++++++++++++++++++++++--- 2 files changed, 58 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw_cq.c b/drivers/infiniband/sw/siw/siw_cq.c index d68e37859e73..acc7bcd538b5 100644 --- a/drivers/infiniband/sw/siw/siw_cq.c +++ b/drivers/infiniband/sw/siw/siw_cq.c @@ -56,8 +56,6 @@ int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc) if (READ_ONCE(cqe->flags) & SIW_WQE_VALID) { memset(wc, 0, sizeof(*wc)); wc->wr_id = cqe->id; - wc->status = map_cqe_status[cqe->status].ib; - wc->opcode = map_wc_opcode[cqe->opcode]; wc->byte_len = cqe->bytes; /* @@ -71,10 +69,32 @@ int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc) wc->wc_flags = IB_WC_WITH_INVALIDATE; } wc->qp = cqe->base_qp; + wc->opcode = map_wc_opcode[cqe->opcode]; + wc->status = map_cqe_status[cqe->status].ib; siw_dbg_cq(cq, "idx %u, type %d, flags %2x, id 0x%pK\n", cq->cq_get % cq->num_cqe, cqe->opcode, cqe->flags, (void *)(uintptr_t)cqe->id); + } else { + /* + * A malicious user may set invalid opcode or + * status in the user mmapped CQE array. 
+ * Sanity check and correct values in that case + * to avoid out-of-bounds access to global arrays + * for opcode and status mapping. + */ + u8 opcode = cqe->opcode; + u16 status = cqe->status; + + if (opcode >= SIW_NUM_OPCODES) { + opcode = 0; + status = IB_WC_GENERAL_ERR; + } else if (status >= SIW_NUM_WC_STATUS) { + status = IB_WC_GENERAL_ERR; + } + wc->opcode = map_wc_opcode[opcode]; + wc->status = map_cqe_status[status].ib; + } WRITE_ONCE(cqe->flags, 0); cq->cq_get++; diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index 3e814cfb298c..906fde1a2a0d 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -676,13 +676,45 @@ static int siw_copy_inline_sgl(const struct ib_send_wr *core_wr, static int siw_sq_flush_wr(struct siw_qp *qp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr) { - struct siw_sqe sqe = {}; int rv = 0; while (wr) { - sqe.id = wr->wr_id; - sqe.opcode = wr->opcode; - rv = siw_sqe_complete(qp, &sqe, 0, SIW_WC_WR_FLUSH_ERR); + struct siw_sqe sqe = {}; + + switch (wr->opcode) { + case IB_WR_RDMA_WRITE: + sqe.opcode = SIW_OP_WRITE; + break; + case IB_WR_RDMA_READ: + sqe.opcode = SIW_OP_READ; + break; + case IB_WR_RDMA_READ_WITH_INV: + sqe.opcode = SIW_OP_READ_LOCAL_INV; + break; + case IB_WR_SEND: + sqe.opcode = SIW_OP_SEND; + break; + case IB_WR_SEND_WITH_IMM: + sqe.opcode = SIW_OP_SEND_WITH_IMM; + break; + case IB_WR_SEND_WITH_INV: + sqe.opcode = SIW_OP_SEND_REMOTE_INV; + break; + case IB_WR_LOCAL_INV: + sqe.opcode = SIW_OP_INVAL_STAG; + break; + case IB_WR_REG_MR: + sqe.opcode = SIW_OP_REG_MR; + break; + default: + rv = -EINVAL; + break; + } + if (!rv) { + sqe.id = wr->wr_id; + rv = siw_sqe_complete(qp, &sqe, 0, + SIW_WC_WR_FLUSH_ERR); + } if (rv) { if (bad_wr) *bad_wr = wr; From ce8cc75c7419ad54cb99437543a54c97c7446db5 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Tue, 18 Oct 2022 16:34:24 +0800 Subject: [PATCH 1156/4122] drivers: staging: 
r8188eu: Fix sleep-in-atomic-context bug in rtw_join_timeout_handler The rtw_join_timeout_handler() is a timer handler that runs in atomic context, but it could call msleep(). As a result, the sleep-in-atomic-context bug will happen. The process is shown below: (atomic context) rtw_join_timeout_handler _rtw_join_timeout_handler rtw_do_join rtw_select_and_join_from_scanned_queue rtw_indicate_disconnect rtw_lps_ctrl_wk_cmd lps_ctrl_wk_hdl LPS_Leave LPS_RF_ON_check msleep //sleep in atomic context Fix by removing msleep() and replacing with mdelay(). Fixes: 15865124feed ("staging: r8188eu: introduce new core dir for RTL8188eu driver") Signed-off-by: Duoming Zhou Link: https://lore.kernel.org/r/20221018083424.79741-1-duoming@zju.edu.cn Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_pwrctrl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/r8188eu/core/rtw_pwrctrl.c b/drivers/staging/r8188eu/core/rtw_pwrctrl.c index 870d81735b8d..5290ac36f08c 100644 --- a/drivers/staging/r8188eu/core/rtw_pwrctrl.c +++ b/drivers/staging/r8188eu/core/rtw_pwrctrl.c @@ -273,7 +273,7 @@ static s32 LPS_RF_ON_check(struct adapter *padapter, u32 delay_ms) err = -1; break; } - msleep(1); + mdelay(1); } return err; From 8ce256540f41c84cb910a681c2cb84e8c80ab8db Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 8 Nov 2022 14:34:46 -0600 Subject: [PATCH 1157/4122] staging: ks7010: Avoid clashing function prototypes When built with Control Flow Integrity, function prototypes between caller and function declaration must match. These mismatches are visible at compile time with the new -Wcast-function-type-strict in Clang[1]. 
Fix a total of 27 warnings like these: drivers/staging/ks7010/ks_wlan_net.c:2415:2: warning: cast from 'int (*)(struct net_device *, struct iw_request_info *, struct iw_point *, char *)' to 'iw_handler' (aka 'int (*)(struct net_device *, struct iw_request_info *, union iwreq_data *, char *)') converts to incompatible function type [-Wcast-function-type-strict] (iw_handler)ks_wlan_get_firmware_version,/* 3 KS_WLAN_GET_FIRM_VERSION */ ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The ks_wlan_net Wireless Extension handler callbacks (iw_handler) use a union for the data argument. Actually use the union and perform explicit member selection in the function body instead of having a function prototype mismatch. There are no resulting binary differences before/after changes. These changes were made partly manually and partly with the help of Coccinelle. Link: https://reviews.llvm.org/D134831 [1] Signed-off-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/8d2ceee1248b5a76e9b6c379f578e65482c91168.1667934775.git.gustavoars@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/ks7010/ks_wlan_net.c | 248 ++++++++++++++------------- 1 file changed, 126 insertions(+), 122 deletions(-) diff --git a/drivers/staging/ks7010/ks_wlan_net.c b/drivers/staging/ks7010/ks_wlan_net.c index 7e8d37c169f0..044c807ca022 100644 --- a/drivers/staging/ks7010/ks_wlan_net.c +++ b/drivers/staging/ks7010/ks_wlan_net.c @@ -1763,8 +1763,8 @@ static struct iw_statistics *ks_get_wireless_stats(struct net_device *dev) } static int ks_wlan_set_stop_request(struct net_device *dev, - struct iw_request_info *info, __u32 *uwrq, - char *extra) + struct iw_request_info *info, + union iwreq_data *uwrq, char *extra) { struct ks_wlan_private *priv = netdev_priv(dev); @@ -1772,7 +1772,7 @@ static int ks_wlan_set_stop_request(struct net_device *dev, return -EPERM; /* for SLEEP MODE */ - if (!(*uwrq)) + if (!(uwrq->mode)) return -EINVAL; hostif_sme_enqueue(priv, SME_STOP_REQUEST); @@ -1786,7 +1786,9 @@ 
static int ks_wlan_set_mlme(struct net_device *dev, { struct ks_wlan_private *priv = netdev_priv(dev); struct iw_mlme *mlme = (struct iw_mlme *)extra; - __u32 mode = 1; + union iwreq_data uwrq; + + uwrq.mode = 1; if (priv->sleep_mode == SLP_SLEEP) return -EPERM; @@ -1799,13 +1801,14 @@ static int ks_wlan_set_mlme(struct net_device *dev, mlme->reason_code == WLAN_REASON_MIC_FAILURE) return 0; - return ks_wlan_set_stop_request(dev, NULL, &mode, NULL); + return ks_wlan_set_stop_request(dev, NULL, &uwrq, NULL); } static int ks_wlan_get_firmware_version(struct net_device *dev, struct iw_request_info *info, - struct iw_point *dwrq, char *extra) + union iwreq_data *uwrq, char *extra) { + struct iw_point *dwrq = &uwrq->data; struct ks_wlan_private *priv = netdev_priv(dev); dwrq->length = priv->version_size + 1; @@ -1814,8 +1817,8 @@ static int ks_wlan_get_firmware_version(struct net_device *dev, } static int ks_wlan_set_preamble(struct net_device *dev, - struct iw_request_info *info, __u32 *uwrq, - char *extra) + struct iw_request_info *info, + union iwreq_data *uwrq, char *extra) { struct ks_wlan_private *priv = netdev_priv(dev); @@ -1823,17 +1826,17 @@ static int ks_wlan_set_preamble(struct net_device *dev, return -EPERM; /* for SLEEP MODE */ - if (*uwrq != LONG_PREAMBLE && *uwrq != SHORT_PREAMBLE) + if (uwrq->mode != LONG_PREAMBLE && uwrq->mode != SHORT_PREAMBLE) return -EINVAL; - priv->reg.preamble = *uwrq; + priv->reg.preamble = uwrq->mode; priv->need_commit |= SME_MODE_SET; return -EINPROGRESS; /* Call commit handler */ } static int ks_wlan_get_preamble(struct net_device *dev, - struct iw_request_info *info, __u32 *uwrq, - char *extra) + struct iw_request_info *info, + union iwreq_data *uwrq, char *extra) { struct ks_wlan_private *priv = netdev_priv(dev); @@ -1841,37 +1844,37 @@ static int ks_wlan_get_preamble(struct net_device *dev, return -EPERM; /* for SLEEP MODE */ - *uwrq = priv->reg.preamble; + uwrq->mode = priv->reg.preamble; return 0; } static int 
ks_wlan_set_power_mgmt(struct net_device *dev, - struct iw_request_info *info, __u32 *uwrq, - char *extra) + struct iw_request_info *info, + union iwreq_data *uwrq, char *extra) { struct ks_wlan_private *priv = netdev_priv(dev); if (priv->sleep_mode == SLP_SLEEP) return -EPERM; - if (*uwrq != POWER_MGMT_ACTIVE && - *uwrq != POWER_MGMT_SAVE1 && - *uwrq != POWER_MGMT_SAVE2) + if (uwrq->mode != POWER_MGMT_ACTIVE && + uwrq->mode != POWER_MGMT_SAVE1 && + uwrq->mode != POWER_MGMT_SAVE2) return -EINVAL; - if ((*uwrq == POWER_MGMT_SAVE1 || *uwrq == POWER_MGMT_SAVE2) && + if ((uwrq->mode == POWER_MGMT_SAVE1 || uwrq->mode == POWER_MGMT_SAVE2) && (priv->reg.operation_mode != MODE_INFRASTRUCTURE)) return -EINVAL; - priv->reg.power_mgmt = *uwrq; + priv->reg.power_mgmt = uwrq->mode; hostif_sme_enqueue(priv, SME_POW_MNGMT_REQUEST); return 0; } static int ks_wlan_get_power_mgmt(struct net_device *dev, - struct iw_request_info *info, __u32 *uwrq, - char *extra) + struct iw_request_info *info, + union iwreq_data *uwrq, char *extra) { struct ks_wlan_private *priv = netdev_priv(dev); @@ -1879,13 +1882,13 @@ static int ks_wlan_get_power_mgmt(struct net_device *dev, return -EPERM; /* for SLEEP MODE */ - *uwrq = priv->reg.power_mgmt; + uwrq->mode = priv->reg.power_mgmt; return 0; } static int ks_wlan_set_scan_type(struct net_device *dev, - struct iw_request_info *info, __u32 *uwrq, - char *extra) + struct iw_request_info *info, + union iwreq_data *uwrq, char *extra) { struct ks_wlan_private *priv = netdev_priv(dev); @@ -1893,39 +1896,39 @@ static int ks_wlan_set_scan_type(struct net_device *dev, return -EPERM; /* for SLEEP MODE */ - if (*uwrq != ACTIVE_SCAN && *uwrq != PASSIVE_SCAN) + if (uwrq->mode != ACTIVE_SCAN && uwrq->mode != PASSIVE_SCAN) return -EINVAL; - priv->reg.scan_type = *uwrq; + priv->reg.scan_type = uwrq->mode; return 0; } static int ks_wlan_get_scan_type(struct net_device *dev, - struct iw_request_info *info, __u32 *uwrq, - char *extra) + struct iw_request_info *info, + 
union iwreq_data *uwrq, char *extra) { struct ks_wlan_private *priv = netdev_priv(dev); if (priv->sleep_mode == SLP_SLEEP) return -EPERM; /* for SLEEP MODE */ - *uwrq = priv->reg.scan_type; + uwrq->mode = priv->reg.scan_type; return 0; } static int ks_wlan_set_beacon_lost(struct net_device *dev, - struct iw_request_info *info, __u32 *uwrq, - char *extra) + struct iw_request_info *info, + union iwreq_data *uwrq, char *extra) { struct ks_wlan_private *priv = netdev_priv(dev); if (priv->sleep_mode == SLP_SLEEP) return -EPERM; /* for SLEEP MODE */ - if (*uwrq > BEACON_LOST_COUNT_MAX) + if (uwrq->mode > BEACON_LOST_COUNT_MAX) return -EINVAL; - priv->reg.beacon_lost_count = *uwrq; + priv->reg.beacon_lost_count = uwrq->mode; if (priv->reg.operation_mode == MODE_INFRASTRUCTURE) { priv->need_commit |= SME_MODE_SET; @@ -1936,101 +1939,101 @@ static int ks_wlan_set_beacon_lost(struct net_device *dev, } static int ks_wlan_get_beacon_lost(struct net_device *dev, - struct iw_request_info *info, __u32 *uwrq, - char *extra) + struct iw_request_info *info, + union iwreq_data *uwrq, char *extra) { struct ks_wlan_private *priv = netdev_priv(dev); if (priv->sleep_mode == SLP_SLEEP) return -EPERM; /* for SLEEP MODE */ - *uwrq = priv->reg.beacon_lost_count; + uwrq->mode = priv->reg.beacon_lost_count; return 0; } static int ks_wlan_set_phy_type(struct net_device *dev, - struct iw_request_info *info, __u32 *uwrq, - char *extra) + struct iw_request_info *info, + union iwreq_data *uwrq, char *extra) { struct ks_wlan_private *priv = netdev_priv(dev); if (priv->sleep_mode == SLP_SLEEP) return -EPERM; - if (*uwrq != D_11B_ONLY_MODE && - *uwrq != D_11G_ONLY_MODE && - *uwrq != D_11BG_COMPATIBLE_MODE) + if (uwrq->mode != D_11B_ONLY_MODE && + uwrq->mode != D_11G_ONLY_MODE && + uwrq->mode != D_11BG_COMPATIBLE_MODE) return -EINVAL; /* for SLEEP MODE */ - priv->reg.phy_type = *uwrq; + priv->reg.phy_type = uwrq->mode; priv->need_commit |= SME_MODE_SET; return -EINPROGRESS; /* Call commit handler */ } 
static int ks_wlan_get_phy_type(struct net_device *dev, - struct iw_request_info *info, __u32 *uwrq, - char *extra) + struct iw_request_info *info, + union iwreq_data *uwrq, char *extra) { struct ks_wlan_private *priv = netdev_priv(dev); if (priv->sleep_mode == SLP_SLEEP) return -EPERM; /* for SLEEP MODE */ - *uwrq = priv->reg.phy_type; + uwrq->mode = priv->reg.phy_type; return 0; } static int ks_wlan_set_cts_mode(struct net_device *dev, - struct iw_request_info *info, __u32 *uwrq, - char *extra) + struct iw_request_info *info, + union iwreq_data *uwrq, char *extra) { struct ks_wlan_private *priv = netdev_priv(dev); if (priv->sleep_mode == SLP_SLEEP) return -EPERM; /* for SLEEP MODE */ - if (*uwrq != CTS_MODE_FALSE && *uwrq != CTS_MODE_TRUE) + if (uwrq->mode != CTS_MODE_FALSE && uwrq->mode != CTS_MODE_TRUE) return -EINVAL; - priv->reg.cts_mode = (*uwrq == CTS_MODE_FALSE) ? *uwrq : + priv->reg.cts_mode = (uwrq->mode == CTS_MODE_FALSE) ? uwrq->mode : (priv->reg.phy_type == D_11G_ONLY_MODE || priv->reg.phy_type == D_11BG_COMPATIBLE_MODE) ? 
- *uwrq : !*uwrq; + uwrq->mode : !uwrq->mode; priv->need_commit |= SME_MODE_SET; return -EINPROGRESS; /* Call commit handler */ } static int ks_wlan_get_cts_mode(struct net_device *dev, - struct iw_request_info *info, __u32 *uwrq, - char *extra) + struct iw_request_info *info, + union iwreq_data *uwrq, char *extra) { struct ks_wlan_private *priv = netdev_priv(dev); if (priv->sleep_mode == SLP_SLEEP) return -EPERM; /* for SLEEP MODE */ - *uwrq = priv->reg.cts_mode; + uwrq->mode = priv->reg.cts_mode; return 0; } static int ks_wlan_set_sleep_mode(struct net_device *dev, struct iw_request_info *info, - __u32 *uwrq, char *extra) + union iwreq_data *uwrq, char *extra) { struct ks_wlan_private *priv = netdev_priv(dev); - if (*uwrq != SLP_SLEEP && - *uwrq != SLP_ACTIVE) { - netdev_err(dev, "SET_SLEEP_MODE %d error\n", *uwrq); + if (uwrq->mode != SLP_SLEEP && + uwrq->mode != SLP_ACTIVE) { + netdev_err(dev, "SET_SLEEP_MODE %d error\n", uwrq->mode); return -EINVAL; } - priv->sleep_mode = *uwrq; + priv->sleep_mode = uwrq->mode; netdev_info(dev, "SET_SLEEP_MODE %d\n", priv->sleep_mode); - if (*uwrq == SLP_SLEEP) + if (uwrq->mode == SLP_SLEEP) hostif_sme_enqueue(priv, SME_STOP_REQUEST); hostif_sme_enqueue(priv, SME_SLEEP_REQUEST); @@ -2040,52 +2043,53 @@ static int ks_wlan_set_sleep_mode(struct net_device *dev, static int ks_wlan_get_sleep_mode(struct net_device *dev, struct iw_request_info *info, - __u32 *uwrq, char *extra) + union iwreq_data *uwrq, char *extra) { struct ks_wlan_private *priv = netdev_priv(dev); - *uwrq = priv->sleep_mode; + uwrq->mode = priv->sleep_mode; return 0; } static int ks_wlan_set_wps_enable(struct net_device *dev, - struct iw_request_info *info, __u32 *uwrq, - char *extra) + struct iw_request_info *info, + union iwreq_data *uwrq, char *extra) { struct ks_wlan_private *priv = netdev_priv(dev); if (priv->sleep_mode == SLP_SLEEP) return -EPERM; /* for SLEEP MODE */ - if (*uwrq != 0 && *uwrq != 1) + if (uwrq->mode != 0 && uwrq->mode != 1) return -EINVAL; 
- priv->wps.wps_enabled = *uwrq; + priv->wps.wps_enabled = uwrq->mode; hostif_sme_enqueue(priv, SME_WPS_ENABLE_REQUEST); return 0; } static int ks_wlan_get_wps_enable(struct net_device *dev, - struct iw_request_info *info, __u32 *uwrq, - char *extra) + struct iw_request_info *info, + union iwreq_data *uwrq, char *extra) { struct ks_wlan_private *priv = netdev_priv(dev); if (priv->sleep_mode == SLP_SLEEP) return -EPERM; /* for SLEEP MODE */ - *uwrq = priv->wps.wps_enabled; - netdev_info(dev, "return=%d\n", *uwrq); + uwrq->mode = priv->wps.wps_enabled; + netdev_info(dev, "return=%d\n", uwrq->mode); return 0; } static int ks_wlan_set_wps_probe_req(struct net_device *dev, struct iw_request_info *info, - struct iw_point *dwrq, char *extra) + union iwreq_data *uwrq, char *extra) { + struct iw_point *dwrq = &uwrq->data; u8 *p = extra; unsigned char len; struct ks_wlan_private *priv = netdev_priv(dev); @@ -2114,76 +2118,76 @@ static int ks_wlan_set_wps_probe_req(struct net_device *dev, } static int ks_wlan_set_tx_gain(struct net_device *dev, - struct iw_request_info *info, __u32 *uwrq, - char *extra) + struct iw_request_info *info, + union iwreq_data *uwrq, char *extra) { struct ks_wlan_private *priv = netdev_priv(dev); if (priv->sleep_mode == SLP_SLEEP) return -EPERM; /* for SLEEP MODE */ - if (*uwrq > 0xFF) + if (uwrq->mode > 0xFF) return -EINVAL; - priv->gain.tx_gain = (u8)*uwrq; + priv->gain.tx_gain = (u8)uwrq->mode; priv->gain.tx_mode = (priv->gain.tx_gain < 0xFF) ? 
1 : 0; hostif_sme_enqueue(priv, SME_SET_GAIN); return 0; } static int ks_wlan_get_tx_gain(struct net_device *dev, - struct iw_request_info *info, __u32 *uwrq, - char *extra) + struct iw_request_info *info, + union iwreq_data *uwrq, char *extra) { struct ks_wlan_private *priv = netdev_priv(dev); if (priv->sleep_mode == SLP_SLEEP) return -EPERM; /* for SLEEP MODE */ - *uwrq = priv->gain.tx_gain; + uwrq->mode = priv->gain.tx_gain; hostif_sme_enqueue(priv, SME_GET_GAIN); return 0; } static int ks_wlan_set_rx_gain(struct net_device *dev, - struct iw_request_info *info, __u32 *uwrq, - char *extra) + struct iw_request_info *info, + union iwreq_data *uwrq, char *extra) { struct ks_wlan_private *priv = netdev_priv(dev); if (priv->sleep_mode == SLP_SLEEP) return -EPERM; /* for SLEEP MODE */ - if (*uwrq > 0xFF) + if (uwrq->mode > 0xFF) return -EINVAL; - priv->gain.rx_gain = (u8)*uwrq; + priv->gain.rx_gain = (u8)uwrq->mode; priv->gain.rx_mode = (priv->gain.rx_gain < 0xFF) ? 1 : 0; hostif_sme_enqueue(priv, SME_SET_GAIN); return 0; } static int ks_wlan_get_rx_gain(struct net_device *dev, - struct iw_request_info *info, __u32 *uwrq, - char *extra) + struct iw_request_info *info, + union iwreq_data *uwrq, char *extra) { struct ks_wlan_private *priv = netdev_priv(dev); if (priv->sleep_mode == SLP_SLEEP) return -EPERM; /* for SLEEP MODE */ - *uwrq = priv->gain.rx_gain; + uwrq->mode = priv->gain.rx_gain; hostif_sme_enqueue(priv, SME_GET_GAIN); return 0; } static int ks_wlan_get_eeprom_cksum(struct net_device *dev, - struct iw_request_info *info, __u32 *uwrq, - char *extra) + struct iw_request_info *info, + union iwreq_data *uwrq, char *extra) { struct ks_wlan_private *priv = netdev_priv(dev); - *uwrq = priv->eeprom_checksum; + uwrq->mode = priv->eeprom_checksum; return 0; } @@ -2302,7 +2306,7 @@ static void print_hif_event(struct net_device *dev, int event) /* get host command history */ static int ks_wlan_hostt(struct net_device *dev, struct iw_request_info *info, - __u32 *uwrq, 
char *extra) + union iwreq_data *uwrq, char *extra) { int i, event; struct ks_wlan_private *priv = netdev_priv(dev); @@ -2409,38 +2413,38 @@ static const iw_handler ks_wlan_handler[] = { /* private_handler */ static const iw_handler ks_wlan_private_handler[] = { - (iw_handler)NULL, /* 0 */ - (iw_handler)NULL, /* 1, KS_WLAN_GET_DRIVER_VERSION */ - (iw_handler)NULL, /* 2 */ - (iw_handler)ks_wlan_get_firmware_version,/* 3 KS_WLAN_GET_FIRM_VERSION */ - (iw_handler)ks_wlan_set_wps_enable, /* 4 KS_WLAN_SET_WPS_ENABLE */ - (iw_handler)ks_wlan_get_wps_enable, /* 5 KS_WLAN_GET_WPS_ENABLE */ - (iw_handler)ks_wlan_set_wps_probe_req, /* 6 KS_WLAN_SET_WPS_PROBE_REQ */ - (iw_handler)ks_wlan_get_eeprom_cksum, /* 7 KS_WLAN_GET_CONNECT */ - (iw_handler)ks_wlan_set_preamble, /* 8 KS_WLAN_SET_PREAMBLE */ - (iw_handler)ks_wlan_get_preamble, /* 9 KS_WLAN_GET_PREAMBLE */ - (iw_handler)ks_wlan_set_power_mgmt, /* 10 KS_WLAN_SET_POWER_SAVE */ - (iw_handler)ks_wlan_get_power_mgmt, /* 11 KS_WLAN_GET_POWER_SAVE */ - (iw_handler)ks_wlan_set_scan_type, /* 12 KS_WLAN_SET_SCAN_TYPE */ - (iw_handler)ks_wlan_get_scan_type, /* 13 KS_WLAN_GET_SCAN_TYPE */ - (iw_handler)ks_wlan_set_rx_gain, /* 14 KS_WLAN_SET_RX_GAIN */ - (iw_handler)ks_wlan_get_rx_gain, /* 15 KS_WLAN_GET_RX_GAIN */ - (iw_handler)ks_wlan_hostt, /* 16 KS_WLAN_HOSTT */ - (iw_handler)NULL, /* 17 */ - (iw_handler)ks_wlan_set_beacon_lost, /* 18 KS_WLAN_SET_BECAN_LOST */ - (iw_handler)ks_wlan_get_beacon_lost, /* 19 KS_WLAN_GET_BECAN_LOST */ - (iw_handler)ks_wlan_set_tx_gain, /* 20 KS_WLAN_SET_TX_GAIN */ - (iw_handler)ks_wlan_get_tx_gain, /* 21 KS_WLAN_GET_TX_GAIN */ - (iw_handler)ks_wlan_set_phy_type, /* 22 KS_WLAN_SET_PHY_TYPE */ - (iw_handler)ks_wlan_get_phy_type, /* 23 KS_WLAN_GET_PHY_TYPE */ - (iw_handler)ks_wlan_set_cts_mode, /* 24 KS_WLAN_SET_CTS_MODE */ - (iw_handler)ks_wlan_get_cts_mode, /* 25 KS_WLAN_GET_CTS_MODE */ - (iw_handler)NULL, /* 26 */ - (iw_handler)NULL, /* 27 */ - (iw_handler)ks_wlan_set_sleep_mode, /* 28 
KS_WLAN_SET_SLEEP_MODE */ - (iw_handler)ks_wlan_get_sleep_mode, /* 29 KS_WLAN_GET_SLEEP_MODE */ - (iw_handler)NULL, /* 30 */ - (iw_handler)NULL, /* 31 */ + NULL, /* 0 */ + NULL, /* 1, KS_WLAN_GET_DRIVER_VERSION */ + NULL, /* 2 */ + ks_wlan_get_firmware_version, /* 3 KS_WLAN_GET_FIRM_VERSION */ + ks_wlan_set_wps_enable, /* 4 KS_WLAN_SET_WPS_ENABLE */ + ks_wlan_get_wps_enable, /* 5 KS_WLAN_GET_WPS_ENABLE */ + ks_wlan_set_wps_probe_req, /* 6 KS_WLAN_SET_WPS_PROBE_REQ */ + ks_wlan_get_eeprom_cksum, /* 7 KS_WLAN_GET_CONNECT */ + ks_wlan_set_preamble, /* 8 KS_WLAN_SET_PREAMBLE */ + ks_wlan_get_preamble, /* 9 KS_WLAN_GET_PREAMBLE */ + ks_wlan_set_power_mgmt, /* 10 KS_WLAN_SET_POWER_SAVE */ + ks_wlan_get_power_mgmt, /* 11 KS_WLAN_GET_POWER_SAVE */ + ks_wlan_set_scan_type, /* 12 KS_WLAN_SET_SCAN_TYPE */ + ks_wlan_get_scan_type, /* 13 KS_WLAN_GET_SCAN_TYPE */ + ks_wlan_set_rx_gain, /* 14 KS_WLAN_SET_RX_GAIN */ + ks_wlan_get_rx_gain, /* 15 KS_WLAN_GET_RX_GAIN */ + ks_wlan_hostt, /* 16 KS_WLAN_HOSTT */ + NULL, /* 17 */ + ks_wlan_set_beacon_lost, /* 18 KS_WLAN_SET_BECAN_LOST */ + ks_wlan_get_beacon_lost, /* 19 KS_WLAN_GET_BECAN_LOST */ + ks_wlan_set_tx_gain, /* 20 KS_WLAN_SET_TX_GAIN */ + ks_wlan_get_tx_gain, /* 21 KS_WLAN_GET_TX_GAIN */ + ks_wlan_set_phy_type, /* 22 KS_WLAN_SET_PHY_TYPE */ + ks_wlan_get_phy_type, /* 23 KS_WLAN_GET_PHY_TYPE */ + ks_wlan_set_cts_mode, /* 24 KS_WLAN_SET_CTS_MODE */ + ks_wlan_get_cts_mode, /* 25 KS_WLAN_GET_CTS_MODE */ + NULL, /* 26 */ + NULL, /* 27 */ + ks_wlan_set_sleep_mode, /* 28 KS_WLAN_SET_SLEEP_MODE */ + ks_wlan_get_sleep_mode, /* 29 KS_WLAN_GET_SLEEP_MODE */ + NULL, /* 30 */ + NULL, /* 31 */ }; static const struct iw_handler_def ks_wlan_handler_def = { @@ -2461,7 +2465,7 @@ static int ks_wlan_netdev_ioctl(struct net_device *dev, struct ifreq *rq, switch (cmd) { case SIOCIWFIRSTPRIV + 20: /* KS_WLAN_SET_STOP_REQ */ - ret = ks_wlan_set_stop_request(dev, NULL, &wrq->u.mode, NULL); + ret = ks_wlan_set_stop_request(dev, NULL, &wrq->u, NULL); 
break; // All other calls are currently unsupported default: From fb7d5ebea988899500e9a067d950034dbe8a0fbd Mon Sep 17 00:00:00 2001 From: Deepak R Varma Date: Wed, 9 Nov 2022 16:12:43 +0530 Subject: [PATCH 1158/4122] staging: fieldbus: use sysfs_emit() in show functions The show() methods should only use sysfs_emit() when formatting values to be returned to the user space. Ref: Documentation/filesystems/sysfs.rst Issue identified by coccicheck. Signed-off-by: Deepak R Varma Link: https://lore.kernel.org/r/Y2uEIzebbM/Fs5Jz@qemulion Signed-off-by: Greg Kroah-Hartman --- drivers/staging/fieldbus/dev_core.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/staging/fieldbus/dev_core.c b/drivers/staging/fieldbus/dev_core.c index 5aab734606ea..5f54f2674bd1 100644 --- a/drivers/staging/fieldbus/dev_core.c +++ b/drivers/staging/fieldbus/dev_core.c @@ -28,7 +28,7 @@ static ssize_t online_show(struct device *dev, struct device_attribute *attr, { struct fieldbus_dev *fb = dev_get_drvdata(dev); - return sprintf(buf, "%d\n", !!fb->online); + return sysfs_emit(buf, "%d\n", !!fb->online); } static DEVICE_ATTR_RO(online); @@ -39,7 +39,7 @@ static ssize_t enabled_show(struct device *dev, struct device_attribute *attr, if (!fb->enable_get) return -EINVAL; - return sprintf(buf, "%d\n", !!fb->enable_get(fb)); + return sysfs_emit(buf, "%d\n", !!fb->enable_get(fb)); } static ssize_t enabled_store(struct device *dev, struct device_attribute *attr, @@ -66,11 +66,8 @@ static ssize_t card_name_show(struct device *dev, struct device_attribute *attr, { struct fieldbus_dev *fb = dev_get_drvdata(dev); - /* - * card_name was provided by child driver, could potentially be long. - * protect against buffer overrun. - */ - return snprintf(buf, PAGE_SIZE, "%s\n", fb->card_name); + /* card_name was provided by child driver. 
*/ + return sysfs_emit(buf, "%s\n", fb->card_name); } static DEVICE_ATTR_RO(card_name); @@ -79,7 +76,7 @@ static ssize_t read_area_size_show(struct device *dev, { struct fieldbus_dev *fb = dev_get_drvdata(dev); - return sprintf(buf, "%zu\n", fb->read_area_sz); + return sysfs_emit(buf, "%zu\n", fb->read_area_sz); } static DEVICE_ATTR_RO(read_area_size); @@ -88,7 +85,7 @@ static ssize_t write_area_size_show(struct device *dev, { struct fieldbus_dev *fb = dev_get_drvdata(dev); - return sprintf(buf, "%zu\n", fb->write_area_sz); + return sysfs_emit(buf, "%zu\n", fb->write_area_sz); } static DEVICE_ATTR_RO(write_area_size); @@ -116,7 +113,7 @@ static ssize_t fieldbus_type_show(struct device *dev, break; } - return sprintf(buf, "%s\n", t); + return sysfs_emit(buf, "%s\n", t); } static DEVICE_ATTR_RO(fieldbus_type); From 335140116703920ddcbd9a09ae9edfb02902f3d1 Mon Sep 17 00:00:00 2001 From: Deepak R Varma Date: Tue, 8 Nov 2022 23:39:56 +0530 Subject: [PATCH 1159/4122] staging: rtl8192u: remove redundant macro definitions Several ieee80211_* symbol names are extended with _rsl tag using macros. This is done to avoid a conflict when a similar symbol is already in use in another part of kernel and may lead to conflicts. However, most of these base symbol names are not found to being used anywhere in the code and hence are not useful today. These symbols are not used outside of the module and hence can be safely removed. The code continues to use the original symbol names. 
Suggested-by: Greg Kroah-Hartman Signed-off-by: Deepak R Varma Link: https://lore.kernel.org/r/17f69a042e215c484931a0327fdf7775eea5f918.1667930292.git.drv@mailo.com Signed-off-by: Greg Kroah-Hartman --- .../staging/rtl8192u/ieee80211/ieee80211.h | 61 ------------------- 1 file changed, 61 deletions(-) diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211.h b/drivers/staging/rtl8192u/ieee80211/ieee80211.h index 00c07455cbb3..c5c43d2fb93e 100644 --- a/drivers/staging/rtl8192u/ieee80211/ieee80211.h +++ b/drivers/staging/rtl8192u/ieee80211/ieee80211.h @@ -223,71 +223,10 @@ struct cb_desc { #define MAX_IE_LEN 0xff // added for kernel conflict -#define ieee80211_crypt_deinit_entries ieee80211_crypt_deinit_entries_rsl -#define ieee80211_crypt_deinit_handler ieee80211_crypt_deinit_handler_rsl -#define ieee80211_crypt_delayed_deinit ieee80211_crypt_delayed_deinit_rsl -#define ieee80211_register_crypto_ops ieee80211_register_crypto_ops_rsl -#define ieee80211_unregister_crypto_ops ieee80211_unregister_crypto_ops_rsl -#define ieee80211_get_crypto_ops ieee80211_get_crypto_ops_rsl - -#define ieee80211_ccmp_null ieee80211_ccmp_null_rsl - -#define free_ieee80211 free_ieee80211_rsl -#define alloc_ieee80211 alloc_ieee80211_rsl - -#define ieee80211_rx ieee80211_rx_rsl -#define ieee80211_rx_mgt ieee80211_rx_mgt_rsl - -#define ieee80211_get_beacon ieee80211_get_beacon_rsl #define ieee80211_wake_queue ieee80211_wake_queue_rsl #define ieee80211_stop_queue ieee80211_stop_queue_rsl -#define ieee80211_reset_queue ieee80211_reset_queue_rsl -#define ieee80211_softmac_stop_protocol ieee80211_softmac_stop_protocol_rsl -#define ieee80211_softmac_start_protocol ieee80211_softmac_start_protocol_rsl -#define ieee80211_is_shortslot ieee80211_is_shortslot_rsl -#define ieee80211_is_54g ieee80211_is_54g_rsl -#define ieee80211_wpa_supplicant_ioctl ieee80211_wpa_supplicant_ioctl_rsl -#define ieee80211_ps_tx_ack ieee80211_ps_tx_ack_rsl -#define ieee80211_softmac_xmit ieee80211_softmac_xmit_rsl 
-#define ieee80211_stop_send_beacons ieee80211_stop_send_beacons_rsl #define notify_wx_assoc_event notify_wx_assoc_event_rsl #define SendDisassociation SendDisassociation_rsl -#define ieee80211_disassociate ieee80211_disassociate_rsl -#define ieee80211_start_send_beacons ieee80211_start_send_beacons_rsl -#define ieee80211_stop_scan ieee80211_stop_scan_rsl -#define ieee80211_send_probe_requests ieee80211_send_probe_requests_rsl -#define ieee80211_softmac_scan_syncro ieee80211_softmac_scan_syncro_rsl -#define ieee80211_start_scan_syncro ieee80211_start_scan_syncro_rsl - -#define ieee80211_wx_get_essid ieee80211_wx_get_essid_rsl -#define ieee80211_wx_set_essid ieee80211_wx_set_essid_rsl -#define ieee80211_wx_set_rate ieee80211_wx_set_rate_rsl -#define ieee80211_wx_get_rate ieee80211_wx_get_rate_rsl -#define ieee80211_wx_set_wap ieee80211_wx_set_wap_rsl -#define ieee80211_wx_get_wap ieee80211_wx_get_wap_rsl -#define ieee80211_wx_set_mode ieee80211_wx_set_mode_rsl -#define ieee80211_wx_get_mode ieee80211_wx_get_mode_rsl -#define ieee80211_wx_set_scan ieee80211_wx_set_scan_rsl -#define ieee80211_wx_get_freq ieee80211_wx_get_freq_rsl -#define ieee80211_wx_set_freq ieee80211_wx_set_freq_rsl -#define ieee80211_wx_set_rawtx ieee80211_wx_set_rawtx_rsl -#define ieee80211_wx_get_name ieee80211_wx_get_name_rsl -#define ieee80211_wx_set_power ieee80211_wx_set_power_rsl -#define ieee80211_wx_get_power ieee80211_wx_get_power_rsl -#define ieee80211_wlan_frequencies ieee80211_wlan_frequencies_rsl -#define ieee80211_wx_set_rts ieee80211_wx_set_rts_rsl -#define ieee80211_wx_get_rts ieee80211_wx_get_rts_rsl - -#define ieee80211_txb_free ieee80211_txb_free_rsl - -#define ieee80211_wx_set_gen_ie ieee80211_wx_set_gen_ie_rsl -#define ieee80211_wx_get_scan ieee80211_wx_get_scan_rsl -#define ieee80211_wx_set_encode ieee80211_wx_set_encode_rsl -#define ieee80211_wx_get_encode ieee80211_wx_get_encode_rsl -#define ieee80211_wx_set_mlme ieee80211_wx_set_mlme_rsl -#define ieee80211_wx_set_auth 
ieee80211_wx_set_auth_rsl -#define ieee80211_wx_set_encode_ext ieee80211_wx_set_encode_ext_rsl -#define ieee80211_wx_get_encode_ext ieee80211_wx_get_encode_ext_rsl struct ieee_param { From 8c3e8a6bdb5253b97ad532570f8b5db5f7a06407 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Wed, 26 Oct 2022 16:28:03 +0800 Subject: [PATCH 1160/4122] class: fix possible memory leak in __class_register() If class_add_groups() returns an error, the 'cp->subsys' needs to be unregistered, and the 'cp' needs to be freed. We cannot call kset_unregister() here, because the 'cls' will be freed in the callback function class_release() and it is also freed in the caller's error path, which would cause a double free. So fix this by calling kobject_del() and kfree_const(name) to clean up the kobject. Besides, call kfree() to free the 'cp'. Fault injection test can trigger this: unreferenced object 0xffff888102fa8190 (size 8): comm "modprobe", pid 502, jiffies 4294906074 (age 49.296s) hex dump (first 8 bytes): 70 6b 74 63 64 76 64 00 pktcdvd. backtrace: [<00000000e7c7703d>] __kmalloc_track_caller+0x1ae/0x320 [<000000005e4d70bc>] kstrdup+0x3a/0x70 [<00000000c2e5e85a>] kstrdup_const+0x68/0x80 [<000000000049a8c7>] kvasprintf_const+0x10b/0x190 [<0000000029123163>] kobject_set_name_vargs+0x56/0x150 [<00000000747219c9>] kobject_set_name+0xab/0xe0 [<0000000005f1ea4e>] __class_register+0x15c/0x49a unreferenced object 0xffff888037274000 (size 1024): comm "modprobe", pid 502, jiffies 4294906074 (age 49.296s) hex dump (first 32 bytes): 00 40 27 37 80 88 ff ff 00 40 27 37 80 88 ff ff .@'7.....@'7.... 00 00 00 00 ad 4e ad de ff ff ff ff 00 00 00 00 .....N.......... 
backtrace: [<00000000151f9600>] kmem_cache_alloc_trace+0x17c/0x2f0 [<00000000ecf3dd95>] __class_register+0x86/0x49a Fixes: ced6473e7486 ("driver core: class: add class_groups support") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221026082803.3458760-1-yangyingliang@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/class.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/base/class.c b/drivers/base/class.c index 64f7b9a0970f..8ceafb7d0203 100644 --- a/drivers/base/class.c +++ b/drivers/base/class.c @@ -192,6 +192,11 @@ int __class_register(struct class *cls, struct lock_class_key *key) } error = class_add_groups(class_get(cls), cls->class_groups); class_put(cls); + if (error) { + kobject_del(&cp->subsys.kobj); + kfree_const(cp->subsys.kobj.name); + kfree(cp); + } return error; } EXPORT_SYMBOL_GPL(__class_register); From 9e6002ad9e28209dcefa342977ac683779556dbd Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Wed, 2 Nov 2022 10:55:34 +0800 Subject: [PATCH 1161/4122] Documentation: devres: add missing MEM helper Add missing devm_kstrdup_const() to devres.rst. It was introduced by commit 09d1ea1c7309 ("devres: provide devm_kstrdup_const()"). Fixes: 09d1ea1c7309 ("devres: provide devm_kstrdup_const()") Cc: Bartosz Golaszewski Cc: Greg Kroah-Hartman Cc: "Rafael J. 
Wysocki" Cc: Jonathan Corbet Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221102025534.1450337-1-yangyingliang@huawei.com Signed-off-by: Greg Kroah-Hartman --- Documentation/driver-api/driver-model/devres.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/driver-api/driver-model/devres.rst b/Documentation/driver-api/driver-model/devres.rst index 687adb58048e..ac9b9ff37f50 100644 --- a/Documentation/driver-api/driver-model/devres.rst +++ b/Documentation/driver-api/driver-model/devres.rst @@ -357,6 +357,7 @@ MEM devm_kmemdup() devm_krealloc() devm_kstrdup() + devm_kstrdup_const() devm_kvasprintf() devm_kzalloc() From 6fcd7e702d3d91cc2c3194acffd7d67b2c10b81f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 18 Oct 2022 02:04:10 -0700 Subject: [PATCH 1162/4122] devres: Use kmalloc_size_roundup() to match ksize() usage Round up allocations with kmalloc_size_roundup() so that devres's use of ksize() is always accurate and no special handling of the memory is needed by KASAN, UBSAN_BOUNDS, nor FORTIFY_SOURCE. Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221018090406.never.856-kees@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/base/devres.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/base/devres.c b/drivers/base/devres.c index 4ab2b50ee38f..c0e100074aa3 100644 --- a/drivers/base/devres.c +++ b/drivers/base/devres.c @@ -101,6 +101,9 @@ static bool check_dr_size(size_t size, size_t *tot_size) size, tot_size))) return false; + /* Actually allocate the full kmalloc bucket size. */ + *tot_size = kmalloc_size_roundup(*tot_size); + return true; } From e63e99397b2613d50a5f4f02ed07307e67a190f1 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Wed, 9 Nov 2022 14:40:36 +0800 Subject: [PATCH 1163/4122] drivers: dio: fix possible memory leak in dio_init() If device_register() returns an error, the 'dev' and its name need to be freed. 
Add a release function, and then call put_device() in the error path, so the name is freed in kobject_cleanup() and the 'dev' is freed in the release function. Fixes: 2e4c77bea3d8 ("m68k: dio - Kill warn_unused_result warnings") Fixes: 1fa5ae857bb1 ("driver core: get rid of struct device's bus_id string array") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221109064036.1835346-1-yangyingliang@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/dio/dio.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/dio/dio.c b/drivers/dio/dio.c index 0e5a5662d5a4..0a051d656880 100644 --- a/drivers/dio/dio.c +++ b/drivers/dio/dio.c @@ -109,6 +109,12 @@ static char dio_no_name[] = { 0 }; #endif /* CONFIG_DIO_CONSTANTS */ +static void dio_dev_release(struct device *dev) +{ + struct dio_dev *ddev = container_of(dev, typeof(struct dio_dev), dev); + kfree(ddev); +} + int __init dio_find(int deviceid) { /* Called to find a DIO device before the full bus scan has run. @@ -225,6 +231,7 @@ static int __init dio_init(void) dev->bus = &dio_bus; dev->dev.parent = &dio_bus.dev; dev->dev.bus = &dio_bus_type; + dev->dev.release = dio_dev_release; dev->scode = scode; dev->resource.start = pa; dev->resource.end = pa + DIO_SIZE(scode, va); @@ -252,6 +259,7 @@ static int __init dio_init(void) if (error) { pr_err("DIO: Error registering device %s\n", dev->name); + put_device(&dev->dev); continue; } error = dio_create_sysfs_dev_files(dev); From 92cf87051894921ab6b14e9ad60dbae2d669bf15 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 3 Nov 2022 17:38:35 -0700 Subject: [PATCH 1164/4122] debugfs: small Documentation cleaning Fix punctuation in a parenthetical phrase. Add 2 article adjectives and change one from "an" to "a". Signed-off-by: Randy Dunlap Cc: Greg Kroah-Hartman Cc: Jonathan Corbet Cc: linux-doc@vger.kernel.org Cc: "Rafael J. 
Wysocki" Link: https://lore.kernel.org/r/20221104003835.29472-1-rdunlap@infradead.org Signed-off-by: Greg Kroah-Hartman --- Documentation/filesystems/debugfs.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/filesystems/debugfs.rst b/Documentation/filesystems/debugfs.rst index 71b1fee56d2a..dc35da8b8792 100644 --- a/Documentation/filesystems/debugfs.rst +++ b/Documentation/filesystems/debugfs.rst @@ -155,8 +155,8 @@ any code which does so in the mainline. Note that all files created with debugfs_create_blob() are read-only. If you want to dump a block of registers (something that happens quite -often during development, even if little such code reaches mainline. -Debugfs offers two functions: one to make a registers-only file, and +often during development, even if little such code reaches mainline), +debugfs offers two functions: one to make a registers-only file, and another to insert a register block in the middle of another sequential file:: @@ -183,7 +183,7 @@ The "base" argument may be 0, but you may want to build the reg32 array using __stringify, and a number of register names (macros) are actually byte offsets over a base for the register block. -If you want to dump an u32 array in debugfs, you can create file with:: +If you want to dump a u32 array in debugfs, you can create a file with:: struct debugfs_u32_array { u32 *array; @@ -197,7 +197,7 @@ If you want to dump an u32 array in debugfs, you can create file with:: The "array" argument wraps a pointer to the array's data and the number of its elements. Note: Once array is created its size can not be changed. 
-There is a helper function to create device related seq_file:: +There is a helper function to create a device-related seq_file:: void debugfs_create_devm_seqfile(struct device *dev, const char *name, From 9a6800d1b9da21302758fb37e58b3ab1cadfc643 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 3 Nov 2022 17:39:21 -0700 Subject: [PATCH 1165/4122] sysfs: update Documentation Make editing corrections and updates to sysfs.rst: - spell "sysfs" consistently (vs. "Sysfs") - align field names in a struct - fix some punctuation and grammar - list more /sys top-level subdirectories - change 'fuse.txt' to 'fuse.rst' (although I don't see where the example is) Signed-off-by: Randy Dunlap Cc: Greg Kroah-Hartman Cc: Jonathan Corbet Cc: linux-doc@vger.kernel.org Cc: "Rafael J. Wysocki" Link: https://lore.kernel.org/r/20221104003921.31616-1-rdunlap@infradead.org Signed-off-by: Greg Kroah-Hartman --- Documentation/filesystems/sysfs.rst | 41 +++++++++++++++++------------ 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/Documentation/filesystems/sysfs.rst b/Documentation/filesystems/sysfs.rst index 8bba676b1365..f8187d466b97 100644 --- a/Documentation/filesystems/sysfs.rst +++ b/Documentation/filesystems/sysfs.rst @@ -12,10 +12,10 @@ Mike Murphy :Original: 10 January 2003 -What it is: -~~~~~~~~~~~ +What it is +~~~~~~~~~~ -sysfs is a ram-based filesystem initially based on ramfs. It provides +sysfs is a RAM-based filesystem initially based on ramfs. It provides a means to export kernel data structures, their attributes, and the linkages between them to userspace. @@ -43,7 +43,7 @@ userspace. Top-level directories in sysfs represent the common ancestors of object hierarchies; i.e. the subsystems the objects belong to. -Sysfs internally stores a pointer to the kobject that implements a +sysfs internally stores a pointer to the kobject that implements a directory in the kernfs_node object associated with the directory. 
In the past this kobject pointer has been used by sysfs to do reference counting directly on the kobject whenever the file is opened or closed. @@ -55,7 +55,7 @@ Attributes ~~~~~~~~~~ Attributes can be exported for kobjects in the form of regular files in -the filesystem. Sysfs forwards file I/O operations to methods defined +the filesystem. sysfs forwards file I/O operations to methods defined for the attributes, providing a means to read and write kernel attributes. @@ -72,8 +72,8 @@ you publicly humiliated and your code rewritten without notice. An attribute definition is simply:: struct attribute { - char * name; - struct module *owner; + char *name; + struct module *owner; umode_t mode; }; @@ -138,7 +138,7 @@ __ATTR_WO(name): assumes a name_store only and is restricted to mode 0200 that is root write access only. __ATTR_RO_MODE(name, mode): - fore more restrictive RO access currently + for more restrictive RO access; currently only use case is the EFI System Resource Table (see drivers/firmware/efi/esrt.c) __ATTR_RW(name): @@ -207,7 +207,7 @@ IOW, they should take only an object, an attribute, and a buffer as parameters. sysfs allocates a buffer of size (PAGE_SIZE) and passes it to the -method. Sysfs will call the method exactly once for each read or +method. sysfs will call the method exactly once for each read or write. This forces the following behavior on the method implementations: @@ -221,7 +221,7 @@ implementations: be called again, rearmed, to fill the buffer. - On write(2), sysfs expects the entire buffer to be passed during the - first write. Sysfs then passes the entire buffer to the store() method. + first write. sysfs then passes the entire buffer to the store() method. A terminating null is added after the data on stores. This makes functions like sysfs_streq() safe to use. @@ -237,7 +237,7 @@ Other notes: - Writing causes the show() method to be rearmed regardless of current file position. -- The buffer will always be PAGE_SIZE bytes in length. 
On i386, this +- The buffer will always be PAGE_SIZE bytes in length. On x86, this is 4096. - show() methods should return the number of bytes printed into the @@ -253,7 +253,7 @@ Other notes: through, be sure to return an error. - The object passed to the methods will be pinned in memory via sysfs - referencing counting its embedded object. However, the physical + reference counting its embedded object. However, the physical entity (e.g. device) the object represents may not be present. Be sure to have a way to check this, if necessary. @@ -295,8 +295,12 @@ The top level sysfs directory looks like:: dev/ devices/ firmware/ - net/ fs/ + hypervisor/ + kernel/ + module/ + net/ + power/ devices/ contains a filesystem representation of the device tree. It maps directly to the internal kernel device tree, which is a hierarchy of @@ -317,15 +321,18 @@ span multiple bus types). fs/ contains a directory for some filesystems. Currently each filesystem wanting to export attributes must create its own hierarchy -below fs/ (see ./fuse.txt for an example). +below fs/ (see ./fuse.rst for an example). -dev/ contains two directories char/ and block/. Inside these two +module/ contains parameter values and state information for all +loaded system modules, for both builtin and loadable modules. + +dev/ contains two directories: char/ and block/. Inside these two directories there are symlinks named :. These symlinks point to the sysfs directory for the given device. /sys/dev provides a quick way to lookup the sysfs interface for a device from the result of a stat(2) operation. -More information can driver-model specific features can be found in +More information on driver-model specific features can be found in Documentation/driver-api/driver-model/. @@ -335,7 +342,7 @@ TODO: Finish this section. Current Interfaces ~~~~~~~~~~~~~~~~~~ -The following interface layers currently exist in sysfs: +The following interface layers currently exist in sysfs. 
devices (include/linux/device.h) From fa627348cfc7fb174468d88756b83c2d97890b07 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 1 Oct 2022 18:54:26 +0200 Subject: [PATCH 1166/4122] driver core: class: make namespace and get_ownership take const * The callbacks in struct class namespace() and get_ownership() do not modify the struct device passed to them, so mark the pointer as constant and fix up all callbacks in the kernel to have the correct function signature. This helps make it more obvious what calls and callbacks do, and do not, modify structures passed to them. Cc: "Rafael J. Wysocki" Link: https://lore.kernel.org/r/20221001165426.2690912-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 4 ++-- drivers/infiniband/core/device.c | 4 ++-- drivers/net/ipvlan/ipvtap.c | 4 ++-- drivers/net/macvtap.c | 4 ++-- include/linux/device/class.h | 4 ++-- net/core/net-sysfs.c | 8 ++++---- net/wireless/sysfs.c | 2 +- 7 files changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index d02501933467..f07b1c349f79 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2336,7 +2336,7 @@ static void device_release(struct kobject *kobj) static const void *device_namespace(struct kobject *kobj) { - struct device *dev = kobj_to_dev(kobj); + const struct device *dev = kobj_to_dev(kobj); const void *ns = NULL; if (dev->class && dev->class->ns_type) @@ -2347,7 +2347,7 @@ static const void *device_namespace(struct kobject *kobj) static void device_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid) { - struct device *dev = kobj_to_dev(kobj); + const struct device *dev = kobj_to_dev(kobj); if (dev->class && dev->class->get_ownership) dev->class->get_ownership(dev, uid, gid); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index ae60c73babcc..3893b6517421 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ 
-524,9 +524,9 @@ static int ib_device_uevent(struct device *device, return 0; } -static const void *net_namespace(struct device *d) +static const void *net_namespace(const struct device *d) { - struct ib_core_device *coredev = + const struct ib_core_device *coredev = container_of(d, struct ib_core_device, dev); return read_pnet(&coredev->rdma_net); diff --git a/drivers/net/ipvlan/ipvtap.c b/drivers/net/ipvlan/ipvtap.c index cbabca167a07..dde272586e80 100644 --- a/drivers/net/ipvlan/ipvtap.c +++ b/drivers/net/ipvlan/ipvtap.c @@ -30,9 +30,9 @@ static dev_t ipvtap_major; static struct cdev ipvtap_cdev; -static const void *ipvtap_net_namespace(struct device *d) +static const void *ipvtap_net_namespace(const struct device *d) { - struct net_device *dev = to_net_dev(d->parent); + const struct net_device *dev = to_net_dev(d->parent); return dev_net(dev); } diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index d1f435788e90..031344239f27 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -35,9 +35,9 @@ struct macvtap_dev { */ static dev_t macvtap_major; -static const void *macvtap_net_namespace(struct device *d) +static const void *macvtap_net_namespace(const struct device *d) { - struct net_device *dev = to_net_dev(d->parent); + const struct net_device *dev = to_net_dev(d->parent); return dev_net(dev); } diff --git a/include/linux/device/class.h b/include/linux/device/class.h index e61ec5502019..20103e0b03c3 100644 --- a/include/linux/device/class.h +++ b/include/linux/device/class.h @@ -68,9 +68,9 @@ struct class { int (*shutdown_pre)(struct device *dev); const struct kobj_ns_type_operations *ns_type; - const void *(*namespace)(struct device *dev); + const void *(*namespace)(const struct device *dev); - void (*get_ownership)(struct device *dev, kuid_t *uid, kgid_t *gid); + void (*get_ownership)(const struct device *dev, kuid_t *uid, kgid_t *gid); const struct dev_pm_ops *pm; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 
8409d41405df..a8c5a7cd9701 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1910,16 +1910,16 @@ static void netdev_release(struct device *d) netdev_freemem(dev); } -static const void *net_namespace(struct device *d) +static const void *net_namespace(const struct device *d) { - struct net_device *dev = to_net_dev(d); + const struct net_device *dev = to_net_dev(d); return dev_net(dev); } -static void net_get_ownership(struct device *d, kuid_t *uid, kgid_t *gid) +static void net_get_ownership(const struct device *d, kuid_t *uid, kgid_t *gid) { - struct net_device *dev = to_net_dev(d); + const struct net_device *dev = to_net_dev(d); const struct net *net = dev_net(dev); net_ns_get_ownership(net, uid, gid); diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 0c3f05c9be27..cdb638647e0b 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -148,7 +148,7 @@ static SIMPLE_DEV_PM_OPS(wiphy_pm_ops, wiphy_suspend, wiphy_resume); #define WIPHY_PM_OPS NULL #endif -static const void *wiphy_namespace(struct device *d) +static const void *wiphy_namespace(const struct device *d) { struct wiphy *wiphy = container_of(d, struct wiphy, dev); From 0b9ca98b722969660ad98b39f766a561ccb39f5f Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 6 Oct 2022 00:03:07 +0000 Subject: [PATCH 1167/4122] perf/x86/core: Zero @lbr instead of returning -1 in x86_perf_get_lbr() stub Drop the return value from x86_perf_get_lbr() and have the stub zero out the @lbr structure instead of returning -1 to indicate "no LBR support". KVM doesn't actually check the return value, and instead subtly relies on zeroing the number of LBRs in intel_pmu_init(). Formalize "nr=0 means unsupported" so that KVM doesn't need to add a pointless check on the return value to fix KVM's benign bug. Note, the stub is necessary even though KVM x86 selects PERF_EVENTS and the caller exists only when CONFIG_KVM_INTEL=y. 
Despite the name, KVM_INTEL doesn't strictly require CPU_SUP_INTEL, it can be built with any of INTEL || CENTAUR || ZHAOXIN CPUs. Signed-off-by: Sean Christopherson Message-Id: <20221006000314.73240-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/events/intel/lbr.c | 6 +----- arch/x86/include/asm/perf_event.h | 6 +++--- arch/x86/kvm/vmx/capabilities.h | 3 ++- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 8259d725054d..4dbde69c423b 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -1603,10 +1603,8 @@ clear_arch_lbr: * x86_perf_get_lbr - get the LBR records information * * @lbr: the caller's memory to store the LBR records information - * - * Returns: 0 indicates the LBR info has been successfully obtained */ -int x86_perf_get_lbr(struct x86_pmu_lbr *lbr) +void x86_perf_get_lbr(struct x86_pmu_lbr *lbr) { int lbr_fmt = x86_pmu.intel_cap.lbr_format; @@ -1614,8 +1612,6 @@ int x86_perf_get_lbr(struct x86_pmu_lbr *lbr) lbr->from = x86_pmu.lbr_from; lbr->to = x86_pmu.lbr_to; lbr->info = (lbr_fmt == LBR_FORMAT_INFO) ? 
x86_pmu.lbr_info : 0; - - return 0; } EXPORT_SYMBOL_GPL(x86_perf_get_lbr); diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 9ac46dbe57d4..5d0f6891ae61 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -543,12 +543,12 @@ static inline void perf_check_microcode(void) { } #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL) extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr, void *data); -extern int x86_perf_get_lbr(struct x86_pmu_lbr *lbr); +extern void x86_perf_get_lbr(struct x86_pmu_lbr *lbr); #else struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr, void *data); -static inline int x86_perf_get_lbr(struct x86_pmu_lbr *lbr) +static inline void x86_perf_get_lbr(struct x86_pmu_lbr *lbr) { - return -1; + memset(lbr, 0, sizeof(*lbr)); } #endif diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index 07254314f3dd..479124e49bbd 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -407,7 +407,8 @@ static inline u64 vmx_get_perf_capabilities(void) if (boot_cpu_has(X86_FEATURE_PDCM)) rdmsrl(MSR_IA32_PERF_CAPABILITIES, host_perf_cap); - if (x86_perf_get_lbr(&lbr) >= 0 && lbr.nr) + x86_perf_get_lbr(&lbr); + if (lbr.nr) perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT; if (vmx_pebs_supported()) { From bec46859fb9d797a21c983100b1f425bebe89747 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 6 Oct 2022 00:03:11 +0000 Subject: [PATCH 1168/4122] KVM: x86: Track supported PERF_CAPABILITIES in kvm_caps Track KVM's supported PERF_CAPABILITIES in kvm_caps instead of computing the supported capabilities on the fly every time. Using kvm_caps will also allow for future cleanups as the kvm_caps values can be used directly in common x86 code. 
Signed-off-by: Sean Christopherson Acked-by: Like Xu Message-Id: <20221006000314.73240-6-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 2 ++ arch/x86/kvm/vmx/capabilities.h | 25 ------------------------ arch/x86/kvm/vmx/pmu_intel.c | 2 +- arch/x86/kvm/vmx/vmx.c | 34 +++++++++++++++++++++++++++++---- arch/x86/kvm/x86.h | 1 + 5 files changed, 34 insertions(+), 30 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 9f88c8e6766e..d15f1934c904 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2714,6 +2714,7 @@ static int svm_get_msr_feature(struct kvm_msr_entry *msr) msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE; break; case MSR_IA32_PERF_CAPABILITIES: + msr->data = kvm_caps.supported_perf_cap; return 0; default: return KVM_MSR_RET_INVALID; @@ -4865,6 +4866,7 @@ static __init void svm_set_cpu_caps(void) { kvm_set_cpu_caps(); + kvm_caps.supported_perf_cap = 0; kvm_caps.supported_xss = 0; /* CPUID 0x80000001 and 0x8000000A (SVM features) */ diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index 479124e49bbd..cd2ac9536c99 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -395,31 +395,6 @@ static inline bool vmx_pebs_supported(void) return boot_cpu_has(X86_FEATURE_PEBS) && kvm_pmu_cap.pebs_ept; } -static inline u64 vmx_get_perf_capabilities(void) -{ - u64 perf_cap = PMU_CAP_FW_WRITES; - struct x86_pmu_lbr lbr; - u64 host_perf_cap = 0; - - if (!enable_pmu) - return 0; - - if (boot_cpu_has(X86_FEATURE_PDCM)) - rdmsrl(MSR_IA32_PERF_CAPABILITIES, host_perf_cap); - - x86_perf_get_lbr(&lbr); - if (lbr.nr) - perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT; - - if (vmx_pebs_supported()) { - perf_cap |= host_perf_cap & PERF_CAP_PEBS_MASK; - if ((perf_cap & PERF_CAP_PEBS_FORMAT) < 4) - perf_cap &= ~PERF_CAP_PEBS_BASELINE; - } - - return perf_cap; -} - static inline bool cpu_has_notify_vmexit(void) { return vmcs_config.cpu_based_2nd_exec_ctrl & diff 
--git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 10b33da9bd05..9fabfe71fd87 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -631,7 +631,7 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu) pmu->fixed_counters[i].current_config = 0; } - vcpu->arch.perf_capabilities = vmx_get_perf_capabilities(); + vcpu->arch.perf_capabilities = kvm_caps.supported_perf_cap; lbr_desc->records.nr = 0; lbr_desc->event = NULL; lbr_desc->msr_passthrough = false; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 63247c57c72c..3fec43953051 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1850,7 +1850,7 @@ static int vmx_get_msr_feature(struct kvm_msr_entry *msr) return 1; return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data); case MSR_IA32_PERF_CAPABILITIES: - msr->data = vmx_get_perf_capabilities(); + msr->data = kvm_caps.supported_perf_cap; return 0; default: return KVM_MSR_RET_INVALID; @@ -2029,7 +2029,7 @@ static u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated (host_initiated || guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT))) debugctl |= DEBUGCTLMSR_BUS_LOCK_DETECT; - if ((vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT) && + if ((kvm_caps.supported_perf_cap & PMU_CAP_LBR_FMT) && (host_initiated || intel_pmu_lbr_is_enabled(vcpu))) debugctl |= DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI; @@ -2342,14 +2342,14 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; if (data & PMU_CAP_LBR_FMT) { if ((data & PMU_CAP_LBR_FMT) != - (vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT)) + (kvm_caps.supported_perf_cap & PMU_CAP_LBR_FMT)) return 1; if (!cpuid_model_is_consistent(vcpu)) return 1; } if (data & PERF_CAP_PEBS_FORMAT) { if ((data & PERF_CAP_PEBS_MASK) != - (vmx_get_perf_capabilities() & PERF_CAP_PEBS_MASK)) + (kvm_caps.supported_perf_cap & PERF_CAP_PEBS_MASK)) return 1; if (!guest_cpuid_has(vcpu, X86_FEATURE_DS)) return 
1; @@ -7669,6 +7669,31 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) vmx_update_exception_bitmap(vcpu); } +static u64 vmx_get_perf_capabilities(void) +{ + u64 perf_cap = PMU_CAP_FW_WRITES; + struct x86_pmu_lbr lbr; + u64 host_perf_cap = 0; + + if (!enable_pmu) + return 0; + + if (boot_cpu_has(X86_FEATURE_PDCM)) + rdmsrl(MSR_IA32_PERF_CAPABILITIES, host_perf_cap); + + x86_perf_get_lbr(&lbr); + if (lbr.nr) + perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT; + + if (vmx_pebs_supported()) { + perf_cap |= host_perf_cap & PERF_CAP_PEBS_MASK; + if ((perf_cap & PERF_CAP_PEBS_FORMAT) < 4) + perf_cap &= ~PERF_CAP_PEBS_BASELINE; + } + + return perf_cap; +} + static __init void vmx_set_cpu_caps(void) { kvm_set_cpu_caps(); @@ -7691,6 +7716,7 @@ static __init void vmx_set_cpu_caps(void) if (!enable_pmu) kvm_cpu_cap_clear(X86_FEATURE_PDCM); + kvm_caps.supported_perf_cap = vmx_get_perf_capabilities(); if (!enable_sgx) { kvm_cpu_cap_clear(X86_FEATURE_SGX); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 829d3134c1eb..9de72586f406 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -27,6 +27,7 @@ struct kvm_caps { u64 supported_mce_cap; u64 supported_xcr0; u64 supported_xss; + u64 supported_perf_cap; }; void kvm_spurious_fault(void); From 6c6f82bea96fddb96898edbbee248ad362b768f4 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 6 Oct 2022 00:03:12 +0000 Subject: [PATCH 1169/4122] KVM: x86: Init vcpu->arch.perf_capabilities in common x86 code Initialize vcpu->arch.perf_capabilities in x86's kvm_arch_vcpu_create() instead of deferring initialization to vendor code. For better or worse, common x86 handles reads and writes to the MSR, and so common x86 should also handle initializing the MSR. No functional change intended. 
Signed-off-by: Sean Christopherson Message-Id: <20221006000314.73240-7-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/pmu_intel.c | 1 - arch/x86/kvm/x86.c | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 9fabfe71fd87..b7c3a9874a93 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -631,7 +631,6 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu) pmu->fixed_counters[i].current_config = 0; } - vcpu->arch.perf_capabilities = kvm_caps.supported_perf_cap; lbr_desc->records.nr = 0; lbr_desc->event = NULL; lbr_desc->msr_passthrough = false; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ecea83f0da49..6ce7c80180d2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -11893,6 +11893,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT; kvm_async_pf_hash_reset(vcpu); + + vcpu->arch.perf_capabilities = kvm_caps.supported_perf_cap; kvm_pmu_init(vcpu); vcpu->arch.pending_external_vector = -1; From 5fe9805dc2f58bebb3bf5dd298559c4bad5f0448 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 6 Oct 2022 00:03:13 +0000 Subject: [PATCH 1170/4122] KVM: x86: Handle PERF_CAPABILITIES in common x86's kvm_get_msr_feature() Handle PERF_CAPABILITIES directly in kvm_get_msr_feature() now that the supported value is available in kvm_caps. No functional change intended. 
Signed-off-by: Sean Christopherson Message-Id: <20221006000314.73240-8-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 3 --- arch/x86/kvm/vmx/vmx.c | 3 --- arch/x86/kvm/x86.c | 3 +++ 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index d15f1934c904..3fc8e4999891 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2713,9 +2713,6 @@ static int svm_get_msr_feature(struct kvm_msr_entry *msr) if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE; break; - case MSR_IA32_PERF_CAPABILITIES: - msr->data = kvm_caps.supported_perf_cap; - return 0; default: return KVM_MSR_RET_INVALID; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 3fec43953051..7a7e14d4d78c 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1849,9 +1849,6 @@ static int vmx_get_msr_feature(struct kvm_msr_entry *msr) if (!nested) return 1; return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data); - case MSR_IA32_PERF_CAPABILITIES: - msr->data = kvm_caps.supported_perf_cap; - return 0; default: return KVM_MSR_RET_INVALID; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6ce7c80180d2..6f8a370d2b1d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1648,6 +1648,9 @@ static int kvm_get_msr_feature(struct kvm_msr_entry *msr) case MSR_IA32_ARCH_CAPABILITIES: msr->data = kvm_get_arch_capabilities(); break; + case MSR_IA32_PERF_CAPABILITIES: + msr->data = kvm_caps.supported_perf_cap; + break; case MSR_IA32_UCODE_REV: rdmsrl_safe(msr->index, &msr->data); break; From 686e0f0324f07ea1979462f659b24b1195996fe0 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 6 Oct 2022 00:03:14 +0000 Subject: [PATCH 1171/4122] KVM: x86: Directly query supported PERF_CAPABILITIES for WRMSR checks Use kvm_caps.supported_perf_cap directly instead of bouncing through kvm_get_msr_feature() when checking the incoming 
value for writes to PERF_CAPABILITIES. Note, kvm_get_msr_feature() is guaranteed to succeed when getting PERF_CAPABILITIES, i.e. dropping that check is a nop. No functional change intended. Signed-off-by: Sean Christopherson Message-Id: <20221006000314.73240-9-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6f8a370d2b1d..30a5365f4b32 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3563,20 +3563,15 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; vcpu->arch.arch_capabilities = data; break; - case MSR_IA32_PERF_CAPABILITIES: { - struct kvm_msr_entry msr_ent = {.index = msr, .data = 0}; - + case MSR_IA32_PERF_CAPABILITIES: if (!msr_info->host_initiated) return 1; - if (kvm_get_msr_feature(&msr_ent)) - return 1; - if (data & ~msr_ent.data) + if (data & ~kvm_caps.supported_perf_cap) return 1; vcpu->arch.perf_capabilities = data; kvm_pmu_refresh(vcpu); return 0; - } case MSR_EFER: return set_efer(vcpu, msr_info); case MSR_K7_HWCR: From 0f9edb8cab29667ef5ac50736bb1f9a0557e7f94 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Tue, 13 Sep 2022 17:05:37 +0800 Subject: [PATCH 1172/4122] KVM: x86: remove obsolete kvm_mmu_gva_to_gpa_fetch() There's no caller. Remove it. 
Signed-off-by: Miaohe Lin Reviewed-by: Sean Christopherson Message-Id: <20220913090537.25195-1-linmiaohe@huawei.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 -- arch/x86/kvm/x86.c | 10 ---------- 2 files changed, 12 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f05ebaa26f0f..1723a357190d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1909,8 +1909,6 @@ void kvm_mmu_free_roots(struct kvm *kvm, struct kvm_mmu *mmu, void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu); gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, struct x86_exception *exception); -gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, - struct x86_exception *exception); gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, struct x86_exception *exception); gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 30a5365f4b32..3c63ba5512e9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7154,16 +7154,6 @@ gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, } EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_read); - gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, - struct x86_exception *exception) -{ - struct kvm_mmu *mmu = vcpu->arch.walk_mmu; - - u64 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0; - access |= PFERR_FETCH_MASK; - return mmu->gva_to_gpa(vcpu, mmu, gva, access, exception); -} - gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, struct x86_exception *exception) { From fa3e42037ef53c2aea85faf51d6f947e16dd8fc5 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Tue, 13 Sep 2022 17:17:25 +0800 Subject: [PATCH 1173/4122] KVM: x86/mmu: fix some comment typos Fix some typos in comments. 
Signed-off-by: Miaohe Lin Reviewed-by: Sean Christopherson Message-Id: <20220913091725.35953-1-linmiaohe@huawei.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 2 +- arch/x86/kvm/mmu/spte.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 6f81539061d6..639ac64a4e8c 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1894,7 +1894,7 @@ static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp) if (sp->role.invalid) return true; - /* TDP MMU pages due not use the MMU generation. */ + /* TDP MMU pages do not use the MMU generation. */ return !sp->tdp_mmu_page && unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen); } diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h index 7670c13ce251..79560d77aa4c 100644 --- a/arch/x86/kvm/mmu/spte.h +++ b/arch/x86/kvm/mmu/spte.h @@ -188,7 +188,7 @@ extern u64 __read_mostly shadow_nonpresent_or_rsvd_mask; * should not modify the SPTE. * * Use a semi-arbitrary value that doesn't set RWX bits, i.e. is not-present on - * bot AMD and Intel CPUs, and doesn't set PFN bits, i.e. doesn't create a L1TF + * both AMD and Intel CPUs, and doesn't set PFN bits, i.e. doesn't create a L1TF * vulnerability. Use only low bits to avoid 64-bit immediates. * * Only used by the TDP MMU. From 3adbdf81038801ee84367845e78bb1a3b36bde39 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Tue, 13 Sep 2022 16:54:52 +0800 Subject: [PATCH 1174/4122] KVM: x86/mmu: use helper macro SPTE_ENT_PER_PAGE Use helper macro SPTE_ENT_PER_PAGE to get the number of spte entries per page. Minor readability improvement. 
Signed-off-by: Miaohe Lin Reviewed-by: Sean Christopherson Message-Id: <20220913085452.25561-1-linmiaohe@huawei.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 639ac64a4e8c..2640871bdcf1 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1645,7 +1645,7 @@ static int is_empty_shadow_page(u64 *spt) u64 *pos; u64 *end; - for (pos = spt, end = pos + PAGE_SIZE / sizeof(u64); pos != end; pos++) + for (pos = spt, end = pos + SPTE_ENT_PER_PAGE; pos != end; pos++) if (is_shadow_present_pte(*pos)) { printk(KERN_ERR "%s: %p %llx\n", __func__, pos, *pos); From bb5c8abea09453d8b137e6613980b8e257485868 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Tue, 30 Aug 2022 15:52:09 -0700 Subject: [PATCH 1175/4122] KVM: x86: Insert "AMD" in KVM_X86_FEATURE_PSFD Intel and AMD have separate CPUID bits for each SPEC_CTRL bit. In the case of every bit other than PSFD, the Intel CPUID bit has no vendor name qualifier, but the AMD CPUID bit does. For consistency, rename KVM_X86_FEATURE_PSFD to KVM_X86_FEATURE_AMD_PSFD. No functional change intended. Signed-off-by: Jim Mattson Cc: Babu Moger Message-Id: <20220830225210.2381310-1-jmattson@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 62bc7a01cecc..6b5912578edd 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -62,7 +62,7 @@ u32 xstate_required_size(u64 xstate_bv, bool compacted) * This one is tied to SSB in the user API, and not * visible in /proc/cpuinfo. */ -#define KVM_X86_FEATURE_PSFD (13*32+28) /* Predictive Store Forwarding Disable */ +#define KVM_X86_FEATURE_AMD_PSFD (13*32+28) /* Predictive Store Forwarding Disable */ #define F feature_bit #define SF(name) (boot_cpu_has(X86_FEATURE_##name) ?
F(name) : 0) @@ -694,7 +694,7 @@ void kvm_set_cpu_caps(void) F(CLZERO) | F(XSAVEERPTR) | F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) | F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON) | - __feature_bit(KVM_X86_FEATURE_PSFD) + __feature_bit(KVM_X86_FEATURE_AMD_PSFD) ); /* From 00009406f0dbc53b95b9062c0cc297d6893ff394 Mon Sep 17 00:00:00 2001 From: Rafael Mendonca Date: Thu, 20 Oct 2022 23:01:13 -0300 Subject: [PATCH 1176/4122] x86/kvm: Remove unused virt to phys translation in kvm_guest_cpu_init() Presumably, this was introduced due to a conflict resolution with commit ef68017eb570 ("x86/kvm: Handle async page faults directly through do_page_fault()"), given that the last posted version [1] of the blamed commit was not based on the aforementioned commit. [1] https://lore.kernel.org/kvm/20200525144125.143875-9-vkuznets@redhat.com/ Fixes: b1d405751cd5 ("KVM: x86: Switch KVM guest to using interrupts for page ready APF delivery") Signed-off-by: Rafael Mendonca Message-Id: <20221021020113.922027-1-rafaelmendsr@gmail.com> Signed-off-by: Paolo Bonzini --- arch/x86/kernel/kvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index d4e48b4a438b..cf886f86038a 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -349,7 +349,7 @@ static notrace void kvm_guest_apic_eoi_write(u32 reg, u32 val) static void kvm_guest_cpu_init(void) { if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_INT) && kvmapf) { - u64 pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason)); + u64 pa; WARN_ON_ONCE(!static_branch_likely(&kvm_async_pf_enabled)); From 07a368b3f55a79d33cad113d506b279e04fd2a00 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 25 Oct 2022 15:47:27 +0300 Subject: [PATCH 1177/4122] bug: introduce ASSERT_STRUCT_OFFSET ASSERT_STRUCT_OFFSET allows to assert during the build of the kernel that a field in a struct have an expected offset. 
KVM used to have such macro, but there is almost nothing KVM specific in it so move it to build_bug.h, so that it can be used in other places in KVM. Signed-off-by: Maxim Levitsky Message-Id: <20221025124741.228045-10-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmcs12.h | 5 ++--- include/linux/build_bug.h | 9 +++++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/vmx/vmcs12.h b/arch/x86/kvm/vmx/vmcs12.h index 746129ddd5ae..01936013428b 100644 --- a/arch/x86/kvm/vmx/vmcs12.h +++ b/arch/x86/kvm/vmx/vmcs12.h @@ -208,9 +208,8 @@ struct __packed vmcs12 { /* * For save/restore compatibility, the vmcs12 field offsets must not change. */ -#define CHECK_OFFSET(field, loc) \ - BUILD_BUG_ON_MSG(offsetof(struct vmcs12, field) != (loc), \ - "Offset of " #field " in struct vmcs12 has changed.") +#define CHECK_OFFSET(field, loc) \ + ASSERT_STRUCT_OFFSET(struct vmcs12, field, loc) static inline void vmx_check_vmcs12_offsets(void) { diff --git a/include/linux/build_bug.h b/include/linux/build_bug.h index e3a0be2c90ad..3aa3640f8c18 100644 --- a/include/linux/build_bug.h +++ b/include/linux/build_bug.h @@ -77,4 +77,13 @@ #define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr) #define __static_assert(expr, msg, ...) _Static_assert(expr, msg) + +/* + * Compile time check that field has an expected offset + */ +#define ASSERT_STRUCT_OFFSET(type, field, expected_offset) \ + BUILD_BUG_ON_MSG(offsetof(type, field) != (expected_offset), \ + "Offset of " #field " in " #type " has changed.") + + #endif /* _LINUX_BUILD_BUG_H */ From d08b48585309247d4d28051dd7a315eef5d1db26 Mon Sep 17 00:00:00 2001 From: Carlos Bilbao Date: Mon, 24 Oct 2022 11:44:48 -0500 Subject: [PATCH 1178/4122] KVM: SVM: Name and check reserved fields with structs offset Rename reserved fields on all structs in arch/x86/include/asm/svm.h following their offset within the structs. 
Include compile time checks for this in the same place where other BUILD_BUG_ON for the structs are. This also solves that fields of struct sev_es_save_area are named by their order of appearance, but right now they jump from reserved_5 to reserved_7. Link: https://lkml.org/lkml/2022/10/22/376 Signed-off-by: Carlos Bilbao Message-Id: <20221024164448.203351-1-carlos.bilbao@amd.com> [Use ASSERT_STRUCT_OFFSET + fix a couple wrong offsets. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/svm.h | 93 ++++++++++++++++++++++++++------------ arch/x86/kvm/svm/sev.c | 2 +- 2 files changed, 66 insertions(+), 29 deletions(-) diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 0361626841bc..4352b46dd20c 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -293,12 +293,13 @@ struct vmcb_save_area { struct vmcb_seg ldtr; struct vmcb_seg idtr; struct vmcb_seg tr; - u8 reserved_1[42]; + /* Reserved fields are named following their struct offset */ + u8 reserved_0xa0[42]; u8 vmpl; u8 cpl; - u8 reserved_2[4]; + u8 reserved_0xcc[4]; u64 efer; - u8 reserved_3[112]; + u8 reserved_0xd8[112]; u64 cr4; u64 cr3; u64 cr0; @@ -306,7 +307,7 @@ struct vmcb_save_area { u64 dr6; u64 rflags; u64 rip; - u8 reserved_4[88]; + u8 reserved_0x180[88]; u64 rsp; u64 s_cet; u64 ssp; @@ -321,14 +322,14 @@ struct vmcb_save_area { u64 sysenter_esp; u64 sysenter_eip; u64 cr2; - u8 reserved_5[32]; + u8 reserved_0x248[32]; u64 g_pat; u64 dbgctl; u64 br_from; u64 br_to; u64 last_excp_from; u64 last_excp_to; - u8 reserved_6[72]; + u8 reserved_0x298[72]; u32 spec_ctrl; /* Guest version of SPEC_CTRL at 0x2E0 */ } __packed; @@ -349,12 +350,12 @@ struct sev_es_save_area { u64 vmpl2_ssp; u64 vmpl3_ssp; u64 u_cet; - u8 reserved_1[2]; + u8 reserved_0xc8[2]; u8 vmpl; u8 cpl; - u8 reserved_2[4]; + u8 reserved_0xcc[4]; u64 efer; - u8 reserved_3[104]; + u8 reserved_0xd8[104]; u64 xss; u64 cr4; u64 cr3; @@ -371,7 +372,7 @@ struct sev_es_save_area { u64 dr1_addr_mask; 
u64 dr2_addr_mask; u64 dr3_addr_mask; - u8 reserved_4[24]; + u8 reserved_0x1c0[24]; u64 rsp; u64 s_cet; u64 ssp; @@ -386,21 +387,21 @@ struct sev_es_save_area { u64 sysenter_esp; u64 sysenter_eip; u64 cr2; - u8 reserved_5[32]; + u8 reserved_0x248[32]; u64 g_pat; u64 dbgctl; u64 br_from; u64 br_to; u64 last_excp_from; u64 last_excp_to; - u8 reserved_7[80]; + u8 reserved_0x298[80]; u32 pkru; - u8 reserved_8[20]; - u64 reserved_9; /* rax already available at 0x01f8 */ + u32 tsc_aux; + u8 reserved_0x2f0[24]; u64 rcx; u64 rdx; u64 rbx; - u64 reserved_10; /* rsp already available at 0x01d8 */ + u64 reserved_0x320; /* rsp already available at 0x01d8 */ u64 rbp; u64 rsi; u64 rdi; @@ -412,7 +413,7 @@ struct sev_es_save_area { u64 r13; u64 r14; u64 r15; - u8 reserved_11[16]; + u8 reserved_0x380[16]; u64 guest_exit_info_1; u64 guest_exit_info_2; u64 guest_exit_int_info; @@ -425,7 +426,7 @@ struct sev_es_save_area { u64 pcpu_id; u64 event_inj; u64 xcr0; - u8 reserved_12[16]; + u8 reserved_0x3f0[16]; /* Floating point area */ u64 x87_dp; @@ -443,23 +444,23 @@ struct sev_es_save_area { } __packed; struct ghcb_save_area { - u8 reserved_1[203]; + u8 reserved_0x0[203]; u8 cpl; - u8 reserved_2[116]; + u8 reserved_0xcc[116]; u64 xss; - u8 reserved_3[24]; + u8 reserved_0x148[24]; u64 dr7; - u8 reserved_4[16]; + u8 reserved_0x168[16]; u64 rip; - u8 reserved_5[88]; + u8 reserved_0x180[88]; u64 rsp; - u8 reserved_6[24]; + u8 reserved_0x1e0[24]; u64 rax; - u8 reserved_7[264]; + u8 reserved_0x200[264]; u64 rcx; u64 rdx; u64 rbx; - u8 reserved_8[8]; + u8 reserved_0x320[8]; u64 rbp; u64 rsi; u64 rdi; @@ -471,12 +472,12 @@ struct ghcb_save_area { u64 r13; u64 r14; u64 r15; - u8 reserved_9[16]; + u8 reserved_0x380[16]; u64 sw_exit_code; u64 sw_exit_info_1; u64 sw_exit_info_2; u64 sw_scratch; - u8 reserved_10[56]; + u8 reserved_0x3b0[56]; u64 xcr0; u8 valid_bitmap[16]; u64 x87_state_gpa; @@ -490,7 +491,7 @@ struct ghcb { u8 shared_buffer[GHCB_SHARED_BUF_SIZE]; - u8 reserved_1[10]; + u8 
reserved_0xff0[10]; u16 protocol_version; /* negotiated SEV-ES/GHCB protocol version */ u32 ghcb_usage; } __packed; @@ -502,6 +503,9 @@ struct ghcb { #define EXPECTED_VMCB_CONTROL_AREA_SIZE 1024 #define EXPECTED_GHCB_SIZE PAGE_SIZE +#define BUILD_BUG_RESERVED_OFFSET(x, y) \ + ASSERT_STRUCT_OFFSET(struct x, reserved ## _ ## y, y) + static inline void __unused_size_checks(void) { BUILD_BUG_ON(sizeof(struct vmcb_save_area) != EXPECTED_VMCB_SAVE_AREA_SIZE); @@ -509,6 +513,39 @@ static inline void __unused_size_checks(void) BUILD_BUG_ON(sizeof(struct sev_es_save_area) != EXPECTED_SEV_ES_SAVE_AREA_SIZE); BUILD_BUG_ON(sizeof(struct vmcb_control_area) != EXPECTED_VMCB_CONTROL_AREA_SIZE); BUILD_BUG_ON(sizeof(struct ghcb) != EXPECTED_GHCB_SIZE); + + /* Check offsets of reserved fields */ + + BUILD_BUG_RESERVED_OFFSET(vmcb_save_area, 0xa0); + BUILD_BUG_RESERVED_OFFSET(vmcb_save_area, 0xcc); + BUILD_BUG_RESERVED_OFFSET(vmcb_save_area, 0xd8); + BUILD_BUG_RESERVED_OFFSET(vmcb_save_area, 0x180); + BUILD_BUG_RESERVED_OFFSET(vmcb_save_area, 0x248); + BUILD_BUG_RESERVED_OFFSET(vmcb_save_area, 0x298); + + BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0xc8); + BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0xcc); + BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0xd8); + BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x1c0); + BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x248); + BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x298); + BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x2f0); + BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x320); + BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x380); + BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x3f0); + + BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x0); + BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0xcc); + BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x148); + BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x168); + BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x180); + BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x1e0); + BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 
0x200); + BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x320); + BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x380); + BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x3b0); + + BUILD_BUG_RESERVED_OFFSET(ghcb, 0xff0); } struct vmcb { diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index efaaef2b7ae1..69dbf17f0d6a 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2648,7 +2648,7 @@ static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) ghcb_scratch_beg = control->ghcb_gpa + offsetof(struct ghcb, shared_buffer); ghcb_scratch_end = control->ghcb_gpa + - offsetof(struct ghcb, reserved_1); + offsetof(struct ghcb, reserved_0xff0); /* * If the scratch area begins within the GHCB, it must be From b0b42197b5c6f0d9447e5b710d64c671be8deec1 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 29 Sep 2022 13:20:09 -0400 Subject: [PATCH 1179/4122] KVM: x86: start moving SMM-related functions to new files Create a new header and source with code related to system management mode emulation. Entry and exit will move there too; for now, opportunistically rename put_smstate to PUT_SMSTATE while moving it to smm.h, and adjust the SMM state saving code. 
Signed-off-by: Paolo Bonzini Reviewed-by: Maxim Levitsky Message-Id: <20220929172016.319443-2-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 6 -- arch/x86/kvm/Makefile | 1 + arch/x86/kvm/emulate.c | 1 + arch/x86/kvm/kvm_cache_regs.h | 5 -- arch/x86/kvm/lapic.c | 8 +- arch/x86/kvm/lapic.h | 2 +- arch/x86/kvm/mmu/mmu.c | 1 + arch/x86/kvm/smm.c | 37 ++++++++ arch/x86/kvm/smm.h | 25 ++++++ arch/x86/kvm/svm/nested.c | 1 + arch/x86/kvm/svm/svm.c | 5 +- arch/x86/kvm/vmx/nested.c | 1 + arch/x86/kvm/vmx/vmx.c | 1 + arch/x86/kvm/x86.c | 148 ++++++++++++-------------------- 14 files changed, 132 insertions(+), 110 deletions(-) create mode 100644 arch/x86/kvm/smm.c create mode 100644 arch/x86/kvm/smm.h diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 1723a357190d..c70e84e69cca 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -2087,12 +2087,6 @@ static inline int kvm_cpu_get_apicid(int mps_cpu) #endif } -#define put_smstate(type, buf, offset, val) \ - *(type *)((buf) + (offset) - 0x7e00) = val - -#define GET_SMSTATE(type, buf, offset) \ - (*(type *)((buf) + (offset) - 0x7e00)) - int kvm_cpu_dirty_log_size(void); int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages); diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index f453a0f96e24..b584cb0e06bd 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -20,6 +20,7 @@ endif kvm-$(CONFIG_X86_64) += mmu/tdp_iter.o mmu/tdp_mmu.o kvm-$(CONFIG_KVM_XEN) += xen.o +kvm-y += smm.o kvm-intel-y += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \ vmx/evmcs.o vmx/nested.o vmx/posted_intr.o diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 4a43261d25a2..eea29aac83bd 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -30,6 +30,7 @@ #include "tss.h" #include "mmu.h" #include "pmu.h" +#include "smm.h" /* * Operand types diff --git 
a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index 3febc342360c..c09174f73a34 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h @@ -200,9 +200,4 @@ static inline bool is_guest_mode(struct kvm_vcpu *vcpu) return vcpu->arch.hflags & HF_GUEST_MASK; } -static inline bool is_smm(struct kvm_vcpu *vcpu) -{ - return vcpu->arch.hflags & HF_SMM_MASK; -} - #endif diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index d7639d126e6c..1bb63746e991 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -42,6 +42,7 @@ #include "x86.h" #include "cpuid.h" #include "hyperv.h" +#include "smm.h" #ifndef CONFIG_X86_64 #define mod_64(x, y) ((x) - (y) * div64_u64(x, y)) @@ -1170,9 +1171,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, break; case APIC_DM_SMI: - result = 1; - kvm_make_request(KVM_REQ_SMI, vcpu); - kvm_vcpu_kick(vcpu); + if (!kvm_inject_smi(vcpu)) { + kvm_vcpu_kick(vcpu); + result = 1; + } break; case APIC_DM_NMI: diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index a5ac4a5a5179..28e3769066e2 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -7,7 +7,7 @@ #include #include "hyperv.h" -#include "kvm_cache_regs.h" +#include "smm.h" #define KVM_APIC_INIT 0 #define KVM_APIC_SIPI 1 diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 2640871bdcf1..f8c92a4a35fa 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -22,6 +22,7 @@ #include "tdp_mmu.h" #include "x86.h" #include "kvm_cache_regs.h" +#include "smm.h" #include "kvm_emulate.h" #include "cpuid.h" #include "spte.h" diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c new file mode 100644 index 000000000000..b91c48d91f6e --- /dev/null +++ b/arch/x86/kvm/smm.c @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include +#include "x86.h" +#include "kvm_cache_regs.h" +#include "kvm_emulate.h" +#include "smm.h" +#include "trace.h" + +void kvm_smm_changed(struct kvm_vcpu *vcpu, 
bool entering_smm) +{ + trace_kvm_smm_transition(vcpu->vcpu_id, vcpu->arch.smbase, entering_smm); + + if (entering_smm) { + vcpu->arch.hflags |= HF_SMM_MASK; + } else { + vcpu->arch.hflags &= ~(HF_SMM_MASK | HF_SMM_INSIDE_NMI_MASK); + + /* Process a latched INIT or SMI, if any. */ + kvm_make_request(KVM_REQ_EVENT, vcpu); + + /* + * Even if KVM_SET_SREGS2 loaded PDPTRs out of band, + * on SMM exit we still need to reload them from + * guest memory + */ + vcpu->arch.pdptrs_from_userspace = false; + } + + kvm_mmu_reset_context(vcpu); +} + +void process_smi(struct kvm_vcpu *vcpu) +{ + vcpu->arch.smi_pending = true; + kvm_make_request(KVM_REQ_EVENT, vcpu); +} diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h new file mode 100644 index 000000000000..d85d4ccd32dd --- /dev/null +++ b/arch/x86/kvm/smm.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ASM_KVM_SMM_H +#define ASM_KVM_SMM_H + +#define GET_SMSTATE(type, buf, offset) \ + (*(type *)((buf) + (offset) - 0x7e00)) + +#define PUT_SMSTATE(type, buf, offset, val) \ + *(type *)((buf) + (offset) - 0x7e00) = val + +static inline int kvm_inject_smi(struct kvm_vcpu *vcpu) +{ + kvm_make_request(KVM_REQ_SMI, vcpu); + return 0; +} + +static inline bool is_smm(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.hflags & HF_SMM_MASK; +} + +void kvm_smm_changed(struct kvm_vcpu *vcpu, bool in_smm); +void process_smi(struct kvm_vcpu *vcpu); + +#endif diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 4c620999d230..cc0fd75f7cba 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -25,6 +25,7 @@ #include "trace.h" #include "mmu.h" #include "x86.h" +#include "smm.h" #include "cpuid.h" #include "lapic.h" #include "svm.h" diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 3fc8e4999891..3bb07ec78985 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -6,6 +6,7 @@ #include "mmu.h" #include "kvm_cache_regs.h" #include "x86.h" +#include "smm.h" #include 
"cpuid.h" #include "pmu.h" @@ -4407,9 +4408,9 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate) return 0; /* FED8h - SVM Guest */ - put_smstate(u64, smstate, 0x7ed8, 1); + PUT_SMSTATE(u64, smstate, 0x7ed8, 1); /* FEE0h - SVM Guest VMCB Physical Address */ - put_smstate(u64, smstate, 0x7ee0, svm->nested.vmcb12_gpa); + PUT_SMSTATE(u64, smstate, 0x7ee0, svm->nested.vmcb12_gpa); svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 0c62352dda6a..61a2e551640a 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -16,6 +16,7 @@ #include "trace.h" #include "vmx.h" #include "x86.h" +#include "smm.h" static bool __read_mostly enable_shadow_vmcs = 1; module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 7a7e14d4d78c..49065614a3db 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -66,6 +66,7 @@ #include "vmcs12.h" #include "vmx.h" #include "x86.h" +#include "smm.h" MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3c63ba5512e9..d936b0f15d7d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -30,6 +30,7 @@ #include "hyperv.h" #include "lapic.h" #include "xen.h" +#include "smm.h" #include #include @@ -119,7 +120,6 @@ static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS; static void update_cr8_intercept(struct kvm_vcpu *vcpu); static void process_nmi(struct kvm_vcpu *vcpu); -static void process_smi(struct kvm_vcpu *vcpu); static void enter_smm(struct kvm_vcpu *vcpu); static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); static void store_regs(struct kvm_vcpu *vcpu); @@ -4889,13 +4889,6 @@ static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu) return 0; } -static int kvm_vcpu_ioctl_smi(struct kvm_vcpu *vcpu) -{ - 
kvm_make_request(KVM_REQ_SMI, vcpu); - - return 0; -} - static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu, struct kvm_tpr_access_ctl *tac) { @@ -5118,8 +5111,6 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, memset(&events->reserved, 0, sizeof(events->reserved)); } -static void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm); - static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, struct kvm_vcpu_events *events) { @@ -5572,7 +5563,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, break; } case KVM_SMI: { - r = kvm_vcpu_ioctl_smi(vcpu); + r = kvm_inject_smi(vcpu); break; } case KVM_SET_CPUID: { @@ -8569,29 +8560,6 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt, static int complete_emulated_mmio(struct kvm_vcpu *vcpu); static int complete_emulated_pio(struct kvm_vcpu *vcpu); -static void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm) -{ - trace_kvm_smm_transition(vcpu->vcpu_id, vcpu->arch.smbase, entering_smm); - - if (entering_smm) { - vcpu->arch.hflags |= HF_SMM_MASK; - } else { - vcpu->arch.hflags &= ~(HF_SMM_MASK | HF_SMM_INSIDE_NMI_MASK); - - /* Process a latched INIT or SMI, if any. 
*/ - kvm_make_request(KVM_REQ_EVENT, vcpu); - - /* - * Even if KVM_SET_SREGS2 loaded PDPTRs out of band, - * on SMM exit we still need to reload them from - * guest memory - */ - vcpu->arch.pdptrs_from_userspace = false; - } - - kvm_mmu_reset_context(vcpu); -} - static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7, unsigned long *db) { @@ -10088,16 +10056,16 @@ static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n) int offset; kvm_get_segment(vcpu, &seg, n); - put_smstate(u32, buf, 0x7fa8 + n * 4, seg.selector); + PUT_SMSTATE(u32, buf, 0x7fa8 + n * 4, seg.selector); if (n < 3) offset = 0x7f84 + n * 12; else offset = 0x7f2c + (n - 3) * 12; - put_smstate(u32, buf, offset + 8, seg.base); - put_smstate(u32, buf, offset + 4, seg.limit); - put_smstate(u32, buf, offset, enter_smm_get_segment_flags(&seg)); + PUT_SMSTATE(u32, buf, offset + 8, seg.base); + PUT_SMSTATE(u32, buf, offset + 4, seg.limit); + PUT_SMSTATE(u32, buf, offset, enter_smm_get_segment_flags(&seg)); } #ifdef CONFIG_X86_64 @@ -10111,10 +10079,10 @@ static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n) offset = 0x7e00 + n * 16; flags = enter_smm_get_segment_flags(&seg) >> 8; - put_smstate(u16, buf, offset, seg.selector); - put_smstate(u16, buf, offset + 2, flags); - put_smstate(u32, buf, offset + 4, seg.limit); - put_smstate(u64, buf, offset + 8, seg.base); + PUT_SMSTATE(u16, buf, offset, seg.selector); + PUT_SMSTATE(u16, buf, offset + 2, flags); + PUT_SMSTATE(u32, buf, offset + 4, seg.limit); + PUT_SMSTATE(u64, buf, offset + 8, seg.base); } #endif @@ -10125,47 +10093,47 @@ static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf) unsigned long val; int i; - put_smstate(u32, buf, 0x7ffc, kvm_read_cr0(vcpu)); - put_smstate(u32, buf, 0x7ff8, kvm_read_cr3(vcpu)); - put_smstate(u32, buf, 0x7ff4, kvm_get_rflags(vcpu)); - put_smstate(u32, buf, 0x7ff0, kvm_rip_read(vcpu)); + PUT_SMSTATE(u32, buf, 0x7ffc, kvm_read_cr0(vcpu)); + PUT_SMSTATE(u32, 
buf, 0x7ff8, kvm_read_cr3(vcpu)); + PUT_SMSTATE(u32, buf, 0x7ff4, kvm_get_rflags(vcpu)); + PUT_SMSTATE(u32, buf, 0x7ff0, kvm_rip_read(vcpu)); for (i = 0; i < 8; i++) - put_smstate(u32, buf, 0x7fd0 + i * 4, kvm_register_read_raw(vcpu, i)); + PUT_SMSTATE(u32, buf, 0x7fd0 + i * 4, kvm_register_read_raw(vcpu, i)); kvm_get_dr(vcpu, 6, &val); - put_smstate(u32, buf, 0x7fcc, (u32)val); + PUT_SMSTATE(u32, buf, 0x7fcc, (u32)val); kvm_get_dr(vcpu, 7, &val); - put_smstate(u32, buf, 0x7fc8, (u32)val); + PUT_SMSTATE(u32, buf, 0x7fc8, (u32)val); kvm_get_segment(vcpu, &seg, VCPU_SREG_TR); - put_smstate(u32, buf, 0x7fc4, seg.selector); - put_smstate(u32, buf, 0x7f64, seg.base); - put_smstate(u32, buf, 0x7f60, seg.limit); - put_smstate(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg)); + PUT_SMSTATE(u32, buf, 0x7fc4, seg.selector); + PUT_SMSTATE(u32, buf, 0x7f64, seg.base); + PUT_SMSTATE(u32, buf, 0x7f60, seg.limit); + PUT_SMSTATE(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg)); kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR); - put_smstate(u32, buf, 0x7fc0, seg.selector); - put_smstate(u32, buf, 0x7f80, seg.base); - put_smstate(u32, buf, 0x7f7c, seg.limit); - put_smstate(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg)); + PUT_SMSTATE(u32, buf, 0x7fc0, seg.selector); + PUT_SMSTATE(u32, buf, 0x7f80, seg.base); + PUT_SMSTATE(u32, buf, 0x7f7c, seg.limit); + PUT_SMSTATE(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg)); static_call(kvm_x86_get_gdt)(vcpu, &dt); - put_smstate(u32, buf, 0x7f74, dt.address); - put_smstate(u32, buf, 0x7f70, dt.size); + PUT_SMSTATE(u32, buf, 0x7f74, dt.address); + PUT_SMSTATE(u32, buf, 0x7f70, dt.size); static_call(kvm_x86_get_idt)(vcpu, &dt); - put_smstate(u32, buf, 0x7f58, dt.address); - put_smstate(u32, buf, 0x7f54, dt.size); + PUT_SMSTATE(u32, buf, 0x7f58, dt.address); + PUT_SMSTATE(u32, buf, 0x7f54, dt.size); for (i = 0; i < 6; i++) enter_smm_save_seg_32(vcpu, buf, i); - put_smstate(u32, buf, 0x7f14, kvm_read_cr4(vcpu)); + PUT_SMSTATE(u32, 
buf, 0x7f14, kvm_read_cr4(vcpu)); /* revision id */ - put_smstate(u32, buf, 0x7efc, 0x00020000); - put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase); + PUT_SMSTATE(u32, buf, 0x7efc, 0x00020000); + PUT_SMSTATE(u32, buf, 0x7ef8, vcpu->arch.smbase); } #ifdef CONFIG_X86_64 @@ -10177,46 +10145,46 @@ static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf) int i; for (i = 0; i < 16; i++) - put_smstate(u64, buf, 0x7ff8 - i * 8, kvm_register_read_raw(vcpu, i)); + PUT_SMSTATE(u64, buf, 0x7ff8 - i * 8, kvm_register_read_raw(vcpu, i)); - put_smstate(u64, buf, 0x7f78, kvm_rip_read(vcpu)); - put_smstate(u32, buf, 0x7f70, kvm_get_rflags(vcpu)); + PUT_SMSTATE(u64, buf, 0x7f78, kvm_rip_read(vcpu)); + PUT_SMSTATE(u32, buf, 0x7f70, kvm_get_rflags(vcpu)); kvm_get_dr(vcpu, 6, &val); - put_smstate(u64, buf, 0x7f68, val); + PUT_SMSTATE(u64, buf, 0x7f68, val); kvm_get_dr(vcpu, 7, &val); - put_smstate(u64, buf, 0x7f60, val); + PUT_SMSTATE(u64, buf, 0x7f60, val); - put_smstate(u64, buf, 0x7f58, kvm_read_cr0(vcpu)); - put_smstate(u64, buf, 0x7f50, kvm_read_cr3(vcpu)); - put_smstate(u64, buf, 0x7f48, kvm_read_cr4(vcpu)); + PUT_SMSTATE(u64, buf, 0x7f58, kvm_read_cr0(vcpu)); + PUT_SMSTATE(u64, buf, 0x7f50, kvm_read_cr3(vcpu)); + PUT_SMSTATE(u64, buf, 0x7f48, kvm_read_cr4(vcpu)); - put_smstate(u32, buf, 0x7f00, vcpu->arch.smbase); + PUT_SMSTATE(u32, buf, 0x7f00, vcpu->arch.smbase); /* revision id */ - put_smstate(u32, buf, 0x7efc, 0x00020064); + PUT_SMSTATE(u32, buf, 0x7efc, 0x00020064); - put_smstate(u64, buf, 0x7ed0, vcpu->arch.efer); + PUT_SMSTATE(u64, buf, 0x7ed0, vcpu->arch.efer); kvm_get_segment(vcpu, &seg, VCPU_SREG_TR); - put_smstate(u16, buf, 0x7e90, seg.selector); - put_smstate(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8); - put_smstate(u32, buf, 0x7e94, seg.limit); - put_smstate(u64, buf, 0x7e98, seg.base); + PUT_SMSTATE(u16, buf, 0x7e90, seg.selector); + PUT_SMSTATE(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8); + PUT_SMSTATE(u32, buf, 0x7e94, 
seg.limit); + PUT_SMSTATE(u64, buf, 0x7e98, seg.base); static_call(kvm_x86_get_idt)(vcpu, &dt); - put_smstate(u32, buf, 0x7e84, dt.size); - put_smstate(u64, buf, 0x7e88, dt.address); + PUT_SMSTATE(u32, buf, 0x7e84, dt.size); + PUT_SMSTATE(u64, buf, 0x7e88, dt.address); kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR); - put_smstate(u16, buf, 0x7e70, seg.selector); - put_smstate(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8); - put_smstate(u32, buf, 0x7e74, seg.limit); - put_smstate(u64, buf, 0x7e78, seg.base); + PUT_SMSTATE(u16, buf, 0x7e70, seg.selector); + PUT_SMSTATE(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8); + PUT_SMSTATE(u32, buf, 0x7e74, seg.limit); + PUT_SMSTATE(u64, buf, 0x7e78, seg.base); static_call(kvm_x86_get_gdt)(vcpu, &dt); - put_smstate(u32, buf, 0x7e64, dt.size); - put_smstate(u64, buf, 0x7e68, dt.address); + PUT_SMSTATE(u32, buf, 0x7e64, dt.size); + PUT_SMSTATE(u64, buf, 0x7e68, dt.address); for (i = 0; i < 6; i++) enter_smm_save_seg_64(vcpu, buf, i); @@ -10302,12 +10270,6 @@ static void enter_smm(struct kvm_vcpu *vcpu) kvm_mmu_reset_context(vcpu); } -static void process_smi(struct kvm_vcpu *vcpu) -{ - vcpu->arch.smi_pending = true; - kvm_make_request(KVM_REQ_EVENT, vcpu); -} - void kvm_make_scan_ioapic_request_mask(struct kvm *kvm, unsigned long *vcpu_bitmap) { From c53da4f3af6e613e82f88abc6eb988e44c5dadd7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 29 Sep 2022 13:20:10 -0400 Subject: [PATCH 1180/4122] KVM: x86: move SMM entry to a new file Some users of KVM implement the UEFI variable store through a paravirtual device that does not require the "SMM lockbox" component of edk2, and would like to compile out system management mode. In preparation for that, move the SMM entry code out of x86.c and into a new file. 
Signed-off-by: Paolo Bonzini Reviewed-by: Maxim Levitsky Message-Id: <20220929172016.319443-3-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/smm.c | 235 +++++++++++++++++++++++++++++++ arch/x86/kvm/smm.h | 1 + arch/x86/kvm/x86.c | 239 +------------------------------- 4 files changed, 239 insertions(+), 237 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c70e84e69cca..612ef60631c1 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1844,6 +1844,7 @@ int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu); int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); +void kvm_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector); diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c index b91c48d91f6e..26a6859e421f 100644 --- a/arch/x86/kvm/smm.c +++ b/arch/x86/kvm/smm.c @@ -5,6 +5,7 @@ #include "kvm_cache_regs.h" #include "kvm_emulate.h" #include "smm.h" +#include "cpuid.h" #include "trace.h" void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm) @@ -35,3 +36,237 @@ void process_smi(struct kvm_vcpu *vcpu) vcpu->arch.smi_pending = true; kvm_make_request(KVM_REQ_EVENT, vcpu); } + +static u32 enter_smm_get_segment_flags(struct kvm_segment *seg) +{ + u32 flags = 0; + flags |= seg->g << 23; + flags |= seg->db << 22; + flags |= seg->l << 21; + flags |= seg->avl << 20; + flags |= seg->present << 15; + flags |= seg->dpl << 13; + flags |= seg->s << 12; + flags |= seg->type << 8; + return flags; +} + +static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n) +{ + struct kvm_segment seg; + int offset; + + kvm_get_segment(vcpu, &seg, n); + PUT_SMSTATE(u32, buf, 0x7fa8 + n * 4, seg.selector); + 
+ if (n < 3) + offset = 0x7f84 + n * 12; + else + offset = 0x7f2c + (n - 3) * 12; + + PUT_SMSTATE(u32, buf, offset + 8, seg.base); + PUT_SMSTATE(u32, buf, offset + 4, seg.limit); + PUT_SMSTATE(u32, buf, offset, enter_smm_get_segment_flags(&seg)); +} + +#ifdef CONFIG_X86_64 +static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n) +{ + struct kvm_segment seg; + int offset; + u16 flags; + + kvm_get_segment(vcpu, &seg, n); + offset = 0x7e00 + n * 16; + + flags = enter_smm_get_segment_flags(&seg) >> 8; + PUT_SMSTATE(u16, buf, offset, seg.selector); + PUT_SMSTATE(u16, buf, offset + 2, flags); + PUT_SMSTATE(u32, buf, offset + 4, seg.limit); + PUT_SMSTATE(u64, buf, offset + 8, seg.base); +} +#endif + +static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf) +{ + struct desc_ptr dt; + struct kvm_segment seg; + unsigned long val; + int i; + + PUT_SMSTATE(u32, buf, 0x7ffc, kvm_read_cr0(vcpu)); + PUT_SMSTATE(u32, buf, 0x7ff8, kvm_read_cr3(vcpu)); + PUT_SMSTATE(u32, buf, 0x7ff4, kvm_get_rflags(vcpu)); + PUT_SMSTATE(u32, buf, 0x7ff0, kvm_rip_read(vcpu)); + + for (i = 0; i < 8; i++) + PUT_SMSTATE(u32, buf, 0x7fd0 + i * 4, kvm_register_read_raw(vcpu, i)); + + kvm_get_dr(vcpu, 6, &val); + PUT_SMSTATE(u32, buf, 0x7fcc, (u32)val); + kvm_get_dr(vcpu, 7, &val); + PUT_SMSTATE(u32, buf, 0x7fc8, (u32)val); + + kvm_get_segment(vcpu, &seg, VCPU_SREG_TR); + PUT_SMSTATE(u32, buf, 0x7fc4, seg.selector); + PUT_SMSTATE(u32, buf, 0x7f64, seg.base); + PUT_SMSTATE(u32, buf, 0x7f60, seg.limit); + PUT_SMSTATE(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg)); + + kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR); + PUT_SMSTATE(u32, buf, 0x7fc0, seg.selector); + PUT_SMSTATE(u32, buf, 0x7f80, seg.base); + PUT_SMSTATE(u32, buf, 0x7f7c, seg.limit); + PUT_SMSTATE(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg)); + + static_call(kvm_x86_get_gdt)(vcpu, &dt); + PUT_SMSTATE(u32, buf, 0x7f74, dt.address); + PUT_SMSTATE(u32, buf, 0x7f70, dt.size); + + 
static_call(kvm_x86_get_idt)(vcpu, &dt); + PUT_SMSTATE(u32, buf, 0x7f58, dt.address); + PUT_SMSTATE(u32, buf, 0x7f54, dt.size); + + for (i = 0; i < 6; i++) + enter_smm_save_seg_32(vcpu, buf, i); + + PUT_SMSTATE(u32, buf, 0x7f14, kvm_read_cr4(vcpu)); + + /* revision id */ + PUT_SMSTATE(u32, buf, 0x7efc, 0x00020000); + PUT_SMSTATE(u32, buf, 0x7ef8, vcpu->arch.smbase); +} + +#ifdef CONFIG_X86_64 +static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf) +{ + struct desc_ptr dt; + struct kvm_segment seg; + unsigned long val; + int i; + + for (i = 0; i < 16; i++) + PUT_SMSTATE(u64, buf, 0x7ff8 - i * 8, kvm_register_read_raw(vcpu, i)); + + PUT_SMSTATE(u64, buf, 0x7f78, kvm_rip_read(vcpu)); + PUT_SMSTATE(u32, buf, 0x7f70, kvm_get_rflags(vcpu)); + + kvm_get_dr(vcpu, 6, &val); + PUT_SMSTATE(u64, buf, 0x7f68, val); + kvm_get_dr(vcpu, 7, &val); + PUT_SMSTATE(u64, buf, 0x7f60, val); + + PUT_SMSTATE(u64, buf, 0x7f58, kvm_read_cr0(vcpu)); + PUT_SMSTATE(u64, buf, 0x7f50, kvm_read_cr3(vcpu)); + PUT_SMSTATE(u64, buf, 0x7f48, kvm_read_cr4(vcpu)); + + PUT_SMSTATE(u32, buf, 0x7f00, vcpu->arch.smbase); + + /* revision id */ + PUT_SMSTATE(u32, buf, 0x7efc, 0x00020064); + + PUT_SMSTATE(u64, buf, 0x7ed0, vcpu->arch.efer); + + kvm_get_segment(vcpu, &seg, VCPU_SREG_TR); + PUT_SMSTATE(u16, buf, 0x7e90, seg.selector); + PUT_SMSTATE(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8); + PUT_SMSTATE(u32, buf, 0x7e94, seg.limit); + PUT_SMSTATE(u64, buf, 0x7e98, seg.base); + + static_call(kvm_x86_get_idt)(vcpu, &dt); + PUT_SMSTATE(u32, buf, 0x7e84, dt.size); + PUT_SMSTATE(u64, buf, 0x7e88, dt.address); + + kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR); + PUT_SMSTATE(u16, buf, 0x7e70, seg.selector); + PUT_SMSTATE(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8); + PUT_SMSTATE(u32, buf, 0x7e74, seg.limit); + PUT_SMSTATE(u64, buf, 0x7e78, seg.base); + + static_call(kvm_x86_get_gdt)(vcpu, &dt); + PUT_SMSTATE(u32, buf, 0x7e64, dt.size); + PUT_SMSTATE(u64, buf, 0x7e68, 
dt.address); + + for (i = 0; i < 6; i++) + enter_smm_save_seg_64(vcpu, buf, i); +} +#endif + +void enter_smm(struct kvm_vcpu *vcpu) +{ + struct kvm_segment cs, ds; + struct desc_ptr dt; + unsigned long cr0; + char buf[512]; + + memset(buf, 0, 512); +#ifdef CONFIG_X86_64 + if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) + enter_smm_save_state_64(vcpu, buf); + else +#endif + enter_smm_save_state_32(vcpu, buf); + + /* + * Give enter_smm() a chance to make ISA-specific changes to the vCPU + * state (e.g. leave guest mode) after we've saved the state into the + * SMM state-save area. + */ + static_call(kvm_x86_enter_smm)(vcpu, buf); + + kvm_smm_changed(vcpu, true); + kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf)); + + if (static_call(kvm_x86_get_nmi_mask)(vcpu)) + vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK; + else + static_call(kvm_x86_set_nmi_mask)(vcpu, true); + + kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); + kvm_rip_write(vcpu, 0x8000); + + cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG); + static_call(kvm_x86_set_cr0)(vcpu, cr0); + vcpu->arch.cr0 = cr0; + + static_call(kvm_x86_set_cr4)(vcpu, 0); + + /* Undocumented: IDT limit is set to zero on entry to SMM. 
*/ + dt.address = dt.size = 0; + static_call(kvm_x86_set_idt)(vcpu, &dt); + + kvm_set_dr(vcpu, 7, DR7_FIXED_1); + + cs.selector = (vcpu->arch.smbase >> 4) & 0xffff; + cs.base = vcpu->arch.smbase; + + ds.selector = 0; + ds.base = 0; + + cs.limit = ds.limit = 0xffffffff; + cs.type = ds.type = 0x3; + cs.dpl = ds.dpl = 0; + cs.db = ds.db = 0; + cs.s = ds.s = 1; + cs.l = ds.l = 0; + cs.g = ds.g = 1; + cs.avl = ds.avl = 0; + cs.present = ds.present = 1; + cs.unusable = ds.unusable = 0; + cs.padding = ds.padding = 0; + + kvm_set_segment(vcpu, &cs, VCPU_SREG_CS); + kvm_set_segment(vcpu, &ds, VCPU_SREG_DS); + kvm_set_segment(vcpu, &ds, VCPU_SREG_ES); + kvm_set_segment(vcpu, &ds, VCPU_SREG_FS); + kvm_set_segment(vcpu, &ds, VCPU_SREG_GS); + kvm_set_segment(vcpu, &ds, VCPU_SREG_SS); + +#ifdef CONFIG_X86_64 + if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) + static_call(kvm_x86_set_efer)(vcpu, 0); +#endif + + kvm_update_cpuid_runtime(vcpu); + kvm_mmu_reset_context(vcpu); +} diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h index d85d4ccd32dd..aacc6dac2c99 100644 --- a/arch/x86/kvm/smm.h +++ b/arch/x86/kvm/smm.h @@ -20,6 +20,7 @@ static inline bool is_smm(struct kvm_vcpu *vcpu) } void kvm_smm_changed(struct kvm_vcpu *vcpu, bool in_smm); +void enter_smm(struct kvm_vcpu *vcpu); void process_smi(struct kvm_vcpu *vcpu); #endif diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d936b0f15d7d..0730b16564f9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -120,7 +120,6 @@ static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS; static void update_cr8_intercept(struct kvm_vcpu *vcpu); static void process_nmi(struct kvm_vcpu *vcpu); -static void enter_smm(struct kvm_vcpu *vcpu); static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); static void store_regs(struct kvm_vcpu *vcpu); static int sync_regs(struct kvm_vcpu *vcpu); @@ -7108,8 +7107,8 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) return handled; } 
-static void kvm_set_segment(struct kvm_vcpu *vcpu, - struct kvm_segment *var, int seg) +void kvm_set_segment(struct kvm_vcpu *vcpu, + struct kvm_segment *var, int seg) { static_call(kvm_x86_set_segment)(vcpu, var, seg); } @@ -10036,240 +10035,6 @@ static void process_nmi(struct kvm_vcpu *vcpu) kvm_make_request(KVM_REQ_EVENT, vcpu); } -static u32 enter_smm_get_segment_flags(struct kvm_segment *seg) -{ - u32 flags = 0; - flags |= seg->g << 23; - flags |= seg->db << 22; - flags |= seg->l << 21; - flags |= seg->avl << 20; - flags |= seg->present << 15; - flags |= seg->dpl << 13; - flags |= seg->s << 12; - flags |= seg->type << 8; - return flags; -} - -static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n) -{ - struct kvm_segment seg; - int offset; - - kvm_get_segment(vcpu, &seg, n); - PUT_SMSTATE(u32, buf, 0x7fa8 + n * 4, seg.selector); - - if (n < 3) - offset = 0x7f84 + n * 12; - else - offset = 0x7f2c + (n - 3) * 12; - - PUT_SMSTATE(u32, buf, offset + 8, seg.base); - PUT_SMSTATE(u32, buf, offset + 4, seg.limit); - PUT_SMSTATE(u32, buf, offset, enter_smm_get_segment_flags(&seg)); -} - -#ifdef CONFIG_X86_64 -static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n) -{ - struct kvm_segment seg; - int offset; - u16 flags; - - kvm_get_segment(vcpu, &seg, n); - offset = 0x7e00 + n * 16; - - flags = enter_smm_get_segment_flags(&seg) >> 8; - PUT_SMSTATE(u16, buf, offset, seg.selector); - PUT_SMSTATE(u16, buf, offset + 2, flags); - PUT_SMSTATE(u32, buf, offset + 4, seg.limit); - PUT_SMSTATE(u64, buf, offset + 8, seg.base); -} -#endif - -static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf) -{ - struct desc_ptr dt; - struct kvm_segment seg; - unsigned long val; - int i; - - PUT_SMSTATE(u32, buf, 0x7ffc, kvm_read_cr0(vcpu)); - PUT_SMSTATE(u32, buf, 0x7ff8, kvm_read_cr3(vcpu)); - PUT_SMSTATE(u32, buf, 0x7ff4, kvm_get_rflags(vcpu)); - PUT_SMSTATE(u32, buf, 0x7ff0, kvm_rip_read(vcpu)); - - for (i = 0; i < 8; i++) - 
PUT_SMSTATE(u32, buf, 0x7fd0 + i * 4, kvm_register_read_raw(vcpu, i)); - - kvm_get_dr(vcpu, 6, &val); - PUT_SMSTATE(u32, buf, 0x7fcc, (u32)val); - kvm_get_dr(vcpu, 7, &val); - PUT_SMSTATE(u32, buf, 0x7fc8, (u32)val); - - kvm_get_segment(vcpu, &seg, VCPU_SREG_TR); - PUT_SMSTATE(u32, buf, 0x7fc4, seg.selector); - PUT_SMSTATE(u32, buf, 0x7f64, seg.base); - PUT_SMSTATE(u32, buf, 0x7f60, seg.limit); - PUT_SMSTATE(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg)); - - kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR); - PUT_SMSTATE(u32, buf, 0x7fc0, seg.selector); - PUT_SMSTATE(u32, buf, 0x7f80, seg.base); - PUT_SMSTATE(u32, buf, 0x7f7c, seg.limit); - PUT_SMSTATE(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg)); - - static_call(kvm_x86_get_gdt)(vcpu, &dt); - PUT_SMSTATE(u32, buf, 0x7f74, dt.address); - PUT_SMSTATE(u32, buf, 0x7f70, dt.size); - - static_call(kvm_x86_get_idt)(vcpu, &dt); - PUT_SMSTATE(u32, buf, 0x7f58, dt.address); - PUT_SMSTATE(u32, buf, 0x7f54, dt.size); - - for (i = 0; i < 6; i++) - enter_smm_save_seg_32(vcpu, buf, i); - - PUT_SMSTATE(u32, buf, 0x7f14, kvm_read_cr4(vcpu)); - - /* revision id */ - PUT_SMSTATE(u32, buf, 0x7efc, 0x00020000); - PUT_SMSTATE(u32, buf, 0x7ef8, vcpu->arch.smbase); -} - -#ifdef CONFIG_X86_64 -static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf) -{ - struct desc_ptr dt; - struct kvm_segment seg; - unsigned long val; - int i; - - for (i = 0; i < 16; i++) - PUT_SMSTATE(u64, buf, 0x7ff8 - i * 8, kvm_register_read_raw(vcpu, i)); - - PUT_SMSTATE(u64, buf, 0x7f78, kvm_rip_read(vcpu)); - PUT_SMSTATE(u32, buf, 0x7f70, kvm_get_rflags(vcpu)); - - kvm_get_dr(vcpu, 6, &val); - PUT_SMSTATE(u64, buf, 0x7f68, val); - kvm_get_dr(vcpu, 7, &val); - PUT_SMSTATE(u64, buf, 0x7f60, val); - - PUT_SMSTATE(u64, buf, 0x7f58, kvm_read_cr0(vcpu)); - PUT_SMSTATE(u64, buf, 0x7f50, kvm_read_cr3(vcpu)); - PUT_SMSTATE(u64, buf, 0x7f48, kvm_read_cr4(vcpu)); - - PUT_SMSTATE(u32, buf, 0x7f00, vcpu->arch.smbase); - - /* revision id */ - 
PUT_SMSTATE(u32, buf, 0x7efc, 0x00020064); - - PUT_SMSTATE(u64, buf, 0x7ed0, vcpu->arch.efer); - - kvm_get_segment(vcpu, &seg, VCPU_SREG_TR); - PUT_SMSTATE(u16, buf, 0x7e90, seg.selector); - PUT_SMSTATE(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8); - PUT_SMSTATE(u32, buf, 0x7e94, seg.limit); - PUT_SMSTATE(u64, buf, 0x7e98, seg.base); - - static_call(kvm_x86_get_idt)(vcpu, &dt); - PUT_SMSTATE(u32, buf, 0x7e84, dt.size); - PUT_SMSTATE(u64, buf, 0x7e88, dt.address); - - kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR); - PUT_SMSTATE(u16, buf, 0x7e70, seg.selector); - PUT_SMSTATE(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8); - PUT_SMSTATE(u32, buf, 0x7e74, seg.limit); - PUT_SMSTATE(u64, buf, 0x7e78, seg.base); - - static_call(kvm_x86_get_gdt)(vcpu, &dt); - PUT_SMSTATE(u32, buf, 0x7e64, dt.size); - PUT_SMSTATE(u64, buf, 0x7e68, dt.address); - - for (i = 0; i < 6; i++) - enter_smm_save_seg_64(vcpu, buf, i); -} -#endif - -static void enter_smm(struct kvm_vcpu *vcpu) -{ - struct kvm_segment cs, ds; - struct desc_ptr dt; - unsigned long cr0; - char buf[512]; - - memset(buf, 0, 512); -#ifdef CONFIG_X86_64 - if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) - enter_smm_save_state_64(vcpu, buf); - else -#endif - enter_smm_save_state_32(vcpu, buf); - - /* - * Give enter_smm() a chance to make ISA-specific changes to the vCPU - * state (e.g. leave guest mode) after we've saved the state into the - * SMM state-save area. 
- */ - static_call(kvm_x86_enter_smm)(vcpu, buf); - - kvm_smm_changed(vcpu, true); - kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf)); - - if (static_call(kvm_x86_get_nmi_mask)(vcpu)) - vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK; - else - static_call(kvm_x86_set_nmi_mask)(vcpu, true); - - kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); - kvm_rip_write(vcpu, 0x8000); - - cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG); - static_call(kvm_x86_set_cr0)(vcpu, cr0); - vcpu->arch.cr0 = cr0; - - static_call(kvm_x86_set_cr4)(vcpu, 0); - - /* Undocumented: IDT limit is set to zero on entry to SMM. */ - dt.address = dt.size = 0; - static_call(kvm_x86_set_idt)(vcpu, &dt); - - kvm_set_dr(vcpu, 7, DR7_FIXED_1); - - cs.selector = (vcpu->arch.smbase >> 4) & 0xffff; - cs.base = vcpu->arch.smbase; - - ds.selector = 0; - ds.base = 0; - - cs.limit = ds.limit = 0xffffffff; - cs.type = ds.type = 0x3; - cs.dpl = ds.dpl = 0; - cs.db = ds.db = 0; - cs.s = ds.s = 1; - cs.l = ds.l = 0; - cs.g = ds.g = 1; - cs.avl = ds.avl = 0; - cs.present = ds.present = 1; - cs.unusable = ds.unusable = 0; - cs.padding = ds.padding = 0; - - kvm_set_segment(vcpu, &cs, VCPU_SREG_CS); - kvm_set_segment(vcpu, &ds, VCPU_SREG_DS); - kvm_set_segment(vcpu, &ds, VCPU_SREG_ES); - kvm_set_segment(vcpu, &ds, VCPU_SREG_FS); - kvm_set_segment(vcpu, &ds, VCPU_SREG_GS); - kvm_set_segment(vcpu, &ds, VCPU_SREG_SS); - -#ifdef CONFIG_X86_64 - if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) - static_call(kvm_x86_set_efer)(vcpu, 0); -#endif - - kvm_update_cpuid_runtime(vcpu); - kvm_mmu_reset_context(vcpu); -} - void kvm_make_scan_ioapic_request_mask(struct kvm *kvm, unsigned long *vcpu_bitmap) { From f1554150d3c694e30e92c681c20ce9714cac3d42 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 28 Oct 2022 06:01:26 -0400 Subject: [PATCH 1181/4122] KVM: x86: move SMM exit to a new file Some users of KVM implement the UEFI variable store through a paravirtual device that does not require the 
"SMM lockbox" component of edk2, and would like to compile out system management mode. In preparation for that, move the SMM exit code out of emulate.c and into a new file. The code is still written as a series of invocations of the emulator callbacks, but the two exiting_smm and leave_smm callbacks are merged into one, and all the code from em_rsm is now part of the callback. This removes all knowledge of the format of the SMM save state area from the emulator. Further patches will clean up the code and invoke KVM's own functions to access control registers, descriptor caches, etc. Signed-off-by: Paolo Bonzini Reviewed-by: Maxim Levitsky Message-Id: <20220929172016.319443-4-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 356 +------------------------------------ arch/x86/kvm/kvm_emulate.h | 34 +++- arch/x86/kvm/smm.c | 316 ++++++++++++++++++++++++++++++++ arch/x86/kvm/smm.h | 1 + arch/x86/kvm/x86.c | 14 -- 5 files changed, 351 insertions(+), 370 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index eea29aac83bd..5cc3efa0e21c 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -30,7 +30,6 @@ #include "tss.h" #include "mmu.h" #include "pmu.h" -#include "smm.h" /* * Operand types @@ -243,37 +242,6 @@ enum x86_transfer_type { X86_TRANSFER_TASK_SWITCH, }; -static ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr) -{ - if (KVM_EMULATOR_BUG_ON(nr >= NR_EMULATOR_GPRS, ctxt)) - nr &= NR_EMULATOR_GPRS - 1; - - if (!(ctxt->regs_valid & (1 << nr))) { - ctxt->regs_valid |= 1 << nr; - ctxt->_regs[nr] = ctxt->ops->read_gpr(ctxt, nr); - } - return ctxt->_regs[nr]; -} - -static ulong *reg_write(struct x86_emulate_ctxt *ctxt, unsigned nr) -{ - if (KVM_EMULATOR_BUG_ON(nr >= NR_EMULATOR_GPRS, ctxt)) - nr &= NR_EMULATOR_GPRS - 1; - - BUILD_BUG_ON(sizeof(ctxt->regs_dirty) * BITS_PER_BYTE < NR_EMULATOR_GPRS); - BUILD_BUG_ON(sizeof(ctxt->regs_valid) * BITS_PER_BYTE < NR_EMULATOR_GPRS); - - ctxt->regs_valid 
|= 1 << nr; - ctxt->regs_dirty |= 1 << nr; - return &ctxt->_regs[nr]; -} - -static ulong *reg_rmw(struct x86_emulate_ctxt *ctxt, unsigned nr) -{ - reg_read(ctxt, nr); - return reg_write(ctxt, nr); -} - static void writeback_registers(struct x86_emulate_ctxt *ctxt) { unsigned long dirty = ctxt->regs_dirty; @@ -2339,335 +2307,15 @@ static int em_lseg(struct x86_emulate_ctxt *ctxt) return rc; } -static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt) -{ -#ifdef CONFIG_X86_64 - return ctxt->ops->guest_has_long_mode(ctxt); -#else - return false; -#endif -} - -static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags) -{ - desc->g = (flags >> 23) & 1; - desc->d = (flags >> 22) & 1; - desc->l = (flags >> 21) & 1; - desc->avl = (flags >> 20) & 1; - desc->p = (flags >> 15) & 1; - desc->dpl = (flags >> 13) & 3; - desc->s = (flags >> 12) & 1; - desc->type = (flags >> 8) & 15; -} - -static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, const char *smstate, - int n) -{ - struct desc_struct desc; - int offset; - u16 selector; - - selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4); - - if (n < 3) - offset = 0x7f84 + n * 12; - else - offset = 0x7f2c + (n - 3) * 12; - - set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8)); - set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4)); - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, offset)); - ctxt->ops->set_segment(ctxt, selector, &desc, 0, n); - return X86EMUL_CONTINUE; -} - -#ifdef CONFIG_X86_64 -static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, const char *smstate, - int n) -{ - struct desc_struct desc; - int offset; - u16 selector; - u32 base3; - - offset = 0x7e00 + n * 16; - - selector = GET_SMSTATE(u16, smstate, offset); - rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smstate, offset + 2) << 8); - set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4)); - set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8)); - base3 = GET_SMSTATE(u32, smstate, offset + 12); - - 
ctxt->ops->set_segment(ctxt, selector, &desc, base3, n); - return X86EMUL_CONTINUE; -} -#endif - -static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt, - u64 cr0, u64 cr3, u64 cr4) -{ - int bad; - u64 pcid; - - /* In order to later set CR4.PCIDE, CR3[11:0] must be zero. */ - pcid = 0; - if (cr4 & X86_CR4_PCIDE) { - pcid = cr3 & 0xfff; - cr3 &= ~0xfff; - } - - bad = ctxt->ops->set_cr(ctxt, 3, cr3); - if (bad) - return X86EMUL_UNHANDLEABLE; - - /* - * First enable PAE, long mode needs it before CR0.PG = 1 is set. - * Then enable protected mode. However, PCID cannot be enabled - * if EFER.LMA=0, so set it separately. - */ - bad = ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE); - if (bad) - return X86EMUL_UNHANDLEABLE; - - bad = ctxt->ops->set_cr(ctxt, 0, cr0); - if (bad) - return X86EMUL_UNHANDLEABLE; - - if (cr4 & X86_CR4_PCIDE) { - bad = ctxt->ops->set_cr(ctxt, 4, cr4); - if (bad) - return X86EMUL_UNHANDLEABLE; - if (pcid) { - bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid); - if (bad) - return X86EMUL_UNHANDLEABLE; - } - - } - - return X86EMUL_CONTINUE; -} - -static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, - const char *smstate) -{ - struct desc_struct desc; - struct desc_ptr dt; - u16 selector; - u32 val, cr0, cr3, cr4; - int i; - - cr0 = GET_SMSTATE(u32, smstate, 0x7ffc); - cr3 = GET_SMSTATE(u32, smstate, 0x7ff8); - ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED; - ctxt->_eip = GET_SMSTATE(u32, smstate, 0x7ff0); - - for (i = 0; i < 8; i++) - *reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4); - - val = GET_SMSTATE(u32, smstate, 0x7fcc); - - if (ctxt->ops->set_dr(ctxt, 6, val)) - return X86EMUL_UNHANDLEABLE; - - val = GET_SMSTATE(u32, smstate, 0x7fc8); - - if (ctxt->ops->set_dr(ctxt, 7, val)) - return X86EMUL_UNHANDLEABLE; - - selector = GET_SMSTATE(u32, smstate, 0x7fc4); - set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f64)); - set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f60)); - 
rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f5c)); - ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR); - - selector = GET_SMSTATE(u32, smstate, 0x7fc0); - set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f80)); - set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f7c)); - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f78)); - ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR); - - dt.address = GET_SMSTATE(u32, smstate, 0x7f74); - dt.size = GET_SMSTATE(u32, smstate, 0x7f70); - ctxt->ops->set_gdt(ctxt, &dt); - - dt.address = GET_SMSTATE(u32, smstate, 0x7f58); - dt.size = GET_SMSTATE(u32, smstate, 0x7f54); - ctxt->ops->set_idt(ctxt, &dt); - - for (i = 0; i < 6; i++) { - int r = rsm_load_seg_32(ctxt, smstate, i); - if (r != X86EMUL_CONTINUE) - return r; - } - - cr4 = GET_SMSTATE(u32, smstate, 0x7f14); - - ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7ef8)); - - return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4); -} - -#ifdef CONFIG_X86_64 -static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, - const char *smstate) -{ - struct desc_struct desc; - struct desc_ptr dt; - u64 val, cr0, cr3, cr4; - u32 base3; - u16 selector; - int i, r; - - for (i = 0; i < 16; i++) - *reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8); - - ctxt->_eip = GET_SMSTATE(u64, smstate, 0x7f78); - ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7f70) | X86_EFLAGS_FIXED; - - val = GET_SMSTATE(u64, smstate, 0x7f68); - - if (ctxt->ops->set_dr(ctxt, 6, val)) - return X86EMUL_UNHANDLEABLE; - - val = GET_SMSTATE(u64, smstate, 0x7f60); - - if (ctxt->ops->set_dr(ctxt, 7, val)) - return X86EMUL_UNHANDLEABLE; - - cr0 = GET_SMSTATE(u64, smstate, 0x7f58); - cr3 = GET_SMSTATE(u64, smstate, 0x7f50); - cr4 = GET_SMSTATE(u64, smstate, 0x7f48); - ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7f00)); - val = GET_SMSTATE(u64, smstate, 0x7ed0); - - if (ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA)) - return X86EMUL_UNHANDLEABLE; 
- - selector = GET_SMSTATE(u32, smstate, 0x7e90); - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e92) << 8); - set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e94)); - set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e98)); - base3 = GET_SMSTATE(u32, smstate, 0x7e9c); - ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR); - - dt.size = GET_SMSTATE(u32, smstate, 0x7e84); - dt.address = GET_SMSTATE(u64, smstate, 0x7e88); - ctxt->ops->set_idt(ctxt, &dt); - - selector = GET_SMSTATE(u32, smstate, 0x7e70); - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e72) << 8); - set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e74)); - set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e78)); - base3 = GET_SMSTATE(u32, smstate, 0x7e7c); - ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR); - - dt.size = GET_SMSTATE(u32, smstate, 0x7e64); - dt.address = GET_SMSTATE(u64, smstate, 0x7e68); - ctxt->ops->set_gdt(ctxt, &dt); - - r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4); - if (r != X86EMUL_CONTINUE) - return r; - - for (i = 0; i < 6; i++) { - r = rsm_load_seg_64(ctxt, smstate, i); - if (r != X86EMUL_CONTINUE) - return r; - } - - return X86EMUL_CONTINUE; -} -#endif - static int em_rsm(struct x86_emulate_ctxt *ctxt) { - unsigned long cr0, cr4, efer; - char buf[512]; - u64 smbase; - int ret; - if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_MASK) == 0) return emulate_ud(ctxt); - smbase = ctxt->ops->get_smbase(ctxt); + if (ctxt->ops->leave_smm(ctxt)) + ctxt->ops->triple_fault(ctxt); - ret = ctxt->ops->read_phys(ctxt, smbase + 0xfe00, buf, sizeof(buf)); - if (ret != X86EMUL_CONTINUE) - return X86EMUL_UNHANDLEABLE; - - if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0) - ctxt->ops->set_nmi_mask(ctxt, false); - - ctxt->ops->exiting_smm(ctxt); - - /* - * Get back to real mode, to prepare a safe state in which to load - * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU - * supports long mode. 
- */ - if (emulator_has_longmode(ctxt)) { - struct desc_struct cs_desc; - - /* Zero CR4.PCIDE before CR0.PG. */ - cr4 = ctxt->ops->get_cr(ctxt, 4); - if (cr4 & X86_CR4_PCIDE) - ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE); - - /* A 32-bit code segment is required to clear EFER.LMA. */ - memset(&cs_desc, 0, sizeof(cs_desc)); - cs_desc.type = 0xb; - cs_desc.s = cs_desc.g = cs_desc.p = 1; - ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS); - } - - /* For the 64-bit case, this will clear EFER.LMA. */ - cr0 = ctxt->ops->get_cr(ctxt, 0); - if (cr0 & X86_CR0_PE) - ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE)); - - if (emulator_has_longmode(ctxt)) { - /* Clear CR4.PAE before clearing EFER.LME. */ - cr4 = ctxt->ops->get_cr(ctxt, 4); - if (cr4 & X86_CR4_PAE) - ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE); - - /* And finally go back to 32-bit mode. */ - efer = 0; - ctxt->ops->set_msr(ctxt, MSR_EFER, efer); - } - - /* - * Give leave_smm() a chance to make ISA-specific changes to the vCPU - * state (e.g. enter guest mode) before loading state from the SMM - * state-save area. - */ - if (ctxt->ops->leave_smm(ctxt, buf)) - goto emulate_shutdown; - -#ifdef CONFIG_X86_64 - if (emulator_has_longmode(ctxt)) - ret = rsm_load_state_64(ctxt, buf); - else -#endif - ret = rsm_load_state_32(ctxt, buf); - - if (ret != X86EMUL_CONTINUE) - goto emulate_shutdown; - - /* - * Note, the ctxt->ops callbacks are responsible for handling side - * effects when writing MSRs and CRs, e.g. MMU context resets, CPUID - * runtime updates, etc... If that changes, e.g. this flow is moved - * out of the emulator to make it look more like enter_smm(), then - * those side effects need to be explicitly handled for both success - * and shutdown. 
- */ return emulator_recalc_and_set_mode(ctxt); - -emulate_shutdown: - ctxt->ops->triple_fault(ctxt); - return X86EMUL_CONTINUE; } static void diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h index 89246446d6aa..d7afbc448dd2 100644 --- a/arch/x86/kvm/kvm_emulate.h +++ b/arch/x86/kvm/kvm_emulate.h @@ -234,8 +234,7 @@ struct x86_emulate_ops { void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked); unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt); - void (*exiting_smm)(struct x86_emulate_ctxt *ctxt); - int (*leave_smm)(struct x86_emulate_ctxt *ctxt, const char *smstate); + int (*leave_smm)(struct x86_emulate_ctxt *ctxt); void (*triple_fault)(struct x86_emulate_ctxt *ctxt); int (*set_xcr)(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr); }; @@ -526,4 +525,35 @@ void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt); void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt); bool emulator_can_use_gpa(struct x86_emulate_ctxt *ctxt); +static inline ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr) +{ + if (KVM_EMULATOR_BUG_ON(nr >= NR_EMULATOR_GPRS, ctxt)) + nr &= NR_EMULATOR_GPRS - 1; + + if (!(ctxt->regs_valid & (1 << nr))) { + ctxt->regs_valid |= 1 << nr; + ctxt->_regs[nr] = ctxt->ops->read_gpr(ctxt, nr); + } + return ctxt->_regs[nr]; +} + +static inline ulong *reg_write(struct x86_emulate_ctxt *ctxt, unsigned nr) +{ + if (KVM_EMULATOR_BUG_ON(nr >= NR_EMULATOR_GPRS, ctxt)) + nr &= NR_EMULATOR_GPRS - 1; + + BUILD_BUG_ON(sizeof(ctxt->regs_dirty) * BITS_PER_BYTE < NR_EMULATOR_GPRS); + BUILD_BUG_ON(sizeof(ctxt->regs_valid) * BITS_PER_BYTE < NR_EMULATOR_GPRS); + + ctxt->regs_valid |= 1 << nr; + ctxt->regs_dirty |= 1 << nr; + return &ctxt->_regs[nr]; +} + +static inline ulong *reg_rmw(struct x86_emulate_ctxt *ctxt, unsigned nr) +{ + reg_read(ctxt, nr); + return reg_write(ctxt, nr); +} + #endif /* _ASM_X86_KVM_X86_EMULATE_H */ diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c index 
26a6859e421f..073dad4f04b5 100644 --- a/arch/x86/kvm/smm.c +++ b/arch/x86/kvm/smm.c @@ -270,3 +270,319 @@ void enter_smm(struct kvm_vcpu *vcpu) kvm_update_cpuid_runtime(vcpu); kvm_mmu_reset_context(vcpu); } + +static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt) +{ +#ifdef CONFIG_X86_64 + return ctxt->ops->guest_has_long_mode(ctxt); +#else + return false; +#endif +} + +static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags) +{ + desc->g = (flags >> 23) & 1; + desc->d = (flags >> 22) & 1; + desc->l = (flags >> 21) & 1; + desc->avl = (flags >> 20) & 1; + desc->p = (flags >> 15) & 1; + desc->dpl = (flags >> 13) & 3; + desc->s = (flags >> 12) & 1; + desc->type = (flags >> 8) & 15; +} + +static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, const char *smstate, + int n) +{ + struct desc_struct desc; + int offset; + u16 selector; + + selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4); + + if (n < 3) + offset = 0x7f84 + n * 12; + else + offset = 0x7f2c + (n - 3) * 12; + + set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8)); + set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4)); + rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, offset)); + ctxt->ops->set_segment(ctxt, selector, &desc, 0, n); + return X86EMUL_CONTINUE; +} + +#ifdef CONFIG_X86_64 +static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, const char *smstate, + int n) +{ + struct desc_struct desc; + int offset; + u16 selector; + u32 base3; + + offset = 0x7e00 + n * 16; + + selector = GET_SMSTATE(u16, smstate, offset); + rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smstate, offset + 2) << 8); + set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4)); + set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8)); + base3 = GET_SMSTATE(u32, smstate, offset + 12); + + ctxt->ops->set_segment(ctxt, selector, &desc, base3, n); + return X86EMUL_CONTINUE; +} +#endif + +static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt, + u64 cr0, u64 cr3, u64 cr4) +{ 
+ int bad; + u64 pcid; + + /* In order to later set CR4.PCIDE, CR3[11:0] must be zero. */ + pcid = 0; + if (cr4 & X86_CR4_PCIDE) { + pcid = cr3 & 0xfff; + cr3 &= ~0xfff; + } + + bad = ctxt->ops->set_cr(ctxt, 3, cr3); + if (bad) + return X86EMUL_UNHANDLEABLE; + + /* + * First enable PAE, long mode needs it before CR0.PG = 1 is set. + * Then enable protected mode. However, PCID cannot be enabled + * if EFER.LMA=0, so set it separately. + */ + bad = ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE); + if (bad) + return X86EMUL_UNHANDLEABLE; + + bad = ctxt->ops->set_cr(ctxt, 0, cr0); + if (bad) + return X86EMUL_UNHANDLEABLE; + + if (cr4 & X86_CR4_PCIDE) { + bad = ctxt->ops->set_cr(ctxt, 4, cr4); + if (bad) + return X86EMUL_UNHANDLEABLE; + if (pcid) { + bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid); + if (bad) + return X86EMUL_UNHANDLEABLE; + } + + } + + return X86EMUL_CONTINUE; +} + +static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, + const char *smstate) +{ + struct desc_struct desc; + struct desc_ptr dt; + u16 selector; + u32 val, cr0, cr3, cr4; + int i; + + cr0 = GET_SMSTATE(u32, smstate, 0x7ffc); + cr3 = GET_SMSTATE(u32, smstate, 0x7ff8); + ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED; + ctxt->_eip = GET_SMSTATE(u32, smstate, 0x7ff0); + + for (i = 0; i < 8; i++) + *reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4); + + val = GET_SMSTATE(u32, smstate, 0x7fcc); + + if (ctxt->ops->set_dr(ctxt, 6, val)) + return X86EMUL_UNHANDLEABLE; + + val = GET_SMSTATE(u32, smstate, 0x7fc8); + + if (ctxt->ops->set_dr(ctxt, 7, val)) + return X86EMUL_UNHANDLEABLE; + + selector = GET_SMSTATE(u32, smstate, 0x7fc4); + set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f64)); + set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f60)); + rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f5c)); + ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR); + + selector = GET_SMSTATE(u32, smstate, 0x7fc0); + set_desc_base(&desc, 
GET_SMSTATE(u32, smstate, 0x7f80)); + set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f7c)); + rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f78)); + ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR); + + dt.address = GET_SMSTATE(u32, smstate, 0x7f74); + dt.size = GET_SMSTATE(u32, smstate, 0x7f70); + ctxt->ops->set_gdt(ctxt, &dt); + + dt.address = GET_SMSTATE(u32, smstate, 0x7f58); + dt.size = GET_SMSTATE(u32, smstate, 0x7f54); + ctxt->ops->set_idt(ctxt, &dt); + + for (i = 0; i < 6; i++) { + int r = rsm_load_seg_32(ctxt, smstate, i); + if (r != X86EMUL_CONTINUE) + return r; + } + + cr4 = GET_SMSTATE(u32, smstate, 0x7f14); + + ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7ef8)); + + return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4); +} + +#ifdef CONFIG_X86_64 +static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, + const char *smstate) +{ + struct desc_struct desc; + struct desc_ptr dt; + u64 val, cr0, cr3, cr4; + u32 base3; + u16 selector; + int i, r; + + for (i = 0; i < 16; i++) + *reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8); + + ctxt->_eip = GET_SMSTATE(u64, smstate, 0x7f78); + ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7f70) | X86_EFLAGS_FIXED; + + val = GET_SMSTATE(u64, smstate, 0x7f68); + + if (ctxt->ops->set_dr(ctxt, 6, val)) + return X86EMUL_UNHANDLEABLE; + + val = GET_SMSTATE(u64, smstate, 0x7f60); + + if (ctxt->ops->set_dr(ctxt, 7, val)) + return X86EMUL_UNHANDLEABLE; + + cr0 = GET_SMSTATE(u64, smstate, 0x7f58); + cr3 = GET_SMSTATE(u64, smstate, 0x7f50); + cr4 = GET_SMSTATE(u64, smstate, 0x7f48); + ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7f00)); + val = GET_SMSTATE(u64, smstate, 0x7ed0); + + if (ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA)) + return X86EMUL_UNHANDLEABLE; + + selector = GET_SMSTATE(u32, smstate, 0x7e90); + rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e92) << 8); + set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e94)); + set_desc_base(&desc, 
GET_SMSTATE(u32, smstate, 0x7e98)); + base3 = GET_SMSTATE(u32, smstate, 0x7e9c); + ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR); + + dt.size = GET_SMSTATE(u32, smstate, 0x7e84); + dt.address = GET_SMSTATE(u64, smstate, 0x7e88); + ctxt->ops->set_idt(ctxt, &dt); + + selector = GET_SMSTATE(u32, smstate, 0x7e70); + rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e72) << 8); + set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e74)); + set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e78)); + base3 = GET_SMSTATE(u32, smstate, 0x7e7c); + ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR); + + dt.size = GET_SMSTATE(u32, smstate, 0x7e64); + dt.address = GET_SMSTATE(u64, smstate, 0x7e68); + ctxt->ops->set_gdt(ctxt, &dt); + + r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4); + if (r != X86EMUL_CONTINUE) + return r; + + for (i = 0; i < 6; i++) { + r = rsm_load_seg_64(ctxt, smstate, i); + if (r != X86EMUL_CONTINUE) + return r; + } + + return X86EMUL_CONTINUE; +} +#endif + +int emulator_leave_smm(struct x86_emulate_ctxt *ctxt) +{ + struct kvm_vcpu *vcpu = ctxt->vcpu; + unsigned long cr0, cr4, efer; + char buf[512]; + u64 smbase; + int ret; + + smbase = ctxt->ops->get_smbase(ctxt); + + ret = ctxt->ops->read_phys(ctxt, smbase + 0xfe00, buf, sizeof(buf)); + if (ret != X86EMUL_CONTINUE) + return X86EMUL_UNHANDLEABLE; + + if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0) + ctxt->ops->set_nmi_mask(ctxt, false); + + kvm_smm_changed(vcpu, false); + + /* + * Get back to real mode, to prepare a safe state in which to load + * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU + * supports long mode. + * + * The ctxt->ops callbacks will handle all side effects when writing + * writing MSRs and CRs, e.g. MMU context resets, CPUID + * runtime updates, etc. + */ + if (emulator_has_longmode(ctxt)) { + struct desc_struct cs_desc; + + /* Zero CR4.PCIDE before CR0.PG. 
*/ + cr4 = ctxt->ops->get_cr(ctxt, 4); + if (cr4 & X86_CR4_PCIDE) + ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE); + + /* A 32-bit code segment is required to clear EFER.LMA. */ + memset(&cs_desc, 0, sizeof(cs_desc)); + cs_desc.type = 0xb; + cs_desc.s = cs_desc.g = cs_desc.p = 1; + ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS); + } + + /* For the 64-bit case, this will clear EFER.LMA. */ + cr0 = ctxt->ops->get_cr(ctxt, 0); + if (cr0 & X86_CR0_PE) + ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE)); + + if (emulator_has_longmode(ctxt)) { + /* Clear CR4.PAE before clearing EFER.LME. */ + cr4 = ctxt->ops->get_cr(ctxt, 4); + if (cr4 & X86_CR4_PAE) + ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE); + + /* And finally go back to 32-bit mode. */ + efer = 0; + ctxt->ops->set_msr(ctxt, MSR_EFER, efer); + } + + /* + * Give leave_smm() a chance to make ISA-specific changes to the vCPU + * state (e.g. enter guest mode) before loading state from the SMM + * state-save area. 
+ */ + if (static_call(kvm_x86_leave_smm)(vcpu, buf)) + return X86EMUL_UNHANDLEABLE; + +#ifdef CONFIG_X86_64 + if (emulator_has_longmode(ctxt)) + return rsm_load_state_64(ctxt, buf); + else +#endif + return rsm_load_state_32(ctxt, buf); +} diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h index aacc6dac2c99..b0602a92e511 100644 --- a/arch/x86/kvm/smm.h +++ b/arch/x86/kvm/smm.h @@ -21,6 +21,7 @@ static inline bool is_smm(struct kvm_vcpu *vcpu) void kvm_smm_changed(struct kvm_vcpu *vcpu, bool in_smm); void enter_smm(struct kvm_vcpu *vcpu); +int emulator_leave_smm(struct x86_emulate_ctxt *ctxt); void process_smi(struct kvm_vcpu *vcpu); #endif diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0730b16564f9..b953f0184208 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8150,19 +8150,6 @@ static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt) return emul_to_vcpu(ctxt)->arch.hflags; } -static void emulator_exiting_smm(struct x86_emulate_ctxt *ctxt) -{ - struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); - - kvm_smm_changed(vcpu, false); -} - -static int emulator_leave_smm(struct x86_emulate_ctxt *ctxt, - const char *smstate) -{ - return static_call(kvm_x86_leave_smm)(emul_to_vcpu(ctxt), smstate); -} - static void emulator_triple_fault(struct x86_emulate_ctxt *ctxt) { kvm_make_request(KVM_REQ_TRIPLE_FAULT, emul_to_vcpu(ctxt)); @@ -8226,7 +8213,6 @@ static const struct x86_emulate_ops emulate_ops = { .guest_has_rdpid = emulator_guest_has_rdpid, .set_nmi_mask = emulator_set_nmi_mask, .get_hflags = emulator_get_hflags, - .exiting_smm = emulator_exiting_smm, .leave_smm = emulator_leave_smm, .triple_fault = emulator_triple_fault, .set_xcr = emulator_set_xcr, From 1d0da94cdafe38b2c501a8d55f981204e588e259 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 29 Sep 2022 13:20:12 -0400 Subject: [PATCH 1182/4122] KVM: x86: do not go through ctxt->ops when emulating rsm Now that RSM is implemented in a single emulator callback, there is no point in 
going through other callbacks for the sake of modifying processor state. Just invoke KVM's own internal functions directly, and remove the callbacks that were only used by em_rsm; the only substantial difference is in the handling of the segment registers and descriptor cache, which have to be parsed into a struct kvm_segment instead of a struct desc_struct. This also fixes a bug where emulator_set_segment was shifting the limit left by 12 if the G bit is set, but the limit had not been shifted right upon entry to SMM. The emulator context is still used to restore EIP and the general purpose registers. Signed-off-by: Paolo Bonzini Reviewed-by: Maxim Levitsky Message-Id: <20220929172016.319443-5-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/kvm_emulate.h | 13 --- arch/x86/kvm/smm.c | 182 +++++++++++++++++-------------------- arch/x86/kvm/x86.c | 33 ------- 3 files changed, 85 insertions(+), 143 deletions(-) diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h index d7afbc448dd2..84b1f2661463 100644 --- a/arch/x86/kvm/kvm_emulate.h +++ b/arch/x86/kvm/kvm_emulate.h @@ -116,16 +116,6 @@ struct x86_emulate_ops { unsigned int bytes, struct x86_exception *fault, bool system); - /* - * read_phys: Read bytes of standard (non-emulated/special) memory. - * Used for descriptor reading. - * @addr: [IN ] Physical address from which to read. - * @val: [OUT] Value read from memory. - * @bytes: [IN ] Number of bytes to read from memory. - */ - int (*read_phys)(struct x86_emulate_ctxt *ctxt, unsigned long addr, - void *val, unsigned int bytes); - /* * write_std: Write bytes of standard (non-emulated/special) memory. * Used for descriptor writing. 
@@ -209,11 +199,8 @@ struct x86_emulate_ops { int (*cpl)(struct x86_emulate_ctxt *ctxt); void (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest); int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value); - u64 (*get_smbase)(struct x86_emulate_ctxt *ctxt); - void (*set_smbase)(struct x86_emulate_ctxt *ctxt, u64 smbase); int (*set_msr_with_filter)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data); int (*get_msr_with_filter)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata); - int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data); int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata); int (*check_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc); int (*read_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc, u64 *pdata); diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c index 073dad4f04b5..102ecb852564 100644 --- a/arch/x86/kvm/smm.c +++ b/arch/x86/kvm/smm.c @@ -271,71 +271,59 @@ void enter_smm(struct kvm_vcpu *vcpu) kvm_mmu_reset_context(vcpu); } -static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt) -{ -#ifdef CONFIG_X86_64 - return ctxt->ops->guest_has_long_mode(ctxt); -#else - return false; -#endif -} - -static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags) +static void rsm_set_desc_flags(struct kvm_segment *desc, u32 flags) { desc->g = (flags >> 23) & 1; - desc->d = (flags >> 22) & 1; + desc->db = (flags >> 22) & 1; desc->l = (flags >> 21) & 1; desc->avl = (flags >> 20) & 1; - desc->p = (flags >> 15) & 1; + desc->present = (flags >> 15) & 1; desc->dpl = (flags >> 13) & 3; desc->s = (flags >> 12) & 1; desc->type = (flags >> 8) & 15; + + desc->unusable = !desc->present; + desc->padding = 0; } -static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, const char *smstate, +static int rsm_load_seg_32(struct kvm_vcpu *vcpu, const char *smstate, int n) { - struct desc_struct desc; + struct kvm_segment desc; int offset; - u16 selector; - - selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n 
* 4); if (n < 3) offset = 0x7f84 + n * 12; else offset = 0x7f2c + (n - 3) * 12; - set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8)); - set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4)); + desc.selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4); + desc.base = GET_SMSTATE(u32, smstate, offset + 8); + desc.limit = GET_SMSTATE(u32, smstate, offset + 4); rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, offset)); - ctxt->ops->set_segment(ctxt, selector, &desc, 0, n); + kvm_set_segment(vcpu, &desc, n); return X86EMUL_CONTINUE; } #ifdef CONFIG_X86_64 -static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, const char *smstate, +static int rsm_load_seg_64(struct kvm_vcpu *vcpu, const char *smstate, int n) { - struct desc_struct desc; + struct kvm_segment desc; int offset; - u16 selector; - u32 base3; offset = 0x7e00 + n * 16; - selector = GET_SMSTATE(u16, smstate, offset); + desc.selector = GET_SMSTATE(u16, smstate, offset); rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smstate, offset + 2) << 8); - set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4)); - set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8)); - base3 = GET_SMSTATE(u32, smstate, offset + 12); - - ctxt->ops->set_segment(ctxt, selector, &desc, base3, n); + desc.limit = GET_SMSTATE(u32, smstate, offset + 4); + desc.base = GET_SMSTATE(u64, smstate, offset + 8); + kvm_set_segment(vcpu, &desc, n); return X86EMUL_CONTINUE; } #endif -static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt, +static int rsm_enter_protected_mode(struct kvm_vcpu *vcpu, u64 cr0, u64 cr3, u64 cr4) { int bad; @@ -348,7 +336,7 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt, cr3 &= ~0xfff; } - bad = ctxt->ops->set_cr(ctxt, 3, cr3); + bad = kvm_set_cr3(vcpu, cr3); if (bad) return X86EMUL_UNHANDLEABLE; @@ -357,20 +345,20 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt, * Then enable protected mode. 
However, PCID cannot be enabled * if EFER.LMA=0, so set it separately. */ - bad = ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE); + bad = kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PCIDE); if (bad) return X86EMUL_UNHANDLEABLE; - bad = ctxt->ops->set_cr(ctxt, 0, cr0); + bad = kvm_set_cr0(vcpu, cr0); if (bad) return X86EMUL_UNHANDLEABLE; if (cr4 & X86_CR4_PCIDE) { - bad = ctxt->ops->set_cr(ctxt, 4, cr4); + bad = kvm_set_cr4(vcpu, cr4); if (bad) return X86EMUL_UNHANDLEABLE; if (pcid) { - bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid); + bad = kvm_set_cr3(vcpu, cr3 | pcid); if (bad) return X86EMUL_UNHANDLEABLE; } @@ -383,9 +371,9 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt, static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, const char *smstate) { - struct desc_struct desc; + struct kvm_vcpu *vcpu = ctxt->vcpu; + struct kvm_segment desc; struct desc_ptr dt; - u16 selector; u32 val, cr0, cr3, cr4; int i; @@ -399,56 +387,55 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, val = GET_SMSTATE(u32, smstate, 0x7fcc); - if (ctxt->ops->set_dr(ctxt, 6, val)) + if (kvm_set_dr(vcpu, 6, val)) return X86EMUL_UNHANDLEABLE; val = GET_SMSTATE(u32, smstate, 0x7fc8); - if (ctxt->ops->set_dr(ctxt, 7, val)) + if (kvm_set_dr(vcpu, 7, val)) return X86EMUL_UNHANDLEABLE; - selector = GET_SMSTATE(u32, smstate, 0x7fc4); - set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f64)); - set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f60)); + desc.selector = GET_SMSTATE(u32, smstate, 0x7fc4); + desc.base = GET_SMSTATE(u32, smstate, 0x7f64); + desc.limit = GET_SMSTATE(u32, smstate, 0x7f60); rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f5c)); - ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR); + kvm_set_segment(vcpu, &desc, VCPU_SREG_TR); - selector = GET_SMSTATE(u32, smstate, 0x7fc0); - set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f80)); - set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f7c)); + desc.selector = GET_SMSTATE(u32, 
smstate, 0x7fc0); + desc.base = GET_SMSTATE(u32, smstate, 0x7f80); + desc.limit = GET_SMSTATE(u32, smstate, 0x7f7c); rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f78)); - ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR); + kvm_set_segment(vcpu, &desc, VCPU_SREG_LDTR); dt.address = GET_SMSTATE(u32, smstate, 0x7f74); dt.size = GET_SMSTATE(u32, smstate, 0x7f70); - ctxt->ops->set_gdt(ctxt, &dt); + static_call(kvm_x86_set_gdt)(vcpu, &dt); dt.address = GET_SMSTATE(u32, smstate, 0x7f58); dt.size = GET_SMSTATE(u32, smstate, 0x7f54); - ctxt->ops->set_idt(ctxt, &dt); + static_call(kvm_x86_set_idt)(vcpu, &dt); for (i = 0; i < 6; i++) { - int r = rsm_load_seg_32(ctxt, smstate, i); + int r = rsm_load_seg_32(vcpu, smstate, i); if (r != X86EMUL_CONTINUE) return r; } cr4 = GET_SMSTATE(u32, smstate, 0x7f14); - ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7ef8)); + vcpu->arch.smbase = GET_SMSTATE(u32, smstate, 0x7ef8); - return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4); + return rsm_enter_protected_mode(vcpu, cr0, cr3, cr4); } #ifdef CONFIG_X86_64 static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, const char *smstate) { - struct desc_struct desc; + struct kvm_vcpu *vcpu = ctxt->vcpu; + struct kvm_segment desc; struct desc_ptr dt; u64 val, cr0, cr3, cr4; - u32 base3; - u16 selector; int i, r; for (i = 0; i < 16; i++) @@ -459,51 +446,49 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, val = GET_SMSTATE(u64, smstate, 0x7f68); - if (ctxt->ops->set_dr(ctxt, 6, val)) + if (kvm_set_dr(vcpu, 6, val)) return X86EMUL_UNHANDLEABLE; val = GET_SMSTATE(u64, smstate, 0x7f60); - if (ctxt->ops->set_dr(ctxt, 7, val)) + if (kvm_set_dr(vcpu, 7, val)) return X86EMUL_UNHANDLEABLE; cr0 = GET_SMSTATE(u64, smstate, 0x7f58); cr3 = GET_SMSTATE(u64, smstate, 0x7f50); cr4 = GET_SMSTATE(u64, smstate, 0x7f48); - ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7f00)); + vcpu->arch.smbase = GET_SMSTATE(u32, smstate, 0x7f00); val = 
GET_SMSTATE(u64, smstate, 0x7ed0); - if (ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA)) + if (kvm_set_msr(vcpu, MSR_EFER, val & ~EFER_LMA)) return X86EMUL_UNHANDLEABLE; - selector = GET_SMSTATE(u32, smstate, 0x7e90); + desc.selector = GET_SMSTATE(u32, smstate, 0x7e90); rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e92) << 8); - set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e94)); - set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e98)); - base3 = GET_SMSTATE(u32, smstate, 0x7e9c); - ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR); + desc.limit = GET_SMSTATE(u32, smstate, 0x7e94); + desc.base = GET_SMSTATE(u64, smstate, 0x7e98); + kvm_set_segment(vcpu, &desc, VCPU_SREG_TR); dt.size = GET_SMSTATE(u32, smstate, 0x7e84); dt.address = GET_SMSTATE(u64, smstate, 0x7e88); - ctxt->ops->set_idt(ctxt, &dt); + static_call(kvm_x86_set_idt)(vcpu, &dt); - selector = GET_SMSTATE(u32, smstate, 0x7e70); + desc.selector = GET_SMSTATE(u32, smstate, 0x7e70); rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e72) << 8); - set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e74)); - set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e78)); - base3 = GET_SMSTATE(u32, smstate, 0x7e7c); - ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR); + desc.limit = GET_SMSTATE(u32, smstate, 0x7e74); + desc.base = GET_SMSTATE(u64, smstate, 0x7e78); + kvm_set_segment(vcpu, &desc, VCPU_SREG_LDTR); dt.size = GET_SMSTATE(u32, smstate, 0x7e64); dt.address = GET_SMSTATE(u64, smstate, 0x7e68); - ctxt->ops->set_gdt(ctxt, &dt); + static_call(kvm_x86_set_gdt)(vcpu, &dt); - r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4); + r = rsm_enter_protected_mode(vcpu, cr0, cr3, cr4); if (r != X86EMUL_CONTINUE) return r; for (i = 0; i < 6; i++) { - r = rsm_load_seg_64(ctxt, smstate, i); + r = rsm_load_seg_64(vcpu, smstate, i); if (r != X86EMUL_CONTINUE) return r; } @@ -515,19 +500,19 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, int 
emulator_leave_smm(struct x86_emulate_ctxt *ctxt) { struct kvm_vcpu *vcpu = ctxt->vcpu; - unsigned long cr0, cr4, efer; + unsigned long cr0; char buf[512]; u64 smbase; int ret; - smbase = ctxt->ops->get_smbase(ctxt); + smbase = vcpu->arch.smbase; - ret = ctxt->ops->read_phys(ctxt, smbase + 0xfe00, buf, sizeof(buf)); - if (ret != X86EMUL_CONTINUE) + ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfe00, buf, sizeof(buf)); + if (ret < 0) return X86EMUL_UNHANDLEABLE; - if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0) - ctxt->ops->set_nmi_mask(ctxt, false); + if ((vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK) == 0) + static_call(kvm_x86_set_nmi_mask)(vcpu, false); kvm_smm_changed(vcpu, false); @@ -535,41 +520,44 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt) * Get back to real mode, to prepare a safe state in which to load * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU * supports long mode. - * - * The ctxt->ops callbacks will handle all side effects when writing - * writing MSRs and CRs, e.g. MMU context resets, CPUID - * runtime updates, etc. */ - if (emulator_has_longmode(ctxt)) { - struct desc_struct cs_desc; +#ifdef CONFIG_X86_64 + if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) { + struct kvm_segment cs_desc; + unsigned long cr4; /* Zero CR4.PCIDE before CR0.PG. */ - cr4 = ctxt->ops->get_cr(ctxt, 4); + cr4 = kvm_read_cr4(vcpu); if (cr4 & X86_CR4_PCIDE) - ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE); + kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PCIDE); /* A 32-bit code segment is required to clear EFER.LMA. */ memset(&cs_desc, 0, sizeof(cs_desc)); cs_desc.type = 0xb; - cs_desc.s = cs_desc.g = cs_desc.p = 1; - ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS); + cs_desc.s = cs_desc.g = cs_desc.present = 1; + kvm_set_segment(vcpu, &cs_desc, VCPU_SREG_CS); } +#endif /* For the 64-bit case, this will clear EFER.LMA. 
*/ - cr0 = ctxt->ops->get_cr(ctxt, 0); + cr0 = kvm_read_cr0(vcpu); if (cr0 & X86_CR0_PE) - ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE)); + kvm_set_cr0(vcpu, cr0 & ~(X86_CR0_PG | X86_CR0_PE)); + +#ifdef CONFIG_X86_64 + if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) { + unsigned long cr4, efer; - if (emulator_has_longmode(ctxt)) { /* Clear CR4.PAE before clearing EFER.LME. */ - cr4 = ctxt->ops->get_cr(ctxt, 4); + cr4 = kvm_read_cr4(vcpu); if (cr4 & X86_CR4_PAE) - ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE); + kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PAE); /* And finally go back to 32-bit mode. */ efer = 0; - ctxt->ops->set_msr(ctxt, MSR_EFER, efer); + kvm_set_msr(vcpu, MSR_EFER, efer); } +#endif /* * Give leave_smm() a chance to make ISA-specific changes to the vCPU @@ -580,7 +568,7 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt) return X86EMUL_UNHANDLEABLE; #ifdef CONFIG_X86_64 - if (emulator_has_longmode(ctxt)) + if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) return rsm_load_state_64(ctxt, buf); else #endif diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b953f0184208..019ba8725412 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7256,15 +7256,6 @@ static int emulator_read_std(struct x86_emulate_ctxt *ctxt, return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, exception); } -static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt, - unsigned long addr, void *val, unsigned int bytes) -{ - struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); - int r = kvm_vcpu_read_guest(vcpu, addr, val, bytes); - - return r < 0 ? 
X86EMUL_IO_NEEDED : X86EMUL_CONTINUE; -} - static int kvm_write_guest_virt_helper(gva_t addr, void *val, unsigned int bytes, struct kvm_vcpu *vcpu, u64 access, struct x86_exception *exception) @@ -8056,26 +8047,6 @@ static int emulator_get_msr(struct x86_emulate_ctxt *ctxt, return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata); } -static int emulator_set_msr(struct x86_emulate_ctxt *ctxt, - u32 msr_index, u64 data) -{ - return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data); -} - -static u64 emulator_get_smbase(struct x86_emulate_ctxt *ctxt) -{ - struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); - - return vcpu->arch.smbase; -} - -static void emulator_set_smbase(struct x86_emulate_ctxt *ctxt, u64 smbase) -{ - struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); - - vcpu->arch.smbase = smbase; -} - static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt, u32 pmc) { @@ -8174,7 +8145,6 @@ static const struct x86_emulate_ops emulate_ops = { .write_gpr = emulator_write_gpr, .read_std = emulator_read_std, .write_std = emulator_write_std, - .read_phys = kvm_read_guest_phys_system, .fetch = kvm_fetch_guest_virt, .read_emulated = emulator_read_emulated, .write_emulated = emulator_write_emulated, @@ -8194,11 +8164,8 @@ static const struct x86_emulate_ops emulate_ops = { .cpl = emulator_get_cpl, .get_dr = emulator_get_dr, .set_dr = emulator_set_dr, - .get_smbase = emulator_get_smbase, - .set_smbase = emulator_set_smbase, .set_msr_with_filter = emulator_set_msr_with_filter, .get_msr_with_filter = emulator_get_msr_with_filter, - .set_msr = emulator_set_msr, .get_msr = emulator_get_msr, .check_pmc = emulator_check_pmc, .read_pmc = emulator_read_pmc, From 4b8e1b32013da2495244dbdee70f2456e6bc7aca Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 29 Sep 2022 13:20:13 -0400 Subject: [PATCH 1183/4122] KVM: allow compiling out SMM support Some users of KVM implement the UEFI variable store through a paravirtual device that does not require the "SMM lockbox" component of edk2; allow 
them to compile out system management mode, which is not a full implementation especially in how it interacts with nested virtualization. Suggested-by: Sean Christopherson Signed-off-by: Paolo Bonzini Reviewed-by: Maxim Levitsky Message-Id: <20220929172016.319443-6-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/Kconfig | 11 ++++++++++ arch/x86/kvm/Makefile | 2 +- arch/x86/kvm/smm.h | 12 ++++++++++ arch/x86/kvm/svm/svm.c | 2 ++ arch/x86/kvm/vmx/vmx.c | 2 ++ arch/x86/kvm/x86.c | 22 +++++++++++++++++-- tools/testing/selftests/kvm/x86_64/smm_test.c | 2 ++ 7 files changed, 50 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 67be7f217e37..fbeaa9ddef59 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -118,6 +118,17 @@ config KVM_AMD_SEV Provides support for launching Encrypted VMs (SEV) and Encrypted VMs with Encrypted State (SEV-ES) on AMD processors. +config KVM_SMM + bool "System Management Mode emulation" + default y + depends on KVM + help + Provides support for KVM to emulate System Management Mode (SMM) + in virtual machines. This can be used by the virtual machine + firmware to implement UEFI secure boot. + + If unsure, say Y. 
+ config KVM_XEN bool "Support for Xen hypercall interface" depends on KVM diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index b584cb0e06bd..b8a494b6a5ec 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -20,7 +20,7 @@ endif kvm-$(CONFIG_X86_64) += mmu/tdp_iter.o mmu/tdp_mmu.o kvm-$(CONFIG_KVM_XEN) += xen.o -kvm-y += smm.o +kvm-$(CONFIG_KVM_SMM) += smm.o kvm-intel-y += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \ vmx/evmcs.o vmx/nested.o vmx/posted_intr.o diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h index b0602a92e511..0e1bd8bd6dc4 100644 --- a/arch/x86/kvm/smm.h +++ b/arch/x86/kvm/smm.h @@ -8,6 +8,7 @@ #define PUT_SMSTATE(type, buf, offset, val) \ *(type *)((buf) + (offset) - 0x7e00) = val +#ifdef CONFIG_KVM_SMM static inline int kvm_inject_smi(struct kvm_vcpu *vcpu) { kvm_make_request(KVM_REQ_SMI, vcpu); @@ -23,5 +24,16 @@ void kvm_smm_changed(struct kvm_vcpu *vcpu, bool in_smm); void enter_smm(struct kvm_vcpu *vcpu); int emulator_leave_smm(struct x86_emulate_ctxt *ctxt); void process_smi(struct kvm_vcpu *vcpu); +#else +static inline int kvm_inject_smi(struct kvm_vcpu *vcpu) { return -ENOTTY; } +static inline bool is_smm(struct kvm_vcpu *vcpu) { return false; } +static inline void enter_smm(struct kvm_vcpu *vcpu) { WARN_ON_ONCE(1); } +static inline void process_smi(struct kvm_vcpu *vcpu) { WARN_ON_ONCE(1); } + +/* + * emulator_leave_smm is used as a function pointer, so the + * stub is defined in x86.c. + */ +#endif #endif diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 3bb07ec78985..4cc014b46406 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4115,6 +4115,8 @@ static bool svm_has_emulated_msr(struct kvm *kvm, u32 index) case MSR_IA32_VMX_BASIC ... 
MSR_IA32_VMX_VMFUNC: return false; case MSR_IA32_SMBASE: + if (!IS_ENABLED(CONFIG_KVM_SMM)) + return false; /* SEV-ES guests do not support SMM, so report false */ if (kvm && sev_es_guest(kvm)) return false; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 49065614a3db..6a0b65815206 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6842,6 +6842,8 @@ static bool vmx_has_emulated_msr(struct kvm *kvm, u32 index) { switch (index) { case MSR_IA32_SMBASE: + if (!IS_ENABLED(CONFIG_KVM_SMM)) + return false; /* * We cannot do SMM unless we can run the guest in big * real mode. diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 019ba8725412..0a80cd1d91c8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3642,7 +3642,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; } case MSR_IA32_SMBASE: - if (!msr_info->host_initiated) + if (!IS_ENABLED(CONFIG_KVM_SMM) || !msr_info->host_initiated) return 1; vcpu->arch.smbase = data; break; @@ -4058,7 +4058,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = vcpu->arch.ia32_misc_enable_msr; break; case MSR_IA32_SMBASE: - if (!msr_info->host_initiated) + if (!IS_ENABLED(CONFIG_KVM_SMM) || !msr_info->host_initiated) return 1; msr_info->data = vcpu->arch.smbase; break; @@ -4432,6 +4432,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r |= KVM_X86_DISABLE_EXITS_MWAIT; break; case KVM_CAP_X86_SMM: + if (!IS_ENABLED(CONFIG_KVM_SMM)) + break; + /* SMBASE is usually relocated above 1M on modern chipsets, * and SMM handlers might indeed rely on 4G segment limits, * so do not report SMM to be available if real mode is @@ -5182,6 +5185,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, vcpu->arch.apic->sipi_vector = events->sipi_vector; if (events->flags & KVM_VCPUEVENT_VALID_SMM) { +#ifdef CONFIG_KVM_SMM if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) { 
kvm_x86_ops.nested_ops->leave_nested(vcpu); kvm_smm_changed(vcpu, events->smi.smm); @@ -5196,6 +5200,12 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK; } +#else + if (events->smi.smm || events->smi.pending || + events->smi.smm_inside_nmi) + return -EINVAL; +#endif + if (lapic_in_kernel(vcpu)) { if (events->smi.latched_init) set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); @@ -8121,6 +8131,14 @@ static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt) return emul_to_vcpu(ctxt)->arch.hflags; } +#ifndef CONFIG_KVM_SMM +static int emulator_leave_smm(struct x86_emulate_ctxt *ctxt) +{ + WARN_ON_ONCE(1); + return X86EMUL_UNHANDLEABLE; +} +#endif + static void emulator_triple_fault(struct x86_emulate_ctxt *ctxt) { kvm_make_request(KVM_REQ_TRIPLE_FAULT, emul_to_vcpu(ctxt)); diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c index 1f136a81858e..cb38a478e1f6 100644 --- a/tools/testing/selftests/kvm/x86_64/smm_test.c +++ b/tools/testing/selftests/kvm/x86_64/smm_test.c @@ -137,6 +137,8 @@ int main(int argc, char *argv[]) struct kvm_x86_state *state; int stage, stage_reported; + TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_SMM)); + /* Create VM */ vm = vm_create_with_one_vcpu(&vcpu, guest_code); From 31e83e21cf00fe5b669eb352ff3ed70e74b40fad Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 29 Sep 2022 13:20:14 -0400 Subject: [PATCH 1184/4122] KVM: x86: compile out vendor-specific code if SMM is disabled Vendor-specific code that deals with SMI injection and saving/restoring SMM state is not needed if CONFIG_KVM_SMM is disabled, so remove the four callbacks smi_allowed, enter_smm, leave_smm and enable_smi_window. The users in svm/nested.c and x86.c also have to be compiled out; the amount of #ifdef'ed code is small and it's not worth moving it to smm.c. 
enter_smm is now used only within #ifdef CONFIG_KVM_SMM, and the stub can therefore be removed. Signed-off-by: Paolo Bonzini Reviewed-by: Maxim Levitsky Message-Id: <20220929172016.319443-7-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm-x86-ops.h | 2 ++ arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/smm.h | 1 - arch/x86/kvm/svm/nested.c | 2 ++ arch/x86/kvm/svm/svm.c | 4 ++++ arch/x86/kvm/vmx/vmx.c | 4 ++++ arch/x86/kvm/x86.c | 4 ++++ 7 files changed, 18 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index 82ba4a564e58..ea58e67e9a67 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -110,10 +110,12 @@ KVM_X86_OP_OPTIONAL_RET0(dy_apicv_has_pending_interrupt) KVM_X86_OP_OPTIONAL(set_hv_timer) KVM_X86_OP_OPTIONAL(cancel_hv_timer) KVM_X86_OP(setup_mce) +#ifdef CONFIG_KVM_SMM KVM_X86_OP(smi_allowed) KVM_X86_OP(enter_smm) KVM_X86_OP(leave_smm) KVM_X86_OP(enable_smi_window) +#endif KVM_X86_OP_OPTIONAL(mem_enc_ioctl) KVM_X86_OP_OPTIONAL(mem_enc_register_region) KVM_X86_OP_OPTIONAL(mem_enc_unregister_region) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 612ef60631c1..3e5e54d7baa6 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1612,10 +1612,12 @@ struct kvm_x86_ops { void (*setup_mce)(struct kvm_vcpu *vcpu); +#ifdef CONFIG_KVM_SMM int (*smi_allowed)(struct kvm_vcpu *vcpu, bool for_injection); int (*enter_smm)(struct kvm_vcpu *vcpu, char *smstate); int (*leave_smm)(struct kvm_vcpu *vcpu, const char *smstate); void (*enable_smi_window)(struct kvm_vcpu *vcpu); +#endif int (*mem_enc_ioctl)(struct kvm *kvm, void __user *argp); int (*mem_enc_register_region)(struct kvm *kvm, struct kvm_enc_region *argp); diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h index 0e1bd8bd6dc4..8debe81494c6 100644 --- a/arch/x86/kvm/smm.h +++ b/arch/x86/kvm/smm.h @@ -27,7 +27,6 @@ 
void process_smi(struct kvm_vcpu *vcpu); #else static inline int kvm_inject_smi(struct kvm_vcpu *vcpu) { return -ENOTTY; } static inline bool is_smm(struct kvm_vcpu *vcpu) { return false; } -static inline void enter_smm(struct kvm_vcpu *vcpu) { WARN_ON_ONCE(1); } static inline void process_smi(struct kvm_vcpu *vcpu) { WARN_ON_ONCE(1); } /* diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index cc0fd75f7cba..b258d6988f5d 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -1378,6 +1378,7 @@ static int svm_check_nested_events(struct kvm_vcpu *vcpu) return 0; } +#ifdef CONFIG_KVM_SMM if (vcpu->arch.smi_pending && !svm_smi_blocked(vcpu)) { if (block_nested_events) return -EBUSY; @@ -1386,6 +1387,7 @@ static int svm_check_nested_events(struct kvm_vcpu *vcpu) nested_svm_simple_vmexit(svm, SVM_EXIT_SMI); return 0; } +#endif if (vcpu->arch.nmi_pending && !svm_nmi_blocked(vcpu)) { if (block_nested_events) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 4cc014b46406..d28de3e59f7f 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4373,6 +4373,7 @@ static void svm_setup_mce(struct kvm_vcpu *vcpu) vcpu->arch.mcg_cap &= 0x1ff; } +#ifdef CONFIG_KVM_SMM bool svm_smi_blocked(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -4522,6 +4523,7 @@ static void svm_enable_smi_window(struct kvm_vcpu *vcpu) /* We must be in SMM; RSM will cause a vmexit anyway. 
*/ } } +#endif static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, void *insn, int insn_len) @@ -4797,10 +4799,12 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .pi_update_irte = avic_pi_update_irte, .setup_mce = svm_setup_mce, +#ifdef CONFIG_KVM_SMM .smi_allowed = svm_smi_allowed, .enter_smm = svm_enter_smm, .leave_smm = svm_leave_smm, .enable_smi_window = svm_enable_smi_window, +#endif .mem_enc_ioctl = sev_mem_enc_ioctl, .mem_enc_register_region = sev_mem_enc_register_region, diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 6a0b65815206..6be991b29bb7 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7932,6 +7932,7 @@ static void vmx_setup_mce(struct kvm_vcpu *vcpu) ~FEAT_CTL_LMCE_ENABLED; } +#ifdef CONFIG_KVM_SMM static int vmx_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection) { /* we need a nested vmexit to enter SMM, postpone if run is pending */ @@ -7986,6 +7987,7 @@ static void vmx_enable_smi_window(struct kvm_vcpu *vcpu) { /* RSM will cause a vmexit anyway. */ } +#endif static bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu) { @@ -8153,10 +8155,12 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .setup_mce = vmx_setup_mce, +#ifdef CONFIG_KVM_SMM .smi_allowed = vmx_smi_allowed, .enter_smm = vmx_enter_smm, .leave_smm = vmx_leave_smm, .enable_smi_window = vmx_enable_smi_window, +#endif .can_emulate_instruction = vmx_can_emulate_instruction, .apic_init_signal_blocked = vmx_apic_init_signal_blocked, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0a80cd1d91c8..9ac51c848fc8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9919,6 +9919,7 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu, * in order to make progress and get back here for another iteration. * The kvm_x86_ops hooks communicate this by returning -EBUSY. */ +#ifdef CONFIG_KVM_SMM if (vcpu->arch.smi_pending) { r = can_inject ? 
static_call(kvm_x86_smi_allowed)(vcpu, true) : -EBUSY; if (r < 0) @@ -9931,6 +9932,7 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu, } else static_call(kvm_x86_enable_smi_window)(vcpu); } +#endif if (vcpu->arch.nmi_pending) { r = can_inject ? static_call(kvm_x86_nmi_allowed)(vcpu, true) : -EBUSY; @@ -12580,10 +12582,12 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) static_call(kvm_x86_nmi_allowed)(vcpu, false))) return true; +#ifdef CONFIG_KVM_SMM if (kvm_test_request(KVM_REQ_SMI, vcpu) || (vcpu->arch.smi_pending && static_call(kvm_x86_smi_allowed)(vcpu, false))) return true; +#endif if (kvm_arch_interrupt_allowed(vcpu) && (kvm_cpu_has_interrupt(vcpu) || From ba97bb07e0b28c962015aaf219005928774b886c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 29 Sep 2022 13:20:15 -0400 Subject: [PATCH 1185/4122] KVM: x86: remove SMRAM address space if SMM is not supported If CONFIG_KVM_SMM is not defined HF_SMM_MASK will always be zero, and we can spare userspace the hassle of setting up the SMRAM address space simply by reporting that only one address space is supported. Signed-off-by: Paolo Bonzini Reviewed-by: Maxim Levitsky Message-Id: <20220929172016.319443-8-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3e5e54d7baa6..b9f6f854dcef 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1998,11 +1998,14 @@ enum { #define HF_SMM_MASK (1 << 6) #define HF_SMM_INSIDE_NMI_MASK (1 << 7) -#define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE -#define KVM_ADDRESS_SPACE_NUM 2 - -#define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 
1 : 0) -#define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm) +#ifdef CONFIG_KVM_SMM +# define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE +# define KVM_ADDRESS_SPACE_NUM 2 +# define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0) +# define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm) +#else +# define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, 0) +#endif #define KVM_ARCH_WANT_MMU_NOTIFIER From cf7316d0361c5d3289611402b5ac0c64c918b20b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 29 Sep 2022 13:20:16 -0400 Subject: [PATCH 1186/4122] KVM: x86: do not define KVM_REQ_SMI if SMM disabled This ensures that all the relevant code is compiled out, in fact the process_smi stub can be removed too. Signed-off-by: Paolo Bonzini Reviewed-by: Maxim Levitsky Message-Id: <20220929172016.319443-9-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/smm.h | 1 - arch/x86/kvm/x86.c | 6 ++++++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b9f6f854dcef..24a2152a77cd 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -81,7 +81,9 @@ #define KVM_REQ_NMI KVM_ARCH_REQ(9) #define KVM_REQ_PMU KVM_ARCH_REQ(10) #define KVM_REQ_PMI KVM_ARCH_REQ(11) +#ifdef CONFIG_KVM_SMM #define KVM_REQ_SMI KVM_ARCH_REQ(12) +#endif #define KVM_REQ_MASTERCLOCK_UPDATE KVM_ARCH_REQ(13) #define KVM_REQ_MCLOCK_INPROGRESS \ KVM_ARCH_REQ_FLAGS(14, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h index 8debe81494c6..53c81394ebdb 100644 --- a/arch/x86/kvm/smm.h +++ b/arch/x86/kvm/smm.h @@ -27,7 +27,6 @@ void process_smi(struct kvm_vcpu *vcpu); #else static inline int kvm_inject_smi(struct kvm_vcpu *vcpu) { return -ENOTTY; } static inline bool is_smm(struct kvm_vcpu *vcpu) { return false; } -static inline void 
process_smi(struct kvm_vcpu *vcpu) { WARN_ON_ONCE(1); } /* * emulator_leave_smm is used as a function pointer, so the diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9ac51c848fc8..73c32030d514 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5026,8 +5026,10 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, process_nmi(vcpu); +#ifdef CONFIG_KVM_SMM if (kvm_check_request(KVM_REQ_SMI, vcpu)) process_smi(vcpu); +#endif /* * KVM's ABI only allows for one exception to be migrated. Luckily, @@ -10266,8 +10268,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) } if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu)) record_steal_time(vcpu); +#ifdef CONFIG_KVM_SMM if (kvm_check_request(KVM_REQ_SMI, vcpu)) process_smi(vcpu); +#endif if (kvm_check_request(KVM_REQ_NMI, vcpu)) process_nmi(vcpu); if (kvm_check_request(KVM_REQ_PMU, vcpu)) @@ -12628,7 +12632,9 @@ bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu) return true; if (kvm_test_request(KVM_REQ_NMI, vcpu) || +#ifdef CONFIG_KVM_SMM kvm_test_request(KVM_REQ_SMI, vcpu) || +#endif kvm_test_request(KVM_REQ_EVENT, vcpu)) return true; From 85672346a707d8e4d5657279dac6b356e1edf24a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 27 Oct 2022 12:44:28 -0400 Subject: [PATCH 1187/4122] KVM: zero output of KVM_GET_VCPU_EVENTS before filling in the struct This allows making some fields optional, as will be the case soon for SMM-related data. 
Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 73c32030d514..84aa51613e4f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5057,16 +5057,15 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, ex->pending && ex->has_payload) kvm_deliver_exception_payload(vcpu, ex); + memset(events, 0, sizeof(*events)); + /* * The API doesn't provide the instruction length for software * exceptions, so don't report them. As long as the guest RIP * isn't advanced, we should expect to encounter the exception * again. */ - if (kvm_exception_is_soft(ex->vector)) { - events->exception.injected = 0; - events->exception.pending = 0; - } else { + if (!kvm_exception_is_soft(ex->vector)) { events->exception.injected = ex->injected; events->exception.pending = ex->pending; /* @@ -5086,15 +5085,13 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, events->interrupt.injected = vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft; events->interrupt.nr = vcpu->arch.interrupt.nr; - events->interrupt.soft = 0; events->interrupt.shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu); events->nmi.injected = vcpu->arch.nmi_injected; events->nmi.pending = vcpu->arch.nmi_pending != 0; events->nmi.masked = static_call(kvm_x86_get_nmi_mask)(vcpu); - events->nmi.pad = 0; - events->sipi_vector = 0; /* never valid when reporting to user space */ + /* events->sipi_vector is never valid when reporting to user space */ events->smi.smm = is_smm(vcpu); events->smi.pending = vcpu->arch.smi_pending; @@ -5111,8 +5108,6 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, events->triple_fault.pending = kvm_test_request(KVM_REQ_TRIPLE_FAULT, vcpu); events->flags |= KVM_VCPUEVENT_VALID_TRIPLE_FAULT; } - - memset(&events->reserved, 0, sizeof(events->reserved)); } static int 
kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, From a7662aa5e56ffa9adf65699eda541f94e157cc83 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 7 Nov 2022 10:11:42 -0500 Subject: [PATCH 1188/4122] KVM: x86: do not define SMM-related constants if SMM disabled The hidden processor flags HF_SMM_MASK and HF_SMM_INSIDE_NMI_MASK are not needed if CONFIG_KVM_SMM is turned off. Remove the definitions altogether and the code that uses them. Suggested-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 3 ++- arch/x86/kvm/kvm_emulate.h | 1 - arch/x86/kvm/smm.c | 2 ++ arch/x86/kvm/x86.c | 4 ++-- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 24a2152a77cd..e9862eaac43b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1997,10 +1997,11 @@ enum { #define HF_NMI_MASK (1 << 3) #define HF_IRET_MASK (1 << 4) #define HF_GUEST_MASK (1 << 5) /* VCPU is in guest-mode */ + +#ifdef CONFIG_KVM_SMM #define HF_SMM_MASK (1 << 6) #define HF_SMM_INSIDE_NMI_MASK (1 << 7) -#ifdef CONFIG_KVM_SMM # define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE # define KVM_ADDRESS_SPACE_NUM 2 # define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 
1 : 0) diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h index 84b1f2661463..2d9662be8333 100644 --- a/arch/x86/kvm/kvm_emulate.h +++ b/arch/x86/kvm/kvm_emulate.h @@ -278,7 +278,6 @@ enum x86emul_mode { /* These match some of the HF_* flags defined in kvm_host.h */ #define X86EMUL_GUEST_MASK (1 << 5) /* VCPU is in guest-mode */ #define X86EMUL_SMM_MASK (1 << 6) -#define X86EMUL_SMM_INSIDE_NMI_MASK (1 << 7) /* * fastop functions are declared as taking a never-defined fastop parameter, diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c index 102ecb852564..d9cf104ad94f 100644 --- a/arch/x86/kvm/smm.c +++ b/arch/x86/kvm/smm.c @@ -10,6 +10,8 @@ void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm) { + BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK); + trace_kvm_smm_transition(vcpu->vcpu_id, vcpu->arch.smbase, entering_smm); if (entering_smm) { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 84aa51613e4f..cbec2e675c18 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5093,10 +5093,12 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, /* events->sipi_vector is never valid when reporting to user space */ +#ifdef CONFIG_KVM_SMM events->smi.smm = is_smm(vcpu); events->smi.pending = vcpu->arch.smi_pending; events->smi.smm_inside_nmi = !!(vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK); +#endif events->smi.latched_init = kvm_lapic_latched_init(vcpu); events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING @@ -8267,8 +8269,6 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) cs_db ? 
X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; BUILD_BUG_ON(HF_GUEST_MASK != X86EMUL_GUEST_MASK); - BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK); - BUILD_BUG_ON(HF_SMM_INSIDE_NMI_MASK != X86EMUL_SMM_INSIDE_NMI_MASK); ctxt->interruptibility = 0; ctxt->have_exception = false; From 89dccf82e99e95ee465e2b00428494fe64679256 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 25 Oct 2022 15:47:33 +0300 Subject: [PATCH 1189/4122] KVM: x86: smm: check for failures on smm entry In the rare case of the failure on SMM entry, the KVM should at least terminate the VM instead of going south. Suggested-by: Sean Christopherson Signed-off-by: Maxim Levitsky Message-Id: <20221025124741.228045-16-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/smm.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c index d9cf104ad94f..2d5bb2af70e4 100644 --- a/arch/x86/kvm/smm.c +++ b/arch/x86/kvm/smm.c @@ -213,11 +213,17 @@ void enter_smm(struct kvm_vcpu *vcpu) * Give enter_smm() a chance to make ISA-specific changes to the vCPU * state (e.g. leave guest mode) after we've saved the state into the * SMM state-save area. + * + * Kill the VM in the unlikely case of failure, because the VM + * can be in undefined state in this case. 
*/ - static_call(kvm_x86_enter_smm)(vcpu, buf); + if (static_call(kvm_x86_enter_smm)(vcpu, buf)) + goto error; kvm_smm_changed(vcpu, true); - kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf)); + + if (kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf))) + goto error; if (static_call(kvm_x86_get_nmi_mask)(vcpu)) vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK; @@ -237,7 +243,8 @@ void enter_smm(struct kvm_vcpu *vcpu) dt.address = dt.size = 0; static_call(kvm_x86_set_idt)(vcpu, &dt); - kvm_set_dr(vcpu, 7, DR7_FIXED_1); + if (WARN_ON_ONCE(kvm_set_dr(vcpu, 7, DR7_FIXED_1))) + goto error; cs.selector = (vcpu->arch.smbase >> 4) & 0xffff; cs.base = vcpu->arch.smbase; @@ -266,11 +273,15 @@ void enter_smm(struct kvm_vcpu *vcpu) #ifdef CONFIG_X86_64 if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) - static_call(kvm_x86_set_efer)(vcpu, 0); + if (static_call(kvm_x86_set_efer)(vcpu, 0)) + goto error; #endif kvm_update_cpuid_runtime(vcpu); kvm_mmu_reset_context(vcpu); + return; +error: + kvm_vm_dead(vcpu->kvm); } static void rsm_set_desc_flags(struct kvm_segment *desc, u32 flags) From 09779c16e3eda95312ca14cd263dbb05da147b75 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 25 Oct 2022 15:47:34 +0300 Subject: [PATCH 1190/4122] KVM: x86: smm: add structs for KVM's smram layout Add structs that will be used to define and read/write the KVM's SMRAM layout, instead of reading/writing to raw offsets. Also document the differences between KVM's SMRAM layout and SMRAM layout that is used by real Intel/AMD cpus. 
Signed-off-by: Maxim Levitsky Message-Id: <20221025124741.228045-17-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/smm.c | 98 +++++++++++++++++++++++++++++++++ arch/x86/kvm/smm.h | 133 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 231 insertions(+) diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c index 2d5bb2af70e4..2e6ec79a581e 100644 --- a/arch/x86/kvm/smm.c +++ b/arch/x86/kvm/smm.c @@ -8,6 +8,102 @@ #include "cpuid.h" #include "trace.h" +#define CHECK_SMRAM32_OFFSET(field, offset) \ + ASSERT_STRUCT_OFFSET(struct kvm_smram_state_32, field, offset - 0xFE00) + +#define CHECK_SMRAM64_OFFSET(field, offset) \ + ASSERT_STRUCT_OFFSET(struct kvm_smram_state_64, field, offset - 0xFE00) + +static void check_smram_offsets(void) +{ + /* 32 bit SMRAM image */ + CHECK_SMRAM32_OFFSET(reserved1, 0xFE00); + CHECK_SMRAM32_OFFSET(smbase, 0xFEF8); + CHECK_SMRAM32_OFFSET(smm_revision, 0xFEFC); + CHECK_SMRAM32_OFFSET(io_inst_restart, 0xFF00); + CHECK_SMRAM32_OFFSET(auto_hlt_restart, 0xFF02); + CHECK_SMRAM32_OFFSET(io_restart_rdi, 0xFF04); + CHECK_SMRAM32_OFFSET(io_restart_rcx, 0xFF08); + CHECK_SMRAM32_OFFSET(io_restart_rsi, 0xFF0C); + CHECK_SMRAM32_OFFSET(io_restart_rip, 0xFF10); + CHECK_SMRAM32_OFFSET(cr4, 0xFF14); + CHECK_SMRAM32_OFFSET(reserved3, 0xFF18); + CHECK_SMRAM32_OFFSET(ds, 0xFF2C); + CHECK_SMRAM32_OFFSET(fs, 0xFF38); + CHECK_SMRAM32_OFFSET(gs, 0xFF44); + CHECK_SMRAM32_OFFSET(idtr, 0xFF50); + CHECK_SMRAM32_OFFSET(tr, 0xFF5C); + CHECK_SMRAM32_OFFSET(gdtr, 0xFF6C); + CHECK_SMRAM32_OFFSET(ldtr, 0xFF78); + CHECK_SMRAM32_OFFSET(es, 0xFF84); + CHECK_SMRAM32_OFFSET(cs, 0xFF90); + CHECK_SMRAM32_OFFSET(ss, 0xFF9C); + CHECK_SMRAM32_OFFSET(es_sel, 0xFFA8); + CHECK_SMRAM32_OFFSET(cs_sel, 0xFFAC); + CHECK_SMRAM32_OFFSET(ss_sel, 0xFFB0); + CHECK_SMRAM32_OFFSET(ds_sel, 0xFFB4); + CHECK_SMRAM32_OFFSET(fs_sel, 0xFFB8); + CHECK_SMRAM32_OFFSET(gs_sel, 0xFFBC); + CHECK_SMRAM32_OFFSET(ldtr_sel, 0xFFC0); + CHECK_SMRAM32_OFFSET(tr_sel, 0xFFC4); + 
CHECK_SMRAM32_OFFSET(dr7, 0xFFC8); + CHECK_SMRAM32_OFFSET(dr6, 0xFFCC); + CHECK_SMRAM32_OFFSET(gprs, 0xFFD0); + CHECK_SMRAM32_OFFSET(eip, 0xFFF0); + CHECK_SMRAM32_OFFSET(eflags, 0xFFF4); + CHECK_SMRAM32_OFFSET(cr3, 0xFFF8); + CHECK_SMRAM32_OFFSET(cr0, 0xFFFC); + + /* 64 bit SMRAM image */ + CHECK_SMRAM64_OFFSET(es, 0xFE00); + CHECK_SMRAM64_OFFSET(cs, 0xFE10); + CHECK_SMRAM64_OFFSET(ss, 0xFE20); + CHECK_SMRAM64_OFFSET(ds, 0xFE30); + CHECK_SMRAM64_OFFSET(fs, 0xFE40); + CHECK_SMRAM64_OFFSET(gs, 0xFE50); + CHECK_SMRAM64_OFFSET(gdtr, 0xFE60); + CHECK_SMRAM64_OFFSET(ldtr, 0xFE70); + CHECK_SMRAM64_OFFSET(idtr, 0xFE80); + CHECK_SMRAM64_OFFSET(tr, 0xFE90); + CHECK_SMRAM64_OFFSET(io_restart_rip, 0xFEA0); + CHECK_SMRAM64_OFFSET(io_restart_rcx, 0xFEA8); + CHECK_SMRAM64_OFFSET(io_restart_rsi, 0xFEB0); + CHECK_SMRAM64_OFFSET(io_restart_rdi, 0xFEB8); + CHECK_SMRAM64_OFFSET(io_restart_dword, 0xFEC0); + CHECK_SMRAM64_OFFSET(reserved1, 0xFEC4); + CHECK_SMRAM64_OFFSET(io_inst_restart, 0xFEC8); + CHECK_SMRAM64_OFFSET(auto_hlt_restart, 0xFEC9); + CHECK_SMRAM64_OFFSET(reserved2, 0xFECA); + CHECK_SMRAM64_OFFSET(efer, 0xFED0); + CHECK_SMRAM64_OFFSET(svm_guest_flag, 0xFED8); + CHECK_SMRAM64_OFFSET(svm_guest_vmcb_gpa, 0xFEE0); + CHECK_SMRAM64_OFFSET(svm_guest_virtual_int, 0xFEE8); + CHECK_SMRAM64_OFFSET(reserved3, 0xFEF0); + CHECK_SMRAM64_OFFSET(smm_revison, 0xFEFC); + CHECK_SMRAM64_OFFSET(smbase, 0xFF00); + CHECK_SMRAM64_OFFSET(reserved4, 0xFF04); + CHECK_SMRAM64_OFFSET(ssp, 0xFF18); + CHECK_SMRAM64_OFFSET(svm_guest_pat, 0xFF20); + CHECK_SMRAM64_OFFSET(svm_host_efer, 0xFF28); + CHECK_SMRAM64_OFFSET(svm_host_cr4, 0xFF30); + CHECK_SMRAM64_OFFSET(svm_host_cr3, 0xFF38); + CHECK_SMRAM64_OFFSET(svm_host_cr0, 0xFF40); + CHECK_SMRAM64_OFFSET(cr4, 0xFF48); + CHECK_SMRAM64_OFFSET(cr3, 0xFF50); + CHECK_SMRAM64_OFFSET(cr0, 0xFF58); + CHECK_SMRAM64_OFFSET(dr7, 0xFF60); + CHECK_SMRAM64_OFFSET(dr6, 0xFF68); + CHECK_SMRAM64_OFFSET(rflags, 0xFF70); + CHECK_SMRAM64_OFFSET(rip, 0xFF78); + 
CHECK_SMRAM64_OFFSET(gprs, 0xFF80); + + BUILD_BUG_ON(sizeof(union kvm_smram) != 512); +} + +#undef CHECK_SMRAM64_OFFSET +#undef CHECK_SMRAM32_OFFSET + + void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm) { BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK); @@ -201,6 +297,8 @@ void enter_smm(struct kvm_vcpu *vcpu) unsigned long cr0; char buf[512]; + check_smram_offsets(); + memset(buf, 0, 512); #ifdef CONFIG_X86_64 if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h index 53c81394ebdb..b66da263ec82 100644 --- a/arch/x86/kvm/smm.h +++ b/arch/x86/kvm/smm.h @@ -2,6 +2,8 @@ #ifndef ASM_KVM_SMM_H #define ASM_KVM_SMM_H +#include + #define GET_SMSTATE(type, buf, offset) \ (*(type *)((buf) + (offset) - 0x7e00)) @@ -9,6 +11,137 @@ *(type *)((buf) + (offset) - 0x7e00) = val #ifdef CONFIG_KVM_SMM + + +/* + * 32 bit KVM's emulated SMM layout. Based on Intel P6 layout + * (https://www.sandpile.org/x86/smm.htm). + */ + +struct kvm_smm_seg_state_32 { + u32 flags; + u32 limit; + u32 base; +} __packed; + +struct kvm_smram_state_32 { + u32 reserved1[62]; + u32 smbase; + u32 smm_revision; + u16 io_inst_restart; + u16 auto_hlt_restart; + u32 io_restart_rdi; + u32 io_restart_rcx; + u32 io_restart_rsi; + u32 io_restart_rip; + u32 cr4; + + /* A20M#, CPL, shutdown and other reserved/undocumented fields */ + u32 reserved3[5]; + + struct kvm_smm_seg_state_32 ds; + struct kvm_smm_seg_state_32 fs; + struct kvm_smm_seg_state_32 gs; + struct kvm_smm_seg_state_32 idtr; /* IDTR has only base and limit */ + struct kvm_smm_seg_state_32 tr; + u32 reserved; + struct kvm_smm_seg_state_32 gdtr; /* GDTR has only base and limit */ + struct kvm_smm_seg_state_32 ldtr; + struct kvm_smm_seg_state_32 es; + struct kvm_smm_seg_state_32 cs; + struct kvm_smm_seg_state_32 ss; + + u32 es_sel; + u32 cs_sel; + u32 ss_sel; + u32 ds_sel; + u32 fs_sel; + u32 gs_sel; + u32 ldtr_sel; + u32 tr_sel; + + u32 dr7; + u32 dr6; + u32 gprs[8]; /* GPRS in the "natural" X86 order 
(EAX/ECX/EDX.../EDI) */ + u32 eip; + u32 eflags; + u32 cr3; + u32 cr0; +} __packed; + + +/* 64 bit KVM's emulated SMM layout. Based on AMD64 layout */ + +struct kvm_smm_seg_state_64 { + u16 selector; + u16 attributes; + u32 limit; + u64 base; +}; + +struct kvm_smram_state_64 { + + struct kvm_smm_seg_state_64 es; + struct kvm_smm_seg_state_64 cs; + struct kvm_smm_seg_state_64 ss; + struct kvm_smm_seg_state_64 ds; + struct kvm_smm_seg_state_64 fs; + struct kvm_smm_seg_state_64 gs; + struct kvm_smm_seg_state_64 gdtr; /* GDTR has only base and limit*/ + struct kvm_smm_seg_state_64 ldtr; + struct kvm_smm_seg_state_64 idtr; /* IDTR has only base and limit*/ + struct kvm_smm_seg_state_64 tr; + + /* I/O restart and auto halt restart are not implemented by KVM */ + u64 io_restart_rip; + u64 io_restart_rcx; + u64 io_restart_rsi; + u64 io_restart_rdi; + u32 io_restart_dword; + u32 reserved1; + u8 io_inst_restart; + u8 auto_hlt_restart; + u8 reserved2[6]; + + u64 efer; + + /* + * Two fields below are implemented on AMD only, to store + * SVM guest vmcb address if the #SMI was received while in the guest mode. + */ + u64 svm_guest_flag; + u64 svm_guest_vmcb_gpa; + u64 svm_guest_virtual_int; /* unknown purpose, not implemented */ + + u32 reserved3[3]; + u32 smm_revison; + u32 smbase; + u32 reserved4[5]; + + /* ssp and svm_* fields below are not implemented by KVM */ + u64 ssp; + u64 svm_guest_pat; + u64 svm_host_efer; + u64 svm_host_cr4; + u64 svm_host_cr3; + u64 svm_host_cr0; + + u64 cr4; + u64 cr3; + u64 cr0; + u64 dr7; + u64 dr6; + u64 rflags; + u64 rip; + u64 gprs[16]; /* GPRS in a reversed "natural" X86 order (R15/R14/../RCX/RAX.) 
*/ +}; + +union kvm_smram { + struct kvm_smram_state_64 smram64; + struct kvm_smram_state_32 smram32; + u8 bytes[512]; +}; + static inline int kvm_inject_smi(struct kvm_vcpu *vcpu) { kvm_make_request(KVM_REQ_SMI, vcpu); From 58c1d206d545464f9051ad080674b719d553215b Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 25 Oct 2022 15:47:35 +0300 Subject: [PATCH 1191/4122] KVM: x86: smm: use smram structs in the common code Use kvm_smram union instead of raw arrays in the common smm code. Signed-off-by: Maxim Levitsky Message-Id: <20221025124741.228045-18-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 5 +++-- arch/x86/kvm/smm.c | 27 ++++++++++++++------------- arch/x86/kvm/svm/svm.c | 8 ++++++-- arch/x86/kvm/vmx/vmx.c | 4 ++-- 4 files changed, 25 insertions(+), 19 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index e9862eaac43b..444386905632 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -206,6 +206,7 @@ typedef enum exit_fastpath_completion fastpath_t; struct x86_emulate_ctxt; struct x86_exception; +union kvm_smram; enum x86_intercept; enum x86_intercept_stage; @@ -1616,8 +1617,8 @@ struct kvm_x86_ops { #ifdef CONFIG_KVM_SMM int (*smi_allowed)(struct kvm_vcpu *vcpu, bool for_injection); - int (*enter_smm)(struct kvm_vcpu *vcpu, char *smstate); - int (*leave_smm)(struct kvm_vcpu *vcpu, const char *smstate); + int (*enter_smm)(struct kvm_vcpu *vcpu, union kvm_smram *smram); + int (*leave_smm)(struct kvm_vcpu *vcpu, const union kvm_smram *smram); void (*enable_smi_window)(struct kvm_vcpu *vcpu); #endif diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c index 2e6ec79a581e..ba2733e535fd 100644 --- a/arch/x86/kvm/smm.c +++ b/arch/x86/kvm/smm.c @@ -295,17 +295,18 @@ void enter_smm(struct kvm_vcpu *vcpu) struct kvm_segment cs, ds; struct desc_ptr dt; unsigned long cr0; - char buf[512]; + union kvm_smram smram; check_smram_offsets(); - memset(buf, 
0, 512); + memset(smram.bytes, 0, sizeof(smram.bytes)); + #ifdef CONFIG_X86_64 if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) - enter_smm_save_state_64(vcpu, buf); + enter_smm_save_state_64(vcpu, smram.bytes); else #endif - enter_smm_save_state_32(vcpu, buf); + enter_smm_save_state_32(vcpu, smram.bytes); /* * Give enter_smm() a chance to make ISA-specific changes to the vCPU @@ -315,12 +316,12 @@ void enter_smm(struct kvm_vcpu *vcpu) * Kill the VM in the unlikely case of failure, because the VM * can be in undefined state in this case. */ - if (static_call(kvm_x86_enter_smm)(vcpu, buf)) + if (static_call(kvm_x86_enter_smm)(vcpu, &smram)) goto error; kvm_smm_changed(vcpu, true); - if (kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf))) + if (kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, &smram, sizeof(smram))) goto error; if (static_call(kvm_x86_get_nmi_mask)(vcpu)) @@ -480,7 +481,7 @@ static int rsm_enter_protected_mode(struct kvm_vcpu *vcpu, } static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, - const char *smstate) + u8 *smstate) { struct kvm_vcpu *vcpu = ctxt->vcpu; struct kvm_segment desc; @@ -541,7 +542,7 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, #ifdef CONFIG_X86_64 static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, - const char *smstate) + u8 *smstate) { struct kvm_vcpu *vcpu = ctxt->vcpu; struct kvm_segment desc; @@ -612,13 +613,13 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt) { struct kvm_vcpu *vcpu = ctxt->vcpu; unsigned long cr0; - char buf[512]; + union kvm_smram smram; u64 smbase; int ret; smbase = vcpu->arch.smbase; - ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfe00, buf, sizeof(buf)); + ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfe00, smram.bytes, sizeof(smram)); if (ret < 0) return X86EMUL_UNHANDLEABLE; @@ -675,13 +676,13 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt) * state (e.g. enter guest mode) before loading state from the SMM * state-save area. 
*/ - if (static_call(kvm_x86_leave_smm)(vcpu, buf)) + if (static_call(kvm_x86_leave_smm)(vcpu, &smram)) return X86EMUL_UNHANDLEABLE; #ifdef CONFIG_X86_64 if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) - return rsm_load_state_64(ctxt, buf); + return rsm_load_state_64(ctxt, smram.bytes); else #endif - return rsm_load_state_32(ctxt, buf); + return rsm_load_state_32(ctxt, smram.bytes); } diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index d28de3e59f7f..9d08214031d2 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4401,12 +4401,14 @@ static int svm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection) return 1; } -static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate) +static int svm_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram) { struct vcpu_svm *svm = to_svm(vcpu); struct kvm_host_map map_save; int ret; + char *smstate = (char *)smram; + if (!is_guest_mode(vcpu)) return 0; @@ -4448,7 +4450,7 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate) return 0; } -static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) +static int svm_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram) { struct vcpu_svm *svm = to_svm(vcpu); struct kvm_host_map map, map_save; @@ -4456,6 +4458,8 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) struct vmcb *vmcb12; int ret; + const char *smstate = (const char *)smram; + if (!guest_cpuid_has(vcpu, X86_FEATURE_LM)) return 0; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 6be991b29bb7..aca88524fd1e 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7941,7 +7941,7 @@ static int vmx_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection) return !is_smm(vcpu); } -static int vmx_enter_smm(struct kvm_vcpu *vcpu, char *smstate) +static int vmx_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -7962,7 +7962,7 @@ static int vmx_enter_smm(struct kvm_vcpu *vcpu, 
char *smstate) return 0; } -static int vmx_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) +static int vmx_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram) { struct vcpu_vmx *vmx = to_vmx(vcpu); int ret; From f34bdf4c1707cdc687db87965d08bb5a51300c58 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 25 Oct 2022 15:47:36 +0300 Subject: [PATCH 1192/4122] KVM: x86: smm: use smram struct for 32 bit smram load/restore Use kvm_smram_state_32 struct to save/restore 32 bit SMM state (used when X86_FEATURE_LM is not present in the guest CPUID). Signed-off-by: Maxim Levitsky Message-Id: <20221025124741.228045-19-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/smm.c | 157 ++++++++++++++++++--------------------------- 1 file changed, 62 insertions(+), 95 deletions(-) diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c index ba2733e535fd..2c808d0a8e92 100644 --- a/arch/x86/kvm/smm.c +++ b/arch/x86/kvm/smm.c @@ -149,22 +149,17 @@ static u32 enter_smm_get_segment_flags(struct kvm_segment *seg) return flags; } -static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n) +static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, + struct kvm_smm_seg_state_32 *state, + u32 *selector, int n) { struct kvm_segment seg; - int offset; kvm_get_segment(vcpu, &seg, n); - PUT_SMSTATE(u32, buf, 0x7fa8 + n * 4, seg.selector); - - if (n < 3) - offset = 0x7f84 + n * 12; - else - offset = 0x7f2c + (n - 3) * 12; - - PUT_SMSTATE(u32, buf, offset + 8, seg.base); - PUT_SMSTATE(u32, buf, offset + 4, seg.limit); - PUT_SMSTATE(u32, buf, offset, enter_smm_get_segment_flags(&seg)); + *selector = seg.selector; + state->base = seg.base; + state->limit = seg.limit; + state->flags = enter_smm_get_segment_flags(&seg); } #ifdef CONFIG_X86_64 @@ -185,54 +180,48 @@ static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n) } #endif -static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf) +static void 
enter_smm_save_state_32(struct kvm_vcpu *vcpu, + struct kvm_smram_state_32 *smram) { struct desc_ptr dt; - struct kvm_segment seg; unsigned long val; int i; - PUT_SMSTATE(u32, buf, 0x7ffc, kvm_read_cr0(vcpu)); - PUT_SMSTATE(u32, buf, 0x7ff8, kvm_read_cr3(vcpu)); - PUT_SMSTATE(u32, buf, 0x7ff4, kvm_get_rflags(vcpu)); - PUT_SMSTATE(u32, buf, 0x7ff0, kvm_rip_read(vcpu)); + smram->cr0 = kvm_read_cr0(vcpu); + smram->cr3 = kvm_read_cr3(vcpu); + smram->eflags = kvm_get_rflags(vcpu); + smram->eip = kvm_rip_read(vcpu); for (i = 0; i < 8; i++) - PUT_SMSTATE(u32, buf, 0x7fd0 + i * 4, kvm_register_read_raw(vcpu, i)); + smram->gprs[i] = kvm_register_read_raw(vcpu, i); kvm_get_dr(vcpu, 6, &val); - PUT_SMSTATE(u32, buf, 0x7fcc, (u32)val); + smram->dr6 = (u32)val; kvm_get_dr(vcpu, 7, &val); - PUT_SMSTATE(u32, buf, 0x7fc8, (u32)val); + smram->dr7 = (u32)val; - kvm_get_segment(vcpu, &seg, VCPU_SREG_TR); - PUT_SMSTATE(u32, buf, 0x7fc4, seg.selector); - PUT_SMSTATE(u32, buf, 0x7f64, seg.base); - PUT_SMSTATE(u32, buf, 0x7f60, seg.limit); - PUT_SMSTATE(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg)); - - kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR); - PUT_SMSTATE(u32, buf, 0x7fc0, seg.selector); - PUT_SMSTATE(u32, buf, 0x7f80, seg.base); - PUT_SMSTATE(u32, buf, 0x7f7c, seg.limit); - PUT_SMSTATE(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg)); + enter_smm_save_seg_32(vcpu, &smram->tr, &smram->tr_sel, VCPU_SREG_TR); + enter_smm_save_seg_32(vcpu, &smram->ldtr, &smram->ldtr_sel, VCPU_SREG_LDTR); static_call(kvm_x86_get_gdt)(vcpu, &dt); - PUT_SMSTATE(u32, buf, 0x7f74, dt.address); - PUT_SMSTATE(u32, buf, 0x7f70, dt.size); + smram->gdtr.base = dt.address; + smram->gdtr.limit = dt.size; static_call(kvm_x86_get_idt)(vcpu, &dt); - PUT_SMSTATE(u32, buf, 0x7f58, dt.address); - PUT_SMSTATE(u32, buf, 0x7f54, dt.size); + smram->idtr.base = dt.address; + smram->idtr.limit = dt.size; - for (i = 0; i < 6; i++) - enter_smm_save_seg_32(vcpu, buf, i); + enter_smm_save_seg_32(vcpu, &smram->es, 
&smram->es_sel, VCPU_SREG_ES); + enter_smm_save_seg_32(vcpu, &smram->cs, &smram->cs_sel, VCPU_SREG_CS); + enter_smm_save_seg_32(vcpu, &smram->ss, &smram->ss_sel, VCPU_SREG_SS); - PUT_SMSTATE(u32, buf, 0x7f14, kvm_read_cr4(vcpu)); + enter_smm_save_seg_32(vcpu, &smram->ds, &smram->ds_sel, VCPU_SREG_DS); + enter_smm_save_seg_32(vcpu, &smram->fs, &smram->fs_sel, VCPU_SREG_FS); + enter_smm_save_seg_32(vcpu, &smram->gs, &smram->gs_sel, VCPU_SREG_GS); - /* revision id */ - PUT_SMSTATE(u32, buf, 0x7efc, 0x00020000); - PUT_SMSTATE(u32, buf, 0x7ef8, vcpu->arch.smbase); + smram->cr4 = kvm_read_cr4(vcpu); + smram->smm_revision = 0x00020000; + smram->smbase = vcpu->arch.smbase; } #ifdef CONFIG_X86_64 @@ -306,7 +295,7 @@ void enter_smm(struct kvm_vcpu *vcpu) enter_smm_save_state_64(vcpu, smram.bytes); else #endif - enter_smm_save_state_32(vcpu, smram.bytes); + enter_smm_save_state_32(vcpu, &smram.smram32); /* * Give enter_smm() a chance to make ISA-specific changes to the vCPU @@ -398,21 +387,16 @@ static void rsm_set_desc_flags(struct kvm_segment *desc, u32 flags) desc->padding = 0; } -static int rsm_load_seg_32(struct kvm_vcpu *vcpu, const char *smstate, - int n) +static int rsm_load_seg_32(struct kvm_vcpu *vcpu, + const struct kvm_smm_seg_state_32 *state, + u16 selector, int n) { struct kvm_segment desc; - int offset; - if (n < 3) - offset = 0x7f84 + n * 12; - else - offset = 0x7f2c + (n - 3) * 12; - - desc.selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4); - desc.base = GET_SMSTATE(u32, smstate, offset + 8); - desc.limit = GET_SMSTATE(u32, smstate, offset + 4); - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, offset)); + desc.selector = selector; + desc.base = state->base; + desc.limit = state->limit; + rsm_set_desc_flags(&desc, state->flags); kvm_set_segment(vcpu, &desc, n); return X86EMUL_CONTINUE; } @@ -481,63 +465,46 @@ static int rsm_enter_protected_mode(struct kvm_vcpu *vcpu, } static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, - u8 *smstate) + const 
struct kvm_smram_state_32 *smstate) { struct kvm_vcpu *vcpu = ctxt->vcpu; - struct kvm_segment desc; struct desc_ptr dt; - u32 val, cr0, cr3, cr4; int i; - cr0 = GET_SMSTATE(u32, smstate, 0x7ffc); - cr3 = GET_SMSTATE(u32, smstate, 0x7ff8); - ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED; - ctxt->_eip = GET_SMSTATE(u32, smstate, 0x7ff0); + ctxt->eflags = smstate->eflags | X86_EFLAGS_FIXED; + ctxt->_eip = smstate->eip; for (i = 0; i < 8; i++) - *reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4); + *reg_write(ctxt, i) = smstate->gprs[i]; - val = GET_SMSTATE(u32, smstate, 0x7fcc); - - if (kvm_set_dr(vcpu, 6, val)) + if (kvm_set_dr(vcpu, 6, smstate->dr6)) + return X86EMUL_UNHANDLEABLE; + if (kvm_set_dr(vcpu, 7, smstate->dr7)) return X86EMUL_UNHANDLEABLE; - val = GET_SMSTATE(u32, smstate, 0x7fc8); + rsm_load_seg_32(vcpu, &smstate->tr, smstate->tr_sel, VCPU_SREG_TR); + rsm_load_seg_32(vcpu, &smstate->ldtr, smstate->ldtr_sel, VCPU_SREG_LDTR); - if (kvm_set_dr(vcpu, 7, val)) - return X86EMUL_UNHANDLEABLE; - - desc.selector = GET_SMSTATE(u32, smstate, 0x7fc4); - desc.base = GET_SMSTATE(u32, smstate, 0x7f64); - desc.limit = GET_SMSTATE(u32, smstate, 0x7f60); - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f5c)); - kvm_set_segment(vcpu, &desc, VCPU_SREG_TR); - - desc.selector = GET_SMSTATE(u32, smstate, 0x7fc0); - desc.base = GET_SMSTATE(u32, smstate, 0x7f80); - desc.limit = GET_SMSTATE(u32, smstate, 0x7f7c); - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f78)); - kvm_set_segment(vcpu, &desc, VCPU_SREG_LDTR); - - dt.address = GET_SMSTATE(u32, smstate, 0x7f74); - dt.size = GET_SMSTATE(u32, smstate, 0x7f70); + dt.address = smstate->gdtr.base; + dt.size = smstate->gdtr.limit; static_call(kvm_x86_set_gdt)(vcpu, &dt); - dt.address = GET_SMSTATE(u32, smstate, 0x7f58); - dt.size = GET_SMSTATE(u32, smstate, 0x7f54); + dt.address = smstate->idtr.base; + dt.size = smstate->idtr.limit; static_call(kvm_x86_set_idt)(vcpu, &dt); - for (i 
= 0; i < 6; i++) { - int r = rsm_load_seg_32(vcpu, smstate, i); - if (r != X86EMUL_CONTINUE) - return r; - } + rsm_load_seg_32(vcpu, &smstate->es, smstate->es_sel, VCPU_SREG_ES); + rsm_load_seg_32(vcpu, &smstate->cs, smstate->cs_sel, VCPU_SREG_CS); + rsm_load_seg_32(vcpu, &smstate->ss, smstate->ss_sel, VCPU_SREG_SS); - cr4 = GET_SMSTATE(u32, smstate, 0x7f14); + rsm_load_seg_32(vcpu, &smstate->ds, smstate->ds_sel, VCPU_SREG_DS); + rsm_load_seg_32(vcpu, &smstate->fs, smstate->fs_sel, VCPU_SREG_FS); + rsm_load_seg_32(vcpu, &smstate->gs, smstate->gs_sel, VCPU_SREG_GS); - vcpu->arch.smbase = GET_SMSTATE(u32, smstate, 0x7ef8); + vcpu->arch.smbase = smstate->smbase; - return rsm_enter_protected_mode(vcpu, cr0, cr3, cr4); + return rsm_enter_protected_mode(vcpu, smstate->cr0, + smstate->cr3, smstate->cr4); } #ifdef CONFIG_X86_64 @@ -684,5 +651,5 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt) return rsm_load_state_64(ctxt, smram.bytes); else #endif - return rsm_load_state_32(ctxt, smram.bytes); + return rsm_load_state_32(ctxt, &smram.smram32); } From 8bcda1dee95ae88cade0ad671e0f4d371c005c4d Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 25 Oct 2022 15:47:37 +0300 Subject: [PATCH 1193/4122] KVM: x86: smm: use smram struct for 64 bit smram load/restore Use kvm_smram_state_64 struct to save/restore the 64 bit SMM state (used when X86_FEATURE_LM is present in the guest CPUID, regardless of 32-bitness of the guest). 
Signed-off-by: Maxim Levitsky Message-Id: <20221025124741.228045-20-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/smm.c | 155 +++++++++++++++++++-------------------------- 1 file changed, 64 insertions(+), 91 deletions(-) diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c index 2c808d0a8e92..e3ecb1f84168 100644 --- a/arch/x86/kvm/smm.c +++ b/arch/x86/kvm/smm.c @@ -163,20 +163,17 @@ static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, } #ifdef CONFIG_X86_64 -static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n) +static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, + struct kvm_smm_seg_state_64 *state, + int n) { struct kvm_segment seg; - int offset; - u16 flags; kvm_get_segment(vcpu, &seg, n); - offset = 0x7e00 + n * 16; - - flags = enter_smm_get_segment_flags(&seg) >> 8; - PUT_SMSTATE(u16, buf, offset, seg.selector); - PUT_SMSTATE(u16, buf, offset + 2, flags); - PUT_SMSTATE(u32, buf, offset + 4, seg.limit); - PUT_SMSTATE(u64, buf, offset + 8, seg.base); + state->selector = seg.selector; + state->attributes = enter_smm_get_segment_flags(&seg) >> 8; + state->limit = seg.limit; + state->base = seg.base; } #endif @@ -225,57 +222,52 @@ static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, } #ifdef CONFIG_X86_64 -static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf) +static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, + struct kvm_smram_state_64 *smram) { struct desc_ptr dt; - struct kvm_segment seg; unsigned long val; int i; for (i = 0; i < 16; i++) - PUT_SMSTATE(u64, buf, 0x7ff8 - i * 8, kvm_register_read_raw(vcpu, i)); + smram->gprs[15 - i] = kvm_register_read_raw(vcpu, i); + + smram->rip = kvm_rip_read(vcpu); + smram->rflags = kvm_get_rflags(vcpu); - PUT_SMSTATE(u64, buf, 0x7f78, kvm_rip_read(vcpu)); - PUT_SMSTATE(u32, buf, 0x7f70, kvm_get_rflags(vcpu)); kvm_get_dr(vcpu, 6, &val); - PUT_SMSTATE(u64, buf, 0x7f68, val); + smram->dr6 = val; kvm_get_dr(vcpu, 7, &val); - PUT_SMSTATE(u64, buf, 
0x7f60, val); + smram->dr7 = val; - PUT_SMSTATE(u64, buf, 0x7f58, kvm_read_cr0(vcpu)); - PUT_SMSTATE(u64, buf, 0x7f50, kvm_read_cr3(vcpu)); - PUT_SMSTATE(u64, buf, 0x7f48, kvm_read_cr4(vcpu)); + smram->cr0 = kvm_read_cr0(vcpu); + smram->cr3 = kvm_read_cr3(vcpu); + smram->cr4 = kvm_read_cr4(vcpu); - PUT_SMSTATE(u32, buf, 0x7f00, vcpu->arch.smbase); + smram->smbase = vcpu->arch.smbase; + smram->smm_revison = 0x00020064; - /* revision id */ - PUT_SMSTATE(u32, buf, 0x7efc, 0x00020064); + smram->efer = vcpu->arch.efer; - PUT_SMSTATE(u64, buf, 0x7ed0, vcpu->arch.efer); - - kvm_get_segment(vcpu, &seg, VCPU_SREG_TR); - PUT_SMSTATE(u16, buf, 0x7e90, seg.selector); - PUT_SMSTATE(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8); - PUT_SMSTATE(u32, buf, 0x7e94, seg.limit); - PUT_SMSTATE(u64, buf, 0x7e98, seg.base); + enter_smm_save_seg_64(vcpu, &smram->tr, VCPU_SREG_TR); static_call(kvm_x86_get_idt)(vcpu, &dt); - PUT_SMSTATE(u32, buf, 0x7e84, dt.size); - PUT_SMSTATE(u64, buf, 0x7e88, dt.address); + smram->idtr.limit = dt.size; + smram->idtr.base = dt.address; - kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR); - PUT_SMSTATE(u16, buf, 0x7e70, seg.selector); - PUT_SMSTATE(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8); - PUT_SMSTATE(u32, buf, 0x7e74, seg.limit); - PUT_SMSTATE(u64, buf, 0x7e78, seg.base); + enter_smm_save_seg_64(vcpu, &smram->ldtr, VCPU_SREG_LDTR); static_call(kvm_x86_get_gdt)(vcpu, &dt); - PUT_SMSTATE(u32, buf, 0x7e64, dt.size); - PUT_SMSTATE(u64, buf, 0x7e68, dt.address); + smram->gdtr.limit = dt.size; + smram->gdtr.base = dt.address; - for (i = 0; i < 6; i++) - enter_smm_save_seg_64(vcpu, buf, i); + enter_smm_save_seg_64(vcpu, &smram->es, VCPU_SREG_ES); + enter_smm_save_seg_64(vcpu, &smram->cs, VCPU_SREG_CS); + enter_smm_save_seg_64(vcpu, &smram->ss, VCPU_SREG_SS); + enter_smm_save_seg_64(vcpu, &smram->ds, VCPU_SREG_DS); + enter_smm_save_seg_64(vcpu, &smram->fs, VCPU_SREG_FS); + enter_smm_save_seg_64(vcpu, &smram->gs, VCPU_SREG_GS); } #endif @@ 
-292,7 +284,7 @@ void enter_smm(struct kvm_vcpu *vcpu) #ifdef CONFIG_X86_64 if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) - enter_smm_save_state_64(vcpu, smram.bytes); + enter_smm_save_state_64(vcpu, &smram.smram64); else #endif enter_smm_save_state_32(vcpu, &smram.smram32); @@ -402,18 +394,17 @@ static int rsm_load_seg_32(struct kvm_vcpu *vcpu, } #ifdef CONFIG_X86_64 -static int rsm_load_seg_64(struct kvm_vcpu *vcpu, const char *smstate, + +static int rsm_load_seg_64(struct kvm_vcpu *vcpu, + const struct kvm_smm_seg_state_64 *state, int n) { struct kvm_segment desc; - int offset; - offset = 0x7e00 + n * 16; - - desc.selector = GET_SMSTATE(u16, smstate, offset); - rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smstate, offset + 2) << 8); - desc.limit = GET_SMSTATE(u32, smstate, offset + 4); - desc.base = GET_SMSTATE(u64, smstate, offset + 8); + desc.selector = state->selector; + rsm_set_desc_flags(&desc, state->attributes << 8); + desc.limit = state->limit; + desc.base = state->base; kvm_set_segment(vcpu, &desc, n); return X86EMUL_CONTINUE; } @@ -509,68 +500,50 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, #ifdef CONFIG_X86_64 static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, - u8 *smstate) + const struct kvm_smram_state_64 *smstate) { struct kvm_vcpu *vcpu = ctxt->vcpu; - struct kvm_segment desc; struct desc_ptr dt; - u64 val, cr0, cr3, cr4; int i, r; for (i = 0; i < 16; i++) - *reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8); + *reg_write(ctxt, i) = smstate->gprs[15 - i]; - ctxt->_eip = GET_SMSTATE(u64, smstate, 0x7f78); - ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7f70) | X86_EFLAGS_FIXED; + ctxt->_eip = smstate->rip; + ctxt->eflags = smstate->rflags | X86_EFLAGS_FIXED; - val = GET_SMSTATE(u64, smstate, 0x7f68); - - if (kvm_set_dr(vcpu, 6, val)) + if (kvm_set_dr(vcpu, 6, smstate->dr6)) + return X86EMUL_UNHANDLEABLE; + if (kvm_set_dr(vcpu, 7, smstate->dr7)) return X86EMUL_UNHANDLEABLE; - val = GET_SMSTATE(u64, smstate, 0x7f60); 
+ vcpu->arch.smbase = smstate->smbase; - if (kvm_set_dr(vcpu, 7, val)) + if (kvm_set_msr(vcpu, MSR_EFER, smstate->efer & ~EFER_LMA)) return X86EMUL_UNHANDLEABLE; - cr0 = GET_SMSTATE(u64, smstate, 0x7f58); - cr3 = GET_SMSTATE(u64, smstate, 0x7f50); - cr4 = GET_SMSTATE(u64, smstate, 0x7f48); - vcpu->arch.smbase = GET_SMSTATE(u32, smstate, 0x7f00); - val = GET_SMSTATE(u64, smstate, 0x7ed0); + rsm_load_seg_64(vcpu, &smstate->tr, VCPU_SREG_TR); - if (kvm_set_msr(vcpu, MSR_EFER, val & ~EFER_LMA)) - return X86EMUL_UNHANDLEABLE; - - desc.selector = GET_SMSTATE(u32, smstate, 0x7e90); - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e92) << 8); - desc.limit = GET_SMSTATE(u32, smstate, 0x7e94); - desc.base = GET_SMSTATE(u64, smstate, 0x7e98); - kvm_set_segment(vcpu, &desc, VCPU_SREG_TR); - - dt.size = GET_SMSTATE(u32, smstate, 0x7e84); - dt.address = GET_SMSTATE(u64, smstate, 0x7e88); + dt.size = smstate->idtr.limit; + dt.address = smstate->idtr.base; static_call(kvm_x86_set_idt)(vcpu, &dt); - desc.selector = GET_SMSTATE(u32, smstate, 0x7e70); - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e72) << 8); - desc.limit = GET_SMSTATE(u32, smstate, 0x7e74); - desc.base = GET_SMSTATE(u64, smstate, 0x7e78); - kvm_set_segment(vcpu, &desc, VCPU_SREG_LDTR); + rsm_load_seg_64(vcpu, &smstate->ldtr, VCPU_SREG_LDTR); - dt.size = GET_SMSTATE(u32, smstate, 0x7e64); - dt.address = GET_SMSTATE(u64, smstate, 0x7e68); + dt.size = smstate->gdtr.limit; + dt.address = smstate->gdtr.base; static_call(kvm_x86_set_gdt)(vcpu, &dt); - r = rsm_enter_protected_mode(vcpu, cr0, cr3, cr4); + r = rsm_enter_protected_mode(vcpu, smstate->cr0, smstate->cr3, smstate->cr4); if (r != X86EMUL_CONTINUE) return r; - for (i = 0; i < 6; i++) { - r = rsm_load_seg_64(vcpu, smstate, i); - if (r != X86EMUL_CONTINUE) - return r; - } + rsm_load_seg_64(vcpu, &smstate->es, VCPU_SREG_ES); + rsm_load_seg_64(vcpu, &smstate->cs, VCPU_SREG_CS); + rsm_load_seg_64(vcpu, &smstate->ss, VCPU_SREG_SS); + 
rsm_load_seg_64(vcpu, &smstate->ds, VCPU_SREG_DS); + rsm_load_seg_64(vcpu, &smstate->fs, VCPU_SREG_FS); + rsm_load_seg_64(vcpu, &smstate->gs, VCPU_SREG_GS); return X86EMUL_CONTINUE; } @@ -648,7 +621,7 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt) #ifdef CONFIG_X86_64 if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) - return rsm_load_state_64(ctxt, smram.bytes); + return rsm_load_state_64(ctxt, &smram.smram64); else #endif return rsm_load_state_32(ctxt, &smram.smram32); From e6a82199b610d843d810bc90d1f5df667906c402 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 25 Oct 2022 15:47:38 +0300 Subject: [PATCH 1194/4122] KVM: svm: drop explicit return value of kvm_vcpu_map if kvm_vcpu_map returns non zero value, error path should be triggered regardless of the exact returned error value. Suggested-by: Sean Christopherson Signed-off-by: Maxim Levitsky Message-Id: <20221025124741.228045-21-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 9d08214031d2..dfcdca3f538b 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4437,8 +4437,7 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram) * that, see svm_prepare_switch_to_guest()) which must be * preserved. 
*/ - if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr), - &map_save) == -EINVAL) + if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr), &map_save)) return 1; BUILD_BUG_ON(offsetof(struct vmcb, save) != 0x400); @@ -4475,11 +4474,11 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram) return 1; vmcb12_gpa = GET_SMSTATE(u64, smstate, 0x7ee0); - if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map) == -EINVAL) + if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map)) return 1; ret = 1; - if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr), &map_save) == -EINVAL) + if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr), &map_save)) goto unmap_map; if (svm_allocate_nested(svm)) From dd5045fed588b3e7ac0a4546138b2fe16d5d0fff Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 25 Oct 2022 15:47:39 +0300 Subject: [PATCH 1195/4122] KVM: x86: SVM: use smram structs Use SMM structs in the SVM code as well, which removes the last user of put_smstate/GET_SMSTATE so remove these macros as well. 
Signed-off-by: Maxim Levitsky Message-Id: <20221025124741.228045-22-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/smm.h | 6 ------ arch/x86/kvm/svm/svm.c | 21 +++++++-------------- 2 files changed, 7 insertions(+), 20 deletions(-) diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h index b66da263ec82..520217467ac2 100644 --- a/arch/x86/kvm/smm.h +++ b/arch/x86/kvm/smm.h @@ -4,12 +4,6 @@ #include -#define GET_SMSTATE(type, buf, offset) \ - (*(type *)((buf) + (offset) - 0x7e00)) - -#define PUT_SMSTATE(type, buf, offset, val) \ - *(type *)((buf) + (offset) - 0x7e00) = val - #ifdef CONFIG_KVM_SMM diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index dfcdca3f538b..2be8050bf981 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4407,15 +4407,11 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram) struct kvm_host_map map_save; int ret; - char *smstate = (char *)smram; - if (!is_guest_mode(vcpu)) return 0; - /* FED8h - SVM Guest */ - PUT_SMSTATE(u64, smstate, 0x7ed8, 1); - /* FEE0h - SVM Guest VMCB Physical Address */ - PUT_SMSTATE(u64, smstate, 0x7ee0, svm->nested.vmcb12_gpa); + smram->smram64.svm_guest_flag = 1; + smram->smram64.svm_guest_vmcb_gpa = svm->nested.vmcb12_gpa; svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; @@ -4453,28 +4449,25 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram) { struct vcpu_svm *svm = to_svm(vcpu); struct kvm_host_map map, map_save; - u64 saved_efer, vmcb12_gpa; struct vmcb *vmcb12; int ret; - const char *smstate = (const char *)smram; + const struct kvm_smram_state_64 *smram64 = &smram->smram64; if (!guest_cpuid_has(vcpu, X86_FEATURE_LM)) return 0; /* Non-zero if SMI arrived while vCPU was in guest mode. 
*/ - if (!GET_SMSTATE(u64, smstate, 0x7ed8)) + if (!smram64->svm_guest_flag) return 0; if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM)) return 1; - saved_efer = GET_SMSTATE(u64, smstate, 0x7ed0); - if (!(saved_efer & EFER_SVME)) + if (!(smram64->efer & EFER_SVME)) return 1; - vmcb12_gpa = GET_SMSTATE(u64, smstate, 0x7ee0); - if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map)) + if (kvm_vcpu_map(vcpu, gpa_to_gfn(smram64->svm_guest_vmcb_gpa), &map)) return 1; ret = 1; @@ -4500,7 +4493,7 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram) vmcb12 = map.hva; nested_copy_vmcb_control_to_cache(svm, &vmcb12->control); nested_copy_vmcb_save_to_cache(svm, &vmcb12->save); - ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, false); + ret = enter_svm_guest_mode(vcpu, smram64->svm_guest_vmcb_gpa, vmcb12, false); if (ret) goto unmap_save; From 95504c7c981b3260b3b238ace03f3519bd9a0b6d Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 25 Oct 2022 15:47:40 +0300 Subject: [PATCH 1196/4122] KVM: x86: SVM: don't save SVM state to SMRAM when VM is not long mode capable When the guest CPUID doesn't have support for long mode, 32 bit SMRAM layout is used and it has no support for preserving EFER and/or SVM state. Note that this isn't relevant to running 32 bit guests on VM which is long mode capable - such VM can still run 32 bit guests in compatibility mode. Signed-off-by: Maxim Levitsky Message-Id: <20221025124741.228045-23-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 2be8050bf981..527f18d8cc44 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4410,6 +4410,14 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram) if (!is_guest_mode(vcpu)) return 0; + /* + * 32-bit SMRAM format doesn't preserve EFER and SVM state. 
Userspace is + * responsible for ensuring nested SVM and SMIs are mutually exclusive. + */ + + if (!guest_cpuid_has(vcpu, X86_FEATURE_LM)) + return 1; + smram->smram64.svm_guest_flag = 1; smram->smram64.svm_guest_vmcb_gpa = svm->nested.vmcb12_gpa; From fb28875fd7da184079150295da7ee8d80a70917e Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 25 Oct 2022 15:47:41 +0300 Subject: [PATCH 1197/4122] KVM: x86: smm: preserve interrupt shadow in SMRAM When #SMI is asserted, the CPU can be in interrupt shadow due to sti or mov ss. It is not mandatory in Intel/AMD prm to have the #SMI blocked during the shadow, and on top of that, since neither SVM nor VMX has true support for SMI window, waiting for one instruction would mean single stepping the guest. Instead, allow #SMI in this case, but both reset the interrupt window and stash its value in SMRAM to restore it on exit from SMM. This fixes rare failures seen mostly on Windows guests on VMX, when #SMI falls on the sti instruction, which manifest as a VM entry failure due to EFLAGS.IF not being set, but STI interrupt window still being set in the VMCS. 
Signed-off-by: Maxim Levitsky Message-Id: <20221025124741.228045-24-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/smm.c | 29 +++++++++++++++++++++++++---- arch/x86/kvm/smm.h | 8 ++++++-- 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c index e3ecb1f84168..a9c1c2af8d94 100644 --- a/arch/x86/kvm/smm.c +++ b/arch/x86/kvm/smm.c @@ -27,7 +27,9 @@ static void check_smram_offsets(void) CHECK_SMRAM32_OFFSET(io_restart_rsi, 0xFF0C); CHECK_SMRAM32_OFFSET(io_restart_rip, 0xFF10); CHECK_SMRAM32_OFFSET(cr4, 0xFF14); - CHECK_SMRAM32_OFFSET(reserved3, 0xFF18); + CHECK_SMRAM32_OFFSET(reserved2, 0xFF18); + CHECK_SMRAM32_OFFSET(int_shadow, 0xFF1A); + CHECK_SMRAM32_OFFSET(reserved3, 0xFF1B); CHECK_SMRAM32_OFFSET(ds, 0xFF2C); CHECK_SMRAM32_OFFSET(fs, 0xFF38); CHECK_SMRAM32_OFFSET(gs, 0xFF44); @@ -73,7 +75,9 @@ static void check_smram_offsets(void) CHECK_SMRAM64_OFFSET(reserved1, 0xFEC4); CHECK_SMRAM64_OFFSET(io_inst_restart, 0xFEC8); CHECK_SMRAM64_OFFSET(auto_hlt_restart, 0xFEC9); - CHECK_SMRAM64_OFFSET(reserved2, 0xFECA); + CHECK_SMRAM64_OFFSET(amd_nmi_mask, 0xFECA); + CHECK_SMRAM64_OFFSET(int_shadow, 0xFECB); + CHECK_SMRAM64_OFFSET(reserved2, 0xFECC); CHECK_SMRAM64_OFFSET(efer, 0xFED0); CHECK_SMRAM64_OFFSET(svm_guest_flag, 0xFED8); CHECK_SMRAM64_OFFSET(svm_guest_vmcb_gpa, 0xFEE0); @@ -219,6 +223,8 @@ static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, smram->cr4 = kvm_read_cr4(vcpu); smram->smm_revision = 0x00020000; smram->smbase = vcpu->arch.smbase; + + smram->int_shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu); } #ifdef CONFIG_X86_64 @@ -268,6 +274,8 @@ static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, enter_smm_save_seg_64(vcpu, &smram->ds, VCPU_SREG_DS); enter_smm_save_seg_64(vcpu, &smram->fs, VCPU_SREG_FS); enter_smm_save_seg_64(vcpu, &smram->gs, VCPU_SREG_GS); + + smram->int_shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu); } #endif @@ -313,6 +321,8 @@ void 
enter_smm(struct kvm_vcpu *vcpu) kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); kvm_rip_write(vcpu, 0x8000); + static_call(kvm_x86_set_interrupt_shadow)(vcpu, 0); + cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG); static_call(kvm_x86_set_cr0)(vcpu, cr0); vcpu->arch.cr0 = cr0; @@ -460,7 +470,7 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, { struct kvm_vcpu *vcpu = ctxt->vcpu; struct desc_ptr dt; - int i; + int i, r; ctxt->eflags = smstate->eflags | X86_EFLAGS_FIXED; ctxt->_eip = smstate->eip; @@ -494,8 +504,16 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, vcpu->arch.smbase = smstate->smbase; - return rsm_enter_protected_mode(vcpu, smstate->cr0, + r = rsm_enter_protected_mode(vcpu, smstate->cr0, smstate->cr3, smstate->cr4); + + if (r != X86EMUL_CONTINUE) + return r; + + static_call(kvm_x86_set_interrupt_shadow)(vcpu, 0); + ctxt->interruptibility = (u8)smstate->int_shadow; + + return r; } #ifdef CONFIG_X86_64 @@ -545,6 +563,9 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, rsm_load_seg_64(vcpu, &smstate->fs, VCPU_SREG_FS); rsm_load_seg_64(vcpu, &smstate->gs, VCPU_SREG_GS); + static_call(kvm_x86_set_interrupt_shadow)(vcpu, 0); + ctxt->interruptibility = (u8)smstate->int_shadow; + return X86EMUL_CONTINUE; } #endif diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h index 520217467ac2..a1cf2ac5bd78 100644 --- a/arch/x86/kvm/smm.h +++ b/arch/x86/kvm/smm.h @@ -31,7 +31,9 @@ struct kvm_smram_state_32 { u32 cr4; /* A20M#, CPL, shutdown and other reserved/undocumented fields */ - u32 reserved3[5]; + u16 reserved2; + u8 int_shadow; /* KVM extension */ + u8 reserved3[17]; struct kvm_smm_seg_state_32 ds; struct kvm_smm_seg_state_32 fs; @@ -95,7 +97,9 @@ struct kvm_smram_state_64 { u32 reserved1; u8 io_inst_restart; u8 auto_hlt_restart; - u8 reserved2[6]; + u8 amd_nmi_mask; /* Documented in AMD BKDG as NMI mask, not used by KVM */ + u8 int_shadow; + u32 reserved2; u64 efer; From 
93c5c61d9e58a9ea423439d358c198be5b674a58 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 11 Oct 2022 15:58:06 -0400 Subject: [PATCH 1198/4122] mm/gup: Add FOLL_INTERRUPTIBLE We have had FAULT_FLAG_INTERRUPTIBLE but it was never applied to GUPs. One issue with it is that not all GUP paths are able to handle signal deliveries besides SIGKILL. That's not ideal for the GUP users who are actually able to handle these cases, like KVM. KVM uses GUP extensively on faulting guest pages, during which we've got existing infrastructure to retry a page fault at a later time. Allowing the GUP to be interrupted by generic signals can make KVM-related threads more responsive. For example: (1) SIGUSR1: which QEMU/KVM uses to deliver an inter-process IPI, e.g. when the admin issues a vm_stop QMP command, SIGUSR1 can be generated to kick the vcpus out of kernel context immediately, (2) SIGINT: which can be used with interactive hypervisor users to stop a virtual machine with Ctrl-C without any delays/hangs, (3) SIGTRAP: which grants GDB capability even during page faults that are stuck for a long time. Normally the hypervisor will be able to receive these signals properly, but not if we're stuck in a GUP for a long time for whatever reason. It happens easily with a stuck postcopy migration when e.g. a temporary network failure happens, then some vcpu threads can hang indefinitely waiting for the pages. With the new FOLL_INTERRUPTIBLE, we can allow GUP users like KVM to selectively enable the ability to trap these signals. 
Reviewed-by: John Hubbard Reviewed-by: David Hildenbrand Signed-off-by: Peter Xu Message-Id: <20221011195809.557016-2-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- include/linux/mm.h | 1 + mm/gup.c | 33 +++++++++++++++++++++++++++++---- mm/hugetlb.c | 5 ++++- 3 files changed, 34 insertions(+), 5 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 8bbcccbc5565..3c84f4e48cd7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2958,6 +2958,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, #define FOLL_SPLIT_PMD 0x20000 /* split huge pmd before returning */ #define FOLL_PIN 0x40000 /* pages must be released via unpin_user_page */ #define FOLL_FAST_ONLY 0x80000 /* gup_fast: prevent fall-back to slow gup */ +#define FOLL_INTERRUPTIBLE 0x100000 /* allow interrupts from generic signals */ /* * FOLL_PIN and FOLL_LONGTERM may be used in various combinations with each diff --git a/mm/gup.c b/mm/gup.c index fe195d47de74..90e372352e82 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -989,8 +989,17 @@ static int faultin_page(struct vm_area_struct *vma, fault_flags |= FAULT_FLAG_WRITE; if (*flags & FOLL_REMOTE) fault_flags |= FAULT_FLAG_REMOTE; - if (locked) + if (locked) { fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + /* + * FAULT_FLAG_INTERRUPTIBLE is opt-in. GUP callers must set + * FOLL_INTERRUPTIBLE to enable FAULT_FLAG_INTERRUPTIBLE. + * That's because some callers may not be prepared to + * handle early exits caused by non-fatal signals. + */ + if (*flags & FOLL_INTERRUPTIBLE) + fault_flags |= FAULT_FLAG_INTERRUPTIBLE; + } if (*flags & FOLL_NOWAIT) fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT; if (*flags & FOLL_TRIED) { @@ -1391,6 +1400,22 @@ retry: } EXPORT_SYMBOL_GPL(fixup_user_fault); +/* + * GUP always responds to fatal signals. When FOLL_INTERRUPTIBLE is + * specified, it'll also respond to generic signals. 
The caller of GUP + * that has FOLL_INTERRUPTIBLE should take care of the GUP interruption. + */ +static bool gup_signal_pending(unsigned int flags) +{ + if (fatal_signal_pending(current)) + return true; + + if (!(flags & FOLL_INTERRUPTIBLE)) + return false; + + return signal_pending(current); +} + /* * Please note that this function, unlike __get_user_pages will not * return 0 for nr_pages > 0 without FOLL_NOWAIT @@ -1472,11 +1497,11 @@ retry: * Repeat on the address that fired VM_FAULT_RETRY * with both FAULT_FLAG_ALLOW_RETRY and * FAULT_FLAG_TRIED. Note that GUP can be interrupted - * by fatal signals, so we need to check it before we + * by fatal signals of even common signals, depending on + * the caller's request. So we need to check it before we * start trying again otherwise it can loop forever. */ - - if (fatal_signal_pending(current)) { + if (gup_signal_pending(flags)) { if (!pages_done) pages_done = -EINTR; break; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 546df97c31e4..b5ed54f760bb 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -6285,9 +6285,12 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, fault_flags |= FAULT_FLAG_WRITE; else if (unshare) fault_flags |= FAULT_FLAG_UNSHARE; - if (locked) + if (locked) { fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + if (flags & FOLL_INTERRUPTIBLE) + fault_flags |= FAULT_FLAG_INTERRUPTIBLE; + } if (flags & FOLL_NOWAIT) fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT; From fe5ed56c79733b7808f968567c581118ab79552e Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 11 Oct 2022 15:58:07 -0400 Subject: [PATCH 1199/4122] kvm: Add KVM_PFN_ERR_SIGPENDING Add a new pfn error to show that we've got a pending signal to handle during hva_to_pfn_slow() procedure (of -EINTR retval). 
Signed-off-by: Peter Xu Reviewed-by: Sean Christopherson Message-Id: <20221011195809.557016-3-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 10 ++++++++++ virt/kvm/kvm_main.c | 2 ++ 2 files changed, 12 insertions(+) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 18592bdf4c1b..911b064878df 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -96,6 +96,7 @@ #define KVM_PFN_ERR_FAULT (KVM_PFN_ERR_MASK) #define KVM_PFN_ERR_HWPOISON (KVM_PFN_ERR_MASK + 1) #define KVM_PFN_ERR_RO_FAULT (KVM_PFN_ERR_MASK + 2) +#define KVM_PFN_ERR_SIGPENDING (KVM_PFN_ERR_MASK + 3) /* * error pfns indicate that the gfn is in slot but faild to @@ -106,6 +107,15 @@ static inline bool is_error_pfn(kvm_pfn_t pfn) return !!(pfn & KVM_PFN_ERR_MASK); } +/* + * KVM_PFN_ERR_SIGPENDING indicates that fetching the PFN was interrupted + * by a pending signal. Note, the signal may or may not be fatal. + */ +static inline bool is_sigpending_pfn(kvm_pfn_t pfn) +{ + return pfn == KVM_PFN_ERR_SIGPENDING; +} + /* * error_noslot pfns indicate that the gfn can not be * translated to pfn - it is not in slot or failed to diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 25d7872b29c1..558f52dbebbd 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2667,6 +2667,8 @@ kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async, npages = hva_to_pfn_slow(addr, async, write_fault, writable, &pfn); if (npages == 1) return pfn; + if (npages == -EINTR) + return KVM_PFN_ERR_SIGPENDING; mmap_read_lock(current->mm); if (npages == -EHWPOISON || From c8b88b332bedf47a9aa008dfb69998c90623375c Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 11 Oct 2022 15:58:08 -0400 Subject: [PATCH 1200/4122] kvm: Add interruptible flag to __gfn_to_pfn_memslot() Add a new "interruptible" flag showing that the caller is willing to be interrupted by signals during the __gfn_to_pfn_memslot() request. 
Wire it up with a FOLL_INTERRUPTIBLE flag that we've just introduced. This prepares KVM to be able to respond to SIGUSR1 (for QEMU that's the SIGIPI) even during e.g. handling an userfaultfd page fault. No functional change intended. Signed-off-by: Peter Xu Reviewed-by: Sean Christopherson Message-Id: <20221011195809.557016-4-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- arch/arm64/kvm/mmu.c | 2 +- arch/powerpc/kvm/book3s_64_mmu_hv.c | 2 +- arch/powerpc/kvm/book3s_64_mmu_radix.c | 2 +- arch/x86/kvm/mmu/mmu.c | 4 ++-- include/linux/kvm_host.h | 4 ++-- virt/kvm/kvm_main.c | 28 ++++++++++++++++---------- virt/kvm/kvm_mm.h | 4 ++-- virt/kvm/pfncache.c | 2 +- 8 files changed, 27 insertions(+), 21 deletions(-) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 60ee3d9f01f8..f154d4a7fae0 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1239,7 +1239,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, */ smp_rmb(); - pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL, + pfn = __gfn_to_pfn_memslot(memslot, gfn, false, false, NULL, write_fault, &writable, NULL); if (pfn == KVM_PFN_ERR_HWPOISON) { kvm_send_hwpoison_signal(hva, vma_shift); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index e9744b41a226..4939f57b6f6a 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -598,7 +598,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_vcpu *vcpu, write_ok = true; } else { /* Call KVM generic code to do the slow-path check */ - pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL, + pfn = __gfn_to_pfn_memslot(memslot, gfn, false, false, NULL, writing, &write_ok, NULL); if (is_error_noslot_pfn(pfn)) return -EFAULT; diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 5d5e12f3bf86..9d3743ca16d5 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -846,7 
+846,7 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu, unsigned long pfn; /* Call KVM generic code to do the slow-path check */ - pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL, + pfn = __gfn_to_pfn_memslot(memslot, gfn, false, false, NULL, writing, upgrade_p, NULL); if (is_error_noslot_pfn(pfn)) return -EFAULT; diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index f8c92a4a35fa..0bbfb33fa735 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -4170,7 +4170,7 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) } async = false; - fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, &async, + fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, false, &async, fault->write, &fault->map_writable, &fault->hva); if (!async) @@ -4187,7 +4187,7 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) } } - fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, NULL, + fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, false, NULL, fault->write, &fault->map_writable, &fault->hva); return RET_PF_CONTINUE; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 911b064878df..8fe4665bd020 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1150,8 +1150,8 @@ kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, kvm_pfn_t gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn); kvm_pfn_t gfn_to_pfn_memslot_atomic(const struct kvm_memory_slot *slot, gfn_t gfn); kvm_pfn_t __gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn, - bool atomic, bool *async, bool write_fault, - bool *writable, hva_t *hva); + bool atomic, bool interruptible, bool *async, + bool write_fault, bool *writable, hva_t *hva); void kvm_release_pfn_clean(kvm_pfn_t pfn); void kvm_release_pfn_dirty(kvm_pfn_t pfn); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 558f52dbebbd..43bbe4fde078 100644 --- 
a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2514,7 +2514,7 @@ static bool hva_to_pfn_fast(unsigned long addr, bool write_fault, * 1 indicates success, -errno is returned if error is detected. */ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault, - bool *writable, kvm_pfn_t *pfn) + bool interruptible, bool *writable, kvm_pfn_t *pfn) { unsigned int flags = FOLL_HWPOISON; struct page *page; @@ -2529,6 +2529,8 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault, flags |= FOLL_WRITE; if (async) flags |= FOLL_NOWAIT; + if (interruptible) + flags |= FOLL_INTERRUPTIBLE; npages = get_user_pages_unlocked(addr, 1, &page, flags); if (npages != 1) @@ -2638,6 +2640,7 @@ out: * Pin guest page in memory and return its pfn. * @addr: host virtual address which maps memory to the guest * @atomic: whether this function can sleep + * @interruptible: whether the process can be interrupted by non-fatal signals * @async: whether this function need to wait IO complete if the * host page is not in the memory * @write_fault: whether we should get a writable host page @@ -2648,8 +2651,8 @@ out: * 2): @write_fault = false && @writable, @writable will tell the caller * whether the mapping is writable. 
*/ -kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async, - bool write_fault, bool *writable) +kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool interruptible, + bool *async, bool write_fault, bool *writable) { struct vm_area_struct *vma; kvm_pfn_t pfn; @@ -2664,7 +2667,8 @@ kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async, if (atomic) return KVM_PFN_ERR_FAULT; - npages = hva_to_pfn_slow(addr, async, write_fault, writable, &pfn); + npages = hva_to_pfn_slow(addr, async, write_fault, interruptible, + writable, &pfn); if (npages == 1) return pfn; if (npages == -EINTR) @@ -2699,8 +2703,8 @@ exit: } kvm_pfn_t __gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn, - bool atomic, bool *async, bool write_fault, - bool *writable, hva_t *hva) + bool atomic, bool interruptible, bool *async, + bool write_fault, bool *writable, hva_t *hva) { unsigned long addr = __gfn_to_hva_many(slot, gfn, NULL, write_fault); @@ -2725,7 +2729,7 @@ kvm_pfn_t __gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn, writable = NULL; } - return hva_to_pfn(addr, atomic, async, write_fault, + return hva_to_pfn(addr, atomic, interruptible, async, write_fault, writable); } EXPORT_SYMBOL_GPL(__gfn_to_pfn_memslot); @@ -2733,20 +2737,22 @@ EXPORT_SYMBOL_GPL(__gfn_to_pfn_memslot); kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, bool *writable) { - return __gfn_to_pfn_memslot(gfn_to_memslot(kvm, gfn), gfn, false, NULL, - write_fault, writable, NULL); + return __gfn_to_pfn_memslot(gfn_to_memslot(kvm, gfn), gfn, false, false, + NULL, write_fault, writable, NULL); } EXPORT_SYMBOL_GPL(gfn_to_pfn_prot); kvm_pfn_t gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn) { - return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true, NULL, NULL); + return __gfn_to_pfn_memslot(slot, gfn, false, false, NULL, true, + NULL, NULL); } EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot); kvm_pfn_t gfn_to_pfn_memslot_atomic(const struct 
kvm_memory_slot *slot, gfn_t gfn) { - return __gfn_to_pfn_memslot(slot, gfn, true, NULL, true, NULL, NULL); + return __gfn_to_pfn_memslot(slot, gfn, true, false, NULL, true, + NULL, NULL); } EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic); diff --git a/virt/kvm/kvm_mm.h b/virt/kvm/kvm_mm.h index 41da467d99c9..a1ab15006af3 100644 --- a/virt/kvm/kvm_mm.h +++ b/virt/kvm/kvm_mm.h @@ -24,8 +24,8 @@ #define KVM_MMU_READ_UNLOCK(kvm) spin_unlock(&(kvm)->mmu_lock) #endif /* KVM_HAVE_MMU_RWLOCK */ -kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async, - bool write_fault, bool *writable); +kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool interruptible, + bool *async, bool write_fault, bool *writable); #ifdef CONFIG_HAVE_KVM_PFNCACHE void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c index 346e47f15572..bd4a46aee384 100644 --- a/virt/kvm/pfncache.c +++ b/virt/kvm/pfncache.c @@ -185,7 +185,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, struct gfn_to_pfn_cache *gpc) } /* We always request a writeable mapping */ - new_pfn = hva_to_pfn(gpc->uhva, false, NULL, true, NULL); + new_pfn = hva_to_pfn(gpc->uhva, false, false, NULL, true, NULL); if (is_error_noslot_pfn(new_pfn)) goto out_error; From 766576874b9732ad6a65595296de351c064b4c0b Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 11 Oct 2022 15:59:47 -0400 Subject: [PATCH 1201/4122] kvm: x86: Allow to respond to generic signals during slow PF Enable x86 slow page faults to be able to respond to non-fatal signals, returning -EINTR properly when it happens. 
Signed-off-by: Peter Xu Reviewed-by: Sean Christopherson Message-Id: <20221011195947.557281-1-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 0bbfb33fa735..5d662b43a63e 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3149,8 +3149,13 @@ static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct * send_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, PAGE_SHIFT, tsk); } -static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn) +static int kvm_handle_error_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn) { + if (is_sigpending_pfn(pfn)) { + kvm_handle_signal_exit(vcpu); + return -EINTR; + } + /* * Do not cache the mmio info caused by writing the readonly gfn * into the spte otherwise read access on readonly gfn also can @@ -3172,7 +3177,7 @@ static int handle_abnormal_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fau { /* The pfn is invalid, report the error! */ if (unlikely(is_error_pfn(fault->pfn))) - return kvm_handle_bad_page(vcpu, fault->gfn, fault->pfn); + return kvm_handle_error_pfn(vcpu, fault->gfn, fault->pfn); if (unlikely(!fault->slot)) { gva_t gva = fault->is_tdp ? 0 : fault->addr; @@ -4187,7 +4192,12 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) } } - fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, false, NULL, + /* + * Allow gup to bail on pending non-fatal signals when it's also allowed + * to wait for IO. Note, gup always bails if it is unable to quickly + * get a page and a fatal signal, i.e. SIGKILL, is pending. 
+ */ + fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, true, NULL, fault->write, &fault->map_writable, &fault->hva); return RET_PF_CONTINUE; From be83794210e7020fef98596f4513aafbed659cd1 Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Wed, 21 Sep 2022 15:15:21 +0000 Subject: [PATCH 1202/4122] KVM: x86: Disallow the use of KVM_MSR_FILTER_DEFAULT_ALLOW in the kernel Protect the kernel from using the flag KVM_MSR_FILTER_DEFAULT_ALLOW. Its value is 0, and using it incorrectly could have unintended consequences. E.g. prevent someone in the kernel from writing something like this. if (filter.flags & KVM_MSR_FILTER_DEFAULT_ALLOW) and getting confused when it doesn't work. It would be more ideal to remove this flag altogether, but userspace may already be using it, so protecting the kernel is all that can reasonably be done at this point. Suggested-by: Sean Christopherson Signed-off-by: Aaron Lewis Reviewed-by: Sean Christopherson Message-Id: <20220921151525.904162-2-aaronlewis@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/uapi/asm/kvm.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 46de10a809ec..73ad693aa653 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -222,7 +222,9 @@ struct kvm_msr_filter_range { #define KVM_MSR_FILTER_MAX_RANGES 16 struct kvm_msr_filter { +#ifndef __KERNEL__ #define KVM_MSR_FILTER_DEFAULT_ALLOW (0 << 0) +#endif #define KVM_MSR_FILTER_DEFAULT_DENY (1 << 0) __u32 flags; struct kvm_msr_filter_range ranges[KVM_MSR_FILTER_MAX_RANGES]; From db205f7e1edc8d8f0880f0218d3a03b13fe94af3 Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Wed, 21 Sep 2022 15:15:22 +0000 Subject: [PATCH 1203/4122] KVM: x86: Add a VALID_MASK for the MSR exit reason flags Add the mask KVM_MSR_EXIT_REASON_VALID_MASK for the MSR exit reason flags. 
This simplifies checks that validate these flags, and makes it easier to introduce new flags in the future. No functional change intended. Signed-off-by: Aaron Lewis Message-Id: <20220921151525.904162-3-aaronlewis@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 4 +--- include/uapi/linux/kvm.h | 3 +++ 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cbec2e675c18..9ba8c1b73db4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6230,9 +6230,7 @@ split_irqchip_unlock: break; case KVM_CAP_X86_USER_SPACE_MSR: r = -EINVAL; - if (cap->args[0] & ~(KVM_MSR_EXIT_REASON_INVAL | - KVM_MSR_EXIT_REASON_UNKNOWN | - KVM_MSR_EXIT_REASON_FILTER)) + if (cap->args[0] & ~KVM_MSR_EXIT_REASON_VALID_MASK) break; kvm->arch.user_space_msr_mask = cap->args[0]; r = 0; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 0d5d4419139a..7fea12369245 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -485,6 +485,9 @@ struct kvm_run { #define KVM_MSR_EXIT_REASON_INVAL (1 << 0) #define KVM_MSR_EXIT_REASON_UNKNOWN (1 << 1) #define KVM_MSR_EXIT_REASON_FILTER (1 << 2) +#define KVM_MSR_EXIT_REASON_VALID_MASK (KVM_MSR_EXIT_REASON_INVAL | \ + KVM_MSR_EXIT_REASON_UNKNOWN | \ + KVM_MSR_EXIT_REASON_FILTER) __u32 reason; /* kernel -> user */ __u32 index; /* kernel -> user */ __u64 data; /* kernel <-> user */ From c1340fe3590ebbe729294c728234434ef31a7c7a Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Wed, 21 Sep 2022 15:15:23 +0000 Subject: [PATCH 1204/4122] KVM: x86: Add a VALID_MASK for the flag in kvm_msr_filter Add the mask KVM_MSR_FILTER_VALID_MASK for the flag in the struct kvm_msr_filter. This makes it easier to introduce new flags in the future. No functional change intended. 
Signed-off-by: Aaron Lewis Message-Id: <20220921151525.904162-4-aaronlewis@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/uapi/asm/kvm.h | 1 + arch/x86/kvm/x86.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 73ad693aa653..ae4324674c49 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -226,6 +226,7 @@ struct kvm_msr_filter { #define KVM_MSR_FILTER_DEFAULT_ALLOW (0 << 0) #endif #define KVM_MSR_FILTER_DEFAULT_DENY (1 << 0) +#define KVM_MSR_FILTER_VALID_MASK (KVM_MSR_FILTER_DEFAULT_DENY) __u32 flags; struct kvm_msr_filter_range ranges[KVM_MSR_FILTER_MAX_RANGES]; }; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9ba8c1b73db4..5208b9501c88 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6441,7 +6441,7 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, int r = 0; u32 i; - if (filter->flags & ~KVM_MSR_FILTER_DEFAULT_DENY) + if (filter->flags & ~KVM_MSR_FILTER_VALID_MASK) return -EINVAL; for (i = 0; i < ARRAY_SIZE(filter->ranges); i++) From 8aff460f216753d86ab90ddbcab0125ab2400335 Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Wed, 21 Sep 2022 15:15:24 +0000 Subject: [PATCH 1205/4122] KVM: x86: Add a VALID_MASK for the flags in kvm_msr_filter_range Add the mask KVM_MSR_FILTER_RANGE_VALID_MASK for the flags in the struct kvm_msr_filter_range. This simplifies checks that validate these flags, and makes it easier to introduce new flags in the future. No functional change intended. 
Signed-off-by: Aaron Lewis Message-Id: <20220921151525.904162-5-aaronlewis@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/uapi/asm/kvm.h | 2 ++ arch/x86/kvm/x86.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index ae4324674c49..c6df6b16a088 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -214,6 +214,8 @@ struct kvm_msr_list { struct kvm_msr_filter_range { #define KVM_MSR_FILTER_READ (1 << 0) #define KVM_MSR_FILTER_WRITE (1 << 1) +#define KVM_MSR_FILTER_RANGE_VALID_MASK (KVM_MSR_FILTER_READ | \ + KVM_MSR_FILTER_WRITE) __u32 flags; __u32 nmsrs; /* number of msrs in bitmap */ __u32 base; /* MSR index the bitmap starts at */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5208b9501c88..e46e458c5b08 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6407,7 +6407,7 @@ static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter, if (!user_range->nmsrs) return 0; - if (user_range->flags & ~(KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE)) + if (user_range->flags & ~KVM_MSR_FILTER_RANGE_VALID_MASK) return -EINVAL; if (!user_range->flags) From f7d64772712350fd35f1d76d16dec030a81029eb Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Wed, 21 Sep 2022 15:15:25 +0000 Subject: [PATCH 1206/4122] selftests: kvm/x86: Test the flags in MSR filtering and MSR exiting When using the flags in KVM_X86_SET_MSR_FILTER and KVM_CAP_X86_USER_SPACE_MSR it is expected that an attempt to write to any of the unused bits will fail. Add testing to walk over every bit in each of the flag fields in MSR filtering and MSR exiting to verify that unused bits return an error and used bits, i.e. valid bits, succeed.
Signed-off-by: Aaron Lewis Message-Id: <20220921151525.904162-6-aaronlewis@google.com> Signed-off-by: Paolo Bonzini --- .../kvm/x86_64/userspace_msr_exit_test.c | 85 +++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c index a4f06370a245..fae95089e655 100644 --- a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c +++ b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c @@ -733,6 +733,89 @@ static void test_msr_permission_bitmap(void) kvm_vm_free(vm); } +#define test_user_exit_msr_ioctl(vm, cmd, arg, flag, valid_mask) \ +({ \ + int r = __vm_ioctl(vm, cmd, arg); \ + \ + if (flag & valid_mask) \ + TEST_ASSERT(!r, __KVM_IOCTL_ERROR(#cmd, r)); \ + else \ + TEST_ASSERT(r == -1 && errno == EINVAL, \ + "Wanted EINVAL for %s with flag = 0x%llx, got rc: %i errno: %i (%s)", \ + #cmd, flag, r, errno, strerror(errno)); \ +}) + +static void run_user_space_msr_flag_test(struct kvm_vm *vm) +{ + struct kvm_enable_cap cap = { .cap = KVM_CAP_X86_USER_SPACE_MSR }; + int nflags = sizeof(cap.args[0]) * BITS_PER_BYTE; + int rc; + int i; + + rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR); + TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available"); + + for (i = 0; i < nflags; i++) { + cap.args[0] = BIT_ULL(i); + test_user_exit_msr_ioctl(vm, KVM_ENABLE_CAP, &cap, + BIT_ULL(i), KVM_MSR_EXIT_REASON_VALID_MASK); + } +} + +static void run_msr_filter_flag_test(struct kvm_vm *vm) +{ + u64 deny_bits = 0; + struct kvm_msr_filter filter = { + .flags = KVM_MSR_FILTER_DEFAULT_ALLOW, + .ranges = { + { + .flags = KVM_MSR_FILTER_READ, + .nmsrs = 1, + .base = 0, + .bitmap = (uint8_t *)&deny_bits, + }, + }, + }; + int nflags; + int rc; + int i; + + rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER); + TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available"); + + nflags = sizeof(filter.flags) * BITS_PER_BYTE; + for (i = 0; i < nflags; i++) { + filter.flags = 
BIT_ULL(i); + test_user_exit_msr_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter, + BIT_ULL(i), KVM_MSR_FILTER_VALID_MASK); + } + + filter.flags = KVM_MSR_FILTER_DEFAULT_ALLOW; + nflags = sizeof(filter.ranges[0].flags) * BITS_PER_BYTE; + for (i = 0; i < nflags; i++) { + filter.ranges[0].flags = BIT_ULL(i); + test_user_exit_msr_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter, + BIT_ULL(i), KVM_MSR_FILTER_RANGE_VALID_MASK); + } +} + +/* Test that attempts to write to the unused bits in a flag fails. */ +static void test_user_exit_msr_flags(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + vm = vm_create_with_one_vcpu(&vcpu, NULL); + + /* Test flags for KVM_CAP_X86_USER_SPACE_MSR. */ + run_user_space_msr_flag_test(vm); + + /* Test flags and range flags for KVM_X86_SET_MSR_FILTER. */ + run_msr_filter_flag_test(vm); + + kvm_vm_free(vm); +} + int main(int argc, char *argv[]) { /* Tell stdout not to buffer its content */ @@ -744,5 +827,7 @@ int main(int argc, char *argv[]) test_msr_permission_bitmap(); + test_user_exit_msr_flags(); + return 0; } From 428e921611bcad9ab95078baf9abe14688de43f0 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 19 Oct 2022 16:56:11 +0000 Subject: [PATCH 1207/4122] KVM: x86/mmu: Tag disallowed NX huge pages even if they're not tracked Tag shadow pages that cannot be replaced with an NX huge page regardless of whether or not zapping the page would allow KVM to immediately create a huge page, e.g. because something else prevents creating a huge page. I.e. track pages that are disallowed from being NX huge pages regardless of whether or not the page could have been huge at the time of fault. KVM currently tracks pages that were disallowed from being huge due to the NX workaround if and only if the page could otherwise be huge. But that fails to handle the scenario where whatever restriction prevented KVM from installing a huge page goes away, e.g. if dirty logging is disabled, the host mapping level changes, etc...
Failure to tag shadow pages appropriately could theoretically lead to false negatives, e.g. if a fetch fault requests a small page and thus isn't tracked, and a read/write fault later requests a huge page, KVM will not reject the huge page as it should. To avoid yet another flag, initialize the list_head and use list_empty() to determine whether or not a page is on the list of NX huge pages that should be recovered. Note, the TDP MMU accounting is still flawed as fixing the TDP MMU is more involved due to mmu_lock being held for read. This will be addressed in a future commit. Fixes: 5bcaf3e1715f ("KVM: x86/mmu: Account NX huge page disallowed iff huge page was requested") Signed-off-by: Sean Christopherson Message-Id: <20221019165618.927057-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 32 ++++++++++++++++++++++++-------- arch/x86/kvm/mmu/mmu_internal.h | 10 +++++++++- arch/x86/kvm/mmu/paging_tmpl.h | 6 +++--- arch/x86/kvm/mmu/tdp_mmu.c | 4 +++- 4 files changed, 39 insertions(+), 13 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 5d662b43a63e..989586e7dd86 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -803,15 +803,25 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) kvm_flush_remote_tlbs_with_address(kvm, gfn, 1); } -void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp) +void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp, + bool nx_huge_page_possible) { - if (sp->lpage_disallowed) + sp->lpage_disallowed = true; + + /* + * If it's possible to replace the shadow page with an NX huge page, + * i.e. if the shadow page is the only thing currently preventing KVM + * from using a huge page, add the shadow page to the list of "to be + * zapped for NX recovery" pages. Note, the shadow page can already be + * on the list if KVM is reusing an existing shadow page, i.e. if KVM + * links a shadow page at multiple points. 
+ */ + if (!nx_huge_page_possible || !list_empty(&sp->lpage_disallowed_link)) return; ++kvm->stat.nx_lpage_splits; list_add_tail(&sp->lpage_disallowed_link, &kvm->arch.lpage_disallowed_mmu_pages); - sp->lpage_disallowed = true; } static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) @@ -833,9 +843,13 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp) { - --kvm->stat.nx_lpage_splits; sp->lpage_disallowed = false; - list_del(&sp->lpage_disallowed_link); + + if (list_empty(&sp->lpage_disallowed_link)) + return; + + --kvm->stat.nx_lpage_splits; + list_del_init(&sp->lpage_disallowed_link); } static struct kvm_memory_slot * @@ -2130,6 +2144,8 @@ static struct kvm_mmu_page *kvm_mmu_alloc_shadow_page(struct kvm *kvm, set_page_private(virt_to_page(sp->spt), (unsigned long)sp); + INIT_LIST_HEAD(&sp->lpage_disallowed_link); + /* * active_mmu_pages must be a FIFO list, as kvm_zap_obsolete_pages() * depends on valid pages being added to the head of the list. See @@ -3127,9 +3143,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) continue; link_shadow_page(vcpu, it.sptep, sp); - if (fault->is_tdp && fault->huge_page_disallowed && - fault->req_level >= it.level) - account_huge_nx_page(vcpu->kvm, sp); + if (fault->is_tdp && fault->huge_page_disallowed) + account_huge_nx_page(vcpu->kvm, sp, + fault->req_level >= it.level); } if (WARN_ON_ONCE(it.level != fault->goal_level)) diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index 582def531d4d..cca1ad75d096 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -100,6 +100,13 @@ struct kvm_mmu_page { }; }; + /* + * Tracks shadow pages that, if zapped, would allow KVM to create an NX + * huge page. A shadow page will have lpage_disallowed set but not be + * on the list if a huge page is disallowed for other reasons, e.g. 
+ * because KVM is shadowing a PTE at the same gfn, the memslot isn't + * properly aligned, etc... + */ struct list_head lpage_disallowed_link; #ifdef CONFIG_X86_32 /* @@ -315,7 +322,8 @@ void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_ void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc); -void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp); +void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp, + bool nx_huge_page_possible); void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp); #endif /* __KVM_X86_MMU_INTERNAL_H */ diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index 5ab5f94dcb6f..8fd0c4e1e575 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -713,9 +713,9 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, continue; link_shadow_page(vcpu, it.sptep, sp); - if (fault->huge_page_disallowed && - fault->req_level >= it.level) - account_huge_nx_page(vcpu->kvm, sp); + if (fault->huge_page_disallowed) + account_huge_nx_page(vcpu->kvm, sp, + fault->req_level >= it.level); } if (WARN_ON_ONCE(it.level != fault->goal_level)) diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 672f0432d777..80a4a1a09131 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -284,6 +284,8 @@ static struct kvm_mmu_page *tdp_mmu_alloc_sp(struct kvm_vcpu *vcpu) static void tdp_mmu_init_sp(struct kvm_mmu_page *sp, tdp_ptep_t sptep, gfn_t gfn, union kvm_mmu_page_role role) { + INIT_LIST_HEAD(&sp->lpage_disallowed_link); + set_page_private(virt_to_page(sp->spt), (unsigned long)sp); sp->role = role; @@ -1141,7 +1143,7 @@ static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter, spin_lock(&kvm->arch.tdp_mmu_pages_lock); list_add(&sp->link, &kvm->arch.tdp_mmu_pages); if (account_nx) - account_huge_nx_page(kvm, sp); + account_huge_nx_page(kvm, sp, true); 
spin_unlock(&kvm->arch.tdp_mmu_pages_lock); tdp_account_mmu_page(kvm, sp); From 55c510e26ab6181c132327a8b90c864e6193ce27 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 19 Oct 2022 16:56:12 +0000 Subject: [PATCH 1208/4122] KVM: x86/mmu: Rename NX huge pages fields/functions for consistency Rename most of the variables/functions involved in the NX huge page mitigation to provide consistency, e.g. lpage vs huge page, and NX huge vs huge NX, and also to provide clarity, e.g. to make it obvious the flag applies only to the NX huge page mitigation, not to any condition that prevents creating a huge page. Add a comment explaining what the newly named "possible_nx_huge_pages" tracks. Leave the nx_lpage_splits stat alone as the name is ABI and thus set in stone. Signed-off-by: Sean Christopherson Reviewed-by: Mingwei Zhang Message-Id: <20221019165618.927057-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 19 +++++++-- arch/x86/kvm/mmu/mmu.c | 71 +++++++++++++++++---------------- arch/x86/kvm/mmu/mmu_internal.h | 22 ++++++---- arch/x86/kvm/mmu/paging_tmpl.h | 2 +- arch/x86/kvm/mmu/tdp_mmu.c | 8 ++-- 5 files changed, 71 insertions(+), 51 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 444386905632..9030e6263f95 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1159,7 +1159,18 @@ struct kvm_arch { struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; struct list_head active_mmu_pages; struct list_head zapped_obsolete_pages; - struct list_head lpage_disallowed_mmu_pages; + /* + * A list of kvm_mmu_page structs that, if zapped, could possibly be + * replaced by an NX huge page. A shadow page is on this list if its + * existence disallows an NX huge page (nx_huge_page_disallowed is set) + * and there are no other conditions that prevent a huge page, e.g. 
+ * the backing host page is huge, dirtly logging is not enabled for its + * memslot, etc... Note, zapping shadow pages on this list doesn't + * guarantee an NX huge page will be created in its stead, e.g. if the + * guest attempts to execute from the region then KVM obviously can't + * create an NX huge page (without hanging the guest). + */ + struct list_head possible_nx_huge_pages; struct kvm_page_track_notifier_node mmu_sp_tracker; struct kvm_page_track_notifier_head track_notifier_head; /* @@ -1275,7 +1286,7 @@ struct kvm_arch { bool sgx_provisioning_allowed; struct kvm_pmu_event_filter __rcu *pmu_event_filter; - struct task_struct *nx_lpage_recovery_thread; + struct task_struct *nx_huge_page_recovery_thread; #ifdef CONFIG_X86_64 /* @@ -1320,8 +1331,8 @@ struct kvm_arch { * - tdp_mmu_roots (above) * - tdp_mmu_pages (above) * - the link field of kvm_mmu_page structs used by the TDP MMU - * - lpage_disallowed_mmu_pages - * - the lpage_disallowed_link field of kvm_mmu_page structs used + * - possible_nx_huge_pages; + * - the possible_nx_huge_page_link field of kvm_mmu_page structs used * by the TDP MMU * It is acceptable, but not necessary, to acquire this lock when * the thread holds the MMU lock in write mode. diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 989586e7dd86..09482ef4d832 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -803,10 +803,10 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) kvm_flush_remote_tlbs_with_address(kvm, gfn, 1); } -void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp, +void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp, bool nx_huge_page_possible) { - sp->lpage_disallowed = true; + sp->nx_huge_page_disallowed = true; /* * If it's possible to replace the shadow page with an NX huge page, @@ -816,12 +816,13 @@ void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp, * on the list if KVM is reusing an existing shadow page, i.e. 
if KVM * links a shadow page at multiple points. */ - if (!nx_huge_page_possible || !list_empty(&sp->lpage_disallowed_link)) + if (!nx_huge_page_possible || + !list_empty(&sp->possible_nx_huge_page_link)) return; ++kvm->stat.nx_lpage_splits; - list_add_tail(&sp->lpage_disallowed_link, - &kvm->arch.lpage_disallowed_mmu_pages); + list_add_tail(&sp->possible_nx_huge_page_link, + &kvm->arch.possible_nx_huge_pages); } static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) @@ -841,15 +842,15 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) kvm_mmu_gfn_allow_lpage(slot, gfn); } -void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp) +void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp) { - sp->lpage_disallowed = false; + sp->nx_huge_page_disallowed = false; - if (list_empty(&sp->lpage_disallowed_link)) + if (list_empty(&sp->possible_nx_huge_page_link)) return; --kvm->stat.nx_lpage_splits; - list_del_init(&sp->lpage_disallowed_link); + list_del_init(&sp->possible_nx_huge_page_link); } static struct kvm_memory_slot * @@ -2144,7 +2145,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_shadow_page(struct kvm *kvm, set_page_private(virt_to_page(sp->spt), (unsigned long)sp); - INIT_LIST_HEAD(&sp->lpage_disallowed_link); + INIT_LIST_HEAD(&sp->possible_nx_huge_page_link); /* * active_mmu_pages must be a FIFO list, as kvm_zap_obsolete_pages() @@ -2503,8 +2504,8 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm, zapped_root = !is_obsolete_sp(kvm, sp); } - if (sp->lpage_disallowed) - unaccount_huge_nx_page(kvm, sp); + if (sp->nx_huge_page_disallowed) + unaccount_nx_huge_page(kvm, sp); sp->role.invalid = 1; @@ -3144,7 +3145,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) link_shadow_page(vcpu, it.sptep, sp); if (fault->is_tdp && fault->huge_page_disallowed) - account_huge_nx_page(vcpu->kvm, sp, + account_nx_huge_page(vcpu->kvm, sp, fault->req_level >= it.level); } @@ 
-5998,7 +5999,7 @@ int kvm_mmu_init_vm(struct kvm *kvm) INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); - INIT_LIST_HEAD(&kvm->arch.lpage_disallowed_mmu_pages); + INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages); spin_lock_init(&kvm->arch.mmu_unsync_pages_lock); r = kvm_mmu_init_tdp_mmu(kvm); @@ -6683,7 +6684,7 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp) kvm_mmu_zap_all_fast(kvm); mutex_unlock(&kvm->slots_lock); - wake_up_process(kvm->arch.nx_lpage_recovery_thread); + wake_up_process(kvm->arch.nx_huge_page_recovery_thread); } mutex_unlock(&kvm_lock); } @@ -6815,7 +6816,7 @@ static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) - wake_up_process(kvm->arch.nx_lpage_recovery_thread); + wake_up_process(kvm->arch.nx_huge_page_recovery_thread); mutex_unlock(&kvm_lock); } @@ -6823,7 +6824,7 @@ static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel return err; } -static void kvm_recover_nx_lpages(struct kvm *kvm) +static void kvm_recover_nx_huge_pages(struct kvm *kvm) { unsigned long nx_lpage_splits = kvm->stat.nx_lpage_splits; int rcu_idx; @@ -6846,23 +6847,25 @@ static void kvm_recover_nx_lpages(struct kvm *kvm) ratio = READ_ONCE(nx_huge_pages_recovery_ratio); to_zap = ratio ? DIV_ROUND_UP(nx_lpage_splits, ratio) : 0; for ( ; to_zap; --to_zap) { - if (list_empty(&kvm->arch.lpage_disallowed_mmu_pages)) + if (list_empty(&kvm->arch.possible_nx_huge_pages)) break; /* * We use a separate list instead of just using active_mmu_pages - * because the number of lpage_disallowed pages is expected to - * be relatively small compared to the total. + * because the number of shadow pages that be replaced with an + * NX huge page is expected to be relatively small compared to + * the total number of shadow pages. And because the TDP MMU + * doesn't use active_mmu_pages. 
*/ - sp = list_first_entry(&kvm->arch.lpage_disallowed_mmu_pages, + sp = list_first_entry(&kvm->arch.possible_nx_huge_pages, struct kvm_mmu_page, - lpage_disallowed_link); - WARN_ON_ONCE(!sp->lpage_disallowed); + possible_nx_huge_page_link); + WARN_ON_ONCE(!sp->nx_huge_page_disallowed); if (is_tdp_mmu_page(sp)) { flush |= kvm_tdp_mmu_zap_sp(kvm, sp); } else { kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); - WARN_ON_ONCE(sp->lpage_disallowed); + WARN_ON_ONCE(sp->nx_huge_page_disallowed); } if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) { @@ -6883,7 +6886,7 @@ static void kvm_recover_nx_lpages(struct kvm *kvm) srcu_read_unlock(&kvm->srcu, rcu_idx); } -static long get_nx_lpage_recovery_timeout(u64 start_time) +static long get_nx_huge_page_recovery_timeout(u64 start_time) { bool enabled; uint period; @@ -6894,19 +6897,19 @@ static long get_nx_lpage_recovery_timeout(u64 start_time) : MAX_SCHEDULE_TIMEOUT; } -static int kvm_nx_lpage_recovery_worker(struct kvm *kvm, uintptr_t data) +static int kvm_nx_huge_page_recovery_worker(struct kvm *kvm, uintptr_t data) { u64 start_time; long remaining_time; while (true) { start_time = get_jiffies_64(); - remaining_time = get_nx_lpage_recovery_timeout(start_time); + remaining_time = get_nx_huge_page_recovery_timeout(start_time); set_current_state(TASK_INTERRUPTIBLE); while (!kthread_should_stop() && remaining_time > 0) { schedule_timeout(remaining_time); - remaining_time = get_nx_lpage_recovery_timeout(start_time); + remaining_time = get_nx_huge_page_recovery_timeout(start_time); set_current_state(TASK_INTERRUPTIBLE); } @@ -6915,7 +6918,7 @@ static int kvm_nx_lpage_recovery_worker(struct kvm *kvm, uintptr_t data) if (kthread_should_stop()) return 0; - kvm_recover_nx_lpages(kvm); + kvm_recover_nx_huge_pages(kvm); } } @@ -6923,17 +6926,17 @@ int kvm_mmu_post_init_vm(struct kvm *kvm) { int err; - err = kvm_vm_create_worker_thread(kvm, kvm_nx_lpage_recovery_worker, 0, + err = kvm_vm_create_worker_thread(kvm, 
kvm_nx_huge_page_recovery_worker, 0, "kvm-nx-lpage-recovery", - &kvm->arch.nx_lpage_recovery_thread); + &kvm->arch.nx_huge_page_recovery_thread); if (!err) - kthread_unpark(kvm->arch.nx_lpage_recovery_thread); + kthread_unpark(kvm->arch.nx_huge_page_recovery_thread); return err; } void kvm_mmu_pre_destroy_vm(struct kvm *kvm) { - if (kvm->arch.nx_lpage_recovery_thread) - kthread_stop(kvm->arch.nx_lpage_recovery_thread); + if (kvm->arch.nx_huge_page_recovery_thread) + kthread_stop(kvm->arch.nx_huge_page_recovery_thread); } diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index cca1ad75d096..67879459a25c 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -57,7 +57,13 @@ struct kvm_mmu_page { bool tdp_mmu_page; bool unsync; u8 mmu_valid_gen; - bool lpage_disallowed; /* Can't be replaced by an equiv large page */ + + /* + * The shadow page can't be replaced by an equivalent huge page + * because it is being used to map an executable page in the guest + * and the NX huge page mitigation is enabled. + */ + bool nx_huge_page_disallowed; /* * The following two entries are used to key the shadow page in the @@ -102,12 +108,12 @@ struct kvm_mmu_page { /* * Tracks shadow pages that, if zapped, would allow KVM to create an NX - * huge page. A shadow page will have lpage_disallowed set but not be - * on the list if a huge page is disallowed for other reasons, e.g. - * because KVM is shadowing a PTE at the same gfn, the memslot isn't - * properly aligned, etc... + * huge page. A shadow page will have nx_huge_page_disallowed set but + * not be on the list if a huge page is disallowed for other reasons, + * e.g. because KVM is shadowing a PTE at the same gfn, the memslot + * isn't properly aligned, etc... 
*/ - struct list_head lpage_disallowed_link; + struct list_head possible_nx_huge_page_link; #ifdef CONFIG_X86_32 /* * Used out of the mmu-lock to avoid reading spte values while an @@ -322,8 +328,8 @@ void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_ void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc); -void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp, +void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp, bool nx_huge_page_possible); -void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp); +void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp); #endif /* __KVM_X86_MMU_INTERNAL_H */ diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index 8fd0c4e1e575..0f6455072055 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -714,7 +714,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, link_shadow_page(vcpu, it.sptep, sp); if (fault->huge_page_disallowed) - account_huge_nx_page(vcpu->kvm, sp, + account_nx_huge_page(vcpu->kvm, sp, fault->req_level >= it.level); } diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 80a4a1a09131..73eb28ed1f03 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -284,7 +284,7 @@ static struct kvm_mmu_page *tdp_mmu_alloc_sp(struct kvm_vcpu *vcpu) static void tdp_mmu_init_sp(struct kvm_mmu_page *sp, tdp_ptep_t sptep, gfn_t gfn, union kvm_mmu_page_role role) { - INIT_LIST_HEAD(&sp->lpage_disallowed_link); + INIT_LIST_HEAD(&sp->possible_nx_huge_page_link); set_page_private(virt_to_page(sp->spt), (unsigned long)sp); @@ -403,8 +403,8 @@ static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp, lockdep_assert_held_write(&kvm->mmu_lock); list_del(&sp->link); - if (sp->lpage_disallowed) - unaccount_huge_nx_page(kvm, sp); + if (sp->nx_huge_page_disallowed) + unaccount_nx_huge_page(kvm, sp); if (shared) 
spin_unlock(&kvm->arch.tdp_mmu_pages_lock); @@ -1143,7 +1143,7 @@ static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter, spin_lock(&kvm->arch.tdp_mmu_pages_lock); list_add(&sp->link, &kvm->arch.tdp_mmu_pages); if (account_nx) - account_huge_nx_page(kvm, sp, true); + account_nx_huge_page(kvm, sp, true); spin_unlock(&kvm->arch.tdp_mmu_pages_lock); tdp_account_mmu_page(kvm, sp); From b5b0977f4aa28ef2166894b05f37d8f8028a76ce Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 19 Oct 2022 16:56:13 +0000 Subject: [PATCH 1209/4122] KVM: x86/mmu: Properly account NX huge page workaround for nonpaging MMUs Account and track NX huge pages for nonpaging MMUs so that a future enhancement to precisely check if a shadow page can't be replaced by a NX huge page doesn't get false positives. Without correct tracking, KVM can get stuck in a loop if an instruction is fetching and writing data on the same huge page, e.g. KVM installs a small executable page on the fetch fault, replaces it with an NX huge page on the write fault, and faults again on the fetch. Alternatively, and perhaps ideally, KVM would simply not enforce the workaround for nonpaging MMUs. The guest has no page tables to abuse and KVM is guaranteed to switch to a different MMU on CR0.PG being toggled so there's no security or performance concerns. However, getting make_spte() to play nice now and in the future is unnecessarily complex. In the current code base, make_spte() can enforce the mitigation if TDP is enabled or the MMU is indirect, but make_spte() may not always have a vCPU/MMU to work with, e.g. if KVM were to support in-line huge page promotion when disabling dirty logging. Without a vCPU/MMU, KVM could either pass in the correct information and/or derive it from the shadow page, but the former is ugly and the latter subtly non-trivial due to the possibility of direct shadow pages in indirect MMUs. 
Given that using shadow paging with an unpaged guest is far from top priority _and_ has been subjected to the workaround since its inception, keep it simple and just fix the accounting glitch. Signed-off-by: Sean Christopherson Reviewed-by: David Matlack Reviewed-by: Mingwei Zhang Message-Id: <20221019165618.927057-4-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 2 +- arch/x86/kvm/mmu/spte.c | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 09482ef4d832..f11e4bbfc0bc 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3144,7 +3144,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) continue; link_shadow_page(vcpu, it.sptep, sp); - if (fault->is_tdp && fault->huge_page_disallowed) + if (fault->huge_page_disallowed) account_nx_huge_page(vcpu->kvm, sp, fault->req_level >= it.level); } diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c index 2e08b2a45361..c0fd7e049b4e 100644 --- a/arch/x86/kvm/mmu/spte.c +++ b/arch/x86/kvm/mmu/spte.c @@ -161,6 +161,18 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, if (!prefetch) spte |= spte_shadow_accessed_mask(spte); + /* + * For simplicity, enforce the NX huge page mitigation even if not + * strictly necessary. KVM could ignore the mitigation if paging is + * disabled in the guest, as the guest doesn't have an page tables to + * abuse. But to safely ignore the mitigation, KVM would have to + * ensure a new MMU is loaded (or all shadow pages zapped) when CR0.PG + * is toggled on, and that's a net negative for performance when TDP is + * enabled. When TDP is disabled, KVM will always switch to a new MMU + * when CR0.PG is toggled, but leveraging that to ignore the mitigation + * would tie make_spte() further to vCPU/MMU state, and add complexity + * just to optimize a mode that is anything but performance critical. 
+ */ if (level > PG_LEVEL_4K && (pte_access & ACC_EXEC_MASK) && is_nx_huge_page_enabled(vcpu->kvm)) { pte_access &= ~ACC_EXEC_MASK; From 61f94478547bb4fdcd4c4f37a0aa723d610a7422 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 19 Oct 2022 16:56:14 +0000 Subject: [PATCH 1210/4122] KVM: x86/mmu: Set disallowed_nx_huge_page in TDP MMU before setting SPTE Set nx_huge_page_disallowed in TDP MMU shadow pages before making the SP visible to other readers, i.e. before setting its SPTE. This will allow KVM to query the flag when determining if a shadow page can be replaced by a NX huge page without violating the rules of the mitigation. Note, the shadow/legacy MMU holds mmu_lock for write, so it's impossible for another CPU to see a shadow page without an up-to-date nx_huge_page_disallowed, i.e. only the TDP MMU needs the complicated dance. Signed-off-by: Sean Christopherson Reviewed-by: David Matlack Reviewed-by: Yan Zhao Message-Id: <20221019165618.927057-5-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 28 +++++++++++++++++++--------- arch/x86/kvm/mmu/mmu_internal.h | 5 ++--- arch/x86/kvm/mmu/tdp_mmu.c | 31 ++++++++++++++++++------------- 3 files changed, 39 insertions(+), 25 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index f11e4bbfc0bc..e384b78e099c 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -803,11 +803,8 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) kvm_flush_remote_tlbs_with_address(kvm, gfn, 1); } -void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp, - bool nx_huge_page_possible) +void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp) { - sp->nx_huge_page_disallowed = true; - /* * If it's possible to replace the shadow page with an NX huge page, * i.e. 
if the shadow page is the only thing currently preventing KVM @@ -816,8 +813,7 @@ void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp, * on the list if KVM is reusing an existing shadow page, i.e. if KVM * links a shadow page at multiple points. */ - if (!nx_huge_page_possible || - !list_empty(&sp->possible_nx_huge_page_link)) + if (!list_empty(&sp->possible_nx_huge_page_link)) return; ++kvm->stat.nx_lpage_splits; @@ -825,6 +821,15 @@ void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp, &kvm->arch.possible_nx_huge_pages); } +static void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp, + bool nx_huge_page_possible) +{ + sp->nx_huge_page_disallowed = true; + + if (nx_huge_page_possible) + track_possible_nx_huge_page(kvm, sp); +} + static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) { struct kvm_memslots *slots; @@ -842,10 +847,8 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) kvm_mmu_gfn_allow_lpage(slot, gfn); } -void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp) +void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp) { - sp->nx_huge_page_disallowed = false; - if (list_empty(&sp->possible_nx_huge_page_link)) return; @@ -853,6 +856,13 @@ void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp) list_del_init(&sp->possible_nx_huge_page_link); } +static void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp) +{ + sp->nx_huge_page_disallowed = false; + + untrack_possible_nx_huge_page(kvm, sp); +} + static struct kvm_memory_slot * gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn, bool no_dirty_log) diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index 67879459a25c..22152241bd29 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -328,8 +328,7 @@ void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_ void 
*mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc); -void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp, - bool nx_huge_page_possible); -void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp); +void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp); +void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp); #endif /* __KVM_X86_MMU_INTERNAL_H */ diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 73eb28ed1f03..059231c82345 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -403,8 +403,11 @@ static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp, lockdep_assert_held_write(&kvm->mmu_lock); list_del(&sp->link); - if (sp->nx_huge_page_disallowed) - unaccount_nx_huge_page(kvm, sp); + + if (sp->nx_huge_page_disallowed) { + sp->nx_huge_page_disallowed = false; + untrack_possible_nx_huge_page(kvm, sp); + } if (shared) spin_unlock(&kvm->arch.tdp_mmu_pages_lock); @@ -1118,16 +1121,13 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, * @kvm: kvm instance * @iter: a tdp_iter instance currently on the SPTE that should be set * @sp: The new TDP page table to install. - * @account_nx: True if this page table is being installed to split a - * non-executable huge page. * @shared: This operation is running under the MMU lock in read mode. * * Returns: 0 if the new page table was installed. Non-0 if the page table * could not be installed (e.g. the atomic compare-exchange failed). 
*/ static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter, - struct kvm_mmu_page *sp, bool account_nx, - bool shared) + struct kvm_mmu_page *sp, bool shared) { u64 spte = make_nonleaf_spte(sp->spt, !kvm_ad_enabled()); int ret = 0; @@ -1142,8 +1142,6 @@ static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter, spin_lock(&kvm->arch.tdp_mmu_pages_lock); list_add(&sp->link, &kvm->arch.tdp_mmu_pages); - if (account_nx) - account_nx_huge_page(kvm, sp, true); spin_unlock(&kvm->arch.tdp_mmu_pages_lock); tdp_account_mmu_page(kvm, sp); @@ -1157,6 +1155,7 @@ static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter, int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { struct kvm_mmu *mmu = vcpu->arch.mmu; + struct kvm *kvm = vcpu->kvm; struct tdp_iter iter; struct kvm_mmu_page *sp; int ret; @@ -1193,9 +1192,6 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) } if (!is_shadow_present_pte(iter.old_spte)) { - bool account_nx = fault->huge_page_disallowed && - fault->req_level >= iter.level; - /* * If SPTE has been frozen by another thread, just * give up and retry, avoiding unnecessary page table @@ -1207,10 +1203,19 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) sp = tdp_mmu_alloc_sp(vcpu); tdp_mmu_init_child_sp(sp, &iter); - if (tdp_mmu_link_sp(vcpu->kvm, &iter, sp, account_nx, true)) { + sp->nx_huge_page_disallowed = fault->huge_page_disallowed; + + if (tdp_mmu_link_sp(kvm, &iter, sp, true)) { tdp_mmu_free_sp(sp); break; } + + if (fault->huge_page_disallowed && + fault->req_level >= iter.level) { + spin_lock(&kvm->arch.tdp_mmu_pages_lock); + track_possible_nx_huge_page(kvm, sp); + spin_unlock(&kvm->arch.tdp_mmu_pages_lock); + } } } @@ -1498,7 +1503,7 @@ static int tdp_mmu_split_huge_page(struct kvm *kvm, struct tdp_iter *iter, * correctness standpoint since the translation will be the same either * way. 
*/ - ret = tdp_mmu_link_sp(kvm, iter, sp, false, shared); + ret = tdp_mmu_link_sp(kvm, iter, sp, shared); if (ret) goto out; From d25ceb9264364dca0683748b301340097fdab6c7 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 19 Oct 2022 16:56:15 +0000 Subject: [PATCH 1211/4122] KVM: x86/mmu: Track the number of TDP MMU pages, but not the actual pages Track the number of TDP MMU "shadow" pages instead of tracking the pages themselves. With the NX huge page list manipulation moved out of the common linking flow, eliminating the list-based tracking means the happy path of adding a shadow page doesn't need to acquire a spinlock and can instead inc/dec an atomic. Keep the tracking as the WARN during TDP MMU teardown on leaked shadow pages is very, very useful for detecting KVM bugs. Tracking the number of pages will also make it trivial to expose the counter to userspace as a stat in the future, which may or may not be desirable. Note, the TDP MMU needs to use a separate counter (and stat if that ever comes to be) from the existing n_used_mmu_pages. The TDP MMU doesn't bother supporting the shrinker nor does it honor KVM_SET_NR_MMU_PAGES (because the TDP MMU consumes so few pages relative to shadow paging), and including TDP MMU pages in that counter would break both the shrinker and shadow MMUs, e.g. if a VM is using nested TDP. 
Cc: Yan Zhao Reviewed-by: Mingwei Zhang Reviewed-by: David Matlack Signed-off-by: Sean Christopherson Reviewed-by: Yan Zhao Message-Id: <20221019165618.927057-6-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 11 +++-------- arch/x86/kvm/mmu/tdp_mmu.c | 20 +++++++++----------- 2 files changed, 12 insertions(+), 19 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 9030e6263f95..9362b9736d87 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1298,6 +1298,9 @@ struct kvm_arch { */ bool tdp_mmu_enabled; + /* The number of TDP MMU pages across all roots. */ + atomic64_t tdp_mmu_pages; + /* * List of kvm_mmu_page structs being used as roots. * All kvm_mmu_page structs in the list should have @@ -1318,18 +1321,10 @@ struct kvm_arch { */ struct list_head tdp_mmu_roots; - /* - * List of kvm_mmu_page structs not being used as roots. - * All kvm_mmu_page structs in the list should have - * tdp_mmu_page set and a tdp_mmu_root_count of 0. - */ - struct list_head tdp_mmu_pages; - /* * Protects accesses to the following fields when the MMU lock * is held in read mode: * - tdp_mmu_roots (above) - * - tdp_mmu_pages (above) * - the link field of kvm_mmu_page structs used by the TDP MMU * - possible_nx_huge_pages; * - the possible_nx_huge_page_link field of kvm_mmu_page structs used diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 059231c82345..4e5b3ae824c1 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -29,7 +29,6 @@ int kvm_mmu_init_tdp_mmu(struct kvm *kvm) kvm->arch.tdp_mmu_enabled = true; INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots); spin_lock_init(&kvm->arch.tdp_mmu_pages_lock); - INIT_LIST_HEAD(&kvm->arch.tdp_mmu_pages); kvm->arch.tdp_mmu_zap_wq = wq; return 1; } @@ -54,7 +53,7 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) /* Also waits for any queued work items. 
*/ destroy_workqueue(kvm->arch.tdp_mmu_zap_wq); - WARN_ON(!list_empty(&kvm->arch.tdp_mmu_pages)); + WARN_ON(atomic64_read(&kvm->arch.tdp_mmu_pages)); WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots)); /* @@ -377,11 +376,13 @@ static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn, static void tdp_account_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp) { kvm_account_pgtable_pages((void *)sp->spt, +1); + atomic64_inc(&kvm->arch.tdp_mmu_pages); } static void tdp_unaccount_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp) { kvm_account_pgtable_pages((void *)sp->spt, -1); + atomic64_dec(&kvm->arch.tdp_mmu_pages); } /** @@ -397,17 +398,17 @@ static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp, bool shared) { tdp_unaccount_mmu_page(kvm, sp); + + if (!sp->nx_huge_page_disallowed) + return; + if (shared) spin_lock(&kvm->arch.tdp_mmu_pages_lock); else lockdep_assert_held_write(&kvm->mmu_lock); - list_del(&sp->link); - - if (sp->nx_huge_page_disallowed) { - sp->nx_huge_page_disallowed = false; - untrack_possible_nx_huge_page(kvm, sp); - } + sp->nx_huge_page_disallowed = false; + untrack_possible_nx_huge_page(kvm, sp); if (shared) spin_unlock(&kvm->arch.tdp_mmu_pages_lock); @@ -1140,9 +1141,6 @@ static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter, tdp_mmu_set_spte(kvm, iter, spte); } - spin_lock(&kvm->arch.tdp_mmu_pages_lock); - list_add(&sp->link, &kvm->arch.tdp_mmu_pages); - spin_unlock(&kvm->arch.tdp_mmu_pages_lock); tdp_account_mmu_page(kvm, sp); return 0; From 5e3edd7e8b7e8004c9bb8310fd669a9ca81de207 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 19 Oct 2022 16:56:16 +0000 Subject: [PATCH 1212/4122] KVM: x86/mmu: Add helper to convert SPTE value to its shadow page Add a helper to convert a SPTE to its shadow page to deduplicate a variety of flows and hopefully avoid future bugs, e.g. if KVM attempts to get the shadow page for a SPTE without dropping high bits. 
Opportunistically add a comment in mmu_free_root_page() documenting why it treats the root HPA as a SPTE. No functional change intended. Signed-off-by: Sean Christopherson Message-Id: <20221019165618.927057-7-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 17 ++++++++++------- arch/x86/kvm/mmu/mmu_internal.h | 12 ------------ arch/x86/kvm/mmu/spte.h | 17 +++++++++++++++++ arch/x86/kvm/mmu/tdp_mmu.h | 2 ++ 4 files changed, 29 insertions(+), 19 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index e384b78e099c..5f9b57c6e506 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1819,7 +1819,7 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp, continue; } - child = to_shadow_page(ent & SPTE_BASE_ADDR_MASK); + child = spte_to_child_sp(ent); if (child->unsync_children) { if (mmu_pages_add(pvec, child, i)) @@ -2378,7 +2378,7 @@ static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep, * so we should update the spte at this point to get * a new sp with the correct access. 
*/ - child = to_shadow_page(*sptep & SPTE_BASE_ADDR_MASK); + child = spte_to_child_sp(*sptep); if (child->role.access == direct_access) return; @@ -2399,7 +2399,7 @@ static int mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp, if (is_last_spte(pte, sp->role.level)) { drop_spte(kvm, spte); } else { - child = to_shadow_page(pte & SPTE_BASE_ADDR_MASK); + child = spte_to_child_sp(pte); drop_parent_pte(child, spte); /* @@ -2838,7 +2838,7 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot, struct kvm_mmu_page *child; u64 pte = *sptep; - child = to_shadow_page(pte & SPTE_BASE_ADDR_MASK); + child = spte_to_child_sp(pte); drop_parent_pte(child, sptep); flush = true; } else if (pfn != spte_to_pfn(*sptep)) { @@ -3455,7 +3455,11 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa, if (!VALID_PAGE(*root_hpa)) return; - sp = to_shadow_page(*root_hpa & SPTE_BASE_ADDR_MASK); + /* + * The "root" may be a special root, e.g. a PAE entry, treat it as a + * SPTE to ensure any non-PA bits are dropped. 
+ */ + sp = spte_to_child_sp(*root_hpa); if (WARN_ON(!sp)) return; @@ -3940,8 +3944,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) hpa_t root = vcpu->arch.mmu->pae_root[i]; if (IS_VALID_PAE_ROOT(root)) { - root &= SPTE_BASE_ADDR_MASK; - sp = to_shadow_page(root); + sp = spte_to_child_sp(root); mmu_sync_children(vcpu, sp, true); } } diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index 22152241bd29..dbaf6755c5a7 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -133,18 +133,6 @@ struct kvm_mmu_page { extern struct kmem_cache *mmu_page_header_cache; -static inline struct kvm_mmu_page *to_shadow_page(hpa_t shadow_page) -{ - struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT); - - return (struct kvm_mmu_page *)page_private(page); -} - -static inline struct kvm_mmu_page *sptep_to_sp(u64 *sptep) -{ - return to_shadow_page(__pa(sptep)); -} - static inline int kvm_mmu_role_as_id(union kvm_mmu_page_role role) { return role.smm ? 
1 : 0; diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h index 79560d77aa4c..1f03701b943a 100644 --- a/arch/x86/kvm/mmu/spte.h +++ b/arch/x86/kvm/mmu/spte.h @@ -219,6 +219,23 @@ static inline int spte_index(u64 *sptep) */ extern u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask; +static inline struct kvm_mmu_page *to_shadow_page(hpa_t shadow_page) +{ + struct page *page = pfn_to_page((shadow_page) >> PAGE_SHIFT); + + return (struct kvm_mmu_page *)page_private(page); +} + +static inline struct kvm_mmu_page *spte_to_child_sp(u64 spte) +{ + return to_shadow_page(spte & SPTE_BASE_ADDR_MASK); +} + +static inline struct kvm_mmu_page *sptep_to_sp(u64 *sptep) +{ + return to_shadow_page(__pa(sptep)); +} + static inline bool is_mmio_spte(u64 spte) { return (spte & shadow_mmio_mask) == shadow_mmio_value && diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h index c163f7cc23ca..d3714200b932 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.h +++ b/arch/x86/kvm/mmu/tdp_mmu.h @@ -5,6 +5,8 @@ #include +#include "spte.h" + hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu); __must_check static inline bool kvm_tdp_mmu_get_root(struct kvm_mmu_page *root) From 76901e56fb517db61939efdf54e9581b117d615d Mon Sep 17 00:00:00 2001 From: Mingwei Zhang Date: Wed, 19 Oct 2022 16:56:17 +0000 Subject: [PATCH 1213/4122] KVM: x86/mmu: explicitly check nx_hugepage in disallowed_hugepage_adjust() Explicitly check if a NX huge page is disallowed when determining if a page fault needs to be forced to use a smaller sized page. KVM currently assumes that the NX huge page mitigation is the only scenario where KVM will force a shadow page instead of a huge page, and so unnecessarily keeps an existing shadow page instead of replacing it with a huge page. Any scenario that causes KVM to zap leaf SPTEs may result in having a SP that can be made huge without violating the NX huge page mitigation. E.g. 
prior to commit 5ba7c4c6d1c7 ("KVM: x86/MMU: Zap non-leaf SPTEs when disabling dirty logging"), KVM would keep shadow pages after disabling dirty logging due to a live migration being canceled, resulting in degraded performance due to running with 4kb pages instead of huge pages. Although the dirty logging case is "fixed", that fix is coincidental, i.e. is an implementation detail, and there are other scenarios where KVM will zap leaf SPTEs. E.g. zapping leaf SPTEs in response to a host page migration (mmu_notifier invalidation) to create a huge page would yield a similar result; KVM would see the shadow-present non-leaf SPTE and assume a huge page is disallowed. Fixes: b8e8c8303ff2 ("kvm: mmu: ITLB_MULTIHIT mitigation") Reviewed-by: Ben Gardon Reviewed-by: David Matlack Signed-off-by: Mingwei Zhang [sean: use spte_to_child_sp(), massage changelog, fold into if-statement] Signed-off-by: Sean Christopherson Reviewed-by: Yan Zhao Message-Id: <20221019165618.927057-8-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 5f9b57c6e506..efce5e4e24c3 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3112,7 +3112,8 @@ void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_ if (cur_level > PG_LEVEL_4K && cur_level == fault->goal_level && is_shadow_present_pte(spte) && - !is_large_pte(spte)) { + !is_large_pte(spte) && + spte_to_child_sp(spte)->nx_huge_page_disallowed) { /* * A small SPTE exists for this pfn, but FNAME(fetch) * and __direct_map would like to create a large PTE From 3a05675722250a522c148f6de0cc190f407c4bb5 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 19 Oct 2022 16:56:18 +0000 Subject: [PATCH 1214/4122] KVM: x86/mmu: WARN if TDP MMU SP disallows hugepage after being zapped Extend the accounting sanity check in kvm_recover_nx_huge_pages() to the TDP MMU, i.e. 
verify that zapping a shadow page unaccounts the disallowed NX huge page regardless of the MMU type. Recovery runs while holding mmu_lock for write and so it should be impossible to get false positives on the WARN. Suggested-by: Yan Zhao Signed-off-by: Sean Christopherson Message-Id: <20221019165618.927057-9-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index efce5e4e24c3..93c389eaf471 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -6875,12 +6875,11 @@ static void kvm_recover_nx_huge_pages(struct kvm *kvm) struct kvm_mmu_page, possible_nx_huge_page_link); WARN_ON_ONCE(!sp->nx_huge_page_disallowed); - if (is_tdp_mmu_page(sp)) { + if (is_tdp_mmu_page(sp)) flush |= kvm_tdp_mmu_zap_sp(kvm, sp); - } else { + else kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); - WARN_ON_ONCE(sp->nx_huge_page_disallowed); - } + WARN_ON_ONCE(sp->nx_huge_page_disallowed); if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) { kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush); From f1c5651fda43e0c62a32456cdc6f254f40457409 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 23 Sep 2022 00:13:52 +0000 Subject: [PATCH 1215/4122] KVM: x86/pmu: Force reprogramming of all counters on PMU filter change Force vCPUs to reprogram all counters on a PMU filter change to provide a sane ABI for userspace. Use the existing KVM_REQ_PMU to do the programming, and take advantage of the fact that the reprogram_pmi bitmap fits in a u64 to set all bits in a single atomic update. Note, setting the bitmap and making the request needs to be done _after_ the SRCU synchronization to ensure that vCPUs will reprogram using the new filter. KVM's current "lazy" approach is confusing and non-deterministic. 
It's confusing because, from a developer perspective, the code is buggy as it makes zero sense to let userspace modify the filter but then not actually enforce the new filter. The lazy approach is non-deterministic because KVM enforces the filter whenever a counter is reprogrammed, not just on guest WRMSRs, i.e. a guest might gain/lose access to an event at random times depending on what is going on in the host. Note, the resulting behavior is still non-deterministic while the filter is in flux. If userspace wants to guarantee deterministic behavior, all vCPUs should be paused during the filter update. Jim Mattson Fixes: 66bb8a065f5a ("KVM: x86: PMU Event Filter") Cc: Aaron Lewis Signed-off-by: Sean Christopherson Message-Id: <20220923001355.3741194-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 11 ++++++++++- arch/x86/kvm/pmu.c | 13 ++++++++++++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 9362b9736d87..58a562bb197c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -527,7 +527,16 @@ struct kvm_pmu { struct kvm_pmc gp_counters[KVM_INTEL_PMC_MAX_GENERIC]; struct kvm_pmc fixed_counters[KVM_PMC_MAX_FIXED]; struct irq_work irq_work; - DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX); + + /* + * Overlay the bitmap with a 64-bit atomic so that all bits can be + * set in a single access, e.g. to reprogram all counters when the PMU + * filter changes. 
+ */ + union { + DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX); + atomic64_t __reprogram_pmi; + }; DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX); DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX); diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index de1fd7369736..bf2c32ea3255 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -577,6 +577,8 @@ EXPORT_SYMBOL_GPL(kvm_pmu_trigger_event); int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp) { struct kvm_pmu_event_filter tmp, *filter; + struct kvm_vcpu *vcpu; + unsigned long i; size_t size; int r; @@ -613,9 +615,18 @@ int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp) mutex_lock(&kvm->lock); filter = rcu_replace_pointer(kvm->arch.pmu_event_filter, filter, mutex_is_locked(&kvm->lock)); + synchronize_srcu_expedited(&kvm->srcu); + + BUILD_BUG_ON(sizeof(((struct kvm_pmu *)0)->reprogram_pmi) > + sizeof(((struct kvm_pmu *)0)->__reprogram_pmi)); + + kvm_for_each_vcpu(i, vcpu, kvm) + atomic64_set(&vcpu_to_pmu(vcpu)->__reprogram_pmi, -1ull); + + kvm_make_all_cpus_request(kvm, KVM_REQ_PMU); + mutex_unlock(&kvm->lock); - synchronize_srcu_expedited(&kvm->srcu); r = 0; cleanup: kfree(filter); From dcbb816a2842e41d3ec22605c6760837d800b20a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 23 Sep 2022 00:13:53 +0000 Subject: [PATCH 1216/4122] KVM: x86/pmu: Clear "reprogram" bit if counter is disabled or disallowed When reprogramming a counter, clear the counter's "reprogram pending" bit if the counter is disabled (by the guest) or is disallowed (by the userspace filter). In both cases, there's no need to re-attempt programming on the next coincident KVM_REQ_PMU as enabling the counter by either method will trigger reprogramming. 
Signed-off-by: Sean Christopherson Message-Id: <20220923001355.3741194-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/pmu.c | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index bf2c32ea3255..be8ce5aeb454 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -150,9 +150,9 @@ static void kvm_perf_overflow(struct perf_event *perf_event, __kvm_perf_overflow(pmc, true); } -static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, - u64 config, bool exclude_user, - bool exclude_kernel, bool intr) +static int pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, u64 config, + bool exclude_user, bool exclude_kernel, + bool intr) { struct kvm_pmu *pmu = pmc_to_pmu(pmc); struct perf_event *event; @@ -204,14 +204,14 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, if (IS_ERR(event)) { pr_debug_ratelimited("kvm_pmu: event creation failed %ld for pmc->idx = %d\n", PTR_ERR(event), pmc->idx); - return; + return PTR_ERR(event); } pmc->perf_event = event; pmc_to_pmu(pmc)->event_count++; - clear_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi); pmc->is_paused = false; pmc->intr = intr || pebs; + return 0; } static void pmc_pause_counter(struct kvm_pmc *pmc) @@ -245,7 +245,6 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc) perf_event_enable(pmc->perf_event); pmc->is_paused = false; - clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi); return true; } @@ -303,10 +302,10 @@ void reprogram_counter(struct kvm_pmc *pmc) pmc_pause_counter(pmc); if (!pmc_speculative_in_use(pmc) || !pmc_is_enabled(pmc)) - return; + goto reprogram_complete; if (!check_pmu_event_filter(pmc)) - return; + goto reprogram_complete; if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL) printk_once("kvm pmu: pin control bit is ignored\n"); @@ -324,16 +323,27 @@ void reprogram_counter(struct kvm_pmc *pmc) } if (pmc->current_config == new_config && 
pmc_resume_counter(pmc)) - return; + goto reprogram_complete; pmc_release_perf_event(pmc); pmc->current_config = new_config; - pmc_reprogram_counter(pmc, PERF_TYPE_RAW, - (eventsel & pmu->raw_event_mask), - !(eventsel & ARCH_PERFMON_EVENTSEL_USR), - !(eventsel & ARCH_PERFMON_EVENTSEL_OS), - eventsel & ARCH_PERFMON_EVENTSEL_INT); + + /* + * If reprogramming fails, e.g. due to contention, leave the counter's + * regprogram bit set, i.e. opportunistically try again on the next PMU + * refresh. Don't make a new request as doing so can stall the guest + * if reprogramming repeatedly fails. + */ + if (pmc_reprogram_counter(pmc, PERF_TYPE_RAW, + (eventsel & pmu->raw_event_mask), + !(eventsel & ARCH_PERFMON_EVENTSEL_USR), + !(eventsel & ARCH_PERFMON_EVENTSEL_OS), + eventsel & ARCH_PERFMON_EVENTSEL_INT)) + return; + +reprogram_complete: + clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi); } EXPORT_SYMBOL_GPL(reprogram_counter); From 68fb4757e8678894530ee0b15c29a3567207b970 Mon Sep 17 00:00:00 2001 From: Like Xu Date: Fri, 23 Sep 2022 00:13:54 +0000 Subject: [PATCH 1217/4122] KVM: x86/pmu: Defer reprogram_counter() to kvm_pmu_handle_event() Batch reprogramming PMU counters by setting KVM_REQ_PMU and thus deferring reprogramming to kvm_pmu_handle_event() to avoid reprogramming a counter multiple times during a single VM-Exit. Deferring programming will also allow KVM to fix a bug where immediately reprogramming a counter can result in sleeping (taking a mutex) while interrupts are disabled in the VM-Exit fastpath. Introduce kvm_pmu_request_counter_reprogam() to make it obvious that KVM is _requesting_ a reprogram and not actually doing the reprogram. Opportunistically refine related comments to avoid misunderstandings. 
Signed-off-by: Like Xu Link: https://lore.kernel.org/r/20220831085328.45489-5-likexu@tencent.com Signed-off-by: Sean Christopherson Message-Id: <20220923001355.3741194-4-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/pmu.c | 17 ++++++++++++----- arch/x86/kvm/pmu.h | 6 +++++- arch/x86/kvm/svm/pmu.c | 2 +- arch/x86/kvm/vmx/pmu_intel.c | 6 +++--- 5 files changed, 22 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 58a562bb197c..afadbc4b72c4 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -496,6 +496,7 @@ struct kvm_pmc { struct perf_event *perf_event; struct kvm_vcpu *vcpu; /* + * only for creating or reusing perf_event, * eventsel value for general purpose counters, * ctrl value for fixed counters. */ diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index be8ce5aeb454..3054b35b4143 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -101,7 +101,11 @@ static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi) struct kvm_pmu *pmu = pmc_to_pmu(pmc); bool skip_pmi = false; - /* Ignore counters that have been reprogrammed already. */ + /* + * Ignore overflow events for counters that are scheduled to be + * reprogrammed, e.g. if a PMI for the previous event races with KVM's + * handling of a related guest WRMSR. 
+ */ if (test_and_set_bit(pmc->idx, pmu->reprogram_pmi)) return; @@ -292,7 +296,7 @@ out: return allow_event; } -void reprogram_counter(struct kvm_pmc *pmc) +static void reprogram_counter(struct kvm_pmc *pmc) { struct kvm_pmu *pmu = pmc_to_pmu(pmc); u64 eventsel = pmc->eventsel; @@ -345,7 +349,6 @@ void reprogram_counter(struct kvm_pmc *pmc) reprogram_complete: clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi); } -EXPORT_SYMBOL_GPL(reprogram_counter); void kvm_pmu_handle_event(struct kvm_vcpu *vcpu) { @@ -355,10 +358,11 @@ void kvm_pmu_handle_event(struct kvm_vcpu *vcpu) for_each_set_bit(bit, pmu->reprogram_pmi, X86_PMC_IDX_MAX) { struct kvm_pmc *pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, bit); - if (unlikely(!pmc || !pmc->perf_event)) { + if (unlikely(!pmc)) { clear_bit(bit, pmu->reprogram_pmi); continue; } + reprogram_counter(pmc); } @@ -552,12 +556,15 @@ static inline bool eventsel_match_perf_hw_id(struct kvm_pmc *pmc, static inline bool cpl_is_matched(struct kvm_pmc *pmc) { bool select_os, select_user; - u64 config = pmc->current_config; + u64 config; if (pmc_is_gp(pmc)) { + config = pmc->eventsel; select_os = config & ARCH_PERFMON_EVENTSEL_OS; select_user = config & ARCH_PERFMON_EVENTSEL_USR; } else { + config = fixed_ctrl_field(pmc_to_pmu(pmc)->fixed_ctr_ctrl, + pmc->idx - INTEL_PMC_IDX_FIXED); select_os = config & 0x1; select_user = config & 0x2; } diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index 5cc5721f260b..85ff3c0588ba 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -183,7 +183,11 @@ static inline void kvm_init_pmu_capability(void) KVM_PMC_MAX_FIXED); } -void reprogram_counter(struct kvm_pmc *pmc); +static inline void kvm_pmu_request_counter_reprogam(struct kvm_pmc *pmc) +{ + set_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi); + kvm_make_request(KVM_REQ_PMU, pmc->vcpu); +} void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu); void kvm_pmu_handle_event(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/svm/pmu.c 
b/arch/x86/kvm/svm/pmu.c index 9d65cd095691..c4b322ffdac4 100644 --- a/arch/x86/kvm/svm/pmu.c +++ b/arch/x86/kvm/svm/pmu.c @@ -159,7 +159,7 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) data &= ~pmu->reserved_bits; if (data != pmc->eventsel) { pmc->eventsel = data; - reprogram_counter(pmc); + kvm_pmu_request_counter_reprogam(pmc); } return 0; } diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index b7c3a9874a93..6c80dff37b77 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -52,7 +52,7 @@ static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data) pmc = get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + i); __set_bit(INTEL_PMC_IDX_FIXED + i, pmu->pmc_in_use); - reprogram_counter(pmc); + kvm_pmu_request_counter_reprogam(pmc); } } @@ -76,7 +76,7 @@ static void reprogram_counters(struct kvm_pmu *pmu, u64 diff) for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX) { pmc = intel_pmc_idx_to_pmc(pmu, bit); if (pmc) - reprogram_counter(pmc); + kvm_pmu_request_counter_reprogam(pmc); } } @@ -477,7 +477,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) reserved_bits ^= HSW_IN_TX_CHECKPOINTED; if (!(data & reserved_bits)) { pmc->eventsel = data; - reprogram_counter(pmc); + kvm_pmu_request_counter_reprogam(pmc); return 0; } } else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, false)) From de0f619564f4713bd548b82d535a954ffa1ee7d8 Mon Sep 17 00:00:00 2001 From: Like Xu Date: Fri, 23 Sep 2022 00:13:55 +0000 Subject: [PATCH 1218/4122] KVM: x86/pmu: Defer counter emulated overflow via pmc->prev_counter Defer reprogramming counters and handling overflow via KVM_REQ_PMU when incrementing counters. KVM skips emulated WRMSR in the VM-Exit fastpath, the fastpath runs with IRQs disabled, skipping instructions can increment and reprogram counters, reprogramming counters can sleep, and sleeping is disallowed while IRQs are disabled. 
[*] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:580 [*] in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 2981888, name: CPU 15/KVM [*] preempt_count: 1, expected: 0 [*] RCU nest depth: 0, expected: 0 [*] INFO: lockdep is turned off. [*] irq event stamp: 0 [*] hardirqs last enabled at (0): [<0000000000000000>] 0x0 [*] hardirqs last disabled at (0): [] copy_process+0x146a/0x62d0 [*] softirqs last enabled at (0): [] copy_process+0x14a9/0x62d0 [*] softirqs last disabled at (0): [<0000000000000000>] 0x0 [*] Preemption disabled at: [*] [] vcpu_enter_guest+0x1001/0x3dc0 [kvm] [*] CPU: 17 PID: 2981888 Comm: CPU 15/KVM Kdump: 5.19.0-rc1-g239111db364c-dirty #2 [*] Call Trace: [*] [*] dump_stack_lvl+0x6c/0x9b [*] __might_resched.cold+0x22e/0x297 [*] __mutex_lock+0xc0/0x23b0 [*] perf_event_ctx_lock_nested+0x18f/0x340 [*] perf_event_pause+0x1a/0x110 [*] reprogram_counter+0x2af/0x1490 [kvm] [*] kvm_pmu_trigger_event+0x429/0x950 [kvm] [*] kvm_skip_emulated_instruction+0x48/0x90 [kvm] [*] handle_fastpath_set_msr_irqoff+0x349/0x3b0 [kvm] [*] vmx_vcpu_run+0x268e/0x3b80 [kvm_intel] [*] vcpu_enter_guest+0x1d22/0x3dc0 [kvm] Add a field to kvm_pmc to track the previous counter value in order to defer overflow detection to kvm_pmu_handle_event() (the counter must be paused before handling overflow, and that may increment the counter). Opportunistically shrink sizeof(struct kvm_pmc) a bit. 
Suggested-by: Wanpeng Li Fixes: 9cd803d496e7 ("KVM: x86: Update vPMCs when retiring instructions") Signed-off-by: Like Xu Link: https://lore.kernel.org/r/20220831085328.45489-6-likexu@tencent.com [sean: avoid re-triggering KVM_REQ_PMU on overflow, tweak changelog] Signed-off-by: Sean Christopherson Message-Id: <20220923001355.3741194-5-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 5 +++-- arch/x86/kvm/pmu.c | 32 ++++++++++++++++---------------- arch/x86/kvm/svm/pmu.c | 2 +- arch/x86/kvm/vmx/pmu_intel.c | 4 ++-- 4 files changed, 22 insertions(+), 21 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index afadbc4b72c4..81114a376c4e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -491,7 +491,10 @@ enum pmc_type { struct kvm_pmc { enum pmc_type type; u8 idx; + bool is_paused; + bool intr; u64 counter; + u64 prev_counter; u64 eventsel; struct perf_event *perf_event; struct kvm_vcpu *vcpu; @@ -501,8 +504,6 @@ struct kvm_pmc { * ctrl value for fixed counters. */ u64 current_config; - bool is_paused; - bool intr; }; /* More counters may conflict with other existing Architectural MSRs */ diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 3054b35b4143..684393c22105 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -101,14 +101,6 @@ static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi) struct kvm_pmu *pmu = pmc_to_pmu(pmc); bool skip_pmi = false; - /* - * Ignore overflow events for counters that are scheduled to be - * reprogrammed, e.g. if a PMI for the previous event races with KVM's - * handling of a related guest WRMSR. 
- */ - if (test_and_set_bit(pmc->idx, pmu->reprogram_pmi)) - return; - if (pmc->perf_event && pmc->perf_event->attr.precise_ip) { if (!in_pmi) { /* @@ -126,7 +118,6 @@ static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi) } else { __set_bit(pmc->idx, (unsigned long *)&pmu->global_status); } - kvm_make_request(KVM_REQ_PMU, pmc->vcpu); if (!pmc->intr || skip_pmi) return; @@ -151,7 +142,17 @@ static void kvm_perf_overflow(struct perf_event *perf_event, { struct kvm_pmc *pmc = perf_event->overflow_handler_context; + /* + * Ignore overflow events for counters that are scheduled to be + * reprogrammed, e.g. if a PMI for the previous event races with KVM's + * handling of a related guest WRMSR. + */ + if (test_and_set_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi)) + return; + __kvm_perf_overflow(pmc, true); + + kvm_make_request(KVM_REQ_PMU, pmc->vcpu); } static int pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, u64 config, @@ -311,6 +312,9 @@ static void reprogram_counter(struct kvm_pmc *pmc) if (!check_pmu_event_filter(pmc)) goto reprogram_complete; + if (pmc->counter < pmc->prev_counter) + __kvm_perf_overflow(pmc, false); + if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL) printk_once("kvm pmu: pin control bit is ignored\n"); @@ -348,6 +352,7 @@ static void reprogram_counter(struct kvm_pmc *pmc) reprogram_complete: clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi); + pmc->prev_counter = 0; } void kvm_pmu_handle_event(struct kvm_vcpu *vcpu) @@ -536,14 +541,9 @@ void kvm_pmu_destroy(struct kvm_vcpu *vcpu) static void kvm_pmu_incr_counter(struct kvm_pmc *pmc) { - u64 prev_count; - - prev_count = pmc->counter; + pmc->prev_counter = pmc->counter; pmc->counter = (pmc->counter + 1) & pmc_bitmask(pmc); - - reprogram_counter(pmc); - if (pmc->counter < prev_count) - __kvm_perf_overflow(pmc, false); + kvm_pmu_request_counter_reprogam(pmc); } static inline bool eventsel_match_perf_hw_id(struct kvm_pmc *pmc, diff --git 
a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c index c4b322ffdac4..0e313fbae055 100644 --- a/arch/x86/kvm/svm/pmu.c +++ b/arch/x86/kvm/svm/pmu.c @@ -212,7 +212,7 @@ static void amd_pmu_reset(struct kvm_vcpu *vcpu) struct kvm_pmc *pmc = &pmu->gp_counters[i]; pmc_stop_counter(pmc); - pmc->counter = pmc->eventsel = 0; + pmc->counter = pmc->prev_counter = pmc->eventsel = 0; } } diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 6c80dff37b77..e5cec07ca8d9 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -646,14 +646,14 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu) pmc = &pmu->gp_counters[i]; pmc_stop_counter(pmc); - pmc->counter = pmc->eventsel = 0; + pmc->counter = pmc->prev_counter = pmc->eventsel = 0; } for (i = 0; i < KVM_PMC_MAX_FIXED; i++) { pmc = &pmu->fixed_counters[i]; pmc_stop_counter(pmc); - pmc->counter = 0; + pmc->counter = pmc->prev_counter = 0; } pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status = 0; From d663b8a285986072428a6a145e5994bc275df994 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 3 Nov 2022 10:44:10 -0400 Subject: [PATCH 1219/4122] KVM: replace direct irq.h inclusion virt/kvm/irqchip.c is including "irq.h" from the arch-specific KVM source directory (i.e. not from arch/*/include) for the sole purpose of retrieving irqchip_in_kernel. Making the function inline in a header that is already included, such as asm/kvm_host.h, is not possible because it needs to look at struct kvm which is defined after asm/kvm_host.h is included. So add a kvm_arch_irqchip_in_kernel non-inline function; irqchip_in_kernel() is only performance critical on arm64 and x86, and the non-inline function is enough on all other architectures. irq.h can then be deleted from all architectures except x86. 
Signed-off-by: Paolo Bonzini --- arch/arm64/kvm/arm.c | 5 +++++ arch/arm64/kvm/irq.h | 16 ---------------- arch/powerpc/kvm/irq.h | 22 ---------------------- arch/powerpc/kvm/powerpc.c | 18 ++++++++++++++++-- arch/s390/kvm/irq.h | 19 ------------------- arch/s390/kvm/kvm-s390.c | 5 +++++ arch/x86/kvm/irq.c | 5 +++++ include/linux/kvm_host.h | 2 ++ virt/kvm/irqchip.c | 3 +-- 9 files changed, 34 insertions(+), 61 deletions(-) delete mode 100644 arch/arm64/kvm/irq.h delete mode 100644 arch/powerpc/kvm/irq.h delete mode 100644 arch/s390/kvm/irq.h diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 94d33e296e10..7b107fa540fa 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -2130,6 +2130,11 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr) return NULL; } +bool kvm_arch_irqchip_in_kernel(struct kvm *kvm) +{ + return irqchip_in_kernel(kvm); +} + bool kvm_arch_has_irq_bypass(void) { return true; diff --git a/arch/arm64/kvm/irq.h b/arch/arm64/kvm/irq.h deleted file mode 100644 index 0d257de42c10..000000000000 --- a/arch/arm64/kvm/irq.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * irq.h: in kernel interrupt controller related definitions - * Copyright (c) 2016 Red Hat, Inc. - * - * This header is included by irqchip.c. However, on ARM, interrupt - * controller declarations are located in include/kvm/arm_vgic.h since - * they are mostly shared between arm and arm64. 
- */ - -#ifndef __IRQ_H -#define __IRQ_H - -#include - -#endif diff --git a/arch/powerpc/kvm/irq.h b/arch/powerpc/kvm/irq.h deleted file mode 100644 index e6463f866abc..000000000000 --- a/arch/powerpc/kvm/irq.h +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __IRQ_H -#define __IRQ_H - -#include - -static inline int irqchip_in_kernel(struct kvm *kvm) -{ - int ret = 0; - -#ifdef CONFIG_KVM_MPIC - ret = ret || (kvm->arch.mpic != NULL); -#endif -#ifdef CONFIG_KVM_XICS - ret = ret || (kvm->arch.xics != NULL); - ret = ret || (kvm->arch.xive != NULL); -#endif - smp_rmb(); - return ret; -} - -#endif diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index b850b0efa201..04494a4fb37a 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -36,7 +36,6 @@ #include #include "timing.h" -#include "irq.h" #include "../mm/mmu_decl.h" #define CREATE_TRACE_POINTS @@ -2165,10 +2164,25 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo) return 0; } +bool kvm_arch_irqchip_in_kernel(struct kvm *kvm) +{ + int ret = 0; + +#ifdef CONFIG_KVM_MPIC + ret = ret || (kvm->arch.mpic != NULL); +#endif +#ifdef CONFIG_KVM_XICS + ret = ret || (kvm->arch.xics != NULL); + ret = ret || (kvm->arch.xive != NULL); +#endif + smp_rmb(); + return ret; +} + int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event, bool line_status) { - if (!irqchip_in_kernel(kvm)) + if (!kvm_arch_irqchip_in_kernel(kvm)) return -ENXIO; irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, diff --git a/arch/s390/kvm/irq.h b/arch/s390/kvm/irq.h deleted file mode 100644 index 484608c71dd0..000000000000 --- a/arch/s390/kvm/irq.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * s390 irqchip routines - * - * Copyright IBM Corp. 
2014 - * - * Author(s): Cornelia Huck - */ -#ifndef __KVM_IRQ_H -#define __KVM_IRQ_H - -#include - -static inline int irqchip_in_kernel(struct kvm *kvm) -{ - return 1; -} - -#endif diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index bc491a73815c..5c7532dbc96b 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -5567,6 +5567,11 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } +bool kvm_arch_irqchip_in_kernel(struct kvm *kvm) +{ + return true; +} + /* Section: memory related */ int kvm_arch_prepare_memory_region(struct kvm *kvm, const struct kvm_memory_slot *old, diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index f371f1292ca3..d8d50558f165 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -165,3 +165,8 @@ bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args) return resample ? irqchip_kernel(kvm) : irqchip_in_kernel(kvm); } + +bool kvm_arch_irqchip_in_kernel(struct kvm *kvm) +{ + return irqchip_in_kernel(kvm); +} diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8fe4665bd020..e6e66c5e56f2 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -663,6 +663,8 @@ struct kvm_irq_routing_table { */ struct hlist_head map[]; }; + +bool kvm_arch_irqchip_in_kernel(struct kvm *kvm); #endif #ifndef KVM_INTERNAL_MEM_SLOTS diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index 58e4f88b2b9f..1e567d1f6d3d 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -17,7 +17,6 @@ #include #include #include -#include "irq.h" int kvm_irq_map_gsi(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *entries, int gsi) @@ -50,7 +49,7 @@ int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) { struct kvm_kernel_irq_routing_entry route; - if (!irqchip_in_kernel(kvm) || (msi->flags & ~KVM_MSI_VALID_DEVID)) + if (!kvm_arch_irqchip_in_kernel(kvm) || (msi->flags & ~KVM_MSI_VALID_DEVID)) return -EINVAL; 
route.msi.address_lo = msi->address_lo; From 837a55847ead27362aac80aa1cf402459a9757f7 Mon Sep 17 00:00:00 2001 From: Daisuke Matsuda Date: Mon, 7 Nov 2022 14:53:38 +0900 Subject: [PATCH 1220/4122] RDMA/rxe: Implement packet length validation on responder The function check_length() is supposed to check the length of inbound packets on responder, but it actually has been a stub since the driver was born. Let it check the payload length and the DMA length. Signed-off-by: Daisuke Matsuda Link: https://lore.kernel.org/r/20221107055338.357184-1-matsuda-daisuke@fujitsu.com Reviewed-by: Li Zhijian Acked-by: Zhu Yanjun Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/rxe/rxe_resp.c | 34 ++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index c32bc12cc82f..382d2053db43 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -393,16 +393,36 @@ static enum resp_states check_resource(struct rxe_qp *qp, static enum resp_states check_length(struct rxe_qp *qp, struct rxe_pkt_info *pkt) { - switch (qp_type(qp)) { - case IB_QPT_RC: - return RESPST_CHK_RKEY; + int mtu = qp->mtu; + u32 payload = payload_size(pkt); + u32 dmalen = reth_len(pkt); - case IB_QPT_UC: - return RESPST_CHK_RKEY; + /* RoCEv2 packets do not have LRH. + * Let's skip checking it. 
+ */ - default: - return RESPST_CHK_RKEY; + if ((pkt->opcode & RXE_START_MASK) && + (pkt->opcode & RXE_END_MASK)) { + /* "only" packets */ + if (payload > mtu) + return RESPST_ERR_LENGTH; + } else if ((pkt->opcode & RXE_START_MASK) || + (pkt->opcode & RXE_MIDDLE_MASK)) { + /* "first" or "middle" packets */ + if (payload != mtu) + return RESPST_ERR_LENGTH; + } else if (pkt->opcode & RXE_END_MASK) { + /* "last" packets */ + if ((payload == 0) || (payload > mtu)) + return RESPST_ERR_LENGTH; } + + if (pkt->opcode & (RXE_WRITE_MASK | RXE_READ_MASK)) { + if (dmalen > (1 << 31)) + return RESPST_ERR_LENGTH; + } + + return RESPST_CHK_RKEY; } static enum resp_states check_rkey(struct rxe_qp *qp, From 5d0557c75b2f2c7a868742d21a3ad94813ca97f4 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 4 Nov 2022 14:18:03 +0200 Subject: [PATCH 1221/4122] perf intel-pt: Start turning intel-pt-pkt-decoder-test.c into a suite of intel-pt subtests In preparation for adding more Intel PT testing, rename intel-pt-pkt-decoder-test.c to intel-pt-test.c. Subtests will later be added to intel-pt-test.c. 
Signed-off-by: Adrian Hunter Acked-by: Namhyung Kim Cc: Ian Rogers Cc: Jiri Olsa Link: https://lore.kernel.org/r/20221104121805.5264-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/tests/Build | 2 +- .../x86/tests/{intel-pt-pkt-decoder-test.c => intel-pt-test.c} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename tools/perf/arch/x86/tests/{intel-pt-pkt-decoder-test.c => intel-pt-test.c} (100%) diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build index 70b5bcbc15df..6f4e8636c3bf 100644 --- a/tools/perf/arch/x86/tests/Build +++ b/tools/perf/arch/x86/tests/Build @@ -3,5 +3,5 @@ perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o perf-y += arch-tests.o perf-y += sample-parsing.o -perf-$(CONFIG_AUXTRACE) += insn-x86.o intel-pt-pkt-decoder-test.o +perf-$(CONFIG_AUXTRACE) += insn-x86.o intel-pt-test.o perf-$(CONFIG_X86_64) += bp-modify.o diff --git a/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c b/tools/perf/arch/x86/tests/intel-pt-test.c similarity index 100% rename from tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c rename to tools/perf/arch/x86/tests/intel-pt-test.c From 828143f8da2856014df3102f63f7e4e4dc2d1c22 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 4 Nov 2022 14:18:04 +0200 Subject: [PATCH 1222/4122] perf intel-pt: Redefine test_suite to allow for adding more subtests In preparation for adding more Intel PT testing, redefine the test_suite to allow for adding more subtests. 
Signed-off-by: Adrian Hunter Acked-by: Namhyung Kim Cc: Ian Rogers Cc: Jiri Olsa Link: https://lore.kernel.org/r/20221104121805.5264-3-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/tests/arch-tests.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c index 04018b8aa85b..8d5e4a0831d5 100644 --- a/tools/perf/arch/x86/tests/arch-tests.c +++ b/tools/perf/arch/x86/tests/arch-tests.c @@ -5,7 +5,17 @@ #ifdef HAVE_AUXTRACE_SUPPORT DEFINE_SUITE("x86 instruction decoder - new instructions", insn_x86); -DEFINE_SUITE("Intel PT packet decoder", intel_pt_pkt_decoder); + +static struct test_case intel_pt_tests[] = { + TEST_CASE("Intel PT packet decoder", intel_pt_pkt_decoder), + { .name = NULL, } +}; + +struct test_suite suite__intel_pt = { + .desc = "Intel PT packet decoder", + .test_cases = intel_pt_tests, +}; + #endif #if defined(__x86_64__) DEFINE_SUITE("x86 bp modify", bp_modify); @@ -18,7 +28,7 @@ struct test_suite *arch_tests[] = { #endif #ifdef HAVE_AUXTRACE_SUPPORT &suite__insn_x86, - &suite__intel_pt_pkt_decoder, + &suite__intel_pt, #endif #if defined(__x86_64__) &suite__bp_modify, From 44a037f54b97e4215a282d39d0f7f28c588f185c Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 4 Nov 2022 14:18:05 +0200 Subject: [PATCH 1223/4122] perf intel-pt: Add hybrid CPU compatibility test The kernel driver assumes hybrid CPUs will have Intel PT capabilities that are compatible with the boot CPU. Add a test to check that is the case. 
Signed-off-by: Adrian Hunter Acked-by: Namhyung Kim Cc: Ian Rogers Cc: Jiri Olsa Link: https://lore.kernel.org/r/20221104121805.5264-4-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/include/arch-tests.h | 1 + tools/perf/arch/x86/tests/arch-tests.c | 3 +- tools/perf/arch/x86/tests/intel-pt-test.c | 154 ++++++++++++++++++++++ 3 files changed, 157 insertions(+), 1 deletion(-) diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h index 6a1a1b3c0827..902e9ea9b99e 100644 --- a/tools/perf/arch/x86/include/arch-tests.h +++ b/tools/perf/arch/x86/include/arch-tests.h @@ -8,6 +8,7 @@ struct test_suite; int test__rdpmc(struct test_suite *test, int subtest); int test__insn_x86(struct test_suite *test, int subtest); int test__intel_pt_pkt_decoder(struct test_suite *test, int subtest); +int test__intel_pt_hybrid_compat(struct test_suite *test, int subtest); int test__bp_modify(struct test_suite *test, int subtest); int test__x86_sample_parsing(struct test_suite *test, int subtest); diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c index 8d5e4a0831d5..aae6ea0fe52b 100644 --- a/tools/perf/arch/x86/tests/arch-tests.c +++ b/tools/perf/arch/x86/tests/arch-tests.c @@ -8,11 +8,12 @@ DEFINE_SUITE("x86 instruction decoder - new instructions", insn_x86); static struct test_case intel_pt_tests[] = { TEST_CASE("Intel PT packet decoder", intel_pt_pkt_decoder), + TEST_CASE("Intel PT hybrid CPU compatibility", intel_pt_hybrid_compat), { .name = NULL, } }; struct test_suite suite__intel_pt = { - .desc = "Intel PT packet decoder", + .desc = "Intel PT", .test_cases = intel_pt_tests, }; diff --git a/tools/perf/arch/x86/tests/intel-pt-test.c b/tools/perf/arch/x86/tests/intel-pt-test.c index 42237656f453..70b7f79396b1 100644 --- a/tools/perf/arch/x86/tests/intel-pt-test.c +++ b/tools/perf/arch/x86/tests/intel-pt-test.c @@ -1,12 +1,17 @@ // SPDX-License-Identifier: GPL-2.0 
+#include +#include #include +#include +#include #include "intel-pt-decoder/intel-pt-pkt-decoder.h" #include "debug.h" #include "tests/tests.h" #include "arch-tests.h" +#include "cpumap.h" /** * struct test_data - Test data. @@ -313,3 +318,152 @@ int test__intel_pt_pkt_decoder(struct test_suite *test __maybe_unused, int subte return TEST_OK; } + +static int setaffinity(int cpu) +{ + cpu_set_t cpu_set; + + CPU_ZERO(&cpu_set); + CPU_SET(cpu, &cpu_set); + if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set)) { + pr_debug("sched_setaffinity() failed for CPU %d\n", cpu); + return -1; + } + return 0; +} + +#define INTEL_PT_ADDR_FILT_CNT_MASK GENMASK(2, 0) +#define INTEL_PT_SUBLEAF_CNT 2 +#define CPUID_REG_CNT 4 + +struct cpuid_result { + union { + struct { + unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; + }; + unsigned int reg[CPUID_REG_CNT]; + }; +}; + +struct pt_caps { + struct cpuid_result subleaf[INTEL_PT_SUBLEAF_CNT]; +}; + +static int get_pt_caps(int cpu, struct pt_caps *caps) +{ + struct cpuid_result r; + int i; + + if (setaffinity(cpu)) + return -1; + + memset(caps, 0, sizeof(*caps)); + + for (i = 0; i < INTEL_PT_SUBLEAF_CNT; i++) { + __get_cpuid_count(20, i, &r.eax, &r.ebx, &r.ecx, &r.edx); + pr_debug("CPU %d CPUID leaf 20 subleaf %d\n", cpu, i); + pr_debug("eax = 0x%08x\n", r.eax); + pr_debug("ebx = 0x%08x\n", r.ebx); + pr_debug("ecx = 0x%08x\n", r.ecx); + pr_debug("edx = 0x%08x\n", r.edx); + caps->subleaf[i] = r; + } + + return 0; +} + +static bool is_hydrid(void) +{ + unsigned int eax, ebx, ecx, edx = 0; + bool result; + + __get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx); + result = edx & BIT(15); + pr_debug("Is %shybrid : CPUID leaf 7 subleaf 0 edx %#x (bit-15 indicates hybrid)\n", + result ? 
"" : "not ", edx); + return result; +} + +static int compare_caps(int cpu, struct pt_caps *caps, struct pt_caps *caps0) +{ + struct pt_caps mask = { /* Mask of bits to check*/ + .subleaf = { + [0] = { + .ebx = GENMASK(8, 0), + .ecx = GENMASK(3, 0), + }, + [1] = { + .eax = GENMASK(31, 16), + .ebx = GENMASK(31, 0), + } + } + }; + unsigned int m, reg, reg0; + int ret = 0; + int i, j; + + for (i = 0; i < INTEL_PT_SUBLEAF_CNT; i++) { + for (j = 0; j < CPUID_REG_CNT; j++) { + m = mask.subleaf[i].reg[j]; + reg = m & caps->subleaf[i].reg[j]; + reg0 = m & caps0->subleaf[i].reg[j]; + if ((reg & reg0) != reg0) { + pr_debug("CPU %d subleaf %d reg %d FAIL %#x vs %#x\n", + cpu, i, j, reg, reg0); + ret = -1; + } + } + } + + m = INTEL_PT_ADDR_FILT_CNT_MASK; + reg = m & caps->subleaf[1].eax; + reg0 = m & caps0->subleaf[1].eax; + if (reg < reg0) { + pr_debug("CPU %d subleaf 1 reg 0 FAIL address filter count %#x vs %#x\n", + cpu, reg, reg0); + ret = -1; + } + + if (!ret) + pr_debug("CPU %d OK\n", cpu); + + return ret; +} + +int test__intel_pt_hybrid_compat(struct test_suite *test, int subtest) +{ + int max_cpu = cpu__max_cpu().cpu; + struct pt_caps last_caps; + struct pt_caps caps0; + int ret = TEST_OK; + int cpu; + + if (!is_hydrid()) { + test->test_cases[subtest].skip_reason = "not hybrid"; + return TEST_SKIP; + } + + if (get_pt_caps(0, &caps0)) + return TEST_FAIL; + + for (cpu = 1, last_caps = caps0; cpu < max_cpu; cpu++) { + struct pt_caps caps; + + if (get_pt_caps(cpu, &caps)) { + pr_debug("CPU %d not found\n", cpu); + continue; + } + if (!memcmp(&caps, &last_caps, sizeof(caps))) { + pr_debug("CPU %d same caps as previous CPU\n", cpu); + continue; + } + if (compare_caps(cpu, &caps, &caps0)) + ret = TEST_FAIL; + last_caps = caps; + } + + return ret; +} From aa382ffa705bea9931ec92b6f3c70e1fdb372195 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Tue, 8 Nov 2022 17:05:59 -0600 Subject: [PATCH 1224/4122] PCI/sysfs: Fix double free in error path When pci_create_attr() fails, 
pci_remove_resource_files() is called which will iterate over the res_attr[_wc] arrays and free every non-NULL entry. To avoid a double free here, set the array entry only after it's clear we successfully initialized it. Fixes: b562ec8f74e4 ("PCI: Don't leak memory if sysfs_create_bin_file() fails") Link: https://lore.kernel.org/r/20221007070735.GX986@pengutronix.de/ Signed-off-by: Sascha Hauer Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org --- drivers/pci/pci-sysfs.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 0a2eeb82cebd..ba38fc47d35e 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -1175,11 +1175,9 @@ static int pci_create_attr(struct pci_dev *pdev, int num, int write_combine) sysfs_bin_attr_init(res_attr); if (write_combine) { - pdev->res_attr_wc[num] = res_attr; sprintf(res_attr_name, "resource%d_wc", num); res_attr->mmap = pci_mmap_resource_wc; } else { - pdev->res_attr[num] = res_attr; sprintf(res_attr_name, "resource%d", num); if (pci_resource_flags(pdev, num) & IORESOURCE_IO) { res_attr->read = pci_read_resource_io; @@ -1197,10 +1195,17 @@ static int pci_create_attr(struct pci_dev *pdev, int num, int write_combine) res_attr->size = pci_resource_len(pdev, num); res_attr->private = (void *)(unsigned long)num; retval = sysfs_create_bin_file(&pdev->dev.kobj, res_attr); - if (retval) + if (retval) { kfree(res_attr); + return retval; + } - return retval; + if (write_combine) + pdev->res_attr_wc[num] = res_attr; + else + pdev->res_attr[num] = res_attr; + + return 0; } /** From a39a1466dae5e3ed0dd7c03334a60894e4d1f334 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Thu, 13 Oct 2022 14:46:36 -0700 Subject: [PATCH 1225/4122] MAINTAINERS: git://github -> https://github.com for awilliam MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Github deprecated the git:// links about a year ago, so let's move to 
the https:// URLs instead. Reported-by: Conor Dooley Link: https://github.blog/2021-09-01-improving-git-protocol-security-github/ Signed-off-by: Palmer Dabbelt Reviewed-by: Philippe Mathieu-Daudé Tested-by: Philippe Mathieu-Daudé Link: https://lore.kernel.org/r/20221013214636.30721-1-palmer@rivosinc.com Signed-off-by: Alex Williamson --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 046ff06ff97f..daa6a7a755ec 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21539,7 +21539,7 @@ M: Alex Williamson R: Cornelia Huck L: kvm@vger.kernel.org S: Maintained -T: git git://github.com/awilliam/linux-vfio.git +T: git https://github.com/awilliam/linux-vfio.git F: Documentation/ABI/testing/sysfs-devices-vfio-dev F: Documentation/driver-api/vfio.rst F: drivers/vfio/ From cd48ebc5c4f2e94830b238f035ebf04f1c3a4433 Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Thu, 22 Sep 2022 20:35:07 +0800 Subject: [PATCH 1226/4122] vfio/mlx5: Switch to use module_pci_driver() macro Since pci provides the helper macro module_pci_driver(), we may replace the module_init/exit with it. 
Signed-off-by: Shang XiaoJing Reviewed-by: Yishai Hadas Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220922123507.11222-1-shangxiaojing@huawei.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/mlx5/main.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c index fd6ccb8454a2..457138b92f13 100644 --- a/drivers/vfio/pci/mlx5/main.c +++ b/drivers/vfio/pci/mlx5/main.c @@ -676,18 +676,7 @@ static struct pci_driver mlx5vf_pci_driver = { .driver_managed_dma = true, }; -static void __exit mlx5vf_pci_cleanup(void) -{ - pci_unregister_driver(&mlx5vf_pci_driver); -} - -static int __init mlx5vf_pci_init(void) -{ - return pci_register_driver(&mlx5vf_pci_driver); -} - -module_init(mlx5vf_pci_init); -module_exit(mlx5vf_pci_cleanup); +module_pci_driver(mlx5vf_pci_driver); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Max Gurtovoy "); From e67e070632a665c932d534b8b800477bb3111449 Mon Sep 17 00:00:00 2001 From: Rafael Mendonca Date: Tue, 18 Oct 2022 12:28:25 -0300 Subject: [PATCH 1227/4122] vfio: platform: Do not pass return buffer to ACPI _RST method The ACPI _RST method has no return value, there's no need to pass a return buffer to acpi_evaluate_object(). 
Fixes: d30daa33ec1d ("vfio: platform: call _RST method when using ACPI") Signed-off-by: Rafael Mendonca Reviewed-by: Eric Auger Link: https://lore.kernel.org/r/20221018152825.891032-1-rafaelmendsr@gmail.com Signed-off-by: Alex Williamson --- drivers/vfio/platform/vfio_platform_common.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index 55dc4f43c31e..1a0a238ffa35 100644 --- a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -72,12 +72,11 @@ static int vfio_platform_acpi_call_reset(struct vfio_platform_device *vdev, const char **extra_dbg) { #ifdef CONFIG_ACPI - struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; struct device *dev = vdev->device; acpi_handle handle = ACPI_HANDLE(dev); acpi_status acpi_ret; - acpi_ret = acpi_evaluate_object(handle, "_RST", NULL, &buffer); + acpi_ret = acpi_evaluate_object(handle, "_RST", NULL, NULL); if (ACPI_FAILURE(acpi_ret)) { if (extra_dbg) *extra_dbg = acpi_format_exception(acpi_ret); From ea00d4ededcd8639f9e814513426cfeccdd3aaf0 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Tue, 25 Oct 2022 20:31:13 +0100 Subject: [PATCH 1228/4122] vfio/iova_bitmap: Explicitly include linux/slab.h kzalloc/kzfree are used so include `slab.h`. While it happens to work without it, due to commit 8b9f3ac5b01d ("fs: introduce alloc_inode_sb() to allocate filesystems specific inode") which indirectly includes via: . ./include/linux/mm.h .. ./include/linux/huge_mm.h ... ./include/linux/fs.h .... ./include/linux/slab.h Make it explicit should any of its indirect dependencies be dropped/changed for entirely different reasons as it was the cause prior to commit above recently (i.e. <= v5.18). 
Signed-off-by: Joao Martins Link: https://lore.kernel.org/r/20221025193114.58695-2-joao.m.martins@oracle.com Signed-off-by: Alex Williamson --- drivers/vfio/iova_bitmap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/vfio/iova_bitmap.c b/drivers/vfio/iova_bitmap.c index 6631e8befe1b..56816ba1be9b 100644 --- a/drivers/vfio/iova_bitmap.c +++ b/drivers/vfio/iova_bitmap.c @@ -5,6 +5,7 @@ */ #include #include +#include #include #define BITS_PER_PAGE (PAGE_SIZE * BITS_PER_BYTE) From f38044e5ef58ad0346fdabd7027ea5c1e1a3b624 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Tue, 25 Oct 2022 20:31:14 +0100 Subject: [PATCH 1229/4122] vfio/iova_bitmap: Fix PAGE_SIZE unaligned bitmaps iova_bitmap_set() doesn't consider the end of the page boundary when the first bitmap page offset isn't zero, and wrongly changes the consecutive page right after. Consequently this leads to dirty pages reported by the device being missed, as seen from the VMM. The current logic iterates over a given number of base pages and clamps it to the remaining indexes to iterate in the last page. Instead of having to consider extra pages to pin (e.g. first and extra pages), just handle the first page as its own range and let the rest of the bitmap be handled as if it was base page aligned. This is done by changing iova_bitmap_mapped_remaining() to return PAGE_SIZE - pgoff (on the first bitmap page), and leads to pgoff being set to 0 on following iterations. 
Fixes: 58ccf0190d19 ("vfio: Add an IOVA bitmap support") Reported-by: Avihai Horon Tested-by: Avihai Horon Signed-off-by: Joao Martins Link: https://lore.kernel.org/r/20221025193114.58695-3-joao.m.martins@oracle.com Signed-off-by: Alex Williamson --- drivers/vfio/iova_bitmap.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/vfio/iova_bitmap.c b/drivers/vfio/iova_bitmap.c index 56816ba1be9b..de6d6ea5c496 100644 --- a/drivers/vfio/iova_bitmap.c +++ b/drivers/vfio/iova_bitmap.c @@ -296,11 +296,15 @@ void iova_bitmap_free(struct iova_bitmap *bitmap) */ static unsigned long iova_bitmap_mapped_remaining(struct iova_bitmap *bitmap) { - unsigned long remaining; + unsigned long remaining, bytes; + + /* Cap to one page in the first iteration, if PAGE_SIZE unaligned. */ + bytes = !bitmap->mapped.pgoff ? bitmap->mapped.npages << PAGE_SHIFT : + PAGE_SIZE - bitmap->mapped.pgoff; remaining = bitmap->mapped_total_index - bitmap->mapped_base_index; remaining = min_t(unsigned long, remaining, - (bitmap->mapped.npages << PAGE_SHIFT) / sizeof(*bitmap->bitmap)); + bytes / sizeof(*bitmap->bitmap)); return remaining; } From 253b642eec936974cc709ebb6bed83786c391279 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 5 Nov 2022 15:59:24 +0100 Subject: [PATCH 1230/4122] phy: qcom-qmp-pcie: sort device-id table Sort the device-id table by compatible string to make it easier to find and add new entries. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221105145939.20318-2-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index 7c81667dd968..4e5111d19692 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -2282,17 +2282,17 @@ static int qmp_pcie_create(struct device *dev, struct device_node *np, int id, static const struct of_device_id qmp_pcie_of_match_table[] = { { - .compatible = "qcom,msm8998-qmp-pcie-phy", - .data = &msm8998_pciephy_cfg, - }, { - .compatible = "qcom,ipq8074-qmp-pcie-phy", - .data = &ipq8074_pciephy_cfg, + .compatible = "qcom,ipq6018-qmp-pcie-phy", + .data = &ipq6018_pciephy_cfg, }, { .compatible = "qcom,ipq8074-qmp-gen3-pcie-phy", .data = &ipq8074_pciephy_gen3_cfg, }, { - .compatible = "qcom,ipq6018-qmp-pcie-phy", - .data = &ipq6018_pciephy_cfg, + .compatible = "qcom,ipq8074-qmp-pcie-phy", + .data = &ipq8074_pciephy_cfg, + }, { + .compatible = "qcom,msm8998-qmp-pcie-phy", + .data = &msm8998_pciephy_cfg, }, { .compatible = "qcom,sc8180x-qmp-pcie-phy", .data = &sc8180x_pciephy_cfg, @@ -2302,6 +2302,9 @@ static const struct of_device_id qmp_pcie_of_match_table[] = { }, { .compatible = "qcom,sdm845-qmp-pcie-phy", .data = &sdm845_qmp_pciephy_cfg, + }, { + .compatible = "qcom,sdx55-qmp-pcie-phy", + .data = &sdx55_qmp_pciephy_cfg, }, { .compatible = "qcom,sm8250-qmp-gen3x1-pcie-phy", .data = &sm8250_qmp_gen3x1_pciephy_cfg, @@ -2311,9 +2314,6 @@ static const struct of_device_id qmp_pcie_of_match_table[] = { }, { .compatible = "qcom,sm8250-qmp-modem-pcie-phy", .data = &sm8250_qmp_gen3x2_pciephy_cfg, - }, { - .compatible = "qcom,sdx55-qmp-pcie-phy", - .data = &sdx55_qmp_pciephy_cfg, }, { .compatible = "qcom,sm8450-qmp-gen3x1-pcie-phy", .data = 
&sm8450_qmp_gen3x1_pciephy_cfg, From cebc6ca76e400a90cb7cbc9f96f26966167f5b6f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 5 Nov 2022 15:59:25 +0100 Subject: [PATCH 1231/4122] phy: qcom-qmp-pcie: move device-id table Move the device-id table below probe() and next to the driver structure to keep the driver callback functions grouped together. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221105145939.20318-3-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 90 ++++++++++++------------ 1 file changed, 45 insertions(+), 45 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index 4e5111d19692..e66f6adc404b 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -2280,51 +2280,6 @@ static int qmp_pcie_create(struct device *dev, struct device_node *np, int id, return 0; } -static const struct of_device_id qmp_pcie_of_match_table[] = { - { - .compatible = "qcom,ipq6018-qmp-pcie-phy", - .data = &ipq6018_pciephy_cfg, - }, { - .compatible = "qcom,ipq8074-qmp-gen3-pcie-phy", - .data = &ipq8074_pciephy_gen3_cfg, - }, { - .compatible = "qcom,ipq8074-qmp-pcie-phy", - .data = &ipq8074_pciephy_cfg, - }, { - .compatible = "qcom,msm8998-qmp-pcie-phy", - .data = &msm8998_pciephy_cfg, - }, { - .compatible = "qcom,sc8180x-qmp-pcie-phy", - .data = &sc8180x_pciephy_cfg, - }, { - .compatible = "qcom,sdm845-qhp-pcie-phy", - .data = &sdm845_qhp_pciephy_cfg, - }, { - .compatible = "qcom,sdm845-qmp-pcie-phy", - .data = &sdm845_qmp_pciephy_cfg, - }, { - .compatible = "qcom,sdx55-qmp-pcie-phy", - .data = &sdx55_qmp_pciephy_cfg, - }, { - .compatible = "qcom,sm8250-qmp-gen3x1-pcie-phy", - .data = &sm8250_qmp_gen3x1_pciephy_cfg, - }, { - .compatible = "qcom,sm8250-qmp-gen3x2-pcie-phy", - .data = &sm8250_qmp_gen3x2_pciephy_cfg, - }, { - .compatible = "qcom,sm8250-qmp-modem-pcie-phy", - .data 
= &sm8250_qmp_gen3x2_pciephy_cfg, - }, { - .compatible = "qcom,sm8450-qmp-gen3x1-pcie-phy", - .data = &sm8450_qmp_gen3x1_pciephy_cfg, - }, { - .compatible = "qcom,sm8450-qmp-gen4x2-pcie-phy", - .data = &sm8450_qmp_gen4x2_pciephy_cfg, - }, - { }, -}; -MODULE_DEVICE_TABLE(of, qmp_pcie_of_match_table); - static int qmp_pcie_probe(struct platform_device *pdev) { struct qcom_qmp *qmp; @@ -2408,6 +2363,51 @@ err_node_put: return ret; } +static const struct of_device_id qmp_pcie_of_match_table[] = { + { + .compatible = "qcom,ipq6018-qmp-pcie-phy", + .data = &ipq6018_pciephy_cfg, + }, { + .compatible = "qcom,ipq8074-qmp-gen3-pcie-phy", + .data = &ipq8074_pciephy_gen3_cfg, + }, { + .compatible = "qcom,ipq8074-qmp-pcie-phy", + .data = &ipq8074_pciephy_cfg, + }, { + .compatible = "qcom,msm8998-qmp-pcie-phy", + .data = &msm8998_pciephy_cfg, + }, { + .compatible = "qcom,sc8180x-qmp-pcie-phy", + .data = &sc8180x_pciephy_cfg, + }, { + .compatible = "qcom,sdm845-qhp-pcie-phy", + .data = &sdm845_qhp_pciephy_cfg, + }, { + .compatible = "qcom,sdm845-qmp-pcie-phy", + .data = &sdm845_qmp_pciephy_cfg, + }, { + .compatible = "qcom,sdx55-qmp-pcie-phy", + .data = &sdx55_qmp_pciephy_cfg, + }, { + .compatible = "qcom,sm8250-qmp-gen3x1-pcie-phy", + .data = &sm8250_qmp_gen3x1_pciephy_cfg, + }, { + .compatible = "qcom,sm8250-qmp-gen3x2-pcie-phy", + .data = &sm8250_qmp_gen3x2_pciephy_cfg, + }, { + .compatible = "qcom,sm8250-qmp-modem-pcie-phy", + .data = &sm8250_qmp_gen3x2_pciephy_cfg, + }, { + .compatible = "qcom,sm8450-qmp-gen3x1-pcie-phy", + .data = &sm8450_qmp_gen3x1_pciephy_cfg, + }, { + .compatible = "qcom,sm8450-qmp-gen4x2-pcie-phy", + .data = &sm8450_qmp_gen4x2_pciephy_cfg, + }, + { }, +}; +MODULE_DEVICE_TABLE(of, qmp_pcie_of_match_table); + static struct platform_driver qmp_pcie_driver = { .probe = qmp_pcie_probe, .driver = { From 2fdedef3ea8e8a1a68a1da6eee1537074b308f63 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 5 Nov 2022 15:59:26 +0100 Subject: [PATCH 1232/4122] phy: 
qcom-qmp-pcie: merge driver data The PCIe QMP PHY driver only manages a single PHY so merge the old qcom_qmp and qmp_phy structures and drop the PHY array. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221105145939.20318-4-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 228 +++++++++-------------- 1 file changed, 93 insertions(+), 135 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index e66f6adc404b..667a87e7c917 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -1365,56 +1365,26 @@ struct qmp_phy_cfg { unsigned long pipe_clock_rate; }; -/** - * struct qmp_phy - per-lane phy descriptor - * - * @phy: generic phy - * @cfg: phy specific configuration - * @serdes: iomapped memory space for phy's serdes (i.e. PLL) - * @tx: iomapped memory space for lane's tx - * @rx: iomapped memory space for lane's rx - * @pcs: iomapped memory space for lane's pcs - * @tx2: iomapped memory space for second lane's tx (in dual lane PHYs) - * @rx2: iomapped memory space for second lane's rx (in dual lane PHYs) - * @pcs_misc: iomapped memory space for lane's pcs_misc - * @pipe_clk: pipe clock - * @qmp: QMP phy to which this lane belongs - * @mode: currently selected PHY mode - */ -struct qmp_phy { - struct phy *phy; - const struct qmp_phy_cfg *cfg; - void __iomem *serdes; - void __iomem *tx; - void __iomem *rx; - void __iomem *pcs; - void __iomem *tx2; - void __iomem *rx2; - void __iomem *pcs_misc; - struct clk *pipe_clk; - struct qcom_qmp *qmp; - int mode; -}; - -/** - * struct qcom_qmp - structure holding QMP phy block attributes - * - * @dev: device - * - * @clks: array of clocks required by phy - * @resets: array of resets required by phy - * @vregs: regulator supplies bulk data - * - * @phys: array of per-lane phy descriptors - */ -struct qcom_qmp { +struct qmp_pcie { struct 
device *dev; + const struct qmp_phy_cfg *cfg; + + void __iomem *serdes; + void __iomem *pcs; + void __iomem *pcs_misc; + void __iomem *tx; + void __iomem *rx; + void __iomem *tx2; + void __iomem *rx2; + + struct clk *pipe_clk; struct clk_bulk_data *clks; struct reset_control_bulk_data *resets; struct regulator_bulk_data *vregs; - struct qmp_phy **phys; + struct phy *phy; + int mode; }; static inline void qphy_setbits(void __iomem *base, u32 offset, u32 val) @@ -1850,9 +1820,9 @@ static void qmp_pcie_configure(void __iomem *base, qmp_pcie_configure_lane(base, tbl, num, 0xff); } -static void qmp_pcie_serdes_init(struct qmp_phy *qphy, const struct qmp_phy_cfg_tables *tables) +static void qmp_pcie_serdes_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tables *tables) { - void __iomem *serdes = qphy->serdes; + void __iomem *serdes = qmp->serdes; if (!tables) return; @@ -1860,11 +1830,11 @@ static void qmp_pcie_serdes_init(struct qmp_phy *qphy, const struct qmp_phy_cfg_ qmp_pcie_configure(serdes, tables->serdes, tables->serdes_num); } -static void qmp_pcie_lanes_init(struct qmp_phy *qphy, const struct qmp_phy_cfg_tables *tables) +static void qmp_pcie_lanes_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tables *tables) { - const struct qmp_phy_cfg *cfg = qphy->cfg; - void __iomem *tx = qphy->tx; - void __iomem *rx = qphy->rx; + const struct qmp_phy_cfg *cfg = qmp->cfg; + void __iomem *tx = qmp->tx; + void __iomem *rx = qmp->rx; if (!tables) return; @@ -1872,17 +1842,17 @@ static void qmp_pcie_lanes_init(struct qmp_phy *qphy, const struct qmp_phy_cfg_t qmp_pcie_configure_lane(tx, tables->tx, tables->tx_num, 1); if (cfg->lanes >= 2) - qmp_pcie_configure_lane(qphy->tx2, tables->tx, tables->tx_num, 2); + qmp_pcie_configure_lane(qmp->tx2, tables->tx, tables->tx_num, 2); qmp_pcie_configure_lane(rx, tables->rx, tables->rx_num, 1); if (cfg->lanes >= 2) - qmp_pcie_configure_lane(qphy->rx2, tables->rx, tables->rx_num, 2); + qmp_pcie_configure_lane(qmp->rx2, tables->rx, 
tables->rx_num, 2); } -static void qmp_pcie_pcs_init(struct qmp_phy *qphy, const struct qmp_phy_cfg_tables *tables) +static void qmp_pcie_pcs_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tables *tables) { - void __iomem *pcs = qphy->pcs; - void __iomem *pcs_misc = qphy->pcs_misc; + void __iomem *pcs = qmp->pcs; + void __iomem *pcs_misc = qmp->pcs_misc; if (!tables) return; @@ -1893,9 +1863,8 @@ static void qmp_pcie_pcs_init(struct qmp_phy *qphy, const struct qmp_phy_cfg_tab static int qmp_pcie_init(struct phy *phy) { - struct qmp_phy *qphy = phy_get_drvdata(phy); - struct qcom_qmp *qmp = qphy->qmp; - const struct qmp_phy_cfg *cfg = qphy->cfg; + struct qmp_pcie *qmp = phy_get_drvdata(phy); + const struct qmp_phy_cfg *cfg = qmp->cfg; int ret; ret = regulator_bulk_enable(cfg->num_vregs, qmp->vregs); @@ -1932,9 +1901,8 @@ err_disable_regulators: static int qmp_pcie_exit(struct phy *phy) { - struct qmp_phy *qphy = phy_get_drvdata(phy); - struct qcom_qmp *qmp = qphy->qmp; - const struct qmp_phy_cfg *cfg = qphy->cfg; + struct qmp_pcie *qmp = phy_get_drvdata(phy); + const struct qmp_phy_cfg *cfg = qmp->cfg; reset_control_bulk_assert(cfg->num_resets, qmp->resets); @@ -1947,11 +1915,10 @@ static int qmp_pcie_exit(struct phy *phy) static int qmp_pcie_power_on(struct phy *phy) { - struct qmp_phy *qphy = phy_get_drvdata(phy); - struct qcom_qmp *qmp = qphy->qmp; - const struct qmp_phy_cfg *cfg = qphy->cfg; + struct qmp_pcie *qmp = phy_get_drvdata(phy); + const struct qmp_phy_cfg *cfg = qmp->cfg; const struct qmp_phy_cfg_tables *mode_tables; - void __iomem *pcs = qphy->pcs; + void __iomem *pcs = qmp->pcs; void __iomem *status; unsigned int mask, val; int ret; @@ -1959,26 +1926,26 @@ static int qmp_pcie_power_on(struct phy *phy) qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], cfg->pwrdn_ctrl); - if (qphy->mode == PHY_MODE_PCIE_RC) + if (qmp->mode == PHY_MODE_PCIE_RC) mode_tables = cfg->tables_rc; else mode_tables = cfg->tables_ep; - qmp_pcie_serdes_init(qphy, 
&cfg->tables); - qmp_pcie_serdes_init(qphy, mode_tables); + qmp_pcie_serdes_init(qmp, &cfg->tables); + qmp_pcie_serdes_init(qmp, mode_tables); - ret = clk_prepare_enable(qphy->pipe_clk); + ret = clk_prepare_enable(qmp->pipe_clk); if (ret) { dev_err(qmp->dev, "pipe_clk enable failed err=%d\n", ret); return ret; } /* Tx, Rx, and PCS configurations */ - qmp_pcie_lanes_init(qphy, &cfg->tables); - qmp_pcie_lanes_init(qphy, mode_tables); + qmp_pcie_lanes_init(qmp, &cfg->tables); + qmp_pcie_lanes_init(qmp, mode_tables); - qmp_pcie_pcs_init(qphy, &cfg->tables); - qmp_pcie_pcs_init(qphy, mode_tables); + qmp_pcie_pcs_init(qmp, &cfg->tables); + qmp_pcie_pcs_init(qmp, mode_tables); /* Pull PHY out of reset state */ qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); @@ -2001,27 +1968,27 @@ static int qmp_pcie_power_on(struct phy *phy) return 0; err_disable_pipe_clk: - clk_disable_unprepare(qphy->pipe_clk); + clk_disable_unprepare(qmp->pipe_clk); return ret; } static int qmp_pcie_power_off(struct phy *phy) { - struct qmp_phy *qphy = phy_get_drvdata(phy); - const struct qmp_phy_cfg *cfg = qphy->cfg; + struct qmp_pcie *qmp = phy_get_drvdata(phy); + const struct qmp_phy_cfg *cfg = qmp->cfg; - clk_disable_unprepare(qphy->pipe_clk); + clk_disable_unprepare(qmp->pipe_clk); /* PHY reset */ - qphy_setbits(qphy->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); + qphy_setbits(qmp->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); /* stop SerDes and Phy-Coding-Sublayer */ - qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], + qphy_clrbits(qmp->pcs, cfg->regs[QPHY_START_CTRL], SERDES_START | PCS_START); /* Put PHY into POWER DOWN state: active low */ - qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], + qphy_clrbits(qmp->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], cfg->pwrdn_ctrl); return 0; @@ -2055,12 +2022,12 @@ static int qmp_pcie_disable(struct phy *phy) static int qmp_pcie_set_mode(struct phy *phy, enum phy_mode mode, int submode) { - struct qmp_phy *qphy = phy_get_drvdata(phy); 
+ struct qmp_pcie *qmp = phy_get_drvdata(phy); switch (submode) { case PHY_MODE_PCIE_RC: case PHY_MODE_PCIE_EP: - qphy->mode = submode; + qmp->mode = submode; break; default: dev_err(&phy->dev, "Unsupported submode %d\n", submode); @@ -2072,7 +2039,7 @@ static int qmp_pcie_set_mode(struct phy *phy, enum phy_mode mode, int submode) static int qmp_pcie_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg) { - struct qcom_qmp *qmp = dev_get_drvdata(dev); + struct qmp_pcie *qmp = dev_get_drvdata(dev); int num = cfg->num_vregs; int i; @@ -2088,7 +2055,7 @@ static int qmp_pcie_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg) static int qmp_pcie_reset_init(struct device *dev, const struct qmp_phy_cfg *cfg) { - struct qcom_qmp *qmp = dev_get_drvdata(dev); + struct qmp_pcie *qmp = dev_get_drvdata(dev); int i; int ret; @@ -2109,7 +2076,7 @@ static int qmp_pcie_reset_init(struct device *dev, const struct qmp_phy_cfg *cfg static int qmp_pcie_clk_init(struct device *dev, const struct qmp_phy_cfg *cfg) { - struct qcom_qmp *qmp = dev_get_drvdata(dev); + struct qmp_pcie *qmp = dev_get_drvdata(dev); int num = cfg->num_clks; int i; @@ -2146,7 +2113,7 @@ static void phy_clk_release_provider(void *res) * clk | +-------+ | +-----+ * +---------------+ */ -static int phy_pipe_clk_register(struct qcom_qmp *qmp, struct device_node *np) +static int phy_pipe_clk_register(struct qmp_pcie *qmp, struct device_node *np) { struct clk_fixed_rate *fixed; struct clk_init_data init = { }; @@ -2168,8 +2135,8 @@ static int phy_pipe_clk_register(struct qcom_qmp *qmp, struct device_node *np) * Controllers using QMP PHY-s use 125MHz pipe clock interface * unless other frequency is specified in the PHY config. 
*/ - if (qmp->phys[0]->cfg->pipe_clock_rate) - fixed->fixed_rate = qmp->phys[0]->cfg->pipe_clock_rate; + if (qmp->cfg->pipe_clock_rate) + fixed->fixed_rate = qmp->cfg->pipe_clock_rate; else fixed->fixed_rate = 125000000; @@ -2197,97 +2164,92 @@ static const struct phy_ops qmp_pcie_ops = { .owner = THIS_MODULE, }; -static int qmp_pcie_create(struct device *dev, struct device_node *np, int id, +static int qmp_pcie_create(struct device *dev, struct device_node *np, void __iomem *serdes, const struct qmp_phy_cfg *cfg) { - struct qcom_qmp *qmp = dev_get_drvdata(dev); + struct qmp_pcie *qmp = dev_get_drvdata(dev); struct phy *generic_phy; - struct qmp_phy *qphy; int ret; - qphy = devm_kzalloc(dev, sizeof(*qphy), GFP_KERNEL); - if (!qphy) - return -ENOMEM; + qmp->mode = PHY_MODE_PCIE_RC; - qphy->mode = PHY_MODE_PCIE_RC; + qmp->cfg = cfg; + qmp->serdes = serdes; - qphy->cfg = cfg; - qphy->serdes = serdes; /* * Get memory resources for the PHY: * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2. * For dual lane PHYs: tx2 -> 3, rx2 -> 4, pcs_misc (optional) -> 5 * For single lane PHYs: pcs_misc (optional) -> 3. 
*/ - qphy->tx = devm_of_iomap(dev, np, 0, NULL); - if (IS_ERR(qphy->tx)) - return PTR_ERR(qphy->tx); + qmp->tx = devm_of_iomap(dev, np, 0, NULL); + if (IS_ERR(qmp->tx)) + return PTR_ERR(qmp->tx); if (of_device_is_compatible(dev->of_node, "qcom,sdm845-qhp-pcie-phy")) - qphy->rx = qphy->tx; + qmp->rx = qmp->tx; else - qphy->rx = devm_of_iomap(dev, np, 1, NULL); - if (IS_ERR(qphy->rx)) - return PTR_ERR(qphy->rx); + qmp->rx = devm_of_iomap(dev, np, 1, NULL); + if (IS_ERR(qmp->rx)) + return PTR_ERR(qmp->rx); - qphy->pcs = devm_of_iomap(dev, np, 2, NULL); - if (IS_ERR(qphy->pcs)) - return PTR_ERR(qphy->pcs); + qmp->pcs = devm_of_iomap(dev, np, 2, NULL); + if (IS_ERR(qmp->pcs)) + return PTR_ERR(qmp->pcs); if (cfg->lanes >= 2) { - qphy->tx2 = devm_of_iomap(dev, np, 3, NULL); - if (IS_ERR(qphy->tx2)) - return PTR_ERR(qphy->tx2); + qmp->tx2 = devm_of_iomap(dev, np, 3, NULL); + if (IS_ERR(qmp->tx2)) + return PTR_ERR(qmp->tx2); - qphy->rx2 = devm_of_iomap(dev, np, 4, NULL); - if (IS_ERR(qphy->rx2)) - return PTR_ERR(qphy->rx2); + qmp->rx2 = devm_of_iomap(dev, np, 4, NULL); + if (IS_ERR(qmp->rx2)) + return PTR_ERR(qmp->rx2); - qphy->pcs_misc = devm_of_iomap(dev, np, 5, NULL); + qmp->pcs_misc = devm_of_iomap(dev, np, 5, NULL); } else { - qphy->pcs_misc = devm_of_iomap(dev, np, 3, NULL); + qmp->pcs_misc = devm_of_iomap(dev, np, 3, NULL); } - if (IS_ERR(qphy->pcs_misc) && + if (IS_ERR(qmp->pcs_misc) && of_device_is_compatible(dev->of_node, "qcom,ipq6018-qmp-pcie-phy")) - qphy->pcs_misc = qphy->pcs + 0x400; + qmp->pcs_misc = qmp->pcs + 0x400; - if (IS_ERR(qphy->pcs_misc)) { + if (IS_ERR(qmp->pcs_misc)) { if (cfg->tables.pcs_misc || (cfg->tables_rc && cfg->tables_rc->pcs_misc) || - (cfg->tables_ep && cfg->tables_ep->pcs_misc)) - return PTR_ERR(qphy->pcs_misc); + (cfg->tables_ep && cfg->tables_ep->pcs_misc)) { + return PTR_ERR(qmp->pcs_misc); + } } - qphy->pipe_clk = devm_get_clk_from_child(dev, np, NULL); - if (IS_ERR(qphy->pipe_clk)) { - return dev_err_probe(dev, 
PTR_ERR(qphy->pipe_clk), - "failed to get lane%d pipe clock\n", id); + qmp->pipe_clk = devm_get_clk_from_child(dev, np, NULL); + if (IS_ERR(qmp->pipe_clk)) { + return dev_err_probe(dev, PTR_ERR(qmp->pipe_clk), + "failed to get pipe clock\n"); } generic_phy = devm_phy_create(dev, np, &qmp_pcie_ops); if (IS_ERR(generic_phy)) { ret = PTR_ERR(generic_phy); - dev_err(dev, "failed to create qphy %d\n", ret); + dev_err(dev, "failed to create PHY: %d\n", ret); return ret; } - qphy->phy = generic_phy; - qphy->qmp = qmp; - qmp->phys[id] = qphy; - phy_set_drvdata(generic_phy, qphy); + qmp->phy = generic_phy; + phy_set_drvdata(generic_phy, qmp); return 0; } static int qmp_pcie_probe(struct platform_device *pdev) { - struct qcom_qmp *qmp; struct device *dev = &pdev->dev; struct device_node *child; struct phy_provider *phy_provider; void __iomem *serdes; const struct qmp_phy_cfg *cfg = NULL; + struct qmp_pcie *qmp; int num, id; int ret; @@ -2326,14 +2288,10 @@ static int qmp_pcie_probe(struct platform_device *pdev) if (num > 1) return -EINVAL; - qmp->phys = devm_kcalloc(dev, num, sizeof(*qmp->phys), GFP_KERNEL); - if (!qmp->phys) - return -ENOMEM; - id = 0; for_each_available_child_of_node(dev->of_node, child) { /* Create per-lane phy */ - ret = qmp_pcie_create(dev, child, id, serdes, cfg); + ret = qmp_pcie_create(dev, child, serdes, cfg); if (ret) { dev_err(dev, "failed to create lane%d phy, %d\n", id, ret); From 393ed5d515494250712dd1703418bb541765afe3 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 5 Nov 2022 15:59:27 +0100 Subject: [PATCH 1233/4122] phy: qcom-qmp-pcie: clean up device-tree parsing Since the QMP driver split there will be at most a single child node so drop the obsolete iteration construct. While at it, drop the verbose error logging that would have been printed also on probe deferrals. 
Note that there's no need to check if there are additional child nodes (the kernel is not a devicetree validator), but let's return an error if there are no child nodes at all for now. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221105145939.20318-5-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 34 +++++++----------------- 1 file changed, 9 insertions(+), 25 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index 667a87e7c917..bc96518ad6b0 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -2250,7 +2250,6 @@ static int qmp_pcie_probe(struct platform_device *pdev) void __iomem *serdes; const struct qmp_phy_cfg *cfg = NULL; struct qmp_pcie *qmp; - int num, id; int ret; qmp = devm_kzalloc(dev, sizeof(*qmp), GFP_KERNEL); @@ -2283,34 +2282,19 @@ static int qmp_pcie_probe(struct platform_device *pdev) if (ret) return ret; - num = of_get_available_child_count(dev->of_node); - /* do we have a rogue child node ? */ - if (num > 1) + child = of_get_next_available_child(dev->of_node, NULL); + if (!child) return -EINVAL; - id = 0; - for_each_available_child_of_node(dev->of_node, child) { - /* Create per-lane phy */ - ret = qmp_pcie_create(dev, child, serdes, cfg); - if (ret) { - dev_err(dev, "failed to create lane%d phy, %d\n", - id, ret); - goto err_node_put; - } + ret = qmp_pcie_create(dev, child, serdes, cfg); + if (ret) + goto err_node_put; - /* - * Register the pipe clock provided by phy. - * See function description to see details of this pipe clock. 
- */ - ret = phy_pipe_clk_register(qmp, child); - if (ret) { - dev_err(qmp->dev, - "failed to register pipe clock source\n"); - goto err_node_put; - } + ret = phy_pipe_clk_register(qmp, child); + if (ret) + goto err_node_put; - id++; - } + of_node_put(child); phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); From 52b997732eb6bf64df242403c7510520d89be266 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 5 Nov 2022 15:59:28 +0100 Subject: [PATCH 1234/4122] phy: qcom-qmp-pcie: clean up probe initialisation Stop abusing the driver data pointer and instead pass the driver state structure directly to the initialisation helpers during probe. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221105145939.20318-6-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 49 +++++++++++------------- 1 file changed, 23 insertions(+), 26 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index bc96518ad6b0..e30cbc94cbf6 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -2037,9 +2037,10 @@ static int qmp_pcie_set_mode(struct phy *phy, enum phy_mode mode, int submode) return 0; } -static int qmp_pcie_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg) +static int qmp_pcie_vreg_init(struct qmp_pcie *qmp) { - struct qmp_pcie *qmp = dev_get_drvdata(dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; + struct device *dev = qmp->dev; int num = cfg->num_vregs; int i; @@ -2053,9 +2054,10 @@ static int qmp_pcie_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg) return devm_regulator_bulk_get(dev, num, qmp->vregs); } -static int qmp_pcie_reset_init(struct device *dev, const struct qmp_phy_cfg *cfg) +static int qmp_pcie_reset_init(struct qmp_pcie *qmp) { - struct qmp_pcie *qmp = dev_get_drvdata(dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; + struct 
device *dev = qmp->dev; int i; int ret; @@ -2074,9 +2076,10 @@ static int qmp_pcie_reset_init(struct device *dev, const struct qmp_phy_cfg *cfg return 0; } -static int qmp_pcie_clk_init(struct device *dev, const struct qmp_phy_cfg *cfg) +static int qmp_pcie_clk_init(struct qmp_pcie *qmp) { - struct qmp_pcie *qmp = dev_get_drvdata(dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; + struct device *dev = qmp->dev; int num = cfg->num_clks; int i; @@ -2164,18 +2167,15 @@ static const struct phy_ops qmp_pcie_ops = { .owner = THIS_MODULE, }; -static int qmp_pcie_create(struct device *dev, struct device_node *np, - void __iomem *serdes, const struct qmp_phy_cfg *cfg) +static int qmp_pcie_create(struct qmp_pcie *qmp, struct device_node *np) { - struct qmp_pcie *qmp = dev_get_drvdata(dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; + struct device *dev = qmp->dev; struct phy *generic_phy; int ret; qmp->mode = PHY_MODE_PCIE_RC; - qmp->cfg = cfg; - qmp->serdes = serdes; - /* * Get memory resources for the PHY: * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2. 
@@ -2247,8 +2247,6 @@ static int qmp_pcie_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct device_node *child; struct phy_provider *phy_provider; - void __iomem *serdes; - const struct qmp_phy_cfg *cfg = NULL; struct qmp_pcie *qmp; int ret; @@ -2257,28 +2255,27 @@ static int qmp_pcie_probe(struct platform_device *pdev) return -ENOMEM; qmp->dev = dev; - dev_set_drvdata(dev, qmp); - cfg = of_device_get_match_data(dev); - if (!cfg) + qmp->cfg = of_device_get_match_data(dev); + if (!qmp->cfg) return -EINVAL; - WARN_ON_ONCE(!cfg->pwrdn_ctrl); - WARN_ON_ONCE(!cfg->phy_status); + WARN_ON_ONCE(!qmp->cfg->pwrdn_ctrl); + WARN_ON_ONCE(!qmp->cfg->phy_status); - serdes = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(serdes)) - return PTR_ERR(serdes); + qmp->serdes = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(qmp->serdes)) + return PTR_ERR(qmp->serdes); - ret = qmp_pcie_clk_init(dev, cfg); + ret = qmp_pcie_clk_init(qmp); if (ret) return ret; - ret = qmp_pcie_reset_init(dev, cfg); + ret = qmp_pcie_reset_init(qmp); if (ret) return ret; - ret = qmp_pcie_vreg_init(dev, cfg); + ret = qmp_pcie_vreg_init(qmp); if (ret) return ret; @@ -2286,7 +2283,7 @@ static int qmp_pcie_probe(struct platform_device *pdev) if (!child) return -EINVAL; - ret = qmp_pcie_create(dev, child, serdes, cfg); + ret = qmp_pcie_create(qmp, child); if (ret) goto err_node_put; From 63bf101ae1912570260912087bfd7b1cf420c3dc Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 5 Nov 2022 15:59:29 +0100 Subject: [PATCH 1235/4122] phy: qcom-qmp-pcie: rename PHY ops structure Rename the PHY operation structure so that it has a "phy_ops" suffix and move it next to the implementation. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221105145939.20318-7-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index e30cbc94cbf6..bd946438e3c3 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -2037,6 +2037,13 @@ static int qmp_pcie_set_mode(struct phy *phy, enum phy_mode mode, int submode) return 0; } +static const struct phy_ops qmp_pcie_phy_ops = { + .power_on = qmp_pcie_enable, + .power_off = qmp_pcie_disable, + .set_mode = qmp_pcie_set_mode, + .owner = THIS_MODULE, +}; + static int qmp_pcie_vreg_init(struct qmp_pcie *qmp) { const struct qmp_phy_cfg *cfg = qmp->cfg; @@ -2160,13 +2167,6 @@ static int phy_pipe_clk_register(struct qmp_pcie *qmp, struct device_node *np) return devm_add_action_or_reset(qmp->dev, phy_clk_release_provider, np); } -static const struct phy_ops qmp_pcie_ops = { - .power_on = qmp_pcie_enable, - .power_off = qmp_pcie_disable, - .set_mode = qmp_pcie_set_mode, - .owner = THIS_MODULE, -}; - static int qmp_pcie_create(struct qmp_pcie *qmp, struct device_node *np) { const struct qmp_phy_cfg *cfg = qmp->cfg; @@ -2229,7 +2229,7 @@ static int qmp_pcie_create(struct qmp_pcie *qmp, struct device_node *np) "failed to get pipe clock\n"); } - generic_phy = devm_phy_create(dev, np, &qmp_pcie_ops); + generic_phy = devm_phy_create(dev, np, &qmp_pcie_phy_ops); if (IS_ERR(generic_phy)) { ret = PTR_ERR(generic_phy); dev_err(dev, "failed to create PHY: %d\n", ret); From f8b641146484b6be33c291fa8279ffe423e169d9 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 5 Nov 2022 15:59:30 +0100 Subject: [PATCH 1236/4122] phy: qcom-qmp-pcie: clean up PHY lane init Clean up the PHY lane initialisation somewhat by adding further temporary variables and 
programming both tx and rx for the second lane after the first lane. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221105145939.20318-8-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index bd946438e3c3..dd7e72424fc0 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -1835,18 +1835,19 @@ static void qmp_pcie_lanes_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_t const struct qmp_phy_cfg *cfg = qmp->cfg; void __iomem *tx = qmp->tx; void __iomem *rx = qmp->rx; + void __iomem *tx2 = qmp->tx2; + void __iomem *rx2 = qmp->rx2; if (!tables) return; qmp_pcie_configure_lane(tx, tables->tx, tables->tx_num, 1); - - if (cfg->lanes >= 2) - qmp_pcie_configure_lane(qmp->tx2, tables->tx, tables->tx_num, 2); - qmp_pcie_configure_lane(rx, tables->rx, tables->rx_num, 1); - if (cfg->lanes >= 2) - qmp_pcie_configure_lane(qmp->rx2, tables->rx, tables->rx_num, 2); + + if (cfg->lanes >= 2) { + qmp_pcie_configure_lane(tx2, tables->tx, tables->tx_num, 2); + qmp_pcie_configure_lane(rx2, tables->rx, tables->rx_num, 2); + } } static void qmp_pcie_pcs_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tables *tables) From d8c9a1e9c223951836692eb8c3810e18ae06ffc2 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 5 Nov 2022 15:59:31 +0100 Subject: [PATCH 1237/4122] phy: qcom-qmp-pcie: use shorter tables identifiers The QMP drivers all use 'tbl' to refer to their register initialisation tables. For consistency use 'tbls' rather than 'tables' to refer to the new aggregate table structures. 
Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221105145939.20318-9-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 90 ++++++++++++------------ 1 file changed, 45 insertions(+), 45 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index dd7e72424fc0..a977f2bbd831 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -1313,7 +1313,7 @@ static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_ep_pcs_misc_tbl[] = QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_OSC_DTCT_MODE2_CONFIG5, 0x08), }; -struct qmp_phy_cfg_tables { +struct qmp_phy_cfg_tbls { const struct qmp_phy_init_tbl *serdes; int serdes_num; const struct qmp_phy_init_tbl *tx; @@ -1331,7 +1331,7 @@ struct qmp_phy_cfg { int lanes; /* Main init sequence for PHY blocks - serdes, tx, rx, pcs */ - const struct qmp_phy_cfg_tables tables; + const struct qmp_phy_cfg_tbls tbls; /* * Additional init sequences for PHY blocks, providing additional * register programming. They are used for providing separate sequences @@ -1339,8 +1339,8 @@ struct qmp_phy_cfg { * * If EP mode is not supported, both tables can be left unset. 
*/ - const struct qmp_phy_cfg_tables *tables_rc; - const struct qmp_phy_cfg_tables *tables_ep; + const struct qmp_phy_cfg_tbls *tbls_rc; + const struct qmp_phy_cfg_tbls *tbls_ep; /* clock ids to be requested */ const char * const *clk_list; @@ -1442,7 +1442,7 @@ static const char * const sdm845_pciephy_reset_l[] = { static const struct qmp_phy_cfg ipq8074_pciephy_cfg = { .lanes = 1, - .tables = { + .tbls = { .serdes = ipq8074_pcie_serdes_tbl, .serdes_num = ARRAY_SIZE(ipq8074_pcie_serdes_tbl), .tx = ipq8074_pcie_tx_tbl, @@ -1467,7 +1467,7 @@ static const struct qmp_phy_cfg ipq8074_pciephy_cfg = { static const struct qmp_phy_cfg ipq8074_pciephy_gen3_cfg = { .lanes = 1, - .tables = { + .tbls = { .serdes = ipq8074_pcie_gen3_serdes_tbl, .serdes_num = ARRAY_SIZE(ipq8074_pcie_gen3_serdes_tbl), .tx = ipq8074_pcie_gen3_tx_tbl, @@ -1494,7 +1494,7 @@ static const struct qmp_phy_cfg ipq8074_pciephy_gen3_cfg = { static const struct qmp_phy_cfg ipq6018_pciephy_cfg = { .lanes = 1, - .tables = { + .tbls = { .serdes = ipq6018_pcie_serdes_tbl, .serdes_num = ARRAY_SIZE(ipq6018_pcie_serdes_tbl), .tx = ipq6018_pcie_tx_tbl, @@ -1521,7 +1521,7 @@ static const struct qmp_phy_cfg ipq6018_pciephy_cfg = { static const struct qmp_phy_cfg sdm845_qmp_pciephy_cfg = { .lanes = 1, - .tables = { + .tbls = { .serdes = sdm845_qmp_pcie_serdes_tbl, .serdes_num = ARRAY_SIZE(sdm845_qmp_pcie_serdes_tbl), .tx = sdm845_qmp_pcie_tx_tbl, @@ -1548,7 +1548,7 @@ static const struct qmp_phy_cfg sdm845_qmp_pciephy_cfg = { static const struct qmp_phy_cfg sdm845_qhp_pciephy_cfg = { .lanes = 1, - .tables = { + .tbls = { .serdes = sdm845_qhp_pcie_serdes_tbl, .serdes_num = ARRAY_SIZE(sdm845_qhp_pcie_serdes_tbl), .tx = sdm845_qhp_pcie_tx_tbl, @@ -1573,7 +1573,7 @@ static const struct qmp_phy_cfg sdm845_qhp_pciephy_cfg = { static const struct qmp_phy_cfg sm8250_qmp_gen3x1_pciephy_cfg = { .lanes = 1, - .tables = { + .tbls = { .serdes = sm8250_qmp_pcie_serdes_tbl, .serdes_num = ARRAY_SIZE(sm8250_qmp_pcie_serdes_tbl), .tx = 
sm8250_qmp_pcie_tx_tbl, @@ -1585,7 +1585,7 @@ static const struct qmp_phy_cfg sm8250_qmp_gen3x1_pciephy_cfg = { .pcs_misc = sm8250_qmp_pcie_pcs_misc_tbl, .pcs_misc_num = ARRAY_SIZE(sm8250_qmp_pcie_pcs_misc_tbl), }, - .tables_rc = &(const struct qmp_phy_cfg_tables) { + .tbls_rc = &(const struct qmp_phy_cfg_tbls) { .serdes = sm8250_qmp_gen3x1_pcie_serdes_tbl, .serdes_num = ARRAY_SIZE(sm8250_qmp_gen3x1_pcie_serdes_tbl), .rx = sm8250_qmp_gen3x1_pcie_rx_tbl, @@ -1610,7 +1610,7 @@ static const struct qmp_phy_cfg sm8250_qmp_gen3x1_pciephy_cfg = { static const struct qmp_phy_cfg sm8250_qmp_gen3x2_pciephy_cfg = { .lanes = 2, - .tables = { + .tbls = { .serdes = sm8250_qmp_pcie_serdes_tbl, .serdes_num = ARRAY_SIZE(sm8250_qmp_pcie_serdes_tbl), .tx = sm8250_qmp_pcie_tx_tbl, @@ -1622,7 +1622,7 @@ static const struct qmp_phy_cfg sm8250_qmp_gen3x2_pciephy_cfg = { .pcs_misc = sm8250_qmp_pcie_pcs_misc_tbl, .pcs_misc_num = ARRAY_SIZE(sm8250_qmp_pcie_pcs_misc_tbl), }, - .tables_rc = &(const struct qmp_phy_cfg_tables) { + .tbls_rc = &(const struct qmp_phy_cfg_tbls) { .tx = sm8250_qmp_gen3x2_pcie_tx_tbl, .tx_num = ARRAY_SIZE(sm8250_qmp_gen3x2_pcie_tx_tbl), .rx = sm8250_qmp_gen3x2_pcie_rx_tbl, @@ -1647,7 +1647,7 @@ static const struct qmp_phy_cfg sm8250_qmp_gen3x2_pciephy_cfg = { static const struct qmp_phy_cfg msm8998_pciephy_cfg = { .lanes = 1, - .tables = { + .tbls = { .serdes = msm8998_pcie_serdes_tbl, .serdes_num = ARRAY_SIZE(msm8998_pcie_serdes_tbl), .tx = msm8998_pcie_tx_tbl, @@ -1674,7 +1674,7 @@ static const struct qmp_phy_cfg msm8998_pciephy_cfg = { static const struct qmp_phy_cfg sc8180x_pciephy_cfg = { .lanes = 1, - .tables = { + .tbls = { .serdes = sc8180x_qmp_pcie_serdes_tbl, .serdes_num = ARRAY_SIZE(sc8180x_qmp_pcie_serdes_tbl), .tx = sc8180x_qmp_pcie_tx_tbl, @@ -1701,7 +1701,7 @@ static const struct qmp_phy_cfg sc8180x_pciephy_cfg = { static const struct qmp_phy_cfg sdx55_qmp_pciephy_cfg = { .lanes = 2, - .tables = { + .tbls = { .serdes = sdx55_qmp_pcie_serdes_tbl, 
.serdes_num = ARRAY_SIZE(sdx55_qmp_pcie_serdes_tbl), .tx = sdx55_qmp_pcie_tx_tbl, @@ -1728,7 +1728,7 @@ static const struct qmp_phy_cfg sdx55_qmp_pciephy_cfg = { static const struct qmp_phy_cfg sm8450_qmp_gen3x1_pciephy_cfg = { .lanes = 1, - .tables = { + .tbls = { .serdes = sm8450_qmp_gen3x1_pcie_serdes_tbl, .serdes_num = ARRAY_SIZE(sm8450_qmp_gen3x1_pcie_serdes_tbl), .tx = sm8450_qmp_gen3x1_pcie_tx_tbl, @@ -1755,7 +1755,7 @@ static const struct qmp_phy_cfg sm8450_qmp_gen3x1_pciephy_cfg = { static const struct qmp_phy_cfg sm8450_qmp_gen4x2_pciephy_cfg = { .lanes = 2, - .tables = { + .tbls = { .serdes = sm8450_qmp_gen4x2_pcie_serdes_tbl, .serdes_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_serdes_tbl), .tx = sm8450_qmp_gen4x2_pcie_tx_tbl, @@ -1768,14 +1768,14 @@ static const struct qmp_phy_cfg sm8450_qmp_gen4x2_pciephy_cfg = { .pcs_misc_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_pcs_misc_tbl), }, - .tables_rc = &(const struct qmp_phy_cfg_tables) { + .tbls_rc = &(const struct qmp_phy_cfg_tbls) { .serdes = sm8450_qmp_gen4x2_pcie_rc_serdes_tbl, .serdes_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_rc_serdes_tbl), .pcs_misc = sm8450_qmp_gen4x2_pcie_rc_pcs_misc_tbl, .pcs_misc_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_rc_pcs_misc_tbl), }, - .tables_ep = &(const struct qmp_phy_cfg_tables) { + .tbls_ep = &(const struct qmp_phy_cfg_tbls) { .serdes = sm8450_qmp_gen4x2_pcie_ep_serdes_tbl, .serdes_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_ep_serdes_tbl), .pcs_misc = sm8450_qmp_gen4x2_pcie_ep_pcs_misc_tbl, @@ -1820,17 +1820,17 @@ static void qmp_pcie_configure(void __iomem *base, qmp_pcie_configure_lane(base, tbl, num, 0xff); } -static void qmp_pcie_serdes_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tables *tables) +static void qmp_pcie_serdes_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tbls *tbls) { void __iomem *serdes = qmp->serdes; - if (!tables) + if (!tbls) return; - qmp_pcie_configure(serdes, tables->serdes, tables->serdes_num); + qmp_pcie_configure(serdes, tbls->serdes, 
tbls->serdes_num); } -static void qmp_pcie_lanes_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tables *tables) +static void qmp_pcie_lanes_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tbls *tbls) { const struct qmp_phy_cfg *cfg = qmp->cfg; void __iomem *tx = qmp->tx; @@ -1838,28 +1838,28 @@ static void qmp_pcie_lanes_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_t void __iomem *tx2 = qmp->tx2; void __iomem *rx2 = qmp->rx2; - if (!tables) + if (!tbls) return; - qmp_pcie_configure_lane(tx, tables->tx, tables->tx_num, 1); - qmp_pcie_configure_lane(rx, tables->rx, tables->rx_num, 1); + qmp_pcie_configure_lane(tx, tbls->tx, tbls->tx_num, 1); + qmp_pcie_configure_lane(rx, tbls->rx, tbls->rx_num, 1); if (cfg->lanes >= 2) { - qmp_pcie_configure_lane(tx2, tables->tx, tables->tx_num, 2); - qmp_pcie_configure_lane(rx2, tables->rx, tables->rx_num, 2); + qmp_pcie_configure_lane(tx2, tbls->tx, tbls->tx_num, 2); + qmp_pcie_configure_lane(rx2, tbls->rx, tbls->rx_num, 2); } } -static void qmp_pcie_pcs_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tables *tables) +static void qmp_pcie_pcs_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tbls *tbls) { void __iomem *pcs = qmp->pcs; void __iomem *pcs_misc = qmp->pcs_misc; - if (!tables) + if (!tbls) return; - qmp_pcie_configure(pcs, tables->pcs, tables->pcs_num); - qmp_pcie_configure(pcs_misc, tables->pcs_misc, tables->pcs_misc_num); + qmp_pcie_configure(pcs, tbls->pcs, tbls->pcs_num); + qmp_pcie_configure(pcs_misc, tbls->pcs_misc, tbls->pcs_misc_num); } static int qmp_pcie_init(struct phy *phy) @@ -1918,7 +1918,7 @@ static int qmp_pcie_power_on(struct phy *phy) { struct qmp_pcie *qmp = phy_get_drvdata(phy); const struct qmp_phy_cfg *cfg = qmp->cfg; - const struct qmp_phy_cfg_tables *mode_tables; + const struct qmp_phy_cfg_tbls *mode_tbls; void __iomem *pcs = qmp->pcs; void __iomem *status; unsigned int mask, val; @@ -1928,12 +1928,12 @@ static int qmp_pcie_power_on(struct phy *phy) cfg->pwrdn_ctrl); if 
(qmp->mode == PHY_MODE_PCIE_RC) - mode_tables = cfg->tables_rc; + mode_tbls = cfg->tbls_rc; else - mode_tables = cfg->tables_ep; + mode_tbls = cfg->tbls_ep; - qmp_pcie_serdes_init(qmp, &cfg->tables); - qmp_pcie_serdes_init(qmp, mode_tables); + qmp_pcie_serdes_init(qmp, &cfg->tbls); + qmp_pcie_serdes_init(qmp, mode_tbls); ret = clk_prepare_enable(qmp->pipe_clk); if (ret) { @@ -1942,11 +1942,11 @@ static int qmp_pcie_power_on(struct phy *phy) } /* Tx, Rx, and PCS configurations */ - qmp_pcie_lanes_init(qmp, &cfg->tables); - qmp_pcie_lanes_init(qmp, mode_tables); + qmp_pcie_lanes_init(qmp, &cfg->tbls); + qmp_pcie_lanes_init(qmp, mode_tbls); - qmp_pcie_pcs_init(qmp, &cfg->tables); - qmp_pcie_pcs_init(qmp, mode_tables); + qmp_pcie_pcs_init(qmp, &cfg->tbls); + qmp_pcie_pcs_init(qmp, mode_tbls); /* Pull PHY out of reset state */ qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); @@ -2217,9 +2217,9 @@ static int qmp_pcie_create(struct qmp_pcie *qmp, struct device_node *np) qmp->pcs_misc = qmp->pcs + 0x400; if (IS_ERR(qmp->pcs_misc)) { - if (cfg->tables.pcs_misc || - (cfg->tables_rc && cfg->tables_rc->pcs_misc) || - (cfg->tables_ep && cfg->tables_ep->pcs_misc)) { + if (cfg->tbls.pcs_misc || + (cfg->tbls_rc && cfg->tbls_rc->pcs_misc) || + (cfg->tbls_ep && cfg->tbls_ep->pcs_misc)) { return PTR_ERR(qmp->pcs_misc); } } From ec7bc1b40b363c46af0b17d4cb0ea4dc21b7cf9b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 5 Nov 2022 15:59:32 +0100 Subject: [PATCH 1238/4122] phy: qcom-qmp-pcie: add register init helper Generalise the serdes initialisation helper so that it can be used to initialise all the PHY registers (e.g. serdes, tx, rx, pcs). Note that this defers the ungating of the PIPE clock somewhat, which is fine as it isn't needed until starting the PHY. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221105145939.20318-10-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 41 ++++++------------------ 1 file changed, 10 insertions(+), 31 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index a977f2bbd831..09999d5b5268 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -1820,26 +1820,21 @@ static void qmp_pcie_configure(void __iomem *base, qmp_pcie_configure_lane(base, tbl, num, 0xff); } -static void qmp_pcie_serdes_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tbls *tbls) +static void qmp_pcie_init_registers(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tbls *tbls) { + const struct qmp_phy_cfg *cfg = qmp->cfg; void __iomem *serdes = qmp->serdes; + void __iomem *tx = qmp->tx; + void __iomem *rx = qmp->rx; + void __iomem *tx2 = qmp->tx2; + void __iomem *rx2 = qmp->rx2; + void __iomem *pcs = qmp->pcs; + void __iomem *pcs_misc = qmp->pcs_misc; if (!tbls) return; qmp_pcie_configure(serdes, tbls->serdes, tbls->serdes_num); -} - -static void qmp_pcie_lanes_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tbls *tbls) -{ - const struct qmp_phy_cfg *cfg = qmp->cfg; - void __iomem *tx = qmp->tx; - void __iomem *rx = qmp->rx; - void __iomem *tx2 = qmp->tx2; - void __iomem *rx2 = qmp->rx2; - - if (!tbls) - return; qmp_pcie_configure_lane(tx, tbls->tx, tbls->tx_num, 1); qmp_pcie_configure_lane(rx, tbls->rx, tbls->rx_num, 1); @@ -1848,15 +1843,6 @@ static void qmp_pcie_lanes_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_t qmp_pcie_configure_lane(tx2, tbls->tx, tbls->tx_num, 2); qmp_pcie_configure_lane(rx2, tbls->rx, tbls->rx_num, 2); } -} - -static void qmp_pcie_pcs_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tbls *tbls) -{ - void __iomem *pcs = qmp->pcs; - void __iomem *pcs_misc = qmp->pcs_misc; - - if 
(!tbls) - return; qmp_pcie_configure(pcs, tbls->pcs, tbls->pcs_num); qmp_pcie_configure(pcs_misc, tbls->pcs_misc, tbls->pcs_misc_num); @@ -1932,8 +1918,8 @@ static int qmp_pcie_power_on(struct phy *phy) else mode_tbls = cfg->tbls_ep; - qmp_pcie_serdes_init(qmp, &cfg->tbls); - qmp_pcie_serdes_init(qmp, mode_tbls); + qmp_pcie_init_registers(qmp, &cfg->tbls); + qmp_pcie_init_registers(qmp, mode_tbls); ret = clk_prepare_enable(qmp->pipe_clk); if (ret) { @@ -1941,13 +1927,6 @@ static int qmp_pcie_power_on(struct phy *phy) return ret; } - /* Tx, Rx, and PCS configurations */ - qmp_pcie_lanes_init(qmp, &cfg->tbls); - qmp_pcie_lanes_init(qmp, mode_tbls); - - qmp_pcie_pcs_init(qmp, &cfg->tbls); - qmp_pcie_pcs_init(qmp, mode_tbls); - /* Pull PHY out of reset state */ qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); From dcb93f47dd14cb0c206424b3258399b4cd205b20 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 5 Nov 2022 15:59:33 +0100 Subject: [PATCH 1239/4122] dt-bindings: phy: qcom,qmp-pcie: rename current bindings The current QMP PCIe PHY bindings are based on the original MSM8996 binding which provided multiple PHYs per IP block and these in turn were described by child nodes. Later QMP PCIe PHY blocks only provide a single PHY and the remnant child node does not really reflect the hardware. The original MSM8996 binding also ended up describing the individual register blocks as belonging to either the wrapper node or the PHY child nodes. This is an unnecessary level of detail which has led to problems when later IP blocks using different register layouts have been forced to fit the original mould rather than updating the binding. The bindings are arguably also incomplete as they only describe the register blocks used by the current Linux drivers (e.g. does not include the per lane PCS registers).
In preparation for adding new bindings for SC8280XP which further bindings can be based on, rename the current schema file after IPQ8074, which was the first SoC added to the bindings after MSM8996 (which has already been split out), and add a reference to the SC8280XP bindings. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221105145939.20318-11-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- ...om,qmp-pcie-phy.yaml => qcom,ipq8074-qmp-pcie-phy.yaml} | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) rename Documentation/devicetree/bindings/phy/{qcom,qmp-pcie-phy.yaml => qcom,ipq8074-qmp-pcie-phy.yaml} (96%) diff --git a/Documentation/devicetree/bindings/phy/qcom,qmp-pcie-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,ipq8074-qmp-pcie-phy.yaml similarity index 96% rename from Documentation/devicetree/bindings/phy/qcom,qmp-pcie-phy.yaml rename to Documentation/devicetree/bindings/phy/qcom,ipq8074-qmp-pcie-phy.yaml index 324ad7d03a38..62045dcfb20c 100644 --- a/Documentation/devicetree/bindings/phy/qcom,qmp-pcie-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,ipq8074-qmp-pcie-phy.yaml @@ -1,10 +1,10 @@ # SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) %YAML 1.2 --- -$id: http://devicetree.org/schemas/phy/qcom,qmp-pcie-phy.yaml# +$id: http://devicetree.org/schemas/phy/qcom,ipq8074-qmp-pcie-phy.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Qualcomm QMP PHY controller (PCIe) +title: Qualcomm QMP PHY controller (PCIe, IPQ8074) maintainers: - Vinod Koul @@ -13,6 +13,9 @@ description: QMP PHY controller supports physical layer functionality for a number of controllers on Qualcomm chipsets, such as, PCIe, UFS, and USB. + Note that these bindings are for SoCs up to SC8180X. For newer SoCs, see + qcom,sc8280xp-qmp-pcie-phy.yaml. 
+ properties: compatible: enum: From 306382305c5cefce892784da8686c242956f5fd2 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 5 Nov 2022 15:59:34 +0100 Subject: [PATCH 1240/4122] dt-bindings: phy: qcom,qmp-pcie: add sc8280xp bindings Add bindings for the PCIe QMP PHYs found on SC8280XP. The PCIe2 and PCIe3 controllers and PHYs on SC8280XP can be used in 4-lane mode or as separate controllers and PHYs in 2-lane mode (e.g. as PCIe2A and PCIe2B). The configuration for a specific system can be read from a TCSR register. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221105145939.20318-12-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- .../phy/qcom,sc8280xp-qmp-pcie-phy.yaml | 165 ++++++++++++++++++ 1 file changed, 165 insertions(+) create mode 100644 Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml diff --git a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml new file mode 100644 index 000000000000..80aa8d2507fb --- /dev/null +++ b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml @@ -0,0 +1,165 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/phy/qcom,sc8280xp-qmp-pcie-phy.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm QMP PHY controller (PCIe, SC8280XP) + +maintainers: + - Vinod Koul + +description: + The QMP PHY controller supports physical layer functionality for a number of + controllers on Qualcomm chipsets, such as, PCIe, UFS, and USB. 
+ +properties: + compatible: + enum: + - qcom,sc8280xp-qmp-gen3x1-pcie-phy + - qcom,sc8280xp-qmp-gen3x2-pcie-phy + - qcom,sc8280xp-qmp-gen3x4-pcie-phy + + reg: + minItems: 1 + maxItems: 2 + + clocks: + maxItems: 6 + + clock-names: + items: + - const: aux + - const: cfg_ahb + - const: ref + - const: rchng + - const: pipe + - const: pipediv2 + + power-domains: + maxItems: 1 + + resets: + maxItems: 1 + + reset-names: + items: + - const: phy + + vdda-phy-supply: true + + vdda-pll-supply: true + + qcom,4ln-config-sel: + description: PCIe 4-lane configuration + $ref: /schemas/types.yaml#/definitions/phandle-array + items: + - items: + - description: phandle of TCSR syscon + - description: offset of PCIe 4-lane configuration register + - description: offset of configuration bit for this PHY + + "#clock-cells": + const: 0 + + clock-output-names: + maxItems: 1 + + "#phy-cells": + const: 0 + +required: + - compatible + - reg + - clocks + - clock-names + - power-domains + - resets + - reset-names + - vdda-phy-supply + - vdda-pll-supply + - "#clock-cells" + - clock-output-names + - "#phy-cells" + +additionalProperties: false + +allOf: + - if: + properties: + compatible: + contains: + enum: + - qcom,sc8280xp-qmp-gen3x4-pcie-phy + then: + properties: + reg: + items: + - description: port a + - description: port b + required: + - qcom,4ln-config-sel + else: + properties: + reg: + maxItems: 1 + +examples: + - | + #include + + pcie2b_phy: phy@1c18000 { + compatible = "qcom,sc8280xp-qmp-gen3x2-pcie-phy"; + reg = <0x01c18000 0x2000>; + + clocks = <&gcc GCC_PCIE_2B_AUX_CLK>, + <&gcc GCC_PCIE_2B_CFG_AHB_CLK>, + <&gcc GCC_PCIE_2A2B_CLKREF_CLK>, + <&gcc GCC_PCIE2B_PHY_RCHNG_CLK>, + <&gcc GCC_PCIE_2B_PIPE_CLK>, + <&gcc GCC_PCIE_2B_PIPEDIV2_CLK>; + clock-names = "aux", "cfg_ahb", "ref", "rchng", + "pipe", "pipediv2"; + + power-domains = <&gcc PCIE_2B_GDSC>; + + resets = <&gcc GCC_PCIE_2B_PHY_BCR>; + reset-names = "phy"; + + vdda-phy-supply = <&vreg_l6d>; + vdda-pll-supply = <&vreg_l4d>; + 
+ #clock-cells = <0>; + clock-output-names = "pcie_2b_pipe_clk"; + + #phy-cells = <0>; + }; + + pcie2a_phy: phy@1c24000 { + compatible = "qcom,sc8280xp-qmp-gen3x4-pcie-phy"; + reg = <0x01c24000 0x2000>, <0x01c26000 0x2000>; + + clocks = <&gcc GCC_PCIE_2A_AUX_CLK>, + <&gcc GCC_PCIE_2A_CFG_AHB_CLK>, + <&gcc GCC_PCIE_2A2B_CLKREF_CLK>, + <&gcc GCC_PCIE2A_PHY_RCHNG_CLK>, + <&gcc GCC_PCIE_2A_PIPE_CLK>, + <&gcc GCC_PCIE_2A_PIPEDIV2_CLK>; + clock-names = "aux", "cfg_ahb", "ref", "rchng", + "pipe", "pipediv2"; + + power-domains = <&gcc PCIE_2A_GDSC>; + + resets = <&gcc GCC_PCIE_2A_PHY_BCR>; + reset-names = "phy"; + + vdda-phy-supply = <&vreg_l6d>; + vdda-pll-supply = <&vreg_l4d>; + + qcom,4ln-config-sel = <&tcsr 0xa044 0>; + + #clock-cells = <0>; + clock-output-names = "pcie_2a_pipe_clk"; + + #phy-cells = <0>; + }; From 7bc609e34899dd3065fc9cbc73bb8a4902e257df Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 5 Nov 2022 15:59:35 +0100 Subject: [PATCH 1241/4122] phy: qcom-qmp-pcie: restructure PHY creation In preparation for supporting devicetree bindings which do not use a child node, move the PHY creation to probe() proper and parse the serdes resource in what is now the legacy devicetree helper. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221105145939.20318-13-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 36 +++++++++++------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index 09999d5b5268..8af84ff755ab 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -2147,14 +2147,15 @@ static int phy_pipe_clk_register(struct qmp_pcie *qmp, struct device_node *np) return devm_add_action_or_reset(qmp->dev, phy_clk_release_provider, np); } -static int qmp_pcie_create(struct qmp_pcie *qmp, struct device_node *np) +static int qmp_pcie_parse_dt_legacy(struct qmp_pcie *qmp, struct device_node *np) { + struct platform_device *pdev = to_platform_device(qmp->dev); const struct qmp_phy_cfg *cfg = qmp->cfg; struct device *dev = qmp->dev; - struct phy *generic_phy; - int ret; - qmp->mode = PHY_MODE_PCIE_RC; + qmp->serdes = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(qmp->serdes)) + return PTR_ERR(qmp->serdes); /* * Get memory resources for the PHY: @@ -2209,16 +2210,6 @@ static int qmp_pcie_create(struct qmp_pcie *qmp, struct device_node *np) "failed to get pipe clock\n"); } - generic_phy = devm_phy_create(dev, np, &qmp_pcie_phy_ops); - if (IS_ERR(generic_phy)) { - ret = PTR_ERR(generic_phy); - dev_err(dev, "failed to create PHY: %d\n", ret); - return ret; - } - - qmp->phy = generic_phy; - phy_set_drvdata(generic_phy, qmp); - return 0; } @@ -2243,10 +2234,6 @@ static int qmp_pcie_probe(struct platform_device *pdev) WARN_ON_ONCE(!qmp->cfg->pwrdn_ctrl); WARN_ON_ONCE(!qmp->cfg->phy_status); - qmp->serdes = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(qmp->serdes)) - return PTR_ERR(qmp->serdes); - ret = qmp_pcie_clk_init(qmp); if (ret) return ret; @@ -2263,7 +2250,7 @@ static int qmp_pcie_probe(struct 
platform_device *pdev) if (!child) return -EINVAL; - ret = qmp_pcie_create(qmp, child); + ret = qmp_pcie_parse_dt_legacy(qmp, child); if (ret) goto err_node_put; @@ -2271,6 +2258,17 @@ static int qmp_pcie_probe(struct platform_device *pdev) if (ret) goto err_node_put; + qmp->mode = PHY_MODE_PCIE_RC; + + qmp->phy = devm_phy_create(dev, child, &qmp_pcie_phy_ops); + if (IS_ERR(qmp->phy)) { + ret = PTR_ERR(qmp->phy); + dev_err(dev, "failed to create PHY: %d\n", ret); + goto err_node_put; + } + + phy_set_drvdata(qmp->phy, qmp); + of_node_put(child); phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); From fffdeaf853d8088c5149fc776974344b0f815dc8 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 5 Nov 2022 15:59:36 +0100 Subject: [PATCH 1242/4122] phy: qcom-qmp-pcie: fix initialisation reset Add the missing delay after asserting reset. This is specifically needed for the reset to have any effect on SC8280XP. The vendor driver uses a 1 ms delay, but that seems a bit excessive. Instead use a 200 us delay which appears to be more than enough and also matches the UFS reset delay added by commit 870b1279c7a0 ("scsi: ufs-qcom: Add reset control support for host controller"). 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221105145939.20318-14-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index 8af84ff755ab..06844552922e 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -1866,6 +1866,8 @@ static int qmp_pcie_init(struct phy *phy) goto err_disable_regulators; } + usleep_range(200, 300); + ret = reset_control_bulk_deassert(cfg->num_resets, qmp->resets); if (ret) { dev_err(qmp->dev, "reset deassert failed\n"); From 9e420f1e7eddbbb6b73a78aca2c280ddd8a63096 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 5 Nov 2022 15:59:37 +0100 Subject: [PATCH 1243/4122] phy: qcom-qmp-pcie: add support for pipediv2 clock Some QMP PHYs have a second fixed-divider pipe clock that needs to be enabled along with the pipe clock. Add support for an optional "pipediv2" clock. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221105145939.20318-15-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 25 ++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index 06844552922e..d671b05c73dd 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -1378,8 +1378,10 @@ struct qmp_pcie { void __iomem *tx2; void __iomem *rx2; - struct clk *pipe_clk; struct clk_bulk_data *clks; + struct clk_bulk_data pipe_clks[2]; + int num_pipe_clks; + struct reset_control_bulk_data *resets; struct regulator_bulk_data *vregs; @@ -1923,11 +1925,9 @@ static int qmp_pcie_power_on(struct phy *phy) qmp_pcie_init_registers(qmp, &cfg->tbls); qmp_pcie_init_registers(qmp, mode_tbls); - ret = clk_prepare_enable(qmp->pipe_clk); - if (ret) { - dev_err(qmp->dev, "pipe_clk enable failed err=%d\n", ret); + ret = clk_bulk_prepare_enable(qmp->num_pipe_clks, qmp->pipe_clks); + if (ret) return ret; - } /* Pull PHY out of reset state */ qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); @@ -1950,7 +1950,7 @@ static int qmp_pcie_power_on(struct phy *phy) return 0; err_disable_pipe_clk: - clk_disable_unprepare(qmp->pipe_clk); + clk_bulk_disable_unprepare(qmp->num_pipe_clks, qmp->pipe_clks); return ret; } @@ -1960,7 +1960,7 @@ static int qmp_pcie_power_off(struct phy *phy) struct qmp_pcie *qmp = phy_get_drvdata(phy); const struct qmp_phy_cfg *cfg = qmp->cfg; - clk_disable_unprepare(qmp->pipe_clk); + clk_bulk_disable_unprepare(qmp->num_pipe_clks, qmp->pipe_clks); /* PHY reset */ qphy_setbits(qmp->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); @@ -2154,6 +2154,7 @@ static int qmp_pcie_parse_dt_legacy(struct qmp_pcie *qmp, struct device_node *np struct platform_device *pdev = to_platform_device(qmp->dev); const struct qmp_phy_cfg *cfg = 
qmp->cfg; struct device *dev = qmp->dev; + struct clk *clk; qmp->serdes = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(qmp->serdes)) @@ -2206,12 +2207,16 @@ static int qmp_pcie_parse_dt_legacy(struct qmp_pcie *qmp, struct device_node *np } } - qmp->pipe_clk = devm_get_clk_from_child(dev, np, NULL); - if (IS_ERR(qmp->pipe_clk)) { - return dev_err_probe(dev, PTR_ERR(qmp->pipe_clk), + clk = devm_get_clk_from_child(dev, np, NULL); + if (IS_ERR(clk)) { + return dev_err_probe(dev, PTR_ERR(clk), "failed to get pipe clock\n"); } + qmp->num_pipe_clks = 1; + qmp->pipe_clks[0].id = "pipe"; + qmp->pipe_clks[0].clk = clk; + return 0; } From d0a846ba28ddb589fc5a5741ae566e19c1034034 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 5 Nov 2022 15:59:38 +0100 Subject: [PATCH 1244/4122] phy: qcom-qmp-pcie: add support for sc8280xp Add support for the single and dual-lane PHYs found on SC8280XP. Note that the SC8280XP binding does not try to describe every register subregion and instead the driver holds the corresponding offsets. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221105145939.20318-16-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 299 +++++++++++++++++- .../phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5.h | 2 + 2 files changed, 291 insertions(+), 10 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index d671b05c73dd..f507a67a8361 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -834,6 +834,143 @@ static const struct qmp_phy_init_tbl sc8180x_qmp_pcie_pcs_misc_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V4_PCS_PCIE_ENDPOINT_REFCLK_DRIVE, 0xc1), }; +static const struct qmp_phy_init_tbl sc8280xp_qmp_pcie_serdes_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_EN_CENTER, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_PER1, 0x31), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_PER2, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_STEP_SIZE1_MODE0, 0xde), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_STEP_SIZE2_MODE0, 0x07), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_STEP_SIZE1_MODE1, 0x4c), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_STEP_SIZE2_MODE1, 0x06), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_CLK_ENABLE1, 0x90), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_IVCO, 0x0f), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_CP_CTRL_MODE0, 0x06), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_CP_CTRL_MODE1, 0x06), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_RCTRL_MODE0, 0x16), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_RCTRL_MODE1, 0x16), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_CCTRL_MODE0, 0x36), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_CCTRL_MODE1, 0x36), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_SYSCLK_EN_SEL, 0x08), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP_EN, 0x42), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP1_MODE0, 0x0a), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP2_MODE0, 0x1a), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP1_MODE1, 0x14), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP2_MODE1, 
0x34), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_DEC_START_MODE0, 0x82), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_DEC_START_MODE1, 0x68), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_DIV_FRAC_START1_MODE0, 0x55), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_DIV_FRAC_START2_MODE0, 0x55), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_DIV_FRAC_START3_MODE0, 0x03), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_DIV_FRAC_START1_MODE1, 0xab), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_DIV_FRAC_START2_MODE1, 0xaa), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_DIV_FRAC_START3_MODE1, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_VCO_TUNE_MAP, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_VCO_TUNE1_MODE0, 0x24), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_VCO_TUNE1_MODE1, 0xb4), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_VCO_TUNE2_MODE1, 0x03), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_CLK_SELECT, 0x34), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_HSCLK_SEL, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_CORECLK_DIV_MODE1, 0x08), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIN_VCOCAL_CMP_CODE1_MODE0, 0xb9), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIN_VCOCAL_CMP_CODE2_MODE0, 0x1e), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIN_VCOCAL_CMP_CODE1_MODE1, 0x94), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIN_VCOCAL_CMP_CODE2_MODE1, 0x18), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIN_VCOCAL_HSCLK_SEL, 0x11), +}; + +static const struct qmp_phy_init_tbl sc8280xp_qmp_gen3x1_pcie_rc_serdes_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_V5_COM_SYSCLK_BUF_ENABLE, 0x07), +}; + +static const struct qmp_phy_init_tbl sc8280xp_qmp_gen3x2_pcie_rc_serdes_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIAS_EN_CLKBUFLR_EN, 0x14), +}; + +static const struct qmp_phy_init_tbl sc8280xp_qmp_gen3x1_pcie_tx_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_V5_TX_PI_QEC_CTRL, 0x20), + QMP_PHY_INIT_CFG(QSERDES_V5_TX_LANE_MODE_1, 0x75), + QMP_PHY_INIT_CFG(QSERDES_V5_TX_LANE_MODE_4, 0x3f), + QMP_PHY_INIT_CFG(QSERDES_V5_TX_RES_CODE_LANE_OFFSET_TX, 0x1d), + QMP_PHY_INIT_CFG(QSERDES_V5_TX_RES_CODE_LANE_OFFSET_RX, 0x0c), +}; + +static const struct qmp_phy_init_tbl sc8280xp_qmp_gen3x1_pcie_rx_tbl[] = { + 
QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_LOW, 0x7f), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH, 0xff), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH2, 0xbf), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH3, 0x3f), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH4, 0xd8), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_LOW, 0xdc), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH, 0xdc), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH2, 0x5c), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH3, 0x34), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH4, 0xa6), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_TX_ADAPT_POST_THRESH, 0xf0), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_10_HIGH3, 0x34), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_VGA_CAL_CNTRL2, 0x07), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_GM_CAL, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_SB2_THRESH1, 0x08), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_SB2_THRESH2, 0x08), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_PI_CONTROLS, 0xf0), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_DFE_CTLE_POST_CAL_OFFSET, 0x38), +}; + +static const struct qmp_phy_init_tbl sc8280xp_qmp_gen3x1_pcie_pcs_tbl[] = { + QMP_PHY_INIT_CFG(QPHY_V5_PCS_REFGEN_REQ_CONFIG1, 0x05), + QMP_PHY_INIT_CFG(QPHY_V5_PCS_RX_SIGDET_LVL, 0x77), + QMP_PHY_INIT_CFG(QPHY_V5_PCS_RATE_SLEW_CNTRL1, 0x0b), +}; + +static const struct qmp_phy_init_tbl sc8280xp_qmp_gen3x1_pcie_pcs_misc_tbl[] = { + QMP_PHY_INIT_CFG(QPHY_V5_PCS_PCIE_OSC_DTCT_ACTIONS, 0x00), + QMP_PHY_INIT_CFG(QPHY_V5_PCS_PCIE_INT_AUX_CLK_CONFIG1, 0x00), + QMP_PHY_INIT_CFG(QPHY_V5_PCS_PCIE_EQ_CONFIG2, 0x0f), + QMP_PHY_INIT_CFG(QPHY_V5_PCS_PCIE_ENDPOINT_REFCLK_DRIVE, 0xc1), +}; + +static const struct qmp_phy_init_tbl sc8280xp_qmp_gen3x2_pcie_tx_tbl[] = { + QMP_PHY_INIT_CFG_LANE(QSERDES_V5_TX_PI_QEC_CTRL, 0x02, 1), + QMP_PHY_INIT_CFG_LANE(QSERDES_V5_TX_PI_QEC_CTRL, 0x04, 2), + QMP_PHY_INIT_CFG(QSERDES_V5_TX_LANE_MODE_1, 0xd5), + QMP_PHY_INIT_CFG(QSERDES_V5_TX_LANE_MODE_4, 0x3f), + QMP_PHY_INIT_CFG(QSERDES_V5_TX_RES_CODE_LANE_OFFSET_TX, 0x11), + 
QMP_PHY_INIT_CFG(QSERDES_V5_TX_RES_CODE_LANE_OFFSET_RX, 0x0c), +}; + +static const struct qmp_phy_init_tbl sc8280xp_qmp_gen3x2_pcie_rx_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_LOW, 0x7f), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH, 0xff), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH2, 0x7f), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH3, 0x34), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH4, 0xd8), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_LOW, 0xdc), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH, 0xdc), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH2, 0x5c), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH3, 0x34), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH4, 0xa6), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_10_HIGH3, 0x34), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_VGA_CAL_CNTRL2, 0x0f), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_GM_CAL, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_SB2_THRESH1, 0x08), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_SB2_THRESH2, 0x08), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_PI_CONTROLS, 0xf0), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_DFE_CTLE_POST_CAL_OFFSET, 0x38), +}; + +static const struct qmp_phy_init_tbl sc8280xp_qmp_gen3x2_pcie_pcs_tbl[] = { + QMP_PHY_INIT_CFG(QPHY_V5_PCS_REFGEN_REQ_CONFIG1, 0x05), + QMP_PHY_INIT_CFG(QPHY_V5_PCS_RX_SIGDET_LVL, 0x88), + QMP_PHY_INIT_CFG(QPHY_V5_PCS_RATE_SLEW_CNTRL1, 0x0b), + QMP_PHY_INIT_CFG(QPHY_V5_PCS_EQ_CONFIG3, 0x0f), +}; + +static const struct qmp_phy_init_tbl sc8280xp_qmp_gen3x2_pcie_pcs_misc_tbl[] = { + QMP_PHY_INIT_CFG(QPHY_V5_PCS_PCIE_POWER_STATE_CONFIG2, 0x1d), + QMP_PHY_INIT_CFG(QPHY_V5_PCS_PCIE_POWER_STATE_CONFIG4, 0x07), + QMP_PHY_INIT_CFG(QPHY_V5_PCS_PCIE_ENDPOINT_REFCLK_DRIVE, 0xc1), + QMP_PHY_INIT_CFG(QPHY_V5_PCS_PCIE_OSC_DTCT_ACTIONS, 0x00), +}; + static const struct qmp_phy_init_tbl sm8250_qmp_pcie_serdes_tbl[] = { QMP_PHY_INIT_CFG(QSERDES_V4_COM_SYSCLK_EN_SEL, 0x08), QMP_PHY_INIT_CFG(QSERDES_V4_COM_CLK_SELECT, 0x34), @@ -1313,6 +1450,16 @@ static const struct 
qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_ep_pcs_misc_tbl[] = QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_OSC_DTCT_MODE2_CONFIG5, 0x08), }; +struct qmp_pcie_offsets { + u16 serdes; + u16 pcs; + u16 pcs_misc; + u16 tx; + u16 rx; + u16 tx2; + u16 rx2; +}; + struct qmp_phy_cfg_tbls { const struct qmp_phy_init_tbl *serdes; int serdes_num; @@ -1330,6 +1477,8 @@ struct qmp_phy_cfg_tbls { struct qmp_phy_cfg { int lanes; + const struct qmp_pcie_offsets *offsets; + /* Main init sequence for PHY blocks - serdes, tx, rx, pcs */ const struct qmp_phy_cfg_tbls tbls; /* @@ -1422,6 +1571,9 @@ static const char * const msm8996_phy_clk_l[] = { "aux", "cfg_ahb", "ref", }; +static const char * const sc8280xp_pciephy_clk_l[] = { + "aux", "cfg_ahb", "ref", "rchng", +}; static const char * const sdm845_pciephy_clk_l[] = { "aux", "cfg_ahb", "ref", "refgen", @@ -1441,6 +1593,16 @@ static const char * const sdm845_pciephy_reset_l[] = { "phy", }; +static const struct qmp_pcie_offsets qmp_pcie_offsets_v5 = { + .serdes = 0, + .pcs = 0x0200, + .pcs_misc = 0x0600, + .tx = 0x0e00, + .rx = 0x1000, + .tx2 = 0x1600, + .rx2 = 0x1800, +}; + static const struct qmp_phy_cfg ipq8074_pciephy_cfg = { .lanes = 1, @@ -1700,6 +1862,76 @@ static const struct qmp_phy_cfg sc8180x_pciephy_cfg = { .phy_status = PHYSTATUS, }; +static const struct qmp_phy_cfg sc8280xp_qmp_gen3x1_pciephy_cfg = { + .lanes = 1, + + .offsets = &qmp_pcie_offsets_v5, + + .tbls = { + .serdes = sc8280xp_qmp_pcie_serdes_tbl, + .serdes_num = ARRAY_SIZE(sc8280xp_qmp_pcie_serdes_tbl), + .tx = sc8280xp_qmp_gen3x1_pcie_tx_tbl, + .tx_num = ARRAY_SIZE(sc8280xp_qmp_gen3x1_pcie_tx_tbl), + .rx = sc8280xp_qmp_gen3x1_pcie_rx_tbl, + .rx_num = ARRAY_SIZE(sc8280xp_qmp_gen3x1_pcie_rx_tbl), + .pcs = sc8280xp_qmp_gen3x1_pcie_pcs_tbl, + .pcs_num = ARRAY_SIZE(sc8280xp_qmp_gen3x1_pcie_pcs_tbl), + .pcs_misc = sc8280xp_qmp_gen3x1_pcie_pcs_misc_tbl, + .pcs_misc_num = ARRAY_SIZE(sc8280xp_qmp_gen3x1_pcie_pcs_misc_tbl), + }, + + .tbls_rc = &(const struct qmp_phy_cfg_tbls) { + 
.serdes = sc8280xp_qmp_gen3x1_pcie_rc_serdes_tbl, + .serdes_num = ARRAY_SIZE(sc8280xp_qmp_gen3x1_pcie_rc_serdes_tbl), + }, + + .clk_list = sc8280xp_pciephy_clk_l, + .num_clks = ARRAY_SIZE(sc8280xp_pciephy_clk_l), + .reset_list = sdm845_pciephy_reset_l, + .num_resets = ARRAY_SIZE(sdm845_pciephy_reset_l), + .vreg_list = qmp_phy_vreg_l, + .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), + .regs = sm8250_pcie_regs_layout, + + .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, + .phy_status = PHYSTATUS, +}; + +static const struct qmp_phy_cfg sc8280xp_qmp_gen3x2_pciephy_cfg = { + .lanes = 2, + + .offsets = &qmp_pcie_offsets_v5, + + .tbls = { + .serdes = sc8280xp_qmp_pcie_serdes_tbl, + .serdes_num = ARRAY_SIZE(sc8280xp_qmp_pcie_serdes_tbl), + .tx = sc8280xp_qmp_gen3x2_pcie_tx_tbl, + .tx_num = ARRAY_SIZE(sc8280xp_qmp_gen3x2_pcie_tx_tbl), + .rx = sc8280xp_qmp_gen3x2_pcie_rx_tbl, + .rx_num = ARRAY_SIZE(sc8280xp_qmp_gen3x2_pcie_rx_tbl), + .pcs = sc8280xp_qmp_gen3x2_pcie_pcs_tbl, + .pcs_num = ARRAY_SIZE(sc8280xp_qmp_gen3x2_pcie_pcs_tbl), + .pcs_misc = sc8280xp_qmp_gen3x2_pcie_pcs_misc_tbl, + .pcs_misc_num = ARRAY_SIZE(sc8280xp_qmp_gen3x2_pcie_pcs_misc_tbl), + }, + + .tbls_rc = &(const struct qmp_phy_cfg_tbls) { + .serdes = sc8280xp_qmp_gen3x2_pcie_rc_serdes_tbl, + .serdes_num = ARRAY_SIZE(sc8280xp_qmp_gen3x2_pcie_rc_serdes_tbl), + }, + + .clk_list = sc8280xp_pciephy_clk_l, + .num_clks = ARRAY_SIZE(sc8280xp_pciephy_clk_l), + .reset_list = sdm845_pciephy_reset_l, + .num_resets = ARRAY_SIZE(sdm845_pciephy_reset_l), + .vreg_list = qmp_phy_vreg_l, + .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), + .regs = sm8250_pcie_regs_layout, + + .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, + .phy_status = PHYSTATUS, +}; + static const struct qmp_phy_cfg sdx55_qmp_pciephy_cfg = { .lanes = 2, @@ -2220,11 +2452,49 @@ static int qmp_pcie_parse_dt_legacy(struct qmp_pcie *qmp, struct device_node *np return 0; } +static int qmp_pcie_parse_dt(struct qmp_pcie *qmp) +{ + struct platform_device *pdev = 
to_platform_device(qmp->dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; + const struct qmp_pcie_offsets *offs = cfg->offsets; + struct device *dev = qmp->dev; + void __iomem *base; + int ret; + + if (!offs) + return -EINVAL; + + base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(base)) + return PTR_ERR(base); + + qmp->serdes = base + offs->serdes; + qmp->pcs = base + offs->pcs; + qmp->pcs_misc = base + offs->pcs_misc; + qmp->tx = base + offs->tx; + qmp->rx = base + offs->rx; + + if (cfg->lanes >= 2) { + qmp->tx2 = base + offs->tx2; + qmp->rx2 = base + offs->rx2; + } + + qmp->num_pipe_clks = 2; + qmp->pipe_clks[0].id = "pipe"; + qmp->pipe_clks[1].id = "pipediv2"; + + ret = devm_clk_bulk_get(dev, qmp->num_pipe_clks, qmp->pipe_clks); + if (ret) + return ret; + + return 0; +} + static int qmp_pcie_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct device_node *child; struct phy_provider *phy_provider; + struct device_node *np; struct qmp_pcie *qmp; int ret; @@ -2253,21 +2523,24 @@ static int qmp_pcie_probe(struct platform_device *pdev) if (ret) return ret; - child = of_get_next_available_child(dev->of_node, NULL); - if (!child) - return -EINVAL; - - ret = qmp_pcie_parse_dt_legacy(qmp, child); + /* Check for legacy binding with child node. 
*/ + np = of_get_next_available_child(dev->of_node, NULL); + if (np) { + ret = qmp_pcie_parse_dt_legacy(qmp, np); + } else { + np = of_node_get(dev->of_node); + ret = qmp_pcie_parse_dt(qmp); + } if (ret) goto err_node_put; - ret = phy_pipe_clk_register(qmp, child); + ret = phy_pipe_clk_register(qmp, np); if (ret) goto err_node_put; qmp->mode = PHY_MODE_PCIE_RC; - qmp->phy = devm_phy_create(dev, child, &qmp_pcie_phy_ops); + qmp->phy = devm_phy_create(dev, np, &qmp_pcie_phy_ops); if (IS_ERR(qmp->phy)) { ret = PTR_ERR(qmp->phy); dev_err(dev, "failed to create PHY: %d\n", ret); @@ -2276,14 +2549,14 @@ static int qmp_pcie_probe(struct platform_device *pdev) phy_set_drvdata(qmp->phy, qmp); - of_node_put(child); + of_node_put(np); phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); return PTR_ERR_OR_ZERO(phy_provider); err_node_put: - of_node_put(child); + of_node_put(np); return ret; } @@ -2303,6 +2576,12 @@ static const struct of_device_id qmp_pcie_of_match_table[] = { }, { .compatible = "qcom,sc8180x-qmp-pcie-phy", .data = &sc8180x_pciephy_cfg, + }, { + .compatible = "qcom,sc8280xp-qmp-gen3x1-pcie-phy", + .data = &sc8280xp_qmp_gen3x1_pciephy_cfg, + }, { + .compatible = "qcom,sc8280xp-qmp-gen3x2-pcie-phy", + .data = &sc8280xp_qmp_gen3x2_pciephy_cfg, }, { .compatible = "qcom,sdm845-qhp-pcie-phy", .data = &sdm845_qhp_pciephy_cfg, diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5.h b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5.h index 2e19fb3f051e..a469ae2a10a1 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5.h +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5.h @@ -8,6 +8,8 @@ #define QCOM_PHY_QMP_PCS_PCIE_V5_H_ /* Only for QMP V5 PHY - PCS_PCIE registers */ +#define QPHY_V5_PCS_PCIE_POWER_STATE_CONFIG2 0x0c +#define QPHY_V5_PCS_PCIE_POWER_STATE_CONFIG4 0x14 #define QPHY_V5_PCS_PCIE_ENDPOINT_REFCLK_DRIVE 0x20 #define QPHY_V5_PCS_PCIE_INT_AUX_CLK_CONFIG1 0x54 #define QPHY_V5_PCS_PCIE_OSC_DTCT_ACTIONS 0x94 From 
6c37a02b25180350ed7bd199c074a79fe6d16e51 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 5 Nov 2022 15:59:39 +0100 Subject: [PATCH 1245/4122] phy: qcom-qmp-pcie: add support for sc8280xp 4-lane PHYs The PCIe2 and PCIe3 controllers and PHYs on SC8280XP can be used in 4-lane mode or as separate controllers and PHYs in 2-lane mode (e.g. as PCIe2A and PCIe2B). Add support for fetching the 4-lane configuration from the TCSR and programming the lane registers of the second port when in 4-lane mode. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221105145939.20318-17-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/Kconfig | 1 + drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 117 +++++++++++++++++++++++ 2 files changed, 118 insertions(+) diff --git a/drivers/phy/qualcomm/Kconfig b/drivers/phy/qualcomm/Kconfig index 5c98850f5a36..eb9ddc685b38 100644 --- a/drivers/phy/qualcomm/Kconfig +++ b/drivers/phy/qualcomm/Kconfig @@ -54,6 +54,7 @@ config PHY_QCOM_QMP tristate "Qualcomm QMP PHY Driver" depends on OF && COMMON_CLK && (ARCH_QCOM || COMPILE_TEST) select GENERIC_PHY + select MFD_SYSCON help Enable this to support the QMP PHY transceiver that is used with controllers such as PCIe, UFS, and USB on Qualcomm chips. 
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index f507a67a8361..111716e25b17 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -17,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -886,6 +888,10 @@ static const struct qmp_phy_init_tbl sc8280xp_qmp_gen3x2_pcie_rc_serdes_tbl[] = QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIAS_EN_CLKBUFLR_EN, 0x14), }; +static const struct qmp_phy_init_tbl sc8280xp_qmp_gen3x4_pcie_serdes_4ln_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIAS_EN_CLKBUFLR_EN, 0x1c), +}; + static const struct qmp_phy_init_tbl sc8280xp_qmp_gen3x1_pcie_tx_tbl[] = { QMP_PHY_INIT_CFG(QSERDES_V5_TX_PI_QEC_CTRL, 0x20), QMP_PHY_INIT_CFG(QSERDES_V5_TX_LANE_MODE_1, 0x75), @@ -1491,6 +1497,9 @@ struct qmp_phy_cfg { const struct qmp_phy_cfg_tbls *tbls_rc; const struct qmp_phy_cfg_tbls *tbls_ep; + const struct qmp_phy_init_tbl *serdes_4ln_tbl; + int serdes_4ln_num; + /* clock ids to be requested */ const char * const *clk_list; int num_clks; @@ -1518,6 +1527,7 @@ struct qmp_pcie { struct device *dev; const struct qmp_phy_cfg *cfg; + bool tcsr_4ln_config; void __iomem *serdes; void __iomem *pcs; @@ -1527,6 +1537,8 @@ struct qmp_pcie { void __iomem *tx2; void __iomem *rx2; + void __iomem *port_b; + struct clk_bulk_data *clks; struct clk_bulk_data pipe_clks[2]; int num_pipe_clks; @@ -1932,6 +1944,44 @@ static const struct qmp_phy_cfg sc8280xp_qmp_gen3x2_pciephy_cfg = { .phy_status = PHYSTATUS, }; +static const struct qmp_phy_cfg sc8280xp_qmp_gen3x4_pciephy_cfg = { + .lanes = 4, + + .offsets = &qmp_pcie_offsets_v5, + + .tbls = { + .serdes = sc8280xp_qmp_pcie_serdes_tbl, + .serdes_num = ARRAY_SIZE(sc8280xp_qmp_pcie_serdes_tbl), + .tx = sc8280xp_qmp_gen3x2_pcie_tx_tbl, + .tx_num = ARRAY_SIZE(sc8280xp_qmp_gen3x2_pcie_tx_tbl), + .rx = sc8280xp_qmp_gen3x2_pcie_rx_tbl, + 
.rx_num = ARRAY_SIZE(sc8280xp_qmp_gen3x2_pcie_rx_tbl), + .pcs = sc8280xp_qmp_gen3x2_pcie_pcs_tbl, + .pcs_num = ARRAY_SIZE(sc8280xp_qmp_gen3x2_pcie_pcs_tbl), + .pcs_misc = sc8280xp_qmp_gen3x2_pcie_pcs_misc_tbl, + .pcs_misc_num = ARRAY_SIZE(sc8280xp_qmp_gen3x2_pcie_pcs_misc_tbl), + }, + + .tbls_rc = &(const struct qmp_phy_cfg_tbls) { + .serdes = sc8280xp_qmp_gen3x2_pcie_rc_serdes_tbl, + .serdes_num = ARRAY_SIZE(sc8280xp_qmp_gen3x2_pcie_rc_serdes_tbl), + }, + + .serdes_4ln_tbl = sc8280xp_qmp_gen3x4_pcie_serdes_4ln_tbl, + .serdes_4ln_num = ARRAY_SIZE(sc8280xp_qmp_gen3x4_pcie_serdes_4ln_tbl), + + .clk_list = sc8280xp_pciephy_clk_l, + .num_clks = ARRAY_SIZE(sc8280xp_pciephy_clk_l), + .reset_list = sdm845_pciephy_reset_l, + .num_resets = ARRAY_SIZE(sdm845_pciephy_reset_l), + .vreg_list = qmp_phy_vreg_l, + .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), + .regs = sm8250_pcie_regs_layout, + + .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, + .phy_status = PHYSTATUS, +}; + static const struct qmp_phy_cfg sdx55_qmp_pciephy_cfg = { .lanes = 2, @@ -2054,6 +2104,24 @@ static void qmp_pcie_configure(void __iomem *base, qmp_pcie_configure_lane(base, tbl, num, 0xff); } +static void qmp_pcie_init_port_b(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tbls *tbls) +{ + const struct qmp_phy_cfg *cfg = qmp->cfg; + const struct qmp_pcie_offsets *offs = cfg->offsets; + void __iomem *tx3, *rx3, *tx4, *rx4; + + tx3 = qmp->port_b + offs->tx; + rx3 = qmp->port_b + offs->rx; + tx4 = qmp->port_b + offs->tx2; + rx4 = qmp->port_b + offs->rx2; + + qmp_pcie_configure_lane(tx3, tbls->tx, tbls->tx_num, 1); + qmp_pcie_configure_lane(rx3, tbls->rx, tbls->rx_num, 1); + + qmp_pcie_configure_lane(tx4, tbls->tx, tbls->tx_num, 2); + qmp_pcie_configure_lane(rx4, tbls->rx, tbls->rx_num, 2); +} + static void qmp_pcie_init_registers(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tbls *tbls) { const struct qmp_phy_cfg *cfg = qmp->cfg; @@ -2080,6 +2148,11 @@ static void qmp_pcie_init_registers(struct qmp_pcie *qmp, const 
struct qmp_phy_c qmp_pcie_configure(pcs, tbls->pcs, tbls->pcs_num); qmp_pcie_configure(pcs_misc, tbls->pcs_misc, tbls->pcs_misc_num); + + if (cfg->lanes >= 4 && qmp->tcsr_4ln_config) { + qmp_pcie_configure(serdes, cfg->serdes_4ln_tbl, cfg->serdes_4ln_num); + qmp_pcie_init_port_b(qmp, tbls); + } } static int qmp_pcie_init(struct phy *phy) @@ -2452,6 +2525,37 @@ static int qmp_pcie_parse_dt_legacy(struct qmp_pcie *qmp, struct device_node *np return 0; } +static int qmp_pcie_get_4ln_config(struct qmp_pcie *qmp) +{ + struct regmap *tcsr; + unsigned int args[2]; + int ret; + + tcsr = syscon_regmap_lookup_by_phandle_args(qmp->dev->of_node, + "qcom,4ln-config-sel", + ARRAY_SIZE(args), args); + if (IS_ERR(tcsr)) { + ret = PTR_ERR(tcsr); + if (ret == -ENOENT) + return 0; + + dev_err(qmp->dev, "failed to lookup syscon: %d\n", ret); + return ret; + } + + ret = regmap_test_bits(tcsr, args[0], BIT(args[1])); + if (ret < 0) { + dev_err(qmp->dev, "failed to read tcsr: %d\n", ret); + return ret; + } + + qmp->tcsr_4ln_config = ret; + + dev_dbg(qmp->dev, "4ln_config_sel = %d\n", qmp->tcsr_4ln_config); + + return 0; +} + static int qmp_pcie_parse_dt(struct qmp_pcie *qmp) { struct platform_device *pdev = to_platform_device(qmp->dev); @@ -2464,6 +2568,10 @@ static int qmp_pcie_parse_dt(struct qmp_pcie *qmp) if (!offs) return -EINVAL; + ret = qmp_pcie_get_4ln_config(qmp); + if (ret) + return ret; + base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(base)) return PTR_ERR(base); @@ -2479,6 +2587,12 @@ static int qmp_pcie_parse_dt(struct qmp_pcie *qmp) qmp->rx2 = base + offs->rx2; } + if (qmp->cfg->lanes >= 4 && qmp->tcsr_4ln_config) { + qmp->port_b = devm_platform_ioremap_resource(pdev, 1); + if (IS_ERR(qmp->port_b)) + return PTR_ERR(qmp->port_b); + } + qmp->num_pipe_clks = 2; qmp->pipe_clks[0].id = "pipe"; qmp->pipe_clks[1].id = "pipediv2"; @@ -2582,6 +2696,9 @@ static const struct of_device_id qmp_pcie_of_match_table[] = { }, { .compatible = "qcom,sc8280xp-qmp-gen3x2-pcie-phy", 
.data = &sc8280xp_qmp_gen3x2_pciephy_cfg, + }, { + .compatible = "qcom,sc8280xp-qmp-gen3x4-pcie-phy", + .data = &sc8280xp_qmp_gen3x4_pciephy_cfg, }, { .compatible = "qcom,sdm845-qhp-pcie-phy", .data = &sdm845_qhp_pciephy_cfg, From 2584068a9ef4a7bff3b9302dd058a4c95ce68631 Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Thu, 3 Nov 2022 22:21:24 +0100 Subject: [PATCH 1246/4122] phy: qcom-qmp-pcie: split pcs_misc init cfg for ipq8074 pcs table Commit af6643242d3a ("phy: qcom-qmp-pcie: split pcs_misc region for ipq6018 pcie gen3") reworked the pcs regs values and removed the 0x400 offset for each pcs_misc regs. This change caused the malfunction of ipq8074 downstream since it still has the legacy pcs table where pcs_misc are not placed on a different table and instead put together assuming the offset of 0x400 for the related pcs_misc regs. Split pcs_misc init cfg from the ipq8074 pcs init table to be handled correctly to prepare for actual support for gen3 pcie for ipq8074. Fixes: af6643242d3a ("phy: qcom-qmp-pcie: split pcs_misc region for ipq6018 pcie gen3") Reported-by: Robert Marko Tested-by: Robert Marko Signed-off-by: Christian Marangi Link: https://lore.kernel.org/r/20221103212125.17156-1-ansuelsmth@gmail.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index 111716e25b17..43bd4576bee2 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -487,6 +487,13 @@ static const struct qmp_phy_init_tbl ipq8074_pcie_gen3_pcs_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V4_PCS_FLL_CNTRL1, 0x01), QMP_PHY_INIT_CFG(QPHY_V4_PCS_P2U3_WAKEUP_DLY_TIME_AUXCLK_H, 0x0), QMP_PHY_INIT_CFG(QPHY_V4_PCS_P2U3_WAKEUP_DLY_TIME_AUXCLK_L, 0x1), + QMP_PHY_INIT_CFG(QPHY_V4_PCS_G12S1_TXDEEMPH_M3P5DB, 0x10), + QMP_PHY_INIT_CFG(QPHY_V4_PCS_RX_DCC_CAL_CONFIG, 0x01), + 
QMP_PHY_INIT_CFG(QPHY_V4_PCS_RX_SIGDET_LVL, 0xaa), + QMP_PHY_INIT_CFG(QPHY_V4_PCS_REFGEN_REQ_CONFIG1, 0x0d), +}; + +static const struct qmp_phy_init_tbl ipq8074_pcie_gen3_pcs_misc_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V4_PCS_PCIE_OSC_DTCT_ACTIONS, 0x0), QMP_PHY_INIT_CFG(QPHY_V4_PCS_PCIE_L1P1_WAKEUP_DLY_TIME_AUXCLK_H, 0x00), QMP_PHY_INIT_CFG(QPHY_V4_PCS_PCIE_L1P1_WAKEUP_DLY_TIME_AUXCLK_L, 0x01), @@ -499,11 +506,7 @@ static const struct qmp_phy_init_tbl ipq8074_pcie_gen3_pcs_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V4_PCS_PCIE_OSC_DTCT_MODE2_CONFIG2, 0x50), QMP_PHY_INIT_CFG(QPHY_V4_PCS_PCIE_OSC_DTCT_MODE2_CONFIG4, 0x1a), QMP_PHY_INIT_CFG(QPHY_V4_PCS_PCIE_OSC_DTCT_MODE2_CONFIG5, 0x6), - QMP_PHY_INIT_CFG(QPHY_V4_PCS_G12S1_TXDEEMPH_M3P5DB, 0x10), QMP_PHY_INIT_CFG(QPHY_V4_PCS_PCIE_ENDPOINT_REFCLK_DRIVE, 0xc1), - QMP_PHY_INIT_CFG(QPHY_V4_PCS_RX_DCC_CAL_CONFIG, 0x01), - QMP_PHY_INIT_CFG(QPHY_V4_PCS_RX_SIGDET_LVL, 0xaa), - QMP_PHY_INIT_CFG(QPHY_V4_PCS_REFGEN_REQ_CONFIG1, 0x0d), }; static const struct qmp_phy_init_tbl sdm845_qmp_pcie_serdes_tbl[] = { @@ -1652,6 +1655,8 @@ static const struct qmp_phy_cfg ipq8074_pciephy_gen3_cfg = { .rx_num = ARRAY_SIZE(ipq8074_pcie_gen3_rx_tbl), .pcs = ipq8074_pcie_gen3_pcs_tbl, .pcs_num = ARRAY_SIZE(ipq8074_pcie_gen3_pcs_tbl), + .pcs_misc = ipq8074_pcie_gen3_pcs_misc_tbl, + .pcs_misc_num = ARRAY_SIZE(ipq8074_pcie_gen3_pcs_misc_tbl), }, .clk_list = ipq8074_pciephy_clk_l, .num_clks = ARRAY_SIZE(ipq8074_pciephy_clk_l), From 9ddcd920f8edfe65c3670fbd0b49db00e1e562fe Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Wed, 2 Nov 2022 13:48:34 +0530 Subject: [PATCH 1247/4122] phy: qcom-qmp-pcie: Fix high latency with 4x2 PHY when ASPM is enabled The PCIe QMP 4x2 RC PHY generates high latency when ASPM is enabled. This seems to be fixed by clearing the QPHY_V5_20_PCS_PCIE_PRESET_P10_POST register of the pcs_misc register space. 
Fixes: 2c91bf6bf290 ("phy: qcom-qmp: Add SM8450 PCIe1 PHY support") Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20221102081835.41892-1-manivannan.sadhasivam@linaro.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 1 + drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5_20.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index 43bd4576bee2..cb45f53a965b 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -1430,6 +1430,7 @@ static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_pcs_misc_tbl[] = { static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_rc_pcs_misc_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_ENDPOINT_REFCLK_DRIVE, 0xc1), QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_OSC_DTCT_ACTIONS, 0x00), + QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_PRESET_P10_POST, 0x00), }; static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_ep_serdes_tbl[] = { diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5_20.h b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5_20.h index c9fa90b45475..3d9713d348fe 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5_20.h +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5_20.h @@ -11,6 +11,7 @@ #define QPHY_V5_20_PCS_PCIE_OSC_DTCT_MODE2_CONFIG5 0x084 #define QPHY_V5_20_PCS_PCIE_OSC_DTCT_ACTIONS 0x090 #define QPHY_V5_20_PCS_PCIE_EQ_CONFIG1 0x0a0 +#define QPHY_V5_20_PCS_PCIE_PRESET_P10_POST 0x0e0 #define QPHY_V5_20_PCS_PCIE_G4_EQ_CONFIG5 0x108 #define QPHY_V5_20_PCS_PCIE_G4_PRE_GAIN 0x15c #define QPHY_V5_20_PCS_PCIE_RX_MARGINING_CONFIG3 0x184 From 883aebf6e1ea88145d64dcf940dbcb5181313338 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Wed, 2 Nov 2022 13:48:35 +0530 Subject: [PATCH 1248/4122] phy: qcom-qmp-pcie: Fix sm8450_qmp_gen4x2_pcie_pcs_tbl[] register names sm8450_qmp_gen4x2_pcie_pcs_tbl[] contains the init sequence for PCS 
registers of QMP PHY v5.20. So use the v5.20 specific register names. Only major change is the rename of PCS_EQ_CONFIG{2/3} registers to PCS_EQ_CONFIG{4/5}. Fixes: 2c91bf6bf290 ("phy: qcom-qmp: Add SM8450 PCIe1 PHY support") Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20221102081835.41892-2-manivannan.sadhasivam@linaro.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 8 ++++---- drivers/phy/qualcomm/phy-qcom-qmp-pcs-v5_20.h | 14 ++++++++++++++ drivers/phy/qualcomm/phy-qcom-qmp.h | 1 + 3 files changed, 19 insertions(+), 4 deletions(-) create mode 100644 drivers/phy/qualcomm/phy-qcom-qmp-pcs-v5_20.h diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index cb45f53a965b..47cccc4b35b2 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -1414,10 +1414,10 @@ static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_rx_tbl[] = { }; static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_pcs_tbl[] = { - QMP_PHY_INIT_CFG(QPHY_V5_PCS_EQ_CONFIG2, 0x16), - QMP_PHY_INIT_CFG(QPHY_V5_PCS_EQ_CONFIG3, 0x22), - QMP_PHY_INIT_CFG(QPHY_V5_PCS_G3S2_PRE_GAIN, 0x2e), - QMP_PHY_INIT_CFG(QPHY_V5_PCS_RX_SIGDET_LVL, 0x99), + QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_EQ_CONFIG4, 0x16), + QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_EQ_CONFIG5, 0x22), + QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_G3S2_PRE_GAIN, 0x2e), + QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_RX_SIGDET_LVL, 0x99), }; static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_pcs_misc_tbl[] = { diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-v5_20.h b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-v5_20.h new file mode 100644 index 000000000000..9a5a20daf62c --- /dev/null +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-v5_20.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2022, Linaro Ltd. 
+ */ + +#ifndef QCOM_PHY_QMP_PCS_V5_20_H_ +#define QCOM_PHY_QMP_PCS_V5_20_H_ + +#define QPHY_V5_20_PCS_G3S2_PRE_GAIN 0x170 +#define QPHY_V5_20_PCS_RX_SIGDET_LVL 0x188 +#define QPHY_V5_20_PCS_EQ_CONFIG4 0x1e0 +#define QPHY_V5_20_PCS_EQ_CONFIG5 0x1e4 + +#endif diff --git a/drivers/phy/qualcomm/phy-qcom-qmp.h b/drivers/phy/qualcomm/phy-qcom-qmp.h index 26274e3c0cf9..29a48f0436d2 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp.h +++ b/drivers/phy/qualcomm/phy-qcom-qmp.h @@ -38,6 +38,7 @@ #include "phy-qcom-qmp-pcs-pcie-v4_20.h" #include "phy-qcom-qmp-pcs-v5.h" +#include "phy-qcom-qmp-pcs-v5_20.h" #include "phy-qcom-qmp-pcs-pcie-v5.h" #include "phy-qcom-qmp-pcs-usb-v5.h" #include "phy-qcom-qmp-pcs-ufs-v5.h" From d7abac084536b6d7efcc0c1edc7d9035c34314d9 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Tue, 8 Nov 2022 09:54:59 +0900 Subject: [PATCH 1249/4122] dt-bindings: phy: renesas: Document Renesas Ethernet SERDES Document Renesas Ethernet SERDES for R-Car S4-8 (r8a779f0). Signed-off-by: Yoshihiro Shimoda Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221108005500.3011449-2-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Vinod Koul --- .../phy/renesas,r8a779f0-ether-serdes.yaml | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 Documentation/devicetree/bindings/phy/renesas,r8a779f0-ether-serdes.yaml diff --git a/Documentation/devicetree/bindings/phy/renesas,r8a779f0-ether-serdes.yaml b/Documentation/devicetree/bindings/phy/renesas,r8a779f0-ether-serdes.yaml new file mode 100644 index 000000000000..93ab72874228 --- /dev/null +++ b/Documentation/devicetree/bindings/phy/renesas,r8a779f0-ether-serdes.yaml @@ -0,0 +1,54 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/phy/renesas,r8a779f0-ether-serdes.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Renesas Ethernet SERDES + +maintainers: + - Yoshihiro Shimoda + +properties: + 
compatible: + const: renesas,r8a779f0-ether-serdes + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + + resets: + maxItems: 1 + + power-domains: + maxItems: 1 + + '#phy-cells': + description: Port number of SERDES. + const: 1 + +required: + - compatible + - reg + - clocks + - resets + - power-domains + - '#phy-cells' + +additionalProperties: false + +examples: + - | + #include + #include + + phy@e6444000 { + compatible = "renesas,r8a779f0-ether-serdes"; + reg = <0xe6444000 0xc00>; + clocks = <&cpg CPG_MOD 1506>; + power-domains = <&sysc R8A779F0_PD_ALWAYS_ON>; + resets = <&cpg 1506>; + #phy-cells = <1>; + }; From 742859441d44be4b408274206244011a09618a91 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Tue, 8 Nov 2022 09:55:00 +0900 Subject: [PATCH 1250/4122] phy: renesas: Add Renesas Ethernet SERDES driver for R-Car S4-8 Add Renesas Ethernet SERDES driver for R-Car S4-8 (r8a779f0). The datasheet describes initialization procedure without any information about registers' name/bits. So, this is all black magic to initialize the hardware. Especially, all channels should be initialized at once. Signed-off-by: Yoshihiro Shimoda Link: https://lore.kernel.org/r/20221108005500.3011449-3-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Vinod Koul --- drivers/phy/renesas/Kconfig | 8 + drivers/phy/renesas/Makefile | 1 + drivers/phy/renesas/r8a779f0-ether-serdes.c | 417 ++++++++++++++++++++ 3 files changed, 426 insertions(+) create mode 100644 drivers/phy/renesas/r8a779f0-ether-serdes.c diff --git a/drivers/phy/renesas/Kconfig b/drivers/phy/renesas/Kconfig index 111bdcae775c..36505fc5f386 100644 --- a/drivers/phy/renesas/Kconfig +++ b/drivers/phy/renesas/Kconfig @@ -2,6 +2,14 @@ # # Phy drivers for Renesas platforms # +# NOTE: Please sorted config names alphabetically. 
+config PHY_R8A779F0_ETHERNET_SERDES + tristate "Renesas R-Car S4-8 Ethernet SERDES driver" + depends on ARCH_RENESAS || COMPILE_TEST + select GENERIC_PHY + help + Support for Ethernet SERDES found on Renesas R-Car S4-8 SoCs. + config PHY_RCAR_GEN2 tristate "Renesas R-Car generation 2 USB PHY driver" depends on ARCH_RENESAS diff --git a/drivers/phy/renesas/Makefile b/drivers/phy/renesas/Makefile index b599ff8a4349..8896d1919faa 100644 --- a/drivers/phy/renesas/Makefile +++ b/drivers/phy/renesas/Makefile @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_PHY_R8A779F0_ETHERNET_SERDES) += r8a779f0-ether-serdes.o obj-$(CONFIG_PHY_RCAR_GEN2) += phy-rcar-gen2.o obj-$(CONFIG_PHY_RCAR_GEN3_PCIE) += phy-rcar-gen3-pcie.o obj-$(CONFIG_PHY_RCAR_GEN3_USB2) += phy-rcar-gen3-usb2.o diff --git a/drivers/phy/renesas/r8a779f0-ether-serdes.c b/drivers/phy/renesas/r8a779f0-ether-serdes.c new file mode 100644 index 000000000000..ec6594e6dc27 --- /dev/null +++ b/drivers/phy/renesas/r8a779f0-ether-serdes.c @@ -0,0 +1,417 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Renesas Ethernet SERDES device driver + * + * Copyright (C) 2022 Renesas Electronics Corporation + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define R8A779F0_ETH_SERDES_NUM 3 +#define R8A779F0_ETH_SERDES_OFFSET 0x0400 +#define R8A779F0_ETH_SERDES_BANK_SELECT 0x03fc +#define R8A779F0_ETH_SERDES_TIMEOUT_US 100000 +#define R8A779F0_ETH_SERDES_NUM_RETRY_LINKUP 3 +#define R8A779F0_ETH_SERDES_NUM_RETRY_INIT 3 + +struct r8a779f0_eth_serdes_drv_data; +struct r8a779f0_eth_serdes_channel { + struct r8a779f0_eth_serdes_drv_data *dd; + struct phy *phy; + void __iomem *addr; + phy_interface_t phy_interface; + int speed; + int index; +}; + +struct r8a779f0_eth_serdes_drv_data { + void __iomem *addr; + struct platform_device *pdev; + struct reset_control *reset; + struct r8a779f0_eth_serdes_channel channel[R8A779F0_ETH_SERDES_NUM]; + bool initialized; +}; + +/* + * The datasheet 
describes initialization procedure without any information + * about registers' name/bits. So, this is all black magic to initialize + * the hardware. + */ +static void r8a779f0_eth_serdes_write32(void __iomem *addr, u32 offs, u32 bank, u32 data) +{ + iowrite32(bank, addr + R8A779F0_ETH_SERDES_BANK_SELECT); + iowrite32(data, addr + offs); +} + +static int +r8a779f0_eth_serdes_reg_wait(struct r8a779f0_eth_serdes_channel *channel, + u32 offs, u32 bank, u32 mask, u32 expected) +{ + int ret; + u32 val; + + iowrite32(bank, channel->addr + R8A779F0_ETH_SERDES_BANK_SELECT); + + ret = readl_poll_timeout_atomic(channel->addr + offs, val, + (val & mask) == expected, + 1, R8A779F0_ETH_SERDES_TIMEOUT_US); + if (ret) + dev_dbg(&channel->phy->dev, + "%s: index %d, offs %x, bank %x, mask %x, expected %x\n", + __func__, channel->index, offs, bank, mask, expected); + + return ret; +} + +static int +r8a779f0_eth_serdes_common_init_ram(struct r8a779f0_eth_serdes_drv_data *dd) +{ + struct r8a779f0_eth_serdes_channel *channel; + int i, ret; + + for (i = 0; i < R8A779F0_ETH_SERDES_NUM; i++) { + channel = &dd->channel[i]; + ret = r8a779f0_eth_serdes_reg_wait(channel, 0x026c, 0x180, BIT(0), 0x01); + if (ret) + return ret; + } + + r8a779f0_eth_serdes_write32(dd->addr, 0x026c, 0x180, 0x03); + + return ret; +} + +static int +r8a779f0_eth_serdes_common_setting(struct r8a779f0_eth_serdes_channel *channel) +{ + struct r8a779f0_eth_serdes_drv_data *dd = channel->dd; + + switch (channel->phy_interface) { + case PHY_INTERFACE_MODE_SGMII: + r8a779f0_eth_serdes_write32(dd->addr, 0x0244, 0x180, 0x0097); + r8a779f0_eth_serdes_write32(dd->addr, 0x01d0, 0x180, 0x0060); + r8a779f0_eth_serdes_write32(dd->addr, 0x01d8, 0x180, 0x2200); + r8a779f0_eth_serdes_write32(dd->addr, 0x01d4, 0x180, 0x0000); + r8a779f0_eth_serdes_write32(dd->addr, 0x01e0, 0x180, 0x003d); + return 0; + default: + return -EOPNOTSUPP; + } +} + +static int +r8a779f0_eth_serdes_chan_setting(struct r8a779f0_eth_serdes_channel *channel) +{ 
+ int ret; + + switch (channel->phy_interface) { + case PHY_INTERFACE_MODE_SGMII: + r8a779f0_eth_serdes_write32(channel->addr, 0x0000, 0x380, 0x2000); + r8a779f0_eth_serdes_write32(channel->addr, 0x01c0, 0x180, 0x0011); + r8a779f0_eth_serdes_write32(channel->addr, 0x0248, 0x180, 0x0540); + r8a779f0_eth_serdes_write32(channel->addr, 0x0258, 0x180, 0x0015); + r8a779f0_eth_serdes_write32(channel->addr, 0x0144, 0x180, 0x0100); + r8a779f0_eth_serdes_write32(channel->addr, 0x01a0, 0x180, 0x0000); + r8a779f0_eth_serdes_write32(channel->addr, 0x00d0, 0x180, 0x0002); + r8a779f0_eth_serdes_write32(channel->addr, 0x0150, 0x180, 0x0003); + r8a779f0_eth_serdes_write32(channel->addr, 0x00c8, 0x180, 0x0100); + r8a779f0_eth_serdes_write32(channel->addr, 0x0148, 0x180, 0x0100); + r8a779f0_eth_serdes_write32(channel->addr, 0x0174, 0x180, 0x0000); + r8a779f0_eth_serdes_write32(channel->addr, 0x0160, 0x180, 0x0007); + r8a779f0_eth_serdes_write32(channel->addr, 0x01ac, 0x180, 0x0000); + r8a779f0_eth_serdes_write32(channel->addr, 0x00c4, 0x180, 0x0310); + r8a779f0_eth_serdes_write32(channel->addr, 0x00c8, 0x380, 0x0101); + ret = r8a779f0_eth_serdes_reg_wait(channel, 0x00c8, 0x0180, BIT(0), 0); + if (ret) + return ret; + + r8a779f0_eth_serdes_write32(channel->addr, 0x0148, 0x180, 0x0101); + ret = r8a779f0_eth_serdes_reg_wait(channel, 0x0148, 0x0180, BIT(0), 0); + if (ret) + return ret; + + r8a779f0_eth_serdes_write32(channel->addr, 0x00c4, 0x180, 0x1310); + r8a779f0_eth_serdes_write32(channel->addr, 0x00d8, 0x180, 0x1800); + r8a779f0_eth_serdes_write32(channel->addr, 0x00dc, 0x180, 0x0000); + r8a779f0_eth_serdes_write32(channel->addr, 0x001c, 0x300, 0x0001); + r8a779f0_eth_serdes_write32(channel->addr, 0x0000, 0x380, 0x2100); + ret = r8a779f0_eth_serdes_reg_wait(channel, 0x0000, 0x0380, BIT(8), 0); + if (ret) + return ret; + + if (channel->speed == 1000) + r8a779f0_eth_serdes_write32(channel->addr, 0x0000, 0x1f00, 0x0140); + else if (channel->speed == 100) + 
r8a779f0_eth_serdes_write32(channel->addr, 0x0000, 0x1f00, 0x2100); + + /* For AN_ON */ + r8a779f0_eth_serdes_write32(channel->addr, 0x0004, 0x1f80, 0x0005); + r8a779f0_eth_serdes_write32(channel->addr, 0x0028, 0x1f80, 0x07a1); + r8a779f0_eth_serdes_write32(channel->addr, 0x0000, 0x1f80, 0x0208); + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static int +r8a779f0_eth_serdes_chan_speed(struct r8a779f0_eth_serdes_channel *channel) +{ + int ret; + + switch (channel->phy_interface) { + case PHY_INTERFACE_MODE_SGMII: + /* For AN_ON */ + if (channel->speed == 1000) + r8a779f0_eth_serdes_write32(channel->addr, 0x0000, 0x1f00, 0x1140); + else if (channel->speed == 100) + r8a779f0_eth_serdes_write32(channel->addr, 0x0000, 0x1f00, 0x3100); + ret = r8a779f0_eth_serdes_reg_wait(channel, 0x0008, 0x1f80, BIT(0), 1); + if (ret) + return ret; + r8a779f0_eth_serdes_write32(channel->addr, 0x0008, 0x1f80, 0x0000); + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + + +static int r8a779f0_eth_serdes_monitor_linkup(struct r8a779f0_eth_serdes_channel *channel) +{ + int i, ret; + + for (i = 0; i < R8A779F0_ETH_SERDES_NUM_RETRY_LINKUP; i++) { + ret = r8a779f0_eth_serdes_reg_wait(channel, 0x0004, 0x300, + BIT(2), BIT(2)); + if (!ret) + break; + + /* restart */ + r8a779f0_eth_serdes_write32(channel->addr, 0x0144, 0x180, 0x0100); + udelay(1); + r8a779f0_eth_serdes_write32(channel->addr, 0x0144, 0x180, 0x0000); + } + + return ret; +} + +static int r8a779f0_eth_serdes_hw_init(struct r8a779f0_eth_serdes_channel *channel) +{ + struct r8a779f0_eth_serdes_drv_data *dd = channel->dd; + int i, ret; + + if (dd->initialized) + return 0; + + ret = r8a779f0_eth_serdes_common_init_ram(dd); + if (ret) + return ret; + + for (i = 0; i < R8A779F0_ETH_SERDES_NUM; i++) { + ret = r8a779f0_eth_serdes_reg_wait(&dd->channel[i], 0x0000, + 0x300, BIT(15), 0); + if (ret) + return ret; + } + + for (i = 0; i < R8A779F0_ETH_SERDES_NUM; i++) + 
r8a779f0_eth_serdes_write32(dd->channel[i].addr, 0x03d4, 0x380, 0x0443); + + ret = r8a779f0_eth_serdes_common_setting(channel); + if (ret) + return ret; + + for (i = 0; i < R8A779F0_ETH_SERDES_NUM; i++) + r8a779f0_eth_serdes_write32(dd->channel[i].addr, 0x03d0, 0x380, 0x0001); + + + r8a779f0_eth_serdes_write32(dd->addr, 0x0000, 0x380, 0x8000); + + ret = r8a779f0_eth_serdes_common_init_ram(dd); + if (ret) + return ret; + + ret = r8a779f0_eth_serdes_reg_wait(&dd->channel[0], 0x0000, 0x380, BIT(15), 0); + if (ret) + return ret; + + for (i = 0; i < R8A779F0_ETH_SERDES_NUM; i++) { + ret = r8a779f0_eth_serdes_chan_setting(&dd->channel[i]); + if (ret) + return ret; + } + + for (i = 0; i < R8A779F0_ETH_SERDES_NUM; i++) { + ret = r8a779f0_eth_serdes_chan_speed(&dd->channel[i]); + if (ret) + return ret; + } + + for (i = 0; i < R8A779F0_ETH_SERDES_NUM; i++) + r8a779f0_eth_serdes_write32(dd->channel[i].addr, 0x03c0, 0x380, 0x0000); + for (i = 0; i < R8A779F0_ETH_SERDES_NUM; i++) + r8a779f0_eth_serdes_write32(dd->channel[i].addr, 0x03d0, 0x380, 0x0000); + + for (i = 0; i < R8A779F0_ETH_SERDES_NUM; i++) { + ret = r8a779f0_eth_serdes_monitor_linkup(&dd->channel[i]); + if (ret) + return ret; + } + + return 0; +} + +static int r8a779f0_eth_serdes_init(struct phy *p) +{ + struct r8a779f0_eth_serdes_channel *channel = phy_get_drvdata(p); + int i, ret; + + for (i = 0; i < R8A779F0_ETH_SERDES_NUM_RETRY_INIT; i++) { + ret = r8a779f0_eth_serdes_hw_init(channel); + if (!ret) { + channel->dd->initialized = true; + break; + } + usleep_range(1000, 2000); + } + + return ret; +} + +static int r8a779f0_eth_serdes_set_mode(struct phy *p, enum phy_mode mode, + int submode) +{ + struct r8a779f0_eth_serdes_channel *channel = phy_get_drvdata(p); + + if (mode != PHY_MODE_ETHERNET) + return -EOPNOTSUPP; + + switch (submode) { + case PHY_INTERFACE_MODE_GMII: + case PHY_INTERFACE_MODE_SGMII: + case PHY_INTERFACE_MODE_USXGMII: + channel->phy_interface = submode; + return 0; + default: + return 
-EOPNOTSUPP; + } +} + +static int r8a779f0_eth_serdes_set_speed(struct phy *p, int speed) +{ + struct r8a779f0_eth_serdes_channel *channel = phy_get_drvdata(p); + + channel->speed = speed; + + return 0; +} + +static const struct phy_ops r8a779f0_eth_serdes_ops = { + .init = r8a779f0_eth_serdes_init, + .set_mode = r8a779f0_eth_serdes_set_mode, + .set_speed = r8a779f0_eth_serdes_set_speed, +}; + +static struct phy *r8a779f0_eth_serdes_xlate(struct device *dev, + struct of_phandle_args *args) +{ + struct r8a779f0_eth_serdes_drv_data *dd = dev_get_drvdata(dev); + + if (args->args[0] >= R8A779F0_ETH_SERDES_NUM) + return ERR_PTR(-ENODEV); + + return dd->channel[args->args[0]].phy; +} + +static const struct of_device_id r8a779f0_eth_serdes_of_table[] = { + { .compatible = "renesas,r8a779f0-ether-serdes", }, + { } +}; +MODULE_DEVICE_TABLE(of, r8a779f0_eth_serdes_of_table); + +static int r8a779f0_eth_serdes_probe(struct platform_device *pdev) +{ + struct r8a779f0_eth_serdes_drv_data *dd; + struct phy_provider *provider; + struct resource *res; + int i; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(&pdev->dev, "invalid resource\n"); + return -EINVAL; + } + + dd = devm_kzalloc(&pdev->dev, sizeof(*dd), GFP_KERNEL); + if (!dd) + return -ENOMEM; + + platform_set_drvdata(pdev, dd); + dd->pdev = pdev; + dd->addr = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(dd->addr)) + return PTR_ERR(dd->addr); + + dd->reset = devm_reset_control_get(&pdev->dev, NULL); + if (IS_ERR(dd->reset)) + return PTR_ERR(dd->reset); + + reset_control_reset(dd->reset); + + for (i = 0; i < R8A779F0_ETH_SERDES_NUM; i++) { + struct r8a779f0_eth_serdes_channel *channel = &dd->channel[i]; + + channel->phy = devm_phy_create(&pdev->dev, NULL, + &r8a779f0_eth_serdes_ops); + if (IS_ERR(channel->phy)) + return PTR_ERR(channel->phy); + channel->addr = dd->addr + R8A779F0_ETH_SERDES_OFFSET * i; + channel->dd = dd; + channel->index = i; + phy_set_drvdata(channel->phy, channel); 
+ } + + provider = devm_of_phy_provider_register(&pdev->dev, + r8a779f0_eth_serdes_xlate); + if (IS_ERR(provider)) + return PTR_ERR(provider); + + pm_runtime_enable(&pdev->dev); + pm_runtime_get_sync(&pdev->dev); + + return 0; +} + +static int r8a779f0_eth_serdes_remove(struct platform_device *pdev) +{ + pm_runtime_put(&pdev->dev); + pm_runtime_disable(&pdev->dev); + + platform_set_drvdata(pdev, NULL); + + return 0; +} + +static struct platform_driver r8a779f0_eth_serdes_driver_platform = { + .probe = r8a779f0_eth_serdes_probe, + .remove = r8a779f0_eth_serdes_remove, + .driver = { + .name = "r8a779f0_eth_serdes", + .of_match_table = r8a779f0_eth_serdes_of_table, + } +}; +module_platform_driver(r8a779f0_eth_serdes_driver_platform); +MODULE_AUTHOR("Yoshihiro Shimoda"); +MODULE_DESCRIPTION("Renesas Ethernet SERDES device driver"); +MODULE_LICENSE("GPL"); From 5c20311d76cbaeb7ed2ecf9c8b8322f8fc4a7ae3 Mon Sep 17 00:00:00 2001 From: Leonid Ravich Date: Wed, 9 Nov 2022 11:57:17 +0200 Subject: [PATCH 1251/4122] IB/mad: Don't call to function that might sleep while in atomic context Tracepoints are not allowed to sleep, as such the following splat is generated due to call to ib_query_pkey() in atomic context. 
WARNING: CPU: 0 PID: 1888000 at kernel/trace/ring_buffer.c:2492 rb_commit+0xc1/0x220 CPU: 0 PID: 1888000 Comm: kworker/u9:0 Kdump: loaded Tainted: G OE --------- - - 4.18.0-305.3.1.el8.x86_64 #1 Hardware name: Red Hat KVM, BIOS 1.13.0-2.module_el8.3.0+555+a55c8938 04/01/2014 Workqueue: ib-comp-unb-wq ib_cq_poll_work [ib_core] RIP: 0010:rb_commit+0xc1/0x220 RSP: 0000:ffffa8ac80f9bca0 EFLAGS: 00010202 RAX: ffff8951c7c01300 RBX: ffff8951c7c14a00 RCX: 0000000000000246 RDX: ffff8951c707c000 RSI: ffff8951c707c57c RDI: ffff8951c7c14a00 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 R10: ffff8951c7c01300 R11: 0000000000000001 R12: 0000000000000246 R13: 0000000000000000 R14: ffffffff964c70c0 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff8951fbc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f20e8f39010 CR3: 000000002ca10005 CR4: 0000000000170ef0 Call Trace: ring_buffer_unlock_commit+0x1d/0xa0 trace_buffer_unlock_commit_regs+0x3b/0x1b0 trace_event_buffer_commit+0x67/0x1d0 trace_event_raw_event_ib_mad_recv_done_handler+0x11c/0x160 [ib_core] ib_mad_recv_done+0x48b/0xc10 [ib_core] ? trace_event_raw_event_cq_poll+0x6f/0xb0 [ib_core] __ib_process_cq+0x91/0x1c0 [ib_core] ib_cq_poll_work+0x26/0x80 [ib_core] process_one_work+0x1a7/0x360 ? create_worker+0x1a0/0x1a0 worker_thread+0x30/0x390 ? create_worker+0x1a0/0x1a0 kthread+0x116/0x130 ? 
kthread_flush_work_fn+0x10/0x10 ret_from_fork+0x35/0x40 ---[ end trace 78ba8509d3830a16 ]--- Fixes: 821bf1de45a1 ("IB/MAD: Add recv path trace point") Signed-off-by: Leonid Ravich Link: https://lore.kernel.org/r/Y2t5feomyznrVj7V@leonid-Inspiron-3421 Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/mad.c | 5 ----- include/trace/events/ib_mad.h | 13 ++++--------- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 1893aa613ad7..674344eb8e2f 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -59,9 +59,6 @@ static void create_mad_addr_info(struct ib_mad_send_wr_private *mad_send_wr, struct ib_mad_qp_info *qp_info, struct trace_event_raw_ib_mad_send_template *entry) { - u16 pkey; - struct ib_device *dev = qp_info->port_priv->device; - u32 pnum = qp_info->port_priv->port_num; struct ib_ud_wr *wr = &mad_send_wr->send_wr; struct rdma_ah_attr attr = {}; @@ -69,8 +66,6 @@ static void create_mad_addr_info(struct ib_mad_send_wr_private *mad_send_wr, /* These are common */ entry->sl = attr.sl; - ib_query_pkey(dev, pnum, wr->pkey_index, &pkey); - entry->pkey = pkey; entry->rqpn = wr->remote_qpn; entry->rqkey = wr->remote_qkey; entry->dlid = rdma_ah_get_dlid(&attr); diff --git a/include/trace/events/ib_mad.h b/include/trace/events/ib_mad.h index 59363a083ecb..d92691c78cff 100644 --- a/include/trace/events/ib_mad.h +++ b/include/trace/events/ib_mad.h @@ -49,7 +49,6 @@ DECLARE_EVENT_CLASS(ib_mad_send_template, __field(int, retries_left) __field(int, max_retries) __field(int, retry) - __field(u16, pkey) ), TP_fast_assign( @@ -89,7 +88,7 @@ DECLARE_EVENT_CLASS(ib_mad_send_template, "hdr : base_ver 0x%x class 0x%x class_ver 0x%x " \ "method 0x%x status 0x%x class_specific 0x%x tid 0x%llx " \ "attr_id 0x%x attr_mod 0x%x => dlid 0x%08x sl %d "\ - "pkey 0x%x rpqn 0x%x rqpkey 0x%x", + "rpqn 0x%x rqpkey 0x%x", __entry->dev_index, __entry->port_num, __entry->qp_num, 
__entry->agent_priv, be64_to_cpu(__entry->wrtid), __entry->retries_left, __entry->max_retries, @@ -100,7 +99,7 @@ DECLARE_EVENT_CLASS(ib_mad_send_template, be16_to_cpu(__entry->class_specific), be64_to_cpu(__entry->tid), be16_to_cpu(__entry->attr_id), be32_to_cpu(__entry->attr_mod), - be32_to_cpu(__entry->dlid), __entry->sl, __entry->pkey, + be32_to_cpu(__entry->dlid), __entry->sl, __entry->rqpn, __entry->rqkey ) ); @@ -204,7 +203,6 @@ TRACE_EVENT(ib_mad_recv_done_handler, __field(u16, wc_status) __field(u32, slid) __field(u32, dev_index) - __field(u16, pkey) ), TP_fast_assign( @@ -224,9 +222,6 @@ TRACE_EVENT(ib_mad_recv_done_handler, __entry->slid = wc->slid; __entry->src_qp = wc->src_qp; __entry->sl = wc->sl; - ib_query_pkey(qp_info->port_priv->device, - qp_info->port_priv->port_num, - wc->pkey_index, &__entry->pkey); __entry->wc_status = wc->status; ), @@ -234,7 +229,7 @@ TRACE_EVENT(ib_mad_recv_done_handler, "base_ver 0x%02x class 0x%02x class_ver 0x%02x " \ "method 0x%02x status 0x%04x class_specific 0x%04x " \ "tid 0x%016llx attr_id 0x%04x attr_mod 0x%08x " \ - "slid 0x%08x src QP%d, sl %d pkey 0x%04x", + "slid 0x%08x src QP%d, sl %d", __entry->dev_index, __entry->port_num, __entry->qp_num, __entry->wc_status, __entry->length, @@ -244,7 +239,7 @@ TRACE_EVENT(ib_mad_recv_done_handler, be16_to_cpu(__entry->class_specific), be64_to_cpu(__entry->tid), be16_to_cpu(__entry->attr_id), be32_to_cpu(__entry->attr_mod), - __entry->slid, __entry->src_qp, __entry->sl, __entry->pkey + __entry->slid, __entry->src_qp, __entry->sl ) ); From cf87ac739e488055a6046a410caa8f4da108948f Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 10 Nov 2022 18:49:08 +0800 Subject: [PATCH 1252/4122] KVM: x86: Introduce KVM_REQ_DIRTY_RING_SOFT_FULL The VCPU isn't expected to be runnable when the dirty ring becomes soft full, until the dirty pages are harvested and the dirty ring is reset from userspace. 
So there is a check in each guest's entrance to see if the dirty ring is soft full or not. The VCPU is stopped from running if its dirty ring has been soft full. The similar check will be needed when the feature is going to be supported on ARM64. As Marc Zyngier suggested, a new event will avoid pointless overhead to check the size of the dirty ring ('vcpu->kvm->dirty_ring_size') in each guest's entrance. Add KVM_REQ_DIRTY_RING_SOFT_FULL. The event is raised when the dirty ring becomes soft full in kvm_dirty_ring_push(). The event is only cleared in the check, done in the newly added helper kvm_dirty_ring_check_request(). Since the VCPU is not runnable when the dirty ring becomes soft full, the KVM_REQ_DIRTY_RING_SOFT_FULL event is always set to prevent the VCPU from running until the dirty pages are harvested and the dirty ring is reset by userspace. kvm_dirty_ring_soft_full() becomes a private function with the newly added helper kvm_dirty_ring_check_request(). The alignment for the various event definitions in kvm_host.h is changed to tab character by the way. In order to avoid using 'container_of()', the argument @ring is replaced by @vcpu in kvm_dirty_ring_push(). 
Link: https://lore.kernel.org/kvmarm/87lerkwtm5.wl-maz@kernel.org Suggested-by: Marc Zyngier Signed-off-by: Gavin Shan Reviewed-by: Peter Xu Reviewed-by: Sean Christopherson Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110104914.31280-2-gshan@redhat.com --- arch/x86/kvm/x86.c | 15 ++++++--------- include/linux/kvm_dirty_ring.h | 12 ++++-------- include/linux/kvm_host.h | 9 +++++---- virt/kvm/dirty_ring.c | 32 ++++++++++++++++++++++++++++++-- virt/kvm/kvm_main.c | 3 +-- 5 files changed, 46 insertions(+), 25 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9cf1ba865562..d0d32e67ebf3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10499,20 +10499,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) bool req_immediate_exit = false; - /* Forbid vmenter if vcpu dirty ring is soft-full */ - if (unlikely(vcpu->kvm->dirty_ring_size && - kvm_dirty_ring_soft_full(&vcpu->dirty_ring))) { - vcpu->run->exit_reason = KVM_EXIT_DIRTY_RING_FULL; - trace_kvm_dirty_ring_exit(vcpu); - r = 0; - goto out; - } - if (kvm_request_pending(vcpu)) { if (kvm_check_request(KVM_REQ_VM_DEAD, vcpu)) { r = -EIO; goto out; } + + if (kvm_dirty_ring_check_request(vcpu)) { + r = 0; + goto out; + } + if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) { if (unlikely(!kvm_x86_ops.nested_ops->get_nested_state_pages(vcpu))) { r = 0; diff --git a/include/linux/kvm_dirty_ring.h b/include/linux/kvm_dirty_ring.h index 906f899813dc..9c13c4c3d30c 100644 --- a/include/linux/kvm_dirty_ring.h +++ b/include/linux/kvm_dirty_ring.h @@ -49,7 +49,7 @@ static inline int kvm_dirty_ring_reset(struct kvm *kvm, return 0; } -static inline void kvm_dirty_ring_push(struct kvm_dirty_ring *ring, +static inline void kvm_dirty_ring_push(struct kvm_vcpu *vcpu, u32 slot, u64 offset) { } @@ -64,11 +64,6 @@ static inline void kvm_dirty_ring_free(struct kvm_dirty_ring *ring) { } -static inline bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring) -{ - return true; 
-} - #else /* CONFIG_HAVE_KVM_DIRTY_RING */ u32 kvm_dirty_ring_get_rsvd_entries(void); @@ -84,13 +79,14 @@ int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring); * returns =0: successfully pushed * <0: unable to push, need to wait */ -void kvm_dirty_ring_push(struct kvm_dirty_ring *ring, u32 slot, u64 offset); +void kvm_dirty_ring_push(struct kvm_vcpu *vcpu, u32 slot, u64 offset); + +bool kvm_dirty_ring_check_request(struct kvm_vcpu *vcpu); /* for use in vm_operations_struct */ struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring, u32 offset); void kvm_dirty_ring_free(struct kvm_dirty_ring *ring); -bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring); #endif /* CONFIG_HAVE_KVM_DIRTY_RING */ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 00c3448ba7f8..648d663f32c4 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -153,10 +153,11 @@ static inline bool is_error_page(struct page *page) * Architecture-independent vcpu->requests bit members * Bits 3-7 are reserved for more arch-independent bits. 
*/ -#define KVM_REQ_TLB_FLUSH (0 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) -#define KVM_REQ_VM_DEAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) -#define KVM_REQ_UNBLOCK 2 -#define KVM_REQUEST_ARCH_BASE 8 +#define KVM_REQ_TLB_FLUSH (0 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_VM_DEAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_UNBLOCK 2 +#define KVM_REQ_DIRTY_RING_SOFT_FULL 3 +#define KVM_REQUEST_ARCH_BASE 8 /* * KVM_REQ_OUTSIDE_GUEST_MODE exists is purely as way to force the vCPU to diff --git a/virt/kvm/dirty_ring.c b/virt/kvm/dirty_ring.c index d6fabf238032..fecbb7d75ad2 100644 --- a/virt/kvm/dirty_ring.c +++ b/virt/kvm/dirty_ring.c @@ -26,7 +26,7 @@ static u32 kvm_dirty_ring_used(struct kvm_dirty_ring *ring) return READ_ONCE(ring->dirty_index) - READ_ONCE(ring->reset_index); } -bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring) +static bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring) { return kvm_dirty_ring_used(ring) >= ring->soft_limit; } @@ -142,13 +142,19 @@ int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring) kvm_reset_dirty_gfn(kvm, cur_slot, cur_offset, mask); + /* + * The request KVM_REQ_DIRTY_RING_SOFT_FULL will be cleared + * by the VCPU thread next time when it enters the guest. 
+ */ + trace_kvm_dirty_ring_reset(ring); return count; } -void kvm_dirty_ring_push(struct kvm_dirty_ring *ring, u32 slot, u64 offset) +void kvm_dirty_ring_push(struct kvm_vcpu *vcpu, u32 slot, u64 offset) { + struct kvm_dirty_ring *ring = &vcpu->dirty_ring; struct kvm_dirty_gfn *entry; /* It should never get full */ @@ -166,6 +172,28 @@ void kvm_dirty_ring_push(struct kvm_dirty_ring *ring, u32 slot, u64 offset) kvm_dirty_gfn_set_dirtied(entry); ring->dirty_index++; trace_kvm_dirty_ring_push(ring, slot, offset); + + if (kvm_dirty_ring_soft_full(ring)) + kvm_make_request(KVM_REQ_DIRTY_RING_SOFT_FULL, vcpu); +} + +bool kvm_dirty_ring_check_request(struct kvm_vcpu *vcpu) +{ + /* + * The VCPU isn't runnable when the dirty ring becomes soft full. + * The KVM_REQ_DIRTY_RING_SOFT_FULL event is always set to prevent + * the VCPU from running until the dirty pages are harvested and + * the dirty ring is reset by userspace. + */ + if (kvm_check_request(KVM_REQ_DIRTY_RING_SOFT_FULL, vcpu) && + kvm_dirty_ring_soft_full(&vcpu->dirty_ring)) { + kvm_make_request(KVM_REQ_DIRTY_RING_SOFT_FULL, vcpu); + vcpu->run->exit_reason = KVM_EXIT_DIRTY_RING_FULL; + trace_kvm_dirty_ring_exit(vcpu); + return true; + } + + return false; } struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring, u32 offset) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 46e8ed1ae647..04b22d2f99d8 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3314,8 +3314,7 @@ void mark_page_dirty_in_slot(struct kvm *kvm, u32 slot = (memslot->as_id << 16) | memslot->id; if (kvm->dirty_ring_size) - kvm_dirty_ring_push(&vcpu->dirty_ring, - slot, rel_gfn); + kvm_dirty_ring_push(vcpu, slot, rel_gfn); else set_bit_le(rel_gfn, memslot->dirty_bitmap); } From e8a18565e59303ac12c626a161d72bd890bd2062 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 10 Nov 2022 18:49:09 +0800 Subject: [PATCH 1253/4122] KVM: Move declaration of kvm_cpu_dirty_log_size() to kvm_dirty_ring.h Not all architectures like 
ARM64 need to override the function. Move its declaration to kvm_dirty_ring.h to avoid the following compiling warning on ARM64 when the feature is enabled. arch/arm64/kvm/../../../virt/kvm/dirty_ring.c:14:12: \ warning: no previous prototype for 'kvm_cpu_dirty_log_size' \ [-Wmissing-prototypes] \ int __weak kvm_cpu_dirty_log_size(void) Reported-by: kernel test robot Signed-off-by: Gavin Shan Reviewed-by: Peter Xu Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110104914.31280-3-gshan@redhat.com --- arch/x86/include/asm/kvm_host.h | 2 -- include/linux/kvm_dirty_ring.h | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 7551b6f9c31c..b4dbde7d9eb1 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -2090,8 +2090,6 @@ static inline int kvm_cpu_get_apicid(int mps_cpu) #define GET_SMSTATE(type, buf, offset) \ (*(type *)((buf) + (offset) - 0x7e00)) -int kvm_cpu_dirty_log_size(void); - int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages); #define KVM_CLOCK_VALID_FLAGS \ diff --git a/include/linux/kvm_dirty_ring.h b/include/linux/kvm_dirty_ring.h index 9c13c4c3d30c..199ead37b104 100644 --- a/include/linux/kvm_dirty_ring.h +++ b/include/linux/kvm_dirty_ring.h @@ -66,6 +66,7 @@ static inline void kvm_dirty_ring_free(struct kvm_dirty_ring *ring) #else /* CONFIG_HAVE_KVM_DIRTY_RING */ +int kvm_cpu_dirty_log_size(void); u32 kvm_dirty_ring_get_rsvd_entries(void); int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size); From 86bdf3ebcfe1ded055282536fecce13001874740 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 10 Nov 2022 18:49:10 +0800 Subject: [PATCH 1254/4122] KVM: Support dirty ring in conjunction with bitmap ARM64 needs to dirty memory outside of a VCPU context when VGIC/ITS is enabled. It's conflicting with that ring-based dirty page tracking always requires a running VCPU context. 
Introduce a new flavor of dirty ring that requires the use of both VCPU dirty rings and a dirty bitmap. The expectation is that for non-VCPU sources of dirty memory (such as the VGIC/ITS on arm64), KVM writes to the dirty bitmap. Userspace should scan the dirty bitmap before migrating the VM to the target. Use an additional capability to advertise this behavior. The newly added capability (KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP) can't be enabled before KVM_CAP_DIRTY_LOG_RING_ACQ_REL on ARM64. In this way, the newly added capability is treated as an extension of KVM_CAP_DIRTY_LOG_RING_ACQ_REL. Suggested-by: Marc Zyngier Suggested-by: Peter Xu Co-developed-by: Oliver Upton Signed-off-by: Oliver Upton Signed-off-by: Gavin Shan Acked-by: Peter Xu Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110104914.31280-4-gshan@redhat.com --- Documentation/virt/kvm/api.rst | 34 ++++++++--- .../virt/kvm/devices/arm-vgic-its.rst | 5 +- include/linux/kvm_dirty_ring.h | 7 +++ include/linux/kvm_host.h | 1 + include/uapi/linux/kvm.h | 1 + virt/kvm/Kconfig | 6 ++ virt/kvm/dirty_ring.c | 14 +++++ virt/kvm/kvm_main.c | 61 ++++++++++++++++--- 8 files changed, 112 insertions(+), 17 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index eee9f857a986..1f1b09aa6db4 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -8003,13 +8003,6 @@ flushing is done by the KVM_GET_DIRTY_LOG ioctl). To achieve that, one needs to kick the vcpu out of KVM_RUN using a signal. The resulting vmexit ensures that all dirty GFNs are flushed to the dirty rings. -NOTE: the capability KVM_CAP_DIRTY_LOG_RING and the corresponding -ioctl KVM_RESET_DIRTY_RINGS are mutual exclusive to the existing ioctls -KVM_GET_DIRTY_LOG and KVM_CLEAR_DIRTY_LOG. 
After enabling -KVM_CAP_DIRTY_LOG_RING with an acceptable dirty ring size, the virtual -machine will switch to ring-buffer dirty page tracking and further -KVM_GET_DIRTY_LOG or KVM_CLEAR_DIRTY_LOG ioctls will fail. - NOTE: KVM_CAP_DIRTY_LOG_RING_ACQ_REL is the only capability that should be exposed by weakly ordered architecture, in order to indicate the additional memory ordering requirements imposed on userspace when @@ -8018,6 +8011,33 @@ Architecture with TSO-like ordering (such as x86) are allowed to expose both KVM_CAP_DIRTY_LOG_RING and KVM_CAP_DIRTY_LOG_RING_ACQ_REL to userspace. +After enabling the dirty rings, the userspace needs to detect the +capability of KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP to see whether the +ring structures can be backed by per-slot bitmaps. With this capability +advertised, it means the architecture can dirty guest pages without +vcpu/ring context, so that some of the dirty information will still be +maintained in the bitmap structure. KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP +can't be enabled if the capability of KVM_CAP_DIRTY_LOG_RING_ACQ_REL +hasn't been enabled, or any memslot has been existing. + +Note that the bitmap here is only a backup of the ring structure. The +use of the ring and bitmap combination is only beneficial if there is +only a very small amount of memory that is dirtied out of vcpu/ring +context. Otherwise, the stand-alone per-slot bitmap mechanism needs to +be considered. + +To collect dirty bits in the backup bitmap, userspace can use the same +KVM_GET_DIRTY_LOG ioctl. KVM_CLEAR_DIRTY_LOG isn't needed as long as all +the generation of the dirty bits is done in a single pass. Collecting +the dirty bitmap should be the very last thing that the VMM does before +considering the state as complete. VMM needs to ensure that the dirty +state is final and avoid missing dirty pages from another ioctl ordered +after the bitmap collection. 
+ +NOTE: One example of using the backup bitmap is saving arm64 vgic/its +tables through KVM_DEV_ARM_{VGIC_GRP_CTRL, ITS_SAVE_TABLES} command on +KVM device "kvm-arm-vgic-its" when dirty ring is enabled. + 8.30 KVM_CAP_XEN_HVM -------------------- diff --git a/Documentation/virt/kvm/devices/arm-vgic-its.rst b/Documentation/virt/kvm/devices/arm-vgic-its.rst index d257eddbae29..e053124f77c4 100644 --- a/Documentation/virt/kvm/devices/arm-vgic-its.rst +++ b/Documentation/virt/kvm/devices/arm-vgic-its.rst @@ -52,7 +52,10 @@ KVM_DEV_ARM_VGIC_GRP_CTRL KVM_DEV_ARM_ITS_SAVE_TABLES save the ITS table data into guest RAM, at the location provisioned - by the guest in corresponding registers/table entries. + by the guest in corresponding registers/table entries. Should userspace + require a form of dirty tracking to identify which pages are modified + by the saving process, it should use a bitmap even if using another + mechanism to track the memory dirtied by the vCPUs. The layout of the tables in guest memory defines an ABI. The entries are laid out in little endian format as described in the last paragraph. 
diff --git a/include/linux/kvm_dirty_ring.h b/include/linux/kvm_dirty_ring.h index 199ead37b104..4862c98d80d3 100644 --- a/include/linux/kvm_dirty_ring.h +++ b/include/linux/kvm_dirty_ring.h @@ -37,6 +37,11 @@ static inline u32 kvm_dirty_ring_get_rsvd_entries(void) return 0; } +static inline bool kvm_use_dirty_bitmap(struct kvm *kvm) +{ + return true; +} + static inline int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size) { @@ -67,6 +72,8 @@ static inline void kvm_dirty_ring_free(struct kvm_dirty_ring *ring) #else /* CONFIG_HAVE_KVM_DIRTY_RING */ int kvm_cpu_dirty_log_size(void); +bool kvm_use_dirty_bitmap(struct kvm *kvm); +bool kvm_arch_allow_write_without_running_vcpu(struct kvm *kvm); u32 kvm_dirty_ring_get_rsvd_entries(void); int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 648d663f32c4..db83f63f4e61 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -779,6 +779,7 @@ struct kvm { pid_t userspace_pid; unsigned int max_halt_poll_ns; u32 dirty_ring_size; + bool dirty_ring_with_bitmap; bool vm_bugged; bool vm_dead; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 0d5d4419139a..c87b5882d7ae 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1178,6 +1178,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_S390_ZPCI_OP 221 #define KVM_CAP_S390_CPU_TOPOLOGY 222 #define KVM_CAP_DIRTY_LOG_RING_ACQ_REL 223 +#define KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP 224 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 800f9470e36b..9fb1ff6f19e5 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -33,6 +33,12 @@ config HAVE_KVM_DIRTY_RING_ACQ_REL bool select HAVE_KVM_DIRTY_RING +# Allow enabling both the dirty bitmap and dirty ring. Only architectures +# that need to dirty memory outside of a vCPU context should select this. 
+config NEED_KVM_DIRTY_RING_WITH_BITMAP + bool + depends on HAVE_KVM_DIRTY_RING + config HAVE_KVM_EVENTFD bool select EVENTFD diff --git a/virt/kvm/dirty_ring.c b/virt/kvm/dirty_ring.c index fecbb7d75ad2..c1cd7dfe4a90 100644 --- a/virt/kvm/dirty_ring.c +++ b/virt/kvm/dirty_ring.c @@ -21,6 +21,20 @@ u32 kvm_dirty_ring_get_rsvd_entries(void) return KVM_DIRTY_RING_RSVD_ENTRIES + kvm_cpu_dirty_log_size(); } +bool kvm_use_dirty_bitmap(struct kvm *kvm) +{ + lockdep_assert_held(&kvm->slots_lock); + + return !kvm->dirty_ring_size || kvm->dirty_ring_with_bitmap; +} + +#ifndef CONFIG_NEED_KVM_DIRTY_RING_WITH_BITMAP +bool kvm_arch_allow_write_without_running_vcpu(struct kvm *kvm) +{ + return false; +} +#endif + static u32 kvm_dirty_ring_used(struct kvm_dirty_ring *ring) { return READ_ONCE(ring->dirty_index) - READ_ONCE(ring->reset_index); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 04b22d2f99d8..be40d1ce6e91 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1617,7 +1617,7 @@ static int kvm_prepare_memory_region(struct kvm *kvm, new->dirty_bitmap = NULL; else if (old && old->dirty_bitmap) new->dirty_bitmap = old->dirty_bitmap; - else if (!kvm->dirty_ring_size) { + else if (kvm_use_dirty_bitmap(kvm)) { r = kvm_alloc_dirty_bitmap(new); if (r) return r; @@ -2060,8 +2060,8 @@ int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log, unsigned long n; unsigned long any = 0; - /* Dirty ring tracking is exclusive to dirty log tracking */ - if (kvm->dirty_ring_size) + /* Dirty ring tracking may be exclusive to dirty log tracking */ + if (!kvm_use_dirty_bitmap(kvm)) return -ENXIO; *memslot = NULL; @@ -2125,8 +2125,8 @@ static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log) unsigned long *dirty_bitmap_buffer; bool flush; - /* Dirty ring tracking is exclusive to dirty log tracking */ - if (kvm->dirty_ring_size) + /* Dirty ring tracking may be exclusive to dirty log tracking */ + if (!kvm_use_dirty_bitmap(kvm)) return -ENXIO; 
as_id = log->slot >> 16; @@ -2237,8 +2237,8 @@ static int kvm_clear_dirty_log_protect(struct kvm *kvm, unsigned long *dirty_bitmap_buffer; bool flush; - /* Dirty ring tracking is exclusive to dirty log tracking */ - if (kvm->dirty_ring_size) + /* Dirty ring tracking may be exclusive to dirty log tracking */ + if (!kvm_use_dirty_bitmap(kvm)) return -ENXIO; as_id = log->slot >> 16; @@ -3305,7 +3305,10 @@ void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_vcpu *vcpu = kvm_get_running_vcpu(); #ifdef CONFIG_HAVE_KVM_DIRTY_RING - if (WARN_ON_ONCE(!vcpu) || WARN_ON_ONCE(vcpu->kvm != kvm)) + if (WARN_ON_ONCE(vcpu && vcpu->kvm != kvm)) + return; + + if (WARN_ON_ONCE(!kvm_arch_allow_write_without_running_vcpu(kvm) && !vcpu)) return; #endif @@ -3313,7 +3316,7 @@ void mark_page_dirty_in_slot(struct kvm *kvm, unsigned long rel_gfn = gfn - memslot->base_gfn; u32 slot = (memslot->as_id << 16) | memslot->id; - if (kvm->dirty_ring_size) + if (kvm->dirty_ring_size && vcpu) kvm_dirty_ring_push(vcpu, slot, rel_gfn); else set_bit_le(rel_gfn, memslot->dirty_bitmap); @@ -4482,6 +4485,9 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) return KVM_DIRTY_RING_MAX_ENTRIES * sizeof(struct kvm_dirty_gfn); #else return 0; +#endif +#ifdef CONFIG_NEED_KVM_DIRTY_RING_WITH_BITMAP + case KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP: #endif case KVM_CAP_BINARY_STATS_FD: case KVM_CAP_SYSTEM_EVENT_DATA: @@ -4558,6 +4564,20 @@ int __attribute__((weak)) kvm_vm_ioctl_enable_cap(struct kvm *kvm, return -EINVAL; } +static bool kvm_are_all_memslots_empty(struct kvm *kvm) +{ + int i; + + lockdep_assert_held(&kvm->slots_lock); + + for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { + if (!kvm_memslots_empty(__kvm_memslots(kvm, i))) + return false; + } + + return true; +} + static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm, struct kvm_enable_cap *cap) { @@ -4588,6 +4608,29 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm, return -EINVAL; return 
kvm_vm_ioctl_enable_dirty_log_ring(kvm, cap->args[0]); + case KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP: { + int r = -EINVAL; + + if (!IS_ENABLED(CONFIG_NEED_KVM_DIRTY_RING_WITH_BITMAP) || + !kvm->dirty_ring_size || cap->flags) + return r; + + mutex_lock(&kvm->slots_lock); + + /* + * For simplicity, allow enabling ring+bitmap if and only if + * there are no memslots, e.g. to ensure all memslots allocate + * a bitmap after the capability is enabled. + */ + if (kvm_are_all_memslots_empty(kvm)) { + kvm->dirty_ring_with_bitmap = true; + r = 0; + } + + mutex_unlock(&kvm->slots_lock); + + return r; + } default: return kvm_vm_ioctl_enable_cap(kvm, cap); } From 9cb1096f8590bc590326087bea65db932b53c3b5 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 10 Nov 2022 18:49:11 +0800 Subject: [PATCH 1255/4122] KVM: arm64: Enable ring-based dirty memory tracking Enable ring-based dirty memory tracking on ARM64: - Enable CONFIG_HAVE_KVM_DIRTY_RING_ACQ_REL. - Enable CONFIG_NEED_KVM_DIRTY_RING_WITH_BITMAP. - Set KVM_DIRTY_LOG_PAGE_OFFSET for the ring buffer's physical page offset. - Add ARM64 specific kvm_arch_allow_write_without_running_vcpu() to keep the site of saving vgic/its tables out of the no-running-vcpu radar. Signed-off-by: Gavin Shan Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110104914.31280-5-gshan@redhat.com --- Documentation/virt/kvm/api.rst | 2 +- arch/arm64/include/uapi/asm/kvm.h | 1 + arch/arm64/kvm/Kconfig | 2 ++ arch/arm64/kvm/arm.c | 3 +++ arch/arm64/kvm/vgic/vgic-its.c | 20 ++++++++++++++++++++ include/kvm/arm_vgic.h | 1 + 6 files changed, 28 insertions(+), 1 deletion(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 1f1b09aa6db4..773e4b202f47 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -7921,7 +7921,7 @@ regardless of what has actually been exposed through the CPUID leaf. 
8.29 KVM_CAP_DIRTY_LOG_RING/KVM_CAP_DIRTY_LOG_RING_ACQ_REL ---------------------------------------------------------- -:Architectures: x86 +:Architectures: x86, arm64 :Parameters: args[0] - size of the dirty log ring KVM is capable of tracking dirty memory using ring buffers that are diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 316917b98707..a7a857f1784d 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -43,6 +43,7 @@ #define __KVM_HAVE_VCPU_EVENTS #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 +#define KVM_DIRTY_LOG_PAGE_OFFSET 64 #define KVM_REG_SIZE(id) \ (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 815cc118c675..05da3c8f7e88 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -32,6 +32,8 @@ menuconfig KVM select KVM_VFIO select HAVE_KVM_EVENTFD select HAVE_KVM_IRQFD + select HAVE_KVM_DIRTY_RING_ACQ_REL + select NEED_KVM_DIRTY_RING_WITH_BITMAP select HAVE_KVM_MSI select HAVE_KVM_IRQCHIP select HAVE_KVM_IRQ_ROUTING diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 94d33e296e10..6b097605e38c 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -746,6 +746,9 @@ static int check_vcpu_requests(struct kvm_vcpu *vcpu) if (kvm_check_request(KVM_REQ_SUSPEND, vcpu)) return kvm_vcpu_suspend(vcpu); + + if (kvm_dirty_ring_check_request(vcpu)) + return 0; } return 1; diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 733b53055f97..94a666dd1443 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -2743,6 +2743,7 @@ static int vgic_its_has_attr(struct kvm_device *dev, static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr) { const struct vgic_its_abi *abi = vgic_its_get_abi(its); + struct vgic_dist *dist = &kvm->arch.vgic; int ret = 0; if (attr == KVM_DEV_ARM_VGIC_CTRL_INIT) /* Nothing to do */ @@ 
-2762,7 +2763,9 @@ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr) vgic_its_reset(kvm, its); break; case KVM_DEV_ARM_ITS_SAVE_TABLES: + dist->save_its_tables_in_progress = true; ret = abi->save_tables(its); + dist->save_its_tables_in_progress = false; break; case KVM_DEV_ARM_ITS_RESTORE_TABLES: ret = abi->restore_tables(its); @@ -2775,6 +2778,23 @@ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr) return ret; } +/* + * kvm_arch_allow_write_without_running_vcpu - allow writing guest memory + * without the running VCPU when dirty ring is enabled. + * + * The running VCPU is required to track dirty guest pages when dirty ring + * is enabled. Otherwise, the backup bitmap should be used to track the + * dirty guest pages. When vgic/its tables are being saved, the backup + * bitmap is used to track the dirty guest pages due to the missed running + * VCPU in the period. + */ +bool kvm_arch_allow_write_without_running_vcpu(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + + return dist->save_its_tables_in_progress; +} + static int vgic_its_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 4df9e73a8bb5..9270cd87da3f 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -263,6 +263,7 @@ struct vgic_dist { struct vgic_io_device dist_iodev; bool has_its; + bool save_its_tables_in_progress; /* * Contains the attributes and gpa of the LPI configuration table. From a737f5ffb1e883e580730122be11c9eb832a7749 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 10 Nov 2022 18:49:12 +0800 Subject: [PATCH 1256/4122] KVM: selftests: Use host page size to map ring buffer in dirty_log_test In vcpu_map_dirty_ring(), the guest's page size is used to figure out the offset in the virtual area. It works fine when we have same page sizes on host and guest. 
However, it fails when the page sizes on host and guest are different on arm64, as the error messages below indicate. # ./dirty_log_test -M dirty-ring -m 7 Setting log mode to: 'dirty-ring' Test iterations: 32, interval: 10 (ms) Testing guest mode: PA-bits:40, VA-bits:48, 64K pages guest physical test memory offset: 0xffbffc0000 vcpu stops because vcpu is kicked out... Notifying vcpu to continue vcpu continues now. ==== Test Assertion Failure ==== lib/kvm_util.c:1477: addr == MAP_FAILED pid=9000 tid=9000 errno=0 - Success 1 0x0000000000405f5b: vcpu_map_dirty_ring at kvm_util.c:1477 2 0x0000000000402ebb: dirty_ring_collect_dirty_pages at dirty_log_test.c:349 3 0x00000000004029b3: log_mode_collect_dirty_pages at dirty_log_test.c:478 4 (inlined by) run_test at dirty_log_test.c:778 5 (inlined by) run_test at dirty_log_test.c:691 6 0x0000000000403a57: for_each_guest_mode at guest_modes.c:105 7 0x0000000000401ccf: main at dirty_log_test.c:921 8 0x0000ffffb06ec79b: ?? ??:0 9 0x0000ffffb06ec86b: ?? ??:0 10 0x0000000000401def: _start at ??:? Dirty ring mapped private Fix the issue by using host's page size to map the ring buffer. 
Signed-off-by: Gavin Shan Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110104914.31280-6-gshan@redhat.com --- tools/testing/selftests/kvm/lib/kvm_util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index f1cb1627161f..89a1a420ebd5 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1506,7 +1506,7 @@ struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu) void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu) { - uint32_t page_size = vcpu->vm->page_size; + uint32_t page_size = getpagesize(); uint32_t size = vcpu->vm->dirty_ring_size; TEST_ASSERT(size > 0, "Should enable dirty ring first"); From 7167190ddb863bd061c0c6b61f4cec94184b40da Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 10 Nov 2022 18:49:13 +0800 Subject: [PATCH 1257/4122] KVM: selftests: Clear dirty ring states between two modes in dirty_log_test There are two states, which need to be cleared before next mode is executed. Otherwise, we will hit failure as the following messages indicate. - The variable 'dirty_ring_vcpu_ring_full' shared by main and vcpu thread. It's indicating if the vcpu exit due to full ring buffer. The value can be carried from previous mode (VM_MODE_P40V48_4K) to current one (VM_MODE_P40V48_64K) when VM_MODE_P40V48_16K isn't supported. - The current ring buffer index needs to be reset before next mode (VM_MODE_P40V48_64K) is executed. Otherwise, the stale value is carried from previous mode (VM_MODE_P40V48_4K). 
# ./dirty_log_test -M dirty-ring Setting log mode to: 'dirty-ring' Test iterations: 32, interval: 10 (ms) Testing guest mode: PA-bits:40, VA-bits:48, 4K pages guest physical test memory offset: 0xffbfffc000 : Dirtied 995328 pages Total bits checked: dirty (1012434), clear (7114123), track_next (966700) Testing guest mode: PA-bits:40, VA-bits:48, 64K pages guest physical test memory offset: 0xffbffc0000 vcpu stops because vcpu is kicked out... vcpu continues now. Notifying vcpu to continue Iteration 1 collected 0 pages vcpu stops because dirty ring is full... vcpu continues now. vcpu stops because dirty ring is full... vcpu continues now. vcpu stops because dirty ring is full... ==== Test Assertion Failure ==== dirty_log_test.c:369: cleared == count pid=10541 tid=10541 errno=22 - Invalid argument 1 0x0000000000403087: dirty_ring_collect_dirty_pages at dirty_log_test.c:369 2 0x0000000000402a0b: log_mode_collect_dirty_pages at dirty_log_test.c:492 3 (inlined by) run_test at dirty_log_test.c:795 4 (inlined by) run_test at dirty_log_test.c:705 5 0x0000000000403a37: for_each_guest_mode at guest_modes.c:100 6 0x0000000000401ccf: main at dirty_log_test.c:938 7 0x0000ffff9ecd279b: ?? ??:0 8 0x0000ffff9ecd286b: ?? ??:0 9 0x0000000000401def: _start at ??:? Reset dirty pages (0) mismatch with collected (35566) Fix the issues by clearing 'dirty_ring_vcpu_ring_full' and the ring buffer index before the next mode is executed. 
Signed-off-by: Gavin Shan Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110104914.31280-7-gshan@redhat.com --- tools/testing/selftests/kvm/dirty_log_test.c | 27 ++++++++++++-------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index b5234d6efbe1..8758c10ec850 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -226,13 +226,15 @@ static void clear_log_create_vm_done(struct kvm_vm *vm) } static void dirty_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages) + void *bitmap, uint32_t num_pages, + uint32_t *unused) { kvm_vm_get_dirty_log(vcpu->vm, slot, bitmap); } static void clear_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages) + void *bitmap, uint32_t num_pages, + uint32_t *unused) { kvm_vm_get_dirty_log(vcpu->vm, slot, bitmap); kvm_vm_clear_dirty_log(vcpu->vm, slot, bitmap, 0, num_pages); @@ -329,10 +331,9 @@ static void dirty_ring_continue_vcpu(void) } static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages) + void *bitmap, uint32_t num_pages, + uint32_t *ring_buf_idx) { - /* We only have one vcpu */ - static uint32_t fetch_index = 0; uint32_t count = 0, cleared; bool continued_vcpu = false; @@ -349,7 +350,8 @@ static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, /* Only have one vcpu */ count = dirty_ring_collect_one(vcpu_map_dirty_ring(vcpu), - slot, bitmap, num_pages, &fetch_index); + slot, bitmap, num_pages, + ring_buf_idx); cleared = kvm_vm_reset_dirty_ring(vcpu->vm); @@ -406,7 +408,8 @@ struct log_mode { void (*create_vm_done)(struct kvm_vm *vm); /* Hook to collect the dirty pages into the bitmap provided */ void (*collect_dirty_pages) (struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages); + void 
*bitmap, uint32_t num_pages, + uint32_t *ring_buf_idx); /* Hook to call when after each vcpu run */ void (*after_vcpu_run)(struct kvm_vcpu *vcpu, int ret, int err); void (*before_vcpu_join) (void); @@ -471,13 +474,14 @@ static void log_mode_create_vm_done(struct kvm_vm *vm) } static void log_mode_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages) + void *bitmap, uint32_t num_pages, + uint32_t *ring_buf_idx) { struct log_mode *mode = &log_modes[host_log_mode]; TEST_ASSERT(mode->collect_dirty_pages != NULL, "collect_dirty_pages() is required for any log mode!"); - mode->collect_dirty_pages(vcpu, slot, bitmap, num_pages); + mode->collect_dirty_pages(vcpu, slot, bitmap, num_pages, ring_buf_idx); } static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) @@ -696,6 +700,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct kvm_vcpu *vcpu; struct kvm_vm *vm; unsigned long *bmap; + uint32_t ring_buf_idx = 0; if (!log_mode_supported()) { print_skip("Log mode '%s' not supported", @@ -771,6 +776,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) host_dirty_count = 0; host_clear_count = 0; host_track_next_count = 0; + WRITE_ONCE(dirty_ring_vcpu_ring_full, false); pthread_create(&vcpu_thread, NULL, vcpu_worker, vcpu); @@ -778,7 +784,8 @@ static void run_test(enum vm_guest_mode mode, void *arg) /* Give the vcpu thread some time to dirty some pages */ usleep(p->interval * 1000); log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX, - bmap, host_num_pages); + bmap, host_num_pages, + &ring_buf_idx); /* * See vcpu_sync_stop_requested definition for details on why From dc6df7d4d0633e65850d5372ae9f1234bcc6e26e Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 10 Nov 2022 18:49:14 +0800 Subject: [PATCH 1258/4122] KVM: selftests: Automate choosing dirty ring size in dirty_log_test In the dirty ring case, we rely on vcpu exit due to full dirty ring state. 
On ARM64 system, there are 4096 host pages when the host page size is 64KB. In this case, the vcpu never exits due to the full dirty ring state. The similar case is 4KB page size on host and 64KB page size on guest. The vcpu corrupts same set of host pages, but the dirty page information isn't collected in the main thread. This leads to infinite loop as the following log shows. # ./dirty_log_test -M dirty-ring -c 65536 -m 5 Setting log mode to: 'dirty-ring' Test iterations: 32, interval: 10 (ms) Testing guest mode: PA-bits:40, VA-bits:48, 4K pages guest physical test memory offset: 0xffbffe0000 vcpu stops because vcpu is kicked out... Notifying vcpu to continue vcpu continues now. Iteration 1 collected 576 pages Fix the issue by automatically choosing the best dirty ring size, to ensure vcpu exit due to full dirty ring state. The option '-c' becomes a hint to the dirty ring count, instead of the value of it. Signed-off-by: Gavin Shan Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110104914.31280-8-gshan@redhat.com --- tools/testing/selftests/kvm/dirty_log_test.c | 26 +++++++++++++++++--- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 8758c10ec850..a87e5f78ebf1 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -24,6 +24,9 @@ #include "guest_modes.h" #include "processor.h" +#define DIRTY_MEM_BITS 30 /* 1G */ +#define PAGE_SHIFT_4K 12 + /* The memory slot index to track dirty pages */ #define TEST_MEM_SLOT_INDEX 1 @@ -273,6 +276,24 @@ static bool dirty_ring_supported(void) static void dirty_ring_create_vm_done(struct kvm_vm *vm) { + uint64_t pages; + uint32_t limit; + + /* + * We rely on vcpu exit due to full dirty ring state. Adjust + * the ring buffer size to ensure we're able to reach the + * full dirty ring state. 
+ */ + pages = (1ul << (DIRTY_MEM_BITS - vm->page_shift)) + 3; + pages = vm_adjust_num_guest_pages(vm->mode, pages); + if (vm->page_size < getpagesize()) + pages = vm_num_host_pages(vm->mode, pages); + + limit = 1 << (31 - __builtin_clz(pages)); + test_dirty_ring_count = 1 << (31 - __builtin_clz(test_dirty_ring_count)); + test_dirty_ring_count = min(limit, test_dirty_ring_count); + pr_info("dirty ring count: 0x%x\n", test_dirty_ring_count); + /* * Switch to dirty ring mode after VM creation but before any * of the vcpu creation. @@ -685,9 +706,6 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, struct kvm_vcpu **vcpu, return vm; } -#define DIRTY_MEM_BITS 30 /* 1G */ -#define PAGE_SHIFT_4K 12 - struct test_params { unsigned long iterations; unsigned long interval; @@ -830,7 +848,7 @@ static void help(char *name) printf("usage: %s [-h] [-i iterations] [-I interval] " "[-p offset] [-m mode]\n", name); puts(""); - printf(" -c: specify dirty ring size, in number of entries\n"); + printf(" -c: hint to dirty ring size, in number of entries\n"); printf(" (only useful for dirty-ring test; default: %"PRIu32")\n", TEST_DIRTY_RING_COUNT); printf(" -i: specify iteration counts (default: %"PRIu64")\n", From 8dab99c9eab3162bfb4326c35579a3388dbf68f2 Mon Sep 17 00:00:00 2001 From: Guillaume La Roque Date: Mon, 7 Nov 2022 18:29:21 +0100 Subject: [PATCH 1259/4122] gpio: davinci: add support of module build Added module build support for the davinci gpio driver Signed-off-by: Guillaume La Roque Signed-off-by: Nicolas Frayer Signed-off-by: Bartosz Golaszewski --- drivers/gpio/Kconfig | 2 +- drivers/gpio/gpio-davinci.c | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 4bfedb0109a7..ec7cfd4f52b1 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -228,7 +228,7 @@ config GPIO_CLPS711X Say yes here to support GPIO on CLPS711X SoCs. 
config GPIO_DAVINCI - bool "TI Davinci/Keystone GPIO support" + tristate "TI Davinci/Keystone GPIO support" default y if ARCH_DAVINCI depends on (ARM || ARM64) && (ARCH_DAVINCI || ARCH_KEYSTONE || ARCH_K3) help diff --git a/drivers/gpio/gpio-davinci.c b/drivers/gpio/gpio-davinci.c index 1018860c83c2..fa51a91afa54 100644 --- a/drivers/gpio/gpio-davinci.c +++ b/drivers/gpio/gpio-davinci.c @@ -727,3 +727,14 @@ static int __init davinci_gpio_drv_reg(void) return platform_driver_register(&davinci_gpio_driver); } postcore_initcall(davinci_gpio_drv_reg); + +static void __exit davinci_gpio_exit(void) +{ + platform_driver_unregister(&davinci_gpio_driver); +} +module_exit(davinci_gpio_exit); + +MODULE_AUTHOR("Jan Kotas "); +MODULE_DESCRIPTION("DAVINCI GPIO driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:gpio-davinci"); From dfc7a7769ab7f2a2f629c673717ef1fa7b63aa42 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 7 Nov 2022 21:56:31 +0000 Subject: [PATCH 1260/4122] KVM: arm64: Combine visitor arguments into a context structure Passing new arguments by value to the visitor callbacks is extremely inflexible for stuffing new parameters used by only some of the visitors. Use a context structure instead and pass the pointer through to the visitor callback. While at it, redefine the 'flags' parameter to the visitor to contain the bit indicating the phase of the walk. Pass the entire set of flags through the context structure such that the walker can communicate additional state to the visitor callback. No functional change intended. 
Signed-off-by: Oliver Upton Reviewed-by: Ben Gardon Reviewed-by: Gavin Shan Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221107215644.1895162-2-oliver.upton@linux.dev --- arch/arm64/include/asm/kvm_pgtable.h | 15 +- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 10 +- arch/arm64/kvm/hyp/nvhe/setup.c | 16 +- arch/arm64/kvm/hyp/pgtable.c | 269 +++++++++++++------------- 4 files changed, 154 insertions(+), 156 deletions(-) diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 3252eb50ecfe..607f9bb8aab4 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -199,10 +199,17 @@ enum kvm_pgtable_walk_flags { KVM_PGTABLE_WALK_TABLE_POST = BIT(2), }; -typedef int (*kvm_pgtable_visitor_fn_t)(u64 addr, u64 end, u32 level, - kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, - void * const arg); +struct kvm_pgtable_visit_ctx { + kvm_pte_t *ptep; + void *arg; + u64 addr; + u64 end; + u32 level; + enum kvm_pgtable_walk_flags flags; +}; + +typedef int (*kvm_pgtable_visitor_fn_t)(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit); /** * struct kvm_pgtable_walker - Hook into a page-table walk. 
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 1e78acf9662e..8f5b6a36a039 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -417,13 +417,11 @@ struct check_walk_data { enum pkvm_page_state (*get_page_state)(kvm_pte_t pte); }; -static int __check_page_state_visitor(u64 addr, u64 end, u32 level, - kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, - void * const arg) +static int __check_page_state_visitor(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit) { - struct check_walk_data *d = arg; - kvm_pte_t pte = *ptep; + struct check_walk_data *d = ctx->arg; + kvm_pte_t pte = *ctx->ptep; if (kvm_pte_valid(pte) && !addr_is_memory(kvm_pte_to_phys(pte))) return -EINVAL; diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index e8d4ea2fcfa0..a293cf5eba1b 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -186,15 +186,13 @@ static void hpool_put_page(void *addr) hyp_put_page(&hpool, addr); } -static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level, - kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, - void * const arg) +static int finalize_host_mappings_walker(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit) { - struct kvm_pgtable_mm_ops *mm_ops = arg; + struct kvm_pgtable_mm_ops *mm_ops = ctx->arg; enum kvm_pgtable_prot prot; enum pkvm_page_state state; - kvm_pte_t pte = *ptep; + kvm_pte_t pte = *ctx->ptep; phys_addr_t phys; if (!kvm_pte_valid(pte)) @@ -205,11 +203,11 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level, * was unable to access the hyp_vmemmap and so the buddy allocator has * initialised the refcount to '1'. 
*/ - mm_ops->get_page(ptep); - if (flag != KVM_PGTABLE_WALK_LEAF) + mm_ops->get_page(ctx->ptep); + if (visit != KVM_PGTABLE_WALK_LEAF) return 0; - if (level != (KVM_PGTABLE_MAX_LEVELS - 1)) + if (ctx->level != (KVM_PGTABLE_MAX_LEVELS - 1)) return -EINVAL; phys = kvm_pte_to_phys(pte); diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index cdf8e76b0be1..900c8b9c0cfc 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -64,20 +64,20 @@ static bool kvm_phys_is_valid(u64 phys) return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_EL1_PARANGE_MAX)); } -static bool kvm_block_mapping_supported(u64 addr, u64 end, u64 phys, u32 level) +static bool kvm_block_mapping_supported(const struct kvm_pgtable_visit_ctx *ctx, u64 phys) { - u64 granule = kvm_granule_size(level); + u64 granule = kvm_granule_size(ctx->level); - if (!kvm_level_supports_block_mapping(level)) + if (!kvm_level_supports_block_mapping(ctx->level)) return false; - if (granule > (end - addr)) + if (granule > (ctx->end - ctx->addr)) return false; if (kvm_phys_is_valid(phys) && !IS_ALIGNED(phys, granule)) return false; - return IS_ALIGNED(addr, granule); + return IS_ALIGNED(ctx->addr, granule); } static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, u32 level) @@ -172,12 +172,12 @@ static kvm_pte_t kvm_init_invalid_leaf_owner(u8 owner_id) return FIELD_PREP(KVM_INVALID_PTE_OWNER_MASK, owner_id); } -static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, u64 addr, - u32 level, kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag) +static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, + const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit) { struct kvm_pgtable_walker *walker = data->walker; - return walker->cb(addr, data->end, level, ptep, flag, walker->arg); + return walker->cb(ctx, visit); } static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data, @@ -186,20 +186,24 @@ static int 
__kvm_pgtable_walk(struct kvm_pgtable_walk_data *data, static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data, kvm_pte_t *ptep, u32 level) { + enum kvm_pgtable_walk_flags flags = data->walker->flags; + struct kvm_pgtable_visit_ctx ctx = { + .ptep = ptep, + .arg = data->walker->arg, + .addr = data->addr, + .end = data->end, + .level = level, + .flags = flags, + }; int ret = 0; - u64 addr = data->addr; kvm_pte_t *childp, pte = *ptep; bool table = kvm_pte_table(pte, level); - enum kvm_pgtable_walk_flags flags = data->walker->flags; - if (table && (flags & KVM_PGTABLE_WALK_TABLE_PRE)) { - ret = kvm_pgtable_visitor_cb(data, addr, level, ptep, - KVM_PGTABLE_WALK_TABLE_PRE); - } + if (table && (ctx.flags & KVM_PGTABLE_WALK_TABLE_PRE)) + ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_TABLE_PRE); - if (!table && (flags & KVM_PGTABLE_WALK_LEAF)) { - ret = kvm_pgtable_visitor_cb(data, addr, level, ptep, - KVM_PGTABLE_WALK_LEAF); + if (!table && (ctx.flags & KVM_PGTABLE_WALK_LEAF)) { + ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_LEAF); pte = *ptep; table = kvm_pte_table(pte, level); } @@ -218,10 +222,8 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data, if (ret) goto out; - if (flags & KVM_PGTABLE_WALK_TABLE_POST) { - ret = kvm_pgtable_visitor_cb(data, addr, level, ptep, - KVM_PGTABLE_WALK_TABLE_POST); - } + if (ctx.flags & KVM_PGTABLE_WALK_TABLE_POST) + ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_TABLE_POST); out: return ret; @@ -292,13 +294,13 @@ struct leaf_walk_data { u32 level; }; -static int leaf_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, void * const arg) +static int leaf_walker(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit) { - struct leaf_walk_data *data = arg; + struct leaf_walk_data *data = ctx->arg; - data->pte = *ptep; - data->level = level; + data->pte = *ctx->ptep; + data->level = ctx->level; return 0; } 
@@ -383,47 +385,47 @@ enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte) return prot; } -static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level, - kvm_pte_t *ptep, struct hyp_map_data *data) +static bool hyp_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx, + struct hyp_map_data *data) { - kvm_pte_t new, old = *ptep; - u64 granule = kvm_granule_size(level), phys = data->phys; + kvm_pte_t new, old = *ctx->ptep; + u64 granule = kvm_granule_size(ctx->level), phys = data->phys; - if (!kvm_block_mapping_supported(addr, end, phys, level)) + if (!kvm_block_mapping_supported(ctx, phys)) return false; data->phys += granule; - new = kvm_init_valid_leaf_pte(phys, data->attr, level); + new = kvm_init_valid_leaf_pte(phys, data->attr, ctx->level); if (old == new) return true; if (!kvm_pte_valid(old)) - data->mm_ops->get_page(ptep); + data->mm_ops->get_page(ctx->ptep); else if (WARN_ON((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW)) return false; - smp_store_release(ptep, new); + smp_store_release(ctx->ptep, new); return true; } -static int hyp_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, void * const arg) +static int hyp_map_walker(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit) { kvm_pte_t *childp; - struct hyp_map_data *data = arg; + struct hyp_map_data *data = ctx->arg; struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; - if (hyp_map_walker_try_leaf(addr, end, level, ptep, arg)) + if (hyp_map_walker_try_leaf(ctx, data)) return 0; - if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1)) + if (WARN_ON(ctx->level == KVM_PGTABLE_MAX_LEVELS - 1)) return -EINVAL; childp = (kvm_pte_t *)mm_ops->zalloc_page(NULL); if (!childp) return -ENOMEM; - kvm_set_table_pte(ptep, childp, mm_ops); - mm_ops->get_page(ptep); + kvm_set_table_pte(ctx->ptep, childp, mm_ops); + mm_ops->get_page(ctx->ptep); return 0; } @@ -456,39 +458,39 @@ struct hyp_unmap_data { struct kvm_pgtable_mm_ops *mm_ops; }; 
-static int hyp_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, void * const arg) +static int hyp_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit) { - kvm_pte_t pte = *ptep, *childp = NULL; - u64 granule = kvm_granule_size(level); - struct hyp_unmap_data *data = arg; + kvm_pte_t pte = *ctx->ptep, *childp = NULL; + u64 granule = kvm_granule_size(ctx->level); + struct hyp_unmap_data *data = ctx->arg; struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; if (!kvm_pte_valid(pte)) return -EINVAL; - if (kvm_pte_table(pte, level)) { + if (kvm_pte_table(pte, ctx->level)) { childp = kvm_pte_follow(pte, mm_ops); if (mm_ops->page_count(childp) != 1) return 0; - kvm_clear_pte(ptep); + kvm_clear_pte(ctx->ptep); dsb(ishst); - __tlbi_level(vae2is, __TLBI_VADDR(addr, 0), level); + __tlbi_level(vae2is, __TLBI_VADDR(ctx->addr, 0), ctx->level); } else { - if (end - addr < granule) + if (ctx->end - ctx->addr < granule) return -EINVAL; - kvm_clear_pte(ptep); + kvm_clear_pte(ctx->ptep); dsb(ishst); - __tlbi_level(vale2is, __TLBI_VADDR(addr, 0), level); + __tlbi_level(vale2is, __TLBI_VADDR(ctx->addr, 0), ctx->level); data->unmapped += granule; } dsb(ish); isb(); - mm_ops->put_page(ptep); + mm_ops->put_page(ctx->ptep); if (childp) mm_ops->put_page(childp); @@ -532,18 +534,18 @@ int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits, return 0; } -static int hyp_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, void * const arg) +static int hyp_free_walker(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit) { - struct kvm_pgtable_mm_ops *mm_ops = arg; - kvm_pte_t pte = *ptep; + struct kvm_pgtable_mm_ops *mm_ops = ctx->arg; + kvm_pte_t pte = *ctx->ptep; if (!kvm_pte_valid(pte)) return 0; - mm_ops->put_page(ptep); + mm_ops->put_page(ctx->ptep); - if (kvm_pte_table(pte, level)) + if (kvm_pte_table(pte, ctx->level)) 
mm_ops->put_page(kvm_pte_follow(pte, mm_ops)); return 0; @@ -682,19 +684,19 @@ static bool stage2_pte_is_counted(kvm_pte_t pte) return !!pte; } -static void stage2_put_pte(kvm_pte_t *ptep, struct kvm_s2_mmu *mmu, u64 addr, - u32 level, struct kvm_pgtable_mm_ops *mm_ops) +static void stage2_put_pte(const struct kvm_pgtable_visit_ctx *ctx, struct kvm_s2_mmu *mmu, + struct kvm_pgtable_mm_ops *mm_ops) { /* * Clear the existing PTE, and perform break-before-make with * TLB maintenance if it was valid. */ - if (kvm_pte_valid(*ptep)) { - kvm_clear_pte(ptep); - kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, addr, level); + if (kvm_pte_valid(*ctx->ptep)) { + kvm_clear_pte(ctx->ptep); + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, ctx->level); } - mm_ops->put_page(ptep); + mm_ops->put_page(ctx->ptep); } static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte) @@ -708,29 +710,28 @@ static bool stage2_pte_executable(kvm_pte_t pte) return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN); } -static bool stage2_leaf_mapping_allowed(u64 addr, u64 end, u32 level, +static bool stage2_leaf_mapping_allowed(const struct kvm_pgtable_visit_ctx *ctx, struct stage2_map_data *data) { - if (data->force_pte && (level < (KVM_PGTABLE_MAX_LEVELS - 1))) + if (data->force_pte && (ctx->level < (KVM_PGTABLE_MAX_LEVELS - 1))) return false; - return kvm_block_mapping_supported(addr, end, data->phys, level); + return kvm_block_mapping_supported(ctx, data->phys); } -static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level, - kvm_pte_t *ptep, +static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx, struct stage2_map_data *data) { - kvm_pte_t new, old = *ptep; - u64 granule = kvm_granule_size(level), phys = data->phys; + kvm_pte_t new, old = *ctx->ptep; + u64 granule = kvm_granule_size(ctx->level), phys = data->phys; struct kvm_pgtable *pgt = data->mmu->pgt; struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; - if (!stage2_leaf_mapping_allowed(addr, end, level, 
data)) + if (!stage2_leaf_mapping_allowed(ctx, data)) return -E2BIG; if (kvm_phys_is_valid(phys)) - new = kvm_init_valid_leaf_pte(phys, data->attr, level); + new = kvm_init_valid_leaf_pte(phys, data->attr, ctx->level); else new = kvm_init_invalid_leaf_owner(data->owner_id); @@ -744,7 +745,7 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level, if (!stage2_pte_needs_update(old, new)) return -EAGAIN; - stage2_put_pte(ptep, data->mmu, addr, level, mm_ops); + stage2_put_pte(ctx, data->mmu, mm_ops); } /* Perform CMOs before installation of the guest stage-2 PTE */ @@ -755,26 +756,25 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level, if (mm_ops->icache_inval_pou && stage2_pte_executable(new)) mm_ops->icache_inval_pou(kvm_pte_follow(new, mm_ops), granule); - smp_store_release(ptep, new); + smp_store_release(ctx->ptep, new); if (stage2_pte_is_counted(new)) - mm_ops->get_page(ptep); + mm_ops->get_page(ctx->ptep); if (kvm_phys_is_valid(phys)) data->phys += granule; return 0; } -static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level, - kvm_pte_t *ptep, +static int stage2_map_walk_table_pre(const struct kvm_pgtable_visit_ctx *ctx, struct stage2_map_data *data) { if (data->anchor) return 0; - if (!stage2_leaf_mapping_allowed(addr, end, level, data)) + if (!stage2_leaf_mapping_allowed(ctx, data)) return 0; - data->childp = kvm_pte_follow(*ptep, data->mm_ops); - kvm_clear_pte(ptep); + data->childp = kvm_pte_follow(*ctx->ptep, data->mm_ops); + kvm_clear_pte(ctx->ptep); /* * Invalidate the whole stage-2, as we may have numerous leaf @@ -782,29 +782,29 @@ static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level, * individually. 
*/ kvm_call_hyp(__kvm_tlb_flush_vmid, data->mmu); - data->anchor = ptep; + data->anchor = ctx->ptep; return 0; } -static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, +static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx, struct stage2_map_data *data) { struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; - kvm_pte_t *childp, pte = *ptep; + kvm_pte_t *childp, pte = *ctx->ptep; int ret; if (data->anchor) { if (stage2_pte_is_counted(pte)) - mm_ops->put_page(ptep); + mm_ops->put_page(ctx->ptep); return 0; } - ret = stage2_map_walker_try_leaf(addr, end, level, ptep, data); + ret = stage2_map_walker_try_leaf(ctx, data); if (ret != -E2BIG) return ret; - if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1)) + if (WARN_ON(ctx->level == KVM_PGTABLE_MAX_LEVELS - 1)) return -EINVAL; if (!data->memcache) @@ -820,16 +820,15 @@ static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, * will be mapped lazily. */ if (stage2_pte_is_counted(pte)) - stage2_put_pte(ptep, data->mmu, addr, level, mm_ops); + stage2_put_pte(ctx, data->mmu, mm_ops); - kvm_set_table_pte(ptep, childp, mm_ops); - mm_ops->get_page(ptep); + kvm_set_table_pte(ctx->ptep, childp, mm_ops); + mm_ops->get_page(ctx->ptep); return 0; } -static int stage2_map_walk_table_post(u64 addr, u64 end, u32 level, - kvm_pte_t *ptep, +static int stage2_map_walk_table_post(const struct kvm_pgtable_visit_ctx *ctx, struct stage2_map_data *data) { struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; @@ -839,17 +838,17 @@ static int stage2_map_walk_table_post(u64 addr, u64 end, u32 level, if (!data->anchor) return 0; - if (data->anchor == ptep) { + if (data->anchor == ctx->ptep) { childp = data->childp; data->anchor = NULL; data->childp = NULL; - ret = stage2_map_walk_leaf(addr, end, level, ptep, data); + ret = stage2_map_walk_leaf(ctx, data); } else { - childp = kvm_pte_follow(*ptep, mm_ops); + childp = kvm_pte_follow(*ctx->ptep, mm_ops); } mm_ops->put_page(childp); - 
mm_ops->put_page(ptep); + mm_ops->put_page(ctx->ptep); return ret; } @@ -873,18 +872,18 @@ static int stage2_map_walk_table_post(u64 addr, u64 end, u32 level, * the page-table, installing the block entry when it revisits the anchor * pointer and clearing the anchor to NULL. */ -static int stage2_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, void * const arg) +static int stage2_map_walker(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit) { - struct stage2_map_data *data = arg; + struct stage2_map_data *data = ctx->arg; - switch (flag) { + switch (visit) { case KVM_PGTABLE_WALK_TABLE_PRE: - return stage2_map_walk_table_pre(addr, end, level, ptep, data); + return stage2_map_walk_table_pre(ctx, data); case KVM_PGTABLE_WALK_LEAF: - return stage2_map_walk_leaf(addr, end, level, ptep, data); + return stage2_map_walk_leaf(ctx, data); case KVM_PGTABLE_WALK_TABLE_POST: - return stage2_map_walk_table_post(addr, end, level, ptep, data); + return stage2_map_walk_table_post(ctx, data); } return -EINVAL; @@ -949,25 +948,24 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size, return ret; } -static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, - void * const arg) +static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit) { - struct kvm_pgtable *pgt = arg; + struct kvm_pgtable *pgt = ctx->arg; struct kvm_s2_mmu *mmu = pgt->mmu; struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops; - kvm_pte_t pte = *ptep, *childp = NULL; + kvm_pte_t pte = *ctx->ptep, *childp = NULL; bool need_flush = false; if (!kvm_pte_valid(pte)) { if (stage2_pte_is_counted(pte)) { - kvm_clear_pte(ptep); - mm_ops->put_page(ptep); + kvm_clear_pte(ctx->ptep); + mm_ops->put_page(ctx->ptep); } return 0; } - if (kvm_pte_table(pte, level)) { + if (kvm_pte_table(pte, ctx->level)) { childp = kvm_pte_follow(pte, 
mm_ops); if (mm_ops->page_count(childp) != 1) @@ -981,11 +979,11 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, * block entry and rely on the remaining portions being faulted * back lazily. */ - stage2_put_pte(ptep, mmu, addr, level, mm_ops); + stage2_put_pte(ctx, mmu, mm_ops); if (need_flush && mm_ops->dcache_clean_inval_poc) mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops), - kvm_granule_size(level)); + kvm_granule_size(ctx->level)); if (childp) mm_ops->put_page(childp); @@ -1012,18 +1010,17 @@ struct stage2_attr_data { struct kvm_pgtable_mm_ops *mm_ops; }; -static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, - void * const arg) +static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit) { - kvm_pte_t pte = *ptep; - struct stage2_attr_data *data = arg; + kvm_pte_t pte = *ctx->ptep; + struct stage2_attr_data *data = ctx->arg; struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; if (!kvm_pte_valid(pte)) return 0; - data->level = level; + data->level = ctx->level; data->pte = pte; pte &= ~data->attr_clr; pte |= data->attr_set; @@ -1039,10 +1036,10 @@ static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, * stage-2 PTE if we are going to add executable permission. 
*/ if (mm_ops->icache_inval_pou && - stage2_pte_executable(pte) && !stage2_pte_executable(*ptep)) + stage2_pte_executable(pte) && !stage2_pte_executable(*ctx->ptep)) mm_ops->icache_inval_pou(kvm_pte_follow(pte, mm_ops), - kvm_granule_size(level)); - WRITE_ONCE(*ptep, pte); + kvm_granule_size(ctx->level)); + WRITE_ONCE(*ctx->ptep, pte); } return 0; @@ -1140,20 +1137,19 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, return ret; } -static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, - void * const arg) +static int stage2_flush_walker(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit) { - struct kvm_pgtable *pgt = arg; + struct kvm_pgtable *pgt = ctx->arg; struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops; - kvm_pte_t pte = *ptep; + kvm_pte_t pte = *ctx->ptep; if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pgt, pte)) return 0; if (mm_ops->dcache_clean_inval_poc) mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops), - kvm_granule_size(level)); + kvm_granule_size(ctx->level)); return 0; } @@ -1200,19 +1196,18 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, return 0; } -static int stage2_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, - void * const arg) +static int stage2_free_walker(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit) { - struct kvm_pgtable_mm_ops *mm_ops = arg; - kvm_pte_t pte = *ptep; + struct kvm_pgtable_mm_ops *mm_ops = ctx->arg; + kvm_pte_t pte = *ctx->ptep; if (!stage2_pte_is_counted(pte)) return 0; - mm_ops->put_page(ptep); + mm_ops->put_page(ctx->ptep); - if (kvm_pte_table(pte, level)) + if (kvm_pte_table(pte, ctx->level)) mm_ops->put_page(kvm_pte_follow(pte, mm_ops)); return 0; From 83844a2317ecad935f6735abd854e4bf3f757040 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 7 Nov 2022 21:56:32 +0000 Subject: [PATCH 
1261/4122] KVM: arm64: Stash observed pte value in visitor context Rather than reading the ptep all over the shop, read the ptep once from __kvm_pgtable_visit() and stick it in the visitor context. Reread the ptep after visiting a leaf in case the callback installed a new table underneath. No functional change intended. Signed-off-by: Oliver Upton Reviewed-by: Ben Gardon Reviewed-by: Gavin Shan Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221107215644.1895162-3-oliver.upton@linux.dev --- arch/arm64/include/asm/kvm_pgtable.h | 1 + arch/arm64/kvm/hyp/nvhe/mem_protect.c | 5 +- arch/arm64/kvm/hyp/nvhe/setup.c | 7 +-- arch/arm64/kvm/hyp/pgtable.c | 86 +++++++++++++-------------- 4 files changed, 48 insertions(+), 51 deletions(-) diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 607f9bb8aab4..14d4b68a1e92 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -201,6 +201,7 @@ enum kvm_pgtable_walk_flags { struct kvm_pgtable_visit_ctx { kvm_pte_t *ptep; + kvm_pte_t old; void *arg; u64 addr; u64 end; diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 8f5b6a36a039..d21d1b08a055 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -421,12 +421,11 @@ static int __check_page_state_visitor(const struct kvm_pgtable_visit_ctx *ctx, enum kvm_pgtable_walk_flags visit) { struct check_walk_data *d = ctx->arg; - kvm_pte_t pte = *ctx->ptep; - if (kvm_pte_valid(pte) && !addr_is_memory(kvm_pte_to_phys(pte))) + if (kvm_pte_valid(ctx->old) && !addr_is_memory(kvm_pte_to_phys(ctx->old))) return -EINVAL; - return d->get_page_state(pte) == d->desired ? 0 : -EPERM; + return d->get_page_state(ctx->old) == d->desired ? 
0 : -EPERM; } static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size, diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index a293cf5eba1b..6af443c9d78e 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -192,10 +192,9 @@ static int finalize_host_mappings_walker(const struct kvm_pgtable_visit_ctx *ctx struct kvm_pgtable_mm_ops *mm_ops = ctx->arg; enum kvm_pgtable_prot prot; enum pkvm_page_state state; - kvm_pte_t pte = *ctx->ptep; phys_addr_t phys; - if (!kvm_pte_valid(pte)) + if (!kvm_pte_valid(ctx->old)) return 0; /* @@ -210,7 +209,7 @@ static int finalize_host_mappings_walker(const struct kvm_pgtable_visit_ctx *ctx if (ctx->level != (KVM_PGTABLE_MAX_LEVELS - 1)) return -EINVAL; - phys = kvm_pte_to_phys(pte); + phys = kvm_pte_to_phys(ctx->old); if (!addr_is_memory(phys)) return -EINVAL; @@ -218,7 +217,7 @@ static int finalize_host_mappings_walker(const struct kvm_pgtable_visit_ctx *ctx * Adjust the host stage-2 mappings to match the ownership attributes * configured in the hypervisor stage-1. 
*/ - state = pkvm_getstate(kvm_pgtable_hyp_pte_prot(pte)); + state = pkvm_getstate(kvm_pgtable_hyp_pte_prot(ctx->old)); switch (state) { case PKVM_PAGE_OWNED: return host_stage2_set_owner_locked(phys, PAGE_SIZE, pkvm_hyp_id); diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 900c8b9c0cfc..fb3696b3a997 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -189,6 +189,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data, enum kvm_pgtable_walk_flags flags = data->walker->flags; struct kvm_pgtable_visit_ctx ctx = { .ptep = ptep, + .old = READ_ONCE(*ptep), .arg = data->walker->arg, .addr = data->addr, .end = data->end, @@ -196,16 +197,16 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data, .flags = flags, }; int ret = 0; - kvm_pte_t *childp, pte = *ptep; - bool table = kvm_pte_table(pte, level); + kvm_pte_t *childp; + bool table = kvm_pte_table(ctx.old, level); if (table && (ctx.flags & KVM_PGTABLE_WALK_TABLE_PRE)) ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_TABLE_PRE); if (!table && (ctx.flags & KVM_PGTABLE_WALK_LEAF)) { ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_LEAF); - pte = *ptep; - table = kvm_pte_table(pte, level); + ctx.old = READ_ONCE(*ptep); + table = kvm_pte_table(ctx.old, level); } if (ret) @@ -217,7 +218,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data, goto out; } - childp = kvm_pte_follow(pte, data->pgt->mm_ops); + childp = kvm_pte_follow(ctx.old, data->pgt->mm_ops); ret = __kvm_pgtable_walk(data, childp, level + 1); if (ret) goto out; @@ -299,7 +300,7 @@ static int leaf_walker(const struct kvm_pgtable_visit_ctx *ctx, { struct leaf_walk_data *data = ctx->arg; - data->pte = *ctx->ptep; + data->pte = ctx->old; data->level = ctx->level; return 0; @@ -388,7 +389,7 @@ enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte) static bool hyp_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx 
*ctx, struct hyp_map_data *data) { - kvm_pte_t new, old = *ctx->ptep; + kvm_pte_t new; u64 granule = kvm_granule_size(ctx->level), phys = data->phys; if (!kvm_block_mapping_supported(ctx, phys)) @@ -396,11 +397,11 @@ static bool hyp_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx, data->phys += granule; new = kvm_init_valid_leaf_pte(phys, data->attr, ctx->level); - if (old == new) + if (ctx->old == new) return true; - if (!kvm_pte_valid(old)) + if (!kvm_pte_valid(ctx->old)) data->mm_ops->get_page(ctx->ptep); - else if (WARN_ON((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW)) + else if (WARN_ON((ctx->old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW)) return false; smp_store_release(ctx->ptep, new); @@ -461,16 +462,16 @@ struct hyp_unmap_data { static int hyp_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, enum kvm_pgtable_walk_flags visit) { - kvm_pte_t pte = *ctx->ptep, *childp = NULL; + kvm_pte_t *childp = NULL; u64 granule = kvm_granule_size(ctx->level); struct hyp_unmap_data *data = ctx->arg; struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; - if (!kvm_pte_valid(pte)) + if (!kvm_pte_valid(ctx->old)) return -EINVAL; - if (kvm_pte_table(pte, ctx->level)) { - childp = kvm_pte_follow(pte, mm_ops); + if (kvm_pte_table(ctx->old, ctx->level)) { + childp = kvm_pte_follow(ctx->old, mm_ops); if (mm_ops->page_count(childp) != 1) return 0; @@ -538,15 +539,14 @@ static int hyp_free_walker(const struct kvm_pgtable_visit_ctx *ctx, enum kvm_pgtable_walk_flags visit) { struct kvm_pgtable_mm_ops *mm_ops = ctx->arg; - kvm_pte_t pte = *ctx->ptep; - if (!kvm_pte_valid(pte)) + if (!kvm_pte_valid(ctx->old)) return 0; mm_ops->put_page(ctx->ptep); - if (kvm_pte_table(pte, ctx->level)) - mm_ops->put_page(kvm_pte_follow(pte, mm_ops)); + if (kvm_pte_table(ctx->old, ctx->level)) + mm_ops->put_page(kvm_pte_follow(ctx->old, mm_ops)); return 0; } @@ -691,7 +691,7 @@ static void stage2_put_pte(const struct kvm_pgtable_visit_ctx *ctx, struct kvm_s * Clear the existing PTE, and perform 
break-before-make with * TLB maintenance if it was valid. */ - if (kvm_pte_valid(*ctx->ptep)) { + if (kvm_pte_valid(ctx->old)) { kvm_clear_pte(ctx->ptep); kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, ctx->level); } @@ -722,7 +722,7 @@ static bool stage2_leaf_mapping_allowed(const struct kvm_pgtable_visit_ctx *ctx, static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx, struct stage2_map_data *data) { - kvm_pte_t new, old = *ctx->ptep; + kvm_pte_t new; u64 granule = kvm_granule_size(ctx->level), phys = data->phys; struct kvm_pgtable *pgt = data->mmu->pgt; struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; @@ -735,14 +735,14 @@ static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx, else new = kvm_init_invalid_leaf_owner(data->owner_id); - if (stage2_pte_is_counted(old)) { + if (stage2_pte_is_counted(ctx->old)) { /* * Skip updating the PTE if we are trying to recreate the exact * same mapping or only change the access permissions. Instead, * the vCPU will exit one more time from guest if still needed * and then go through the path of relaxing permissions. 
*/ - if (!stage2_pte_needs_update(old, new)) + if (!stage2_pte_needs_update(ctx->old, new)) return -EAGAIN; stage2_put_pte(ctx, data->mmu, mm_ops); @@ -773,7 +773,7 @@ static int stage2_map_walk_table_pre(const struct kvm_pgtable_visit_ctx *ctx, if (!stage2_leaf_mapping_allowed(ctx, data)) return 0; - data->childp = kvm_pte_follow(*ctx->ptep, data->mm_ops); + data->childp = kvm_pte_follow(ctx->old, data->mm_ops); kvm_clear_pte(ctx->ptep); /* @@ -790,11 +790,11 @@ static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx, struct stage2_map_data *data) { struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; - kvm_pte_t *childp, pte = *ctx->ptep; + kvm_pte_t *childp; int ret; if (data->anchor) { - if (stage2_pte_is_counted(pte)) + if (stage2_pte_is_counted(ctx->old)) mm_ops->put_page(ctx->ptep); return 0; @@ -819,7 +819,7 @@ static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx, * a table. Accesses beyond 'end' that fall within the new table * will be mapped lazily. 
*/ - if (stage2_pte_is_counted(pte)) + if (stage2_pte_is_counted(ctx->old)) stage2_put_pte(ctx, data->mmu, mm_ops); kvm_set_table_pte(ctx->ptep, childp, mm_ops); @@ -844,7 +844,7 @@ static int stage2_map_walk_table_post(const struct kvm_pgtable_visit_ctx *ctx, data->childp = NULL; ret = stage2_map_walk_leaf(ctx, data); } else { - childp = kvm_pte_follow(*ctx->ptep, mm_ops); + childp = kvm_pte_follow(ctx->old, mm_ops); } mm_ops->put_page(childp); @@ -954,23 +954,23 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, struct kvm_pgtable *pgt = ctx->arg; struct kvm_s2_mmu *mmu = pgt->mmu; struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops; - kvm_pte_t pte = *ctx->ptep, *childp = NULL; + kvm_pte_t *childp = NULL; bool need_flush = false; - if (!kvm_pte_valid(pte)) { - if (stage2_pte_is_counted(pte)) { + if (!kvm_pte_valid(ctx->old)) { + if (stage2_pte_is_counted(ctx->old)) { kvm_clear_pte(ctx->ptep); mm_ops->put_page(ctx->ptep); } return 0; } - if (kvm_pte_table(pte, ctx->level)) { - childp = kvm_pte_follow(pte, mm_ops); + if (kvm_pte_table(ctx->old, ctx->level)) { + childp = kvm_pte_follow(ctx->old, mm_ops); if (mm_ops->page_count(childp) != 1) return 0; - } else if (stage2_pte_cacheable(pgt, pte)) { + } else if (stage2_pte_cacheable(pgt, ctx->old)) { need_flush = !stage2_has_fwb(pgt); } @@ -982,7 +982,7 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, stage2_put_pte(ctx, mmu, mm_ops); if (need_flush && mm_ops->dcache_clean_inval_poc) - mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops), + mm_ops->dcache_clean_inval_poc(kvm_pte_follow(ctx->old, mm_ops), kvm_granule_size(ctx->level)); if (childp) @@ -1013,11 +1013,11 @@ struct stage2_attr_data { static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx, enum kvm_pgtable_walk_flags visit) { - kvm_pte_t pte = *ctx->ptep; + kvm_pte_t pte = ctx->old; struct stage2_attr_data *data = ctx->arg; struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; - if 
(!kvm_pte_valid(pte)) + if (!kvm_pte_valid(ctx->old)) return 0; data->level = ctx->level; @@ -1036,7 +1036,7 @@ static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx, * stage-2 PTE if we are going to add executable permission. */ if (mm_ops->icache_inval_pou && - stage2_pte_executable(pte) && !stage2_pte_executable(*ctx->ptep)) + stage2_pte_executable(pte) && !stage2_pte_executable(ctx->old)) mm_ops->icache_inval_pou(kvm_pte_follow(pte, mm_ops), kvm_granule_size(ctx->level)); WRITE_ONCE(*ctx->ptep, pte); @@ -1142,13 +1142,12 @@ static int stage2_flush_walker(const struct kvm_pgtable_visit_ctx *ctx, { struct kvm_pgtable *pgt = ctx->arg; struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops; - kvm_pte_t pte = *ctx->ptep; - if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pgt, pte)) + if (!kvm_pte_valid(ctx->old) || !stage2_pte_cacheable(pgt, ctx->old)) return 0; if (mm_ops->dcache_clean_inval_poc) - mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops), + mm_ops->dcache_clean_inval_poc(kvm_pte_follow(ctx->old, mm_ops), kvm_granule_size(ctx->level)); return 0; } @@ -1200,15 +1199,14 @@ static int stage2_free_walker(const struct kvm_pgtable_visit_ctx *ctx, enum kvm_pgtable_walk_flags visit) { struct kvm_pgtable_mm_ops *mm_ops = ctx->arg; - kvm_pte_t pte = *ctx->ptep; - if (!stage2_pte_is_counted(pte)) + if (!stage2_pte_is_counted(ctx->old)) return 0; mm_ops->put_page(ctx->ptep); - if (kvm_pte_table(pte, ctx->level)) - mm_ops->put_page(kvm_pte_follow(pte, mm_ops)); + if (kvm_pte_table(ctx->old, ctx->level)) + mm_ops->put_page(kvm_pte_follow(ctx->old, mm_ops)); return 0; } From 2a611c7f87f26cca405da63a57f06d0e4dc14240 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 7 Nov 2022 21:56:33 +0000 Subject: [PATCH 1262/4122] KVM: arm64: Pass mm_ops through the visitor context As a prerequisite for getting visitors off of struct kvm_pgtable, pass mm_ops through the visitor context. No functional change intended. 
Signed-off-by: Oliver Upton Reviewed-by: Ben Gardon Reviewed-by: Gavin Shan Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221107215644.1895162-4-oliver.upton@linux.dev --- arch/arm64/include/asm/kvm_pgtable.h | 1 + arch/arm64/kvm/hyp/nvhe/setup.c | 3 +- arch/arm64/kvm/hyp/pgtable.c | 63 +++++++++++----------------- 3 files changed, 26 insertions(+), 41 deletions(-) diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 14d4b68a1e92..a752793482cb 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -203,6 +203,7 @@ struct kvm_pgtable_visit_ctx { kvm_pte_t *ptep; kvm_pte_t old; void *arg; + struct kvm_pgtable_mm_ops *mm_ops; u64 addr; u64 end; u32 level; diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index 6af443c9d78e..1068338d77f3 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -189,7 +189,7 @@ static void hpool_put_page(void *addr) static int finalize_host_mappings_walker(const struct kvm_pgtable_visit_ctx *ctx, enum kvm_pgtable_walk_flags visit) { - struct kvm_pgtable_mm_ops *mm_ops = ctx->arg; + struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; enum kvm_pgtable_prot prot; enum pkvm_page_state state; phys_addr_t phys; @@ -239,7 +239,6 @@ static int finalize_host_mappings(void) struct kvm_pgtable_walker walker = { .cb = finalize_host_mappings_walker, .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, - .arg = pkvm_pgtable.mm_ops, }; int i, ret; diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index fb3696b3a997..db25e81a9890 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -181,9 +181,10 @@ static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, } static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data, - kvm_pte_t *pgtable, u32 level); + struct kvm_pgtable_mm_ops *mm_ops, kvm_pte_t *pgtable, u32 level); static 
inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data, + struct kvm_pgtable_mm_ops *mm_ops, kvm_pte_t *ptep, u32 level) { enum kvm_pgtable_walk_flags flags = data->walker->flags; @@ -191,6 +192,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data, .ptep = ptep, .old = READ_ONCE(*ptep), .arg = data->walker->arg, + .mm_ops = mm_ops, .addr = data->addr, .end = data->end, .level = level, @@ -218,8 +220,8 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data, goto out; } - childp = kvm_pte_follow(ctx.old, data->pgt->mm_ops); - ret = __kvm_pgtable_walk(data, childp, level + 1); + childp = kvm_pte_follow(ctx.old, mm_ops); + ret = __kvm_pgtable_walk(data, mm_ops, childp, level + 1); if (ret) goto out; @@ -231,7 +233,7 @@ out: } static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data, - kvm_pte_t *pgtable, u32 level) + struct kvm_pgtable_mm_ops *mm_ops, kvm_pte_t *pgtable, u32 level) { u32 idx; int ret = 0; @@ -245,7 +247,7 @@ static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data, if (data->addr >= data->end) break; - ret = __kvm_pgtable_visit(data, ptep, level); + ret = __kvm_pgtable_visit(data, mm_ops, ptep, level); if (ret) break; } @@ -269,7 +271,7 @@ static int _kvm_pgtable_walk(struct kvm_pgtable_walk_data *data) for (idx = kvm_pgd_page_idx(data); data->addr < data->end; ++idx) { kvm_pte_t *ptep = &pgt->pgd[idx * PTRS_PER_PTE]; - ret = __kvm_pgtable_walk(data, ptep, pgt->start_level); + ret = __kvm_pgtable_walk(data, pgt->mm_ops, ptep, pgt->start_level); if (ret) break; } @@ -332,7 +334,6 @@ int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr, struct hyp_map_data { u64 phys; kvm_pte_t attr; - struct kvm_pgtable_mm_ops *mm_ops; }; static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep) @@ -400,7 +401,7 @@ static bool hyp_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx, if (ctx->old == new) return true; if (!kvm_pte_valid(ctx->old)) - 
data->mm_ops->get_page(ctx->ptep); + ctx->mm_ops->get_page(ctx->ptep); else if (WARN_ON((ctx->old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW)) return false; @@ -413,7 +414,7 @@ static int hyp_map_walker(const struct kvm_pgtable_visit_ctx *ctx, { kvm_pte_t *childp; struct hyp_map_data *data = ctx->arg; - struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; + struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; if (hyp_map_walker_try_leaf(ctx, data)) return 0; @@ -436,7 +437,6 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, int ret; struct hyp_map_data map_data = { .phys = ALIGN_DOWN(phys, PAGE_SIZE), - .mm_ops = pgt->mm_ops, }; struct kvm_pgtable_walker walker = { .cb = hyp_map_walker, @@ -454,18 +454,13 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, return ret; } -struct hyp_unmap_data { - u64 unmapped; - struct kvm_pgtable_mm_ops *mm_ops; -}; - static int hyp_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, enum kvm_pgtable_walk_flags visit) { kvm_pte_t *childp = NULL; u64 granule = kvm_granule_size(ctx->level); - struct hyp_unmap_data *data = ctx->arg; - struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; + u64 *unmapped = ctx->arg; + struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; if (!kvm_pte_valid(ctx->old)) return -EINVAL; @@ -486,7 +481,7 @@ static int hyp_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, kvm_clear_pte(ctx->ptep); dsb(ishst); __tlbi_level(vale2is, __TLBI_VADDR(ctx->addr, 0), ctx->level); - data->unmapped += granule; + *unmapped += granule; } dsb(ish); @@ -501,12 +496,10 @@ static int hyp_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size) { - struct hyp_unmap_data unmap_data = { - .mm_ops = pgt->mm_ops, - }; + u64 unmapped = 0; struct kvm_pgtable_walker walker = { .cb = hyp_unmap_walker, - .arg = &unmap_data, + .arg = &unmapped, .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, }; @@ -514,7 +507,7 
@@ u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size) return 0; kvm_pgtable_walk(pgt, addr, size, &walker); - return unmap_data.unmapped; + return unmapped; } int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits, @@ -538,7 +531,7 @@ int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits, static int hyp_free_walker(const struct kvm_pgtable_visit_ctx *ctx, enum kvm_pgtable_walk_flags visit) { - struct kvm_pgtable_mm_ops *mm_ops = ctx->arg; + struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; if (!kvm_pte_valid(ctx->old)) return 0; @@ -556,7 +549,6 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt) struct kvm_pgtable_walker walker = { .cb = hyp_free_walker, .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, - .arg = pgt->mm_ops, }; WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker)); @@ -575,8 +567,6 @@ struct stage2_map_data { struct kvm_s2_mmu *mmu; void *memcache; - struct kvm_pgtable_mm_ops *mm_ops; - /* Force mappings to page granularity */ bool force_pte; }; @@ -725,7 +715,7 @@ static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_t new; u64 granule = kvm_granule_size(ctx->level), phys = data->phys; struct kvm_pgtable *pgt = data->mmu->pgt; - struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; + struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; if (!stage2_leaf_mapping_allowed(ctx, data)) return -E2BIG; @@ -773,7 +763,7 @@ static int stage2_map_walk_table_pre(const struct kvm_pgtable_visit_ctx *ctx, if (!stage2_leaf_mapping_allowed(ctx, data)) return 0; - data->childp = kvm_pte_follow(ctx->old, data->mm_ops); + data->childp = kvm_pte_follow(ctx->old, ctx->mm_ops); kvm_clear_pte(ctx->ptep); /* @@ -789,7 +779,7 @@ static int stage2_map_walk_table_pre(const struct kvm_pgtable_visit_ctx *ctx, static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx, struct stage2_map_data *data) { - struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; + struct kvm_pgtable_mm_ops 
*mm_ops = ctx->mm_ops; kvm_pte_t *childp; int ret; @@ -831,7 +821,7 @@ static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx, static int stage2_map_walk_table_post(const struct kvm_pgtable_visit_ctx *ctx, struct stage2_map_data *data) { - struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; + struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; kvm_pte_t *childp; int ret = 0; @@ -898,7 +888,6 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, .phys = ALIGN_DOWN(phys, PAGE_SIZE), .mmu = pgt->mmu, .memcache = mc, - .mm_ops = pgt->mm_ops, .force_pte = pgt->force_pte_cb && pgt->force_pte_cb(addr, addr + size, prot), }; struct kvm_pgtable_walker walker = { @@ -929,7 +918,6 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size, .phys = KVM_PHYS_INVALID, .mmu = pgt->mmu, .memcache = mc, - .mm_ops = pgt->mm_ops, .owner_id = owner_id, .force_pte = true, }; @@ -953,7 +941,7 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, { struct kvm_pgtable *pgt = ctx->arg; struct kvm_s2_mmu *mmu = pgt->mmu; - struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops; + struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; kvm_pte_t *childp = NULL; bool need_flush = false; @@ -1007,7 +995,6 @@ struct stage2_attr_data { kvm_pte_t attr_clr; kvm_pte_t pte; u32 level; - struct kvm_pgtable_mm_ops *mm_ops; }; static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx, @@ -1015,7 +1002,7 @@ static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx, { kvm_pte_t pte = ctx->old; struct stage2_attr_data *data = ctx->arg; - struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; + struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; if (!kvm_pte_valid(ctx->old)) return 0; @@ -1055,7 +1042,6 @@ static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr, struct stage2_attr_data data = { .attr_set = attr_set & attr_mask, .attr_clr = attr_clr & attr_mask, - .mm_ops = pgt->mm_ops, }; struct kvm_pgtable_walker 
walker = { .cb = stage2_attr_walker, @@ -1198,7 +1184,7 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, static int stage2_free_walker(const struct kvm_pgtable_visit_ctx *ctx, enum kvm_pgtable_walk_flags visit) { - struct kvm_pgtable_mm_ops *mm_ops = ctx->arg; + struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; if (!stage2_pte_is_counted(ctx->old)) return 0; @@ -1218,7 +1204,6 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) .cb = stage2_free_walker, .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, - .arg = pgt->mm_ops, }; WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker)); From fa002e8e79b3f980455ba585c1f47b26680de5b9 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 7 Nov 2022 21:56:34 +0000 Subject: [PATCH 1263/4122] KVM: arm64: Don't pass kvm_pgtable through kvm_pgtable_walk_data In order to tear down page tables from outside the context of kvm_pgtable (such as an RCU callback), stop passing a pointer through kvm_pgtable_walk_data. No functional change intended. 
Signed-off-by: Oliver Upton Reviewed-by: Ben Gardon Reviewed-by: Gavin Shan Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221107215644.1895162-5-oliver.upton@linux.dev --- arch/arm64/kvm/hyp/pgtable.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index db25e81a9890..93989b750a26 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -50,7 +50,6 @@ #define KVM_MAX_OWNER_ID 1 struct kvm_pgtable_walk_data { - struct kvm_pgtable *pgt; struct kvm_pgtable_walker *walker; u64 addr; @@ -88,7 +87,7 @@ static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, u32 level) return (data->addr >> shift) & mask; } -static u32 __kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr) +static u32 kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr) { u64 shift = kvm_granule_shift(pgt->start_level - 1); /* May underflow */ u64 mask = BIT(pgt->ia_bits) - 1; @@ -96,11 +95,6 @@ static u32 __kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr) return (addr & mask) >> shift; } -static u32 kvm_pgd_page_idx(struct kvm_pgtable_walk_data *data) -{ - return __kvm_pgd_page_idx(data->pgt, data->addr); -} - static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level) { struct kvm_pgtable pgt = { @@ -108,7 +102,7 @@ static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level) .start_level = start_level, }; - return __kvm_pgd_page_idx(&pgt, -1ULL) + 1; + return kvm_pgd_page_idx(&pgt, -1ULL) + 1; } static bool kvm_pte_table(kvm_pte_t pte, u32 level) @@ -255,11 +249,10 @@ static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data, return ret; } -static int _kvm_pgtable_walk(struct kvm_pgtable_walk_data *data) +static int _kvm_pgtable_walk(struct kvm_pgtable *pgt, struct kvm_pgtable_walk_data *data) { u32 idx; int ret = 0; - struct kvm_pgtable *pgt = data->pgt; u64 limit = BIT(pgt->ia_bits); if (data->addr > limit || data->end > limit) @@ -268,7 +261,7 @@ static int 
_kvm_pgtable_walk(struct kvm_pgtable_walk_data *data) if (!pgt->pgd) return -EINVAL; - for (idx = kvm_pgd_page_idx(data); data->addr < data->end; ++idx) { + for (idx = kvm_pgd_page_idx(pgt, data->addr); data->addr < data->end; ++idx) { kvm_pte_t *ptep = &pgt->pgd[idx * PTRS_PER_PTE]; ret = __kvm_pgtable_walk(data, pgt->mm_ops, ptep, pgt->start_level); @@ -283,13 +276,12 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size, struct kvm_pgtable_walker *walker) { struct kvm_pgtable_walk_data walk_data = { - .pgt = pgt, .addr = ALIGN_DOWN(addr, PAGE_SIZE), .end = PAGE_ALIGN(walk_data.addr + size), .walker = walker, }; - return _kvm_pgtable_walk(&walk_data); + return _kvm_pgtable_walk(pgt, &walk_data); } struct leaf_walk_data { From 8e94e1252cc054bb31fd3e9a15235cd831970ec1 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 7 Nov 2022 21:56:35 +0000 Subject: [PATCH 1264/4122] KVM: arm64: Add a helper to tear down unlinked stage-2 subtrees A subsequent change to KVM will move the tear down of an unlinked stage-2 subtree out of the critical path of the break-before-make sequence. Introduce a new helper for tearing down unlinked stage-2 subtrees. Leverage the existing stage-2 free walkers to do so, with a deep call into __kvm_pgtable_walk() as the subtree is no longer reachable from the root. 
Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221107215644.1895162-6-oliver.upton@linux.dev --- arch/arm64/include/asm/kvm_pgtable.h | 11 +++++++++++ arch/arm64/kvm/hyp/pgtable.c | 23 +++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index a752793482cb..93b1feeaebab 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -333,6 +333,17 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, */ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt); +/** + * kvm_pgtable_stage2_free_removed() - Free a removed stage-2 paging structure. + * @mm_ops: Memory management callbacks. + * @pgtable: Unlinked stage-2 paging structure to be freed. + * @level: Level of the stage-2 paging structure to be freed. + * + * The page-table is assumed to be unreachable by any hardware walkers prior to + * freeing and therefore no TLB invalidation is performed. + */ +void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level); + /** * kvm_pgtable_stage2_map() - Install a mapping in a guest stage-2 page-table. * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). 
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 93989b750a26..363a5cce7e1a 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -1203,3 +1203,26 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) pgt->mm_ops->free_pages_exact(pgt->pgd, pgd_sz); pgt->pgd = NULL; } + +void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level) +{ + kvm_pte_t *ptep = (kvm_pte_t *)pgtable; + struct kvm_pgtable_walker walker = { + .cb = stage2_free_walker, + .flags = KVM_PGTABLE_WALK_LEAF | + KVM_PGTABLE_WALK_TABLE_POST, + }; + struct kvm_pgtable_walk_data data = { + .walker = &walker, + + /* + * At this point the IPA really doesn't matter, as the page + * table being traversed has already been removed from the stage + * 2. Set an appropriate range to cover the entire page table. + */ + .addr = 0, + .end = kvm_granule_size(level), + }; + + WARN_ON(__kvm_pgtable_walk(&data, mm_ops, ptep, level)); +} From 6b91b8f95cadd3441c056182daf9024475ac4a91 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 7 Nov 2022 21:56:36 +0000 Subject: [PATCH 1265/4122] KVM: arm64: Use an opaque type for pteps Use an opaque type for pteps and require visitors explicitly dereference the pointer before using. Protecting page table memory with RCU requires that KVM dereferences RCU-annotated pointers before using. However, RCU is not available for use in the nVHE hypervisor and the opaque type can be conditionally annotated with RCU for the stage-2 MMU. Call the type a 'pteref' to avoid a naming collision with raw pteps. No functional change intended. 
Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221107215644.1895162-7-oliver.upton@linux.dev --- arch/arm64/include/asm/kvm_pgtable.h | 9 ++++++++- arch/arm64/kvm/hyp/pgtable.c | 27 ++++++++++++++------------- arch/arm64/kvm/mmu.c | 2 +- 3 files changed, 23 insertions(+), 15 deletions(-) diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 93b1feeaebab..cbd2851eefc1 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -37,6 +37,13 @@ static inline u64 kvm_get_parange(u64 mmfr0) typedef u64 kvm_pte_t; +typedef kvm_pte_t *kvm_pteref_t; + +static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared) +{ + return pteref; +} + #define KVM_PTE_VALID BIT(0) #define KVM_PTE_ADDR_MASK GENMASK(47, PAGE_SHIFT) @@ -175,7 +182,7 @@ typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end, struct kvm_pgtable { u32 ia_bits; u32 start_level; - kvm_pte_t *pgd; + kvm_pteref_t pgd; struct kvm_pgtable_mm_ops *mm_ops; /* Stage-2 only */ diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 363a5cce7e1a..7511494537e5 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -175,13 +175,14 @@ static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, } static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data, - struct kvm_pgtable_mm_ops *mm_ops, kvm_pte_t *pgtable, u32 level); + struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, u32 level); static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data, struct kvm_pgtable_mm_ops *mm_ops, - kvm_pte_t *ptep, u32 level) + kvm_pteref_t pteref, u32 level) { enum kvm_pgtable_walk_flags flags = data->walker->flags; + kvm_pte_t *ptep = kvm_dereference_pteref(pteref, false); struct kvm_pgtable_visit_ctx ctx = { .ptep = ptep, .old = READ_ONCE(*ptep), @@ -193,7 +194,7 @@ static inline int __kvm_pgtable_visit(struct 
kvm_pgtable_walk_data *data, .flags = flags, }; int ret = 0; - kvm_pte_t *childp; + kvm_pteref_t childp; bool table = kvm_pte_table(ctx.old, level); if (table && (ctx.flags & KVM_PGTABLE_WALK_TABLE_PRE)) @@ -214,7 +215,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data, goto out; } - childp = kvm_pte_follow(ctx.old, mm_ops); + childp = (kvm_pteref_t)kvm_pte_follow(ctx.old, mm_ops); ret = __kvm_pgtable_walk(data, mm_ops, childp, level + 1); if (ret) goto out; @@ -227,7 +228,7 @@ out: } static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data, - struct kvm_pgtable_mm_ops *mm_ops, kvm_pte_t *pgtable, u32 level) + struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, u32 level) { u32 idx; int ret = 0; @@ -236,12 +237,12 @@ static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data, return -EINVAL; for (idx = kvm_pgtable_idx(data, level); idx < PTRS_PER_PTE; ++idx) { - kvm_pte_t *ptep = &pgtable[idx]; + kvm_pteref_t pteref = &pgtable[idx]; if (data->addr >= data->end) break; - ret = __kvm_pgtable_visit(data, mm_ops, ptep, level); + ret = __kvm_pgtable_visit(data, mm_ops, pteref, level); if (ret) break; } @@ -262,9 +263,9 @@ static int _kvm_pgtable_walk(struct kvm_pgtable *pgt, struct kvm_pgtable_walk_da return -EINVAL; for (idx = kvm_pgd_page_idx(pgt, data->addr); data->addr < data->end; ++idx) { - kvm_pte_t *ptep = &pgt->pgd[idx * PTRS_PER_PTE]; + kvm_pteref_t pteref = &pgt->pgd[idx * PTRS_PER_PTE]; - ret = __kvm_pgtable_walk(data, pgt->mm_ops, ptep, pgt->start_level); + ret = __kvm_pgtable_walk(data, pgt->mm_ops, pteref, pgt->start_level); if (ret) break; } @@ -507,7 +508,7 @@ int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits, { u64 levels = ARM64_HW_PGTABLE_LEVELS(va_bits); - pgt->pgd = (kvm_pte_t *)mm_ops->zalloc_page(NULL); + pgt->pgd = (kvm_pteref_t)mm_ops->zalloc_page(NULL); if (!pgt->pgd) return -ENOMEM; @@ -544,7 +545,7 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt) }; 
WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker)); - pgt->mm_ops->put_page(pgt->pgd); + pgt->mm_ops->put_page(kvm_dereference_pteref(pgt->pgd, false)); pgt->pgd = NULL; } @@ -1157,7 +1158,7 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0; pgd_sz = kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE; - pgt->pgd = mm_ops->zalloc_pages_exact(pgd_sz); + pgt->pgd = (kvm_pteref_t)mm_ops->zalloc_pages_exact(pgd_sz); if (!pgt->pgd) return -ENOMEM; @@ -1200,7 +1201,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker)); pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE; - pgt->mm_ops->free_pages_exact(pgt->pgd, pgd_sz); + pgt->mm_ops->free_pages_exact(kvm_dereference_pteref(pgt->pgd, false), pgd_sz); pgt->pgd = NULL; } diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 60ee3d9f01f8..5e197ae190ef 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -640,7 +640,7 @@ static struct kvm_pgtable_mm_ops kvm_user_mm_ops = { static int get_user_mapping_size(struct kvm *kvm, u64 addr) { struct kvm_pgtable pgt = { - .pgd = (kvm_pte_t *)kvm->mm->pgd, + .pgd = (kvm_pteref_t)kvm->mm->pgd, .ia_bits = VA_BITS, .start_level = (KVM_PGTABLE_MAX_LEVELS - CONFIG_PGTABLE_LEVELS), From 5c359cca1faf6d7671537fe1c240e8668467864d Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 7 Nov 2022 21:56:37 +0000 Subject: [PATCH 1266/4122] KVM: arm64: Tear down unlinked stage-2 subtree after break-before-make The break-before-make sequence is a bit annoying as it opens a window wherein memory is unmapped from the guest. KVM should replace the PTE as quickly as possible and avoid unnecessary work in between. Presently, the stage-2 map walker tears down a removed table before installing a block mapping when coalescing a table into a block. 
As the removed table is no longer visible to hardware walkers after the DSB+TLBI, it is possible to move the remaining cleanup to happen after installing the new PTE. Reshuffle the stage-2 map walker to install the new block entry in the pre-order callback. Unwire all of the teardown logic and replace it with a call to kvm_pgtable_stage2_free_removed() after fixing the PTE. The post-order visitor is now completely unnecessary, so drop it. Finally, touch up the comments to better represent the now simplified map walker. Note that the call to tear down the unlinked stage-2 is indirected as a subsequent change will use an RCU callback to trigger tear down. RCU is not available to pKVM, so there is a need to use different implementations on pKVM and non-pKVM VMs. Signed-off-by: Oliver Upton Reviewed-by: Ben Gardon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221107215644.1895162-8-oliver.upton@linux.dev --- arch/arm64/include/asm/kvm_pgtable.h | 3 + arch/arm64/kvm/hyp/nvhe/mem_protect.c | 6 ++ arch/arm64/kvm/hyp/pgtable.c | 85 +++++++-------------------- arch/arm64/kvm/mmu.c | 8 +++ 4 files changed, 39 insertions(+), 63 deletions(-) diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index cbd2851eefc1..e70cf57b719e 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -92,6 +92,8 @@ static inline bool kvm_level_supports_block_mapping(u32 level) * allocation is physically contiguous. * @free_pages_exact: Free an exact number of memory pages previously * allocated by zalloc_pages_exact. + * @free_removed_table: Free a removed paging structure by unlinking and + * dropping references. * @get_page: Increment the refcount on a page. * @put_page: Decrement the refcount on a page. 
When the * refcount reaches 0 the page is automatically @@ -110,6 +112,7 @@ struct kvm_pgtable_mm_ops { void* (*zalloc_page)(void *arg); void* (*zalloc_pages_exact)(size_t size); void (*free_pages_exact)(void *addr, size_t size); + void (*free_removed_table)(void *addr, u32 level); void (*get_page)(void *addr); void (*put_page)(void *addr); int (*page_count)(void *addr); diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index d21d1b08a055..735769886b55 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -79,6 +79,11 @@ static void host_s2_put_page(void *addr) hyp_put_page(&host_s2_pool, addr); } +static void host_s2_free_removed_table(void *addr, u32 level) +{ + kvm_pgtable_stage2_free_removed(&host_kvm.mm_ops, addr, level); +} + static int prepare_s2_pool(void *pgt_pool_base) { unsigned long nr_pages, pfn; @@ -93,6 +98,7 @@ static int prepare_s2_pool(void *pgt_pool_base) host_kvm.mm_ops = (struct kvm_pgtable_mm_ops) { .zalloc_pages_exact = host_s2_zalloc_pages_exact, .zalloc_page = host_s2_zalloc_page, + .free_removed_table = host_s2_free_removed_table, .phys_to_virt = hyp_phys_to_virt, .virt_to_phys = hyp_virt_to_phys, .page_count = hyp_page_count, diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 7511494537e5..7c9782347570 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -750,13 +750,13 @@ static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx, static int stage2_map_walk_table_pre(const struct kvm_pgtable_visit_ctx *ctx, struct stage2_map_data *data) { - if (data->anchor) - return 0; + struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; + kvm_pte_t *childp = kvm_pte_follow(ctx->old, mm_ops); + int ret; if (!stage2_leaf_mapping_allowed(ctx, data)) return 0; - data->childp = kvm_pte_follow(ctx->old, ctx->mm_ops); kvm_clear_pte(ctx->ptep); /* @@ -765,8 +765,13 @@ static int 
stage2_map_walk_table_pre(const struct kvm_pgtable_visit_ctx *ctx, * individually. */ kvm_call_hyp(__kvm_tlb_flush_vmid, data->mmu); - data->anchor = ctx->ptep; - return 0; + + ret = stage2_map_walker_try_leaf(ctx, data); + + mm_ops->put_page(ctx->ptep); + mm_ops->free_removed_table(childp, ctx->level); + + return ret; } static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx, @@ -776,13 +781,6 @@ static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_t *childp; int ret; - if (data->anchor) { - if (stage2_pte_is_counted(ctx->old)) - mm_ops->put_page(ctx->ptep); - - return 0; - } - ret = stage2_map_walker_try_leaf(ctx, data); if (ret != -E2BIG) return ret; @@ -811,49 +809,14 @@ static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx, return 0; } -static int stage2_map_walk_table_post(const struct kvm_pgtable_visit_ctx *ctx, - struct stage2_map_data *data) -{ - struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; - kvm_pte_t *childp; - int ret = 0; - - if (!data->anchor) - return 0; - - if (data->anchor == ctx->ptep) { - childp = data->childp; - data->anchor = NULL; - data->childp = NULL; - ret = stage2_map_walk_leaf(ctx, data); - } else { - childp = kvm_pte_follow(ctx->old, mm_ops); - } - - mm_ops->put_page(childp); - mm_ops->put_page(ctx->ptep); - - return ret; -} - /* - * This is a little fiddly, as we use all three of the walk flags. The idea - * is that the TABLE_PRE callback runs for table entries on the way down, - * looking for table entries which we could conceivably replace with a - * block entry for this mapping. If it finds one, then it sets the 'anchor' - * field in 'struct stage2_map_data' to point at the table entry, before - * clearing the entry to zero and descending into the now detached table. + * The TABLE_PRE callback runs for table entries on the way down, looking + * for table entries which we could conceivably replace with a block entry + * for this mapping. 
If it finds one it replaces the entry and calls + * kvm_pgtable_mm_ops::free_removed_table() to tear down the detached table. * - * The behaviour of the LEAF callback then depends on whether or not the - * anchor has been set. If not, then we're not using a block mapping higher - * up the table and we perform the mapping at the existing leaves instead. - * If, on the other hand, the anchor _is_ set, then we drop references to - * all valid leaves so that the pages beneath the anchor can be freed. - * - * Finally, the TABLE_POST callback does nothing if the anchor has not - * been set, but otherwise frees the page-table pages while walking back up - * the page-table, installing the block entry when it revisits the anchor - * pointer and clearing the anchor to NULL. + * Otherwise, the LEAF callback performs the mapping at the existing leaves + * instead. */ static int stage2_map_walker(const struct kvm_pgtable_visit_ctx *ctx, enum kvm_pgtable_walk_flags visit) @@ -865,11 +828,9 @@ static int stage2_map_walker(const struct kvm_pgtable_visit_ctx *ctx, return stage2_map_walk_table_pre(ctx, data); case KVM_PGTABLE_WALK_LEAF: return stage2_map_walk_leaf(ctx, data); - case KVM_PGTABLE_WALK_TABLE_POST: - return stage2_map_walk_table_post(ctx, data); + default: + return -EINVAL; } - - return -EINVAL; } int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, @@ -886,8 +847,7 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, struct kvm_pgtable_walker walker = { .cb = stage2_map_walker, .flags = KVM_PGTABLE_WALK_TABLE_PRE | - KVM_PGTABLE_WALK_LEAF | - KVM_PGTABLE_WALK_TABLE_POST, + KVM_PGTABLE_WALK_LEAF, .arg = &map_data, }; @@ -917,8 +877,7 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size, struct kvm_pgtable_walker walker = { .cb = stage2_map_walker, .flags = KVM_PGTABLE_WALK_TABLE_PRE | - KVM_PGTABLE_WALK_LEAF | - KVM_PGTABLE_WALK_TABLE_POST, + KVM_PGTABLE_WALK_LEAF, .arg = &map_data, }; @@ -1207,7 
+1166,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level) { - kvm_pte_t *ptep = (kvm_pte_t *)pgtable; + kvm_pteref_t ptep = (kvm_pteref_t)pgtable; struct kvm_pgtable_walker walker = { .cb = stage2_free_walker, .flags = KVM_PGTABLE_WALK_LEAF | @@ -1225,5 +1184,5 @@ void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pg .end = kvm_granule_size(level), }; - WARN_ON(__kvm_pgtable_walk(&data, mm_ops, ptep, level)); + WARN_ON(__kvm_pgtable_walk(&data, mm_ops, ptep, level + 1)); } diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 5e197ae190ef..73ae908eb5d9 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -128,6 +128,13 @@ static void kvm_s2_free_pages_exact(void *virt, size_t size) free_pages_exact(virt, size); } +static struct kvm_pgtable_mm_ops kvm_s2_mm_ops; + +static void stage2_free_removed_table(void *addr, u32 level) +{ + kvm_pgtable_stage2_free_removed(&kvm_s2_mm_ops, addr, level); +} + static void kvm_host_get_page(void *addr) { get_page(virt_to_page(addr)); @@ -662,6 +669,7 @@ static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = { .zalloc_page = stage2_memcache_zalloc_page, .zalloc_pages_exact = kvm_s2_zalloc_pages_exact, .free_pages_exact = kvm_s2_free_pages_exact, + .free_removed_table = stage2_free_removed_table, .get_page = kvm_host_get_page, .put_page = kvm_s2_put_page, .page_count = kvm_host_page_count, From c3119ae45dfb6038ca458ab5ba7a9fba2810845b Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 7 Nov 2022 21:56:38 +0000 Subject: [PATCH 1267/4122] KVM: arm64: Protect stage-2 traversal with RCU Use RCU to safely walk the stage-2 page tables in parallel. Acquire and release the RCU read lock when traversing the page tables. Defer the freeing of table memory to an RCU callback. Indirect the calls into RCU and provide stubs for hypervisor code, as RCU is not available in such a context. 
The RCU protection doesn't amount to much at the moment, as readers are already protected by the read-write lock (all walkers that free table memory take the write lock). Nonetheless, a subsequent change will further relax the locking requirements around the stage-2 MMU, thereby depending on RCU. Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221107215644.1895162-9-oliver.upton@linux.dev --- arch/arm64/include/asm/kvm_pgtable.h | 49 ++++++++++++++++++++++++++++ arch/arm64/kvm/hyp/pgtable.c | 10 +++++- arch/arm64/kvm/mmu.c | 14 +++++++- 3 files changed, 71 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index e70cf57b719e..7634b6964779 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -37,6 +37,13 @@ static inline u64 kvm_get_parange(u64 mmfr0) typedef u64 kvm_pte_t; +/* + * RCU cannot be used in a non-kernel context such as the hyp. As such, page + * table walkers used in hyp do not call into RCU and instead use other + * synchronization mechanisms (such as a spinlock).
+ */ +#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__) + typedef kvm_pte_t *kvm_pteref_t; static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared) @@ -44,6 +51,40 @@ static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared return pteref; } +static inline void kvm_pgtable_walk_begin(void) {} +static inline void kvm_pgtable_walk_end(void) {} + +static inline bool kvm_pgtable_walk_lock_held(void) +{ + return true; +} + +#else + +typedef kvm_pte_t __rcu *kvm_pteref_t; + +static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared) +{ + return rcu_dereference_check(pteref, !shared); +} + +static inline void kvm_pgtable_walk_begin(void) +{ + rcu_read_lock(); +} + +static inline void kvm_pgtable_walk_end(void) +{ + rcu_read_unlock(); +} + +static inline bool kvm_pgtable_walk_lock_held(void) +{ + return rcu_read_lock_held(); +} + +#endif + #define KVM_PTE_VALID BIT(0) #define KVM_PTE_ADDR_MASK GENMASK(47, PAGE_SHIFT) @@ -202,11 +243,14 @@ struct kvm_pgtable { * children. * @KVM_PGTABLE_WALK_TABLE_POST: Visit table entries after their * children. + * @KVM_PGTABLE_WALK_SHARED: Indicates the page-tables may be shared + * with other software walkers. */ enum kvm_pgtable_walk_flags { KVM_PGTABLE_WALK_LEAF = BIT(0), KVM_PGTABLE_WALK_TABLE_PRE = BIT(1), KVM_PGTABLE_WALK_TABLE_POST = BIT(2), + KVM_PGTABLE_WALK_SHARED = BIT(3), }; struct kvm_pgtable_visit_ctx { @@ -223,6 +267,11 @@ struct kvm_pgtable_visit_ctx { typedef int (*kvm_pgtable_visitor_fn_t)(const struct kvm_pgtable_visit_ctx *ctx, enum kvm_pgtable_walk_flags visit); +static inline bool kvm_pgtable_walk_shared(const struct kvm_pgtable_visit_ctx *ctx) +{ + return ctx->flags & KVM_PGTABLE_WALK_SHARED; +} + /** * struct kvm_pgtable_walker - Hook into a page-table walk. * @cb: Callback function to invoke during the walk. 
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 7c9782347570..d8d963521d4e 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -171,6 +171,9 @@ static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, enum kvm_pgtable_walk_flags visit) { struct kvm_pgtable_walker *walker = data->walker; + + /* Ensure the appropriate lock is held (e.g. RCU lock for stage-2 MMU) */ + WARN_ON_ONCE(kvm_pgtable_walk_shared(ctx) && !kvm_pgtable_walk_lock_held()); return walker->cb(ctx, visit); } @@ -281,8 +284,13 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size, .end = PAGE_ALIGN(walk_data.addr + size), .walker = walker, }; + int r; - return _kvm_pgtable_walk(pgt, &walk_data); + kvm_pgtable_walk_begin(); + r = _kvm_pgtable_walk(pgt, &walk_data); + kvm_pgtable_walk_end(); + + return r; } struct leaf_walk_data { diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 73ae908eb5d9..52e042399ba5 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -130,9 +130,21 @@ static void kvm_s2_free_pages_exact(void *virt, size_t size) static struct kvm_pgtable_mm_ops kvm_s2_mm_ops; +static void stage2_free_removed_table_rcu_cb(struct rcu_head *head) +{ + struct page *page = container_of(head, struct page, rcu_head); + void *pgtable = page_to_virt(page); + u32 level = page_private(page); + + kvm_pgtable_stage2_free_removed(&kvm_s2_mm_ops, pgtable, level); +} + static void stage2_free_removed_table(void *addr, u32 level) { - kvm_pgtable_stage2_free_removed(&kvm_s2_mm_ops, addr, level); + struct page *page = virt_to_page(addr); + + set_page_private(page, (unsigned long)level); + call_rcu(&page->rcu_head, stage2_free_removed_table_rcu_cb); } static void kvm_host_get_page(void *addr) From ca5de2448c3b4c018fe3d6223df8b59068be1cd7 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 7 Nov 2022 21:56:39 +0000 Subject: [PATCH 1268/4122] KVM: arm64: Atomically update stage 2 leaf attributes in parallel 
walks The stage2 attr walker is already used for parallel walks. Since commit f783ef1c0e82 ("KVM: arm64: Add fast path to handle permission relaxation during dirty logging"), KVM acquires the read lock when write-unprotecting a PTE. However, the walker only uses a simple store to update the PTE. This is safe as the only possible race is with hardware updates to the access flag, which is benign. However, a subsequent change to KVM will allow more changes to the stage 2 page tables to be done in parallel. Prepare the stage 2 attribute walker by performing atomic updates to the PTE when walking in parallel. Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221107215644.1895162-10-oliver.upton@linux.dev --- arch/arm64/kvm/hyp/pgtable.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index d8d963521d4e..a34e2050f931 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -185,7 +185,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data, kvm_pteref_t pteref, u32 level) { enum kvm_pgtable_walk_flags flags = data->walker->flags; - kvm_pte_t *ptep = kvm_dereference_pteref(pteref, false); + kvm_pte_t *ptep = kvm_dereference_pteref(pteref, flags & KVM_PGTABLE_WALK_SHARED); struct kvm_pgtable_visit_ctx ctx = { .ptep = ptep, .old = READ_ONCE(*ptep), @@ -675,6 +675,16 @@ static bool stage2_pte_is_counted(kvm_pte_t pte) return !!pte; } +static bool stage2_try_set_pte(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_t new) +{ + if (!kvm_pgtable_walk_shared(ctx)) { + WRITE_ONCE(*ctx->ptep, new); + return true; + } + + return cmpxchg(ctx->ptep, ctx->old, new) == ctx->old; +} + static void stage2_put_pte(const struct kvm_pgtable_visit_ctx *ctx, struct kvm_s2_mmu *mmu, struct kvm_pgtable_mm_ops *mm_ops) { @@ -986,7 +996,9 @@ static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx 
*ctx, stage2_pte_executable(pte) && !stage2_pte_executable(ctx->old)) mm_ops->icache_inval_pou(kvm_pte_follow(pte, mm_ops), kvm_granule_size(ctx->level)); - WRITE_ONCE(*ctx->ptep, pte); + + if (!stage2_try_set_pte(ctx, pte)) + return -EAGAIN; } return 0; @@ -995,7 +1007,7 @@ static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx, static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr, u64 size, kvm_pte_t attr_set, kvm_pte_t attr_clr, kvm_pte_t *orig_pte, - u32 *level) + u32 *level, enum kvm_pgtable_walk_flags flags) { int ret; kvm_pte_t attr_mask = KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI; @@ -1006,7 +1018,7 @@ static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr, struct kvm_pgtable_walker walker = { .cb = stage2_attr_walker, .arg = &data, - .flags = KVM_PGTABLE_WALK_LEAF, + .flags = flags | KVM_PGTABLE_WALK_LEAF, }; ret = kvm_pgtable_walk(pgt, addr, size, &walker); @@ -1025,14 +1037,14 @@ int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size) { return stage2_update_leaf_attrs(pgt, addr, size, 0, KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W, - NULL, NULL); + NULL, NULL, 0); } kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr) { kvm_pte_t pte = 0; stage2_update_leaf_attrs(pgt, addr, 1, KVM_PTE_LEAF_ATTR_LO_S2_AF, 0, - &pte, NULL); + &pte, NULL, 0); dsb(ishst); return pte; } @@ -1041,7 +1053,7 @@ kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr) { kvm_pte_t pte = 0; stage2_update_leaf_attrs(pgt, addr, 1, 0, KVM_PTE_LEAF_ATTR_LO_S2_AF, - &pte, NULL); + &pte, NULL, 0); /* * "But where's the TLBI?!", you scream. * "Over in the core code", I sigh. 
@@ -1054,7 +1066,7 @@ kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr) bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr) { kvm_pte_t pte = 0; - stage2_update_leaf_attrs(pgt, addr, 1, 0, 0, &pte, NULL); + stage2_update_leaf_attrs(pgt, addr, 1, 0, 0, &pte, NULL, 0); return pte & KVM_PTE_LEAF_ATTR_LO_S2_AF; } @@ -1077,7 +1089,8 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, if (prot & KVM_PGTABLE_PROT_X) clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN; - ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level); + ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level, + KVM_PGTABLE_WALK_SHARED); if (!ret) kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, pgt->mmu, addr, level); return ret; From 331aa3a0547d1c794587e0df374d13b16645e832 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 7 Nov 2022 21:56:40 +0000 Subject: [PATCH 1269/4122] KVM: arm64: Split init and set for table PTE Create a helper to initialize a table and directly call smp_store_release() to install it (for now). Prepare for a subsequent change that generalizes PTE writes with a helper. 
Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221107215644.1895162-11-oliver.upton@linux.dev --- arch/arm64/kvm/hyp/pgtable.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index a34e2050f931..f4dd77c6c97d 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -136,16 +136,13 @@ static void kvm_clear_pte(kvm_pte_t *ptep) WRITE_ONCE(*ptep, 0); } -static void kvm_set_table_pte(kvm_pte_t *ptep, kvm_pte_t *childp, - struct kvm_pgtable_mm_ops *mm_ops) +static kvm_pte_t kvm_init_table_pte(kvm_pte_t *childp, struct kvm_pgtable_mm_ops *mm_ops) { - kvm_pte_t old = *ptep, pte = kvm_phys_to_pte(mm_ops->virt_to_phys(childp)); + kvm_pte_t pte = kvm_phys_to_pte(mm_ops->virt_to_phys(childp)); pte |= FIELD_PREP(KVM_PTE_TYPE, KVM_PTE_TYPE_TABLE); pte |= KVM_PTE_VALID; - - WARN_ON(kvm_pte_valid(old)); - smp_store_release(ptep, pte); + return pte; } static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, kvm_pte_t attr, u32 level) @@ -413,7 +410,7 @@ static bool hyp_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx, static int hyp_map_walker(const struct kvm_pgtable_visit_ctx *ctx, enum kvm_pgtable_walk_flags visit) { - kvm_pte_t *childp; + kvm_pte_t *childp, new; struct hyp_map_data *data = ctx->arg; struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; @@ -427,8 +424,10 @@ static int hyp_map_walker(const struct kvm_pgtable_visit_ctx *ctx, if (!childp) return -ENOMEM; - kvm_set_table_pte(ctx->ptep, childp, mm_ops); + new = kvm_init_table_pte(childp, mm_ops); mm_ops->get_page(ctx->ptep); + smp_store_release(ctx->ptep, new); + return 0; } @@ -796,7 +795,7 @@ static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx, struct stage2_map_data *data) { struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; - kvm_pte_t *childp; + kvm_pte_t *childp, new; int ret; ret = stage2_map_walker_try_leaf(ctx, data); @@ -821,8 
+820,9 @@ static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx, if (stage2_pte_is_counted(ctx->old)) stage2_put_pte(ctx, data->mmu, mm_ops); - kvm_set_table_pte(ctx->ptep, childp, mm_ops); + new = kvm_init_table_pte(childp, mm_ops); mm_ops->get_page(ctx->ptep); + smp_store_release(ctx->ptep, new); return 0; } From 0ab12f3574db6cb432917a667f9392a88e8f0dfc Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 7 Nov 2022 21:58:55 +0000 Subject: [PATCH 1270/4122] KVM: arm64: Make block->table PTE changes parallel-aware In order to service stage-2 faults in parallel, stage-2 table walkers must take exclusive ownership of the PTE being worked on. An additional requirement of the architecture is that software must perform a 'break-before-make' operation when changing the block size used for mapping memory. Roll these two concepts together into helpers for performing a 'break-before-make' sequence. Use a special PTE value to indicate a PTE has been locked by a software walker. Additionally, use an atomic compare-exchange to 'break' the PTE when the stage-2 page tables are possibly shared with another software walker. Elide the DSB + TLBI if the evicted PTE was invalid (and thus not subject to break-before-make). All of the atomics do nothing for now, as the stage-2 walker isn't fully ready to perform parallel walks. Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221107215855.1895367-1-oliver.upton@linux.dev --- arch/arm64/kvm/hyp/pgtable.c | 80 +++++++++++++++++++++++++++++++++--- 1 file changed, 75 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index f4dd77c6c97d..b9f0d792b8d9 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -49,6 +49,12 @@ #define KVM_INVALID_PTE_OWNER_MASK GENMASK(9, 2) #define KVM_MAX_OWNER_ID 1 +/* + * Used to indicate a pte for which a 'break-before-make' sequence is in + * progress. 
+ */ +#define KVM_INVALID_PTE_LOCKED BIT(10) + struct kvm_pgtable_walk_data { struct kvm_pgtable_walker *walker; @@ -674,6 +680,11 @@ static bool stage2_pte_is_counted(kvm_pte_t pte) return !!pte; } +static bool stage2_pte_is_locked(kvm_pte_t pte) +{ + return !kvm_pte_valid(pte) && (pte & KVM_INVALID_PTE_LOCKED); +} + static bool stage2_try_set_pte(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_t new) { if (!kvm_pgtable_walk_shared(ctx)) { @@ -684,6 +695,64 @@ static bool stage2_try_set_pte(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_ return cmpxchg(ctx->ptep, ctx->old, new) == ctx->old; } +/** + * stage2_try_break_pte() - Invalidates a pte according to the + * 'break-before-make' requirements of the + * architecture. + * + * @ctx: context of the visited pte. + * @mmu: stage-2 mmu + * + * Returns: true if the pte was successfully broken. + * + * If the removed pte was valid, performs the necessary serialization and TLB + * invalidation for the old value. For counted ptes, drops the reference count + * on the containing table page. + */ +static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx, + struct kvm_s2_mmu *mmu) +{ + struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; + + if (stage2_pte_is_locked(ctx->old)) { + /* + * Should never occur if this walker has exclusive access to the + * page tables. + */ + WARN_ON(!kvm_pgtable_walk_shared(ctx)); + return false; + } + + if (!stage2_try_set_pte(ctx, KVM_INVALID_PTE_LOCKED)) + return false; + + /* + * Perform the appropriate TLB invalidation based on the evicted pte + * value (if any). 
+ */ + if (kvm_pte_table(ctx->old, ctx->level)) + kvm_call_hyp(__kvm_tlb_flush_vmid, mmu); + else if (kvm_pte_valid(ctx->old)) + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, ctx->level); + + if (stage2_pte_is_counted(ctx->old)) + mm_ops->put_page(ctx->ptep); + + return true; +} + +static void stage2_make_pte(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_t new) +{ + struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; + + WARN_ON(!stage2_pte_is_locked(*ctx->ptep)); + + if (stage2_pte_is_counted(new)) + mm_ops->get_page(ctx->ptep); + + smp_store_release(ctx->ptep, new); +} + static void stage2_put_pte(const struct kvm_pgtable_visit_ctx *ctx, struct kvm_s2_mmu *mmu, struct kvm_pgtable_mm_ops *mm_ops) { @@ -812,17 +881,18 @@ static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx, if (!childp) return -ENOMEM; + if (!stage2_try_break_pte(ctx, data->mmu)) { + mm_ops->put_page(childp); + return -EAGAIN; + } + /* * If we've run into an existing block mapping then replace it with * a table. Accesses beyond 'end' that fall within the new table * will be mapped lazily. */ - if (stage2_pte_is_counted(ctx->old)) - stage2_put_pte(ctx, data->mmu, mm_ops); - new = kvm_init_table_pte(childp, mm_ops); - mm_ops->get_page(ctx->ptep); - smp_store_release(ctx->ptep, new); + stage2_make_pte(ctx, new); return 0; } From 946fbfdf336b811479e024136c7cabc00157b6b9 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 7 Nov 2022 21:59:34 +0000 Subject: [PATCH 1271/4122] KVM: arm64: Make leaf->leaf PTE changes parallel-aware Convert stage2_map_walker_try_leaf() to use the new break-before-make helpers, thereby making the handler parallel-aware. As before, avoid the break-before-make if recreating the existing mapping. Additionally, retry execution if another vCPU thread is modifying the same PTE. 
Signed-off-by: Oliver Upton Reviewed-by: Ben Gardon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221107215934.1895478-1-oliver.upton@linux.dev --- arch/arm64/kvm/hyp/pgtable.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index b9f0d792b8d9..238f29389617 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -804,18 +804,17 @@ static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx, else new = kvm_init_invalid_leaf_owner(data->owner_id); - if (stage2_pte_is_counted(ctx->old)) { - /* - * Skip updating the PTE if we are trying to recreate the exact - * same mapping or only change the access permissions. Instead, - * the vCPU will exit one more time from guest if still needed - * and then go through the path of relaxing permissions. - */ - if (!stage2_pte_needs_update(ctx->old, new)) - return -EAGAIN; + /* + * Skip updating the PTE if we are trying to recreate the exact + * same mapping or only change the access permissions. Instead, + * the vCPU will exit one more time from guest if still needed + * and then go through the path of relaxing permissions. 
+ */ + if (!stage2_pte_needs_update(ctx->old, new)) + return -EAGAIN; - stage2_put_pte(ctx, data->mmu, mm_ops); - } + if (!stage2_try_break_pte(ctx, data->mmu)) + return -EAGAIN; /* Perform CMOs before installation of the guest stage-2 PTE */ if (mm_ops->dcache_clean_inval_poc && stage2_pte_cacheable(pgt, new)) @@ -825,9 +824,8 @@ static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx, if (mm_ops->icache_inval_pou && stage2_pte_executable(new)) mm_ops->icache_inval_pou(kvm_pte_follow(new, mm_ops), granule); - smp_store_release(ctx->ptep, new); - if (stage2_pte_is_counted(new)) - mm_ops->get_page(ctx->ptep); + stage2_make_pte(ctx, new); + if (kvm_phys_is_valid(phys)) data->phys += granule; return 0; From af87fc03cfdf6893011df419588d27acdfb9c197 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 7 Nov 2022 22:00:06 +0000 Subject: [PATCH 1272/4122] KVM: arm64: Make table->block changes parallel-aware stage2_map_walker_try_leaf() and friends now handle stage-2 PTEs generically, and perform the correct flush when a table PTE is removed. Additionally, they've been made parallel-aware, using an atomic break to take ownership of the PTE. Stop clearing the PTE in the pre-order callback and instead let stage2_map_walker_try_leaf() deal with it. 
Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221107220006.1895572-1-oliver.upton@linux.dev --- arch/arm64/kvm/hyp/pgtable.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 238f29389617..f814422ef795 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -841,21 +841,12 @@ static int stage2_map_walk_table_pre(const struct kvm_pgtable_visit_ctx *ctx, if (!stage2_leaf_mapping_allowed(ctx, data)) return 0; - kvm_clear_pte(ctx->ptep); - - /* - * Invalidate the whole stage-2, as we may have numerous leaf - * entries below us which would otherwise need invalidating - * individually. - */ - kvm_call_hyp(__kvm_tlb_flush_vmid, data->mmu); - ret = stage2_map_walker_try_leaf(ctx, data); + if (ret) + return ret; - mm_ops->put_page(ctx->ptep); mm_ops->free_removed_table(childp, ctx->level); - - return ret; + return 0; } static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx, From 1577cb5823cefdff4416f272a88143ee933d97f5 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 7 Nov 2022 22:00:33 +0000 Subject: [PATCH 1273/4122] KVM: arm64: Handle stage-2 faults in parallel The stage-2 map walker has been made parallel-aware, and as such can be called while only holding the read side of the MMU lock. Rip out the conditional locking in user_mem_abort() and instead grab the read lock. Continue to take the write lock from other callsites to kvm_pgtable_stage2_map(). 
Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221107220033.1895655-1-oliver.upton@linux.dev --- arch/arm64/include/asm/kvm_pgtable.h | 3 ++- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 2 +- arch/arm64/kvm/hyp/pgtable.c | 5 +++-- arch/arm64/kvm/mmu.c | 31 ++++++--------------------- 4 files changed, 13 insertions(+), 28 deletions(-) diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 7634b6964779..a874ce0ce7b5 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -412,6 +412,7 @@ void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pg * @prot: Permissions and attributes for the mapping. * @mc: Cache of pre-allocated and zeroed memory from which to allocate * page-table pages. + * @flags: Flags to control the page-table walk (ex. a shared walk) * * The offset of @addr within a page is ignored, @size is rounded-up to * the next page boundary and @phys is rounded-down to the previous page @@ -433,7 +434,7 @@ void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pg */ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, enum kvm_pgtable_prot prot, - void *mc); + void *mc, enum kvm_pgtable_walk_flags flags); /** * kvm_pgtable_stage2_set_owner() - Unmap and annotate pages in the IPA space to diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 735769886b55..f6d82bf33ce1 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -257,7 +257,7 @@ static inline int __host_stage2_idmap(u64 start, u64 end, enum kvm_pgtable_prot prot) { return kvm_pgtable_stage2_map(&host_kvm.pgt, start, end - start, start, - prot, &host_s2_pool); + prot, &host_s2_pool, 0); } /* diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index f814422ef795..5bca9610d040 100644 --- 
a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -912,7 +912,7 @@ static int stage2_map_walker(const struct kvm_pgtable_visit_ctx *ctx, int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, enum kvm_pgtable_prot prot, - void *mc) + void *mc, enum kvm_pgtable_walk_flags flags) { int ret; struct stage2_map_data map_data = { @@ -923,7 +923,8 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, }; struct kvm_pgtable_walker walker = { .cb = stage2_map_walker, - .flags = KVM_PGTABLE_WALK_TABLE_PRE | + .flags = flags | + KVM_PGTABLE_WALK_TABLE_PRE | KVM_PGTABLE_WALK_LEAF, .arg = &map_data, }; diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 52e042399ba5..410c2a37fe32 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -861,7 +861,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, write_lock(&kvm->mmu_lock); ret = kvm_pgtable_stage2_map(pgt, addr, PAGE_SIZE, pa, prot, - &cache); + &cache, 0); write_unlock(&kvm->mmu_lock); if (ret) break; @@ -1156,7 +1156,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, gfn_t gfn; kvm_pfn_t pfn; bool logging_active = memslot_is_logging(memslot); - bool use_read_lock = false; unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu); unsigned long vma_pagesize, fault_granule; enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R; @@ -1191,8 +1190,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (logging_active) { force_pte = true; vma_shift = PAGE_SHIFT; - use_read_lock = (fault_status == FSC_PERM && write_fault && - fault_granule == PAGE_SIZE); } else { vma_shift = get_vma_page_shift(vma, hva); } @@ -1291,15 +1288,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (exec_fault && device) return -ENOEXEC; - /* - * To reduce MMU contentions and enhance concurrency during dirty - * logging dirty logging, only acquire read lock for permission - 
* relaxation. - */ - if (use_read_lock) - read_lock(&kvm->mmu_lock); - else - write_lock(&kvm->mmu_lock); + read_lock(&kvm->mmu_lock); pgt = vcpu->arch.hw_mmu->pgt; if (mmu_invalidate_retry(kvm, mmu_seq)) goto out_unlock; @@ -1343,15 +1332,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * permissions only if vma_pagesize equals fault_granule. Otherwise, * kvm_pgtable_stage2_map() should be called to change block size. */ - if (fault_status == FSC_PERM && vma_pagesize == fault_granule) { + if (fault_status == FSC_PERM && vma_pagesize == fault_granule) ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot); - } else { - WARN_ONCE(use_read_lock, "Attempted stage-2 map outside of write lock\n"); - + else ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize, __pfn_to_phys(pfn), prot, - memcache); - } + memcache, KVM_PGTABLE_WALK_SHARED); /* Mark the page dirty only if the fault is handled successfully */ if (writable && !ret) { @@ -1360,10 +1346,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, } out_unlock: - if (use_read_lock) - read_unlock(&kvm->mmu_lock); - else - write_unlock(&kvm->mmu_lock); + read_unlock(&kvm->mmu_lock); kvm_set_pfn_accessed(pfn); kvm_release_pfn_clean(pfn); return ret != -EAGAIN ? ret : 0; @@ -1569,7 +1552,7 @@ bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) */ kvm_pgtable_stage2_map(kvm->arch.mmu.pgt, range->start << PAGE_SHIFT, PAGE_SIZE, __pfn_to_phys(pfn), - KVM_PGTABLE_PROT_R, NULL); + KVM_PGTABLE_PROT_R, NULL, 0); return false; } From fba31beab3578b793060f549188fe682df7d3ed9 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Wed, 9 Nov 2022 15:10:39 +0530 Subject: [PATCH 1274/4122] PCI: qcom: Fix error message for reset_control_assert() Fix the error message to mention "assert" instead of "deassert". 
Link: https://lore.kernel.org/r/20221109094039.25753-1-manivannan.sadhasivam@linaro.org Signed-off-by: Manivannan Sadhasivam Signed-off-by: Lorenzo Pieralisi Reviewed-by: Vinod Koul --- drivers/pci/controller/dwc/pcie-qcom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index f711acacaeaf..cf27345f6575 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -1236,7 +1236,7 @@ static int qcom_pcie_init_2_7_0(struct qcom_pcie *pcie) ret = reset_control_assert(res->pci_reset); if (ret < 0) { - dev_err(dev, "cannot deassert pci reset\n"); + dev_err(dev, "cannot assert pci reset\n"); goto err_disable_clocks; } From c9bfd858402c86b6559aa05227eb5dbae3ce862e Mon Sep 17 00:00:00 2001 From: Jianjun Wang Date: Thu, 3 Nov 2022 10:56:54 +0800 Subject: [PATCH 1275/4122] dt-bindings: PCI: mediatek-gen3: Support mt8195 In order to support mt8195 pcie node, update the yaml to support new properties of iommu and power-domain, and update the reset-names property to allow only one 'mac' name. 
Link: https://lore.kernel.org/r/20221103025656.8714-2-tinghan.shen@mediatek.com Signed-off-by: Jianjun Wang Signed-off-by: TingHan Shen Signed-off-by: Lorenzo Pieralisi Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Rob Herring --- .../devicetree/bindings/pci/mediatek-pcie-gen3.yaml | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/pci/mediatek-pcie-gen3.yaml b/Documentation/devicetree/bindings/pci/mediatek-pcie-gen3.yaml index c00be39af64e..bc90f0ec7bd9 100644 --- a/Documentation/devicetree/bindings/pci/mediatek-pcie-gen3.yaml +++ b/Documentation/devicetree/bindings/pci/mediatek-pcie-gen3.yaml @@ -70,15 +70,21 @@ properties: minItems: 1 maxItems: 8 + iommu-map: + maxItems: 1 + + iommu-map-mask: + const: 0 + resets: minItems: 1 maxItems: 2 reset-names: minItems: 1 + maxItems: 2 items: - - const: phy - - const: mac + enum: [ phy, mac ] clocks: maxItems: 6 @@ -107,6 +113,9 @@ properties: items: - const: pcie-phy + power-domains: + maxItems: 1 + '#interrupt-cells': const: 1 From 8405d8f0956d227c3355d9bdbabc23f79f721ce4 Mon Sep 17 00:00:00 2001 From: Vidya Sagar Date: Tue, 13 Sep 2022 15:42:37 +0530 Subject: [PATCH 1276/4122] PCI: dwc: Use dev_info for PCIe link down event logging Some of the platforms (like Tegra194 and Tegra234) have open slots and not having an endpoint connected to the slot is not an error. So, changing the macro from dev_err to dev_info to log the event. 
Link: https://lore.kernel.org/r/20220913101237.4337-1-vidyas@nvidia.com Tested-by: Jon Hunter Signed-off-by: Vidya Sagar Signed-off-by: Lorenzo Pieralisi Acked-by: Jon Hunter Acked-by: Manivannan Sadhasivam --- drivers/pci/controller/dwc/pcie-designware.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/controller/dwc/pcie-designware.c b/drivers/pci/controller/dwc/pcie-designware.c index 9e4d96e5a3f5..432aead68d1f 100644 --- a/drivers/pci/controller/dwc/pcie-designware.c +++ b/drivers/pci/controller/dwc/pcie-designware.c @@ -448,7 +448,7 @@ int dw_pcie_wait_for_link(struct dw_pcie *pci) } if (retries >= LINK_WAIT_MAX_RETRIES) { - dev_err(pci->dev, "Phy link never came up\n"); + dev_info(pci->dev, "Phy link never came up\n"); return -ETIMEDOUT; } From 72f542ac4f39fb42b8a6380ac8d9b3c39019d2d6 Mon Sep 17 00:00:00 2001 From: Matt Ranostay Date: Fri, 28 Oct 2022 02:17:16 -0700 Subject: [PATCH 1277/4122] dt-bindings: PCI: Add host mode device-id for j721s2 platform Add unique device-id of 0xb013 for j721s2 platform to oneOf field. 
Link: https://lore.kernel.org/r/20221028091716.21414-1-mranostay@ti.com Signed-off-by: Matt Ranostay Signed-off-by: Lorenzo Pieralisi Acked-by: Rob Herring --- Documentation/devicetree/bindings/pci/ti,j721e-pci-host.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/pci/ti,j721e-pci-host.yaml b/Documentation/devicetree/bindings/pci/ti,j721e-pci-host.yaml index d9df7cd922f1..b0513b197d08 100644 --- a/Documentation/devicetree/bindings/pci/ti,j721e-pci-host.yaml +++ b/Documentation/devicetree/bindings/pci/ti,j721e-pci-host.yaml @@ -73,6 +73,8 @@ properties: - const: 0xb00f - items: - const: 0xb010 + - items: + - const: 0xb013 msi-map: true From be7e8b917ead54754cc14b6c03769c8738a3f3f3 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 9 Nov 2022 15:48:43 +0100 Subject: [PATCH 1278/4122] blkdev: make struct block_device_operations.devnode() take a const * The devnode() callback in struct block_device_operations should not be modifying the device that is passed into it, so mark it as a const * and propagate the function signature changes out into the one subsystem that actually uses this callback. 
Acked-by: Jens Axboe Link: https://lore.kernel.org/r/20221109144843.679668-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/block/pktcdvd.c | 2 +- include/linux/blkdev.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 4cea3b08087e..8dc15be95962 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2632,7 +2632,7 @@ static unsigned int pkt_check_events(struct gendisk *disk, return attached_disk->fops->check_events(attached_disk, clearing); } -static char *pkt_devnode(struct gendisk *disk, umode_t *mode) +static char *pkt_devnode(const struct gendisk *disk, umode_t *mode) { return kasprintf(GFP_KERNEL, "pktcdvd/%s", disk->disk_name); } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 50e358a19d98..2a455793462b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1413,7 +1413,7 @@ struct block_device_operations { void (*swap_slot_free_notify) (struct block_device *, unsigned long); int (*report_zones)(struct gendisk *, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data); - char *(*devnode)(struct gendisk *disk, umode_t *mode); + char *(*devnode)(const struct gendisk *disk, umode_t *mode); /* returns the length of the identifier or a negative errno: */ int (*get_unique_id)(struct gendisk *disk, u8 id[16], enum blk_unique_id id_type); From 927bdd1e65bd14ae035d9c625df2f4ccd51e8a83 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 9 Nov 2022 15:07:10 +0100 Subject: [PATCH 1279/4122] driver core: remove devm_device_remove_groups() There is no in-kernel user of this function, so it is not needed anymore and can be removed. Cc: Dmitry Torokhov Reviewed-by: Rafael J. 
Wysocki Link: https://lore.kernel.org/r/20221109140711.105222-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 17 ----------------- include/linux/device.h | 2 -- 2 files changed, 19 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index f07b1c349f79..5fd99f2df692 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2693,23 +2693,6 @@ int devm_device_add_groups(struct device *dev, } EXPORT_SYMBOL_GPL(devm_device_add_groups); -/** - * devm_device_remove_groups - remove a list of managed groups - * - * @dev: The device for the groups to be removed from - * @groups: NULL terminated list of groups to be removed - * - * If groups is not NULL, remove the specified groups from the device. - */ -void devm_device_remove_groups(struct device *dev, - const struct attribute_group **groups) -{ - WARN_ON(devres_release(dev, devm_attr_groups_remove, - devm_attr_group_match, - /* cast away const */ (void *)groups)); -} -EXPORT_SYMBOL_GPL(devm_device_remove_groups); - static int device_add_attrs(struct device *dev) { struct class *class = dev->class; diff --git a/include/linux/device.h b/include/linux/device.h index 023ea50b1916..4efc607c008c 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1062,8 +1062,6 @@ static inline void device_remove_group(struct device *dev, int __must_check devm_device_add_groups(struct device *dev, const struct attribute_group **groups); -void devm_device_remove_groups(struct device *dev, - const struct attribute_group **groups); int __must_check devm_device_add_group(struct device *dev, const struct attribute_group *grp); void devm_device_remove_group(struct device *dev, From 0f0605d550ed986279030d452c7ed10df34da449 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 9 Nov 2022 15:07:11 +0100 Subject: [PATCH 1280/4122] driver core: remove devm_device_remove_group() There is no in-kernel user of this function, so it is not needed anymore and can be removed. 
Cc: Dmitry Torokhov Reviewed-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20221109140711.105222-2-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 22 ---------------------- include/linux/device.h | 2 -- 2 files changed, 24 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 5fd99f2df692..af721e6c0253 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2585,11 +2585,6 @@ union device_attr_group_devres { const struct attribute_group **groups; }; -static int devm_attr_group_match(struct device *dev, void *res, void *data) -{ - return ((union device_attr_group_devres *)res)->group == data; -} - static void devm_attr_group_remove(struct device *dev, void *res) { union device_attr_group_devres *devres = res; @@ -2640,23 +2635,6 @@ int devm_device_add_group(struct device *dev, const struct attribute_group *grp) } EXPORT_SYMBOL_GPL(devm_device_add_group); -/** - * devm_device_remove_group: remove a managed group from a device - * @dev: device to remove the group from - * @grp: group to remove - * - * This function removes a group of attributes from a device. The attributes - * previously have to have been created for this group, otherwise it will fail. 
- */ -void devm_device_remove_group(struct device *dev, - const struct attribute_group *grp) -{ - WARN_ON(devres_release(dev, devm_attr_group_remove, - devm_attr_group_match, - /* cast away const */ (void *)grp)); -} -EXPORT_SYMBOL_GPL(devm_device_remove_group); - /** * devm_device_add_groups - create a bunch of managed attribute groups * @dev: The device to create the group for diff --git a/include/linux/device.h b/include/linux/device.h index 4efc607c008c..84ae52de6746 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1064,8 +1064,6 @@ int __must_check devm_device_add_groups(struct device *dev, const struct attribute_group **groups); int __must_check devm_device_add_group(struct device *dev, const struct attribute_group *grp); -void devm_device_remove_group(struct device *dev, - const struct attribute_group *grp); /* * Platform "fixup" functions - allow the platform to have their say From 189a87f8ef8ceed16b2a230dc0ce65117068ac30 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 30 Oct 2022 10:22:55 +0100 Subject: [PATCH 1281/4122] driver core: mark driver_allows_async_probing static driver_allows_async_probing is only used in drivers/base/dd.c, so mark it static and remove the declaration in drivers/base/base.h. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20221030092255.872280-1-hch@lst.de Signed-off-by: Greg Kroah-Hartman --- drivers/base/base.h | 1 - drivers/base/dd.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/base/base.h b/drivers/base/base.h index b902d1ecc247..7d4803c03d3e 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -146,7 +146,6 @@ static inline int driver_match_device(struct device_driver *drv, { return drv->bus->match ? 
drv->bus->match(dev, drv) : 1; } -extern bool driver_allows_async_probing(struct device_driver *drv); extern int driver_add_groups(struct device_driver *drv, const struct attribute_group **groups); diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 3dda62503102..4001e22617ab 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -843,7 +843,7 @@ static int __init save_async_options(char *buf) } __setup("driver_async_probe=", save_async_options); -bool driver_allows_async_probing(struct device_driver *drv) +static bool driver_allows_async_probing(struct device_driver *drv) { switch (drv->probe_type) { case PROBE_PREFER_ASYNCHRONOUS: From 3da72e18371c41a6f6f96b594854b178168c7757 Mon Sep 17 00:00:00 2001 From: Pierre Gondois Date: Wed, 26 Oct 2022 20:59:54 +0200 Subject: [PATCH 1282/4122] cacheinfo: Decrement refcount in cache_setup_of_node() Refcounts to DT nodes are only incremented in the function and never decremented. Decrease the refcounts when necessary. Signed-off-by: Pierre Gondois Reviewed-by: Sudeep Holla Link: https://lore.kernel.org/r/20221026185954.991547-1-pierre.gondois@arm.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/cacheinfo.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index 4b5cd08c5a65..04317cde800c 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -196,7 +196,7 @@ static void cache_of_set_props(struct cacheinfo *this_leaf, static int cache_setup_of_node(unsigned int cpu) { - struct device_node *np; + struct device_node *np, *prev; struct cacheinfo *this_leaf; unsigned int index = 0; @@ -206,19 +206,24 @@ static int cache_setup_of_node(unsigned int cpu) return -ENOENT; } + prev = np; + while (index < cache_leaves(cpu)) { this_leaf = per_cpu_cacheinfo_idx(cpu, index); - if (this_leaf->level != 1) + if (this_leaf->level != 1) { np = of_find_next_cache_node(np); - else - np = of_node_get(np);/* cpu node itself */ - if 
(!np) - break; + of_node_put(prev); + prev = np; + if (!np) + break; + } cache_of_set_props(this_leaf, np); this_leaf->fw_token = np; index++; } + of_node_put(np); + if (index != cache_leaves(cpu)) /* not all OF nodes populated */ return -ENOENT; From 730600223b64918324ab322ab174361bd41073c0 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 1 Nov 2022 22:14:17 +0100 Subject: [PATCH 1283/4122] driver core: Use kstrtobool() instead of strtobool() strtobool() is the same as kstrtobool(). However, the latter is more used within the kernel. In order to remove strtobool() and slightly simplify kstrtox.h, switch to the other function name. While at it, include the corresponding header file (<linux/kstrtox.h>) Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/02ba683a5c0716638ad8ca11e8b0fdca97c4f294.1667336095.git.christophe.jaillet@wanadoo.fr Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index af721e6c0253..ab01828fe6c1 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -14,6 +14,7 @@ #include #include #include +#include <linux/kstrtox.h> #include #include #include @@ -1628,7 +1629,7 @@ early_param("fw_devlink", fw_devlink_setup); static bool fw_devlink_strict; static int __init fw_devlink_strict_setup(char *arg) { - return strtobool(arg, &fw_devlink_strict); + return kstrtobool(arg, &fw_devlink_strict); } early_param("fw_devlink.strict", fw_devlink_strict_setup); @@ -2280,7 +2281,7 @@ ssize_t device_store_bool(struct device *dev, struct device_attribute *attr, { struct dev_ext_attribute *ea = to_ext_attr(attr); - if (strtobool(buf, ea->var) < 0) + if (kstrtobool(buf, ea->var) < 0) return -EINVAL; return size; @@ -2534,7 +2535,7 @@ static ssize_t online_store(struct device *dev, struct device_attribute *attr, bool val; int ret; - ret = strtobool(buf, &val); + ret = kstrtobool(buf, &val); if (ret < 0) return ret; From 
27c0d217340e47ec995557f61423ef415afba987 Mon Sep 17 00:00:00 2001 From: "Isaac J. Manjarres" Date: Tue, 20 Sep 2022 17:14:13 -0700 Subject: [PATCH 1284/4122] driver core: Fix bus_type.match() error handling in __driver_attach() When a driver registers with a bus, it will attempt to match with every device on the bus through the __driver_attach() function. Currently, if the bus_type.match() function encounters an error that is not -EPROBE_DEFER, __driver_attach() will return a negative error code, which causes the driver registration logic to stop trying to match with the remaining devices on the bus. This behavior is not correct; a failure while matching a driver to a device does not mean that the driver won't be able to match and bind with other devices on the bus. Update the logic in __driver_attach() to reflect this. Fixes: 656b8035b0ee ("ARM: 8524/1: driver cohandle -EPROBE_DEFER from bus_type.match()") Cc: stable@vger.kernel.org Cc: Saravana Kannan Signed-off-by: Isaac J. Manjarres Link: https://lore.kernel.org/r/20220921001414.4046492-1-isaacmanjarres@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/dd.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 4001e22617ab..e9b2f9c25efe 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -1162,7 +1162,11 @@ static int __driver_attach(struct device *dev, void *data) return 0; } else if (ret < 0) { dev_dbg(dev, "Bus failed to match device: %d\n", ret); - return ret; + /* + * Driver could not match with device, but may match with + * another device on the bus. + */ + return 0; } /* ret > 0 means positive match */ if (driver_allows_async_probing(drv)) { From 02c39bbb36bab698b29f066a28c3e342d1136e53 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 6 Oct 2022 07:27:17 +0200 Subject: [PATCH 1285/4122] virt: fsl_hypervisor: Replace NO_IRQ by 0 NO_IRQ is used to check the return of irq_of_parse_and_map(). 
On some architecture NO_IRQ is 0, on other architectures it is -1. irq_of_parse_and_map() returns 0 on error, independent of NO_IRQ. So use 0 instead of using NO_IRQ. Signed-off-by: Christophe Leroy Link: https://lore.kernel.org/r/20dd37b96bac0a72caef28e7462b32c93487a516.1665033909.git.christophe.leroy@csgroup.eu Signed-off-by: Greg Kroah-Hartman --- drivers/virt/fsl_hypervisor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/virt/fsl_hypervisor.c b/drivers/virt/fsl_hypervisor.c index 07035249a5e1..f8b4389d60d9 100644 --- a/drivers/virt/fsl_hypervisor.c +++ b/drivers/virt/fsl_hypervisor.c @@ -839,7 +839,7 @@ static int __init fsl_hypervisor_init(void) handle = of_get_property(np, "interrupts", NULL); irq = irq_of_parse_and_map(np, 0); - if (!handle || (irq == NO_IRQ)) { + if (!handle || !irq) { pr_err("fsl-hv: no 'interrupts' property in %pOF node\n", np); continue; From 2d2879e1834d02852347d9dc910fffd5d22ac337 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Thu, 10 Nov 2022 15:37:54 +0530 Subject: [PATCH 1286/4122] dt-bindings: spmi: Add qcom,bus-id For PMIC arbiter version 7 and beyond we need to define if we are using primary or secondary bus, so add a new property of qcom,bus-id Signed-off-by: Vinod Koul Acked-by: Rob Herring Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20221110100755.4032505-2-vkoul@kernel.org Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/spmi/qcom,spmi-pmic-arb.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Documentation/devicetree/bindings/spmi/qcom,spmi-pmic-arb.yaml b/Documentation/devicetree/bindings/spmi/qcom,spmi-pmic-arb.yaml index fee4f0eb4665..f983b4af6db9 100644 --- a/Documentation/devicetree/bindings/spmi/qcom,spmi-pmic-arb.yaml +++ b/Documentation/devicetree/bindings/spmi/qcom,spmi-pmic-arb.yaml @@ -85,6 +85,14 @@ properties: description: > which of the PMIC Arb provided channels to use for accesses + qcom,bus-id: + $ref: 
/schemas/types.yaml#/definitions/uint32 + minimum: 0 + maximum: 1 + description: > + SPMI bus instance. only applicable to PMIC arbiter version 7 and beyond. + Supported values, 0 = primary bus, 1 = secondary bus + required: - compatible - reg-names @@ -113,5 +121,7 @@ examples: interrupt-controller; #interrupt-cells = <4>; + + qcom,bus-id = <0>; }; From 231601cd22bd60e332dfa9ead22f871e93ad9821 Mon Sep 17 00:00:00 2001 From: David Collins Date: Thu, 10 Nov 2022 15:37:55 +0530 Subject: [PATCH 1287/4122] spmi: pmic-arb: Add support for PMIC v7 PMIC v7 has different offset values and sequences, so add support for this new version of PMIC Signed-off-by: David Collins Signed-off-by: Vinod Koul Tested-by: Dmitry Baryshkov # SM8450 HDK Acked-by: Bjorn Andersson Link: https://lore.kernel.org/r/20221110100755.4032505-3-vkoul@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/spmi/spmi-pmic-arb.c | 242 ++++++++++++++++++++++++++++++++--- 1 file changed, 221 insertions(+), 21 deletions(-) diff --git a/drivers/spmi/spmi-pmic-arb.c b/drivers/spmi/spmi-pmic-arb.c index 2cf3203b2397..8b6a42ab816f 100644 --- a/drivers/spmi/spmi-pmic-arb.c +++ b/drivers/spmi/spmi-pmic-arb.c @@ -22,8 +22,14 @@ #define PMIC_ARB_VERSION_V2_MIN 0x20010000 #define PMIC_ARB_VERSION_V3_MIN 0x30000000 #define PMIC_ARB_VERSION_V5_MIN 0x50000000 +#define PMIC_ARB_VERSION_V7_MIN 0x70000000 #define PMIC_ARB_INT_EN 0x0004 +#define PMIC_ARB_FEATURES 0x0004 +#define PMIC_ARB_FEATURES_PERIPH_MASK GENMASK(10, 0) + +#define PMIC_ARB_FEATURES1 0x0008 + /* PMIC Arbiter channel registers offsets */ #define PMIC_ARB_CMD 0x00 #define PMIC_ARB_CONFIG 0x04 @@ -48,7 +54,6 @@ #define INVALID_EE 0xFF /* Ownership Table */ -#define SPMI_OWNERSHIP_TABLE_REG(N) (0x0700 + (4 * (N))) #define SPMI_OWNERSHIP_PERIPH2OWNER(X) ((X) & 0x7) /* Channel Status fields */ @@ -91,6 +96,7 @@ enum pmic_arb_channel { /* Maximum number of support PMIC peripherals */ #define PMIC_ARB_MAX_PERIPHS 512 +#define PMIC_ARB_MAX_PERIPHS_V7 1024 
#define PMIC_ARB_TIMEOUT_US 1000 #define PMIC_ARB_MAX_TRANS_BYTES (8) @@ -104,12 +110,12 @@ enum pmic_arb_channel { ((((slave_id) & 0xF) << 28) | \ (((periph_id) & 0xFF) << 20) | \ (((irq_id) & 0x7) << 16) | \ - (((apid) & 0x1FF) << 0)) + (((apid) & 0x3FF) << 0)) #define hwirq_to_sid(hwirq) (((hwirq) >> 28) & 0xF) #define hwirq_to_per(hwirq) (((hwirq) >> 20) & 0xFF) #define hwirq_to_irq(hwirq) (((hwirq) >> 16) & 0x7) -#define hwirq_to_apid(hwirq) (((hwirq) >> 0) & 0x1FF) +#define hwirq_to_apid(hwirq) (((hwirq) >> 0) & 0x3FF) struct pmic_arb_ver_ops; @@ -130,13 +136,21 @@ struct apid_data { * @channel: execution environment channel to use for accesses. * @irq: PMIC ARB interrupt. * @ee: the current Execution Environment + * @bus_instance: on v7: 0 = primary SPMI bus, 1 = secondary SPMI bus * @min_apid: minimum APID (used for bounding IRQ search) * @max_apid: maximum APID + * @base_apid: on v7: minimum APID associated with the particular SPMI + * bus instance + * @apid_count: on v5 and v7: number of APIDs associated with the + * particular SPMI bus instance * @mapping_table: in-memory copy of PPID -> APID mapping table. * @domain: irq domain object for PMIC IRQ domain * @spmic: SPMI controller object * @ver_ops: version dependent operations. - * @ppid_to_apid in-memory copy of PPID -> APID mapping table. + * @ppid_to_apid: in-memory copy of PPID -> APID mapping table. 
+ * @last_apid: Highest value APID in use + * @apid_data: Table of data for all APIDs + * @max_periphs: Number of elements in apid_data[] */ struct spmi_pmic_arb { void __iomem *rd_base; @@ -149,8 +163,11 @@ struct spmi_pmic_arb { u8 channel; int irq; u8 ee; + u32 bus_instance; u16 min_apid; u16 max_apid; + u16 base_apid; + int apid_count; u32 *mapping_table; DECLARE_BITMAP(mapping_table_valid, PMIC_ARB_MAX_PERIPHS); struct irq_domain *domain; @@ -158,7 +175,8 @@ struct spmi_pmic_arb { const struct pmic_arb_ver_ops *ver_ops; u16 *ppid_to_apid; u16 last_apid; - struct apid_data apid_data[PMIC_ARB_MAX_PERIPHS]; + struct apid_data *apid_data; + int max_periphs; }; /** @@ -180,6 +198,7 @@ struct spmi_pmic_arb { * @irq_clear: on v1 address of PMIC_ARB_SPMI_PIC_IRQ_CLEARn * on v2 address of SPMI_PIC_IRQ_CLEARn. * @apid_map_offset: offset of PMIC_ARB_REG_CHNLn + * @apid_owner: on v2 and later address of SPMI_PERIPHn_2OWNER_TABLE_REG */ struct pmic_arb_ver_ops { const char *ver_str; @@ -196,6 +215,7 @@ struct pmic_arb_ver_ops { void __iomem *(*irq_status)(struct spmi_pmic_arb *pmic_arb, u16 n); void __iomem *(*irq_clear)(struct spmi_pmic_arb *pmic_arb, u16 n); u32 (*apid_map_offset)(u16 n); + void __iomem *(*apid_owner)(struct spmi_pmic_arb *pmic_arb, u16 n); }; static inline void pmic_arb_base_write(struct spmi_pmic_arb *pmic_arb, @@ -627,6 +647,11 @@ static void pmic_arb_chained_irq(struct irq_desc *desc) struct irq_chip *chip = irq_desc_get_chip(desc); int first = pmic_arb->min_apid; int last = pmic_arb->max_apid; + /* + * acc_offset will be non-zero for the secondary SPMI bus instance on + * v7 controllers. 
+ */ + int acc_offset = pmic_arb->base_apid >> 5; u8 ee = pmic_arb->ee; u32 status, enable, handled = 0; int i, id, apid; @@ -637,8 +662,7 @@ static void pmic_arb_chained_irq(struct irq_desc *desc) chained_irq_enter(chip, desc); for (i = first >> 5; i <= last >> 5; ++i) { - status = readl_relaxed( - ver_ops->owner_acc_status(pmic_arb, ee, i)); + status = readl_relaxed(ver_ops->owner_acc_status(pmic_arb, ee, i - acc_offset)); if (status) acc_valid = true; @@ -983,8 +1007,8 @@ static u16 pmic_arb_find_apid(struct spmi_pmic_arb *pmic_arb, u16 ppid) if (offset >= pmic_arb->core_size) break; - regval = readl_relaxed(pmic_arb->cnfg + - SPMI_OWNERSHIP_TABLE_REG(apid)); + regval = readl_relaxed(pmic_arb->ver_ops->apid_owner(pmic_arb, + apid)); apidd->irq_ee = SPMI_OWNERSHIP_PERIPH2OWNER(regval); apidd->write_ee = apidd->irq_ee; @@ -1020,21 +1044,30 @@ static int pmic_arb_ppid_to_apid_v2(struct spmi_pmic_arb *pmic_arb, u16 ppid) static int pmic_arb_read_apid_map_v5(struct spmi_pmic_arb *pmic_arb) { - struct apid_data *apidd = pmic_arb->apid_data; + struct apid_data *apidd; struct apid_data *prev_apidd; - u16 i, apid, ppid; + u16 i, apid, ppid, apid_max; bool valid, is_irq_ee; u32 regval, offset; /* * In order to allow multiple EEs to write to a single PPID in arbiter - * version 5, there is more than one APID mapped to each PPID. + * version 5 and 7, there is more than one APID mapped to each PPID. * The owner field for each of these mappings specifies the EE which is * allowed to write to the APID. The owner of the last (highest) APID * which has the IRQ owner bit set for a given PPID will receive * interrupts from the PPID. 
+ * + * In arbiter version 7, the APID numbering space is divided between + * the primary bus (0) and secondary bus (1) such that: + * APID = 0 to N-1 are assigned to the primary bus + * APID = N to N+M-1 are assigned to the secondary bus + * where N = number of APIDs supported by the primary bus and + * M = number of APIDs supported by the secondary bus */ - for (i = 0; ; i++, apidd++) { + apidd = &pmic_arb->apid_data[pmic_arb->base_apid]; + apid_max = pmic_arb->base_apid + pmic_arb->apid_count; + for (i = pmic_arb->base_apid; i < apid_max; i++, apidd++) { offset = pmic_arb->ver_ops->apid_map_offset(i); if (offset >= pmic_arb->core_size) break; @@ -1045,8 +1078,8 @@ static int pmic_arb_read_apid_map_v5(struct spmi_pmic_arb *pmic_arb) ppid = (regval >> 8) & PMIC_ARB_PPID_MASK; is_irq_ee = PMIC_ARB_CHAN_IS_IRQ_OWNER(regval); - regval = readl_relaxed(pmic_arb->cnfg + - SPMI_OWNERSHIP_TABLE_REG(i)); + regval = readl_relaxed(pmic_arb->ver_ops->apid_owner(pmic_arb, + i)); apidd->write_ee = SPMI_OWNERSHIP_PERIPH2OWNER(regval); apidd->irq_ee = is_irq_ee ? apidd->write_ee : INVALID_EE; @@ -1145,6 +1178,40 @@ static int pmic_arb_offset_v5(struct spmi_pmic_arb *pmic_arb, u8 sid, u16 addr, return offset; } +/* + * v7 offset per ee and per apid for observer channels and per apid for + * read/write channels. 
+ */ +static int pmic_arb_offset_v7(struct spmi_pmic_arb *pmic_arb, u8 sid, u16 addr, + enum pmic_arb_channel ch_type) +{ + u16 apid; + int rc; + u32 offset = 0; + u16 ppid = (sid << 8) | (addr >> 8); + + rc = pmic_arb->ver_ops->ppid_to_apid(pmic_arb, ppid); + if (rc < 0) + return rc; + + apid = rc; + switch (ch_type) { + case PMIC_ARB_CHANNEL_OBS: + offset = 0x8000 * pmic_arb->ee + 0x20 * apid; + break; + case PMIC_ARB_CHANNEL_RW: + if (pmic_arb->apid_data[apid].write_ee != pmic_arb->ee) { + dev_err(&pmic_arb->spmic->dev, "disallowed SPMI write to sid=%u, addr=0x%04X\n", + sid, addr); + return -EPERM; + } + offset = 0x1000 * apid; + break; + } + + return offset; +} + static u32 pmic_arb_fmt_cmd_v1(u8 opc, u8 sid, u16 addr, u8 bc) { return (opc << 27) | ((sid & 0xf) << 20) | (addr << 4) | (bc & 0x7); @@ -1179,6 +1246,12 @@ pmic_arb_owner_acc_status_v5(struct spmi_pmic_arb *pmic_arb, u8 m, u16 n) return pmic_arb->intr + 0x10000 * m + 0x4 * n; } +static void __iomem * +pmic_arb_owner_acc_status_v7(struct spmi_pmic_arb *pmic_arb, u8 m, u16 n) +{ + return pmic_arb->intr + 0x1000 * m + 0x4 * n; +} + static void __iomem * pmic_arb_acc_enable_v1(struct spmi_pmic_arb *pmic_arb, u16 n) { @@ -1197,6 +1270,12 @@ pmic_arb_acc_enable_v5(struct spmi_pmic_arb *pmic_arb, u16 n) return pmic_arb->wr_base + 0x100 + 0x10000 * n; } +static void __iomem * +pmic_arb_acc_enable_v7(struct spmi_pmic_arb *pmic_arb, u16 n) +{ + return pmic_arb->wr_base + 0x100 + 0x1000 * n; +} + static void __iomem * pmic_arb_irq_status_v1(struct spmi_pmic_arb *pmic_arb, u16 n) { @@ -1215,6 +1294,12 @@ pmic_arb_irq_status_v5(struct spmi_pmic_arb *pmic_arb, u16 n) return pmic_arb->wr_base + 0x104 + 0x10000 * n; } +static void __iomem * +pmic_arb_irq_status_v7(struct spmi_pmic_arb *pmic_arb, u16 n) +{ + return pmic_arb->wr_base + 0x104 + 0x1000 * n; +} + static void __iomem * pmic_arb_irq_clear_v1(struct spmi_pmic_arb *pmic_arb, u16 n) { @@ -1233,6 +1318,12 @@ pmic_arb_irq_clear_v5(struct spmi_pmic_arb 
*pmic_arb, u16 n) return pmic_arb->wr_base + 0x108 + 0x10000 * n; } +static void __iomem * +pmic_arb_irq_clear_v7(struct spmi_pmic_arb *pmic_arb, u16 n) +{ + return pmic_arb->wr_base + 0x108 + 0x1000 * n; +} + static u32 pmic_arb_apid_map_offset_v2(u16 n) { return 0x800 + 0x4 * n; @@ -1243,6 +1334,28 @@ static u32 pmic_arb_apid_map_offset_v5(u16 n) return 0x900 + 0x4 * n; } +static u32 pmic_arb_apid_map_offset_v7(u16 n) +{ + return 0x2000 + 0x4 * n; +} + +static void __iomem * +pmic_arb_apid_owner_v2(struct spmi_pmic_arb *pmic_arb, u16 n) +{ + return pmic_arb->cnfg + 0x700 + 0x4 * n; +} + +/* + * For arbiter version 7, APID ownership table registers have independent + * numbering space for each SPMI bus instance, so each is indexed starting from + * 0. + */ +static void __iomem * +pmic_arb_apid_owner_v7(struct spmi_pmic_arb *pmic_arb, u16 n) +{ + return pmic_arb->cnfg + 0x4 * (n - pmic_arb->base_apid); +} + static const struct pmic_arb_ver_ops pmic_arb_v1 = { .ver_str = "v1", .ppid_to_apid = pmic_arb_ppid_to_apid_v1, @@ -1254,6 +1367,7 @@ static const struct pmic_arb_ver_ops pmic_arb_v1 = { .irq_status = pmic_arb_irq_status_v1, .irq_clear = pmic_arb_irq_clear_v1, .apid_map_offset = pmic_arb_apid_map_offset_v2, + .apid_owner = pmic_arb_apid_owner_v2, }; static const struct pmic_arb_ver_ops pmic_arb_v2 = { @@ -1267,6 +1381,7 @@ static const struct pmic_arb_ver_ops pmic_arb_v2 = { .irq_status = pmic_arb_irq_status_v2, .irq_clear = pmic_arb_irq_clear_v2, .apid_map_offset = pmic_arb_apid_map_offset_v2, + .apid_owner = pmic_arb_apid_owner_v2, }; static const struct pmic_arb_ver_ops pmic_arb_v3 = { @@ -1280,6 +1395,7 @@ static const struct pmic_arb_ver_ops pmic_arb_v3 = { .irq_status = pmic_arb_irq_status_v2, .irq_clear = pmic_arb_irq_clear_v2, .apid_map_offset = pmic_arb_apid_map_offset_v2, + .apid_owner = pmic_arb_apid_owner_v2, }; static const struct pmic_arb_ver_ops pmic_arb_v5 = { @@ -1293,6 +1409,21 @@ static const struct pmic_arb_ver_ops pmic_arb_v5 = { .irq_status 
= pmic_arb_irq_status_v5, .irq_clear = pmic_arb_irq_clear_v5, .apid_map_offset = pmic_arb_apid_map_offset_v5, + .apid_owner = pmic_arb_apid_owner_v2, +}; + +static const struct pmic_arb_ver_ops pmic_arb_v7 = { + .ver_str = "v7", + .ppid_to_apid = pmic_arb_ppid_to_apid_v5, + .non_data_cmd = pmic_arb_non_data_cmd_v2, + .offset = pmic_arb_offset_v7, + .fmt_cmd = pmic_arb_fmt_cmd_v2, + .owner_acc_status = pmic_arb_owner_acc_status_v7, + .acc_enable = pmic_arb_acc_enable_v7, + .irq_status = pmic_arb_irq_status_v7, + .irq_clear = pmic_arb_irq_clear_v7, + .apid_map_offset = pmic_arb_apid_map_offset_v7, + .apid_owner = pmic_arb_apid_owner_v7, }; static const struct irq_domain_ops pmic_arb_irq_domain_ops = { @@ -1319,8 +1450,18 @@ static int spmi_pmic_arb_probe(struct platform_device *pdev) pmic_arb = spmi_controller_get_drvdata(ctrl); pmic_arb->spmic = ctrl; + /* + * Please don't replace this with devm_platform_ioremap_resource() or + * devm_ioremap_resource(). These both result in a call to + * devm_request_mem_region() which prevents multiple mappings of this + * register address range. SoCs with PMIC arbiter v7 may define two + * arbiter devices, for the two physical SPMI interfaces, which share + * some register address ranges (i.e. "core", "obsrvr", and "chnls"). + * Ensure that both devices probe successfully by calling devm_ioremap() + * which does not result in a devm_request_mem_region() call. 
+ */ res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "core"); - core = devm_ioremap_resource(&ctrl->dev, res); + core = devm_ioremap(&ctrl->dev, res->start, resource_size(res)); if (IS_ERR(core)) { err = PTR_ERR(core); goto err_put_ctrl; @@ -1349,12 +1490,15 @@ static int spmi_pmic_arb_probe(struct platform_device *pdev) pmic_arb->ver_ops = &pmic_arb_v2; else if (hw_ver < PMIC_ARB_VERSION_V5_MIN) pmic_arb->ver_ops = &pmic_arb_v3; - else + else if (hw_ver < PMIC_ARB_VERSION_V7_MIN) pmic_arb->ver_ops = &pmic_arb_v5; + else + pmic_arb->ver_ops = &pmic_arb_v7; res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "obsrvr"); - pmic_arb->rd_base = devm_ioremap_resource(&ctrl->dev, res); + pmic_arb->rd_base = devm_ioremap(&ctrl->dev, res->start, + resource_size(res)); if (IS_ERR(pmic_arb->rd_base)) { err = PTR_ERR(pmic_arb->rd_base); goto err_put_ctrl; @@ -1362,13 +1506,69 @@ static int spmi_pmic_arb_probe(struct platform_device *pdev) res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "chnls"); - pmic_arb->wr_base = devm_ioremap_resource(&ctrl->dev, res); + pmic_arb->wr_base = devm_ioremap(&ctrl->dev, res->start, + resource_size(res)); if (IS_ERR(pmic_arb->wr_base)) { err = PTR_ERR(pmic_arb->wr_base); goto err_put_ctrl; } } + pmic_arb->max_periphs = PMIC_ARB_MAX_PERIPHS; + + if (hw_ver >= PMIC_ARB_VERSION_V7_MIN) { + pmic_arb->max_periphs = PMIC_ARB_MAX_PERIPHS_V7; + /* Optional property for v7: */ + of_property_read_u32(pdev->dev.of_node, "qcom,bus-id", + &pmic_arb->bus_instance); + if (pmic_arb->bus_instance > 1) { + err = -EINVAL; + dev_err(&pdev->dev, "invalid bus instance (%u) specified\n", + pmic_arb->bus_instance); + goto err_put_ctrl; + } + + if (pmic_arb->bus_instance == 0) { + pmic_arb->base_apid = 0; + pmic_arb->apid_count = + readl_relaxed(core + PMIC_ARB_FEATURES) & + PMIC_ARB_FEATURES_PERIPH_MASK; + } else { + pmic_arb->base_apid = + readl_relaxed(core + PMIC_ARB_FEATURES) & + PMIC_ARB_FEATURES_PERIPH_MASK; + pmic_arb->apid_count = + 
readl_relaxed(core + PMIC_ARB_FEATURES1) & + PMIC_ARB_FEATURES_PERIPH_MASK; + } + + if (pmic_arb->base_apid + pmic_arb->apid_count > pmic_arb->max_periphs) { + err = -EINVAL; + dev_err(&pdev->dev, "Unsupported APID count %d detected\n", + pmic_arb->base_apid + pmic_arb->apid_count); + goto err_put_ctrl; + } + } else if (hw_ver >= PMIC_ARB_VERSION_V5_MIN) { + pmic_arb->base_apid = 0; + pmic_arb->apid_count = readl_relaxed(core + PMIC_ARB_FEATURES) & + PMIC_ARB_FEATURES_PERIPH_MASK; + + if (pmic_arb->apid_count > pmic_arb->max_periphs) { + err = -EINVAL; + dev_err(&pdev->dev, "Unsupported APID count %d detected\n", + pmic_arb->apid_count); + goto err_put_ctrl; + } + } + + pmic_arb->apid_data = devm_kcalloc(&ctrl->dev, pmic_arb->max_periphs, + sizeof(*pmic_arb->apid_data), + GFP_KERNEL); + if (!pmic_arb->apid_data) { + err = -ENOMEM; + goto err_put_ctrl; + } + dev_info(&ctrl->dev, "PMIC arbiter version %s (0x%x)\n", pmic_arb->ver_ops->ver_str, hw_ver); @@ -1420,7 +1620,7 @@ static int spmi_pmic_arb_probe(struct platform_device *pdev) } pmic_arb->ee = ee; - mapping_table = devm_kcalloc(&ctrl->dev, PMIC_ARB_MAX_PERIPHS, + mapping_table = devm_kcalloc(&ctrl->dev, pmic_arb->max_periphs, sizeof(*mapping_table), GFP_KERNEL); if (!mapping_table) { err = -ENOMEM; @@ -1431,7 +1631,7 @@ static int spmi_pmic_arb_probe(struct platform_device *pdev) /* Initialize max_apid/min_apid to the opposite bounds, during * the irq domain translation, we are sure to update these */ pmic_arb->max_apid = 0; - pmic_arb->min_apid = PMIC_ARB_MAX_PERIPHS - 1; + pmic_arb->min_apid = pmic_arb->max_periphs - 1; platform_set_drvdata(pdev, ctrl); raw_spin_lock_init(&pmic_arb->lock); From 4634c973096a64662a24d9914c47cebc2a8b72f4 Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Wed, 2 Nov 2022 15:26:59 +0800 Subject: [PATCH 1288/4122] chardev: Fix potential memory leak when cdev_add() failed Some init function of cdev(like comedi) will call kobject_set_name() before cdev_add(), but won't free the 
cdev.kobj.name or put the ref cnt of cdev.kobj when cdev_add() failed. As the result, cdev.kobj.name will be leaked. Free the name of kobject in cdev_add() fail path to prevent memleak. With this fix, the callers don't need to care about freeing the name of kobject if cdev_add() fails. unreferenced object 0xffff8881000fa8c0 (size 8): comm "modprobe", pid 239, jiffies 4294905173 (age 51.308s) hex dump (first 8 bytes): 63 6f 6d 65 64 69 00 ff comedi.. backtrace: [<000000005f9878f7>] __kmalloc_node_track_caller+0x4c/0x1c0 [<000000000fd70302>] kstrdup+0x3f/0x70 [<000000009428bc33>] kstrdup_const+0x46/0x60 [<00000000ed50d9de>] kvasprintf_const+0xdb/0xf0 [<00000000b2766964>] kobject_set_name_vargs+0x3c/0xe0 [<00000000f2424ef7>] kobject_set_name+0x62/0x90 [<000000005d5a125b>] 0xffffffffa0013098 [<00000000f331e663>] do_one_initcall+0x7a/0x380 [<00000000aa7bac96>] do_init_module+0x5c/0x230 [<000000005fd72335>] load_module+0x227d/0x2420 [<00000000ad550cf1>] __do_sys_finit_module+0xd5/0x140 [<00000000069a60c5>] do_syscall_64+0x3f/0x90 [<00000000c5e0d521>] entry_SYSCALL_64_after_hwframe+0x63/0xcd Suggested-by: Greg KH Signed-off-by: Shang XiaoJing Link: https://lore.kernel.org/r/20221102072659.23671-1-shangxiaojing@huawei.com Signed-off-by: Greg Kroah-Hartman --- fs/char_dev.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/fs/char_dev.c b/fs/char_dev.c index ba0ded7842a7..340e4543b24a 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -483,17 +483,24 @@ int cdev_add(struct cdev *p, dev_t dev, unsigned count) p->dev = dev; p->count = count; - if (WARN_ON(dev == WHITEOUT_DEV)) - return -EBUSY; + if (WARN_ON(dev == WHITEOUT_DEV)) { + error = -EBUSY; + goto err; + } error = kobj_map(cdev_map, dev, count, NULL, exact_match, exact_lock, p); if (error) - return error; + goto err; kobject_get(p->kobj.parent); return 0; + +err: + kfree_const(p->kobj.name); + p->kobj.name = NULL; + return error; } /** From 982a84455e94bf195f2c35f221a6b4fe239d74d2 Mon Sep 
17 00:00:00 2001 From: Bo Liu Date: Mon, 31 Oct 2022 02:35:57 -0400 Subject: [PATCH 1289/4122] misc: genwqe: card_base: Fix some kernel-doc warnings Fixes the following W=1 kernel build warning(s): drivers/misc/genwqe/card_base.c:3: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst Signed-off-by: Bo Liu Link: https://lore.kernel.org/r/20221031063557.2710-1-liubo03@inspur.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/genwqe/card_base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/genwqe/card_base.c b/drivers/misc/genwqe/card_base.c index 693981891870..bae8114f2805 100644 --- a/drivers/misc/genwqe/card_base.c +++ b/drivers/misc/genwqe/card_base.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only -/** +/* * IBM Accelerator Family 'GenWQE' * * (C) Copyright IBM Corp. 2013 From e48031603ae644054a9251c6fc7f0e0fd4e2683a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 6 Oct 2022 07:36:50 +0200 Subject: [PATCH 1290/4122] uio: uio_fsl_elbc_gpcm: Replace NO_IRQ by 0 NO_IRQ is used to check the return of irq_of_parse_and_map(). On some architecture NO_IRQ is 0, on other architectures it is -1. irq_of_parse_and_map() returns 0 on error, independent of NO_IRQ. So use 0 instead of using NO_IRQ. 
Signed-off-by: Christophe Leroy Link: https://lore.kernel.org/r/68ccdf51811ab26bdb452babf17ae860fa4900c2.1665034535.git.christophe.leroy@csgroup.eu Signed-off-by: Greg Kroah-Hartman --- drivers/uio/uio_fsl_elbc_gpcm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/uio/uio_fsl_elbc_gpcm.c b/drivers/uio/uio_fsl_elbc_gpcm.c index 7d8eb9dc2068..82dda799f327 100644 --- a/drivers/uio/uio_fsl_elbc_gpcm.c +++ b/drivers/uio/uio_fsl_elbc_gpcm.c @@ -390,13 +390,13 @@ static int uio_fsl_elbc_gpcm_probe(struct platform_device *pdev) info->priv = priv; info->name = uio_name; info->version = "0.0.1"; - if (irq != NO_IRQ) { + if (irq) { if (priv->irq_handler) { info->irq = irq; info->irq_flags = IRQF_SHARED; info->handler = priv->irq_handler; } else { - irq = NO_IRQ; + irq = 0; dev_warn(priv->dev, "ignoring irq, no handler\n"); } } @@ -417,7 +417,7 @@ static int uio_fsl_elbc_gpcm_probe(struct platform_device *pdev) dev_info(priv->dev, "eLBC/GPCM device (%s) at 0x%llx, bank %d, irq=%d\n", priv->name, (unsigned long long)res.start, priv->bank, - irq != NO_IRQ ? irq : -1); + irq ? : -1); return 0; out_err2: From d88bd098f45e0dcf317f8924a38b48e8a14a3854 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 28 Sep 2022 22:16:37 +0100 Subject: [PATCH 1291/4122] test_firmware: Fix spelling mistake "EMTPY" -> "EMPTY" There are spelling mistakes in config show text. Fix these. 
Signed-off-by: Colin Ian King Reviewed-by: Russ Weight Link: https://lore.kernel.org/r/20220928211637.62529-1-colin.i.king@gmail.com Signed-off-by: Greg Kroah-Hartman --- lib/test_firmware.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/test_firmware.c b/lib/test_firmware.c index c82b65947ce6..0c714cdd51ef 100644 --- a/lib/test_firmware.c +++ b/lib/test_firmware.c @@ -284,7 +284,7 @@ static ssize_t config_show(struct device *dev, test_fw_config->name); else len += scnprintf(buf + len, PAGE_SIZE - len, - "name:\tEMTPY\n"); + "name:\tEMPTY\n"); len += scnprintf(buf + len, PAGE_SIZE - len, "num_requests:\t%u\n", test_fw_config->num_requests); @@ -315,7 +315,7 @@ static ssize_t config_show(struct device *dev, test_fw_config->upload_name); else len += scnprintf(buf + len, PAGE_SIZE - len, - "upload_name:\tEMTPY\n"); + "upload_name:\tEMPTY\n"); mutex_unlock(&test_fw_mutex); From 19d54020883c210a0cc78e5c735900ee9e9f64b3 Mon Sep 17 00:00:00 2001 From: Jack Rosenthal Date: Fri, 4 Nov 2022 10:15:28 -0600 Subject: [PATCH 1292/4122] firmware: google: Implement cbmem in sysfs driver The CBMEM area is a downward-growing memory region used by coreboot to dynamically allocate tagged data structures ("CBMEM entries") that remain resident during boot. This implements a driver which exports access to the CBMEM entries via sysfs under /sys/bus/coreboot/devices/cbmem-. This implementation is quite versatile. Examples of how it could be used are given below: * Tools like util/cbmem from the coreboot tree could use this driver instead of finding CBMEM in /dev/mem directly. Alternatively, firmware developers debugging an issue may find the sysfs interface more ergonomic than the cbmem tool and choose to use it directly. * The crossystem tool, which exposes verified boot variables, can use this driver to read the vboot work buffer. 
* Tools which read the BIOS SPI flash (e.g., flashrom) can find the flash layout in CBMEM directly, which is significantly faster than searching the flash directly. Write access is provided to all CBMEM regions via /sys/bus/coreboot/devices/cbmem-/mem, as the existing cbmem tooling updates this memory region, and envisioned use cases with crossystem can benefit from updating memory regions. Link: https://issuetracker.google.com/239604743 Cc: Stephen Boyd Cc: Tzung-Bi Shih Reviewed-by: Guenter Roeck Reviewed-by: Julius Werner Tested-by: Jack Rosenthal Signed-off-by: Jack Rosenthal Link: https://lore.kernel.org/r/20221104161528.531248-1-jrosenth@chromium.org Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-bus-coreboot | 45 +++++++ drivers/firmware/google/Kconfig | 15 +++ drivers/firmware/google/Makefile | 3 + drivers/firmware/google/cbmem.c | 129 +++++++++++++++++++ drivers/firmware/google/coreboot_table.c | 11 +- drivers/firmware/google/coreboot_table.h | 18 +++ 6 files changed, 220 insertions(+), 1 deletion(-) create mode 100644 Documentation/ABI/testing/sysfs-bus-coreboot create mode 100644 drivers/firmware/google/cbmem.c diff --git a/Documentation/ABI/testing/sysfs-bus-coreboot b/Documentation/ABI/testing/sysfs-bus-coreboot new file mode 100644 index 000000000000..9c5accecc470 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-coreboot @@ -0,0 +1,45 @@ +What: /sys/bus/coreboot +Date: August 2022 +Contact: Jack Rosenthal +Description: + The coreboot bus provides a variety of virtual devices used to + access data structures created by the Coreboot BIOS. + +What: /sys/bus/coreboot/devices/cbmem- +Date: August 2022 +Contact: Jack Rosenthal +Description: + CBMEM is a downwards-growing memory region created by Coreboot, + and contains tagged data structures to be shared with payloads + in the boot process and the OS. Each CBMEM entry is given a + directory in /sys/bus/coreboot/devices based on its id. 
+ A list of ids known to Coreboot can be found in the coreboot + source tree at + ``src/commonlib/bsd/include/commonlib/bsd/cbmem_id.h``. + +What: /sys/bus/coreboot/devices/cbmem-<id>/address +Date: August 2022 +Contact: Jack Rosenthal +Description: + This is the physical memory address that the CBMEM entry's data + begins at, in hexadecimal (e.g., ``0x76ffe000``). + +What: /sys/bus/coreboot/devices/cbmem-<id>/size +Date: August 2022 +Contact: Jack Rosenthal +Description: + This is the size of the CBMEM entry's data, in hexadecimal + (e.g., ``0x1234``). + +What: /sys/bus/coreboot/devices/cbmem-<id>/mem +Date: August 2022 +Contact: Jack Rosenthal +Description: + A file exposing read/write access to the entry's data. Note + that this file does not support mmap(), as coreboot + does not guarantee that the data will be page-aligned. + + The mode of this file is 0600. While there shouldn't be + anything security-sensitive contained in CBMEM, read access + requires root privileges given this is exposing a small subset + of physical memory. diff --git a/drivers/firmware/google/Kconfig b/drivers/firmware/google/Kconfig index 983e07dc022e..9f190eab43ed 100644 --- a/drivers/firmware/google/Kconfig +++ b/drivers/firmware/google/Kconfig @@ -19,6 +19,21 @@ config GOOGLE_SMI driver provides an interface for reading and writing NVRAM variables. +config GOOGLE_CBMEM + tristate "CBMEM entries in sysfs" + depends on GOOGLE_COREBOOT_TABLE + help + CBMEM is a downwards-growing memory region created by the + Coreboot BIOS containing tagged data structures from the + BIOS. These data structures expose things like the verified + boot firmware variables, flash layout, firmware event log, + and more. + + This option enables the cbmem module, which causes the + kernel to search for Coreboot CBMEM entries, and expose the + memory for each entry in sysfs under + /sys/bus/coreboot/devices/cbmem-<id>. 
+ config GOOGLE_COREBOOT_TABLE tristate "Coreboot Table Access" depends on HAS_IOMEM && (ACPI || OF) diff --git a/drivers/firmware/google/Makefile b/drivers/firmware/google/Makefile index d17caded5d88..8151e323cc43 100644 --- a/drivers/firmware/google/Makefile +++ b/drivers/firmware/google/Makefile @@ -7,5 +7,8 @@ obj-$(CONFIG_GOOGLE_MEMCONSOLE) += memconsole.o obj-$(CONFIG_GOOGLE_MEMCONSOLE_COREBOOT) += memconsole-coreboot.o obj-$(CONFIG_GOOGLE_MEMCONSOLE_X86_LEGACY) += memconsole-x86-legacy.o +# Must come after coreboot_table.o, as this driver depends on that bus type. +obj-$(CONFIG_GOOGLE_CBMEM) += cbmem.o + vpd-sysfs-y := vpd.o vpd_decode.o obj-$(CONFIG_GOOGLE_VPD) += vpd-sysfs.o diff --git a/drivers/firmware/google/cbmem.c b/drivers/firmware/google/cbmem.c new file mode 100644 index 000000000000..685f3070ce9d --- /dev/null +++ b/drivers/firmware/google/cbmem.c @@ -0,0 +1,129 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * cbmem.c + * + * Driver for exporting cbmem entries in sysfs. 
+ * + * Copyright 2022 Google LLC + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "coreboot_table.h" + +struct cbmem_entry { + char *mem_file_buf; + u32 size; +}; + +static struct cbmem_entry *to_cbmem_entry(struct kobject *kobj) +{ + return dev_get_drvdata(kobj_to_dev(kobj)); +} + +static ssize_t mem_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, loff_t pos, + size_t count) +{ + struct cbmem_entry *entry = to_cbmem_entry(kobj); + + return memory_read_from_buffer(buf, count, &pos, entry->mem_file_buf, + entry->size); +} + +static ssize_t mem_write(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, loff_t pos, + size_t count) +{ + struct cbmem_entry *entry = to_cbmem_entry(kobj); + + if (pos < 0 || pos >= entry->size) + return -EINVAL; + if (count > entry->size - pos) + count = entry->size - pos; + + memcpy(entry->mem_file_buf + pos, buf, count); + return count; +} +static BIN_ATTR_ADMIN_RW(mem, 0); + +static ssize_t address_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct coreboot_device *cbdev = dev_to_coreboot_device(dev); + + return sysfs_emit(buf, "0x%llx\n", cbdev->cbmem_entry.address); +} +static DEVICE_ATTR_RO(address); + +static ssize_t size_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct coreboot_device *cbdev = dev_to_coreboot_device(dev); + + return sysfs_emit(buf, "0x%x\n", cbdev->cbmem_entry.entry_size); +} +static DEVICE_ATTR_RO(size); + +static struct attribute *attrs[] = { + &dev_attr_address.attr, + &dev_attr_size.attr, + NULL, +}; + +static struct bin_attribute *bin_attrs[] = { + &bin_attr_mem, + NULL, +}; + +static const struct attribute_group cbmem_entry_group = { + .attrs = attrs, + .bin_attrs = bin_attrs, +}; + +static const struct attribute_group *dev_groups[] = { + &cbmem_entry_group, + NULL, +}; + +static int cbmem_entry_probe(struct 
coreboot_device *dev) +{ + struct cbmem_entry *entry; + + entry = devm_kzalloc(&dev->dev, sizeof(*entry), GFP_KERNEL); + if (!entry) + return -ENOMEM; + + dev_set_drvdata(&dev->dev, entry); + entry->mem_file_buf = devm_memremap(&dev->dev, dev->cbmem_entry.address, + dev->cbmem_entry.entry_size, + MEMREMAP_WB); + if (!entry->mem_file_buf) + return -ENOMEM; + + entry->size = dev->cbmem_entry.entry_size; + + return 0; +} + +static struct coreboot_driver cbmem_entry_driver = { + .probe = cbmem_entry_probe, + .drv = { + .name = "cbmem", + .owner = THIS_MODULE, + .dev_groups = dev_groups, + }, + .tag = LB_TAG_CBMEM_ENTRY, +}; +module_coreboot_driver(cbmem_entry_driver); + +MODULE_AUTHOR("Jack Rosenthal "); +MODULE_LICENSE("GPL"); diff --git a/drivers/firmware/google/coreboot_table.c b/drivers/firmware/google/coreboot_table.c index c52bcaa9def6..7748067eb9e6 100644 --- a/drivers/firmware/google/coreboot_table.c +++ b/drivers/firmware/google/coreboot_table.c @@ -97,12 +97,21 @@ static int coreboot_table_populate(struct device *dev, void *ptr) if (!device) return -ENOMEM; - dev_set_name(&device->dev, "coreboot%d", i); device->dev.parent = dev; device->dev.bus = &coreboot_bus_type; device->dev.release = coreboot_device_release; memcpy(&device->entry, ptr_entry, entry->size); + switch (device->entry.tag) { + case LB_TAG_CBMEM_ENTRY: + dev_set_name(&device->dev, "cbmem-%08x", + device->cbmem_entry.id); + break; + default: + dev_set_name(&device->dev, "coreboot%d", i); + break; + } + ret = device_register(&device->dev); if (ret) { put_device(&device->dev); diff --git a/drivers/firmware/google/coreboot_table.h b/drivers/firmware/google/coreboot_table.h index beb778674acd..37f4d335a606 100644 --- a/drivers/firmware/google/coreboot_table.h +++ b/drivers/firmware/google/coreboot_table.h @@ -39,6 +39,18 @@ struct lb_cbmem_ref { u64 cbmem_addr; }; +#define LB_TAG_CBMEM_ENTRY 0x31 + +/* Corresponds to LB_TAG_CBMEM_ENTRY */ +struct lb_cbmem_entry { + u32 tag; + u32 size; + + u64 
address; + u32 entry_size; + u32 id; +}; + /* Describes framebuffer setup by coreboot */ struct lb_framebuffer { u32 tag; @@ -65,10 +77,16 @@ struct coreboot_device { union { struct coreboot_table_entry entry; struct lb_cbmem_ref cbmem_ref; + struct lb_cbmem_entry cbmem_entry; struct lb_framebuffer framebuffer; }; }; +static inline struct coreboot_device *dev_to_coreboot_device(struct device *dev) +{ + return container_of(dev, struct coreboot_device, dev); +} + /* A driver for handling devices described in coreboot tables. */ struct coreboot_driver { int (*probe)(struct coreboot_device *); From 9de255c461d1b3f0242b3ad1450c3323a3e00b34 Mon Sep 17 00:00:00 2001 From: Rafael Mendonca Date: Fri, 30 Sep 2022 19:40:57 -0300 Subject: [PATCH 1293/4122] uio: uio_dmem_genirq: Fix missing unlock in irq configuration Commit b74351287d4b ("uio: fix a sleep-in-atomic-context bug in uio_dmem_genirq_irqcontrol()") started calling disable_irq() without holding the spinlock because it can sleep. However, that fix introduced another bug: if interrupt is already disabled and a new disable request comes in, then the spinlock is not unlocked: root@localhost:~# printf '\x00\x00\x00\x00' > /dev/uio0 root@localhost:~# printf '\x00\x00\x00\x00' > /dev/uio0 root@localhost:~# [ 14.851538] BUG: scheduling while atomic: bash/223/0x00000002 [ 14.851991] Modules linked in: uio_dmem_genirq uio myfpga(OE) bochs drm_vram_helper drm_ttm_helper ttm drm_kms_helper drm snd_pcm ppdev joydev psmouse snd_timer snd e1000fb_sys_fops syscopyarea parport sysfillrect soundcore sysimgblt input_leds pcspkr i2c_piix4 serio_raw floppy evbug qemu_fw_cfg mac_hid pata_acpi ip_tables x_tables autofs4 [last unloaded: parport_pc] [ 14.854206] CPU: 0 PID: 223 Comm: bash Tainted: G OE 6.0.0-rc7 #21 [ 14.854786] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 [ 14.855664] Call Trace: [ 14.855861] [ 14.856025] dump_stack_lvl+0x4d/0x67 [ 14.856325] 
dump_stack+0x14/0x1a [ 14.856583] __schedule_bug.cold+0x4b/0x5c [ 14.856915] __schedule+0xe81/0x13d0 [ 14.857199] ? idr_find+0x13/0x20 [ 14.857456] ? get_work_pool+0x2d/0x50 [ 14.857756] ? __flush_work+0x233/0x280 [ 14.858068] ? __schedule+0xa95/0x13d0 [ 14.858307] ? idr_find+0x13/0x20 [ 14.858519] ? get_work_pool+0x2d/0x50 [ 14.858798] schedule+0x6c/0x100 [ 14.859009] schedule_hrtimeout_range_clock+0xff/0x110 [ 14.859335] ? tty_write_room+0x1f/0x30 [ 14.859598] ? n_tty_poll+0x1ec/0x220 [ 14.859830] ? tty_ldisc_deref+0x1a/0x20 [ 14.860090] schedule_hrtimeout_range+0x17/0x20 [ 14.860373] do_select+0x596/0x840 [ 14.860627] ? __kernel_text_address+0x16/0x50 [ 14.860954] ? poll_freewait+0xb0/0xb0 [ 14.861235] ? poll_freewait+0xb0/0xb0 [ 14.861517] ? rpm_resume+0x49d/0x780 [ 14.861798] ? common_interrupt+0x59/0xa0 [ 14.862127] ? asm_common_interrupt+0x2b/0x40 [ 14.862511] ? __uart_start.isra.0+0x61/0x70 [ 14.862902] ? __check_object_size+0x61/0x280 [ 14.863255] core_sys_select+0x1c6/0x400 [ 14.863575] ? vfs_write+0x1c9/0x3d0 [ 14.863853] ? vfs_write+0x1c9/0x3d0 [ 14.864121] ? _copy_from_user+0x45/0x70 [ 14.864526] do_pselect.constprop.0+0xb3/0xf0 [ 14.864893] ? do_syscall_64+0x6d/0x90 [ 14.865228] ? do_syscall_64+0x6d/0x90 [ 14.865556] __x64_sys_pselect6+0x76/0xa0 [ 14.865906] do_syscall_64+0x60/0x90 [ 14.866214] ? syscall_exit_to_user_mode+0x2a/0x50 [ 14.866640] ? do_syscall_64+0x6d/0x90 [ 14.866972] ? do_syscall_64+0x6d/0x90 [ 14.867286] ? do_syscall_64+0x6d/0x90 [ 14.867626] entry_SYSCALL_64_after_hwframe+0x63/0xcd [...] stripped [ 14.872959] ('myfpga' is a simple 'uio_dmem_genirq' driver I wrote to test this) The implementation of "uio_dmem_genirq" was based on "uio_pdrv_genirq" and it is used in a similar manner to the "uio_pdrv_genirq" driver with respect to interrupt configuration and handling. At the time "uio_dmem_genirq" was introduced, both had the same implementation of the 'uio_info' handlers irqcontrol() and handler(). 
Then commit 34cb27528398 ("UIO: Fix concurrency issue"), which was only applied to "uio_pdrv_genirq", ended up making them a little different. That commit, among other things, changed disable_irq() to disable_irq_nosync() in the implementation of irqcontrol(). The motivation there was to avoid a deadlock between irqcontrol() and handler(), since it added a spinlock in the irq handler, and disable_irq() waits for the completion of the irq handler. By changing disable_irq() to disable_irq_nosync() in irqcontrol(), we also avoid the sleeping-while-atomic bug that commit b74351287d4b ("uio: fix a sleep-in-atomic-context bug in uio_dmem_genirq_irqcontrol()") was trying to fix. Thus, this fixes the missing unlock in irqcontrol() by importing the implementation of irqcontrol() handler from the "uio_pdrv_genirq" driver. In the end, it reverts commit b74351287d4b ("uio: fix a sleep-in-atomic-context bug in uio_dmem_genirq_irqcontrol()") and change disable_irq() to disable_irq_nosync(). It is worth noting that this still does not address the concurrency issue fixed by commit 34cb27528398 ("UIO: Fix concurrency issue"). It will be addressed separately in the next commits. Split out from commit 34cb27528398 ("UIO: Fix concurrency issue"). 
Fixes: b74351287d4b ("uio: fix a sleep-in-atomic-context bug in uio_dmem_genirq_irqcontrol()") Signed-off-by: Rafael Mendonca Link: https://lore.kernel.org/r/20220930224100.816175-2-rafaelmendsr@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/uio/uio_dmem_genirq.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/uio/uio_dmem_genirq.c b/drivers/uio/uio_dmem_genirq.c index 1106f3376404..cb283ee36eaa 100644 --- a/drivers/uio/uio_dmem_genirq.c +++ b/drivers/uio/uio_dmem_genirq.c @@ -132,13 +132,11 @@ static int uio_dmem_genirq_irqcontrol(struct uio_info *dev_info, s32 irq_on) if (irq_on) { if (test_and_clear_bit(0, &priv->flags)) enable_irq(dev_info->irq); - spin_unlock_irqrestore(&priv->lock, flags); } else { - if (!test_and_set_bit(0, &priv->flags)) { - spin_unlock_irqrestore(&priv->lock, flags); - disable_irq(dev_info->irq); - } + if (!test_and_set_bit(0, &priv->flags)) + disable_irq_nosync(dev_info->irq); } + spin_unlock_irqrestore(&priv->lock, flags); return 0; } From 118b918018175d9fcd8db667f905012e986cc2c9 Mon Sep 17 00:00:00 2001 From: Rafael Mendonca Date: Fri, 30 Sep 2022 19:40:58 -0300 Subject: [PATCH 1294/4122] uio: uio_dmem_genirq: Fix deadlock between irq config and handling This fixes a concurrency issue addressed in commit 34cb27528398 ("UIO: Fix concurrency issue"): "In a SMP case there was a race condition issue between Uio_pdrv_genirq_irqcontrol() running on one CPU and irq handler on another CPU. Fix it by spin_locking shared resources access inside irq handler." The implementation of "uio_dmem_genirq" was based on "uio_pdrv_genirq" and it is used in a similar manner to the "uio_pdrv_genirq" driver with respect to interrupt configuration and handling. At the time "uio_dmem_genirq" was merged, both had the same implementation of the 'uio_info' handlers irqcontrol() and handler(), thus, both had the same concurrency issue mentioned by the above commit. 
However, the above patch was only applied to the "uio_pdrv_genirq" driver. Split out from commit 34cb27528398 ("UIO: Fix concurrency issue"). Fixes: 0a0c3b5a24bd ("Add new uio device for dynamic memory allocation") Signed-off-by: Rafael Mendonca Link: https://lore.kernel.org/r/20220930224100.816175-3-rafaelmendsr@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/uio/uio_dmem_genirq.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/uio/uio_dmem_genirq.c b/drivers/uio/uio_dmem_genirq.c index cb283ee36eaa..792c3e9c9ce5 100644 --- a/drivers/uio/uio_dmem_genirq.c +++ b/drivers/uio/uio_dmem_genirq.c @@ -110,8 +110,10 @@ static irqreturn_t uio_dmem_genirq_handler(int irq, struct uio_info *dev_info) * remember the state so we can allow user space to enable it later. */ + spin_lock(&priv->lock); if (!test_and_set_bit(0, &priv->flags)) disable_irq_nosync(irq); + spin_unlock(&priv->lock); return IRQ_HANDLED; } @@ -125,7 +127,8 @@ static int uio_dmem_genirq_irqcontrol(struct uio_info *dev_info, s32 irq_on) * in the interrupt controller, but keep track of the * state to prevent per-irq depth damage. * - * Serialize this operation to support multiple tasks. + * Serialize this operation to support multiple tasks and concurrency + * with irq handler on SMP systems. */ spin_lock_irqsave(&priv->lock, flags); From a3fc57bc49a24960fd6a907457f9360a3e65b968 Mon Sep 17 00:00:00 2001 From: Rafael Mendonca Date: Fri, 30 Sep 2022 19:40:59 -0300 Subject: [PATCH 1295/4122] uio: uio_dmem_genirq: Use non-atomic bit operations in irq config and handling This finishes the port of the irq configuration and handling from "uio_pdrv_genirq" to "uio_dmem_genirq". It changes the atomic bit-manipulation routines to their non-atomic counterparts as we are already guarding the code by spinlock. Split out from commit 34cb27528398 ("UIO: Fix concurrency issue"). 
Signed-off-by: Rafael Mendonca Link: https://lore.kernel.org/r/20220930224100.816175-4-rafaelmendsr@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/uio/uio_dmem_genirq.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/uio/uio_dmem_genirq.c b/drivers/uio/uio_dmem_genirq.c index 792c3e9c9ce5..5313307c2754 100644 --- a/drivers/uio/uio_dmem_genirq.c +++ b/drivers/uio/uio_dmem_genirq.c @@ -41,6 +41,11 @@ struct uio_dmem_genirq_platdata { unsigned int refcnt; }; +/* Bits in uio_dmem_genirq_platdata.flags */ +enum { + UIO_IRQ_DISABLED = 0, +}; + static int uio_dmem_genirq_open(struct uio_info *info, struct inode *inode) { struct uio_dmem_genirq_platdata *priv = info->priv; @@ -111,7 +116,7 @@ static irqreturn_t uio_dmem_genirq_handler(int irq, struct uio_info *dev_info) */ spin_lock(&priv->lock); - if (!test_and_set_bit(0, &priv->flags)) + if (!__test_and_set_bit(UIO_IRQ_DISABLED, &priv->flags)) disable_irq_nosync(irq); spin_unlock(&priv->lock); @@ -133,10 +138,10 @@ static int uio_dmem_genirq_irqcontrol(struct uio_info *dev_info, s32 irq_on) spin_lock_irqsave(&priv->lock, flags); if (irq_on) { - if (test_and_clear_bit(0, &priv->flags)) + if (__test_and_clear_bit(UIO_IRQ_DISABLED, &priv->flags)) enable_irq(dev_info->irq); } else { - if (!test_and_set_bit(0, &priv->flags)) + if (!__test_and_set_bit(UIO_IRQ_DISABLED, &priv->flags)) disable_irq_nosync(dev_info->irq); } spin_unlock_irqrestore(&priv->lock, flags); From d4ad017d634561907ecdd1e467a28612b369ee00 Mon Sep 17 00:00:00 2001 From: Soha Jin Date: Sat, 1 Oct 2022 00:26:04 +0800 Subject: [PATCH 1296/4122] platform: use fwnode_irq_get_byname instead of of_irq_get_byname to get irq Not only platform devices described by OF have named interrupts, but devices described by ACPI also have named interrupts. The fwnode is an abstraction to different standards, and using fwnode_irq_get_byname can support more devices. 
Signed-off-by: Soha Jin Tested-by: Wende Tan Signed-off-by: Greg Kroah-Hartman --- drivers/base/platform.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 51bb2289865c..968f3d71eeab 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -441,8 +441,8 @@ static int __platform_get_irq_byname(struct platform_device *dev, struct resource *r; int ret; - if (IS_ENABLED(CONFIG_OF_IRQ) && dev->dev.of_node) { - ret = of_irq_get_byname(dev->dev.of_node, name); + if (!dev->dev.of_node || IS_ENABLED(CONFIG_OF_IRQ)) { + ret = fwnode_irq_get_byname(dev_fwnode(&dev->dev), name); if (ret > 0 || ret == -EPROBE_DEFER) return ret; } From 2a4e628570d42fcc13a94f1acf25e3cfeaec08f6 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 9 Nov 2022 17:56:17 +0200 Subject: [PATCH 1297/4122] resource: Replace printk(KERN_WARNING) by pr_warn(), printk() by pr_info() Replace printk(KERN_WARNING) by pr_warn() and printk() by pr_info(). While at it, use %pa for the resource_size_t variables. With that, for the sake of consistency, introduce a temporary variable for the end address in iomem_map_sanity_check() like it's done in another function in the same module. Signed-off-by: Andy Shevchenko Reviewed-by: Rafael J. 
Wysocki Link: https://lore.kernel.org/r/20221109155618.42276-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- kernel/resource.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/kernel/resource.c b/kernel/resource.c index 4c5e80b92f2f..ab32b015bd50 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -888,7 +888,7 @@ void insert_resource_expand_to_fit(struct resource *root, struct resource *new) if (conflict->end > new->end) new->end = conflict->end; - printk("Expanded resource %s due to conflict with %s\n", new->name, conflict->name); + pr_info("Expanded resource %s due to conflict with %s\n", new->name, conflict->name); } write_unlock(&resource_lock); } @@ -1283,9 +1283,7 @@ void __release_region(struct resource *parent, resource_size_t start, write_unlock(&resource_lock); - printk(KERN_WARNING "Trying to free nonexistent resource " - "<%016llx-%016llx>\n", (unsigned long long)start, - (unsigned long long)end); + pr_warn("Trying to free nonexistent resource <%pa-%pa>\n", &start, &end); } EXPORT_SYMBOL(__release_region); @@ -1658,6 +1656,7 @@ __setup("reserve=", reserve_setup); int iomem_map_sanity_check(resource_size_t addr, unsigned long size) { struct resource *p = &iomem_resource; + resource_size_t end = addr + size - 1; int err = 0; loff_t l; @@ -1667,12 +1666,12 @@ int iomem_map_sanity_check(resource_size_t addr, unsigned long size) * We can probably skip the resources without * IORESOURCE_IO attribute? 
*/ - if (p->start >= addr + size) + if (p->start > end) continue; if (p->end < addr) continue; if (PFN_DOWN(p->start) <= PFN_DOWN(addr) && - PFN_DOWN(p->end) >= PFN_DOWN(addr + size - 1)) + PFN_DOWN(p->end) >= PFN_DOWN(end)) continue; /* * if a resource is "BUSY", it's not a hardware resource @@ -1683,10 +1682,8 @@ int iomem_map_sanity_check(resource_size_t addr, unsigned long size) if (p->flags & IORESOURCE_BUSY) continue; - printk(KERN_WARNING "resource sanity check: requesting [mem %#010llx-%#010llx], which spans more than %s %pR\n", - (unsigned long long)addr, - (unsigned long long)(addr + size - 1), - p->name, p); + pr_warn("resource sanity check: requesting [mem %pa-%pa], which spans more than %s %pR\n", + &addr, &end, p->name, p); err = -1; break; } From 52c4d11f1dce60453ab2a75fd7103118cedb2b58 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 9 Nov 2022 17:56:18 +0200 Subject: [PATCH 1298/4122] resource: Convert DEFINE_RES_NAMED() to be compound literal Currently DEFINE_RES_NAMED() can only be used to fill the static data. In some cases it would be convenient to use it as right value in the assignment operation. But it can't be done as is, because compiler has no clue about the data layout. Converting it to be a compound literal allows the above mentioned usage. Signed-off-by: Andy Shevchenko Reviewed-by: Rafael J. 
Wysocki Link: https://lore.kernel.org/r/20221109155618.42276-2-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/ioport.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 27642ca15d93..67d3fb2133b6 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -155,7 +155,7 @@ enum { /* helpers to define resources */ #define DEFINE_RES_NAMED(_start, _size, _name, _flags) \ - { \ +(struct resource) { \ .start = (_start), \ .end = (_start) + (_size) - 1, \ .name = (_name), \ From 882cf4c913d730a74175db039d941005b883de38 Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Mon, 17 Oct 2022 15:40:38 +0800 Subject: [PATCH 1299/4122] drivers: bus: simple-pm-bus: Use clocks Simple Power-Managed bus controller may need functional clock(s) to be enabled before child devices connected to the bus can be accessed. Get the clock(s) as a bulk and enable/disable the clock(s) when the bus is being power managed. One example is that Freescale i.MX8qxp pixel link MSI bus controller needs MSI clock and AHB clock to be enabled before accessing child devices. Reviewed-by: Geert Uytterhoeven Signed-off-by: Liu Ying Link: https://lore.kernel.org/r/20221017074039.4181843-2-victor.liu@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/bus/simple-pm-bus.c | 48 +++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/drivers/bus/simple-pm-bus.c b/drivers/bus/simple-pm-bus.c index 6b8d6257ed8a..d7b043fefde9 100644 --- a/drivers/bus/simple-pm-bus.c +++ b/drivers/bus/simple-pm-bus.c @@ -8,17 +8,24 @@ * for more details. 
*/ +#include #include #include #include #include +struct simple_pm_bus { + struct clk_bulk_data *clks; + int num_clks; +}; + static int simple_pm_bus_probe(struct platform_device *pdev) { const struct device *dev = &pdev->dev; const struct of_dev_auxdata *lookup = dev_get_platdata(dev); struct device_node *np = dev->of_node; const struct of_device_id *match; + struct simple_pm_bus *bus; /* * Allow user to use driver_override to bind this driver to a @@ -44,6 +51,16 @@ static int simple_pm_bus_probe(struct platform_device *pdev) return -ENODEV; } + bus = devm_kzalloc(&pdev->dev, sizeof(*bus), GFP_KERNEL); + if (!bus) + return -ENOMEM; + + bus->num_clks = devm_clk_bulk_get_all(&pdev->dev, &bus->clks); + if (bus->num_clks < 0) + return dev_err_probe(&pdev->dev, bus->num_clks, "failed to get clocks\n"); + + dev_set_drvdata(&pdev->dev, bus); + dev_dbg(&pdev->dev, "%s\n", __func__); pm_runtime_enable(&pdev->dev); @@ -67,6 +84,36 @@ static int simple_pm_bus_remove(struct platform_device *pdev) return 0; } +static int simple_pm_bus_runtime_suspend(struct device *dev) +{ + struct simple_pm_bus *bus = dev_get_drvdata(dev); + + clk_bulk_disable_unprepare(bus->num_clks, bus->clks); + + return 0; +} + +static int simple_pm_bus_runtime_resume(struct device *dev) +{ + struct simple_pm_bus *bus = dev_get_drvdata(dev); + int ret; + + ret = clk_bulk_prepare_enable(bus->num_clks, bus->clks); + if (ret) { + dev_err(dev, "failed to enable clocks: %d\n", ret); + return ret; + } + + return 0; +} + +static const struct dev_pm_ops simple_pm_bus_pm_ops = { + SET_RUNTIME_PM_OPS(simple_pm_bus_runtime_suspend, + simple_pm_bus_runtime_resume, NULL) + SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) +}; + #define ONLY_BUS ((void *) 1) /* Match if the device is only a bus. 
*/ static const struct of_device_id simple_pm_bus_of_match[] = { @@ -85,6 +132,7 @@ static struct platform_driver simple_pm_bus_driver = { .driver = { .name = "simple-pm-bus", .of_match_table = simple_pm_bus_of_match, + .pm = &simple_pm_bus_pm_ops, }, }; From c08645ea215c446ceb21029fe5416e6a62cbbed7 Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Mon, 17 Oct 2022 15:40:39 +0800 Subject: [PATCH 1300/4122] dt-bindings: bus: Add Freescale i.MX8qxp pixel link MSI bus binding Freescale i.MX8qxp pixel link MSI bus is a simple memory-mapped bus. It is used to access peripherals in i.MX8qm/qxp imaging, LVDS, MIPI DSI and HDMI TX subsystems, like I2C controller, PWM controller, MIPI DSI controller and Control and Status Registers (CSR) module. Reference simple-pm-bus bindings and add Freescale i.MX8qxp pixel link MSI bus specific bindings. Reviewed-by: Rob Herring Signed-off-by: Liu Ying Link: https://lore.kernel.org/r/20221017074039.4181843-3-victor.liu@nxp.com Signed-off-by: Greg Kroah-Hartman --- .../bus/fsl,imx8qxp-pixel-link-msi-bus.yaml | 232 ++++++++++++++++++ 1 file changed, 232 insertions(+) create mode 100644 Documentation/devicetree/bindings/bus/fsl,imx8qxp-pixel-link-msi-bus.yaml diff --git a/Documentation/devicetree/bindings/bus/fsl,imx8qxp-pixel-link-msi-bus.yaml b/Documentation/devicetree/bindings/bus/fsl,imx8qxp-pixel-link-msi-bus.yaml new file mode 100644 index 000000000000..b568d0ce438d --- /dev/null +++ b/Documentation/devicetree/bindings/bus/fsl,imx8qxp-pixel-link-msi-bus.yaml @@ -0,0 +1,232 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/bus/fsl,imx8qxp-pixel-link-msi-bus.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Freescale i.MX8qxp Pixel Link Medium Speed Interconnect (MSI) Bus + +maintainers: + - Liu Ying + +description: | + i.MX8qxp pixel link MSI bus is used to control settings of PHYs, I/Os + sitting together with the PHYs. 
It is not the same as the MSI bus coming + from i.MX8 System Controller Unit (SCU) which is used to control power, + clock and reset through the i.MX8 Distributed Slave System Controller (DSC). + + i.MX8qxp pixel link MSI bus is a simple memory-mapped bus. Two input clocks, + that is, MSI clock and AHB clock, need to be enabled so that peripherals + connected to the bus can be accessed. Also, the bus is part of a power + domain. The power domain needs to be enabled before the peripherals can + be accessed. + + Peripherals in i.MX8qm/qxp imaging, LVDS, MIPI DSI and HDMI TX subsystems, + like I2C controller, PWM controller, MIPI DSI controller and Control and + Status Registers (CSR) module, are accessed through the bus. + + The i.MX System Controller Firmware (SCFW) owns and uses the i.MX8qm/qxp + pixel link MSI bus controller and does not allow SCFW user to control it. + So, the controller's registers cannot be accessed by SCFW user. Hence, + the interrupts generated by the controller don't make any sense from SCFW + user's point of view. + +allOf: + - $ref: simple-pm-bus.yaml# + +# We need a select here so we don't match all nodes with 'simple-pm-bus'. 
+select: + properties: + compatible: + contains: + enum: + - fsl,imx8qxp-display-pixel-link-msi-bus + - fsl,imx8qm-display-pixel-link-msi-bus + required: + - compatible + +properties: + compatible: + items: + - enum: + - fsl,imx8qxp-display-pixel-link-msi-bus + - fsl,imx8qm-display-pixel-link-msi-bus + - const: simple-pm-bus + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + items: + - description: master gated clock from system + - description: AHB clock + + clock-names: + items: + - const: msi + - const: ahb + +patternProperties: + "^.*@[0-9a-f]+$": + description: Devices attached to the bus + type: object + properties: + reg: + maxItems: 1 + + required: + - reg + +required: + - compatible + - reg + - clocks + - clock-names + - power-domains + +unevaluatedProperties: false + +examples: + - | + #include + #include + bus@56200000 { + compatible = "fsl,imx8qxp-display-pixel-link-msi-bus", "simple-pm-bus"; + reg = <0x56200000 0x20000>; + #address-cells = <1>; + #size-cells = <1>; + interrupt-parent = <&dc0_irqsteer>; + interrupts = <320>; + ranges; + clocks = <&dc0_disp_ctrl_link_mst0_lpcg IMX_LPCG_CLK_4>, + <&dc0_disp_ctrl_link_mst0_lpcg IMX_LPCG_CLK_4>; + clock-names = "msi", "ahb"; + power-domains = <&pd IMX_SC_R_DC_0>; + + syscon@56221000 { + compatible = "fsl,imx8qxp-mipi-lvds-csr", "syscon", "simple-mfd"; + reg = <0x56221000 0x1000>; + clocks = <&mipi_lvds_0_di_mipi_lvds_regs_lpcg IMX_LPCG_CLK_4>; + clock-names = "ipg"; + + pxl2dpi { + compatible = "fsl,imx8qxp-pxl2dpi"; + fsl,sc-resource = ; + power-domains = <&pd IMX_SC_R_MIPI_0>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + #address-cells = <1>; + #size-cells = <0>; + reg = <0>; + + mipi_lvds_0_pxl2dpi_dc0_pixel_link0: endpoint@0 { + reg = <0>; + remote-endpoint = <&dc0_pixel_link0_mipi_lvds_0_pxl2dpi>; + }; + + mipi_lvds_0_pxl2dpi_dc0_pixel_link1: endpoint@1 { + reg = <1>; + remote-endpoint = <&dc0_pixel_link1_mipi_lvds_0_pxl2dpi>; + }; + }; + + port@1 { + 
#address-cells = <1>; + #size-cells = <0>; + reg = <1>; + + mipi_lvds_0_pxl2dpi_mipi_lvds_0_ldb_ch0: endpoint@0 { + reg = <0>; + remote-endpoint = <&mipi_lvds_0_ldb_ch0_mipi_lvds_0_pxl2dpi>; + }; + + mipi_lvds_0_pxl2dpi_mipi_lvds_0_ldb_ch1: endpoint@1 { + reg = <1>; + remote-endpoint = <&mipi_lvds_0_ldb_ch1_mipi_lvds_0_pxl2dpi>; + }; + }; + }; + }; + + ldb { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,imx8qxp-ldb"; + clocks = <&clk IMX_SC_R_LVDS_0 IMX_SC_PM_CLK_MISC2>, + <&clk IMX_SC_R_LVDS_0 IMX_SC_PM_CLK_BYPASS>; + clock-names = "pixel", "bypass"; + power-domains = <&pd IMX_SC_R_LVDS_0>; + + channel@0 { + #address-cells = <1>; + #size-cells = <0>; + reg = <0>; + phys = <&mipi_lvds_0_phy>; + phy-names = "lvds_phy"; + + port@0 { + reg = <0>; + + mipi_lvds_0_ldb_ch0_mipi_lvds_0_pxl2dpi: endpoint { + remote-endpoint = <&mipi_lvds_0_pxl2dpi_mipi_lvds_0_ldb_ch0>; + }; + }; + + port@1 { + reg = <1>; + + /* ... */ + }; + }; + + channel@1 { + #address-cells = <1>; + #size-cells = <0>; + reg = <1>; + phys = <&mipi_lvds_0_phy>; + phy-names = "lvds_phy"; + + port@0 { + reg = <0>; + + mipi_lvds_0_ldb_ch1_mipi_lvds_0_pxl2dpi: endpoint { + remote-endpoint = <&mipi_lvds_0_pxl2dpi_mipi_lvds_0_ldb_ch1>; + }; + }; + + port@1 { + reg = <1>; + + /* ... 
*/ + }; + }; + }; + }; + + clock-controller@56223004 { + compatible = "fsl,imx8qxp-lpcg"; + reg = <0x56223004 0x4>; + #clock-cells = <1>; + clocks = <&mipi_lvds_0_ipg_clk>; + clock-indices = ; + clock-output-names = "mipi_lvds_0_di_mipi_lvds_regs_lpcg_ipg_clk"; + power-domains = <&pd IMX_SC_R_MIPI_0>; + }; + + phy@56228300 { + compatible = "fsl,imx8qxp-mipi-dphy"; + reg = <0x56228300 0x100>; + clocks = <&clk IMX_SC_R_LVDS_0 IMX_SC_PM_CLK_PHY>; + clock-names = "phy_ref"; + #phy-cells = <0>; + fsl,syscon = <&mipi_lvds_0_csr>; + power-domains = <&pd IMX_SC_R_MIPI_0>; + }; + }; From 4a4a4e9ebaa3ce903a3cdf8bb173eeaf87828cea Mon Sep 17 00:00:00 2001 From: Quan Nguyen Date: Mon, 31 Oct 2022 09:44:41 +0700 Subject: [PATCH 1301/4122] misc: smpro-errmon: Add Ampere's SMpro error monitor driver Add Ampere's SMpro error monitor driver for monitoring and reporting RAS-related errors as reported by SMpro co-processor found on Ampere's Altra processor family. Signed-off-by: Quan Nguyen Link: https://lore.kernel.org/r/20221031024442.2490881-3-quan@os.amperecomputing.com Signed-off-by: Greg Kroah-Hartman --- .../sysfs-bus-platform-devices-ampere-smpro | 264 +++++++++ drivers/misc/Kconfig | 12 + drivers/misc/Makefile | 1 + drivers/misc/smpro-errmon.c | 529 ++++++++++++++++++ 4 files changed, 806 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro create mode 100644 drivers/misc/smpro-errmon.c diff --git a/Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro b/Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro new file mode 100644 index 000000000000..2b84dc8c3149 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro @@ -0,0 +1,264 @@ +What: /sys/bus/platform/devices/smpro-errmon.*/error_[core|mem|pcie|other]_[ce|ue] +KernelVersion: 6.1 +Contact: Quan Nguyen +Description: + (RO) Contains the 48-byte Ampere (Vendor-Specific) Error Record printed + in hex format according to the 
table below: + + +--------+---------------+-------------+------------------------------------------------------------+ + | Offset | Field | Size (byte) | Description | + +--------+---------------+-------------+------------------------------------------------------------+ + | 00 | Error Type | 1 | See :ref:`the table below ` for details | + +--------+---------------+-------------+------------------------------------------------------------+ + | 01 | Subtype | 1 | See :ref:`the table below ` for details | + +--------+---------------+-------------+------------------------------------------------------------+ + | 02 | Instance | 2 | See :ref:`the table below ` for details | + +--------+---------------+-------------+------------------------------------------------------------+ + | 04 | Error status | 4 | See ARM RAS specification for details | + +--------+---------------+-------------+------------------------------------------------------------+ + | 08 | Error Address | 8 | See ARM RAS specification for details | + +--------+---------------+-------------+------------------------------------------------------------+ + | 16 | Error Misc 0 | 8 | See ARM RAS specification for details | + +--------+---------------+-------------+------------------------------------------------------------+ + | 24 | Error Misc 1 | 8 | See ARM RAS specification for details | + +--------+---------------+-------------+------------------------------------------------------------+ + | 32 | Error Misc 2 | 8 | See ARM RAS specification for details | + +--------+---------------+-------------+------------------------------------------------------------+ + | 40 | Error Misc 3 | 8 | See ARM RAS specification for details | + +--------+---------------+-------------+------------------------------------------------------------+ + + The table below defines the value of error types, their subtype, subcomponent and instance: + + .. 
_smpro-error-types: + + +-----------------+------------+----------+----------------+----------------------------------------+ + | Error Group | Error Type | Sub type | Sub component | Instance | + +-----------------+------------+----------+----------------+----------------------------------------+ + | CPM (core) | 0 | 0 | Snoop-Logic | CPM # | + +-----------------+------------+----------+----------------+----------------------------------------+ + | CPM (core) | 0 | 2 | Armv8 Core 1 | CPM # | + +-----------------+------------+----------+----------------+----------------------------------------+ + | MCU (mem) | 1 | 1 | ERR1 | MCU # \| SLOT << 11 | + +-----------------+------------+----------+----------------+----------------------------------------+ + | MCU (mem) | 1 | 2 | ERR2 | MCU # \| SLOT << 11 | + +-----------------+------------+----------+----------------+----------------------------------------+ + | MCU (mem) | 1 | 3 | ERR3 | MCU # | + +-----------------+------------+----------+----------------+----------------------------------------+ + | MCU (mem) | 1 | 4 | ERR4 | MCU # | + +-----------------+------------+----------+----------------+----------------------------------------+ + | MCU (mem) | 1 | 5 | ERR5 | MCU # | + +-----------------+------------+----------+----------------+----------------------------------------+ + | MCU (mem) | 1 | 6 | ERR6 | MCU # | + +-----------------+------------+----------+----------------+----------------------------------------+ + | MCU (mem) | 1 | 7 | Link Error | MCU # | + +-----------------+------------+----------+----------------+----------------------------------------+ + | Mesh (other) | 2 | 0 | Cross Point | X \| (Y << 5) \| NS <<11 | + +-----------------+------------+----------+----------------+----------------------------------------+ + | Mesh (other) | 2 | 1 | Home Node(IO) | X \| (Y << 5) \| NS <<11 | + +-----------------+------------+----------+----------------+----------------------------------------+ + | Mesh (other) 
| 2 | 2 | Home Node(Mem) | X \| (Y << 5) \| NS <<11 \| device<<12 | + +-----------------+------------+----------+----------------+----------------------------------------+ + | Mesh (other) | 2 | 4 | CCIX Node | X \| (Y << 5) \| NS <<11 | + +-----------------+------------+----------+----------------+----------------------------------------+ + | 2P Link (other) | 3 | 0 | N/A | Altra 2P Link # | + +-----------------+------------+----------+----------------+----------------------------------------+ + | GIC (other) | 5 | 0 | ERR0 | 0 | + +-----------------+------------+----------+----------------+----------------------------------------+ + | GIC (other) | 5 | 1 | ERR1 | 0 | + +-----------------+------------+----------+----------------+----------------------------------------+ + | GIC (other) | 5 | 2 | ERR2 | 0 | + +-----------------+------------+----------+----------------+----------------------------------------+ + | GIC (other) | 5 | 3 | ERR3 | 0 | + +-----------------+------------+----------+----------------+----------------------------------------+ + | GIC (other) | 5 | 4 | ERR4 | 0 | + +-----------------+------------+----------+----------------+----------------------------------------+ + | GIC (other) | 5 | 5 | ERR5 | 0 | + +-----------------+------------+----------+----------------+----------------------------------------+ + | GIC (other) | 5 | 6 | ERR6 | 0 | + +-----------------+------------+----------+----------------+----------------------------------------+ + | GIC (other) | 5 | 7 | ERR7 | 0 | + +-----------------+------------+----------+----------------+----------------------------------------+ + | GIC (other) | 5 | 8 | ERR8 | 0 | + +-----------------+------------+----------+----------------+----------------------------------------+ + | GIC (other) | 5 | 9 | ERR9 | 0 | + +-----------------+------------+----------+----------------+----------------------------------------+ + | GIC (other) | 5 | 10 | ERR10 | 0 | + 
+-----------------+------------+----------+----------------+----------------------------------------+ + | GIC (other) | 5 | 11 | ERR11 | 0 | + +-----------------+------------+----------+----------------+----------------------------------------+ + | GIC (other) | 5 | 12 | ERR12 | 0 | + +-----------------+------------+----------+----------------+----------------------------------------+ + | GIC (other) | 5 | 13-21 | ERR13 | RC # + 1 | + +-----------------+------------+----------+----------------+----------------------------------------+ + | SMMU (other) | 6 | TCU | 100 | RC # | + +-----------------+------------+----------+----------------+----------------------------------------+ + | SMMU (other) | 6 | TBU0 | 0 | RC # | + +-----------------+------------+----------+----------------+----------------------------------------+ + | SMMU (other) | 6 | TBU1 | 1 | RC # | + +-----------------+------------+----------+----------------+----------------------------------------+ + | SMMU (other) | 6 | TBU2 | 2 | RC # | + +-----------------+------------+----------+----------------+----------------------------------------+ + | SMMU (other) | 6 | TBU3 | 3 | RC # | + +-----------------+------------+----------+----------------+----------------------------------------+ + | SMMU (other) | 6 | TBU4 | 4 | RC # | + +-----------------+------------+----------+----------------+----------------------------------------+ + | SMMU (other) | 6 | TBU5 | 5 | RC # | + +-----------------+------------+----------+----------------+----------------------------------------+ + | SMMU (other) | 6 | TBU6 | 6 | RC # | + +-----------------+------------+----------+----------------+----------------------------------------+ + | SMMU (other) | 6 | TBU7 | 7 | RC # | + +-----------------+------------+----------+----------------+----------------------------------------+ + | SMMU (other) | 6 | TBU8 | 8 | RC # | + +-----------------+------------+----------+----------------+----------------------------------------+ + | 
SMMU (other) | 6 | TBU9 | 9 | RC # | + +-----------------+------------+----------+----------------+----------------------------------------+ + | PCIe AER (pcie) | 7 | Root | 0 | RC # | + +-----------------+------------+----------+----------------+----------------------------------------+ + | PCIe AER (pcie) | 7 | Device | 1 | RC # | + +-----------------+------------+----------+----------------+----------------------------------------+ + | PCIe RC (pcie) | 8 | RCA HB | 0 | RC # | + +-----------------+------------+----------+----------------+----------------------------------------+ + | PCIe RC (pcie) | 8 | RCB HB | 1 | RC # | + +-----------------+------------+----------+----------------+----------------------------------------+ + | PCIe RC (pcie) | 8 | RASDP | 8 | RC # | + +-----------------+------------+----------+----------------+----------------------------------------+ + | OCM (other) | 9 | ERR0 | 0 | 0 | + +-----------------+------------+----------+----------------+----------------------------------------+ + | OCM (other) | 9 | ERR1 | 1 | 0 | + +-----------------+------------+----------+----------------+----------------------------------------+ + | OCM (other) | 9 | ERR2 | 2 | 0 | + +-----------------+------------+----------+----------------+----------------------------------------+ + | SMpro (other) | 10 | ERR0 | 0 | 0 | + +-----------------+------------+----------+----------------+----------------------------------------+ + | SMpro (other) | 10 | ERR1 | 1 | 0 | + +-----------------+------------+----------+----------------+----------------------------------------+ + | SMpro (other) | 10 | MPA_ERR | 2 | 0 | + +-----------------+------------+----------+----------------+----------------------------------------+ + | PMpro (other) | 11 | ERR0 | 0 | 0 | + +-----------------+------------+----------+----------------+----------------------------------------+ + | PMpro (other) | 11 | ERR1 | 1 | 0 | + 
+-----------------+------------+----------+----------------+----------------------------------------+ + | PMpro (other) | 11 | MPA_ERR | 2 | 0 | + +-----------------+------------+----------+----------------+----------------------------------------+ + + Example:: + + # cat error_other_ue + 880807001e004010401040101500000001004010401040100c0000000000000000000000000000000000000000000000 + + The detail of each sysfs entries is as below: + + +-------------+---------------------------------------------------------+----------------------------------+ + | Error | Sysfs entry | Description (when triggered) | + +-------------+---------------------------------------------------------+----------------------------------+ + | Core's CE | /sys/bus/platform/devices/smpro-errmon.*/error_core_ce | Core has CE error | + +-------------+---------------------------------------------------------+----------------------------------+ + | Core's UE | /sys/bus/platform/devices/smpro-errmon.*/error_core_ue | Core has UE error | + +-------------+---------------------------------------------------------+----------------------------------+ + | Memory's CE | /sys/bus/platform/devices/smpro-errmon.*/error_mem_ce | Memory has CE error | + +-------------+---------------------------------------------------------+----------------------------------+ + | Memory's UE | /sys/bus/platform/devices/smpro-errmon.*/error_mem_ue | Memory has UE error | + +-------------+---------------------------------------------------------+----------------------------------+ + | PCIe's CE | /sys/bus/platform/devices/smpro-errmon.*/error_pcie_ce | any PCIe controller has CE error | + +-------------+---------------------------------------------------------+----------------------------------+ + | PCIe's UE | /sys/bus/platform/devices/smpro-errmon.*/error_pcie_ue | any PCIe controller has UE error | + +-------------+---------------------------------------------------------+----------------------------------+ + | Other's CE | 
/sys/bus/platform/devices/smpro-errmon.*/error_other_ce | any other CE error | + +-------------+---------------------------------------------------------+----------------------------------+ + | Other's UE | /sys/bus/platform/devices/smpro-errmon.*/error_other_ue | any other UE error | + +-------------+---------------------------------------------------------+----------------------------------+ + + UE: Uncorrect-able Error + CE: Correct-able Error + + For details, see section `3.3 Ampere (Vendor-Specific) Error Record Formats, + Altra Family RAS Supplement`. + + +What: /sys/bus/platform/devices/smpro-errmon.*/overflow_[core|mem|pcie|other]_[ce|ue] +KernelVersion: 6.1 +Contact: Quan Nguyen +Description: + (RO) Return the overflow status of each type HW error reported: + + - 0 : No overflow + - 1 : There is an overflow and the oldest HW errors are dropped + + The detail of each sysfs entries is as below: + + +-------------+-----------------------------------------------------------+---------------------------------------+ + | Overflow | Sysfs entry | Description | + +-------------+-----------------------------------------------------------+---------------------------------------+ + | Core's CE | /sys/bus/platform/devices/smpro-errmon.*/overflow_core_ce | Core CE error overflow | + +-------------+-----------------------------------------------------------+---------------------------------------+ + | Core's UE | /sys/bus/platform/devices/smpro-errmon.*/overflow_core_ue | Core UE error overflow | + +-------------+-----------------------------------------------------------+---------------------------------------+ + | Memory's CE | /sys/bus/platform/devices/smpro-errmon.*/overflow_mem_ce | Memory CE error overflow | + +-------------+-----------------------------------------------------------+---------------------------------------+ + | Memory's UE | /sys/bus/platform/devices/smpro-errmon.*/overflow_mem_ue | Memory UE error overflow | + 
+-------------+-----------------------------------------------------------+---------------------------------------+ + | PCIe's CE | /sys/bus/platform/devices/smpro-errmon.*/overflow_pcie_ce | any PCIe controller CE error overflow | + +-------------+-----------------------------------------------------------+---------------------------------------+ + | PCIe's UE | /sys/bus/platform/devices/smpro-errmon.*/overflow_pcie_ue | any PCIe controller UE error overflow | + +-------------+-----------------------------------------------------------+---------------------------------------+ + | Other's CE | /sys/bus/platform/devices/smpro-errmon.*/overflow_other_ce| any other CE error overflow | + +-------------+-----------------------------------------------------------+---------------------------------------+ + | Other's UE | /sys/bus/platform/devices/smpro-errmon.*/overflow_other_ue| other UE error overflow | + +-------------+-----------------------------------------------------------+---------------------------------------+ + + where: + + - UE: Uncorrect-able Error + - CE: Correct-able Error + +What: /sys/bus/platform/devices/smpro-errmon.*/[error|warn]_[smpro|pmpro] +KernelVersion: 6.1 +Contact: Quan Nguyen +Description: + (RO) Contains the internal firmware error/warning printed as hex format. 
+ + The detail of each sysfs entries is as below: + + +---------------+------------------------------------------------------+--------------------------+ + | Error | Sysfs entry | Description | + +---------------+------------------------------------------------------+--------------------------+ + | SMpro error | /sys/bus/platform/devices/smpro-errmon.*/error_smpro | system has SMpro error | + +---------------+------------------------------------------------------+--------------------------+ + | SMpro warning | /sys/bus/platform/devices/smpro-errmon.*/warn_smpro | system has SMpro warning | + +---------------+------------------------------------------------------+--------------------------+ + | PMpro error | /sys/bus/platform/devices/smpro-errmon.*/error_pmpro | system has PMpro error | + +---------------+------------------------------------------------------+--------------------------+ + | PMpro warning | /sys/bus/platform/devices/smpro-errmon.*/warn_pmpro | system has PMpro warning | + +---------------+------------------------------------------------------+--------------------------+ + + For details, see section `5.10 RAS Internal Error Register Definitions, + Altra Family Soc BMC Interface Specification`. 
+ +What: /sys/bus/platform/devices/smpro-errmon.*/event_[vrd_warn_fault|vrd_hot|dimm_hot] +KernelVersion: 6.1 +Contact: Quan Nguyen +Description: + (RO) Contains the detail information in case of VRD/DIMM warning/hot events + in hex format as below:: + + AAAA + + where: + + - ``AAAA``: The event detail information data + + The detail of each sysfs entries is as below: + + +---------------+---------------------------------------------------------------+---------------------+ + | Event | Sysfs entry | Description | + +---------------+---------------------------------------------------------------+---------------------+ + | VRD HOT | /sys/bus/platform/devices/smpro-errmon.*/event_vrd_hot | VRD Hot | + +---------------+---------------------------------------------------------------+---------------------+ + | VR Warn/Fault | /sys/bus/platform/devices/smpro-errmon.*/event_vrd_warn_fault | VR Warning or Fault | + +---------------+---------------------------------------------------------------+---------------------+ + | DIMM HOT | /sys/bus/platform/devices/smpro-errmon.*/event_dimm_hot | DIMM Hot | + +---------------+---------------------------------------------------------------+---------------------+ + + For more details, see section `5.7 GPI Status Registers, + Altra Family Soc BMC Interface Specification`. + diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 358ad56f6524..b9ceee949dab 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -176,6 +176,18 @@ config SGI_XP this feature will allow for direct communication between SSIs based on a network adapter and DMA messaging. +config SMPRO_ERRMON + tristate "Ampere Computing SMPro error monitor driver" + depends on MFD_SMPRO || COMPILE_TEST + help + Say Y here to get support for the SMpro error monitor function + provided by Ampere Computing's Altra and Altra Max SoCs. 
Upon + loading, the driver creates sysfs files which can be used to gather + multiple HW error data reported via read and write system calls. + + To compile this driver as a module, say M here. The driver will be + called smpro-errmon. + config CS5535_MFGPT tristate "CS5535/CS5536 Geode Multi-Function General Purpose Timer (MFGPT) support" depends on MFD_CS5535 diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index ac9b3e757ba1..bbe24d4511a3 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -23,6 +23,7 @@ obj-$(CONFIG_ENCLOSURE_SERVICES) += enclosure.o obj-$(CONFIG_KGDB_TESTS) += kgdbts.o obj-$(CONFIG_SGI_XP) += sgi-xp/ obj-$(CONFIG_SGI_GRU) += sgi-gru/ +obj-$(CONFIG_SMPRO_ERRMON) += smpro-errmon.o obj-$(CONFIG_CS5535_MFGPT) += cs5535-mfgpt.o obj-$(CONFIG_GEHC_ACHC) += gehc-achc.o obj-$(CONFIG_HP_ILO) += hpilo.o diff --git a/drivers/misc/smpro-errmon.c b/drivers/misc/smpro-errmon.c new file mode 100644 index 000000000000..d1431d419aa4 --- /dev/null +++ b/drivers/misc/smpro-errmon.c @@ -0,0 +1,529 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Ampere Computing SoC's SMpro Error Monitoring Driver + * + * Copyright (c) 2022, Ampere Computing LLC + * + */ + +#include +#include +#include +#include +#include + +/* GPI RAS Error Registers */ +#define GPI_RAS_ERR 0x7E + +/* Core and L2C Error Registers */ +#define CORE_CE_ERR_CNT 0x80 +#define CORE_CE_ERR_LEN 0x81 +#define CORE_CE_ERR_DATA 0x82 +#define CORE_UE_ERR_CNT 0x83 +#define CORE_UE_ERR_LEN 0x84 +#define CORE_UE_ERR_DATA 0x85 + +/* Memory Error Registers */ +#define MEM_CE_ERR_CNT 0x90 +#define MEM_CE_ERR_LEN 0x91 +#define MEM_CE_ERR_DATA 0x92 +#define MEM_UE_ERR_CNT 0x93 +#define MEM_UE_ERR_LEN 0x94 +#define MEM_UE_ERR_DATA 0x95 + +/* RAS Error/Warning Registers */ +#define ERR_SMPRO_TYPE 0xA0 +#define ERR_PMPRO_TYPE 0xA1 +#define ERR_SMPRO_INFO_LO 0xA2 +#define ERR_SMPRO_INFO_HI 0xA3 +#define ERR_SMPRO_DATA_LO 0xA4 +#define ERR_SMPRO_DATA_HI 0xA5 +#define WARN_SMPRO_INFO_LO 0xAA 
+#define WARN_SMPRO_INFO_HI 0xAB +#define ERR_PMPRO_INFO_LO 0xA6 +#define ERR_PMPRO_INFO_HI 0xA7 +#define ERR_PMPRO_DATA_LO 0xA8 +#define ERR_PMPRO_DATA_HI 0xA9 +#define WARN_PMPRO_INFO_LO 0xAC +#define WARN_PMPRO_INFO_HI 0xAD + +/* PCIE Error Registers */ +#define PCIE_CE_ERR_CNT 0xC0 +#define PCIE_CE_ERR_LEN 0xC1 +#define PCIE_CE_ERR_DATA 0xC2 +#define PCIE_UE_ERR_CNT 0xC3 +#define PCIE_UE_ERR_LEN 0xC4 +#define PCIE_UE_ERR_DATA 0xC5 + +/* Other Error Registers */ +#define OTHER_CE_ERR_CNT 0xD0 +#define OTHER_CE_ERR_LEN 0xD1 +#define OTHER_CE_ERR_DATA 0xD2 +#define OTHER_UE_ERR_CNT 0xD8 +#define OTHER_UE_ERR_LEN 0xD9 +#define OTHER_UE_ERR_DATA 0xDA + +/* Event Data Registers */ +#define VRD_WARN_FAULT_EVENT_DATA 0x78 +#define VRD_HOT_EVENT_DATA 0x79 +#define DIMM_HOT_EVENT_DATA 0x7A + +#define MAX_READ_BLOCK_LENGTH 48 + +#define RAS_SMPRO_ERR 0 +#define RAS_PMPRO_ERR 1 + +enum RAS_48BYTES_ERR_TYPES { + CORE_CE_ERR, + CORE_UE_ERR, + MEM_CE_ERR, + MEM_UE_ERR, + PCIE_CE_ERR, + PCIE_UE_ERR, + OTHER_CE_ERR, + OTHER_UE_ERR, + NUM_48BYTES_ERR_TYPE, +}; + +struct smpro_error_hdr { + u8 count; /* Number of the RAS errors */ + u8 len; /* Number of data bytes */ + u8 data; /* Start of 48-byte data */ + u8 max_cnt; /* Max num of errors */ +}; + +/* + * Included Address of registers to get Count, Length of data and Data + * of the 48 bytes error data + */ +static struct smpro_error_hdr smpro_error_table[] = { + [CORE_CE_ERR] = { + .count = CORE_CE_ERR_CNT, + .len = CORE_CE_ERR_LEN, + .data = CORE_CE_ERR_DATA, + .max_cnt = 32 + }, + [CORE_UE_ERR] = { + .count = CORE_UE_ERR_CNT, + .len = CORE_UE_ERR_LEN, + .data = CORE_UE_ERR_DATA, + .max_cnt = 32 + }, + [MEM_CE_ERR] = { + .count = MEM_CE_ERR_CNT, + .len = MEM_CE_ERR_LEN, + .data = MEM_CE_ERR_DATA, + .max_cnt = 16 + }, + [MEM_UE_ERR] = { + .count = MEM_UE_ERR_CNT, + .len = MEM_UE_ERR_LEN, + .data = MEM_UE_ERR_DATA, + .max_cnt = 16 + }, + [PCIE_CE_ERR] = { + .count = PCIE_CE_ERR_CNT, + .len = PCIE_CE_ERR_LEN, + .data = 
PCIE_CE_ERR_DATA, + .max_cnt = 96 + }, + [PCIE_UE_ERR] = { + .count = PCIE_UE_ERR_CNT, + .len = PCIE_UE_ERR_LEN, + .data = PCIE_UE_ERR_DATA, + .max_cnt = 96 + }, + [OTHER_CE_ERR] = { + .count = OTHER_CE_ERR_CNT, + .len = OTHER_CE_ERR_LEN, + .data = OTHER_CE_ERR_DATA, + .max_cnt = 8 + }, + [OTHER_UE_ERR] = { + .count = OTHER_UE_ERR_CNT, + .len = OTHER_UE_ERR_LEN, + .data = OTHER_UE_ERR_DATA, + .max_cnt = 8 + }, +}; + +/* + * List of SCP registers which are used to get + * one type of RAS Internal errors. + */ +struct smpro_int_error_hdr { + u8 type; + u8 info_l; + u8 info_h; + u8 data_l; + u8 data_h; + u8 warn_l; + u8 warn_h; +}; + +static struct smpro_int_error_hdr list_smpro_int_error_hdr[] = { + [RAS_SMPRO_ERR] = { + .type = ERR_SMPRO_TYPE, + .info_l = ERR_SMPRO_INFO_LO, + .info_h = ERR_SMPRO_INFO_HI, + .data_l = ERR_SMPRO_DATA_LO, + .data_h = ERR_SMPRO_DATA_HI, + .warn_l = WARN_SMPRO_INFO_LO, + .warn_h = WARN_SMPRO_INFO_HI, + }, + [RAS_PMPRO_ERR] = { + .type = ERR_PMPRO_TYPE, + .info_l = ERR_PMPRO_INFO_LO, + .info_h = ERR_PMPRO_INFO_HI, + .data_l = ERR_PMPRO_DATA_LO, + .data_h = ERR_PMPRO_DATA_HI, + .warn_l = WARN_PMPRO_INFO_LO, + .warn_h = WARN_PMPRO_INFO_HI, + }, +}; + +struct smpro_errmon { + struct regmap *regmap; +}; + +enum EVENT_TYPES { + VRD_WARN_FAULT_EVENT, + VRD_HOT_EVENT, + DIMM_HOT_EVENT, + NUM_EVENTS_TYPE, +}; + +/* Included Address of event source and data registers */ +static u8 smpro_event_table[NUM_EVENTS_TYPE] = { + VRD_WARN_FAULT_EVENT_DATA, + VRD_HOT_EVENT_DATA, + DIMM_HOT_EVENT_DATA, +}; + +static ssize_t smpro_event_data_read(struct device *dev, + struct device_attribute *da, char *buf, + int channel) +{ + struct smpro_errmon *errmon = dev_get_drvdata(dev); + s32 event_data; + int ret; + + ret = regmap_read(errmon->regmap, smpro_event_table[channel], &event_data); + if (ret) + return ret; + /* Clear event after read */ + if (event_data != 0) + regmap_write(errmon->regmap, smpro_event_table[channel], event_data); + + return sysfs_emit(buf, 
"%04x\n", event_data); +} + +static ssize_t smpro_overflow_data_read(struct device *dev, struct device_attribute *da, + char *buf, int channel) +{ + struct smpro_errmon *errmon = dev_get_drvdata(dev); + struct smpro_error_hdr *err_info; + s32 err_count; + int ret; + + err_info = &smpro_error_table[channel]; + + ret = regmap_read(errmon->regmap, err_info->count, &err_count); + if (ret) + return ret; + + /* Bit 8 indicates the overflow status */ + return sysfs_emit(buf, "%d\n", (err_count & BIT(8)) ? 1 : 0); +} + +static ssize_t smpro_error_data_read(struct device *dev, struct device_attribute *da, + char *buf, int channel) +{ + struct smpro_errmon *errmon = dev_get_drvdata(dev); + unsigned char err_data[MAX_READ_BLOCK_LENGTH]; + struct smpro_error_hdr *err_info; + s32 err_count, err_length; + int ret; + + err_info = &smpro_error_table[channel]; + + ret = regmap_read(errmon->regmap, err_info->count, &err_count); + /* Error count is the low byte */ + err_count &= 0xff; + if (ret || !err_count || err_count > err_info->max_cnt) + return ret; + + ret = regmap_read(errmon->regmap, err_info->len, &err_length); + if (ret || err_length <= 0) + return ret; + + if (err_length > MAX_READ_BLOCK_LENGTH) + err_length = MAX_READ_BLOCK_LENGTH; + + memset(err_data, 0x00, MAX_READ_BLOCK_LENGTH); + ret = regmap_noinc_read(errmon->regmap, err_info->data, err_data, err_length); + if (ret < 0) + return ret; + + /* clear the error */ + ret = regmap_write(errmon->regmap, err_info->count, 0x100); + if (ret) + return ret; + /* + * The output of Core/Memory/PCIe/Others UE/CE errors follows the format + * specified in section 5.8.1 CE/UE Error Data record in + * Altra SOC BMC Interface specification. 
+ */ + return sysfs_emit(buf, "%*phN\n", MAX_READ_BLOCK_LENGTH, err_data); +} + +/* + * Report one pending SMpro/PMpro internal error and clear it; an empty + * read (0) is returned when nothing is pending for the channel. + * + * Output format: + * <4-byte hex value of error info><4-byte hex value of error extensive data> + * Where: + * + error info : The error information + * + error data : Extensive data (32 bits) + * Reference to section 5.10 RAS Internal Error Register Definition in + * Altra SOC BMC Interface specification + * + * NOTE(review): the function emits all sizeof(err) bytes of a four-element + * unsigned int array through "%*phN" (info_h, info_l, data_h, data_l in + * that order, each in host byte order), which is wider than the 4 + 4 + * bytes described above - confirm the intended sysfs format. + */ +static ssize_t smpro_internal_err_read(struct device *dev, struct device_attribute *da, + char *buf, int channel) +{ + struct smpro_errmon *errmon = dev_get_drvdata(dev); + struct smpro_int_error_hdr *err_info; + unsigned int err[4] = { 0 }; + unsigned int err_type; + unsigned int val; + int ret; + + /* read error status: bit 0 is checked for SMpro, bit 1 for PMpro */ + ret = regmap_read(errmon->regmap, GPI_RAS_ERR, &val); + if (ret) + return ret; + + if ((channel == RAS_SMPRO_ERR && !(val & BIT(0))) || + (channel == RAS_PMPRO_ERR && !(val & BIT(1)))) + return 0; + + err_info = &list_smpro_int_error_hdr[channel]; + ret = regmap_read(errmon->regmap, err_info->type, &val); + if (ret) + return ret; + + err_type = (val & BIT(1)) ? BIT(1) : + (val & BIT(2)) ? 
BIT(2) : 0; + + if (!err_type) + return 0; + + ret = regmap_read(errmon->regmap, err_info->info_l, err + 1); + if (ret) + return ret; + + ret = regmap_read(errmon->regmap, err_info->info_h, err); + if (ret) + return ret; + + if (err_type & BIT(2)) { + /* Error with data type: also fetch the data registers */ + ret = regmap_read(errmon->regmap, err_info->data_l, err + 3); + if (ret) + return ret; + + ret = regmap_read(errmon->regmap, err_info->data_h, err + 2); + if (ret) + return ret; + } + + /* clear the read errors by writing the handled type bit back */ + ret = regmap_write(errmon->regmap, err_info->type, err_type); + if (ret) + return ret; + + return sysfs_emit(buf, "%*phN\n", (int)sizeof(err), err); +} + +/* + * Report the pending SMpro/PMpro internal warning and clear it; an empty + * read (0) is returned when nothing is pending for the channel. + * + * Output format: + * <4-byte hex value of warning info> + * Reference to section 5.10 RAS Internal Error Register Definition in + * Altra SOC BMC Interface specification + * + * NOTE(review): the function emits all sizeof(warn) bytes of a two-element + * unsigned int array through "%*phN" (warn_h then warn_l, each in host + * byte order), which is wider than the 4 bytes described above - confirm + * the intended sysfs format. + */ +static ssize_t smpro_internal_warn_read(struct device *dev, struct device_attribute *da, + char *buf, int channel) +{ + struct smpro_errmon *errmon = dev_get_drvdata(dev); + struct smpro_int_error_hdr *err_info; + unsigned int warn[2] = { 0 }; + unsigned int val; + int ret; + + /* read error status: bit 0 is checked for SMpro, bit 1 for PMpro */ + ret = regmap_read(errmon->regmap, GPI_RAS_ERR, &val); + if (ret) + return ret; + + if ((channel == RAS_SMPRO_ERR && !(val & BIT(0))) || + (channel == RAS_PMPRO_ERR && !(val & BIT(1)))) + return 0; + + err_info = &list_smpro_int_error_hdr[channel]; + ret = regmap_read(errmon->regmap, err_info->type, &val); + if (ret) + return ret; + + if (!(val & BIT(0))) + return 0; + + ret = regmap_read(errmon->regmap, err_info->warn_l, warn + 1); + if (ret) + return ret; + + ret = regmap_read(errmon->regmap, err_info->warn_h, warn); + if (ret) + return ret; + + /* clear the warning by writing bit 0 of the type register */ + ret = regmap_write(errmon->regmap, err_info->type, BIT(0)); + if (ret) + return ret; + + return sysfs_emit(buf, "%*phN\n", (int)sizeof(warn), warn); +} + +#define ERROR_OVERFLOW_RO(_error, _index) \ + static ssize_t overflow_##_error##_show(struct device *dev, \ + struct 
device_attribute *da, \ + char *buf) \ + { \ + return smpro_overflow_data_read(dev, da, buf, _index); \ + } \ + static DEVICE_ATTR_RO(overflow_##_error) + +ERROR_OVERFLOW_RO(core_ce, CORE_CE_ERR); +ERROR_OVERFLOW_RO(core_ue, CORE_UE_ERR); +ERROR_OVERFLOW_RO(mem_ce, MEM_CE_ERR); +ERROR_OVERFLOW_RO(mem_ue, MEM_UE_ERR); +ERROR_OVERFLOW_RO(pcie_ce, PCIE_CE_ERR); +ERROR_OVERFLOW_RO(pcie_ue, PCIE_UE_ERR); +ERROR_OVERFLOW_RO(other_ce, OTHER_CE_ERR); +ERROR_OVERFLOW_RO(other_ue, OTHER_UE_ERR); + +#define ERROR_RO(_error, _index) \ + static ssize_t error_##_error##_show(struct device *dev, \ + struct device_attribute *da, \ + char *buf) \ + { \ + return smpro_error_data_read(dev, da, buf, _index); \ + } \ + static DEVICE_ATTR_RO(error_##_error) + +ERROR_RO(core_ce, CORE_CE_ERR); +ERROR_RO(core_ue, CORE_UE_ERR); +ERROR_RO(mem_ce, MEM_CE_ERR); +ERROR_RO(mem_ue, MEM_UE_ERR); +ERROR_RO(pcie_ce, PCIE_CE_ERR); +ERROR_RO(pcie_ue, PCIE_UE_ERR); +ERROR_RO(other_ce, OTHER_CE_ERR); +ERROR_RO(other_ue, OTHER_UE_ERR); + +static ssize_t error_smpro_show(struct device *dev, struct device_attribute *da, char *buf) +{ + return smpro_internal_err_read(dev, da, buf, RAS_SMPRO_ERR); +} +static DEVICE_ATTR_RO(error_smpro); + +static ssize_t error_pmpro_show(struct device *dev, struct device_attribute *da, char *buf) +{ + return smpro_internal_err_read(dev, da, buf, RAS_PMPRO_ERR); +} +static DEVICE_ATTR_RO(error_pmpro); + +static ssize_t warn_smpro_show(struct device *dev, struct device_attribute *da, char *buf) +{ + return smpro_internal_warn_read(dev, da, buf, RAS_SMPRO_ERR); +} +static DEVICE_ATTR_RO(warn_smpro); + +static ssize_t warn_pmpro_show(struct device *dev, struct device_attribute *da, char *buf) +{ + return smpro_internal_warn_read(dev, da, buf, RAS_PMPRO_ERR); +} +static DEVICE_ATTR_RO(warn_pmpro); + +#define EVENT_RO(_event, _index) \ + static ssize_t event_##_event##_show(struct device *dev, \ + struct device_attribute *da, \ + char *buf) \ + { \ + return 
smpro_event_data_read(dev, da, buf, _index); \ + } \ + static DEVICE_ATTR_RO(event_##_event) + +EVENT_RO(vrd_warn_fault, VRD_WARN_FAULT_EVENT); +EVENT_RO(vrd_hot, VRD_HOT_EVENT); +EVENT_RO(dimm_hot, DIMM_HOT_EVENT); + +static struct attribute *smpro_errmon_attrs[] = { + &dev_attr_overflow_core_ce.attr, + &dev_attr_overflow_core_ue.attr, + &dev_attr_overflow_mem_ce.attr, + &dev_attr_overflow_mem_ue.attr, + &dev_attr_overflow_pcie_ce.attr, + &dev_attr_overflow_pcie_ue.attr, + &dev_attr_overflow_other_ce.attr, + &dev_attr_overflow_other_ue.attr, + &dev_attr_error_core_ce.attr, + &dev_attr_error_core_ue.attr, + &dev_attr_error_mem_ce.attr, + &dev_attr_error_mem_ue.attr, + &dev_attr_error_pcie_ce.attr, + &dev_attr_error_pcie_ue.attr, + &dev_attr_error_other_ce.attr, + &dev_attr_error_other_ue.attr, + &dev_attr_error_smpro.attr, + &dev_attr_error_pmpro.attr, + &dev_attr_warn_smpro.attr, + &dev_attr_warn_pmpro.attr, + &dev_attr_event_vrd_warn_fault.attr, + &dev_attr_event_vrd_hot.attr, + &dev_attr_event_dimm_hot.attr, + NULL +}; + +ATTRIBUTE_GROUPS(smpro_errmon); + +static int smpro_errmon_probe(struct platform_device *pdev) +{ + struct smpro_errmon *errmon; + + errmon = devm_kzalloc(&pdev->dev, sizeof(struct smpro_errmon), GFP_KERNEL); + if (!errmon) + return -ENOMEM; + + platform_set_drvdata(pdev, errmon); + + errmon->regmap = dev_get_regmap(pdev->dev.parent, NULL); + if (!errmon->regmap) + return -ENODEV; + + return 0; +} + +static struct platform_driver smpro_errmon_driver = { + .probe = smpro_errmon_probe, + .driver = { + .name = "smpro-errmon", + .dev_groups = smpro_errmon_groups, + }, +}; + +module_platform_driver(smpro_errmon_driver); + +MODULE_AUTHOR("Tung Nguyen "); +MODULE_AUTHOR("Thinh Pham "); +MODULE_AUTHOR("Hoang Nguyen "); +MODULE_AUTHOR("Thu Nguyen "); +MODULE_AUTHOR("Quan Nguyen "); +MODULE_DESCRIPTION("Ampere Altra SMpro driver"); +MODULE_LICENSE("GPL"); From 763dc90e9a4332f82ad43c866c6878742b15d4ab Mon Sep 17 00:00:00 2001 From: Quan Nguyen Date: Mon, 
31 Oct 2022 09:44:42 +0700 Subject: [PATCH 1302/4122] misc: smpro-misc: Add Ampere's Altra SMpro misc driver Add driver support for accessing various information reported by Ampere's SMpro co-processor such as Boot Progress and other miscellaneous data. Signed-off-by: Quan Nguyen Link: https://lore.kernel.org/r/20221031024442.2490881-4-quan@os.amperecomputing.com Signed-off-by: Greg Kroah-Hartman --- .../sysfs-bus-platform-devices-ampere-smpro | 48 ++++++ drivers/misc/Kconfig | 10 ++ drivers/misc/Makefile | 1 + drivers/misc/smpro-misc.c | 145 ++++++++++++++++++ 4 files changed, 204 insertions(+) create mode 100644 drivers/misc/smpro-misc.c diff --git a/Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro b/Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro index 2b84dc8c3149..ca93c215ef99 100644 --- a/Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro +++ b/Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro @@ -262,3 +262,51 @@ Description: For more details, see section `5.7 GPI Status Registers, Altra Family Soc BMC Interface Specification`. +What: /sys/bus/platform/devices/smpro-misc.*/boot_progress +KernelVersion: 6.1 +Contact: Quan Nguyen +Description: + (RO) Contains the boot stages information in hex as format below:: + + AABBCCCCCCCC + + where: + + - ``AA`` : The boot stages + + - 00: SMpro firmware booting + - 01: PMpro firmware booting + - 02: ATF BL1 firmware booting + - 03: DDR initialization + - 04: DDR training report status + - 05: ATF BL2 firmware booting + - 06: ATF BL31 firmware booting + - 07: ATF BL32 firmware booting + - 08: UEFI firmware booting + - 09: OS booting + + - ``BB`` : Boot status + + - 00: Not started + - 01: Started + - 02: Completed without error + - 03: Failed. + + - ``CCCCCCCC``: Boot status information defined for each boot stages + + For details, see section `5.11 Boot Stage Register Definitions` + and section `6. 
Processor Boot Progress Codes, Altra Family Soc BMC + Interface Specification`. + + +What: /sys/bus/platform/devices/smpro-misc*/soc_power_limit +KernelVersion: 6.1 +Contact: Quan Nguyen +Description: + (RW) Contains the desired SoC power limit in Watt. + Writes to this sysfs set the desired SoC power limit (W). + Reads from this register return the current SoC power limit (W). + The value ranges: + + - Minimum: 120 W + - Maximum: Socket TDP power diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index b9ceee949dab..9947b7892bd5 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -188,6 +188,16 @@ config SMPRO_ERRMON To compile this driver as a module, say M here. The driver will be called smpro-errmon. +config SMPRO_MISC + tristate "Ampere Computing SMPro miscellaneous driver" + depends on MFD_SMPRO || COMPILE_TEST + help + Say Y here to get support for the SMpro miscellaneous function + provided by Ampere Computing's Altra and Altra Max SoCs. + + To compile this driver as a module, say M here. The driver will be + called smpro-misc. 
+ config CS5535_MFGPT tristate "CS5535/CS5536 Geode Multi-Function General Purpose Timer (MFGPT) support" depends on MFD_CS5535 diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index bbe24d4511a3..87b54a4a4422 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -24,6 +24,7 @@ obj-$(CONFIG_KGDB_TESTS) += kgdbts.o obj-$(CONFIG_SGI_XP) += sgi-xp/ obj-$(CONFIG_SGI_GRU) += sgi-gru/ obj-$(CONFIG_SMPRO_ERRMON) += smpro-errmon.o +obj-$(CONFIG_SMPRO_MISC) += smpro-misc.o obj-$(CONFIG_CS5535_MFGPT) += cs5535-mfgpt.o obj-$(CONFIG_GEHC_ACHC) += gehc-achc.o obj-$(CONFIG_HP_ILO) += hpilo.o diff --git a/drivers/misc/smpro-misc.c b/drivers/misc/smpro-misc.c new file mode 100644 index 000000000000..6c427141e51b --- /dev/null +++ b/drivers/misc/smpro-misc.c @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Ampere Computing SoC's SMpro Misc Driver + * + * Copyright (c) 2022, Ampere Computing LLC + */ +#include +#include +#include +#include + +/* Boot Stage/Progress Registers */ +#define BOOTSTAGE 0xB0 +#define BOOTSTAGE_LO 0xB1 +#define CUR_BOOTSTAGE 0xB2 +#define BOOTSTAGE_HI 0xB3 + +/* SOC State Registers */ +#define SOC_POWER_LIMIT 0xE5 + +struct smpro_misc { + struct regmap *regmap; +}; + +static ssize_t boot_progress_show(struct device *dev, struct device_attribute *da, char *buf) +{ + struct smpro_misc *misc = dev_get_drvdata(dev); + u16 boot_progress[3] = { 0 }; + u32 bootstage; + u8 boot_stage; + u8 cur_stage; + u32 reg_lo; + u32 reg; + int ret; + + /* Read current boot stage */ + ret = regmap_read(misc->regmap, CUR_BOOTSTAGE, &reg); + if (ret) + return ret; + + cur_stage = reg & 0xff; + + ret = regmap_read(misc->regmap, BOOTSTAGE, &bootstage); + if (ret) + return ret; + + boot_stage = (bootstage >> 8) & 0xff; + + if (boot_stage > cur_stage) + return -EINVAL; + + ret = regmap_read(misc->regmap, BOOTSTAGE_LO, &reg_lo); + if (!ret) + ret = regmap_read(misc->regmap, BOOTSTAGE_HI, &reg); + if (ret) + return ret; + + /* Firmware to report new boot
stage next time */ + if (boot_stage < cur_stage) { + ret = regmap_write(misc->regmap, BOOTSTAGE, ((bootstage & 0xff00) | 0x1)); + if (ret) + return ret; + } + + boot_progress[0] = bootstage; + boot_progress[1] = swab16(reg); + boot_progress[2] = swab16(reg_lo); + + return sysfs_emit(buf, "%*phN\n", (int)sizeof(boot_progress), boot_progress); +} + +static DEVICE_ATTR_RO(boot_progress); + +static ssize_t soc_power_limit_show(struct device *dev, struct device_attribute *da, char *buf) +{ + struct smpro_misc *misc = dev_get_drvdata(dev); + unsigned int value; + int ret; + + ret = regmap_read(misc->regmap, SOC_POWER_LIMIT, &value); + if (ret) + return ret; + + return sysfs_emit(buf, "%d\n", value); +} + +static ssize_t soc_power_limit_store(struct device *dev, struct device_attribute *da, + const char *buf, size_t count) +{ + struct smpro_misc *misc = dev_get_drvdata(dev); + unsigned long val; + s32 ret; + + ret = kstrtoul(buf, 0, &val); + if (ret) + return ret; + + ret = regmap_write(misc->regmap, SOC_POWER_LIMIT, (unsigned int)val); + if (ret) + return -EPROTO; + + return count; +} + +static DEVICE_ATTR_RW(soc_power_limit); + +static struct attribute *smpro_misc_attrs[] = { + &dev_attr_boot_progress.attr, + &dev_attr_soc_power_limit.attr, + NULL +}; + +ATTRIBUTE_GROUPS(smpro_misc); + +static int smpro_misc_probe(struct platform_device *pdev) +{ + struct smpro_misc *misc; + + misc = devm_kzalloc(&pdev->dev, sizeof(struct smpro_misc), GFP_KERNEL); + if (!misc) + return -ENOMEM; + + platform_set_drvdata(pdev, misc); + + misc->regmap = dev_get_regmap(pdev->dev.parent, NULL); + if (!misc->regmap) + return -ENODEV; + + return 0; +} + +static struct platform_driver smpro_misc_driver = { + .probe = smpro_misc_probe, + .driver = { + .name = "smpro-misc", + .dev_groups = smpro_misc_groups, + }, +}; + +module_platform_driver(smpro_misc_driver); + +MODULE_AUTHOR("Tung Nguyen "); +MODULE_AUTHOR("Quan Nguyen "); +MODULE_DESCRIPTION("Ampere Altra SMpro Misc driver"); 
+MODULE_LICENSE("GPL"); From 0d4a030b3db1b53498f1c1ae243677fa0dd2e7ce Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 1 Nov 2022 22:13:59 +0100 Subject: [PATCH 1303/4122] greybus: svc: Use kstrtobool() instead of strtobool() strtobool() is the same as kstrtobool(). However, the latter is more used within the kernel. In order to remove strtobool() and slightly simplify kstrtox.h, switch to the other function name. While at it, include the corresponding header file () Signed-off-by: Christophe JAILLET Reviewed-by: Johan Hovold Reviewed-by: Alex Elder Link: https://lore.kernel.org/r/ebf1e6988a53a455990230a37cf759ee542ea7ec.1667336095.git.christophe.jaillet@wanadoo.fr Signed-off-by: Greg Kroah-Hartman --- drivers/greybus/svc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/greybus/svc.c b/drivers/greybus/svc.c index 56d2b44d6fef..16cced80867a 100644 --- a/drivers/greybus/svc.c +++ b/drivers/greybus/svc.c @@ -7,6 +7,7 @@ */ #include +#include #include #include @@ -83,7 +84,7 @@ static ssize_t watchdog_store(struct device *dev, int retval; bool user_request; - retval = strtobool(buf, &user_request); + retval = kstrtobool(buf, &user_request); if (retval) return retval; From d3d76fbde1c456a6d19991baa99ea8c2d6e6696f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Thu, 3 Nov 2022 16:24:07 +0100 Subject: [PATCH 1304/4122] kernel/ksysfs.c: export kernel cpu byteorder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Certain files in procfs are formatted in byteorder-dependent formats. For example the IP addresses in /proc/net/udp. When using emulation like qemu-user, applications are not guaranteed to be using the same byteorder as the kernel. Therefore the kernel needs to provide a way for applications to discover the byteorder used in API-filesystems. Using systemcalls is not enough because these are intercepted and translated by the emulation. 
Also this makes it easier for non-compiled applications like shellscripts to discover the byteorder. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20221103152407.3348-1-linux@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- .../ABI/testing/sysfs-kernel-cpu_byteorder | 12 ++++++++++++ kernel/ksysfs.c | 18 ++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-kernel-cpu_byteorder diff --git a/Documentation/ABI/testing/sysfs-kernel-cpu_byteorder b/Documentation/ABI/testing/sysfs-kernel-cpu_byteorder new file mode 100644 index 000000000000..f0e6ac1b5356 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-cpu_byteorder @@ -0,0 +1,12 @@ +What: /sys/kernel/cpu_byteorder +Date: February 2023 +KernelVersion: 6.2 +Contact: Thomas Weißschuh +Description: + The endianness of the running kernel. + + Access: Read + + Valid values: + "little", "big" +Users: util-linux diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index 65dba9076f31..2df00b789b90 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -6,6 +6,7 @@ * Copyright (C) 2004 Kay Sievers */ +#include #include #include #include @@ -20,6 +21,14 @@ #include /* rcu_expedited and rcu_normal */ +#if defined(__LITTLE_ENDIAN) +#define CPU_BYTEORDER_STRING "little" +#elif defined(__BIG_ENDIAN) +#define CPU_BYTEORDER_STRING "big" +#else +#error Unknown byteorder +#endif + #define KERNEL_ATTR_RO(_name) \ static struct kobj_attribute _name##_attr = __ATTR_RO(_name) @@ -34,6 +43,14 @@ static ssize_t uevent_seqnum_show(struct kobject *kobj, } KERNEL_ATTR_RO(uevent_seqnum); +/* cpu byteorder */ +static ssize_t cpu_byteorder_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%s\n", CPU_BYTEORDER_STRING); +} +KERNEL_ATTR_RO(cpu_byteorder); + #ifdef CONFIG_UEVENT_HELPER /* uevent helper program, used during early boot */ static ssize_t uevent_helper_show(struct kobject *kobj, @@ -215,6 +232,7 @@ 
EXPORT_SYMBOL_GPL(kernel_kobj); static struct attribute * kernel_attrs[] = { &fscaps_attr.attr, &uevent_seqnum_attr.attr, + &cpu_byteorder_attr.attr, #ifdef CONFIG_UEVENT_HELPER &uevent_helper_attr.attr, #endif From 9b351be25360c5cedfb98b88d6dfd89327849e52 Mon Sep 17 00:00:00 2001 From: Jim Cromie Date: Sat, 22 Oct 2022 16:56:36 -0600 Subject: [PATCH 1305/4122] vmlinux.lds.h: add BOUNDED_SECTION* macros vmlinux.lds.h has ~45 occurrences of this general pattern: __start_foo = .; KEEP(*(foo)) __stop_foo = .; Reduce this pattern to a (group of 4) macros, and use them to reduce linecount. This was inspired by the codetag patchset. no functional change. CC: Suren Baghdasaryan CC: Kent Overstreet Signed-off-by: Jim Cromie Link: https://lore.kernel.org/r/20221022225637.1406715-2-jim.cromie@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/asm-generic/vmlinux.lds.h | 219 +++++++++++------------------- 1 file changed, 79 insertions(+), 140 deletions(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index c15de165ec8f..9f6352171f88 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -193,100 +193,99 @@ # endif #endif +#define BOUNDED_SECTION_PRE_LABEL(_sec_, _label_, _s_, _e_) \ + _s_##_label_ = .; \ + KEEP(*(_sec_)) \ + _e_##_label_ = .; + +#define BOUNDED_SECTION_POST_LABEL(_sec_, _label_, _s_, _e_) \ + _label_##_s_ = .; \ + KEEP(*(_sec_)) \ + _label_##_e_ = .; + +#define BOUNDED_SECTION_BY(_sec_, _label_) \ + BOUNDED_SECTION_PRE_LABEL(_sec_, _label_, __start, __stop) + +#define BOUNDED_SECTION(_sec) BOUNDED_SECTION_BY(_sec, _sec) + #ifdef CONFIG_TRACE_BRANCH_PROFILING -#define LIKELY_PROFILE() __start_annotated_branch_profile = .; \ - KEEP(*(_ftrace_annotated_branch)) \ - __stop_annotated_branch_profile = .; +#define LIKELY_PROFILE() \ + BOUNDED_SECTION_BY(_ftrace_annotated_branch, _annotated_branch_profile) #else #define LIKELY_PROFILE() #endif #ifdef CONFIG_PROFILE_ALL_BRANCHES -#define 
BRANCH_PROFILE() __start_branch_profile = .; \ - KEEP(*(_ftrace_branch)) \ - __stop_branch_profile = .; +#define BRANCH_PROFILE() \ + BOUNDED_SECTION_BY(_ftrace_branch, _branch_profile) #else #define BRANCH_PROFILE() #endif #ifdef CONFIG_KPROBES -#define KPROBE_BLACKLIST() . = ALIGN(8); \ - __start_kprobe_blacklist = .; \ - KEEP(*(_kprobe_blacklist)) \ - __stop_kprobe_blacklist = .; +#define KPROBE_BLACKLIST() \ + . = ALIGN(8); \ + BOUNDED_SECTION(_kprobe_blacklist) #else #define KPROBE_BLACKLIST() #endif #ifdef CONFIG_FUNCTION_ERROR_INJECTION -#define ERROR_INJECT_WHITELIST() STRUCT_ALIGN(); \ - __start_error_injection_whitelist = .; \ - KEEP(*(_error_injection_whitelist)) \ - __stop_error_injection_whitelist = .; +#define ERROR_INJECT_WHITELIST() \ + STRUCT_ALIGN(); \ + BOUNDED_SECTION(_error_injection_whitelist) #else #define ERROR_INJECT_WHITELIST() #endif #ifdef CONFIG_EVENT_TRACING -#define FTRACE_EVENTS() . = ALIGN(8); \ - __start_ftrace_events = .; \ - KEEP(*(_ftrace_events)) \ - __stop_ftrace_events = .; \ - __start_ftrace_eval_maps = .; \ - KEEP(*(_ftrace_eval_map)) \ - __stop_ftrace_eval_maps = .; +#define FTRACE_EVENTS() \ + . = ALIGN(8); \ + BOUNDED_SECTION(_ftrace_events) \ + BOUNDED_SECTION_BY(_ftrace_eval_map, _ftrace_eval_maps) #else #define FTRACE_EVENTS() #endif #ifdef CONFIG_TRACING -#define TRACE_PRINTKS() __start___trace_bprintk_fmt = .; \ - KEEP(*(__trace_printk_fmt)) /* Trace_printk fmt' pointer */ \ - __stop___trace_bprintk_fmt = .; -#define TRACEPOINT_STR() __start___tracepoint_str = .; \ - KEEP(*(__tracepoint_str)) /* Trace_printk fmt' pointer */ \ - __stop___tracepoint_str = .; +#define TRACE_PRINTKS() BOUNDED_SECTION_BY(__trace_printk_fmt, ___trace_bprintk_fmt) +#define TRACEPOINT_STR() BOUNDED_SECTION_BY(__tracepoint_str, ___tracepoint_str) #else #define TRACE_PRINTKS() #define TRACEPOINT_STR() #endif #ifdef CONFIG_FTRACE_SYSCALLS -#define TRACE_SYSCALLS() . 
= ALIGN(8); \ - __start_syscalls_metadata = .; \ - KEEP(*(__syscalls_metadata)) \ - __stop_syscalls_metadata = .; +#define TRACE_SYSCALLS() \ + . = ALIGN(8); \ + BOUNDED_SECTION_BY(__syscalls_metadata, _syscalls_metadata) #else #define TRACE_SYSCALLS() #endif #ifdef CONFIG_BPF_EVENTS -#define BPF_RAW_TP() STRUCT_ALIGN(); \ - __start__bpf_raw_tp = .; \ - KEEP(*(__bpf_raw_tp_map)) \ - __stop__bpf_raw_tp = .; +#define BPF_RAW_TP() STRUCT_ALIGN(); \ + BOUNDED_SECTION_BY(__bpf_raw_tp_map, __bpf_raw_tp) #else #define BPF_RAW_TP() #endif #ifdef CONFIG_SERIAL_EARLYCON -#define EARLYCON_TABLE() . = ALIGN(8); \ - __earlycon_table = .; \ - KEEP(*(__earlycon_table)) \ - __earlycon_table_end = .; +#define EARLYCON_TABLE() \ + . = ALIGN(8); \ + BOUNDED_SECTION_POST_LABEL(__earlycon_table, __earlycon_table, , _end) #else #define EARLYCON_TABLE() #endif #ifdef CONFIG_SECURITY -#define LSM_TABLE() . = ALIGN(8); \ - __start_lsm_info = .; \ - KEEP(*(.lsm_info.init)) \ - __end_lsm_info = .; -#define EARLY_LSM_TABLE() . = ALIGN(8); \ - __start_early_lsm_info = .; \ - KEEP(*(.early_lsm_info.init)) \ - __end_early_lsm_info = .; +#define LSM_TABLE() \ + . = ALIGN(8); \ + BOUNDED_SECTION_PRE_LABEL(.lsm_info.init, _lsm_info, __start, __end) + +#define EARLY_LSM_TABLE() \ + . = ALIGN(8); \ + BOUNDED_SECTION_PRE_LABEL(.early_lsm_info.init, _early_lsm_info, __start, __end) #else #define LSM_TABLE() #define EARLY_LSM_TABLE() @@ -312,9 +311,8 @@ #ifdef CONFIG_ACPI #define ACPI_PROBE_TABLE(name) \ . = ALIGN(8); \ - __##name##_acpi_probe_table = .; \ - KEEP(*(__##name##_acpi_probe_table)) \ - __##name##_acpi_probe_table_end = .; + BOUNDED_SECTION_POST_LABEL(__##name##_acpi_probe_table, \ + __##name##_acpi_probe_table,, _end) #else #define ACPI_PROBE_TABLE(name) #endif @@ -322,9 +320,8 @@ #ifdef CONFIG_THERMAL #define THERMAL_TABLE(name) \ . 
= ALIGN(8); \ - __##name##_thermal_table = .; \ - KEEP(*(__##name##_thermal_table)) \ - __##name##_thermal_table_end = .; + BOUNDED_SECTION_POST_LABEL(__##name##_thermal_table, \ + __##name##_thermal_table,, _end) #else #define THERMAL_TABLE(name) #endif @@ -353,12 +350,8 @@ *(__tracepoints) \ /* implement dynamic printk debug */ \ . = ALIGN(8); \ - __start___dyndbg_classes = .; \ - KEEP(*(__dyndbg_classes)) \ - __stop___dyndbg_classes = .; \ - __start___dyndbg = .; \ - KEEP(*(__dyndbg)) \ - __stop___dyndbg = .; \ + BOUNDED_SECTION_BY(__dyndbg_classes, ___dyndbg_classes) \ + BOUNDED_SECTION_BY(__dyndbg, ___dyndbg) \ LIKELY_PROFILE() \ BRANCH_PROFILE() \ TRACE_PRINTKS() \ @@ -401,19 +394,13 @@ #define JUMP_TABLE_DATA \ . = ALIGN(8); \ - __start___jump_table = .; \ - KEEP(*(__jump_table)) \ - __stop___jump_table = .; + BOUNDED_SECTION_BY(__jump_table, ___jump_table) #ifdef CONFIG_HAVE_STATIC_CALL_INLINE #define STATIC_CALL_DATA \ . = ALIGN(8); \ - __start_static_call_sites = .; \ - KEEP(*(.static_call_sites)) \ - __stop_static_call_sites = .; \ - __start_static_call_tramp_key = .; \ - KEEP(*(.static_call_tramp_key)) \ - __stop_static_call_tramp_key = .; + BOUNDED_SECTION_BY(.static_call_sites, _static_call_sites) \ + BOUNDED_SECTION_BY(.static_call_tramp_key, _static_call_tramp_key) #else #define STATIC_CALL_DATA #endif @@ -439,9 +426,7 @@ #ifdef CONFIG_ARCH_USES_CFI_TRAPS #define KCFI_TRAPS \ __kcfi_traps : AT(ADDR(__kcfi_traps) - LOAD_OFFSET) { \ - __start___kcfi_traps = .; \ - KEEP(*(.kcfi_traps)) \ - __stop___kcfi_traps = .; \ + BOUNDED_SECTION_BY(.kcfi_traps, ___kcfi_traps) \ } #else #define KCFI_TRAPS @@ -459,9 +444,7 @@ SCHED_DATA \ RO_AFTER_INIT_DATA /* Read only after init */ \ . 
= ALIGN(8); \ - __start___tracepoints_ptrs = .; \ - KEEP(*(__tracepoints_ptrs)) /* Tracepoints: pointer array */ \ - __stop___tracepoints_ptrs = .; \ + BOUNDED_SECTION_BY(__tracepoints_ptrs, ___tracepoints_ptrs) \ *(__tracepoints_strings)/* Tracepoints: strings */ \ } \ \ @@ -471,30 +454,14 @@ \ /* PCI quirks */ \ .pci_fixup : AT(ADDR(.pci_fixup) - LOAD_OFFSET) { \ - __start_pci_fixups_early = .; \ - KEEP(*(.pci_fixup_early)) \ - __end_pci_fixups_early = .; \ - __start_pci_fixups_header = .; \ - KEEP(*(.pci_fixup_header)) \ - __end_pci_fixups_header = .; \ - __start_pci_fixups_final = .; \ - KEEP(*(.pci_fixup_final)) \ - __end_pci_fixups_final = .; \ - __start_pci_fixups_enable = .; \ - KEEP(*(.pci_fixup_enable)) \ - __end_pci_fixups_enable = .; \ - __start_pci_fixups_resume = .; \ - KEEP(*(.pci_fixup_resume)) \ - __end_pci_fixups_resume = .; \ - __start_pci_fixups_resume_early = .; \ - KEEP(*(.pci_fixup_resume_early)) \ - __end_pci_fixups_resume_early = .; \ - __start_pci_fixups_suspend = .; \ - KEEP(*(.pci_fixup_suspend)) \ - __end_pci_fixups_suspend = .; \ - __start_pci_fixups_suspend_late = .; \ - KEEP(*(.pci_fixup_suspend_late)) \ - __end_pci_fixups_suspend_late = .; \ + BOUNDED_SECTION_PRE_LABEL(.pci_fixup_early, _pci_fixups_early, __start, __end) \ + BOUNDED_SECTION_PRE_LABEL(.pci_fixup_header, _pci_fixups_header, __start, __end) \ + BOUNDED_SECTION_PRE_LABEL(.pci_fixup_final, _pci_fixups_final, __start, __end) \ + BOUNDED_SECTION_PRE_LABEL(.pci_fixup_enable, _pci_fixups_enable, __start, __end) \ + BOUNDED_SECTION_PRE_LABEL(.pci_fixup_resume, _pci_fixups_resume, __start, __end) \ + BOUNDED_SECTION_PRE_LABEL(.pci_fixup_suspend, _pci_fixups_suspend, __start, __end) \ + BOUNDED_SECTION_PRE_LABEL(.pci_fixup_resume_early, _pci_fixups_resume_early, __start, __end) \ + BOUNDED_SECTION_PRE_LABEL(.pci_fixup_suspend_late, _pci_fixups_suspend_late, __start, __end) \ } \ \ FW_LOADER_BUILT_IN_DATA \ @@ -544,16 +511,12 @@ \ /* Built-in module parameters. 
*/ \ __param : AT(ADDR(__param) - LOAD_OFFSET) { \ - __start___param = .; \ - KEEP(*(__param)) \ - __stop___param = .; \ + BOUNDED_SECTION_BY(__param, ___param) \ } \ \ /* Built-in module versions. */ \ __modver : AT(ADDR(__modver) - LOAD_OFFSET) { \ - __start___modver = .; \ - KEEP(*(__modver)) \ - __stop___modver = .; \ + BOUNDED_SECTION_BY(__modver, ___modver) \ } \ \ KCFI_TRAPS \ @@ -663,9 +626,7 @@ #define EXCEPTION_TABLE(align) \ . = ALIGN(align); \ __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { \ - __start___ex_table = .; \ - KEEP(*(__ex_table)) \ - __stop___ex_table = .; \ + BOUNDED_SECTION_BY(__ex_table, ___ex_table) \ } /* @@ -674,9 +635,7 @@ #ifdef CONFIG_DEBUG_INFO_BTF #define BTF \ .BTF : AT(ADDR(.BTF) - LOAD_OFFSET) { \ - __start_BTF = .; \ - KEEP(*(.BTF)) \ - __stop_BTF = .; \ + BOUNDED_SECTION_BY(.BTF, _BTF) \ } \ . = ALIGN(4); \ .BTF_ids : AT(ADDR(.BTF_ids) - LOAD_OFFSET) { \ @@ -853,9 +812,7 @@ #define BUG_TABLE \ . = ALIGN(8); \ __bug_table : AT(ADDR(__bug_table) - LOAD_OFFSET) { \ - __start___bug_table = .; \ - KEEP(*(__bug_table)) \ - __stop___bug_table = .; \ + BOUNDED_SECTION_BY(__bug_table, ___bug_table) \ } #else #define BUG_TABLE @@ -865,15 +822,11 @@ #define ORC_UNWIND_TABLE \ . = ALIGN(4); \ .orc_unwind_ip : AT(ADDR(.orc_unwind_ip) - LOAD_OFFSET) { \ - __start_orc_unwind_ip = .; \ - KEEP(*(.orc_unwind_ip)) \ - __stop_orc_unwind_ip = .; \ + BOUNDED_SECTION_BY(.orc_unwind_ip, _orc_unwind_ip) \ } \ . = ALIGN(2); \ .orc_unwind : AT(ADDR(.orc_unwind) - LOAD_OFFSET) { \ - __start_orc_unwind = .; \ - KEEP(*(.orc_unwind)) \ - __stop_orc_unwind = .; \ + BOUNDED_SECTION_BY(.orc_unwind, _orc_unwind) \ } \ text_size = _etext - _stext; \ . 
= ALIGN(4); \ @@ -891,9 +844,7 @@ #ifdef CONFIG_FW_LOADER #define FW_LOADER_BUILT_IN_DATA \ .builtin_fw : AT(ADDR(.builtin_fw) - LOAD_OFFSET) ALIGN(8) { \ - __start_builtin_fw = .; \ - KEEP(*(.builtin_fw)) \ - __end_builtin_fw = .; \ + BOUNDED_SECTION_PRE_LABEL(.builtin_fw, _builtin_fw, __start, __end) \ } #else #define FW_LOADER_BUILT_IN_DATA @@ -903,9 +854,7 @@ #define TRACEDATA \ . = ALIGN(4); \ .tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) { \ - __tracedata_start = .; \ - KEEP(*(.tracedata)) \ - __tracedata_end = .; \ + BOUNDED_SECTION_POST_LABEL(.tracedata, __tracedata, _start, _end) \ } #else #define TRACEDATA @@ -914,9 +863,7 @@ #ifdef CONFIG_PRINTK_INDEX #define PRINTK_INDEX \ .printk_index : AT(ADDR(.printk_index) - LOAD_OFFSET) { \ - __start_printk_index = .; \ - *(.printk_index) \ - __stop_printk_index = .; \ + BOUNDED_SECTION_BY(.printk_index, _printk_index) \ } #else #define PRINTK_INDEX @@ -924,17 +871,13 @@ #define NOTES \ .notes : AT(ADDR(.notes) - LOAD_OFFSET) { \ - __start_notes = .; \ - KEEP(*(.note.*)) \ - __stop_notes = .; \ + BOUNDED_SECTION_BY(.note.*, _notes) \ } NOTES_HEADERS \ NOTES_HEADERS_RESTORE #define INIT_SETUP(initsetup_align) \ . = ALIGN(initsetup_align); \ - __setup_start = .; \ - KEEP(*(.init.setup)) \ - __setup_end = .; + BOUNDED_SECTION_POST_LABEL(.init.setup, __setup, _start, _end) #define INIT_CALLS_LEVEL(level) \ __initcall##level##_start = .; \ @@ -956,16 +899,12 @@ __initcall_end = .; #define CON_INITCALL \ - __con_initcall_start = .; \ - KEEP(*(.con_initcall.init)) \ - __con_initcall_end = .; + BOUNDED_SECTION_POST_LABEL(.con_initcall.init, __con_initcall, _start, _end) /* Alignment must be consistent with (kunit_suite *) in include/kunit/test.h */ #define KUNIT_TABLE() \ . 
= ALIGN(8); \ - __kunit_suites_start = .; \ - KEEP(*(.kunit_test_suites)) \ - __kunit_suites_end = .; + BOUNDED_SECTION_POST_LABEL(.kunit_test_suites, __kunit_suites, _start, _end) #ifdef CONFIG_BLK_DEV_INITRD #define INIT_RAM_FS \ From 2f465b921bb8ff97025017e05f6c7a7a1f6a5749 Mon Sep 17 00:00:00 2001 From: Jim Cromie Date: Sat, 22 Oct 2022 16:56:37 -0600 Subject: [PATCH 1306/4122] vmlinux.lds.h: place optional header space in BOUNDED_SECTION Extend recently added BOUNDED_SECTION(_name) macro by adding a KEEP(*(.gnu.linkonce.##_name)) before the KEEP(*(_name)). This does nothing by itself, vmlinux is the same before and after this patch. But if a developer adds a .gnu.linkonce.foo record, that record is placed in the front of the section, where it can be used as a header for the table. The intent is to create an up-link to another organizing struct, from where related tables can be referenced. And since every item in a table has a known offset from its header, that same offset can be used to fetch records from the related tables. By itself, this doesnt gain much, unless maybe the pattern of access is to scan 1 or 2 fields in each fat record, but with 2 16 bit .map* fields added, we could de-duplicate 2 related tables. The use case here is struct _ddebug, which has 3 pointers (function, file, module) with substantial repetition; respectively 53%, 90%, and the module column is fully recoverable after dynamic_debug_init() splits the table into a linked list of "module" chunks. On a DYNAMIC_DEBUG=y kernel with 5k pr_debugs, the memory savings should be ~100 KiB. 
Signed-off-by: Jim Cromie Link: https://lore.kernel.org/r/20221022225637.1406715-3-jim.cromie@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/asm-generic/vmlinux.lds.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 9f6352171f88..b3ca56ac163f 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -195,11 +195,13 @@ #define BOUNDED_SECTION_PRE_LABEL(_sec_, _label_, _s_, _e_) \ _s_##_label_ = .; \ + KEEP(*(.gnu.linkonce.##_sec_)) \ KEEP(*(_sec_)) \ _e_##_label_ = .; #define BOUNDED_SECTION_POST_LABEL(_sec_, _label_, _s_, _e_) \ _label_##_s_ = .; \ + KEEP(*(.gnu.linkonce.##_sec_)) \ KEEP(*(_sec_)) \ _label_##_e_ = .; From 9e6f07cd1eaa72c41719050c5ca9d22a8c0b7c02 Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Fri, 4 Nov 2022 15:20:01 +0100 Subject: [PATCH 1307/4122] vfio/ccw: create a parent struct Move the stuff associated with the mdev parent (and thus the subchannel struct) into its own struct, and leave the rest in the existing private structure. The subchannel will point to the parent, and the parent will point to the private, for the areas where one or both are needed. Further separation of these structs will follow. 
Signed-off-by: Eric Farman Reviewed-by: Matthew Rosato Link: https://lore.kernel.org/r/20221104142007.1314999-2-farman@linux.ibm.com Signed-off-by: Alex Williamson --- drivers/s390/cio/vfio_ccw_drv.c | 104 ++++++++++++++++++++++------ drivers/s390/cio/vfio_ccw_ops.c | 8 ++- drivers/s390/cio/vfio_ccw_private.h | 20 ++++-- 3 files changed, 104 insertions(+), 28 deletions(-) diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c index 7f5402fe857a..444b32047397 100644 --- a/drivers/s390/cio/vfio_ccw_drv.c +++ b/drivers/s390/cio/vfio_ccw_drv.c @@ -36,10 +36,19 @@ debug_info_t *vfio_ccw_debug_trace_id; */ int vfio_ccw_sch_quiesce(struct subchannel *sch) { - struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev); + struct vfio_ccw_parent *parent = dev_get_drvdata(&sch->dev); + struct vfio_ccw_private *private = dev_get_drvdata(&parent->dev); DECLARE_COMPLETION_ONSTACK(completion); int iretry, ret = 0; + /* + * Probably an impossible situation, after being called through + * FSM callbacks. But in the event it did, register a warning + * and return as if things were fine. + */ + if (WARN_ON(!private)) + return 0; + iretry = 255; do { @@ -121,7 +130,23 @@ static void vfio_ccw_crw_todo(struct work_struct *work) */ static void vfio_ccw_sch_irq(struct subchannel *sch) { - struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev); + struct vfio_ccw_parent *parent = dev_get_drvdata(&sch->dev); + struct vfio_ccw_private *private = dev_get_drvdata(&parent->dev); + + /* + * The subchannel should still be disabled at this point, + * so an interrupt would be quite surprising. As with an + * interrupt while the FSM is closed, let's attempt to + * disable the subchannel again. 
+ */ + if (!private) { + VFIO_CCW_MSG_EVENT(2, "sch %x.%x.%04x: unexpected interrupt\n", + sch->schid.cssid, sch->schid.ssid, + sch->schid.sch_no); + + cio_disable_subchannel(sch); + return; + } inc_irq_stat(IRQIO_CIO); vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_INTERRUPT); @@ -201,10 +226,19 @@ static void vfio_ccw_free_private(struct vfio_ccw_private *private) mutex_destroy(&private->io_mutex); kfree(private); } + +static void vfio_ccw_free_parent(struct device *dev) +{ + struct vfio_ccw_parent *parent = container_of(dev, struct vfio_ccw_parent, dev); + + kfree(parent); +} + static int vfio_ccw_sch_probe(struct subchannel *sch) { struct pmcw *pmcw = &sch->schib.pmcw; struct vfio_ccw_private *private; + struct vfio_ccw_parent *parent; int ret = -ENOMEM; if (pmcw->qf) { @@ -213,38 +247,58 @@ static int vfio_ccw_sch_probe(struct subchannel *sch) return -ENODEV; } - private = vfio_ccw_alloc_private(sch); - if (IS_ERR(private)) - return PTR_ERR(private); + parent = kzalloc(sizeof(*parent), GFP_KERNEL); + if (!parent) + return -ENOMEM; - dev_set_drvdata(&sch->dev, private); - - private->mdev_type.sysfs_name = "io"; - private->mdev_type.pretty_name = "I/O subchannel (Non-QDIO)"; - private->mdev_types[0] = &private->mdev_type; - ret = mdev_register_parent(&private->parent, &sch->dev, - &vfio_ccw_mdev_driver, - private->mdev_types, 1); + dev_set_name(&parent->dev, "parent"); + parent->dev.parent = &sch->dev; + parent->dev.release = &vfio_ccw_free_parent; + ret = device_register(&parent->dev); if (ret) goto out_free; + private = vfio_ccw_alloc_private(sch); + if (IS_ERR(private)) { + device_unregister(&parent->dev); + return PTR_ERR(private); + } + + dev_set_drvdata(&sch->dev, parent); + dev_set_drvdata(&parent->dev, private); + + parent->mdev_type.sysfs_name = "io"; + parent->mdev_type.pretty_name = "I/O subchannel (Non-QDIO)"; + parent->mdev_types[0] = &parent->mdev_type; + ret = mdev_register_parent(&parent->parent, &sch->dev, + &vfio_ccw_mdev_driver, + 
parent->mdev_types, 1); + if (ret) + goto out_unreg; + VFIO_CCW_MSG_EVENT(4, "bound to subchannel %x.%x.%04x\n", sch->schid.cssid, sch->schid.ssid, sch->schid.sch_no); return 0; +out_unreg: + device_unregister(&parent->dev); out_free: + dev_set_drvdata(&parent->dev, NULL); dev_set_drvdata(&sch->dev, NULL); - vfio_ccw_free_private(private); + if (private) + vfio_ccw_free_private(private); return ret; } static void vfio_ccw_sch_remove(struct subchannel *sch) { - struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev); + struct vfio_ccw_parent *parent = dev_get_drvdata(&sch->dev); + struct vfio_ccw_private *private = dev_get_drvdata(&parent->dev); - mdev_unregister_parent(&private->parent); + mdev_unregister_parent(&parent->parent); + device_unregister(&parent->dev); dev_set_drvdata(&sch->dev, NULL); vfio_ccw_free_private(private); @@ -256,7 +310,11 @@ static void vfio_ccw_sch_remove(struct subchannel *sch) static void vfio_ccw_sch_shutdown(struct subchannel *sch) { - struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev); + struct vfio_ccw_parent *parent = dev_get_drvdata(&sch->dev); + struct vfio_ccw_private *private = dev_get_drvdata(&parent->dev); + + if (WARN_ON(!private)) + return; vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_CLOSE); vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_NOT_OPER); @@ -274,7 +332,8 @@ static void vfio_ccw_sch_shutdown(struct subchannel *sch) */ static int vfio_ccw_sch_event(struct subchannel *sch, int process) { - struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev); + struct vfio_ccw_parent *parent = dev_get_drvdata(&sch->dev); + struct vfio_ccw_private *private = dev_get_drvdata(&parent->dev); unsigned long flags; int rc = -EAGAIN; @@ -287,8 +346,10 @@ static int vfio_ccw_sch_event(struct subchannel *sch, int process) rc = 0; - if (cio_update_schib(sch)) - vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_NOT_OPER); + if (cio_update_schib(sch)) { + if (private) + vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_NOT_OPER); + } 
out_unlock: spin_unlock_irqrestore(sch->lock, flags); @@ -326,7 +387,8 @@ static void vfio_ccw_queue_crw(struct vfio_ccw_private *private, static int vfio_ccw_chp_event(struct subchannel *sch, struct chp_link *link, int event) { - struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev); + struct vfio_ccw_parent *parent = dev_get_drvdata(&sch->dev); + struct vfio_ccw_private *private = dev_get_drvdata(&parent->dev); int mask = chp_ssd_get_mask(&sch->ssd_info, link); int retry = 255; diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c index 6ae4d012d800..dc084883d872 100644 --- a/drivers/s390/cio/vfio_ccw_ops.c +++ b/drivers/s390/cio/vfio_ccw_ops.c @@ -55,7 +55,9 @@ static int vfio_ccw_mdev_init_dev(struct vfio_device *vdev) static int vfio_ccw_mdev_probe(struct mdev_device *mdev) { - struct vfio_ccw_private *private = dev_get_drvdata(mdev->dev.parent); + struct subchannel *sch = to_subchannel(mdev->dev.parent); + struct vfio_ccw_parent *parent = dev_get_drvdata(&sch->dev); + struct vfio_ccw_private *private = dev_get_drvdata(&parent->dev); int ret; if (private->state == VFIO_CCW_STATE_NOT_OPER) @@ -100,7 +102,9 @@ static void vfio_ccw_mdev_release_dev(struct vfio_device *vdev) static void vfio_ccw_mdev_remove(struct mdev_device *mdev) { - struct vfio_ccw_private *private = dev_get_drvdata(mdev->dev.parent); + struct subchannel *sch = to_subchannel(mdev->dev.parent); + struct vfio_ccw_parent *parent = dev_get_drvdata(&sch->dev); + struct vfio_ccw_private *private = dev_get_drvdata(&parent->dev); VFIO_CCW_MSG_EVENT(2, "sch %x.%x.%04x: remove\n", private->sch->schid.cssid, diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h index bd5fb81456af..1f598d58d969 100644 --- a/drivers/s390/cio/vfio_ccw_private.h +++ b/drivers/s390/cio/vfio_ccw_private.h @@ -67,6 +67,21 @@ struct vfio_ccw_crw { struct crw crw; }; +/** + * struct vfio_ccw_parent + * + * @dev: embedded device struct + * @parent: parent data 
structures for mdevs created + * @mdev_type(s): identifying information for mdevs created + */ +struct vfio_ccw_parent { + struct device dev; + + struct mdev_parent parent; + struct mdev_type mdev_type; + struct mdev_type *mdev_types[1]; +}; + /** * struct vfio_ccw_private * @vdev: Embedded VFIO device @@ -89,7 +104,6 @@ struct vfio_ccw_crw { * @io_work: work for deferral process of I/O handling * @crw_work: work for deferral process of CRW handling * @release_comp: synchronization helper for vfio device release - * @parent: parent data structures for mdevs created */ struct vfio_ccw_private { struct vfio_device vdev; @@ -116,10 +130,6 @@ struct vfio_ccw_private { struct work_struct crw_work; struct completion release_comp; - - struct mdev_parent parent; - struct mdev_type mdev_type; - struct mdev_type *mdev_types[1]; } __aligned(8); int vfio_ccw_sch_quiesce(struct subchannel *sch); From 008a011d68036ebfa4dede07cb9b93dedaa958b1 Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Fri, 4 Nov 2022 15:20:02 +0100 Subject: [PATCH 1308/4122] vfio/ccw: remove private->sch These places all rely on the ability to jump from a private struct back to the subchannel struct. Rather than keeping a copy in our back pocket, let's use the relationship provided by the vfio_device embedded within the private. 
Signed-off-by: Eric Farman Reviewed-by: Matthew Rosato Link: https://lore.kernel.org/r/20221104142007.1314999-3-farman@linux.ibm.com Signed-off-by: Alex Williamson --- drivers/s390/cio/vfio_ccw_chp.c | 5 +++-- drivers/s390/cio/vfio_ccw_drv.c | 3 +-- drivers/s390/cio/vfio_ccw_fsm.c | 27 ++++++++++++--------------- drivers/s390/cio/vfio_ccw_ops.c | 12 ++++++------ drivers/s390/cio/vfio_ccw_private.h | 7 ++++--- 5 files changed, 26 insertions(+), 28 deletions(-) diff --git a/drivers/s390/cio/vfio_ccw_chp.c b/drivers/s390/cio/vfio_ccw_chp.c index 13b26a1c7988..d3f3a611f95b 100644 --- a/drivers/s390/cio/vfio_ccw_chp.c +++ b/drivers/s390/cio/vfio_ccw_chp.c @@ -16,6 +16,7 @@ static ssize_t vfio_ccw_schib_region_read(struct vfio_ccw_private *private, char __user *buf, size_t count, loff_t *ppos) { + struct subchannel *sch = to_subchannel(private->vdev.dev->parent); unsigned int i = VFIO_CCW_OFFSET_TO_INDEX(*ppos) - VFIO_CCW_NUM_REGIONS; loff_t pos = *ppos & VFIO_CCW_OFFSET_MASK; struct ccw_schib_region *region; @@ -27,12 +28,12 @@ static ssize_t vfio_ccw_schib_region_read(struct vfio_ccw_private *private, mutex_lock(&private->io_mutex); region = private->region[i].data; - if (cio_update_schib(private->sch)) { + if (cio_update_schib(sch)) { ret = -ENODEV; goto out; } - memcpy(region, &private->sch->schib, sizeof(*region)); + memcpy(region, &sch->schib, sizeof(*region)); if (copy_to_user(buf, (void *)region + pos, count)) { ret = -EFAULT; diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c index 444b32047397..2c680a556383 100644 --- a/drivers/s390/cio/vfio_ccw_drv.c +++ b/drivers/s390/cio/vfio_ccw_drv.c @@ -160,7 +160,6 @@ static struct vfio_ccw_private *vfio_ccw_alloc_private(struct subchannel *sch) if (!private) return ERR_PTR(-ENOMEM); - private->sch = sch; mutex_init(&private->io_mutex); private->state = VFIO_CCW_STATE_STANDBY; INIT_LIST_HEAD(&private->crw); @@ -395,7 +394,7 @@ static int vfio_ccw_chp_event(struct subchannel *sch, if (!private 
|| !mask) return 0; - trace_vfio_ccw_chp_event(private->sch->schid, mask, event); + trace_vfio_ccw_chp_event(sch->schid, mask, event); VFIO_CCW_MSG_EVENT(2, "sch %x.%x.%04x: mask=0x%x event=%d\n", sch->schid.cssid, sch->schid.ssid, sch->schid.sch_no, diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c index a59c758869f8..e67fad897af3 100644 --- a/drivers/s390/cio/vfio_ccw_fsm.c +++ b/drivers/s390/cio/vfio_ccw_fsm.c @@ -18,15 +18,13 @@ static int fsm_io_helper(struct vfio_ccw_private *private) { - struct subchannel *sch; + struct subchannel *sch = to_subchannel(private->vdev.dev->parent); union orb *orb; int ccode; __u8 lpm; unsigned long flags; int ret; - sch = private->sch; - spin_lock_irqsave(sch->lock, flags); orb = cp_get_orb(&private->cp, (u32)(addr_t)sch, sch->lpm); @@ -80,13 +78,11 @@ out: static int fsm_do_halt(struct vfio_ccw_private *private) { - struct subchannel *sch; + struct subchannel *sch = to_subchannel(private->vdev.dev->parent); unsigned long flags; int ccode; int ret; - sch = private->sch; - spin_lock_irqsave(sch->lock, flags); VFIO_CCW_TRACE_EVENT(2, "haltIO"); @@ -121,13 +117,11 @@ static int fsm_do_halt(struct vfio_ccw_private *private) static int fsm_do_clear(struct vfio_ccw_private *private) { - struct subchannel *sch; + struct subchannel *sch = to_subchannel(private->vdev.dev->parent); unsigned long flags; int ccode; int ret; - sch = private->sch; - spin_lock_irqsave(sch->lock, flags); VFIO_CCW_TRACE_EVENT(2, "clearIO"); @@ -160,7 +154,7 @@ static int fsm_do_clear(struct vfio_ccw_private *private) static void fsm_notoper(struct vfio_ccw_private *private, enum vfio_ccw_event event) { - struct subchannel *sch = private->sch; + struct subchannel *sch = to_subchannel(private->vdev.dev->parent); VFIO_CCW_MSG_EVENT(2, "sch %x.%x.%04x: notoper event %x state %x\n", sch->schid.cssid, @@ -228,7 +222,7 @@ static void fsm_async_retry(struct vfio_ccw_private *private, static void fsm_disabled_irq(struct vfio_ccw_private 
*private, enum vfio_ccw_event event) { - struct subchannel *sch = private->sch; + struct subchannel *sch = to_subchannel(private->vdev.dev->parent); /* * An interrupt in a disabled state means a previous disable was not @@ -238,7 +232,9 @@ static void fsm_disabled_irq(struct vfio_ccw_private *private, } inline struct subchannel_id get_schid(struct vfio_ccw_private *p) { - return p->sch->schid; + struct subchannel *sch = to_subchannel(p->vdev.dev->parent); + + return sch->schid; } /* @@ -360,10 +356,11 @@ static void fsm_async_request(struct vfio_ccw_private *private, static void fsm_irq(struct vfio_ccw_private *private, enum vfio_ccw_event event) { + struct subchannel *sch = to_subchannel(private->vdev.dev->parent); struct irb *irb = this_cpu_ptr(&cio_irb); VFIO_CCW_TRACE_EVENT(6, "IRQ"); - VFIO_CCW_TRACE_EVENT(6, dev_name(&private->sch->dev)); + VFIO_CCW_TRACE_EVENT(6, dev_name(&sch->dev)); memcpy(&private->irb, irb, sizeof(*irb)); @@ -376,7 +373,7 @@ static void fsm_irq(struct vfio_ccw_private *private, static void fsm_open(struct vfio_ccw_private *private, enum vfio_ccw_event event) { - struct subchannel *sch = private->sch; + struct subchannel *sch = to_subchannel(private->vdev.dev->parent); int ret; spin_lock_irq(sch->lock); @@ -397,7 +394,7 @@ err_unlock: static void fsm_close(struct vfio_ccw_private *private, enum vfio_ccw_event event) { - struct subchannel *sch = private->sch; + struct subchannel *sch = to_subchannel(private->vdev.dev->parent); int ret; spin_lock_irq(sch->lock); diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c index dc084883d872..79c50cb7dcb8 100644 --- a/drivers/s390/cio/vfio_ccw_ops.c +++ b/drivers/s390/cio/vfio_ccw_ops.c @@ -68,9 +68,9 @@ static int vfio_ccw_mdev_probe(struct mdev_device *mdev) return ret; VFIO_CCW_MSG_EVENT(2, "sch %x.%x.%04x: create\n", - private->sch->schid.cssid, - private->sch->schid.ssid, - private->sch->schid.sch_no); + sch->schid.cssid, + sch->schid.ssid, + sch->schid.sch_no); ret = 
vfio_register_emulated_iommu_dev(&private->vdev); if (ret) @@ -107,9 +107,9 @@ static void vfio_ccw_mdev_remove(struct mdev_device *mdev) struct vfio_ccw_private *private = dev_get_drvdata(&parent->dev); VFIO_CCW_MSG_EVENT(2, "sch %x.%x.%04x: remove\n", - private->sch->schid.cssid, - private->sch->schid.ssid, - private->sch->schid.sch_no); + sch->schid.cssid, + sch->schid.ssid, + sch->schid.sch_no); vfio_unregister_group_dev(&private->vdev); diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h index 1f598d58d969..b28af2f63963 100644 --- a/drivers/s390/cio/vfio_ccw_private.h +++ b/drivers/s390/cio/vfio_ccw_private.h @@ -85,7 +85,6 @@ struct vfio_ccw_parent { /** * struct vfio_ccw_private * @vdev: Embedded VFIO device - * @sch: pointer to the subchannel * @state: internal state of the device * @completion: synchronization helper of the I/O completion * @io_region: MMIO region to input/output I/O arguments/results @@ -107,7 +106,6 @@ struct vfio_ccw_parent { */ struct vfio_ccw_private { struct vfio_device vdev; - struct subchannel *sch; int state; struct completion *completion; struct ccw_io_region *io_region; @@ -172,7 +170,10 @@ extern fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS]; static inline void vfio_ccw_fsm_event(struct vfio_ccw_private *private, enum vfio_ccw_event event) { - trace_vfio_ccw_fsm_event(private->sch->schid, private->state, event); + struct subchannel *sch = to_subchannel(private->vdev.dev->parent); + + if (sch) + trace_vfio_ccw_fsm_event(sch->schid, private->state, event); vfio_ccw_jumptable[private->state][event](private, event); } From 06caaa27df8f1a8a6be78212c5ef5cac04500176 Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Fri, 4 Nov 2022 15:20:03 +0100 Subject: [PATCH 1309/4122] vfio/ccw: move private initialization to callback There's already a device initialization callback that is used to initialize the release completion workaround that was introduced by commit ebb72b765fb49 
("vfio/ccw: Use the new device life cycle helpers"). Move the other elements of the vfio_ccw_private struct that require distinct initialization over to that routine. With that done, the vfio_ccw_alloc_private routine only does a kzalloc, so fold it inline. Signed-off-by: Eric Farman Reviewed-by: Matthew Rosato Link: https://lore.kernel.org/r/20221104142007.1314999-4-farman@linux.ibm.com Signed-off-by: Alex Williamson --- drivers/s390/cio/vfio_ccw_drv.c | 74 ++++------------------------- drivers/s390/cio/vfio_ccw_ops.c | 43 +++++++++++++++++ drivers/s390/cio/vfio_ccw_private.h | 7 ++- 3 files changed, 58 insertions(+), 66 deletions(-) diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c index 2c680a556383..fbc26338ceab 100644 --- a/drivers/s390/cio/vfio_ccw_drv.c +++ b/drivers/s390/cio/vfio_ccw_drv.c @@ -23,10 +23,10 @@ #include "vfio_ccw_private.h" struct workqueue_struct *vfio_ccw_work_q; -static struct kmem_cache *vfio_ccw_io_region; -static struct kmem_cache *vfio_ccw_cmd_region; -static struct kmem_cache *vfio_ccw_schib_region; -static struct kmem_cache *vfio_ccw_crw_region; +struct kmem_cache *vfio_ccw_io_region; +struct kmem_cache *vfio_ccw_cmd_region; +struct kmem_cache *vfio_ccw_schib_region; +struct kmem_cache *vfio_ccw_crw_region; debug_info_t *vfio_ccw_debug_msg_id; debug_info_t *vfio_ccw_debug_trace_id; @@ -79,7 +79,7 @@ int vfio_ccw_sch_quiesce(struct subchannel *sch) return ret; } -static void vfio_ccw_sch_io_todo(struct work_struct *work) +void vfio_ccw_sch_io_todo(struct work_struct *work) { struct vfio_ccw_private *private; struct irb *irb; @@ -115,7 +115,7 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work) eventfd_signal(private->io_trigger, 1); } -static void vfio_ccw_crw_todo(struct work_struct *work) +void vfio_ccw_crw_todo(struct work_struct *work) { struct vfio_ccw_private *private; @@ -152,62 +152,6 @@ static void vfio_ccw_sch_irq(struct subchannel *sch) vfio_ccw_fsm_event(private, 
VFIO_CCW_EVENT_INTERRUPT); } -static struct vfio_ccw_private *vfio_ccw_alloc_private(struct subchannel *sch) -{ - struct vfio_ccw_private *private; - - private = kzalloc(sizeof(*private), GFP_KERNEL); - if (!private) - return ERR_PTR(-ENOMEM); - - mutex_init(&private->io_mutex); - private->state = VFIO_CCW_STATE_STANDBY; - INIT_LIST_HEAD(&private->crw); - INIT_WORK(&private->io_work, vfio_ccw_sch_io_todo); - INIT_WORK(&private->crw_work, vfio_ccw_crw_todo); - - private->cp.guest_cp = kcalloc(CCWCHAIN_LEN_MAX, sizeof(struct ccw1), - GFP_KERNEL); - if (!private->cp.guest_cp) - goto out_free_private; - - private->io_region = kmem_cache_zalloc(vfio_ccw_io_region, - GFP_KERNEL | GFP_DMA); - if (!private->io_region) - goto out_free_cp; - - private->cmd_region = kmem_cache_zalloc(vfio_ccw_cmd_region, - GFP_KERNEL | GFP_DMA); - if (!private->cmd_region) - goto out_free_io; - - private->schib_region = kmem_cache_zalloc(vfio_ccw_schib_region, - GFP_KERNEL | GFP_DMA); - - if (!private->schib_region) - goto out_free_cmd; - - private->crw_region = kmem_cache_zalloc(vfio_ccw_crw_region, - GFP_KERNEL | GFP_DMA); - - if (!private->crw_region) - goto out_free_schib; - return private; - -out_free_schib: - kmem_cache_free(vfio_ccw_schib_region, private->schib_region); -out_free_cmd: - kmem_cache_free(vfio_ccw_cmd_region, private->cmd_region); -out_free_io: - kmem_cache_free(vfio_ccw_io_region, private->io_region); -out_free_cp: - kfree(private->cp.guest_cp); -out_free_private: - mutex_destroy(&private->io_mutex); - kfree(private); - return ERR_PTR(-ENOMEM); -} - static void vfio_ccw_free_private(struct vfio_ccw_private *private) { struct vfio_ccw_crw *crw, *temp; @@ -257,10 +201,10 @@ static int vfio_ccw_sch_probe(struct subchannel *sch) if (ret) goto out_free; - private = vfio_ccw_alloc_private(sch); - if (IS_ERR(private)) { + private = kzalloc(sizeof(*private), GFP_KERNEL); + if (!private) { device_unregister(&parent->dev); - return PTR_ERR(private); + return -ENOMEM; } 
dev_set_drvdata(&sch->dev, parent); diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c index 79c50cb7dcb8..eb0b8cc210bb 100644 --- a/drivers/s390/cio/vfio_ccw_ops.c +++ b/drivers/s390/cio/vfio_ccw_ops.c @@ -49,8 +49,51 @@ static int vfio_ccw_mdev_init_dev(struct vfio_device *vdev) struct vfio_ccw_private *private = container_of(vdev, struct vfio_ccw_private, vdev); + mutex_init(&private->io_mutex); + private->state = VFIO_CCW_STATE_STANDBY; + INIT_LIST_HEAD(&private->crw); + INIT_WORK(&private->io_work, vfio_ccw_sch_io_todo); + INIT_WORK(&private->crw_work, vfio_ccw_crw_todo); init_completion(&private->release_comp); + + private->cp.guest_cp = kcalloc(CCWCHAIN_LEN_MAX, sizeof(struct ccw1), + GFP_KERNEL); + if (!private->cp.guest_cp) + goto out_free_private; + + private->io_region = kmem_cache_zalloc(vfio_ccw_io_region, + GFP_KERNEL | GFP_DMA); + if (!private->io_region) + goto out_free_cp; + + private->cmd_region = kmem_cache_zalloc(vfio_ccw_cmd_region, + GFP_KERNEL | GFP_DMA); + if (!private->cmd_region) + goto out_free_io; + + private->schib_region = kmem_cache_zalloc(vfio_ccw_schib_region, + GFP_KERNEL | GFP_DMA); + if (!private->schib_region) + goto out_free_cmd; + + private->crw_region = kmem_cache_zalloc(vfio_ccw_crw_region, + GFP_KERNEL | GFP_DMA); + if (!private->crw_region) + goto out_free_schib; + return 0; + +out_free_schib: + kmem_cache_free(vfio_ccw_schib_region, private->schib_region); +out_free_cmd: + kmem_cache_free(vfio_ccw_cmd_region, private->cmd_region); +out_free_io: + kmem_cache_free(vfio_ccw_io_region, private->io_region); +out_free_cp: + kfree(private->cp.guest_cp); +out_free_private: + mutex_destroy(&private->io_mutex); + return -ENOMEM; } static int vfio_ccw_mdev_probe(struct mdev_device *mdev) diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h index b28af2f63963..55d636225cff 100644 --- a/drivers/s390/cio/vfio_ccw_private.h +++ b/drivers/s390/cio/vfio_ccw_private.h @@ -131,6 
+131,8 @@ struct vfio_ccw_private { } __aligned(8); int vfio_ccw_sch_quiesce(struct subchannel *sch); +void vfio_ccw_sch_io_todo(struct work_struct *work); +void vfio_ccw_crw_todo(struct work_struct *work); extern struct mdev_driver vfio_ccw_mdev_driver; @@ -178,7 +180,10 @@ static inline void vfio_ccw_fsm_event(struct vfio_ccw_private *private, } extern struct workqueue_struct *vfio_ccw_work_q; - +extern struct kmem_cache *vfio_ccw_io_region; +extern struct kmem_cache *vfio_ccw_cmd_region; +extern struct kmem_cache *vfio_ccw_schib_region; +extern struct kmem_cache *vfio_ccw_crw_region; /* s390 debug feature, similar to base cio */ extern debug_info_t *vfio_ccw_debug_msg_id; From 3d62fe18b6a3a93c5360c2d590b9b40b6842133e Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Fri, 4 Nov 2022 15:20:04 +0100 Subject: [PATCH 1310/4122] vfio/ccw: move private to mdev lifecycle Now that the mdev parent data is split out into its own struct, it is safe to move the remaining private data to follow the mdev probe/remove lifecycle. The mdev parent data will remain where it is, and follow the subchannel and the css driver interfaces. 
Signed-off-by: Eric Farman Reviewed-by: Matthew Rosato Link: https://lore.kernel.org/r/20221104142007.1314999-5-farman@linux.ibm.com Signed-off-by: Alex Williamson --- drivers/s390/cio/vfio_ccw_drv.c | 16 +--------------- drivers/s390/cio/vfio_ccw_ops.c | 26 +++++++++++++------------- drivers/s390/cio/vfio_ccw_private.h | 2 ++ 3 files changed, 16 insertions(+), 28 deletions(-) diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c index fbc26338ceab..9fbd1b27a1ac 100644 --- a/drivers/s390/cio/vfio_ccw_drv.c +++ b/drivers/s390/cio/vfio_ccw_drv.c @@ -152,7 +152,7 @@ static void vfio_ccw_sch_irq(struct subchannel *sch) vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_INTERRUPT); } -static void vfio_ccw_free_private(struct vfio_ccw_private *private) +void vfio_ccw_free_private(struct vfio_ccw_private *private) { struct vfio_ccw_crw *crw, *temp; @@ -180,7 +180,6 @@ static void vfio_ccw_free_parent(struct device *dev) static int vfio_ccw_sch_probe(struct subchannel *sch) { struct pmcw *pmcw = &sch->schib.pmcw; - struct vfio_ccw_private *private; struct vfio_ccw_parent *parent; int ret = -ENOMEM; @@ -201,14 +200,7 @@ static int vfio_ccw_sch_probe(struct subchannel *sch) if (ret) goto out_free; - private = kzalloc(sizeof(*private), GFP_KERNEL); - if (!private) { - device_unregister(&parent->dev); - return -ENOMEM; - } - dev_set_drvdata(&sch->dev, parent); - dev_set_drvdata(&parent->dev, private); parent->mdev_type.sysfs_name = "io"; parent->mdev_type.pretty_name = "I/O subchannel (Non-QDIO)"; @@ -227,25 +219,19 @@ static int vfio_ccw_sch_probe(struct subchannel *sch) out_unreg: device_unregister(&parent->dev); out_free: - dev_set_drvdata(&parent->dev, NULL); dev_set_drvdata(&sch->dev, NULL); - if (private) - vfio_ccw_free_private(private); return ret; } static void vfio_ccw_sch_remove(struct subchannel *sch) { struct vfio_ccw_parent *parent = dev_get_drvdata(&sch->dev); - struct vfio_ccw_private *private = dev_get_drvdata(&parent->dev); 
mdev_unregister_parent(&parent->parent); device_unregister(&parent->dev); dev_set_drvdata(&sch->dev, NULL); - vfio_ccw_free_private(private); - VFIO_CCW_MSG_EVENT(4, "unbound from subchannel %x.%x.%04x\n", sch->schid.cssid, sch->schid.ssid, sch->schid.sch_no); diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c index eb0b8cc210bb..e45d4acb109b 100644 --- a/drivers/s390/cio/vfio_ccw_ops.c +++ b/drivers/s390/cio/vfio_ccw_ops.c @@ -100,15 +100,20 @@ static int vfio_ccw_mdev_probe(struct mdev_device *mdev) { struct subchannel *sch = to_subchannel(mdev->dev.parent); struct vfio_ccw_parent *parent = dev_get_drvdata(&sch->dev); - struct vfio_ccw_private *private = dev_get_drvdata(&parent->dev); + struct vfio_ccw_private *private; int ret; - if (private->state == VFIO_CCW_STATE_NOT_OPER) - return -ENODEV; + private = kzalloc(sizeof(*private), GFP_KERNEL); + if (!private) + return -ENOMEM; ret = vfio_init_device(&private->vdev, &mdev->dev, &vfio_ccw_dev_ops); - if (ret) + if (ret) { + kfree(private); return ret; + } + + dev_set_drvdata(&parent->dev, private); VFIO_CCW_MSG_EVENT(2, "sch %x.%x.%04x: create\n", sch->schid.cssid, @@ -122,6 +127,7 @@ static int vfio_ccw_mdev_probe(struct mdev_device *mdev) return 0; err_put_vdev: + dev_set_drvdata(&parent->dev, NULL); vfio_put_device(&private->vdev); return ret; } @@ -131,15 +137,6 @@ static void vfio_ccw_mdev_release_dev(struct vfio_device *vdev) struct vfio_ccw_private *private = container_of(vdev, struct vfio_ccw_private, vdev); - /* - * We cannot free vfio_ccw_private here because it includes - * parent info which must be free'ed by css driver. - * - * Use a workaround by memset'ing the core device part and - * then notifying the remove path that all active references - * to this device have been released. 
- */ - memset(vdev, 0, sizeof(*vdev)); complete(&private->release_comp); } @@ -156,6 +153,7 @@ static void vfio_ccw_mdev_remove(struct mdev_device *mdev) vfio_unregister_group_dev(&private->vdev); + dev_set_drvdata(&parent->dev, NULL); vfio_put_device(&private->vdev); /* * Wait for all active references on mdev are released so it @@ -166,6 +164,8 @@ static void vfio_ccw_mdev_remove(struct mdev_device *mdev) * cycle. */ wait_for_completion(&private->release_comp); + + vfio_ccw_free_private(private); } static int vfio_ccw_mdev_open_device(struct vfio_device *vdev) diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h index 55d636225cff..747aba5f5272 100644 --- a/drivers/s390/cio/vfio_ccw_private.h +++ b/drivers/s390/cio/vfio_ccw_private.h @@ -134,6 +134,8 @@ int vfio_ccw_sch_quiesce(struct subchannel *sch); void vfio_ccw_sch_io_todo(struct work_struct *work); void vfio_ccw_crw_todo(struct work_struct *work); +void vfio_ccw_free_private(struct vfio_ccw_private *private); + extern struct mdev_driver vfio_ccw_mdev_driver; /* From f4da83f7e3f096ece936512d86ef3726e470fbfd Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Fri, 4 Nov 2022 15:20:05 +0100 Subject: [PATCH 1311/4122] vfio/ccw: remove release completion There's enough separation between the parent and private structs now, that it is fine to remove the release completion hack. 
Signed-off-by: Eric Farman Reviewed-by: Kevin Tian Reviewed-by: Matthew Rosato Link: https://lore.kernel.org/r/20221104142007.1314999-6-farman@linux.ibm.com Signed-off-by: Alex Williamson --- drivers/s390/cio/vfio_ccw_ops.c | 14 +------------- drivers/s390/cio/vfio_ccw_private.h | 3 --- 2 files changed, 1 insertion(+), 16 deletions(-) diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c index e45d4acb109b..8a929a9cf3c6 100644 --- a/drivers/s390/cio/vfio_ccw_ops.c +++ b/drivers/s390/cio/vfio_ccw_ops.c @@ -54,7 +54,6 @@ static int vfio_ccw_mdev_init_dev(struct vfio_device *vdev) INIT_LIST_HEAD(&private->crw); INIT_WORK(&private->io_work, vfio_ccw_sch_io_todo); INIT_WORK(&private->crw_work, vfio_ccw_crw_todo); - init_completion(&private->release_comp); private->cp.guest_cp = kcalloc(CCWCHAIN_LEN_MAX, sizeof(struct ccw1), GFP_KERNEL); @@ -137,7 +136,7 @@ static void vfio_ccw_mdev_release_dev(struct vfio_device *vdev) struct vfio_ccw_private *private = container_of(vdev, struct vfio_ccw_private, vdev); - complete(&private->release_comp); + vfio_ccw_free_private(private); } static void vfio_ccw_mdev_remove(struct mdev_device *mdev) @@ -155,17 +154,6 @@ static void vfio_ccw_mdev_remove(struct mdev_device *mdev) dev_set_drvdata(&parent->dev, NULL); vfio_put_device(&private->vdev); - /* - * Wait for all active references on mdev are released so it - * is safe to defer kfree() to a later point. - * - * TODO: the clean fix is to split parent/mdev info from ccw - * private structure so each can be managed in its own life - * cycle. 
- */ - wait_for_completion(&private->release_comp); - - vfio_ccw_free_private(private); } static int vfio_ccw_mdev_open_device(struct vfio_device *vdev) diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h index 747aba5f5272..2278fd38d34e 100644 --- a/drivers/s390/cio/vfio_ccw_private.h +++ b/drivers/s390/cio/vfio_ccw_private.h @@ -102,7 +102,6 @@ struct vfio_ccw_parent { * @req_trigger: eventfd ctx for signaling userspace to return device * @io_work: work for deferral process of I/O handling * @crw_work: work for deferral process of CRW handling - * @release_comp: synchronization helper for vfio device release */ struct vfio_ccw_private { struct vfio_device vdev; @@ -126,8 +125,6 @@ struct vfio_ccw_private { struct eventfd_ctx *req_trigger; struct work_struct io_work; struct work_struct crw_work; - - struct completion release_comp; } __aligned(8); int vfio_ccw_sch_quiesce(struct subchannel *sch); From 6ac73820993c13f30d226f9521f8ffae62acdf42 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 10 Nov 2022 15:30:10 -0300 Subject: [PATCH 1312/4122] perf trace: Add augmenter for clock_gettime's rqtp timespec arg One more before going the BTF way: # perf trace -e /home/acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.o,*nanosleep ? pool-gsd-smart/2893 ... [continued]: clock_nanosleep()) = 0 ? gpm/1042 ... [continued]: clock_nanosleep()) = 0 1.232 pool-gsd-smart/2893 clock_nanosleep(rqtp: { .tv_sec: 1, .tv_nsec: 0 }, rmtp: 0x7f64d7ffec50) ... 1.232 pool-gsd-smart/2893 ... [continued]: clock_nanosleep()) = 0 327.329 gpm/1042 clock_nanosleep(rqtp: { .tv_sec: 2, .tv_nsec: 0 }, rmtp: 0x7ffddfd1cf20) ... 1002.482 pool-gsd-smart/2893 clock_nanosleep(rqtp: { .tv_sec: 1, .tv_nsec: 0 }, rmtp: 0x7f64d7ffec50) = 0 327.329 gpm/1042 ... [continued]: clock_nanosleep()) = 0 2003.947 pool-gsd-smart/2893 clock_nanosleep(rqtp: { .tv_sec: 1, .tv_nsec: 0 }, rmtp: 0x7f64d7ffec50) ... 2003.947 pool-gsd-smart/2893 ... 
[continued]: clock_nanosleep()) = 0 2327.858 gpm/1042 clock_nanosleep(rqtp: { .tv_sec: 2, .tv_nsec: 0 }, rmtp: 0x7ffddfd1cf20) ... ? crond/1384 ... [continued]: clock_nanosleep()) = 0 3005.382 pool-gsd-smart/2893 clock_nanosleep(rqtp: { .tv_sec: 1, .tv_nsec: 0 }, rmtp: 0x7f64d7ffec50) ... 3005.382 pool-gsd-smart/2893 ... [continued]: clock_nanosleep()) = 0 3675.633 crond/1384 clock_nanosleep(rqtp: { .tv_sec: 60, .tv_nsec: 0 }, rmtp: 0x7ffcc02b66b0) ... ^C# Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 2 ++ .../examples/bpf/augmented_raw_syscalls.c | 28 +++++++++++++++++++ tools/perf/trace/beauty/Build | 1 + tools/perf/trace/beauty/beauty.h | 3 ++ tools/perf/trace/beauty/timespec.c | 21 ++++++++++++++ 5 files changed, 55 insertions(+) create mode 100644 tools/perf/trace/beauty/timespec.c diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 5690c33c523b..c93b359abc31 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -924,6 +924,8 @@ static struct syscall_fmt syscall_fmts[] = { .arg = { [0] = { .scnprintf = SCA_PTR, /* brk */ }, }, }, { .name = "clock_gettime", .arg = { [0] = STRARRAY(clk_id, clockid), }, }, + { .name = "clock_nanosleep", + .arg = { [2] = { .scnprintf = SCA_TIMESPEC, /* rqtp */ }, }, }, { .name = "clone", .errpid = true, .nr_args = 5, .arg = { [0] = { .name = "flags", .scnprintf = SCA_CLONE_FLAGS, }, [1] = { .name = "child_stack", .scnprintf = SCA_HEX, }, diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c index 0599823e8ae1..7dc24c9173a7 100644 --- a/tools/perf/examples/bpf/augmented_raw_syscalls.c +++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c @@ -21,6 +21,13 @@ // FIXME: These should come from system headers typedef char bool; typedef int pid_t; +typedef long long int __s64; +typedef __s64 time64_t; + +struct timespec64 { + time64_t tv_sec; + long int tv_nsec; +}; /* bpf-output associated map */ struct 
__augmented_syscalls__ { @@ -337,6 +344,27 @@ failure: return 1; /* Failure: don't filter */ } +SEC("!syscalls:sys_enter_clock_nanosleep") +int sys_enter_clock_nanosleep(struct syscall_enter_args *args) +{ + struct augmented_args_payload *augmented_args = augmented_args_payload(); + const void *rqtp_arg = (const void *)args->args[2]; + unsigned int len = sizeof(augmented_args->args); + __u32 size = sizeof(struct timespec64); + + if (augmented_args == NULL) + goto failure; + + if (size > sizeof(augmented_args->__data)) + goto failure; + + bpf_probe_read(&augmented_args->__data, size, rqtp_arg); + + return augmented__output(args, augmented_args, len + size); +failure: + return 1; /* Failure: don't filter */ +} + static pid_t getpid(void) { return bpf_get_current_pid_tgid(); diff --git a/tools/perf/trace/beauty/Build b/tools/perf/trace/beauty/Build index 433dc39053a7..d11ce256f511 100644 --- a/tools/perf/trace/beauty/Build +++ b/tools/perf/trace/beauty/Build @@ -17,4 +17,5 @@ perf-y += sockaddr.o perf-y += socket.o perf-y += statx.o perf-y += sync_file_range.o +perf-y += timespec.o perf-y += tracepoints/ diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index f527a46ab4e7..4c59edddd6a8 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -244,6 +244,9 @@ size_t syscall_arg__scnprintf_statx_mask(char *bf, size_t size, struct syscall_a size_t syscall_arg__scnprintf_sync_file_range_flags(char *bf, size_t size, struct syscall_arg *arg); #define SCA_SYNC_FILE_RANGE_FLAGS syscall_arg__scnprintf_sync_file_range_flags +size_t syscall_arg__scnprintf_timespec(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_TIMESPEC syscall_arg__scnprintf_timespec + size_t open__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool show_prefix); void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg, diff --git a/tools/perf/trace/beauty/timespec.c b/tools/perf/trace/beauty/timespec.c new file mode 
100644 index 000000000000..e1a61f092aad --- /dev/null +++ b/tools/perf/trace/beauty/timespec.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: LGPL-2.1 +// Copyright (C) 2022, Red Hat Inc, Arnaldo Carvalho de Melo + +#include "trace/beauty/beauty.h" +#include <inttypes.h> +#include <time.h> + +static size_t syscall_arg__scnprintf_augmented_timespec(struct syscall_arg *arg, char *bf, size_t size) +{ + struct timespec *ts = (struct timespec *)arg->augmented.args; + + return scnprintf(bf, size, "{ .tv_sec: %" PRIu64 ", .tv_nsec: %" PRIu64 " }", ts->tv_sec, ts->tv_nsec); +} + +size_t syscall_arg__scnprintf_timespec(char *bf, size_t size, struct syscall_arg *arg) +{ + if (arg->augmented.args) + return syscall_arg__scnprintf_augmented_timespec(arg, bf, size); + + return scnprintf(bf, size, "%#lx", arg->val); +} From d1104f9327df9b26901b97cd026949f80ccab0d3 Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Fri, 4 Nov 2022 15:20:06 +0100 Subject: [PATCH 1313/4122] vfio/ccw: replace vfio_init_device with _alloc_ Now that we have a reasonable separation of structs that follow the subchannel and mdev lifecycles, there's no reason we can't call the official vfio_alloc_device routine for our private data, and behave like everyone else. 
Signed-off-by: Eric Farman Reviewed-by: Kevin Tian Reviewed-by: Matthew Rosato Link: https://lore.kernel.org/r/20221104142007.1314999-7-farman@linux.ibm.com Signed-off-by: Alex Williamson --- drivers/s390/cio/vfio_ccw_drv.c | 18 ------------------ drivers/s390/cio/vfio_ccw_ops.c | 28 ++++++++++++++++++---------- drivers/s390/cio/vfio_ccw_private.h | 2 -- drivers/vfio/vfio_main.c | 10 +++++----- include/linux/vfio.h | 2 -- 5 files changed, 23 insertions(+), 37 deletions(-) diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c index 9fbd1b27a1ac..c2a65808605a 100644 --- a/drivers/s390/cio/vfio_ccw_drv.c +++ b/drivers/s390/cio/vfio_ccw_drv.c @@ -152,24 +152,6 @@ static void vfio_ccw_sch_irq(struct subchannel *sch) vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_INTERRUPT); } -void vfio_ccw_free_private(struct vfio_ccw_private *private) -{ - struct vfio_ccw_crw *crw, *temp; - - list_for_each_entry_safe(crw, temp, &private->crw, next) { - list_del(&crw->next); - kfree(crw); - } - - kmem_cache_free(vfio_ccw_crw_region, private->crw_region); - kmem_cache_free(vfio_ccw_schib_region, private->schib_region); - kmem_cache_free(vfio_ccw_cmd_region, private->cmd_region); - kmem_cache_free(vfio_ccw_io_region, private->io_region); - kfree(private->cp.guest_cp); - mutex_destroy(&private->io_mutex); - kfree(private); -} - static void vfio_ccw_free_parent(struct device *dev) { struct vfio_ccw_parent *parent = container_of(dev, struct vfio_ccw_parent, dev); diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c index 8a929a9cf3c6..1155f8bcedd9 100644 --- a/drivers/s390/cio/vfio_ccw_ops.c +++ b/drivers/s390/cio/vfio_ccw_ops.c @@ -102,15 +102,10 @@ static int vfio_ccw_mdev_probe(struct mdev_device *mdev) struct vfio_ccw_private *private; int ret; - private = kzalloc(sizeof(*private), GFP_KERNEL); - if (!private) - return -ENOMEM; - - ret = vfio_init_device(&private->vdev, &mdev->dev, &vfio_ccw_dev_ops); - if (ret) { - kfree(private); - return 
ret; - } + private = vfio_alloc_device(vfio_ccw_private, vdev, &mdev->dev, + &vfio_ccw_dev_ops); + if (IS_ERR(private)) + return PTR_ERR(private); dev_set_drvdata(&parent->dev, private); @@ -135,8 +130,21 @@ static void vfio_ccw_mdev_release_dev(struct vfio_device *vdev) { struct vfio_ccw_private *private = container_of(vdev, struct vfio_ccw_private, vdev); + struct vfio_ccw_crw *crw, *temp; - vfio_ccw_free_private(private); + list_for_each_entry_safe(crw, temp, &private->crw, next) { + list_del(&crw->next); + kfree(crw); + } + + kmem_cache_free(vfio_ccw_crw_region, private->crw_region); + kmem_cache_free(vfio_ccw_schib_region, private->schib_region); + kmem_cache_free(vfio_ccw_cmd_region, private->cmd_region); + kmem_cache_free(vfio_ccw_io_region, private->io_region); + kfree(private->cp.guest_cp); + mutex_destroy(&private->io_mutex); + + vfio_free_device(vdev); } static void vfio_ccw_mdev_remove(struct mdev_device *mdev) diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h index 2278fd38d34e..b441ae6700fd 100644 --- a/drivers/s390/cio/vfio_ccw_private.h +++ b/drivers/s390/cio/vfio_ccw_private.h @@ -131,8 +131,6 @@ int vfio_ccw_sch_quiesce(struct subchannel *sch); void vfio_ccw_sch_io_todo(struct work_struct *work); void vfio_ccw_crw_todo(struct work_struct *work); -void vfio_ccw_free_private(struct vfio_ccw_private *private); - extern struct mdev_driver vfio_ccw_mdev_driver; /* diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index 2d168793d4e1..2901b8ad5be9 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -348,6 +348,9 @@ static void vfio_device_release(struct device *dev) device->ops->release(device); } +static int vfio_init_device(struct vfio_device *device, struct device *dev, + const struct vfio_device_ops *ops); + /* * Allocate and initialize vfio_device so it can be registered to vfio * core. 
@@ -386,11 +389,9 @@ EXPORT_SYMBOL_GPL(_vfio_alloc_device); /* * Initialize a vfio_device so it can be registered to vfio core. - * - * Only vfio-ccw driver should call this interface. */ -int vfio_init_device(struct vfio_device *device, struct device *dev, - const struct vfio_device_ops *ops) +static int vfio_init_device(struct vfio_device *device, struct device *dev, + const struct vfio_device_ops *ops) { int ret; @@ -422,7 +423,6 @@ out_uninit: ida_free(&vfio.device_ida, device->index); return ret; } -EXPORT_SYMBOL_GPL(vfio_init_device); /* * The helper called by driver @release callback to free the device diff --git a/include/linux/vfio.h b/include/linux/vfio.h index e7cebeb875dd..ba809268a48e 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -176,8 +176,6 @@ struct vfio_device *_vfio_alloc_device(size_t size, struct device *dev, dev, ops), \ struct dev_struct, member) -int vfio_init_device(struct vfio_device *device, struct device *dev, - const struct vfio_device_ops *ops); void vfio_free_device(struct vfio_device *device); static inline void vfio_put_device(struct vfio_device *device) { From 913447d06f032a9e9c84870bec0b1adb8c588f29 Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Fri, 4 Nov 2022 15:20:07 +0100 Subject: [PATCH 1314/4122] vfio: Remove vfio_free_device With the "mess" sorted out, we should be able to inline the vfio_free_device call introduced by commit cb9ff3f3b84c ("vfio: Add helpers for unifying vfio_device life cycle") and remove them from driver release callbacks. 
Signed-off-by: Eric Farman Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Cornelia Huck Reviewed-by: Tony Krowiak # vfio-ap part Reviewed-by: Matthew Rosato Link: https://lore.kernel.org/r/20221104142007.1314999-8-farman@linux.ibm.com Signed-off-by: Alex Williamson --- drivers/gpu/drm/i915/gvt/kvmgt.c | 1 - drivers/s390/cio/vfio_ccw_ops.c | 2 -- drivers/s390/crypto/vfio_ap_ops.c | 6 ------ drivers/vfio/fsl-mc/vfio_fsl_mc.c | 1 - drivers/vfio/pci/vfio_pci_core.c | 1 - drivers/vfio/platform/vfio_amba.c | 1 - drivers/vfio/platform/vfio_platform.c | 1 - drivers/vfio/vfio_main.c | 22 ++++------------------ include/linux/vfio.h | 1 - samples/vfio-mdev/mbochs.c | 1 - samples/vfio-mdev/mdpy.c | 1 - samples/vfio-mdev/mtty.c | 1 - 12 files changed, 4 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 7a45e5360caf..eee6805e67de 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -1461,7 +1461,6 @@ static void intel_vgpu_release_dev(struct vfio_device *vfio_dev) struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); intel_gvt_destroy_vgpu(vgpu); - vfio_free_device(vfio_dev); } static const struct vfio_device_ops intel_vgpu_dev_ops = { diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c index 1155f8bcedd9..598a3814d428 100644 --- a/drivers/s390/cio/vfio_ccw_ops.c +++ b/drivers/s390/cio/vfio_ccw_ops.c @@ -143,8 +143,6 @@ static void vfio_ccw_mdev_release_dev(struct vfio_device *vdev) kmem_cache_free(vfio_ccw_io_region, private->io_region); kfree(private->cp.guest_cp); mutex_destroy(&private->io_mutex); - - vfio_free_device(vdev); } static void vfio_ccw_mdev_remove(struct mdev_device *mdev) diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index 0b4cc8c597ae..f108c0f14712 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -765,11 +765,6 @@ static void 
vfio_ap_mdev_unlink_fr_queues(struct ap_matrix_mdev *matrix_mdev) } } -static void vfio_ap_mdev_release_dev(struct vfio_device *vdev) -{ - vfio_free_device(vdev); -} - static void vfio_ap_mdev_remove(struct mdev_device *mdev) { struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(&mdev->dev); @@ -1784,7 +1779,6 @@ static const struct attribute_group vfio_queue_attr_group = { static const struct vfio_device_ops vfio_ap_matrix_dev_ops = { .init = vfio_ap_mdev_init_dev, - .release = vfio_ap_mdev_release_dev, .open_device = vfio_ap_mdev_open_device, .close_device = vfio_ap_mdev_close_device, .ioctl = vfio_ap_mdev_ioctl, diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c index b16874e913e4..7b8889f55007 100644 --- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c @@ -568,7 +568,6 @@ static void vfio_fsl_mc_release_dev(struct vfio_device *core_vdev) vfio_fsl_uninit_device(vdev); mutex_destroy(&vdev->igate); - vfio_free_device(core_vdev); } static int vfio_fsl_mc_remove(struct fsl_mc_device *mc_dev) diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index badc9d828cac..9be2d5be5d95 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -2109,7 +2109,6 @@ void vfio_pci_core_release_dev(struct vfio_device *core_vdev) mutex_destroy(&vdev->vma_lock); kfree(vdev->region); kfree(vdev->pm_save); - vfio_free_device(core_vdev); } EXPORT_SYMBOL_GPL(vfio_pci_core_release_dev); diff --git a/drivers/vfio/platform/vfio_amba.c b/drivers/vfio/platform/vfio_amba.c index eaea63e5294c..18faf2678b99 100644 --- a/drivers/vfio/platform/vfio_amba.c +++ b/drivers/vfio/platform/vfio_amba.c @@ -95,7 +95,6 @@ static void vfio_amba_release_dev(struct vfio_device *core_vdev) vfio_platform_release_common(vdev); kfree(vdev->name); - vfio_free_device(core_vdev); } static void vfio_amba_remove(struct amba_device *adev) diff --git a/drivers/vfio/platform/vfio_platform.c 
b/drivers/vfio/platform/vfio_platform.c index 82cedcebfd90..9910451dc341 100644 --- a/drivers/vfio/platform/vfio_platform.c +++ b/drivers/vfio/platform/vfio_platform.c @@ -83,7 +83,6 @@ static void vfio_platform_release_dev(struct vfio_device *core_vdev) container_of(core_vdev, struct vfio_platform_device, vdev); vfio_platform_release_common(vdev); - vfio_free_device(core_vdev); } static int vfio_platform_remove(struct platform_device *pdev) diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index 2901b8ad5be9..9835757e2bee 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -339,13 +339,10 @@ static void vfio_device_release(struct device *dev) vfio_release_device_set(device); ida_free(&vfio.device_ida, device->index); - /* - * kvfree() cannot be done here due to a life cycle mess in - * vfio-ccw. Before the ccw part is fixed all drivers are - * required to support @release and call vfio_free_device() - * from there. - */ - device->ops->release(device); + if (device->ops->release) + device->ops->release(device); + + kvfree(device); } static int vfio_init_device(struct vfio_device *device, struct device *dev, @@ -424,17 +421,6 @@ out_uninit: return ret; } -/* - * The helper called by driver @release callback to free the device - * structure. Drivers which don't have private data to clean can - * simply use this helper as its @release. 
- */ -void vfio_free_device(struct vfio_device *device) -{ - kvfree(device); -} -EXPORT_SYMBOL_GPL(vfio_free_device); - static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev, enum vfio_group_type type) { diff --git a/include/linux/vfio.h b/include/linux/vfio.h index ba809268a48e..e7480154825e 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -176,7 +176,6 @@ struct vfio_device *_vfio_alloc_device(size_t size, struct device *dev, dev, ops), \ struct dev_struct, member) -void vfio_free_device(struct vfio_device *device); static inline void vfio_put_device(struct vfio_device *device) { put_device(&device->device); diff --git a/samples/vfio-mdev/mbochs.c b/samples/vfio-mdev/mbochs.c index 117a8d799f71..8b5a3a778a25 100644 --- a/samples/vfio-mdev/mbochs.c +++ b/samples/vfio-mdev/mbochs.c @@ -594,7 +594,6 @@ static void mbochs_release_dev(struct vfio_device *vdev) atomic_add(mdev_state->type->mbytes, &mbochs_avail_mbytes); kfree(mdev_state->pages); kfree(mdev_state->vconfig); - vfio_free_device(vdev); } static void mbochs_remove(struct mdev_device *mdev) diff --git a/samples/vfio-mdev/mdpy.c b/samples/vfio-mdev/mdpy.c index 946e8cfde6fd..721fb06c6413 100644 --- a/samples/vfio-mdev/mdpy.c +++ b/samples/vfio-mdev/mdpy.c @@ -283,7 +283,6 @@ static void mdpy_release_dev(struct vfio_device *vdev) vfree(mdev_state->memblk); kfree(mdev_state->vconfig); - vfio_free_device(vdev); } static void mdpy_remove(struct mdev_device *mdev) diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c index e72085fc1376..3c2a421b9b69 100644 --- a/samples/vfio-mdev/mtty.c +++ b/samples/vfio-mdev/mtty.c @@ -784,7 +784,6 @@ static void mtty_release_dev(struct vfio_device *vdev) atomic_add(mdev_state->nr_ports, &mdev_avail_ports); kfree(mdev_state->vconfig); - vfio_free_device(vdev); } static void mtty_remove(struct mdev_device *mdev) From 30b331d2e3bc5c7c95568477d4bf2661b6e6cb3e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 3 Nov 2022 22:14:40 -0700 
Subject: [PATCH 1315/4122] perf lock: Allow concurrent record and report To support live monitoring of kernel lock contention without BPF, it should support something like below: # perf lock record -a -o- sleep 1 | perf lock contention -i- contended total wait max wait avg wait type caller 2 10.27 us 6.17 us 5.13 us spinlock load_balance+0xc03 1 5.29 us 5.29 us 5.29 us rwlock:W ep_scan_ready_list+0x54 1 4.12 us 4.12 us 4.12 us spinlock smpboot_thread_fn+0x116 1 3.28 us 3.28 us 3.28 us mutex pipe_read+0x50 To do that, it needs to handle HEAD_ATTR, HEADER_EVENT_UPDATE and HEADER_TRACING_DATA which are generated only for the pipe mode. And setting event handler also should be delayed until it gets the event information. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221104051440.220989-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-lock.c | 68 +++++++++++++++-------- tools/perf/tests/shell/lock_contention.sh | 15 ++++- 2 files changed, 60 insertions(+), 23 deletions(-) diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 6f79175365a8..0d280093b19a 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -1390,6 +1390,34 @@ static int dump_info(void) return rc; } +static const struct evsel_str_handler lock_tracepoints[] = { + { "lock:lock_acquire", evsel__process_lock_acquire, }, /* CONFIG_LOCKDEP */ + { "lock:lock_acquired", evsel__process_lock_acquired, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */ + { "lock:lock_contended", evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */ + { "lock:lock_release", evsel__process_lock_release, }, /* CONFIG_LOCKDEP */ +}; + +static const struct evsel_str_handler contention_tracepoints[] = { + { "lock:contention_begin", evsel__process_contention_begin, }, + { "lock:contention_end", evsel__process_contention_end, }, +}; + +static int 
process_event_update(struct perf_tool *tool, + union perf_event *event, + struct evlist **pevlist) +{ + int ret; + + ret = perf_event__process_event_update(tool, event, pevlist); + if (ret < 0) + return ret; + + /* this can return -EEXIST since we call it for each evsel */ + perf_session__set_tracepoints_handlers(session, lock_tracepoints); + perf_session__set_tracepoints_handlers(session, contention_tracepoints); + return 0; +} + typedef int (*tracepoint_handler)(struct evsel *evsel, struct perf_sample *sample); @@ -1545,28 +1573,19 @@ next: print_bad_events(bad, total); } -static const struct evsel_str_handler lock_tracepoints[] = { - { "lock:lock_acquire", evsel__process_lock_acquire, }, /* CONFIG_LOCKDEP */ - { "lock:lock_acquired", evsel__process_lock_acquired, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */ - { "lock:lock_contended", evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */ - { "lock:lock_release", evsel__process_lock_release, }, /* CONFIG_LOCKDEP */ -}; - -static const struct evsel_str_handler contention_tracepoints[] = { - { "lock:contention_begin", evsel__process_contention_begin, }, - { "lock:contention_end", evsel__process_contention_end, }, -}; - static bool force; static int __cmd_report(bool display_info) { int err = -EINVAL; struct perf_tool eops = { + .attr = perf_event__process_attr, + .event_update = process_event_update, .sample = process_sample_event, .comm = perf_event__process_comm, .mmap = perf_event__process_mmap, .namespaces = perf_event__process_namespaces, + .tracing_data = perf_event__process_tracing_data, .ordered_events = true, }; struct perf_data data = { @@ -1585,17 +1604,19 @@ static int __cmd_report(bool display_info) symbol_conf.sort_by_name = true; symbol__init(&session->header.env); - if (!perf_session__has_traces(session, "lock record")) - goto out_delete; + if (!data.is_pipe) { + if (!perf_session__has_traces(session, "lock record")) + goto out_delete; - if 
(perf_session__set_tracepoints_handlers(session, lock_tracepoints)) { - pr_err("Initializing perf session tracepoint handlers failed\n"); - goto out_delete; - } + if (perf_session__set_tracepoints_handlers(session, lock_tracepoints)) { + pr_err("Initializing perf session tracepoint handlers failed\n"); + goto out_delete; + } - if (perf_session__set_tracepoints_handlers(session, contention_tracepoints)) { - pr_err("Initializing perf session tracepoint handlers failed\n"); - goto out_delete; + if (perf_session__set_tracepoints_handlers(session, contention_tracepoints)) { + pr_err("Initializing perf session tracepoint handlers failed\n"); + goto out_delete; + } } if (setup_output_field(false, output_fields)) @@ -1633,9 +1654,12 @@ static int __cmd_contention(int argc, const char **argv) { int err = -EINVAL; struct perf_tool eops = { + .attr = perf_event__process_attr, + .event_update = process_event_update, .sample = process_sample_event, .comm = perf_event__process_comm, .mmap = perf_event__process_mmap, + .tracing_data = perf_event__process_tracing_data, .ordered_events = true, }; struct perf_data data = { @@ -1698,7 +1722,7 @@ static int __cmd_contention(int argc, const char **argv) pr_err("lock contention BPF setup failed\n"); goto out_delete; } - } else { + } else if (!data.is_pipe) { if (!perf_session__has_traces(session, "lock record")) goto out_delete; diff --git a/tools/perf/tests/shell/lock_contention.sh b/tools/perf/tests/shell/lock_contention.sh index 04bf604e3c6f..f7bd0d8eb5c3 100755 --- a/tools/perf/tests/shell/lock_contention.sh +++ b/tools/perf/tests/shell/lock_contention.sh @@ -53,7 +53,7 @@ test_bpf() if ! 
perf lock con -b true > /dev/null 2>&1 ; then echo "[Skip] No BPF support" - exit + return fi # the perf lock contention output goes to the stderr @@ -65,9 +65,22 @@ test_bpf() fi } +test_record_concurrent() +{ + echo "Testing perf lock record and perf lock contention at the same time" + perf lock record -o- -- perf bench sched messaging 2> /dev/null | \ + perf lock contention -i- -E 1 -q 2> ${result} + if [ $(cat "${result}" | wc -l) != "1" ]; then + echo "[Fail] Recorded result count is not 1:" $(cat "${result}" | wc -l) + err=1 + exit + fi +} + check test_record test_bpf +test_record_concurrent exit ${err} From 9d895e46842908aa49a042e699097df64ab20b7f Mon Sep 17 00:00:00 2001 From: Dmitrii Dolgov <9erthalion6@gmail.com> Date: Wed, 9 Nov 2022 11:39:32 +0100 Subject: [PATCH 1316/4122] perf data: Add tracepoint fields when converting to JSON When converting recorded data into JSON format, perf data omits probe variables. Add them to the output in the format "field name": "field value" using tep_print_field: $ perf data convert --to-json output.json // output.json { "linux-perf-json-version": 1, "headers": { ... }, "samples": [ { "timestamp": 29182079082999, "pid": 309194, [...] 
"__probe_ip": "0x93ee35", "query_string_string": "select 2;", "nxids": "0" } ] } Signed-off-by: Dmitrii Dolgov <9erthalion6@gmail.com> Acked-by: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Link: https://lore.kernel.org/r/20221109103932.65675-1-9erthalion6@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/data-convert-json.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tools/perf/util/data-convert-json.c b/tools/perf/util/data-convert-json.c index 613d6ae82663..57db59068cb6 100644 --- a/tools/perf/util/data-convert-json.c +++ b/tools/perf/util/data-convert-json.c @@ -217,6 +217,26 @@ static int process_sample_event(struct perf_tool *tool, } output_json_format(out, false, 3, "]"); + if (sample->raw_data) { + int i; + struct tep_format_field **fields; + + fields = tep_event_fields(evsel->tp_format); + if (fields) { + i = 0; + while (fields[i]) { + struct trace_seq s; + + trace_seq_init(&s); + tep_print_field(&s, sample->raw_data, fields[i]); + output_json_key_string(out, true, 3, fields[i]->name, s.buffer); + + i++; + } + free(fields); + } + } + output_json_format(out, false, 2, "}"); return 0; } From cf9f67b36303de65596ae7504a2a7573c08876bb Mon Sep 17 00:00:00 2001 From: Kang Minchul Date: Sat, 5 Nov 2022 22:59:32 +0900 Subject: [PATCH 1317/4122] perf print-events: Remove redundant comparison with zero Since variable npmus is unsigned int, comparing with 0 is unnecessary. 
Signed-off-by: Kang Minchul Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221105135932.81612-1-tegongkang@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/print-events.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c index c4d5d87fae2f..6df947df1c0f 100644 --- a/tools/perf/util/print-events.c +++ b/tools/perf/util/print-events.c @@ -311,10 +311,8 @@ restart: if ((hybrid_supported == 0) || (hybrid_supported == npmus)) { evt_list[evt_i] = strdup(name); - if (npmus > 0) { - for (j = 0; j < npmus; j++) - zfree(&evt_pmus[j]); - } + for (j = 0; j < npmus; j++) + zfree(&evt_pmus[j]); } else { for (j = 0; j < hybrid_supported; j++) { evt_list[evt_i++] = evt_pmus[j]; From 612a5337ae7a87893f90de7878b20701b2b17d7d Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 20 Oct 2022 14:45:11 +0100 Subject: [PATCH 1318/4122] perf vendor events: Add Arm Neoverse V2 PMU events Rename the neoverse-n2 folder to make it clear that it includes V2, and add V2 to mapfile.csv. 
V2 has the same events as N2, visible by running the following command in the ARM-software/data github repo [1]: diff pmu/neoverse-v2.json pmu/neoverse-n2.json | grep code Testing: $ perf test pmu 10: PMU events : 10.1: PMU event table sanity : Ok 10.2: PMU event map aliases : Ok 10.3: Parsing of PMU event table metrics : Ok 10.4: Parsing of PMU event table metrics with fake PMUs : Ok [1]: https://github.com/ARM-software/data Reviewed-by: Nick Forrington Signed-off-by: James Clark Cc: Al Grant Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20221020134512.1345013-1-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/arm64/arm/{neoverse-n2 => neoverse-n2-v2}/branch.json | 0 .../arch/arm64/arm/{neoverse-n2 => neoverse-n2-v2}/bus.json | 0 .../arch/arm64/arm/{neoverse-n2 => neoverse-n2-v2}/cache.json | 0 .../arm64/arm/{neoverse-n2 => neoverse-n2-v2}/exception.json | 0 .../arm64/arm/{neoverse-n2 => neoverse-n2-v2}/instruction.json | 0 .../arch/arm64/arm/{neoverse-n2 => neoverse-n2-v2}/memory.json | 0 .../arm64/arm/{neoverse-n2 => neoverse-n2-v2}/pipeline.json | 0 .../arch/arm64/arm/{neoverse-n2 => neoverse-n2-v2}/spe.json | 0 .../arch/arm64/arm/{neoverse-n2 => neoverse-n2-v2}/trace.json | 0 tools/perf/pmu-events/arch/arm64/mapfile.csv | 3 ++- 10 files changed, 2 insertions(+), 1 deletion(-) rename tools/perf/pmu-events/arch/arm64/arm/{neoverse-n2 => neoverse-n2-v2}/branch.json (100%) rename tools/perf/pmu-events/arch/arm64/arm/{neoverse-n2 => neoverse-n2-v2}/bus.json (100%) rename tools/perf/pmu-events/arch/arm64/arm/{neoverse-n2 => neoverse-n2-v2}/cache.json (100%) rename tools/perf/pmu-events/arch/arm64/arm/{neoverse-n2 => neoverse-n2-v2}/exception.json (100%) rename tools/perf/pmu-events/arch/arm64/arm/{neoverse-n2 => neoverse-n2-v2}/instruction.json (100%) 
rename tools/perf/pmu-events/arch/arm64/arm/{neoverse-n2 => neoverse-n2-v2}/memory.json (100%) rename tools/perf/pmu-events/arch/arm64/arm/{neoverse-n2 => neoverse-n2-v2}/pipeline.json (100%) rename tools/perf/pmu-events/arch/arm64/arm/{neoverse-n2 => neoverse-n2-v2}/spe.json (100%) rename tools/perf/pmu-events/arch/arm64/arm/{neoverse-n2 => neoverse-n2-v2}/trace.json (100%) diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/branch.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/branch.json similarity index 100% rename from tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/branch.json rename to tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/branch.json diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/bus.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/bus.json similarity index 100% rename from tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/bus.json rename to tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/bus.json diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/cache.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/cache.json similarity index 100% rename from tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/cache.json rename to tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/cache.json diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/exception.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/exception.json similarity index 100% rename from tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/exception.json rename to tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/exception.json diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/instruction.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/instruction.json similarity index 100% rename from tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/instruction.json rename to tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/instruction.json diff --git 
a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/memory.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/memory.json similarity index 100% rename from tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/memory.json rename to tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/memory.json diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/pipeline.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/pipeline.json similarity index 100% rename from tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/pipeline.json rename to tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/pipeline.json diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/spe.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/spe.json similarity index 100% rename from tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/spe.json rename to tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/spe.json diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/trace.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/trace.json similarity index 100% rename from tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/trace.json rename to tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/trace.json diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv index ad502d00f460..f134e833c069 100644 --- a/tools/perf/pmu-events/arch/arm64/mapfile.csv +++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv @@ -34,7 +34,8 @@ 0x00000000410fd460,v1,arm/cortex-a510,core 0x00000000410fd470,v1,arm/cortex-a710,core 0x00000000410fd480,v1,arm/cortex-x2,core -0x00000000410fd490,v1,arm/neoverse-n2,core +0x00000000410fd490,v1,arm/neoverse-n2-v2,core +0x00000000410fd4f0,v1,arm/neoverse-n2-v2,core 0x00000000420f5160,v1,cavium/thunderx2,core 0x00000000430f0af0,v1,cavium/thunderx2,core 0x00000000460f0010,v1,fujitsu/a64fx,core From 3bfadb2325891d122771ce534336af531e93d7b2 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 20 Oct 2022 15:12:04 
+0800 Subject: [PATCH 1319/4122] KVM: selftests: memslot_perf_test: Use data->nslots in prepare_vm() In prepare_vm(), 'data->nslots' is assigned with 'max_mem_slots - 1' at the beginning, meaning they are interchangeable. Use 'data->nslots' instead of 'max_mem_slots - 1'. With this, it becomes easier to move the logic of probing number of slots into upper layer in subsequent patches. No functional change intended. Signed-off-by: Gavin Shan Reviewed-by: Maciej S. Szmigiero Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221020071209.559062-2-gshan@redhat.com --- tools/testing/selftests/kvm/memslot_perf_test.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index 44995446d942..231cc8449c2e 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -280,14 +280,14 @@ static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots, ucall_init(data->vm, NULL); pr_info_v("Adding slots 1..%i, each slot with %"PRIu64" pages + %"PRIu64" extra pages last\n", - max_mem_slots - 1, data->pages_per_slot, rempages); + data->nslots, data->pages_per_slot, rempages); clock_gettime(CLOCK_MONOTONIC, &tstart); - for (slot = 1, guest_addr = MEM_GPA; slot < max_mem_slots; slot++) { + for (slot = 1, guest_addr = MEM_GPA; slot <= data->nslots; slot++) { uint64_t npages; npages = data->pages_per_slot; - if (slot == max_mem_slots - 1) + if (slot == data->nslots) npages += rempages; vm_userspace_mem_region_add(data->vm, VM_MEM_SRC_ANONYMOUS, @@ -297,12 +297,12 @@ static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots, } *slot_runtime = timespec_elapsed(tstart); - for (slot = 0, guest_addr = MEM_GPA; slot < max_mem_slots - 1; slot++) { + for (slot = 0, guest_addr = MEM_GPA; slot < data->nslots; slot++) { uint64_t npages; uint64_t gpa; npages = data->pages_per_slot; 
- if (slot == max_mem_slots - 2) + if (slot == data->nslots - 1) npages += rempages; gpa = vm_phy_pages_alloc(data->vm, npages, guest_addr, From 2aae5e6795e1407334bb849f96f11c9051b959e2 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 20 Oct 2022 15:12:05 +0800 Subject: [PATCH 1320/4122] KVM: selftests: memslot_perf_test: Consolidate loop conditions in prepare_vm() There are two loops in prepare_vm(), which have different conditions. 'slot' is treated as memory slot index in the first loop, but index of the host virtual address array in the second loop. It makes it a bit hard to understand the code. Change the usage of 'slot' in the second loop, to treat it as the memory slot index as well. No functional change intended. Signed-off-by: Gavin Shan Reviewed-by: Maciej S. Szmigiero Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221020071209.559062-3-gshan@redhat.com --- tools/testing/selftests/kvm/memslot_perf_test.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index 231cc8449c2e..dcb492b3f27b 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -297,21 +297,20 @@ static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots, } *slot_runtime = timespec_elapsed(tstart); - for (slot = 0, guest_addr = MEM_GPA; slot < data->nslots; slot++) { + for (slot = 1, guest_addr = MEM_GPA; slot <= data->nslots; slot++) { uint64_t npages; uint64_t gpa; npages = data->pages_per_slot; - if (slot == data->nslots - 1) + if (slot == data->nslots) npages += rempages; - gpa = vm_phy_pages_alloc(data->vm, npages, guest_addr, - slot + 1); + gpa = vm_phy_pages_alloc(data->vm, npages, guest_addr, slot); TEST_ASSERT(gpa == guest_addr, "vm_phy_pages_alloc() failed\n"); - data->hva_slots[slot] = addr_gpa2hva(data->vm, guest_addr); - memset(data->hva_slots[slot], 0, npages * 
4096); + data->hva_slots[slot - 1] = addr_gpa2hva(data->vm, guest_addr); + memset(data->hva_slots[slot - 1], 0, npages * 4096); guest_addr += npages * 4096; } From 34396437b11f904fc61b272e3974f4c92868451b Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 20 Oct 2022 15:12:06 +0800 Subject: [PATCH 1321/4122] KVM: selftests: memslot_perf_test: Probe memory slots for once prepare_vm() is called in every iteration and run. The allowed memory slots (KVM_CAP_NR_MEMSLOTS) are probed for multiple times. It's not free and unnecessary. Move the probing logic for the allowed memory slots to parse_args() for once, which is upper layer of prepare_vm(). No functional change intended. Signed-off-by: Gavin Shan Reviewed-by: Maciej S. Szmigiero Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221020071209.559062-4-gshan@redhat.com --- .../testing/selftests/kvm/memslot_perf_test.c | 32 +++++++++++-------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index dcb492b3f27b..f0ea3f75b6e1 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -245,27 +245,17 @@ static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots, void *guest_code, uint64_t mempages, struct timespec *slot_runtime) { - uint32_t max_mem_slots; uint64_t rempages; uint64_t guest_addr; uint32_t slot; struct timespec tstart; struct sync_area *sync; - max_mem_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS); - TEST_ASSERT(max_mem_slots > 1, - "KVM_CAP_NR_MEMSLOTS should be greater than 1"); - TEST_ASSERT(nslots > 1 || nslots == -1, - "Slot count cap should be greater than 1"); - if (nslots != -1) - max_mem_slots = min(max_mem_slots, (uint32_t)nslots); - pr_info_v("Allowed number of memory slots: %"PRIu32"\n", max_mem_slots); - TEST_ASSERT(mempages > 1, "Can't test without any memory"); data->npages = mempages; - 
data->nslots = max_mem_slots - 1; + data->nslots = nslots; data->pages_per_slot = mempages / data->nslots; if (!data->pages_per_slot) { *maxslots = mempages + 1; @@ -869,6 +859,7 @@ static void help(char *name, struct test_args *targs) static bool parse_args(int argc, char *argv[], struct test_args *targs) { + uint32_t max_mem_slots; int opt; while ((opt = getopt(argc, argv, "hvds:f:e:l:r:")) != -1) { @@ -885,8 +876,8 @@ static bool parse_args(int argc, char *argv[], break; case 's': targs->nslots = atoi(optarg); - if (targs->nslots <= 0 && targs->nslots != -1) { - pr_info("Slot count cap has to be positive or -1 for no cap\n"); + if (targs->nslots <= 1 && targs->nslots != -1) { + pr_info("Slot count cap must be larger than 1 or -1 for no cap\n"); return false; } break; @@ -932,6 +923,21 @@ static bool parse_args(int argc, char *argv[], return false; } + max_mem_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS); + if (max_mem_slots <= 1) { + pr_info("KVM_CAP_NR_MEMSLOTS should be greater than 1\n"); + return false; + } + + /* Memory slot 0 is reserved */ + if (targs->nslots == -1) + targs->nslots = max_mem_slots - 1; + else + targs->nslots = min_t(int, targs->nslots, max_mem_slots) - 1; + + pr_info_v("Allowed Number of memory slots: %"PRIu32"\n", + targs->nslots + 1); + return true; } From 8675c6f226986ddb67752be22279a0e2385b197e Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 20 Oct 2022 15:12:07 +0800 Subject: [PATCH 1322/4122] KVM: selftests: memslot_perf_test: Support variable guest page size The test case is obviously broken on aarch64 because non-4KB guest page size is supported. The guest page size on aarch64 could be 4KB, 16KB or 64KB. This supports variable guest page size, mostly for aarch64. - The host determines the guest page size when virtual machine is created. The value is also passed to guest through the synchronization area. - The number of guest pages are unknown until the virtual machine is to be created. So all the related macros are dropped. 
Instead, their values are dynamically calculated based on the guest page size. - The static checks on memory sizes and pages becomes dependent on guest page size, which is unknown until the virtual machine is about to be created. So all the static checks are converted to dynamic checks, done in check_memory_sizes(). - As the address passed to madvise() should be aligned to host page, the size of page chunk is automatically selected, other than one page. - MEM_TEST_MOVE_SIZE has fixed and non-working 64KB. It will be consolidated in next patch. However, the comments about how it's calculated has been correct. - All other changes included in this patch are almost mechanical replacing '4096' with 'guest_page_size'. Signed-off-by: Gavin Shan Reviewed-by: Maciej S. Szmigiero Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221020071209.559062-5-gshan@redhat.com --- .../testing/selftests/kvm/memslot_perf_test.c | 222 +++++++++++------- 1 file changed, 135 insertions(+), 87 deletions(-) diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index f0ea3f75b6e1..9af61ca8ad0a 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -26,14 +26,11 @@ #include #define MEM_SIZE ((512U << 20) + 4096) -#define MEM_SIZE_PAGES (MEM_SIZE / 4096) #define MEM_GPA 0x10000000UL #define MEM_AUX_GPA MEM_GPA #define MEM_SYNC_GPA MEM_AUX_GPA #define MEM_TEST_GPA (MEM_AUX_GPA + 4096) #define MEM_TEST_SIZE (MEM_SIZE - 4096) -static_assert(MEM_SIZE % 4096 == 0, "invalid mem size"); -static_assert(MEM_TEST_SIZE % 4096 == 0, "invalid mem test size"); /* * 32 MiB is max size that gets well over 100 iterations on 509 slots. @@ -42,43 +39,37 @@ static_assert(MEM_TEST_SIZE % 4096 == 0, "invalid mem test size"); * limited resolution). 
*/ #define MEM_SIZE_MAP ((32U << 20) + 4096) -#define MEM_SIZE_MAP_PAGES (MEM_SIZE_MAP / 4096) #define MEM_TEST_MAP_SIZE (MEM_SIZE_MAP - 4096) -#define MEM_TEST_MAP_SIZE_PAGES (MEM_TEST_MAP_SIZE / 4096) -static_assert(MEM_SIZE_MAP % 4096 == 0, "invalid map test region size"); -static_assert(MEM_TEST_MAP_SIZE % 4096 == 0, "invalid map test region size"); -static_assert(MEM_TEST_MAP_SIZE_PAGES % 2 == 0, "invalid map test region size"); -static_assert(MEM_TEST_MAP_SIZE_PAGES > 2, "invalid map test region size"); /* * 128 MiB is min size that fills 32k slots with at least one page in each * while at the same time gets 100+ iterations in such test + * + * 2 MiB chunk size like a typical huge page */ #define MEM_TEST_UNMAP_SIZE (128U << 20) -#define MEM_TEST_UNMAP_SIZE_PAGES (MEM_TEST_UNMAP_SIZE / 4096) -/* 2 MiB chunk size like a typical huge page */ -#define MEM_TEST_UNMAP_CHUNK_PAGES (2U << (20 - 12)) -static_assert(MEM_TEST_UNMAP_SIZE <= MEM_TEST_SIZE, - "invalid unmap test region size"); -static_assert(MEM_TEST_UNMAP_SIZE % 4096 == 0, - "invalid unmap test region size"); -static_assert(MEM_TEST_UNMAP_SIZE_PAGES % - (2 * MEM_TEST_UNMAP_CHUNK_PAGES) == 0, - "invalid unmap test region size"); +#define MEM_TEST_UNMAP_CHUNK_SIZE (2U << 20) /* * For the move active test the middle of the test area is placed on * a memslot boundary: half lies in the memslot being moved, half in * other memslot(s). * - * When running this test with 32k memslots (32764, really) each memslot - * contains 4 pages. - * The last one additionally contains the remaining 21 pages of memory, - * for the total size of 25 pages. - * Hence, the maximum size here is 50 pages. + * We have different number of memory slots, excluding the reserved + * memory slot 0, on various architectures and configurations. The + * memory size in this test is calculated by picking the maximal + * last memory slot's memory size, with alignment to the largest + * supported page size (64KB). 
In this way, the selected memory + * size for this test is compatible with test_memslot_move_prepare(). + * + * architecture slots memory-per-slot memory-on-last-slot + * -------------------------------------------------------------- + * x86-4KB 32763 16KB 100KB + * arm64-4KB 32766 16KB 52KB + * arm64-16KB 32766 16KB 48KB + * arm64-64KB 8192 64KB 64KB */ -#define MEM_TEST_MOVE_SIZE_PAGES (50) -#define MEM_TEST_MOVE_SIZE (MEM_TEST_MOVE_SIZE_PAGES * 4096) +#define MEM_TEST_MOVE_SIZE 0x10000 #define MEM_TEST_MOVE_GPA_DEST (MEM_GPA + MEM_SIZE) static_assert(MEM_TEST_MOVE_SIZE <= MEM_TEST_SIZE, "invalid move test region size"); @@ -100,6 +91,7 @@ struct vm_data { }; struct sync_area { + uint32_t guest_page_size; atomic_bool start_flag; atomic_bool exit_flag; atomic_bool sync_flag; @@ -192,14 +184,15 @@ static void *vm_gpa2hva(struct vm_data *data, uint64_t gpa, uint64_t *rempages) uint64_t gpage, pgoffs; uint32_t slot, slotoffs; void *base; + uint32_t guest_page_size = data->vm->page_size; TEST_ASSERT(gpa >= MEM_GPA, "Too low gpa to translate"); - TEST_ASSERT(gpa < MEM_GPA + data->npages * 4096, + TEST_ASSERT(gpa < MEM_GPA + data->npages * guest_page_size, "Too high gpa to translate"); gpa -= MEM_GPA; - gpage = gpa / 4096; - pgoffs = gpa % 4096; + gpage = gpa / guest_page_size; + pgoffs = gpa % guest_page_size; slot = min(gpage / data->pages_per_slot, (uint64_t)data->nslots - 1); slotoffs = gpage - (slot * data->pages_per_slot); @@ -217,14 +210,16 @@ static void *vm_gpa2hva(struct vm_data *data, uint64_t gpa, uint64_t *rempages) } base = data->hva_slots[slot]; - return (uint8_t *)base + slotoffs * 4096 + pgoffs; + return (uint8_t *)base + slotoffs * guest_page_size + pgoffs; } static uint64_t vm_slot2gpa(struct vm_data *data, uint32_t slot) { + uint32_t guest_page_size = data->vm->page_size; + TEST_ASSERT(slot < data->nslots, "Too high slot number"); - return MEM_GPA + slot * data->pages_per_slot * 4096; + return MEM_GPA + slot * data->pages_per_slot * guest_page_size; 
} static struct vm_data *alloc_vm(void) @@ -242,32 +237,34 @@ static struct vm_data *alloc_vm(void) } static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots, - void *guest_code, uint64_t mempages, + void *guest_code, uint64_t mem_size, struct timespec *slot_runtime) { - uint64_t rempages; + uint64_t mempages, rempages; uint64_t guest_addr; - uint32_t slot; + uint32_t slot, guest_page_size; struct timespec tstart; struct sync_area *sync; - TEST_ASSERT(mempages > 1, - "Can't test without any memory"); - - data->npages = mempages; - data->nslots = nslots; - data->pages_per_slot = mempages / data->nslots; - if (!data->pages_per_slot) { - *maxslots = mempages + 1; - return false; - } - - rempages = mempages % data->nslots; - data->hva_slots = malloc(sizeof(*data->hva_slots) * data->nslots); - TEST_ASSERT(data->hva_slots, "malloc() fail"); + guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size; + mempages = mem_size / guest_page_size; data->vm = __vm_create_with_one_vcpu(&data->vcpu, mempages, guest_code); ucall_init(data->vm, NULL); + TEST_ASSERT(data->vm->page_size == guest_page_size, "Invalid VM page size"); + + data->npages = mempages; + TEST_ASSERT(data->npages > 1, "Can't test without any memory"); + data->nslots = nslots; + data->pages_per_slot = data->npages / data->nslots; + if (!data->pages_per_slot) { + *maxslots = data->npages + 1; + return false; + } + + rempages = data->npages % data->nslots; + data->hva_slots = malloc(sizeof(*data->hva_slots) * data->nslots); + TEST_ASSERT(data->hva_slots, "malloc() fail"); pr_info_v("Adding slots 1..%i, each slot with %"PRIu64" pages + %"PRIu64" extra pages last\n", data->nslots, data->pages_per_slot, rempages); @@ -283,7 +280,7 @@ static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots, vm_userspace_mem_region_add(data->vm, VM_MEM_SRC_ANONYMOUS, guest_addr, slot, npages, 0); - guest_addr += npages * 4096; + guest_addr += npages * guest_page_size; } *slot_runtime = 
timespec_elapsed(tstart); @@ -300,12 +297,12 @@ static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots, "vm_phy_pages_alloc() failed\n"); data->hva_slots[slot - 1] = addr_gpa2hva(data->vm, guest_addr); - memset(data->hva_slots[slot - 1], 0, npages * 4096); + memset(data->hva_slots[slot - 1], 0, npages * guest_page_size); - guest_addr += npages * 4096; + guest_addr += npages * guest_page_size; } - virt_map(data->vm, MEM_GPA, MEM_GPA, mempages); + virt_map(data->vm, MEM_GPA, MEM_GPA, data->npages); sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL); atomic_init(&sync->start_flag, false); @@ -404,6 +401,7 @@ static bool guest_perform_sync(void) static void guest_code_test_memslot_move(void) { struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA; + uint32_t page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size); uintptr_t base = (typeof(base))READ_ONCE(sync->move_area_ptr); GUEST_SYNC(0); @@ -414,7 +412,7 @@ static void guest_code_test_memslot_move(void) uintptr_t ptr; for (ptr = base; ptr < base + MEM_TEST_MOVE_SIZE; - ptr += 4096) + ptr += page_size) *(uint64_t *)ptr = MEM_TEST_VAL_1; /* @@ -432,6 +430,7 @@ static void guest_code_test_memslot_move(void) static void guest_code_test_memslot_map(void) { struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA; + uint32_t page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size); GUEST_SYNC(0); @@ -441,14 +440,16 @@ static void guest_code_test_memslot_map(void) uintptr_t ptr; for (ptr = MEM_TEST_GPA; - ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2; ptr += 4096) + ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2; + ptr += page_size) *(uint64_t *)ptr = MEM_TEST_VAL_1; if (!guest_perform_sync()) break; for (ptr = MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2; - ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE; ptr += 4096) + ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE; + ptr += page_size) *(uint64_t *)ptr = MEM_TEST_VAL_2; if (!guest_perform_sync()) @@ -495,6 +496,9 @@ static void guest_code_test_memslot_unmap(void) 
static void guest_code_test_memslot_rw(void) { + struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA; + uint32_t page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size); + GUEST_SYNC(0); guest_spin_until_start(); @@ -503,14 +507,14 @@ static void guest_code_test_memslot_rw(void) uintptr_t ptr; for (ptr = MEM_TEST_GPA; - ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += 4096) + ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += page_size) *(uint64_t *)ptr = MEM_TEST_VAL_1; if (!guest_perform_sync()) break; - for (ptr = MEM_TEST_GPA + 4096 / 2; - ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += 4096) { + for (ptr = MEM_TEST_GPA + page_size / 2; + ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += page_size) { uint64_t val = *(uint64_t *)ptr; GUEST_ASSERT_1(val == MEM_TEST_VAL_2, val); @@ -528,6 +532,8 @@ static bool test_memslot_move_prepare(struct vm_data *data, struct sync_area *sync, uint64_t *maxslots, bool isactive) { + uint32_t guest_page_size = data->vm->page_size; + uint64_t move_pages = MEM_TEST_MOVE_SIZE / guest_page_size; uint64_t movesrcgpa, movetestgpa; movesrcgpa = vm_slot2gpa(data, data->nslots - 1); @@ -536,7 +542,7 @@ static bool test_memslot_move_prepare(struct vm_data *data, uint64_t lastpages; vm_gpa2hva(data, movesrcgpa, &lastpages); - if (lastpages < MEM_TEST_MOVE_SIZE_PAGES / 2) { + if (lastpages < move_pages / 2) { *maxslots = 0; return false; } @@ -582,8 +588,9 @@ static void test_memslot_do_unmap(struct vm_data *data, uint64_t offsp, uint64_t count) { uint64_t gpa, ctr; + uint32_t guest_page_size = data->vm->page_size; - for (gpa = MEM_TEST_GPA + offsp * 4096, ctr = 0; ctr < count; ) { + for (gpa = MEM_TEST_GPA + offsp * guest_page_size, ctr = 0; ctr < count; ) { uint64_t npages; void *hva; int ret; @@ -591,12 +598,12 @@ static void test_memslot_do_unmap(struct vm_data *data, hva = vm_gpa2hva(data, gpa, &npages); TEST_ASSERT(npages, "Empty memory slot at gptr 0x%"PRIx64, gpa); npages = min(npages, count - ctr); - ret = madvise(hva, npages * 4096, MADV_DONTNEED); 
+ ret = madvise(hva, npages * guest_page_size, MADV_DONTNEED); TEST_ASSERT(!ret, "madvise(%p, MADV_DONTNEED) on VM memory should not fail for gptr 0x%"PRIx64, hva, gpa); ctr += npages; - gpa += npages * 4096; + gpa += npages * guest_page_size; } TEST_ASSERT(ctr == count, "madvise(MADV_DONTNEED) should exactly cover all of the requested area"); @@ -607,11 +614,12 @@ static void test_memslot_map_unmap_check(struct vm_data *data, { uint64_t gpa; uint64_t *val; + uint32_t guest_page_size = data->vm->page_size; if (!map_unmap_verify) return; - gpa = MEM_TEST_GPA + offsp * 4096; + gpa = MEM_TEST_GPA + offsp * guest_page_size; val = (typeof(val))vm_gpa2hva(data, gpa, NULL); TEST_ASSERT(*val == valexp, "Guest written values should read back correctly before unmap (%"PRIu64" vs %"PRIu64" @ %"PRIx64")", @@ -621,12 +629,14 @@ static void test_memslot_map_unmap_check(struct vm_data *data, static void test_memslot_map_loop(struct vm_data *data, struct sync_area *sync) { + uint32_t guest_page_size = data->vm->page_size; + uint64_t guest_pages = MEM_TEST_MAP_SIZE / guest_page_size; + /* * Unmap the second half of the test area while guest writes to (maps) * the first half. */ - test_memslot_do_unmap(data, MEM_TEST_MAP_SIZE_PAGES / 2, - MEM_TEST_MAP_SIZE_PAGES / 2); + test_memslot_do_unmap(data, guest_pages / 2, guest_pages / 2); /* * Wait for the guest to finish writing the first half of the test @@ -637,10 +647,8 @@ static void test_memslot_map_loop(struct vm_data *data, struct sync_area *sync) */ host_perform_sync(sync); test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1); - test_memslot_map_unmap_check(data, - MEM_TEST_MAP_SIZE_PAGES / 2 - 1, - MEM_TEST_VAL_1); - test_memslot_do_unmap(data, 0, MEM_TEST_MAP_SIZE_PAGES / 2); + test_memslot_map_unmap_check(data, guest_pages / 2 - 1, MEM_TEST_VAL_1); + test_memslot_do_unmap(data, 0, guest_pages / 2); /* @@ -653,16 +661,16 @@ static void test_memslot_map_loop(struct vm_data *data, struct sync_area *sync) * the test area. 
*/ host_perform_sync(sync); - test_memslot_map_unmap_check(data, MEM_TEST_MAP_SIZE_PAGES / 2, - MEM_TEST_VAL_2); - test_memslot_map_unmap_check(data, MEM_TEST_MAP_SIZE_PAGES - 1, - MEM_TEST_VAL_2); + test_memslot_map_unmap_check(data, guest_pages / 2, MEM_TEST_VAL_2); + test_memslot_map_unmap_check(data, guest_pages - 1, MEM_TEST_VAL_2); } static void test_memslot_unmap_loop_common(struct vm_data *data, struct sync_area *sync, uint64_t chunk) { + uint32_t guest_page_size = data->vm->page_size; + uint64_t guest_pages = MEM_TEST_UNMAP_SIZE / guest_page_size; uint64_t ctr; /* @@ -674,42 +682,49 @@ static void test_memslot_unmap_loop_common(struct vm_data *data, */ host_perform_sync(sync); test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1); - for (ctr = 0; ctr < MEM_TEST_UNMAP_SIZE_PAGES / 2; ctr += chunk) + for (ctr = 0; ctr < guest_pages / 2; ctr += chunk) test_memslot_do_unmap(data, ctr, chunk); /* Likewise, but for the opposite host / guest areas */ host_perform_sync(sync); - test_memslot_map_unmap_check(data, MEM_TEST_UNMAP_SIZE_PAGES / 2, - MEM_TEST_VAL_2); - for (ctr = MEM_TEST_UNMAP_SIZE_PAGES / 2; - ctr < MEM_TEST_UNMAP_SIZE_PAGES; ctr += chunk) + test_memslot_map_unmap_check(data, guest_pages / 2, MEM_TEST_VAL_2); + for (ctr = guest_pages / 2; ctr < guest_pages; ctr += chunk) test_memslot_do_unmap(data, ctr, chunk); } static void test_memslot_unmap_loop(struct vm_data *data, struct sync_area *sync) { - test_memslot_unmap_loop_common(data, sync, 1); + uint32_t host_page_size = getpagesize(); + uint32_t guest_page_size = data->vm->page_size; + uint64_t guest_chunk_pages = guest_page_size >= host_page_size ? 
+ 1 : host_page_size / guest_page_size; + + test_memslot_unmap_loop_common(data, sync, guest_chunk_pages); } static void test_memslot_unmap_loop_chunked(struct vm_data *data, struct sync_area *sync) { - test_memslot_unmap_loop_common(data, sync, MEM_TEST_UNMAP_CHUNK_PAGES); + uint32_t guest_page_size = data->vm->page_size; + uint64_t guest_chunk_pages = MEM_TEST_UNMAP_CHUNK_SIZE / guest_page_size; + + test_memslot_unmap_loop_common(data, sync, guest_chunk_pages); } static void test_memslot_rw_loop(struct vm_data *data, struct sync_area *sync) { uint64_t gptr; + uint32_t guest_page_size = data->vm->page_size; - for (gptr = MEM_TEST_GPA + 4096 / 2; - gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += 4096) + for (gptr = MEM_TEST_GPA + guest_page_size / 2; + gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += guest_page_size) *(uint64_t *)vm_gpa2hva(data, gptr, NULL) = MEM_TEST_VAL_2; host_perform_sync(sync); for (gptr = MEM_TEST_GPA; - gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += 4096) { + gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += guest_page_size) { uint64_t *vptr = (typeof(vptr))vm_gpa2hva(data, gptr, NULL); uint64_t val = *vptr; @@ -738,7 +753,7 @@ static bool test_execute(int nslots, uint64_t *maxslots, struct timespec *slot_runtime, struct timespec *guest_runtime) { - uint64_t mem_size = tdata->mem_size ? : MEM_SIZE_PAGES; + uint64_t mem_size = tdata->mem_size ? 
: MEM_SIZE; struct vm_data *data; struct sync_area *sync; struct timespec tstart; @@ -753,6 +768,7 @@ static bool test_execute(int nslots, uint64_t *maxslots, sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL); + sync->guest_page_size = data->vm->page_size; if (tdata->prepare && !tdata->prepare(data, sync, maxslots)) { ret = false; @@ -786,19 +802,19 @@ exit_free: static const struct test_data tests[] = { { .name = "map", - .mem_size = MEM_SIZE_MAP_PAGES, + .mem_size = MEM_SIZE_MAP, .guest_code = guest_code_test_memslot_map, .loop = test_memslot_map_loop, }, { .name = "unmap", - .mem_size = MEM_TEST_UNMAP_SIZE_PAGES + 1, + .mem_size = MEM_TEST_UNMAP_SIZE + 4096, .guest_code = guest_code_test_memslot_unmap, .loop = test_memslot_unmap_loop, }, { .name = "unmap chunked", - .mem_size = MEM_TEST_UNMAP_SIZE_PAGES + 1, + .mem_size = MEM_TEST_UNMAP_SIZE + 4096, .guest_code = guest_code_test_memslot_unmap, .loop = test_memslot_unmap_loop_chunked, }, @@ -856,6 +872,35 @@ static void help(char *name, struct test_args *targs) pr_info("%d: %s\n", ctr, tests[ctr].name); } +static bool check_memory_sizes(void) +{ + uint32_t guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size; + + if (MEM_SIZE % guest_page_size || + MEM_TEST_SIZE % guest_page_size) { + pr_info("invalid MEM_SIZE or MEM_TEST_SIZE\n"); + return false; + } + + if (MEM_SIZE_MAP % guest_page_size || + MEM_TEST_MAP_SIZE % guest_page_size || + (MEM_TEST_MAP_SIZE / guest_page_size) <= 2 || + (MEM_TEST_MAP_SIZE / guest_page_size) % 2) { + pr_info("invalid MEM_SIZE_MAP or MEM_TEST_MAP_SIZE\n"); + return false; + } + + if (MEM_TEST_UNMAP_SIZE > MEM_TEST_SIZE || + MEM_TEST_UNMAP_SIZE % guest_page_size || + (MEM_TEST_UNMAP_SIZE / guest_page_size) % + (2 * MEM_TEST_UNMAP_CHUNK_SIZE / guest_page_size)) { + pr_info("invalid MEM_TEST_UNMAP_SIZE or MEM_TEST_UNMAP_CHUNK_SIZE\n"); + return false; + } + + return true; +} + static bool parse_args(int argc, char *argv[], struct test_args *targs) { @@ -1015,6 +1060,9 
@@ int main(int argc, char *argv[]) /* Tell stdout not to buffer its content */ setbuf(stdout, NULL); + if (!check_memory_sizes()) + return -1; + if (!parse_args(argc, argv, &targs)) return -1; From 88a64e65484ef6b5cb09fe545d0dd00c950a1131 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 20 Oct 2022 15:12:08 +0800 Subject: [PATCH 1323/4122] KVM: selftests: memslot_perf_test: Consolidate memory The addresses and sizes passed to vm_userspace_mem_region_add() and madvise() should be aligned to host page size, which can be 64KB on aarch64. So it's wrong by passing additional fixed 4KB memory area to various tests. Fix it by passing additional fixed 64KB memory area to various tests. We also add checks to ensure that none of host/guest page size exceeds 64KB. MEM_TEST_MOVE_SIZE is fixed up to 192KB either. With this, the following command works fine on 64KB-page-size-host and 4KB-page-size-guest. # ./memslot_perf_test -v -s 512 Signed-off-by: Gavin Shan Reviewed-by: Maciej S. Szmigiero Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221020071209.559062-6-gshan@redhat.com --- .../testing/selftests/kvm/memslot_perf_test.c | 43 +++++++++++-------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index 9af61ca8ad0a..daebc264de5a 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -20,17 +20,20 @@ #include #include +#include #include #include #include -#define MEM_SIZE ((512U << 20) + 4096) -#define MEM_GPA 0x10000000UL +#define MEM_EXTRA_SIZE SZ_64K + +#define MEM_SIZE (SZ_512M + MEM_EXTRA_SIZE) +#define MEM_GPA SZ_256M #define MEM_AUX_GPA MEM_GPA #define MEM_SYNC_GPA MEM_AUX_GPA -#define MEM_TEST_GPA (MEM_AUX_GPA + 4096) -#define MEM_TEST_SIZE (MEM_SIZE - 4096) +#define MEM_TEST_GPA (MEM_AUX_GPA + MEM_EXTRA_SIZE) +#define MEM_TEST_SIZE (MEM_SIZE - MEM_EXTRA_SIZE) /* * 32 MiB is 
max size that gets well over 100 iterations on 509 slots. @@ -38,8 +41,8 @@ * 8194 slots in use can then be tested (although with slightly * limited resolution). */ -#define MEM_SIZE_MAP ((32U << 20) + 4096) -#define MEM_TEST_MAP_SIZE (MEM_SIZE_MAP - 4096) +#define MEM_SIZE_MAP (SZ_32M + MEM_EXTRA_SIZE) +#define MEM_TEST_MAP_SIZE (MEM_SIZE_MAP - MEM_EXTRA_SIZE) /* * 128 MiB is min size that fills 32k slots with at least one page in each @@ -47,8 +50,8 @@ * * 2 MiB chunk size like a typical huge page */ -#define MEM_TEST_UNMAP_SIZE (128U << 20) -#define MEM_TEST_UNMAP_CHUNK_SIZE (2U << 20) +#define MEM_TEST_UNMAP_SIZE SZ_128M +#define MEM_TEST_UNMAP_CHUNK_SIZE SZ_2M /* * For the move active test the middle of the test area is placed on @@ -64,12 +67,12 @@ * * architecture slots memory-per-slot memory-on-last-slot * -------------------------------------------------------------- - * x86-4KB 32763 16KB 100KB - * arm64-4KB 32766 16KB 52KB - * arm64-16KB 32766 16KB 48KB - * arm64-64KB 8192 64KB 64KB + * x86-4KB 32763 16KB 160KB + * arm64-4KB 32766 16KB 112KB + * arm64-16KB 32766 16KB 112KB + * arm64-64KB 8192 64KB 128KB */ -#define MEM_TEST_MOVE_SIZE 0x10000 +#define MEM_TEST_MOVE_SIZE (3 * SZ_64K) #define MEM_TEST_MOVE_GPA_DEST (MEM_GPA + MEM_SIZE) static_assert(MEM_TEST_MOVE_SIZE <= MEM_TEST_SIZE, "invalid move test region size"); @@ -533,7 +536,6 @@ static bool test_memslot_move_prepare(struct vm_data *data, uint64_t *maxslots, bool isactive) { uint32_t guest_page_size = data->vm->page_size; - uint64_t move_pages = MEM_TEST_MOVE_SIZE / guest_page_size; uint64_t movesrcgpa, movetestgpa; movesrcgpa = vm_slot2gpa(data, data->nslots - 1); @@ -542,7 +544,7 @@ static bool test_memslot_move_prepare(struct vm_data *data, uint64_t lastpages; vm_gpa2hva(data, movesrcgpa, &lastpages); - if (lastpages < move_pages / 2) { + if (lastpages * guest_page_size < MEM_TEST_MOVE_SIZE / 2) { *maxslots = 0; return false; } @@ -808,13 +810,13 @@ static const struct test_data tests[] = { }, { 
.name = "unmap", - .mem_size = MEM_TEST_UNMAP_SIZE + 4096, + .mem_size = MEM_TEST_UNMAP_SIZE + MEM_EXTRA_SIZE, .guest_code = guest_code_test_memslot_unmap, .loop = test_memslot_unmap_loop, }, { .name = "unmap chunked", - .mem_size = MEM_TEST_UNMAP_SIZE + 4096, + .mem_size = MEM_TEST_UNMAP_SIZE + MEM_EXTRA_SIZE, .guest_code = guest_code_test_memslot_unmap, .loop = test_memslot_unmap_loop_chunked, }, @@ -874,8 +876,15 @@ static void help(char *name, struct test_args *targs) static bool check_memory_sizes(void) { + uint32_t host_page_size = getpagesize(); uint32_t guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size; + if (host_page_size > SZ_64K || guest_page_size > SZ_64K) { + pr_info("Unsupported page size on host (0x%x) or guest (0x%x)\n", + host_page_size, guest_page_size); + return false; + } + if (MEM_SIZE % guest_page_size || MEM_TEST_SIZE % guest_page_size) { pr_info("invalid MEM_SIZE or MEM_TEST_SIZE\n"); From a69170c65acdf430e24fc1b6174dcc3aa501fe2f Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 20 Oct 2022 15:12:09 +0800 Subject: [PATCH 1324/4122] KVM: selftests: memslot_perf_test: Report optimal memory slots The memory area in each slot should be aligned to host page size. Otherwise, the test will fail. For example, the following command fails with the following messages with 64KB-page-size-host and 4KB-page-size-guest. It's not user friendly to abort the test. Let's do something to report the optimal memory slots, instead of failing the test. 
# ./memslot_perf_test -v -s 1000 Number of memory slots: 999 Testing map performance with 1 runs, 5 seconds each Adding slots 1..999, each slot with 8 pages + 216 extra pages last ==== Test Assertion Failure ==== lib/kvm_util.c:824: vm_adjust_num_guest_pages(vm->mode, npages) == npages pid=19872 tid=19872 errno=0 - Success 1 0x00000000004065b3: vm_userspace_mem_region_add at kvm_util.c:822 2 0x0000000000401d6b: prepare_vm at memslot_perf_test.c:273 3 (inlined by) test_execute at memslot_perf_test.c:756 4 (inlined by) test_loop at memslot_perf_test.c:994 5 (inlined by) main at memslot_perf_test.c:1073 6 0x0000ffff7ebb4383: ?? ??:0 7 0x00000000004021ff: _start at :? Number of guest pages is not compatible with the host. Try npages=16 Report the optimal memory slots instead of failing the test when the memory area in each slot isn't aligned to host page size. With this applied, the optimal memory slots is reported. # ./memslot_perf_test -v -s 1000 Number of memory slots: 999 Testing map performance with 1 runs, 5 seconds each Memslot count too high for this test, decrease the cap (max is 514) Signed-off-by: Gavin Shan Reviewed-by: Maciej S. 
Szmigiero Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221020071209.559062-7-gshan@redhat.com --- .../testing/selftests/kvm/memslot_perf_test.c | 45 +++++++++++++++++-- 1 file changed, 41 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index daebc264de5a..2ad40f7c9c08 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -239,16 +239,52 @@ static struct vm_data *alloc_vm(void) return data; } +static bool check_slot_pages(uint32_t host_page_size, uint32_t guest_page_size, + uint64_t pages_per_slot, uint64_t rempages) +{ + if (!pages_per_slot) + return false; + + if ((pages_per_slot * guest_page_size) % host_page_size) + return false; + + if ((rempages * guest_page_size) % host_page_size) + return false; + + return true; +} + + +static uint64_t get_max_slots(struct vm_data *data, uint32_t host_page_size) +{ + uint32_t guest_page_size = data->vm->page_size; + uint64_t mempages, pages_per_slot, rempages; + uint64_t slots; + + mempages = data->npages; + slots = data->nslots; + while (--slots > 1) { + pages_per_slot = mempages / slots; + rempages = mempages % pages_per_slot; + if (check_slot_pages(host_page_size, guest_page_size, + pages_per_slot, rempages)) + return slots + 1; /* slot 0 is reserved */ + } + + return 0; +} + static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots, void *guest_code, uint64_t mem_size, struct timespec *slot_runtime) { uint64_t mempages, rempages; uint64_t guest_addr; - uint32_t slot, guest_page_size; + uint32_t slot, host_page_size, guest_page_size; struct timespec tstart; struct sync_area *sync; + host_page_size = getpagesize(); guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size; mempages = mem_size / guest_page_size; @@ -260,12 +296,13 @@ static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots, 
TEST_ASSERT(data->npages > 1, "Can't test without any memory"); data->nslots = nslots; data->pages_per_slot = data->npages / data->nslots; - if (!data->pages_per_slot) { - *maxslots = data->npages + 1; + rempages = data->npages % data->nslots; + if (!check_slot_pages(host_page_size, guest_page_size, + data->pages_per_slot, rempages)) { + *maxslots = get_max_slots(data, host_page_size); return false; } - rempages = data->npages % data->nslots; data->hva_slots = malloc(sizeof(*data->hva_slots) * data->nslots); TEST_ASSERT(data->hva_slots, "malloc() fail"); From 1a6182033f2d5c481aec1f8c1c26ebc649693d57 Mon Sep 17 00:00:00 2001 From: Reiji Watanabe Date: Wed, 19 Oct 2022 22:41:54 -0700 Subject: [PATCH 1325/4122] KVM: arm64: selftests: Use FIELD_GET() to extract ID register fields Use FIELD_GET() macro to extract ID register fields for existing aarch64 selftests code. No functional change intended. Signed-off-by: Reiji Watanabe Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221020054202.2119018-2-reijiw@google.com --- tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c | 3 ++- tools/testing/selftests/kvm/aarch64/debug-exceptions.c | 3 ++- tools/testing/selftests/kvm/lib/aarch64/processor.c | 7 ++++--- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c b/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c index 6f9c1f19c7f6..b6a5e8861b35 100644 --- a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c +++ b/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c @@ -13,6 +13,7 @@ #include "kvm_util.h" #include "processor.h" #include "test_util.h" +#include #define BAD_ID_REG_VAL 0x1badc0deul @@ -145,7 +146,7 @@ static bool vcpu_aarch64_only(struct kvm_vcpu *vcpu) vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &val); - el0 = (val & ARM64_FEATURE_MASK(ID_AA64PFR0_EL0)) >> ID_AA64PFR0_EL0_SHIFT; + el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL0), 
val); return el0 == ID_AA64PFR0_ELx_64BIT_ONLY; } diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c index 947bd201435c..3808d3d75055 100644 --- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c @@ -2,6 +2,7 @@ #include <test_util.h> #include <kvm_util.h> #include <processor.h> +#include <linux/bitfield.h> #define MDSCR_KDE (1 << 13) #define MDSCR_MDE (1 << 15) @@ -284,7 +285,7 @@ static int debug_version(struct kvm_vcpu *vcpu) uint64_t id_aa64dfr0; vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1), &id_aa64dfr0); - return id_aa64dfr0 & 0xf; + return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER), id_aa64dfr0); } static void test_guest_debug_exceptions(void) diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index 6f5551368944..7c96b931edd5 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -11,6 +11,7 @@ #include "guest_modes.h" #include "kvm_util.h" #include "processor.h" +#include <linux/bitfield.h> #define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN 0xac0000 @@ -486,9 +487,9 @@ void aarch64_get_supported_page_sizes(uint32_t ipa, err = ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg); TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_GET_ONE_REG, vcpu_fd)); - *ps4k = ((val >> 28) & 0xf) != 0xf; - *ps64k = ((val >> 24) & 0xf) == 0; - *ps16k = ((val >> 20) & 0xf) != 0; + *ps4k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_TGRAN4), val) != 0xf; + *ps64k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_TGRAN64), val) == 0; + *ps16k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_TGRAN16), val) != 0; close(vcpu_fd); close(vm_fd); From f6d02aa28ae21161d64300bac62b2dde85584004 Mon Sep 17 00:00:00 2001 From: Reiji Watanabe Date: Wed, 19 Oct 2022 22:41:55 -0700 Subject: [PATCH 1326/4122] KVM: arm64: selftests: Add write_dbg{b,w}{c,v}r helpers in debug-exceptions Introduce helpers in the 
debug-exceptions test to write to dbg{b,w}{c,v}r registers. Those helpers will be useful for test cases that will be added to the test in subsequent patches. No functional change intended. Signed-off-by: Reiji Watanabe Reviewed-by: Ricardo Koller Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221020054202.2119018-3-reijiw@google.com --- .../selftests/kvm/aarch64/debug-exceptions.c | 72 +++++++++++++++++-- 1 file changed, 68 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c index 3808d3d75055..d9884907fe87 100644 --- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c @@ -30,6 +30,69 @@ static volatile uint64_t svc_addr; static volatile uint64_t ss_addr[4], ss_idx; #define PC(v) ((uint64_t)&(v)) +#define GEN_DEBUG_WRITE_REG(reg_name) \ +static void write_##reg_name(int num, uint64_t val) \ +{ \ + switch (num) { \ + case 0: \ + write_sysreg(val, reg_name##0_el1); \ + break; \ + case 1: \ + write_sysreg(val, reg_name##1_el1); \ + break; \ + case 2: \ + write_sysreg(val, reg_name##2_el1); \ + break; \ + case 3: \ + write_sysreg(val, reg_name##3_el1); \ + break; \ + case 4: \ + write_sysreg(val, reg_name##4_el1); \ + break; \ + case 5: \ + write_sysreg(val, reg_name##5_el1); \ + break; \ + case 6: \ + write_sysreg(val, reg_name##6_el1); \ + break; \ + case 7: \ + write_sysreg(val, reg_name##7_el1); \ + break; \ + case 8: \ + write_sysreg(val, reg_name##8_el1); \ + break; \ + case 9: \ + write_sysreg(val, reg_name##9_el1); \ + break; \ + case 10: \ + write_sysreg(val, reg_name##10_el1); \ + break; \ + case 11: \ + write_sysreg(val, reg_name##11_el1); \ + break; \ + case 12: \ + write_sysreg(val, reg_name##12_el1); \ + break; \ + case 13: \ + write_sysreg(val, reg_name##13_el1); \ + break; \ + case 14: \ + write_sysreg(val, reg_name##14_el1); \ + break; \ + case 15: \ + write_sysreg(val, 
reg_name##15_el1); \ + break; \ + default: \ + GUEST_ASSERT(0); \ + } \ +} + +/* Define write_dbgbcr()/write_dbgbvr()/write_dbgwcr()/write_dbgwvr() */ +GEN_DEBUG_WRITE_REG(dbgbcr) +GEN_DEBUG_WRITE_REG(dbgbvr) +GEN_DEBUG_WRITE_REG(dbgwcr) +GEN_DEBUG_WRITE_REG(dbgwvr) + static void reset_debug_state(void) { asm volatile("msr daifset, #8"); @@ -61,8 +124,9 @@ static void install_wp(uint64_t addr) uint32_t mdscr; wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E; - write_sysreg(wcr, dbgwcr0_el1); - write_sysreg(addr, dbgwvr0_el1); + write_dbgwcr(0, wcr); + write_dbgwvr(0, addr); + isb(); asm volatile("msr daifclr, #8"); @@ -78,8 +142,8 @@ static void install_hw_bp(uint64_t addr) uint32_t mdscr; bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E; - write_sysreg(bcr, dbgbcr0_el1); - write_sysreg(addr, dbgbvr0_el1); + write_dbgbcr(0, bcr); + write_dbgbvr(0, addr); isb(); asm volatile("msr daifclr, #8"); From 700b8860e02cbaa7dd1181a914ff38e0fae18bf0 Mon Sep 17 00:00:00 2001 From: Reiji Watanabe Date: Wed, 19 Oct 2022 22:41:56 -0700 Subject: [PATCH 1327/4122] KVM: arm64: selftests: Remove the hard-coded {b,w}pn#0 from debug-exceptions Remove the hard-coded {break,watch}point #0 from the guest_code() in debug-exceptions to allow the {break,watch}point number to be specified. Change reset_debug_state() to zero all dbg{b,w}{c,v}r_el1 registers so that guest_code() can use the function to reset those registers even when non-zero {break,watch}points are specified for guest_code(). Subsequent patches will add test cases for non-zero {break,watch}points. 
Signed-off-by: Reiji Watanabe Reviewed-by: Ricardo Koller Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221020054202.2119018-4-reijiw@google.com --- .../selftests/kvm/aarch64/debug-exceptions.c | 50 ++++++++++++------- 1 file changed, 32 insertions(+), 18 deletions(-) diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c index d9884907fe87..608a6c8db9a2 100644 --- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c @@ -95,6 +95,9 @@ GEN_DEBUG_WRITE_REG(dbgwvr) static void reset_debug_state(void) { + uint8_t brps, wrps, i; + uint64_t dfr0; + asm volatile("msr daifset, #8"); write_sysreg(0, osdlr_el1); @@ -102,11 +105,20 @@ static void reset_debug_state(void) isb(); write_sysreg(0, mdscr_el1); - /* This test only uses the first bp and wp slot. */ - write_sysreg(0, dbgbvr0_el1); - write_sysreg(0, dbgbcr0_el1); - write_sysreg(0, dbgwcr0_el1); - write_sysreg(0, dbgwvr0_el1); + + /* Reset all bcr/bvr/wcr/wvr registers */ + dfr0 = read_sysreg(id_aa64dfr0_el1); + brps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_BRPS), dfr0); + for (i = 0; i <= brps; i++) { + write_dbgbcr(i, 0); + write_dbgbvr(i, 0); + } + wrps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_WRPS), dfr0); + for (i = 0; i <= wrps; i++) { + write_dbgwcr(i, 0); + write_dbgwvr(i, 0); + } + isb(); } @@ -118,14 +130,14 @@ static void enable_os_lock(void) GUEST_ASSERT(read_sysreg(oslsr_el1) & 2); } -static void install_wp(uint64_t addr) +static void install_wp(uint8_t wpn, uint64_t addr) { uint32_t wcr; uint32_t mdscr; wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E; - write_dbgwcr(0, wcr); - write_dbgwvr(0, addr); + write_dbgwcr(wpn, wcr); + write_dbgwvr(wpn, addr); isb(); @@ -136,14 +148,14 @@ static void install_wp(uint64_t addr) isb(); } -static void install_hw_bp(uint64_t addr) +static void install_hw_bp(uint8_t bpn, 
uint64_t addr) { uint32_t bcr; uint32_t mdscr; bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E; - write_dbgbcr(0, bcr); - write_dbgbvr(0, addr); + write_dbgbcr(bpn, bcr); + write_dbgbvr(bpn, addr); isb(); asm volatile("msr daifclr, #8"); @@ -166,7 +178,7 @@ static void install_ss(void) static volatile char write_data; -static void guest_code(void) +static void guest_code(uint8_t bpn, uint8_t wpn) { GUEST_SYNC(0); @@ -179,7 +191,7 @@ static void guest_code(void) /* Hardware-breakpoint */ reset_debug_state(); - install_hw_bp(PC(hw_bp)); + install_hw_bp(bpn, PC(hw_bp)); asm volatile("hw_bp: nop"); GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp)); @@ -187,7 +199,7 @@ static void guest_code(void) /* Hardware-breakpoint + svc */ reset_debug_state(); - install_hw_bp(PC(bp_svc)); + install_hw_bp(bpn, PC(bp_svc)); asm volatile("bp_svc: svc #0"); GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_svc)); GUEST_ASSERT_EQ(svc_addr, PC(bp_svc) + 4); @@ -196,7 +208,7 @@ static void guest_code(void) /* Hardware-breakpoint + software-breakpoint */ reset_debug_state(); - install_hw_bp(PC(bp_brk)); + install_hw_bp(bpn, PC(bp_brk)); asm volatile("bp_brk: brk #0"); GUEST_ASSERT_EQ(sw_bp_addr, PC(bp_brk)); GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_brk)); @@ -205,7 +217,7 @@ static void guest_code(void) /* Watchpoint */ reset_debug_state(); - install_wp(PC(write_data)); + install_wp(wpn, PC(write_data)); write_data = 'x'; GUEST_ASSERT_EQ(write_data, 'x'); GUEST_ASSERT_EQ(wp_data_addr, PC(write_data)); @@ -239,7 +251,7 @@ static void guest_code(void) /* OS Lock blocking hardware-breakpoint */ reset_debug_state(); enable_os_lock(); - install_hw_bp(PC(hw_bp2)); + install_hw_bp(bpn, PC(hw_bp2)); hw_bp_addr = 0; asm volatile("hw_bp2: nop"); GUEST_ASSERT_EQ(hw_bp_addr, 0); @@ -251,7 +263,7 @@ static void guest_code(void) enable_os_lock(); write_data = '\0'; wp_data_addr = 0; - install_wp(PC(write_data)); + install_wp(wpn, PC(write_data)); write_data = 'x'; GUEST_ASSERT_EQ(write_data, 'x'); GUEST_ASSERT_EQ(wp_data_addr, 
0); @@ -376,6 +388,8 @@ static void test_guest_debug_exceptions(void) vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_EC_SVC64, guest_svc_handler); + /* Run tests with breakpoint#0 and watchpoint#0. */ + vcpu_args_set(vcpu, 2, 0, 0); for (stage = 0; stage < 11; stage++) { vcpu_run(vcpu); From 152880d8edf5ad6df5b4b4915a4d9f9085ab8fef Mon Sep 17 00:00:00 2001 From: Reiji Watanabe Date: Wed, 19 Oct 2022 22:41:57 -0700 Subject: [PATCH 1328/4122] KVM: arm64: selftests: Add helpers to enable debug exceptions Add helpers to enable breakpoint and watchpoint exceptions. No functional change intended. Signed-off-by: Reiji Watanabe Reviewed-by: Ricardo Koller Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221020054202.2119018-5-reijiw@google.com --- .../selftests/kvm/aarch64/debug-exceptions.c | 29 ++++++++++--------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c index 608a6c8db9a2..0c237022f4d3 100644 --- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c @@ -130,17 +130,10 @@ static void enable_os_lock(void) GUEST_ASSERT(read_sysreg(oslsr_el1) & 2); } -static void install_wp(uint8_t wpn, uint64_t addr) +static void enable_monitor_debug_exceptions(void) { - uint32_t wcr; uint32_t mdscr; - wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E; - write_dbgwcr(wpn, wcr); - write_dbgwvr(wpn, addr); - - isb(); - asm volatile("msr daifclr, #8"); mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE; @@ -148,21 +141,29 @@ static void install_wp(uint8_t wpn, uint64_t addr) isb(); } +static void install_wp(uint8_t wpn, uint64_t addr) +{ + uint32_t wcr; + + wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E; + write_dbgwcr(wpn, wcr); + write_dbgwvr(wpn, addr); + + isb(); + + enable_monitor_debug_exceptions(); +} + 
static void install_hw_bp(uint8_t bpn, uint64_t addr) { uint32_t bcr; - uint32_t mdscr; bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E; write_dbgbcr(bpn, bcr); write_dbgbvr(bpn, addr); isb(); - asm volatile("msr daifclr, #8"); - - mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE; - write_sysreg(mdscr, mdscr_el1); - isb(); + enable_monitor_debug_exceptions(); } static void install_ss(void) From 948f439c9d0080972ec937f4aefbe51229546510 Mon Sep 17 00:00:00 2001 From: Reiji Watanabe Date: Wed, 19 Oct 2022 22:41:58 -0700 Subject: [PATCH 1329/4122] KVM: arm64: selftests: Stop unnecessary test stage tracking of debug-exceptions Currently, debug-exceptions test unnecessarily tracks some test stages using GUEST_SYNC(). The code for it needs to be updated as test cases are added or removed. Stop doing the unnecessary stage tracking, as they are not so useful and are a bit pain to maintain. Signed-off-by: Reiji Watanabe Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221020054202.2119018-6-reijiw@google.com --- .../selftests/kvm/aarch64/debug-exceptions.c | 46 ++++--------------- 1 file changed, 9 insertions(+), 37 deletions(-) diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c index 0c237022f4d3..040e4d7f8755 100644 --- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c @@ -181,23 +181,17 @@ static volatile char write_data; static void guest_code(uint8_t bpn, uint8_t wpn) { - GUEST_SYNC(0); - /* Software-breakpoint */ reset_debug_state(); asm volatile("sw_bp: brk #0"); GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp)); - GUEST_SYNC(1); - /* Hardware-breakpoint */ reset_debug_state(); install_hw_bp(bpn, PC(hw_bp)); asm volatile("hw_bp: nop"); GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp)); - GUEST_SYNC(2); - /* Hardware-breakpoint + svc */ reset_debug_state(); install_hw_bp(bpn, PC(bp_svc)); @@ -205,8 
+199,6 @@ static void guest_code(uint8_t bpn, uint8_t wpn) GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_svc)); GUEST_ASSERT_EQ(svc_addr, PC(bp_svc) + 4); - GUEST_SYNC(3); - /* Hardware-breakpoint + software-breakpoint */ reset_debug_state(); install_hw_bp(bpn, PC(bp_brk)); @@ -214,8 +206,6 @@ static void guest_code(uint8_t bpn, uint8_t wpn) GUEST_ASSERT_EQ(sw_bp_addr, PC(bp_brk)); GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_brk)); - GUEST_SYNC(4); - /* Watchpoint */ reset_debug_state(); install_wp(wpn, PC(write_data)); @@ -223,8 +213,6 @@ static void guest_code(uint8_t bpn, uint8_t wpn) GUEST_ASSERT_EQ(write_data, 'x'); GUEST_ASSERT_EQ(wp_data_addr, PC(write_data)); - GUEST_SYNC(5); - /* Single-step */ reset_debug_state(); install_ss(); @@ -238,8 +226,6 @@ static void guest_code(uint8_t bpn, uint8_t wpn) GUEST_ASSERT_EQ(ss_addr[1], PC(ss_start) + 4); GUEST_ASSERT_EQ(ss_addr[2], PC(ss_start) + 8); - GUEST_SYNC(6); - /* OS Lock does not block software-breakpoint */ reset_debug_state(); enable_os_lock(); @@ -247,8 +233,6 @@ static void guest_code(uint8_t bpn, uint8_t wpn) asm volatile("sw_bp2: brk #0"); GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp2)); - GUEST_SYNC(7); - /* OS Lock blocking hardware-breakpoint */ reset_debug_state(); enable_os_lock(); @@ -257,8 +241,6 @@ static void guest_code(uint8_t bpn, uint8_t wpn) asm volatile("hw_bp2: nop"); GUEST_ASSERT_EQ(hw_bp_addr, 0); - GUEST_SYNC(8); - /* OS Lock blocking watchpoint */ reset_debug_state(); enable_os_lock(); @@ -269,8 +251,6 @@ static void guest_code(uint8_t bpn, uint8_t wpn) GUEST_ASSERT_EQ(write_data, 'x'); GUEST_ASSERT_EQ(wp_data_addr, 0); - GUEST_SYNC(9); - /* OS Lock blocking single-step */ reset_debug_state(); enable_os_lock(); @@ -370,7 +350,6 @@ static void test_guest_debug_exceptions(void) struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct ucall uc; - int stage; vm = vm_create_with_one_vcpu(&vcpu, guest_code); ucall_init(vm, NULL); @@ -391,23 +370,16 @@ static void test_guest_debug_exceptions(void) /* Run tests with breakpoint#0 
and watchpoint#0. */ vcpu_args_set(vcpu, 2, 0, 0); - for (stage = 0; stage < 11; stage++) { - vcpu_run(vcpu); - switch (get_ucall(vcpu, &uc)) { - case UCALL_SYNC: - TEST_ASSERT(uc.args[1] == stage, - "Stage %d: Unexpected sync ucall, got %lx", - stage, (ulong)uc.args[1]); - break; - case UCALL_ABORT: - REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx"); - break; - case UCALL_DONE: - goto done; - default: - TEST_FAIL("Unknown ucall %lu", uc.cmd); - } + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx"); + break; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); } done: From 5dd544e882d96d43b363c5ef64683281f2a386d9 Mon Sep 17 00:00:00 2001 From: Reiji Watanabe Date: Wed, 19 Oct 2022 22:41:59 -0700 Subject: [PATCH 1330/4122] KVM: arm64: selftests: Change debug_version() to take ID_AA64DFR0_EL1 Change debug_version() to take the ID_AA64DFR0_EL1 value instead of vcpu as an argument, and change its callsite to read ID_AA64DFR0_EL1 (and pass it to debug_version()). Subsequent patches will reuse the register value in the callsite. No functional change intended. 
Signed-off-by: Reiji Watanabe Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221020054202.2119018-7-reijiw@google.com --- tools/testing/selftests/kvm/aarch64/debug-exceptions.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c index 040e4d7f8755..72ec9bb16682 100644 --- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c @@ -337,11 +337,8 @@ static void guest_code_ss(int test_cnt) GUEST_DONE(); } -static int debug_version(struct kvm_vcpu *vcpu) +static int debug_version(uint64_t id_aa64dfr0) { - uint64_t id_aa64dfr0; - - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1), &id_aa64dfr0); return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER), id_aa64dfr0); } @@ -466,9 +463,11 @@ int main(int argc, char *argv[]) struct kvm_vm *vm; int opt; int ss_iteration = 10000; + uint64_t aa64dfr0; vm = vm_create_with_one_vcpu(&vcpu, guest_code); - __TEST_REQUIRE(debug_version(vcpu) >= 6, + vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1), &aa64dfr0); + __TEST_REQUIRE(debug_version(aa64dfr0) >= 6, "Armv8 debug architecture not supported."); kvm_vm_free(vm); From 142365932f5f296df593dd653d79194ff5457722 Mon Sep 17 00:00:00 2001 From: Reiji Watanabe Date: Wed, 19 Oct 2022 22:42:00 -0700 Subject: [PATCH 1331/4122] KVM: arm64: selftests: Add a test case for a linked breakpoint Currently, the debug-exceptions test doesn't have a test case for a linked breakpoint. Add a test case for the linked breakpoint to the test. The new test case uses a pair of breakpoints. One is the highest numbered context-aware breakpoint (for Context ID match), and the other one is breakpoint#0 (for Address Match), which is linked to the context-aware breakpoint. 
Signed-off-by: Reiji Watanabe Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221020054202.2119018-8-reijiw@google.com --- .../selftests/kvm/aarch64/debug-exceptions.c | 63 +++++++++++++++++-- 1 file changed, 57 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c index 72ec9bb16682..362e7668a978 100644 --- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c @@ -12,6 +12,10 @@ #define DBGBCR_EXEC (0x0 << 3) #define DBGBCR_EL1 (0x1 << 1) #define DBGBCR_E (0x1 << 0) +#define DBGBCR_LBN_SHIFT 16 +#define DBGBCR_BT_SHIFT 20 +#define DBGBCR_BT_ADDR_LINK_CTX (0x1 << DBGBCR_BT_SHIFT) +#define DBGBCR_BT_CTX_LINK (0x3 << DBGBCR_BT_SHIFT) #define DBGWCR_LEN8 (0xff << 5) #define DBGWCR_RD (0x1 << 3) @@ -22,7 +26,7 @@ #define SPSR_D (1 << 9) #define SPSR_SS (1 << 21) -extern unsigned char sw_bp, sw_bp2, hw_bp, hw_bp2, bp_svc, bp_brk, hw_wp, ss_start; +extern unsigned char sw_bp, sw_bp2, hw_bp, hw_bp2, bp_svc, bp_brk, hw_wp, ss_start, hw_bp_ctx; extern unsigned char iter_ss_begin, iter_ss_end; static volatile uint64_t sw_bp_addr, hw_bp_addr; static volatile uint64_t wp_addr, wp_data_addr; @@ -105,6 +109,7 @@ static void reset_debug_state(void) isb(); write_sysreg(0, mdscr_el1); + write_sysreg(0, contextidr_el1); /* Reset all bcr/bvr/wcr/wvr registers */ dfr0 = read_sysreg(id_aa64dfr0_el1); @@ -166,6 +171,31 @@ static void install_hw_bp(uint8_t bpn, uint64_t addr) enable_monitor_debug_exceptions(); } +void install_hw_bp_ctx(uint8_t addr_bp, uint8_t ctx_bp, uint64_t addr, + uint64_t ctx) +{ + uint32_t addr_bcr, ctx_bcr; + + /* Setup a context-aware breakpoint for Linked Context ID Match */ + ctx_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E | + DBGBCR_BT_CTX_LINK; + write_dbgbcr(ctx_bp, ctx_bcr); + write_dbgbvr(ctx_bp, ctx); + + /* + * Setup a normal breakpoint for Linked Address Match, and 
link it + * to the context-aware breakpoint. + */ + addr_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E | + DBGBCR_BT_ADDR_LINK_CTX | + ((uint32_t)ctx_bp << DBGBCR_LBN_SHIFT); + write_dbgbcr(addr_bp, addr_bcr); + write_dbgbvr(addr_bp, addr); + isb(); + + enable_monitor_debug_exceptions(); +} + static void install_ss(void) { uint32_t mdscr; @@ -179,8 +209,10 @@ static void install_ss(void) static volatile char write_data; -static void guest_code(uint8_t bpn, uint8_t wpn) +static void guest_code(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn) { + uint64_t ctx = 0xabcdef; /* a random context number */ + /* Software-breakpoint */ reset_debug_state(); asm volatile("sw_bp: brk #0"); @@ -263,6 +295,17 @@ static void guest_code(uint8_t bpn, uint8_t wpn) : : : "x0"); GUEST_ASSERT_EQ(ss_addr[0], 0); + /* Linked hardware-breakpoint */ + hw_bp_addr = 0; + reset_debug_state(); + install_hw_bp_ctx(bpn, ctx_bpn, PC(hw_bp_ctx), ctx); + /* Set context id */ + write_sysreg(ctx, contextidr_el1); + isb(); + asm volatile("hw_bp_ctx: nop"); + write_sysreg(0, contextidr_el1); + GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp_ctx)); + GUEST_DONE(); } @@ -342,11 +385,12 @@ static int debug_version(uint64_t id_aa64dfr0) return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER), id_aa64dfr0); } -static void test_guest_debug_exceptions(void) +static void test_guest_debug_exceptions(uint64_t aa64dfr0) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct ucall uc; + uint8_t brp_num; vm = vm_create_with_one_vcpu(&vcpu, guest_code); ucall_init(vm, NULL); @@ -365,8 +409,15 @@ static void test_guest_debug_exceptions(void) vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_EC_SVC64, guest_svc_handler); - /* Run tests with breakpoint#0 and watchpoint#0. 
*/ - vcpu_args_set(vcpu, 2, 0, 0); + /* Number of breakpoints */ + brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_BRPS), aa64dfr0) + 1; + __TEST_REQUIRE(brp_num >= 2, "At least two breakpoints are required"); + + /* + * Run tests with breakpoint#0, watchpoint#0, and the higiest + * numbered (context-aware) breakpoint. + */ + vcpu_args_set(vcpu, 3, 0, 0, brp_num - 1); vcpu_run(vcpu); switch (get_ucall(vcpu, &uc)) { @@ -483,7 +534,7 @@ int main(int argc, char *argv[]) } } - test_guest_debug_exceptions(); + test_guest_debug_exceptions(aa64dfr0); test_single_step_from_userspace(ss_iteration); return 0; From 5ced4e533b676b1a582d89aba5328e4b316957e0 Mon Sep 17 00:00:00 2001 From: Reiji Watanabe Date: Wed, 19 Oct 2022 22:42:01 -0700 Subject: [PATCH 1332/4122] KVM: arm64: selftests: Add a test case for a linked watchpoint Currently, the debug-exceptions test doesn't have a test case for a linked watchpoint. Add a test case for the linked watchpoint to the test. The new test case uses the highest numbered context-aware breakpoint (for Context ID match), and the watchpoint#0, which is linked to the context-aware breakpoint. 
Signed-off-by: Reiji Watanabe Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221020054202.2119018-9-reijiw@google.com --- .../selftests/kvm/aarch64/debug-exceptions.c | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c index 362e7668a978..73a95e6b345e 100644 --- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c @@ -22,6 +22,9 @@ #define DBGWCR_WR (0x2 << 3) #define DBGWCR_EL1 (0x1 << 1) #define DBGWCR_E (0x1 << 0) +#define DBGWCR_LBN_SHIFT 16 +#define DBGWCR_WT_SHIFT 20 +#define DBGWCR_WT_LINK (0x1 << DBGWCR_WT_SHIFT) #define SPSR_D (1 << 9) #define SPSR_SS (1 << 21) @@ -171,6 +174,28 @@ static void install_hw_bp(uint8_t bpn, uint64_t addr) enable_monitor_debug_exceptions(); } +static void install_wp_ctx(uint8_t addr_wp, uint8_t ctx_bp, uint64_t addr, + uint64_t ctx) +{ + uint32_t wcr; + uint64_t ctx_bcr; + + /* Setup a context-aware breakpoint for Linked Context ID Match */ + ctx_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E | + DBGBCR_BT_CTX_LINK; + write_dbgbcr(ctx_bp, ctx_bcr); + write_dbgbvr(ctx_bp, ctx); + + /* Setup a linked watchpoint (linked to the context-aware breakpoint) */ + wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E | + DBGWCR_WT_LINK | ((uint32_t)ctx_bp << DBGWCR_LBN_SHIFT); + write_dbgwcr(addr_wp, wcr); + write_dbgwvr(addr_wp, addr); + isb(); + + enable_monitor_debug_exceptions(); +} + void install_hw_bp_ctx(uint8_t addr_bp, uint8_t ctx_bp, uint64_t addr, uint64_t ctx) { @@ -306,6 +331,16 @@ static void guest_code(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn) write_sysreg(0, contextidr_el1); GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp_ctx)); + /* Linked watchpoint */ + reset_debug_state(); + install_wp_ctx(wpn, ctx_bpn, PC(write_data), ctx); + /* Set context id */ + write_sysreg(ctx, contextidr_el1); + isb(); + 
write_data = 'x'; + GUEST_ASSERT_EQ(write_data, 'x'); + GUEST_ASSERT_EQ(wp_data_addr, PC(write_data)); + GUEST_DONE(); } From ebb8cc10316de3040efc4cfb40030f374cbbaa3b Mon Sep 17 00:00:00 2001 From: Reiji Watanabe Date: Wed, 19 Oct 2022 22:42:02 -0700 Subject: [PATCH 1333/4122] KVM: arm64: selftests: Test with every breakpoint/watchpoint Currently, the debug-exceptions test always uses only {break,watch}point#0 and the highest numbered context-aware breakpoint. Modify the test to use all {break,watch}points and context-aware breakpoints supported on the system. Signed-off-by: Reiji Watanabe Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221020054202.2119018-10-reijiw@google.com --- .../selftests/kvm/aarch64/debug-exceptions.c | 54 ++++++++++++++----- 1 file changed, 42 insertions(+), 12 deletions(-) diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c index 73a95e6b345e..b30add3e7726 100644 --- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c @@ -420,12 +420,11 @@ static int debug_version(uint64_t id_aa64dfr0) return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER), id_aa64dfr0); } -static void test_guest_debug_exceptions(uint64_t aa64dfr0) +static void test_guest_debug_exceptions(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct ucall uc; - uint8_t brp_num; vm = vm_create_with_one_vcpu(&vcpu, guest_code); ucall_init(vm, NULL); @@ -444,15 +443,9 @@ static void test_guest_debug_exceptions(uint64_t aa64dfr0) vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_EC_SVC64, guest_svc_handler); - /* Number of breakpoints */ - brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_BRPS), aa64dfr0) + 1; - __TEST_REQUIRE(brp_num >= 2, "At least two breakpoints are required"); - - /* - * Run tests with breakpoint#0, watchpoint#0, and the higiest - * numbered (context-aware) 
breakpoint. - */ - vcpu_args_set(vcpu, 3, 0, 0, brp_num - 1); + /* Specify bpn/wpn/ctx_bpn to be tested */ + vcpu_args_set(vcpu, 3, bpn, wpn, ctx_bpn); + pr_debug("Use bpn#%d, wpn#%d and ctx_bpn#%d\n", bpn, wpn, ctx_bpn); vcpu_run(vcpu); switch (get_ucall(vcpu, &uc)) { @@ -535,6 +528,43 @@ void test_single_step_from_userspace(int test_cnt) kvm_vm_free(vm); } +/* + * Run debug testing using the various breakpoint#, watchpoint# and + * context-aware breakpoint# with the given ID_AA64DFR0_EL1 configuration. + */ +void test_guest_debug_exceptions_all(uint64_t aa64dfr0) +{ + uint8_t brp_num, wrp_num, ctx_brp_num, normal_brp_num, ctx_brp_base; + int b, w, c; + + /* Number of breakpoints */ + brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_BRPS), aa64dfr0) + 1; + __TEST_REQUIRE(brp_num >= 2, "At least two breakpoints are required"); + + /* Number of watchpoints */ + wrp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_WRPS), aa64dfr0) + 1; + + /* Number of context aware breakpoints */ + ctx_brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_CTX_CMPS), aa64dfr0) + 1; + + pr_debug("%s brp_num:%d, wrp_num:%d, ctx_brp_num:%d\n", __func__, + brp_num, wrp_num, ctx_brp_num); + + /* Number of normal (non-context aware) breakpoints */ + normal_brp_num = brp_num - ctx_brp_num; + + /* Lowest context aware breakpoint number */ + ctx_brp_base = normal_brp_num; + + /* Run tests with all supported breakpoints/watchpoints */ + for (c = ctx_brp_base; c < ctx_brp_base + ctx_brp_num; c++) { + for (b = 0; b < normal_brp_num; b++) { + for (w = 0; w < wrp_num; w++) + test_guest_debug_exceptions(b, w, c); + } + } +} + static void help(char *name) { puts(""); @@ -569,7 +599,7 @@ int main(int argc, char *argv[]) } } - test_guest_debug_exceptions(aa64dfr0); + test_guest_debug_exceptions_all(aa64dfr0); test_single_step_from_userspace(ss_iteration); return 0; From a93871d0ea9fd59fb5eb783619334183d7f07f51 Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Mon, 17 Oct 2022 19:58:21 +0000 Subject: 
[PATCH 1334/4122] KVM: selftests: Add a userfaultfd library Move the generic userfaultfd code out of demand_paging_test.c into a common library, userfaultfd_util. This library consists of a setup and a stop function. The setup function starts a thread for handling page faults using the handler callback function. This setup returns a uffd_desc object which is then used in the stop function (to wait and destroy the threads). Reviewed-by: Oliver Upton Reviewed-by: Ben Gardon Signed-off-by: Ricardo Koller Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221017195834.2295901-2-ricarkol@google.com --- tools/testing/selftests/kvm/Makefile | 1 + .../selftests/kvm/demand_paging_test.c | 230 +++--------------- .../selftests/kvm/include/userfaultfd_util.h | 45 ++++ .../selftests/kvm/lib/userfaultfd_util.c | 186 ++++++++++++++ 4 files changed, 263 insertions(+), 199 deletions(-) create mode 100644 tools/testing/selftests/kvm/include/userfaultfd_util.h create mode 100644 tools/testing/selftests/kvm/lib/userfaultfd_util.c diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 0172eb6cb6ee..08a2606aff33 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -47,6 +47,7 @@ LIBKVM += lib/perf_test_util.c LIBKVM += lib/rbtree.c LIBKVM += lib/sparsebit.c LIBKVM += lib/test_util.c +LIBKVM += lib/userfaultfd_util.c LIBKVM_STRING += lib/string_override.c diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 779ae54f89c4..8e1fe4ffcccd 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -22,23 +22,13 @@ #include "test_util.h" #include "perf_test_util.h" #include "guest_modes.h" +#include "userfaultfd_util.h" #ifdef __NR_userfaultfd -#ifdef PRINT_PER_PAGE_UPDATES -#define PER_PAGE_DEBUG(...) printf(__VA_ARGS__) -#else -#define PER_PAGE_DEBUG(...) 
_no_printf(__VA_ARGS__) -#endif - -#ifdef PRINT_PER_VCPU_UPDATES -#define PER_VCPU_DEBUG(...) printf(__VA_ARGS__) -#else -#define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__) -#endif - static int nr_vcpus = 1; static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; + static size_t demand_paging_size; static char *guest_data_prototype; @@ -67,9 +57,11 @@ static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args) ts_diff.tv_sec, ts_diff.tv_nsec); } -static int handle_uffd_page_request(int uffd_mode, int uffd, uint64_t addr) +static int handle_uffd_page_request(int uffd_mode, int uffd, + struct uffd_msg *msg) { pid_t tid = syscall(__NR_gettid); + uint64_t addr = msg->arg.pagefault.address; struct timespec start; struct timespec ts_diff; int r; @@ -116,157 +108,6 @@ static int handle_uffd_page_request(int uffd_mode, int uffd, uint64_t addr) return 0; } -bool quit_uffd_thread; - -struct uffd_handler_args { - int uffd_mode; - int uffd; - int pipefd; - useconds_t delay; -}; - -static void *uffd_handler_thread_fn(void *arg) -{ - struct uffd_handler_args *uffd_args = (struct uffd_handler_args *)arg; - int uffd = uffd_args->uffd; - int pipefd = uffd_args->pipefd; - useconds_t delay = uffd_args->delay; - int64_t pages = 0; - struct timespec start; - struct timespec ts_diff; - - clock_gettime(CLOCK_MONOTONIC, &start); - while (!quit_uffd_thread) { - struct uffd_msg msg; - struct pollfd pollfd[2]; - char tmp_chr; - int r; - uint64_t addr; - - pollfd[0].fd = uffd; - pollfd[0].events = POLLIN; - pollfd[1].fd = pipefd; - pollfd[1].events = POLLIN; - - r = poll(pollfd, 2, -1); - switch (r) { - case -1: - pr_info("poll err"); - continue; - case 0: - continue; - case 1: - break; - default: - pr_info("Polling uffd returned %d", r); - return NULL; - } - - if (pollfd[0].revents & POLLERR) { - pr_info("uffd revents has POLLERR"); - return NULL; - } - - if (pollfd[1].revents & POLLIN) { - r = read(pollfd[1].fd, &tmp_chr, 1); - TEST_ASSERT(r == 1, - "Error reading pipefd in 
UFFD thread\n"); - return NULL; - } - - if (!(pollfd[0].revents & POLLIN)) - continue; - - r = read(uffd, &msg, sizeof(msg)); - if (r == -1) { - if (errno == EAGAIN) - continue; - pr_info("Read of uffd got errno %d\n", errno); - return NULL; - } - - if (r != sizeof(msg)) { - pr_info("Read on uffd returned unexpected size: %d bytes", r); - return NULL; - } - - if (!(msg.event & UFFD_EVENT_PAGEFAULT)) - continue; - - if (delay) - usleep(delay); - addr = msg.arg.pagefault.address; - r = handle_uffd_page_request(uffd_args->uffd_mode, uffd, addr); - if (r < 0) - return NULL; - pages++; - } - - ts_diff = timespec_elapsed(start); - PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n", - pages, ts_diff.tv_sec, ts_diff.tv_nsec, - pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); - - return NULL; -} - -static void setup_demand_paging(struct kvm_vm *vm, - pthread_t *uffd_handler_thread, int pipefd, - int uffd_mode, useconds_t uffd_delay, - struct uffd_handler_args *uffd_args, - void *hva, void *alias, uint64_t len) -{ - bool is_minor = (uffd_mode == UFFDIO_REGISTER_MODE_MINOR); - int uffd; - struct uffdio_api uffdio_api; - struct uffdio_register uffdio_register; - uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY; - int ret; - - PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n", - is_minor ? "MINOR" : "MISSING", - is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY"); - - /* In order to get minor faults, prefault via the alias. 
*/ - if (is_minor) { - size_t p; - - expected_ioctls = ((uint64_t) 1) << _UFFDIO_CONTINUE; - - TEST_ASSERT(alias != NULL, "Alias required for minor faults"); - for (p = 0; p < (len / demand_paging_size); ++p) { - memcpy(alias + (p * demand_paging_size), - guest_data_prototype, demand_paging_size); - } - } - - uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); - TEST_ASSERT(uffd >= 0, __KVM_SYSCALL_ERROR("userfaultfd()", uffd)); - - uffdio_api.api = UFFD_API; - uffdio_api.features = 0; - ret = ioctl(uffd, UFFDIO_API, &uffdio_api); - TEST_ASSERT(ret != -1, __KVM_SYSCALL_ERROR("UFFDIO_API", ret)); - - uffdio_register.range.start = (uint64_t)hva; - uffdio_register.range.len = len; - uffdio_register.mode = uffd_mode; - ret = ioctl(uffd, UFFDIO_REGISTER, &uffdio_register); - TEST_ASSERT(ret != -1, __KVM_SYSCALL_ERROR("UFFDIO_REGISTER", ret)); - TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) == - expected_ioctls, "missing userfaultfd ioctls"); - - uffd_args->uffd_mode = uffd_mode; - uffd_args->uffd = uffd; - uffd_args->pipefd = pipefd; - uffd_args->delay = uffd_delay; - pthread_create(uffd_handler_thread, NULL, uffd_handler_thread_fn, - uffd_args); - - PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n", - hva, hva + len); -} - struct test_params { int uffd_mode; useconds_t uffd_delay; @@ -274,16 +115,25 @@ struct test_params { bool partition_vcpu_memory_access; }; +static void prefault_mem(void *alias, uint64_t len) +{ + size_t p; + + TEST_ASSERT(alias != NULL, "Alias required for minor faults"); + for (p = 0; p < (len / demand_paging_size); ++p) { + memcpy(alias + (p * demand_paging_size), + guest_data_prototype, demand_paging_size); + } +} + static void run_test(enum vm_guest_mode mode, void *arg) { struct test_params *p = arg; - pthread_t *uffd_handler_threads = NULL; - struct uffd_handler_args *uffd_args = NULL; + struct uffd_desc **uffd_descs = NULL; struct timespec start; struct timespec ts_diff; - int *pipefds = NULL; struct kvm_vm *vm; - 
int r, i; + int i; vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1, p->src_type, p->partition_vcpu_memory_access); @@ -296,15 +146,8 @@ static void run_test(enum vm_guest_mode mode, void *arg) memset(guest_data_prototype, 0xAB, demand_paging_size); if (p->uffd_mode) { - uffd_handler_threads = - malloc(nr_vcpus * sizeof(*uffd_handler_threads)); - TEST_ASSERT(uffd_handler_threads, "Memory allocation failed"); - - uffd_args = malloc(nr_vcpus * sizeof(*uffd_args)); - TEST_ASSERT(uffd_args, "Memory allocation failed"); - - pipefds = malloc(sizeof(int) * nr_vcpus * 2); - TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd"); + uffd_descs = malloc(nr_vcpus * sizeof(struct uffd_desc *)); + TEST_ASSERT(uffd_descs, "Memory allocation failed"); for (i = 0; i < nr_vcpus; i++) { struct perf_test_vcpu_args *vcpu_args; @@ -317,19 +160,17 @@ static void run_test(enum vm_guest_mode mode, void *arg) vcpu_hva = addr_gpa2hva(vm, vcpu_args->gpa); vcpu_alias = addr_gpa2alias(vm, vcpu_args->gpa); + prefault_mem(vcpu_alias, + vcpu_args->pages * perf_test_args.guest_page_size); + /* * Set up user fault fd to handle demand paging * requests. 
*/ - r = pipe2(&pipefds[i * 2], - O_CLOEXEC | O_NONBLOCK); - TEST_ASSERT(!r, "Failed to set up pipefd"); - - setup_demand_paging(vm, &uffd_handler_threads[i], - pipefds[i * 2], p->uffd_mode, - p->uffd_delay, &uffd_args[i], - vcpu_hva, vcpu_alias, - vcpu_args->pages * perf_test_args.guest_page_size); + uffd_descs[i] = uffd_setup_demand_paging( + p->uffd_mode, p->uffd_delay, vcpu_hva, + vcpu_args->pages * perf_test_args.guest_page_size, + &handle_uffd_page_request); } } @@ -344,15 +185,9 @@ static void run_test(enum vm_guest_mode mode, void *arg) pr_info("All vCPU threads joined\n"); if (p->uffd_mode) { - char c; - /* Tell the user fault fd handler threads to quit */ - for (i = 0; i < nr_vcpus; i++) { - r = write(pipefds[i * 2 + 1], &c, 1); - TEST_ASSERT(r == 1, "Unable to write to pipefd"); - - pthread_join(uffd_handler_threads[i], NULL); - } + for (i = 0; i < nr_vcpus; i++) + uffd_stop_demand_paging(uffd_descs[i]); } pr_info("Total guest execution time: %ld.%.9lds\n", @@ -364,11 +199,8 @@ static void run_test(enum vm_guest_mode mode, void *arg) perf_test_destroy_vm(vm); free(guest_data_prototype); - if (p->uffd_mode) { - free(uffd_handler_threads); - free(uffd_args); - free(pipefds); - } + if (p->uffd_mode) + free(uffd_descs); } static void help(char *name) diff --git a/tools/testing/selftests/kvm/include/userfaultfd_util.h b/tools/testing/selftests/kvm/include/userfaultfd_util.h new file mode 100644 index 000000000000..877449c34592 --- /dev/null +++ b/tools/testing/selftests/kvm/include/userfaultfd_util.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * KVM userfaultfd util + * + * Copyright (C) 2018, Red Hat, Inc. 
+ * Copyright (C) 2019-2022 Google LLC + */ + +#define _GNU_SOURCE /* for pipe2 */ + +#include +#include +#include +#include + +#include "test_util.h" + +typedef int (*uffd_handler_t)(int uffd_mode, int uffd, struct uffd_msg *msg); + +struct uffd_desc { + int uffd_mode; + int uffd; + int pipefds[2]; + useconds_t delay; + uffd_handler_t handler; + pthread_t thread; +}; + +struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, + void *hva, uint64_t len, + uffd_handler_t handler); + +void uffd_stop_demand_paging(struct uffd_desc *uffd); + +#ifdef PRINT_PER_PAGE_UPDATES +#define PER_PAGE_DEBUG(...) printf(__VA_ARGS__) +#else +#define PER_PAGE_DEBUG(...) _no_printf(__VA_ARGS__) +#endif + +#ifdef PRINT_PER_VCPU_UPDATES +#define PER_VCPU_DEBUG(...) printf(__VA_ARGS__) +#else +#define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__) +#endif diff --git a/tools/testing/selftests/kvm/lib/userfaultfd_util.c b/tools/testing/selftests/kvm/lib/userfaultfd_util.c new file mode 100644 index 000000000000..3b44846fc277 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/userfaultfd_util.c @@ -0,0 +1,186 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KVM userfaultfd util + * Adapted from demand_paging_test.c + * + * Copyright (C) 2018, Red Hat, Inc. 
+ * Copyright (C) 2019-2022 Google LLC + */ + +#define _GNU_SOURCE /* for pipe2 */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kvm_util.h" +#include "test_util.h" +#include "perf_test_util.h" +#include "userfaultfd_util.h" + +#ifdef __NR_userfaultfd + +static void *uffd_handler_thread_fn(void *arg) +{ + struct uffd_desc *uffd_desc = (struct uffd_desc *)arg; + int uffd = uffd_desc->uffd; + int pipefd = uffd_desc->pipefds[0]; + useconds_t delay = uffd_desc->delay; + int64_t pages = 0; + struct timespec start; + struct timespec ts_diff; + + clock_gettime(CLOCK_MONOTONIC, &start); + while (1) { + struct uffd_msg msg; + struct pollfd pollfd[2]; + char tmp_chr; + int r; + + pollfd[0].fd = uffd; + pollfd[0].events = POLLIN; + pollfd[1].fd = pipefd; + pollfd[1].events = POLLIN; + + r = poll(pollfd, 2, -1); + switch (r) { + case -1: + pr_info("poll err"); + continue; + case 0: + continue; + case 1: + break; + default: + pr_info("Polling uffd returned %d", r); + return NULL; + } + + if (pollfd[0].revents & POLLERR) { + pr_info("uffd revents has POLLERR"); + return NULL; + } + + if (pollfd[1].revents & POLLIN) { + r = read(pollfd[1].fd, &tmp_chr, 1); + TEST_ASSERT(r == 1, + "Error reading pipefd in UFFD thread\n"); + return NULL; + } + + if (!(pollfd[0].revents & POLLIN)) + continue; + + r = read(uffd, &msg, sizeof(msg)); + if (r == -1) { + if (errno == EAGAIN) + continue; + pr_info("Read of uffd got errno %d\n", errno); + return NULL; + } + + if (r != sizeof(msg)) { + pr_info("Read on uffd returned unexpected size: %d bytes", r); + return NULL; + } + + if (!(msg.event & UFFD_EVENT_PAGEFAULT)) + continue; + + if (delay) + usleep(delay); + r = uffd_desc->handler(uffd_desc->uffd_mode, uffd, &msg); + if (r < 0) + return NULL; + pages++; + } + + ts_diff = timespec_elapsed(start); + PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. 
(%f/sec)\n", + pages, ts_diff.tv_sec, ts_diff.tv_nsec, + pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); + + return NULL; +} + +struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, + void *hva, uint64_t len, + uffd_handler_t handler) +{ + struct uffd_desc *uffd_desc; + bool is_minor = (uffd_mode == UFFDIO_REGISTER_MODE_MINOR); + int uffd; + struct uffdio_api uffdio_api; + struct uffdio_register uffdio_register; + uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY; + int ret; + + PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n", + is_minor ? "MINOR" : "MISSING", + is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY"); + + uffd_desc = malloc(sizeof(struct uffd_desc)); + TEST_ASSERT(uffd_desc, "malloc failed"); + + /* In order to get minor faults, prefault via the alias. */ + if (is_minor) + expected_ioctls = ((uint64_t) 1) << _UFFDIO_CONTINUE; + + uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); + TEST_ASSERT(uffd >= 0, "uffd creation failed, errno: %d", errno); + + uffdio_api.api = UFFD_API; + uffdio_api.features = 0; + TEST_ASSERT(ioctl(uffd, UFFDIO_API, &uffdio_api) != -1, + "ioctl UFFDIO_API failed: %" PRIu64, + (uint64_t)uffdio_api.api); + + uffdio_register.range.start = (uint64_t)hva; + uffdio_register.range.len = len; + uffdio_register.mode = uffd_mode; + TEST_ASSERT(ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) != -1, + "ioctl UFFDIO_REGISTER failed"); + TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) == + expected_ioctls, "missing userfaultfd ioctls"); + + ret = pipe2(uffd_desc->pipefds, O_CLOEXEC | O_NONBLOCK); + TEST_ASSERT(!ret, "Failed to set up pipefd"); + + uffd_desc->uffd_mode = uffd_mode; + uffd_desc->uffd = uffd; + uffd_desc->delay = delay; + uffd_desc->handler = handler; + pthread_create(&uffd_desc->thread, NULL, uffd_handler_thread_fn, + uffd_desc); + + PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n", + hva, hva + len); + + return uffd_desc; +} + +void 
uffd_stop_demand_paging(struct uffd_desc *uffd) +{ + char c = 0; + int ret; + + ret = write(uffd->pipefds[1], &c, 1); + TEST_ASSERT(ret == 1, "Unable to write to pipefd"); + + ret = pthread_join(uffd->thread, NULL); + TEST_ASSERT(ret == 0, "Pthread_join failed."); + + close(uffd->uffd); + + close(uffd->pipefds[1]); + close(uffd->pipefds[0]); + + free(uffd); +} + +#endif /* __NR_userfaultfd */ From 228f324dc718f702e8777164c4e2e7426824fb13 Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Mon, 17 Oct 2022 19:58:22 +0000 Subject: [PATCH 1335/4122] KVM: selftests: aarch64: Add virt_get_pte_hva() library function Add a library function to get the PTE (a host virtual address) of a given GVA. This will be used in a future commit by a test to clear and check the access flag of a particular page. Reviewed-by: Oliver Upton Reviewed-by: Andrew Jones Signed-off-by: Ricardo Koller Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221017195834.2295901-3-ricarkol@google.com --- .../selftests/kvm/include/aarch64/processor.h | 2 ++ tools/testing/selftests/kvm/lib/aarch64/processor.c | 13 ++++++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h index a8124f9dd68a..df4bfac69551 100644 --- a/tools/testing/selftests/kvm/include/aarch64/processor.h +++ b/tools/testing/selftests/kvm/include/aarch64/processor.h @@ -109,6 +109,8 @@ void vm_install_exception_handler(struct kvm_vm *vm, void vm_install_sync_handler(struct kvm_vm *vm, int vector, int ec, handler_fn handler); +uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva); + static inline void cpu_relax(void) { asm volatile("yield" ::: "memory"); diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index 6f5551368944..63ef3c78e55e 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ 
b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -138,7 +138,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) _virt_pg_map(vm, vaddr, paddr, attr_idx); } -vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva) { uint64_t *ptep; @@ -169,11 +169,18 @@ vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) TEST_FAIL("Page table levels must be 2, 3, or 4"); } - return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1)); + return ptep; unmapped_gva: TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva); - exit(1); + exit(EXIT_FAILURE); +} + +vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +{ + uint64_t *ptep = virt_get_pte_hva(vm, gva); + + return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1)); } static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level) From b6b03b86c0250a80b671313dbc0d7bcdbab78f41 Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Mon, 17 Oct 2022 19:58:23 +0000 Subject: [PATCH 1336/4122] KVM: selftests: Add missing close and munmap in __vm_mem_region_delete() Deleting a memslot (when freeing a VM) is not closing the backing fd, nor is it unmapping the alias mapping. Fix by adding the missing close and munmap.
Reviewed-by: Andrew Jones Reviewed-by: Oliver Upton Reviewed-by: Ben Gardon Signed-off-by: Ricardo Koller Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221017195834.2295901-4-ricarkol@google.com --- tools/testing/selftests/kvm/lib/kvm_util.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index f1cb1627161f..19e37fb7de7c 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -586,6 +586,12 @@ static void __vm_mem_region_delete(struct kvm_vm *vm, sparsebit_free(&region->unused_phy_pages); ret = munmap(region->mmap_start, region->mmap_size); TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); + if (region->fd >= 0) { + /* There's an extra map when using shared memory. */ + ret = munmap(region->mmap_alias, region->mmap_size); + TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); + close(region->fd); + } free(region); } From 41f5189ea9c08f7fc28340a7aefc93d0d2dcb769 Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Mon, 17 Oct 2022 19:58:24 +0000 Subject: [PATCH 1337/4122] KVM: selftests: aarch64: Construct DEFAULT_MAIR_EL1 using sysreg.h macros Define macros for memory type indexes and construct DEFAULT_MAIR_EL1 with macros from asm/sysreg.h. The index macros can then be used when constructing PTEs (instead of using raw numbers).
Reviewed-by: Andrew Jones Reviewed-by: Oliver Upton Signed-off-by: Ricardo Koller Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221017195834.2295901-5-ricarkol@google.com --- .../selftests/kvm/include/aarch64/processor.h | 25 ++++++++++++++----- .../selftests/kvm/lib/aarch64/processor.c | 2 +- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h index df4bfac69551..c1ddca8db225 100644 --- a/tools/testing/selftests/kvm/include/aarch64/processor.h +++ b/tools/testing/selftests/kvm/include/aarch64/processor.h @@ -38,12 +38,25 @@ * NORMAL 4 1111:1111 * NORMAL_WT 5 1011:1011 */ -#define DEFAULT_MAIR_EL1 ((0x00ul << (0 * 8)) | \ - (0x04ul << (1 * 8)) | \ - (0x0cul << (2 * 8)) | \ - (0x44ul << (3 * 8)) | \ - (0xfful << (4 * 8)) | \ - (0xbbul << (5 * 8))) + +/* Linux doesn't use these memory types, so let's define them. */ +#define MAIR_ATTR_DEVICE_GRE UL(0x0c) +#define MAIR_ATTR_NORMAL_WT UL(0xbb) + +#define MT_DEVICE_nGnRnE 0 +#define MT_DEVICE_nGnRE 1 +#define MT_DEVICE_GRE 2 +#define MT_NORMAL_NC 3 +#define MT_NORMAL 4 +#define MT_NORMAL_WT 5 + +#define DEFAULT_MAIR_EL1 \ + (MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRnE, MT_DEVICE_nGnRnE) | \ + MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRE, MT_DEVICE_nGnRE) | \ + MAIR_ATTRIDX(MAIR_ATTR_DEVICE_GRE, MT_DEVICE_GRE) | \ + MAIR_ATTRIDX(MAIR_ATTR_NORMAL_NC, MT_NORMAL_NC) | \ + MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) | \ + MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT)) #define MPIDR_HWID_BITMASK (0xff00fffffful) diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index 63ef3c78e55e..26f0eccff6fe 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -133,7 +133,7 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, void 
virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) { - uint64_t attr_idx = 4; /* NORMAL (See DEFAULT_MAIR_EL1) */ + uint64_t attr_idx = MT_NORMAL; _virt_pg_map(vm, vaddr, paddr, attr_idx); } From 590b949597b1e811d35df2f32021dd17d8e47f8c Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Mon, 17 Oct 2022 19:58:25 +0000 Subject: [PATCH 1338/4122] tools: Copy bitfield.h from the kernel sources Copy bitfield.h from include/linux/bitfield.h. A subsequent change will make use of some FIELD_{GET,PREP} macros defined in this header. The header was copied as-is, no changes needed. Cc: Jakub Kicinski Cc: Arnaldo Carvalho de Melo Reviewed-by: Oliver Upton Signed-off-by: Ricardo Koller Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221017195834.2295901-6-ricarkol@google.com --- tools/include/linux/bitfield.h | 176 +++++++++++++++++++++++++++++++++ 1 file changed, 176 insertions(+) create mode 100644 tools/include/linux/bitfield.h diff --git a/tools/include/linux/bitfield.h b/tools/include/linux/bitfield.h new file mode 100644 index 000000000000..6093fa6db260 --- /dev/null +++ b/tools/include/linux/bitfield.h @@ -0,0 +1,176 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2014 Felix Fietkau + * Copyright (C) 2004 - 2009 Ivo van Doorn + */ + +#ifndef _LINUX_BITFIELD_H +#define _LINUX_BITFIELD_H + +#include +#include + +/* + * Bitfield access macros + * + * FIELD_{GET,PREP} macros take as first parameter shifted mask + * from which they extract the base mask and shift amount. + * Mask must be a compilation time constant. 
+ * + * Example: + * + * #define REG_FIELD_A GENMASK(6, 0) + * #define REG_FIELD_B BIT(7) + * #define REG_FIELD_C GENMASK(15, 8) + * #define REG_FIELD_D GENMASK(31, 16) + * + * Get: + * a = FIELD_GET(REG_FIELD_A, reg); + * b = FIELD_GET(REG_FIELD_B, reg); + * + * Set: + * reg = FIELD_PREP(REG_FIELD_A, 1) | + * FIELD_PREP(REG_FIELD_B, 0) | + * FIELD_PREP(REG_FIELD_C, c) | + * FIELD_PREP(REG_FIELD_D, 0x40); + * + * Modify: + * reg &= ~REG_FIELD_C; + * reg |= FIELD_PREP(REG_FIELD_C, c); + */ + +#define __bf_shf(x) (__builtin_ffsll(x) - 1) + +#define __scalar_type_to_unsigned_cases(type) \ + unsigned type: (unsigned type)0, \ + signed type: (unsigned type)0 + +#define __unsigned_scalar_typeof(x) typeof( \ + _Generic((x), \ + char: (unsigned char)0, \ + __scalar_type_to_unsigned_cases(char), \ + __scalar_type_to_unsigned_cases(short), \ + __scalar_type_to_unsigned_cases(int), \ + __scalar_type_to_unsigned_cases(long), \ + __scalar_type_to_unsigned_cases(long long), \ + default: (x))) + +#define __bf_cast_unsigned(type, x) ((__unsigned_scalar_typeof(type))(x)) + +#define __BF_FIELD_CHECK(_mask, _reg, _val, _pfx) \ + ({ \ + BUILD_BUG_ON_MSG(!__builtin_constant_p(_mask), \ + _pfx "mask is not constant"); \ + BUILD_BUG_ON_MSG((_mask) == 0, _pfx "mask is zero"); \ + BUILD_BUG_ON_MSG(__builtin_constant_p(_val) ? \ + ~((_mask) >> __bf_shf(_mask)) & (_val) : 0, \ + _pfx "value too large for the field"); \ + BUILD_BUG_ON_MSG(__bf_cast_unsigned(_mask, _mask) > \ + __bf_cast_unsigned(_reg, ~0ull), \ + _pfx "type of reg too small for mask"); \ + __BUILD_BUG_ON_NOT_POWER_OF_2((_mask) + \ + (1ULL << __bf_shf(_mask))); \ + }) + +/** + * FIELD_MAX() - produce the maximum value representable by a field + * @_mask: shifted mask defining the field's length and position + * + * FIELD_MAX() returns the maximum value that can be held in the field + * specified by @_mask. 
+ */ +#define FIELD_MAX(_mask) \ + ({ \ + __BF_FIELD_CHECK(_mask, 0ULL, 0ULL, "FIELD_MAX: "); \ + (typeof(_mask))((_mask) >> __bf_shf(_mask)); \ + }) + +/** + * FIELD_FIT() - check if value fits in the field + * @_mask: shifted mask defining the field's length and position + * @_val: value to test against the field + * + * Return: true if @_val can fit inside @_mask, false if @_val is too big. + */ +#define FIELD_FIT(_mask, _val) \ + ({ \ + __BF_FIELD_CHECK(_mask, 0ULL, 0ULL, "FIELD_FIT: "); \ + !((((typeof(_mask))_val) << __bf_shf(_mask)) & ~(_mask)); \ + }) + +/** + * FIELD_PREP() - prepare a bitfield element + * @_mask: shifted mask defining the field's length and position + * @_val: value to put in the field + * + * FIELD_PREP() masks and shifts up the value. The result should + * be combined with other fields of the bitfield using logical OR. + */ +#define FIELD_PREP(_mask, _val) \ + ({ \ + __BF_FIELD_CHECK(_mask, 0ULL, _val, "FIELD_PREP: "); \ + ((typeof(_mask))(_val) << __bf_shf(_mask)) & (_mask); \ + }) + +/** + * FIELD_GET() - extract a bitfield element + * @_mask: shifted mask defining the field's length and position + * @_reg: value of entire bitfield + * + * FIELD_GET() extracts the field specified by @_mask from the + * bitfield passed in as @_reg by masking and shifting it down. 
+ */ +#define FIELD_GET(_mask, _reg) \ + ({ \ + __BF_FIELD_CHECK(_mask, _reg, 0U, "FIELD_GET: "); \ + (typeof(_mask))(((_reg) & (_mask)) >> __bf_shf(_mask)); \ + }) + +extern void __compiletime_error("value doesn't fit into mask") +__field_overflow(void); +extern void __compiletime_error("bad bitfield mask") +__bad_mask(void); +static __always_inline u64 field_multiplier(u64 field) +{ + if ((field | (field - 1)) & ((field | (field - 1)) + 1)) + __bad_mask(); + return field & -field; +} +static __always_inline u64 field_mask(u64 field) +{ + return field / field_multiplier(field); +} +#define field_max(field) ((typeof(field))field_mask(field)) +#define ____MAKE_OP(type,base,to,from) \ +static __always_inline __##type type##_encode_bits(base v, base field) \ +{ \ + if (__builtin_constant_p(v) && (v & ~field_mask(field))) \ + __field_overflow(); \ + return to((v & field_mask(field)) * field_multiplier(field)); \ +} \ +static __always_inline __##type type##_replace_bits(__##type old, \ + base val, base field) \ +{ \ + return (old & ~to(field)) | type##_encode_bits(val, field); \ +} \ +static __always_inline void type##p_replace_bits(__##type *p, \ + base val, base field) \ +{ \ + *p = (*p & ~to(field)) | type##_encode_bits(val, field); \ +} \ +static __always_inline base type##_get_bits(__##type v, base field) \ +{ \ + return (from(v) & field)/field_multiplier(field); \ +} +#define __MAKE_OP(size) \ + ____MAKE_OP(le##size,u##size,cpu_to_le##size,le##size##_to_cpu) \ + ____MAKE_OP(be##size,u##size,cpu_to_be##size,be##size##_to_cpu) \ + ____MAKE_OP(u##size,u##size,,) +____MAKE_OP(u8,u8,,) +__MAKE_OP(16) +__MAKE_OP(32) +__MAKE_OP(64) +#undef __MAKE_OP +#undef ____MAKE_OP + +#endif From bd3ed7e1a47eb7b3838ca09439f1eb289ec3be1f Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Mon, 17 Oct 2022 19:58:26 +0000 Subject: [PATCH 1339/4122] KVM: selftests: Stash backing_src_type in struct userspace_mem_region Add the backing_src_type into struct userspace_mem_region. 
This struct already stores a lot of info about memory regions, except the backing source type. This info will be used by a future commit in order to determine the method for punching a hole. Reviewed-by: Oliver Upton Signed-off-by: Ricardo Koller Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221017195834.2295901-7-ricarkol@google.com --- tools/testing/selftests/kvm/include/kvm_util_base.h | 1 + tools/testing/selftests/kvm/lib/kvm_util.c | 1 + 2 files changed, 2 insertions(+) diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h index e42a09cd24a0..a9264ed22cca 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -34,6 +34,7 @@ struct userspace_mem_region { struct sparsebit *unused_phy_pages; int fd; off_t offset; + enum vm_mem_backing_src_type backing_src_type; void *host_mem; void *host_alias; void *mmap_start; diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 19e37fb7de7c..6affce47e899 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -929,6 +929,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, vm_mem_backing_src_alias(src_type)->name); } + region->backing_src_type = src_type; region->unused_phy_pages = sparsebit_alloc(); sparsebit_set_num(region->unused_phy_pages, guest_paddr >> vm->page_shift, npages); From 290c5b54012b7f05e9c51af32d557574bf69a654 Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Mon, 17 Oct 2022 19:58:27 +0000 Subject: [PATCH 1340/4122] KVM: selftests: Add vm->memslots[] and enum kvm_mem_region_type The vm_create() helpers are hardcoded to place most page types (code, page-tables, stacks, etc) in the same memslot #0, and always backed with anonymous 4K. There are a couple of issues with that. 
First, tests willing to differ a bit, like placing page-tables in a different backing source type must replicate much of what's already done by the vm_create() functions. Second, the hardcoded assumption of memslot #0 holding most things is spread everywhere; this makes it very hard to change. Fix the above issues by having selftests specify how they want memory to be laid out. Start by changing ____vm_create() to not create memslot #0; a test (to come) will specify all memslots used by the VM. Then, add the vm->memslots[] array to specify the right memslot for different memory allocators, e.g., lib/elf should use the vm->memslots[MEM_REGION_CODE] memslot. This will be used as a way to specify the page-tables memslots (to be backed by huge pages for example). There is no functional change intended. The current commit lays out memory exactly as before. A future commit will change the allocators to get the region they should be using, e.g., the page table allocators using the pt memslot. Cc: Sean Christopherson Cc: Andrew Jones Signed-off-by: Ricardo Koller Reviewed-by: Andrew Jones Reviewed-by: Sean Christopherson Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221017195834.2295901-8-ricarkol@google.com --- .../selftests/kvm/include/kvm_util_base.h | 26 +++++++++++++++++-- tools/testing/selftests/kvm/lib/kvm_util.c | 18 +++++++------ 2 files changed, 34 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h index a9264ed22cca..6442aa9e9061 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -65,6 +65,14 @@ struct userspace_mem_regions { DECLARE_HASHTABLE(slot_hash, 9); }; +enum kvm_mem_region_type { + MEM_REGION_CODE, + MEM_REGION_DATA, + MEM_REGION_PT, + MEM_REGION_TEST_DATA, + NR_MEM_REGIONS, +}; + struct kvm_vm { int mode; unsigned long type; @@ -93,6 +101,13 @@ struct kvm_vm { int
stats_fd; struct kvm_stats_header stats_header; struct kvm_stats_desc *stats_desc; + + /* + * KVM region slots. These are the default memslots used by page + * allocators, e.g., lib/elf uses the memslots[MEM_REGION_CODE] + * memslot. + */ + uint32_t memslots[NR_MEM_REGIONS]; }; @@ -105,6 +120,13 @@ struct kvm_vm { struct userspace_mem_region * memslot2region(struct kvm_vm *vm, uint32_t memslot); +static inline struct userspace_mem_region *vm_get_mem_region(struct kvm_vm *vm, + enum kvm_mem_region_type type) +{ + assert(type < NR_MEM_REGIONS); + return memslot2region(vm, vm->memslots[type]); +} + /* Minimum allocated guest virtual and physical addresses */ #define KVM_UTIL_MIN_VADDR 0x2000 #define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 @@ -647,13 +669,13 @@ vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm); * __vm_create() does NOT create vCPUs, @nr_runnable_vcpus is used purely to * calculate the amount of memory needed for per-vCPU data, e.g. stacks. */ -struct kvm_vm *____vm_create(enum vm_guest_mode mode, uint64_t nr_pages); +struct kvm_vm *____vm_create(enum vm_guest_mode mode); struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus, uint64_t nr_extra_pages); static inline struct kvm_vm *vm_create_barebones(void) { - return ____vm_create(VM_MODE_DEFAULT, 0); + return ____vm_create(VM_MODE_DEFAULT); } static inline struct kvm_vm *vm_create(uint32_t nr_runnable_vcpus) diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 6affce47e899..f3dfa4e9ee0f 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -185,13 +185,10 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = { _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES, "Missing new mode params?"); -struct kvm_vm *____vm_create(enum vm_guest_mode mode, uint64_t nr_pages) +struct kvm_vm *____vm_create(enum vm_guest_mode mode) { 
struct kvm_vm *vm; - pr_debug("%s: mode='%s' pages='%ld'\n", __func__, - vm_guest_mode_string(mode), nr_pages); - vm = calloc(1, sizeof(*vm)); TEST_ASSERT(vm != NULL, "Insufficient Memory"); @@ -287,9 +284,6 @@ struct kvm_vm *____vm_create(enum vm_guest_mode mode, uint64_t nr_pages) /* Allocate and setup memory for guest. */ vm->vpages_mapped = sparsebit_alloc(); - if (nr_pages != 0) - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, - 0, 0, nr_pages, 0); return vm; } @@ -335,8 +329,16 @@ struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus, uint64_t nr_pages = vm_nr_pages_required(mode, nr_runnable_vcpus, nr_extra_pages); struct kvm_vm *vm; + int i; - vm = ____vm_create(mode, nr_pages); + pr_debug("%s: mode='%s' pages='%ld'\n", __func__, + vm_guest_mode_string(mode), nr_pages); + + vm = ____vm_create(mode); + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages, 0); + for (i = 0; i < NR_MEM_REGIONS; i++) + vm->memslots[i] = 0; kvm_vm_elf_load(vm, program_invocation_name); From 5485e822e31a75dfac3713d94b6b22025d4895da Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Mon, 17 Oct 2022 19:58:28 +0000 Subject: [PATCH 1341/4122] KVM: selftests: Fix alignment in virt_arch_pgd_alloc() and vm_vaddr_alloc() Refactor virt_arch_pgd_alloc() and vm_vaddr_alloc() in both RISC-V and aarch64 to fix the alignment of parameters in a couple of calls. This will make it easier to fix the alignment in a future commit that adds an extra parameter (that happens to be very long). No functional change intended. 
Suggested-by: Sean Christopherson Signed-off-by: Ricardo Koller Reviewed-by: Andrew Jones Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221017195834.2295901-9-ricarkol@google.com --- .../selftests/kvm/lib/aarch64/processor.c | 27 ++++++++++--------- .../selftests/kvm/lib/riscv/processor.c | 27 ++++++++++--------- 2 files changed, 30 insertions(+), 24 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index 26f0eccff6fe..6ff2b9d6cea6 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -76,13 +76,14 @@ static uint64_t __maybe_unused ptrs_per_pte(struct kvm_vm *vm) void virt_arch_pgd_alloc(struct kvm_vm *vm) { - if (!vm->pgd_created) { - vm_paddr_t paddr = vm_phy_pages_alloc(vm, - page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size, - KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0); - vm->pgd = paddr; - vm->pgd_created = true; - } + size_t nr_pages = page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size; + + if (vm->pgd_created) + return; + + vm->pgd = vm_phy_pages_alloc(vm, nr_pages, + KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0); + vm->pgd_created = true; } static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, @@ -325,13 +326,15 @@ void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, struct kvm_vcpu_init *init, void *guest_code) { - size_t stack_size = vm->page_size == 4096 ? - DEFAULT_STACK_PGS * vm->page_size : - vm->page_size; - uint64_t stack_vaddr = vm_vaddr_alloc(vm, stack_size, - DEFAULT_ARM64_GUEST_STACK_VADDR_MIN); + size_t stack_size; + uint64_t stack_vaddr; struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id); + stack_size = vm->page_size == 4096 ? 
DEFAULT_STACK_PGS * vm->page_size : + vm->page_size; + stack_vaddr = vm_vaddr_alloc(vm, stack_size, + DEFAULT_ARM64_GUEST_STACK_VADDR_MIN); + aarch64_vcpu_setup(vcpu, init); vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size); diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c index 604478151212..ac7fc9d317db 100644 --- a/tools/testing/selftests/kvm/lib/riscv/processor.c +++ b/tools/testing/selftests/kvm/lib/riscv/processor.c @@ -55,13 +55,14 @@ static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level) void virt_arch_pgd_alloc(struct kvm_vm *vm) { - if (!vm->pgd_created) { - vm_paddr_t paddr = vm_phy_pages_alloc(vm, - page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size, - KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0); - vm->pgd = paddr; - vm->pgd_created = true; - } + size_t nr_pages = page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size; + + if (vm->pgd_created) + return; + + vm->pgd = vm_phy_pages_alloc(vm, nr_pages, + KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0); + vm->pgd_created = true; } void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) @@ -279,15 +280,17 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, void *guest_code) { int r; - size_t stack_size = vm->page_size == 4096 ? - DEFAULT_STACK_PGS * vm->page_size : - vm->page_size; - unsigned long stack_vaddr = vm_vaddr_alloc(vm, stack_size, - DEFAULT_RISCV_GUEST_STACK_VADDR_MIN); + size_t stack_size; + unsigned long stack_vaddr; unsigned long current_gp = 0; struct kvm_mp_state mps; struct kvm_vcpu *vcpu; + stack_size = vm->page_size == 4096 ? 
DEFAULT_STACK_PGS * vm->page_size : + vm->page_size; + stack_vaddr = vm_vaddr_alloc(vm, stack_size, + DEFAULT_RISCV_GUEST_STACK_VADDR_MIN); + vcpu = __vm_vcpu_add(vm, vcpu_id); riscv_vcpu_mmu_setup(vcpu); From 1446e331432d7f24ed56b870ad605a4345fee43f Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Mon, 17 Oct 2022 19:58:29 +0000 Subject: [PATCH 1342/4122] KVM: selftests: Use the right memslot for code, page-tables, and data allocations Now that kvm_vm allows specifying different memslots for code, page tables, and data, use the appropriate memslot when making allocations in common/library code. Change them accordingly: - code (allocated by lib/elf) uses the CODE memslot - stacks, exception tables, and other core data pages (like the TSS in x86) use the DATA memslot - page tables and the PGD use the PT memslot - test data (anything allocated with vm_vaddr_alloc()) uses the TEST_DATA memslot No functional change intended. All allocators keep using memslot #0. Cc: Sean Christopherson Cc: Andrew Jones Signed-off-by: Ricardo Koller Reviewed-by: Sean Christopherson Reviewed-by: Andrew Jones Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221017195834.2295901-10-ricarkol@google.com --- .../selftests/kvm/include/kvm_util_base.h | 4 ++ .../selftests/kvm/lib/aarch64/processor.c | 12 ++-- tools/testing/selftests/kvm/lib/elf.c | 3 +- tools/testing/selftests/kvm/lib/kvm_util.c | 57 ++++++++++++------- .../selftests/kvm/lib/riscv/processor.c | 8 ++- .../selftests/kvm/lib/s390x/processor.c | 8 ++- .../selftests/kvm/lib/x86_64/processor.c | 13 +++-- 7 files changed, 65 insertions(+), 40 deletions(-) diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h index 6442aa9e9061..b0da75af1ff3 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -407,7 +407,11 @@ void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t 
new_gpa); void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot); struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id); vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min); +vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, + enum kvm_mem_region_type type); vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages); +vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm, + enum kvm_mem_region_type type); vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm); void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index 6ff2b9d6cea6..2883dfd1ad49 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -82,7 +82,8 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) return; vm->pgd = vm_phy_pages_alloc(vm, nr_pages, - KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0); + KVM_GUEST_PAGE_TABLE_MIN_PADDR, + vm->memslots[MEM_REGION_PT]); vm->pgd_created = true; } @@ -332,8 +333,9 @@ struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, stack_size = vm->page_size == 4096 ? 
DEFAULT_STACK_PGS * vm->page_size : vm->page_size; - stack_vaddr = vm_vaddr_alloc(vm, stack_size, - DEFAULT_ARM64_GUEST_STACK_VADDR_MIN); + stack_vaddr = __vm_vaddr_alloc(vm, stack_size, + DEFAULT_ARM64_GUEST_STACK_VADDR_MIN, + MEM_REGION_DATA); aarch64_vcpu_setup(vcpu, init); @@ -438,8 +440,8 @@ unexpected_exception: void vm_init_descriptor_tables(struct kvm_vm *vm) { - vm->handlers = vm_vaddr_alloc(vm, sizeof(struct handlers), - vm->page_size); + vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers), + vm->page_size, MEM_REGION_DATA); *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers; } diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c index 9f54c098d9d0..51f280c412ba 100644 --- a/tools/testing/selftests/kvm/lib/elf.c +++ b/tools/testing/selftests/kvm/lib/elf.c @@ -161,7 +161,8 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename) seg_vend |= vm->page_size - 1; size_t seg_size = seg_vend - seg_vstart + 1; - vm_vaddr_t vaddr = vm_vaddr_alloc(vm, seg_size, seg_vstart); + vm_vaddr_t vaddr = __vm_vaddr_alloc(vm, seg_size, seg_vstart, + MEM_REGION_CODE); TEST_ASSERT(vaddr == seg_vstart, "Unable to allocate " "virtual memory for segment at requested min addr,\n" " segment idx: %u\n" diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index f3dfa4e9ee0f..5ad4acaec8e0 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1226,32 +1226,15 @@ va_found: return pgidx_start * vm->page_size; } -/* - * VM Virtual Address Allocate - * - * Input Args: - * vm - Virtual Machine - * sz - Size in bytes - * vaddr_min - Minimum starting virtual address - * - * Output Args: None - * - * Return: - * Starting guest virtual address - * - * Allocates at least sz bytes within the virtual address space of the vm - * given by vm. 
The allocated bytes are mapped to a virtual address >= - * the address given by vaddr_min. Note that each allocation uses a - * a unique set of pages, with the minimum real allocation being at least - * a page. - */ -vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min) +vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, + enum kvm_mem_region_type type) { uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0); virt_pgd_alloc(vm); vm_paddr_t paddr = vm_phy_pages_alloc(vm, pages, - KVM_UTIL_MIN_PFN * vm->page_size, 0); + KVM_UTIL_MIN_PFN * vm->page_size, + vm->memslots[type]); /* * Find an unused range of virtual page addresses of at least @@ -1272,6 +1255,30 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min) return vaddr_start; } +/* + * VM Virtual Address Allocate + * + * Input Args: + * vm - Virtual Machine + * sz - Size in bytes + * vaddr_min - Minimum starting virtual address + * + * Output Args: None + * + * Return: + * Starting guest virtual address + * + * Allocates at least sz bytes within the virtual address space of the vm + * given by vm. The allocated bytes are mapped to a virtual address >= + * the address given by vaddr_min. Note that each allocation uses a + * a unique set of pages, with the minimum real allocation being at least + * a page. The allocated physical space comes from the TEST_DATA memory region. 
+ */ +vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min) +{ + return __vm_vaddr_alloc(vm, sz, vaddr_min, MEM_REGION_TEST_DATA); +} + /* * VM Virtual Address Allocate Pages * @@ -1291,6 +1298,11 @@ vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages) return vm_vaddr_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR); } +vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm, enum kvm_mem_region_type type) +{ + return __vm_vaddr_alloc(vm, getpagesize(), KVM_UTIL_MIN_VADDR, type); +} + /* * VM Virtual Address Allocate Page * @@ -1856,7 +1868,8 @@ vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min, vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm) { - return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0); + return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, + vm->memslots[MEM_REGION_PT]); } /* diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c index ac7fc9d317db..d146ca71e0c0 100644 --- a/tools/testing/selftests/kvm/lib/riscv/processor.c +++ b/tools/testing/selftests/kvm/lib/riscv/processor.c @@ -61,7 +61,8 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) return; vm->pgd = vm_phy_pages_alloc(vm, nr_pages, - KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0); + KVM_GUEST_PAGE_TABLE_MIN_PADDR, + vm->memslots[MEM_REGION_PT]); vm->pgd_created = true; } @@ -288,8 +289,9 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, stack_size = vm->page_size == 4096 ? 
DEFAULT_STACK_PGS * vm->page_size : vm->page_size; - stack_vaddr = vm_vaddr_alloc(vm, stack_size, - DEFAULT_RISCV_GUEST_STACK_VADDR_MIN); + stack_vaddr = __vm_vaddr_alloc(vm, stack_size, + DEFAULT_RISCV_GUEST_STACK_VADDR_MIN, + MEM_REGION_DATA); vcpu = __vm_vcpu_add(vm, vcpu_id); riscv_vcpu_mmu_setup(vcpu); diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c index 89d7340d9cbd..15945121daf1 100644 --- a/tools/testing/selftests/kvm/lib/s390x/processor.c +++ b/tools/testing/selftests/kvm/lib/s390x/processor.c @@ -21,7 +21,8 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) return; paddr = vm_phy_pages_alloc(vm, PAGES_PER_REGION, - KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0); + KVM_GUEST_PAGE_TABLE_MIN_PADDR, + vm->memslots[MEM_REGION_PT]); memset(addr_gpa2hva(vm, paddr), 0xff, PAGES_PER_REGION * vm->page_size); vm->pgd = paddr; @@ -167,8 +168,9 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x", vm->page_size); - stack_vaddr = vm_vaddr_alloc(vm, stack_size, - DEFAULT_GUEST_STACK_VADDR_MIN); + stack_vaddr = __vm_vaddr_alloc(vm, stack_size, + DEFAULT_GUEST_STACK_VADDR_MIN, + MEM_REGION_DATA); vcpu = __vm_vcpu_add(vm, vcpu_id); diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 39c4409ef56a..b199dde90e9f 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -552,7 +552,7 @@ unmapped_gva: static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt) { if (!vm->gdt) - vm->gdt = vm_vaddr_alloc_page(vm); + vm->gdt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); dt->base = vm->gdt; dt->limit = getpagesize(); @@ -562,7 +562,7 @@ static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp, int selector) { if (!vm->tss) - vm->tss = vm_vaddr_alloc_page(vm); + vm->tss = 
__vm_vaddr_alloc_page(vm, MEM_REGION_DATA); memset(segp, 0, sizeof(*segp)); segp->base = vm->tss; @@ -647,8 +647,9 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, vm_vaddr_t stack_vaddr; struct kvm_vcpu *vcpu; - stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(), - DEFAULT_GUEST_STACK_VADDR_MIN); + stack_vaddr = __vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(), + DEFAULT_GUEST_STACK_VADDR_MIN, + MEM_REGION_DATA); vcpu = __vm_vcpu_add(vm, vcpu_id); vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid()); @@ -1145,8 +1146,8 @@ void vm_init_descriptor_tables(struct kvm_vm *vm) extern void *idt_handlers; int i; - vm->idt = vm_vaddr_alloc_page(vm); - vm->handlers = vm_vaddr_alloc_page(vm); + vm->idt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); + vm->handlers = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); /* Handlers have the same address in both address spaces.*/ for (i = 0; i < NUM_INTERRUPTS; i++) set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0, From 35c5810157124cb71aaa939cd2d5508192714877 Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Mon, 17 Oct 2022 19:58:30 +0000 Subject: [PATCH 1343/4122] KVM: selftests: aarch64: Add aarch64/page_fault_test Add a new test for stage 2 faults when using different combinations of guest accesses (e.g., write, S1PTW), backing source type (e.g., anon) and types of faults (e.g., read on hugetlbfs with a hole). The next commits will add different handling methods and more faults (e.g., uffd and dirty logging). This first commit starts by adding two sanity checks for all types of accesses: AF setting by the hw, and accessing memslots with holes. 
Signed-off-by: Ricardo Koller Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221017195834.2295901-11-ricarkol@google.com --- tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 1 + .../selftests/kvm/aarch64/page_fault_test.c | 594 ++++++++++++++++++ .../selftests/kvm/include/aarch64/processor.h | 8 + 4 files changed, 604 insertions(+) create mode 100644 tools/testing/selftests/kvm/aarch64/page_fault_test.c diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 2f0d705db9db..4a30d684e208 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -4,6 +4,7 @@ /aarch64/debug-exceptions /aarch64/get-reg-list /aarch64/hypercalls +/aarch64/page_fault_test /aarch64/psci_test /aarch64/vcpu_width_config /aarch64/vgic_init diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 08a2606aff33..50c30335460f 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -153,6 +153,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/arch_timer TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list TEST_GEN_PROGS_aarch64 += aarch64/hypercalls +TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test TEST_GEN_PROGS_aarch64 += aarch64/psci_test TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config TEST_GEN_PROGS_aarch64 += aarch64/vgic_init diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c new file mode 100644 index 000000000000..28859a96053f --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c @@ -0,0 +1,594 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * page_fault_test.c - Test stage 2 faults. 
+ * + * This test tries different combinations of guest accesses (e.g., write, + * S1PTW), backing source type (e.g., anon) and types of faults (e.g., read on + * hugetlbfs with a hole). It checks that the expected handling method is + * called (e.g., uffd faults with the right address and write/read flag). + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include "guest_modes.h" +#include "userfaultfd_util.h" + +/* Guest virtual addresses that point to the test page and its PTE. */ +#define TEST_GVA 0xc0000000 +#define TEST_EXEC_GVA (TEST_GVA + 0x8) +#define TEST_PTE_GVA 0xb0000000 +#define TEST_DATA 0x0123456789ABCDEF + +static uint64_t *guest_test_memory = (uint64_t *)TEST_GVA; + +#define CMD_NONE (0) +#define CMD_SKIP_TEST (1ULL << 1) +#define CMD_HOLE_PT (1ULL << 2) +#define CMD_HOLE_DATA (1ULL << 3) + +#define PREPARE_FN_NR 10 +#define CHECK_FN_NR 10 + +struct test_desc { + const char *name; + uint64_t mem_mark_cmd; + /* Skip the test if any prepare function returns false */ + bool (*guest_prepare[PREPARE_FN_NR])(void); + void (*guest_test)(void); + void (*guest_test_check[CHECK_FN_NR])(void); + void (*dabt_handler)(struct ex_regs *regs); + void (*iabt_handler)(struct ex_regs *regs); + uint32_t pt_memslot_flags; + uint32_t data_memslot_flags; + bool skip; +}; + +struct test_params { + enum vm_mem_backing_src_type src_type; + struct test_desc *test_desc; +}; + +static inline void flush_tlb_page(uint64_t vaddr) +{ + uint64_t page = vaddr >> 12; + + dsb(ishst); + asm volatile("tlbi vaae1is, %0" :: "r" (page)); + dsb(ish); + isb(); +} + +static void guest_write64(void) +{ + uint64_t val; + + WRITE_ONCE(*guest_test_memory, TEST_DATA); + val = READ_ONCE(*guest_test_memory); + GUEST_ASSERT_EQ(val, TEST_DATA); +} + +/* Check the system for atomic instructions. 
*/ +static bool guest_check_lse(void) +{ + uint64_t isar0 = read_sysreg(id_aa64isar0_el1); + uint64_t atomic; + + atomic = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR0_ATOMICS), isar0); + return atomic >= 2; +} + +static bool guest_check_dc_zva(void) +{ + uint64_t dczid = read_sysreg(dczid_el0); + uint64_t dzp = FIELD_GET(ARM64_FEATURE_MASK(DCZID_DZP), dczid); + + return dzp == 0; +} + +/* Compare and swap instruction. */ +static void guest_cas(void) +{ + uint64_t val; + + GUEST_ASSERT(guest_check_lse()); + asm volatile(".arch_extension lse\n" + "casal %0, %1, [%2]\n" + :: "r" (0), "r" (TEST_DATA), "r" (guest_test_memory)); + val = READ_ONCE(*guest_test_memory); + GUEST_ASSERT_EQ(val, TEST_DATA); +} + +static void guest_read64(void) +{ + uint64_t val; + + val = READ_ONCE(*guest_test_memory); + GUEST_ASSERT_EQ(val, 0); +} + +/* Address translation instruction */ +static void guest_at(void) +{ + uint64_t par; + + asm volatile("at s1e1r, %0" :: "r" (guest_test_memory)); + par = read_sysreg(par_el1); + isb(); + + /* Bit 1 indicates whether the AT was successful */ + GUEST_ASSERT_EQ(par & 1, 0); +} + +/* + * The size of the block written by "dc zva" is guaranteed to be between (2 << + * 0) and (2 << 9), which is safe in our case as we need the write to happen + * for at least a word, and not more than a page. + */ +static void guest_dc_zva(void) +{ + uint16_t val; + + asm volatile("dc zva, %0" :: "r" (guest_test_memory)); + dsb(ish); + val = READ_ONCE(*guest_test_memory); + GUEST_ASSERT_EQ(val, 0); +} + +/* + * Pre-indexing loads and stores don't have a valid syndrome (ESR_EL2.ISV==0). + * And that's special because KVM must take special care with those: they + * should still count as accesses for dirty logging or user-faulting, but + * should be handled differently on mmio. 
+ */ +static void guest_ld_preidx(void) +{ + uint64_t val; + uint64_t addr = TEST_GVA - 8; + + /* + * This ends up accessing "TEST_GVA + 8 - 8", where "TEST_GVA - 8" is + * in a gap between memslots not backing by anything. + */ + asm volatile("ldr %0, [%1, #8]!" + : "=r" (val), "+r" (addr)); + GUEST_ASSERT_EQ(val, 0); + GUEST_ASSERT_EQ(addr, TEST_GVA); +} + +static void guest_st_preidx(void) +{ + uint64_t val = TEST_DATA; + uint64_t addr = TEST_GVA - 8; + + asm volatile("str %0, [%1, #8]!" + : "+r" (val), "+r" (addr)); + + GUEST_ASSERT_EQ(addr, TEST_GVA); + val = READ_ONCE(*guest_test_memory); +} + +static bool guest_set_ha(void) +{ + uint64_t mmfr1 = read_sysreg(id_aa64mmfr1_el1); + uint64_t hadbs, tcr; + + /* Skip if HA is not supported. */ + hadbs = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_HADBS), mmfr1); + if (hadbs == 0) + return false; + + tcr = read_sysreg(tcr_el1) | TCR_EL1_HA; + write_sysreg(tcr, tcr_el1); + isb(); + + return true; +} + +static bool guest_clear_pte_af(void) +{ + *((uint64_t *)TEST_PTE_GVA) &= ~PTE_AF; + flush_tlb_page(TEST_GVA); + + return true; +} + +static void guest_check_pte_af(void) +{ + dsb(ish); + GUEST_ASSERT_EQ(*((uint64_t *)TEST_PTE_GVA) & PTE_AF, PTE_AF); +} + +static void guest_exec(void) +{ + int (*code)(void) = (int (*)(void))TEST_EXEC_GVA; + int ret; + + ret = code(); + GUEST_ASSERT_EQ(ret, 0x77); +} + +static bool guest_prepare(struct test_desc *test) +{ + bool (*prepare_fn)(void); + int i; + + for (i = 0; i < PREPARE_FN_NR; i++) { + prepare_fn = test->guest_prepare[i]; + if (prepare_fn && !prepare_fn()) + return false; + } + + return true; +} + +static void guest_test_check(struct test_desc *test) +{ + void (*check_fn)(void); + int i; + + for (i = 0; i < CHECK_FN_NR; i++) { + check_fn = test->guest_test_check[i]; + if (check_fn) + check_fn(); + } +} + +static void guest_code(struct test_desc *test) +{ + if (!guest_prepare(test)) + GUEST_SYNC(CMD_SKIP_TEST); + + GUEST_SYNC(test->mem_mark_cmd); + + if (test->guest_test) + 
test->guest_test(); + + guest_test_check(test); + GUEST_DONE(); +} + +static void no_dabt_handler(struct ex_regs *regs) +{ + GUEST_ASSERT_1(false, read_sysreg(far_el1)); +} + +static void no_iabt_handler(struct ex_regs *regs) +{ + GUEST_ASSERT_1(false, regs->pc); +} + +/* Returns true to continue the test, and false if it should be skipped. */ +static bool punch_hole_in_backing_store(struct kvm_vm *vm, + struct userspace_mem_region *region) +{ + void *hva = (void *)region->region.userspace_addr; + uint64_t paging_size = region->region.memory_size; + int ret, fd = region->fd; + + if (fd != -1) { + ret = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + 0, paging_size); + TEST_ASSERT(ret == 0, "fallocate failed\n"); + } else { + ret = madvise(hva, paging_size, MADV_DONTNEED); + TEST_ASSERT(ret == 0, "madvise failed\n"); + } + + return true; +} + +/* Returns true to continue the test, and false if it should be skipped. */ +static bool handle_cmd(struct kvm_vm *vm, int cmd) +{ + struct userspace_mem_region *data_region, *pt_region; + bool continue_test = true; + + data_region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA); + pt_region = vm_get_mem_region(vm, MEM_REGION_PT); + + if (cmd == CMD_SKIP_TEST) + continue_test = false; + + if (cmd & CMD_HOLE_PT) + continue_test = punch_hole_in_backing_store(vm, pt_region); + if (cmd & CMD_HOLE_DATA) + continue_test = punch_hole_in_backing_store(vm, data_region); + + return continue_test; +} + +typedef uint32_t aarch64_insn_t; +extern aarch64_insn_t __exec_test[2]; + +noinline void __return_0x77(void) +{ + asm volatile("__exec_test: mov x0, #0x77\n" + "ret\n"); +} + +/* + * Note that this function runs on the host before the test VM starts: there's + * no need to sync the D$ and I$ caches. 
+ */ +static void load_exec_code_for_test(struct kvm_vm *vm) +{ + uint64_t *code; + struct userspace_mem_region *region; + void *hva; + + region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA); + hva = (void *)region->region.userspace_addr; + + assert(TEST_EXEC_GVA > TEST_GVA); + code = hva + TEST_EXEC_GVA - TEST_GVA; + memcpy(code, __exec_test, sizeof(__exec_test)); +} + +static void setup_abort_handlers(struct kvm_vm *vm, struct kvm_vcpu *vcpu, + struct test_desc *test) +{ + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vcpu); + + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, + ESR_EC_DABT, no_dabt_handler); + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, + ESR_EC_IABT, no_iabt_handler); +} + +static void setup_gva_maps(struct kvm_vm *vm) +{ + struct userspace_mem_region *region; + uint64_t pte_gpa; + + region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA); + /* Map TEST_GVA first. This will install a new PTE. */ + virt_pg_map(vm, TEST_GVA, region->region.guest_phys_addr); + /* Then map TEST_PTE_GVA to the above PTE. */ + pte_gpa = addr_hva2gpa(vm, virt_get_pte_hva(vm, TEST_GVA)); + virt_pg_map(vm, TEST_PTE_GVA, pte_gpa); +} + +enum pf_test_memslots { + CODE_AND_DATA_MEMSLOT, + PAGE_TABLE_MEMSLOT, + TEST_DATA_MEMSLOT, +}; + +/* + * Create a memslot for code and data at pfn=0, and test-data and PT ones + * at max_gfn. + */ +static void setup_memslots(struct kvm_vm *vm, struct test_params *p) +{ + uint64_t backing_src_pagesz = get_backing_src_pagesz(p->src_type); + uint64_t guest_page_size = vm->page_size; + uint64_t max_gfn = vm_compute_max_gfn(vm); + /* Enough for 2M of code when using 4K guest pages. */ + uint64_t code_npages = 512; + uint64_t pt_size, data_size, data_gpa; + + /* + * This test requires 1 pgd, 2 pud, 4 pmd, and 6 pte pages when using + * VM_MODE_P48V48_4K. Note that the .text takes ~1.6MBs. That's 13 + * pages. VM_MODE_P48V48_4K is the mode with most PT pages; let's use + * twice that just in case. 
+ */ + pt_size = 26 * guest_page_size; + + /* memslot sizes and gpa's must be aligned to the backing page size */ + pt_size = align_up(pt_size, backing_src_pagesz); + data_size = align_up(guest_page_size, backing_src_pagesz); + data_gpa = (max_gfn * guest_page_size) - data_size; + data_gpa = align_down(data_gpa, backing_src_pagesz); + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, + CODE_AND_DATA_MEMSLOT, code_npages, 0); + vm->memslots[MEM_REGION_CODE] = CODE_AND_DATA_MEMSLOT; + vm->memslots[MEM_REGION_DATA] = CODE_AND_DATA_MEMSLOT; + + vm_userspace_mem_region_add(vm, p->src_type, data_gpa - pt_size, + PAGE_TABLE_MEMSLOT, pt_size / guest_page_size, + p->test_desc->pt_memslot_flags); + vm->memslots[MEM_REGION_PT] = PAGE_TABLE_MEMSLOT; + + vm_userspace_mem_region_add(vm, p->src_type, data_gpa, TEST_DATA_MEMSLOT, + data_size / guest_page_size, + p->test_desc->data_memslot_flags); + vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT; +} + +static void print_test_banner(enum vm_guest_mode mode, struct test_params *p) +{ + struct test_desc *test = p->test_desc; + + pr_debug("Test: %s\n", test->name); + pr_debug("Testing guest mode: %s\n", vm_guest_mode_string(mode)); + pr_debug("Testing memory backing src type: %s\n", + vm_mem_backing_src_alias(p->src_type)->name); +} + +/* + * This function either succeeds, skips the test (after setting test->skip), or + * fails with a TEST_FAIL that aborts all tests. + */ +static void vcpu_run_loop(struct kvm_vm *vm, struct kvm_vcpu *vcpu, + struct test_desc *test) +{ + struct ucall uc; + + for (;;) { + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + if (!handle_cmd(vm, uc.args[1])) { + test->skip = true; + goto done; + } + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx"); + break; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } + +done: + pr_debug(test->skip ? 
"Skipped.\n" : "Done.\n"); +} + +static void run_test(enum vm_guest_mode mode, void *arg) +{ + struct test_params *p = (struct test_params *)arg; + struct test_desc *test = p->test_desc; + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; + + print_test_banner(mode, p); + + vm = ____vm_create(mode); + setup_memslots(vm, p); + kvm_vm_elf_load(vm, program_invocation_name); + vcpu = vm_vcpu_add(vm, 0, guest_code); + + setup_gva_maps(vm); + + ucall_init(vm, NULL); + + load_exec_code_for_test(vm); + setup_abort_handlers(vm, vcpu, test); + vcpu_args_set(vcpu, 1, test); + + vcpu_run_loop(vm, vcpu, test); + + ucall_uninit(vm); + kvm_vm_free(vm); +} + +static void help(char *name) +{ + puts(""); + printf("usage: %s [-h] [-s mem-type]\n", name); + puts(""); + guest_modes_help(); + backing_src_help("-s"); + puts(""); +} + +#define SNAME(s) #s +#define SCAT2(a, b) SNAME(a ## _ ## b) +#define SCAT3(a, b, c) SCAT2(a, SCAT2(b, c)) + +#define _CHECK(_test) _CHECK_##_test +#define _PREPARE(_test) _PREPARE_##_test +#define _PREPARE_guest_read64 NULL +#define _PREPARE_guest_ld_preidx NULL +#define _PREPARE_guest_write64 NULL +#define _PREPARE_guest_st_preidx NULL +#define _PREPARE_guest_exec NULL +#define _PREPARE_guest_at NULL +#define _PREPARE_guest_dc_zva guest_check_dc_zva +#define _PREPARE_guest_cas guest_check_lse + +/* With or without access flag checks */ +#define _PREPARE_with_af guest_set_ha, guest_clear_pte_af +#define _PREPARE_no_af NULL +#define _CHECK_with_af guest_check_pte_af +#define _CHECK_no_af NULL + +/* Performs an access and checks that no faults were triggered. */ +#define TEST_ACCESS(_access, _with_af, _mark_cmd) \ +{ \ + .name = SCAT3(_access, _with_af, #_mark_cmd), \ + .guest_prepare = { _PREPARE(_with_af), \ + _PREPARE(_access) }, \ + .mem_mark_cmd = _mark_cmd, \ + .guest_test = _access, \ + .guest_test_check = { _CHECK(_with_af) }, \ +} + +static struct test_desc tests[] = { + + /* Check that HW is setting the Access Flag (AF) (sanity checks). 
*/ + TEST_ACCESS(guest_read64, with_af, CMD_NONE), + TEST_ACCESS(guest_ld_preidx, with_af, CMD_NONE), + TEST_ACCESS(guest_cas, with_af, CMD_NONE), + TEST_ACCESS(guest_write64, with_af, CMD_NONE), + TEST_ACCESS(guest_st_preidx, with_af, CMD_NONE), + TEST_ACCESS(guest_dc_zva, with_af, CMD_NONE), + TEST_ACCESS(guest_exec, with_af, CMD_NONE), + + /* + * Punch a hole in the data backing store, and then try multiple + * accesses: reads should rturn zeroes, and writes should + * re-populate the page. Moreover, the test also check that no + * exception was generated in the guest. Note that this + * reading/writing behavior is the same as reading/writing a + * punched page (with fallocate(FALLOC_FL_PUNCH_HOLE)) from + * userspace. + */ + TEST_ACCESS(guest_read64, no_af, CMD_HOLE_DATA), + TEST_ACCESS(guest_cas, no_af, CMD_HOLE_DATA), + TEST_ACCESS(guest_ld_preidx, no_af, CMD_HOLE_DATA), + TEST_ACCESS(guest_write64, no_af, CMD_HOLE_DATA), + TEST_ACCESS(guest_st_preidx, no_af, CMD_HOLE_DATA), + TEST_ACCESS(guest_at, no_af, CMD_HOLE_DATA), + TEST_ACCESS(guest_dc_zva, no_af, CMD_HOLE_DATA), + + { 0 } +}; + +static void for_each_test_and_guest_mode(enum vm_mem_backing_src_type src_type) +{ + struct test_desc *t; + + for (t = &tests[0]; t->name; t++) { + if (t->skip) + continue; + + struct test_params p = { + .src_type = src_type, + .test_desc = t, + }; + + for_each_guest_mode(run_test, &p); + } +} + +int main(int argc, char *argv[]) +{ + enum vm_mem_backing_src_type src_type; + int opt; + + setbuf(stdout, NULL); + + src_type = DEFAULT_VM_MEM_SRC; + + while ((opt = getopt(argc, argv, "hm:s:")) != -1) { + switch (opt) { + case 'm': + guest_modes_cmdline(optarg); + break; + case 's': + src_type = parse_backing_src_type(optarg); + break; + case 'h': + default: + help(argv[0]); + exit(0); + } + } + + for_each_test_and_guest_mode(src_type); + return 0; +} diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h index 
c1ddca8db225..5f977528e09c 100644 --- a/tools/testing/selftests/kvm/include/aarch64/processor.h +++ b/tools/testing/selftests/kvm/include/aarch64/processor.h @@ -105,11 +105,19 @@ enum { #define ESR_EC_MASK (ESR_EC_NUM - 1) #define ESR_EC_SVC64 0x15 +#define ESR_EC_IABT 0x21 +#define ESR_EC_DABT 0x25 #define ESR_EC_HW_BP_CURRENT 0x31 #define ESR_EC_SSTEP_CURRENT 0x33 #define ESR_EC_WP_CURRENT 0x35 #define ESR_EC_BRK_INS 0x3c +/* Access flag */ +#define PTE_AF (1ULL << 10) + +/* Access flag update enable/disable */ +#define TCR_EL1_HA (1ULL << 39) + void aarch64_get_supported_page_sizes(uint32_t ipa, bool *ps4k, bool *ps16k, bool *ps64k); From 3b1d915659c64dce079f4926a648f2271faea008 Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Mon, 17 Oct 2022 19:58:31 +0000 Subject: [PATCH 1344/4122] KVM: selftests: aarch64: Add userfaultfd tests into page_fault_test Add some userfaultfd tests into page_fault_test. Punch holes into the data and/or page-table memslots, perform some accesses, and check that the faults are taken (or not taken) when expected. Signed-off-by: Ricardo Koller Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221017195834.2295901-12-ricarkol@google.com --- .../selftests/kvm/aarch64/page_fault_test.c | 187 ++++++++++++++++++ 1 file changed, 187 insertions(+) diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c index 28859a96053f..8ecc2ac8c476 100644 --- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c +++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c @@ -35,6 +35,12 @@ static uint64_t *guest_test_memory = (uint64_t *)TEST_GVA; #define PREPARE_FN_NR 10 #define CHECK_FN_NR 10 +static struct event_cnt { + int uffd_faults; + /* uffd_faults is incremented from multiple threads. 
*/ + pthread_mutex_t uffd_faults_mutex; +} events; + struct test_desc { const char *name; uint64_t mem_mark_cmd; @@ -42,11 +48,14 @@ struct test_desc { bool (*guest_prepare[PREPARE_FN_NR])(void); void (*guest_test)(void); void (*guest_test_check[CHECK_FN_NR])(void); + uffd_handler_t uffd_pt_handler; + uffd_handler_t uffd_data_handler; void (*dabt_handler)(struct ex_regs *regs); void (*iabt_handler)(struct ex_regs *regs); uint32_t pt_memslot_flags; uint32_t data_memslot_flags; bool skip; + struct event_cnt expected_events; }; struct test_params { @@ -263,7 +272,110 @@ static void no_iabt_handler(struct ex_regs *regs) GUEST_ASSERT_1(false, regs->pc); } +static struct uffd_args { + char *copy; + void *hva; + uint64_t paging_size; +} pt_args, data_args; + /* Returns true to continue the test, and false if it should be skipped. */ +static int uffd_generic_handler(int uffd_mode, int uffd, struct uffd_msg *msg, + struct uffd_args *args, bool expect_write) +{ + uint64_t addr = msg->arg.pagefault.address; + uint64_t flags = msg->arg.pagefault.flags; + struct uffdio_copy copy; + int ret; + + TEST_ASSERT(uffd_mode == UFFDIO_REGISTER_MODE_MISSING, + "The only expected UFFD mode is MISSING"); + ASSERT_EQ(!!(flags & UFFD_PAGEFAULT_FLAG_WRITE), expect_write); + ASSERT_EQ(addr, (uint64_t)args->hva); + + pr_debug("uffd fault: addr=%p write=%d\n", + (void *)addr, !!(flags & UFFD_PAGEFAULT_FLAG_WRITE)); + + copy.src = (uint64_t)args->copy; + copy.dst = addr; + copy.len = args->paging_size; + copy.mode = 0; + + ret = ioctl(uffd, UFFDIO_COPY, &copy); + if (ret == -1) { + pr_info("Failed UFFDIO_COPY in 0x%lx with errno: %d\n", + addr, errno); + return ret; + } + + pthread_mutex_lock(&events.uffd_faults_mutex); + events.uffd_faults += 1; + pthread_mutex_unlock(&events.uffd_faults_mutex); + return 0; +} + +static int uffd_pt_write_handler(int mode, int uffd, struct uffd_msg *msg) +{ + return uffd_generic_handler(mode, uffd, msg, &pt_args, true); +} + +static int uffd_data_write_handler(int 
mode, int uffd, struct uffd_msg *msg) +{ + return uffd_generic_handler(mode, uffd, msg, &data_args, true); +} + +static int uffd_data_read_handler(int mode, int uffd, struct uffd_msg *msg) +{ + return uffd_generic_handler(mode, uffd, msg, &data_args, false); +} + +static void setup_uffd_args(struct userspace_mem_region *region, + struct uffd_args *args) +{ + args->hva = (void *)region->region.userspace_addr; + args->paging_size = region->region.memory_size; + + args->copy = malloc(args->paging_size); + TEST_ASSERT(args->copy, "Failed to allocate data copy."); + memcpy(args->copy, args->hva, args->paging_size); +} + +static void setup_uffd(struct kvm_vm *vm, struct test_params *p, + struct uffd_desc **pt_uffd, struct uffd_desc **data_uffd) +{ + struct test_desc *test = p->test_desc; + int uffd_mode = UFFDIO_REGISTER_MODE_MISSING; + + setup_uffd_args(vm_get_mem_region(vm, MEM_REGION_PT), &pt_args); + setup_uffd_args(vm_get_mem_region(vm, MEM_REGION_TEST_DATA), &data_args); + + *pt_uffd = NULL; + if (test->uffd_pt_handler) + *pt_uffd = uffd_setup_demand_paging(uffd_mode, 0, + pt_args.hva, + pt_args.paging_size, + test->uffd_pt_handler); + + *data_uffd = NULL; + if (test->uffd_data_handler) + *data_uffd = uffd_setup_demand_paging(uffd_mode, 0, + data_args.hva, + data_args.paging_size, + test->uffd_data_handler); +} + +static void free_uffd(struct test_desc *test, struct uffd_desc *pt_uffd, + struct uffd_desc *data_uffd) +{ + if (test->uffd_pt_handler) + uffd_stop_demand_paging(pt_uffd); + if (test->uffd_data_handler) + uffd_stop_demand_paging(data_uffd); + + free(pt_args.copy); + free(data_args.copy); +} + +/* Returns false if the test should be skipped. 
*/ static bool punch_hole_in_backing_store(struct kvm_vm *vm, struct userspace_mem_region *region) { @@ -404,6 +516,11 @@ static void setup_memslots(struct kvm_vm *vm, struct test_params *p) vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT; } +static void check_event_counts(struct test_desc *test) +{ + ASSERT_EQ(test->expected_events.uffd_faults, events.uffd_faults); +} + static void print_test_banner(enum vm_guest_mode mode, struct test_params *p) { struct test_desc *test = p->test_desc; @@ -414,6 +531,11 @@ static void print_test_banner(enum vm_guest_mode mode, struct test_params *p) vm_mem_backing_src_alias(p->src_type)->name); } +static void reset_event_counts(void) +{ + memset(&events, 0, sizeof(events)); +} + /* * This function either succeeds, skips the test (after setting test->skip), or * fails with a TEST_FAIL that aborts all tests. @@ -453,6 +575,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct test_desc *test = p->test_desc; struct kvm_vm *vm; struct kvm_vcpu *vcpu; + struct uffd_desc *pt_uffd, *data_uffd; print_test_banner(mode, p); @@ -465,7 +588,16 @@ static void run_test(enum vm_guest_mode mode, void *arg) ucall_init(vm, NULL); + reset_event_counts(); + + /* + * Set some code in the data memslot for the guest to execute (only + * applicable to the EXEC tests). This has to be done before + * setup_uffd() as that function copies the memslot data for the uffd + * handler. + */ load_exec_code_for_test(vm); + setup_uffd(vm, p, &pt_uffd, &data_uffd); setup_abort_handlers(vm, vcpu, test); vcpu_args_set(vcpu, 1, test); @@ -473,6 +605,14 @@ static void run_test(enum vm_guest_mode mode, void *arg) ucall_uninit(vm); kvm_vm_free(vm); + free_uffd(test, pt_uffd, data_uffd); + + /* + * Make sure we check the events after the uffd threads have exited, + * which means they updated their respective event counters. 
+ */ + if (!test->skip) + check_event_counts(test); } static void help(char *name) @@ -488,6 +628,7 @@ static void help(char *name) #define SNAME(s) #s #define SCAT2(a, b) SNAME(a ## _ ## b) #define SCAT3(a, b, c) SCAT2(a, SCAT2(b, c)) +#define SCAT4(a, b, c, d) SCAT2(a, SCAT3(b, c, d)) #define _CHECK(_test) _CHECK_##_test #define _PREPARE(_test) _PREPARE_##_test @@ -515,6 +656,21 @@ static void help(char *name) .mem_mark_cmd = _mark_cmd, \ .guest_test = _access, \ .guest_test_check = { _CHECK(_with_af) }, \ + .expected_events = { 0 }, \ +} + +#define TEST_UFFD(_access, _with_af, _mark_cmd, \ + _uffd_data_handler, _uffd_pt_handler, _uffd_faults) \ +{ \ + .name = SCAT4(uffd, _access, _with_af, #_mark_cmd), \ + .guest_prepare = { _PREPARE(_with_af), \ + _PREPARE(_access) }, \ + .guest_test = _access, \ + .mem_mark_cmd = _mark_cmd, \ + .guest_test_check = { _CHECK(_with_af) }, \ + .uffd_data_handler = _uffd_data_handler, \ + .uffd_pt_handler = _uffd_pt_handler, \ + .expected_events = { .uffd_faults = _uffd_faults, }, \ } static struct test_desc tests[] = { @@ -545,6 +701,37 @@ static struct test_desc tests[] = { TEST_ACCESS(guest_at, no_af, CMD_HOLE_DATA), TEST_ACCESS(guest_dc_zva, no_af, CMD_HOLE_DATA), + /* + * Punch holes in the data and PT backing stores and mark them for + * userfaultfd handling. This should result in 2 faults: the access + * on the data backing store, and its respective S1 page table walk + * (S1PTW). + */ + TEST_UFFD(guest_read64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_data_read_handler, uffd_pt_write_handler, 2), + /* no_af should also lead to a PT write. */ + TEST_UFFD(guest_read64, no_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_data_read_handler, uffd_pt_write_handler, 2), + /* Note how that cas invokes the read handler. */ + TEST_UFFD(guest_cas, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_data_read_handler, uffd_pt_write_handler, 2), + /* + * Can't test guest_at with_af as it's IMPDEF whether the AF is set. 
+ * The S1PTW fault should still be marked as a write. + */ + TEST_UFFD(guest_at, no_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_data_read_handler, uffd_pt_write_handler, 1), + TEST_UFFD(guest_ld_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_data_read_handler, uffd_pt_write_handler, 2), + TEST_UFFD(guest_write64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_data_write_handler, uffd_pt_write_handler, 2), + TEST_UFFD(guest_dc_zva, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_data_write_handler, uffd_pt_write_handler, 2), + TEST_UFFD(guest_st_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_data_write_handler, uffd_pt_write_handler, 2), + TEST_UFFD(guest_exec, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_data_read_handler, uffd_pt_write_handler, 2), + { 0 } }; From a4edf25b3e25656c69cbc768d1c704868e4a616f Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Mon, 17 Oct 2022 19:58:32 +0000 Subject: [PATCH 1345/4122] KVM: selftests: aarch64: Add dirty logging tests into page_fault_test Add some dirty logging tests into page_fault_test. Mark the data and/or page-table memory regions for dirty logging, perform some accesses, and check that the dirty log bits are set or clean when expected. 
Signed-off-by: Ricardo Koller Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221017195834.2295901-13-ricarkol@google.com --- .../selftests/kvm/aarch64/page_fault_test.c | 76 +++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c index 8ecc2ac8c476..a36001143aff 100644 --- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c +++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c @@ -31,6 +31,11 @@ static uint64_t *guest_test_memory = (uint64_t *)TEST_GVA; #define CMD_SKIP_TEST (1ULL << 1) #define CMD_HOLE_PT (1ULL << 2) #define CMD_HOLE_DATA (1ULL << 3) +#define CMD_CHECK_WRITE_IN_DIRTY_LOG (1ULL << 4) +#define CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG (1ULL << 5) +#define CMD_CHECK_NO_WRITE_IN_DIRTY_LOG (1ULL << 6) +#define CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG (1ULL << 7) +#define CMD_SET_PTE_AF (1ULL << 8) #define PREPARE_FN_NR 10 #define CHECK_FN_NR 10 @@ -213,6 +218,21 @@ static void guest_check_pte_af(void) GUEST_ASSERT_EQ(*((uint64_t *)TEST_PTE_GVA) & PTE_AF, PTE_AF); } +static void guest_check_write_in_dirty_log(void) +{ + GUEST_SYNC(CMD_CHECK_WRITE_IN_DIRTY_LOG); +} + +static void guest_check_no_write_in_dirty_log(void) +{ + GUEST_SYNC(CMD_CHECK_NO_WRITE_IN_DIRTY_LOG); +} + +static void guest_check_s1ptw_wr_in_dirty_log(void) +{ + GUEST_SYNC(CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG); +} + static void guest_exec(void) { int (*code)(void) = (int (*)(void))TEST_EXEC_GVA; @@ -395,6 +415,22 @@ static bool punch_hole_in_backing_store(struct kvm_vm *vm, return true; } +static bool check_write_in_dirty_log(struct kvm_vm *vm, + struct userspace_mem_region *region, + uint64_t host_pg_nr) +{ + unsigned long *bmap; + bool first_page_dirty; + uint64_t size = region->region.memory_size; + + /* getpage_size() is not always equal to vm->page_size */ + bmap = bitmap_zalloc(size / getpagesize()); + kvm_vm_get_dirty_log(vm, region->region.slot, bmap); + 
first_page_dirty = test_bit(host_pg_nr, bmap); + free(bmap); + return first_page_dirty; +} + /* Returns true to continue the test, and false if it should be skipped. */ static bool handle_cmd(struct kvm_vm *vm, int cmd) { @@ -411,6 +447,18 @@ static bool handle_cmd(struct kvm_vm *vm, int cmd) continue_test = punch_hole_in_backing_store(vm, pt_region); if (cmd & CMD_HOLE_DATA) continue_test = punch_hole_in_backing_store(vm, data_region); + if (cmd & CMD_CHECK_WRITE_IN_DIRTY_LOG) + TEST_ASSERT(check_write_in_dirty_log(vm, data_region, 0), + "Missing write in dirty log"); + if (cmd & CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG) + TEST_ASSERT(check_write_in_dirty_log(vm, pt_region, 0), + "Missing s1ptw write in dirty log"); + if (cmd & CMD_CHECK_NO_WRITE_IN_DIRTY_LOG) + TEST_ASSERT(!check_write_in_dirty_log(vm, data_region, 0), + "Unexpected write in dirty log"); + if (cmd & CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG) + TEST_ASSERT(!check_write_in_dirty_log(vm, pt_region, 0), + "Unexpected s1ptw write in dirty log"); return continue_test; } @@ -673,6 +721,19 @@ static void help(char *name) .expected_events = { .uffd_faults = _uffd_faults, }, \ } +#define TEST_DIRTY_LOG(_access, _with_af, _test_check) \ +{ \ + .name = SCAT3(dirty_log, _access, _with_af), \ + .data_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \ + .pt_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \ + .guest_prepare = { _PREPARE(_with_af), \ + _PREPARE(_access) }, \ + .guest_test = _access, \ + .guest_test_check = { _CHECK(_with_af), _test_check, \ + guest_check_s1ptw_wr_in_dirty_log}, \ + .expected_events = { 0 }, \ +} + static struct test_desc tests[] = { /* Check that HW is setting the Access Flag (AF) (sanity checks). */ @@ -732,6 +793,21 @@ static struct test_desc tests[] = { TEST_UFFD(guest_exec, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, uffd_data_read_handler, uffd_pt_write_handler, 2), + /* + * Try accesses when the data and PT memory regions are both + * tracked for dirty logging. 
+ */ + TEST_DIRTY_LOG(guest_read64, with_af, guest_check_no_write_in_dirty_log), + /* no_af should also lead to a PT write. */ + TEST_DIRTY_LOG(guest_read64, no_af, guest_check_no_write_in_dirty_log), + TEST_DIRTY_LOG(guest_ld_preidx, with_af, guest_check_no_write_in_dirty_log), + TEST_DIRTY_LOG(guest_at, no_af, guest_check_no_write_in_dirty_log), + TEST_DIRTY_LOG(guest_exec, with_af, guest_check_no_write_in_dirty_log), + TEST_DIRTY_LOG(guest_write64, with_af, guest_check_write_in_dirty_log), + TEST_DIRTY_LOG(guest_cas, with_af, guest_check_write_in_dirty_log), + TEST_DIRTY_LOG(guest_dc_zva, with_af, guest_check_write_in_dirty_log), + TEST_DIRTY_LOG(guest_st_preidx, with_af, guest_check_write_in_dirty_log), + { 0 } }; From 45acde40f538a30e759f3b3f4aa5089edf097b2f Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Mon, 17 Oct 2022 19:58:33 +0000 Subject: [PATCH 1346/4122] KVM: selftests: aarch64: Add readonly memslot tests into page_fault_test Add some readonly memslot tests into page_fault_test. Mark the data and/or page-table memory regions as readonly, perform some accesses, and check that the right fault is triggered when expected (e.g., a store with no write-back should lead to an mmio exit). Signed-off-by: Ricardo Koller Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221017195834.2295901-14-ricarkol@google.com --- .../selftests/kvm/aarch64/page_fault_test.c | 102 +++++++++++++++++- 1 file changed, 101 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c index a36001143aff..727f4f2b6cc4 100644 --- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c +++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c @@ -41,6 +41,8 @@ static uint64_t *guest_test_memory = (uint64_t *)TEST_GVA; #define CHECK_FN_NR 10 static struct event_cnt { + int mmio_exits; + int fail_vcpu_runs; int uffd_faults; /* uffd_faults is incremented from multiple threads. 
*/ pthread_mutex_t uffd_faults_mutex; @@ -57,6 +59,8 @@ struct test_desc { uffd_handler_t uffd_data_handler; void (*dabt_handler)(struct ex_regs *regs); void (*iabt_handler)(struct ex_regs *regs); + void (*mmio_handler)(struct kvm_vm *vm, struct kvm_run *run); + void (*fail_vcpu_run_handler)(int ret); uint32_t pt_memslot_flags; uint32_t data_memslot_flags; bool skip; @@ -415,6 +419,31 @@ static bool punch_hole_in_backing_store(struct kvm_vm *vm, return true; } +static void mmio_on_test_gpa_handler(struct kvm_vm *vm, struct kvm_run *run) +{ + struct userspace_mem_region *region; + void *hva; + + region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA); + hva = (void *)region->region.userspace_addr; + + ASSERT_EQ(run->mmio.phys_addr, region->region.guest_phys_addr); + + memcpy(hva, run->mmio.data, run->mmio.len); + events.mmio_exits += 1; +} + +static void mmio_no_handler(struct kvm_vm *vm, struct kvm_run *run) +{ + uint64_t data; + + memcpy(&data, run->mmio.data, sizeof(data)); + pr_debug("addr=%lld len=%d w=%d data=%lx\n", + run->mmio.phys_addr, run->mmio.len, + run->mmio.is_write, data); + TEST_FAIL("There was no MMIO exit expected."); +} + static bool check_write_in_dirty_log(struct kvm_vm *vm, struct userspace_mem_region *region, uint64_t host_pg_nr) @@ -463,6 +492,18 @@ static bool handle_cmd(struct kvm_vm *vm, int cmd) return continue_test; } +void fail_vcpu_run_no_handler(int ret) +{ + TEST_FAIL("Unexpected vcpu run failure\n"); +} + +void fail_vcpu_run_mmio_no_syndrome_handler(int ret) +{ + TEST_ASSERT(errno == ENOSYS, + "The mmio handler should have returned not implemented."); + events.fail_vcpu_runs += 1; +} + typedef uint32_t aarch64_insn_t; extern aarch64_insn_t __exec_test[2]; @@ -564,9 +605,20 @@ static void setup_memslots(struct kvm_vm *vm, struct test_params *p) vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT; } +static void setup_default_handlers(struct test_desc *test) +{ + if (!test->mmio_handler) + test->mmio_handler = mmio_no_handler; + + if 
(!test->fail_vcpu_run_handler) + test->fail_vcpu_run_handler = fail_vcpu_run_no_handler; +} + static void check_event_counts(struct test_desc *test) { ASSERT_EQ(test->expected_events.uffd_faults, events.uffd_faults); + ASSERT_EQ(test->expected_events.mmio_exits, events.mmio_exits); + ASSERT_EQ(test->expected_events.fail_vcpu_runs, events.fail_vcpu_runs); } static void print_test_banner(enum vm_guest_mode mode, struct test_params *p) @@ -591,10 +643,18 @@ static void reset_event_counts(void) static void vcpu_run_loop(struct kvm_vm *vm, struct kvm_vcpu *vcpu, struct test_desc *test) { + struct kvm_run *run; struct ucall uc; + int ret; + + run = vcpu->run; for (;;) { - vcpu_run(vcpu); + ret = _vcpu_run(vcpu); + if (ret) { + test->fail_vcpu_run_handler(ret); + goto done; + } switch (get_ucall(vcpu, &uc)) { case UCALL_SYNC: @@ -608,6 +668,10 @@ static void vcpu_run_loop(struct kvm_vm *vm, struct kvm_vcpu *vcpu, break; case UCALL_DONE: goto done; + case UCALL_NONE: + if (run->exit_reason == KVM_EXIT_MMIO) + test->mmio_handler(vm, run); + break; default: TEST_FAIL("Unknown ucall %lu", uc.cmd); } @@ -647,6 +711,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) load_exec_code_for_test(vm); setup_uffd(vm, p, &pt_uffd, &data_uffd); setup_abort_handlers(vm, vcpu, test); + setup_default_handlers(test); vcpu_args_set(vcpu, 1, test); vcpu_run_loop(vm, vcpu, test); @@ -734,6 +799,25 @@ static void help(char *name) .expected_events = { 0 }, \ } +#define TEST_RO_MEMSLOT(_access, _mmio_handler, _mmio_exits) \ +{ \ + .name = SCAT3(ro_memslot, _access, _with_af), \ + .data_memslot_flags = KVM_MEM_READONLY, \ + .guest_prepare = { _PREPARE(_access) }, \ + .guest_test = _access, \ + .mmio_handler = _mmio_handler, \ + .expected_events = { .mmio_exits = _mmio_exits }, \ +} + +#define TEST_RO_MEMSLOT_NO_SYNDROME(_access) \ +{ \ + .name = SCAT2(ro_memslot_no_syndrome, _access), \ + .data_memslot_flags = KVM_MEM_READONLY, \ + .guest_test = _access, \ + .fail_vcpu_run_handler = 
fail_vcpu_run_mmio_no_syndrome_handler, \ + .expected_events = { .fail_vcpu_runs = 1 }, \ +} + static struct test_desc tests[] = { /* Check that HW is setting the Access Flag (AF) (sanity checks). */ @@ -808,6 +892,22 @@ static struct test_desc tests[] = { TEST_DIRTY_LOG(guest_dc_zva, with_af, guest_check_write_in_dirty_log), TEST_DIRTY_LOG(guest_st_preidx, with_af, guest_check_write_in_dirty_log), + /* + * Try accesses when the data memory region is marked read-only + * (with KVM_MEM_READONLY). Writes with a syndrome result in an + * MMIO exit, writes with no syndrome (e.g., CAS) result in a + * failed vcpu run, and reads/execs with and without syndroms do + * not fault. + */ + TEST_RO_MEMSLOT(guest_read64, 0, 0), + TEST_RO_MEMSLOT(guest_ld_preidx, 0, 0), + TEST_RO_MEMSLOT(guest_at, 0, 0), + TEST_RO_MEMSLOT(guest_exec, 0, 0), + TEST_RO_MEMSLOT(guest_write64, mmio_on_test_gpa_handler, 1), + TEST_RO_MEMSLOT_NO_SYNDROME(guest_dc_zva), + TEST_RO_MEMSLOT_NO_SYNDROME(guest_cas), + TEST_RO_MEMSLOT_NO_SYNDROME(guest_st_preidx), + { 0 } }; From ff2b5509e1d252cd18bb1430b5461d5044701559 Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Mon, 17 Oct 2022 19:58:34 +0000 Subject: [PATCH 1347/4122] KVM: selftests: aarch64: Add mix of tests into page_fault_test Add some mix of tests into page_fault_test: memory regions with all the pairwise combinations of read-only, userfaultfd, and dirty-logging. For example, writing into a read-only region which has a hole handled with userfaultfd. 
Signed-off-by: Ricardo Koller Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221017195834.2295901-15-ricarkol@google.com --- .../selftests/kvm/aarch64/page_fault_test.c | 155 ++++++++++++++++++ 1 file changed, 155 insertions(+) diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c index 727f4f2b6cc4..05bb6a6369c2 100644 --- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c +++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c @@ -399,6 +399,12 @@ static void free_uffd(struct test_desc *test, struct uffd_desc *pt_uffd, free(data_args.copy); } +static int uffd_no_handler(int mode, int uffd, struct uffd_msg *msg) +{ + TEST_FAIL("There was no UFFD fault expected."); + return -1; +} + /* Returns false if the test should be skipped. */ static bool punch_hole_in_backing_store(struct kvm_vm *vm, struct userspace_mem_region *region) @@ -799,6 +805,22 @@ static void help(char *name) .expected_events = { 0 }, \ } +#define TEST_UFFD_AND_DIRTY_LOG(_access, _with_af, _uffd_data_handler, \ + _uffd_faults, _test_check) \ +{ \ + .name = SCAT3(uffd_and_dirty_log, _access, _with_af), \ + .data_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \ + .pt_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \ + .guest_prepare = { _PREPARE(_with_af), \ + _PREPARE(_access) }, \ + .guest_test = _access, \ + .mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \ + .guest_test_check = { _CHECK(_with_af), _test_check }, \ + .uffd_data_handler = _uffd_data_handler, \ + .uffd_pt_handler = uffd_pt_write_handler, \ + .expected_events = { .uffd_faults = _uffd_faults, }, \ +} + #define TEST_RO_MEMSLOT(_access, _mmio_handler, _mmio_exits) \ { \ .name = SCAT3(ro_memslot, _access, _with_af), \ @@ -818,6 +840,59 @@ static void help(char *name) .expected_events = { .fail_vcpu_runs = 1 }, \ } +#define TEST_RO_MEMSLOT_AND_DIRTY_LOG(_access, _mmio_handler, _mmio_exits, \ + _test_check) \ +{ \ + .name = SCAT3(ro_memslot, _access, _with_af), \ 
+ .data_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \ + .pt_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \ + .guest_prepare = { _PREPARE(_access) }, \ + .guest_test = _access, \ + .guest_test_check = { _test_check }, \ + .mmio_handler = _mmio_handler, \ + .expected_events = { .mmio_exits = _mmio_exits}, \ +} + +#define TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(_access, _test_check) \ +{ \ + .name = SCAT2(ro_memslot_no_syn_and_dlog, _access), \ + .data_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \ + .pt_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \ + .guest_test = _access, \ + .guest_test_check = { _test_check }, \ + .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \ + .expected_events = { .fail_vcpu_runs = 1 }, \ +} + +#define TEST_RO_MEMSLOT_AND_UFFD(_access, _mmio_handler, _mmio_exits, \ + _uffd_data_handler, _uffd_faults) \ +{ \ + .name = SCAT2(ro_memslot_uffd, _access), \ + .data_memslot_flags = KVM_MEM_READONLY, \ + .mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \ + .guest_prepare = { _PREPARE(_access) }, \ + .guest_test = _access, \ + .uffd_data_handler = _uffd_data_handler, \ + .uffd_pt_handler = uffd_pt_write_handler, \ + .mmio_handler = _mmio_handler, \ + .expected_events = { .mmio_exits = _mmio_exits, \ + .uffd_faults = _uffd_faults }, \ +} + +#define TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(_access, _uffd_data_handler, \ + _uffd_faults) \ +{ \ + .name = SCAT2(ro_memslot_no_syndrome, _access), \ + .data_memslot_flags = KVM_MEM_READONLY, \ + .mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \ + .guest_test = _access, \ + .uffd_data_handler = _uffd_data_handler, \ + .uffd_pt_handler = uffd_pt_write_handler, \ + .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \ + .expected_events = { .fail_vcpu_runs = 1, \ + .uffd_faults = _uffd_faults }, \ +} + static struct test_desc tests[] = { /* Check that HW is setting the Access Flag (AF) (sanity checks). 
*/ @@ -892,6 +967,35 @@ static struct test_desc tests[] = { TEST_DIRTY_LOG(guest_dc_zva, with_af, guest_check_write_in_dirty_log), TEST_DIRTY_LOG(guest_st_preidx, with_af, guest_check_write_in_dirty_log), + /* + * Access when the data and PT memory regions are both marked for + * dirty logging and UFFD at the same time. The expected result is + * that writes should mark the dirty log and trigger a userfaultfd + * write fault. Reads/execs should result in a read userfaultfd + * fault, and nothing in the dirty log. Any S1PTW should result in + * a write in the dirty log and a userfaultfd write. + */ + TEST_UFFD_AND_DIRTY_LOG(guest_read64, with_af, uffd_data_read_handler, 2, + guest_check_no_write_in_dirty_log), + /* no_af should also lead to a PT write. */ + TEST_UFFD_AND_DIRTY_LOG(guest_read64, no_af, uffd_data_read_handler, 2, + guest_check_no_write_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_ld_preidx, with_af, uffd_data_read_handler, + 2, guest_check_no_write_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_at, with_af, 0, 1, + guest_check_no_write_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_exec, with_af, uffd_data_read_handler, 2, + guest_check_no_write_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_write64, with_af, uffd_data_write_handler, + 2, guest_check_write_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_cas, with_af, uffd_data_read_handler, 2, + guest_check_write_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_dc_zva, with_af, uffd_data_write_handler, + 2, guest_check_write_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_st_preidx, with_af, + uffd_data_write_handler, 2, + guest_check_write_in_dirty_log), + /* * Try accesses when the data memory region is marked read-only * (with KVM_MEM_READONLY). 
Writes with a syndrome result in an @@ -908,6 +1012,57 @@ static struct test_desc tests[] = { TEST_RO_MEMSLOT_NO_SYNDROME(guest_cas), TEST_RO_MEMSLOT_NO_SYNDROME(guest_st_preidx), + /* + * Access when both the data region is both read-only and marked + * for dirty logging at the same time. The expected result is that + * for writes there should be no write in the dirty log. The + * readonly handling is the same as if the memslot was not marked + * for dirty logging: writes with a syndrome result in an MMIO + * exit, and writes with no syndrome result in a failed vcpu run. + */ + TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_read64, 0, 0, + guest_check_no_write_in_dirty_log), + TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_ld_preidx, 0, 0, + guest_check_no_write_in_dirty_log), + TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_at, 0, 0, + guest_check_no_write_in_dirty_log), + TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_exec, 0, 0, + guest_check_no_write_in_dirty_log), + TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_write64, mmio_on_test_gpa_handler, + 1, guest_check_no_write_in_dirty_log), + TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_dc_zva, + guest_check_no_write_in_dirty_log), + TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_cas, + guest_check_no_write_in_dirty_log), + TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_st_preidx, + guest_check_no_write_in_dirty_log), + + /* + * Access when the data region is both read-only and punched with + * holes tracked with userfaultfd. The expected result is the + * union of both userfaultfd and read-only behaviors. For example, + * write accesses result in a userfaultfd write fault and an MMIO + * exit. Writes with no syndrome result in a failed vcpu run and + * no userfaultfd write fault. Reads result in userfaultfd getting + * triggered. 
+ */ + TEST_RO_MEMSLOT_AND_UFFD(guest_read64, 0, 0, + uffd_data_read_handler, 2), + TEST_RO_MEMSLOT_AND_UFFD(guest_ld_preidx, 0, 0, + uffd_data_read_handler, 2), + TEST_RO_MEMSLOT_AND_UFFD(guest_at, 0, 0, + uffd_no_handler, 1), + TEST_RO_MEMSLOT_AND_UFFD(guest_exec, 0, 0, + uffd_data_read_handler, 2), + TEST_RO_MEMSLOT_AND_UFFD(guest_write64, mmio_on_test_gpa_handler, 1, + uffd_data_write_handler, 2), + TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_cas, + uffd_data_read_handler, 2), + TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_dc_zva, + uffd_no_handler, 1), + TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_st_preidx, + uffd_no_handler, 1), + { 0 } }; From 579d7ebe90a332cc5b6c02db9250fd0816a64f63 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Thu, 3 Nov 2022 15:05:06 +0000 Subject: [PATCH 1348/4122] KVM: arm64: Fix kvm init failure when mode!=vhe and VA_BITS=52. For nvhe and protected modes, the hyp stage 1 page-tables were previously configured to have the same number of VA bits as the kernel's idmap. However, for kernel configs with VA_BITS=52 and where the kernel is loaded in physical memory below 48 bits, the idmap VA bits is actually smaller than the kernel's normal stage 1 VA bits. This can lead to kernel addresses that can't be mapped into the hypervisor, leading to kvm initialization failure during boot: kvm [1]: IPA Size Limit: 48 bits kvm [1]: Cannot map world-switch code kvm [1]: error initializing Hyp mode: -34 Fix this by ensuring that the hyp stage 1 VA size is the maximum of what's used for the idmap and the regular kernel stage 1. At the same time, refactor the code so that the hyp VA bits is only calculated in one place. Prior to 7ba8f2b2d652, the idmap was always 52 bits for a 52 VA bits kernel and therefore the hyp stage1 was also always 52 bits. 
Fixes: 7ba8f2b2d652 ("arm64: mm: use a 48-bit ID map when possible on 52-bit VA builds") Signed-off-by: Ryan Roberts [maz: commit message fixes] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221103150507.32948-2-ryan.roberts@arm.com --- arch/arm64/kvm/arm.c | 20 +++----------------- arch/arm64/kvm/mmu.c | 28 +++++++++++++++++++++++++++- 2 files changed, 30 insertions(+), 18 deletions(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 94d33e296e10..803055da3ee3 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1518,7 +1518,7 @@ static int kvm_init_vector_slots(void) return 0; } -static void cpu_prepare_hyp_mode(int cpu) +static void cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits) { struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu); unsigned long tcr; @@ -1534,23 +1534,9 @@ static void cpu_prepare_hyp_mode(int cpu) params->mair_el2 = read_sysreg(mair_el1); - /* - * The ID map may be configured to use an extended virtual address - * range. This is only the case if system RAM is out of range for the - * currently configured page size and VA_BITS, in which case we will - * also need the extended virtual range for the HYP ID map, or we won't - * be able to enable the EL2 MMU. - * - * However, at EL2, there is only one TTBR register, and we can't switch - * between translation tables *and* update TCR_EL2.T0SZ at the same - * time. Bottom line: we need to use the extended range with *both* our - * translation tables. - * - * So use the same T0SZ value we use for the ID map. 
- */ tcr = (read_sysreg(tcr_el1) & TCR_EL2_MASK) | TCR_EL2_RES1; tcr &= ~TCR_T0SZ_MASK; - tcr |= (idmap_t0sz & GENMASK(TCR_TxSZ_WIDTH - 1, 0)) << TCR_T0SZ_OFFSET; + tcr |= TCR_T0SZ(hyp_va_bits); params->tcr_el2 = tcr; params->pgd_pa = kvm_mmu_get_httbr(); @@ -2054,7 +2040,7 @@ static int init_hyp_mode(void) } /* Prepare the CPU initialization parameters */ - cpu_prepare_hyp_mode(cpu); + cpu_prepare_hyp_mode(cpu, hyp_va_bits); } if (is_protected_kvm_enabled()) { diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 60ee3d9f01f8..4efb983cff43 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1618,6 +1618,8 @@ static struct kvm_pgtable_mm_ops kvm_hyp_mm_ops = { int kvm_mmu_init(u32 *hyp_va_bits) { int err; + u32 idmap_bits; + u32 kernel_bits; hyp_idmap_start = __pa_symbol(__hyp_idmap_text_start); hyp_idmap_start = ALIGN_DOWN(hyp_idmap_start, PAGE_SIZE); @@ -1631,7 +1633,31 @@ int kvm_mmu_init(u32 *hyp_va_bits) */ BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK); - *hyp_va_bits = 64 - ((idmap_t0sz & TCR_T0SZ_MASK) >> TCR_T0SZ_OFFSET); + /* + * The ID map may be configured to use an extended virtual address + * range. This is only the case if system RAM is out of range for the + * currently configured page size and VA_BITS_MIN, in which case we will + * also need the extended virtual range for the HYP ID map, or we won't + * be able to enable the EL2 MMU. + * + * However, in some cases the ID map may be configured for fewer than + * the number of VA bits used by the regular kernel stage 1. This + * happens when VA_BITS=52 and the kernel image is placed in PA space + * below 48 bits. + * + * At EL2, there is only one TTBR register, and we can't switch between + * translation tables *and* update TCR_EL2.T0SZ at the same time. Bottom + * line: we need to use the extended range with *both* our translation + * tables. 
+ * + * So use the maximum of the idmap VA bits and the regular kernel stage + * 1 VA bits to assure that the hypervisor can both ID map its code page + * and map any kernel memory. + */ + idmap_bits = 64 - ((idmap_t0sz & TCR_T0SZ_MASK) >> TCR_T0SZ_OFFSET); + kernel_bits = vabits_actual; + *hyp_va_bits = max(idmap_bits, kernel_bits); + kvm_debug("Using %u-bit virtual addresses at EL2\n", *hyp_va_bits); kvm_debug("IDMAP page: %lx\n", hyp_idmap_start); kvm_debug("HYP VA range: %lx:%lx\n", From a0d37784bfd7f699986ba3a64cfeb68a03cb7fd0 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Thu, 3 Nov 2022 15:05:07 +0000 Subject: [PATCH 1349/4122] KVM: arm64: Fix PAR_TO_HPFAR() to work independently of PA_BITS. Kernel configs with PAGE_SIZE=64KB and PA_BITS=48 still advertise 52 bit IPA space on HW that implements LPA. This is by design (admittedly this is a very unlikely configuration in the real world). However on such a config, attempting to create a vm with the guest kernel placed above 48 bits in IPA space results in misbehaviour due to the hypervisor incorrectly interpreting a faulting IPA. Fix up PAR_TO_HPFAR() to always take 52 bits out of the PAR rather than masking to CONFIG_ARM64_PA_BITS. If the system has a smaller implemented PARange this should be safe because the bits are res0. A more robust approach would be to discover the IPA size in use by the page-table and mask based on that, to avoid relying on res0 reading back as zero. But this information is difficult to access safely from the code's location, so take the easy way out.
Fixes: bc1d7de8c550 ("kvm: arm64: Add 52bit support for PAR to HPFAR conversoin") Signed-off-by: Ryan Roberts [maz: commit message fixes] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221103150507.32948-3-ryan.roberts@arm.com --- arch/arm64/include/asm/kvm_arm.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 8aa8492dafc0..a82f2493a72b 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -340,9 +340,13 @@ * We have * PAR [PA_Shift - 1 : 12] = PA [PA_Shift - 1 : 12] * HPFAR [PA_Shift - 9 : 4] = FIPA [PA_Shift - 1 : 12] + * + * Always assume 52 bit PA since at this point, we don't know how many PA bits + * the page table has been set up for. This should be safe since unused address + * bits in PAR are res0. */ #define PAR_TO_HPFAR(par) \ - (((par) & GENMASK_ULL(PHYS_MASK_SHIFT - 1, 12)) >> 8) + (((par) & GENMASK_ULL(52 - 1, 12)) >> 8) #define ECN(x) { ESR_ELx_EC_##x, #x } From 4554bac48a8c464ff00136a64efe8847e4da4ea8 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 3 Nov 2022 12:09:59 -0500 Subject: [PATCH 1350/4122] RDMA/rxe: Add ibdev_dbg macros for rxe Add macros borrowed from siw to call dynamic debug macro ibdev_dbg. Link: https://lore.kernel.org/r/20221103171013.20659-2-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h index 30fbdf3bc76a..ab334900fcc3 100644 --- a/drivers/infiniband/sw/rxe/rxe.h +++ b/drivers/infiniband/sw/rxe/rxe.h @@ -38,6 +38,25 @@ #define RXE_ROCE_V2_SPORT (0xc000) +#define rxe_dbg(rxe, fmt, ...) ibdev_dbg(&(rxe)->ib_dev, \ + "%s: " fmt, __func__, ##__VA_ARGS__) +#define rxe_dbg_uc(uc, fmt, ...) 
ibdev_dbg((uc)->ibuc.device, \ + "uc#%d %s: " fmt, (uc)->elem.index, __func__, ##__VA_ARGS__) +#define rxe_dbg_pd(pd, fmt, ...) ibdev_dbg((pd)->ibpd.device, \ + "pd#%d %s: " fmt, (pd)->elem.index, __func__, ##__VA_ARGS__) +#define rxe_dbg_ah(ah, fmt, ...) ibdev_dbg((ah)->ibah.device, \ + "ah#%d %s: " fmt, (ah)->elem.index, __func__, ##__VA_ARGS__) +#define rxe_dbg_srq(srq, fmt, ...) ibdev_dbg((srq)->ibsrq.device, \ + "srq#%d %s: " fmt, (srq)->elem.index, __func__, ##__VA_ARGS__) +#define rxe_dbg_qp(qp, fmt, ...) ibdev_dbg((qp)->ibqp.device, \ + "qp#%d %s: " fmt, (qp)->elem.index, __func__, ##__VA_ARGS__) +#define rxe_dbg_cq(cq, fmt, ...) ibdev_dbg((cq)->ibcq.device, \ + "cq#%d %s: " fmt, (cq)->elem.index, __func__, ##__VA_ARGS__) +#define rxe_dbg_mr(mr, fmt, ...) ibdev_dbg((mr)->ibmr.device, \ + "mr#%d %s: " fmt, (mr)->elem.index, __func__, ##__VA_ARGS__) +#define rxe_dbg_mw(mw, fmt, ...) ibdev_dbg((mw)->ibmw.device, \ + "mw#%d %s: " fmt, (mw)->elem.index, __func__, ##__VA_ARGS__) + void rxe_set_mtu(struct rxe_dev *rxe, unsigned int dev_mtu); int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name); From 27c4c520bd3908c095401e93c71e7d3696ae8bdd Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 3 Nov 2022 12:10:00 -0500 Subject: [PATCH 1351/4122] RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in rxe_comp.c Replace calls to pr_xxx() in rxe_comp.c with rxe_dbg_xxx(). 
Link: https://lore.kernel.org/r/20221103171013.20659-3-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_comp.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index 66f392810c86..4dca4f8bbb5a 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -114,7 +114,7 @@ void retransmit_timer(struct timer_list *t) { struct rxe_qp *qp = from_timer(qp, t, retrans_timer); - pr_debug("%s: fired for qp#%d\n", __func__, qp->elem.index); + rxe_dbg_qp(qp, "retransmit timer fired\n"); if (qp->valid) { qp->comp.timeout = 1; @@ -334,7 +334,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp, return COMPST_ERROR; default: - pr_warn("unexpected nak %x\n", syn); + rxe_dbg_qp(qp, "unexpected nak %x\n", syn); wqe->status = IB_WC_REM_OP_ERR; return COMPST_ERROR; } @@ -345,7 +345,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp, break; default: - pr_warn("unexpected opcode\n"); + rxe_dbg_qp(qp, "unexpected opcode\n"); } return COMPST_ERROR; @@ -598,8 +598,7 @@ int rxe_completer(void *arg) state = COMPST_GET_ACK; while (1) { - pr_debug("qp#%d state = %s\n", qp_num(qp), - comp_state_name[state]); + rxe_dbg_qp(qp, "state = %s\n", comp_state_name[state]); switch (state) { case COMPST_GET_ACK: skb = skb_dequeue(&qp->resp_pkts); @@ -746,8 +745,7 @@ int rxe_completer(void *arg) * rnr timer has fired */ qp->req.wait_for_rnr_timer = 1; - pr_debug("qp#%d set rnr nak timer\n", - qp_num(qp)); + rxe_dbg_qp(qp, "set rnr nak timer\n"); mod_timer(&qp->rnr_nak_timer, jiffies + rnrnak_jiffies(aeth_syn(pkt) & ~AETH_TYPE_MASK)); From 52920f537ab08237bd8a3d30ccbb06a9d57717cf Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 3 Nov 2022 12:10:01 -0500 Subject: [PATCH 1352/4122] RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in rxe_cq.c Replace calls to pr_xxx() in rxe_cq.c with 
rxe_dbg_xxx(). Link: https://lore.kernel.org/r/20221103171013.20659-4-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_cq.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c index b1a0ab3cd4bd..1df186534639 100644 --- a/drivers/infiniband/sw/rxe/rxe_cq.c +++ b/drivers/infiniband/sw/rxe/rxe_cq.c @@ -14,12 +14,12 @@ int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq, int count; if (cqe <= 0) { - pr_warn("cqe(%d) <= 0\n", cqe); + rxe_dbg(rxe, "cqe(%d) <= 0\n", cqe); goto err1; } if (cqe > rxe->attr.max_cqe) { - pr_debug("cqe(%d) > max_cqe(%d)\n", + rxe_dbg(rxe, "cqe(%d) > max_cqe(%d)\n", cqe, rxe->attr.max_cqe); goto err1; } @@ -27,7 +27,7 @@ int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq, if (cq) { count = queue_count(cq->queue, QUEUE_TYPE_TO_CLIENT); if (cqe < count) { - pr_debug("cqe(%d) < current # elements in queue (%d)", + rxe_dbg_cq(cq, "cqe(%d) < current # elements in queue (%d)\n", cqe, count); goto err1; } @@ -65,7 +65,7 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe, cq->queue = rxe_queue_init(rxe, &cqe, sizeof(struct rxe_cqe), type); if (!cq->queue) { - pr_warn("unable to create cq\n"); + rxe_dbg(rxe, "unable to create cq\n"); return -ENOMEM; } From 2778b72b1df0c8ef61e0b3b3ef1c8c62eb03fa75 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 3 Nov 2022 12:10:02 -0500 Subject: [PATCH 1353/4122] RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in rxe_mr.c Replace calls to pr_xxx() in rxe_mr.c by rxe_dbg_mr().
Link: https://lore.kernel.org/r/20221103171013.20659-5-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_mr.c | 40 ++++++++++++--------------- drivers/infiniband/sw/rxe/rxe_verbs.c | 3 ++ 2 files changed, 20 insertions(+), 23 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index bc081002bddc..cd846cf82a84 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -38,8 +38,7 @@ int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length) return 0; default: - pr_warn("%s: mr type (%d) not supported\n", - __func__, mr->ibmr.type); + rxe_dbg_mr(mr, "type (%d) not supported\n", mr->ibmr.type); return -EFAULT; } } @@ -125,8 +124,8 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova, umem = ib_umem_get(&rxe->ib_dev, start, length, access); if (IS_ERR(umem)) { - pr_warn("%s: Unable to pin memory region err = %d\n", - __func__, (int)PTR_ERR(umem)); + rxe_dbg_mr(mr, "Unable to pin memory region err = %d\n", + (int)PTR_ERR(umem)); err = PTR_ERR(umem); goto err_out; } @@ -137,8 +136,7 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova, err = rxe_mr_alloc(mr, num_buf); if (err) { - pr_warn("%s: Unable to allocate memory for map\n", - __func__); + rxe_dbg_mr(mr, "Unable to allocate memory for map\n"); goto err_release_umem; } @@ -159,8 +157,7 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova, vaddr = page_address(sg_page_iter_page(&sg_iter)); if (!vaddr) { - pr_warn("%s: Unable to get virtual address\n", - __func__); + rxe_dbg_mr(mr, "Unable to get virtual address\n"); err = -ENOMEM; goto err_cleanup_map; } @@ -255,7 +252,7 @@ void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length) void *addr; if (mr->state != RXE_MR_STATE_VALID) { - pr_warn("mr not in valid state\n"); + rxe_dbg_mr(mr, "Not in valid state\n"); addr = NULL; goto out; } @@ -266,7 
+263,7 @@ void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length) } if (mr_check_range(mr, iova, length)) { - pr_warn("range violation\n"); + rxe_dbg_mr(mr, "Range violation\n"); addr = NULL; goto out; } @@ -274,7 +271,7 @@ void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length) lookup_iova(mr, iova, &m, &n, &offset); if (offset + length > mr->map[m]->buf[n].size) { - pr_warn("crosses page boundary\n"); + rxe_dbg_mr(mr, "Crosses page boundary\n"); addr = NULL; goto out; } @@ -527,27 +524,26 @@ int rxe_invalidate_mr(struct rxe_qp *qp, u32 key) mr = rxe_pool_get_index(&rxe->mr_pool, key >> 8); if (!mr) { - pr_err("%s: No MR for key %#x\n", __func__, key); + rxe_dbg_qp(qp, "No MR for key %#x\n", key); ret = -EINVAL; goto err; } if (mr->rkey ? (key != mr->rkey) : (key != mr->lkey)) { - pr_err("%s: wr key (%#x) doesn't match mr key (%#x)\n", - __func__, key, (mr->rkey ? mr->rkey : mr->lkey)); + rxe_dbg_mr(mr, "wr key (%#x) doesn't match mr key (%#x)\n", + key, (mr->rkey ? mr->rkey : mr->lkey)); ret = -EINVAL; goto err_drop_ref; } if (atomic_read(&mr->num_mw) > 0) { - pr_warn("%s: Attempt to invalidate an MR while bound to MWs\n", - __func__); + rxe_dbg_mr(mr, "Attempt to invalidate an MR while bound to MWs\n"); ret = -EINVAL; goto err_drop_ref; } if (unlikely(mr->ibmr.type != IB_MR_TYPE_MEM_REG)) { - pr_warn("%s: mr type (%d) is wrong\n", __func__, mr->ibmr.type); + rxe_dbg_mr(mr, "Type (%d) is wrong\n", mr->ibmr.type); ret = -EINVAL; goto err_drop_ref; } @@ -576,22 +572,20 @@ int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe) /* user can only register MR in free state */ if (unlikely(mr->state != RXE_MR_STATE_FREE)) { - pr_warn("%s: mr->lkey = 0x%x not free\n", - __func__, mr->lkey); + rxe_dbg_mr(mr, "mr->lkey = 0x%x not free\n", mr->lkey); return -EINVAL; } /* user can only register mr with qp in same protection domain */ if (unlikely(qp->ibqp.pd != mr->ibmr.pd)) { - pr_warn("%s: qp->pd and mr->pd don't match\n", - __func__); + rxe_dbg_mr(mr,
"qp->pd and mr->pd don't match\n"); return -EINVAL; } /* user is only allowed to change key portion of l/rkey */ if (unlikely((mr->lkey & ~0xff) != (key & ~0xff))) { - pr_warn("%s: key = 0x%x has wrong index mr->lkey = 0x%x\n", - __func__, key, mr->lkey); + rxe_dbg_mr(mr, "key = 0x%x has wrong index mr->lkey = 0x%x\n", + key, mr->lkey); return -EINVAL; } diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index bcdfdadaebbc..510ae471ac7a 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -875,6 +875,7 @@ static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access) rxe_get(pd); mr->ibmr.pd = ibpd; + mr->ibmr.device = ibpd->device; rxe_mr_init_dma(access, mr); rxe_finalize(mr); @@ -899,6 +900,7 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd, rxe_get(pd); mr->ibmr.pd = ibpd; + mr->ibmr.device = ibpd->device; err = rxe_mr_init_user(rxe, start, length, iova, access, mr); if (err) @@ -930,6 +932,7 @@ static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, rxe_get(pd); mr->ibmr.pd = ibpd; + mr->ibmr.device = ibpd->device; err = rxe_mr_init_fast(max_num_sg, mr); if (err) From e8a87efdf87455454d0a14fd486c679769bfeee2 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 3 Nov 2022 12:10:03 -0500 Subject: [PATCH 1354/4122] RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in rxe_mw.c Replace calls to pr_xxx() int rxe_mw.c with rxe_dbg_xxx(). 
Link: https://lore.kernel.org/r/20221103171013.20659-6-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_mw.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c index 8df1c9066ed8..afa5ce1a7116 100644 --- a/drivers/infiniband/sw/rxe/rxe_mw.c +++ b/drivers/infiniband/sw/rxe/rxe_mw.c @@ -52,14 +52,14 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, { if (mw->ibmw.type == IB_MW_TYPE_1) { if (unlikely(mw->state != RXE_MW_STATE_VALID)) { - pr_err_once( + rxe_dbg_mw(mw, "attempt to bind a type 1 MW not in the valid state\n"); return -EINVAL; } /* o10-36.2.2 */ if (unlikely((mw->access & IB_ZERO_BASED))) { - pr_err_once("attempt to bind a zero based type 1 MW\n"); + rxe_dbg_mw(mw, "attempt to bind a zero based type 1 MW\n"); return -EINVAL; } } @@ -67,21 +67,21 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, if (mw->ibmw.type == IB_MW_TYPE_2) { /* o10-37.2.30 */ if (unlikely(mw->state != RXE_MW_STATE_FREE)) { - pr_err_once( + rxe_dbg_mw(mw, "attempt to bind a type 2 MW not in the free state\n"); return -EINVAL; } /* C10-72 */ if (unlikely(qp->pd != to_rpd(mw->ibmw.pd))) { - pr_err_once( + rxe_dbg_mw(mw, "attempt to bind type 2 MW with qp with different PD\n"); return -EINVAL; } /* o10-37.2.40 */ if (unlikely(!mr || wqe->wr.wr.mw.length == 0)) { - pr_err_once( + rxe_dbg_mw(mw, "attempt to invalidate type 2 MW by binding with NULL or zero length MR\n"); return -EINVAL; } @@ -92,13 +92,13 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, return 0; if (unlikely(mr->access & IB_ZERO_BASED)) { - pr_err_once("attempt to bind MW to zero based MR\n"); + rxe_dbg_mw(mw, "attempt to bind MW to zero based MR\n"); return -EINVAL; } /* C10-73 */ if (unlikely(!(mr->access & IB_ACCESS_MW_BIND))) { - pr_err_once( + rxe_dbg_mw(mw, 
"attempt to bind an MW to an MR without bind access\n"); return -EINVAL; } @@ -107,7 +107,7 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, if (unlikely((mw->access & (IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_ATOMIC)) && !(mr->access & IB_ACCESS_LOCAL_WRITE))) { - pr_err_once( + rxe_dbg_mw(mw, "attempt to bind an Writable MW to an MR without local write access\n"); return -EINVAL; } @@ -115,7 +115,7 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, /* C10-75 */ if (mw->access & IB_ZERO_BASED) { if (unlikely(wqe->wr.wr.mw.length > mr->ibmr.length)) { - pr_err_once( + rxe_dbg_mw(mw, "attempt to bind a ZB MW outside of the MR\n"); return -EINVAL; } @@ -123,7 +123,7 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, if (unlikely((wqe->wr.wr.mw.addr < mr->ibmr.iova) || ((wqe->wr.wr.mw.addr + wqe->wr.wr.mw.length) > (mr->ibmr.iova + mr->ibmr.length)))) { - pr_err_once( + rxe_dbg_mw(mw, "attempt to bind a VA MW outside of the MR\n"); return -EINVAL; } From 34549e88e0a3088416177023abf1232fe40e721c Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 3 Nov 2022 12:10:04 -0500 Subject: [PATCH 1355/4122] RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in rxe_net.c Replace (some) calls to pr_xxx() in rxe_net.c with rxe_dbg_xxx(). Calls with a rxe device not yet in scope are left as is. 
Link: https://lore.kernel.org/r/20221103171013.20659-7-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_net.c | 38 ++++++++++++++++------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index c36cad9c7a66..e02e1624bcf4 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -20,9 +20,10 @@ static struct rxe_recv_sockets recv_sockets; -static struct dst_entry *rxe_find_route4(struct net_device *ndev, - struct in_addr *saddr, - struct in_addr *daddr) +static struct dst_entry *rxe_find_route4(struct rxe_qp *qp, + struct net_device *ndev, + struct in_addr *saddr, + struct in_addr *daddr) { struct rtable *rt; struct flowi4 fl = { { 0 } }; @@ -35,7 +36,7 @@ static struct dst_entry *rxe_find_route4(struct net_device *ndev, rt = ip_route_output_key(&init_net, &fl); if (IS_ERR(rt)) { - pr_err_ratelimited("no route to %pI4\n", &daddr->s_addr); + rxe_dbg_qp(qp, "no route to %pI4\n", &daddr->s_addr); return NULL; } @@ -43,7 +44,8 @@ static struct dst_entry *rxe_find_route4(struct net_device *ndev, } #if IS_ENABLED(CONFIG_IPV6) -static struct dst_entry *rxe_find_route6(struct net_device *ndev, +static struct dst_entry *rxe_find_route6(struct rxe_qp *qp, + struct net_device *ndev, struct in6_addr *saddr, struct in6_addr *daddr) { @@ -60,12 +62,12 @@ static struct dst_entry *rxe_find_route6(struct net_device *ndev, recv_sockets.sk6->sk, &fl6, NULL); if (IS_ERR(ndst)) { - pr_err_ratelimited("no route to %pI6\n", daddr); + rxe_dbg_qp(qp, "no route to %pI6\n", daddr); return NULL; } if (unlikely(ndst->error)) { - pr_err("no route to %pI6\n", daddr); + rxe_dbg_qp(qp, "no route to %pI6\n", daddr); goto put; } @@ -77,7 +79,8 @@ put: #else -static struct dst_entry *rxe_find_route6(struct net_device *ndev, +static struct dst_entry *rxe_find_route6(struct rxe_qp *qp, + struct net_device *ndev, 
struct in6_addr *saddr, struct in6_addr *daddr) { @@ -105,14 +108,14 @@ static struct dst_entry *rxe_find_route(struct net_device *ndev, saddr = &av->sgid_addr._sockaddr_in.sin_addr; daddr = &av->dgid_addr._sockaddr_in.sin_addr; - dst = rxe_find_route4(ndev, saddr, daddr); + dst = rxe_find_route4(qp, ndev, saddr, daddr); } else if (av->network_type == RXE_NETWORK_TYPE_IPV6) { struct in6_addr *saddr6; struct in6_addr *daddr6; saddr6 = &av->sgid_addr._sockaddr_in6.sin6_addr; daddr6 = &av->dgid_addr._sockaddr_in6.sin6_addr; - dst = rxe_find_route6(ndev, saddr6, daddr6); + dst = rxe_find_route6(qp, ndev, saddr6, daddr6); #if IS_ENABLED(CONFIG_IPV6) if (dst) qp->dst_cookie = @@ -282,7 +285,7 @@ static int prepare4(struct rxe_av *av, struct rxe_pkt_info *pkt, dst = rxe_find_route(skb->dev, qp, av); if (!dst) { - pr_err("Host not reachable\n"); + rxe_dbg_qp(qp, "Host not reachable\n"); return -EHOSTUNREACH; } @@ -306,7 +309,7 @@ static int prepare6(struct rxe_av *av, struct rxe_pkt_info *pkt, dst = rxe_find_route(skb->dev, qp, av); if (!dst) { - pr_err("Host not reachable\n"); + rxe_dbg_qp(qp, "Host not reachable\n"); return -EHOSTUNREACH; } @@ -365,7 +368,8 @@ static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt) } else if (skb->protocol == htons(ETH_P_IPV6)) { err = ip6_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb); } else { - pr_err("Unknown layer 3 protocol: %d\n", skb->protocol); + rxe_dbg_qp(pkt->qp, "Unknown layer 3 protocol: %d\n", + skb->protocol); atomic_dec(&pkt->qp->skb_out); rxe_put(pkt->qp); kfree_skb(skb); @@ -373,7 +377,7 @@ static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt) } if (unlikely(net_xmit_eval(err))) { - pr_debug("error sending packet: %d\n", err); + rxe_dbg_qp(pkt->qp, "error sending packet: %d\n", err); return -EAGAIN; } @@ -411,7 +415,7 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt, if ((is_request && (qp->req.state != QP_STATE_READY)) || (!is_request && (qp->resp.state != 
QP_STATE_READY))) { - pr_info("Packet dropped. QP is not in ready state\n"); + rxe_dbg_qp(qp, "Packet dropped. QP is not in ready state\n"); goto drop; } @@ -592,7 +596,7 @@ static int rxe_notify(struct notifier_block *not_blk, rxe_port_down(rxe); break; case NETDEV_CHANGEMTU: - pr_info("%s changed mtu to %d\n", ndev->name, ndev->mtu); + rxe_dbg(rxe, "%s changed mtu to %d\n", ndev->name, ndev->mtu); rxe_set_mtu(rxe, ndev->mtu); break; case NETDEV_CHANGE: @@ -604,7 +608,7 @@ static int rxe_notify(struct notifier_block *not_blk, case NETDEV_CHANGENAME: case NETDEV_FEAT_CHANGE: default: - pr_info("ignoring netdev event = %ld for %s\n", + rxe_dbg(rxe, "ignoring netdev event = %ld for %s\n", event, ndev->name); break; } From 6af70060d2e561d6479b33fe94cc2f38582e830b Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 3 Nov 2022 12:10:05 -0500 Subject: [PATCH 1356/4122] RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in rxe_qp.c Replace calls to pr_xxx() in rxe_qp.c with rxe_dbg_xxx(). Link: https://lore.kernel.org/r/20221103171013.20659-8-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_qp.c | 65 ++++++++++++++---------------- 1 file changed, 30 insertions(+), 35 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 3f6d62a80bea..bcbfe6068b8b 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -19,33 +19,33 @@ static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap, int has_srq) { if (cap->max_send_wr > rxe->attr.max_qp_wr) { - pr_debug("invalid send wr = %u > %d\n", + rxe_dbg(rxe, "invalid send wr = %u > %d\n", cap->max_send_wr, rxe->attr.max_qp_wr); goto err1; } if (cap->max_send_sge > rxe->attr.max_send_sge) { - pr_debug("invalid send sge = %u > %d\n", + rxe_dbg(rxe, "invalid send sge = %u > %d\n", cap->max_send_sge, rxe->attr.max_send_sge); goto err1; } if (!has_srq) { if (cap->max_recv_wr > 
rxe->attr.max_qp_wr) { - pr_debug("invalid recv wr = %u > %d\n", + rxe_dbg(rxe, "invalid recv wr = %u > %d\n", cap->max_recv_wr, rxe->attr.max_qp_wr); goto err1; } if (cap->max_recv_sge > rxe->attr.max_recv_sge) { - pr_debug("invalid recv sge = %u > %d\n", + rxe_dbg(rxe, "invalid recv sge = %u > %d\n", cap->max_recv_sge, rxe->attr.max_recv_sge); goto err1; } } if (cap->max_inline_data > rxe->max_inline_data) { - pr_debug("invalid max inline data = %u > %d\n", + rxe_dbg(rxe, "invalid max inline data = %u > %d\n", cap->max_inline_data, rxe->max_inline_data); goto err1; } @@ -73,7 +73,7 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init) } if (!init->recv_cq || !init->send_cq) { - pr_debug("missing cq\n"); + rxe_dbg(rxe, "missing cq\n"); goto err1; } @@ -82,14 +82,14 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init) if (init->qp_type == IB_QPT_GSI) { if (!rdma_is_port_valid(&rxe->ib_dev, port_num)) { - pr_debug("invalid port = %d\n", port_num); + rxe_dbg(rxe, "invalid port = %d\n", port_num); goto err1; } port = &rxe->port; if (init->qp_type == IB_QPT_GSI && port->qp_gsi_index) { - pr_debug("GSI QP exists for port %d\n", port_num); + rxe_dbg(rxe, "GSI QP exists for port %d\n", port_num); goto err1; } } @@ -264,9 +264,6 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, wqe_size = rcv_wqe_size(qp->rq.max_sge); - pr_debug("qp#%d max_wr = %d, max_sge = %d, wqe_size = %d\n", - qp_num(qp), qp->rq.max_wr, qp->rq.max_sge, wqe_size); - type = QUEUE_TYPE_FROM_CLIENT; qp->rq.queue = rxe_queue_init(rxe, &qp->rq.max_wr, wqe_size, type); @@ -395,7 +392,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, attr->qp_state : cur_state; if (!ib_modify_qp_is_ok(cur_state, new_state, qp_type(qp), mask)) { - pr_debug("invalid mask or state for qp\n"); + rxe_dbg_qp(qp, "invalid mask or state\n"); goto err1; } @@ -409,7 +406,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, if (mask & IB_QP_PORT) { 
if (!rdma_is_port_valid(&rxe->ib_dev, attr->port_num)) { - pr_debug("invalid port %d\n", attr->port_num); + rxe_dbg_qp(qp, "invalid port %d\n", attr->port_num); goto err1; } } @@ -424,11 +421,11 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, if (rxe_av_chk_attr(rxe, &attr->alt_ah_attr)) goto err1; if (!rdma_is_port_valid(&rxe->ib_dev, attr->alt_port_num)) { - pr_debug("invalid alt port %d\n", attr->alt_port_num); + rxe_dbg_qp(qp, "invalid alt port %d\n", attr->alt_port_num); goto err1; } if (attr->alt_timeout > 31) { - pr_debug("invalid QP alt timeout %d > 31\n", + rxe_dbg_qp(qp, "invalid alt timeout %d > 31\n", attr->alt_timeout); goto err1; } @@ -441,7 +438,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, enum ib_mtu mtu = attr->path_mtu; if (mtu > max_mtu) { - pr_debug("invalid mtu (%d) > (%d)\n", + rxe_dbg_qp(qp, "invalid mtu (%d) > (%d)\n", ib_mtu_enum_to_int(mtu), ib_mtu_enum_to_int(max_mtu)); goto err1; @@ -450,7 +447,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, if (mask & IB_QP_MAX_QP_RD_ATOMIC) { if (attr->max_rd_atomic > rxe->attr.max_qp_rd_atom) { - pr_debug("invalid max_rd_atomic %d > %d\n", + rxe_dbg_qp(qp, "invalid max_rd_atomic %d > %d\n", attr->max_rd_atomic, rxe->attr.max_qp_rd_atom); goto err1; @@ -459,7 +456,8 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, if (mask & IB_QP_TIMEOUT) { if (attr->timeout > 31) { - pr_debug("invalid QP timeout %d > 31\n", attr->timeout); + rxe_dbg_qp(qp, "invalid timeout %d > 31\n", + attr->timeout); goto err1; } } @@ -637,27 +635,24 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, if (mask & IB_QP_RETRY_CNT) { qp->attr.retry_cnt = attr->retry_cnt; qp->comp.retry_cnt = attr->retry_cnt; - pr_debug("qp#%d set retry count = %d\n", qp_num(qp), - attr->retry_cnt); + rxe_dbg_qp(qp, "set retry count = %d\n", attr->retry_cnt); } if (mask & IB_QP_RNR_RETRY) { qp->attr.rnr_retry = attr->rnr_retry; qp->comp.rnr_retry = 
attr->rnr_retry; - pr_debug("qp#%d set rnr retry count = %d\n", qp_num(qp), - attr->rnr_retry); + rxe_dbg_qp(qp, "set rnr retry count = %d\n", attr->rnr_retry); } if (mask & IB_QP_RQ_PSN) { qp->attr.rq_psn = (attr->rq_psn & BTH_PSN_MASK); qp->resp.psn = qp->attr.rq_psn; - pr_debug("qp#%d set resp psn = 0x%x\n", qp_num(qp), - qp->resp.psn); + rxe_dbg_qp(qp, "set resp psn = 0x%x\n", qp->resp.psn); } if (mask & IB_QP_MIN_RNR_TIMER) { qp->attr.min_rnr_timer = attr->min_rnr_timer; - pr_debug("qp#%d set min rnr timer = 0x%x\n", qp_num(qp), + rxe_dbg_qp(qp, "set min rnr timer = 0x%x\n", attr->min_rnr_timer); } @@ -665,7 +660,7 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, qp->attr.sq_psn = (attr->sq_psn & BTH_PSN_MASK); qp->req.psn = qp->attr.sq_psn; qp->comp.psn = qp->attr.sq_psn; - pr_debug("qp#%d set req psn = 0x%x\n", qp_num(qp), qp->req.psn); + rxe_dbg_qp(qp, "set req psn = 0x%x\n", qp->req.psn); } if (mask & IB_QP_PATH_MIG_STATE) @@ -679,40 +674,40 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, switch (attr->qp_state) { case IB_QPS_RESET: - pr_debug("qp#%d state -> RESET\n", qp_num(qp)); + rxe_dbg_qp(qp, "state -> RESET\n"); rxe_qp_reset(qp); break; case IB_QPS_INIT: - pr_debug("qp#%d state -> INIT\n", qp_num(qp)); + rxe_dbg_qp(qp, "state -> INIT\n"); qp->req.state = QP_STATE_INIT; qp->resp.state = QP_STATE_INIT; qp->comp.state = QP_STATE_INIT; break; case IB_QPS_RTR: - pr_debug("qp#%d state -> RTR\n", qp_num(qp)); + rxe_dbg_qp(qp, "state -> RTR\n"); qp->resp.state = QP_STATE_READY; break; case IB_QPS_RTS: - pr_debug("qp#%d state -> RTS\n", qp_num(qp)); + rxe_dbg_qp(qp, "state -> RTS\n"); qp->req.state = QP_STATE_READY; qp->comp.state = QP_STATE_READY; break; case IB_QPS_SQD: - pr_debug("qp#%d state -> SQD\n", qp_num(qp)); + rxe_dbg_qp(qp, "state -> SQD\n"); rxe_qp_drain(qp); break; case IB_QPS_SQE: - pr_warn("qp#%d state -> SQE !!?\n", qp_num(qp)); + rxe_dbg_qp(qp, "state -> SQE !!?\n"); /* Not possible 
from modify_qp. */ break; case IB_QPS_ERR: - pr_debug("qp#%d state -> ERR\n", qp_num(qp)); + rxe_dbg_qp(qp, "state -> ERR\n"); rxe_qp_error(qp); break; } @@ -752,7 +747,7 @@ int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask) attr->sq_draining = 0; } - pr_debug("attr->sq_draining = %d\n", attr->sq_draining); + rxe_dbg_qp(qp, "attr->sq_draining = %d\n", attr->sq_draining); return 0; } @@ -764,7 +759,7 @@ int rxe_qp_chk_destroy(struct rxe_qp *qp) * will fail immediately. */ if (atomic_read(&qp->mcg_num)) { - pr_debug("Attempt to destroy QP while attached to multicast group\n"); + rxe_dbg_qp(qp, "Attempt to destroy while attached to multicast group\n"); return -EBUSY; } From 0edfb15e30a53e8bd039c35a17f61e49cba9f4dd Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 3 Nov 2022 12:10:06 -0500 Subject: [PATCH 1357/4122] RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in rxe_req.c Replace calls to pr_xxx() in rxe_req.c with rxe_dbg_xxx(). Link: https://lore.kernel.org/r/20221103171013.20659-9-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_req.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 41f1d84f0acb..4d45f508392f 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -100,7 +100,7 @@ void rnr_nak_timer(struct timer_list *t) { struct rxe_qp *qp = from_timer(qp, t, rnr_nak_timer); - pr_debug("%s: fired for qp#%d\n", __func__, qp_num(qp)); + rxe_dbg_qp(qp, "nak timer fired\n"); /* request a send queue retry */ qp->req.need_retry = 1; @@ -595,7 +595,7 @@ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe) } break; default: - pr_err("Unexpected send wqe opcode %d\n", opcode); + rxe_dbg_qp(qp, "Unexpected send wqe opcode %d\n", opcode); wqe->status = IB_WC_LOC_QP_OP_ERR; return -EINVAL; } @@ -748,14 +748,14 @@ int 
rxe_requester(void *arg) av = rxe_get_av(&pkt, &ah); if (unlikely(!av)) { - pr_err("qp#%d Failed no address vector\n", qp_num(qp)); + rxe_dbg_qp(qp, "Failed no address vector\n"); wqe->status = IB_WC_LOC_QP_OP_ERR; goto err; } skb = init_req_packet(qp, av, wqe, opcode, payload, &pkt); if (unlikely(!skb)) { - pr_err("qp#%d Failed allocating skb\n", qp_num(qp)); + rxe_dbg_qp(qp, "Failed allocating skb\n"); wqe->status = IB_WC_LOC_QP_OP_ERR; if (ah) rxe_put(ah); @@ -764,7 +764,7 @@ int rxe_requester(void *arg) err = finish_packet(qp, av, wqe, &pkt, skb, payload); if (unlikely(err)) { - pr_debug("qp#%d Error during finish packet\n", qp_num(qp)); + rxe_dbg_qp(qp, "Error during finish packet\n"); if (err == -EFAULT) wqe->status = IB_WC_LOC_PROT_ERR; else From 74ddf7233c571d51bcb802bb192a9f7d77cd8830 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 3 Nov 2022 12:10:07 -0500 Subject: [PATCH 1358/4122] RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in rxe_resp.c Replace calls to pr_xxx() in rxe_resp.c with rxe_dbg_xxx(). 
Link: https://lore.kernel.org/r/20221103171013.20659-10-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_resp.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 382d2053db43..6761bcd1d4d8 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -317,7 +317,7 @@ static enum resp_states get_srq_wqe(struct rxe_qp *qp) /* don't trust user space data */ if (unlikely(wqe->dma.num_sge > srq->rq.max_sge)) { spin_unlock_irqrestore(&srq->rq.consumer_lock, flags); - pr_warn("%s: invalid num_sge in SRQ entry\n", __func__); + rxe_dbg_qp(qp, "invalid num_sge in SRQ entry\n"); return RESPST_ERR_MALFORMED_WQE; } size = sizeof(*wqe) + wqe->dma.num_sge*sizeof(struct rxe_sge); @@ -473,15 +473,14 @@ static enum resp_states check_rkey(struct rxe_qp *qp, if (rkey_is_mw(rkey)) { mw = rxe_lookup_mw(qp, access, rkey); if (!mw) { - pr_debug("%s: no MW matches rkey %#x\n", - __func__, rkey); + rxe_dbg_qp(qp, "no MW matches rkey %#x\n", rkey); state = RESPST_ERR_RKEY_VIOLATION; goto err; } mr = mw->mr; if (!mr) { - pr_err("%s: MW doesn't have an MR\n", __func__); + rxe_dbg_qp(qp, "MW doesn't have an MR\n"); state = RESPST_ERR_RKEY_VIOLATION; goto err; } @@ -494,8 +493,7 @@ static enum resp_states check_rkey(struct rxe_qp *qp, } else { mr = lookup_mr(qp->pd, access, rkey, RXE_LOOKUP_REMOTE); if (!mr) { - pr_debug("%s: no MR matches rkey %#x\n", - __func__, rkey); + rxe_dbg_qp(qp, "no MR matches rkey %#x\n", rkey); state = RESPST_ERR_RKEY_VIOLATION; goto err; } @@ -1064,7 +1062,7 @@ static int send_common_ack(struct rxe_qp *qp, u8 syndrome, u32 psn, err = rxe_xmit_packet(qp, &ack_pkt, skb); if (err) - pr_err_ratelimited("Failed sending %s\n", msg); + rxe_dbg_qp(qp, "Failed sending %s\n", msg); return err; } @@ -1310,8 +1308,7 @@ int rxe_responder(void *arg) } while (1) { - 
pr_debug("qp#%d state = %s\n", qp_num(qp), - resp_state_name[state]); + rxe_dbg_qp(qp, "state = %s\n", resp_state_name[state]); switch (state) { case RESPST_GET_REQ: state = get_req(qp, &pkt); @@ -1468,7 +1465,7 @@ int rxe_responder(void *arg) case RESPST_ERROR: qp->resp.goto_error = 0; - pr_debug("qp#%d moved to error state\n", qp_num(qp)); + rxe_dbg_qp(qp, "moved to error state\n"); rxe_qp_error(qp); goto exit; From 0e6090024b3ebf8d162f82c542eba9632a9c85fc Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 3 Nov 2022 12:10:08 -0500 Subject: [PATCH 1359/4122] RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in rxe_srq.c Replace calls to pr_xxx() in rxe_srq.c with rxe_dbg_xxx(). Link: https://lore.kernel.org/r/20221103171013.20659-11-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_srq.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c index 02b39498c370..82e37a41ced4 100644 --- a/drivers/infiniband/sw/rxe/rxe_srq.c +++ b/drivers/infiniband/sw/rxe/rxe_srq.c @@ -13,13 +13,13 @@ int rxe_srq_chk_init(struct rxe_dev *rxe, struct ib_srq_init_attr *init) struct ib_srq_attr *attr = &init->attr; if (attr->max_wr > rxe->attr.max_srq_wr) { - pr_warn("max_wr(%d) > max_srq_wr(%d)\n", + rxe_dbg(rxe, "max_wr(%d) > max_srq_wr(%d)\n", attr->max_wr, rxe->attr.max_srq_wr); goto err1; } if (attr->max_wr <= 0) { - pr_warn("max_wr(%d) <= 0\n", attr->max_wr); + rxe_dbg(rxe, "max_wr(%d) <= 0\n", attr->max_wr); goto err1; } @@ -27,7 +27,7 @@ int rxe_srq_chk_init(struct rxe_dev *rxe, struct ib_srq_init_attr *init) attr->max_wr = RXE_MIN_SRQ_WR; if (attr->max_sge > rxe->attr.max_srq_sge) { - pr_warn("max_sge(%d) > max_srq_sge(%d)\n", + rxe_dbg(rxe, "max_sge(%d) > max_srq_sge(%d)\n", attr->max_sge, rxe->attr.max_srq_sge); goto err1; } @@ -65,7 +65,7 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq, type = 
QUEUE_TYPE_FROM_CLIENT; q = rxe_queue_init(rxe, &srq->rq.max_wr, srq_wqe_size, type); if (!q) { - pr_warn("unable to allocate queue for srq\n"); + rxe_dbg_srq(srq, "Unable to allocate queue\n"); return -ENOMEM; } @@ -94,24 +94,24 @@ int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq, struct ib_srq_attr *attr, enum ib_srq_attr_mask mask) { if (srq->error) { - pr_warn("srq in error state\n"); + rxe_dbg_srq(srq, "in error state\n"); goto err1; } if (mask & IB_SRQ_MAX_WR) { if (attr->max_wr > rxe->attr.max_srq_wr) { - pr_warn("max_wr(%d) > max_srq_wr(%d)\n", + rxe_dbg_srq(srq, "max_wr(%d) > max_srq_wr(%d)\n", attr->max_wr, rxe->attr.max_srq_wr); goto err1; } if (attr->max_wr <= 0) { - pr_warn("max_wr(%d) <= 0\n", attr->max_wr); + rxe_dbg_srq(srq, "max_wr(%d) <= 0\n", attr->max_wr); goto err1; } if (srq->limit && (attr->max_wr < srq->limit)) { - pr_warn("max_wr (%d) < srq->limit (%d)\n", + rxe_dbg_srq(srq, "max_wr (%d) < srq->limit (%d)\n", attr->max_wr, srq->limit); goto err1; } @@ -122,13 +122,13 @@ int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq, if (mask & IB_SRQ_LIMIT) { if (attr->srq_limit > rxe->attr.max_srq_wr) { - pr_warn("srq_limit(%d) > max_srq_wr(%d)\n", + rxe_dbg_srq(srq, "srq_limit(%d) > max_srq_wr(%d)\n", attr->srq_limit, rxe->attr.max_srq_wr); goto err1; } if (attr->srq_limit > srq->rq.queue->buf->index_mask) { - pr_warn("srq_limit (%d) > cur limit(%d)\n", + rxe_dbg_srq(srq, "srq_limit (%d) > cur limit(%d)\n", attr->srq_limit, srq->rq.queue->buf->index_mask); goto err1; From 14e501fdb0de3b45b8ee8a2a031c3c64aaa01817 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 3 Nov 2022 12:10:09 -0500 Subject: [PATCH 1360/4122] RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in rxe_verbs.c Replace calls to pr_xxx() in rxe_verbs.c with rxe_dbg_xxx(). 
Link: https://lore.kernel.org/r/20221103171013.20659-12-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 510ae471ac7a..e6eca21c54e6 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -1103,7 +1103,7 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name) err = ib_register_device(dev, ibdev_name, NULL); if (err) - pr_warn("%s failed with error %d\n", __func__, err); + rxe_dbg(rxe, "failed with error %d\n", err); /* * Note that rxe may be invalid at this point if another thread From 25fd735a4c9e38c65904eea2769ed92f6f8586d0 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 3 Nov 2022 12:10:10 -0500 Subject: [PATCH 1361/4122] RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in rxe_av.c Replace calls to pr_xxx() in rxe_av.c with rxe_dbg_xxx(). 
Link: https://lore.kernel.org/r/20221103171013.20659-13-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_av.c | 43 ++++++++++++++++++++++----- drivers/infiniband/sw/rxe/rxe_loc.h | 8 ++--- drivers/infiniband/sw/rxe/rxe_qp.c | 4 +-- drivers/infiniband/sw/rxe/rxe_verbs.c | 13 ++++---- 4 files changed, 47 insertions(+), 21 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c index 3b05314ca739..889d7adbd455 100644 --- a/drivers/infiniband/sw/rxe/rxe_av.c +++ b/drivers/infiniband/sw/rxe/rxe_av.c @@ -14,26 +14,45 @@ void rxe_init_av(struct rdma_ah_attr *attr, struct rxe_av *av) memcpy(av->dmac, attr->roce.dmac, ETH_ALEN); } -int rxe_av_chk_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr) +static int chk_attr(void *obj, struct rdma_ah_attr *attr, bool obj_is_ah) { const struct ib_global_route *grh = rdma_ah_read_grh(attr); struct rxe_port *port; + struct rxe_dev *rxe; + struct rxe_qp *qp; + struct rxe_ah *ah; int type; + if (obj_is_ah) { + ah = obj; + rxe = to_rdev(ah->ibah.device); + } else { + qp = obj; + rxe = to_rdev(qp->ibqp.device); + } + port = &rxe->port; if (rdma_ah_get_ah_flags(attr) & IB_AH_GRH) { if (grh->sgid_index > port->attr.gid_tbl_len) { - pr_warn("invalid sgid index = %d\n", - grh->sgid_index); + if (obj_is_ah) + rxe_dbg_ah(ah, "invalid sgid index = %d\n", + grh->sgid_index); + else + rxe_dbg_qp(qp, "invalid sgid index = %d\n", + grh->sgid_index); return -EINVAL; } type = rdma_gid_attr_network_type(grh->sgid_attr); if (type < RDMA_NETWORK_IPV4 || type > RDMA_NETWORK_IPV6) { - pr_warn("invalid network type for rdma_rxe = %d\n", - type); + if (obj_is_ah) + rxe_dbg_ah(ah, "invalid network type for rdma_rxe = %d\n", + type); + else + rxe_dbg_qp(qp, "invalid network type for rdma_rxe = %d\n", + type); return -EINVAL; } } @@ -41,6 +60,16 @@ int rxe_av_chk_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr) return 0; } +int rxe_av_chk_attr(struct 
rxe_qp *qp, struct rdma_ah_attr *attr) +{ + return chk_attr(qp, attr, false); +} + +int rxe_ah_chk_attr(struct rxe_ah *ah, struct rdma_ah_attr *attr) +{ + return chk_attr(ah, attr, true); +} + void rxe_av_from_attr(u8 port_num, struct rxe_av *av, struct rdma_ah_attr *attr) { @@ -121,12 +150,12 @@ struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt, struct rxe_ah **ahp) /* only new user provider or kernel client */ ah = rxe_pool_get_index(&pkt->rxe->ah_pool, ah_num); if (!ah) { - pr_warn("Unable to find AH matching ah_num\n"); + rxe_dbg_qp(pkt->qp, "Unable to find AH matching ah_num\n"); return NULL; } if (rxe_ah_pd(ah) != pkt->qp->pd) { - pr_warn("PDs don't match for AH and QP\n"); + rxe_dbg_qp(pkt->qp, "PDs don't match for AH and QP\n"); rxe_put(ah); return NULL; } diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index c2a5c8814a48..a22476d27b38 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -9,16 +9,12 @@ /* rxe_av.c */ void rxe_init_av(struct rdma_ah_attr *attr, struct rxe_av *av); - -int rxe_av_chk_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr); - +int rxe_av_chk_attr(struct rxe_qp *qp, struct rdma_ah_attr *attr); +int rxe_ah_chk_attr(struct rxe_ah *ah, struct rdma_ah_attr *attr); void rxe_av_from_attr(u8 port_num, struct rxe_av *av, struct rdma_ah_attr *attr); - void rxe_av_to_attr(struct rxe_av *av, struct rdma_ah_attr *attr); - void rxe_av_fill_ip_info(struct rxe_av *av, struct rdma_ah_attr *attr); - struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt, struct rxe_ah **ahp); /* rxe_cq.c */ diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index bcbfe6068b8b..46f6c74ce00e 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -414,11 +414,11 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, if (mask & IB_QP_CAP && rxe_qp_chk_cap(rxe, &attr->cap, !!qp->srq)) goto err1; - if (mask & IB_QP_AV 
&& rxe_av_chk_attr(rxe, &attr->ah_attr)) + if (mask & IB_QP_AV && rxe_av_chk_attr(qp, &attr->ah_attr)) goto err1; if (mask & IB_QP_ALT_PATH) { - if (rxe_av_chk_attr(rxe, &attr->alt_ah_attr)) + if (rxe_av_chk_attr(qp, &attr->alt_ah_attr)) goto err1; if (!rdma_is_port_valid(&rxe->ib_dev, attr->alt_port_num)) { rxe_dbg_qp(qp, "invalid alt port %d\n", attr->alt_port_num); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index e6eca21c54e6..025b35bf014e 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -172,10 +172,6 @@ static int rxe_create_ah(struct ib_ah *ibah, ah->is_user = false; } - err = rxe_av_chk_attr(rxe, init_attr->ah_attr); - if (err) - return err; - err = rxe_add_to_pool_ah(&rxe->ah_pool, ah, init_attr->flags & RDMA_CREATE_AH_SLEEPABLE); if (err) @@ -184,6 +180,12 @@ static int rxe_create_ah(struct ib_ah *ibah, /* create index > 0 */ ah->ah_num = ah->elem.index; + err = rxe_ah_chk_attr(ah, init_attr->ah_attr); + if (err) { + rxe_cleanup(ah); + return err; + } + if (uresp) { /* only if new user provider */ err = copy_to_user(&uresp->ah_num, &ah->ah_num, @@ -206,10 +208,9 @@ static int rxe_create_ah(struct ib_ah *ibah, static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr) { int err; - struct rxe_dev *rxe = to_rdev(ibah->device); struct rxe_ah *ah = to_rah(ibah); - err = rxe_av_chk_attr(rxe, attr); + err = rxe_ah_chk_attr(ah, attr); if (err) return err; From fc50597934411170ed149d3368b0b733bc5119f1 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 3 Nov 2022 12:10:11 -0500 Subject: [PATCH 1362/4122] RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in rxe_task.c Replace calls to pr_xxx() in rxe_task.c with rxe_dbg_xxx(). 
Link: https://lore.kernel.org/r/20221103171013.20659-14-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_task.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c index 0208d833a41b..60b90e33a884 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.c +++ b/drivers/infiniband/sw/rxe/rxe_task.c @@ -29,6 +29,7 @@ static void do_task(struct tasklet_struct *t) int cont; int ret; struct rxe_task *task = from_tasklet(task, t, tasklet); + struct rxe_qp *qp = (struct rxe_qp *)task->arg; unsigned int iterations = RXE_MAX_ITERATIONS; spin_lock_bh(&task->lock); @@ -47,7 +48,7 @@ static void do_task(struct tasklet_struct *t) default: spin_unlock_bh(&task->lock); - pr_warn("%s failed with bad state %d\n", __func__, task->state); + rxe_dbg_qp(qp, "failed with bad state %d\n", task->state); return; } @@ -81,8 +82,8 @@ static void do_task(struct tasklet_struct *t) break; default: - pr_warn("%s failed with bad state %d\n", __func__, - task->state); + rxe_dbg_qp(qp, "failed with bad state %d\n", + task->state); } spin_unlock_bh(&task->lock); } while (cont); From c6aba5ea00550017629c56972b635f33bb6a6903 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 3 Nov 2022 12:10:12 -0500 Subject: [PATCH 1363/4122] RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in rxe.c Replace calls to pr_xxx() in rxe.c with rxe_dbg_xxx(). Calls with a rxe device not yet in scope are left as is. 
Link: https://lore.kernel.org/r/20221103171013.20659-15-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index 51daac5c4feb..136c2efe3466 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -187,14 +187,14 @@ static int rxe_newlink(const char *ibdev_name, struct net_device *ndev) exists = rxe_get_dev_from_net(ndev); if (exists) { ib_device_put(&exists->ib_dev); - pr_err("already configured on %s\n", ndev->name); + rxe_dbg(exists, "already configured on %s\n", ndev->name); err = -EEXIST; goto err; } err = rxe_net_add(ibdev_name, ndev); if (err) { - pr_err("failed to add %s\n", ndev->name); + rxe_dbg(exists, "failed to add %s\n", ndev->name); goto err; } err: From 813728043b79a11f7029ba196decfc7f576ae487 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 3 Nov 2022 12:10:13 -0500 Subject: [PATCH 1364/4122] RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in rxe_icrc.c Replace calls to pr_xxx() in rxe_icrc.c with rxe_dbg_xxx(). 
Link: https://lore.kernel.org/r/20221103171013.20659-16-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_icrc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_icrc.c b/drivers/infiniband/sw/rxe/rxe_icrc.c index 46bb07c5c4df..71bc2c189588 100644 --- a/drivers/infiniband/sw/rxe/rxe_icrc.c +++ b/drivers/infiniband/sw/rxe/rxe_icrc.c @@ -21,7 +21,7 @@ int rxe_icrc_init(struct rxe_dev *rxe) tfm = crypto_alloc_shash("crc32", 0, 0); if (IS_ERR(tfm)) { - pr_warn("failed to init crc32 algorithm err:%ld\n", + rxe_dbg(rxe, "failed to init crc32 algorithm err: %ld\n", PTR_ERR(tfm)); return PTR_ERR(tfm); } @@ -51,7 +51,7 @@ static __be32 rxe_crc32(struct rxe_dev *rxe, __be32 crc, void *next, size_t len) *(__be32 *)shash_desc_ctx(shash) = crc; err = crypto_shash_update(shash, next, len); if (unlikely(err)) { - pr_warn_ratelimited("failed crc calculation, err: %d\n", err); + rxe_dbg(rxe, "failed crc calculation, err: %d\n", err); return (__force __be32)crc32_le((__force u32)crc, next, len); } From 5de087250f1d8f7b81abaf94110884994793f073 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 3 Nov 2022 12:10:14 -0500 Subject: [PATCH 1365/4122] RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in rxe_mmap.c Replace calls to pr_xxx() in rxe_mmap.c with rxe_dbg_xxx(). Link: https://lore.kernel.org/r/20221103171013.20659-17-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_mmap.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_mmap.c b/drivers/infiniband/sw/rxe/rxe_mmap.c index 9149b6095429..a47d72dbc537 100644 --- a/drivers/infiniband/sw/rxe/rxe_mmap.c +++ b/drivers/infiniband/sw/rxe/rxe_mmap.c @@ -79,7 +79,7 @@ int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) /* Don't allow a mmap larger than the object. 
*/ if (size > ip->info.size) { - pr_err("mmap region is larger than the object!\n"); + rxe_dbg(rxe, "mmap region is larger than the object!\n"); spin_unlock_bh(&rxe->pending_lock); ret = -EINVAL; goto done; @@ -87,7 +87,7 @@ int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) goto found_it; } - pr_warn("unable to find pending mmap info\n"); + rxe_dbg(rxe, "unable to find pending mmap info\n"); spin_unlock_bh(&rxe->pending_lock); ret = -EINVAL; goto done; @@ -98,7 +98,7 @@ found_it: ret = remap_vmalloc_range(vma, ip->obj, 0); if (ret) { - pr_err("err %d from remap_vmalloc_range\n", ret); + rxe_dbg(rxe, "err %d from remap_vmalloc_range\n", ret); goto done; } From 8c50cd059c5cd974da4285af17adf0e38905b25b Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 31 Oct 2022 10:39:50 -0500 Subject: [PATCH 1366/4122] PCI: altera-msi: Include <linux/irqdomain.h> explicitly pcie-altera-msi.c uses irq_domain_add_linear() and related interfaces, so it needs <linux/irqdomain.h> but doesn't include it directly; it relies on the fact that <linux/of_irq.h> includes it. But pcie-altera-msi.c *doesn't* need <linux/of_irq.h> itself. Include <linux/irqdomain.h> directly to remove this implicit dependency so a future patch can drop <linux/of_irq.h>. Link: https://lore.kernel.org/r/20221031153954.1163623-2-helgaas@kernel.org Signed-off-by: Bjorn Helgaas --- drivers/pci/controller/pcie-altera-msi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/controller/pcie-altera-msi.c b/drivers/pci/controller/pcie-altera-msi.c index 7b1d3ebc34ec..4366e042e98b 100644 --- a/drivers/pci/controller/pcie-altera-msi.c +++ b/drivers/pci/controller/pcie-altera-msi.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include From 606a0430b37af4a1dd3e1e3baaf073b42fbb53e3 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 31 Oct 2022 10:39:51 -0500 Subject: [PATCH 1367/4122] PCI: microchip: Include <linux/irqdomain.h> explicitly pcie-microchip-host.c uses irq_domain_add_linear() and related interfaces, so it needs <linux/irqdomain.h> but doesn't include it directly; it relies on the fact that <linux/of_irq.h> includes it.
But pcie-microchip-host.c *doesn't* need <linux/of_irq.h> itself. Include <linux/irqdomain.h> directly to remove this implicit dependency so a future patch can drop <linux/of_irq.h>. Link: https://lore.kernel.org/r/20221031153954.1163623-3-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Reviewed-by: Conor Dooley --- drivers/pci/controller/pcie-microchip-host.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/controller/pcie-microchip-host.c b/drivers/pci/controller/pcie-microchip-host.c index 7263d175b5ad..57b2a62f52c8 100644 --- a/drivers/pci/controller/pcie-microchip-host.c +++ b/drivers/pci/controller/pcie-microchip-host.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include From 763d25e7affe13c7be923fec6192ca6325f33c73 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 31 Oct 2022 10:39:52 -0500 Subject: [PATCH 1368/4122] PCI: mvebu: Include <linux/irqdomain.h> explicitly pci-mvebu.c uses irq_domain_add_linear() and related interfaces but relies on <linux/irqdomain.h> but doesn't include it directly; it relies on the fact that <linux/of_irq.h> includes it. Include <linux/irqdomain.h> directly to remove this implicit dependency. Link: https://lore.kernel.org/r/20221031153954.1163623-4-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Acked-by: Thomas Petazzoni --- drivers/pci/controller/pci-mvebu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/controller/pci-mvebu.c b/drivers/pci/controller/pci-mvebu.c index 1ced73726a26..73db99035c2b 100644 --- a/drivers/pci/controller/pci-mvebu.c +++ b/drivers/pci/controller/pci-mvebu.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include From 753596dcdb753afb63874c644b2fb20ef7fb3948 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 31 Oct 2022 10:39:53 -0500 Subject: [PATCH 1369/4122] PCI: xgene-msi: Include <linux/irqdomain.h> explicitly pci-xgene-msi.c uses irq_domain_add_linear() and related interfaces, so it needs <linux/irqdomain.h> but doesn't include it directly; it relies on the fact that <linux/of_irq.h> includes it. But pci-xgene-msi.c *doesn't* need <linux/of_irq.h> itself.
Include <linux/irqdomain.h> directly to remove this implicit dependency so a future patch can drop <linux/of_irq.h>. Link: https://lore.kernel.org/r/20221031153954.1163623-5-helgaas@kernel.org Signed-off-by: Bjorn Helgaas --- drivers/pci/controller/pci-xgene-msi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/controller/pci-xgene-msi.c b/drivers/pci/controller/pci-xgene-msi.c index bfa259781b69..bacb14e558ee 100644 --- a/drivers/pci/controller/pci-xgene-msi.c +++ b/drivers/pci/controller/pci-xgene-msi.c @@ -8,6 +8,7 @@ */ #include #include +#include #include #include #include From 277004d7a4a348de185fb4149ff29a651e994ff4 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 31 Oct 2022 10:39:54 -0500 Subject: [PATCH 1370/4122] PCI: Remove unnecessary <linux/of_irq.h> includes Many host controller drivers #include <linux/of_irq.h> even though they don't need it. Remove the unnecessary #includes. Link: https://lore.kernel.org/r/20221031153954.1163623-6-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Acked-by: Roy Zang --- drivers/pci/controller/cadence/pci-j721e.c | 1 - drivers/pci/controller/dwc/pci-layerscape.c | 1 - drivers/pci/controller/dwc/pcie-armada8k.c | 1 - drivers/pci/controller/dwc/pcie-tegra194.c | 1 - drivers/pci/controller/pci-v3-semi.c | 1 - drivers/pci/controller/pci-xgene-msi.c | 1 - drivers/pci/controller/pci-xgene.c | 1 - drivers/pci/controller/pcie-altera-msi.c | 1 - drivers/pci/controller/pcie-iproc-platform.c | 1 - drivers/pci/controller/pcie-iproc.c | 1 - drivers/pci/controller/pcie-microchip-host.c | 1 - drivers/pci/controller/pcie-rockchip-host.c | 1 - drivers/pci/controller/pcie-xilinx-cpm.c | 1 - drivers/pci/controller/pcie-xilinx-nwl.c | 1 - 14 files changed, 14 deletions(-) diff --git a/drivers/pci/controller/cadence/pci-j721e.c b/drivers/pci/controller/cadence/pci-j721e.c index a82f845cc4b5..cc83a8925ce0 100644 --- a/drivers/pci/controller/cadence/pci-j721e.c +++ b/drivers/pci/controller/cadence/pci-j721e.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include
diff --git a/drivers/pci/controller/dwc/pci-layerscape.c b/drivers/pci/controller/dwc/pci-layerscape.c index 879b8692f96a..ed5fb492fe08 100644 --- a/drivers/pci/controller/dwc/pci-layerscape.c +++ b/drivers/pci/controller/dwc/pci-layerscape.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/pci/controller/dwc/pcie-armada8k.c b/drivers/pci/controller/dwc/pcie-armada8k.c index dc469ef8e99b..5c999e15c357 100644 --- a/drivers/pci/controller/dwc/pcie-armada8k.c +++ b/drivers/pci/controller/dwc/pcie-armada8k.c @@ -21,7 +21,6 @@ #include #include #include -#include #include "pcie-designware.h" diff --git a/drivers/pci/controller/dwc/pcie-tegra194.c b/drivers/pci/controller/dwc/pcie-tegra194.c index 1b6b437823d2..02d78a12b6e7 100644 --- a/drivers/pci/controller/dwc/pcie-tegra194.c +++ b/drivers/pci/controller/dwc/pcie-tegra194.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/pci/controller/pci-v3-semi.c b/drivers/pci/controller/pci-v3-semi.c index 154a5398633c..784fcf35599c 100644 --- a/drivers/pci/controller/pci-v3-semi.c +++ b/drivers/pci/controller/pci-v3-semi.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/pci/controller/pci-xgene-msi.c b/drivers/pci/controller/pci-xgene-msi.c index bacb14e558ee..d7987b281f79 100644 --- a/drivers/pci/controller/pci-xgene-msi.c +++ b/drivers/pci/controller/pci-xgene-msi.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/pci/controller/pci-xgene.c b/drivers/pci/controller/pci-xgene.c index 549d3bd6d1c2..887b4941ff32 100644 --- a/drivers/pci/controller/pci-xgene.c +++ b/drivers/pci/controller/pci-xgene.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/pci/controller/pcie-altera-msi.c b/drivers/pci/controller/pcie-altera-msi.c index 4366e042e98b..65e8a20cc442 100644 
--- a/drivers/pci/controller/pcie-altera-msi.c +++ b/drivers/pci/controller/pcie-altera-msi.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/pci/controller/pcie-iproc-platform.c b/drivers/pci/controller/pcie-iproc-platform.c index 538115246c79..4142a73e611d 100644 --- a/drivers/pci/controller/pcie-iproc-platform.c +++ b/drivers/pci/controller/pcie-iproc-platform.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include diff --git a/drivers/pci/controller/pcie-iproc.c b/drivers/pci/controller/pcie-iproc.c index 2519201b0e51..83029bdfd884 100644 --- a/drivers/pci/controller/pcie-iproc.c +++ b/drivers/pci/controller/pcie-iproc.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include diff --git a/drivers/pci/controller/pcie-microchip-host.c b/drivers/pci/controller/pcie-microchip-host.c index 57b2a62f52c8..0ebf7015e9af 100644 --- a/drivers/pci/controller/pcie-microchip-host.c +++ b/drivers/pci/controller/pcie-microchip-host.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/pci/controller/pcie-rockchip-host.c b/drivers/pci/controller/pcie-rockchip-host.c index 7352b5ff8d35..c96c0f454570 100644 --- a/drivers/pci/controller/pcie-rockchip-host.c +++ b/drivers/pci/controller/pcie-rockchip-host.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/pci/controller/pcie-xilinx-cpm.c b/drivers/pci/controller/pcie-xilinx-cpm.c index e4ab48041eb6..4a787a941674 100644 --- a/drivers/pci/controller/pcie-xilinx-cpm.c +++ b/drivers/pci/controller/pcie-xilinx-cpm.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/pci/controller/pcie-xilinx-nwl.c b/drivers/pci/controller/pcie-xilinx-nwl.c index 40d070e54ad2..f0271b6c6f8d 100644 --- a/drivers/pci/controller/pcie-xilinx-nwl.c +++ b/drivers/pci/controller/pcie-xilinx-nwl.c @@ -17,7 +17,6 @@ 
#include #include #include -#include #include #include #include From 4c2e9ba05c7abac58bdb58e47eb69b156027fb7b Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:50 +0300 Subject: [PATCH 1371/4122] dmaengine: at_hdmac: Do not print messages on console while holding the lock The descriptor was already removed from the transfer list, there's no reason to keep the channel lock while printing desc info, thus do the prints without holding the lock. Signed-off-by: Tudor Ambarus Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-17-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 858bd64f1313..f365ac4d87ff 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -549,6 +549,8 @@ static void atc_handle_error(struct at_dma_chan *atchan) atc_dostart(atchan, desc); } + spin_unlock_irqrestore(&atchan->lock, flags); + /* * KERN_CRITICAL may seem harsh, but since this only happens * when someone submits a bad physical address in a @@ -564,8 +566,6 @@ static void atc_handle_error(struct at_dma_chan *atchan) list_for_each_entry(child, &bad_desc->tx_list, desc_node) atc_dump_lli(atchan, &child->lli); - spin_unlock_irqrestore(&atchan->lock, flags); - /* Pretend the descriptor completed successfully */ atc_chain_complete(atchan, bad_desc); } From 83c196152fc9a93c3d5a7cbf3229f42f87f232d8 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:51 +0300 Subject: [PATCH 1372/4122] dmaengine: at_hdmac: Return dma_cookie_status()'s ret code when txstate is NULL txstate is an optional parameter used to get a struct with auxiliary transfer status information. When not provided the call to device_tx_status() should return the status of the dma cookie.
Return the status of dma cookie when the txstate optional parameter is not provided. Signed-off-by: Tudor Ambarus Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-18-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index f365ac4d87ff..10b6b0435d52 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -1461,14 +1461,8 @@ atc_tx_status(struct dma_chan *chan, int bytes = 0; ret = dma_cookie_status(chan, cookie, txstate); - if (ret == DMA_COMPLETE) + if (ret == DMA_COMPLETE || !txstate) return ret; - /* - * There's no point calculating the residue if there's - * no txstate to store the value. - */ - if (!txstate) - return DMA_ERROR; spin_lock_irqsave(&atchan->lock, flags); From 0e75c28c52962b528947843e947b4bd0c74d40d2 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:52 +0300 Subject: [PATCH 1373/4122] dmaengine: at_hdmac: Remove superfluous cast Conversions of void * are applied automatically when other pointer types are assigned to and from void *. Remove the superfluous cast. 
Signed-off-by: Tudor Ambarus Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-19-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 10b6b0435d52..fbfb207104e9 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -603,7 +603,7 @@ static void atc_tasklet(struct tasklet_struct *t) static irqreturn_t at_dma_interrupt(int irq, void *dev_id) { - struct at_dma *atdma = (struct at_dma *)dev_id; + struct at_dma *atdma = dev_id; struct at_dma_chan *atchan; int i; u32 status, pending, imr; From f5d79afa3a858eb6e5cf2bcd894ed53b5bd8b5ff Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:53 +0300 Subject: [PATCH 1374/4122] dmaengine: at_hdmac: Pass residue by address to avoid unnecessary implicit casts struct dma_tx_state defines residue as u32. atc_get_bytes_left() returned an int which could be either an error or the value of the residue. This could cause problems if the controller supported a u32 buffer transfer size and the u32 value exceeded the maximum an int can hold. Our controller does not support u32 buffer transfer size, but even so, improve the code and pass the residue by address to avoid unnecessary implicit casts and make atc_get_bytes_left() return 0 on success or -errno on errors.
Signed-off-by: Tudor Ambarus Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-20-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 54 +++++++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index fbfb207104e9..e2c46f32b284 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -293,7 +293,7 @@ static struct at_desc *atc_get_desc_by_cookie(struct at_dma_chan *atchan, * @current_len: the number of bytes left before reading CTRLA * @ctrla: the value of CTRLA */ -static inline int atc_calc_bytes_left(int current_len, u32 ctrla) +static inline u32 atc_calc_bytes_left(u32 current_len, u32 ctrla) { u32 btsize = (ctrla & ATC_BTSIZE_MAX); u32 src_width = ATC_REG_TO_SRC_WIDTH(ctrla); @@ -308,17 +308,20 @@ static inline int atc_calc_bytes_left(int current_len, u32 ctrla) } /** - * atc_get_bytes_left - get the number of bytes residue for a cookie + * atc_get_bytes_left - get the number of bytes residue for a cookie. + * The residue is passed by address and updated on success. * @chan: DMA channel * @cookie: transaction identifier to check status of + * @residue: residue to be updated. + * Return 0 on success, -errono otherwise. 
*/ -static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie) +static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie, + u32 *residue) { struct at_dma_chan *atchan = to_at_dma_chan(chan); struct at_desc *desc_first = atc_first_active(atchan); struct at_desc *desc; - int ret; - u32 ctrla, dscr; + u32 len, ctrla, dscr; unsigned int i; /* @@ -333,7 +336,7 @@ static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie) return desc->total_len; /* cookie matches to the currently running transfer */ - ret = desc_first->total_len; + len = desc_first->total_len; if (desc_first->lli.dscr) { /* hardware linked list transfer */ @@ -419,29 +422,31 @@ static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie) return -ETIMEDOUT; /* for the first descriptor we can be more accurate */ - if (desc_first->lli.dscr == dscr) - return atc_calc_bytes_left(ret, ctrla); + if (desc_first->lli.dscr == dscr) { + *residue = atc_calc_bytes_left(len, ctrla); + return 0; + } - ret -= desc_first->len; + len -= desc_first->len; list_for_each_entry(desc, &desc_first->tx_list, desc_node) { if (desc->lli.dscr == dscr) break; - ret -= desc->len; + len -= desc->len; } /* * For the current descriptor in the chain we can calculate * the remaining bytes using the channel's register. 
*/ - ret = atc_calc_bytes_left(ret, ctrla); + *residue = atc_calc_bytes_left(len, ctrla); } else { /* single transfer */ ctrla = channel_readl(atchan, CTRLA); - ret = atc_calc_bytes_left(ret, ctrla); + *residue = atc_calc_bytes_left(len, ctrla); } - return ret; + return 0; } /** @@ -1457,31 +1462,32 @@ atc_tx_status(struct dma_chan *chan, { struct at_dma_chan *atchan = to_at_dma_chan(chan); unsigned long flags; - enum dma_status ret; - int bytes = 0; + enum dma_status dma_status; + u32 residue; + int ret; - ret = dma_cookie_status(chan, cookie, txstate); - if (ret == DMA_COMPLETE || !txstate) - return ret; + dma_status = dma_cookie_status(chan, cookie, txstate); + if (dma_status == DMA_COMPLETE || !txstate) + return dma_status; spin_lock_irqsave(&atchan->lock, flags); /* Get number of bytes left in the active transactions */ - bytes = atc_get_bytes_left(chan, cookie); + ret = atc_get_bytes_left(chan, cookie, &residue); spin_unlock_irqrestore(&atchan->lock, flags); - if (unlikely(bytes < 0)) { + if (unlikely(ret < 0)) { dev_vdbg(chan2dev(chan), "get residual bytes error\n"); return DMA_ERROR; } else { - dma_set_residue(txstate, bytes); + dma_set_residue(txstate, residue); } - dev_vdbg(chan2dev(chan), "tx_status %d: cookie = %d residue = %d\n", - ret, cookie, bytes); + dev_vdbg(chan2dev(chan), "tx_status %d: cookie = %d residue = %u\n", + dma_status, cookie, residue); - return ret; + return dma_status; } /** From 91617bf6bb41b32119f5ac007871ad1d115d4ed2 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:54 +0300 Subject: [PATCH 1375/4122] dmaengine: at_hdmac: s/atc_get_bytes_left/atc_get_residue Use dmaengine terminology and rename the method to better indicate what it does: it gets the residue value which will be later on set with dma_set_residue(). 
Signed-off-by: Tudor Ambarus Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-21-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index e2c46f32b284..6c328cd16983 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -308,15 +308,15 @@ static inline u32 atc_calc_bytes_left(u32 current_len, u32 ctrla) } /** - * atc_get_bytes_left - get the number of bytes residue for a cookie. + * atc_get_residue - get the number of bytes residue for a cookie. * The residue is passed by address and updated on success. * @chan: DMA channel * @cookie: transaction identifier to check status of * @residue: residue to be updated. * Return 0 on success, -errono otherwise. */ -static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie, - u32 *residue) +static int atc_get_residue(struct dma_chan *chan, dma_cookie_t cookie, + u32 *residue) { struct at_dma_chan *atchan = to_at_dma_chan(chan); struct at_desc *desc_first = atc_first_active(atchan); @@ -1471,10 +1471,7 @@ atc_tx_status(struct dma_chan *chan, return dma_status; spin_lock_irqsave(&atchan->lock, flags); - - /* Get number of bytes left in the active transactions */ - ret = atc_get_bytes_left(chan, cookie, &residue); - + ret = atc_get_residue(chan, cookie, &residue); spin_unlock_irqrestore(&atchan->lock, flags); if (unlikely(ret < 0)) { From b50cf4bdfb9164c55d002624217d5a5ef4ab9573 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:55 +0300 Subject: [PATCH 1376/4122] dmaengine: at_hdmac: Introduce atc_get_llis_residue() Introduce a method to get the residue for a hardware linked list transfer. It makes the code easier to read. 
Signed-off-by: Tudor Ambarus Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-22-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 221 ++++++++++++++++++++--------------------- 1 file changed, 110 insertions(+), 111 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 6c328cd16983..6bd9e35db8f9 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -307,6 +307,109 @@ static inline u32 atc_calc_bytes_left(u32 current_len, u32 ctrla) return current_len - (btsize << src_width); } +/** + * atc_get_llis_residue - Get residue for a hardware linked list transfer + * + * Calculate the residue by removing the length of the child descriptors already + * transferred from the total length. To get the current child descriptor we can + * use the value of the channel's DSCR register and compare it against the value + * of the hardware linked list structure of each child descriptor. + * + * The CTRLA register provides us with the amount of data already read from the + * source for the current child descriptor. So we can compute a more accurate + * residue by also removing the number of bytes corresponding to this amount of + * data. + * + * However, the DSCR and CTRLA registers cannot be read both atomically. Hence a + * race condition may occur: the first read register may refer to one child + * descriptor whereas the second read may refer to a later child descriptor in + * the list because of the DMA transfer progression inbetween the two reads. + * + * One solution could have been to pause the DMA transfer, read the DSCR and + * CTRLA then resume the DMA transfer. Nonetheless, this approach presents some + * drawbacks: + * - If the DMA transfer is paused, RX overruns or TX underruns are more likey + * to occur depending on the system latency. 
Taking the USART driver as an + * example, it uses a cyclic DMA transfer to read data from the Receive + * Holding Register (RHR) to avoid RX overruns since the RHR is not protected + * by any FIFO on most Atmel SoCs. So pausing the DMA transfer to compute the + * residue would break the USART driver design. + * - The atc_pause() function masks interrupts but we'd rather avoid to do so + * for system latency purpose. + * + * Then we'd rather use another solution: the DSCR is read a first time, the + * CTRLA is read in turn, next the DSCR is read a second time. If the two + * consecutive read values of the DSCR are the same then we assume both refers + * to the very same child descriptor as well as the CTRLA value read inbetween + * does. For cyclic tranfers, the assumption is that a full loop is "not so + * fast". If the two DSCR values are different, we read again the CTRLA then the + * DSCR till two consecutive read values from DSCR are equal or till the + * maximum trials is reach. This algorithm is very unlikely not to find a stable + * value for DSCR. + * @atchan: pointer to an atmel hdmac channel. + * @desc: pointer to the descriptor for which the residue is calculated. + * @residue: residue to be set to dma_tx_state. + * Returns 0 on success, -errno otherwise. + */ +static int atc_get_llis_residue(struct at_dma_chan *atchan, + struct at_desc *desc, u32 *residue) +{ + struct at_desc *child; + u32 len, ctrla, dscr; + unsigned int i; + + len = desc->total_len; + dscr = channel_readl(atchan, DSCR); + rmb(); /* ensure DSCR is read before CTRLA */ + ctrla = channel_readl(atchan, CTRLA); + for (i = 0; i < ATC_MAX_DSCR_TRIALS; ++i) { + u32 new_dscr; + + rmb(); /* ensure DSCR is read after CTRLA */ + new_dscr = channel_readl(atchan, DSCR); + + /* + * If the DSCR register value has not changed inside the DMA + * controller since the previous read, we assume that both the + * dscr and ctrla values refers to the very same descriptor. 
+ */ + if (likely(new_dscr == dscr)) + break; + + /* + * DSCR has changed inside the DMA controller, so the previouly + * read value of CTRLA may refer to an already processed + * descriptor hence could be outdated. We need to update ctrla + * to match the current descriptor. + */ + dscr = new_dscr; + rmb(); /* ensure DSCR is read before CTRLA */ + ctrla = channel_readl(atchan, CTRLA); + } + if (unlikely(i == ATC_MAX_DSCR_TRIALS)) + return -ETIMEDOUT; + + /* For the first descriptor we can be more accurate. */ + if (desc->lli.dscr == dscr) { + *residue = atc_calc_bytes_left(len, ctrla); + return 0; + } + + len -= desc->len; + list_for_each_entry(child, &desc->tx_list, desc_node) { + if (child->lli.dscr == dscr) + break; + len -= child->len; + } + + /* + * For the current descriptor in the chain we can calculate the + * remaining bytes using the channel's register. + */ + *residue = atc_calc_bytes_left(len, ctrla); + return 0; +} + /** * atc_get_residue - get the number of bytes residue for a cookie. * The residue is passed by address and updated on success. @@ -321,8 +424,7 @@ static int atc_get_residue(struct dma_chan *chan, dma_cookie_t cookie, struct at_dma_chan *atchan = to_at_dma_chan(chan); struct at_desc *desc_first = atc_first_active(atchan); struct at_desc *desc; - u32 len, ctrla, dscr; - unsigned int i; + u32 len, ctrla; /* * If the cookie doesn't match to the currently running transfer then @@ -335,117 +437,14 @@ static int atc_get_residue(struct dma_chan *chan, dma_cookie_t cookie, else if (desc != desc_first) return desc->total_len; - /* cookie matches to the currently running transfer */ - len = desc_first->total_len; - - if (desc_first->lli.dscr) { + if (desc_first->lli.dscr) /* hardware linked list transfer */ + return atc_get_llis_residue(atchan, desc_first, residue); - /* - * Calculate the residue by removing the length of the child - * descriptors already transferred from the total length. 
- * To get the current child descriptor we can use the value of - * the channel's DSCR register and compare it against the value - * of the hardware linked list structure of each child - * descriptor. - * - * The CTRLA register provides us with the amount of data - * already read from the source for the current child - * descriptor. So we can compute a more accurate residue by also - * removing the number of bytes corresponding to this amount of - * data. - * - * However, the DSCR and CTRLA registers cannot be read both - * atomically. Hence a race condition may occur: the first read - * register may refer to one child descriptor whereas the second - * read may refer to a later child descriptor in the list - * because of the DMA transfer progression inbetween the two - * reads. - * - * One solution could have been to pause the DMA transfer, read - * the DSCR and CTRLA then resume the DMA transfer. Nonetheless, - * this approach presents some drawbacks: - * - If the DMA transfer is paused, RX overruns or TX underruns - * are more likey to occur depending on the system latency. - * Taking the USART driver as an example, it uses a cyclic DMA - * transfer to read data from the Receive Holding Register - * (RHR) to avoid RX overruns since the RHR is not protected - * by any FIFO on most Atmel SoCs. So pausing the DMA transfer - * to compute the residue would break the USART driver design. - * - The atc_pause() function masks interrupts but we'd rather - * avoid to do so for system latency purpose. - * - * Then we'd rather use another solution: the DSCR is read a - * first time, the CTRLA is read in turn, next the DSCR is read - * a second time. If the two consecutive read values of the DSCR - * are the same then we assume both refers to the very same - * child descriptor as well as the CTRLA value read inbetween - * does. For cyclic tranfers, the assumption is that a full loop - * is "not so fast". 
- * If the two DSCR values are different, we read again the CTRLA - * then the DSCR till two consecutive read values from DSCR are - * equal or till the maxium trials is reach. - * This algorithm is very unlikely not to find a stable value for - * DSCR. - */ - - dscr = channel_readl(atchan, DSCR); - rmb(); /* ensure DSCR is read before CTRLA */ - ctrla = channel_readl(atchan, CTRLA); - for (i = 0; i < ATC_MAX_DSCR_TRIALS; ++i) { - u32 new_dscr; - - rmb(); /* ensure DSCR is read after CTRLA */ - new_dscr = channel_readl(atchan, DSCR); - - /* - * If the DSCR register value has not changed inside the - * DMA controller since the previous read, we assume - * that both the dscr and ctrla values refers to the - * very same descriptor. - */ - if (likely(new_dscr == dscr)) - break; - - /* - * DSCR has changed inside the DMA controller, so the - * previouly read value of CTRLA may refer to an already - * processed descriptor hence could be outdated. - * We need to update ctrla to match the current - * descriptor. - */ - dscr = new_dscr; - rmb(); /* ensure DSCR is read before CTRLA */ - ctrla = channel_readl(atchan, CTRLA); - } - if (unlikely(i == ATC_MAX_DSCR_TRIALS)) - return -ETIMEDOUT; - - /* for the first descriptor we can be more accurate */ - if (desc_first->lli.dscr == dscr) { - *residue = atc_calc_bytes_left(len, ctrla); - return 0; - } - - len -= desc_first->len; - list_for_each_entry(desc, &desc_first->tx_list, desc_node) { - if (desc->lli.dscr == dscr) - break; - - len -= desc->len; - } - - /* - * For the current descriptor in the chain we can calculate - * the remaining bytes using the channel's register. 
- */ - *residue = atc_calc_bytes_left(len, ctrla); - } else { - /* single transfer */ - ctrla = channel_readl(atchan, CTRLA); - *residue = atc_calc_bytes_left(len, ctrla); - } - + /* single transfer */ + len = desc_first->total_len; + ctrla = channel_readl(atchan, CTRLA); + *residue = atc_calc_bytes_left(len, ctrla); return 0; } From 5f1d429b43b34b310a93651681d0cd8a39a86e3d Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:56 +0300 Subject: [PATCH 1377/4122] dmaengine: at_hdmac: Use devm_kzalloc() and struct_size() Use the resource-managed kzalloc to simplify error logic. Memory allocated with this function is automatically freed on driver detach. Use struct_size() helper to calculate the size of the atdma structure with its trailing flexible array. While here, move the mem allocation higher in the probe method, as failing to allocate memory indicates a serious system issue, and everything else does not matter anyway. All these help the code look a bit cleaner. Signed-off-by: Tudor Ambarus Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-23-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 6bd9e35db8f9..f3fbb0aa8b24 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -1786,6 +1787,12 @@ static int __init at_dma_probe(struct platform_device *pdev) if (!plat_dat) return -ENODEV; + atdma = devm_kzalloc(&pdev->dev, + struct_size(atdma, chan, plat_dat->nr_channels), + GFP_KERNEL); + if (!atdma) + return -ENOMEM; + io = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!io) return -EINVAL; @@ -1794,21 +1801,13 @@ static int __init at_dma_probe(struct platform_device *pdev) 
if (irq < 0) return irq; - size = sizeof(struct at_dma); - size += plat_dat->nr_channels * sizeof(struct at_dma_chan); - atdma = kzalloc(size, GFP_KERNEL); - if (!atdma) - return -ENOMEM; - /* discover transaction capabilities */ atdma->dma_common.cap_mask = plat_dat->cap_mask; atdma->all_chan_mask = (1 << plat_dat->nr_channels) - 1; size = resource_size(io); - if (!request_mem_region(io->start, size, pdev->dev.driver->name)) { - err = -EBUSY; - goto err_kfree; - } + if (!request_mem_region(io->start, size, pdev->dev.driver->name)) + return -EBUSY; atdma->regs = ioremap(io->start, size); if (!atdma->regs) { @@ -1963,8 +1962,6 @@ err_clk: atdma->regs = NULL; err_release_r: release_mem_region(io->start, size); -err_kfree: - kfree(atdma); return err; } @@ -2003,8 +2000,6 @@ static int at_dma_remove(struct platform_device *pdev) io = platform_get_resource(pdev, IORESOURCE_MEM, 0); release_mem_region(io->start, resource_size(io)); - kfree(atdma); - return 0; } From 8bfe4a61d40df2ddb707bf7d0b278907de2dd4f6 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:57 +0300 Subject: [PATCH 1378/4122] dmaengine: at_hdmac: Use devm_platform_ioremap_resource Use devm_platform_ioremap_resource() helper for cleaner code and easier resource management.
Signed-off-by: Tudor Ambarus Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-24-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 37 ++++++------------------------------- 1 file changed, 6 insertions(+), 31 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index f3fbb0aa8b24..10c250618a33 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -1765,9 +1765,7 @@ static void at_dma_off(struct at_dma *atdma) static int __init at_dma_probe(struct platform_device *pdev) { - struct resource *io; struct at_dma *atdma; - size_t size; int irq; int err; int i; @@ -1793,9 +1791,9 @@ static int __init at_dma_probe(struct platform_device *pdev) if (!atdma) return -ENOMEM; - io = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!io) - return -EINVAL; + atdma->regs = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(atdma->regs)) + return PTR_ERR(atdma->regs); irq = platform_get_irq(pdev, 0); if (irq < 0) @@ -1805,21 +1803,10 @@ static int __init at_dma_probe(struct platform_device *pdev) atdma->dma_common.cap_mask = plat_dat->cap_mask; atdma->all_chan_mask = (1 << plat_dat->nr_channels) - 1; - size = resource_size(io); - if (!request_mem_region(io->start, size, pdev->dev.driver->name)) - return -EBUSY; - - atdma->regs = ioremap(io->start, size); - if (!atdma->regs) { - err = -ENOMEM; - goto err_release_r; - } - atdma->clk = clk_get(&pdev->dev, "dma_clk"); - if (IS_ERR(atdma->clk)) { - err = PTR_ERR(atdma->clk); - goto err_clk; - } + if (IS_ERR(atdma->clk)) + return PTR_ERR(atdma->clk); + err = clk_prepare_enable(atdma->clk); if (err) goto err_clk_prepare; @@ -1957,11 +1944,6 @@ err_irq: clk_disable_unprepare(atdma->clk); err_clk_prepare: clk_put(atdma->clk); -err_clk: - iounmap(atdma->regs); - atdma->regs = NULL; -err_release_r: - release_mem_region(io->start, size); return err; } @@ 
-1969,7 +1951,6 @@ static int at_dma_remove(struct platform_device *pdev) { struct at_dma *atdma = platform_get_drvdata(pdev); struct dma_chan *chan, *_chan; - struct resource *io; at_dma_off(atdma); if (pdev->dev.of_node) @@ -1994,12 +1975,6 @@ static int at_dma_remove(struct platform_device *pdev) clk_disable_unprepare(atdma->clk); clk_put(atdma->clk); - iounmap(atdma->regs); - atdma->regs = NULL; - - io = platform_get_resource(pdev, IORESOURCE_MEM, 0); - release_mem_region(io->start, resource_size(io)); - return 0; } From 4c15a4c7f693f1f45ef534ddc428f2a9aa67bd13 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:58 +0300 Subject: [PATCH 1379/4122] dmaengine: at_hdmac: Use devm_clk_get() Clocks obtained with this method are automatically put on driver detach. Use devm_clk_get() and simplify the error handling. Signed-off-by: Tudor Ambarus Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-25-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 10c250618a33..444aa7d75ff5 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -1803,13 +1803,13 @@ static int __init at_dma_probe(struct platform_device *pdev) atdma->dma_common.cap_mask = plat_dat->cap_mask; atdma->all_chan_mask = (1 << plat_dat->nr_channels) - 1; - atdma->clk = clk_get(&pdev->dev, "dma_clk"); + atdma->clk = devm_clk_get(&pdev->dev, "dma_clk"); if (IS_ERR(atdma->clk)) return PTR_ERR(atdma->clk); err = clk_prepare_enable(atdma->clk); if (err) - goto err_clk_prepare; + return err; /* force dma off, just in case */ at_dma_off(atdma); @@ -1942,8 +1942,6 @@ err_desc_pool_create: free_irq(platform_get_irq(pdev, 0), atdma); err_irq: clk_disable_unprepare(atdma->clk); -err_clk_prepare: -
clk_put(atdma->clk); return err; } @@ -1973,7 +1971,6 @@ static int at_dma_remove(struct platform_device *pdev) } clk_disable_unprepare(atdma->clk); - clk_put(atdma->clk); return 0; } From c23cd8c971f0b4697f344d981f13aae4123f866d Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:59 +0300 Subject: [PATCH 1380/4122] dmaengine: at_hdmac: Use pm_ptr() Use pm_ptr() macro to fill at_dma_driver.driver.pm. In case CONFIG_PM is not enabled, the macro will return NULL. When NULL, at_dma_dev_pm_ops will end up being unused, so prepend it with the __maybe_unused attribute. Signed-off-by: Tudor Ambarus Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-26-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 444aa7d75ff5..4e3c519e6079 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -2084,7 +2084,7 @@ static int at_dma_resume_noirq(struct device *dev) return 0; } -static const struct dev_pm_ops at_dma_dev_pm_ops = { +static const struct dev_pm_ops __maybe_unused at_dma_dev_pm_ops = { .prepare = at_dma_prepare, .suspend_noirq = at_dma_suspend_noirq, .resume_noirq = at_dma_resume_noirq, @@ -2096,7 +2096,7 @@ static struct platform_driver at_dma_driver = { .id_table = atdma_devtypes, .driver = { .name = "at_hdmac", - .pm = &at_dma_dev_pm_ops, + .pm = pm_ptr(&at_dma_dev_pm_ops), .of_match_table = of_match_ptr(atmel_dma_dt_ids), }, }; From e3e672b8f95be38db26c971bc4c6b43d18a9836a Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:03:00 +0300 Subject: [PATCH 1381/4122] dmaengine: at_hdmac: Set include entries in alphabetic order It's a good practice to set the include entries in alphabetic order. It helps humans to read the code easier. 
Alphabetic order should also prove that each header is self-contained, i.e. can be included without prerequisites. Signed-off-by: Tudor Ambarus Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-27-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 4e3c519e6079..a3fa8bffdb74 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -12,16 +12,16 @@ #include #include #include -#include #include +#include #include #include -#include -#include #include #include #include #include +#include +#include #include "at_hdmac_regs.h" #include "dmaengine.h" From 5cecadc3e2a4fb72ab37d9420df0a9e1179b8a3e Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:03:01 +0300 Subject: [PATCH 1382/4122] dmaengine: at_hdmac: Keep register definitions and structures private to at_hdmac.c Do not expose register definitions, structures and helpers via a .h file because they are used only by at_hdmac.c. Since there are no other users, remove the ambiguity and move all the .h contents to the .c file. One may notice some checkpatch warnings and errors with this move. The move was done "as it was"; checkpatch complaints can be fixed in a further patch.
Signed-off-by: Tudor Ambarus Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-28-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul --- MAINTAINERS | 1 - drivers/dma/at_hdmac.c | 469 ++++++++++++++++++++++++++++++++++- drivers/dma/at_hdmac_regs.h | 478 ------------------------------------ 3 files changed, 468 insertions(+), 480 deletions(-) delete mode 100644 drivers/dma/at_hdmac_regs.h diff --git a/MAINTAINERS b/MAINTAINERS index cd1264d24db8..c1b11b55ea89 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13415,7 +13415,6 @@ L: dmaengine@vger.kernel.org S: Supported F: Documentation/devicetree/bindings/dma/atmel-dma.txt F: drivers/dma/at_hdmac.c -F: drivers/dma/at_hdmac_regs.h F: drivers/dma/at_xdmac.c F: include/dt-bindings/dma/at91.h diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index a3fa8bffdb74..a07e3355f09a 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -23,7 +23,6 @@ #include #include -#include "at_hdmac_regs.h" #include "dmaengine.h" /* @@ -35,6 +34,474 @@ * atc_ / atchan : ATmel DMA Channel entity related */ +#define AT_DMA_MAX_NR_CHANNELS 8 + + +#define AT_DMA_GCFG 0x00 /* Global Configuration Register */ +#define AT_DMA_IF_BIGEND(i) (0x1 << (i)) /* AHB-Lite Interface i in Big-endian mode */ +#define AT_DMA_ARB_CFG (0x1 << 4) /* Arbiter mode. 
*/ +#define AT_DMA_ARB_CFG_FIXED (0x0 << 4) +#define AT_DMA_ARB_CFG_ROUND_ROBIN (0x1 << 4) + +#define AT_DMA_EN 0x04 /* Controller Enable Register */ +#define AT_DMA_ENABLE (0x1 << 0) + +#define AT_DMA_SREQ 0x08 /* Software Single Request Register */ +#define AT_DMA_SSREQ(x) (0x1 << ((x) << 1)) /* Request a source single transfer on channel x */ +#define AT_DMA_DSREQ(x) (0x1 << (1 + ((x) << 1))) /* Request a destination single transfer on channel x */ + +#define AT_DMA_CREQ 0x0C /* Software Chunk Transfer Request Register */ +#define AT_DMA_SCREQ(x) (0x1 << ((x) << 1)) /* Request a source chunk transfer on channel x */ +#define AT_DMA_DCREQ(x) (0x1 << (1 + ((x) << 1))) /* Request a destination chunk transfer on channel x */ + +#define AT_DMA_LAST 0x10 /* Software Last Transfer Flag Register */ +#define AT_DMA_SLAST(x) (0x1 << ((x) << 1)) /* This src rq is last tx of buffer on channel x */ +#define AT_DMA_DLAST(x) (0x1 << (1 + ((x) << 1))) /* This dst rq is last tx of buffer on channel x */ + +#define AT_DMA_SYNC 0x14 /* Request Synchronization Register */ +#define AT_DMA_SYR(h) (0x1 << (h)) /* Synchronize handshake line h */ + +/* Error, Chained Buffer transfer completed and Buffer transfer completed Interrupt registers */ +#define AT_DMA_EBCIER 0x18 /* Enable register */ +#define AT_DMA_EBCIDR 0x1C /* Disable register */ +#define AT_DMA_EBCIMR 0x20 /* Mask Register */ +#define AT_DMA_EBCISR 0x24 /* Status Register */ +#define AT_DMA_CBTC_OFFSET 8 +#define AT_DMA_ERR_OFFSET 16 +#define AT_DMA_BTC(x) (0x1 << (x)) +#define AT_DMA_CBTC(x) (0x1 << (AT_DMA_CBTC_OFFSET + (x))) +#define AT_DMA_ERR(x) (0x1 << (AT_DMA_ERR_OFFSET + (x))) + +#define AT_DMA_CHER 0x28 /* Channel Handler Enable Register */ +#define AT_DMA_ENA(x) (0x1 << (x)) +#define AT_DMA_SUSP(x) (0x1 << ( 8 + (x))) +#define AT_DMA_KEEP(x) (0x1 << (24 + (x))) + +#define AT_DMA_CHDR 0x2C /* Channel Handler Disable Register */ +#define AT_DMA_DIS(x) (0x1 << (x)) +#define AT_DMA_RES(x) (0x1 << ( 8 + (x))) + 
+#define AT_DMA_CHSR 0x30 /* Channel Handler Status Register */ +#define AT_DMA_EMPT(x) (0x1 << (16 + (x))) +#define AT_DMA_STAL(x) (0x1 << (24 + (x))) + + +#define AT_DMA_CH_REGS_BASE 0x3C /* Channel registers base address */ +#define ch_regs(x) (AT_DMA_CH_REGS_BASE + (x) * 0x28) /* Channel x base addr */ + +/* Hardware register offset for each channel */ +#define ATC_SADDR_OFFSET 0x00 /* Source Address Register */ +#define ATC_DADDR_OFFSET 0x04 /* Destination Address Register */ +#define ATC_DSCR_OFFSET 0x08 /* Descriptor Address Register */ +#define ATC_CTRLA_OFFSET 0x0C /* Control A Register */ +#define ATC_CTRLB_OFFSET 0x10 /* Control B Register */ +#define ATC_CFG_OFFSET 0x14 /* Configuration Register */ +#define ATC_SPIP_OFFSET 0x18 /* Src PIP Configuration Register */ +#define ATC_DPIP_OFFSET 0x1C /* Dst PIP Configuration Register */ + + +/* Bitfield definitions */ + +/* Bitfields in DSCR */ +#define ATC_DSCR_IF(i) (0x3 & (i)) /* Dsc feched via AHB-Lite Interface i */ + +/* Bitfields in CTRLA */ +#define ATC_BTSIZE_MAX 0xFFFFUL /* Maximum Buffer Transfer Size */ +#define ATC_BTSIZE(x) (ATC_BTSIZE_MAX & (x)) /* Buffer Transfer Size */ +#define ATC_SCSIZE_MASK (0x7 << 16) /* Source Chunk Transfer Size */ +#define ATC_SCSIZE(x) (ATC_SCSIZE_MASK & ((x) << 16)) +#define ATC_SCSIZE_1 (0x0 << 16) +#define ATC_SCSIZE_4 (0x1 << 16) +#define ATC_SCSIZE_8 (0x2 << 16) +#define ATC_SCSIZE_16 (0x3 << 16) +#define ATC_SCSIZE_32 (0x4 << 16) +#define ATC_SCSIZE_64 (0x5 << 16) +#define ATC_SCSIZE_128 (0x6 << 16) +#define ATC_SCSIZE_256 (0x7 << 16) +#define ATC_DCSIZE_MASK (0x7 << 20) /* Destination Chunk Transfer Size */ +#define ATC_DCSIZE(x) (ATC_DCSIZE_MASK & ((x) << 20)) +#define ATC_DCSIZE_1 (0x0 << 20) +#define ATC_DCSIZE_4 (0x1 << 20) +#define ATC_DCSIZE_8 (0x2 << 20) +#define ATC_DCSIZE_16 (0x3 << 20) +#define ATC_DCSIZE_32 (0x4 << 20) +#define ATC_DCSIZE_64 (0x5 << 20) +#define ATC_DCSIZE_128 (0x6 << 20) +#define ATC_DCSIZE_256 (0x7 << 20) +#define 
ATC_SRC_WIDTH_MASK (0x3 << 24) /* Source Single Transfer Size */ +#define ATC_SRC_WIDTH(x) ((x) << 24) +#define ATC_SRC_WIDTH_BYTE (0x0 << 24) +#define ATC_SRC_WIDTH_HALFWORD (0x1 << 24) +#define ATC_SRC_WIDTH_WORD (0x2 << 24) +#define ATC_REG_TO_SRC_WIDTH(r) (((r) >> 24) & 0x3) +#define ATC_DST_WIDTH_MASK (0x3 << 28) /* Destination Single Transfer Size */ +#define ATC_DST_WIDTH(x) ((x) << 28) +#define ATC_DST_WIDTH_BYTE (0x0 << 28) +#define ATC_DST_WIDTH_HALFWORD (0x1 << 28) +#define ATC_DST_WIDTH_WORD (0x2 << 28) +#define ATC_DONE (0x1 << 31) /* Tx Done (only written back in descriptor) */ + +/* Bitfields in CTRLB */ +#define ATC_SIF(i) (0x3 & (i)) /* Src tx done via AHB-Lite Interface i */ +#define ATC_DIF(i) ((0x3 & (i)) << 4) /* Dst tx done via AHB-Lite Interface i */ + /* Specify AHB interfaces */ +#define AT_DMA_MEM_IF 0 /* interface 0 as memory interface */ +#define AT_DMA_PER_IF 1 /* interface 1 as peripheral interface */ + +#define ATC_SRC_PIP (0x1 << 8) /* Source Picture-in-Picture enabled */ +#define ATC_DST_PIP (0x1 << 12) /* Destination Picture-in-Picture enabled */ +#define ATC_SRC_DSCR_DIS (0x1 << 16) /* Src Descriptor fetch disable */ +#define ATC_DST_DSCR_DIS (0x1 << 20) /* Dst Descriptor fetch disable */ +#define ATC_FC_MASK (0x7 << 21) /* Choose Flow Controller */ +#define ATC_FC_MEM2MEM (0x0 << 21) /* Mem-to-Mem (DMA) */ +#define ATC_FC_MEM2PER (0x1 << 21) /* Mem-to-Periph (DMA) */ +#define ATC_FC_PER2MEM (0x2 << 21) /* Periph-to-Mem (DMA) */ +#define ATC_FC_PER2PER (0x3 << 21) /* Periph-to-Periph (DMA) */ +#define ATC_FC_PER2MEM_PER (0x4 << 21) /* Periph-to-Mem (Peripheral) */ +#define ATC_FC_MEM2PER_PER (0x5 << 21) /* Mem-to-Periph (Peripheral) */ +#define ATC_FC_PER2PER_SRCPER (0x6 << 21) /* Periph-to-Periph (Src Peripheral) */ +#define ATC_FC_PER2PER_DSTPER (0x7 << 21) /* Periph-to-Periph (Dst Peripheral) */ +#define ATC_SRC_ADDR_MODE_MASK (0x3 << 24) +#define ATC_SRC_ADDR_MODE_INCR (0x0 << 24) /* Incrementing Mode */ +#define 
ATC_SRC_ADDR_MODE_DECR (0x1 << 24) /* Decrementing Mode */ +#define ATC_SRC_ADDR_MODE_FIXED (0x2 << 24) /* Fixed Mode */ +#define ATC_DST_ADDR_MODE_MASK (0x3 << 28) +#define ATC_DST_ADDR_MODE_INCR (0x0 << 28) /* Incrementing Mode */ +#define ATC_DST_ADDR_MODE_DECR (0x1 << 28) /* Decrementing Mode */ +#define ATC_DST_ADDR_MODE_FIXED (0x2 << 28) /* Fixed Mode */ +#define ATC_IEN (0x1 << 30) /* BTC interrupt enable (active low) */ +#define ATC_AUTO (0x1 << 31) /* Auto multiple buffer tx enable */ + +/* Bitfields in CFG */ +#define ATC_PER_MSB(h) ((0x30U & (h)) >> 4) /* Extract most significant bits of a handshaking identifier */ + +#define ATC_SRC_PER(h) (0xFU & (h)) /* Channel src rq associated with periph handshaking ifc h */ +#define ATC_DST_PER(h) ((0xFU & (h)) << 4) /* Channel dst rq associated with periph handshaking ifc h */ +#define ATC_SRC_REP (0x1 << 8) /* Source Replay Mod */ +#define ATC_SRC_H2SEL (0x1 << 9) /* Source Handshaking Mod */ +#define ATC_SRC_H2SEL_SW (0x0 << 9) +#define ATC_SRC_H2SEL_HW (0x1 << 9) +#define ATC_SRC_PER_MSB(h) (ATC_PER_MSB(h) << 10) /* Channel src rq (most significant bits) */ +#define ATC_DST_REP (0x1 << 12) /* Destination Replay Mod */ +#define ATC_DST_H2SEL (0x1 << 13) /* Destination Handshaking Mod */ +#define ATC_DST_H2SEL_SW (0x0 << 13) +#define ATC_DST_H2SEL_HW (0x1 << 13) +#define ATC_DST_PER_MSB(h) (ATC_PER_MSB(h) << 14) /* Channel dst rq (most significant bits) */ +#define ATC_SOD (0x1 << 16) /* Stop On Done */ +#define ATC_LOCK_IF (0x1 << 20) /* Interface Lock */ +#define ATC_LOCK_B (0x1 << 21) /* AHB Bus Lock */ +#define ATC_LOCK_IF_L (0x1 << 22) /* Master Interface Arbiter Lock */ +#define ATC_LOCK_IF_L_CHUNK (0x0 << 22) +#define ATC_LOCK_IF_L_BUFFER (0x1 << 22) +#define ATC_AHB_PROT_MASK (0x7 << 24) /* AHB Protection */ +#define ATC_FIFOCFG_MASK (0x3 << 28) /* FIFO Request Configuration */ +#define ATC_FIFOCFG_LARGESTBURST (0x0 << 28) +#define ATC_FIFOCFG_HALFFIFO (0x1 << 28) +#define ATC_FIFOCFG_ENOUGHSPACE (0x2 << 
28) + +/* Bitfields in SPIP */ +#define ATC_SPIP_HOLE(x) (0xFFFFU & (x)) +#define ATC_SPIP_BOUNDARY(x) ((0x3FF & (x)) << 16) + +/* Bitfields in DPIP */ +#define ATC_DPIP_HOLE(x) (0xFFFFU & (x)) +#define ATC_DPIP_BOUNDARY(x) ((0x3FF & (x)) << 16) + + +/*-- descriptors -----------------------------------------------------*/ + +/* LLI == Linked List Item; aka DMA buffer descriptor */ +struct at_lli { + /* values that are not changed by hardware */ + u32 saddr; + u32 daddr; + /* value that may get written back: */ + u32 ctrla; + /* more values that are not changed by hardware */ + u32 ctrlb; + u32 dscr; /* chain to next lli */ +}; + +/** + * struct at_desc - software descriptor + * @at_lli: hardware lli structure + * @txd: support for the async_tx api + * @desc_node: node on the channed descriptors list + * @len: descriptor byte count + * @total_len: total transaction byte count + */ +struct at_desc { + /* FIRST values the hardware uses */ + struct at_lli lli; + + /* THEN values for driver housekeeping */ + struct list_head tx_list; + struct dma_async_tx_descriptor txd; + struct list_head desc_node; + size_t len; + size_t total_len; + + /* Interleaved data */ + size_t boundary; + size_t dst_hole; + size_t src_hole; + + /* Memset temporary buffer */ + bool memset_buffer; + dma_addr_t memset_paddr; + int *memset_vaddr; +}; + +static inline struct at_desc * +txd_to_at_desc(struct dma_async_tx_descriptor *txd) +{ + return container_of(txd, struct at_desc, txd); +} + + +/*-- Channels --------------------------------------------------------*/ + +/** + * atc_status - information bits stored in channel status flag + * + * Manipulated with atomic operations. 
+ */ +enum atc_status { + ATC_IS_ERROR = 0, + ATC_IS_PAUSED = 1, + ATC_IS_CYCLIC = 24, +}; + +/** + * struct at_dma_chan - internal representation of an Atmel HDMAC channel + * @chan_common: common dmaengine channel object members + * @device: parent device + * @ch_regs: memory mapped register base + * @mask: channel index in a mask + * @per_if: peripheral interface + * @mem_if: memory interface + * @status: transmit status information from irq/prep* functions + * to tasklet (use atomic operations) + * @tasklet: bottom half to finish transaction work + * @save_cfg: configuration register that is saved on suspend/resume cycle + * @save_dscr: for cyclic operations, preserve next descriptor address in + * the cyclic list on suspend/resume cycle + * @dma_sconfig: configuration for slave transfers, passed via + * .device_config + * @lock: serializes enqueue/dequeue operations to descriptors lists + * @active_list: list of descriptors dmaengine is being running on + * @queue: list of descriptors ready to be submitted to engine + * @free_list: list of descriptors usable by the channel + */ +struct at_dma_chan { + struct dma_chan chan_common; + struct at_dma *device; + void __iomem *ch_regs; + u8 mask; + u8 per_if; + u8 mem_if; + unsigned long status; + struct tasklet_struct tasklet; + u32 save_cfg; + u32 save_dscr; + struct dma_slave_config dma_sconfig; + + spinlock_t lock; + + /* these other elements are all protected by lock */ + struct list_head active_list; + struct list_head queue; + struct list_head free_list; +}; + +#define channel_readl(atchan, name) \ + __raw_readl((atchan)->ch_regs + ATC_##name##_OFFSET) + +#define channel_writel(atchan, name, val) \ + __raw_writel((val), (atchan)->ch_regs + ATC_##name##_OFFSET) + +static inline struct at_dma_chan *to_at_dma_chan(struct dma_chan *dchan) +{ + return container_of(dchan, struct at_dma_chan, chan_common); +} + +/* + * Fix sconfig's burst size according to at_hdmac. 
We need to convert them as: + * 1 -> 0, 4 -> 1, 8 -> 2, 16 -> 3, 32 -> 4, 64 -> 5, 128 -> 6, 256 -> 7. + * + * This can be done by finding most significant bit set. + */ +static inline void convert_burst(u32 *maxburst) +{ + if (*maxburst > 1) + *maxburst = fls(*maxburst) - 2; + else + *maxburst = 0; +} + +/* + * Fix sconfig's bus width according to at_hdmac. + * 1 byte -> 0, 2 bytes -> 1, 4 bytes -> 2. + */ +static inline u8 convert_buswidth(enum dma_slave_buswidth addr_width) +{ + switch (addr_width) { + case DMA_SLAVE_BUSWIDTH_2_BYTES: + return 1; + case DMA_SLAVE_BUSWIDTH_4_BYTES: + return 2; + default: + /* For 1 byte width or fallback */ + return 0; + } +} + +/*-- Controller ------------------------------------------------------*/ + +/** + * struct at_dma - internal representation of an Atmel HDMA Controller + * @chan_common: common dmaengine dma_device object members + * @atdma_devtype: identifier of DMA controller compatibility + * @ch_regs: memory mapped register base + * @clk: dma controller clock + * @save_imr: interrupt mask register that is saved on suspend/resume cycle + * @all_chan_mask: all channels availlable in a mask + * @dma_desc_pool: base of DMA descriptor region (DMA address) + * @chan: channels table to store at_dma_chan structures + */ +struct at_dma { + struct dma_device dma_common; + void __iomem *regs; + struct clk *clk; + u32 save_imr; + + u8 all_chan_mask; + + struct dma_pool *dma_desc_pool; + struct dma_pool *memset_pool; + /* AT THE END channels table */ + struct at_dma_chan chan[]; +}; + +#define dma_readl(atdma, name) \ + __raw_readl((atdma)->regs + AT_DMA_##name) +#define dma_writel(atdma, name, val) \ + __raw_writel((val), (atdma)->regs + AT_DMA_##name) + +static inline struct at_dma *to_at_dma(struct dma_device *ddev) +{ + return container_of(ddev, struct at_dma, dma_common); +} + + +/*-- Helper functions ------------------------------------------------*/ + +static struct device *chan2dev(struct dma_chan *chan) +{ + return 
&chan->dev->device; +} + +#if defined(VERBOSE_DEBUG) +static void vdbg_dump_regs(struct at_dma_chan *atchan) +{ + struct at_dma *atdma = to_at_dma(atchan->chan_common.device); + + dev_err(chan2dev(&atchan->chan_common), + " channel %d : imr = 0x%x, chsr = 0x%x\n", + atchan->chan_common.chan_id, + dma_readl(atdma, EBCIMR), + dma_readl(atdma, CHSR)); + + dev_err(chan2dev(&atchan->chan_common), + " channel: s0x%x d0x%x ctrl0x%x:0x%x cfg0x%x l0x%x\n", + channel_readl(atchan, SADDR), + channel_readl(atchan, DADDR), + channel_readl(atchan, CTRLA), + channel_readl(atchan, CTRLB), + channel_readl(atchan, CFG), + channel_readl(atchan, DSCR)); +} +#else +static void vdbg_dump_regs(struct at_dma_chan *atchan) {} +#endif + +static void atc_dump_lli(struct at_dma_chan *atchan, struct at_lli *lli) +{ + dev_crit(chan2dev(&atchan->chan_common), + "desc: s%pad d%pad ctrl0x%x:0x%x l%pad\n", + &lli->saddr, &lli->daddr, + lli->ctrla, lli->ctrlb, &lli->dscr); +} + + +static void atc_setup_irq(struct at_dma *atdma, int chan_id, int on) +{ + u32 ebci; + + /* enable interrupts on buffer transfer completion & error */ + ebci = AT_DMA_BTC(chan_id) + | AT_DMA_ERR(chan_id); + if (on) + dma_writel(atdma, EBCIER, ebci); + else + dma_writel(atdma, EBCIDR, ebci); +} + +static void atc_enable_chan_irq(struct at_dma *atdma, int chan_id) +{ + atc_setup_irq(atdma, chan_id, 1); +} + +static void atc_disable_chan_irq(struct at_dma *atdma, int chan_id) +{ + atc_setup_irq(atdma, chan_id, 0); +} + + +/** + * atc_chan_is_enabled - test if given channel is enabled + * @atchan: channel we want to test status + */ +static inline int atc_chan_is_enabled(struct at_dma_chan *atchan) +{ + struct at_dma *atdma = to_at_dma(atchan->chan_common.device); + + return !!(dma_readl(atdma, CHSR) & atchan->mask); +} + +/** + * atc_chan_is_paused - test channel pause/resume status + * @atchan: channel we want to test status + */ +static inline int atc_chan_is_paused(struct at_dma_chan *atchan) +{ + return 
test_bit(ATC_IS_PAUSED, &atchan->status); +} + +/** + * atc_chan_is_cyclic - test if given channel has cyclic property set + * @atchan: channel we want to test status + */ +static inline int atc_chan_is_cyclic(struct at_dma_chan *atchan) +{ + return test_bit(ATC_IS_CYCLIC, &atchan->status); +} + +/** + * set_desc_eol - set end-of-link to descriptor so it will end transfer + * @desc: descriptor, signle or at the end of a chain, to end chain on + */ +static void set_desc_eol(struct at_desc *desc) +{ + u32 ctrlb = desc->lli.ctrlb; + + ctrlb &= ~ATC_IEN; + ctrlb |= ATC_SRC_DSCR_DIS | ATC_DST_DSCR_DIS; + + desc->lli.ctrlb = ctrlb; + desc->lli.dscr = 0; +} + #define ATC_DEFAULT_CFG (ATC_FIFOCFG_HALFFIFO) #define ATC_DEFAULT_CTRLB (ATC_SIF(AT_DMA_MEM_IF) \ |ATC_DIF(AT_DMA_MEM_IF)) diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h deleted file mode 100644 index d4d382d74607..000000000000 --- a/drivers/dma/at_hdmac_regs.h +++ /dev/null @@ -1,478 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Header file for the Atmel AHB DMA Controller driver - * - * Copyright (C) 2008 Atmel Corporation - */ -#ifndef AT_HDMAC_REGS_H -#define AT_HDMAC_REGS_H - -#define AT_DMA_MAX_NR_CHANNELS 8 - - -#define AT_DMA_GCFG 0x00 /* Global Configuration Register */ -#define AT_DMA_IF_BIGEND(i) (0x1 << (i)) /* AHB-Lite Interface i in Big-endian mode */ -#define AT_DMA_ARB_CFG (0x1 << 4) /* Arbiter mode. 
*/ -#define AT_DMA_ARB_CFG_FIXED (0x0 << 4) -#define AT_DMA_ARB_CFG_ROUND_ROBIN (0x1 << 4) - -#define AT_DMA_EN 0x04 /* Controller Enable Register */ -#define AT_DMA_ENABLE (0x1 << 0) - -#define AT_DMA_SREQ 0x08 /* Software Single Request Register */ -#define AT_DMA_SSREQ(x) (0x1 << ((x) << 1)) /* Request a source single transfer on channel x */ -#define AT_DMA_DSREQ(x) (0x1 << (1 + ((x) << 1))) /* Request a destination single transfer on channel x */ - -#define AT_DMA_CREQ 0x0C /* Software Chunk Transfer Request Register */ -#define AT_DMA_SCREQ(x) (0x1 << ((x) << 1)) /* Request a source chunk transfer on channel x */ -#define AT_DMA_DCREQ(x) (0x1 << (1 + ((x) << 1))) /* Request a destination chunk transfer on channel x */ - -#define AT_DMA_LAST 0x10 /* Software Last Transfer Flag Register */ -#define AT_DMA_SLAST(x) (0x1 << ((x) << 1)) /* This src rq is last tx of buffer on channel x */ -#define AT_DMA_DLAST(x) (0x1 << (1 + ((x) << 1))) /* This dst rq is last tx of buffer on channel x */ - -#define AT_DMA_SYNC 0x14 /* Request Synchronization Register */ -#define AT_DMA_SYR(h) (0x1 << (h)) /* Synchronize handshake line h */ - -/* Error, Chained Buffer transfer completed and Buffer transfer completed Interrupt registers */ -#define AT_DMA_EBCIER 0x18 /* Enable register */ -#define AT_DMA_EBCIDR 0x1C /* Disable register */ -#define AT_DMA_EBCIMR 0x20 /* Mask Register */ -#define AT_DMA_EBCISR 0x24 /* Status Register */ -#define AT_DMA_CBTC_OFFSET 8 -#define AT_DMA_ERR_OFFSET 16 -#define AT_DMA_BTC(x) (0x1 << (x)) -#define AT_DMA_CBTC(x) (0x1 << (AT_DMA_CBTC_OFFSET + (x))) -#define AT_DMA_ERR(x) (0x1 << (AT_DMA_ERR_OFFSET + (x))) - -#define AT_DMA_CHER 0x28 /* Channel Handler Enable Register */ -#define AT_DMA_ENA(x) (0x1 << (x)) -#define AT_DMA_SUSP(x) (0x1 << ( 8 + (x))) -#define AT_DMA_KEEP(x) (0x1 << (24 + (x))) - -#define AT_DMA_CHDR 0x2C /* Channel Handler Disable Register */ -#define AT_DMA_DIS(x) (0x1 << (x)) -#define AT_DMA_RES(x) (0x1 << ( 8 + (x))) - 
-#define AT_DMA_CHSR 0x30 /* Channel Handler Status Register */ -#define AT_DMA_EMPT(x) (0x1 << (16 + (x))) -#define AT_DMA_STAL(x) (0x1 << (24 + (x))) - - -#define AT_DMA_CH_REGS_BASE 0x3C /* Channel registers base address */ -#define ch_regs(x) (AT_DMA_CH_REGS_BASE + (x) * 0x28) /* Channel x base addr */ - -/* Hardware register offset for each channel */ -#define ATC_SADDR_OFFSET 0x00 /* Source Address Register */ -#define ATC_DADDR_OFFSET 0x04 /* Destination Address Register */ -#define ATC_DSCR_OFFSET 0x08 /* Descriptor Address Register */ -#define ATC_CTRLA_OFFSET 0x0C /* Control A Register */ -#define ATC_CTRLB_OFFSET 0x10 /* Control B Register */ -#define ATC_CFG_OFFSET 0x14 /* Configuration Register */ -#define ATC_SPIP_OFFSET 0x18 /* Src PIP Configuration Register */ -#define ATC_DPIP_OFFSET 0x1C /* Dst PIP Configuration Register */ - - -/* Bitfield definitions */ - -/* Bitfields in DSCR */ -#define ATC_DSCR_IF(i) (0x3 & (i)) /* Dsc feched via AHB-Lite Interface i */ - -/* Bitfields in CTRLA */ -#define ATC_BTSIZE_MAX 0xFFFFUL /* Maximum Buffer Transfer Size */ -#define ATC_BTSIZE(x) (ATC_BTSIZE_MAX & (x)) /* Buffer Transfer Size */ -#define ATC_SCSIZE_MASK (0x7 << 16) /* Source Chunk Transfer Size */ -#define ATC_SCSIZE(x) (ATC_SCSIZE_MASK & ((x) << 16)) -#define ATC_SCSIZE_1 (0x0 << 16) -#define ATC_SCSIZE_4 (0x1 << 16) -#define ATC_SCSIZE_8 (0x2 << 16) -#define ATC_SCSIZE_16 (0x3 << 16) -#define ATC_SCSIZE_32 (0x4 << 16) -#define ATC_SCSIZE_64 (0x5 << 16) -#define ATC_SCSIZE_128 (0x6 << 16) -#define ATC_SCSIZE_256 (0x7 << 16) -#define ATC_DCSIZE_MASK (0x7 << 20) /* Destination Chunk Transfer Size */ -#define ATC_DCSIZE(x) (ATC_DCSIZE_MASK & ((x) << 20)) -#define ATC_DCSIZE_1 (0x0 << 20) -#define ATC_DCSIZE_4 (0x1 << 20) -#define ATC_DCSIZE_8 (0x2 << 20) -#define ATC_DCSIZE_16 (0x3 << 20) -#define ATC_DCSIZE_32 (0x4 << 20) -#define ATC_DCSIZE_64 (0x5 << 20) -#define ATC_DCSIZE_128 (0x6 << 20) -#define ATC_DCSIZE_256 (0x7 << 20) -#define 
ATC_SRC_WIDTH_MASK (0x3 << 24) /* Source Single Transfer Size */ -#define ATC_SRC_WIDTH(x) ((x) << 24) -#define ATC_SRC_WIDTH_BYTE (0x0 << 24) -#define ATC_SRC_WIDTH_HALFWORD (0x1 << 24) -#define ATC_SRC_WIDTH_WORD (0x2 << 24) -#define ATC_REG_TO_SRC_WIDTH(r) (((r) >> 24) & 0x3) -#define ATC_DST_WIDTH_MASK (0x3 << 28) /* Destination Single Transfer Size */ -#define ATC_DST_WIDTH(x) ((x) << 28) -#define ATC_DST_WIDTH_BYTE (0x0 << 28) -#define ATC_DST_WIDTH_HALFWORD (0x1 << 28) -#define ATC_DST_WIDTH_WORD (0x2 << 28) -#define ATC_DONE (0x1 << 31) /* Tx Done (only written back in descriptor) */ - -/* Bitfields in CTRLB */ -#define ATC_SIF(i) (0x3 & (i)) /* Src tx done via AHB-Lite Interface i */ -#define ATC_DIF(i) ((0x3 & (i)) << 4) /* Dst tx done via AHB-Lite Interface i */ - /* Specify AHB interfaces */ -#define AT_DMA_MEM_IF 0 /* interface 0 as memory interface */ -#define AT_DMA_PER_IF 1 /* interface 1 as peripheral interface */ - -#define ATC_SRC_PIP (0x1 << 8) /* Source Picture-in-Picture enabled */ -#define ATC_DST_PIP (0x1 << 12) /* Destination Picture-in-Picture enabled */ -#define ATC_SRC_DSCR_DIS (0x1 << 16) /* Src Descriptor fetch disable */ -#define ATC_DST_DSCR_DIS (0x1 << 20) /* Dst Descriptor fetch disable */ -#define ATC_FC_MASK (0x7 << 21) /* Choose Flow Controller */ -#define ATC_FC_MEM2MEM (0x0 << 21) /* Mem-to-Mem (DMA) */ -#define ATC_FC_MEM2PER (0x1 << 21) /* Mem-to-Periph (DMA) */ -#define ATC_FC_PER2MEM (0x2 << 21) /* Periph-to-Mem (DMA) */ -#define ATC_FC_PER2PER (0x3 << 21) /* Periph-to-Periph (DMA) */ -#define ATC_FC_PER2MEM_PER (0x4 << 21) /* Periph-to-Mem (Peripheral) */ -#define ATC_FC_MEM2PER_PER (0x5 << 21) /* Mem-to-Periph (Peripheral) */ -#define ATC_FC_PER2PER_SRCPER (0x6 << 21) /* Periph-to-Periph (Src Peripheral) */ -#define ATC_FC_PER2PER_DSTPER (0x7 << 21) /* Periph-to-Periph (Dst Peripheral) */ -#define ATC_SRC_ADDR_MODE_MASK (0x3 << 24) -#define ATC_SRC_ADDR_MODE_INCR (0x0 << 24) /* Incrementing Mode */ -#define 
ATC_SRC_ADDR_MODE_DECR (0x1 << 24) /* Decrementing Mode */ -#define ATC_SRC_ADDR_MODE_FIXED (0x2 << 24) /* Fixed Mode */ -#define ATC_DST_ADDR_MODE_MASK (0x3 << 28) -#define ATC_DST_ADDR_MODE_INCR (0x0 << 28) /* Incrementing Mode */ -#define ATC_DST_ADDR_MODE_DECR (0x1 << 28) /* Decrementing Mode */ -#define ATC_DST_ADDR_MODE_FIXED (0x2 << 28) /* Fixed Mode */ -#define ATC_IEN (0x1 << 30) /* BTC interrupt enable (active low) */ -#define ATC_AUTO (0x1 << 31) /* Auto multiple buffer tx enable */ - -/* Bitfields in CFG */ -#define ATC_PER_MSB(h) ((0x30U & (h)) >> 4) /* Extract most significant bits of a handshaking identifier */ - -#define ATC_SRC_PER(h) (0xFU & (h)) /* Channel src rq associated with periph handshaking ifc h */ -#define ATC_DST_PER(h) ((0xFU & (h)) << 4) /* Channel dst rq associated with periph handshaking ifc h */ -#define ATC_SRC_REP (0x1 << 8) /* Source Replay Mod */ -#define ATC_SRC_H2SEL (0x1 << 9) /* Source Handshaking Mod */ -#define ATC_SRC_H2SEL_SW (0x0 << 9) -#define ATC_SRC_H2SEL_HW (0x1 << 9) -#define ATC_SRC_PER_MSB(h) (ATC_PER_MSB(h) << 10) /* Channel src rq (most significant bits) */ -#define ATC_DST_REP (0x1 << 12) /* Destination Replay Mod */ -#define ATC_DST_H2SEL (0x1 << 13) /* Destination Handshaking Mod */ -#define ATC_DST_H2SEL_SW (0x0 << 13) -#define ATC_DST_H2SEL_HW (0x1 << 13) -#define ATC_DST_PER_MSB(h) (ATC_PER_MSB(h) << 14) /* Channel dst rq (most significant bits) */ -#define ATC_SOD (0x1 << 16) /* Stop On Done */ -#define ATC_LOCK_IF (0x1 << 20) /* Interface Lock */ -#define ATC_LOCK_B (0x1 << 21) /* AHB Bus Lock */ -#define ATC_LOCK_IF_L (0x1 << 22) /* Master Interface Arbiter Lock */ -#define ATC_LOCK_IF_L_CHUNK (0x0 << 22) -#define ATC_LOCK_IF_L_BUFFER (0x1 << 22) -#define ATC_AHB_PROT_MASK (0x7 << 24) /* AHB Protection */ -#define ATC_FIFOCFG_MASK (0x3 << 28) /* FIFO Request Configuration */ -#define ATC_FIFOCFG_LARGESTBURST (0x0 << 28) -#define ATC_FIFOCFG_HALFFIFO (0x1 << 28) -#define ATC_FIFOCFG_ENOUGHSPACE (0x2 << 
28) - -/* Bitfields in SPIP */ -#define ATC_SPIP_HOLE(x) (0xFFFFU & (x)) -#define ATC_SPIP_BOUNDARY(x) ((0x3FF & (x)) << 16) - -/* Bitfields in DPIP */ -#define ATC_DPIP_HOLE(x) (0xFFFFU & (x)) -#define ATC_DPIP_BOUNDARY(x) ((0x3FF & (x)) << 16) - - -/*-- descriptors -----------------------------------------------------*/ - -/* LLI == Linked List Item; aka DMA buffer descriptor */ -struct at_lli { - /* values that are not changed by hardware */ - u32 saddr; - u32 daddr; - /* value that may get written back: */ - u32 ctrla; - /* more values that are not changed by hardware */ - u32 ctrlb; - u32 dscr; /* chain to next lli */ -}; - -/** - * struct at_desc - software descriptor - * @at_lli: hardware lli structure - * @txd: support for the async_tx api - * @desc_node: node on the channed descriptors list - * @len: descriptor byte count - * @total_len: total transaction byte count - */ -struct at_desc { - /* FIRST values the hardware uses */ - struct at_lli lli; - - /* THEN values for driver housekeeping */ - struct list_head tx_list; - struct dma_async_tx_descriptor txd; - struct list_head desc_node; - size_t len; - size_t total_len; - - /* Interleaved data */ - size_t boundary; - size_t dst_hole; - size_t src_hole; - - /* Memset temporary buffer */ - bool memset_buffer; - dma_addr_t memset_paddr; - int *memset_vaddr; -}; - -static inline struct at_desc * -txd_to_at_desc(struct dma_async_tx_descriptor *txd) -{ - return container_of(txd, struct at_desc, txd); -} - - -/*-- Channels --------------------------------------------------------*/ - -/** - * atc_status - information bits stored in channel status flag - * - * Manipulated with atomic operations. 
- */ -enum atc_status { - ATC_IS_ERROR = 0, - ATC_IS_PAUSED = 1, - ATC_IS_CYCLIC = 24, -}; - -/** - * struct at_dma_chan - internal representation of an Atmel HDMAC channel - * @chan_common: common dmaengine channel object members - * @device: parent device - * @ch_regs: memory mapped register base - * @mask: channel index in a mask - * @per_if: peripheral interface - * @mem_if: memory interface - * @status: transmit status information from irq/prep* functions - * to tasklet (use atomic operations) - * @tasklet: bottom half to finish transaction work - * @save_cfg: configuration register that is saved on suspend/resume cycle - * @save_dscr: for cyclic operations, preserve next descriptor address in - * the cyclic list on suspend/resume cycle - * @dma_sconfig: configuration for slave transfers, passed via - * .device_config - * @lock: serializes enqueue/dequeue operations to descriptors lists - * @active_list: list of descriptors dmaengine is being running on - * @queue: list of descriptors ready to be submitted to engine - * @free_list: list of descriptors usable by the channel - */ -struct at_dma_chan { - struct dma_chan chan_common; - struct at_dma *device; - void __iomem *ch_regs; - u8 mask; - u8 per_if; - u8 mem_if; - unsigned long status; - struct tasklet_struct tasklet; - u32 save_cfg; - u32 save_dscr; - struct dma_slave_config dma_sconfig; - - spinlock_t lock; - - /* these other elements are all protected by lock */ - struct list_head active_list; - struct list_head queue; - struct list_head free_list; -}; - -#define channel_readl(atchan, name) \ - __raw_readl((atchan)->ch_regs + ATC_##name##_OFFSET) - -#define channel_writel(atchan, name, val) \ - __raw_writel((val), (atchan)->ch_regs + ATC_##name##_OFFSET) - -static inline struct at_dma_chan *to_at_dma_chan(struct dma_chan *dchan) -{ - return container_of(dchan, struct at_dma_chan, chan_common); -} - -/* - * Fix sconfig's burst size according to at_hdmac. 
We need to convert them as: - * 1 -> 0, 4 -> 1, 8 -> 2, 16 -> 3, 32 -> 4, 64 -> 5, 128 -> 6, 256 -> 7. - * - * This can be done by finding most significant bit set. - */ -static inline void convert_burst(u32 *maxburst) -{ - if (*maxburst > 1) - *maxburst = fls(*maxburst) - 2; - else - *maxburst = 0; -} - -/* - * Fix sconfig's bus width according to at_hdmac. - * 1 byte -> 0, 2 bytes -> 1, 4 bytes -> 2. - */ -static inline u8 convert_buswidth(enum dma_slave_buswidth addr_width) -{ - switch (addr_width) { - case DMA_SLAVE_BUSWIDTH_2_BYTES: - return 1; - case DMA_SLAVE_BUSWIDTH_4_BYTES: - return 2; - default: - /* For 1 byte width or fallback */ - return 0; - } -} - -/*-- Controller ------------------------------------------------------*/ - -/** - * struct at_dma - internal representation of an Atmel HDMA Controller - * @chan_common: common dmaengine dma_device object members - * @atdma_devtype: identifier of DMA controller compatibility - * @ch_regs: memory mapped register base - * @clk: dma controller clock - * @save_imr: interrupt mask register that is saved on suspend/resume cycle - * @all_chan_mask: all channels availlable in a mask - * @dma_desc_pool: base of DMA descriptor region (DMA address) - * @chan: channels table to store at_dma_chan structures - */ -struct at_dma { - struct dma_device dma_common; - void __iomem *regs; - struct clk *clk; - u32 save_imr; - - u8 all_chan_mask; - - struct dma_pool *dma_desc_pool; - struct dma_pool *memset_pool; - /* AT THE END channels table */ - struct at_dma_chan chan[]; -}; - -#define dma_readl(atdma, name) \ - __raw_readl((atdma)->regs + AT_DMA_##name) -#define dma_writel(atdma, name, val) \ - __raw_writel((val), (atdma)->regs + AT_DMA_##name) - -static inline struct at_dma *to_at_dma(struct dma_device *ddev) -{ - return container_of(ddev, struct at_dma, dma_common); -} - - -/*-- Helper functions ------------------------------------------------*/ - -static struct device *chan2dev(struct dma_chan *chan) -{ - return 
&chan->dev->device; -} - -#if defined(VERBOSE_DEBUG) -static void vdbg_dump_regs(struct at_dma_chan *atchan) -{ - struct at_dma *atdma = to_at_dma(atchan->chan_common.device); - - dev_err(chan2dev(&atchan->chan_common), - " channel %d : imr = 0x%x, chsr = 0x%x\n", - atchan->chan_common.chan_id, - dma_readl(atdma, EBCIMR), - dma_readl(atdma, CHSR)); - - dev_err(chan2dev(&atchan->chan_common), - " channel: s0x%x d0x%x ctrl0x%x:0x%x cfg0x%x l0x%x\n", - channel_readl(atchan, SADDR), - channel_readl(atchan, DADDR), - channel_readl(atchan, CTRLA), - channel_readl(atchan, CTRLB), - channel_readl(atchan, CFG), - channel_readl(atchan, DSCR)); -} -#else -static void vdbg_dump_regs(struct at_dma_chan *atchan) {} -#endif - -static void atc_dump_lli(struct at_dma_chan *atchan, struct at_lli *lli) -{ - dev_crit(chan2dev(&atchan->chan_common), - "desc: s%pad d%pad ctrl0x%x:0x%x l%pad\n", - &lli->saddr, &lli->daddr, - lli->ctrla, lli->ctrlb, &lli->dscr); -} - - -static void atc_setup_irq(struct at_dma *atdma, int chan_id, int on) -{ - u32 ebci; - - /* enable interrupts on buffer transfer completion & error */ - ebci = AT_DMA_BTC(chan_id) - | AT_DMA_ERR(chan_id); - if (on) - dma_writel(atdma, EBCIER, ebci); - else - dma_writel(atdma, EBCIDR, ebci); -} - -static void atc_enable_chan_irq(struct at_dma *atdma, int chan_id) -{ - atc_setup_irq(atdma, chan_id, 1); -} - -static void atc_disable_chan_irq(struct at_dma *atdma, int chan_id) -{ - atc_setup_irq(atdma, chan_id, 0); -} - - -/** - * atc_chan_is_enabled - test if given channel is enabled - * @atchan: channel we want to test status - */ -static inline int atc_chan_is_enabled(struct at_dma_chan *atchan) -{ - struct at_dma *atdma = to_at_dma(atchan->chan_common.device); - - return !!(dma_readl(atdma, CHSR) & atchan->mask); -} - -/** - * atc_chan_is_paused - test channel pause/resume status - * @atchan: channel we want to test status - */ -static inline int atc_chan_is_paused(struct at_dma_chan *atchan) -{ - return 
test_bit(ATC_IS_PAUSED, &atchan->status); -} - -/** - * atc_chan_is_cyclic - test if given channel has cyclic property set - * @atchan: channel we want to test status - */ -static inline int atc_chan_is_cyclic(struct at_dma_chan *atchan) -{ - return test_bit(ATC_IS_CYCLIC, &atchan->status); -} - -/** - * set_desc_eol - set end-of-link to descriptor so it will end transfer - * @desc: descriptor, signle or at the end of a chain, to end chain on - */ -static void set_desc_eol(struct at_desc *desc) -{ - u32 ctrlb = desc->lli.ctrlb; - - ctrlb &= ~ATC_IEN; - ctrlb |= ATC_SRC_DSCR_DIS | ATC_DST_DSCR_DIS; - - desc->lli.ctrlb = ctrlb; - desc->lli.dscr = 0; -} - -#endif /* AT_HDMAC_REGS_H */ From d8840a7edcf0aa840e175af17d61476a7dbc65f7 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:03:02 +0300 Subject: [PATCH 1383/4122] dmaengine: at_hdmac: Use bitfield access macros Use the bitfield access macros in order to clean up the driver and make it easier to read. One will see some "line length exceeds 100 columns" checkpatch warnings. I chose not to introduce new lines for regs descriptions in order to preserve the style of the comments throughout the definitions. Style can be fixed in a further patch.
Signed-off-by: Tudor Ambarus Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-29-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 400 ++++++++++++++++++++--------------------- 1 file changed, 194 insertions(+), 206 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index a07e3355f09a..edec6a8c730c 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include @@ -36,176 +37,153 @@ #define AT_DMA_MAX_NR_CHANNELS 8 +/* Global Configuration Register */ +#define AT_DMA_GCFG 0x00 +#define AT_DMA_IF_BIGEND(i) BIT((i)) /* AHB-Lite Interface i in Big-endian mode */ +#define AT_DMA_ARB_CFG BIT(4) /* Arbiter mode. */ -#define AT_DMA_GCFG 0x00 /* Global Configuration Register */ -#define AT_DMA_IF_BIGEND(i) (0x1 << (i)) /* AHB-Lite Interface i in Big-endian mode */ -#define AT_DMA_ARB_CFG (0x1 << 4) /* Arbiter mode. 
*/ -#define AT_DMA_ARB_CFG_FIXED (0x0 << 4) -#define AT_DMA_ARB_CFG_ROUND_ROBIN (0x1 << 4) +/* Controller Enable Register */ +#define AT_DMA_EN 0x04 +#define AT_DMA_ENABLE BIT(0) -#define AT_DMA_EN 0x04 /* Controller Enable Register */ -#define AT_DMA_ENABLE (0x1 << 0) +/* Software Single Request Register */ +#define AT_DMA_SREQ 0x08 +#define AT_DMA_SSREQ(x) BIT((x) << 1) /* Request a source single transfer on channel x */ +#define AT_DMA_DSREQ(x) BIT(1 + ((x) << 1)) /* Request a destination single transfer on channel x */ -#define AT_DMA_SREQ 0x08 /* Software Single Request Register */ -#define AT_DMA_SSREQ(x) (0x1 << ((x) << 1)) /* Request a source single transfer on channel x */ -#define AT_DMA_DSREQ(x) (0x1 << (1 + ((x) << 1))) /* Request a destination single transfer on channel x */ +/* Software Chunk Transfer Request Register */ +#define AT_DMA_CREQ 0x0c +#define AT_DMA_SCREQ(x) BIT((x) << 1) /* Request a source chunk transfer on channel x */ +#define AT_DMA_DCREQ(x) BIT(1 + ((x) << 1)) /* Request a destination chunk transfer on channel x */ -#define AT_DMA_CREQ 0x0C /* Software Chunk Transfer Request Register */ -#define AT_DMA_SCREQ(x) (0x1 << ((x) << 1)) /* Request a source chunk transfer on channel x */ -#define AT_DMA_DCREQ(x) (0x1 << (1 + ((x) << 1))) /* Request a destination chunk transfer on channel x */ +/* Software Last Transfer Flag Register */ +#define AT_DMA_LAST 0x10 +#define AT_DMA_SLAST(x) BIT((x) << 1) /* This src rq is last tx of buffer on channel x */ +#define AT_DMA_DLAST(x) BIT(1 + ((x) << 1)) /* This dst rq is last tx of buffer on channel x */ -#define AT_DMA_LAST 0x10 /* Software Last Transfer Flag Register */ -#define AT_DMA_SLAST(x) (0x1 << ((x) << 1)) /* This src rq is last tx of buffer on channel x */ -#define AT_DMA_DLAST(x) (0x1 << (1 + ((x) << 1))) /* This dst rq is last tx of buffer on channel x */ - -#define AT_DMA_SYNC 0x14 /* Request Synchronization Register */ -#define AT_DMA_SYR(h) (0x1 << (h)) /* Synchronize handshake line 
h */ +/* Request Synchronization Register */ +#define AT_DMA_SYNC 0x14 +#define AT_DMA_SYR(h) BIT((h)) /* Synchronize handshake line h */ /* Error, Chained Buffer transfer completed and Buffer transfer completed Interrupt registers */ -#define AT_DMA_EBCIER 0x18 /* Enable register */ -#define AT_DMA_EBCIDR 0x1C /* Disable register */ -#define AT_DMA_EBCIMR 0x20 /* Mask Register */ -#define AT_DMA_EBCISR 0x24 /* Status Register */ -#define AT_DMA_CBTC_OFFSET 8 -#define AT_DMA_ERR_OFFSET 16 -#define AT_DMA_BTC(x) (0x1 << (x)) -#define AT_DMA_CBTC(x) (0x1 << (AT_DMA_CBTC_OFFSET + (x))) -#define AT_DMA_ERR(x) (0x1 << (AT_DMA_ERR_OFFSET + (x))) +#define AT_DMA_EBCIER 0x18 /* Enable register */ +#define AT_DMA_EBCIDR 0x1c /* Disable register */ +#define AT_DMA_EBCIMR 0x20 /* Mask Register */ +#define AT_DMA_EBCISR 0x24 /* Status Register */ +#define AT_DMA_CBTC_OFFSET 8 +#define AT_DMA_ERR_OFFSET 16 +#define AT_DMA_BTC(x) BIT((x)) +#define AT_DMA_CBTC(x) BIT(AT_DMA_CBTC_OFFSET + (x)) +#define AT_DMA_ERR(x) BIT(AT_DMA_ERR_OFFSET + (x)) -#define AT_DMA_CHER 0x28 /* Channel Handler Enable Register */ -#define AT_DMA_ENA(x) (0x1 << (x)) -#define AT_DMA_SUSP(x) (0x1 << ( 8 + (x))) -#define AT_DMA_KEEP(x) (0x1 << (24 + (x))) +/* Channel Handler Enable Register */ +#define AT_DMA_CHER 0x28 +#define AT_DMA_ENA(x) BIT((x)) +#define AT_DMA_SUSP(x) BIT(8 + (x)) +#define AT_DMA_KEEP(x) BIT(24 + (x)) -#define AT_DMA_CHDR 0x2C /* Channel Handler Disable Register */ -#define AT_DMA_DIS(x) (0x1 << (x)) -#define AT_DMA_RES(x) (0x1 << ( 8 + (x))) +/* Channel Handler Disable Register */ +#define AT_DMA_CHDR 0x2c +#define AT_DMA_DIS(x) BIT(x) +#define AT_DMA_RES(x) BIT(8 + (x)) -#define AT_DMA_CHSR 0x30 /* Channel Handler Status Register */ -#define AT_DMA_EMPT(x) (0x1 << (16 + (x))) -#define AT_DMA_STAL(x) (0x1 << (24 + (x))) +/* Channel Handler Status Register */ +#define AT_DMA_CHSR 0x30 +#define AT_DMA_EMPT(x) BIT(16 + (x)) +#define AT_DMA_STAL(x) BIT(24 + (x)) - -#define 
AT_DMA_CH_REGS_BASE 0x3C /* Channel registers base address */ -#define ch_regs(x) (AT_DMA_CH_REGS_BASE + (x) * 0x28) /* Channel x base addr */ +/* Channel registers base address */ +#define AT_DMA_CH_REGS_BASE 0x3c +#define ch_regs(x) (AT_DMA_CH_REGS_BASE + (x) * 0x28) /* Channel x base addr */ /* Hardware register offset for each channel */ -#define ATC_SADDR_OFFSET 0x00 /* Source Address Register */ -#define ATC_DADDR_OFFSET 0x04 /* Destination Address Register */ -#define ATC_DSCR_OFFSET 0x08 /* Descriptor Address Register */ -#define ATC_CTRLA_OFFSET 0x0C /* Control A Register */ -#define ATC_CTRLB_OFFSET 0x10 /* Control B Register */ -#define ATC_CFG_OFFSET 0x14 /* Configuration Register */ -#define ATC_SPIP_OFFSET 0x18 /* Src PIP Configuration Register */ -#define ATC_DPIP_OFFSET 0x1C /* Dst PIP Configuration Register */ +#define ATC_SADDR_OFFSET 0x00 /* Source Address Register */ +#define ATC_DADDR_OFFSET 0x04 /* Destination Address Register */ +#define ATC_DSCR_OFFSET 0x08 /* Descriptor Address Register */ +#define ATC_CTRLA_OFFSET 0x0c /* Control A Register */ +#define ATC_CTRLB_OFFSET 0x10 /* Control B Register */ +#define ATC_CFG_OFFSET 0x14 /* Configuration Register */ +#define ATC_SPIP_OFFSET 0x18 /* Src PIP Configuration Register */ +#define ATC_DPIP_OFFSET 0x1c /* Dst PIP Configuration Register */ /* Bitfield definitions */ /* Bitfields in DSCR */ -#define ATC_DSCR_IF(i) (0x3 & (i)) /* Dsc feched via AHB-Lite Interface i */ +#define ATC_DSCR_IF GENMASK(1, 0) /* Dsc feched via AHB-Lite Interface */ /* Bitfields in CTRLA */ -#define ATC_BTSIZE_MAX 0xFFFFUL /* Maximum Buffer Transfer Size */ -#define ATC_BTSIZE(x) (ATC_BTSIZE_MAX & (x)) /* Buffer Transfer Size */ -#define ATC_SCSIZE_MASK (0x7 << 16) /* Source Chunk Transfer Size */ -#define ATC_SCSIZE(x) (ATC_SCSIZE_MASK & ((x) << 16)) -#define ATC_SCSIZE_1 (0x0 << 16) -#define ATC_SCSIZE_4 (0x1 << 16) -#define ATC_SCSIZE_8 (0x2 << 16) -#define ATC_SCSIZE_16 (0x3 << 16) -#define ATC_SCSIZE_32 (0x4 << 
16) -#define ATC_SCSIZE_64 (0x5 << 16) -#define ATC_SCSIZE_128 (0x6 << 16) -#define ATC_SCSIZE_256 (0x7 << 16) -#define ATC_DCSIZE_MASK (0x7 << 20) /* Destination Chunk Transfer Size */ -#define ATC_DCSIZE(x) (ATC_DCSIZE_MASK & ((x) << 20)) -#define ATC_DCSIZE_1 (0x0 << 20) -#define ATC_DCSIZE_4 (0x1 << 20) -#define ATC_DCSIZE_8 (0x2 << 20) -#define ATC_DCSIZE_16 (0x3 << 20) -#define ATC_DCSIZE_32 (0x4 << 20) -#define ATC_DCSIZE_64 (0x5 << 20) -#define ATC_DCSIZE_128 (0x6 << 20) -#define ATC_DCSIZE_256 (0x7 << 20) -#define ATC_SRC_WIDTH_MASK (0x3 << 24) /* Source Single Transfer Size */ -#define ATC_SRC_WIDTH(x) ((x) << 24) -#define ATC_SRC_WIDTH_BYTE (0x0 << 24) -#define ATC_SRC_WIDTH_HALFWORD (0x1 << 24) -#define ATC_SRC_WIDTH_WORD (0x2 << 24) -#define ATC_REG_TO_SRC_WIDTH(r) (((r) >> 24) & 0x3) -#define ATC_DST_WIDTH_MASK (0x3 << 28) /* Destination Single Transfer Size */ -#define ATC_DST_WIDTH(x) ((x) << 28) -#define ATC_DST_WIDTH_BYTE (0x0 << 28) -#define ATC_DST_WIDTH_HALFWORD (0x1 << 28) -#define ATC_DST_WIDTH_WORD (0x2 << 28) -#define ATC_DONE (0x1 << 31) /* Tx Done (only written back in descriptor) */ +#define ATC_BTSIZE_MAX GENMASK(15, 0) /* Maximum Buffer Transfer Size */ +#define ATC_BTSIZE GENMASK(15, 0) /* Buffer Transfer Size */ +#define ATC_SCSIZE GENMASK(18, 16) /* Source Chunk Transfer Size */ +#define ATC_DCSIZE GENMASK(22, 20) /* Destination Chunk Transfer Size */ +#define ATC_SRC_WIDTH GENMASK(25, 24) /* Source Single Transfer Size */ +#define ATC_DST_WIDTH GENMASK(29, 28) /* Destination Single Transfer Size */ +#define ATC_DONE BIT(31) /* Tx Done (only written back in descriptor) */ /* Bitfields in CTRLB */ -#define ATC_SIF(i) (0x3 & (i)) /* Src tx done via AHB-Lite Interface i */ -#define ATC_DIF(i) ((0x3 & (i)) << 4) /* Dst tx done via AHB-Lite Interface i */ - /* Specify AHB interfaces */ -#define AT_DMA_MEM_IF 0 /* interface 0 as memory interface */ -#define AT_DMA_PER_IF 1 /* interface 1 as peripheral interface */ - -#define ATC_SRC_PIP 
(0x1 << 8) /* Source Picture-in-Picture enabled */ -#define ATC_DST_PIP (0x1 << 12) /* Destination Picture-in-Picture enabled */ -#define ATC_SRC_DSCR_DIS (0x1 << 16) /* Src Descriptor fetch disable */ -#define ATC_DST_DSCR_DIS (0x1 << 20) /* Dst Descriptor fetch disable */ -#define ATC_FC_MASK (0x7 << 21) /* Choose Flow Controller */ -#define ATC_FC_MEM2MEM (0x0 << 21) /* Mem-to-Mem (DMA) */ -#define ATC_FC_MEM2PER (0x1 << 21) /* Mem-to-Periph (DMA) */ -#define ATC_FC_PER2MEM (0x2 << 21) /* Periph-to-Mem (DMA) */ -#define ATC_FC_PER2PER (0x3 << 21) /* Periph-to-Periph (DMA) */ -#define ATC_FC_PER2MEM_PER (0x4 << 21) /* Periph-to-Mem (Peripheral) */ -#define ATC_FC_MEM2PER_PER (0x5 << 21) /* Mem-to-Periph (Peripheral) */ -#define ATC_FC_PER2PER_SRCPER (0x6 << 21) /* Periph-to-Periph (Src Peripheral) */ -#define ATC_FC_PER2PER_DSTPER (0x7 << 21) /* Periph-to-Periph (Dst Peripheral) */ -#define ATC_SRC_ADDR_MODE_MASK (0x3 << 24) -#define ATC_SRC_ADDR_MODE_INCR (0x0 << 24) /* Incrementing Mode */ -#define ATC_SRC_ADDR_MODE_DECR (0x1 << 24) /* Decrementing Mode */ -#define ATC_SRC_ADDR_MODE_FIXED (0x2 << 24) /* Fixed Mode */ -#define ATC_DST_ADDR_MODE_MASK (0x3 << 28) -#define ATC_DST_ADDR_MODE_INCR (0x0 << 28) /* Incrementing Mode */ -#define ATC_DST_ADDR_MODE_DECR (0x1 << 28) /* Decrementing Mode */ -#define ATC_DST_ADDR_MODE_FIXED (0x2 << 28) /* Fixed Mode */ -#define ATC_IEN (0x1 << 30) /* BTC interrupt enable (active low) */ -#define ATC_AUTO (0x1 << 31) /* Auto multiple buffer tx enable */ +#define ATC_SIF GENMASK(1, 0) /* Src tx done via AHB-Lite Interface i */ +#define ATC_DIF GENMASK(5, 4) /* Dst tx done via AHB-Lite Interface i */ +#define AT_DMA_MEM_IF 0x0 /* interface 0 as memory interface */ +#define AT_DMA_PER_IF 0x1 /* interface 1 as peripheral interface */ +#define ATC_SRC_PIP BIT(8) /* Source Picture-in-Picture enabled */ +#define ATC_DST_PIP BIT(12) /* Destination Picture-in-Picture enabled */ +#define ATC_SRC_DSCR_DIS BIT(16) /* Src Descriptor fetch 
disable */ +#define ATC_DST_DSCR_DIS BIT(20) /* Dst Descriptor fetch disable */ +#define ATC_FC GENMASK(22, 21) /* Choose Flow Controller */ +#define ATC_FC_MEM2MEM 0x0 /* Mem-to-Mem (DMA) */ +#define ATC_FC_MEM2PER 0x1 /* Mem-to-Periph (DMA) */ +#define ATC_FC_PER2MEM 0x2 /* Periph-to-Mem (DMA) */ +#define ATC_FC_PER2PER 0x3 /* Periph-to-Periph (DMA) */ +#define ATC_FC_PER2MEM_PER 0x4 /* Periph-to-Mem (Peripheral) */ +#define ATC_FC_MEM2PER_PER 0x5 /* Mem-to-Periph (Peripheral) */ +#define ATC_FC_PER2PER_SRCPER 0x6 /* Periph-to-Periph (Src Peripheral) */ +#define ATC_FC_PER2PER_DSTPER 0x7 /* Periph-to-Periph (Dst Peripheral) */ +#define ATC_SRC_ADDR_MODE GENMASK(25, 24) +#define ATC_SRC_ADDR_MODE_INCR 0x0 /* Incrementing Mode */ +#define ATC_SRC_ADDR_MODE_DECR 0x1 /* Decrementing Mode */ +#define ATC_SRC_ADDR_MODE_FIXED 0x2 /* Fixed Mode */ +#define ATC_DST_ADDR_MODE GENMASK(29, 28) +#define ATC_DST_ADDR_MODE_INCR 0x0 /* Incrementing Mode */ +#define ATC_DST_ADDR_MODE_DECR 0x1 /* Decrementing Mode */ +#define ATC_DST_ADDR_MODE_FIXED 0x2 /* Fixed Mode */ +#define ATC_IEN BIT(30) /* BTC interrupt enable (active low) */ +#define ATC_AUTO BIT(31) /* Auto multiple buffer tx enable */ /* Bitfields in CFG */ #define ATC_PER_MSB(h) ((0x30U & (h)) >> 4) /* Extract most significant bits of a handshaking identifier */ -#define ATC_SRC_PER(h) (0xFU & (h)) /* Channel src rq associated with periph handshaking ifc h */ -#define ATC_DST_PER(h) ((0xFU & (h)) << 4) /* Channel dst rq associated with periph handshaking ifc h */ -#define ATC_SRC_REP (0x1 << 8) /* Source Replay Mod */ -#define ATC_SRC_H2SEL (0x1 << 9) /* Source Handshaking Mod */ -#define ATC_SRC_H2SEL_SW (0x0 << 9) -#define ATC_SRC_H2SEL_HW (0x1 << 9) -#define ATC_SRC_PER_MSB(h) (ATC_PER_MSB(h) << 10) /* Channel src rq (most significant bits) */ -#define ATC_DST_REP (0x1 << 12) /* Destination Replay Mod */ -#define ATC_DST_H2SEL (0x1 << 13) /* Destination Handshaking Mod */ -#define ATC_DST_H2SEL_SW (0x0 << 13) 
-#define ATC_DST_H2SEL_HW (0x1 << 13) -#define ATC_DST_PER_MSB(h) (ATC_PER_MSB(h) << 14) /* Channel dst rq (most significant bits) */ -#define ATC_SOD (0x1 << 16) /* Stop On Done */ -#define ATC_LOCK_IF (0x1 << 20) /* Interface Lock */ -#define ATC_LOCK_B (0x1 << 21) /* AHB Bus Lock */ -#define ATC_LOCK_IF_L (0x1 << 22) /* Master Interface Arbiter Lock */ -#define ATC_LOCK_IF_L_CHUNK (0x0 << 22) -#define ATC_LOCK_IF_L_BUFFER (0x1 << 22) -#define ATC_AHB_PROT_MASK (0x7 << 24) /* AHB Protection */ -#define ATC_FIFOCFG_MASK (0x3 << 28) /* FIFO Request Configuration */ -#define ATC_FIFOCFG_LARGESTBURST (0x0 << 28) -#define ATC_FIFOCFG_HALFFIFO (0x1 << 28) -#define ATC_FIFOCFG_ENOUGHSPACE (0x2 << 28) +#define ATC_SRC_PER GENMASK(3, 0) /* Channel src rq associated with periph handshaking ifc h */ +#define ATC_DST_PER GENMASK(7, 4) /* Channel dst rq associated with periph handshaking ifc h */ +#define ATC_SRC_REP BIT(8) /* Source Replay Mod */ +#define ATC_SRC_H2SEL BIT(9) /* Source Handshaking Mod */ +#define ATC_SRC_PER_MSB GENMASK(11, 10) /* Channel src rq (most significant bits) */ +#define ATC_DST_REP BIT(12) /* Destination Replay Mod */ +#define ATC_DST_H2SEL BIT(13) /* Destination Handshaking Mod */ +#define ATC_DST_PER_MSB GENMASK(15, 14) /* Channel dst rq (most significant bits) */ +#define ATC_SOD BIT(16) /* Stop On Done */ +#define ATC_LOCK_IF BIT(20) /* Interface Lock */ +#define ATC_LOCK_B BIT(21) /* AHB Bus Lock */ +#define ATC_LOCK_IF_L BIT(22) /* Master Interface Arbiter Lock */ +#define ATC_AHB_PROT GENMASK(26, 24) /* AHB Protection */ +#define ATC_FIFOCFG GENMASK(29, 28) /* FIFO Request Configuration */ +#define ATC_FIFOCFG_LARGESTBURST 0x0 +#define ATC_FIFOCFG_HALFFIFO 0x1 +#define ATC_FIFOCFG_ENOUGHSPACE 0x2 /* Bitfields in SPIP */ -#define ATC_SPIP_HOLE(x) (0xFFFFU & (x)) -#define ATC_SPIP_BOUNDARY(x) ((0x3FF & (x)) << 16) +#define ATC_SPIP_HOLE GENMASK(15, 0) +#define ATC_SPIP_BOUNDARY GENMASK(25, 16) /* Bitfields in DPIP */ -#define ATC_DPIP_HOLE(x) 
(0xFFFFU & (x)) -#define ATC_DPIP_BOUNDARY(x) ((0x3FF & (x)) << 16) +#define ATC_DPIP_HOLE GENMASK(15, 0) +#define ATC_DPIP_BOUNDARY GENMASK(25, 16) + +#define ATC_SRC_PER_ID(id) (FIELD_PREP(ATC_SRC_PER_MSB, (id)) | \ + FIELD_PREP(ATC_SRC_PER, (id))) +#define ATC_DST_PER_ID(id) (FIELD_PREP(ATC_DST_PER_MSB, (id)) | \ + FIELD_PREP(ATC_DST_PER, (id))) + /*-- descriptors -----------------------------------------------------*/ @@ -502,9 +480,9 @@ static void set_desc_eol(struct at_desc *desc) desc->lli.dscr = 0; } -#define ATC_DEFAULT_CFG (ATC_FIFOCFG_HALFFIFO) -#define ATC_DEFAULT_CTRLB (ATC_SIF(AT_DMA_MEM_IF) \ - |ATC_DIF(AT_DMA_MEM_IF)) +#define ATC_DEFAULT_CFG FIELD_PREP(ATC_FIFOCFG, ATC_FIFOCFG_HALFFIFO) +#define ATC_DEFAULT_CTRLB (FIELD_PREP(ATC_SIF, AT_DMA_MEM_IF) | \ + FIELD_PREP(ATC_DIF, AT_DMA_MEM_IF)) #define ATC_DMA_BUSWIDTHS\ (BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) |\ BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |\ @@ -720,10 +698,12 @@ static void atc_dostart(struct at_dma_chan *atchan, struct at_desc *first) channel_writel(atchan, CTRLA, 0); channel_writel(atchan, CTRLB, 0); channel_writel(atchan, DSCR, first->txd.phys); - channel_writel(atchan, SPIP, ATC_SPIP_HOLE(first->src_hole) | - ATC_SPIP_BOUNDARY(first->boundary)); - channel_writel(atchan, DPIP, ATC_DPIP_HOLE(first->dst_hole) | - ATC_DPIP_BOUNDARY(first->boundary)); + channel_writel(atchan, SPIP, FIELD_PREP(ATC_SPIP_HOLE, + first->src_hole) | + FIELD_PREP(ATC_SPIP_BOUNDARY, first->boundary)); + channel_writel(atchan, DPIP, FIELD_PREP(ATC_DPIP_HOLE, + first->dst_hole) | + FIELD_PREP(ATC_DPIP_BOUNDARY, first->boundary)); /* Don't allow CPU to reorder channel enable. 
*/ wmb(); dma_writel(atdma, CHER, atchan->mask); @@ -763,8 +743,8 @@ static struct at_desc *atc_get_desc_by_cookie(struct at_dma_chan *atchan, */ static inline u32 atc_calc_bytes_left(u32 current_len, u32 ctrla) { - u32 btsize = (ctrla & ATC_BTSIZE_MAX); - u32 src_width = ATC_REG_TO_SRC_WIDTH(ctrla); + u32 btsize = FIELD_GET(ATC_BTSIZE, ctrla); + u32 src_width = FIELD_GET(ATC_SRC_WIDTH, ctrla); /* * According to the datasheet, when reading the Control A Register @@ -1203,15 +1183,14 @@ atc_prep_dma_interleaved(struct dma_chan *chan, return NULL; } - ctrla = ATC_SRC_WIDTH(dwidth) | - ATC_DST_WIDTH(dwidth); + ctrla = FIELD_PREP(ATC_SRC_WIDTH, dwidth) | + FIELD_PREP(ATC_DST_WIDTH, dwidth); - ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN - | ATC_SRC_ADDR_MODE_INCR - | ATC_DST_ADDR_MODE_INCR - | ATC_SRC_PIP - | ATC_DST_PIP - | ATC_FC_MEM2MEM; + ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN | + FIELD_PREP(ATC_SRC_ADDR_MODE, ATC_SRC_ADDR_MODE_INCR) | + FIELD_PREP(ATC_DST_ADDR_MODE, ATC_DST_ADDR_MODE_INCR) | + ATC_SRC_PIP | ATC_DST_PIP | + FIELD_PREP(ATC_FC, ATC_FC_MEM2MEM); /* create the transfer */ desc = atc_desc_get(atchan); @@ -1272,10 +1251,10 @@ atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, return NULL; } - ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN - | ATC_SRC_ADDR_MODE_INCR - | ATC_DST_ADDR_MODE_INCR - | ATC_FC_MEM2MEM; + ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN | + FIELD_PREP(ATC_SRC_ADDR_MODE, ATC_SRC_ADDR_MODE_INCR) | + FIELD_PREP(ATC_DST_ADDR_MODE, ATC_DST_ADDR_MODE_INCR) | + FIELD_PREP(ATC_FC, ATC_FC_MEM2MEM); /* * We can be a lot more clever here, but this should take care @@ -1283,8 +1262,8 @@ atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, */ src_width = dst_width = atc_get_xfer_width(src, dest, len); - ctrla = ATC_SRC_WIDTH(src_width) | - ATC_DST_WIDTH(dst_width); + ctrla = FIELD_PREP(ATC_SRC_WIDTH, src_width) | + FIELD_PREP(ATC_DST_WIDTH, dst_width); for (offset = 0; offset < len; offset += xfer_count << src_width) { xfer_count 
= min_t(size_t, (len - offset) >> src_width, @@ -1330,11 +1309,11 @@ static struct at_desc *atc_create_memset_desc(struct dma_chan *chan, struct at_desc *desc; size_t xfer_count; - u32 ctrla = ATC_SRC_WIDTH(2) | ATC_DST_WIDTH(2); + u32 ctrla = FIELD_PREP(ATC_SRC_WIDTH, 2) | FIELD_PREP(ATC_DST_WIDTH, 2); u32 ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN | - ATC_SRC_ADDR_MODE_FIXED | - ATC_DST_ADDR_MODE_INCR | - ATC_FC_MEM2MEM; + FIELD_PREP(ATC_SRC_ADDR_MODE, ATC_SRC_ADDR_MODE_FIXED) | + FIELD_PREP(ATC_DST_ADDR_MODE, ATC_DST_ADDR_MODE_INCR) | + FIELD_PREP(ATC_FC, ATC_FC_MEM2MEM); xfer_count = len >> 2; if (xfer_count > ATC_BTSIZE_MAX) { @@ -1549,18 +1528,20 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, return NULL; } - ctrla = ATC_SCSIZE(sconfig->src_maxburst) - | ATC_DCSIZE(sconfig->dst_maxburst); + ctrla = FIELD_PREP(ATC_SCSIZE, sconfig->src_maxburst) | + FIELD_PREP(ATC_DCSIZE, sconfig->dst_maxburst); ctrlb = ATC_IEN; switch (direction) { case DMA_MEM_TO_DEV: reg_width = convert_buswidth(sconfig->dst_addr_width); - ctrla |= ATC_DST_WIDTH(reg_width); - ctrlb |= ATC_DST_ADDR_MODE_FIXED - | ATC_SRC_ADDR_MODE_INCR - | ATC_FC_MEM2PER - | ATC_SIF(atchan->mem_if) | ATC_DIF(atchan->per_if); + ctrla |= FIELD_PREP(ATC_DST_WIDTH, reg_width); + ctrlb |= FIELD_PREP(ATC_DST_ADDR_MODE, + ATC_DST_ADDR_MODE_FIXED) | + FIELD_PREP(ATC_SRC_ADDR_MODE, ATC_SRC_ADDR_MODE_INCR) | + FIELD_PREP(ATC_FC, ATC_FC_MEM2PER) | + FIELD_PREP(ATC_SIF, atchan->mem_if) | + FIELD_PREP(ATC_DIF, atchan->per_if); reg = sconfig->dst_addr; for_each_sg(sgl, sg, sg_len, i) { struct at_desc *desc; @@ -1584,9 +1565,9 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, desc->lli.saddr = mem; desc->lli.daddr = reg; - desc->lli.ctrla = ctrla - | ATC_SRC_WIDTH(mem_width) - | len >> mem_width; + desc->lli.ctrla = ctrla | + FIELD_PREP(ATC_SRC_WIDTH, mem_width) | + len >> mem_width; desc->lli.ctrlb = ctrlb; desc->len = len; @@ -1596,11 +1577,13 @@ atc_prep_slave_sg(struct dma_chan *chan, 
struct scatterlist *sgl, break; case DMA_DEV_TO_MEM: reg_width = convert_buswidth(sconfig->src_addr_width); - ctrla |= ATC_SRC_WIDTH(reg_width); - ctrlb |= ATC_DST_ADDR_MODE_INCR - | ATC_SRC_ADDR_MODE_FIXED - | ATC_FC_PER2MEM - | ATC_SIF(atchan->per_if) | ATC_DIF(atchan->mem_if); + ctrla |= FIELD_PREP(ATC_SRC_WIDTH, reg_width); + ctrlb |= FIELD_PREP(ATC_DST_ADDR_MODE, ATC_DST_ADDR_MODE_INCR) | + FIELD_PREP(ATC_SRC_ADDR_MODE, + ATC_SRC_ADDR_MODE_FIXED) | + FIELD_PREP(ATC_FC, ATC_FC_PER2MEM) | + FIELD_PREP(ATC_SIF, atchan->per_if) | + FIELD_PREP(ATC_DIF, atchan->mem_if); reg = sconfig->src_addr; for_each_sg(sgl, sg, sg_len, i) { @@ -1625,9 +1608,9 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, desc->lli.saddr = reg; desc->lli.daddr = mem; - desc->lli.ctrla = ctrla - | ATC_DST_WIDTH(mem_width) - | len >> reg_width; + desc->lli.ctrla = ctrla | + FIELD_PREP(ATC_DST_WIDTH, mem_width) | + len >> reg_width; desc->lli.ctrlb = ctrlb; desc->len = len; @@ -1693,22 +1676,24 @@ atc_dma_cyclic_fill_desc(struct dma_chan *chan, struct at_desc *desc, u32 ctrla; /* prepare common CRTLA value */ - ctrla = ATC_SCSIZE(sconfig->src_maxburst) - | ATC_DCSIZE(sconfig->dst_maxburst) - | ATC_DST_WIDTH(reg_width) - | ATC_SRC_WIDTH(reg_width) - | period_len >> reg_width; + ctrla = FIELD_PREP(ATC_SCSIZE, sconfig->src_maxburst) | + FIELD_PREP(ATC_DCSIZE, sconfig->dst_maxburst) | + FIELD_PREP(ATC_DST_WIDTH, reg_width) | + FIELD_PREP(ATC_SRC_WIDTH, reg_width) | + period_len >> reg_width; switch (direction) { case DMA_MEM_TO_DEV: desc->lli.saddr = buf_addr + (period_len * period_index); desc->lli.daddr = sconfig->dst_addr; desc->lli.ctrla = ctrla; - desc->lli.ctrlb = ATC_DST_ADDR_MODE_FIXED - | ATC_SRC_ADDR_MODE_INCR - | ATC_FC_MEM2PER - | ATC_SIF(atchan->mem_if) - | ATC_DIF(atchan->per_if); + desc->lli.ctrlb = FIELD_PREP(ATC_DST_ADDR_MODE, + ATC_DST_ADDR_MODE_FIXED) | + FIELD_PREP(ATC_SRC_ADDR_MODE, + ATC_SRC_ADDR_MODE_INCR) | + FIELD_PREP(ATC_FC, ATC_FC_MEM2PER) | + 
FIELD_PREP(ATC_SIF, atchan->mem_if) | + FIELD_PREP(ATC_DIF, atchan->per_if); desc->len = period_len; break; @@ -1716,11 +1701,13 @@ atc_dma_cyclic_fill_desc(struct dma_chan *chan, struct at_desc *desc, desc->lli.saddr = sconfig->src_addr; desc->lli.daddr = buf_addr + (period_len * period_index); desc->lli.ctrla = ctrla; - desc->lli.ctrlb = ATC_DST_ADDR_MODE_INCR - | ATC_SRC_ADDR_MODE_FIXED - | ATC_FC_PER2MEM - | ATC_SIF(atchan->per_if) - | ATC_DIF(atchan->mem_if); + desc->lli.ctrlb = FIELD_PREP(ATC_DST_ADDR_MODE, + ATC_DST_ADDR_MODE_INCR) | + FIELD_PREP(ATC_SRC_ADDR_MODE, + ATC_SRC_ADDR_MODE_FIXED) | + FIELD_PREP(ATC_FC, ATC_FC_PER2MEM) | + FIELD_PREP(ATC_SIF, atchan->per_if) | + FIELD_PREP(ATC_DIF, atchan->mem_if); desc->len = period_len; break; @@ -2115,14 +2102,13 @@ static struct dma_chan *at_dma_xlate(struct of_phandle_args *dma_spec, return NULL; } - atslave->cfg = ATC_DST_H2SEL_HW | ATC_SRC_H2SEL_HW; + atslave->cfg = ATC_DST_H2SEL | ATC_SRC_H2SEL; /* * We can fill both SRC_PER and DST_PER, one of these fields will be * ignored depending on DMA transfer direction. 
*/ per_id = dma_spec->args[1] & AT91_DMA_CFG_PER_ID_MASK; - atslave->cfg |= ATC_DST_PER_MSB(per_id) | ATC_DST_PER(per_id) - | ATC_SRC_PER_MSB(per_id) | ATC_SRC_PER(per_id); + atslave->cfg |= ATC_DST_PER_ID(per_id) | ATC_SRC_PER_ID(per_id); /* * We have to translate the value we get from the device tree since * the half FIFO configuration value had to be 0 to keep backward @@ -2130,14 +2116,16 @@ static struct dma_chan *at_dma_xlate(struct of_phandle_args *dma_spec, */ switch (dma_spec->args[1] & AT91_DMA_CFG_FIFOCFG_MASK) { case AT91_DMA_CFG_FIFOCFG_ALAP: - atslave->cfg |= ATC_FIFOCFG_LARGESTBURST; + atslave->cfg |= FIELD_PREP(ATC_FIFOCFG, + ATC_FIFOCFG_LARGESTBURST); break; case AT91_DMA_CFG_FIFOCFG_ASAP: - atslave->cfg |= ATC_FIFOCFG_ENOUGHSPACE; + atslave->cfg |= FIELD_PREP(ATC_FIFOCFG, + ATC_FIFOCFG_ENOUGHSPACE); break; case AT91_DMA_CFG_FIFOCFG_HALF: default: - atslave->cfg |= ATC_FIFOCFG_HALFFIFO; + atslave->cfg |= FIELD_PREP(ATC_FIFOCFG, ATC_FIFOCFG_HALFFIFO); } atslave->dma_dev = &dmac_pdev->dev; From 1c1114d850b6993184c117edad7c91f7f09cb9d5 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:03:03 +0300 Subject: [PATCH 1384/4122] dmaengine: at_hdmac: Rename "dma_common" to "dma_device" The "dma_common" name was misleading and did not suggest that it is actually a struct dma_device underneath. Rename it so that readers can follow the code more easily. One may see some checks and a warning when running checkpatch. Those have nothing to do with the rename and will be addressed in a further patch.
Signed-off-by: Tudor Ambarus Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-30-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 92 +++++++++++++++++++++--------------------- 1 file changed, 46 insertions(+), 46 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index edec6a8c730c..b2619600f68a 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -338,7 +338,7 @@ static inline u8 convert_buswidth(enum dma_slave_buswidth addr_width) /** * struct at_dma - internal representation of an Atmel HDMA Controller - * @chan_common: common dmaengine dma_device object members + * @dma_device: dmaengine dma_device object members * @atdma_devtype: identifier of DMA controller compatibility * @ch_regs: memory mapped register base * @clk: dma controller clock @@ -348,7 +348,7 @@ static inline u8 convert_buswidth(enum dma_slave_buswidth addr_width) * @chan: channels table to store at_dma_chan structures */ struct at_dma { - struct dma_device dma_common; + struct dma_device dma_device; void __iomem *regs; struct clk *clk; u32 save_imr; @@ -368,7 +368,7 @@ struct at_dma { static inline struct at_dma *to_at_dma(struct dma_device *ddev) { - return container_of(ddev, struct at_dma, dma_common); + return container_of(ddev, struct at_dma, dma_device); } @@ -1069,11 +1069,11 @@ static irqreturn_t at_dma_interrupt(int irq, void *dev_id) if (!pending) break; - dev_vdbg(atdma->dma_common.dev, + dev_vdbg(atdma->dma_device.dev, "interrupt: status = 0x%08x, 0x%08x, 0x%08x\n", status, imr, pending); - for (i = 0; i < atdma->dma_common.chancnt; i++) { + for (i = 0; i < atdma->dma_device.chancnt; i++) { atchan = &atdma->chan[i]; if (pending & (AT_DMA_BTC(i) | AT_DMA_ERR(i))) { if (pending & AT_DMA_ERR(i)) { @@ -2000,7 +2000,7 @@ static int atc_alloc_chan_resources(struct dma_chan *chan) * We need controller-specific 
data to set up slave * transfers. */ - BUG_ON(!atslave->dma_dev || atslave->dma_dev != atdma->dma_common.dev); + BUG_ON(!atslave->dma_dev || atslave->dma_dev != atdma->dma_device.dev); /* if cfg configuration specified take it instead of default */ if (atslave->cfg) @@ -2011,7 +2011,7 @@ static int atc_alloc_chan_resources(struct dma_chan *chan) for (i = 0; i < init_nr_desc_per_channel; i++) { desc = atc_alloc_descriptor(chan, GFP_KERNEL); if (!desc) { - dev_err(atdma->dma_common.dev, + dev_err(atdma->dma_device.dev, "Only %d initial descriptors\n", i); break; } @@ -2255,7 +2255,7 @@ static int __init at_dma_probe(struct platform_device *pdev) return irq; /* discover transaction capabilities */ - atdma->dma_common.cap_mask = plat_dat->cap_mask; + atdma->dma_device.cap_mask = plat_dat->cap_mask; atdma->all_chan_mask = (1 << plat_dat->nr_channels) - 1; atdma->clk = devm_clk_get(&pdev->dev, "dma_clk"); @@ -2299,16 +2299,16 @@ static int __init at_dma_probe(struct platform_device *pdev) cpu_relax(); /* initialize channels related values */ - INIT_LIST_HEAD(&atdma->dma_common.channels); + INIT_LIST_HEAD(&atdma->dma_device.channels); for (i = 0; i < plat_dat->nr_channels; i++) { struct at_dma_chan *atchan = &atdma->chan[i]; atchan->mem_if = AT_DMA_MEM_IF; atchan->per_if = AT_DMA_PER_IF; - atchan->chan_common.device = &atdma->dma_common; + atchan->chan_common.device = &atdma->dma_device; dma_cookie_init(&atchan->chan_common); list_add_tail(&atchan->chan_common.device_node, - &atdma->dma_common.channels); + &atdma->dma_device.channels); atchan->ch_regs = atdma->regs + ch_regs(i); spin_lock_init(&atchan->lock); @@ -2323,49 +2323,49 @@ static int __init at_dma_probe(struct platform_device *pdev) } /* set base routines */ - atdma->dma_common.device_alloc_chan_resources = atc_alloc_chan_resources; - atdma->dma_common.device_free_chan_resources = atc_free_chan_resources; - atdma->dma_common.device_tx_status = atc_tx_status; - atdma->dma_common.device_issue_pending = 
atc_issue_pending; - atdma->dma_common.dev = &pdev->dev; + atdma->dma_device.device_alloc_chan_resources = atc_alloc_chan_resources; + atdma->dma_device.device_free_chan_resources = atc_free_chan_resources; + atdma->dma_device.device_tx_status = atc_tx_status; + atdma->dma_device.device_issue_pending = atc_issue_pending; + atdma->dma_device.dev = &pdev->dev; /* set prep routines based on capability */ - if (dma_has_cap(DMA_INTERLEAVE, atdma->dma_common.cap_mask)) - atdma->dma_common.device_prep_interleaved_dma = atc_prep_dma_interleaved; + if (dma_has_cap(DMA_INTERLEAVE, atdma->dma_device.cap_mask)) + atdma->dma_device.device_prep_interleaved_dma = atc_prep_dma_interleaved; - if (dma_has_cap(DMA_MEMCPY, atdma->dma_common.cap_mask)) - atdma->dma_common.device_prep_dma_memcpy = atc_prep_dma_memcpy; + if (dma_has_cap(DMA_MEMCPY, atdma->dma_device.cap_mask)) + atdma->dma_device.device_prep_dma_memcpy = atc_prep_dma_memcpy; - if (dma_has_cap(DMA_MEMSET, atdma->dma_common.cap_mask)) { - atdma->dma_common.device_prep_dma_memset = atc_prep_dma_memset; - atdma->dma_common.device_prep_dma_memset_sg = atc_prep_dma_memset_sg; - atdma->dma_common.fill_align = DMAENGINE_ALIGN_4_BYTES; + if (dma_has_cap(DMA_MEMSET, atdma->dma_device.cap_mask)) { + atdma->dma_device.device_prep_dma_memset = atc_prep_dma_memset; + atdma->dma_device.device_prep_dma_memset_sg = atc_prep_dma_memset_sg; + atdma->dma_device.fill_align = DMAENGINE_ALIGN_4_BYTES; } - if (dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask)) { - atdma->dma_common.device_prep_slave_sg = atc_prep_slave_sg; + if (dma_has_cap(DMA_SLAVE, atdma->dma_device.cap_mask)) { + atdma->dma_device.device_prep_slave_sg = atc_prep_slave_sg; /* controller can do slave DMA: can trigger cyclic transfers */ - dma_cap_set(DMA_CYCLIC, atdma->dma_common.cap_mask); - atdma->dma_common.device_prep_dma_cyclic = atc_prep_dma_cyclic; - atdma->dma_common.device_config = atc_config; - atdma->dma_common.device_pause = atc_pause; - 
atdma->dma_common.device_resume = atc_resume; - atdma->dma_common.device_terminate_all = atc_terminate_all; - atdma->dma_common.src_addr_widths = ATC_DMA_BUSWIDTHS; - atdma->dma_common.dst_addr_widths = ATC_DMA_BUSWIDTHS; - atdma->dma_common.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); - atdma->dma_common.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + dma_cap_set(DMA_CYCLIC, atdma->dma_device.cap_mask); + atdma->dma_device.device_prep_dma_cyclic = atc_prep_dma_cyclic; + atdma->dma_device.device_config = atc_config; + atdma->dma_device.device_pause = atc_pause; + atdma->dma_device.device_resume = atc_resume; + atdma->dma_device.device_terminate_all = atc_terminate_all; + atdma->dma_device.src_addr_widths = ATC_DMA_BUSWIDTHS; + atdma->dma_device.dst_addr_widths = ATC_DMA_BUSWIDTHS; + atdma->dma_device.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + atdma->dma_device.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; } dma_writel(atdma, EN, AT_DMA_ENABLE); dev_info(&pdev->dev, "Atmel AHB DMA Controller ( %s%s%s), %d channels\n", - dma_has_cap(DMA_MEMCPY, atdma->dma_common.cap_mask) ? "cpy " : "", - dma_has_cap(DMA_MEMSET, atdma->dma_common.cap_mask) ? "set " : "", - dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask) ? "slave " : "", + dma_has_cap(DMA_MEMCPY, atdma->dma_device.cap_mask) ? "cpy " : "", + dma_has_cap(DMA_MEMSET, atdma->dma_device.cap_mask) ? "set " : "", + dma_has_cap(DMA_SLAVE, atdma->dma_device.cap_mask) ? 
"slave " : "", plat_dat->nr_channels); - err = dma_async_device_register(&atdma->dma_common); + err = dma_async_device_register(&atdma->dma_device); if (err) { dev_err(&pdev->dev, "Unable to register: %d.\n", err); goto err_dma_async_device_register; @@ -2388,7 +2388,7 @@ static int __init at_dma_probe(struct platform_device *pdev) return 0; err_of_dma_controller_register: - dma_async_device_unregister(&atdma->dma_common); + dma_async_device_unregister(&atdma->dma_device); err_dma_async_device_register: dma_pool_destroy(atdma->memset_pool); err_memset_pool_create: @@ -2408,13 +2408,13 @@ static int at_dma_remove(struct platform_device *pdev) at_dma_off(atdma); if (pdev->dev.of_node) of_dma_controller_free(pdev->dev.of_node); - dma_async_device_unregister(&atdma->dma_common); + dma_async_device_unregister(&atdma->dma_device); dma_pool_destroy(atdma->memset_pool); dma_pool_destroy(atdma->dma_desc_pool); free_irq(platform_get_irq(pdev, 0), atdma); - list_for_each_entry_safe(chan, _chan, &atdma->dma_common.channels, + list_for_each_entry_safe(chan, _chan, &atdma->dma_device.channels, device_node) { struct at_dma_chan *atchan = to_at_dma_chan(chan); @@ -2443,7 +2443,7 @@ static int at_dma_prepare(struct device *dev) struct at_dma *atdma = dev_get_drvdata(dev); struct dma_chan *chan, *_chan; - list_for_each_entry_safe(chan, _chan, &atdma->dma_common.channels, + list_for_each_entry_safe(chan, _chan, &atdma->dma_device.channels, device_node) { struct at_dma_chan *atchan = to_at_dma_chan(chan); /* wait for transaction completion (except in cyclic case) */ @@ -2478,7 +2478,7 @@ static int at_dma_suspend_noirq(struct device *dev) struct dma_chan *chan, *_chan; /* preserve data */ - list_for_each_entry_safe(chan, _chan, &atdma->dma_common.channels, + list_for_each_entry_safe(chan, _chan, &atdma->dma_device.channels, device_node) { struct at_dma_chan *atchan = to_at_dma_chan(chan); @@ -2528,7 +2528,7 @@ static int at_dma_resume_noirq(struct device *dev) /* restore saved data */ 
dma_writel(atdma, EBCIER, atdma->save_imr); - list_for_each_entry_safe(chan, _chan, &atdma->dma_common.channels, + list_for_each_entry_safe(chan, _chan, &atdma->dma_device.channels, device_node) { struct at_dma_chan *atchan = to_at_dma_chan(chan); From 304184f79c7eb50b32915b29f7cacd58455048d2 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:03:04 +0300 Subject: [PATCH 1385/4122] dmaengine: at_hdmac: Rename "chan_common" to "dma_chan" "chan_common" was misleading and did not suggest that's actually a struct dma_chan underneath. Rename it so that readers can follow the code easier. One may see some checks when running checkpatch. Those have nothing to do with the rename and will be addressed in a further patch. Signed-off-by: Tudor Ambarus Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-31-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 62 +++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index b2619600f68a..e39b9b47234a 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -252,7 +252,7 @@ enum atc_status { /** * struct at_dma_chan - internal representation of an Atmel HDMAC channel - * @chan_common: common dmaengine channel object members + * @dma_chan: common dmaengine channel object members * @device: parent device * @ch_regs: memory mapped register base * @mask: channel index in a mask @@ -272,7 +272,7 @@ enum atc_status { * @free_list: list of descriptors usable by the channel */ struct at_dma_chan { - struct dma_chan chan_common; + struct dma_chan dma_chan; struct at_dma *device; void __iomem *ch_regs; u8 mask; @@ -300,7 +300,7 @@ struct at_dma_chan { static inline struct at_dma_chan *to_at_dma_chan(struct dma_chan *dchan) { - return container_of(dchan, struct at_dma_chan, 
chan_common); + return container_of(dchan, struct at_dma_chan, dma_chan); } /* @@ -382,15 +382,15 @@ static struct device *chan2dev(struct dma_chan *chan) #if defined(VERBOSE_DEBUG) static void vdbg_dump_regs(struct at_dma_chan *atchan) { - struct at_dma *atdma = to_at_dma(atchan->chan_common.device); + struct at_dma *atdma = to_at_dma(atchan->dma_chan.device); - dev_err(chan2dev(&atchan->chan_common), + dev_err(chan2dev(&atchan->dma_chan), " channel %d : imr = 0x%x, chsr = 0x%x\n", - atchan->chan_common.chan_id, + atchan->dma_chan.chan_id, dma_readl(atdma, EBCIMR), dma_readl(atdma, CHSR)); - dev_err(chan2dev(&atchan->chan_common), + dev_err(chan2dev(&atchan->dma_chan), " channel: s0x%x d0x%x ctrl0x%x:0x%x cfg0x%x l0x%x\n", channel_readl(atchan, SADDR), channel_readl(atchan, DADDR), @@ -405,7 +405,7 @@ static void vdbg_dump_regs(struct at_dma_chan *atchan) {} static void atc_dump_lli(struct at_dma_chan *atchan, struct at_lli *lli) { - dev_crit(chan2dev(&atchan->chan_common), + dev_crit(chan2dev(&atchan->dma_chan), "desc: s%pad d%pad ctrl0x%x:0x%x l%pad\n", &lli->saddr, &lli->daddr, lli->ctrla, lli->ctrlb, &lli->dscr); @@ -442,7 +442,7 @@ static void atc_disable_chan_irq(struct at_dma *atdma, int chan_id) */ static inline int atc_chan_is_enabled(struct at_dma_chan *atchan) { - struct at_dma *atdma = to_at_dma(atchan->chan_common.device); + struct at_dma *atdma = to_at_dma(atchan->dma_chan.device); return !!(dma_readl(atdma, CHSR) & atchan->mask); } @@ -603,16 +603,16 @@ static struct at_desc *atc_desc_get(struct at_dma_chan *atchan) ret = desc; break; } - dev_dbg(chan2dev(&atchan->chan_common), + dev_dbg(chan2dev(&atchan->dma_chan), "desc %p not ACKed\n", desc); } spin_unlock_irqrestore(&atchan->lock, flags); - dev_vdbg(chan2dev(&atchan->chan_common), + dev_vdbg(chan2dev(&atchan->dma_chan), "scanned %u descriptors on freelist\n", i); /* no more descriptor available in initial pool: create one more */ if (!ret) - ret = atc_alloc_descriptor(&atchan->chan_common, 
GFP_NOWAIT); + ret = atc_alloc_descriptor(&atchan->dma_chan, GFP_NOWAIT); return ret; } @@ -630,11 +630,11 @@ static void atc_desc_put(struct at_dma_chan *atchan, struct at_desc *desc) spin_lock_irqsave(&atchan->lock, flags); list_for_each_entry(child, &desc->tx_list, desc_node) - dev_vdbg(chan2dev(&atchan->chan_common), + dev_vdbg(chan2dev(&atchan->dma_chan), "moving child desc %p to freelist\n", child); list_splice_init(&desc->tx_list, &atchan->free_list); - dev_vdbg(chan2dev(&atchan->chan_common), + dev_vdbg(chan2dev(&atchan->dma_chan), "moving desc %p to freelist\n", desc); list_add(&desc->desc_node, &atchan->free_list); spin_unlock_irqrestore(&atchan->lock, flags); @@ -673,13 +673,13 @@ static void atc_desc_chain(struct at_desc **first, struct at_desc **prev, */ static void atc_dostart(struct at_dma_chan *atchan, struct at_desc *first) { - struct at_dma *atdma = to_at_dma(atchan->chan_common.device); + struct at_dma *atdma = to_at_dma(atchan->dma_chan.device); /* ASSERT: channel is idle */ if (atc_chan_is_enabled(atchan)) { - dev_err(chan2dev(&atchan->chan_common), + dev_err(chan2dev(&atchan->dma_chan), "BUG: Attempted to start non-idle channel\n"); - dev_err(chan2dev(&atchan->chan_common), + dev_err(chan2dev(&atchan->dma_chan), " channel: s0x%x d0x%x ctrl0x%x:0x%x l0x%x\n", channel_readl(atchan, SADDR), channel_readl(atchan, DADDR), @@ -905,10 +905,10 @@ static void atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc) { struct dma_async_tx_descriptor *txd = &desc->txd; - struct at_dma *atdma = to_at_dma(atchan->chan_common.device); + struct at_dma *atdma = to_at_dma(atchan->dma_chan.device); unsigned long flags; - dev_vdbg(chan2dev(&atchan->chan_common), + dev_vdbg(chan2dev(&atchan->dma_chan), "descriptor %u complete\n", txd->cookie); spin_lock_irqsave(&atchan->lock, flags); @@ -951,7 +951,7 @@ static void atc_advance_work(struct at_dma_chan *atchan) struct at_desc *desc; unsigned long flags; - dev_vdbg(chan2dev(&atchan->chan_common), 
"advance_work\n"); + dev_vdbg(chan2dev(&atchan->dma_chan), "advance_work\n"); spin_lock_irqsave(&atchan->lock, flags); if (atc_chan_is_enabled(atchan) || list_empty(&atchan->active_list)) @@ -1010,9 +1010,9 @@ static void atc_handle_error(struct at_dma_chan *atchan) * controller flagged an error instead of scribbling over * random memory locations. */ - dev_crit(chan2dev(&atchan->chan_common), + dev_crit(chan2dev(&atchan->dma_chan), "Bad descriptor submitted for DMA!\n"); - dev_crit(chan2dev(&atchan->chan_common), + dev_crit(chan2dev(&atchan->dma_chan), " cookie: %d\n", bad_desc->txd.cookie); atc_dump_lli(atchan, &bad_desc->lli); list_for_each_entry(child, &bad_desc->tx_list, desc_node) @@ -1031,7 +1031,7 @@ static void atc_handle_cyclic(struct at_dma_chan *atchan) struct at_desc *first = atc_first_active(atchan); struct dma_async_tx_descriptor *txd = &first->txd; - dev_vdbg(chan2dev(&atchan->chan_common), + dev_vdbg(chan2dev(&atchan->dma_chan), "new cyclic period llp 0x%08x\n", channel_readl(atchan, DSCR)); @@ -1825,7 +1825,7 @@ static int atc_pause(struct dma_chan *chan) { struct at_dma_chan *atchan = to_at_dma_chan(chan); struct at_dma *atdma = to_at_dma(chan->device); - int chan_id = atchan->chan_common.chan_id; + int chan_id = atchan->dma_chan.chan_id; unsigned long flags; dev_vdbg(chan2dev(chan), "%s\n", __func__); @@ -1844,7 +1844,7 @@ static int atc_resume(struct dma_chan *chan) { struct at_dma_chan *atchan = to_at_dma_chan(chan); struct at_dma *atdma = to_at_dma(chan->device); - int chan_id = atchan->chan_common.chan_id; + int chan_id = atchan->dma_chan.chan_id; unsigned long flags; dev_vdbg(chan2dev(chan), "%s\n", __func__); @@ -1866,7 +1866,7 @@ static int atc_terminate_all(struct dma_chan *chan) { struct at_dma_chan *atchan = to_at_dma_chan(chan); struct at_dma *atdma = to_at_dma(chan->device); - int chan_id = atchan->chan_common.chan_id; + int chan_id = atchan->dma_chan.chan_id; unsigned long flags; dev_vdbg(chan2dev(chan), "%s\n", __func__); @@ 
-2305,9 +2305,9 @@ static int __init at_dma_probe(struct platform_device *pdev) atchan->mem_if = AT_DMA_MEM_IF; atchan->per_if = AT_DMA_PER_IF; - atchan->chan_common.device = &atdma->dma_device; - dma_cookie_init(&atchan->chan_common); - list_add_tail(&atchan->chan_common.device_node, + atchan->dma_chan.device = &atdma->dma_device; + dma_cookie_init(&atchan->dma_chan); + list_add_tail(&atchan->dma_chan.device_node, &atdma->dma_device.channels); atchan->ch_regs = atdma->regs + ch_regs(i); @@ -2455,7 +2455,7 @@ static int at_dma_prepare(struct device *dev) static void atc_suspend_cyclic(struct at_dma_chan *atchan) { - struct dma_chan *chan = &atchan->chan_common; + struct dma_chan *chan = &atchan->dma_chan; /* Channel should be paused by user * do it anyway even if it is not done already */ @@ -2496,7 +2496,7 @@ static int at_dma_suspend_noirq(struct device *dev) static void atc_resume_cyclic(struct at_dma_chan *atchan) { - struct at_dma *atdma = to_at_dma(atchan->chan_common.device); + struct at_dma *atdma = to_at_dma(atchan->dma_chan.device); /* restore channel status for cyclic descriptors list: * next descriptor in the cyclic list at the time of suspend */ From 993b397752f3babb698932f20c1c363c4eca4efc Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:03:05 +0300 Subject: [PATCH 1386/4122] dmaengine: at_hdmac: Remove unused member of at_dma_chan The pointer to at_dma engine was never used, remove it. 
Signed-off-by: Tudor Ambarus Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-32-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index e39b9b47234a..143d75c18d1e 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -253,7 +253,6 @@ enum atc_status { /** * struct at_dma_chan - internal representation of an Atmel HDMAC channel * @dma_chan: common dmaengine channel object members - * @device: parent device * @ch_regs: memory mapped register base * @mask: channel index in a mask * @per_if: peripheral interface @@ -273,7 +272,6 @@ enum atc_status { */ struct at_dma_chan { struct dma_chan dma_chan; - struct at_dma *device; void __iomem *ch_regs; u8 mask; u8 per_if; From ac803b56860f6506c55a3c9330007837e3f4edda Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:03:06 +0300 Subject: [PATCH 1387/4122] dmaengine: at_hdmac: Convert driver to use virt-dma Convert the driver to use the core virt-dma. The driver will be easier to maintain as it uses the list handling and the tasklet from virt-dma. With the conversion replace the election of a new transfer in the tasklet with the election of the new transfer in the interrupt handler. With this we have a shorter idle window as we remove the scheduling latency of the tasklet. I chose to do this while doing the conversion to virt-dma, because if I made a prerequisite patch with the new transfer election in the irq handler, I would have to duplicate some virt-dma code in the at_hdmac driver that would end up being removed at the virt-dma conversion anyway. So do this in a single step. 
Signed-off-by: Tudor Ambarus Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-33-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/Kconfig | 1 + drivers/dma/at_hdmac.c | 1110 +++++++++++++++------------------------- 2 files changed, 404 insertions(+), 707 deletions(-) diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index ea81d825575f..b9d54f20812f 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -97,6 +97,7 @@ config AT_HDMAC tristate "Atmel AHB DMA support" depends on ARCH_AT91 select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS help Support the Atmel AHB DMA controller. diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 143d75c18d1e..8858470246e1 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -3,6 +3,7 @@ * Driver for the Atmel AHB DMA Controller (aka HDMA or DMAC on AT91 systems) * * Copyright (C) 2008 Atmel Corporation + * Copyright (C) 2022 Microchip Technology, Inc. and its subsidiaries * * This supports the Atmel AHB DMA Controller found in several Atmel SoCs. * The only Atmel DMA Controller that is not covered by this driver is the one @@ -25,6 +26,7 @@ #include #include "dmaengine.h" +#include "virt-dma.h" /* * Glossary @@ -200,25 +202,31 @@ struct at_lli { u32 dscr; /* chain to next lli */ }; +/** + * struct atdma_sg - atdma scatter gather entry + * @len: length of the current Linked List Item. + * @lli: linked list item that is passed to the DMA controller + * @lli_phys: physical address of the LLI. + */ +struct atdma_sg { + unsigned int len; + struct at_lli *lli; + dma_addr_t lli_phys; +}; + /** * struct at_desc - software descriptor - * @at_lli: hardware lli structure - * @txd: support for the async_tx api - * @desc_node: node on the channed descriptors list - * @len: descriptor byte count + * @vd: pointer to the virtual dma descriptor. 
+ * @atchan: pointer to the atmel dma channel. * @total_len: total transaction byte count + * @sglen: number of sg entries. + * @sg: array of sgs. */ struct at_desc { - /* FIRST values the hardware uses */ - struct at_lli lli; - - /* THEN values for driver housekeeping */ - struct list_head tx_list; - struct dma_async_tx_descriptor txd; - struct list_head desc_node; - size_t len; + struct virt_dma_desc vd; + struct at_dma_chan *atchan; size_t total_len; - + unsigned int sglen; /* Interleaved data */ size_t boundary; size_t dst_hole; @@ -228,15 +236,9 @@ struct at_desc { bool memset_buffer; dma_addr_t memset_paddr; int *memset_vaddr; + struct atdma_sg sg[]; }; -static inline struct at_desc * -txd_to_at_desc(struct dma_async_tx_descriptor *txd) -{ - return container_of(txd, struct at_desc, txd); -} - - /*-- Channels --------------------------------------------------------*/ /** @@ -245,49 +247,40 @@ txd_to_at_desc(struct dma_async_tx_descriptor *txd) * Manipulated with atomic operations. */ enum atc_status { - ATC_IS_ERROR = 0, ATC_IS_PAUSED = 1, ATC_IS_CYCLIC = 24, }; /** * struct at_dma_chan - internal representation of an Atmel HDMAC channel - * @dma_chan: common dmaengine channel object members + * @vc: virtual dma channel entry. + * @atdma: pointer to the driver data. 
* @ch_regs: memory mapped register base * @mask: channel index in a mask * @per_if: peripheral interface * @mem_if: memory interface * @status: transmit status information from irq/prep* functions * to tasklet (use atomic operations) - * @tasklet: bottom half to finish transaction work * @save_cfg: configuration register that is saved on suspend/resume cycle * @save_dscr: for cyclic operations, preserve next descriptor address in * the cyclic list on suspend/resume cycle * @dma_sconfig: configuration for slave transfers, passed via * .device_config - * @lock: serializes enqueue/dequeue operations to descriptors lists - * @active_list: list of descriptors dmaengine is being running on - * @queue: list of descriptors ready to be submitted to engine - * @free_list: list of descriptors usable by the channel + * @desc: pointer to the atmel dma descriptor. */ struct at_dma_chan { - struct dma_chan dma_chan; + struct virt_dma_chan vc; + struct at_dma *atdma; void __iomem *ch_regs; u8 mask; u8 per_if; u8 mem_if; unsigned long status; - struct tasklet_struct tasklet; u32 save_cfg; u32 save_dscr; - struct dma_slave_config dma_sconfig; - - spinlock_t lock; - - /* these other elements are all protected by lock */ - struct list_head active_list; - struct list_head queue; - struct list_head free_list; + struct dma_slave_config dma_sconfig; + bool cyclic; + struct at_desc *desc; }; #define channel_readl(atchan, name) \ @@ -296,11 +289,6 @@ struct at_dma_chan { #define channel_writel(atchan, name, val) \ __raw_writel((val), (atchan)->ch_regs + ATC_##name##_OFFSET) -static inline struct at_dma_chan *to_at_dma_chan(struct dma_chan *dchan) -{ - return container_of(dchan, struct at_dma_chan, dma_chan); -} - /* * Fix sconfig's burst size according to at_hdmac. We need to convert them as: * 1 -> 0, 4 -> 1, 8 -> 2, 16 -> 3, 32 -> 4, 64 -> 5, 128 -> 6, 256 -> 7. 
@@ -342,7 +330,7 @@ static inline u8 convert_buswidth(enum dma_slave_buswidth addr_width) * @clk: dma controller clock * @save_imr: interrupt mask register that is saved on suspend/resume cycle * @all_chan_mask: all channels availlable in a mask - * @dma_desc_pool: base of DMA descriptor region (DMA address) + * @lli_pool: hw lli table * @chan: channels table to store at_dma_chan structures */ struct at_dma { @@ -353,7 +341,7 @@ struct at_dma { u8 all_chan_mask; - struct dma_pool *dma_desc_pool; + struct dma_pool *lli_pool; struct dma_pool *memset_pool; /* AT THE END channels table */ struct at_dma_chan chan[]; @@ -364,6 +352,16 @@ struct at_dma { #define dma_writel(atdma, name, val) \ __raw_writel((val), (atdma)->regs + AT_DMA_##name) +static inline struct at_desc *to_atdma_desc(struct dma_async_tx_descriptor *t) +{ + return container_of(t, struct at_desc, vd.tx); +} + +static inline struct at_dma_chan *to_at_dma_chan(struct dma_chan *chan) +{ + return container_of(chan, struct at_dma_chan, vc.chan); +} + static inline struct at_dma *to_at_dma(struct dma_device *ddev) { return container_of(ddev, struct at_dma, dma_device); @@ -380,15 +378,15 @@ static struct device *chan2dev(struct dma_chan *chan) #if defined(VERBOSE_DEBUG) static void vdbg_dump_regs(struct at_dma_chan *atchan) { - struct at_dma *atdma = to_at_dma(atchan->dma_chan.device); + struct at_dma *atdma = to_at_dma(atchan->vc.chan.device); - dev_err(chan2dev(&atchan->dma_chan), + dev_err(chan2dev(&atchan->vc.chan), " channel %d : imr = 0x%x, chsr = 0x%x\n", - atchan->dma_chan.chan_id, + atchan->vc.chan.chan_id, dma_readl(atdma, EBCIMR), dma_readl(atdma, CHSR)); - dev_err(chan2dev(&atchan->dma_chan), + dev_err(chan2dev(&atchan->vc.chan), " channel: s0x%x d0x%x ctrl0x%x:0x%x cfg0x%x l0x%x\n", channel_readl(atchan, SADDR), channel_readl(atchan, DADDR), @@ -403,7 +401,7 @@ static void vdbg_dump_regs(struct at_dma_chan *atchan) {} static void atc_dump_lli(struct at_dma_chan *atchan, struct at_lli *lli) { - 
dev_crit(chan2dev(&atchan->dma_chan), + dev_crit(chan2dev(&atchan->vc.chan), "desc: s%pad d%pad ctrl0x%x:0x%x l%pad\n", &lli->saddr, &lli->daddr, lli->ctrla, lli->ctrlb, &lli->dscr); @@ -440,7 +438,7 @@ static void atc_disable_chan_irq(struct at_dma *atdma, int chan_id) */ static inline int atc_chan_is_enabled(struct at_dma_chan *atchan) { - struct at_dma *atdma = to_at_dma(atchan->dma_chan.device); + struct at_dma *atdma = to_at_dma(atchan->vc.chan.device); return !!(dma_readl(atdma, CHSR) & atchan->mask); } @@ -464,18 +462,19 @@ static inline int atc_chan_is_cyclic(struct at_dma_chan *atchan) } /** - * set_desc_eol - set end-of-link to descriptor so it will end transfer + * set_lli_eol - set end-of-link to descriptor so it will end transfer * @desc: descriptor, signle or at the end of a chain, to end chain on + * @i: index of the atmel scatter gather entry that is at the end of the chain. */ -static void set_desc_eol(struct at_desc *desc) +static void set_lli_eol(struct at_desc *desc, unsigned int i) { - u32 ctrlb = desc->lli.ctrlb; + u32 ctrlb = desc->sg[i].lli->ctrlb; ctrlb &= ~ATC_IEN; ctrlb |= ATC_SRC_DSCR_DIS | ATC_DST_DSCR_DIS; - desc->lli.ctrlb = ctrlb; - desc->lli.dscr = 0; + desc->sg[i].lli->ctrlb = ctrlb; + desc->sg[i].lli->dscr = 0; } #define ATC_DEFAULT_CFG FIELD_PREP(ATC_FIFOCFG, ATC_FIFOCFG_HALFFIFO) @@ -518,13 +517,6 @@ struct at_dma_slave { u32 cfg; }; -/* prototypes */ -static dma_cookie_t atc_tx_submit(struct dma_async_tx_descriptor *tx); -static void atc_issue_pending(struct dma_chan *chan); - - -/*----------------------------------------------------------------------*/ - static inline unsigned int atc_get_xfer_width(dma_addr_t src, dma_addr_t dst, size_t len) { @@ -540,196 +532,72 @@ static inline unsigned int atc_get_xfer_width(dma_addr_t src, dma_addr_t dst, return width; } -static struct at_desc *atc_first_active(struct at_dma_chan *atchan) +static void atdma_lli_chain(struct at_desc *desc, unsigned int i) { - return 
list_first_entry(&atchan->active_list, - struct at_desc, desc_node); -} + struct atdma_sg *atdma_sg = &desc->sg[i]; -static struct at_desc *atc_first_queued(struct at_dma_chan *atchan) -{ - return list_first_entry(&atchan->queue, - struct at_desc, desc_node); -} - -/** - * atc_alloc_descriptor - allocate and return an initialized descriptor - * @chan: the channel to allocate descriptors for - * @gfp_flags: GFP allocation flags - * - * Note: The ack-bit is positioned in the descriptor flag at creation time - * to make initial allocation more convenient. This bit will be cleared - * and control will be given to client at usage time (during - * preparation functions). - */ -static struct at_desc *atc_alloc_descriptor(struct dma_chan *chan, - gfp_t gfp_flags) -{ - struct at_desc *desc = NULL; - struct at_dma *atdma = to_at_dma(chan->device); - dma_addr_t phys; - - desc = dma_pool_zalloc(atdma->dma_desc_pool, gfp_flags, &phys); - if (desc) { - INIT_LIST_HEAD(&desc->tx_list); - dma_async_tx_descriptor_init(&desc->txd, chan); - /* txd.flags will be overwritten in prep functions */ - desc->txd.flags = DMA_CTRL_ACK; - desc->txd.tx_submit = atc_tx_submit; - desc->txd.phys = phys; - } - - return desc; -} - -/** - * atc_desc_get - get an unused descriptor from free_list - * @atchan: channel we want a new descriptor for - */ -static struct at_desc *atc_desc_get(struct at_dma_chan *atchan) -{ - struct at_desc *desc, *_desc; - struct at_desc *ret = NULL; - unsigned long flags; - unsigned int i = 0; - - spin_lock_irqsave(&atchan->lock, flags); - list_for_each_entry_safe(desc, _desc, &atchan->free_list, desc_node) { - i++; - if (async_tx_test_ack(&desc->txd)) { - list_del(&desc->desc_node); - ret = desc; - break; - } - dev_dbg(chan2dev(&atchan->dma_chan), - "desc %p not ACKed\n", desc); - } - spin_unlock_irqrestore(&atchan->lock, flags); - dev_vdbg(chan2dev(&atchan->dma_chan), - "scanned %u descriptors on freelist\n", i); - - /* no more descriptor available in initial pool: create 
one more */ - if (!ret) - ret = atc_alloc_descriptor(&atchan->dma_chan, GFP_NOWAIT); - - return ret; -} - -/** - * atc_desc_put - move a descriptor, including any children, to the free list - * @atchan: channel we work on - * @desc: descriptor, at the head of a chain, to move to free list - */ -static void atc_desc_put(struct at_dma_chan *atchan, struct at_desc *desc) -{ - if (desc) { - struct at_desc *child; - unsigned long flags; - - spin_lock_irqsave(&atchan->lock, flags); - list_for_each_entry(child, &desc->tx_list, desc_node) - dev_vdbg(chan2dev(&atchan->dma_chan), - "moving child desc %p to freelist\n", - child); - list_splice_init(&desc->tx_list, &atchan->free_list); - dev_vdbg(chan2dev(&atchan->dma_chan), - "moving desc %p to freelist\n", desc); - list_add(&desc->desc_node, &atchan->free_list); - spin_unlock_irqrestore(&atchan->lock, flags); - } -} - -/** - * atc_desc_chain - build chain adding a descriptor - * @first: address of first descriptor of the chain - * @prev: address of previous descriptor of the chain - * @desc: descriptor to queue - * - * Called from prep_* functions - */ -static void atc_desc_chain(struct at_desc **first, struct at_desc **prev, - struct at_desc *desc) -{ - if (!(*first)) { - *first = desc; - } else { - /* inform the HW lli about chaining */ - (*prev)->lli.dscr = desc->txd.phys; - /* insert the link descriptor to the LD ring */ - list_add_tail(&desc->desc_node, - &(*first)->tx_list); - } - *prev = desc; + if (i) + desc->sg[i - 1].lli->dscr = atdma_sg->lli_phys; } /** * atc_dostart - starts the DMA engine for real * @atchan: the channel we want to start - * @first: first descriptor in the list we want to begin with - * - * Called with atchan->lock held and bh disabled */ -static void atc_dostart(struct at_dma_chan *atchan, struct at_desc *first) +static void atc_dostart(struct at_dma_chan *atchan) { - struct at_dma *atdma = to_at_dma(atchan->dma_chan.device); + struct virt_dma_desc *vd = vchan_next_desc(&atchan->vc); + struct 
at_desc *desc; - /* ASSERT: channel is idle */ - if (atc_chan_is_enabled(atchan)) { - dev_err(chan2dev(&atchan->dma_chan), - "BUG: Attempted to start non-idle channel\n"); - dev_err(chan2dev(&atchan->dma_chan), - " channel: s0x%x d0x%x ctrl0x%x:0x%x l0x%x\n", - channel_readl(atchan, SADDR), - channel_readl(atchan, DADDR), - channel_readl(atchan, CTRLA), - channel_readl(atchan, CTRLB), - channel_readl(atchan, DSCR)); - - /* The tasklet will hopefully advance the queue... */ + if (!vd) { + atchan->desc = NULL; return; } vdbg_dump_regs(atchan); + list_del(&vd->node); + atchan->desc = desc = to_atdma_desc(&vd->tx); + channel_writel(atchan, SADDR, 0); channel_writel(atchan, DADDR, 0); channel_writel(atchan, CTRLA, 0); channel_writel(atchan, CTRLB, 0); - channel_writel(atchan, DSCR, first->txd.phys); - channel_writel(atchan, SPIP, FIELD_PREP(ATC_SPIP_HOLE, - first->src_hole) | - FIELD_PREP(ATC_SPIP_BOUNDARY, first->boundary)); - channel_writel(atchan, DPIP, FIELD_PREP(ATC_DPIP_HOLE, - first->dst_hole) | - FIELD_PREP(ATC_DPIP_BOUNDARY, first->boundary)); + channel_writel(atchan, DSCR, desc->sg[0].lli_phys); + channel_writel(atchan, SPIP, + FIELD_PREP(ATC_SPIP_HOLE, desc->src_hole) | + FIELD_PREP(ATC_SPIP_BOUNDARY, desc->boundary)); + channel_writel(atchan, DPIP, + FIELD_PREP(ATC_DPIP_HOLE, desc->dst_hole) | + FIELD_PREP(ATC_DPIP_BOUNDARY, desc->boundary)); + /* Don't allow CPU to reorder channel enable. 
*/ wmb(); - dma_writel(atdma, CHER, atchan->mask); + dma_writel(atchan->atdma, CHER, atchan->mask); vdbg_dump_regs(atchan); } -/* - * atc_get_desc_by_cookie - get the descriptor of a cookie - * @atchan: the DMA channel - * @cookie: the cookie to get the descriptor for - */ -static struct at_desc *atc_get_desc_by_cookie(struct at_dma_chan *atchan, - dma_cookie_t cookie) +static void atdma_desc_free(struct virt_dma_desc *vd) { - struct at_desc *desc, *_desc; + struct at_dma *atdma = to_at_dma(vd->tx.chan->device); + struct at_desc *desc = to_atdma_desc(&vd->tx); + unsigned int i; - list_for_each_entry_safe(desc, _desc, &atchan->queue, desc_node) { - if (desc->txd.cookie == cookie) - return desc; + for (i = 0; i < desc->sglen; i++) { + if (desc->sg[i].lli) + dma_pool_free(atdma->lli_pool, desc->sg[i].lli, + desc->sg[i].lli_phys); } - list_for_each_entry_safe(desc, _desc, &atchan->active_list, desc_node) { - if (desc->txd.cookie == cookie) - return desc; + /* If the transfer was a memset, free our temporary buffer */ + if (desc->memset_buffer) { + dma_pool_free(atdma->memset_pool, desc->memset_vaddr, + desc->memset_paddr); + desc->memset_buffer = false; } - return NULL; + kfree(desc); } /** @@ -756,20 +624,19 @@ static inline u32 atc_calc_bytes_left(u32 current_len, u32 ctrla) /** * atc_get_llis_residue - Get residue for a hardware linked list transfer * - * Calculate the residue by removing the length of the child descriptors already - * transferred from the total length. To get the current child descriptor we can - * use the value of the channel's DSCR register and compare it against the value - * of the hardware linked list structure of each child descriptor. + * Calculate the residue by removing the length of the Linked List Item (LLI) + * already transferred from the total length. To get the current LLI we can use + * the value of the channel's DSCR register and compare it against the DSCR + * value of each LLI. 
* * The CTRLA register provides us with the amount of data already read from the - * source for the current child descriptor. So we can compute a more accurate - * residue by also removing the number of bytes corresponding to this amount of - * data. + * source for the LLI. So we can compute a more accurate residue by also + * removing the number of bytes corresponding to this amount of data. * * However, the DSCR and CTRLA registers cannot be read both atomically. Hence a - * race condition may occur: the first read register may refer to one child - * descriptor whereas the second read may refer to a later child descriptor in - * the list because of the DMA transfer progression inbetween the two reads. + * race condition may occur: the first read register may refer to one LLI + * whereas the second read may refer to a later LLI in the list because of the + * DMA transfer progression inbetween the two reads. * * One solution could have been to pause the DMA transfer, read the DSCR and * CTRLA then resume the DMA transfer. Nonetheless, this approach presents some @@ -786,12 +653,11 @@ static inline u32 atc_calc_bytes_left(u32 current_len, u32 ctrla) * Then we'd rather use another solution: the DSCR is read a first time, the * CTRLA is read in turn, next the DSCR is read a second time. If the two * consecutive read values of the DSCR are the same then we assume both refers - * to the very same child descriptor as well as the CTRLA value read inbetween - * does. For cyclic tranfers, the assumption is that a full loop is "not so - * fast". If the two DSCR values are different, we read again the CTRLA then the - * DSCR till two consecutive read values from DSCR are equal or till the - * maximum trials is reach. This algorithm is very unlikely not to find a stable - * value for DSCR. + * to the very same LLI as well as the CTRLA value read inbetween does. For + * cyclic tranfers, the assumption is that a full loop is "not so fast". 
If the + * two DSCR values are different, we read again the CTRLA then the DSCR till two + * consecutive read values from DSCR are equal or till the maximum trials is + * reach. This algorithm is very unlikely not to find a stable value for DSCR. * @atchan: pointer to an atmel hdmac channel. * @desc: pointer to the descriptor for which the residue is calculated. * @residue: residue to be set to dma_tx_state. @@ -800,7 +666,6 @@ static inline u32 atc_calc_bytes_left(u32 current_len, u32 ctrla) static int atc_get_llis_residue(struct at_dma_chan *atchan, struct at_desc *desc, u32 *residue) { - struct at_desc *child; u32 len, ctrla, dscr; unsigned int i; @@ -836,24 +701,25 @@ static int atc_get_llis_residue(struct at_dma_chan *atchan, return -ETIMEDOUT; /* For the first descriptor we can be more accurate. */ - if (desc->lli.dscr == dscr) { + if (desc->sg[0].lli->dscr == dscr) { *residue = atc_calc_bytes_left(len, ctrla); return 0; } + len -= desc->sg[0].len; - len -= desc->len; - list_for_each_entry(child, &desc->tx_list, desc_node) { - if (child->lli.dscr == dscr) + for (i = 1; i < desc->sglen; i++) { + if (desc->sg[i].lli && desc->sg[i].lli->dscr == dscr) break; - len -= child->len; + len -= desc->sg[i].len; } /* - * For the current descriptor in the chain we can calculate the - * remaining bytes using the channel's register. + * For the current LLI in the chain we can calculate the remaining bytes + * using the channel's CTRLA register. 
*/ *residue = atc_calc_bytes_left(len, ctrla); return 0; + } /** @@ -867,139 +733,42 @@ static int atc_get_llis_residue(struct at_dma_chan *atchan, static int atc_get_residue(struct dma_chan *chan, dma_cookie_t cookie, u32 *residue) { - struct at_dma_chan *atchan = to_at_dma_chan(chan); - struct at_desc *desc_first = atc_first_active(atchan); - struct at_desc *desc; + struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct virt_dma_desc *vd; + struct at_desc *desc = NULL; u32 len, ctrla; - /* - * If the cookie doesn't match to the currently running transfer then - * we can return the total length of the associated DMA transfer, - * because it is still queued. - */ - desc = atc_get_desc_by_cookie(atchan, cookie); - if (desc == NULL) - return -EINVAL; - else if (desc != desc_first) - return desc->total_len; + vd = vchan_find_desc(&atchan->vc, cookie); + if (vd) + desc = to_atdma_desc(&vd->tx); + else if (atchan->desc && atchan->desc->vd.tx.cookie == cookie) + desc = atchan->desc; - if (desc_first->lli.dscr) + if (!desc) + return -EINVAL; + + if (desc->sg[0].lli->dscr) /* hardware linked list transfer */ - return atc_get_llis_residue(atchan, desc_first, residue); + return atc_get_llis_residue(atchan, desc, residue); /* single transfer */ - len = desc_first->total_len; + len = desc->total_len; ctrla = channel_readl(atchan, CTRLA); *residue = atc_calc_bytes_left(len, ctrla); return 0; } -/** - * atc_chain_complete - finish work for one transaction chain - * @atchan: channel we work on - * @desc: descriptor at the head of the chain we want do complete - */ -static void -atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc) -{ - struct dma_async_tx_descriptor *txd = &desc->txd; - struct at_dma *atdma = to_at_dma(atchan->dma_chan.device); - unsigned long flags; - - dev_vdbg(chan2dev(&atchan->dma_chan), - "descriptor %u complete\n", txd->cookie); - - spin_lock_irqsave(&atchan->lock, flags); - - /* mark the descriptor as complete for non cyclic cases only 
*/ - if (!atc_chan_is_cyclic(atchan)) - dma_cookie_complete(txd); - - spin_unlock_irqrestore(&atchan->lock, flags); - - dma_descriptor_unmap(txd); - /* for cyclic transfers, - * no need to replay callback function while stopping */ - if (!atc_chan_is_cyclic(atchan)) - dmaengine_desc_get_callback_invoke(txd, NULL); - - dma_run_dependencies(txd); - - spin_lock_irqsave(&atchan->lock, flags); - /* move children to free_list */ - list_splice_init(&desc->tx_list, &atchan->free_list); - /* add myself to free_list */ - list_add(&desc->desc_node, &atchan->free_list); - spin_unlock_irqrestore(&atchan->lock, flags); - - /* If the transfer was a memset, free our temporary buffer */ - if (desc->memset_buffer) { - dma_pool_free(atdma->memset_pool, desc->memset_vaddr, - desc->memset_paddr); - desc->memset_buffer = false; - } -} - -/** - * atc_advance_work - at the end of a transaction, move forward - * @atchan: channel where the transaction ended - */ -static void atc_advance_work(struct at_dma_chan *atchan) -{ - struct at_desc *desc; - unsigned long flags; - - dev_vdbg(chan2dev(&atchan->dma_chan), "advance_work\n"); - - spin_lock_irqsave(&atchan->lock, flags); - if (atc_chan_is_enabled(atchan) || list_empty(&atchan->active_list)) - return spin_unlock_irqrestore(&atchan->lock, flags); - - desc = atc_first_active(atchan); - /* Remove the transfer node from the active list. */ - list_del_init(&desc->desc_node); - spin_unlock_irqrestore(&atchan->lock, flags); - atc_chain_complete(atchan, desc); - - /* advance work */ - spin_lock_irqsave(&atchan->lock, flags); - if (!list_empty(&atchan->active_list)) { - desc = atc_first_queued(atchan); - list_move_tail(&desc->desc_node, &atchan->active_list); - atc_dostart(atchan, desc); - } - spin_unlock_irqrestore(&atchan->lock, flags); -} - - /** * atc_handle_error - handle errors reported by DMA controller - * @atchan: channel where error occurs + * @atchan: channel where error occurs. 
+ * @i: channel index */ -static void atc_handle_error(struct at_dma_chan *atchan) +static void atc_handle_error(struct at_dma_chan *atchan, unsigned int i) { - struct at_desc *bad_desc; - struct at_desc *desc; - struct at_desc *child; - unsigned long flags; + struct at_desc *desc = atchan->desc; - spin_lock_irqsave(&atchan->lock, flags); - /* - * The descriptor currently at the head of the active list is - * broked. Since we don't have any way to report errors, we'll - * just have to scream loudly and try to carry on. - */ - bad_desc = atc_first_active(atchan); - list_del_init(&bad_desc->desc_node); - - /* Try to restart the controller */ - if (!list_empty(&atchan->active_list)) { - desc = atc_first_queued(atchan); - list_move_tail(&desc->desc_node, &atchan->active_list); - atc_dostart(atchan, desc); - } - - spin_unlock_irqrestore(&atchan->lock, flags); + /* Disable channel on AHB error */ + dma_writel(atchan->atdma, CHDR, AT_DMA_RES(i) | atchan->mask); /* * KERN_CRITICAL may seem harsh, but since this only happens @@ -1008,47 +777,37 @@ static void atc_handle_error(struct at_dma_chan *atchan) * controller flagged an error instead of scribbling over * random memory locations. 
*/ - dev_crit(chan2dev(&atchan->dma_chan), - "Bad descriptor submitted for DMA!\n"); - dev_crit(chan2dev(&atchan->dma_chan), - " cookie: %d\n", bad_desc->txd.cookie); - atc_dump_lli(atchan, &bad_desc->lli); - list_for_each_entry(child, &bad_desc->tx_list, desc_node) - atc_dump_lli(atchan, &child->lli); - - /* Pretend the descriptor completed successfully */ - atc_chain_complete(atchan, bad_desc); + dev_crit(chan2dev(&atchan->vc.chan), "Bad descriptor submitted for DMA!\n"); + dev_crit(chan2dev(&atchan->vc.chan), "cookie: %d\n", + desc->vd.tx.cookie); + for (i = 0; i < desc->sglen; i++) + atc_dump_lli(atchan, desc->sg[i].lli); } -/** - * atc_handle_cyclic - at the end of a period, run callback function - * @atchan: channel used for cyclic operations - */ -static void atc_handle_cyclic(struct at_dma_chan *atchan) +static void atdma_handle_chan_done(struct at_dma_chan *atchan, u32 pending, + unsigned int i) { - struct at_desc *first = atc_first_active(atchan); - struct dma_async_tx_descriptor *txd = &first->txd; + struct at_desc *desc; - dev_vdbg(chan2dev(&atchan->dma_chan), - "new cyclic period llp 0x%08x\n", - channel_readl(atchan, DSCR)); + spin_lock(&atchan->vc.lock); + desc = atchan->desc; - dmaengine_desc_get_callback_invoke(txd, NULL); -} + if (desc) { + if (pending & AT_DMA_ERR(i)) { + atc_handle_error(atchan, i); + /* Pretend the descriptor completed successfully */ + } -/*-- IRQ & Tasklet ---------------------------------------------------*/ - -static void atc_tasklet(struct tasklet_struct *t) -{ - struct at_dma_chan *atchan = from_tasklet(atchan, t, tasklet); - - if (test_and_clear_bit(ATC_IS_ERROR, &atchan->status)) - return atc_handle_error(atchan); - - if (atc_chan_is_cyclic(atchan)) - return atc_handle_cyclic(atchan); - - atc_advance_work(atchan); + if (atc_chan_is_cyclic(atchan)) { + vchan_cyclic_callback(&desc->vd); + } else { + vchan_cookie_complete(&desc->vd); + atchan->desc = NULL; + if (!(atc_chan_is_enabled(atchan))) + atc_dostart(atchan); + } + 
} + spin_unlock(&atchan->vc.lock); } static irqreturn_t at_dma_interrupt(int irq, void *dev_id) @@ -1073,17 +832,10 @@ static irqreturn_t at_dma_interrupt(int irq, void *dev_id) for (i = 0; i < atdma->dma_device.chancnt; i++) { atchan = &atdma->chan[i]; - if (pending & (AT_DMA_BTC(i) | AT_DMA_ERR(i))) { - if (pending & AT_DMA_ERR(i)) { - /* Disable channel on AHB error */ - dma_writel(atdma, CHDR, - AT_DMA_RES(i) | atchan->mask); - /* Give information to tasklet */ - set_bit(ATC_IS_ERROR, &atchan->status); - } - tasklet_schedule(&atchan->tasklet); - ret = IRQ_HANDLED; - } + if (!(pending & (AT_DMA_BTC(i) | AT_DMA_ERR(i)))) + continue; + atdma_handle_chan_done(atchan, pending, i); + ret = IRQ_HANDLED; } } while (pending); @@ -1091,35 +843,7 @@ static irqreturn_t at_dma_interrupt(int irq, void *dev_id) return ret; } - /*-- DMA Engine API --------------------------------------------------*/ - -/** - * atc_tx_submit - set the prepared descriptor(s) to be executed by the engine - * @tx: descriptor at the head of the transaction chain - * - * Queue chain if DMA engine is working already - * - * Cookie increment and adding to active_list or queue must be atomic - */ -static dma_cookie_t atc_tx_submit(struct dma_async_tx_descriptor *tx) -{ - struct at_desc *desc = txd_to_at_desc(tx); - struct at_dma_chan *atchan = to_at_dma_chan(tx->chan); - dma_cookie_t cookie; - unsigned long flags; - - spin_lock_irqsave(&atchan->lock, flags); - cookie = dma_cookie_assign(tx); - - list_add_tail(&desc->desc_node, &atchan->queue); - spin_unlock_irqrestore(&atchan->lock, flags); - - dev_vdbg(chan2dev(tx->chan), "tx_submit: queued %u\n", - desc->txd.cookie); - return cookie; -} - /** * atc_prep_dma_interleaved - prepare memory to memory interleaved operation * @chan: the channel to prepare operation on @@ -1131,9 +855,12 @@ atc_prep_dma_interleaved(struct dma_chan *chan, struct dma_interleaved_template *xt, unsigned long flags) { + struct at_dma *atdma = to_at_dma(chan->device); struct 
at_dma_chan *atchan = to_at_dma_chan(chan); struct data_chunk *first; - struct at_desc *desc = NULL; + struct atdma_sg *atdma_sg; + struct at_desc *desc; + struct at_lli *lli; size_t xfer_count; unsigned int dwidth; u32 ctrla; @@ -1172,8 +899,7 @@ atc_prep_dma_interleaved(struct dma_chan *chan, len += chunk->size; } - dwidth = atc_get_xfer_width(xt->src_start, - xt->dst_start, len); + dwidth = atc_get_xfer_width(xt->src_start, xt->dst_start, len); xfer_count = len >> dwidth; if (xfer_count > ATC_BTSIZE_MAX) { @@ -1190,32 +916,34 @@ atc_prep_dma_interleaved(struct dma_chan *chan, ATC_SRC_PIP | ATC_DST_PIP | FIELD_PREP(ATC_FC, ATC_FC_MEM2MEM); - /* create the transfer */ - desc = atc_desc_get(atchan); - if (!desc) { - dev_err(chan2dev(chan), - "%s: couldn't allocate our descriptor\n", __func__); + desc = kzalloc(struct_size(desc, sg, 1), GFP_ATOMIC); + if (!desc) + return NULL; + desc->sglen = 1; + + atdma_sg = desc->sg; + atdma_sg->lli = dma_pool_alloc(atdma->lli_pool, GFP_NOWAIT, + &atdma_sg->lli_phys); + if (!atdma_sg->lli) { + kfree(desc); return NULL; } + lli = atdma_sg->lli; - desc->lli.saddr = xt->src_start; - desc->lli.daddr = xt->dst_start; - desc->lli.ctrla = ctrla | xfer_count; - desc->lli.ctrlb = ctrlb; + lli->saddr = xt->src_start; + lli->daddr = xt->dst_start; + lli->ctrla = ctrla | xfer_count; + lli->ctrlb = ctrlb; desc->boundary = first->size >> dwidth; desc->dst_hole = (dmaengine_get_dst_icg(xt, first) >> dwidth) + 1; desc->src_hole = (dmaengine_get_src_icg(xt, first) >> dwidth) + 1; - desc->txd.cookie = -EBUSY; - desc->total_len = desc->len = len; + atdma_sg->len = len; + desc->total_len = len; - /* set end-of-link to the last link descriptor of list*/ - set_desc_eol(desc); - - desc->txd.flags = flags; /* client is in control of this ack */ - - return &desc->txd; + set_lli_eol(desc, 0); + return vchan_tx_prep(&atchan->vc, &desc->vd, flags); } /** @@ -1230,25 +958,32 @@ static struct dma_async_tx_descriptor * atc_prep_dma_memcpy(struct dma_chan 
*chan, dma_addr_t dest, dma_addr_t src, size_t len, unsigned long flags) { + struct at_dma *atdma = to_at_dma(chan->device); struct at_dma_chan *atchan = to_at_dma_chan(chan); struct at_desc *desc = NULL; - struct at_desc *first = NULL; - struct at_desc *prev = NULL; size_t xfer_count; size_t offset; + size_t sg_len; unsigned int src_width; unsigned int dst_width; + unsigned int i; u32 ctrla; u32 ctrlb; - dev_vdbg(chan2dev(chan), "prep_dma_memcpy: d%pad s%pad l0x%zx f0x%lx\n", - &dest, &src, len, flags); + dev_dbg(chan2dev(chan), "prep_dma_memcpy: d%pad s%pad l0x%zx f0x%lx\n", + &dest, &src, len, flags); if (unlikely(!len)) { - dev_dbg(chan2dev(chan), "prep_dma_memcpy: length is zero!\n"); + dev_err(chan2dev(chan), "prep_dma_memcpy: length is zero!\n"); return NULL; } + sg_len = DIV_ROUND_UP(len, ATC_BTSIZE_MAX); + desc = kzalloc(struct_size(desc, sg, sg_len), GFP_ATOMIC); + if (!desc) + return NULL; + desc->sglen = sg_len; + ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN | FIELD_PREP(ATC_SRC_ADDR_MODE, ATC_SRC_ADDR_MODE_INCR) | FIELD_PREP(ATC_DST_ADDR_MODE, ATC_DST_ADDR_MODE_INCR) | @@ -1263,50 +998,49 @@ atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, ctrla = FIELD_PREP(ATC_SRC_WIDTH, src_width) | FIELD_PREP(ATC_DST_WIDTH, dst_width); - for (offset = 0; offset < len; offset += xfer_count << src_width) { - xfer_count = min_t(size_t, (len - offset) >> src_width, - ATC_BTSIZE_MAX); + for (offset = 0, i = 0; offset < len; + offset += xfer_count << src_width, i++) { + struct atdma_sg *atdma_sg = &desc->sg[i]; + struct at_lli *lli; - desc = atc_desc_get(atchan); - if (!desc) + atdma_sg->lli = dma_pool_alloc(atdma->lli_pool, GFP_NOWAIT, + &atdma_sg->lli_phys); + if (!atdma_sg->lli) goto err_desc_get; + lli = atdma_sg->lli; - desc->lli.saddr = src + offset; - desc->lli.daddr = dest + offset; - desc->lli.ctrla = ctrla | xfer_count; - desc->lli.ctrlb = ctrlb; + xfer_count = min_t(size_t, (len - offset) >> src_width, + ATC_BTSIZE_MAX); - desc->txd.cookie = 
0; - desc->len = xfer_count << src_width; + lli->saddr = src + offset; + lli->daddr = dest + offset; + lli->ctrla = ctrla | xfer_count; + lli->ctrlb = ctrlb; - atc_desc_chain(&first, &prev, desc); + desc->sg[i].len = xfer_count << src_width; + + atdma_lli_chain(desc, i); } - /* First descriptor of the chain embedds additional information */ - first->txd.cookie = -EBUSY; - first->total_len = len; + desc->total_len = len; /* set end-of-link to the last link descriptor of list*/ - set_desc_eol(desc); + set_lli_eol(desc, i - 1); - first->txd.flags = flags; /* client is in control of this ack */ - - return &first->txd; + return vchan_tx_prep(&atchan->vc, &desc->vd, flags); err_desc_get: - atc_desc_put(atchan, first); + atdma_desc_free(&desc->vd); return NULL; } -static struct at_desc *atc_create_memset_desc(struct dma_chan *chan, - dma_addr_t psrc, - dma_addr_t pdst, - size_t len) +static int atdma_create_memset_lli(struct dma_chan *chan, + struct atdma_sg *atdma_sg, + dma_addr_t psrc, dma_addr_t pdst, size_t len) { - struct at_dma_chan *atchan = to_at_dma_chan(chan); - struct at_desc *desc; + struct at_dma *atdma = to_at_dma(chan->device); + struct at_lli *lli; size_t xfer_count; - u32 ctrla = FIELD_PREP(ATC_SRC_WIDTH, 2) | FIELD_PREP(ATC_DST_WIDTH, 2); u32 ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN | FIELD_PREP(ATC_SRC_ADDR_MODE, ATC_SRC_ADDR_MODE_FIXED) | @@ -1315,27 +1049,24 @@ static struct at_desc *atc_create_memset_desc(struct dma_chan *chan, xfer_count = len >> 2; if (xfer_count > ATC_BTSIZE_MAX) { - dev_err(chan2dev(chan), "%s: buffer is too big\n", - __func__); - return NULL; + dev_err(chan2dev(chan), "%s: buffer is too big\n", __func__); + return -EINVAL; } - desc = atc_desc_get(atchan); - if (!desc) { - dev_err(chan2dev(chan), "%s: can't get a descriptor\n", - __func__); - return NULL; - } + atdma_sg->lli = dma_pool_alloc(atdma->lli_pool, GFP_NOWAIT, + &atdma_sg->lli_phys); + if (!atdma_sg->lli) + return -ENOMEM; + lli = atdma_sg->lli; - desc->lli.saddr = psrc; - 
desc->lli.daddr = pdst; - desc->lli.ctrla = ctrla | xfer_count; - desc->lli.ctrlb = ctrlb; + lli->saddr = psrc; + lli->daddr = pdst; + lli->ctrla = ctrla | xfer_count; + lli->ctrlb = ctrlb; - desc->txd.cookie = 0; - desc->len = len; + atdma_sg->len = len; - return desc; + return 0; } /** @@ -1350,11 +1081,13 @@ static struct dma_async_tx_descriptor * atc_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value, size_t len, unsigned long flags) { + struct at_dma_chan *atchan = to_at_dma_chan(chan); struct at_dma *atdma = to_at_dma(chan->device); struct at_desc *desc; void __iomem *vaddr; dma_addr_t paddr; char fill_pattern; + int ret; dev_vdbg(chan2dev(chan), "%s: d%pad v0x%x l0x%zx f0x%lx\n", __func__, &dest, value, len, flags); @@ -1385,27 +1118,28 @@ atc_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value, (fill_pattern << 8) | fill_pattern; - desc = atc_create_memset_desc(chan, paddr, dest, len); - if (!desc) { - dev_err(chan2dev(chan), "%s: couldn't get a descriptor\n", - __func__); + desc = kzalloc(struct_size(desc, sg, 1), GFP_ATOMIC); + if (!desc) goto err_free_buffer; - } + desc->sglen = 1; + + ret = atdma_create_memset_lli(chan, desc->sg, paddr, dest, len); + if (ret) + goto err_free_desc; desc->memset_paddr = paddr; desc->memset_vaddr = vaddr; desc->memset_buffer = true; - desc->txd.cookie = -EBUSY; desc->total_len = len; /* set end-of-link on the descriptor */ - set_desc_eol(desc); + set_lli_eol(desc, 0); - desc->txd.flags = flags; - - return &desc->txd; + return vchan_tx_prep(&atchan->vc, &desc->vd, flags); +err_free_desc: + kfree(desc); err_free_buffer: dma_pool_free(atdma->memset_pool, vaddr, paddr); return NULL; @@ -1419,12 +1153,13 @@ atc_prep_dma_memset_sg(struct dma_chan *chan, { struct at_dma_chan *atchan = to_at_dma_chan(chan); struct at_dma *atdma = to_at_dma(chan->device); - struct at_desc *desc = NULL, *first = NULL, *prev = NULL; + struct at_desc *desc; struct scatterlist *sg; void __iomem *vaddr; dma_addr_t paddr; 
size_t total_len = 0; int i; + int ret; dev_vdbg(chan2dev(chan), "%s: v0x%x l0x%zx f0x%lx\n", __func__, value, sg_len, flags); @@ -1443,6 +1178,11 @@ atc_prep_dma_memset_sg(struct dma_chan *chan, } *(u32*)vaddr = value; + desc = kzalloc(struct_size(desc, sg, sg_len), GFP_ATOMIC); + if (!desc) + goto err_free_dma_buf; + desc->sglen = sg_len; + for_each_sg(sgl, sg, sg_len, i) { dma_addr_t dest = sg_dma_address(sg); size_t len = sg_dma_len(sg); @@ -1453,38 +1193,33 @@ atc_prep_dma_memset_sg(struct dma_chan *chan, if (!is_dma_fill_aligned(chan->device, dest, 0, len)) { dev_err(chan2dev(chan), "%s: buffer is not aligned\n", __func__); - goto err_put_desc; + goto err_free_desc; } - desc = atc_create_memset_desc(chan, paddr, dest, len); - if (!desc) - goto err_put_desc; - - atc_desc_chain(&first, &prev, desc); + ret = atdma_create_memset_lli(chan, &desc->sg[i], paddr, dest, + len); + if (ret) + goto err_free_desc; + atdma_lli_chain(desc, i); total_len += len; } - /* - * Only set the buffer pointers on the last descriptor to - * avoid free'ing while we have our transfer still going - */ desc->memset_paddr = paddr; desc->memset_vaddr = vaddr; desc->memset_buffer = true; - first->txd.cookie = -EBUSY; - first->total_len = total_len; + desc->total_len = total_len; /* set end-of-link on the descriptor */ - set_desc_eol(desc); + set_lli_eol(desc, i - 1); - first->txd.flags = flags; + return vchan_tx_prep(&atchan->vc, &desc->vd, flags); - return &first->txd; - -err_put_desc: - atc_desc_put(atchan, first); +err_free_desc: + atdma_desc_free(&desc->vd); +err_free_dma_buf: + dma_pool_free(atdma->memset_pool, vaddr, paddr); return NULL; } @@ -1502,11 +1237,11 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len, enum dma_transfer_direction direction, unsigned long flags, void *context) { + struct at_dma *atdma = to_at_dma(chan->device); struct at_dma_chan *atchan = to_at_dma_chan(chan); struct at_dma_slave *atslave = chan->private; struct 
dma_slave_config *sconfig = &atchan->dma_sconfig; - struct at_desc *first = NULL; - struct at_desc *prev = NULL; + struct at_desc *desc; u32 ctrla; u32 ctrlb; dma_addr_t reg; @@ -1526,6 +1261,11 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, return NULL; } + desc = kzalloc(struct_size(desc, sg, sg_len), GFP_ATOMIC); + if (!desc) + return NULL; + desc->sglen = sg_len; + ctrla = FIELD_PREP(ATC_SCSIZE, sconfig->src_maxburst) | FIELD_PREP(ATC_DCSIZE, sconfig->dst_maxburst); ctrlb = ATC_IEN; @@ -1542,13 +1282,17 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, FIELD_PREP(ATC_DIF, atchan->per_if); reg = sconfig->dst_addr; for_each_sg(sgl, sg, sg_len, i) { - struct at_desc *desc; + struct atdma_sg *atdma_sg = &desc->sg[i]; + struct at_lli *lli; u32 len; u32 mem; - desc = atc_desc_get(atchan); - if (!desc) + atdma_sg->lli = dma_pool_alloc(atdma->lli_pool, + GFP_NOWAIT, + &atdma_sg->lli_phys); + if (!atdma_sg->lli) goto err_desc_get; + lli = atdma_sg->lli; mem = sg_dma_address(sg); len = sg_dma_len(sg); @@ -1561,16 +1305,18 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, if (unlikely(mem & 3 || len & 3)) mem_width = 0; - desc->lli.saddr = mem; - desc->lli.daddr = reg; - desc->lli.ctrla = ctrla | - FIELD_PREP(ATC_SRC_WIDTH, mem_width) | - len >> mem_width; - desc->lli.ctrlb = ctrlb; - desc->len = len; + lli->saddr = mem; + lli->daddr = reg; + lli->ctrla = ctrla | + FIELD_PREP(ATC_SRC_WIDTH, mem_width) | + len >> mem_width; + lli->ctrlb = ctrlb; - atc_desc_chain(&first, &prev, desc); + atdma_sg->len = len; total_len += len; + + desc->sg[i].len = len; + atdma_lli_chain(desc, i); } break; case DMA_DEV_TO_MEM: @@ -1585,13 +1331,17 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, reg = sconfig->src_addr; for_each_sg(sgl, sg, sg_len, i) { - struct at_desc *desc; + struct atdma_sg *atdma_sg = &desc->sg[i]; + struct at_lli *lli; u32 len; u32 mem; - desc = atc_desc_get(atchan); - if (!desc) + 
atdma_sg->lli = dma_pool_alloc(atdma->lli_pool, + GFP_NOWAIT, + &atdma_sg->lli_phys); + if (!atdma_sg->lli) goto err_desc_get; + lli = atdma_sg->lli; mem = sg_dma_address(sg); len = sg_dma_len(sg); @@ -1604,16 +1354,17 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, if (unlikely(mem & 3 || len & 3)) mem_width = 0; - desc->lli.saddr = reg; - desc->lli.daddr = mem; - desc->lli.ctrla = ctrla | - FIELD_PREP(ATC_DST_WIDTH, mem_width) | - len >> reg_width; - desc->lli.ctrlb = ctrlb; - desc->len = len; + lli->saddr = reg; + lli->daddr = mem; + lli->ctrla = ctrla | + FIELD_PREP(ATC_DST_WIDTH, mem_width) | + len >> reg_width; + lli->ctrlb = ctrlb; - atc_desc_chain(&first, &prev, desc); + desc->sg[i].len = len; total_len += len; + + atdma_lli_chain(desc, i); } break; default: @@ -1621,21 +1372,16 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, } /* set end-of-link to the last link descriptor of list*/ - set_desc_eol(prev); + set_lli_eol(desc, i - 1); - /* First descriptor of the chain embedds additional information */ - first->txd.cookie = -EBUSY; - first->total_len = total_len; + desc->total_len = total_len; - /* first link descriptor of list is responsible of flags */ - first->txd.flags = flags; /* client is in control of this ack */ - - return &first->txd; + return vchan_tx_prep(&atchan->vc, &desc->vd, flags); err_desc_get: dev_err(chan2dev(chan), "not enough descriptors available\n"); err: - atc_desc_put(atchan, first); + atdma_desc_free(&desc->vd); return NULL; } @@ -1665,54 +1411,59 @@ err_out: */ static int atc_dma_cyclic_fill_desc(struct dma_chan *chan, struct at_desc *desc, - unsigned int period_index, dma_addr_t buf_addr, + unsigned int i, dma_addr_t buf_addr, unsigned int reg_width, size_t period_len, enum dma_transfer_direction direction) { + struct at_dma *atdma = to_at_dma(chan->device); struct at_dma_chan *atchan = to_at_dma_chan(chan); struct dma_slave_config *sconfig = &atchan->dma_sconfig; - u32 ctrla; + struct 
atdma_sg *atdma_sg = &desc->sg[i]; + struct at_lli *lli; - /* prepare common CRTLA value */ - ctrla = FIELD_PREP(ATC_SCSIZE, sconfig->src_maxburst) | - FIELD_PREP(ATC_DCSIZE, sconfig->dst_maxburst) | - FIELD_PREP(ATC_DST_WIDTH, reg_width) | - FIELD_PREP(ATC_SRC_WIDTH, reg_width) | - period_len >> reg_width; + atdma_sg->lli = dma_pool_alloc(atdma->lli_pool, GFP_ATOMIC, + &atdma_sg->lli_phys); + if (!atdma_sg->lli) + return -ENOMEM; + lli = atdma_sg->lli; switch (direction) { case DMA_MEM_TO_DEV: - desc->lli.saddr = buf_addr + (period_len * period_index); - desc->lli.daddr = sconfig->dst_addr; - desc->lli.ctrla = ctrla; - desc->lli.ctrlb = FIELD_PREP(ATC_DST_ADDR_MODE, - ATC_DST_ADDR_MODE_FIXED) | - FIELD_PREP(ATC_SRC_ADDR_MODE, - ATC_SRC_ADDR_MODE_INCR) | - FIELD_PREP(ATC_FC, ATC_FC_MEM2PER) | - FIELD_PREP(ATC_SIF, atchan->mem_if) | - FIELD_PREP(ATC_DIF, atchan->per_if); - desc->len = period_len; + lli->saddr = buf_addr + (period_len * i); + lli->daddr = sconfig->dst_addr; + lli->ctrlb = FIELD_PREP(ATC_DST_ADDR_MODE, + ATC_DST_ADDR_MODE_FIXED) | + FIELD_PREP(ATC_SRC_ADDR_MODE, + ATC_SRC_ADDR_MODE_INCR) | + FIELD_PREP(ATC_FC, ATC_FC_MEM2PER) | + FIELD_PREP(ATC_SIF, atchan->mem_if) | + FIELD_PREP(ATC_DIF, atchan->per_if); + break; case DMA_DEV_TO_MEM: - desc->lli.saddr = sconfig->src_addr; - desc->lli.daddr = buf_addr + (period_len * period_index); - desc->lli.ctrla = ctrla; - desc->lli.ctrlb = FIELD_PREP(ATC_DST_ADDR_MODE, - ATC_DST_ADDR_MODE_INCR) | - FIELD_PREP(ATC_SRC_ADDR_MODE, - ATC_SRC_ADDR_MODE_FIXED) | - FIELD_PREP(ATC_FC, ATC_FC_PER2MEM) | - FIELD_PREP(ATC_SIF, atchan->per_if) | - FIELD_PREP(ATC_DIF, atchan->mem_if); - desc->len = period_len; + lli->saddr = sconfig->src_addr; + lli->daddr = buf_addr + (period_len * i); + lli->ctrlb = FIELD_PREP(ATC_DST_ADDR_MODE, + ATC_DST_ADDR_MODE_INCR) | + FIELD_PREP(ATC_SRC_ADDR_MODE, + ATC_SRC_ADDR_MODE_FIXED) | + FIELD_PREP(ATC_FC, ATC_FC_PER2MEM) | + FIELD_PREP(ATC_SIF, atchan->per_if) | + FIELD_PREP(ATC_DIF, 
atchan->mem_if); break; default: return -EINVAL; } + lli->ctrla = FIELD_PREP(ATC_SCSIZE, sconfig->src_maxburst) | + FIELD_PREP(ATC_DCSIZE, sconfig->dst_maxburst) | + FIELD_PREP(ATC_DST_WIDTH, reg_width) | + FIELD_PREP(ATC_SRC_WIDTH, reg_width) | + period_len >> reg_width; + desc->sg[i].len = period_len; + return 0; } @@ -1733,8 +1484,7 @@ atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, struct at_dma_chan *atchan = to_at_dma_chan(chan); struct at_dma_slave *atslave = chan->private; struct dma_slave_config *sconfig = &atchan->dma_sconfig; - struct at_desc *first = NULL; - struct at_desc *prev = NULL; + struct at_desc *desc; unsigned long was_cyclic; unsigned int reg_width; unsigned int periods = buf_len / period_len; @@ -1768,33 +1518,26 @@ atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, if (atc_dma_cyclic_check_values(reg_width, buf_addr, period_len)) goto err_out; + desc = kzalloc(struct_size(desc, sg, periods), GFP_ATOMIC); + if (!desc) + goto err_out; + desc->sglen = periods; + /* build cyclic linked list */ for (i = 0; i < periods; i++) { - struct at_desc *desc; - - desc = atc_desc_get(atchan); - if (!desc) - goto err_desc_get; - if (atc_dma_cyclic_fill_desc(chan, desc, i, buf_addr, reg_width, period_len, direction)) - goto err_desc_get; - - atc_desc_chain(&first, &prev, desc); + goto err_fill_desc; + atdma_lli_chain(desc, i); } - + desc->total_len = buf_len; /* lets make a cyclic list */ - prev->lli.dscr = first->txd.phys; + desc->sg[i - 1].lli->dscr = desc->sg[0].lli_phys; - /* First descriptor of the chain embedds additional information */ - first->txd.cookie = -EBUSY; - first->total_len = buf_len; + return vchan_tx_prep(&atchan->vc, &desc->vd, flags); - return &first->txd; - -err_desc_get: - dev_err(chan2dev(chan), "not enough descriptors available\n"); - atc_desc_put(atchan, first); +err_fill_desc: + atdma_desc_free(&desc->vd); err_out: clear_bit(ATC_IS_CYCLIC, &atchan->status); return NULL; @@ 
-1823,17 +1566,17 @@ static int atc_pause(struct dma_chan *chan) { struct at_dma_chan *atchan = to_at_dma_chan(chan); struct at_dma *atdma = to_at_dma(chan->device); - int chan_id = atchan->dma_chan.chan_id; + int chan_id = atchan->vc.chan.chan_id; unsigned long flags; dev_vdbg(chan2dev(chan), "%s\n", __func__); - spin_lock_irqsave(&atchan->lock, flags); + spin_lock_irqsave(&atchan->vc.lock, flags); dma_writel(atdma, CHER, AT_DMA_SUSP(chan_id)); set_bit(ATC_IS_PAUSED, &atchan->status); - spin_unlock_irqrestore(&atchan->lock, flags); + spin_unlock_irqrestore(&atchan->vc.lock, flags); return 0; } @@ -1842,7 +1585,7 @@ static int atc_resume(struct dma_chan *chan) { struct at_dma_chan *atchan = to_at_dma_chan(chan); struct at_dma *atdma = to_at_dma(chan->device); - int chan_id = atchan->dma_chan.chan_id; + int chan_id = atchan->vc.chan.chan_id; unsigned long flags; dev_vdbg(chan2dev(chan), "%s\n", __func__); @@ -1850,12 +1593,12 @@ static int atc_resume(struct dma_chan *chan) if (!atc_chan_is_paused(atchan)) return 0; - spin_lock_irqsave(&atchan->lock, flags); + spin_lock_irqsave(&atchan->vc.lock, flags); dma_writel(atdma, CHDR, AT_DMA_RES(chan_id)); clear_bit(ATC_IS_PAUSED, &atchan->status); - spin_unlock_irqrestore(&atchan->lock, flags); + spin_unlock_irqrestore(&atchan->vc.lock, flags); return 0; } @@ -1864,9 +1607,11 @@ static int atc_terminate_all(struct dma_chan *chan) { struct at_dma_chan *atchan = to_at_dma_chan(chan); struct at_dma *atdma = to_at_dma(chan->device); - int chan_id = atchan->dma_chan.chan_id; + int chan_id = atchan->vc.chan.chan_id; unsigned long flags; + LIST_HEAD(list); + dev_vdbg(chan2dev(chan), "%s\n", __func__); /* @@ -1875,7 +1620,7 @@ static int atc_terminate_all(struct dma_chan *chan) * channel. We still have to poll the channel enable bit due * to AHB/HSB limitations. 
*/ - spin_lock_irqsave(&atchan->lock, flags); + spin_lock_irqsave(&atchan->vc.lock, flags); /* disabling channel: must also remove suspend state */ dma_writel(atdma, CHDR, AT_DMA_RES(chan_id) | atchan->mask); @@ -1884,15 +1629,20 @@ static int atc_terminate_all(struct dma_chan *chan) while (dma_readl(atdma, CHSR) & atchan->mask) cpu_relax(); - /* active_list entries will end up before queued entries */ - list_splice_tail_init(&atchan->queue, &atchan->free_list); - list_splice_tail_init(&atchan->active_list, &atchan->free_list); + if (atchan->desc) { + vchan_terminate_vdesc(&atchan->desc->vd); + atchan->desc = NULL; + } + + vchan_get_all_descriptors(&atchan->vc, &list); clear_bit(ATC_IS_PAUSED, &atchan->status); /* if channel dedicated to cyclic operations, free it */ clear_bit(ATC_IS_CYCLIC, &atchan->status); - spin_unlock_irqrestore(&atchan->lock, flags); + spin_unlock_irqrestore(&atchan->vc.lock, flags); + + vchan_dma_desc_free_list(&atchan->vc, &list); return 0; } @@ -1922,9 +1672,10 @@ atc_tx_status(struct dma_chan *chan, if (dma_status == DMA_COMPLETE || !txstate) return dma_status; - spin_lock_irqsave(&atchan->lock, flags); + spin_lock_irqsave(&atchan->vc.lock, flags); + /* Get number of bytes left in the active transactions */ ret = atc_get_residue(chan, cookie, &residue); - spin_unlock_irqrestore(&atchan->lock, flags); + spin_unlock_irqrestore(&atchan->vc.lock, flags); if (unlikely(ret < 0)) { dev_vdbg(chan2dev(chan), "get residual bytes error\n"); @@ -1939,27 +1690,17 @@ atc_tx_status(struct dma_chan *chan, return dma_status; } -/** - * atc_issue_pending - takes the first transaction descriptor in the pending - * queue and starts the transfer. 
- * @chan: target DMA channel - */ static void atc_issue_pending(struct dma_chan *chan) { struct at_dma_chan *atchan = to_at_dma_chan(chan); - struct at_desc *desc; unsigned long flags; - dev_vdbg(chan2dev(chan), "issue_pending\n"); - - spin_lock_irqsave(&atchan->lock, flags); - if (atc_chan_is_enabled(atchan) || list_empty(&atchan->queue)) - return spin_unlock_irqrestore(&atchan->lock, flags); - - desc = atc_first_queued(atchan); - list_move_tail(&desc->desc_node, &atchan->active_list); - atc_dostart(atchan, desc); - spin_unlock_irqrestore(&atchan->lock, flags); + spin_lock_irqsave(&atchan->vc.lock, flags); + if (vchan_issue_pending(&atchan->vc) && !atchan->desc) { + if (!(atc_chan_is_enabled(atchan))) + atc_dostart(atchan); + } + spin_unlock_irqrestore(&atchan->vc.lock, flags); } /** @@ -1972,9 +1713,7 @@ static int atc_alloc_chan_resources(struct dma_chan *chan) { struct at_dma_chan *atchan = to_at_dma_chan(chan); struct at_dma *atdma = to_at_dma(chan->device); - struct at_desc *desc; struct at_dma_slave *atslave; - int i; u32 cfg; dev_vdbg(chan2dev(chan), "alloc_chan_resources\n"); @@ -1985,11 +1724,6 @@ static int atc_alloc_chan_resources(struct dma_chan *chan) return -EIO; } - if (!list_empty(&atchan->free_list)) { - dev_dbg(chan2dev(chan), "can't allocate channel resources (channel not freed from a previous use)\n"); - return -EIO; - } - cfg = ATC_DEFAULT_CFG; atslave = chan->private; @@ -2005,26 +1739,10 @@ static int atc_alloc_chan_resources(struct dma_chan *chan) cfg = atslave->cfg; } - /* Allocate initial pool of descriptors */ - for (i = 0; i < init_nr_desc_per_channel; i++) { - desc = atc_alloc_descriptor(chan, GFP_KERNEL); - if (!desc) { - dev_err(atdma->dma_device.dev, - "Only %d initial descriptors\n", i); - break; - } - list_add_tail(&desc->desc_node, &atchan->free_list); - } - - dma_cookie_init(chan); - /* channel parameters */ channel_writel(atchan, CFG, cfg); - dev_dbg(chan2dev(chan), - "alloc_chan_resources: allocated %d descriptors\n", i); - - 
return i; + return 0; } /** @@ -2034,22 +1752,10 @@ static int atc_alloc_chan_resources(struct dma_chan *chan) static void atc_free_chan_resources(struct dma_chan *chan) { struct at_dma_chan *atchan = to_at_dma_chan(chan); - struct at_dma *atdma = to_at_dma(chan->device); - struct at_desc *desc, *_desc; - LIST_HEAD(list); - /* ASSERT: channel is idle */ - BUG_ON(!list_empty(&atchan->active_list)); - BUG_ON(!list_empty(&atchan->queue)); BUG_ON(atc_chan_is_enabled(atchan)); - list_for_each_entry_safe(desc, _desc, &atchan->free_list, desc_node) { - dev_vdbg(chan2dev(chan), " freeing descriptor %p\n", desc); - list_del(&desc->desc_node); - /* free link descriptor */ - dma_pool_free(atdma->dma_desc_pool, desc, desc->txd.phys); - } - list_splice_init(&atchan->free_list, &list); + vchan_free_chan_resources(to_virt_chan(chan)); atchan->status = 0; /* @@ -2274,11 +1980,11 @@ static int __init at_dma_probe(struct platform_device *pdev) platform_set_drvdata(pdev, atdma); /* create a pool of consistent memory blocks for hardware descriptors */ - atdma->dma_desc_pool = dma_pool_create("at_hdmac_desc_pool", - &pdev->dev, sizeof(struct at_desc), - 4 /* word alignment */, 0); - if (!atdma->dma_desc_pool) { - dev_err(&pdev->dev, "No memory for descriptors dma pool\n"); + atdma->lli_pool = dma_pool_create("at_hdmac_lli_pool", + &pdev->dev, sizeof(struct at_lli), + 4 /* word alignment */, 0); + if (!atdma->lli_pool) { + dev_err(&pdev->dev, "Unable to allocate DMA LLI descriptor pool\n"); err = -ENOMEM; goto err_desc_pool_create; } @@ -2303,20 +2009,13 @@ static int __init at_dma_probe(struct platform_device *pdev) atchan->mem_if = AT_DMA_MEM_IF; atchan->per_if = AT_DMA_PER_IF; - atchan->dma_chan.device = &atdma->dma_device; - dma_cookie_init(&atchan->dma_chan); - list_add_tail(&atchan->dma_chan.device_node, - &atdma->dma_device.channels); atchan->ch_regs = atdma->regs + ch_regs(i); - spin_lock_init(&atchan->lock); atchan->mask = 1 << i; - INIT_LIST_HEAD(&atchan->active_list); - 
INIT_LIST_HEAD(&atchan->queue); - INIT_LIST_HEAD(&atchan->free_list); - - tasklet_setup(&atchan->tasklet, atc_tasklet); + atchan->atdma = atdma; + atchan->vc.desc_free = atdma_desc_free; + vchan_init(&atchan->vc, &atdma->dma_device); atc_enable_chan_irq(atdma, i); } @@ -2390,7 +2089,7 @@ err_of_dma_controller_register: err_dma_async_device_register: dma_pool_destroy(atdma->memset_pool); err_memset_pool_create: - dma_pool_destroy(atdma->dma_desc_pool); + dma_pool_destroy(atdma->lli_pool); err_desc_pool_create: free_irq(platform_get_irq(pdev, 0), atdma); err_irq: @@ -2409,17 +2108,13 @@ static int at_dma_remove(struct platform_device *pdev) dma_async_device_unregister(&atdma->dma_device); dma_pool_destroy(atdma->memset_pool); - dma_pool_destroy(atdma->dma_desc_pool); + dma_pool_destroy(atdma->lli_pool); free_irq(platform_get_irq(pdev, 0), atdma); list_for_each_entry_safe(chan, _chan, &atdma->dma_device.channels, device_node) { - struct at_dma_chan *atchan = to_at_dma_chan(chan); - /* Disable interrupts */ atc_disable_chan_irq(atdma, chan->chan_id); - - tasklet_kill(&atchan->tasklet); list_del(&chan->device_node); } @@ -2453,7 +2148,7 @@ static int at_dma_prepare(struct device *dev) static void atc_suspend_cyclic(struct at_dma_chan *atchan) { - struct dma_chan *chan = &atchan->dma_chan; + struct dma_chan *chan = &atchan->vc.chan; /* Channel should be paused by user * do it anyway even if it is not done already */ @@ -2494,7 +2189,7 @@ static int at_dma_suspend_noirq(struct device *dev) static void atc_resume_cyclic(struct at_dma_chan *atchan) { - struct at_dma *atdma = to_at_dma(atchan->dma_chan.device); + struct at_dma *atdma = to_at_dma(atchan->vc.chan.device); /* restore channel status for cyclic descriptors list: * next descriptor in the cyclic list at the time of suspend */ @@ -2568,5 +2263,6 @@ module_exit(at_dma_exit); MODULE_DESCRIPTION("Atmel AHB DMA Controller driver"); MODULE_AUTHOR("Nicolas Ferre "); +MODULE_AUTHOR("Tudor Ambarus "); MODULE_LICENSE("GPL"); 
MODULE_ALIAS("platform:at_hdmac"); From 00fb05ff87bc63a3e9000e3f7c15c86951aca76d Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 9 Nov 2022 21:05:54 +0100 Subject: [PATCH 1388/4122] usb: fotg2: add Gemini-specific handling The Cortina Systems Gemini has bolted on a PHY inside the silicon that can be handled by six bits in a MISC register in the system controller. If we are running on Gemini, look up a syscon regmap through a phandle and enable VBUS and optionally the Mini-B connector. If the device is flagged as "wakeup-source" using the standard DT bindings, we also enable this in the global controller for respective port. Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20221109200554.1957185-1-linus.walleij@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/fotg210/Kconfig | 1 + drivers/usb/fotg210/fotg210-core.c | 80 ++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+) diff --git a/drivers/usb/fotg210/Kconfig b/drivers/usb/fotg210/Kconfig index 933c513b5728..534206ee0d1d 100644 --- a/drivers/usb/fotg210/Kconfig +++ b/drivers/usb/fotg210/Kconfig @@ -5,6 +5,7 @@ config USB_FOTG210 depends on USB || USB_GADGET depends on HAS_DMA && HAS_IOMEM default ARCH_GEMINI + select MFD_SYSCON help Faraday FOTG210 is a dual-mode USB controller that can act in both host controller and peripheral controller mode. diff --git a/drivers/usb/fotg210/fotg210-core.c b/drivers/usb/fotg210/fotg210-core.c index 3d07ee46f6d1..8a54edf921ac 100644 --- a/drivers/usb/fotg210/fotg210-core.c +++ b/drivers/usb/fotg210/fotg210-core.c @@ -5,15 +5,86 @@ * whether to proceed with probing the host or the peripheral * driver. */ +#include #include +#include #include #include #include +#include #include #include #include "fotg210.h" +/* + * Gemini-specific initialization function, only executed on the + * Gemini SoC using the global misc control register. 
+ * + * The gemini USB blocks are connected to either Mini-A (host mode) or + * Mini-B (peripheral mode) plugs. There is no role switch support on the + * Gemini SoC, just either-or. + */ +#define GEMINI_GLOBAL_MISC_CTRL 0x30 +#define GEMINI_MISC_USB0_WAKEUP BIT(14) +#define GEMINI_MISC_USB1_WAKEUP BIT(15) +#define GEMINI_MISC_USB0_VBUS_ON BIT(22) +#define GEMINI_MISC_USB1_VBUS_ON BIT(23) +#define GEMINI_MISC_USB0_MINI_B BIT(29) +#define GEMINI_MISC_USB1_MINI_B BIT(30) + +static int fotg210_gemini_init(struct device *dev, struct resource *res, + enum usb_dr_mode mode) +{ + struct device_node *np = dev->of_node; + struct regmap *map; + bool wakeup; + u32 mask, val; + int ret; + + map = syscon_regmap_lookup_by_phandle(np, "syscon"); + if (IS_ERR(map)) { + dev_err(dev, "no syscon\n"); + return PTR_ERR(map); + } + wakeup = of_property_read_bool(np, "wakeup-source"); + + /* + * Figure out if this is USB0 or USB1 by simply checking the + * physical base address. + */ + mask = 0; + if (res->start == 0x69000000) { + mask = GEMINI_MISC_USB1_VBUS_ON | GEMINI_MISC_USB1_MINI_B | + GEMINI_MISC_USB1_WAKEUP; + if (mode == USB_DR_MODE_HOST) + val = GEMINI_MISC_USB1_VBUS_ON; + else + val = GEMINI_MISC_USB1_MINI_B; + if (wakeup) + val |= GEMINI_MISC_USB1_WAKEUP; + } else { + mask = GEMINI_MISC_USB0_VBUS_ON | GEMINI_MISC_USB0_MINI_B | + GEMINI_MISC_USB0_WAKEUP; + if (mode == USB_DR_MODE_HOST) + val = GEMINI_MISC_USB0_VBUS_ON; + else + val = GEMINI_MISC_USB0_MINI_B; + if (wakeup) + val |= GEMINI_MISC_USB0_WAKEUP; + } + + ret = regmap_update_bits(map, GEMINI_GLOBAL_MISC_CTRL, mask, val); + if (ret) { + dev_err(dev, "failed to initialize Gemini PHY\n"); + return ret; + } + + dev_info(dev, "initialized Gemini PHY in %s mode\n", + (mode == USB_DR_MODE_HOST) ? 
"host" : "gadget"); + return 0; +} + static int fotg210_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -22,6 +93,15 @@ static int fotg210_probe(struct platform_device *pdev) mode = usb_get_dr_mode(dev); + if (of_device_is_compatible(dev->of_node, "cortina,gemini-usb")) { + struct resource *res; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + ret = fotg210_gemini_init(dev, res, mode); + if (ret) + return ret; + } + if (mode == USB_DR_MODE_PERIPHERAL) ret = fotg210_udc_probe(pdev); else From 46ed6026ca2181c917c8334a82e3eaf40a6234dd Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 11 Nov 2022 10:03:17 +0100 Subject: [PATCH 1389/4122] usb: fotg210-udc: Fix ages old endianness issues The code in the FOTG210 driver isn't entirely endianness-agnostic as reported by the kernel robot sparse testing. This came to the surface while moving the files around. The driver is only used on little-endian systems, so this causes no real-world regression, but it is nice to be strict and have some compile coverage also on big endian machines, so fix it up with the right LE accessors. 
Fixes: b84a8dee23fd ("usb: gadget: add Faraday fotg210_udc driver") Reported-by: kernel test robot Link: https://lore.kernel.org/linux-usb/202211110910.0dJ7nZCn-lkp@intel.com/ Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20221111090317.94228-1-linus.walleij@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/fotg210/fotg210-udc.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/usb/fotg210/fotg210-udc.c b/drivers/usb/fotg210/fotg210-udc.c index 7757aaa11d6f..3c357ce42d3b 100644 --- a/drivers/usb/fotg210/fotg210-udc.c +++ b/drivers/usb/fotg210/fotg210-udc.c @@ -630,10 +630,10 @@ static void fotg210_request_error(struct fotg210_udc *fotg210) static void fotg210_set_address(struct fotg210_udc *fotg210, struct usb_ctrlrequest *ctrl) { - if (ctrl->wValue >= 0x0100) { + if (le16_to_cpu(ctrl->wValue) >= 0x0100) { fotg210_request_error(fotg210); } else { - fotg210_set_dev_addr(fotg210, ctrl->wValue); + fotg210_set_dev_addr(fotg210, le16_to_cpu(ctrl->wValue)); fotg210_set_cxdone(fotg210); } } @@ -714,17 +714,17 @@ static void fotg210_get_status(struct fotg210_udc *fotg210, switch (ctrl->bRequestType & USB_RECIP_MASK) { case USB_RECIP_DEVICE: - fotg210->ep0_data = 1 << USB_DEVICE_SELF_POWERED; + fotg210->ep0_data = cpu_to_le16(1 << USB_DEVICE_SELF_POWERED); break; case USB_RECIP_INTERFACE: - fotg210->ep0_data = 0; + fotg210->ep0_data = cpu_to_le16(0); break; case USB_RECIP_ENDPOINT: epnum = ctrl->wIndex & USB_ENDPOINT_NUMBER_MASK; if (epnum) fotg210->ep0_data = - fotg210_is_epnstall(fotg210->ep[epnum]) - << USB_ENDPOINT_HALT; + cpu_to_le16(fotg210_is_epnstall(fotg210->ep[epnum]) + << USB_ENDPOINT_HALT); else fotg210_request_error(fotg210); break; From 8836402d4b208b2211fc60538ff45d6bb3b73a64 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 10 Nov 2022 18:54:35 +0100 Subject: [PATCH 1390/4122] usb: Check !irq instead of irq == NO_IRQ NO_IRQ is a relic from the old days. 
It is not used anymore in core functions. By the way, function irq_of_parse_and_map() returns value 0 on error. In some drivers, NO_IRQ is erroneously used to check the return of irq_of_parse_and_map(). It is not a real bug today because the only architectures using the drivers being fixed by this patch define NO_IRQ as 0, but there are architectures which define NO_IRQ as -1. If one day those architectures start using the non fixed drivers, there will be a problem. A long time ago, Linus advocated for not using NO_IRQ, see https://lkml.org/lkml/2005/11/21/221 . He re-iterated the same view recently in https://lkml.org/lkml/2022/10/12/622 So test !irq instead of testing irq == NO_IRQ. Signed-off-by: Christophe Leroy Acked-by: Alan Stern Link: https://lore.kernel.org/r/13feefdf6b240817944e6441e26a8ddc1d81ced1.1668102802.git.christophe.leroy@csgroup.eu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-grlib.c | 2 +- drivers/usb/host/ehci-ppc-of.c | 2 +- drivers/usb/host/fhci-hcd.c | 2 +- drivers/usb/host/ohci-ppc-of.c | 2 +- drivers/usb/host/uhci-grlib.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/usb/host/ehci-grlib.c b/drivers/usb/host/ehci-grlib.c index a2c3b4ec8a8b..0717f2ccf49d 100644 --- a/drivers/usb/host/ehci-grlib.c +++ b/drivers/usb/host/ehci-grlib.c @@ -99,7 +99,7 @@ static int ehci_hcd_grlib_probe(struct platform_device *op) hcd->rsrc_len = resource_size(&res); irq = irq_of_parse_and_map(dn, 0); - if (irq == NO_IRQ) { + if (!irq) { dev_err(&op->dev, "%s: irq_of_parse_and_map failed\n", __FILE__); rv = -EBUSY; diff --git a/drivers/usb/host/ehci-ppc-of.c b/drivers/usb/host/ehci-ppc-of.c index 28a19693c19f..62a0a193798c 100644 --- a/drivers/usb/host/ehci-ppc-of.c +++ b/drivers/usb/host/ehci-ppc-of.c @@ -119,7 +119,7 @@ static int ehci_hcd_ppc_of_probe(struct platform_device *op) hcd->rsrc_len = resource_size(&res); irq = irq_of_parse_and_map(dn, 0); - if (irq == NO_IRQ) { + if (!irq) { dev_err(&op->dev, "%s: 
irq_of_parse_and_map failed\n", __FILE__); rv = -EBUSY; diff --git a/drivers/usb/host/fhci-hcd.c b/drivers/usb/host/fhci-hcd.c index 95a44462bed0..64a64140c2fd 100644 --- a/drivers/usb/host/fhci-hcd.c +++ b/drivers/usb/host/fhci-hcd.c @@ -676,7 +676,7 @@ static int of_fhci_probe(struct platform_device *ofdev) /* USB Host interrupt. */ usb_irq = irq_of_parse_and_map(node, 0); - if (usb_irq == NO_IRQ) { + if (!usb_irq) { dev_err(dev, "could not get usb irq\n"); ret = -EINVAL; goto err_usb_irq; diff --git a/drivers/usb/host/ohci-ppc-of.c b/drivers/usb/host/ohci-ppc-of.c index 591f675cc930..f2f6c832ec98 100644 --- a/drivers/usb/host/ohci-ppc-of.c +++ b/drivers/usb/host/ohci-ppc-of.c @@ -120,7 +120,7 @@ static int ohci_hcd_ppc_of_probe(struct platform_device *op) } irq = irq_of_parse_and_map(dn, 0); - if (irq == NO_IRQ) { + if (!irq) { dev_err(&op->dev, "%s: irq_of_parse_and_map failed\n", __FILE__); rv = -EBUSY; diff --git a/drivers/usb/host/uhci-grlib.c b/drivers/usb/host/uhci-grlib.c index 3ef6d52839e5..907d5f01edfd 100644 --- a/drivers/usb/host/uhci-grlib.c +++ b/drivers/usb/host/uhci-grlib.c @@ -116,7 +116,7 @@ static int uhci_hcd_grlib_probe(struct platform_device *op) hcd->rsrc_len = resource_size(&res); irq = irq_of_parse_and_map(dn, 0); - if (irq == NO_IRQ) { + if (!irq) { printk(KERN_ERR "%s: irq_of_parse_and_map failed\n", __FILE__); rv = -EBUSY; goto err_usb; From dd65a243a915ca319ed5fee9161a168c836fa2f2 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Thu, 10 Nov 2022 12:47:38 -0700 Subject: [PATCH 1391/4122] usb/usbip: Fix v_recv_cmd_submit() to use PIPE_BULK define Fix v_recv_cmd_submit() to use PIPE_BULK define instead of hard coded values. This also fixes the following signed integer overflow error reported by cppcheck. This is not an issue since pipe is unsigned int. However, this change improves the code to use proper define. drivers/usb/usbip/vudc_rx.c:152:26: error: Signed integer overflow for expression '3<<30'. 
[integerOverflow] urb_p->urb->pipe &= ~(3 << 30); In addition, add a build time check for PIPE_BULK != 3 as the code path depends on PIPE_BULK = 3. Signed-off-by: Shuah Khan Link: https://lore.kernel.org/r/20221110194738.38514-1-skhan@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/vudc_rx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/usbip/vudc_rx.c b/drivers/usb/usbip/vudc_rx.c index d4a2f30a7580..51bb70837b90 100644 --- a/drivers/usb/usbip/vudc_rx.c +++ b/drivers/usb/usbip/vudc_rx.c @@ -149,7 +149,9 @@ static int v_recv_cmd_submit(struct vudc *udc, urb_p->urb->status = -EINPROGRESS; /* FIXME: more pipe setup to please usbip_common */ - urb_p->urb->pipe &= ~(3 << 30); + BUILD_BUG_ON_MSG(PIPE_BULK != 3, "PIPE_* doesn't range from 0 to 3"); + + urb_p->urb->pipe &= ~(PIPE_BULK << 30); switch (urb_p->ep->type) { case USB_ENDPOINT_XFER_BULK: urb_p->urb->pipe |= (PIPE_BULK << 30); From 282a4b71816b6076029017a7bab3a9dcee12a920 Mon Sep 17 00:00:00 2001 From: Eli Billauer Date: Sun, 30 Oct 2022 11:42:09 +0200 Subject: [PATCH 1392/4122] char: xillybus: Prevent use-after-free due to race condition The driver for XillyUSB devices maintains a kref reference count on each xillyusb_dev structure, which represents a physical device. This reference count reaches zero when the device has been disconnected and there are no open file descriptors that are related to the device. When this occurs, kref_put() calls cleanup_dev(), which clears up the device's data, including the structure itself. However, when xillyusb_open() is called, this reference count becomes tricky: This function needs to obtain the xillyusb_dev structure that relates to the inode's major and minor (as there can be several such). xillybus_find_inode() (which is defined in xillybus_class.c) is called for this purpose. 
xillybus_find_inode() holds a mutex that is global in xillybus_class.c to protect the list of devices, and releases this mutex before returning. As a result, nothing protects the xillyusb_dev's reference counter from being decremented to zero before xillyusb_open() increments it on its own behalf. Hence the structure can be freed due to a rare race condition. To solve this, a mutex is added. It is locked by xillyusb_open() before the call to xillybus_find_inode() and is released only after the kref counter has been incremented on behalf of the newly opened inode. This protects the kref reference counters of all xillyusb_dev structs from being decremented by xillyusb_disconnect() during this time segment, as the call to kref_put() in this function is done with the same lock held. There is no need to hold the lock on other calls to kref_put(), because if xillybus_find_inode() finds a struct, xillyusb_disconnect() has not made the call to remove it, and hence not made its call to kref_put(), which takes place afterwards. Hence preventing xillyusb_disconnect's call to kref_put() is enough to ensure that the reference doesn't reach zero before it's incremented by xillyusb_open(). It would have been more natural to increment the reference count in xillybus_find_inode() of course, however this function is also called by Xillybus' driver for PCIe / OF, which registers a completely different structure. Therefore, xillybus_find_inode() treats these structures as void pointers, and accordingly can't make any changes. 
Reported-by: Hyunwoo Kim Suggested-by: Alan Stern Signed-off-by: Eli Billauer Link: https://lore.kernel.org/r/20221030094209.65916-1-eli.billauer@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/char/xillybus/xillyusb.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/char/xillybus/xillyusb.c b/drivers/char/xillybus/xillyusb.c index 39bcbfd908b4..5a5afa14ca8c 100644 --- a/drivers/char/xillybus/xillyusb.c +++ b/drivers/char/xillybus/xillyusb.c @@ -184,6 +184,14 @@ struct xillyusb_dev { struct mutex process_in_mutex; /* synchronize wakeup_all() */ }; +/* + * kref_mutex is used in xillyusb_open() to prevent the xillyusb_dev + * struct from being freed during the gap between being found by + * xillybus_find_inode() and having its reference count incremented. + */ + +static DEFINE_MUTEX(kref_mutex); + /* FPGA to host opcodes */ enum { OPCODE_DATA = 0, @@ -1237,9 +1245,16 @@ static int xillyusb_open(struct inode *inode, struct file *filp) int rc; int index; + mutex_lock(&kref_mutex); + rc = xillybus_find_inode(inode, (void **)&xdev, &index); - if (rc) + if (rc) { + mutex_unlock(&kref_mutex); return rc; + } + + kref_get(&xdev->kref); + mutex_unlock(&kref_mutex); chan = &xdev->channels[index]; filp->private_data = chan; @@ -1275,8 +1290,6 @@ static int xillyusb_open(struct inode *inode, struct file *filp) ((filp->f_mode & FMODE_WRITE) && chan->open_for_write)) goto unmutex_fail; - kref_get(&xdev->kref); - if (filp->f_mode & FMODE_READ) chan->open_for_read = 1; @@ -1413,6 +1426,7 @@ unopen: return rc; unmutex_fail: + kref_put(&xdev->kref, cleanup_dev); mutex_unlock(&chan->lock); return rc; } @@ -2227,7 +2241,9 @@ static void xillyusb_disconnect(struct usb_interface *interface) xdev->dev = NULL; + mutex_lock(&kref_mutex); kref_put(&xdev->kref, cleanup_dev); + mutex_unlock(&kref_mutex); } static struct usb_driver xillyusb_driver = { From 0266a177631d4c6b963b5b12dd986a8c5abdbf06 Mon Sep 17 00:00:00 2001 From: Long Li 
Date: Thu, 3 Nov 2022 12:16:30 -0700 Subject: [PATCH 1393/4122] RDMA/mana_ib: Add a driver for Microsoft Azure Network Adapter Add a RDMA VF driver for Microsoft Azure Network Adapter (MANA). Co-developed-by: Ajay Sharma Signed-off-by: Ajay Sharma Reviewed-by: Dexuan Cui Signed-off-by: Long Li Link: https://lore.kernel.org/r/1667502990-2559-13-git-send-email-longli@linuxonhyperv.com Signed-off-by: Leon Romanovsky --- MAINTAINERS | 9 + drivers/infiniband/Kconfig | 1 + drivers/infiniband/hw/Makefile | 1 + drivers/infiniband/hw/mana/Kconfig | 10 + drivers/infiniband/hw/mana/Makefile | 4 + drivers/infiniband/hw/mana/cq.c | 79 ++++ drivers/infiniband/hw/mana/device.c | 117 ++++++ drivers/infiniband/hw/mana/main.c | 521 ++++++++++++++++++++++++ drivers/infiniband/hw/mana/mana_ib.h | 162 ++++++++ drivers/infiniband/hw/mana/mr.c | 198 +++++++++ drivers/infiniband/hw/mana/qp.c | 506 +++++++++++++++++++++++ drivers/infiniband/hw/mana/wq.c | 115 ++++++ include/net/mana/mana.h | 3 + include/uapi/rdma/ib_user_ioctl_verbs.h | 1 + include/uapi/rdma/mana-abi.h | 66 +++ 15 files changed, 1793 insertions(+) create mode 100644 drivers/infiniband/hw/mana/Kconfig create mode 100644 drivers/infiniband/hw/mana/Makefile create mode 100644 drivers/infiniband/hw/mana/cq.c create mode 100644 drivers/infiniband/hw/mana/device.c create mode 100644 drivers/infiniband/hw/mana/main.c create mode 100644 drivers/infiniband/hw/mana/mana_ib.h create mode 100644 drivers/infiniband/hw/mana/mr.c create mode 100644 drivers/infiniband/hw/mana/qp.c create mode 100644 drivers/infiniband/hw/mana/wq.c create mode 100644 include/uapi/rdma/mana-abi.h diff --git a/MAINTAINERS b/MAINTAINERS index 441a65d41eb4..4db8e4e02c05 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13669,6 +13669,15 @@ F: drivers/scsi/smartpqi/smartpqi*.[ch] F: include/linux/cciss*.h F: include/uapi/linux/cciss*.h +MICROSOFT MANA RDMA DRIVER +M: Long Li +M: Ajay Sharma +L: linux-rdma@vger.kernel.org +S: Supported +F: 
drivers/infiniband/hw/mana/ +F: include/net/mana +F: include/uapi/rdma/mana-abi.h + MICROSOFT SURFACE AGGREGATOR TABLET-MODE SWITCH M: Maximilian Luz L: platform-driver-x86@vger.kernel.org diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index aa36ac618e72..ccc874478f0b 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -85,6 +85,7 @@ source "drivers/infiniband/hw/erdma/Kconfig" source "drivers/infiniband/hw/hfi1/Kconfig" source "drivers/infiniband/hw/hns/Kconfig" source "drivers/infiniband/hw/irdma/Kconfig" +source "drivers/infiniband/hw/mana/Kconfig" source "drivers/infiniband/hw/mlx4/Kconfig" source "drivers/infiniband/hw/mlx5/Kconfig" source "drivers/infiniband/hw/mthca/Kconfig" diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile index 6b3a88046125..1211f4317a9f 100644 --- a/drivers/infiniband/hw/Makefile +++ b/drivers/infiniband/hw/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_INFINIBAND_QIB) += qib/ obj-$(CONFIG_INFINIBAND_CXGB4) += cxgb4/ obj-$(CONFIG_INFINIBAND_EFA) += efa/ obj-$(CONFIG_INFINIBAND_IRDMA) += irdma/ +obj-$(CONFIG_MANA_INFINIBAND) += mana/ obj-$(CONFIG_MLX4_INFINIBAND) += mlx4/ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5/ obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma/ diff --git a/drivers/infiniband/hw/mana/Kconfig b/drivers/infiniband/hw/mana/Kconfig new file mode 100644 index 000000000000..546640657bac --- /dev/null +++ b/drivers/infiniband/hw/mana/Kconfig @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0-only +config MANA_INFINIBAND + tristate "Microsoft Azure Network Adapter support" + depends on NETDEVICES && ETHERNET && PCI && MICROSOFT_MANA + help + This driver provides low-level RDMA support for Microsoft Azure + Network Adapter (MANA). MANA supports RDMA features that can be used + for workloads (e.g. DPDK, MPI etc) that uses RDMA verbs to directly + access hardware from user-mode processes in Microsoft Azure cloud + environment. 
diff --git a/drivers/infiniband/hw/mana/Makefile b/drivers/infiniband/hw/mana/Makefile new file mode 100644 index 000000000000..88655fe5e398 --- /dev/null +++ b/drivers/infiniband/hw/mana/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_MANA_INFINIBAND) += mana_ib.o + +mana_ib-y := device.o main.o wq.o qp.o cq.o mr.o diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c new file mode 100644 index 000000000000..d141cab8a1e6 --- /dev/null +++ b/drivers/infiniband/hw/mana/cq.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2022, Microsoft Corporation. All rights reserved. + */ + +#include "mana_ib.h" + +int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata) +{ + struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq); + struct ib_device *ibdev = ibcq->device; + struct mana_ib_create_cq ucmd = {}; + struct mana_ib_dev *mdev; + int err; + + mdev = container_of(ibdev, struct mana_ib_dev, ib_dev); + + if (udata->inlen < sizeof(ucmd)) + return -EINVAL; + + err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen)); + if (err) { + ibdev_dbg(ibdev, + "Failed to copy from udata for create cq, %d\n", err); + return err; + } + + if (attr->cqe > MAX_SEND_BUFFERS_PER_QUEUE) { + ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe); + return -EINVAL; + } + + cq->cqe = attr->cqe; + cq->umem = ib_umem_get(ibdev, ucmd.buf_addr, cq->cqe * COMP_ENTRY_SIZE, + IB_ACCESS_LOCAL_WRITE); + if (IS_ERR(cq->umem)) { + err = PTR_ERR(cq->umem); + ibdev_dbg(ibdev, "Failed to get umem for create cq, err %d\n", + err); + return err; + } + + err = mana_ib_gd_create_dma_region(mdev, cq->umem, &cq->gdma_region); + if (err) { + ibdev_dbg(ibdev, + "Failed to create dma region for create cq, %d\n", + err); + goto err_release_umem; + } + + ibdev_dbg(ibdev, + "mana_ib_gd_create_dma_region ret %d gdma_region 0x%llx\n", + err, cq->gdma_region); + 
+ /* + * The CQ ID is not known at this time. The ID is generated at create_qp + */ + + return 0; + +err_release_umem: + ib_umem_release(cq->umem); + return err; +} + +int mana_ib_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) +{ + struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq); + struct ib_device *ibdev = ibcq->device; + struct mana_ib_dev *mdev; + + mdev = container_of(ibdev, struct mana_ib_dev, ib_dev); + + mana_ib_gd_destroy_dma_region(mdev, cq->gdma_region); + ib_umem_release(cq->umem); + + return 0; +} diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c new file mode 100644 index 000000000000..d4541b8707e4 --- /dev/null +++ b/drivers/infiniband/hw/mana/device.c @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2022, Microsoft Corporation. All rights reserved. + */ + +#include "mana_ib.h" +#include + +MODULE_DESCRIPTION("Microsoft Azure Network Adapter IB driver"); +MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(NET_MANA); + +static const struct ib_device_ops mana_ib_dev_ops = { + .owner = THIS_MODULE, + .driver_id = RDMA_DRIVER_MANA, + .uverbs_abi_ver = MANA_IB_UVERBS_ABI_VERSION, + + .alloc_pd = mana_ib_alloc_pd, + .alloc_ucontext = mana_ib_alloc_ucontext, + .create_cq = mana_ib_create_cq, + .create_qp = mana_ib_create_qp, + .create_rwq_ind_table = mana_ib_create_rwq_ind_table, + .create_wq = mana_ib_create_wq, + .dealloc_pd = mana_ib_dealloc_pd, + .dealloc_ucontext = mana_ib_dealloc_ucontext, + .dereg_mr = mana_ib_dereg_mr, + .destroy_cq = mana_ib_destroy_cq, + .destroy_qp = mana_ib_destroy_qp, + .destroy_rwq_ind_table = mana_ib_destroy_rwq_ind_table, + .destroy_wq = mana_ib_destroy_wq, + .disassociate_ucontext = mana_ib_disassociate_ucontext, + .get_port_immutable = mana_ib_get_port_immutable, + .mmap = mana_ib_mmap, + .modify_qp = mana_ib_modify_qp, + .modify_wq = mana_ib_modify_wq, + .query_device = mana_ib_query_device, + .query_gid = mana_ib_query_gid, + 
.query_port = mana_ib_query_port, + .reg_user_mr = mana_ib_reg_user_mr, + + INIT_RDMA_OBJ_SIZE(ib_cq, mana_ib_cq, ibcq), + INIT_RDMA_OBJ_SIZE(ib_pd, mana_ib_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_qp, mana_ib_qp, ibqp), + INIT_RDMA_OBJ_SIZE(ib_ucontext, mana_ib_ucontext, ibucontext), + INIT_RDMA_OBJ_SIZE(ib_rwq_ind_table, mana_ib_rwq_ind_table, + ib_ind_table), +}; + +static int mana_ib_probe(struct auxiliary_device *adev, + const struct auxiliary_device_id *id) +{ + struct mana_adev *madev = container_of(adev, struct mana_adev, adev); + struct gdma_dev *mdev = madev->mdev; + struct mana_context *mc; + struct mana_ib_dev *dev; + int ret; + + mc = mdev->driver_data; + + dev = ib_alloc_device(mana_ib_dev, ib_dev); + if (!dev) + return -ENOMEM; + + ib_set_device_ops(&dev->ib_dev, &mana_ib_dev_ops); + + dev->ib_dev.phys_port_cnt = mc->num_ports; + + ibdev_dbg(&dev->ib_dev, "mdev=%p id=%d num_ports=%d\n", mdev, + mdev->dev_id.as_uint32, dev->ib_dev.phys_port_cnt); + + dev->gdma_dev = mdev; + dev->ib_dev.node_type = RDMA_NODE_IB_CA; + + /* + * num_comp_vectors needs to set to the max MSIX index + * when interrupts and event queues are implemented + */ + dev->ib_dev.num_comp_vectors = 1; + dev->ib_dev.dev.parent = mdev->gdma_context->dev; + + ret = ib_register_device(&dev->ib_dev, "mana_%d", + mdev->gdma_context->dev); + if (ret) { + ib_dealloc_device(&dev->ib_dev); + return ret; + } + + dev_set_drvdata(&adev->dev, dev); + + return 0; +} + +static void mana_ib_remove(struct auxiliary_device *adev) +{ + struct mana_ib_dev *dev = dev_get_drvdata(&adev->dev); + + ib_unregister_device(&dev->ib_dev); + ib_dealloc_device(&dev->ib_dev); +} + +static const struct auxiliary_device_id mana_id_table[] = { + { + .name = "mana.rdma", + }, + {}, +}; + +MODULE_DEVICE_TABLE(auxiliary, mana_id_table); + +static struct auxiliary_driver mana_driver = { + .name = "rdma", + .probe = mana_ib_probe, + .remove = mana_ib_remove, + .id_table = mana_id_table, +}; + +module_auxiliary_driver(mana_driver); 
diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c new file mode 100644 index 000000000000..8b3bc302d6f3 --- /dev/null +++ b/drivers/infiniband/hw/mana/main.c @@ -0,0 +1,521 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2022, Microsoft Corporation. All rights reserved. + */ + +#include "mana_ib.h" + +void mana_ib_uncfg_vport(struct mana_ib_dev *dev, struct mana_ib_pd *pd, + u32 port) +{ + struct gdma_dev *gd = dev->gdma_dev; + struct mana_port_context *mpc; + struct net_device *ndev; + struct mana_context *mc; + + mc = gd->driver_data; + ndev = mc->ports[port]; + mpc = netdev_priv(ndev); + + mutex_lock(&pd->vport_mutex); + + pd->vport_use_count--; + WARN_ON(pd->vport_use_count < 0); + + if (!pd->vport_use_count) + mana_uncfg_vport(mpc); + + mutex_unlock(&pd->vport_mutex); +} + +int mana_ib_cfg_vport(struct mana_ib_dev *dev, u32 port, struct mana_ib_pd *pd, + u32 doorbell_id) +{ + struct gdma_dev *mdev = dev->gdma_dev; + struct mana_port_context *mpc; + struct mana_context *mc; + struct net_device *ndev; + int err; + + mc = mdev->driver_data; + ndev = mc->ports[port]; + mpc = netdev_priv(ndev); + + mutex_lock(&pd->vport_mutex); + + pd->vport_use_count++; + if (pd->vport_use_count > 1) { + ibdev_dbg(&dev->ib_dev, + "Skip as this PD is already configured vport\n"); + mutex_unlock(&pd->vport_mutex); + return 0; + } + + err = mana_cfg_vport(mpc, pd->pdn, doorbell_id); + if (err) { + pd->vport_use_count--; + mutex_unlock(&pd->vport_mutex); + + ibdev_dbg(&dev->ib_dev, "Failed to configure vPort %d\n", err); + return err; + } + + mutex_unlock(&pd->vport_mutex); + + pd->tx_shortform_allowed = mpc->tx_shortform_allowed; + pd->tx_vp_offset = mpc->tx_vp_offset; + + ibdev_dbg(&dev->ib_dev, "vport handle %llx pdid %x doorbell_id %x\n", + mpc->port_handle, pd->pdn, doorbell_id); + + return 0; +} + +int mana_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) +{ + struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, 
ibpd); + struct ib_device *ibdev = ibpd->device; + struct gdma_create_pd_resp resp = {}; + struct gdma_create_pd_req req = {}; + enum gdma_pd_flags flags = 0; + struct mana_ib_dev *dev; + struct gdma_dev *mdev; + int err; + + dev = container_of(ibdev, struct mana_ib_dev, ib_dev); + mdev = dev->gdma_dev; + + mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_PD, sizeof(req), + sizeof(resp)); + + req.flags = flags; + err = mana_gd_send_request(mdev->gdma_context, sizeof(req), &req, + sizeof(resp), &resp); + + if (err || resp.hdr.status) { + ibdev_dbg(&dev->ib_dev, + "Failed to get pd_id err %d status %u\n", err, + resp.hdr.status); + if (!err) + err = -EPROTO; + + return err; + } + + pd->pd_handle = resp.pd_handle; + pd->pdn = resp.pd_id; + ibdev_dbg(&dev->ib_dev, "pd_handle 0x%llx pd_id %d\n", + pd->pd_handle, pd->pdn); + + mutex_init(&pd->vport_mutex); + pd->vport_use_count = 0; + return 0; +} + +int mana_ib_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) +{ + struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd); + struct ib_device *ibdev = ibpd->device; + struct gdma_destory_pd_resp resp = {}; + struct gdma_destroy_pd_req req = {}; + struct mana_ib_dev *dev; + struct gdma_dev *mdev; + int err; + + dev = container_of(ibdev, struct mana_ib_dev, ib_dev); + mdev = dev->gdma_dev; + + mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_PD, sizeof(req), + sizeof(resp)); + + req.pd_handle = pd->pd_handle; + err = mana_gd_send_request(mdev->gdma_context, sizeof(req), &req, + sizeof(resp), &resp); + + if (err || resp.hdr.status) { + ibdev_dbg(&dev->ib_dev, + "Failed to destroy pd_handle 0x%llx err %d status %u", + pd->pd_handle, err, resp.hdr.status); + if (!err) + err = -EPROTO; + } + + return err; +} + +static int mana_gd_destroy_doorbell_page(struct gdma_context *gc, + int doorbell_page) +{ + struct gdma_destroy_resource_range_req req = {}; + struct gdma_resp_hdr resp = {}; + int err; + + mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_RESOURCE_RANGE, + sizeof(req), 
sizeof(resp)); + + req.resource_type = GDMA_RESOURCE_DOORBELL_PAGE; + req.num_resources = 1; + req.allocated_resources = doorbell_page; + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err || resp.status) { + dev_err(gc->dev, + "Failed to destroy doorbell page: ret %d, 0x%x\n", + err, resp.status); + return err ?: -EPROTO; + } + + return 0; +} + +static int mana_gd_allocate_doorbell_page(struct gdma_context *gc, + int *doorbell_page) +{ + struct gdma_allocate_resource_range_req req = {}; + struct gdma_allocate_resource_range_resp resp = {}; + int err; + + mana_gd_init_req_hdr(&req.hdr, GDMA_ALLOCATE_RESOURCE_RANGE, + sizeof(req), sizeof(resp)); + + req.resource_type = GDMA_RESOURCE_DOORBELL_PAGE; + req.num_resources = 1; + req.alignment = 1; + + /* Have GDMA start searching from 0 */ + req.allocated_resources = 0; + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err || resp.hdr.status) { + dev_err(gc->dev, + "Failed to allocate doorbell page: ret %d, 0x%x\n", + err, resp.hdr.status); + return err ?: -EPROTO; + } + + *doorbell_page = resp.allocated_resources; + + return 0; +} + +int mana_ib_alloc_ucontext(struct ib_ucontext *ibcontext, + struct ib_udata *udata) +{ + struct mana_ib_ucontext *ucontext = + container_of(ibcontext, struct mana_ib_ucontext, ibucontext); + struct ib_device *ibdev = ibcontext->device; + struct mana_ib_dev *mdev; + struct gdma_context *gc; + struct gdma_dev *dev; + int doorbell_page; + int ret; + + mdev = container_of(ibdev, struct mana_ib_dev, ib_dev); + dev = mdev->gdma_dev; + gc = dev->gdma_context; + + /* Allocate a doorbell page index */ + ret = mana_gd_allocate_doorbell_page(gc, &doorbell_page); + if (ret) { + ibdev_dbg(ibdev, "Failed to allocate doorbell page %d\n", ret); + return ret; + } + + ibdev_dbg(ibdev, "Doorbell page allocated %d\n", doorbell_page); + + ucontext->doorbell = doorbell_page; + + return 0; +} + +void mana_ib_dealloc_ucontext(struct ib_ucontext 
*ibcontext) +{ + struct mana_ib_ucontext *mana_ucontext = + container_of(ibcontext, struct mana_ib_ucontext, ibucontext); + struct ib_device *ibdev = ibcontext->device; + struct mana_ib_dev *mdev; + struct gdma_context *gc; + int ret; + + mdev = container_of(ibdev, struct mana_ib_dev, ib_dev); + gc = mdev->gdma_dev->gdma_context; + + ret = mana_gd_destroy_doorbell_page(gc, mana_ucontext->doorbell); + if (ret) + ibdev_dbg(ibdev, "Failed to destroy doorbell page %d\n", ret); +} + +static int +mana_ib_gd_first_dma_region(struct mana_ib_dev *dev, + struct gdma_context *gc, + struct gdma_create_dma_region_req *create_req, + size_t num_pages, mana_handle_t *gdma_region) +{ + struct gdma_create_dma_region_resp create_resp = {}; + unsigned int create_req_msg_size; + int err; + + create_req_msg_size = + struct_size(create_req, page_addr_list, num_pages); + create_req->page_addr_list_len = num_pages; + + err = mana_gd_send_request(gc, create_req_msg_size, create_req, + sizeof(create_resp), &create_resp); + if (err || create_resp.hdr.status) { + ibdev_dbg(&dev->ib_dev, + "Failed to create DMA region: %d, 0x%x\n", + err, create_resp.hdr.status); + if (!err) + err = -EPROTO; + + return err; + } + + *gdma_region = create_resp.dma_region_handle; + ibdev_dbg(&dev->ib_dev, "Created DMA region handle 0x%llx\n", + *gdma_region); + + return 0; +} + +static int +mana_ib_gd_add_dma_region(struct mana_ib_dev *dev, struct gdma_context *gc, + struct gdma_dma_region_add_pages_req *add_req, + unsigned int num_pages, u32 expected_status) +{ + unsigned int add_req_msg_size = + struct_size(add_req, page_addr_list, num_pages); + struct gdma_general_resp add_resp = {}; + int err; + + mana_gd_init_req_hdr(&add_req->hdr, GDMA_DMA_REGION_ADD_PAGES, + add_req_msg_size, sizeof(add_resp)); + add_req->page_addr_list_len = num_pages; + + err = mana_gd_send_request(gc, add_req_msg_size, add_req, + sizeof(add_resp), &add_resp); + if (err || add_resp.hdr.status != expected_status) { + 
ibdev_dbg(&dev->ib_dev, + "Failed to create DMA region: %d, 0x%x\n", + err, add_resp.hdr.status); + + if (!err) + err = -EPROTO; + + return err; + } + + return 0; +} + +int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem, + mana_handle_t *gdma_region) +{ + struct gdma_dma_region_add_pages_req *add_req = NULL; + size_t num_pages_processed = 0, num_pages_to_handle; + struct gdma_create_dma_region_req *create_req; + unsigned int create_req_msg_size; + struct hw_channel_context *hwc; + struct ib_block_iter biter; + size_t max_pgs_add_cmd = 0; + size_t max_pgs_create_cmd; + struct gdma_context *gc; + size_t num_pages_total; + struct gdma_dev *mdev; + unsigned long page_sz; + unsigned int tail = 0; + u64 *page_addr_list; + void *request_buf; + /* Read after the block loop even when no request was sent */ + int err = 0; + + mdev = dev->gdma_dev; + gc = mdev->gdma_context; + hwc = gc->hwc.driver_data; + + /* Hardware requires dma region to align to chosen page size */ + page_sz = ib_umem_find_best_pgsz(umem, PAGE_SZ_BM, 0); + if (!page_sz) { + ibdev_dbg(&dev->ib_dev, "failed to find page size.\n"); + return -ENOMEM; + } + num_pages_total = ib_umem_num_dma_blocks(umem, page_sz); + + max_pgs_create_cmd = + (hwc->max_req_msg_size - sizeof(*create_req)) / sizeof(u64); + num_pages_to_handle = + min_t(size_t, num_pages_total, max_pgs_create_cmd); + create_req_msg_size = + struct_size(create_req, page_addr_list, num_pages_to_handle); + + request_buf = kzalloc(hwc->max_req_msg_size, GFP_KERNEL); + if (!request_buf) + return -ENOMEM; + + create_req = request_buf; + mana_gd_init_req_hdr(&create_req->hdr, GDMA_CREATE_DMA_REGION, + create_req_msg_size, + sizeof(struct gdma_create_dma_region_resp)); + + create_req->length = umem->length; + create_req->offset_in_page = umem->address & (page_sz - 1); + create_req->gdma_page_type = order_base_2(page_sz) - PAGE_SHIFT; + create_req->page_count = num_pages_total; + + ibdev_dbg(&dev->ib_dev, "size_dma_region %lu num_pages_total %lu\n", + umem->length, num_pages_total); + + 
ibdev_dbg(&dev->ib_dev, "page_sz %lu offset_in_page %u\n", + page_sz, create_req->offset_in_page); + + ibdev_dbg(&dev->ib_dev, "num_pages_to_handle %lu, gdma_page_type %u", + num_pages_to_handle, create_req->gdma_page_type); + + page_addr_list = create_req->page_addr_list; + rdma_umem_for_each_dma_block(umem, &biter, page_sz) { + page_addr_list[tail++] = rdma_block_iter_dma_address(&biter); + if (tail < num_pages_to_handle) + continue; + + if (!num_pages_processed) { + /* First create message */ + err = mana_ib_gd_first_dma_region(dev, gc, create_req, + tail, gdma_region); + if (err) + goto out; + + max_pgs_add_cmd = (hwc->max_req_msg_size - + sizeof(*add_req)) / sizeof(u64); + + add_req = request_buf; + add_req->dma_region_handle = *gdma_region; + add_req->reserved3 = 0; + page_addr_list = add_req->page_addr_list; + } else { + /* Subsequent create messages */ + u32 expected_s = 0; + + if (num_pages_processed + num_pages_to_handle < + num_pages_total) + expected_s = GDMA_STATUS_MORE_ENTRIES; + + err = mana_ib_gd_add_dma_region(dev, gc, add_req, tail, + expected_s); + if (err) + break; + } + + num_pages_processed += tail; + tail = 0; + + /* The remaining pages to create */ + num_pages_to_handle = + min_t(size_t, + num_pages_total - num_pages_processed, + max_pgs_add_cmd); + } + + if (err) + mana_ib_gd_destroy_dma_region(dev, *gdma_region); + +out: + kfree(request_buf); + return err; +} + +int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *dev, u64 gdma_region) +{ + struct gdma_dev *mdev = dev->gdma_dev; + struct gdma_context *gc; + + gc = mdev->gdma_context; + ibdev_dbg(&dev->ib_dev, "destroy dma region 0x%llx\n", gdma_region); + + return mana_gd_destroy_dma_region(gc, gdma_region); +} + +int mana_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) +{ + struct mana_ib_ucontext *mana_ucontext = + container_of(ibcontext, struct mana_ib_ucontext, ibucontext); + struct ib_device *ibdev = ibcontext->device; + struct mana_ib_dev *mdev; + struct 
gdma_context *gc; + phys_addr_t pfn; + pgprot_t prot; + int ret; + + mdev = container_of(ibdev, struct mana_ib_dev, ib_dev); + gc = mdev->gdma_dev->gdma_context; + + if (vma->vm_pgoff != 0) { + ibdev_dbg(ibdev, "Unexpected vm_pgoff %lu\n", vma->vm_pgoff); + return -EINVAL; + } + + /* Map to the page indexed by ucontext->doorbell */ + pfn = (gc->phys_db_page_base + + gc->db_page_size * mana_ucontext->doorbell) >> + PAGE_SHIFT; + prot = pgprot_writecombine(vma->vm_page_prot); + + ret = rdma_user_mmap_io(ibcontext, vma, pfn, gc->db_page_size, prot, + NULL); + if (ret) + ibdev_dbg(ibdev, "can't rdma_user_mmap_io ret %d\n", ret); + else + ibdev_dbg(ibdev, "mapped I/O pfn 0x%llx page_size %u, ret %d\n", + pfn, gc->db_page_size, ret); + + return ret; +} + +int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num, + struct ib_port_immutable *immutable) +{ + /* + * This version only support RAW_PACKET + * other values need to be filled for other types + */ + immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET; + + return 0; +} + +int mana_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props, + struct ib_udata *uhw) +{ + props->max_qp = MANA_MAX_NUM_QUEUES; + props->max_qp_wr = MAX_SEND_BUFFERS_PER_QUEUE; + + /* + * max_cqe could be potentially much bigger. 
+ * As this version of driver only support RAW QP, set it to the same + * value as max_qp_wr + */ + props->max_cqe = MAX_SEND_BUFFERS_PER_QUEUE; + + props->max_mr_size = MANA_IB_MAX_MR_SIZE; + props->max_mr = MANA_IB_MAX_MR; + props->max_send_sge = MAX_TX_WQE_SGL_ENTRIES; + props->max_recv_sge = MAX_RX_WQE_SGL_ENTRIES; + + return 0; +} + +int mana_ib_query_port(struct ib_device *ibdev, u32 port, + struct ib_port_attr *props) +{ + /* This version doesn't return port properties */ + return 0; +} + +int mana_ib_query_gid(struct ib_device *ibdev, u32 port, int index, + union ib_gid *gid) +{ + /* This version doesn't return GID properties */ + return 0; +} + +void mana_ib_disassociate_ucontext(struct ib_ucontext *ibcontext) +{ +} diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h new file mode 100644 index 000000000000..502cc8672eef --- /dev/null +++ b/drivers/infiniband/hw/mana/mana_ib.h @@ -0,0 +1,162 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022 Microsoft Corporation. All rights reserved. 
+ */ + +#ifndef _MANA_IB_H_ +#define _MANA_IB_H_ + +#include +#include +#include +#include +#include + +#include + +#define PAGE_SZ_BM \ + (SZ_4K | SZ_8K | SZ_16K | SZ_32K | SZ_64K | SZ_128K | SZ_256K | \ + SZ_512K | SZ_1M | SZ_2M) + +/* MANA doesn't have any limit for MR size */ +#define MANA_IB_MAX_MR_SIZE U64_MAX + +/* + * The hardware limit of number of MRs is greater than maximum number of MRs + * that can possibly represent in 24 bits + */ +#define MANA_IB_MAX_MR 0xFFFFFFu + +struct mana_ib_dev { + struct ib_device ib_dev; + struct gdma_dev *gdma_dev; +}; + +struct mana_ib_wq { + struct ib_wq ibwq; + struct ib_umem *umem; + int wqe; + u32 wq_buf_size; + u64 gdma_region; + u64 id; + mana_handle_t rx_object; +}; + +struct mana_ib_pd { + struct ib_pd ibpd; + u32 pdn; + mana_handle_t pd_handle; + + /* Mutex for sharing access to vport_use_count */ + struct mutex vport_mutex; + int vport_use_count; + + bool tx_shortform_allowed; + u32 tx_vp_offset; +}; + +struct mana_ib_mr { + struct ib_mr ibmr; + struct ib_umem *umem; + mana_handle_t mr_handle; +}; + +struct mana_ib_cq { + struct ib_cq ibcq; + struct ib_umem *umem; + int cqe; + u64 gdma_region; + u64 id; +}; + +struct mana_ib_qp { + struct ib_qp ibqp; + + /* Work queue info */ + struct ib_umem *sq_umem; + int sqe; + u64 sq_gdma_region; + u64 sq_id; + mana_handle_t tx_object; + + /* The port on the IB device, starting with 1 */ + u32 port; +}; + +struct mana_ib_ucontext { + struct ib_ucontext ibucontext; + u32 doorbell; +}; + +struct mana_ib_rwq_ind_table { + struct ib_rwq_ind_table ib_ind_table; +}; + +int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem, + mana_handle_t *gdma_region); + +int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *dev, + mana_handle_t gdma_region); + +struct ib_wq *mana_ib_create_wq(struct ib_pd *pd, + struct ib_wq_init_attr *init_attr, + struct ib_udata *udata); + +int mana_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, + u32 wq_attr_mask, struct 
ib_udata *udata); + +int mana_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata); + +int mana_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_table, + struct ib_rwq_ind_table_init_attr *init_attr, + struct ib_udata *udata); + +int mana_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl); + +struct ib_mr *mana_ib_get_dma_mr(struct ib_pd *ibpd, int access_flags); + +struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 iova, int access_flags, + struct ib_udata *udata); + +int mana_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); + +int mana_ib_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *qp_init_attr, + struct ib_udata *udata); + +int mana_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata); + +int mana_ib_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata); + +int mana_ib_cfg_vport(struct mana_ib_dev *dev, u32 port_id, + struct mana_ib_pd *pd, u32 doorbell_id); +void mana_ib_uncfg_vport(struct mana_ib_dev *dev, struct mana_ib_pd *pd, + u32 port); + +int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata); + +int mana_ib_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); + +int mana_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); +int mana_ib_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); + +int mana_ib_alloc_ucontext(struct ib_ucontext *ibcontext, + struct ib_udata *udata); +void mana_ib_dealloc_ucontext(struct ib_ucontext *ibcontext); + +int mana_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma); + +int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num, + struct ib_port_immutable *immutable); +int mana_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props, + struct ib_udata *uhw); +int mana_ib_query_port(struct ib_device *ibdev, u32 port, + struct ib_port_attr *props); +int mana_ib_query_gid(struct ib_device *ibdev, u32 port, 
int index, + union ib_gid *gid); + +void mana_ib_disassociate_ucontext(struct ib_ucontext *ibcontext); + +#endif diff --git a/drivers/infiniband/hw/mana/mr.c b/drivers/infiniband/hw/mana/mr.c new file mode 100644 index 000000000000..a56236cdd9ee --- /dev/null +++ b/drivers/infiniband/hw/mana/mr.c @@ -0,0 +1,198 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2022, Microsoft Corporation. All rights reserved. + */ + +#include "mana_ib.h" + +#define VALID_MR_FLAGS \ + (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ) + +static enum gdma_mr_access_flags +mana_ib_verbs_to_gdma_access_flags(int access_flags) +{ + enum gdma_mr_access_flags flags = GDMA_ACCESS_FLAG_LOCAL_READ; + + if (access_flags & IB_ACCESS_LOCAL_WRITE) + flags |= GDMA_ACCESS_FLAG_LOCAL_WRITE; + + if (access_flags & IB_ACCESS_REMOTE_WRITE) + flags |= GDMA_ACCESS_FLAG_REMOTE_WRITE; + + if (access_flags & IB_ACCESS_REMOTE_READ) + flags |= GDMA_ACCESS_FLAG_REMOTE_READ; + + return flags; +} + +static int mana_ib_gd_create_mr(struct mana_ib_dev *dev, struct mana_ib_mr *mr, + struct gdma_create_mr_params *mr_params) +{ + struct gdma_create_mr_response resp = {}; + struct gdma_create_mr_request req = {}; + struct gdma_dev *mdev = dev->gdma_dev; + struct gdma_context *gc; + int err; + + gc = mdev->gdma_context; + + mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_MR, sizeof(req), + sizeof(resp)); + req.pd_handle = mr_params->pd_handle; + req.mr_type = mr_params->mr_type; + + switch (mr_params->mr_type) { + case GDMA_MR_TYPE_GVA: + req.gva.dma_region_handle = mr_params->gva.dma_region_handle; + req.gva.virtual_address = mr_params->gva.virtual_address; + req.gva.access_flags = mr_params->gva.access_flags; + break; + + default: + ibdev_dbg(&dev->ib_dev, + "invalid param (GDMA_MR_TYPE) passed, type %d\n", + req.mr_type); + return -EINVAL; + } + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + + if (err || resp.hdr.status) { + ibdev_dbg(&dev->ib_dev, 
"Failed to create mr %d, %u", err, + resp.hdr.status); + if (!err) + err = -EPROTO; + + return err; + } + + mr->ibmr.lkey = resp.lkey; + mr->ibmr.rkey = resp.rkey; + mr->mr_handle = resp.mr_handle; + + return 0; +} + +static int mana_ib_gd_destroy_mr(struct mana_ib_dev *dev, + gdma_obj_handle_t mr_handle) +{ + struct gdma_destroy_mr_response resp = {}; + struct gdma_destroy_mr_request req = {}; + struct gdma_dev *mdev = dev->gdma_dev; + struct gdma_context *gc; + int err; + + gc = mdev->gdma_context; + + mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_MR, sizeof(req), + sizeof(resp)); + + req.mr_handle = mr_handle; + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err || resp.hdr.status) { + dev_err(gc->dev, "Failed to destroy MR: %d, 0x%x\n", err, + resp.hdr.status); + if (!err) + err = -EPROTO; + return err; + } + + return 0; +} + +struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length, + u64 iova, int access_flags, + struct ib_udata *udata) +{ + struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd); + struct gdma_create_mr_params mr_params = {}; + struct ib_device *ibdev = ibpd->device; + gdma_obj_handle_t dma_region_handle; + struct mana_ib_dev *dev; + struct mana_ib_mr *mr; + int err; + + dev = container_of(ibdev, struct mana_ib_dev, ib_dev); + + ibdev_dbg(ibdev, + "start 0x%llx, iova 0x%llx length 0x%llx access_flags 0x%x", + start, iova, length, access_flags); + + if (access_flags & ~VALID_MR_FLAGS) + return ERR_PTR(-EINVAL); + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + mr->umem = ib_umem_get(ibdev, start, length, access_flags); + if (IS_ERR(mr->umem)) { + err = PTR_ERR(mr->umem); + ibdev_dbg(ibdev, + "Failed to get umem for register user-mr, %d\n", err); + goto err_free; + } + + err = mana_ib_gd_create_dma_region(dev, mr->umem, &dma_region_handle); + if (err) { + ibdev_dbg(ibdev, "Failed create dma region for user-mr, %d\n", + err); + goto err_umem; + } + 
+ ibdev_dbg(ibdev, + "mana_ib_gd_create_dma_region ret %d gdma_region %llx\n", err, + dma_region_handle); + + mr_params.pd_handle = pd->pd_handle; + mr_params.mr_type = GDMA_MR_TYPE_GVA; + mr_params.gva.dma_region_handle = dma_region_handle; + mr_params.gva.virtual_address = iova; + mr_params.gva.access_flags = + mana_ib_verbs_to_gdma_access_flags(access_flags); + + err = mana_ib_gd_create_mr(dev, mr, &mr_params); + if (err) + goto err_dma_region; + + /* + * There is no need to keep track of dma_region_handle after MR is + * successfully created. The dma_region_handle is tracked in the PF + * as part of the lifecycle of this MR. + */ + + return &mr->ibmr; + +err_dma_region: + mana_gd_destroy_dma_region(dev->gdma_dev->gdma_context, + dma_region_handle); + +err_umem: + ib_umem_release(mr->umem); + +err_free: + kfree(mr); + return ERR_PTR(err); +} + +int mana_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) +{ + struct mana_ib_mr *mr = container_of(ibmr, struct mana_ib_mr, ibmr); + struct ib_device *ibdev = ibmr->device; + struct mana_ib_dev *dev; + int err; + + dev = container_of(ibdev, struct mana_ib_dev, ib_dev); + + err = mana_ib_gd_destroy_mr(dev, mr->mr_handle); + if (err) + return err; + + if (mr->umem) + ib_umem_release(mr->umem); + + kfree(mr); + + return 0; +} diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c new file mode 100644 index 000000000000..ea15ec77e321 --- /dev/null +++ b/drivers/infiniband/hw/mana/qp.c @@ -0,0 +1,506 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2022, Microsoft Corporation. All rights reserved. 
+ */ + +#include "mana_ib.h" + +static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev, + struct net_device *ndev, + mana_handle_t default_rxobj, + mana_handle_t ind_table[], + u32 log_ind_tbl_size, u32 rx_hash_key_len, + u8 *rx_hash_key) +{ + struct mana_port_context *mpc = netdev_priv(ndev); + struct mana_cfg_rx_steer_req *req = NULL; + struct mana_cfg_rx_steer_resp resp = {}; + mana_handle_t *req_indir_tab; + struct gdma_context *gc; + struct gdma_dev *mdev; + u32 req_buf_size; + int i, err; + + mdev = dev->gdma_dev; + gc = mdev->gdma_context; + + req_buf_size = + sizeof(*req) + sizeof(mana_handle_t) * MANA_INDIRECT_TABLE_SIZE; + req = kzalloc(req_buf_size, GFP_KERNEL); + if (!req) + return -ENOMEM; + + mana_gd_init_req_hdr(&req->hdr, MANA_CONFIG_VPORT_RX, req_buf_size, + sizeof(resp)); + + req->vport = mpc->port_handle; + req->rx_enable = 1; + req->update_default_rxobj = 1; + req->default_rxobj = default_rxobj; + req->hdr.dev_id = mdev->dev_id; + + /* If there are more than 1 entries in indirection table, enable RSS */ + if (log_ind_tbl_size) + req->rss_enable = true; + + req->num_indir_entries = MANA_INDIRECT_TABLE_SIZE; + req->indir_tab_offset = sizeof(*req); + req->update_indir_tab = true; + + req_indir_tab = (mana_handle_t *)(req + 1); + /* The ind table passed to the hardware must have + * MANA_INDIRECT_TABLE_SIZE entries. 
Adjust the verb + * ind_table to MANA_INDIRECT_TABLE_SIZE if required + */ + ibdev_dbg(&dev->ib_dev, "ind table size %u\n", 1 << log_ind_tbl_size); + for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) { + req_indir_tab[i] = ind_table[i % (1 << log_ind_tbl_size)]; + ibdev_dbg(&dev->ib_dev, "index %u handle 0x%llx\n", i, + req_indir_tab[i]); + } + + /* + * The key length is supplied by the caller; never copy more than the + * MANA_HASH_KEY_SIZE bytes that the default-key path below fills. + */ + if (rx_hash_key_len > MANA_HASH_KEY_SIZE) { + ibdev_dbg(&dev->ib_dev, "Invalid rx_hash_key_len %u\n", + rx_hash_key_len); + err = -EINVAL; + goto out; + } + + req->update_hashkey = true; + if (rx_hash_key_len) + memcpy(req->hashkey, rx_hash_key, rx_hash_key_len); + else + netdev_rss_key_fill(req->hashkey, MANA_HASH_KEY_SIZE); + + ibdev_dbg(&dev->ib_dev, "vport handle %llu default_rxobj 0x%llx\n", + req->vport, default_rxobj); + + err = mana_gd_send_request(gc, req_buf_size, req, sizeof(resp), &resp); + if (err) { + netdev_err(ndev, "Failed to configure vPort RX: %d\n", err); + goto out; + } + + if (resp.hdr.status) { + netdev_err(ndev, "vPort RX configuration failed: 0x%x\n", + resp.hdr.status); + err = -EPROTO; + goto out; + } + + netdev_info(ndev, "Configured steering vPort %llu log_entries %u\n", + mpc->port_handle, log_ind_tbl_size); + +out: + kfree(req); + return err; +} + +static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd, + struct ib_qp_init_attr *attr, + struct ib_udata *udata) +{ + struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp); + struct mana_ib_dev *mdev = + container_of(pd->device, struct mana_ib_dev, ib_dev); + struct ib_rwq_ind_table *ind_tbl = attr->rwq_ind_tbl; + struct mana_ib_create_qp_rss_resp resp = {}; + struct mana_ib_create_qp_rss ucmd = {}; + struct gdma_dev *gd = mdev->gdma_dev; + mana_handle_t *mana_ind_table; + struct mana_port_context *mpc; + struct mana_context *mc; + struct net_device *ndev; + struct mana_ib_cq *cq; + struct mana_ib_wq *wq; + unsigned int ind_tbl_size; + struct ib_cq *ibcq; + struct ib_wq *ibwq; + int i = 0; + u32 port; + int ret; + + mc = gd->driver_data; + + if (!udata || udata->inlen < sizeof(ucmd)) + return -EINVAL; + + ret = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen)); 
+ if (ret) { + ibdev_dbg(&mdev->ib_dev, + "Failed copy from udata for create rss-qp, err %d\n", + ret); + return ret; + } + + if (attr->cap.max_recv_wr > MAX_SEND_BUFFERS_PER_QUEUE) { + ibdev_dbg(&mdev->ib_dev, + "Requested max_recv_wr %d exceeding limit\n", + attr->cap.max_recv_wr); + return -EINVAL; + } + + if (attr->cap.max_recv_sge > MAX_RX_WQE_SGL_ENTRIES) { + ibdev_dbg(&mdev->ib_dev, + "Requested max_recv_sge %d exceeding limit\n", + attr->cap.max_recv_sge); + return -EINVAL; + } + + ind_tbl_size = 1 << ind_tbl->log_ind_tbl_size; + if (ind_tbl_size > MANA_INDIRECT_TABLE_SIZE) { + ibdev_dbg(&mdev->ib_dev, + "Indirect table size %d exceeding limit\n", + ind_tbl_size); + return -EINVAL; + } + + if (ucmd.rx_hash_function != MANA_IB_RX_HASH_FUNC_TOEPLITZ) { + ibdev_dbg(&mdev->ib_dev, + "RX Hash function is not supported, %d\n", + ucmd.rx_hash_function); + return -EINVAL; + } + + /* IB ports start with 1, MANA start with 0 */ + port = ucmd.port; + if (port < 1 || port > mc->num_ports) { + ibdev_dbg(&mdev->ib_dev, "Invalid port %u in creating qp\n", + port); + return -EINVAL; + } + ndev = mc->ports[port - 1]; + mpc = netdev_priv(ndev); + + ibdev_dbg(&mdev->ib_dev, "rx_hash_function %d port %d\n", + ucmd.rx_hash_function, port); + + mana_ind_table = kcalloc(ind_tbl_size, sizeof(mana_handle_t), + GFP_KERNEL); + if (!mana_ind_table) { + ret = -ENOMEM; + goto fail; + } + + qp->port = port; + + for (i = 0; i < ind_tbl_size; i++) { + struct mana_obj_spec wq_spec = {}; + struct mana_obj_spec cq_spec = {}; + + ibwq = ind_tbl->ind_tbl[i]; + wq = container_of(ibwq, struct mana_ib_wq, ibwq); + + ibcq = ibwq->cq; + cq = container_of(ibcq, struct mana_ib_cq, ibcq); + + wq_spec.gdma_region = wq->gdma_region; + wq_spec.queue_size = wq->wq_buf_size; + + cq_spec.gdma_region = cq->gdma_region; + cq_spec.queue_size = cq->cqe * COMP_ENTRY_SIZE; + cq_spec.modr_ctx_id = 0; + cq_spec.attached_eq = GDMA_CQ_NO_EQ; + + ret = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_RQ, + &wq_spec, 
&cq_spec, &wq->rx_object); + if (ret) + goto fail; + + /* The GDMA regions are now owned by the WQ object */ + wq->gdma_region = GDMA_INVALID_DMA_REGION; + cq->gdma_region = GDMA_INVALID_DMA_REGION; + + wq->id = wq_spec.queue_index; + cq->id = cq_spec.queue_index; + + ibdev_dbg(&mdev->ib_dev, + "ret %d rx_object 0x%llx wq id %llu cq id %llu\n", + ret, wq->rx_object, wq->id, cq->id); + + resp.entries[i].cqid = cq->id; + resp.entries[i].wqid = wq->id; + + mana_ind_table[i] = wq->rx_object; + } + resp.num_entries = i; + + ret = mana_ib_cfg_vport_steering(mdev, ndev, wq->rx_object, + mana_ind_table, + ind_tbl->log_ind_tbl_size, + ucmd.rx_hash_key_len, + ucmd.rx_hash_key); + if (ret) + goto fail; + + ret = ib_copy_to_udata(udata, &resp, sizeof(resp)); + if (ret) { + ibdev_dbg(&mdev->ib_dev, + "Failed to copy to udata create rss-qp, %d\n", + ret); + goto fail; + } + + kfree(mana_ind_table); + + return 0; + +fail: + while (i-- > 0) { + ibwq = ind_tbl->ind_tbl[i]; + wq = container_of(ibwq, struct mana_ib_wq, ibwq); + mana_destroy_wq_obj(mpc, GDMA_RQ, wq->rx_object); + } + + kfree(mana_ind_table); + + return ret; +} + +static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd, + struct ib_qp_init_attr *attr, + struct ib_udata *udata) +{ + struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd); + struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp); + struct mana_ib_dev *mdev = + container_of(ibpd->device, struct mana_ib_dev, ib_dev); + struct mana_ib_cq *send_cq = + container_of(attr->send_cq, struct mana_ib_cq, ibcq); + struct mana_ib_ucontext *mana_ucontext = + rdma_udata_to_drv_context(udata, struct mana_ib_ucontext, + ibucontext); + struct mana_ib_create_qp_resp resp = {}; + struct gdma_dev *gd = mdev->gdma_dev; + struct mana_ib_create_qp ucmd = {}; + struct mana_obj_spec wq_spec = {}; + struct mana_obj_spec cq_spec = {}; + struct mana_port_context *mpc; + struct mana_context *mc; + struct net_device *ndev; + struct ib_umem 
*umem; + int err; + u32 port; + + mc = gd->driver_data; + + if (!mana_ucontext || udata->inlen < sizeof(ucmd)) + return -EINVAL; + + err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen)); + if (err) { + ibdev_dbg(&mdev->ib_dev, + "Failed to copy from udata create qp-raw, %d\n", err); + return err; + } + + /* + * IB ports start with 1, MANA Ethernet ports start with 0. + * Port 0 must be rejected too: it would index mc->ports[-1] below. + */ + port = ucmd.port; + if (port < 1 || port > mc->num_ports) + return -EINVAL; + + if (attr->cap.max_send_wr > MAX_SEND_BUFFERS_PER_QUEUE) { + ibdev_dbg(&mdev->ib_dev, + "Requested max_send_wr %d exceeding limit\n", + attr->cap.max_send_wr); + return -EINVAL; + } + + if (attr->cap.max_send_sge > MAX_TX_WQE_SGL_ENTRIES) { + ibdev_dbg(&mdev->ib_dev, + "Requested max_send_sge %d exceeding limit\n", + attr->cap.max_send_sge); + return -EINVAL; + } + + ndev = mc->ports[port - 1]; + mpc = netdev_priv(ndev); + ibdev_dbg(&mdev->ib_dev, "port %u ndev %p mpc %p\n", port, ndev, mpc); + + err = mana_ib_cfg_vport(mdev, port - 1, pd, mana_ucontext->doorbell); + if (err) + return -ENODEV; + + qp->port = port; + + ibdev_dbg(&mdev->ib_dev, "ucmd sq_buf_addr 0x%llx port %u\n", + ucmd.sq_buf_addr, ucmd.port); + + umem = ib_umem_get(ibpd->device, ucmd.sq_buf_addr, ucmd.sq_buf_size, + IB_ACCESS_LOCAL_WRITE); + if (IS_ERR(umem)) { + err = PTR_ERR(umem); + ibdev_dbg(&mdev->ib_dev, + "Failed to get umem for create qp-raw, err %d\n", + err); + goto err_free_vport; + } + qp->sq_umem = umem; + + err = mana_ib_gd_create_dma_region(mdev, qp->sq_umem, + &qp->sq_gdma_region); + if (err) { + ibdev_dbg(&mdev->ib_dev, + "Failed to create dma region for create qp-raw, %d\n", + err); + goto err_release_umem; + } + + ibdev_dbg(&mdev->ib_dev, + "mana_ib_gd_create_dma_region ret %d gdma_region 0x%llx\n", + err, qp->sq_gdma_region); + + /* Create a WQ on the same port handle used by the Ethernet */ + wq_spec.gdma_region = qp->sq_gdma_region; + wq_spec.queue_size = ucmd.sq_buf_size; + + cq_spec.gdma_region = send_cq->gdma_region; + 
cq_spec.queue_size = send_cq->cqe * COMP_ENTRY_SIZE; + cq_spec.modr_ctx_id = 0; + cq_spec.attached_eq = GDMA_CQ_NO_EQ; + + err = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_SQ, &wq_spec, + &cq_spec, &qp->tx_object); + if (err) { + ibdev_dbg(&mdev->ib_dev, + "Failed to create wq for create raw-qp, err %d\n", + err); + goto err_destroy_dma_region; + } + + /* The GDMA regions are now owned by the WQ object */ + qp->sq_gdma_region = GDMA_INVALID_DMA_REGION; + send_cq->gdma_region = GDMA_INVALID_DMA_REGION; + + qp->sq_id = wq_spec.queue_index; + send_cq->id = cq_spec.queue_index; + + ibdev_dbg(&mdev->ib_dev, + "ret %d qp->tx_object 0x%llx sq id %llu cq id %llu\n", err, + qp->tx_object, qp->sq_id, send_cq->id); + + resp.sqid = qp->sq_id; + resp.cqid = send_cq->id; + resp.tx_vp_offset = pd->tx_vp_offset; + + err = ib_copy_to_udata(udata, &resp, sizeof(resp)); + if (err) { + ibdev_dbg(&mdev->ib_dev, + "Failed copy udata for create qp-raw, %d\n", + err); + goto err_destroy_wq_obj; + } + + return 0; + +err_destroy_wq_obj: + mana_destroy_wq_obj(mpc, GDMA_SQ, qp->tx_object); + +err_destroy_dma_region: + mana_ib_gd_destroy_dma_region(mdev, qp->sq_gdma_region); + +err_release_umem: + ib_umem_release(umem); + +err_free_vport: + mana_ib_uncfg_vport(mdev, pd, port - 1); + + return err; +} + +int mana_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr, + struct ib_udata *udata) +{ + switch (attr->qp_type) { + case IB_QPT_RAW_PACKET: + /* When rwq_ind_tbl is used, it's for creating WQs for RSS */ + if (attr->rwq_ind_tbl) + return mana_ib_create_qp_rss(ibqp, ibqp->pd, attr, + udata); + + return mana_ib_create_qp_raw(ibqp, ibqp->pd, attr, udata); + default: + /* Creating QP other than IB_QPT_RAW_PACKET is not supported */ + ibdev_dbg(ibqp->device, "Creating QP type %u not supported\n", + attr->qp_type); + } + + return -EINVAL; +} + +int mana_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) +{ + /* modify_qp is not 
supported by this version of the driver */ + return -EOPNOTSUPP; +} + +static int mana_ib_destroy_qp_rss(struct mana_ib_qp *qp, + struct ib_rwq_ind_table *ind_tbl, + struct ib_udata *udata) +{ + struct mana_ib_dev *mdev = + container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev); + struct gdma_dev *gd = mdev->gdma_dev; + struct mana_port_context *mpc; + struct mana_context *mc; + struct net_device *ndev; + struct mana_ib_wq *wq; + struct ib_wq *ibwq; + int i; + + mc = gd->driver_data; + ndev = mc->ports[qp->port - 1]; + mpc = netdev_priv(ndev); + + for (i = 0; i < (1 << ind_tbl->log_ind_tbl_size); i++) { + ibwq = ind_tbl->ind_tbl[i]; + wq = container_of(ibwq, struct mana_ib_wq, ibwq); + ibdev_dbg(&mdev->ib_dev, "destroying wq->rx_object %llu\n", + wq->rx_object); + mana_destroy_wq_obj(mpc, GDMA_RQ, wq->rx_object); + } + + return 0; +} + +static int mana_ib_destroy_qp_raw(struct mana_ib_qp *qp, struct ib_udata *udata) +{ + struct mana_ib_dev *mdev = + container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev); + struct gdma_dev *gd = mdev->gdma_dev; + struct ib_pd *ibpd = qp->ibqp.pd; + struct mana_port_context *mpc; + struct mana_context *mc; + struct net_device *ndev; + struct mana_ib_pd *pd; + + mc = gd->driver_data; + ndev = mc->ports[qp->port - 1]; + mpc = netdev_priv(ndev); + pd = container_of(ibpd, struct mana_ib_pd, ibpd); + + mana_destroy_wq_obj(mpc, GDMA_SQ, qp->tx_object); + + if (qp->sq_umem) { + mana_ib_gd_destroy_dma_region(mdev, qp->sq_gdma_region); + ib_umem_release(qp->sq_umem); + } + + mana_ib_uncfg_vport(mdev, pd, qp->port - 1); + + return 0; +} + +int mana_ib_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) +{ + struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp); + + switch (ibqp->qp_type) { + case IB_QPT_RAW_PACKET: + if (ibqp->rwq_ind_tbl) + return mana_ib_destroy_qp_rss(qp, ibqp->rwq_ind_tbl, + udata); + + return mana_ib_destroy_qp_raw(qp, udata); + + default: + ibdev_dbg(ibqp->device, "Unexpected QP type %u\n", + 
ibqp->qp_type); + } + + return -ENOENT; +} diff --git a/drivers/infiniband/hw/mana/wq.c b/drivers/infiniband/hw/mana/wq.c new file mode 100644 index 000000000000..372d361510e0 --- /dev/null +++ b/drivers/infiniband/hw/mana/wq.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2022, Microsoft Corporation. All rights reserved. + */ + +#include "mana_ib.h" + +struct ib_wq *mana_ib_create_wq(struct ib_pd *pd, + struct ib_wq_init_attr *init_attr, + struct ib_udata *udata) +{ + struct mana_ib_dev *mdev = + container_of(pd->device, struct mana_ib_dev, ib_dev); + struct mana_ib_create_wq ucmd = {}; + struct mana_ib_wq *wq; + struct ib_umem *umem; + int err; + + if (udata->inlen < sizeof(ucmd)) + return ERR_PTR(-EINVAL); + + err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen)); + if (err) { + ibdev_dbg(&mdev->ib_dev, + "Failed to copy from udata for create wq, %d\n", err); + return ERR_PTR(err); + } + + wq = kzalloc(sizeof(*wq), GFP_KERNEL); + if (!wq) + return ERR_PTR(-ENOMEM); + + ibdev_dbg(&mdev->ib_dev, "ucmd wq_buf_addr 0x%llx\n", ucmd.wq_buf_addr); + + umem = ib_umem_get(pd->device, ucmd.wq_buf_addr, ucmd.wq_buf_size, + IB_ACCESS_LOCAL_WRITE); + if (IS_ERR(umem)) { + err = PTR_ERR(umem); + ibdev_dbg(&mdev->ib_dev, + "Failed to get umem for create wq, err %d\n", err); + goto err_free_wq; + } + + wq->umem = umem; + wq->wqe = init_attr->max_wr; + wq->wq_buf_size = ucmd.wq_buf_size; + wq->rx_object = INVALID_MANA_HANDLE; + + err = mana_ib_gd_create_dma_region(mdev, wq->umem, &wq->gdma_region); + if (err) { + ibdev_dbg(&mdev->ib_dev, + "Failed to create dma region for create wq, %d\n", + err); + goto err_release_umem; + } + + ibdev_dbg(&mdev->ib_dev, + "mana_ib_gd_create_dma_region ret %d gdma_region 0x%llx\n", + err, wq->gdma_region); + + /* WQ ID is returned at wq_create time, doesn't know the value yet */ + + return &wq->ibwq; + +err_release_umem: + ib_umem_release(umem); + +err_free_wq: + kfree(wq); + + return 
ERR_PTR(err); +} + +int mana_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, + u32 wq_attr_mask, struct ib_udata *udata) +{ + /* modify_wq is not supported by this version of the driver */ + return -EOPNOTSUPP; +} + +int mana_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata) +{ + struct mana_ib_wq *wq = container_of(ibwq, struct mana_ib_wq, ibwq); + struct ib_device *ib_dev = ibwq->device; + struct mana_ib_dev *mdev; + + mdev = container_of(ib_dev, struct mana_ib_dev, ib_dev); + + mana_ib_gd_destroy_dma_region(mdev, wq->gdma_region); + ib_umem_release(wq->umem); + + kfree(wq); + + return 0; +} + +int mana_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_table, + struct ib_rwq_ind_table_init_attr *init_attr, + struct ib_udata *udata) +{ + /* + * There is no additional data in ind_table to be maintained by this + * driver, do nothing + */ + return 0; +} + +int mana_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl) +{ + /* + * There is no additional data in ind_table to be maintained by this + * driver, do nothing + */ + return 0; +} diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 713a8f8cca9a..20212ffeefb9 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -412,6 +412,9 @@ int mana_bpf(struct net_device *ndev, struct netdev_bpf *bpf); extern const struct ethtool_ops mana_ethtool_ops; +/* A CQ can be created not associated with any EQ */ +#define GDMA_CQ_NO_EQ 0xffff + struct mana_obj_spec { u32 queue_index; u64 gdma_region; diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h index 7dd56210226f..e0c25537fd2e 100644 --- a/include/uapi/rdma/ib_user_ioctl_verbs.h +++ b/include/uapi/rdma/ib_user_ioctl_verbs.h @@ -251,6 +251,7 @@ enum rdma_driver_id { RDMA_DRIVER_EFA, RDMA_DRIVER_SIW, RDMA_DRIVER_ERDMA, + RDMA_DRIVER_MANA, }; enum ib_uverbs_gid_type { diff --git a/include/uapi/rdma/mana-abi.h b/include/uapi/rdma/mana-abi.h new file mode 100644 
index 000000000000..5fcb31b37fb9 --- /dev/null +++ b/include/uapi/rdma/mana-abi.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) */ +/* + * Copyright (c) 2022, Microsoft Corporation. All rights reserved. + */ + +#ifndef MANA_ABI_USER_H +#define MANA_ABI_USER_H + +#include +#include + +/* + * Increment this value if any changes that break userspace ABI + * compatibility are made. + */ + +#define MANA_IB_UVERBS_ABI_VERSION 1 + +struct mana_ib_create_cq { + __aligned_u64 buf_addr; +}; + +struct mana_ib_create_qp { + __aligned_u64 sq_buf_addr; + __u32 sq_buf_size; + __u32 port; +}; + +struct mana_ib_create_qp_resp { + __u32 sqid; + __u32 cqid; + __u32 tx_vp_offset; + __u32 reserved; +}; + +struct mana_ib_create_wq { + __aligned_u64 wq_buf_addr; + __u32 wq_buf_size; + __u32 reserved; +}; + +/* RX Hash function flags */ +enum mana_ib_rx_hash_function_flags { + MANA_IB_RX_HASH_FUNC_TOEPLITZ = 1 << 0, +}; + +struct mana_ib_create_qp_rss { + __aligned_u64 rx_hash_fields_mask; + __u8 rx_hash_function; + __u8 reserved[7]; + __u32 rx_hash_key_len; + __u8 rx_hash_key[40]; + __u32 port; +}; + +struct rss_resp_entry { + __u32 cqid; + __u32 wqid; +}; + +struct mana_ib_create_qp_rss_resp { + __aligned_u64 num_entries; + struct rss_resp_entry entries[64]; +}; + +#endif From c5e7ee72862eeeee77fd71b99fa9064f830e0b00 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 23 Jul 2021 12:15:59 -0400 Subject: [PATCH 1394/4122] Revert "csky: Fixup CONFIG_DEBUG_RSEQ" This reverts commit f36e0aab6f1f78d770ce859df3f07a9c5763ce5f. The csky rseq support has been merged without ever notifying the rseq maintainers, and without any of the required assembler glue in the rseq selftests, which means it is entirely untested. It is also derived from a non-upstream riscv patch which has known bugs. 
The assembly part of this revert should be carefully reviewed by the architecture maintainer because it touches code which has changed since the merge of the reverted patch. The rseq selftests assembly glue should be introduced at the same time as the architecture rseq support. Without the presence of any test, I recommend reverting rseq support from csky for now. Link: https://lore.kernel.org/lkml/1257037909.25426.1626705790861.JavaMail.zimbra@efficios.com/ Signed-off-by: Mathieu Desnoyers Signed-off-by: Guo Ren Cc: Al Viro Cc: Linus Torvalds Cc: linux-csky@vger.kernel.org --- arch/csky/kernel/entry.S | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/arch/csky/kernel/entry.S b/arch/csky/kernel/entry.S index 547b4cd1b24b..a4b519fbb371 100644 --- a/arch/csky/kernel/entry.S +++ b/arch/csky/kernel/entry.S @@ -50,11 +50,15 @@ ENTRY(csky_systemcall) SAVE_ALL TRAP0_SIZE zero_fp context_tracking +#ifdef CONFIG_RSEQ_DEBUG + mov a0, sp + jbsr rseq_syscall +#endif psrset ee, ie lrw r9, __NR_syscalls cmphs syscallid, r9 /* Check nr of syscall */ - bt 1f + bt ret_from_exception lrw r9, sys_call_table ixw r9, syscallid @@ -80,11 +84,6 @@ ENTRY(csky_systemcall) jsr syscallid #endif stw a0, (sp, LSAVE_A0) /* Save return value */ -1: -#ifdef CONFIG_DEBUG_RSEQ - mov a0, sp - jbsr rseq_syscall -#endif jmpi ret_from_exception csky_syscall_trace: @@ -113,10 +112,6 @@ csky_syscall_trace: stw a0, (sp, LSAVE_A0) /* Save return value */ 1: -#ifdef CONFIG_DEBUG_RSEQ - mov a0, sp - jbsr rseq_syscall -#endif mov a0, sp /* right now, sp --> pt_regs */ jbsr syscall_trace_exit br ret_from_exception From 7e2004906fb52257772be0ef262fba2d5eb1653b Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 23 Jul 2021 12:16:00 -0400 Subject: [PATCH 1395/4122] Revert "csky: Add support for restartable sequence" This reverts commit 9866d141a0977ace974400bf1f793dfc163409ce. 
The csky rseq support has been merged without ever notifying the rseq maintainers, and without any of the required assembler glue in the rseq selftests, which means it is entirely untested. It is also derived from a non-upstream riscv patch which has known bugs. The assembly part of this revert should be carefully reviewed by the architecture maintainer because it touches code which has changed since the merge of the reverted patch. The rseq selftests assembly glue should be introduced at the same time as the architecture rseq support. Without the presence of any test, I recommend reverting rseq support from csky for now. Link: https://lore.kernel.org/lkml/1257037909.25426.1626705790861.JavaMail.zimbra@efficios.com/ Signed-off-by: Mathieu Desnoyers Signed-off-by: Guo Ren Cc: Al Viro Cc: Linus Torvalds Cc: linux-csky@vger.kernel.org --- arch/csky/Kconfig | 1 - arch/csky/kernel/entry.S | 4 ---- arch/csky/kernel/signal.c | 2 -- 3 files changed, 7 deletions(-) diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index e0ecd1cc81a9..dba02da6fa34 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -94,7 +94,6 @@ config CSKY select HAVE_PERF_USER_STACK_DUMP select HAVE_DMA_CONTIGUOUS select HAVE_REGS_AND_STACK_ACCESS_API - select HAVE_RSEQ select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS select MAY_HAVE_SPARSE_IRQ diff --git a/arch/csky/kernel/entry.S b/arch/csky/kernel/entry.S index a4b519fbb371..c68cdcc76d60 100644 --- a/arch/csky/kernel/entry.S +++ b/arch/csky/kernel/entry.S @@ -50,10 +50,6 @@ ENTRY(csky_systemcall) SAVE_ALL TRAP0_SIZE zero_fp context_tracking -#ifdef CONFIG_RSEQ_DEBUG - mov a0, sp - jbsr rseq_syscall -#endif psrset ee, ie lrw r9, __NR_syscalls diff --git a/arch/csky/kernel/signal.c b/arch/csky/kernel/signal.c index b7b3685283d7..10da0fefd431 100644 --- a/arch/csky/kernel/signal.c +++ b/arch/csky/kernel/signal.c @@ -179,8 +179,6 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) sigset_t *oldset = sigmask_to_save(); 
int ret; - rseq_signal_deliver(ksig, regs); - /* Are we from a system call? */ if (in_syscall(regs)) { /* Avoid additional syscall restarting via ret_from_exception */ From 61c581a46a9668747d355436bd4b2505594539bd Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 3 Nov 2022 20:22:57 +0100 Subject: [PATCH 1396/4122] crypto: move gf128mul library into lib/crypto The gf128mul library does not depend on the crypto API at all, so it can be moved into lib/crypto. This will allow us to use it in other library code in a subsequent patch without having to depend on CONFIG_CRYPTO. While at it, change the Kconfig symbol name to align with other crypto library implementations. However, the source file name is retained, as it is reflected in the module .ko filename, and changing this might break things for users. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/Kconfig | 2 +- arch/arm64/crypto/Kconfig | 2 +- crypto/Kconfig | 9 +++------ crypto/Makefile | 1 - drivers/crypto/chelsio/Kconfig | 2 +- lib/crypto/Kconfig | 3 +++ lib/crypto/Makefile | 2 ++ {crypto => lib/crypto}/gf128mul.c | 0 8 files changed, 11 insertions(+), 10 deletions(-) rename {crypto => lib/crypto}/gf128mul.c (100%) diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 3858c4d4cb98..7b2b7d043d9b 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -18,7 +18,7 @@ config CRYPTO_GHASH_ARM_CE depends on KERNEL_MODE_NEON select CRYPTO_HASH select CRYPTO_CRYPTD - select CRYPTO_GF128MUL + select CRYPTO_LIB_GF128MUL help GCM GHASH function (NIST SP800-38D) diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 6793d5bc3ee5..6d06b448a66e 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -6,8 +6,8 @@ config CRYPTO_GHASH_ARM64_CE tristate "Hash functions: GHASH (ARMv8 Crypto Extensions)" depends on KERNEL_MODE_NEON select CRYPTO_HASH - select CRYPTO_GF128MUL select CRYPTO_LIB_AES + select CRYPTO_LIB_GF128MUL select 
CRYPTO_AEAD help GCM GHASH function (NIST SP800-38D) diff --git a/crypto/Kconfig b/crypto/Kconfig index d779667671b2..9c86f7045157 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -175,9 +175,6 @@ config CRYPTO_MANAGER_EXTRA_TESTS This is intended for developer use only, as these tests take much longer to run than the normal self tests. -config CRYPTO_GF128MUL - tristate - config CRYPTO_NULL tristate "Null algorithms" select CRYPTO_NULL2 @@ -714,9 +711,9 @@ config CRYPTO_KEYWRAP config CRYPTO_LRW tristate "LRW (Liskov Rivest Wagner)" + select CRYPTO_LIB_GF128MUL select CRYPTO_SKCIPHER select CRYPTO_MANAGER - select CRYPTO_GF128MUL select CRYPTO_ECB help LRW (Liskov Rivest Wagner) mode @@ -926,8 +923,8 @@ config CRYPTO_CMAC config CRYPTO_GHASH tristate "GHASH" - select CRYPTO_GF128MUL select CRYPTO_HASH + select CRYPTO_LIB_GF128MUL help GCM GHASH function (NIST SP800-38D) @@ -967,8 +964,8 @@ config CRYPTO_MICHAEL_MIC config CRYPTO_POLYVAL tristate - select CRYPTO_GF128MUL select CRYPTO_HASH + select CRYPTO_LIB_GF128MUL help POLYVAL hash function for HCTR2 diff --git a/crypto/Makefile b/crypto/Makefile index 303b21c43df0..d0126c915834 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -85,7 +85,6 @@ obj-$(CONFIG_CRYPTO_WP512) += wp512.o CFLAGS_wp512.o := $(call cc-option,-fno-schedule-insns) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149 obj-$(CONFIG_CRYPTO_BLAKE2B) += blake2b_generic.o CFLAGS_blake2b_generic.o := -Wframe-larger-than=4096 # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105930 -obj-$(CONFIG_CRYPTO_GF128MUL) += gf128mul.o obj-$(CONFIG_CRYPTO_ECB) += ecb.o obj-$(CONFIG_CRYPTO_CBC) += cbc.o obj-$(CONFIG_CRYPTO_CFB) += cfb.o diff --git a/drivers/crypto/chelsio/Kconfig b/drivers/crypto/chelsio/Kconfig index f886401af13e..5dd3f6a4781a 100644 --- a/drivers/crypto/chelsio/Kconfig +++ b/drivers/crypto/chelsio/Kconfig @@ -3,11 +3,11 @@ config CRYPTO_DEV_CHELSIO tristate "Chelsio Crypto Co-processor Driver" depends on CHELSIO_T4 select CRYPTO_LIB_AES 
+ select CRYPTO_LIB_GF128MUL select CRYPTO_SHA1 select CRYPTO_SHA256 select CRYPTO_SHA512 select CRYPTO_AUTHENC - select CRYPTO_GF128MUL help The Chelsio Crypto Co-processor driver for T6 adapters. diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index 7e9683e9f5c6..6767d86959de 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -11,6 +11,9 @@ config CRYPTO_LIB_AES config CRYPTO_LIB_ARC4 tristate +config CRYPTO_LIB_GF128MUL + tristate + config CRYPTO_ARCH_HAVE_LIB_BLAKE2S bool help diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index c852f067ab06..7000eeb72286 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -13,6 +13,8 @@ libaes-y := aes.o obj-$(CONFIG_CRYPTO_LIB_ARC4) += libarc4.o libarc4-y := arc4.o +obj-$(CONFIG_CRYPTO_LIB_GF128MUL) += gf128mul.o + # blake2s is used by the /dev/random driver which is always builtin obj-y += libblake2s.o libblake2s-y := blake2s.o diff --git a/crypto/gf128mul.c b/lib/crypto/gf128mul.c similarity index 100% rename from crypto/gf128mul.c rename to lib/crypto/gf128mul.c From b67ce439fef69a1a339cf2743c8198e8d90e6821 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 3 Nov 2022 20:22:58 +0100 Subject: [PATCH 1397/4122] crypto: lib/gf128mul - make gf128mul_lle time invariant The gf128mul library has different variants with different memory/performance tradeoffs, where the faster ones use 4k or 64k lookup tables precomputed at runtime, which are based on one of the multiplication factors, which is commonly the key for keyed hash algorithms such as GHASH. The slowest variant is gf128_mul_lle() [and its bbe/ble counterparts], which does not use precomputed lookup tables, but it still relies on a single u16[256] lookup table which is input independent. The use of such a table may cause the execution time of gf128_mul_lle() to correlate with the value of the inputs, which is generally something that must be avoided for cryptographic algorithms. 
On top of that, the function uses a sequence of if () statements that conditionally invoke be128_xor() based on which bits are set in the second argument of the function, which is usually a pointer to the multiplication factor that represents the key. In order to remove the correlation between the execution time of gf128_mul_lle() and the value of its inputs, let's address the identified shortcomings: - add a time invariant version of gf128mul_x8_lle() that replaces the table lookup with the expression that is used at compile time to populate the lookup table; - make the invocations of be128_xor() unconditional, but pass a zero vector as the third argument if the associated bit in the key is cleared. The resulting code is likely to be significantly slower. However, given that this is the slowest version already, making it even slower in order to make it more secure is assumed to be justified. The bbe and ble counterparts could receive the same treatment, but the former is never used anywhere in the kernel, and the latter is only used in the driver for an asynchronous crypto h/w accelerator (Chelsio), where timing variances are unlikely to matter. 
Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- lib/crypto/gf128mul.c | 58 +++++++++++++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 19 deletions(-) diff --git a/lib/crypto/gf128mul.c b/lib/crypto/gf128mul.c index a69ae3e6c16c..8f8c45e0cdcf 100644 --- a/lib/crypto/gf128mul.c +++ b/lib/crypto/gf128mul.c @@ -146,6 +146,17 @@ static void gf128mul_x8_lle(be128 *x) x->a = cpu_to_be64((a >> 8) ^ (_tt << 48)); } +/* time invariant version of gf128mul_x8_lle */ +static void gf128mul_x8_lle_ti(be128 *x) +{ + u64 a = be64_to_cpu(x->a); + u64 b = be64_to_cpu(x->b); + u64 _tt = xda_le(b & 0xff); /* avoid table lookup */ + + x->b = cpu_to_be64((b >> 8) | (a << 56)); + x->a = cpu_to_be64((a >> 8) ^ (_tt << 48)); +} + static void gf128mul_x8_bbe(be128 *x) { u64 a = be64_to_cpu(x->a); @@ -169,38 +180,47 @@ EXPORT_SYMBOL(gf128mul_x8_ble); void gf128mul_lle(be128 *r, const be128 *b) { - be128 p[8]; + /* + * The p array should be aligned to twice the size of its element type, + * so that every even/odd pair is guaranteed to share a cacheline + * (assuming a cacheline size of 32 bytes or more, which is by far the + * most common). This ensures that each be128_xor() call in the loop + * takes the same amount of time regardless of the value of 'ch', which + * is derived from function parameter 'b', which is commonly used as a + * key, e.g., for GHASH. The odd array elements are all set to zero, + * making each be128_xor() a NOP if its associated bit in 'ch' is not + * set, and this is equivalent to calling be128_xor() conditionally. + * This approach aims to avoid leaking information about such keys + * through execution time variances. + * + * Unfortunately, __aligned(16) or higher does not work on x86 for + * variables on the stack so we need to perform the alignment by hand. 
+ */ + be128 array[16 + 3] = {}; + be128 *p = PTR_ALIGN(&array[0], 2 * sizeof(be128)); int i; p[0] = *r; for (i = 0; i < 7; ++i) - gf128mul_x_lle(&p[i + 1], &p[i]); + gf128mul_x_lle(&p[2 * i + 2], &p[2 * i]); memset(r, 0, sizeof(*r)); for (i = 0;;) { u8 ch = ((u8 *)b)[15 - i]; - if (ch & 0x80) - be128_xor(r, r, &p[0]); - if (ch & 0x40) - be128_xor(r, r, &p[1]); - if (ch & 0x20) - be128_xor(r, r, &p[2]); - if (ch & 0x10) - be128_xor(r, r, &p[3]); - if (ch & 0x08) - be128_xor(r, r, &p[4]); - if (ch & 0x04) - be128_xor(r, r, &p[5]); - if (ch & 0x02) - be128_xor(r, r, &p[6]); - if (ch & 0x01) - be128_xor(r, r, &p[7]); + be128_xor(r, r, &p[ 0 + !(ch & 0x80)]); + be128_xor(r, r, &p[ 2 + !(ch & 0x40)]); + be128_xor(r, r, &p[ 4 + !(ch & 0x20)]); + be128_xor(r, r, &p[ 6 + !(ch & 0x10)]); + be128_xor(r, r, &p[ 8 + !(ch & 0x08)]); + be128_xor(r, r, &p[10 + !(ch & 0x04)]); + be128_xor(r, r, &p[12 + !(ch & 0x02)]); + be128_xor(r, r, &p[14 + !(ch & 0x01)]); if (++i >= 16) break; - gf128mul_x8_lle(r); + gf128mul_x8_lle_ti(r); /* use the time invariant version */ } } EXPORT_SYMBOL(gf128mul_lle); From 520af5da664a8edc4f4c1cd8e6e8488ecccdb7e5 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 3 Nov 2022 20:22:59 +0100 Subject: [PATCH 1398/4122] crypto: lib/aesgcm - Provide minimal library implementation Implement a minimal library version of AES-GCM based on the existing library implementations of AES and multiplication in GF(2^128). Using these primitives, GCM can be implemented in a straight-forward manner. GCM has a couple of sharp edges, i.e., the amount of input data processed with the same initialization vector (IV) should be capped to protect the counter from 32-bit rollover (or carry), and the size of the authentication tag should be fixed for a given key. [0] The former concern is addressed trivially, given that the function call API uses 32-bit signed types for the input lengths. 
It is still up to the caller to avoid IV reuse in general, but this is not something we can police at the implementation level. As for the latter concern, let's make the authentication tag size part of the key schedule, and only permit it to be configured as part of the key expansion routine. Note that table based AES implementations are susceptible to known plaintext timing attacks on the encryption key. The AES library already attempts to mitigate this to some extent, but given that the counter mode encryption used by GCM operates exclusively on known plaintext by construction (the IV and therefore the initial counter value are known to an attacker), let's take some extra care to mitigate this, by calling the AES library with interrupts disabled. [0] https://nvlpubs.nist.gov/nistpubs/legacy/sp/nistspecialpublication800-38d.pdf Link: https://lore.kernel.org/all/c6fb9b25-a4b6-2e4a-2dd1-63adda055a49@amd.com/ Signed-off-by: Ard Biesheuvel Tested-by: Nikunj A Dadhania Signed-off-by: Herbert Xu --- include/crypto/gcm.h | 22 ++ lib/crypto/Kconfig | 6 + lib/crypto/Makefile | 3 + lib/crypto/aesgcm.c | 727 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 758 insertions(+) create mode 100644 lib/crypto/aesgcm.c diff --git a/include/crypto/gcm.h b/include/crypto/gcm.h index 9d7eff04f224..fd9df607a836 100644 --- a/include/crypto/gcm.h +++ b/include/crypto/gcm.h @@ -3,6 +3,9 @@ #include +#include +#include + #define GCM_AES_IV_SIZE 12 #define GCM_RFC4106_IV_SIZE 8 #define GCM_RFC4543_IV_SIZE 8 @@ -60,4 +63,23 @@ static inline int crypto_ipsec_check_assoclen(unsigned int assoclen) return 0; } + +struct aesgcm_ctx { + be128 ghash_key; + struct crypto_aes_ctx aes_ctx; + unsigned int authsize; +}; + +int aesgcm_expandkey(struct aesgcm_ctx *ctx, const u8 *key, + unsigned int keysize, unsigned int authsize); + +void aesgcm_encrypt(const struct aesgcm_ctx *ctx, u8 *dst, const u8 *src, + int crypt_len, const u8 *assoc, int assoc_len, + const u8 iv[GCM_AES_IV_SIZE], u8 
*authtag); + +bool __must_check aesgcm_decrypt(const struct aesgcm_ctx *ctx, u8 *dst, + const u8 *src, int crypt_len, const u8 *assoc, + int assoc_len, const u8 iv[GCM_AES_IV_SIZE], + const u8 *authtag); + #endif diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index 6767d86959de..45436bfc6dff 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -8,6 +8,12 @@ config CRYPTO_LIB_UTILS config CRYPTO_LIB_AES tristate +config CRYPTO_LIB_AESGCM + tristate + select CRYPTO_LIB_AES + select CRYPTO_LIB_GF128MUL + select CRYPTO_LIB_UTILS + config CRYPTO_LIB_ARC4 tristate diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index 7000eeb72286..6ec2d4543d9c 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -10,6 +10,9 @@ obj-$(CONFIG_CRYPTO_LIB_CHACHA_GENERIC) += libchacha.o obj-$(CONFIG_CRYPTO_LIB_AES) += libaes.o libaes-y := aes.o +obj-$(CONFIG_CRYPTO_LIB_AESGCM) += libaesgcm.o +libaesgcm-y := aesgcm.o + obj-$(CONFIG_CRYPTO_LIB_ARC4) += libarc4.o libarc4-y := arc4.o diff --git a/lib/crypto/aesgcm.c b/lib/crypto/aesgcm.c new file mode 100644 index 000000000000..c632d6e17af8 --- /dev/null +++ b/lib/crypto/aesgcm.c @@ -0,0 +1,727 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Minimal library implementation of GCM + * + * Copyright 2022 Google LLC + */ + +#include + +#include +#include +#include + +#include + +static void aesgcm_encrypt_block(const struct crypto_aes_ctx *ctx, void *dst, + const void *src) +{ + unsigned long flags; + + /* + * In AES-GCM, both the GHASH key derivation and the CTR mode + * encryption operate on known plaintext, making them susceptible to + * timing attacks on the encryption key. The AES library already + * mitigates this risk to some extent by pulling the entire S-box into + * the caches before doing any substitutions, but this strategy is more + * effective when running with interrupts disabled. 
+ */ + local_irq_save(flags); + aes_encrypt(ctx, dst, src); + local_irq_restore(flags); +} + +/** + * aesgcm_expandkey - Expands the AES and GHASH keys for the AES-GCM key + * schedule + * + * @ctx: The data structure that will hold the AES-GCM key schedule + * @key: The AES encryption input key + * @keysize: The length in bytes of the input key + * @authsize: The size in bytes of the GCM authentication tag + * + * Returns: 0 on success, or -EINVAL if @keysize or @authsize contain values + * that are not permitted by the GCM specification. + */ +int aesgcm_expandkey(struct aesgcm_ctx *ctx, const u8 *key, + unsigned int keysize, unsigned int authsize) +{ + u8 kin[AES_BLOCK_SIZE] = {}; + int ret; + + ret = crypto_gcm_check_authsize(authsize) ?: + aes_expandkey(&ctx->aes_ctx, key, keysize); + if (ret) + return ret; + + ctx->authsize = authsize; + aesgcm_encrypt_block(&ctx->aes_ctx, &ctx->ghash_key, kin); + + return 0; +} +EXPORT_SYMBOL(aesgcm_expandkey); + +static void aesgcm_ghash(be128 *ghash, const be128 *key, const void *src, + int len) +{ + while (len > 0) { + crypto_xor((u8 *)ghash, src, min(len, GHASH_BLOCK_SIZE)); + gf128mul_lle(ghash, key); + + src += GHASH_BLOCK_SIZE; + len -= GHASH_BLOCK_SIZE; + } +} + +static void aesgcm_mac(const struct aesgcm_ctx *ctx, const u8 *src, int src_len, + const u8 *assoc, int assoc_len, __be32 *ctr, u8 *authtag) +{ + be128 tail = { cpu_to_be64(assoc_len * 8), cpu_to_be64(src_len * 8) }; + u8 buf[AES_BLOCK_SIZE]; + be128 ghash = {}; + + aesgcm_ghash(&ghash, &ctx->ghash_key, assoc, assoc_len); + aesgcm_ghash(&ghash, &ctx->ghash_key, src, src_len); + aesgcm_ghash(&ghash, &ctx->ghash_key, &tail, sizeof(tail)); + + ctr[3] = cpu_to_be32(1); + aesgcm_encrypt_block(&ctx->aes_ctx, buf, ctr); + crypto_xor_cpy(authtag, buf, (u8 *)&ghash, ctx->authsize); + + memzero_explicit(&ghash, sizeof(ghash)); + memzero_explicit(buf, sizeof(buf)); +} + +static void aesgcm_crypt(const struct aesgcm_ctx *ctx, u8 *dst, const u8 *src, + int len, __be32 
*ctr) +{ + u8 buf[AES_BLOCK_SIZE]; + unsigned int n = 2; + + while (len > 0) { + /* + * The counter increment below must not result in overflow or + * carry into the next 32-bit word, as this could result in + * inadvertent IV reuse, which must be avoided at all cost for + * stream ciphers such as AES-CTR. Given the range of 'int + * len', this cannot happen, so no explicit test is necessary. + */ + ctr[3] = cpu_to_be32(n++); + aesgcm_encrypt_block(&ctx->aes_ctx, buf, ctr); + crypto_xor_cpy(dst, src, buf, min(len, AES_BLOCK_SIZE)); + + dst += AES_BLOCK_SIZE; + src += AES_BLOCK_SIZE; + len -= AES_BLOCK_SIZE; + } + memzero_explicit(buf, sizeof(buf)); +} + +/** + * aesgcm_encrypt - Perform AES-GCM encryption on a block of data + * + * @ctx: The AES-GCM key schedule + * @dst: Pointer to the ciphertext output buffer + * @src: Pointer the plaintext (may equal @dst for encryption in place) + * @crypt_len: The size in bytes of the plaintext and ciphertext. + * @assoc: Pointer to the associated data, + * @assoc_len: The size in bytes of the associated data + * @iv: The initialization vector (IV) to use for this block of data + * (must be 12 bytes in size as per the GCM spec recommendation) + * @authtag: The address of the buffer in memory where the authentication + * tag should be stored. The buffer is assumed to have space for + * @ctx->authsize bytes. 
+ */ +void aesgcm_encrypt(const struct aesgcm_ctx *ctx, u8 *dst, const u8 *src, + int crypt_len, const u8 *assoc, int assoc_len, + const u8 iv[GCM_AES_IV_SIZE], u8 *authtag) +{ + __be32 ctr[4]; + + memcpy(ctr, iv, GCM_AES_IV_SIZE); + + aesgcm_crypt(ctx, dst, src, crypt_len, ctr); + aesgcm_mac(ctx, dst, crypt_len, assoc, assoc_len, ctr, authtag); +} +EXPORT_SYMBOL(aesgcm_encrypt); + +/** + * aesgcm_decrypt - Perform AES-GCM decryption on a block of data + * + * @ctx: The AES-GCM key schedule + * @dst: Pointer to the plaintext output buffer + * @src: Pointer the ciphertext (may equal @dst for decryption in place) + * @crypt_len: The size in bytes of the plaintext and ciphertext. + * @assoc: Pointer to the associated data, + * @assoc_len: The size in bytes of the associated data + * @iv: The initialization vector (IV) to use for this block of data + * (must be 12 bytes in size as per the GCM spec recommendation) + * @authtag: The address of the buffer in memory where the authentication + * tag is stored. + * + * Returns: true on success, or false if the ciphertext failed authentication. + * On failure, no plaintext will be returned. + */ +bool __must_check aesgcm_decrypt(const struct aesgcm_ctx *ctx, u8 *dst, + const u8 *src, int crypt_len, const u8 *assoc, + int assoc_len, const u8 iv[GCM_AES_IV_SIZE], + const u8 *authtag) +{ + u8 tagbuf[AES_BLOCK_SIZE]; + __be32 ctr[4]; + + memcpy(ctr, iv, GCM_AES_IV_SIZE); + + aesgcm_mac(ctx, src, crypt_len, assoc, assoc_len, ctr, tagbuf); + if (crypto_memneq(authtag, tagbuf, ctx->authsize)) { + memzero_explicit(tagbuf, sizeof(tagbuf)); + return false; + } + aesgcm_crypt(ctx, dst, src, crypt_len, ctr); + return true; +} +EXPORT_SYMBOL(aesgcm_decrypt); + +MODULE_DESCRIPTION("Generic AES-GCM library"); +MODULE_AUTHOR("Ard Biesheuvel "); +MODULE_LICENSE("GPL"); + +#ifndef CONFIG_CRYPTO_MANAGER_DISABLE_TESTS + +/* + * Test code below. 
Vectors taken from crypto/testmgr.h + */ + +static const u8 __initconst ctext0[16] = + "\x58\xe2\xfc\xce\xfa\x7e\x30\x61" + "\x36\x7f\x1d\x57\xa4\xe7\x45\x5a"; + +static const u8 __initconst ptext1[16]; + +static const u8 __initconst ctext1[32] = + "\x03\x88\xda\xce\x60\xb6\xa3\x92" + "\xf3\x28\xc2\xb9\x71\xb2\xfe\x78" + "\xab\x6e\x47\xd4\x2c\xec\x13\xbd" + "\xf5\x3a\x67\xb2\x12\x57\xbd\xdf"; + +static const u8 __initconst ptext2[64] = + "\xd9\x31\x32\x25\xf8\x84\x06\xe5" + "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a" + "\x86\xa7\xa9\x53\x15\x34\xf7\xda" + "\x2e\x4c\x30\x3d\x8a\x31\x8a\x72" + "\x1c\x3c\x0c\x95\x95\x68\x09\x53" + "\x2f\xcf\x0e\x24\x49\xa6\xb5\x25" + "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57" + "\xba\x63\x7b\x39\x1a\xaf\xd2\x55"; + +static const u8 __initconst ctext2[80] = + "\x42\x83\x1e\xc2\x21\x77\x74\x24" + "\x4b\x72\x21\xb7\x84\xd0\xd4\x9c" + "\xe3\xaa\x21\x2f\x2c\x02\xa4\xe0" + "\x35\xc1\x7e\x23\x29\xac\xa1\x2e" + "\x21\xd5\x14\xb2\x54\x66\x93\x1c" + "\x7d\x8f\x6a\x5a\xac\x84\xaa\x05" + "\x1b\xa3\x0b\x39\x6a\x0a\xac\x97" + "\x3d\x58\xe0\x91\x47\x3f\x59\x85" + "\x4d\x5c\x2a\xf3\x27\xcd\x64\xa6" + "\x2c\xf3\x5a\xbd\x2b\xa6\xfa\xb4"; + +static const u8 __initconst ptext3[60] = + "\xd9\x31\x32\x25\xf8\x84\x06\xe5" + "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a" + "\x86\xa7\xa9\x53\x15\x34\xf7\xda" + "\x2e\x4c\x30\x3d\x8a\x31\x8a\x72" + "\x1c\x3c\x0c\x95\x95\x68\x09\x53" + "\x2f\xcf\x0e\x24\x49\xa6\xb5\x25" + "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57" + "\xba\x63\x7b\x39"; + +static const u8 __initconst ctext3[76] = + "\x42\x83\x1e\xc2\x21\x77\x74\x24" + "\x4b\x72\x21\xb7\x84\xd0\xd4\x9c" + "\xe3\xaa\x21\x2f\x2c\x02\xa4\xe0" + "\x35\xc1\x7e\x23\x29\xac\xa1\x2e" + "\x21\xd5\x14\xb2\x54\x66\x93\x1c" + "\x7d\x8f\x6a\x5a\xac\x84\xaa\x05" + "\x1b\xa3\x0b\x39\x6a\x0a\xac\x97" + "\x3d\x58\xe0\x91" + "\x5b\xc9\x4f\xbc\x32\x21\xa5\xdb" + "\x94\xfa\xe9\x5a\xe7\x12\x1a\x47"; + +static const u8 __initconst ctext4[16] = + "\xcd\x33\xb2\x8a\xc7\x73\xf7\x4b" + "\xa0\x0e\xd1\xf3\x12\x57\x24\x35"; + 
+static const u8 __initconst ctext5[32] = + "\x98\xe7\x24\x7c\x07\xf0\xfe\x41" + "\x1c\x26\x7e\x43\x84\xb0\xf6\x00" + "\x2f\xf5\x8d\x80\x03\x39\x27\xab" + "\x8e\xf4\xd4\x58\x75\x14\xf0\xfb"; + +static const u8 __initconst ptext6[64] = + "\xd9\x31\x32\x25\xf8\x84\x06\xe5" + "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a" + "\x86\xa7\xa9\x53\x15\x34\xf7\xda" + "\x2e\x4c\x30\x3d\x8a\x31\x8a\x72" + "\x1c\x3c\x0c\x95\x95\x68\x09\x53" + "\x2f\xcf\x0e\x24\x49\xa6\xb5\x25" + "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57" + "\xba\x63\x7b\x39\x1a\xaf\xd2\x55"; + +static const u8 __initconst ctext6[80] = + "\x39\x80\xca\x0b\x3c\x00\xe8\x41" + "\xeb\x06\xfa\xc4\x87\x2a\x27\x57" + "\x85\x9e\x1c\xea\xa6\xef\xd9\x84" + "\x62\x85\x93\xb4\x0c\xa1\xe1\x9c" + "\x7d\x77\x3d\x00\xc1\x44\xc5\x25" + "\xac\x61\x9d\x18\xc8\x4a\x3f\x47" + "\x18\xe2\x44\x8b\x2f\xe3\x24\xd9" + "\xcc\xda\x27\x10\xac\xad\xe2\x56" + "\x99\x24\xa7\xc8\x58\x73\x36\xbf" + "\xb1\x18\x02\x4d\xb8\x67\x4a\x14"; + +static const u8 __initconst ctext7[16] = + "\x53\x0f\x8a\xfb\xc7\x45\x36\xb9" + "\xa9\x63\xb4\xf1\xc4\xcb\x73\x8b"; + +static const u8 __initconst ctext8[32] = + "\xce\xa7\x40\x3d\x4d\x60\x6b\x6e" + "\x07\x4e\xc5\xd3\xba\xf3\x9d\x18" + "\xd0\xd1\xc8\xa7\x99\x99\x6b\xf0" + "\x26\x5b\x98\xb5\xd4\x8a\xb9\x19"; + +static const u8 __initconst ptext9[64] = + "\xd9\x31\x32\x25\xf8\x84\x06\xe5" + "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a" + "\x86\xa7\xa9\x53\x15\x34\xf7\xda" + "\x2e\x4c\x30\x3d\x8a\x31\x8a\x72" + "\x1c\x3c\x0c\x95\x95\x68\x09\x53" + "\x2f\xcf\x0e\x24\x49\xa6\xb5\x25" + "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57" + "\xba\x63\x7b\x39\x1a\xaf\xd2\x55"; + +static const u8 __initconst ctext9[80] = + "\x52\x2d\xc1\xf0\x99\x56\x7d\x07" + "\xf4\x7f\x37\xa3\x2a\x84\x42\x7d" + "\x64\x3a\x8c\xdc\xbf\xe5\xc0\xc9" + "\x75\x98\xa2\xbd\x25\x55\xd1\xaa" + "\x8c\xb0\x8e\x48\x59\x0d\xbb\x3d" + "\xa7\xb0\x8b\x10\x56\x82\x88\x38" + "\xc5\xf6\x1e\x63\x93\xba\x7a\x0a" + "\xbc\xc9\xf6\x62\x89\x80\x15\xad" + "\xb0\x94\xda\xc5\xd9\x34\x71\xbd" + 
"\xec\x1a\x50\x22\x70\xe3\xcc\x6c"; + +static const u8 __initconst ptext10[60] = + "\xd9\x31\x32\x25\xf8\x84\x06\xe5" + "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a" + "\x86\xa7\xa9\x53\x15\x34\xf7\xda" + "\x2e\x4c\x30\x3d\x8a\x31\x8a\x72" + "\x1c\x3c\x0c\x95\x95\x68\x09\x53" + "\x2f\xcf\x0e\x24\x49\xa6\xb5\x25" + "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57" + "\xba\x63\x7b\x39"; + +static const u8 __initconst ctext10[76] = + "\x52\x2d\xc1\xf0\x99\x56\x7d\x07" + "\xf4\x7f\x37\xa3\x2a\x84\x42\x7d" + "\x64\x3a\x8c\xdc\xbf\xe5\xc0\xc9" + "\x75\x98\xa2\xbd\x25\x55\xd1\xaa" + "\x8c\xb0\x8e\x48\x59\x0d\xbb\x3d" + "\xa7\xb0\x8b\x10\x56\x82\x88\x38" + "\xc5\xf6\x1e\x63\x93\xba\x7a\x0a" + "\xbc\xc9\xf6\x62" + "\x76\xfc\x6e\xce\x0f\x4e\x17\x68" + "\xcd\xdf\x88\x53\xbb\x2d\x55\x1b"; + +static const u8 __initconst ptext11[60] = + "\xd9\x31\x32\x25\xf8\x84\x06\xe5" + "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a" + "\x86\xa7\xa9\x53\x15\x34\xf7\xda" + "\x2e\x4c\x30\x3d\x8a\x31\x8a\x72" + "\x1c\x3c\x0c\x95\x95\x68\x09\x53" + "\x2f\xcf\x0e\x24\x49\xa6\xb5\x25" + "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57" + "\xba\x63\x7b\x39"; + +static const u8 __initconst ctext11[76] = + "\x39\x80\xca\x0b\x3c\x00\xe8\x41" + "\xeb\x06\xfa\xc4\x87\x2a\x27\x57" + "\x85\x9e\x1c\xea\xa6\xef\xd9\x84" + "\x62\x85\x93\xb4\x0c\xa1\xe1\x9c" + "\x7d\x77\x3d\x00\xc1\x44\xc5\x25" + "\xac\x61\x9d\x18\xc8\x4a\x3f\x47" + "\x18\xe2\x44\x8b\x2f\xe3\x24\xd9" + "\xcc\xda\x27\x10" + "\x25\x19\x49\x8e\x80\xf1\x47\x8f" + "\x37\xba\x55\xbd\x6d\x27\x61\x8c"; + +static const u8 __initconst ptext12[719] = + "\x42\xc1\xcc\x08\x48\x6f\x41\x3f" + "\x2f\x11\x66\x8b\x2a\x16\xf0\xe0" + "\x58\x83\xf0\xc3\x70\x14\xc0\x5b" + "\x3f\xec\x1d\x25\x3c\x51\xd2\x03" + "\xcf\x59\x74\x1f\xb2\x85\xb4\x07" + "\xc6\x6a\x63\x39\x8a\x5b\xde\xcb" + "\xaf\x08\x44\xbd\x6f\x91\x15\xe1" + "\xf5\x7a\x6e\x18\xbd\xdd\x61\x50" + "\x59\xa9\x97\xab\xbb\x0e\x74\x5c" + "\x00\xa4\x43\x54\x04\x54\x9b\x3b" + "\x77\xec\xfd\x5c\xa6\xe8\x7b\x08" + "\xae\xe6\x10\x3f\x32\x65\xd1\xfc" + 
"\xa4\x1d\x2c\x31\xfb\x33\x7a\xb3" + "\x35\x23\xf4\x20\x41\xd4\xad\x82" + "\x8b\xa4\xad\x96\x1c\x20\x53\xbe" + "\x0e\xa6\xf4\xdc\x78\x49\x3e\x72" + "\xb1\xa9\xb5\x83\xcb\x08\x54\xb7" + "\xad\x49\x3a\xae\x98\xce\xa6\x66" + "\x10\x30\x90\x8c\x55\x83\xd7\x7c" + "\x8b\xe6\x53\xde\xd2\x6e\x18\x21" + "\x01\x52\xd1\x9f\x9d\xbb\x9c\x73" + "\x57\xcc\x89\x09\x75\x9b\x78\x70" + "\xed\x26\x97\x4d\xb4\xe4\x0c\xa5" + "\xfa\x70\x04\x70\xc6\x96\x1c\x7d" + "\x54\x41\x77\xa8\xe3\xb0\x7e\x96" + "\x82\xd9\xec\xa2\x87\x68\x55\xf9" + "\x8f\x9e\x73\x43\x47\x6a\x08\x36" + "\x93\x67\xa8\x2d\xde\xac\x41\xa9" + "\x5c\x4d\x73\x97\x0f\x70\x68\xfa" + "\x56\x4d\x00\xc2\x3b\x1f\xc8\xb9" + "\x78\x1f\x51\x07\xe3\x9a\x13\x4e" + "\xed\x2b\x2e\xa3\xf7\x44\xb2\xe7" + "\xab\x19\x37\xd9\xba\x76\x5e\xd2" + "\xf2\x53\x15\x17\x4c\x6b\x16\x9f" + "\x02\x66\x49\xca\x7c\x91\x05\xf2" + "\x45\x36\x1e\xf5\x77\xad\x1f\x46" + "\xa8\x13\xfb\x63\xb6\x08\x99\x63" + "\x82\xa2\xed\xb3\xac\xdf\x43\x19" + "\x45\xea\x78\x73\xd9\xb7\x39\x11" + "\xa3\x13\x7c\xf8\x3f\xf7\xad\x81" + "\x48\x2f\xa9\x5c\x5f\xa0\xf0\x79" + "\xa4\x47\x7d\x80\x20\x26\xfd\x63" + "\x0a\xc7\x7e\x6d\x75\x47\xff\x76" + "\x66\x2e\x8a\x6c\x81\x35\xaf\x0b" + "\x2e\x6a\x49\x60\xc1\x10\xe1\xe1" + "\x54\x03\xa4\x09\x0c\x37\x7a\x15" + "\x23\x27\x5b\x8b\x4b\xa5\x64\x97" + "\xae\x4a\x50\x73\x1f\x66\x1c\x5c" + "\x03\x25\x3c\x8d\x48\x58\x71\x34" + "\x0e\xec\x4e\x55\x1a\x03\x6a\xe5" + "\xb6\x19\x2b\x84\x2a\x20\xd1\xea" + "\x80\x6f\x96\x0e\x05\x62\xc7\x78" + "\x87\x79\x60\x38\x46\xb4\x25\x57" + "\x6e\x16\x63\xf8\xad\x6e\xd7\x42" + "\x69\xe1\x88\xef\x6e\xd5\xb4\x9a" + "\x3c\x78\x6c\x3b\xe5\xa0\x1d\x22" + "\x86\x5c\x74\x3a\xeb\x24\x26\xc7" + "\x09\xfc\x91\x96\x47\x87\x4f\x1a" + "\xd6\x6b\x2c\x18\x47\xc0\xb8\x24" + "\xa8\x5a\x4a\x9e\xcb\x03\xe7\x2a" + "\x09\xe6\x4d\x9c\x6d\x86\x60\xf5" + "\x2f\x48\x69\x37\x9f\xf2\xd2\xcb" + "\x0e\x5a\xdd\x6e\x8a\xfb\x6a\xfe" + "\x0b\x63\xde\x87\x42\x79\x8a\x68" + "\x51\x28\x9b\x7a\xeb\xaf\xb8\x2f" + "\x9d\xd1\xc7\x45\x90\x08\xc9\x83" + 
"\xe9\x83\x84\xcb\x28\x69\x09\x69" + "\xce\x99\x46\x00\x54\xcb\xd8\x38" + "\xf9\x53\x4a\xbf\x31\xce\x57\x15" + "\x33\xfa\x96\x04\x33\x42\xe3\xc0" + "\xb7\x54\x4a\x65\x7a\x7c\x02\xe6" + "\x19\x95\xd0\x0e\x82\x07\x63\xf9" + "\xe1\x2b\x2a\xfc\x55\x92\x52\xc9" + "\xb5\x9f\x23\x28\x60\xe7\x20\x51" + "\x10\xd3\xed\x6d\x9b\xab\xb8\xe2" + "\x5d\x9a\x34\xb3\xbe\x9c\x64\xcb" + "\x78\xc6\x91\x22\x40\x91\x80\xbe" + "\xd7\x78\x5c\x0e\x0a\xdc\x08\xe9" + "\x67\x10\xa4\x83\x98\x79\x23\xe7" + "\x92\xda\xa9\x22\x16\xb1\xe7\x78" + "\xa3\x1c\x6c\x8f\x35\x7c\x4d\x37" + "\x2f\x6e\x0b\x50\x5c\x34\xb9\xf9" + "\xe6\x3d\x91\x0d\x32\x95\xaa\x3d" + "\x48\x11\x06\xbb\x2d\xf2\x63\x88" + "\x3f\x73\x09\xe2\x45\x56\x31\x51" + "\xfa\x5e\x4e\x62\xf7\x90\xf9\xa9" + "\x7d\x7b\x1b\xb1\xc8\x26\x6e\x66" + "\xf6\x90\x9a\x7f\xf2\x57\xcc\x23" + "\x59\xfa\xfa\xaa\x44\x04\x01\xa7" + "\xa4\x78\xdb\x74\x3d\x8b\xb5"; + +static const u8 __initconst ctext12[735] = + "\x84\x0b\xdb\xd5\xb7\xa8\xfe\x20" + "\xbb\xb1\x12\x7f\x41\xea\xb3\xc0" + "\xa2\xb4\x37\x19\x11\x58\xb6\x0b" + "\x4c\x1d\x38\x05\x54\xd1\x16\x73" + "\x8e\x1c\x20\x90\xa2\x9a\xb7\x74" + "\x47\xe6\xd8\xfc\x18\x3a\xb4\xea" + "\xd5\x16\x5a\x2c\x53\x01\x46\xb3" + "\x18\x33\x74\x6c\x50\xf2\xe8\xc0" + "\x73\xda\x60\x22\xeb\xe3\xe5\x9b" + "\x20\x93\x6c\x4b\x37\x99\xb8\x23" + "\x3b\x4e\xac\xe8\x5b\xe8\x0f\xb7" + "\xc3\x8f\xfb\x4a\x37\xd9\x39\x95" + "\x34\xf1\xdb\x8f\x71\xd9\xc7\x0b" + "\x02\xf1\x63\xfc\x9b\xfc\xc5\xab" + "\xb9\x14\x13\x21\xdf\xce\xaa\x88" + "\x44\x30\x1e\xce\x26\x01\x92\xf8" + "\x9f\x00\x4b\x0c\x4b\xf7\x5f\xe0" + "\x89\xca\x94\x66\x11\x21\x97\xca" + "\x3e\x83\x74\x2d\xdb\x4d\x11\xeb" + "\x97\xc2\x14\xff\x9e\x1e\xa0\x6b" + "\x08\xb4\x31\x2b\x85\xc6\x85\x6c" + "\x90\xec\x39\xc0\xec\xb3\xb5\x4e" + "\xf3\x9c\xe7\x83\x3a\x77\x0a\xf4" + "\x56\xfe\xce\x18\x33\x6d\x0b\x2d" + "\x33\xda\xc8\x05\x5c\xb4\x09\x2a" + "\xde\x6b\x52\x98\x01\xef\x36\x3d" + "\xbd\xf9\x8f\xa8\x3e\xaa\xcd\xd1" + "\x01\x2d\x42\x49\xc3\xb6\x84\xbb" + 
"\x48\x96\xe0\x90\x93\x6c\x48\x64" + "\xd4\xfa\x7f\x93\x2c\xa6\x21\xc8" + "\x7a\x23\x7b\xaa\x20\x56\x12\xae" + "\x16\x9d\x94\x0f\x54\xa1\xec\xca" + "\x51\x4e\xf2\x39\xf4\xf8\x5f\x04" + "\x5a\x0d\xbf\xf5\x83\xa1\x15\xe1" + "\xf5\x3c\xd8\x62\xa3\xed\x47\x89" + "\x85\x4c\xe5\xdb\xac\x9e\x17\x1d" + "\x0c\x09\xe3\x3e\x39\x5b\x4d\x74" + "\x0e\xf5\x34\xee\x70\x11\x4c\xfd" + "\xdb\x34\xb1\xb5\x10\x3f\x73\xb7" + "\xf5\xfa\xed\xb0\x1f\xa5\xcd\x3c" + "\x8d\x35\x83\xd4\x11\x44\x6e\x6c" + "\x5b\xe0\x0e\x69\xa5\x39\xe5\xbb" + "\xa9\x57\x24\x37\xe6\x1f\xdd\xcf" + "\x16\x2a\x13\xf9\x6a\x2d\x90\xa0" + "\x03\x60\x7a\xed\x69\xd5\x00\x8b" + "\x7e\x4f\xcb\xb9\xfa\x91\xb9\x37" + "\xc1\x26\xce\x90\x97\x22\x64\x64" + "\xc1\x72\x43\x1b\xf6\xac\xc1\x54" + "\x8a\x10\x9c\xdd\x8d\xd5\x8e\xb2" + "\xe4\x85\xda\xe0\x20\x5f\xf4\xb4" + "\x15\xb5\xa0\x8d\x12\x74\x49\x23" + "\x3a\xdf\x4a\xd3\xf0\x3b\x89\xeb" + "\xf8\xcc\x62\x7b\xfb\x93\x07\x41" + "\x61\x26\x94\x58\x70\xa6\x3c\xe4" + "\xff\x58\xc4\x13\x3d\xcb\x36\x6b" + "\x32\xe5\xb2\x6d\x03\x74\x6f\x76" + "\x93\x77\xde\x48\xc4\xfa\x30\x4a" + "\xda\x49\x80\x77\x0f\x1c\xbe\x11" + "\xc8\x48\xb1\xe5\xbb\xf2\x8a\xe1" + "\x96\x2f\x9f\xd1\x8e\x8a\x5c\xe2" + "\xf7\xd7\xd8\x54\xf3\x3f\xc4\x91" + "\xb8\xfb\x86\xdc\x46\x24\x91\x60" + "\x6c\x2f\xc9\x41\x37\x51\x49\x54" + "\x09\x81\x21\xf3\x03\x9f\x2b\xe3" + "\x1f\x39\x63\xaf\xf4\xd7\x53\x60" + "\xa7\xc7\x54\xf9\xee\xb1\xb1\x7d" + "\x75\x54\x65\x93\xfe\xb1\x68\x6b" + "\x57\x02\xf9\xbb\x0e\xf9\xf8\xbf" + "\x01\x12\x27\xb4\xfe\xe4\x79\x7a" + "\x40\x5b\x51\x4b\xdf\x38\xec\xb1" + "\x6a\x56\xff\x35\x4d\x42\x33\xaa" + "\x6f\x1b\xe4\xdc\xe0\xdb\x85\x35" + "\x62\x10\xd4\xec\xeb\xc5\x7e\x45" + "\x1c\x6f\x17\xca\x3b\x8e\x2d\x66" + "\x4f\x4b\x36\x56\xcd\x1b\x59\xaa" + "\xd2\x9b\x17\xb9\x58\xdf\x7b\x64" + "\x8a\xff\x3b\x9c\xa6\xb5\x48\x9e" + "\xaa\xe2\x5d\x09\x71\x32\x5f\xb6" + "\x29\xbe\xe7\xc7\x52\x7e\x91\x82" + "\x6b\x6d\x33\xe1\x34\x06\x36\x21" + "\x5e\xbe\x1e\x2f\x3e\xc1\xfb\xea" + "\x49\x2c\xb5\xca\xf7\xb0\x37\xea" + 
"\x1f\xed\x10\x04\xd9\x48\x0d\x1a" + "\x1c\xfb\xe7\x84\x0e\x83\x53\x74" + "\xc7\x65\xe2\x5c\xe5\xba\x73\x4c" + "\x0e\xe1\xb5\x11\x45\x61\x43\x46" + "\xaa\x25\x8f\xbd\x85\x08\xfa\x4c" + "\x15\xc1\xc0\xd8\xf5\xdc\x16\xbb" + "\x7b\x1d\xe3\x87\x57\xa7\x2a\x1d" + "\x38\x58\x9e\x8a\x43\xdc\x57" + "\xd1\x81\x7d\x2b\xe9\xff\x99\x3a" + "\x4b\x24\x52\x58\x55\xe1\x49\x14"; + +static struct { + const u8 *ptext; + const u8 *ctext; + + u8 key[AES_MAX_KEY_SIZE]; + u8 iv[GCM_AES_IV_SIZE]; + u8 assoc[20]; + + int klen; + int clen; + int plen; + int alen; +} const aesgcm_tv[] __initconst = { + { /* From McGrew & Viega - http://citeseer.ist.psu.edu/656989.html */ + .klen = 16, + .ctext = ctext0, + .clen = sizeof(ctext0), + }, { + .klen = 16, + .ptext = ptext1, + .plen = sizeof(ptext1), + .ctext = ctext1, + .clen = sizeof(ctext1), + }, { + .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c" + "\x6d\x6a\x8f\x94\x67\x30\x83\x08", + .klen = 16, + .iv = "\xca\xfe\xba\xbe\xfa\xce\xdb\xad" + "\xde\xca\xf8\x88", + .ptext = ptext2, + .plen = sizeof(ptext2), + .ctext = ctext2, + .clen = sizeof(ctext2), + }, { + .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c" + "\x6d\x6a\x8f\x94\x67\x30\x83\x08", + .klen = 16, + .iv = "\xca\xfe\xba\xbe\xfa\xce\xdb\xad" + "\xde\xca\xf8\x88", + .ptext = ptext3, + .plen = sizeof(ptext3), + .assoc = "\xfe\xed\xfa\xce\xde\xad\xbe\xef" + "\xfe\xed\xfa\xce\xde\xad\xbe\xef" + "\xab\xad\xda\xd2", + .alen = 20, + .ctext = ctext3, + .clen = sizeof(ctext3), + }, { + .klen = 24, + .ctext = ctext4, + .clen = sizeof(ctext4), + }, { + .klen = 24, + .ptext = ptext1, + .plen = sizeof(ptext1), + .ctext = ctext5, + .clen = sizeof(ctext5), + }, { + .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c" + "\x6d\x6a\x8f\x94\x67\x30\x83\x08" + "\xfe\xff\xe9\x92\x86\x65\x73\x1c", + .klen = 24, + .iv = "\xca\xfe\xba\xbe\xfa\xce\xdb\xad" + "\xde\xca\xf8\x88", + .ptext = ptext6, + .plen = sizeof(ptext6), + .ctext = ctext6, + .clen = sizeof(ctext6), + }, { + .klen = 32, + .ctext = ctext7, + .clen = sizeof(ctext7), + 
}, { + .klen = 32, + .ptext = ptext1, + .plen = sizeof(ptext1), + .ctext = ctext8, + .clen = sizeof(ctext8), + }, { + .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c" + "\x6d\x6a\x8f\x94\x67\x30\x83\x08" + "\xfe\xff\xe9\x92\x86\x65\x73\x1c" + "\x6d\x6a\x8f\x94\x67\x30\x83\x08", + .klen = 32, + .iv = "\xca\xfe\xba\xbe\xfa\xce\xdb\xad" + "\xde\xca\xf8\x88", + .ptext = ptext9, + .plen = sizeof(ptext9), + .ctext = ctext9, + .clen = sizeof(ctext9), + }, { + .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c" + "\x6d\x6a\x8f\x94\x67\x30\x83\x08" + "\xfe\xff\xe9\x92\x86\x65\x73\x1c" + "\x6d\x6a\x8f\x94\x67\x30\x83\x08", + .klen = 32, + .iv = "\xca\xfe\xba\xbe\xfa\xce\xdb\xad" + "\xde\xca\xf8\x88", + .ptext = ptext10, + .plen = sizeof(ptext10), + .assoc = "\xfe\xed\xfa\xce\xde\xad\xbe\xef" + "\xfe\xed\xfa\xce\xde\xad\xbe\xef" + "\xab\xad\xda\xd2", + .alen = 20, + .ctext = ctext10, + .clen = sizeof(ctext10), + }, { + .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c" + "\x6d\x6a\x8f\x94\x67\x30\x83\x08" + "\xfe\xff\xe9\x92\x86\x65\x73\x1c", + .klen = 24, + .iv = "\xca\xfe\xba\xbe\xfa\xce\xdb\xad" + "\xde\xca\xf8\x88", + .ptext = ptext11, + .plen = sizeof(ptext11), + .assoc = "\xfe\xed\xfa\xce\xde\xad\xbe\xef" + "\xfe\xed\xfa\xce\xde\xad\xbe\xef" + "\xab\xad\xda\xd2", + .alen = 20, + .ctext = ctext11, + .clen = sizeof(ctext11), + }, { + .key = "\x62\x35\xf8\x95\xfc\xa5\xeb\xf6" + "\x0e\x92\x12\x04\xd3\xa1\x3f\x2e" + "\x8b\x32\xcf\xe7\x44\xed\x13\x59" + "\x04\x38\x77\xb0\xb9\xad\xb4\x38", + .klen = 32, + .iv = "\x00\xff\xff\xff\xff\x00\x00\xff" + "\xff\xff\x00\xff", + .ptext = ptext12, + .plen = sizeof(ptext12), + .ctext = ctext12, + .clen = sizeof(ctext12), + } +}; + +static int __init libaesgcm_init(void) +{ + for (int i = 0; i < ARRAY_SIZE(aesgcm_tv); i++) { + u8 tagbuf[AES_BLOCK_SIZE]; + int plen = aesgcm_tv[i].plen; + struct aesgcm_ctx ctx; + u8 buf[sizeof(ptext12)]; + + if (aesgcm_expandkey(&ctx, aesgcm_tv[i].key, aesgcm_tv[i].klen, + aesgcm_tv[i].clen - plen)) { + pr_err("aesgcm_expandkey() 
failed on vector %d\n", i); + return -ENODEV; + } + + if (!aesgcm_decrypt(&ctx, buf, aesgcm_tv[i].ctext, plen, + aesgcm_tv[i].assoc, aesgcm_tv[i].alen, + aesgcm_tv[i].iv, aesgcm_tv[i].ctext + plen) + || memcmp(buf, aesgcm_tv[i].ptext, plen)) { + pr_err("aesgcm_decrypt() #1 failed on vector %d\n", i); + return -ENODEV; + } + + /* encrypt in place */ + aesgcm_encrypt(&ctx, buf, buf, plen, aesgcm_tv[i].assoc, + aesgcm_tv[i].alen, aesgcm_tv[i].iv, tagbuf); + if (memcmp(buf, aesgcm_tv[i].ctext, plen)) { + pr_err("aesgcm_encrypt() failed on vector %d\n", i); + return -ENODEV; + } + + /* decrypt in place */ + if (!aesgcm_decrypt(&ctx, buf, buf, plen, aesgcm_tv[i].assoc, + aesgcm_tv[i].alen, aesgcm_tv[i].iv, tagbuf) + || memcmp(buf, aesgcm_tv[i].ptext, plen)) { + pr_err("aesgcm_decrypt() #2 failed on vector %d\n", i); + return -ENODEV; + } + } + return 0; +} +module_init(libaesgcm_init); + +static void __exit libaesgcm_exit(void) +{ +} +module_exit(libaesgcm_exit); +#endif From fb11cddfe24caad33667b5c9dd859562b2be6c75 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Fri, 4 Nov 2022 15:45:27 +0800 Subject: [PATCH 1399/4122] crypto: rockchip - Remove surplus dev_err() when using platform_get_irq() There is no need to call the dev_err() function directly to print a custom message when handling an error from either the platform_get_irq() or platform_get_irq_byname() functions as both are going to display an appropriate error message in case of a failure. 
./drivers/crypto/rockchip/rk3288_crypto.c:351:2-9: line 351 is redundant because platform_get_irq() already prints an error Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=2677 Reported-by: Abaci Robot Signed-off-by: Yang Li Acked-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/rockchip/rk3288_crypto.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c index 6217e73ba4c4..9f6ba770a90a 100644 --- a/drivers/crypto/rockchip/rk3288_crypto.c +++ b/drivers/crypto/rockchip/rk3288_crypto.c @@ -348,7 +348,6 @@ static int rk_crypto_probe(struct platform_device *pdev) crypto_info->irq = platform_get_irq(pdev, 0); if (crypto_info->irq < 0) { - dev_err(&pdev->dev, "control Interrupt is not available.\n"); err = crypto_info->irq; goto err_crypto; } From 557ffd5a4726f8b6f0dd1d4b632ae02c1c063233 Mon Sep 17 00:00:00 2001 From: Shashank Gupta Date: Fri, 4 Nov 2022 13:21:07 -0400 Subject: [PATCH 1400/4122] crypto: qat - remove ADF_STATUS_PF_RUNNING flag from probe The ADF_STATUS_PF_RUNNING bit is set after the successful initialization of the communication between VF to PF in adf_vf2pf_notify_init(). So, it is not required to be set after the execution of the function adf_dev_init(). 
Signed-off-by: Shashank Gupta Reviewed-by: Giovanni Cabiddu Reviewed-by: Wojciech Ziemba Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_c3xxxvf/adf_drv.c | 2 -- drivers/crypto/qat/qat_c62xvf/adf_drv.c | 2 -- drivers/crypto/qat/qat_dh895xccvf/adf_drv.c | 2 -- 3 files changed, 6 deletions(-) diff --git a/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c b/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c index fa18d8009f53..cf4ef83e186f 100644 --- a/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c +++ b/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c @@ -177,8 +177,6 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (ret) goto out_err_dev_shutdown; - set_bit(ADF_STATUS_PF_RUNNING, &accel_dev->status); - ret = adf_dev_start(accel_dev); if (ret) goto out_err_dev_stop; diff --git a/drivers/crypto/qat/qat_c62xvf/adf_drv.c b/drivers/crypto/qat/qat_c62xvf/adf_drv.c index 686ec752d0e9..0e642c94b929 100644 --- a/drivers/crypto/qat/qat_c62xvf/adf_drv.c +++ b/drivers/crypto/qat/qat_c62xvf/adf_drv.c @@ -177,8 +177,6 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (ret) goto out_err_dev_shutdown; - set_bit(ADF_STATUS_PF_RUNNING, &accel_dev->status); - ret = adf_dev_start(accel_dev); if (ret) goto out_err_dev_stop; diff --git a/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c b/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c index 18756b2e1c91..c1485e702b3e 100644 --- a/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c +++ b/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c @@ -177,8 +177,6 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (ret) goto out_err_dev_shutdown; - set_bit(ADF_STATUS_PF_RUNNING, &accel_dev->status); - ret = adf_dev_start(accel_dev); if (ret) goto out_err_dev_stop; From 198acab1772f22f2e91f68a2fc1331e91dad780a Mon Sep 17 00:00:00 2001 From: Jim Quinlan Date: Tue, 11 Oct 2022 14:42:06 -0400 Subject: [PATCH 1401/4122] PCI: brcmstb: Enable Multi-MSI We always wanted to enable Multi-MSI but didn't have a test 
device until recently. In addition, there are some devices out there that will ask for multiple MSI but refuse to work if they are only granted one. Link: https://lore.kernel.org/r/20221011184211.18128-2-jim2101024@gmail.com Signed-off-by: Jim Quinlan Signed-off-by: Lorenzo Pieralisi Acked-by: Florian Fainelli --- drivers/pci/controller/pcie-brcmstb.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/pci/controller/pcie-brcmstb.c b/drivers/pci/controller/pcie-brcmstb.c index 521acd632f1a..a45ce7d61847 100644 --- a/drivers/pci/controller/pcie-brcmstb.c +++ b/drivers/pci/controller/pcie-brcmstb.c @@ -445,7 +445,8 @@ static struct irq_chip brcm_msi_irq_chip = { static struct msi_domain_info brcm_msi_domain_info = { /* Multi MSI is supported by the controller, but not by this driver */ - .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS), + .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | + MSI_FLAG_MULTI_PCI_MSI), .chip = &brcm_msi_irq_chip, }; @@ -505,21 +506,23 @@ static struct irq_chip brcm_msi_bottom_irq_chip = { .irq_ack = brcm_msi_ack_irq, }; -static int brcm_msi_alloc(struct brcm_msi *msi) +static int brcm_msi_alloc(struct brcm_msi *msi, unsigned int nr_irqs) { int hwirq; mutex_lock(&msi->lock); - hwirq = bitmap_find_free_region(msi->used, msi->nr, 0); + hwirq = bitmap_find_free_region(msi->used, msi->nr, + order_base_2(nr_irqs)); mutex_unlock(&msi->lock); return hwirq; } -static void brcm_msi_free(struct brcm_msi *msi, unsigned long hwirq) +static void brcm_msi_free(struct brcm_msi *msi, unsigned long hwirq, + unsigned int nr_irqs) { mutex_lock(&msi->lock); - bitmap_release_region(msi->used, hwirq, 0); + bitmap_release_region(msi->used, hwirq, order_base_2(nr_irqs)); mutex_unlock(&msi->lock); } @@ -527,16 +530,17 @@ static int brcm_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs, void *args) { struct brcm_msi *msi = domain->host_data; - int 
hwirq; + int hwirq, i; - hwirq = brcm_msi_alloc(msi); + hwirq = brcm_msi_alloc(msi, nr_irqs); if (hwirq < 0) return hwirq; - irq_domain_set_info(domain, virq, (irq_hw_number_t)hwirq, - &brcm_msi_bottom_irq_chip, domain->host_data, - handle_edge_irq, NULL, NULL); + for (i = 0; i < nr_irqs; i++) + irq_domain_set_info(domain, virq + i, hwirq + i, + &brcm_msi_bottom_irq_chip, domain->host_data, + handle_edge_irq, NULL, NULL); return 0; } @@ -546,7 +550,7 @@ static void brcm_irq_domain_free(struct irq_domain *domain, struct irq_data *d = irq_domain_get_irq_data(domain, virq); struct brcm_msi *msi = irq_data_get_irq_chip_data(d); - brcm_msi_free(msi, d->hwirq); + brcm_msi_free(msi, d->hwirq, nr_irqs); } static const struct irq_domain_ops msi_domain_ops = { From 3ae140ad827b359bc4fa7c7985691c4c1e3ca8f4 Mon Sep 17 00:00:00 2001 From: Jim Quinlan Date: Tue, 11 Oct 2022 14:42:07 -0400 Subject: [PATCH 1402/4122] PCI: brcmstb: Wait for 100ms following PERST# deassert Be prudent and give some time for power and clocks to become stable. As described in the PCIe CEM specification sections 2.2 and 2.2.1; as well as PCIe r5.0, 6.6.1. Link: https://lore.kernel.org/r/20221011184211.18128-3-jim2101024@gmail.com Signed-off-by: Jim Quinlan Signed-off-by: Lorenzo Pieralisi Acked-by: Florian Fainelli --- drivers/pci/controller/pcie-brcmstb.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/pci/controller/pcie-brcmstb.c b/drivers/pci/controller/pcie-brcmstb.c index a45ce7d61847..39b545713ba0 100644 --- a/drivers/pci/controller/pcie-brcmstb.c +++ b/drivers/pci/controller/pcie-brcmstb.c @@ -1037,8 +1037,15 @@ static int brcm_pcie_start_link(struct brcm_pcie *pcie) pcie->perst_set(pcie, 0); /* - * Give the RC/EP time to wake up, before trying to configure RC. - * Intermittently check status for link-up, up to a total of 100ms. + * Wait for 100ms after PERST# deassertion; see PCIe CEM specification + * sections 2.2, PCIe r5.0, 6.6.1. 
+ */ + msleep(100); + + /* + * Give the RC/EP even more time to wake up, before trying to + * configure RC. Intermittently check status for link-up, up to a + * total of 100ms. */ for (i = 0; i < 100 && !brcm_pcie_link_up(pcie); i += 5) msleep(5); From ca5dcc76314d1fa6d7307fd3b95039b08d2f2b97 Mon Sep 17 00:00:00 2001 From: Jim Quinlan Date: Tue, 11 Oct 2022 14:42:08 -0400 Subject: [PATCH 1403/4122] PCI: brcmstb: Replace status loops with read_poll_timeout_atomic() It would be nice to replace the PCIe link-up loop as well but there are too many uses of this that do not poll (and the read_poll_timeout uses "timeout==0" to loop forever). Link: https://lore.kernel.org/r/20221011184211.18128-4-jim2101024@gmail.com Signed-off-by: Jim Quinlan Signed-off-by: Lorenzo Pieralisi Acked-by: Florian Fainelli --- drivers/pci/controller/pcie-brcmstb.c | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/drivers/pci/controller/pcie-brcmstb.c b/drivers/pci/controller/pcie-brcmstb.c index 39b545713ba0..c7210cec1f58 100644 --- a/drivers/pci/controller/pcie-brcmstb.c +++ b/drivers/pci/controller/pcie-brcmstb.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -302,42 +303,34 @@ static u32 brcm_pcie_mdio_form_pkt(int port, int regad, int cmd) /* negative return value indicates error */ static int brcm_pcie_mdio_read(void __iomem *base, u8 port, u8 regad, u32 *val) { - int tries; u32 data; + int err; writel(brcm_pcie_mdio_form_pkt(port, regad, MDIO_CMD_READ), base + PCIE_RC_DL_MDIO_ADDR); readl(base + PCIE_RC_DL_MDIO_ADDR); - - data = readl(base + PCIE_RC_DL_MDIO_RD_DATA); - for (tries = 0; !MDIO_RD_DONE(data) && tries < 10; tries++) { - udelay(10); - data = readl(base + PCIE_RC_DL_MDIO_RD_DATA); - } - + err = readl_poll_timeout_atomic(base + PCIE_RC_DL_MDIO_RD_DATA, data, + MDIO_RD_DONE(data), 10, 100); *val = FIELD_GET(MDIO_DATA_MASK, data); - return MDIO_RD_DONE(data) ? 
0 : -EIO; + + return err; } /* negative return value indicates error */ static int brcm_pcie_mdio_write(void __iomem *base, u8 port, u8 regad, u16 wrdata) { - int tries; u32 data; + int err; writel(brcm_pcie_mdio_form_pkt(port, regad, MDIO_CMD_WRITE), base + PCIE_RC_DL_MDIO_ADDR); readl(base + PCIE_RC_DL_MDIO_ADDR); writel(MDIO_DATA_DONE_MASK | wrdata, base + PCIE_RC_DL_MDIO_WR_DATA); - data = readl(base + PCIE_RC_DL_MDIO_WR_DATA); - for (tries = 0; !MDIO_WT_DONE(data) && tries < 10; tries++) { - udelay(10); - data = readl(base + PCIE_RC_DL_MDIO_WR_DATA); - } - - return MDIO_WT_DONE(data) ? 0 : -EIO; + err = readw_poll_timeout_atomic(base + PCIE_RC_DL_MDIO_WR_DATA, data, + MDIO_WT_DONE(data), 10, 100); + return err; } /* From 137b57413f569d558c1054e2a181313574eb9a87 Mon Sep 17 00:00:00 2001 From: Jim Quinlan Date: Tue, 11 Oct 2022 14:42:09 -0400 Subject: [PATCH 1404/4122] PCI: brcmstb: Drop needless 'inline' annotations A number of inline functions are called rarely and/or are not time-critical. Take out the "inline" and let the compiler do its work. 
Link: https://lore.kernel.org/r/20221011184211.18128-5-jim2101024@gmail.com Signed-off-by: Jim Quinlan Signed-off-by: Lorenzo Pieralisi Reviewed-by: Bjorn Helgaas Acked-by: Florian Fainelli --- drivers/pci/controller/pcie-brcmstb.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/pci/controller/pcie-brcmstb.c b/drivers/pci/controller/pcie-brcmstb.c index c7210cec1f58..e3045f1eadbc 100644 --- a/drivers/pci/controller/pcie-brcmstb.c +++ b/drivers/pci/controller/pcie-brcmstb.c @@ -723,7 +723,7 @@ static void __iomem *brcm7425_pcie_map_bus(struct pci_bus *bus, return base + DATA_ADDR(pcie); } -static inline void brcm_pcie_bridge_sw_init_set_generic(struct brcm_pcie *pcie, u32 val) +static void brcm_pcie_bridge_sw_init_set_generic(struct brcm_pcie *pcie, u32 val) { u32 tmp, mask = RGR1_SW_INIT_1_INIT_GENERIC_MASK; u32 shift = RGR1_SW_INIT_1_INIT_GENERIC_SHIFT; @@ -733,7 +733,7 @@ static inline void brcm_pcie_bridge_sw_init_set_generic(struct brcm_pcie *pcie, writel(tmp, pcie->base + PCIE_RGR1_SW_INIT_1(pcie)); } -static inline void brcm_pcie_bridge_sw_init_set_7278(struct brcm_pcie *pcie, u32 val) +static void brcm_pcie_bridge_sw_init_set_7278(struct brcm_pcie *pcie, u32 val) { u32 tmp, mask = RGR1_SW_INIT_1_INIT_7278_MASK; u32 shift = RGR1_SW_INIT_1_INIT_7278_SHIFT; @@ -743,7 +743,7 @@ static inline void brcm_pcie_bridge_sw_init_set_7278(struct brcm_pcie *pcie, u32 writel(tmp, pcie->base + PCIE_RGR1_SW_INIT_1(pcie)); } -static inline void brcm_pcie_perst_set_4908(struct brcm_pcie *pcie, u32 val) +static void brcm_pcie_perst_set_4908(struct brcm_pcie *pcie, u32 val) { if (WARN_ONCE(!pcie->perst_reset, "missing PERST# reset controller\n")) return; @@ -754,7 +754,7 @@ static inline void brcm_pcie_perst_set_4908(struct brcm_pcie *pcie, u32 val) reset_control_deassert(pcie->perst_reset); } -static inline void brcm_pcie_perst_set_7278(struct brcm_pcie *pcie, u32 val) +static void brcm_pcie_perst_set_7278(struct brcm_pcie *pcie, u32 val) 
{ u32 tmp; @@ -764,7 +764,7 @@ static inline void brcm_pcie_perst_set_7278(struct brcm_pcie *pcie, u32 val) writel(tmp, pcie->base + PCIE_MISC_PCIE_CTRL); } -static inline void brcm_pcie_perst_set_generic(struct brcm_pcie *pcie, u32 val) +static void brcm_pcie_perst_set_generic(struct brcm_pcie *pcie, u32 val) { u32 tmp; @@ -773,7 +773,7 @@ static inline void brcm_pcie_perst_set_generic(struct brcm_pcie *pcie, u32 val) writel(tmp, pcie->base + PCIE_RGR1_SW_INIT_1(pcie)); } -static inline int brcm_pcie_get_rc_bar2_size_and_offset(struct brcm_pcie *pcie, +static int brcm_pcie_get_rc_bar2_size_and_offset(struct brcm_pcie *pcie, u64 *rc_bar2_size, u64 *rc_bar2_offset) { From 602fb860945fd6dce7989fcd3727d5fe4282f785 Mon Sep 17 00:00:00 2001 From: Jim Quinlan Date: Tue, 11 Oct 2022 14:42:10 -0400 Subject: [PATCH 1405/4122] PCI: brcmstb: Set RCB_{MPS,64B}_MODE bits Set RCB_MPS mode bit so that data for PCIe read requests up to the size of the Maximum Payload Size (MPS) are returned in one completion, and data for PCIe read requests greater than the MPS are split at the specified Read Completion Boundary setting. Set RCB_64B so that the Read Completion Boundary is 64B. 
Link: https://lore.kernel.org/r/20221011184211.18128-6-jim2101024@gmail.com Signed-off-by: Jim Quinlan Signed-off-by: Lorenzo Pieralisi Acked-by: Florian Fainelli --- drivers/pci/controller/pcie-brcmstb.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/pci/controller/pcie-brcmstb.c b/drivers/pci/controller/pcie-brcmstb.c index e3045f1eadbc..edf283e2b5dd 100644 --- a/drivers/pci/controller/pcie-brcmstb.c +++ b/drivers/pci/controller/pcie-brcmstb.c @@ -53,6 +53,8 @@ #define PCIE_RC_DL_MDIO_RD_DATA 0x1108 #define PCIE_MISC_MISC_CTRL 0x4008 +#define PCIE_MISC_MISC_CTRL_PCIE_RCB_64B_MODE_MASK 0x80 +#define PCIE_MISC_MISC_CTRL_PCIE_RCB_MPS_MODE_MASK 0x400 #define PCIE_MISC_MISC_CTRL_SCB_ACCESS_EN_MASK 0x1000 #define PCIE_MISC_MISC_CTRL_CFG_READ_UR_MODE_MASK 0x2000 #define PCIE_MISC_MISC_CTRL_MAX_BURST_SIZE_MASK 0x300000 @@ -900,11 +902,16 @@ static int brcm_pcie_setup(struct brcm_pcie *pcie) else burst = 0x2; /* 512 bytes */ - /* Set SCB_MAX_BURST_SIZE, CFG_READ_UR_MODE, SCB_ACCESS_EN */ + /* + * Set SCB_MAX_BURST_SIZE, CFG_READ_UR_MODE, SCB_ACCESS_EN, + * RCB_MPS_MODE, RCB_64B_MODE + */ tmp = readl(base + PCIE_MISC_MISC_CTRL); u32p_replace_bits(&tmp, 1, PCIE_MISC_MISC_CTRL_SCB_ACCESS_EN_MASK); u32p_replace_bits(&tmp, 1, PCIE_MISC_MISC_CTRL_CFG_READ_UR_MODE_MASK); u32p_replace_bits(&tmp, burst, PCIE_MISC_MISC_CTRL_MAX_BURST_SIZE_MASK); + u32p_replace_bits(&tmp, 1, PCIE_MISC_MISC_CTRL_PCIE_RCB_MPS_MODE_MASK); + u32p_replace_bits(&tmp, 1, PCIE_MISC_MISC_CTRL_PCIE_RCB_64B_MODE_MASK); writel(tmp, base + PCIE_MISC_MISC_CTRL); ret = brcm_pcie_get_rc_bar2_size_and_offset(pcie, &rc_bar2_size, From d899aa668498c07ff217b666ae9712990306e682 Mon Sep 17 00:00:00 2001 From: Nirmal Patel Date: Wed, 9 Nov 2022 07:26:52 -0700 Subject: [PATCH 1406/4122] PCI: vmd: Disable MSI remapping after suspend MSI remapping is disabled by VMD driver for Intel's Icelake and newer systems in order to improve performance by setting VMCONFIG_MSI_REMAP. 
By design VMCONFIG_MSI_REMAP register is cleared by firmware during boot. The same register gets cleared when system is put in S3 power state. VMD driver needs to set this register again in order to avoid interrupt issues with devices behind VMD if MSI remapping was disabled before. Link: https://lore.kernel.org/r/20221109142652.450998-1-nirmal.patel@linux.intel.com Fixes: ee81ee84f873 ("PCI: vmd: Disable MSI-X remapping when possible") Signed-off-by: Nirmal Patel Signed-off-by: Lorenzo Pieralisi Reviewed-by: Francisco Munoz --- drivers/pci/controller/vmd.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c index e06e9f4fc50f..98e0746e681c 100644 --- a/drivers/pci/controller/vmd.c +++ b/drivers/pci/controller/vmd.c @@ -980,6 +980,11 @@ static int vmd_resume(struct device *dev) struct vmd_dev *vmd = pci_get_drvdata(pdev); int err, i; + if (vmd->irq_domain) + vmd_set_msi_remapping(vmd, true); + else + vmd_set_msi_remapping(vmd, false); + for (i = 0; i < vmd->msix_count; i++) { err = devm_request_irq(dev, vmd->irqs[i].virq, vmd_irq, IRQF_NO_THREAD, From 3a936b2a5a581e50dd5cab20a48eb0055a527f02 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 2 Nov 2022 10:07:04 +0100 Subject: [PATCH 1407/4122] dt-bindings: PCI: qcom: Add SC8280XP/SA8540P interconnects Add the missing SC8280XP/SA8540P "pcie-mem" and "cpu-pcie" interconnect paths to the bindings. 
Link: https://lore.kernel.org/r/20221102090705.23634-2-johan+linaro@kernel.org Fixes: 76d777ae045e ("dt-bindings: PCI: qcom: Add SC8280XP to binding") Fixes: 76c4207f4085 ("dt-bindings: PCI: qcom: Add SA8540P to binding") Signed-off-by: Johan Hovold Signed-off-by: Lorenzo Pieralisi Reviewed-by: Rob Herring Acked-by: Manivannan Sadhasivam --- .../devicetree/bindings/pci/qcom,pcie.yaml | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/Documentation/devicetree/bindings/pci/qcom,pcie.yaml b/Documentation/devicetree/bindings/pci/qcom,pcie.yaml index 54f07852d279..2f851c804bb0 100644 --- a/Documentation/devicetree/bindings/pci/qcom,pcie.yaml +++ b/Documentation/devicetree/bindings/pci/qcom,pcie.yaml @@ -62,6 +62,14 @@ properties: minItems: 3 maxItems: 13 + interconnects: + maxItems: 2 + + interconnect-names: + items: + - const: pcie-mem + - const: cpu-pcie + resets: minItems: 1 maxItems: 12 @@ -631,6 +639,18 @@ allOf: items: - const: pci # PCIe core reset + - if: + properties: + compatible: + contains: + enum: + - qcom,pcie-sa8540p + - qcom,pcie-sc8280xp + then: + required: + - interconnects + - interconnect-names + - if: not: properties: From c4860af88d0cb1bb006df12615c5515ae509f73b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 2 Nov 2022 10:07:05 +0100 Subject: [PATCH 1408/4122] PCI: qcom: Add basic interconnect support On Qualcomm platforms like SC8280XP and SA8540P, interconnect bandwidth must be requested before enabling interconnect clocks. Add basic support for managing an optional "pcie-mem" interconnect path by setting a low constraint before enabling clocks and updating it after the link is up. Note that it is not possible for a controller driver to set anything but a maximum peak bandwidth as expected average bandwidth will vary with use case and actual use (and power policy?). This very much remains an unresolved problem with the interconnect framework. 
Also note that no constraint is set for the SC8280XP/SA8540P "cpu-pcie" path for now as it is not clear what an appropriate constraint would be (and the system does not crash when left unspecified). Link: https://lore.kernel.org/r/20221102090705.23634-3-johan+linaro@kernel.org Fixes: 70574511f3fc ("PCI: qcom: Add support for SC8280XP") Signed-off-by: Johan Hovold Signed-off-by: Lorenzo Pieralisi Reviewed-by: Brian Masney Reviewed-by: Manivannan Sadhasivam Acked-by: Georgi Djakov --- drivers/pci/controller/dwc/pcie-qcom.c | 76 ++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index f711acacaeaf..ec230b988ba2 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -223,6 +224,7 @@ struct qcom_pcie { union qcom_pcie_resources res; struct phy *phy; struct gpio_desc *reset; + struct icc_path *icc_mem; const struct qcom_pcie_cfg *cfg; }; @@ -1639,6 +1641,74 @@ static const struct dw_pcie_ops dw_pcie_ops = { .start_link = qcom_pcie_start_link, }; +static int qcom_pcie_icc_init(struct qcom_pcie *pcie) +{ + struct dw_pcie *pci = pcie->pci; + int ret; + + pcie->icc_mem = devm_of_icc_get(pci->dev, "pcie-mem"); + if (IS_ERR(pcie->icc_mem)) + return PTR_ERR(pcie->icc_mem); + + /* + * Some Qualcomm platforms require interconnect bandwidth constraints + * to be set before enabling interconnect clocks. + * + * Set an initial peak bandwidth corresponding to single-lane Gen 1 + * for the pcie-mem path. 
+ */ + ret = icc_set_bw(pcie->icc_mem, 0, MBps_to_icc(250)); + if (ret) { + dev_err(pci->dev, "failed to set interconnect bandwidth: %d\n", + ret); + return ret; + } + + return 0; +} + +static void qcom_pcie_icc_update(struct qcom_pcie *pcie) +{ + struct dw_pcie *pci = pcie->pci; + u32 offset, status, bw; + int speed, width; + int ret; + + if (!pcie->icc_mem) + return; + + offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP); + status = readw(pci->dbi_base + offset + PCI_EXP_LNKSTA); + + /* Only update constraints if link is up. */ + if (!(status & PCI_EXP_LNKSTA_DLLLA)) + return; + + speed = FIELD_GET(PCI_EXP_LNKSTA_CLS, status); + width = FIELD_GET(PCI_EXP_LNKSTA_NLW, status); + + switch (speed) { + case 1: + bw = MBps_to_icc(250); + break; + case 2: + bw = MBps_to_icc(500); + break; + default: + WARN_ON_ONCE(1); + fallthrough; + case 3: + bw = MBps_to_icc(985); + break; + } + + ret = icc_set_bw(pcie->icc_mem, 0, width * bw); + if (ret) { + dev_err(pci->dev, "failed to set interconnect bandwidth: %d\n", + ret); + } +} + static int qcom_pcie_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -1699,6 +1769,10 @@ static int qcom_pcie_probe(struct platform_device *pdev) goto err_pm_runtime_put; } + ret = qcom_pcie_icc_init(pcie); + if (ret) + goto err_pm_runtime_put; + ret = pcie->cfg->ops->get_resources(pcie); if (ret) goto err_pm_runtime_put; @@ -1717,6 +1791,8 @@ static int qcom_pcie_probe(struct platform_device *pdev) goto err_phy_exit; } + qcom_pcie_icc_update(pcie); + return 0; err_phy_exit: From 2759ddf7535d63381f9b9b1412e4c46e13ed773a Mon Sep 17 00:00:00 2001 From: Shunsuke Mie Date: Mon, 15 Aug 2022 11:50:06 +0900 Subject: [PATCH 1409/4122] PCI: endpoint: Fix Kconfig indent style Change to follow the Kconfig style guide. This patch fixes to use tab rather than space to indent, while help text is indented an additional two spaces. 
Link: https://lore.kernel.org/r/20220815025006.48167-1-mie@igel.co.jp Fixes: e35f56bb0330 ("PCI: endpoint: Support NTB transfer between RC and EP") Signed-off-by: Shunsuke Mie Signed-off-by: Lorenzo Pieralisi --- drivers/pci/endpoint/functions/Kconfig | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/pci/endpoint/functions/Kconfig b/drivers/pci/endpoint/functions/Kconfig index 295a033ee9a2..9fd560886871 100644 --- a/drivers/pci/endpoint/functions/Kconfig +++ b/drivers/pci/endpoint/functions/Kconfig @@ -27,13 +27,13 @@ config PCI_EPF_NTB If in doubt, say "N" to disable Endpoint NTB driver. config PCI_EPF_VNTB - tristate "PCI Endpoint NTB driver" - depends on PCI_ENDPOINT - depends on NTB - select CONFIGFS_FS - help - Select this configuration option to enable the Non-Transparent - Bridge (NTB) driver for PCIe Endpoint. NTB driver implements NTB - between PCI Root Port and PCIe Endpoint. + tristate "PCI Endpoint NTB driver" + depends on PCI_ENDPOINT + depends on NTB + select CONFIGFS_FS + help + Select this configuration option to enable the Non-Transparent + Bridge (NTB) driver for PCIe Endpoint. NTB driver implements NTB + between PCI Root Port and PCIe Endpoint. - If in doubt, say "N" to disable Endpoint NTB driver. + If in doubt, say "N" to disable Endpoint NTB driver. From ae6b9a65af480144da323436d90e149501ea8937 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Tue, 1 Nov 2022 10:57:14 +0100 Subject: [PATCH 1410/4122] PCI: imx6: Initialize PHY before deasserting core reset When the PHY is the reference clock provider then it must be initialized and powered on before the reset on the client is deasserted, otherwise the link will never come up. The order was changed in cf236e0c0d59. Restore the correct order to make the driver work again on boards where the PHY provides the reference clock. This also changes the order for boards where the Soc is the PHY reference clock divider, but this shouldn't do any harm. 
Link: https://lore.kernel.org/r/20221101095714.440001-1-s.hauer@pengutronix.de Fixes: cf236e0c0d59 ("PCI: imx6: Do not hide PHY driver callbacks and refine the error handling") Tested-by: Richard Zhu Signed-off-by: Sascha Hauer Signed-off-by: Lorenzo Pieralisi --- drivers/pci/controller/dwc/pci-imx6.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c index 2616585ca5f8..1dde5c579edc 100644 --- a/drivers/pci/controller/dwc/pci-imx6.c +++ b/drivers/pci/controller/dwc/pci-imx6.c @@ -952,12 +952,6 @@ static int imx6_pcie_host_init(struct dw_pcie_rp *pp) } } - ret = imx6_pcie_deassert_core_reset(imx6_pcie); - if (ret < 0) { - dev_err(dev, "pcie deassert core reset failed: %d\n", ret); - goto err_phy_off; - } - if (imx6_pcie->phy) { ret = phy_power_on(imx6_pcie->phy); if (ret) { @@ -965,6 +959,13 @@ static int imx6_pcie_host_init(struct dw_pcie_rp *pp) goto err_phy_off; } } + + ret = imx6_pcie_deassert_core_reset(imx6_pcie); + if (ret < 0) { + dev_err(dev, "pcie deassert core reset failed: %d\n", ret); + goto err_phy_off; + } + imx6_setup_phy_mpll(imx6_pcie); return 0; From 83f8a81dece8bc4237d8d94af357fb5df0083e63 Mon Sep 17 00:00:00 2001 From: Usama Arif Date: Thu, 3 Nov 2022 13:12:10 +0000 Subject: [PATCH 1411/4122] KVM: arm64: Fix pvtime documentation This includes table format and using reST labels for cross-referencing to vcpu.rst. 
Suggested-by: Bagas Sanjaya Signed-off-by: Usama Arif Reviewed-by: Steven Price Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221103131210.3603385-1-usama.arif@bytedance.com --- Documentation/virt/kvm/arm/pvtime.rst | 14 ++++++++------ Documentation/virt/kvm/devices/vcpu.rst | 2 ++ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/Documentation/virt/kvm/arm/pvtime.rst b/Documentation/virt/kvm/arm/pvtime.rst index 392521af7c90..e88b34e586be 100644 --- a/Documentation/virt/kvm/arm/pvtime.rst +++ b/Documentation/virt/kvm/arm/pvtime.rst @@ -23,21 +23,23 @@ the PV_TIME_FEATURES hypercall should be probed using the SMCCC 1.1 ARCH_FEATURES mechanism before calling it. PV_TIME_FEATURES - ============= ======== ========== + + ============= ======== ================================================= Function ID: (uint32) 0xC5000020 PV_call_id: (uint32) The function to query for support. Currently only PV_TIME_ST is supported. Return value: (int64) NOT_SUPPORTED (-1) or SUCCESS (0) if the relevant PV-time feature is supported by the hypervisor. - ============= ======== ========== + ============= ======== ================================================= PV_TIME_ST - ============= ======== ========== + + ============= ======== ============================================== Function ID: (uint32) 0xC5000021 Return value: (int64) IPA of the stolen time data structure for this VCPU. On failure: NOT_SUPPORTED (-1) - ============= ======== ========== + ============= ======== ============================================== The IPA returned by PV_TIME_ST should be mapped by the guest as normal memory with inner and outer write back caching attributes, in the inner shareable @@ -76,5 +78,5 @@ It is advisable that one or more 64k pages are set aside for the purpose of these structures and not used for other purposes, this enables the guest to map the region using 64k pages and avoids conflicting attributes with other memory. 
-For the user space interface see Documentation/virt/kvm/devices/vcpu.rst -section "3. GROUP: KVM_ARM_VCPU_PVTIME_CTRL". +For the user space interface see +:ref:`Documentation/virt/kvm/devices/vcpu.rst `. \ No newline at end of file diff --git a/Documentation/virt/kvm/devices/vcpu.rst b/Documentation/virt/kvm/devices/vcpu.rst index 716aa3edae14..31f14ec4a65b 100644 --- a/Documentation/virt/kvm/devices/vcpu.rst +++ b/Documentation/virt/kvm/devices/vcpu.rst @@ -171,6 +171,8 @@ configured values on other VCPUs. Userspace should configure the interrupt numbers on at least one VCPU after creating all VCPUs and before running any VCPUs. +.. _kvm_arm_vcpu_pvtime_ctrl: + 3. GROUP: KVM_ARM_VCPU_PVTIME_CTRL ================================== From e1b3253340029b06f5f648d8390807cba4e4ec23 Mon Sep 17 00:00:00 2001 From: Zhiyuan Dai Date: Sun, 6 Nov 2022 20:30:40 +0800 Subject: [PATCH 1412/4122] KVM: arm64: Fix typo in comment Fix typo in comment (nVHE/VHE). Signed-off-by: Zhiyuan Dai Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/1667737840-702-1-git-send-email-daizhiyuan@phytium.com.cn --- arch/arm64/kvm/hyp/vhe/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/vhe/Makefile b/arch/arm64/kvm/hyp/vhe/Makefile index 96bec0ecf9dd..3b9e5464b5b3 100644 --- a/arch/arm64/kvm/hyp/vhe/Makefile +++ b/arch/arm64/kvm/hyp/vhe/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # -# Makefile for Kernel-based Virtual Machine module, HYP/nVHE part +# Makefile for Kernel-based Virtual Machine module, HYP/VHE part # asflags-y := -D__KVM_VHE_HYPERVISOR__ From 0f4f7ae10ee4e6403659b2d9ddf05424eecde45b Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Thu, 10 Nov 2022 19:02:34 +0000 Subject: [PATCH 1413/4122] KVM: arm64: Move hyp refcount manipulation helpers to common header file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We will soon need to manipulate 'struct hyp_page' refcounts from 
outside page_alloc.c, so move the helpers to a common header file to allow them to be reused easily. Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Oliver Upton Tested-by: Vincent Donnefort Signed-off-by: Quentin Perret Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-2-will@kernel.org --- arch/arm64/kvm/hyp/include/nvhe/memory.h | 22 ++++++++++++++++++++++ arch/arm64/kvm/hyp/nvhe/page_alloc.c | 19 ------------------- 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/arch/arm64/kvm/hyp/include/nvhe/memory.h b/arch/arm64/kvm/hyp/include/nvhe/memory.h index 592b7edb3edb..9422900e5c6a 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/memory.h +++ b/arch/arm64/kvm/hyp/include/nvhe/memory.h @@ -38,6 +38,10 @@ static inline phys_addr_t hyp_virt_to_phys(void *addr) #define hyp_page_to_virt(page) __hyp_va(hyp_page_to_phys(page)) #define hyp_page_to_pool(page) (((struct hyp_page *)page)->pool) +/* + * Refcounting for 'struct hyp_page'. + * hyp_pool::lock must be held if atomic access to the refcount is required. 
+ */ static inline int hyp_page_count(void *addr) { struct hyp_page *p = hyp_virt_to_page(addr); @@ -45,4 +49,22 @@ static inline int hyp_page_count(void *addr) return p->refcount; } +static inline void hyp_page_ref_inc(struct hyp_page *p) +{ + BUG_ON(p->refcount == USHRT_MAX); + p->refcount++; +} + +static inline int hyp_page_ref_dec_and_test(struct hyp_page *p) +{ + BUG_ON(!p->refcount); + p->refcount--; + return (p->refcount == 0); +} + +static inline void hyp_set_page_refcounted(struct hyp_page *p) +{ + BUG_ON(p->refcount); + p->refcount = 1; +} #endif /* __KVM_HYP_MEMORY_H */ diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c index d40f0b30b534..1ded09fc9b10 100644 --- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c +++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c @@ -144,25 +144,6 @@ static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool, return p; } -static inline void hyp_page_ref_inc(struct hyp_page *p) -{ - BUG_ON(p->refcount == USHRT_MAX); - p->refcount++; -} - -static inline int hyp_page_ref_dec_and_test(struct hyp_page *p) -{ - BUG_ON(!p->refcount); - p->refcount--; - return (p->refcount == 0); -} - -static inline void hyp_set_page_refcounted(struct hyp_page *p) -{ - BUG_ON(p->refcount); - p->refcount = 1; -} - static void __hyp_put_page(struct hyp_pool *pool, struct hyp_page *p) { if (hyp_page_ref_dec_and_test(p)) From 72a5bc0f153ce8ca80e9abbd1d9adec7d586915a Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Thu, 10 Nov 2022 19:02:35 +0000 Subject: [PATCH 1414/4122] KVM: arm64: Allow attaching of non-coalescable pages to a hyp pool All the contiguous pages used to initialize a 'struct hyp_pool' are considered coalescable, which means that the hyp page allocator will actively try to merge them with their buddies on the hyp_put_page() path. However, using hyp_put_page() on a page that is not part of the initial memory range given to a hyp_pool() is currently unsupported. 
In order to allow dynamically extending hyp pools at run-time, add a check to __hyp_attach_page() to allow inserting 'external' pages into the free-list of order 0. This will be necessary to allow lazy donation of pages from the host to the hypervisor when allocating guest stage-2 page-table pages at EL2. Tested-by: Vincent Donnefort Signed-off-by: Quentin Perret Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-3-will@kernel.org --- arch/arm64/kvm/hyp/nvhe/page_alloc.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c index 1ded09fc9b10..dad88e203598 100644 --- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c +++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c @@ -93,11 +93,16 @@ static inline struct hyp_page *node_to_page(struct list_head *node) static void __hyp_attach_page(struct hyp_pool *pool, struct hyp_page *p) { + phys_addr_t phys = hyp_page_to_phys(p); unsigned short order = p->order; struct hyp_page *buddy; memset(hyp_page_to_virt(p), 0, PAGE_SIZE << p->order); + /* Skip coalescing for 'external' pages being freed into the pool. */ + if (phys < pool->range_start || phys >= pool->range_end) + goto insert; + /* * Only the first struct hyp_page of a high-order page (otherwise known * as the 'head') should have p->order set. The non-head pages should @@ -116,6 +121,7 @@ static void __hyp_attach_page(struct hyp_pool *pool, p = min(p, buddy); } +insert: /* Mark the new head, and insert it */ p->order = order; page_add_to_list(p, &pool->free_area[order]); From 8e6bcc3a4502a0d8d065466efd888b6b59b85789 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Thu, 10 Nov 2022 19:02:36 +0000 Subject: [PATCH 1415/4122] KVM: arm64: Back the hypervisor 'struct hyp_page' array for all memory The EL2 'vmemmap' array in nVHE Protected mode is currently very sparse: only memory pages owned by the hypervisor itself have a matching 'struct hyp_page'. 
However, as the size of this struct has been reduced significantly since its introduction, it appears that we can now afford to back the vmemmap for all of memory. Having an easily accessible 'struct hyp_page' for every physical page in memory provides the hypervisor with a simple mechanism to store metadata (e.g. a refcount) that wouldn't otherwise fit in the very limited number of software bits available in the host stage-2 page-table entries. This will be used in subsequent patches when pinning host memory pages for use by the hypervisor at EL2. Tested-by: Vincent Donnefort Signed-off-by: Quentin Perret Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-4-will@kernel.org --- arch/arm64/include/asm/kvm_pkvm.h | 26 +++++++++++++++++++++++ arch/arm64/kvm/hyp/include/nvhe/mm.h | 14 +------------ arch/arm64/kvm/hyp/nvhe/mm.c | 31 ++++++++++++++++++++++++---- arch/arm64/kvm/hyp/nvhe/page_alloc.c | 4 +--- arch/arm64/kvm/hyp/nvhe/setup.c | 7 +++---- arch/arm64/kvm/pkvm.c | 18 ++-------------- 6 files changed, 60 insertions(+), 40 deletions(-) diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h index 9f4ad2a8df59..8f7b8a2314bb 100644 --- a/arch/arm64/include/asm/kvm_pkvm.h +++ b/arch/arm64/include/asm/kvm_pkvm.h @@ -14,6 +14,32 @@ extern struct memblock_region kvm_nvhe_sym(hyp_memory)[]; extern unsigned int kvm_nvhe_sym(hyp_memblock_nr); +static inline unsigned long +hyp_vmemmap_memblock_size(struct memblock_region *reg, size_t vmemmap_entry_size) +{ + unsigned long nr_pages = reg->size >> PAGE_SHIFT; + unsigned long start, end; + + start = (reg->base >> PAGE_SHIFT) * vmemmap_entry_size; + end = start + nr_pages * vmemmap_entry_size; + start = ALIGN_DOWN(start, PAGE_SIZE); + end = ALIGN(end, PAGE_SIZE); + + return end - start; +} + +static inline unsigned long hyp_vmemmap_pages(size_t vmemmap_entry_size) +{ + unsigned long res = 0, i; + + for (i = 0; i < kvm_nvhe_sym(hyp_memblock_nr); 
i++) { + res += hyp_vmemmap_memblock_size(&kvm_nvhe_sym(hyp_memory)[i], + vmemmap_entry_size); + } + + return res >> PAGE_SHIFT; +} + static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages) { unsigned long total = 0, i; diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h index 42d8eb9bfe72..b2ee6d5df55b 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h @@ -15,7 +15,7 @@ extern hyp_spinlock_t pkvm_pgd_lock; int hyp_create_idmap(u32 hyp_va_bits); int hyp_map_vectors(void); -int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back); +int hyp_back_vmemmap(phys_addr_t back); int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot); int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot); int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot); @@ -24,16 +24,4 @@ int __pkvm_create_private_mapping(phys_addr_t phys, size_t size, unsigned long *haddr); int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr); -static inline void hyp_vmemmap_range(phys_addr_t phys, unsigned long size, - unsigned long *start, unsigned long *end) -{ - unsigned long nr_pages = size >> PAGE_SHIFT; - struct hyp_page *p = hyp_phys_to_page(phys); - - *start = (unsigned long)p; - *end = *start + nr_pages * sizeof(struct hyp_page); - *start = ALIGN_DOWN(*start, PAGE_SIZE); - *end = ALIGN(*end, PAGE_SIZE); -} - #endif /* __KVM_HYP_MM_H */ diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c index 96193cb31a39..d3a3b47181de 100644 --- a/arch/arm64/kvm/hyp/nvhe/mm.c +++ b/arch/arm64/kvm/hyp/nvhe/mm.c @@ -129,13 +129,36 @@ int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot) return ret; } -int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back) +int hyp_back_vmemmap(phys_addr_t back) { - unsigned long start, end; + unsigned long i, start, size, end = 0; + int ret; - 
hyp_vmemmap_range(phys, size, &start, &end); + for (i = 0; i < hyp_memblock_nr; i++) { + start = hyp_memory[i].base; + start = ALIGN_DOWN((u64)hyp_phys_to_page(start), PAGE_SIZE); + /* + * The begining of the hyp_vmemmap region for the current + * memblock may already be backed by the page backing the end + * the previous region, so avoid mapping it twice. + */ + start = max(start, end); - return __pkvm_create_mappings(start, end - start, back, PAGE_HYP); + end = hyp_memory[i].base + hyp_memory[i].size; + end = PAGE_ALIGN((u64)hyp_phys_to_page(end)); + if (start >= end) + continue; + + size = end - start; + ret = __pkvm_create_mappings(start, size, back, PAGE_HYP); + if (ret) + return ret; + + memset(hyp_phys_to_virt(back), 0, size); + back += size; + } + + return 0; } static void *__hyp_bp_vect_base; diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c index dad88e203598..803ba3222e75 100644 --- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c +++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c @@ -236,10 +236,8 @@ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages, /* Init the vmemmap portion */ p = hyp_phys_to_page(phys); - for (i = 0; i < nr_pages; i++) { - p[i].order = 0; + for (i = 0; i < nr_pages; i++) hyp_set_page_refcounted(&p[i]); - } /* Attach the unused pages to the buddy tree */ for (i = reserved_pages; i < nr_pages; i++) diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index e8d4ea2fcfa0..579eb4f73476 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -31,12 +31,11 @@ static struct hyp_pool hpool; static int divide_memory_pool(void *virt, unsigned long size) { - unsigned long vstart, vend, nr_pages; + unsigned long nr_pages; hyp_early_alloc_init(virt, size); - hyp_vmemmap_range(__hyp_pa(virt), size, &vstart, &vend); - nr_pages = (vend - vstart) >> PAGE_SHIFT; + nr_pages = hyp_vmemmap_pages(sizeof(struct hyp_page)); vmemmap_base = 
hyp_early_alloc_contig(nr_pages); if (!vmemmap_base) return -ENOMEM; @@ -78,7 +77,7 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size, if (ret) return ret; - ret = hyp_back_vmemmap(phys, size, hyp_virt_to_phys(vmemmap_base)); + ret = hyp_back_vmemmap(hyp_virt_to_phys(vmemmap_base)); if (ret) return ret; diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c index ebecb7c045f4..34229425b25d 100644 --- a/arch/arm64/kvm/pkvm.c +++ b/arch/arm64/kvm/pkvm.c @@ -53,7 +53,7 @@ static int __init register_memblock_regions(void) void __init kvm_hyp_reserve(void) { - u64 nr_pages, prev, hyp_mem_pages = 0; + u64 hyp_mem_pages = 0; int ret; if (!is_hyp_mode_available() || is_kernel_in_hyp_mode()) @@ -71,21 +71,7 @@ void __init kvm_hyp_reserve(void) hyp_mem_pages += hyp_s1_pgtable_pages(); hyp_mem_pages += host_s2_pgtable_pages(); - - /* - * The hyp_vmemmap needs to be backed by pages, but these pages - * themselves need to be present in the vmemmap, so compute the number - * of pages needed by looking for a fixed point. - */ - nr_pages = 0; - do { - prev = nr_pages; - nr_pages = hyp_mem_pages + prev; - nr_pages = DIV_ROUND_UP(nr_pages * STRUCT_HYP_PAGE_SIZE, - PAGE_SIZE); - nr_pages += __hyp_pgtable_max_pages(nr_pages); - } while (nr_pages != prev); - hyp_mem_pages += nr_pages; + hyp_mem_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE); /* * Try to allocate a PMD-aligned region to reduce TLB pressure once From 0d16d12eb26ef85602ef8a678d94825a66772774 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Thu, 10 Nov 2022 19:02:37 +0000 Subject: [PATCH 1416/4122] KVM: arm64: Fix-up hyp stage-1 refcounts for all pages mapped at EL2 In order to allow unmapping arbitrary memory pages from the hypervisor stage-1 page-table, fix-up the initial refcount for pages that have been mapped before the 'vmemmap' array was up and running so that it accurately accounts for all existing hypervisor mappings. 
This is achieved by traversing the entire hypervisor stage-1 page-table during initialisation of EL2 and updating the corresponding 'struct hyp_page' for each valid mapping. Reviewed-by: Oliver Upton Tested-by: Vincent Donnefort Signed-off-by: Quentin Perret Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-5-will@kernel.org --- arch/arm64/kvm/hyp/nvhe/setup.c | 62 +++++++++++++++++++++++---------- 1 file changed, 43 insertions(+), 19 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index 579eb4f73476..8f2726d7e201 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -185,12 +185,11 @@ static void hpool_put_page(void *addr) hyp_put_page(&hpool, addr); } -static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level, - kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, - void * const arg) +static int fix_host_ownership_walker(u64 addr, u64 end, u32 level, + kvm_pte_t *ptep, + enum kvm_pgtable_walk_flags flag, + void * const arg) { - struct kvm_pgtable_mm_ops *mm_ops = arg; enum kvm_pgtable_prot prot; enum pkvm_page_state state; kvm_pte_t pte = *ptep; @@ -199,15 +198,6 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level, if (!kvm_pte_valid(pte)) return 0; - /* - * Fix-up the refcount for the page-table pages as the early allocator - * was unable to access the hyp_vmemmap and so the buddy allocator has - * initialised the refcount to '1'. 
- */ - mm_ops->get_page(ptep); - if (flag != KVM_PGTABLE_WALK_LEAF) - return 0; - if (level != (KVM_PGTABLE_MAX_LEVELS - 1)) return -EINVAL; @@ -236,12 +226,30 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level, return host_stage2_idmap_locked(phys, PAGE_SIZE, prot); } -static int finalize_host_mappings(void) +static int fix_hyp_pgtable_refcnt_walker(u64 addr, u64 end, u32 level, + kvm_pte_t *ptep, + enum kvm_pgtable_walk_flags flag, + void * const arg) +{ + struct kvm_pgtable_mm_ops *mm_ops = arg; + kvm_pte_t pte = *ptep; + + /* + * Fix-up the refcount for the page-table pages as the early allocator + * was unable to access the hyp_vmemmap and so the buddy allocator has + * initialised the refcount to '1'. + */ + if (kvm_pte_valid(pte)) + mm_ops->get_page(ptep); + + return 0; +} + +static int fix_host_ownership(void) { struct kvm_pgtable_walker walker = { - .cb = finalize_host_mappings_walker, - .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, - .arg = pkvm_pgtable.mm_ops, + .cb = fix_host_ownership_walker, + .flags = KVM_PGTABLE_WALK_LEAF, }; int i, ret; @@ -257,6 +265,18 @@ static int finalize_host_mappings(void) return 0; } +static int fix_hyp_pgtable_refcnt(void) +{ + struct kvm_pgtable_walker walker = { + .cb = fix_hyp_pgtable_refcnt_walker, + .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, + .arg = pkvm_pgtable.mm_ops, + }; + + return kvm_pgtable_walk(&pkvm_pgtable, 0, BIT(pkvm_pgtable.ia_bits), + &walker); +} + void __noreturn __pkvm_init_finalise(void) { struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data); @@ -286,7 +306,11 @@ void __noreturn __pkvm_init_finalise(void) }; pkvm_pgtable.mm_ops = &pkvm_pgtable_mm_ops; - ret = finalize_host_mappings(); + ret = fix_host_ownership(); + if (ret) + goto out; + + ret = fix_hyp_pgtable_refcnt(); if (ret) goto out; From 33bc332d4061e95db55594893c4f80105b1dd813 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 10 Nov 2022 19:02:38 +0000 Subject: [PATCH 
1417/4122] KVM: arm64: Unify identifiers used to distinguish host and hypervisor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'pkvm_component_id' enum type provides constants to refer to the host and the hypervisor, yet this information is duplicated by the 'pkvm_hyp_id' constant. Remove the definition of 'pkvm_hyp_id' and move the 'pkvm_component_id' type definition to 'mem_protect.h' so that it can be used outside of the memory protection code, for example when initialising the owner for hypervisor-owned pages. Reviewed-by: Oliver Upton Reviewed-by: Philippe Mathieu-Daudé Tested-by: Vincent Donnefort Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-6-will@kernel.org --- arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 6 +++++- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 8 -------- arch/arm64/kvm/hyp/nvhe/setup.c | 2 +- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 80e99836eac7..f5705a1e972f 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -51,7 +51,11 @@ struct host_kvm { }; extern struct host_kvm host_kvm; -extern const u8 pkvm_hyp_id; +/* This corresponds to page-table locking order */ +enum pkvm_component_id { + PKVM_ID_HOST, + PKVM_ID_HYP, +}; int __pkvm_prot_finalize(void); int __pkvm_host_share_hyp(u64 pfn); diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 1e78acf9662e..ff86f5bd230f 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -26,8 +26,6 @@ struct host_kvm host_kvm; static struct hyp_pool host_s2_pool; -const u8 pkvm_hyp_id = 1; - static void host_lock_component(void) { hyp_spin_lock(&host_kvm.lock); @@ -380,12 +378,6 @@ void handle_host_mem_abort(struct kvm_cpu_context 
*host_ctxt) BUG_ON(ret && ret != -EAGAIN); } -/* This corresponds to locking order */ -enum pkvm_component_id { - PKVM_ID_HOST, - PKVM_ID_HYP, -}; - struct pkvm_mem_transition { u64 nr_pages; diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index 8f2726d7e201..0312c9c74a5a 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -212,7 +212,7 @@ static int fix_host_ownership_walker(u64 addr, u64 end, u32 level, state = pkvm_getstate(kvm_pgtable_hyp_pte_prot(pte)); switch (state) { case PKVM_PAGE_OWNED: - return host_stage2_set_owner_locked(phys, PAGE_SIZE, pkvm_hyp_id); + return host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HYP); case PKVM_PAGE_SHARED_OWNED: prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_BORROWED); break; From 1ed5c24c26f48ff61dc5d97c655769821f36a622 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 10 Nov 2022 19:02:39 +0000 Subject: [PATCH 1418/4122] KVM: arm64: Implement do_donate() helper for donating memory Transferring ownership information of a memory region from one component to another can be achieved using a "donate" operation, which results in the previous owner losing access to the underlying pages entirely and the new owner having exclusive access to the page. Implement a do_donate() helper, along the same lines as do_{un,}share, and provide this functionality for the host-{to,from}-hyp cases as this will later be used to donate/reclaim memory pages to store VM metadata at EL2. In a similar manner to the sharing transitions, permission checks are performed by the hypervisor to ensure that the component initiating the transition really is the owner of the page and also that the completer does not currently have a page mapped at the target address. 
Tested-by: Vincent Donnefort Co-developed-by: Quentin Perret Signed-off-by: Quentin Perret Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-7-will@kernel.org --- arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 2 + arch/arm64/kvm/hyp/nvhe/mem_protect.c | 239 ++++++++++++++++++ 2 files changed, 241 insertions(+) diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index f5705a1e972f..c87b19b2d468 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -60,6 +60,8 @@ enum pkvm_component_id { int __pkvm_prot_finalize(void); int __pkvm_host_share_hyp(u64 pfn); int __pkvm_host_unshare_hyp(u64 pfn); +int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages); +int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages); bool addr_is_memory(phys_addr_t phys); int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot); diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index ff86f5bd230f..10069cd32787 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -391,6 +391,9 @@ struct pkvm_mem_transition { /* Address in the completer's address space */ u64 completer_addr; } host; + struct { + u64 completer_addr; + } hyp; }; } initiator; @@ -404,6 +407,10 @@ struct pkvm_mem_share { const enum kvm_pgtable_prot completer_prot; }; +struct pkvm_mem_donation { + const struct pkvm_mem_transition tx; +}; + struct check_walk_data { enum pkvm_page_state desired; enum pkvm_page_state (*get_page_state)(kvm_pte_t pte); @@ -503,6 +510,46 @@ static int host_initiate_unshare(u64 *completer_addr, return __host_set_page_state_range(addr, size, PKVM_PAGE_OWNED); } +static int host_initiate_donation(u64 *completer_addr, + const struct pkvm_mem_transition *tx) +{ + u8 owner_id = tx->completer.id; + u64 size = tx->nr_pages * PAGE_SIZE; + + 
*completer_addr = tx->initiator.host.completer_addr; + return host_stage2_set_owner_locked(tx->initiator.addr, size, owner_id); +} + +static bool __host_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx) +{ + return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) || + tx->initiator.id != PKVM_ID_HYP); +} + +static int __host_ack_transition(u64 addr, const struct pkvm_mem_transition *tx, + enum pkvm_page_state state) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + + if (__host_ack_skip_pgtable_check(tx)) + return 0; + + return __host_check_page_state_range(addr, size, state); +} + +static int host_ack_donation(u64 addr, const struct pkvm_mem_transition *tx) +{ + return __host_ack_transition(addr, tx, PKVM_NOPAGE); +} + +static int host_complete_donation(u64 addr, const struct pkvm_mem_transition *tx) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + u8 host_id = tx->completer.id; + + return host_stage2_set_owner_locked(addr, size, host_id); +} + static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte) { if (!kvm_pte_valid(pte)) @@ -523,6 +570,27 @@ static int __hyp_check_page_state_range(u64 addr, u64 size, return check_page_state_range(&pkvm_pgtable, addr, size, &d); } +static int hyp_request_donation(u64 *completer_addr, + const struct pkvm_mem_transition *tx) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + u64 addr = tx->initiator.addr; + + *completer_addr = tx->initiator.hyp.completer_addr; + return __hyp_check_page_state_range(addr, size, PKVM_PAGE_OWNED); +} + +static int hyp_initiate_donation(u64 *completer_addr, + const struct pkvm_mem_transition *tx) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + int ret; + + *completer_addr = tx->initiator.hyp.completer_addr; + ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, tx->initiator.addr, size); + return (ret != size) ? 
-EFAULT : 0; +} + static bool __hyp_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx) { return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) || @@ -554,6 +622,16 @@ static int hyp_ack_unshare(u64 addr, const struct pkvm_mem_transition *tx) PKVM_PAGE_SHARED_BORROWED); } +static int hyp_ack_donation(u64 addr, const struct pkvm_mem_transition *tx) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + + if (__hyp_ack_skip_pgtable_check(tx)) + return 0; + + return __hyp_check_page_state_range(addr, size, PKVM_NOPAGE); +} + static int hyp_complete_share(u64 addr, const struct pkvm_mem_transition *tx, enum kvm_pgtable_prot perms) { @@ -572,6 +650,15 @@ static int hyp_complete_unshare(u64 addr, const struct pkvm_mem_transition *tx) return (ret != size) ? -EFAULT : 0; } +static int hyp_complete_donation(u64 addr, + const struct pkvm_mem_transition *tx) +{ + void *start = (void *)addr, *end = start + (tx->nr_pages * PAGE_SIZE); + enum kvm_pgtable_prot prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_OWNED); + + return pkvm_create_mappings_locked(start, end, prot); +} + static int check_share(struct pkvm_mem_share *share) { const struct pkvm_mem_transition *tx = &share->tx; @@ -724,6 +811,94 @@ static int do_unshare(struct pkvm_mem_share *share) return WARN_ON(__do_unshare(share)); } +static int check_donation(struct pkvm_mem_donation *donation) +{ + const struct pkvm_mem_transition *tx = &donation->tx; + u64 completer_addr; + int ret; + + switch (tx->initiator.id) { + case PKVM_ID_HOST: + ret = host_request_owned_transition(&completer_addr, tx); + break; + case PKVM_ID_HYP: + ret = hyp_request_donation(&completer_addr, tx); + break; + default: + ret = -EINVAL; + } + + if (ret) + return ret; + + switch (tx->completer.id) { + case PKVM_ID_HOST: + ret = host_ack_donation(completer_addr, tx); + break; + case PKVM_ID_HYP: + ret = hyp_ack_donation(completer_addr, tx); + break; + default: + ret = -EINVAL; + } + + return ret; +} + +static int __do_donate(struct pkvm_mem_donation *donation) +{ + const 
struct pkvm_mem_transition *tx = &donation->tx; + u64 completer_addr; + int ret; + + switch (tx->initiator.id) { + case PKVM_ID_HOST: + ret = host_initiate_donation(&completer_addr, tx); + break; + case PKVM_ID_HYP: + ret = hyp_initiate_donation(&completer_addr, tx); + break; + default: + ret = -EINVAL; + } + + if (ret) + return ret; + + switch (tx->completer.id) { + case PKVM_ID_HOST: + ret = host_complete_donation(completer_addr, tx); + break; + case PKVM_ID_HYP: + ret = hyp_complete_donation(completer_addr, tx); + break; + default: + ret = -EINVAL; + } + + return ret; +} + +/* + * do_donate(): + * + * The page owner transfers ownership to another component, losing access + * as a consequence. + * + * Initiator: OWNED => NOPAGE + * Completer: NOPAGE => OWNED + */ +static int do_donate(struct pkvm_mem_donation *donation) +{ + int ret; + + ret = check_donation(donation); + if (ret) + return ret; + + return WARN_ON(__do_donate(donation)); +} + int __pkvm_host_share_hyp(u64 pfn) { int ret; @@ -789,3 +964,67 @@ int __pkvm_host_unshare_hyp(u64 pfn) return ret; } + +int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages) +{ + int ret; + u64 host_addr = hyp_pfn_to_phys(pfn); + u64 hyp_addr = (u64)__hyp_va(host_addr); + struct pkvm_mem_donation donation = { + .tx = { + .nr_pages = nr_pages, + .initiator = { + .id = PKVM_ID_HOST, + .addr = host_addr, + .host = { + .completer_addr = hyp_addr, + }, + }, + .completer = { + .id = PKVM_ID_HYP, + }, + }, + }; + + host_lock_component(); + hyp_lock_component(); + + ret = do_donate(&donation); + + hyp_unlock_component(); + host_unlock_component(); + + return ret; +} + +int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages) +{ + int ret; + u64 host_addr = hyp_pfn_to_phys(pfn); + u64 hyp_addr = (u64)__hyp_va(host_addr); + struct pkvm_mem_donation donation = { + .tx = { + .nr_pages = nr_pages, + .initiator = { + .id = PKVM_ID_HYP, + .addr = hyp_addr, + .hyp = { + .completer_addr = host_addr, + }, + }, + .completer = { + .id = PKVM_ID_HOST, + }, 
+ }, + }; + + host_lock_component(); + hyp_lock_component(); + + ret = do_donate(&donation); + + hyp_unlock_component(); + host_unlock_component(); + + return ret; +} From 43c1ff8b75011bc3e3e923adf31ba815864a2494 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Thu, 10 Nov 2022 19:02:40 +0000 Subject: [PATCH 1419/4122] KVM: arm64: Prevent the donation of no-map pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Memory regions marked as "no-map" in the host device-tree routinely include TrustZone carve-outs and DMA pools. Although donating such pages to the hypervisor may not breach confidentiality, it could be used to corrupt its state in uncontrollable ways. To prevent this, let's block host-initiated memory transitions targeting "no-map" pages altogether in nVHE protected mode as there should be no valid reason to do this in current operation. Thankfully, the pKVM EL2 hypervisor has a full copy of the host's list of memblock regions, so we can easily check for the presence of the MEMBLOCK_NOMAP flag on a region containing pages being donated from the host. 
Reviewed-by: Philippe Mathieu-Daudé Tested-by: Vincent Donnefort Signed-off-by: Quentin Perret Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-8-will@kernel.org --- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 10069cd32787..f7e3afaf9f11 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -193,7 +193,7 @@ struct kvm_mem_range { u64 end; }; -static bool find_mem_range(phys_addr_t addr, struct kvm_mem_range *range) +static struct memblock_region *find_mem_range(phys_addr_t addr, struct kvm_mem_range *range) { int cur, left = 0, right = hyp_memblock_nr; struct memblock_region *reg; @@ -216,18 +216,28 @@ static bool find_mem_range(phys_addr_t addr, struct kvm_mem_range *range) } else { range->start = reg->base; range->end = end; - return true; + return reg; } } - return false; + return NULL; } bool addr_is_memory(phys_addr_t phys) { struct kvm_mem_range range; - return find_mem_range(phys, &range); + return !!find_mem_range(phys, &range); +} + +static bool addr_is_allowed_memory(phys_addr_t phys) +{ + struct memblock_region *reg; + struct kvm_mem_range range; + + reg = find_mem_range(phys, &range); + + return reg && !(reg->flags & MEMBLOCK_NOMAP); } static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range) @@ -346,7 +356,7 @@ static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot pr static int host_stage2_idmap(u64 addr) { struct kvm_mem_range range; - bool is_memory = find_mem_range(addr, &range); + bool is_memory = !!find_mem_range(addr, &range); enum kvm_pgtable_prot prot; int ret; @@ -424,7 +434,7 @@ static int __check_page_state_visitor(u64 addr, u64 end, u32 level, struct check_walk_data *d = arg; kvm_pte_t pte = *ptep; - if (kvm_pte_valid(pte) && 
!addr_is_memory(kvm_pte_to_phys(pte))) + if (kvm_pte_valid(pte) && !addr_is_allowed_memory(kvm_pte_to_phys(pte))) return -EINVAL; return d->get_page_state(pte) == d->desired ? 0 : -EPERM; From 9926cfce8dcb880255f30ab9ac930add787e1ead Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Thu, 10 Nov 2022 19:02:41 +0000 Subject: [PATCH 1420/4122] KVM: arm64: Add helpers to pin memory shared with the hypervisor at EL2 Add helpers allowing the hypervisor to check whether a range of pages are currently shared by the host, and 'pin' them if so by blocking host unshare operations until the memory has been unpinned. This will allow the hypervisor to take references on host-provided data-structures (e.g. 'struct kvm') with the guarantee that these pages will remain in a stable state until the hypervisor decides to release them, for example during guest teardown. Tested-by: Vincent Donnefort Signed-off-by: Quentin Perret Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-9-will@kernel.org --- arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 3 ++ arch/arm64/kvm/hyp/include/nvhe/memory.h | 7 ++- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 48 +++++++++++++++++++ 3 files changed, 57 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index c87b19b2d468..998bf165af71 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -69,6 +69,9 @@ int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id); int kvm_host_prepare_stage2(void *pgt_pool_base); void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt); +int hyp_pin_shared_mem(void *from, void *to); +void hyp_unpin_shared_mem(void *from, void *to); + static __always_inline void __load_host_stage2(void) { if (static_branch_likely(&kvm_protected_mode_initialized)) diff --git a/arch/arm64/kvm/hyp/include/nvhe/memory.h 
b/arch/arm64/kvm/hyp/include/nvhe/memory.h index 9422900e5c6a..ab205c4d6774 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/memory.h +++ b/arch/arm64/kvm/hyp/include/nvhe/memory.h @@ -55,10 +55,15 @@ static inline void hyp_page_ref_inc(struct hyp_page *p) p->refcount++; } -static inline int hyp_page_ref_dec_and_test(struct hyp_page *p) +static inline void hyp_page_ref_dec(struct hyp_page *p) { BUG_ON(!p->refcount); p->refcount--; +} + +static inline int hyp_page_ref_dec_and_test(struct hyp_page *p) +{ + hyp_page_ref_dec(p); return (p->refcount == 0); } diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index f7e3afaf9f11..83c2f67e1b58 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -625,6 +625,9 @@ static int hyp_ack_unshare(u64 addr, const struct pkvm_mem_transition *tx) { u64 size = tx->nr_pages * PAGE_SIZE; + if (tx->initiator.id == PKVM_ID_HOST && hyp_page_count((void *)addr)) + return -EBUSY; + if (__hyp_ack_skip_pgtable_check(tx)) return 0; @@ -1038,3 +1041,48 @@ int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages) return ret; } + +int hyp_pin_shared_mem(void *from, void *to) +{ + u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE); + u64 end = PAGE_ALIGN((u64)to); + u64 size = end - start; + int ret; + + host_lock_component(); + hyp_lock_component(); + + ret = __host_check_page_state_range(__hyp_pa(start), size, + PKVM_PAGE_SHARED_OWNED); + if (ret) + goto unlock; + + ret = __hyp_check_page_state_range(start, size, + PKVM_PAGE_SHARED_BORROWED); + if (ret) + goto unlock; + + for (cur = start; cur < end; cur += PAGE_SIZE) + hyp_page_ref_inc(hyp_virt_to_page(cur)); + +unlock: + hyp_unlock_component(); + host_unlock_component(); + + return ret; +} + +void hyp_unpin_shared_mem(void *from, void *to) +{ + u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE); + u64 end = PAGE_ALIGN((u64)to); + + host_lock_component(); + hyp_lock_component(); + + for (cur = start; cur < end; cur += 
PAGE_SIZE) + hyp_page_ref_dec(hyp_virt_to_page(cur)); + + hyp_unlock_component(); + host_unlock_component(); +} From 4d968b12e6bbe4440f4f220c41d779e02df8af1a Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 10 Nov 2022 19:02:42 +0000 Subject: [PATCH 1421/4122] KVM: arm64: Include asm/kvm_mmu.h in nvhe/mem_protect.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit nvhe/mem_protect.h refers to __load_stage2() in the definition of __load_host_stage2() but doesn't include the relevant header. Include asm/kvm_mmu.h in nvhe/mem_protect.h so that users of the latter don't have to do this themselves. Reviewed-by: Philippe Mathieu-Daudé Tested-by: Vincent Donnefort Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-10-will@kernel.org --- arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 998bf165af71..3bea816296dc 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -8,6 +8,7 @@ #define __KVM_NVHE_MEM_PROTECT__ #include #include +#include #include #include #include From 1c80002e3264552d8b9c0e162e09aa4087403716 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Thu, 10 Nov 2022 19:02:43 +0000 Subject: [PATCH 1422/4122] KVM: arm64: Add hyp_spinlock_t static initializer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a static initializer macro for 'hyp_spinlock_t' so that it is straightforward to instantiate global locks at EL2. This will be later utilised for locking the VM table in the hypervisor. 
Reviewed-by: Philippe Mathieu-Daudé Tested-by: Vincent Donnefort Signed-off-by: Fuad Tabba Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-11-will@kernel.org --- arch/arm64/kvm/hyp/include/nvhe/spinlock.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/include/nvhe/spinlock.h b/arch/arm64/kvm/hyp/include/nvhe/spinlock.h index 4652fd04bdbe..7c7ea8c55405 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/spinlock.h +++ b/arch/arm64/kvm/hyp/include/nvhe/spinlock.h @@ -28,9 +28,17 @@ typedef union hyp_spinlock { }; } hyp_spinlock_t; +#define __HYP_SPIN_LOCK_INITIALIZER \ + { .__val = 0 } + +#define __HYP_SPIN_LOCK_UNLOCKED \ + ((hyp_spinlock_t) __HYP_SPIN_LOCK_INITIALIZER) + +#define DEFINE_HYP_SPINLOCK(x) hyp_spinlock_t x = __HYP_SPIN_LOCK_UNLOCKED + #define hyp_spin_lock_init(l) \ do { \ - *(l) = (hyp_spinlock_t){ .__val = 0 }; \ + *(l) = __HYP_SPIN_LOCK_UNLOCKED; \ } while (0) static inline void hyp_spin_lock(hyp_spinlock_t *lock) From 5304002dc3754a5663d75c977bfa2d9e3c08906d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 10 Nov 2022 19:02:44 +0000 Subject: [PATCH 1423/4122] KVM: arm64: Rename 'host_kvm' to 'host_mmu' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In preparation for introducing VM and vCPU state at EL2, rename the existing 'struct host_kvm' and its singleton 'host_kvm' instance to 'host_mmu' so as to avoid confusion between the structure tracking the host stage-2 MMU state and the host instance of a 'struct kvm' for a protected guest. 
Reviewed-by: Philippe Mathieu-Daudé Tested-by: Vincent Donnefort Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-12-will@kernel.org --- arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 6 +-- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 46 +++++++++---------- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 3bea816296dc..0a6d3e7f2a43 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -44,13 +44,13 @@ static inline enum pkvm_page_state pkvm_getstate(enum kvm_pgtable_prot prot) return prot & PKVM_PAGE_STATE_PROT_MASK; } -struct host_kvm { +struct host_mmu { struct kvm_arch arch; struct kvm_pgtable pgt; struct kvm_pgtable_mm_ops mm_ops; hyp_spinlock_t lock; }; -extern struct host_kvm host_kvm; +extern struct host_mmu host_mmu; /* This corresponds to page-table locking order */ enum pkvm_component_id { @@ -76,7 +76,7 @@ void hyp_unpin_shared_mem(void *from, void *to); static __always_inline void __load_host_stage2(void) { if (static_branch_likely(&kvm_protected_mode_initialized)) - __load_stage2(&host_kvm.arch.mmu, &host_kvm.arch); + __load_stage2(&host_mmu.arch.mmu, &host_mmu.arch); else write_sysreg(0, vttbr_el2); } diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 83c2f67e1b58..06c6a24c0eae 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -22,18 +22,18 @@ #define KVM_HOST_S2_FLAGS (KVM_PGTABLE_S2_NOFWB | KVM_PGTABLE_S2_IDMAP) extern unsigned long hyp_nr_cpus; -struct host_kvm host_kvm; +struct host_mmu host_mmu; static struct hyp_pool host_s2_pool; static void host_lock_component(void) { - hyp_spin_lock(&host_kvm.lock); + hyp_spin_lock(&host_mmu.lock); } static void host_unlock_component(void) { - hyp_spin_unlock(&host_kvm.lock); + 
hyp_spin_unlock(&host_mmu.lock); } static void hyp_lock_component(void) @@ -88,7 +88,7 @@ static int prepare_s2_pool(void *pgt_pool_base) if (ret) return ret; - host_kvm.mm_ops = (struct kvm_pgtable_mm_ops) { + host_mmu.mm_ops = (struct kvm_pgtable_mm_ops) { .zalloc_pages_exact = host_s2_zalloc_pages_exact, .zalloc_page = host_s2_zalloc_page, .phys_to_virt = hyp_phys_to_virt, @@ -109,7 +109,7 @@ static void prepare_host_vtcr(void) parange = kvm_get_parange(id_aa64mmfr0_el1_sys_val); phys_shift = id_aa64mmfr0_parange_to_phys_shift(parange); - host_kvm.arch.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val, + host_mmu.arch.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val, id_aa64mmfr1_el1_sys_val, phys_shift); } @@ -117,25 +117,25 @@ static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot pr int kvm_host_prepare_stage2(void *pgt_pool_base) { - struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu; + struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu; int ret; prepare_host_vtcr(); - hyp_spin_lock_init(&host_kvm.lock); - mmu->arch = &host_kvm.arch; + hyp_spin_lock_init(&host_mmu.lock); + mmu->arch = &host_mmu.arch; ret = prepare_s2_pool(pgt_pool_base); if (ret) return ret; - ret = __kvm_pgtable_stage2_init(&host_kvm.pgt, mmu, - &host_kvm.mm_ops, KVM_HOST_S2_FLAGS, + ret = __kvm_pgtable_stage2_init(&host_mmu.pgt, mmu, + &host_mmu.mm_ops, KVM_HOST_S2_FLAGS, host_stage2_force_pte_cb); if (ret) return ret; - mmu->pgd_phys = __hyp_pa(host_kvm.pgt.pgd); - mmu->pgt = &host_kvm.pgt; + mmu->pgd_phys = __hyp_pa(host_mmu.pgt.pgd); + mmu->pgt = &host_mmu.pgt; atomic64_set(&mmu->vmid.id, 0); return 0; @@ -143,19 +143,19 @@ int kvm_host_prepare_stage2(void *pgt_pool_base) int __pkvm_prot_finalize(void) { - struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu; + struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu; struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params); if (params->hcr_el2 & HCR_VM) return -EPERM; params->vttbr = kvm_get_vttbr(mmu); - params->vtcr = host_kvm.arch.vtcr; + 
params->vtcr = host_mmu.arch.vtcr; params->hcr_el2 |= HCR_VM; kvm_flush_dcache_to_poc(params, sizeof(*params)); write_sysreg(params->hcr_el2, hcr_el2); - __load_stage2(&host_kvm.arch.mmu, &host_kvm.arch); + __load_stage2(&host_mmu.arch.mmu, &host_mmu.arch); /* * Make sure to have an ISB before the TLB maintenance below but only @@ -173,7 +173,7 @@ int __pkvm_prot_finalize(void) static int host_stage2_unmap_dev_all(void) { - struct kvm_pgtable *pgt = &host_kvm.pgt; + struct kvm_pgtable *pgt = &host_mmu.pgt; struct memblock_region *reg; u64 addr = 0; int i, ret; @@ -258,7 +258,7 @@ static bool range_is_memory(u64 start, u64 end) static inline int __host_stage2_idmap(u64 start, u64 end, enum kvm_pgtable_prot prot) { - return kvm_pgtable_stage2_map(&host_kvm.pgt, start, end - start, start, + return kvm_pgtable_stage2_map(&host_mmu.pgt, start, end - start, start, prot, &host_s2_pool); } @@ -271,7 +271,7 @@ static inline int __host_stage2_idmap(u64 start, u64 end, #define host_stage2_try(fn, ...) 
\ ({ \ int __ret; \ - hyp_assert_lock_held(&host_kvm.lock); \ + hyp_assert_lock_held(&host_mmu.lock); \ __ret = fn(__VA_ARGS__); \ if (__ret == -ENOMEM) { \ __ret = host_stage2_unmap_dev_all(); \ @@ -294,8 +294,8 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range) u32 level; int ret; - hyp_assert_lock_held(&host_kvm.lock); - ret = kvm_pgtable_get_leaf(&host_kvm.pgt, addr, &pte, &level); + hyp_assert_lock_held(&host_mmu.lock); + ret = kvm_pgtable_get_leaf(&host_mmu.pgt, addr, &pte, &level); if (ret) return ret; @@ -327,7 +327,7 @@ int host_stage2_idmap_locked(phys_addr_t addr, u64 size, int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id) { - return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_kvm.pgt, + return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_mmu.pgt, addr, size, &host_s2_pool, owner_id); } @@ -468,8 +468,8 @@ static int __host_check_page_state_range(u64 addr, u64 size, .get_page_state = host_get_page_state, }; - hyp_assert_lock_held(&host_kvm.lock); - return check_page_state_range(&host_kvm.pgt, addr, size, &d); + hyp_assert_lock_held(&host_mmu.lock); + return check_page_state_range(&host_mmu.pgt, addr, size, &d); } static int __host_set_page_state_range(u64 addr, u64 size, From a1ec5c70d3f63d8a143fb83cd7f53bd8ff2f72c8 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Thu, 10 Nov 2022 19:02:45 +0000 Subject: [PATCH 1424/4122] KVM: arm64: Add infrastructure to create and track pKVM instances at EL2 Introduce a global table (and lock) to track pKVM instances at EL2, and provide hypercalls that can be used by the untrusted host to create and destroy pKVM VMs and their vCPUs. pKVM VM/vCPU state is directly accessible only by the trusted hypervisor (EL2). Each pKVM VM is directly associated with an untrusted host KVM instance, and is referenced by the host using an opaque handle. 
Future patches will provide hypercalls to allow the host to initialize/set/get pKVM VM/vCPU state using the opaque handle. Tested-by: Vincent Donnefort Signed-off-by: Fuad Tabba Co-developed-by: Will Deacon Signed-off-by: Will Deacon [maz: silence warning on unmap_donated_memory_noclear()] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-13-will@kernel.org --- arch/arm64/include/asm/kvm_asm.h | 3 + arch/arm64/include/asm/kvm_host.h | 8 + arch/arm64/include/asm/kvm_pgtable.h | 8 + arch/arm64/include/asm/kvm_pkvm.h | 8 + arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 3 + arch/arm64/kvm/hyp/include/nvhe/pkvm.h | 58 +++ arch/arm64/kvm/hyp/nvhe/hyp-main.c | 31 ++ arch/arm64/kvm/hyp/nvhe/mem_protect.c | 14 + arch/arm64/kvm/hyp/nvhe/pkvm.c | 380 ++++++++++++++++++ arch/arm64/kvm/hyp/nvhe/setup.c | 8 + arch/arm64/kvm/hyp/pgtable.c | 9 + arch/arm64/kvm/pkvm.c | 1 + 12 files changed, 531 insertions(+) create mode 100644 arch/arm64/kvm/hyp/include/nvhe/pkvm.h diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 53035763e48e..de52ba775d48 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -76,6 +76,9 @@ enum __kvm_host_smccc_func { __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs, __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs, __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_init_traps, + __KVM_HOST_SMCCC_FUNC___pkvm_init_vm, + __KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu, + __KVM_HOST_SMCCC_FUNC___pkvm_teardown_vm, }; #define DECLARE_KVM_VHE_SYM(sym) extern char sym[] diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 45e2136322ba..d3dd7ab9c79e 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -115,6 +115,8 @@ struct kvm_smccc_features { unsigned long vendor_hyp_bmap; }; +typedef unsigned int pkvm_handle_t; + struct kvm_arch { struct kvm_s2_mmu mmu; @@ -166,6 +168,12 @@ struct kvm_arch { /* Hypercall features firmware 
registers' descriptor */ struct kvm_smccc_features smccc_feat; + + /* + * For an untrusted host VM, 'pkvm_handle' is used to lookup + * the associated pKVM instance in the hypervisor. + */ + pkvm_handle_t pkvm_handle; }; struct kvm_vcpu_fault_info { diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 3252eb50ecfe..15c389db1931 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -296,6 +296,14 @@ u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size); */ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift); +/** + * kvm_pgtable_stage2_pgd_size() - Helper to compute size of a stage-2 PGD + * @vtcr: Content of the VTCR register. + * + * Return: the size (in bytes) of the stage-2 PGD + */ +size_t kvm_pgtable_stage2_pgd_size(u64 vtcr); + /** * __kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table. * @pgt: Uninitialised page-table structure to initialise. diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h index 8f7b8a2314bb..f4e3133d6550 100644 --- a/arch/arm64/include/asm/kvm_pkvm.h +++ b/arch/arm64/include/asm/kvm_pkvm.h @@ -9,6 +9,9 @@ #include #include +/* Maximum number of VMs that can co-exist under pKVM. 
*/ +#define KVM_MAX_PVMS 255 + #define HYP_MEMBLOCK_REGIONS 128 extern struct memblock_region kvm_nvhe_sym(hyp_memory)[]; @@ -40,6 +43,11 @@ static inline unsigned long hyp_vmemmap_pages(size_t vmemmap_entry_size) return res >> PAGE_SHIFT; } +static inline unsigned long hyp_vm_table_pages(void) +{ + return PAGE_ALIGN(KVM_MAX_PVMS * sizeof(void *)) >> PAGE_SHIFT; +} + static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages) { unsigned long total = 0, i; diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 0a6d3e7f2a43..ce9a796a85ee 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -11,6 +11,7 @@ #include #include #include +#include #include /* @@ -68,10 +69,12 @@ bool addr_is_memory(phys_addr_t phys); int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot); int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id); int kvm_host_prepare_stage2(void *pgt_pool_base); +int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd); void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt); int hyp_pin_shared_mem(void *from, void *to); void hyp_unpin_shared_mem(void *from, void *to); +void reclaim_guest_pages(struct pkvm_hyp_vm *vm); static __always_inline void __load_host_stage2(void) { diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h new file mode 100644 index 000000000000..8c653a3b9501 --- /dev/null +++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2021 Google LLC + * Author: Fuad Tabba + */ + +#ifndef __ARM64_KVM_NVHE_PKVM_H__ +#define __ARM64_KVM_NVHE_PKVM_H__ + +#include + +/* + * Holds the relevant data for maintaining the vcpu state completely at hyp. 
+ */ +struct pkvm_hyp_vcpu { + struct kvm_vcpu vcpu; + + /* Backpointer to the host's (untrusted) vCPU instance. */ + struct kvm_vcpu *host_vcpu; +}; + +/* + * Holds the relevant data for running a protected vm. + */ +struct pkvm_hyp_vm { + struct kvm kvm; + + /* Backpointer to the host's (untrusted) KVM instance. */ + struct kvm *host_kvm; + + /* The guest's stage-2 page-table managed by the hypervisor. */ + struct kvm_pgtable pgt; + + /* + * The number of vcpus initialized and ready to run. + * Modifying this is protected by 'vm_table_lock'. + */ + unsigned int nr_vcpus; + + /* Array of the hyp vCPU structures for this VM. */ + struct pkvm_hyp_vcpu *vcpus[]; +}; + +static inline struct pkvm_hyp_vm * +pkvm_hyp_vcpu_to_hyp_vm(struct pkvm_hyp_vcpu *hyp_vcpu) +{ + return container_of(hyp_vcpu->vcpu.kvm, struct pkvm_hyp_vm, kvm); +} + +void pkvm_hyp_vm_table_init(void *tbl); + +int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva, + unsigned long pgd_hva); +int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu, + unsigned long vcpu_hva); +int __pkvm_teardown_vm(pkvm_handle_t handle); + +#endif /* __ARM64_KVM_NVHE_PKVM_H__ */ diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 3cea4b6ac23e..b5f3fcfe9135 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -15,6 +15,7 @@ #include #include +#include #include DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params); @@ -191,6 +192,33 @@ static void handle___pkvm_vcpu_init_traps(struct kvm_cpu_context *host_ctxt) __pkvm_vcpu_init_traps(kern_hyp_va(vcpu)); } +static void handle___pkvm_init_vm(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(struct kvm *, host_kvm, host_ctxt, 1); + DECLARE_REG(unsigned long, vm_hva, host_ctxt, 2); + DECLARE_REG(unsigned long, pgd_hva, host_ctxt, 3); + + host_kvm = kern_hyp_va(host_kvm); + cpu_reg(host_ctxt, 1) = __pkvm_init_vm(host_kvm, vm_hva, pgd_hva); +} + +static void 
handle___pkvm_init_vcpu(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1); + DECLARE_REG(struct kvm_vcpu *, host_vcpu, host_ctxt, 2); + DECLARE_REG(unsigned long, vcpu_hva, host_ctxt, 3); + + host_vcpu = kern_hyp_va(host_vcpu); + cpu_reg(host_ctxt, 1) = __pkvm_init_vcpu(handle, host_vcpu, vcpu_hva); +} + +static void handle___pkvm_teardown_vm(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1); + + cpu_reg(host_ctxt, 1) = __pkvm_teardown_vm(handle); +} + typedef void (*hcall_t)(struct kvm_cpu_context *); #define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x @@ -220,6 +248,9 @@ static const hcall_t host_hcall[] = { HANDLE_FUNC(__vgic_v3_save_aprs), HANDLE_FUNC(__vgic_v3_restore_aprs), HANDLE_FUNC(__pkvm_vcpu_init_traps), + HANDLE_FUNC(__pkvm_init_vm), + HANDLE_FUNC(__pkvm_init_vcpu), + HANDLE_FUNC(__pkvm_teardown_vm), }; static void handle_host_hcall(struct kvm_cpu_context *host_ctxt) diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 06c6a24c0eae..459957b3082e 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -141,6 +141,20 @@ int kvm_host_prepare_stage2(void *pgt_pool_base) return 0; } +int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd) +{ + vm->pgt.pgd = pgd; + return 0; +} + +void reclaim_guest_pages(struct pkvm_hyp_vm *vm) +{ + unsigned long nr_pages; + + nr_pages = kvm_pgtable_stage2_pgd_size(vm->kvm.arch.vtcr) >> PAGE_SHIFT; + WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(vm->pgt.pgd), nr_pages)); +} + int __pkvm_prot_finalize(void) { struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu; diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 85d3b7ae720f..135c9a095eca 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -7,6 +7,9 @@ #include #include #include +#include +#include +#include #include /* @@ -183,3 
+186,380 @@ void __pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu) pvm_init_traps_aa64mmfr0(vcpu); pvm_init_traps_aa64mmfr1(vcpu); } + +/* + * Start the VM table handle at the offset defined instead of at 0. + * Mainly for sanity checking and debugging. + */ +#define HANDLE_OFFSET 0x1000 + +static unsigned int vm_handle_to_idx(pkvm_handle_t handle) +{ + return handle - HANDLE_OFFSET; +} + +static pkvm_handle_t idx_to_vm_handle(unsigned int idx) +{ + return idx + HANDLE_OFFSET; +} + +/* + * Spinlock for protecting state related to the VM table. Protects writes + * to 'vm_table' and 'nr_table_entries' as well as reads and writes to + * 'last_hyp_vcpu_lookup'. + */ +static DEFINE_HYP_SPINLOCK(vm_table_lock); + +/* + * The table of VM entries for protected VMs in hyp. + * Allocated at hyp initialization and setup. + */ +static struct pkvm_hyp_vm **vm_table; + +void pkvm_hyp_vm_table_init(void *tbl) +{ + WARN_ON(vm_table); + vm_table = tbl; +} + +/* + * Return the hyp vm structure corresponding to the handle. 
+ */ +static struct pkvm_hyp_vm *get_vm_by_handle(pkvm_handle_t handle) +{ + unsigned int idx = vm_handle_to_idx(handle); + + if (unlikely(idx >= KVM_MAX_PVMS)) + return NULL; + + return vm_table[idx]; +} + +static void unpin_host_vcpu(struct kvm_vcpu *host_vcpu) +{ + if (host_vcpu) + hyp_unpin_shared_mem(host_vcpu, host_vcpu + 1); +} + +static void unpin_host_vcpus(struct pkvm_hyp_vcpu *hyp_vcpus[], + unsigned int nr_vcpus) +{ + int i; + + for (i = 0; i < nr_vcpus; i++) + unpin_host_vcpu(hyp_vcpus[i]->host_vcpu); +} + +static void init_pkvm_hyp_vm(struct kvm *host_kvm, struct pkvm_hyp_vm *hyp_vm, + unsigned int nr_vcpus) +{ + hyp_vm->host_kvm = host_kvm; + hyp_vm->kvm.created_vcpus = nr_vcpus; + hyp_vm->kvm.arch.vtcr = host_mmu.arch.vtcr; +} + +static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu, + struct pkvm_hyp_vm *hyp_vm, + struct kvm_vcpu *host_vcpu, + unsigned int vcpu_idx) +{ + int ret = 0; + + if (hyp_pin_shared_mem(host_vcpu, host_vcpu + 1)) + return -EBUSY; + + if (host_vcpu->vcpu_idx != vcpu_idx) { + ret = -EINVAL; + goto done; + } + + hyp_vcpu->host_vcpu = host_vcpu; + + hyp_vcpu->vcpu.kvm = &hyp_vm->kvm; + hyp_vcpu->vcpu.vcpu_id = READ_ONCE(host_vcpu->vcpu_id); + hyp_vcpu->vcpu.vcpu_idx = vcpu_idx; + + hyp_vcpu->vcpu.arch.hw_mmu = &hyp_vm->kvm.arch.mmu; +done: + if (ret) + unpin_host_vcpu(host_vcpu); + return ret; +} + +static int find_free_vm_table_entry(struct kvm *host_kvm) +{ + int i; + + for (i = 0; i < KVM_MAX_PVMS; ++i) { + if (!vm_table[i]) + return i; + } + + return -ENOMEM; +} + +/* + * Allocate a VM table entry and insert a pointer to the new vm. + * + * Return a unique handle to the protected VM on success, + * negative error code on failure. 
+ */ +static pkvm_handle_t insert_vm_table_entry(struct kvm *host_kvm, + struct pkvm_hyp_vm *hyp_vm) +{ + struct kvm_s2_mmu *mmu = &hyp_vm->kvm.arch.mmu; + int idx; + + hyp_assert_lock_held(&vm_table_lock); + + /* + * Initializing protected state might have failed, yet a malicious + * host could trigger this function. Thus, ensure that 'vm_table' + * exists. + */ + if (unlikely(!vm_table)) + return -EINVAL; + + idx = find_free_vm_table_entry(host_kvm); + if (idx < 0) + return idx; + + hyp_vm->kvm.arch.pkvm_handle = idx_to_vm_handle(idx); + + /* VMID 0 is reserved for the host */ + atomic64_set(&mmu->vmid.id, idx + 1); + + mmu->arch = &hyp_vm->kvm.arch; + mmu->pgt = &hyp_vm->pgt; + + vm_table[idx] = hyp_vm; + return hyp_vm->kvm.arch.pkvm_handle; +} + +/* + * Deallocate and remove the VM table entry corresponding to the handle. + */ +static void remove_vm_table_entry(pkvm_handle_t handle) +{ + hyp_assert_lock_held(&vm_table_lock); + vm_table[vm_handle_to_idx(handle)] = NULL; +} + +static size_t pkvm_get_hyp_vm_size(unsigned int nr_vcpus) +{ + return size_add(sizeof(struct pkvm_hyp_vm), + size_mul(sizeof(struct pkvm_hyp_vcpu *), nr_vcpus)); +} + +static void *map_donated_memory_noclear(unsigned long host_va, size_t size) +{ + void *va = (void *)kern_hyp_va(host_va); + + if (!PAGE_ALIGNED(va)) + return NULL; + + if (__pkvm_host_donate_hyp(hyp_virt_to_pfn(va), + PAGE_ALIGN(size) >> PAGE_SHIFT)) + return NULL; + + return va; +} + +static void *map_donated_memory(unsigned long host_va, size_t size) +{ + void *va = map_donated_memory_noclear(host_va, size); + + if (va) + memset(va, 0, size); + + return va; +} + +static void __unmap_donated_memory(void *va, size_t size) +{ + WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(va), + PAGE_ALIGN(size) >> PAGE_SHIFT)); +} + +static void unmap_donated_memory(void *va, size_t size) +{ + if (!va) + return; + + memset(va, 0, size); + __unmap_donated_memory(va, size); +} + +static void __maybe_unused unmap_donated_memory_noclear(void 
*va, size_t size) +{ + if (!va) + return; + + __unmap_donated_memory(va, size); +} + +/* + * Initialize the hypervisor copy of the protected VM state using the + * memory donated by the host. + * + * Unmaps the donated memory from the host at stage 2. + * + * host_kvm: A pointer to the host's struct kvm. + * vm_hva: The host va of the area being donated for the VM state. + * Must be page aligned. + * pgd_hva: The host va of the area being donated for the stage-2 PGD for + * the VM. Must be page aligned. Its size is implied by the VM's + * VTCR. + * + * Return a unique handle to the protected VM on success, + * negative error code on failure. + */ +int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva, + unsigned long pgd_hva) +{ + struct pkvm_hyp_vm *hyp_vm = NULL; + size_t vm_size, pgd_size; + unsigned int nr_vcpus; + void *pgd = NULL; + int ret; + + ret = hyp_pin_shared_mem(host_kvm, host_kvm + 1); + if (ret) + return ret; + + nr_vcpus = READ_ONCE(host_kvm->created_vcpus); + if (nr_vcpus < 1) { + ret = -EINVAL; + goto err_unpin_kvm; + } + + vm_size = pkvm_get_hyp_vm_size(nr_vcpus); + pgd_size = kvm_pgtable_stage2_pgd_size(host_mmu.arch.vtcr); + + ret = -ENOMEM; + + hyp_vm = map_donated_memory(vm_hva, vm_size); + if (!hyp_vm) + goto err_remove_mappings; + + pgd = map_donated_memory_noclear(pgd_hva, pgd_size); + if (!pgd) + goto err_remove_mappings; + + init_pkvm_hyp_vm(host_kvm, hyp_vm, nr_vcpus); + + hyp_spin_lock(&vm_table_lock); + ret = insert_vm_table_entry(host_kvm, hyp_vm); + if (ret < 0) + goto err_unlock; + + ret = kvm_guest_prepare_stage2(hyp_vm, pgd); + if (ret) + goto err_remove_vm_table_entry; + hyp_spin_unlock(&vm_table_lock); + + return hyp_vm->kvm.arch.pkvm_handle; + +err_remove_vm_table_entry: + remove_vm_table_entry(hyp_vm->kvm.arch.pkvm_handle); +err_unlock: + hyp_spin_unlock(&vm_table_lock); +err_remove_mappings: + unmap_donated_memory(hyp_vm, vm_size); + unmap_donated_memory(pgd, pgd_size); +err_unpin_kvm: + 
hyp_unpin_shared_mem(host_kvm, host_kvm + 1); + return ret; +} + +/* + * Initialize the hypervisor copy of the protected vCPU state using the + * memory donated by the host. + * + * handle: The handle for the protected vm. + * host_vcpu: A pointer to the corresponding host vcpu. + * vcpu_hva: The host va of the area being donated for the vcpu state. + * Must be page aligned. The size of the area must be equal to + * the page-aligned size of 'struct pkvm_hyp_vcpu'. + * Return 0 on success, negative error code on failure. + */ +int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu, + unsigned long vcpu_hva) +{ + struct pkvm_hyp_vcpu *hyp_vcpu; + struct pkvm_hyp_vm *hyp_vm; + unsigned int idx; + int ret; + + hyp_vcpu = map_donated_memory(vcpu_hva, sizeof(*hyp_vcpu)); + if (!hyp_vcpu) + return -ENOMEM; + + hyp_spin_lock(&vm_table_lock); + + hyp_vm = get_vm_by_handle(handle); + if (!hyp_vm) { + ret = -ENOENT; + goto unlock; + } + + idx = hyp_vm->nr_vcpus; + if (idx >= hyp_vm->kvm.created_vcpus) { + ret = -EINVAL; + goto unlock; + } + + ret = init_pkvm_hyp_vcpu(hyp_vcpu, hyp_vm, host_vcpu, idx); + if (ret) + goto unlock; + + hyp_vm->vcpus[idx] = hyp_vcpu; + hyp_vm->nr_vcpus++; +unlock: + hyp_spin_unlock(&vm_table_lock); + + if (ret) + unmap_donated_memory(hyp_vcpu, sizeof(*hyp_vcpu)); + + return ret; +} + +int __pkvm_teardown_vm(pkvm_handle_t handle) +{ + struct pkvm_hyp_vm *hyp_vm; + struct kvm *host_kvm; + size_t vm_size; + int err; + + hyp_spin_lock(&vm_table_lock); + hyp_vm = get_vm_by_handle(handle); + if (!hyp_vm) { + err = -ENOENT; + goto err_unlock; + } + + if (WARN_ON(hyp_page_count(hyp_vm))) { + err = -EBUSY; + goto err_unlock; + } + + /* Ensure the VMID is clean before it can be reallocated */ + __kvm_tlb_flush_vmid(&hyp_vm->kvm.arch.mmu); + remove_vm_table_entry(handle); + hyp_spin_unlock(&vm_table_lock); + + /* Reclaim guest pages (including page-table pages) */ + reclaim_guest_pages(hyp_vm); + unpin_host_vcpus(hyp_vm->vcpus, 
hyp_vm->nr_vcpus); + + /* Return the metadata pages to the host */ + host_kvm = hyp_vm->host_kvm; + vm_size = pkvm_get_hyp_vm_size(hyp_vm->kvm.created_vcpus); + unmap_donated_memory(hyp_vm, vm_size); + hyp_unpin_shared_mem(host_kvm, host_kvm + 1); + return 0; + +err_unlock: + hyp_spin_unlock(&vm_table_lock); + return err; +} diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index 0312c9c74a5a..2be72fbe7279 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -16,6 +16,7 @@ #include #include #include +#include #include unsigned long hyp_nr_cpus; @@ -24,6 +25,7 @@ unsigned long hyp_nr_cpus; (unsigned long)__per_cpu_start) static void *vmemmap_base; +static void *vm_table_base; static void *hyp_pgt_base; static void *host_s2_pgt_base; static struct kvm_pgtable_mm_ops pkvm_pgtable_mm_ops; @@ -40,6 +42,11 @@ static int divide_memory_pool(void *virt, unsigned long size) if (!vmemmap_base) return -ENOMEM; + nr_pages = hyp_vm_table_pages(); + vm_table_base = hyp_early_alloc_contig(nr_pages); + if (!vm_table_base) + return -ENOMEM; + nr_pages = hyp_s1_pgtable_pages(); hyp_pgt_base = hyp_early_alloc_contig(nr_pages); if (!hyp_pgt_base) @@ -314,6 +321,7 @@ void __noreturn __pkvm_init_finalise(void) if (ret) goto out; + pkvm_hyp_vm_table_init(vm_table_base); out: /* * We tail-called to here from handle___pkvm_init() and will not return, diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index cdf8e76b0be1..a1a27f88a312 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -1200,6 +1200,15 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, return 0; } +size_t kvm_pgtable_stage2_pgd_size(u64 vtcr) +{ + u32 ia_bits = VTCR_EL2_IPA(vtcr); + u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr); + u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0; + + return kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE; +} + static int stage2_free_walker(u64 addr, u64 
end, u32 level, kvm_pte_t *ptep, enum kvm_pgtable_walk_flags flag, void * const arg) diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c index 34229425b25d..71493136e59c 100644 --- a/arch/arm64/kvm/pkvm.c +++ b/arch/arm64/kvm/pkvm.c @@ -71,6 +71,7 @@ void __init kvm_hyp_reserve(void) hyp_mem_pages += hyp_s1_pgtable_pages(); hyp_mem_pages += host_s2_pgtable_pages(); + hyp_mem_pages += hyp_vm_table_pages(); hyp_mem_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE); /* From 9d0c063a4d1d10ef8e6288899b8524413e40cfa0 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Thu, 10 Nov 2022 19:02:46 +0000 Subject: [PATCH 1425/4122] KVM: arm64: Instantiate pKVM hypervisor VM and vCPU structures from EL1 With the pKVM hypervisor at EL2 now offering hypercalls to the host for creating and destroying VM and vCPU structures, plumb these in to the existing arm64 KVM backend to ensure that the hypervisor data structures are allocated and initialised on first vCPU run for a pKVM guest. In the host, 'struct kvm_protected_vm' is introduced to hold the handle of the pKVM VM instance as well as to track references to the memory donated to the hypervisor so that it can be freed back to the host allocator following VM teardown. The stage-2 page-table, hypervisor VM and vCPU structures are allocated separately so as to avoid the need for a large physically-contiguous allocation in the host at run-time. 
Tested-by: Vincent Donnefort Signed-off-by: Fuad Tabba Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-14-will@kernel.org --- arch/arm64/include/asm/kvm_host.h | 14 ++- arch/arm64/include/asm/kvm_pkvm.h | 4 + arch/arm64/kvm/arm.c | 14 +++ arch/arm64/kvm/hyp/hyp-constants.c | 3 + arch/arm64/kvm/hyp/nvhe/pkvm.c | 15 +++- arch/arm64/kvm/pkvm.c | 138 +++++++++++++++++++++++++++++ 6 files changed, 182 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index d3dd7ab9c79e..467393e7331f 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -117,6 +117,16 @@ struct kvm_smccc_features { typedef unsigned int pkvm_handle_t; +struct kvm_protected_vm { + pkvm_handle_t handle; + + struct { + void *pgd; + void *vm; + void *vcpus[KVM_MAX_VCPUS]; + } hyp_donations; +}; + struct kvm_arch { struct kvm_s2_mmu mmu; @@ -170,10 +180,10 @@ struct kvm_arch { struct kvm_smccc_features smccc_feat; /* - * For an untrusted host VM, 'pkvm_handle' is used to lookup + * For an untrusted host VM, 'pkvm.handle' is used to lookup * the associated pKVM instance in the hypervisor. 
*/ - pkvm_handle_t pkvm_handle; + struct kvm_protected_vm pkvm; }; struct kvm_vcpu_fault_info { diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h index f4e3133d6550..01129b0d4c68 100644 --- a/arch/arm64/include/asm/kvm_pkvm.h +++ b/arch/arm64/include/asm/kvm_pkvm.h @@ -14,6 +14,10 @@ #define HYP_MEMBLOCK_REGIONS 128 +int pkvm_init_host_vm(struct kvm *kvm); +int pkvm_create_hyp_vm(struct kvm *kvm); +void pkvm_destroy_hyp_vm(struct kvm *kvm); + extern struct memblock_region kvm_nvhe_sym(hyp_memory)[]; extern unsigned int kvm_nvhe_sym(hyp_memblock_nr); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 94d33e296e10..30d6fc5d3a93 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -150,6 +151,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (ret) goto out_free_stage2_pgd; + ret = pkvm_init_host_vm(kvm); + if (ret) + goto out_free_stage2_pgd; + if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL)) { ret = -ENOMEM; goto out_free_stage2_pgd; @@ -187,6 +192,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_vgic_destroy(kvm); + if (is_protected_kvm_enabled()) + pkvm_destroy_hyp_vm(kvm); + kvm_destroy_vcpus(kvm); kvm_unshare_hyp(kvm, kvm + 1); @@ -569,6 +577,12 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) if (ret) return ret; + if (is_protected_kvm_enabled()) { + ret = pkvm_create_hyp_vm(kvm); + if (ret) + return ret; + } + if (!irqchip_in_kernel(kvm)) { /* * Tell the rest of the code that there are userspace irqchip diff --git a/arch/arm64/kvm/hyp/hyp-constants.c b/arch/arm64/kvm/hyp/hyp-constants.c index b3742a6691e8..b257a3b4bfc5 100644 --- a/arch/arm64/kvm/hyp/hyp-constants.c +++ b/arch/arm64/kvm/hyp/hyp-constants.c @@ -2,9 +2,12 @@ #include #include +#include int main(void) { DEFINE(STRUCT_HYP_PAGE_SIZE, sizeof(struct hyp_page)); + DEFINE(PKVM_HYP_VM_SIZE, sizeof(struct pkvm_hyp_vm)); + 
DEFINE(PKVM_HYP_VCPU_SIZE, sizeof(struct pkvm_hyp_vcpu)); return 0; } diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 135c9a095eca..2c73c4640e4d 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -324,7 +324,7 @@ static pkvm_handle_t insert_vm_table_entry(struct kvm *host_kvm, if (idx < 0) return idx; - hyp_vm->kvm.arch.pkvm_handle = idx_to_vm_handle(idx); + hyp_vm->kvm.arch.pkvm.handle = idx_to_vm_handle(idx); /* VMID 0 is reserved for the host */ atomic64_set(&mmu->vmid.id, idx + 1); @@ -333,7 +333,7 @@ static pkvm_handle_t insert_vm_table_entry(struct kvm *host_kvm, mmu->pgt = &hyp_vm->pgt; vm_table[idx] = hyp_vm; - return hyp_vm->kvm.arch.pkvm_handle; + return hyp_vm->kvm.arch.pkvm.handle; } /* @@ -458,10 +458,10 @@ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva, goto err_remove_vm_table_entry; hyp_spin_unlock(&vm_table_lock); - return hyp_vm->kvm.arch.pkvm_handle; + return hyp_vm->kvm.arch.pkvm.handle; err_remove_vm_table_entry: - remove_vm_table_entry(hyp_vm->kvm.arch.pkvm_handle); + remove_vm_table_entry(hyp_vm->kvm.arch.pkvm.handle); err_unlock: hyp_spin_unlock(&vm_table_lock); err_remove_mappings: @@ -528,6 +528,7 @@ int __pkvm_teardown_vm(pkvm_handle_t handle) { struct pkvm_hyp_vm *hyp_vm; struct kvm *host_kvm; + unsigned int idx; size_t vm_size; int err; @@ -553,6 +554,12 @@ int __pkvm_teardown_vm(pkvm_handle_t handle) unpin_host_vcpus(hyp_vm->vcpus, hyp_vm->nr_vcpus); /* Return the metadata pages to the host */ + for (idx = 0; idx < hyp_vm->nr_vcpus; ++idx) { + struct pkvm_hyp_vcpu *hyp_vcpu = hyp_vm->vcpus[idx]; + + unmap_donated_memory(hyp_vcpu, sizeof(*hyp_vcpu)); + } + host_kvm = hyp_vm->host_kvm; vm_size = pkvm_get_hyp_vm_size(hyp_vm->kvm.created_vcpus); unmap_donated_memory(hyp_vm, vm_size); diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c index 71493136e59c..8c443b915e43 100644 --- a/arch/arm64/kvm/pkvm.c +++ b/arch/arm64/kvm/pkvm.c @@ -6,6 +6,7 @@ 
#include #include +#include #include #include @@ -94,3 +95,140 @@ void __init kvm_hyp_reserve(void) kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20, hyp_mem_base); } + +/* + * Allocates and donates memory for hypervisor VM structs at EL2. + * + * Allocates space for the VM state, which includes the hyp vm as well as + * the hyp vcpus. + * + * Stores an opaque handler in the kvm struct for future reference. + * + * Return 0 on success, negative error code on failure. + */ +static int __pkvm_create_hyp_vm(struct kvm *host_kvm) +{ + size_t pgd_sz, hyp_vm_sz, hyp_vcpu_sz; + struct kvm_vcpu *host_vcpu; + pkvm_handle_t handle; + void *pgd, *hyp_vm; + unsigned long idx; + int ret; + + if (host_kvm->created_vcpus < 1) + return -EINVAL; + + pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.vtcr); + + /* + * The PGD pages will be reclaimed using a hyp_memcache which implies + * page granularity. So, use alloc_pages_exact() to get individual + * refcounts. + */ + pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT); + if (!pgd) + return -ENOMEM; + + /* Allocate memory to donate to hyp for vm and vcpu pointers. */ + hyp_vm_sz = PAGE_ALIGN(size_add(PKVM_HYP_VM_SIZE, + size_mul(sizeof(void *), + host_kvm->created_vcpus))); + hyp_vm = alloc_pages_exact(hyp_vm_sz, GFP_KERNEL_ACCOUNT); + if (!hyp_vm) { + ret = -ENOMEM; + goto free_pgd; + } + + /* Donate the VM memory to hyp and let hyp initialize it. */ + ret = kvm_call_hyp_nvhe(__pkvm_init_vm, host_kvm, hyp_vm, pgd); + if (ret < 0) + goto free_vm; + + handle = ret; + + host_kvm->arch.pkvm.handle = handle; + host_kvm->arch.pkvm.hyp_donations.pgd = pgd; + host_kvm->arch.pkvm.hyp_donations.vm = hyp_vm; + + /* Donate memory for the vcpus at hyp and initialize it. */ + hyp_vcpu_sz = PAGE_ALIGN(PKVM_HYP_VCPU_SIZE); + kvm_for_each_vcpu(idx, host_vcpu, host_kvm) { + void *hyp_vcpu; + + /* Indexing of the vcpus to be sequential starting at 0. 
*/ + if (WARN_ON(host_vcpu->vcpu_idx != idx)) { + ret = -EINVAL; + goto destroy_vm; + } + + hyp_vcpu = alloc_pages_exact(hyp_vcpu_sz, GFP_KERNEL_ACCOUNT); + if (!hyp_vcpu) { + ret = -ENOMEM; + goto destroy_vm; + } + + host_kvm->arch.pkvm.hyp_donations.vcpus[idx] = hyp_vcpu; + + ret = kvm_call_hyp_nvhe(__pkvm_init_vcpu, handle, host_vcpu, + hyp_vcpu); + if (ret) + goto destroy_vm; + } + + return 0; + +destroy_vm: + pkvm_destroy_hyp_vm(host_kvm); + return ret; +free_vm: + free_pages_exact(hyp_vm, hyp_vm_sz); +free_pgd: + free_pages_exact(pgd, pgd_sz); + return ret; +} + +int pkvm_create_hyp_vm(struct kvm *host_kvm) +{ + int ret = 0; + + mutex_lock(&host_kvm->lock); + if (!host_kvm->arch.pkvm.handle) + ret = __pkvm_create_hyp_vm(host_kvm); + mutex_unlock(&host_kvm->lock); + + return ret; +} + +void pkvm_destroy_hyp_vm(struct kvm *host_kvm) +{ + unsigned long idx, nr_vcpus = host_kvm->created_vcpus; + size_t pgd_sz, hyp_vm_sz; + + if (host_kvm->arch.pkvm.handle) + WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm, + host_kvm->arch.pkvm.handle)); + + host_kvm->arch.pkvm.handle = 0; + + for (idx = 0; idx < nr_vcpus; ++idx) { + void *hyp_vcpu = host_kvm->arch.pkvm.hyp_donations.vcpus[idx]; + + if (!hyp_vcpu) + break; + + free_pages_exact(hyp_vcpu, PAGE_ALIGN(PKVM_HYP_VCPU_SIZE)); + } + + hyp_vm_sz = PAGE_ALIGN(size_add(PKVM_HYP_VM_SIZE, + size_mul(sizeof(void *), nr_vcpus))); + pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.vtcr); + + free_pages_exact(host_kvm->arch.pkvm.hyp_donations.vm, hyp_vm_sz); + free_pages_exact(host_kvm->arch.pkvm.hyp_donations.pgd, pgd_sz); +} + +int pkvm_init_host_vm(struct kvm *host_kvm) +{ + mutex_init(&host_kvm->lock); + return 0; +} From aa6948f82f0b7060fbbac21911dc7996b144ba3c Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Thu, 10 Nov 2022 19:02:47 +0000 Subject: [PATCH 1426/4122] KVM: arm64: Add per-cpu fixmap infrastructure at EL2 Mapping pages in a guest page-table from within the pKVM hypervisor at EL2 may require cache maintenance 
to ensure that the initialised page contents is visible even to non-cacheable (e.g. MMU-off) accesses from the guest. In preparation for performing this maintenance at EL2, introduce a per-CPU fixmap which allows the pKVM hypervisor to map guest pages temporarily into its stage-1 page-table for the purposes of cache maintenance and, in future, poisoning on the reclaim path. The use of a fixmap avoids the need for memory allocation or locking on the map() path. Tested-by: Vincent Donnefort Signed-off-by: Quentin Perret Co-developed-by: Will Deacon Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-15-will@kernel.org --- arch/arm64/include/asm/kvm_pgtable.h | 14 +++ arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 2 + arch/arm64/kvm/hyp/include/nvhe/mm.h | 4 + arch/arm64/kvm/hyp/nvhe/mem_protect.c | 1 - arch/arm64/kvm/hyp/nvhe/mm.c | 104 ++++++++++++++++++ arch/arm64/kvm/hyp/nvhe/setup.c | 4 + arch/arm64/kvm/hyp/pgtable.c | 12 -- 7 files changed, 128 insertions(+), 13 deletions(-) diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 15c389db1931..34cb93f3c96d 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -42,6 +42,8 @@ typedef u64 kvm_pte_t; #define KVM_PTE_ADDR_MASK GENMASK(47, PAGE_SHIFT) #define KVM_PTE_ADDR_51_48 GENMASK(15, 12) +#define KVM_PHYS_INVALID (-1ULL) + static inline bool kvm_pte_valid(kvm_pte_t pte) { return pte & KVM_PTE_VALID; @@ -57,6 +59,18 @@ static inline u64 kvm_pte_to_phys(kvm_pte_t pte) return pa; } +static inline kvm_pte_t kvm_phys_to_pte(u64 pa) +{ + kvm_pte_t pte = pa & KVM_PTE_ADDR_MASK; + + if (PAGE_SHIFT == 16) { + pa &= GENMASK(51, 48); + pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48); + } + + return pte; +} + static inline u64 kvm_granule_shift(u32 level) { /* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */ diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index ce9a796a85ee..ef31a1872c93 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -59,6 +59,8 @@ enum pkvm_component_id { PKVM_ID_HYP, }; +extern unsigned long hyp_nr_cpus; + int __pkvm_prot_finalize(void); int __pkvm_host_share_hyp(u64 pfn); int __pkvm_host_unshare_hyp(u64 pfn); diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h index b2ee6d5df55b..d5ec972b5c1e 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h @@ -13,6 +13,10 @@ extern struct kvm_pgtable pkvm_pgtable; extern hyp_spinlock_t pkvm_pgd_lock; +int hyp_create_pcpu_fixmap(void); +void *hyp_fixmap_map(phys_addr_t phys); +void hyp_fixmap_unmap(void); + int hyp_create_idmap(u32 hyp_va_bits); int hyp_map_vectors(void); int hyp_back_vmemmap(phys_addr_t back); diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 459957b3082e..8b4d3f0aa7a0 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -21,7 +21,6 @@ #define KVM_HOST_S2_FLAGS (KVM_PGTABLE_S2_NOFWB | KVM_PGTABLE_S2_IDMAP) -extern unsigned long hyp_nr_cpus; struct host_mmu host_mmu; static struct hyp_pool host_s2_pool; diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c index d3a3b47181de..5648ac21e62d 100644 --- a/arch/arm64/kvm/hyp/nvhe/mm.c +++ b/arch/arm64/kvm/hyp/nvhe/mm.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -25,6 +26,12 @@ unsigned int hyp_memblock_nr; static u64 __io_map_base; +struct hyp_fixmap_slot { + u64 addr; + kvm_pte_t *ptep; +}; +static DEFINE_PER_CPU(struct hyp_fixmap_slot, fixmap_slots); + static int __pkvm_create_mappings(unsigned long start, unsigned long size, unsigned long phys, enum kvm_pgtable_prot prot) { @@ -212,6 +219,103 @@ int hyp_map_vectors(void) return 0; } +void 
*hyp_fixmap_map(phys_addr_t phys) +{ + struct hyp_fixmap_slot *slot = this_cpu_ptr(&fixmap_slots); + kvm_pte_t pte, *ptep = slot->ptep; + + pte = *ptep; + pte &= ~kvm_phys_to_pte(KVM_PHYS_INVALID); + pte |= kvm_phys_to_pte(phys) | KVM_PTE_VALID; + WRITE_ONCE(*ptep, pte); + dsb(ishst); + + return (void *)slot->addr; +} + +static void fixmap_clear_slot(struct hyp_fixmap_slot *slot) +{ + kvm_pte_t *ptep = slot->ptep; + u64 addr = slot->addr; + + WRITE_ONCE(*ptep, *ptep & ~KVM_PTE_VALID); + + /* + * Irritatingly, the architecture requires that we use inner-shareable + * broadcast TLB invalidation here in case another CPU speculates + * through our fixmap and decides to create an "amalagamation of the + * values held in the TLB" due to the apparent lack of a + * break-before-make sequence. + * + * https://lore.kernel.org/kvm/20221017115209.2099-1-will@kernel.org/T/#mf10dfbaf1eaef9274c581b81c53758918c1d0f03 + */ + dsb(ishst); + __tlbi_level(vale2is, __TLBI_VADDR(addr, 0), (KVM_PGTABLE_MAX_LEVELS - 1)); + dsb(ish); + isb(); +} + +void hyp_fixmap_unmap(void) +{ + fixmap_clear_slot(this_cpu_ptr(&fixmap_slots)); +} + +static int __create_fixmap_slot_cb(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, + enum kvm_pgtable_walk_flags flag, + void * const arg) +{ + struct hyp_fixmap_slot *slot = per_cpu_ptr(&fixmap_slots, (u64)arg); + + if (!kvm_pte_valid(*ptep) || level != KVM_PGTABLE_MAX_LEVELS - 1) + return -EINVAL; + + slot->addr = addr; + slot->ptep = ptep; + + /* + * Clear the PTE, but keep the page-table page refcount elevated to + * prevent it from ever being freed. This lets us manipulate the PTEs + * by hand safely without ever needing to allocate memory. 
+ */ + fixmap_clear_slot(slot); + + return 0; +} + +static int create_fixmap_slot(u64 addr, u64 cpu) +{ + struct kvm_pgtable_walker walker = { + .cb = __create_fixmap_slot_cb, + .flags = KVM_PGTABLE_WALK_LEAF, + .arg = (void *)cpu, + }; + + return kvm_pgtable_walk(&pkvm_pgtable, addr, PAGE_SIZE, &walker); +} + +int hyp_create_pcpu_fixmap(void) +{ + unsigned long addr, i; + int ret; + + for (i = 0; i < hyp_nr_cpus; i++) { + ret = pkvm_alloc_private_va_range(PAGE_SIZE, &addr); + if (ret) + return ret; + + ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr, PAGE_SIZE, + __hyp_pa(__hyp_bss_start), PAGE_HYP); + if (ret) + return ret; + + ret = create_fixmap_slot(addr, i); + if (ret) + return ret; + } + + return 0; +} + int hyp_create_idmap(u32 hyp_va_bits) { unsigned long start, end; diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index 2be72fbe7279..0f69c1393416 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -321,6 +321,10 @@ void __noreturn __pkvm_init_finalise(void) if (ret) goto out; + ret = hyp_create_pcpu_fixmap(); + if (ret) + goto out; + pkvm_hyp_vm_table_init(vm_table_base); out: /* diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index a1a27f88a312..2bcb2d5903ba 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -57,8 +57,6 @@ struct kvm_pgtable_walk_data { u64 end; }; -#define KVM_PHYS_INVALID (-1ULL) - static bool kvm_phys_is_valid(u64 phys) { return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_EL1_PARANGE_MAX)); @@ -122,16 +120,6 @@ static bool kvm_pte_table(kvm_pte_t pte, u32 level) return FIELD_GET(KVM_PTE_TYPE, pte) == KVM_PTE_TYPE_TABLE; } -static kvm_pte_t kvm_phys_to_pte(u64 pa) -{ - kvm_pte_t pte = pa & KVM_PTE_ADDR_MASK; - - if (PAGE_SHIFT == 16) - pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48); - - return pte; -} - static kvm_pte_t *kvm_pte_follow(kvm_pte_t pte, struct kvm_pgtable_mm_ops *mm_ops) { return 
mm_ops->phys_to_virt(kvm_pte_to_phys(pte)); From 6c165223e9a6384aa1e934b90f2650e71adb972a Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 10 Nov 2022 19:02:48 +0000 Subject: [PATCH 1427/4122] KVM: arm64: Initialise hypervisor copies of host symbols unconditionally MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The nVHE object at EL2 maintains its own copies of some host variables so that, when pKVM is enabled, the host cannot directly modify the hypervisor state. When running in normal nVHE mode, however, these variables are still mirrored at EL2 but are not initialised. Initialise the hypervisor symbols from the host copies regardless of pKVM, ensuring that any reference to this data at EL2 with normal nVHE will return a sensibly initialised value. Reviewed-by: Philippe Mathieu-Daudé Tested-by: Vincent Donnefort Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-16-will@kernel.org --- arch/arm64/kvm/arm.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 30d6fc5d3a93..584626e11797 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1884,11 +1884,8 @@ static int do_pkvm_init(u32 hyp_va_bits) return ret; } -static int kvm_hyp_init_protection(u32 hyp_va_bits) +static void kvm_hyp_init_symbols(void) { - void *addr = phys_to_virt(hyp_mem_base); - int ret; - kvm_nvhe_sym(id_aa64pfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); kvm_nvhe_sym(id_aa64pfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1); kvm_nvhe_sym(id_aa64isar0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR0_EL1); @@ -1897,6 +1894,12 @@ static int kvm_hyp_init_protection(u32 hyp_va_bits) kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); 
kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR2_EL1); +} + +static int kvm_hyp_init_protection(u32 hyp_va_bits) +{ + void *addr = phys_to_virt(hyp_mem_base); + int ret; ret = create_hyp_mappings(addr, addr + hyp_mem_size, PAGE_HYP); if (ret) @@ -2071,6 +2074,8 @@ static int init_hyp_mode(void) cpu_prepare_hyp_mode(cpu); } + kvm_hyp_init_symbols(); + if (is_protected_kvm_enabled()) { init_cpu_logical_map(); @@ -2078,9 +2083,7 @@ static int init_hyp_mode(void) err = -ENODEV; goto out_err; } - } - if (is_protected_kvm_enabled()) { err = kvm_hyp_init_protection(hyp_va_bits); if (err) { kvm_err("Failed to init hyp memory protection\n"); From 13e248aab73d2f1c27b458ef09d38b44f3e5bf2e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 10 Nov 2022 19:02:49 +0000 Subject: [PATCH 1428/4122] KVM: arm64: Provide I-cache invalidation by virtual address at EL2 In preparation for handling cache maintenance of guest pages from within the pKVM hypervisor at EL2, introduce an EL2 copy of icache_inval_pou() which will later be plumbed into the stage-2 page-table cache maintenance callbacks, ensuring that the initial contents of pages mapped as executable into the guest stage-2 page-table is visible to the instruction fetcher. 
Tested-by: Vincent Donnefort Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-17-will@kernel.org --- arch/arm64/include/asm/kvm_hyp.h | 1 + arch/arm64/kernel/image-vars.h | 3 --- arch/arm64/kvm/arm.c | 1 + arch/arm64/kvm/hyp/nvhe/cache.S | 11 +++++++++++ arch/arm64/kvm/hyp/nvhe/pkvm.c | 3 +++ 5 files changed, 16 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index aa7fa2a08f06..fd99cf09972d 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -123,4 +123,5 @@ extern u64 kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val); +extern unsigned long kvm_nvhe_sym(__icache_flags); #endif /* __ARM64_KVM_HYP_H__ */ diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 8151412653de..7f4e43bfaade 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -71,9 +71,6 @@ KVM_NVHE_ALIAS(nvhe_hyp_panic_handler); /* Vectors installed by hyp-init on reset HVC. */ KVM_NVHE_ALIAS(__hyp_stub_vectors); -/* Kernel symbol used by icache_is_vpipt(). 
*/ -KVM_NVHE_ALIAS(__icache_flags); - /* VMID bits set by the KVM VMID allocator */ KVM_NVHE_ALIAS(kvm_arm_vmid_bits); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 584626e11797..d99e93e6ddf7 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1894,6 +1894,7 @@ static void kvm_hyp_init_symbols(void) kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR2_EL1); + kvm_nvhe_sym(__icache_flags) = __icache_flags; } static int kvm_hyp_init_protection(u32 hyp_va_bits) diff --git a/arch/arm64/kvm/hyp/nvhe/cache.S b/arch/arm64/kvm/hyp/nvhe/cache.S index 0c367eb5f4e2..85936c17ae40 100644 --- a/arch/arm64/kvm/hyp/nvhe/cache.S +++ b/arch/arm64/kvm/hyp/nvhe/cache.S @@ -12,3 +12,14 @@ SYM_FUNC_START(__pi_dcache_clean_inval_poc) ret SYM_FUNC_END(__pi_dcache_clean_inval_poc) SYM_FUNC_ALIAS(dcache_clean_inval_poc, __pi_dcache_clean_inval_poc) + +SYM_FUNC_START(__pi_icache_inval_pou) +alternative_if ARM64_HAS_CACHE_DIC + isb + ret +alternative_else_nop_endif + + invalidate_icache_by_line x0, x1, x2, x3 + ret +SYM_FUNC_END(__pi_icache_inval_pou) +SYM_FUNC_ALIAS(icache_inval_pou, __pi_icache_inval_pou) diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 2c73c4640e4d..0768307566d4 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -12,6 +12,9 @@ #include #include +/* Used by icache_is_vpipt(). */ +unsigned long __icache_flags; + /* * Set trap register values based on features in ID_AA64PFR0. 
*/ From 717a7eebac106a5cc5d5493f8eef9cf4ae6edf19 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Thu, 10 Nov 2022 19:02:50 +0000 Subject: [PATCH 1429/4122] KVM: arm64: Add generic hyp_memcache helpers The host at EL1 and the pKVM hypervisor at EL2 will soon need to exchange memory pages dynamically for creating and destroying VM state. Indeed, the hypervisor will rely on the host to donate memory pages it can use to create guest stage-2 page-tables and to store VM and vCPU metadata. In order to ease this process, introduce a 'struct hyp_memcache' which is essentially a linked list of available pages, indexed by physical addresses so that it can be passed meaningfully between the different virtual address spaces configured at EL1 and EL2. Tested-by: Vincent Donnefort Signed-off-by: Quentin Perret Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-18-will@kernel.org --- arch/arm64/include/asm/kvm_host.h | 57 +++++++++++++++++++ arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 2 + arch/arm64/kvm/hyp/nvhe/mm.c | 33 +++++++++++ arch/arm64/kvm/mmu.c | 26 +++++++++ 4 files changed, 118 insertions(+) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 467393e7331f..835987e0f868 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -73,6 +73,63 @@ u32 __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu); +struct kvm_hyp_memcache { + phys_addr_t head; + unsigned long nr_pages; +}; + +static inline void push_hyp_memcache(struct kvm_hyp_memcache *mc, + phys_addr_t *p, + phys_addr_t (*to_pa)(void *virt)) +{ + *p = mc->head; + mc->head = to_pa(p); + mc->nr_pages++; +} + +static inline void *pop_hyp_memcache(struct kvm_hyp_memcache *mc, + void *(*to_va)(phys_addr_t phys)) +{ + phys_addr_t *p = to_va(mc->head); + + if (!mc->nr_pages) + return NULL; + + mc->head = *p; + 
mc->nr_pages--; + + return p; +} + +static inline int __topup_hyp_memcache(struct kvm_hyp_memcache *mc, + unsigned long min_pages, + void *(*alloc_fn)(void *arg), + phys_addr_t (*to_pa)(void *virt), + void *arg) +{ + while (mc->nr_pages < min_pages) { + phys_addr_t *p = alloc_fn(arg); + + if (!p) + return -ENOMEM; + push_hyp_memcache(mc, p, to_pa); + } + + return 0; +} + +static inline void __free_hyp_memcache(struct kvm_hyp_memcache *mc, + void (*free_fn)(void *virt, void *arg), + void *(*to_va)(phys_addr_t phys), + void *arg) +{ + while (mc->nr_pages) + free_fn(pop_hyp_memcache(mc, to_va), arg); +} + +void free_hyp_memcache(struct kvm_hyp_memcache *mc); +int topup_hyp_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages); + struct kvm_vmid { atomic64_t id; }; diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index ef31a1872c93..420b87e755a4 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -77,6 +77,8 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt); int hyp_pin_shared_mem(void *from, void *to); void hyp_unpin_shared_mem(void *from, void *to); void reclaim_guest_pages(struct pkvm_hyp_vm *vm); +int refill_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages, + struct kvm_hyp_memcache *host_mc); static __always_inline void __load_host_stage2(void) { diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c index 5648ac21e62d..c80b2c007619 100644 --- a/arch/arm64/kvm/hyp/nvhe/mm.c +++ b/arch/arm64/kvm/hyp/nvhe/mm.c @@ -340,3 +340,36 @@ int hyp_create_idmap(u32 hyp_va_bits) return __pkvm_create_mappings(start, end - start, start, PAGE_HYP_EXEC); } + +static void *admit_host_page(void *arg) +{ + struct kvm_hyp_memcache *host_mc = arg; + + if (!host_mc->nr_pages) + return NULL; + + /* + * The host still owns the pages in its memcache, so we need to go + * through a full host-to-hyp donation cycle to 
change it. Fortunately, + * __pkvm_host_donate_hyp() takes care of races for us, so if it + * succeeds we're good to go. + */ + if (__pkvm_host_donate_hyp(hyp_phys_to_pfn(host_mc->head), 1)) + return NULL; + + return pop_hyp_memcache(host_mc, hyp_phys_to_virt); +} + +/* Refill our local memcache by popping pages from the one provided by the host. */ +int refill_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages, + struct kvm_hyp_memcache *host_mc) +{ + struct kvm_hyp_memcache tmp = *host_mc; + int ret; + + ret = __topup_hyp_memcache(mc, min_pages, admit_host_page, + hyp_virt_to_phys, &tmp); + *host_mc = tmp; + + return ret; +} diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 60ee3d9f01f8..18061163c607 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -807,6 +807,32 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu) } } +static void hyp_mc_free_fn(void *addr, void *unused) +{ + free_page((unsigned long)addr); +} + +static void *hyp_mc_alloc_fn(void *unused) +{ + return (void *)__get_free_page(GFP_KERNEL_ACCOUNT); +} + +void free_hyp_memcache(struct kvm_hyp_memcache *mc) +{ + if (is_protected_kvm_enabled()) + __free_hyp_memcache(mc, hyp_mc_free_fn, + kvm_host_va, NULL); +} + +int topup_hyp_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages) +{ + if (!is_protected_kvm_enabled()) + return 0; + + return __topup_hyp_memcache(mc, min_pages, hyp_mc_alloc_fn, + kvm_host_pa, NULL); +} + /** * kvm_phys_addr_ioremap - map a device range to guest IPA * From 315775ff7c6de497dd07c3f6eff499fb538783eb Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Thu, 10 Nov 2022 19:02:51 +0000 Subject: [PATCH 1430/4122] KVM: arm64: Consolidate stage-2 initialisation into a single function The initialisation of guest stage-2 page-tables is currently split across two functions: kvm_init_stage2_mmu() and kvm_arm_setup_stage2().
That is presumably for historical reasons as kvm_arm_setup_stage2() originates from the (now defunct) KVM port for 32-bit Arm. Simplify this code path by merging both functions into one, taking care to map the 'struct kvm' into the hypervisor stage-1 early on in order to simplify the failure path. Tested-by: Vincent Donnefort Co-developed-by: Fuad Tabba Signed-off-by: Fuad Tabba Signed-off-by: Quentin Perret Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-19-will@kernel.org --- arch/arm64/include/asm/kvm_arm.h | 2 +- arch/arm64/include/asm/kvm_host.h | 2 -- arch/arm64/include/asm/kvm_mmu.h | 2 +- arch/arm64/kvm/arm.c | 27 +++++++++++++-------------- arch/arm64/kvm/mmu.c | 27 ++++++++++++++++++++++++++- arch/arm64/kvm/reset.c | 29 ----------------------------- 6 files changed, 41 insertions(+), 48 deletions(-) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 8aa8492dafc0..89e63585dae4 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -135,7 +135,7 @@ * 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are * not known to exist and will break with this configuration. * - * The VTCR_EL2 is configured per VM and is initialised in kvm_arm_setup_stage2(). + * The VTCR_EL2 is configured per VM and is initialised in kvm_init_stage2_mmu. * * Note that when using 4K pages, we concatenate two first level page tables * together. With 16K pages, we concatenate 16 first level page tables. 
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 835987e0f868..57218f0c449e 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -990,8 +990,6 @@ int kvm_set_ipa_limit(void); #define __KVM_HAVE_ARCH_VM_ALLOC struct kvm *kvm_arch_alloc_vm(void); -int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type); - static inline bool kvm_vm_is_protected(struct kvm *kvm) { return false; diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 7784081088e7..e4a7e6369499 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -166,7 +166,7 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size, void free_hyp_pgds(void); void stage2_unmap_vm(struct kvm *kvm); -int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu); +int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type); void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu); int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, phys_addr_t pa, unsigned long size, bool writable); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index d99e93e6ddf7..f78eefa02f6b 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -139,28 +139,24 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { int ret; - ret = kvm_arm_setup_stage2(kvm, type); - if (ret) - return ret; - - ret = kvm_init_stage2_mmu(kvm, &kvm->arch.mmu); - if (ret) - return ret; - ret = kvm_share_hyp(kvm, kvm + 1); if (ret) - goto out_free_stage2_pgd; + return ret; ret = pkvm_init_host_vm(kvm); if (ret) - goto out_free_stage2_pgd; + goto err_unshare_kvm; if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL)) { ret = -ENOMEM; - goto out_free_stage2_pgd; + goto err_unshare_kvm; } cpumask_copy(kvm->arch.supported_cpus, cpu_possible_mask); + ret = kvm_init_stage2_mmu(kvm, &kvm->arch.mmu, type); + if (ret) + goto err_free_cpumask; + 
kvm_vgic_early_init(kvm); /* The maximum number of VCPUs is limited by the host's GIC model */ @@ -169,9 +165,12 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) set_default_spectre(kvm); kvm_arm_init_hypercalls(kvm); - return ret; -out_free_stage2_pgd: - kvm_free_stage2_pgd(&kvm->arch.mmu); + return 0; + +err_free_cpumask: + free_cpumask_var(kvm->arch.supported_cpus); +err_unshare_kvm: + kvm_unshare_hyp(kvm, kvm + 1); return ret; } diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 18061163c607..3e56c6393cae 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -675,15 +675,40 @@ static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = { * kvm_init_stage2_mmu - Initialise a S2 MMU structure * @kvm: The pointer to the KVM structure * @mmu: The pointer to the s2 MMU structure + * @type: The machine type of the virtual machine * * Allocates only the stage-2 HW PGD level table(s). * Note we don't need locking here as this is only called when the VM is * created, which can only be done once. 
*/ -int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu) +int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type) { + u32 kvm_ipa_limit = get_kvm_ipa_limit(); int cpu, err; struct kvm_pgtable *pgt; + u64 mmfr0, mmfr1; + u32 phys_shift; + + if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK) + return -EINVAL; + + phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type); + if (phys_shift) { + if (phys_shift > kvm_ipa_limit || + phys_shift < ARM64_MIN_PARANGE_BITS) + return -EINVAL; + } else { + phys_shift = KVM_PHYS_SHIFT; + if (phys_shift > kvm_ipa_limit) { + pr_warn_once("%s using unsupported default IPA limit, upgrade your VMM\n", + current->comm); + return -EINVAL; + } + } + + mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); + mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); + kvm->arch.vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift); if (mmu->pgt != NULL) { kvm_err("kvm_arch already initialized?\n"); diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 5ae18472205a..e0267f672b8a 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -395,32 +395,3 @@ int kvm_set_ipa_limit(void) return 0; } - -int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type) -{ - u64 mmfr0, mmfr1; - u32 phys_shift; - - if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK) - return -EINVAL; - - phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type); - if (phys_shift) { - if (phys_shift > kvm_ipa_limit || - phys_shift < ARM64_MIN_PARANGE_BITS) - return -EINVAL; - } else { - phys_shift = KVM_PHYS_SHIFT; - if (phys_shift > kvm_ipa_limit) { - pr_warn_once("%s using unsupported default IPA limit, upgrade your VMM\n", - current->comm); - return -EINVAL; - } - } - - mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); - mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); - kvm->arch.vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift); - - return 0; -} From 60dfe093ec13b056856c672e1daa35134be38283 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Thu, 10 Nov 
2022 19:02:52 +0000 Subject: [PATCH 1431/4122] KVM: arm64: Instantiate guest stage-2 page-tables at EL2 Extend the initialisation of guest data structures within the pKVM hypervisor at EL2 so that we instantiate a memory pool and a full 'struct kvm_s2_mmu' structure for each VM, with a stage-2 page-table entirely independent from the one managed by the host at EL1. The 'struct kvm_pgtable_mm_ops' used by the page-table code is populated with a set of callbacks that can manage guest pages in the hypervisor without any direct intervention from the host, allocating page-table pages from the provided pool and returning these to the host on VM teardown. To keep things simple, the stage-2 MMU for the guest is configured identically to the host stage-2 in the VTCR register and so the IPA size of the guest must match the PA size of the host. For now, the new page-table is unused as there is no way for the host to map anything into it. Yet. Tested-by: Vincent Donnefort Signed-off-by: Quentin Perret Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-20-will@kernel.org --- arch/arm64/kvm/hyp/include/nvhe/pkvm.h | 6 ++ arch/arm64/kvm/hyp/nvhe/mem_protect.c | 125 ++++++++++++++++++++++++- arch/arm64/kvm/mmu.c | 4 +- 3 files changed, 132 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h index 8c653a3b9501..d14dfbcb7da1 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h @@ -9,6 +9,9 @@ #include +#include +#include + /* * Holds the relevant data for maintaining the vcpu state completely at hyp. */ @@ -30,6 +33,9 @@ struct pkvm_hyp_vm { /* The guest's stage-2 page-table managed by the hypervisor. */ struct kvm_pgtable pgt; + struct kvm_pgtable_mm_ops mm_ops; + struct hyp_pool pool; + hyp_spinlock_t lock; /* * The number of vcpus initialized and ready to run. 
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 8b4d3f0aa7a0..0162afba6dc4 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -25,6 +25,21 @@ struct host_mmu host_mmu; static struct hyp_pool host_s2_pool; +static DEFINE_PER_CPU(struct pkvm_hyp_vm *, __current_vm); +#define current_vm (*this_cpu_ptr(&__current_vm)) + +static void guest_lock_component(struct pkvm_hyp_vm *vm) +{ + hyp_spin_lock(&vm->lock); + current_vm = vm; +} + +static void guest_unlock_component(struct pkvm_hyp_vm *vm) +{ + current_vm = NULL; + hyp_spin_unlock(&vm->lock); +} + static void host_lock_component(void) { hyp_spin_lock(&host_mmu.lock); @@ -140,18 +155,124 @@ int kvm_host_prepare_stage2(void *pgt_pool_base) return 0; } +static bool guest_stage2_force_pte_cb(u64 addr, u64 end, + enum kvm_pgtable_prot prot) +{ + return true; +} + +static void *guest_s2_zalloc_pages_exact(size_t size) +{ + void *addr = hyp_alloc_pages(&current_vm->pool, get_order(size)); + + WARN_ON(size != (PAGE_SIZE << get_order(size))); + hyp_split_page(hyp_virt_to_page(addr)); + + return addr; +} + +static void guest_s2_free_pages_exact(void *addr, unsigned long size) +{ + u8 order = get_order(size); + unsigned int i; + + for (i = 0; i < (1 << order); i++) + hyp_put_page(&current_vm->pool, addr + (i * PAGE_SIZE)); +} + +static void *guest_s2_zalloc_page(void *mc) +{ + struct hyp_page *p; + void *addr; + + addr = hyp_alloc_pages(&current_vm->pool, 0); + if (addr) + return addr; + + addr = pop_hyp_memcache(mc, hyp_phys_to_virt); + if (!addr) + return addr; + + memset(addr, 0, PAGE_SIZE); + p = hyp_virt_to_page(addr); + memset(p, 0, sizeof(*p)); + p->refcount = 1; + + return addr; +} + +static void guest_s2_get_page(void *addr) +{ + hyp_get_page(&current_vm->pool, addr); +} + +static void guest_s2_put_page(void *addr) +{ + hyp_put_page(&current_vm->pool, addr); +} + +static void clean_dcache_guest_page(void *va, size_t size) +{ +
__clean_dcache_guest_page(hyp_fixmap_map(__hyp_pa(va)), size); + hyp_fixmap_unmap(); +} + +static void invalidate_icache_guest_page(void *va, size_t size) +{ + __invalidate_icache_guest_page(hyp_fixmap_map(__hyp_pa(va)), size); + hyp_fixmap_unmap(); +} + int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd) { - vm->pgt.pgd = pgd; + struct kvm_s2_mmu *mmu = &vm->kvm.arch.mmu; + unsigned long nr_pages; + int ret; + + nr_pages = kvm_pgtable_stage2_pgd_size(vm->kvm.arch.vtcr) >> PAGE_SHIFT; + ret = hyp_pool_init(&vm->pool, hyp_virt_to_pfn(pgd), nr_pages, 0); + if (ret) + return ret; + + hyp_spin_lock_init(&vm->lock); + vm->mm_ops = (struct kvm_pgtable_mm_ops) { + .zalloc_pages_exact = guest_s2_zalloc_pages_exact, + .free_pages_exact = guest_s2_free_pages_exact, + .zalloc_page = guest_s2_zalloc_page, + .phys_to_virt = hyp_phys_to_virt, + .virt_to_phys = hyp_virt_to_phys, + .page_count = hyp_page_count, + .get_page = guest_s2_get_page, + .put_page = guest_s2_put_page, + .dcache_clean_inval_poc = clean_dcache_guest_page, + .icache_inval_pou = invalidate_icache_guest_page, + }; + + guest_lock_component(vm); + ret = __kvm_pgtable_stage2_init(mmu->pgt, mmu, &vm->mm_ops, 0, + guest_stage2_force_pte_cb); + guest_unlock_component(vm); + if (ret) + return ret; + + vm->kvm.arch.mmu.pgd_phys = __hyp_pa(vm->pgt.pgd); + return 0; } void reclaim_guest_pages(struct pkvm_hyp_vm *vm) { + void *pgd = vm->pgt.pgd; unsigned long nr_pages; nr_pages = kvm_pgtable_stage2_pgd_size(vm->kvm.arch.vtcr) >> PAGE_SHIFT; - WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(vm->pgt.pgd), nr_pages)); + + guest_lock_component(vm); + kvm_pgtable_stage2_destroy(&vm->pgt); + vm->kvm.arch.mmu.pgd_phys = 0ULL; + guest_unlock_component(vm); + + WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(pgd), nr_pages)); } int __pkvm_prot_finalize(void) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 3e56c6393cae..962f4472601b 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -693,7 
+693,9 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t return -EINVAL; phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type); - if (phys_shift) { + if (is_protected_kvm_enabled()) { + phys_shift = kvm_ipa_limit; + } else if (phys_shift) { if (phys_shift > kvm_ipa_limit || phys_shift < ARM64_MIN_PARANGE_BITS) return -EINVAL; From f41dff4efb918db68923a826e966ca62c7c8e929 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Thu, 10 Nov 2022 19:02:53 +0000 Subject: [PATCH 1432/4122] KVM: arm64: Return guest memory from EL2 via dedicated teardown memcache Rather than relying on the host to free the previously-donated pKVM hypervisor VM pages explicitly on teardown, introduce a dedicated teardown memcache which allows the host to reclaim guest memory resources without having to keep track of all of the allocations made by the pKVM hypervisor at EL2. Tested-by: Vincent Donnefort Co-developed-by: Fuad Tabba Signed-off-by: Fuad Tabba Signed-off-by: Quentin Perret Signed-off-by: Will Deacon [maz: dropped __maybe_unused from unmap_donated_memory_noclear()] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-21-will@kernel.org --- arch/arm64/include/asm/kvm_host.h | 7 +---- arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 2 +- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 17 ++++++---- arch/arm64/kvm/hyp/nvhe/pkvm.c | 27 ++++++++++++---- arch/arm64/kvm/pkvm.c | 31 ++++--------------- 5 files changed, 40 insertions(+), 44 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 57218f0c449e..63307e7dc9c5 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -176,12 +176,7 @@ typedef unsigned int pkvm_handle_t; struct kvm_protected_vm { pkvm_handle_t handle; - - struct { - void *pgd; - void *vm; - void *vcpus[KVM_MAX_VCPUS]; - } hyp_donations; + struct kvm_hyp_memcache teardown_mc; }; struct kvm_arch { diff --git 
a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 420b87e755a4..b7bdbe63deed 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -76,7 +76,7 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt); int hyp_pin_shared_mem(void *from, void *to); void hyp_unpin_shared_mem(void *from, void *to); -void reclaim_guest_pages(struct pkvm_hyp_vm *vm); +void reclaim_guest_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc); int refill_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages, struct kvm_hyp_memcache *host_mc); diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 0162afba6dc4..94cd48f7850e 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -260,19 +260,24 @@ int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd) return 0; } -void reclaim_guest_pages(struct pkvm_hyp_vm *vm) +void reclaim_guest_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc) { - void *pgd = vm->pgt.pgd; - unsigned long nr_pages; - - nr_pages = kvm_pgtable_stage2_pgd_size(vm->kvm.arch.vtcr) >> PAGE_SHIFT; + void *addr; + /* Dump all pgtable pages in the hyp_pool */ guest_lock_component(vm); kvm_pgtable_stage2_destroy(&vm->pgt); vm->kvm.arch.mmu.pgd_phys = 0ULL; guest_unlock_component(vm); - WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(pgd), nr_pages)); + /* Drain the hyp_pool into the memcache */ + addr = hyp_alloc_pages(&vm->pool, 0); + while (addr) { + memset(hyp_virt_to_page(addr), 0, sizeof(struct hyp_page)); + push_hyp_memcache(mc, addr, hyp_virt_to_phys); + WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(addr), 1)); + addr = hyp_alloc_pages(&vm->pool, 0); + } } int __pkvm_prot_finalize(void) diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 0768307566d4..81835c2f4c5a 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ 
b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -393,7 +393,7 @@ static void unmap_donated_memory(void *va, size_t size) __unmap_donated_memory(va, size); } -static void __maybe_unused unmap_donated_memory_noclear(void *va, size_t size) +static void unmap_donated_memory_noclear(void *va, size_t size) { if (!va) return; @@ -527,8 +527,21 @@ unlock: return ret; } +static void +teardown_donated_memory(struct kvm_hyp_memcache *mc, void *addr, size_t size) +{ + size = PAGE_ALIGN(size); + memset(addr, 0, size); + + for (void *start = addr; start < addr + size; start += PAGE_SIZE) + push_hyp_memcache(mc, start, hyp_virt_to_phys); + + unmap_donated_memory_noclear(addr, size); +} + int __pkvm_teardown_vm(pkvm_handle_t handle) { + struct kvm_hyp_memcache *mc; struct pkvm_hyp_vm *hyp_vm; struct kvm *host_kvm; unsigned int idx; @@ -547,25 +560,27 @@ int __pkvm_teardown_vm(pkvm_handle_t handle) goto err_unlock; } + host_kvm = hyp_vm->host_kvm; + /* Ensure the VMID is clean before it can be reallocated */ __kvm_tlb_flush_vmid(&hyp_vm->kvm.arch.mmu); remove_vm_table_entry(handle); hyp_spin_unlock(&vm_table_lock); /* Reclaim guest pages (including page-table pages) */ - reclaim_guest_pages(hyp_vm); + mc = &host_kvm->arch.pkvm.teardown_mc; + reclaim_guest_pages(hyp_vm, mc); unpin_host_vcpus(hyp_vm->vcpus, hyp_vm->nr_vcpus); - /* Return the metadata pages to the host */ + /* Push the metadata pages to the teardown memcache */ for (idx = 0; idx < hyp_vm->nr_vcpus; ++idx) { struct pkvm_hyp_vcpu *hyp_vcpu = hyp_vm->vcpus[idx]; - unmap_donated_memory(hyp_vcpu, sizeof(*hyp_vcpu)); + teardown_donated_memory(mc, hyp_vcpu, sizeof(*hyp_vcpu)); } - host_kvm = hyp_vm->host_kvm; vm_size = pkvm_get_hyp_vm_size(hyp_vm->kvm.created_vcpus); - unmap_donated_memory(hyp_vm, vm_size); + teardown_donated_memory(mc, hyp_vm, vm_size); hyp_unpin_shared_mem(host_kvm, host_kvm + 1); return 0; diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c index 8c443b915e43..cf56958b1492 100644 --- a/arch/arm64/kvm/pkvm.c 
+++ b/arch/arm64/kvm/pkvm.c @@ -147,8 +147,6 @@ static int __pkvm_create_hyp_vm(struct kvm *host_kvm) handle = ret; host_kvm->arch.pkvm.handle = handle; - host_kvm->arch.pkvm.hyp_donations.pgd = pgd; - host_kvm->arch.pkvm.hyp_donations.vm = hyp_vm; /* Donate memory for the vcpus at hyp and initialize it. */ hyp_vcpu_sz = PAGE_ALIGN(PKVM_HYP_VCPU_SIZE); @@ -167,12 +165,12 @@ static int __pkvm_create_hyp_vm(struct kvm *host_kvm) goto destroy_vm; } - host_kvm->arch.pkvm.hyp_donations.vcpus[idx] = hyp_vcpu; - ret = kvm_call_hyp_nvhe(__pkvm_init_vcpu, handle, host_vcpu, hyp_vcpu); - if (ret) + if (ret) { + free_pages_exact(hyp_vcpu, hyp_vcpu_sz); goto destroy_vm; + } } return 0; @@ -201,30 +199,13 @@ int pkvm_create_hyp_vm(struct kvm *host_kvm) void pkvm_destroy_hyp_vm(struct kvm *host_kvm) { - unsigned long idx, nr_vcpus = host_kvm->created_vcpus; - size_t pgd_sz, hyp_vm_sz; - - if (host_kvm->arch.pkvm.handle) + if (host_kvm->arch.pkvm.handle) { WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm, host_kvm->arch.pkvm.handle)); - - host_kvm->arch.pkvm.handle = 0; - - for (idx = 0; idx < nr_vcpus; ++idx) { - void *hyp_vcpu = host_kvm->arch.pkvm.hyp_donations.vcpus[idx]; - - if (!hyp_vcpu) - break; - - free_pages_exact(hyp_vcpu, PAGE_ALIGN(PKVM_HYP_VCPU_SIZE)); } - hyp_vm_sz = PAGE_ALIGN(size_add(PKVM_HYP_VM_SIZE, - size_mul(sizeof(void *), nr_vcpus))); - pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.vtcr); - - free_pages_exact(host_kvm->arch.pkvm.hyp_donations.vm, hyp_vm_sz); - free_pages_exact(host_kvm->arch.pkvm.hyp_donations.pgd, pgd_sz); + host_kvm->arch.pkvm.handle = 0; + free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc); } int pkvm_init_host_vm(struct kvm *host_kvm) From fe41a7f8c0ee3ee2f682f8c28c7e1c5ff2be8a79 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Thu, 10 Nov 2022 19:02:54 +0000 Subject: [PATCH 1433/4122] KVM: arm64: Unmap 'kvm_arm_hyp_percpu_base' from the host When pKVM is enabled, the hypervisor at EL2 does not trust the host at EL1 and must 
therefore prevent it from having unrestricted access to internal hypervisor state. The 'kvm_arm_hyp_percpu_base' array holds the offsets for hypervisor per-cpu allocations, so move this into the nVHE code where it cannot be modified by the untrusted host at EL1. Tested-by: Vincent Donnefort Signed-off-by: Quentin Perret Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-22-will@kernel.org --- arch/arm64/include/asm/kvm_asm.h | 4 ++-- arch/arm64/kernel/image-vars.h | 3 --- arch/arm64/kvm/arm.c | 9 ++++----- arch/arm64/kvm/hyp/nvhe/hyp-smp.c | 2 ++ 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index de52ba775d48..43c3bc0f9544 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -109,7 +109,7 @@ enum __kvm_host_smccc_func { #define per_cpu_ptr_nvhe_sym(sym, cpu) \ ({ \ unsigned long base, off; \ - base = kvm_arm_hyp_percpu_base[cpu]; \ + base = kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu]; \ off = (unsigned long)&CHOOSE_NVHE_SYM(sym) - \ (unsigned long)&CHOOSE_NVHE_SYM(__per_cpu_start); \ base ? (typeof(CHOOSE_NVHE_SYM(sym))*)(base + off) : NULL; \ @@ -214,7 +214,7 @@ DECLARE_KVM_HYP_SYM(__kvm_hyp_vector); #define __kvm_hyp_init CHOOSE_NVHE_SYM(__kvm_hyp_init) #define __kvm_hyp_vector CHOOSE_HYP_SYM(__kvm_hyp_vector) -extern unsigned long kvm_arm_hyp_percpu_base[NR_CPUS]; +extern unsigned long kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[]; DECLARE_KVM_NVHE_SYM(__per_cpu_start); DECLARE_KVM_NVHE_SYM(__per_cpu_end); diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 7f4e43bfaade..ae8f37f4aa8c 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -89,9 +89,6 @@ KVM_NVHE_ALIAS(gic_nonsecure_priorities); KVM_NVHE_ALIAS(__start___kvm_ex_table); KVM_NVHE_ALIAS(__stop___kvm_ex_table); -/* Array containing bases of nVHE per-CPU memory regions.
*/ -KVM_NVHE_ALIAS(kvm_arm_hyp_percpu_base); - /* PMU available static key */ #ifdef CONFIG_HW_PERF_EVENTS KVM_NVHE_ALIAS(kvm_arm_pmu_available); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index f78eefa02f6b..25467f24803d 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -51,7 +51,6 @@ DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized); DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector); DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); -unsigned long kvm_arm_hyp_percpu_base[NR_CPUS]; DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params); static bool vgic_present; @@ -1857,13 +1856,13 @@ static void teardown_hyp_mode(void) free_hyp_pgds(); for_each_possible_cpu(cpu) { free_page(per_cpu(kvm_arm_hyp_stack_page, cpu)); - free_pages(kvm_arm_hyp_percpu_base[cpu], nvhe_percpu_order()); + free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order()); } } static int do_pkvm_init(u32 hyp_va_bits) { - void *per_cpu_base = kvm_ksym_ref(kvm_arm_hyp_percpu_base); + void *per_cpu_base = kvm_ksym_ref(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)); int ret; preempt_disable(); @@ -1967,7 +1966,7 @@ static int init_hyp_mode(void) page_addr = page_address(page); memcpy(page_addr, CHOOSE_NVHE_SYM(__per_cpu_start), nvhe_percpu_size()); - kvm_arm_hyp_percpu_base[cpu] = (unsigned long)page_addr; + kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu] = (unsigned long)page_addr; } /* @@ -2060,7 +2059,7 @@ static int init_hyp_mode(void) } for_each_possible_cpu(cpu) { - char *percpu_begin = (char *)kvm_arm_hyp_percpu_base[cpu]; + char *percpu_begin = (char *)kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu]; char *percpu_end = percpu_begin + nvhe_percpu_size(); /* Map Hyp percpu pages */ diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c index 9f54833af400..04d194583f1e 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c @@ -23,6 +23,8 @@ u64 cpu_logical_map(unsigned int cpu) 
return hyp_cpu_logical_map[cpu]; } +unsigned long __ro_after_init kvm_arm_hyp_percpu_base[NR_CPUS]; + unsigned long __hyp_per_cpu_offset(unsigned int cpu) { unsigned long *cpu_base_array; From 73f38ef2ae531b180685173e0923225551434fcb Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 10 Nov 2022 19:02:55 +0000 Subject: [PATCH 1434/4122] KVM: arm64: Maintain a copy of 'kvm_arm_vmid_bits' at EL2 Sharing 'kvm_arm_vmid_bits' between EL1 and EL2 allows the host to modify the variable arbitrarily, potentially leading to all sorts of shenanigans as this is used to configure the VTTBR register for the guest stage-2. In preparation for unmapping host sections entirely from EL2, maintain a copy of 'kvm_arm_vmid_bits' in the pKVM hypervisor and initialise it from the host value while it is still trusted. Tested-by: Vincent Donnefort Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-23-will@kernel.org --- arch/arm64/include/asm/kvm_hyp.h | 2 ++ arch/arm64/kernel/image-vars.h | 3 --- arch/arm64/kvm/arm.c | 1 + arch/arm64/kvm/hyp/nvhe/pkvm.c | 3 +++ 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index fd99cf09972d..6797eafe7890 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -124,4 +124,6 @@ extern u64 kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val); extern unsigned long kvm_nvhe_sym(__icache_flags); +extern unsigned int kvm_nvhe_sym(kvm_arm_vmid_bits); + #endif /* __ARM64_KVM_HYP_H__ */ diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index ae8f37f4aa8c..31ad75da4d58 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -71,9 +71,6 @@ KVM_NVHE_ALIAS(nvhe_hyp_panic_handler); /* Vectors installed by hyp-init on reset HVC.
*/ KVM_NVHE_ALIAS(__hyp_stub_vectors); -/* VMID bits set by the KVM VMID allocator */ -KVM_NVHE_ALIAS(kvm_arm_vmid_bits); - /* Static keys which are set if a vGIC trap should be handled in hyp. */ KVM_NVHE_ALIAS(vgic_v2_cpuif_trap); KVM_NVHE_ALIAS(vgic_v3_cpuif_trap); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 25467f24803d..1d4b8122d010 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1893,6 +1893,7 @@ static void kvm_hyp_init_symbols(void) kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR2_EL1); kvm_nvhe_sym(__icache_flags) = __icache_flags; + kvm_nvhe_sym(kvm_arm_vmid_bits) = kvm_arm_vmid_bits; } static int kvm_hyp_init_protection(u32 hyp_va_bits) diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 81835c2f4c5a..ed6ceac1e854 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -15,6 +15,9 @@ /* Used by icache_is_vpipt(). */ unsigned long __icache_flags; +/* Used by kvm_get_vttbr(). */ +unsigned int kvm_arm_vmid_bits; + /* * Set trap register values based on features in ID_AA64PFR0. */ From 27eb26bfff5d358d42911d04bbecc62e659ec32b Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Thu, 10 Nov 2022 19:02:56 +0000 Subject: [PATCH 1435/4122] KVM: arm64: Explicitly map 'kvm_vgic_global_state' at EL2 The pkvm hypervisor at EL2 may need to read the 'kvm_vgic_global_state' variable from the host, for example when saving and restoring the state of the virtual GIC. Explicitly map 'kvm_vgic_global_state' in the stage-1 page-table of the pKVM hypervisor rather than relying on mapping all of the host '.rodata' section. 
Tested-by: Vincent Donnefort Signed-off-by: Quentin Perret Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-24-will@kernel.org --- arch/arm64/kvm/hyp/nvhe/setup.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index 0f69c1393416..5a371ab236db 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -161,6 +161,11 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size, if (ret) return ret; + ret = pkvm_create_mappings(&kvm_vgic_global_state, + &kvm_vgic_global_state + 1, prot); + if (ret) + return ret; + return 0; } From 169cd0f8238f2598b85d2db2e15828e8f8da18e5 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Thu, 10 Nov 2022 19:02:57 +0000 Subject: [PATCH 1436/4122] KVM: arm64: Don't unnecessarily map host kernel sections at EL2 We no longer need to map the host's '.rodata' and '.bss' sections in the stage-1 page-table of the pKVM hypervisor at EL2, so remove those mappings and avoid creating any future dependencies at EL2 on host-controlled data structures. 
Tested-by: Vincent Donnefort Signed-off-by: Quentin Perret Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-25-will@kernel.org --- arch/arm64/kernel/image-vars.h | 6 ------ arch/arm64/kvm/hyp/nvhe/setup.c | 14 +++----------- 2 files changed, 3 insertions(+), 17 deletions(-) diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 31ad75da4d58..e3f88b5836a2 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -102,12 +102,6 @@ KVM_NVHE_ALIAS_HYP(__memcpy, __pi_memcpy); KVM_NVHE_ALIAS_HYP(__memset, __pi_memset); #endif -/* Kernel memory sections */ -KVM_NVHE_ALIAS(__start_rodata); -KVM_NVHE_ALIAS(__end_rodata); -KVM_NVHE_ALIAS(__bss_start); -KVM_NVHE_ALIAS(__bss_stop); - /* Hyp memory sections */ KVM_NVHE_ALIAS(__hyp_idmap_text_start); KVM_NVHE_ALIAS(__hyp_idmap_text_end); diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index 5a371ab236db..5cdf3fb09bb4 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -144,23 +144,15 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size, } /* - * Map the host's .bss and .rodata sections RO in the hypervisor, but - * transfer the ownership from the host to the hypervisor itself to - * make sure it can't be donated or shared with another entity. + * Map the host sections RO in the hypervisor, but transfer the + * ownership from the host to the hypervisor itself to make sure they + * can't be donated or shared with another entity. * * The ownership transition requires matching changes in the host * stage-2. This will be done later (see finalize_host_mappings()) once * the hyp_vmemmap is addressable. 
*/ prot = pkvm_mkstate(PAGE_HYP_RO, PKVM_PAGE_SHARED_OWNED); - ret = pkvm_create_mappings(__start_rodata, __end_rodata, prot); - if (ret) - return ret; - - ret = pkvm_create_mappings(__hyp_bss_end, __bss_stop, prot); - if (ret) - return ret; - ret = pkvm_create_mappings(&kvm_vgic_global_state, &kvm_vgic_global_state + 1, prot); if (ret) From be66e67f175096f283c9d5614c4991fc9e7ed975 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 10 Nov 2022 19:02:59 +0000 Subject: [PATCH 1437/4122] KVM: arm64: Use the pKVM hyp vCPU structure in handle___kvm_vcpu_run() As a stepping stone towards deprivileging the host's access to the guest's vCPU structures, introduce some naive flush/sync routines to copy most of the host vCPU into the hyp vCPU on vCPU run and back again on return to EL1. This allows us to run using the pKVM hyp structures when KVM is initialised in protected mode. Tested-by: Vincent Donnefort Co-developed-by: Fuad Tabba Signed-off-by: Fuad Tabba Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110190259.26861-27-will@kernel.org --- arch/arm64/kvm/hyp/include/nvhe/pkvm.h | 4 ++ arch/arm64/kvm/hyp/nvhe/hyp-main.c | 79 +++++++++++++++++++++++++- arch/arm64/kvm/hyp/nvhe/pkvm.c | 28 +++++++++ 3 files changed, 109 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h index d14dfbcb7da1..82b3d62538a6 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h @@ -61,4 +61,8 @@ int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu, unsigned long vcpu_hva); int __pkvm_teardown_vm(pkvm_handle_t handle); +struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle, + unsigned int vcpu_idx); +void pkvm_put_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu); + #endif /* __ARM64_KVM_NVHE_PKVM_H__ */ diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index b5f3fcfe9135..728e01d4536b 
100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -22,11 +22,86 @@ DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params); void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt); +static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu) +{ + struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu; + + hyp_vcpu->vcpu.arch.ctxt = host_vcpu->arch.ctxt; + + hyp_vcpu->vcpu.arch.sve_state = kern_hyp_va(host_vcpu->arch.sve_state); + hyp_vcpu->vcpu.arch.sve_max_vl = host_vcpu->arch.sve_max_vl; + + hyp_vcpu->vcpu.arch.hw_mmu = host_vcpu->arch.hw_mmu; + + hyp_vcpu->vcpu.arch.hcr_el2 = host_vcpu->arch.hcr_el2; + hyp_vcpu->vcpu.arch.mdcr_el2 = host_vcpu->arch.mdcr_el2; + hyp_vcpu->vcpu.arch.cptr_el2 = host_vcpu->arch.cptr_el2; + + hyp_vcpu->vcpu.arch.iflags = host_vcpu->arch.iflags; + hyp_vcpu->vcpu.arch.fp_state = host_vcpu->arch.fp_state; + + hyp_vcpu->vcpu.arch.debug_ptr = kern_hyp_va(host_vcpu->arch.debug_ptr); + hyp_vcpu->vcpu.arch.host_fpsimd_state = host_vcpu->arch.host_fpsimd_state; + + hyp_vcpu->vcpu.arch.vsesr_el2 = host_vcpu->arch.vsesr_el2; + + hyp_vcpu->vcpu.arch.vgic_cpu.vgic_v3 = host_vcpu->arch.vgic_cpu.vgic_v3; +} + +static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu) +{ + struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu; + struct vgic_v3_cpu_if *hyp_cpu_if = &hyp_vcpu->vcpu.arch.vgic_cpu.vgic_v3; + struct vgic_v3_cpu_if *host_cpu_if = &host_vcpu->arch.vgic_cpu.vgic_v3; + unsigned int i; + + host_vcpu->arch.ctxt = hyp_vcpu->vcpu.arch.ctxt; + + host_vcpu->arch.hcr_el2 = hyp_vcpu->vcpu.arch.hcr_el2; + host_vcpu->arch.cptr_el2 = hyp_vcpu->vcpu.arch.cptr_el2; + + host_vcpu->arch.fault = hyp_vcpu->vcpu.arch.fault; + + host_vcpu->arch.iflags = hyp_vcpu->vcpu.arch.iflags; + host_vcpu->arch.fp_state = hyp_vcpu->vcpu.arch.fp_state; + + host_cpu_if->vgic_hcr = hyp_cpu_if->vgic_hcr; + for (i = 0; i < hyp_cpu_if->used_lrs; ++i) + host_cpu_if->vgic_lr[i] = hyp_cpu_if->vgic_lr[i]; +} + static void 
handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt) { - DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1); + DECLARE_REG(struct kvm_vcpu *, host_vcpu, host_ctxt, 1); + int ret; - cpu_reg(host_ctxt, 1) = __kvm_vcpu_run(kern_hyp_va(vcpu)); + host_vcpu = kern_hyp_va(host_vcpu); + + if (unlikely(is_protected_kvm_enabled())) { + struct pkvm_hyp_vcpu *hyp_vcpu; + struct kvm *host_kvm; + + host_kvm = kern_hyp_va(host_vcpu->kvm); + hyp_vcpu = pkvm_load_hyp_vcpu(host_kvm->arch.pkvm.handle, + host_vcpu->vcpu_idx); + if (!hyp_vcpu) { + ret = -EINVAL; + goto out; + } + + flush_hyp_vcpu(hyp_vcpu); + + ret = __kvm_vcpu_run(&hyp_vcpu->vcpu); + + sync_hyp_vcpu(hyp_vcpu); + pkvm_put_hyp_vcpu(hyp_vcpu); + } else { + /* The host is fully trusted, run its vCPU directly. */ + ret = __kvm_vcpu_run(host_vcpu); + } + +out: + cpu_reg(host_ctxt, 1) = ret; } static void handle___kvm_adjust_pc(struct kvm_cpu_context *host_ctxt) diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index ed6ceac1e854..a06ece14a6d8 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -241,6 +241,33 @@ static struct pkvm_hyp_vm *get_vm_by_handle(pkvm_handle_t handle) return vm_table[idx]; } +struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle, + unsigned int vcpu_idx) +{ + struct pkvm_hyp_vcpu *hyp_vcpu = NULL; + struct pkvm_hyp_vm *hyp_vm; + + hyp_spin_lock(&vm_table_lock); + hyp_vm = get_vm_by_handle(handle); + if (!hyp_vm || hyp_vm->nr_vcpus <= vcpu_idx) + goto unlock; + + hyp_vcpu = hyp_vm->vcpus[vcpu_idx]; + hyp_page_ref_inc(hyp_virt_to_page(hyp_vm)); +unlock: + hyp_spin_unlock(&vm_table_lock); + return hyp_vcpu; +} + +void pkvm_put_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu) +{ + struct pkvm_hyp_vm *hyp_vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu); + + hyp_spin_lock(&vm_table_lock); + hyp_page_ref_dec(hyp_virt_to_page(hyp_vm)); + hyp_spin_unlock(&vm_table_lock); +} + static void unpin_host_vcpu(struct kvm_vcpu *host_vcpu) { if (host_vcpu) @@ 
-286,6 +313,7 @@ static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu, hyp_vcpu->vcpu.vcpu_idx = vcpu_idx; hyp_vcpu->vcpu.arch.hw_mmu = &hyp_vm->kvm.arch.mmu; + hyp_vcpu->vcpu.arch.cflags = READ_ONCE(host_vcpu->arch.cflags); done: if (ret) unpin_host_vcpu(host_vcpu); From e6ecb142429183cef4835f31d4134050ae660032 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 8 Nov 2022 17:59:34 -0800 Subject: [PATCH 1438/4122] f2fs: allow to read node block after shutdown If block address is still alive, we should give a valid node block even after shutdown. Otherwise, we can see zero data when reading out a file. Cc: stable@vger.kernel.org Fixes: 83a3bfdb5a8a ("f2fs: indicate shutdown f2fs to allow unmount successfully") Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 983572f23896..b9ee5a1176a0 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1360,8 +1360,7 @@ static int read_node_page(struct page *page, blk_opf_t op_flags) return err; /* NEW_ADDR can be seen, after cp_error drops some dirty node pages */ - if (unlikely(ni.blk_addr == NULL_ADDR || ni.blk_addr == NEW_ADDR) || - is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN)) { + if (unlikely(ni.blk_addr == NULL_ADDR || ni.blk_addr == NEW_ADDR)) { ClearPageUptodate(page); return -ENOENT; } From 225d6795abf47c3340214ca1b4c22728e463db4f Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Tue, 25 Oct 2022 21:26:38 +0800 Subject: [PATCH 1439/4122] f2fs: add proc entry to show discard_plist info This patch adds a new proc entry to show discard_plist information in more detail, which is very helpful to know the discard pend list count clearly. Such as: Discard pend list(Show diacrd_cmd count on each entry, .:not exist): 0 390 156 85 67 46 37 26 14 8 17 12 9 9 6 12 11 10 16 5 9 2 4 8 3 4 1 24 3 2 2 5 2 4 5 4 32 3 3 2 3 . 3 3 1 40 . 4 1 3 2 1 2 1 48 1 . 1 1 . 1 1 . 56 . 1 1 1 . 2 . 1 64 1 2 . . . . . . 72 . 1 . 
. . . . . 80 3 1 . . 1 1 . . 88 1 . . . 1 . . 1 ...... Signed-off-by: Yangtao Li Signed-off-by: Jaegeuk Kim --- fs/f2fs/sysfs.c | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 032c03e09580..97bf0dbb0974 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -1252,6 +1252,44 @@ static int __maybe_unused victim_bits_seq_show(struct seq_file *seq, return 0; } +static int __maybe_unused discard_plist_seq_show(struct seq_file *seq, + void *offset) +{ + struct super_block *sb = seq->private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + int i, count; + + seq_puts(seq, "Discard pend list(Show diacrd_cmd count on each entry, .:not exist):\n"); + if (!f2fs_realtime_discard_enable(sbi)) + return 0; + + if (dcc) { + mutex_lock(&dcc->cmd_lock); + for (i = 0; i < MAX_PLIST_NUM; i++) { + struct list_head *pend_list; + struct discard_cmd *dc, *tmp; + + if (i % 8 == 0) + seq_printf(seq, " %-3d", i); + count = 0; + pend_list = &dcc->pend_list[i]; + list_for_each_entry_safe(dc, tmp, pend_list, list) + count++; + if (count) + seq_printf(seq, " %7d", count); + else + seq_puts(seq, " ."); + if (i % 8 == 7) + seq_putc(seq, '\n'); + } + seq_putc(seq, '\n'); + mutex_unlock(&dcc->cmd_lock); + } + + return 0; +} + int __init f2fs_init_sysfs(void) { int ret; @@ -1322,6 +1360,8 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi) #endif proc_create_single_data("victim_bits", 0444, sbi->s_proc, victim_bits_seq_show, sb); + proc_create_single_data("discard_plist_info", 0444, sbi->s_proc, + discard_plist_seq_show, sb); } return 0; put_feature_list_kobj: @@ -1345,6 +1385,7 @@ void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi) remove_proc_entry("segment_info", sbi->s_proc); remove_proc_entry("segment_bits", sbi->s_proc); remove_proc_entry("victim_bits", sbi->s_proc); + remove_proc_entry("discard_plist_info", sbi->s_proc); remove_proc_entry(sbi->sb->s_id, 
f2fs_proc_root); } From 4d8d45df2252980f800c1b2fde941a103a18a70e Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Mon, 31 Oct 2022 12:24:15 -0700 Subject: [PATCH 1440/4122] f2fs: correct i_size change for atomic writes We need to make sure i_size doesn't change until atomic write commit is successful and restore it when commit is failed. Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 8 ++++++++ fs/f2fs/file.c | 18 +++++++++++------- fs/f2fs/inode.c | 5 ++++- fs/f2fs/segment.c | 14 ++++++++++---- 4 files changed, 33 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 04ef4cce3d7f..11c475beca2c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -768,6 +768,7 @@ enum { FI_COMPRESS_RELEASED, /* compressed blocks were released */ FI_ALIGNED_WRITE, /* enable aligned write */ FI_COW_FILE, /* indicate COW file */ + FI_ATOMIC_COMMITTED, /* indicate atomic commit completed except disk sync */ FI_MAX, /* max flag, never be used */ }; @@ -826,6 +827,7 @@ struct f2fs_inode_info { unsigned int i_cluster_size; /* cluster size */ unsigned int atomic_write_cnt; + loff_t original_i_size; /* original i_size before atomic write */ }; static inline void get_extent_info(struct extent_info *ext, @@ -3075,6 +3077,8 @@ static inline void f2fs_i_blocks_write(struct inode *inode, set_inode_flag(inode, FI_AUTO_RECOVER); } +static inline bool f2fs_is_atomic_file(struct inode *inode); + static inline void f2fs_i_size_write(struct inode *inode, loff_t i_size) { bool clean = !is_inode_flag_set(inode, FI_DIRTY_INODE); @@ -3084,6 +3088,10 @@ static inline void f2fs_i_size_write(struct inode *inode, loff_t i_size) return; i_size_write(inode, i_size); + + if (f2fs_is_atomic_file(inode)) + return; + f2fs_mark_inode_dirty_sync(inode, true); if (clean || recover) set_inode_flag(inode, FI_AUTO_RECOVER); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index c605a4f2bce2..28f586e77999 100644 --- a/fs/f2fs/file.c +++ 
b/fs/f2fs/file.c @@ -2041,6 +2041,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct inode *pinode; + loff_t isize; int ret; if (!inode_owner_or_capable(mnt_userns, inode)) @@ -2099,7 +2100,12 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) f2fs_up_write(&fi->i_gc_rwsem[WRITE]); goto out; } - f2fs_i_size_write(fi->cow_inode, i_size_read(inode)); + + f2fs_write_inode(inode, NULL); + + isize = i_size_read(inode); + fi->original_i_size = isize; + f2fs_i_size_write(fi->cow_inode, isize); stat_inc_atomic_inode(inode); @@ -2137,16 +2143,14 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) if (f2fs_is_atomic_file(inode)) { ret = f2fs_commit_atomic_write(inode); - if (ret) - goto unlock_out; - - ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); if (!ret) - f2fs_abort_atomic_write(inode, false); + ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); + + f2fs_abort_atomic_write(inode, ret); } else { ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false); } -unlock_out: + inode_unlock(inode); mnt_drop_write_file(filp); return ret; diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 9f0d3864d9f1..577f109b4e1d 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -621,9 +621,12 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page) ri->i_uid = cpu_to_le32(i_uid_read(inode)); ri->i_gid = cpu_to_le32(i_gid_read(inode)); ri->i_links = cpu_to_le32(inode->i_nlink); - ri->i_size = cpu_to_le64(i_size_read(inode)); ri->i_blocks = cpu_to_le64(SECTOR_TO_BLOCK(inode->i_blocks) + 1); + if (!f2fs_is_atomic_file(inode) || + is_inode_flag_set(inode, FI_ATOMIC_COMMITTED)) + ri->i_size = cpu_to_le64(i_size_read(inode)); + if (et) { read_lock(&et->lock); set_raw_extent(&et->largest, &ri->i_ext); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index aa4be7f25963..8aa81238c770 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -192,14 
+192,18 @@ void f2fs_abort_atomic_write(struct inode *inode, bool clean) if (!f2fs_is_atomic_file(inode)) return; - if (clean) - truncate_inode_pages_final(inode->i_mapping); clear_inode_flag(fi->cow_inode, FI_COW_FILE); iput(fi->cow_inode); fi->cow_inode = NULL; release_atomic_write_cnt(inode); + clear_inode_flag(inode, FI_ATOMIC_COMMITTED); clear_inode_flag(inode, FI_ATOMIC_FILE); stat_dec_atomic_inode(inode); + + if (clean) { + truncate_inode_pages_final(inode->i_mapping); + f2fs_i_size_write(inode, fi->original_i_size); + } } static int __replace_atomic_write_block(struct inode *inode, pgoff_t index, @@ -335,10 +339,12 @@ next: } out: - if (ret) + if (ret) { sbi->revoked_atomic_block += fi->atomic_write_cnt; - else + } else { sbi->committed_atomic_block += fi->atomic_write_cnt; + set_inode_flag(inode, FI_ATOMIC_COMMITTED); + } __complete_revoke_list(inode, &revoke_list, ret ? true : false); From cc249e4cba9a6002c9d9e1438daf8440a160bc9e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 6 Nov 2022 21:25:44 +0800 Subject: [PATCH 1441/4122] f2fs: fix to avoid accessing uninitialized spinlock syzbot reports a kernel bug: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x1e3/0x2cb lib/dump_stack.c:106 assign_lock_key+0x22a/0x240 kernel/locking/lockdep.c:981 register_lock_class+0x287/0x9b0 kernel/locking/lockdep.c:1294 __lock_acquire+0xe4/0x1f60 kernel/locking/lockdep.c:4934 lock_acquire+0x1a7/0x400 kernel/locking/lockdep.c:5668 __raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline] _raw_spin_lock+0x2a/0x40 kernel/locking/spinlock.c:154 spin_lock include/linux/spinlock.h:350 [inline] f2fs_save_errors fs/f2fs/super.c:3868 [inline] f2fs_handle_error+0x29/0x230 fs/f2fs/super.c:3896 f2fs_iget+0x215/0x4bb0 fs/f2fs/inode.c:516 f2fs_fill_super+0x47d3/0x7b50 fs/f2fs/super.c:4222 mount_bdev+0x26c/0x3a0 fs/super.c:1401 legacy_get_tree+0xea/0x180 fs/fs_context.c:610 vfs_get_tree+0x88/0x270 fs/super.c:1531 do_new_mount+0x289/0xad0 fs/namespace.c:3040 do_mount 
fs/namespace.c:3383 [inline] __do_sys_mount fs/namespace.c:3591 [inline] __se_sys_mount+0x2e3/0x3d0 fs/namespace.c:3568 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x2b/0x70 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd F2FS-fs (loop1): Failed to read F2FS meta data inode The root cause is that sbi->error_lock may be accessed before its initialization; fix it. Link: https://lore.kernel.org/linux-f2fs-devel/0000000000007edb6605ecbb6442@google.com/T/#u Reported-by: syzbot+40642be9b7e0bb28e0df@syzkaller.appspotmail.com Fixes: 95fa90c9e5a7 ("f2fs: support recording errors into superblock") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index a43d8a46a6e5..68a6c2eedcac 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -4196,6 +4196,9 @@ try_onemore: if (err) goto free_bio_info; + spin_lock_init(&sbi->error_lock); + memcpy(sbi->errors, raw_super->s_errors, MAX_F2FS_ERRORS); + init_f2fs_rwsem(&sbi->cp_rwsem); init_f2fs_rwsem(&sbi->quota_sem); init_waitqueue_head(&sbi->cp_wait); @@ -4263,9 +4266,6 @@ try_onemore: goto free_devices; } - spin_lock_init(&sbi->error_lock); - memcpy(sbi->errors, raw_super->s_errors, MAX_F2FS_ERRORS); - sbi->total_valid_node_count = le32_to_cpu(sbi->ckpt->valid_node_count); percpu_counter_set(&sbi->total_valid_inode_count, From 59237a21776f70ffb0420611c23e7158e1317037 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 8 Nov 2022 22:33:21 +0800 Subject: [PATCH 1442/4122] f2fs: optimize iteration over sparse directories Wei Chen reports a kernel bug as below: INFO: task syz-executor.0:29056 blocked for more than 143 seconds. Not tainted 5.15.0-rc5 #1 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
task:syz-executor.0 state:D stack:14632 pid:29056 ppid: 6574 flags:0x00000004 Call Trace: __schedule+0x4a1/0x1720 schedule+0x36/0xe0 rwsem_down_write_slowpath+0x322/0x7a0 fscrypt_ioctl_set_policy+0x11f/0x2a0 __f2fs_ioctl+0x1a9f/0x5780 f2fs_ioctl+0x89/0x3a0 __x64_sys_ioctl+0xe8/0x140 do_syscall_64+0x34/0xb0 entry_SYSCALL_64_after_hwframe+0x44/0xae Eric investigated this issue; quoting his reply: "Well, the quality of this bug report has a lot to be desired (not on upstream kernel, reproducer is full of totally irrelevant stuff, not sent to the mailing list of the filesystem whose disk image is being fuzzed, etc.). But what is going on is that f2fs_empty_dir() doesn't consider the case of a directory with an extremely large i_size on a malicious disk image. Specifically, the reproducer mounts an f2fs image with a directory that has an i_size of 14814520042850357248, then calls FS_IOC_SET_ENCRYPTION_POLICY on it. That results in a call to f2fs_empty_dir() to check whether the directory is empty. f2fs_empty_dir() then iterates through all 3616826182336513 blocks the directory allegedly contains to check whether any contain anything. i_rwsem is held during this, so anything else that tries to take it will hang." In order to solve this issue, let's use f2fs_get_next_page_offset() to speed up iteration by skipping holes in all of the functions below: - f2fs_empty_dir - f2fs_readdir - find_in_level The reason we can speed up iteration is described in 'commit 3cf4574705b4 ("f2fs: introduce get_next_page_offset to speed up SEEK_DATA")'. Meanwhile, in f2fs_empty_dir(), let's use f2fs_find_data_page() instead of f2fs_get_lock_data_page(). Since i_rwsem is held by the caller of f2fs_empty_dir(), there shouldn't be any races, so it's fine not to lock the dentry page while looking up dirents in the page.
Link: https://lore.kernel.org/lkml/536944df-a0ae-1dd8-148f-510b476e1347@kernel.org/T/ Reported-by: Wei Chen Cc: Eric Biggers Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 17 ++++++++++++----- fs/f2fs/dir.c | 34 ++++++++++++++++++++++++---------- fs/f2fs/f2fs.h | 5 +++-- fs/f2fs/gc.c | 4 ++-- 4 files changed, 41 insertions(+), 19 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a71e818cd67b..9b47ded653d1 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1206,7 +1206,8 @@ int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index) } struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, - blk_opf_t op_flags, bool for_write) + blk_opf_t op_flags, bool for_write, + pgoff_t *next_pgofs) { struct address_space *mapping = inode->i_mapping; struct dnode_of_data dn; @@ -1232,12 +1233,17 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, set_new_dnode(&dn, inode, NULL, NULL, 0); err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE); - if (err) + if (err) { + if (err == -ENOENT && next_pgofs) + *next_pgofs = f2fs_get_next_page_offset(&dn, index); goto put_err; + } f2fs_put_dnode(&dn); if (unlikely(dn.data_blkaddr == NULL_ADDR)) { err = -ENOENT; + if (next_pgofs) + *next_pgofs = index + 1; goto put_err; } if (dn.data_blkaddr != NEW_ADDR && @@ -1281,7 +1287,8 @@ put_err: return ERR_PTR(err); } -struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index) +struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index, + pgoff_t *next_pgofs) { struct address_space *mapping = inode->i_mapping; struct page *page; @@ -1291,7 +1298,7 @@ struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index) return page; f2fs_put_page(page, 0); - page = f2fs_get_read_data_page(inode, index, 0, false); + page = f2fs_get_read_data_page(inode, index, 0, false, next_pgofs); if (IS_ERR(page)) return page; @@ -1317,7 +1324,7 @@ struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t 
index, struct address_space *mapping = inode->i_mapping; struct page *page; repeat: - page = f2fs_get_read_data_page(inode, index, 0, for_write); + page = f2fs_get_read_data_page(inode, index, 0, for_write, NULL); if (IS_ERR(page)) return page; diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 21960a899b6a..030b7fd4142f 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -340,6 +340,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, unsigned int bidx, end_block; struct page *dentry_page; struct f2fs_dir_entry *de = NULL; + pgoff_t next_pgofs; bool room = false; int max_slots; @@ -350,12 +351,13 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, le32_to_cpu(fname->hash) % nbucket); end_block = bidx + nblock; - for (; bidx < end_block; bidx++) { + while (bidx < end_block) { /* no need to allocate new dentry pages to all the indices */ - dentry_page = f2fs_find_data_page(dir, bidx); + dentry_page = f2fs_find_data_page(dir, bidx, &next_pgofs); if (IS_ERR(dentry_page)) { if (PTR_ERR(dentry_page) == -ENOENT) { room = true; + bidx = next_pgofs; continue; } else { *res_page = dentry_page; @@ -376,6 +378,8 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, if (max_slots >= s) room = true; f2fs_put_page(dentry_page, 0); + + bidx++; } if (!de && room && F2FS_I(dir)->chash != fname->hash) { @@ -956,7 +960,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, bool f2fs_empty_dir(struct inode *dir) { - unsigned long bidx; + unsigned long bidx = 0; struct page *dentry_page; unsigned int bit_pos; struct f2fs_dentry_block *dentry_blk; @@ -965,13 +969,17 @@ bool f2fs_empty_dir(struct inode *dir) if (f2fs_has_inline_dentry(dir)) return f2fs_empty_inline_dir(dir); - for (bidx = 0; bidx < nblock; bidx++) { - dentry_page = f2fs_get_lock_data_page(dir, bidx, false); + while (bidx < nblock) { + pgoff_t next_pgofs; + + dentry_page = f2fs_find_data_page(dir, bidx, &next_pgofs); if (IS_ERR(dentry_page)) { - if 
(PTR_ERR(dentry_page) == -ENOENT) + if (PTR_ERR(dentry_page) == -ENOENT) { + bidx = next_pgofs; continue; - else + } else { return false; + } } dentry_blk = page_address(dentry_page); @@ -983,10 +991,12 @@ bool f2fs_empty_dir(struct inode *dir) NR_DENTRY_IN_BLOCK, bit_pos); - f2fs_put_page(dentry_page, 1); + f2fs_put_page(dentry_page, 0); if (bit_pos < NR_DENTRY_IN_BLOCK) return false; + + bidx++; } return true; } @@ -1104,7 +1114,8 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) goto out_free; } - for (; n < npages; n++, ctx->pos = n * NR_DENTRY_IN_BLOCK) { + for (; n < npages; ctx->pos = n * NR_DENTRY_IN_BLOCK) { + pgoff_t next_pgofs; /* allow readdir() to be interrupted */ if (fatal_signal_pending(current)) { @@ -1118,11 +1129,12 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) page_cache_sync_readahead(inode->i_mapping, ra, file, n, min(npages - n, (pgoff_t)MAX_DIR_RA_PAGES)); - dentry_page = f2fs_find_data_page(inode, n); + dentry_page = f2fs_find_data_page(inode, n, &next_pgofs); if (IS_ERR(dentry_page)) { err = PTR_ERR(dentry_page); if (err == -ENOENT) { err = 0; + n = next_pgofs; continue; } else { goto out_free; @@ -1141,6 +1153,8 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) } f2fs_put_page(dentry_page, 0); + + n++; } out_free: fscrypt_fname_free_buffer(&fstr); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 11c475beca2c..6a8cbf5bb187 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3807,8 +3807,9 @@ int f2fs_reserve_new_block(struct dnode_of_data *dn); int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index); int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index); struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, - blk_opf_t op_flags, bool for_write); -struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index); + blk_opf_t op_flags, bool for_write, pgoff_t *next_pgofs); +struct page *f2fs_find_data_page(struct inode *inode, pgoff_t 
index, + pgoff_t *next_pgofs); struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index, bool for_write); struct page *f2fs_get_new_data_page(struct inode *inode, diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 6466db75af5d..f1a46519a5fe 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1562,8 +1562,8 @@ next_step: continue; } - data_page = f2fs_get_read_data_page(inode, - start_bidx, REQ_RAHEAD, true); + data_page = f2fs_get_read_data_page(inode, start_bidx, + REQ_RAHEAD, true, NULL); f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); if (IS_ERR(data_page)) { iput(inode); From 92b4cf5b48955a4bdd15fe4e2067db8ebd87f04c Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 9 Nov 2022 07:04:42 +0900 Subject: [PATCH 1443/4122] f2fs: initialize locks earlier in f2fs_fill_super() syzbot is reporting lockdep warning at f2fs_handle_error() [1], for spin_lock(&sbi->error_lock) is called before spin_lock_init() is called. For safe locking in error handling, move initialization of locks (and obvious structures) in f2fs_fill_super() to immediately after memory allocation. 
Link: https://syzkaller.appspot.com/bug?extid=40642be9b7e0bb28e0df [1] Reported-by: syzbot Signed-off-by: Tetsuo Handa Tested-by: syzbot Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 68a6c2eedcac..8f4fc3ad6765 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -4103,6 +4103,24 @@ try_onemore: sbi->sb = sb; + /* initialize locks within allocated memory */ + init_f2fs_rwsem(&sbi->gc_lock); + mutex_init(&sbi->writepages); + init_f2fs_rwsem(&sbi->cp_global_sem); + init_f2fs_rwsem(&sbi->node_write); + init_f2fs_rwsem(&sbi->node_change); + spin_lock_init(&sbi->stat_lock); + init_f2fs_rwsem(&sbi->cp_rwsem); + init_f2fs_rwsem(&sbi->quota_sem); + init_waitqueue_head(&sbi->cp_wait); + spin_lock_init(&sbi->error_lock); + + for (i = 0; i < NR_INODE_TYPE; i++) { + INIT_LIST_HEAD(&sbi->inode_list[i]); + spin_lock_init(&sbi->inode_lock[i]); + } + mutex_init(&sbi->flush_lock); + /* Load the checksum driver */ sbi->s_chksum_driver = crypto_alloc_shash("crc32", 0, 0); if (IS_ERR(sbi->s_chksum_driver)) { @@ -4126,6 +4144,8 @@ try_onemore: sb->s_fs_info = sbi; sbi->raw_super = raw_super; + memcpy(sbi->errors, raw_super->s_errors, MAX_F2FS_ERRORS); + /* precompute checksum seed for metadata */ if (f2fs_sb_has_inode_chksum(sbi)) sbi->s_chksum_seed = f2fs_chksum(sbi, ~0, raw_super->uuid, @@ -4182,26 +4202,14 @@ try_onemore: /* init f2fs-specific super block info */ sbi->valid_super_block = valid_super_block; - init_f2fs_rwsem(&sbi->gc_lock); - mutex_init(&sbi->writepages); - init_f2fs_rwsem(&sbi->cp_global_sem); - init_f2fs_rwsem(&sbi->node_write); - init_f2fs_rwsem(&sbi->node_change); /* disallow all the data/node/meta page writes */ set_sbi_flag(sbi, SBI_POR_DOING); - spin_lock_init(&sbi->stat_lock); err = f2fs_init_write_merge_io(sbi); if (err) goto free_bio_info; - spin_lock_init(&sbi->error_lock); - 
memcpy(sbi->errors, raw_super->s_errors, MAX_F2FS_ERRORS); - - init_f2fs_rwsem(&sbi->cp_rwsem); - init_f2fs_rwsem(&sbi->quota_sem); - init_waitqueue_head(&sbi->cp_wait); init_sb_info(sbi); err = f2fs_init_iostat(sbi); @@ -4279,12 +4287,6 @@ try_onemore: limit_reserve_root(sbi); adjust_unusable_cap_perc(sbi); - for (i = 0; i < NR_INODE_TYPE; i++) { - INIT_LIST_HEAD(&sbi->inode_list[i]); - spin_lock_init(&sbi->inode_lock[i]); - } - mutex_init(&sbi->flush_lock); - f2fs_init_extent_cache_info(sbi); f2fs_init_ino_entry_info(sbi); From 967eaad1fed5f6335ea97a47d45214744dc57925 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Thu, 10 Nov 2022 17:15:01 +0800 Subject: [PATCH 1444/4122] f2fs: fix to set flush_merge opt and show noflush_merge Some minor modifications to flush_merge and related parameters: 1.The FLUSH_MERGE opt is set by default only in non-ro mode. 2.When ro and merge are set at the same time, an error is reported. 3.Display noflush_merge mount opt. Suggested-by: Chao Yu Signed-off-by: Yangtao Li Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 8f4fc3ad6765..75027ff85cd9 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1353,6 +1353,12 @@ default_check: return -EINVAL; } + if ((f2fs_sb_has_readonly(sbi) || f2fs_readonly(sbi->sb)) && + test_opt(sbi, FLUSH_MERGE)) { + f2fs_err(sbi, "FLUSH_MERGE not compatible with readonly mode"); + return -EINVAL; + } + if (f2fs_sb_has_readonly(sbi) && !f2fs_readonly(sbi->sb)) { f2fs_err(sbi, "Allow to mount readonly mode only"); return -EROFS; @@ -1941,8 +1947,10 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",inline_dentry"); else seq_puts(seq, ",noinline_dentry"); - if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE)) + if (test_opt(sbi, FLUSH_MERGE)) seq_puts(seq, ",flush_merge"); + else + seq_puts(seq, ",noflush_merge"); if (test_opt(sbi, NOBARRIER)) 
seq_puts(seq, ",nobarrier"); else @@ -2073,7 +2081,8 @@ static void default_options(struct f2fs_sb_info *sbi) set_opt(sbi, MERGE_CHECKPOINT); F2FS_OPTION(sbi).unusable_cap = 0; sbi->sb->s_flags |= SB_LAZYTIME; - set_opt(sbi, FLUSH_MERGE); + if (!f2fs_sb_has_readonly(sbi) && !f2fs_readonly(sbi->sb)) + set_opt(sbi, FLUSH_MERGE); if (f2fs_hw_support_discard(sbi) || f2fs_hw_should_discard(sbi)) set_opt(sbi, DISCARD); if (f2fs_sb_has_blkzoned(sbi)) { From 98b04dd0b4577894520493d96bc4623387767445 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 26 Oct 2022 02:11:21 -0400 Subject: [PATCH 1445/4122] PCI: Fix pci_device_is_present() for VFs by checking PF pci_device_is_present() previously didn't work for VFs because it reads the Vendor and Device ID, which are 0xffff for VFs, which looks like they aren't present. Check the PF instead. Wei Gong reported that if virtio I/O is in progress when the driver is unbound or "0" is written to /sys/.../sriov_numvfs, the virtio I/O operation hangs, which may result in output like this: task:bash state:D stack: 0 pid: 1773 ppid: 1241 flags:0x00004002 Call Trace: schedule+0x4f/0xc0 blk_mq_freeze_queue_wait+0x69/0xa0 blk_mq_freeze_queue+0x1b/0x20 blk_cleanup_queue+0x3d/0xd0 virtblk_remove+0x3c/0xb0 [virtio_blk] virtio_dev_remove+0x4b/0x80 ... device_unregister+0x1b/0x60 unregister_virtio_device+0x18/0x30 virtio_pci_remove+0x41/0x80 pci_device_remove+0x3e/0xb0 This happened because pci_device_is_present(VF) returned "false" in virtio_pci_remove(), so it called virtio_break_device(). The broken vq meant that vring_interrupt() skipped the vq.callback() that would have completed the virtio I/O operation via virtblk_done(). [bhelgaas: commit log, simplify to always use pci_physfn(), add stable tag] Link: https://lore.kernel.org/r/20221026060912.173250-1-mst@redhat.com Reported-by: Wei Gong Tested-by: Wei Gong Signed-off-by: Michael S. 
Tsirkin Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org --- drivers/pci/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 9f3cc829dfee..fba95486caaf 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -6447,6 +6447,8 @@ bool pci_device_is_present(struct pci_dev *pdev) { u32 v; + /* Check PF if pdev is a VF, since VF Vendor/Device IDs are 0xffff */ + pdev = pci_physfn(pdev); if (pci_dev_is_disconnected(pdev)) return false; return pci_bus_read_dev_vendor_id(pdev->bus, pdev->devfn, &v, 0); From c42edde5de3af6285fbb38c9d503a40ef491d10d Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Mon, 7 Nov 2022 15:42:37 +0200 Subject: [PATCH 1446/4122] i2c: designware: Fix slave state machine for sequential reads Some read types from I2C bus don't work correctly when testing the i2c-designware-slave.c with the slave-eeprom backend. The same reads work correctly when testing with a real 24c02 EEPROM chip. In the following tests an i2c-designware-slave.c instance with the slave-eeprom backend is configured to act as a simulated 24c02 at address 0x65 on an I2C host bus 6: 1. i2cdump -y 6 0x65 b (OK) Random read. Each byte are read using a byte address write with a current address read in a same message. 2. i2cdump -y 6 0x65 c (OK, was NOK before commit 3b5f7f10ff6e when it was repeating the 1st byte) Repeated current address read. One byte address write message followed by repeated current address read messages. 3. i2cdump -y 6 0x65 i (NOK, each 32 byte block repeats the 1st byte of block) Sequential read using SMBus Block Read. For each 32 byte block a byte address write followed by 32 sequental reads in a same message. These findings are explained because the implementation has had a mismatch between hardware interrupts and what I2C slave events should be sent after those interrupts. 
Despite that, case 1 always happened to have the I2C slave events sent in the right order with the right data between the backend and the I2C bus. Hardware generates the DW_IC_INTR_RD_REQ interrupt when another host is attempting to read and for the sequential reads that follow. DW_IC_INTR_RX_DONE occurs when the host does not acknowledge a transmitted byte, which indicates the end of transmission. Those interrupts do not match directly with the I2C_SLAVE_READ_REQUESTED and I2C_SLAVE_READ_PROCESSED events, which is how the code was, and practically still is, using them. The slave-eeprom backend increases the buffer index with the I2C_SLAVE_READ_PROCESSED event and returns the data from the current index when receiving only the I2C_SLAVE_READ_REQUESTED event. That explains the repeated bytes in case 3 and also case 2 before commit 3b5f7f10ff6e ("i2c: designware: slave should do WRITE_REQUESTED before WRITE_RECEIVED"). This patch fixes case 3 while keeping cases 1 and 2 working, with the following changes: - First DW_IC_INTR_RD_REQ interrupt will change the state machine to read in progress state, send I2C_SLAVE_READ_REQUESTED event and transmit the first byte from backend - Subsequent DW_IC_INTR_RD_REQ interrupts will send I2C_SLAVE_READ_PROCESSED events and transmit next bytes from backend - STOP won't change the state machine. Otherwise case 2 won't work since we cannot distinguish current address read from sequential read - DW_IC_INTR_RX_DONE interrupt is needless since there is no mechanism to report it to a backend. It cannot be used to change the state machine at the end of a read either, for the same reason as above - Next host write to us will change the state machine from read to write in progress state - STATUS_WRITE_IN_PROGRESS and STATUS_READ_IN_PROGRESS are now considered to be status flags, not the state of the driver.
This is how we treat them in i2c-designware-master.c While at it do not test the return code from i2c_slave_event() for I2C_SLAVE_READ_REQUESTED and I2C_SLAVE_READ_PROCESSED since it returns always 0. Signed-off-by: Jarkko Nikula Reviewed-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-core.h | 1 - drivers/i2c/busses/i2c-designware-slave.c | 32 +++++++++++------------ 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h index 4d3a3b464ecd..dbf6bdc5f01b 100644 --- a/drivers/i2c/busses/i2c-designware-core.h +++ b/drivers/i2c/busses/i2c-designware-core.h @@ -103,7 +103,6 @@ #define DW_IC_INTR_MASTER_MASK (DW_IC_INTR_DEFAULT_MASK | \ DW_IC_INTR_TX_EMPTY) #define DW_IC_INTR_SLAVE_MASK (DW_IC_INTR_DEFAULT_MASK | \ - DW_IC_INTR_RX_DONE | \ DW_IC_INTR_RX_UNDER | \ DW_IC_INTR_RD_REQ) diff --git a/drivers/i2c/busses/i2c-designware-slave.c b/drivers/i2c/busses/i2c-designware-slave.c index 0d15f4c1e9f7..1eac4f4d5573 100644 --- a/drivers/i2c/busses/i2c-designware-slave.c +++ b/drivers/i2c/busses/i2c-designware-slave.c @@ -173,8 +173,9 @@ static int i2c_dw_irq_handler_slave(struct dw_i2c_dev *dev) enabled, slave_activity, raw_stat, stat); if (stat & DW_IC_INTR_RX_FULL) { - if (dev->status != STATUS_WRITE_IN_PROGRESS) { - dev->status = STATUS_WRITE_IN_PROGRESS; + if (!(dev->status & STATUS_WRITE_IN_PROGRESS)) { + dev->status |= STATUS_WRITE_IN_PROGRESS; + dev->status &= ~STATUS_READ_IN_PROGRESS; i2c_slave_event(dev->slave, I2C_SLAVE_WRITE_REQUESTED, &val); } @@ -190,24 +191,23 @@ static int i2c_dw_irq_handler_slave(struct dw_i2c_dev *dev) if (slave_activity) { regmap_read(dev->map, DW_IC_CLR_RD_REQ, &tmp); - dev->status = STATUS_READ_IN_PROGRESS; - if (!i2c_slave_event(dev->slave, - I2C_SLAVE_READ_REQUESTED, - &val)) - regmap_write(dev->map, DW_IC_DATA_CMD, val); + if (!(dev->status & STATUS_READ_IN_PROGRESS)) { + i2c_slave_event(dev->slave, + 
I2C_SLAVE_READ_REQUESTED, + &val); + dev->status |= STATUS_READ_IN_PROGRESS; + dev->status &= ~STATUS_WRITE_IN_PROGRESS; + } else { + i2c_slave_event(dev->slave, + I2C_SLAVE_READ_PROCESSED, + &val); + } + regmap_write(dev->map, DW_IC_DATA_CMD, val); } } - if (stat & DW_IC_INTR_RX_DONE) { - if (!i2c_slave_event(dev->slave, I2C_SLAVE_READ_PROCESSED, - &val)) - regmap_read(dev->map, DW_IC_CLR_RX_DONE, &tmp); - } - - if (stat & DW_IC_INTR_STOP_DET) { - dev->status = STATUS_IDLE; + if (stat & DW_IC_INTR_STOP_DET) i2c_slave_event(dev->slave, I2C_SLAVE_STOP, &val); - } return 1; } From dcf1bf648f94d651a3e6fb433e9ba0a3e43c1047 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Mon, 7 Nov 2022 15:42:38 +0200 Subject: [PATCH 1447/4122] i2c: designware: Empty receive FIFO in slave interrupt handler Writes from I2C bus often fail when testing the i2c-designware-slave.c with the slave-eeprom backend. The same writes work correctly when testing with a real 24c02 EEPROM chip. In the tests below an i2c-designware-slave.c instance with the slave-eeprom backend is configured to act as a simulated 24c02 at address 0x65 on an I2C host bus 6. 1. i2cset -y 6 0x65 0x00 0x55 Single byte 0x55 write into address 0x00. No data goes into simulated EEPROM. Debug prints from the i2c_dw_irq_handler_slave(): 0x1 STATUS SLAVE_ACTIVITY=0x0 : RAW_INTR_STAT=0x714 : INTR_STAT=0x204 0x1 STATUS SLAVE_ACTIVITY=0x0 : RAW_INTR_STAT=0x514 : INTR_STAT=0x4 2. i2ctransfer -y 6 w9@0x65 0x00 0xff- Write 8 bytes with decrementing value starting from 0xff at address 0x00 and forward. Only some of the data goes into arbitrary addresses. 
Content is something like below but varies: 00000000 f9 f8 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| 00000050 00 00 00 00 00 00 ff fe 00 00 00 00 00 00 00 00 |................| 000000f0 00 00 00 00 00 00 00 00 00 00 00 00 00 fc fb fa |................| In this case debug prints were: 0x1 STATUS SLAVE_ACTIVITY=0x1 : RAW_INTR_STAT=0x514 : INTR_STAT=0x4 0x1 STATUS SLAVE_ACTIVITY=0x1 : RAW_INTR_STAT=0x514 : INTR_STAT=0x4 0x1 STATUS SLAVE_ACTIVITY=0x0 : RAW_INTR_STAT=0x714 : INTR_STAT=0x204 0x1 STATUS SLAVE_ACTIVITY=0x0 : RAW_INTR_STAT=0x514 : INTR_STAT=0x4 0x1 STATUS SLAVE_ACTIVITY=0x0 : RAW_INTR_STAT=0x514 : INTR_STAT=0x4 0x1 STATUS SLAVE_ACTIVITY=0x0 : RAW_INTR_STAT=0x514 : INTR_STAT=0x4 0x1 STATUS SLAVE_ACTIVITY=0x0 : RAW_INTR_STAT=0x514 : INTR_STAT=0x4 0x1 STATUS SLAVE_ACTIVITY=0x0 : RAW_INTR_STAT=0x514 : INTR_STAT=0x4 0x1 STATUS SLAVE_ACTIVITY=0x0 : RAW_INTR_STAT=0x514 : INTR_STAT=0x4 0x1 STATUS SLAVE_ACTIVITY=0x0 : RAW_INTR_STAT=0x510 : INTR_STAT=0x0 Both cases show there is more data coming from the receive FIFO still after detecting the STOP condition. This can be seen from interrupt status bits DW_IC_INTR_STOP_DET (0x200) and DW_IC_INTR_RX_FULL (0x4). Perhaps due interrupt latencies the receive FIFO is not read fast enough, STOP detection happens synchronously when it occurs on the I2C bus and the DW_IC_INTR_RX_FULL keeps coming as long as there are more bytes in the receive FIFO. Fix this by reading the receive FIFO completely empty whenever DW_IC_INTR_RX_FULL occurs. Use RFNE, Receive FIFO Not Empty bit in the DW_IC_STATUS register to loop through bytes in the FIFO. While at it do not test the return code from i2c_slave_event() for the I2C_SLAVE_WRITE_RECEIVED since to my understanding this hardware cannot generate NACK to incoming bytes and debug print itself does not have much value. 
Reported-by: Tian Ye Signed-off-by: Jarkko Nikula Reviewed-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-core.h | 1 + drivers/i2c/busses/i2c-designware-slave.c | 12 +++++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h index dbf6bdc5f01b..6d1df28dd93b 100644 --- a/drivers/i2c/busses/i2c-designware-core.h +++ b/drivers/i2c/busses/i2c-designware-core.h @@ -108,6 +108,7 @@ #define DW_IC_STATUS_ACTIVITY BIT(0) #define DW_IC_STATUS_TFE BIT(2) +#define DW_IC_STATUS_RFNE BIT(3) #define DW_IC_STATUS_MASTER_ACTIVITY BIT(5) #define DW_IC_STATUS_SLAVE_ACTIVITY BIT(6) diff --git a/drivers/i2c/busses/i2c-designware-slave.c b/drivers/i2c/busses/i2c-designware-slave.c index 1eac4f4d5573..295774a69b67 100644 --- a/drivers/i2c/busses/i2c-designware-slave.c +++ b/drivers/i2c/busses/i2c-designware-slave.c @@ -180,11 +180,13 @@ static int i2c_dw_irq_handler_slave(struct dw_i2c_dev *dev) &val); } - regmap_read(dev->map, DW_IC_DATA_CMD, &tmp); - val = tmp; - if (!i2c_slave_event(dev->slave, I2C_SLAVE_WRITE_RECEIVED, - &val)) - dev_vdbg(dev->dev, "Byte %X acked!", val); + do { + regmap_read(dev->map, DW_IC_DATA_CMD, &tmp); + val = tmp; + i2c_slave_event(dev->slave, I2C_SLAVE_WRITE_RECEIVED, + &val); + regmap_read(dev->map, DW_IC_STATUS, &tmp); + } while (tmp & DW_IC_STATUS_RFNE); } if (stat & DW_IC_INTR_RD_REQ) { From 4d827824b7bb29ce944172f6297db6d7a1ec37b3 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Mon, 7 Nov 2022 15:42:39 +0200 Subject: [PATCH 1448/4122] i2c: designware: Define software status flags with BIT() Define software status flags with a BIT() macro. While at it remove STATUS_IDLE and replace its use with zero initialization and status flags clearing with a mask. 
Suggested-by: Andy Shevchenko Signed-off-by: Jarkko Nikula Reviewed-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-core.h | 10 +++++----- drivers/i2c/busses/i2c-designware-master.c | 4 ++-- drivers/i2c/busses/i2c-designware-slave.c | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h index 6d1df28dd93b..457e6966f85e 100644 --- a/drivers/i2c/busses/i2c-designware-core.h +++ b/drivers/i2c/busses/i2c-designware-core.h @@ -123,12 +123,12 @@ #define DW_IC_COMP_PARAM_1_SPEED_MODE_MASK GENMASK(3, 2) /* - * status codes + * Sofware status flags */ -#define STATUS_IDLE 0x0 -#define STATUS_ACTIVE 0x1 -#define STATUS_WRITE_IN_PROGRESS 0x2 -#define STATUS_READ_IN_PROGRESS 0x4 +#define STATUS_ACTIVE BIT(0) +#define STATUS_WRITE_IN_PROGRESS BIT(1) +#define STATUS_READ_IN_PROGRESS BIT(2) +#define STATUS_MASK GENMASK(2, 0) /* * operation modes diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c index dc3c5a15a95b..1b7db2b58f31 100644 --- a/drivers/i2c/busses/i2c-designware-master.c +++ b/drivers/i2c/busses/i2c-designware-master.c @@ -574,7 +574,7 @@ i2c_dw_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) dev->msg_write_idx = 0; dev->msg_read_idx = 0; dev->msg_err = 0; - dev->status = STATUS_IDLE; + dev->status = 0; dev->abort_source = 0; dev->rx_outstanding = 0; @@ -731,7 +731,7 @@ static int i2c_dw_irq_handler_master(struct dw_i2c_dev *dev) if (stat & DW_IC_INTR_TX_ABRT) { dev->cmd_err |= DW_IC_ERR_TX_ABRT; - dev->status = STATUS_IDLE; + dev->status &= ~STATUS_MASK; dev->rx_outstanding = 0; /* diff --git a/drivers/i2c/busses/i2c-designware-slave.c b/drivers/i2c/busses/i2c-designware-slave.c index 295774a69b67..84eb0bec70fa 100644 --- a/drivers/i2c/busses/i2c-designware-slave.c +++ b/drivers/i2c/busses/i2c-designware-slave.c @@ -82,7 +82,7 @@ static int i2c_dw_reg_slave(struct 
i2c_client *slave) dev->msg_write_idx = 0; dev->msg_read_idx = 0; dev->msg_err = 0; - dev->status = STATUS_IDLE; + dev->status = 0; dev->abort_source = 0; dev->rx_outstanding = 0; From 40015c67533548986eaba42a3d9ba9d004bee41e Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Mon, 7 Nov 2022 15:42:40 +0200 Subject: [PATCH 1449/4122] i2c: designware: Remove needless initializations from i2c_dw_reg_slave() These struct dw_i2c_dev members are not used in i2c-designware-slave.c so remove re-initialization of them from i2c_dw_reg_slave(). Signed-off-by: Jarkko Nikula Reviewed-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-slave.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/i2c/busses/i2c-designware-slave.c b/drivers/i2c/busses/i2c-designware-slave.c index 84eb0bec70fa..421a604bf68f 100644 --- a/drivers/i2c/busses/i2c-designware-slave.c +++ b/drivers/i2c/busses/i2c-designware-slave.c @@ -78,13 +78,7 @@ static int i2c_dw_reg_slave(struct i2c_client *slave) __i2c_dw_enable(dev); - dev->cmd_err = 0; - dev->msg_write_idx = 0; - dev->msg_read_idx = 0; - dev->msg_err = 0; dev->status = 0; - dev->abort_source = 0; - dev->rx_outstanding = 0; return 0; } From 5cd69850308f7da3c2bfc117fca5fbfa3c530ade Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Mon, 7 Nov 2022 15:42:41 +0200 Subject: [PATCH 1450/4122] i2c: designware: Remove unused completion code from i2c-designware-slave Remove unused completion code from i2c-designware-slave.c. Used only in i2c-designware-master.c. 
Signed-off-by: Jarkko Nikula Reviewed-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-slave.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/i2c/busses/i2c-designware-slave.c b/drivers/i2c/busses/i2c-designware-slave.c index 421a604bf68f..12f0417aa0ae 100644 --- a/drivers/i2c/busses/i2c-designware-slave.c +++ b/drivers/i2c/busses/i2c-designware-slave.c @@ -214,8 +214,6 @@ static irqreturn_t i2c_dw_isr_slave(int this_irq, void *dev_id) int ret; ret = i2c_dw_irq_handler_slave(dev); - if (ret > 0) - complete(&dev->cmd_complete); return IRQ_RETVAL(ret); } @@ -242,8 +240,6 @@ int i2c_dw_probe_slave(struct dw_i2c_dev *dev) struct i2c_adapter *adap = &dev->adapter; int ret; - init_completion(&dev->cmd_complete); - dev->init = i2c_dw_init_slave; dev->disable = i2c_dw_disable; dev->disable_int = i2c_dw_disable_int; From 4c7107c2974270ff369342f6f1bd5d3669c182f1 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Mon, 7 Nov 2022 15:42:42 +0200 Subject: [PATCH 1451/4122] i2c: designware: Simplify slave interrupt handler nesting Interrupt processing code in i2c-designware-slave.c is bit more readable if not divided into another subroutine. Also explicit IRQ_NONE and IRQ_HANDLED return values are more obvious. Signed-off-by: Jarkko Nikula Reviewed-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-slave.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/drivers/i2c/busses/i2c-designware-slave.c b/drivers/i2c/busses/i2c-designware-slave.c index 12f0417aa0ae..3c855cd45c34 100644 --- a/drivers/i2c/busses/i2c-designware-slave.c +++ b/drivers/i2c/busses/i2c-designware-slave.c @@ -147,9 +147,9 @@ static u32 i2c_dw_read_clear_intrbits_slave(struct dw_i2c_dev *dev) * Interrupt service routine. This gets called whenever an I2C slave interrupt * occurs. 
*/ - -static int i2c_dw_irq_handler_slave(struct dw_i2c_dev *dev) +static irqreturn_t i2c_dw_isr_slave(int this_irq, void *dev_id) { + struct dw_i2c_dev *dev = dev_id; u32 raw_stat, stat, enabled, tmp; u8 val = 0, slave_activity; @@ -159,7 +159,7 @@ static int i2c_dw_irq_handler_slave(struct dw_i2c_dev *dev) slave_activity = ((tmp & DW_IC_STATUS_SLAVE_ACTIVITY) >> 6); if (!enabled || !(raw_stat & ~DW_IC_INTR_ACTIVITY) || !dev->slave) - return 0; + return IRQ_NONE; stat = i2c_dw_read_clear_intrbits_slave(dev); dev_dbg(dev->dev, @@ -205,17 +205,7 @@ static int i2c_dw_irq_handler_slave(struct dw_i2c_dev *dev) if (stat & DW_IC_INTR_STOP_DET) i2c_slave_event(dev->slave, I2C_SLAVE_STOP, &val); - return 1; -} - -static irqreturn_t i2c_dw_isr_slave(int this_irq, void *dev_id) -{ - struct dw_i2c_dev *dev = dev_id; - int ret; - - ret = i2c_dw_irq_handler_slave(dev); - - return IRQ_RETVAL(ret); + return IRQ_HANDLED; } static const struct i2c_algorithm i2c_dw_algo = { From cdbd2f169bf1aff9f679a3e07d62244fc4341968 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Mon, 7 Nov 2022 15:42:43 +0200 Subject: [PATCH 1452/4122] i2c: designware: Do not process interrupt when device is suspended Do not return with interrupt handled if host controller is off and thus interrupt is originating from other device or is spurious. Add a check to detect when controller is runtime suspended or transitioning/reset. In latter case all raw interrupt status register bits may read one. In both cases return IRQ_NONE to indicate interrupt was not from this device. 
Signed-off-by: Jarkko Nikula Reviewed-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-master.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c index 1b7db2b58f31..d6fcf955dfc0 100644 --- a/drivers/i2c/busses/i2c-designware-master.c +++ b/drivers/i2c/busses/i2c-designware-master.c @@ -778,6 +778,8 @@ static irqreturn_t i2c_dw_isr(int this_irq, void *dev_id) dev_dbg(dev->dev, "enabled=%#x stat=%#x\n", enabled, stat); if (!enabled || !(stat & ~DW_IC_INTR_ACTIVITY)) return IRQ_NONE; + if (pm_runtime_suspended(dev->dev) || stat == GENMASK(31, 0)) + return IRQ_NONE; i2c_dw_irq_handler_master(dev); From 184c475ace922e4029f157080be559d49d70009f Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Mon, 7 Nov 2022 15:42:44 +0200 Subject: [PATCH 1453/4122] i2c: designware: Move debug print in i2c_dw_isr() It is kind of needless to print interrupt status when code immediately after that finds interrupt was not originating from this device. Therefore move it after spurious interrupt detection. 
Signed-off-by: Jarkko Nikula Reviewed-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-master.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c index d6fcf955dfc0..9c2c9d002dc3 100644 --- a/drivers/i2c/busses/i2c-designware-master.c +++ b/drivers/i2c/busses/i2c-designware-master.c @@ -775,11 +775,11 @@ static irqreturn_t i2c_dw_isr(int this_irq, void *dev_id) regmap_read(dev->map, DW_IC_ENABLE, &enabled); regmap_read(dev->map, DW_IC_RAW_INTR_STAT, &stat); - dev_dbg(dev->dev, "enabled=%#x stat=%#x\n", enabled, stat); if (!enabled || !(stat & ~DW_IC_INTR_ACTIVITY)) return IRQ_NONE; if (pm_runtime_suspended(dev->dev) || stat == GENMASK(31, 0)) return IRQ_NONE; + dev_dbg(dev->dev, "enabled=%#x stat=%#x\n", enabled, stat); i2c_dw_irq_handler_master(dev); From a92c3388b4ce34ee83f8ea398c1e00676a3dc467 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Mon, 7 Nov 2022 15:42:45 +0200 Subject: [PATCH 1454/4122] i2c: designware: Simplify master interrupt handler nesting In my opinion a few lines of spurious interrupt detection code can be moved to the actual master interrupt handling function i2c_dw_isr() without hurting readability. Signed-off-by: Jarkko Nikula Reviewed-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-master.c | 33 ++++++++-------------- 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c index 9c2c9d002dc3..dfb499e54c05 100644 --- a/drivers/i2c/busses/i2c-designware-master.c +++ b/drivers/i2c/busses/i2c-designware-master.c @@ -711,9 +711,18 @@ static u32 i2c_dw_read_clear_intrbits(struct dw_i2c_dev *dev) * Interrupt service routine. This gets called whenever an I2C master interrupt * occurs. 
*/ -static int i2c_dw_irq_handler_master(struct dw_i2c_dev *dev) +static irqreturn_t i2c_dw_isr(int this_irq, void *dev_id) { - u32 stat; + struct dw_i2c_dev *dev = dev_id; + u32 stat, enabled; + + regmap_read(dev->map, DW_IC_ENABLE, &enabled); + regmap_read(dev->map, DW_IC_RAW_INTR_STAT, &stat); + if (!enabled || !(stat & ~DW_IC_INTR_ACTIVITY)) + return IRQ_NONE; + if (pm_runtime_suspended(dev->dev) || stat == GENMASK(31, 0)) + return IRQ_NONE; + dev_dbg(dev->dev, "enabled=%#x stat=%#x\n", enabled, stat); stat = i2c_dw_read_clear_intrbits(dev); @@ -726,7 +735,7 @@ static int i2c_dw_irq_handler_master(struct dw_i2c_dev *dev) * the HW active). */ regmap_write(dev->map, DW_IC_INTR_MASK, 0); - return 0; + return IRQ_HANDLED; } if (stat & DW_IC_INTR_TX_ABRT) { @@ -765,24 +774,6 @@ tx_aborted: regmap_write(dev->map, DW_IC_INTR_MASK, stat); } - return 0; -} - -static irqreturn_t i2c_dw_isr(int this_irq, void *dev_id) -{ - struct dw_i2c_dev *dev = dev_id; - u32 stat, enabled; - - regmap_read(dev->map, DW_IC_ENABLE, &enabled); - regmap_read(dev->map, DW_IC_RAW_INTR_STAT, &stat); - if (!enabled || !(stat & ~DW_IC_INTR_ACTIVITY)) - return IRQ_NONE; - if (pm_runtime_suspended(dev->dev) || stat == GENMASK(31, 0)) - return IRQ_NONE; - dev_dbg(dev->dev, "enabled=%#x stat=%#x\n", enabled, stat); - - i2c_dw_irq_handler_master(dev); - return IRQ_HANDLED; } From fee61247b7f67a628bb24314b1a3a20d1f1c60f0 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Mon, 7 Nov 2022 15:42:46 +0200 Subject: [PATCH 1455/4122] i2c: designware: Remove common i2c_dw_disable_int() Commit 90312351fd1e ("i2c: designware: MASTER mode as separated driver") introduced disable_int pointer but there is no real use for it. Both i2c-designware-master.c and i2c-designware-slave.c set it to the same i2c_dw_disable_int() and scope is inside the same kernel module. 
Since i2c_dw_disable_int() is just masking interrupts and the direct DW_IC_INTR_MASK register write looks more clear in the code use that and remove it from common code. Signed-off-by: Jarkko Nikula Reviewed-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-common.c | 5 ----- drivers/i2c/busses/i2c-designware-core.h | 3 --- drivers/i2c/busses/i2c-designware-master.c | 9 ++++----- drivers/i2c/busses/i2c-designware-slave.c | 3 +-- 4 files changed, 5 insertions(+), 15 deletions(-) diff --git a/drivers/i2c/busses/i2c-designware-common.c b/drivers/i2c/busses/i2c-designware-common.c index c023b691441e..a3240ece55b2 100644 --- a/drivers/i2c/busses/i2c-designware-common.c +++ b/drivers/i2c/busses/i2c-designware-common.c @@ -625,10 +625,5 @@ void i2c_dw_disable(struct dw_i2c_dev *dev) i2c_dw_release_lock(dev); } -void i2c_dw_disable_int(struct dw_i2c_dev *dev) -{ - regmap_write(dev->map, DW_IC_INTR_MASK, 0); -} - MODULE_DESCRIPTION("Synopsys DesignWare I2C bus adapter core"); MODULE_LICENSE("GPL"); diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h index 457e6966f85e..49e5860b1665 100644 --- a/drivers/i2c/busses/i2c-designware-core.h +++ b/drivers/i2c/busses/i2c-designware-core.h @@ -232,7 +232,6 @@ struct reset_control; * -1 if there is no semaphore. 
* @shared_with_punit: true if this bus is shared with the SoCs PUNIT * @disable: function to disable the controller - * @disable_int: function to disable all interrupts * @init: function to initialize the I2C hardware * @set_sda_hold_time: callback to retrieve IP specific SDA hold timing * @mode: operation mode - DW_IC_MASTER or DW_IC_SLAVE @@ -290,7 +289,6 @@ struct dw_i2c_dev { int semaphore_idx; bool shared_with_punit; void (*disable)(struct dw_i2c_dev *dev); - void (*disable_int)(struct dw_i2c_dev *dev); int (*init)(struct dw_i2c_dev *dev); int (*set_sda_hold_time)(struct dw_i2c_dev *dev); int mode; @@ -331,7 +329,6 @@ int i2c_dw_handle_tx_abort(struct dw_i2c_dev *dev); int i2c_dw_set_fifo_size(struct dw_i2c_dev *dev); u32 i2c_dw_func(struct i2c_adapter *adap); void i2c_dw_disable(struct dw_i2c_dev *dev); -void i2c_dw_disable_int(struct dw_i2c_dev *dev); static inline void __i2c_dw_enable(struct dw_i2c_dev *dev) { diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c index dfb499e54c05..45f569155bfe 100644 --- a/drivers/i2c/busses/i2c-designware-master.c +++ b/drivers/i2c/busses/i2c-designware-master.c @@ -239,7 +239,7 @@ static void i2c_dw_xfer_init(struct dw_i2c_dev *dev) msgs[dev->msg_write_idx].addr | ic_tar); /* Enforce disabled interrupts (due to HW issues) */ - i2c_dw_disable_int(dev); + regmap_write(dev->map, DW_IC_INTR_MASK, 0); /* Enable the adapter */ __i2c_dw_enable(dev); @@ -299,7 +299,7 @@ static int amd_i2c_dw_xfer_quirk(struct i2c_adapter *adap, struct i2c_msg *msgs, dev->msgs = msgs; dev->msgs_num = num_msgs; i2c_dw_xfer_init(dev); - i2c_dw_disable_int(dev); + regmap_write(dev->map, DW_IC_INTR_MASK, 0); /* Initiate messages read/write transaction */ for (msg_wrt_idx = 0; msg_wrt_idx < num_msgs; msg_wrt_idx++) { @@ -770,7 +770,7 @@ tx_aborted: else if (unlikely(dev->flags & ACCESS_INTR_MASK)) { /* Workaround to trigger pending interrupt */ regmap_read(dev->map, DW_IC_INTR_MASK, &stat); - 
i2c_dw_disable_int(dev); + regmap_write(dev->map, DW_IC_INTR_MASK, 0); regmap_write(dev->map, DW_IC_INTR_MASK, stat); } @@ -871,7 +871,6 @@ int i2c_dw_probe_master(struct dw_i2c_dev *dev) dev->init = i2c_dw_init_master; dev->disable = i2c_dw_disable; - dev->disable_int = i2c_dw_disable_int; ret = i2c_dw_init_regmap(dev); if (ret) @@ -910,7 +909,7 @@ int i2c_dw_probe_master(struct dw_i2c_dev *dev) if (ret) return ret; - i2c_dw_disable_int(dev); + regmap_write(dev->map, DW_IC_INTR_MASK, 0); i2c_dw_release_lock(dev); ret = devm_request_irq(dev->dev, dev->irq, i2c_dw_isr, irq_flags, diff --git a/drivers/i2c/busses/i2c-designware-slave.c b/drivers/i2c/busses/i2c-designware-slave.c index 3c855cd45c34..c6d2e4c2ac23 100644 --- a/drivers/i2c/busses/i2c-designware-slave.c +++ b/drivers/i2c/busses/i2c-designware-slave.c @@ -87,7 +87,7 @@ static int i2c_dw_unreg_slave(struct i2c_client *slave) { struct dw_i2c_dev *dev = i2c_get_adapdata(slave->adapter); - dev->disable_int(dev); + regmap_write(dev->map, DW_IC_INTR_MASK, 0); dev->disable(dev); synchronize_irq(dev->irq); dev->slave = NULL; @@ -232,7 +232,6 @@ int i2c_dw_probe_slave(struct dw_i2c_dev *dev) dev->init = i2c_dw_init_slave; dev->disable = i2c_dw_disable; - dev->disable_int = i2c_dw_disable_int; ret = i2c_dw_init_regmap(dev); if (ret) From 966b7d3c738ab8d82363eff9e4c62e429697893e Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Mon, 7 Nov 2022 15:42:47 +0200 Subject: [PATCH 1456/4122] i2c: designware: Align defines in i2c-designware-core.h Align all defines to the same column. 
Signed-off-by: Jarkko Nikula Reviewed-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-core.h | 218 +++++++++++------------ 1 file changed, 109 insertions(+), 109 deletions(-) diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h index 49e5860b1665..0668888d557d 100644 --- a/drivers/i2c/busses/i2c-designware-core.h +++ b/drivers/i2c/busses/i2c-designware-core.h @@ -18,12 +18,12 @@ #include #include -#define DW_IC_DEFAULT_FUNCTIONALITY (I2C_FUNC_I2C | \ - I2C_FUNC_SMBUS_BYTE | \ - I2C_FUNC_SMBUS_BYTE_DATA | \ - I2C_FUNC_SMBUS_WORD_DATA | \ - I2C_FUNC_SMBUS_BLOCK_DATA | \ - I2C_FUNC_SMBUS_I2C_BLOCK) +#define DW_IC_DEFAULT_FUNCTIONALITY (I2C_FUNC_I2C | \ + I2C_FUNC_SMBUS_BYTE | \ + I2C_FUNC_SMBUS_BYTE_DATA | \ + I2C_FUNC_SMBUS_WORD_DATA | \ + I2C_FUNC_SMBUS_BLOCK_DATA | \ + I2C_FUNC_SMBUS_I2C_BLOCK) #define DW_IC_CON_MASTER BIT(0) #define DW_IC_CON_SPEED_STD (1 << 1) @@ -43,81 +43,81 @@ /* * Registers offset */ -#define DW_IC_CON 0x00 -#define DW_IC_TAR 0x04 -#define DW_IC_SAR 0x08 -#define DW_IC_DATA_CMD 0x10 -#define DW_IC_SS_SCL_HCNT 0x14 -#define DW_IC_SS_SCL_LCNT 0x18 -#define DW_IC_FS_SCL_HCNT 0x1c -#define DW_IC_FS_SCL_LCNT 0x20 -#define DW_IC_HS_SCL_HCNT 0x24 -#define DW_IC_HS_SCL_LCNT 0x28 -#define DW_IC_INTR_STAT 0x2c -#define DW_IC_INTR_MASK 0x30 -#define DW_IC_RAW_INTR_STAT 0x34 -#define DW_IC_RX_TL 0x38 -#define DW_IC_TX_TL 0x3c -#define DW_IC_CLR_INTR 0x40 -#define DW_IC_CLR_RX_UNDER 0x44 -#define DW_IC_CLR_RX_OVER 0x48 -#define DW_IC_CLR_TX_OVER 0x4c -#define DW_IC_CLR_RD_REQ 0x50 -#define DW_IC_CLR_TX_ABRT 0x54 -#define DW_IC_CLR_RX_DONE 0x58 -#define DW_IC_CLR_ACTIVITY 0x5c -#define DW_IC_CLR_STOP_DET 0x60 -#define DW_IC_CLR_START_DET 0x64 -#define DW_IC_CLR_GEN_CALL 0x68 -#define DW_IC_ENABLE 0x6c -#define DW_IC_STATUS 0x70 -#define DW_IC_TXFLR 0x74 -#define DW_IC_RXFLR 0x78 -#define DW_IC_SDA_HOLD 0x7c -#define DW_IC_TX_ABRT_SOURCE 0x80 -#define DW_IC_ENABLE_STATUS 0x9c 
-#define DW_IC_CLR_RESTART_DET 0xa8 -#define DW_IC_COMP_PARAM_1 0xf4 -#define DW_IC_COMP_VERSION 0xf8 -#define DW_IC_SDA_HOLD_MIN_VERS 0x3131312A -#define DW_IC_COMP_TYPE 0xfc -#define DW_IC_COMP_TYPE_VALUE 0x44570140 +#define DW_IC_CON 0x00 +#define DW_IC_TAR 0x04 +#define DW_IC_SAR 0x08 +#define DW_IC_DATA_CMD 0x10 +#define DW_IC_SS_SCL_HCNT 0x14 +#define DW_IC_SS_SCL_LCNT 0x18 +#define DW_IC_FS_SCL_HCNT 0x1c +#define DW_IC_FS_SCL_LCNT 0x20 +#define DW_IC_HS_SCL_HCNT 0x24 +#define DW_IC_HS_SCL_LCNT 0x28 +#define DW_IC_INTR_STAT 0x2c +#define DW_IC_INTR_MASK 0x30 +#define DW_IC_RAW_INTR_STAT 0x34 +#define DW_IC_RX_TL 0x38 +#define DW_IC_TX_TL 0x3c +#define DW_IC_CLR_INTR 0x40 +#define DW_IC_CLR_RX_UNDER 0x44 +#define DW_IC_CLR_RX_OVER 0x48 +#define DW_IC_CLR_TX_OVER 0x4c +#define DW_IC_CLR_RD_REQ 0x50 +#define DW_IC_CLR_TX_ABRT 0x54 +#define DW_IC_CLR_RX_DONE 0x58 +#define DW_IC_CLR_ACTIVITY 0x5c +#define DW_IC_CLR_STOP_DET 0x60 +#define DW_IC_CLR_START_DET 0x64 +#define DW_IC_CLR_GEN_CALL 0x68 +#define DW_IC_ENABLE 0x6c +#define DW_IC_STATUS 0x70 +#define DW_IC_TXFLR 0x74 +#define DW_IC_RXFLR 0x78 +#define DW_IC_SDA_HOLD 0x7c +#define DW_IC_TX_ABRT_SOURCE 0x80 +#define DW_IC_ENABLE_STATUS 0x9c +#define DW_IC_CLR_RESTART_DET 0xa8 +#define DW_IC_COMP_PARAM_1 0xf4 +#define DW_IC_COMP_VERSION 0xf8 +#define DW_IC_SDA_HOLD_MIN_VERS 0x3131312A +#define DW_IC_COMP_TYPE 0xfc +#define DW_IC_COMP_TYPE_VALUE 0x44570140 -#define DW_IC_INTR_RX_UNDER BIT(0) -#define DW_IC_INTR_RX_OVER BIT(1) -#define DW_IC_INTR_RX_FULL BIT(2) -#define DW_IC_INTR_TX_OVER BIT(3) -#define DW_IC_INTR_TX_EMPTY BIT(4) -#define DW_IC_INTR_RD_REQ BIT(5) -#define DW_IC_INTR_TX_ABRT BIT(6) -#define DW_IC_INTR_RX_DONE BIT(7) -#define DW_IC_INTR_ACTIVITY BIT(8) -#define DW_IC_INTR_STOP_DET BIT(9) -#define DW_IC_INTR_START_DET BIT(10) -#define DW_IC_INTR_GEN_CALL BIT(11) -#define DW_IC_INTR_RESTART_DET BIT(12) +#define DW_IC_INTR_RX_UNDER BIT(0) +#define DW_IC_INTR_RX_OVER BIT(1) +#define DW_IC_INTR_RX_FULL 
BIT(2) +#define DW_IC_INTR_TX_OVER BIT(3) +#define DW_IC_INTR_TX_EMPTY BIT(4) +#define DW_IC_INTR_RD_REQ BIT(5) +#define DW_IC_INTR_TX_ABRT BIT(6) +#define DW_IC_INTR_RX_DONE BIT(7) +#define DW_IC_INTR_ACTIVITY BIT(8) +#define DW_IC_INTR_STOP_DET BIT(9) +#define DW_IC_INTR_START_DET BIT(10) +#define DW_IC_INTR_GEN_CALL BIT(11) +#define DW_IC_INTR_RESTART_DET BIT(12) -#define DW_IC_INTR_DEFAULT_MASK (DW_IC_INTR_RX_FULL | \ - DW_IC_INTR_TX_ABRT | \ - DW_IC_INTR_STOP_DET) -#define DW_IC_INTR_MASTER_MASK (DW_IC_INTR_DEFAULT_MASK | \ - DW_IC_INTR_TX_EMPTY) -#define DW_IC_INTR_SLAVE_MASK (DW_IC_INTR_DEFAULT_MASK | \ - DW_IC_INTR_RX_UNDER | \ - DW_IC_INTR_RD_REQ) +#define DW_IC_INTR_DEFAULT_MASK (DW_IC_INTR_RX_FULL | \ + DW_IC_INTR_TX_ABRT | \ + DW_IC_INTR_STOP_DET) +#define DW_IC_INTR_MASTER_MASK (DW_IC_INTR_DEFAULT_MASK | \ + DW_IC_INTR_TX_EMPTY) +#define DW_IC_INTR_SLAVE_MASK (DW_IC_INTR_DEFAULT_MASK | \ + DW_IC_INTR_RX_UNDER | \ + DW_IC_INTR_RD_REQ) -#define DW_IC_STATUS_ACTIVITY BIT(0) -#define DW_IC_STATUS_TFE BIT(2) -#define DW_IC_STATUS_RFNE BIT(3) -#define DW_IC_STATUS_MASTER_ACTIVITY BIT(5) -#define DW_IC_STATUS_SLAVE_ACTIVITY BIT(6) +#define DW_IC_STATUS_ACTIVITY BIT(0) +#define DW_IC_STATUS_TFE BIT(2) +#define DW_IC_STATUS_RFNE BIT(3) +#define DW_IC_STATUS_MASTER_ACTIVITY BIT(5) +#define DW_IC_STATUS_SLAVE_ACTIVITY BIT(6) -#define DW_IC_SDA_HOLD_RX_SHIFT 16 -#define DW_IC_SDA_HOLD_RX_MASK GENMASK(23, 16) +#define DW_IC_SDA_HOLD_RX_SHIFT 16 +#define DW_IC_SDA_HOLD_RX_MASK GENMASK(23, 16) -#define DW_IC_ERR_TX_ABRT 0x1 +#define DW_IC_ERR_TX_ABRT 0x1 -#define DW_IC_TAR_10BITADDR_MASTER BIT(12) +#define DW_IC_TAR_10BITADDR_MASTER BIT(12) #define DW_IC_COMP_PARAM_1_SPEED_MODE_HIGH (BIT(2) | BIT(3)) #define DW_IC_COMP_PARAM_1_SPEED_MODE_MASK GENMASK(3, 2) @@ -125,16 +125,16 @@ /* * Sofware status flags */ -#define STATUS_ACTIVE BIT(0) -#define STATUS_WRITE_IN_PROGRESS BIT(1) -#define STATUS_READ_IN_PROGRESS BIT(2) -#define STATUS_MASK GENMASK(2, 0) +#define 
STATUS_ACTIVE BIT(0) +#define STATUS_WRITE_IN_PROGRESS BIT(1) +#define STATUS_READ_IN_PROGRESS BIT(2) +#define STATUS_MASK GENMASK(2, 0) /* * operation modes */ -#define DW_IC_MASTER 0 -#define DW_IC_SLAVE 1 +#define DW_IC_MASTER 0 +#define DW_IC_SLAVE 1 /* * Hardware abort codes from the DW_IC_TX_ABRT_SOURCE register @@ -142,20 +142,20 @@ * Only expected abort codes are listed here * refer to the datasheet for the full list */ -#define ABRT_7B_ADDR_NOACK 0 -#define ABRT_10ADDR1_NOACK 1 -#define ABRT_10ADDR2_NOACK 2 -#define ABRT_TXDATA_NOACK 3 -#define ABRT_GCALL_NOACK 4 -#define ABRT_GCALL_READ 5 -#define ABRT_SBYTE_ACKDET 7 -#define ABRT_SBYTE_NORSTRT 9 -#define ABRT_10B_RD_NORSTRT 10 -#define ABRT_MASTER_DIS 11 -#define ARB_LOST 12 -#define ABRT_SLAVE_FLUSH_TXFIFO 13 -#define ABRT_SLAVE_ARBLOST 14 -#define ABRT_SLAVE_RD_INTX 15 +#define ABRT_7B_ADDR_NOACK 0 +#define ABRT_10ADDR1_NOACK 1 +#define ABRT_10ADDR2_NOACK 2 +#define ABRT_TXDATA_NOACK 3 +#define ABRT_GCALL_NOACK 4 +#define ABRT_GCALL_READ 5 +#define ABRT_SBYTE_ACKDET 7 +#define ABRT_SBYTE_NORSTRT 9 +#define ABRT_10B_RD_NORSTRT 10 +#define ABRT_MASTER_DIS 11 +#define ARB_LOST 12 +#define ABRT_SLAVE_FLUSH_TXFIFO 13 +#define ABRT_SLAVE_ARBLOST 14 +#define ABRT_SLAVE_RD_INTX 15 #define DW_IC_TX_ABRT_7B_ADDR_NOACK BIT(ABRT_7B_ADDR_NOACK) #define DW_IC_TX_ABRT_10ADDR1_NOACK BIT(ABRT_10ADDR1_NOACK) @@ -172,11 +172,11 @@ #define DW_IC_RX_ABRT_SLAVE_ARBLOST BIT(ABRT_SLAVE_ARBLOST) #define DW_IC_RX_ABRT_SLAVE_FLUSH_TXFIFO BIT(ABRT_SLAVE_FLUSH_TXFIFO) -#define DW_IC_TX_ABRT_NOACK (DW_IC_TX_ABRT_7B_ADDR_NOACK | \ - DW_IC_TX_ABRT_10ADDR1_NOACK | \ - DW_IC_TX_ABRT_10ADDR2_NOACK | \ - DW_IC_TX_ABRT_TXDATA_NOACK | \ - DW_IC_TX_ABRT_GCALL_NOACK) +#define DW_IC_TX_ABRT_NOACK (DW_IC_TX_ABRT_7B_ADDR_NOACK | \ + DW_IC_TX_ABRT_10ADDR1_NOACK | \ + DW_IC_TX_ABRT_10ADDR2_NOACK | \ + DW_IC_TX_ABRT_TXDATA_NOACK | \ + DW_IC_TX_ABRT_GCALL_NOACK) struct clk; struct device; @@ -295,21 +295,21 @@ struct dw_i2c_dev { struct 
i2c_bus_recovery_info rinfo; }; -#define ACCESS_INTR_MASK BIT(0) -#define ACCESS_NO_IRQ_SUSPEND BIT(1) -#define ARBITRATION_SEMAPHORE BIT(2) +#define ACCESS_INTR_MASK BIT(0) +#define ACCESS_NO_IRQ_SUSPEND BIT(1) +#define ARBITRATION_SEMAPHORE BIT(2) -#define MODEL_MSCC_OCELOT BIT(8) -#define MODEL_BAIKAL_BT1 BIT(9) -#define MODEL_AMD_NAVI_GPU BIT(10) -#define MODEL_MASK GENMASK(11, 8) +#define MODEL_MSCC_OCELOT BIT(8) +#define MODEL_BAIKAL_BT1 BIT(9) +#define MODEL_AMD_NAVI_GPU BIT(10) +#define MODEL_MASK GENMASK(11, 8) /* * Enable UCSI interrupt by writing 0xd at register * offset 0x474 specified in hardware specification. */ -#define AMD_UCSI_INTR_REG 0x474 -#define AMD_UCSI_INTR_EN 0xd +#define AMD_UCSI_INTR_REG 0x474 +#define AMD_UCSI_INTR_EN 0xd struct i2c_dw_semaphore_callbacks { int (*probe)(struct dw_i2c_dev *dev); From 4bae6da1cbf4658a92e6f58da30bf536746d1e5d Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Mon, 7 Nov 2022 15:42:48 +0200 Subject: [PATCH 1457/4122] i2c: designware: Add comment to custom register value constants DW_IC_COMP_VERSION register contains the ASCII representation of the Synopsys component version. Here 0x3131312A == "111*" means version 1.11* required for DW_IC_SDA_HOLD register availability where '*' means any letter starting from 'a'. DW_IC_COMP_TYPE is constant and is derived from two ASCII letters "DW" followed by a 16-bit unsigned number. 
Suggested-by: Andy Shevchenko Signed-off-by: Jarkko Nikula Reviewed-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-core.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h index 0668888d557d..95ebc5eaa5d1 100644 --- a/drivers/i2c/busses/i2c-designware-core.h +++ b/drivers/i2c/busses/i2c-designware-core.h @@ -79,9 +79,9 @@ #define DW_IC_CLR_RESTART_DET 0xa8 #define DW_IC_COMP_PARAM_1 0xf4 #define DW_IC_COMP_VERSION 0xf8 -#define DW_IC_SDA_HOLD_MIN_VERS 0x3131312A +#define DW_IC_SDA_HOLD_MIN_VERS 0x3131312A /* "111*" == v1.11* */ #define DW_IC_COMP_TYPE 0xfc -#define DW_IC_COMP_TYPE_VALUE 0x44570140 +#define DW_IC_COMP_TYPE_VALUE 0x44570140 /* "DW" + 0x0140 */ #define DW_IC_INTR_RX_UNDER BIT(0) #define DW_IC_INTR_RX_OVER BIT(1) From c57351a75d013c30e4a726aef1ad441676a99da4 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Sat, 12 Nov 2022 17:43:22 +0800 Subject: [PATCH 1458/4122] KVM: Push dirty information unconditionally to backup bitmap In mark_page_dirty_in_slot(), we bail out when no running vcpu exists and a running vcpu context is strictly required by architecture. It may cause a backward compatibility issue. Currently, saving vgic/its tables is the only known case where no running vcpu context is expected. We may have other unknown cases where no running vcpu context exists and it's reported by the warning message and we bail out without pushing the dirty information to the backup bitmap. For this, the application is going to enable the backup bitmap for the unknown cases. However, the dirty information can't be pushed to the backup bitmap even though the backup bitmap is enabled for those unknown cases in the application, until the unknown cases are added to the allowed list of non-running vcpu context with extra code changes to the host kernel.
In order to make the new application, where the backup bitmap has been enabled, to work with the unchanged host, we continue to push the dirty information to the backup bitmap instead of bailing out early. With the added check on 'memslot->dirty_bitmap' to mark_page_dirty_in_slot(), the kernel crash is avoided silently by the combined conditions: no running vcpu context, kvm_arch_allow_write_without_running_vcpu() returns 'true', and the backup bitmap (KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP) isn't enabled yet. Suggested-by: Sean Christopherson Signed-off-by: Gavin Shan Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221112094322.21911-1-gshan@redhat.com --- virt/kvm/kvm_main.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index be40d1ce6e91..0fa541ba8ab5 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3308,8 +3308,7 @@ void mark_page_dirty_in_slot(struct kvm *kvm, if (WARN_ON_ONCE(vcpu && vcpu->kvm != kvm)) return; - if (WARN_ON_ONCE(!kvm_arch_allow_write_without_running_vcpu(kvm) && !vcpu)) - return; + WARN_ON_ONCE(!vcpu && !kvm_arch_allow_write_without_running_vcpu(kvm)); #endif if (memslot && kvm_slot_dirty_track_enabled(memslot)) { @@ -3318,7 +3317,7 @@ void mark_page_dirty_in_slot(struct kvm *kvm, if (kvm->dirty_ring_size && vcpu) kvm_dirty_ring_push(vcpu, slot, rel_gfn); - else + else if (memslot->dirty_bitmap) set_bit_le(rel_gfn, memslot->dirty_bitmap); } } From 8502bee5584235943c4d371597c740d6779991db Mon Sep 17 00:00:00 2001 From: Minghao Chi Date: Thu, 10 Nov 2022 17:23:42 +0800 Subject: [PATCH 1459/4122] i2c: imx: use devm_platform_get_and_ioremap_resource() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert platform_get_resource(), devm_ioremap_resource() to a single call to devm_platform_get_and_ioremap_resource(), as this is exactly what this function does. 
Reported-by: Zeal Robot Signed-off-by: Minghao Chi Reviewed-by: Mukesh Ojha Reviewed-by: Uwe Kleine-König Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-imx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index 3082183bd66a..1ce0cf7a323f 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -1449,8 +1449,7 @@ static int i2c_imx_probe(struct platform_device *pdev) if (irq < 0) return irq; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - base = devm_ioremap_resource(&pdev->dev, res); + base = devm_platform_get_and_ioremap_resource(pdev, 0, &res); if (IS_ERR(base)) return PTR_ERR(base); From e826192cc26bd69746bbf22e6bdf72b87cb3d97b Mon Sep 17 00:00:00 2001 From: Arminder Singh Date: Sat, 5 Nov 2022 07:56:49 -0400 Subject: [PATCH 1460/4122] i2c: pasemi: PASemi I2C controller IRQ enablement This patch adds IRQ support to the PASemi I2C controller driver to increase the performance of I2C transactions on platforms with PASemi I2C controllers. While primarily intended for Apple silicon platforms, this patch should also help in enabling IRQ support for older PASemi hardware as well should the need arise. This version of the patch has been tested on an M1 Ultra Mac Studio, as well as an M1 MacBook Pro, and userspace launches successfully while using the IRQ path for I2C transactions.
Signed-off-by: Arminder Singh Reviewed-by: Sven Peter Reviewed-by: Hector Martin Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-pasemi-core.c | 32 ++++++++++++++++++++---- drivers/i2c/busses/i2c-pasemi-core.h | 5 ++++ drivers/i2c/busses/i2c-pasemi-platform.c | 6 +++++ 3 files changed, 38 insertions(+), 5 deletions(-) diff --git a/drivers/i2c/busses/i2c-pasemi-core.c b/drivers/i2c/busses/i2c-pasemi-core.c index 9028ffb58cc0..7d54a9f34c74 100644 --- a/drivers/i2c/busses/i2c-pasemi-core.c +++ b/drivers/i2c/busses/i2c-pasemi-core.c @@ -21,6 +21,7 @@ #define REG_MTXFIFO 0x00 #define REG_MRXFIFO 0x04 #define REG_SMSTA 0x14 +#define REG_IMASK 0x18 #define REG_CTL 0x1c #define REG_REV 0x28 @@ -66,6 +67,7 @@ static void pasemi_reset(struct pasemi_smbus *smbus) val |= CTL_EN; reg_write(smbus, REG_CTL, val); + reinit_completion(&smbus->irq_completion); } static void pasemi_smb_clear(struct pasemi_smbus *smbus) @@ -78,14 +80,21 @@ static void pasemi_smb_clear(struct pasemi_smbus *smbus) static int pasemi_smb_waitready(struct pasemi_smbus *smbus) { - int timeout = 10; + int timeout = 100; unsigned int status; - status = reg_read(smbus, REG_SMSTA); - - while (!(status & SMSTA_XEN) && timeout--) { - msleep(1); + if (smbus->use_irq) { + reinit_completion(&smbus->irq_completion); + reg_write(smbus, REG_IMASK, SMSTA_XEN | SMSTA_MTN); + wait_for_completion_timeout(&smbus->irq_completion, msecs_to_jiffies(100)); + reg_write(smbus, REG_IMASK, 0); status = reg_read(smbus, REG_SMSTA); + } else { + status = reg_read(smbus, REG_SMSTA); + while (!(status & SMSTA_XEN) && timeout--) { + msleep(1); + status = reg_read(smbus, REG_SMSTA); + } } /* Got NACK? 
*/ @@ -344,10 +353,14 @@ int pasemi_i2c_common_probe(struct pasemi_smbus *smbus) /* set up the sysfs linkage to our parent device */ smbus->adapter.dev.parent = smbus->dev; + smbus->use_irq = 0; + init_completion(&smbus->irq_completion); if (smbus->hw_rev != PASEMI_HW_REV_PCI) smbus->hw_rev = reg_read(smbus, REG_REV); + reg_write(smbus, REG_IMASK, 0); + pasemi_reset(smbus); error = devm_i2c_add_adapter(smbus->dev, &smbus->adapter); @@ -356,3 +369,12 @@ int pasemi_i2c_common_probe(struct pasemi_smbus *smbus) return 0; } + +irqreturn_t pasemi_irq_handler(int irq, void *dev_id) +{ + struct pasemi_smbus *smbus = dev_id; + + reg_write(smbus, REG_IMASK, 0); + complete(&smbus->irq_completion); + return IRQ_HANDLED; +} diff --git a/drivers/i2c/busses/i2c-pasemi-core.h b/drivers/i2c/busses/i2c-pasemi-core.h index 4655124a37f3..88821f4e8a9f 100644 --- a/drivers/i2c/busses/i2c-pasemi-core.h +++ b/drivers/i2c/busses/i2c-pasemi-core.h @@ -7,6 +7,7 @@ #include #include #include +#include #define PASEMI_HW_REV_PCI -1 @@ -16,6 +17,10 @@ struct pasemi_smbus { void __iomem *ioaddr; unsigned int clk_div; int hw_rev; + int use_irq; + struct completion irq_completion; }; int pasemi_i2c_common_probe(struct pasemi_smbus *smbus); + +irqreturn_t pasemi_irq_handler(int irq, void *dev_id); diff --git a/drivers/i2c/busses/i2c-pasemi-platform.c b/drivers/i2c/busses/i2c-pasemi-platform.c index 88a54aaf7e3c..e35945a91dbe 100644 --- a/drivers/i2c/busses/i2c-pasemi-platform.c +++ b/drivers/i2c/busses/i2c-pasemi-platform.c @@ -49,6 +49,7 @@ static int pasemi_platform_i2c_probe(struct platform_device *pdev) struct pasemi_smbus *smbus; u32 frequency; int error; + int irq_num; data = devm_kzalloc(dev, sizeof(struct pasemi_platform_i2c_data), GFP_KERNEL); @@ -82,6 +83,11 @@ static int pasemi_platform_i2c_probe(struct platform_device *pdev) if (error) goto out_clk_disable; + irq_num = platform_get_irq(pdev, 0); + error = devm_request_irq(smbus->dev, irq_num, pasemi_irq_handler, 0, "pasemi_apple_i2c", 
(void *)smbus); + + if (!error) + smbus->use_irq = 1; platform_set_drvdata(pdev, data); return 0; From dc901d98b1fe6e52ab81cd3e0879379168e06daa Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 10 Nov 2022 17:27:15 -0800 Subject: [PATCH 1461/4122] dmaengine: idxd: Fix crc_val field for completion record The crc_val in the completion record should be 64 bits and not 32 bits. Fixes: 4ac823e9cd85 ("dmaengine: idxd: fix delta_rec and crc size field for completion record") Reported-by: Nirav N Shah Signed-off-by: Fenghua Yu Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20221111012715.2031481-1-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- include/uapi/linux/idxd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index 2b9e7feba3f3..1d553bedbdb5 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -295,7 +295,7 @@ struct dsa_completion_record { }; uint32_t delta_rec_size; - uint32_t crc_val; + uint64_t crc_val; /* DIF check & strip */ struct { From 22c354cf3fec6aa52cf2df6685b33ce5f265edf8 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Wed, 9 Nov 2022 12:12:27 +0100 Subject: [PATCH 1462/4122] dt-bindings: dmaengine: qcom: gpi: add compatible for SM6375 Document the compatible for GPI DMA controller on SM6375 SoC. 
Signed-off-by: Konrad Dybcio Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221109111236.46003-3-konrad.dybcio@linaro.org Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/qcom,gpi.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml index 232895fa1d8d..e7ba1c47a88e 100644 --- a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml +++ b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml @@ -26,6 +26,7 @@ properties: - enum: - qcom,sc7280-gpi-dma - qcom,sm6115-gpi-dma + - qcom,sm6375-gpi-dma - qcom,sm8350-gpi-dma - qcom,sm8450-gpi-dma - const: qcom,sm6350-gpi-dma From 444eef7d5695393f214d83180f3e4bb99621cd07 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 13 Nov 2022 21:34:02 +0100 Subject: [PATCH 1463/4122] dmaengine: idxd: Remove linux/msi.h include Nothing in this file needs anything from linux/msi.h Signed-off-by: Thomas Gleixner Cc: Fenghua Yu Cc: Dave Jiang Cc: Vinod Koul Cc: dmaengine@vger.kernel.org Link: https://lore.kernel.org/r/20221113202428.573536003@linutronix.de Signed-off-by: Vinod Koul --- drivers/dma/idxd/device.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 6f44fa8f78a5..06f5d3783d77 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -7,7 +7,6 @@ #include #include #include -#include #include #include "../dmaengine.h" #include "idxd.h" From d57b2a65cde743a490a848236641fe9aa5536a9b Mon Sep 17 00:00:00 2001 From: Akhil R Date: Thu, 10 Nov 2022 22:47:46 +0530 Subject: [PATCH 1464/4122] dt-bindings: dmaengine: Add dma-channel-mask to Tegra GPCDMA Add dma-channel-mask property in Tegra GPCDMA document. The property would help to specify the channels to be used in kernel and reserve few for the firmware. This was previously achieved by limiting the channel number to 31 in the driver. 
This is wrong and does not align with the hardware. Correct this and set the max interrupts to 32. Signed-off-by: Akhil R Acked-by: Thierry Reding Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221110171748.40304-2-akhilrajeev@nvidia.com Signed-off-by: Vinod Koul --- .../devicetree/bindings/dma/nvidia,tegra186-gpc-dma.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/dma/nvidia,tegra186-gpc-dma.yaml b/Documentation/devicetree/bindings/dma/nvidia,tegra186-gpc-dma.yaml index c8894476b6ab..851bd50ee67f 100644 --- a/Documentation/devicetree/bindings/dma/nvidia,tegra186-gpc-dma.yaml +++ b/Documentation/devicetree/bindings/dma/nvidia,tegra186-gpc-dma.yaml @@ -39,7 +39,7 @@ properties: Should contain all of the per-channel DMA interrupts in ascending order with respect to the DMA channel index. minItems: 1 - maxItems: 31 + maxItems: 32 resets: maxItems: 1 @@ -52,6 +52,9 @@ properties: dma-coherent: true + dma-channel-mask: + maxItems: 1 + required: - compatible - reg @@ -60,6 +63,7 @@ required: - reset-names - "#dma-cells" - iommus + - dma-channel-mask additionalProperties: false @@ -108,5 +112,6 @@ examples: #dma-cells = <1>; iommus = <&smmu TEGRA186_SID_GPCDMA_0>; dma-coherent; + dma-channel-mask = <0xfffffffe>; }; ... From 3a0c95b61385f583424f44e79c15f1bdf050776d Mon Sep 17 00:00:00 2001 From: Akhil R Date: Thu, 10 Nov 2022 22:47:48 +0530 Subject: [PATCH 1465/4122] dmaengine: tegra: Add support for dma-channel-mask Add support for dma-channel-mask so that only the specified channels are used. This helps to reserve some channels for the firmware. This was initially achieved by limiting the channel number to 31 in the driver and adjusting the register address to skip channel0 which was reserved for a firmware. This is wrong and does not align with the hardware. Now, with this change, the driver can align more to the actual hardware which has 32 channels. 
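But this implies that there will be a break in the ABI and the device tree needs to be updated along with this change for the driver to pick up the right interrupt corresponding to the channel. Reviewed-by: Jon Hunter Link: https://lore.kernel.org/all/Y2EFoG1H9YpfxRjs@orome/ Signed-off-by: Akhil R Link: https://lore.kernel.org/r/20221110171748.40304-4-akhilrajeev@nvidia.com Signed-off-by: Vinod Koul --- drivers/dma/tegra186-gpc-dma.c | 37 +++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/drivers/dma/tegra186-gpc-dma.c b/drivers/dma/tegra186-gpc-dma.c index fa9bda4a2bc6..1d1180db6d4e 100644 --- a/drivers/dma/tegra186-gpc-dma.c +++ b/drivers/dma/tegra186-gpc-dma.c @@ -161,7 +161,10 @@ #define TEGRA_GPCDMA_BURST_COMPLETION_TIMEOUT 5000 /* 5 msec */ /* Channel base address offset from GPCDMA base address */ -#define TEGRA_GPCDMA_CHANNEL_BASE_ADD_OFFSET 0x20000 +#define TEGRA_GPCDMA_CHANNEL_BASE_ADDR_OFFSET 0x10000 + +/* Default channel mask reserving channel0 */ +#define TEGRA_GPCDMA_DEFAULT_CHANNEL_MASK 0xfffffffe struct tegra_dma; struct tegra_dma_channel; @@ -246,6 +249,7 @@ struct tegra_dma { const struct tegra_dma_chip_data *chip_data; unsigned long sid_m2d_reserved; unsigned long sid_d2m_reserved; + u32 chan_mask; void __iomem *base_addr; struct device *dev; struct dma_device dma_dev; @@ -1288,7 +1292,7 @@ static struct dma_chan *tegra_dma_of_xlate(struct of_phandle_args *dma_spec, } static const struct tegra_dma_chip_data tegra186_dma_chip_data = { - .nr_channels = 31, + .nr_channels = 32, .channel_reg_size = SZ_64K, .max_dma_count = SZ_1G, .hw_support_pause = false, @@ -1296,7 +1300,7 @@ static const struct tegra_dma_chip_data tegra186_dma_chip_data = { }; static const struct tegra_dma_chip_data tegra194_dma_chip_data = { - .nr_channels = 31, + .nr_channels = 32, .channel_reg_size = SZ_64K, .max_dma_count = SZ_1G, .hw_support_pause = true, @@ -1304,7 +1308,7 @@ static const struct tegra_dma_chip_data tegra194_dma_chip_data = { }; static const struct tegra_dma_chip_data tegra234_dma_chip_data = { - .nr_channels = 31, + .nr_channels = 32, .channel_reg_size = SZ_64K, .max_dma_count = SZ_1G, .hw_support_pause = true, @@ -1380,15 +1384,28 @@ static int tegra_dma_probe(struct platform_device *pdev) } stream_id = iommu_spec->ids[0] & 0xffff; + ret = device_property_read_u32(&pdev->dev, "dma-channel-mask", + &tdma->chan_mask); + if (ret) { + dev_warn(&pdev->dev, + "Missing dma-channel-mask property, using default channel mask %#x\n", + TEGRA_GPCDMA_DEFAULT_CHANNEL_MASK); + tdma->chan_mask = TEGRA_GPCDMA_DEFAULT_CHANNEL_MASK; + } + INIT_LIST_HEAD(&tdma->dma_dev.channels); for (i = 0; i < cdata->nr_channels; i++) { struct tegra_dma_channel *tdc = &tdma->channels[i]; + /* Check for channel mask */ + if (!(tdma->chan_mask & BIT(i))) + continue; + tdc->irq = platform_get_irq(pdev, i); if (tdc->irq < 0) return tdc->irq; - tdc->chan_base_offset = TEGRA_GPCDMA_CHANNEL_BASE_ADD_OFFSET + + tdc->chan_base_offset = TEGRA_GPCDMA_CHANNEL_BASE_ADDR_OFFSET + i * cdata->channel_reg_size; snprintf(tdc->name, sizeof(tdc->name), "gpcdma.%d", i); tdc->tdma = tdma; @@ -1449,8 +1466,8 @@ static int tegra_dma_probe(struct platform_device *pdev) return ret; } - dev_info(&pdev->dev, "GPC DMA driver register %d channels\n", - cdata->nr_channels); + dev_info(&pdev->dev, "GPC DMA driver register %lu channels\n", + hweight_long(tdma->chan_mask)); return 0; } @@ -1473,6 +1490,9 @@ static int __maybe_unused tegra_dma_pm_suspend(struct device *dev) for (i = 0; i < tdma->chip_data->nr_channels; i++) { struct tegra_dma_channel *tdc = &tdma->channels[i]; + if (!(tdma->chan_mask & BIT(i))) + continue; + if (tdc->dma_desc) { dev_err(tdma->dev, "channel %u busy\n", i); return -EBUSY; @@ -1492,6 +1512,9 @@ static int __maybe_unused tegra_dma_pm_resume(struct device *dev) for (i = 0; i < tdma->chip_data->nr_channels; i++) { struct tegra_dma_channel *tdc = &tdma->channels[i]; + if (!(tdma->chan_mask & BIT(i)))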
tegra194_dma_chip_data = { }; static const struct tegra_dma_chip_data tegra234_dma_chip_data = { - .nr_channels = 31, + .nr_channels = 32, .channel_reg_size = SZ_64K, .max_dma_count = SZ_1G, .hw_support_pause = true, @@ -1380,15 +1384,28 @@ static int tegra_dma_probe(struct platform_device *pdev) } stream_id = iommu_spec->ids[0] & 0xffff; + ret = device_property_read_u32(&pdev->dev, "dma-channel-mask", + &tdma->chan_mask); + if (ret) { + dev_warn(&pdev->dev, + "Missing dma-channel-mask property, using default channel mask %#x\n", + TEGRA_GPCDMA_DEFAULT_CHANNEL_MASK); + tdma->chan_mask = TEGRA_GPCDMA_DEFAULT_CHANNEL_MASK; + } + INIT_LIST_HEAD(&tdma->dma_dev.channels); for (i = 0; i < cdata->nr_channels; i++) { struct tegra_dma_channel *tdc = &tdma->channels[i]; + /* Check for channel mask */ + if (!(tdma->chan_mask & BIT(i))) + continue; + tdc->irq = platform_get_irq(pdev, i); if (tdc->irq < 0) return tdc->irq; - tdc->chan_base_offset = TEGRA_GPCDMA_CHANNEL_BASE_ADD_OFFSET + + tdc->chan_base_offset = TEGRA_GPCDMA_CHANNEL_BASE_ADDR_OFFSET + i * cdata->channel_reg_size; snprintf(tdc->name, sizeof(tdc->name), "gpcdma.%d", i); tdc->tdma = tdma; @@ -1449,8 +1466,8 @@ static int tegra_dma_probe(struct platform_device *pdev) return ret; } - dev_info(&pdev->dev, "GPC DMA driver register %d channels\n", - cdata->nr_channels); + dev_info(&pdev->dev, "GPC DMA driver register %lu channels\n", + hweight_long(tdma->chan_mask)); return 0; } @@ -1473,6 +1490,9 @@ static int __maybe_unused tegra_dma_pm_suspend(struct device *dev) for (i = 0; i < tdma->chip_data->nr_channels; i++) { struct tegra_dma_channel *tdc = &tdma->channels[i]; + if (!(tdma->chan_mask & BIT(i))) + continue; + if (tdc->dma_desc) { dev_err(tdma->dev, "channel %u busy\n", i); return -EBUSY; @@ -1492,6 +1512,9 @@ static int __maybe_unused tegra_dma_pm_resume(struct device *dev) for (i = 0; i < tdma->chip_data->nr_channels; i++) { struct tegra_dma_channel *tdc = &tdma->channels[i]; + if (!(tdma->chan_mask & BIT(i))) 
+ continue; + tegra_dma_program_sid(tdc, tdc->stream_id); } From 3574cfdca28543e2e8db649297cd6659ea8e4bb8 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Fri, 11 Nov 2022 11:55:29 +0200 Subject: [PATCH 1466/4122] RDMA/mana: Remove redefinition of basic u64 type gdma_obj_handle_t is no more than redefinition of basic u64 type. Remove such obfuscation. Link: https://lore.kernel.org/r/3c1e821279e6a165d058655d2343722d6650e776.1668160486.git.leonro@nvidia.com Acked-by: Long Li Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mana/mr.c | 5 ++- .../net/ethernet/microsoft/mana/gdma_main.c | 3 +- include/net/mana/gdma.h | 31 +++++++++---------- 3 files changed, 17 insertions(+), 22 deletions(-) diff --git a/drivers/infiniband/hw/mana/mr.c b/drivers/infiniband/hw/mana/mr.c index a56236cdd9ee..351207c60eb6 100644 --- a/drivers/infiniband/hw/mana/mr.c +++ b/drivers/infiniband/hw/mana/mr.c @@ -73,8 +73,7 @@ static int mana_ib_gd_create_mr(struct mana_ib_dev *dev, struct mana_ib_mr *mr, return 0; } -static int mana_ib_gd_destroy_mr(struct mana_ib_dev *dev, - gdma_obj_handle_t mr_handle) +static int mana_ib_gd_destroy_mr(struct mana_ib_dev *dev, u64 mr_handle) { struct gdma_destroy_mr_response resp = {}; struct gdma_destroy_mr_request req = {}; @@ -108,9 +107,9 @@ struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length, struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd); struct gdma_create_mr_params mr_params = {}; struct ib_device *ibdev = ibpd->device; - gdma_obj_handle_t dma_region_handle; struct mana_ib_dev *dev; struct mana_ib_mr *mr; + u64 dma_region_handle; int err; dev = container_of(ibdev, struct mana_ib_dev, ib_dev); diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index 46a7d1e6ece9..69224ff8efb6 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -671,8 +671,7 @@ free_q: return err; } -int 
mana_gd_destroy_dma_region(struct gdma_context *gc, - gdma_obj_handle_t dma_region_handle) +int mana_gd_destroy_dma_region(struct gdma_context *gc, u64 dma_region_handle) { struct gdma_destroy_dma_region_req req = {}; struct gdma_general_resp resp = {}; diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h index 221adc96340c..a9fdae14d24c 100644 --- a/include/net/mana/gdma.h +++ b/include/net/mana/gdma.h @@ -65,8 +65,6 @@ enum { GDMA_DEVICE_MANA = 2, }; -typedef u64 gdma_obj_handle_t; - struct gdma_resource { /* Protect the bitmap */ spinlock_t lock; @@ -200,7 +198,7 @@ struct gdma_mem_info { u64 length; /* Allocated by the PF driver */ - gdma_obj_handle_t dma_region_handle; + u64 dma_region_handle; }; #define REGISTER_ATB_MST_MKEY_LOWER_SIZE 8 @@ -624,7 +622,7 @@ struct gdma_create_queue_req { u32 reserved1; u32 pdid; u32 doolbell_id; - gdma_obj_handle_t gdma_region; + u64 gdma_region; u32 reserved2; u32 queue_size; u32 log2_throttle_limit; @@ -699,14 +697,14 @@ struct gdma_create_dma_region_req { struct gdma_create_dma_region_resp { struct gdma_resp_hdr hdr; - gdma_obj_handle_t dma_region_handle; + u64 dma_region_handle; }; /* HW DATA */ /* GDMA_DMA_REGION_ADD_PAGES */ struct gdma_dma_region_add_pages_req { struct gdma_req_hdr hdr; - gdma_obj_handle_t dma_region_handle; + u64 dma_region_handle; u32 page_addr_list_len; u32 reserved3; @@ -718,7 +716,7 @@ struct gdma_dma_region_add_pages_req { struct gdma_destroy_dma_region_req { struct gdma_req_hdr hdr; - gdma_obj_handle_t dma_region_handle; + u64 dma_region_handle; }; /* HW DATA */ enum gdma_pd_flags { @@ -733,14 +731,14 @@ struct gdma_create_pd_req { struct gdma_create_pd_resp { struct gdma_resp_hdr hdr; - gdma_obj_handle_t pd_handle; + u64 pd_handle; u32 pd_id; u32 reserved; };/* HW DATA */ struct gdma_destroy_pd_req { struct gdma_req_hdr hdr; - gdma_obj_handle_t pd_handle; + u64 pd_handle; };/* HW DATA */ struct gdma_destory_pd_resp { @@ -756,11 +754,11 @@ enum gdma_mr_type { }; struct 
gdma_create_mr_params { - gdma_obj_handle_t pd_handle; + u64 pd_handle; enum gdma_mr_type mr_type; union { struct { - gdma_obj_handle_t dma_region_handle; + u64 dma_region_handle; u64 virtual_address; enum gdma_mr_access_flags access_flags; } gva; @@ -769,13 +767,13 @@ struct gdma_create_mr_params { struct gdma_create_mr_request { struct gdma_req_hdr hdr; - gdma_obj_handle_t pd_handle; + u64 pd_handle; enum gdma_mr_type mr_type; u32 reserved_1; union { struct { - gdma_obj_handle_t dma_region_handle; + u64 dma_region_handle; u64 virtual_address; enum gdma_mr_access_flags access_flags; } gva; @@ -786,14 +784,14 @@ struct gdma_create_mr_request { struct gdma_create_mr_response { struct gdma_resp_hdr hdr; - gdma_obj_handle_t mr_handle; + u64 mr_handle; u32 lkey; u32 rkey; };/* HW DATA */ struct gdma_destroy_mr_request { struct gdma_req_hdr hdr; - gdma_obj_handle_t mr_handle; + u64 mr_handle; };/* HW DATA */ struct gdma_destroy_mr_response { @@ -827,7 +825,6 @@ void mana_gd_free_memory(struct gdma_mem_info *gmi); int mana_gd_send_request(struct gdma_context *gc, u32 req_len, const void *req, u32 resp_len, void *resp); -int mana_gd_destroy_dma_region(struct gdma_context *gc, - gdma_obj_handle_t dma_region_handle); +int mana_gd_destroy_dma_region(struct gdma_context *gc, u64 dma_region_handle); #endif /* _GDMA_H */ From f613facc82cfd4b02b937b14872f24e27da4b909 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Sun, 13 Nov 2022 21:10:27 +0200 Subject: [PATCH 1467/4122] mfd: vexpress-sysreg: Fix resource compound literal assignments Since DEFINE_RES_*() macros were converted to provide a compound literal the user doesn't need to repeat it. Moreover, it may not be compiled. 
Fixes: 52c4d11f1dce ("resource: Convert DEFINE_RES_NAMED() to be compound literal") Reported-by: kernel test robot Reported-by: Stephen Rothwell Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221113191027.2327-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/mfd/vexpress-sysreg.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/mfd/vexpress-sysreg.c b/drivers/mfd/vexpress-sysreg.c index aaf24af287dd..eab82619ec31 100644 --- a/drivers/mfd/vexpress-sysreg.c +++ b/drivers/mfd/vexpress-sysreg.c @@ -61,35 +61,27 @@ static struct mfd_cell vexpress_sysreg_cells[] = { .name = "basic-mmio-gpio", .of_compatible = "arm,vexpress-sysreg,sys_led", .num_resources = 1, - .resources = (struct resource []) { - DEFINE_RES_MEM_NAMED(SYS_LED, 0x4, "dat"), - }, + .resources = &DEFINE_RES_MEM_NAMED(SYS_LED, 0x4, "dat"), .platform_data = &vexpress_sysreg_sys_led_pdata, .pdata_size = sizeof(vexpress_sysreg_sys_led_pdata), }, { .name = "basic-mmio-gpio", .of_compatible = "arm,vexpress-sysreg,sys_mci", .num_resources = 1, - .resources = (struct resource []) { - DEFINE_RES_MEM_NAMED(SYS_MCI, 0x4, "dat"), - }, + .resources = &DEFINE_RES_MEM_NAMED(SYS_MCI, 0x4, "dat"), .platform_data = &vexpress_sysreg_sys_mci_pdata, .pdata_size = sizeof(vexpress_sysreg_sys_mci_pdata), }, { .name = "basic-mmio-gpio", .of_compatible = "arm,vexpress-sysreg,sys_flash", .num_resources = 1, - .resources = (struct resource []) { - DEFINE_RES_MEM_NAMED(SYS_FLASH, 0x4, "dat"), - }, + .resources = &DEFINE_RES_MEM_NAMED(SYS_FLASH, 0x4, "dat"), .platform_data = &vexpress_sysreg_sys_flash_pdata, .pdata_size = sizeof(vexpress_sysreg_sys_flash_pdata), }, { .name = "vexpress-syscfg", .num_resources = 1, - .resources = (struct resource []) { - DEFINE_RES_MEM(SYS_MISC, 0x4c), - }, + .resources = &DEFINE_RES_MEM(SYS_MISC, 0x4c), } }; From e8433659bf701b6f53a6c2168262c9b2c363c049 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: 
Mon, 14 Nov 2022 10:03:38 +0100 Subject: [PATCH 1468/4122] Revert "drivers: bus: simple-pm-bus: Use clocks" This reverts commit 882cf4c913d730a74175db039d941005b883de38 as it breaks the build in linux-next. Link: https://lore.kernel.org/r/20221114181752.08a850f0@canb.auug.org.au Reported-by: Stephen Rothwell Cc: Geert Uytterhoeven Cc: Liu Ying Cc: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/bus/simple-pm-bus.c | 48 ------------------------------------- 1 file changed, 48 deletions(-) diff --git a/drivers/bus/simple-pm-bus.c b/drivers/bus/simple-pm-bus.c index d7b043fefde9..6b8d6257ed8a 100644 --- a/drivers/bus/simple-pm-bus.c +++ b/drivers/bus/simple-pm-bus.c @@ -8,24 +8,17 @@ * for more details. */ -#include #include #include #include #include -struct simple_pm_bus { - struct clk_bulk_data *clks; - int num_clks; -}; - static int simple_pm_bus_probe(struct platform_device *pdev) { const struct device *dev = &pdev->dev; const struct of_dev_auxdata *lookup = dev_get_platdata(dev); struct device_node *np = dev->of_node; const struct of_device_id *match; - struct simple_pm_bus *bus; /* * Allow user to use driver_override to bind this driver to a @@ -51,16 +44,6 @@ static int simple_pm_bus_probe(struct platform_device *pdev) return -ENODEV; } - bus = devm_kzalloc(&pdev->dev, sizeof(*bus), GFP_KERNEL); - if (!bus) - return -ENOMEM; - - bus->num_clks = devm_clk_bulk_get_all(&pdev->dev, &bus->clks); - if (bus->num_clks < 0) - return dev_err_probe(&pdev->dev, bus->num_clks, "failed to get clocks\n"); - - dev_set_drvdata(&pdev->dev, bus); - dev_dbg(&pdev->dev, "%s\n", __func__); pm_runtime_enable(&pdev->dev); @@ -84,36 +67,6 @@ static int simple_pm_bus_remove(struct platform_device *pdev) return 0; } -static int simple_pm_bus_runtime_suspend(struct device *dev) -{ - struct simple_pm_bus *bus = dev_get_drvdata(dev); - - clk_bulk_disable_unprepare(bus->num_clks, bus->clks); - - return 0; -} - -static int simple_pm_bus_runtime_resume(struct device *dev) -{ - 
struct simple_pm_bus *bus = dev_get_drvdata(dev); - int ret; - - ret = clk_bulk_prepare_enable(bus->num_clks, bus->clks); - if (ret) { - dev_err(dev, "failed to enable clocks: %d\n", ret); - return ret; - } - - return 0; -} - -static const struct dev_pm_ops simple_pm_bus_pm_ops = { - SET_RUNTIME_PM_OPS(simple_pm_bus_runtime_suspend, - simple_pm_bus_runtime_resume, NULL) - SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, - pm_runtime_force_resume) -}; - #define ONLY_BUS ((void *) 1) /* Match if the device is only a bus. */ static const struct of_device_id simple_pm_bus_of_match[] = { @@ -132,7 +85,6 @@ static struct platform_driver simple_pm_bus_driver = { .driver = { .name = "simple-pm-bus", .of_match_table = simple_pm_bus_of_match, - .pm = &simple_pm_bus_pm_ops, }, }; From 1d26a55fbeb9c24bb24fa84595c56efee8783f35 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 6 Sep 2022 13:43:00 -0700 Subject: [PATCH 1469/4122] PCI: histb: Switch to using gpiod API This patch switches the driver away from legacy gpio/of_gpio API to gpiod API, and removes use of of_get_named_gpio_flags() which I want to make private to gpiolib. 
Link: https://lore.kernel.org/r/20220906204301.3736813-1-dmitry.torokhov@gmail.com Signed-off-by: Dmitry Torokhov Signed-off-by: Lorenzo Pieralisi Reviewed-by: Linus Walleij --- drivers/pci/controller/dwc/pcie-histb.c | 39 ++++++++++++------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-histb.c b/drivers/pci/controller/dwc/pcie-histb.c index e2b80f10030d..43c27812dd6d 100644 --- a/drivers/pci/controller/dwc/pcie-histb.c +++ b/drivers/pci/controller/dwc/pcie-histb.c @@ -10,11 +10,11 @@ #include #include +#include #include #include #include #include -#include #include #include #include @@ -60,7 +60,7 @@ struct histb_pcie { struct reset_control *sys_reset; struct reset_control *bus_reset; void __iomem *ctrl; - int reset_gpio; + struct gpio_desc *reset_gpio; struct regulator *vpcie; }; @@ -212,8 +212,8 @@ static void histb_pcie_host_disable(struct histb_pcie *hipcie) clk_disable_unprepare(hipcie->sys_clk); clk_disable_unprepare(hipcie->bus_clk); - if (gpio_is_valid(hipcie->reset_gpio)) - gpio_set_value_cansleep(hipcie->reset_gpio, 0); + if (hipcie->reset_gpio) + gpiod_set_value_cansleep(hipcie->reset_gpio, 1); if (hipcie->vpcie) regulator_disable(hipcie->vpcie); @@ -235,8 +235,8 @@ static int histb_pcie_host_enable(struct dw_pcie_rp *pp) } } - if (gpio_is_valid(hipcie->reset_gpio)) - gpio_set_value_cansleep(hipcie->reset_gpio, 1); + if (hipcie->reset_gpio) + gpiod_set_value_cansleep(hipcie->reset_gpio, 0); ret = clk_prepare_enable(hipcie->bus_clk); if (ret) { @@ -298,10 +298,7 @@ static int histb_pcie_probe(struct platform_device *pdev) struct histb_pcie *hipcie; struct dw_pcie *pci; struct dw_pcie_rp *pp; - struct device_node *np = pdev->dev.of_node; struct device *dev = &pdev->dev; - enum of_gpio_flags of_flags; - unsigned long flag = GPIOF_DIR_OUT; int ret; hipcie = devm_kzalloc(dev, sizeof(*hipcie), GFP_KERNEL); @@ -336,17 +333,19 @@ static int histb_pcie_probe(struct platform_device *pdev) hipcie->vpcie = 
NULL; } - hipcie->reset_gpio = of_get_named_gpio_flags(np, - "reset-gpios", 0, &of_flags); - if (of_flags & OF_GPIO_ACTIVE_LOW) - flag |= GPIOF_ACTIVE_LOW; - if (gpio_is_valid(hipcie->reset_gpio)) { - ret = devm_gpio_request_one(dev, hipcie->reset_gpio, - flag, "PCIe device power control"); - if (ret) { - dev_err(dev, "unable to request gpio\n"); - return ret; - } + hipcie->reset_gpio = devm_gpiod_get_optional(dev, "reset", + GPIOD_OUT_HIGH); + ret = PTR_ERR_OR_ZERO(hipcie->reset_gpio); + if (ret) { + dev_err(dev, "unable to request reset gpio: %d\n", ret); + return ret; + } + + ret = gpiod_set_consumer_name(hipcie->reset_gpio, + "PCIe device power control"); + if (ret) { + dev_err(dev, "unable to set reset gpio name: %d\n", ret); + return ret; } hipcie->aux_clk = devm_clk_get(dev, "aux"); From 4529992c947401adac53111cf15958a7ea97b1ef Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Thu, 10 Nov 2022 19:25:06 -0800 Subject: [PATCH 1470/4122] interconnect: qcom: osm-l3: Use platform-independent node ids The identifiers used for nodes need to be unique in the running system, but defining them per platform results in a lot of duplicated definitions and prevents us from using generic compatibles. As these identifiers are not exposed outside the kernel, change to use driver-local numbers, picked completely at random. 
Signed-off-by: Bjorn Andersson Tested-by: Steev Klimaszewski Reviewed-by: Sibi Sankar Link: https://lore.kernel.org/r/20221111032515.3460-2-quic_bjorande@quicinc.com Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/osm-l3.c | 87 +++++++++++------------------- 1 file changed, 30 insertions(+), 57 deletions(-) diff --git a/drivers/interconnect/qcom/osm-l3.c b/drivers/interconnect/qcom/osm-l3.c index ddbdf0943f94..d23769844419 100644 --- a/drivers/interconnect/qcom/osm-l3.c +++ b/drivers/interconnect/qcom/osm-l3.c @@ -74,6 +74,11 @@ struct qcom_osm_l3_desc { unsigned int reg_perf_state; }; +enum { + OSM_L3_MASTER_NODE = 10000, + OSM_L3_SLAVE_NODE, +}; + #define DEFINE_QNODE(_name, _id, _buswidth, ...) \ static const struct qcom_osm_l3_node _name = { \ .name = #_name, \ @@ -83,97 +88,65 @@ struct qcom_osm_l3_desc { .links = { __VA_ARGS__ }, \ } -DEFINE_QNODE(sdm845_osm_apps_l3, SDM845_MASTER_OSM_L3_APPS, 16, SDM845_SLAVE_OSM_L3); -DEFINE_QNODE(sdm845_osm_l3, SDM845_SLAVE_OSM_L3, 16); +DEFINE_QNODE(osm_l3_master, OSM_L3_MASTER_NODE, 16, OSM_L3_SLAVE_NODE); +DEFINE_QNODE(osm_l3_slave, OSM_L3_SLAVE_NODE, 16); -static const struct qcom_osm_l3_node * const sdm845_osm_l3_nodes[] = { - [MASTER_OSM_L3_APPS] = &sdm845_osm_apps_l3, - [SLAVE_OSM_L3] = &sdm845_osm_l3, +static const struct qcom_osm_l3_node * const osm_l3_nodes[] = { + [MASTER_OSM_L3_APPS] = &osm_l3_master, + [SLAVE_OSM_L3] = &osm_l3_slave, +}; + +DEFINE_QNODE(epss_l3_master, OSM_L3_MASTER_NODE, 32, OSM_L3_SLAVE_NODE); +DEFINE_QNODE(epss_l3_slave, OSM_L3_SLAVE_NODE, 32); + +static const struct qcom_osm_l3_node * const epss_l3_nodes[] = { + [MASTER_EPSS_L3_APPS] = &epss_l3_master, + [SLAVE_EPSS_L3_SHARED] = &epss_l3_slave, }; static const struct qcom_osm_l3_desc sdm845_icc_osm_l3 = { - .nodes = sdm845_osm_l3_nodes, - .num_nodes = ARRAY_SIZE(sdm845_osm_l3_nodes), + .nodes = osm_l3_nodes, + .num_nodes = ARRAY_SIZE(osm_l3_nodes), .lut_row_size = OSM_LUT_ROW_SIZE, .reg_freq_lut = OSM_REG_FREQ_LUT, 
.reg_perf_state = OSM_REG_PERF_STATE, }; -DEFINE_QNODE(sc7180_osm_apps_l3, SC7180_MASTER_OSM_L3_APPS, 16, SC7180_SLAVE_OSM_L3); -DEFINE_QNODE(sc7180_osm_l3, SC7180_SLAVE_OSM_L3, 16); - -static const struct qcom_osm_l3_node * const sc7180_osm_l3_nodes[] = { - [MASTER_OSM_L3_APPS] = &sc7180_osm_apps_l3, - [SLAVE_OSM_L3] = &sc7180_osm_l3, -}; - static const struct qcom_osm_l3_desc sc7180_icc_osm_l3 = { - .nodes = sc7180_osm_l3_nodes, - .num_nodes = ARRAY_SIZE(sc7180_osm_l3_nodes), + .nodes = osm_l3_nodes, + .num_nodes = ARRAY_SIZE(osm_l3_nodes), .lut_row_size = OSM_LUT_ROW_SIZE, .reg_freq_lut = OSM_REG_FREQ_LUT, .reg_perf_state = OSM_REG_PERF_STATE, }; -DEFINE_QNODE(sc7280_epss_apps_l3, SC7280_MASTER_EPSS_L3_APPS, 32, SC7280_SLAVE_EPSS_L3); -DEFINE_QNODE(sc7280_epss_l3, SC7280_SLAVE_EPSS_L3, 32); - -static const struct qcom_osm_l3_node * const sc7280_epss_l3_nodes[] = { - [MASTER_EPSS_L3_APPS] = &sc7280_epss_apps_l3, - [SLAVE_EPSS_L3_SHARED] = &sc7280_epss_l3, -}; - static const struct qcom_osm_l3_desc sc7280_icc_epss_l3 = { - .nodes = sc7280_epss_l3_nodes, - .num_nodes = ARRAY_SIZE(sc7280_epss_l3_nodes), + .nodes = epss_l3_nodes, + .num_nodes = ARRAY_SIZE(epss_l3_nodes), .lut_row_size = EPSS_LUT_ROW_SIZE, .reg_freq_lut = EPSS_REG_FREQ_LUT, .reg_perf_state = EPSS_REG_PERF_STATE, }; -DEFINE_QNODE(sc8180x_osm_apps_l3, SC8180X_MASTER_OSM_L3_APPS, 32, SC8180X_SLAVE_OSM_L3); -DEFINE_QNODE(sc8180x_osm_l3, SC8180X_SLAVE_OSM_L3, 32); - -static const struct qcom_osm_l3_node * const sc8180x_osm_l3_nodes[] = { - [MASTER_OSM_L3_APPS] = &sc8180x_osm_apps_l3, - [SLAVE_OSM_L3] = &sc8180x_osm_l3, -}; - static const struct qcom_osm_l3_desc sc8180x_icc_osm_l3 = { - .nodes = sc8180x_osm_l3_nodes, - .num_nodes = ARRAY_SIZE(sc8180x_osm_l3_nodes), + .nodes = osm_l3_nodes, + .num_nodes = ARRAY_SIZE(osm_l3_nodes), .lut_row_size = OSM_LUT_ROW_SIZE, .reg_freq_lut = OSM_REG_FREQ_LUT, .reg_perf_state = OSM_REG_PERF_STATE, }; -DEFINE_QNODE(sm8150_osm_apps_l3, SM8150_MASTER_OSM_L3_APPS, 32, 
SM8150_SLAVE_OSM_L3); -DEFINE_QNODE(sm8150_osm_l3, SM8150_SLAVE_OSM_L3, 32); - -static const struct qcom_osm_l3_node * const sm8150_osm_l3_nodes[] = { - [MASTER_OSM_L3_APPS] = &sm8150_osm_apps_l3, - [SLAVE_OSM_L3] = &sm8150_osm_l3, -}; - static const struct qcom_osm_l3_desc sm8150_icc_osm_l3 = { - .nodes = sm8150_osm_l3_nodes, - .num_nodes = ARRAY_SIZE(sm8150_osm_l3_nodes), + .nodes = osm_l3_nodes, + .num_nodes = ARRAY_SIZE(osm_l3_nodes), .lut_row_size = OSM_LUT_ROW_SIZE, .reg_freq_lut = OSM_REG_FREQ_LUT, .reg_perf_state = OSM_REG_PERF_STATE, }; -DEFINE_QNODE(sm8250_epss_apps_l3, SM8250_MASTER_EPSS_L3_APPS, 32, SM8250_SLAVE_EPSS_L3); -DEFINE_QNODE(sm8250_epss_l3, SM8250_SLAVE_EPSS_L3, 32); - -static const struct qcom_osm_l3_node * const sm8250_epss_l3_nodes[] = { - [MASTER_EPSS_L3_APPS] = &sm8250_epss_apps_l3, - [SLAVE_EPSS_L3_SHARED] = &sm8250_epss_l3, -}; - static const struct qcom_osm_l3_desc sm8250_icc_epss_l3 = { - .nodes = sm8250_epss_l3_nodes, - .num_nodes = ARRAY_SIZE(sm8250_epss_l3_nodes), + .nodes = epss_l3_nodes, + .num_nodes = ARRAY_SIZE(epss_l3_nodes), .lut_row_size = EPSS_LUT_ROW_SIZE, .reg_freq_lut = EPSS_REG_FREQ_LUT, .reg_perf_state = EPSS_REG_PERF_STATE, From d623264f62d40ca2d2380437f8a6318a2a9e4c66 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Thu, 10 Nov 2022 19:25:07 -0800 Subject: [PATCH 1471/4122] interconnect: qcom: osm-l3: Squash common descriptors Each platform defines its own OSM L3 descriptor, but in practice there are only two: one for OSM and one for EPSS. Remove the duplicated definitions. 
Signed-off-by: Bjorn Andersson Tested-by: Steev Klimaszewski Reviewed-by: Sibi Sankar Link: https://lore.kernel.org/r/20221111032515.3460-3-quic_bjorande@quicinc.com Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/osm-l3.c | 48 +++++------------------------- 1 file changed, 8 insertions(+), 40 deletions(-) diff --git a/drivers/interconnect/qcom/osm-l3.c b/drivers/interconnect/qcom/osm-l3.c index d23769844419..7d6844253241 100644 --- a/drivers/interconnect/qcom/osm-l3.c +++ b/drivers/interconnect/qcom/osm-l3.c @@ -104,7 +104,7 @@ static const struct qcom_osm_l3_node * const epss_l3_nodes[] = { [SLAVE_EPSS_L3_SHARED] = &epss_l3_slave, }; -static const struct qcom_osm_l3_desc sdm845_icc_osm_l3 = { +static const struct qcom_osm_l3_desc osm_l3 = { .nodes = osm_l3_nodes, .num_nodes = ARRAY_SIZE(osm_l3_nodes), .lut_row_size = OSM_LUT_ROW_SIZE, @@ -112,39 +112,7 @@ static const struct qcom_osm_l3_desc sdm845_icc_osm_l3 = { .reg_perf_state = OSM_REG_PERF_STATE, }; -static const struct qcom_osm_l3_desc sc7180_icc_osm_l3 = { - .nodes = osm_l3_nodes, - .num_nodes = ARRAY_SIZE(osm_l3_nodes), - .lut_row_size = OSM_LUT_ROW_SIZE, - .reg_freq_lut = OSM_REG_FREQ_LUT, - .reg_perf_state = OSM_REG_PERF_STATE, -}; - -static const struct qcom_osm_l3_desc sc7280_icc_epss_l3 = { - .nodes = epss_l3_nodes, - .num_nodes = ARRAY_SIZE(epss_l3_nodes), - .lut_row_size = EPSS_LUT_ROW_SIZE, - .reg_freq_lut = EPSS_REG_FREQ_LUT, - .reg_perf_state = EPSS_REG_PERF_STATE, -}; - -static const struct qcom_osm_l3_desc sc8180x_icc_osm_l3 = { - .nodes = osm_l3_nodes, - .num_nodes = ARRAY_SIZE(osm_l3_nodes), - .lut_row_size = OSM_LUT_ROW_SIZE, - .reg_freq_lut = OSM_REG_FREQ_LUT, - .reg_perf_state = OSM_REG_PERF_STATE, -}; - -static const struct qcom_osm_l3_desc sm8150_icc_osm_l3 = { - .nodes = osm_l3_nodes, - .num_nodes = ARRAY_SIZE(osm_l3_nodes), - .lut_row_size = OSM_LUT_ROW_SIZE, - .reg_freq_lut = OSM_REG_FREQ_LUT, - .reg_perf_state = OSM_REG_PERF_STATE, -}; - -static const struct 
qcom_osm_l3_desc sm8250_icc_epss_l3 = { +static const struct qcom_osm_l3_desc epss_l3 = { .nodes = epss_l3_nodes, .num_nodes = ARRAY_SIZE(epss_l3_nodes), .lut_row_size = EPSS_LUT_ROW_SIZE, @@ -317,12 +285,12 @@ err: } static const struct of_device_id osm_l3_of_match[] = { - { .compatible = "qcom,sc7180-osm-l3", .data = &sc7180_icc_osm_l3 }, - { .compatible = "qcom,sc7280-epss-l3", .data = &sc7280_icc_epss_l3 }, - { .compatible = "qcom,sdm845-osm-l3", .data = &sdm845_icc_osm_l3 }, - { .compatible = "qcom,sm8150-osm-l3", .data = &sm8150_icc_osm_l3 }, - { .compatible = "qcom,sc8180x-osm-l3", .data = &sc8180x_icc_osm_l3 }, - { .compatible = "qcom,sm8250-epss-l3", .data = &sm8250_icc_epss_l3 }, + { .compatible = "qcom,sc7180-osm-l3", .data = &osm_l3 }, + { .compatible = "qcom,sc7280-epss-l3", .data = &epss_l3 }, + { .compatible = "qcom,sdm845-osm-l3", .data = &osm_l3 }, + { .compatible = "qcom,sm8150-osm-l3", .data = &osm_l3 }, + { .compatible = "qcom,sc8180x-osm-l3", .data = &osm_l3 }, + { .compatible = "qcom,sm8250-epss-l3", .data = &epss_l3 }, { } }; MODULE_DEVICE_TABLE(of, osm_l3_of_match); From 9235253ec73dfd71cc83d154693476930fc8dd77 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Thu, 10 Nov 2022 19:25:08 -0800 Subject: [PATCH 1472/4122] interconnect: qcom: osm-l3: Add per-core EPSS L3 support The EPSS instance in e.g. SM8350 and SC8280XP has per-core L3 voting enabled. In this configuration, the "shared" vote is done using the REG_L3_VOTE register instead of PERF_STATE. Rename epss_l3 to clarify that it's affecting the PERF_STATE register and add a new L3_VOTE description. Given platform lineage it's assumed that the L3_VOTE-based case will be the predominant one, so use this for a new generic qcom,epss-l3 compatible. While adding the EPSS generic, also add qcom,osm-l3. 
Signed-off-by: Bjorn Andersson Tested-by: Steev Klimaszewski Reviewed-by: Sibi Sankar Link: https://lore.kernel.org/r/20221111032515.3460-4-quic_bjorande@quicinc.com Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/osm-l3.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/interconnect/qcom/osm-l3.c b/drivers/interconnect/qcom/osm-l3.c index 7d6844253241..469be732a00b 100644 --- a/drivers/interconnect/qcom/osm-l3.c +++ b/drivers/interconnect/qcom/osm-l3.c @@ -34,6 +34,7 @@ /* EPSS Register offsets */ #define EPSS_LUT_ROW_SIZE 4 +#define EPSS_REG_L3_VOTE 0x90 #define EPSS_REG_FREQ_LUT 0x100 #define EPSS_REG_PERF_STATE 0x320 @@ -112,7 +113,7 @@ static const struct qcom_osm_l3_desc osm_l3 = { .reg_perf_state = OSM_REG_PERF_STATE, }; -static const struct qcom_osm_l3_desc epss_l3 = { +static const struct qcom_osm_l3_desc epss_l3_perf_state = { .nodes = epss_l3_nodes, .num_nodes = ARRAY_SIZE(epss_l3_nodes), .lut_row_size = EPSS_LUT_ROW_SIZE, @@ -120,6 +121,14 @@ static const struct qcom_osm_l3_desc epss_l3 = { .reg_perf_state = EPSS_REG_PERF_STATE, }; +static const struct qcom_osm_l3_desc epss_l3_l3_vote = { + .nodes = epss_l3_nodes, + .num_nodes = ARRAY_SIZE(epss_l3_nodes), + .lut_row_size = EPSS_LUT_ROW_SIZE, + .reg_freq_lut = EPSS_REG_FREQ_LUT, + .reg_perf_state = EPSS_REG_L3_VOTE, +}; + static int qcom_osm_l3_set(struct icc_node *src, struct icc_node *dst) { struct qcom_osm_l3_icc_provider *qp; @@ -285,12 +294,14 @@ err: } static const struct of_device_id osm_l3_of_match[] = { + { .compatible = "qcom,epss-l3", .data = &epss_l3_l3_vote }, + { .compatible = "qcom,osm-l3", .data = &osm_l3 }, { .compatible = "qcom,sc7180-osm-l3", .data = &osm_l3 }, - { .compatible = "qcom,sc7280-epss-l3", .data = &epss_l3 }, + { .compatible = "qcom,sc7280-epss-l3", .data = &epss_l3_perf_state }, { .compatible = "qcom,sdm845-osm-l3", .data = &osm_l3 }, { .compatible = "qcom,sm8150-osm-l3", .data = &osm_l3 }, { .compatible = 
"qcom,sc8180x-osm-l3", .data = &osm_l3 }, - { .compatible = "qcom,sm8250-epss-l3", .data = &epss_l3 }, + { .compatible = "qcom,sm8250-epss-l3", .data = &epss_l3_perf_state }, { } }; MODULE_DEVICE_TABLE(of, osm_l3_of_match); From b6bcef163ae0c4329187eea8431a735a60b1d7bb Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Thu, 10 Nov 2022 19:25:09 -0800 Subject: [PATCH 1473/4122] interconnect: qcom: osm-l3: Simplify osm_l3_set() The aggregation over votes for all nodes in the provider will always only find the bandwidth votes for the destination side of the path. Further more, the average kBps value will always be 0. Simplify the logic by directly looking at the destination node's peak bandwidth request. Signed-off-by: Bjorn Andersson Tested-by: Steev Klimaszewski Reviewed-by: Sibi Sankar Link: https://lore.kernel.org/r/20221111032515.3460-5-quic_bjorande@quicinc.com Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/osm-l3.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/interconnect/qcom/osm-l3.c b/drivers/interconnect/qcom/osm-l3.c index 469be732a00b..5fa171087425 100644 --- a/drivers/interconnect/qcom/osm-l3.c +++ b/drivers/interconnect/qcom/osm-l3.c @@ -134,22 +134,14 @@ static int qcom_osm_l3_set(struct icc_node *src, struct icc_node *dst) struct qcom_osm_l3_icc_provider *qp; struct icc_provider *provider; const struct qcom_osm_l3_node *qn; - struct icc_node *n; unsigned int index; - u32 agg_peak = 0; - u32 agg_avg = 0; u64 rate; qn = src->data; provider = src->provider; qp = to_osm_l3_provider(provider); - list_for_each_entry(n, &provider->nodes, node_list) - provider->aggregate(n, 0, n->avg_bw, n->peak_bw, - &agg_avg, &agg_peak); - - rate = max(agg_avg, agg_peak); - rate = icc_units_to_bps(rate); + rate = icc_units_to_bps(dst->peak_bw); do_div(rate, qn->buswidth); for (index = 0; index < qp->max_state - 1; index++) { From 6313f4b5a438023c0d20960f19df16483cbbb8d7 Mon Sep 17 00:00:00 2001 From: Melody Olvera Date: 
Wed, 26 Oct 2022 12:05:33 -0700 Subject: [PATCH 1474/4122] dt-bindings: arm-smmu: Add 'compatible' for QDU1000 and QRU1000 Add compatible bindings for Qualcomm QDU1000 and QRU1000 platforms. Signed-off-by: Melody Olvera Link: https://lore.kernel.org/r/20221026190534.4004945-2-quic_molvera@quicinc.com Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index 9066e6df1ba1..6258302ed9e7 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -34,6 +34,7 @@ properties: items: - enum: - qcom,qcm2290-smmu-500 + - qcom,qdu1000-smmu-500 - qcom,sc7180-smmu-500 - qcom,sc7280-smmu-500 - qcom,sc8180x-smmu-500 From 7b52f53ce1914f5b3542665ff6a373ee858161c9 Mon Sep 17 00:00:00 2001 From: Melody Olvera Date: Wed, 26 Oct 2022 12:05:34 -0700 Subject: [PATCH 1475/4122] drivers: arm-smmu-impl: Add QDU1000 and QRU1000 iommu implementation Add compatible for Qualcomm QDU1000 and QRU1000 SoCs to add iommu support for them. 
Signed-off-by: Melody Olvera Link: https://lore.kernel.org/r/20221026190534.4004945-3-quic_molvera@quicinc.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index b2708de25ea3..0580a381a04b 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -426,6 +426,7 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu, static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { { .compatible = "qcom,msm8998-smmu-v2" }, { .compatible = "qcom,qcm2290-smmu-500" }, + { .compatible = "qcom,qdu1000-smmu-500" }, { .compatible = "qcom,sc7180-smmu-500" }, { .compatible = "qcom,sc7280-smmu-500" }, { .compatible = "qcom,sc8180x-smmu-500" }, From 728b22a57232a1738d87b2148908a81615240a37 Mon Sep 17 00:00:00 2001 From: Adam Skladowski Date: Sun, 30 Oct 2022 11:42:54 +0200 Subject: [PATCH 1476/4122] dt-bindings: arm-smmu: Add compatible for Qualcomm SM6115 Add compatible for the Qualcomm SM6115 platform to the ARM SMMU DeviceTree binding. 
Signed-off-by: Adam Skladowski Signed-off-by: Iskren Chernev Acked-by: Rob Herring Link: https://lore.kernel.org/r/20221030094258.486428-5-iskren.chernev@gmail.com Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index 6258302ed9e7..406b98d090f6 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -42,6 +42,7 @@ properties: - qcom,sdm845-smmu-500 - qcom,sdx55-smmu-500 - qcom,sdx65-smmu-500 + - qcom,sm6115-smmu-500 - qcom,sm6350-smmu-500 - qcom,sm6375-smmu-500 - qcom,sm8150-smmu-500 From 2fd6e1ad7e199c1ef54341a54fa4e11edc31b63c Mon Sep 17 00:00:00 2001 From: Adam Skladowski Date: Sun, 30 Oct 2022 11:42:55 +0200 Subject: [PATCH 1477/4122] iommu/arm-smmu-qcom: Add SM6115 support Add the Qualcomm SM6115 platform to the list of compatibles; this target uses MMU500 for both APSS and GPU. 
Signed-off-by: Adam Skladowski Signed-off-by: Iskren Chernev Link: https://lore.kernel.org/r/20221030094258.486428-6-iskren.chernev@gmail.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 0580a381a04b..0f4eaf217983 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -433,6 +433,7 @@ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { { .compatible = "qcom,sc8280xp-smmu-500" }, { .compatible = "qcom,sdm630-smmu-v2" }, { .compatible = "qcom,sdm845-smmu-500" }, + { .compatible = "qcom,sm6115-smmu-500" }, { .compatible = "qcom,sm6125-smmu-500" }, { .compatible = "qcom,sm6350-smmu-500" }, { .compatible = "qcom,sm6375-smmu-500" }, From 8d3a9ec6ae2886305fdd03652592b3c7ffea672b Mon Sep 17 00:00:00 2001 From: Richard Acayan Date: Thu, 10 Nov 2022 19:36:06 -0500 Subject: [PATCH 1478/4122] dt-bindings: iommu: arm-smmu: add sdm670 compatible The Snapdragon 670 needs the IOMMU for GENI I2C. Add a compatible string in the documentation to represent its support. 
Signed-off-by: Richard Acayan Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221111003606.126795-2-mailingradian@gmail.com Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index 406b98d090f6..3ade2dbca70e 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -39,6 +39,7 @@ properties: - qcom,sc7280-smmu-500 - qcom,sc8180x-smmu-500 - qcom,sc8280xp-smmu-500 + - qcom,sdm670-smmu-500 - qcom,sdm845-smmu-500 - qcom,sdx55-smmu-500 - qcom,sdx65-smmu-500 From 4ea0be1f0db588afdb2d4d94dfa921b01f34a3c3 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 7 Nov 2022 13:33:07 -0800 Subject: [PATCH 1479/4122] perf stat: Increase metric length to align outputs When perf stat is called with very detailed events, the output doesn't align well like below: $ sudo perf stat -a -ddd sleep 1 Performance counter stats for 'system wide': 8,020.23 msec cpu-clock # 7.997 CPUs utilized 3,970 context-switches # 494.998 /sec 169 cpu-migrations # 21.072 /sec 586 page-faults # 73.065 /sec 649,568,060 cycles # 0.081 GHz (30.42%) 304,044,345 instructions # 0.47 insn per cycle (38.40%) 60,313,022 branches # 7.520 M/sec (38.89%) 2,766,919 branch-misses # 4.59% of all branches (39.26%) 74,422,951 L1-dcache-loads # 9.279 M/sec (39.39%) 8,025,568 L1-dcache-load-misses # 10.78% of all L1-dcache accesses (39.22%) 3,314,995 LLC-loads # 413.329 K/sec (30.83%) 1,225,619 LLC-load-misses # 36.97% of all LL-cache accesses (30.45%) L1-icache-loads 20,420,493 L1-icache-load-misses # 0.00% of all L1-icache accesses (30.29%) 58,017,947 dTLB-loads # 7.234 M/sec (30.37%) 704,677 dTLB-load-misses # 1.21% of all dTLB cache accesses (30.27%) 234,225 iTLB-loads # 29.204 K/sec (30.29%) 417,166 iTLB-load-misses # 178.10% of all iTLB 
cache accesses (30.32%) L1-dcache-prefetches L1-dcache-prefetch-misses 1.002947355 seconds time elapsed Increase the METRIC_LEN by 3 so that it can align properly. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221107213314.3239159-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 657434cd29ee..576283afa319 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -218,7 +218,7 @@ struct outstate { struct evsel *evsel; }; -#define METRIC_LEN 35 +#define METRIC_LEN 38 static void new_line_std(struct perf_stat_config *config __maybe_unused, void *ctx) From 81a02c6577ecfee7056ccafbd028984d0d670c0c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 7 Nov 2022 13:33:08 -0800 Subject: [PATCH 1480/4122] perf stat: Clear screen only if output file is a tty The --interval-clear option makes perf stat to clear the terminal at each interval. But it doesn't need to clear the screen when it saves to a file. 
Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221107213314.3239159-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 576283afa319..ccb804546d5a 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -890,7 +890,7 @@ static void print_interval(struct perf_stat_config *config, FILE *output = config->output; static int num_print_interval; - if (config->interval_clear) + if (config->interval_clear && isatty(fileno(output))) puts(CONSOLE_CLEAR); if (!config->iostat_run && !config->json_output) From f4e55f88da923f39f0b76edc3da3c52d0b72d429 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 7 Nov 2022 13:33:09 -0800 Subject: [PATCH 1481/4122] perf stat: Move common code in print_metric_headers() The struct perf_stat_output_ctx is set in a loop with the same values. Move the code out of the loop and keep the loop minimal. 
Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221107213314.3239159-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index ccb804546d5a..173f4715189c 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -835,11 +835,16 @@ static void print_metric_headers(struct perf_stat_config *config, struct evlist *evlist, const char *prefix, bool no_indent) { - struct perf_stat_output_ctx out; struct evsel *counter; struct outstate os = { .fh = config->output }; + struct perf_stat_output_ctx out = { + .ctx = &os, + .print_metric = print_metric_header, + .new_line = new_line_metric, + .force_header = true, + }; bool first = true; if (config->json_output && !config->interval) @@ -863,13 +868,11 @@ static void print_metric_headers(struct perf_stat_config *config, /* Print metrics headers only */ evlist__for_each_entry(evlist, counter) { os.evsel = counter; - out.ctx = &os; - out.print_metric = print_metric_header; + if (!first && config->json_output) fprintf(config->output, ", "); first = false; - out.new_line = new_line_metric; - out.force_header = true; + perf_stat__print_shadow_stats(config, counter, 0, 0, &out, From fdc7d6082459aa6705dd39b827214f6cfc1fa054 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 7 Nov 2022 13:33:10 -0800 Subject: [PATCH 1482/4122] perf stat: Fix --metric-only --json output Currently it prints all metric headers for JSON output. But actually it skips some metrics with valid_only_metric(). 
So the output looks like: $ perf stat --metric-only --json true {"unit" : "CPUs utilized", "unit" : "/sec", "unit" : "/sec", "unit" : "/sec", "unit" : "GHz", "unit" : "insn per cycle", "unit" : "/sec", "unit" : "branch-misses of all branches"} {"metric-value" : "3.861"}{"metric-value" : "0.79"}{"metric-value" : "3.04"} As you can see there are 8 units in the header but only 3 metric-values are there. It should skip the unused headers as well. Also each unit should be printed as a separate object like metric values. With this patch: $ perf stat --metric-only --json true {"unit" : "GHz"}{"unit" : "insn per cycle"}{"unit" : "branch-misses of all branches"} {"metric-value" : "4.166"}{"metric-value" : "0.73"}{"metric-value" : "2.96"} Fixes: df936cadfb58ba93 ("perf stat: Add JSON output option") Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Claire Jensen Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221107213314.3239159-6-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 173f4715189c..5c10cf49fd12 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -430,12 +430,12 @@ static void print_metric_header(struct perf_stat_config *config, os->evsel->priv != os->evsel->evlist->selected->priv) return; - if (!valid_only_metric(unit) && !config->json_output) + if (!valid_only_metric(unit)) return; unit = fixunit(tbuf, os->evsel, unit); if (config->json_output) - fprintf(os->fh, "\"unit\" : \"%s\"", unit); + fprintf(os->fh, "{\"unit\" : \"%s\"}", unit); else if (config->csv_output) fprintf(os->fh, "%s%s", unit, config->csv_sep); else @@ -845,10 +845,6 @@ static void print_metric_headers(struct perf_stat_config *config, .new_line = new_line_metric, 
.force_header = true, }; - bool first = true; - - if (config->json_output && !config->interval) - fprintf(config->output, "{"); if (prefix && !config->json_output) fprintf(config->output, "%s", prefix); @@ -869,18 +865,12 @@ static void print_metric_headers(struct perf_stat_config *config, evlist__for_each_entry(evlist, counter) { os.evsel = counter; - if (!first && config->json_output) - fprintf(config->output, ", "); - first = false; - perf_stat__print_shadow_stats(config, counter, 0, 0, &out, &config->metric_events, &rt_stat); } - if (config->json_output) - fprintf(config->output, "}"); fputc('\n', config->output); } @@ -952,14 +942,8 @@ static void print_interval(struct perf_stat_config *config, } } - if ((num_print_interval == 0 || config->interval_clear) - && metric_only && !config->json_output) + if ((num_print_interval == 0 || config->interval_clear) && metric_only) print_metric_headers(config, evlist, " ", true); - if ((num_print_interval == 0 || config->interval_clear) - && metric_only && config->json_output) { - fprintf(output, "{"); - print_metric_headers(config, evlist, " ", true); - } if (++num_print_interval == 25) num_print_interval = 0; } From 6d0a7e394eabd456f70c2f2f8e958b2343074360 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 7 Nov 2022 13:33:11 -0800 Subject: [PATCH 1483/4122] perf stat: Do not indent headers for JSON Currently --metric-only with --json indents header lines. This is not needed for JSON. $ perf stat -aA --metric-only -j true {"unit" : "GHz"}{"unit" : "insn per cycle"}{"unit" : "branch-misses of all branches"} {"cpu" : "0", {"metric-value" : "0.101"}{"metric-value" : "0.86"}{"metric-value" : "1.91"} {"cpu" : "1", {"metric-value" : "0.102"}{"metric-value" : "0.87"}{"metric-value" : "2.02"} {"cpu" : "2", {"metric-value" : "0.085"}{"metric-value" : "1.02"}{"metric-value" : "1.69"} ... Note that the other lines are broken JSON, but it will be handled later. 
Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221107213314.3239159-7-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 5c10cf49fd12..99bc0db8f068 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -849,7 +849,7 @@ static void print_metric_headers(struct perf_stat_config *config, if (prefix && !config->json_output) fprintf(config->output, "%s", prefix); - if (!config->csv_output && !no_indent) + if (!config->csv_output && !config->json_output && !no_indent) fprintf(config->output, "%*s", aggr_header_lens[config->aggr_mode], ""); if (config->csv_output) { From 1cc7642abba7b281ecd836bfb56fc6dedac32555 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 7 Nov 2022 13:33:12 -0800 Subject: [PATCH 1484/4122] perf stat: Add header for interval in JSON output A matching header line for intervals was not being printed. 
Before: # perf stat -a -e cycles,instructions --metric-only -j -I 500 {"unit" : "insn per cycle"} {"interval" : 0.500544283}{"metric-value" : "1.96"} ^C After: # perf stat -a -e cycles,instructions --metric-only -j -I 500 {"unit" : "sec"}{"unit" : "insn per cycle"} {"interval" : 0.500515681}{"metric-value" : "2.31"} ^C Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221107213314.3239159-8-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 99bc0db8f068..2a08c66121ec 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -858,6 +858,10 @@ static void print_metric_headers(struct perf_stat_config *config, if (!config->iostat_run) fputs(aggr_header_csv[config->aggr_mode], config->output); } + if (config->json_output) { + if (config->interval) + fputs("{\"unit\" : \"sec\"}", config->output); + } if (config->iostat_run) iostat_print_header_prefix(config); From f1db5a1d1d4de248534567fd95efa570bc693f73 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 7 Nov 2022 13:33:13 -0800 Subject: [PATCH 1485/4122] perf stat: Fix condition in print_interval() The num_print_interval and config->interval_clear should be checked together like other places like later in the function. Otherwise, the --interval-clear option could print the headers for the CSV or JSON output unnecessarily. 
Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221107213314.3239159-9-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 2a08c66121ec..7b2ec400813e 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -900,8 +900,8 @@ static void print_interval(struct perf_stat_config *config, sprintf(prefix, "{\"interval\" : %lu.%09lu}", (unsigned long) ts->tv_sec, ts->tv_nsec); - if ((num_print_interval == 0 && !config->csv_output && !config->json_output) - || config->interval_clear) { + if ((num_print_interval == 0 || config->interval_clear) && + !config->csv_output && !config->json_output) { switch (config->aggr_mode) { case AGGR_NODE: fprintf(output, "# time node cpus"); From 20e2e31779377b36ada04c06d572ce0b4e234bb1 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 7 Nov 2022 13:33:14 -0800 Subject: [PATCH 1486/4122] perf stat: Consolidate condition to print metrics The pm variable holds an appropriate function to print metrics for CSV and JSON already. So we can combine the if statement to simplify the code a little bit. This also matches the above condition for the non-CSV and non-JSON case. 
Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221107213314.3239159-10-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 7b2ec400813e..cb2a116bded6 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -600,9 +600,7 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int pm(config, &os, NULL, NULL, "", 0); print_noise(config, counter, noise); print_running(config, run, ena); - if (config->csv_output) - pm(config, &os, NULL, NULL, "", 0); - else if (config->json_output) + if (config->csv_output || config->json_output) pm(config, &os, NULL, NULL, "", 0); return; } From dbf88f74358338cc444933346a0a57635fbb4c94 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 14 Nov 2022 20:06:26 +0300 Subject: [PATCH 1487/4122] dt-bindings: arm-smmu: Add missing Qualcomm SMMU compatibles Add missing compatibles used for Adreno SMMU on sc7280 and sm8450 platforms and for the Qualcomm v2 SMMU used on SDM630 platform. 
Reviewed-by: Krzysztof Kozlowski Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221114170635.1406534-2-dmitry.baryshkov@linaro.org Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index 3ade2dbca70e..aa863811996f 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -28,6 +28,7 @@ properties: - enum: - qcom,msm8996-smmu-v2 - qcom,msm8998-smmu-v2 + - qcom,sdm630-smmu-v2 - const: qcom,smmu-v2 - description: Qcom SoCs implementing "arm,mmu-500" @@ -51,10 +52,20 @@ properties: - qcom,sm8350-smmu-500 - qcom,sm8450-smmu-500 - const: arm,mmu-500 + + - description: Qcom Adreno GPUs implementing "arm,smmu-500" + items: + - enum: + - qcom,sc7280-smmu-500 + - qcom,sm8250-smmu-500 + - const: qcom,adreno-smmu + - const: arm,mmu-500 - description: Qcom Adreno GPUs implementing "arm,smmu-v2" items: - enum: + - qcom,msm8996-smmu-v2 - qcom,sc7180-smmu-v2 + - qcom,sdm630-smmu-v2 - qcom,sdm845-smmu-v2 - const: qcom,adreno-smmu - const: qcom,smmu-v2 From 982295bfe36925919ab61aab0657528541a2aa83 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 14 Nov 2022 20:06:27 +0300 Subject: [PATCH 1488/4122] dt-bindings: arm-smmu: fix clocks/clock-names schema Rework clocks/clock-names properties schema to properly describe possible usage cases. 
Reviewed-by: Krzysztof Kozlowski Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221114170635.1406534-3-dmitry.baryshkov@linaro.org Signed-off-by: Will Deacon --- .../devicetree/bindings/iommu/arm,smmu.yaml | 129 ++++++++++++++++-- 1 file changed, 121 insertions(+), 8 deletions(-) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index aa863811996f..99f34a40cfe2 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -161,16 +161,12 @@ properties: present in such cases. clock-names: - items: - - const: bus - - const: iface + minItems: 1 + maxItems: 7 clocks: - items: - - description: bus clock required for downstream bus access and for the - smmu ptw - - description: interface clock required to access smmu's registers - through the TCU's programming interface. + minItems: 1 + maxItems: 7 power-domains: maxItems: 1 @@ -220,6 +216,123 @@ allOf: reg: maxItems: 1 + - if: + properties: + compatible: + contains: + enum: + - qcom,msm8998-smmu-v2 + - qcom,sdm630-smmu-v2 + then: + anyOf: + - properties: + clock-names: + items: + - const: bus + clocks: + items: + - description: bus clock required for downstream bus access and for + the smmu ptw + - properties: + clock-names: + items: + - const: iface + - const: mem + - const: mem_iface + clocks: + items: + - description: interface clock required to access smmu's registers + through the TCU's programming interface. + - description: bus clock required for memory access + - description: bus clock required for GPU memory access + - properties: + clock-names: + items: + - const: iface-mm + - const: iface-smmu + - const: bus-mm + - const: bus-smmu + clocks: + items: + - description: interface clock required to access mnoc's registers + through the TCU's programming interface. 
+ - description: interface clock required to access smmu's registers + through the TCU's programming interface. + - description: bus clock required for downstream bus access + - description: bus clock required for the smmu ptw + + - if: + properties: + compatible: + contains: + enum: + - qcom,msm8996-smmu-v2 + - qcom,sc7180-smmu-v2 + - qcom,sdm845-smmu-v2 + then: + properties: + clock-names: + items: + - const: bus + - const: iface + + clocks: + items: + - description: bus clock required for downstream bus access and for + the smmu ptw + - description: interface clock required to access smmu's registers + through the TCU's programming interface. + + - if: + properties: + compatible: + contains: + const: qcom,sc7280-smmu-500 + then: + properties: + clock-names: + items: + - const: gcc_gpu_memnoc_gfx_clk + - const: gcc_gpu_snoc_dvm_gfx_clk + - const: gpu_cc_ahb_clk + - const: gpu_cc_hlos1_vote_gpu_smmu_clk + - const: gpu_cc_cx_gmu_clk + - const: gpu_cc_hub_cx_int_clk + - const: gpu_cc_hub_aon_clk + + clocks: + items: + - description: GPU memnoc_gfx clock + - description: GPU snoc_dvm_gfx clock + - description: GPU ahb clock + - description: GPU hlos1_vote_GPU smmu clock + - description: GPU cx_gmu clock + - description: GPU hub_cx_int clock + - description: GPU hub_aon clock + + - if: + properties: + compatible: + contains: + enum: + - qcom,sm8150-smmu-500 + - qcom,sm8250-smmu-500 + then: + properties: + clock-names: + items: + - const: ahb + - const: bus + - const: iface + + clocks: + items: + - description: bus clock required for AHB bus access + - description: bus clock required for downstream bus access and for + the smmu ptw + - description: interface clock required to access smmu's registers + through the TCU's programming interface. 
+ examples: - |+ /* SMMU with stream matching or stream indexing */ From 3a12e8c065362f0d900a4a93ee60565253d7fde7 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 14 Nov 2022 20:06:28 +0300 Subject: [PATCH 1489/4122] dt-bindings: arm-smmu: add special case for Google Cheza platform Cheza fw does not properly program the GPU aperture to allow the GPU to update the SMMU pagetables for context switches. The board file works around this by dropping the "qcom,adreno-smmu" compat string. Add this usecase to arm,smmu.yaml schema. Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221114170635.1406534-4-dmitry.baryshkov@linaro.org Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index 99f34a40cfe2..29a8b3ff8fa0 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -69,6 +69,10 @@ properties: - qcom,sdm845-smmu-v2 - const: qcom,adreno-smmu - const: qcom,smmu-v2 + - description: Qcom Adreno GPUs on Google Cheza platform + items: + - const: qcom,sdm845-smmu-v2 + - const: qcom,smmu-v2 - description: Marvell SoCs implementing "arm,mmu-500" items: - const: marvell,ap806-smmu-500 From 6c84bbd103d85696af9cc0f746c01f9b2847637e Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 14 Nov 2022 20:06:29 +0300 Subject: [PATCH 1490/4122] dt-bindings: arm-smmu: Add generic qcom,smmu-500 bindings Add generic bindings for the Qualcomm variant of the ARM MMU-500. It is expected that all future platforms will use the generic qcom,smmu-500 compat string in addition to SoC-specific and the generic arm,mmu-500 ones. Older bindings are now described as deprecated. Note: I have split the sdx55 and sdx65 from the legacy bindings. They are not supported by the qcom SMMU implementation. 
I suppose that they are using the generic implementation rather than the Qualcomm-specific one. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221114170635.1406534-5-dmitry.baryshkov@linaro.org Signed-off-by: Will Deacon --- .../devicetree/bindings/iommu/arm,smmu.yaml | 29 ++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index 29a8b3ff8fa0..28f5720824cd 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -31,7 +31,7 @@ properties: - qcom,sdm630-smmu-v2 - const: qcom,smmu-v2 - - description: Qcom SoCs implementing "arm,mmu-500" + - description: Qcom SoCs implementing "qcom,smmu-500" and "arm,mmu-500" items: - enum: - qcom,qcm2290-smmu-500 @@ -42,8 +42,35 @@ properties: - qcom,sc8280xp-smmu-500 - qcom,sdm670-smmu-500 - qcom,sdm845-smmu-500 + - qcom,sm6115-smmu-500 + - qcom,sm6350-smmu-500 + - qcom,sm6375-smmu-500 + - qcom,sm8150-smmu-500 + - qcom,sm8250-smmu-500 + - qcom,sm8350-smmu-500 + - qcom,sm8450-smmu-500 + - const: qcom,smmu-500 + - const: arm,mmu-500 + + - description: Qcom SoCs implementing "arm,mmu-500" (non-qcom implementation) + deprecated: true + items: + - enum: - qcom,sdx55-smmu-500 - qcom,sdx65-smmu-500 + - const: arm,mmu-500 + + - description: Qcom SoCs implementing "arm,mmu-500" (legacy binding) + deprecated: true + items: + # Do not add additional SoC to this list. Instead use two previous lists. 
+ - enum: + - qcom,qcm2290-smmu-500 + - qcom,sc7180-smmu-500 + - qcom,sc7280-smmu-500 + - qcom,sc8180x-smmu-500 + - qcom,sc8280xp-smmu-500 + - qcom,sdm845-smmu-500 - qcom,sm6115-smmu-500 - qcom,sm6350-smmu-500 - qcom,sm6375-smmu-500 From 4c1d0ad153f8bca09776da6031639d3b965d849a Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 14 Nov 2022 20:06:30 +0300 Subject: [PATCH 1491/4122] iommu/arm-smmu-qcom: Move implementation data into match data In preparation to rework of the implementation and configuration details, make qcom_smmu_create() accept new qcom_smmu_match_data structure pointer. Make implementation a field in this struct. Reviewed-by: Sai Prakash Ranjan Tested-by: Sai Prakash Ranjan Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221114170635.1406534-6-dmitry.baryshkov@linaro.org Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 62 ++++++++++++++-------- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h | 4 ++ 2 files changed, 44 insertions(+), 22 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 0f4eaf217983..a7bd49e44bca 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -405,10 +405,18 @@ static const struct arm_smmu_impl qcom_adreno_smmu_impl = { }; static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu, - const struct arm_smmu_impl *impl) + const struct qcom_smmu_match_data *data) { + const struct arm_smmu_impl *impl; struct qcom_smmu *qsmmu; + if (!data) + return ERR_PTR(-EINVAL); + + impl = data->impl; + if (!impl) + return smmu; + /* Check to make sure qcom_scm has finished probing */ if (!qcom_scm_is_available()) return ERR_PTR(-EPROBE_DEFER); @@ -423,24 +431,32 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu, return &qsmmu->smmu; } +static const struct qcom_smmu_match_data qcom_smmu_data = { + .impl = &qcom_smmu_impl, +}; + 
+static const struct qcom_smmu_match_data qcom_adreno_smmu_data = { + .impl = &qcom_adreno_smmu_impl, +}; + static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { - { .compatible = "qcom,msm8998-smmu-v2" }, - { .compatible = "qcom,qcm2290-smmu-500" }, - { .compatible = "qcom,qdu1000-smmu-500" }, - { .compatible = "qcom,sc7180-smmu-500" }, - { .compatible = "qcom,sc7280-smmu-500" }, - { .compatible = "qcom,sc8180x-smmu-500" }, - { .compatible = "qcom,sc8280xp-smmu-500" }, - { .compatible = "qcom,sdm630-smmu-v2" }, - { .compatible = "qcom,sdm845-smmu-500" }, - { .compatible = "qcom,sm6115-smmu-500" }, - { .compatible = "qcom,sm6125-smmu-500" }, - { .compatible = "qcom,sm6350-smmu-500" }, - { .compatible = "qcom,sm6375-smmu-500" }, - { .compatible = "qcom,sm8150-smmu-500" }, - { .compatible = "qcom,sm8250-smmu-500" }, - { .compatible = "qcom,sm8350-smmu-500" }, - { .compatible = "qcom,sm8450-smmu-500" }, + { .compatible = "qcom,msm8998-smmu-v2", .data = &qcom_smmu_data }, + { .compatible = "qcom,qcm2290-smmu-500", .data = &qcom_smmu_data }, + { .compatible = "qcom,qdu1000-smmu-500", .data = &qcom_smmu_data }, + { .compatible = "qcom,sc7180-smmu-500", .data = &qcom_smmu_data }, + { .compatible = "qcom,sc7280-smmu-500", .data = &qcom_smmu_data }, + { .compatible = "qcom,sc8180x-smmu-500", .data = &qcom_smmu_data }, + { .compatible = "qcom,sc8280xp-smmu-500", .data = &qcom_smmu_data }, + { .compatible = "qcom,sdm630-smmu-v2", .data = &qcom_smmu_data }, + { .compatible = "qcom,sdm845-smmu-500", .data = &qcom_smmu_data }, + { .compatible = "qcom,sm6115-smmu-500", .data = &qcom_smmu_data }, + { .compatible = "qcom,sm6125-smmu-500", .data = &qcom_smmu_data }, + { .compatible = "qcom,sm6350-smmu-500", .data = &qcom_smmu_data }, + { .compatible = "qcom,sm6375-smmu-500", .data = &qcom_smmu_data }, + { .compatible = "qcom,sm8150-smmu-500", .data = &qcom_smmu_data }, + { .compatible = "qcom,sm8250-smmu-500", .data = &qcom_smmu_data }, + { .compatible = 
"qcom,sm8350-smmu-500", .data = &qcom_smmu_data }, + { .compatible = "qcom,sm8450-smmu-500", .data = &qcom_smmu_data }, { } }; @@ -455,12 +471,13 @@ static struct acpi_platform_list qcom_acpi_platlist[] = { struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu) { const struct device_node *np = smmu->dev->of_node; + const struct of_device_id *match; #ifdef CONFIG_ACPI if (np == NULL) { /* Match platform for ACPI boot */ if (acpi_match_platform_list(qcom_acpi_platlist) >= 0) - return qcom_smmu_create(smmu, &qcom_smmu_impl); + return qcom_smmu_create(smmu, &qcom_smmu_data); } #endif @@ -471,10 +488,11 @@ struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu) * features if the order is changed. */ if (of_device_is_compatible(np, "qcom,adreno-smmu")) - return qcom_smmu_create(smmu, &qcom_adreno_smmu_impl); + return qcom_smmu_create(smmu, &qcom_adreno_smmu_data); - if (of_match_node(qcom_smmu_impl_of_match, np)) - return qcom_smmu_create(smmu, &qcom_smmu_impl); + match = of_match_node(qcom_smmu_impl_of_match, np); + if (match) + return qcom_smmu_create(smmu, match->data); return smmu; } diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h index 99ec8f8629a0..2424f10b7110 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h @@ -14,6 +14,10 @@ struct qcom_smmu { u32 stall_enabled; }; +struct qcom_smmu_match_data { + const struct arm_smmu_impl *impl; +}; + #ifdef CONFIG_ARM_SMMU_QCOM_DEBUG void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu); const void *qcom_smmu_impl_data(struct arm_smmu_device *smmu); From 30b912a03d91727d75ae14f277b64aca8fb915e4 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 14 Nov 2022 20:06:31 +0300 Subject: [PATCH 1492/4122] iommu/arm-smmu-qcom: Move the qcom,adreno-smmu check into qcom_smmu_create Move special handling of qcom,adreno-smmu into qcom_smmu_create() function. 
This allows us to further customize the Adreno SMMU implementation. Note, this also adds two entries to the qcom_smmu_impl_of_match table. They were used with the qcom,adreno-smmu compat and were handled by the removed clause. Reviewed-by: Sai Prakash Ranjan Tested-by: Sai Prakash Ranjan Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221114170635.1406534-7-dmitry.baryshkov@linaro.org Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 32 ++++++++++++---------- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h | 1 + 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index a7bd49e44bca..e61194127772 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -407,13 +407,18 @@ static const struct arm_smmu_impl qcom_adreno_smmu_impl = { static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu, const struct qcom_smmu_match_data *data) { + const struct device_node *np = smmu->dev->of_node; const struct arm_smmu_impl *impl; struct qcom_smmu *qsmmu; if (!data) return ERR_PTR(-EINVAL); - impl = data->impl; + if (np && of_device_is_compatible(np, "qcom,adreno-smmu")) + impl = data->adreno_impl; + else + impl = data->impl; + if (!impl) return smmu; @@ -431,15 +436,22 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu, return &qsmmu->smmu; } -static const struct qcom_smmu_match_data qcom_smmu_data = { - .impl = &qcom_smmu_impl, +/* + * It is not yet possible to use MDP SMMU with the bypass quirk on the msm8996, + * there are not enough context banks. 
+ */ +static const struct qcom_smmu_match_data msm8996_smmu_data = { + .impl = NULL, + .adreno_impl = &qcom_adreno_smmu_impl, }; -static const struct qcom_smmu_match_data qcom_adreno_smmu_data = { - .impl = &qcom_adreno_smmu_impl, +static const struct qcom_smmu_match_data qcom_smmu_data = { + .impl = &qcom_smmu_impl, + .adreno_impl = &qcom_adreno_smmu_impl, }; static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { + { .compatible = "qcom,msm8996-smmu-v2", .data = &msm8996_smmu_data }, { .compatible = "qcom,msm8998-smmu-v2", .data = &qcom_smmu_data }, { .compatible = "qcom,qcm2290-smmu-500", .data = &qcom_smmu_data }, { .compatible = "qcom,qdu1000-smmu-500", .data = &qcom_smmu_data }, @@ -448,6 +460,7 @@ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { { .compatible = "qcom,sc8180x-smmu-500", .data = &qcom_smmu_data }, { .compatible = "qcom,sc8280xp-smmu-500", .data = &qcom_smmu_data }, { .compatible = "qcom,sdm630-smmu-v2", .data = &qcom_smmu_data }, + { .compatible = "qcom,sdm845-smmu-v2", .data = &qcom_smmu_data }, { .compatible = "qcom,sdm845-smmu-500", .data = &qcom_smmu_data }, { .compatible = "qcom,sm6115-smmu-500", .data = &qcom_smmu_data }, { .compatible = "qcom,sm6125-smmu-500", .data = &qcom_smmu_data }, @@ -481,15 +494,6 @@ struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu) } #endif - /* - * Do not change this order of implementation, i.e., first adreno - * smmu impl and then apss smmu since we can have both implementing - * arm,mmu-500 in which case we will miss setting adreno smmu specific - * features if the order is changed. 
- */ - if (of_device_is_compatible(np, "qcom,adreno-smmu")) - return qcom_smmu_create(smmu, &qcom_adreno_smmu_data); - match = of_match_node(qcom_smmu_impl_of_match, np); if (match) return qcom_smmu_create(smmu, match->data); diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h index 2424f10b7110..424d8d342ce0 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h @@ -16,6 +16,7 @@ struct qcom_smmu { struct qcom_smmu_match_data { const struct arm_smmu_impl *impl; + const struct arm_smmu_impl *adreno_impl; }; #ifdef CONFIG_ARM_SMMU_QCOM_DEBUG From 417b76adcf1d141666866eba5afdd42953f66e2f Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 14 Nov 2022 20:06:32 +0300 Subject: [PATCH 1493/4122] iommu/arm-smmu-qcom: provide separate implementation for SDM845-smmu-500 There is only one platform, which needs special care in the reset function, the SDM845. Add special handler for sdm845 and drop the qcom_smmu500_reset() function. 
Reviewed-by: Sai Prakash Ranjan Tested-by: Sai Prakash Ranjan Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221114170635.1406534-8-dmitry.baryshkov@linaro.org Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 37 +++++++++++++--------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index e61194127772..6dc7fa918799 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -361,6 +361,8 @@ static int qcom_sdm845_smmu500_reset(struct arm_smmu_device *smmu) { int ret; + arm_mmu500_reset(smmu); + /* * To address performance degradation in non-real time clients, * such as USB and UFS, turn off wait-for-safe on sdm845 based boards, @@ -374,23 +376,20 @@ static int qcom_sdm845_smmu500_reset(struct arm_smmu_device *smmu) return ret; } -static int qcom_smmu500_reset(struct arm_smmu_device *smmu) -{ - const struct device_node *np = smmu->dev->of_node; - - arm_mmu500_reset(smmu); - - if (of_device_is_compatible(np, "qcom,sdm845-smmu-500")) - return qcom_sdm845_smmu500_reset(smmu); - - return 0; -} - static const struct arm_smmu_impl qcom_smmu_impl = { .init_context = qcom_smmu_init_context, .cfg_probe = qcom_smmu_cfg_probe, .def_domain_type = qcom_smmu_def_domain_type, - .reset = qcom_smmu500_reset, + .reset = arm_mmu500_reset, + .write_s2cr = qcom_smmu_write_s2cr, + .tlb_sync = qcom_smmu_tlb_sync, +}; + +static const struct arm_smmu_impl sdm845_smmu_500_impl = { + .init_context = qcom_smmu_init_context, + .cfg_probe = qcom_smmu_cfg_probe, + .def_domain_type = qcom_smmu_def_domain_type, + .reset = qcom_sdm845_smmu500_reset, .write_s2cr = qcom_smmu_write_s2cr, .tlb_sync = qcom_smmu_tlb_sync, }; @@ -398,7 +397,7 @@ static const struct arm_smmu_impl qcom_smmu_impl = { static const struct arm_smmu_impl qcom_adreno_smmu_impl = { .init_context = qcom_adreno_smmu_init_context, 
.def_domain_type = qcom_smmu_def_domain_type, - .reset = qcom_smmu500_reset, + .reset = arm_mmu500_reset, .alloc_context_bank = qcom_adreno_smmu_alloc_context_bank, .write_sctlr = qcom_adreno_smmu_write_sctlr, .tlb_sync = qcom_smmu_tlb_sync, @@ -450,6 +449,14 @@ static const struct qcom_smmu_match_data qcom_smmu_data = { .adreno_impl = &qcom_adreno_smmu_impl, }; +static const struct qcom_smmu_match_data sdm845_smmu_500_data = { + .impl = &sdm845_smmu_500_impl, + /* + * No need for adreno impl here. On sdm845 the Adreno SMMU is handled + * by the separate sdm845-smmu-v2 device. + */ +}; + static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { { .compatible = "qcom,msm8996-smmu-v2", .data = &msm8996_smmu_data }, { .compatible = "qcom,msm8998-smmu-v2", .data = &qcom_smmu_data }, @@ -461,7 +468,7 @@ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { { .compatible = "qcom,sc8280xp-smmu-500", .data = &qcom_smmu_data }, { .compatible = "qcom,sdm630-smmu-v2", .data = &qcom_smmu_data }, { .compatible = "qcom,sdm845-smmu-v2", .data = &qcom_smmu_data }, - { .compatible = "qcom,sdm845-smmu-500", .data = &qcom_smmu_data }, + { .compatible = "qcom,sdm845-smmu-500", .data = &sdm845_smmu_500_data }, { .compatible = "qcom,sm6115-smmu-500", .data = &qcom_smmu_data }, { .compatible = "qcom,sm6125-smmu-500", .data = &qcom_smmu_data }, { .compatible = "qcom,sm6350-smmu-500", .data = &qcom_smmu_data }, From 4172dda2b30a9a0e628e81d2a3bc9a6ef0936774 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 14 Nov 2022 20:06:33 +0300 Subject: [PATCH 1494/4122] iommu/arm-smmu-qcom: Merge table from arm-smmu-qcom-debug into match data There is little point in having a separate match table in arm-smmu-qcom-debug.c. Merge it into the main match data table in arm-smmu-qcom.c Note, this also enables debug support for qdu1000, sm6115, sm6375 and ACPI-based sc8180x systems, since these SoCs are expected to support tlb_sync debug. 
Reviewed-by: Sai Prakash Ranjan Tested-by: Sai Prakash Ranjan Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221114170635.1406534-9-dmitry.baryshkov@linaro.org Signed-off-by: Will Deacon --- .../iommu/arm/arm-smmu/arm-smmu-qcom-debug.c | 91 ------------------- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 50 ++++++---- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h | 16 +++- 3 files changed, 45 insertions(+), 112 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c index 6eed8e67a0ca..74e9ef2fd580 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c @@ -10,16 +10,6 @@ #include "arm-smmu.h" #include "arm-smmu-qcom.h" -enum qcom_smmu_impl_reg_offset { - QCOM_SMMU_TBU_PWR_STATUS, - QCOM_SMMU_STATS_SYNC_INV_TBU_ACK, - QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR, -}; - -struct qcom_smmu_config { - const u32 *reg_offset; -}; - void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu) { int ret; @@ -59,84 +49,3 @@ void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu) tbu_pwr_status, sync_inv_ack, sync_inv_progress); } } - -/* Implementation Defined Register Space 0 register offsets */ -static const u32 qcom_smmu_impl0_reg_offset[] = { - [QCOM_SMMU_TBU_PWR_STATUS] = 0x2204, - [QCOM_SMMU_STATS_SYNC_INV_TBU_ACK] = 0x25dc, - [QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR] = 0x2670, -}; - -static const struct qcom_smmu_config qcm2290_smmu_cfg = { - .reg_offset = qcom_smmu_impl0_reg_offset, -}; - -static const struct qcom_smmu_config sc7180_smmu_cfg = { - .reg_offset = qcom_smmu_impl0_reg_offset, -}; - -static const struct qcom_smmu_config sc7280_smmu_cfg = { - .reg_offset = qcom_smmu_impl0_reg_offset, -}; - -static const struct qcom_smmu_config sc8180x_smmu_cfg = { - .reg_offset = qcom_smmu_impl0_reg_offset, -}; - -static const struct qcom_smmu_config sc8280xp_smmu_cfg = { - .reg_offset = qcom_smmu_impl0_reg_offset, -}; - -static const 
struct qcom_smmu_config sm6125_smmu_cfg = { - .reg_offset = qcom_smmu_impl0_reg_offset, -}; - -static const struct qcom_smmu_config sm6350_smmu_cfg = { - .reg_offset = qcom_smmu_impl0_reg_offset, -}; - -static const struct qcom_smmu_config sm8150_smmu_cfg = { - .reg_offset = qcom_smmu_impl0_reg_offset, -}; - -static const struct qcom_smmu_config sm8250_smmu_cfg = { - .reg_offset = qcom_smmu_impl0_reg_offset, -}; - -static const struct qcom_smmu_config sm8350_smmu_cfg = { - .reg_offset = qcom_smmu_impl0_reg_offset, -}; - -static const struct qcom_smmu_config sm8450_smmu_cfg = { - .reg_offset = qcom_smmu_impl0_reg_offset, -}; - -static const struct of_device_id __maybe_unused qcom_smmu_impl_debug_match[] = { - { .compatible = "qcom,msm8998-smmu-v2" }, - { .compatible = "qcom,qcm2290-smmu-500", .data = &qcm2290_smmu_cfg }, - { .compatible = "qcom,sc7180-smmu-500", .data = &sc7180_smmu_cfg }, - { .compatible = "qcom,sc7280-smmu-500", .data = &sc7280_smmu_cfg}, - { .compatible = "qcom,sc8180x-smmu-500", .data = &sc8180x_smmu_cfg }, - { .compatible = "qcom,sc8280xp-smmu-500", .data = &sc8280xp_smmu_cfg }, - { .compatible = "qcom,sdm630-smmu-v2" }, - { .compatible = "qcom,sdm845-smmu-500" }, - { .compatible = "qcom,sm6125-smmu-500", .data = &sm6125_smmu_cfg}, - { .compatible = "qcom,sm6350-smmu-500", .data = &sm6350_smmu_cfg}, - { .compatible = "qcom,sm8150-smmu-500", .data = &sm8150_smmu_cfg }, - { .compatible = "qcom,sm8250-smmu-500", .data = &sm8250_smmu_cfg }, - { .compatible = "qcom,sm8350-smmu-500", .data = &sm8350_smmu_cfg }, - { .compatible = "qcom,sm8450-smmu-500", .data = &sm8450_smmu_cfg }, - { } -}; - -const void *qcom_smmu_impl_data(struct arm_smmu_device *smmu) -{ - const struct of_device_id *match; - const struct device_node *np = smmu->dev->of_node; - - match = of_match_node(qcom_smmu_impl_debug_match, np); - if (!match) - return NULL; - - return match->data; -} diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c 
b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 6dc7fa918799..1843bcd81402 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -430,11 +430,22 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu, return ERR_PTR(-ENOMEM); qsmmu->smmu.impl = impl; - qsmmu->cfg = qcom_smmu_impl_data(smmu); + qsmmu->cfg = data->cfg; return &qsmmu->smmu; } +/* Implementation Defined Register Space 0 register offsets */ +static const u32 qcom_smmu_impl0_reg_offset[] = { + [QCOM_SMMU_TBU_PWR_STATUS] = 0x2204, + [QCOM_SMMU_STATS_SYNC_INV_TBU_ACK] = 0x25dc, + [QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR] = 0x2670, +}; + +static const struct qcom_smmu_config qcom_smmu_impl0_cfg = { + .reg_offset = qcom_smmu_impl0_reg_offset, +}; + /* * It is not yet possible to use MDP SMMU with the bypass quirk on the msm8996, * there are not enough context banks. @@ -455,28 +466,35 @@ static const struct qcom_smmu_match_data sdm845_smmu_500_data = { * No need for adreno impl here. On sdm845 the Adreno SMMU is handled * by the separate sdm845-smmu-v2 device. */ + /* Also no debug configuration. 
*/ +}; + +static const struct qcom_smmu_match_data qcom_smmu_500_impl0_data = { + .impl = &qcom_smmu_impl, + .adreno_impl = &qcom_adreno_smmu_impl, + .cfg = &qcom_smmu_impl0_cfg, }; static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { { .compatible = "qcom,msm8996-smmu-v2", .data = &msm8996_smmu_data }, { .compatible = "qcom,msm8998-smmu-v2", .data = &qcom_smmu_data }, - { .compatible = "qcom,qcm2290-smmu-500", .data = &qcom_smmu_data }, - { .compatible = "qcom,qdu1000-smmu-500", .data = &qcom_smmu_data }, - { .compatible = "qcom,sc7180-smmu-500", .data = &qcom_smmu_data }, - { .compatible = "qcom,sc7280-smmu-500", .data = &qcom_smmu_data }, - { .compatible = "qcom,sc8180x-smmu-500", .data = &qcom_smmu_data }, - { .compatible = "qcom,sc8280xp-smmu-500", .data = &qcom_smmu_data }, + { .compatible = "qcom,qcm2290-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,qdu1000-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,sc7180-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,sc7280-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,sc8180x-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,sc8280xp-smmu-500", .data = &qcom_smmu_500_impl0_data }, { .compatible = "qcom,sdm630-smmu-v2", .data = &qcom_smmu_data }, { .compatible = "qcom,sdm845-smmu-v2", .data = &qcom_smmu_data }, { .compatible = "qcom,sdm845-smmu-500", .data = &sdm845_smmu_500_data }, - { .compatible = "qcom,sm6115-smmu-500", .data = &qcom_smmu_data }, - { .compatible = "qcom,sm6125-smmu-500", .data = &qcom_smmu_data }, - { .compatible = "qcom,sm6350-smmu-500", .data = &qcom_smmu_data }, - { .compatible = "qcom,sm6375-smmu-500", .data = &qcom_smmu_data }, - { .compatible = "qcom,sm8150-smmu-500", .data = &qcom_smmu_data }, - { .compatible = "qcom,sm8250-smmu-500", .data = &qcom_smmu_data }, - { .compatible = "qcom,sm8350-smmu-500", .data = &qcom_smmu_data }, - { .compatible 
= "qcom,sm8450-smmu-500", .data = &qcom_smmu_data }, + { .compatible = "qcom,sm6115-smmu-500", .data = &qcom_smmu_500_impl0_data}, + { .compatible = "qcom,sm6125-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,sm6350-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,sm6375-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,sm8150-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,sm8250-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,sm8350-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,sm8450-smmu-500", .data = &qcom_smmu_500_impl0_data }, { } }; @@ -497,7 +515,7 @@ struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu) if (np == NULL) { /* Match platform for ACPI boot */ if (acpi_match_platform_list(qcom_acpi_platlist) >= 0) - return qcom_smmu_create(smmu, &qcom_smmu_data); + return qcom_smmu_create(smmu, &qcom_smmu_500_impl0_data); } #endif diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h index 424d8d342ce0..593910567b88 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h @@ -14,20 +14,26 @@ struct qcom_smmu { u32 stall_enabled; }; +enum qcom_smmu_impl_reg_offset { + QCOM_SMMU_TBU_PWR_STATUS, + QCOM_SMMU_STATS_SYNC_INV_TBU_ACK, + QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR, +}; + +struct qcom_smmu_config { + const u32 *reg_offset; +}; + struct qcom_smmu_match_data { + const struct qcom_smmu_config *cfg; const struct arm_smmu_impl *impl; const struct arm_smmu_impl *adreno_impl; }; #ifdef CONFIG_ARM_SMMU_QCOM_DEBUG void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu); -const void *qcom_smmu_impl_data(struct arm_smmu_device *smmu); #else static inline void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu) { } -static inline const void *qcom_smmu_impl_data(struct arm_smmu_device *smmu) -{ - return NULL; 
-} #endif #endif /* _ARM_SMMU_QCOM_H */ From b4c6ee515c426f5fffc3e25772a03e44655d6e1c Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 14 Nov 2022 20:06:34 +0300 Subject: [PATCH 1495/4122] iommu/arm-smmu-qcom: Stop using mmu500 reset for v2 MMUs The arm_mmu500_reset() writes into registers specific for MMU500. For the generic ARM SMMU v2 these registers (sACR) are defined as 'implementation defined'. Downstream Qualcomm driver for SMMUv2 doesn't touch them. Reviewed-by: Sai Prakash Ranjan Tested-by: Sai Prakash Ranjan Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221114170635.1406534-10-dmitry.baryshkov@linaro.org [will: Remove unused 'qcom_smmu_data' structure] Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 38 +++++++++++++++------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 1843bcd81402..07372db4184e 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -376,7 +376,15 @@ static int qcom_sdm845_smmu500_reset(struct arm_smmu_device *smmu) return ret; } -static const struct arm_smmu_impl qcom_smmu_impl = { +static const struct arm_smmu_impl qcom_smmu_v2_impl = { + .init_context = qcom_smmu_init_context, + .cfg_probe = qcom_smmu_cfg_probe, + .def_domain_type = qcom_smmu_def_domain_type, + .write_s2cr = qcom_smmu_write_s2cr, + .tlb_sync = qcom_smmu_tlb_sync, +}; + +static const struct arm_smmu_impl qcom_smmu_500_impl = { .init_context = qcom_smmu_init_context, .cfg_probe = qcom_smmu_cfg_probe, .def_domain_type = qcom_smmu_def_domain_type, @@ -394,7 +402,15 @@ static const struct arm_smmu_impl sdm845_smmu_500_impl = { .tlb_sync = qcom_smmu_tlb_sync, }; -static const struct arm_smmu_impl qcom_adreno_smmu_impl = { +static const struct arm_smmu_impl qcom_adreno_smmu_v2_impl = { + .init_context = qcom_adreno_smmu_init_context, + .def_domain_type = 
qcom_smmu_def_domain_type, + .alloc_context_bank = qcom_adreno_smmu_alloc_context_bank, + .write_sctlr = qcom_adreno_smmu_write_sctlr, + .tlb_sync = qcom_smmu_tlb_sync, +}; + +static const struct arm_smmu_impl qcom_adreno_smmu_500_impl = { .init_context = qcom_adreno_smmu_init_context, .def_domain_type = qcom_smmu_def_domain_type, .reset = arm_mmu500_reset, @@ -452,12 +468,12 @@ static const struct qcom_smmu_config qcom_smmu_impl0_cfg = { */ static const struct qcom_smmu_match_data msm8996_smmu_data = { .impl = NULL, - .adreno_impl = &qcom_adreno_smmu_impl, + .adreno_impl = &qcom_adreno_smmu_v2_impl, }; -static const struct qcom_smmu_match_data qcom_smmu_data = { - .impl = &qcom_smmu_impl, - .adreno_impl = &qcom_adreno_smmu_impl, +static const struct qcom_smmu_match_data qcom_smmu_v2_data = { + .impl = &qcom_smmu_v2_impl, + .adreno_impl = &qcom_adreno_smmu_v2_impl, }; static const struct qcom_smmu_match_data sdm845_smmu_500_data = { @@ -470,22 +486,22 @@ static const struct qcom_smmu_match_data sdm845_smmu_500_data = { }; static const struct qcom_smmu_match_data qcom_smmu_500_impl0_data = { - .impl = &qcom_smmu_impl, - .adreno_impl = &qcom_adreno_smmu_impl, + .impl = &qcom_smmu_500_impl, + .adreno_impl = &qcom_adreno_smmu_500_impl, .cfg = &qcom_smmu_impl0_cfg, }; static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { { .compatible = "qcom,msm8996-smmu-v2", .data = &msm8996_smmu_data }, - { .compatible = "qcom,msm8998-smmu-v2", .data = &qcom_smmu_data }, + { .compatible = "qcom,msm8998-smmu-v2", .data = &qcom_smmu_v2_data }, { .compatible = "qcom,qcm2290-smmu-500", .data = &qcom_smmu_500_impl0_data }, { .compatible = "qcom,qdu1000-smmu-500", .data = &qcom_smmu_500_impl0_data }, { .compatible = "qcom,sc7180-smmu-500", .data = &qcom_smmu_500_impl0_data }, { .compatible = "qcom,sc7280-smmu-500", .data = &qcom_smmu_500_impl0_data }, { .compatible = "qcom,sc8180x-smmu-500", .data = &qcom_smmu_500_impl0_data }, { .compatible = 
"qcom,sc8280xp-smmu-500", .data = &qcom_smmu_500_impl0_data }, - { .compatible = "qcom,sdm630-smmu-v2", .data = &qcom_smmu_data }, - { .compatible = "qcom,sdm845-smmu-v2", .data = &qcom_smmu_data }, + { .compatible = "qcom,sdm630-smmu-v2", .data = &qcom_smmu_v2_data }, + { .compatible = "qcom,sdm845-smmu-v2", .data = &qcom_smmu_v2_data }, { .compatible = "qcom,sdm845-smmu-500", .data = &sdm845_smmu_500_data }, { .compatible = "qcom,sm6115-smmu-500", .data = &qcom_smmu_500_impl0_data}, { .compatible = "qcom,sm6125-smmu-500", .data = &qcom_smmu_500_impl0_data }, From 80b71080720e34eaf06642c372d4c11d046baf27 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 14 Nov 2022 20:06:35 +0300 Subject: [PATCH 1496/4122] iommu/arm-smmu-qcom: Add generic qcom,smmu-500 match entry Add generic qcom,smmu-500 compatibility string. Newer platforms should use this generic entry rather than declaring per-SoC entries. Reviewed-by: Sai Prakash Ranjan Tested-by: Sai Prakash Ranjan Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221114170635.1406534-11-dmitry.baryshkov@linaro.org Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 07372db4184e..c94daf88c505 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -491,6 +491,10 @@ static const struct qcom_smmu_match_data qcom_smmu_500_impl0_data = { .cfg = &qcom_smmu_impl0_cfg, }; +/* + * Do not add any more qcom,SOC-smmu-500 entries to this list, unless they need + * special handling and can not be covered by the qcom,smmu-500 entry. 
+ */ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { { .compatible = "qcom,msm8996-smmu-v2", .data = &msm8996_smmu_data }, { .compatible = "qcom,msm8998-smmu-v2", .data = &qcom_smmu_v2_data }, @@ -511,6 +515,7 @@ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { { .compatible = "qcom,sm8250-smmu-500", .data = &qcom_smmu_500_impl0_data }, { .compatible = "qcom,sm8350-smmu-500", .data = &qcom_smmu_500_impl0_data }, { .compatible = "qcom,sm8450-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,smmu-500", .data = &qcom_smmu_500_impl0_data }, { } }; From 4e016f969529f2aec0545e90119e7eb3cb124c46 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 6 Nov 2022 19:46:18 +0200 Subject: [PATCH 1497/4122] vfio: Add an option to get migration data size Add an option to get migration data size by introducing a new migration feature named VFIO_DEVICE_FEATURE_MIG_DATA_SIZE. Upon VFIO_DEVICE_FEATURE_GET the estimated data length that will be required to complete STOP_COPY is returned. This option may better enable user space to consider before moving to STOP_COPY whether it can meet the downtime SLA based on the returned data. The patch also includes the implementation for mlx5 and hisi for this new option to make it feature complete for the existing drivers in this area. 
Signed-off-by: Yishai Hadas Reviewed-by: Jason Gunthorpe Reviewed-by: Longfang Liu Link: https://lore.kernel.org/r/20221106174630.25909-2-yishaih@nvidia.com Signed-off-by: Alex Williamson --- .../vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 9 ++++++ drivers/vfio/pci/mlx5/main.c | 18 +++++++++++ drivers/vfio/pci/vfio_pci_core.c | 3 +- drivers/vfio/vfio_main.c | 32 +++++++++++++++++++ include/linux/vfio.h | 5 +++ include/uapi/linux/vfio.h | 13 ++++++++ 6 files changed, 79 insertions(+), 1 deletion(-) diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c index 39eeca18a0f7..0c0c0c7f0521 100644 --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c @@ -957,6 +957,14 @@ hisi_acc_vfio_pci_set_device_state(struct vfio_device *vdev, return res; } +static int +hisi_acc_vfio_pci_get_data_size(struct vfio_device *vdev, + unsigned long *stop_copy_length) +{ + *stop_copy_length = sizeof(struct acc_vf_data); + return 0; +} + static int hisi_acc_vfio_pci_get_device_state(struct vfio_device *vdev, enum vfio_device_mig_state *curr_state) @@ -1213,6 +1221,7 @@ static void hisi_acc_vfio_pci_close_device(struct vfio_device *core_vdev) static const struct vfio_migration_ops hisi_acc_vfio_pci_migrn_state_ops = { .migration_set_state = hisi_acc_vfio_pci_set_device_state, .migration_get_state = hisi_acc_vfio_pci_get_device_state, + .migration_get_data_size = hisi_acc_vfio_pci_get_data_size, }; static int hisi_acc_vfio_pci_migrn_init_dev(struct vfio_device *core_vdev) diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c index 457138b92f13..6e9cf2aacc52 100644 --- a/drivers/vfio/pci/mlx5/main.c +++ b/drivers/vfio/pci/mlx5/main.c @@ -512,6 +512,23 @@ mlx5vf_pci_set_device_state(struct vfio_device *vdev, return res; } +static int mlx5vf_pci_get_data_size(struct vfio_device *vdev, + unsigned long *stop_copy_length) +{ + struct mlx5vf_pci_core_device *mvdev = container_of( + 
vdev, struct mlx5vf_pci_core_device, core_device.vdev); + size_t state_size; + int ret; + + mutex_lock(&mvdev->state_mutex); + ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, + &state_size); + if (!ret) + *stop_copy_length = state_size; + mlx5vf_state_mutex_unlock(mvdev); + return ret; +} + static int mlx5vf_pci_get_device_state(struct vfio_device *vdev, enum vfio_device_mig_state *curr_state) { @@ -577,6 +594,7 @@ static void mlx5vf_pci_close_device(struct vfio_device *core_vdev) static const struct vfio_migration_ops mlx5vf_pci_mig_ops = { .migration_set_state = mlx5vf_pci_set_device_state, .migration_get_state = mlx5vf_pci_get_device_state, + .migration_get_data_size = mlx5vf_pci_get_data_size, }; static const struct vfio_log_ops mlx5vf_pci_log_ops = { diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 9be2d5be5d95..189d4930c276 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -2127,7 +2127,8 @@ int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev) if (vdev->vdev.mig_ops) { if (!(vdev->vdev.mig_ops->migration_get_state && - vdev->vdev.mig_ops->migration_set_state) || + vdev->vdev.mig_ops->migration_set_state && + vdev->vdev.mig_ops->migration_get_data_size) || !(vdev->vdev.migration_flags & VFIO_MIGRATION_STOP_COPY)) return -EINVAL; } diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index 9835757e2bee..662e267a3e13 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -1242,6 +1242,34 @@ out_copy: return 0; } +static int +vfio_ioctl_device_feature_migration_data_size(struct vfio_device *device, + u32 flags, void __user *arg, + size_t argsz) +{ + struct vfio_device_feature_mig_data_size data_size = {}; + unsigned long stop_copy_length; + int ret; + + if (!device->mig_ops) + return -ENOTTY; + + ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET, + sizeof(data_size)); + if (ret != 1) + return ret; + + ret = 
device->mig_ops->migration_get_data_size(device, &stop_copy_length); + if (ret) + return ret; + + data_size.stop_copy_length = stop_copy_length; + if (copy_to_user(arg, &data_size, sizeof(data_size))) + return -EFAULT; + + return 0; +} + static int vfio_ioctl_device_feature_migration(struct vfio_device *device, u32 flags, void __user *arg, size_t argsz) @@ -1469,6 +1497,10 @@ static int vfio_ioctl_device_feature(struct vfio_device *device, return vfio_ioctl_device_feature_logging_report( device, feature.flags, arg->data, feature.argsz - minsz); + case VFIO_DEVICE_FEATURE_MIG_DATA_SIZE: + return vfio_ioctl_device_feature_migration_data_size( + device, feature.flags, arg->data, + feature.argsz - minsz); default: if (unlikely(!device->ops->device_feature)) return -EINVAL; diff --git a/include/linux/vfio.h b/include/linux/vfio.h index e7480154825e..43b67e46a2cb 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -107,6 +107,9 @@ struct vfio_device_ops { * @migration_get_state: Optional callback to get the migration state for * devices that support migration. It's mandatory for * VFIO_DEVICE_FEATURE_MIGRATION migration support. + * @migration_get_data_size: Optional callback to get the estimated data + * length that will be required to complete stop copy. It's mandatory for + * VFIO_DEVICE_FEATURE_MIGRATION migration support. 
*/ struct vfio_migration_ops { struct file *(*migration_set_state)( @@ -114,6 +117,8 @@ struct vfio_migration_ops { enum vfio_device_mig_state new_state); int (*migration_get_state)(struct vfio_device *device, enum vfio_device_mig_state *curr_state); + int (*migration_get_data_size)(struct vfio_device *device, + unsigned long *stop_copy_length); }; /** diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index d7d8e0922376..3e45dbaf190e 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -1128,6 +1128,19 @@ struct vfio_device_feature_dma_logging_report { #define VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT 8 +/* + * Upon VFIO_DEVICE_FEATURE_GET read back the estimated data length that will + * be required to complete stop copy. + * + * Note: Can be called on each device state. + */ + +struct vfio_device_feature_mig_data_size { + __aligned_u64 stop_copy_length; +}; + +#define VFIO_DEVICE_FEATURE_MIG_DATA_SIZE 9 + /* -------- API for Type1 VFIO IOMMU -------- */ /** From 2f5d8cef45c30edcf3972d345f606df563d3a48e Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 6 Nov 2022 19:46:19 +0200 Subject: [PATCH 1498/4122] vfio/mlx5: Fix a typo in mlx5vf_cmd_load_vhca_state() Fix a typo in mlx5vf_cmd_load_vhca_state() to use the 'load' memory layout. As in/out sizes are equal for save and load commands there wasn't any functional issue. 
Fixes: f1d98f346ee3 ("vfio/mlx5: Expose migration commands over mlx5 device") Signed-off-by: Yishai Hadas Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20221106174630.25909-3-yishaih@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/mlx5/cmd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c index c604b70437a5..0848bc905d3e 100644 --- a/drivers/vfio/pci/mlx5/cmd.c +++ b/drivers/vfio/pci/mlx5/cmd.c @@ -378,8 +378,8 @@ int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev, struct mlx5_vf_migration_file *migf) { struct mlx5_core_dev *mdev; - u32 out[MLX5_ST_SZ_DW(save_vhca_state_out)] = {}; - u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {}; + u32 out[MLX5_ST_SZ_DW(load_vhca_state_out)] = {}; + u32 in[MLX5_ST_SZ_DW(load_vhca_state_in)] = {}; u32 pdn, mkey; int err; From a80e0e156ca6d7c339e314d15db2f038755da6c3 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 11 Nov 2022 19:22:43 -0800 Subject: [PATCH 1499/4122] perf stat: Fix summary output in CSV with --metric-only It should not print "summary" for each event when --metric-only is set. 
Before: $ sudo perf stat -a --per-socket --summary -x, --metric-only true time,socket,cpusGhz,insn per cycle,branch-misses of all branches, 0.000709079,S0,8,0.893,2.40,0.45, S0,8, summary, summary, summary, summary, summary,0.893, summary,2.40, summary, summary,0.45, After: $ sudo perf stat -a --per-socket --summary -x, --metric-only true time,socket,cpusGHz,insn per cycle,branch-misses of all branches, 0.000882297,S0,8,0.598,1.64,0.64, summary,S0,8,0.598,1.64,0.64, Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221112032244.1077370-11-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index cc206781c57c..6ea731403270 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -549,7 +549,7 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int } if (!config->no_csv_summary && config->csv_output && - config->summary && !config->interval) { + config->summary && !config->interval && !config->metric_only) { fprintf(config->output, "%16s%s", "summary", config->csv_sep); } @@ -732,8 +732,13 @@ static void print_aggr(struct perf_stat_config *config, * Without each counter has its own line. 
*/ for (s = 0; s < config->aggr_map->nr; s++) { - if (prefix && metric_only) - fprintf(output, "%s", prefix); + if (metric_only) { + if (prefix) + fprintf(output, "%s", prefix); + else if (config->summary && !config->no_csv_summary && + config->csv_output && !config->interval) + fprintf(output, "%16s%s", "summary", config->csv_sep); + } first = true; evlist__for_each_entry(evlist, counter) { From 7565f9617efac0c0c8e2dbd08dbe0695d56684f5 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 11 Nov 2022 19:22:44 -0800 Subject: [PATCH 1500/4122] perf stat: Add missing separator in the CSV header It should have a comma after 'cpus' for socket and die aggregation mode. The output of the following command shows the issue. $ sudo perf stat -a --per-socket -x, --metric-only -I1 true Before: +--- here V time,socket,cpusGhz,insn per cycle,branch-misses of all branches, 0.000908461,S0,8,0.950,1.65,1.21, After: time,socket,cpus,GHz,insn per cycle,branch-misses of all branches, 0.000683094,S0,8,0.593,2.00,0.60, Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221112032244.1077370-12-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 6ea731403270..2a3c1e0098b9 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -828,8 +828,8 @@ static int aggr_header_lens[] = { static const char *aggr_header_csv[] = { [AGGR_CORE] = "core,cpus,", - [AGGR_DIE] = "die,cpus", - [AGGR_SOCKET] = "socket,cpus", + [AGGR_DIE] = "die,cpus,", + [AGGR_SOCKET] = "socket,cpus,", [AGGR_NONE] = "cpu,", [AGGR_THREAD] = "comm-pid,", [AGGR_NODE] = "node,", From 913a144164d8f09fab7e4175d693168b29d5843b Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: 
Wed, 2 Nov 2022 09:02:33 -0700 Subject: [PATCH 1501/4122] HSI: ssi_protocol: Fix return type of ssip_pn_xmit() With clang's kernel control flow integrity (kCFI, CONFIG_CFI_CLANG), indirect call targets are validated against the expected function pointer prototype to make sure the call target is valid to help mitigate ROP attacks. If they are not identical, there is a failure at run time, which manifests as either a kernel panic or thread getting killed. A proposed warning in clang aims to catch these at compile time, which reveals: drivers/hsi/clients/ssi_protocol.c:1053:20: error: incompatible function pointer types initializing 'netdev_tx_t (*)(struct sk_buff *, struct net_device *)' (aka 'enum netdev_tx (*)(struct sk_buff *, struct net_device *)') with an expression of type 'int (struct sk_buff *, struct net_device *)' [-Werror,-Wincompatible-function-pointer-types-strict] .ndo_start_xmit = ssip_pn_xmit, ^~~~~~~~~~~~ 1 error generated. ->ndo_start_xmit() in 'struct net_device_ops' expects a return type of 'netdev_tx_t', not 'int'. Adjust the return type of ssip_pn_xmit() to match the prototype's to resolve the warning and CFI failure. Additionally, use the enum 'NETDEV_TX_OK' instead of a raw '0' for the return value of ssip_pn_xmit(). 
Link: https://github.com/ClangBuiltLinux/linux/issues/1750 Signed-off-by: Nathan Chancellor Reviewed-by: Kees Cook Signed-off-by: Sebastian Reichel --- drivers/hsi/clients/ssi_protocol.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hsi/clients/ssi_protocol.c b/drivers/hsi/clients/ssi_protocol.c index 274ad8443f8c..38e572faff43 100644 --- a/drivers/hsi/clients/ssi_protocol.c +++ b/drivers/hsi/clients/ssi_protocol.c @@ -968,7 +968,7 @@ static void ssip_xmit_work(struct work_struct *work) ssip_xmit(cl); } -static int ssip_pn_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t ssip_pn_xmit(struct sk_buff *skb, struct net_device *dev) { struct hsi_client *cl = to_hsi_client(dev->dev.parent); struct ssi_protocol *ssi = hsi_client_drvdata(cl); @@ -1027,7 +1027,7 @@ static int ssip_pn_xmit(struct sk_buff *skb, struct net_device *dev) dev->stats.tx_packets++; dev->stats.tx_bytes += skb->len; - return 0; + return NETDEV_TX_OK; drop2: hsi_free_msg(msg); drop: @@ -1035,7 +1035,7 @@ drop: inc_dropped: dev->stats.tx_dropped++; - return 0; + return NETDEV_TX_OK; } /* CMT reset event handler */ From f5181c35ed7ba0ceb6e42872aad1334d994b0175 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 1 Nov 2022 11:41:18 +0800 Subject: [PATCH 1502/4122] HSI: omap_ssi_core: fix unbalanced pm_runtime_disable() In error label 'out1' path in ssi_probe(), the pm_runtime_enable() has not been called yet, so pm_runtime_disable() is not needed. 
Fixes: b209e047bc74 ("HSI: Introduce OMAP SSI driver") Signed-off-by: Yang Yingliang Signed-off-by: Sebastian Reichel --- drivers/hsi/controllers/omap_ssi_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hsi/controllers/omap_ssi_core.c b/drivers/hsi/controllers/omap_ssi_core.c index eb9820158318..b23a576ed88a 100644 --- a/drivers/hsi/controllers/omap_ssi_core.c +++ b/drivers/hsi/controllers/omap_ssi_core.c @@ -536,9 +536,9 @@ out3: device_for_each_child(&pd->dev, NULL, ssi_remove_ports); out2: ssi_remove_controller(ssi); + pm_runtime_disable(&pd->dev); out1: platform_set_drvdata(pd, NULL); - pm_runtime_disable(&pd->dev); return err; } From dac153f2802db1ad46207283cb9b2aae3d707a45 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Mon, 7 Nov 2022 10:51:34 +0200 Subject: [PATCH 1503/4122] RDMA/restrack: Release MR restrack when delete The MR restrack also needs to be released when deleting it, otherwise it causes a memory leak as the task struct won't be released. Fixes: 13ef5539def7 ("RDMA/restrack: Count references to the verbs objects") Signed-off-by: Mark Zhang Reviewed-by: Michael Guralnik Link: https://lore.kernel.org/r/703db18e8d4ef628691fb93980a709be673e62e3.1667810736.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/restrack.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index 1f935d9f6178..01a499a8b88d 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -343,8 +343,6 @@ void rdma_restrack_del(struct rdma_restrack_entry *res) rt = &dev->res[res->type]; old = xa_erase(&rt->xa, res->id); - if (res->type == RDMA_RESTRACK_MR) - return; WARN_ON(old != res); out: From 5e15ff29b156bbbdeadae230c8ecd5ecd8ca2477 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Mon, 7 Nov 2022 10:51:35 +0200 Subject: [PATCH 1504/4122] RDMA/core: Make sure "ib_port" is valid when access sysfs node The "ib_port"
structure must be set before adding the sysfs kobject, and reset after removing it, otherwise it may crash when accessing the sysfs node: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000050 Mem abort info: ESR = 0x96000006 Exception class = DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 Data abort info: ISV = 0, ISS = 0x00000006 CM = 0, WnR = 0 user pgtable: 4k pages, 48-bit VAs, pgdp = 00000000e85f5ba5 [0000000000000050] pgd=0000000848fd9003, pud=000000085b387003, pmd=0000000000000000 Internal error: Oops: 96000006 [#2] PREEMPT SMP Modules linked in: ib_umad(O) mlx5_ib(O) nfnetlink_cttimeout(E) nfnetlink(E) act_gact(E) cls_flower(E) sch_ingress(E) openvswitch(E) nsh(E) nf_nat_ipv6(E) nf_nat_ipv4(E) nf_conncount(E) nf_nat(E) nf_conntrack(E) nf_defrag_ipv6(E) nf_defrag_ipv4(E) mst_pciconf(O) ipmi_devintf(E) ipmi_msghandler(E) ipmb_dev_int(OE) mlx5_core(O) mlxfw(O) mlxdevm(O) auxiliary(O) ib_uverbs(O) ib_core(O) mlx_compat(O) psample(E) sbsa_gwdt(E) uio_pdrv_genirq(E) uio(E) mlxbf_pmc(OE) mlxbf_gige(OE) mlxbf_tmfifo(OE) gpio_mlxbf2(OE) pwr_mlxbf(OE) mlx_trio(OE) i2c_mlxbf(OE) mlx_bootctl(OE) bluefield_edac(OE) knem(O) ip_tables(E) ipv6(E) crc_ccitt(E) [last unloaded: mst_pci] Process grep (pid: 3372, stack limit = 0x0000000022055c92) CPU: 5 PID: 3372 Comm: grep Tainted: G D OE 4.19.161-mlnx.47.gadcd9e3 #1 Hardware name: https://www.mellanox.com BlueField SoC/BlueField SoC, BIOS BlueField:3.9.2-15-ga2403ab Sep 8 2022 pstate: 40000005 (nZcv daif -PAN -UAO) pc : hw_stat_port_show+0x4c/0x80 [ib_core] lr : port_attr_show+0x40/0x58 [ib_core] sp : ffff000029f43b50 x29: ffff000029f43b50 x28: 0000000019375000 x27: ffff8007b821a540 x26: ffff000029f43e30 x25: 0000000000008000 x24: ffff000000eaa958 x23: 0000000000001000 x22: ffff8007a4ce3000 x21: ffff8007baff8000 x20: ffff8007b9066ac0 x19: ffff8007bae97578 x18: 0000000000000000 x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 x14: 0000000000000000 x13: 
0000000000000000 x12: 0000000000000000 x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000000 x8 : ffff8007a4ce4000 x7 : 0000000000000000 x6 : 000000000000003f x5 : ffff000000e6a280 x4 : ffff8007a4ce3000 x3 : 0000000000000000 x2 : aaaaaaaaaaaaaaab x1 : ffff8007b9066a10 x0 : ffff8007baff8000 Call trace: hw_stat_port_show+0x4c/0x80 [ib_core] port_attr_show+0x40/0x58 [ib_core] sysfs_kf_seq_show+0x8c/0x150 kernfs_seq_show+0x44/0x50 seq_read+0x1b4/0x45c kernfs_fop_read+0x148/0x1d8 __vfs_read+0x58/0x180 vfs_read+0x94/0x154 ksys_read+0x68/0xd8 __arm64_sys_read+0x28/0x34 el0_svc_common+0x88/0x18c el0_svc_handler+0x78/0x94 el0_svc+0x8/0xe8 Code: f2955562 aa1603e4 aa1503e0 f9405683 (f9402861) Fixes: d8a5883814b9 ("RDMA/core: Replace the ib_port_data hw_stats pointers with a ib_port pointer") Signed-off-by: Mark Zhang Reviewed-by: Michael Guralnik Link: https://lore.kernel.org/r/88867e705c42c1cd2011e45201c25eecdb9fef94.1667810736.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/sysfs.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 84c53bd2a52d..ee59d7391568 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -1213,6 +1213,9 @@ static struct ib_port *setup_port(struct ib_core_device *coredev, int port_num, p->port_num = port_num; kobject_init(&p->kobj, &port_type); + if (device->port_data && is_full_dev) + device->port_data[port_num].sysfs = p; + cur_group = p->groups_list; ret = alloc_port_table_group("gids", &p->groups[0], p->attrs_list, attr->gid_tbl_len, show_port_gid); @@ -1258,9 +1261,6 @@ static struct ib_port *setup_port(struct ib_core_device *coredev, int port_num, } list_add_tail(&p->kobj.entry, &coredev->port_list); - if (device->port_data && is_full_dev) - device->port_data[port_num].sysfs = p; - return p; err_groups: @@ -1268,6 +1268,8 @@ err_groups: err_del: 
kobject_del(&p->kobj); err_put: + if (device->port_data && is_full_dev) + device->port_data[port_num].sysfs = NULL; kobject_put(&p->kobj); return ERR_PTR(ret); } @@ -1276,14 +1278,17 @@ static void destroy_port(struct ib_core_device *coredev, struct ib_port *port) { bool is_full_dev = &port->ibdev->coredev == coredev; - if (port->ibdev->port_data && - port->ibdev->port_data[port->port_num].sysfs == port) - port->ibdev->port_data[port->port_num].sysfs = NULL; list_del(&port->kobj.entry); if (is_full_dev) sysfs_remove_groups(&port->kobj, port->ibdev->ops.port_groups); + sysfs_remove_groups(&port->kobj, port->groups_list); kobject_del(&port->kobj); + + if (port->ibdev->port_data && + port->ibdev->port_data[port->port_num].sysfs == port) + port->ibdev->port_data[port->port_num].sysfs = NULL; + kobject_put(&port->kobj); } From ecacb3751f254572af0009b9501e2cdc83a30b6a Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Mon, 7 Nov 2022 10:51:36 +0200 Subject: [PATCH 1505/4122] RDMA/nldev: Return "-EAGAIN" if the cm_id isn't from expected port When filling a cm_id entry, return "-EAGAIN" instead of 0 if the cm_id doesn't have the same port as requested, otherwise an incomplete entry may be returned, which causes "rdma res show cm_id" to return an error.
For example on a machine with two rdma devices with "rping -C 1 -v -s" running background, the "rdma" command fails: $ rdma -V rdma utility, iproute2-5.19.0 $ rdma res show cm_id link mlx5_0/- cm-idn 0 state LISTEN ps TCP pid 28056 comm rping src-addr 0.0.0.0:7174 error: Protocol not available While with this fix it succeeds: $ rdma res show cm_id link mlx5_0/- cm-idn 0 state LISTEN ps TCP pid 26395 comm rping src-addr 0.0.0.0:7174 link mlx5_1/- cm-idn 0 state LISTEN ps TCP pid 26395 comm rping src-addr 0.0.0.0:7174 Fixes: 00313983cda6 ("RDMA/nldev: provide detailed CM_ID information") Signed-off-by: Mark Zhang Link: https://lore.kernel.org/r/a08e898cdac5e28428eb749a99d9d981571b8ea7.1667810736.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/nldev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index b92358f606d0..2be76a3fdd87 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -552,7 +552,7 @@ static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_cm_id *cm_id = &id_priv->id; if (port && port != cm_id->port_num) - return 0; + return -EAGAIN; if (cm_id->port_num && nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num)) From 93e3f45a26310e3f3f8558be40df411e23ab742c Mon Sep 17 00:00:00 2001 From: Sathvika Vasireddy Date: Mon, 14 Nov 2022 23:27:39 +0530 Subject: [PATCH 1506/4122] powerpc: Fix __WARN_FLAGS() for use with Objtool Commit 1e688dd2a3d675 ("powerpc/bug: Provide better flexibility to WARN_ON/__WARN_FLAGS() with asm goto") updated __WARN_FLAGS() to use asm goto, and added a call to 'unreachable()' after the asm goto for optimal code generation. With CONFIG_OBJTOOL enabled, 'annotate_unreachable()' statement in 'unreachable()' tries to note down the location of the subsequent instruction in a separate elf section to aid code flow analysis. 
However, on powerpc, this results in gcc emitting a call to a symbol of size 0. This results in objtool complaining of "unannotated intra-function call" since the target symbol is not a valid function call destination. Objtool wants this annotation for code flow analysis, which we are not yet enabling on powerpc. As such, expand the call to 'unreachable()' in __WARN_FLAGS() without annotate_unreachable(): barrier_before_unreachable(); __builtin_unreachable(); This still results in optimal code generation for __WARN_FLAGS(), while getting rid of the objtool warning. We still need barrier_before_unreachable() to work around gcc bugs 82365 and 106751: - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82365 - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106751 Tested-by: Naveen N. Rao Reviewed-by: Naveen N. Rao Reviewed-by: Christophe Leroy Acked-by: Josh Poimboeuf Signed-off-by: Sathvika Vasireddy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221114175754.1131267-2-sv@linux.ibm.com --- arch/powerpc/include/asm/bug.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index 61a4736355c2..ef42adb44aa3 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -99,7 +99,8 @@ __label__ __label_warn_on; \ \ WARN_ENTRY("twi 31, 0, 0", BUGFLAG_WARNING | (flags), __label_warn_on); \ - unreachable(); \ + barrier_before_unreachable(); \ + __builtin_unreachable(); \ \ __label_warn_on: \ break; \ From 01f2cf0b990e58ae89142f57c7e02d33621311d2 Mon Sep 17 00:00:00 2001 From: Sathvika Vasireddy Date: Mon, 14 Nov 2022 23:27:40 +0530 Subject: [PATCH 1507/4122] powerpc: Override __ALIGN and __ALIGN_STR macros In a subsequent patch, we would want to annotate powerpc assembly functions with SYM_FUNC_START_LOCAL macro. This macro depends on __ALIGN macro. 
The default expansion of __ALIGN macro is: #define __ALIGN .align 4,0x90 So, override __ALIGN and __ALIGN_STR macros to use the same alignment as that of the existing _GLOBAL macro. Also, do not pad with 0x90, because repeated 0x90s are not a nop or trap on powerpc. Tested-by: Naveen N. Rao Reviewed-by: Naveen N. Rao Reviewed-by: Christophe Leroy Acked-by: Josh Poimboeuf Signed-off-by: Sathvika Vasireddy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221114175754.1131267-3-sv@linux.ibm.com --- arch/powerpc/include/asm/linkage.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/include/asm/linkage.h b/arch/powerpc/include/asm/linkage.h index b71b9582e754..b88d1d2cf304 100644 --- a/arch/powerpc/include/asm/linkage.h +++ b/arch/powerpc/include/asm/linkage.h @@ -4,6 +4,9 @@ #include +#define __ALIGN .align 2 +#define __ALIGN_STR ".align 2" + #ifdef CONFIG_PPC64_ELF_ABI_V1 #define cond_syscall(x) \ asm ("\t.weak " #x "\n\t.set " #x ", sys_ni_syscall\n" \ From 29a011fc79e625b2b02f25262657f7c4c59ae9f7 Mon Sep 17 00:00:00 2001 From: Sathvika Vasireddy Date: Mon, 14 Nov 2022 23:27:41 +0530 Subject: [PATCH 1508/4122] powerpc: Fix objtool unannotated intra-function call warnings Objtool throws unannotated intra-function call warnings in the following assembly files: arch/powerpc/kernel/vector.o: warning: objtool: .text+0x53c: unannotated intra-function call arch/powerpc/kvm/book3s_hv_rmhandlers.o: warning: objtool: .text+0x60: unannotated intra-function call arch/powerpc/kvm/book3s_hv_rmhandlers.o: warning: objtool: .text+0x124: unannotated intra-function call arch/powerpc/kvm/book3s_hv_rmhandlers.o: warning: objtool: .text+0x5d4: unannotated intra-function call arch/powerpc/kvm/book3s_hv_rmhandlers.o: warning: objtool: .text+0x5dc: unannotated intra-function call arch/powerpc/kvm/book3s_hv_rmhandlers.o: warning: objtool: .text+0xcb8: unannotated intra-function call arch/powerpc/kvm/book3s_hv_rmhandlers.o: warning: objtool: .text+0xd0c: 
unannotated intra-function call arch/powerpc/kvm/book3s_hv_rmhandlers.o: warning: objtool: .text+0x1030: unannotated intra-function call arch/powerpc/kernel/head_64.o: warning: objtool: .text+0x358: unannotated intra-function call arch/powerpc/kernel/head_64.o: warning: objtool: .text+0x728: unannotated intra-function call arch/powerpc/kernel/head_64.o: warning: objtool: .text+0x4d94: unannotated intra-function call arch/powerpc/kernel/head_64.o: warning: objtool: .text+0x4ec4: unannotated intra-function call arch/powerpc/kvm/book3s_hv_interrupts.o: warning: objtool: .text+0x6c: unannotated intra-function call arch/powerpc/kernel/misc_64.o: warning: objtool: .text+0x64: unannotated intra-function call Objtool does not add STT_NOTYPE symbols with size 0 to the rbtree, which is why find_call_destination() function is not able to find the destination symbol for 'bl' instruction. For such symbols, objtool is throwing unannotated intra-function call warnings in assembly files. Fix these warnings by annotating those symbols with SYM_FUNC_START_LOCAL and SYM_FUNC_END macros, in order to set symbol type to STT_FUNC and symbol size accordingly. Tested-by: Naveen N. Rao Reviewed-by: Naveen N.
Rao Reviewed-by: Christophe Leroy Acked-by: Josh Poimboeuf Signed-off-by: Sathvika Vasireddy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221114175754.1131267-4-sv@linux.ibm.com --- arch/powerpc/kernel/exceptions-64s.S | 4 +++- arch/powerpc/kernel/head_64.S | 7 +++++-- arch/powerpc/kernel/misc_64.S | 4 +++- arch/powerpc/kernel/vector.S | 4 +++- arch/powerpc/kvm/book3s_hv_interrupts.S | 4 +++- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 22 +++++++++++++++------- 6 files changed, 32 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 5381a43e50fe..77201ad9f329 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -13,6 +13,7 @@ * */ +#include #include #include #include @@ -3112,7 +3113,7 @@ _GLOBAL(enable_machine_check) blr /* MSR[RI] should be clear because this uses SRR[01] */ -disable_machine_check: +SYM_FUNC_START_LOCAL(disable_machine_check) mflr r0 bcl 20,31,$+4 0: mflr r3 @@ -3125,3 +3126,4 @@ disable_machine_check: RFI_TO_KERNEL 1: mtlr r0 blr +SYM_FUNC_END(disable_machine_check) diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index dedcc6fe2263..874efd25cc45 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -18,6 +18,7 @@ * variants. */ +#include #include #include #include @@ -462,7 +463,7 @@ generic_secondary_common_init: * Assumes we're mapped EA == RA if the MMU is on. */ #ifdef CONFIG_PPC_BOOK3S -__mmu_off: +SYM_FUNC_START_LOCAL(__mmu_off) mfmsr r3 andi. r0,r3,MSR_IR|MSR_DR beqlr @@ -473,6 +474,7 @@ __mmu_off: sync rfid b . 
/* prevent speculative execution */ +SYM_FUNC_END(__mmu_off) #endif @@ -869,7 +871,7 @@ _GLOBAL(start_secondary_resume) /* * This subroutine clobbers r11 and r12 */ -enable_64b_mode: +SYM_FUNC_START_LOCAL(enable_64b_mode) mfmsr r11 /* grab the current MSR */ #ifdef CONFIG_PPC_BOOK3E_64 oris r11,r11,0x8000 /* CM bit set, we'll set ICM later */ @@ -881,6 +883,7 @@ enable_64b_mode: isync #endif blr +SYM_FUNC_END(enable_64b_mode) /* * This puts the TOC pointer into r2, offset by 0x8000 (as expected diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 36184cada00b..c61a7ba446a8 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -9,6 +9,7 @@ * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com) */ +#include #include #include #include @@ -353,7 +354,7 @@ _GLOBAL(kexec_smp_wait) * * don't overwrite r3 here, it is live for kexec_wait above. */ -real_mode: /* assume normal blr return */ +SYM_FUNC_START_LOCAL(real_mode) /* assume normal blr return */ #ifdef CONFIG_PPC_BOOK3E_64 /* Create an identity mapping. */ b kexec_create_tlb @@ -370,6 +371,7 @@ real_mode: /* assume normal blr return */ mtspr SPRN_SRR0,r11 rfid #endif +SYM_FUNC_END(real_mode) /* * kexec_sequence(newstack, start, image, control, clear_all(), diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 5cf64740edb8..ffe5d90abe17 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -1,4 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#include #include #include #include @@ -185,7 +186,7 @@ fphalf: * Internal routine to enable floating point and set FPSCR to 0. * Don't call it from C; it doesn't use the normal calling convention. 
*/ -fpenable: +SYM_FUNC_START_LOCAL(fpenable) #ifdef CONFIG_PPC32 stwu r1,-64(r1) #else @@ -202,6 +203,7 @@ fpenable: mffs fr31 MTFSF_L(fr1) blr +SYM_FUNC_END(fpenable) fpdisable: mtlr r12 diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index 59d89e4b154a..c0deeea7eef3 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -9,6 +9,7 @@ * Authors: Alexander Graf */ +#include #include #include #include @@ -107,7 +108,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) /* * void kvmhv_save_host_pmu(void) */ -kvmhv_save_host_pmu: +SYM_FUNC_START_LOCAL(kvmhv_save_host_pmu) BEGIN_FTR_SECTION /* Work around P8 PMAE bug */ li r3, -1 @@ -154,3 +155,4 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) stw r8, HSTATE_PMC5(r13) stw r9, HSTATE_PMC6(r13) 31: blr +SYM_FUNC_END(kvmhv_save_host_pmu) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 37f50861dd98..a69d36cbf43b 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -10,6 +10,7 @@ * Authors: Alexander Graf */ +#include #include #include #include @@ -2358,7 +2359,7 @@ hmi_realmode: * This routine calls kvmppc_read_intr, a C function, if an external * interrupt is pending. */ -kvmppc_check_wake_reason: +SYM_FUNC_START_LOCAL(kvmppc_check_wake_reason) mfspr r6, SPRN_SRR1 BEGIN_FTR_SECTION rlwinm r6, r6, 45-31, 0xf /* extract wake reason field (P8) */ @@ -2427,6 +2428,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) addi r1, r1, PPC_MIN_STKFRM mtlr r0 blr +SYM_FUNC_END(kvmppc_check_wake_reason) /* * Save away FP, VMX and VSX registers. @@ -2434,7 +2436,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) * N.B. r30 and r31 are volatile across this function, * thus it is not callable from C. 
*/ -kvmppc_save_fp: +SYM_FUNC_START_LOCAL(kvmppc_save_fp) mflr r30 mr r31,r3 mfmsr r5 @@ -2462,6 +2464,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) stw r6,VCPU_VRSAVE(r31) mtlr r30 blr +SYM_FUNC_END(kvmppc_save_fp) /* * Load up FP, VMX and VSX registers @@ -2469,7 +2472,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) * N.B. r30 and r31 are volatile across this function, * thus it is not callable from C. */ -kvmppc_load_fp: +SYM_FUNC_START_LOCAL(kvmppc_load_fp) mflr r30 mr r31,r4 mfmsr r9 @@ -2498,6 +2501,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) mtlr r30 mr r4,r31 blr +SYM_FUNC_END(kvmppc_load_fp) #ifdef CONFIG_PPC_TRANSACTIONAL_MEM /* @@ -2746,7 +2750,7 @@ kvmppc_bad_host_intr: * r9 has a vcpu pointer (in) * r0 is used as a scratch register */ -kvmppc_msr_interrupt: +SYM_FUNC_START_LOCAL(kvmppc_msr_interrupt) rldicl r0, r11, 64 - MSR_TS_S_LG, 62 cmpwi r0, 2 /* Check if we are in transactional state.. */ ld r11, VCPU_INTR_MSR(r9) @@ -2755,13 +2759,14 @@ kvmppc_msr_interrupt: li r0, 1 1: rldimi r11, r0, MSR_TS_S_LG, 63 - MSR_TS_T_LG blr +SYM_FUNC_END(kvmppc_msr_interrupt) /* * void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu) * * Load up guest PMU state. R3 points to the vcpu struct. */ -kvmhv_load_guest_pmu: +SYM_FUNC_START_LOCAL(kvmhv_load_guest_pmu) mr r4, r3 mflr r0 li r3, 1 @@ -2811,13 +2816,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) isync mtlr r0 blr +SYM_FUNC_END(kvmhv_load_guest_pmu) /* * void kvmhv_load_host_pmu(void) * * Reload host PMU state saved in the PACA by kvmhv_save_host_pmu. */ -kvmhv_load_host_pmu: +SYM_FUNC_START_LOCAL(kvmhv_load_host_pmu) mflr r0 lbz r4, PACA_PMCINUSE(r13) /* is the host using the PMU? */ cmpwi r4, 0 @@ -2859,6 +2865,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) isync mtlr r0 23: blr +SYM_FUNC_END(kvmhv_load_host_pmu) /* * void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use) @@ -2866,7 +2873,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) * Save guest PMU state into the vcpu struct. 
* r3 = vcpu, r4 = full save flag (PMU in use flag set in VPA) */ -kvmhv_save_guest_pmu: +SYM_FUNC_START_LOCAL(kvmhv_save_guest_pmu) mr r9, r3 mr r8, r4 BEGIN_FTR_SECTION @@ -2942,6 +2949,7 @@ BEGIN_FTR_SECTION mtspr SPRN_MMCRS, r4 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 22: blr +SYM_FUNC_END(kvmhv_save_guest_pmu) /* * This works around a hardware bug on POWER8E processors, where From 8d0c21b50655bfe136a76cf384495ba1f9c87224 Mon Sep 17 00:00:00 2001 From: Sathvika Vasireddy Date: Mon, 14 Nov 2022 23:27:42 +0530 Subject: [PATCH 1509/4122] powerpc: Curb objtool unannotated intra-function call warnings objtool throws the following unannotated intra-function call warnings: arch/powerpc/kernel/entry_64.o: warning: objtool: .text+0x4: unannotated intra-function call arch/powerpc/kvm/book3s_hv_rmhandlers.o: warning: objtool: .text+0xe64: unannotated intra-function call arch/powerpc/kvm/book3s_hv_rmhandlers.o: warning: objtool: .text+0xee4: unannotated intra-function call Fix these warnings by annotating intra-function calls, using ANNOTATE_INTRA_FUNCTION_CALL macro, to indicate that the branch targets are valid. Tested-by: Naveen N. Rao Reviewed-by: Naveen N. Rao Reviewed-by: Christophe Leroy Acked-by: Josh Poimboeuf Signed-off-by: Sathvika Vasireddy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221114175754.1131267-5-sv@linux.ibm.com --- arch/powerpc/kernel/entry_64.S | 2 ++ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 3 +++ 2 files changed, 5 insertions(+) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 3e2e37e6ecab..1bf1121e17f1 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -14,6 +14,7 @@ * code, and exception/interrupt return code for PowerPC. 
*/ +#include #include #include #include @@ -73,6 +74,7 @@ flush_branch_caches: // Flush the link stack .rept 64 + ANNOTATE_INTRA_FUNCTION_CALL bl .+4 .endr b 1f diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index a69d36cbf43b..96b65b530156 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -11,6 +11,7 @@ */ #include +#include #include #include #include @@ -1523,12 +1524,14 @@ kvm_flush_link_stack: /* Flush the link stack. On Power8 it's up to 32 entries in size. */ .rept 32 + ANNOTATE_INTRA_FUNCTION_CALL bl .+4 .endr /* And on Power9 it's up to 64. */ BEGIN_FTR_SECTION .rept 32 + ANNOTATE_INTRA_FUNCTION_CALL bl .+4 .endr END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) From 07445ae1c26367928311e13f2a821ae94410da7e Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 15 Nov 2022 11:16:24 +0100 Subject: [PATCH 1510/4122] gpiolib: of: change of_find_gpio() to accept device node In preparation of switching all OF-based GPIO lookups to go through of_find_gpio() let's change it to accept device node as its argument as we do not always have access to device structure. 
Reviewed-by: Andy Shevchenko Acked-by: Linus Walleij Signed-off-by: Dmitry Torokhov Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-of.c | 7 +++---- drivers/gpio/gpiolib-of.h | 4 ++-- drivers/gpio/gpiolib.c | 5 +++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index 4be3c21aa718..596b8e21700e 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -605,7 +605,7 @@ static const of_find_gpio_quirk of_find_gpio_quirks[] = { NULL }; -struct gpio_desc *of_find_gpio(struct device *dev, const char *con_id, +struct gpio_desc *of_find_gpio(struct device_node *np, const char *con_id, unsigned int idx, unsigned long *flags) { char prop_name[32]; /* 32 is max size of property name */ @@ -623,8 +623,7 @@ struct gpio_desc *of_find_gpio(struct device *dev, const char *con_id, snprintf(prop_name, sizeof(prop_name), "%s", gpio_suffixes[i]); - desc = of_get_named_gpiod_flags(dev->of_node, prop_name, idx, - &of_flags); + desc = of_get_named_gpiod_flags(np, prop_name, idx, &of_flags); if (!gpiod_not_found(desc)) break; @@ -632,7 +631,7 @@ struct gpio_desc *of_find_gpio(struct device *dev, const char *con_id, /* Properly named GPIO was not found, try workarounds */ for (q = of_find_gpio_quirks; gpiod_not_found(desc) && *q; q++) - desc = (*q)(dev->of_node, con_id, idx, &of_flags); + desc = (*q)(np, con_id, idx, &of_flags); if (IS_ERR(desc)) return desc; diff --git a/drivers/gpio/gpiolib-of.h b/drivers/gpio/gpiolib-of.h index 2c32a332ede5..bd4131ff61d3 100644 --- a/drivers/gpio/gpiolib-of.h +++ b/drivers/gpio/gpiolib-of.h @@ -7,7 +7,7 @@ struct gpio_chip; enum of_gpio_flags; #ifdef CONFIG_OF_GPIO -struct gpio_desc *of_find_gpio(struct device *dev, +struct gpio_desc *of_find_gpio(struct device_node *np, const char *con_id, unsigned int idx, unsigned long *lookupflags); @@ -16,7 +16,7 @@ void of_gpiochip_remove(struct gpio_chip *gc); int of_gpio_get_count(struct device *dev, const char 
*con_id); void of_gpio_dev_init(struct gpio_chip *gc, struct gpio_device *gdev); #else -static inline struct gpio_desc *of_find_gpio(struct device *dev, +static inline struct gpio_desc *of_find_gpio(struct device_node *np, const char *con_id, unsigned int idx, unsigned long *lookupflags) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 11fb7ec883e9..a80fc8abb03f 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -4122,14 +4122,15 @@ struct gpio_desc *__must_check gpiod_get_index(struct device *dev, int ret; /* Maybe we have a device name, maybe not */ const char *devname = dev ? dev_name(dev) : "?"; - const struct fwnode_handle *fwnode = dev ? dev_fwnode(dev) : NULL; + struct fwnode_handle *fwnode = dev ? dev_fwnode(dev) : NULL; dev_dbg(dev, "GPIO lookup for consumer %s\n", con_id); /* Using device tree? */ if (is_of_node(fwnode)) { dev_dbg(dev, "using device tree for GPIO lookup\n"); - desc = of_find_gpio(dev, con_id, idx, &lookupflags); + desc = of_find_gpio(to_of_node(fwnode), + con_id, idx, &lookupflags); } else if (is_acpi_node(fwnode)) { dev_dbg(dev, "using ACPI for GPIO lookup\n"); desc = acpi_find_gpio(dev, con_id, idx, &flags, &lookupflags); From 2b6bce80ae70b91134a5731d85076042ae90c300 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 11 Nov 2022 14:19:04 -0800 Subject: [PATCH 1511/4122] gpiolib: acpi: change acpi_find_gpio() to accept firmware node In preparation of switching all ACPI-based GPIO lookups to go through acpi_find_gpio() let's change it to accept firmware node as its argument as we do not always have access to device structure.
Reviewed-by: Andy Shevchenko Acked-by: Linus Walleij Signed-off-by: Dmitry Torokhov Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-acpi.c | 8 ++++++-- drivers/gpio/gpiolib-acpi.h | 4 ++-- drivers/gpio/gpiolib.c | 3 ++- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index a7d2358736fe..61b311e4560f 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -906,18 +906,22 @@ static bool acpi_can_fallback_to_crs(struct acpi_device *adev, return con_id == NULL; } -struct gpio_desc *acpi_find_gpio(struct device *dev, +struct gpio_desc *acpi_find_gpio(struct fwnode_handle *fwnode, const char *con_id, unsigned int idx, enum gpiod_flags *dflags, unsigned long *lookupflags) { - struct acpi_device *adev = ACPI_COMPANION(dev); + struct acpi_device *adev; struct acpi_gpio_info info; struct gpio_desc *desc; char propname[32]; int i; + adev = to_acpi_device_node(fwnode); + if (!adev) + return ERR_PTR(-ENODEV); + /* Try first from _DSD */ for (i = 0; i < ARRAY_SIZE(gpio_suffixes); i++) { if (con_id) { diff --git a/drivers/gpio/gpiolib-acpi.h b/drivers/gpio/gpiolib-acpi.h index 1ac6816839db..9fc34830639c 100644 --- a/drivers/gpio/gpiolib-acpi.h +++ b/drivers/gpio/gpiolib-acpi.h @@ -48,7 +48,7 @@ int acpi_gpio_update_gpiod_flags(enum gpiod_flags *flags, int acpi_gpio_update_gpiod_lookup_flags(unsigned long *lookupflags, struct acpi_gpio_info *info); -struct gpio_desc *acpi_find_gpio(struct device *dev, +struct gpio_desc *acpi_find_gpio(struct fwnode_handle *fwnode, const char *con_id, unsigned int idx, enum gpiod_flags *dflags, @@ -83,7 +83,7 @@ acpi_gpio_update_gpiod_lookup_flags(unsigned long *lookupflags, } static inline struct gpio_desc * -acpi_find_gpio(struct device *dev, const char *con_id, +acpi_find_gpio(struct fwnode_handle *fwnode, const char *con_id, unsigned int idx, enum gpiod_flags *dflags, unsigned long *lookupflags) { diff --git a/drivers/gpio/gpiolib.c 
b/drivers/gpio/gpiolib.c index a80fc8abb03f..e874bb0ef685 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -4133,7 +4133,8 @@ struct gpio_desc *__must_check gpiod_get_index(struct device *dev, con_id, idx, &lookupflags); } else if (is_acpi_node(fwnode)) { dev_dbg(dev, "using ACPI for GPIO lookup\n"); - desc = acpi_find_gpio(dev, con_id, idx, &flags, &lookupflags); + desc = acpi_find_gpio(fwnode, + con_id, idx, &flags, &lookupflags); } /* From 16ba046e86e93f42117efe7ca7a7940b83c60afc Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 11 Nov 2022 14:19:05 -0800 Subject: [PATCH 1512/4122] gpiolib: acpi: teach acpi_find_gpio() to handle data-only nodes In preparation of switching all ACPI-based GPIO lookups to go through acpi_find_gpio() we need to make sure it can handle data-only ACPI nodes, same as existing acpi_node_get_gpiod(). Reviewed-by: Andy Shevchenko Acked-by: Linus Walleij Signed-off-by: Dmitry Torokhov Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-acpi.c | 76 ++++++++++++++++++++++++------------- 1 file changed, 50 insertions(+), 26 deletions(-) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 61b311e4560f..bd36fac20ea0 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -864,8 +864,9 @@ static int acpi_gpio_property_lookup(struct fwnode_handle *fwnode, * function only returns the first. */ static struct gpio_desc *acpi_get_gpiod_by_index(struct acpi_device *adev, - const char *propname, int index, - struct acpi_gpio_info *info) + const char *propname, + int index, + struct acpi_gpio_info *info) { struct acpi_gpio_lookup lookup; int ret; @@ -896,6 +897,44 @@ static struct gpio_desc *acpi_get_gpiod_by_index(struct acpi_device *adev, return ret ? 
ERR_PTR(ret) : lookup.desc; } +/** + * acpi_get_gpiod_from_data() - get a GPIO descriptor from ACPI data node + * @fwnode: pointer to an ACPI firmware node to get the GPIO information from + * @propname: Property name of the GPIO + * @index: index of GpioIo/GpioInt resource (starting from %0) + * @info: info pointer to fill in (optional) + * + * This function uses the property-based GPIO lookup to get to the GPIO + * resource with the relevant information from a data-only ACPI firmware node + * and uses that to obtain the GPIO descriptor to return. + * + * If the GPIO cannot be translated or there is an error an ERR_PTR is + * returned. + */ +static struct gpio_desc *acpi_get_gpiod_from_data(struct fwnode_handle *fwnode, + const char *propname, + int index, + struct acpi_gpio_info *info) +{ + struct acpi_gpio_lookup lookup; + int ret; + + if (!is_acpi_data_node(fwnode)) + return ERR_PTR(-ENODEV); + + if (!propname) + return ERR_PTR(-EINVAL); + + lookup.index = index; + + ret = acpi_gpio_property_lookup(fwnode, propname, index, &lookup); + if (ret) + return ERR_PTR(ret); + + ret = acpi_gpio_resource_lookup(&lookup, info); + return ret ? 
ERR_PTR(ret) : lookup.desc; +} + static bool acpi_can_fallback_to_crs(struct acpi_device *adev, const char *con_id) { @@ -912,16 +951,12 @@ struct gpio_desc *acpi_find_gpio(struct fwnode_handle *fwnode, enum gpiod_flags *dflags, unsigned long *lookupflags) { - struct acpi_device *adev; + struct acpi_device *adev = to_acpi_device_node(fwnode); struct acpi_gpio_info info; struct gpio_desc *desc; char propname[32]; int i; - adev = to_acpi_device_node(fwnode); - if (!adev) - return ERR_PTR(-ENODEV); - /* Try first from _DSD */ for (i = 0; i < ARRAY_SIZE(gpio_suffixes); i++) { if (con_id) { @@ -932,7 +967,12 @@ struct gpio_desc *acpi_find_gpio(struct fwnode_handle *fwnode, gpio_suffixes[i]); } - desc = acpi_get_gpiod_by_index(adev, propname, idx, &info); + if (adev) + desc = acpi_get_gpiod_by_index(adev, + propname, idx, &info); + else + desc = acpi_get_gpiod_from_data(fwnode, + propname, idx, &info); if (!IS_ERR(desc)) break; if (PTR_ERR(desc) == -EPROBE_DEFER) @@ -941,7 +981,7 @@ struct gpio_desc *acpi_find_gpio(struct fwnode_handle *fwnode, /* Then from plain _CRS GPIOs */ if (IS_ERR(desc)) { - if (!acpi_can_fallback_to_crs(adev, con_id)) + if (!adev || !acpi_can_fallback_to_crs(adev, con_id)) return ERR_PTR(-ENOENT); desc = acpi_get_gpiod_by_index(adev, NULL, idx, &info); @@ -979,29 +1019,13 @@ struct gpio_desc *acpi_node_get_gpiod(struct fwnode_handle *fwnode, const char *propname, int index, struct acpi_gpio_info *info) { - struct acpi_gpio_lookup lookup; struct acpi_device *adev; - int ret; adev = to_acpi_device_node(fwnode); if (adev) return acpi_get_gpiod_by_index(adev, propname, index, info); - if (!is_acpi_data_node(fwnode)) - return ERR_PTR(-ENODEV); - - if (!propname) - return ERR_PTR(-EINVAL); - - memset(&lookup, 0, sizeof(lookup)); - lookup.index = index; - - ret = acpi_gpio_property_lookup(fwnode, propname, index, &lookup); - if (ret) - return ERR_PTR(ret); - - ret = acpi_gpio_resource_lookup(&lookup, info); - return ret ? 
ERR_PTR(ret) : lookup.desc; + return acpi_get_gpiod_from_data(fwnode, propname, index, info); } /** From b7452d670fdef8974e18754342fe6f68e20c2567 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 15 Nov 2022 11:20:47 +0100 Subject: [PATCH 1513/4122] gpiolib: acpi: avoid leaking ACPI details into upper gpiolib layers There is no need for the generic parts of GPIOLIB to be aware of implementation details of ACPI-based lookups. Reviewed-by: Andy Shevchenko Acked-by: Linus Walleij Signed-off-by: Dmitry Torokhov Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-acpi.c | 51 +++++++++++++++++++++++++++++++------ drivers/gpio/gpiolib-acpi.h | 46 +++------------------------------ drivers/gpio/gpiolib.c | 8 ++---- 3 files changed, 48 insertions(+), 57 deletions(-) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index bd36fac20ea0..1d69b707cbb1 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -89,6 +89,30 @@ struct acpi_gpio_chip { struct list_head deferred_req_irqs_list_entry; }; +/** + * struct acpi_gpio_info - ACPI GPIO specific information + * @adev: reference to ACPI device which consumes GPIO resource + * @flags: GPIO initialization flags + * @gpioint: if %true this GPIO is of type GpioInt otherwise type is GpioIo + * @pin_config: pin bias as provided by ACPI + * @polarity: interrupt polarity as provided by ACPI + * @triggering: triggering type as provided by ACPI + * @wake_capable: wake capability as provided by ACPI + * @debounce: debounce timeout as provided by ACPI + * @quirks: Linux specific quirks as provided by struct acpi_gpio_mapping + */ +struct acpi_gpio_info { + struct acpi_device *adev; + enum gpiod_flags flags; + bool gpioint; + int pin_config; + int polarity; + int triggering; + bool wake_capable; + unsigned int debounce; + unsigned int quirks; +}; + /* * For GPIO chips which call acpi_gpiochip_request_interrupts() before late_init * (so builtin drivers) we register the ACPI GpioInt IRQ
handlers from a @@ -670,8 +694,8 @@ __acpi_gpio_update_gpiod_flags(enum gpiod_flags *flags, enum gpiod_flags update) return ret; } -int -acpi_gpio_update_gpiod_flags(enum gpiod_flags *flags, struct acpi_gpio_info *info) +static int acpi_gpio_update_gpiod_flags(enum gpiod_flags *flags, + struct acpi_gpio_info *info) { struct device *dev = &info->adev->dev; enum gpiod_flags old = *flags; @@ -690,8 +714,8 @@ acpi_gpio_update_gpiod_flags(enum gpiod_flags *flags, struct acpi_gpio_info *inf return ret; } -int acpi_gpio_update_gpiod_lookup_flags(unsigned long *lookupflags, - struct acpi_gpio_info *info) +static int acpi_gpio_update_gpiod_lookup_flags(unsigned long *lookupflags, + struct acpi_gpio_info *info) { switch (info->pin_config) { case ACPI_PIN_CONFIG_PULLUP: @@ -1005,7 +1029,8 @@ struct gpio_desc *acpi_find_gpio(struct fwnode_handle *fwnode, * @fwnode: pointer to an ACPI firmware node to get the GPIO information from * @propname: Property name of the GPIO * @index: index of GpioIo/GpioInt resource (starting from %0) - * @info: info pointer to fill in (optional) + * @lflags: bitmask of gpio_lookup_flags GPIO_* values + * @dflags: gpiod initialization flags * * If @fwnode is an ACPI device object, call acpi_get_gpiod_by_index() for it. * Otherwise (i.e. 
it is a data-only non-device object), use the property-based @@ -1017,15 +1042,25 @@ struct gpio_desc *acpi_find_gpio(struct fwnode_handle *fwnode, */ struct gpio_desc *acpi_node_get_gpiod(struct fwnode_handle *fwnode, const char *propname, int index, - struct acpi_gpio_info *info) + unsigned long *lflags, + enum gpiod_flags *dflags) { + struct acpi_gpio_info info; struct acpi_device *adev; + struct gpio_desc *desc; adev = to_acpi_device_node(fwnode); if (adev) - return acpi_get_gpiod_by_index(adev, propname, index, info); + desc = acpi_get_gpiod_by_index(adev, propname, index, &info); + else + desc = acpi_get_gpiod_from_data(fwnode, propname, index, &info); - return acpi_get_gpiod_from_data(fwnode, propname, index, info); + if (!IS_ERR(desc)) { + acpi_gpio_update_gpiod_flags(dflags, &info); + acpi_gpio_update_gpiod_lookup_flags(lflags, &info); + } + + return desc; } /** diff --git a/drivers/gpio/gpiolib-acpi.h b/drivers/gpio/gpiolib-acpi.h index 9fc34830639c..42adaab518c5 100644 --- a/drivers/gpio/gpiolib-acpi.h +++ b/drivers/gpio/gpiolib-acpi.h @@ -10,30 +10,6 @@ struct acpi_device; -/** - * struct acpi_gpio_info - ACPI GPIO specific information - * @adev: reference to ACPI device which consumes GPIO resource - * @flags: GPIO initialization flags - * @gpioint: if %true this GPIO is of type GpioInt otherwise type is GpioIo - * @pin_config: pin bias as provided by ACPI - * @polarity: interrupt polarity as provided by ACPI - * @triggering: triggering type as provided by ACPI - * @wake_capable: wake capability as provided by ACPI - * @debounce: debounce timeout as provided by ACPI - * @quirks: Linux specific quirks as provided by struct acpi_gpio_mapping - */ -struct acpi_gpio_info { - struct acpi_device *adev; - enum gpiod_flags flags; - bool gpioint; - int pin_config; - int polarity; - int triggering; - bool wake_capable; - unsigned int debounce; - unsigned int quirks; -}; - #ifdef CONFIG_ACPI void acpi_gpiochip_add(struct gpio_chip *chip); void 
acpi_gpiochip_remove(struct gpio_chip *chip); @@ -43,11 +19,6 @@ void acpi_gpio_dev_init(struct gpio_chip *gc, struct gpio_device *gdev); void acpi_gpiochip_request_interrupts(struct gpio_chip *chip); void acpi_gpiochip_free_interrupts(struct gpio_chip *chip); -int acpi_gpio_update_gpiod_flags(enum gpiod_flags *flags, - struct acpi_gpio_info *info); -int acpi_gpio_update_gpiod_lookup_flags(unsigned long *lookupflags, - struct acpi_gpio_info *info); - struct gpio_desc *acpi_find_gpio(struct fwnode_handle *fwnode, const char *con_id, unsigned int idx, @@ -55,7 +26,8 @@ struct gpio_desc *acpi_find_gpio(struct fwnode_handle *fwnode, unsigned long *lookupflags); struct gpio_desc *acpi_node_get_gpiod(struct fwnode_handle *fwnode, const char *propname, int index, - struct acpi_gpio_info *info); + unsigned long *lflags, + enum gpiod_flags *dflags); int acpi_gpio_count(struct device *dev, const char *con_id); #else @@ -70,18 +42,6 @@ acpi_gpiochip_request_interrupts(struct gpio_chip *chip) { } static inline void acpi_gpiochip_free_interrupts(struct gpio_chip *chip) { } -static inline int -acpi_gpio_update_gpiod_flags(enum gpiod_flags *flags, struct acpi_gpio_info *info) -{ - return 0; -} -static inline int -acpi_gpio_update_gpiod_lookup_flags(unsigned long *lookupflags, - struct acpi_gpio_info *info) -{ - return 0; -} - static inline struct gpio_desc * acpi_find_gpio(struct fwnode_handle *fwnode, const char *con_id, unsigned int idx, enum gpiod_flags *dflags, @@ -91,7 +51,7 @@ acpi_find_gpio(struct fwnode_handle *fwnode, const char *con_id, } static inline struct gpio_desc * acpi_node_get_gpiod(struct fwnode_handle *fwnode, const char *propname, - int index, struct acpi_gpio_info *info) + int index, unsigned long *lflags, enum gpiod_flags *dflags) { return ERR_PTR(-ENXIO); } diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index e874bb0ef685..b213f5f93ae0 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -3890,14 +3890,10 @@ static struct 
gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode, label); return desc; } else if (is_acpi_node(fwnode)) { - struct acpi_gpio_info info; - - desc = acpi_node_get_gpiod(fwnode, propname, index, &info); + desc = acpi_node_get_gpiod(fwnode, propname, index, + &lflags, &dflags); if (IS_ERR(desc)) return desc; - - acpi_gpio_update_gpiod_flags(&dflags, &info); - acpi_gpio_update_gpiod_lookup_flags(&lflags, &info); } else { return ERR_PTR(-EINVAL); } From 8eb1f71e7acca4f92cf9cf83030cbb8ec2524025 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 11 Nov 2022 14:19:07 -0800 Subject: [PATCH 1514/4122] gpiolib: consolidate GPIO lookups Ensure that all paths to obtain/look up GPIOD from generic consumer-visible APIs go through the new gpiod_find_and_request() helper, so that we can easily extend it with support for new firmware mechanisms. The only exception is OF-specific [devm_]gpiod_get_from_of_node() API that is still being used by a couple of drivers and will be removed as soon as patches converting them to use generic fwnode/device APIs are accepted. 
Acked-by: Linus Walleij Signed-off-by: Dmitry Torokhov Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-acpi.c | 39 ------- drivers/gpio/gpiolib-acpi.h | 10 -- drivers/gpio/gpiolib.c | 206 ++++++++++++++---------------------- 3 files changed, 77 insertions(+), 178 deletions(-) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 1d69b707cbb1..c99c94e5483f 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -1024,45 +1024,6 @@ struct gpio_desc *acpi_find_gpio(struct fwnode_handle *fwnode, return desc; } -/** - * acpi_node_get_gpiod() - get a GPIO descriptor from ACPI resources - * @fwnode: pointer to an ACPI firmware node to get the GPIO information from - * @propname: Property name of the GPIO - * @index: index of GpioIo/GpioInt resource (starting from %0) - * @lflags: bitmask of gpio_lookup_flags GPIO_* values - * @dflags: gpiod initialization flags - * - * If @fwnode is an ACPI device object, call acpi_get_gpiod_by_index() for it. - * Otherwise (i.e. it is a data-only non-device object), use the property-based - * GPIO lookup to get to the GPIO resource with the relevant information and use - * that to obtain the GPIO descriptor to return. - * - * If the GPIO cannot be translated or there is an error an ERR_PTR is - * returned. 
- */ -struct gpio_desc *acpi_node_get_gpiod(struct fwnode_handle *fwnode, - const char *propname, int index, - unsigned long *lflags, - enum gpiod_flags *dflags) -{ - struct acpi_gpio_info info; - struct acpi_device *adev; - struct gpio_desc *desc; - - adev = to_acpi_device_node(fwnode); - if (adev) - desc = acpi_get_gpiod_by_index(adev, propname, index, &info); - else - desc = acpi_get_gpiod_from_data(fwnode, propname, index, &info); - - if (!IS_ERR(desc)) { - acpi_gpio_update_gpiod_flags(dflags, &info); - acpi_gpio_update_gpiod_lookup_flags(lflags, &info); - } - - return desc; -} - /** * acpi_dev_gpio_irq_wake_get_by() - Find GpioInt and translate it to Linux IRQ number * @adev: pointer to a ACPI device to get IRQ from diff --git a/drivers/gpio/gpiolib-acpi.h b/drivers/gpio/gpiolib-acpi.h index 42adaab518c5..d2b5dab5c5bf 100644 --- a/drivers/gpio/gpiolib-acpi.h +++ b/drivers/gpio/gpiolib-acpi.h @@ -24,10 +24,6 @@ struct gpio_desc *acpi_find_gpio(struct fwnode_handle *fwnode, unsigned int idx, enum gpiod_flags *dflags, unsigned long *lookupflags); -struct gpio_desc *acpi_node_get_gpiod(struct fwnode_handle *fwnode, - const char *propname, int index, - unsigned long *lflags, - enum gpiod_flags *dflags); int acpi_gpio_count(struct device *dev, const char *con_id); #else @@ -49,12 +45,6 @@ acpi_find_gpio(struct fwnode_handle *fwnode, const char *con_id, { return ERR_PTR(-ENOENT); } -static inline struct gpio_desc * -acpi_node_get_gpiod(struct fwnode_handle *fwnode, const char *propname, - int index, unsigned long *lflags, enum gpiod_flags *dflags) -{ - return ERR_PTR(-ENXIO); -} static inline int acpi_gpio_count(struct device *dev, const char *con_id) { return -ENODEV; diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index b213f5f93ae0..7f739096c4cf 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -366,7 +366,7 @@ static int gpiochip_set_desc_names(struct gpio_chip *gc) static int devprop_gpiochip_set_names(struct gpio_chip *chip) { 
struct gpio_device *gdev = chip->gpiodev; - struct fwnode_handle *fwnode = dev_fwnode(&gdev->dev); + const struct fwnode_handle *fwnode = dev_fwnode(&gdev->dev); const char **names; int ret, i; int count; @@ -3853,58 +3853,84 @@ static int platform_gpio_count(struct device *dev, const char *con_id) return count; } -/** - * fwnode_get_named_gpiod - obtain a GPIO from firmware node - * @fwnode: handle of the firmware node - * @propname: name of the firmware property representing the GPIO - * @index: index of the GPIO to obtain for the consumer - * @dflags: GPIO initialization flags - * @label: label to attach to the requested GPIO - * - * This function can be used for drivers that get their configuration - * from opaque firmware. - * - * The function properly finds the corresponding GPIO using whatever is the - * underlying firmware interface and then makes sure that the GPIO - * descriptor is requested before it is returned to the caller. - * - * Returns: - * On successful request the GPIO pin is configured in accordance with - * provided @dflags. - * - * In case of error an ERR_PTR() is returned. 
- */ -static struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode, - const char *propname, int index, - enum gpiod_flags dflags, - const char *label) +static struct gpio_desc *gpiod_find_by_fwnode(struct fwnode_handle *fwnode, + struct device *consumer, + const char *con_id, + unsigned int idx, + enum gpiod_flags *flags, + unsigned long *lookupflags) { - unsigned long lflags = GPIO_LOOKUP_FLAGS_DEFAULT; - struct gpio_desc *desc = ERR_PTR(-ENODEV); - int ret; + struct gpio_desc *desc = ERR_PTR(-ENOENT); if (is_of_node(fwnode)) { - desc = gpiod_get_from_of_node(to_of_node(fwnode), - propname, index, - dflags, - label); - return desc; + dev_dbg(consumer, "using DT '%pfw' for '%s' GPIO lookup\n", + fwnode, con_id); + desc = of_find_gpio(to_of_node(fwnode), con_id, idx, lookupflags); } else if (is_acpi_node(fwnode)) { - desc = acpi_node_get_gpiod(fwnode, propname, index, - &lflags, &dflags); - if (IS_ERR(desc)) - return desc; - } else { - return ERR_PTR(-EINVAL); + dev_dbg(consumer, "using ACPI '%pfw' for '%s' GPIO lookup\n", + fwnode, con_id); + desc = acpi_find_gpio(fwnode, con_id, idx, flags, lookupflags); } - /* Currently only ACPI takes this path */ - ret = gpiod_request(desc, label); - if (ret) - return ERR_PTR(ret); + return desc; +} - ret = gpiod_configure_flags(desc, propname, lflags, dflags); +static struct gpio_desc *gpiod_find_and_request(struct device *consumer, + struct fwnode_handle *fwnode, + const char *con_id, + unsigned int idx, + enum gpiod_flags flags, + const char *label, + bool platform_lookup_allowed) +{ + struct gpio_desc *desc = ERR_PTR(-ENOENT); + unsigned long lookupflags; + int ret; + + if (!IS_ERR_OR_NULL(fwnode)) + desc = gpiod_find_by_fwnode(fwnode, consumer, con_id, idx, + &flags, &lookupflags); + + if (gpiod_not_found(desc) && platform_lookup_allowed) { + /* + * Either we are not using DT or ACPI, or their lookup did not + * return a result. In that case, use platform lookup as a + * fallback. 
+ */ + dev_dbg(consumer, "using lookup tables for GPIO lookup\n"); + desc = gpiod_find(consumer, con_id, idx, &lookupflags); + } + + if (IS_ERR(desc)) { + dev_dbg(consumer, "No GPIO consumer %s found\n", con_id); + return desc; + } + + /* + * If a connection label was passed use that, else attempt to use + * the device name as label + */ + ret = gpiod_request(desc, label); + if (ret) { + if (!(ret == -EBUSY && flags & GPIOD_FLAGS_BIT_NONEXCLUSIVE)) + return ERR_PTR(ret); + + /* + * This happens when there are several consumers for + * the same GPIO line: we just return here without + * further initialization. It is a bit of a hack. + * This is necessary to support fixed regulators. + * + * FIXME: Make this more sane and safe. + */ + dev_info(consumer, + "nonexclusive access to GPIO for %s\n", con_id); + return desc; + } + + ret = gpiod_configure_flags(desc, con_id, lookupflags, flags); if (ret < 0) { + dev_dbg(consumer, "setup of GPIO %s failed\n", con_id); gpiod_put(desc); return ERR_PTR(ret); } @@ -3937,29 +3963,12 @@ static struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode, * In case of error an ERR_PTR() is returned. 
*/ struct gpio_desc *fwnode_gpiod_get_index(struct fwnode_handle *fwnode, - const char *con_id, int index, + const char *con_id, + int index, enum gpiod_flags flags, const char *label) { - struct gpio_desc *desc; - char prop_name[32]; /* 32 is max size of property name */ - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(gpio_suffixes); i++) { - if (con_id) - snprintf(prop_name, sizeof(prop_name), "%s-%s", - con_id, gpio_suffixes[i]); - else - snprintf(prop_name, sizeof(prop_name), "%s", - gpio_suffixes[i]); - - desc = fwnode_get_named_gpiod(fwnode, prop_name, index, flags, - label); - if (!gpiod_not_found(desc)) - break; - } - - return desc; + return gpiod_find_and_request(NULL, fwnode, con_id, index, flags, label, false); } EXPORT_SYMBOL_GPL(fwnode_gpiod_get_index); @@ -4113,72 +4122,11 @@ struct gpio_desc *__must_check gpiod_get_index(struct device *dev, unsigned int idx, enum gpiod_flags flags) { - unsigned long lookupflags = GPIO_LOOKUP_FLAGS_DEFAULT; - struct gpio_desc *desc = NULL; - int ret; - /* Maybe we have a device name, maybe not */ - const char *devname = dev ? dev_name(dev) : "?"; struct fwnode_handle *fwnode = dev ? dev_fwnode(dev) : NULL; + const char *devname = dev ? dev_name(dev) : "?"; + const char *label = con_id ?: devname; - dev_dbg(dev, "GPIO lookup for consumer %s\n", con_id); - - /* Using device tree? */ - if (is_of_node(fwnode)) { - dev_dbg(dev, "using device tree for GPIO lookup\n"); - desc = of_find_gpio(to_of_node(fwnode), - con_id, idx, &lookupflags); - } else if (is_acpi_node(fwnode)) { - dev_dbg(dev, "using ACPI for GPIO lookup\n"); - desc = acpi_find_gpio(fwnode, - con_id, idx, &flags, &lookupflags); - } - - /* - * Either we are not using DT or ACPI, or their lookup did not return - * a result. In that case, use platform lookup as a fallback. 
- */ - if (!desc || gpiod_not_found(desc)) { - dev_dbg(dev, "using lookup tables for GPIO lookup\n"); - desc = gpiod_find(dev, con_id, idx, &lookupflags); - } - - if (IS_ERR(desc)) { - dev_dbg(dev, "No GPIO consumer %s found\n", con_id); - return desc; - } - - /* - * If a connection label was passed use that, else attempt to use - * the device name as label - */ - ret = gpiod_request(desc, con_id ?: devname); - if (ret) { - if (!(ret == -EBUSY && flags & GPIOD_FLAGS_BIT_NONEXCLUSIVE)) - return ERR_PTR(ret); - - /* - * This happens when there are several consumers for - * the same GPIO line: we just return here without - * further initialization. It is a bit of a hack. - * This is necessary to support fixed regulators. - * - * FIXME: Make this more sane and safe. - */ - dev_info(dev, "nonexclusive access to GPIO for %s\n", con_id ?: devname); - return desc; - } - - ret = gpiod_configure_flags(desc, con_id, lookupflags, flags); - if (ret < 0) { - dev_dbg(dev, "setup of GPIO %s failed\n", con_id); - gpiod_put(desc); - return ERR_PTR(ret); - } - - blocking_notifier_call_chain(&desc->gdev->notifier, - GPIOLINE_CHANGED_REQUESTED, desc); - - return desc; + return gpiod_find_and_request(dev, fwnode, con_id, idx, flags, label, true); } EXPORT_SYMBOL_GPL(gpiod_get_index); From e7f9ff5dc90c3826231343439c35c6b7e9e57378 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 11 Nov 2022 14:19:08 -0800 Subject: [PATCH 1515/4122] gpiolib: add support for software nodes Now that static device properties understand the notion of child nodes and references, let's teach gpiolib to handle them: - GPIOs are represented as references to software nodes representing gpiochip - references must have 2 arguments - GPIO number within the chip and GPIO flags (GPIO_ACTIVE_LOW/GPIO_ACTIVE_HIGH, etc) - a new PROPERTY_ENTRY_GPIO() macro is supplied to ensure the above - name of the software node representing gpiochip must match label of the gpiochip, as we use it to locate gpiochip structure at
runtime The following illustrates use of software nodes to describe a "System" button that is currently specified via use of gpio_keys_platform_data in arch/mips/alchemy/board-mtx1.c. It follows bindings specified in Documentation/devicetree/bindings/input/gpio-keys.yaml. static const struct software_node mtx1_gpiochip2_node = { .name = "alchemy-gpio2", }; static const struct property_entry mtx1_gpio_button_props[] = { PROPERTY_ENTRY_U32("linux,code", BTN_0), PROPERTY_ENTRY_STRING("label", "System button"), PROPERTY_ENTRY_GPIO("gpios", &mtx1_gpiochip2_node, 7, GPIO_ACTIVE_LOW), { } }; Similarly, arch/arm/mach-tegra/board-paz00.c can be converted to: static const struct software_node tegra_gpiochip_node = { .name = "tegra-gpio", }; static struct property_entry wifi_rfkill_prop[] __initdata = { PROPERTY_ENTRY_STRING("name", "wifi_rfkill"), PROPERTY_ENTRY_STRING("type", "wlan"), PROPERTY_ENTRY_GPIO("reset-gpios", &tegra_gpiochip_node, 25, GPIO_ACTIVE_HIGH), PROPERTY_ENTRY_GPIO("shutdown-gpios", &tegra_gpiochip_node, 85, GPIO_ACTIVE_HIGH), { }, }; static struct platform_device wifi_rfkill_device = { .name = "rfkill_gpio", .id = -1, }; ...
software_node_register(&tegra_gpiochip_node); device_create_managed_software_node(&wifi_rfkill_device.dev, wifi_rfkill_prop, NULL); Acked-by: Linus Walleij Reviewed-by: Andy Shevchenko Signed-off-by: Dmitry Torokhov Signed-off-by: Bartosz Golaszewski --- drivers/gpio/Makefile | 1 + drivers/gpio/gpiolib-swnode.c | 123 ++++++++++++++++++++++++++++++++++ drivers/gpio/gpiolib-swnode.h | 14 ++++ drivers/gpio/gpiolib.c | 7 ++ include/linux/gpio/property.h | 11 +++ 5 files changed, 156 insertions(+) create mode 100644 drivers/gpio/gpiolib-swnode.c create mode 100644 drivers/gpio/gpiolib-swnode.h create mode 100644 include/linux/gpio/property.h diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile index 8629e9eaf79e..010587025fc8 100644 --- a/drivers/gpio/Makefile +++ b/drivers/gpio/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_OF_GPIO) += gpiolib-of.o obj-$(CONFIG_GPIO_CDEV) += gpiolib-cdev.o obj-$(CONFIG_GPIO_SYSFS) += gpiolib-sysfs.o obj-$(CONFIG_GPIO_ACPI) += gpiolib-acpi.o +obj-$(CONFIG_GPIOLIB) += gpiolib-swnode.o # Device drivers. Generally keep list sorted alphabetically obj-$(CONFIG_GPIO_REGMAP) += gpio-regmap.o diff --git a/drivers/gpio/gpiolib-swnode.c b/drivers/gpio/gpiolib-swnode.c new file mode 100644 index 000000000000..dd9ccac214d1 --- /dev/null +++ b/drivers/gpio/gpiolib-swnode.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Software Node helpers for the GPIO API + * + * Copyright 2022 Google LLC + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gpiolib.h" +#include "gpiolib-swnode.h" + +static void swnode_format_propname(const char *con_id, char *propname, + size_t max_size) +{ + /* + * Note we do not need to try both -gpios and -gpio suffixes, + * as, unlike OF and ACPI, we can fix software nodes to conform + * to the proper binding. 
+ */ + if (con_id) + snprintf(propname, max_size, "%s-gpios", con_id); + else + strscpy(propname, "gpios", max_size); +} + +static int swnode_gpiochip_match_name(struct gpio_chip *chip, void *data) +{ + return !strcmp(chip->label, data); +} + +static struct gpio_chip *swnode_get_chip(struct fwnode_handle *fwnode) +{ + const struct software_node *chip_node; + struct gpio_chip *chip; + + chip_node = to_software_node(fwnode); + if (!chip_node || !chip_node->name) + return ERR_PTR(-EINVAL); + + chip = gpiochip_find((void *)chip_node->name, swnode_gpiochip_match_name); + return chip ?: ERR_PTR(-EPROBE_DEFER); +} + +struct gpio_desc *swnode_find_gpio(struct fwnode_handle *fwnode, + const char *con_id, unsigned int idx, + unsigned long *flags) +{ + const struct software_node *swnode; + struct fwnode_reference_args args; + struct gpio_chip *chip; + struct gpio_desc *desc; + char propname[32]; /* 32 is max size of property name */ + int error; + + swnode = to_software_node(fwnode); + if (!swnode) + return ERR_PTR(-EINVAL); + + swnode_format_propname(con_id, propname, sizeof(propname)); + + /* + * We expect all swnode-described GPIOs have GPIO number and + * polarity arguments, hence nargs is set to 2. 
+ */ + error = fwnode_property_get_reference_args(fwnode, propname, NULL, 2, idx, &args); + if (error) { + pr_debug("%s: can't parse '%s' property of node '%pfwP[%d]'\n", + __func__, propname, fwnode, idx); + return ERR_PTR(error); + } + + chip = swnode_get_chip(args.fwnode); + fwnode_handle_put(args.fwnode); + if (IS_ERR(chip)) + return ERR_CAST(chip); + + desc = gpiochip_get_desc(chip, args.args[0]); + *flags = args.args[1]; /* We expect native GPIO flags */ + + pr_debug("%s: parsed '%s' property of node '%pfwP[%d]' - status (%d)\n", + __func__, propname, fwnode, idx, PTR_ERR_OR_ZERO(desc)); + + return desc; +} + +/** + * swnode_gpio_count - count the GPIOs associated with a device / function + * @fwnode: firmware node of the GPIO consumer, can be %NULL for + * system-global GPIOs + * @con_id: function within the GPIO consumer + * + * Return: + * The number of GPIOs associated with a device / function or %-ENOENT, + * if no GPIO has been assigned to the requested function. + */ +int swnode_gpio_count(const struct fwnode_handle *fwnode, const char *con_id) +{ + struct fwnode_reference_args args; + char propname[32]; + int count; + + swnode_format_propname(con_id, propname, sizeof(propname)); + + /* + * This is not very efficient, but GPIO lists usually have only + * 1 or 2 entries. 
+ */ + count = 0; + while (fwnode_property_get_reference_args(fwnode, propname, NULL, 0, + count, &args) == 0) { + fwnode_handle_put(args.fwnode); + count++; + } + + return count ?: -ENOENT; +} diff --git a/drivers/gpio/gpiolib-swnode.h b/drivers/gpio/gpiolib-swnode.h new file mode 100644 index 000000000000..af849e56f6bc --- /dev/null +++ b/drivers/gpio/gpiolib-swnode.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef GPIOLIB_SWNODE_H +#define GPIOLIB_SWNODE_H + +struct fwnode_handle; +struct gpio_desc; + +struct gpio_desc *swnode_find_gpio(struct fwnode_handle *fwnode, + const char *con_id, unsigned int idx, + unsigned long *flags); +int swnode_gpio_count(const struct fwnode_handle *fwnode, const char *con_id); + +#endif /* GPIOLIB_SWNODE_H */ diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 7f739096c4cf..7936d54a2e30 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -26,6 +26,7 @@ #include "gpiolib.h" #include "gpiolib-of.h" #include "gpiolib-acpi.h" +#include "gpiolib-swnode.h" #include "gpiolib-cdev.h" #include "gpiolib-sysfs.h" @@ -3870,6 +3871,10 @@ static struct gpio_desc *gpiod_find_by_fwnode(struct fwnode_handle *fwnode, dev_dbg(consumer, "using ACPI '%pfw' for '%s' GPIO lookup\n", fwnode, con_id); desc = acpi_find_gpio(fwnode, con_id, idx, flags, lookupflags); + } else if (is_software_node(fwnode)) { + dev_dbg(consumer, "using swnode '%pfw' for '%s' GPIO lookup\n", + fwnode, con_id); + desc = swnode_find_gpio(fwnode, con_id, idx, lookupflags); } return desc; @@ -3987,6 +3992,8 @@ int gpiod_count(struct device *dev, const char *con_id) count = of_gpio_get_count(dev, con_id); else if (is_acpi_node(fwnode)) count = acpi_gpio_count(dev, con_id); + else if (is_software_node(fwnode)) + count = swnode_gpio_count(fwnode, con_id); if (count < 0) count = platform_gpio_count(dev, con_id); diff --git a/include/linux/gpio/property.h b/include/linux/gpio/property.h new file mode 100644 index 
000000000000..6c75c8bd44a0 --- /dev/null +++ b/include/linux/gpio/property.h @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0+ +#ifndef __LINUX_GPIO_PROPERTY_H +#define __LINUX_GPIO_PROPERTY_H + +#include <dt-bindings/gpio/gpio.h> /* for GPIO_* flags */ +#include <linux/property.h> + +#define PROPERTY_ENTRY_GPIO(_name_, _chip_node_, _idx_, _flags_) \ + PROPERTY_ENTRY_REF(_name_, _chip_node_, _idx_, _flags_) + +#endif /* __LINUX_GPIO_PROPERTY_H */ From 77289b2f5aa3535a2e49b448c6afb36f5526016a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 14 Nov 2022 20:46:25 +0200 Subject: [PATCH 1516/4122] gpiolib: of: Prepare of_mm_gpiochip_add_data() for fwnode The GPIO library is getting rid of of_node; fwnode should be utilized instead. Prepare of_mm_gpiochip_add_data() for fwnode. Signed-off-by: Andy Shevchenko Acked-by: Linus Walleij Reviewed-by: Dmitry Torokhov Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-of.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index 596b8e21700e..c9b0c9fdeca8 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -934,8 +934,8 @@ int of_mm_gpiochip_add_data(struct device_node *np, if (mm_gc->save_regs) mm_gc->save_regs(mm_gc); - of_node_put(mm_gc->gc.of_node); - mm_gc->gc.of_node = of_node_get(np); + fwnode_handle_put(mm_gc->gc.fwnode); + mm_gc->gc.fwnode = fwnode_handle_get(of_fwnode_handle(np)); ret = gpiochip_add_data(gc, data); if (ret) From a431803852de00d8d3c143b19f5690254225538f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 14 Nov 2022 20:46:26 +0200 Subject: [PATCH 1517/4122] gpiolib: of: Drop redundant check in of_mm_gpiochip_remove() The callers never call the function with an invalid pointer. Moreover, the compiler quite likely dropped that check anyway because we use that pointer before the check.
Signed-off-by: Andy Shevchenko Acked-by: Linus Walleij Reviewed-by: Dmitry Torokhov Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-of.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index c9b0c9fdeca8..a6871859a59d 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -961,9 +961,6 @@ void of_mm_gpiochip_remove(struct of_mm_gpio_chip *mm_gc) { struct gpio_chip *gc = &mm_gc->gc; - if (!mm_gc) - return; - gpiochip_remove(gc); iounmap(mm_gc->regs); kfree(gc->label); From f87f6e5b4539639460ab105e597e5190c9b2500f Mon Sep 17 00:00:00 2001 From: Chen Lin Date: Fri, 4 Nov 2022 06:21:21 +0800 Subject: [PATCH 1518/4122] iommu/arm-smmu: Warn once when the prefetcher errata patch fails to apply The default reset value of the secure banked register SMMU_sACR.cache_lock is 1. If it has not been set to 0 by secure software (e.g. ATF), non-secure Linux cannot clear the ARM_MMU500_ACTLR_CPRE bit. In this situation the prefetcher errata workaround is not applied successfully, so warn once.
Signed-off-by: Chen Lin Link: https://lore.kernel.org/r/20221103222121.3051-1-chen45464546@163.com [will: Tweaked wording of diagnostic] Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-impl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c index 658f3cc83278..9dc772f2cbb2 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c @@ -136,6 +136,9 @@ int arm_mmu500_reset(struct arm_smmu_device *smmu) reg = arm_smmu_cb_read(smmu, i, ARM_SMMU_CB_ACTLR); reg &= ~ARM_MMU500_ACTLR_CPRE; arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_ACTLR, reg); + reg = arm_smmu_cb_read(smmu, i, ARM_SMMU_CB_ACTLR); + if (reg & ARM_MMU500_ACTLR_CPRE) + dev_warn_once(smmu->dev, "Failed to disable prefetcher [errata #841119 and #826419], check ACR.CACHE_LOCK\n"); } return 0; From e5f4afbe395f1248e7501d470118a2a947fe87e6 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 14 Nov 2022 13:07:14 -0800 Subject: [PATCH 1519/4122] perf pmu: Remove mostly unused 'struct perf_pmu' 'is_hybrid' member Replace usage with perf_pmu__is_hybrid(). 
Suggested-by: Kan Liang Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Caleb Biggers Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Stephane Eranian Cc: Weilin Wang Cc: Xin Gao Cc: Xing Zhengjun Link: http://lore.kernel.org/lkml/20221114210723.2749751-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 5 +---- tools/perf/util/evsel.h | 2 +- tools/perf/util/pmu.c | 3 +-- tools/perf/util/pmu.h | 1 - tools/perf/util/stat.c | 11 +++-------- 5 files changed, 6 insertions(+), 16 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index cdde5b5f8ad2..ca6abb64c91d 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -3124,11 +3124,8 @@ void evsel__zero_per_pkg(struct evsel *evsel) } } -bool evsel__is_hybrid(struct evsel *evsel) +bool evsel__is_hybrid(const struct evsel *evsel) { - if (evsel->pmu) - return evsel->pmu->is_hybrid; - return evsel->pmu_name && perf_pmu__is_hybrid(evsel->pmu_name); } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 989865e16aad..467bb0b32fef 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -498,7 +498,7 @@ struct perf_env *evsel__env(struct evsel *evsel); int evsel__store_ids(struct evsel *evsel, struct evlist *evlist); void evsel__zero_per_pkg(struct evsel *evsel); -bool evsel__is_hybrid(struct evsel *evsel); +bool evsel__is_hybrid(const struct evsel *evsel); struct evsel *evsel__leader(struct evsel *evsel); bool evsel__has_leader(struct evsel *evsel, struct evsel *leader); bool evsel__is_leader(struct evsel *evsel); diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 6a86e6af0903..48e7be6f3baa 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -980,7 +980,6 @@ static struct perf_pmu *pmu_lookup(const char *lookup_name) pmu->is_uncore = 
pmu_is_uncore(name); if (pmu->is_uncore) pmu->id = pmu_id(name); - pmu->is_hybrid = is_hybrid; pmu->max_precise = pmu_max_precise(name); pmu_add_cpu_aliases(&aliases, pmu); pmu_add_sys_aliases(&aliases, pmu); @@ -992,7 +991,7 @@ static struct perf_pmu *pmu_lookup(const char *lookup_name) list_splice(&aliases, &pmu->aliases); list_add_tail(&pmu->list, &pmus); - if (pmu->is_hybrid) + if (is_hybrid) list_add_tail(&pmu->hybrid_list, &perf_pmu__hybrid_pmus); pmu->default_config = perf_pmu__get_default_config(pmu); diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 68e15c38ae71..0d556d02ce52 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -40,7 +40,6 @@ struct perf_pmu { __u32 type; bool selectable; bool is_uncore; - bool is_hybrid; bool auxtrace; int max_precise; struct perf_event_attr *default_config; diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 3a432a949d46..acf0edf5fdd1 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -604,15 +604,10 @@ static void evsel__merge_aliases(struct evsel *evsel) } } -static bool evsel__should_merge_hybrid(struct evsel *evsel, struct perf_stat_config *config) +static bool evsel__should_merge_hybrid(const struct evsel *evsel, + const struct perf_stat_config *config) { - struct perf_pmu *pmu; - - if (!config->hybrid_merge) - return false; - - pmu = evsel__find_pmu(evsel); - return pmu && pmu->is_hybrid; + return config->hybrid_merge && evsel__is_hybrid(evsel); } static void evsel__merge_stats(struct evsel *evsel, struct perf_stat_config *config) From fe13d43d07393f46957c2fc09f09f097f7969a5d Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 14 Nov 2022 13:07:15 -0800 Subject: [PATCH 1520/4122] perf pmu: Add data structure documentation Add documentation to 'struct perf_pmu' and the associated structs of 'perf_pmu_alias' and 'perf_pmu_format'. 
Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Caleb Biggers Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Stephane Eranian Cc: Weilin Wang Cc: Xin Gao Cc: Xing Zhengjun Link: http://lore.kernel.org/lkml/20221114210723.2749751-3-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 16 ++++++ tools/perf/util/pmu.h | 122 +++++++++++++++++++++++++++++++++++++++--- 2 files changed, 132 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 48e7be6f3baa..057e1528c32f 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -31,10 +31,26 @@ struct perf_pmu perf_pmu__fake; +/** + * struct perf_pmu_format - Values from a format file read from + * <sysfs>/devices/cpu/format/ held in struct perf_pmu. + * + * For example, the contents of <sysfs>/devices/cpu/format/event may be + * "config:0-7" and will be represented here as name="event", + * value=PERF_PMU_FORMAT_VALUE_CONFIG and bits 0 to 7 will be set. + */ struct perf_pmu_format { + /** @name: The modifier/file name. */ char *name; + /** + * @value: Which config value the format relates to. Supported values + * are from PERF_PMU_FORMAT_VALUE_CONFIG to + * PERF_PMU_FORMAT_VALUE_CONFIG_END. + */ int value; + /** @bits: Which config bits are set by this format value. */ DECLARE_BITMAP(bits, PERF_PMU_FORMAT_BITS); + /** @list: Element on list within struct perf_pmu. */ struct list_head list; }; diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 0d556d02ce52..ee02e1ef9187 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -33,30 +33,101 @@ struct perf_pmu_caps { struct list_head list; }; +/** + * struct perf_pmu - A PMU as described by sysfs and architecture specific code. + */ struct perf_pmu { + /** @name: The name of the PMU such as "cpu".
*/ char *name; + /** + * @alias_name: Optional alternate name for the PMU determined in + * architecture specific code. + */ char *alias_name; + /** + * @id: Optional PMU identifier read from + * /bus/event_source/devices//identifier. + */ char *id; + /** + * @type: Perf event attributed type value, read from + * /bus/event_source/devices//type. + */ __u32 type; + /** + * @selectable: Can the PMU name be selected as if it were an event? + */ bool selectable; + /** + * @is_uncore: Is the PMU not within the CPU core? Determined by the + * presence of /bus/event_source/devices//cpumask. + */ bool is_uncore; + /** + * @auxtrace: Are events auxiliary events? Determined in architecture + * specific code. + */ bool auxtrace; + /** + * @max_precise: Number of levels of :ppp precision supported by the + * PMU, read from + * /bus/event_source/devices//caps/max_precise. + */ int max_precise; + /** + * @default_config: Optional default perf_event_attr determined in + * architecture specific code. + */ struct perf_event_attr *default_config; + /** + * @cpus: Empty or the contents of either of: + * /bus/event_source/devices//cpumask. + * /bus/event_source/devices//cpus. + */ struct perf_cpu_map *cpus; - struct list_head format; /* HEAD struct perf_pmu_format -> list */ - struct list_head aliases; /* HEAD struct perf_pmu_alias -> list */ + /** + * @format: Holds the contents of files read from + * /bus/event_source/devices//format/. The contents specify + * which event parameter changes what config, config1 or config2 bits. + */ + struct list_head format; + /** + * @aliases: List of struct perf_pmu_alias. Each alias corresponds to an + * event read from /bus/event_source/devices//events/ or + * from json events in pmu-events.c. + */ + struct list_head aliases; + /** @caps_initialized: Has the list caps been initialized? */ bool caps_initialized; + /** @nr_caps: The length of the list caps. 
*/ u32 nr_caps; - struct list_head caps; /* HEAD struct perf_pmu_caps -> list */ - struct list_head list; /* ELEM */ + /** + * @caps: Holds the contents of files read from + * /bus/event_source/devices//caps/. + * + * The contents are pairs of the filename with the value of its + * contents, for example, max_precise (see above) may have a value of 3. + */ + struct list_head caps; + /** @list: Element on pmus list in pmu.c. */ + struct list_head list; + /** @hybrid_list: Element on perf_pmu__hybrid_pmus. */ struct list_head hybrid_list; + /** + * @missing_features: Features to inhibit when events on this PMU are + * opened. + */ struct { + /** + * @exclude_guest: Disables perf_event_attr exclude_guest and + * exclude_host. + */ bool exclude_guest; } missing_features; }; +/** @perf_pmu__fake: A special global PMU used for testing. */ extern struct perf_pmu perf_pmu__fake; struct perf_pmu_info { @@ -70,21 +141,60 @@ struct perf_pmu_info { #define UNIT_MAX_LEN 31 /* max length for event unit name */ +/** + * struct perf_pmu_alias - An event either read from sysfs or builtin in + * pmu-events.c, created by parsing the pmu-events json files. + */ struct perf_pmu_alias { + /** @name: Name of the event like "mem-loads". */ char *name; + /** @desc: Optional short description of the event. */ char *desc; + /** @long_desc: Optional long description. */ char *long_desc; + /** + * @topic: Optional topic such as cache or pipeline, particularly for + * json events. + */ char *topic; + /** + * @str: Comma separated parameter list like + * "event=0xcd,umask=0x1,ldlat=0x3". + */ char *str; - struct list_head terms; /* HEAD struct parse_events_term -> list */ - struct list_head list; /* ELEM */ + /** @terms: Owned list of the original parsed parameters. */ + struct list_head terms; + /** @list: List element of struct perf_pmu aliases. */ + struct list_head list; + /** @unit: Units for the event, such as bytes or cache lines. 
*/ char unit[UNIT_MAX_LEN+1]; + /** @scale: Value to scale read counter values by. */ double scale; + /** + * @per_pkg: Does the file + * /bus/event_source/devices//events/.per-pkg or + * equivalent json value exist and have the value 1. + */ bool per_pkg; + /** + * @snapshot: Does the file + * /bus/event_source/devices//events/.snapshot + * exist and have the value 1. + */ bool snapshot; + /** + * @deprecated: Is the event hidden and so not shown in perf list by + * default. + */ bool deprecated; + /** + * @metric_expr: A metric expression associated with an event. Doing + * this makes little sense due to scale and unit applying to both. + */ char *metric_expr; + /** @metric_name: A name for the metric. unit applying to both. */ char *metric_name; + /** @pmu_name: The name copied from struct perf_pmu. */ char *pmu_name; }; From 1504b6f97bad166b484d6f27dc99746fdca5f467 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 14 Nov 2022 13:07:16 -0800 Subject: [PATCH 1521/4122] tools lib api fs tracing_path: Add scandir alphasort tracing_events__opendir() allows iteration over files in /tracing/events but with an arbitrary sort order. Add a scandir alternative where the results are alphabetically sorted. 
Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Caleb Biggers Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Stephane Eranian Cc: Weilin Wang Cc: Xin Gao Cc: Xing Zhengjun Link: http://lore.kernel.org/lkml/20221114210723.2749751-4-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/api/fs/tracing_path.c | 16 ++++++++++++++++ tools/lib/api/fs/tracing_path.h | 1 + 2 files changed, 17 insertions(+) diff --git a/tools/lib/api/fs/tracing_path.c b/tools/lib/api/fs/tracing_path.c index 5afb11b30fca..b8e457c841ab 100644 --- a/tools/lib/api/fs/tracing_path.c +++ b/tools/lib/api/fs/tracing_path.c @@ -113,6 +113,22 @@ DIR *tracing_events__opendir(void) return dir; } +int tracing_events__scandir_alphasort(struct dirent ***namelist) +{ + char *path = get_tracing_file("events"); + int ret; + + if (!path) { + *namelist = NULL; + return 0; + } + + ret = scandir(path, namelist, NULL, alphasort); + put_events_file(path); + + return ret; +} + int tracing_path__strerror_open_tp(int err, char *buf, size_t size, const char *sys, const char *name) { diff --git a/tools/lib/api/fs/tracing_path.h b/tools/lib/api/fs/tracing_path.h index a19136b086dc..fc6347c11deb 100644 --- a/tools/lib/api/fs/tracing_path.h +++ b/tools/lib/api/fs/tracing_path.h @@ -6,6 +6,7 @@ #include DIR *tracing_events__opendir(void); +int tracing_events__scandir_alphasort(struct dirent ***namelist); void tracing_path_set(const char *mountpoint); const char *tracing_path_mount(void); From d74060c03368063c64e63004319e2a39930c4297 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 14 Nov 2022 13:07:17 -0800 Subject: [PATCH 1522/4122] perf tracepoint: Sort events in iterator In print_tracepoint_events() use tracing_events__scandir_alphasort() and scandir alphasort so that the subsystem and events are sorted and don't need a secondary qsort. 
Locally this results in the following change: ... ext4:ext4_zero_range [Tracepoint event] - fib6:fib6_table_lookup [Tracepoint event] fib:fib_table_lookup [Tracepoint event] + fib6:fib6_table_lookup [Tracepoint event] filelock:break_lease_block [Tracepoint event] ... ie fib6 now is after fib and not before it. This is more consistent with how numbers are more generally sorted, such as: ... syscalls:sys_enter_renameat [Tracepoint event] syscalls:sys_enter_renameat2 [Tracepoint event] ... and so an improvement over the qsort approach. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Caleb Biggers Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Stephane Eranian Cc: Weilin Wang Cc: Xin Gao Cc: Xing Zhengjun Link: http://lore.kernel.org/lkml/20221114210723.2749751-5-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/print-events.c | 104 +++++++++++---------------------- 1 file changed, 35 insertions(+), 69 deletions(-) diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c index 6df947df1c0f..dae927250826 100644 --- a/tools/perf/util/print-events.c +++ b/tools/perf/util/print-events.c @@ -66,26 +66,21 @@ static int cmp_string(const void *a, const void *b) void print_tracepoint_events(const char *subsys_glob, const char *event_glob, bool name_only) { - DIR *sys_dir, *evt_dir; - struct dirent *sys_dirent, *evt_dirent; - char evt_path[MAXPATHLEN]; - char *dir_path; - char **evt_list = NULL; - unsigned int evt_i = 0, evt_num = 0; - bool evt_num_known = false; + struct dirent **sys_namelist = NULL; + bool printed = false; + int sys_items = tracing_events__scandir_alphasort(&sys_namelist); -restart: - sys_dir = tracing_events__opendir(); - if (!sys_dir) - return; + for (int i = 0; i < sys_items; i++) { + struct dirent *sys_dirent = sys_namelist[i]; + struct dirent 
**evt_namelist = NULL; + char *dir_path; + int evt_items; - if (evt_num_known) { - evt_list = zalloc(sizeof(char *) * evt_num); - if (!evt_list) - goto out_close_sys_dir; - } + if (sys_dirent->d_type != DT_DIR || + !strcmp(sys_dirent->d_name, ".") || + !strcmp(sys_dirent->d_name, "..")) + continue; - for_each_subsystem(sys_dir, sys_dirent) { if (subsys_glob != NULL && !strglobmatch(sys_dirent->d_name, subsys_glob)) continue; @@ -93,69 +88,40 @@ restart: dir_path = get_events_file(sys_dirent->d_name); if (!dir_path) continue; - evt_dir = opendir(dir_path); - if (!evt_dir) - goto next; - for_each_event(dir_path, evt_dir, evt_dirent) { + evt_items = scandir(dir_path, &evt_namelist, NULL, alphasort); + for (int j = 0; j < evt_items; j++) { + struct dirent *evt_dirent = evt_namelist[j]; + char evt_path[MAXPATHLEN]; + + if (evt_dirent->d_type != DT_DIR || + !strcmp(evt_dirent->d_name, ".") || + !strcmp(evt_dirent->d_name, "..")) + continue; + + if (tp_event_has_id(dir_path, evt_dirent) != 0) + continue; + if (event_glob != NULL && !strglobmatch(evt_dirent->d_name, event_glob)) continue; - if (!evt_num_known) { - evt_num++; - continue; - } - snprintf(evt_path, MAXPATHLEN, "%s:%s", sys_dirent->d_name, evt_dirent->d_name); - - evt_list[evt_i] = strdup(evt_path); - if (evt_list[evt_i] == NULL) { - put_events_file(dir_path); - goto out_close_evt_dir; + if (name_only) + printf("%s ", evt_path); + else { + printf(" %-50s [%s]\n", evt_path, + event_type_descriptors[PERF_TYPE_TRACEPOINT]); } - evt_i++; + printed = true; } - closedir(evt_dir); -next: - put_events_file(dir_path); + free(dir_path); + free(evt_namelist); } - closedir(sys_dir); - - if (!evt_num_known) { - evt_num_known = true; - goto restart; - } - qsort(evt_list, evt_num, sizeof(char *), cmp_string); - evt_i = 0; - while (evt_i < evt_num) { - if (name_only) { - printf("%s ", evt_list[evt_i++]); - continue; - } - printf(" %-50s [%s]\n", evt_list[evt_i++], - event_type_descriptors[PERF_TYPE_TRACEPOINT]); - } - if 
(evt_num && pager_in_use()) + free(sys_namelist); + if (printed && pager_in_use()) printf("\n"); - -out_free: - evt_num = evt_i; - for (evt_i = 0; evt_i < evt_num; evt_i++) - zfree(&evt_list[evt_i]); - zfree(&evt_list); - return; - -out_close_evt_dir: - closedir(evt_dir); -out_close_sys_dir: - closedir(sys_dir); - - printf("FATAL: not enough memory to print %s\n", - event_type_descriptors[PERF_TYPE_TRACEPOINT]); - if (evt_list) - goto out_free; } void print_sdt_events(const char *subsys_glob, const char *event_glob, From ca0fe62413f3f536201d788672cd6cf7c4ed52dd Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 14 Nov 2022 13:07:18 -0800 Subject: [PATCH 1523/4122] perf list: Generalize limiting to a PMU name Deprecate the --cputype option and add a --unit option where '--unit cpu_atom' behaves like '--cputype atom'. The --unit option can be used with arbitrary PMUs, for example: ``` $ perf list --unit msr pmu List of pre-defined events (to be used in -e or -M): msr/aperf/ [Kernel PMU event] msr/cpu_thermal_margin/ [Kernel PMU event] msr/mperf/ [Kernel PMU event] msr/pperf/ [Kernel PMU event] msr/smi/ [Kernel PMU event] msr/tsc/ [Kernel PMU event] ``` Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Caleb Biggers Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Stephane Eranian Cc: Weilin Wang Cc: Xin Gao Cc: Xing Zhengjun Link: http://lore.kernel.org/lkml/20221114210723.2749751-6-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-list.txt | 6 +++--- tools/perf/builtin-list.c | 18 ++++++++++++------ tools/perf/util/metricgroup.c | 3 ++- tools/perf/util/pmu.c | 4 +--- 4 files changed, 18 insertions(+), 13 deletions(-) diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 57384a97c04f..44a819af573d 100644 --- 
a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -39,9 +39,9 @@ any extra expressions computed by perf stat. --deprecated:: Print deprecated events. By default the deprecated events are hidden. ---cputype:: -Print events applying cpu with this type for hybrid platform -(e.g. --cputype core or --cputype atom) +--unit:: +Print PMU events and metrics limited to the specific PMU name. +(e.g. --unit cpu, --unit msr, --unit cpu_core, --unit cpu_atom) [[EVENT_MODIFIERS]] EVENT MODIFIERS diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 58e1ec1654ef..cc84ced6da26 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -21,7 +21,6 @@ static bool desc_flag = true; static bool details_flag; -static const char *hybrid_type; int cmd_list(int argc, const char **argv) { @@ -30,6 +29,8 @@ int cmd_list(int argc, const char **argv) bool long_desc_flag = false; bool deprecated = false; char *pmu_name = NULL; + const char *hybrid_name = NULL; + const char *unit_name = NULL; struct option list_options[] = { OPT_BOOLEAN(0, "raw-dump", &raw_dump, "Dump raw events"), OPT_BOOLEAN('d', "desc", &desc_flag, @@ -40,9 +41,10 @@ int cmd_list(int argc, const char **argv) "Print information on the perf event names and expressions used internally by events."), OPT_BOOLEAN(0, "deprecated", &deprecated, "Print deprecated events."), - OPT_STRING(0, "cputype", &hybrid_type, "hybrid cpu type", - "Print events applying cpu with this type for hybrid platform " - "(e.g. core or atom)"), + OPT_STRING(0, "cputype", &hybrid_name, "hybrid cpu type", + "Limit PMU or metric printing to the given hybrid PMU (e.g. 
core or atom)."), + OPT_STRING(0, "unit", &unit_name, "PMU name", + "Limit PMU or metric printing to the specified PMU."), OPT_INCR(0, "debug", &verbose, "Enable debugging output"), OPT_END() @@ -53,6 +55,8 @@ int cmd_list(int argc, const char **argv) }; set_option_flag(list_options, 0, "raw-dump", PARSE_OPT_HIDDEN); + /* Hide hybrid flag for the more generic 'unit' flag. */ + set_option_flag(list_options, 0, "cputype", PARSE_OPT_HIDDEN); argc = parse_options(argc, argv, list_options, list_usage, PARSE_OPT_STOP_AT_NON_OPTION); @@ -62,8 +66,10 @@ int cmd_list(int argc, const char **argv) if (!raw_dump && pager_in_use()) printf("\nList of pre-defined events (to be used in -e or -M):\n\n"); - if (hybrid_type) { - pmu_name = perf_pmu__hybrid_type_to_pmu(hybrid_type); + if (unit_name) + pmu_name = strdup(unit_name); + else if (hybrid_name) { + pmu_name = perf_pmu__hybrid_type_to_pmu(hybrid_name); if (!pmu_name) pr_warning("WARNING: hybrid cputype is not supported!\n"); } diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 4c98ac29ee13..1943fed9b6d9 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -556,11 +556,12 @@ static int metricgroup__print_callback(const struct pmu_event *pe, void *vdata) { struct metricgroup_print_data *data = vdata; + const char *pmu = pe->pmu ?: "cpu"; if (!pe->metric_expr) return 0; - if (data->pmu_name && perf_pmu__is_hybrid(pe->pmu) && strcmp(data->pmu_name, pe->pmu)) + if (data->pmu_name && strcmp(data->pmu_name, pmu)) return 0; return metricgroup__print_pmu_event(pe, data->metricgroups, data->filter, diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 057e1528c32f..e6790175307b 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1695,10 +1695,8 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, pmu = NULL; j = 0; while ((pmu = perf_pmu__scan(pmu)) != NULL) { - if (pmu_name && perf_pmu__is_hybrid(pmu->name) && - 
strcmp(pmu_name, pmu->name)) { + if (pmu_name && pmu->name && strcmp(pmu_name, pmu->name)) continue; - } list_for_each_entry(alias, &pmu->aliases, list) { char *name = alias->desc ? alias->name : From 3301b3fe9bdcfcb109e44e31160e830fe558faec Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 14 Nov 2022 13:07:19 -0800 Subject: [PATCH 1524/4122] perf list: Simplify cache event printing The current code computes an array of cache names then sorts and prints them. Use a strlist to create a list of names that is sorted. Keep the hybrid names, it is unclear how to generalize it, but drop the computation of evt_pmus that is never used. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Caleb Biggers Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Kang Minchul Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Stephane Eranian Cc: Weilin Wang Cc: Xin Gao Cc: Xing Zhengjun Link: http://lore.kernel.org/lkml/20221114210723.2749751-7-irogers@google.com [ Fixed up clash with cf9f67b36303de65 ("perf print-events: Remove redundant comparison with zero")] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/print-events.c | 130 +++++++-------------------------- 1 file changed, 27 insertions(+), 103 deletions(-) diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c index dae927250826..ff7793944246 100644 --- a/tools/perf/util/print-events.c +++ b/tools/perf/util/print-events.c @@ -206,135 +206,59 @@ void print_sdt_events(const char *subsys_glob, const char *event_glob, int print_hwcache_events(const char *event_glob, bool name_only) { - unsigned int type, op, i, evt_i = 0, evt_num = 0, npmus = 0; - char name[64], new_name[128]; - char **evt_list = NULL, **evt_pmus = NULL; - bool evt_num_known = false; - struct perf_pmu *pmu = NULL; + struct strlist *evt_name_list = strlist__new(NULL, NULL); + struct str_node *nd; - if (perf_pmu__has_hybrid()) { - 
npmus = perf_pmu__hybrid_pmu_num(); - evt_pmus = zalloc(sizeof(char *) * npmus); - if (!evt_pmus) - goto out_enomem; + if (!evt_name_list) { + pr_debug("Failed to allocate new strlist for hwcache events\n"); + return -ENOMEM; } - -restart: - if (evt_num_known) { - evt_list = zalloc(sizeof(char *) * evt_num); - if (!evt_list) - goto out_enomem; - } - - for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { - for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { + for (int type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { + for (int op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { /* skip invalid cache type */ if (!evsel__is_cache_op_valid(type, op)) continue; - for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { - unsigned int hybrid_supported = 0, j; - bool supported; + for (int i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { + struct perf_pmu *pmu = NULL; + char name[64]; __evsel__hw_cache_type_op_res_name(type, op, i, name, sizeof(name)); if (event_glob != NULL && !strglobmatch(name, event_glob)) continue; if (!perf_pmu__has_hybrid()) { - if (!is_event_supported(PERF_TYPE_HW_CACHE, - type | (op << 8) | (i << 16))) { - continue; - } - } else { - perf_pmu__for_each_hybrid_pmu(pmu) { - if (!evt_num_known) { - evt_num++; - continue; - } - - supported = is_event_supported( - PERF_TYPE_HW_CACHE, - type | (op << 8) | (i << 16) | - ((__u64)pmu->type << PERF_PMU_TYPE_SHIFT)); - if (supported) { + if (is_event_supported(PERF_TYPE_HW_CACHE, + type | (op << 8) | (i << 16))) + strlist__add(evt_name_list, name); + continue; + } + perf_pmu__for_each_hybrid_pmu(pmu) { + if (is_event_supported(PERF_TYPE_HW_CACHE, + type | (op << 8) | (i << 16) | + ((__u64)pmu->type << PERF_PMU_TYPE_SHIFT))) { + char new_name[128]; snprintf(new_name, sizeof(new_name), "%s/%s/", pmu->name, name); - evt_pmus[hybrid_supported] = - strdup(new_name); - hybrid_supported++; - } + strlist__add(evt_name_list, new_name); } - - if (hybrid_supported == 0) - continue; } - - if (!evt_num_known) { - 
evt_num++; - continue; - } - - if ((hybrid_supported == 0) || - (hybrid_supported == npmus)) { - evt_list[evt_i] = strdup(name); - for (j = 0; j < npmus; j++) - zfree(&evt_pmus[j]); - } else { - for (j = 0; j < hybrid_supported; j++) { - evt_list[evt_i++] = evt_pmus[j]; - evt_pmus[j] = NULL; - } - continue; - } - - if (evt_list[evt_i] == NULL) - goto out_enomem; - evt_i++; } } } - if (!evt_num_known) { - evt_num_known = true; - goto restart; - } - - for (evt_i = 0; evt_i < evt_num; evt_i++) { - if (!evt_list[evt_i]) - break; - } - - evt_num = evt_i; - qsort(evt_list, evt_num, sizeof(char *), cmp_string); - evt_i = 0; - while (evt_i < evt_num) { + strlist__for_each_entry(nd, evt_name_list) { if (name_only) { - printf("%s ", evt_list[evt_i++]); + printf("%s ", nd->s); continue; } - printf(" %-50s [%s]\n", evt_list[evt_i++], - event_type_descriptors[PERF_TYPE_HW_CACHE]); + printf(" %-50s [%s]\n", nd->s, event_type_descriptors[PERF_TYPE_HW_CACHE]); } - if (evt_num && pager_in_use()) + if (!strlist__empty(evt_name_list) && pager_in_use()) printf("\n"); -out_free: - evt_num = evt_i; - for (evt_i = 0; evt_i < evt_num; evt_i++) - zfree(&evt_list[evt_i]); - zfree(&evt_list); - - for (evt_i = 0; evt_i < npmus; evt_i++) - zfree(&evt_pmus[evt_i]); - zfree(&evt_pmus); - return evt_num; - -out_enomem: - printf("FATAL: not enough memory to print %s\n", - event_type_descriptors[PERF_TYPE_HW_CACHE]); - if (evt_list) - goto out_free; - return evt_num; + strlist__delete(evt_name_list); + return 0; } static void print_tool_event(const struct event_symbol *syms, const char *event_glob, From de3752a7d6c4435087ce7cb8537bfa987b93ec8b Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 14 Nov 2022 13:07:20 -0800 Subject: [PATCH 1525/4122] perf list: Simplify symbol event printing The current code computes an array of symbol names then sorts and prints them. Use a strlist to create a list of names that is sorted and then print it. 
Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Caleb Biggers Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Stephane Eranian Cc: Weilin Wang Cc: Xin Gao Cc: Xing Zhengjun Link: http://lore.kernel.org/lkml/20221114210723.2749751-8-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/print-events.c | 77 +++++++++------------------------- 1 file changed, 20 insertions(+), 57 deletions(-) diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c index ff7793944246..d53dba033597 100644 --- a/tools/perf/util/print-events.c +++ b/tools/perf/util/print-events.c @@ -52,14 +52,6 @@ static const struct event_symbol event_symbols_tool[PERF_TOOL_MAX] = { }, }; -static int cmp_string(const void *a, const void *b) -{ - const char * const *as = a; - const char * const *bs = b; - - return strcmp(*as, *bs); -} - /* * Print the events from /tracing/events */ @@ -298,77 +290,48 @@ void print_symbol_events(const char *event_glob, unsigned int type, struct event_symbol *syms, unsigned int max, bool name_only) { - unsigned int i, evt_i = 0, evt_num = 0; - char name[MAX_NAME_LEN]; - char **evt_list = NULL; - bool evt_num_known = false; + struct strlist *evt_name_list = strlist__new(NULL, NULL); + struct str_node *nd; -restart: - if (evt_num_known) { - evt_list = zalloc(sizeof(char *) * evt_num); - if (!evt_list) - goto out_enomem; - syms -= max; + if (!evt_name_list) { + pr_debug("Failed to allocate new strlist for symbol events\n"); + return; } - - for (i = 0; i < max; i++, syms++) { + for (unsigned int i = 0; i < max; i++) { /* * New attr.config still not supported here, the latest * example was PERF_COUNT_SW_CGROUP_SWITCHES */ - if (syms->symbol == NULL) + if (syms[i].symbol == NULL) continue; - if (event_glob != NULL && !(strglobmatch(syms->symbol, event_glob) || - (syms->alias && 
strglobmatch(syms->alias, event_glob)))) + if (event_glob != NULL && !(strglobmatch(syms[i].symbol, event_glob) || + (syms[i].alias && strglobmatch(syms[i].alias, event_glob)))) continue; if (!is_event_supported(type, i)) continue; - if (!evt_num_known) { - evt_num++; - continue; - } + if (strlen(syms[i].alias)) { + char name[MAX_NAME_LEN]; - if (!name_only && strlen(syms->alias)) - snprintf(name, MAX_NAME_LEN, "%s OR %s", syms->symbol, syms->alias); - else - strlcpy(name, syms->symbol, MAX_NAME_LEN); - - evt_list[evt_i] = strdup(name); - if (evt_list[evt_i] == NULL) - goto out_enomem; - evt_i++; + snprintf(name, MAX_NAME_LEN, "%s OR %s", syms[i].symbol, syms[i].alias); + strlist__add(evt_name_list, name); + } else + strlist__add(evt_name_list, syms[i].symbol); } - if (!evt_num_known) { - evt_num_known = true; - goto restart; - } - qsort(evt_list, evt_num, sizeof(char *), cmp_string); - evt_i = 0; - while (evt_i < evt_num) { + strlist__for_each_entry(nd, evt_name_list) { if (name_only) { - printf("%s ", evt_list[evt_i++]); + printf("%s ", nd->s); continue; } - printf(" %-50s [%s]\n", evt_list[evt_i++], event_type_descriptors[type]); + printf(" %-50s [%s]\n", nd->s, event_type_descriptors[type]); } - if (evt_num && pager_in_use()) + if (!strlist__empty(evt_name_list) && pager_in_use()) printf("\n"); -out_free: - evt_num = evt_i; - for (evt_i = 0; evt_i < evt_num; evt_i++) - zfree(&evt_list[evt_i]); - zfree(&evt_list); - return; - -out_enomem: - printf("FATAL: not enough memory to print %s\n", event_type_descriptors[type]); - if (evt_list) - goto out_free; + strlist__delete(evt_name_list); } /* From eb2d4514a5971444f67ac8b95bcd63e7702fa6bf Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 14 Nov 2022 13:07:21 -0800 Subject: [PATCH 1526/4122] perf pmu: Restructure print_pmu_events() to avoid memory allocations Previously print_pmu_events() would compute the values to be printed, place them in struct sevent, sort them and then print them. 
Modify the code so that struct sevent holds just the PMU and event, sort these and then in the main print loop calculate aliases for names, etc. This avoids memory allocations for copied values as they are computed then printed. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Caleb Biggers Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Stephane Eranian Cc: Weilin Wang Cc: Xin Gao Cc: Xing Zhengjun Link: http://lore.kernel.org/lkml/20221114210723.2749751-9-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 208 ++++++++++++++++++++++-------------------- 1 file changed, 110 insertions(+), 98 deletions(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index e6790175307b..075c82dd1347 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1553,8 +1553,8 @@ static int sub_non_neg(int a, int b) return a - b; } -static char *format_alias(char *buf, int len, struct perf_pmu *pmu, - struct perf_pmu_alias *alias) +static char *format_alias(char *buf, int len, const struct perf_pmu *pmu, + const struct perf_pmu_alias *alias) { struct parse_events_term *term; int used = snprintf(buf, len, "%s/%s", pmu->name, alias->name); @@ -1579,51 +1579,67 @@ static char *format_alias(char *buf, int len, struct perf_pmu *pmu, return buf; } -static char *format_alias_or(char *buf, int len, struct perf_pmu *pmu, - struct perf_pmu_alias *alias) +static char *format_alias_or(char *buf, int len, const struct perf_pmu *pmu, + const struct perf_pmu_alias *alias) { snprintf(buf, len, "%s OR %s/%s/", alias->name, pmu->name, alias->name); return buf; } +/** Struct for ordering events as output in perf list. */ struct sevent { - char *name; - char *desc; - char *topic; - char *str; - char *pmu; - char *metric_expr; - char *metric_name; - int is_cpu; + /** PMU for event. 
*/ + const struct perf_pmu *pmu; + /** + * Optional event for name, desc, etc. If not present then this is a + * selectable PMU and the event name is shown as "//". + */ + const struct perf_pmu_alias *event; + /** Is the PMU for the CPU? */ + bool is_cpu; }; static int cmp_sevent(const void *a, const void *b) { const struct sevent *as = a; const struct sevent *bs = b; + const char *a_pmu_name, *b_pmu_name; + const char *a_name = "//", *a_desc = NULL, *a_topic = ""; + const char *b_name = "//", *b_desc = NULL, *b_topic = ""; int ret; - /* Put extra events last */ - if (!!as->desc != !!bs->desc) - return !!as->desc - !!bs->desc; - if (as->topic && bs->topic) { - int n = strcmp(as->topic, bs->topic); - - if (n) - return n; + if (as->event) { + a_name = as->event->name; + a_desc = as->event->desc; + a_topic = as->event->topic ?: ""; } + if (bs->event) { + b_name = bs->event->name; + b_desc = bs->event->desc; + b_topic = bs->event->topic ?: ""; + } + /* Put extra events last. */ + if (!!a_desc != !!b_desc) + return !!a_desc - !!b_desc; + + /* Order by topics. */ + ret = strcmp(a_topic, b_topic); + if (ret) + return ret; /* Order CPU core events to be first */ if (as->is_cpu != bs->is_cpu) return bs->is_cpu - as->is_cpu; - ret = strcmp(as->name, bs->name); - if (!ret) { - if (as->pmu && bs->pmu) - return strcmp(as->pmu, bs->pmu); - } + /* Order by PMU name. */ + a_pmu_name = as->pmu->name ?: ""; + b_pmu_name = bs->pmu->name ?: ""; + ret = strcmp(a_pmu_name, b_pmu_name); + if (ret) + return ret; - return ret; + /* Order by event name. */ + return strcmp(a_name, b_name); } static void wordwrap(char *s, int start, int max, int corr) @@ -1655,16 +1671,18 @@ bool is_pmu_core(const char *name) static bool pmu_alias_is_duplicate(struct sevent *alias_a, struct sevent *alias_b) { + const char *a_pmu_name, *b_pmu_name; + const char *a_name = alias_a->event ? alias_a->event->name : "//"; + const char *b_name = alias_b->event ? 
alias_b->event->name : "//"; + /* Different names -> never duplicates */ - if (strcmp(alias_a->name, alias_b->name)) + if (strcmp(a_name, b_name)) return false; - /* Don't remove duplicates for hybrid PMUs */ - if (perf_pmu__is_hybrid(alias_a->pmu) && - perf_pmu__is_hybrid(alias_b->pmu)) - return false; - - return true; + /* Don't remove duplicates for different PMUs */ + a_pmu_name = alias_a->pmu->name ?: ""; + b_pmu_name = alias_b->pmu->name ?: ""; + return strcmp(a_pmu_name, b_pmu_name) == 0; } void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, @@ -1690,110 +1708,104 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, len++; } aliases = zalloc(sizeof(struct sevent) * len); - if (!aliases) - goto out_enomem; + if (!aliases) { + pr_err("FATAL: not enough memory to print PMU events\n"); + return; + } pmu = NULL; j = 0; while ((pmu = perf_pmu__scan(pmu)) != NULL) { + bool is_cpu; + if (pmu_name && pmu->name && strcmp(pmu_name, pmu->name)) continue; - list_for_each_entry(alias, &pmu->aliases, list) { - char *name = alias->desc ? 
alias->name : - format_alias(buf, sizeof(buf), pmu, alias); - bool is_cpu = is_pmu_core(pmu->name) || - perf_pmu__is_hybrid(pmu->name); + is_cpu = is_pmu_core(pmu->name) || perf_pmu__is_hybrid(pmu->name); + list_for_each_entry(alias, &pmu->aliases, list) { if (alias->deprecated && !deprecated) continue; if (event_glob != NULL && - !(strglobmatch_nocase(name, event_glob) || - (!is_cpu && strglobmatch_nocase(alias->name, - event_glob)) || + !(strglobmatch_nocase(alias->name, event_glob) || + (!is_cpu && + strglobmatch_nocase(alias->name, event_glob)) || (alias->topic && strglobmatch_nocase(alias->topic, event_glob)))) continue; - if (is_cpu && !name_only && !alias->desc) - name = format_alias_or(buf, sizeof(buf), pmu, alias); - - aliases[j].name = name; - if (is_cpu && !name_only && !alias->desc) - aliases[j].name = format_alias_or(buf, - sizeof(buf), - pmu, alias); - aliases[j].name = strdup(aliases[j].name); - if (!aliases[j].name) - goto out_enomem; - - aliases[j].desc = long_desc ? alias->long_desc : - alias->desc; - aliases[j].topic = alias->topic; - aliases[j].str = alias->str; - aliases[j].pmu = pmu->name; - aliases[j].metric_expr = alias->metric_expr; - aliases[j].metric_name = alias->metric_name; + aliases[j].event = alias; + aliases[j].pmu = pmu; aliases[j].is_cpu = is_cpu; j++; } if (pmu->selectable && (event_glob == NULL || strglobmatch(pmu->name, event_glob))) { - char *s; - if (asprintf(&s, "%s//", pmu->name) < 0) - goto out_enomem; - aliases[j].name = s; + aliases[j].event = NULL; + aliases[j].pmu = pmu; + aliases[j].is_cpu = is_cpu; j++; } } len = j; qsort(aliases, len, sizeof(struct sevent), cmp_sevent); for (j = 0; j < len; j++) { + char *name, *desc; + /* Skip duplicates */ if (j > 0 && pmu_alias_is_duplicate(&aliases[j], &aliases[j - 1])) continue; + if (!aliases[j].event) { + /* A selectable event. 
*/ + snprintf(buf, sizeof(buf), "%s//", aliases[j].pmu->name); + name = buf; + } else if (aliases[j].event->desc) { + name = aliases[j].event->name; + } else { + if (!name_only && aliases[j].is_cpu) { + name = format_alias_or(buf, sizeof(buf), aliases[j].pmu, + aliases[j].event); + } else { + name = format_alias(buf, sizeof(buf), aliases[j].pmu, + aliases[j].event); + } + } if (name_only) { - printf("%s ", aliases[j].name); + printf("%s ", name); continue; } - if (aliases[j].desc && !quiet_flag) { - if (numdesc++ == 0) - printf("\n"); - if (aliases[j].topic && (!topic || - strcmp(topic, aliases[j].topic))) { - printf("%s%s:\n", topic ? "\n" : "", - aliases[j].topic); - topic = aliases[j].topic; - } - printf(" %-50s\n", aliases[j].name); - printf("%*s", 8, "["); - wordwrap(aliases[j].desc, 8, columns, 0); - printf("]\n"); - if (details_flag) { - printf("%*s%s/%s/ ", 8, "", aliases[j].pmu, aliases[j].str); - if (aliases[j].metric_name) - printf(" MetricName: %s", aliases[j].metric_name); - if (aliases[j].metric_expr) - printf(" MetricExpr: %s", aliases[j].metric_expr); - putchar('\n'); - } - } else - printf(" %-50s [Kernel PMU event]\n", aliases[j].name); printed++; + if (!aliases[j].event || !aliases[j].event->desc || quiet_flag) { + printf(" %-50s [Kernel PMU event]\n", name); + continue; + } + if (numdesc++ == 0) + printf("\n"); + if (aliases[j].event->topic && (!topic || + strcmp(topic, aliases[j].event->topic))) { + printf("%s%s:\n", topic ? "\n" : "", aliases[j].event->topic); + topic = aliases[j].event->topic; + } + printf(" %-50s\n", name); + printf("%*s", 8, "["); + desc = long_desc ? 
aliases[j].event->long_desc : aliases[j].event->desc; + wordwrap(desc, 8, columns, 0); + printf("]\n"); + if (details_flag) { + printf("%*s%s/%s/ ", 8, "", aliases[j].pmu->name, aliases[j].event->str); + if (aliases[j].event->metric_name) + printf(" MetricName: %s", aliases[j].event->metric_name); + if (aliases[j].event->metric_expr) + printf(" MetricExpr: %s", aliases[j].event->metric_expr); + putchar('\n'); + } } if (printed && pager_in_use()) printf("\n"); -out_free: - for (j = 0; j < len; j++) - zfree(&aliases[j].name); + zfree(&aliases); return; - -out_enomem: - printf("FATAL: not enough memory to print PMU events\n"); - if (aliases) - goto out_free; } bool pmu_have_event(const char *pname, const char *name) From ddf07bd874be791a63fca5ac0e3def1e15f2338f Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 11 Nov 2022 12:37:32 +0100 Subject: [PATCH 1527/4122] gpiolib: of: Use correct fwnode for DT-probed chips The OF node stored in chip->fwnode is used to explicitly override the FW node for a GPIO chip. For chips that use the default FW node (i.e. that of their parent device), this will be NULL and cause the chip not to be fully registered. Instead, use the GPIO device's FW node, which is set to either the node of the parent device or the explicit override in chip->fwnode. 
Fixes: 8afe82550240 ("gpiolib: of: Prepare of_gpiochip_add() / of_gpiochip_remove() for fwnode") Tested-by: Marek Szyprowski Signed-off-by: Thierry Reding Reviewed-by: Andy Shevchenko Reviewed-by: Linus Walleij Tested-by: Robert Marko Tested-by: Andrew Halaney Reviewed-by: Brian Masney Tested-by: Brian Masney Tested-by: Geert Uytterhoeven Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-of.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index a6871859a59d..4fff7258ee41 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -1063,7 +1063,7 @@ int of_gpiochip_add(struct gpio_chip *chip) struct device_node *np; int ret; - np = to_of_node(chip->fwnode); + np = to_of_node(dev_fwnode(&chip->gpiodev->dev)); if (!np) return 0; From 739be9b6a84b23c40b0fb534b749602fb8285e70 Mon Sep 17 00:00:00 2001 From: Aidan MacDonald Date: Sat, 12 Nov 2022 15:29:28 +0000 Subject: [PATCH 1528/4122] gpio: sl28cpld: Replace irqchip mask_invert with unmask_base Remove use of the deprecated mask_invert flag. Inverted mask registers (where a '1' bit enables an IRQ) can be described more directly as an unmask register. 
Signed-off-by: Aidan MacDonald Acked-by: Linus Walleij Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-sl28cpld.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-sl28cpld.c b/drivers/gpio/gpio-sl28cpld.c index 52404736ac86..2195f88c2048 100644 --- a/drivers/gpio/gpio-sl28cpld.c +++ b/drivers/gpio/gpio-sl28cpld.c @@ -70,8 +70,7 @@ static int sl28cpld_gpio_irq_init(struct platform_device *pdev, irq_chip->num_irqs = ARRAY_SIZE(sl28cpld_gpio_irqs); irq_chip->num_regs = 1; irq_chip->status_base = base + GPIO_REG_IP; - irq_chip->mask_base = base + GPIO_REG_IE; - irq_chip->mask_invert = true; + irq_chip->unmask_base = base + GPIO_REG_IE; irq_chip->ack_base = base + GPIO_REG_IP; ret = devm_regmap_add_irq_chip_fwnode(dev, dev_fwnode(dev), From e67ad9354a9b7621341adec4ac2c63d5269f835d Mon Sep 17 00:00:00 2001 From: Albert Zhou Date: Tue, 15 Nov 2022 22:38:56 +1100 Subject: [PATCH 1529/4122] PCI: pciehp: Enable by default if USB4 enabled Thunderbolt/USB4 PCIe tunneling depends on native PCIe hotplug. Enable pciehp by default if USB4 is enabled. [bhelgaas: squash, update subject, commit logs, tidy whitespace] Link: https://lore.kernel.org/r/20221115113857.35800-2-albert.zhou.50@gmail.com Link: https://lore.kernel.org/r/20221115113857.35800-3-albert.zhou.50@gmail.com Signed-off-by: Albert Zhou Signed-off-by: Bjorn Helgaas Reviewed-by: Mika Westerberg --- drivers/pci/hotplug/Kconfig | 3 +++ drivers/pci/pcie/Kconfig | 8 ++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/pci/hotplug/Kconfig b/drivers/pci/hotplug/Kconfig index 840a84bb5ee2..48113b210cf9 100644 --- a/drivers/pci/hotplug/Kconfig +++ b/drivers/pci/hotplug/Kconfig @@ -6,11 +6,14 @@ menuconfig HOTPLUG_PCI bool "Support for PCI Hotplug" depends on PCI && SYSFS + default y if USB4 help Say Y here if you have a motherboard with a PCI Hotplug controller. This allows you to add and remove PCI cards while the machine is powered up and running. 
+ Thunderbolt/USB4 PCIe tunneling depends on native PCIe hotplug. + When in doubt, say N. if HOTPLUG_PCI diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig index 788ac8df3f9d..228652a59f27 100644 --- a/drivers/pci/pcie/Kconfig +++ b/drivers/pci/pcie/Kconfig @@ -4,6 +4,7 @@ # config PCIEPORTBUS bool "PCI Express Port Bus support" + default y if USB4 help This enables PCI Express Port Bus support. Users can then enable support for Native Hot-Plug, Advanced Error Reporting, Power @@ -15,9 +16,12 @@ config PCIEPORTBUS config HOTPLUG_PCI_PCIE bool "PCI Express Hotplug driver" depends on HOTPLUG_PCI && PCIEPORTBUS + default y if USB4 help - Say Y here if you have a motherboard that supports PCI Express Native - Hotplug + Say Y here if you have a motherboard that supports PCIe native + hotplug. + + Thunderbolt/USB4 PCIe tunneling depends on native PCIe hotplug. When in doubt, say N. From 65d9cc3fd0e7f98964622557c0c94240e68441e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 21 Oct 2022 15:06:58 +0200 Subject: [PATCH 1530/4122] rtc: abx80x: Convert to .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in .probe(). The device_id array has to move up for that to work. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221021130706.178687-2-u.kleine-koenig@pengutronix.de Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-abx80x.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/rtc/rtc-abx80x.c b/drivers/rtc/rtc-abx80x.c index 9b0138d07232..e7f325ced940 100644 --- a/drivers/rtc/rtc-abx80x.c +++ b/drivers/rtc/rtc-abx80x.c @@ -673,13 +673,28 @@ static int abx80x_setup_watchdog(struct abx80x_priv *priv) } #endif -static int abx80x_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static const struct i2c_device_id abx80x_id[] = { + { "abx80x", ABX80X }, + { "ab0801", AB0801 }, + { "ab0803", AB0803 }, + { "ab0804", AB0804 }, + { "ab0805", AB0805 }, + { "ab1801", AB1801 }, + { "ab1803", AB1803 }, + { "ab1804", AB1804 }, + { "ab1805", AB1805 }, + { "rv1805", RV1805 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, abx80x_id); + +static int abx80x_probe(struct i2c_client *client) { struct device_node *np = client->dev.of_node; struct abx80x_priv *priv; int i, data, err, trickle_cfg = -EINVAL; char buf[7]; + const struct i2c_device_id *id = i2c_match_id(abx80x_id, client); unsigned int part = id->driver_data; unsigned int partnumber; unsigned int majrev, minrev; @@ -847,21 +862,6 @@ static int abx80x_probe(struct i2c_client *client, return devm_rtc_register_device(priv->rtc); } -static const struct i2c_device_id abx80x_id[] = { - { "abx80x", ABX80X }, - { "ab0801", AB0801 }, - { "ab0803", AB0803 }, - { "ab0804", AB0804 }, - { "ab0805", AB0805 }, - { "ab1801", AB1801 }, - { "ab1803", AB1803 }, - { "ab1804", AB1804 }, - { "ab1805", AB1805 }, - { "rv1805", RV1805 }, - { } -}; -MODULE_DEVICE_TABLE(i2c, abx80x_id); - #ifdef CONFIG_OF static const struct of_device_id abx80x_of_match[] = { { @@ -914,7 +914,7 @@ static struct i2c_driver abx80x_driver = { .name = "rtc-abx80x", .of_match_table = of_match_ptr(abx80x_of_match), }, - .probe = abx80x_probe, + 
.probe_new = abx80x_probe, .id_table = abx80x_id, }; From 2611e6d743be8f1ad2216622f39e12c63352a457 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 21 Oct 2022 15:07:00 +0200 Subject: [PATCH 1531/4122] rtc: isl1208: Convert to .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in .probe(). Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221021130706.178687-4-u.kleine-koenig@pengutronix.de Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-isl1208.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-isl1208.c b/drivers/rtc/rtc-isl1208.c index f448a525333e..73cc6aaf9b8b 100644 --- a/drivers/rtc/rtc-isl1208.c +++ b/drivers/rtc/rtc-isl1208.c @@ -797,7 +797,7 @@ static int isl1208_setup_irq(struct i2c_client *client, int irq) } static int -isl1208_probe(struct i2c_client *client, const struct i2c_device_id *id) +isl1208_probe(struct i2c_client *client) { int rc = 0; struct isl1208_state *isl1208; @@ -821,6 +821,8 @@ isl1208_probe(struct i2c_client *client, const struct i2c_device_id *id) if (!isl1208->config) return -ENODEV; } else { + const struct i2c_device_id *id = i2c_match_id(isl1208_id, client); + if (id->driver_data >= ISL_LAST_ID) return -ENODEV; isl1208->config = &isl1208_configs[id->driver_data]; @@ -906,7 +908,7 @@ static struct i2c_driver isl1208_driver = { .name = "rtc-isl1208", .of_match_table = of_match_ptr(isl1208_of_match), }, - .probe = isl1208_probe, + .probe_new = isl1208_probe, .id_table = isl1208_id, }; From 67db6f0515d15e6becbd59bc5da4ca2d03d51789 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 21 Oct 2022 15:07:01 +0200 Subject: [PATCH 1532/4122] rtc: m41t80: Convert to .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get 
the i2c_device_id * parameter, so determine that explicitly in .probe(). Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221021130706.178687-5-u.kleine-koenig@pengutronix.de Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-m41t80.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c index e0b4d3794320..d3144ffdebb5 100644 --- a/drivers/rtc/rtc-m41t80.c +++ b/drivers/rtc/rtc-m41t80.c @@ -876,8 +876,7 @@ static struct notifier_block wdt_notifier = { ***************************************************************************** */ -static int m41t80_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int m41t80_probe(struct i2c_client *client) { struct i2c_adapter *adapter = client->adapter; int rc = 0; @@ -897,11 +896,13 @@ static int m41t80_probe(struct i2c_client *client, return -ENOMEM; m41t80_data->client = client; - if (client->dev.of_node) + if (client->dev.of_node) { m41t80_data->features = (unsigned long) of_device_get_match_data(&client->dev); - else + } else { + const struct i2c_device_id *id = i2c_match_id(m41t80_id, client); m41t80_data->features = id->driver_data; + } i2c_set_clientdata(client, m41t80_data); m41t80_data->rtc = devm_rtc_allocate_device(&client->dev); @@ -1007,7 +1008,7 @@ static struct i2c_driver m41t80_driver = { .of_match_table = of_match_ptr(m41t80_of_match), .pm = &m41t80_pm, }, - .probe = m41t80_probe, + .probe_new = m41t80_probe, .remove = m41t80_remove, .id_table = m41t80_id, }; From c050dedb875c2eee920010c1dba458ce5bf8a171 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 21 Oct 2022 15:07:02 +0200 Subject: [PATCH 1533/4122] rtc: nct3018y: Convert to .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221021130706.178687-6-u.kleine-koenig@pengutronix.de Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-nct3018y.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/rtc/rtc-nct3018y.c b/drivers/rtc/rtc-nct3018y.c index d43acd3920ed..0a3b14c95d90 100644 --- a/drivers/rtc/rtc-nct3018y.c +++ b/drivers/rtc/rtc-nct3018y.c @@ -452,8 +452,7 @@ static const struct rtc_class_ops nct3018y_rtc_ops = { .ioctl = nct3018y_ioctl, }; -static int nct3018y_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int nct3018y_probe(struct i2c_client *client) { struct nct3018y *nct3018y; int err, flags; @@ -541,7 +540,7 @@ static struct i2c_driver nct3018y_driver = { .name = "rtc-nct3018y", .of_match_table = of_match_ptr(nct3018y_of_match), }, - .probe = nct3018y_probe, + .probe_new = nct3018y_probe, .id_table = nct3018y_id, }; From 5418e595f30bf4fde83ebb0121417c0c95cff98e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 21 Oct 2022 15:07:03 +0200 Subject: [PATCH 1534/4122] rtc: pcf2127: Convert to .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in .probe(). The device_id array has to move up for that to work. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221021130706.178687-7-u.kleine-koenig@pengutronix.de Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-pcf2127.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c index 63b275b014bd..87f4fc9df68b 100644 --- a/drivers/rtc/rtc-pcf2127.c +++ b/drivers/rtc/rtc-pcf2127.c @@ -885,9 +885,17 @@ static const struct regmap_bus pcf2127_i2c_regmap = { static struct i2c_driver pcf2127_i2c_driver; -static int pcf2127_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static const struct i2c_device_id pcf2127_i2c_id[] = { + { "pcf2127", 1 }, + { "pcf2129", 0 }, + { "pca2129", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, pcf2127_i2c_id); + +static int pcf2127_i2c_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_match_id(pcf2127_i2c_id, client); struct regmap *regmap; static const struct regmap_config config = { .reg_bits = 8, @@ -910,20 +918,12 @@ static int pcf2127_i2c_probe(struct i2c_client *client, pcf2127_i2c_driver.driver.name, id->driver_data); } -static const struct i2c_device_id pcf2127_i2c_id[] = { - { "pcf2127", 1 }, - { "pcf2129", 0 }, - { "pca2129", 0 }, - { } -}; -MODULE_DEVICE_TABLE(i2c, pcf2127_i2c_id); - static struct i2c_driver pcf2127_i2c_driver = { .driver = { .name = "rtc-pcf2127-i2c", .of_match_table = of_match_ptr(pcf2127_of_match), }, - .probe = pcf2127_i2c_probe, + .probe_new = pcf2127_i2c_probe, .id_table = pcf2127_i2c_id, }; From 8d94da6678702fac20ed3e1421b8b1ad03368a8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 21 Oct 2022 15:07:04 +0200 Subject: [PATCH 1535/4122] rtc: rs5c372: Convert to .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in .probe(). 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221021130706.178687-8-u.kleine-koenig@pengutronix.de Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-rs5c372.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/rtc/rtc-rs5c372.c b/drivers/rtc/rtc-rs5c372.c index 9562c477e1c9..5047afefcceb 100644 --- a/drivers/rtc/rtc-rs5c372.c +++ b/drivers/rtc/rtc-rs5c372.c @@ -791,8 +791,7 @@ static int rs5c_oscillator_setup(struct rs5c372 *rs5c372) return 0; } -static int rs5c372_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int rs5c372_probe(struct i2c_client *client) { int err = 0; int smbus_mode = 0; @@ -826,11 +825,13 @@ static int rs5c372_probe(struct i2c_client *client, rs5c372->client = client; i2c_set_clientdata(client, rs5c372); - if (client->dev.of_node) + if (client->dev.of_node) { rs5c372->type = (enum rtc_type) of_device_get_match_data(&client->dev); - else + } else { + const struct i2c_device_id *id = i2c_match_id(rs5c372_id, client); rs5c372->type = id->driver_data; + } /* we read registers 0x0f then 0x00-0x0f; skip the first one */ rs5c372->regs = &rs5c372->buf[1]; @@ -920,7 +921,7 @@ static struct i2c_driver rs5c372_driver = { .name = "rtc-rs5c372", .of_match_table = of_match_ptr(rs5c372_of_match), }, - .probe = rs5c372_probe, + .probe_new = rs5c372_probe, .remove = rs5c372_remove, .id_table = rs5c372_id, }; From 84c2fb386f71d364cf00ec9f4596a2b279e3ca54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 21 Oct 2022 15:07:05 +0200 Subject: [PATCH 1536/4122] rtc: rv8803: Convert to .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in .probe(). The device_id array has to move up for that to work. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221021130706.178687-9-u.kleine-koenig@pengutronix.de Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-rv8803.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/drivers/rtc/rtc-rv8803.c b/drivers/rtc/rtc-rv8803.c index 3527a0521e9b..b581b6d5ad73 100644 --- a/drivers/rtc/rtc-rv8803.c +++ b/drivers/rtc/rtc-rv8803.c @@ -576,8 +576,16 @@ static int rv8803_regs_configure(struct rv8803_data *rv8803) return 0; } -static int rv8803_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static const struct i2c_device_id rv8803_id[] = { + { "rv8803", rv_8803 }, + { "rv8804", rx_8804 }, + { "rx8803", rx_8803 }, + { "rx8900", rx_8900 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, rv8803_id); + +static int rv8803_probe(struct i2c_client *client) { struct i2c_adapter *adapter = client->adapter; struct rv8803_data *rv8803; @@ -605,11 +613,14 @@ static int rv8803_probe(struct i2c_client *client, mutex_init(&rv8803->flags_lock); rv8803->client = client; - if (client->dev.of_node) + if (client->dev.of_node) { rv8803->type = (enum rv8803_type) of_device_get_match_data(&client->dev); - else + } else { + const struct i2c_device_id *id = i2c_match_id(rv8803_id, client); + rv8803->type = id->driver_data; + } i2c_set_clientdata(client, rv8803); flags = rv8803_read_reg(client, RV8803_FLAG); @@ -666,15 +677,6 @@ static int rv8803_probe(struct i2c_client *client, return 0; } -static const struct i2c_device_id rv8803_id[] = { - { "rv8803", rv_8803 }, - { "rv8804", rx_8804 }, - { "rx8803", rx_8803 }, - { "rx8900", rx_8900 }, - { } -}; -MODULE_DEVICE_TABLE(i2c, rv8803_id); - static const __maybe_unused struct of_device_id rv8803_of_match[] = { { .compatible = "microcrystal,rv8803", @@ -701,7 +703,7 @@ static struct i2c_driver rv8803_driver = { .name = "rtc-rv8803", .of_match_table = of_match_ptr(rv8803_of_match), }, - .probe = rv8803_probe, + .probe_new = rv8803_probe, 
.id_table = rv8803_id, }; module_i2c_driver(rv8803_driver); From 8ffb7733e162d26393523bd95c5bfffa6e09baf4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 21 Oct 2022 15:07:06 +0200 Subject: [PATCH 1537/4122] rtc: rx8025: Convert to .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in .probe(). Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221021130706.178687-10-u.kleine-koenig@pengutronix.de Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-rx8025.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/rtc/rtc-rx8025.c b/drivers/rtc/rtc-rx8025.c index dde86f3e2a4b..77d3cb08b5ec 100644 --- a/drivers/rtc/rtc-rx8025.c +++ b/drivers/rtc/rtc-rx8025.c @@ -519,9 +519,9 @@ static const struct attribute_group rx8025_attr_group = { .attrs = rx8025_attrs, }; -static int rx8025_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int rx8025_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_match_id(rx8025_id, client); struct i2c_adapter *adapter = client->adapter; struct rx8025_data *rx8025; int err = 0; @@ -580,7 +580,7 @@ static struct i2c_driver rx8025_driver = { .driver = { .name = "rtc-rx8025", }, - .probe = rx8025_probe, + .probe_new = rx8025_probe, .id_table = rx8025_id, }; From 9800f24f7bd5b99fb4fc4ce981427102e2e15a1c Mon Sep 17 00:00:00 2001 From: Yushan Zhou Date: Mon, 7 Nov 2022 17:25:44 +0800 Subject: [PATCH 1538/4122] rtc: rzn1: Check return value in rzn1_rtc_probe The rzn1_rtc_probe() function utilizes devm_pm_runtime_enable() but wasn't checking the return value. Fix it by adding missing check. 
Fixes: deeb4b5393e1 ("rtc: rzn1: Add new RTC driver") Signed-off-by: Yushan Zhou Reviewed-by: Miquel Raynal Link: https://lore.kernel.org/r/20221107092544.3721053-1-zys.zljxml@gmail.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-rzn1.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-rzn1.c b/drivers/rtc/rtc-rzn1.c index ac788799c8e3..0d36bc50197c 100644 --- a/drivers/rtc/rtc-rzn1.c +++ b/drivers/rtc/rtc-rzn1.c @@ -355,7 +355,9 @@ static int rzn1_rtc_probe(struct platform_device *pdev) set_bit(RTC_FEATURE_ALARM_RES_MINUTE, rtc->rtcdev->features); clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, rtc->rtcdev->features); - devm_pm_runtime_enable(&pdev->dev); + ret = devm_pm_runtime_enable(&pdev->dev); + if (ret < 0) + return ret; ret = pm_runtime_resume_and_get(&pdev->dev); if (ret < 0) return ret; From f2fa14b0b586bad3ff2c0d908b8a44b22eaebe15 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 8 Nov 2022 13:02:53 +0100 Subject: [PATCH 1539/4122] dt-bindings: rtc: qcom-pm8xxx: document qcom,pm8921-rtc as fallback of qcom,pm8018-rtc The PM8018 RTC is used as compatible with PM8921 RTC on the MDM9615, document this situation. 
Reviewed-by: Krzysztof Kozlowski Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20220928-mdm9615-dt-schema-fixes-v5-1-bbb120c6766a@linaro.org Signed-off-by: Alexandre Belloni --- .../devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml b/Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml index 23ab5bb4f395..0a7aa29563c1 100644 --- a/Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml +++ b/Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml @@ -11,12 +11,16 @@ maintainers: properties: compatible: - enum: - - qcom,pm8058-rtc - - qcom,pm8921-rtc - - qcom,pm8941-rtc - - qcom,pm8018-rtc - - qcom,pmk8350-rtc + oneOf: + - enum: + - qcom,pm8058-rtc + - qcom,pm8921-rtc + - qcom,pm8941-rtc + - qcom,pmk8350-rtc + - items: + - enum: + - qcom,pm8018-rtc + - const: qcom,pm8921-rtc reg: minItems: 1 From 741a2830734bc22238e5c248630311e401481ecc Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 8 Nov 2022 13:02:54 +0100 Subject: [PATCH 1540/4122] rtc: pm8xxx: drop unused pm8018 compatible The PM8018 compatible is always used with PM8921 fallback, so PM8018 compatible can be safely removed from device ID table Reviewed-by: Krzysztof Kozlowski Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20220928-mdm9615-dt-schema-fixes-v5-2-bbb120c6766a@linaro.org Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-pm8xxx.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/rtc/rtc-pm8xxx.c b/drivers/rtc/rtc-pm8xxx.c index dc6d1476baa5..716e5d9ad74d 100644 --- a/drivers/rtc/rtc-pm8xxx.c +++ b/drivers/rtc/rtc-pm8xxx.c @@ -461,7 +461,6 @@ static const struct pm8xxx_rtc_regs pmk8350_regs = { */ static const struct of_device_id pm8xxx_id_table[] = { { .compatible = "qcom,pm8921-rtc", .data = &pm8921_regs }, - { .compatible = "qcom,pm8018-rtc", .data = &pm8921_regs }, { .compatible = "qcom,pm8058-rtc", 
.data = &pm8058_regs }, { .compatible = "qcom,pm8941-rtc", .data = &pm8941_regs }, { .compatible = "qcom,pmk8350-rtc", .data = &pmk8350_regs }, From f27efee663701f0e93351cf052677214fed40a42 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 28 Oct 2022 17:54:00 -0700 Subject: [PATCH 1541/4122] rtc: cros-ec: Limit RTC alarm range if needed RTC chips on some older Chromebooks can only handle alarms less than 24 hours in the future. Attempts to set an alarm beyond that range fail. The most severe impact of this limitation is that suspend requests fail if alarmtimer_suspend() tries to set an alarm for more than 24 hours in the future. To work around the problem, try to set the real-time alarm to just below 24 hours if setting it to a larger value fails. While not perfect, it is better than just failing the call. A similar workaround is already implemented in the rtc-tps6586x driver. Drop error messages in cros_ec_rtc_get() and cros_ec_rtc_set() since the calling code also logs an error and to avoid spurious error messages if setting the alarm ultimately succeeds.
Cc: Brian Norris Signed-off-by: Guenter Roeck Commit: Guenter Roeck Reviewed-by: Tzung-Bi Shih Reviewed-by: Brian Norris Tested-by: Brian Norris Link: https://lore.kernel.org/r/20221029005400.2712577-1-linux@roeck-us.net Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-cros-ec.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/drivers/rtc/rtc-cros-ec.c b/drivers/rtc/rtc-cros-ec.c index 887f5193e253..a3ec066d8066 100644 --- a/drivers/rtc/rtc-cros-ec.c +++ b/drivers/rtc/rtc-cros-ec.c @@ -14,6 +14,8 @@ #define DRV_NAME "cros-ec-rtc" +#define SECS_PER_DAY (24 * 60 * 60) + /** * struct cros_ec_rtc - Driver data for EC RTC * @@ -43,13 +45,8 @@ static int cros_ec_rtc_get(struct cros_ec_device *cros_ec, u32 command, msg.msg.insize = sizeof(msg.data); ret = cros_ec_cmd_xfer_status(cros_ec, &msg.msg); - if (ret < 0) { - dev_err(cros_ec->dev, - "error getting %s from EC: %d\n", - command == EC_CMD_RTC_GET_VALUE ? "time" : "alarm", - ret); + if (ret < 0) return ret; - } *response = msg.data.time; @@ -59,7 +56,7 @@ static int cros_ec_rtc_get(struct cros_ec_device *cros_ec, u32 command, static int cros_ec_rtc_set(struct cros_ec_device *cros_ec, u32 command, u32 param) { - int ret = 0; + int ret; struct { struct cros_ec_command msg; struct ec_response_rtc data; @@ -71,13 +68,8 @@ static int cros_ec_rtc_set(struct cros_ec_device *cros_ec, u32 command, msg.data.time = param; ret = cros_ec_cmd_xfer_status(cros_ec, &msg.msg); - if (ret < 0) { - dev_err(cros_ec->dev, "error setting %s on EC: %d\n", - command == EC_CMD_RTC_SET_VALUE ? 
"time" : "alarm", - ret); + if (ret < 0) return ret; - } - return 0; } @@ -190,8 +182,21 @@ static int cros_ec_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) ret = cros_ec_rtc_set(cros_ec, EC_CMD_RTC_SET_ALARM, alarm_offset); if (ret < 0) { - dev_err(dev, "error setting alarm: %d\n", ret); - return ret; + if (ret == -EINVAL && alarm_offset >= SECS_PER_DAY) { + /* + * RTC chips on some older Chromebooks can only handle + * alarms up to 24h in the future. Try to set an alarm + * below that limit to avoid suspend failures. + */ + ret = cros_ec_rtc_set(cros_ec, EC_CMD_RTC_SET_ALARM, + SECS_PER_DAY - 1); + } + + if (ret < 0) { + dev_err(dev, "error setting alarm in %u seconds: %d\n", + alarm_offset, ret); + return ret; + } } return 0; From 5dc8356830428656c3a00dd702fb9102fe43550f Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 13 Sep 2022 22:49:05 +0800 Subject: [PATCH 1542/4122] rtc: ds1302: remove unnecessary spi_set_drvdata() Remove unnecessary spi_set_drvdata() in ds1302_remove(), the driver_data will be set to NULL in device_unbind_cleanup() after calling ->remove(). After this, ds1302_remove() is an empty function, so remove it too. 
Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20220913144905.2004924-1-yangyingliang@huawei.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-ds1302.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/rtc/rtc-ds1302.c b/drivers/rtc/rtc-ds1302.c index 6d66ab5a8b17..ecc7d0307932 100644 --- a/drivers/rtc/rtc-ds1302.c +++ b/drivers/rtc/rtc-ds1302.c @@ -185,11 +185,6 @@ static int ds1302_probe(struct spi_device *spi) return 0; } -static void ds1302_remove(struct spi_device *spi) -{ - spi_set_drvdata(spi, NULL); -} - #ifdef CONFIG_OF static const struct of_device_id ds1302_dt_ids[] = { { .compatible = "maxim,ds1302", }, @@ -208,7 +203,6 @@ static struct spi_driver ds1302_driver = { .driver.name = "rtc-ds1302", .driver.of_match_table = of_match_ptr(ds1302_dt_ids), .probe = ds1302_probe, - .remove = ds1302_remove, .id_table = ds1302_spi_ids, }; From eb633de6abcb3003a8a2c03377d39a91a8d57d20 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 19 Sep 2022 16:38:12 +0800 Subject: [PATCH 1543/4122] rtc: s3c: Switch to use dev_err_probe() helper In the probe path, dev_err() can be replaced with dev_err_probe(), which will check if the error code is -EPROBE_DEFER and print the error name.
Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20220919083812.755082-1-yangyingliang@huawei.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-s3c.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index db529733c9c4..8fc5efde3e0b 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -429,14 +429,9 @@ static int s3c_rtc_probe(struct platform_device *pdev) return PTR_ERR(info->base); info->rtc_clk = devm_clk_get(&pdev->dev, "rtc"); - if (IS_ERR(info->rtc_clk)) { - ret = PTR_ERR(info->rtc_clk); - if (ret != -EPROBE_DEFER) - dev_err(&pdev->dev, "failed to find rtc clock\n"); - else - dev_dbg(&pdev->dev, "probe deferred due to missing rtc clk\n"); - return ret; - } + if (IS_ERR(info->rtc_clk)) + return dev_err_probe(&pdev->dev, PTR_ERR(info->rtc_clk), + "failed to find rtc clock\n"); ret = clk_prepare_enable(info->rtc_clk); if (ret) return ret; From 97e78b64d138021c837a30bfa78201caf27c16b9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 19 Oct 2022 17:29:34 +0200 Subject: [PATCH 1544/4122] rtc: remove davinci rtc driver The Davinci dm365 SoC support was removed, so the rtc driver has no remaining users. Signed-off-by: Arnd Bergmann Acked-by: Bartosz Golaszewski Acked-by: Marc Zyngier Acked-by: Kevin Hilman Link: https://lore.kernel.org/r/20221019152947.3857217-9-arnd@kernel.org Signed-off-by: Alexandre Belloni --- drivers/rtc/Kconfig | 10 - drivers/rtc/Makefile | 1 - drivers/rtc/rtc-davinci.c | 512 -------------------------------------- 3 files changed, 523 deletions(-) delete mode 100644 drivers/rtc/rtc-davinci.c diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index bb63edb507da..b45fd08d51dc 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -1351,16 +1351,6 @@ config RTC_DRV_ASM9260 This driver can also be built as a module. If so, the module will be called rtc-asm9260. 
-config RTC_DRV_DAVINCI - tristate "TI DaVinci RTC" - depends on ARCH_DAVINCI_DM365 || COMPILE_TEST - help - If you say yes here you get support for the RTC on the - DaVinci platforms (DM365). - - This driver can also be built as a module. If so, the module - will be called rtc-davinci. - config RTC_DRV_DIGICOLOR tristate "Conexant Digicolor RTC" depends on ARCH_DIGICOLOR || COMPILE_TEST diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index aab22bc63432..791994eb913d 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -44,7 +44,6 @@ obj-$(CONFIG_RTC_DRV_CROS_EC) += rtc-cros-ec.o obj-$(CONFIG_RTC_DRV_DA9052) += rtc-da9052.o obj-$(CONFIG_RTC_DRV_DA9055) += rtc-da9055.o obj-$(CONFIG_RTC_DRV_DA9063) += rtc-da9063.o -obj-$(CONFIG_RTC_DRV_DAVINCI) += rtc-davinci.o obj-$(CONFIG_RTC_DRV_DIGICOLOR) += rtc-digicolor.o obj-$(CONFIG_RTC_DRV_DM355EVM) += rtc-dm355evm.o obj-$(CONFIG_RTC_DRV_DS1216) += rtc-ds1216.o diff --git a/drivers/rtc/rtc-davinci.c b/drivers/rtc/rtc-davinci.c deleted file mode 100644 index 6bef0f2353da..000000000000 --- a/drivers/rtc/rtc-davinci.c +++ /dev/null @@ -1,512 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * DaVinci Power Management and Real Time Clock Driver for TI platforms - * - * Copyright (C) 2009 Texas Instruments, Inc - * - * Author: Miguel Aguilar - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * The DaVinci RTC is a simple RTC with the following - * Sec: 0 - 59 : BCD count - * Min: 0 - 59 : BCD count - * Hour: 0 - 23 : BCD count - * Day: 0 - 0x7FFF(32767) : Binary count ( Over 89 years ) - */ - -/* PRTC interface registers */ -#define DAVINCI_PRTCIF_PID 0x00 -#define PRTCIF_CTLR 0x04 -#define PRTCIF_LDATA 0x08 -#define PRTCIF_UDATA 0x0C -#define PRTCIF_INTEN 0x10 -#define PRTCIF_INTFLG 0x14 - -/* PRTCIF_CTLR bit fields */ -#define PRTCIF_CTLR_BUSY BIT(31) -#define PRTCIF_CTLR_SIZE BIT(25) -#define PRTCIF_CTLR_DIR BIT(24) -#define 
PRTCIF_CTLR_BENU_MSB BIT(23) -#define PRTCIF_CTLR_BENU_3RD_BYTE BIT(22) -#define PRTCIF_CTLR_BENU_2ND_BYTE BIT(21) -#define PRTCIF_CTLR_BENU_LSB BIT(20) -#define PRTCIF_CTLR_BENU_MASK (0x00F00000) -#define PRTCIF_CTLR_BENL_MSB BIT(19) -#define PRTCIF_CTLR_BENL_3RD_BYTE BIT(18) -#define PRTCIF_CTLR_BENL_2ND_BYTE BIT(17) -#define PRTCIF_CTLR_BENL_LSB BIT(16) -#define PRTCIF_CTLR_BENL_MASK (0x000F0000) - -/* PRTCIF_INTEN bit fields */ -#define PRTCIF_INTEN_RTCSS BIT(1) -#define PRTCIF_INTEN_RTCIF BIT(0) -#define PRTCIF_INTEN_MASK (PRTCIF_INTEN_RTCSS \ - | PRTCIF_INTEN_RTCIF) - -/* PRTCIF_INTFLG bit fields */ -#define PRTCIF_INTFLG_RTCSS BIT(1) -#define PRTCIF_INTFLG_RTCIF BIT(0) -#define PRTCIF_INTFLG_MASK (PRTCIF_INTFLG_RTCSS \ - | PRTCIF_INTFLG_RTCIF) - -/* PRTC subsystem registers */ -#define PRTCSS_RTC_INTC_EXTENA1 (0x0C) -#define PRTCSS_RTC_CTRL (0x10) -#define PRTCSS_RTC_WDT (0x11) -#define PRTCSS_RTC_TMR0 (0x12) -#define PRTCSS_RTC_TMR1 (0x13) -#define PRTCSS_RTC_CCTRL (0x14) -#define PRTCSS_RTC_SEC (0x15) -#define PRTCSS_RTC_MIN (0x16) -#define PRTCSS_RTC_HOUR (0x17) -#define PRTCSS_RTC_DAY0 (0x18) -#define PRTCSS_RTC_DAY1 (0x19) -#define PRTCSS_RTC_AMIN (0x1A) -#define PRTCSS_RTC_AHOUR (0x1B) -#define PRTCSS_RTC_ADAY0 (0x1C) -#define PRTCSS_RTC_ADAY1 (0x1D) -#define PRTCSS_RTC_CLKC_CNT (0x20) - -/* PRTCSS_RTC_INTC_EXTENA1 */ -#define PRTCSS_RTC_INTC_EXTENA1_MASK (0x07) - -/* PRTCSS_RTC_CTRL bit fields */ -#define PRTCSS_RTC_CTRL_WDTBUS BIT(7) -#define PRTCSS_RTC_CTRL_WEN BIT(6) -#define PRTCSS_RTC_CTRL_WDRT BIT(5) -#define PRTCSS_RTC_CTRL_WDTFLG BIT(4) -#define PRTCSS_RTC_CTRL_TE BIT(3) -#define PRTCSS_RTC_CTRL_TIEN BIT(2) -#define PRTCSS_RTC_CTRL_TMRFLG BIT(1) -#define PRTCSS_RTC_CTRL_TMMD BIT(0) - -/* PRTCSS_RTC_CCTRL bit fields */ -#define PRTCSS_RTC_CCTRL_CALBUSY BIT(7) -#define PRTCSS_RTC_CCTRL_DAEN BIT(5) -#define PRTCSS_RTC_CCTRL_HAEN BIT(4) -#define PRTCSS_RTC_CCTRL_MAEN BIT(3) -#define PRTCSS_RTC_CCTRL_ALMFLG BIT(2) -#define PRTCSS_RTC_CCTRL_AIEN 
BIT(1) -#define PRTCSS_RTC_CCTRL_CAEN BIT(0) - -static DEFINE_SPINLOCK(davinci_rtc_lock); - -struct davinci_rtc { - struct rtc_device *rtc; - void __iomem *base; - int irq; -}; - -static inline void rtcif_write(struct davinci_rtc *davinci_rtc, - u32 val, u32 addr) -{ - writel(val, davinci_rtc->base + addr); -} - -static inline u32 rtcif_read(struct davinci_rtc *davinci_rtc, u32 addr) -{ - return readl(davinci_rtc->base + addr); -} - -static inline void rtcif_wait(struct davinci_rtc *davinci_rtc) -{ - while (rtcif_read(davinci_rtc, PRTCIF_CTLR) & PRTCIF_CTLR_BUSY) - cpu_relax(); -} - -static inline void rtcss_write(struct davinci_rtc *davinci_rtc, - unsigned long val, u8 addr) -{ - rtcif_wait(davinci_rtc); - - rtcif_write(davinci_rtc, PRTCIF_CTLR_BENL_LSB | addr, PRTCIF_CTLR); - rtcif_write(davinci_rtc, val, PRTCIF_LDATA); - - rtcif_wait(davinci_rtc); -} - -static inline u8 rtcss_read(struct davinci_rtc *davinci_rtc, u8 addr) -{ - rtcif_wait(davinci_rtc); - - rtcif_write(davinci_rtc, PRTCIF_CTLR_DIR | PRTCIF_CTLR_BENL_LSB | addr, - PRTCIF_CTLR); - - rtcif_wait(davinci_rtc); - - return rtcif_read(davinci_rtc, PRTCIF_LDATA); -} - -static inline void davinci_rtcss_calendar_wait(struct davinci_rtc *davinci_rtc) -{ - while (rtcss_read(davinci_rtc, PRTCSS_RTC_CCTRL) & - PRTCSS_RTC_CCTRL_CALBUSY) - cpu_relax(); -} - -static irqreturn_t davinci_rtc_interrupt(int irq, void *class_dev) -{ - struct davinci_rtc *davinci_rtc = class_dev; - unsigned long events = 0; - u32 irq_flg; - u8 alm_irq, tmr_irq; - u8 rtc_ctrl, rtc_cctrl; - int ret = IRQ_NONE; - - irq_flg = rtcif_read(davinci_rtc, PRTCIF_INTFLG) & - PRTCIF_INTFLG_RTCSS; - - alm_irq = rtcss_read(davinci_rtc, PRTCSS_RTC_CCTRL) & - PRTCSS_RTC_CCTRL_ALMFLG; - - tmr_irq = rtcss_read(davinci_rtc, PRTCSS_RTC_CTRL) & - PRTCSS_RTC_CTRL_TMRFLG; - - if (irq_flg) { - if (alm_irq) { - events |= RTC_IRQF | RTC_AF; - rtc_cctrl = rtcss_read(davinci_rtc, PRTCSS_RTC_CCTRL); - rtc_cctrl |= PRTCSS_RTC_CCTRL_ALMFLG; - rtcss_write(davinci_rtc, 
rtc_cctrl, PRTCSS_RTC_CCTRL); - } else if (tmr_irq) { - events |= RTC_IRQF | RTC_PF; - rtc_ctrl = rtcss_read(davinci_rtc, PRTCSS_RTC_CTRL); - rtc_ctrl |= PRTCSS_RTC_CTRL_TMRFLG; - rtcss_write(davinci_rtc, rtc_ctrl, PRTCSS_RTC_CTRL); - } - - rtcif_write(davinci_rtc, PRTCIF_INTFLG_RTCSS, - PRTCIF_INTFLG); - rtc_update_irq(davinci_rtc->rtc, 1, events); - - ret = IRQ_HANDLED; - } - - return ret; -} - -static int -davinci_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg) -{ - struct davinci_rtc *davinci_rtc = dev_get_drvdata(dev); - u8 rtc_ctrl; - unsigned long flags; - int ret = 0; - - spin_lock_irqsave(&davinci_rtc_lock, flags); - - rtc_ctrl = rtcss_read(davinci_rtc, PRTCSS_RTC_CTRL); - - switch (cmd) { - case RTC_WIE_ON: - rtc_ctrl |= PRTCSS_RTC_CTRL_WEN | PRTCSS_RTC_CTRL_WDTFLG; - break; - case RTC_WIE_OFF: - rtc_ctrl &= ~PRTCSS_RTC_CTRL_WEN; - break; - default: - ret = -ENOIOCTLCMD; - } - - rtcss_write(davinci_rtc, rtc_ctrl, PRTCSS_RTC_CTRL); - - spin_unlock_irqrestore(&davinci_rtc_lock, flags); - - return ret; -} - -static void convertfromdays(u16 days, struct rtc_time *tm) -{ - int tmp_days, year, mon; - - for (year = 2000;; year++) { - tmp_days = rtc_year_days(1, 12, year); - if (days >= tmp_days) - days -= tmp_days; - else { - for (mon = 0;; mon++) { - tmp_days = rtc_month_days(mon, year); - if (days >= tmp_days) { - days -= tmp_days; - } else { - tm->tm_year = year - 1900; - tm->tm_mon = mon; - tm->tm_mday = days + 1; - break; - } - } - break; - } - } -} - -static void convert2days(u16 *days, struct rtc_time *tm) -{ - int i; - *days = 0; - - for (i = 2000; i < 1900 + tm->tm_year; i++) - *days += rtc_year_days(1, 12, i); - - *days += rtc_year_days(tm->tm_mday, tm->tm_mon, 1900 + tm->tm_year); -} - -static int davinci_rtc_read_time(struct device *dev, struct rtc_time *tm) -{ - struct davinci_rtc *davinci_rtc = dev_get_drvdata(dev); - u16 days = 0; - u8 day0, day1; - unsigned long flags; - - spin_lock_irqsave(&davinci_rtc_lock, flags); - - 
davinci_rtcss_calendar_wait(davinci_rtc); - tm->tm_sec = bcd2bin(rtcss_read(davinci_rtc, PRTCSS_RTC_SEC)); - - davinci_rtcss_calendar_wait(davinci_rtc); - tm->tm_min = bcd2bin(rtcss_read(davinci_rtc, PRTCSS_RTC_MIN)); - - davinci_rtcss_calendar_wait(davinci_rtc); - tm->tm_hour = bcd2bin(rtcss_read(davinci_rtc, PRTCSS_RTC_HOUR)); - - davinci_rtcss_calendar_wait(davinci_rtc); - day0 = rtcss_read(davinci_rtc, PRTCSS_RTC_DAY0); - - davinci_rtcss_calendar_wait(davinci_rtc); - day1 = rtcss_read(davinci_rtc, PRTCSS_RTC_DAY1); - - spin_unlock_irqrestore(&davinci_rtc_lock, flags); - - days |= day1; - days <<= 8; - days |= day0; - - convertfromdays(days, tm); - - return 0; -} - -static int davinci_rtc_set_time(struct device *dev, struct rtc_time *tm) -{ - struct davinci_rtc *davinci_rtc = dev_get_drvdata(dev); - u16 days; - u8 rtc_cctrl; - unsigned long flags; - - convert2days(&days, tm); - - spin_lock_irqsave(&davinci_rtc_lock, flags); - - davinci_rtcss_calendar_wait(davinci_rtc); - rtcss_write(davinci_rtc, bin2bcd(tm->tm_sec), PRTCSS_RTC_SEC); - - davinci_rtcss_calendar_wait(davinci_rtc); - rtcss_write(davinci_rtc, bin2bcd(tm->tm_min), PRTCSS_RTC_MIN); - - davinci_rtcss_calendar_wait(davinci_rtc); - rtcss_write(davinci_rtc, bin2bcd(tm->tm_hour), PRTCSS_RTC_HOUR); - - davinci_rtcss_calendar_wait(davinci_rtc); - rtcss_write(davinci_rtc, days & 0xFF, PRTCSS_RTC_DAY0); - - davinci_rtcss_calendar_wait(davinci_rtc); - rtcss_write(davinci_rtc, (days & 0xFF00) >> 8, PRTCSS_RTC_DAY1); - - rtc_cctrl = rtcss_read(davinci_rtc, PRTCSS_RTC_CCTRL); - rtc_cctrl |= PRTCSS_RTC_CCTRL_CAEN; - rtcss_write(davinci_rtc, rtc_cctrl, PRTCSS_RTC_CCTRL); - - spin_unlock_irqrestore(&davinci_rtc_lock, flags); - - return 0; -} - -static int davinci_rtc_alarm_irq_enable(struct device *dev, - unsigned int enabled) -{ - struct davinci_rtc *davinci_rtc = dev_get_drvdata(dev); - unsigned long flags; - u8 rtc_cctrl = rtcss_read(davinci_rtc, PRTCSS_RTC_CCTRL); - - spin_lock_irqsave(&davinci_rtc_lock, flags); - 
- if (enabled) - rtc_cctrl |= PRTCSS_RTC_CCTRL_DAEN | - PRTCSS_RTC_CCTRL_HAEN | - PRTCSS_RTC_CCTRL_MAEN | - PRTCSS_RTC_CCTRL_ALMFLG | - PRTCSS_RTC_CCTRL_AIEN; - else - rtc_cctrl &= ~PRTCSS_RTC_CCTRL_AIEN; - - davinci_rtcss_calendar_wait(davinci_rtc); - rtcss_write(davinci_rtc, rtc_cctrl, PRTCSS_RTC_CCTRL); - - spin_unlock_irqrestore(&davinci_rtc_lock, flags); - - return 0; -} - -static int davinci_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm) -{ - struct davinci_rtc *davinci_rtc = dev_get_drvdata(dev); - u16 days = 0; - u8 day0, day1; - unsigned long flags; - - alm->time.tm_sec = 0; - - spin_lock_irqsave(&davinci_rtc_lock, flags); - - davinci_rtcss_calendar_wait(davinci_rtc); - alm->time.tm_min = bcd2bin(rtcss_read(davinci_rtc, PRTCSS_RTC_AMIN)); - - davinci_rtcss_calendar_wait(davinci_rtc); - alm->time.tm_hour = bcd2bin(rtcss_read(davinci_rtc, PRTCSS_RTC_AHOUR)); - - davinci_rtcss_calendar_wait(davinci_rtc); - day0 = rtcss_read(davinci_rtc, PRTCSS_RTC_ADAY0); - - davinci_rtcss_calendar_wait(davinci_rtc); - day1 = rtcss_read(davinci_rtc, PRTCSS_RTC_ADAY1); - - spin_unlock_irqrestore(&davinci_rtc_lock, flags); - days |= day1; - days <<= 8; - days |= day0; - - convertfromdays(days, &alm->time); - - alm->pending = !!(rtcss_read(davinci_rtc, - PRTCSS_RTC_CCTRL) & - PRTCSS_RTC_CCTRL_AIEN); - alm->enabled = alm->pending && device_may_wakeup(dev); - - return 0; -} - -static int davinci_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm) -{ - struct davinci_rtc *davinci_rtc = dev_get_drvdata(dev); - unsigned long flags; - u16 days; - - convert2days(&days, &alm->time); - - spin_lock_irqsave(&davinci_rtc_lock, flags); - - davinci_rtcss_calendar_wait(davinci_rtc); - rtcss_write(davinci_rtc, bin2bcd(alm->time.tm_min), PRTCSS_RTC_AMIN); - - davinci_rtcss_calendar_wait(davinci_rtc); - rtcss_write(davinci_rtc, bin2bcd(alm->time.tm_hour), PRTCSS_RTC_AHOUR); - - davinci_rtcss_calendar_wait(davinci_rtc); - rtcss_write(davinci_rtc, days & 0xFF, PRTCSS_RTC_ADAY0); - 
- davinci_rtcss_calendar_wait(davinci_rtc); - rtcss_write(davinci_rtc, (days & 0xFF00) >> 8, PRTCSS_RTC_ADAY1); - - spin_unlock_irqrestore(&davinci_rtc_lock, flags); - - return 0; -} - -static const struct rtc_class_ops davinci_rtc_ops = { - .ioctl = davinci_rtc_ioctl, - .read_time = davinci_rtc_read_time, - .set_time = davinci_rtc_set_time, - .alarm_irq_enable = davinci_rtc_alarm_irq_enable, - .read_alarm = davinci_rtc_read_alarm, - .set_alarm = davinci_rtc_set_alarm, -}; - -static int __init davinci_rtc_probe(struct platform_device *pdev) -{ - struct device *dev = &pdev->dev; - struct davinci_rtc *davinci_rtc; - int ret = 0; - - davinci_rtc = devm_kzalloc(&pdev->dev, sizeof(struct davinci_rtc), GFP_KERNEL); - if (!davinci_rtc) - return -ENOMEM; - - davinci_rtc->irq = platform_get_irq(pdev, 0); - if (davinci_rtc->irq < 0) - return davinci_rtc->irq; - - davinci_rtc->base = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(davinci_rtc->base)) - return PTR_ERR(davinci_rtc->base); - - platform_set_drvdata(pdev, davinci_rtc); - - davinci_rtc->rtc = devm_rtc_allocate_device(&pdev->dev); - if (IS_ERR(davinci_rtc->rtc)) - return PTR_ERR(davinci_rtc->rtc); - - davinci_rtc->rtc->ops = &davinci_rtc_ops; - davinci_rtc->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; - davinci_rtc->rtc->range_max = RTC_TIMESTAMP_BEGIN_2000 + (1 << 16) * 86400ULL - 1; - - rtcif_write(davinci_rtc, PRTCIF_INTFLG_RTCSS, PRTCIF_INTFLG); - rtcif_write(davinci_rtc, 0, PRTCIF_INTEN); - rtcss_write(davinci_rtc, 0, PRTCSS_RTC_INTC_EXTENA1); - - rtcss_write(davinci_rtc, 0, PRTCSS_RTC_CTRL); - rtcss_write(davinci_rtc, 0, PRTCSS_RTC_CCTRL); - - ret = devm_request_irq(dev, davinci_rtc->irq, davinci_rtc_interrupt, - 0, "davinci_rtc", davinci_rtc); - if (ret < 0) { - dev_err(dev, "unable to register davinci RTC interrupt\n"); - return ret; - } - - /* Enable interrupts */ - rtcif_write(davinci_rtc, PRTCIF_INTEN_RTCSS, PRTCIF_INTEN); - rtcss_write(davinci_rtc, PRTCSS_RTC_INTC_EXTENA1_MASK, - 
PRTCSS_RTC_INTC_EXTENA1); - - rtcss_write(davinci_rtc, PRTCSS_RTC_CCTRL_CAEN, PRTCSS_RTC_CCTRL); - - device_init_wakeup(&pdev->dev, 0); - - return devm_rtc_register_device(davinci_rtc->rtc); -} - -static int __exit davinci_rtc_remove(struct platform_device *pdev) -{ - struct davinci_rtc *davinci_rtc = platform_get_drvdata(pdev); - - device_init_wakeup(&pdev->dev, 0); - - rtcif_write(davinci_rtc, 0, PRTCIF_INTEN); - - return 0; -} - -static struct platform_driver davinci_rtc_driver = { - .remove = __exit_p(davinci_rtc_remove), - .driver = { - .name = "rtc_davinci", - }, -}; - -module_platform_driver_probe(davinci_rtc_driver, davinci_rtc_probe); - -MODULE_AUTHOR("Miguel Aguilar "); -MODULE_DESCRIPTION("Texas Instruments DaVinci PRTC Driver"); -MODULE_LICENSE("GPL"); From 1ff56edf137a2f034fee9b9a398ddcd8cb7a5a34 Mon Sep 17 00:00:00 2001 From: Zhang Jianhua Date: Tue, 6 Sep 2022 22:30:37 +0800 Subject: [PATCH 1545/4122] rtc: fsl-ftm-alarm: Use module_platform_driver replace device_initcall The ftm_rtc_driver is registered at module init, but there is no unregister step at module exit. Replace device_initcall() with the module_platform_driver() macro, which registers and unregisters the platform driver automatically.
Signed-off-by: Zhang Jianhua Link: https://lore.kernel.org/r/20220906143037.1455317-1-chris.zjh@huawei.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-fsl-ftm-alarm.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/rtc/rtc-fsl-ftm-alarm.c b/drivers/rtc/rtc-fsl-ftm-alarm.c index c0df49fb978c..3d7c4077fe1c 100644 --- a/drivers/rtc/rtc-fsl-ftm-alarm.c +++ b/drivers/rtc/rtc-fsl-ftm-alarm.c @@ -327,12 +327,7 @@ static struct platform_driver ftm_rtc_driver = { }, }; -static int __init ftm_alarm_init(void) -{ - return platform_driver_register(&ftm_rtc_driver); -} - -device_initcall(ftm_alarm_init); +module_platform_driver(ftm_rtc_driver); MODULE_DESCRIPTION("NXP/Freescale FlexTimer alarm driver"); MODULE_AUTHOR("Biwen Li "); From c69bffe199270ce001d5764985a8e414c7e05fee Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Mon, 24 Oct 2022 18:55:49 +0200 Subject: [PATCH 1546/4122] dt-bindings: rtc: convert hym8563 bindings to json-schema Convert RTC binding for Haoyu Microelectronics HYM8563 to Device Tree Schema format. 
Signed-off-by: Sebastian Reichel Reviewed-by: Heiko Stuebner Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221024165549.74574-7-sebastian.reichel@collabora.com Signed-off-by: Alexandre Belloni --- .../devicetree/bindings/rtc/haoyu,hym8563.txt | 30 ---------- .../bindings/rtc/haoyu,hym8563.yaml | 56 +++++++++++++++++++ 2 files changed, 56 insertions(+), 30 deletions(-) delete mode 100644 Documentation/devicetree/bindings/rtc/haoyu,hym8563.txt create mode 100644 Documentation/devicetree/bindings/rtc/haoyu,hym8563.yaml diff --git a/Documentation/devicetree/bindings/rtc/haoyu,hym8563.txt b/Documentation/devicetree/bindings/rtc/haoyu,hym8563.txt deleted file mode 100644 index a8934fe2ab4c..000000000000 --- a/Documentation/devicetree/bindings/rtc/haoyu,hym8563.txt +++ /dev/null @@ -1,30 +0,0 @@ -Haoyu Microelectronics HYM8563 Real Time Clock - -The HYM8563 provides basic rtc and alarm functionality -as well as a clock output of up to 32kHz. - -Required properties: -- compatible: should be: "haoyu,hym8563" -- reg: i2c address -- #clock-cells: the value should be 0 - -Optional properties: -- clock-output-names: From common clock binding -- interrupts: rtc alarm/event interrupt - -Example: - -hym8563: hym8563@51 { - compatible = "haoyu,hym8563"; - reg = <0x51>; - - interrupts = <13 IRQ_TYPE_EDGE_FALLING>; - - #clock-cells = <0>; -}; - -device { -... - clocks = <&hym8563>; -... 
-}; diff --git a/Documentation/devicetree/bindings/rtc/haoyu,hym8563.yaml b/Documentation/devicetree/bindings/rtc/haoyu,hym8563.yaml new file mode 100644 index 000000000000..0b9f39ef0edc --- /dev/null +++ b/Documentation/devicetree/bindings/rtc/haoyu,hym8563.yaml @@ -0,0 +1,56 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/rtc/haoyu,hym8563.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Haoyu Microelectronics HYM8563 RTC + +maintainers: + - Alexandre Belloni + +properties: + compatible: + const: haoyu,hym8563 + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + "#clock-cells": + const: 0 + + clock-output-names: + description: From common clock binding to override the default output clock name. + maxItems: 1 + + wakeup-source: + description: Enables wake up of host system on alarm. + +allOf: + - $ref: rtc.yaml + +unevaluatedProperties: false + +required: + - compatible + - reg + - "#clock-cells" + +examples: + - | + #include <dt-bindings/interrupt-controller/irq.h> + + i2c { + #address-cells = <1>; + #size-cells = <0>; + + rtc@51 { + compatible = "haoyu,hym8563"; + reg = <0x51>; + interrupts = <13 IRQ_TYPE_EDGE_FALLING>; + #clock-cells = <0>; + }; + }; From f8513363b0b7155afaee09cca777fc608dc957e3 Mon Sep 17 00:00:00 2001 From: ye xingchen Date: Mon, 5 Sep 2022 09:01:19 +0000 Subject: [PATCH 1547/4122] rtc: s35390a: Remove the unneeded result variable Return the value of s35390a_set_reg() directly instead of storing it in another redundant variable.
Reported-by: Zeal Robot Signed-off-by: ye xingchen Link: https://lore.kernel.org/r/20220905090119.335121-1-ye.xingchen@zte.com.cn Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-s35390a.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/rtc/rtc-s35390a.c b/drivers/rtc/rtc-s35390a.c index 81d97b1d3159..b18daaf72b17 100644 --- a/drivers/rtc/rtc-s35390a.c +++ b/drivers/rtc/rtc-s35390a.c @@ -211,7 +211,7 @@ static int s35390a_rtc_set_time(struct device *dev, struct rtc_time *tm) { struct i2c_client *client = to_i2c_client(dev); struct s35390a *s35390a = i2c_get_clientdata(client); - int i, err; + int i; char buf[7], status; dev_dbg(&client->dev, "%s: tm is secs=%d, mins=%d, hours=%d mday=%d, " @@ -234,9 +234,7 @@ static int s35390a_rtc_set_time(struct device *dev, struct rtc_time *tm) for (i = 0; i < 7; ++i) buf[i] = bitrev8(buf[i]); - err = s35390a_set_reg(s35390a, S35390A_CMD_TIME1, buf, sizeof(buf)); - - return err; + return s35390a_set_reg(s35390a, S35390A_CMD_TIME1, buf, sizeof(buf)); } static int s35390a_rtc_read_time(struct device *dev, struct rtc_time *tm) From 8d816c1eaa752546fa3221f5027af0a667ff0c8f Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 4 Nov 2022 12:02:25 +0100 Subject: [PATCH 1548/4122] rtc: isl12022: add support for temperature sensor The isl12022 has built-in temperature compensation effective over the range -40C to +85C. It exposes the average of the last two temperature measurements as a 10-bit value in half-Kelvins. Make this available via the hwmon framework. 
Reviewed-by: Guenter Roeck Signed-off-by: Rasmus Villemoes Link: https://lore.kernel.org/r/20221104110225.2219761-1-linux@rasmusvillemoes.dk Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-isl12022.c | 94 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/drivers/rtc/rtc-isl12022.c b/drivers/rtc/rtc-isl12022.c index ca677c4265e6..a3b0de3393f5 100644 --- a/drivers/rtc/rtc-isl12022.c +++ b/drivers/rtc/rtc-isl12022.c @@ -17,6 +17,7 @@ #include #include #include +#include /* ISL register offsets */ #define ISL12022_REG_SC 0x00 @@ -30,6 +31,9 @@ #define ISL12022_REG_SR 0x07 #define ISL12022_REG_INT 0x08 +#define ISL12022_REG_BETA 0x0d +#define ISL12022_REG_TEMP_L 0x28 + /* ISL register bits */ #define ISL12022_HR_MIL (1 << 7) /* military or 24 hour time */ @@ -38,6 +42,7 @@ #define ISL12022_INT_WRTC (1 << 6) +#define ISL12022_BETA_TSE (1 << 7) static struct i2c_driver isl12022_driver; @@ -46,6 +51,93 @@ struct isl12022 { struct regmap *regmap; }; +static umode_t isl12022_hwmon_is_visible(const void *data, + enum hwmon_sensor_types type, + u32 attr, int channel) +{ + if (type == hwmon_temp && attr == hwmon_temp_input) + return 0444; + + return 0; +} + +/* + * A user-initiated temperature conversion is not started by this function, + * so the temperature is updated once every ~60 seconds. + */ +static int isl12022_hwmon_read_temp(struct device *dev, long *mC) +{ + struct isl12022 *isl12022 = dev_get_drvdata(dev); + struct regmap *regmap = isl12022->regmap; + u8 temp_buf[2]; + int temp, ret; + + ret = regmap_bulk_read(regmap, ISL12022_REG_TEMP_L, + temp_buf, sizeof(temp_buf)); + if (ret) + return ret; + /* + * Temperature is represented as a 10-bit number, unit half-Kelvins. 
+ */ + temp = (temp_buf[1] << 8) | temp_buf[0]; + temp *= 500; + temp -= 273000; + + *mC = temp; + + return 0; +} + +static int isl12022_hwmon_read(struct device *dev, + enum hwmon_sensor_types type, + u32 attr, int channel, long *val) +{ + if (type == hwmon_temp && attr == hwmon_temp_input) + return isl12022_hwmon_read_temp(dev, val); + + return -EOPNOTSUPP; +} + +static const struct hwmon_channel_info *isl12022_hwmon_info[] = { + HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT), + NULL +}; + +static const struct hwmon_ops isl12022_hwmon_ops = { + .is_visible = isl12022_hwmon_is_visible, + .read = isl12022_hwmon_read, +}; + +static const struct hwmon_chip_info isl12022_hwmon_chip_info = { + .ops = &isl12022_hwmon_ops, + .info = isl12022_hwmon_info, +}; + +static void isl12022_hwmon_register(struct device *dev) +{ + struct isl12022 *isl12022; + struct device *hwmon; + int ret; + + if (!IS_REACHABLE(CONFIG_HWMON)) + return; + + isl12022 = dev_get_drvdata(dev); + + ret = regmap_update_bits(isl12022->regmap, ISL12022_REG_BETA, + ISL12022_BETA_TSE, ISL12022_BETA_TSE); + if (ret) { + dev_warn(dev, "unable to enable temperature sensor\n"); + return; + } + + hwmon = devm_hwmon_device_register_with_info(dev, "isl12022", isl12022, + &isl12022_hwmon_chip_info, + NULL); + if (IS_ERR(hwmon)) + dev_warn(dev, "unable to register hwmon device: %pe\n", hwmon); +} + /* * In the routines that deal directly with the isl12022 hardware, we use * rtc_time -- month 0-11, hour 0-23, yr = calendar year-epoch. 
@@ -160,6 +252,8 @@ static int isl12022_probe(struct i2c_client *client) return PTR_ERR(isl12022->regmap); } + isl12022_hwmon_register(&client->dev); + isl12022->rtc = devm_rtc_allocate_device(&client->dev); if (IS_ERR(isl12022->rtc)) return PTR_ERR(isl12022->rtc); From e59b3c730b44f042540319d62cba73054fd928c8 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 6 Nov 2022 09:00:51 +0100 Subject: [PATCH 1549/4122] rtc: Include <linux/kstrtox.h> when appropriate The kstrto() functions have been moved from kernel.h to kstrtox.h. So, include the latter directly in the appropriate files. Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/786421fd0435a32206288904a1f879436a717529.1667721637.git.christophe.jaillet@wanadoo.fr Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-abx80x.c | 1 + drivers/rtc/rtc-bq32k.c | 1 + drivers/rtc/rtc-ds1307.c | 1 + drivers/rtc/rtc-rv3029c2.c | 1 + drivers/rtc/rtc-rx8025.c | 1 + drivers/rtc/sysfs.c | 1 + 6 files changed, 6 insertions(+) diff --git a/drivers/rtc/rtc-abx80x.c b/drivers/rtc/rtc-abx80x.c index e7f325ced940..2e0e6432901b 100644 --- a/drivers/rtc/rtc-abx80x.c +++ b/drivers/rtc/rtc-abx80x.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include diff --git a/drivers/rtc/rtc-bq32k.c b/drivers/rtc/rtc-bq32k.c index 6d6a55efb9cc..967ddc6bf76d 100644 --- a/drivers/rtc/rtc-bq32k.c +++ b/drivers/rtc/rtc-bq32k.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index d51565bcc189..7c2276cf5514 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/rtc/rtc-rv3029c2.c b/drivers/rtc/rtc-rv3029c2.c index eb483a30bd92..e4fdd47ae066 100644 --- a/drivers/rtc/rtc-rv3029c2.c +++ b/drivers/rtc/rtc-rv3029c2.c @@ -17,6 +17,7 @@ #include #include #include +#include #include /* Register map */ diff --git
a/drivers/rtc/rtc-rx8025.c b/drivers/rtc/rtc-rx8025.c index 77d3cb08b5ec..331c20d4d843 100644 --- a/drivers/rtc/rtc-rx8025.c +++ b/drivers/rtc/rtc-rx8025.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/drivers/rtc/sysfs.c b/drivers/rtc/sysfs.c index 00f1945bcb7e..e3062c4d3f2c 100644 --- a/drivers/rtc/sysfs.c +++ b/drivers/rtc/sysfs.c @@ -6,6 +6,7 @@ * Author: Alessandro Zummo */ +#include #include #include From 4dfe05bdc1ade79b943d4979a2e2a8b5ef68fbb5 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Thu, 27 Oct 2022 17:32:49 +0100 Subject: [PATCH 1550/4122] rtc: ds1347: fix value written to century register In `ds1347_set_time()`, the wrong value is being written to the `DS1347_CENTURY_REG` register. It needs to be converted to BCD. Fix it. Fixes: 147dae76dbb9 ("rtc: ds1347: handle century register") Cc: # v5.5+ Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20221027163249.447416-1-abbotti@mev.co.uk Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-ds1347.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-ds1347.c b/drivers/rtc/rtc-ds1347.c index 157bf5209ac4..a40c1a52df65 100644 --- a/drivers/rtc/rtc-ds1347.c +++ b/drivers/rtc/rtc-ds1347.c @@ -112,7 +112,7 @@ static int ds1347_set_time(struct device *dev, struct rtc_time *dt) return err; century = (dt->tm_year / 100) + 19; - err = regmap_write(map, DS1347_CENTURY_REG, century); + err = regmap_write(map, DS1347_CENTURY_REG, bin2bcd(century)); if (err) return err; From 60da73808298ff2cfa9f165d55eb3d7aa7078601 Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Thu, 10 Nov 2022 17:08:10 +0800 Subject: [PATCH 1551/4122] rtc: class: Fix potential memleak in devm_rtc_allocate_device() devm_rtc_allocate_device() will alloc a rtc_device first, and then run dev_set_name(). If dev_set_name() failed, the rtc_device will memleak. Move devm_add_action_or_reset() in front of dev_set_name() to prevent memleak. 
unreferenced object 0xffff888110a53000 (size 2048): comm "python3", pid 470, jiffies 4296078308 (age 58.882s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 08 30 a5 10 81 88 ff ff .........0...... 08 30 a5 10 81 88 ff ff 00 00 00 00 00 00 00 00 .0.............. backtrace: [<000000004aac0364>] kmalloc_trace+0x21/0x110 [<000000000ff02202>] devm_rtc_allocate_device+0xd4/0x400 [<000000001bdf5639>] devm_rtc_device_register+0x1a/0x80 [<00000000351bf81c>] rx4581_probe+0xdd/0x110 [rtc_rx4581] [<00000000f0eba0ae>] spi_probe+0xde/0x130 [<00000000bff89ee8>] really_probe+0x175/0x3f0 [<00000000128e8d84>] __driver_probe_device+0xe6/0x170 [<00000000ee5bf913>] device_driver_attach+0x32/0x80 [<00000000f3f28f92>] bind_store+0x10b/0x1a0 [<000000009ff812d8>] drv_attr_store+0x49/0x70 [<000000008139c323>] sysfs_kf_write+0x8d/0xb0 [<00000000b6146e01>] kernfs_fop_write_iter+0x214/0x2d0 [<00000000ecbe3895>] vfs_write+0x61a/0x7d0 [<00000000aa2196ea>] ksys_write+0xc8/0x190 [<0000000046a600f5>] do_syscall_64+0x37/0x90 [<00000000541a336f>] entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: 24d23181e43d ("rtc: class: check return value when calling dev_set_name()") Signed-off-by: Shang XiaoJing Reviewed-by: Yang Yingliang Link: https://lore.kernel.org/r/20221110090810.11225-1-shangxiaojing@huawei.com Signed-off-by: Alexandre Belloni --- drivers/rtc/class.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c index e48223c00c67..e5b7b48cffac 100644 --- a/drivers/rtc/class.c +++ b/drivers/rtc/class.c @@ -374,11 +374,11 @@ struct rtc_device *devm_rtc_allocate_device(struct device *dev) rtc->id = id; rtc->dev.parent = dev; - err = dev_set_name(&rtc->dev, "rtc%d", id); + err = devm_add_action_or_reset(dev, devm_rtc_release_device, rtc); if (err) return ERR_PTR(err); - err = devm_add_action_or_reset(dev, devm_rtc_release_device, rtc); + err = dev_set_name(&rtc->dev, "rtc%d", id); if (err) return ERR_PTR(err); From 
508ccdfb86b21da37ad091003a4d4567709d5dfb Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 9 Nov 2022 13:07:08 +0100 Subject: [PATCH 1552/4122] rtc: cmos: Call cmos_wake_setup() from cmos_do_probe() Notice that cmos_wake_setup() is the only user of acpi_rtc_info and it can operate on the cmos_rtc variable directly, so it need not set the platform_data pointer before cmos_do_probe() is called. Instead, it can be called by cmos_do_probe() in the case when the platform_data pointer is not set to implement the default behavior (which is to use the FADT information as long as ACPI support is enabled). Modify the code accordingly. While at it, drop a comment that doesn't really match the code it is supposed to be describing. Signed-off-by: Rafael J. Wysocki Reviewed-by: Zhang Rui Tested-by: Zhang Rui Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/4803444.31r3eYUQgx@kreacher Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-cmos.c | 47 ++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index 58cc2bae2f8a..a84262265d6d 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -744,6 +744,8 @@ static irqreturn_t cmos_interrupt(int irq, void *p) return IRQ_NONE; } +static void cmos_wake_setup(struct device *dev); + #ifdef CONFIG_PNP #define INITSECTION @@ -827,19 +829,27 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq) if (info->address_space) address_space = info->address_space; - if (info->rtc_day_alarm && info->rtc_day_alarm < 128) - cmos_rtc.day_alrm = info->rtc_day_alarm; - if (info->rtc_mon_alarm && info->rtc_mon_alarm < 128) - cmos_rtc.mon_alrm = info->rtc_mon_alarm; - if (info->rtc_century && info->rtc_century < 128) - cmos_rtc.century = info->rtc_century; + cmos_rtc.day_alrm = info->rtc_day_alarm; + cmos_rtc.mon_alrm = info->rtc_mon_alarm; + cmos_rtc.century = info->rtc_century; if (info->wake_on && 
info->wake_off) { cmos_rtc.wake_on = info->wake_on; cmos_rtc.wake_off = info->wake_off; } + } else { + cmos_wake_setup(dev); } + if (cmos_rtc.day_alrm >= 128) + cmos_rtc.day_alrm = 0; + + if (cmos_rtc.mon_alrm >= 128) + cmos_rtc.mon_alrm = 0; + + if (cmos_rtc.century >= 128) + cmos_rtc.century = 0; + cmos_rtc.dev = dev; dev_set_drvdata(dev, &cmos_rtc); @@ -1275,13 +1285,6 @@ static void use_acpi_alarm_quirks(void) static inline void use_acpi_alarm_quirks(void) { } #endif -/* Every ACPI platform has a mc146818 compatible "cmos rtc". Here we find - * its device node and pass extra config data. This helps its driver use - * capabilities that the now-obsolete mc146818 didn't have, and informs it - * that this board's RTC is wakeup-capable (per ACPI spec). - */ -static struct cmos_rtc_board_info acpi_rtc_info; - static void cmos_wake_setup(struct device *dev) { if (acpi_disabled) @@ -1289,26 +1292,23 @@ static void cmos_wake_setup(struct device *dev) use_acpi_alarm_quirks(); - acpi_rtc_info.wake_on = rtc_wake_on; - acpi_rtc_info.wake_off = rtc_wake_off; + cmos_rtc.wake_on = rtc_wake_on; + cmos_rtc.wake_off = rtc_wake_off; - /* workaround bug in some ACPI tables */ + /* ACPI tables bug workaround. 
*/ if (acpi_gbl_FADT.month_alarm && !acpi_gbl_FADT.day_alarm) { dev_dbg(dev, "bogus FADT month_alarm (%d)\n", acpi_gbl_FADT.month_alarm); acpi_gbl_FADT.month_alarm = 0; } - acpi_rtc_info.rtc_day_alarm = acpi_gbl_FADT.day_alarm; - acpi_rtc_info.rtc_mon_alarm = acpi_gbl_FADT.month_alarm; - acpi_rtc_info.rtc_century = acpi_gbl_FADT.century; + cmos_rtc.day_alrm = acpi_gbl_FADT.day_alarm; + cmos_rtc.mon_alrm = acpi_gbl_FADT.month_alarm; + cmos_rtc.century = acpi_gbl_FADT.century; - /* NOTE: S4_RTC_WAKE is NOT currently useful to Linux */ if (acpi_gbl_FADT.flags & ACPI_FADT_S4_RTC_WAKE) dev_info(dev, "RTC can wake from S4\n"); - dev->platform_data = &acpi_rtc_info; - /* RTC always wakes from S1/S2/S3, and often S4/STD */ device_init_wakeup(dev, 1); } @@ -1359,8 +1359,6 @@ static int cmos_pnp_probe(struct pnp_dev *pnp, const struct pnp_device_id *id) { int irq, ret; - cmos_wake_setup(&pnp->dev); - if (pnp_port_start(pnp, 0) == 0x70 && !pnp_irq_valid(pnp, 0)) { irq = 0; #ifdef CONFIG_X86 @@ -1468,7 +1466,6 @@ static int __init cmos_platform_probe(struct platform_device *pdev) int irq, ret; cmos_of_init(pdev); - cmos_wake_setup(&pdev->dev); if (RTC_IOMAPPED) resource = platform_get_resource(pdev, IORESOURCE_IO, 0); From 375bbba09692fe4c5218eddee8e312dd733fa846 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 9 Nov 2022 13:09:07 +0100 Subject: [PATCH 1553/4122] rtc: cmos: Call rtc_wake_setup() from cmos_do_probe() To reduce code duplication, move the invocation of rtc_wake_setup() into cmos_do_probe() and simplify the callers of the latter. No intentional functional impact. Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Zhang Rui Tested-by: Zhang Rui Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/2143522.irdbgypaU6@kreacher Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-cmos.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index a84262265d6d..583116994a37 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -744,6 +744,7 @@ static irqreturn_t cmos_interrupt(int irq, void *p) return IRQ_NONE; } +static inline void rtc_wake_setup(struct device *dev); static void cmos_wake_setup(struct device *dev); #ifdef CONFIG_PNP @@ -938,6 +939,13 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq) nvmem_cfg.size = address_space - NVRAM_OFFSET; devm_rtc_nvmem_register(cmos_rtc.rtc, &nvmem_cfg); + /* + * Everything has gone well so far, so by default register a handler for + * the ACPI RTC fixed event. + */ + if (!info) + rtc_wake_setup(dev); + dev_info(dev, "%s%s, %d bytes nvram%s\n", !is_valid_irq(rtc_irq) ? "no alarms" : cmos_rtc.mon_alrm ? 
"alarms up to one year" : @@ -1357,7 +1365,7 @@ static void rtc_wake_setup(struct device *dev) static int cmos_pnp_probe(struct pnp_dev *pnp, const struct pnp_device_id *id) { - int irq, ret; + int irq; if (pnp_port_start(pnp, 0) == 0x70 && !pnp_irq_valid(pnp, 0)) { irq = 0; @@ -1373,13 +1381,7 @@ static int cmos_pnp_probe(struct pnp_dev *pnp, const struct pnp_device_id *id) irq = pnp_irq(pnp, 0); } - ret = cmos_do_probe(&pnp->dev, pnp_get_resource(pnp, IORESOURCE_IO, 0), irq); - if (ret) - return ret; - - rtc_wake_setup(&pnp->dev); - - return 0; + return cmos_do_probe(&pnp->dev, pnp_get_resource(pnp, IORESOURCE_IO, 0), irq); } static void cmos_pnp_remove(struct pnp_dev *pnp) @@ -1463,7 +1465,7 @@ static inline void cmos_of_init(struct platform_device *pdev) {} static int __init cmos_platform_probe(struct platform_device *pdev) { struct resource *resource; - int irq, ret; + int irq; cmos_of_init(pdev); @@ -1475,13 +1477,7 @@ static int __init cmos_platform_probe(struct platform_device *pdev) if (irq < 0) irq = -1; - ret = cmos_do_probe(&pdev->dev, resource, irq); - if (ret) - return ret; - - rtc_wake_setup(&pdev->dev); - - return 0; + return cmos_do_probe(&pdev->dev, resource, irq); } static int cmos_platform_remove(struct platform_device *pdev) From dca4d3b71c8a09a16951add656711fbd6f5bfbb0 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 9 Nov 2022 13:09:32 +0100 Subject: [PATCH 1554/4122] rtc: cmos: Eliminate forward declarations of some functions Reorder the ACPI-related code before cmos_do_probe() so as to eliminate excessive forward declarations of some functions. While at it, for consistency, add the inline modifier to the definitions of empty stub static functions and remove it from the corresponding definitions of functions with non-empty bodies. No intentional functional impact. Signed-off-by: Rafael J.
Wysocki Reviewed-by: Zhang Rui Tested-by: Zhang Rui Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/13157911.uLZWGnKmhe@kreacher Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-cmos.c | 304 ++++++++++++++++++++--------------------- 1 file changed, 149 insertions(+), 155 deletions(-) diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index 583116994a37..2a21d8281aa6 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -744,8 +744,155 @@ static irqreturn_t cmos_interrupt(int irq, void *p) return IRQ_NONE; } -static inline void rtc_wake_setup(struct device *dev); -static void cmos_wake_setup(struct device *dev); +#ifdef CONFIG_ACPI + +#include + +static u32 rtc_handler(void *context) +{ + struct device *dev = context; + struct cmos_rtc *cmos = dev_get_drvdata(dev); + unsigned char rtc_control = 0; + unsigned char rtc_intr; + unsigned long flags; + + + /* + * Always update rtc irq when ACPI is used as RTC Alarm. + * Or else, ACPI SCI is enabled during suspend/resume only, + * update rtc irq in that case. + */ + if (cmos_use_acpi_alarm()) + cmos_interrupt(0, (void *)cmos->rtc); + else { + /* Fix me: can we use cmos_interrupt() here as well? */ + spin_lock_irqsave(&rtc_lock, flags); + if (cmos_rtc.suspend_ctrl) + rtc_control = CMOS_READ(RTC_CONTROL); + if (rtc_control & RTC_AIE) { + cmos_rtc.suspend_ctrl &= ~RTC_AIE; + CMOS_WRITE(rtc_control, RTC_CONTROL); + rtc_intr = CMOS_READ(RTC_INTR_FLAGS); + rtc_update_irq(cmos->rtc, 1, rtc_intr); + } + spin_unlock_irqrestore(&rtc_lock, flags); + } + + pm_wakeup_hard_event(dev); + acpi_clear_event(ACPI_EVENT_RTC); + acpi_disable_event(ACPI_EVENT_RTC, 0); + return ACPI_INTERRUPT_HANDLED; +} + +static void rtc_wake_setup(struct device *dev) +{ + if (acpi_disabled) + return; + + acpi_install_fixed_event_handler(ACPI_EVENT_RTC, rtc_handler, dev); + /* + * After the RTC handler is installed, the Fixed_RTC event should + * be disabled. Only when the RTC alarm is set will it be enabled. 
+ */ + acpi_clear_event(ACPI_EVENT_RTC); + acpi_disable_event(ACPI_EVENT_RTC, 0); +} + +static void rtc_wake_on(struct device *dev) +{ + acpi_clear_event(ACPI_EVENT_RTC); + acpi_enable_event(ACPI_EVENT_RTC, 0); +} + +static void rtc_wake_off(struct device *dev) +{ + acpi_disable_event(ACPI_EVENT_RTC, 0); +} + +#ifdef CONFIG_X86 +/* Enable use_acpi_alarm mode for Intel platforms no earlier than 2015 */ +static void use_acpi_alarm_quirks(void) +{ + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return; + + if (!is_hpet_enabled()) + return; + + if (dmi_get_bios_year() < 2015) + return; + + use_acpi_alarm = true; +} +#else +static inline void use_acpi_alarm_quirks(void) { } +#endif + +static void cmos_wake_setup(struct device *dev) +{ + if (acpi_disabled) + return; + + use_acpi_alarm_quirks(); + + cmos_rtc.wake_on = rtc_wake_on; + cmos_rtc.wake_off = rtc_wake_off; + + /* ACPI tables bug workaround. */ + if (acpi_gbl_FADT.month_alarm && !acpi_gbl_FADT.day_alarm) { + dev_dbg(dev, "bogus FADT month_alarm (%d)\n", + acpi_gbl_FADT.month_alarm); + acpi_gbl_FADT.month_alarm = 0; + } + + cmos_rtc.day_alrm = acpi_gbl_FADT.day_alarm; + cmos_rtc.mon_alrm = acpi_gbl_FADT.month_alarm; + cmos_rtc.century = acpi_gbl_FADT.century; + + if (acpi_gbl_FADT.flags & ACPI_FADT_S4_RTC_WAKE) + dev_info(dev, "RTC can wake from S4\n"); + + /* RTC always wakes from S1/S2/S3, and often S4/STD */ + device_init_wakeup(dev, 1); +} + +static void cmos_check_acpi_rtc_status(struct device *dev, + unsigned char *rtc_control) +{ + struct cmos_rtc *cmos = dev_get_drvdata(dev); + acpi_event_status rtc_status; + acpi_status status; + + if (acpi_gbl_FADT.flags & ACPI_FADT_FIXED_RTC) + return; + + status = acpi_get_event_status(ACPI_EVENT_RTC, &rtc_status); + if (ACPI_FAILURE(status)) { + dev_err(dev, "Could not get RTC status\n"); + } else if (rtc_status & ACPI_EVENT_FLAG_SET) { + unsigned char mask; + *rtc_control &= ~RTC_AIE; + CMOS_WRITE(*rtc_control, RTC_CONTROL); + mask = CMOS_READ(RTC_INTR_FLAGS); 
+ rtc_update_irq(cmos->rtc, 1, mask); + } +} + +#else /* !CONFIG_ACPI */ + +static inline void rtc_wake_setup(struct device *dev) +{ +} + +static inline void cmos_wake_setup(struct device *dev) +{ +} + +static inline void cmos_check_acpi_rtc_status(struct device *dev, + unsigned char *rtc_control) +{ +} +#endif /* CONFIG_ACPI */ #ifdef CONFIG_PNP #define INITSECTION @@ -1140,9 +1287,6 @@ static void cmos_check_wkalrm(struct device *dev) } } -static void cmos_check_acpi_rtc_status(struct device *dev, - unsigned char *rtc_control); - static int __maybe_unused cmos_resume(struct device *dev) { struct cmos_rtc *cmos = dev_get_drvdata(dev); @@ -1209,156 +1353,6 @@ static SIMPLE_DEV_PM_OPS(cmos_pm_ops, cmos_suspend, cmos_resume); * predate even PNPBIOS should set up platform_bus devices. */ -#ifdef CONFIG_ACPI - -#include - -static u32 rtc_handler(void *context) -{ - struct device *dev = context; - struct cmos_rtc *cmos = dev_get_drvdata(dev); - unsigned char rtc_control = 0; - unsigned char rtc_intr; - unsigned long flags; - - - /* - * Always update rtc irq when ACPI is used as RTC Alarm. - * Or else, ACPI SCI is enabled during suspend/resume only, - * update rtc irq in that case. - */ - if (cmos_use_acpi_alarm()) - cmos_interrupt(0, (void *)cmos->rtc); - else { - /* Fix me: can we use cmos_interrupt() here as well? 
*/ - spin_lock_irqsave(&rtc_lock, flags); - if (cmos_rtc.suspend_ctrl) - rtc_control = CMOS_READ(RTC_CONTROL); - if (rtc_control & RTC_AIE) { - cmos_rtc.suspend_ctrl &= ~RTC_AIE; - CMOS_WRITE(rtc_control, RTC_CONTROL); - rtc_intr = CMOS_READ(RTC_INTR_FLAGS); - rtc_update_irq(cmos->rtc, 1, rtc_intr); - } - spin_unlock_irqrestore(&rtc_lock, flags); - } - - pm_wakeup_hard_event(dev); - acpi_clear_event(ACPI_EVENT_RTC); - acpi_disable_event(ACPI_EVENT_RTC, 0); - return ACPI_INTERRUPT_HANDLED; -} - -static inline void rtc_wake_setup(struct device *dev) -{ - if (acpi_disabled) - return; - - acpi_install_fixed_event_handler(ACPI_EVENT_RTC, rtc_handler, dev); - /* - * After the RTC handler is installed, the Fixed_RTC event should - * be disabled. Only when the RTC alarm is set will it be enabled. - */ - acpi_clear_event(ACPI_EVENT_RTC); - acpi_disable_event(ACPI_EVENT_RTC, 0); -} - -static void rtc_wake_on(struct device *dev) -{ - acpi_clear_event(ACPI_EVENT_RTC); - acpi_enable_event(ACPI_EVENT_RTC, 0); -} - -static void rtc_wake_off(struct device *dev) -{ - acpi_disable_event(ACPI_EVENT_RTC, 0); -} - -#ifdef CONFIG_X86 -/* Enable use_acpi_alarm mode for Intel platforms no earlier than 2015 */ -static void use_acpi_alarm_quirks(void) -{ - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) - return; - - if (!is_hpet_enabled()) - return; - - if (dmi_get_bios_year() < 2015) - return; - - use_acpi_alarm = true; -} -#else -static inline void use_acpi_alarm_quirks(void) { } -#endif - -static void cmos_wake_setup(struct device *dev) -{ - if (acpi_disabled) - return; - - use_acpi_alarm_quirks(); - - cmos_rtc.wake_on = rtc_wake_on; - cmos_rtc.wake_off = rtc_wake_off; - - /* ACPI tables bug workaround. 
*/ - if (acpi_gbl_FADT.month_alarm && !acpi_gbl_FADT.day_alarm) { - dev_dbg(dev, "bogus FADT month_alarm (%d)\n", - acpi_gbl_FADT.month_alarm); - acpi_gbl_FADT.month_alarm = 0; - } - - cmos_rtc.day_alrm = acpi_gbl_FADT.day_alarm; - cmos_rtc.mon_alrm = acpi_gbl_FADT.month_alarm; - cmos_rtc.century = acpi_gbl_FADT.century; - - if (acpi_gbl_FADT.flags & ACPI_FADT_S4_RTC_WAKE) - dev_info(dev, "RTC can wake from S4\n"); - - /* RTC always wakes from S1/S2/S3, and often S4/STD */ - device_init_wakeup(dev, 1); -} - -static void cmos_check_acpi_rtc_status(struct device *dev, - unsigned char *rtc_control) -{ - struct cmos_rtc *cmos = dev_get_drvdata(dev); - acpi_event_status rtc_status; - acpi_status status; - - if (acpi_gbl_FADT.flags & ACPI_FADT_FIXED_RTC) - return; - - status = acpi_get_event_status(ACPI_EVENT_RTC, &rtc_status); - if (ACPI_FAILURE(status)) { - dev_err(dev, "Could not get RTC status\n"); - } else if (rtc_status & ACPI_EVENT_FLAG_SET) { - unsigned char mask; - *rtc_control &= ~RTC_AIE; - CMOS_WRITE(*rtc_control, RTC_CONTROL); - mask = CMOS_READ(RTC_INTR_FLAGS); - rtc_update_irq(cmos->rtc, 1, mask); - } -} - -#else - -static void cmos_wake_setup(struct device *dev) -{ -} - -static void cmos_check_acpi_rtc_status(struct device *dev, - unsigned char *rtc_control) -{ -} - -static void rtc_wake_setup(struct device *dev) -{ -} -#endif - #ifdef CONFIG_PNP #include From d13e9ad9f5146f066a5c5a1cc993d09e4fb21ead Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 9 Nov 2022 13:12:00 +0100 Subject: [PATCH 1555/4122] rtc: cmos: Rename ACPI-related functions The names of rtc_wake_setup() and cmos_wake_setup() don't indicate that these functions are ACPI-related, which is the case, and the former doesn't really reflect the role of the function. Rename them to acpi_rtc_event_setup() and acpi_cmos_wake_setup(), respectively, to address this shortcoming. No intentional functional impact. Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Zhang Rui Tested-by: Zhang Rui Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/3225614.44csPzL39Z@kreacher Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-cmos.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index 2a21d8281aa6..039486bfedf4 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -784,7 +784,7 @@ static u32 rtc_handler(void *context) return ACPI_INTERRUPT_HANDLED; } -static void rtc_wake_setup(struct device *dev) +static void acpi_rtc_event_setup(struct device *dev) { if (acpi_disabled) return; @@ -828,7 +828,7 @@ static void use_acpi_alarm_quirks(void) static inline void use_acpi_alarm_quirks(void) { } #endif -static void cmos_wake_setup(struct device *dev) +static void acpi_cmos_wake_setup(struct device *dev) { if (acpi_disabled) return; @@ -880,11 +880,11 @@ static void cmos_check_acpi_rtc_status(struct device *dev, #else /* !CONFIG_ACPI */ -static inline void rtc_wake_setup(struct device *dev) +static inline void acpi_rtc_event_setup(struct device *dev) { } -static inline void cmos_wake_setup(struct device *dev) +static inline void acpi_cmos_wake_setup(struct device *dev) { } @@ -986,7 +986,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq) cmos_rtc.wake_off = info->wake_off; } } else { - cmos_wake_setup(dev); + acpi_cmos_wake_setup(dev); } if (cmos_rtc.day_alrm >= 128) @@ -1091,7 +1091,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq) * the ACPI RTC fixed event. */ if (!info) - rtc_wake_setup(dev); + acpi_rtc_event_setup(dev); dev_info(dev, "%s%s, %d bytes nvram%s\n", !is_valid_irq(rtc_irq) ? "no alarms" : From 83ebb7b3036d151ee39a4a752018665648fc3bd4 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Wed, 9 Nov 2022 13:15:36 +0100 Subject: [PATCH 1556/4122] rtc: cmos: Disable ACPI RTC event on removal Make cmos_do_remove() drop the ACPI RTC fixed event handler so as to prevent it from operating on stale data in case the event triggers after driver removal. Fixes: 311ee9c151ad ("rtc: cmos: allow using ACPI for RTC alarm instead of HPET") Signed-off-by: Rafael J. Wysocki Reviewed-by: Zhang Rui Tested-by: Zhang Rui Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/2224609.iZASKD2KPV@kreacher Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-cmos.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index 039486bfedf4..00e2ca7374ec 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -798,6 +798,14 @@ static void acpi_rtc_event_setup(struct device *dev) acpi_disable_event(ACPI_EVENT_RTC, 0); } +static void acpi_rtc_event_cleanup(void) +{ + if (acpi_disabled) + return; + + acpi_remove_fixed_event_handler(ACPI_EVENT_RTC, rtc_handler); +} + static void rtc_wake_on(struct device *dev) { acpi_clear_event(ACPI_EVENT_RTC); @@ -884,6 +892,10 @@ static inline void acpi_rtc_event_setup(struct device *dev) { } +static inline void acpi_rtc_event_cleanup(void) +{ +} + static inline void acpi_cmos_wake_setup(struct device *dev) { } @@ -1138,6 +1150,9 @@ static void cmos_do_remove(struct device *dev) hpet_unregister_irq_handler(cmos_interrupt); } + if (!dev_get_platdata(dev)) + acpi_rtc_event_cleanup(); + cmos->rtc = NULL; ports = cmos->iomem; From 0462681e207ccc44778a77b3297af728b1cf5b9f Mon Sep 17 00:00:00 2001 From: Stefan Eichenberger Date: Sun, 6 Nov 2022 12:59:15 +0100 Subject: [PATCH 1557/4122] rtc: snvs: Allow a time difference on clock register read On an iMX6ULL the following message appears when a wakealarm is set: echo 0 > /sys/class/rtc/rtc1/wakealarm rtc rtc1: Timeout trying to get valid LPSRT Counter read This does not always happen but is reproducible quite 
often (7 out of 10 times). The problem appears because the iMX6ULL is not able to read the registers within one 32kHz clock cycle which is the base clock of the RTC. Therefore, this patch allows a difference of up to 320 cycles (10ms). 10ms was chosen to be big enough even on systems with less cpu power (e.g. iMX6ULL). According to the reference manual a difference is fine: - If the two consecutive reads are similar, the value is correct. The values have to be similar, not equal. Fixes: cd7f3a249dbe ("rtc: snvs: Add timeouts to avoid kernel lockups") Reviewed-by: Francesco Dolcini Signed-off-by: Stefan Eichenberger Signed-off-by: Francesco Dolcini Link: https://lore.kernel.org/r/20221106115915.7930-1-francesco@dolcini.it Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-snvs.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-snvs.c b/drivers/rtc/rtc-snvs.c index bd929b0e7d7d..d82acf1af1fa 100644 --- a/drivers/rtc/rtc-snvs.c +++ b/drivers/rtc/rtc-snvs.c @@ -32,6 +32,14 @@ #define SNVS_LPPGDR_INIT 0x41736166 #define CNTR_TO_SECS_SH 15 +/* The maximum RTC clock cycles that are allowed to pass between two + * consecutive clock counter register reads. If the values are corrupted a + * bigger difference is expected. The RTC frequency is 32kHz. With 320 cycles + * we end at 10ms which should be enough for most cases. If it once takes + * longer than expected we do a retry. 
+ */ +#define MAX_RTC_READ_DIFF_CYCLES 320 + struct snvs_rtc_data { struct rtc_device *rtc; struct regmap *regmap; @@ -56,6 +64,7 @@ static u64 rtc_read_lpsrt(struct snvs_rtc_data *data) static u32 rtc_read_lp_counter(struct snvs_rtc_data *data) { u64 read1, read2; + s64 diff; unsigned int timeout = 100; /* As expected, the registers might update between the read of the LSB @@ -66,7 +75,8 @@ static u32 rtc_read_lp_counter(struct snvs_rtc_data *data) do { read2 = read1; read1 = rtc_read_lpsrt(data); - } while (read1 != read2 && --timeout); + diff = read1 - read2; + } while (((diff < 0) || (diff > MAX_RTC_READ_DIFF_CYCLES)) && --timeout); if (!timeout) dev_err(&data->rtc->dev, "Timeout trying to get valid LPSRT Counter read\n"); @@ -78,13 +88,15 @@ static u32 rtc_read_lp_counter(struct snvs_rtc_data *data) static int rtc_read_lp_counter_lsb(struct snvs_rtc_data *data, u32 *lsb) { u32 count1, count2; + s32 diff; unsigned int timeout = 100; regmap_read(data->regmap, data->offset + SNVS_LPSRTCLR, &count1); do { count2 = count1; regmap_read(data->regmap, data->offset + SNVS_LPSRTCLR, &count1); - } while (count1 != count2 && --timeout); + diff = count1 - count2; + } while (((diff < 0) || (diff > MAX_RTC_READ_DIFF_CYCLES)) && --timeout); if (!timeout) { dev_err(&data->rtc->dev, "Timeout trying to get valid LPSRT Counter read\n"); return -ETIMEDOUT; From 60da2d11fcbc043304910e4d2ca82f9bab953e63 Mon Sep 17 00:00:00 2001 From: Bernard Metzler Date: Tue, 15 Nov 2022 18:07:47 +0100 Subject: [PATCH 1558/4122] RDMA/siw: Set defined status for work completion with undefined status A malicious user may write undefined values into memory mapped completion queue elements status or opcode. Undefined status or opcode values will result in out-of-bounds access to an array mapping siw internal representation of opcode and status to RDMA core representation when reaping CQ elements. 
While siw detects those undefined values, it did not correctly set completion status to a defined value, thus defeating the whole purpose of the check. This bug leads to the following Smatch static checker warning: drivers/infiniband/sw/siw/siw_cq.c:96 siw_reap_cqe() error: buffer overflow 'map_cqe_status' 10 <= 21 Fixes: bdf1da5df9da ("RDMA/siw: Fix immediate work request flush to completion queue") Link: https://lore.kernel.org/r/20221115170747.1263298-1-bmt@zurich.ibm.com Reported-by: Dan Carpenter Signed-off-by: Bernard Metzler Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/siw/siw_cq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw_cq.c b/drivers/infiniband/sw/siw/siw_cq.c index acc7bcd538b5..403029de6b92 100644 --- a/drivers/infiniband/sw/siw/siw_cq.c +++ b/drivers/infiniband/sw/siw/siw_cq.c @@ -88,9 +88,9 @@ int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc) if (opcode >= SIW_NUM_OPCODES) { opcode = 0; - status = IB_WC_GENERAL_ERR; + status = SIW_WC_GENERAL_ERR; } else if (status >= SIW_NUM_WC_STATUS) { - status = IB_WC_GENERAL_ERR; + status = SIW_WC_GENERAL_ERR; } wc->opcode = map_wc_opcode[opcode]; wc->status = map_cqe_status[status].ib; From 2d08a893b87cf9b2f9dbb3afaff60ca4530d55a2 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 10 Nov 2022 20:17:07 +0000 Subject: [PATCH 1559/4122] x86/debug: Include percpu.h in debugreg.h to get DECLARE_PER_CPU() et al MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Include percpu.h to pick up the definition of DECLARE_PER_CPU() and friends instead of relying on the parent to provide the #include. E.g. 
swapping the order of includes in arch/x86/kvm/vmx/nested.c (simulating KVM code movement being done for other purposes) results in build errors: In file included from arch/x86/kvm/vmx/nested.c:3: arch/x86/include/asm/debugreg.h:9:32: error: unknown type name ‘cpu_dr7’ 9 | DECLARE_PER_CPU(unsigned long, cpu_dr7); | ^~~~~~~ Signed-off-by: Sean Christopherson Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221110201707.1976032-1-seanjc@google.com --- arch/x86/include/asm/debugreg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index cfdf307ddc01..b049d950612f 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -2,8 +2,8 @@ #ifndef _ASM_X86_DEBUGREG_H #define _ASM_X86_DEBUGREG_H - #include +#include #include DECLARE_PER_CPU(unsigned long, cpu_dr7); From 24c94060fc9b4e0f19e6e018869db46db21d6bc7 Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Mon, 14 Nov 2022 15:29:43 -0500 Subject: [PATCH 1560/4122] gpiolib: ensure that fwnode is properly set Note that this is a RFC patch and not meant to be merged. I looked into a problem with linux-next-20221110 on the Qualcomm SA8540P automotive board (sc8280xp) where the UFS host controller would fail to probe due to repeated probe deferrals when trying to get reset-gpios via devm_gpiod_get_optional(). of_get_named_gpiod_flags() returns -EPROBE_DEFER, which is caused by of_gpiochip_match_node_and_xlate() returning 0 since the of_xlate function pointer is not set for the qcom,sc8280xp-tlmm pinctrl driver. The pinctrl driver doesn't define one, so of_gpiochip_add() should automatically setup of_gpio_simple_xlate() on its behalf. This doesn't happen since the fwnode member on the struct gpiochip is set to null when of_gpiochip_add() is called. Let's work around this by ensuring that it's set if available. 
Note that this broke sometime within the last few weeks within linux-next and I haven't bisected this. I'm posting this in the hopes that someone may know offhand which patch(es) may have broken this. Signed-off-by: Brian Masney Tested-by: Marijn Suijten Tested-by: Konrad Dybcio Tested-by: Steev Klimaszewski #Lenovo Thinkpad X13s Tested-by: Neil Armstrong Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 7936d54a2e30..51afdc6ac919 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -679,7 +679,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, * Assign fwnode depending on the result of the previous calls, * if none of them succeed, assign it to the parent's one. */ - gdev->dev.fwnode = dev_fwnode(&gdev->dev) ?: fwnode; + gc->fwnode = gdev->dev.fwnode = dev_fwnode(&gdev->dev) ?: fwnode; gdev->id = ida_alloc(&gpio_ida, GFP_KERNEL); if (gdev->id < 0) { From 40059212f99c31f26c69763e560325e59eac02c6 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Tue, 15 Nov 2022 17:10:21 -0600 Subject: [PATCH 1561/4122] dt-bindings: gpio: gpio-davinci: Increase maxItems in gpio-line-names gpio-line-names really depends on ti,ngpios. However, the maximum value we have seen across the board is on K2G and da850 platforms where it can be up to 144. 
Link: https://lore.kernel.org/linux-arm-kernel/20221115200357.qa2rvw3clbz7unzq@symptom/T/#u Fixes: c830b87a761b ("dt-bindings: gpio: gpio-davinci: Convert to json-schema") Reported-by: Robert Nelson Signed-off-by: Nishanth Menon Acked-by: Krzysztof Kozlowski Signed-off-by: Bartosz Golaszewski --- Documentation/devicetree/bindings/gpio/gpio-davinci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/gpio/gpio-davinci.yaml b/Documentation/devicetree/bindings/gpio/gpio-davinci.yaml index f32e09ef937c..10e56cf306db 100644 --- a/Documentation/devicetree/bindings/gpio/gpio-davinci.yaml +++ b/Documentation/devicetree/bindings/gpio/gpio-davinci.yaml @@ -35,7 +35,7 @@ properties: gpio-line-names: description: strings describing the names of each gpio line. minItems: 1 - maxItems: 100 + maxItems: 144 "#gpio-cells": const: 2 From f5bc4428cc6e096170d35fc79a640d90e33dd9c3 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 14 Nov 2022 15:02:09 -0800 Subject: [PATCH 1562/4122] perf stat: Clear screen only if output file is a tty The --interval-clear option makes perf stat to clear the terminal at each interval. But it doesn't need to clear the screen when it saves to a file. Make it fail when it's enabled with the output options. 
$ perf stat -I 1 --interval-clear -o myfile true --interval-clear does not work with output Usage: perf stat [] [] -o, --output output file name --log-fd log output to fd, instead of stderr --interval-clear clear screen in between new interval Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221114230227.1255976-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index d5e1670bca20..1d79801f4e84 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -2403,6 +2403,14 @@ int cmd_stat(int argc, const char **argv) } } + if (stat_config.interval_clear && !isatty(fileno(output))) { + fprintf(stderr, "--interval-clear does not work with output\n"); + parse_options_usage(stat_usage, stat_options, "o", 1); + parse_options_usage(NULL, stat_options, "log-fd", 0); + parse_options_usage(NULL, stat_options, "interval-clear", 0); + return -1; + } + stat_config.output = output; /* From 31bf6aea997674a030be02999560d8e6f7a6b974 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 14 Nov 2022 15:02:10 -0800 Subject: [PATCH 1563/4122] perf stat: Split print_running() function To make the code more obvious and hopefully simpler, factor out the code for each output mode - stdio, CSV, JSON. 
Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221114230227.1255976-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 37 +++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 2a3c1e0098b9..281b811f8574 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -25,24 +25,41 @@ #define CNTR_NOT_SUPPORTED "" #define CNTR_NOT_COUNTED "" -static void print_running(struct perf_stat_config *config, - u64 run, u64 ena) +static void print_running_std(struct perf_stat_config *config, u64 run, u64 ena) { + if (run != ena) + fprintf(config->output, " (%.2f%%)", 100.0 * run / ena); +} +static void print_running_csv(struct perf_stat_config *config, u64 run, u64 ena) +{ double enabled_percent = 100; if (run != ena) enabled_percent = 100 * run / ena; + fprintf(config->output, "%s%" PRIu64 "%s%.2f", + config->csv_sep, run, config->csv_sep, enabled_percent); +} + +static void print_running_json(struct perf_stat_config *config, u64 run, u64 ena) +{ + double enabled_percent = 100; + + if (run != ena) + enabled_percent = 100 * run / ena; + fprintf(config->output, "\"event-runtime\" : %" PRIu64 ", \"pcnt-running\" : %.2f, ", + run, enabled_percent); +} + +static void print_running(struct perf_stat_config *config, + u64 run, u64 ena) +{ if (config->json_output) - fprintf(config->output, - "\"event-runtime\" : %" PRIu64 ", \"pcnt-running\" : %.2f, ", - run, enabled_percent); + print_running_json(config, run, ena); else if (config->csv_output) - fprintf(config->output, - "%s%" PRIu64 "%s%.2f", config->csv_sep, - run, config->csv_sep, enabled_percent); - else if (run != ena) - fprintf(config->output, " (%.2f%%)", 100.0 * run / ena); + 
print_running_csv(config, run, ena); + else + print_running_std(config, run, ena); } static void print_noise_pct(struct perf_stat_config *config, From def99d60df6f21b4c36d23f0071ec8a73f39f28b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 14 Nov 2022 15:02:11 -0800 Subject: [PATCH 1564/4122] perf stat: Split print_noise_pct() function Likewise, split print_noise_pct() for each output mode. Although it's a tiny function, more logic will be added soon so it'd be better split it and treat it in the same way. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221114230227.1255976-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 281b811f8574..a230f65efa62 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -62,17 +62,36 @@ static void print_running(struct perf_stat_config *config, print_running_std(config, run, ena); } +static void print_noise_pct_std(struct perf_stat_config *config, + double pct) +{ + if (pct) + fprintf(config->output, " ( +-%6.2f%% )", pct); +} + +static void print_noise_pct_csv(struct perf_stat_config *config, + double pct) +{ + fprintf(config->output, "%s%.2f%%", config->csv_sep, pct); +} + +static void print_noise_pct_json(struct perf_stat_config *config, + double pct) +{ + fprintf(config->output, "\"variance\" : %.2f, ", pct); +} + static void print_noise_pct(struct perf_stat_config *config, double total, double avg) { double pct = rel_stddev_stats(total, avg); if (config->json_output) - fprintf(config->output, "\"variance\" : %.2f, ", pct); + print_noise_pct_json(config, pct); else if (config->csv_output) - fprintf(config->output, "%s%.2f%%", 
config->csv_sep, pct); - else if (pct) - fprintf(config->output, " ( +-%6.2f%% )", pct); + print_noise_pct_csv(config, pct); + else + print_noise_pct_std(config, pct); } static void print_noise(struct perf_stat_config *config, From 41cb875242e71bf1bf5539a724bc65a0b470bb12 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 14 Nov 2022 15:02:12 -0800 Subject: [PATCH 1565/4122] perf stat: Split print_cgroup() function Likewise, split print_cgroup() for each output mode. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221114230227.1255976-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index a230f65efa62..af2a561eb20c 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -106,15 +106,32 @@ static void print_noise(struct perf_stat_config *config, print_noise_pct(config, stddev_stats(&ps->res_stats), avg); } +static void print_cgroup_std(struct perf_stat_config *config, const char *cgrp_name) +{ + fprintf(config->output, " %s", cgrp_name); +} + +static void print_cgroup_csv(struct perf_stat_config *config, const char *cgrp_name) +{ + fprintf(config->output, "%s%s", config->csv_sep, cgrp_name); +} + +static void print_cgroup_json(struct perf_stat_config *config, const char *cgrp_name) +{ + fprintf(config->output, "\"cgroup\" : \"%s\", ", cgrp_name); +} + static void print_cgroup(struct perf_stat_config *config, struct evsel *evsel) { if (nr_cgroups) { const char *cgrp_name = evsel->cgrp ? 
evsel->cgrp->name : ""; if (config->json_output) - fprintf(config->output, "\"cgroup\" : \"%s\", ", cgrp_name); + print_cgroup_json(config, cgrp_name); + if (config->csv_output) + print_cgroup_csv(config, cgrp_name); else - fprintf(config->output, "%s%s", config->csv_sep, cgrp_name); + print_cgroup_std(config, cgrp_name); } } From 33b2e2c2ad68489bc0c97660ef1e6bf7df0c9d7d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 14 Nov 2022 15:02:13 -0800 Subject: [PATCH 1566/4122] perf stat: Split aggr_printout() function The aggr_printout() function is to print aggr_id and count (nr). Split it for each output mode to simplify the code. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221114230227.1255976-6-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 226 ++++++++++++++++++--------------- 1 file changed, 124 insertions(+), 102 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index af2a561eb20c..ed421f6d512f 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -135,123 +135,41 @@ static void print_cgroup(struct perf_stat_config *config, struct evsel *evsel) } } - -static void aggr_printout(struct perf_stat_config *config, - struct evsel *evsel, struct aggr_cpu_id id, int nr) +static void print_aggr_id_std(struct perf_stat_config *config, + struct evsel *evsel, struct aggr_cpu_id id, int nr) { - - - if (config->json_output && !config->interval) - fprintf(config->output, "{"); + FILE *output = config->output; switch (config->aggr_mode) { case AGGR_CORE: - if (config->json_output) { - fprintf(config->output, - "\"core\" : \"S%d-D%d-C%d\", \"aggregate-number\" : %d, ", - id.socket, - id.die, - id.core, - nr); - } else { - fprintf(config->output, "S%d-D%d-C%*d%s%*d%s", - id.socket, - id.die, - 
config->csv_output ? 0 : -8, - id.core, - config->csv_sep, - config->csv_output ? 0 : 4, - nr, - config->csv_sep); - } + fprintf(output, "S%d-D%d-C%*d %*d ", + id.socket, id.die, -8, id.core, 4, nr); break; case AGGR_DIE: - if (config->json_output) { - fprintf(config->output, - "\"die\" : \"S%d-D%d\", \"aggregate-number\" : %d, ", - id.socket, - id.die, - nr); - } else { - fprintf(config->output, "S%d-D%*d%s%*d%s", - id.socket, - config->csv_output ? 0 : -8, - id.die, - config->csv_sep, - config->csv_output ? 0 : 4, - nr, - config->csv_sep); - } + fprintf(output, "S%d-D%*d %*d ", + id.socket, -8, id.die, 4, nr); break; case AGGR_SOCKET: - if (config->json_output) { - fprintf(config->output, - "\"socket\" : \"S%d\", \"aggregate-number\" : %d, ", - id.socket, - nr); - } else { - fprintf(config->output, "S%*d%s%*d%s", - config->csv_output ? 0 : -5, - id.socket, - config->csv_sep, - config->csv_output ? 0 : 4, - nr, - config->csv_sep); - } + fprintf(output, "S%*d %*d ", + -5, id.socket, 4, nr); break; case AGGR_NODE: - if (config->json_output) { - fprintf(config->output, "\"node\" : \"N%d\", \"aggregate-number\" : %d, ", - id.node, - nr); - } else { - fprintf(config->output, "N%*d%s%*d%s", - config->csv_output ? 0 : -5, - id.node, - config->csv_sep, - config->csv_output ? 0 : 4, - nr, - config->csv_sep); - } + fprintf(output, "N%*d %*d ", + -5, id.node, 4, nr); break; case AGGR_NONE: - if (config->json_output) { - if (evsel->percore && !config->percore_show_thread) { - fprintf(config->output, "\"core\" : \"S%d-D%d-C%d\"", - id.socket, - id.die, - id.core); - } else if (id.cpu.cpu > -1) { - fprintf(config->output, "\"cpu\" : \"%d\", ", - id.cpu.cpu); - } - } else { - if (evsel->percore && !config->percore_show_thread) { - fprintf(config->output, "S%d-D%d-C%*d%s", - id.socket, - id.die, - config->csv_output ? 0 : -3, - id.core, config->csv_sep); - } else if (id.cpu.cpu > -1) { - fprintf(config->output, "CPU%*d%s", - config->csv_output ? 
0 : -7, - id.cpu.cpu, config->csv_sep); - } + if (evsel->percore && !config->percore_show_thread) { + fprintf(output, "S%d-D%d-C%*d ", + id.socket, id.die, -3, id.core); + } else if (id.cpu.cpu > -1) { + fprintf(output, "CPU%*d ", + -7, id.cpu.cpu); } break; case AGGR_THREAD: - if (config->json_output) { - fprintf(config->output, "\"thread\" : \"%s-%d\", ", - perf_thread_map__comm(evsel->core.threads, id.thread_idx), - perf_thread_map__pid(evsel->core.threads, id.thread_idx)); - } else { - fprintf(config->output, "%*s-%*d%s", - config->csv_output ? 0 : 16, - perf_thread_map__comm(evsel->core.threads, id.thread_idx), - config->csv_output ? 0 : -8, - perf_thread_map__pid(evsel->core.threads, id.thread_idx), - config->csv_sep); - } + fprintf(output, "%*s-%*d ", + 16, perf_thread_map__comm(evsel->core.threads, id.thread_idx), + -8, perf_thread_map__pid(evsel->core.threads, id.thread_idx)); break; case AGGR_GLOBAL: case AGGR_UNSET: @@ -261,6 +179,110 @@ static void aggr_printout(struct perf_stat_config *config, } } +static void print_aggr_id_csv(struct perf_stat_config *config, + struct evsel *evsel, struct aggr_cpu_id id, int nr) +{ + FILE *output = config->output; + const char *sep = config->csv_sep; + + switch (config->aggr_mode) { + case AGGR_CORE: + fprintf(output, "S%d-D%d-C%d%s%d%s", + id.socket, id.die, id.core, sep, nr, sep); + break; + case AGGR_DIE: + fprintf(output, "S%d-D%d%s%d%s", + id.socket, id.die, sep, nr, sep); + break; + case AGGR_SOCKET: + fprintf(output, "S%d%s%d%s", + id.socket, sep, nr, sep); + break; + case AGGR_NODE: + fprintf(output, "N%d%s%d%s", + id.node, sep, nr, sep); + break; + case AGGR_NONE: + if (evsel->percore && !config->percore_show_thread) { + fprintf(output, "S%d-D%d-C%d%s", + id.socket, id.die, id.core, sep); + } else if (id.cpu.cpu > -1) { + fprintf(output, "CPU%d%s", + id.cpu.cpu, sep); + } + break; + case AGGR_THREAD: + fprintf(output, "%s-%d%s", + perf_thread_map__comm(evsel->core.threads, id.thread_idx), + 
perf_thread_map__pid(evsel->core.threads, id.thread_idx), + sep); + break; + case AGGR_GLOBAL: + case AGGR_UNSET: + case AGGR_MAX: + default: + break; + } +} + +static void print_aggr_id_json(struct perf_stat_config *config, + struct evsel *evsel, struct aggr_cpu_id id, int nr) +{ + FILE *output = config->output; + + if (!config->interval) + fputc('{', output); + + switch (config->aggr_mode) { + case AGGR_CORE: + fprintf(output, "\"core\" : \"S%d-D%d-C%d\", \"aggregate-number\" : %d, ", + id.socket, id.die, id.core, nr); + break; + case AGGR_DIE: + fprintf(output, "\"die\" : \"S%d-D%d\", \"aggregate-number\" : %d, ", + id.socket, id.die, nr); + break; + case AGGR_SOCKET: + fprintf(output, "\"socket\" : \"S%d\", \"aggregate-number\" : %d, ", + id.socket, nr); + break; + case AGGR_NODE: + fprintf(output, "\"node\" : \"N%d\", \"aggregate-number\" : %d, ", + id.node, nr); + break; + case AGGR_NONE: + if (evsel->percore && !config->percore_show_thread) { + fprintf(output, "\"core\" : \"S%d-D%d-C%d\"", + id.socket, id.die, id.core); + } else if (id.cpu.cpu > -1) { + fprintf(output, "\"cpu\" : \"%d\", ", + id.cpu.cpu); + } + break; + case AGGR_THREAD: + fprintf(output, "\"thread\" : \"%s-%d\", ", + perf_thread_map__comm(evsel->core.threads, id.thread_idx), + perf_thread_map__pid(evsel->core.threads, id.thread_idx)); + break; + case AGGR_GLOBAL: + case AGGR_UNSET: + case AGGR_MAX: + default: + break; + } +} + +static void aggr_printout(struct perf_stat_config *config, + struct evsel *evsel, struct aggr_cpu_id id, int nr) +{ + if (config->json_output) + print_aggr_id_json(config, evsel, id, nr); + else if (config->csv_output) + print_aggr_id_csv(config, evsel, id, nr); + else + print_aggr_id_std(config, evsel, id, nr); +} + struct outstate { FILE *fh; bool newline; From c2019f844eacd06decc435bdcee9eab1e1a6f4ec Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 14 Nov 2022 15:02:14 -0800 Subject: [PATCH 1567/4122] perf stat: Factor out print_counter_value() function And 
split it for each output mode like others. I believe it makes the code simpler and more intuitive. Now abs_printout() becomes just to call sub-functions. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221114230227.1255976-7-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 91 ++++++++++++++++++++++------------ 1 file changed, 58 insertions(+), 33 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index ed421f6d512f..a72c7442ff3d 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -517,46 +517,71 @@ static void print_metric_header(struct perf_stat_config *config, fprintf(os->fh, "%*s ", config->metric_only_len, unit); } -static void abs_printout(struct perf_stat_config *config, - struct aggr_cpu_id id, int nr, struct evsel *evsel, double avg) +static void print_counter_value_std(struct perf_stat_config *config, + struct evsel *evsel, double avg) { FILE *output = config->output; double sc = evsel->scale; const char *fmt; - if (config->csv_output) { - fmt = floor(sc) != sc ? "%.2f%s" : "%.0f%s"; - } else { - if (config->big_num) - fmt = floor(sc) != sc ? "%'18.2f%s" : "%'18.0f%s"; - else - fmt = floor(sc) != sc ? "%18.2f%s" : "%18.0f%s"; - } + if (config->big_num) + fmt = floor(sc) != sc ? "%'18.2f " : "%'18.0f "; + else + fmt = floor(sc) != sc ? "%18.2f " : "%18.0f "; + fprintf(output, fmt, avg); + + if (evsel->unit) + fprintf(output, "%-*s ", config->unit_width, evsel->unit); + + fprintf(output, "%-*s", 32, evsel__name(evsel)); +} + +static void print_counter_value_csv(struct perf_stat_config *config, + struct evsel *evsel, double avg) +{ + FILE *output = config->output; + double sc = evsel->scale; + const char *sep = config->csv_sep; + const char *fmt = floor(sc) != sc ? 
"%.2f%s" : "%.0f%s"; + + fprintf(output, fmt, avg, sep); + + if (evsel->unit) + fprintf(output, "%s%s", evsel->unit, sep); + + fprintf(output, "%s", evsel__name(evsel)); +} + +static void print_counter_value_json(struct perf_stat_config *config, + struct evsel *evsel, double avg) +{ + FILE *output = config->output; + + fprintf(output, "\"counter-value\" : \"%f\", ", avg); + + if (evsel->unit) + fprintf(output, "\"unit\" : \"%s\", ", evsel->unit); + + fprintf(output, "\"event\" : \"%s\", ", evsel__name(evsel)); +} + +static void print_counter_value(struct perf_stat_config *config, + struct evsel *evsel, double avg) +{ + if (config->json_output) + print_counter_value_json(config, evsel, avg); + else if (config->csv_output) + print_counter_value_csv(config, evsel, avg); + else + print_counter_value_std(config, evsel, avg); +} + +static void abs_printout(struct perf_stat_config *config, + struct aggr_cpu_id id, int nr, struct evsel *evsel, double avg) +{ aggr_printout(config, evsel, id, nr); - - if (config->json_output) - fprintf(output, "\"counter-value\" : \"%f\", ", avg); - else - fprintf(output, fmt, avg, config->csv_sep); - - if (config->json_output) { - if (evsel->unit) { - fprintf(output, "\"unit\" : \"%s\", ", - evsel->unit); - } - } else { - if (evsel->unit) - fprintf(output, "%-*s%s", - config->csv_output ? 0 : config->unit_width, - evsel->unit, config->csv_sep); - } - - if (config->json_output) - fprintf(output, "\"event\" : \"%s\", ", evsel__name(evsel)); - else - fprintf(output, "%-*s", config->csv_output ? 0 : 32, evsel__name(evsel)); - + print_counter_value(config, evsel, avg); print_cgroup(config, evsel); } From d6aeb861b1fb26f55c1b5be4b6e900ad1e734516 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 14 Nov 2022 15:02:15 -0800 Subject: [PATCH 1568/4122] perf stat: Handle bad events in abs_printout() In the printout() function, it checks if the event is bad (i.e. not counted or not supported) and print the result. 
But it does the same what abs_printout() is doing. So add an argument to indicate the value is ok or not and use the same function in both cases. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221114230227.1255976-8-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 68 ++++++++++++++-------------------- 1 file changed, 27 insertions(+), 41 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index a72c7442ff3d..fe5483893289 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -518,18 +518,22 @@ static void print_metric_header(struct perf_stat_config *config, } static void print_counter_value_std(struct perf_stat_config *config, - struct evsel *evsel, double avg) + struct evsel *evsel, double avg, bool ok) { FILE *output = config->output; double sc = evsel->scale; const char *fmt; + const char *bad_count = evsel->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED; if (config->big_num) fmt = floor(sc) != sc ? "%'18.2f " : "%'18.0f "; else fmt = floor(sc) != sc ? "%18.2f " : "%18.0f "; - fprintf(output, fmt, avg); + if (ok) + fprintf(output, fmt, avg); + else + fprintf(output, "%18s ", bad_count); if (evsel->unit) fprintf(output, "%-*s ", config->unit_width, evsel->unit); @@ -538,14 +542,18 @@ static void print_counter_value_std(struct perf_stat_config *config, } static void print_counter_value_csv(struct perf_stat_config *config, - struct evsel *evsel, double avg) + struct evsel *evsel, double avg, bool ok) { FILE *output = config->output; double sc = evsel->scale; const char *sep = config->csv_sep; const char *fmt = floor(sc) != sc ? "%.2f%s" : "%.0f%s"; + const char *bad_count = evsel->supported ? 
CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED; - fprintf(output, fmt, avg, sep); + if (ok) + fprintf(output, fmt, avg, sep); + else + fprintf(output, "%s%s", bad_count, sep); if (evsel->unit) fprintf(output, "%s%s", evsel->unit, sep); @@ -554,11 +562,15 @@ static void print_counter_value_csv(struct perf_stat_config *config, } static void print_counter_value_json(struct perf_stat_config *config, - struct evsel *evsel, double avg) + struct evsel *evsel, double avg, bool ok) { FILE *output = config->output; + const char *bad_count = evsel->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED; - fprintf(output, "\"counter-value\" : \"%f\", ", avg); + if (ok) + fprintf(output, "\"counter-value\" : \"%f\", ", avg); + else + fprintf(output, "\"counter-value\" : \"%s\", ", bad_count); if (evsel->unit) fprintf(output, "\"unit\" : \"%s\", ", evsel->unit); @@ -567,21 +579,22 @@ static void print_counter_value_json(struct perf_stat_config *config, } static void print_counter_value(struct perf_stat_config *config, - struct evsel *evsel, double avg) + struct evsel *evsel, double avg, bool ok) { if (config->json_output) - print_counter_value_json(config, evsel, avg); + print_counter_value_json(config, evsel, avg, ok); else if (config->csv_output) - print_counter_value_csv(config, evsel, avg); + print_counter_value_csv(config, evsel, avg, ok); else - print_counter_value_std(config, evsel, avg); + print_counter_value_std(config, evsel, avg, ok); } static void abs_printout(struct perf_stat_config *config, - struct aggr_cpu_id id, int nr, struct evsel *evsel, double avg) + struct aggr_cpu_id id, int nr, + struct evsel *evsel, double avg, bool ok) { aggr_printout(config, evsel, id, nr); - print_counter_value(config, evsel, avg); + print_counter_value(config, evsel, avg, ok); print_cgroup(config, evsel); } @@ -658,17 +671,8 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int pm(config, &os, NULL, "", "", 0); return; } - aggr_printout(config, counter, id, nr); - if 
(config->json_output) { - fprintf(config->output, "\"counter-value\" : \"%s\", ", - counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED); - } else { - fprintf(config->output, "%*s%s", - config->csv_output ? 0 : 18, - counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, - config->csv_sep); - } + abs_printout(config, id, nr, counter, uval, /*ok=*/false); if (counter->supported) { if (!evlist__has_hybrid(counter->evlist)) { @@ -678,24 +682,6 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int } } - if (config->json_output) { - fprintf(config->output, "\"unit\" : \"%s\", ", counter->unit); - } else { - fprintf(config->output, "%-*s%s", - config->csv_output ? 0 : config->unit_width, - counter->unit, config->csv_sep); - } - - if (config->json_output) { - fprintf(config->output, "\"event\" : \"%s\", ", - evsel__name(counter)); - } else { - fprintf(config->output, "%*s", - config->csv_output ? 0 : -25, evsel__name(counter)); - } - - print_cgroup(config, counter); - if (!config->csv_output && !config->json_output) pm(config, &os, NULL, NULL, "", 0); print_noise(config, counter, noise); @@ -706,7 +692,7 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int } if (!config->metric_only) - abs_printout(config, id, nr, counter, uval); + abs_printout(config, id, nr, counter, uval, /*ok=*/true); out.print_metric = pm; out.new_line = nl; From df46a3c92b510b7776fed14728a271f8836ef19b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 14 Nov 2022 15:02:16 -0800 Subject: [PATCH 1569/4122] perf stat: Add before_metric argument Unfortunately, event running time, percentage and noise data are printed in different positions in normal output than CSV/JSON. I think it's better to put such details in where it actually prints. So add before_metric argument to print_noise() and print_running() and call them twice before and after the metric. 
Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221114230227.1255976-9-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 82 +++++++++++++++++----------------- 1 file changed, 42 insertions(+), 40 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index fe5483893289..bf3f2f9d5dee 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -52,14 +52,18 @@ static void print_running_json(struct perf_stat_config *config, u64 run, u64 ena } static void print_running(struct perf_stat_config *config, - u64 run, u64 ena) + u64 run, u64 ena, bool before_metric) { - if (config->json_output) - print_running_json(config, run, ena); - else if (config->csv_output) - print_running_csv(config, run, ena); - else - print_running_std(config, run, ena); + if (config->json_output) { + if (before_metric) + print_running_json(config, run, ena); + } else if (config->csv_output) { + if (before_metric) + print_running_csv(config, run, ena); + } else { + if (!before_metric) + print_running_std(config, run, ena); + } } static void print_noise_pct_std(struct perf_stat_config *config, @@ -82,20 +86,24 @@ static void print_noise_pct_json(struct perf_stat_config *config, } static void print_noise_pct(struct perf_stat_config *config, - double total, double avg) + double total, double avg, bool before_metric) { double pct = rel_stddev_stats(total, avg); - if (config->json_output) - print_noise_pct_json(config, pct); - else if (config->csv_output) - print_noise_pct_csv(config, pct); - else - print_noise_pct_std(config, pct); + if (config->json_output) { + if (before_metric) + print_noise_pct_json(config, pct); + } else if (config->csv_output) { + if (before_metric) + print_noise_pct_csv(config, pct); + } else { + if (!before_metric) 
+ print_noise_pct_std(config, pct); + } } static void print_noise(struct perf_stat_config *config, - struct evsel *evsel, double avg) + struct evsel *evsel, double avg, bool before_metric) { struct perf_stat_evsel *ps; @@ -103,7 +111,7 @@ static void print_noise(struct perf_stat_config *config, return; ps = evsel->stats; - print_noise_pct(config, stddev_stats(&ps->res_stats), avg); + print_noise_pct(config, stddev_stats(&ps->res_stats), avg, before_metric); } static void print_cgroup_std(struct perf_stat_config *config, const char *cgrp_name) @@ -637,6 +645,7 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int }; print_metric_t pm; new_line_t nl; + bool ok = true; if (config->csv_output) { static const int aggr_fields[AGGR_MAX] = { @@ -672,7 +681,7 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int return; } - abs_printout(config, id, nr, counter, uval, /*ok=*/false); + ok = false; if (counter->supported) { if (!evlist__has_hybrid(counter->evlist)) { @@ -681,37 +690,30 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int config->print_mixed_hw_group_error = 1; } } - - if (!config->csv_output && !config->json_output) - pm(config, &os, NULL, NULL, "", 0); - print_noise(config, counter, noise); - print_running(config, run, ena); - if (config->csv_output || config->json_output) - pm(config, &os, NULL, NULL, "", 0); - return; } - if (!config->metric_only) - abs_printout(config, id, nr, counter, uval, /*ok=*/true); - out.print_metric = pm; out.new_line = nl; out.ctx = &os; out.force_header = false; - if (config->csv_output && !config->metric_only) { - print_noise(config, counter, noise); - print_running(config, run, ena); - } else if (config->json_output && !config->metric_only) { - print_noise(config, counter, noise); - print_running(config, run, ena); + if (!config->metric_only) { + abs_printout(config, id, nr, counter, uval, ok); + + print_noise(config, counter, noise, 
/*before_metric=*/true); + print_running(config, run, ena, /*before_metric=*/true); } - perf_stat__print_shadow_stats(config, counter, uval, map_idx, - &out, &config->metric_events, st); - if (!config->csv_output && !config->metric_only && !config->json_output) { - print_noise(config, counter, noise); - print_running(config, run, ena); + if (ok) { + perf_stat__print_shadow_stats(config, counter, uval, map_idx, + &out, &config->metric_events, st); + } else { + pm(config, &os, /*color=*/NULL, /*format=*/NULL, /*unit=*/"", /*val=*/0); + } + + if (!config->metric_only) { + print_noise(config, counter, noise, /*before_metric=*/false); + print_running(config, run, ena, /*before_metric=*/false); } } @@ -1151,7 +1153,7 @@ static void print_footer(struct perf_stat_config *config) fprintf(output, " %17.*f +- %.*f seconds time elapsed", precision, avg, precision, sd); - print_noise_pct(config, sd, avg); + print_noise_pct(config, sd, avg, /*before_metric=*/false); } fprintf(output, "\n\n"); From 8d500292bd55c05130c96f6e84cc3e6ba3ebed99 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 14 Nov 2022 15:02:17 -0800 Subject: [PATCH 1570/4122] perf stat: Align cgroup names We don't know how long cgroup name is, but at least we can align short ones like below. 
$ perf stat -a --for-each-cgroup system.slice,user.slice true Performance counter stats for 'system wide': 0.13 msec cpu-clock system.slice # 0.010 CPUs utilized 4 context-switches system.slice # 31.989 K/sec 1 cpu-migrations system.slice # 7.997 K/sec 0 page-faults system.slice # 0.000 /sec 450,673 cycles system.slice # 3.604 GHz (92.41%) 161,216 instructions system.slice # 0.36 insn per cycle (92.41%) 32,678 branches system.slice # 261.332 M/sec (92.41%) 2,628 branch-misses system.slice # 8.04% of all branches (92.41%) 14.29 msec cpu-clock user.slice # 1.163 CPUs utilized 35 context-switches user.slice # 2.449 K/sec 12 cpu-migrations user.slice # 839.691 /sec 57 page-faults user.slice # 3.989 K/sec 49,683,026 cycles user.slice # 3.477 GHz (99.38%) 110,790,266 instructions user.slice # 2.23 insn per cycle (99.38%) 24,552,255 branches user.slice # 1.718 G/sec (99.38%) 127,779 branch-misses user.slice # 0.52% of all branches (99.38%) 0.012289431 seconds time elapsed Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221114230227.1255976-10-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index bf3f2f9d5dee..e66f766a3d78 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -116,7 +116,7 @@ static void print_noise(struct perf_stat_config *config, static void print_cgroup_std(struct perf_stat_config *config, const char *cgrp_name) { - fprintf(config->output, " %s", cgrp_name); + fprintf(config->output, " %-16s", cgrp_name); } static void print_cgroup_csv(struct perf_stat_config *config, const char *cgrp_name) From b2d9832e00a0d93ad6127fa8ccf2b8e0cfe67397 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 
14 Nov 2022 15:02:18 -0800 Subject: [PATCH 1571/4122] perf stat: Split print_metric_headers() function The print_metric_headers() shows metric headers a little bit differently for each mode. Split it out to make the code clearer. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221114230227.1255976-11-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 52 ++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 15 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index e66f766a3d78..bb2791459f5f 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -924,6 +924,37 @@ static const char *aggr_header_csv[] = { [AGGR_GLOBAL] = "" }; +static void print_metric_headers_std(struct perf_stat_config *config, + const char *prefix, bool no_indent) +{ + if (prefix) + fprintf(config->output, "%s", prefix); + if (!no_indent) { + fprintf(config->output, "%*s", + aggr_header_lens[config->aggr_mode], ""); + } +} + +static void print_metric_headers_csv(struct perf_stat_config *config, + const char *prefix, + bool no_indent __maybe_unused) +{ + if (prefix) + fprintf(config->output, "%s", prefix); + if (config->interval) + fputs("time,", config->output); + if (!config->iostat_run) + fputs(aggr_header_csv[config->aggr_mode], config->output); +} + +static void print_metric_headers_json(struct perf_stat_config *config, + const char *prefix __maybe_unused, + bool no_indent __maybe_unused) +{ + if (config->interval) + fputs("{\"unit\" : \"sec\"}", config->output); +} + static void print_metric_headers(struct perf_stat_config *config, struct evlist *evlist, const char *prefix, bool no_indent) @@ -939,22 +970,13 @@ static void print_metric_headers(struct perf_stat_config *config, .force_header = true, }; - if (prefix &&
!config->json_output) - fprintf(config->output, "%s", prefix); + if (config->json_output) + print_metric_headers_json(config, prefix, no_indent); + else if (config->csv_output) + print_metric_headers_csv(config, prefix, no_indent); + else + print_metric_headers_std(config, prefix, no_indent); - if (!config->csv_output && !config->json_output && !no_indent) - fprintf(config->output, "%*s", - aggr_header_lens[config->aggr_mode], ""); - if (config->csv_output) { - if (config->interval) - fputs("time,", config->output); - if (!config->iostat_run) - fputs(aggr_header_csv[config->aggr_mode], config->output); - } - if (config->json_output) { - if (config->interval) - fputs("{\"unit\" : \"sec\"}", config->output); - } if (config->iostat_run) iostat_print_header_prefix(config); From 208cbcd21bf57c80ceaf90fcec16cfdfb55a0a4d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 14 Nov 2022 15:02:19 -0800 Subject: [PATCH 1572/4122] perf stat: Factor out prepare_interval() This logic does not print the time directly, but it just puts the timestamp in the buffer as a prefix. To reduce the confusion, factor out the code into a separate function. 
Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221114230227.1255976-12-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 39 +++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index bb2791459f5f..c234be656db9 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -993,9 +993,25 @@ static void print_metric_headers(struct perf_stat_config *config, fputc('\n', config->output); } +static void prepare_interval(struct perf_stat_config *config, + char *prefix, struct timespec *ts) +{ + if (config->iostat_run) + return; + + if (!config->json_output) + sprintf(prefix, "%6lu.%09lu%s", (unsigned long) ts->tv_sec, + ts->tv_nsec, config->csv_sep); + else if (!config->metric_only) + sprintf(prefix, "{\"interval\" : %lu.%09lu, ", (unsigned long) + ts->tv_sec, ts->tv_nsec); + else + sprintf(prefix, "{\"interval\" : %lu.%09lu}", (unsigned long) + ts->tv_sec, ts->tv_nsec); +} + static void print_interval(struct perf_stat_config *config, - struct evlist *evlist, - char *prefix, struct timespec *ts) + struct evlist *evlist) { bool metric_only = config->metric_only; unsigned int unit_width = config->unit_width; @@ -1005,16 +1021,6 @@ static void print_interval(struct perf_stat_config *config, if (config->interval_clear && isatty(fileno(output))) puts(CONSOLE_CLEAR); - if (!config->iostat_run && !config->json_output) - sprintf(prefix, "%6lu.%09lu%s", (unsigned long) ts->tv_sec, - ts->tv_nsec, config->csv_sep); - if (!config->iostat_run && config->json_output && !config->metric_only) - sprintf(prefix, "{\"interval\" : %lu.%09lu, ", (unsigned long) - ts->tv_sec, ts->tv_nsec); - if (!config->iostat_run && config->json_output && 
config->metric_only) - sprintf(prefix, "{\"interval\" : %lu.%09lu}", (unsigned long) - ts->tv_sec, ts->tv_nsec); - if ((num_print_interval == 0 || config->interval_clear) && !config->csv_output && !config->json_output) { switch (config->aggr_mode) { @@ -1252,10 +1258,13 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf if (config->iostat_run) evlist->selected = evlist__first(evlist); - if (interval) - print_interval(config, evlist, prefix = buf, ts); - else + if (interval) { + prefix = buf; + prepare_interval(config, prefix, ts); + print_interval(config, evlist); + } else { print_header(config, _target, argc, argv); + } if (metric_only) { static int num_print_iv; From 33c4ed47990f3eaaa732d3fcc2760e6165c16c40 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 14 Nov 2022 15:02:20 -0800 Subject: [PATCH 1573/4122] perf stat: Cleanup interval print alignment Instead of using magic values, define symbolic constants and use them. Also add aggr_header_std[] array to simplify aggr_mode handling. 
Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221114230227.1255976-13-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 165 ++++++++++++++++++--------------- 1 file changed, 91 insertions(+), 74 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index c234be656db9..f983432aaddd 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -25,6 +25,45 @@ #define CNTR_NOT_SUPPORTED "" #define CNTR_NOT_COUNTED "" +#define METRIC_LEN 38 +#define EVNAME_LEN 32 +#define COUNTS_LEN 18 +#define INTERVAL_LEN 16 +#define CGROUP_LEN 16 +#define COMM_LEN 16 +#define PID_LEN 7 +#define CPUS_LEN 4 + +static int aggr_header_lens[] = { + [AGGR_CORE] = 18, + [AGGR_DIE] = 12, + [AGGR_SOCKET] = 6, + [AGGR_NODE] = 6, + [AGGR_NONE] = 6, + [AGGR_THREAD] = 16, + [AGGR_GLOBAL] = 0, +}; + +static const char *aggr_header_csv[] = { + [AGGR_CORE] = "core,cpus,", + [AGGR_DIE] = "die,cpus,", + [AGGR_SOCKET] = "socket,cpus,", + [AGGR_NONE] = "cpu,", + [AGGR_THREAD] = "comm-pid,", + [AGGR_NODE] = "node,", + [AGGR_GLOBAL] = "" +}; + +static const char *aggr_header_std[] = { + [AGGR_CORE] = "core", + [AGGR_DIE] = "die", + [AGGR_SOCKET] = "socket", + [AGGR_NONE] = "cpu", + [AGGR_THREAD] = "comm-pid", + [AGGR_NODE] = "node", + [AGGR_GLOBAL] = "" +}; + static void print_running_std(struct perf_stat_config *config, u64 run, u64 ena) { if (run != ena) @@ -116,7 +155,7 @@ static void print_noise(struct perf_stat_config *config, static void print_cgroup_std(struct perf_stat_config *config, const char *cgrp_name) { - fprintf(config->output, " %-16s", cgrp_name); + fprintf(config->output, " %-*s", CGROUP_LEN, cgrp_name); } static void print_cgroup_csv(struct perf_stat_config *config, const char *cgrp_name) @@ -147,44 +186,46 @@ static void 
print_aggr_id_std(struct perf_stat_config *config, struct evsel *evsel, struct aggr_cpu_id id, int nr) { FILE *output = config->output; + int idx = config->aggr_mode; + char buf[128]; switch (config->aggr_mode) { case AGGR_CORE: - fprintf(output, "S%d-D%d-C%*d %*d ", - id.socket, id.die, -8, id.core, 4, nr); + snprintf(buf, sizeof(buf), "S%d-D%d-C%d", id.socket, id.die, id.core); break; case AGGR_DIE: - fprintf(output, "S%d-D%*d %*d ", - id.socket, -8, id.die, 4, nr); + snprintf(buf, sizeof(buf), "S%d-D%d", id.socket, id.die); break; case AGGR_SOCKET: - fprintf(output, "S%*d %*d ", - -5, id.socket, 4, nr); + snprintf(buf, sizeof(buf), "S%d", id.socket); break; case AGGR_NODE: - fprintf(output, "N%*d %*d ", - -5, id.node, 4, nr); + snprintf(buf, sizeof(buf), "N%d", id.node); break; case AGGR_NONE: if (evsel->percore && !config->percore_show_thread) { - fprintf(output, "S%d-D%d-C%*d ", - id.socket, id.die, -3, id.core); + snprintf(buf, sizeof(buf), "S%d-D%d-C%d ", + id.socket, id.die, id.core); + fprintf(output, "%-*s ", + aggr_header_lens[AGGR_CORE], buf); } else if (id.cpu.cpu > -1) { - fprintf(output, "CPU%*d ", - -7, id.cpu.cpu); + fprintf(output, "CPU%-*d ", + aggr_header_lens[AGGR_NONE] - 3, id.cpu.cpu); } - break; + return; case AGGR_THREAD: - fprintf(output, "%*s-%*d ", - 16, perf_thread_map__comm(evsel->core.threads, id.thread_idx), - -8, perf_thread_map__pid(evsel->core.threads, id.thread_idx)); - break; + fprintf(output, "%*s-%-*d ", + COMM_LEN, perf_thread_map__comm(evsel->core.threads, id.thread_idx), + PID_LEN, perf_thread_map__pid(evsel->core.threads, id.thread_idx)); + return; case AGGR_GLOBAL: case AGGR_UNSET: case AGGR_MAX: default: - break; + return; } + + fprintf(output, "%-*s %*d ", aggr_header_lens[idx], buf, 4, nr); } static void print_aggr_id_csv(struct perf_stat_config *config, @@ -301,8 +342,6 @@ struct outstate { struct evsel *evsel; }; -#define METRIC_LEN 38 - static void new_line_std(struct perf_stat_config *config __maybe_unused, void 
*ctx) { @@ -534,19 +573,19 @@ static void print_counter_value_std(struct perf_stat_config *config, const char *bad_count = evsel->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED; if (config->big_num) - fmt = floor(sc) != sc ? "%'18.2f " : "%'18.0f "; + fmt = floor(sc) != sc ? "%'*.2f " : "%'*.0f "; else - fmt = floor(sc) != sc ? "%18.2f " : "%18.0f "; + fmt = floor(sc) != sc ? "%*.2f " : "%*.0f "; if (ok) - fprintf(output, fmt, avg); + fprintf(output, fmt, COUNTS_LEN, avg); else - fprintf(output, "%18s ", bad_count); + fprintf(output, "%*s ", COUNTS_LEN, bad_count); if (evsel->unit) fprintf(output, "%-*s ", config->unit_width, evsel->unit); - fprintf(output, "%-*s", 32, evsel__name(evsel)); + fprintf(output, "%-*s", EVNAME_LEN, evsel__name(evsel)); } static void print_counter_value_csv(struct perf_stat_config *config, @@ -904,34 +943,19 @@ static void print_no_aggr_metric(struct perf_stat_config *config, } } -static int aggr_header_lens[] = { - [AGGR_CORE] = 24, - [AGGR_DIE] = 18, - [AGGR_SOCKET] = 12, - [AGGR_NONE] = 6, - [AGGR_THREAD] = 24, - [AGGR_NODE] = 6, - [AGGR_GLOBAL] = 0, -}; - -static const char *aggr_header_csv[] = { - [AGGR_CORE] = "core,cpus,", - [AGGR_DIE] = "die,cpus,", - [AGGR_SOCKET] = "socket,cpus,", - [AGGR_NONE] = "cpu,", - [AGGR_THREAD] = "comm-pid,", - [AGGR_NODE] = "node,", - [AGGR_GLOBAL] = "" -}; - static void print_metric_headers_std(struct perf_stat_config *config, const char *prefix, bool no_indent) { if (prefix) fprintf(config->output, "%s", prefix); + if (!no_indent) { - fprintf(config->output, "%*s", - aggr_header_lens[config->aggr_mode], ""); + int len = aggr_header_lens[config->aggr_mode]; + + if (nr_cgroups) + len += CGROUP_LEN + 1; + + fprintf(config->output, "%*s", len, ""); } } @@ -1025,46 +1049,39 @@ static void print_interval(struct perf_stat_config *config, !config->csv_output && !config->json_output) { switch (config->aggr_mode) { case AGGR_NODE: - fprintf(output, "# time node cpus"); - if (!metric_only) - fprintf(output, 
" counts %*s events\n", unit_width, "unit"); - break; case AGGR_SOCKET: - fprintf(output, "# time socket cpus"); - if (!metric_only) - fprintf(output, " counts %*s events\n", unit_width, "unit"); - break; case AGGR_DIE: - fprintf(output, "# time die cpus"); - if (!metric_only) - fprintf(output, " counts %*s events\n", unit_width, "unit"); - break; case AGGR_CORE: - fprintf(output, "# time core cpus"); - if (!metric_only) - fprintf(output, " counts %*s events\n", unit_width, "unit"); + fprintf(output, "#%*s %-*s cpus", + INTERVAL_LEN - 1, "time", + aggr_header_lens[config->aggr_mode], + aggr_header_std[config->aggr_mode]); break; case AGGR_NONE: - fprintf(output, "# time CPU "); - if (!metric_only) - fprintf(output, " counts %*s events\n", unit_width, "unit"); + fprintf(output, "#%*s %-*s", + INTERVAL_LEN - 1, "time", + aggr_header_lens[config->aggr_mode], + aggr_header_std[config->aggr_mode]); break; case AGGR_THREAD: - fprintf(output, "# time comm-pid"); - if (!metric_only) - fprintf(output, " counts %*s events\n", unit_width, "unit"); + fprintf(output, "#%*s %*s-%-*s", + INTERVAL_LEN - 1, "time", + COMM_LEN, "comm", PID_LEN, "pid"); break; case AGGR_GLOBAL: default: - if (!config->iostat_run) { - fprintf(output, "# time"); - if (!metric_only) - fprintf(output, " counts %*s events\n", unit_width, "unit"); - } + if (!config->iostat_run) + fprintf(output, "#%*s", + INTERVAL_LEN - 1, "time"); case AGGR_UNSET: case AGGR_MAX: break; } + + if (!metric_only) { + fprintf(output, " %*s %*s events\n", + COUNTS_LEN, "counts", unit_width, "unit"); + } } if ((num_print_interval == 0 || config->interval_clear) && metric_only) From 6108712c07422f8ececb163ace7fad2ae0e9a24c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 14 Nov 2022 15:02:21 -0800 Subject: [PATCH 1574/4122] perf stat: Remove impossible condition The print would run only if metric_only is not set, but it's already in a block that says it's in metric_only case. And there's no place to change the setting. 
Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221114230227.1255976-14-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index f983432aaddd..cc8bb6d07dcb 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -1292,9 +1292,6 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf num_print_iv = 0; if (config->aggr_mode == AGGR_GLOBAL && prefix && !config->iostat_run) fprintf(config->output, "%s", prefix); - - if (config->json_output && !config->metric_only) - fprintf(config->output, "}"); } switch (config->aggr_mode) { case AGGR_CORE: From 4c86b664f4cce6940c187ecfc019f434f8dbd4bc Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 14 Nov 2022 15:02:22 -0800 Subject: [PATCH 1575/4122] perf stat: Rework header display There are print_header() and print_interval() to print header lines before actual counter values. Also print_metric_headers() needs to be called for the metric-only case. Let's move all of this logic to a single place including num_print_iv to refresh the headers for interval mode.
Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221114230227.1255976-15-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 189 +++++++++++++++++++-------------- 1 file changed, 108 insertions(+), 81 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index cc8bb6d07dcb..f97817628478 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -1034,94 +1034,129 @@ static void prepare_interval(struct perf_stat_config *config, ts->tv_sec, ts->tv_nsec); } -static void print_interval(struct perf_stat_config *config, - struct evlist *evlist) +static void print_header_interval_std(struct perf_stat_config *config, + struct target *_target __maybe_unused, + struct evlist *evlist, + int argc __maybe_unused, + const char **argv __maybe_unused) { - bool metric_only = config->metric_only; - unsigned int unit_width = config->unit_width; FILE *output = config->output; - static int num_print_interval; - if (config->interval_clear && isatty(fileno(output))) - puts(CONSOLE_CLEAR); - - if ((num_print_interval == 0 || config->interval_clear) && - !config->csv_output && !config->json_output) { - switch (config->aggr_mode) { - case AGGR_NODE: - case AGGR_SOCKET: - case AGGR_DIE: - case AGGR_CORE: - fprintf(output, "#%*s %-*s cpus", - INTERVAL_LEN - 1, "time", - aggr_header_lens[config->aggr_mode], - aggr_header_std[config->aggr_mode]); - break; - case AGGR_NONE: - fprintf(output, "#%*s %-*s", - INTERVAL_LEN - 1, "time", - aggr_header_lens[config->aggr_mode], - aggr_header_std[config->aggr_mode]); - break; - case AGGR_THREAD: - fprintf(output, "#%*s %*s-%-*s", - INTERVAL_LEN - 1, "time", - COMM_LEN, "comm", PID_LEN, "pid"); - break; - case AGGR_GLOBAL: - default: - if (!config->iostat_run) - fprintf(output, "#%*s", - 
INTERVAL_LEN - 1, "time"); - case AGGR_UNSET: - case AGGR_MAX: - break; - } - - if (!metric_only) { - fprintf(output, " %*s %*s events\n", - COUNTS_LEN, "counts", unit_width, "unit"); - } + switch (config->aggr_mode) { + case AGGR_NODE: + case AGGR_SOCKET: + case AGGR_DIE: + case AGGR_CORE: + fprintf(output, "#%*s %-*s cpus", + INTERVAL_LEN - 1, "time", + aggr_header_lens[config->aggr_mode], + aggr_header_std[config->aggr_mode]); + break; + case AGGR_NONE: + fprintf(output, "#%*s %-*s", + INTERVAL_LEN - 1, "time", + aggr_header_lens[config->aggr_mode], + aggr_header_std[config->aggr_mode]); + break; + case AGGR_THREAD: + fprintf(output, "#%*s %*s-%-*s", + INTERVAL_LEN - 1, "time", + COMM_LEN, "comm", PID_LEN, "pid"); + break; + case AGGR_GLOBAL: + default: + if (!config->iostat_run) + fprintf(output, "#%*s", + INTERVAL_LEN - 1, "time"); + case AGGR_UNSET: + case AGGR_MAX: + break; } - if ((num_print_interval == 0 || config->interval_clear) && metric_only) + if (config->metric_only) print_metric_headers(config, evlist, " ", true); - if (++num_print_interval == 25) - num_print_interval = 0; + else + fprintf(output, " %*s %*s events\n", + COUNTS_LEN, "counts", config->unit_width, "unit"); } -static void print_header(struct perf_stat_config *config, - struct target *_target, - int argc, const char **argv) +static void print_header_std(struct perf_stat_config *config, + struct target *_target, struct evlist *evlist, + int argc, const char **argv) { FILE *output = config->output; int i; + fprintf(output, "\n"); + fprintf(output, " Performance counter stats for "); + if (_target->bpf_str) + fprintf(output, "\'BPF program(s) %s", _target->bpf_str); + else if (_target->system_wide) + fprintf(output, "\'system wide"); + else if (_target->cpu_list) + fprintf(output, "\'CPU(s) %s", _target->cpu_list); + else if (!target__has_task(_target)) { + fprintf(output, "\'%s", argv ? 
argv[0] : "pipe"); + for (i = 1; argv && (i < argc); i++) + fprintf(output, " %s", argv[i]); + } else if (_target->pid) + fprintf(output, "process id \'%s", _target->pid); + else + fprintf(output, "thread id \'%s", _target->tid); + + fprintf(output, "\'"); + if (config->run_count > 1) + fprintf(output, " (%d runs)", config->run_count); + fprintf(output, ":\n\n"); + + if (config->metric_only) + print_metric_headers(config, evlist, " ", false); +} + +static void print_header_csv(struct perf_stat_config *config, + struct target *_target __maybe_unused, + struct evlist *evlist, + int argc __maybe_unused, + const char **argv __maybe_unused) +{ + if (config->metric_only) + print_metric_headers(config, evlist, " ", true); +} +static void print_header_json(struct perf_stat_config *config, + struct target *_target __maybe_unused, + struct evlist *evlist, + int argc __maybe_unused, + const char **argv __maybe_unused) +{ + if (config->metric_only) + print_metric_headers(config, evlist, " ", true); +} + +static void print_header(struct perf_stat_config *config, + struct target *_target, + struct evlist *evlist, + int argc, const char **argv) +{ + static int num_print_iv; + fflush(stdout); - if (!config->csv_output && !config->json_output) { - fprintf(output, "\n"); - fprintf(output, " Performance counter stats for "); - if (_target->bpf_str) - fprintf(output, "\'BPF program(s) %s", _target->bpf_str); - else if (_target->system_wide) - fprintf(output, "\'system wide"); - else if (_target->cpu_list) - fprintf(output, "\'CPU(s) %s", _target->cpu_list); - else if (!target__has_task(_target)) { - fprintf(output, "\'%s", argv ? 
argv[0] : "pipe"); - for (i = 1; argv && (i < argc); i++) - fprintf(output, " %s", argv[i]); - } else if (_target->pid) - fprintf(output, "process id \'%s", _target->pid); - else - fprintf(output, "thread id \'%s", _target->tid); + if (config->interval_clear) + puts(CONSOLE_CLEAR); - fprintf(output, "\'"); - if (config->run_count > 1) - fprintf(output, " (%d runs)", config->run_count); - fprintf(output, ":\n\n"); + if (num_print_iv == 0 || config->interval_clear) { + if (config->json_output) + print_header_json(config, _target, evlist, argc, argv); + else if (config->csv_output) + print_header_csv(config, _target, evlist, argc, argv); + else if (config->interval) + print_header_interval_std(config, _target, evlist, argc, argv); + else + print_header_std(config, _target, evlist, argc, argv); } + + if (num_print_iv++ == 25) + num_print_iv = 0; } static int get_precision(double num) @@ -1278,18 +1313,10 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf if (interval) { prefix = buf; prepare_interval(config, prefix, ts); - print_interval(config, evlist); - } else { - print_header(config, _target, argc, argv); } + print_header(config, _target, evlist, argc, argv); if (metric_only) { - static int num_print_iv; - - if (num_print_iv == 0 && !interval) - print_metric_headers(config, evlist, prefix, false); - if (num_print_iv++ == 25) - num_print_iv = 0; if (config->aggr_mode == AGGR_GLOBAL && prefix && !config->iostat_run) fprintf(config->output, "%s", prefix); } From 453279d5739f12cf27b604c84c061a8a7a3937da Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 14 Nov 2022 15:02:23 -0800 Subject: [PATCH 1576/4122] perf stat: Move condition to print_footer() Likewise, I think it'd be better to have the control inside the function, and keep the higher level function clearer.
Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221114230227.1255976-16-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index f97817628478..73cf898060c0 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -1205,6 +1205,9 @@ static void print_footer(struct perf_stat_config *config) double avg = avg_stats(config->walltime_nsecs_stats) / NSEC_PER_SEC; FILE *output = config->output; + if (config->interval || config->csv_output || config->json_output) + return; + if (!config->null_run) fprintf(output, "\n"); @@ -1359,8 +1362,7 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf break; } - if (!interval && !config->csv_output && !config->json_output) - print_footer(config); + print_footer(config); fflush(config->output); } From 2cf38236d94605586da0ed0f9ec432bf0eb415c3 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 14 Nov 2022 15:02:24 -0800 Subject: [PATCH 1577/4122] perf stat: Factor out prefix display The prefix is needed for interval mode to print timestamp at the beginning of each line. But it's tricky for the metric only mode since it doesn't print every evsel and combines the metrics into a single line. So it needed to pass 'first' argument to print_counter_aggrdata() to determine if the current event is being printed at first. This makes the code hard to read. Let's move the logic out of the function and do it in the outer print loop. This would enable further cleanups later.
Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221114230227.1255976-17-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 43 ++++++++++++---------------------- 1 file changed, 15 insertions(+), 28 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 73cf898060c0..bb40ed29300d 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -805,8 +805,7 @@ static void uniquify_counter(struct perf_stat_config *config, struct evsel *coun static void print_counter_aggrdata(struct perf_stat_config *config, struct evsel *counter, int s, - char *prefix, bool metric_only, - bool *first) + char *prefix, bool metric_only) { FILE *output = config->output; u64 ena, run, val; @@ -825,10 +824,6 @@ static void print_counter_aggrdata(struct perf_stat_config *config, ena = aggr->counts.ena; run = aggr->counts.run; - if (*first && metric_only) { - *first = false; - aggr_printout(config, counter, id, aggr->nr); - } if (prefix && !metric_only) fprintf(output, "%s", prefix); @@ -849,7 +844,6 @@ static void print_aggr(struct perf_stat_config *config, FILE *output = config->output; struct evsel *counter; int s; - bool first; if (!config->aggr_map || !config->aggr_get_id) return; @@ -860,21 +854,23 @@ static void print_aggr(struct perf_stat_config *config, */ for (s = 0; s < config->aggr_map->nr; s++) { if (metric_only) { + struct perf_stat_aggr *aggr; + struct aggr_cpu_id id = config->aggr_map->map[s]; + if (prefix) fprintf(output, "%s", prefix); - else if (config->summary && !config->no_csv_summary && - config->csv_output && !config->interval) - fprintf(output, "%16s%s", "summary", config->csv_sep); + + counter = evlist__first(evlist); + aggr = &counter->stats->aggr[s]; + aggr_printout(config, counter, id, aggr->nr); } 
- first = true; evlist__for_each_entry(evlist, counter) { if (counter->merged_stat) continue; - print_counter_aggrdata(config, counter, s, - prefix, metric_only, - &first); + print_counter_aggrdata(config, counter, s, prefix, + metric_only); } if (metric_only) fputc('\n', output); @@ -885,7 +881,6 @@ static void print_counter(struct perf_stat_config *config, struct evsel *counter, char *prefix) { bool metric_only = config->metric_only; - bool first = false; int s; /* AGGR_THREAD doesn't have config->aggr_get_id */ @@ -896,9 +891,8 @@ static void print_counter(struct perf_stat_config *config, return; for (s = 0; s < config->aggr_map->nr; s++) { - print_counter_aggrdata(config, counter, s, - prefix, metric_only, - &first); + print_counter_aggrdata(config, counter, s, prefix, + metric_only); } } @@ -1260,7 +1254,6 @@ static void print_percore(struct perf_stat_config *config, FILE *output = config->output; struct cpu_aggr_map *core_map; int s, c, i; - bool first = true; if (!config->aggr_map || !config->aggr_get_id) return; @@ -1288,11 +1281,7 @@ static void print_percore(struct perf_stat_config *config, if (found) continue; - if (prefix && metric_only) - fprintf(output, "%s", prefix); - - print_counter_aggrdata(config, counter, s, - prefix, metric_only, &first); + print_counter_aggrdata(config, counter, s, prefix, metric_only); core_map->map[c++] = core_id; } @@ -1319,10 +1308,6 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf } print_header(config, _target, evlist, argc, argv); - if (metric_only) { - if (config->aggr_mode == AGGR_GLOBAL && prefix && !config->iostat_run) - fprintf(config->output, "%s", prefix); - } switch (config->aggr_mode) { case AGGR_CORE: @@ -1337,6 +1322,8 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf iostat_print_counters(evlist, config, ts, prefix = buf, print_counter); else { + if (prefix && metric_only) + fprintf(config->output, "%s", prefix); 
evlist__for_each_entry(evlist, counter) { print_counter(config, counter, prefix); } From 78670daefd33c8e0a30b121a2a271a90bca57130 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 14 Nov 2022 15:02:25 -0800 Subject: [PATCH 1578/4122] perf stat: Factor out print_metric_{begin,end}() For the metric-only case, add new functions to handle the start and the end of each metric display. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221114230227.1255976-18-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 56 +++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 22 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index bb40ed29300d..7a0673be720b 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -836,12 +836,39 @@ static void print_counter_aggrdata(struct perf_stat_config *config, fputc('\n', output); } +static void print_metric_begin(struct perf_stat_config *config, + struct evlist *evlist, + char *prefix, int aggr_idx) +{ + struct perf_stat_aggr *aggr; + struct aggr_cpu_id id; + struct evsel *evsel; + + if (!config->metric_only) + return; + + if (prefix) + fprintf(config->output, "%s", prefix); + + evsel = evlist__first(evlist); + id = config->aggr_map->map[aggr_idx]; + aggr = &evsel->stats->aggr[aggr_idx]; + aggr_printout(config, evsel, id, aggr->nr); +} + +static void print_metric_end(struct perf_stat_config *config) +{ + if (!config->metric_only) + return; + + fputc('\n', config->output); +} + static void print_aggr(struct perf_stat_config *config, struct evlist *evlist, char *prefix) { bool metric_only = config->metric_only; - FILE *output = config->output; struct evsel *counter; int s; @@ -853,17 +880,7 @@ static void print_aggr(struct perf_stat_config *config, * 
Without each counter has its own line. */ for (s = 0; s < config->aggr_map->nr; s++) { - if (metric_only) { - struct perf_stat_aggr *aggr; - struct aggr_cpu_id id = config->aggr_map->map[s]; - - if (prefix) - fprintf(output, "%s", prefix); - - counter = evlist__first(evlist); - aggr = &counter->stats->aggr[s]; - aggr_printout(config, counter, id, aggr->nr); - } + print_metric_begin(config, evlist, prefix, s); evlist__for_each_entry(evlist, counter) { if (counter->merged_stat) @@ -872,8 +889,7 @@ static void print_aggr(struct perf_stat_config *config, print_counter_aggrdata(config, counter, s, prefix, metric_only); } - if (metric_only) - fputc('\n', output); + print_metric_end(config); } } @@ -919,9 +935,7 @@ static void print_no_aggr_metric(struct perf_stat_config *config, id = aggr_cpu_id__cpu(cpu, /*data=*/NULL); if (first) { - if (prefix) - fputs(prefix, config->output); - aggr_printout(config, counter, id, 0); + print_metric_begin(config, evlist, prefix, counter_idx); first = false; } val = ps->aggr[counter_idx].counts.val; @@ -933,7 +947,7 @@ static void print_no_aggr_metric(struct perf_stat_config *config, run, ena, 1.0, &rt_stat, counter_idx); } if (!first) - fputc('\n', config->output); + print_metric_end(config); } } @@ -1322,13 +1336,11 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf iostat_print_counters(evlist, config, ts, prefix = buf, print_counter); else { - if (prefix && metric_only) - fprintf(config->output, "%s", prefix); + print_metric_begin(config, evlist, prefix, /*aggr_idx=*/0); evlist__for_each_entry(evlist, counter) { print_counter(config, counter, prefix); } - if (metric_only) - fputc('\n', config->output); + print_metric_end(config); } break; case AGGR_NONE: From 67f8b7eb4e9195d037b60f1614880234aca15fee Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 14 Nov 2022 15:02:26 -0800 Subject: [PATCH 1579/4122] perf stat: Support --for-each-cgroup and --metric-only When we have events for each cgroup, the 
metric should be printed for each cgroup separately. Add print_cgroup_counter() to handle that situation properly. Also change print_metric_headers() not to print duplicate headers by checking cgroups. $ perf stat -a --for-each-cgroup system.slice,user.slice --metric-only sleep 1 Performance counter stats for 'system wide': GHz insn per cycle branch-misses of all branches system.slice 3.792 0.61 3.24% user.slice 3.661 2.32 0.37% 1.016111516 seconds time elapsed Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221114230227.1255976-19-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 58 +++++++++++++++++++++++++++------- 1 file changed, 47 insertions(+), 11 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 7a0673be720b..cf25ed99b5df 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -168,10 +168,10 @@ static void print_cgroup_json(struct perf_stat_config *config, const char *cgrp_ fprintf(config->output, "\"cgroup\" : \"%s\", ", cgrp_name); } -static void print_cgroup(struct perf_stat_config *config, struct evsel *evsel) +static void print_cgroup(struct perf_stat_config *config, struct cgroup *cgrp) { - if (nr_cgroups) { - const char *cgrp_name = evsel->cgrp ? evsel->cgrp->name : ""; + if (nr_cgroups || config->cgroup_list) { + const char *cgrp_name = cgrp ? 
cgrp->name : ""; if (config->json_output) print_cgroup_json(config, cgrp_name); @@ -340,6 +340,7 @@ struct outstate { int nr; struct aggr_cpu_id id; struct evsel *evsel; + struct cgroup *cgrp; }; static void new_line_std(struct perf_stat_config *config __maybe_unused, @@ -552,6 +553,9 @@ static void print_metric_header(struct perf_stat_config *config, os->evsel->priv != os->evsel->evlist->selected->priv) return; + if (os->evsel->cgrp != os->cgrp) + return; + if (!valid_only_metric(unit)) return; unit = fixunit(tbuf, os->evsel, unit); @@ -642,7 +646,7 @@ static void abs_printout(struct perf_stat_config *config, { aggr_printout(config, evsel, id, nr); print_counter_value(config, evsel, avg, ok); - print_cgroup(config, evsel); + print_cgroup(config, evsel->cgrp); } static bool is_mixed_hw_group(struct evsel *counter) @@ -838,7 +842,8 @@ static void print_counter_aggrdata(struct perf_stat_config *config, static void print_metric_begin(struct perf_stat_config *config, struct evlist *evlist, - char *prefix, int aggr_idx) + char *prefix, int aggr_idx, + struct cgroup *cgrp) { struct perf_stat_aggr *aggr; struct aggr_cpu_id id; @@ -854,6 +859,8 @@ static void print_metric_begin(struct perf_stat_config *config, id = config->aggr_map->map[aggr_idx]; aggr = &evsel->stats->aggr[aggr_idx]; aggr_printout(config, evsel, id, aggr->nr); + + print_cgroup(config, cgrp); } static void print_metric_end(struct perf_stat_config *config) @@ -880,7 +887,7 @@ static void print_aggr(struct perf_stat_config *config, * Without each counter has its own line. 
*/ for (s = 0; s < config->aggr_map->nr; s++) { - print_metric_begin(config, evlist, prefix, s); + print_metric_begin(config, evlist, prefix, s, /*cgrp=*/NULL); evlist__for_each_entry(evlist, counter) { if (counter->merged_stat) @@ -935,7 +942,8 @@ static void print_no_aggr_metric(struct perf_stat_config *config, id = aggr_cpu_id__cpu(cpu, /*data=*/NULL); if (first) { - print_metric_begin(config, evlist, prefix, counter_idx); + print_metric_begin(config, evlist, prefix, + counter_idx, /*cgrp=*/NULL); first = false; } val = ps->aggr[counter_idx].counts.val; @@ -960,7 +968,7 @@ static void print_metric_headers_std(struct perf_stat_config *config, if (!no_indent) { int len = aggr_header_lens[config->aggr_mode]; - if (nr_cgroups) + if (nr_cgroups || config->cgroup_list) len += CGROUP_LEN + 1; fprintf(config->output, "%*s", len, ""); @@ -1012,6 +1020,9 @@ static void print_metric_headers(struct perf_stat_config *config, if (config->iostat_run) iostat_print_header_prefix(config); + if (config->cgroup_list) + os.cgrp = evlist__first(evlist)->cgrp; + /* Print metrics headers only */ evlist__for_each_entry(evlist, counter) { os.evsel = counter; @@ -1305,6 +1316,28 @@ static void print_percore(struct perf_stat_config *config, fputc('\n', output); } +static void print_cgroup_counter(struct perf_stat_config *config, struct evlist *evlist, + char *prefix) +{ + struct cgroup *cgrp = NULL; + struct evsel *counter; + + evlist__for_each_entry(evlist, counter) { + if (cgrp != counter->cgrp) { + if (cgrp != NULL) + print_metric_end(config); + + cgrp = counter->cgrp; + print_metric_begin(config, evlist, prefix, + /*aggr_idx=*/0, cgrp); + } + + print_counter(config, counter, prefix); + } + if (cgrp) + print_metric_end(config); +} + void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config, struct target *_target, struct timespec *ts, int argc, const char **argv) { @@ -1332,11 +1365,14 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config 
*conf break; case AGGR_THREAD: case AGGR_GLOBAL: - if (config->iostat_run) + if (config->iostat_run) { iostat_print_counters(evlist, config, ts, prefix = buf, print_counter); - else { - print_metric_begin(config, evlist, prefix, /*aggr_idx=*/0); + } else if (config->cgroup_list) { + print_cgroup_counter(config, evlist, prefix); + } else { + print_metric_begin(config, evlist, prefix, + /*aggr_idx=*/0, /*cgrp=*/NULL); evlist__for_each_entry(evlist, counter) { print_counter(config, counter, prefix); } From 4dd7ff4a0311eee3ac946f0824442de94b34c42e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 14 Nov 2022 15:02:27 -0800 Subject: [PATCH 1580/4122] perf stat: Add print_aggr_cgroup() for --for-each-cgroup and --topdown Normally, --for-each-cgroup only works with AGGR_GLOBAL. However the --topdown on some cpu (e.g. Intel Skylake) converts it to the AGGR_CORE internally. To support those machines, add print_aggr_cgroup and handle the events like in print_cgroup_events(). $ perf stat -a --for-each-cgroup system.slice,user.slice --topdown sleep 1 nmi_watchdog enabled with topdown. May give wrong results. 
Disable with echo 0 > /proc/sys/kernel/nmi_watchdog Performance counter stats for 'system wide': retiring bad speculation frontend bound backend bound S0-D0-C0 2 system.slice 49.0% -46.6% 31.4% S0-D0-C1 2 system.slice 55.5% 8.0% 45.5% -9.0% S0-D0-C2 2 system.slice 87.8% 22.1% 30.3% -40.3% S0-D0-C3 2 system.slice 53.3% -11.9% 45.2% 13.4% S0-D0-C0 2 user.slice 123.5% 4.0% 48.5% -75.9% S0-D0-C1 2 user.slice 19.9% 6.5% 89.9% -16.3% S0-D0-C2 2 user.slice 29.9% 7.9% 71.3% -9.1% S0-D0-C3 2 user.slice 28.0% 7.2% 43.3% 21.5% 1.004136937 seconds time elapsed Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221114230227.1255976-20-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 41 +++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index cf25ed99b5df..f5501760ff2e 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -900,6 +900,42 @@ static void print_aggr(struct perf_stat_config *config, } } +static void print_aggr_cgroup(struct perf_stat_config *config, + struct evlist *evlist, + char *prefix) +{ + bool metric_only = config->metric_only; + struct evsel *counter, *evsel; + struct cgroup *cgrp = NULL; + int s; + + if (!config->aggr_map || !config->aggr_get_id) + return; + + evlist__for_each_entry(evlist, evsel) { + if (cgrp == evsel->cgrp) + continue; + + cgrp = evsel->cgrp; + + for (s = 0; s < config->aggr_map->nr; s++) { + print_metric_begin(config, evlist, prefix, s, cgrp); + + evlist__for_each_entry(evlist, counter) { + if (counter->merged_stat) + continue; + + if (counter->cgrp != cgrp) + continue; + + print_counter_aggrdata(config, counter, s, prefix, + metric_only); + } + print_metric_end(config); + } + } +} + static void
print_counter(struct perf_stat_config *config, struct evsel *counter, char *prefix) { @@ -1361,7 +1397,10 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf case AGGR_DIE: case AGGR_SOCKET: case AGGR_NODE: - print_aggr(config, evlist, prefix); + if (config->cgroup_list) + print_aggr_cgroup(config, evlist, prefix); + else + print_aggr(config, evlist, prefix); break; case AGGR_THREAD: case AGGR_GLOBAL: From a6efaa2c89bf35c3cb7f21dee221dabf20dccf59 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 9 Nov 2022 10:49:01 -0800 Subject: [PATCH 1581/4122] tools lib api: Add install target This allows libapi to be installed as a dependency. Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andrii Nakryiko Cc: Jiri Olsa Cc: Mark Rutland Cc: Masahiro Yamada Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Stephane Eranian Cc: bpf@vger.kernel.org Cc: nicolas schier Link: http://lore.kernel.org/lkml/20221109184914.1357295-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/api/Makefile | 49 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile index e21e1b40b525..6629d0fd0130 100644 --- a/tools/lib/api/Makefile +++ b/tools/lib/api/Makefile @@ -15,6 +15,16 @@ LD ?= $(CROSS_COMPILE)ld MAKEFLAGS += --no-print-directory +INSTALL = install + + +# Use DESTDIR for installing into a different root directory. +# This is useful for building a package. The program will be +# installed in this directory as if it was the root directory. +# Then the build tool can move it later.
+DESTDIR ?= +DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))' + LIBFILE = $(OUTPUT)libapi.a CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) @@ -45,10 +55,23 @@ RM = rm -f API_IN := $(OUTPUT)libapi-in.o +ifeq ($(LP64), 1) + libdir_relative = lib64 +else + libdir_relative = lib +endif + +prefix ?= +libdir = $(prefix)/$(libdir_relative) + +# Shell quotes +libdir_SQ = $(subst ','\'',$(libdir)) + all: export srctree OUTPUT CC LD CFLAGS V include $(srctree)/tools/build/Makefile.include +include $(srctree)/tools/scripts/Makefile.include all: fixdep $(LIBFILE) @@ -58,6 +81,32 @@ $(API_IN): FORCE $(LIBFILE): $(API_IN) $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(API_IN) +define do_install_mkdir + if [ ! -d '$(DESTDIR_SQ)$1' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \ + fi +endef + +define do_install + if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ + fi; \ + $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2' +endef + +install_lib: $(LIBFILE) + $(call QUIET_INSTALL, $(LIBFILE)) \ + $(call do_install_mkdir,$(libdir_SQ)); \ + cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ) + +install_headers: + $(call QUIET_INSTALL, headers) \ + $(call do_install,cpu.h,$(prefix)/include/api,644); \ + $(call do_install,debug.h,$(prefix)/include/api,644); \ + $(call do_install,io.h,$(prefix)/include/api,644); + +install: install_lib install_headers + clean: $(call QUIET_CLEAN, libapi) $(RM) $(LIBFILE); \ find $(or $(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM) From 630ae80ea1dd253609cb50cff87f3248f901aca3 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 9 Nov 2022 10:49:02 -0800 Subject: [PATCH 1582/4122] tools lib subcmd: Add install target This allows libsubcmd to be installed as a dependency. 
Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andrii Nakryiko Cc: Jiri Olsa Cc: Mark Rutland Cc: Masahiro Yamada Cc: Nick Desaulniers Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Stephane Eranian Cc: bpf@vger.kernel.org Link: http://lore.kernel.org/lkml/20221109184914.1357295-3-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/subcmd/Makefile | 49 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/tools/lib/subcmd/Makefile b/tools/lib/subcmd/Makefile index 8f1a09cdfd17..e96566f8991c 100644 --- a/tools/lib/subcmd/Makefile +++ b/tools/lib/subcmd/Makefile @@ -17,6 +17,15 @@ RM = rm -f MAKEFLAGS += --no-print-directory +INSTALL = install + +# Use DESTDIR for installing into a different root directory. +# This is useful for building a package. The program will be +# installed in this directory as if it was the root directory. +# Then the build tool can move it later. +DESTDIR ?= +DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))' + LIBFILE = $(OUTPUT)libsubcmd.a CFLAGS := -ggdb3 -Wall -Wextra -std=gnu99 -fPIC @@ -48,6 +57,18 @@ CFLAGS += $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) SUBCMD_IN := $(OUTPUT)libsubcmd-in.o +ifeq ($(LP64), 1) + libdir_relative = lib64 +else + libdir_relative = lib +endif + +prefix ?= +libdir = $(prefix)/$(libdir_relative) + +# Shell quotes +libdir_SQ = $(subst ','\'',$(libdir)) + all: export srctree OUTPUT CC LD CFLAGS V @@ -61,6 +82,34 @@ $(SUBCMD_IN): FORCE $(LIBFILE): $(SUBCMD_IN) $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(SUBCMD_IN) +define do_install_mkdir + if [ ! -d '$(DESTDIR_SQ)$1' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \ + fi +endef + +define do_install + if [ ! 
-d '$(DESTDIR_SQ)$2' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ + fi; \ + $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2' +endef + +install_lib: $(LIBFILE) + $(call QUIET_INSTALL, $(LIBFILE)) \ + $(call do_install_mkdir,$(libdir_SQ)); \ + cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ) + +install_headers: + $(call QUIET_INSTALL, headers) \ + $(call do_install,exec-cmd.h,$(prefix)/include/subcmd,644); \ + $(call do_install,help.h,$(prefix)/include/subcmd,644); \ + $(call do_install,pager.h,$(prefix)/include/subcmd,644); \ + $(call do_install,parse-options.h,$(prefix)/include/subcmd,644); \ + $(call do_install,run-command.h,$(prefix)/include/subcmd,644); + +install: install_lib install_headers + clean: $(call QUIET_CLEAN, libsubcmd) $(RM) $(LIBFILE); \ find $(or $(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM) From 911920b06e6be3faf87450affb0dbbc6117fd0a5 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 9 Nov 2022 10:49:03 -0800 Subject: [PATCH 1583/4122] perf build: Install libsubcmd locally when building The perf build currently has a '-Itools/lib' on the CC command line. This causes issues as the libapi, libsubcmd, libtraceevent, libbpf headers are all found via this path, making it impossible to override include behavior. Change the libsubcmd build mirroring the libbpf build, so that it is installed in a directory along with its headers. A later change will modify the include behavior. 
Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andrii Nakryiko Cc: Jiri Olsa Cc: Mark Rutland Cc: Masahiro Yamada Cc: Nick Desaulniers Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Stephane Eranian Cc: bpf@vger.kernel.org Link: http://lore.kernel.org/lkml/20221109184914.1357295-4-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/.gitignore | 1 + tools/perf/Makefile.perf | 24 ++++++++++++++++-------- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index fd7a6ff9e7aa..413ca7e4a02a 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -40,6 +40,7 @@ pmu-events/pmu-events.c pmu-events/jevents feature/ libbpf/ +libsubcmd/ fixdep libtraceevent-dynamic-list Documentation/doc.dep diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 67819f905611..4ec6b95a1c49 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -244,7 +244,7 @@ else # force_fixdep LIB_DIR = $(srctree)/tools/lib/api/ TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/ LIBBPF_DIR = $(srctree)/tools/lib/bpf/ -SUBCMD_DIR = $(srctree)/tools/lib/subcmd/ +LIBSUBCMD_DIR = $(srctree)/tools/lib/subcmd/ LIBPERF_DIR = $(srctree)/tools/lib/perf/ DOC_DIR = $(srctree)/tools/perf/Documentation/ @@ -294,7 +294,6 @@ strip-libs = $(filter-out -l%,$(1)) ifneq ($(OUTPUT),) TE_PATH=$(OUTPUT) PLUGINS_PATH=$(OUTPUT) - SUBCMD_PATH=$(OUTPUT) LIBPERF_PATH=$(OUTPUT) ifneq ($(subdir),) API_PATH=$(OUTPUT)/../lib/api/ @@ -305,7 +304,6 @@ else TE_PATH=$(TRACE_EVENT_DIR) PLUGINS_PATH=$(TRACE_EVENT_DIR)plugins/ API_PATH=$(LIB_DIR) - SUBCMD_PATH=$(SUBCMD_DIR) LIBPERF_PATH=$(LIBPERF_DIR) endif @@ -332,7 +330,14 @@ LIBBPF_DESTDIR = $(LIBBPF_OUTPUT) LIBBPF_INCLUDE = $(LIBBPF_DESTDIR)/include LIBBPF = $(LIBBPF_OUTPUT)/libbpf.a -LIBSUBCMD = $(SUBCMD_PATH)libsubcmd.a +ifneq ($(OUTPUT),) + LIBSUBCMD_OUTPUT = $(abspath $(OUTPUT))/libsubcmd +else + LIBSUBCMD_OUTPUT = $(CURDIR)/libsubcmd +endif 
+LIBSUBCMD_DESTDIR = $(LIBSUBCMD_OUTPUT) +LIBSUBCMD_INCLUDE = $(LIBSUBCMD_DESTDIR)/include +LIBSUBCMD = $(LIBSUBCMD_OUTPUT)/libsubcmd.a LIBPERF = $(LIBPERF_PATH)libperf.a export LIBPERF @@ -849,11 +854,14 @@ $(LIBPERF)-clean: $(call QUIET_CLEAN, libperf) $(Q)$(MAKE) -C $(LIBPERF_DIR) O=$(OUTPUT) clean >/dev/null -$(LIBSUBCMD): FORCE - $(Q)$(MAKE) -C $(SUBCMD_DIR) O=$(OUTPUT) $(OUTPUT)libsubcmd.a +$(LIBSUBCMD): FORCE | $(LIBSUBCMD_OUTPUT) + $(Q)$(MAKE) -C $(LIBSUBCMD_DIR) O=$(LIBSUBCMD_OUTPUT) \ + DESTDIR=$(LIBSUBCMD_DESTDIR) prefix= \ + $@ install_headers $(LIBSUBCMD)-clean: - $(Q)$(MAKE) -C $(SUBCMD_DIR) O=$(OUTPUT) clean + $(call QUIET_CLEAN, libsubcmd) + $(Q)$(RM) -r -- $(LIBSUBCMD_OUTPUT) help: @echo 'Perf make targets:' @@ -1039,7 +1047,7 @@ SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h SKELETONS += $(SKEL_OUT)/off_cpu.skel.h $(SKEL_OUT)/lock_contention.skel.h SKELETONS += $(SKEL_OUT)/kwork_trace.skel.h -$(SKEL_TMP_OUT) $(LIBBPF_OUTPUT): +$(SKEL_TMP_OUT) $(LIBBPF_OUTPUT) $(LIBSUBCMD_OUTPUT): $(Q)$(MKDIR) -p $@ ifdef BUILD_BPF_SKEL From 00314c9bca8faad495e4e4f702491ecd37444784 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 9 Nov 2022 10:49:04 -0800 Subject: [PATCH 1584/4122] perf build: Install libapi locally when building The perf build currently has a '-Itools/lib' on the CC command line. This causes issues as the libapi, libsubcmd, libtraceevent, libbpf headers are all found via this path, making it impossible to override include behavior. Change the libapi build mirroring the libbpf and libsubcmd build, so that it is installed in a directory along with its headers. A later change will modify the include behavior. 
Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andrii Nakryiko Cc: Jiri Olsa Cc: Mark Rutland Cc: Masahiro Yamada Cc: Nick Desaulniers Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Stephane Eranian Cc: bpf@vger.kernel.org Link: http://lore.kernel.org/lkml/20221109184914.1357295-5-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/.gitignore | 1 + tools/perf/Makefile.perf | 27 +++++++++++++++------------ 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index 413ca7e4a02a..4a8a76da861d 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -39,6 +39,7 @@ trace/beauty/generated/ pmu-events/pmu-events.c pmu-events/jevents feature/ +libapi/ libbpf/ libsubcmd/ fixdep diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 4ec6b95a1c49..f8c712e26a69 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -241,7 +241,7 @@ sub-make: fixdep else # force_fixdep -LIB_DIR = $(srctree)/tools/lib/api/ +LIBAPI_DIR = $(srctree)/tools/lib/api/ TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/ LIBBPF_DIR = $(srctree)/tools/lib/bpf/ LIBSUBCMD_DIR = $(srctree)/tools/lib/subcmd/ @@ -295,15 +295,9 @@ ifneq ($(OUTPUT),) TE_PATH=$(OUTPUT) PLUGINS_PATH=$(OUTPUT) LIBPERF_PATH=$(OUTPUT) -ifneq ($(subdir),) - API_PATH=$(OUTPUT)/../lib/api/ -else - API_PATH=$(OUTPUT) -endif else TE_PATH=$(TRACE_EVENT_DIR) PLUGINS_PATH=$(TRACE_EVENT_DIR)plugins/ - API_PATH=$(LIB_DIR) LIBPERF_PATH=$(LIBPERF_DIR) endif @@ -318,7 +312,14 @@ LIBTRACEEVENT_DYNAMIC_LIST = $(PLUGINS_PATH)libtraceevent-dynamic-list DYNAMIC_LIST_LDFLAGS = -Xlinker --dynamic-list=$(LIBTRACEEVENT_DYNAMIC_LIST) LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS = $(if $(findstring -static,$(LDFLAGS)),,$(DYNAMIC_LIST_LDFLAGS)) -LIBAPI = $(API_PATH)libapi.a +ifneq ($(OUTPUT),) + LIBAPI_OUTPUT = $(abspath $(OUTPUT))/libapi +else + LIBAPI_OUTPUT = $(CURDIR)/libapi +endif +LIBAPI_DESTDIR = $(LIBAPI_OUTPUT) 
+LIBAPI_INCLUDE = $(LIBAPI_DESTDIR)/include +LIBAPI = $(LIBAPI_OUTPUT)/libapi.a export LIBAPI ifneq ($(OUTPUT),) @@ -831,12 +832,14 @@ $(LIBTRACEEVENT)-clean: install-traceevent-plugins: libtraceevent_plugins $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) install_plugins -$(LIBAPI): FORCE - $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) $(OUTPUT)libapi.a +$(LIBAPI): FORCE | $(LIBAPI_OUTPUT) + $(Q)$(MAKE) -C $(LIBAPI_DIR) O=$(LIBAPI_OUTPUT) \ + DESTDIR=$(LIBAPI_DESTDIR) prefix= \ + $@ install_headers $(LIBAPI)-clean: $(call QUIET_CLEAN, libapi) - $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null + $(Q)$(RM) -r -- $(LIBAPI_OUTPUT) $(LIBBPF): FORCE | $(LIBBPF_OUTPUT) $(Q)$(MAKE) -C $(LIBBPF_DIR) FEATURES_DUMP=$(FEATURE_DUMP_EXPORT) \ @@ -1047,7 +1050,7 @@ SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h SKELETONS += $(SKEL_OUT)/off_cpu.skel.h $(SKEL_OUT)/lock_contention.skel.h SKELETONS += $(SKEL_OUT)/kwork_trace.skel.h -$(SKEL_TMP_OUT) $(LIBBPF_OUTPUT) $(LIBSUBCMD_OUTPUT): +$(SKEL_TMP_OUT) $(LIBAPI_OUTPUT) $(LIBBPF_OUTPUT) $(LIBSUBCMD_OUTPUT): $(Q)$(MKDIR) -p $@ ifdef BUILD_BPF_SKEL From 91009a3a9913f2755f8b9616a2124a51147c375c Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 9 Nov 2022 10:49:05 -0800 Subject: [PATCH 1585/4122] perf build: Install libperf locally when building The perf build currently has a '-Itools/lib' on the CC command line. This causes issues as the libapi, libsubcmd, libtraceevent, libbpf headers are all found via this path, making it impossible to override include behavior. Change the libperf build mirroring the libbpf, libsubcmd and libapi build, so that it is installed in a directory along with its headers. A later change will modify the include behavior. 
Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andrii Nakryiko Cc: Jiri Olsa Cc: Mark Rutland Cc: Masahiro Yamada Cc: Nick Desaulniers Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Stephane Eranian Cc: bpf@vger.kernel.org Link: http://lore.kernel.org/lkml/20221109184914.1357295-6-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/.gitignore | 1 + tools/perf/Makefile.perf | 21 ++++++++++++++------- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index 4a8a76da861d..b5f10d5603f1 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -41,6 +41,7 @@ pmu-events/jevents feature/ libapi/ libbpf/ +libperf/ libsubcmd/ fixdep libtraceevent-dynamic-list diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index f8c712e26a69..57aec5a97270 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -294,11 +294,9 @@ strip-libs = $(filter-out -l%,$(1)) ifneq ($(OUTPUT),) TE_PATH=$(OUTPUT) PLUGINS_PATH=$(OUTPUT) - LIBPERF_PATH=$(OUTPUT) else TE_PATH=$(TRACE_EVENT_DIR) PLUGINS_PATH=$(TRACE_EVENT_DIR)plugins/ - LIBPERF_PATH=$(LIBPERF_DIR) endif LIBTRACEEVENT = $(TE_PATH)libtraceevent.a @@ -340,7 +338,14 @@ LIBSUBCMD_DESTDIR = $(LIBSUBCMD_OUTPUT) LIBSUBCMD_INCLUDE = $(LIBSUBCMD_DESTDIR)/include LIBSUBCMD = $(LIBSUBCMD_OUTPUT)/libsubcmd.a -LIBPERF = $(LIBPERF_PATH)libperf.a +ifneq ($(OUTPUT),) + LIBPERF_OUTPUT = $(abspath $(OUTPUT))/libperf +else + LIBPERF_OUTPUT = $(CURDIR)/libperf +endif +LIBPERF_DESTDIR = $(LIBPERF_OUTPUT) +LIBPERF_INCLUDE = $(LIBPERF_DESTDIR)/include +LIBPERF = $(LIBPERF_OUTPUT)/libperf.a export LIBPERF # python extension build directories @@ -850,12 +855,14 @@ $(LIBBPF)-clean: $(call QUIET_CLEAN, libbpf) $(Q)$(RM) -r -- $(LIBBPF_OUTPUT) -$(LIBPERF): FORCE - $(Q)$(MAKE) -C $(LIBPERF_DIR) EXTRA_CFLAGS="$(LIBPERF_CFLAGS)" O=$(OUTPUT) $(OUTPUT)libperf.a +$(LIBPERF): FORCE | $(LIBPERF_OUTPUT) + $(Q)$(MAKE) -C $(LIBPERF_DIR) 
O=$(LIBPERF_OUTPUT) \ + DESTDIR=$(LIBPERF_DESTDIR) prefix= \ + $@ install_headers $(LIBPERF)-clean: $(call QUIET_CLEAN, libperf) - $(Q)$(MAKE) -C $(LIBPERF_DIR) O=$(OUTPUT) clean >/dev/null + $(Q)$(RM) -r -- $(LIBPERF_OUTPUT) $(LIBSUBCMD): FORCE | $(LIBSUBCMD_OUTPUT) $(Q)$(MAKE) -C $(LIBSUBCMD_DIR) O=$(LIBSUBCMD_OUTPUT) \ @@ -1050,7 +1057,7 @@ SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h SKELETONS += $(SKEL_OUT)/off_cpu.skel.h $(SKEL_OUT)/lock_contention.skel.h SKELETONS += $(SKEL_OUT)/kwork_trace.skel.h -$(SKEL_TMP_OUT) $(LIBAPI_OUTPUT) $(LIBBPF_OUTPUT) $(LIBSUBCMD_OUTPUT): +$(SKEL_TMP_OUT) $(LIBAPI_OUTPUT) $(LIBBPF_OUTPUT) $(LIBPERF_OUTPUT) $(LIBSUBCMD_OUTPUT): $(Q)$(MKDIR) -p $@ ifdef BUILD_BPF_SKEL From ef019df01e207971200ffcb06559f791980668a1 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 9 Nov 2022 10:49:06 -0800 Subject: [PATCH 1586/4122] perf build: Install libtraceevent locally when building The perf build currently has a '-Itools/lib' on the CC command line. This causes issues as the libapi, libsubcmd, libtraceevent, libbpf headers are all found via this path, making it impossible to override include behavior. Change the libtraceevent build mirroring the libbpf, libsubcmd, libapi and libperf build, so that it is installed in a directory along with its headers. A later change will modify the include behavior. Similarly, the plugins are now installed into libtraceevent_plugins except they have no header files. 
Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andrii Nakryiko Cc: Jiri Olsa Cc: Mark Rutland Cc: Masahiro Yamada Cc: Nick Desaulniers Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Stephane Eranian Cc: bpf@vger.kernel.org Link: http://lore.kernel.org/lkml/20221109184914.1357295-7-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/.gitignore | 3 ++- tools/perf/Makefile.perf | 57 ++++++++++++++++++++++++++-------------- 2 files changed, 40 insertions(+), 20 deletions(-) diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index b5f10d5603f1..bc165f4e4830 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -43,6 +43,7 @@ libapi/ libbpf/ libperf/ libsubcmd/ +libtraceevent/ +libtraceevent_plugins/ fixdep -libtraceevent-dynamic-list Documentation/doc.dep diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 57aec5a97270..6e5e3f7730b5 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -242,7 +242,8 @@ sub-make: fixdep else # force_fixdep LIBAPI_DIR = $(srctree)/tools/lib/api/ -TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/ +LIBTRACEEVENT_DIR = $(srctree)/tools/lib/traceevent/ +LIBTRACEEVENT_PLUGINS_DIR = $(LIBTRACEEVENT_DIR)/plugins LIBBPF_DIR = $(srctree)/tools/lib/bpf/ LIBSUBCMD_DIR = $(srctree)/tools/lib/subcmd/ LIBPERF_DIR = $(srctree)/tools/lib/perf/ @@ -292,16 +293,17 @@ grep-libs = $(filter -l%,$(1)) strip-libs = $(filter-out -l%,$(1)) ifneq ($(OUTPUT),) - TE_PATH=$(OUTPUT) - PLUGINS_PATH=$(OUTPUT) + LIBTRACEEVENT_OUTPUT = $(abspath $(OUTPUT))/libtraceevent else - TE_PATH=$(TRACE_EVENT_DIR) - PLUGINS_PATH=$(TRACE_EVENT_DIR)plugins/ + LIBTRACEEVENT_OUTPUT = $(CURDIR)/libtraceevent endif - -LIBTRACEEVENT = $(TE_PATH)libtraceevent.a +LIBTRACEEVENT_PLUGINS_OUTPUT = $(LIBTRACEEVENT_OUTPUT)_plugins +LIBTRACEEVENT_DESTDIR = $(LIBTRACEEVENT_OUTPUT) +LIBTRACEEVENT_PLUGINS_DESTDIR = $(LIBTRACEEVENT_PLUGINS_OUTPUT) +LIBTRACEEVENT_INCLUDE = 
$(LIBTRACEEVENT_DESTDIR)/include +LIBTRACEEVENT = $(LIBTRACEEVENT_OUTPUT)/libtraceevent.a export LIBTRACEEVENT -LIBTRACEEVENT_DYNAMIC_LIST = $(PLUGINS_PATH)libtraceevent-dynamic-list +LIBTRACEEVENT_DYNAMIC_LIST = $(LIBTRACEEVENT_PLUGINS_OUTPUT)/libtraceevent-dynamic-list # # The static build has no dynsym table, so this does not work for @@ -821,21 +823,33 @@ $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h) LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) 'EXTRA_CFLAGS=$(EXTRA_CFLAGS)' 'LDFLAGS=$(filter-out -static,$(LDFLAGS))' -$(LIBTRACEEVENT): FORCE - $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a - -libtraceevent_plugins: FORCE - $(Q)$(MAKE) -C $(TRACE_EVENT_DIR)plugins $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) plugins - -$(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins - $(Q)$(MAKE) -C $(TRACE_EVENT_DIR)plugins $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent-dynamic-list +$(LIBTRACEEVENT): FORCE | $(LIBTRACEEVENT_OUTPUT) + $(Q)$(MAKE) -C $(LIBTRACEEVENT_DIR) O=$(LIBTRACEEVENT_OUTPUT) \ + DESTDIR=$(LIBTRACEEVENT_DESTDIR) prefix= \ + $@ install_headers $(LIBTRACEEVENT)-clean: $(call QUIET_CLEAN, libtraceevent) - $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) O=$(OUTPUT) clean >/dev/null + $(Q)$(RM) -r -- $(LIBTRACEEVENT_OUTPUT) + +libtraceevent_plugins: FORCE | $(LIBTRACEEVENT_PLUGINS_OUTPUT) + $(Q)$(MAKE) -C $(LIBTRACEEVENT_PLUGINS_DIR) O=$(LIBTRACEEVENT_PLUGINS_OUTPUT) \ + DESTDIR=$(LIBTRACEEVENT_PLUGINS_DESTDIR) prefix= \ + plugins + +libtraceevent_plugins-clean: + $(call QUIET_CLEAN, libtraceevent_plugins) + $(Q)$(RM) -r -- $(LIBTRACEEVENT_PLUGINS_OUTPUT) + +$(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins + $(Q)$(MAKE) -C $(LIBTRACEEVENT_PLUGINS_DIR) O=$(LIBTRACEEVENT_PLUGINS_OUTPUT) \ + DESTDIR=$(LIBTRACEEVENT_PLUGINS_DESTDIR) prefix= \ + $(LIBTRACEEVENT_FLAGS) $@ install-traceevent-plugins: libtraceevent_plugins - $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) install_plugins + 
$(Q)$(MAKE) -C $(LIBTRACEEVENT_PLUGINS_DIR) O=$(LIBTRACEEVENT_PLUGINS_OUTPUT) \ + DESTDIR=$(LIBTRACEEVENT_PLUGINS_DESTDIR) prefix= \ + $(LIBTRACEEVENT_FLAGS) install $(LIBAPI): FORCE | $(LIBAPI_OUTPUT) $(Q)$(MAKE) -C $(LIBAPI_DIR) O=$(LIBAPI_OUTPUT) \ @@ -1060,6 +1074,11 @@ SKELETONS += $(SKEL_OUT)/kwork_trace.skel.h $(SKEL_TMP_OUT) $(LIBAPI_OUTPUT) $(LIBBPF_OUTPUT) $(LIBPERF_OUTPUT) $(LIBSUBCMD_OUTPUT): $(Q)$(MKDIR) -p $@ +ifndef LIBTRACEEVENT_DYNAMIC +$(LIBTRACEEVENT_OUTPUT) $(LIBTRACEEVENT_PLUGINS_OUTPUT): + $(Q)$(MKDIR) -p $@ +endif + ifdef BUILD_BPF_SKEL BPFTOOL := $(SKEL_TMP_OUT)/bootstrap/bpftool BPF_INCLUDE := -I$(SKEL_TMP_OUT)/.. -I$(LIBBPF_INCLUDE) @@ -1102,7 +1121,7 @@ endif # BUILD_BPF_SKEL bpf-skel-clean: $(call QUIET_CLEAN, bpf-skel) $(RM) -r $(SKEL_TMP_OUT) $(SKELETONS) -clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBPERF)-clean fixdep-clean python-clean bpf-skel-clean tests-coresight-targets-clean +clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBPERF)-clean fixdep-clean python-clean bpf-skel-clean tests-coresight-targets-clean libtraceevent_plugins-clean $(call QUIET_CLEAN, core-objs) $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive $(OUTPUT)perf-iostat $(LANG_BINDINGS) $(Q)find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete $(Q)$(RM) $(OUTPUT).config-detected From 8d1f68bd76a6517c132fd1dd223c85c5e1defa49 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 9 Nov 2022 10:49:07 -0800 Subject: [PATCH 1587/4122] tools lib api: Add missing install headers Headers necessary for the perf build. 
Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andrii Nakryiko Cc: Jiri Olsa Cc: Mark Rutland Cc: Masahiro Yamada Cc: Nick Desaulniers Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Stephane Eranian Cc: bpf@vger.kernel.org Link: http://lore.kernel.org/lkml/20221109184914.1357295-8-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/api/Makefile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile index 6629d0fd0130..3e5ef1e0e890 100644 --- a/tools/lib/api/Makefile +++ b/tools/lib/api/Makefile @@ -103,7 +103,10 @@ install_headers: $(call QUIET_INSTALL, headers) \ $(call do_install,cpu.h,$(prefix)/include/api,644); \ $(call do_install,debug.h,$(prefix)/include/api,644); \ - $(call do_install,io.h,$(prefix)/include/api,644); + $(call do_install,io.h,$(prefix)/include/api,644); \ + $(call do_install,fd/array.h,$(prefix)/include/api/fd,644); \ + $(call do_install,fs/fs.h,$(prefix)/include/api/fs,644); \ + $(call do_install,fs/tracing_path.h,$(prefix)/include/api/fs,644); install: install_lib install_headers From a6e8caf5db2e1db8e2ec51f87b9749bd28f6b969 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 9 Nov 2022 10:49:08 -0800 Subject: [PATCH 1588/4122] tools lib perf: Add missing install headers Headers necessary for the perf build. Note, internal headers are also installed as these are necessary for the build. 
Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andrii Nakryiko Cc: Jiri Olsa Cc: Mark Rutland Cc: Masahiro Yamada Cc: Nick Desaulniers Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Stephane Eranian Cc: bpf@vger.kernel.org Link: http://lore.kernel.org/lkml/20221109184914.1357295-9-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/Makefile | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile index 21df023a2103..1badc0a04676 100644 --- a/tools/lib/perf/Makefile +++ b/tools/lib/perf/Makefile @@ -189,13 +189,21 @@ install_lib: libs install_headers: $(call QUIET_INSTALL, headers) \ + $(call do_install,include/perf/bpf_perf.h,$(prefix)/include/perf,644); \ $(call do_install,include/perf/core.h,$(prefix)/include/perf,644); \ $(call do_install,include/perf/cpumap.h,$(prefix)/include/perf,644); \ $(call do_install,include/perf/threadmap.h,$(prefix)/include/perf,644); \ $(call do_install,include/perf/evlist.h,$(prefix)/include/perf,644); \ $(call do_install,include/perf/evsel.h,$(prefix)/include/perf,644); \ $(call do_install,include/perf/event.h,$(prefix)/include/perf,644); \ - $(call do_install,include/perf/mmap.h,$(prefix)/include/perf,644); + $(call do_install,include/perf/mmap.h,$(prefix)/include/perf,644); \ + $(call do_install,include/internal/cpumap.h,$(prefix)/include/internal,644); \ + $(call do_install,include/internal/evlist.h,$(prefix)/include/internal,644); \ + $(call do_install,include/internal/evsel.h,$(prefix)/include/internal,644); \ + $(call do_install,include/internal/lib.h,$(prefix)/include/internal,644); \ + $(call do_install,include/internal/mmap.h,$(prefix)/include/internal,644); \ + $(call do_install,include/internal/threadmap.h,$(prefix)/include/internal,644); \ + $(call do_install,include/internal/xyarray.h,$(prefix)/include/internal,644); install_pkgconfig: $(LIBPERF_PC) $(call QUIET_INSTALL, $(LIBPERF_PC)) \ From 
160be157eaba2a37233ff2d27093e5915b6b084e Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 9 Nov 2022 10:49:09 -0800 Subject: [PATCH 1589/4122] tool lib symbol: Add Makefile/Build Add sufficient Makefile for libsymbol to be built as a dependency and header files installed. Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andrii Nakryiko Cc: Jiri Olsa Cc: Mark Rutland Cc: Masahiro Yamada Cc: Nick Desaulniers Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Stephane Eranian Cc: bpf@vger.kernel.org Link: http://lore.kernel.org/lkml/20221109184914.1357295-10-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/symbol/Build | 1 + tools/lib/symbol/Makefile | 115 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 116 insertions(+) create mode 100644 tools/lib/symbol/Build create mode 100644 tools/lib/symbol/Makefile diff --git a/tools/lib/symbol/Build b/tools/lib/symbol/Build new file mode 100644 index 000000000000..9b9a9c78d3c9 --- /dev/null +++ b/tools/lib/symbol/Build @@ -0,0 +1 @@ +libsymbol-y += kallsyms.o diff --git a/tools/lib/symbol/Makefile b/tools/lib/symbol/Makefile new file mode 100644 index 000000000000..4c1d6b53032d --- /dev/null +++ b/tools/lib/symbol/Makefile @@ -0,0 +1,115 @@ +# SPDX-License-Identifier: GPL-2.0 +include ../../scripts/Makefile.include +include ../../scripts/utilities.mak # QUIET_CLEAN + +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(CURDIR))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +#$(info Determined 'srctree' to be $(srctree)) +endif + +CC ?= $(CROSS_COMPILE)gcc +AR ?= $(CROSS_COMPILE)ar +LD ?= $(CROSS_COMPILE)ld + +MAKEFLAGS += --no-print-directory + +INSTALL = install + + +# Use DESTDIR for installing into a different root directory. +# This is useful for building a package. The program will be +# installed in this directory as if it was the root directory. +# Then the build tool can move it later. 
+DESTDIR ?= +DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))' + +LIBFILE = $(OUTPUT)libsymbol.a + +CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) +CFLAGS += -ggdb3 -Wall -Wextra -std=gnu11 -U_FORTIFY_SOURCE -fPIC + +ifeq ($(DEBUG),0) +ifeq ($(CC_NO_CLANG), 0) + CFLAGS += -O3 +else + CFLAGS += -O6 +endif +endif + +ifeq ($(DEBUG),0) + CFLAGS += -D_FORTIFY_SOURCE +endif + +# Treat warnings as errors unless directed not to +ifneq ($(WERROR),0) + CFLAGS += -Werror +endif + +CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 + +CFLAGS += -I$(srctree)/tools/lib +CFLAGS += -I$(srctree)/tools/include + +RM = rm -f + +SYMBOL_IN := $(OUTPUT)libsymbol-in.o + +ifeq ($(LP64), 1) + libdir_relative = lib64 +else + libdir_relative = lib +endif + +prefix ?= +libdir = $(prefix)/$(libdir_relative) + +# Shell quotes +libdir_SQ = $(subst ','\'',$(libdir)) + +all: + +export srctree OUTPUT CC LD CFLAGS V +include $(srctree)/tools/build/Makefile.include +include $(srctree)/tools/scripts/Makefile.include + +all: fixdep $(LIBFILE) + +$(SYMBOL_IN): FORCE + $(MAKE) $(build)=libsymbol V=1 + +$(LIBFILE): $(SYMBOL_IN) + $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(SYMBOL_IN) + +define do_install_mkdir + if [ ! -d '$(DESTDIR_SQ)$1' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \ + fi +endef + +define do_install + if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ + fi; \ + $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2' +endef + +install_lib: $(LIBFILE) + $(call QUIET_INSTALL, $(LIBFILE)) \ + $(call do_install_mkdir,$(libdir_SQ)); \ + cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ) + +install_headers: + $(call QUIET_INSTALL, headers) \ + $(call do_install,kallsyms.h,$(prefix)/include/symbol,644); + +install: install_lib install_headers + +clean: + $(call QUIET_CLEAN, libsymbol) $(RM) $(LIBFILE); \ + find $(or $(OUTPUT),.) 
-name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM) + +FORCE: + +.PHONY: clean FORCE From 84bec6f0b31fb2ac8c5e4b0ded7727e6ec1115db Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 9 Nov 2022 10:49:10 -0800 Subject: [PATCH 1590/4122] perf build: Install libsymbol locally when building The perf build currently has a '-Itools/lib' on the CC command line. This causes issues as the libapi, libsubcmd, libtraceevent, libbpf and libsymbol headers are all found via this path, making it impossible to override include behavior. Change the libsymbol build mirroring the libbpf, libsubcmd, libapi, libperf and libtraceevent build, so that it is installed in a directory along with its headers. A later change will modify the include behavior. Don't build kallsyms.o as part of util as this will lead to duplicate definitions. Add kallsym's directory to the MANIFEST rather than individual files, so that the Build and Makefile are added to a source tar ball. Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andrii Nakryiko Cc: Jiri Olsa Cc: Mark Rutland Cc: Masahiro Yamada Cc: Nick Desaulniers Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Stephane Eranian Cc: bpf@vger.kernel.org Link: http://lore.kernel.org/lkml/20221109184914.1357295-11-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/.gitignore | 1 + tools/perf/MANIFEST | 3 +-- tools/perf/Makefile.perf | 25 ++++++++++++++++++++++--- tools/perf/util/Build | 5 ----- 4 files changed, 24 insertions(+), 10 deletions(-) diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index bc165f4e4830..05806ecfc33c 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -43,6 +43,7 @@ libapi/ libbpf/ libperf/ libsubcmd/ +libsymbol/ libtraceevent/ libtraceevent_plugins/ fixdep diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index f5d72f936a6b..c8e8e05b4ff1 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -13,8 +13,7 @@ tools/lib/ctype.c tools/lib/hweight.c 
tools/lib/rbtree.c tools/lib/string.c -tools/lib/symbol/kallsyms.c -tools/lib/symbol/kallsyms.h +tools/lib/symbol tools/lib/find_bit.c tools/lib/bitmap.c tools/lib/list_sort.c diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 6e5e3f7730b5..6c1a2a3ccc38 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -246,6 +246,7 @@ LIBTRACEEVENT_DIR = $(srctree)/tools/lib/traceevent/ LIBTRACEEVENT_PLUGINS_DIR = $(LIBTRACEEVENT_DIR)/plugins LIBBPF_DIR = $(srctree)/tools/lib/bpf/ LIBSUBCMD_DIR = $(srctree)/tools/lib/subcmd/ +LIBSYMBOL_DIR = $(srctree)/tools/lib/symbol/ LIBPERF_DIR = $(srctree)/tools/lib/perf/ DOC_DIR = $(srctree)/tools/perf/Documentation/ @@ -340,6 +341,15 @@ LIBSUBCMD_DESTDIR = $(LIBSUBCMD_OUTPUT) LIBSUBCMD_INCLUDE = $(LIBSUBCMD_DESTDIR)/include LIBSUBCMD = $(LIBSUBCMD_OUTPUT)/libsubcmd.a +ifneq ($(OUTPUT),) + LIBSYMBOL_OUTPUT = $(abspath $(OUTPUT))/libsymbol +else + LIBSYMBOL_OUTPUT = $(CURDIR)/libsymbol +endif +LIBSYMBOL_DESTDIR = $(LIBSYMBOL_OUTPUT) +LIBSYMBOL_INCLUDE = $(LIBSYMBOL_DESTDIR)/include +LIBSYMBOL = $(LIBSYMBOL_OUTPUT)/libsymbol.a + ifneq ($(OUTPUT),) LIBPERF_OUTPUT = $(abspath $(OUTPUT))/libperf else @@ -398,7 +408,7 @@ endif export PERL_PATH -PERFLIBS = $(LIBAPI) $(LIBSUBCMD) $(LIBPERF) +PERFLIBS = $(LIBAPI) $(LIBPERF) $(LIBSUBCMD) $(LIBSYMBOL) ifndef NO_LIBBPF ifndef LIBBPF_DYNAMIC PERFLIBS += $(LIBBPF) @@ -887,6 +897,15 @@ $(LIBSUBCMD)-clean: $(call QUIET_CLEAN, libsubcmd) $(Q)$(RM) -r -- $(LIBSUBCMD_OUTPUT) +$(LIBSYMBOL): FORCE | $(LIBSYMBOL_OUTPUT) + $(Q)$(MAKE) -C $(LIBSYMBOL_DIR) O=$(LIBSYMBOL_OUTPUT) \ + DESTDIR=$(LIBSYMBOL_DESTDIR) prefix= \ + $@ install_headers + +$(LIBSYMBOL)-clean: + $(call QUIET_CLEAN, libsymbol) + $(Q)$(RM) -r -- $(LIBSYMBOL_OUTPUT) + help: @echo 'Perf make targets:' @echo ' doc - make *all* documentation (see below)' @@ -1071,7 +1090,7 @@ SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h SKELETONS += $(SKEL_OUT)/off_cpu.skel.h 
$(SKEL_OUT)/lock_contention.skel.h SKELETONS += $(SKEL_OUT)/kwork_trace.skel.h -$(SKEL_TMP_OUT) $(LIBAPI_OUTPUT) $(LIBBPF_OUTPUT) $(LIBPERF_OUTPUT) $(LIBSUBCMD_OUTPUT): +$(SKEL_TMP_OUT) $(LIBAPI_OUTPUT) $(LIBBPF_OUTPUT) $(LIBPERF_OUTPUT) $(LIBSUBCMD_OUTPUT) $(LIBSYMBOL_OUTPUT): $(Q)$(MKDIR) -p $@ ifndef LIBTRACEEVENT_DYNAMIC @@ -1121,7 +1140,7 @@ endif # BUILD_BPF_SKEL bpf-skel-clean: $(call QUIET_CLEAN, bpf-skel) $(RM) -r $(SKEL_TMP_OUT) $(SKELETONS) -clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBPERF)-clean fixdep-clean python-clean bpf-skel-clean tests-coresight-targets-clean libtraceevent_plugins-clean +clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBSYMBOL)-clean $(LIBPERF)-clean fixdep-clean python-clean bpf-skel-clean tests-coresight-targets-clean libtraceevent_plugins-clean $(call QUIET_CLEAN, core-objs) $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive $(OUTPUT)perf-iostat $(LANG_BINDINGS) $(Q)find $(or $(OUTPUT),.) 
-name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete $(Q)$(RM) $(OUTPUT).config-detected diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 47a7db3ad0a1..ab37f588ee8b 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -19,7 +19,6 @@ perf-y += perf_event_attr_fprintf.o perf-y += evswitch.o perf-y += find_bit.o perf-y += get_current_dir_name.o -perf-y += kallsyms.o perf-y += levenshtein.o perf-y += llvm-utils.o perf-y += mmap.o @@ -294,10 +293,6 @@ CFLAGS_expr.o += -Wno-redundant-decls CFLAGS_header.o += -include $(OUTPUT)PERF-VERSION-FILE CFLAGS_arm-spe.o += -I$(srctree)/tools/arch/arm64/include/ -$(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c FORCE - $(call rule_mkdir) - $(call if_changed_dep,cc_o_c) - $(OUTPUT)util/argv_split.o: ../lib/argv_split.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_o_c) From bd560973c5d3b2a37fb13d769b34385ed320d547 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 9 Nov 2022 10:49:11 -0800 Subject: [PATCH 1591/4122] perf expr: Tidy hashmap dependency hashmap.h comes from libbpf but isn't installed with its headers. Always use the header file of the code in util. Change the hashmap.h dependency in expr.h to a forward declaration, add the necessary header file includes in the C files. 
Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andrii Nakryiko Cc: Jiri Olsa Cc: Mark Rutland Cc: Masahiro Yamada Cc: Nick Desaulniers Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Stephane Eranian Cc: bpf@vger.kernel.org Link: http://lore.kernel.org/lkml/20221109184914.1357295-12-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/expr.c | 1 + tools/perf/tests/pmu-events.c | 1 + tools/perf/util/bpf-loader.c | 4 ---- tools/perf/util/evsel.c | 4 ---- tools/perf/util/expr.c | 1 + tools/perf/util/expr.h | 7 +------ tools/perf/util/metricgroup.c | 1 + tools/perf/util/stat-shadow.c | 1 + tools/perf/util/stat.c | 4 ---- 9 files changed, 6 insertions(+), 18 deletions(-) diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c index 6512f5e22045..b6667501ebb4 100644 --- a/tools/perf/tests/expr.c +++ b/tools/perf/tests/expr.c @@ -2,6 +2,7 @@ #include "util/cputopo.h" #include "util/debug.h" #include "util/expr.h" +#include "util/hashmap.h" #include "util/header.h" #include "util/smt.h" #include "tests.h" diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index 5d0d3b239a68..f7b9dbbad97f 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -12,6 +12,7 @@ #include #include "util/evlist.h" #include "util/expr.h" +#include "util/hashmap.h" #include "util/parse-events.h" #include "metricgroup.h" #include "stat.h" diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index f4adeccdbbcb..b3c8174360bf 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -27,11 +27,7 @@ #include "util.h" #include "llvm-utils.h" #include "c++/clang-c.h" -#ifdef HAVE_LIBBPF_SUPPORT -#include -#else #include "util/hashmap.h" -#endif #include "asm/bug.h" #include diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index ca6abb64c91d..ca554f8202f9 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -46,11 +46,7 @@ 
#include "string2.h" #include "memswap.h" #include "util.h" -#ifdef HAVE_LIBBPF_SUPPORT -#include -#else #include "util/hashmap.h" -#endif #include "pmu-hybrid.h" #include "off_cpu.h" #include "../perf-sys.h" diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index aaacf514dc09..140f2acdb325 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -11,6 +11,7 @@ #include "expr.h" #include "expr-bison.h" #include "expr-flex.h" +#include "util/hashmap.h" #include "smt.h" #include "tsc.h" #include diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h index d6c1668dc1a0..029271540fb0 100644 --- a/tools/perf/util/expr.h +++ b/tools/perf/util/expr.h @@ -2,12 +2,7 @@ #ifndef PARSE_CTX_H #define PARSE_CTX_H 1 -#ifdef HAVE_LIBBPF_SUPPORT -#include -#else -#include "util/hashmap.h" -#endif - +struct hashmap; struct metric_ref; struct expr_scanner_ctx { diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 1943fed9b6d9..cf9e2452d322 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -28,6 +28,7 @@ #include "util.h" #include #include "cgroup.h" +#include "util/hashmap.h" struct metric_event *metricgroup__lookup(struct rblist *metric_events, struct evsel *evsel, diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 07b29fe272c7..9bde9224a97c 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -14,6 +14,7 @@ #include "units.h" #include #include "iostat.h" +#include "util/hashmap.h" /* * AGGR_GLOBAL: Use CPU 0 diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index acf0edf5fdd1..673f017a211f 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -14,11 +14,7 @@ #include "evlist.h" #include "evsel.h" #include "thread_map.h" -#ifdef HAVE_LIBBPF_SUPPORT -#include -#else #include "util/hashmap.h" -#endif #include void update_stats(struct stats *stats, u64 val) From fd3f518fc1140622e752ac51d0ff18bb74f1d180 Mon Sep 17 
00:00:00 2001 From: Ian Rogers Date: Wed, 9 Nov 2022 10:49:12 -0800 Subject: [PATCH 1592/4122] perf thread_map: Reduce exposure of libperf internal API Remove unnecessary include of internal threadmap.h and refcount.h in thread_map.h. Switch to using public APIs when possible or including the internal header file in the C file. Fix a transitive dependency in openat-syscall.c broken by the clean up. Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andrii Nakryiko Cc: Jiri Olsa Cc: Mark Rutland Cc: Masahiro Yamada Cc: Nick Desaulniers Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Stephane Eranian Cc: bpf@vger.kernel.org Link: http://lore.kernel.org/lkml/20221109184914.1357295-13-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 1 + tools/perf/builtin-trace.c | 4 ++-- tools/perf/tests/openat-syscall.c | 1 + tools/perf/tests/thread-map.c | 1 + tools/perf/util/bpf_counter.c | 2 +- tools/perf/util/evsel.c | 1 + tools/perf/util/python.c | 6 +++--- tools/perf/util/scripting-engines/trace-event-python.c | 2 +- tools/perf/util/thread_map.c | 1 + tools/perf/util/thread_map.h | 2 -- 10 files changed, 12 insertions(+), 9 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 1d79801f4e84..d040fbcdcc5a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -93,6 +93,7 @@ #include #include +#include #define DEFAULT_SEPARATOR " " #define FREEZE_ON_SMI_PATH "devices/cpu/freeze_on_smi" diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index c93b359abc31..3257da5cad23 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -4095,8 +4095,8 @@ static int trace__run(struct trace *trace, int argc, const char **argv) } trace->multiple_threads = perf_thread_map__pid(evlist->core.threads, 0) == -1 || - evlist->core.threads->nr > 1 || - evlist__first(evlist)->core.attr.inherit; + perf_thread_map__nr(evlist->core.threads) > 1 || + 
evlist__first(evlist)->core.attr.inherit; /* * Now that we already used evsel->core.attr to ask the kernel to setup the diff --git a/tools/perf/tests/openat-syscall.c b/tools/perf/tests/openat-syscall.c index 7e05b8b5cc95..131b62271bfa 100644 --- a/tools/perf/tests/openat-syscall.c +++ b/tools/perf/tests/openat-syscall.c @@ -7,6 +7,7 @@ #include #include #include +#include #include "thread_map.h" #include "evsel.h" #include "debug.h" diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c index e413c1387fcb..74308c1368fe 100644 --- a/tools/perf/tests/thread-map.c +++ b/tools/perf/tests/thread-map.c @@ -11,6 +11,7 @@ #include "util/synthetic-events.h" #include #include +#include struct perf_sample; struct perf_tool; diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c index ef1c15e4aeba..eeee899fcf34 100644 --- a/tools/perf/util/bpf_counter.c +++ b/tools/perf/util/bpf_counter.c @@ -561,7 +561,7 @@ static int bperf__load(struct evsel *evsel, struct target *target) if (filter_type == BPERF_FILTER_PID || filter_type == BPERF_FILTER_TGID) - key = evsel->core.threads->map[i].pid; + key = perf_thread_map__pid(evsel->core.threads, i); else if (filter_type == BPERF_FILTER_CPU) key = evsel->core.cpus->map[i].cpu; else diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index ca554f8202f9..45f4f08399ae 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -53,6 +53,7 @@ #include "util/parse-branch-options.h" #include #include +#include #include diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 5be5fa2391de..b5941c74a0d6 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -718,17 +718,17 @@ static Py_ssize_t pyrf_thread_map__length(PyObject *obj) { struct pyrf_thread_map *pthreads = (void *)obj; - return pthreads->threads->nr; + return perf_thread_map__nr(pthreads->threads); } static PyObject *pyrf_thread_map__item(PyObject *obj, Py_ssize_t i) { struct pyrf_thread_map 
*pthreads = (void *)obj; - if (i >= pthreads->threads->nr) + if (i >= perf_thread_map__nr(pthreads->threads)) return NULL; - return Py_BuildValue("i", pthreads->threads->map[i]); + return Py_BuildValue("i", perf_thread_map__pid(pthreads->threads, i)); } static PySequenceMethods pyrf_thread_map__sequence_methods = { diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 1985d1a42a22..1cf65db8f861 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -1654,7 +1654,7 @@ static void python_process_stat(struct perf_stat_config *config, struct perf_cpu_map *cpus = counter->core.cpus; int cpu, thread; - for (thread = 0; thread < threads->nr; thread++) { + for (thread = 0; thread < perf_thread_map__nr(threads); thread++) { for (cpu = 0; cpu < perf_cpu_map__nr(cpus); cpu++) { process_stat(counter, perf_cpu_map__cpu(cpus, cpu), perf_thread_map__pid(threads, thread), tstamp, diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index c9bfe4696943..e848579e61a8 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -18,6 +18,7 @@ #include "thread_map.h" #include "debug.h" #include "event.h" +#include /* Skip "." and ".." 
directories */ static int filter(const struct dirent *dir) diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index 3bb860a32b8e..00ec05fc1656 100644 --- a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h @@ -4,8 +4,6 @@ #include #include -#include -#include #include struct perf_record_thread_map; From c5c4f72ad4faab641cb852fdd890e8a64cb39f24 Mon Sep 17 00:00:00 2001 From: Vipin Sharma Date: Thu, 3 Nov 2022 12:17:13 -0700 Subject: [PATCH 1593/4122] KVM: selftests: Add missing break between -e and -g option in dirty_log_perf_test Passing -e option (Run VCPUs while dirty logging is being disabled) in dirty_log_perf_test also unintentionally enables -g (Do not enable KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2). Add break between two switch case logic. Fixes: cfe12e64b065 ("KVM: selftests: Add an option to run vCPUs while disabling dirty logging") Signed-off-by: Vipin Sharma Reviewed-by: Sean Christopherson Link: https://lore.kernel.org/r/20221103191719.1559407-2-vipinsh@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/dirty_log_perf_test.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index f99e39a672d3..56e08da3a87f 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -411,6 +411,7 @@ int main(int argc, char *argv[]) case 'e': /* 'e' is for evil. */ run_vcpus_while_disabling_dirty_logging = true; + break; case 'g': dirty_log_manual_caps = 0; break; From 0eb88a4121861ce3d5f925a183abb13ad954dbe6 Mon Sep 17 00:00:00 2001 From: Vipin Sharma Date: Thu, 3 Nov 2022 12:17:14 -0700 Subject: [PATCH 1594/4122] KVM: selftests: Put command line options in alphabetical order in dirty_log_perf_test There are 13 command line options and they are not in any order. Put them in alphabetical order to make it easy to add new options. 
No functional change intended. Signed-off-by: Vipin Sharma Reviewed-by: Wei Wang Reviewed-by: Sean Christopherson Link: https://lore.kernel.org/r/20221103191719.1559407-3-vipinsh@google.com Signed-off-by: Sean Christopherson --- .../selftests/kvm/dirty_log_perf_test.c | 36 ++++++++++--------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index 56e08da3a87f..5bb6954b2358 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -406,50 +406,52 @@ int main(int argc, char *argv[]) guest_modes_append_default(); - while ((opt = getopt(argc, argv, "eghi:p:m:nb:f:v:os:x:")) != -1) { + while ((opt = getopt(argc, argv, "b:ef:ghi:m:nop:s:v:x:")) != -1) { switch (opt) { + case 'b': + guest_percpu_mem_size = parse_size(optarg); + break; case 'e': /* 'e' is for evil. */ run_vcpus_while_disabling_dirty_logging = true; break; + case 'f': + p.wr_fract = atoi(optarg); + TEST_ASSERT(p.wr_fract >= 1, + "Write fraction cannot be less than one"); + break; case 'g': dirty_log_manual_caps = 0; break; + case 'h': + help(argv[0]); + break; case 'i': p.iterations = atoi(optarg); break; - case 'p': - p.phys_offset = strtoull(optarg, NULL, 0); - break; case 'm': guest_modes_cmdline(optarg); break; case 'n': perf_test_args.nested = true; break; - case 'b': - guest_percpu_mem_size = parse_size(optarg); + case 'o': + p.partition_vcpu_memory_access = false; break; - case 'f': - p.wr_fract = atoi(optarg); - TEST_ASSERT(p.wr_fract >= 1, - "Write fraction cannot be less than one"); + case 'p': + p.phys_offset = strtoull(optarg, NULL, 0); + break; + case 's': + p.backing_src = parse_backing_src_type(optarg); break; case 'v': nr_vcpus = atoi(optarg); TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, "Invalid number of vcpus, must be between 1 and %d", max_vcpus); break; - case 'o': - p.partition_vcpu_memory_access = 
false; - break; - case 's': - p.backing_src = parse_backing_src_type(optarg); - break; case 'x': p.slots = atoi(optarg); break; - case 'h': default: help(argv[0]); break; From 018ea2d71a43372cb984021f03514dc6dd3d46df Mon Sep 17 00:00:00 2001 From: Vipin Sharma Date: Thu, 3 Nov 2022 12:17:15 -0700 Subject: [PATCH 1595/4122] KVM: selftests: Add atoi_paranoid() to catch errors missed by atoi() atoi() doesn't detect errors. There is no way to know that a 0 return is correct conversion or due to an error. Introduce atoi_paranoid() to detect errors and provide correct conversion. Replace all atoi() calls with atoi_paranoid(). Signed-off-by: Vipin Sharma Suggested-by: David Matlack Suggested-by: Sean Christopherson Reviewed-by: Sean Christopherson Link: https://lore.kernel.org/r/20221103191719.1559407-4-vipinsh@google.com Signed-off-by: Sean Christopherson --- .../selftests/kvm/aarch64/arch_timer.c | 8 ++++---- .../selftests/kvm/aarch64/debug-exceptions.c | 2 +- .../testing/selftests/kvm/aarch64/vgic_irq.c | 6 +++--- .../selftests/kvm/access_tracking_perf_test.c | 2 +- .../selftests/kvm/demand_paging_test.c | 2 +- .../selftests/kvm/dirty_log_perf_test.c | 8 ++++---- .../testing/selftests/kvm/include/test_util.h | 2 ++ .../selftests/kvm/kvm_page_table_test.c | 2 +- tools/testing/selftests/kvm/lib/test_util.c | 19 +++++++++++++++++++ .../selftests/kvm/max_guest_memory_test.c | 6 +++--- .../kvm/memslot_modification_stress_test.c | 6 +++--- .../testing/selftests/kvm/memslot_perf_test.c | 10 +++++----- .../selftests/kvm/set_memory_region_test.c | 2 +- .../selftests/kvm/x86_64/nx_huge_pages_test.c | 4 ++-- 14 files changed, 50 insertions(+), 29 deletions(-) diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c index 574eb73f0e90..251e7ff04883 100644 --- a/tools/testing/selftests/kvm/aarch64/arch_timer.c +++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c @@ -414,7 +414,7 @@ static bool parse_args(int argc, char 
*argv[]) while ((opt = getopt(argc, argv, "hn:i:p:m:")) != -1) { switch (opt) { case 'n': - test_args.nr_vcpus = atoi(optarg); + test_args.nr_vcpus = atoi_paranoid(optarg); if (test_args.nr_vcpus <= 0) { pr_info("Positive value needed for -n\n"); goto err; @@ -425,21 +425,21 @@ static bool parse_args(int argc, char *argv[]) } break; case 'i': - test_args.nr_iter = atoi(optarg); + test_args.nr_iter = atoi_paranoid(optarg); if (test_args.nr_iter <= 0) { pr_info("Positive value needed for -i\n"); goto err; } break; case 'p': - test_args.timer_period_ms = atoi(optarg); + test_args.timer_period_ms = atoi_paranoid(optarg); if (test_args.timer_period_ms <= 0) { pr_info("Positive value needed for -p\n"); goto err; } break; case 'm': - test_args.migration_freq_ms = atoi(optarg); + test_args.migration_freq_ms = atoi_paranoid(optarg); if (test_args.migration_freq_ms < 0) { pr_info("0 or positive value needed for -m\n"); goto err; diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c index 947bd201435c..19fffdf19c9f 100644 --- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c @@ -423,7 +423,7 @@ int main(int argc, char *argv[]) while ((opt = getopt(argc, argv, "i:")) != -1) { switch (opt) { case 'i': - ss_iteration = atoi(optarg); + ss_iteration = atoi_paranoid(optarg); break; case 'h': default: diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/aarch64/vgic_irq.c index 17417220a083..ae90b718070a 100644 --- a/tools/testing/selftests/kvm/aarch64/vgic_irq.c +++ b/tools/testing/selftests/kvm/aarch64/vgic_irq.c @@ -824,16 +824,16 @@ int main(int argc, char **argv) while ((opt = getopt(argc, argv, "hn:e:l:")) != -1) { switch (opt) { case 'n': - nr_irqs = atoi(optarg); + nr_irqs = atoi_paranoid(optarg); if (nr_irqs > 1024 || nr_irqs % 32) help(argv[0]); break; case 'e': - eoi_split = (bool)atoi(optarg); + 
eoi_split = (bool)atoi_paranoid(optarg); default_args = false; break; case 'l': - level_sensitive = (bool)atoi(optarg); + level_sensitive = (bool)atoi_paranoid(optarg); default_args = false; break; case 'h': diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c index 76c583a07ea2..c6bcc5301e2c 100644 --- a/tools/testing/selftests/kvm/access_tracking_perf_test.c +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -368,7 +368,7 @@ int main(int argc, char *argv[]) params.vcpu_memory_bytes = parse_size(optarg); break; case 'v': - params.nr_vcpus = atoi(optarg); + params.nr_vcpus = atoi_paranoid(optarg); break; case 'o': overlap_memory_access = true; diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 779ae54f89c4..82597fb04146 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -427,7 +427,7 @@ int main(int argc, char *argv[]) p.src_type = parse_backing_src_type(optarg); break; case 'v': - nr_vcpus = atoi(optarg); + nr_vcpus = atoi_paranoid(optarg); TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, "Invalid number of vcpus, must be between 1 and %d", max_vcpus); break; diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index 5bb6954b2358..ecda802b78ff 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -416,7 +416,7 @@ int main(int argc, char *argv[]) run_vcpus_while_disabling_dirty_logging = true; break; case 'f': - p.wr_fract = atoi(optarg); + p.wr_fract = atoi_paranoid(optarg); TEST_ASSERT(p.wr_fract >= 1, "Write fraction cannot be less than one"); break; @@ -427,7 +427,7 @@ int main(int argc, char *argv[]) help(argv[0]); break; case 'i': - p.iterations = atoi(optarg); + p.iterations = atoi_paranoid(optarg); break; 
case 'm': guest_modes_cmdline(optarg); @@ -445,12 +445,12 @@ int main(int argc, char *argv[]) p.backing_src = parse_backing_src_type(optarg); break; case 'v': - nr_vcpus = atoi(optarg); + nr_vcpus = atoi_paranoid(optarg); TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, "Invalid number of vcpus, must be between 1 and %d", max_vcpus); break; case 'x': - p.slots = atoi(optarg); + p.slots = atoi_paranoid(optarg); break; default: help(argv[0]); diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index befc754ce9b3..feae42863759 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -152,4 +152,6 @@ static inline void *align_ptr_up(void *x, size_t size) return (void *)align_up((unsigned long)x, size); } +int atoi_paranoid(const char *num_str); + #endif /* SELFTEST_KVM_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c index f42c6ac6d71d..ea7feb69bb88 100644 --- a/tools/testing/selftests/kvm/kvm_page_table_test.c +++ b/tools/testing/selftests/kvm/kvm_page_table_test.c @@ -461,7 +461,7 @@ int main(int argc, char *argv[]) p.test_mem_size = parse_size(optarg); break; case 'v': - nr_vcpus = atoi(optarg); + nr_vcpus = atoi_paranoid(optarg); TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, "Invalid number of vcpus, must be between 1 and %d", max_vcpus); break; diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index 6d23878bbfe1..c2d9c6827779 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -334,3 +334,22 @@ long get_run_delay(void) return val[1]; } + +int atoi_paranoid(const char *num_str) +{ + char *end_ptr; + long num; + + errno = 0; + num = strtol(num_str, &end_ptr, 0); + TEST_ASSERT(!errno, "strtol(\"%s\") failed", num_str); + TEST_ASSERT(num_str != end_ptr, + 
"strtol(\"%s\") didn't find a valid integer.", num_str); + TEST_ASSERT(*end_ptr == '\0', + "strtol(\"%s\") failed to parse trailing characters \"%s\".", + num_str, end_ptr); + TEST_ASSERT(num >= INT_MIN && num <= INT_MAX, + "%ld not in range of [%d, %d]", num, INT_MIN, INT_MAX); + + return num; +} diff --git a/tools/testing/selftests/kvm/max_guest_memory_test.c b/tools/testing/selftests/kvm/max_guest_memory_test.c index 9a6e4f3ad6b5..1595b73dc09a 100644 --- a/tools/testing/selftests/kvm/max_guest_memory_test.c +++ b/tools/testing/selftests/kvm/max_guest_memory_test.c @@ -193,15 +193,15 @@ int main(int argc, char *argv[]) while ((opt = getopt(argc, argv, "c:h:m:s:H")) != -1) { switch (opt) { case 'c': - nr_vcpus = atoi(optarg); + nr_vcpus = atoi_paranoid(optarg); TEST_ASSERT(nr_vcpus > 0, "number of vcpus must be >0"); break; case 'm': - max_mem = atoi(optarg) * size_1gb; + max_mem = atoi_paranoid(optarg) * size_1gb; TEST_ASSERT(max_mem > 0, "memory size must be >0"); break; case 's': - slot_size = atoi(optarg) * size_1gb; + slot_size = atoi_paranoid(optarg) * size_1gb; TEST_ASSERT(slot_size > 0, "slot size must be >0"); break; case 'H': diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c index bb1d17a1171b..7d19a27d80d2 100644 --- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c +++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c @@ -158,7 +158,7 @@ int main(int argc, char *argv[]) guest_modes_cmdline(optarg); break; case 'd': - p.memslot_modification_delay = strtoul(optarg, NULL, 0); + p.memslot_modification_delay = atoi_paranoid(optarg); TEST_ASSERT(p.memslot_modification_delay >= 0, "A negative delay is not supported."); break; @@ -166,7 +166,7 @@ int main(int argc, char *argv[]) guest_percpu_mem_size = parse_size(optarg); break; case 'v': - nr_vcpus = atoi(optarg); + nr_vcpus = atoi_paranoid(optarg); TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= 
max_vcpus, "Invalid number of vcpus, must be between 1 and %d", max_vcpus); @@ -175,7 +175,7 @@ int main(int argc, char *argv[]) p.partition_vcpu_memory_access = false; break; case 'i': - p.nr_memslot_modifications = atoi(optarg); + p.nr_memslot_modifications = atoi_paranoid(optarg); break; case 'h': default: diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index 44995446d942..4bae9e3f5ca1 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -885,21 +885,21 @@ static bool parse_args(int argc, char *argv[], map_unmap_verify = true; break; case 's': - targs->nslots = atoi(optarg); + targs->nslots = atoi_paranoid(optarg); if (targs->nslots <= 0 && targs->nslots != -1) { pr_info("Slot count cap has to be positive or -1 for no cap\n"); return false; } break; case 'f': - targs->tfirst = atoi(optarg); + targs->tfirst = atoi_paranoid(optarg); if (targs->tfirst < 0) { pr_info("First test to run has to be non-negative\n"); return false; } break; case 'e': - targs->tlast = atoi(optarg); + targs->tlast = atoi_paranoid(optarg); if (targs->tlast < 0 || targs->tlast >= NTESTS) { pr_info("Last test to run has to be non-negative and less than %zu\n", NTESTS); @@ -907,14 +907,14 @@ static bool parse_args(int argc, char *argv[], } break; case 'l': - targs->seconds = atoi(optarg); + targs->seconds = atoi_paranoid(optarg); if (targs->seconds < 0) { pr_info("Test length in seconds has to be non-negative\n"); return false; } break; case 'r': - targs->runs = atoi(optarg); + targs->runs = atoi_paranoid(optarg); if (targs->runs <= 0) { pr_info("Runs per test has to be positive\n"); return false; diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index 0d55f508d595..c366949c8362 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ 
b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -407,7 +407,7 @@ int main(int argc, char *argv[]) #ifdef __x86_64__ if (argc > 1) - loops = atoi(argv[1]); + loops = atoi_paranoid(argv[1]); else loops = 10; diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c index 59ffe7fd354f..354b6902849c 100644 --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c +++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c @@ -241,10 +241,10 @@ int main(int argc, char **argv) while ((opt = getopt(argc, argv, "hp:t:r")) != -1) { switch (opt) { case 'p': - reclaim_period_ms = atoi(optarg); + reclaim_period_ms = atoi_paranoid(optarg); break; case 't': - token = atoi(optarg); + token = atoi_paranoid(optarg); break; case 'r': reboot_permissions = true; From 69a62e2004b8bc3f9572f88a592b168345a6bbf9 Mon Sep 17 00:00:00 2001 From: Vipin Sharma Date: Thu, 3 Nov 2022 12:17:16 -0700 Subject: [PATCH 1596/4122] KVM: selftests: Use SZ_* macros from sizes.h in max_guest_memory_test.c Replace size_1gb defined in max_guest_memory_test.c with the SZ_1G, SZ_2G and SZ_4G from linux/sizes.h header file. Signed-off-by: Vipin Sharma Reviewed-by: Sean Christopherson Link: https://lore.kernel.org/r/20221103191719.1559407-5-vipinsh@google.com Signed-off-by: Sean Christopherson --- .../selftests/kvm/max_guest_memory_test.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/kvm/max_guest_memory_test.c b/tools/testing/selftests/kvm/max_guest_memory_test.c index 1595b73dc09a..8056dc5831b5 100644 --- a/tools/testing/selftests/kvm/max_guest_memory_test.c +++ b/tools/testing/selftests/kvm/max_guest_memory_test.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "kvm_util.h" #include "test_util.h" @@ -162,8 +163,7 @@ int main(int argc, char *argv[]) * just below the 4gb boundary. 
This test could create memory at * 1gb-3gb,but it's simpler to skip straight to 4gb. */ - const uint64_t size_1gb = (1 << 30); - const uint64_t start_gpa = (4ull * size_1gb); + const uint64_t start_gpa = SZ_4G; const int first_slot = 1; struct timespec time_start, time_run1, time_reset, time_run2; @@ -180,13 +180,13 @@ int main(int argc, char *argv[]) * are quite common for x86, requires changing only max_mem (KVM allows * 32k memslots, 32k * 2gb == ~64tb of guest memory). */ - slot_size = 2 * size_1gb; + slot_size = SZ_2G; max_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS); TEST_ASSERT(max_slots > first_slot, "KVM is broken"); /* All KVM MMUs should be able to survive a 128gb guest. */ - max_mem = 128 * size_1gb; + max_mem = 128ull * SZ_1G; calc_default_nr_vcpus(); @@ -197,11 +197,11 @@ int main(int argc, char *argv[]) TEST_ASSERT(nr_vcpus > 0, "number of vcpus must be >0"); break; case 'm': - max_mem = atoi_paranoid(optarg) * size_1gb; + max_mem = 1ull * atoi_paranoid(optarg) * SZ_1G; TEST_ASSERT(max_mem > 0, "memory size must be >0"); break; case 's': - slot_size = atoi_paranoid(optarg) * size_1gb; + slot_size = 1ull * atoi_paranoid(optarg) * SZ_1G; TEST_ASSERT(slot_size > 0, "slot size must be >0"); break; case 'H': @@ -245,7 +245,7 @@ int main(int argc, char *argv[]) #ifdef __x86_64__ /* Identity map memory in the guest using 1gb pages. 
*/ - for (i = 0; i < slot_size; i += size_1gb) + for (i = 0; i < slot_size; i += SZ_1G) __virt_pg_map(vm, gpa + i, gpa + i, PG_LEVEL_1G); #else for (i = 0; i < slot_size; i += vm->page_size) @@ -260,7 +260,7 @@ int main(int argc, char *argv[]) vcpus = NULL; pr_info("Running with %lugb of guest memory and %u vCPUs\n", - (gpa - start_gpa) / size_1gb, nr_vcpus); + (gpa - start_gpa) / SZ_1G, nr_vcpus); rendezvous_with_vcpus(&time_start, "spawning"); rendezvous_with_vcpus(&time_run1, "run 1"); From c15bdebb32ddc73faac5e5180d6997b360e81619 Mon Sep 17 00:00:00 2001 From: Vipin Sharma Date: Thu, 3 Nov 2022 12:17:17 -0700 Subject: [PATCH 1597/4122] KVM: selftests: Shorten the test args in memslot_modification_stress_test.c Change test args memslot_modification_delay and nr_memslot_modifications to delay and nr_iterations for simplicity. Signed-off-by: Vipin Sharma Suggested-by: Sean Christopherson Reviewed-by: Sean Christopherson Link: https://lore.kernel.org/r/20221103191719.1559407-6-vipinsh@google.com Signed-off-by: Sean Christopherson --- .../kvm/memslot_modification_stress_test.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c index 7d19a27d80d2..3a67d3637f48 100644 --- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c +++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c @@ -87,8 +87,8 @@ static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay, } struct test_params { - useconds_t memslot_modification_delay; - uint64_t nr_memslot_modifications; + useconds_t delay; + uint64_t nr_iterations; bool partition_vcpu_memory_access; }; @@ -107,8 +107,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) pr_info("Started all vCPUs\n"); - add_remove_memslot(vm, p->memslot_modification_delay, - p->nr_memslot_modifications); + add_remove_memslot(vm, p->delay, 
p->nr_iterations); run_vcpus = false; @@ -144,9 +143,8 @@ int main(int argc, char *argv[]) int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); int opt; struct test_params p = { - .memslot_modification_delay = 0, - .nr_memslot_modifications = - DEFAULT_MEMSLOT_MODIFICATION_ITERATIONS, + .delay = 0, + .nr_iterations = DEFAULT_MEMSLOT_MODIFICATION_ITERATIONS, .partition_vcpu_memory_access = true }; @@ -158,8 +156,8 @@ int main(int argc, char *argv[]) guest_modes_cmdline(optarg); break; case 'd': - p.memslot_modification_delay = atoi_paranoid(optarg); - TEST_ASSERT(p.memslot_modification_delay >= 0, + p.delay = atoi_paranoid(optarg); + TEST_ASSERT(p.delay >= 0, "A negative delay is not supported."); break; case 'b': @@ -175,7 +173,7 @@ int main(int argc, char *argv[]) p.partition_vcpu_memory_access = false; break; case 'i': - p.nr_memslot_modifications = atoi_paranoid(optarg); + p.nr_iterations = atoi_paranoid(optarg); break; case 'h': default: From 0001725d0f9b5d749540021befb67c117d566416 Mon Sep 17 00:00:00 2001 From: Vipin Sharma Date: Thu, 3 Nov 2022 12:17:18 -0700 Subject: [PATCH 1598/4122] KVM: selftests: Add atoi_positive() and atoi_non_negative() for input validation Many KVM selftests take command line arguments which are supposed to be positive (>0) or non-negative (>=0). Some tests perform this validation, while others omit the check. Add atoi_positive() and atoi_non_negative() to validate inputs in selftests before proceeding to use those values.
Signed-off-by: Vipin Sharma Reviewed-by: Sean Christopherson Link: https://lore.kernel.org/r/20221103191719.1559407-7-vipinsh@google.com Signed-off-by: Sean Christopherson --- .../selftests/kvm/aarch64/arch_timer.c | 25 ++++--------------- .../selftests/kvm/aarch64/debug-exceptions.c | 2 +- .../testing/selftests/kvm/aarch64/vgic_irq.c | 2 +- .../selftests/kvm/access_tracking_perf_test.c | 2 +- .../selftests/kvm/demand_paging_test.c | 4 +-- .../selftests/kvm/dirty_log_perf_test.c | 12 ++++----- .../testing/selftests/kvm/include/test_util.h | 16 ++++++++++++ .../selftests/kvm/kvm_page_table_test.c | 4 +-- .../selftests/kvm/max_guest_memory_test.c | 9 +++---- .../kvm/memslot_modification_stress_test.c | 10 +++----- .../testing/selftests/kvm/memslot_perf_test.c | 22 ++++------------ .../selftests/kvm/set_memory_region_test.c | 2 +- .../selftests/kvm/x86_64/nx_huge_pages_test.c | 3 +-- 13 files changed, 47 insertions(+), 66 deletions(-) diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c index 251e7ff04883..9409617fce9c 100644 --- a/tools/testing/selftests/kvm/aarch64/arch_timer.c +++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c @@ -414,36 +414,21 @@ static bool parse_args(int argc, char *argv[]) while ((opt = getopt(argc, argv, "hn:i:p:m:")) != -1) { switch (opt) { case 'n': - test_args.nr_vcpus = atoi_paranoid(optarg); - if (test_args.nr_vcpus <= 0) { - pr_info("Positive value needed for -n\n"); - goto err; - } else if (test_args.nr_vcpus > KVM_MAX_VCPUS) { + test_args.nr_vcpus = atoi_positive("Number of vCPUs", optarg); + if (test_args.nr_vcpus > KVM_MAX_VCPUS) { pr_info("Max allowed vCPUs: %u\n", KVM_MAX_VCPUS); goto err; } break; case 'i': - test_args.nr_iter = atoi_paranoid(optarg); - if (test_args.nr_iter <= 0) { - pr_info("Positive value needed for -i\n"); - goto err; - } + test_args.nr_iter = atoi_positive("Number of iterations", optarg); break; case 'p': - test_args.timer_period_ms = 
atoi_paranoid(optarg); - if (test_args.timer_period_ms <= 0) { - pr_info("Positive value needed for -p\n"); - goto err; - } + test_args.timer_period_ms = atoi_positive("Periodicity", optarg); break; case 'm': - test_args.migration_freq_ms = atoi_paranoid(optarg); - if (test_args.migration_freq_ms < 0) { - pr_info("0 or positive value needed for -m\n"); - goto err; - } + test_args.migration_freq_ms = atoi_non_negative("Frequency", optarg); break; case 'h': default: diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c index 19fffdf19c9f..878c334607e1 100644 --- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c @@ -423,7 +423,7 @@ int main(int argc, char *argv[]) while ((opt = getopt(argc, argv, "i:")) != -1) { switch (opt) { case 'i': - ss_iteration = atoi_paranoid(optarg); + ss_iteration = atoi_positive("Number of iterations", optarg); break; case 'h': default: diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/aarch64/vgic_irq.c index ae90b718070a..4ead42a072b7 100644 --- a/tools/testing/selftests/kvm/aarch64/vgic_irq.c +++ b/tools/testing/selftests/kvm/aarch64/vgic_irq.c @@ -824,7 +824,7 @@ int main(int argc, char **argv) while ((opt = getopt(argc, argv, "hn:e:l:")) != -1) { switch (opt) { case 'n': - nr_irqs = atoi_paranoid(optarg); + nr_irqs = atoi_non_negative("Number of IRQs", optarg); if (nr_irqs > 1024 || nr_irqs % 32) help(argv[0]); break; diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c index c6bcc5301e2c..a81e7a7ae18f 100644 --- a/tools/testing/selftests/kvm/access_tracking_perf_test.c +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -368,7 +368,7 @@ int main(int argc, char *argv[]) params.vcpu_memory_bytes = parse_size(optarg); break; case 'v': - params.nr_vcpus = atoi_paranoid(optarg); 
+ params.nr_vcpus = atoi_positive("Number of vCPUs", optarg); break; case 'o': overlap_memory_access = true; diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 82597fb04146..0c98181fa248 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -427,8 +427,8 @@ int main(int argc, char *argv[]) p.src_type = parse_backing_src_type(optarg); break; case 'v': - nr_vcpus = atoi_paranoid(optarg); - TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, + nr_vcpus = atoi_positive("Number of vCPUs", optarg); + TEST_ASSERT(nr_vcpus <= max_vcpus, "Invalid number of vcpus, must be between 1 and %d", max_vcpus); break; case 'o': diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index ecda802b78ff..4d639683b8ef 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -416,9 +416,7 @@ int main(int argc, char *argv[]) run_vcpus_while_disabling_dirty_logging = true; break; case 'f': - p.wr_fract = atoi_paranoid(optarg); - TEST_ASSERT(p.wr_fract >= 1, - "Write fraction cannot be less than one"); + p.wr_fract = atoi_positive("Write fraction", optarg); break; case 'g': dirty_log_manual_caps = 0; @@ -427,7 +425,7 @@ int main(int argc, char *argv[]) help(argv[0]); break; case 'i': - p.iterations = atoi_paranoid(optarg); + p.iterations = atoi_positive("Number of iterations", optarg); break; case 'm': guest_modes_cmdline(optarg); @@ -445,12 +443,12 @@ int main(int argc, char *argv[]) p.backing_src = parse_backing_src_type(optarg); break; case 'v': - nr_vcpus = atoi_paranoid(optarg); - TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, + nr_vcpus = atoi_positive("Number of vCPUs", optarg); + TEST_ASSERT(nr_vcpus <= max_vcpus, "Invalid number of vcpus, must be between 1 and %d", max_vcpus); break; case 'x': - p.slots = 
atoi_paranoid(optarg); + p.slots = atoi_positive("Number of slots", optarg); break; default: help(argv[0]); diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index feae42863759..3be98e81189a 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -154,4 +154,20 @@ static inline void *align_ptr_up(void *x, size_t size) int atoi_paranoid(const char *num_str); +static inline uint32_t atoi_positive(const char *name, const char *num_str) +{ + int num = atoi_paranoid(num_str); + + TEST_ASSERT(num > 0, "%s must be greater than 0, got '%s'", name, num_str); + return num; +} + +static inline uint32_t atoi_non_negative(const char *name, const char *num_str) +{ + int num = atoi_paranoid(num_str); + + TEST_ASSERT(num >= 0, "%s must be non-negative, got '%s'", name, num_str); + return num; +} + #endif /* SELFTEST_KVM_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c index ea7feb69bb88..696b366be06b 100644 --- a/tools/testing/selftests/kvm/kvm_page_table_test.c +++ b/tools/testing/selftests/kvm/kvm_page_table_test.c @@ -461,8 +461,8 @@ int main(int argc, char *argv[]) p.test_mem_size = parse_size(optarg); break; case 'v': - nr_vcpus = atoi_paranoid(optarg); - TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, + nr_vcpus = atoi_positive("Number of vCPUs", optarg); + TEST_ASSERT(nr_vcpus <= max_vcpus, "Invalid number of vcpus, must be between 1 and %d", max_vcpus); break; case 's': diff --git a/tools/testing/selftests/kvm/max_guest_memory_test.c b/tools/testing/selftests/kvm/max_guest_memory_test.c index 8056dc5831b5..feaf2be20ff2 100644 --- a/tools/testing/selftests/kvm/max_guest_memory_test.c +++ b/tools/testing/selftests/kvm/max_guest_memory_test.c @@ -193,16 +193,13 @@ int main(int argc, char *argv[]) while ((opt = getopt(argc, argv, "c:h:m:s:H")) != -1) { switch (opt) { case 
'c': - nr_vcpus = atoi_paranoid(optarg); - TEST_ASSERT(nr_vcpus > 0, "number of vcpus must be >0"); + nr_vcpus = atoi_positive("Number of vCPUs", optarg); break; case 'm': - max_mem = 1ull * atoi_paranoid(optarg) * SZ_1G; - TEST_ASSERT(max_mem > 0, "memory size must be >0"); + max_mem = 1ull * atoi_positive("Memory size", optarg) * SZ_1G; break; case 's': - slot_size = 1ull * atoi_paranoid(optarg) * SZ_1G; - TEST_ASSERT(slot_size > 0, "slot size must be >0"); + slot_size = 1ull * atoi_positive("Slot size", optarg) * SZ_1G; break; case 'H': hugepages = true; diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c index 3a67d3637f48..4bdfc910ba4d 100644 --- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c +++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c @@ -156,16 +156,14 @@ int main(int argc, char *argv[]) guest_modes_cmdline(optarg); break; case 'd': - p.delay = atoi_paranoid(optarg); - TEST_ASSERT(p.delay >= 0, - "A negative delay is not supported."); + p.delay = atoi_non_negative("Delay", optarg); break; case 'b': guest_percpu_mem_size = parse_size(optarg); break; case 'v': - nr_vcpus = atoi_paranoid(optarg); - TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, + nr_vcpus = atoi_positive("Number of vCPUs", optarg); + TEST_ASSERT(nr_vcpus <= max_vcpus, "Invalid number of vcpus, must be between 1 and %d", max_vcpus); break; @@ -173,7 +171,7 @@ int main(int argc, char *argv[]) p.partition_vcpu_memory_access = false; break; case 'i': - p.nr_iterations = atoi_paranoid(optarg); + p.nr_iterations = atoi_positive("Number of iterations", optarg); break; case 'h': default: diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index 4bae9e3f5ca1..330aaef1c02f 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -892,33 +892,21 @@ 
static bool parse_args(int argc, char *argv[], } break; case 'f': - targs->tfirst = atoi_paranoid(optarg); - if (targs->tfirst < 0) { - pr_info("First test to run has to be non-negative\n"); - return false; - } + targs->tfirst = atoi_non_negative("First test", optarg); break; case 'e': - targs->tlast = atoi_paranoid(optarg); - if (targs->tlast < 0 || targs->tlast >= NTESTS) { + targs->tlast = atoi_non_negative("Last test", optarg); + if (targs->tlast >= NTESTS) { pr_info("Last test to run has to be non-negative and less than %zu\n", NTESTS); return false; } break; case 'l': - targs->seconds = atoi_paranoid(optarg); - if (targs->seconds < 0) { - pr_info("Test length in seconds has to be non-negative\n"); - return false; - } + targs->seconds = atoi_non_negative("Test length", optarg); break; case 'r': - targs->runs = atoi_paranoid(optarg); - if (targs->runs <= 0) { - pr_info("Runs per test has to be positive\n"); - return false; - } + targs->runs = atoi_positive("Runs per test", optarg); break; } } diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index c366949c8362..85c16f09a50e 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -407,7 +407,7 @@ int main(int argc, char *argv[]) #ifdef __x86_64__ if (argc > 1) - loops = atoi_paranoid(argv[1]); + loops = atoi_positive("Number of iterations", argv[1]); else loops = 10; diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c index 354b6902849c..ea0978f22db8 100644 --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c +++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c @@ -241,7 +241,7 @@ int main(int argc, char **argv) while ((opt = getopt(argc, argv, "hp:t:r")) != -1) { switch (opt) { case 'p': - reclaim_period_ms = atoi_paranoid(optarg); + reclaim_period_ms = atoi_non_negative("Reclaim
period", optarg); break; case 't': token = atoi_paranoid(optarg); @@ -257,7 +257,6 @@ int main(int argc, char **argv) } TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_DISABLE_NX_HUGE_PAGES)); - TEST_REQUIRE(reclaim_period_ms > 0); __TEST_REQUIRE(token == MAGIC_TOKEN, "This test must be run with the magic token %d.\n" From d886724ea81c6a4dc5e37d4ee09287a31ab8335e Mon Sep 17 00:00:00 2001 From: Vipin Sharma Date: Thu, 3 Nov 2022 12:17:19 -0700 Subject: [PATCH 1599/4122] KVM: selftests: Allowing running dirty_log_perf_test on specific CPUs Add a command line option, -c, to pin vCPUs to physical CPUs (pCPUs), i.e. to force vCPUs to run on specific pCPUs. The requirement for this feature came up in the discussion of the patch "Make page tables for eager page splitting NUMA aware" https://lore.kernel.org/lkml/YuhPT2drgqL+osLl@google.com/ This feature is useful as it provides a way to analyze performance based on where the vCPUs and the dirty log worker run, e.g. on different NUMA nodes or on the same NUMA node. To keep things simple, the implementation is intentionally very limited: either all of the vCPUs are pinned, optionally followed by the main thread, or nothing is pinned.
Signed-off-by: Vipin Sharma Suggested-by: David Matlack Reviewed-by: Sean Christopherson Link: https://lore.kernel.org/r/20221103191719.1559407-8-vipinsh@google.com Signed-off-by: Sean Christopherson --- .../selftests/kvm/dirty_log_perf_test.c | 25 ++++++++- .../selftests/kvm/include/kvm_util_base.h | 4 ++ .../selftests/kvm/include/perf_test_util.h | 4 ++ tools/testing/selftests/kvm/lib/kvm_util.c | 54 +++++++++++++++++++ .../selftests/kvm/lib/perf_test_util.c | 8 ++- 5 files changed, 92 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index 4d639683b8ef..0612158329aa 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -353,7 +353,7 @@ static void help(char *name) puts(""); printf("usage: %s [-h] [-i iterations] [-p offset] [-g] " "[-m mode] [-n] [-b vcpu bytes] [-v vcpus] [-o] [-s mem type]" - "[-x memslots]\n", name); + "[-x memslots] [-c physical cpus to run test on]\n", name); puts(""); printf(" -i: specify iteration counts (default: %"PRIu64")\n", TEST_HOST_LOOP_N); @@ -383,6 +383,17 @@ static void help(char *name) backing_src_help("-s"); printf(" -x: Split the memory region into this number of memslots.\n" " (default: 1)\n"); + printf(" -c: Pin tasks to physical CPUs. Takes a list of comma separated\n" + " values (target pCPU), one for each vCPU, plus an optional\n" + " entry for the main application task (specified via entry\n" + " <nr_vcpus + 1>). If used, entries must be provided for all\n" + " vCPUs, i.e. pinning vCPUs is all or nothing.\n\n" + " E.g.
to create 3 vCPUs, pin vCPU0=>pCPU22, vCPU1=>pCPU23,\n" + " vCPU2=>pCPU24, and pin the application task to pCPU50:\n\n" + " ./dirty_log_perf_test -v 3 -c 22,23,24,50\n\n" + " To leave the application task unpinned, drop the final entry:\n\n" + " ./dirty_log_perf_test -v 3 -c 22,23,24\n\n" + " (default: no pinning)\n"); puts(""); exit(0); } @@ -390,6 +401,7 @@ static void help(char *name) int main(int argc, char *argv[]) { int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); + const char *pcpu_list = NULL; struct test_params p = { .iterations = TEST_HOST_LOOP_N, .wr_fract = 1, @@ -406,11 +418,14 @@ int main(int argc, char *argv[]) guest_modes_append_default(); - while ((opt = getopt(argc, argv, "b:ef:ghi:m:nop:s:v:x:")) != -1) { + while ((opt = getopt(argc, argv, "b:c:ef:ghi:m:nop:s:v:x:")) != -1) { switch (opt) { case 'b': guest_percpu_mem_size = parse_size(optarg); break; + case 'c': + pcpu_list = optarg; + break; case 'e': /* 'e' is for evil. */ run_vcpus_while_disabling_dirty_logging = true; @@ -456,6 +471,12 @@ int main(int argc, char *argv[]) } } + if (pcpu_list) { + kvm_parse_vcpu_pinning(pcpu_list, perf_test_args.vcpu_to_pcpu, + nr_vcpus); + perf_test_args.pin_vcpus = true; + } + TEST_ASSERT(p.iterations >= 2, "The test should have at least two iterations"); pr_info("Test iterations: %"PRIu64"\n", p.iterations); diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h index e42a09cd24a0..3bf2333ef95d 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -688,6 +688,10 @@ static inline struct kvm_vm *vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm); +void kvm_pin_this_task_to_pcpu(uint32_t pcpu); +void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], + int nr_vcpus); + unsigned long vm_compute_max_gfn(struct kvm_vm *vm); unsigned int 
vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size); unsigned int vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages); diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h index eaa88df0555a..849c875dd0ff 100644 --- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -39,6 +39,10 @@ struct perf_test_args { /* Run vCPUs in L2 instead of L1, if the architecture supports it. */ bool nested; + /* True if all vCPUs are pinned to pCPUs */ + bool pin_vcpus; + /* The vCPU=>pCPU pinning map. Only valid if pin_vcpus is true. */ + uint32_t vcpu_to_pcpu[KVM_MAX_VCPUS]; struct perf_test_vcpu_args vcpu_args[KVM_MAX_VCPUS]; }; diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index f1cb1627161f..3b7710fb3784 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -11,6 +11,7 @@ #include "processor.h" #include +#include #include #include #include @@ -443,6 +444,59 @@ struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm) return vm_vcpu_recreate(vm, 0); } +void kvm_pin_this_task_to_pcpu(uint32_t pcpu) +{ + cpu_set_t mask; + int r; + + CPU_ZERO(&mask); + CPU_SET(pcpu, &mask); + r = sched_setaffinity(0, sizeof(mask), &mask); + TEST_ASSERT(!r, "sched_setaffinity() failed for pCPU '%u'.\n", pcpu); +} + +static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask) +{ + uint32_t pcpu = atoi_non_negative("CPU number", cpu_str); + + TEST_ASSERT(CPU_ISSET(pcpu, allowed_mask), + "Not allowed to run on pCPU '%d', check cgroups?\n", pcpu); + return pcpu; +} + +void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], + int nr_vcpus) +{ + cpu_set_t allowed_mask; + char *cpu, *cpu_list; + char delim[2] = ","; + int i, r; + + cpu_list = strdup(pcpus_string); + TEST_ASSERT(cpu_list, 
"strdup() allocation failed.\n"); + + r = sched_getaffinity(0, sizeof(allowed_mask), &allowed_mask); + TEST_ASSERT(!r, "sched_getaffinity() failed"); + + cpu = strtok(cpu_list, delim); + + /* 1. Get all pcpus for vcpus. */ + for (i = 0; i < nr_vcpus; i++) { + TEST_ASSERT(cpu, "pCPU not provided for vCPU '%d'\n", i); + vcpu_to_pcpu[i] = parse_pcpu(cpu, &allowed_mask); + cpu = strtok(NULL, delim); + } + + /* 2. Check if the main worker needs to be pinned. */ + if (cpu) { + kvm_pin_this_task_to_pcpu(parse_pcpu(cpu, &allowed_mask)); + cpu = strtok(NULL, delim); + } + + TEST_ASSERT(!cpu, "pCPU list contains trailing garbage characters '%s'", cpu); + free(cpu_list); +} + /* * Userspace Memory Region Find * diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c index 9618b37c66f7..3a1d0a44419b 100644 --- a/tools/testing/selftests/kvm/lib/perf_test_util.c +++ b/tools/testing/selftests/kvm/lib/perf_test_util.c @@ -2,6 +2,8 @@ /* * Copyright (C) 2020, Google LLC. 
*/ +#define _GNU_SOURCE + #include #include "kvm_util.h" @@ -243,6 +245,10 @@ void __weak perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_v static void *vcpu_thread_main(void *data) { struct vcpu_thread *vcpu = data; + int vcpu_idx = vcpu->vcpu_idx; + + if (perf_test_args.pin_vcpus) + kvm_pin_this_task_to_pcpu(perf_test_args.vcpu_to_pcpu[vcpu_idx]); WRITE_ONCE(vcpu->running, true); @@ -255,7 +261,7 @@ static void *vcpu_thread_main(void *data) while (!READ_ONCE(all_vcpu_threads_running)) ; - vcpu_thread_fn(&perf_test_args.vcpu_args[vcpu->vcpu_idx]); + vcpu_thread_fn(&perf_test_args.vcpu_args[vcpu_idx]); return NULL; } From b31f21a7e97eee501db86714868d84377e68e4df Mon Sep 17 00:00:00 2001 From: Colton Lewis Date: Mon, 7 Nov 2022 18:22:05 +0000 Subject: [PATCH 1600/4122] KVM: selftests: implement random number generator for guest code Implement random number generator for guest code to randomize parts of the test, making it less predictable and a more accurate reflection of reality. The random number generator chosen is the Park-Miller Linear Congruential Generator, a fancy name for a basic and well-understood random number generator entirely sufficient for this purpose. 
Signed-off-by: Colton Lewis Reviewed-by: Sean Christopherson Reviewed-by: David Matlack Link: https://lore.kernel.org/r/20221107182208.479157-2-coltonlewis@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/include/test_util.h | 7 +++++++ tools/testing/selftests/kvm/lib/test_util.c | 17 +++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index 3be98e81189a..80d6416f3012 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -77,6 +77,13 @@ struct timespec timespec_sub(struct timespec ts1, struct timespec ts2); struct timespec timespec_elapsed(struct timespec start); struct timespec timespec_div(struct timespec ts, int divisor); +struct guest_random_state { + uint32_t seed; +}; + +struct guest_random_state new_guest_random_state(uint32_t seed); +uint32_t guest_random_u32(struct guest_random_state *state); + enum vm_mem_backing_src_type { VM_MEM_SRC_ANONYMOUS, VM_MEM_SRC_ANONYMOUS_THP, diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index c2d9c6827779..5c22fa4c2825 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -17,6 +17,23 @@ #include "test_util.h" +/* + * Random number generator that is usable from guest code. This is the + * Park-Miller LCG using standard constants. + */ + +struct guest_random_state new_guest_random_state(uint32_t seed) +{ + struct guest_random_state s = {.seed = seed}; + return s; +} + +uint32_t guest_random_u32(struct guest_random_state *state) +{ + state->seed = (uint64_t)state->seed * 48271 % ((uint32_t)(1 << 31) - 1); + return state->seed; +} + /* * Parses "[0-9]+[kmgt]?". 
*/ From f11aa24bdbc66a10378d28ee962b95426e8d2a09 Mon Sep 17 00:00:00 2001 From: Colton Lewis Date: Mon, 7 Nov 2022 18:22:06 +0000 Subject: [PATCH 1601/4122] KVM: selftests: create -r argument to specify random seed Create a -r argument to specify a random seed. If no argument is provided, the seed defaults to 1. The random seed is set with perf_test_set_random_seed() and must be set before guest_code runs to apply. Signed-off-by: Colton Lewis Reviewed-by: David Matlack Link: https://lore.kernel.org/r/20221107182208.479157-3-coltonlewis@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/dirty_log_perf_test.c | 12 ++++++++++-- tools/testing/selftests/kvm/include/perf_test_util.h | 2 ++ tools/testing/selftests/kvm/lib/perf_test_util.c | 6 ++++++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index 0612158329aa..eb63ca12b519 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -132,6 +132,7 @@ struct test_params { bool partition_vcpu_memory_access; enum vm_mem_backing_src_type backing_src; int slots; + uint32_t random_seed; }; static void toggle_dirty_logging(struct kvm_vm *vm, int slots, bool enable) @@ -225,6 +226,8 @@ static void run_test(enum vm_guest_mode mode, void *arg) p->slots, p->backing_src, p->partition_vcpu_memory_access); + pr_info("Random seed: %u\n", p->random_seed); + perf_test_set_random_seed(vm, p->random_seed); perf_test_set_wr_fract(vm, p->wr_fract); guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm->page_shift; @@ -352,7 +355,7 @@ static void help(char *name) { puts(""); printf("usage: %s [-h] [-i iterations] [-p offset] [-g] " - "[-m mode] [-n] [-b vcpu bytes] [-v vcpus] [-o] [-s mem type]" + "[-m mode] [-n] [-b vcpu bytes] [-v vcpus] [-o] [-r random seed ] [-s mem type]" "[-x memslots] [-c physical cpus to run test on]\n", 
name); puts(""); printf(" -i: specify iteration counts (default: %"PRIu64")\n", @@ -380,6 +383,7 @@ static void help(char *name) printf(" -v: specify the number of vCPUs to run.\n"); printf(" -o: Overlap guest memory accesses instead of partitioning\n" " them into a separate region of memory for each vCPU.\n"); + printf(" -r: specify the starting random seed.\n"); backing_src_help("-s"); printf(" -x: Split the memory region into this number of memslots.\n" " (default: 1)\n"); @@ -408,6 +412,7 @@ int main(int argc, char *argv[]) .partition_vcpu_memory_access = true, .backing_src = DEFAULT_VM_MEM_SRC, .slots = 1, + .random_seed = 1, }; int opt; @@ -418,7 +423,7 @@ int main(int argc, char *argv[]) guest_modes_append_default(); - while ((opt = getopt(argc, argv, "b:c:ef:ghi:m:nop:s:v:x:")) != -1) { + while ((opt = getopt(argc, argv, "b:c:ef:ghi:m:nop:r:s:v:x:")) != -1) { switch (opt) { case 'b': guest_percpu_mem_size = parse_size(optarg); @@ -454,6 +459,9 @@ int main(int argc, char *argv[]) case 'p': p.phys_offset = strtoull(optarg, NULL, 0); break; + case 'r': + p.random_seed = atoi_positive("Random seed", optarg); + break; case 's': p.backing_src = parse_backing_src_type(optarg); break; diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h index 849c875dd0ff..5a0e48b625a2 100644 --- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -35,6 +35,7 @@ struct perf_test_args { uint64_t gpa; uint64_t size; uint64_t guest_page_size; + uint32_t random_seed; int wr_fract; /* Run vCPUs in L2 instead of L1, if the architecture supports it. 
*/ @@ -56,6 +57,7 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus, void perf_test_destroy_vm(struct kvm_vm *vm); void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract); +void perf_test_set_random_seed(struct kvm_vm *vm, uint32_t random_seed); void perf_test_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct perf_test_vcpu_args *)); void perf_test_join_vcpu_threads(int vcpus); diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c index 3a1d0a44419b..d48ee4f604f0 100644 --- a/tools/testing/selftests/kvm/lib/perf_test_util.c +++ b/tools/testing/selftests/kvm/lib/perf_test_util.c @@ -231,6 +231,12 @@ void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract) sync_global_to_guest(vm, perf_test_args); } +void perf_test_set_random_seed(struct kvm_vm *vm, uint32_t random_seed) +{ + perf_test_args.random_seed = random_seed; + sync_global_to_guest(vm, perf_test_args.random_seed); +} + uint64_t __weak perf_test_nested_pages(int nr_vcpus) { return 0; From 6864c6442f4dfa02c7cf48199cf3ea6bb1fe74ed Mon Sep 17 00:00:00 2001 From: Colton Lewis Date: Mon, 7 Nov 2022 18:22:07 +0000 Subject: [PATCH 1602/4122] KVM: selftests: randomize which pages are written vs read Randomize which pages are written vs read using the random number generator. Change the variable wr_fract and associated function calls to write_percent that now operates as a percentage from 0 to 100 where X means each page has an X% chance of being written. Change the -f argument to -w to reflect the new variable semantics. Keep the same default of 100% writes. Population always uses 100% writes to ensure all memory is actually populated and not just mapped to the zero page. This prevents expensive copy-on-write faults from occurring during the dirty memory iterations below, which would pollute the performance results. Each vCPU calculates its own random seed by adding its index to the seed provided. 
Signed-off-by: Colton Lewis Reviewed-by: David Matlack Link: https://lore.kernel.org/r/20221107182208.479157-4-coltonlewis@google.com Signed-off-by: Sean Christopherson --- .../selftests/kvm/access_tracking_perf_test.c | 2 +- .../selftests/kvm/dirty_log_perf_test.c | 37 +++++++++++++------ .../selftests/kvm/include/perf_test_util.h | 4 +- .../selftests/kvm/lib/perf_test_util.c | 13 ++++--- 4 files changed, 36 insertions(+), 20 deletions(-) diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c index a81e7a7ae18f..c0cdf07de147 100644 --- a/tools/testing/selftests/kvm/access_tracking_perf_test.c +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -279,7 +279,7 @@ static void run_iteration(struct kvm_vm *vm, int nr_vcpus, const char *descripti static void access_memory(struct kvm_vm *vm, int nr_vcpus, enum access_type access, const char *description) { - perf_test_set_wr_fract(vm, (access == ACCESS_READ) ? INT_MAX : 1); + perf_test_set_write_percent(vm, (access == ACCESS_READ) ? 
0 : 100); iteration_work = ITERATION_ACCESS_MEMORY; run_iteration(vm, nr_vcpus, description); } diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index eb63ca12b519..e9ce50fe7295 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -128,10 +128,10 @@ static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args) struct test_params { unsigned long iterations; uint64_t phys_offset; - int wr_fract; bool partition_vcpu_memory_access; enum vm_mem_backing_src_type backing_src; int slots; + uint32_t write_percent; uint32_t random_seed; }; @@ -228,7 +228,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) pr_info("Random seed: %u\n", p->random_seed); perf_test_set_random_seed(vm, p->random_seed); - perf_test_set_wr_fract(vm, p->wr_fract); + perf_test_set_write_percent(vm, p->write_percent); guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm->page_shift; guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); @@ -251,6 +251,14 @@ static void run_test(enum vm_guest_mode mode, void *arg) for (i = 0; i < nr_vcpus; i++) vcpu_last_completed_iteration[i] = -1; + /* + * Use 100% writes during the population phase to ensure all + * memory is actually populated and not just mapped to the zero + * page. The prevents expensive copy-on-write faults from + * occurring during the dirty memory iterations below, which + * would pollute the performance results. 
+ */ + perf_test_set_write_percent(vm, 100); perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker); /* Allow the vCPUs to populate memory */ @@ -272,6 +280,8 @@ static void run_test(enum vm_guest_mode mode, void *arg) pr_info("Enabling dirty logging time: %ld.%.9lds\n\n", ts_diff.tv_sec, ts_diff.tv_nsec); + perf_test_set_write_percent(vm, p->write_percent); + while (iteration < p->iterations) { /* * Incrementing the iteration number will start the vCPUs @@ -356,7 +366,7 @@ static void help(char *name) puts(""); printf("usage: %s [-h] [-i iterations] [-p offset] [-g] " "[-m mode] [-n] [-b vcpu bytes] [-v vcpus] [-o] [-r random seed ] [-s mem type]" - "[-x memslots] [-c physical cpus to run test on]\n", name); + "[-x memslots] [-w percentage] [-c physical cpus to run test on]\n", name); puts(""); printf(" -i: specify iteration counts (default: %"PRIu64")\n", TEST_HOST_LOOP_N); @@ -376,10 +386,6 @@ static void help(char *name) printf(" -b: specify the size of the memory region which should be\n" " dirtied by each vCPU. e.g. 10M or 3G.\n" " (default: 1G)\n"); - printf(" -f: specify the fraction of pages which should be written to\n" - " as opposed to simply read, in the form\n" - " 1/.\n" - " (default: 1 i.e. all pages are written to.)\n"); printf(" -v: specify the number of vCPUs to run.\n"); printf(" -o: Overlap guest memory accesses instead of partitioning\n" " them into a separate region of memory for each vCPU.\n"); @@ -387,6 +393,11 @@ static void help(char *name) backing_src_help("-s"); printf(" -x: Split the memory region into this number of memslots.\n" " (default: 1)\n"); + printf(" -w: specify the percentage of pages which should be written to\n" + " as an integer from 0-100 inclusive. This is probabalistic,\n" + " so -w X means each page has an X%% chance of writing\n" + " and a (100-X)%% chance of reading.\n" + " (default: 100 i.e. all pages are written to.)\n"); printf(" -c: Pin tasks to physical CPUs. 
Takes a list of comma separated\n" " values (target pCPU), one for each vCPU, plus an optional\n" " entry for the main application task (specified via entry\n" @@ -408,11 +419,11 @@ int main(int argc, char *argv[]) const char *pcpu_list = NULL; struct test_params p = { .iterations = TEST_HOST_LOOP_N, - .wr_fract = 1, .partition_vcpu_memory_access = true, .backing_src = DEFAULT_VM_MEM_SRC, .slots = 1, .random_seed = 1, + .write_percent = 100, }; int opt; @@ -423,7 +434,7 @@ int main(int argc, char *argv[]) guest_modes_append_default(); - while ((opt = getopt(argc, argv, "b:c:ef:ghi:m:nop:r:s:v:x:")) != -1) { + while ((opt = getopt(argc, argv, "b:c:eghi:m:nop:r:s:v:x:w:")) != -1) { switch (opt) { case 'b': guest_percpu_mem_size = parse_size(optarg); @@ -435,9 +446,6 @@ int main(int argc, char *argv[]) /* 'e' is for evil. */ run_vcpus_while_disabling_dirty_logging = true; break; - case 'f': - p.wr_fract = atoi_positive("Write fraction", optarg); - break; case 'g': dirty_log_manual_caps = 0; break; @@ -470,6 +478,11 @@ int main(int argc, char *argv[]) TEST_ASSERT(nr_vcpus <= max_vcpus, "Invalid number of vcpus, must be between 1 and %d", max_vcpus); break; + case 'w': + p.write_percent = atoi_non_negative("Write percentage", optarg); + TEST_ASSERT(p.write_percent <= 100, + "Write percentage must be between 0 and 100"); + break; case 'x': p.slots = atoi_positive("Number of slots", optarg); break; diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h index 5a0e48b625a2..6470ca0fec4c 100644 --- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -36,7 +36,7 @@ struct perf_test_args { uint64_t size; uint64_t guest_page_size; uint32_t random_seed; - int wr_fract; + uint32_t write_percent; /* Run vCPUs in L2 instead of L1, if the architecture supports it. 
*/ bool nested; @@ -56,7 +56,7 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus, bool partition_vcpu_memory_access); void perf_test_destroy_vm(struct kvm_vm *vm); -void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract); +void perf_test_set_write_percent(struct kvm_vm *vm, uint32_t write_percent); void perf_test_set_random_seed(struct kvm_vm *vm, uint32_t random_seed); void perf_test_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct perf_test_vcpu_args *)); diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c index d48ee4f604f0..15000f71cdee 100644 --- a/tools/testing/selftests/kvm/lib/perf_test_util.c +++ b/tools/testing/selftests/kvm/lib/perf_test_util.c @@ -48,10 +48,13 @@ void perf_test_guest_code(uint32_t vcpu_idx) { struct perf_test_args *pta = &perf_test_args; struct perf_test_vcpu_args *vcpu_args = &pta->vcpu_args[vcpu_idx]; + struct guest_random_state rand_state; uint64_t gva; uint64_t pages; int i; + rand_state = new_guest_random_state(pta->random_seed + vcpu_idx); + gva = vcpu_args->gva; pages = vcpu_args->pages; @@ -62,7 +65,7 @@ void perf_test_guest_code(uint32_t vcpu_idx) for (i = 0; i < pages; i++) { uint64_t addr = gva + (i * pta->guest_page_size); - if (i % pta->wr_fract == 0) + if (guest_random_u32(&rand_state) % 100 < pta->write_percent) *(uint64_t *)addr = 0x0123456789ABCDEF; else READ_ONCE(*(uint64_t *)addr); @@ -123,7 +126,7 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus, pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); /* By default vCPUs will write to memory. */ - pta->wr_fract = 1; + pta->write_percent = 100; /* * Snapshot the non-huge page size. 
This is used by the guest code to @@ -225,10 +228,10 @@ void perf_test_destroy_vm(struct kvm_vm *vm) kvm_vm_free(vm); } -void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract) +void perf_test_set_write_percent(struct kvm_vm *vm, uint32_t write_percent) { - perf_test_args.wr_fract = wr_fract; - sync_global_to_guest(vm, perf_test_args); + perf_test_args.write_percent = write_percent; + sync_global_to_guest(vm, perf_test_args.write_percent); } void perf_test_set_random_seed(struct kvm_vm *vm, uint32_t random_seed) From c967a4752ac66cc0ef8c0b1f4914151ca8758709 Mon Sep 17 00:00:00 2001 From: Colton Lewis Date: Mon, 7 Nov 2022 18:22:08 +0000 Subject: [PATCH 1603/4122] KVM: selftests: randomize page access order Create the ability to randomize page access order with the -a argument. This includes the possibility that the same pages may be hit multiple times during an iteration or not at all. Population has random access as false to ensure all pages will be touched by population and avoid page faults in late dirty memory that would pollute the test results. 
Signed-off-by: Colton Lewis Reviewed-by: David Matlack Link: https://lore.kernel.org/r/20221107182208.479157-5-coltonlewis@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/dirty_log_perf_test.c | 11 +++++++++-- .../selftests/kvm/include/perf_test_util.h | 3 +++ tools/testing/selftests/kvm/lib/perf_test_util.c | 15 ++++++++++++++- 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index e9ce50fe7295..47cbda3580fd 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -133,6 +133,7 @@ struct test_params { int slots; uint32_t write_percent; uint32_t random_seed; + bool random_access; }; static void toggle_dirty_logging(struct kvm_vm *vm, int slots, bool enable) @@ -259,6 +260,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) * would pollute the performance results. */ perf_test_set_write_percent(vm, 100); + perf_test_set_random_access(vm, false); perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker); /* Allow the vCPUs to populate memory */ @@ -281,6 +283,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) ts_diff.tv_sec, ts_diff.tv_nsec); perf_test_set_write_percent(vm, p->write_percent); + perf_test_set_random_access(vm, p->random_access); while (iteration < p->iterations) { /* @@ -364,10 +367,11 @@ static void run_test(enum vm_guest_mode mode, void *arg) static void help(char *name) { puts(""); - printf("usage: %s [-h] [-i iterations] [-p offset] [-g] " + printf("usage: %s [-h] [-a] [-i iterations] [-p offset] [-g] " "[-m mode] [-n] [-b vcpu bytes] [-v vcpus] [-o] [-r random seed ] [-s mem type]" "[-x memslots] [-w percentage] [-c physical cpus to run test on]\n", name); puts(""); + printf(" -a: access memory randomly rather than in order.\n"); printf(" -i: specify iteration counts (default: %"PRIu64")\n", TEST_HOST_LOOP_N); printf(" -g: 
Do not enable KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2. This\n" @@ -434,8 +438,11 @@ int main(int argc, char *argv[]) guest_modes_append_default(); - while ((opt = getopt(argc, argv, "b:c:eghi:m:nop:r:s:v:x:w:")) != -1) { + while ((opt = getopt(argc, argv, "ab:c:eghi:m:nop:r:s:v:x:w:")) != -1) { switch (opt) { + case 'a': + p.random_access = true; + break; case 'b': guest_percpu_mem_size = parse_size(optarg); break; diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h index 6470ca0fec4c..75ca679059dc 100644 --- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -40,6 +40,8 @@ struct perf_test_args { /* Run vCPUs in L2 instead of L1, if the architecture supports it. */ bool nested; + /* Randomize which pages are accessed by the guest. */ + bool random_access; /* True if all vCPUs are pinned to pCPUs */ bool pin_vcpus; /* The vCPU=>pCPU pinning map. Only valid if pin_vcpus is true. 
*/ @@ -58,6 +60,7 @@ void perf_test_destroy_vm(struct kvm_vm *vm); void perf_test_set_write_percent(struct kvm_vm *vm, uint32_t write_percent); void perf_test_set_random_seed(struct kvm_vm *vm, uint32_t random_seed); +void perf_test_set_random_access(struct kvm_vm *vm, bool random_access); void perf_test_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct perf_test_vcpu_args *)); void perf_test_join_vcpu_threads(int vcpus); diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c index 15000f71cdee..3a9a3ea01a97 100644 --- a/tools/testing/selftests/kvm/lib/perf_test_util.c +++ b/tools/testing/selftests/kvm/lib/perf_test_util.c @@ -51,6 +51,8 @@ void perf_test_guest_code(uint32_t vcpu_idx) struct guest_random_state rand_state; uint64_t gva; uint64_t pages; + uint64_t addr; + uint64_t page; int i; rand_state = new_guest_random_state(pta->random_seed + vcpu_idx); @@ -63,7 +65,12 @@ void perf_test_guest_code(uint32_t vcpu_idx) while (true) { for (i = 0; i < pages; i++) { - uint64_t addr = gva + (i * pta->guest_page_size); + if (pta->random_access) + page = guest_random_u32(&rand_state) % pages; + else + page = i; + + addr = gva + (page * pta->guest_page_size); if (guest_random_u32(&rand_state) % 100 < pta->write_percent) *(uint64_t *)addr = 0x0123456789ABCDEF; @@ -240,6 +247,12 @@ void perf_test_set_random_seed(struct kvm_vm *vm, uint32_t random_seed) sync_global_to_guest(vm, perf_test_args.random_seed); } +void perf_test_set_random_access(struct kvm_vm *vm, bool random_access) +{ + perf_test_args.random_access = random_access; + sync_global_to_guest(vm, perf_test_args.random_access); +} + uint64_t __weak perf_test_nested_pages(int nr_vcpus) { return 0; From 9fda6753c9dd4594e2c66c56b48b56a326ee686f Mon Sep 17 00:00:00 2001 From: David Matlack Date: Wed, 12 Oct 2022 09:57:27 -0700 Subject: [PATCH 1604/4122] KVM: selftests: Rename perf_test_util.[ch] to memstress.[ch] Rename the perf_test_util.[ch] files to 
memstress.[ch]. Symbols are renamed in the following commit to reduce the amount of churn here in hopes of playing nice with git's file rename detection. The name "memstress" was chosen to better describe the functionality provided by this library, which is to create and run a VM that reads/writes to guest memory on all vCPUs in parallel. "memstress" also contains the same number of characters as "perf_test", making it a drop-in replacement in symbols, e.g. function names, without impacting line lengths. Also the lack of underscore between "mem" and "stress" makes it clear "memstress" is a noun. Signed-off-by: David Matlack Reviewed-by: Sean Christopherson Link: https://lore.kernel.org/r/20221012165729.3505266-2-dmatlack@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/Makefile | 4 ++-- tools/testing/selftests/kvm/access_tracking_perf_test.c | 2 +- tools/testing/selftests/kvm/demand_paging_test.c | 2 +- tools/testing/selftests/kvm/dirty_log_perf_test.c | 2 +- .../kvm/include/{perf_test_util.h => memstress.h} | 8 ++++---- .../selftests/kvm/lib/{perf_test_util.c => memstress.c} | 2 +- .../kvm/lib/x86_64/{perf_test_util.c => memstress.c} | 4 ++-- .../selftests/kvm/memslot_modification_stress_test.c | 4 ++-- 8 files changed, 14 insertions(+), 14 deletions(-) rename tools/testing/selftests/kvm/include/{perf_test_util.h => memstress.h} (91%) rename tools/testing/selftests/kvm/lib/{perf_test_util.c => memstress.c} (99%) rename tools/testing/selftests/kvm/lib/x86_64/{perf_test_util.c => memstress.c} (97%) diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 0172eb6cb6ee..a00253b79040 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -43,7 +43,7 @@ LIBKVM += lib/elf.c LIBKVM += lib/guest_modes.c LIBKVM += lib/io.c LIBKVM += lib/kvm_util.c -LIBKVM += lib/perf_test_util.c +LIBKVM += lib/memstress.c LIBKVM += lib/rbtree.c LIBKVM += lib/sparsebit.c LIBKVM += 
lib/test_util.c @@ -52,7 +52,7 @@ LIBKVM_STRING += lib/string_override.c LIBKVM_x86_64 += lib/x86_64/apic.c LIBKVM_x86_64 += lib/x86_64/handlers.S -LIBKVM_x86_64 += lib/x86_64/perf_test_util.c +LIBKVM_x86_64 += lib/x86_64/memstress.c LIBKVM_x86_64 += lib/x86_64/processor.c LIBKVM_x86_64 += lib/x86_64/svm.c LIBKVM_x86_64 += lib/x86_64/ucall.c diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c index c0cdf07de147..534d18cc4a6a 100644 --- a/tools/testing/selftests/kvm/access_tracking_perf_test.c +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -44,7 +44,7 @@ #include "kvm_util.h" #include "test_util.h" -#include "perf_test_util.h" +#include "memstress.h" #include "guest_modes.h" /* Global variable used to synchronize all of the vCPU threads. */ diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 0c98181fa248..37501e83d1d8 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -20,7 +20,7 @@ #include "kvm_util.h" #include "test_util.h" -#include "perf_test_util.h" +#include "memstress.h" #include "guest_modes.h" #ifdef __NR_userfaultfd diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index 47cbda3580fd..d2bac493da5d 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -16,7 +16,7 @@ #include "kvm_util.h" #include "test_util.h" -#include "perf_test_util.h" +#include "memstress.h" #include "guest_modes.h" #ifdef __aarch64__ diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/memstress.h similarity index 91% rename from tools/testing/selftests/kvm/include/perf_test_util.h rename to tools/testing/selftests/kvm/include/memstress.h index 75ca679059dc..64a523e06125 100644 
--- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ b/tools/testing/selftests/kvm/include/memstress.h @@ -1,12 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 /* - * tools/testing/selftests/kvm/include/perf_test_util.h + * tools/testing/selftests/kvm/include/memstress.h * * Copyright (C) 2020, Google LLC. */ -#ifndef SELFTEST_KVM_PERF_TEST_UTIL_H -#define SELFTEST_KVM_PERF_TEST_UTIL_H +#ifndef SELFTEST_KVM_MEMSTRESS_H +#define SELFTEST_KVM_MEMSTRESS_H #include @@ -69,4 +69,4 @@ void perf_test_guest_code(uint32_t vcpu_id); uint64_t perf_test_nested_pages(int nr_vcpus); void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[]); -#endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */ +#endif /* SELFTEST_KVM_MEMSTRESS_H */ diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/memstress.c similarity index 99% rename from tools/testing/selftests/kvm/lib/perf_test_util.c rename to tools/testing/selftests/kvm/lib/memstress.c index 3a9a3ea01a97..72f88e5851dd 100644 --- a/tools/testing/selftests/kvm/lib/perf_test_util.c +++ b/tools/testing/selftests/kvm/lib/memstress.c @@ -7,7 +7,7 @@ #include #include "kvm_util.h" -#include "perf_test_util.h" +#include "memstress.h" #include "processor.h" struct perf_test_args perf_test_args; diff --git a/tools/testing/selftests/kvm/lib/x86_64/perf_test_util.c b/tools/testing/selftests/kvm/lib/x86_64/memstress.c similarity index 97% rename from tools/testing/selftests/kvm/lib/x86_64/perf_test_util.c rename to tools/testing/selftests/kvm/lib/x86_64/memstress.c index 0f344a7c89c4..0bb717ac2cc5 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/perf_test_util.c +++ b/tools/testing/selftests/kvm/lib/x86_64/memstress.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * x86_64-specific extensions to perf_test_util.c. + * x86_64-specific extensions to memstress.c. * * Copyright (C) 2022, Google, Inc. 
*/ @@ -11,7 +11,7 @@ #include "test_util.h" #include "kvm_util.h" -#include "perf_test_util.h" +#include "memstress.h" #include "processor.h" #include "vmx.h" diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c index 4bdfc910ba4d..0490bd4606e5 100644 --- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c +++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c @@ -21,7 +21,7 @@ #include #include -#include "perf_test_util.h" +#include "memstress.h" #include "processor.h" #include "test_util.h" #include "guest_modes.h" @@ -72,7 +72,7 @@ static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay, int i; /* - * Add the dummy memslot just below the perf_test_util memslot, which is + * Add the dummy memslot just below the memstress memslot, which is * at the top of the guest physical address space. */ gpa = perf_test_args.gpa - pages * vm->page_size; From a008a3351feaffdcf97a2bcf90b789626585258b Mon Sep 17 00:00:00 2001 From: David Matlack Date: Wed, 12 Oct 2022 09:57:28 -0700 Subject: [PATCH 1605/4122] KVM: selftests: Rename pta (short for perf_test_args) to args Rename the local variables "pta" (which is short for perf_test_args) to args. "pta" is not an obvious acronym and using "args" mirrors "vcpu_args". 
Suggested-by: Sean Christopherson Signed-off-by: David Matlack Reviewed-by: Sean Christopherson Link: https://lore.kernel.org/r/20221012165729.3505266-3-dmatlack@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/lib/memstress.c | 60 ++++++++++----------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/memstress.c b/tools/testing/selftests/kvm/lib/memstress.c index 72f88e5851dd..255f77d86330 100644 --- a/tools/testing/selftests/kvm/lib/memstress.c +++ b/tools/testing/selftests/kvm/lib/memstress.c @@ -46,8 +46,8 @@ static struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; */ void perf_test_guest_code(uint32_t vcpu_idx) { - struct perf_test_args *pta = &perf_test_args; - struct perf_test_vcpu_args *vcpu_args = &pta->vcpu_args[vcpu_idx]; + struct perf_test_args *args = &perf_test_args; + struct perf_test_vcpu_args *vcpu_args = &args->vcpu_args[vcpu_idx]; struct guest_random_state rand_state; uint64_t gva; uint64_t pages; @@ -55,7 +55,7 @@ void perf_test_guest_code(uint32_t vcpu_idx) uint64_t page; int i; - rand_state = new_guest_random_state(pta->random_seed + vcpu_idx); + rand_state = new_guest_random_state(args->random_seed + vcpu_idx); gva = vcpu_args->gva; pages = vcpu_args->pages; @@ -65,14 +65,14 @@ void perf_test_guest_code(uint32_t vcpu_idx) while (true) { for (i = 0; i < pages; i++) { - if (pta->random_access) + if (args->random_access) page = guest_random_u32(&rand_state) % pages; else page = i; - addr = gva + (page * pta->guest_page_size); + addr = gva + (page * args->guest_page_size); - if (guest_random_u32(&rand_state) % 100 < pta->write_percent) + if (guest_random_u32(&rand_state) % 100 < args->write_percent) *(uint64_t *)addr = 0x0123456789ABCDEF; else READ_ONCE(*(uint64_t *)addr); @@ -87,12 +87,12 @@ void perf_test_setup_vcpus(struct kvm_vm *vm, int nr_vcpus, uint64_t vcpu_memory_bytes, bool partition_vcpu_memory_access) { - struct perf_test_args *pta = &perf_test_args; + struct 
perf_test_args *args = &perf_test_args; struct perf_test_vcpu_args *vcpu_args; int i; for (i = 0; i < nr_vcpus; i++) { - vcpu_args = &pta->vcpu_args[i]; + vcpu_args = &args->vcpu_args[i]; vcpu_args->vcpu = vcpus[i]; vcpu_args->vcpu_idx = i; @@ -101,20 +101,20 @@ void perf_test_setup_vcpus(struct kvm_vm *vm, int nr_vcpus, vcpu_args->gva = guest_test_virt_mem + (i * vcpu_memory_bytes); vcpu_args->pages = vcpu_memory_bytes / - pta->guest_page_size; - vcpu_args->gpa = pta->gpa + (i * vcpu_memory_bytes); + args->guest_page_size; + vcpu_args->gpa = args->gpa + (i * vcpu_memory_bytes); } else { vcpu_args->gva = guest_test_virt_mem; vcpu_args->pages = (nr_vcpus * vcpu_memory_bytes) / - pta->guest_page_size; - vcpu_args->gpa = pta->gpa; + args->guest_page_size; + vcpu_args->gpa = args->gpa; } vcpu_args_set(vcpus[i], 1, i); pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n", i, vcpu_args->gpa, vcpu_args->gpa + - (vcpu_args->pages * pta->guest_page_size)); + (vcpu_args->pages * args->guest_page_size)); } } @@ -123,7 +123,7 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus, enum vm_mem_backing_src_type backing_src, bool partition_vcpu_memory_access) { - struct perf_test_args *pta = &perf_test_args; + struct perf_test_args *args = &perf_test_args; struct kvm_vm *vm; uint64_t guest_num_pages, slot0_pages = 0; uint64_t backing_src_pagesz = get_backing_src_pagesz(backing_src); @@ -133,20 +133,20 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus, pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); /* By default vCPUs will write to memory. */ - pta->write_percent = 100; + args->write_percent = 100; /* * Snapshot the non-huge page size. This is used by the guest code to * access/dirty pages at the logging granularity. 
*/ - pta->guest_page_size = vm_guest_mode_params[mode].page_size; + args->guest_page_size = vm_guest_mode_params[mode].page_size; guest_num_pages = vm_adjust_num_guest_pages(mode, - (nr_vcpus * vcpu_memory_bytes) / pta->guest_page_size); + (nr_vcpus * vcpu_memory_bytes) / args->guest_page_size); TEST_ASSERT(vcpu_memory_bytes % getpagesize() == 0, "Guest memory size is not host page size aligned."); - TEST_ASSERT(vcpu_memory_bytes % pta->guest_page_size == 0, + TEST_ASSERT(vcpu_memory_bytes % args->guest_page_size == 0, "Guest memory size is not guest page size aligned."); TEST_ASSERT(guest_num_pages % slots == 0, "Guest memory cannot be evenly divided into %d slots.", @@ -156,7 +156,7 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus, * If using nested, allocate extra pages for the nested page tables and * in-memory data structures. */ - if (pta->nested) + if (args->nested) slot0_pages += perf_test_nested_pages(nr_vcpus); /* @@ -167,7 +167,7 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus, vm = __vm_create_with_vcpus(mode, nr_vcpus, slot0_pages + guest_num_pages, perf_test_guest_code, vcpus); - pta->vm = vm; + args->vm = vm; /* Put the test region at the top guest physical memory. */ region_end_gfn = vm->max_gfn + 1; @@ -177,8 +177,8 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus, * When running vCPUs in L2, restrict the test region to 48 bits to * avoid needing 5-level page tables to identity map L2. 
*/ - if (pta->nested) - region_end_gfn = min(region_end_gfn, (1UL << 48) / pta->guest_page_size); + if (args->nested) + region_end_gfn = min(region_end_gfn, (1UL << 48) / args->guest_page_size); #endif /* * If there should be more memory in the guest test region than there @@ -190,20 +190,20 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus, " nr_vcpus: %d wss: %" PRIx64 "]\n", guest_num_pages, region_end_gfn - 1, nr_vcpus, vcpu_memory_bytes); - pta->gpa = (region_end_gfn - guest_num_pages - 1) * pta->guest_page_size; - pta->gpa = align_down(pta->gpa, backing_src_pagesz); + args->gpa = (region_end_gfn - guest_num_pages - 1) * args->guest_page_size; + args->gpa = align_down(args->gpa, backing_src_pagesz); #ifdef __s390x__ /* Align to 1M (segment size) */ - pta->gpa = align_down(pta->gpa, 1 << 20); + args->gpa = align_down(args->gpa, 1 << 20); #endif - pta->size = guest_num_pages * pta->guest_page_size; + args->size = guest_num_pages * args->guest_page_size; pr_info("guest physical test memory: [0x%lx, 0x%lx)\n", - pta->gpa, pta->gpa + pta->size); + args->gpa, args->gpa + args->size); /* Add extra memory slots for testing */ for (i = 0; i < slots; i++) { uint64_t region_pages = guest_num_pages / slots; - vm_paddr_t region_start = pta->gpa + region_pages * pta->guest_page_size * i; + vm_paddr_t region_start = args->gpa + region_pages * args->guest_page_size * i; vm_userspace_mem_region_add(vm, backing_src, region_start, PERF_TEST_MEM_SLOT_INDEX + i, @@ -211,12 +211,12 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus, } /* Do mapping for the demand paging memory slot */ - virt_map(vm, guest_test_virt_mem, pta->gpa, guest_num_pages); + virt_map(vm, guest_test_virt_mem, args->gpa, guest_num_pages); perf_test_setup_vcpus(vm, nr_vcpus, vcpus, vcpu_memory_bytes, partition_vcpu_memory_access); - if (pta->nested) { + if (args->nested) { pr_info("Configuring vCPUs to run in L2 (nested).\n"); perf_test_setup_nested(vm, 
nr_vcpus, vcpus); } From 7812d80c0f89c2b610558e09647736b6632beb08 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Wed, 12 Oct 2022 09:57:29 -0700 Subject: [PATCH 1606/4122] KVM: selftests: Rename perf_test_util symbols to memstress Replace the perf_test_ prefix on symbol names with memstress_ to match the new file name. "memstress" better describes the functionality provided by this library, which is to provide functionality for creating and running a VM that stresses VM memory by reading and writing to guest memory on all vCPUs in parallel. "memstress" also contains the same number of characters as "perf_test", making it a drop-in replacement in symbols, e.g. function names, without impacting line lengths. Also the lack of underscore between "mem" and "stress" makes it clear "memstress" is a noun. Signed-off-by: David Matlack Reviewed-by: Sean Christopherson Link: https://lore.kernel.org/r/20221012165729.3505266-4-dmatlack@google.com Signed-off-by: Sean Christopherson --- .../selftests/kvm/access_tracking_perf_test.c | 18 ++--- .../selftests/kvm/demand_paging_test.c | 18 ++--- .../selftests/kvm/dirty_log_perf_test.c | 34 +++++----- .../testing/selftests/kvm/include/memstress.h | 30 ++++---- tools/testing/selftests/kvm/lib/memstress.c | 68 +++++++++---------- .../selftests/kvm/lib/x86_64/memstress.c | 32 ++++----- .../kvm/memslot_modification_stress_test.c | 12 ++-- 7 files changed, 106 insertions(+), 106 deletions(-) diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c index 534d18cc4a6a..02d3587cab0a 100644 --- a/tools/testing/selftests/kvm/access_tracking_perf_test.c +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -126,7 +126,7 @@ static void mark_page_idle(int page_idle_fd, uint64_t pfn) } static void mark_vcpu_memory_idle(struct kvm_vm *vm, - struct perf_test_vcpu_args *vcpu_args) + struct memstress_vcpu_args *vcpu_args) { int vcpu_idx = vcpu_args->vcpu_idx; uint64_t
base_gva = vcpu_args->gva; @@ -148,7 +148,7 @@ static void mark_vcpu_memory_idle(struct kvm_vm *vm, TEST_ASSERT(pagemap_fd > 0, "Failed to open pagemap."); for (page = 0; page < pages; page++) { - uint64_t gva = base_gva + page * perf_test_args.guest_page_size; + uint64_t gva = base_gva + page * memstress_args.guest_page_size; uint64_t pfn = lookup_pfn(pagemap_fd, vm, gva); if (!pfn) { @@ -220,10 +220,10 @@ static bool spin_wait_for_next_iteration(int *current_iteration) return true; } -static void vcpu_thread_main(struct perf_test_vcpu_args *vcpu_args) +static void vcpu_thread_main(struct memstress_vcpu_args *vcpu_args) { struct kvm_vcpu *vcpu = vcpu_args->vcpu; - struct kvm_vm *vm = perf_test_args.vm; + struct kvm_vm *vm = memstress_args.vm; int vcpu_idx = vcpu_args->vcpu_idx; int current_iteration = 0; @@ -279,7 +279,7 @@ static void run_iteration(struct kvm_vm *vm, int nr_vcpus, const char *descripti static void access_memory(struct kvm_vm *vm, int nr_vcpus, enum access_type access, const char *description) { - perf_test_set_write_percent(vm, (access == ACCESS_READ) ? 0 : 100); + memstress_set_write_percent(vm, (access == ACCESS_READ) ? 
0 : 100); iteration_work = ITERATION_ACCESS_MEMORY; run_iteration(vm, nr_vcpus, description); } @@ -303,10 +303,10 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct kvm_vm *vm; int nr_vcpus = params->nr_vcpus; - vm = perf_test_create_vm(mode, nr_vcpus, params->vcpu_memory_bytes, 1, + vm = memstress_create_vm(mode, nr_vcpus, params->vcpu_memory_bytes, 1, params->backing_src, !overlap_memory_access); - perf_test_start_vcpu_threads(nr_vcpus, vcpu_thread_main); + memstress_start_vcpu_threads(nr_vcpus, vcpu_thread_main); pr_info("\n"); access_memory(vm, nr_vcpus, ACCESS_WRITE, "Populating memory"); @@ -324,8 +324,8 @@ static void run_test(enum vm_guest_mode mode, void *arg) /* Set done to signal the vCPU threads to exit */ done = true; - perf_test_join_vcpu_threads(nr_vcpus); - perf_test_destroy_vm(vm); + memstress_join_vcpu_threads(nr_vcpus); + memstress_destroy_vm(vm); } static void help(char *name) diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 37501e83d1d8..3a977ddf07b2 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -42,7 +42,7 @@ static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; static size_t demand_paging_size; static char *guest_data_prototype; -static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args) +static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) { struct kvm_vcpu *vcpu = vcpu_args->vcpu; int vcpu_idx = vcpu_args->vcpu_idx; @@ -285,7 +285,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct kvm_vm *vm; int r, i; - vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1, + vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1, p->src_type, p->partition_vcpu_memory_access); demand_paging_size = get_backing_src_pagesz(p->src_type); @@ -307,11 +307,11 @@ static void run_test(enum vm_guest_mode mode, void *arg) TEST_ASSERT(pipefds, 
"Unable to allocate memory for pipefd"); for (i = 0; i < nr_vcpus; i++) { - struct perf_test_vcpu_args *vcpu_args; + struct memstress_vcpu_args *vcpu_args; void *vcpu_hva; void *vcpu_alias; - vcpu_args = &perf_test_args.vcpu_args[i]; + vcpu_args = &memstress_args.vcpu_args[i]; /* Cache the host addresses of the region */ vcpu_hva = addr_gpa2hva(vm, vcpu_args->gpa); @@ -329,17 +329,17 @@ static void run_test(enum vm_guest_mode mode, void *arg) pipefds[i * 2], p->uffd_mode, p->uffd_delay, &uffd_args[i], vcpu_hva, vcpu_alias, - vcpu_args->pages * perf_test_args.guest_page_size); + vcpu_args->pages * memstress_args.guest_page_size); } } pr_info("Finished creating vCPUs and starting uffd threads\n"); clock_gettime(CLOCK_MONOTONIC, &start); - perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker); + memstress_start_vcpu_threads(nr_vcpus, vcpu_worker); pr_info("Started all vCPUs\n"); - perf_test_join_vcpu_threads(nr_vcpus); + memstress_join_vcpu_threads(nr_vcpus); ts_diff = timespec_elapsed(start); pr_info("All vCPU threads joined\n"); @@ -358,10 +358,10 @@ static void run_test(enum vm_guest_mode mode, void *arg) pr_info("Total guest execution time: %ld.%.9lds\n", ts_diff.tv_sec, ts_diff.tv_nsec); pr_info("Overall demand paging rate: %f pgs/sec\n", - perf_test_args.vcpu_args[0].pages * nr_vcpus / + memstress_args.vcpu_args[0].pages * nr_vcpus / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); - perf_test_destroy_vm(vm); + memstress_destroy_vm(vm); free(guest_data_prototype); if (p->uffd_mode) { diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index d2bac493da5d..c33e89012ae6 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -67,7 +67,7 @@ static bool host_quit; static int iteration; static int vcpu_last_completed_iteration[KVM_MAX_VCPUS]; -static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args) +static void 
vcpu_worker(struct memstress_vcpu_args *vcpu_args) { struct kvm_vcpu *vcpu = vcpu_args->vcpu; int vcpu_idx = vcpu_args->vcpu_idx; @@ -141,7 +141,7 @@ static void toggle_dirty_logging(struct kvm_vm *vm, int slots, bool enable) int i; for (i = 0; i < slots; i++) { - int slot = PERF_TEST_MEM_SLOT_INDEX + i; + int slot = MEMSTRESS_MEM_SLOT_INDEX + i; int flags = enable ? KVM_MEM_LOG_DIRTY_PAGES : 0; vm_mem_region_set_flags(vm, slot, flags); @@ -163,7 +163,7 @@ static void get_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], int slots int i; for (i = 0; i < slots; i++) { - int slot = PERF_TEST_MEM_SLOT_INDEX + i; + int slot = MEMSTRESS_MEM_SLOT_INDEX + i; kvm_vm_get_dirty_log(vm, slot, bitmaps[i]); } @@ -175,7 +175,7 @@ static void clear_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], int i; for (i = 0; i < slots; i++) { - int slot = PERF_TEST_MEM_SLOT_INDEX + i; + int slot = MEMSTRESS_MEM_SLOT_INDEX + i; kvm_vm_clear_dirty_log(vm, slot, bitmaps[i], 0, pages_per_slot); } @@ -223,13 +223,13 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct timespec clear_dirty_log_total = (struct timespec){0}; int i; - vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, + vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size, p->slots, p->backing_src, p->partition_vcpu_memory_access); pr_info("Random seed: %u\n", p->random_seed); - perf_test_set_random_seed(vm, p->random_seed); - perf_test_set_write_percent(vm, p->write_percent); + memstress_set_random_seed(vm, p->random_seed); + memstress_set_write_percent(vm, p->write_percent); guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm->page_shift; guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); @@ -259,9 +259,9 @@ static void run_test(enum vm_guest_mode mode, void *arg) * occurring during the dirty memory iterations below, which * would pollute the performance results. 
*/ - perf_test_set_write_percent(vm, 100); - perf_test_set_random_access(vm, false); - perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker); + memstress_set_write_percent(vm, 100); + memstress_set_random_access(vm, false); + memstress_start_vcpu_threads(nr_vcpus, vcpu_worker); /* Allow the vCPUs to populate memory */ pr_debug("Starting iteration %d - Populating\n", iteration); @@ -282,8 +282,8 @@ static void run_test(enum vm_guest_mode mode, void *arg) pr_info("Enabling dirty logging time: %ld.%.9lds\n\n", ts_diff.tv_sec, ts_diff.tv_nsec); - perf_test_set_write_percent(vm, p->write_percent); - perf_test_set_random_access(vm, p->random_access); + memstress_set_write_percent(vm, p->write_percent); + memstress_set_random_access(vm, p->random_access); while (iteration < p->iterations) { /* @@ -345,7 +345,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) * wait for them to exit. */ host_quit = true; - perf_test_join_vcpu_threads(nr_vcpus); + memstress_join_vcpu_threads(nr_vcpus); avg = timespec_div(get_dirty_log_total, p->iterations); pr_info("Get dirty log over %lu iterations took %ld.%.9lds. 
(Avg %ld.%.9lds/iteration)\n", @@ -361,7 +361,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) free_bitmaps(bitmaps, p->slots); arch_cleanup_vm(vm); - perf_test_destroy_vm(vm); + memstress_destroy_vm(vm); } static void help(char *name) @@ -466,7 +466,7 @@ int main(int argc, char *argv[]) guest_modes_cmdline(optarg); break; case 'n': - perf_test_args.nested = true; + memstress_args.nested = true; break; case 'o': p.partition_vcpu_memory_access = false; @@ -500,9 +500,9 @@ int main(int argc, char *argv[]) } if (pcpu_list) { - kvm_parse_vcpu_pinning(pcpu_list, perf_test_args.vcpu_to_pcpu, + kvm_parse_vcpu_pinning(pcpu_list, memstress_args.vcpu_to_pcpu, nr_vcpus); - perf_test_args.pin_vcpus = true; + memstress_args.pin_vcpus = true; } TEST_ASSERT(p.iterations >= 2, "The test should have at least two iterations"); diff --git a/tools/testing/selftests/kvm/include/memstress.h b/tools/testing/selftests/kvm/include/memstress.h index 64a523e06125..bbd2a302df10 100644 --- a/tools/testing/selftests/kvm/include/memstress.h +++ b/tools/testing/selftests/kvm/include/memstress.h @@ -17,9 +17,9 @@ #define DEFAULT_PER_VCPU_MEM_SIZE (1 << 30) /* 1G */ -#define PERF_TEST_MEM_SLOT_INDEX 1 +#define MEMSTRESS_MEM_SLOT_INDEX 1 -struct perf_test_vcpu_args { +struct memstress_vcpu_args { uint64_t gpa; uint64_t gva; uint64_t pages; @@ -29,7 +29,7 @@ struct perf_test_vcpu_args { int vcpu_idx; }; -struct perf_test_args { +struct memstress_args { struct kvm_vm *vm; /* The starting address and size of the guest test region. */ uint64_t gpa; @@ -47,26 +47,26 @@ struct perf_test_args { /* The vCPU=>pCPU pinning map. Only valid if pin_vcpus is true. 
*/ uint32_t vcpu_to_pcpu[KVM_MAX_VCPUS]; - struct perf_test_vcpu_args vcpu_args[KVM_MAX_VCPUS]; + struct memstress_vcpu_args vcpu_args[KVM_MAX_VCPUS]; }; -extern struct perf_test_args perf_test_args; +extern struct memstress_args memstress_args; -struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus, +struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus, uint64_t vcpu_memory_bytes, int slots, enum vm_mem_backing_src_type backing_src, bool partition_vcpu_memory_access); -void perf_test_destroy_vm(struct kvm_vm *vm); +void memstress_destroy_vm(struct kvm_vm *vm); -void perf_test_set_write_percent(struct kvm_vm *vm, uint32_t write_percent); -void perf_test_set_random_seed(struct kvm_vm *vm, uint32_t random_seed); -void perf_test_set_random_access(struct kvm_vm *vm, bool random_access); +void memstress_set_write_percent(struct kvm_vm *vm, uint32_t write_percent); +void memstress_set_random_seed(struct kvm_vm *vm, uint32_t random_seed); +void memstress_set_random_access(struct kvm_vm *vm, bool random_access); -void perf_test_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct perf_test_vcpu_args *)); -void perf_test_join_vcpu_threads(int vcpus); -void perf_test_guest_code(uint32_t vcpu_id); +void memstress_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct memstress_vcpu_args *)); +void memstress_join_vcpu_threads(int vcpus); +void memstress_guest_code(uint32_t vcpu_id); -uint64_t perf_test_nested_pages(int nr_vcpus); -void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[]); +uint64_t memstress_nested_pages(int nr_vcpus); +void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[]); #endif /* SELFTEST_KVM_MEMSTRESS_H */ diff --git a/tools/testing/selftests/kvm/lib/memstress.c b/tools/testing/selftests/kvm/lib/memstress.c index 255f77d86330..503da78c558d 100644 --- a/tools/testing/selftests/kvm/lib/memstress.c +++ b/tools/testing/selftests/kvm/lib/memstress.c @@ -10,7 
+10,7 @@ #include "memstress.h" #include "processor.h" -struct perf_test_args perf_test_args; +struct memstress_args memstress_args; /* * Guest virtual memory offset of the testing memory slot. @@ -33,7 +33,7 @@ struct vcpu_thread { static struct vcpu_thread vcpu_threads[KVM_MAX_VCPUS]; /* The function run by each vCPU thread, as provided by the test. */ -static void (*vcpu_thread_fn)(struct perf_test_vcpu_args *); +static void (*vcpu_thread_fn)(struct memstress_vcpu_args *); /* Set to true once all vCPU threads are up and running. */ static bool all_vcpu_threads_running; @@ -44,10 +44,10 @@ static struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; * Continuously write to the first 8 bytes of each page in the * specified region. */ -void perf_test_guest_code(uint32_t vcpu_idx) +void memstress_guest_code(uint32_t vcpu_idx) { - struct perf_test_args *args = &perf_test_args; - struct perf_test_vcpu_args *vcpu_args = &args->vcpu_args[vcpu_idx]; + struct memstress_args *args = &memstress_args; + struct memstress_vcpu_args *vcpu_args = &args->vcpu_args[vcpu_idx]; struct guest_random_state rand_state; uint64_t gva; uint64_t pages; @@ -82,13 +82,13 @@ void perf_test_guest_code(uint32_t vcpu_idx) } } -void perf_test_setup_vcpus(struct kvm_vm *vm, int nr_vcpus, +void memstress_setup_vcpus(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[], uint64_t vcpu_memory_bytes, bool partition_vcpu_memory_access) { - struct perf_test_args *args = &perf_test_args; - struct perf_test_vcpu_args *vcpu_args; + struct memstress_args *args = &memstress_args; + struct memstress_vcpu_args *vcpu_args; int i; for (i = 0; i < nr_vcpus; i++) { @@ -118,12 +118,12 @@ void perf_test_setup_vcpus(struct kvm_vm *vm, int nr_vcpus, } } -struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus, +struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus, uint64_t vcpu_memory_bytes, int slots, enum vm_mem_backing_src_type backing_src, bool partition_vcpu_memory_access) { - struct 
perf_test_args *args = &perf_test_args; + struct memstress_args *args = &memstress_args; struct kvm_vm *vm; uint64_t guest_num_pages, slot0_pages = 0; uint64_t backing_src_pagesz = get_backing_src_pagesz(backing_src); @@ -157,7 +157,7 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus, * in-memory data structures. */ if (args->nested) - slot0_pages += perf_test_nested_pages(nr_vcpus); + slot0_pages += memstress_nested_pages(nr_vcpus); /* * Pass guest_num_pages to populate the page tables for test memory. @@ -165,7 +165,7 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus, * effect as KVM allows aliasing HVAs in meslots. */ vm = __vm_create_with_vcpus(mode, nr_vcpus, slot0_pages + guest_num_pages, - perf_test_guest_code, vcpus); + memstress_guest_code, vcpus); args->vm = vm; @@ -206,59 +206,59 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus, vm_paddr_t region_start = args->gpa + region_pages * args->guest_page_size * i; vm_userspace_mem_region_add(vm, backing_src, region_start, - PERF_TEST_MEM_SLOT_INDEX + i, + MEMSTRESS_MEM_SLOT_INDEX + i, region_pages, 0); } /* Do mapping for the demand paging memory slot */ virt_map(vm, guest_test_virt_mem, args->gpa, guest_num_pages); - perf_test_setup_vcpus(vm, nr_vcpus, vcpus, vcpu_memory_bytes, + memstress_setup_vcpus(vm, nr_vcpus, vcpus, vcpu_memory_bytes, partition_vcpu_memory_access); if (args->nested) { pr_info("Configuring vCPUs to run in L2 (nested).\n"); - perf_test_setup_nested(vm, nr_vcpus, vcpus); + memstress_setup_nested(vm, nr_vcpus, vcpus); } ucall_init(vm, NULL); /* Export the shared variables to the guest. 
*/ - sync_global_to_guest(vm, perf_test_args); + sync_global_to_guest(vm, memstress_args); return vm; } -void perf_test_destroy_vm(struct kvm_vm *vm) +void memstress_destroy_vm(struct kvm_vm *vm) { ucall_uninit(vm); kvm_vm_free(vm); } -void perf_test_set_write_percent(struct kvm_vm *vm, uint32_t write_percent) +void memstress_set_write_percent(struct kvm_vm *vm, uint32_t write_percent) { - perf_test_args.write_percent = write_percent; - sync_global_to_guest(vm, perf_test_args.write_percent); + memstress_args.write_percent = write_percent; + sync_global_to_guest(vm, memstress_args.write_percent); } -void perf_test_set_random_seed(struct kvm_vm *vm, uint32_t random_seed) +void memstress_set_random_seed(struct kvm_vm *vm, uint32_t random_seed) { - perf_test_args.random_seed = random_seed; - sync_global_to_guest(vm, perf_test_args.random_seed); + memstress_args.random_seed = random_seed; + sync_global_to_guest(vm, memstress_args.random_seed); } -void perf_test_set_random_access(struct kvm_vm *vm, bool random_access) +void memstress_set_random_access(struct kvm_vm *vm, bool random_access) { - perf_test_args.random_access = random_access; - sync_global_to_guest(vm, perf_test_args.random_access); + memstress_args.random_access = random_access; + sync_global_to_guest(vm, memstress_args.random_access); } -uint64_t __weak perf_test_nested_pages(int nr_vcpus) +uint64_t __weak memstress_nested_pages(int nr_vcpus) { return 0; } -void __weak perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu **vcpus) +void __weak memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu **vcpus) { pr_info("%s() not support on this architecture, skipping.\n", __func__); exit(KSFT_SKIP); @@ -269,8 +269,8 @@ static void *vcpu_thread_main(void *data) struct vcpu_thread *vcpu = data; int vcpu_idx = vcpu->vcpu_idx; - if (perf_test_args.pin_vcpus) - kvm_pin_this_task_to_pcpu(perf_test_args.vcpu_to_pcpu[vcpu_idx]); + if (memstress_args.pin_vcpus) + 
kvm_pin_this_task_to_pcpu(memstress_args.vcpu_to_pcpu[vcpu_idx]); WRITE_ONCE(vcpu->running, true); @@ -283,13 +283,13 @@ static void *vcpu_thread_main(void *data) while (!READ_ONCE(all_vcpu_threads_running)) ; - vcpu_thread_fn(&perf_test_args.vcpu_args[vcpu_idx]); + vcpu_thread_fn(&memstress_args.vcpu_args[vcpu_idx]); return NULL; } -void perf_test_start_vcpu_threads(int nr_vcpus, - void (*vcpu_fn)(struct perf_test_vcpu_args *)) +void memstress_start_vcpu_threads(int nr_vcpus, + void (*vcpu_fn)(struct memstress_vcpu_args *)) { int i; @@ -313,7 +313,7 @@ void perf_test_start_vcpu_threads(int nr_vcpus, WRITE_ONCE(all_vcpu_threads_running, true); } -void perf_test_join_vcpu_threads(int nr_vcpus) +void memstress_join_vcpu_threads(int nr_vcpus) { int i; diff --git a/tools/testing/selftests/kvm/lib/x86_64/memstress.c b/tools/testing/selftests/kvm/lib/x86_64/memstress.c index 0bb717ac2cc5..2b3b47e4a973 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/memstress.c +++ b/tools/testing/selftests/kvm/lib/x86_64/memstress.c @@ -15,21 +15,21 @@ #include "processor.h" #include "vmx.h" -void perf_test_l2_guest_code(uint64_t vcpu_id) +void memstress_l2_guest_code(uint64_t vcpu_id) { - perf_test_guest_code(vcpu_id); + memstress_guest_code(vcpu_id); vmcall(); } -extern char perf_test_l2_guest_entry[]; +extern char memstress_l2_guest_entry[]; __asm__( -"perf_test_l2_guest_entry:" +"memstress_l2_guest_entry:" " mov (%rsp), %rdi;" -" call perf_test_l2_guest_code;" +" call memstress_l2_guest_code;" " ud2;" ); -static void perf_test_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id) +static void memstress_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id) { #define L2_GUEST_STACK_SIZE 64 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; @@ -42,14 +42,14 @@ static void perf_test_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id) rsp = &l2_guest_stack[L2_GUEST_STACK_SIZE - 1]; *rsp = vcpu_id; - prepare_vmcs(vmx, perf_test_l2_guest_entry, rsp); + prepare_vmcs(vmx, 
memstress_l2_guest_entry, rsp); GUEST_ASSERT(!vmlaunch()); GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); GUEST_DONE(); } -uint64_t perf_test_nested_pages(int nr_vcpus) +uint64_t memstress_nested_pages(int nr_vcpus) { /* * 513 page tables is enough to identity-map 256 TiB of L2 with 1G @@ -59,7 +59,7 @@ uint64_t perf_test_nested_pages(int nr_vcpus) return 513 + 10 * nr_vcpus; } -void perf_test_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm) +void memstress_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm) { uint64_t start, end; @@ -72,12 +72,12 @@ void perf_test_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm) */ nested_identity_map_1g(vmx, vm, 0, 0x100000000ULL); - start = align_down(perf_test_args.gpa, PG_SIZE_1G); - end = align_up(perf_test_args.gpa + perf_test_args.size, PG_SIZE_1G); + start = align_down(memstress_args.gpa, PG_SIZE_1G); + end = align_up(memstress_args.gpa + memstress_args.size, PG_SIZE_1G); nested_identity_map_1g(vmx, vm, start, end - start); } -void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[]) +void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[]) { struct vmx_pages *vmx, *vmx0 = NULL; struct kvm_regs regs; @@ -90,7 +90,7 @@ void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vc vmx = vcpu_alloc_vmx(vm, &vmx_gva); if (vcpu_id == 0) { - perf_test_setup_ept(vmx, vm); + memstress_setup_ept(vmx, vm); vmx0 = vmx; } else { /* Share the same EPT table across all vCPUs. */ @@ -100,11 +100,11 @@ void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vc } /* - * Override the vCPU to run perf_test_l1_guest_code() which will - * bounce it into L2 before calling perf_test_guest_code(). + * Override the vCPU to run memstress_l1_guest_code() which will + * bounce it into L2 before calling memstress_guest_code(). 
*/ vcpu_regs_get(vcpus[vcpu_id], &regs); - regs.rip = (unsigned long) perf_test_l1_guest_code; + regs.rip = (unsigned long) memstress_l1_guest_code; vcpu_regs_set(vcpus[vcpu_id], &regs); vcpu_args_set(vcpus[vcpu_id], 2, vmx_gva, vcpu_id); } diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c index 0490bd4606e5..d07e921bfcc5 100644 --- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c +++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c @@ -36,7 +36,7 @@ static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; static bool run_vcpus = true; -static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args) +static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) { struct kvm_vcpu *vcpu = vcpu_args->vcpu; struct kvm_run *run; @@ -75,7 +75,7 @@ static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay, * Add the dummy memslot just below the memstress memslot, which is * at the top of the guest physical address space.
*/ - gpa = perf_test_args.gpa - pages * vm->page_size; + gpa = memstress_args.gpa - pages * vm->page_size; for (i = 0; i < nr_modifications; i++) { usleep(delay); @@ -97,13 +97,13 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct test_params *p = arg; struct kvm_vm *vm; - vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1, + vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1, VM_MEM_SRC_ANONYMOUS, p->partition_vcpu_memory_access); pr_info("Finished creating vCPUs\n"); - perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker); + memstress_start_vcpu_threads(nr_vcpus, vcpu_worker); pr_info("Started all vCPUs\n"); @@ -111,10 +111,10 @@ static void run_test(enum vm_guest_mode mode, void *arg) run_vcpus = false; - perf_test_join_vcpu_threads(nr_vcpus); + memstress_join_vcpu_threads(nr_vcpus); pr_info("All vCPU threads joined\n"); - perf_test_destroy_vm(vm); + memstress_destroy_vm(vm); } static void help(char *name) From c4d9d95f84d7f4758db76f358b03a0d6809c929b Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 9 Nov 2022 10:49:13 -0800 Subject: [PATCH 1607/4122] perf cpumap: Tidy libperf includes Use public API when possible, don't include internal API in header files in evsel.h. Fix any related breakages. 
Committer note: There was one missing case, when building for arm64: arch/arm64/util/pmu.c: In function 'pmu_events_table__find': arch/arm64/util/pmu.c:18:30: error: invalid use of undefined type 'struct perf_cpu_map' 18 | if (pmu->cpus->nr != cpu__max_cpu().cpu) | ^~ Fix it by adding one more exception, including Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andrii Nakryiko Cc: Jiri Olsa Cc: Mark Rutland Cc: Masahiro Yamada Cc: Nick Desaulniers Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Stephane Eranian Cc: bpf@vger.kernel.org Link: http://lore.kernel.org/lkml/20221109184914.1357295-14-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/util/pmu.c | 1 + tools/perf/tests/cpumap.c | 2 +- tools/perf/util/auxtrace.h | 2 +- tools/perf/util/cpumap.c | 1 + tools/perf/util/cpumap.h | 2 +- tools/perf/util/evsel.h | 2 -- 6 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/perf/arch/arm64/util/pmu.c b/tools/perf/arch/arm64/util/pmu.c index f849b1e88d43..477e513972a4 100644 --- a/tools/perf/arch/arm64/util/pmu.c +++ b/tools/perf/arch/arm64/util/pmu.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include "../../../util/cpumap.h" #include "../../../util/pmu.h" diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c index 7c873c6ae3eb..3150fc1fed6f 100644 --- a/tools/perf/tests/cpumap.c +++ b/tools/perf/tests/cpumap.c @@ -6,7 +6,7 @@ #include "util/synthetic-events.h" #include #include -#include +#include #include "debug.h" struct machine; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 6a0f9b98f059..2cf63d377831 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 6e3fcf523de9..5e564974fba4 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -12,6 +12,7 @@ 
#include #include +#include static struct perf_cpu max_cpu_num; static struct perf_cpu max_present_cpu_num; diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index da28b3146ef9..c2f5824a3a22 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -4,8 +4,8 @@ #include #include -#include #include +#include /** Identify where counts are aggregated, -1 implies not to aggregate. */ struct aggr_cpu_id { diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 467bb0b32fef..f3485799ddf9 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -10,8 +10,6 @@ #include #include #include "symbol_conf.h" -#include -#include struct bpf_object; struct cgroup; From 746bd29e348f99b44c14cb2b2a57f1d3efb66538 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 15 Nov 2022 23:22:11 -0800 Subject: [PATCH 1608/4122] perf build: Use tools/lib headers from install path Switch -I from tools/lib to the install path for the tools/lib libraries. Add the include_headers build targets to prepare target, as well as pmu-events.c compilation that depends on libperf. 
Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andrii Nakryiko Cc: Jiri Olsa Cc: Mark Rutland Cc: Masahiro Yamada Cc: Nick Desaulniers Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Stephane Eranian Cc: bpf@vger.kernel.org Link: http://lore.kernel.org/lkml/20221109184914.1357295-15-irogers@google.com Link: http://lore.kernel.org/lkml/20221116072211.2837834-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 2 -- tools/perf/Makefile.perf | 16 ++++++++++++++-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index a7f6c0669fae..9cc3c48f3288 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -349,7 +349,6 @@ ifeq ($(DEBUG),0) endif endif -INC_FLAGS += -I$(srctree)/tools/lib/perf/include INC_FLAGS += -I$(src-perf)/util/include INC_FLAGS += -I$(src-perf)/arch/$(SRCARCH)/include INC_FLAGS += -I$(srctree)/tools/include/ @@ -367,7 +366,6 @@ endif INC_FLAGS += -I$(src-perf)/util INC_FLAGS += -I$(src-perf) -INC_FLAGS += -I$(srctree)/tools/lib/ CORE_CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 6c1a2a3ccc38..8c0df762fb02 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -305,6 +305,7 @@ LIBTRACEEVENT_INCLUDE = $(LIBTRACEEVENT_DESTDIR)/include LIBTRACEEVENT = $(LIBTRACEEVENT_OUTPUT)/libtraceevent.a export LIBTRACEEVENT LIBTRACEEVENT_DYNAMIC_LIST = $(LIBTRACEEVENT_PLUGINS_OUTPUT)/libtraceevent-dynamic-list +CFLAGS += -I$(LIBTRACEEVENT_OUTPUT)/include # # The static build has no dynsym table, so this does not work for @@ -322,6 +323,7 @@ LIBAPI_DESTDIR = $(LIBAPI_OUTPUT) LIBAPI_INCLUDE = $(LIBAPI_DESTDIR)/include LIBAPI = $(LIBAPI_OUTPUT)/libapi.a export LIBAPI +CFLAGS += -I$(LIBAPI_OUTPUT)/include ifneq ($(OUTPUT),) LIBBPF_OUTPUT = $(abspath $(OUTPUT))/libbpf @@ -331,6 +333,7 @@ endif LIBBPF_DESTDIR = 
$(LIBBPF_OUTPUT) LIBBPF_INCLUDE = $(LIBBPF_DESTDIR)/include LIBBPF = $(LIBBPF_OUTPUT)/libbpf.a +CFLAGS += -I$(LIBBPF_OUTPUT)/include ifneq ($(OUTPUT),) LIBSUBCMD_OUTPUT = $(abspath $(OUTPUT))/libsubcmd @@ -340,6 +343,7 @@ endif LIBSUBCMD_DESTDIR = $(LIBSUBCMD_OUTPUT) LIBSUBCMD_INCLUDE = $(LIBSUBCMD_DESTDIR)/include LIBSUBCMD = $(LIBSUBCMD_OUTPUT)/libsubcmd.a +CFLAGS += -I$(LIBSUBCMD_OUTPUT)/include ifneq ($(OUTPUT),) LIBSYMBOL_OUTPUT = $(abspath $(OUTPUT))/libsymbol @@ -349,6 +353,7 @@ endif LIBSYMBOL_DESTDIR = $(LIBSYMBOL_OUTPUT) LIBSYMBOL_INCLUDE = $(LIBSYMBOL_DESTDIR)/include LIBSYMBOL = $(LIBSYMBOL_OUTPUT)/libsymbol.a +CFLAGS += -I$(LIBSYMBOL_OUTPUT)/include ifneq ($(OUTPUT),) LIBPERF_OUTPUT = $(abspath $(OUTPUT))/libperf @@ -359,6 +364,7 @@ LIBPERF_DESTDIR = $(LIBPERF_OUTPUT) LIBPERF_INCLUDE = $(LIBPERF_DESTDIR)/include LIBPERF = $(LIBPERF_OUTPUT)/libperf.a export LIBPERF +CFLAGS += -I$(LIBPERF_OUTPUT)/include # python extension build directories PYTHON_EXTBUILD := $(OUTPUT)python_ext_build/ @@ -691,14 +697,14 @@ build := -f $(srctree)/tools/build/Makefile.build dir=. 
obj $(PERF_IN): prepare FORCE $(Q)$(MAKE) $(build)=perf -$(PMU_EVENTS_IN): FORCE +$(PMU_EVENTS_IN): FORCE prepare $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=pmu-events obj=pmu-events $(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(PMU_EVENTS_IN) $(LIBTRACEEVENT_DYNAMIC_LIST) $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS) \ $(PERF_IN) $(PMU_EVENTS_IN) $(LIBS) -o $@ -$(GTK_IN): FORCE +$(GTK_IN): FORCE prepare $(Q)$(MAKE) $(build)=gtk $(OUTPUT)libperf-gtk.so: $(GTK_IN) $(PERFLIBS) @@ -774,6 +780,12 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc $(rename_flags_array) \ $(arch_errno_name_array) \ $(sync_file_range_arrays) \ + $(LIBAPI) \ + $(LIBBPF) \ + $(LIBPERF) \ + $(LIBSUBCMD) \ + $(LIBSYMBOL) \ + $(LIBTRACEEVENT) \ bpf-skel $(OUTPUT)%.o: %.c prepare FORCE From 35fef9b471c70413f8277984920129ddf601f5e9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 16 Nov 2022 15:40:46 -0300 Subject: [PATCH 1609/4122] libperf: Remove recursive perf/cpumap.h include from perf/cpumap.h It just hits the header guard, becoming a no-op, ditch it. Acked-by: Ian Rogers Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/include/perf/cpumap.h | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h index 03aceb72a783..98e463ec15a7 100644 --- a/tools/lib/perf/include/perf/cpumap.h +++ b/tools/lib/perf/include/perf/cpumap.h @@ -3,7 +3,6 @@ #define __LIBPERF_CPUMAP_H #include -#include #include #include From 63a3bf5e8d9e79ce456c8f73d4395a5a51d841b1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 16 Nov 2022 15:43:34 -0300 Subject: [PATCH 1610/4122] libperf: Add missing 'struct perf_cpu_map' forward declaration to perf/cpumap.h The perf/cpumap.h header is getting the 'struct perf_cpu_map' forward declaration by luck, add it. 
Acked-by: Ian Rogers Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/include/perf/cpumap.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h index 98e463ec15a7..3f43f770cdac 100644 --- a/tools/lib/perf/include/perf/cpumap.h +++ b/tools/lib/perf/include/perf/cpumap.h @@ -11,6 +11,8 @@ struct perf_cpu { int cpu; }; +struct perf_cpu_map; + LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void); LIBPERF_API struct perf_cpu_map *perf_cpu_map__default_new(void); LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list); From 86fdd15e10e404e70ecb2a3bff24d70356d42b36 Mon Sep 17 00:00:00 2001 From: Zeng Heng Date: Tue, 15 Nov 2022 10:37:12 +0800 Subject: [PATCH 1611/4122] iio: fix memory leak in iio_device_register_eventset() When iio_device_register_sysfs_group() fails, iio_device_register_eventset() needs to free the attrs array. Otherwise, kmemleak would scan & report memory leak as below: unreferenced object 0xffff88810a1cc3c0 (size 32): comm "100-i2c-vcnl302", pid 728, jiffies 4295052307 (age 156.027s) backtrace: __kmalloc+0x46/0x1b0 iio_device_register_eventset at drivers/iio/industrialio-event.c:541 __iio_device_register at drivers/iio/industrialio-core.c:1959 __devm_iio_device_register at drivers/iio/industrialio-core.c:2040 Fixes: 32f171724e5c ("iio: core: rework iio device group creation") Signed-off-by: Zeng Heng Link: https://lore.kernel.org/r/20221115023712.3726854-1-zengheng4@huawei.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-event.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iio/industrialio-event.c b/drivers/iio/industrialio-event.c index 3d78da2531a9..727e2ef66aa4 100644 --- a/drivers/iio/industrialio-event.c +++ b/drivers/iio/industrialio-event.c @@ -556,7 +556,7 @@ int iio_device_register_eventset(struct iio_dev *indio_dev) ret = iio_device_register_sysfs_group(indio_dev, 
&ev_int->group); if (ret) - goto error_free_setup_event_lines; + goto error_free_group_attrs; ev_int->ioctl_handler.ioctl = iio_event_ioctl; iio_device_ioctl_handler_register(&iio_dev_opaque->indio_dev, @@ -564,6 +564,8 @@ int iio_device_register_eventset(struct iio_dev *indio_dev) return 0; +error_free_group_attrs: + kfree(ev_int->group.attrs); error_free_setup_event_lines: iio_free_chan_devattr_list(&ev_int->dev_attr_list); kfree(ev_int); From a4ff8e7a71601321f7bf7b58ede664dc0d774274 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Wed, 16 Nov 2022 09:56:37 +0800 Subject: [PATCH 1612/4122] PCI/DOE: Fix maximum data object length miscalculation Per PCIe r6.0, sec 6.30.1, a data object Length of 0x0 indicates 2^18 DWORDs (256K DW or 1MB) being transferred. Adjust the value of data object length for this case on both sending side and receiving side. Don't bother checking whether Length is greater than SZ_1M because all values of the 18-bit Length field are valid, and it is impossible to represent anything larger than SZ_1M: 0x00000 256K DW (1M bytes) 0x00001 1 DW (4 bytes) ... 
0x3ffff 256K-1 DW (1M - 4 bytes) [bhelgaas: commit log] Link: https://lore.kernel.org/r/20221116015637.3299664-1-ming4.li@intel.com Fixes: 9d24322e887b ("PCI/DOE: Add DOE mailbox support functions") Signed-off-by: Li Ming Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron Reviewed-by: Lukas Wunner Cc: stable@vger.kernel.org # v6.0+ --- drivers/pci/doe.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/pci/doe.c b/drivers/pci/doe.c index e402f05068a5..66d9ab288646 100644 --- a/drivers/pci/doe.c +++ b/drivers/pci/doe.c @@ -29,6 +29,9 @@ #define PCI_DOE_FLAG_CANCEL 0 #define PCI_DOE_FLAG_DEAD 1 +/* Max data object length is 2^18 dwords */ +#define PCI_DOE_MAX_LENGTH (1 << 18) + /** * struct pci_doe_mb - State for a single DOE mailbox * @@ -107,6 +110,7 @@ static int pci_doe_send_req(struct pci_doe_mb *doe_mb, { struct pci_dev *pdev = doe_mb->pdev; int offset = doe_mb->cap_offset; + size_t length; u32 val; int i; @@ -123,15 +127,20 @@ static int pci_doe_send_req(struct pci_doe_mb *doe_mb, if (FIELD_GET(PCI_DOE_STATUS_ERROR, val)) return -EIO; + /* Length is 2 DW of header + length of payload in DW */ + length = 2 + task->request_pl_sz / sizeof(u32); + if (length > PCI_DOE_MAX_LENGTH) + return -EIO; + if (length == PCI_DOE_MAX_LENGTH) + length = 0; + /* Write DOE Header */ val = FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_1_VID, task->prot.vid) | FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_1_TYPE, task->prot.type); pci_write_config_dword(pdev, offset + PCI_DOE_WRITE, val); - /* Length is 2 DW of header + length of payload in DW */ pci_write_config_dword(pdev, offset + PCI_DOE_WRITE, FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_2_LENGTH, - 2 + task->request_pl_sz / - sizeof(u32))); + length)); for (i = 0; i < task->request_pl_sz / sizeof(u32); i++) pci_write_config_dword(pdev, offset + PCI_DOE_WRITE, task->request_pl[i]); @@ -178,7 +187,10 @@ static int pci_doe_recv_resp(struct pci_doe_mb *doe_mb, struct pci_doe_task *tas 
pci_write_config_dword(pdev, offset + PCI_DOE_READ, 0); length = FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_2_LENGTH, val); - if (length > SZ_1M || length < 2) + /* A value of 0x0 indicates max data object length */ + if (!length) + length = PCI_DOE_MAX_LENGTH; + if (length < 2) return -EIO; /* First 2 dwords have already been read */ From 60cfac17d0a1c28cd41959e95ba1e0ecc47165e7 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Tue, 1 Mar 2022 14:12:20 +0100 Subject: [PATCH 1613/4122] rtc: pcf8563: clear RTC_FEATURE_ALARM if no irq If there is no IRQ hooked up, clear RTC_FEATURE_ALARM to make the core ensure that userspace is made aware that alarms are not supported. Signed-off-by: Vincent Whitchurch Link: https://lore.kernel.org/r/20220301131220.4011810-1-vincent.whitchurch@axis.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-pcf8563.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/rtc/rtc-pcf8563.c b/drivers/rtc/rtc-pcf8563.c index 11fa9788558b..0a7fd9478465 100644 --- a/drivers/rtc/rtc-pcf8563.c +++ b/drivers/rtc/rtc-pcf8563.c @@ -567,6 +567,8 @@ static int pcf8563_probe(struct i2c_client *client) client->irq); return err; } + } else { + clear_bit(RTC_FEATURE_ALARM, pcf8563->rtc->features); } err = devm_rtc_register_device(pcf8563->rtc); From eec79501cce6e8965e92174760c6a9e92d78a038 Mon Sep 17 00:00:00 2001 From: Riwen Lu Date: Wed, 10 Aug 2022 15:01:09 +0800 Subject: [PATCH 1614/4122] rtc: efi: Add wakeup support Add wakeup support for rtc-efi, so we can wakeup from S3/S4/S5 through rtcwake. 
Signed-off-by: Riwen Lu Link: https://lore.kernel.org/r/TYWP286MB260191455377CEBD2336557EB1659@TYWP286MB2601.JPNP286.PROD.OUTLOOK.COM Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-efi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/rtc/rtc-efi.c b/drivers/rtc/rtc-efi.c index 11850c2880ad..e991cccdb6e9 100644 --- a/drivers/rtc/rtc-efi.c +++ b/drivers/rtc/rtc-efi.c @@ -271,6 +271,8 @@ static int __init efi_rtc_probe(struct platform_device *dev) clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, rtc->features); set_bit(RTC_FEATURE_ALARM_WAKEUP_ONLY, rtc->features); + device_init_wakeup(&dev->dev, true); + return devm_rtc_register_device(rtc); } From fe0157ba679dc95407dd5eae6550a4ceaea75040 Mon Sep 17 00:00:00 2001 From: paulmn Date: Mon, 29 Aug 2022 14:46:39 +0200 Subject: [PATCH 1615/4122] rtc: pcf8523: fix for stop bit Bugfix for an issue detected when a goldcap capacitor gets fully discharged due to a long absence of the power supply, and then recharges again. The RTC failed to continue to keep the real-time clock. This was caused by the incorrect handling of the STOP bit in the RTC internal register. This fix solves the problem. 
Signed-off-by: paulmn Link: https://lore.kernel.org/r/20220829124639.10906-1-paulmn@axis.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-pcf8523.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/rtc/rtc-pcf8523.c b/drivers/rtc/rtc-pcf8523.c index 6174b3fd4b98..92de99f11a7a 100644 --- a/drivers/rtc/rtc-pcf8523.c +++ b/drivers/rtc/rtc-pcf8523.c @@ -99,24 +99,24 @@ static irqreturn_t pcf8523_irq(int irq, void *dev_id) static int pcf8523_rtc_read_time(struct device *dev, struct rtc_time *tm) { struct pcf8523 *pcf8523 = dev_get_drvdata(dev); - u8 regs[7]; + u8 regs[10]; int err; - err = regmap_bulk_read(pcf8523->regmap, PCF8523_REG_SECONDS, regs, + err = regmap_bulk_read(pcf8523->regmap, PCF8523_REG_CONTROL1, regs, sizeof(regs)); if (err < 0) return err; - if (regs[0] & PCF8523_SECONDS_OS) + if ((regs[0] & PCF8523_CONTROL1_STOP) || (regs[3] & PCF8523_SECONDS_OS)) return -EINVAL; - tm->tm_sec = bcd2bin(regs[0] & 0x7f); - tm->tm_min = bcd2bin(regs[1] & 0x7f); - tm->tm_hour = bcd2bin(regs[2] & 0x3f); - tm->tm_mday = bcd2bin(regs[3] & 0x3f); - tm->tm_wday = regs[4] & 0x7; - tm->tm_mon = bcd2bin(regs[5] & 0x1f) - 1; - tm->tm_year = bcd2bin(regs[6]) + 100; + tm->tm_sec = bcd2bin(regs[3] & 0x7f); + tm->tm_min = bcd2bin(regs[4] & 0x7f); + tm->tm_hour = bcd2bin(regs[5] & 0x3f); + tm->tm_mday = bcd2bin(regs[6] & 0x3f); + tm->tm_wday = regs[7] & 0x7; + tm->tm_mon = bcd2bin(regs[8] & 0x1f) - 1; + tm->tm_year = bcd2bin(regs[9]) + 100; return 0; } From a6ceee26fd5ed9b5bd37322b1ca88e4548cee4a3 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Wed, 21 Sep 2022 09:41:41 +0200 Subject: [PATCH 1616/4122] rtc: pcf85063: Fix reading alarm If the alarms are disabled the topmost bit (AEN_*) is set in the alarm registers. This is also interpreted in BCD number leading to this warning: rtc rtc0: invalid alarm value: 2022-09-21T80:80:80 Fix this by masking alarm enabling and reserved bits. 
Fixes: 05cb3a56ee8c ("rtc: pcf85063: add alarm support") Signed-off-by: Alexander Stein Link: https://lore.kernel.org/r/20220921074141.3903104-1-alexander.stein@ew.tq-group.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-pcf85063.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/rtc/rtc-pcf85063.c b/drivers/rtc/rtc-pcf85063.c index 095891999da1..99f9cc57c7b3 100644 --- a/drivers/rtc/rtc-pcf85063.c +++ b/drivers/rtc/rtc-pcf85063.c @@ -169,10 +169,10 @@ static int pcf85063_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) if (ret) return ret; - alrm->time.tm_sec = bcd2bin(buf[0]); - alrm->time.tm_min = bcd2bin(buf[1]); - alrm->time.tm_hour = bcd2bin(buf[2]); - alrm->time.tm_mday = bcd2bin(buf[3]); + alrm->time.tm_sec = bcd2bin(buf[0] & 0x7f); + alrm->time.tm_min = bcd2bin(buf[1] & 0x7f); + alrm->time.tm_hour = bcd2bin(buf[2] & 0x3f); + alrm->time.tm_mday = bcd2bin(buf[3] & 0x3f); ret = regmap_read(pcf85063->regmap, PCF85063_REG_CTRL2, &val); if (ret) From 9a48b4a6fd512bdaed7e38ba844be743163d49c6 Mon Sep 17 00:00:00 2001 From: "Darrick J. 
Wong" Date: Sun, 6 Nov 2022 17:03:13 -0800 Subject: [PATCH 1617/4122] xfs: fully initialize xfs_da_args in xchk_directory_blocks While running the online fsck test suite, I noticed the following assertion in the kernel log (edited for brevity): XFS: Assertion failed: 0, file: fs/xfs/xfs_health.c, line: 571 ------------[ cut here ]------------ WARNING: CPU: 3 PID: 11667 at fs/xfs/xfs_message.c:104 assfail+0x46/0x4a [xfs] CPU: 3 PID: 11667 Comm: xfs_scrub Tainted: G W 5.19.0-rc7-xfsx #rc7 6e6475eb29fd9dda3181f81b7ca7ff961d277a40 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1 04/01/2014 RIP: 0010:assfail+0x46/0x4a [xfs] Call Trace: xfs_dir2_isblock+0xcc/0xe0 xchk_directory_blocks+0xc7/0x420 xchk_directory+0x53/0xb0 xfs_scrub_metadata+0x2b6/0x6b0 xfs_scrubv_metadata+0x35e/0x4d0 xfs_ioc_scrubv_metadata+0x111/0x160 xfs_file_ioctl+0x4ec/0xef0 __x64_sys_ioctl+0x82/0xa0 do_syscall_64+0x2b/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 This assertion triggers in xfs_dirattr_mark_sick when the caller passes in a whichfork value that is neither of XFS_{DATA,ATTR}_FORK. The cause of this is that xchk_directory_blocks only partially initializes the xfs_da_args structure that is passed to xfs_dir2_isblock. If the data fork is not correct, the XFS_IS_CORRUPT clause will trigger. My development branch reports this failure to the health monitoring subsystem, which accesses the uninitialized args->whichfork field, leading to the assertion tripping. We really shouldn't be passing random stack contents around, so the solution here is to force the compiler to zero-initialize the struct. Found by fuzzing u3.bmx[0].blockcount = middlebit on xfs/1554. Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/dir.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c index 5c87800ab223..d1b0f23c2c59 100644 --- a/fs/xfs/scrub/dir.c +++ b/fs/xfs/scrub/dir.c @@ -666,7 +666,12 @@ xchk_directory_blocks( struct xfs_scrub *sc) { struct xfs_bmbt_irec got; - struct xfs_da_args args; + struct xfs_da_args args = { + .dp = sc ->ip, + .whichfork = XFS_DATA_FORK, + .geo = sc->mp->m_dir_geo, + .trans = sc->tp, + }; struct xfs_ifork *ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK); struct xfs_mount *mp = sc->mp; xfs_fileoff_t leaf_lblk; @@ -689,9 +694,6 @@ xchk_directory_blocks( free_lblk = XFS_B_TO_FSB(mp, XFS_DIR2_FREE_OFFSET); /* Is this a block dir? */ - args.dp = sc->ip; - args.geo = mp->m_dir_geo; - args.trans = sc->tp; error = xfs_dir2_isblock(&args, &is_block); if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) goto out; From be1317fdb8d4e3ccbac43e199b360c248c600d99 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 6 Nov 2022 17:03:14 -0800 Subject: [PATCH 1618/4122] xfs: don't track the AGFL buffer in the scrub AG context While scrubbing an allocation group, we don't need to hold the AGFL buffer as part of the scrub context. All that is necessary to lock an AG is to hold the AGI and AGF buffers, so fix all the existing users of the AGFL buffer to grab them only when necessary. Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/agheader.c | 47 +++++++++++++++++++++------------- fs/xfs/scrub/agheader_repair.c | 1 - fs/xfs/scrub/common.c | 8 ------ fs/xfs/scrub/repair.c | 11 ++++---- fs/xfs/scrub/scrub.h | 1 - 5 files changed, 35 insertions(+), 33 deletions(-) diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c index b7b838bd4ba4..af284baa6f4c 100644 --- a/fs/xfs/scrub/agheader.c +++ b/fs/xfs/scrub/agheader.c @@ -609,9 +609,16 @@ out: /* AGFL */ struct xchk_agfl_info { - unsigned int sz_entries; + /* Number of AGFL entries that the AGF claims are in use. */ + unsigned int agflcount; + + /* Number of AGFL entries that we found. */ unsigned int nr_entries; + + /* Buffer to hold AGFL entries for extent checking. */ xfs_agblock_t *entries; + + struct xfs_buf *agfl_bp; struct xfs_scrub *sc; }; @@ -641,10 +648,10 @@ xchk_agfl_block( struct xfs_scrub *sc = sai->sc; if (xfs_verify_agbno(sc->sa.pag, agbno) && - sai->nr_entries < sai->sz_entries) + sai->nr_entries < sai->agflcount) sai->entries[sai->nr_entries++] = agbno; else - xchk_block_set_corrupt(sc, sc->sa.agfl_bp); + xchk_block_set_corrupt(sc, sai->agfl_bp); xchk_agfl_block_xref(sc, agbno); @@ -696,19 +703,26 @@ int xchk_agfl( struct xfs_scrub *sc) { - struct xchk_agfl_info sai; + struct xchk_agfl_info sai = { + .sc = sc, + }; struct xfs_agf *agf; xfs_agnumber_t agno = sc->sm->sm_agno; - unsigned int agflcount; unsigned int i; int error; + /* Lock the AGF and AGI so that nobody can touch this AG. */ error = xchk_ag_read_headers(sc, agno, &sc->sa); if (!xchk_process_error(sc, agno, XFS_AGFL_BLOCK(sc->mp), &error)) - goto out; + return error; if (!sc->sa.agf_bp) return -EFSCORRUPTED; - xchk_buffer_recheck(sc, sc->sa.agfl_bp); + + /* Try to read the AGFL, and verify its structure if we get it. 
*/ + error = xfs_alloc_read_agfl(sc->sa.pag, sc->tp, &sai.agfl_bp); + if (!xchk_process_error(sc, agno, XFS_AGFL_BLOCK(sc->mp), &error)) + return error; + xchk_buffer_recheck(sc, sai.agfl_bp); xchk_agfl_xref(sc); @@ -717,24 +731,21 @@ xchk_agfl( /* Allocate buffer to ensure uniqueness of AGFL entries. */ agf = sc->sa.agf_bp->b_addr; - agflcount = be32_to_cpu(agf->agf_flcount); - if (agflcount > xfs_agfl_size(sc->mp)) { + sai.agflcount = be32_to_cpu(agf->agf_flcount); + if (sai.agflcount > xfs_agfl_size(sc->mp)) { xchk_block_set_corrupt(sc, sc->sa.agf_bp); goto out; } - memset(&sai, 0, sizeof(sai)); - sai.sc = sc; - sai.sz_entries = agflcount; - sai.entries = kmem_zalloc(sizeof(xfs_agblock_t) * agflcount, - KM_MAYFAIL); + sai.entries = kvcalloc(sai.agflcount, sizeof(xfs_agblock_t), + GFP_KERNEL | __GFP_RETRY_MAYFAIL); if (!sai.entries) { error = -ENOMEM; goto out; } /* Check the blocks in the AGFL. */ - error = xfs_agfl_walk(sc->mp, sc->sa.agf_bp->b_addr, - sc->sa.agfl_bp, xchk_agfl_block, &sai); + error = xfs_agfl_walk(sc->mp, sc->sa.agf_bp->b_addr, sai.agfl_bp, + xchk_agfl_block, &sai); if (error == -ECANCELED) { error = 0; goto out_free; @@ -742,7 +753,7 @@ xchk_agfl( if (error) goto out_free; - if (agflcount != sai.nr_entries) { + if (sai.agflcount != sai.nr_entries) { xchk_block_set_corrupt(sc, sc->sa.agf_bp); goto out_free; } @@ -758,7 +769,7 @@ xchk_agfl( } out_free: - kmem_free(sai.entries); + kvfree(sai.entries); out: return error; } diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c index 1b0b4e243f77..2e75ff9b5b2e 100644 --- a/fs/xfs/scrub/agheader_repair.c +++ b/fs/xfs/scrub/agheader_repair.c @@ -697,7 +697,6 @@ xrep_agfl( * freespace overflow to the freespace btrees. 
*/ sc->sa.agf_bp = agf_bp; - sc->sa.agfl_bp = agfl_bp; error = xrep_roll_ag_trans(sc); if (error) goto err; diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index 9bbbf20f401b..ad70f29233c3 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -424,10 +424,6 @@ xchk_ag_read_headers( if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGF)) return error; - error = xfs_alloc_read_agfl(sa->pag, sc->tp, &sa->agfl_bp); - if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGFL)) - return error; - return 0; } @@ -515,10 +511,6 @@ xchk_ag_free( struct xchk_ag *sa) { xchk_ag_btcur_free(sa); - if (sa->agfl_bp) { - xfs_trans_brelse(sc->tp, sa->agfl_bp); - sa->agfl_bp = NULL; - } if (sa->agf_bp) { xfs_trans_brelse(sc->tp, sa->agf_bp); sa->agf_bp = NULL; diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index c18bd039fce9..2ada7fc1c398 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c @@ -126,8 +126,6 @@ xrep_roll_ag_trans( xfs_trans_bhold(sc->tp, sc->sa.agi_bp); if (sc->sa.agf_bp) xfs_trans_bhold(sc->tp, sc->sa.agf_bp); - if (sc->sa.agfl_bp) - xfs_trans_bhold(sc->tp, sc->sa.agfl_bp); /* * Roll the transaction. We still own the buffer and the buffer lock @@ -145,8 +143,6 @@ xrep_roll_ag_trans( xfs_trans_bjoin(sc->tp, sc->sa.agi_bp); if (sc->sa.agf_bp) xfs_trans_bjoin(sc->tp, sc->sa.agf_bp); - if (sc->sa.agfl_bp) - xfs_trans_bjoin(sc->tp, sc->sa.agfl_bp); return 0; } @@ -498,6 +494,7 @@ xrep_put_freelist( struct xfs_scrub *sc, xfs_agblock_t agbno) { + struct xfs_buf *agfl_bp; int error; /* Make sure there's space on the freelist. */ @@ -516,8 +513,12 @@ xrep_put_freelist( return error; /* Put the block on the AGFL. 
*/ + error = xfs_alloc_read_agfl(sc->sa.pag, sc->tp, &agfl_bp); + if (error) + return error; + error = xfs_alloc_put_freelist(sc->sa.pag, sc->tp, sc->sa.agf_bp, - sc->sa.agfl_bp, agbno, 0); + agfl_bp, agbno, 0); if (error) return error; xfs_extent_busy_insert(sc->tp, sc->sa.pag, agbno, 1, diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h index 3de5287e98d8..151567f88366 100644 --- a/fs/xfs/scrub/scrub.h +++ b/fs/xfs/scrub/scrub.h @@ -39,7 +39,6 @@ struct xchk_ag { /* AG btree roots */ struct xfs_buf *agf_bp; - struct xfs_buf *agfl_bp; struct xfs_buf *agi_bp; /* AG btrees */ From 3e59c0103e66d6e687a8b47fd70169542aba938e Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 6 Nov 2022 17:03:14 -0800 Subject: [PATCH 1619/4122] xfs: log the AGI/AGF buffers when rolling transactions during an AG repair Currently, the only way to lock an allocation group is to hold the AGI and AGF buffers. If a repair needs to roll the transaction while repairing some AG metadata, it maintains that lock by holding the two buffers across the transaction roll and joins them afterwards. However, repair is not like other parts of XFS that employ the bhold - roll - bjoin sequence because it's possible that the AGI or AGF buffers are not actually dirty before the roll. This presents two problems -- First, we need to redirty those buffers to keep them moving along in the log to avoid pinning the log tail. Second, a clean buffer log item can detach from the buffer. If this happens, the buffer type state is discarded along with the bli and must be reattached before the next time the buffer is logged. If it is not, the logging code will complain and log recovery will not work properly. An earlier version of this patch tried to fix the second problem by re-setting the buffer type in the bli after joining the buffer to the new transaction, but that looked weird and didn't solve the first problem. Instead, solve both problems by logging the buffer before rolling the transaction. 
Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/repair.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index 2ada7fc1c398..22335619c84e 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c @@ -121,24 +121,36 @@ xrep_roll_ag_trans( { int error; - /* Keep the AG header buffers locked so we can keep going. */ - if (sc->sa.agi_bp) + /* + * Keep the AG header buffers locked while we roll the transaction. + * Ensure that both AG buffers are dirty and held when we roll the + * transaction so that they move forward in the log without losing the + * bli (and hence the bli type) when the transaction commits. + * + * Normal code would never hold clean buffers across a roll, but repair + * needs both buffers to maintain a total lock on the AG. + */ + if (sc->sa.agi_bp) { + xfs_ialloc_log_agi(sc->tp, sc->sa.agi_bp, XFS_AGI_MAGICNUM); xfs_trans_bhold(sc->tp, sc->sa.agi_bp); - if (sc->sa.agf_bp) + } + + if (sc->sa.agf_bp) { + xfs_alloc_log_agf(sc->tp, sc->sa.agf_bp, XFS_AGF_MAGICNUM); xfs_trans_bhold(sc->tp, sc->sa.agf_bp); + } /* - * Roll the transaction. We still own the buffer and the buffer lock - * regardless of whether or not the roll succeeds. If the roll fails, - * the buffers will be released during teardown on our way out of the - * kernel. If it succeeds, we join them to the new transaction and - * move on. + * Roll the transaction. We still hold the AG header buffers locked + * regardless of whether or not that succeeds. On failure, the buffers + * will be released during teardown on our way out of the kernel. If + * successful, join the buffers to the new transaction and move on. */ error = xfs_trans_roll(&sc->tp); if (error) return error; - /* Join AG headers to the new transaction. */ + /* Join the AG headers to the new transaction. 
*/ if (sc->sa.agi_bp) xfs_trans_bjoin(sc->tp, sc->sa.agi_bp); if (sc->sa.agf_bp) From 48ff40458f871fb19e7b1b40e0e5084b8751d9cb Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 6 Nov 2022 17:03:15 -0800 Subject: [PATCH 1620/4122] xfs: standardize GFP flags usage in online scrub Memory allocation usage is the same throughout online fsck -- we want kernel memory, we have to be able to back out if we can't allocate memory, and we don't want to spray dmesg with memory allocation failure reports. Standardize the GFP flag usage and document these requirements. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/agheader.c | 2 +- fs/xfs/scrub/attr.c | 9 ++++----- fs/xfs/scrub/scrub.h | 9 +++++++++ fs/xfs/scrub/symlink.c | 2 +- 4 files changed, 15 insertions(+), 7 deletions(-) diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c index af284baa6f4c..4dd52b15f09c 100644 --- a/fs/xfs/scrub/agheader.c +++ b/fs/xfs/scrub/agheader.c @@ -737,7 +737,7 @@ xchk_agfl( goto out; } sai.entries = kvcalloc(sai.agflcount, sizeof(xfs_agblock_t), - GFP_KERNEL | __GFP_RETRY_MAYFAIL); + XCHK_GFP_FLAGS); if (!sai.entries) { error = -ENOMEM; goto out; diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c index b6f0c9f3f124..11b2593a2be7 100644 --- a/fs/xfs/scrub/attr.c +++ b/fs/xfs/scrub/attr.c @@ -79,7 +79,8 @@ xchk_setup_xattr( * without the inode lock held, which means we can sleep. */ if (sc->flags & XCHK_TRY_HARDER) { - error = xchk_setup_xattr_buf(sc, XATTR_SIZE_MAX, GFP_KERNEL); + error = xchk_setup_xattr_buf(sc, XATTR_SIZE_MAX, + XCHK_GFP_FLAGS); if (error) return error; } @@ -138,8 +139,7 @@ xchk_xattr_listent( * doesn't work, we overload the seen_enough variable to convey * the error message back to the main scrub function. 
*/ - error = xchk_setup_xattr_buf(sx->sc, valuelen, - GFP_KERNEL | __GFP_RETRY_MAYFAIL); + error = xchk_setup_xattr_buf(sx->sc, valuelen, XCHK_GFP_FLAGS); if (error == -ENOMEM) error = -EDEADLOCK; if (error) { @@ -324,8 +324,7 @@ xchk_xattr_block( return 0; /* Allocate memory for block usage checking. */ - error = xchk_setup_xattr_buf(ds->sc, 0, - GFP_KERNEL | __GFP_RETRY_MAYFAIL); + error = xchk_setup_xattr_buf(ds->sc, 0, XCHK_GFP_FLAGS); if (error == -ENOMEM) return -EDEADLOCK; if (error) diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h index 151567f88366..a0f097b8acb0 100644 --- a/fs/xfs/scrub/scrub.h +++ b/fs/xfs/scrub/scrub.h @@ -8,6 +8,15 @@ struct xfs_scrub; +/* + * Standard flags for allocating memory within scrub. NOFS context is + * configured by the process allocation scope. Scrub and repair must be able + * to back out gracefully if there isn't enough memory. Force-cast to avoid + * complaints from static checkers. + */ +#define XCHK_GFP_FLAGS ((__force gfp_t)(GFP_KERNEL | __GFP_NOWARN | \ + __GFP_RETRY_MAYFAIL)) + /* Type info and names for the scrub types. */ enum xchk_type { ST_NONE = 1, /* disabled */ diff --git a/fs/xfs/scrub/symlink.c b/fs/xfs/scrub/symlink.c index 75311f8daeeb..c1c99ffe7408 100644 --- a/fs/xfs/scrub/symlink.c +++ b/fs/xfs/scrub/symlink.c @@ -21,7 +21,7 @@ xchk_setup_symlink( struct xfs_scrub *sc) { /* Allocate the buffer without the inode lock held. */ - sc->buf = kvzalloc(XFS_SYMLINK_MAXLEN + 1, GFP_KERNEL); + sc->buf = kvzalloc(XFS_SYMLINK_MAXLEN + 1, XCHK_GFP_FLAGS); if (!sc->buf) return -ENOMEM; From b255fab0f80cc65a334fcd90cd278673cddbc988 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 6 Nov 2022 17:03:14 -0800 Subject: [PATCH 1621/4122] xfs: make AGFL repair function avoid crosslinked blocks Teach the AGFL repair function to check each block of the proposed AGFL against the rmap btree. If the rmapbt finds any mappings that are not OWN_AG, strike that block from the list. Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/agheader_repair.c | 78 ++++++++++++++++++++++++++++------ 1 file changed, 66 insertions(+), 12 deletions(-) diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c index 2e75ff9b5b2e..82ceb60ea5fc 100644 --- a/fs/xfs/scrub/agheader_repair.c +++ b/fs/xfs/scrub/agheader_repair.c @@ -442,12 +442,18 @@ out_revert: /* AGFL */ struct xrep_agfl { + /* Bitmap of alleged AGFL blocks that we're not going to add. */ + struct xbitmap crossed; + /* Bitmap of other OWN_AG metadata blocks. */ struct xbitmap agmetablocks; /* Bitmap of free space. */ struct xbitmap *freesp; + /* rmapbt cursor for finding crosslinked blocks */ + struct xfs_btree_cur *rmap_cur; + struct xfs_scrub *sc; }; @@ -477,6 +483,41 @@ xrep_agfl_walk_rmap( return xbitmap_set_btcur_path(&ra->agmetablocks, cur); } +/* Strike out the blocks that are cross-linked according to the rmapbt. */ +STATIC int +xrep_agfl_check_extent( + struct xrep_agfl *ra, + uint64_t start, + uint64_t len) +{ + xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(ra->sc->mp, start); + xfs_agblock_t last_agbno = agbno + len - 1; + int error; + + ASSERT(XFS_FSB_TO_AGNO(ra->sc->mp, start) == ra->sc->sa.pag->pag_agno); + + while (agbno <= last_agbno) { + bool other_owners; + + error = xfs_rmap_has_other_keys(ra->rmap_cur, agbno, 1, + &XFS_RMAP_OINFO_AG, &other_owners); + if (error) + return error; + + if (other_owners) { + error = xbitmap_set(&ra->crossed, agbno, 1); + if (error) + return error; + } + + if (xchk_should_terminate(ra->sc, &error)) + return error; + agbno++; + } + + return 0; +} + /* * Map out all the non-AGFL OWN_AG space in this AG so that we can deduce * which blocks belong to the AGFL. 
@@ -496,44 +537,58 @@ xrep_agfl_collect_blocks( struct xrep_agfl ra; struct xfs_mount *mp = sc->mp; struct xfs_btree_cur *cur; + struct xbitmap_range *br, *n; int error; ra.sc = sc; ra.freesp = agfl_extents; xbitmap_init(&ra.agmetablocks); + xbitmap_init(&ra.crossed); /* Find all space used by the free space btrees & rmapbt. */ cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.pag); error = xfs_rmap_query_all(cur, xrep_agfl_walk_rmap, &ra); - if (error) - goto err; xfs_btree_del_cursor(cur, error); + if (error) + goto out_bmp; /* Find all blocks currently being used by the bnobt. */ cur = xfs_allocbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.pag, XFS_BTNUM_BNO); error = xbitmap_set_btblocks(&ra.agmetablocks, cur); - if (error) - goto err; xfs_btree_del_cursor(cur, error); + if (error) + goto out_bmp; /* Find all blocks currently being used by the cntbt. */ cur = xfs_allocbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.pag, XFS_BTNUM_CNT); error = xbitmap_set_btblocks(&ra.agmetablocks, cur); - if (error) - goto err; - xfs_btree_del_cursor(cur, error); + if (error) + goto out_bmp; /* * Drop the freesp meta blocks that are in use by btrees. * The remaining blocks /should/ be AGFL blocks. */ error = xbitmap_disunion(agfl_extents, &ra.agmetablocks); - xbitmap_destroy(&ra.agmetablocks); if (error) - return error; + goto out_bmp; + + /* Strike out the blocks that are cross-linked. */ + ra.rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.pag); + for_each_xbitmap_extent(br, n, agfl_extents) { + error = xrep_agfl_check_extent(&ra, br->start, br->len); + if (error) + break; + } + xfs_btree_del_cursor(ra.rmap_cur, error); + if (error) + goto out_bmp; + error = xbitmap_disunion(agfl_extents, &ra.crossed); + if (error) + goto out_bmp; /* * Calculate the new AGFL size. 
If we found more blocks than fit in @@ -541,11 +596,10 @@ xrep_agfl_collect_blocks( */ *flcount = min_t(uint64_t, xbitmap_hweight(agfl_extents), xfs_agfl_size(mp)); - return 0; -err: +out_bmp: + xbitmap_destroy(&ra.crossed); xbitmap_destroy(&ra.agmetablocks); - xfs_btree_del_cursor(cur, error); return error; } From a7a0f9a5503f4da3b6489583ce4ef9abc0ab2475 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 6 Nov 2022 17:03:16 -0800 Subject: [PATCH 1622/4122] xfs: return EINTR when a fatal signal terminates scrub If the program calling online fsck is terminated with a fatal signal, bail out to userspace by returning EINTR, not EAGAIN. EAGAIN is used by scrubbers to indicate that we should try again with more resources locked, and not to indicate that the operation was cancelled. The miswiring is mostly harmless, but it shows up in the trace data. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h index 454145db10e7..b73648d81d23 100644 --- a/fs/xfs/scrub/common.h +++ b/fs/xfs/scrub/common.h @@ -25,7 +25,7 @@ xchk_should_terminate( if (fatal_signal_pending(current)) { if (*error == 0) - *error = -EAGAIN; + *error = -EINTR; return true; } return false; From 0a713bd41ea2b19904232b9c5278012c4361bc04 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 6 Nov 2022 17:03:17 -0800 Subject: [PATCH 1623/4122] xfs: fix return code when fatal signal encountered during dquot scrub If the scrub process is sent a fatal signal while we're checking dquots, the predicate for this will set the error code to -EINTR. Don't then squash that into -ECANCELED, because the wrong errno turns up in the trace output. Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/quota.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c index 21b4c9006859..0b643ff32b22 100644 --- a/fs/xfs/scrub/quota.c +++ b/fs/xfs/scrub/quota.c @@ -84,7 +84,7 @@ xchk_quota_item( int error = 0; if (xchk_should_terminate(sc, &error)) - return -ECANCELED; + return error; /* * Except for the root dquot, the actual dquot we got must either have From fcd2a43488d5a211aec94e28369b2a72c28258a2 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 6 Nov 2022 17:03:15 -0800 Subject: [PATCH 1624/4122] xfs: initialize the check_owner object fully Initialize the check_owner list head so that we don't corrupt the list. Reduce the scope of the object pointer. Fixes: 858333dcf021 ("xfs: check btree block ownership with bnobt/rmapbt when scrubbing btree") Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/btree.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c index 2f4519590dc1..075ff3071122 100644 --- a/fs/xfs/scrub/btree.c +++ b/fs/xfs/scrub/btree.c @@ -408,7 +408,6 @@ xchk_btree_check_owner( struct xfs_buf *bp) { struct xfs_btree_cur *cur = bs->cur; - struct check_owner *co; /* * In theory, xfs_btree_get_block should only give us a null buffer @@ -431,10 +430,14 @@ xchk_btree_check_owner( * later scanning. */ if (cur->bc_btnum == XFS_BTNUM_BNO || cur->bc_btnum == XFS_BTNUM_RMAP) { + struct check_owner *co; + co = kmem_alloc(sizeof(struct check_owner), KM_MAYFAIL); if (!co) return -ENOMEM; + + INIT_LIST_HEAD(&co->list); co->level = level; co->daddr = xfs_buf_daddr(bp); list_add_tail(&co->list, &bs->to_check); From 6bf2f87915970160ded16c310e2e8887deff97a2 Mon Sep 17 00:00:00 2001 From: "Darrick J. 
Wong" Date: Sun, 6 Nov 2022 17:03:17 -0800 Subject: [PATCH 1625/4122] xfs: don't retry repairs harder when EAGAIN is returned Repair functions will not return EAGAIN -- if they were not able to obtain resources, they should return EDEADLOCK (like the rest of online fsck) to signal that we need to grab all the resources and try again. Hence we don't need to deal with this case except as a debugging assertion. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/repair.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index 22335619c84e..7323bd9fddfb 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c @@ -61,7 +61,6 @@ xrep_attempt( sc->flags |= XREP_ALREADY_FIXED; return -EAGAIN; case -EDEADLOCK: - case -EAGAIN: /* Tell the caller to try again having grabbed all the locks. */ if (!(sc->flags & XCHK_TRY_HARDER)) { sc->flags |= XCHK_TRY_HARDER; @@ -74,6 +73,11 @@ xrep_attempt( */ return -EFSCORRUPTED; default: + /* + * EAGAIN tells the caller to re-scrub, so we cannot return + * that here. + */ + ASSERT(error != -EAGAIN); return error; } } From 306195f355bbdcc3eff6cffac05bcd93a5e419ed Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 6 Nov 2022 17:03:16 -0800 Subject: [PATCH 1626/4122] xfs: pivot online scrub away from kmem.[ch] Convert all the online scrub code to use the Linux slab allocator functions directly instead of going through the kmem wrappers. Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/agheader_repair.c | 2 +- fs/xfs/scrub/attr.c | 2 +- fs/xfs/scrub/bitmap.c | 11 ++++++----- fs/xfs/scrub/btree.c | 9 ++++----- fs/xfs/scrub/dabtree.c | 4 ++-- fs/xfs/scrub/fscounters.c | 2 +- fs/xfs/scrub/refcount.c | 12 ++++++------ fs/xfs/scrub/scrub.c | 6 +++--- 8 files changed, 24 insertions(+), 24 deletions(-) diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c index 82ceb60ea5fc..d75d82151eeb 100644 --- a/fs/xfs/scrub/agheader_repair.c +++ b/fs/xfs/scrub/agheader_repair.c @@ -685,7 +685,7 @@ xrep_agfl_init_header( if (br->len) break; list_del(&br->list); - kmem_free(br); + kfree(br); } /* Write new AGFL to disk. */ diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c index 11b2593a2be7..31529b9bf389 100644 --- a/fs/xfs/scrub/attr.c +++ b/fs/xfs/scrub/attr.c @@ -49,7 +49,7 @@ xchk_setup_xattr_buf( if (ab) { if (sz <= ab->sz) return 0; - kmem_free(ab); + kvfree(ab); sc->buf = NULL; } diff --git a/fs/xfs/scrub/bitmap.c b/fs/xfs/scrub/bitmap.c index b89bf9de9b1c..a255f09e9f0a 100644 --- a/fs/xfs/scrub/bitmap.c +++ b/fs/xfs/scrub/bitmap.c @@ -10,6 +10,7 @@ #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_btree.h" +#include "scrub/scrub.h" #include "scrub/bitmap.h" /* @@ -25,7 +26,7 @@ xbitmap_set( { struct xbitmap_range *bmr; - bmr = kmem_alloc(sizeof(struct xbitmap_range), KM_MAYFAIL); + bmr = kmalloc(sizeof(struct xbitmap_range), XCHK_GFP_FLAGS); if (!bmr) return -ENOMEM; @@ -47,7 +48,7 @@ xbitmap_destroy( for_each_xbitmap_extent(bmr, n, bitmap) { list_del(&bmr->list); - kmem_free(bmr); + kfree(bmr); } } @@ -174,15 +175,15 @@ xbitmap_disunion( /* Total overlap, just delete ex. */ lp = lp->next; list_del(&br->list); - kmem_free(br); + kfree(br); break; case 0: /* * Deleting from the middle: add the new right extent * and then shrink the left extent. 
*/ - new_br = kmem_alloc(sizeof(struct xbitmap_range), - KM_MAYFAIL); + new_br = kmalloc(sizeof(struct xbitmap_range), + XCHK_GFP_FLAGS); if (!new_br) { error = -ENOMEM; goto out; diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c index 075ff3071122..0fd36d5b4646 100644 --- a/fs/xfs/scrub/btree.c +++ b/fs/xfs/scrub/btree.c @@ -432,8 +432,7 @@ xchk_btree_check_owner( if (cur->bc_btnum == XFS_BTNUM_BNO || cur->bc_btnum == XFS_BTNUM_RMAP) { struct check_owner *co; - co = kmem_alloc(sizeof(struct check_owner), - KM_MAYFAIL); + co = kmalloc(sizeof(struct check_owner), XCHK_GFP_FLAGS); if (!co) return -ENOMEM; @@ -652,7 +651,7 @@ xchk_btree( xchk_btree_set_corrupt(sc, cur, 0); return 0; } - bs = kmem_zalloc(cur_sz, KM_NOFS | KM_MAYFAIL); + bs = kzalloc(cur_sz, XCHK_GFP_FLAGS); if (!bs) return -ENOMEM; bs->cur = cur; @@ -743,9 +742,9 @@ out: error = xchk_btree_check_block_owner(bs, co->level, co->daddr); list_del(&co->list); - kmem_free(co); + kfree(co); } - kmem_free(bs); + kfree(bs); return error; } diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c index 84fe3d33d699..d17cee177085 100644 --- a/fs/xfs/scrub/dabtree.c +++ b/fs/xfs/scrub/dabtree.c @@ -486,7 +486,7 @@ xchk_da_btree( return 0; /* Set up initial da state. 
*/ - ds = kmem_zalloc(sizeof(struct xchk_da_btree), KM_NOFS | KM_MAYFAIL); + ds = kzalloc(sizeof(struct xchk_da_btree), XCHK_GFP_FLAGS); if (!ds) return -ENOMEM; ds->dargs.dp = sc->ip; @@ -591,6 +591,6 @@ out: out_state: xfs_da_state_free(ds->state); - kmem_free(ds); + kfree(ds); return error; } diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c index 6a6f8fe7f87c..3c56f5890da4 100644 --- a/fs/xfs/scrub/fscounters.c +++ b/fs/xfs/scrub/fscounters.c @@ -116,7 +116,7 @@ xchk_setup_fscounters( struct xchk_fscounters *fsc; int error; - sc->buf = kmem_zalloc(sizeof(struct xchk_fscounters), 0); + sc->buf = kzalloc(sizeof(struct xchk_fscounters), XCHK_GFP_FLAGS); if (!sc->buf) return -ENOMEM; fsc = sc->buf; diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c index a26ee0f24ef2..d9c1b3cea4a5 100644 --- a/fs/xfs/scrub/refcount.c +++ b/fs/xfs/scrub/refcount.c @@ -127,8 +127,8 @@ xchk_refcountbt_rmap_check( * is healthy each rmap_irec we see will be in agbno order * so we don't need insertion sort here. */ - frag = kmem_alloc(sizeof(struct xchk_refcnt_frag), - KM_MAYFAIL); + frag = kmalloc(sizeof(struct xchk_refcnt_frag), + XCHK_GFP_FLAGS); if (!frag) return -ENOMEM; memcpy(&frag->rm, rec, sizeof(frag->rm)); @@ -215,7 +215,7 @@ xchk_refcountbt_process_rmap_fragments( continue; } list_del(&frag->list); - kmem_free(frag); + kfree(frag); nr++; } @@ -257,11 +257,11 @@ done: /* Delete fragments and work list. 
*/ list_for_each_entry_safe(frag, n, &worklist, list) { list_del(&frag->list); - kmem_free(frag); + kfree(frag); } list_for_each_entry_safe(frag, n, &refchk->fragments, list) { list_del(&frag->list); - kmem_free(frag); + kfree(frag); } } @@ -306,7 +306,7 @@ xchk_refcountbt_xref_rmap( out_free: list_for_each_entry_safe(frag, n, &refchk.fragments, list) { list_del(&frag->list); - kmem_free(frag); + kfree(frag); } } diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index 2e8e400f10a9..07a7a75f987f 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -174,7 +174,7 @@ xchk_teardown( if (sc->flags & XCHK_REAPING_DISABLED) xchk_start_reaping(sc); if (sc->buf) { - kmem_free(sc->buf); + kvfree(sc->buf); sc->buf = NULL; } return error; @@ -467,7 +467,7 @@ xfs_scrub_metadata( xfs_warn_mount(mp, XFS_OPSTATE_WARNED_SCRUB, "EXPERIMENTAL online scrub feature in use. Use at your own risk!"); - sc = kmem_zalloc(sizeof(struct xfs_scrub), KM_NOFS | KM_MAYFAIL); + sc = kzalloc(sizeof(struct xfs_scrub), XCHK_GFP_FLAGS); if (!sc) { error = -ENOMEM; goto out; @@ -557,7 +557,7 @@ out_nofix: out_teardown: error = xchk_teardown(sc, error); out_sc: - kmem_free(sc); + kfree(sc); out: trace_xchk_done(XFS_I(file_inode(file)), sm, error); if (error == -EFSCORRUPTED || error == -EFSBADCRC) { From 9e13975bb0620c2bfa1a4d2943e7eb8514f7708e Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 6 Nov 2022 17:03:18 -0800 Subject: [PATCH 1627/4122] xfs: load rtbitmap and rtsummary extent mapping btrees at mount time It turns out that GETFSMAP and online fsck have had a bug for years due to their use of ILOCK_SHARED to coordinate their linear scans of the realtime bitmap. If the bitmap file's data fork happens to be in BTREE format and the scan occurs immediately after mounting, the incore bmbt will not be populated, leading to ASSERTs tripping over the incorrect inode state. 
Because the bitmap scans always lock bitmap buffers in increasing order of file offset, it is appropriate for these two callers to take a shared ILOCK to improve scalability. To fix this problem, load both data and attr fork state into memory when mounting the realtime inodes. Realtime metadata files aren't supposed to have an attr fork so the second step is likely a nop. On most filesystems this is unlikely since the rtbitmap data fork is usually in extents format, but it's possible to craft a filesystem that will hit this problem by fragmenting the free space in the data section and growfsing the rt section. Fixes: 4c934c7dd60c ("xfs: report realtime space information via the rtbitmap") Also-Fixes: 46d9bfb5e706 ("xfs: cross-reference the realtime bitmap") Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/xfs_rtalloc.c | 56 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 52 insertions(+), 4 deletions(-) diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 292d5e54a92c..b0846204c436 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -1325,6 +1325,41 @@ xfs_rtalloc_reinit_frextents( return 0; } +/* + * Read in the bmbt of an rt metadata inode so that we never have to load them + * at runtime. This enables the use of shared ILOCKs for rtbitmap scans. Use + * an empty transaction to avoid deadlocking on loops in the bmbt.
+ */ +static inline int +xfs_rtmount_iread_extents( + struct xfs_inode *ip, + unsigned int lock_class) +{ + struct xfs_trans *tp; + int error; + + error = xfs_trans_alloc_empty(ip->i_mount, &tp); + if (error) + return error; + + xfs_ilock(ip, XFS_ILOCK_EXCL | lock_class); + + error = xfs_iread_extents(tp, ip, XFS_DATA_FORK); + if (error) + goto out_unlock; + + if (xfs_inode_has_attr_fork(ip)) { + error = xfs_iread_extents(tp, ip, XFS_ATTR_FORK); + if (error) + goto out_unlock; + } + +out_unlock: + xfs_iunlock(ip, XFS_ILOCK_EXCL | lock_class); + xfs_trans_cancel(tp); + return error; +} + /* * Get the bitmap and summary inodes and the summary cache into the mount * structure at mount time. @@ -1342,14 +1377,27 @@ xfs_rtmount_inodes( return error; ASSERT(mp->m_rbmip != NULL); + error = xfs_rtmount_iread_extents(mp->m_rbmip, XFS_ILOCK_RTBITMAP); + if (error) + goto out_rele_bitmap; + error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip); - if (error) { - xfs_irele(mp->m_rbmip); - return error; - } + if (error) + goto out_rele_bitmap; ASSERT(mp->m_rsumip != NULL); + + error = xfs_rtmount_iread_extents(mp->m_rsumip, XFS_ILOCK_RTSUM); + if (error) + goto out_rele_summary; + xfs_alloc_rsum_cache(mp, sbp->sb_rbmblocks); return 0; + +out_rele_summary: + xfs_irele(mp->m_rsumip); +out_rele_bitmap: + xfs_irele(mp->m_rbmip); + return error; } void From 11f97e684583469fc342a561387cc44fac4f9b1f Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 6 Nov 2022 17:03:19 -0800 Subject: [PATCH 1628/4122] xfs: skip fscounters comparisons when the scan is incomplete If any part of the per-AG summary counter scan loop aborts without collecting all of the data we need, the scrubber's observation data will be invalid. Set the incomplete flag so that we abort the scrub without reporting false corruptions. Document the data dependency here too. Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/fscounters.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c index 3c56f5890da4..eeb36ac2c6d2 100644 --- a/fs/xfs/scrub/fscounters.c +++ b/fs/xfs/scrub/fscounters.c @@ -138,6 +138,18 @@ xchk_setup_fscounters( return xchk_trans_alloc(sc, 0); } +/* + * Part 1: Collecting filesystem summary counts. For each AG, we add its + * summary counts (total inodes, free inodes, free data blocks) to an incore + * copy of the overall filesystem summary counts. + * + * To avoid false corruption reports in part 2, any failure in this part must + * set the INCOMPLETE flag even when a negative errno is returned. This care + * must be taken with certain errno values (i.e. EFSBADCRC, EFSCORRUPTED, + * ECANCELED) that are absorbed into a scrub state flag update by + * xchk_*_process_error. + */ + /* Count free space btree blocks manually for pre-lazysbcount filesystems. */ static int xchk_fscount_btreeblks( @@ -225,8 +237,10 @@ retry: } if (pag) xfs_perag_put(pag); - if (error) + if (error) { + xchk_set_incomplete(sc); return error; + } /* * The global incore space reservation is taken from the incore @@ -267,6 +281,11 @@ retry: return 0; } +/* + * Part 2: Comparing filesystem summary counters. All we have to do here is + * sum the percpu counters and compare them to what we've observed. + */ + /* * Is the @counter reasonably close to the @expected value? * From 93b0c58ed04b6cbe45354f23bb5628fff31f9084 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 6 Nov 2022 17:03:17 -0800 Subject: [PATCH 1629/4122] xfs: don't return -EFSCORRUPTED from repair when resources cannot be grabbed If we tried to repair something but the repair failed with -EDEADLOCK, that means that the repair function couldn't grab some resource it needed and wants us to try again. 
If we try again (with TRY_HARDER) but still can't get all the resources we need, the repair fails and errors remain on the filesystem. Right now, repair returns the -EDEADLOCK to the caller as -EFSCORRUPTED, which results in XFS_SCRUB_OFLAG_CORRUPT being passed out to userspace. This is not correct because repair has not determined that anything is corrupt. If the repair had been invoked on an object that could be optimized but wasn't corrupt (OFLAG_PREEN), the inability to grab resources will be reported to userspace as corrupt metadata, and users will be unnecessarily alarmed that their suboptimal metadata turned into a corruption. Fix this by returning zero so that the results of the actual scrub will be copied back out to userspace. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/repair.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index 7323bd9fddfb..4b92f9253ccd 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c @@ -69,9 +69,9 @@ xrep_attempt( /* * We tried harder but still couldn't grab all the resources * we needed to fix it. The corruption has not been fixed, - * so report back to userspace. + * so exit to userspace with the scan's output flags unchanged. */ - return -EFSCORRUPTED; + return 0; default: /* * EAGAIN tells the caller to re-scrub, so we cannot return From 5f369dc5b4eb2becbdfd08924dcaf00e391f4ea1 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 6 Nov 2022 17:03:18 -0800 Subject: [PATCH 1630/4122] xfs: make rtbitmap ILOCKing consistent when scanning the rt bitmap file xfs_rtalloc_query_range scans the realtime bitmap file in order of increasing file offset, so this caller can take ILOCK_SHARED on the rt bitmap inode instead of ILOCK_EXCL. This isn't going to yield any practical benefits at mount time, but we'd like to make the locking usage consistent around xfs_rtalloc_query_all calls. 
Make all the places we do this use the same xfs_ilock lockflags for consistency. Fixes: 4c934c7dd60c ("xfs: report realtime space information via the rtbitmap") Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/xfs_fsmap.c | 4 ++-- fs/xfs/xfs_rtalloc.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c index d8337274c74d..88a88506ffff 100644 --- a/fs/xfs/xfs_fsmap.c +++ b/fs/xfs/xfs_fsmap.c @@ -524,7 +524,7 @@ xfs_getfsmap_rtdev_rtbitmap_query( struct xfs_mount *mp = tp->t_mountp; int error; - xfs_ilock(mp->m_rbmip, XFS_ILOCK_SHARED); + xfs_ilock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP); /* * Set up query parameters to return free rtextents covering the range @@ -551,7 +551,7 @@ xfs_getfsmap_rtdev_rtbitmap_query( if (error) goto err; err: - xfs_iunlock(mp->m_rbmip, XFS_ILOCK_SHARED); + xfs_iunlock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP); return error; } diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index b0846204c436..16534e9873f6 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -1311,10 +1311,10 @@ xfs_rtalloc_reinit_frextents( uint64_t val = 0; int error; - xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL); + xfs_ilock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP); error = xfs_rtalloc_query_all(mp, NULL, xfs_rtalloc_count_frextent, &val); - xfs_iunlock(mp->m_rbmip, XFS_ILOCK_EXCL); + xfs_iunlock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP); if (error) return error; From e74331d6fa2c21a8ecccfe0648dad5193b83defe Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 6 Nov 2022 17:03:19 -0800 Subject: [PATCH 1631/4122] xfs: online checking of the free rt extent count Teach the summary count checker to count the number of free realtime extents and compare that to the superblock copy. Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/fscounters.c | 86 ++++++++++++++++++++++++++++++++++++++- fs/xfs/scrub/scrub.h | 8 ---- 2 files changed, 84 insertions(+), 10 deletions(-) diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c index eeb36ac2c6d2..4777e7b89fdc 100644 --- a/fs/xfs/scrub/fscounters.c +++ b/fs/xfs/scrub/fscounters.c @@ -14,6 +14,8 @@ #include "xfs_health.h" #include "xfs_btree.h" #include "xfs_ag.h" +#include "xfs_rtalloc.h" +#include "xfs_inode.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" @@ -43,6 +45,16 @@ * our tolerance for mismatch between expected and actual counter values. */ +struct xchk_fscounters { + struct xfs_scrub *sc; + uint64_t icount; + uint64_t ifree; + uint64_t fdblocks; + uint64_t frextents; + unsigned long long icount_min; + unsigned long long icount_max; +}; + /* * Since the expected value computation is lockless but only browses incore * values, the percpu counters should be fairly close to each other. However, @@ -120,6 +132,7 @@ xchk_setup_fscounters( if (!sc->buf) return -ENOMEM; fsc = sc->buf; + fsc->sc = sc; xfs_icount_range(sc->mp, &fsc->icount_min, &fsc->icount_max); @@ -281,6 +294,59 @@ retry: return 0; } +#ifdef CONFIG_XFS_RT +STATIC int +xchk_fscount_add_frextent( + struct xfs_mount *mp, + struct xfs_trans *tp, + const struct xfs_rtalloc_rec *rec, + void *priv) +{ + struct xchk_fscounters *fsc = priv; + int error = 0; + + fsc->frextents += rec->ar_extcount; + + xchk_should_terminate(fsc->sc, &error); + return error; +} + +/* Calculate the number of free realtime extents from the realtime bitmap. 
*/ +STATIC int +xchk_fscount_count_frextents( + struct xfs_scrub *sc, + struct xchk_fscounters *fsc) +{ + struct xfs_mount *mp = sc->mp; + int error; + + fsc->frextents = 0; + if (!xfs_has_realtime(mp)) + return 0; + + xfs_ilock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP); + error = xfs_rtalloc_query_all(sc->mp, sc->tp, + xchk_fscount_add_frextent, fsc); + if (error) { + xchk_set_incomplete(sc); + goto out_unlock; + } + +out_unlock: + xfs_iunlock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP); + return error; +} +#else +STATIC int +xchk_fscount_count_frextents( + struct xfs_scrub *sc, + struct xchk_fscounters *fsc) +{ + fsc->frextents = 0; + return 0; +} +#endif /* CONFIG_XFS_RT */ + /* * Part 2: Comparing filesystem summary counters. All we have to do here is * sum the percpu counters and compare them to what we've observed. @@ -352,16 +418,17 @@ xchk_fscounters( { struct xfs_mount *mp = sc->mp; struct xchk_fscounters *fsc = sc->buf; - int64_t icount, ifree, fdblocks; + int64_t icount, ifree, fdblocks, frextents; int error; /* Snapshot the percpu counters. */ icount = percpu_counter_sum(&mp->m_icount); ifree = percpu_counter_sum(&mp->m_ifree); fdblocks = percpu_counter_sum(&mp->m_fdblocks); + frextents = percpu_counter_sum(&mp->m_frextents); /* No negative values, please! */ - if (icount < 0 || ifree < 0 || fdblocks < 0) + if (icount < 0 || ifree < 0 || fdblocks < 0 || frextents < 0) xchk_set_corrupt(sc); /* See if icount is obviously wrong. */ @@ -372,6 +439,10 @@ xchk_fscounters( if (fdblocks > mp->m_sb.sb_dblocks) xchk_set_corrupt(sc); + /* See if frextents is obviously wrong. */ + if (frextents > mp->m_sb.sb_rextents) + xchk_set_corrupt(sc); + /* * If ifree exceeds icount by more than the minimum variance then * something's probably wrong with the counters. @@ -386,6 +457,13 @@ xchk_fscounters( if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE) return 0; + /* Count the free extents counter for rt volumes. 
*/ + error = xchk_fscount_count_frextents(sc, fsc); + if (!xchk_process_error(sc, 0, XFS_SB_BLOCK(mp), &error)) + return error; + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE) + return 0; + /* Compare the in-core counters with whatever we counted. */ if (!xchk_fscount_within_range(sc, icount, &mp->m_icount, fsc->icount)) xchk_set_corrupt(sc); @@ -397,5 +475,9 @@ xchk_fscounters( fsc->fdblocks)) xchk_set_corrupt(sc); + if (!xchk_fscount_within_range(sc, frextents, &mp->m_frextents, + fsc->frextents)) + xchk_set_corrupt(sc); + return 0; } diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h index a0f097b8acb0..b4d391b4c938 100644 --- a/fs/xfs/scrub/scrub.h +++ b/fs/xfs/scrub/scrub.h @@ -169,12 +169,4 @@ void xchk_xref_is_used_rt_space(struct xfs_scrub *sc, xfs_rtblock_t rtbno, # define xchk_xref_is_used_rt_space(sc, rtbno, len) do { } while (0) #endif -struct xchk_fscounters { - uint64_t icount; - uint64_t ifree; - uint64_t fdblocks; - unsigned long long icount_min; - unsigned long long icount_max; -}; - #endif /* __XFS_SCRUB_SCRUB_H__ */ From 033985b6fe875a7a971cf4e3941e1f3085ba037c Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 6 Nov 2022 17:03:19 -0800 Subject: [PATCH 1632/4122] xfs: fix perag loop in xchk_bmap_check_rmaps sparse complains that we can return an uninitialized error from this function and that pag could be uninitialized. We know that there are no zero-AG filesystems and hence we had to call xchk_bmap_check_ag_rmaps at least once, so this is not actually possible, but I'm too worn out from automated complaints from unsophisticated AIs so let's just fix this and move on to more interesting problems, eh? Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/bmap.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index f0b9cb6506fd..cb203e083a4c 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -599,14 +599,14 @@ xchk_bmap_check_rmaps( for_each_perag(sc->mp, agno, pag) { error = xchk_bmap_check_ag_rmaps(sc, whichfork, pag); - if (error) - break; - if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) - break; + if (error || + (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) { + xfs_perag_put(pag); + return error; + } } - if (pag) - xfs_perag_put(pag); - return error; + + return 0; } /* From 6a5777865eebee1b53d7ae0fd2fa9ec2c6596df6 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 6 Nov 2022 17:03:20 -0800 Subject: [PATCH 1633/4122] xfs: teach scrub to check for adjacent bmaps when rmap larger than bmap When scrub is checking file fork mappings against rmap records and the rmap record starts before or ends after the bmap record, check the adjacent bmap records to make sure that they're adjacent to the one we're checking. This helps us to detect cases where the rmaps cover territory that the bmaps do not. Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/bmap.c | 74 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 72 insertions(+), 2 deletions(-) diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index cb203e083a4c..a4a156dcaa8a 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -90,6 +90,7 @@ out: struct xchk_bmap_info { struct xfs_scrub *sc; + struct xfs_iext_cursor icur; xfs_fileoff_t lastoff; bool is_rt; bool is_shared; @@ -146,6 +147,48 @@ xchk_bmap_get_rmap( return has_rmap; } +static inline bool +xchk_bmap_has_prev( + struct xchk_bmap_info *info, + struct xfs_bmbt_irec *irec) +{ + struct xfs_bmbt_irec got; + struct xfs_ifork *ifp; + + ifp = xfs_ifork_ptr(info->sc->ip, info->whichfork); + + if (!xfs_iext_peek_prev_extent(ifp, &info->icur, &got)) + return false; + if (got.br_startoff + got.br_blockcount != irec->br_startoff) + return false; + if (got.br_startblock + got.br_blockcount != irec->br_startblock) + return false; + if (got.br_state != irec->br_state) + return false; + return true; +} + +static inline bool +xchk_bmap_has_next( + struct xchk_bmap_info *info, + struct xfs_bmbt_irec *irec) +{ + struct xfs_bmbt_irec got; + struct xfs_ifork *ifp; + + ifp = xfs_ifork_ptr(info->sc->ip, info->whichfork); + + if (!xfs_iext_peek_next_extent(ifp, &info->icur, &got)) + return false; + if (irec->br_startoff + irec->br_blockcount != got.br_startoff) + return false; + if (irec->br_startblock + irec->br_blockcount != got.br_startblock) + return false; + if (got.br_state != irec->br_state) + return false; + return true; +} + /* Make sure that we have rmapbt records for this extent. */ STATIC void xchk_bmap_xref_rmap( @@ -214,6 +257,34 @@ xchk_bmap_xref_rmap( if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK) xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, irec->br_startoff); + + /* + * If the rmap starts before this bmbt record, make sure there's a bmbt + * record for the previous offset that is contiguous with this mapping. 
+ * Skip this for CoW fork extents because the refcount btree (and not + * the inode) is the ondisk owner for those extents. + */ + if (info->whichfork != XFS_COW_FORK && rmap.rm_startblock < agbno && + !xchk_bmap_has_prev(info, irec)) { + xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, + irec->br_startoff); + return; + } + + /* + * If the rmap ends after this bmbt record, make sure there's a bmbt + * record for the next offset that is contiguous with this mapping. + * Skip this for CoW fork extents because the refcount btree (and not + * the inode) is the ondisk owner for those extents. + */ + rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount; + if (info->whichfork != XFS_COW_FORK && + rmap_end > agbno + irec->br_blockcount && + !xchk_bmap_has_next(info, irec)) { + xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, + irec->br_startoff); + return; + } } /* Cross-reference a single rtdev extent record. */ @@ -626,7 +697,6 @@ xchk_bmap( struct xfs_inode *ip = sc->ip; struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork); xfs_fileoff_t endoff; - struct xfs_iext_cursor icur; int error = 0; /* Non-existent forks can be ignored. */ @@ -690,7 +760,7 @@ xchk_bmap( /* Scrub extent records. */ info.lastoff = 0; ifp = xfs_ifork_ptr(ip, whichfork); - for_each_xfs_iext(ifp, &icur, &irec) { + for_each_xfs_iext(ifp, &info.icur, &irec) { if (xchk_should_terminate(sc, &error) || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) goto out; From 830ffa09fb130d31f111848a75b65506fdac623d Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 6 Nov 2022 17:03:20 -0800 Subject: [PATCH 1634/4122] xfs: block map scrub should handle incore delalloc reservations Enhance the block map scrubber to check delayed allocation reservations. Though there are no physical space allocations to check, we do need to make sure that the range of file offsets being mapped are correct, and to bump the lastoff cursor so that key order checking works correctly. 
Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/bmap.c | 55 +++++++++++++++++++++++++++++---------------- 1 file changed, 36 insertions(+), 19 deletions(-) diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index a4a156dcaa8a..fa8f22ed057a 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -368,14 +368,13 @@ xchk_bmap_dirattr_extent( } /* Scrub a single extent record. */ -STATIC int +STATIC void xchk_bmap_iextent( struct xfs_inode *ip, struct xchk_bmap_info *info, struct xfs_bmbt_irec *irec) { struct xfs_mount *mp = info->sc->mp; - int error = 0; /* * Check for out-of-order extents. This record could have come @@ -396,14 +395,6 @@ xchk_bmap_iextent( xchk_fblock_set_corrupt(info->sc, info->whichfork, irec->br_startoff); - /* - * Check for delalloc extents. We never iterate the ones in the - * in-core extent scan, and we should never see these in the bmbt. - */ - if (isnullstartblock(irec->br_startblock)) - xchk_fblock_set_corrupt(info->sc, info->whichfork, - irec->br_startoff); - /* Make sure the extent points to a valid place. */ if (irec->br_blockcount > XFS_MAX_BMBT_EXTLEN) xchk_fblock_set_corrupt(info->sc, info->whichfork, @@ -424,15 +415,12 @@ xchk_bmap_iextent( irec->br_startoff); if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) - return 0; + return; if (info->is_rt) xchk_bmap_rt_iextent_xref(ip, info, irec); else xchk_bmap_iextent_xref(ip, info, irec); - - info->lastoff = irec->br_startoff + irec->br_blockcount; - return error; } /* Scrub a bmbt record. */ @@ -680,6 +668,33 @@ xchk_bmap_check_rmaps( return 0; } +/* Scrub a delalloc reservation from the incore extent map tree. */ +STATIC void +xchk_bmap_iextent_delalloc( + struct xfs_inode *ip, + struct xchk_bmap_info *info, + struct xfs_bmbt_irec *irec) +{ + struct xfs_mount *mp = info->sc->mp; + + /* + * Check for out-of-order extents. This record could have come + * from the incore list, for which there is no ordering check. 
+ */ + if (irec->br_startoff < info->lastoff) + xchk_fblock_set_corrupt(info->sc, info->whichfork, + irec->br_startoff); + + if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount)) + xchk_fblock_set_corrupt(info->sc, info->whichfork, + irec->br_startoff); + + /* Make sure the extent points to a valid place. */ + if (irec->br_blockcount > XFS_MAX_BMBT_EXTLEN) + xchk_fblock_set_corrupt(info->sc, info->whichfork, + irec->br_startoff); +} + /* * Scrub an inode fork's block mappings. * @@ -764,16 +779,18 @@ xchk_bmap( if (xchk_should_terminate(sc, &error) || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) goto out; - if (isnullstartblock(irec.br_startblock)) - continue; + if (irec.br_startoff >= endoff) { xchk_fblock_set_corrupt(sc, whichfork, irec.br_startoff); goto out; } - error = xchk_bmap_iextent(ip, &info, &irec); - if (error) - goto out; + + if (isnullstartblock(irec.br_startblock)) + xchk_bmap_iextent_delalloc(ip, &info, &irec); + else + xchk_bmap_iextent(ip, &info, &irec); + info.lastoff = irec.br_startoff + irec.br_blockcount; } error = xchk_bmap_check_rmaps(sc, whichfork); From f23c40443d1c2af87c99c1424f9e43bbd7307f92 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 6 Nov 2022 17:03:21 -0800 Subject: [PATCH 1635/4122] xfs: check quota files for unwritten extents Teach scrub to flag quota files containing unwritten extents. Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/quota.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c index 0b643ff32b22..9eeac8565394 100644 --- a/fs/xfs/scrub/quota.c +++ b/fs/xfs/scrub/quota.c @@ -14,6 +14,7 @@ #include "xfs_inode.h" #include "xfs_quota.h" #include "xfs_qm.h" +#include "xfs_bmap.h" #include "scrub/scrub.h" #include "scrub/common.h" @@ -189,11 +190,12 @@ xchk_quota_data_fork( for_each_xfs_iext(ifp, &icur, &irec) { if (xchk_should_terminate(sc, &error)) break; + /* - * delalloc extents or blocks mapped above the highest + * delalloc/unwritten extents or blocks mapped above the highest * quota id shouldn't happen. */ - if (isnullstartblock(irec.br_startblock) || + if (!xfs_bmap_is_written_extent(&irec) || irec.br_startoff > max_dqid_off || irec.br_startoff + irec.br_blockcount - 1 > max_dqid_off) { xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, From 31785537010a91a0d1dc403e5d049a38a3d4a30b Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 6 Nov 2022 17:03:21 -0800 Subject: [PATCH 1636/4122] xfs: check that CoW fork extents are not shared Ensure that extents in an inode's CoW fork are not marked as shared in the refcount btree. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/bmap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index fa8f22ed057a..4bb6672b02ba 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -335,6 +335,8 @@ xchk_bmap_iextent_xref( case XFS_COW_FORK: xchk_xref_is_cow_staging(info->sc, agbno, irec->br_blockcount); + xchk_xref_is_not_shared(info->sc, agbno, + irec->br_blockcount); break; } From 5eef46358fae1a6018d9f886a3ecd30e843728dd Mon Sep 17 00:00:00 2001 From: "Darrick J. 
Wong" Date: Sun, 6 Nov 2022 17:03:22 -0800 Subject: [PATCH 1637/4122] xfs: teach scrub to flag non-extents format cow forks CoW forks only exist in memory, which means that they can only ever have an incore extent tree. Hence they must always be FMT_EXTENTS, so check this when we're scrubbing them. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/bmap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index 4bb6672b02ba..d50d0eab196a 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -748,6 +748,8 @@ xchk_bmap( case XFS_DINODE_FMT_DEV: case XFS_DINODE_FMT_LOCAL: /* No mappings to check. */ + if (whichfork == XFS_COW_FORK) + xchk_fblock_set_corrupt(sc, whichfork, 0); goto out; case XFS_DINODE_FMT_EXTENTS: break; From bd5ab5f9874109586cbae5bc98e1f9ff574627e2 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 16 Nov 2022 16:08:03 -0800 Subject: [PATCH 1638/4122] xfs: don't warn about files that are exactly s_maxbytes long We can handle files that are exactly s_maxbytes bytes long; we just can't handle anything larger than that. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index 51820b40ab1c..7a2f38e5202c 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -365,7 +365,7 @@ xchk_dinode( * pagecache can't cache all the blocks in this file due to * overly large offsets, flag the inode for admin review. */ - if (isize >= mp->m_super->s_maxbytes) + if (isize > mp->m_super->s_maxbytes) xchk_ino_set_warning(sc, ino); /* di_nblocks */ From f36b954a1f1bf06b5746fea7ecf0fa639ac65324 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 16 Nov 2022 16:08:03 -0800 Subject: [PATCH 1639/4122] xfs: check inode core when scrubbing metadata files Metadata files (e.g. 
realtime bitmaps and quota files) do not show up in the bulkstat output, which means that scrub-by-handle does not work; they can only be checked through a specific scrub type. Therefore, each scrub type calls xchk_metadata_inode_forks to check the metadata for whatever's in the file. Unfortunately, that function doesn't actually check the inode record itself. Refactor the function a bit to make that happen. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/common.c | 40 ++++++++++++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index ad70f29233c3..613260b04a3d 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -781,6 +781,33 @@ xchk_buffer_recheck( trace_xchk_block_error(sc, xfs_buf_daddr(bp), fa); } +static inline int +xchk_metadata_inode_subtype( + struct xfs_scrub *sc, + unsigned int scrub_type) +{ + __u32 smtype = sc->sm->sm_type; + int error; + + sc->sm->sm_type = scrub_type; + + switch (scrub_type) { + case XFS_SCRUB_TYPE_INODE: + error = xchk_inode(sc); + break; + case XFS_SCRUB_TYPE_BMBTD: + error = xchk_bmap_data(sc); + break; + default: + ASSERT(0); + error = -EFSCORRUPTED; + break; + } + + sc->sm->sm_type = smtype; + return error; +} + /* * Scrub the attr/data forks of a metadata inode. The metadata inode must be * pointed to by sc->ip and the ILOCK must be held. @@ -789,13 +816,17 @@ int xchk_metadata_inode_forks( struct xfs_scrub *sc) { - __u32 smtype; bool shared; int error; if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) return 0; + /* Check the inode record. */ + error = xchk_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_INODE); + if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) + return error; + /* Metadata inodes don't live on the rt device. */ if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME) { xchk_ino_set_corrupt(sc, sc->ip->i_ino); @@ -815,10 +846,7 @@ xchk_metadata_inode_forks( } /* Invoke the data fork scrubber. 
*/ - smtype = sc->sm->sm_type; - sc->sm->sm_type = XFS_SCRUB_TYPE_BMBTD; - error = xchk_bmap_data(sc); - sc->sm->sm_type = smtype; + error = xchk_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_BMBTD); if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) return error; @@ -833,7 +861,7 @@ xchk_metadata_inode_forks( xchk_ino_set_corrupt(sc, sc->ip->i_ino); } - return error; + return 0; } /* From 1cec8bbc1764964de24d19983fbf9fee6ce3c09d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 17 Nov 2022 00:23:49 +0000 Subject: [PATCH 1640/4122] KVM: arm64: selftests: Disable single-step with correct KVM define Disable single-step by setting debug.control to KVM_GUESTDBG_ENABLE, not to SINGLE_STEP_DISABLE. The latter is an arbitrary test enum that just happens to have the same value as KVM_GUESTDBG_ENABLE, and so effectively disables single-step debug. No functional change intended. Cc: Reiji Watanabe Fixes: b18e4d4aebdd ("KVM: arm64: selftests: Add a test case for KVM_GUESTDBG_SINGLESTEP") Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20221117002350.2178351-2-seanjc@google.com Reviewed-by: Oliver Upton --- tools/testing/selftests/kvm/aarch64/debug-exceptions.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c index 878c334607e1..0316f225d36a 100644 --- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c @@ -369,7 +369,7 @@ void test_single_step_from_userspace(int test_cnt) KVM_GUESTDBG_SINGLESTEP; ss_enable = true; } else { - debug.control = SINGLE_STEP_DISABLE; + debug.control = KVM_GUESTDBG_ENABLE; ss_enable = false; } From b3d937722de0e64eebe267451a0e3d5ed5107ef7 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 17 Nov 2022 00:23:50 +0000 Subject: [PATCH 1641/4122] KVM: arm64: selftests: Disable single-step without relying on ucall() 
Automatically disable single-step when the guest reaches the end of the verified section instead of using an explicit ucall() to ask userspace to disable single-step. An upcoming change to implement a pool-based scheme for ucall() will add an atomic operation (bit test and set) in the guest ucall code, and if the compiler generates "old school" atomics, e.g. 40e57c: c85f7c20 ldxr x0, [x1] 40e580: aa100011 orr x17, x0, x16 40e584: c80ffc31 stlxr w15, x17, [x1] 40e588: 35ffffaf cbnz w15, 40e57c <__aarch64_ldset8_sync+0x1c> the guest will hang as the local exclusive monitor is reset by eret, i.e. the stlxr will always fail due to the debug exception taken to EL2. Link: https://lore.kernel.org/all/20221006003409.649993-8-seanjc@google.com Cc: Oliver Upton Cc: Marc Zyngier Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20221117002350.2178351-3-seanjc@google.com Reviewed-by: Oliver Upton --- .../selftests/kvm/aarch64/debug-exceptions.c | 28 ++++++++++--------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c index 0316f225d36a..a3c2216b65fb 100644 --- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c @@ -241,7 +241,6 @@ static void guest_svc_handler(struct ex_regs *regs) enum single_step_op { SINGLE_STEP_ENABLE = 0, - SINGLE_STEP_DISABLE = 1, }; static void guest_code_ss(int test_cnt) @@ -258,7 +257,7 @@ static void guest_code_ss(int test_cnt) GUEST_SYNC(SINGLE_STEP_ENABLE); /* - * The userspace will veriry that the pc is as expected during + * The userspace will verify that the pc is as expected during * single step execution between iter_ss_begin and iter_ss_end. 
*/ asm volatile("iter_ss_begin:nop\n"); @@ -268,11 +267,9 @@ static void guest_code_ss(int test_cnt) bvr = read_sysreg(dbgbvr0_el1); wvr = read_sysreg(dbgwvr0_el1); + /* Userspace disables Single Step when the end is nigh. */ asm volatile("iter_ss_end:\n"); - /* Disable Single Step execution */ - GUEST_SYNC(SINGLE_STEP_DISABLE); - GUEST_ASSERT(bvr == w_bvr); GUEST_ASSERT(wvr == w_wvr); } @@ -364,15 +361,12 @@ void test_single_step_from_userspace(int test_cnt) TEST_ASSERT(cmd == UCALL_SYNC, "Unexpected ucall cmd 0x%lx", cmd); - if (uc.args[1] == SINGLE_STEP_ENABLE) { - debug.control = KVM_GUESTDBG_ENABLE | - KVM_GUESTDBG_SINGLESTEP; - ss_enable = true; - } else { - debug.control = KVM_GUESTDBG_ENABLE; - ss_enable = false; - } + TEST_ASSERT(uc.args[1] == SINGLE_STEP_ENABLE, + "Unexpected ucall action 0x%lx", uc.args[1]); + debug.control = KVM_GUESTDBG_ENABLE | + KVM_GUESTDBG_SINGLESTEP; + ss_enable = true; vcpu_guest_debug_set(vcpu, &debug); continue; } @@ -385,6 +379,14 @@ void test_single_step_from_userspace(int test_cnt) "Unexpected pc 0x%lx (expected 0x%lx)", pc, test_pc); + if ((pc + 4) == (uint64_t)&iter_ss_end) { + test_pc = 0; + debug.control = KVM_GUESTDBG_ENABLE; + ss_enable = false; + vcpu_guest_debug_set(vcpu, &debug); + continue; + } + /* * If the current pc is between iter_ss_bgin and * iter_ss_end, the pc for the next KVM_EXIT_DEBUG should From 7046638192d52416adbfc273c36950f0e3311191 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 6 Oct 2022 00:34:03 +0000 Subject: [PATCH 1642/4122] KVM: selftests: Consolidate common code for populating ucall struct Make ucall() a common helper that populates struct ucall, and only calls into arch code to make the actual call out to userspace. Rename all arch-specific helpers to make it clear they're arch-specific, and to avoid collisions with common helpers (one more on its way...) 
Add WRITE_ONCE() to stores in ucall() code (as already done to aarch64 code in commit 9e2f6498efbb ("selftests: KVM: Handle compiler optimizations in ucall")) to prevent clang optimizations breaking ucalls. Cc: Colton Lewis Reviewed-by: Andrew Jones Tested-by: Peter Gonda Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20221006003409.649993-2-seanjc@google.com --- tools/testing/selftests/kvm/Makefile | 1 + .../selftests/kvm/include/ucall_common.h | 23 ++++++++++++++++--- .../testing/selftests/kvm/lib/aarch64/ucall.c | 22 ++++-------------- tools/testing/selftests/kvm/lib/riscv/ucall.c | 23 ++++--------------- tools/testing/selftests/kvm/lib/s390x/ucall.c | 23 ++++--------------- .../testing/selftests/kvm/lib/ucall_common.c | 20 ++++++++++++++++ .../testing/selftests/kvm/lib/x86_64/ucall.c | 23 ++++--------------- 7 files changed, 61 insertions(+), 74 deletions(-) create mode 100644 tools/testing/selftests/kvm/lib/ucall_common.c diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index a00253b79040..6e2a683629c7 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -47,6 +47,7 @@ LIBKVM += lib/memstress.c LIBKVM += lib/rbtree.c LIBKVM += lib/sparsebit.c LIBKVM += lib/test_util.c +LIBKVM += lib/ucall_common.c LIBKVM_STRING += lib/string_override.c diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h index ee79d180e07e..5a85f5318bbe 100644 --- a/tools/testing/selftests/kvm/include/ucall_common.h +++ b/tools/testing/selftests/kvm/include/ucall_common.h @@ -24,10 +24,27 @@ struct ucall { uint64_t args[UCALL_MAX_ARGS]; }; -void ucall_init(struct kvm_vm *vm, void *arg); -void ucall_uninit(struct kvm_vm *vm); +void ucall_arch_init(struct kvm_vm *vm, void *arg); +void ucall_arch_uninit(struct kvm_vm *vm); +void ucall_arch_do_ucall(vm_vaddr_t uc); +uint64_t ucall_arch_get_ucall(struct kvm_vcpu *vcpu, struct ucall 
*uc); + void ucall(uint64_t cmd, int nargs, ...); -uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc); + +static inline void ucall_init(struct kvm_vm *vm, void *arg) +{ + ucall_arch_init(vm, arg); +} + +static inline void ucall_uninit(struct kvm_vm *vm) +{ + ucall_arch_uninit(vm); +} + +static inline uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) +{ + return ucall_arch_get_ucall(vcpu, uc); +} #define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4) \ ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4) diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c index ed237b744690..3630708c32d6 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c +++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c @@ -21,7 +21,7 @@ static bool ucall_mmio_init(struct kvm_vm *vm, vm_paddr_t gpa) return true; } -void ucall_init(struct kvm_vm *vm, void *arg) +void ucall_arch_init(struct kvm_vm *vm, void *arg) { vm_paddr_t gpa, start, end, step, offset; unsigned int bits; @@ -64,30 +64,18 @@ void ucall_init(struct kvm_vm *vm, void *arg) TEST_FAIL("Can't find a ucall mmio address"); } -void ucall_uninit(struct kvm_vm *vm) +void ucall_arch_uninit(struct kvm_vm *vm) { ucall_exit_mmio_addr = 0; sync_global_to_guest(vm, ucall_exit_mmio_addr); } -void ucall(uint64_t cmd, int nargs, ...) 
+void ucall_arch_do_ucall(vm_vaddr_t uc) { - struct ucall uc = {}; - va_list va; - int i; - - WRITE_ONCE(uc.cmd, cmd); - nargs = min(nargs, UCALL_MAX_ARGS); - - va_start(va, nargs); - for (i = 0; i < nargs; ++i) - WRITE_ONCE(uc.args[i], va_arg(va, uint64_t)); - va_end(va); - - WRITE_ONCE(*ucall_exit_mmio_addr, (vm_vaddr_t)&uc); + WRITE_ONCE(*ucall_exit_mmio_addr, uc); } -uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) +uint64_t ucall_arch_get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) { struct kvm_run *run = vcpu->run; struct ucall ucall = {}; diff --git a/tools/testing/selftests/kvm/lib/riscv/ucall.c b/tools/testing/selftests/kvm/lib/riscv/ucall.c index 087b9740bc8f..b1598f418c1f 100644 --- a/tools/testing/selftests/kvm/lib/riscv/ucall.c +++ b/tools/testing/selftests/kvm/lib/riscv/ucall.c @@ -10,11 +10,11 @@ #include "kvm_util.h" #include "processor.h" -void ucall_init(struct kvm_vm *vm, void *arg) +void ucall_arch_init(struct kvm_vm *vm, void *arg) { } -void ucall_uninit(struct kvm_vm *vm) +void ucall_arch_uninit(struct kvm_vm *vm) { } @@ -44,27 +44,14 @@ struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0, return ret; } -void ucall(uint64_t cmd, int nargs, ...) 
+void ucall_arch_do_ucall(vm_vaddr_t uc) { - struct ucall uc = { - .cmd = cmd, - }; - va_list va; - int i; - - nargs = min(nargs, UCALL_MAX_ARGS); - - va_start(va, nargs); - for (i = 0; i < nargs; ++i) - uc.args[i] = va_arg(va, uint64_t); - va_end(va); - sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT, KVM_RISCV_SELFTESTS_SBI_UCALL, - (vm_vaddr_t)&uc, 0, 0, 0, 0, 0); + uc, 0, 0, 0, 0, 0); } -uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) +uint64_t ucall_arch_get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) { struct kvm_run *run = vcpu->run; struct ucall ucall = {}; diff --git a/tools/testing/selftests/kvm/lib/s390x/ucall.c b/tools/testing/selftests/kvm/lib/s390x/ucall.c index 73dc4e21190f..114cb4af295f 100644 --- a/tools/testing/selftests/kvm/lib/s390x/ucall.c +++ b/tools/testing/selftests/kvm/lib/s390x/ucall.c @@ -6,34 +6,21 @@ */ #include "kvm_util.h" -void ucall_init(struct kvm_vm *vm, void *arg) +void ucall_arch_init(struct kvm_vm *vm, void *arg) { } -void ucall_uninit(struct kvm_vm *vm) +void ucall_arch_uninit(struct kvm_vm *vm) { } -void ucall(uint64_t cmd, int nargs, ...) 
+void ucall_arch_do_ucall(vm_vaddr_t uc) { - struct ucall uc = { - .cmd = cmd, - }; - va_list va; - int i; - - nargs = min(nargs, UCALL_MAX_ARGS); - - va_start(va, nargs); - for (i = 0; i < nargs; ++i) - uc.args[i] = va_arg(va, uint64_t); - va_end(va); - /* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */ - asm volatile ("diag 0,%0,0x501" : : "a"(&uc) : "memory"); + asm volatile ("diag 0,%0,0x501" : : "a"(uc) : "memory"); } -uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) +uint64_t ucall_arch_get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) { struct kvm_run *run = vcpu->run; struct ucall ucall = {}; diff --git a/tools/testing/selftests/kvm/lib/ucall_common.c b/tools/testing/selftests/kvm/lib/ucall_common.c new file mode 100644 index 000000000000..2395c7f1d543 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/ucall_common.c @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "kvm_util.h" + +void ucall(uint64_t cmd, int nargs, ...) +{ + struct ucall uc = {}; + va_list va; + int i; + + WRITE_ONCE(uc.cmd, cmd); + + nargs = min(nargs, UCALL_MAX_ARGS); + + va_start(va, nargs); + for (i = 0; i < nargs; ++i) + WRITE_ONCE(uc.args[i], va_arg(va, uint64_t)); + va_end(va); + + ucall_arch_do_ucall((vm_vaddr_t)&uc); +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86_64/ucall.c index e5f0f9e0d3ee..9f532dba1003 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/ucall.c +++ b/tools/testing/selftests/kvm/lib/x86_64/ucall.c @@ -8,34 +8,21 @@ #define UCALL_PIO_PORT ((uint16_t)0x1000) -void ucall_init(struct kvm_vm *vm, void *arg) +void ucall_arch_init(struct kvm_vm *vm, void *arg) { } -void ucall_uninit(struct kvm_vm *vm) +void ucall_arch_uninit(struct kvm_vm *vm) { } -void ucall(uint64_t cmd, int nargs, ...) 
+void ucall_arch_do_ucall(vm_vaddr_t uc) { - struct ucall uc = { - .cmd = cmd, - }; - va_list va; - int i; - - nargs = min(nargs, UCALL_MAX_ARGS); - - va_start(va, nargs); - for (i = 0; i < nargs; ++i) - uc.args[i] = va_arg(va, uint64_t); - va_end(va); - asm volatile("in %[port], %%al" - : : [port] "d" (UCALL_PIO_PORT), "D" (&uc) : "rax", "memory"); + : : [port] "d" (UCALL_PIO_PORT), "D" (uc) : "rax", "memory"); } -uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) +uint64_t ucall_arch_get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) { struct kvm_run *run = vcpu->run; struct ucall ucall = {}; From ef38871eb22879438d2af8642ed7a52c1616f410 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 6 Oct 2022 00:34:04 +0000 Subject: [PATCH 1643/4122] KVM: selftests: Consolidate boilerplate code in get_ucall() Consolidate the actual copying of a ucall struct from guest=>host into the common get_ucall(). Return a host virtual address instead of a guest virtual address even though the addr_gva2hva() part could be moved to get_ucall() too. Conceptually, get_ucall() is invoked from the host and should return a host virtual address (and returning NULL for "nothing to see here" is far superior to returning 0). Use pointer shenanigans instead of an unnecessary bounce buffer when the caller of get_ucall() provides a valid pointer. 
Reviewed-by: Andrew Jones Tested-by: Peter Gonda Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20221006003409.649993-3-seanjc@google.com --- .../selftests/kvm/include/ucall_common.h | 8 ++------ .../testing/selftests/kvm/lib/aarch64/ucall.c | 14 +++----------- tools/testing/selftests/kvm/lib/riscv/ucall.c | 19 +++---------------- tools/testing/selftests/kvm/lib/s390x/ucall.c | 16 +++------------- .../testing/selftests/kvm/lib/ucall_common.c | 19 +++++++++++++++++++ .../testing/selftests/kvm/lib/x86_64/ucall.c | 16 +++------------- 6 files changed, 33 insertions(+), 59 deletions(-) diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h index 5a85f5318bbe..63bfc60be995 100644 --- a/tools/testing/selftests/kvm/include/ucall_common.h +++ b/tools/testing/selftests/kvm/include/ucall_common.h @@ -27,9 +27,10 @@ struct ucall { void ucall_arch_init(struct kvm_vm *vm, void *arg); void ucall_arch_uninit(struct kvm_vm *vm); void ucall_arch_do_ucall(vm_vaddr_t uc); -uint64_t ucall_arch_get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc); +void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu); void ucall(uint64_t cmd, int nargs, ...); +uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc); static inline void ucall_init(struct kvm_vm *vm, void *arg) { @@ -41,11 +42,6 @@ static inline void ucall_uninit(struct kvm_vm *vm) ucall_arch_uninit(vm); } -static inline uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) -{ - return ucall_arch_get_ucall(vcpu, uc); -} - #define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4) \ ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4) #define GUEST_SYNC(stage) ucall(UCALL_SYNC, 2, "hello", stage) diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c index 3630708c32d6..f214f5cc53d3 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c +++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c 
@@ -75,13 +75,9 @@ void ucall_arch_do_ucall(vm_vaddr_t uc) WRITE_ONCE(*ucall_exit_mmio_addr, uc); } -uint64_t ucall_arch_get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) +void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; - struct ucall ucall = {}; - - if (uc) - memset(uc, 0, sizeof(*uc)); if (run->exit_reason == KVM_EXIT_MMIO && run->mmio.phys_addr == (uint64_t)ucall_exit_mmio_addr) { @@ -90,12 +86,8 @@ uint64_t ucall_arch_get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) TEST_ASSERT(run->mmio.is_write && run->mmio.len == 8, "Unexpected ucall exit mmio address access"); memcpy(&gva, run->mmio.data, sizeof(gva)); - memcpy(&ucall, addr_gva2hva(vcpu->vm, gva), sizeof(ucall)); - - vcpu_run_complete_io(vcpu); - if (uc) - memcpy(uc, &ucall, sizeof(ucall)); + return addr_gva2hva(vcpu->vm, gva); } - return ucall.cmd; + return NULL; } diff --git a/tools/testing/selftests/kvm/lib/riscv/ucall.c b/tools/testing/selftests/kvm/lib/riscv/ucall.c index b1598f418c1f..37e091d4366e 100644 --- a/tools/testing/selftests/kvm/lib/riscv/ucall.c +++ b/tools/testing/selftests/kvm/lib/riscv/ucall.c @@ -51,27 +51,15 @@ void ucall_arch_do_ucall(vm_vaddr_t uc) uc, 0, 0, 0, 0, 0); } -uint64_t ucall_arch_get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) +void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; - struct ucall ucall = {}; - - if (uc) - memset(uc, 0, sizeof(*uc)); if (run->exit_reason == KVM_EXIT_RISCV_SBI && run->riscv_sbi.extension_id == KVM_RISCV_SELFTESTS_SBI_EXT) { switch (run->riscv_sbi.function_id) { case KVM_RISCV_SELFTESTS_SBI_UCALL: - memcpy(&ucall, - addr_gva2hva(vcpu->vm, run->riscv_sbi.args[0]), - sizeof(ucall)); - - vcpu_run_complete_io(vcpu); - if (uc) - memcpy(uc, &ucall, sizeof(ucall)); - - break; + return addr_gva2hva(vcpu->vm, run->riscv_sbi.args[0]); case KVM_RISCV_SELFTESTS_SBI_UNEXP: vcpu_dump(stderr, vcpu, 2); TEST_ASSERT(0, "Unexpected trap taken by guest"); @@ -80,6 +68,5 @@ uint64_t 
ucall_arch_get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) break; } } - - return ucall.cmd; + return NULL; } diff --git a/tools/testing/selftests/kvm/lib/s390x/ucall.c b/tools/testing/selftests/kvm/lib/s390x/ucall.c index 114cb4af295f..0f695a031d35 100644 --- a/tools/testing/selftests/kvm/lib/s390x/ucall.c +++ b/tools/testing/selftests/kvm/lib/s390x/ucall.c @@ -20,13 +20,9 @@ void ucall_arch_do_ucall(vm_vaddr_t uc) asm volatile ("diag 0,%0,0x501" : : "a"(uc) : "memory"); } -uint64_t ucall_arch_get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) +void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; - struct ucall ucall = {}; - - if (uc) - memset(uc, 0, sizeof(*uc)); if (run->exit_reason == KVM_EXIT_S390_SIEIC && run->s390_sieic.icptcode == 4 && @@ -34,13 +30,7 @@ uint64_t ucall_arch_get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) (run->s390_sieic.ipb >> 16) == 0x501) { int reg = run->s390_sieic.ipa & 0xf; - memcpy(&ucall, addr_gva2hva(vcpu->vm, run->s.regs.gprs[reg]), - sizeof(ucall)); - - vcpu_run_complete_io(vcpu); - if (uc) - memcpy(uc, &ucall, sizeof(ucall)); + return addr_gva2hva(vcpu->vm, run->s.regs.gprs[reg]); } - - return ucall.cmd; + return NULL; } diff --git a/tools/testing/selftests/kvm/lib/ucall_common.c b/tools/testing/selftests/kvm/lib/ucall_common.c index 2395c7f1d543..ced480860746 100644 --- a/tools/testing/selftests/kvm/lib/ucall_common.c +++ b/tools/testing/selftests/kvm/lib/ucall_common.c @@ -18,3 +18,22 @@ void ucall(uint64_t cmd, int nargs, ...) 
ucall_arch_do_ucall((vm_vaddr_t)&uc); } + +uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) +{ + struct ucall ucall; + void *addr; + + if (!uc) + uc = &ucall; + + addr = ucall_arch_get_ucall(vcpu); + if (addr) { + memcpy(uc, addr, sizeof(*uc)); + vcpu_run_complete_io(vcpu); + } else { + memset(uc, 0, sizeof(*uc)); + } + + return uc->cmd; +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86_64/ucall.c index 9f532dba1003..ead9946399ab 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/ucall.c +++ b/tools/testing/selftests/kvm/lib/x86_64/ucall.c @@ -22,25 +22,15 @@ void ucall_arch_do_ucall(vm_vaddr_t uc) : : [port] "d" (UCALL_PIO_PORT), "D" (uc) : "rax", "memory"); } -uint64_t ucall_arch_get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) +void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; - struct ucall ucall = {}; - - if (uc) - memset(uc, 0, sizeof(*uc)); if (run->exit_reason == KVM_EXIT_IO && run->io.port == UCALL_PIO_PORT) { struct kvm_regs regs; vcpu_regs_get(vcpu, &regs); - memcpy(&ucall, addr_gva2hva(vcpu->vm, (vm_vaddr_t)regs.rdi), - sizeof(ucall)); - - vcpu_run_complete_io(vcpu); - if (uc) - memcpy(uc, &ucall, sizeof(ucall)); + return addr_gva2hva(vcpu->vm, regs.rdi); } - - return ucall.cmd; + return NULL; } From dc88244bf5488b04fb7bbe47d8d9c38ff8f7dbb4 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 6 Oct 2022 00:34:05 +0000 Subject: [PATCH 1644/4122] KVM: selftests: Automatically do init_ucall() for non-barebones VMs Do init_ucall() automatically during VM creation to kill two (three?) birds with one stone. First, initializing ucall immediately after VM creation allows forcing aarch64's MMIO ucall address to immediately follow memslot0. This is still somewhat fragile as tests could clobber the MMIO address with a new memslot, but it's safe-ish since tests have to be conservative when accounting for memslot0. 
And this can be hardened in the future by creating a read-only memslot for the MMIO page (KVM ARM exits with MMIO if the guest writes to a read-only memslot). Add a TODO to document that selftests can and should use a memslot for the ucall MMIO (doing so requires yet more rework because tests assume they can use all memslots except memslot0). Second, initializing ucall for all VMs prepares for making ucall initialization meaningful on all architectures. aarch64 is currently the only arch that needs to do any setup, but that will change in the future by switching to a pool-based implementation (instead of the current stack-based approach). Lastly, defining the ucall MMIO address from common code will simplify switching all architectures (except s390) to a common MMIO-based ucall implementation (if there's ever sufficient motivation to do so). Cc: Oliver Upton Reviewed-by: Andrew Jones Tested-by: Peter Gonda Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20221006003409.649993-4-seanjc@google.com --- .../selftests/kvm/aarch64/aarch32_id_regs.c | 2 - .../selftests/kvm/aarch64/arch_timer.c | 1 - .../selftests/kvm/aarch64/debug-exceptions.c | 2 - .../selftests/kvm/aarch64/hypercalls.c | 1 - .../testing/selftests/kvm/aarch64/psci_test.c | 1 - .../testing/selftests/kvm/aarch64/vgic_init.c | 2 - .../testing/selftests/kvm/aarch64/vgic_irq.c | 1 - tools/testing/selftests/kvm/dirty_log_test.c | 2 - .../selftests/kvm/include/ucall_common.h | 6 +-- .../selftests/kvm/kvm_page_table_test.c | 1 - .../testing/selftests/kvm/lib/aarch64/ucall.c | 54 ++----------------- tools/testing/selftests/kvm/lib/kvm_util.c | 11 ++++ tools/testing/selftests/kvm/lib/memstress.c | 2 - tools/testing/selftests/kvm/lib/riscv/ucall.c | 2 +- tools/testing/selftests/kvm/lib/s390x/ucall.c | 2 +- .../testing/selftests/kvm/lib/x86_64/ucall.c | 2 +- .../testing/selftests/kvm/memslot_perf_test.c | 1 - tools/testing/selftests/kvm/rseq_test.c | 1 - tools/testing/selftests/kvm/steal_time.c | 
1 - .../kvm/system_counter_offset_test.c | 1 - 20 files changed, 20 insertions(+), 76 deletions(-) diff --git a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c b/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c index 6f9c1f19c7f6..03f6b3af6b4d 100644 --- a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c +++ b/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c @@ -158,8 +158,6 @@ int main(void) TEST_REQUIRE(vcpu_aarch64_only(vcpu)); - ucall_init(vm, NULL); - test_user_raz_wi(vcpu); test_user_raz_invariant(vcpu); test_guest_raz(vcpu); diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c index 9409617fce9c..54016cdd8a09 100644 --- a/tools/testing/selftests/kvm/aarch64/arch_timer.c +++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c @@ -375,7 +375,6 @@ static struct kvm_vm *test_vm_create(void) for (i = 0; i < nr_vcpus; i++) vcpu_init_descriptor_tables(vcpus[i]); - ucall_init(vm, NULL); test_init_timer_irq(vm); gic_fd = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA); __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3"); diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c index a3c2216b65fb..d86c4e4d1c82 100644 --- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c @@ -292,7 +292,6 @@ static void test_guest_debug_exceptions(void) int stage; vm = vm_create_with_one_vcpu(&vcpu, guest_code); - ucall_init(vm, NULL); vm_init_descriptor_tables(vm); vcpu_init_descriptor_tables(vcpu); @@ -343,7 +342,6 @@ void test_single_step_from_userspace(int test_cnt) struct kvm_guest_debug debug = {}; vm = vm_create_with_one_vcpu(&vcpu, guest_code_ss); - ucall_init(vm, NULL); run = vcpu->run; vcpu_args_set(vcpu, 1, test_cnt); diff --git a/tools/testing/selftests/kvm/aarch64/hypercalls.c b/tools/testing/selftests/kvm/aarch64/hypercalls.c index 
a39da3fe4952..3dceecfd1f62 100644 --- a/tools/testing/selftests/kvm/aarch64/hypercalls.c +++ b/tools/testing/selftests/kvm/aarch64/hypercalls.c @@ -236,7 +236,6 @@ static struct kvm_vm *test_vm_create(struct kvm_vcpu **vcpu) vm = vm_create_with_one_vcpu(vcpu, guest_code); - ucall_init(vm, NULL); steal_time_init(*vcpu); return vm; diff --git a/tools/testing/selftests/kvm/aarch64/psci_test.c b/tools/testing/selftests/kvm/aarch64/psci_test.c index e0b9e81a3e09..cfa36f387948 100644 --- a/tools/testing/selftests/kvm/aarch64/psci_test.c +++ b/tools/testing/selftests/kvm/aarch64/psci_test.c @@ -79,7 +79,6 @@ static struct kvm_vm *setup_vm(void *guest_code, struct kvm_vcpu **source, struct kvm_vm *vm; vm = vm_create(2); - ucall_init(vm, NULL); vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init); init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2); diff --git a/tools/testing/selftests/kvm/aarch64/vgic_init.c b/tools/testing/selftests/kvm/aarch64/vgic_init.c index 9c131d977a1b..eef816b80993 100644 --- a/tools/testing/selftests/kvm/aarch64/vgic_init.c +++ b/tools/testing/selftests/kvm/aarch64/vgic_init.c @@ -68,8 +68,6 @@ static void guest_code(void) /* we don't want to assert on run execution, hence that helper */ static int run_vcpu(struct kvm_vcpu *vcpu) { - ucall_init(vcpu->vm, NULL); - return __vcpu_run(vcpu) ? 
-errno : 0; } diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/aarch64/vgic_irq.c index 4ead42a072b7..e0310ebc313c 100644 --- a/tools/testing/selftests/kvm/aarch64/vgic_irq.c +++ b/tools/testing/selftests/kvm/aarch64/vgic_irq.c @@ -756,7 +756,6 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split) print_args(&args); vm = vm_create_with_one_vcpu(&vcpu, guest_code); - ucall_init(vm, NULL); vm_init_descriptor_tables(vm); vcpu_init_descriptor_tables(vcpu); diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index b5234d6efbe1..b458a2701634 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -756,8 +756,6 @@ static void run_test(enum vm_guest_mode mode, void *arg) /* Cache the HVA pointer of the region */ host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem); - ucall_init(vm, NULL); - /* Export the shared variables to the guest */ sync_global_to_guest(vm, host_page_size); sync_global_to_guest(vm, guest_page_size); diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h index 63bfc60be995..8077a6d8b1ba 100644 --- a/tools/testing/selftests/kvm/include/ucall_common.h +++ b/tools/testing/selftests/kvm/include/ucall_common.h @@ -24,7 +24,7 @@ struct ucall { uint64_t args[UCALL_MAX_ARGS]; }; -void ucall_arch_init(struct kvm_vm *vm, void *arg); +void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa); void ucall_arch_uninit(struct kvm_vm *vm); void ucall_arch_do_ucall(vm_vaddr_t uc); void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu); @@ -32,9 +32,9 @@ void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu); void ucall(uint64_t cmd, int nargs, ...); uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc); -static inline void ucall_init(struct kvm_vm *vm, void *arg) +static inline void ucall_init(struct kvm_vm *vm, vm_paddr_t 
mmio_gpa) { - ucall_arch_init(vm, arg); + ucall_arch_init(vm, mmio_gpa); } static inline void ucall_uninit(struct kvm_vm *vm) diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c index 696b366be06b..3db32d56787e 100644 --- a/tools/testing/selftests/kvm/kvm_page_table_test.c +++ b/tools/testing/selftests/kvm/kvm_page_table_test.c @@ -289,7 +289,6 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg) host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem); /* Export shared structure test_args to guest */ - ucall_init(vm, NULL); sync_global_to_guest(vm, test_args); ret = sem_init(&test_stage_updated, 0, 0); diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c index f214f5cc53d3..f02ae27c3e43 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c +++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c @@ -8,60 +8,12 @@ static vm_vaddr_t *ucall_exit_mmio_addr; -static bool ucall_mmio_init(struct kvm_vm *vm, vm_paddr_t gpa) +void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) { - if (kvm_userspace_memory_region_find(vm, gpa, gpa + 1)) - return false; + virt_pg_map(vm, mmio_gpa, mmio_gpa); - virt_pg_map(vm, gpa, gpa); - - ucall_exit_mmio_addr = (vm_vaddr_t *)gpa; + ucall_exit_mmio_addr = (vm_vaddr_t *)mmio_gpa; sync_global_to_guest(vm, ucall_exit_mmio_addr); - - return true; -} - -void ucall_arch_init(struct kvm_vm *vm, void *arg) -{ - vm_paddr_t gpa, start, end, step, offset; - unsigned int bits; - bool ret; - - if (arg) { - gpa = (vm_paddr_t)arg; - ret = ucall_mmio_init(vm, gpa); - TEST_ASSERT(ret, "Can't set ucall mmio address to %lx", gpa); - return; - } - - /* - * Find an address within the allowed physical and virtual address - * spaces, that does _not_ have a KVM memory region associated with - * it. 
Identity mapping an address like this allows the guest to - * access it, but as KVM doesn't know what to do with it, it - * will assume it's something userspace handles and exit with - * KVM_EXIT_MMIO. Well, at least that's how it works for AArch64. - * Here we start with a guess that the addresses around 5/8th - * of the allowed space are unmapped and then work both down and - * up from there in 1/16th allowed space sized steps. - * - * Note, we need to use VA-bits - 1 when calculating the allowed - * virtual address space for an identity mapping because the upper - * half of the virtual address space is the two's complement of the - * lower and won't match physical addresses. - */ - bits = vm->va_bits - 1; - bits = min(vm->pa_bits, bits); - end = 1ul << bits; - start = end * 5 / 8; - step = end / 16; - for (offset = 0; offset < end - start; offset += step) { - if (ucall_mmio_init(vm, start - offset)) - return; - if (ucall_mmio_init(vm, start + offset)) - return; - } - TEST_FAIL("Can't find a ucall mmio address"); } void ucall_arch_uninit(struct kvm_vm *vm) diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 3b7710fb3784..07c8edd4e548 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -335,15 +335,26 @@ struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus, { uint64_t nr_pages = vm_nr_pages_required(mode, nr_runnable_vcpus, nr_extra_pages); + struct userspace_mem_region *slot0; struct kvm_vm *vm; vm = ____vm_create(mode, nr_pages); kvm_vm_elf_load(vm, program_invocation_name); + /* + * TODO: Add proper defines to protect the library's memslots, and then + * carve out memslot1 for the ucall MMIO address. KVM treats writes to + * read-only memslots as MMIO, and creating a read-only memslot for the + * MMIO region would prevent silently clobbering the MMIO region. 
+ */ + slot0 = memslot2region(vm, 0); + ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size); + #ifdef __x86_64__ vm_create_irqchip(vm); #endif + return vm; } diff --git a/tools/testing/selftests/kvm/lib/memstress.c b/tools/testing/selftests/kvm/lib/memstress.c index 503da78c558d..b66404e56a3f 100644 --- a/tools/testing/selftests/kvm/lib/memstress.c +++ b/tools/testing/selftests/kvm/lib/memstress.c @@ -221,8 +221,6 @@ struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus, memstress_setup_nested(vm, nr_vcpus, vcpus); } - ucall_init(vm, NULL); - /* Export the shared variables to the guest. */ sync_global_to_guest(vm, memstress_args); diff --git a/tools/testing/selftests/kvm/lib/riscv/ucall.c b/tools/testing/selftests/kvm/lib/riscv/ucall.c index 37e091d4366e..c58ecb8a0981 100644 --- a/tools/testing/selftests/kvm/lib/riscv/ucall.c +++ b/tools/testing/selftests/kvm/lib/riscv/ucall.c @@ -10,7 +10,7 @@ #include "kvm_util.h" #include "processor.h" -void ucall_arch_init(struct kvm_vm *vm, void *arg) +void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) { } diff --git a/tools/testing/selftests/kvm/lib/s390x/ucall.c b/tools/testing/selftests/kvm/lib/s390x/ucall.c index 0f695a031d35..208f0f04299b 100644 --- a/tools/testing/selftests/kvm/lib/s390x/ucall.c +++ b/tools/testing/selftests/kvm/lib/s390x/ucall.c @@ -6,7 +6,7 @@ */ #include "kvm_util.h" -void ucall_arch_init(struct kvm_vm *vm, void *arg) +void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) { } diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86_64/ucall.c index ead9946399ab..016a0487cf72 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/ucall.c +++ b/tools/testing/selftests/kvm/lib/x86_64/ucall.c @@ -8,7 +8,7 @@ #define UCALL_PIO_PORT ((uint16_t)0x1000) -void ucall_arch_init(struct kvm_vm *vm, void *arg) +void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) { } diff --git 
a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index 330aaef1c02f..d771262ea584 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -277,7 +277,6 @@ static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots, TEST_ASSERT(data->hva_slots, "malloc() fail"); data->vm = __vm_create_with_one_vcpu(&data->vcpu, mempages, guest_code); - ucall_init(data->vm, NULL); pr_info_v("Adding slots 1..%i, each slot with %"PRIu64" pages + %"PRIu64" extra pages last\n", max_mem_slots - 1, data->pages_per_slot, rempages); diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c index 6f88da7e60be..0e9e2b48a51f 100644 --- a/tools/testing/selftests/kvm/rseq_test.c +++ b/tools/testing/selftests/kvm/rseq_test.c @@ -224,7 +224,6 @@ int main(int argc, char *argv[]) * CPU affinity. */ vm = vm_create_with_one_vcpu(&vcpu, guest_code); - ucall_init(vm, NULL); pthread_create(&migration_thread, NULL, migration_worker, (void *)(unsigned long)syscall(SYS_gettid)); diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c index db8967f1a17b..c87f38712073 100644 --- a/tools/testing/selftests/kvm/steal_time.c +++ b/tools/testing/selftests/kvm/steal_time.c @@ -266,7 +266,6 @@ int main(int ac, char **av) gpages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, STEAL_TIME_SIZE * NR_VCPUS); vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, gpages, 0); virt_map(vm, ST_GPA_BASE, ST_GPA_BASE, gpages); - ucall_init(vm, NULL); TEST_REQUIRE(is_steal_time_supported(vcpus[0])); diff --git a/tools/testing/selftests/kvm/system_counter_offset_test.c b/tools/testing/selftests/kvm/system_counter_offset_test.c index 1c274933912b..7f5b330b6a1b 100644 --- a/tools/testing/selftests/kvm/system_counter_offset_test.c +++ b/tools/testing/selftests/kvm/system_counter_offset_test.c @@ -121,7 +121,6 @@ int 
main(void) vm = vm_create_with_one_vcpu(&vcpu, guest_main); check_preconditions(vcpu); - ucall_init(vm, NULL); enter_guest(vcpu); kvm_vm_free(vm); From cf4694be2b2cf74945e50d39a02ea2307c4495f4 Mon Sep 17 00:00:00 2001 From: Peter Gonda Date: Thu, 6 Oct 2022 00:34:06 +0000 Subject: [PATCH 1645/4122] tools: Add atomic_test_and_set_bit() Add x86 and generic implementations of atomic_test_and_set_bit() to allow KVM selftests to atomically manage bitmaps. Note, the generic version is taken from arch_test_and_set_bit() as of commit 415d83249709 ("locking/atomic: Make test_and_*_bit() ordered on failure"). Signed-off-by: Peter Gonda Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20221006003409.649993-5-seanjc@google.com --- tools/arch/x86/include/asm/atomic.h | 7 +++++++ tools/include/asm-generic/atomic-gcc.h | 12 ++++++++++++ 2 files changed, 19 insertions(+) diff --git a/tools/arch/x86/include/asm/atomic.h b/tools/arch/x86/include/asm/atomic.h index 1f5e26aae9fc..01cc27ec4520 100644 --- a/tools/arch/x86/include/asm/atomic.h +++ b/tools/arch/x86/include/asm/atomic.h @@ -8,6 +8,7 @@ #define LOCK_PREFIX "\n\tlock; " +#include #include /* @@ -70,4 +71,10 @@ static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new) return cmpxchg(&v->counter, old, new); } +static inline int atomic_test_and_set_bit(long nr, unsigned long *addr) +{ + GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts), *addr, "Ir", nr, "%0", "c"); + +} + #endif /* _TOOLS_LINUX_ASM_X86_ATOMIC_H */ diff --git a/tools/include/asm-generic/atomic-gcc.h b/tools/include/asm-generic/atomic-gcc.h index 4c1966f7c77a..6daa68bf5b9e 100644 --- a/tools/include/asm-generic/atomic-gcc.h +++ b/tools/include/asm-generic/atomic-gcc.h @@ -4,6 +4,7 @@ #include #include +#include /* * Atomic operations that C can't guarantee us. 
Useful for @@ -69,4 +70,15 @@ static inline int atomic_cmpxchg(atomic_t *v, int oldval, int newval) return cmpxchg(&(v)->counter, oldval, newval); } +static inline int atomic_test_and_set_bit(long nr, unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + long old; + + addr += BIT_WORD(nr); + + old = __sync_fetch_and_or(addr, mask); + return !!(old & mask); +} + #endif /* __TOOLS_ASM_GENERIC_ATOMIC_H */ From 03b4750533fc6519845ac2ca0e1d88a81ac260a1 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 6 Oct 2022 00:34:07 +0000 Subject: [PATCH 1646/4122] KVM: selftests: Make arm64's MMIO ucall multi-VM friendly Fix a mostly-theoretical bug where ARM's ucall MMIO setup could result in different VMs stomping on each other by clobbering the global pointer. Fix the most obvious issue by saving the MMIO gpa into the VM. A more subtle bug is that creating VMs in parallel (on multiple tasks) could result in a VM using the wrong address. Synchronizing a global to a guest effectively snapshots the value on a per-VM basis, i.e. the "global" is already prepped to work with multiple VMs, but setting the global in the host is not thread-safe. To fix that bug, add write_guest_global() to allow stuffing a VM's copy of a "global" without modifying the host value. 
Reviewed-by: Andrew Jones Tested-by: Peter Gonda Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20221006003409.649993-6-seanjc@google.com --- .../selftests/kvm/include/kvm_util_base.h | 15 +++++++++++++++ .../testing/selftests/kvm/lib/aarch64/ucall.c | 19 ++++++++++++++----- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h index 3bf2333ef95d..a7047e0767d3 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -16,6 +16,7 @@ #include #include "linux/rbtree.h" +#include #include @@ -81,6 +82,7 @@ struct kvm_vm { struct sparsebit *vpages_mapped; bool has_irqchip; bool pgd_created; + vm_paddr_t ucall_mmio_addr; vm_paddr_t pgd; vm_vaddr_t gdt; vm_vaddr_t tss; @@ -722,6 +724,19 @@ kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, memcpy(&(g), _p, sizeof(g)); \ }) +/* + * Write a global value, but only in the VM's (guest's) domain. Primarily used + * for "globals" that hold per-VM values (VMs always duplicate code and global + * data into their own region of physical memory), but can be used anytime it's + * undesirable to change the host's copy of the global. 
+ */ +#define write_guest_global(vm, g, val) ({ \ + typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \ + typeof(g) _val = val; \ + \ + memcpy(_p, &(_val), sizeof(g)); \ +}) + void assert_on_unhandled_exception(struct kvm_vcpu *vcpu); void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c index f02ae27c3e43..1c38bd260f90 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c +++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c @@ -6,20 +6,29 @@ */ #include "kvm_util.h" +/* + * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each + * VM), it must not be accessed from host code. + */ static vm_vaddr_t *ucall_exit_mmio_addr; +static void ucall_set_mmio_addr(struct kvm_vm *vm, vm_paddr_t mmio_gpa) +{ + vm->ucall_mmio_addr = mmio_gpa; + + write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gpa); +} + void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) { virt_pg_map(vm, mmio_gpa, mmio_gpa); - ucall_exit_mmio_addr = (vm_vaddr_t *)mmio_gpa; - sync_global_to_guest(vm, ucall_exit_mmio_addr); + ucall_set_mmio_addr(vm, mmio_gpa); } void ucall_arch_uninit(struct kvm_vm *vm) { - ucall_exit_mmio_addr = 0; - sync_global_to_guest(vm, ucall_exit_mmio_addr); + ucall_set_mmio_addr(vm, (vm_paddr_t)NULL); } void ucall_arch_do_ucall(vm_vaddr_t uc) @@ -32,7 +41,7 @@ void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) struct kvm_run *run = vcpu->run; if (run->exit_reason == KVM_EXIT_MMIO && - run->mmio.phys_addr == (uint64_t)ucall_exit_mmio_addr) { + run->mmio.phys_addr == vcpu->vm->ucall_mmio_addr) { vm_vaddr_t gva; TEST_ASSERT(run->mmio.is_write && run->mmio.len == 8, From 28a65567acb51759079adf5c6e3fcd047cda8120 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 6 Oct 2022 00:34:08 +0000 Subject: [PATCH 1647/4122] KVM: selftests: Drop now-unnecessary ucall_uninit() Drop ucall_uninit() and ucall_arch_uninit() now 
that ARM doesn't modify the host's copy of ucall_exit_mmio_addr, i.e. now that there's no need to reset the pointer before potentially creating a new VM. The few calls to ucall_uninit() are all immediately followed by kvm_vm_free(), and that is likely always going to hold true, i.e. it's extremely unlikely a test will want to effectively disable ucall in the middle of a test. Reviewed-by: Andrew Jones Tested-by: Peter Gonda Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20221006003409.649993-7-seanjc@google.com --- .../selftests/kvm/aarch64/aarch32_id_regs.c | 1 - tools/testing/selftests/kvm/dirty_log_test.c | 1 - tools/testing/selftests/kvm/include/ucall_common.h | 6 ------ tools/testing/selftests/kvm/kvm_page_table_test.c | 1 - tools/testing/selftests/kvm/lib/aarch64/ucall.c | 14 ++------------ tools/testing/selftests/kvm/lib/memstress.c | 1 - tools/testing/selftests/kvm/lib/riscv/ucall.c | 4 ---- tools/testing/selftests/kvm/lib/s390x/ucall.c | 4 ---- tools/testing/selftests/kvm/lib/x86_64/ucall.c | 4 ---- 9 files changed, 2 insertions(+), 34 deletions(-) diff --git a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c b/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c index 03f6b3af6b4d..b1d2158c0b6d 100644 --- a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c +++ b/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c @@ -162,6 +162,5 @@ int main(void) test_user_raz_invariant(vcpu); test_guest_raz(vcpu); - ucall_uninit(vm); kvm_vm_free(vm); } diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index b458a2701634..a38c4369fb8e 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -811,7 +811,6 @@ static void run_test(enum vm_guest_mode mode, void *arg) free(bmap); free(host_bmap_track); - ucall_uninit(vm); kvm_vm_free(vm); } diff --git a/tools/testing/selftests/kvm/include/ucall_common.h 
b/tools/testing/selftests/kvm/include/ucall_common.h index 8077a6d8b1ba..2662a4352a8c 100644 --- a/tools/testing/selftests/kvm/include/ucall_common.h +++ b/tools/testing/selftests/kvm/include/ucall_common.h @@ -25,7 +25,6 @@ struct ucall { }; void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa); -void ucall_arch_uninit(struct kvm_vm *vm); void ucall_arch_do_ucall(vm_vaddr_t uc); void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu); @@ -37,11 +36,6 @@ static inline void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) ucall_arch_init(vm, mmio_gpa); } -static inline void ucall_uninit(struct kvm_vm *vm) -{ - ucall_arch_uninit(vm); -} - #define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4) \ ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4) #define GUEST_SYNC(stage) ucall(UCALL_SYNC, 2, "hello", stage) diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c index 3db32d56787e..b3b00be1ef82 100644 --- a/tools/testing/selftests/kvm/kvm_page_table_test.c +++ b/tools/testing/selftests/kvm/kvm_page_table_test.c @@ -416,7 +416,6 @@ static void run_test(enum vm_guest_mode mode, void *arg) TEST_ASSERT(ret == 0, "Error in sem_destroy"); free(vcpu_threads); - ucall_uninit(vm); kvm_vm_free(vm); } diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c index 1c38bd260f90..21d73afcb14f 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c +++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c @@ -12,23 +12,13 @@ */ static vm_vaddr_t *ucall_exit_mmio_addr; -static void ucall_set_mmio_addr(struct kvm_vm *vm, vm_paddr_t mmio_gpa) -{ - vm->ucall_mmio_addr = mmio_gpa; - - write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gpa); -} - void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) { virt_pg_map(vm, mmio_gpa, mmio_gpa); - ucall_set_mmio_addr(vm, mmio_gpa); -} + vm->ucall_mmio_addr = mmio_gpa; -void ucall_arch_uninit(struct 
kvm_vm *vm) -{ - ucall_set_mmio_addr(vm, (vm_paddr_t)NULL); + write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gpa); } void ucall_arch_do_ucall(vm_vaddr_t uc) diff --git a/tools/testing/selftests/kvm/lib/memstress.c b/tools/testing/selftests/kvm/lib/memstress.c index b66404e56a3f..2de8a5d527b3 100644 --- a/tools/testing/selftests/kvm/lib/memstress.c +++ b/tools/testing/selftests/kvm/lib/memstress.c @@ -229,7 +229,6 @@ struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus, void memstress_destroy_vm(struct kvm_vm *vm) { - ucall_uninit(vm); kvm_vm_free(vm); } diff --git a/tools/testing/selftests/kvm/lib/riscv/ucall.c b/tools/testing/selftests/kvm/lib/riscv/ucall.c index c58ecb8a0981..78acdb084ab0 100644 --- a/tools/testing/selftests/kvm/lib/riscv/ucall.c +++ b/tools/testing/selftests/kvm/lib/riscv/ucall.c @@ -14,10 +14,6 @@ void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) { } -void ucall_arch_uninit(struct kvm_vm *vm) -{ -} - struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0, unsigned long arg1, unsigned long arg2, unsigned long arg3, unsigned long arg4, diff --git a/tools/testing/selftests/kvm/lib/s390x/ucall.c b/tools/testing/selftests/kvm/lib/s390x/ucall.c index 208f0f04299b..cbee520a26f2 100644 --- a/tools/testing/selftests/kvm/lib/s390x/ucall.c +++ b/tools/testing/selftests/kvm/lib/s390x/ucall.c @@ -10,10 +10,6 @@ void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) { } -void ucall_arch_uninit(struct kvm_vm *vm) -{ -} - void ucall_arch_do_ucall(vm_vaddr_t uc) { /* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */ diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86_64/ucall.c index 016a0487cf72..eb8bf55b359a 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/ucall.c +++ b/tools/testing/selftests/kvm/lib/x86_64/ucall.c @@ -12,10 +12,6 @@ void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) { } -void ucall_arch_uninit(struct kvm_vm *vm) -{ 
-} - void ucall_arch_do_ucall(vm_vaddr_t uc) { asm volatile("in %[port], %%al" From 426729b2cf2e02ff4bd5c988832c044c8b77f4c7 Mon Sep 17 00:00:00 2001 From: Peter Gonda Date: Thu, 6 Oct 2022 00:34:09 +0000 Subject: [PATCH 1648/4122] KVM: selftests: Add ucall pool based implementation To play nice with guests whose stack memory is encrypted, e.g. AMD SEV, introduce a new "ucall pool" implementation that passes the ucall struct via dedicated memory (which can be mapped shared, a.k.a. plain text). Because not all architectures have access to the vCPU index in the guest, use a bitmap with atomic accesses to track which entries in the pool are free/used. A list+lock could also work in theory, but synchronizing the individual pointers to the guest would be a mess. Note, there's no need to rewalk the bitmap to ensure success. If all vCPUs are simply allocating, success is guaranteed because there are enough entries for all vCPUs. If one or more vCPUs are freeing and then reallocating, success is guaranteed because vCPUs _always_ walk the bitmap from 0=>N; if a vCPU frees an entry and then wins a race to re-allocate, then either it will consume the entry it just freed (bit is the first free bit), or the losing vCPU is guaranteed to see the freed bit (winner consumes an earlier bit, which the loser hasn't yet visited). 
Reviewed-by: Andrew Jones Signed-off-by: Peter Gonda Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20221006003409.649993-8-seanjc@google.com --- .../selftests/kvm/include/ucall_common.h | 9 ++- .../testing/selftests/kvm/lib/aarch64/ucall.c | 7 +- tools/testing/selftests/kvm/lib/riscv/ucall.c | 2 +- tools/testing/selftests/kvm/lib/s390x/ucall.c | 2 +- .../testing/selftests/kvm/lib/ucall_common.c | 72 +++++++++++++++++-- .../testing/selftests/kvm/lib/x86_64/ucall.c | 2 +- 6 files changed, 77 insertions(+), 17 deletions(-) diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h index 2662a4352a8c..bdd373189a77 100644 --- a/tools/testing/selftests/kvm/include/ucall_common.h +++ b/tools/testing/selftests/kvm/include/ucall_common.h @@ -22,6 +22,9 @@ enum { struct ucall { uint64_t cmd; uint64_t args[UCALL_MAX_ARGS]; + + /* Host virtual address of this struct. */ + struct ucall *hva; }; void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa); @@ -30,11 +33,7 @@ void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu); void ucall(uint64_t cmd, int nargs, ...); uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc); - -static inline void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) -{ - ucall_arch_init(vm, mmio_gpa); -} +void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa); #define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4) \ ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4) diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c index 21d73afcb14f..562c16dfbb00 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c +++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c @@ -32,12 +32,9 @@ void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) if (run->exit_reason == KVM_EXIT_MMIO && run->mmio.phys_addr == vcpu->vm->ucall_mmio_addr) { - vm_vaddr_t gva; - - 
TEST_ASSERT(run->mmio.is_write && run->mmio.len == 8, + TEST_ASSERT(run->mmio.is_write && run->mmio.len == sizeof(uint64_t), "Unexpected ucall exit mmio address access"); - memcpy(&gva, run->mmio.data, sizeof(gva)); - return addr_gva2hva(vcpu->vm, gva); + return (void *)(*((uint64_t *)run->mmio.data)); } return NULL; diff --git a/tools/testing/selftests/kvm/lib/riscv/ucall.c b/tools/testing/selftests/kvm/lib/riscv/ucall.c index 78acdb084ab0..9a3476a2dfca 100644 --- a/tools/testing/selftests/kvm/lib/riscv/ucall.c +++ b/tools/testing/selftests/kvm/lib/riscv/ucall.c @@ -55,7 +55,7 @@ void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) run->riscv_sbi.extension_id == KVM_RISCV_SELFTESTS_SBI_EXT) { switch (run->riscv_sbi.function_id) { case KVM_RISCV_SELFTESTS_SBI_UCALL: - return addr_gva2hva(vcpu->vm, run->riscv_sbi.args[0]); + return (void *)run->riscv_sbi.args[0]; case KVM_RISCV_SELFTESTS_SBI_UNEXP: vcpu_dump(stderr, vcpu, 2); TEST_ASSERT(0, "Unexpected trap taken by guest"); diff --git a/tools/testing/selftests/kvm/lib/s390x/ucall.c b/tools/testing/selftests/kvm/lib/s390x/ucall.c index cbee520a26f2..a7f02dc372cf 100644 --- a/tools/testing/selftests/kvm/lib/s390x/ucall.c +++ b/tools/testing/selftests/kvm/lib/s390x/ucall.c @@ -26,7 +26,7 @@ void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) (run->s390_sieic.ipb >> 16) == 0x501) { int reg = run->s390_sieic.ipa & 0xf; - return addr_gva2hva(vcpu->vm, run->s.regs.gprs[reg]); + return (void *)run->s.regs.gprs[reg]; } return NULL; } diff --git a/tools/testing/selftests/kvm/lib/ucall_common.c b/tools/testing/selftests/kvm/lib/ucall_common.c index ced480860746..fcae96461e46 100644 --- a/tools/testing/selftests/kvm/lib/ucall_common.c +++ b/tools/testing/selftests/kvm/lib/ucall_common.c @@ -1,22 +1,86 @@ // SPDX-License-Identifier: GPL-2.0-only #include "kvm_util.h" +#include "linux/types.h" +#include "linux/bitmap.h" +#include "linux/atomic.h" + +struct ucall_header { + DECLARE_BITMAP(in_use, KVM_MAX_VCPUS); + struct ucall 
ucalls[KVM_MAX_VCPUS]; +}; + +/* + * ucall_pool holds per-VM values (global data is duplicated by each VM), it + * must not be accessed from host code. + */ +static struct ucall_header *ucall_pool; + +void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) +{ + struct ucall_header *hdr; + struct ucall *uc; + vm_vaddr_t vaddr; + int i; + + vaddr = vm_vaddr_alloc(vm, sizeof(*hdr), KVM_UTIL_MIN_VADDR); + hdr = (struct ucall_header *)addr_gva2hva(vm, vaddr); + memset(hdr, 0, sizeof(*hdr)); + + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + uc = &hdr->ucalls[i]; + uc->hva = uc; + } + + write_guest_global(vm, ucall_pool, (struct ucall_header *)vaddr); + + ucall_arch_init(vm, mmio_gpa); +} + +static struct ucall *ucall_alloc(void) +{ + struct ucall *uc; + int i; + + GUEST_ASSERT(ucall_pool); + + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + if (!atomic_test_and_set_bit(i, ucall_pool->in_use)) { + uc = &ucall_pool->ucalls[i]; + memset(uc->args, 0, sizeof(uc->args)); + return uc; + } + } + + GUEST_ASSERT(0); + return NULL; +} + +static void ucall_free(struct ucall *uc) +{ + /* Beware, here be pointer arithmetic. */ + clear_bit(uc - ucall_pool->ucalls, ucall_pool->in_use); +} void ucall(uint64_t cmd, int nargs, ...) 
{ - struct ucall uc = {}; + struct ucall *uc; va_list va; int i; - WRITE_ONCE(uc.cmd, cmd); + uc = ucall_alloc(); + + WRITE_ONCE(uc->cmd, cmd); nargs = min(nargs, UCALL_MAX_ARGS); va_start(va, nargs); for (i = 0; i < nargs; ++i) - WRITE_ONCE(uc.args[i], va_arg(va, uint64_t)); + WRITE_ONCE(uc->args[i], va_arg(va, uint64_t)); va_end(va); - ucall_arch_do_ucall((vm_vaddr_t)&uc); + ucall_arch_do_ucall((vm_vaddr_t)uc->hva); + + ucall_free(uc); } uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86_64/ucall.c index eb8bf55b359a..4d41dc63cc9e 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/ucall.c +++ b/tools/testing/selftests/kvm/lib/x86_64/ucall.c @@ -26,7 +26,7 @@ void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) struct kvm_regs regs; vcpu_regs_get(vcpu, &regs); - return addr_gva2hva(vcpu->vm, regs.rdi); + return (void *)regs.rdi; } return NULL; } From 9a6418dacd241169df9e0eeefc7980b3f9b40794 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 28 Sep 2022 22:34:58 +0100 Subject: [PATCH 1649/4122] KVM: selftests: Fix spelling mistake "begining" -> "beginning" There is a spelling mistake in an assert message. Fix it. 
Signed-off-by: Colin Ian King Reviewed-by: Jim Mattson Link: https://lore.kernel.org/r/20220928213458.64089-1-colin.i.king@gmail.com [sean: fix an ironic typo in the changelog] Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/lib/elf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c index 9f54c098d9d0..d71a9a5974de 100644 --- a/tools/testing/selftests/kvm/lib/elf.c +++ b/tools/testing/selftests/kvm/lib/elf.c @@ -138,7 +138,7 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename) offset = hdr.e_phoff + (n1 * hdr.e_phentsize); offset_rv = lseek(fd, offset, SEEK_SET); TEST_ASSERT(offset_rv == offset, - "Failed to seek to begining of program header %u,\n" + "Failed to seek to beginning of program header %u,\n" " filename: %s\n" " rv: %jd errno: %i", n1, filename, (intmax_t) offset_rv, errno); From 816c54b74742ac1a74a74de9355ab982d11e63e6 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 6 Oct 2022 00:45:06 +0000 Subject: [PATCH 1650/4122] KVM: selftests: Drop helpers to read/write page table entries Drop vm_{g,s}et_page_table_entry() and instead expose the "inner" helper (was _vm_get_page_table_entry()) that returns a _pointer_ to the PTE, i.e. let tests directly modify PTEs instead of bouncing through helpers that just make life difficult. Opportunistically use BIT_ULL() in emulator_error_test, and use the MAXPHYADDR define to set the "rogue" GPA bit instead of open coding the same value. No functional change intended. 
Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20221006004512.666529-2-seanjc@google.com --- .../selftests/kvm/include/x86_64/processor.h | 6 ++---- .../selftests/kvm/lib/x86_64/processor.c | 21 ++----------------- .../kvm/x86_64/emulator_error_test.c | 6 ++++-- 3 files changed, 8 insertions(+), 25 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index e8ca0d8a6a7e..30d5df1ebaad 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -827,10 +827,8 @@ static inline uint8_t wrmsr_safe(uint32_t msr, uint64_t val) bool kvm_is_tdp_enabled(void); -uint64_t vm_get_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu, - uint64_t vaddr); -void vm_set_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu, - uint64_t vaddr, uint64_t pte); +uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu, + uint64_t vaddr); uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, uint64_t a3); diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 39c4409ef56a..90b35998b0f3 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -241,9 +241,8 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, } } -static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm, - struct kvm_vcpu *vcpu, - uint64_t vaddr) +uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu, + uint64_t vaddr) { uint16_t index[4]; uint64_t *pml4e, *pdpe, *pde; @@ -313,22 +312,6 @@ static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm, return &pte[index[0]]; } -uint64_t vm_get_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu, - uint64_t vaddr) -{ - uint64_t *pte = _vm_get_page_table_entry(vm, vcpu, vaddr); 
- - return *(uint64_t *)pte; -} - -void vm_set_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu, - uint64_t vaddr, uint64_t pte) -{ - uint64_t *new_pte = _vm_get_page_table_entry(vm, vcpu, vaddr); - - *(uint64_t *)new_pte = pte; -} - void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) { uint64_t *pml4e, *pml4e_start; diff --git a/tools/testing/selftests/kvm/x86_64/emulator_error_test.c b/tools/testing/selftests/kvm/x86_64/emulator_error_test.c index 236e11755ba6..bde247f3c8a1 100644 --- a/tools/testing/selftests/kvm/x86_64/emulator_error_test.c +++ b/tools/testing/selftests/kvm/x86_64/emulator_error_test.c @@ -152,8 +152,9 @@ int main(int argc, char *argv[]) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; - uint64_t gpa, pte; + uint64_t *pte; uint64_t *hva; + uint64_t gpa; int rc; /* Tell stdout not to buffer its content */ @@ -178,8 +179,9 @@ int main(int argc, char *argv[]) virt_map(vm, MEM_REGION_GVA, MEM_REGION_GPA, 1); hva = addr_gpa2hva(vm, MEM_REGION_GPA); memset(hva, 0, PAGE_SIZE); + pte = vm_get_page_table_entry(vm, vcpu, MEM_REGION_GVA); - vm_set_page_table_entry(vm, vcpu, MEM_REGION_GVA, pte | (1ull << 36)); + *pte |= BIT_ULL(MAXPHYADDR); vcpu_run(vcpu); process_exit_on_emulation_error(vcpu); From 751f280017b697d98936618a21ca3defdc03a9f4 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 6 Oct 2022 00:45:07 +0000 Subject: [PATCH 1651/4122] KVM: selftests: Drop reserved bit checks from PTE accessor Drop the reserved bit checks from the helper to retrieve a PTE, there's very little value in sanity checking the constructed page tables as any bugs will quickly be noticed in the form of an unexpected #PF. The checks also place unnecessary restrictions on the usage of the helpers, e.g. if a test _wanted_ to set reserved bits for whatever reason. Removing the NX check in particular allows for the removal of the @vcpu param, which will in turn allow the helper to be reused nearly verbatim for addr_gva2gpa(). 
Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20221006004512.666529-3-seanjc@google.com --- .../selftests/kvm/include/x86_64/processor.h | 3 +-- .../selftests/kvm/lib/x86_64/processor.c | 26 +------------------ .../kvm/x86_64/emulator_error_test.c | 2 +- 3 files changed, 3 insertions(+), 28 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 30d5df1ebaad..53d52a5ace48 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -827,8 +827,7 @@ static inline uint8_t wrmsr_safe(uint32_t msr, uint64_t val) bool kvm_is_tdp_enabled(void); -uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu, - uint64_t vaddr); +uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr); uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, uint64_t a3); diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 90b35998b0f3..9e196837a794 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -241,29 +241,11 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, } } -uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu, - uint64_t vaddr) +uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr) { uint16_t index[4]; uint64_t *pml4e, *pdpe, *pde; uint64_t *pte; - struct kvm_sregs sregs; - uint64_t rsvd_mask = 0; - - /* Set the high bits in the reserved mask. */ - if (vm->pa_bits < 52) - rsvd_mask = GENMASK_ULL(51, vm->pa_bits); - - /* - * SDM vol 3, fig 4-11 "Formats of CR3 and Paging-Structure Entries - * with 4-Level Paging and 5-Level Paging". - * If IA32_EFER.NXE = 0 and the P flag of a paging-structure entry is 1, - * the XD flag (bit 63) is reserved. 
- */ - vcpu_sregs_get(vcpu, &sregs); - if ((sregs.efer & EFER_NX) == 0) { - rsvd_mask |= PTE_NX_MASK; - } TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " "unknown or unsupported guest mode, mode: 0x%x", vm->mode); @@ -286,24 +268,18 @@ uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu, pml4e = addr_gpa2hva(vm, vm->pgd); TEST_ASSERT(pml4e[index[3]] & PTE_PRESENT_MASK, "Expected pml4e to be present for gva: 0x%08lx", vaddr); - TEST_ASSERT((pml4e[index[3]] & (rsvd_mask | PTE_LARGE_MASK)) == 0, - "Unexpected reserved bits set."); pdpe = addr_gpa2hva(vm, PTE_GET_PFN(pml4e[index[3]]) * vm->page_size); TEST_ASSERT(pdpe[index[2]] & PTE_PRESENT_MASK, "Expected pdpe to be present for gva: 0x%08lx", vaddr); TEST_ASSERT(!(pdpe[index[2]] & PTE_LARGE_MASK), "Expected pdpe to map a pde not a 1-GByte page."); - TEST_ASSERT((pdpe[index[2]] & rsvd_mask) == 0, - "Unexpected reserved bits set."); pde = addr_gpa2hva(vm, PTE_GET_PFN(pdpe[index[2]]) * vm->page_size); TEST_ASSERT(pde[index[1]] & PTE_PRESENT_MASK, "Expected pde to be present for gva: 0x%08lx", vaddr); TEST_ASSERT(!(pde[index[1]] & PTE_LARGE_MASK), "Expected pde to map a pte not a 2-MByte page."); - TEST_ASSERT((pde[index[1]] & rsvd_mask) == 0, - "Unexpected reserved bits set."); pte = addr_gpa2hva(vm, PTE_GET_PFN(pde[index[1]]) * vm->page_size); TEST_ASSERT(pte[index[0]] & PTE_PRESENT_MASK, diff --git a/tools/testing/selftests/kvm/x86_64/emulator_error_test.c b/tools/testing/selftests/kvm/x86_64/emulator_error_test.c index bde247f3c8a1..1abb34735754 100644 --- a/tools/testing/selftests/kvm/x86_64/emulator_error_test.c +++ b/tools/testing/selftests/kvm/x86_64/emulator_error_test.c @@ -180,7 +180,7 @@ int main(int argc, char *argv[]) hva = addr_gpa2hva(vm, MEM_REGION_GPA); memset(hva, 0, PAGE_SIZE); - pte = vm_get_page_table_entry(vm, vcpu, MEM_REGION_GVA); + pte = vm_get_page_table_entry(vm, MEM_REGION_GVA); *pte |= BIT_ULL(MAXPHYADDR); vcpu_run(vcpu); From 
91add12d384c650d243b8ccdee1f2ddea3c9a85d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 6 Oct 2022 00:45:08 +0000 Subject: [PATCH 1652/4122] KVM: selftests: Remove useless shifts when creating guest page tables Remove the pointless shift from GPA=>GFN and immediately back to GFN=>GPA when creating guest page tables. Ignore the other walkers that have a similar pattern for the moment, they will be converted to use virt_get_pte() in the near future. No functional change intended. Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20221006004512.666529-4-seanjc@google.com --- .../selftests/kvm/include/x86_64/processor.h | 3 ++- .../selftests/kvm/lib/x86_64/processor.c | 17 ++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 53d52a5ace48..9676a3464758 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -177,7 +177,8 @@ struct kvm_x86_cpu_feature { #define PAGE_MASK (~(PAGE_SIZE-1)) #define PHYSICAL_PAGE_MASK GENMASK_ULL(51, 12) -#define PTE_GET_PFN(pte) (((pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT) +#define PTE_GET_PA(pte) ((pte) & PHYSICAL_PAGE_MASK) +#define PTE_GET_PFN(pte) (PTE_GET_PA(pte) >> PAGE_SHIFT) /* General Registers in 64-Bit Mode */ struct gpr64_regs { diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 9e196837a794..324bf24564a1 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -131,23 +131,23 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) } } -static void *virt_get_pte(struct kvm_vm *vm, uint64_t pt_pfn, uint64_t vaddr, +static void *virt_get_pte(struct kvm_vm *vm, uint64_t pt_gpa, uint64_t vaddr, int level) { - uint64_t *page_table = addr_gpa2hva(vm, pt_pfn 
<< vm->page_shift); + uint64_t *page_table = addr_gpa2hva(vm, pt_gpa); int index = (vaddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu; return &page_table[index]; } static uint64_t *virt_create_upper_pte(struct kvm_vm *vm, - uint64_t pt_pfn, + uint64_t pt_gpa, uint64_t vaddr, uint64_t paddr, int current_level, int target_level) { - uint64_t *pte = virt_get_pte(vm, pt_pfn, vaddr, current_level); + uint64_t *pte = virt_get_pte(vm, pt_gpa, vaddr, current_level); if (!(*pte & PTE_PRESENT_MASK)) { *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK; @@ -197,21 +197,20 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level) * Allocate upper level page tables, if not already present. Return * early if a hugepage was created. */ - pml4e = virt_create_upper_pte(vm, vm->pgd >> vm->page_shift, - vaddr, paddr, PG_LEVEL_512G, level); + pml4e = virt_create_upper_pte(vm, vm->pgd, vaddr, paddr, PG_LEVEL_512G, level); if (*pml4e & PTE_LARGE_MASK) return; - pdpe = virt_create_upper_pte(vm, PTE_GET_PFN(*pml4e), vaddr, paddr, PG_LEVEL_1G, level); + pdpe = virt_create_upper_pte(vm, PTE_GET_PA(*pml4e), vaddr, paddr, PG_LEVEL_1G, level); if (*pdpe & PTE_LARGE_MASK) return; - pde = virt_create_upper_pte(vm, PTE_GET_PFN(*pdpe), vaddr, paddr, PG_LEVEL_2M, level); + pde = virt_create_upper_pte(vm, PTE_GET_PA(*pdpe), vaddr, paddr, PG_LEVEL_2M, level); if (*pde & PTE_LARGE_MASK) return; /* Fill in page table entry. 
*/ - pte = virt_get_pte(vm, PTE_GET_PFN(*pde), vaddr, PG_LEVEL_4K); + pte = virt_get_pte(vm, PTE_GET_PA(*pde), vaddr, PG_LEVEL_4K); TEST_ASSERT(!(*pte & PTE_PRESENT_MASK), "PTE already present for 4k page at vaddr: 0x%lx\n", vaddr); *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK); From ed0b58fc6f0bdde360c28314e0faedc9a0a6c3de Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 6 Oct 2022 00:45:09 +0000 Subject: [PATCH 1653/4122] KVM: selftests: Verify parent PTE is PRESENT when getting child PTE Verify the parent PTE is PRESENT when getting a child via virt_get_pte() so that the helper can be used for getting PTEs/GPAs without losing sanity checks that the walker isn't wandering into the weeds. Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20221006004512.666529-5-seanjc@google.com --- .../selftests/kvm/lib/x86_64/processor.c | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 324bf24564a1..c9649f19aca1 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -131,23 +131,28 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) } } -static void *virt_get_pte(struct kvm_vm *vm, uint64_t pt_gpa, uint64_t vaddr, - int level) +static void *virt_get_pte(struct kvm_vm *vm, uint64_t *parent_pte, + uint64_t vaddr, int level) { + uint64_t pt_gpa = PTE_GET_PA(*parent_pte); uint64_t *page_table = addr_gpa2hva(vm, pt_gpa); int index = (vaddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu; + TEST_ASSERT((*parent_pte & PTE_PRESENT_MASK) || parent_pte == &vm->pgd, + "Parent PTE (level %d) not PRESENT for gva: 0x%08lx", + level + 1, vaddr); + return &page_table[index]; } static uint64_t *virt_create_upper_pte(struct kvm_vm *vm, - uint64_t pt_gpa, + uint64_t *parent_pte, uint64_t vaddr, uint64_t paddr, int current_level, int 
target_level) { - uint64_t *pte = virt_get_pte(vm, pt_gpa, vaddr, current_level); + uint64_t *pte = virt_get_pte(vm, parent_pte, vaddr, current_level); if (!(*pte & PTE_PRESENT_MASK)) { *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK; @@ -197,20 +202,20 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level) * Allocate upper level page tables, if not already present. Return * early if a hugepage was created. */ - pml4e = virt_create_upper_pte(vm, vm->pgd, vaddr, paddr, PG_LEVEL_512G, level); + pml4e = virt_create_upper_pte(vm, &vm->pgd, vaddr, paddr, PG_LEVEL_512G, level); if (*pml4e & PTE_LARGE_MASK) return; - pdpe = virt_create_upper_pte(vm, PTE_GET_PA(*pml4e), vaddr, paddr, PG_LEVEL_1G, level); + pdpe = virt_create_upper_pte(vm, pml4e, vaddr, paddr, PG_LEVEL_1G, level); if (*pdpe & PTE_LARGE_MASK) return; - pde = virt_create_upper_pte(vm, PTE_GET_PA(*pdpe), vaddr, paddr, PG_LEVEL_2M, level); + pde = virt_create_upper_pte(vm, pdpe, vaddr, paddr, PG_LEVEL_2M, level); if (*pde & PTE_LARGE_MASK) return; /* Fill in page table entry. */ - pte = virt_get_pte(vm, PTE_GET_PA(*pde), vaddr, PG_LEVEL_4K); + pte = virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K); TEST_ASSERT(!(*pte & PTE_PRESENT_MASK), "PTE already present for 4k page at vaddr: 0x%lx\n", vaddr); *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK); From 99d51c6eef2dadc204363ab3bf58c91d02f895be Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 6 Oct 2022 00:45:10 +0000 Subject: [PATCH 1654/4122] KVM: selftests: Use virt_get_pte() when getting PTE pointer Use virt_get_pte() in vm_get_page_table_entry() instead of open coding equivalent code. 
Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20221006004512.666529-6-seanjc@google.com --- .../selftests/kvm/lib/x86_64/processor.c | 29 ++++--------------- 1 file changed, 6 insertions(+), 23 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index c9649f19aca1..09b550fd8815 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -247,9 +247,7 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr) { - uint16_t index[4]; uint64_t *pml4e, *pdpe, *pde; - uint64_t *pte; TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " "unknown or unsupported guest mode, mode: 0x%x", vm->mode); @@ -264,32 +262,17 @@ uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr) TEST_ASSERT(vaddr == (((int64_t)vaddr << 16) >> 16), "Canonical check failed. 
The virtual address is invalid."); - index[0] = (vaddr >> 12) & 0x1ffu; - index[1] = (vaddr >> 21) & 0x1ffu; - index[2] = (vaddr >> 30) & 0x1ffu; - index[3] = (vaddr >> 39) & 0x1ffu; + pml4e = virt_get_pte(vm, &vm->pgd, vaddr, PG_LEVEL_512G); - pml4e = addr_gpa2hva(vm, vm->pgd); - TEST_ASSERT(pml4e[index[3]] & PTE_PRESENT_MASK, - "Expected pml4e to be present for gva: 0x%08lx", vaddr); - - pdpe = addr_gpa2hva(vm, PTE_GET_PFN(pml4e[index[3]]) * vm->page_size); - TEST_ASSERT(pdpe[index[2]] & PTE_PRESENT_MASK, - "Expected pdpe to be present for gva: 0x%08lx", vaddr); - TEST_ASSERT(!(pdpe[index[2]] & PTE_LARGE_MASK), + pdpe = virt_get_pte(vm, pml4e, vaddr, PG_LEVEL_1G); + TEST_ASSERT(!(*pdpe & PTE_LARGE_MASK), "Expected pdpe to map a pde not a 1-GByte page."); - pde = addr_gpa2hva(vm, PTE_GET_PFN(pdpe[index[2]]) * vm->page_size); - TEST_ASSERT(pde[index[1]] & PTE_PRESENT_MASK, - "Expected pde to be present for gva: 0x%08lx", vaddr); - TEST_ASSERT(!(pde[index[1]] & PTE_LARGE_MASK), + pde = virt_get_pte(vm, pdpe, vaddr, PG_LEVEL_2M); + TEST_ASSERT(!(*pde & PTE_LARGE_MASK), "Expected pde to map a pte not a 2-MByte page."); - pte = addr_gpa2hva(vm, PTE_GET_PFN(pde[index[1]]) * vm->page_size); - TEST_ASSERT(pte[index[0]] & PTE_PRESENT_MASK, - "Expected pte to be present for gva: 0x%08lx", vaddr); - - return &pte[index[0]]; + return virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K); } void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) From efe91dc307d00766911fbcb5021bdc3a1cf9c79e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 6 Oct 2022 00:45:11 +0000 Subject: [PATCH 1655/4122] KVM: selftests: Use vm_get_page_table_entry() in addr_arch_gva2gpa() Use vm_get_page_table_entry() in addr_arch_gva2gpa() to get the leaf PTE instead of manually walking page tables. 
Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20221006004512.666529-7-seanjc@google.com --- .../selftests/kvm/lib/x86_64/processor.c | 38 ++----------------- 1 file changed, 4 insertions(+), 34 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 09b550fd8815..053f64191122 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -458,41 +458,11 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector, vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) { - uint16_t index[4]; - uint64_t *pml4e, *pdpe, *pde; - uint64_t *pte; + uint64_t *pte = vm_get_page_table_entry(vm, gva); - TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " - "unknown or unsupported guest mode, mode: 0x%x", vm->mode); - - index[0] = (gva >> 12) & 0x1ffu; - index[1] = (gva >> 21) & 0x1ffu; - index[2] = (gva >> 30) & 0x1ffu; - index[3] = (gva >> 39) & 0x1ffu; - - if (!vm->pgd_created) - goto unmapped_gva; - pml4e = addr_gpa2hva(vm, vm->pgd); - if (!(pml4e[index[3]] & PTE_PRESENT_MASK)) - goto unmapped_gva; - - pdpe = addr_gpa2hva(vm, PTE_GET_PFN(pml4e[index[3]]) * vm->page_size); - if (!(pdpe[index[2]] & PTE_PRESENT_MASK)) - goto unmapped_gva; - - pde = addr_gpa2hva(vm, PTE_GET_PFN(pdpe[index[2]]) * vm->page_size); - if (!(pde[index[1]] & PTE_PRESENT_MASK)) - goto unmapped_gva; - - pte = addr_gpa2hva(vm, PTE_GET_PFN(pde[index[1]]) * vm->page_size); - if (!(pte[index[0]] & PTE_PRESENT_MASK)) - goto unmapped_gva; - - return (PTE_GET_PFN(pte[index[0]]) * vm->page_size) + (gva & ~PAGE_MASK); - -unmapped_gva: - TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva); - exit(EXIT_FAILURE); + TEST_ASSERT(*pte & PTE_PRESENT_MASK, + "Leaf PTE not PRESENT for gva: 0x%08lx", gva); + return PTE_GET_PA(*pte) | (gva & ~PAGE_MASK); } static void kvm_setup_gdt(struct kvm_vm *vm, struct 
kvm_dtable *dt) From 96b69958c77d84e49c06ebe2e3502e4c1620e3c0 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 6 Oct 2022 00:45:12 +0000 Subject: [PATCH 1656/4122] KVM: selftests: Play nice with huge pages when getting PTEs/GPAs Play nice with huge pages when getting PTEs and translating GVAs to GPAs, there's no reason to disallow using huge pages in selftests. Use PG_LEVEL_NONE to indicate that the caller doesn't care about the mapping level and just wants to get the pte+level. Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20221006004512.666529-8-seanjc@google.com --- .../selftests/kvm/include/x86_64/processor.h | 11 ++++- .../selftests/kvm/lib/x86_64/processor.c | 45 ++++++++++++++++--- 2 files changed, 47 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 9676a3464758..e000e35c948f 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -172,11 +172,16 @@ struct kvm_x86_cpu_feature { #define PTE_GLOBAL_MASK BIT_ULL(8) #define PTE_NX_MASK BIT_ULL(63) +#define PHYSICAL_PAGE_MASK GENMASK_ULL(51, 12) + #define PAGE_SHIFT 12 #define PAGE_SIZE (1ULL << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE-1)) +#define PAGE_MASK (~(PAGE_SIZE-1) & PHYSICAL_PAGE_MASK) + +#define HUGEPAGE_SHIFT(x) (PAGE_SHIFT + (((x) - 1) * 9)) +#define HUGEPAGE_SIZE(x) (1UL << HUGEPAGE_SHIFT(x)) +#define HUGEPAGE_MASK(x) (~(HUGEPAGE_SIZE(x) - 1) & PHYSICAL_PAGE_MASK) -#define PHYSICAL_PAGE_MASK GENMASK_ULL(51, 12) #define PTE_GET_PA(pte) ((pte) & PHYSICAL_PAGE_MASK) #define PTE_GET_PFN(pte) (PTE_GET_PA(pte) >> PAGE_SHIFT) @@ -828,6 +833,8 @@ static inline uint8_t wrmsr_safe(uint32_t msr, uint64_t val) bool kvm_is_tdp_enabled(void); +uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr, + int *level); uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t 
vaddr); uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 053f64191122..efa20d0f9927 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -245,10 +245,26 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, } } -uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr) +static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level) +{ + if (*pte & PTE_LARGE_MASK) { + TEST_ASSERT(*level == PG_LEVEL_NONE || + *level == current_level, + "Unexpected hugepage at level %d\n", current_level); + *level = current_level; + } + + return *level == current_level; +} + +uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr, + int *level) { uint64_t *pml4e, *pdpe, *pde; + TEST_ASSERT(*level >= PG_LEVEL_NONE && *level < PG_LEVEL_NUM, + "Invalid PG_LEVEL_* '%d'", *level); + TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " "unknown or unsupported guest mode, mode: 0x%x", vm->mode); TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, @@ -263,18 +279,27 @@ uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr) "Canonical check failed. 
The virtual address is invalid."); pml4e = virt_get_pte(vm, &vm->pgd, vaddr, PG_LEVEL_512G); + if (vm_is_target_pte(pml4e, level, PG_LEVEL_512G)) + return pml4e; pdpe = virt_get_pte(vm, pml4e, vaddr, PG_LEVEL_1G); - TEST_ASSERT(!(*pdpe & PTE_LARGE_MASK), - "Expected pdpe to map a pde not a 1-GByte page."); + if (vm_is_target_pte(pdpe, level, PG_LEVEL_1G)) + return pdpe; pde = virt_get_pte(vm, pdpe, vaddr, PG_LEVEL_2M); - TEST_ASSERT(!(*pde & PTE_LARGE_MASK), - "Expected pde to map a pte not a 2-MByte page."); + if (vm_is_target_pte(pde, level, PG_LEVEL_2M)) + return pde; return virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K); } +uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr) +{ + int level = PG_LEVEL_4K; + + return __vm_get_page_table_entry(vm, vaddr, &level); +} + void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) { uint64_t *pml4e, *pml4e_start; @@ -458,11 +483,17 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector, vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) { - uint64_t *pte = vm_get_page_table_entry(vm, gva); + int level = PG_LEVEL_NONE; + uint64_t *pte = __vm_get_page_table_entry(vm, gva, &level); TEST_ASSERT(*pte & PTE_PRESENT_MASK, "Leaf PTE not PRESENT for gva: 0x%08lx", gva); - return PTE_GET_PA(*pte) | (gva & ~PAGE_MASK); + + /* + * No need for a hugepage mask on the PTE, x86-64 requires the "unused" + * address bits to be zero. + */ + return PTE_GET_PA(*pte) | (gva & ~HUGEPAGE_MASK(level)); } static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt) From 197ebb713ad04518ffef6d966954b519a0805100 Mon Sep 17 00:00:00 2001 From: Vishal Annapurve Date: Tue, 15 Nov 2022 21:38:43 +0000 Subject: [PATCH 1657/4122] KVM: selftests: move common startup logic to kvm_util.c Consolidate common startup logic in one place by implementing a single setup function with __attribute((constructor)) for all selftests within kvm_util.c. 
This allows moving logic like: /* Tell stdout not to buffer its content */ setbuf(stdout, NULL); to a single file for all selftests. This will also allow any required setup at entry in future to be done in common main function. Link: https://lore.kernel.org/lkml/Ywa9T+jKUpaHLu%2Fl@google.com Suggested-by: Sean Christopherson Reviewed-by: Andrew Jones Reviewed-by: Peter Gonda Signed-off-by: Vishal Annapurve Link: https://lore.kernel.org/r/20221115213845.3348210-2-vannapurve@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/aarch64/arch_timer.c | 3 --- tools/testing/selftests/kvm/aarch64/hypercalls.c | 2 -- tools/testing/selftests/kvm/aarch64/vgic_irq.c | 3 --- tools/testing/selftests/kvm/lib/kvm_util.c | 6 ++++++ tools/testing/selftests/kvm/memslot_perf_test.c | 3 --- tools/testing/selftests/kvm/rseq_test.c | 3 --- tools/testing/selftests/kvm/s390x/memop.c | 2 -- tools/testing/selftests/kvm/s390x/resets.c | 2 -- tools/testing/selftests/kvm/s390x/sync_regs_test.c | 3 --- tools/testing/selftests/kvm/set_memory_region_test.c | 3 --- tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c | 3 --- tools/testing/selftests/kvm/x86_64/emulator_error_test.c | 3 --- tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c | 3 --- tools/testing/selftests/kvm/x86_64/platform_info_test.c | 3 --- tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c | 3 --- tools/testing/selftests/kvm/x86_64/set_sregs_test.c | 3 --- .../selftests/kvm/x86_64/svm_nested_soft_inject_test.c | 3 --- tools/testing/selftests/kvm/x86_64/sync_regs_test.c | 3 --- tools/testing/selftests/kvm/x86_64/userspace_io_test.c | 3 --- .../testing/selftests/kvm/x86_64/userspace_msr_exit_test.c | 3 --- 20 files changed, 6 insertions(+), 54 deletions(-) diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c index 54016cdd8a09..f2a96779716a 100644 --- a/tools/testing/selftests/kvm/aarch64/arch_timer.c +++ 
b/tools/testing/selftests/kvm/aarch64/arch_timer.c @@ -446,9 +446,6 @@ int main(int argc, char *argv[]) { struct kvm_vm *vm; - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - if (!parse_args(argc, argv)) exit(KSFT_SKIP); diff --git a/tools/testing/selftests/kvm/aarch64/hypercalls.c b/tools/testing/selftests/kvm/aarch64/hypercalls.c index 3dceecfd1f62..bef1499fb465 100644 --- a/tools/testing/selftests/kvm/aarch64/hypercalls.c +++ b/tools/testing/selftests/kvm/aarch64/hypercalls.c @@ -305,8 +305,6 @@ static void test_run(void) int main(void) { - setbuf(stdout, NULL); - test_run(); return 0; } diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/aarch64/vgic_irq.c index e0310ebc313c..90d854e0fcff 100644 --- a/tools/testing/selftests/kvm/aarch64/vgic_irq.c +++ b/tools/testing/selftests/kvm/aarch64/vgic_irq.c @@ -817,9 +817,6 @@ int main(int argc, char **argv) int opt; bool eoi_split = false; - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - while ((opt = getopt(argc, argv, "hn:e:l:")) != -1) { switch (opt) { case 'n': diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 07c8edd4e548..575a0c38d1c0 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -2086,3 +2086,9 @@ void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data, break; } } + +void __attribute((constructor)) kvm_selftest_init(void) +{ + /* Tell stdout not to buffer its content. 
*/ + setbuf(stdout, NULL); +} diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index d771262ea584..36b20abfb948 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -994,9 +994,6 @@ int main(int argc, char *argv[]) struct test_result rbestslottime; int tctr; - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - if (!parse_args(argc, argv, &targs)) return -1; diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c index 0e9e2b48a51f..3045fdf9bdf5 100644 --- a/tools/testing/selftests/kvm/rseq_test.c +++ b/tools/testing/selftests/kvm/rseq_test.c @@ -205,9 +205,6 @@ int main(int argc, char *argv[]) struct kvm_vcpu *vcpu; u32 cpu, rseq_cpu; - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - r = sched_getaffinity(0, sizeof(possible_mask), &possible_mask); TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", errno, strerror(errno)); diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c index 9113696d5178..3fd81e58f40c 100644 --- a/tools/testing/selftests/kvm/s390x/memop.c +++ b/tools/testing/selftests/kvm/s390x/memop.c @@ -760,8 +760,6 @@ int main(int argc, char *argv[]) TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_MEM_OP)); - setbuf(stdout, NULL); /* Tell stdout not to buffer its content */ - ksft_print_header(); ksft_set_plan(ARRAY_SIZE(testlist)); diff --git a/tools/testing/selftests/kvm/s390x/resets.c b/tools/testing/selftests/kvm/s390x/resets.c index 19486084eb30..e41e2cb8ffa9 100644 --- a/tools/testing/selftests/kvm/s390x/resets.c +++ b/tools/testing/selftests/kvm/s390x/resets.c @@ -296,8 +296,6 @@ int main(int argc, char *argv[]) bool has_s390_vcpu_resets = kvm_check_cap(KVM_CAP_S390_VCPU_RESETS); int idx; - setbuf(stdout, NULL); /* Tell stdout not to buffer its content */ - ksft_print_header(); 
ksft_set_plan(ARRAY_SIZE(testlist)); diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390x/sync_regs_test.c index 3fdb6e2598eb..2ddde41c44ba 100644 --- a/tools/testing/selftests/kvm/s390x/sync_regs_test.c +++ b/tools/testing/selftests/kvm/s390x/sync_regs_test.c @@ -231,9 +231,6 @@ int main(int argc, char *argv[]) TEST_REQUIRE(kvm_has_cap(KVM_CAP_SYNC_REGS)); - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - ksft_print_header(); ksft_set_plan(ARRAY_SIZE(testlist)); diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index 85c16f09a50e..2ef1d1b72ce4 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -392,9 +392,6 @@ int main(int argc, char *argv[]) int i, loops; #endif - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - #ifdef __x86_64__ /* * FIXME: the zero-memslot test fails on aarch64 and s390x because diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c index 4208487652f8..1027a671c7d3 100644 --- a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c +++ b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c @@ -57,9 +57,6 @@ int main(int argc, char *argv[]) TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE)); - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - vm = vm_create_with_one_vcpu(&vcpu, guest_code); run = vcpu->run; diff --git a/tools/testing/selftests/kvm/x86_64/emulator_error_test.c b/tools/testing/selftests/kvm/x86_64/emulator_error_test.c index 1abb34735754..d945e571e7a0 100644 --- a/tools/testing/selftests/kvm/x86_64/emulator_error_test.c +++ b/tools/testing/selftests/kvm/x86_64/emulator_error_test.c @@ -157,9 +157,6 @@ int main(int argc, char *argv[]) uint64_t gpa; int rc; - /* Tell stdout not to buffer its content */ - 
setbuf(stdout, NULL); - TEST_REQUIRE(kvm_has_cap(KVM_CAP_SMALLER_MAXPHYADDR)); vm = vm_create_with_one_vcpu(&vcpu, guest_code); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c index e804eb08dff9..5c27efbf405e 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c @@ -134,9 +134,6 @@ int main(int argc, char *argv[]) const struct kvm_cpuid2 *hv_cpuid_entries; struct kvm_vcpu *vcpu; - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID)); vm = vm_create_with_one_vcpu(&vcpu, guest_code); diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c index 76417c7d687b..310a104d94f0 100644 --- a/tools/testing/selftests/kvm/x86_64/platform_info_test.c +++ b/tools/testing/selftests/kvm/x86_64/platform_info_test.c @@ -72,9 +72,6 @@ int main(int argc, char *argv[]) struct kvm_vm *vm; uint64_t msr_platform_info; - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - TEST_REQUIRE(kvm_has_cap(KVM_CAP_MSR_PLATFORM_INFO)); vm = vm_create_with_one_vcpu(&vcpu, guest_code); diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c index ea4e259a1e2e..a6ffa245c897 100644 --- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c +++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c @@ -447,9 +447,6 @@ int main(int argc, char *argv[]) struct kvm_vcpu *vcpu; struct kvm_vm *vm; - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_FILTER)); TEST_REQUIRE(use_intel_pmu() || use_amd_pmu()); diff --git a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c index 2bb08bf2125d..a284fcef6ed7 100644 --- 
a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c +++ b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c @@ -82,9 +82,6 @@ int main(int argc, char *argv[]) uint64_t cr4; int rc; - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - /* * Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2, and * use it to verify all supported CR4 bits can be set prior to defining diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c b/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c index e637d7736012..e497ace629c1 100644 --- a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c +++ b/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c @@ -194,9 +194,6 @@ done: int main(int argc, char *argv[]) { - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); TEST_ASSERT(kvm_cpu_has(X86_FEATURE_NRIPS), diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c index 9b6db0b0b13e..d2f9b5bdfab2 100644 --- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c +++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c @@ -90,9 +90,6 @@ int main(int argc, char *argv[]) struct kvm_vcpu_events events; int rv, cap; - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - cap = kvm_check_cap(KVM_CAP_SYNC_REGS); TEST_REQUIRE((cap & TEST_SYNC_FIELDS) == TEST_SYNC_FIELDS); TEST_REQUIRE(!(cap & INVALID_SYNC_FIELD)); diff --git a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c index 7316521428f8..91076c9787b4 100644 --- a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c +++ b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c @@ -56,9 +56,6 @@ int main(int argc, char *argv[]) struct kvm_vm *vm; struct ucall uc; - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - vm = 
vm_create_with_one_vcpu(&vcpu, guest_code); run = vcpu->run; diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c index fae95089e655..25fa55344a10 100644 --- a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c +++ b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c @@ -818,9 +818,6 @@ static void test_user_exit_msr_flags(void) int main(int argc, char *argv[]) { - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - test_msr_filter_allow(); test_msr_filter_deny(); From e1ab31245c4efe973db4dcd6c82a41c2aec09b27 Mon Sep 17 00:00:00 2001 From: Vishal Annapurve Date: Tue, 15 Nov 2022 21:38:44 +0000 Subject: [PATCH 1658/4122] KVM: selftests: Add arch specific initialization Introduce arch specific API: kvm_selftest_arch_init to allow each arch to handle initialization before running any selftest logic. Suggested-by: Sean Christopherson Reviewed-by: Andrew Jones Reviewed-by: Peter Gonda Signed-off-by: Vishal Annapurve Link: https://lore.kernel.org/r/20221115213845.3348210-3-vannapurve@google.com Signed-off-by: Sean Christopherson --- .../selftests/kvm/include/kvm_util_base.h | 7 +++++++ .../selftests/kvm/lib/aarch64/processor.c | 18 +++++++++--------- tools/testing/selftests/kvm/lib/kvm_util.c | 6 ++++++ 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h index a7047e0767d3..c58eec6d44a7 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -857,4 +857,11 @@ static inline int __vm_disable_nx_huge_pages(struct kvm_vm *vm) return __vm_enable_cap(vm, KVM_CAP_VM_DISABLE_NX_HUGE_PAGES, 0); } +/* + * Arch hook that is invoked via a constructor, i.e. before exeucting main(), + * to allow for arch-specific setup that is common to all tests, e.g. 
computing + * the default guest "mode". + */ +void kvm_selftest_arch_init(void); + #endif /* SELFTEST_KVM_UTIL_BASE_H */ diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index 6f5551368944..0de4aabc0c76 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -495,15 +495,6 @@ void aarch64_get_supported_page_sizes(uint32_t ipa, close(kvm_fd); } -/* - * arm64 doesn't have a true default mode, so start by computing the - * available IPA space and page sizes early. - */ -void __attribute__((constructor)) init_guest_modes(void) -{ - guest_modes_append_default(); -} - void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, uint64_t arg6, struct arm_smccc_res *res) @@ -528,3 +519,12 @@ void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1, [arg4] "r"(arg4), [arg5] "r"(arg5), [arg6] "r"(arg6) : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7"); } + +void kvm_selftest_arch_init(void) +{ + /* + * arm64 doesn't have a true default mode, so start by computing the + * available IPA space and page sizes early. + */ + guest_modes_append_default(); +} diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 575a0c38d1c0..db62680d5918 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -2087,8 +2087,14 @@ void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data, } } +__weak void kvm_selftest_arch_init(void) +{ +} + void __attribute((constructor)) kvm_selftest_init(void) { /* Tell stdout not to buffer its content. 
*/ setbuf(stdout, NULL); + + kvm_selftest_arch_init(); } From 2115713cfab05dd6efd48ad6bf56e67f556dcec5 Mon Sep 17 00:00:00 2001 From: Vishal Annapurve Date: Tue, 15 Nov 2022 21:38:45 +0000 Subject: [PATCH 1659/4122] KVM: selftests: Add arch specific post vm creation hook Add arch specific API kvm_arch_vm_post_create to perform any required setup after VM creation. Suggested-by: Sean Christopherson Reviewed-by: Andrew Jones Reviewed-by: Peter Gonda Signed-off-by: Vishal Annapurve Link: https://lore.kernel.org/r/20221115213845.3348210-4-vannapurve@google.com [sean: place x86's implementation by vm_arch_vcpu_add()] Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/include/kvm_util_base.h | 2 ++ tools/testing/selftests/kvm/lib/kvm_util.c | 8 +++++--- tools/testing/selftests/kvm/lib/x86_64/processor.c | 5 +++++ 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h index c58eec6d44a7..228212ede05e 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -864,4 +864,6 @@ static inline int __vm_disable_nx_huge_pages(struct kvm_vm *vm) */ void kvm_selftest_arch_init(void); +void kvm_arch_vm_post_create(struct kvm_vm *vm); + #endif /* SELFTEST_KVM_UTIL_BASE_H */ diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index db62680d5918..5ac8f207ed92 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -351,9 +351,7 @@ struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus, slot0 = memslot2region(vm, 0); ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size); -#ifdef __x86_64__ - vm_create_irqchip(vm); -#endif + kvm_arch_vm_post_create(vm); return vm; } @@ -2087,6 +2085,10 @@ void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, 
uint64_t *data, } } +__weak void kvm_arch_vm_post_create(struct kvm_vm *vm) +{ +} + __weak void kvm_selftest_arch_init(void) { } diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index efa20d0f9927..999576146d69 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -586,6 +586,11 @@ void __vm_xsave_require_permission(int bit, const char *name) bitmask); } +void kvm_arch_vm_post_create(struct kvm_vm *vm) +{ + vm_create_irqchip(vm); +} + struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, void *guest_code) { From 376bc1b458c9a6db347c70f02fa6eb8b5f187455 Mon Sep 17 00:00:00 2001 From: Gautam Menghani Date: Mon, 17 Oct 2022 23:28:19 +0530 Subject: [PATCH 1660/4122] KVM: selftests: Don't assume vcpu->id is '0' in xAPIC state test In xapic_state_test's test_icr(), explicitly skip iterations that would match vcpu->id instead of assuming vcpu->id is '0', so that IPIs are correctly sent to non-existent vCPUs. Suggested-by: Sean Christopherson Link: https://lore.kernel.org/kvm/YyoZr9rXSSMEtdh5@google.com Signed-off-by: Gautam Menghani Link: https://lore.kernel.org/r/20221017175819.12672-1-gautammenghani201@gmail.com [sean: massage shortlog and changelog] Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/x86_64/xapic_state_test.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c index 6f7a5ef66718..d7d37dae3eeb 100644 --- a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c +++ b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c @@ -114,7 +114,9 @@ static void test_icr(struct xapic_vcpu *x) * vCPUs, not vcpu.id + 1. Arbitrarily use vector 0xff.
*/ icr = APIC_INT_ASSERT | 0xff; - for (i = vcpu->id + 1; i < 0xff; i++) { + for (i = 0; i < 0xff; i++) { + if (i == vcpu->id) + continue; for (j = 0; j < 8; j++) __test_icr(x, i << (32 + 24) | icr | (j << 8)); } From 52d3a4fb5be1579b0fb8d48f8abaf9a9c7db1083 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Wed, 2 Nov 2022 11:46:45 -0700 Subject: [PATCH 1661/4122] KVM: selftests: Rename emulator_error_test to smaller_maxphyaddr_emulation_test Rename emulator_error_test to smaller_maxphyaddr_emulation_test and update the comment at the top of the file to document that this is explicitly a test to validate that KVM emulates instructions in response to an EPT violation when emulating a smaller MAXPHYADDR. Signed-off-by: David Matlack Reviewed-by: Sean Christopherson Link: https://lore.kernel.org/r/20221102184654.282799-2-dmatlack@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/.gitignore | 2 +- tools/testing/selftests/kvm/Makefile | 2 +- ...ulator_error_test.c => smaller_maxphyaddr_emulation_test.c} | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) rename tools/testing/selftests/kvm/x86_64/{emulator_error_test.c => smaller_maxphyaddr_emulation_test.c} (96%) diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 2f0d705db9db..053e5d34cd03 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -17,7 +17,6 @@ /x86_64/cr4_cpuid_sync_test /x86_64/debug_regs /x86_64/evmcs_test -/x86_64/emulator_error_test /x86_64/fix_hypercall_test /x86_64/get_msr_index_features /x86_64/kvm_clock_test @@ -36,6 +35,7 @@ /x86_64/set_boot_cpu_id /x86_64/set_sregs_test /x86_64/sev_migrate_tests +/x86_64/smaller_maxphyaddr_emulation_test /x86_64/smm_test /x86_64/state_test /x86_64/svm_vmcall_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 6e2a683629c7..cff3a7ff8782 100644 --- a/tools/testing/selftests/kvm/Makefile +++ 
b/tools/testing/selftests/kvm/Makefile @@ -82,7 +82,6 @@ TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test -TEST_GEN_PROGS_x86_64 += x86_64/emulator_error_test TEST_GEN_PROGS_x86_64 += x86_64/fix_hypercall_test TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid @@ -97,6 +96,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test TEST_GEN_PROGS_x86_64 += x86_64/pmu_event_filter_test TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test +TEST_GEN_PROGS_x86_64 += x86_64/smaller_maxphyaddr_emulation_test TEST_GEN_PROGS_x86_64 += x86_64/smm_test TEST_GEN_PROGS_x86_64 += x86_64/state_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test diff --git a/tools/testing/selftests/kvm/x86_64/emulator_error_test.c b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c similarity index 96% rename from tools/testing/selftests/kvm/x86_64/emulator_error_test.c rename to tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c index d945e571e7a0..6fe5af8bd47e 100644 --- a/tools/testing/selftests/kvm/x86_64/emulator_error_test.c +++ b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c @@ -2,7 +2,8 @@ /* * Copyright (C) 2020, Google LLC. * - * Tests for KVM_CAP_EXIT_ON_EMULATION_FAILURE capability. + * Test that KVM emulates instructions in response to EPT violations when + * allow_smaller_maxphyaddr is enabled and guest.MAXPHYADDR < host.MAXPHYADDR. 
*/ #define _GNU_SOURCE /* for program_invocation_short_name */ From 48e59373398a554098863bc7a3d1350cd0d5c4d0 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Wed, 2 Nov 2022 11:46:46 -0700 Subject: [PATCH 1662/4122] KVM: selftests: Explicitly require instructions bytes Hard-code the flds instruction and assert the exact instruction bytes are present in run->emulation_failure. The test already requires the instruction bytes to be present because that's the only way the test will advance the RIP past the flds and get to GUEST_DONE(). Note that KVM does not necessarily return exactly 2 bytes in run->emulation_failure since it may not know the exact instruction length in all cases. So just assert that run->emulation_failure.insn_size is at least 2. Signed-off-by: David Matlack Reviewed-by: Sean Christopherson Link: https://lore.kernel.org/r/20221102184654.282799-3-dmatlack@google.com Signed-off-by: Sean Christopherson --- .../smaller_maxphyaddr_emulation_test.c | 62 +++++-------------- 1 file changed, 17 insertions(+), 45 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c index 6fe5af8bd47e..6f7891b0e193 100644 --- a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c +++ b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c @@ -19,41 +19,20 @@ #define MEM_REGION_SLOT 10 #define MEM_REGION_SIZE PAGE_SIZE +#define FLDS_MEM_EAX ".byte 0xd9, 0x00" + static void guest_code(void) { - __asm__ __volatile__("flds (%[addr])" - :: [addr]"r"(MEM_REGION_GVA)); + __asm__ __volatile__(FLDS_MEM_EAX :: "a"(MEM_REGION_GVA)); GUEST_DONE(); } -/* - * Accessors to get R/M, REG, and Mod bits described in the SDM vol 2, - * figure 2-2 "Table Interpretation of ModR/M Byte (C8H)". 
- */ -#define GET_RM(insn_byte) (insn_byte & 0x7) -#define GET_REG(insn_byte) ((insn_byte & 0x38) >> 3) -#define GET_MOD(insn_byte) ((insn_byte & 0xc) >> 6) - -/* Ensure we are dealing with a simple 2-byte flds instruction. */ -static bool is_flds(uint8_t *insn_bytes, uint8_t insn_size) -{ - return insn_size >= 2 && - insn_bytes[0] == 0xd9 && - GET_REG(insn_bytes[1]) == 0x0 && - GET_MOD(insn_bytes[1]) == 0x0 && - /* Ensure there is no SIB byte. */ - GET_RM(insn_bytes[1]) != 0x4 && - /* Ensure there is no displacement byte. */ - GET_RM(insn_bytes[1]) != 0x5; -} - static void process_exit_on_emulation_error(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; struct kvm_regs regs; uint8_t *insn_bytes; - uint8_t insn_size; uint64_t flags; TEST_ASSERT(run->exit_reason == KVM_EXIT_INTERNAL_ERROR, @@ -65,30 +44,23 @@ static void process_exit_on_emulation_error(struct kvm_vcpu *vcpu) "Unexpected suberror: %u", run->emulation_failure.suberror); - if (run->emulation_failure.ndata >= 1) { - flags = run->emulation_failure.flags; - if ((flags & KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES) && - run->emulation_failure.ndata >= 3) { - insn_size = run->emulation_failure.insn_size; - insn_bytes = run->emulation_failure.insn_bytes; + flags = run->emulation_failure.flags; + TEST_ASSERT(run->emulation_failure.ndata >= 3 && + flags & KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES, + "run->emulation_failure is missing instruction bytes"); - TEST_ASSERT(insn_size <= 15 && insn_size > 0, - "Unexpected instruction size: %u", - insn_size); + TEST_ASSERT(run->emulation_failure.insn_size >= 2, + "Expected a 2-byte opcode for 'flds', got %d bytes", + run->emulation_failure.insn_size); - TEST_ASSERT(is_flds(insn_bytes, insn_size), - "Unexpected instruction. 
Expected 'flds' (0xd9 /0)"); + insn_bytes = run->emulation_failure.insn_bytes; + TEST_ASSERT(insn_bytes[0] == 0xd9 && insn_bytes[1] == 0, + "Expected 'flds [eax]', opcode '0xd9 0x00', got opcode 0x%02x 0x%02x\n", + insn_bytes[0], insn_bytes[1]); - /* - * If is_flds() succeeded then the instruction bytes - * contained an flds instruction that is 2-bytes in - * length (ie: no prefix, no SIB, no displacement). - */ - vcpu_regs_get(vcpu, &regs); - regs.rip += 2; - vcpu_regs_set(vcpu, &regs); - } - } + vcpu_regs_get(vcpu, &regs); + regs.rip += 2; + vcpu_regs_set(vcpu, &regs); } static void do_guest_assert(struct ucall *uc) From 50824c6eee39eb2d7a60d665b4b245552e852705 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Wed, 2 Nov 2022 11:46:47 -0700 Subject: [PATCH 1663/4122] KVM: selftests: Delete dead ucall code Delete a bunch of code related to ucall handling from smaller_maxphyaddr_emulation_test. The only thing smaller_maxphyaddr_emulation_test needs to check is that the vCPU exits with UCALL_DONE after the second vcpu_run().
Signed-off-by: David Matlack Reviewed-by: Sean Christopherson Link: https://lore.kernel.org/r/20221102184654.282799-4-dmatlack@google.com Signed-off-by: Sean Christopherson --- .../smaller_maxphyaddr_emulation_test.c | 61 +------------------ 1 file changed, 1 insertion(+), 60 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c index 6f7891b0e193..b72aeefda70c 100644 --- a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c +++ b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c @@ -63,64 +63,6 @@ static void process_exit_on_emulation_error(struct kvm_vcpu *vcpu) vcpu_regs_set(vcpu, &regs); } -static void do_guest_assert(struct ucall *uc) -{ - REPORT_GUEST_ASSERT(*uc); -} - -static void check_for_guest_assert(struct kvm_vcpu *vcpu) -{ - struct ucall uc; - - if (vcpu->run->exit_reason == KVM_EXIT_IO && - get_ucall(vcpu, &uc) == UCALL_ABORT) { - do_guest_assert(&uc); - } -} - -static void process_ucall_done(struct kvm_vcpu *vcpu) -{ - struct kvm_run *run = vcpu->run; - struct ucall uc; - - check_for_guest_assert(vcpu); - - TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, - "Unexpected exit reason: %u (%s)", - run->exit_reason, - exit_reason_str(run->exit_reason)); - - TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_DONE, - "Unexpected ucall command: %lu, expected UCALL_DONE (%d)", - uc.cmd, UCALL_DONE); -} - -static uint64_t process_ucall(struct kvm_vcpu *vcpu) -{ - struct kvm_run *run = vcpu->run; - struct ucall uc; - - TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, - "Unexpected exit reason: %u (%s)", - run->exit_reason, - exit_reason_str(run->exit_reason)); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_SYNC: - break; - case UCALL_ABORT: - do_guest_assert(&uc); - break; - case UCALL_DONE: - process_ucall_done(vcpu); - break; - default: - TEST_ASSERT(false, "Unexpected ucall"); - } - - return uc.cmd; -} - int main(int argc,
char *argv[]) { struct kvm_vcpu *vcpu; @@ -156,8 +98,7 @@ int main(int argc, char *argv[]) vcpu_run(vcpu); process_exit_on_emulation_error(vcpu); vcpu_run(vcpu); - - TEST_ASSERT(process_ucall(vcpu) == UCALL_DONE, "Expected UCALL_DONE"); + ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); kvm_vm_free(vm); From 19a2b32f5d242844e49764cf88256cfd53de0082 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Wed, 2 Nov 2022 11:46:48 -0700 Subject: [PATCH 1664/4122] KVM: selftests: Move flds instruction emulation failure handling to header Move the flds instruction emulation failure handling code to a header so it can be re-used in an upcoming test. No functional change intended. Signed-off-by: David Matlack Link: https://lore.kernel.org/r/20221102184654.282799-5-dmatlack@google.com Signed-off-by: Sean Christopherson --- .../selftests/kvm/x86_64/flds_emulation.h | 55 +++++++++++++++++++ .../smaller_maxphyaddr_emulation_test.c | 44 ++------------- 2 files changed, 59 insertions(+), 40 deletions(-) create mode 100644 tools/testing/selftests/kvm/x86_64/flds_emulation.h diff --git a/tools/testing/selftests/kvm/x86_64/flds_emulation.h b/tools/testing/selftests/kvm/x86_64/flds_emulation.h new file mode 100644 index 000000000000..e43a7df25f2c --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/flds_emulation.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_FLDS_EMULATION_H +#define SELFTEST_KVM_FLDS_EMULATION_H + +#include "kvm_util.h" + +#define FLDS_MEM_EAX ".byte 0xd9, 0x00" + +/* + * flds is an instruction that the KVM instruction emulator is known not to + * support. This can be used in guest code along with a mechanism to force + * KVM to emulate the instruction (e.g. by providing an MMIO address) to + * exercise emulation failures. 
+ */ +static inline void flds(uint64_t address) +{ + __asm__ __volatile__(FLDS_MEM_EAX :: "a"(address)); +} + +static inline void handle_flds_emulation_failure_exit(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + struct kvm_regs regs; + uint8_t *insn_bytes; + uint64_t flags; + + TEST_ASSERT(run->exit_reason == KVM_EXIT_INTERNAL_ERROR, + "Unexpected exit reason: %u (%s)", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION, + "Unexpected suberror: %u", + run->emulation_failure.suberror); + + flags = run->emulation_failure.flags; + TEST_ASSERT(run->emulation_failure.ndata >= 3 && + flags & KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES, + "run->emulation_failure is missing instruction bytes"); + + TEST_ASSERT(run->emulation_failure.insn_size >= 2, + "Expected a 2-byte opcode for 'flds', got %d bytes", + run->emulation_failure.insn_size); + + insn_bytes = run->emulation_failure.insn_bytes; + TEST_ASSERT(insn_bytes[0] == 0xd9 && insn_bytes[1] == 0, + "Expected 'flds [eax]', opcode '0xd9 0x00', got opcode 0x%02x 0x%02x\n", + insn_bytes[0], insn_bytes[1]); + + vcpu_regs_get(vcpu, &regs); + regs.rip += 2; + vcpu_regs_set(vcpu, &regs); +} + +#endif /* !SELFTEST_KVM_FLDS_EMULATION_H */ diff --git a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c index b72aeefda70c..a8d081178917 100644 --- a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c +++ b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c @@ -8,6 +8,8 @@ #define _GNU_SOURCE /* for program_invocation_short_name */ +#include "flds_emulation.h" + #include "test_util.h" #include "kvm_util.h" #include "vmx.h" @@ -19,50 +21,12 @@ #define MEM_REGION_SLOT 10 #define MEM_REGION_SIZE PAGE_SIZE -#define FLDS_MEM_EAX ".byte 0xd9, 0x00" - static void guest_code(void) { - __asm__
__volatile__(FLDS_MEM_EAX :: "a"(MEM_REGION_GVA)); - + flds(MEM_REGION_GVA); GUEST_DONE(); } -static void process_exit_on_emulation_error(struct kvm_vcpu *vcpu) -{ - struct kvm_run *run = vcpu->run; - struct kvm_regs regs; - uint8_t *insn_bytes; - uint64_t flags; - - TEST_ASSERT(run->exit_reason == KVM_EXIT_INTERNAL_ERROR, - "Unexpected exit reason: %u (%s)", - run->exit_reason, - exit_reason_str(run->exit_reason)); - - TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION, - "Unexpected suberror: %u", - run->emulation_failure.suberror); - - flags = run->emulation_failure.flags; - TEST_ASSERT(run->emulation_failure.ndata >= 3 && - flags & KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES, - "run->emulation_failure is missing instruction bytes"); - - TEST_ASSERT(run->emulation_failure.insn_size >= 2, - "Expected a 2-byte opcode for 'flds', got %d bytes", - run->emulation_failure.insn_size); - - insn_bytes = run->emulation_failure.insn_bytes; - TEST_ASSERT(insn_bytes[0] == 0xd9 && insn_bytes[1] == 0, - "Expected 'flds [eax]', opcode '0xd9 0x00', got opcode 0x%02x 0x%02x\n", - insn_bytes[0], insn_bytes[1]); - - vcpu_regs_get(vcpu, &regs); - regs.rip += 2; - vcpu_regs_set(vcpu, &regs); -} - int main(int argc, char *argv[]) { struct kvm_vcpu *vcpu; @@ -96,7 +60,7 @@ int main(int argc, char *argv[]) *pte |= BIT_ULL(MAXPHYADDR); vcpu_run(vcpu); - process_exit_on_emulation_error(vcpu); + handle_flds_emulation_failure_exit(vcpu); vcpu_run(vcpu); ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); From d6ecfe976ac342e5c3249b9439da762f65e98015 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Wed, 2 Nov 2022 11:46:49 -0700 Subject: [PATCH 1665/4122] KVM: x86/mmu: Use BIT{,_ULL}() for PFERR masks Use the preferred BIT() and BIT_ULL() to construct the PFERR masks rather than open-coding the bit shifting. No functional change intended.
Signed-off-by: David Matlack Reviewed-by: Sean Christopherson Link: https://lore.kernel.org/r/20221102184654.282799-6-dmatlack@google.com Signed-off-by: Sean Christopherson --- arch/x86/include/asm/kvm_host.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 81114a376c4e..598eb3b9ae44 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -256,16 +256,16 @@ enum x86_intercept_stage; #define PFERR_GUEST_PAGE_BIT 33 #define PFERR_IMPLICIT_ACCESS_BIT 48 -#define PFERR_PRESENT_MASK (1U << PFERR_PRESENT_BIT) -#define PFERR_WRITE_MASK (1U << PFERR_WRITE_BIT) -#define PFERR_USER_MASK (1U << PFERR_USER_BIT) -#define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT) -#define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT) -#define PFERR_PK_MASK (1U << PFERR_PK_BIT) -#define PFERR_SGX_MASK (1U << PFERR_SGX_BIT) -#define PFERR_GUEST_FINAL_MASK (1ULL << PFERR_GUEST_FINAL_BIT) -#define PFERR_GUEST_PAGE_MASK (1ULL << PFERR_GUEST_PAGE_BIT) -#define PFERR_IMPLICIT_ACCESS (1ULL << PFERR_IMPLICIT_ACCESS_BIT) +#define PFERR_PRESENT_MASK BIT(PFERR_PRESENT_BIT) +#define PFERR_WRITE_MASK BIT(PFERR_WRITE_BIT) +#define PFERR_USER_MASK BIT(PFERR_USER_BIT) +#define PFERR_RSVD_MASK BIT(PFERR_RSVD_BIT) +#define PFERR_FETCH_MASK BIT(PFERR_FETCH_BIT) +#define PFERR_PK_MASK BIT(PFERR_PK_BIT) +#define PFERR_SGX_MASK BIT(PFERR_SGX_BIT) +#define PFERR_GUEST_FINAL_MASK BIT_ULL(PFERR_GUEST_FINAL_BIT) +#define PFERR_GUEST_PAGE_MASK BIT_ULL(PFERR_GUEST_PAGE_BIT) +#define PFERR_IMPLICIT_ACCESS BIT_ULL(PFERR_IMPLICIT_ACCESS_BIT) #define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK | \ PFERR_WRITE_MASK | \ From 77f7813cc2b9d95166a3539a5203663d5bbb0fd0 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Wed, 2 Nov 2022 11:46:50 -0700 Subject: [PATCH 1666/4122] KVM: selftests: Copy KVM PFERR masks into selftests Copy KVM's macros for page fault error masks into processor.h so they can 
be used in selftests. Signed-off-by: David Matlack Reviewed-by: Sean Christopherson Link: https://lore.kernel.org/r/20221102184654.282799-7-dmatlack@google.com Signed-off-by: Sean Christopherson --- .../selftests/kvm/include/x86_64/processor.h | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index e000e35c948f..159ea618d90f 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -887,4 +887,27 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, #define XSTATE_XTILE_DATA_MASK (1ULL << XSTATE_XTILE_DATA_BIT) #define XFEATURE_XTILE_MASK (XSTATE_XTILE_CFG_MASK | \ XSTATE_XTILE_DATA_MASK) + +#define PFERR_PRESENT_BIT 0 +#define PFERR_WRITE_BIT 1 +#define PFERR_USER_BIT 2 +#define PFERR_RSVD_BIT 3 +#define PFERR_FETCH_BIT 4 +#define PFERR_PK_BIT 5 +#define PFERR_SGX_BIT 15 +#define PFERR_GUEST_FINAL_BIT 32 +#define PFERR_GUEST_PAGE_BIT 33 +#define PFERR_IMPLICIT_ACCESS_BIT 48 + +#define PFERR_PRESENT_MASK BIT(PFERR_PRESENT_BIT) +#define PFERR_WRITE_MASK BIT(PFERR_WRITE_BIT) +#define PFERR_USER_MASK BIT(PFERR_USER_BIT) +#define PFERR_RSVD_MASK BIT(PFERR_RSVD_BIT) +#define PFERR_FETCH_MASK BIT(PFERR_FETCH_BIT) +#define PFERR_PK_MASK BIT(PFERR_PK_BIT) +#define PFERR_SGX_MASK BIT(PFERR_SGX_BIT) +#define PFERR_GUEST_FINAL_MASK BIT_ULL(PFERR_GUEST_FINAL_BIT) +#define PFERR_GUEST_PAGE_MASK BIT_ULL(PFERR_GUEST_PAGE_BIT) +#define PFERR_IMPLICIT_ACCESS BIT_ULL(PFERR_IMPLICIT_ACCESS_BIT) + #endif /* SELFTEST_KVM_PROCESSOR_H */ From f2e5b53b4ba9bc10d3febc3682bdf22e946bf6eb Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 2 Nov 2022 11:46:51 -0700 Subject: [PATCH 1667/4122] KVM: selftests: Avoid JMP in non-faulting path of KVM_ASM_SAFE() Clear R9 in the non-faulting path of KVM_ASM_SAFE() and fall through to a common load of "vector" to
effectively load "vector" with '0' to reduce the code footprint of the asm blob, to reduce the runtime overhead of the non-faulting path (when "vector" is stored in a register), and so that additional output constraints that are valid if and only if a fault occurs are loaded even in the non-faulting case. A future patch will add a 64-bit output for the error code, and if its output is not explicitly loaded with _something_, the user of the asm blob can end up technically consuming uninitialized data. Using a common path to load the output constraints will allow using an existing scratch register, e.g. r10, to hold the error code in the faulting path, while also guaranteeing the error code is initialized with deterministic data in the non-faulting path (r10 is loaded with the RIP of the to-be-executed instruction). Consuming the error code when a fault doesn't occur would obviously be a test bug, but there's no guarantee the compiler will detect uninitialized consumption. And conversely, it's theoretically possible that the compiler might throw a false positive on uninitialized data, e.g. if the compiler can't determine that the non-faulting path won't touch the error code. Alternatively, the error code could be explicitly loaded in the non-faulting path, but loading a 64-bit memory|register output operand with an explicit value requires a sign-extended "MOV imm32, r/m64", which isn't exactly straightforward and has a largish code footprint. And loading the error code with what is effectively garbage (from a scratch register) avoids having to choose an arbitrary value for the non-faulting case. Opportunistically remove a rogue asterisk in the block comment. 
Signed-off-by: Sean Christopherson Signed-off-by: David Matlack Link: https://lore.kernel.org/r/20221102184654.282799-8-dmatlack@google.com --- tools/testing/selftests/kvm/include/x86_64/processor.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 159ea618d90f..6f4c727d6818 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -770,7 +770,7 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector, * for recursive faults when accessing memory in the handler. The downside to * using registers is that it restricts what registers can be used by the actual * instruction. But, selftests are 64-bit only, making register* pressure a - * minor concern. Use r9-r11 as they are volatile, i.e. don't need* to be saved + * minor concern. Use r9-r11 as they are volatile, i.e. don't need to be saved * by the callee, and except for r11 are not implicit parameters to any * instructions. Ideally, fixup would use r8-r10 and thus avoid implicit * parameters entirely, but Hyper-V's hypercall ABI uses r8 and testing Hyper-V @@ -792,11 +792,9 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector, "lea 1f(%%rip), %%r10\n\t" \ "lea 2f(%%rip), %%r11\n\t" \ "1: " insn "\n\t" \ - "movb $0, %[vector]\n\t" \ - "jmp 3f\n\t" \ + "xor %%r9, %%r9\n\t" \ "2:\n\t" \ - "mov %%r9b, %[vector]\n\t" \ - "3:\n\t" + "mov %%r9b, %[vector]\n\t" #define KVM_ASM_SAFE_OUTPUTS(v) [vector] "=qm"(v) #define KVM_ASM_SAFE_CLOBBERS "r9", "r10", "r11" From b9635930f0a73c1ef7b465121896c3fb2e3b77cd Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 2 Nov 2022 11:46:52 -0700 Subject: [PATCH 1668/4122] KVM: selftests: Provide error code as a KVM_ASM_SAFE() output Provide the error code on a fault in KVM_ASM_SAFE(), e.g. 
to allow tests to assert that #PF generates the correct error code without needing to manually install a #PF handler. Use r10 as the scratch register for the error code, as it's already clobbered by the asm blob (loaded with the RIP of the to-be-executed instruction). Deliberately load the output "error_code" even in the non-faulting path so that error_code is always initialized with deterministic data (the aforementioned RIP), i.e to ensure a selftest won't end up with uninitialized consumption regardless of how KVM_ASM_SAFE() is used. Don't clear r10 in the non-faulting case and instead load error code with the RIP (see above). The error code is valid if and only if an exception occurs, and '0' isn't necessarily a better "invalid" value, e.g. '0' could result in false passes for a buggy test. Signed-off-by: Sean Christopherson Signed-off-by: David Matlack Link: https://lore.kernel.org/r/20221102184654.282799-9-dmatlack@google.com --- .../selftests/kvm/include/x86_64/processor.h | 39 +++++++++++++------ .../selftests/kvm/lib/x86_64/processor.c | 1 + .../selftests/kvm/x86_64/hyperv_features.c | 3 +- 3 files changed, 30 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 6f4c727d6818..6093d0d53b4e 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -786,6 +786,7 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector, * * REGISTER OUTPUTS: * r9 = exception vector (non-zero) + * r10 = error code */ #define KVM_ASM_SAFE(insn) \ "mov $" __stringify(KVM_EXCEPTION_MAGIC) ", %%r9\n\t" \ @@ -794,29 +795,43 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector, "1: " insn "\n\t" \ "xor %%r9, %%r9\n\t" \ "2:\n\t" \ - "mov %%r9b, %[vector]\n\t" + "mov %%r9b, %[vector]\n\t" \ + "mov %%r10, %[error_code]\n\t" -#define KVM_ASM_SAFE_OUTPUTS(v) [vector] "=qm"(v) +#define 
KVM_ASM_SAFE_OUTPUTS(v, ec) [vector] "=qm"(v), [error_code] "=rm"(ec) #define KVM_ASM_SAFE_CLOBBERS "r9", "r10", "r11" -#define kvm_asm_safe(insn, inputs...) \ -({ \ - uint8_t vector; \ - \ - asm volatile(KVM_ASM_SAFE(insn) \ - : KVM_ASM_SAFE_OUTPUTS(vector) \ - : inputs \ - : KVM_ASM_SAFE_CLOBBERS); \ - vector; \ +#define kvm_asm_safe(insn, inputs...) \ +({ \ + uint64_t ign_error_code; \ + uint8_t vector; \ + \ + asm volatile(KVM_ASM_SAFE(insn) \ + : KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code) \ + : inputs \ + : KVM_ASM_SAFE_CLOBBERS); \ + vector; \ +}) + +#define kvm_asm_safe_ec(insn, error_code, inputs...) \ +({ \ + uint8_t vector; \ + \ + asm volatile(KVM_ASM_SAFE(insn) \ + : KVM_ASM_SAFE_OUTPUTS(vector, error_code) \ + : inputs \ + : KVM_ASM_SAFE_CLOBBERS); \ + vector; \ }) static inline uint8_t rdmsr_safe(uint32_t msr, uint64_t *val) { + uint64_t error_code; uint8_t vector; uint32_t a, d; asm volatile(KVM_ASM_SAFE("rdmsr") - : "=a"(a), "=d"(d), KVM_ASM_SAFE_OUTPUTS(vector) + : "=a"(a), "=d"(d), KVM_ASM_SAFE_OUTPUTS(vector, error_code) : "c"(msr) : KVM_ASM_SAFE_CLOBBERS); diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 999576146d69..4623874a805b 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -1068,6 +1068,7 @@ static bool kvm_fixup_exception(struct ex_regs *regs) regs->rip = regs->r11; regs->r9 = regs->vector; + regs->r10 = regs->error_code; return true; } diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c index 05b32e550a80..2b6d455acf8a 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c @@ -18,6 +18,7 @@ static inline uint8_t hypercall(u64 control, vm_vaddr_t input_address, vm_vaddr_t output_address, uint64_t *hv_status) { + uint64_t error_code; uint8_t vector; /* 
Note both the hypercall and the "asm safe" clobber r9-r11. */ @@ -25,7 +26,7 @@ static inline uint8_t hypercall(u64 control, vm_vaddr_t input_address, KVM_ASM_SAFE("vmcall") : "=a" (*hv_status), "+c" (control), "+d" (input_address), - KVM_ASM_SAFE_OUTPUTS(vector) + KVM_ASM_SAFE_OUTPUTS(vector, error_code) : [output_address] "r"(output_address), "a" (-EFAULT) : "cc", "memory", "r8", KVM_ASM_SAFE_CLOBBERS); From a323845d6c3d2f667274bde8145906580359fe06 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Wed, 2 Nov 2022 11:46:53 -0700 Subject: [PATCH 1669/4122] KVM: selftests: Expect #PF(RSVD) when TDP is disabled Change smaller_maxphyaddr_emulation_test to expect a #PF(RSVD), rather than an emulation failure, when TDP is disabled. KVM only needs to emulate instructions to emulate a smaller guest.MAXPHYADDR when TDP is enabled. Fixes: 39bbcc3a4e39 ("selftests: kvm: Allows userspace to handle emulation errors.") Signed-off-by: David Matlack Link: https://lore.kernel.org/r/20221102184654.282799-10-dmatlack@google.com [sean: massage comment to talk about having to emulate due to MAXPHYADDR] Signed-off-by: Sean Christopherson --- .../smaller_maxphyaddr_emulation_test.c | 51 +++++++++++++++++-- 1 file changed, 46 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c index a8d081178917..06edf00a97d6 100644 --- a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c +++ b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c @@ -21,9 +21,27 @@ #define MEM_REGION_SLOT 10 #define MEM_REGION_SIZE PAGE_SIZE -static void guest_code(void) +static void guest_code(bool tdp_enabled) { - flds(MEM_REGION_GVA); + uint64_t error_code; + uint64_t vector; + + vector = kvm_asm_safe_ec(FLDS_MEM_EAX, error_code, "a"(MEM_REGION_GVA)); + + /* + * When TDP is enabled, flds will trigger an emulation failure, exit to + * userspace, 
and then the selftest host "VMM" skips the instruction. + * + * When TDP is disabled, no instruction emulation is required so flds + * should generate #PF(RSVD). + */ + if (tdp_enabled) { + GUEST_ASSERT(!vector); + } else { + GUEST_ASSERT_EQ(vector, PF_VECTOR); + GUEST_ASSERT(error_code & PFERR_RSVD_MASK); + } + GUEST_DONE(); } @@ -31,6 +49,7 @@ int main(int argc, char *argv[]) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; + struct ucall uc; uint64_t *pte; uint64_t *hva; uint64_t gpa; @@ -39,6 +58,10 @@ int main(int argc, char *argv[]) TEST_REQUIRE(kvm_has_cap(KVM_CAP_SMALLER_MAXPHYADDR)); vm = vm_create_with_one_vcpu(&vcpu, guest_code); + vcpu_args_set(vcpu, 1, kvm_is_tdp_enabled()); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vcpu); vcpu_set_cpuid_maxphyaddr(vcpu, MAXPHYADDR); @@ -60,9 +83,27 @@ int main(int argc, char *argv[]) *pte |= BIT_ULL(MAXPHYADDR); vcpu_run(vcpu); - handle_flds_emulation_failure_exit(vcpu); - vcpu_run(vcpu); - ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); + + /* + * When TDP is enabled, KVM must emulate in response the guest physical + * address that is illegal from the guest's perspective, but is legal + * from hardware's perspeective. This should result in an emulation + * failure exit to userspace since KVM doesn't support emulating flds. + */ + if (kvm_is_tdp_enabled()) { + handle_flds_emulation_failure_exit(vcpu); + vcpu_run(vcpu); + } + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_DONE: + break; + default: + TEST_FAIL("Unrecognized ucall: %lu\n", uc.cmd); + } kvm_vm_free(vm); From 3ae5b759c3c033b4ca2b7ef19836622902151517 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Wed, 2 Nov 2022 11:46:54 -0700 Subject: [PATCH 1670/4122] KVM: selftests: Add a test for KVM_CAP_EXIT_ON_EMULATION_FAILURE Add a selftest to exercise the KVM_CAP_EXIT_ON_EMULATION_FAILURE capability. 
This capability is also exercised through smaller_maxphyaddr_emulation_test, but that test requires allow_smaller_maxphyaddr=Y, which is off by default on Intel when ept=Y and unconditionally disabled on AMD when npt=Y. This new test ensures that KVM_CAP_EXIT_ON_EMULATION_FAILURE is exercised independent of allow_smaller_maxphyaddr. Signed-off-by: David Matlack Link: https://lore.kernel.org/r/20221102184654.282799-11-dmatlack@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 1 + .../x86_64/exit_on_emulation_failure_test.c | 45 +++++++++++++++++++ 3 files changed, 47 insertions(+) create mode 100644 tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 053e5d34cd03..bef984e4c39d 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -17,6 +17,7 @@ /x86_64/cr4_cpuid_sync_test /x86_64/debug_regs /x86_64/evmcs_test +/x86_64/exit_on_emulation_failure_test /x86_64/fix_hypercall_test /x86_64/get_msr_index_features /x86_64/kvm_clock_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index cff3a7ff8782..487248c67dec 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -82,6 +82,7 @@ TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test +TEST_GEN_PROGS_x86_64 += x86_64/exit_on_emulation_failure_test TEST_GEN_PROGS_x86_64 += x86_64/fix_hypercall_test TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid diff --git a/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c b/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c new file mode 100644 index 
000000000000..37c61f712fd5 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022, Google LLC. + * + * Test for KVM_CAP_EXIT_ON_EMULATION_FAILURE. + */ + +#define _GNU_SOURCE /* for program_invocation_short_name */ + +#include "flds_emulation.h" + +#include "test_util.h" + +#define MMIO_GPA 0x700000000 +#define MMIO_GVA MMIO_GPA + +static void guest_code(void) +{ + /* Execute flds with an MMIO address to force KVM to emulate it. */ + flds(MMIO_GVA); + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + /* Tell stdout not to buffer its content */ + setbuf(stdout, NULL); + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE)); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + vm_enable_cap(vm, KVM_CAP_EXIT_ON_EMULATION_FAILURE, 1); + virt_map(vm, MMIO_GVA, MMIO_GPA, 1); + + vcpu_run(vcpu); + handle_flds_emulation_failure_exit(vcpu); + vcpu_run(vcpu); + ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); + + kvm_vm_free(vm); + return 0; +} From 3bd396353d18b4f4e4f9953e5f5c46b0045a5477 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 6 Oct 2022 00:51:14 +0000 Subject: [PATCH 1671/4122] KVM: selftests: Add X86_FEATURE_PAE and use it calc "fallback" MAXPHYADDR Add X86_FEATURE_PAE and use it to guesstimate the MAXPHYADDR when the MAXPHYADDR CPUID entry isn't supported. No functional change intended. 
Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20221006005125.680782-2-seanjc@google.com --- tools/testing/selftests/kvm/include/x86_64/processor.h | 1 + tools/testing/selftests/kvm/lib/x86_64/processor.c | 4 +--- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 6093d0d53b4e..4f928fe3ad17 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -89,6 +89,7 @@ struct kvm_x86_cpu_feature { #define X86_FEATURE_XSAVE KVM_X86_CPU_FEATURE(0x1, 0, ECX, 26) #define X86_FEATURE_OSXSAVE KVM_X86_CPU_FEATURE(0x1, 0, ECX, 27) #define X86_FEATURE_RDRAND KVM_X86_CPU_FEATURE(0x1, 0, ECX, 30) +#define X86_FEATURE_PAE KVM_X86_CPU_FEATURE(0x1, 0, EDX, 6) #define X86_FEATURE_MCE KVM_X86_CPU_FEATURE(0x1, 0, EDX, 7) #define X86_FEATURE_APIC KVM_X86_CPU_FEATURE(0x1, 0, EDX, 9) #define X86_FEATURE_CLFLUSH KVM_X86_CPU_FEATURE(0x1, 0, EDX, 19) diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 4623874a805b..061e001c8a48 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -1012,12 +1012,10 @@ bool is_amd_cpu(void) void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits) { const struct kvm_cpuid_entry2 *entry; - bool pae; /* SDM 4.1.4 */ if (kvm_get_cpuid_max_extended() < 0x80000008) { - pae = kvm_get_supported_cpuid_entry(1)->edx & (1 << 6); - *pa_bits = pae ? 36 : 32; + *pa_bits == kvm_cpu_has(X86_FEATURE_PAE) ? 
36 : 32; *va_bits = 32; } else { entry = kvm_get_supported_cpuid_entry(0x80000008); From ee3795536664e514196cbe7396d3eb4c9925de98 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 6 Oct 2022 00:51:15 +0000 Subject: [PATCH 1672/4122] KVM: selftests: Refactor X86_FEATURE_* framework to prep for X86_PROPERTY_* Refactor the X86_FEATURE_* framework to prepare for extending the core logic to support "properties". The "feature" framework allows querying a single CPUID bit to detect the presence of a feature; the "property" framework will extend the idea to allow querying a value, i.e. to get a value that is a set of contiguous bits in a CPUID leaf. Opportunistically add static asserts to ensure features are fully defined at compile time, and to try and catch mistakes in the definition of features. No functional change intended. Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20221006005125.680782-3-seanjc@google.com --- .../selftests/kvm/include/x86_64/processor.h | 38 ++++++++++++------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 4f928fe3ad17..52e12d40e66c 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -63,16 +63,21 @@ struct kvm_x86_cpu_feature { u8 reg; u8 bit; }; -#define KVM_X86_CPU_FEATURE(fn, idx, gpr, __bit) \ -({ \ - struct kvm_x86_cpu_feature feature = { \ - .function = fn, \ - .index = idx, \ - .reg = KVM_CPUID_##gpr, \ - .bit = __bit, \ - }; \ - \ - feature; \ +#define KVM_X86_CPU_FEATURE(fn, idx, gpr, __bit) \ +({ \ + struct kvm_x86_cpu_feature feature = { \ + .function = fn, \ + .index = idx, \ + .reg = KVM_CPUID_##gpr, \ + .bit = __bit, \ + }; \ + \ + static_assert((fn & 0xc0000000) == 0 || \ + (fn & 0xc0000000) == 0x40000000 || \ + (fn & 0xc0000000) == 0x80000000 || \ + (fn & 0xc0000000) == 0xc0000000); \ + 
static_assert(idx < BIT(sizeof(feature.index) * BITS_PER_BYTE)); \ + feature; \ }) /* @@ -432,15 +437,22 @@ static inline void cpuid(uint32_t function, return __cpuid(function, 0, eax, ebx, ecx, edx); } -static inline bool this_cpu_has(struct kvm_x86_cpu_feature feature) +static inline uint32_t __this_cpu_has(uint32_t function, uint32_t index, + uint8_t reg, uint8_t lo, uint8_t hi) { uint32_t gprs[4]; - __cpuid(feature.function, feature.index, + __cpuid(function, index, &gprs[KVM_CPUID_EAX], &gprs[KVM_CPUID_EBX], &gprs[KVM_CPUID_ECX], &gprs[KVM_CPUID_EDX]); - return gprs[feature.reg] & BIT(feature.bit); + return (gprs[reg] & GENMASK(hi, lo)) >> lo; +} + +static inline bool this_cpu_has(struct kvm_x86_cpu_feature feature) +{ + return __this_cpu_has(feature.function, feature.index, + feature.reg, feature.bit, feature.bit); } #define SET_XMM(__var, __xmm) \ From 53a7dc0f215ec91b098e3e6d7bb4bb9cef43a99a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 6 Oct 2022 00:51:16 +0000 Subject: [PATCH 1673/4122] KVM: selftests: Add X86_PROPERTY_* framework to retrieve CPUID values Introduce X86_PROPERTY_* to allow retrieving values/properties from CPUID leafs, e.g. MAXPHYADDR from CPUID.0x80000008. Use the same core code as X86_FEATURE_*, the primary difference is that properties are multi-bit values, whereas features enumerate a single bit. Add this_cpu_has_p() to allow querying whether or not a property exists based on the maximum leaf associated with the property, e.g. MAXPHYADDR doesn't exist if the max leaf for 0x8000_xxxx is less than 0x8000_0008. Use the new property infrastructure in vm_compute_max_gfn() to prove that the code works as intended. Future patches will convert additional selftests code. 
Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20221006005125.680782-4-seanjc@google.com --- .../selftests/kvm/include/x86_64/processor.h | 68 +++++++++++++++++++ .../selftests/kvm/lib/x86_64/processor.c | 18 +++-- 2 files changed, 76 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 52e12d40e66c..df1ba5af89ed 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -168,6 +168,48 @@ struct kvm_x86_cpu_feature { #define X86_FEATURE_KVM_HC_MAP_GPA_RANGE KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 16) #define X86_FEATURE_KVM_MIGRATION_CONTROL KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 17) +/* + * Same idea as X86_FEATURE_XXX, but X86_PROPERTY_XXX retrieves a multi-bit + * value/property as opposed to a single-bit feature. Again, pack the info + * into a 64-bit value to pass by value with no overhead. 
+ */ +struct kvm_x86_cpu_property { + u32 function; + u8 index; + u8 reg; + u8 lo_bit; + u8 hi_bit; +}; +#define KVM_X86_CPU_PROPERTY(fn, idx, gpr, low_bit, high_bit) \ +({ \ + struct kvm_x86_cpu_property property = { \ + .function = fn, \ + .index = idx, \ + .reg = KVM_CPUID_##gpr, \ + .lo_bit = low_bit, \ + .hi_bit = high_bit, \ + }; \ + \ + static_assert(low_bit < high_bit); \ + static_assert((fn & 0xc0000000) == 0 || \ + (fn & 0xc0000000) == 0x40000000 || \ + (fn & 0xc0000000) == 0x80000000 || \ + (fn & 0xc0000000) == 0xc0000000); \ + static_assert(idx < BIT(sizeof(property.index) * BITS_PER_BYTE)); \ + property; \ +}) + +#define X86_PROPERTY_MAX_BASIC_LEAF KVM_X86_CPU_PROPERTY(0, 0, EAX, 0, 31) + +#define X86_PROPERTY_MAX_KVM_LEAF KVM_X86_CPU_PROPERTY(0x40000000, 0, EAX, 0, 31) + +#define X86_PROPERTY_MAX_EXT_LEAF KVM_X86_CPU_PROPERTY(0x80000000, 0, EAX, 0, 31) +#define X86_PROPERTY_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 0, 7) +#define X86_PROPERTY_PHYS_ADDR_REDUCTION KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 6, 11) + +#define X86_PROPERTY_MAX_CENTAUR_LEAF KVM_X86_CPU_PROPERTY(0xC0000000, 0, EAX, 0, 31) + + /* Page table bitfield declarations */ #define PTE_PRESENT_MASK BIT_ULL(0) #define PTE_WRITABLE_MASK BIT_ULL(1) @@ -455,6 +497,32 @@ static inline bool this_cpu_has(struct kvm_x86_cpu_feature feature) feature.reg, feature.bit, feature.bit); } +static inline uint32_t this_cpu_property(struct kvm_x86_cpu_property property) +{ + return __this_cpu_has(property.function, property.index, + property.reg, property.lo_bit, property.hi_bit); +} + +static __always_inline bool this_cpu_has_p(struct kvm_x86_cpu_property property) +{ + uint32_t max_leaf; + + switch (property.function & 0xc0000000) { + case 0: + max_leaf = this_cpu_property(X86_PROPERTY_MAX_BASIC_LEAF); + break; + case 0x40000000: + max_leaf = this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF); + break; + case 0x80000000: + max_leaf = this_cpu_property(X86_PROPERTY_MAX_EXT_LEAF); + break; + case 
0xc0000000: + max_leaf = this_cpu_property(X86_PROPERTY_MAX_CENTAUR_LEAF); + } + return max_leaf >= property.function; +} + #define SET_XMM(__var, __xmm) \ asm volatile("movq %0, %%"#__xmm : : "r"(__var) : #__xmm) diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 061e001c8a48..23321c1d0631 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -1229,7 +1229,8 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm) { const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */ unsigned long ht_gfn, max_gfn, max_pfn; - uint32_t eax, ebx, ecx, edx, max_ext_leaf; + uint32_t eax, ebx, ecx, edx; + uint8_t maxphyaddr; max_gfn = (1ULL << (vm->pa_bits - vm->page_shift)) - 1; @@ -1252,17 +1253,14 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm) * reduced due to SME by bits 11:6 of CPUID[0x8000001f].EBX. Use * the old conservative value if MAXPHYADDR is not enumerated. 
*/ - cpuid(0x80000000, &eax, &ebx, &ecx, &edx); - max_ext_leaf = eax; - if (max_ext_leaf < 0x80000008) + if (!this_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR)) goto done; - cpuid(0x80000008, &eax, &ebx, &ecx, &edx); - max_pfn = (1ULL << ((eax & 0xff) - vm->page_shift)) - 1; - if (max_ext_leaf >= 0x8000001f) { - cpuid(0x8000001f, &eax, &ebx, &ecx, &edx); - max_pfn >>= (ebx >> 6) & 0x3f; - } + maxphyaddr = this_cpu_property(X86_PROPERTY_MAX_PHY_ADDR); + max_pfn = (1ULL << (maxphyaddr - vm->page_shift)) - 1; + + if (this_cpu_has_p(X86_PROPERTY_PHYS_ADDR_REDUCTION)) + max_pfn >>= this_cpu_property(X86_PROPERTY_PHYS_ADDR_REDUCTION); ht_gfn = max_pfn - num_ht_pages; done: From d80ddad2a8e042e6499d69d5a45a17051092e161 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 6 Oct 2022 00:51:17 +0000 Subject: [PATCH 1674/4122] KVM: selftests: Use X86_PROPERTY_MAX_KVM_LEAF in CPUID test Use X86_PROPERTY_MAX_KVM_LEAF to replace the equivalent open coded check on KVM's maximum paravirt CPUID leaf. No functional change intended. 
Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20221006005125.680782-5-seanjc@google.com --- tools/testing/selftests/kvm/x86_64/cpuid_test.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/cpuid_test.c b/tools/testing/selftests/kvm/x86_64/cpuid_test.c index a6aeee2e62e4..2fc3ad9c887e 100644 --- a/tools/testing/selftests/kvm/x86_64/cpuid_test.c +++ b/tools/testing/selftests/kvm/x86_64/cpuid_test.c @@ -43,15 +43,6 @@ static void test_guest_cpuids(struct kvm_cpuid2 *guest_cpuid) } -static void test_cpuid_40000000(struct kvm_cpuid2 *guest_cpuid) -{ - u32 eax, ebx, ecx, edx; - - cpuid(0x40000000, &eax, &ebx, &ecx, &edx); - - GUEST_ASSERT(eax == 0x40000001); -} - static void guest_main(struct kvm_cpuid2 *guest_cpuid) { GUEST_SYNC(1); @@ -60,7 +51,7 @@ static void guest_main(struct kvm_cpuid2 *guest_cpuid) GUEST_SYNC(2); - test_cpuid_40000000(guest_cpuid); + GUEST_ASSERT(this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF) == 0x40000001); GUEST_DONE(); } From a29e6e383b0d0d59a93ebbf6e93d3d41b905d336 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 6 Oct 2022 00:51:18 +0000 Subject: [PATCH 1675/4122] KVM: selftests: Refactor kvm_cpuid_has() to prep for X86_PROPERTY_* support Refactor kvm_cpuid_has() to prepare for extending X86_PROPERTY_* support to KVM as well as "this CPU". No functional change intended. 
Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20221006005125.680782-6-seanjc@google.com --- .../selftests/kvm/lib/x86_64/processor.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 23321c1d0631..710e5851a863 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -652,8 +652,9 @@ const struct kvm_cpuid2 *kvm_get_supported_cpuid(void) return cpuid; } -bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid, - struct kvm_x86_cpu_feature feature) +static uint32_t __kvm_cpu_has(const struct kvm_cpuid2 *cpuid, + uint32_t function, uint32_t index, + uint8_t reg, uint8_t lo, uint8_t hi) { const struct kvm_cpuid_entry2 *entry; int i; @@ -666,12 +667,18 @@ bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid, * order, but kvm_x86_cpu_feature matches that mess, so yay * pointer shenanigans! */ - if (entry->function == feature.function && - entry->index == feature.index) - return (&entry->eax)[feature.reg] & BIT(feature.bit); + if (entry->function == function && entry->index == index) + return ((&entry->eax)[reg] & GENMASK(hi, lo)) >> lo; } - return false; + return 0; +} + +bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid, + struct kvm_x86_cpu_feature feature) +{ + return __kvm_cpu_has(cpuid, feature.function, feature.index, + feature.reg, feature.bit, feature.bit); } uint64_t kvm_get_feature_msr(uint64_t msr_index) From 40854713e3254f7a4fc4a92388309140e51e046c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 6 Oct 2022 00:51:19 +0000 Subject: [PATCH 1676/4122] KVM: selftests: Add kvm_cpu_*() support for X86_PROPERTY_* Extent X86_PROPERTY_* support to KVM, i.e. add kvm_cpu_property() and kvm_cpu_has_p(), and use the new helpers in kvm_get_cpu_address_width(). No functional change intended. 
Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20221006005125.680782-7-seanjc@google.com --- .../selftests/kvm/include/x86_64/processor.h | 34 ++++++++++++++++--- .../selftests/kvm/lib/x86_64/processor.c | 17 ++++++---- 2 files changed, 39 insertions(+), 12 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index df1ba5af89ed..1eb76268c9f7 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -205,6 +205,7 @@ struct kvm_x86_cpu_property { #define X86_PROPERTY_MAX_EXT_LEAF KVM_X86_CPU_PROPERTY(0x80000000, 0, EAX, 0, 31) #define X86_PROPERTY_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 0, 7) +#define X86_PROPERTY_MAX_VIRT_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 8, 15) #define X86_PROPERTY_PHYS_ADDR_REDUCTION KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 6, 11) #define X86_PROPERTY_MAX_CENTAUR_LEAF KVM_X86_CPU_PROPERTY(0xC0000000, 0, EAX, 0, 31) @@ -703,6 +704,34 @@ static inline bool kvm_cpu_has(struct kvm_x86_cpu_feature feature) return kvm_cpuid_has(kvm_get_supported_cpuid(), feature); } +uint32_t kvm_cpuid_property(const struct kvm_cpuid2 *cpuid, + struct kvm_x86_cpu_property property); + +static inline uint32_t kvm_cpu_property(struct kvm_x86_cpu_property property) +{ + return kvm_cpuid_property(kvm_get_supported_cpuid(), property); +} + +static __always_inline bool kvm_cpu_has_p(struct kvm_x86_cpu_property property) +{ + uint32_t max_leaf; + + switch (property.function & 0xc0000000) { + case 0: + max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_BASIC_LEAF); + break; + case 0x40000000: + max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_KVM_LEAF); + break; + case 0x80000000: + max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_EXT_LEAF); + break; + case 0xc0000000: + max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_CENTAUR_LEAF); + } + return max_leaf >= property.function; +} + static 
inline size_t kvm_cpuid2_size(int nr_entries) { return sizeof(struct kvm_cpuid2) + @@ -815,11 +844,6 @@ static inline uint32_t kvm_get_cpuid_max_basic(void) return kvm_get_supported_cpuid_entry(0)->eax; } -static inline uint32_t kvm_get_cpuid_max_extended(void) -{ - return kvm_get_supported_cpuid_entry(0x80000000)->eax; -} - void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits); bool vm_is_unrestricted_guest(struct kvm_vm *vm); diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 710e5851a863..af23c70ac129 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -681,6 +681,13 @@ bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid, feature.reg, feature.bit, feature.bit); } +uint32_t kvm_cpuid_property(const struct kvm_cpuid2 *cpuid, + struct kvm_x86_cpu_property property) +{ + return __kvm_cpu_has(cpuid, property.function, property.index, + property.reg, property.lo_bit, property.hi_bit); +} + uint64_t kvm_get_feature_msr(uint64_t msr_index) { struct { @@ -1018,16 +1025,12 @@ bool is_amd_cpu(void) void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits) { - const struct kvm_cpuid_entry2 *entry; - - /* SDM 4.1.4 */ - if (kvm_get_cpuid_max_extended() < 0x80000008) { + if (!kvm_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR)) { *pa_bits == kvm_cpu_has(X86_FEATURE_PAE) ? 
36 : 32; *va_bits = 32; } else { - entry = kvm_get_supported_cpuid_entry(0x80000008); - *pa_bits = entry->eax & 0xff; - *va_bits = (entry->eax >> 8) & 0xff; + *pa_bits = kvm_cpu_property(X86_PROPERTY_MAX_PHY_ADDR); + *va_bits = kvm_cpu_property(X86_PROPERTY_MAX_VIRT_ADDR); } } From 5dc19f1c7dd302f48a9f7fe7f29bb186d3477795 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 6 Oct 2022 00:51:20 +0000 Subject: [PATCH 1677/4122] KVM: selftests: Convert AMX test to use X86_PROPERTY_XXX Add and use x86 "properties" for the myriad AMX CPUID values that are validated by the AMX test. Drop most of the test's single-usage helpers so that the asserts more precisely capture what check failed. Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20221006005125.680782-8-seanjc@google.com --- .../selftests/kvm/include/x86_64/processor.h | 9 ++ tools/testing/selftests/kvm/x86_64/amx_test.c | 101 ++++-------------- 2 files changed, 31 insertions(+), 79 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 1eb76268c9f7..841ef8c4ba00 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -200,6 +200,15 @@ struct kvm_x86_cpu_property { }) #define X86_PROPERTY_MAX_BASIC_LEAF KVM_X86_CPU_PROPERTY(0, 0, EAX, 0, 31) +#define X86_PROPERTY_XSTATE_MAX_SIZE_XCR0 KVM_X86_CPU_PROPERTY(0xd, 0, EBX, 0, 31) +#define X86_PROPERTY_XSTATE_MAX_SIZE KVM_X86_CPU_PROPERTY(0xd, 0, ECX, 0, 31) +#define X86_PROPERTY_XSTATE_TILE_SIZE KVM_X86_CPU_PROPERTY(0xd, 18, EAX, 0, 31) +#define X86_PROPERTY_XSTATE_TILE_OFFSET KVM_X86_CPU_PROPERTY(0xd, 18, EBX, 0, 31) +#define X86_PROPERTY_AMX_TOTAL_TILE_BYTES KVM_X86_CPU_PROPERTY(0x1d, 1, EAX, 0, 15) +#define X86_PROPERTY_AMX_BYTES_PER_TILE KVM_X86_CPU_PROPERTY(0x1d, 1, EAX, 16, 31) +#define X86_PROPERTY_AMX_BYTES_PER_ROW KVM_X86_CPU_PROPERTY(0x1d, 1, EBX, 0, 15) +#define 
X86_PROPERTY_AMX_NR_TILE_REGS KVM_X86_CPU_PROPERTY(0x1d, 1, EBX, 16, 31) +#define X86_PROPERTY_AMX_MAX_ROWS KVM_X86_CPU_PROPERTY(0x1d, 1, ECX, 0, 15) #define X86_PROPERTY_MAX_KVM_LEAF KVM_X86_CPU_PROPERTY(0x40000000, 0, EAX, 0, 31) diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c index dadcbad10a1d..21de6ae42086 100644 --- a/tools/testing/selftests/kvm/x86_64/amx_test.c +++ b/tools/testing/selftests/kvm/x86_64/amx_test.c @@ -39,11 +39,6 @@ #define XFEATURE_MASK_XTILEDATA (1 << XFEATURE_XTILEDATA) #define XFEATURE_MASK_XTILE (XFEATURE_MASK_XTILECFG | XFEATURE_MASK_XTILEDATA) -#define TILE_CPUID 0x1d -#define XSTATE_CPUID 0xd -#define TILE_PALETTE_CPUID_SUBLEAVE 0x1 -#define XSTATE_USER_STATE_SUBLEAVE 0x0 - #define XSAVE_HDR_OFFSET 512 struct xsave_data { @@ -129,71 +124,26 @@ static bool check_xsave_supports_xtile(void) return __xgetbv(0) & XFEATURE_MASK_XTILE; } -static bool enum_xtile_config(void) +static void check_xtile_info(void) { - u32 eax, ebx, ecx, edx; + GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0)); + GUEST_ASSERT(this_cpu_property(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0) <= XSAVE_SIZE); - __cpuid(TILE_CPUID, TILE_PALETTE_CPUID_SUBLEAVE, &eax, &ebx, &ecx, &edx); - if (!eax || !ebx || !ecx) - return false; + xtile.xsave_offset = this_cpu_property(X86_PROPERTY_XSTATE_TILE_OFFSET); + GUEST_ASSERT(xtile.xsave_offset == 2816); + xtile.xsave_size = this_cpu_property(X86_PROPERTY_XSTATE_TILE_SIZE); + GUEST_ASSERT(xtile.xsave_size == 8192); + GUEST_ASSERT(sizeof(struct tile_data) >= xtile.xsave_size); - xtile.max_names = ebx >> 16; - if (xtile.max_names < NUM_TILES) - return false; - - xtile.bytes_per_tile = eax >> 16; - if (xtile.bytes_per_tile < TILE_SIZE) - return false; - - xtile.bytes_per_row = ebx; - xtile.max_rows = ecx; - - return true; -} - -static bool enum_xsave_tile(void) -{ - u32 eax, ebx, ecx, edx; - - __cpuid(XSTATE_CPUID, XFEATURE_XTILEDATA, &eax, &ebx, &ecx, &edx); - if (!eax 
|| !ebx) - return false; - - xtile.xsave_offset = ebx; - xtile.xsave_size = eax; - - return true; -} - -static bool check_xsave_size(void) -{ - u32 eax, ebx, ecx, edx; - bool valid = false; - - __cpuid(XSTATE_CPUID, XSTATE_USER_STATE_SUBLEAVE, &eax, &ebx, &ecx, &edx); - if (ebx && ebx <= XSAVE_SIZE) - valid = true; - - return valid; -} - -static bool check_xtile_info(void) -{ - bool ret = false; - - if (!check_xsave_size()) - return ret; - - if (!enum_xsave_tile()) - return ret; - - if (!enum_xtile_config()) - return ret; - - if (sizeof(struct tile_data) >= xtile.xsave_size) - ret = true; - - return ret; + GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_AMX_NR_TILE_REGS)); + xtile.max_names = this_cpu_property(X86_PROPERTY_AMX_NR_TILE_REGS); + GUEST_ASSERT(xtile.max_names == 8); + xtile.bytes_per_tile = this_cpu_property(X86_PROPERTY_AMX_BYTES_PER_TILE); + GUEST_ASSERT(xtile.bytes_per_tile == 1024); + xtile.bytes_per_row = this_cpu_property(X86_PROPERTY_AMX_BYTES_PER_ROW); + GUEST_ASSERT(xtile.bytes_per_row == 64); + xtile.max_rows = this_cpu_property(X86_PROPERTY_AMX_MAX_ROWS); + GUEST_ASSERT(xtile.max_rows == 16); } static void set_tilecfg(struct tile_config *cfg) @@ -238,16 +188,8 @@ static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg, { init_regs(); check_cpuid_xsave(); - GUEST_ASSERT(check_xsave_supports_xtile()); - GUEST_ASSERT(check_xtile_info()); - - /* check xtile configs */ - GUEST_ASSERT(xtile.xsave_offset == 2816); - GUEST_ASSERT(xtile.xsave_size == 8192); - GUEST_ASSERT(xtile.max_names == 8); - GUEST_ASSERT(xtile.bytes_per_tile == 1024); - GUEST_ASSERT(xtile.bytes_per_row == 64); - GUEST_ASSERT(xtile.max_rows == 16); + check_xsave_supports_xtile(); + check_xtile_info(); GUEST_SYNC(1); /* xfd=0, enable amx */ @@ -317,8 +259,9 @@ int main(int argc, char *argv[]) TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILECFG)); TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILEDATA)); - /* Get xsave/restore max size */ - xsave_restore_size = 
kvm_get_supported_cpuid_entry(0xd)->ecx; + TEST_ASSERT(kvm_cpu_has_p(X86_PROPERTY_XSTATE_MAX_SIZE), + "KVM should enumerate max XSAVE size when XSAVE is supported"); + xsave_restore_size = kvm_cpu_property(X86_PROPERTY_XSTATE_MAX_SIZE); run = vcpu->run; vcpu_regs_get(vcpu, &regs1); From 4feb9d21a407318129b6ea3a6735f1866439b9ab Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 6 Oct 2022 00:51:21 +0000 Subject: [PATCH 1678/4122] KVM: selftests: Convert vmx_pmu_caps_test to use X86_PROPERTY_* Add X86_PROPERTY_PMU_VERSION and use it in vmx_pmu_caps_test to replace open coded versions of the same functionality. No functional change intended. Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20221006005125.680782-9-seanjc@google.com --- .../selftests/kvm/include/x86_64/processor.h | 6 ++---- .../selftests/kvm/x86_64/vmx_pmu_caps_test.c | 19 ++----------------- 2 files changed, 4 insertions(+), 21 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 841ef8c4ba00..848dfbc9866b 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -200,6 +200,8 @@ struct kvm_x86_cpu_property { }) #define X86_PROPERTY_MAX_BASIC_LEAF KVM_X86_CPU_PROPERTY(0, 0, EAX, 0, 31) +#define X86_PROPERTY_PMU_VERSION KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 0, 7) + #define X86_PROPERTY_XSTATE_MAX_SIZE_XCR0 KVM_X86_CPU_PROPERTY(0xd, 0, EBX, 0, 31) #define X86_PROPERTY_XSTATE_MAX_SIZE KVM_X86_CPU_PROPERTY(0xd, 0, ECX, 0, 31) #define X86_PROPERTY_XSTATE_TILE_SIZE KVM_X86_CPU_PROPERTY(0xd, 18, EAX, 0, 31) @@ -848,10 +850,6 @@ static inline void vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, TEST_ASSERT(r == 1, KVM_IOCTL_ERROR(KVM_SET_MSRS, r)); } -static inline uint32_t kvm_get_cpuid_max_basic(void) -{ - return kvm_get_supported_cpuid_entry(0)->eax; -} void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned 
int *va_bits); bool vm_is_unrestricted_guest(struct kvm_vm *vm); diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c index 069589c52f41..c280ba1e6572 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c @@ -20,16 +20,6 @@ #define PMU_CAP_FW_WRITES (1ULL << 13) #define PMU_CAP_LBR_FMT 0x3f -union cpuid10_eax { - struct { - unsigned int version_id:8; - unsigned int num_counters:8; - unsigned int bit_width:8; - unsigned int mask_length:8; - } split; - unsigned int full; -}; - union perf_capabilities { struct { u64 lbr_format:6; @@ -53,11 +43,9 @@ static void guest_code(void) int main(int argc, char *argv[]) { - const struct kvm_cpuid_entry2 *entry_a_0; struct kvm_vm *vm; struct kvm_vcpu *vcpu; int ret; - union cpuid10_eax eax; union perf_capabilities host_cap; uint64_t val; @@ -69,11 +57,8 @@ int main(int argc, char *argv[]) TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_PDCM)); - TEST_REQUIRE(kvm_get_cpuid_max_basic() >= 0xa); - entry_a_0 = kvm_get_supported_cpuid_entry(0xa); - - eax.full = entry_a_0->eax; - __TEST_REQUIRE(eax.split.version_id, "PMU is not supported by the vCPU"); + TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION)); + TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0); /* testcase 1, set capabilities when we have PDCM bit */ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, PMU_CAP_FW_WRITES); From 5228c02a4c541a065ec071ea7ec2c1c76f3723dd Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 6 Oct 2022 00:51:22 +0000 Subject: [PATCH 1679/4122] KVM: selftests: Add PMU feature framework, use in PMU event filter test Add an X86_PMU_FEATURE_* framework to simplify probing architectural events on Intel PMUs, which require checking the length of a bit vector and the _absence_ of a "feature" bit. 
Add helpers for both KVM and "this CPU", and use the newfangled magic (along with X86_PROPERTY_*) to clean up pmu_event_filter_test. No functional change intended. Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20221006005125.680782-10-seanjc@google.com --- .../selftests/kvm/include/x86_64/processor.h | 41 +++++++++++++++ .../kvm/x86_64/pmu_event_filter_test.c | 51 +++---------------- 2 files changed, 48 insertions(+), 44 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 848dfbc9866b..343393308005 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -201,6 +201,8 @@ struct kvm_x86_cpu_property { #define X86_PROPERTY_MAX_BASIC_LEAF KVM_X86_CPU_PROPERTY(0, 0, EAX, 0, 31) #define X86_PROPERTY_PMU_VERSION KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 0, 7) +#define X86_PROPERTY_PMU_NR_GP_COUNTERS KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 8, 15) +#define X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 24, 31) #define X86_PROPERTY_XSTATE_MAX_SIZE_XCR0 KVM_X86_CPU_PROPERTY(0xd, 0, EBX, 0, 31) #define X86_PROPERTY_XSTATE_MAX_SIZE KVM_X86_CPU_PROPERTY(0xd, 0, ECX, 0, 31) @@ -221,6 +223,29 @@ struct kvm_x86_cpu_property { #define X86_PROPERTY_MAX_CENTAUR_LEAF KVM_X86_CPU_PROPERTY(0xC0000000, 0, EAX, 0, 31) +/* + * Intel's architectural PMU events are bizarre. They have a "feature" bit + * that indicates the feature is _not_ supported, and a property that states + * the length of the bit mask of unsupported features. A feature is supported + * if the size of the bit mask is larger than the "unavailable" bit, and said + * bit is not set. + * + * Wrap the "unavailable" feature to simplify checking whether or not a given + * architectural event is supported. 
+ */ +struct kvm_x86_pmu_feature { + struct kvm_x86_cpu_feature anti_feature; +}; +#define KVM_X86_PMU_FEATURE(name, __bit) \ +({ \ + struct kvm_x86_pmu_feature feature = { \ + .anti_feature = KVM_X86_CPU_FEATURE(0xa, 0, EBX, __bit), \ + }; \ + \ + feature; \ +}) + +#define X86_PMU_FEATURE_BRANCH_INSNS_RETIRED KVM_X86_PMU_FEATURE(BRANCH_INSNS_RETIRED, 5) /* Page table bitfield declarations */ #define PTE_PRESENT_MASK BIT_ULL(0) @@ -535,6 +560,14 @@ static __always_inline bool this_cpu_has_p(struct kvm_x86_cpu_property property) return max_leaf >= property.function; } +static inline bool this_pmu_has(struct kvm_x86_pmu_feature feature) +{ + uint32_t nr_bits = this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH); + + return nr_bits > feature.anti_feature.bit && + !this_cpu_has(feature.anti_feature); +} + #define SET_XMM(__var, __xmm) \ asm volatile("movq %0, %%"#__xmm : : "r"(__var) : #__xmm) @@ -743,6 +776,14 @@ static __always_inline bool kvm_cpu_has_p(struct kvm_x86_cpu_property property) return max_leaf >= property.function; } +static inline bool kvm_pmu_has(struct kvm_x86_pmu_feature feature) +{ + uint32_t nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH); + + return nr_bits > feature.anti_feature.bit && + !kvm_cpu_has(feature.anti_feature); +} + static inline size_t kvm_cpuid2_size(int nr_entries) { return sizeof(struct kvm_cpuid2) + diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c index a6ffa245c897..201c4ea44ca9 100644 --- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c +++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c @@ -21,29 +21,6 @@ #define ARCH_PERFMON_EVENTSEL_OS (1ULL << 17) #define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22) -union cpuid10_eax { - struct { - unsigned int version_id:8; - unsigned int num_counters:8; - unsigned int bit_width:8; - unsigned int mask_length:8; - } split; - unsigned int full; -}; - -union 
cpuid10_ebx { - struct { - unsigned int no_unhalted_core_cycles:1; - unsigned int no_instructions_retired:1; - unsigned int no_unhalted_reference_cycles:1; - unsigned int no_llc_reference:1; - unsigned int no_llc_misses:1; - unsigned int no_branch_instruction_retired:1; - unsigned int no_branch_misses_retired:1; - } split; - unsigned int full; -}; - /* End of stuff taken from perf_event.h. */ /* Oddly, this isn't in perf_event.h. */ @@ -380,30 +357,16 @@ static void test_pmu_config_disable(void (*guest_code)(void)) } /* - * Check for a non-zero PMU version, at least one general-purpose - * counter per logical processor, an EBX bit vector of length greater - * than 5, and EBX[5] clear. - */ -static bool check_intel_pmu_leaf(const struct kvm_cpuid_entry2 *entry) -{ - union cpuid10_eax eax = { .full = entry->eax }; - union cpuid10_ebx ebx = { .full = entry->ebx }; - - return eax.split.version_id && eax.split.num_counters > 0 && - eax.split.mask_length > ARCH_PERFMON_BRANCHES_RETIRED && - !ebx.split.no_branch_instruction_retired; -} - -/* - * Note that CPUID leaf 0xa is Intel-specific. This leaf should be - * clear on AMD hardware. + * On Intel, check for a non-zero PMU version, at least one general-purpose + * counter per logical processor, and support for counting the number of branch + * instructions retired. 
*/ static bool use_intel_pmu(void) { - const struct kvm_cpuid_entry2 *entry; - - entry = kvm_get_supported_cpuid_entry(0xa); - return is_intel_cpu() && check_intel_pmu_leaf(entry); + return is_intel_cpu() && + kvm_cpu_property(X86_PROPERTY_PMU_VERSION) && + kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS) && + kvm_pmu_has(X86_PMU_FEATURE_BRANCH_INSNS_RETIRED); } static bool is_zen1(uint32_t eax) From 24f3f9898e3c463b39dac8a03870c628dab8176e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 6 Oct 2022 00:51:23 +0000 Subject: [PATCH 1680/4122] KVM: selftests: Add dedicated helpers for getting x86 Family and Model Add dedicated helpers for getting x86's Family and Model, which are the last holdouts that "need" raw access to CPUID information. FMS info is a mess and requires not only splicing together multiple values, but requires doing so conditionally in the Family case. Provide wrappers to reduce the odds of copy+paste errors, but mostly to allow for the eventual removal of kvm_get_supported_cpuid_entry(). No functional change intended. 
Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20221006005125.680782-11-seanjc@google.com --- .../selftests/kvm/include/x86_64/processor.h | 52 +++++++++++++------ .../selftests/kvm/lib/x86_64/processor.c | 4 +- 2 files changed, 36 insertions(+), 20 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 343393308005..5d0acf8a9633 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -247,6 +247,23 @@ struct kvm_x86_pmu_feature { #define X86_PMU_FEATURE_BRANCH_INSNS_RETIRED KVM_X86_PMU_FEATURE(BRANCH_INSNS_RETIRED, 5) +static inline unsigned int x86_family(unsigned int eax) +{ + unsigned int x86; + + x86 = (eax >> 8) & 0xf; + + if (x86 == 0xf) + x86 += (eax >> 20) & 0xff; + + return x86; +} + +static inline unsigned int x86_model(unsigned int eax) +{ + return ((eax >> 12) & 0xf0) | ((eax >> 4) & 0x0f); +} + /* Page table bitfield declarations */ #define PTE_PRESENT_MASK BIT_ULL(0) #define PTE_WRITABLE_MASK BIT_ULL(1) @@ -516,6 +533,24 @@ static inline void cpuid(uint32_t function, return __cpuid(function, 0, eax, ebx, ecx, edx); } +static inline uint32_t this_cpu_fms(void) +{ + uint32_t eax, ebx, ecx, edx; + + cpuid(1, &eax, &ebx, &ecx, &edx); + return eax; +} + +static inline uint32_t this_cpu_family(void) +{ + return x86_family(this_cpu_fms()); +} + +static inline uint32_t this_cpu_model(void) +{ + return x86_model(this_cpu_fms()); +} + static inline uint32_t __this_cpu_has(uint32_t function, uint32_t index, uint8_t reg, uint8_t lo, uint8_t hi) { @@ -658,23 +693,6 @@ static inline void cpu_relax(void) bool is_intel_cpu(void); bool is_amd_cpu(void); -static inline unsigned int x86_family(unsigned int eax) -{ - unsigned int x86; - - x86 = (eax >> 8) & 0xf; - - if (x86 == 0xf) - x86 += (eax >> 20) & 0xff; - - return x86; -} - -static inline unsigned int x86_model(unsigned int eax) -{ 
- return ((eax >> 12) & 0xf0) | ((eax >> 4) & 0x0f); -} - struct kvm_x86_state *vcpu_save_state(struct kvm_vcpu *vcpu); void vcpu_load_state(struct kvm_vcpu *vcpu, struct kvm_x86_state *state); void kvm_x86_state_cleanup(struct kvm_x86_state *state); diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index af23c70ac129..65e87b5acb6f 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -1239,7 +1239,6 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm) { const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */ unsigned long ht_gfn, max_gfn, max_pfn; - uint32_t eax, ebx, ecx, edx; uint8_t maxphyaddr; max_gfn = (1ULL << (vm->pa_bits - vm->page_shift)) - 1; @@ -1254,8 +1253,7 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm) /* Before family 17h, the HyperTransport area is just below 1T. */ ht_gfn = (1 << 28) - num_ht_pages; - cpuid(1, &eax, &ebx, &ecx, &edx); - if (x86_family(eax) < 0x17) + if (this_cpu_family() < 0x17) goto done; /* From 074e9d4c9c6046f4605c9e37e90cff404119c525 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 6 Oct 2022 00:51:24 +0000 Subject: [PATCH 1681/4122] KVM: selftests: Add and use KVM helpers for x86 Family and Model Add KVM variants of the x86 Family and Model helpers, and use them in the PMU event filter test. Open code the retrieval of KVM's supported CPUID entry 0x1.0 in anticipation of dropping kvm_get_supported_cpuid_entry(). No functional change intended. 
Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20221006005125.680782-12-seanjc@google.com --- .../selftests/kvm/include/x86_64/processor.h | 19 +++++++++++++-- .../kvm/x86_64/pmu_event_filter_test.c | 23 +++++++++---------- 2 files changed, 28 insertions(+), 14 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 5d0acf8a9633..4366dbcc1bcd 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -754,10 +754,27 @@ static inline void vcpu_xcrs_set(struct kvm_vcpu *vcpu, struct kvm_xcrs *xcrs) vcpu_ioctl(vcpu, KVM_SET_XCRS, xcrs); } +const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid, + uint32_t function, uint32_t index); const struct kvm_cpuid2 *kvm_get_supported_cpuid(void); const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void); const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu); +static inline uint32_t kvm_cpu_fms(void) +{ + return get_cpuid_entry(kvm_get_supported_cpuid(), 0x1, 0)->eax; +} + +static inline uint32_t kvm_cpu_family(void) +{ + return x86_family(kvm_cpu_fms()); +} + +static inline uint32_t kvm_cpu_model(void) +{ + return x86_model(kvm_cpu_fms()); +} + bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid, struct kvm_x86_cpu_feature feature); @@ -825,8 +842,6 @@ static inline struct kvm_cpuid2 *allocate_kvm_cpuid2(int nr_entries) return cpuid; } -const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid, - uint32_t function, uint32_t index); void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid); void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu); diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c index 201c4ea44ca9..2de98fce7edd 100644 --- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c 
+++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c @@ -369,20 +369,19 @@ static bool use_intel_pmu(void) kvm_pmu_has(X86_PMU_FEATURE_BRANCH_INSNS_RETIRED); } -static bool is_zen1(uint32_t eax) +static bool is_zen1(uint32_t family, uint32_t model) { - return x86_family(eax) == 0x17 && x86_model(eax) <= 0x0f; + return family == 0x17 && model <= 0x0f; } -static bool is_zen2(uint32_t eax) +static bool is_zen2(uint32_t family, uint32_t model) { - return x86_family(eax) == 0x17 && - x86_model(eax) >= 0x30 && x86_model(eax) <= 0x3f; + return family == 0x17 && model >= 0x30 && model <= 0x3f; } -static bool is_zen3(uint32_t eax) +static bool is_zen3(uint32_t family, uint32_t model) { - return x86_family(eax) == 0x19 && x86_model(eax) <= 0x0f; + return family == 0x19 && model <= 0x0f; } /* @@ -395,13 +394,13 @@ static bool is_zen3(uint32_t eax) */ static bool use_amd_pmu(void) { - const struct kvm_cpuid_entry2 *entry; + uint32_t family = kvm_cpu_family(); + uint32_t model = kvm_cpu_model(); - entry = kvm_get_supported_cpuid_entry(1); return is_amd_cpu() && - (is_zen1(entry->eax) || - is_zen2(entry->eax) || - is_zen3(entry->eax)); + (is_zen1(family, model) || + is_zen2(family, model) || + is_zen3(family, model)); } int main(int argc, char *argv[]) From b941ba2380ccf51e048dc58ff0e6bdf11828f6d9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 6 Oct 2022 00:51:25 +0000 Subject: [PATCH 1682/4122] KVM: selftests: Drop helpers for getting specific KVM supported CPUID entry Drop kvm_get_supported_cpuid_entry() and its inner helper now that all known usage can use X86_FEATURE_*, X86_PROPERTY_*, X86_PMU_FEATURE_*, or the dedicated Family/Model helpers. Providing "raw" access to CPUID leafs is undesirable as it encourages open coding CPUID checks, which is often error prone and not self-documenting. No functional change intended. 
Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20221006005125.680782-13-seanjc@google.com --- .../testing/selftests/kvm/include/x86_64/processor.h | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 4366dbcc1bcd..481f44c683aa 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -902,17 +902,6 @@ static inline void vcpu_clear_cpuid_feature(struct kvm_vcpu *vcpu, vcpu_set_or_clear_cpuid_feature(vcpu, feature, false); } -static inline const struct kvm_cpuid_entry2 *__kvm_get_supported_cpuid_entry(uint32_t function, - uint32_t index) -{ - return get_cpuid_entry(kvm_get_supported_cpuid(), function, index); -} - -static inline const struct kvm_cpuid_entry2 *kvm_get_supported_cpuid_entry(uint32_t function) -{ - return __kvm_get_supported_cpuid_entry(function, 0); -} - uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index); int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value); From 5c107f7085f45e071bbcf13006fffccd8e5de0e1 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Wed, 16 Nov 2022 12:46:31 -0800 Subject: [PATCH 1683/4122] KVM: selftests: Assert in prepare_eptp() that nEPT is supported Now that a VM isn't needed to check for nEPT support, assert that KVM supports nEPT in prepare_eptp() instead of skipping the test, and push the TEST_REQUIRE() check out to individual tests. The require+assert are somewhat redundant and will incur some amount of ongoing maintenance burden, but placing the "require" logic in the test makes it easier to find/understand a test's requirements and in this case, provides a very strong hint that the test cares about nEPT. 
Suggested-by: Sean Christopherson Signed-off-by: David Matlack Link: https://lore.kernel.org/r/20220927165209.930904-1-dmatlack@google.com [sean: rebase on merged code, write changelog] Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/lib/x86_64/memstress.c | 1 + tools/testing/selftests/kvm/lib/x86_64/vmx.c | 2 +- tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/lib/x86_64/memstress.c b/tools/testing/selftests/kvm/lib/x86_64/memstress.c index 2b3b47e4a973..d61e623afc8c 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/memstress.c +++ b/tools/testing/selftests/kvm/lib/x86_64/memstress.c @@ -85,6 +85,7 @@ void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vc int vcpu_id; TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + TEST_REQUIRE(kvm_cpu_has_ept()); for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { vmx = vcpu_alloc_vmx(vm, &vmx_gva); diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c index 6800fc9aeef0..3e4ea846366c 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c @@ -559,7 +559,7 @@ bool kvm_cpu_has_ept(void) void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, uint32_t eptp_memslot) { - TEST_REQUIRE(kvm_cpu_has_ept()); + TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT"); vmx->eptp = (void *)vm_vaddr_alloc_page(vm); vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp); diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c index 2d8c23d639f7..f0456fb031b1 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c @@ -78,6 +78,7 @@ int main(int argc, char *argv[]) bool done = false; TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + 
TEST_REQUIRE(kvm_cpu_has_ept()); /* Create VM */ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); From ecb89a51724b3cd89c13ba7364e82f9879b68dcf Mon Sep 17 00:00:00 2001 From: David Matlack Date: Wed, 16 Nov 2022 12:42:28 -0800 Subject: [PATCH 1684/4122] KVM: selftests: Check for KVM nEPT support using "feature" MSRs When checking for nEPT support in KVM, use kvm_get_feature_msr() instead of vcpu_get_msr() to retrieve KVM's default TRUE_PROCBASED_CTLS and PROCBASED_CTLS2 MSR values, i.e. don't require a VM+vCPU to query nEPT support. Suggested-by: Sean Christopherson Signed-off-by: David Matlack Link: https://lore.kernel.org/r/20220927165209.930904-1-dmatlack@google.com [sean: rebase on merged code, write changelog] Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/include/x86_64/vmx.h | 2 +- tools/testing/selftests/kvm/lib/x86_64/vmx.c | 12 ++++-------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h index 71b290b6469d..e9c96b49966a 100644 --- a/tools/testing/selftests/kvm/include/x86_64/vmx.h +++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h @@ -572,7 +572,7 @@ void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, uint32_t memslot); void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm, uint64_t addr, uint64_t size); -bool kvm_vm_has_ept(struct kvm_vm *vm); +bool kvm_cpu_has_ept(void); void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, uint32_t eptp_memslot); void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm); diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c index d21049c38fc5..6800fc9aeef0 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c @@ -544,26 +544,22 @@ void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm, __nested_map(vmx, vm, 
addr, addr, size, PG_LEVEL_1G); } -bool kvm_vm_has_ept(struct kvm_vm *vm) +bool kvm_cpu_has_ept(void) { - struct kvm_vcpu *vcpu; uint64_t ctrl; - vcpu = list_first_entry(&vm->vcpus, struct kvm_vcpu, list); - TEST_ASSERT(vcpu, "Cannot determine EPT support without vCPUs.\n"); - - ctrl = vcpu_get_msr(vcpu, MSR_IA32_VMX_TRUE_PROCBASED_CTLS) >> 32; + ctrl = kvm_get_feature_msr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) >> 32; if (!(ctrl & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) return false; - ctrl = vcpu_get_msr(vcpu, MSR_IA32_VMX_PROCBASED_CTLS2) >> 32; + ctrl = kvm_get_feature_msr(MSR_IA32_VMX_PROCBASED_CTLS2) >> 32; return ctrl & SECONDARY_EXEC_ENABLE_EPT; } void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, uint32_t eptp_memslot) { - TEST_REQUIRE(kvm_vm_has_ept(vm)); + TEST_REQUIRE(kvm_cpu_has_ept()); vmx->eptp = (void *)vm_vaddr_alloc_page(vm); vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp); From 2653d53345bda90604f673bb211dd060a5a5c232 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 16 Nov 2022 19:20:20 -0800 Subject: [PATCH 1685/4122] xfs: fix incorrect error-out in xfs_remove Clean up resources if resetting the dotdot entry doesn't succeed. Observed through code inspection. Fixes: 5838d0356bb3 ("xfs: reset child dir '..' entry when unlinking child") Signed-off-by: Darrick J. 
Wong Reviewed-by: Andrey Albershteyn --- fs/xfs/xfs_inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index aa303be11576..d354ea2b74f9 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2479,7 +2479,7 @@ xfs_remove( error = xfs_dir_replace(tp, ip, &xfs_name_dotdot, tp->t_mountp->m_sb.sb_rootino, 0); if (error) - return error; + goto out_trans_cancel; } } else { /* From 59f6ab40fd8735c9a1a15401610a31cc06a0bbd6 Mon Sep 17 00:00:00 2001 From: Long Li Date: Wed, 16 Nov 2022 19:20:20 -0800 Subject: [PATCH 1686/4122] xfs: fix sb write verify for lazysbcount When lazysbcount is enabled, fsstress and loop mount/unmount test report the following problems: XFS (loop0): SB summary counter sanity check failed XFS (loop0): Metadata corruption detected at xfs_sb_write_verify+0x13b/0x460, xfs_sb block 0x0 XFS (loop0): Unmount and run xfs_repair XFS (loop0): First 128 bytes of corrupted metadata buffer: 00000000: 58 46 53 42 00 00 10 00 00 00 00 00 00 28 00 00 XFSB.........(.. 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000020: 69 fb 7c cd 5f dc 44 af 85 74 e0 cc d4 e3 34 5a i.|._.D..t....4Z 00000030: 00 00 00 00 00 20 00 06 00 00 00 00 00 00 00 80 ..... .......... 00000040: 00 00 00 00 00 00 00 81 00 00 00 00 00 00 00 82 ................ 00000050: 00 00 00 01 00 0a 00 00 00 00 00 04 00 00 00 00 ................ 00000060: 00 00 0a 00 b4 b5 02 00 02 00 00 08 00 00 00 00 ................ 00000070: 00 00 00 00 00 00 00 00 0c 09 09 03 14 00 00 19 ................ XFS (loop0): Corruption of in-memory data (0x8) detected at _xfs_buf_ioapply +0xe1e/0x10e0 (fs/xfs/xfs_buf.c:1580). Shutting down filesystem. XFS (loop0): Please unmount the filesystem and rectify the problem(s) XFS (loop0): log mount/recovery failed: error -117 XFS (loop0): log mount failed This corruption will shutdown the file system and the file system will no longer be mountable. 
The following script can reproduce the problem, but it may take a long time. #!/bin/bash device=/dev/sda testdir=/mnt/test round=0 function fail() { echo "$*" exit 1 } mkdir -p $testdir while [ $round -lt 10000 ] do echo "******* round $round ********" mkfs.xfs -f $device mount $device $testdir || fail "mount failed!" fsstress -d $testdir -l 0 -n 10000 -p 4 >/dev/null & sleep 4 killall -w fsstress umount $testdir xfs_repair -e $device > /dev/null if [ $? -eq 2 ];then echo "ERR CODE 2: Dirty log exception during repair." exit 1 fi round=$(($round+1)) done With lazysbcount enabled, there is no additional lock protection for reading m_ifree and m_icount in xfs_log_sb(). If another CPU modifies m_ifree between the two reads, the sampled m_ifree can end up greater than the sampled m_icount. For example, consider the following sequence, where ifreedelta is positive: CPU0 CPU1 xfs_log_sb xfs_trans_unreserve_and_mod_sb ---------- ------------------------------ percpu_counter_sum(&mp->m_icount) percpu_counter_add_batch(&mp->m_icount, idelta, XFS_ICOUNT_BATCH) percpu_counter_add(&mp->m_ifree, ifreedelta); percpu_counter_sum(&mp->m_ifree) After this, an incorrect inode count (sb_ifree > sb_icount) will be written to the log. In the subsequent writing of sb, the incorrect inode count (sb_ifree > sb_icount) will fail to pass the boundary check in xfs_validate_sb_write(), which causes the file system to shut down. When lazysbcount is enabled, we don't need to guarantee that the lazy sb counters are completely correct, but we do need to guarantee that sb_ifree <= sb_icount. On the other hand, the constraint that m_ifree <= m_icount must be satisfied any time that there /cannot/ be other threads allocating or freeing inode chunks. If the constraint is violated under these circumstances, sb_i{count,free} (the ondisk superblock inode counters) may be incorrect and need to be marked sick at unmount; the counts will be rebuilt on the next mount. 
Fixes: 8756a5af1819 ("libxfs: add more bounds checking to sb sanity checks") Signed-off-by: Long Li Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_sb.c | 4 +++- fs/xfs/xfs_mount.c | 15 +++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index a20cade590e9..1eeecf2eb2a7 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -972,7 +972,9 @@ xfs_log_sb( */ if (xfs_has_lazysbcount(mp)) { mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount); - mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree); + mp->m_sb.sb_ifree = min_t(uint64_t, + percpu_counter_sum(&mp->m_ifree), + mp->m_sb.sb_icount); mp->m_sb.sb_fdblocks = percpu_counter_sum(&mp->m_fdblocks); } diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index e8bb3c2e847e..fb87ffb48f7f 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -538,6 +538,20 @@ xfs_check_summary_counts( return 0; } +static void +xfs_unmount_check( + struct xfs_mount *mp) +{ + if (xfs_is_shutdown(mp)) + return; + + if (percpu_counter_sum(&mp->m_ifree) > + percpu_counter_sum(&mp->m_icount)) { + xfs_alert(mp, "ifree/icount mismatch at unmount"); + xfs_fs_mark_sick(mp, XFS_SICK_FS_COUNTERS); + } +} + /* * Flush and reclaim dirty inodes in preparation for unmount. Inodes and * internal inode structures can be sitting in the CIL and AIL at this point, @@ -1077,6 +1091,7 @@ xfs_unmountfs( if (error) xfs_warn(mp, "Unable to free reserved block pool. " "Freespace may not be correct on next mount."); + xfs_unmount_check(mp); xfs_log_unmount(mp); xfs_da_unmount(mp); From 64c80dfd04d1dd2ecf550542c8f3f41b54b20207 Mon Sep 17 00:00:00 2001 From: Lukas Herbolt Date: Wed, 16 Nov 2022 19:20:21 -0800 Subject: [PATCH 1687/4122] xfs: Print XFS UUID on mount and umount events. As of now only device names are printed out over __xfs_printk(). 
The device names are not persistent across reboots, which, when searching for the origin of a corruption, adds the extra task of properly identifying the devices. This patch adds the XFS UUID to every mount/umount event, which will make the identification much easier. Signed-off-by: Lukas Herbolt [sandeen: rebase onto current upstream kernel] Signed-off-by: Eric Sandeen Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/xfs_log.c | 10 ++++++---- fs/xfs/xfs_super.c | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index f02a0dd522b3..0141d9907d31 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -644,12 +644,14 @@ xfs_log_mount( int min_logfsbs; if (!xfs_has_norecovery(mp)) { - xfs_notice(mp, "Mounting V%d Filesystem", - XFS_SB_VERSION_NUM(&mp->m_sb)); + xfs_notice(mp, "Mounting V%d Filesystem %pU", + XFS_SB_VERSION_NUM(&mp->m_sb), + &mp->m_sb.sb_uuid); } else { xfs_notice(mp, -"Mounting V%d filesystem in no-recovery mode. Filesystem will be inconsistent.", - XFS_SB_VERSION_NUM(&mp->m_sb)); +"Mounting V%d filesystem %pU in no-recovery mode. Filesystem will be inconsistent.", + XFS_SB_VERSION_NUM(&mp->m_sb), + &mp->m_sb.sb_uuid); ASSERT(xfs_is_readonly(mp)); } diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index ee4b429a2f2c..0c4b73e9b29d 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1110,7 +1110,7 @@ xfs_fs_put_super( if (!sb->s_fs_info) return; - xfs_notice(mp, "Unmounting Filesystem"); + xfs_notice(mp, "Unmounting Filesystem %pU", &mp->m_sb.sb_uuid); xfs_filestream_unmount(mp); xfs_unmountfs(mp); From 4f44e519b6a945068755708119cca5b74d01d1f6 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Mon, 14 Nov 2022 19:16:59 -0600 Subject: [PATCH 1688/4122] RDMA/irdma: Fix inline for multiple SGE's Currently, inline send and inline write assume a single SGE and only copy data from the first one. Add support for multiple SGE's. 
Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Link: https://lore.kernel.org/r/20221115011701.1379-2-shiraz.saleem@intel.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/uk.c | 147 ++++++++++++++++++---------- drivers/infiniband/hw/irdma/user.h | 19 +--- drivers/infiniband/hw/irdma/verbs.c | 57 ++++------- 3 files changed, 119 insertions(+), 104 deletions(-) diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c index a6e5d350a94c..1a57ed9d77ff 100644 --- a/drivers/infiniband/hw/irdma/uk.c +++ b/drivers/infiniband/hw/irdma/uk.c @@ -566,21 +566,37 @@ static void irdma_set_mw_bind_wqe_gen_1(__le64 *wqe, /** * irdma_copy_inline_data_gen_1 - Copy inline data to wqe - * @dest: pointer to wqe - * @src: pointer to inline data - * @len: length of inline data to copy + * @wqe: pointer to wqe + * @sge_list: table of pointers to inline data + * @num_sges: Total inline data length * @polarity: compatibility parameter */ -static void irdma_copy_inline_data_gen_1(u8 *dest, u8 *src, u32 len, - u8 polarity) +static void irdma_copy_inline_data_gen_1(u8 *wqe, struct ib_sge *sge_list, + u32 num_sges, u8 polarity) { - if (len <= 16) { - memcpy(dest, src, len); - } else { - memcpy(dest, src, 16); - src += 16; - dest = dest + 32; - memcpy(dest, src, len - 16); + u32 quanta_bytes_remaining = 16; + int i; + + for (i = 0; i < num_sges; i++) { + u8 *cur_sge = (u8 *)(uintptr_t)sge_list[i].addr; + u32 sge_len = sge_list[i].length; + + while (sge_len) { + u32 bytes_copied; + + bytes_copied = min(sge_len, quanta_bytes_remaining); + memcpy(wqe, cur_sge, bytes_copied); + wqe += bytes_copied; + cur_sge += bytes_copied; + quanta_bytes_remaining -= bytes_copied; + sge_len -= bytes_copied; + + if (!quanta_bytes_remaining) { + /* Remaining inline bytes reside after hdr */ + wqe += 16; + quanta_bytes_remaining = 32; + } + } } } @@ -612,35 +628,51 @@ static void 
irdma_set_mw_bind_wqe(__le64 *wqe, /** * irdma_copy_inline_data - Copy inline data to wqe - * @dest: pointer to wqe - * @src: pointer to inline data - * @len: length of inline data to copy + * @wqe: pointer to wqe + * @sge_list: table of pointers to inline data + * @num_sges: number of SGE's * @polarity: polarity of wqe valid bit */ -static void irdma_copy_inline_data(u8 *dest, u8 *src, u32 len, u8 polarity) +static void irdma_copy_inline_data(u8 *wqe, struct ib_sge *sge_list, + u32 num_sges, u8 polarity) { u8 inline_valid = polarity << IRDMA_INLINE_VALID_S; - u32 copy_size; + u32 quanta_bytes_remaining = 8; + bool first_quanta = true; + int i; - dest += 8; - if (len <= 8) { - memcpy(dest, src, len); - return; - } - - *((u64 *)dest) = *((u64 *)src); - len -= 8; - src += 8; - dest += 24; /* point to additional 32 byte quanta */ - - while (len) { - copy_size = len < 31 ? len : 31; - memcpy(dest, src, copy_size); - *(dest + 31) = inline_valid; - len -= copy_size; - dest += 32; - src += copy_size; + wqe += 8; + + for (i = 0; i < num_sges; i++) { + u8 *cur_sge = (u8 *)(uintptr_t)sge_list[i].addr; + u32 sge_len = sge_list[i].length; + + while (sge_len) { + u32 bytes_copied; + + bytes_copied = min(sge_len, quanta_bytes_remaining); + memcpy(wqe, cur_sge, bytes_copied); + wqe += bytes_copied; + cur_sge += bytes_copied; + quanta_bytes_remaining -= bytes_copied; + sge_len -= bytes_copied; + + if (!quanta_bytes_remaining) { + quanta_bytes_remaining = 31; + + /* Remaining inline bytes reside after hdr */ + if (first_quanta) { + first_quanta = false; + wqe += 16; + } else { + *wqe = inline_valid; + wqe++; + } + } + } } + if (!first_quanta && quanta_bytes_remaining < 31) + *(wqe + quanta_bytes_remaining) = inline_valid; } /** @@ -679,20 +711,27 @@ int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool post_sq) { __le64 *wqe; - struct irdma_inline_rdma_write *op_info; + struct irdma_rdma_write *op_info; u64 hdr = 0; u32 wqe_idx; bool 
read_fence = false; + u32 i, total_size = 0; u16 quanta; info->push_wqe = qp->push_db ? true : false; - op_info = &info->op.inline_rdma_write; + op_info = &info->op.rdma_write; - if (op_info->len > qp->max_inline_data) + if (unlikely(qp->max_sq_frag_cnt < op_info->num_lo_sges)) return -EINVAL; - quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(op_info->len); - wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, op_info->len, + for (i = 0; i < op_info->num_lo_sges; i++) + total_size += op_info->lo_sg_list[i].length; + + if (unlikely(total_size > qp->max_inline_data)) + return -EINVAL; + + quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(total_size); + wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size, info); if (!wqe) return -ENOMEM; @@ -705,7 +744,7 @@ int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, op_info->rem_addr.lkey) | FIELD_PREP(IRDMAQPSQ_OPCODE, info->op_type) | - FIELD_PREP(IRDMAQPSQ_INLINEDATALEN, op_info->len) | + FIELD_PREP(IRDMAQPSQ_INLINEDATALEN, total_size) | FIELD_PREP(IRDMAQPSQ_REPORTRTT, info->report_rtt ? 1 : 0) | FIELD_PREP(IRDMAQPSQ_INLINEDATAFLAG, 1) | FIELD_PREP(IRDMAQPSQ_IMMDATAFLAG, info->imm_data_valid ? 1 : 0) | @@ -719,7 +758,8 @@ int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, set_64bit_val(wqe, 0, FIELD_PREP(IRDMAQPSQ_IMMDATA, info->imm_data)); - qp->wqe_ops.iw_copy_inline_data((u8 *)wqe, op_info->data, op_info->len, + qp->wqe_ops.iw_copy_inline_data((u8 *)wqe, op_info->lo_sg_list, + op_info->num_lo_sges, qp->swqe_polarity); dma_wmb(); /* make sure WQE is populated before valid bit is set */ @@ -745,20 +785,27 @@ int irdma_uk_inline_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool post_sq) { __le64 *wqe; - struct irdma_post_inline_send *op_info; + struct irdma_post_send *op_info; u64 hdr; u32 wqe_idx; bool read_fence = false; + u32 i, total_size = 0; u16 quanta; info->push_wqe = qp->push_db ? 
true : false; - op_info = &info->op.inline_send; + op_info = &info->op.send; - if (op_info->len > qp->max_inline_data) + if (unlikely(qp->max_sq_frag_cnt < op_info->num_sges)) return -EINVAL; - quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(op_info->len); - wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, op_info->len, + for (i = 0; i < op_info->num_sges; i++) + total_size += op_info->sg_list[i].length; + + if (unlikely(total_size > qp->max_inline_data)) + return -EINVAL; + + quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(total_size); + wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size, info); if (!wqe) return -ENOMEM; @@ -773,7 +820,7 @@ int irdma_uk_inline_send(struct irdma_qp_uk *qp, hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, info->stag_to_inv) | FIELD_PREP(IRDMAQPSQ_AHID, op_info->ah_id) | FIELD_PREP(IRDMAQPSQ_OPCODE, info->op_type) | - FIELD_PREP(IRDMAQPSQ_INLINEDATALEN, op_info->len) | + FIELD_PREP(IRDMAQPSQ_INLINEDATALEN, total_size) | FIELD_PREP(IRDMAQPSQ_IMMDATAFLAG, (info->imm_data_valid ? 1 : 0)) | FIELD_PREP(IRDMAQPSQ_REPORTRTT, (info->report_rtt ? 
1 : 0)) | @@ -789,8 +836,8 @@ int irdma_uk_inline_send(struct irdma_qp_uk *qp, if (info->imm_data_valid) set_64bit_val(wqe, 0, FIELD_PREP(IRDMAQPSQ_IMMDATA, info->imm_data)); - qp->wqe_ops.iw_copy_inline_data((u8 *)wqe, op_info->data, op_info->len, - qp->swqe_polarity); + qp->wqe_ops.iw_copy_inline_data((u8 *)wqe, op_info->sg_list, + op_info->num_sges, qp->swqe_polarity); dma_wmb(); /* make sure WQE is populated before valid bit is set */ diff --git a/drivers/infiniband/hw/irdma/user.h b/drivers/infiniband/hw/irdma/user.h index 2ef61923c926..424d4aa8cdcd 100644 --- a/drivers/infiniband/hw/irdma/user.h +++ b/drivers/infiniband/hw/irdma/user.h @@ -173,14 +173,6 @@ struct irdma_post_send { u32 ah_id; }; -struct irdma_post_inline_send { - void *data; - u32 len; - u32 qkey; - u32 dest_qp; - u32 ah_id; -}; - struct irdma_post_rq_info { u64 wr_id; struct ib_sge *sg_list; @@ -193,12 +185,6 @@ struct irdma_rdma_write { struct ib_sge rem_addr; }; -struct irdma_inline_rdma_write { - void *data; - u32 len; - struct ib_sge rem_addr; -}; - struct irdma_rdma_read { struct ib_sge *lo_sg_list; u32 num_lo_sges; @@ -241,8 +227,6 @@ struct irdma_post_sq_info { struct irdma_rdma_read rdma_read; struct irdma_bind_window bind_window; struct irdma_inv_local_stag inv_local_stag; - struct irdma_inline_rdma_write inline_rdma_write; - struct irdma_post_inline_send inline_send; } op; }; @@ -291,7 +275,8 @@ int irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp, bool post_sq); struct irdma_wqe_uk_ops { - void (*iw_copy_inline_data)(u8 *dest, u8 *src, u32 len, u8 polarity); + void (*iw_copy_inline_data)(u8 *dest, struct ib_sge *sge_list, + u32 num_sges, u8 polarity); u16 (*iw_inline_data_size_to_quanta)(u32 data_size); void (*iw_set_fragment)(__le64 *wqe, u32 offset, struct ib_sge *sge, u8 valid); diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 434241789f12..e252f431e2ac 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ 
b/drivers/infiniband/hw/irdma/verbs.c @@ -3136,30 +3136,20 @@ static int irdma_post_send(struct ib_qp *ibqp, info.stag_to_inv = ib_wr->ex.invalidate_rkey; } - if (ib_wr->send_flags & IB_SEND_INLINE) { - info.op.inline_send.data = (void *)(unsigned long) - ib_wr->sg_list[0].addr; - info.op.inline_send.len = ib_wr->sg_list[0].length; - if (iwqp->ibqp.qp_type == IB_QPT_UD || - iwqp->ibqp.qp_type == IB_QPT_GSI) { - ah = to_iwah(ud_wr(ib_wr)->ah); - info.op.inline_send.ah_id = ah->sc_ah.ah_info.ah_idx; - info.op.inline_send.qkey = ud_wr(ib_wr)->remote_qkey; - info.op.inline_send.dest_qp = ud_wr(ib_wr)->remote_qpn; - } - err = irdma_uk_inline_send(ukqp, &info, false); - } else { - info.op.send.num_sges = ib_wr->num_sge; - info.op.send.sg_list = ib_wr->sg_list; - if (iwqp->ibqp.qp_type == IB_QPT_UD || - iwqp->ibqp.qp_type == IB_QPT_GSI) { - ah = to_iwah(ud_wr(ib_wr)->ah); - info.op.send.ah_id = ah->sc_ah.ah_info.ah_idx; - info.op.send.qkey = ud_wr(ib_wr)->remote_qkey; - info.op.send.dest_qp = ud_wr(ib_wr)->remote_qpn; - } - err = irdma_uk_send(ukqp, &info, false); + info.op.send.num_sges = ib_wr->num_sge; + info.op.send.sg_list = ib_wr->sg_list; + if (iwqp->ibqp.qp_type == IB_QPT_UD || + iwqp->ibqp.qp_type == IB_QPT_GSI) { + ah = to_iwah(ud_wr(ib_wr)->ah); + info.op.send.ah_id = ah->sc_ah.ah_info.ah_idx; + info.op.send.qkey = ud_wr(ib_wr)->remote_qkey; + info.op.send.dest_qp = ud_wr(ib_wr)->remote_qpn; } + + if (ib_wr->send_flags & IB_SEND_INLINE) + err = irdma_uk_inline_send(ukqp, &info, false); + else + err = irdma_uk_send(ukqp, &info, false); break; case IB_WR_RDMA_WRITE_WITH_IMM: if (ukqp->qp_caps & IRDMA_WRITE_WITH_IMM) { @@ -3176,22 +3166,15 @@ static int irdma_post_send(struct ib_qp *ibqp, else info.op_type = IRDMA_OP_TYPE_RDMA_WRITE; - if (ib_wr->send_flags & IB_SEND_INLINE) { - info.op.inline_rdma_write.data = (void *)(uintptr_t)ib_wr->sg_list[0].addr; - info.op.inline_rdma_write.len = - ib_wr->sg_list[0].length; - info.op.inline_rdma_write.rem_addr.addr = - 
rdma_wr(ib_wr)->remote_addr; - info.op.inline_rdma_write.rem_addr.lkey = - rdma_wr(ib_wr)->rkey; + info.op.rdma_write.num_lo_sges = ib_wr->num_sge; + info.op.rdma_write.lo_sg_list = ib_wr->sg_list; + info.op.rdma_write.rem_addr.addr = + rdma_wr(ib_wr)->remote_addr; + info.op.rdma_write.rem_addr.lkey = rdma_wr(ib_wr)->rkey; + if (ib_wr->send_flags & IB_SEND_INLINE) err = irdma_uk_inline_rdma_write(ukqp, &info, false); - } else { - info.op.rdma_write.lo_sg_list = (void *)ib_wr->sg_list; - info.op.rdma_write.num_lo_sges = ib_wr->num_sge; - info.op.rdma_write.rem_addr.addr = rdma_wr(ib_wr)->remote_addr; - info.op.rdma_write.rem_addr.lkey = rdma_wr(ib_wr)->rkey; + else err = irdma_uk_rdma_write(ukqp, &info, false); - } break; case IB_WR_RDMA_READ_WITH_INV: inv_stag = true; From 24419777e9431137d5923a747f546facb1e49b1f Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Mon, 14 Nov 2022 19:17:00 -0600 Subject: [PATCH 1689/4122] RDMA/irdma: Fix RQ completion opcode The opcode written by HW, in the RQ CQE, is the RoCEv2/iWARP protocol opcode from the received packet and not the SW opcode as currently assumed. Fix this by returning the raw operation type and queue type in the CQE to irdma_process_cqe and add 2 helpers set_ib_wc_op_sq set_ib_wc_op_rq to map IRDMA HW op types to IB op types. Note that for iWARP, only Write with Immediate is supported so the opcode can only be IB_WC_RECV_RDMA_WITH_IMM when there is immediate data present. 
Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Link: https://lore.kernel.org/r/20221115011701.1379-3-shiraz.saleem@intel.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/uk.c | 21 +++++------- drivers/infiniband/hw/irdma/user.h | 1 + drivers/infiniband/hw/irdma/utils.c | 2 ++ drivers/infiniband/hw/irdma/verbs.c | 39 ++++----------------- drivers/infiniband/hw/irdma/verbs.h | 53 +++++++++++++++++++++++++++++ 5 files changed, 71 insertions(+), 45 deletions(-) diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c index 1a57ed9d77ff..16183e894da7 100644 --- a/drivers/infiniband/hw/irdma/uk.c +++ b/drivers/infiniband/hw/irdma/uk.c @@ -1049,11 +1049,10 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, __le64 *cqe; struct irdma_qp_uk *qp; struct irdma_ring *pring = NULL; - u32 wqe_idx, q_type; + u32 wqe_idx; int ret_code; bool move_cq_head = true; u8 polarity; - u8 op_type; bool ext_valid; __le64 *ext_cqe; @@ -1121,7 +1120,7 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, info->ud_vlan_valid = false; } - q_type = (u8)FIELD_GET(IRDMA_CQ_SQ, qword3); + info->q_type = (u8)FIELD_GET(IRDMA_CQ_SQ, qword3); info->error = (bool)FIELD_GET(IRDMA_CQ_ERROR, qword3); info->push_dropped = (bool)FIELD_GET(IRDMACQ_PSHDROP, qword3); info->ipv4 = (bool)FIELD_GET(IRDMACQ_IPV4, qword3); @@ -1160,8 +1159,9 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, } wqe_idx = (u32)FIELD_GET(IRDMA_CQ_WQEIDX, qword3); info->qp_handle = (irdma_qp_handle)(unsigned long)qp; + info->op_type = (u8)FIELD_GET(IRDMA_CQ_SQ, qword3); - if (q_type == IRDMA_CQE_QTYPE_RQ) { + if (info->q_type == IRDMA_CQE_QTYPE_RQ) { u32 array_idx; array_idx = wqe_idx / qp->rq_wqe_size_multiplier; @@ -1181,10 +1181,6 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, info->bytes_xfered = (u32)FIELD_GET(IRDMACQ_PAYLDLEN, qword0); - if (info->imm_valid) - info->op_type = IRDMA_OP_TYPE_REC_IMM; 
- else - info->op_type = IRDMA_OP_TYPE_REC; if (qword3 & IRDMACQ_STAG) { info->stag_invalid_set = true; info->inv_stag = (u32)FIELD_GET(IRDMACQ_INVSTAG, qword2); @@ -1242,17 +1238,18 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, sw_wqe = qp->sq_base[tail].elem; get_64bit_val(sw_wqe, 24, &wqe_qword); - op_type = (u8)FIELD_GET(IRDMAQPSQ_OPCODE, wqe_qword); - info->op_type = op_type; + info->op_type = (u8)FIELD_GET(IRDMAQPSQ_OPCODE, + wqe_qword); IRDMA_RING_SET_TAIL(qp->sq_ring, tail + qp->sq_wrtrk_array[tail].quanta); - if (op_type != IRDMAQP_OP_NOP) { + if (info->op_type != IRDMAQP_OP_NOP) { info->wr_id = qp->sq_wrtrk_array[tail].wrid; info->bytes_xfered = qp->sq_wrtrk_array[tail].wr_len; break; } } while (1); - if (op_type == IRDMA_OP_TYPE_BIND_MW && info->minor_err == FLUSH_PROT_ERR) + if (info->op_type == IRDMA_OP_TYPE_BIND_MW && + info->minor_err == FLUSH_PROT_ERR) info->minor_err = FLUSH_MW_BIND_ERR; qp->sq_flush_seen = true; if (!IRDMA_RING_MORE_WORK(qp->sq_ring)) diff --git a/drivers/infiniband/hw/irdma/user.h b/drivers/infiniband/hw/irdma/user.h index 424d4aa8cdcd..d0cdf609f5e0 100644 --- a/drivers/infiniband/hw/irdma/user.h +++ b/drivers/infiniband/hw/irdma/user.h @@ -245,6 +245,7 @@ struct irdma_cq_poll_info { u16 ud_vlan; u8 ud_smac[6]; u8 op_type; + u8 q_type; bool stag_invalid_set:1; /* or L_R_Key set */ bool push_dropped:1; bool error:1; diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c index 8dfc9e154d73..445e69e86409 100644 --- a/drivers/infiniband/hw/irdma/utils.c +++ b/drivers/infiniband/hw/irdma/utils.c @@ -2591,6 +2591,7 @@ void irdma_generate_flush_completions(struct irdma_qp *iwqp) sw_wqe = qp->sq_base[wqe_idx].elem; get_64bit_val(sw_wqe, 24, &wqe_qword); cmpl->cpi.op_type = (u8)FIELD_GET(IRDMAQPSQ_OPCODE, IRDMAQPSQ_OPCODE); + cmpl->cpi.q_type = IRDMA_CQE_QTYPE_SQ; /* remove the SQ WR by moving SQ tail*/ IRDMA_RING_SET_TAIL(*sq_ring, sq_ring->tail + qp->sq_wrtrk_array[sq_ring->tail].quanta); @@ 
-2629,6 +2630,7 @@ void irdma_generate_flush_completions(struct irdma_qp *iwqp) cmpl->cpi.wr_id = qp->rq_wrid_array[wqe_idx]; cmpl->cpi.op_type = IRDMA_OP_TYPE_REC; + cmpl->cpi.q_type = IRDMA_CQE_QTYPE_RQ; /* remove the RQ WR by moving RQ tail */ IRDMA_RING_SET_TAIL(*rq_ring, rq_ring->tail + 1); ibdev_dbg(iwqp->iwrcq->ibcq.device, diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index e252f431e2ac..01d0dc4b5649 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -3334,7 +3334,6 @@ static enum ib_wc_status irdma_flush_err_to_ib_wc_status(enum irdma_flush_opcode static void irdma_process_cqe(struct ib_wc *entry, struct irdma_cq_poll_info *cq_poll_info) { - struct irdma_qp *iwqp; struct irdma_sc_qp *qp; entry->wc_flags = 0; @@ -3342,7 +3341,6 @@ static void irdma_process_cqe(struct ib_wc *entry, entry->wr_id = cq_poll_info->wr_id; qp = cq_poll_info->qp_handle; - iwqp = qp->qp_uk.back_qp; entry->qp = qp->qp_uk.back_qp; if (cq_poll_info->error) { @@ -3375,42 +3373,17 @@ static void irdma_process_cqe(struct ib_wc *entry, } } - switch (cq_poll_info->op_type) { - case IRDMA_OP_TYPE_RDMA_WRITE: - case IRDMA_OP_TYPE_RDMA_WRITE_SOL: - entry->opcode = IB_WC_RDMA_WRITE; - break; - case IRDMA_OP_TYPE_RDMA_READ_INV_STAG: - case IRDMA_OP_TYPE_RDMA_READ: - entry->opcode = IB_WC_RDMA_READ; - break; - case IRDMA_OP_TYPE_SEND_INV: - case IRDMA_OP_TYPE_SEND_SOL: - case IRDMA_OP_TYPE_SEND_SOL_INV: - case IRDMA_OP_TYPE_SEND: - entry->opcode = IB_WC_SEND; - break; - case IRDMA_OP_TYPE_FAST_REG_NSMR: - entry->opcode = IB_WC_REG_MR; - break; - case IRDMA_OP_TYPE_INV_STAG: - entry->opcode = IB_WC_LOCAL_INV; - break; - case IRDMA_OP_TYPE_REC_IMM: - case IRDMA_OP_TYPE_REC: - entry->opcode = cq_poll_info->op_type == IRDMA_OP_TYPE_REC_IMM ? 
- IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV; + if (cq_poll_info->q_type == IRDMA_CQE_QTYPE_SQ) { + set_ib_wc_op_sq(cq_poll_info, entry); + } else { + set_ib_wc_op_rq(cq_poll_info, entry, + qp->qp_uk.qp_caps & IRDMA_SEND_WITH_IMM ? + true : false); if (qp->qp_uk.qp_type != IRDMA_QP_TYPE_ROCE_UD && cq_poll_info->stag_invalid_set) { entry->ex.invalidate_rkey = cq_poll_info->inv_stag; entry->wc_flags |= IB_WC_WITH_INVALIDATE; } - break; - default: - ibdev_err(&iwqp->iwdev->ibdev, - "Invalid opcode = %d in CQE\n", cq_poll_info->op_type); - entry->status = IB_WC_GENERAL_ERR; - return; } if (qp->qp_uk.qp_type == IRDMA_QP_TYPE_ROCE_UD) { diff --git a/drivers/infiniband/hw/irdma/verbs.h b/drivers/infiniband/hw/irdma/verbs.h index 4309b7159f42..a536e9fa85eb 100644 --- a/drivers/infiniband/hw/irdma/verbs.h +++ b/drivers/infiniband/hw/irdma/verbs.h @@ -232,6 +232,59 @@ static inline u16 irdma_fw_minor_ver(struct irdma_sc_dev *dev) return (u16)FIELD_GET(IRDMA_FW_VER_MINOR, dev->feature_info[IRDMA_FEATURE_FW_INFO]); } +static inline void set_ib_wc_op_sq(struct irdma_cq_poll_info *cq_poll_info, + struct ib_wc *entry) +{ + switch (cq_poll_info->op_type) { + case IRDMA_OP_TYPE_RDMA_WRITE: + case IRDMA_OP_TYPE_RDMA_WRITE_SOL: + entry->opcode = IB_WC_RDMA_WRITE; + break; + case IRDMA_OP_TYPE_RDMA_READ_INV_STAG: + case IRDMA_OP_TYPE_RDMA_READ: + entry->opcode = IB_WC_RDMA_READ; + break; + case IRDMA_OP_TYPE_SEND_SOL: + case IRDMA_OP_TYPE_SEND_SOL_INV: + case IRDMA_OP_TYPE_SEND_INV: + case IRDMA_OP_TYPE_SEND: + entry->opcode = IB_WC_SEND; + break; + case IRDMA_OP_TYPE_FAST_REG_NSMR: + entry->opcode = IB_WC_REG_MR; + break; + case IRDMA_OP_TYPE_INV_STAG: + entry->opcode = IB_WC_LOCAL_INV; + break; + default: + entry->status = IB_WC_GENERAL_ERR; + } +} + +static inline void set_ib_wc_op_rq(struct irdma_cq_poll_info *cq_poll_info, + struct ib_wc *entry, bool send_imm_support) +{ + /** + * iWARP does not support sendImm, so the presence of Imm data + * must be WriteImm. 
+ */ + if (!send_imm_support) { + entry->opcode = cq_poll_info->imm_valid ? + IB_WC_RECV_RDMA_WITH_IMM : + IB_WC_RECV; + return; + } + + switch (cq_poll_info->op_type) { + case IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE: + case IB_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE: + entry->opcode = IB_WC_RECV_RDMA_WITH_IMM; + break; + default: + entry->opcode = IB_WC_RECV; + } +} + void irdma_mcast_mac(u32 *ip_addr, u8 *mac, bool ipv4); int irdma_ib_register_device(struct irdma_device *iwdev); void irdma_ib_unregister_device(struct irdma_device *iwdev); From 8f7e2daa6336f9f4b6f8a4715a809674606df16b Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Mon, 14 Nov 2022 19:17:01 -0600 Subject: [PATCH 1690/4122] RDMA/irdma: Do not request 2-level PBLEs for CQ alloc When allocating PBLE's for a large CQ, it is possible that a 2-level PBLE is returned which would cause the CQ allocation to fail since 1-level is assumed and checked for. Fix this by requesting a level one PBLE only. Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Link: https://lore.kernel.org/r/20221115011701.1379-4-shiraz.saleem@intel.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/verbs.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 01d0dc4b5649..dc3f5f3fee90 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -2329,9 +2329,10 @@ static bool irdma_check_mr_contiguous(struct irdma_pble_alloc *palloc, * @rf: RDMA PCI function * @iwmr: mr pointer for this memory registration * @use_pbles: flag if to use pble's + * @lvl_1_only: request only level 1 pble if true */ static int irdma_setup_pbles(struct irdma_pci_f *rf, struct irdma_mr *iwmr, - bool use_pbles) + bool use_pbles, bool lvl_1_only) { struct irdma_pbl *iwpbl = &iwmr->iwpbl; struct irdma_pble_alloc *palloc = 
&iwpbl->pble_alloc; @@ -2342,7 +2343,7 @@ static int irdma_setup_pbles(struct irdma_pci_f *rf, struct irdma_mr *iwmr, if (use_pbles) { status = irdma_get_pble(rf->pble_rsrc, palloc, iwmr->page_cnt, - false); + lvl_1_only); if (status) return status; @@ -2385,16 +2386,10 @@ static int irdma_handle_q_mem(struct irdma_device *iwdev, bool ret = true; pg_size = iwmr->page_size; - err = irdma_setup_pbles(iwdev->rf, iwmr, use_pbles); + err = irdma_setup_pbles(iwdev->rf, iwmr, use_pbles, true); if (err) return err; - if (use_pbles && palloc->level != PBLE_LEVEL_1) { - irdma_free_pble(iwdev->rf->pble_rsrc, palloc); - iwpbl->pbl_allocated = false; - return -ENOMEM; - } - if (use_pbles) arr = palloc->level1.addr; @@ -2870,7 +2865,7 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, case IRDMA_MEMREG_TYPE_MEM: use_pbles = (iwmr->page_cnt != 1); - err = irdma_setup_pbles(iwdev->rf, iwmr, use_pbles); + err = irdma_setup_pbles(iwdev->rf, iwmr, use_pbles, false); if (err) goto error; From d7115727e32e94c77a5441892e74ae0339afa1a5 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Thu, 17 Nov 2022 18:19:38 +0800 Subject: [PATCH 1691/4122] RDMA/rtrs-srv: Refactor rtrs_srv_rdma_cm_handler The RDMA_CM_EVENT_CONNECT_REQUEST is quite different to other types, let's check it separately at the beginning of routine, then we can avoid the indentation accordingly. 
Acked-by: Jack Wang Signed-off-by: Guoqing Jiang Link: https://lore.kernel.org/r/20221117101945.6317-2-guoqing.jiang@linux.dev Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index 22d7ba05e9fe..5fe3699cb8ff 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -1950,22 +1950,21 @@ static int rtrs_srv_rdma_cm_handler(struct rdma_cm_id *cm_id, { struct rtrs_srv_path *srv_path = NULL; struct rtrs_path *s = NULL; + struct rtrs_con *c = NULL; - if (ev->event != RDMA_CM_EVENT_CONNECT_REQUEST) { - struct rtrs_con *c = cm_id->context; - - s = c->path; - srv_path = to_srv_path(s); - } - - switch (ev->event) { - case RDMA_CM_EVENT_CONNECT_REQUEST: + if (ev->event == RDMA_CM_EVENT_CONNECT_REQUEST) /* * In case of error cma.c will destroy cm_id, * see cma_process_remove() */ return rtrs_rdma_connect(cm_id, ev->param.conn.private_data, ev->param.conn.private_data_len); + + c = cm_id->context; + s = c->path; + srv_path = to_srv_path(s); + + switch (ev->event) { case RDMA_CM_EVENT_ESTABLISHED: /* Nothing here */ break; From 0f597ac618d04beb9de997fda59a29c9d3818fb2 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Thu, 17 Nov 2022 18:19:39 +0800 Subject: [PATCH 1692/4122] RDMA/rtrs-srv: Refactor the handling of failure case in map_cont_bufs Let's call unmap_cont_bufs when failure happens, and also only update mrs_num after everything is settled which means we can remove 'mri'. 
Acked-by: Md Haris Iqbal Signed-off-by: Guoqing Jiang Link: https://lore.kernel.org/r/20221117101945.6317-3-guoqing.jiang@linux.dev Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 47 +++++++++++--------------- 1 file changed, 20 insertions(+), 27 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index 5fe3699cb8ff..b877dd57b6b9 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -561,9 +561,11 @@ static int map_cont_bufs(struct rtrs_srv_path *srv_path) { struct rtrs_srv_sess *srv = srv_path->srv; struct rtrs_path *ss = &srv_path->s; - int i, mri, err, mrs_num; + int i, err, mrs_num; unsigned int chunk_bits; int chunks_per_mr = 1; + struct ib_mr *mr; + struct sg_table *sgt; /* * Here we map queue_depth chunks to MR. Firstly we have to @@ -586,16 +588,14 @@ static int map_cont_bufs(struct rtrs_srv_path *srv_path) if (!srv_path->mrs) return -ENOMEM; - srv_path->mrs_num = mrs_num; - - for (mri = 0; mri < mrs_num; mri++) { - struct rtrs_srv_mr *srv_mr = &srv_path->mrs[mri]; - struct sg_table *sgt = &srv_mr->sgt; + for (srv_path->mrs_num = 0; srv_path->mrs_num < mrs_num; + srv_path->mrs_num++) { + struct rtrs_srv_mr *srv_mr = &srv_path->mrs[srv_path->mrs_num]; struct scatterlist *s; - struct ib_mr *mr; int nr, nr_sgt, chunks; - chunks = chunks_per_mr * mri; + sgt = &srv_mr->sgt; + chunks = chunks_per_mr * srv_path->mrs_num; if (!always_invalidate) chunks_per_mr = min_t(int, chunks_per_mr, srv->queue_depth - chunks); @@ -644,31 +644,24 @@ static int map_cont_bufs(struct rtrs_srv_path *srv_path) ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey)); srv_mr->mr = mr; - - continue; -err: - while (mri--) { - srv_mr = &srv_path->mrs[mri]; - sgt = &srv_mr->sgt; - mr = srv_mr->mr; - rtrs_iu_free(srv_mr->iu, srv_path->s.dev->ib_dev, 1); -dereg_mr: - ib_dereg_mr(mr); -unmap_sg: - ib_dma_unmap_sg(srv_path->s.dev->ib_dev, sgt->sgl, - sgt->nents, 
DMA_BIDIRECTIONAL); -free_sg: - sg_free_table(sgt); - } - kfree(srv_path->mrs); - - return err; } chunk_bits = ilog2(srv->queue_depth - 1) + 1; srv_path->mem_bits = (MAX_IMM_PAYL_BITS - chunk_bits); return 0; + +dereg_mr: + ib_dereg_mr(mr); +unmap_sg: + ib_dma_unmap_sg(srv_path->s.dev->ib_dev, sgt->sgl, + sgt->nents, DMA_BIDIRECTIONAL); +free_sg: + sg_free_table(sgt); +err: + unmap_cont_bufs(srv_path); + + return err; } static void rtrs_srv_hb_err_handler(struct rtrs_con *c) From 102d2f70ec0999a5cde181f1ccbe8a81cba45b10 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Thu, 17 Nov 2022 18:19:40 +0800 Subject: [PATCH 1693/4122] RDMA/rtrs-srv: Correct the checking of ib_map_mr_sg We should check with nr_sgt, also the only successful case is that all sg elements are mapped, so make it explicitly. Acked-by: Jack Wang Signed-off-by: Guoqing Jiang Link: https://lore.kernel.org/r/20221117101945.6317-4-guoqing.jiang@linux.dev Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index b877dd57b6b9..581c850e71d6 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -622,7 +622,7 @@ static int map_cont_bufs(struct rtrs_srv_path *srv_path) } nr = ib_map_mr_sg(mr, sgt->sgl, nr_sgt, NULL, max_chunk_size); - if (nr < 0 || nr < sgt->nents) { + if (nr != nr_sgt) { err = nr < 0 ? nr : -EINVAL; goto dereg_mr; } From f5708e6699c230f64736107c90b63a53bdc0a613 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Thu, 17 Nov 2022 18:19:41 +0800 Subject: [PATCH 1694/4122] RDMA/rtrs-clt: Correct the checking of ib_map_mr_sg We should check with count, also the only successful case is that all sg elements are mapped, so make it explicitly. 
Acked-by: Jack Wang Signed-off-by: Guoqing Jiang Link: https://lore.kernel.org/r/20221117101945.6317-5-guoqing.jiang@linux.dev Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 8546b8816524..be7c8480f947 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -1064,10 +1064,8 @@ static int rtrs_map_sg_fr(struct rtrs_clt_io_req *req, size_t count) /* Align the MR to a 4K page size to match the block virt boundary */ nr = ib_map_mr_sg(req->mr, req->sglist, count, NULL, SZ_4K); - if (nr < 0) - return nr; - if (nr < req->sg_cnt) - return -EINVAL; + if (nr != count) + return nr < 0 ? nr : -EINVAL; ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey)); return nr; From a4399563356c86eafeadcdc155d5d93320713b6e Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Thu, 17 Nov 2022 18:19:42 +0800 Subject: [PATCH 1695/4122] RDMA/rtrs-srv: Remove outdated comments from create_con Remove the orphan comments. Acked-by: Md Haris Iqbal Signed-off-by: Guoqing Jiang Link: https://lore.kernel.org/r/20221117101945.6317-6-guoqing.jiang@linux.dev Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index 581c850e71d6..d1703e2c0b82 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -1671,12 +1671,6 @@ static int create_con(struct rtrs_srv_path *srv_path, srv->queue_depth * (1 + 2) + 1); max_recv_wr = srv->queue_depth + 1; - /* - * If we have all receive requests posted and - * all write requests posted and each read request - * requires an invalidate request + drain - * and qp gets into error state. 
- */ } cq_num = max_send_wr + max_recv_wr; atomic_set(&con->c.sq_wr_avail, max_send_wr); From 7526198f27107278c600a3aee41f1a77ed84dd78 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Thu, 17 Nov 2022 18:19:43 +0800 Subject: [PATCH 1696/4122] RDMA/rtrs: Clean up rtrs_rdma_dev_pd_ops Let's remove them since the three members are not used. Acked-by: Md Haris Iqbal Signed-off-by: Guoqing Jiang Link: https://lore.kernel.org/r/20221117101945.6317-7-guoqing.jiang@linux.dev Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/rtrs/rtrs-pri.h | 3 --- drivers/infiniband/ulp/rtrs/rtrs.c | 22 ++++------------------ 2 files changed, 4 insertions(+), 21 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-pri.h b/drivers/infiniband/ulp/rtrs/rtrs-pri.h index a2420eecaf5a..ab25619261d2 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-pri.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-pri.h @@ -68,10 +68,7 @@ enum { struct rtrs_ib_dev; struct rtrs_rdma_dev_pd_ops { - struct rtrs_ib_dev *(*alloc)(void); - void (*free)(struct rtrs_ib_dev *dev); int (*init)(struct rtrs_ib_dev *dev); - void (*deinit)(struct rtrs_ib_dev *dev); }; struct rtrs_rdma_dev_pd { diff --git a/drivers/infiniband/ulp/rtrs/rtrs.c b/drivers/infiniband/ulp/rtrs/rtrs.c index ed324b47d93a..4bf9d868cc52 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs.c @@ -557,7 +557,6 @@ EXPORT_SYMBOL(rtrs_addr_to_sockaddr); void rtrs_rdma_dev_pd_init(enum ib_pd_flags pd_flags, struct rtrs_rdma_dev_pd *pool) { - WARN_ON(pool->ops && (!pool->ops->alloc ^ !pool->ops->free)); INIT_LIST_HEAD(&pool->list); mutex_init(&pool->mutex); pool->pd_flags = pd_flags; @@ -583,15 +582,8 @@ static void dev_free(struct kref *ref) list_del(&dev->entry); mutex_unlock(&pool->mutex); - if (pool->ops && pool->ops->deinit) - pool->ops->deinit(dev); - ib_dealloc_pd(dev->ib_pd); - - if (pool->ops && pool->ops->free) - pool->ops->free(dev); - else - kfree(dev); + kfree(dev); } int rtrs_ib_dev_put(struct rtrs_ib_dev 
*dev) @@ -618,11 +610,8 @@ rtrs_ib_dev_find_or_add(struct ib_device *ib_dev, goto out_unlock; } mutex_unlock(&pool->mutex); - if (pool->ops && pool->ops->alloc) - dev = pool->ops->alloc(); - else - dev = kzalloc(sizeof(*dev), GFP_KERNEL); - if (IS_ERR_OR_NULL(dev)) + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) goto out_err; kref_init(&dev->ref); @@ -644,10 +633,7 @@ out_unlock: out_free_pd: ib_dealloc_pd(dev->ib_pd); out_free_dev: - if (pool->ops && pool->ops->free) - pool->ops->free(dev); - else - kfree(dev); + kfree(dev); out_err: return NULL; } From 6af4609c18b3aa69209d022b9c00e6db78c57ae5 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Thu, 17 Nov 2022 18:19:44 +0800 Subject: [PATCH 1697/4122] RDMA/rtrs-srv: Fix several issues in rtrs_srv_destroy_path_files There are several issues in the function which is supposed to be paired with rtrs_srv_create_path_files. 1. rtrs_srv_stats_attr_group is not removed though it is created in rtrs_srv_create_stats_files. 2. it makes more sense to check kobj_stats.state_in_sysfs before destroy kobj_stats instead of rely on kobj.state_in_sysfs. 3. kobject_init_and_add is used for both kobjs (srv_path->kobj and srv_path->stats->kobj_stats), however we missed to call kobject_del for srv_path->kobj which was called in free_path. 4. rtrs_srv_destroy_once_sysfs_root_folders is independent of either kobj or kobj_stats. 
Acked-by: Md Haris Iqbal Signed-off-by: Guoqing Jiang Link: https://lore.kernel.org/r/20221117101945.6317-8-guoqing.jiang@linux.dev Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c index 2a3c9ac64a42..da8e205ce331 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c @@ -304,12 +304,18 @@ destroy_root: void rtrs_srv_destroy_path_files(struct rtrs_srv_path *srv_path) { - if (srv_path->kobj.state_in_sysfs) { + if (srv_path->stats->kobj_stats.state_in_sysfs) { + sysfs_remove_group(&srv_path->stats->kobj_stats, + &rtrs_srv_stats_attr_group); kobject_del(&srv_path->stats->kobj_stats); kobject_put(&srv_path->stats->kobj_stats); - sysfs_remove_group(&srv_path->kobj, &rtrs_srv_path_attr_group); - kobject_put(&srv_path->kobj); - - rtrs_srv_destroy_once_sysfs_root_folders(srv_path); } + + if (srv_path->kobj.state_in_sysfs) { + sysfs_remove_group(&srv_path->kobj, &rtrs_srv_path_attr_group); + kobject_del(&srv_path->kobj); + kobject_put(&srv_path->kobj); + } + + rtrs_srv_destroy_once_sysfs_root_folders(srv_path); } From 34a046f08b62fb855ac590c1f4dfb1934f1fdb64 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Thu, 17 Nov 2022 18:19:45 +0800 Subject: [PATCH 1698/4122] RDMA/rtrs-srv: Remove kobject_del from rtrs_srv_destroy_once_sysfs_root_folders kobj_paths is created dynamically by kobject_create_and_add, and per the comment above kobject_create_and_add we only need to call kobject_put for it, unlike other kobjs such as stats->kobj_stats and srv_path->kobj.
Acked-by: Md Haris Iqbal Signed-off-by: Guoqing Jiang Link: https://lore.kernel.org/r/20221117101945.6317-9-guoqing.jiang@linux.dev Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c index da8e205ce331..c76ba29da1e2 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c @@ -203,7 +203,6 @@ rtrs_srv_destroy_once_sysfs_root_folders(struct rtrs_srv_path *srv_path) mutex_lock(&srv->paths_mutex); if (!--srv->dev_ref) { - kobject_del(srv->kobj_paths); kobject_put(srv->kobj_paths); mutex_unlock(&srv->paths_mutex); device_del(&srv->dev); From 5562c6a9657e1bffd4d66df841db142c596f411c Mon Sep 17 00:00:00 2001 From: Slark Xiao Date: Mon, 7 Nov 2022 19:27:00 +0800 Subject: [PATCH 1699/4122] bus: mhi: host: pci_generic: Add definition for some VIDs To keep the code neat and for convenience, add definitions for some VIDs. Define them locally until these VIDs are used in multiple places.
Signed-off-by: Slark Xiao Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20221107112700.773-1-slark_xiao@163.com Signed-off-by: Manivannan Sadhasivam --- drivers/bus/mhi/host/pci_generic.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/bus/mhi/host/pci_generic.c b/drivers/bus/mhi/host/pci_generic.c index b58a30367896..f39657f71483 100644 --- a/drivers/bus/mhi/host/pci_generic.c +++ b/drivers/bus/mhi/host/pci_generic.c @@ -24,6 +24,10 @@ #define HEALTH_CHECK_PERIOD (HZ * 2) +/* PCI VID definitions */ +#define PCI_VENDOR_ID_THALES 0x1269 +#define PCI_VENDOR_ID_QUECTEL 0x1eac + /** * struct mhi_pci_dev_info - MHI PCI device specific information * @config: MHI controller configuration @@ -559,11 +563,11 @@ static const struct pci_device_id mhi_pci_id_table[] = { .driver_data = (kernel_ulong_t) &mhi_telit_fn990_info }, { PCI_DEVICE(PCI_VENDOR_ID_QCOM, 0x0308), .driver_data = (kernel_ulong_t) &mhi_qcom_sdx65_info }, - { PCI_DEVICE(0x1eac, 0x1001), /* EM120R-GL (sdx24) */ + { PCI_DEVICE(PCI_VENDOR_ID_QUECTEL, 0x1001), /* EM120R-GL (sdx24) */ .driver_data = (kernel_ulong_t) &mhi_quectel_em1xx_info }, - { PCI_DEVICE(0x1eac, 0x1002), /* EM160R-GL (sdx24) */ + { PCI_DEVICE(PCI_VENDOR_ID_QUECTEL, 0x1002), /* EM160R-GL (sdx24) */ .driver_data = (kernel_ulong_t) &mhi_quectel_em1xx_info }, - { PCI_DEVICE(0x1eac, 0x2001), /* EM120R-GL for FCCL (sdx24) */ + { PCI_DEVICE(PCI_VENDOR_ID_QUECTEL, 0x2001), /* EM120R-GL for FCCL (sdx24) */ .driver_data = (kernel_ulong_t) &mhi_quectel_em1xx_info }, /* T99W175 (sdx55), Both for eSIM and Non-eSIM */ { PCI_DEVICE(PCI_VENDOR_ID_FOXCONN, 0xe0ab), @@ -587,16 +591,16 @@ static const struct pci_device_id mhi_pci_id_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_FOXCONN, 0xe0d9), .driver_data = (kernel_ulong_t) &mhi_foxconn_sdx65_info }, /* MV31-W (Cinterion) */ - { PCI_DEVICE(0x1269, 0x00b3), + { PCI_DEVICE(PCI_VENDOR_ID_THALES, 0x00b3), .driver_data = (kernel_ulong_t) &mhi_mv31_info }, 
/* MV31-W (Cinterion), based on new baseline */ - { PCI_DEVICE(0x1269, 0x00b4), + { PCI_DEVICE(PCI_VENDOR_ID_THALES, 0x00b4), .driver_data = (kernel_ulong_t) &mhi_mv31_info }, /* MV32-WA (Cinterion) */ - { PCI_DEVICE(0x1269, 0x00ba), + { PCI_DEVICE(PCI_VENDOR_ID_THALES, 0x00ba), .driver_data = (kernel_ulong_t) &mhi_mv32_info }, /* MV32-WB (Cinterion) */ - { PCI_DEVICE(0x1269, 0x00bb), + { PCI_DEVICE(PCI_VENDOR_ID_THALES, 0x00bb), .driver_data = (kernel_ulong_t) &mhi_mv32_info }, /* T99W175 (sdx55), HP variant */ { PCI_DEVICE(0x03f0, 0x0a6c), From c70edc06773976f4e6ccfe250030a73c2896e131 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Thu, 10 Nov 2022 19:25:10 -0800 Subject: [PATCH 1700/4122] dt-bindings: interconnect: Add sm8350, sc8280xp and generic OSM L3 compatibles Add EPSS L3 compatibles for sm8350 and sc8280xp, but while at it also introduce generic compatible for both qcom,osm-l3 and qcom,epss-l3. Signed-off-by: Bjorn Andersson Tested-by: Steev Klimaszewski Reviewed-by: Krzysztof Kozlowski Reviewed-by: Sibi Sankar Link: https://lore.kernel.org/r/20221111032515.3460-6-quic_bjorande@quicinc.com Signed-off-by: Georgi Djakov --- .../bindings/interconnect/qcom,osm-l3.yaml | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/Documentation/devicetree/bindings/interconnect/qcom,osm-l3.yaml b/Documentation/devicetree/bindings/interconnect/qcom,osm-l3.yaml index bf538c0c5a81..aadae4424ba9 100644 --- a/Documentation/devicetree/bindings/interconnect/qcom,osm-l3.yaml +++ b/Documentation/devicetree/bindings/interconnect/qcom,osm-l3.yaml @@ -16,13 +16,21 @@ description: properties: compatible: - enum: - - qcom,sc7180-osm-l3 - - qcom,sc7280-epss-l3 - - qcom,sc8180x-osm-l3 - - qcom,sdm845-osm-l3 - - qcom,sm8150-osm-l3 - - qcom,sm8250-epss-l3 + oneOf: + - items: + - enum: + - qcom,sc7180-osm-l3 + - qcom,sc8180x-osm-l3 + - qcom,sdm845-osm-l3 + - qcom,sm8150-osm-l3 + - const: qcom,osm-l3 + - items: + - enum: + - qcom,sc7280-epss-l3 + - 
qcom,sc8280xp-epss-l3 + - qcom,sm8250-epss-l3 + - qcom,sm8350-epss-l3 + - const: qcom,epss-l3 reg: maxItems: 1 @@ -56,7 +64,7 @@ examples: #define RPMH_CXO_CLK 0 osm_l3: interconnect@17d41000 { - compatible = "qcom,sdm845-osm-l3"; + compatible = "qcom,sdm845-osm-l3", "qcom,osm-l3"; reg = <0x17d41000 0x1400>; clocks = <&rpmhcc RPMH_CXO_CLK>, <&gcc GPLL0>; From 2d710b00f22f3fcbc4e0189524bbf36731d9baf4 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Thu, 10 Nov 2022 19:25:14 -0800 Subject: [PATCH 1701/4122] dt-bindings: interconnect: qcom,msm8998-bwmon: Add sc8280xp bwmon instances The sc8280xp platform has two BWMON instances, one v4 and one v5. Extend the existing qcom,msm8998-bwmon and qcom,sc7280-llcc-bwmon to describe these. Signed-off-by: Bjorn Andersson Tested-by: Steev Klimaszewski Reviewed-by: Krzysztof Kozlowski Reviewed-by: Sibi Sankar Link: https://lore.kernel.org/r/20221111032515.3460-10-quic_bjorande@quicinc.com Signed-off-by: Georgi Djakov --- .../devicetree/bindings/interconnect/qcom,msm8998-bwmon.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/devicetree/bindings/interconnect/qcom,msm8998-bwmon.yaml b/Documentation/devicetree/bindings/interconnect/qcom,msm8998-bwmon.yaml index be29e0b80995..0c720dbde36e 100644 --- a/Documentation/devicetree/bindings/interconnect/qcom,msm8998-bwmon.yaml +++ b/Documentation/devicetree/bindings/interconnect/qcom,msm8998-bwmon.yaml @@ -25,9 +25,14 @@ properties: - items: - enum: - qcom,sc7280-cpu-bwmon + - qcom,sc8280xp-cpu-bwmon - qcom,sdm845-bwmon - const: qcom,msm8998-bwmon - const: qcom,msm8998-bwmon # BWMON v4 + - items: + - enum: + - qcom,sc8280xp-llcc-bwmon + - const: qcom,sc7280-llcc-bwmon - const: qcom,sc7280-llcc-bwmon # BWMON v5 - const: qcom,sdm845-llcc-bwmon # BWMON v5 From d017eeabd5092565c3dd1c8a7b00ba724c33c18f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:17 +0000 Subject: [PATCH 1702/4122] arm64: Add ID_DFR0_EL1.PerfMon values for PMUv3p7 and 
IMP_DEF Align the ID_DFR0_EL1.PerfMon values with ID_AA64DFR0_EL1.PMUver. Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221113163832.3154370-2-maz@kernel.org --- arch/arm64/include/asm/sysreg.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 7d301700d1a9..84f59ce1dc6d 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -698,6 +698,8 @@ #define ID_DFR0_PERFMON_8_1 0x4 #define ID_DFR0_PERFMON_8_4 0x5 #define ID_DFR0_PERFMON_8_5 0x6 +#define ID_DFR0_PERFMON_8_7 0x7 +#define ID_DFR0_PERFMON_IMP_DEF 0xf #define ID_ISAR4_SWP_FRAC_SHIFT 28 #define ID_ISAR4_PSR_M_SHIFT 24 From bead02204e9806807bb290137b1ccabfcb4b16fd Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:18 +0000 Subject: [PATCH 1703/4122] KVM: arm64: PMU: Align chained counter implementation with architecture pseudocode Ricardo recently pointed out that the PMU chained counter emulation in KVM wasn't quite behaving like the one on actual hardware, in the sense that a chained counter would expose an overflow on both halves of a chained counter, while KVM would only expose the overflow on the top half. The difference is subtle, but significant. What does the architecture say (DDI0487 H.a): - Up to PMUv3p4, all counters but the cycle counter are 32bit - A 32bit counter that overflows generates a CHAIN event on the adjacent counter after exposing its own overflow status - The CHAIN event is accounted if the counter is correctly configured (CHAIN event selected and counter enabled) This all means that our current implementation (which uses 64bit perf events) prevents us from emulating this overflow on the lower half. How to fix this? By implementing the above, to the letter. This largely results in code deletion, removing the notions of "counter pair", "chained counters", and "canonical counter".
The code is further restructured to make the CHAIN handling similar to SWINC, as the two are now extremely similar in behaviour. Reported-by: Ricardo Koller Signed-off-by: Marc Zyngier Reviewed-by: Reiji Watanabe Link: https://lore.kernel.org/r/20221113163832.3154370-3-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 322 ++++++++++---------------------------- include/kvm/arm_pmu.h | 2 - 2 files changed, 87 insertions(+), 237 deletions(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 0003c7d37533..57765be69bea 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -15,16 +15,14 @@ #include #include +#define PERF_ATTR_CFG1_COUNTER_64BIT BIT(0) + DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available); static LIST_HEAD(arm_pmus); static DEFINE_MUTEX(arm_pmus_lock); static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx); -static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx); -static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc); - -#define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1 static u32 kvm_pmu_event_mask(struct kvm *kvm) { @@ -57,6 +55,11 @@ static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx) __vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC); } +static bool kvm_pmu_counter_can_chain(struct kvm_vcpu *vcpu, u64 idx) +{ + return (!(idx & 1) && (idx + 1) < ARMV8_PMU_CYCLE_IDX); +} + static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc) { struct kvm_pmu *pmu; @@ -69,91 +72,22 @@ static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc) } /** - * kvm_pmu_pmc_is_chained - determine if the pmc is chained - * @pmc: The PMU counter pointer - */ -static bool kvm_pmu_pmc_is_chained(struct kvm_pmc *pmc) -{ - struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); - - return test_bit(pmc->idx >> 1, vcpu->arch.pmu.chained); -} - -/** - * kvm_pmu_idx_is_high_counter - determine if select_idx is a high/low counter - * @select_idx: The counter index - */ -static 
bool kvm_pmu_idx_is_high_counter(u64 select_idx) -{ - return select_idx & 0x1; -} - -/** - * kvm_pmu_get_canonical_pmc - obtain the canonical pmc - * @pmc: The PMU counter pointer - * - * When a pair of PMCs are chained together we use the low counter (canonical) - * to hold the underlying perf event. - */ -static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc) -{ - if (kvm_pmu_pmc_is_chained(pmc) && - kvm_pmu_idx_is_high_counter(pmc->idx)) - return pmc - 1; - - return pmc; -} -static struct kvm_pmc *kvm_pmu_get_alternate_pmc(struct kvm_pmc *pmc) -{ - if (kvm_pmu_idx_is_high_counter(pmc->idx)) - return pmc - 1; - else - return pmc + 1; -} - -/** - * kvm_pmu_idx_has_chain_evtype - determine if the event type is chain + * kvm_pmu_get_counter_value - get PMU counter value * @vcpu: The vcpu pointer * @select_idx: The counter index */ -static bool kvm_pmu_idx_has_chain_evtype(struct kvm_vcpu *vcpu, u64 select_idx) +u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) { - u64 eventsel, reg; + u64 counter, reg, enabled, running; + struct kvm_pmu *pmu = &vcpu->arch.pmu; + struct kvm_pmc *pmc = &pmu->pmc[select_idx]; - select_idx |= 0x1; + if (!kvm_vcpu_has_pmu(vcpu)) + return 0; - if (select_idx == ARMV8_PMU_CYCLE_IDX) - return false; - - reg = PMEVTYPER0_EL0 + select_idx; - eventsel = __vcpu_sys_reg(vcpu, reg) & kvm_pmu_event_mask(vcpu->kvm); - - return eventsel == ARMV8_PMUV3_PERFCTR_CHAIN; -} - -/** - * kvm_pmu_get_pair_counter_value - get PMU counter value - * @vcpu: The vcpu pointer - * @pmc: The PMU counter pointer - */ -static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu, - struct kvm_pmc *pmc) -{ - u64 counter, counter_high, reg, enabled, running; - - if (kvm_pmu_pmc_is_chained(pmc)) { - pmc = kvm_pmu_get_canonical_pmc(pmc); - reg = PMEVCNTR0_EL0 + pmc->idx; - - counter = __vcpu_sys_reg(vcpu, reg); - counter_high = __vcpu_sys_reg(vcpu, reg + 1); - - counter = lower_32_bits(counter) | (counter_high << 32); - } else { - reg 
= (pmc->idx == ARMV8_PMU_CYCLE_IDX) - ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx; - counter = __vcpu_sys_reg(vcpu, reg); - } + reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX) + ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx; + counter = __vcpu_sys_reg(vcpu, reg); /* * The real counter value is equal to the value of counter register plus @@ -163,29 +97,7 @@ static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu, counter += perf_event_read_value(pmc->perf_event, &enabled, &running); - return counter; -} - -/** - * kvm_pmu_get_counter_value - get PMU counter value - * @vcpu: The vcpu pointer - * @select_idx: The counter index - */ -u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) -{ - u64 counter; - struct kvm_pmu *pmu = &vcpu->arch.pmu; - struct kvm_pmc *pmc = &pmu->pmc[select_idx]; - - if (!kvm_vcpu_has_pmu(vcpu)) - return 0; - - counter = kvm_pmu_get_pair_counter_value(vcpu, pmc); - - if (kvm_pmu_pmc_is_chained(pmc) && - kvm_pmu_idx_is_high_counter(select_idx)) - counter = upper_32_bits(counter); - else if (select_idx != ARMV8_PMU_CYCLE_IDX) + if (select_idx != ARMV8_PMU_CYCLE_IDX) counter = lower_32_bits(counter); return counter; @@ -218,7 +130,6 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) */ static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc) { - pmc = kvm_pmu_get_canonical_pmc(pmc); if (pmc->perf_event) { perf_event_disable(pmc->perf_event); perf_event_release_kernel(pmc->perf_event); @@ -236,11 +147,10 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc) { u64 counter, reg, val; - pmc = kvm_pmu_get_canonical_pmc(pmc); if (!pmc->perf_event) return; - counter = kvm_pmu_get_pair_counter_value(vcpu, pmc); + counter = kvm_pmu_get_counter_value(vcpu, pmc->idx); if (pmc->idx == ARMV8_PMU_CYCLE_IDX) { reg = PMCCNTR_EL0; @@ -252,9 +162,6 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc) __vcpu_sys_reg(vcpu, reg) = val; - if (kvm_pmu_pmc_is_chained(pmc)) - 
__vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter); - kvm_pmu_release_perf_event(pmc); } @@ -285,8 +192,6 @@ void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) for_each_set_bit(i, &mask, 32) kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]); - - bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS); } /** @@ -340,12 +245,9 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val) pmc = &pmu->pmc[i]; - /* A change in the enable state may affect the chain state */ - kvm_pmu_update_pmc_chained(vcpu, i); - kvm_pmu_create_perf_event(vcpu, i); - - /* At this point, pmc must be the canonical */ - if (pmc->perf_event) { + if (!pmc->perf_event) { + kvm_pmu_create_perf_event(vcpu, i); + } else { perf_event_enable(pmc->perf_event); if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE) kvm_debug("fail to enable perf event\n"); @@ -375,11 +277,6 @@ void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val) pmc = &pmu->pmc[i]; - /* A change in the enable state may affect the chain state */ - kvm_pmu_update_pmc_chained(vcpu, i); - kvm_pmu_create_perf_event(vcpu, i); - - /* At this point, pmc must be the canonical */ if (pmc->perf_event) perf_event_disable(pmc->perf_event); } @@ -484,6 +381,48 @@ static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work) kvm_vcpu_kick(vcpu); } +/* + * Perform an increment on any of the counters described in @mask, + * generating the overflow if required, and propagate it as a chained + * event if possible. 
+ */ +static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu, + unsigned long mask, u32 event) +{ + int i; + + if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) + return; + + /* Weed out disabled counters */ + mask &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); + + for_each_set_bit(i, &mask, ARMV8_PMU_CYCLE_IDX) { + u64 type, reg; + + /* Filter on event type */ + type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i); + type &= kvm_pmu_event_mask(vcpu->kvm); + if (type != event) + continue; + + /* Increment this counter */ + reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1; + reg = lower_32_bits(reg); + __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg; + + if (reg) /* No overflow? move on */ + continue; + + /* Mark overflow */ + __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i); + + if (kvm_pmu_counter_can_chain(vcpu, i)) + kvm_pmu_counter_increment(vcpu, BIT(i + 1), + ARMV8_PMUV3_PERFCTR_CHAIN); + } +} + /** * When the perf event overflows, set the overflow status and inform the vcpu. */ @@ -514,6 +453,10 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx); + if (kvm_pmu_counter_can_chain(vcpu, idx)) + kvm_pmu_counter_increment(vcpu, BIT(idx + 1), + ARMV8_PMUV3_PERFCTR_CHAIN); + if (kvm_pmu_overflow_status(vcpu)) { kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); @@ -533,50 +476,7 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, */ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) { - struct kvm_pmu *pmu = &vcpu->arch.pmu; - int i; - - if (!kvm_vcpu_has_pmu(vcpu)) - return; - - if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) - return; - - /* Weed out disabled counters */ - val &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); - - for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) { - u64 type, reg; - - if (!(val & BIT(i))) - continue; - - /* PMSWINC only applies to ... SW_INC! 
*/ - type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i); - type &= kvm_pmu_event_mask(vcpu->kvm); - if (type != ARMV8_PMUV3_PERFCTR_SW_INCR) - continue; - - /* increment this even SW_INC counter */ - reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1; - reg = lower_32_bits(reg); - __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg; - - if (reg) /* no overflow on the low part */ - continue; - - if (kvm_pmu_pmc_is_chained(&pmu->pmc[i])) { - /* increment the high counter */ - reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1; - reg = lower_32_bits(reg); - __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg; - if (!reg) /* mark overflow on the high counter */ - __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1); - } else { - /* mark overflow on low counter */ - __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i); - } - } + kvm_pmu_counter_increment(vcpu, val, ARMV8_PMUV3_PERFCTR_SW_INCR); } /** @@ -625,18 +525,11 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) { struct arm_pmu *arm_pmu = vcpu->kvm->arch.arm_pmu; struct kvm_pmu *pmu = &vcpu->arch.pmu; - struct kvm_pmc *pmc; + struct kvm_pmc *pmc = &pmu->pmc[select_idx]; struct perf_event *event; struct perf_event_attr attr; u64 eventsel, counter, reg, data; - /* - * For chained counters the event type and filtering attributes are - * obtained from the low/even counter. We also use this counter to - * determine if the event is enabled/disabled. - */ - pmc = kvm_pmu_get_canonical_pmc(&pmu->pmc[select_idx]); - reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX) ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + pmc->idx; data = __vcpu_sys_reg(vcpu, reg); @@ -647,8 +540,12 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) else eventsel = data & kvm_pmu_event_mask(vcpu->kvm); - /* Software increment event doesn't need to be backed by a perf event */ - if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR) + /* + * Neither SW increment nor chained events need to be backed + * by a perf event. 
+ */ + if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR || + eventsel == ARMV8_PMUV3_PERFCTR_CHAIN) return; /* @@ -670,31 +567,22 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) attr.exclude_host = 1; /* Don't count host events */ attr.config = eventsel; - counter = kvm_pmu_get_pair_counter_value(vcpu, pmc); + counter = kvm_pmu_get_counter_value(vcpu, select_idx); - if (kvm_pmu_pmc_is_chained(pmc)) { - /** - * The initial sample period (overflow count) of an event. For - * chained counters we only support overflow interrupts on the - * high counter. - */ + /* + * If counting with a 64bit counter, advertise it to the perf + * code, carefully dealing with the initial sample period. + */ + if (kvm_pmu_idx_is_64bit(vcpu, select_idx)) { + attr.config1 |= PERF_ATTR_CFG1_COUNTER_64BIT; attr.sample_period = (-counter) & GENMASK(63, 0); - attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED; - - event = perf_event_create_kernel_counter(&attr, -1, current, - kvm_pmu_perf_overflow, - pmc + 1); } else { - /* The initial sample period (overflow count) of an event. */ - if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx)) - attr.sample_period = (-counter) & GENMASK(63, 0); - else - attr.sample_period = (-counter) & GENMASK(31, 0); - - event = perf_event_create_kernel_counter(&attr, -1, current, - kvm_pmu_perf_overflow, pmc); + attr.sample_period = (-counter) & GENMASK(31, 0); } + event = perf_event_create_kernel_counter(&attr, -1, current, + kvm_pmu_perf_overflow, pmc); + if (IS_ERR(event)) { pr_err_once("kvm: pmu event creation failed %ld\n", PTR_ERR(event)); @@ -704,41 +592,6 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) pmc->perf_event = event; } -/** - * kvm_pmu_update_pmc_chained - update chained bitmap - * @vcpu: The vcpu pointer - * @select_idx: The number of selected counter - * - * Update the chained bitmap based on the event type written in the - * typer register and the enable state of the odd register. 
- */ -static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx) -{ - struct kvm_pmu *pmu = &vcpu->arch.pmu; - struct kvm_pmc *pmc = &pmu->pmc[select_idx], *canonical_pmc; - bool new_state, old_state; - - old_state = kvm_pmu_pmc_is_chained(pmc); - new_state = kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) && - kvm_pmu_counter_is_enabled(vcpu, pmc->idx | 0x1); - - if (old_state == new_state) - return; - - canonical_pmc = kvm_pmu_get_canonical_pmc(pmc); - kvm_pmu_stop_counter(vcpu, canonical_pmc); - if (new_state) { - /* - * During promotion from !chained to chained we must ensure - * the adjacent counter is stopped and its event destroyed - */ - kvm_pmu_stop_counter(vcpu, kvm_pmu_get_alternate_pmc(pmc)); - set_bit(pmc->idx >> 1, vcpu->arch.pmu.chained); - return; - } - clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained); -} - /** * kvm_pmu_set_counter_event_type - set selected counter to monitor some event * @vcpu: The vcpu pointer @@ -766,7 +619,6 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, __vcpu_sys_reg(vcpu, reg) = data & mask; - kvm_pmu_update_pmc_chained(vcpu, select_idx); kvm_pmu_create_perf_event(vcpu, select_idx); } diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index c0b868ce6a8f..96b192139a23 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -11,7 +11,6 @@ #include #define ARMV8_PMU_CYCLE_IDX (ARMV8_PMU_MAX_COUNTERS - 1) -#define ARMV8_PMU_MAX_COUNTER_PAIRS ((ARMV8_PMU_MAX_COUNTERS + 1) >> 1) #ifdef CONFIG_HW_PERF_EVENTS @@ -29,7 +28,6 @@ struct kvm_pmu { struct irq_work overflow_work; struct kvm_pmu_events events; struct kvm_pmc pmc[ARMV8_PMU_MAX_COUNTERS]; - DECLARE_BITMAP(chained, ARMV8_PMU_MAX_COUNTER_PAIRS); int irq_num; bool created; bool irq_level; From c423f01633eb948ba6f8c98872b4119685e007fb Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Sat, 24 Sep 2022 09:50:43 +0800 Subject: [PATCH 1704/4122] interconnect: qcom: icc-rpm: Remove redundant dev_err call 
devm_ioremap_resource() prints error message in itself. Remove the dev_err call to avoid redundant error message. Signed-off-by: Shang XiaoJing Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220924015043.25130-1-shangxiaojing@huawei.com Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/icc-rpm.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/interconnect/qcom/icc-rpm.c b/drivers/interconnect/qcom/icc-rpm.c index 39e43b957599..ba6781f54ab7 100644 --- a/drivers/interconnect/qcom/icc-rpm.c +++ b/drivers/interconnect/qcom/icc-rpm.c @@ -477,11 +477,8 @@ int qnoc_probe(struct platform_device *pdev) } mmio = devm_ioremap_resource(dev, res); - - if (IS_ERR(mmio)) { - dev_err(dev, "Cannot ioremap interconnect bus resource\n"); + if (IS_ERR(mmio)) return PTR_ERR(mmio); - } qp->regmap = devm_regmap_init_mmio(dev, mmio, desc->regmap_cfg); if (IS_ERR(qp->regmap)) { From f24227a640344f894522045f74bb2decbdc4f55e Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 27 Oct 2022 11:48:46 -0400 Subject: [PATCH 1705/4122] interconnect: qcom: sc7180: fix dropped const of qcom_icc_bcm Pointers to struct qcom_icc_bcm are const, but the change was dropped during merge. 
Fixes: 016fca59f95f ("Merge branch 'icc-const' into icc-next") Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221027154848.293523-1-krzysztof.kozlowski@linaro.org Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/sc7180.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/interconnect/qcom/sc7180.c b/drivers/interconnect/qcom/sc7180.c index 35cd448efdfb..82d5e8a8c19e 100644 --- a/drivers/interconnect/qcom/sc7180.c +++ b/drivers/interconnect/qcom/sc7180.c @@ -369,7 +369,7 @@ static const struct qcom_icc_desc sc7180_gem_noc = { .num_bcms = ARRAY_SIZE(gem_noc_bcms), }; -static struct qcom_icc_bcm *mc_virt_bcms[] = { +static struct qcom_icc_bcm * const mc_virt_bcms[] = { &bcm_acv, &bcm_mc0, }; From 7870c7076aa07d9caaf53652d6b5a3cd74b1d157 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 27 Oct 2022 11:48:47 -0400 Subject: [PATCH 1706/4122] interconnect: qcom: sc7180: drop double space Drop double white-space. Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221027154848.293523-2-krzysztof.kozlowski@linaro.org Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/sc7180.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/interconnect/qcom/sc7180.c b/drivers/interconnect/qcom/sc7180.c index 82d5e8a8c19e..ef4e13fb4983 100644 --- a/drivers/interconnect/qcom/sc7180.c +++ b/drivers/interconnect/qcom/sc7180.c @@ -443,7 +443,7 @@ static struct qcom_icc_node * const qup_virt_nodes[] = { [SLAVE_QUP_CORE_1] = &qup_core_slave_2, }; -static const struct qcom_icc_desc sc7180_qup_virt = { +static const struct qcom_icc_desc sc7180_qup_virt = { .nodes = qup_virt_nodes, .num_nodes = ARRAY_SIZE(qup_virt_nodes), .bcms = qup_virt_bcms, From c1c537cf30bc539d8f6fa4ac315a8def23fd4ae8 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 27 Oct 2022 11:48:48 -0400 Subject: [PATCH 1707/4122] interconnect: qcom: sc8180x: constify pointer to qcom_icc_node Pointers to struct 
qcom_icc_node are const. Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221027154848.293523-3-krzysztof.kozlowski@linaro.org Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/sc8180x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/interconnect/qcom/sc8180x.c b/drivers/interconnect/qcom/sc8180x.c index 8e32ca958824..0f515bf10bd7 100644 --- a/drivers/interconnect/qcom/sc8180x.c +++ b/drivers/interconnect/qcom/sc8180x.c @@ -1889,7 +1889,7 @@ static struct qcom_icc_bcm * const qup_virt_bcms[] = { &bcm_qup0, }; -static struct qcom_icc_node *qup_virt_nodes[] = { +static struct qcom_icc_node * const qup_virt_nodes[] = { [MASTER_QUP_CORE_0] = &mas_qup_core_0, [MASTER_QUP_CORE_1] = &mas_qup_core_1, [MASTER_QUP_CORE_2] = &mas_qup_core_2, From acdd8a4e13a008a83c6da88bb53eecbecda9714c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:19 +0000 Subject: [PATCH 1708/4122] KVM: arm64: PMU: Always advertise the CHAIN event Even when the underlying HW doesn't offer the CHAIN event (which happens with QEMU), we can always support it as we're in control of the counter overflow. Always advertise the event via PMCEID0_EL0. 
Reviewed-by: Reiji Watanabe Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221113163832.3154370-4-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 57765be69bea..69b67ab3c4bf 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -701,6 +701,8 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1) if (!pmceid1) { val = read_sysreg(pmceid0_el0); + /* always support CHAIN */ + val |= BIT(ARMV8_PMUV3_PERFCTR_CHAIN); base = 0; } else { val = read_sysreg(pmceid1_el0); From c82d28cbf1d4f9fe174041b4485c635cb970afa7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:20 +0000 Subject: [PATCH 1709/4122] KVM: arm64: PMU: Distinguish between 64bit counter and 64bit overflow The PMU architecture makes a subtle difference between a 64bit counter and a counter that has a 64bit overflow. This is for example the case of the cycle counter, which can generate an overflow on a 32bit boundary if PMCR_EL0.LC==0 despite the accumulation being done on 64 bits. Use this distinction in the few cases where it matters in the code, as we will reuse this with PMUv3p5 long counters. 
Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221113163832.3154370-5-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 43 ++++++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 69b67ab3c4bf..d050143326b5 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -50,6 +50,11 @@ static u32 kvm_pmu_event_mask(struct kvm *kvm) * @select_idx: The counter index */ static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx) +{ + return (select_idx == ARMV8_PMU_CYCLE_IDX); +} + +static bool kvm_pmu_idx_has_64bit_overflow(struct kvm_vcpu *vcpu, u64 select_idx) { return (select_idx == ARMV8_PMU_CYCLE_IDX && __vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC); @@ -57,7 +62,8 @@ static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx) static bool kvm_pmu_counter_can_chain(struct kvm_vcpu *vcpu, u64 idx) { - return (!(idx & 1) && (idx + 1) < ARMV8_PMU_CYCLE_IDX); + return (!(idx & 1) && (idx + 1) < ARMV8_PMU_CYCLE_IDX && + !kvm_pmu_idx_has_64bit_overflow(vcpu, idx)); } static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc) @@ -97,7 +103,7 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) counter += perf_event_read_value(pmc->perf_event, &enabled, &running); - if (select_idx != ARMV8_PMU_CYCLE_IDX) + if (!kvm_pmu_idx_is_64bit(vcpu, select_idx)) counter = lower_32_bits(counter); return counter; @@ -423,6 +429,23 @@ static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu, } } +/* Compute the sample period for a given counter value */ +static u64 compute_period(struct kvm_vcpu *vcpu, u64 select_idx, u64 counter) +{ + u64 val; + + if (kvm_pmu_idx_is_64bit(vcpu, select_idx)) { + if (!kvm_pmu_idx_has_64bit_overflow(vcpu, select_idx)) + val = -(counter & GENMASK(31, 0)); + else + val = (-counter) & GENMASK(63, 0); + } else { + val = (-counter) & GENMASK(31, 0); + } + + return val; +} + /** * 
When the perf event overflows, set the overflow status and inform the vcpu. */ @@ -442,10 +465,7 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, * Reset the sample period to the architectural limit, * i.e. the point where the counter overflows. */ - period = -(local64_read(&perf_event->count)); - - if (!kvm_pmu_idx_is_64bit(vcpu, pmc->idx)) - period &= GENMASK(31, 0); + period = compute_period(vcpu, idx, local64_read(&perf_event->count)); local64_set(&perf_event->hw.period_left, 0); perf_event->attr.sample_period = period; @@ -571,14 +591,13 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) /* * If counting with a 64bit counter, advertise it to the perf - * code, carefully dealing with the initial sample period. + * code, carefully dealing with the initial sample period + * which also depends on the overflow. */ - if (kvm_pmu_idx_is_64bit(vcpu, select_idx)) { + if (kvm_pmu_idx_is_64bit(vcpu, select_idx)) attr.config1 |= PERF_ATTR_CFG1_COUNTER_64BIT; - attr.sample_period = (-counter) & GENMASK(63, 0); - } else { - attr.sample_period = (-counter) & GENMASK(31, 0); - } + + attr.sample_period = compute_period(vcpu, select_idx, counter); event = perf_event_create_kernel_counter(&attr, -1, current, kvm_pmu_perf_overflow, pmc); From 001d85bd6c039d3662a4f33a5d212ef3e0438b27 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:21 +0000 Subject: [PATCH 1710/4122] KVM: arm64: PMU: Narrow the overflow checking when required For 64bit counters that overflow on a 32bit boundary, make sure we only check the bottom 32bit to generate a CHAIN event. 
Signed-off-by: Marc Zyngier Reviewed-by: Reiji Watanabe Link: https://lore.kernel.org/r/20221113163832.3154370-6-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index d050143326b5..9e6bc7edc4de 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -417,7 +417,8 @@ static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu, reg = lower_32_bits(reg); __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg; - if (reg) /* No overflow? move on */ + /* No overflow? move on */ + if (kvm_pmu_idx_has_64bit_overflow(vcpu, i) ? reg : lower_32_bits(reg)) continue; /* Mark overflow */ From 0f1e172b54f7574ca6aa46b851b332896add955f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:22 +0000 Subject: [PATCH 1711/4122] KVM: arm64: PMU: Only narrow counters that are not 64bit wide The current PMU emulation sometimes narrows counters to 32bit if the counter isn't the cycle counter. As this is going to change with PMUv3p5 where the counters are all 64bit, fix the couple of cases where this happens unconditionally. 
Signed-off-by: Marc Zyngier Reviewed-by: Reiji Watanabe Link: https://lore.kernel.org/r/20221113163832.3154370-7-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 9e6bc7edc4de..1fab889dbc74 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -151,20 +151,17 @@ static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc) */ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc) { - u64 counter, reg, val; + u64 reg, val; if (!pmc->perf_event) return; - counter = kvm_pmu_get_counter_value(vcpu, pmc->idx); + val = kvm_pmu_get_counter_value(vcpu, pmc->idx); - if (pmc->idx == ARMV8_PMU_CYCLE_IDX) { + if (pmc->idx == ARMV8_PMU_CYCLE_IDX) reg = PMCCNTR_EL0; - val = counter; - } else { + else reg = PMEVCNTR0_EL0 + pmc->idx; - val = lower_32_bits(counter); - } __vcpu_sys_reg(vcpu, reg) = val; @@ -414,7 +411,8 @@ static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu, /* Increment this counter */ reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1; - reg = lower_32_bits(reg); + if (!kvm_pmu_idx_is_64bit(vcpu, i)) + reg = lower_32_bits(reg); __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg; /* No overflow? move on */ From 0cb9c3c87a9d3287eaf353936e6846d885102439 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:23 +0000 Subject: [PATCH 1712/4122] KVM: arm64: PMU: Add counter_index_to_*reg() helpers In order to reduce the boilerplate code, add two helpers returning the counter register index (resp. the event register) in the vcpu register file from the counter index. 
Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221113163832.3154370-8-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 1fab889dbc74..faab0f57a45d 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -77,6 +77,16 @@ static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc) return container_of(vcpu_arch, struct kvm_vcpu, arch); } +static u32 counter_index_to_reg(u64 idx) +{ + return (idx == ARMV8_PMU_CYCLE_IDX) ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + idx; +} + +static u32 counter_index_to_evtreg(u64 idx) +{ + return (idx == ARMV8_PMU_CYCLE_IDX) ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + idx; +} + /** * kvm_pmu_get_counter_value - get PMU counter value * @vcpu: The vcpu pointer @@ -91,8 +101,7 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) if (!kvm_vcpu_has_pmu(vcpu)) return 0; - reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX) - ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx; + reg = counter_index_to_reg(select_idx); counter = __vcpu_sys_reg(vcpu, reg); /* @@ -122,8 +131,7 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) if (!kvm_vcpu_has_pmu(vcpu)) return; - reg = (select_idx == ARMV8_PMU_CYCLE_IDX) - ? 
PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx; + reg = counter_index_to_reg(select_idx); __vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx); /* Recreate the perf event to reflect the updated sample_period */ @@ -158,10 +166,7 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc) val = kvm_pmu_get_counter_value(vcpu, pmc->idx); - if (pmc->idx == ARMV8_PMU_CYCLE_IDX) - reg = PMCCNTR_EL0; - else - reg = PMEVCNTR0_EL0 + pmc->idx; + reg = counter_index_to_reg(pmc->idx); __vcpu_sys_reg(vcpu, reg) = val; @@ -404,16 +409,16 @@ static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu, u64 type, reg; /* Filter on event type */ - type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i); + type = __vcpu_sys_reg(vcpu, counter_index_to_evtreg(i)); type &= kvm_pmu_event_mask(vcpu->kvm); if (type != event) continue; /* Increment this counter */ - reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1; + reg = __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) + 1; if (!kvm_pmu_idx_is_64bit(vcpu, i)) reg = lower_32_bits(reg); - __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg; + __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) = reg; /* No overflow? move on */ if (kvm_pmu_idx_has_64bit_overflow(vcpu, i) ? reg : lower_32_bits(reg)) @@ -549,8 +554,7 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) struct perf_event_attr attr; u64 eventsel, counter, reg, data; - reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX) - ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + pmc->idx; + reg = counter_index_to_evtreg(select_idx); data = __vcpu_sys_reg(vcpu, reg); kvm_pmu_stop_counter(vcpu, pmc); @@ -632,8 +636,7 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, mask &= ~ARMV8_PMU_EVTYPE_EVENT; mask |= kvm_pmu_event_mask(vcpu->kvm); - reg = (select_idx == ARMV8_PMU_CYCLE_IDX) - ? 
PMCCFILTR_EL0 : PMEVTYPER0_EL0 + select_idx; + reg = counter_index_to_evtreg(select_idx); __vcpu_sys_reg(vcpu, reg) = data & mask; From 9917264d74d9063341968a8e071266358496777b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:24 +0000 Subject: [PATCH 1713/4122] KVM: arm64: PMU: Simplify setting a counter to a specific value kvm_pmu_set_counter_value() is pretty odd, as it tries to update the counter value while taking into account the value that is currently held by the running perf counter. This is not only complicated, this is quite wrong. Nowhere in the architecture is it said that the counter would be offset by something that is pending. The counter should be updated with the value set by SW, and start counting from there if required. Remove the odd computation and just assign the provided value after having released the perf event (which is then restarted). Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221113163832.3154370-9-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index faab0f57a45d..ea0c8411641f 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -23,6 +23,7 @@ static LIST_HEAD(arm_pmus); static DEFINE_MUTEX(arm_pmus_lock); static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx); +static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc); static u32 kvm_pmu_event_mask(struct kvm *kvm) { @@ -131,8 +132,10 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) if (!kvm_vcpu_has_pmu(vcpu)) return; + kvm_pmu_release_perf_event(&vcpu->arch.pmu.pmc[select_idx]); + reg = counter_index_to_reg(select_idx); - __vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx); + __vcpu_sys_reg(vcpu, reg) = val; /* Recreate the perf event to reflect the updated sample_period */ kvm_pmu_create_perf_event(vcpu, 
select_idx); From c4b33d28ea51c7d194b19a41c96a4f973cc0a280 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Wed, 9 Nov 2022 10:59:05 -0800 Subject: [PATCH 1714/4122] KVM: x86/mmu: Split huge pages mapped by the TDP MMU on fault Now that the TDP MMU has a mechanism to split huge pages, use it in the fault path when a huge page needs to be replaced with a mapping at a lower level. This change reduces the negative performance impact of NX HugePages. Prior to this change, if a vCPU executed from a huge page and NX HugePages was enabled, the vCPU would take a fault, zap the huge page, and map the faulting address at 4KiB with execute permissions enabled. The rest of the memory would be left *unmapped* and have to be faulted back in by the guest upon access (read, write, or execute). If the guest is backed by 1GiB pages, a single execute instruction can zap an entire GiB of its physical address space. For example, it can take a VM longer to execute from its memory than to populate that memory in the first place: $ ./execute_perf_test -s anonymous_hugetlb_1gb -v96 Populating memory : 2.748378795s Executing from memory : 2.899670885s With this change, such faults split the huge page instead of zapping it, which avoids the non-present faults on the rest of the huge page: $ ./execute_perf_test -s anonymous_hugetlb_1gb -v96 Populating memory : 2.729544474s Executing from memory : 0.111965688s <--- This change also reduces the performance impact of dirty logging when eager_page_split=N. eager_page_split=N (abbreviated "eps=N" below) can be desirable for read-heavy workloads, as it avoids allocating memory to split huge pages that are never written and avoids increasing the TLB miss cost on reads of those pages.
| Config: ept=Y, tdp_mmu=Y, 5% writes | | Iteration 1 dirty memory time | | --------------------------------------------- | vCPU Count | eps=N (Before) | eps=N (After) | eps=Y | ------------ | -------------- | ------------- | ------------ | 2 | 0.332305091s | 0.019615027s | 0.006108211s | 4 | 0.353096020s | 0.019452131s | 0.006214670s | 8 | 0.453938562s | 0.019748246s | 0.006610997s | 16 | 0.719095024s | 0.019972171s | 0.007757889s | 32 | 1.698727124s | 0.021361615s | 0.012274432s | 64 | 2.630673582s | 0.031122014s | 0.016994683s | 96 | 3.016535213s | 0.062608739s | 0.044760838s | Eager page splitting remains beneficial for write-heavy workloads, but the gap is now reduced. | Config: ept=Y, tdp_mmu=Y, 100% writes | | Iteration 1 dirty memory time | | --------------------------------------------- | vCPU Count | eps=N (Before) | eps=N (After) | eps=Y | ------------ | -------------- | ------------- | ------------ | 2 | 0.317710329s | 0.296204596s | 0.058689782s | 4 | 0.337102375s | 0.299841017s | 0.060343076s | 8 | 0.386025681s | 0.297274460s | 0.060399702s | 16 | 0.791462524s | 0.298942578s | 0.062508699s | 32 | 1.719646014s | 0.313101996s | 0.075984855s | 64 | 2.527973150s | 0.455779206s | 0.079789363s | 96 | 2.681123208s | 0.673778787s | 0.165386739s | Further study is needed to determine if the remaining gap is acceptable for customer workloads or if eager_page_split=N still requires a-priori knowledge of the VM workload, especially when considering these costs extrapolated out to large VMs with e.g. 416 vCPUs and 12TB RAM. 
Signed-off-by: David Matlack Reviewed-by: Mingwei Zhang Message-Id: <20221109185905.486172-3-dmatlack@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/tdp_mmu.c | 79 ++++++++++++++++++-------------------- 1 file changed, 38 insertions(+), 41 deletions(-) diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 4e5b3ae824c1..e08596775427 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1146,6 +1146,9 @@ static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter, return 0; } +static int tdp_mmu_split_huge_page(struct kvm *kvm, struct tdp_iter *iter, + struct kvm_mmu_page *sp, bool shared); + /* * Handle a TDP page fault (NPT/EPT violation/misconfiguration) by installing * page tables and SPTEs to translate the faulting guest physical address. @@ -1171,49 +1174,42 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) if (iter.level == fault->goal_level) break; - /* - * If there is an SPTE mapping a large page at a higher level - * than the target, that SPTE must be cleared and replaced - * with a non-leaf SPTE. - */ + /* Step down into the lower level page table if it exists. */ if (is_shadow_present_pte(iter.old_spte) && - is_large_pte(iter.old_spte)) { - if (tdp_mmu_zap_spte_atomic(vcpu->kvm, &iter)) - break; + !is_large_pte(iter.old_spte)) + continue; - /* - * The iter must explicitly re-read the spte here - * because the new value informs the !present - * path below. - */ - iter.old_spte = kvm_tdp_mmu_read_spte(iter.sptep); + /* + * If SPTE has been frozen by another thread, just give up and + * retry, avoiding unnecessary page table allocation and free. + */ + if (is_removed_spte(iter.old_spte)) + break; + + /* + * The SPTE is either non-present or points to a huge page that + * needs to be split. 
+ */ + sp = tdp_mmu_alloc_sp(vcpu); + tdp_mmu_init_child_sp(sp, &iter); + + sp->nx_huge_page_disallowed = fault->huge_page_disallowed; + + if (is_shadow_present_pte(iter.old_spte)) + ret = tdp_mmu_split_huge_page(kvm, &iter, sp, true); + else + ret = tdp_mmu_link_sp(kvm, &iter, sp, true); + + if (ret) { + tdp_mmu_free_sp(sp); + break; } - if (!is_shadow_present_pte(iter.old_spte)) { - /* - * If SPTE has been frozen by another thread, just - * give up and retry, avoiding unnecessary page table - * allocation and free. - */ - if (is_removed_spte(iter.old_spte)) - break; - - sp = tdp_mmu_alloc_sp(vcpu); - tdp_mmu_init_child_sp(sp, &iter); - - sp->nx_huge_page_disallowed = fault->huge_page_disallowed; - - if (tdp_mmu_link_sp(kvm, &iter, sp, true)) { - tdp_mmu_free_sp(sp); - break; - } - - if (fault->huge_page_disallowed && - fault->req_level >= iter.level) { - spin_lock(&kvm->arch.tdp_mmu_pages_lock); - track_possible_nx_huge_page(kvm, sp); - spin_unlock(&kvm->arch.tdp_mmu_pages_lock); - } + if (fault->huge_page_disallowed && + fault->req_level >= iter.level) { + spin_lock(&kvm->arch.tdp_mmu_pages_lock); + track_possible_nx_huge_page(kvm, sp); + spin_unlock(&kvm->arch.tdp_mmu_pages_lock); } } @@ -1477,6 +1473,7 @@ static struct kvm_mmu_page *tdp_mmu_alloc_sp_for_split(struct kvm *kvm, return sp; } +/* Note, the caller is responsible for initializing @sp. */ static int tdp_mmu_split_huge_page(struct kvm *kvm, struct tdp_iter *iter, struct kvm_mmu_page *sp, bool shared) { @@ -1484,8 +1481,6 @@ static int tdp_mmu_split_huge_page(struct kvm *kvm, struct tdp_iter *iter, const int level = iter->level; int ret, i; - tdp_mmu_init_child_sp(sp, iter); - /* * No need for atomics when writing to sp->spt since the page table has * not been linked in yet and thus is not reachable from any other CPU. 
@@ -1561,6 +1556,8 @@ retry: continue; } + tdp_mmu_init_child_sp(sp, &iter); + if (tdp_mmu_split_huge_page(kvm, &iter, sp, shared)) goto retry; From 63d28a25e04cb48e6bd15141506645ac99d9f8b2 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 17 Nov 2022 11:05:51 -0500 Subject: [PATCH 1715/4122] KVM: x86/mmu: simplify kvm_tdp_mmu_map flow when guest has to retry A removed SPTE is never present, hence the "if" in kvm_tdp_mmu_map only fails in the exact same conditions that the earlier loop tested in order to issue a "break". So, instead of checking twice the condition (upper level SPTEs could not be created or was frozen), just exit the loop with a goto---the usual poor-man C replacement for RAII early returns. While at it, do not use the "ret" variable for return values of functions that do not return a RET_PF_* enum. This is clearer and also makes it possible to initialize ret to RET_PF_RETRY. Suggested-by: Robert Hoo Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/tdp_mmu.c | 40 ++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index e08596775427..771210ce5181 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1159,7 +1159,7 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) struct kvm *kvm = vcpu->kvm; struct tdp_iter iter; struct kvm_mmu_page *sp; - int ret; + int ret = RET_PF_RETRY; kvm_mmu_hugepage_adjust(vcpu, fault); @@ -1168,23 +1168,25 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) rcu_read_lock(); tdp_mmu_for_each_pte(iter, mmu, fault->gfn, fault->gfn + 1) { + int r; + if (fault->nx_huge_page_workaround_enabled) disallowed_hugepage_adjust(fault, iter.old_spte, iter.level); if (iter.level == fault->goal_level) break; - /* Step down into the lower level page table if it exists. 
*/ - if (is_shadow_present_pte(iter.old_spte) && - !is_large_pte(iter.old_spte)) - continue; - /* * If SPTE has been frozen by another thread, just give up and * retry, avoiding unnecessary page table allocation and free. */ if (is_removed_spte(iter.old_spte)) - break; + goto retry; + + /* Step down into the lower level page table if it exists. */ + if (is_shadow_present_pte(iter.old_spte) && + !is_large_pte(iter.old_spte)) + continue; /* * The SPTE is either non-present or points to a huge page that @@ -1196,13 +1198,17 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) sp->nx_huge_page_disallowed = fault->huge_page_disallowed; if (is_shadow_present_pte(iter.old_spte)) - ret = tdp_mmu_split_huge_page(kvm, &iter, sp, true); + r = tdp_mmu_split_huge_page(kvm, &iter, sp, true); else - ret = tdp_mmu_link_sp(kvm, &iter, sp, true); + r = tdp_mmu_link_sp(kvm, &iter, sp, true); - if (ret) { + /* + * Also force the guest to retry the access if the upper level SPTEs + * aren't in place. + */ + if (r) { tdp_mmu_free_sp(sp); - break; + goto retry; } if (fault->huge_page_disallowed && @@ -1213,18 +1219,10 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) } } - /* - * Force the guest to retry the access if the upper level SPTEs aren't - * in place, or if the target leaf SPTE is frozen by another CPU. - */ - if (iter.level != fault->goal_level || is_removed_spte(iter.old_spte)) { - rcu_read_unlock(); - return RET_PF_RETRY; - } - ret = tdp_mmu_map_handle_target_level(vcpu, fault, &iter); - rcu_read_unlock(); +retry: + rcu_read_unlock(); return ret; } From eb298605705a5c6b3d61c754e3c80ac8ef8e8724 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Thu, 3 Nov 2022 13:44:21 -0700 Subject: [PATCH 1716/4122] KVM: x86/mmu: Do not recover dirty-tracked NX Huge Pages Do not recover (i.e. zap) an NX Huge Page that is being dirty tracked, as it will just be faulted back in at the same 4KiB granularity when accessed by a vCPU. 
This may need to be changed if KVM ever supports 2MiB (or larger) dirty tracking granularity, or faulting huge pages during dirty tracking for reads/executes. However, for now, these zaps are entirely wasteful. In order to check if this commit increases the CPU usage of the NX recovery worker thread, I used a modified version of execute_perf_test [1] that supports splitting guest memory into multiple slots and reports /proc/pid/schedstat:se.sum_exec_runtime for the NX recovery worker just before tearing down the VM. The goal was to force a large number of NX Huge Page recoveries and see if the recovery worker used any more CPU. Test Setup: echo 1000 > /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms echo 10 > /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio Test Command: ./execute_perf_test -v64 -s anonymous_hugetlb_1gb -x 16 -o | kvm-nx-lpage-re:se.sum_exec_runtime | | ---------------------------------------- | Run | Before | After | ------- | ------------------ | ------------------- | 1 | 730.084105 | 724.375314 | 2 | 728.751339 | 740.581988 | 3 | 736.264720 | 757.078163 | Comparing the median results, this commit results in about a 1% increase in CPU usage of the NX recovery worker when testing a VM with 16 slots. However, the effect is negligible with the default halving time of NX pages, which is 1 hour rather than 10 seconds given by period_ms = 1000, ratio = 10.
[1] https://lore.kernel.org/kvm/20221019234050.3919566-2-dmatlack@google.com/ Signed-off-by: David Matlack Message-Id: <20221103204421.1146958-1-dmatlack@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 93c389eaf471..cfff74685a25 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -6841,6 +6841,7 @@ static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel static void kvm_recover_nx_huge_pages(struct kvm *kvm) { unsigned long nx_lpage_splits = kvm->stat.nx_lpage_splits; + struct kvm_memory_slot *slot; int rcu_idx; struct kvm_mmu_page *sp; unsigned int ratio; @@ -6875,7 +6876,21 @@ static void kvm_recover_nx_huge_pages(struct kvm *kvm) struct kvm_mmu_page, possible_nx_huge_page_link); WARN_ON_ONCE(!sp->nx_huge_page_disallowed); - if (is_tdp_mmu_page(sp)) + WARN_ON_ONCE(!sp->role.direct); + + slot = gfn_to_memslot(kvm, sp->gfn); + WARN_ON_ONCE(!slot); + + /* + * Unaccount and do not attempt to recover any NX Huge Pages + * that are being dirty tracked, as they would just be faulted + * back in as 4KiB pages. The NX Huge Pages in this slot will be + * recovered, along with all the other huge pages in the slot, + * when dirty logging is disabled. + */ + if (slot && kvm_slot_dirty_track_enabled(slot)) + unaccount_nx_huge_page(kvm, sp); + else if (is_tdp_mmu_page(sp)) flush |= kvm_tdp_mmu_zap_sp(kvm, sp); else kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); From effae0e3d9e1139d583e9b5d050f4f948825b8a3 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Tue, 15 Nov 2022 10:51:33 +0000 Subject: [PATCH 1717/4122] riscv: Kconfig: Enable cpufreq kconfig menu Enable cpufreq kconfig menu for RISC-V. 
Signed-off-by: Lad Prabhakar Reviewed-by: Conor Dooley Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20221115105135.1180490-2-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 7cd981f96f48..3b41165a8b10 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -684,6 +684,8 @@ menu "CPU Power Management" source "drivers/cpuidle/Kconfig" +source "drivers/cpufreq/Kconfig" + endmenu # "CPU Power Management" source "arch/riscv/kvm/Kconfig" From 729c287e9f7481d630b69c73960e2ac990cd04fc Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 8 Nov 2022 10:36:20 -0800 Subject: [PATCH 1718/4122] scsi: lpfc: Remove redundant pointer 'lp' Pointer lp is being initialized and incremented but the result is never read. The pointer is redundant and can be removed. Once lp is removed, pcmd is no longer used, so remove pcmd as well. Signed-off-by: Colin Ian King Signed-off-by: James Smart Link: https://lore.kernel.org/r/20221108183620.93978-1-jsmart2021@gmail.com Signed-off-by: Martin K.
Petersen --- drivers/scsi/lpfc/lpfc_els.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 2b03210264bb..9326340d4226 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -9172,15 +9172,10 @@ static int lpfc_els_rcv_farpr(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, struct lpfc_nodelist *ndlp) { - struct lpfc_dmabuf *pcmd; - uint32_t *lp; uint32_t did; did = get_job_els_rsp64_did(vport->phba, cmdiocb); - pcmd = cmdiocb->cmd_dmabuf; - lp = (uint32_t *)pcmd->virt; - lp++; /* FARP-RSP received from DID */ lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS, "0600 FARP-RSP received from DID x%x\n", did); From b27ac2faa2fc0b2677cf1cbd270af734a1f5fd95 Mon Sep 17 00:00:00 2001 From: Don Brace Date: Tue, 8 Nov 2022 13:21:38 -0600 Subject: [PATCH 1719/4122] scsi: smartpqi: Convert to host_tagset Add support for host_tagset. Also move the reserved command slots to the end of the pool to eliminate an addition operation for every SCSI request. This patch was originally authored by Hannes Reinecke here: Link: https://lore.kernel.org/linux-block/20191126131009.71726-8-hare@suse.de/ But we NAKed this patch because we wanted to fully test multipath failover operations. Suggested-by: Hannes Reinecke Reviewed-by: Scott Benesh Reviewed-by: Scott Teel Reviewed-by: Mahesh Rajashekhara Reviewed-by: Mike McGowen Reviewed-by: Kevin Barnett Signed-off-by: Don Brace Link: https://lore.kernel.org/r/166793529811.322537.3294617845448383948.stgit@brunhilda Signed-off-by: Martin K. 
Petersen --- drivers/scsi/smartpqi/smartpqi.h | 3 -- drivers/scsi/smartpqi/smartpqi_init.c | 68 +++++++++++++++++---------- 2 files changed, 43 insertions(+), 28 deletions(-) diff --git a/drivers/scsi/smartpqi/smartpqi.h b/drivers/scsi/smartpqi/smartpqi.h index e550b12e525a..8cdf4d2476dd 100644 --- a/drivers/scsi/smartpqi/smartpqi.h +++ b/drivers/scsi/smartpqi/smartpqi.h @@ -1307,7 +1307,6 @@ struct pqi_ctrl_info { dma_addr_t error_buffer_dma_handle; size_t sg_chain_buffer_length; unsigned int num_queue_groups; - u16 max_hw_queue_index; u16 num_elements_per_iq; u16 num_elements_per_oq; u16 max_inbound_iu_length_per_firmware; @@ -1369,8 +1368,6 @@ struct pqi_ctrl_info { u64 sas_address; struct pqi_io_request *io_request_pool; - u16 next_io_request_slot; - struct pqi_event events[PQI_NUM_SUPPORTED_EVENTS]; struct work_struct event_work; diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index b971fbe3b3a1..651dca535b3b 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -678,23 +678,36 @@ static inline void pqi_reinit_io_request(struct pqi_io_request *io_request) io_request->raid_bypass = false; } -static struct pqi_io_request *pqi_alloc_io_request( - struct pqi_ctrl_info *ctrl_info) +static inline struct pqi_io_request *pqi_alloc_io_request(struct pqi_ctrl_info *ctrl_info, struct scsi_cmnd *scmd) { struct pqi_io_request *io_request; - u16 i = ctrl_info->next_io_request_slot; /* benignly racy */ + u16 i; - while (1) { + if (scmd) { /* SML I/O request */ + u32 blk_tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmd)); + + i = blk_mq_unique_tag_to_tag(blk_tag); io_request = &ctrl_info->io_request_pool[i]; - if (atomic_inc_return(&io_request->refcount) == 1) - break; - atomic_dec(&io_request->refcount); - i = (i + 1) % ctrl_info->max_io_slots; + if (atomic_inc_return(&io_request->refcount) > 1) { + atomic_dec(&io_request->refcount); + return NULL; + } + } else { /* IOCTL or driver internal 
request */ + /* + * benignly racy - may have to wait for an open slot. + * command slot range is scsi_ml_can_queue - + * [scsi_ml_can_queue + (PQI_RESERVED_IO_SLOTS - 1)] + */ + i = 0; + while (1) { + io_request = &ctrl_info->io_request_pool[ctrl_info->scsi_ml_can_queue + i]; + if (atomic_inc_return(&io_request->refcount) == 1) + break; + atomic_dec(&io_request->refcount); + i = (i + 1) % PQI_RESERVED_IO_SLOTS; + } } - /* benignly racy */ - ctrl_info->next_io_request_slot = (i + 1) % ctrl_info->max_io_slots; - pqi_reinit_io_request(io_request); return io_request; @@ -4586,7 +4599,7 @@ static int pqi_submit_raid_request_synchronous(struct pqi_ctrl_info *ctrl_info, goto out; } - io_request = pqi_alloc_io_request(ctrl_info); + io_request = pqi_alloc_io_request(ctrl_info, NULL); put_unaligned_le16(io_request->index, &(((struct pqi_raid_path_request *)request)->request_id)); @@ -5233,7 +5246,6 @@ static void pqi_calculate_queue_resources(struct pqi_ctrl_info *ctrl_info) } ctrl_info->num_queue_groups = num_queue_groups; - ctrl_info->max_hw_queue_index = num_queue_groups - 1; /* * Make sure that the max. 
inbound IU length is an even multiple @@ -5567,7 +5579,9 @@ static inline int pqi_raid_submit_scsi_cmd(struct pqi_ctrl_info *ctrl_info, { struct pqi_io_request *io_request; - io_request = pqi_alloc_io_request(ctrl_info); + io_request = pqi_alloc_io_request(ctrl_info, scmd); + if (!io_request) + return SCSI_MLQUEUE_HOST_BUSY; return pqi_raid_submit_scsi_cmd_with_io_request(ctrl_info, io_request, device, scmd, queue_group); @@ -5671,7 +5685,9 @@ static int pqi_aio_submit_io(struct pqi_ctrl_info *ctrl_info, struct pqi_scsi_dev *device; device = scmd->device->hostdata; - io_request = pqi_alloc_io_request(ctrl_info); + io_request = pqi_alloc_io_request(ctrl_info, scmd); + if (!io_request) + return SCSI_MLQUEUE_HOST_BUSY; io_request->io_complete_callback = pqi_aio_io_complete; io_request->scmd = scmd; io_request->raid_bypass = raid_bypass; @@ -5743,7 +5759,10 @@ static int pqi_aio_submit_r1_write_io(struct pqi_ctrl_info *ctrl_info, struct pqi_io_request *io_request; struct pqi_aio_r1_path_request *r1_request; - io_request = pqi_alloc_io_request(ctrl_info); + io_request = pqi_alloc_io_request(ctrl_info, scmd); + if (!io_request) + return SCSI_MLQUEUE_HOST_BUSY; + io_request->io_complete_callback = pqi_aio_io_complete; io_request->scmd = scmd; io_request->raid_bypass = true; @@ -5801,7 +5820,9 @@ static int pqi_aio_submit_r56_write_io(struct pqi_ctrl_info *ctrl_info, struct pqi_io_request *io_request; struct pqi_aio_r56_path_request *r56_request; - io_request = pqi_alloc_io_request(ctrl_info); + io_request = pqi_alloc_io_request(ctrl_info, scmd); + if (!io_request) + return SCSI_MLQUEUE_HOST_BUSY; io_request->io_complete_callback = pqi_aio_io_complete; io_request->scmd = scmd; io_request->raid_bypass = true; @@ -5860,13 +5881,10 @@ static int pqi_aio_submit_r56_write_io(struct pqi_ctrl_info *ctrl_info, static inline u16 pqi_get_hw_queue(struct pqi_ctrl_info *ctrl_info, struct scsi_cmnd *scmd) { - u16 hw_queue; - - hw_queue = 
blk_mq_unique_tag_to_hwq(blk_mq_unique_tag(scsi_cmd_to_rq(scmd))); - if (hw_queue > ctrl_info->max_hw_queue_index) - hw_queue = 0; - - return hw_queue; + /* + * We are setting host_tagset = 1 during init. + */ + return blk_mq_unique_tag_to_hwq(blk_mq_unique_tag(scsi_cmd_to_rq(scmd))); } static inline bool pqi_is_bypass_eligible_request(struct scsi_cmnd *scmd) @@ -6268,7 +6286,7 @@ static int pqi_lun_reset(struct pqi_ctrl_info *ctrl_info, struct scsi_cmnd *scmd struct pqi_scsi_dev *device; device = scmd->device->hostdata; - io_request = pqi_alloc_io_request(ctrl_info); + io_request = pqi_alloc_io_request(ctrl_info, NULL); io_request->io_complete_callback = pqi_lun_reset_complete; io_request->context = &wait; From 0b93cf2a9097b1c3d75642ef878ba87f15f03043 Mon Sep 17 00:00:00 2001 From: Mike McGowen Date: Tue, 8 Nov 2022 13:21:43 -0600 Subject: [PATCH 1720/4122] scsi: smartpqi: Add new controller PCI IDs All PCI ID entries in Hex. Add PCI IDs for ByteDance controllers: VID / DID / SVID / SDID ---- ---- ---- ---- ByteHBA JGH43024-8 9005 / 028f / 1e93 / 1000 ByteHBA JGH43034-8 9005 / 028f / 1e93 / 1001 ByteHBA JGH44014-8 9005 / 028f / 1e93 / 1002 Add PCI IDs for new Inspur controllers: VID / DID / SVID / SDID ---- ---- ---- ---- INSPUR RT0800M7E 9005 / 028f / 1bd4 / 0086 INSPUR RT0800M7H 9005 / 028f / 1bd4 / 0087 INSPUR RT0804M7R 9005 / 028f / 1bd4 / 0088 INSPUR RT0808M7R 9005 / 028f / 1bd4 / 0089 Add PCI IDs for new FAB A controllers: VID / DID / SVID / SDID ---- ---- ---- ---- Adaptec SmartRAID 3254-16e /e 9005 / 028f / 9005 / 1475 Adaptec HBA 1200-16e 9005 / 028f / 9005 / 14c3 Adaptec HBA 1200-8e 9005 / 028f / 9005 / 14c4 Add H3C controller PCI IDs: VID / DID / SVID / SDID ---- ---- ---- ---- H3C H4508-Mf-8i 9005 / 028f / 193d / 110b Reviewed-by: Scott Benesh Reviewed-by: Scott Teel Signed-off-by: Mike McGowen Signed-off-by: Don Brace Link: https://lore.kernel.org/r/166793530327.322537.6056884426657539311.stgit@brunhilda Signed-off-by: Martin K. 
Petersen --- drivers/scsi/smartpqi/smartpqi_init.c | 44 +++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index 651dca535b3b..6cda12078130 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -9320,6 +9320,10 @@ static const struct pci_device_id pqi_pci_id_table[] = { PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, 0x193d, 0x1109) }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x193d, 0x110b) + }, { PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, 0x193d, 0x8460) @@ -9420,6 +9424,22 @@ static const struct pci_device_id pqi_pci_id_table[] = { PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, 0x1bd4, 0x0072) }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x1bd4, 0x0086) + }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x1bd4, 0x0087) + }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x1bd4, 0x0088) + }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x1bd4, 0x0089) + }, { PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, 0x19e5, 0xd227) @@ -9668,6 +9688,10 @@ static const struct pci_device_id pqi_pci_id_table[] = { PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, PCI_VENDOR_ID_ADAPTEC2, 0x1474) }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + PCI_VENDOR_ID_ADAPTEC2, 0x1475) + }, { PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, PCI_VENDOR_ID_ADAPTEC2, 0x1480) @@ -9724,6 +9748,14 @@ static const struct pci_device_id pqi_pci_id_table[] = { PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, PCI_VENDOR_ID_ADAPTEC2, 0x14c2) }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + PCI_VENDOR_ID_ADAPTEC2, 0x14c3) + }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + PCI_VENDOR_ID_ADAPTEC2, 0x14c4) + }, { PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, PCI_VENDOR_ID_ADAPTEC2, 0x14d0) @@ -9960,6 +9992,18 @@ static const struct pci_device_id pqi_pci_id_table[] = { 
PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, PCI_VENDOR_ID_LENOVO, 0x0623) }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x1e93, 0x1000) + }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x1e93, 0x1001) + }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x1e93, 0x1002) + }, { PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, PCI_ANY_ID, PCI_ANY_ID) From 7c56850637ea820a89ce2f52fca66c5ae12d0f0a Mon Sep 17 00:00:00 2001 From: Kevin Barnett Date: Tue, 8 Nov 2022 13:21:48 -0600 Subject: [PATCH 1721/4122] scsi: smartpqi: Correct max LUN number Correct maximum LUN number for multi-actuator devices. When multi-actuator support was added to smartpqi, the maximum number of LUNs supported for multi-actuator devices was supposed to be changed from unlimited to 256, but the setting was inadvertently left at unlimited. Reviewed-by: Scott Benesh Reviewed-by: Scott Teel Signed-off-by: Kevin Barnett Signed-off-by: Don Brace Link: https://lore.kernel.org/r/166793530842.322537.816949081443241857.stgit@brunhilda Signed-off-by: Martin K. Petersen --- drivers/scsi/smartpqi/smartpqi_init.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index 6cda12078130..33059355f9cd 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -7423,7 +7423,6 @@ static int pqi_register_scsi(struct pqi_ctrl_info *ctrl_info) shost->max_channel = PQI_MAX_BUS; shost->max_cmd_len = MAX_COMMAND_SIZE; shost->max_lun = PQI_MAX_LUNS_PER_DEVICE; - shost->max_lun = ~0; shost->max_id = ~0; shost->max_sectors = ctrl_info->max_sectors; shost->can_queue = ctrl_info->scsi_ml_can_queue; From cbe42ac15698a23b204a9b5c66eb0067b22cbd42 Mon Sep 17 00:00:00 2001 From: Kevin Barnett Date: Tue, 8 Nov 2022 13:21:53 -0600 Subject: [PATCH 1722/4122] scsi: smartpqi: Change sysfs raid_level attribute to N/A for controllers Change the sysfs raid_level attribute from "RAID-0" to N/A. 
Reviewed-by: Scott Benesh Reviewed-by: Scott Teel Reviewed-by: Mike McGowan Signed-off-by: Kevin Barnett Signed-off-by: Don Brace Link: https://lore.kernel.org/r/166793531357.322537.8639138137605612362.stgit@brunhilda Signed-off-by: Martin K. Petersen --- drivers/scsi/smartpqi/smartpqi_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index 33059355f9cd..20fc6c8044ac 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -7255,7 +7255,7 @@ static ssize_t pqi_raid_level_show(struct device *dev, return -ENODEV; } - if (pqi_is_logical_device(device)) + if (pqi_is_logical_device(device) && device->devtype == TYPE_DISK) raid_level = pqi_raid_level_to_string(device->raid_level); else raid_level = "N/A"; From cc9befcbbb5ebce77726f938508700d913530035 Mon Sep 17 00:00:00 2001 From: Kumar Meiyappan Date: Tue, 8 Nov 2022 13:21:58 -0600 Subject: [PATCH 1723/4122] scsi: smartpqi: Correct device removal for multi-actuator devices Correct device count for multi-actuator drives which can cause kernel panics. Reviewed-by: Scott Benesh Reviewed-by: Scott Teel Reviewed-by: Mike Mcgowan Reviewed-by: Kevin Barnett Signed-off-by: Kumar Meiyappan Signed-off-by: Don Brace Link: https://lore.kernel.org/r/166793531872.322537.9003385780343419275.stgit@brunhilda Signed-off-by: Martin K. 
Petersen --- drivers/scsi/smartpqi/smartpqi.h | 2 +- drivers/scsi/smartpqi/smartpqi_init.c | 33 +++++++++++++++++++-------- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/drivers/scsi/smartpqi/smartpqi.h b/drivers/scsi/smartpqi/smartpqi.h index 8cdf4d2476dd..af27bb0f3133 100644 --- a/drivers/scsi/smartpqi/smartpqi.h +++ b/drivers/scsi/smartpqi/smartpqi.h @@ -1130,7 +1130,7 @@ struct pqi_scsi_dev { u8 phy_id; u8 ncq_prio_enable; u8 ncq_prio_support; - u8 multi_lun_device_lun_count; + u8 lun_count; bool raid_bypass_configured; /* RAID bypass configured */ bool raid_bypass_enabled; /* RAID bypass enabled */ u32 next_bypass_group[RAID_MAP_MAX_DATA_DISKS_PER_ROW]; diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index 20fc6c8044ac..e82f4de46ea7 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -1623,9 +1623,7 @@ static int pqi_get_physical_device_info(struct pqi_ctrl_info *ctrl_info, &id_phys->alternate_paths_phys_connector, sizeof(device->phys_connector)); device->bay = id_phys->phys_bay_in_box; - device->multi_lun_device_lun_count = id_phys->multi_lun_device_lun_count; - if (!device->multi_lun_device_lun_count) - device->multi_lun_device_lun_count = 1; + device->lun_count = id_phys->multi_lun_device_lun_count; if ((id_phys->even_more_flags & PQI_DEVICE_PHY_MAP_SUPPORTED) && id_phys->phy_count) device->phy_id = @@ -1759,7 +1757,7 @@ out: return offline; } -static int pqi_get_device_info(struct pqi_ctrl_info *ctrl_info, +static int pqi_get_device_info_phys_logical(struct pqi_ctrl_info *ctrl_info, struct pqi_scsi_dev *device, struct bmic_identify_physical_device *id_phys) { @@ -1776,6 +1774,20 @@ static int pqi_get_device_info(struct pqi_ctrl_info *ctrl_info, return rc; } +static int pqi_get_device_info(struct pqi_ctrl_info *ctrl_info, + struct pqi_scsi_dev *device, + struct bmic_identify_physical_device *id_phys) +{ + int rc; + + rc = 
pqi_get_device_info_phys_logical(ctrl_info, device, id_phys); + + if (rc == 0 && device->lun_count == 0) + device->lun_count = 1; + + return rc; +} + static void pqi_show_volume_status(struct pqi_ctrl_info *ctrl_info, struct pqi_scsi_dev *device) { @@ -1910,7 +1922,7 @@ static inline void pqi_remove_device(struct pqi_ctrl_info *ctrl_info, struct pqi int rc; int lun; - for (lun = 0; lun < device->multi_lun_device_lun_count; lun++) { + for (lun = 0; lun < device->lun_count; lun++) { rc = pqi_device_wait_for_pending_io(ctrl_info, device, lun, PQI_REMOVE_DEVICE_PENDING_IO_TIMEOUT_MSECS); if (rc) @@ -2089,6 +2101,7 @@ static void pqi_scsi_update_device(struct pqi_ctrl_info *ctrl_info, existing_device->sas_address = new_device->sas_address; existing_device->queue_depth = new_device->queue_depth; existing_device->device_offline = false; + existing_device->lun_count = new_device->lun_count; if (pqi_is_logical_device(existing_device)) { existing_device->is_external_raid_device = new_device->is_external_raid_device; @@ -2121,10 +2134,6 @@ static void pqi_scsi_update_device(struct pqi_ctrl_info *ctrl_info, existing_device->phy_connected_dev_type = new_device->phy_connected_dev_type; memcpy(existing_device->box, new_device->box, sizeof(existing_device->box)); memcpy(existing_device->phys_connector, new_device->phys_connector, sizeof(existing_device->phys_connector)); - - existing_device->multi_lun_device_lun_count = new_device->multi_lun_device_lun_count; - if (existing_device->multi_lun_device_lun_count == 0) - existing_device->multi_lun_device_lun_count = 1; } } @@ -6502,6 +6511,12 @@ static void pqi_slave_destroy(struct scsi_device *sdev) return; } + device->lun_count--; + if (device->lun_count > 0) { + mutex_unlock(&ctrl_info->scan_mutex); + return; + } + spin_lock_irqsave(&ctrl_info->scsi_device_list_lock, flags); list_del(&device->scsi_device_list_entry); spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags); From 14063fb625c4541f48ff0dc7ae005b0d5a159c3f Mon 
Sep 17 00:00:00 2001 From: Gilbert Wu Date: Tue, 8 Nov 2022 13:22:03 -0600 Subject: [PATCH 1724/4122] scsi: smartpqi: Add controller cache flush during rmmod Add in a call to flush the controller cache during driver removal. Reviewed-by: Scott Benesh Reviewed-by: Mike Mcgowan Reviewed-by: Kevin Barnett Signed-off-by: Gilbert Wu Signed-off-by: Don Brace Link: https://lore.kernel.org/r/166793532388.322537.878022136408270892.stgit@brunhilda Signed-off-by: Martin K. Petersen --- drivers/scsi/smartpqi/smartpqi_init.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index e82f4de46ea7..e9c924ac1bb2 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -9040,6 +9040,7 @@ static void pqi_pci_remove(struct pci_dev *pci_dev) { struct pqi_ctrl_info *ctrl_info; u16 vendor_id; + int rc; ctrl_info = pci_get_drvdata(pci_dev); if (!ctrl_info) @@ -9051,6 +9052,13 @@ static void pqi_pci_remove(struct pci_dev *pci_dev) else ctrl_info->ctrl_removal_state = PQI_CTRL_GRACEFUL_REMOVAL; + if (ctrl_info->ctrl_removal_state == PQI_CTRL_GRACEFUL_REMOVAL) { + rc = pqi_flush_cache(ctrl_info, RESTART); + if (rc) + dev_err(&pci_dev->dev, + "unable to flush controller cache during remove\n"); + } + pqi_remove_ctrl(ctrl_info); } From 921800a1deeaa832e4303e9335a31b4234c41ac1 Mon Sep 17 00:00:00 2001 From: Don Brace Date: Tue, 8 Nov 2022 13:22:09 -0600 Subject: [PATCH 1725/4122] scsi: smartpqi: Initialize feature section info Initialize features to 0 before processing. Reviewed-by: Scott Benesh Reviewed-by: Mike Mcgowan Reviewed-by: Kevin Barnett Signed-off-by: Don Brace Link: https://lore.kernel.org/r/166793532902.322537.2436075977808555348.stgit@brunhilda Signed-off-by: Martin K. 
Petersen --- drivers/scsi/smartpqi/smartpqi_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index e9c924ac1bb2..fb4a33decde1 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -8004,7 +8004,7 @@ static int pqi_process_config_table(struct pqi_ctrl_info *ctrl_info) struct pqi_config_table *config_table; struct pqi_config_table_section_header *section; struct pqi_config_table_section_info section_info; - struct pqi_config_table_section_info feature_section_info; + struct pqi_config_table_section_info feature_section_info = {0}; table_length = ctrl_info->config_table_length; if (table_length == 0) From 2ae45329a956ff86ff8bec36463b6f49d2ca9bea Mon Sep 17 00:00:00 2001 From: Don Brace Date: Tue, 8 Nov 2022 13:22:14 -0600 Subject: [PATCH 1726/4122] scsi: smartpqi: Change version to 2.1.20-035 Reviewed-by: Scott Benesh Reviewed-by: Scott Teel Reviewed-by: Gerry Morong Reviewed-by: Kevin Barnett Signed-off-by: Don Brace Link: https://lore.kernel.org/r/166793533417.322537.3074216622272955440.stgit@brunhilda Signed-off-by: Martin K. 
Petersen --- drivers/scsi/smartpqi/smartpqi_init.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index fb4a33decde1..d0446d4d4465 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -33,11 +33,11 @@ #define BUILD_TIMESTAMP #endif -#define DRIVER_VERSION "2.1.18-045" +#define DRIVER_VERSION "2.1.20-035" #define DRIVER_MAJOR 2 #define DRIVER_MINOR 1 -#define DRIVER_RELEASE 18 -#define DRIVER_REVISION 45 +#define DRIVER_RELEASE 20 +#define DRIVER_REVISION 35 #define DRIVER_NAME "Microchip SmartPQI Driver (v" \ DRIVER_VERSION BUILD_TIMESTAMP ")" From 216e179724c1d9f57a8ababf8bd7aaabef67f01b Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Fri, 11 Nov 2022 02:05:25 -0800 Subject: [PATCH 1727/4122] scsi: scsi_debug: Fix a warning in resp_write_scat() As 'lbdof_blen' is coming from user, if the size in kzalloc() is >= MAX_ORDER then we hit a warning. Call trace: sg_ioctl sg_ioctl_common scsi_ioctl sg_scsi_ioctl blk_execute_rq blk_mq_sched_insert_request blk_mq_run_hw_queue __blk_mq_delay_run_hw_queue __blk_mq_run_hw_queue blk_mq_sched_dispatch_requests __blk_mq_sched_dispatch_requests blk_mq_dispatch_rq_list scsi_queue_rq scsi_dispatch_cmd scsi_debug_queuecommand schedule_resp resp_write_scat If you try to allocate a memory larger than(>=) MAX_ORDER, then kmalloc() will definitely fail. It creates a stack trace and messes up dmesg. The user controls the size here so if they specify a too large size it will fail. Add __GFP_NOWARN in order to avoid too large allocation warning. This is detected by static analysis using smatch. Fixes: 481b5e5c7949 ("scsi: scsi_debug: add resp_write_scat function") Signed-off-by: Harshit Mogalapalli Link: https://lore.kernel.org/r/20221111100526.1790533-1-harshit.m.mogalapalli@oracle.com Acked-by: Douglas Gilbert Signed-off-by: Martin K. 
Petersen --- drivers/scsi/scsi_debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 697fc57bc711..273224d29ce9 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -3778,7 +3778,7 @@ static int resp_write_scat(struct scsi_cmnd *scp, mk_sense_buffer(scp, ILLEGAL_REQUEST, INVALID_FIELD_IN_CDB, 0); return illegal_condition_result; } - lrdp = kzalloc(lbdof_blen, GFP_ATOMIC); + lrdp = kzalloc(lbdof_blen, GFP_ATOMIC | __GFP_NOWARN); if (lrdp == NULL) return SCSI_MLQUEUE_HOST_BUSY; if (sdebug_verbose) From b29e91385ce2d3aae70906f80f517f9b93d97a7b Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Fri, 11 Nov 2022 15:43:10 +0800 Subject: [PATCH 1728/4122] scsi: lpfc: Use memset_startat() helper Use memset_startat() helper to simplify the code, no functional changes in this patch. Signed-off-by: Xiu Jianfeng Link: https://lore.kernel.org/r/20221111074310.132125-1-xiujianfeng@huawei.com Reviewed-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_sli.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index d25afc9dde14..35c9404d5e49 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -1373,7 +1373,6 @@ static void __lpfc_sli_release_iocbq_s4(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq) { struct lpfc_sglq *sglq; - size_t start_clean = offsetof(struct lpfc_iocbq, wqe); unsigned long iflag = 0; struct lpfc_sli_ring *pring; @@ -1430,7 +1429,7 @@ out: /* * Clean all volatile data fields, preserve iotag and node struct. 
*/ - memset((char *)iocbq + start_clean, 0, sizeof(*iocbq) - start_clean); + memset_startat(iocbq, 0, wqe); iocbq->sli4_lxritag = NO_XRI; iocbq->sli4_xritag = NO_XRI; iocbq->cmd_flag &= ~(LPFC_IO_NVME | LPFC_IO_NVMET | LPFC_IO_CMF | @@ -1453,12 +1452,11 @@ out: static void __lpfc_sli_release_iocbq_s3(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq) { - size_t start_clean = offsetof(struct lpfc_iocbq, iocb); /* * Clean all volatile data fields, preserve iotag and node struct. */ - memset((char*)iocbq + start_clean, 0, sizeof(*iocbq) - start_clean); + memset_startat(iocbq, 0, iocb); iocbq->sli4_xritag = NO_XRI; list_add_tail(&iocbq->list, &phba->lpfc_iocb_list); } From 0824050682aef5151ade16129b3a0498a07ca6c9 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 11 Nov 2022 17:08:24 +0000 Subject: [PATCH 1729/4122] scsi: libfc: Remove redundant variable ev_qual Variable ev_qual is being assigned and modified but the end result is never used. The variable is redundant and can be removed. Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20221111170824.558250-1-colin.i.king@gmail.com Signed-off-by: Martin K. 
Petersen --- drivers/scsi/libfc/fc_disc.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/scsi/libfc/fc_disc.c b/drivers/scsi/libfc/fc_disc.c index 942fc60f7c21..0f32ded246d0 100644 --- a/drivers/scsi/libfc/fc_disc.c +++ b/drivers/scsi/libfc/fc_disc.c @@ -75,7 +75,6 @@ static void fc_disc_recv_rscn_req(struct fc_disc *disc, struct fc_frame *fp) struct fc_seq_els_data rjt_data; unsigned int len; int redisc = 0; - enum fc_els_rscn_ev_qual ev_qual; enum fc_els_rscn_addr_fmt fmt; LIST_HEAD(disc_ports); struct fc_disc_port *dp, *next; @@ -107,8 +106,6 @@ static void fc_disc_recv_rscn_req(struct fc_disc *disc, struct fc_frame *fp) goto reject; for (pp = (void *)(rp + 1); len > 0; len -= sizeof(*pp), pp++) { - ev_qual = pp->rscn_page_flags >> ELS_RSCN_EV_QUAL_BIT; - ev_qual &= ELS_RSCN_EV_QUAL_MASK; fmt = pp->rscn_page_flags >> ELS_RSCN_ADDR_FMT_BIT; fmt &= ELS_RSCN_ADDR_FMT_MASK; /* From c4c5fa35563a47957fa4f9c299ca1c6aadc27d50 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 15 Nov 2022 14:25:16 -0600 Subject: [PATCH 1730/4122] scsi: bfa: Replace one-element array with flexible-array member One-element arrays are deprecated, and we are replacing them with flexible array members instead. So, replace one-element array with flexible-array member in struct fdmi_attr_s. Important to mention is that doing a build before/after this patch results in no binary output differences. This helps with the ongoing efforts to tighten the FORTIFY_SOURCE routines on memcpy() and help us make progress towards globally enabling -fstrict-flex-arrays=3 [1]. Link: https://github.com/KSPP/linux/issues/209 Link: https://github.com/KSPP/linux/issues/79 Link: https://gcc.gnu.org/pipermail/gcc-patches/2022-October/602902.html [1] Signed-off-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/Y3P1rEEBq7HzJygq@work Reviewed-by: Kees Cook Signed-off-by: Martin K. 
Petersen --- drivers/scsi/bfa/bfa_fc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/bfa/bfa_fc.h b/drivers/scsi/bfa/bfa_fc.h index 0314e4b9e1fb..a12d693065ce 100644 --- a/drivers/scsi/bfa/bfa_fc.h +++ b/drivers/scsi/bfa/bfa_fc.h @@ -1548,7 +1548,7 @@ enum fdmi_port_attribute_type { struct fdmi_attr_s { __be16 type; __be16 len; - u8 value[1]; + u8 value[]; }; /* From 2c1a0a7584f5084f3ec79f86c9a54ee4c55307c4 Mon Sep 17 00:00:00 2001 From: Justin Tee Date: Tue, 15 Nov 2022 17:19:16 -0800 Subject: [PATCH 1731/4122] scsi: lpfc: Fix WQ|CQ|EQ resource check Adapter configurations with limited EQ resources may fail to initialize. Firmware resources are queried in lpfc_sli4_read_config(). The driver parameters cfg_irq_chann and cfg_hdw_queue are adjusted from defaults if constrained by firmware resources. The minimum resource check includes a special allocation for queues such as ELS, MBOX, NVME LS. However the additional reservation was also incorrectly applied to EQ resources. Reordered WQ|CQ|EQ resource checks to apply the special allocation adjustment to WQ and CQ resources only. Signed-off-by: Justin Tee Link: https://lore.kernel.org/r/20221116011921.105995-2-justintee8345@gmail.com Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_init.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index b49c39569386..a6e32ecd4151 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -10092,17 +10092,15 @@ lpfc_sli4_read_config(struct lpfc_hba *phba) qmin = phba->sli4_hba.max_cfg_param.max_wq; if (phba->sli4_hba.max_cfg_param.max_cq < qmin) qmin = phba->sli4_hba.max_cfg_param.max_cq; - if (phba->sli4_hba.max_cfg_param.max_eq < qmin) - qmin = phba->sli4_hba.max_cfg_param.max_eq; /* - * Whats left after this can go toward NVME / FCP. - * The minus 4 accounts for ELS, NVME LS, MBOX - * plus one extra. 
When configured for - * NVMET, FCP io channel WQs are not created. + * Reserve 4 (ELS, NVME LS, MBOX, plus one extra) and + * the remainder can be used for NVME / FCP. */ qmin -= 4; + if (phba->sli4_hba.max_cfg_param.max_eq < qmin) + qmin = phba->sli4_hba.max_cfg_param.max_eq; - /* Check to see if there is enough for NVME */ + /* Check to see if there is enough for default cfg */ if ((phba->cfg_irq_chann > qmin) || (phba->cfg_hdw_queue > qmin)) { lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT, From ae696255d655bec673e5a5707f37ff6a098e89c2 Mon Sep 17 00:00:00 2001 From: Justin Tee Date: Tue, 15 Nov 2022 17:19:17 -0800 Subject: [PATCH 1732/4122] scsi: lpfc: Correct bandwidth logging during receipt of congestion sync WCQE The lpfc_cmf_timer adjusts phba->cmf_link_byte_count periodically and can artificially inflate bandwidth percent. During bandwidth calculation, correct for this by setting a cap of logging a maximum of 100%. Bandwidth calculation is only used for display under LOG_CGN_MGMT so there is no expectation of impacts on performance. Signed-off-by: Justin Tee Link: https://lore.kernel.org/r/20221116011921.105995-3-justintee8345@gmail.com Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_sli.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 35c9404d5e49..182aaae60386 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -1846,6 +1846,12 @@ lpfc_cmf_sync_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, phba->cmf_link_byte_count); bwpcent = div64_u64(bw * 100 + slop, phba->cmf_link_byte_count); + /* Because of bytes adjustment due to shorter timer in + * lpfc_cmf_timer() the cmf_link_byte_count can be shorter and + * may seem like BW is above 100%. 
+ */ + if (bwpcent > 100) + bwpcent = 100; if (phba->cmf_max_bytes_per_interval < bw && bwpcent > 95) From d99af587d59ca39747b4328dad0b193655835c90 Mon Sep 17 00:00:00 2001 From: Justin Tee Date: Tue, 15 Nov 2022 17:19:18 -0800 Subject: [PATCH 1733/4122] scsi: lpfc: Fix MI capability display in cmf_info sysfs attribute The dynamic mi_ver value holds the currently configured MI setting. mi_ver was being displayed as part of the cmf_info sysfs attribute, when the output string meant to display MI capabilities instead. Add a mi_cap member in the lpfc_pc_sli4_params structure that will store MI capabilities during initialization so that cmf_info prints out capabilities instead of current configuration. Signed-off-by: Justin Tee Link: https://lore.kernel.org/r/20221116011921.105995-4-justintee8345@gmail.com Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_attr.c | 2 +- drivers/scsi/lpfc/lpfc_init.c | 3 +++ drivers/scsi/lpfc/lpfc_sli4.h | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 030ad1d59cbd..77e1b2911cb4 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -134,7 +134,7 @@ lpfc_cmf_info_show(struct device *dev, struct device_attribute *attr, scnprintf(tmp, sizeof(tmp), "Congestion Mgmt Info: E2Eattr %d Ver %d " "CMF %d cnt %d\n", - phba->sli4_hba.pc_sli4_params.mi_ver, + phba->sli4_hba.pc_sli4_params.mi_cap, cp ? 
cp->cgn_info_version : 0, phba->sli4_hba.pc_sli4_params.cmf, phba->cmf_timer_cnt); diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index a6e32ecd4151..a119c06742b8 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -699,6 +699,8 @@ lpfc_sli4_refresh_params(struct lpfc_hba *phba) return rc; } mbx_sli4_parameters = &mqe->un.get_sli4_parameters.sli4_parameters; + phba->sli4_hba.pc_sli4_params.mi_cap = + bf_get(cfg_mi_ver, mbx_sli4_parameters); /* Are we forcing MI off via module parameter? */ if (phba->cfg_enable_mi) @@ -13839,6 +13841,7 @@ lpfc_get_sli4_parameters(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq) mbx_sli4_parameters); phba->sli4_hba.extents_in_use = bf_get(cfg_ext, mbx_sli4_parameters); phba->sli4_hba.rpi_hdrs_in_use = bf_get(cfg_hdrr, mbx_sli4_parameters); + sli4_params->mi_cap = bf_get(cfg_mi_ver, mbx_sli4_parameters); /* Check for Extended Pre-Registered SGL support */ phba->cfg_xpsgl = bf_get(cfg_xpsgl, mbx_sli4_parameters); diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index cbb1aa1cf025..f927c2a25d54 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -556,6 +556,7 @@ struct lpfc_pc_sli4_params { #define LPFC_MIB3_SUPPORT 3 uint16_t mi_value; #define LPFC_DFLT_MIB_VAL 2 + uint8_t mi_cap; uint8_t mib_bde_cnt; uint8_t cmf; uint8_t cqv; From 97f256913c5d8a633efe4f11d4ed2d6a3ea42635 Mon Sep 17 00:00:00 2001 From: Justin Tee Date: Tue, 15 Nov 2022 17:19:19 -0800 Subject: [PATCH 1734/4122] scsi: lpfc: Fix crash involving race between FLOGI timeout and devloss handler When a FLOGI completes with a sequence timeout error, a freed kref ptr dereference crash can occur due to a timing race involving ndlp referencing in lpfc_dev_loss_tmo_callbk. Fix by ensuring the driver accounts for an outstanding FLOGI when dev_loss is active. 
Also, don't remove the HBA_FLOGI_OUTSTANDING flag when the FLOGI is retried to allow the driver to handle the reference counts correctly in lpfc_dev_loss_tmo_handler. Reported-by: Dietmar Hahn Tested-by: Dietmar Hahn Signed-off-by: Justin Tee Link: https://lore.kernel.org/r/20221116011921.105995-5-justintee8345@gmail.com Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_els.c | 36 +++++++++++++++++++++++++++----- drivers/scsi/lpfc/lpfc_hbadisc.c | 36 +++++++++++++++++++++++--------- 2 files changed, 57 insertions(+), 15 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 9326340d4226..919741bbe267 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -952,6 +952,7 @@ lpfc_cmpl_els_flogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, uint16_t fcf_index; int rc; u32 ulp_status, ulp_word4, tmo; + bool flogi_in_retry = false; /* Check to see if link went down during discovery */ if (lpfc_els_chk_latt(vport)) { @@ -1022,8 +1023,23 @@ stop_rr_fcf_flogi: phba->hba_flag, phba->fcf.fcf_flag); /* Check for retry */ - if (lpfc_els_retry(phba, cmdiocb, rspiocb)) + if (lpfc_els_retry(phba, cmdiocb, rspiocb)) { + /* Address a timing race with dev_loss. If dev_loss + * is active on this FPort node, put the initial ref + * count back to stop premature node release actions. + */ + lpfc_check_nlp_post_devloss(vport, ndlp); + flogi_in_retry = true; goto out; + } + + /* The FLOGI will not be retried. If the FPort node is not + * registered with the SCSI transport, remove the initial + * reference to trigger node release. + */ + if (!(ndlp->nlp_flag & NLP_IN_DEV_LOSS) && + !(ndlp->fc4_xpt_flags & SCSI_XPT_REGD)) + lpfc_nlp_put(ndlp); lpfc_printf_vlog(vport, KERN_WARNING, LOG_TRACE_EVENT, "0150 FLOGI failure Status:x%x/x%x " @@ -1086,7 +1102,7 @@ stop_rr_fcf_flogi: spin_unlock_irq(shost->host_lock); /* - * The FLogI succeeded. Sync the data for the CPU before + * The FLOGI succeeded. 
Sync the data for the CPU before * accessing it. */ prsp = list_get_first(&pcmd->list, struct lpfc_dmabuf, list); @@ -1108,6 +1124,12 @@ stop_rr_fcf_flogi: vport->phba->pport->vmid_flag |= (LPFC_VMID_ISSUE_QFPA | LPFC_VMID_TYPE_PRIO); + /* + * Address a timing race with dev_loss. If dev_loss is active on + * this FPort node, put the initial ref count back to stop premature + * node release actions. + */ + lpfc_check_nlp_post_devloss(vport, ndlp); if (vport->port_state == LPFC_FLOGI) { /* * If Common Service Parameters indicate Nport @@ -1198,7 +1220,9 @@ flogifail: lpfc_issue_clear_la(phba, vport); } out: - phba->hba_flag &= ~HBA_FLOGI_OUTSTANDING; + if (!flogi_in_retry) + phba->hba_flag &= ~HBA_FLOGI_OUTSTANDING; + lpfc_els_free_iocb(phba, cmdiocb); lpfc_nlp_put(ndlp); } @@ -1365,15 +1389,17 @@ lpfc_issue_els_flogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, return 1; } + /* Avoid race with FLOGI completion and hba_flags. */ + phba->hba_flag |= (HBA_FLOGI_ISSUED | HBA_FLOGI_OUTSTANDING); + rc = lpfc_issue_fabric_iocb(phba, elsiocb); if (rc == IOCB_ERROR) { + phba->hba_flag &= ~(HBA_FLOGI_ISSUED | HBA_FLOGI_OUTSTANDING); lpfc_els_free_iocb(phba, elsiocb); lpfc_nlp_put(ndlp); return 1; } - phba->hba_flag |= (HBA_FLOGI_ISSUED | HBA_FLOGI_OUTSTANDING); - /* Clear external loopback plug detected flag */ phba->link_flag &= ~LS_EXTERNAL_LOOPBACK; diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index d38ebd7281b9..80375d73b732 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -426,10 +426,6 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp) name = (uint8_t *)&ndlp->nlp_portname; phba = vport->phba; - spin_lock_irqsave(&ndlp->lock, iflags); - ndlp->nlp_flag &= ~NLP_IN_DEV_LOSS; - spin_unlock_irqrestore(&ndlp->lock, iflags); - if (phba->sli_rev == LPFC_SLI_REV4) fcf_inuse = lpfc_fcf_inuse(phba); @@ -451,22 +447,36 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp) *name, *(name+1), 
*(name+2), *(name+3), *(name+4), *(name+5), *(name+6), *(name+7), ndlp->nlp_DID); + + spin_lock_irqsave(&ndlp->lock, iflags); + ndlp->nlp_flag &= ~NLP_IN_DEV_LOSS; + spin_unlock_irqrestore(&ndlp->lock, iflags); return fcf_inuse; } /* Fabric nodes are done. */ if (ndlp->nlp_type & NLP_FABRIC) { spin_lock_irqsave(&ndlp->lock, iflags); - /* In massive vport configuration settings, it's possible - * dev_loss_tmo fired during node recovery. So, check if - * fabric nodes are in discovery states outstanding. + + /* In massive vport configuration settings or when the FLOGI + * completes with a sequence timeout, it's possible + * dev_loss_tmo fired during node recovery. The driver has to + * account for this race to allow for recovery and keep + * the reference counting correct. */ switch (ndlp->nlp_DID) { case Fabric_DID: fc_vport = vport->fc_vport; - if (fc_vport && - fc_vport->vport_state == FC_VPORT_INITIALIZING) - recovering = true; + if (fc_vport) { + /* NPIV path. */ + if (fc_vport->vport_state == + FC_VPORT_INITIALIZING) + recovering = true; + } else { + /* Physical port path. 
*/ + if (phba->hba_flag & HBA_FLOGI_OUTSTANDING) + recovering = true; + } break; case Fabric_Cntl_DID: if (ndlp->nlp_flag & NLP_REG_LOGIN_SEND) @@ -514,6 +524,9 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp) return fcf_inuse; } + spin_lock_irqsave(&ndlp->lock, iflags); + ndlp->nlp_flag &= ~NLP_IN_DEV_LOSS; + spin_unlock_irqrestore(&ndlp->lock, iflags); lpfc_nlp_put(ndlp); return fcf_inuse; } @@ -552,6 +565,9 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp) return fcf_inuse; } + spin_lock_irqsave(&ndlp->lock, iflags); + ndlp->nlp_flag &= ~NLP_IN_DEV_LOSS; + spin_unlock_irqrestore(&ndlp->lock, iflags); if (!(ndlp->fc4_xpt_flags & NVME_XPT_REGD)) lpfc_disc_state_machine(vport, ndlp, NULL, NLP_EVT_DEVICE_RM); From 435d6b6564568b82ec74ee7d4d0bcb1f24ccb5f5 Mon Sep 17 00:00:00 2001 From: Jim Cromie Date: Thu, 17 Nov 2022 10:16:32 -0700 Subject: [PATCH 1735/4122] vmlinux.lds.h: fix BOUNDED_SECTION_(PRE|POST)_LABEL macros Commit 2f465b921bb8 ("vmlinux.lds.h: place optional header space in BOUNDED_SECTION") added BOUNDED_SECTION_(PRE|POST)_LABEL macros, encapsulating the basic boilerplate to KEEP/pack records into a section, and to mark the begin and end of the section with linker-symbols. But it tried to do extra, adding KEEP(*(.gnu.linkonce.##_sec_)) to optionally reserve a header record in front of the data. It wrongly placed the KEEP after the linker-symbol starting the section, so if a header was added, it would wind up in the data. Moving the KEEP to the "correct" place proved brittle, and too clever by half. The obvious safe fix is to remove the KEEP and restore the plain old boilerplate. The header can be added later, with separate macros. 
Also, the macro var-names: _s_, _e_ are nearly invisible, change them to more obvious names: _BEGIN_, _END_ Fixes: 2f465b921bb8 ("vmlinux.lds.h: place optional header space in BOUNDED_SECTION") Signed-off-by: Jim Cromie Link: https://lore.kernel.org/r/20221117171633.923628-2-jim.cromie@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/asm-generic/vmlinux.lds.h | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index b3ca56ac163f..c17f94785253 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -193,17 +193,15 @@ # endif #endif -#define BOUNDED_SECTION_PRE_LABEL(_sec_, _label_, _s_, _e_) \ - _s_##_label_ = .; \ - KEEP(*(.gnu.linkonce.##_sec_)) \ +#define BOUNDED_SECTION_PRE_LABEL(_sec_, _label_, _BEGIN_, _END_) \ + _BEGIN_##_label_ = .; \ KEEP(*(_sec_)) \ - _e_##_label_ = .; + _END_##_label_ = .; -#define BOUNDED_SECTION_POST_LABEL(_sec_, _label_, _s_, _e_) \ - _label_##_s_ = .; \ - KEEP(*(.gnu.linkonce.##_sec_)) \ +#define BOUNDED_SECTION_POST_LABEL(_sec_, _label_, _BEGIN_, _END_) \ + _label_##_BEGIN_ = .; \ KEEP(*(_sec_)) \ - _label_##_e_ = .; + _label_##_END_ = .; #define BOUNDED_SECTION_BY(_sec_, _label_) \ BOUNDED_SECTION_PRE_LABEL(_sec_, _label_, __start, __stop) From 1d926e259d8f8195fdfaeea7951149001894b473 Mon Sep 17 00:00:00 2001 From: Jim Cromie Date: Thu, 17 Nov 2022 10:16:33 -0700 Subject: [PATCH 1736/4122] vmlinux.lds.h: add HEADERED_SECTION_* macros These macros elaborate on BOUNDED_SECTION_(PRE|POST)_LABEL macros, prepending an optional KEEP(.gnu.linkonce##_sec_) reservation, and a linker-symbol to address it. This allows a developer to define a header struct (which must fit with the section's base struct-type), and could contain: 1- fields whose value is common to the entire set of data-records. This allows the header & data structs to specialize, complement each other, and shrink. 
2- an uplink pointer to an organizing struct which refs other related/sub data-tables header record is addressable via the extern'd header linker-symbol Once the linker-symbols created by the macro are ref'd extern in code, that code can compute a record's index (ptr - start) in the "primary" table, then use it to index into the related/sub tables. Adding a primary.map_* field foreach sub-table would then allow deduplication and remapping of that sub-table. This is aimed at dyndbg's struct _ddebug __dyndbg[] section, whose 3 columns: function, file, module are 50%, 90%, 100% redundant. The module column is fully recoverable after dynamic_debug_init() saves it to each ddebug_table.module as the builtin __dyndbg[] table is parsed. Given that those 3 columns use 24/56 of a _ddebug record, a dyndbg=y kernel with ~5k callsites could reduce kernel memory substantially. Returning that memory to the kernel buddy-allocator? is then possible. Signed-off-by: Jim Cromie Link: https://lore.kernel.org/r/20221117171633.923628-3-jim.cromie@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/asm-generic/vmlinux.lds.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index c17f94785253..c9a475a30803 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -208,6 +208,21 @@ #define BOUNDED_SECTION(_sec) BOUNDED_SECTION_BY(_sec, _sec) +#define HEADERED_SECTION_PRE_LABEL(_sec_, _label_, _BEGIN_, _END_, _HDR_) \ + _HDR_##_label_ = .; \ + KEEP(*(.gnu.linkonce.##_sec_)) \ + BOUNDED_SECTION_PRE_LABEL(_sec_, _label_, _BEGIN_, _END_) + +#define HEADERED_SECTION_POST_LABEL(_sec_, _label_, _BEGIN_, _END_, _HDR_) \ + _label_##_HDR_ = .; \ + KEEP(*(.gnu.linkonce.##_sec_)) \ + BOUNDED_SECTION_POST_LABEL(_sec_, _label_, _BEGIN_, _END_) + +#define HEADERED_SECTION_BY(_sec_, _label_) \ + HEADERED_SECTION_PRE_LABEL(_sec_, _label_, __start, __stop) + +#define 
HEADERED_SECTION(_sec) HEADERED_SECTION_BY(_sec, _sec) + #ifdef CONFIG_TRACE_BRANCH_PROFILING #define LIKELY_PROFILE() \ BOUNDED_SECTION_BY(_ftrace_annotated_branch, _annotated_branch_profile) From a3f3e97a039cab5b7efc754dc2907ab5044c7c88 Mon Sep 17 00:00:00 2001 From: Michael Straube Date: Thu, 10 Nov 2022 10:09:27 +0100 Subject: [PATCH 1737/4122] staging: r8188eu: rename three functions Prefix the names of the following functions with the driver name. The original names are bad for the global namespace. While at it, convert is_IBSS_empty() to all lower case to follow kernel coding style. is_client_associated_to_ap() is_client_associated_to_ibss() is_IBSS_empty() Suggested-by: Greg Kroah-Hartman Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Michael Straube Link: https://lore.kernel.org/r/20221110090927.17274-1-straube.linux@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 14 +++++++------- drivers/staging/r8188eu/core/rtw_wlan_util.c | 8 ++++---- drivers/staging/r8188eu/include/rtw_mlme_ext.h | 6 +++--- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index be33489d3dfd..161cb67f7882 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -5831,7 +5831,7 @@ void rtw_mlme_site_survey_done(struct adapter *adapter) int res; u8 reg; - if ((is_client_associated_to_ap(adapter)) || + if ((r8188eu_is_client_associated_to_ap(adapter)) || ((pmlmeinfo->state & 0x03) == WIFI_FW_ADHOC_STATE)) { /* enable to rx data frame */ rtw_write16(adapter, REG_RXFLTMAP2, 0xFFFF); @@ -5982,7 +5982,7 @@ void site_survey(struct adapter *padapter) Restore_DM_Func_Flag(padapter); /* Switch_DM_Func(padapter, DYNAMIC_ALL_FUNC_ENABLE, true); */ - if (is_client_associated_to_ap(padapter)) + if (r8188eu_is_client_associated_to_ap(padapter)) issue_nulldata(padapter, NULL, 0, 3, 500); 
rtw_mlme_site_survey_done(padapter); @@ -6952,7 +6952,7 @@ void mlmeext_sta_del_event_callback(struct adapter *padapter) struct mlme_ext_priv *pmlmeext = &padapter->mlmeextpriv; struct mlme_ext_info *pmlmeinfo = &pmlmeext->mlmext_info; - if (is_client_associated_to_ap(padapter) || is_IBSS_empty(padapter)) { + if (r8188eu_is_client_associated_to_ap(padapter) || r8188eu_is_ibss_empty(padapter)) { mlme_disconnect(padapter); rtw_set_bssid(padapter, null_addr); @@ -7025,7 +7025,7 @@ void linked_status_chk(struct adapter *padapter) rtl8188e_sreset_linked_status_check(padapter); - if (is_client_associated_to_ap(padapter)) { + if (r8188eu_is_client_associated_to_ap(padapter)) { /* linked infrastructure client mode */ int tx_chk = _SUCCESS, rx_chk = _SUCCESS; @@ -7097,7 +7097,7 @@ void linked_status_chk(struct adapter *padapter) pmlmeinfo->link_count = 0; } } /* end of if ((psta = rtw_get_stainfo(pstapriv, passoc_res->network.MacAddress)) != NULL) */ - } else if (is_client_associated_to_ibss(padapter)) { + } else if (r8188eu_is_client_associated_to_ibss(padapter)) { /* linked IBSS mode */ /* for each assoc list entry to check the rx pkt counter */ for (i = IBSS_START_MAC_ID; i < NUM_STA; i++) { @@ -7415,7 +7415,7 @@ u8 disconnect_hdl(struct adapter *padapter, unsigned char *pbuf) u8 val8; int res; - if (is_client_associated_to_ap(padapter)) + if (r8188eu_is_client_associated_to_ap(padapter)) issue_deauth_ex(padapter, pnetwork->MacAddress, WLAN_REASON_DEAUTH_LEAVING, param->deauth_timeout_ms / 100, 100); mlme_disconnect(padapter); @@ -7527,7 +7527,7 @@ u8 sitesurvey_cmd_hdl(struct adapter *padapter, u8 *pbuf) pmlmeext->sitesurvey_res.scan_mode = pparm->scan_mode; /* issue null data if associating to the AP */ - if (is_client_associated_to_ap(padapter)) { + if (r8188eu_is_client_associated_to_ap(padapter)) { pmlmeext->sitesurvey_res.state = SCAN_TXNULL; issue_nulldata(padapter, NULL, 1, 3, 500); diff --git a/drivers/staging/r8188eu/core/rtw_wlan_util.c 
b/drivers/staging/r8188eu/core/rtw_wlan_util.c index c95438a12b59..965bb7da4cce 100644 --- a/drivers/staging/r8188eu/core/rtw_wlan_util.c +++ b/drivers/staging/r8188eu/core/rtw_wlan_util.c @@ -331,7 +331,7 @@ u16 get_beacon_interval(struct wlan_bssid_ex *bss) return le16_to_cpu(val); } -bool is_client_associated_to_ap(struct adapter *padapter) +bool r8188eu_is_client_associated_to_ap(struct adapter *padapter) { struct mlme_ext_priv *pmlmeext; struct mlme_ext_info *pmlmeinfo; @@ -348,7 +348,7 @@ bool is_client_associated_to_ap(struct adapter *padapter) return false; } -bool is_client_associated_to_ibss(struct adapter *padapter) +bool r8188eu_is_client_associated_to_ibss(struct adapter *padapter) { struct mlme_ext_priv *pmlmeext = &padapter->mlmeextpriv; struct mlme_ext_info *pmlmeinfo = &pmlmeext->mlmext_info; @@ -359,7 +359,7 @@ bool is_client_associated_to_ibss(struct adapter *padapter) return false; } -bool is_IBSS_empty(struct adapter *padapter) +bool r8188eu_is_ibss_empty(struct adapter *padapter) { unsigned int i; struct mlme_ext_priv *pmlmeext = &padapter->mlmeextpriv; @@ -893,7 +893,7 @@ int rtw_check_bcn_info(struct adapter *Adapter, u8 *pframe, u32 packet_len) unsigned short ht_cap_info; unsigned char ht_info_infos_0; - if (!is_client_associated_to_ap(Adapter)) + if (!r8188eu_is_client_associated_to_ap(Adapter)) return true; len = packet_len - sizeof(struct ieee80211_hdr_3addr); diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h index 9c0af4704607..c46fc1a53085 100644 --- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h +++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h @@ -431,9 +431,9 @@ void update_network(struct wlan_bssid_ex *dst, struct wlan_bssid_ex *src, u8 *get_my_bssid(struct wlan_bssid_ex *pnetwork); u16 get_beacon_interval(struct wlan_bssid_ex *bss); -bool is_client_associated_to_ap(struct adapter *padapter); -bool is_client_associated_to_ibss(struct adapter *padapter); -bool 
is_IBSS_empty(struct adapter *padapter); +bool r8188eu_is_client_associated_to_ap(struct adapter *padapter); +bool r8188eu_is_client_associated_to_ibss(struct adapter *padapter); +bool r8188eu_is_ibss_empty(struct adapter *padapter); unsigned char check_assoc_AP(u8 *pframe, uint len); From 9b35a6926c613457e0dce179871028a0aa6efb06 Mon Sep 17 00:00:00 2001 From: Michael Straube Date: Fri, 11 Nov 2022 09:37:30 +0100 Subject: [PATCH 1738/4122] staging: r8188eu: convert aes_cipher() to void The function aes_cipher() returns always _SUCCESS and its callers do not use the return value. So we can convert the return type to void and get rid of another use of _SUCCESS. Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Michael Straube Link: https://lore.kernel.org/r/20221111083733.3144-2-straube.linux@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_security.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_security.c b/drivers/staging/r8188eu/core/rtw_security.c index 5bba57d18b5f..780019ce1b98 100644 --- a/drivers/staging/r8188eu/core/rtw_security.c +++ b/drivers/staging/r8188eu/core/rtw_security.c @@ -954,7 +954,7 @@ static void bitwise_xor(u8 *ina, u8 *inb, u8 *out) } -static int aes_cipher(u8 *key, uint hdrlen, u8 *pframe, uint plen) +static void aes_cipher(u8 *key, uint hdrlen, u8 *pframe, uint plen) { uint qc_exists, a4_exists, i, j, payload_remainder, num_blocks, payload_index; @@ -1083,8 +1083,6 @@ static int aes_cipher(u8 *key, uint hdrlen, u8 *pframe, uint plen) bitwise_xor(aes_out, padded_buffer, chain_buffer); for (j = 0; j < 8; j++) pframe[payload_index++] = chain_buffer[j]; - - return _SUCCESS; } u32 rtw_aes_encrypt(struct adapter *padapter, struct xmit_frame *pxmitframe) From 8985814bb3732aaf7ce5bf892d2827bd4d854fc7 Mon Sep 17 00:00:00 2001 From: Michael Straube Date: Fri, 11 Nov 2022 09:37:31 +0100 Subject: [PATCH 1739/4122] staging: r8188eu: convert 
rtw_xmit_resource_alloc() to common error logic Convert the function rtw_xmit_resource_alloc() away from returning _FAIL or _SUCCESS which uses inverted error logic. Use the common error logic instead. Return 0 for success and negative values for failure. Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Michael Straube Link: https://lore.kernel.org/r/20221111083733.3144-3-straube.linux@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_xmit.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_xmit.c b/drivers/staging/r8188eu/core/rtw_xmit.c index bd6c1a401c59..c0fda6ce7c56 100644 --- a/drivers/staging/r8188eu/core/rtw_xmit.c +++ b/drivers/staging/r8188eu/core/rtw_xmit.c @@ -38,7 +38,7 @@ static int rtw_xmit_resource_alloc(struct adapter *padapter, struct xmit_buf *px { pxmitbuf->pallocated_buf = kzalloc(alloc_sz, GFP_KERNEL); if (!pxmitbuf->pallocated_buf) - return _FAIL; + return -ENOMEM; pxmitbuf->pbuf = (u8 *)ALIGN((size_t)(pxmitbuf->pallocated_buf), XMITBUF_ALIGN_SZ); pxmitbuf->dma_transfer_addr = 0; @@ -46,10 +46,10 @@ static int rtw_xmit_resource_alloc(struct adapter *padapter, struct xmit_buf *px pxmitbuf->pxmit_urb = usb_alloc_urb(0, GFP_KERNEL); if (!pxmitbuf->pxmit_urb) { kfree(pxmitbuf->pallocated_buf); - return _FAIL; + return -ENOMEM; } - return _SUCCESS; + return 0; } static void rtw_xmit_resource_free(struct adapter *padapter, struct xmit_buf *pxmitbuf, @@ -151,12 +151,12 @@ s32 _rtw_init_xmit_priv(struct xmit_priv *pxmitpriv, struct adapter *padapter) pxmitbuf->ext_tag = false; /* Tx buf allocation may fail sometimes, so sleep and retry. 
*/ - res = rtw_xmit_resource_alloc(padapter, pxmitbuf, (MAX_XMITBUF_SZ + XMITBUF_ALIGN_SZ)); - if (res == _FAIL) { + if (rtw_xmit_resource_alloc(padapter, pxmitbuf, (MAX_XMITBUF_SZ + XMITBUF_ALIGN_SZ))) { msleep(10); - res = rtw_xmit_resource_alloc(padapter, pxmitbuf, (MAX_XMITBUF_SZ + XMITBUF_ALIGN_SZ)); - if (res == _FAIL) + if (rtw_xmit_resource_alloc(padapter, pxmitbuf, (MAX_XMITBUF_SZ + XMITBUF_ALIGN_SZ))) { + res = _FAIL; goto free_xmitbuf; + } } pxmitbuf->flags = XMIT_VO_QUEUE; @@ -188,8 +188,7 @@ s32 _rtw_init_xmit_priv(struct xmit_priv *pxmitpriv, struct adapter *padapter) pxmitbuf->padapter = padapter; pxmitbuf->ext_tag = true; - res = rtw_xmit_resource_alloc(padapter, pxmitbuf, max_xmit_extbuf_size + XMITBUF_ALIGN_SZ); - if (res == _FAIL) { + if (rtw_xmit_resource_alloc(padapter, pxmitbuf, max_xmit_extbuf_size + XMITBUF_ALIGN_SZ)) { res = _FAIL; goto free_xmit_extbuf; } From 64ce3acd0a50b3ad7ff345fe11bbb3d39cab216b Mon Sep 17 00:00:00 2001 From: Michael Straube Date: Fri, 11 Nov 2022 09:37:32 +0100 Subject: [PATCH 1740/4122] staging: r8188eu: convert _rtw_init_xmit_priv() to common error logic Convert the function _rtw_init_xmit_priv() away from returning _FAIL or _SUCCESS which uses inverted error logic. Use the common error logic instead. Return 0 for success and negative values for failure. 
Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Michael Straube Link: https://lore.kernel.org/r/20221111083733.3144-4-straube.linux@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_xmit.c | 28 +++++++--------------- drivers/staging/r8188eu/include/rtw_xmit.h | 2 +- drivers/staging/r8188eu/os_dep/os_intfs.c | 2 +- 3 files changed, 10 insertions(+), 22 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_xmit.c b/drivers/staging/r8188eu/core/rtw_xmit.c index c0fda6ce7c56..34494f08c0cd 100644 --- a/drivers/staging/r8188eu/core/rtw_xmit.c +++ b/drivers/staging/r8188eu/core/rtw_xmit.c @@ -59,12 +59,11 @@ static void rtw_xmit_resource_free(struct adapter *padapter, struct xmit_buf *px kfree(pxmitbuf->pallocated_buf); } -s32 _rtw_init_xmit_priv(struct xmit_priv *pxmitpriv, struct adapter *padapter) +int _rtw_init_xmit_priv(struct xmit_priv *pxmitpriv, struct adapter *padapter) { int i; struct xmit_buf *pxmitbuf; struct xmit_frame *pxframe; - int res = _SUCCESS; u32 max_xmit_extbuf_size = MAX_XMIT_EXTBUF_SZ; u32 num_xmit_extbuf = NR_XMIT_EXTBUFF; @@ -97,7 +96,6 @@ s32 _rtw_init_xmit_priv(struct xmit_priv *pxmitpriv, struct adapter *padapter) if (!pxmitpriv->pallocated_frame_buf) { pxmitpriv->pxmit_frame_buf = NULL; - res = _FAIL; goto exit; } pxmitpriv->pxmit_frame_buf = (u8 *)ALIGN((size_t)(pxmitpriv->pallocated_frame_buf), 4); @@ -132,10 +130,8 @@ s32 _rtw_init_xmit_priv(struct xmit_priv *pxmitpriv, struct adapter *padapter) pxmitpriv->pallocated_xmitbuf = vzalloc(NR_XMITBUFF * sizeof(struct xmit_buf) + 4); - if (!pxmitpriv->pallocated_xmitbuf) { - res = _FAIL; + if (!pxmitpriv->pallocated_xmitbuf) goto free_frame_buf; - } pxmitpriv->pxmitbuf = (u8 *)ALIGN((size_t)(pxmitpriv->pallocated_xmitbuf), 4); /* pxmitpriv->pxmitbuf = pxmitpriv->pallocated_xmitbuf + 4 - */ @@ -153,10 +149,8 @@ s32 _rtw_init_xmit_priv(struct xmit_priv *pxmitpriv, struct adapter *padapter) /* Tx buf allocation may fail sometimes, so sleep and retry. 
*/ if (rtw_xmit_resource_alloc(padapter, pxmitbuf, (MAX_XMITBUF_SZ + XMITBUF_ALIGN_SZ))) { msleep(10); - if (rtw_xmit_resource_alloc(padapter, pxmitbuf, (MAX_XMITBUF_SZ + XMITBUF_ALIGN_SZ))) { - res = _FAIL; + if (rtw_xmit_resource_alloc(padapter, pxmitbuf, (MAX_XMITBUF_SZ + XMITBUF_ALIGN_SZ))) goto free_xmitbuf; - } } pxmitbuf->flags = XMIT_VO_QUEUE; @@ -172,10 +166,8 @@ s32 _rtw_init_xmit_priv(struct xmit_priv *pxmitpriv, struct adapter *padapter) pxmitpriv->pallocated_xmit_extbuf = vzalloc(num_xmit_extbuf * sizeof(struct xmit_buf) + 4); - if (!pxmitpriv->pallocated_xmit_extbuf) { - res = _FAIL; + if (!pxmitpriv->pallocated_xmit_extbuf) goto free_xmitbuf; - } pxmitpriv->pxmit_extbuf = (u8 *)ALIGN((size_t)(pxmitpriv->pallocated_xmit_extbuf), 4); @@ -188,10 +180,8 @@ s32 _rtw_init_xmit_priv(struct xmit_priv *pxmitpriv, struct adapter *padapter) pxmitbuf->padapter = padapter; pxmitbuf->ext_tag = true; - if (rtw_xmit_resource_alloc(padapter, pxmitbuf, max_xmit_extbuf_size + XMITBUF_ALIGN_SZ)) { - res = _FAIL; + if (rtw_xmit_resource_alloc(padapter, pxmitbuf, max_xmit_extbuf_size + XMITBUF_ALIGN_SZ)) goto free_xmit_extbuf; - } list_add_tail(&pxmitbuf->list, &pxmitpriv->free_xmit_extbuf_queue.queue); pxmitbuf++; @@ -199,10 +189,8 @@ s32 _rtw_init_xmit_priv(struct xmit_priv *pxmitpriv, struct adapter *padapter) pxmitpriv->free_xmit_extbuf_cnt = num_xmit_extbuf; - if (rtw_alloc_hwxmits(padapter)) { - res = _FAIL; + if (rtw_alloc_hwxmits(padapter)) goto free_xmit_extbuf; - } rtw_init_hwxmits(pxmitpriv->hwxmits, pxmitpriv->hwxmit_entry); @@ -225,7 +213,7 @@ s32 _rtw_init_xmit_priv(struct xmit_priv *pxmitpriv, struct adapter *padapter) rtl8188eu_init_xmit_priv(padapter); - return _SUCCESS; + return 0; free_xmit_extbuf: pxmitbuf = (struct xmit_buf *)pxmitpriv->pxmit_extbuf; @@ -245,7 +233,7 @@ free_xmitbuf: free_frame_buf: vfree(pxmitpriv->pallocated_frame_buf); exit: - return res; + return -ENOMEM; } static void rtw_pkt_complete(struct adapter *padapter, struct sk_buff 
*pkt) diff --git a/drivers/staging/r8188eu/include/rtw_xmit.h b/drivers/staging/r8188eu/include/rtw_xmit.h index cff065554608..6e7ebea5362d 100644 --- a/drivers/staging/r8188eu/include/rtw_xmit.h +++ b/drivers/staging/r8188eu/include/rtw_xmit.h @@ -351,7 +351,7 @@ s32 rtw_txframes_pending(struct adapter *padapter); s32 rtw_txframes_sta_ac_pending(struct adapter *padapter, struct pkt_attrib *pattrib); void rtw_init_hwxmits(struct hw_xmit *phwxmit, int entry); -s32 _rtw_init_xmit_priv(struct xmit_priv *pxmitpriv, struct adapter *padapter); +int _rtw_init_xmit_priv(struct xmit_priv *pxmitpriv, struct adapter *padapter); void _rtw_free_xmit_priv(struct xmit_priv *pxmitpriv); int rtw_alloc_hwxmits(struct adapter *padapter); void rtw_free_hwxmits(struct adapter *padapter); diff --git a/drivers/staging/r8188eu/os_dep/os_intfs.c b/drivers/staging/r8188eu/os_dep/os_intfs.c index 66556e07ed93..44eb95a7682c 100644 --- a/drivers/staging/r8188eu/os_dep/os_intfs.c +++ b/drivers/staging/r8188eu/os_dep/os_intfs.c @@ -480,7 +480,7 @@ u8 rtw_init_drv_sw(struct adapter *padapter) init_mlme_ext_priv(padapter); - if (_rtw_init_xmit_priv(&padapter->xmitpriv, padapter) == _FAIL) { + if (_rtw_init_xmit_priv(&padapter->xmitpriv, padapter)) { dev_err(dvobj_to_dev(padapter->dvobj), "_rtw_init_xmit_priv failed\n"); goto free_mlme_ext; } From 7209757311d6eec4c531feba31b170c33122491e Mon Sep 17 00:00:00 2001 From: Michael Straube Date: Fri, 11 Nov 2022 09:37:33 +0100 Subject: [PATCH 1741/4122] staging: r8188eu: binstallGrpkey is set to _FAIL The variable binstallGrpkey is set to _FAIL which is defined as 0. Use false to set the variable to get rid of another use of _FAIL. 
Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Michael Straube Link: https://lore.kernel.org/r/20221111083733.3144-5-straube.linux@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/os_dep/os_intfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/r8188eu/os_dep/os_intfs.c b/drivers/staging/r8188eu/os_dep/os_intfs.c index 44eb95a7682c..2f59bb994796 100644 --- a/drivers/staging/r8188eu/os_dep/os_intfs.c +++ b/drivers/staging/r8188eu/os_dep/os_intfs.c @@ -405,7 +405,7 @@ static void rtw_init_default_value(struct adapter *padapter) pmlmepriv->htpriv.ampdu_enable = false;/* set to disabled */ /* security_priv */ - psecuritypriv->binstallGrpkey = _FAIL; + psecuritypriv->binstallGrpkey = false; psecuritypriv->sw_encrypt = pregistrypriv->software_encrypt; psecuritypriv->sw_decrypt = pregistrypriv->software_decrypt; psecuritypriv->dot11AuthAlgrthm = dot11AuthAlgrthm_Open; /* open system */ From b397fc48c5d9b70ad17ffeaf8607a4f49055c0d9 Mon Sep 17 00:00:00 2001 From: Philipp Hortmann Date: Sun, 13 Nov 2022 05:34:54 +0100 Subject: [PATCH 1742/4122] staging: rtl8192e: Rename bTxDisableRate.., RegMaxLPSAwa.. and bTxUseD.. Rename variable bTxDisableRateFallBack to tx_dis_rate_fallback, RegMaxLPSAwakeIntvl to reg_max_lps_awake_intvl and bTxUseDriverAssingedRate to tx_use_drv_assinged_rate to avoid CamelCase which is not accepted by checkpatch. 
Signed-off-by: Philipp Hortmann Link: https://lore.kernel.org/r/092eb2dc73d37daf851ea9ef9cb7e4df6f766845.1668313325.git.philipp.g.hortmann@gmail.com Signed-off-by: Greg Kroah-Hartman --- .../staging/rtl8192e/rtl8192e/r8192E_dev.c | 4 ++-- drivers/staging/rtl8192e/rtl8192e/rtl_core.c | 10 ++++---- drivers/staging/rtl8192e/rtl8192e/rtl_wx.c | 2 +- drivers/staging/rtl8192e/rtl819x_HTProc.c | 4 ++-- drivers/staging/rtl8192e/rtllib.h | 10 ++++---- drivers/staging/rtl8192e/rtllib_softmac.c | 14 +++++------ drivers/staging/rtl8192e/rtllib_tx.c | 24 +++++++++---------- 7 files changed, 34 insertions(+), 34 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c index 210b7ecc273a..39aac83d8960 100644 --- a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c +++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c @@ -1136,8 +1136,8 @@ void rtl92e_fill_tx_desc(struct net_device *dev, struct tx_desc *pdesc, cb_desc->priority); pdesc->TxFWInfoSize = sizeof(struct tx_fwinfo_8190pci); - pdesc->DISFB = cb_desc->bTxDisableRateFallBack; - pdesc->USERATE = cb_desc->bTxUseDriverAssingedRate; + pdesc->DISFB = cb_desc->tx_dis_rate_fallback; + pdesc->USERATE = cb_desc->tx_use_drv_assinged_rate; pdesc->FirstSeg = 1; pdesc->LastSeg = 1; diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c index 213aac943ef7..952fe8d7a7be 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c @@ -494,8 +494,8 @@ static void _rtl92e_prepare_beacon(struct tasklet_struct *t) tcb_desc->queue_index = BEACON_QUEUE; tcb_desc->data_rate = 2; tcb_desc->RATRIndex = 7; - tcb_desc->bTxDisableRateFallBack = 1; - tcb_desc->bTxUseDriverAssingedRate = 1; + tcb_desc->tx_dis_rate_fallback = 1; + tcb_desc->tx_use_drv_assinged_rate = 1; skb_push(pnewskb, priv->rtllib->tx_headroom); pdesc = &ring->desc[0]; @@ -822,7 +822,7 @@ static void 
_rtl92e_init_priv_constant(struct net_device *dev) struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *) &priv->rtllib->PowerSaveControl; - pPSC->RegMaxLPSAwakeIntvl = 5; + pPSC->reg_max_lps_awake_intvl = 5; } static void _rtl92e_init_priv_variable(struct net_device *dev) @@ -1538,8 +1538,8 @@ static int _rtl92e_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) } tcb_desc->RATRIndex = 7; - tcb_desc->bTxDisableRateFallBack = 1; - tcb_desc->bTxUseDriverAssingedRate = 1; + tcb_desc->tx_dis_rate_fallback = 1; + tcb_desc->tx_use_drv_assinged_rate = 1; tcb_desc->bTxEnableFwCalcDur = 1; skb_push(skb, priv->rtllib->tx_headroom); ret = _rtl92e_tx(dev, skb); diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c b/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c index 12eea4fcb9dd..ffef63e8dcae 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c @@ -196,7 +196,7 @@ static int _rtl92e_wx_set_lps_awake_interval(struct net_device *dev, netdev_info(dev, "%s(): set lps awake interval ! 
extra is %d\n", __func__, *extra); - pPSC->RegMaxLPSAwakeIntvl = *extra; + pPSC->reg_max_lps_awake_intvl = *extra; mutex_unlock(&priv->wx_mutex); return 0; } diff --git a/drivers/staging/rtl8192e/rtl819x_HTProc.c b/drivers/staging/rtl8192e/rtl819x_HTProc.c index 2c0a8d5c8f27..7fad983ad4be 100644 --- a/drivers/staging/rtl8192e/rtl819x_HTProc.c +++ b/drivers/staging/rtl8192e/rtl819x_HTProc.c @@ -91,8 +91,8 @@ void HTUpdateDefaultSetting(struct rtllib_device *ieee) pHTInfo->self_mimo_ps = 3; if (pHTInfo->self_mimo_ps == 2) pHTInfo->self_mimo_ps = 3; - ieee->bTxDisableRateFallBack = 0; - ieee->bTxUseDriverAssingedRate = 0; + ieee->tx_dis_rate_fallback = 0; + ieee->tx_use_drv_assinged_rate = 0; ieee->bTxEnableFwCalcDur = 1; diff --git a/drivers/staging/rtl8192e/rtllib.h b/drivers/staging/rtl8192e/rtllib.h index e0fa87b12ceb..ca9e016ffa9f 100644 --- a/drivers/staging/rtl8192e/rtllib.h +++ b/drivers/staging/rtl8192e/rtllib.h @@ -126,8 +126,8 @@ struct cb_desc { u8 bFirstSeg:1; u8 bLastSeg:1; u8 bEncrypt:1; - u8 bTxDisableRateFallBack:1; - u8 bTxUseDriverAssingedRate:1; + u8 tx_dis_rate_fallback:1; + u8 tx_use_drv_assinged_rate:1; u8 bHwSec:1; u8 nStuckCount; @@ -1256,7 +1256,7 @@ struct rt_pwr_save_ctrl { bool bLeisurePs; u8 LpsIdleCount; - u8 RegMaxLPSAwakeIntvl; + u8 reg_max_lps_awake_intvl; u8 LPSAwakeIntvl; u32 CurPsLevel; @@ -1441,8 +1441,8 @@ struct rtllib_device { u8 RegHTSuppRateSet[16]; u8 HTCurrentOperaRate; u8 HTHighestOperaRate; - u8 bTxDisableRateFallBack; - u8 bTxUseDriverAssingedRate; + u8 tx_dis_rate_fallback; + u8 tx_use_drv_assinged_rate; u8 bTxEnableFwCalcDur; atomic_t atm_swbw; diff --git a/drivers/staging/rtl8192e/rtllib_softmac.c b/drivers/staging/rtl8192e/rtllib_softmac.c index 1253de481805..604d4fd7e2cd 100644 --- a/drivers/staging/rtl8192e/rtllib_softmac.c +++ b/drivers/staging/rtl8192e/rtllib_softmac.c @@ -220,8 +220,8 @@ inline void softmac_mgmt_xmit(struct sk_buff *skb, struct rtllib_device *ieee) tcb_desc->data_rate = 
MgntQuery_MgntFrameTxRate(ieee); tcb_desc->RATRIndex = 7; - tcb_desc->bTxDisableRateFallBack = 1; - tcb_desc->bTxUseDriverAssingedRate = 1; + tcb_desc->tx_dis_rate_fallback = 1; + tcb_desc->tx_use_drv_assinged_rate = 1; if (single) { if (ieee->queue_stop) { enqueue_mgmt(ieee, skb); @@ -298,8 +298,8 @@ softmac_ps_mgmt_xmit(struct sk_buff *skb, tcb_desc->data_rate = MgntQuery_MgntFrameTxRate(ieee); tcb_desc->RATRIndex = 7; - tcb_desc->bTxDisableRateFallBack = 1; - tcb_desc->bTxUseDriverAssingedRate = 1; + tcb_desc->tx_dis_rate_fallback = 1; + tcb_desc->tx_use_drv_assinged_rate = 1; if (single) { if (type != RTLLIB_FTYPE_CTL) { header->seq_ctl = cpu_to_le16(ieee->seq_ctrl[0] << 4); @@ -1996,12 +1996,12 @@ static short rtllib_sta_ps_sleep(struct rtllib_device *ieee, u64 *time) if (pPSC->LPSAwakeIntvl == 0) pPSC->LPSAwakeIntvl = 1; - if (pPSC->RegMaxLPSAwakeIntvl == 0) + if (pPSC->reg_max_lps_awake_intvl == 0) MaxPeriod = 1; - else if (pPSC->RegMaxLPSAwakeIntvl == 0xFF) + else if (pPSC->reg_max_lps_awake_intvl == 0xFF) MaxPeriod = ieee->current_network.dtim_period; else - MaxPeriod = pPSC->RegMaxLPSAwakeIntvl; + MaxPeriod = pPSC->reg_max_lps_awake_intvl; pPSC->LPSAwakeIntvl = (pPSC->LPSAwakeIntvl >= MaxPeriod) ? 
MaxPeriod : (pPSC->LPSAwakeIntvl + 1); diff --git a/drivers/staging/rtl8192e/rtllib_tx.c b/drivers/staging/rtl8192e/rtllib_tx.c index 101f44129145..be3779efcd23 100644 --- a/drivers/staging/rtl8192e/rtllib_tx.c +++ b/drivers/staging/rtl8192e/rtllib_tx.c @@ -477,13 +477,13 @@ NO_PROTECTION: static void rtllib_txrate_selectmode(struct rtllib_device *ieee, struct cb_desc *tcb_desc) { - if (ieee->bTxDisableRateFallBack) - tcb_desc->bTxDisableRateFallBack = true; + if (ieee->tx_dis_rate_fallback) + tcb_desc->tx_dis_rate_fallback = true; - if (ieee->bTxUseDriverAssingedRate) - tcb_desc->bTxUseDriverAssingedRate = true; - if (!tcb_desc->bTxDisableRateFallBack || - !tcb_desc->bTxUseDriverAssingedRate) { + if (ieee->tx_use_drv_assinged_rate) + tcb_desc->tx_use_drv_assinged_rate = true; + if (!tcb_desc->tx_dis_rate_fallback || + !tcb_desc->tx_use_drv_assinged_rate) { if (ieee->iw_mode == IW_MODE_INFRA || ieee->iw_mode == IW_MODE_ADHOC) tcb_desc->RATRIndex = 0; @@ -886,14 +886,14 @@ static int rtllib_xmit_inter(struct sk_buff *skb, struct net_device *dev) HT_IOT_ACT_WA_IOT_Broadcom) { tcb_desc->data_rate = MgntQuery_TxRateExcludeCCKRates(ieee); - tcb_desc->bTxDisableRateFallBack = false; + tcb_desc->tx_dis_rate_fallback = false; } else { tcb_desc->data_rate = ieee->basic_rate; - tcb_desc->bTxDisableRateFallBack = 1; + tcb_desc->tx_dis_rate_fallback = 1; } tcb_desc->RATRIndex = 7; - tcb_desc->bTxUseDriverAssingedRate = 1; + tcb_desc->tx_use_drv_assinged_rate = 1; } else { if (is_multicast_ether_addr(header.addr1)) tcb_desc->bMulticast = 1; @@ -910,14 +910,14 @@ static int rtllib_xmit_inter(struct sk_buff *skb, struct net_device *dev) HT_IOT_ACT_WA_IOT_Broadcom) { tcb_desc->data_rate = MgntQuery_TxRateExcludeCCKRates(ieee); - tcb_desc->bTxDisableRateFallBack = false; + tcb_desc->tx_dis_rate_fallback = false; } else { tcb_desc->data_rate = MGN_1M; - tcb_desc->bTxDisableRateFallBack = 1; + tcb_desc->tx_dis_rate_fallback = 1; } tcb_desc->RATRIndex = 7; - 
tcb_desc->bTxUseDriverAssingedRate = 1; + tcb_desc->tx_use_drv_assinged_rate = 1; tcb_desc->bdhcp = 1; } From c7bf6d33d095637fb78ba484a181d17bc1a73a68 Mon Sep 17 00:00:00 2001 From: Philipp Hortmann Date: Sun, 13 Nov 2022 05:35:12 +0100 Subject: [PATCH 1743/4122] staging: rtl8192e: Rename Regdot11HTOper.., bSupportM.. and PowerSaveCo.. Rename variable Regdot11HTOperationalRateSet to reg_dot11ht_oper_rate_set, bSupportMode to support_mode and PowerSaveControl to pwr_save_ctrl to avoid CamelCase which is not accepted by checkpatch. Fix unnecessary parentheses warning from checkpatch when used with these variables. Signed-off-by: Philipp Hortmann Link: https://lore.kernel.org/r/1ef7fd22b4a037c4d1f8685065ce7916b6f0930b.1668313325.git.philipp.g.hortmann@gmail.com Signed-off-by: Greg Kroah-Hartman --- .../staging/rtl8192e/rtl8192e/r8192E_phy.c | 2 +- drivers/staging/rtl8192e/rtl8192e/rtl_core.c | 30 +++++++++---------- drivers/staging/rtl8192e/rtl8192e/rtl_ps.c | 10 +++---- drivers/staging/rtl8192e/rtl8192e/rtl_wx.c | 4 +-- drivers/staging/rtl8192e/rtl819x_HTProc.c | 2 +- drivers/staging/rtl8192e/rtllib.h | 4 +-- drivers/staging/rtl8192e/rtllib_softmac.c | 10 +++---- 7 files changed, 31 insertions(+), 31 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c index 58da2dab55bd..b14497423cbf 100644 --- a/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c +++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c @@ -1304,7 +1304,7 @@ static bool _rtl92e_set_rf_power_state(struct net_device *dev, { struct r8192_priv *priv = rtllib_priv(dev); struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *) - (&(priv->rtllib->PowerSaveControl)); + (&priv->rtllib->pwr_save_ctrl); bool bResult = true; u8 i = 0, QueueID = 0; struct rtl8192_tx_ring *ring = NULL; diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c index 952fe8d7a7be..16e15b8e1b7a 100644 ---
a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c @@ -607,13 +607,13 @@ static void _rtl92e_refresh_support_rate(struct r8192_priv *priv) if (ieee->mode == WIRELESS_MODE_N_24G || ieee->mode == WIRELESS_MODE_N_5G) { - memcpy(ieee->Regdot11HTOperationalRateSet, + memcpy(ieee->reg_dot11ht_oper_rate_set, ieee->RegHTSuppRateSet, 16); memcpy(ieee->Regdot11TxHTOperationalRateSet, ieee->RegHTSuppRateSet, 16); } else { - memset(ieee->Regdot11HTOperationalRateSet, 0, 16); + memset(ieee->reg_dot11ht_oper_rate_set, 0, 16); } } @@ -642,19 +642,19 @@ static u8 _rtl92e_get_supported_wireless_mode(struct net_device *dev) void rtl92e_set_wireless_mode(struct net_device *dev, u8 wireless_mode) { struct r8192_priv *priv = rtllib_priv(dev); - u8 bSupportMode = _rtl92e_get_supported_wireless_mode(dev); + u8 support_mode = _rtl92e_get_supported_wireless_mode(dev); if ((wireless_mode == WIRELESS_MODE_AUTO) || - ((wireless_mode & bSupportMode) == 0)) { - if (bSupportMode & WIRELESS_MODE_N_24G) { + ((wireless_mode & support_mode) == 0)) { + if (support_mode & WIRELESS_MODE_N_24G) { wireless_mode = WIRELESS_MODE_N_24G; - } else if (bSupportMode & WIRELESS_MODE_N_5G) { + } else if (support_mode & WIRELESS_MODE_N_5G) { wireless_mode = WIRELESS_MODE_N_5G; - } else if ((bSupportMode & WIRELESS_MODE_A)) { + } else if ((support_mode & WIRELESS_MODE_A)) { wireless_mode = WIRELESS_MODE_A; - } else if ((bSupportMode & WIRELESS_MODE_G)) { + } else if ((support_mode & WIRELESS_MODE_G)) { wireless_mode = WIRELESS_MODE_G; - } else if ((bSupportMode & WIRELESS_MODE_B)) { + } else if ((support_mode & WIRELESS_MODE_B)) { wireless_mode = WIRELESS_MODE_B; } else { netdev_info(dev, @@ -683,7 +683,7 @@ static int _rtl92e_sta_up(struct net_device *dev, bool is_silent_reset) { struct r8192_priv *priv = rtllib_priv(dev); struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *) - (&priv->rtllib->PowerSaveControl); + (&priv->rtllib->pwr_save_ctrl); bool 
init_status; priv->bdisable_nic = false; @@ -820,7 +820,7 @@ static void _rtl92e_init_priv_constant(struct net_device *dev) { struct r8192_priv *priv = rtllib_priv(dev); struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *) - &priv->rtllib->PowerSaveControl; + &priv->rtllib->pwr_save_ctrl; pPSC->reg_max_lps_awake_intvl = 5; } @@ -877,7 +877,7 @@ static void _rtl92e_init_priv_variable(struct net_device *dev) priv->rf_change_in_progress = false; priv->bHwRfOffAction = 0; priv->SetRFPowerStateInProgress = false; - priv->rtllib->PowerSaveControl.bLeisurePs = true; + priv->rtllib->pwr_save_ctrl.bLeisurePs = true; priv->rtllib->LPSDelayCnt = 0; priv->rtllib->sta_sleep = LPS_IS_WAKE; priv->rtllib->rf_power_state = rf_on; @@ -1272,7 +1272,7 @@ static void _rtl92e_watchdog_wq_cb(void *data) static u8 check_reset_cnt; unsigned long flags; struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *) - (&priv->rtllib->PowerSaveControl); + (&priv->rtllib->pwr_save_ctrl); bool bBusyTraffic = false; bool bHigherBusyTraffic = false; bool bHigherBusyRxTraffic = false; @@ -1295,7 +1295,7 @@ static void _rtl92e_watchdog_wq_cb(void *data) RTLLIB_NOLINK) && (ieee->rf_power_state == rf_on) && !ieee->is_set_key && (!ieee->proto_stoppping) && !ieee->wx_set_enc) { - if ((ieee->PowerSaveControl.ReturnPoint == + if ((ieee->pwr_save_ctrl.ReturnPoint == IPS_CALLBACK_NONE) && (!ieee->bNetPromiscuousMode)) { rtl92e_ips_enter(dev); @@ -2422,7 +2422,7 @@ bool rtl92e_enable_nic(struct net_device *dev) bool init_status = true; struct r8192_priv *priv = rtllib_priv(dev); struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *) - (&priv->rtllib->PowerSaveControl); + (&priv->rtllib->pwr_save_ctrl); if (!priv->up) { netdev_warn(dev, "%s(): Driver is already down!\n", __func__); diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c b/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c index 1501f7be8eee..8a6b7f58ebfd 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c +++ 
b/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c @@ -101,7 +101,7 @@ static void _rtl92e_ps_update_rf_state(struct net_device *dev) { struct r8192_priv *priv = rtllib_priv(dev); struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *) - &(priv->rtllib->PowerSaveControl); + &priv->rtllib->pwr_save_ctrl; pPSC->bSwRfProcessing = true; rtl92e_set_rf_state(dev, pPSC->eInactivePowerState, RF_CHANGE_BY_IPS); @@ -113,7 +113,7 @@ void rtl92e_ips_enter(struct net_device *dev) { struct r8192_priv *priv = rtllib_priv(dev); struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *) - &(priv->rtllib->PowerSaveControl); + &priv->rtllib->pwr_save_ctrl; enum rt_rf_power_state rt_state; rt_state = priv->rtllib->rf_power_state; @@ -129,7 +129,7 @@ void rtl92e_ips_leave(struct net_device *dev) { struct r8192_priv *priv = rtllib_priv(dev); struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *) - &(priv->rtllib->PowerSaveControl); + &priv->rtllib->pwr_save_ctrl; enum rt_rf_power_state rt_state; rt_state = priv->rtllib->rf_power_state; @@ -207,7 +207,7 @@ void rtl92e_leisure_ps_enter(struct net_device *dev) { struct r8192_priv *priv = rtllib_priv(dev); struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *) - &(priv->rtllib->PowerSaveControl); + &priv->rtllib->pwr_save_ctrl; if (!((priv->rtllib->iw_mode == IW_MODE_INFRA) && (priv->rtllib->state == RTLLIB_LINKED)) @@ -233,7 +233,7 @@ void rtl92e_leisure_ps_leave(struct net_device *dev) { struct r8192_priv *priv = rtllib_priv(dev); struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *) - &(priv->rtllib->PowerSaveControl); + &priv->rtllib->pwr_save_ctrl; if (pPSC->bLeisurePs) { if (priv->rtllib->ps != RTLLIB_PS_DISABLED) { diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c b/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c index ffef63e8dcae..d68f8f5902c8 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c @@ -160,7 +160,7 @@ static int _rtl92e_wx_adapter_power_status(struct 
net_device *dev, { struct r8192_priv *priv = rtllib_priv(dev); struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *) - (&priv->rtllib->PowerSaveControl); + (&priv->rtllib->pwr_save_ctrl); struct rtllib_device *ieee = priv->rtllib; mutex_lock(&priv->wx_mutex); @@ -189,7 +189,7 @@ static int _rtl92e_wx_set_lps_awake_interval(struct net_device *dev, { struct r8192_priv *priv = rtllib_priv(dev); struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *) - (&priv->rtllib->PowerSaveControl); + (&priv->rtllib->pwr_save_ctrl); mutex_lock(&priv->wx_mutex); diff --git a/drivers/staging/rtl8192e/rtl819x_HTProc.c b/drivers/staging/rtl8192e/rtl819x_HTProc.c index 7fad983ad4be..52f41562dd82 100644 --- a/drivers/staging/rtl8192e/rtl819x_HTProc.c +++ b/drivers/staging/rtl8192e/rtl819x_HTProc.c @@ -323,7 +323,7 @@ void HTConstructCapabilityElement(struct rtllib_device *ieee, u8 *posHTCap, pCapELE->MPDUDensity = 0; } - memcpy(pCapELE->MCS, ieee->Regdot11HTOperationalRateSet, 16); + memcpy(pCapELE->MCS, ieee->reg_dot11ht_oper_rate_set, 16); memset(&pCapELE->ExtHTCapInfo, 0, 2); memset(pCapELE->TxBFCap, 0, 4); diff --git a/drivers/staging/rtl8192e/rtllib.h b/drivers/staging/rtl8192e/rtllib.h index ca9e016ffa9f..d80883b211ce 100644 --- a/drivers/staging/rtl8192e/rtllib.h +++ b/drivers/staging/rtl8192e/rtllib.h @@ -1435,7 +1435,7 @@ struct rtllib_device { struct rt_hi_throughput *pHTInfo; spinlock_t reorder_spinlock; - u8 Regdot11HTOperationalRateSet[16]; + u8 reg_dot11ht_oper_rate_set[16]; u8 Regdot11TxHTOperationalRateSet[16]; u8 dot11HTOperationalRateSet[16]; u8 RegHTSuppRateSet[16]; @@ -1645,7 +1645,7 @@ struct rtllib_device { struct rt_link_detect LinkDetectInfo; bool bIsAggregateFrame; - struct rt_pwr_save_ctrl PowerSaveControl; + struct rt_pwr_save_ctrl pwr_save_ctrl; /* used if IEEE_SOFTMAC_TX_QUEUE is set */ struct tx_pending tx_pending; diff --git a/drivers/staging/rtl8192e/rtllib_softmac.c b/drivers/staging/rtl8192e/rtllib_softmac.c index 604d4fd7e2cd..498febc88b15 
100644 --- a/drivers/staging/rtl8192e/rtllib_softmac.c +++ b/drivers/staging/rtl8192e/rtllib_softmac.c @@ -1504,7 +1504,7 @@ static void rtllib_associate_complete_wq(void *data) container_of_work_rsl(data, struct rtllib_device, associate_complete_wq); - struct rt_pwr_save_ctrl *pPSC = &(ieee->PowerSaveControl); + struct rt_pwr_save_ctrl *pPSC = &ieee->pwr_save_ctrl; netdev_info(ieee->dev, "Associated successfully with %pM\n", ieee->current_network.bssid); @@ -1960,7 +1960,7 @@ static short rtllib_sta_ps_sleep(struct rtllib_device *ieee, u64 *time) { int timeout; u8 dtim; - struct rt_pwr_save_ctrl *pPSC = &(ieee->PowerSaveControl); + struct rt_pwr_save_ctrl *pPSC = &ieee->pwr_save_ctrl; if (ieee->LPSDelayCnt) { ieee->LPSDelayCnt--; @@ -2984,9 +2984,9 @@ int rtllib_softmac_init(struct rtllib_device *ieee) ieee->ps = RTLLIB_PS_DISABLED; ieee->sta_sleep = LPS_IS_WAKE; - ieee->Regdot11HTOperationalRateSet[0] = 0xff; - ieee->Regdot11HTOperationalRateSet[1] = 0xff; - ieee->Regdot11HTOperationalRateSet[4] = 0x01; + ieee->reg_dot11ht_oper_rate_set[0] = 0xff; + ieee->reg_dot11ht_oper_rate_set[1] = 0xff; + ieee->reg_dot11ht_oper_rate_set[4] = 0x01; ieee->Regdot11TxHTOperationalRateSet[0] = 0xff; ieee->Regdot11TxHTOperationalRateSet[1] = 0xff; From ca25401e21ab40539a5ed4b86aecc3698f89b86f Mon Sep 17 00:00:00 2001 From: Philipp Hortmann Date: Sun, 13 Nov 2022 05:35:27 +0100 Subject: [PATCH 1744/4122] staging: rtl8192e: Rename Regdot11TxHT.., dot11HTOpera.. and RegHTSuppRa.. Rename variable Regdot11TxHTOperationalRateSet to reg_dot11tx_ht_oper_rate_set, dot11HTOperationalRateSet to dot11ht_oper_rate_set and RegHTSuppRateSet to reg_ht_supp_rate_set to avoid CamelCase which is not accepted by checkpatch. 
Signed-off-by: Philipp Hortmann Link: https://lore.kernel.org/r/0c87e7ffc94be1c26f6400f5e12419f2df0418a3.1668313325.git.philipp.g.hortmann@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c | 2 +- drivers/staging/rtl8192e/rtl8192e/rtl_core.c | 6 +++--- drivers/staging/rtl8192e/rtl819x_HTProc.c | 14 +++++++------- drivers/staging/rtl8192e/rtllib.h | 6 +++--- drivers/staging/rtl8192e/rtllib_softmac.c | 10 +++++----- 5 files changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c index 39aac83d8960..bbaf20fbfde0 100644 --- a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c +++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c @@ -1958,7 +1958,7 @@ void rtl92e_update_ratr_table(struct net_device *dev) { struct r8192_priv *priv = rtllib_priv(dev); struct rtllib_device *ieee = priv->rtllib; - u8 *pMcsRate = ieee->dot11HTOperationalRateSet; + u8 *pMcsRate = ieee->dot11ht_oper_rate_set; u32 ratr_value = 0; u16 rate_config = 0; u8 rate_index = 0; diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c index 16e15b8e1b7a..d5eec6d6b644 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c @@ -608,9 +608,9 @@ static void _rtl92e_refresh_support_rate(struct r8192_priv *priv) if (ieee->mode == WIRELESS_MODE_N_24G || ieee->mode == WIRELESS_MODE_N_5G) { memcpy(ieee->reg_dot11ht_oper_rate_set, - ieee->RegHTSuppRateSet, 16); - memcpy(ieee->Regdot11TxHTOperationalRateSet, - ieee->RegHTSuppRateSet, 16); + ieee->reg_ht_supp_rate_set, 16); + memcpy(ieee->reg_dot11tx_ht_oper_rate_set, + ieee->reg_ht_supp_rate_set, 16); } else { memset(ieee->reg_dot11ht_oper_rate_set, 0, 16); diff --git a/drivers/staging/rtl8192e/rtl819x_HTProc.c b/drivers/staging/rtl8192e/rtl819x_HTProc.c index 52f41562dd82..2092ebf717a1 100644 --- 
a/drivers/staging/rtl8192e/rtl819x_HTProc.c +++ b/drivers/staging/rtl8192e/rtl819x_HTProc.c @@ -489,7 +489,7 @@ static u8 HTFilterMCSRate(struct rtllib_device *ieee, u8 *pSupportMCS, u8 i; for (i = 0; i <= 15; i++) - pOperateMCS[i] = ieee->Regdot11TxHTOperationalRateSet[i] & + pOperateMCS[i] = ieee->reg_dot11tx_ht_oper_rate_set[i] & pSupportMCS[i]; HT_PickMCSRate(ieee, pOperateMCS); @@ -604,7 +604,7 @@ void HTOnAssocRsp(struct rtllib_device *ieee) HTIOTActDetermineRaFunc(ieee, ((pPeerHTCap->MCS[1]) != 0)); - HTFilterMCSRate(ieee, pPeerHTCap->MCS, ieee->dot11HTOperationalRateSet); + HTFilterMCSRate(ieee, pPeerHTCap->MCS, ieee->dot11ht_oper_rate_set); pHTInfo->peer_mimo_ps = pPeerHTCap->MimoPwrSave; if (pHTInfo->peer_mimo_ps == MIMO_PS_STATIC) @@ -612,7 +612,7 @@ void HTOnAssocRsp(struct rtllib_device *ieee) else pMcsFilter = MCS_FILTER_ALL; ieee->HTHighestOperaRate = HTGetHighestMCSRate(ieee, - ieee->dot11HTOperationalRateSet, + ieee->dot11ht_oper_rate_set, pMcsFilter); ieee->HTCurrentOperaRate = ieee->HTHighestOperaRate; @@ -661,7 +661,7 @@ void HTInitializeHTInfo(struct rtllib_device *ieee) pHTInfo->iot_ra_func = 0; { - u8 *RegHTSuppRateSets = &ieee->RegHTSuppRateSet[0]; + u8 *RegHTSuppRateSets = &ieee->reg_ht_supp_rate_set[0]; RegHTSuppRateSets[0] = 0xFF; RegHTSuppRateSets[1] = 0xFF; @@ -796,10 +796,10 @@ void HTUseDefaultSetting(struct rtllib_device *ieee) pHTInfo->current_mpdu_density = pHTInfo->current_mpdu_density; - HTFilterMCSRate(ieee, ieee->Regdot11TxHTOperationalRateSet, - ieee->dot11HTOperationalRateSet); + HTFilterMCSRate(ieee, ieee->reg_dot11tx_ht_oper_rate_set, + ieee->dot11ht_oper_rate_set); ieee->HTHighestOperaRate = HTGetHighestMCSRate(ieee, - ieee->dot11HTOperationalRateSet, + ieee->dot11ht_oper_rate_set, MCS_FILTER_ALL); ieee->HTCurrentOperaRate = ieee->HTHighestOperaRate; diff --git a/drivers/staging/rtl8192e/rtllib.h b/drivers/staging/rtl8192e/rtllib.h index d80883b211ce..487aa337eafa 100644 --- a/drivers/staging/rtl8192e/rtllib.h +++ 
b/drivers/staging/rtl8192e/rtllib.h @@ -1436,9 +1436,9 @@ struct rtllib_device { spinlock_t reorder_spinlock; u8 reg_dot11ht_oper_rate_set[16]; - u8 Regdot11TxHTOperationalRateSet[16]; - u8 dot11HTOperationalRateSet[16]; - u8 RegHTSuppRateSet[16]; + u8 reg_dot11tx_ht_oper_rate_set[16]; + u8 dot11ht_oper_rate_set[16]; + u8 reg_ht_supp_rate_set[16]; u8 HTCurrentOperaRate; u8 HTHighestOperaRate; u8 tx_dis_rate_fallback; diff --git a/drivers/staging/rtl8192e/rtllib_softmac.c b/drivers/staging/rtl8192e/rtllib_softmac.c index 498febc88b15..09cc3b3068cc 100644 --- a/drivers/staging/rtl8192e/rtllib_softmac.c +++ b/drivers/staging/rtl8192e/rtllib_softmac.c @@ -1532,7 +1532,7 @@ static void rtllib_associate_complete_wq(void *data) "Successfully associated, ht not enabled(%d, %d)\n", ieee->pHTInfo->bCurrentHTSupport, ieee->pHTInfo->bEnableHT); - memset(ieee->dot11HTOperationalRateSet, 0, 16); + memset(ieee->dot11ht_oper_rate_set, 0, 16); } ieee->LinkDetectInfo.SlotNum = 2 * (1 + ieee->current_network.beacon_interval / @@ -1728,7 +1728,7 @@ inline void rtllib_softmac_new_net(struct rtllib_device *ieee, netdev_info(ieee->dev, "Using B rates\n"); } - memset(ieee->dot11HTOperationalRateSet, 0, 16); + memset(ieee->dot11ht_oper_rate_set, 0, 16); ieee->state = RTLLIB_LINKED; } } @@ -2988,9 +2988,9 @@ int rtllib_softmac_init(struct rtllib_device *ieee) ieee->reg_dot11ht_oper_rate_set[1] = 0xff; ieee->reg_dot11ht_oper_rate_set[4] = 0x01; - ieee->Regdot11TxHTOperationalRateSet[0] = 0xff; - ieee->Regdot11TxHTOperationalRateSet[1] = 0xff; - ieee->Regdot11TxHTOperationalRateSet[4] = 0x01; + ieee->reg_dot11tx_ht_oper_rate_set[0] = 0xff; + ieee->reg_dot11tx_ht_oper_rate_set[1] = 0xff; + ieee->reg_dot11tx_ht_oper_rate_set[4] = 0x01; ieee->FirstIe_InScan = false; ieee->actscanning = false; From ca0298c09a5ec55954c0c0c1585e6a5643b8d3ca Mon Sep 17 00:00:00 2001 From: Philipp Hortmann Date: Sun, 13 Nov 2022 05:35:34 +0100 Subject: [PATCH 1745/4122] staging: rtl8192e: Rename pPSC Rename variable 
pPSC to psc to avoid CamelCase which is not accepted by checkpatch. Signed-off-by: Philipp Hortmann Link: https://lore.kernel.org/r/47ded8a906e55d6f09b51cd8f2dfb78b7b92c1cc.1668313325.git.philipp.g.hortmann@gmail.com Signed-off-by: Greg Kroah-Hartman --- .../staging/rtl8192e/rtl8192e/r8192E_phy.c | 14 +++---- drivers/staging/rtl8192e/rtl8192e/rtl_core.c | 16 ++++---- drivers/staging/rtl8192e/rtl8192e/rtl_ps.c | 32 ++++++++-------- drivers/staging/rtl8192e/rtl8192e/rtl_wx.c | 10 ++--- drivers/staging/rtl8192e/rtllib.h | 10 ++--- drivers/staging/rtl8192e/rtllib_softmac.c | 38 +++++++++---------- 6 files changed, 60 insertions(+), 60 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c index b14497423cbf..c357adf95a3d 100644 --- a/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c +++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c @@ -1303,7 +1303,7 @@ static bool _rtl92e_set_rf_power_state(struct net_device *dev, enum rt_rf_power_state rf_power_state) { struct r8192_priv *priv = rtllib_priv(dev); - struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *) + struct rt_pwr_save_ctrl *psc = (struct rt_pwr_save_ctrl *) (&priv->rtllib->pwr_save_ctrl); bool bResult = true; u8 i = 0, QueueID = 0; @@ -1318,7 +1318,7 @@ static bool _rtl92e_set_rf_power_state(struct net_device *dev, switch (rf_power_state) { case rf_on: if ((priv->rtllib->rf_power_state == rf_off) && - RT_IN_PS_LEVEL(pPSC, RT_RF_OFF_LEVL_HALT_NIC)) { + RT_IN_PS_LEVEL(psc, RT_RF_OFF_LEVL_HALT_NIC)) { bool rtstatus; u32 InitilizeCount = 3; @@ -1335,7 +1335,7 @@ static bool _rtl92e_set_rf_power_state(struct net_device *dev, return false; } - RT_CLEAR_PS_LEVEL(pPSC, + RT_CLEAR_PS_LEVEL(psc, RT_RF_OFF_LEVL_HALT_NIC); } else { rtl92e_writeb(dev, ANAPAR, 0x37); @@ -1399,11 +1399,11 @@ static bool _rtl92e_set_rf_power_state(struct net_device *dev, break; } - if (pPSC->RegRfPsLevel & RT_RF_OFF_LEVL_HALT_NIC && - !RT_IN_PS_LEVEL(pPSC, 
RT_RF_OFF_LEVL_HALT_NIC)) { + if (psc->RegRfPsLevel & RT_RF_OFF_LEVL_HALT_NIC && + !RT_IN_PS_LEVEL(psc, RT_RF_OFF_LEVL_HALT_NIC)) { rtl92e_disable_nic(dev); - RT_SET_PS_LEVEL(pPSC, RT_RF_OFF_LEVL_HALT_NIC); - } else if (!(pPSC->RegRfPsLevel & + RT_SET_PS_LEVEL(psc, RT_RF_OFF_LEVL_HALT_NIC); + } else if (!(psc->RegRfPsLevel & RT_RF_OFF_LEVL_HALT_NIC)) { rtl92e_set_rf_off(dev); } diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c index d5eec6d6b644..144bc7c9abd5 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c @@ -682,7 +682,7 @@ void rtl92e_set_wireless_mode(struct net_device *dev, u8 wireless_mode) static int _rtl92e_sta_up(struct net_device *dev, bool is_silent_reset) { struct r8192_priv *priv = rtllib_priv(dev); - struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *) + struct rt_pwr_save_ctrl *psc = (struct rt_pwr_save_ctrl *) (&priv->rtllib->pwr_save_ctrl); bool init_status; @@ -700,7 +700,7 @@ static int _rtl92e_sta_up(struct net_device *dev, bool is_silent_reset) return -1; } - RT_CLEAR_PS_LEVEL(pPSC, RT_RF_OFF_LEVL_HALT_NIC); + RT_CLEAR_PS_LEVEL(psc, RT_RF_OFF_LEVL_HALT_NIC); priv->bfirst_init = false; if (priv->polling_timer_on == 0) @@ -819,10 +819,10 @@ static void _rtl92e_init_priv_handler(struct net_device *dev) static void _rtl92e_init_priv_constant(struct net_device *dev) { struct r8192_priv *priv = rtllib_priv(dev); - struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *) + struct rt_pwr_save_ctrl *psc = (struct rt_pwr_save_ctrl *) &priv->rtllib->pwr_save_ctrl; - pPSC->reg_max_lps_awake_intvl = 5; + psc->reg_max_lps_awake_intvl = 5; } static void _rtl92e_init_priv_variable(struct net_device *dev) @@ -1271,7 +1271,7 @@ static void _rtl92e_watchdog_wq_cb(void *data) enum reset_type ResetType = RESET_TYPE_NORESET; static u8 check_reset_cnt; unsigned long flags; - struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *) + struct 
rt_pwr_save_ctrl *psc = (struct rt_pwr_save_ctrl *) (&priv->rtllib->pwr_save_ctrl); bool bBusyTraffic = false; bool bHigherBusyTraffic = false; @@ -1389,7 +1389,7 @@ static void _rtl92e_watchdog_wq_cb(void *data) spin_lock_irqsave(&priv->tx_lock, flags); if ((check_reset_cnt++ >= 3) && (!ieee->is_roaming) && - (!priv->rf_change_in_progress) && (!pPSC->bSwRfProcessing)) { + (!priv->rf_change_in_progress) && (!psc->bSwRfProcessing)) { ResetType = _rtl92e_if_check_reset(dev); check_reset_cnt = 3; } @@ -2421,7 +2421,7 @@ bool rtl92e_enable_nic(struct net_device *dev) { bool init_status = true; struct r8192_priv *priv = rtllib_priv(dev); - struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *) + struct rt_pwr_save_ctrl *psc = (struct rt_pwr_save_ctrl *) (&priv->rtllib->pwr_save_ctrl); if (!priv->up) { @@ -2437,7 +2437,7 @@ bool rtl92e_enable_nic(struct net_device *dev) priv->bdisable_nic = false; return false; } - RT_CLEAR_PS_LEVEL(pPSC, RT_RF_OFF_LEVL_HALT_NIC); + RT_CLEAR_PS_LEVEL(psc, RT_RF_OFF_LEVL_HALT_NIC); priv->bfirst_init = false; rtl92e_irq_enable(dev); diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c b/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c index 8a6b7f58ebfd..ef4f736ce325 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c @@ -100,27 +100,27 @@ void rtl92e_enter_sleep(struct net_device *dev, u64 time) static void _rtl92e_ps_update_rf_state(struct net_device *dev) { struct r8192_priv *priv = rtllib_priv(dev); - struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *) + struct rt_pwr_save_ctrl *psc = (struct rt_pwr_save_ctrl *) &priv->rtllib->pwr_save_ctrl; - pPSC->bSwRfProcessing = true; - rtl92e_set_rf_state(dev, pPSC->eInactivePowerState, RF_CHANGE_BY_IPS); + psc->bSwRfProcessing = true; + rtl92e_set_rf_state(dev, psc->eInactivePowerState, RF_CHANGE_BY_IPS); - pPSC->bSwRfProcessing = false; + psc->bSwRfProcessing = false; } void rtl92e_ips_enter(struct net_device *dev) { struct 
r8192_priv *priv = rtllib_priv(dev); - struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *) + struct rt_pwr_save_ctrl *psc = (struct rt_pwr_save_ctrl *) &priv->rtllib->pwr_save_ctrl; enum rt_rf_power_state rt_state; rt_state = priv->rtllib->rf_power_state; - if (rt_state == rf_on && !pPSC->bSwRfProcessing && + if (rt_state == rf_on && !psc->bSwRfProcessing && (priv->rtllib->state != RTLLIB_LINKED) && (priv->rtllib->iw_mode != IW_MODE_MASTER)) { - pPSC->eInactivePowerState = rf_off; + psc->eInactivePowerState = rf_off; _rtl92e_ps_update_rf_state(dev); } } @@ -128,14 +128,14 @@ void rtl92e_ips_enter(struct net_device *dev) void rtl92e_ips_leave(struct net_device *dev) { struct r8192_priv *priv = rtllib_priv(dev); - struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *) + struct rt_pwr_save_ctrl *psc = (struct rt_pwr_save_ctrl *) &priv->rtllib->pwr_save_ctrl; enum rt_rf_power_state rt_state; rt_state = priv->rtllib->rf_power_state; - if (rt_state != rf_on && !pPSC->bSwRfProcessing && + if (rt_state != rf_on && !psc->bSwRfProcessing && priv->rtllib->rf_off_reason <= RF_CHANGE_BY_IPS) { - pPSC->eInactivePowerState = rf_on; + psc->eInactivePowerState = rf_on; _rtl92e_ps_update_rf_state(dev); } } @@ -206,7 +206,7 @@ static bool _rtl92e_ps_set_mode(struct net_device *dev, u8 rtPsMode) void rtl92e_leisure_ps_enter(struct net_device *dev) { struct r8192_priv *priv = rtllib_priv(dev); - struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *) + struct rt_pwr_save_ctrl *psc = (struct rt_pwr_save_ctrl *) &priv->rtllib->pwr_save_ctrl; if (!((priv->rtllib->iw_mode == IW_MODE_INFRA) && @@ -215,8 +215,8 @@ void rtl92e_leisure_ps_enter(struct net_device *dev) (priv->rtllib->iw_mode == IW_MODE_MASTER)) return; - if (pPSC->bLeisurePs) { - if (pPSC->LpsIdleCount >= RT_CHECK_FOR_HANG_PERIOD) { + if (psc->bLeisurePs) { + if (psc->LpsIdleCount >= RT_CHECK_FOR_HANG_PERIOD) { if (priv->rtllib->ps == RTLLIB_PS_DISABLED) { if (priv->rtllib->SetFwCmdHandler) @@ -225,17 +225,17 
@@ void rtl92e_leisure_ps_enter(struct net_device *dev) RTLLIB_PS_UNICAST); } } else - pPSC->LpsIdleCount++; + psc->LpsIdleCount++; } } void rtl92e_leisure_ps_leave(struct net_device *dev) { struct r8192_priv *priv = rtllib_priv(dev); - struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *) + struct rt_pwr_save_ctrl *psc = (struct rt_pwr_save_ctrl *) &priv->rtllib->pwr_save_ctrl; - if (pPSC->bLeisurePs) { + if (psc->bLeisurePs) { if (priv->rtllib->ps != RTLLIB_PS_DISABLED) { _rtl92e_ps_set_mode(dev, RTLLIB_PS_DISABLED); if (priv->rtllib->SetFwCmdHandler) diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c b/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c index d68f8f5902c8..b200d53c8e3d 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c @@ -159,7 +159,7 @@ static int _rtl92e_wx_adapter_power_status(struct net_device *dev, union iwreq_data *wrqu, char *extra) { struct r8192_priv *priv = rtllib_priv(dev); - struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *) + struct rt_pwr_save_ctrl *psc = (struct rt_pwr_save_ctrl *) (&priv->rtllib->pwr_save_ctrl); struct rtllib_device *ieee = priv->rtllib; @@ -167,13 +167,13 @@ static int _rtl92e_wx_adapter_power_status(struct net_device *dev, if (*extra || priv->force_lps) { priv->ps_force = false; - pPSC->bLeisurePs = true; + psc->bLeisurePs = true; } else { if (priv->rtllib->state == RTLLIB_LINKED) rtl92e_leisure_ps_leave(dev); priv->ps_force = true; - pPSC->bLeisurePs = false; + psc->bLeisurePs = false; ieee->ps = *extra; } @@ -188,7 +188,7 @@ static int _rtl92e_wx_set_lps_awake_interval(struct net_device *dev, char *extra) { struct r8192_priv *priv = rtllib_priv(dev); - struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *) + struct rt_pwr_save_ctrl *psc = (struct rt_pwr_save_ctrl *) (&priv->rtllib->pwr_save_ctrl); mutex_lock(&priv->wx_mutex); @@ -196,7 +196,7 @@ static int _rtl92e_wx_set_lps_awake_interval(struct net_device *dev, netdev_info(dev, "%s(): 
set lps awake interval ! extra is %d\n", __func__, *extra); - pPSC->reg_max_lps_awake_intvl = *extra; + psc->reg_max_lps_awake_intvl = *extra; mutex_unlock(&priv->wx_mutex); return 0; } diff --git a/drivers/staging/rtl8192e/rtllib.h b/drivers/staging/rtl8192e/rtllib.h index 487aa337eafa..6b7b2c887c34 100644 --- a/drivers/staging/rtl8192e/rtllib.h +++ b/drivers/staging/rtl8192e/rtllib.h @@ -111,11 +111,11 @@ static inline void *netdev_priv_rsl(struct net_device *dev) #define SUPPORT_CKIP_MIC 0x08 #define SUPPORT_CKIP_PK 0x10 #define RT_RF_OFF_LEVL_HALT_NIC BIT3 -#define RT_IN_PS_LEVEL(pPSC, _PS_FLAG) \ - ((pPSC->CurPsLevel & _PS_FLAG) ? true : false) -#define RT_CLEAR_PS_LEVEL(pPSC, _PS_FLAG) \ - (pPSC->CurPsLevel &= (~(_PS_FLAG))) -#define RT_SET_PS_LEVEL(pPSC, _PS_FLAG) (pPSC->CurPsLevel |= _PS_FLAG) +#define RT_IN_PS_LEVEL(psc, _PS_FLAG) \ + ((psc->CurPsLevel & _PS_FLAG) ? true : false) +#define RT_CLEAR_PS_LEVEL(psc, _PS_FLAG) \ + (psc->CurPsLevel &= (~(_PS_FLAG))) +#define RT_SET_PS_LEVEL(psc, _PS_FLAG) (psc->CurPsLevel |= _PS_FLAG) /* defined for skb cb field */ /* At most 28 byte */ diff --git a/drivers/staging/rtl8192e/rtllib_softmac.c b/drivers/staging/rtl8192e/rtllib_softmac.c index 09cc3b3068cc..ee4173a6ade0 100644 --- a/drivers/staging/rtl8192e/rtllib_softmac.c +++ b/drivers/staging/rtl8192e/rtllib_softmac.c @@ -1504,7 +1504,7 @@ static void rtllib_associate_complete_wq(void *data) container_of_work_rsl(data, struct rtllib_device, associate_complete_wq); - struct rt_pwr_save_ctrl *pPSC = &ieee->pwr_save_ctrl; + struct rt_pwr_save_ctrl *psc = &ieee->pwr_save_ctrl; netdev_info(ieee->dev, "Associated successfully with %pM\n", ieee->current_network.bssid); @@ -1542,7 +1542,7 @@ static void rtllib_associate_complete_wq(void *data) ieee->LinkDetectInfo.NumRecvBcnInPeriod = 1; ieee->LinkDetectInfo.NumRecvDataInPeriod = 1; } - pPSC->LpsIdleCount = 0; + psc->LpsIdleCount = 0; ieee->link_change(ieee->dev); if (ieee->is_silent_reset) { @@ -1960,7 +1960,7 @@ static 
short rtllib_sta_ps_sleep(struct rtllib_device *ieee, u64 *time) { int timeout; u8 dtim; - struct rt_pwr_save_ctrl *pPSC = &ieee->pwr_save_ctrl; + struct rt_pwr_save_ctrl *psc = &ieee->pwr_save_ctrl; if (ieee->LPSDelayCnt) { ieee->LPSDelayCnt--; @@ -1990,21 +1990,21 @@ static short rtllib_sta_ps_sleep(struct rtllib_device *ieee, u64 *time) if (time) { if (ieee->bAwakePktSent) { - pPSC->LPSAwakeIntvl = 1; + psc->LPSAwakeIntvl = 1; } else { u8 MaxPeriod = 1; - if (pPSC->LPSAwakeIntvl == 0) - pPSC->LPSAwakeIntvl = 1; - if (pPSC->reg_max_lps_awake_intvl == 0) + if (psc->LPSAwakeIntvl == 0) + psc->LPSAwakeIntvl = 1; + if (psc->reg_max_lps_awake_intvl == 0) MaxPeriod = 1; - else if (pPSC->reg_max_lps_awake_intvl == 0xFF) + else if (psc->reg_max_lps_awake_intvl == 0xFF) MaxPeriod = ieee->current_network.dtim_period; else - MaxPeriod = pPSC->reg_max_lps_awake_intvl; - pPSC->LPSAwakeIntvl = (pPSC->LPSAwakeIntvl >= + MaxPeriod = psc->reg_max_lps_awake_intvl; + psc->LPSAwakeIntvl = (psc->LPSAwakeIntvl >= MaxPeriod) ? 
MaxPeriod : - (pPSC->LPSAwakeIntvl + 1); + (psc->LPSAwakeIntvl + 1); } { u8 LPSAwakeIntvl_tmp = 0; @@ -2012,23 +2012,23 @@ static short rtllib_sta_ps_sleep(struct rtllib_device *ieee, u64 *time) u8 count = ieee->current_network.tim.tim_count; if (count == 0) { - if (pPSC->LPSAwakeIntvl > period) + if (psc->LPSAwakeIntvl > period) LPSAwakeIntvl_tmp = period + - (pPSC->LPSAwakeIntvl - + (psc->LPSAwakeIntvl - period) - - ((pPSC->LPSAwakeIntvl-period) % + ((psc->LPSAwakeIntvl-period) % period); else - LPSAwakeIntvl_tmp = pPSC->LPSAwakeIntvl; + LPSAwakeIntvl_tmp = psc->LPSAwakeIntvl; } else { - if (pPSC->LPSAwakeIntvl > + if (psc->LPSAwakeIntvl > ieee->current_network.tim.tim_count) LPSAwakeIntvl_tmp = count + - (pPSC->LPSAwakeIntvl - count) - - ((pPSC->LPSAwakeIntvl-count)%period); + (psc->LPSAwakeIntvl - count) - + ((psc->LPSAwakeIntvl-count)%period); else - LPSAwakeIntvl_tmp = pPSC->LPSAwakeIntvl; + LPSAwakeIntvl_tmp = psc->LPSAwakeIntvl; } *time = ieee->current_network.last_dtim_sta_time From ab4bcf795e46de03772c583e5180dfaa67eb0293 Mon Sep 17 00:00:00 2001 From: Philipp Hortmann Date: Sun, 13 Nov 2022 05:35:43 +0100 Subject: [PATCH 1746/4122] staging: rtl8192e: Rename RFInProgres.., bEnableHT and RegChannelPlan Rename variable RFInProgressTimeOut to rf_in_progress_timeout, bEnableHT to enable_ht and RegChannelPlan to reg_chnl_plan to avoid CamelCase which is not accepted by checkpatch. 
Signed-off-by: Philipp Hortmann Link: https://lore.kernel.org/r/aa3b8fd4a51fc9b1c32566cd079590bf11a9190d.1668313325.git.philipp.g.hortmann@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c | 4 ++-- drivers/staging/rtl8192e/rtl8192e/rtl_core.c | 12 ++++++------ drivers/staging/rtl8192e/rtl8192e/rtl_core.h | 2 +- drivers/staging/rtl8192e/rtl819x_HT.h | 2 +- drivers/staging/rtl8192e/rtl819x_HTProc.c | 4 ++-- drivers/staging/rtl8192e/rtllib_softmac.c | 14 +++++++------- drivers/staging/rtl8192e/rtllib_softmac_wx.c | 2 +- drivers/staging/rtl8192e/rtllib_tx.c | 8 ++++---- 8 files changed, 24 insertions(+), 24 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c index bbaf20fbfde0..d0ce89837073 100644 --- a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c +++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c @@ -474,10 +474,10 @@ static void _rtl92e_read_eeprom_info(struct net_device *dev) priv->rf_chip = RF_8256; - if (priv->RegChannelPlan == 0xf) + if (priv->reg_chnl_plan == 0xf) priv->ChannelPlan = priv->eeprom_ChannelPlan; else - priv->ChannelPlan = priv->RegChannelPlan; + priv->ChannelPlan = priv->reg_chnl_plan; if (priv->eeprom_vid == 0x1186 && priv->eeprom_did == 0x3304) priv->CustomerID = RT_CID_DLINK; diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c index 144bc7c9abd5..1bf5e760d87a 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c @@ -672,9 +672,9 @@ void rtl92e_set_wireless_mode(struct net_device *dev, u8 wireless_mode) if ((wireless_mode == WIRELESS_MODE_N_24G) || (wireless_mode == WIRELESS_MODE_N_5G)) { - priv->rtllib->pHTInfo->bEnableHT = 1; + priv->rtllib->pHTInfo->enable_ht = 1; } else { - priv->rtllib->pHTInfo->bEnableHT = 0; + priv->rtllib->pHTInfo->enable_ht = 0; } _rtl92e_refresh_support_rate(priv); } @@ -723,7 +723,7 @@ static int 
_rtl92e_sta_down(struct net_device *dev, bool shutdownrf) { struct r8192_priv *priv = rtllib_priv(dev); unsigned long flags = 0; - u8 RFInProgressTimeOut = 0; + u8 rf_in_progress_timeout = 0; if (priv->up == 0) return -1; @@ -755,12 +755,12 @@ static int _rtl92e_sta_down(struct net_device *dev, bool shutdownrf) spin_lock_irqsave(&priv->rf_ps_lock, flags); while (priv->rf_change_in_progress) { spin_unlock_irqrestore(&priv->rf_ps_lock, flags); - if (RFInProgressTimeOut > 100) { + if (rf_in_progress_timeout > 100) { spin_lock_irqsave(&priv->rf_ps_lock, flags); break; } mdelay(1); - RFInProgressTimeOut++; + rf_in_progress_timeout++; spin_lock_irqsave(&priv->rf_ps_lock, flags); } priv->rf_change_in_progress = true; @@ -845,7 +845,7 @@ static void _rtl92e_init_priv_variable(struct net_device *dev) priv->rxringcount = MAX_RX_COUNT; priv->irq_enabled = 0; priv->chan = 1; - priv->RegChannelPlan = 0xf; + priv->reg_chnl_plan = 0xf; priv->rtllib->mode = WIRELESS_MODE_AUTO; priv->rtllib->iw_mode = IW_MODE_INFRA; priv->rtllib->bNetPromiscuousMode = false; diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h index 1ae3c77e2fef..98c750730f87 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h @@ -467,7 +467,7 @@ struct r8192_priv { bool bTXPowerDataReadFromEEPORM; - u16 RegChannelPlan; + u16 reg_chnl_plan; u16 ChannelPlan; u8 bHwRfOffAction; diff --git a/drivers/staging/rtl8192e/rtl819x_HT.h b/drivers/staging/rtl8192e/rtl819x_HT.h index 76bc9c5a6d83..22e4f126ed56 100644 --- a/drivers/staging/rtl8192e/rtl819x_HT.h +++ b/drivers/staging/rtl8192e/rtl819x_HT.h @@ -96,7 +96,7 @@ enum ht_aggre_mode { struct rt_hi_throughput { - u8 bEnableHT; + u8 enable_ht; u8 bCurrentHTSupport; u8 bRegBW40MHz; diff --git a/drivers/staging/rtl8192e/rtl819x_HTProc.c b/drivers/staging/rtl8192e/rtl819x_HTProc.c index 2092ebf717a1..2c4c1cb4cbed 100644 --- a/drivers/staging/rtl8192e/rtl819x_HTProc.c 
+++ b/drivers/staging/rtl8192e/rtl819x_HTProc.c @@ -690,7 +690,7 @@ void HTResetSelfAndSavePeerSetting(struct rtllib_device *ieee, struct rt_hi_throughput *pHTInfo = ieee->pHTInfo; u8 bIOTAction = 0; - /* unmark bEnableHT flag here is the same reason why unmarked in + /* unmark enable_ht flag here is the same reason why unmarked in * function rtllib_softmac_new_net. WB 2008.09.10 */ if (pNetwork->bssht.bd_support_ht) { @@ -776,7 +776,7 @@ void HTUseDefaultSetting(struct rtllib_device *ieee) { struct rt_hi_throughput *pHTInfo = ieee->pHTInfo; - if (pHTInfo->bEnableHT) { + if (pHTInfo->enable_ht) { pHTInfo->bCurrentHTSupport = true; pHTInfo->bCurSuppCCK = pHTInfo->bRegSuppCCK; diff --git a/drivers/staging/rtl8192e/rtllib_softmac.c b/drivers/staging/rtl8192e/rtllib_softmac.c index ee4173a6ade0..ac8132d8c45a 100644 --- a/drivers/staging/rtl8192e/rtllib_softmac.c +++ b/drivers/staging/rtl8192e/rtllib_softmac.c @@ -1179,11 +1179,11 @@ rtllib_association_req(struct rtllib_network *beacon, if ((ieee->rtllib_ap_sec_type && (ieee->rtllib_ap_sec_type(ieee) & SEC_ALG_TKIP)) || ieee->bForcedBgMode) { - ieee->pHTInfo->bEnableHT = 0; + ieee->pHTInfo->enable_ht = 0; ieee->mode = WIRELESS_MODE_G; } - if (ieee->pHTInfo->bCurrentHTSupport && ieee->pHTInfo->bEnableHT) { + if (ieee->pHTInfo->bCurrentHTSupport && ieee->pHTInfo->enable_ht) { ht_cap_buf = (u8 *)&(ieee->pHTInfo->SelfHTCap); ht_cap_len = sizeof(ieee->pHTInfo->SelfHTCap); HTConstructCapabilityElement(ieee, ht_cap_buf, &ht_cap_len, @@ -1324,7 +1324,7 @@ rtllib_association_req(struct rtllib_network *beacon, memcpy(tag, osCcxVerNum.Octet, osCcxVerNum.Length); tag += osCcxVerNum.Length; } - if (ieee->pHTInfo->bCurrentHTSupport && ieee->pHTInfo->bEnableHT) { + if (ieee->pHTInfo->bCurrentHTSupport && ieee->pHTInfo->enable_ht) { if (ieee->pHTInfo->ePeerHTSpecVer != HT_SPEC_VER_EWC) { tag = skb_put(skb, ht_cap_len); *tag++ = MFIE_TYPE_HT_CAP; @@ -1358,7 +1358,7 @@ rtllib_association_req(struct rtllib_network *beacon, 
rtllib_TURBO_Info(ieee, &tag); } - if (ieee->pHTInfo->bCurrentHTSupport && ieee->pHTInfo->bEnableHT) { + if (ieee->pHTInfo->bCurrentHTSupport && ieee->pHTInfo->enable_ht) { if (ieee->pHTInfo->ePeerHTSpecVer == HT_SPEC_VER_EWC) { tag = skb_put(skb, ht_cap_len); *tag++ = MFIE_TYPE_GENERIC; @@ -1524,14 +1524,14 @@ static void rtllib_associate_complete_wq(void *data) ieee->SetWirelessMode(ieee->dev, IEEE_B); netdev_info(ieee->dev, "Using B rates:%d\n", ieee->rate); } - if (ieee->pHTInfo->bCurrentHTSupport && ieee->pHTInfo->bEnableHT) { + if (ieee->pHTInfo->bCurrentHTSupport && ieee->pHTInfo->enable_ht) { netdev_info(ieee->dev, "Successfully associated, ht enabled\n"); HTOnAssocRsp(ieee); } else { netdev_info(ieee->dev, "Successfully associated, ht not enabled(%d, %d)\n", ieee->pHTInfo->bCurrentHTSupport, - ieee->pHTInfo->bEnableHT); + ieee->pHTInfo->enable_ht); memset(ieee->dot11ht_oper_rate_set, 0, 16); } ieee->LinkDetectInfo.SlotNum = 2 * (1 + @@ -1684,7 +1684,7 @@ inline void rtllib_softmac_new_net(struct rtllib_device *ieee, ieee->current_network.ssid, ieee->current_network.channel, ieee->current_network.qos_data.supported, - ieee->pHTInfo->bEnableHT, + ieee->pHTInfo->enable_ht, ieee->current_network.bssht.bd_support_ht, ieee->current_network.mode, ieee->current_network.flags); diff --git a/drivers/staging/rtl8192e/rtllib_softmac_wx.c b/drivers/staging/rtl8192e/rtllib_softmac_wx.c index fdf867a5dd7a..7bac30b87c93 100644 --- a/drivers/staging/rtl8192e/rtllib_softmac_wx.c +++ b/drivers/staging/rtl8192e/rtllib_softmac_wx.c @@ -359,7 +359,7 @@ void rtllib_wx_sync_scan_wq(void *data) if (ieee->ScanOperationBackupHandler) ieee->ScanOperationBackupHandler(ieee->dev, SCAN_OPT_BACKUP); - if (ieee->pHTInfo->bCurrentHTSupport && ieee->pHTInfo->bEnableHT && + if (ieee->pHTInfo->bCurrentHTSupport && ieee->pHTInfo->enable_ht && ieee->pHTInfo->bCurBW40MHz) { b40M = 1; chan_offset = ieee->pHTInfo->CurSTAExtChnlOffset; diff --git a/drivers/staging/rtl8192e/rtllib_tx.c 
b/drivers/staging/rtl8192e/rtllib_tx.c index be3779efcd23..8b01cfe03381 100644 --- a/drivers/staging/rtl8192e/rtllib_tx.c +++ b/drivers/staging/rtl8192e/rtllib_tx.c @@ -273,7 +273,7 @@ static void rtllib_tx_query_agg_cap(struct rtllib_device *ieee, if (rtllib_act_scanning(ieee, false)) return; - if (!pHTInfo->bCurrentHTSupport || !pHTInfo->bEnableHT) + if (!pHTInfo->bCurrentHTSupport || !pHTInfo->enable_ht) return; if (!IsQoSDataFrame(skb->data)) return; @@ -354,7 +354,7 @@ static void rtllib_query_HTCapShortGI(struct rtllib_device *ieee, tcb_desc->bUseShortGI = false; - if (!pHTInfo->bCurrentHTSupport || !pHTInfo->bEnableHT) + if (!pHTInfo->bCurrentHTSupport || !pHTInfo->enable_ht) return; if (pHTInfo->forced_short_gi) { @@ -375,7 +375,7 @@ static void rtllib_query_BandwidthMode(struct rtllib_device *ieee, tcb_desc->bPacketBW = false; - if (!pHTInfo->bCurrentHTSupport || !pHTInfo->bEnableHT) + if (!pHTInfo->bCurrentHTSupport || !pHTInfo->enable_ht) return; if (tcb_desc->bMulticast || tcb_desc->bBroadcast) @@ -438,7 +438,7 @@ static void rtllib_query_protectionmode(struct rtllib_device *ieee, tcb_desc->rts_rate = MGN_24M; break; } - if (pHTInfo->bCurrentHTSupport && pHTInfo->bEnableHT) { + if (pHTInfo->bCurrentHTSupport && pHTInfo->enable_ht) { u8 HTOpMode = pHTInfo->current_op_mode; if ((pHTInfo->bCurBW40MHz && (HTOpMode == 2 || From 7e5cfa3c77d324fa235e94cc4363b14624ff2c45 Mon Sep 17 00:00:00 2001 From: Philipp Hortmann Date: Sun, 13 Nov 2022 05:35:49 +0100 Subject: [PATCH 1747/4122] staging: rtl8192e: Rename LinkDetectInfo Rename variable LinkDetectInfo to link_detect_info to avoid CamelCase which is not accepted by checkpatch. 
Signed-off-by: Philipp Hortmann Link: https://lore.kernel.org/r/48d043893fa755490e810af204e5b7ad2ba606de.1668313325.git.philipp.g.hortmann@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl8192e/rtl_core.c | 58 ++++++++++---------- drivers/staging/rtl8192e/rtl8192e/rtl_wx.c | 2 +- drivers/staging/rtl8192e/rtllib.h | 2 +- drivers/staging/rtl8192e/rtllib_rx.c | 14 ++--- drivers/staging/rtl8192e/rtllib_softmac.c | 26 ++++----- drivers/staging/rtl8192e/rtllib_softmac_wx.c | 8 +-- 6 files changed, 55 insertions(+), 55 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c index 1bf5e760d87a..3379b4229a85 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c @@ -1250,15 +1250,15 @@ static void _rtl92e_update_rxcounts(struct r8192_priv *priv, u32 *TotalRxBcnNum, *TotalRxBcnNum = 0; *TotalRxDataNum = 0; - SlotIndex = (priv->rtllib->LinkDetectInfo.SlotIndex++) % - (priv->rtllib->LinkDetectInfo.SlotNum); - priv->rtllib->LinkDetectInfo.RxBcnNum[SlotIndex] = - priv->rtllib->LinkDetectInfo.NumRecvBcnInPeriod; - priv->rtllib->LinkDetectInfo.RxDataNum[SlotIndex] = - priv->rtllib->LinkDetectInfo.NumRecvDataInPeriod; - for (i = 0; i < priv->rtllib->LinkDetectInfo.SlotNum; i++) { - *TotalRxBcnNum += priv->rtllib->LinkDetectInfo.RxBcnNum[i]; - *TotalRxDataNum += priv->rtllib->LinkDetectInfo.RxDataNum[i]; + SlotIndex = (priv->rtllib->link_detect_info.SlotIndex++) % + (priv->rtllib->link_detect_info.SlotNum); + priv->rtllib->link_detect_info.RxBcnNum[SlotIndex] = + priv->rtllib->link_detect_info.NumRecvBcnInPeriod; + priv->rtllib->link_detect_info.RxDataNum[SlotIndex] = + priv->rtllib->link_detect_info.NumRecvDataInPeriod; + for (i = 0; i < priv->rtllib->link_detect_info.SlotNum; i++) { + *TotalRxBcnNum += priv->rtllib->link_detect_info.RxBcnNum[i]; + *TotalRxDataNum += priv->rtllib->link_detect_info.RxDataNum[i]; } } @@ -1304,22 +1304,22 @@ 
static void _rtl92e_watchdog_wq_cb(void *data) } if ((ieee->state == RTLLIB_LINKED) && (ieee->iw_mode == IW_MODE_INFRA) && (!ieee->bNetPromiscuousMode)) { - if (ieee->LinkDetectInfo.NumRxOkInPeriod > 100 || - ieee->LinkDetectInfo.NumTxOkInPeriod > 100) + if (ieee->link_detect_info.NumRxOkInPeriod > 100 || + ieee->link_detect_info.NumTxOkInPeriod > 100) bBusyTraffic = true; - if (ieee->LinkDetectInfo.NumRxOkInPeriod > 4000 || - ieee->LinkDetectInfo.NumTxOkInPeriod > 4000) { + if (ieee->link_detect_info.NumRxOkInPeriod > 4000 || + ieee->link_detect_info.NumTxOkInPeriod > 4000) { bHigherBusyTraffic = true; - if (ieee->LinkDetectInfo.NumRxOkInPeriod > 5000) + if (ieee->link_detect_info.NumRxOkInPeriod > 5000) bHigherBusyRxTraffic = true; else bHigherBusyRxTraffic = false; } - if (((ieee->LinkDetectInfo.NumRxUnicastOkInPeriod + - ieee->LinkDetectInfo.NumTxOkInPeriod) > 8) || - (ieee->LinkDetectInfo.NumRxUnicastOkInPeriod > 2)) + if (((ieee->link_detect_info.NumRxUnicastOkInPeriod + + ieee->link_detect_info.NumTxOkInPeriod) > 8) || + (ieee->link_detect_info.NumRxUnicastOkInPeriod > 2)) bEnterPS = false; else bEnterPS = true; @@ -1336,13 +1336,13 @@ static void _rtl92e_watchdog_wq_cb(void *data) rtl92e_leisure_ps_leave(dev); } - ieee->LinkDetectInfo.NumRxOkInPeriod = 0; - ieee->LinkDetectInfo.NumTxOkInPeriod = 0; - ieee->LinkDetectInfo.NumRxUnicastOkInPeriod = 0; - ieee->LinkDetectInfo.bBusyTraffic = bBusyTraffic; + ieee->link_detect_info.NumRxOkInPeriod = 0; + ieee->link_detect_info.NumTxOkInPeriod = 0; + ieee->link_detect_info.NumRxUnicastOkInPeriod = 0; + ieee->link_detect_info.bBusyTraffic = bBusyTraffic; - ieee->LinkDetectInfo.bHigherBusyTraffic = bHigherBusyTraffic; - ieee->LinkDetectInfo.bHigherBusyRxTraffic = bHigherBusyRxTraffic; + ieee->link_detect_info.bHigherBusyTraffic = bHigherBusyTraffic; + ieee->link_detect_info.bHigherBusyRxTraffic = bHigherBusyRxTraffic; if (ieee->state == RTLLIB_LINKED && ieee->iw_mode == IW_MODE_INFRA) { u32 TotalRxBcnNum = 0; @@ 
-1383,8 +1383,8 @@ static void _rtl92e_watchdog_wq_cb(void *data) priv->check_roaming_cnt = 0; } - ieee->LinkDetectInfo.NumRecvBcnInPeriod = 0; - ieee->LinkDetectInfo.NumRecvDataInPeriod = 0; + ieee->link_detect_info.NumRecvBcnInPeriod = 0; + ieee->link_detect_info.NumRecvDataInPeriod = 0; } spin_lock_irqsave(&priv->tx_lock, flags); @@ -2213,25 +2213,25 @@ static irqreturn_t _rtl92e_irq(int irq, void *netdev) if (inta & IMR_BKDOK) { priv->stats.txbkokint++; - priv->rtllib->LinkDetectInfo.NumTxOkInPeriod++; + priv->rtllib->link_detect_info.NumTxOkInPeriod++; _rtl92e_tx_isr(dev, BK_QUEUE); } if (inta & IMR_BEDOK) { priv->stats.txbeokint++; - priv->rtllib->LinkDetectInfo.NumTxOkInPeriod++; + priv->rtllib->link_detect_info.NumTxOkInPeriod++; _rtl92e_tx_isr(dev, BE_QUEUE); } if (inta & IMR_VIDOK) { priv->stats.txviokint++; - priv->rtllib->LinkDetectInfo.NumTxOkInPeriod++; + priv->rtllib->link_detect_info.NumTxOkInPeriod++; _rtl92e_tx_isr(dev, VI_QUEUE); } if (inta & IMR_VODOK) { priv->stats.txvookint++; - priv->rtllib->LinkDetectInfo.NumTxOkInPeriod++; + priv->rtllib->link_detect_info.NumTxOkInPeriod++; _rtl92e_tx_isr(dev, VO_QUEUE); } diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c b/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c index b200d53c8e3d..4f16c3d079cc 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c @@ -393,7 +393,7 @@ static int _rtl92e_wx_set_scan(struct net_device *dev, rt_state = priv->rtllib->rf_power_state; if (!priv->up) return -ENETDOWN; - if (priv->rtllib->LinkDetectInfo.bBusyTraffic == true) + if (priv->rtllib->link_detect_info.bBusyTraffic == true) return -EAGAIN; if (wrqu->data.flags & IW_SCAN_THIS_ESSID) { diff --git a/drivers/staging/rtl8192e/rtllib.h b/drivers/staging/rtl8192e/rtllib.h index 6b7b2c887c34..b5313c1ea32d 100644 --- a/drivers/staging/rtl8192e/rtllib.h +++ b/drivers/staging/rtl8192e/rtllib.h @@ -1643,7 +1643,7 @@ struct rtllib_device { struct bandwidth_autoswitch 
bandwidth_auto_switch; bool FwRWRF; - struct rt_link_detect LinkDetectInfo; + struct rt_link_detect link_detect_info; bool bIsAggregateFrame; struct rt_pwr_save_ctrl pwr_save_ctrl; diff --git a/drivers/staging/rtl8192e/rtllib_rx.c b/drivers/staging/rtl8192e/rtllib_rx.c index 46d75e925ee9..f9a35fcb1d1b 100644 --- a/drivers/staging/rtl8192e/rtllib_rx.c +++ b/drivers/staging/rtl8192e/rtllib_rx.c @@ -1211,9 +1211,9 @@ static void rtllib_rx_check_leave_lps(struct rtllib_device *ieee, u8 unicast, if (unicast) { if (ieee->state == RTLLIB_LINKED) { - if (((ieee->LinkDetectInfo.NumRxUnicastOkInPeriod + - ieee->LinkDetectInfo.NumTxOkInPeriod) > 8) || - (ieee->LinkDetectInfo.NumRxUnicastOkInPeriod > 2)) { + if (((ieee->link_detect_info.NumRxUnicastOkInPeriod + + ieee->link_detect_info.NumTxOkInPeriod) > 8) || + (ieee->link_detect_info.NumRxUnicastOkInPeriod > 2)) { if (ieee->LeisurePSLeave) ieee->LeisurePSLeave(ieee->dev); } @@ -1355,8 +1355,8 @@ static int rtllib_rx_InfraAdhoc(struct rtllib_device *ieee, struct sk_buff *skb, /* Update statstics for AP roaming */ if (!bToOtherSTA) { - ieee->LinkDetectInfo.NumRecvDataInPeriod++; - ieee->LinkDetectInfo.NumRxOkInPeriod++; + ieee->link_detect_info.NumRecvDataInPeriod++; + ieee->link_detect_info.NumRxOkInPeriod++; } /* Data frame - extract src/dst addresses */ @@ -1437,7 +1437,7 @@ static int rtllib_rx_InfraAdhoc(struct rtllib_device *ieee, struct sk_buff *skb, else nr_subframes = 1; if (unicast) - ieee->LinkDetectInfo.NumRxUnicastOkInPeriod += nr_subframes; + ieee->link_detect_info.NumRxUnicastOkInPeriod += nr_subframes; rtllib_rx_check_leave_lps(ieee, unicast, nr_subframes); } @@ -2620,7 +2620,7 @@ static inline void rtllib_process_probe_response( } if (is_beacon(frame_ctl)) { if (ieee->state >= RTLLIB_LINKED) - ieee->LinkDetectInfo.NumRecvBcnInPeriod++; + ieee->link_detect_info.NumRecvBcnInPeriod++; } } list_for_each_entry(target, &ieee->network_list, list) { diff --git a/drivers/staging/rtl8192e/rtllib_softmac.c 
b/drivers/staging/rtl8192e/rtllib_softmac.c index ac8132d8c45a..1d583e73d753 100644 --- a/drivers/staging/rtl8192e/rtllib_softmac.c +++ b/drivers/staging/rtl8192e/rtllib_softmac.c @@ -1534,13 +1534,13 @@ static void rtllib_associate_complete_wq(void *data) ieee->pHTInfo->enable_ht); memset(ieee->dot11ht_oper_rate_set, 0, 16); } - ieee->LinkDetectInfo.SlotNum = 2 * (1 + + ieee->link_detect_info.SlotNum = 2 * (1 + ieee->current_network.beacon_interval / 500); - if (ieee->LinkDetectInfo.NumRecvBcnInPeriod == 0 || - ieee->LinkDetectInfo.NumRecvDataInPeriod == 0) { - ieee->LinkDetectInfo.NumRecvBcnInPeriod = 1; - ieee->LinkDetectInfo.NumRecvDataInPeriod = 1; + if (ieee->link_detect_info.NumRecvBcnInPeriod == 0 || + ieee->link_detect_info.NumRecvDataInPeriod == 0) { + ieee->link_detect_info.NumRecvBcnInPeriod = 1; + ieee->link_detect_info.NumRecvDataInPeriod = 1; } psc->LpsIdleCount = 0; ieee->link_change(ieee->dev); @@ -2369,7 +2369,7 @@ rtllib_rx_deauth(struct rtllib_device *ieee, struct sk_buff *skb) ieee->state = RTLLIB_ASSOCIATING; ieee->softmac_stats.reassoc++; ieee->is_roaming = true; - ieee->LinkDetectInfo.bBusyTraffic = false; + ieee->link_detect_info.bBusyTraffic = false; rtllib_disassociate(ieee); RemovePeerTS(ieee, header->addr2); if (ieee->LedControlHandler != NULL) @@ -2963,13 +2963,13 @@ int rtllib_softmac_init(struct rtllib_device *ieee) if (!ieee->dot11d_info) return -ENOMEM; - ieee->LinkDetectInfo.SlotIndex = 0; - ieee->LinkDetectInfo.SlotNum = 2; - ieee->LinkDetectInfo.NumRecvBcnInPeriod = 0; - ieee->LinkDetectInfo.NumRecvDataInPeriod = 0; - ieee->LinkDetectInfo.NumTxOkInPeriod = 0; - ieee->LinkDetectInfo.NumRxOkInPeriod = 0; - ieee->LinkDetectInfo.NumRxUnicastOkInPeriod = 0; + ieee->link_detect_info.SlotIndex = 0; + ieee->link_detect_info.SlotNum = 2; + ieee->link_detect_info.NumRecvBcnInPeriod = 0; + ieee->link_detect_info.NumRecvDataInPeriod = 0; + ieee->link_detect_info.NumTxOkInPeriod = 0; + ieee->link_detect_info.NumRxOkInPeriod = 0; + 
ieee->link_detect_info.NumRxUnicastOkInPeriod = 0; ieee->bIsAggregateFrame = false; ieee->assoc_id = 0; ieee->queue_stop = 0; diff --git a/drivers/staging/rtl8192e/rtllib_softmac_wx.c b/drivers/staging/rtl8192e/rtllib_softmac_wx.c index 7bac30b87c93..e02e7d9566b2 100644 --- a/drivers/staging/rtl8192e/rtllib_softmac_wx.c +++ b/drivers/staging/rtl8192e/rtllib_softmac_wx.c @@ -391,10 +391,10 @@ void rtllib_wx_sync_scan_wq(void *data) /* Notify AP that I wake up again */ rtllib_sta_ps_send_null_frame(ieee, 0); - if (ieee->LinkDetectInfo.NumRecvBcnInPeriod == 0 || - ieee->LinkDetectInfo.NumRecvDataInPeriod == 0) { - ieee->LinkDetectInfo.NumRecvBcnInPeriod = 1; - ieee->LinkDetectInfo.NumRecvDataInPeriod = 1; + if (ieee->link_detect_info.NumRecvBcnInPeriod == 0 || + ieee->link_detect_info.NumRecvDataInPeriod == 0) { + ieee->link_detect_info.NumRecvBcnInPeriod = 1; + ieee->link_detect_info.NumRecvDataInPeriod = 1; } if (ieee->data_hard_resume) From 8e5d08ceb8db933bdb1f60772984ffc166d2eb55 Mon Sep 17 00:00:00 2001 From: Philipp Hortmann Date: Sun, 13 Nov 2022 05:35:55 +0100 Subject: [PATCH 1748/4122] staging: rtl8192e: Rename bNetPromisc.., IntelPromiscu.. and bPromiscu.. Rename variable bNetPromiscuousMode to net_promiscuous_md, IntelPromiscuousModeInfo to intel_promiscuous_md_info and bPromiscuousOn to promiscuous_on to avoid CamelCase which is not accepted by checkpatch. 
Signed-off-by: Philipp Hortmann Link: https://lore.kernel.org/r/69d9998a30ce2286c3ae6cb4510174e1255b3f9e.1668313325.git.philipp.g.hortmann@gmail.com Signed-off-by: Greg Kroah-Hartman --- .../staging/rtl8192e/rtl8192e/r8192E_dev.c | 2 +- drivers/staging/rtl8192e/rtl8192e/rtl_core.c | 10 +++++----- drivers/staging/rtl8192e/rtl8192e/rtl_wx.c | 20 +++++++++---------- drivers/staging/rtl8192e/rtllib.h | 6 +++--- drivers/staging/rtl8192e/rtllib_rx.c | 8 ++++---- drivers/staging/rtl8192e/rtllib_softmac.c | 4 ++-- 6 files changed, 25 insertions(+), 25 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c index d0ce89837073..2ccd1e0542c2 100644 --- a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c +++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c @@ -859,7 +859,7 @@ void rtl92e_link_change(struct net_device *dev) reg = rtl92e_readl(dev, RCR); if (priv->rtllib->state == RTLLIB_LINKED) { - if (ieee->IntelPromiscuousModeInfo.bPromiscuousOn) + if (ieee->intel_promiscuous_md_info.promiscuous_on) ; else priv->ReceiveConfig = reg |= RCR_CBSSID; diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c index 3379b4229a85..94b8ed2e3489 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c @@ -848,9 +848,9 @@ static void _rtl92e_init_priv_variable(struct net_device *dev) priv->reg_chnl_plan = 0xf; priv->rtllib->mode = WIRELESS_MODE_AUTO; priv->rtllib->iw_mode = IW_MODE_INFRA; - priv->rtllib->bNetPromiscuousMode = false; - priv->rtllib->IntelPromiscuousModeInfo.bPromiscuousOn = false; - priv->rtllib->IntelPromiscuousModeInfo.bFilterSourceStationFrame = + priv->rtllib->net_promiscuous_md = false; + priv->rtllib->intel_promiscuous_md_info.promiscuous_on = false; + priv->rtllib->intel_promiscuous_md_info.bFilterSourceStationFrame = false; priv->rtllib->ieee_up = 0; priv->retry_rts = DEFAULT_RETRY_RTS; @@ -1297,13 
+1297,13 @@ static void _rtl92e_watchdog_wq_cb(void *data) (!ieee->proto_stoppping) && !ieee->wx_set_enc) { if ((ieee->pwr_save_ctrl.ReturnPoint == IPS_CALLBACK_NONE) && - (!ieee->bNetPromiscuousMode)) { + (!ieee->net_promiscuous_md)) { rtl92e_ips_enter(dev); } } } if ((ieee->state == RTLLIB_LINKED) && (ieee->iw_mode == - IW_MODE_INFRA) && (!ieee->bNetPromiscuousMode)) { + IW_MODE_INFRA) && (!ieee->net_promiscuous_md)) { if (ieee->link_detect_info.NumRxOkInPeriod > 100 || ieee->link_detect_info.NumTxOkInPeriod > 100) bBusyTraffic = true; diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c b/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c index 4f16c3d079cc..c31e5b572e9e 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c @@ -251,7 +251,7 @@ static int _rtl92e_wx_set_mode(struct net_device *dev, rt_state = priv->rtllib->rf_power_state; mutex_lock(&priv->wx_mutex); if (wrqu->mode == IW_MODE_ADHOC || wrqu->mode == IW_MODE_MONITOR || - ieee->bNetPromiscuousMode) { + ieee->net_promiscuous_md) { if (rt_state == rf_off) { if (priv->rtllib->rf_off_reason > RF_CHANGE_BY_IPS) { @@ -1014,28 +1014,28 @@ static int _rtl92e_wx_set_promisc_mode(struct net_device *dev, u32 info_buf[3]; u32 oid; - u32 bPromiscuousOn; + u32 promiscuous_on; u32 bFilterSourceStationFrame; if (copy_from_user(info_buf, wrqu->data.pointer, sizeof(info_buf))) return -EFAULT; oid = info_buf[0]; - bPromiscuousOn = info_buf[1]; + promiscuous_on = info_buf[1]; bFilterSourceStationFrame = info_buf[2]; if (oid == OID_RT_INTEL_PROMISCUOUS_MODE) { - ieee->IntelPromiscuousModeInfo.bPromiscuousOn = - (bPromiscuousOn) ? (true) : (false); - ieee->IntelPromiscuousModeInfo.bFilterSourceStationFrame = + ieee->intel_promiscuous_md_info.promiscuous_on = + (promiscuous_on) ? (true) : (false); + ieee->intel_promiscuous_md_info.bFilterSourceStationFrame = (bFilterSourceStationFrame) ? (true) : (false); - (bPromiscuousOn) ? + (promiscuous_on) ? 
(rtllib_EnableIntelPromiscuousMode(dev, false)) : (rtllib_DisableIntelPromiscuousMode(dev, false)); netdev_info(dev, "=======>%s(), on = %d, filter src sta = %d\n", - __func__, bPromiscuousOn, + __func__, promiscuous_on, bFilterSourceStationFrame); } else { return -1; @@ -1054,8 +1054,8 @@ static int _rtl92e_wx_get_promisc_mode(struct net_device *dev, mutex_lock(&priv->wx_mutex); snprintf(extra, 45, "PromiscuousMode:%d, FilterSrcSTAFrame:%d", - ieee->IntelPromiscuousModeInfo.bPromiscuousOn, - ieee->IntelPromiscuousModeInfo.bFilterSourceStationFrame); + ieee->intel_promiscuous_md_info.promiscuous_on, + ieee->intel_promiscuous_md_info.bFilterSourceStationFrame); wrqu->data.length = strlen(extra) + 1; mutex_unlock(&priv->wx_mutex); diff --git a/drivers/staging/rtl8192e/rtllib.h b/drivers/staging/rtl8192e/rtllib.h index b5313c1ea32d..493759cc6ccf 100644 --- a/drivers/staging/rtl8192e/rtllib.h +++ b/drivers/staging/rtl8192e/rtllib.h @@ -1384,7 +1384,7 @@ struct rt_pmkid_list { }; struct rt_intel_promisc_mode { - bool bPromiscuousOn; + bool promiscuous_on; bool bFilterSourceStationFrame; }; @@ -1470,8 +1470,8 @@ struct rtllib_device { int scan_age; int iw_mode; /* operating mode (IW_MODE_*) */ - bool bNetPromiscuousMode; - struct rt_intel_promisc_mode IntelPromiscuousModeInfo; + bool net_promiscuous_md; + struct rt_intel_promisc_mode intel_promiscuous_md_info; spinlock_t lock; spinlock_t wpax_suitlist_lock; diff --git a/drivers/staging/rtl8192e/rtllib_rx.c b/drivers/staging/rtl8192e/rtllib_rx.c index f9a35fcb1d1b..ccb61d8decd3 100644 --- a/drivers/staging/rtl8192e/rtllib_rx.c +++ b/drivers/staging/rtl8192e/rtllib_rx.c @@ -999,8 +999,8 @@ static int rtllib_rx_data_filter(struct rtllib_device *ieee, u16 fc, } /* Filter packets sent by an STA that will be forwarded by AP */ - if (ieee->IntelPromiscuousModeInfo.bPromiscuousOn && - ieee->IntelPromiscuousModeInfo.bFilterSourceStationFrame) { + if (ieee->intel_promiscuous_md_info.promiscuous_on && + 
ieee->intel_promiscuous_md_info.bFilterSourceStationFrame) { if ((fc & RTLLIB_FCTL_TODS) && !(fc & RTLLIB_FCTL_FROMDS) && !ether_addr_equal(dst, ieee->current_network.bssid) && ether_addr_equal(bssid, ieee->current_network.bssid)) { @@ -1011,7 +1011,7 @@ static int rtllib_rx_data_filter(struct rtllib_device *ieee, u16 fc, /* Nullfunc frames may have PS-bit set, so they must be passed to * hostap_handle_sta_rx() before being dropped here. */ - if (!ieee->IntelPromiscuousModeInfo.bPromiscuousOn) { + if (!ieee->intel_promiscuous_md_info.promiscuous_on) { if (stype != RTLLIB_STYPE_DATA && stype != RTLLIB_STYPE_DATA_CFACK && stype != RTLLIB_STYPE_DATA_CFPOLL && @@ -1317,7 +1317,7 @@ static int rtllib_rx_InfraAdhoc(struct rtllib_device *ieee, struct sk_buff *skb, multicast = is_multicast_ether_addr(hdr->addr1); unicast = !multicast; if (unicast && !ether_addr_equal(dev->dev_addr, hdr->addr1)) { - if (ieee->bNetPromiscuousMode) + if (ieee->net_promiscuous_md) bToOtherSTA = true; else goto rx_dropped; diff --git a/drivers/staging/rtl8192e/rtllib_softmac.c b/drivers/staging/rtl8192e/rtllib_softmac.c index 1d583e73d753..8b1d88061125 100644 --- a/drivers/staging/rtl8192e/rtllib_softmac.c +++ b/drivers/staging/rtl8192e/rtllib_softmac.c @@ -445,7 +445,7 @@ void rtllib_EnableIntelPromiscuousMode(struct net_device *dev, ieee->SetHwRegHandler(dev, HW_VAR_CECHK_BSSID, (u8 *)&bFilterOutNonAssociatedBSSID); - ieee->bNetPromiscuousMode = true; + ieee->net_promiscuous_md = true; } EXPORT_SYMBOL(rtllib_EnableIntelPromiscuousMode); @@ -466,7 +466,7 @@ void rtllib_DisableIntelPromiscuousMode(struct net_device *dev, ieee->SetHwRegHandler(dev, HW_VAR_CECHK_BSSID, (u8 *)&bFilterOutNonAssociatedBSSID); - ieee->bNetPromiscuousMode = false; + ieee->net_promiscuous_md = false; } EXPORT_SYMBOL(rtllib_DisableIntelPromiscuousMode); From 62310f889a73ea2089f31209a426307887435e86 Mon Sep 17 00:00:00 2001 From: Philipp Hortmann Date: Sun, 13 Nov 2022 05:36:02 +0100 Subject: [PATCH 1749/4122] staging: 
rtl8192e: Rename bFilterSour.., CCKPresentAt.. and ResetProg.. Rename variable bFilterSourceStationFrame to fltr_src_sta_frame, CCKPresentAttentuation to cck_present_attn and ResetProgress to rst_progress to avoid CamelCase which is not accepted by checkpatch. Signed-off-by: Philipp Hortmann Link: https://lore.kernel.org/r/25ab52350a4a3249a1f76b28eea10c44e2f9552d.1668313325.git.philipp.g.hortmann@gmail.com Signed-off-by: Greg Kroah-Hartman --- .../staging/rtl8192e/rtl8192e/r8192E_dev.c | 12 +++++------ .../staging/rtl8192e/rtl8192e/r8192E_phy.c | 20 +++++++++---------- drivers/staging/rtl8192e/rtl8192e/rtl_core.c | 14 ++++++------- drivers/staging/rtl8192e/rtl8192e/rtl_core.h | 4 ++-- drivers/staging/rtl8192e/rtl8192e/rtl_dm.c | 20 +++++++++---------- drivers/staging/rtl8192e/rtl8192e/rtl_wx.c | 12 +++++------ drivers/staging/rtl8192e/rtllib.h | 2 +- drivers/staging/rtl8192e/rtllib_rx.c | 2 +- 8 files changed, 43 insertions(+), 43 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c index 2ccd1e0542c2..0eeb9b2daefc 100644 --- a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c +++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c @@ -617,7 +617,7 @@ bool rtl92e_start_adapter(struct net_device *dev) start: rtl92e_reset_desc_ring(dev); priv->Rf_Mode = RF_OP_By_SW_3wire; - if (priv->ResetProgress == RESET_TYPE_NORESET) { + if (priv->rst_progress == RESET_TYPE_NORESET) { rtl92e_writeb(dev, ANAPAR, 0x37); mdelay(500); } @@ -650,7 +650,7 @@ start: } priv->LoopbackMode = RTL819X_NO_LOOPBACK; - if (priv->ResetProgress == RESET_TYPE_NORESET) { + if (priv->rst_progress == RESET_TYPE_NORESET) { ulRegRead = rtl92e_readl(dev, CPU_GEN); if (priv->LoopbackMode == RTL819X_NO_LOOPBACK) ulRegRead = (ulRegRead & CPU_GEN_NO_LOOPBACK_MSK) | @@ -699,7 +699,7 @@ start: rtl92e_writeb(dev, ACK_TIMEOUT, 0x30); - if (priv->ResetProgress == RESET_TYPE_NORESET) + if (priv->rst_progress == RESET_TYPE_NORESET) 
rtl92e_set_wireless_mode(dev, priv->rtllib->mode); rtl92e_cam_reset(dev); { @@ -739,7 +739,7 @@ start: } } - if (priv->ResetProgress == RESET_TYPE_NORESET) { + if (priv->rst_progress == RESET_TYPE_NORESET) { rtStatus = rtl92e_config_phy(dev); if (!rtStatus) { netdev_info(dev, "RF Config failed\n"); @@ -766,7 +766,7 @@ start: else priv->Rf_Mode = RF_OP_By_SW_3wire; - if (priv->ResetProgress == RESET_TYPE_NORESET) { + if (priv->rst_progress == RESET_TYPE_NORESET) { rtl92e_dm_init_txpower_tracking(dev); if (priv->IC_Cut >= IC_VersionCut_D) { @@ -795,7 +795,7 @@ start: } priv->CCKPresentAttentuation_40Mdefault = 0; priv->CCKPresentAttentuation_difference = 0; - priv->CCKPresentAttentuation = + priv->cck_present_attn = priv->CCKPresentAttentuation_20Mdefault; priv->btxpower_tracking = false; } diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c index c357adf95a3d..ac3c7f047ca2 100644 --- a/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c +++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c @@ -1008,16 +1008,16 @@ static void _rtl92e_cck_tx_power_track_bw_switch_tssi(struct net_device *dev) switch (priv->CurrentChannelBW) { case HT_CHANNEL_WIDTH_20: - priv->CCKPresentAttentuation = + priv->cck_present_attn = priv->CCKPresentAttentuation_20Mdefault + priv->CCKPresentAttentuation_difference; - if (priv->CCKPresentAttentuation > + if (priv->cck_present_attn > (CCKTxBBGainTableLength-1)) - priv->CCKPresentAttentuation = + priv->cck_present_attn = CCKTxBBGainTableLength-1; - if (priv->CCKPresentAttentuation < 0) - priv->CCKPresentAttentuation = 0; + if (priv->cck_present_attn < 0) + priv->cck_present_attn = 0; if (priv->rtllib->current_network.channel == 14 && !priv->bcck_in_ch14) { @@ -1033,16 +1033,16 @@ static void _rtl92e_cck_tx_power_track_bw_switch_tssi(struct net_device *dev) break; case HT_CHANNEL_WIDTH_20_40: - priv->CCKPresentAttentuation = + priv->cck_present_attn = priv->CCKPresentAttentuation_40Mdefault + 
priv->CCKPresentAttentuation_difference; - if (priv->CCKPresentAttentuation > + if (priv->cck_present_attn > (CCKTxBBGainTableLength - 1)) - priv->CCKPresentAttentuation = + priv->cck_present_attn = CCKTxBBGainTableLength-1; - if (priv->CCKPresentAttentuation < 0) - priv->CCKPresentAttentuation = 0; + if (priv->cck_present_attn < 0) + priv->cck_present_attn = 0; if (priv->rtllib->current_network.channel == 14 && !priv->bcck_in_ch14) { diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c index 94b8ed2e3489..1c4985dcc5eb 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c @@ -850,7 +850,7 @@ static void _rtl92e_init_priv_variable(struct net_device *dev) priv->rtllib->iw_mode = IW_MODE_INFRA; priv->rtllib->net_promiscuous_md = false; priv->rtllib->intel_promiscuous_md_info.promiscuous_on = false; - priv->rtllib->intel_promiscuous_md_info.bFilterSourceStationFrame = + priv->rtllib->intel_promiscuous_md_info.fltr_src_sta_frame = false; priv->rtllib->ieee_up = 0; priv->retry_rts = DEFAULT_RETRY_RTS; @@ -861,11 +861,11 @@ static void _rtl92e_init_priv_variable(struct net_device *dev) priv->promisc = (dev->flags & IFF_PROMISC) ? 
1 : 0; priv->bcck_in_ch14 = false; priv->bfsync_processing = false; - priv->CCKPresentAttentuation = 0; + priv->cck_present_attn = 0; priv->rfa_txpowertrackingindex = 0; priv->rfc_txpowertrackingindex = 0; priv->CckPwEnl = 6; - priv->ResetProgress = RESET_TYPE_NORESET; + priv->rst_progress = RESET_TYPE_NORESET; priv->force_reset = false; memset(priv->rtllib->swcamtable, 0, sizeof(struct sw_cam_table) * 32); @@ -1135,8 +1135,8 @@ static void _rtl92e_if_silent_reset(struct net_device *dev) struct rtllib_device *ieee = priv->rtllib; unsigned long flag; - if (priv->ResetProgress == RESET_TYPE_NORESET) { - priv->ResetProgress = RESET_TYPE_SILENT; + if (priv->rst_progress == RESET_TYPE_NORESET) { + priv->rst_progress = RESET_TYPE_SILENT; spin_lock_irqsave(&priv->rf_ps_lock, flag); if (priv->rf_change_in_progress) { @@ -1233,7 +1233,7 @@ RESET_START: rtl92e_cam_restore(dev); rtl92e_dm_restore_state(dev); END: - priv->ResetProgress = RESET_TYPE_NORESET; + priv->rst_progress = RESET_TYPE_NORESET; priv->reset_count++; priv->bResetInProgress = false; @@ -1396,7 +1396,7 @@ static void _rtl92e_watchdog_wq_cb(void *data) spin_unlock_irqrestore(&priv->tx_lock, flags); if (ResetType == RESET_TYPE_NORMAL) { - priv->ResetProgress = RESET_TYPE_NORMAL; + priv->rst_progress = RESET_TYPE_NORMAL; return; } diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h index 98c750730f87..dfbc9fbcc129 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h @@ -483,7 +483,7 @@ struct r8192_priv { u8 CCKPresentAttentuation_20Mdefault; u8 CCKPresentAttentuation_40Mdefault; s8 CCKPresentAttentuation_difference; - s8 CCKPresentAttentuation; + s8 cck_present_attn; long undecorated_smoothed_pwdb; u32 MCSTxPowerLevelOriginalOffset[6]; @@ -536,7 +536,7 @@ struct r8192_priv { u32 reset_count; - enum reset_type ResetProgress; + enum reset_type rst_progress; u16 TxCounter; u16 RxCounter; bool 
bResetInProgress; diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c b/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c index 767c746fc73d..234e85a25d45 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c @@ -267,7 +267,7 @@ static void _rtl92e_dm_check_ac_dc_power(struct net_device *dev) "PATH=/usr/bin:/bin", NULL}; - if (priv->ResetProgress == RESET_TYPE_SILENT) + if (priv->rst_progress == RESET_TYPE_SILENT) return; if (priv->rtllib->state != RTLLIB_LINKED) return; @@ -716,21 +716,21 @@ static void _rtl92e_dm_tx_power_tracking_callback_tssi(struct net_device *dev) } if (priv->CurrentChannelBW == HT_CHANNEL_WIDTH_20) - priv->CCKPresentAttentuation = + priv->cck_present_attn = priv->CCKPresentAttentuation_20Mdefault + priv->CCKPresentAttentuation_difference; else - priv->CCKPresentAttentuation = + priv->cck_present_attn = priv->CCKPresentAttentuation_40Mdefault + priv->CCKPresentAttentuation_difference; - if (priv->CCKPresentAttentuation > (CCKTxBBGainTableLength-1)) - priv->CCKPresentAttentuation = CCKTxBBGainTableLength-1; - if (priv->CCKPresentAttentuation < 0) - priv->CCKPresentAttentuation = 0; + if (priv->cck_present_attn > (CCKTxBBGainTableLength-1)) + priv->cck_present_attn = CCKTxBBGainTableLength-1; + if (priv->cck_present_attn < 0) + priv->cck_present_attn = 0; - if (priv->CCKPresentAttentuation > -1 && - priv->CCKPresentAttentuation < CCKTxBBGainTableLength) { + if (priv->cck_present_attn > -1 && + priv->cck_present_attn < CCKTxBBGainTableLength) { if (priv->rtllib->current_network.channel == 14 && !priv->bcck_in_ch14) { priv->bcck_in_ch14 = true; @@ -963,7 +963,7 @@ static void _rtl92e_dm_cck_tx_power_adjust_tssi(struct net_device *dev, { u32 TempVal; struct r8192_priv *priv = rtllib_priv(dev); - u8 attenuation = priv->CCKPresentAttentuation; + u8 attenuation = priv->cck_present_attn; TempVal = 0; if (!bInCH14) { diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c 
b/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c index c31e5b572e9e..7ff14aa9f476 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c @@ -1015,20 +1015,20 @@ static int _rtl92e_wx_set_promisc_mode(struct net_device *dev, u32 oid; u32 promiscuous_on; - u32 bFilterSourceStationFrame; + u32 fltr_src_sta_frame; if (copy_from_user(info_buf, wrqu->data.pointer, sizeof(info_buf))) return -EFAULT; oid = info_buf[0]; promiscuous_on = info_buf[1]; - bFilterSourceStationFrame = info_buf[2]; + fltr_src_sta_frame = info_buf[2]; if (oid == OID_RT_INTEL_PROMISCUOUS_MODE) { ieee->intel_promiscuous_md_info.promiscuous_on = (promiscuous_on) ? (true) : (false); - ieee->intel_promiscuous_md_info.bFilterSourceStationFrame = - (bFilterSourceStationFrame) ? (true) : (false); + ieee->intel_promiscuous_md_info.fltr_src_sta_frame = + (fltr_src_sta_frame) ? (true) : (false); (promiscuous_on) ? (rtllib_EnableIntelPromiscuousMode(dev, false)) : (rtllib_DisableIntelPromiscuousMode(dev, false)); @@ -1036,7 +1036,7 @@ static int _rtl92e_wx_set_promisc_mode(struct net_device *dev, netdev_info(dev, "=======>%s(), on = %d, filter src sta = %d\n", __func__, promiscuous_on, - bFilterSourceStationFrame); + fltr_src_sta_frame); } else { return -1; } @@ -1055,7 +1055,7 @@ static int _rtl92e_wx_get_promisc_mode(struct net_device *dev, snprintf(extra, 45, "PromiscuousMode:%d, FilterSrcSTAFrame:%d", ieee->intel_promiscuous_md_info.promiscuous_on, - ieee->intel_promiscuous_md_info.bFilterSourceStationFrame); + ieee->intel_promiscuous_md_info.fltr_src_sta_frame); wrqu->data.length = strlen(extra) + 1; mutex_unlock(&priv->wx_mutex); diff --git a/drivers/staging/rtl8192e/rtllib.h b/drivers/staging/rtl8192e/rtllib.h index 493759cc6ccf..9c81ca38f4b1 100644 --- a/drivers/staging/rtl8192e/rtllib.h +++ b/drivers/staging/rtl8192e/rtllib.h @@ -1385,7 +1385,7 @@ struct rt_pmkid_list { struct rt_intel_promisc_mode { bool promiscuous_on; - bool bFilterSourceStationFrame; + 
bool fltr_src_sta_frame; }; diff --git a/drivers/staging/rtl8192e/rtllib_rx.c b/drivers/staging/rtl8192e/rtllib_rx.c index ccb61d8decd3..75190c389ccf 100644 --- a/drivers/staging/rtl8192e/rtllib_rx.c +++ b/drivers/staging/rtl8192e/rtllib_rx.c @@ -1000,7 +1000,7 @@ static int rtllib_rx_data_filter(struct rtllib_device *ieee, u16 fc, /* Filter packets sent by an STA that will be forwarded by AP */ if (ieee->intel_promiscuous_md_info.promiscuous_on && - ieee->intel_promiscuous_md_info.bFilterSourceStationFrame) { + ieee->intel_promiscuous_md_info.fltr_src_sta_frame) { if ((fc & RTLLIB_FCTL_TODS) && !(fc & RTLLIB_FCTL_FROMDS) && !ether_addr_equal(dst, ieee->current_network.bssid) && ether_addr_equal(bssid, ieee->current_network.bssid)) { From 41fa3d42a3210f09771a10d4e30896c6e3062a82 Mon Sep 17 00:00:00 2001 From: Philipp Hortmann Date: Sun, 13 Nov 2022 05:36:09 +0100 Subject: [PATCH 1750/4122] staging: rtl8192e: Rename InterruptLog, RxCounter and bHwRfOffAction Rename variable InterruptLog to int_log, RxCounter to rx_ctr and bHwRfOffAction to hw_rf_off_action to avoid CamelCase which is not accepted by checkpatch. 
Signed-off-by: Philipp Hortmann Link: https://lore.kernel.org/r/82ea07ddd894ac9b863ce90ddb9ba78065bd1f4e.1668313325.git.philipp.g.hortmann@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c | 6 +++--- drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c | 4 ++-- drivers/staging/rtl8192e/rtl8192e/rtl_core.c | 8 ++++---- drivers/staging/rtl8192e/rtl8192e/rtl_core.h | 6 +++--- drivers/staging/rtl8192e/rtl8192e/rtl_dm.c | 2 +- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c index 0eeb9b2daefc..e530f917fd23 100644 --- a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c +++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c @@ -1928,7 +1928,7 @@ void rtl92e_stop_adapter(struct net_device *dev, bool reset) if (!reset) { mdelay(150); - priv->bHwRfOffAction = 2; + priv->hw_rf_off_action = 2; if (!priv->rtllib->bSupportRemoteWakeUp) { rtl92e_set_rf_off(dev); @@ -2129,7 +2129,7 @@ bool rtl92e_is_rx_stuck(struct net_device *dev) SlotIndex = (priv->SilentResetRxSlotIndex++)%SilentResetRxSoltNum; - if (priv->RxCounter == RegRxCounter) { + if (priv->rx_ctr == RegRxCounter) { priv->SilentResetRxStuckEvent[SlotIndex] = 1; for (i = 0; i < SilentResetRxSoltNum; i++) @@ -2147,7 +2147,7 @@ bool rtl92e_is_rx_stuck(struct net_device *dev) priv->SilentResetRxStuckEvent[SlotIndex] = 0; } - priv->RxCounter = RegRxCounter; + priv->rx_ctr = RegRxCounter; return bStuck; } diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c index ac3c7f047ca2..a813eded4cb3 100644 --- a/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c +++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c @@ -1342,7 +1342,7 @@ static bool _rtl92e_set_rf_power_state(struct net_device *dev, mdelay(1); rtl92e_set_bb_reg(dev, rFPGA0_AnalogParameter1, 0x4, 0x1); - priv->bHwRfOffAction = 0; + priv->hw_rf_off_action = 0; 
rtl92e_set_bb_reg(dev, rFPGA0_XA_RFInterfaceOE, BIT4, 0x1); @@ -1450,7 +1450,7 @@ bool rtl92e_set_rf_power_state(struct net_device *dev, bool bResult = false; if (rf_power_state == priv->rtllib->rf_power_state && - priv->bHwRfOffAction == 0) { + priv->hw_rf_off_action == 0) { return bResult; } diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c index 1c4985dcc5eb..43601ec8d903 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c @@ -869,13 +869,13 @@ static void _rtl92e_init_priv_variable(struct net_device *dev) priv->force_reset = false; memset(priv->rtllib->swcamtable, 0, sizeof(struct sw_cam_table) * 32); - memset(&priv->InterruptLog, 0, sizeof(struct log_int_8190)); - priv->RxCounter = 0; + memset(&priv->int_log, 0, sizeof(struct log_int_8190)); + priv->rx_ctr = 0; priv->rtllib->wx_set_enc = 0; priv->hw_radio_off = false; priv->rtllib->rf_off_reason = 0; priv->rf_change_in_progress = false; - priv->bHwRfOffAction = 0; + priv->hw_rf_off_action = 0; priv->SetRFPowerStateInProgress = false; priv->rtllib->pwr_save_ctrl.bLeisurePs = true; priv->rtllib->LPSDelayCnt = 0; @@ -2189,7 +2189,7 @@ static irqreturn_t _rtl92e_irq(int irq, void *netdev) if (inta & IMR_ROK) { priv->stats.rxint++; - priv->InterruptLog.nIMR_ROK++; + priv->int_log.nIMR_ROK++; tasklet_schedule(&priv->irq_rx_tasklet); } diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h index dfbc9fbcc129..b1656d4ecbad 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h @@ -330,7 +330,7 @@ struct r8192_priv { struct work_struct reset_wq; - struct log_int_8190 InterruptLog; + struct log_int_8190 int_log; enum rt_customer_id CustomerID; @@ -469,7 +469,7 @@ struct r8192_priv { u16 reg_chnl_plan; u16 ChannelPlan; - u8 bHwRfOffAction; + u8 hw_rf_off_action; bool rf_change_in_progress; bool 
SetRFPowerStateInProgress; @@ -538,7 +538,7 @@ struct r8192_priv { enum reset_type rst_progress; u16 TxCounter; - u16 RxCounter; + u16 rx_ctr; bool bResetInProgress; bool force_reset; bool force_lps; diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c b/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c index 234e85a25d45..e0f6f1405c17 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c @@ -1693,7 +1693,7 @@ static void _rtl92e_dm_check_rf_ctrl_gpio(void *data) if (bActuallySet) { mdelay(1000); - priv->bHwRfOffAction = 1; + priv->hw_rf_off_action = 1; rtl92e_set_rf_state(dev, rf_power_state_to_set, RF_CHANGE_BY_HW); if (priv->hw_radio_off) argv[1] = "RFOFF"; From ccdbe14b77a5e39496baf632e157f9daf322dd27 Mon Sep 17 00:00:00 2001 From: Philipp Hortmann Date: Sun, 13 Nov 2022 05:36:15 +0100 Subject: [PATCH 1751/4122] staging: rtl8192e: Rename pHTInfo Rename variable pHTInfo to ht_info to avoid CamelCase which is not accepted by checkpatch. Signed-off-by: Philipp Hortmann Link: https://lore.kernel.org/r/88cdc0ef393c92cb2102a66893c5320e8c8606df.1668313325.git.philipp.g.hortmann@gmail.com Signed-off-by: Greg Kroah-Hartman --- .../staging/rtl8192e/rtl8192e/r8192E_dev.c | 10 +- drivers/staging/rtl8192e/rtl8192e/rtl_cam.c | 2 +- drivers/staging/rtl8192e/rtl8192e/rtl_core.c | 12 +- drivers/staging/rtl8192e/rtl8192e/rtl_dm.c | 50 +-- drivers/staging/rtl8192e/rtl8192e/rtl_wx.c | 2 +- drivers/staging/rtl8192e/rtl819x_BAProc.c | 20 +- drivers/staging/rtl8192e/rtl819x_HTProc.c | 354 +++++++++--------- drivers/staging/rtl8192e/rtl819x_TSProc.c | 2 +- drivers/staging/rtl8192e/rtllib.h | 4 +- drivers/staging/rtl8192e/rtllib_module.c | 6 +- drivers/staging/rtl8192e/rtllib_rx.c | 12 +- drivers/staging/rtl8192e/rtllib_softmac.c | 78 ++-- drivers/staging/rtl8192e/rtllib_softmac_wx.c | 8 +- drivers/staging/rtl8192e/rtllib_tx.c | 54 +-- 14 files changed, 307 insertions(+), 307 deletions(-) diff --git 
a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c index e530f917fd23..b9c846015d28 100644 --- a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c +++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c @@ -1978,7 +1978,7 @@ void rtl92e_update_ratr_table(struct net_device *dev) break; case IEEE_N_24G: case IEEE_N_5G: - if (ieee->pHTInfo->peer_mimo_ps == 0) { + if (ieee->ht_info->peer_mimo_ps == 0) { ratr_value &= 0x0007F007; } else { if (priv->rf_type == RF_1T2R) @@ -1991,11 +1991,11 @@ void rtl92e_update_ratr_table(struct net_device *dev) break; } ratr_value &= 0x0FFFFFFF; - if (ieee->pHTInfo->cur_tx_bw40mhz && - ieee->pHTInfo->bCurShortGI40MHz) + if (ieee->ht_info->cur_tx_bw40mhz && + ieee->ht_info->bCurShortGI40MHz) ratr_value |= 0x80000000; - else if (!ieee->pHTInfo->cur_tx_bw40mhz && - ieee->pHTInfo->bCurShortGI20MHz) + else if (!ieee->ht_info->cur_tx_bw40mhz && + ieee->ht_info->bCurShortGI20MHz) ratr_value |= 0x80000000; rtl92e_writel(dev, RATR0+rate_index*4, ratr_value); rtl92e_writeb(dev, UFWP, 1); diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_cam.c b/drivers/staging/rtl8192e/rtl8192e/rtl_cam.c index 9d8d4837e6b2..a4d65b4d99c2 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_cam.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_cam.c @@ -41,7 +41,7 @@ void rtl92e_enable_hw_security_config(struct net_device *dev) } ieee->hwsec_active = 1; - if ((ieee->pHTInfo->iot_action & HT_IOT_ACT_PURE_N_MODE) || !hwwep) { + if ((ieee->ht_info->iot_action & HT_IOT_ACT_PURE_N_MODE) || !hwwep) { ieee->hwsec_active = 0; SECR_value &= ~SCR_RxDecEnable; } diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c index 43601ec8d903..399ee9783f99 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c @@ -307,7 +307,7 @@ static void _rtl92e_update_cap(struct net_device *dev, u16 cap) u8 cur_slot_time = priv->slot_time; if ((cap & 
WLAN_CAPABILITY_SHORT_SLOT_TIME) && - (!priv->rtllib->pHTInfo->current_rt2rt_long_slot_time)) { + (!priv->rtllib->ht_info->current_rt2rt_long_slot_time)) { if (cur_slot_time != SHORT_SLOT_TIME) { slot_time_val = SHORT_SLOT_TIME; priv->rtllib->SetHwRegHandler(dev, @@ -339,10 +339,10 @@ static void _rtl92e_update_beacon(void *data) struct rtllib_device *ieee = priv->rtllib; struct rtllib_network *net = &ieee->current_network; - if (ieee->pHTInfo->bCurrentHTSupport) + if (ieee->ht_info->bCurrentHTSupport) HT_update_self_and_peer_setting(ieee, net); - ieee->pHTInfo->current_rt2rt_long_slot_time = net->bssht.bd_rt2rt_long_slot_time; - ieee->pHTInfo->RT2RT_HT_Mode = net->bssht.rt2rt_ht_mode; + ieee->ht_info->current_rt2rt_long_slot_time = net->bssht.bd_rt2rt_long_slot_time; + ieee->ht_info->RT2RT_HT_Mode = net->bssht.rt2rt_ht_mode; _rtl92e_update_cap(dev, net->capability); } @@ -672,9 +672,9 @@ void rtl92e_set_wireless_mode(struct net_device *dev, u8 wireless_mode) if ((wireless_mode == WIRELESS_MODE_N_24G) || (wireless_mode == WIRELESS_MODE_N_5G)) { - priv->rtllib->pHTInfo->enable_ht = 1; + priv->rtllib->ht_info->enable_ht = 1; } else { - priv->rtllib->pHTInfo->enable_ht = 0; + priv->rtllib->ht_info->enable_ht = 0; } _rtl92e_refresh_support_rate(priv); } diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c b/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c index e0f6f1405c17..a18393c8a833 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c @@ -321,7 +321,7 @@ void rtl92e_init_adaptive_rate(struct net_device *dev) static void _rtl92e_dm_check_rate_adaptive(struct net_device *dev) { struct r8192_priv *priv = rtllib_priv(dev); - struct rt_hi_throughput *pHTInfo = priv->rtllib->pHTInfo; + struct rt_hi_throughput *ht_info = priv->rtllib->ht_info; struct rate_adaptive *pra = &priv->rate_adaptive; u32 currentRATR, targetRATR = 0; u32 LowRSSIThreshForRA = 0, HighRSSIThreshForRA = 0; @@ -340,10 +340,10 @@ static void 
_rtl92e_dm_check_rate_adaptive(struct net_device *dev) if (priv->rtllib->state == RTLLIB_LINKED) { - bshort_gi_enabled = (pHTInfo->cur_tx_bw40mhz && - pHTInfo->bCurShortGI40MHz) || - (!pHTInfo->cur_tx_bw40mhz && - pHTInfo->bCurShortGI20MHz); + bshort_gi_enabled = (ht_info->cur_tx_bw40mhz && + ht_info->bCurShortGI40MHz) || + (!ht_info->cur_tx_bw40mhz && + ht_info->bCurShortGI20MHz); pra->upper_rssi_threshold_ratr = (pra->upper_rssi_threshold_ratr & (~BIT31)) | @@ -1532,7 +1532,7 @@ void rtl92e_dm_init_edca_turbo(struct net_device *dev) static void _rtl92e_dm_check_edca_turbo(struct net_device *dev) { struct r8192_priv *priv = rtllib_priv(dev); - struct rt_hi_throughput *pHTInfo = priv->rtllib->pHTInfo; + struct rt_hi_throughput *ht_info = priv->rtllib->ht_info; static unsigned long lastTxOkCnt; static unsigned long lastRxOkCnt; @@ -1543,18 +1543,18 @@ static void _rtl92e_dm_check_edca_turbo(struct net_device *dev) goto dm_CheckEdcaTurbo_EXIT; if (priv->rtllib->state != RTLLIB_LINKED) goto dm_CheckEdcaTurbo_EXIT; - if (priv->rtllib->pHTInfo->iot_action & HT_IOT_ACT_DISABLE_EDCA_TURBO) + if (priv->rtllib->ht_info->iot_action & HT_IOT_ACT_DISABLE_EDCA_TURBO) goto dm_CheckEdcaTurbo_EXIT; if (!priv->rtllib->bis_any_nonbepkts) { curTxOkCnt = priv->stats.txbytesunicast - lastTxOkCnt; curRxOkCnt = priv->stats.rxbytesunicast - lastRxOkCnt; - if (pHTInfo->iot_action & HT_IOT_ACT_EDCA_BIAS_ON_RX) { + if (ht_info->iot_action & HT_IOT_ACT_EDCA_BIAS_ON_RX) { if (curTxOkCnt > 4*curRxOkCnt) { if (priv->bis_cur_rdlstate || !priv->bcurrent_turbo_EDCA) { rtl92e_writel(dev, EDCAPARA_BE, - edca_setting_UL[pHTInfo->IOTPeer]); + edca_setting_UL[ht_info->IOTPeer]); priv->bis_cur_rdlstate = false; } } else { @@ -1562,10 +1562,10 @@ static void _rtl92e_dm_check_edca_turbo(struct net_device *dev) !priv->bcurrent_turbo_EDCA) { if (priv->rtllib->mode == WIRELESS_MODE_G) rtl92e_writel(dev, EDCAPARA_BE, - edca_setting_DL_GMode[pHTInfo->IOTPeer]); + edca_setting_DL_GMode[ht_info->IOTPeer]); else 
rtl92e_writel(dev, EDCAPARA_BE, - edca_setting_DL[pHTInfo->IOTPeer]); + edca_setting_DL[ht_info->IOTPeer]); priv->bis_cur_rdlstate = true; } } @@ -1576,17 +1576,17 @@ static void _rtl92e_dm_check_edca_turbo(struct net_device *dev) !priv->bcurrent_turbo_EDCA) { if (priv->rtllib->mode == WIRELESS_MODE_G) rtl92e_writel(dev, EDCAPARA_BE, - edca_setting_DL_GMode[pHTInfo->IOTPeer]); + edca_setting_DL_GMode[ht_info->IOTPeer]); else rtl92e_writel(dev, EDCAPARA_BE, - edca_setting_DL[pHTInfo->IOTPeer]); + edca_setting_DL[ht_info->IOTPeer]); priv->bis_cur_rdlstate = true; } } else { if (priv->bis_cur_rdlstate || !priv->bcurrent_turbo_EDCA) { rtl92e_writel(dev, EDCAPARA_BE, - edca_setting_UL[pHTInfo->IOTPeer]); + edca_setting_UL[ht_info->IOTPeer]); priv->bis_cur_rdlstate = false; } @@ -1621,23 +1621,23 @@ static void _rtl92e_dm_init_cts_to_self(struct net_device *dev) static void _rtl92e_dm_cts_to_self(struct net_device *dev) { struct r8192_priv *priv = rtllib_priv((struct net_device *)dev); - struct rt_hi_throughput *pHTInfo = priv->rtllib->pHTInfo; + struct rt_hi_throughput *ht_info = priv->rtllib->ht_info; static unsigned long lastTxOkCnt; static unsigned long lastRxOkCnt; unsigned long curTxOkCnt = 0; unsigned long curRxOkCnt = 0; if (!priv->rtllib->bCTSToSelfEnable) { - pHTInfo->iot_action &= ~HT_IOT_ACT_FORCED_CTS2SELF; + ht_info->iot_action &= ~HT_IOT_ACT_FORCED_CTS2SELF; return; } - if (pHTInfo->IOTPeer == HT_IOT_PEER_BROADCOM) { + if (ht_info->IOTPeer == HT_IOT_PEER_BROADCOM) { curTxOkCnt = priv->stats.txbytesunicast - lastTxOkCnt; curRxOkCnt = priv->stats.rxbytesunicast - lastRxOkCnt; if (curRxOkCnt > 4*curTxOkCnt) - pHTInfo->iot_action &= ~HT_IOT_ACT_FORCED_CTS2SELF; + ht_info->iot_action &= ~HT_IOT_ACT_FORCED_CTS2SELF; else - pHTInfo->iot_action |= HT_IOT_ACT_FORCED_CTS2SELF; + ht_info->iot_action |= HT_IOT_ACT_FORCED_CTS2SELF; lastTxOkCnt = priv->stats.txbytesunicast; lastRxOkCnt = priv->stats.rxbytesunicast; @@ -1648,10 +1648,10 @@ static void 
_rtl92e_dm_cts_to_self(struct net_device *dev) static void _rtl92e_dm_init_wa_broadcom_iot(struct net_device *dev) { struct r8192_priv *priv = rtllib_priv((struct net_device *)dev); - struct rt_hi_throughput *pHTInfo = priv->rtllib->pHTInfo; + struct rt_hi_throughput *ht_info = priv->rtllib->ht_info; - pHTInfo->bWAIotBroadcom = false; - pHTInfo->WAIotTH = WAIotTHVal; + ht_info->bWAIotBroadcom = false; + ht_info->WAIotTH = WAIotTHVal; } static void _rtl92e_dm_check_rf_ctrl_gpio(void *data) @@ -1992,7 +1992,7 @@ static void _rtl92e_dm_fsync_timer_callback(struct timer_list *t) if (priv->rtllib->state == RTLLIB_LINKED && priv->rtllib->bfsync_enable && - (priv->rtllib->pHTInfo->iot_action & HT_IOT_ACT_CDD_FSYNC)) { + (priv->rtllib->ht_info->iot_action & HT_IOT_ACT_CDD_FSYNC)) { u32 rate_bitmap; for (rate_index = 0; rate_index <= 27; rate_index++) { @@ -2163,7 +2163,7 @@ static void _rtl92e_dm_check_fsync(struct net_device *dev) static u32 reset_cnt; if (priv->rtllib->state == RTLLIB_LINKED && - priv->rtllib->pHTInfo->IOTPeer == HT_IOT_PEER_BROADCOM) { + priv->rtllib->ht_info->IOTPeer == HT_IOT_PEER_BROADCOM) { if (priv->rtllib->bfsync_enable == 0) { switch (priv->rtllib->fsync_state) { case Default_Fsync: @@ -2288,7 +2288,7 @@ static void _rtl92e_dm_dynamic_tx_power(struct net_device *dev) priv->bDynamicTxLowPower = false; return; } - if ((priv->rtllib->pHTInfo->IOTPeer == HT_IOT_PEER_ATHEROS) && + if ((priv->rtllib->ht_info->IOTPeer == HT_IOT_PEER_ATHEROS) && (priv->rtllib->mode == IEEE_G)) { txhipower_threshold = TX_POWER_ATHEROAP_THRESH_HIGH; txlowpower_threshold = TX_POWER_ATHEROAP_THRESH_LOW; diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c b/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c index 7ff14aa9f476..bf0030144e5d 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c @@ -915,7 +915,7 @@ static int _rtl92e_wx_set_encode_ext(struct net_device *dev, key, 0); } else { if ((ieee->pairwise_key_type == 
KEY_TYPE_CCMP) && - ieee->pHTInfo->bCurrentHTSupport) + ieee->ht_info->bCurrentHTSupport) rtl92e_writeb(dev, 0x173, 1); rtl92e_set_key(dev, 4, idx, alg, (u8 *)ieee->ap_mac_addr, 0, key); diff --git a/drivers/staging/rtl8192e/rtl819x_BAProc.c b/drivers/staging/rtl8192e/rtl819x_BAProc.c index 7c0369319f97..acc19514bca6 100644 --- a/drivers/staging/rtl8192e/rtl819x_BAProc.c +++ b/drivers/staging/rtl8192e/rtl819x_BAProc.c @@ -244,13 +244,13 @@ int rtllib_rx_ADDBAReq(struct rtllib_device *ieee, struct sk_buff *skb) pBaStartSeqCtrl = (union sequence_control *)(req + 7); if (!ieee->current_network.qos_data.active || - !ieee->pHTInfo->bCurrentHTSupport || - (ieee->pHTInfo->iot_action & HT_IOT_ACT_REJECT_ADDBA_REQ)) { + !ieee->ht_info->bCurrentHTSupport || + (ieee->ht_info->iot_action & HT_IOT_ACT_REJECT_ADDBA_REQ)) { rc = ADDBA_STATUS_REFUSED; netdev_warn(ieee->dev, "Failed to reply on ADDBA_REQ as some capability is not ready(%d, %d)\n", ieee->current_network.qos_data.active, - ieee->pHTInfo->bCurrentHTSupport); + ieee->ht_info->bCurrentHTSupport); goto OnADDBAReq_Fail; } if (!GetTs(ieee, (struct ts_common_info **)&pTS, dst, @@ -277,7 +277,7 @@ int rtllib_rx_ADDBAReq(struct rtllib_device *ieee, struct sk_buff *skb) pBA->ba_start_seq_ctrl = *pBaStartSeqCtrl; if (ieee->GetHalfNmodeSupportByAPsHandler(ieee->dev) || - (ieee->pHTInfo->iot_action & HT_IOT_ACT_ALLOW_PEER_AGG_ONE_PKT)) + (ieee->ht_info->iot_action & HT_IOT_ACT_ALLOW_PEER_AGG_ONE_PKT)) pBA->ba_param_set.field.buffer_size = 1; else pBA->ba_param_set.field.buffer_size = 32; @@ -326,13 +326,13 @@ int rtllib_rx_ADDBARsp(struct rtllib_device *ieee, struct sk_buff *skb) pBaTimeoutVal = (u16 *)(tag + 7); if (!ieee->current_network.qos_data.active || - !ieee->pHTInfo->bCurrentHTSupport || - !ieee->pHTInfo->bCurrentAMPDUEnable) { + !ieee->ht_info->bCurrentHTSupport || + !ieee->ht_info->bCurrentAMPDUEnable) { netdev_warn(ieee->dev, "reject to ADDBA_RSP as some capability is not ready(%d, %d, %d)\n", 
ieee->current_network.qos_data.active, - ieee->pHTInfo->bCurrentHTSupport, - ieee->pHTInfo->bCurrentAMPDUEnable); + ieee->ht_info->bCurrentHTSupport, + ieee->ht_info->bCurrentAMPDUEnable); ReasonCode = DELBA_REASON_UNKNOWN_BA; goto OnADDBARsp_Reject; } @@ -413,11 +413,11 @@ int rtllib_rx_DELBA(struct rtllib_device *ieee, struct sk_buff *skb) } if (!ieee->current_network.qos_data.active || - !ieee->pHTInfo->bCurrentHTSupport) { + !ieee->ht_info->bCurrentHTSupport) { netdev_warn(ieee->dev, "received DELBA while QOS or HT is not supported(%d, %d)\n", ieee->current_network. qos_data.active, - ieee->pHTInfo->bCurrentHTSupport); + ieee->ht_info->bCurrentHTSupport); return -1; } diff --git a/drivers/staging/rtl8192e/rtl819x_HTProc.c b/drivers/staging/rtl8192e/rtl819x_HTProc.c index 2c4c1cb4cbed..fe30a291e64c 100644 --- a/drivers/staging/rtl8192e/rtl819x_HTProc.c +++ b/drivers/staging/rtl8192e/rtl819x_HTProc.c @@ -69,48 +69,48 @@ static u8 LINKSYS_MARVELL_4400N[3] = {0x00, 0x14, 0xa4}; void HTUpdateDefaultSetting(struct rtllib_device *ieee) { - struct rt_hi_throughput *pHTInfo = ieee->pHTInfo; + struct rt_hi_throughput *ht_info = ieee->ht_info; - pHTInfo->bRegShortGI20MHz = 1; - pHTInfo->bRegShortGI40MHz = 1; + ht_info->bRegShortGI20MHz = 1; + ht_info->bRegShortGI40MHz = 1; - pHTInfo->bRegBW40MHz = 1; + ht_info->bRegBW40MHz = 1; - if (pHTInfo->bRegBW40MHz) - pHTInfo->bRegSuppCCK = 1; + if (ht_info->bRegBW40MHz) + ht_info->bRegSuppCCK = 1; else - pHTInfo->bRegSuppCCK = true; + ht_info->bRegSuppCCK = true; - pHTInfo->nAMSDU_MaxSize = 7935UL; - pHTInfo->bAMSDU_Support = 0; + ht_info->nAMSDU_MaxSize = 7935UL; + ht_info->bAMSDU_Support = 0; - pHTInfo->bAMPDUEnable = 1; - pHTInfo->AMPDU_Factor = 2; - pHTInfo->MPDU_Density = 0; + ht_info->bAMPDUEnable = 1; + ht_info->AMPDU_Factor = 2; + ht_info->MPDU_Density = 0; - pHTInfo->self_mimo_ps = 3; - if (pHTInfo->self_mimo_ps == 2) - pHTInfo->self_mimo_ps = 3; + ht_info->self_mimo_ps = 3; + if (ht_info->self_mimo_ps == 2) + 
ht_info->self_mimo_ps = 3; ieee->tx_dis_rate_fallback = 0; ieee->tx_use_drv_assinged_rate = 0; ieee->bTxEnableFwCalcDur = 1; - pHTInfo->reg_rt2rt_aggregation = 1; + ht_info->reg_rt2rt_aggregation = 1; - pHTInfo->reg_rx_reorder_enable = 1; - pHTInfo->rx_reorder_win_size = 64; - pHTInfo->rx_reorder_pending_time = 30; + ht_info->reg_rx_reorder_enable = 1; + ht_info->rx_reorder_win_size = 64; + ht_info->rx_reorder_pending_time = 30; } static u16 HTMcsToDataRate(struct rtllib_device *ieee, u8 nMcsRate) { - struct rt_hi_throughput *pHTInfo = ieee->pHTInfo; + struct rt_hi_throughput *ht_info = ieee->ht_info; - u8 is40MHz = (pHTInfo->bCurBW40MHz) ? 1 : 0; - u8 isShortGI = (pHTInfo->bCurBW40MHz) ? - ((pHTInfo->bCurShortGI40MHz) ? 1 : 0) : - ((pHTInfo->bCurShortGI20MHz) ? 1 : 0); + u8 is40MHz = (ht_info->bCurBW40MHz) ? 1 : 0; + u8 isShortGI = (ht_info->bCurBW40MHz) ? + ((ht_info->bCurShortGI40MHz) ? 1 : 0) : + ((ht_info->bCurShortGI20MHz) ? 1 : 0); return MCS_DATA_RATE[is40MHz][isShortGI][(nMcsRate & 0x7f)]; } @@ -166,45 +166,45 @@ bool IsHTHalfNmodeAPs(struct rtllib_device *ieee) static void HTIOTPeerDetermine(struct rtllib_device *ieee) { - struct rt_hi_throughput *pHTInfo = ieee->pHTInfo; + struct rt_hi_throughput *ht_info = ieee->ht_info; struct rtllib_network *net = &ieee->current_network; if (net->bssht.bd_rt2rt_aggregation) { - pHTInfo->IOTPeer = HT_IOT_PEER_REALTEK; + ht_info->IOTPeer = HT_IOT_PEER_REALTEK; if (net->bssht.rt2rt_ht_mode & RT_HT_CAP_USE_92SE) - pHTInfo->IOTPeer = HT_IOT_PEER_REALTEK_92SE; + ht_info->IOTPeer = HT_IOT_PEER_REALTEK_92SE; if (net->bssht.rt2rt_ht_mode & RT_HT_CAP_USE_SOFTAP) - pHTInfo->IOTPeer = HT_IOT_PEER_92U_SOFTAP; + ht_info->IOTPeer = HT_IOT_PEER_92U_SOFTAP; } else if (net->broadcom_cap_exist) { - pHTInfo->IOTPeer = HT_IOT_PEER_BROADCOM; + ht_info->IOTPeer = HT_IOT_PEER_BROADCOM; } else if (!memcmp(net->bssid, UNKNOWN_BORADCOM, 3) || !memcmp(net->bssid, LINKSYSWRT330_LINKSYSWRT300_BROADCOM, 3) || !memcmp(net->bssid, 
LINKSYSWRT350_LINKSYSWRT150_BROADCOM, 3)) { - pHTInfo->IOTPeer = HT_IOT_PEER_BROADCOM; + ht_info->IOTPeer = HT_IOT_PEER_BROADCOM; } else if ((memcmp(net->bssid, BELKINF5D8233V1_RALINK, 3) == 0) || (memcmp(net->bssid, BELKINF5D82334V3_RALINK, 3) == 0) || (memcmp(net->bssid, PCI_RALINK, 3) == 0) || (memcmp(net->bssid, EDIMAX_RALINK, 3) == 0) || (memcmp(net->bssid, AIRLINK_RALINK, 3) == 0) || net->ralink_cap_exist) { - pHTInfo->IOTPeer = HT_IOT_PEER_RALINK; + ht_info->IOTPeer = HT_IOT_PEER_RALINK; } else if ((net->atheros_cap_exist) || (memcmp(net->bssid, DLINK_ATHEROS_1, 3) == 0) || (memcmp(net->bssid, DLINK_ATHEROS_2, 3) == 0)) { - pHTInfo->IOTPeer = HT_IOT_PEER_ATHEROS; + ht_info->IOTPeer = HT_IOT_PEER_ATHEROS; } else if ((memcmp(net->bssid, CISCO_BROADCOM, 3) == 0) || net->cisco_cap_exist) { - pHTInfo->IOTPeer = HT_IOT_PEER_CISCO; + ht_info->IOTPeer = HT_IOT_PEER_CISCO; } else if ((memcmp(net->bssid, LINKSYS_MARVELL_4400N, 3) == 0) || net->marvell_cap_exist) { - pHTInfo->IOTPeer = HT_IOT_PEER_MARVELL; + ht_info->IOTPeer = HT_IOT_PEER_MARVELL; } else if (net->airgo_cap_exist) { - pHTInfo->IOTPeer = HT_IOT_PEER_AIRGO; + ht_info->IOTPeer = HT_IOT_PEER_AIRGO; } else { - pHTInfo->IOTPeer = HT_IOT_PEER_UNKNOWN; + ht_info->IOTPeer = HT_IOT_PEER_UNKNOWN; } - netdev_dbg(ieee->dev, "IOTPEER: %x\n", pHTInfo->IOTPeer); + netdev_dbg(ieee->dev, "IOTPEER: %x\n", ht_info->IOTPeer); } static u8 HTIOTActIsDisableMCS14(struct rtllib_device *ieee, u8 *PeerMacAddr) @@ -233,7 +233,7 @@ static u8 HTIOTActIsMgntUseCCK6M(struct rtllib_device *ieee, { u8 retValue = 0; - if (ieee->pHTInfo->IOTPeer == HT_IOT_PEER_BROADCOM) + if (ieee->ht_info->IOTPeer == HT_IOT_PEER_BROADCOM) retValue = 1; return retValue; @@ -243,40 +243,40 @@ static u8 HTIOTActIsCCDFsync(struct rtllib_device *ieee) { u8 retValue = 0; - if (ieee->pHTInfo->IOTPeer == HT_IOT_PEER_BROADCOM) + if (ieee->ht_info->IOTPeer == HT_IOT_PEER_BROADCOM) retValue = 1; return retValue; } static void HTIOTActDetermineRaFunc(struct 
rtllib_device *ieee, bool bPeerRx2ss) { - struct rt_hi_throughput *pHTInfo = ieee->pHTInfo; + struct rt_hi_throughput *ht_info = ieee->ht_info; - pHTInfo->iot_ra_func &= HT_IOT_RAFUNC_DISABLE_ALL; + ht_info->iot_ra_func &= HT_IOT_RAFUNC_DISABLE_ALL; - if (pHTInfo->IOTPeer == HT_IOT_PEER_RALINK && !bPeerRx2ss) - pHTInfo->iot_ra_func |= HT_IOT_RAFUNC_PEER_1R; + if (ht_info->IOTPeer == HT_IOT_PEER_RALINK && !bPeerRx2ss) + ht_info->iot_ra_func |= HT_IOT_RAFUNC_PEER_1R; - if (pHTInfo->iot_action & HT_IOT_ACT_AMSDU_ENABLE) - pHTInfo->iot_ra_func |= HT_IOT_RAFUNC_TX_AMSDU; + if (ht_info->iot_action & HT_IOT_ACT_AMSDU_ENABLE) + ht_info->iot_ra_func |= HT_IOT_RAFUNC_TX_AMSDU; } -void HTResetIOTSetting(struct rt_hi_throughput *pHTInfo) +void HTResetIOTSetting(struct rt_hi_throughput *ht_info) { - pHTInfo->iot_action = 0; - pHTInfo->IOTPeer = HT_IOT_PEER_UNKNOWN; - pHTInfo->iot_ra_func = 0; + ht_info->iot_action = 0; + ht_info->IOTPeer = HT_IOT_PEER_UNKNOWN; + ht_info->iot_ra_func = 0; } void HTConstructCapabilityElement(struct rtllib_device *ieee, u8 *posHTCap, u8 *len, u8 IsEncrypt, bool bAssoc) { - struct rt_hi_throughput *pHT = ieee->pHTInfo; + struct rt_hi_throughput *pHT = ieee->ht_info; struct ht_capab_ele *pCapELE = NULL; if (!posHTCap || !pHT) { netdev_warn(ieee->dev, - "%s(): posHTCap and pHTInfo are null\n", __func__); + "%s(): posHTCap and ht_info are null\n", __func__); return; } memset(posHTCap, 0, *len); @@ -352,7 +352,7 @@ void HTConstructCapabilityElement(struct rtllib_device *ieee, u8 *posHTCap, void HTConstructInfoElement(struct rtllib_device *ieee, u8 *posHTInfo, u8 *len, u8 IsEncrypt) { - struct rt_hi_throughput *pHT = ieee->pHTInfo; + struct rt_hi_throughput *pHT = ieee->ht_info; struct ht_info_ele *pHTInfoEle = (struct ht_info_ele *)posHTInfo; if (!posHTInfo || !pHTInfoEle) { @@ -509,7 +509,7 @@ void HTSetConnectBwMode(struct rtllib_device *ieee, void HTOnAssocRsp(struct rtllib_device *ieee) { - struct rt_hi_throughput *pHTInfo = ieee->pHTInfo; + struct 
rt_hi_throughput *ht_info = ieee->ht_info; struct ht_capab_ele *pPeerHTCap = NULL; struct ht_info_ele *pPeerHTInfo = NULL; u16 nMaxAMSDUSize = 0; @@ -518,22 +518,22 @@ void HTOnAssocRsp(struct rtllib_device *ieee) static const u8 EWC11NHTCap[] = { 0x00, 0x90, 0x4c, 0x33 }; static const u8 EWC11NHTInfo[] = { 0x00, 0x90, 0x4c, 0x34 }; - if (!pHTInfo->bCurrentHTSupport) { + if (!ht_info->bCurrentHTSupport) { netdev_warn(ieee->dev, "%s(): HT_DISABLE\n", __func__); return; } netdev_dbg(ieee->dev, "%s(): HT_ENABLE\n", __func__); - if (!memcmp(pHTInfo->PeerHTCapBuf, EWC11NHTCap, sizeof(EWC11NHTCap))) - pPeerHTCap = (struct ht_capab_ele *)(&pHTInfo->PeerHTCapBuf[4]); + if (!memcmp(ht_info->PeerHTCapBuf, EWC11NHTCap, sizeof(EWC11NHTCap))) + pPeerHTCap = (struct ht_capab_ele *)(&ht_info->PeerHTCapBuf[4]); else - pPeerHTCap = (struct ht_capab_ele *)(pHTInfo->PeerHTCapBuf); + pPeerHTCap = (struct ht_capab_ele *)(ht_info->PeerHTCapBuf); - if (!memcmp(pHTInfo->PeerHTInfoBuf, EWC11NHTInfo, sizeof(EWC11NHTInfo))) + if (!memcmp(ht_info->PeerHTInfoBuf, EWC11NHTInfo, sizeof(EWC11NHTInfo))) pPeerHTInfo = (struct ht_info_ele *) - (&pHTInfo->PeerHTInfoBuf[4]); + (&ht_info->PeerHTInfoBuf[4]); else - pPeerHTInfo = (struct ht_info_ele *)(pHTInfo->PeerHTInfoBuf); + pPeerHTInfo = (struct ht_info_ele *)(ht_info->PeerHTInfoBuf); #ifdef VERBOSE_DEBUG print_hex_dump_bytes("%s: ", __func__, DUMP_PREFIX_NONE, @@ -541,63 +541,63 @@ void HTOnAssocRsp(struct rtllib_device *ieee) #endif HTSetConnectBwMode(ieee, (enum ht_channel_width)(pPeerHTCap->ChlWidth), (enum ht_extchnl_offset)(pPeerHTInfo->ExtChlOffset)); - pHTInfo->cur_tx_bw40mhz = ((pPeerHTInfo->RecommemdedTxWidth == 1) ? + ht_info->cur_tx_bw40mhz = ((pPeerHTInfo->RecommemdedTxWidth == 1) ? true : false); - pHTInfo->bCurShortGI20MHz = ((pHTInfo->bRegShortGI20MHz) ? + ht_info->bCurShortGI20MHz = ((ht_info->bRegShortGI20MHz) ? ((pPeerHTCap->ShortGI20Mhz == 1) ? true : false) : false); - pHTInfo->bCurShortGI40MHz = ((pHTInfo->bRegShortGI40MHz) ? 
+ ht_info->bCurShortGI40MHz = ((ht_info->bRegShortGI40MHz) ? ((pPeerHTCap->ShortGI40Mhz == 1) ? true : false) : false); - pHTInfo->bCurSuppCCK = ((pHTInfo->bRegSuppCCK) ? + ht_info->bCurSuppCCK = ((ht_info->bRegSuppCCK) ? ((pPeerHTCap->DssCCk == 1) ? true : false) : false); - pHTInfo->bCurrent_AMSDU_Support = pHTInfo->bAMSDU_Support; + ht_info->bCurrent_AMSDU_Support = ht_info->bAMSDU_Support; nMaxAMSDUSize = (pPeerHTCap->MaxAMSDUSize == 0) ? 3839 : 7935; - if (pHTInfo->nAMSDU_MaxSize > nMaxAMSDUSize) - pHTInfo->nCurrent_AMSDU_MaxSize = nMaxAMSDUSize; + if (ht_info->nAMSDU_MaxSize > nMaxAMSDUSize) + ht_info->nCurrent_AMSDU_MaxSize = nMaxAMSDUSize; else - pHTInfo->nCurrent_AMSDU_MaxSize = pHTInfo->nAMSDU_MaxSize; + ht_info->nCurrent_AMSDU_MaxSize = ht_info->nAMSDU_MaxSize; - pHTInfo->bCurrentAMPDUEnable = pHTInfo->bAMPDUEnable; + ht_info->bCurrentAMPDUEnable = ht_info->bAMPDUEnable; if (ieee->rtllib_ap_sec_type && (ieee->rtllib_ap_sec_type(ieee) & (SEC_ALG_WEP | SEC_ALG_TKIP))) { - if ((pHTInfo->IOTPeer == HT_IOT_PEER_ATHEROS) || - (pHTInfo->IOTPeer == HT_IOT_PEER_UNKNOWN)) - pHTInfo->bCurrentAMPDUEnable = false; + if ((ht_info->IOTPeer == HT_IOT_PEER_ATHEROS) || + (ht_info->IOTPeer == HT_IOT_PEER_UNKNOWN)) + ht_info->bCurrentAMPDUEnable = false; } - if (!pHTInfo->reg_rt2rt_aggregation) { - if (pHTInfo->AMPDU_Factor > pPeerHTCap->MaxRxAMPDUFactor) - pHTInfo->CurrentAMPDUFactor = + if (!ht_info->reg_rt2rt_aggregation) { + if (ht_info->AMPDU_Factor > pPeerHTCap->MaxRxAMPDUFactor) + ht_info->CurrentAMPDUFactor = pPeerHTCap->MaxRxAMPDUFactor; else - pHTInfo->CurrentAMPDUFactor = pHTInfo->AMPDU_Factor; + ht_info->CurrentAMPDUFactor = ht_info->AMPDU_Factor; } else { if (ieee->current_network.bssht.bd_rt2rt_aggregation) { if (ieee->pairwise_key_type != KEY_TYPE_NA) - pHTInfo->CurrentAMPDUFactor = + ht_info->CurrentAMPDUFactor = pPeerHTCap->MaxRxAMPDUFactor; else - pHTInfo->CurrentAMPDUFactor = HT_AGG_SIZE_64K; + ht_info->CurrentAMPDUFactor = HT_AGG_SIZE_64K; } else { - 
pHTInfo->CurrentAMPDUFactor = min_t(u32, pPeerHTCap->MaxRxAMPDUFactor, + ht_info->CurrentAMPDUFactor = min_t(u32, pPeerHTCap->MaxRxAMPDUFactor, HT_AGG_SIZE_32K); } } - pHTInfo->current_mpdu_density = max_t(u8, pHTInfo->MPDU_Density, + ht_info->current_mpdu_density = max_t(u8, ht_info->MPDU_Density, pPeerHTCap->MPDUDensity); - if (pHTInfo->iot_action & HT_IOT_ACT_TX_USE_AMSDU_8K) { - pHTInfo->bCurrentAMPDUEnable = false; - pHTInfo->ForcedAMSDUMode = HT_AGG_FORCE_ENABLE; + if (ht_info->iot_action & HT_IOT_ACT_TX_USE_AMSDU_8K) { + ht_info->bCurrentAMPDUEnable = false; + ht_info->ForcedAMSDUMode = HT_AGG_FORCE_ENABLE; } - pHTInfo->cur_rx_reorder_enable = pHTInfo->reg_rx_reorder_enable; + ht_info->cur_rx_reorder_enable = ht_info->reg_rx_reorder_enable; if (pPeerHTCap->MCS[0] == 0) pPeerHTCap->MCS[0] = 0xff; @@ -606,8 +606,8 @@ void HTOnAssocRsp(struct rtllib_device *ieee) HTFilterMCSRate(ieee, pPeerHTCap->MCS, ieee->dot11ht_oper_rate_set); - pHTInfo->peer_mimo_ps = pPeerHTCap->MimoPwrSave; - if (pHTInfo->peer_mimo_ps == MIMO_PS_STATIC) + ht_info->peer_mimo_ps = pPeerHTCap->MimoPwrSave; + if (ht_info->peer_mimo_ps == MIMO_PS_STATIC) pMcsFilter = MCS_FILTER_1SS; else pMcsFilter = MCS_FILTER_ALL; @@ -616,49 +616,49 @@ void HTOnAssocRsp(struct rtllib_device *ieee) pMcsFilter); ieee->HTCurrentOperaRate = ieee->HTHighestOperaRate; - pHTInfo->current_op_mode = pPeerHTInfo->OptMode; + ht_info->current_op_mode = pPeerHTInfo->OptMode; } void HTInitializeHTInfo(struct rtllib_device *ieee) { - struct rt_hi_throughput *pHTInfo = ieee->pHTInfo; + struct rt_hi_throughput *ht_info = ieee->ht_info; - pHTInfo->bCurrentHTSupport = false; + ht_info->bCurrentHTSupport = false; - pHTInfo->bCurBW40MHz = false; - pHTInfo->cur_tx_bw40mhz = false; + ht_info->bCurBW40MHz = false; + ht_info->cur_tx_bw40mhz = false; - pHTInfo->bCurShortGI20MHz = false; - pHTInfo->bCurShortGI40MHz = false; - pHTInfo->forced_short_gi = false; + ht_info->bCurShortGI20MHz = false; + ht_info->bCurShortGI40MHz = false; + 
ht_info->forced_short_gi = false; - pHTInfo->bCurSuppCCK = true; + ht_info->bCurSuppCCK = true; - pHTInfo->bCurrent_AMSDU_Support = false; - pHTInfo->nCurrent_AMSDU_MaxSize = pHTInfo->nAMSDU_MaxSize; - pHTInfo->current_mpdu_density = pHTInfo->MPDU_Density; - pHTInfo->CurrentAMPDUFactor = pHTInfo->AMPDU_Factor; + ht_info->bCurrent_AMSDU_Support = false; + ht_info->nCurrent_AMSDU_MaxSize = ht_info->nAMSDU_MaxSize; + ht_info->current_mpdu_density = ht_info->MPDU_Density; + ht_info->CurrentAMPDUFactor = ht_info->AMPDU_Factor; - memset((void *)(&pHTInfo->SelfHTCap), 0, - sizeof(pHTInfo->SelfHTCap)); - memset((void *)(&pHTInfo->SelfHTInfo), 0, - sizeof(pHTInfo->SelfHTInfo)); - memset((void *)(&pHTInfo->PeerHTCapBuf), 0, - sizeof(pHTInfo->PeerHTCapBuf)); - memset((void *)(&pHTInfo->PeerHTInfoBuf), 0, - sizeof(pHTInfo->PeerHTInfoBuf)); + memset((void *)(&ht_info->SelfHTCap), 0, + sizeof(ht_info->SelfHTCap)); + memset((void *)(&ht_info->SelfHTInfo), 0, + sizeof(ht_info->SelfHTInfo)); + memset((void *)(&ht_info->PeerHTCapBuf), 0, + sizeof(ht_info->PeerHTCapBuf)); + memset((void *)(&ht_info->PeerHTInfoBuf), 0, + sizeof(ht_info->PeerHTInfoBuf)); - pHTInfo->sw_bw_in_progress = false; + ht_info->sw_bw_in_progress = false; - pHTInfo->ePeerHTSpecVer = HT_SPEC_VER_IEEE; + ht_info->ePeerHTSpecVer = HT_SPEC_VER_IEEE; - pHTInfo->current_rt2rt_aggregation = false; - pHTInfo->current_rt2rt_long_slot_time = false; - pHTInfo->RT2RT_HT_Mode = (enum rt_ht_capability)0; + ht_info->current_rt2rt_aggregation = false; + ht_info->current_rt2rt_long_slot_time = false; + ht_info->RT2RT_HT_Mode = (enum rt_ht_capability)0; - pHTInfo->IOTPeer = 0; - pHTInfo->iot_action = 0; - pHTInfo->iot_ra_func = 0; + ht_info->IOTPeer = 0; + ht_info->iot_action = 0; + ht_info->iot_ra_func = 0; { u8 *RegHTSuppRateSets = &ieee->reg_ht_supp_rate_set[0]; @@ -687,114 +687,114 @@ void HTInitializeBssDesc(struct bss_ht *pBssHT) void HTResetSelfAndSavePeerSetting(struct rtllib_device *ieee, struct rtllib_network *pNetwork) 
{ - struct rt_hi_throughput *pHTInfo = ieee->pHTInfo; + struct rt_hi_throughput *ht_info = ieee->ht_info; u8 bIOTAction = 0; /* unmark enable_ht flag here is the same reason why unmarked in * function rtllib_softmac_new_net. WB 2008.09.10 */ if (pNetwork->bssht.bd_support_ht) { - pHTInfo->bCurrentHTSupport = true; - pHTInfo->ePeerHTSpecVer = pNetwork->bssht.bd_ht_spec_ver; + ht_info->bCurrentHTSupport = true; + ht_info->ePeerHTSpecVer = pNetwork->bssht.bd_ht_spec_ver; if (pNetwork->bssht.bd_ht_cap_len > 0 && - pNetwork->bssht.bd_ht_cap_len <= sizeof(pHTInfo->PeerHTCapBuf)) - memcpy(pHTInfo->PeerHTCapBuf, + pNetwork->bssht.bd_ht_cap_len <= sizeof(ht_info->PeerHTCapBuf)) + memcpy(ht_info->PeerHTCapBuf, pNetwork->bssht.bd_ht_cap_buf, pNetwork->bssht.bd_ht_cap_len); if (pNetwork->bssht.bd_ht_info_len > 0 && pNetwork->bssht.bd_ht_info_len <= - sizeof(pHTInfo->PeerHTInfoBuf)) - memcpy(pHTInfo->PeerHTInfoBuf, + sizeof(ht_info->PeerHTInfoBuf)) + memcpy(ht_info->PeerHTInfoBuf, pNetwork->bssht.bd_ht_info_buf, pNetwork->bssht.bd_ht_info_len); - if (pHTInfo->reg_rt2rt_aggregation) { - pHTInfo->current_rt2rt_aggregation = + if (ht_info->reg_rt2rt_aggregation) { + ht_info->current_rt2rt_aggregation = pNetwork->bssht.bd_rt2rt_aggregation; - pHTInfo->current_rt2rt_long_slot_time = + ht_info->current_rt2rt_long_slot_time = pNetwork->bssht.bd_rt2rt_long_slot_time; - pHTInfo->RT2RT_HT_Mode = pNetwork->bssht.rt2rt_ht_mode; + ht_info->RT2RT_HT_Mode = pNetwork->bssht.rt2rt_ht_mode; } else { - pHTInfo->current_rt2rt_aggregation = false; - pHTInfo->current_rt2rt_long_slot_time = false; - pHTInfo->RT2RT_HT_Mode = (enum rt_ht_capability)0; + ht_info->current_rt2rt_aggregation = false; + ht_info->current_rt2rt_long_slot_time = false; + ht_info->RT2RT_HT_Mode = (enum rt_ht_capability)0; } HTIOTPeerDetermine(ieee); - pHTInfo->iot_action = 0; + ht_info->iot_action = 0; bIOTAction = HTIOTActIsDisableMCS14(ieee, pNetwork->bssid); if (bIOTAction) - pHTInfo->iot_action |= HT_IOT_ACT_DISABLE_MCS14; 
+ ht_info->iot_action |= HT_IOT_ACT_DISABLE_MCS14; bIOTAction = HTIOTActIsDisableMCS15(ieee); if (bIOTAction) - pHTInfo->iot_action |= HT_IOT_ACT_DISABLE_MCS15; + ht_info->iot_action |= HT_IOT_ACT_DISABLE_MCS15; bIOTAction = HTIOTActIsDisableMCSTwoSpatialStream(ieee); if (bIOTAction) - pHTInfo->iot_action |= HT_IOT_ACT_DISABLE_ALL_2SS; + ht_info->iot_action |= HT_IOT_ACT_DISABLE_ALL_2SS; bIOTAction = HTIOTActIsDisableEDCATurbo(ieee, pNetwork->bssid); if (bIOTAction) - pHTInfo->iot_action |= HT_IOT_ACT_DISABLE_EDCA_TURBO; + ht_info->iot_action |= HT_IOT_ACT_DISABLE_EDCA_TURBO; bIOTAction = HTIOTActIsMgntUseCCK6M(ieee, pNetwork); if (bIOTAction) - pHTInfo->iot_action |= HT_IOT_ACT_MGNT_USE_CCK_6M; + ht_info->iot_action |= HT_IOT_ACT_MGNT_USE_CCK_6M; bIOTAction = HTIOTActIsCCDFsync(ieee); if (bIOTAction) - pHTInfo->iot_action |= HT_IOT_ACT_CDD_FSYNC; + ht_info->iot_action |= HT_IOT_ACT_CDD_FSYNC; } else { - pHTInfo->bCurrentHTSupport = false; - pHTInfo->current_rt2rt_aggregation = false; - pHTInfo->current_rt2rt_long_slot_time = false; - pHTInfo->RT2RT_HT_Mode = (enum rt_ht_capability)0; + ht_info->bCurrentHTSupport = false; + ht_info->current_rt2rt_aggregation = false; + ht_info->current_rt2rt_long_slot_time = false; + ht_info->RT2RT_HT_Mode = (enum rt_ht_capability)0; - pHTInfo->iot_action = 0; - pHTInfo->iot_ra_func = 0; + ht_info->iot_action = 0; + ht_info->iot_ra_func = 0; } } void HT_update_self_and_peer_setting(struct rtllib_device *ieee, struct rtllib_network *pNetwork) { - struct rt_hi_throughput *pHTInfo = ieee->pHTInfo; + struct rt_hi_throughput *ht_info = ieee->ht_info; struct ht_info_ele *pPeerHTInfo = (struct ht_info_ele *)pNetwork->bssht.bd_ht_info_buf; - if (pHTInfo->bCurrentHTSupport) { + if (ht_info->bCurrentHTSupport) { if (pNetwork->bssht.bd_ht_info_len != 0) - pHTInfo->current_op_mode = pPeerHTInfo->OptMode; + ht_info->current_op_mode = pPeerHTInfo->OptMode; } } EXPORT_SYMBOL(HT_update_self_and_peer_setting); void HTUseDefaultSetting(struct 
rtllib_device *ieee) { - struct rt_hi_throughput *pHTInfo = ieee->pHTInfo; + struct rt_hi_throughput *ht_info = ieee->ht_info; - if (pHTInfo->enable_ht) { - pHTInfo->bCurrentHTSupport = true; - pHTInfo->bCurSuppCCK = pHTInfo->bRegSuppCCK; + if (ht_info->enable_ht) { + ht_info->bCurrentHTSupport = true; + ht_info->bCurSuppCCK = ht_info->bRegSuppCCK; - pHTInfo->bCurBW40MHz = pHTInfo->bRegBW40MHz; - pHTInfo->bCurShortGI20MHz = pHTInfo->bRegShortGI20MHz; + ht_info->bCurBW40MHz = ht_info->bRegBW40MHz; + ht_info->bCurShortGI20MHz = ht_info->bRegShortGI20MHz; - pHTInfo->bCurShortGI40MHz = pHTInfo->bRegShortGI40MHz; + ht_info->bCurShortGI40MHz = ht_info->bRegShortGI40MHz; if (ieee->iw_mode == IW_MODE_ADHOC) ieee->current_network.qos_data.active = ieee->current_network.qos_data.supported; - pHTInfo->bCurrent_AMSDU_Support = pHTInfo->bAMSDU_Support; - pHTInfo->nCurrent_AMSDU_MaxSize = pHTInfo->nAMSDU_MaxSize; + ht_info->bCurrent_AMSDU_Support = ht_info->bAMSDU_Support; + ht_info->nCurrent_AMSDU_MaxSize = ht_info->nAMSDU_MaxSize; - pHTInfo->bCurrentAMPDUEnable = pHTInfo->bAMPDUEnable; - pHTInfo->CurrentAMPDUFactor = pHTInfo->AMPDU_Factor; + ht_info->bCurrentAMPDUEnable = ht_info->bAMPDUEnable; + ht_info->CurrentAMPDUFactor = ht_info->AMPDU_Factor; - pHTInfo->current_mpdu_density = pHTInfo->current_mpdu_density; + ht_info->current_mpdu_density = ht_info->current_mpdu_density; HTFilterMCSRate(ieee, ieee->reg_dot11tx_ht_oper_rate_set, ieee->dot11ht_oper_rate_set); @@ -804,13 +804,13 @@ void HTUseDefaultSetting(struct rtllib_device *ieee) ieee->HTCurrentOperaRate = ieee->HTHighestOperaRate; } else { - pHTInfo->bCurrentHTSupport = false; + ht_info->bCurrentHTSupport = false; } } u8 HTCCheck(struct rtllib_device *ieee, u8 *pFrame) { - if (ieee->pHTInfo->bCurrentHTSupport) { + if (ieee->ht_info->bCurrentHTSupport) { if ((IsQoSDataFrame(pFrame) && Frame_Order(pFrame)) == 1) { netdev_dbg(ieee->dev, "HT CONTROL FILED EXIST!!\n"); return true; @@ -821,13 +821,13 @@ u8 HTCCheck(struct 
rtllib_device *ieee, u8 *pFrame) static void HTSetConnectBwModeCallback(struct rtllib_device *ieee) { - struct rt_hi_throughput *pHTInfo = ieee->pHTInfo; + struct rt_hi_throughput *ht_info = ieee->ht_info; - if (pHTInfo->bCurBW40MHz) { - if (pHTInfo->CurSTAExtChnlOffset == HT_EXTCHNL_OFFSET_UPPER) + if (ht_info->bCurBW40MHz) { + if (ht_info->CurSTAExtChnlOffset == HT_EXTCHNL_OFFSET_UPPER) ieee->set_chan(ieee->dev, ieee->current_network.channel + 2); - else if (pHTInfo->CurSTAExtChnlOffset == + else if (ht_info->CurSTAExtChnlOffset == HT_EXTCHNL_OFFSET_LOWER) ieee->set_chan(ieee->dev, ieee->current_network.channel - 2); @@ -836,29 +836,29 @@ static void HTSetConnectBwModeCallback(struct rtllib_device *ieee) ieee->current_network.channel); ieee->SetBWModeHandler(ieee->dev, HT_CHANNEL_WIDTH_20_40, - pHTInfo->CurSTAExtChnlOffset); + ht_info->CurSTAExtChnlOffset); } else { ieee->set_chan(ieee->dev, ieee->current_network.channel); ieee->SetBWModeHandler(ieee->dev, HT_CHANNEL_WIDTH_20, HT_EXTCHNL_OFFSET_NO_EXT); } - pHTInfo->sw_bw_in_progress = false; + ht_info->sw_bw_in_progress = false; } void HTSetConnectBwMode(struct rtllib_device *ieee, enum ht_channel_width bandwidth, enum ht_extchnl_offset Offset) { - struct rt_hi_throughput *pHTInfo = ieee->pHTInfo; + struct rt_hi_throughput *ht_info = ieee->ht_info; - if (!pHTInfo->bRegBW40MHz) + if (!ht_info->bRegBW40MHz) return; if (ieee->GetHalfNmodeSupportByAPsHandler(ieee->dev)) bandwidth = HT_CHANNEL_WIDTH_20; - if (pHTInfo->sw_bw_in_progress) { + if (ht_info->sw_bw_in_progress) { pr_info("%s: sw_bw_in_progress!!\n", __func__); return; } @@ -868,21 +868,21 @@ void HTSetConnectBwMode(struct rtllib_device *ieee, Offset = HT_EXTCHNL_OFFSET_NO_EXT; if (Offset == HT_EXTCHNL_OFFSET_UPPER || Offset == HT_EXTCHNL_OFFSET_LOWER) { - pHTInfo->bCurBW40MHz = true; - pHTInfo->CurSTAExtChnlOffset = Offset; + ht_info->bCurBW40MHz = true; + ht_info->CurSTAExtChnlOffset = Offset; } else { - pHTInfo->bCurBW40MHz = false; - 
pHTInfo->CurSTAExtChnlOffset = HT_EXTCHNL_OFFSET_NO_EXT; + ht_info->bCurBW40MHz = false; + ht_info->CurSTAExtChnlOffset = HT_EXTCHNL_OFFSET_NO_EXT; } } else { - pHTInfo->bCurBW40MHz = false; - pHTInfo->CurSTAExtChnlOffset = HT_EXTCHNL_OFFSET_NO_EXT; + ht_info->bCurBW40MHz = false; + ht_info->CurSTAExtChnlOffset = HT_EXTCHNL_OFFSET_NO_EXT; } - netdev_dbg(ieee->dev, "%s():pHTInfo->bCurBW40MHz:%x\n", __func__, - pHTInfo->bCurBW40MHz); + netdev_dbg(ieee->dev, "%s():ht_info->bCurBW40MHz:%x\n", __func__, + ht_info->bCurBW40MHz); - pHTInfo->sw_bw_in_progress = true; + ht_info->sw_bw_in_progress = true; HTSetConnectBwModeCallback(ieee); } diff --git a/drivers/staging/rtl8192e/rtl819x_TSProc.c b/drivers/staging/rtl8192e/rtl819x_TSProc.c index 05c7e822f372..68c131afc2ba 100644 --- a/drivers/staging/rtl8192e/rtl819x_TSProc.c +++ b/drivers/staging/rtl8192e/rtl819x_TSProc.c @@ -83,7 +83,7 @@ static void RxPktPendingTimeout(struct timer_list *t) if (bPktInBuf && (pRxTs->rx_timeout_indicate_seq == 0xffff)) { pRxTs->rx_timeout_indicate_seq = pRxTs->rx_indicate_seq; mod_timer(&pRxTs->rx_pkt_pending_timer, jiffies + - msecs_to_jiffies(ieee->pHTInfo->rx_reorder_pending_time) + msecs_to_jiffies(ieee->ht_info->rx_reorder_pending_time) ); } spin_unlock_irqrestore(&(ieee->reorder_spinlock), flags); diff --git a/drivers/staging/rtl8192e/rtllib.h b/drivers/staging/rtl8192e/rtllib.h index 9c81ca38f4b1..7119c9c5e1fe 100644 --- a/drivers/staging/rtl8192e/rtllib.h +++ b/drivers/staging/rtl8192e/rtllib.h @@ -1432,7 +1432,7 @@ struct rtllib_device { RT_RF_CHANGE_SOURCE rf_off_reason; bool is_set_key; bool wx_set_enc; - struct rt_hi_throughput *pHTInfo; + struct rt_hi_throughput *ht_info; spinlock_t reorder_spinlock; u8 reg_dot11ht_oper_rate_set[16]; @@ -2089,7 +2089,7 @@ u8 HTGetHighestMCSRate(struct rtllib_device *ieee, u8 *pMCSRateSet, extern u8 MCS_FILTER_ALL[]; extern u16 MCS_DATA_RATE[2][2][77]; u8 HTCCheck(struct rtllib_device *ieee, u8 *pFrame); -void HTResetIOTSetting(struct 
rt_hi_throughput *pHTInfo); +void HTResetIOTSetting(struct rt_hi_throughput *ht_info); bool IsHTHalfNmodeAPs(struct rtllib_device *ieee); u16 TxCountToDataRate(struct rtllib_device *ieee, u8 nDataRate); int rtllib_rx_ADDBAReq(struct rtllib_device *ieee, struct sk_buff *skb); diff --git a/drivers/staging/rtl8192e/rtllib_module.c b/drivers/staging/rtl8192e/rtllib_module.c index ce8b73f437a3..d6a4d6b4ec57 100644 --- a/drivers/staging/rtl8192e/rtllib_module.c +++ b/drivers/staging/rtl8192e/rtllib_module.c @@ -125,8 +125,8 @@ struct net_device *alloc_rtllib(int sizeof_priv) if (err) goto free_crypt_info; - ieee->pHTInfo = kzalloc(sizeof(struct rt_hi_throughput), GFP_KERNEL); - if (!ieee->pHTInfo) + ieee->ht_info = kzalloc(sizeof(struct rt_hi_throughput), GFP_KERNEL); + if (!ieee->ht_info) goto free_softmac; HTUpdateDefaultSetting(ieee); @@ -160,7 +160,7 @@ void free_rtllib(struct net_device *dev) struct rtllib_device *ieee = (struct rtllib_device *) netdev_priv_rsl(dev); - kfree(ieee->pHTInfo); + kfree(ieee->ht_info); rtllib_softmac_free(ieee); lib80211_crypt_info_free(&ieee->crypt_info); diff --git a/drivers/staging/rtl8192e/rtllib_rx.c b/drivers/staging/rtl8192e/rtllib_rx.c index 75190c389ccf..6a0f5bbb99ef 100644 --- a/drivers/staging/rtl8192e/rtllib_rx.c +++ b/drivers/staging/rtl8192e/rtllib_rx.c @@ -567,9 +567,9 @@ static void RxReorderIndicatePacket(struct rtllib_device *ieee, struct rtllib_rxb *prxb, struct rx_ts_record *pTS, u16 SeqNum) { - struct rt_hi_throughput *pHTInfo = ieee->pHTInfo; + struct rt_hi_throughput *ht_info = ieee->ht_info; struct rx_reorder_entry *pReorderEntry = NULL; - u8 WinSize = pHTInfo->rx_reorder_win_size; + u8 WinSize = ht_info->rx_reorder_win_size; u16 WinEnd = 0; u8 index = 0; bool bMatchWinStart = false, bPktInBuf = false; @@ -591,7 +591,7 @@ static void RxReorderIndicatePacket(struct rtllib_device *ieee, netdev_dbg(ieee->dev, "Packet Drop! 
IndicateSeq: %d, NewSeq: %d\n", pTS->rx_indicate_seq, SeqNum); - pHTInfo->rx_reorder_drop_counter++; + ht_info->rx_reorder_drop_counter++; { int i; @@ -755,7 +755,7 @@ static void RxReorderIndicatePacket(struct rtllib_device *ieee, netdev_dbg(ieee->dev, "%s(): SET rx timeout timer\n", __func__); pTS->rx_timeout_indicate_seq = pTS->rx_indicate_seq; mod_timer(&pTS->rx_pkt_pending_timer, jiffies + - msecs_to_jiffies(pHTInfo->rx_reorder_pending_time)); + msecs_to_jiffies(ht_info->rx_reorder_pending_time)); } spin_unlock_irqrestore(&(ieee->reorder_spinlock), flags); } @@ -924,7 +924,7 @@ static int rtllib_rx_check_duplicate(struct rtllib_device *ieee, sc = le16_to_cpu(hdr->seq_ctl); frag = WLAN_GET_SEQ_FRAG(sc); - if (!ieee->pHTInfo->cur_rx_reorder_enable || + if (!ieee->ht_info->cur_rx_reorder_enable || !ieee->current_network.qos_data.active || !IsDataFrame(skb->data) || IsLegacyDataFrame(skb->data)) { @@ -1442,7 +1442,7 @@ static int rtllib_rx_InfraAdhoc(struct rtllib_device *ieee, struct sk_buff *skb, } /* Indicate packets to upper layer or Rx Reorder */ - if (!ieee->pHTInfo->cur_rx_reorder_enable || pTS == NULL || bToOtherSTA) + if (!ieee->ht_info->cur_rx_reorder_enable || pTS == NULL || bToOtherSTA) rtllib_rx_indicate_pkt_legacy(ieee, rx_stats, rxb, dst, src); else RxReorderIndicatePacket(ieee, rxb, pTS, SeqNum); diff --git a/drivers/staging/rtl8192e/rtllib_softmac.c b/drivers/staging/rtl8192e/rtllib_softmac.c index 8b1d88061125..2552aa089700 100644 --- a/drivers/staging/rtl8192e/rtllib_softmac.c +++ b/drivers/staging/rtl8192e/rtllib_softmac.c @@ -176,10 +176,10 @@ u8 MgntQuery_TxRateExcludeCCKRates(struct rtllib_device *ieee) static u8 MgntQuery_MgntFrameTxRate(struct rtllib_device *ieee) { - struct rt_hi_throughput *pHTInfo = ieee->pHTInfo; + struct rt_hi_throughput *ht_info = ieee->ht_info; u8 rate; - if (pHTInfo->iot_action & HT_IOT_ACT_MGNT_USE_CCK_6M) + if (ht_info->iot_action & HT_IOT_ACT_MGNT_USE_CCK_6M) rate = 0x0c; else rate = ieee->basic_rate & 0x7f; @@ 
-187,7 +187,7 @@ static u8 MgntQuery_MgntFrameTxRate(struct rtllib_device *ieee) if (rate == 0) { if (ieee->mode == IEEE_A || ieee->mode == IEEE_N_5G || - (ieee->mode == IEEE_N_24G && !pHTInfo->bCurSuppCCK)) + (ieee->mode == IEEE_N_24G && !ht_info->bCurSuppCCK)) rate = 0x0c; else rate = 0x02; @@ -829,7 +829,7 @@ static struct sk_buff *rtllib_probe_resp(struct rtllib_device *ieee, u8 tmp_ht_cap_len = 0; u8 *tmp_ht_info_buf = NULL; u8 tmp_ht_info_len = 0; - struct rt_hi_throughput *pHTInfo = ieee->pHTInfo; + struct rt_hi_throughput *ht_info = ieee->ht_info; u8 *tmp_generic_ie_buf = NULL; u8 tmp_generic_ie_len = 0; @@ -843,7 +843,7 @@ static struct sk_buff *rtllib_probe_resp(struct rtllib_device *ieee, if ((ieee->current_network.mode == IEEE_G) || (ieee->current_network.mode == IEEE_N_24G && - ieee->pHTInfo->bCurSuppCCK)) { + ieee->ht_info->bCurSuppCCK)) { erp_len = 3; erpinfo_content = 0; if (ieee->current_network.buseprotection) @@ -854,20 +854,20 @@ static struct sk_buff *rtllib_probe_resp(struct rtllib_device *ieee, crypt = ieee->crypt_info.crypt[ieee->crypt_info.tx_keyidx]; encrypt = ieee->host_encrypt && crypt && crypt->ops && ((strcmp(crypt->ops->name, "R-WEP") == 0 || wpa_ie_len)); - if (ieee->pHTInfo->bCurrentHTSupport) { - tmp_ht_cap_buf = (u8 *)&(ieee->pHTInfo->SelfHTCap); - tmp_ht_cap_len = sizeof(ieee->pHTInfo->SelfHTCap); - tmp_ht_info_buf = (u8 *)&(ieee->pHTInfo->SelfHTInfo); - tmp_ht_info_len = sizeof(ieee->pHTInfo->SelfHTInfo); + if (ieee->ht_info->bCurrentHTSupport) { + tmp_ht_cap_buf = (u8 *)&(ieee->ht_info->SelfHTCap); + tmp_ht_cap_len = sizeof(ieee->ht_info->SelfHTCap); + tmp_ht_info_buf = (u8 *)&(ieee->ht_info->SelfHTInfo); + tmp_ht_info_len = sizeof(ieee->ht_info->SelfHTInfo); HTConstructCapabilityElement(ieee, tmp_ht_cap_buf, &tmp_ht_cap_len, encrypt, false); HTConstructInfoElement(ieee, tmp_ht_info_buf, &tmp_ht_info_len, encrypt); - if (pHTInfo->reg_rt2rt_aggregation) { - tmp_generic_ie_buf = ieee->pHTInfo->sz_rt2rt_agg_buf; + if 
(ht_info->reg_rt2rt_aggregation) { + tmp_generic_ie_buf = ieee->ht_info->sz_rt2rt_agg_buf; tmp_generic_ie_len = - sizeof(ieee->pHTInfo->sz_rt2rt_agg_buf); + sizeof(ieee->ht_info->sz_rt2rt_agg_buf); HTConstructRT2RTAggElement(ieee, tmp_generic_ie_buf, &tmp_generic_ie_len); } @@ -1179,19 +1179,19 @@ rtllib_association_req(struct rtllib_network *beacon, if ((ieee->rtllib_ap_sec_type && (ieee->rtllib_ap_sec_type(ieee) & SEC_ALG_TKIP)) || ieee->bForcedBgMode) { - ieee->pHTInfo->enable_ht = 0; + ieee->ht_info->enable_ht = 0; ieee->mode = WIRELESS_MODE_G; } - if (ieee->pHTInfo->bCurrentHTSupport && ieee->pHTInfo->enable_ht) { - ht_cap_buf = (u8 *)&(ieee->pHTInfo->SelfHTCap); - ht_cap_len = sizeof(ieee->pHTInfo->SelfHTCap); + if (ieee->ht_info->bCurrentHTSupport && ieee->ht_info->enable_ht) { + ht_cap_buf = (u8 *)&(ieee->ht_info->SelfHTCap); + ht_cap_len = sizeof(ieee->ht_info->SelfHTCap); HTConstructCapabilityElement(ieee, ht_cap_buf, &ht_cap_len, encrypt, true); - if (ieee->pHTInfo->current_rt2rt_aggregation) { - realtek_ie_buf = ieee->pHTInfo->sz_rt2rt_agg_buf; + if (ieee->ht_info->current_rt2rt_aggregation) { + realtek_ie_buf = ieee->ht_info->sz_rt2rt_agg_buf; realtek_ie_len = - sizeof(ieee->pHTInfo->sz_rt2rt_agg_buf); + sizeof(ieee->ht_info->sz_rt2rt_agg_buf); HTConstructRT2RTAggElement(ieee, realtek_ie_buf, &realtek_ie_len); } @@ -1324,8 +1324,8 @@ rtllib_association_req(struct rtllib_network *beacon, memcpy(tag, osCcxVerNum.Octet, osCcxVerNum.Length); tag += osCcxVerNum.Length; } - if (ieee->pHTInfo->bCurrentHTSupport && ieee->pHTInfo->enable_ht) { - if (ieee->pHTInfo->ePeerHTSpecVer != HT_SPEC_VER_EWC) { + if (ieee->ht_info->bCurrentHTSupport && ieee->ht_info->enable_ht) { + if (ieee->ht_info->ePeerHTSpecVer != HT_SPEC_VER_EWC) { tag = skb_put(skb, ht_cap_len); *tag++ = MFIE_TYPE_HT_CAP; *tag++ = ht_cap_len - 2; @@ -1358,8 +1358,8 @@ rtllib_association_req(struct rtllib_network *beacon, rtllib_TURBO_Info(ieee, &tag); } - if (ieee->pHTInfo->bCurrentHTSupport && 
ieee->pHTInfo->enable_ht) { - if (ieee->pHTInfo->ePeerHTSpecVer == HT_SPEC_VER_EWC) { + if (ieee->ht_info->bCurrentHTSupport && ieee->ht_info->enable_ht) { + if (ieee->ht_info->ePeerHTSpecVer == HT_SPEC_VER_EWC) { tag = skb_put(skb, ht_cap_len); *tag++ = MFIE_TYPE_GENERIC; *tag++ = ht_cap_len - 2; @@ -1367,7 +1367,7 @@ rtllib_association_req(struct rtllib_network *beacon, tag += ht_cap_len - 2; } - if (ieee->pHTInfo->current_rt2rt_aggregation) { + if (ieee->ht_info->current_rt2rt_aggregation) { tag = skb_put(skb, realtek_ie_len); *tag++ = MFIE_TYPE_GENERIC; *tag++ = realtek_ie_len - 2; @@ -1524,14 +1524,14 @@ static void rtllib_associate_complete_wq(void *data) ieee->SetWirelessMode(ieee->dev, IEEE_B); netdev_info(ieee->dev, "Using B rates:%d\n", ieee->rate); } - if (ieee->pHTInfo->bCurrentHTSupport && ieee->pHTInfo->enable_ht) { + if (ieee->ht_info->bCurrentHTSupport && ieee->ht_info->enable_ht) { netdev_info(ieee->dev, "Successfully associated, ht enabled\n"); HTOnAssocRsp(ieee); } else { netdev_info(ieee->dev, "Successfully associated, ht not enabled(%d, %d)\n", - ieee->pHTInfo->bCurrentHTSupport, - ieee->pHTInfo->enable_ht); + ieee->ht_info->bCurrentHTSupport, + ieee->ht_info->enable_ht); memset(ieee->dot11ht_oper_rate_set, 0, 16); } ieee->link_detect_info.SlotNum = 2 * (1 + @@ -1684,7 +1684,7 @@ inline void rtllib_softmac_new_net(struct rtllib_device *ieee, ieee->current_network.ssid, ieee->current_network.channel, ieee->current_network.qos_data.supported, - ieee->pHTInfo->enable_ht, + ieee->ht_info->enable_ht, ieee->current_network.bssht.bd_support_ht, ieee->current_network.mode, ieee->current_network.flags); @@ -1693,7 +1693,7 @@ inline void rtllib_softmac_new_net(struct rtllib_device *ieee, !(ieee->softmac_features & IEEE_SOFTMAC_SCAN)) rtllib_stop_scan_syncro(ieee); - HTResetIOTSetting(ieee->pHTInfo); + HTResetIOTSetting(ieee->ht_info); ieee->wmm_acm = 0; if (ieee->iw_mode == IW_MODE_INFRA) { /* Join the network for the first time */ @@ -1703,7 +1703,7 @@ 
inline void rtllib_softmac_new_net(struct rtllib_device *ieee, HTResetSelfAndSavePeerSetting(ieee, &(ieee->current_network)); else - ieee->pHTInfo->bCurrentHTSupport = + ieee->ht_info->bCurrentHTSupport = false; ieee->state = RTLLIB_ASSOCIATING; @@ -1893,7 +1893,7 @@ static inline u16 assoc_parse(struct rtllib_device *ieee, struct sk_buff *skb, ((ieee->mode == IEEE_G) && (ieee->current_network.mode == IEEE_N_24G) && (ieee->AsocRetryCount++ < (RT_ASOC_RETRY_LIMIT-1)))) { - ieee->pHTInfo->iot_action |= HT_IOT_ACT_PURE_N_MODE; + ieee->ht_info->iot_action |= HT_IOT_ACT_PURE_N_MODE; } else { ieee->AsocRetryCount = 0; } @@ -2100,7 +2100,7 @@ static void rtllib_sta_wakeup(struct rtllib_device *ieee, short nl) { if (ieee->sta_sleep == LPS_IS_WAKE) { if (nl) { - if (ieee->pHTInfo->iot_action & + if (ieee->ht_info->iot_action & HT_IOT_ACT_NULL_DATA_POWER_SAVING) { ieee->ack_tx_to_ieee = 1; rtllib_sta_ps_send_null_frame(ieee, 0); @@ -2116,7 +2116,7 @@ static void rtllib_sta_wakeup(struct rtllib_device *ieee, short nl) if (ieee->sta_sleep == LPS_IS_SLEEP) ieee->sta_wake_up(ieee->dev); if (nl) { - if (ieee->pHTInfo->iot_action & + if (ieee->ht_info->iot_action & HT_IOT_ACT_NULL_DATA_POWER_SAVING) { ieee->ack_tx_to_ieee = 1; rtllib_sta_ps_send_null_frame(ieee, 0); @@ -2151,7 +2151,7 @@ void rtllib_ps_tx_ack(struct rtllib_device *ieee, short success) if ((ieee->sta_sleep == LPS_IS_WAKE) && !success) { spin_lock_irqsave(&ieee->mgmt_tx_lock, flags2); - if (ieee->pHTInfo->iot_action & + if (ieee->ht_info->iot_action & HT_IOT_ACT_NULL_DATA_POWER_SAVING) rtllib_sta_ps_send_null_frame(ieee, 0); else @@ -2235,10 +2235,10 @@ rtllib_rx_assoc_resp(struct rtllib_device *ieee, struct sk_buff *skb, kfree(network); return 1; } - memcpy(ieee->pHTInfo->PeerHTCapBuf, + memcpy(ieee->ht_info->PeerHTCapBuf, network->bssht.bd_ht_cap_buf, network->bssht.bd_ht_cap_len); - memcpy(ieee->pHTInfo->PeerHTInfoBuf, + memcpy(ieee->ht_info->PeerHTInfoBuf, network->bssht.bd_ht_info_buf, 
network->bssht.bd_ht_info_len); if (ieee->handle_assoc_response != NULL) @@ -2295,7 +2295,7 @@ static void rtllib_rx_auth_resp(struct rtllib_device *ieee, struct sk_buff *skb) if (ieee->open_wep || !challenge) { ieee->state = RTLLIB_ASSOCIATING_AUTHENTICATED; ieee->softmac_stats.rx_auth_rs_ok++; - if (!(ieee->pHTInfo->iot_action & HT_IOT_ACT_PURE_N_MODE)) { + if (!(ieee->ht_info->iot_action & HT_IOT_ACT_PURE_N_MODE)) { if (!ieee->GetNmodeSupportBySecCfg(ieee->dev)) { if (IsHTHalfNmodeAPs(ieee)) { bSupportNmode = true; @@ -2669,7 +2669,7 @@ static void rtllib_start_ibss_wq(void *data) if ((ieee->mode == IEEE_N_24G) || (ieee->mode == IEEE_N_5G)) HTUseDefaultSetting(ieee); else - ieee->pHTInfo->bCurrentHTSupport = false; + ieee->ht_info->bCurrentHTSupport = false; ieee->SetHwRegHandler(ieee->dev, HW_VAR_MEDIA_STATUS, (u8 *)(&ieee->state)); diff --git a/drivers/staging/rtl8192e/rtllib_softmac_wx.c b/drivers/staging/rtl8192e/rtllib_softmac_wx.c index e02e7d9566b2..63edf68c0b49 100644 --- a/drivers/staging/rtl8192e/rtllib_softmac_wx.c +++ b/drivers/staging/rtl8192e/rtllib_softmac_wx.c @@ -359,11 +359,11 @@ void rtllib_wx_sync_scan_wq(void *data) if (ieee->ScanOperationBackupHandler) ieee->ScanOperationBackupHandler(ieee->dev, SCAN_OPT_BACKUP); - if (ieee->pHTInfo->bCurrentHTSupport && ieee->pHTInfo->enable_ht && - ieee->pHTInfo->bCurBW40MHz) { + if (ieee->ht_info->bCurrentHTSupport && ieee->ht_info->enable_ht && + ieee->ht_info->bCurBW40MHz) { b40M = 1; - chan_offset = ieee->pHTInfo->CurSTAExtChnlOffset; - bandwidth = (enum ht_channel_width)ieee->pHTInfo->bCurBW40MHz; + chan_offset = ieee->ht_info->CurSTAExtChnlOffset; + bandwidth = (enum ht_channel_width)ieee->ht_info->bCurBW40MHz; ieee->SetBWModeHandler(ieee->dev, HT_CHANNEL_WIDTH_20, HT_EXTCHNL_OFFSET_NO_EXT); } diff --git a/drivers/staging/rtl8192e/rtllib_tx.c b/drivers/staging/rtl8192e/rtllib_tx.c index 8b01cfe03381..9ab8ee46ef66 100644 --- a/drivers/staging/rtl8192e/rtllib_tx.c +++ 
b/drivers/staging/rtl8192e/rtllib_tx.c @@ -266,14 +266,14 @@ static void rtllib_tx_query_agg_cap(struct rtllib_device *ieee, struct sk_buff *skb, struct cb_desc *tcb_desc) { - struct rt_hi_throughput *pHTInfo = ieee->pHTInfo; + struct rt_hi_throughput *ht_info = ieee->ht_info; struct tx_ts_record *pTxTs = NULL; struct rtllib_hdr_1addr *hdr = (struct rtllib_hdr_1addr *)skb->data; if (rtllib_act_scanning(ieee, false)) return; - if (!pHTInfo->bCurrentHTSupport || !pHTInfo->enable_ht) + if (!ht_info->bCurrentHTSupport || !ht_info->enable_ht) return; if (!IsQoSDataFrame(skb->data)) return; @@ -283,12 +283,12 @@ static void rtllib_tx_query_agg_cap(struct rtllib_device *ieee, if (tcb_desc->bdhcp || ieee->CntAfterLink < 2) return; - if (pHTInfo->iot_action & HT_IOT_ACT_TX_NO_AGGREGATION) + if (ht_info->iot_action & HT_IOT_ACT_TX_NO_AGGREGATION) return; if (!ieee->GetNmodeSupportBySecCfg(ieee->dev)) return; - if (pHTInfo->bCurrentAMPDUEnable) { + if (ht_info->bCurrentAMPDUEnable) { if (!GetTs(ieee, (struct ts_common_info **)(&pTxTs), hdr->addr1, skb->priority, TX_DIR, true)) { netdev_info(ieee->dev, "%s: can't get TS\n", __func__); @@ -313,19 +313,19 @@ static void rtllib_tx_query_agg_cap(struct rtllib_device *ieee, } if (ieee->iw_mode == IW_MODE_INFRA) { tcb_desc->bAMPDUEnable = true; - tcb_desc->ampdu_factor = pHTInfo->CurrentAMPDUFactor; - tcb_desc->ampdu_density = pHTInfo->current_mpdu_density; + tcb_desc->ampdu_factor = ht_info->CurrentAMPDUFactor; + tcb_desc->ampdu_density = ht_info->current_mpdu_density; } } FORCED_AGG_SETTING: - switch (pHTInfo->ForcedAMPDUMode) { + switch (ht_info->ForcedAMPDUMode) { case HT_AGG_AUTO: break; case HT_AGG_FORCE_ENABLE: tcb_desc->bAMPDUEnable = true; - tcb_desc->ampdu_density = pHTInfo->forced_mpdu_density; - tcb_desc->ampdu_factor = pHTInfo->forced_ampdu_factor; + tcb_desc->ampdu_density = ht_info->forced_mpdu_density; + tcb_desc->ampdu_factor = ht_info->forced_ampdu_factor; break; case HT_AGG_FORCE_DISABLE: @@ -350,32 +350,32 @@ 
static void rtllib_query_ShortPreambleMode(struct rtllib_device *ieee, static void rtllib_query_HTCapShortGI(struct rtllib_device *ieee, struct cb_desc *tcb_desc) { - struct rt_hi_throughput *pHTInfo = ieee->pHTInfo; + struct rt_hi_throughput *ht_info = ieee->ht_info; tcb_desc->bUseShortGI = false; - if (!pHTInfo->bCurrentHTSupport || !pHTInfo->enable_ht) + if (!ht_info->bCurrentHTSupport || !ht_info->enable_ht) return; - if (pHTInfo->forced_short_gi) { + if (ht_info->forced_short_gi) { tcb_desc->bUseShortGI = true; return; } - if (pHTInfo->bCurBW40MHz && pHTInfo->bCurShortGI40MHz) + if (ht_info->bCurBW40MHz && ht_info->bCurShortGI40MHz) tcb_desc->bUseShortGI = true; - else if (!pHTInfo->bCurBW40MHz && pHTInfo->bCurShortGI20MHz) + else if (!ht_info->bCurBW40MHz && ht_info->bCurShortGI20MHz) tcb_desc->bUseShortGI = true; } static void rtllib_query_BandwidthMode(struct rtllib_device *ieee, struct cb_desc *tcb_desc) { - struct rt_hi_throughput *pHTInfo = ieee->pHTInfo; + struct rt_hi_throughput *ht_info = ieee->ht_info; tcb_desc->bPacketBW = false; - if (!pHTInfo->bCurrentHTSupport || !pHTInfo->enable_ht) + if (!ht_info->bCurrentHTSupport || !ht_info->enable_ht) return; if (tcb_desc->bMulticast || tcb_desc->bBroadcast) @@ -383,7 +383,7 @@ static void rtllib_query_BandwidthMode(struct rtllib_device *ieee, if ((tcb_desc->data_rate & 0x80) == 0) return; - if (pHTInfo->bCurBW40MHz && pHTInfo->cur_tx_bw40mhz && + if (ht_info->bCurBW40MHz && ht_info->cur_tx_bw40mhz && !ieee->bandwidth_auto_switch.bforced_tx20Mhz) tcb_desc->bPacketBW = true; } @@ -392,7 +392,7 @@ static void rtllib_query_protectionmode(struct rtllib_device *ieee, struct cb_desc *tcb_desc, struct sk_buff *skb) { - struct rt_hi_throughput *pHTInfo; + struct rt_hi_throughput *ht_info; tcb_desc->bRTSSTBC = false; tcb_desc->bRTSUseShortGI = false; @@ -418,15 +418,15 @@ static void rtllib_query_protectionmode(struct rtllib_device *ieee, return; } - pHTInfo = ieee->pHTInfo; + ht_info = ieee->ht_info; while (true) { 
- if (pHTInfo->iot_action & HT_IOT_ACT_FORCED_CTS2SELF) { + if (ht_info->iot_action & HT_IOT_ACT_FORCED_CTS2SELF) { tcb_desc->bCTSEnable = true; tcb_desc->rts_rate = MGN_24M; tcb_desc->bRTSEnable = true; break; - } else if (pHTInfo->iot_action & (HT_IOT_ACT_FORCED_RTS | + } else if (ht_info->iot_action & (HT_IOT_ACT_FORCED_RTS | HT_IOT_ACT_PURE_N_MODE)) { tcb_desc->bRTSEnable = true; tcb_desc->rts_rate = MGN_24M; @@ -438,12 +438,12 @@ static void rtllib_query_protectionmode(struct rtllib_device *ieee, tcb_desc->rts_rate = MGN_24M; break; } - if (pHTInfo->bCurrentHTSupport && pHTInfo->enable_ht) { - u8 HTOpMode = pHTInfo->current_op_mode; + if (ht_info->bCurrentHTSupport && ht_info->enable_ht) { + u8 HTOpMode = ht_info->current_op_mode; - if ((pHTInfo->bCurBW40MHz && (HTOpMode == 2 || + if ((ht_info->bCurBW40MHz && (HTOpMode == 2 || HTOpMode == 3)) || - (!pHTInfo->bCurBW40MHz && HTOpMode == 3)) { + (!ht_info->bCurBW40MHz && HTOpMode == 3)) { tcb_desc->rts_rate = MGN_24M; tcb_desc->bRTSEnable = true; break; @@ -882,7 +882,7 @@ static int rtllib_xmit_inter(struct sk_buff *skb, struct net_device *dev) tcb_desc->priority = skb->priority; if (ether_type == ETH_P_PAE) { - if (ieee->pHTInfo->iot_action & + if (ieee->ht_info->iot_action & HT_IOT_ACT_WA_IOT_Broadcom) { tcb_desc->data_rate = MgntQuery_TxRateExcludeCCKRates(ieee); @@ -906,7 +906,7 @@ static int rtllib_xmit_inter(struct sk_buff *skb, struct net_device *dev) tcb_desc->data_rate = rtllib_current_rate(ieee); if (bdhcp) { - if (ieee->pHTInfo->iot_action & + if (ieee->ht_info->iot_action & HT_IOT_ACT_WA_IOT_Broadcom) { tcb_desc->data_rate = MgntQuery_TxRateExcludeCCKRates(ieee); From 8610e98f0b48a7f9974cb12c5f501433c4afa958 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 16 Nov 2022 14:50:59 -0600 Subject: [PATCH 1752/4122] PCI: Drop of_match_ptr() to avoid unused variables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have stubs for most OF interfaces even when 
CONFIG_OF is not set, so we allow building of most controller drivers in that case for compile testing. When CONFIG_OF is not set, "of_match_ptr()" compiles to NULL, which leaves the match table passed to it unused, resulting in errors like this: $ make W=1 drivers/pci/controller/pci-xgene.c:636:34: error: ‘xgene_pcie_match_table’ defined but not used [-Werror=unused-const-variable=] Drop of_match_ptr() to avoid the unused variable warning. See also 1dff012f636d ("PCI: Drop of_match_ptr() to avoid unused variables"). Link: https://lore.kernel.org/r/20221025191339.667614-2-helgaas@kernel.org Link: https://lore.kernel.org/r/20221116205100.1136224-2-helgaas@kernel.org Signed-off-by: Bjorn Helgaas --- drivers/pci/controller/pci-ftpci100.c | 2 +- drivers/pci/controller/pci-v3-semi.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pci/controller/pci-ftpci100.c b/drivers/pci/controller/pci-ftpci100.c index 0cfd9d5a497c..ecd3009df586 100644 --- a/drivers/pci/controller/pci-ftpci100.c +++ b/drivers/pci/controller/pci-ftpci100.c @@ -553,7 +553,7 @@ static const struct of_device_id faraday_pci_of_match[] = { static struct platform_driver faraday_pci_driver = { .driver = { .name = "ftpci100", - .of_match_table = of_match_ptr(faraday_pci_of_match), + .of_match_table = faraday_pci_of_match, .suppress_bind_attrs = true, }, .probe = faraday_pci_probe, diff --git a/drivers/pci/controller/pci-v3-semi.c b/drivers/pci/controller/pci-v3-semi.c index 784fcf35599c..ca44b0c83d1b 100644 --- a/drivers/pci/controller/pci-v3-semi.c +++ b/drivers/pci/controller/pci-v3-semi.c @@ -901,7 +901,7 @@ static const struct of_device_id v3_pci_of_match[] = { static struct platform_driver v3_pci_driver = { .driver = { .name = "pci-v3-semi", - .of_match_table = of_match_ptr(v3_pci_of_match), + .of_match_table = v3_pci_of_match, .suppress_bind_attrs = true, }, .probe = v3_pci_probe, From 1aff514e1d2bd47854dbbdf867970b9d463d4c57 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 31 Oct 2022 15:43:37
+0800 Subject: [PATCH 1753/4122] HSI: omap_ssi_core: fix possible memory leak in ssi_probe() If ssi_add_controller() returns an error, ssi_probe() should call hsi_put_controller() to give up the reference that was set in hsi_alloc_controller(), so that hsi_controller_release() is called to free the controller and ports that were allocated in hsi_alloc_controller(). Fixes: b209e047bc74 ("HSI: Introduce OMAP SSI driver") Signed-off-by: Yang Yingliang Signed-off-by: Sebastian Reichel --- drivers/hsi/controllers/omap_ssi_core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/hsi/controllers/omap_ssi_core.c b/drivers/hsi/controllers/omap_ssi_core.c index b23a576ed88a..052cf3e92dd6 100644 --- a/drivers/hsi/controllers/omap_ssi_core.c +++ b/drivers/hsi/controllers/omap_ssi_core.c @@ -502,8 +502,10 @@ static int ssi_probe(struct platform_device *pd) platform_set_drvdata(pd, ssi); err = ssi_add_controller(ssi, pd); - if (err < 0) + if (err < 0) { + hsi_put_controller(ssi); goto out1; + } pm_runtime_enable(&pd->dev); From 8acbca3a92b859e3dfe0538254acd5bd5b4632b1 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 11 Sep 2022 13:56:59 +0200 Subject: [PATCH 1754/4122] headers: Remove some left-over license text in include/uapi/linux/hsi/ Remove some left-over from commit e2be04c7f995 ("License cleanup: add SPDX license identifier to uapi header files with a license") When the SPDX-License-Identifier tag has been added, the corresponding license text has not been removed.
Signed-off-by: Christophe JAILLET Acked-by: Kai Vehmanen Acked-by: Peter Ujfalusi Signed-off-by: Sebastian Reichel --- include/uapi/linux/hsi/cs-protocol.h | 14 -------------- include/uapi/linux/hsi/hsi_char.h | 14 -------------- 2 files changed, 28 deletions(-) diff --git a/include/uapi/linux/hsi/cs-protocol.h b/include/uapi/linux/hsi/cs-protocol.h index c7f6e7672cb5..07c3bfb67463 100644 --- a/include/uapi/linux/hsi/cs-protocol.h +++ b/include/uapi/linux/hsi/cs-protocol.h @@ -6,20 +6,6 @@ * * Contact: Kai Vehmanen * Original author: Peter Ujfalusi - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA - * 02110-1301 USA */ #ifndef _CS_PROTOCOL_H diff --git a/include/uapi/linux/hsi/hsi_char.h b/include/uapi/linux/hsi/hsi_char.h index 91623b0398b1..5ef72f0daf94 100644 --- a/include/uapi/linux/hsi/hsi_char.h +++ b/include/uapi/linux/hsi/hsi_char.h @@ -5,20 +5,6 @@ * Copyright (C) 2010 Nokia Corporation. All rights reserved. * * Contact: Andras Domokos - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA - * 02110-1301 USA */ #ifndef __HSI_CHAR_H From 5b79480ce1978864ac3f06f2134dfa3b6691fe74 Mon Sep 17 00:00:00 2001 From: Zeng Heng Date: Thu, 17 Nov 2022 16:32:19 +0800 Subject: [PATCH 1755/4122] power: supply: fix residue sysfs file in error handle route of __power_supply_register() If device_add() succeeds, we should call device_del() when we want to get rid of the device, so move the call under the proper jump label. Otherwise, when __power_supply_register() fails and jumps to wakeup_init_failed to exit, a residual device file is still left in sysfs. When attempting to probe the device again, sysfs would complain as below: sysfs: cannot create duplicate filename '/devices/platform/i2c/i2c-0/0-001c/power_supply/adp5061' Call Trace: dump_stack_lvl+0x68/0x85 sysfs_warn_dup.cold+0x1c/0x29 sysfs_create_dir_ns+0x1b1/0x1d0 kobject_add_internal+0x143/0x390 kobject_add+0x108/0x170 Fixes: 80c6463e2fa3 ("power_supply: Fix Oops from NULL pointer dereference from wakeup_source_activate") Signed-off-by: Zeng Heng Signed-off-by: Sebastian Reichel --- drivers/power/supply/power_supply_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c index 9035e349bf53..00cb19b46001 100644 --- a/drivers/power/supply/power_supply_core.c +++ b/drivers/power/supply/power_supply_core.c @@ -1386,8 +1386,8 @@ create_triggers_failed: register_cooler_failed: psy_unregister_thermal(psy); register_thermal_failed: - device_del(dev); wakeup_init_failed: + device_del(dev); device_add_failed: check_supplies_failed: dev_set_name_failed: From 332d7d0c6dd7db50109d304802e0d1a9d086188f Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Wed, 16 Nov 2022 12:05:56 +0200 Subject:
[PATCH 1756/4122] power: supply: 88pm860x: simplify using devm Use devm variants for requesting threaded IRQ and for power-supply registration. Clean up error path and remove the .remove-callback. Signed-off-by: Matti Vaittinen Signed-off-by: Sebastian Reichel --- drivers/power/supply/88pm860x_charger.c | 38 ++++++------------------- 1 file changed, 9 insertions(+), 29 deletions(-) diff --git a/drivers/power/supply/88pm860x_charger.c b/drivers/power/supply/88pm860x_charger.c index f21ce52fbc04..2b9fcb7e71d7 100644 --- a/drivers/power/supply/88pm860x_charger.c +++ b/drivers/power/supply/88pm860x_charger.c @@ -690,8 +690,7 @@ static int pm860x_charger_probe(struct platform_device *pdev) (chip->id == CHIP_PM8607) ? chip->companion : chip->client; if (!info->i2c_8606) { dev_err(&pdev->dev, "Missed I2C address of 88PM8606!\n"); - ret = -EINVAL; - goto out; + return -EINVAL; } info->dev = &pdev->dev; @@ -704,44 +703,26 @@ static int pm860x_charger_probe(struct platform_device *pdev) psy_cfg.drv_data = info; psy_cfg.supplied_to = pm860x_supplied_to; psy_cfg.num_supplicants = ARRAY_SIZE(pm860x_supplied_to); - info->usb = power_supply_register(&pdev->dev, &pm860x_charger_desc, - &psy_cfg); + info->usb = devm_power_supply_register(&pdev->dev, &pm860x_charger_desc, + &psy_cfg); if (IS_ERR(info->usb)) { - ret = PTR_ERR(info->usb); - goto out; + return PTR_ERR(info->usb); } pm860x_init_charger(info); for (i = 0; i < ARRAY_SIZE(info->irq); i++) { - ret = request_threaded_irq(info->irq[i], NULL, - pm860x_irq_descs[i].handler, - IRQF_ONESHOT, pm860x_irq_descs[i].name, info); + ret = devm_request_threaded_irq(&pdev->dev, info->irq[i], NULL, + pm860x_irq_descs[i].handler, + IRQF_ONESHOT, + pm860x_irq_descs[i].name, info); if (ret < 0) { dev_err(chip->dev, "Failed to request IRQ: #%d: %d\n", info->irq[i], ret); - goto out_irq; + return ret; } } return 0; - -out_irq: - power_supply_unregister(info->usb); - while (--i >= 0) - free_irq(info->irq[i], info); -out: - return ret; -} - 
-static int pm860x_charger_remove(struct platform_device *pdev) -{ - struct pm860x_charger_info *info = platform_get_drvdata(pdev); - int i; - - power_supply_unregister(info->usb); - for (i = 0; i < info->irq_nums; i++) - free_irq(info->irq[i], info); - return 0; } static struct platform_driver pm860x_charger_driver = { @@ -749,7 +730,6 @@ static struct platform_driver pm860x_charger_driver = { .name = "88pm860x-charger", }, .probe = pm860x_charger_probe, - .remove = pm860x_charger_remove, }; module_platform_driver(pm860x_charger_driver); From 1c137323e9a2a970b4a5bf8cf3c50e0ea1cefbeb Mon Sep 17 00:00:00 2001 From: Sathvika Vasireddy Date: Mon, 14 Nov 2022 23:27:43 +0530 Subject: [PATCH 1757/4122] crypto: vmx: Skip objtool from running on aesp8-ppc.o With objtool enabled, below warnings are seen when trying to build: drivers/crypto/vmx/aesp8-ppc.o: warning: objtool: aes_p8_set_encrypt_key+0x44: unannotated intra-function call drivers/crypto/vmx/aesp8-ppc.o: warning: objtool: .text+0x2448: unannotated intra-function call drivers/crypto/vmx/aesp8-ppc.o: warning: objtool: .text+0x2d68: unannotated intra-function call Skip objtool from running on drivers/crypto/vmx/aesp8-ppc.o file for the following reasons: - Since this file comes from OpenSSL, and since it is a perl file which generates a .S file, it may not be the best choice to make too many code changes to such files, unless absolutely necessary. - As far as the objtool --mcount functionality is concerned, we do not have to run objtool on this file because there are no calls to _mcount(). Tested-by: Naveen N. Rao Reviewed-by: Naveen N. 
Rao Reviewed-by: Christophe Leroy Acked-by: Josh Poimboeuf Signed-off-by: Sathvika Vasireddy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221114175754.1131267-6-sv@linux.ibm.com --- drivers/crypto/vmx/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/crypto/vmx/Makefile b/drivers/crypto/vmx/Makefile index 2560cfea1dec..7b41f0da6807 100644 --- a/drivers/crypto/vmx/Makefile +++ b/drivers/crypto/vmx/Makefile @@ -9,3 +9,5 @@ targets += aesp8-ppc.S ghashp8-ppc.S $(obj)/aesp8-ppc.S $(obj)/ghashp8-ppc.S: $(obj)/%.S: $(src)/%.pl FORCE $(call if_changed,perl) + +OBJECT_FILES_NON_STANDARD_aesp8-ppc.o := y From 2da37761671b5bdedbe04e6469cfa57cd6b6ae45 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 14 Nov 2022 23:27:44 +0530 Subject: [PATCH 1758/4122] powerpc/32: Fix objtool unannotated intra-function call warnings Fix several annotations in assembly files on PPC32. [Sathvika Vasireddy: Changed subject line and removed Kconfig change to enable objtool, as it is a part of "objtool/powerpc: Enable objtool to be built on ppc" patch in this series.] Tested-by: Naveen N. Rao Reviewed-by: Naveen N. 
Rao Acked-by: Josh Poimboeuf Signed-off-by: Christophe Leroy Signed-off-by: Sathvika Vasireddy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221114175754.1131267-7-sv@linux.ibm.com --- arch/powerpc/kernel/cpu_setup_6xx.S | 26 ++++++++++++------ arch/powerpc/kernel/cpu_setup_e500.S | 8 ++++-- arch/powerpc/kernel/entry_32.S | 9 ++++-- arch/powerpc/kernel/head_40x.S | 5 +++- arch/powerpc/kernel/head_85xx.S | 5 +++- arch/powerpc/kernel/head_8xx.S | 5 +++- arch/powerpc/kernel/head_book3s_32.S | 29 ++++++++++++++------ arch/powerpc/kernel/swsusp_32.S | 5 +++- arch/powerpc/kvm/fpu.S | 17 ++++++++---- arch/powerpc/platforms/52xx/lite5200_sleep.S | 15 +++++++--- 10 files changed, 89 insertions(+), 35 deletions(-) diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S index f8b5ff64b604..f29ce3dd6140 100644 --- a/arch/powerpc/kernel/cpu_setup_6xx.S +++ b/arch/powerpc/kernel/cpu_setup_6xx.S @@ -4,6 +4,8 @@ * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org) */ +#include + #include #include #include @@ -81,7 +83,7 @@ _GLOBAL(__setup_cpu_745x) blr /* Enable caches for 603's, 604, 750 & 7400 */ -setup_common_caches: +SYM_FUNC_START_LOCAL(setup_common_caches) mfspr r11,SPRN_HID0 andi. r0,r11,HID0_DCE ori r11,r11,HID0_ICE|HID0_DCE @@ -95,11 +97,12 @@ setup_common_caches: sync isync blr +SYM_FUNC_END(setup_common_caches) /* 604, 604e, 604ev, ... * Enable superscalar execution & branch history table */ -setup_604_hid0: +SYM_FUNC_START_LOCAL(setup_604_hid0) mfspr r11,SPRN_HID0 ori r11,r11,HID0_SIED|HID0_BHTE ori r8,r11,HID0_BTCD @@ -110,6 +113,7 @@ setup_604_hid0: sync isync blr +SYM_FUNC_END(setup_604_hid0) /* 7400 <= rev 2.7 and 7410 rev = 1.0 suffer from some * erratas we work around here. @@ -125,13 +129,14 @@ setup_604_hid0: * needed once we have applied workaround #5 (though it's * not set by Apple's firmware at least). 
*/ -setup_7400_workarounds: +SYM_FUNC_START_LOCAL(setup_7400_workarounds) mfpvr r3 rlwinm r3,r3,0,20,31 cmpwi 0,r3,0x0207 ble 1f blr -setup_7410_workarounds: +SYM_FUNC_END(setup_7400_workarounds) +SYM_FUNC_START_LOCAL(setup_7410_workarounds) mfpvr r3 rlwinm r3,r3,0,20,31 cmpwi 0,r3,0x0100 @@ -151,6 +156,7 @@ setup_7410_workarounds: sync isync blr +SYM_FUNC_END(setup_7410_workarounds) /* 740/750/7400/7410 * Enable Store Gathering (SGE), Address Broadcast (ABE), @@ -158,7 +164,7 @@ setup_7410_workarounds: * Dynamic Power Management (DPM), Speculative (SPD) * Clear Instruction cache throttling (ICTC) */ -setup_750_7400_hid0: +SYM_FUNC_START_LOCAL(setup_750_7400_hid0) mfspr r11,SPRN_HID0 ori r11,r11,HID0_SGE | HID0_ABE | HID0_BHTE | HID0_BTIC oris r11,r11,HID0_DPM@h @@ -177,12 +183,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_NO_DPM) sync isync blr +SYM_FUNC_END(setup_750_7400_hid0) /* 750cx specific * Looks like we have to disable NAP feature for some PLL settings... * (waiting for confirmation) */ -setup_750cx: +SYM_FUNC_START_LOCAL(setup_750cx) mfspr r10, SPRN_HID1 rlwinm r10,r10,4,28,31 cmpwi cr0,r10,7 @@ -196,11 +203,13 @@ setup_750cx: andc r6,r6,r7 stw r6,CPU_SPEC_FEATURES(r4) blr +SYM_FUNC_END(setup_750cx) /* 750fx specific */ -setup_750fx: +SYM_FUNC_START_LOCAL(setup_750fx) blr +SYM_FUNC_END(setup_750fx) /* MPC 745x * Enable Store Gathering (SGE), Branch Folding (FOLD) @@ -212,7 +221,7 @@ setup_750fx: * Clear Instruction cache throttling (ICTC) * Enable L2 HW prefetch */ -setup_745x_specifics: +SYM_FUNC_START_LOCAL(setup_745x_specifics) /* We check for the presence of an L3 cache setup by * the firmware. If any, we disable NAP capability as * it's known to be bogus on rev 2.1 and earlier @@ -270,6 +279,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NO_DPM) sync isync blr +SYM_FUNC_END(setup_745x_specifics) /* * Initialize the FPU registers. 
This is needed to work around an errata diff --git a/arch/powerpc/kernel/cpu_setup_e500.S b/arch/powerpc/kernel/cpu_setup_e500.S index 2ab25161b0ad..077cfccc3461 100644 --- a/arch/powerpc/kernel/cpu_setup_e500.S +++ b/arch/powerpc/kernel/cpu_setup_e500.S @@ -8,6 +8,8 @@ * Benjamin Herrenschmidt */ +#include + #include #include #include @@ -274,7 +276,7 @@ _GLOBAL(flush_dcache_L1) blr -has_L2_cache: +SYM_FUNC_START_LOCAL(has_L2_cache) /* skip L2 cache on P2040/P2040E as they have no L2 cache */ mfspr r3, SPRN_SVR /* shift right by 8 bits and clear E bit of SVR */ @@ -290,9 +292,10 @@ has_L2_cache: 1: li r3, 0 blr +SYM_FUNC_END(has_L2_cache) /* flush backside L2 cache */ -flush_backside_L2_cache: +SYM_FUNC_START_LOCAL(flush_backside_L2_cache) mflr r10 bl has_L2_cache mtlr r10 @@ -313,6 +316,7 @@ flush_backside_L2_cache: bne 1b 2: blr +SYM_FUNC_END(flush_backside_L2_cache) _GLOBAL(cpu_down_flush_e500v2) mflr r0 diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 3fc7c9886bb7..5e0763be1549 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -18,6 +18,8 @@ #include #include #include +#include + #include #include #include @@ -74,17 +76,18 @@ _ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler) #endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_PPC_E500 */ #if defined(CONFIG_PPC_KUEP) && defined(CONFIG_PPC_BOOK3S_32) - .globl __kuep_lock -__kuep_lock: +SYM_FUNC_START(__kuep_lock) lwz r9, THREAD+THSR0(r2) update_user_segments_by_4 r9, r10, r11, r12 blr +SYM_FUNC_END(__kuep_lock) -__kuep_unlock: +SYM_FUNC_START_LOCAL(__kuep_unlock) lwz r9, THREAD+THSR0(r2) rlwinm r9,r9,0,~SR_NX update_user_segments_by_4 r9, r10, r11, r12 blr +SYM_FUNC_END(__kuep_unlock) .macro kuep_lock bl __kuep_lock diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 088f500896c7..9110fe9d6747 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -28,6 +28,8 @@ #include #include #include 
+#include + #include #include #include @@ -662,7 +664,7 @@ start_here: * kernel initialization. This maps the first 32 MBytes of memory 1:1 * virtual to physical and more importantly sets the cache mode. */ -initial_mmu: +SYM_FUNC_START_LOCAL(initial_mmu) tlbia /* Invalidate all TLB entries */ isync @@ -711,6 +713,7 @@ initial_mmu: mtspr SPRN_EVPR,r0 blr +SYM_FUNC_END(initial_mmu) _GLOBAL(abort) mfspr r13,SPRN_DBCR0 diff --git a/arch/powerpc/kernel/head_85xx.S b/arch/powerpc/kernel/head_85xx.S index 52c0ab416326..6be3cc36b716 100644 --- a/arch/powerpc/kernel/head_85xx.S +++ b/arch/powerpc/kernel/head_85xx.S @@ -29,6 +29,8 @@ #include #include #include +#include + #include #include #include @@ -885,7 +887,7 @@ KernelSPE: * Translate the effec addr in r3 to phys addr. The phys addr will be put * into r3(higher 32bit) and r4(lower 32bit) */ -get_phys_addr: +SYM_FUNC_START_LOCAL(get_phys_addr) mfmsr r8 mfspr r9,SPRN_PID rlwinm r9,r9,16,0x3fff0000 /* turn PID into MAS6[SPID] */ @@ -907,6 +909,7 @@ get_phys_addr: mfspr r3,SPRN_MAS7 #endif blr +SYM_FUNC_END(get_phys_addr) /* * Global functions diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 0b05f2be66b9..c94ed5a08c93 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -18,6 +18,8 @@ #include #include #include +#include + #include #include #include @@ -625,7 +627,7 @@ start_here: * 24 Mbytes of data, and the 512k IMMR space. Anything not covered by * these mappings is mapped by page tables. 
*/ -initial_mmu: +SYM_FUNC_START_LOCAL(initial_mmu) li r8, 0 mtspr SPRN_MI_CTR, r8 /* remove PINNED ITLB entries */ lis r10, MD_TWAM@h @@ -686,6 +688,7 @@ initial_mmu: #endif mtspr SPRN_DER, r8 blr +SYM_FUNC_END(initial_mmu) _GLOBAL(mmu_pin_tlb) lis r9, (1f - PAGE_OFFSET)@h diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 519b60695167..4af12447dc0b 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -18,6 +18,8 @@ #include #include +#include + #include #include #include @@ -877,7 +879,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE) * Load stuff into the MMU. Intended to be called with * IR=0 and DR=0. */ -early_hash_table: +SYM_FUNC_START_LOCAL(early_hash_table) sync /* Force all PTE updates to finish */ isync tlbia /* Clear all TLB entries */ @@ -888,8 +890,9 @@ early_hash_table: ori r6, r6, 3 /* 256kB table */ mtspr SPRN_SDR1, r6 blr +SYM_FUNC_END(early_hash_table) -load_up_mmu: +SYM_FUNC_START_LOCAL(load_up_mmu) sync /* Force all PTE updates to finish */ isync tlbia /* Clear all TLB entries */ @@ -918,6 +921,7 @@ BEGIN_MMU_FTR_SECTION LOAD_BAT(7,r3,r4,r5) END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) blr +SYM_FUNC_END(load_up_mmu) _GLOBAL(load_segment_registers) li r0, NUM_USER_SEGMENTS /* load up user segment register values */ @@ -1028,7 +1032,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE) * this makes sure it's done. * -- Cort */ -clear_bats: +SYM_FUNC_START_LOCAL(clear_bats) li r10,0 mtspr SPRN_DBAT0U,r10 @@ -1072,6 +1076,7 @@ BEGIN_MMU_FTR_SECTION mtspr SPRN_IBAT7L,r10 END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) blr +SYM_FUNC_END(clear_bats) _GLOBAL(update_bats) lis r4, 1f@h @@ -1108,15 +1113,16 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) mtspr SPRN_SRR1, r6 rfi -flush_tlbs: +SYM_FUNC_START_LOCAL(flush_tlbs) lis r10, 0x40 1: addic. 
r10, r10, -0x1000 tlbie r10 bgt 1b sync blr +SYM_FUNC_END(flush_tlbs) -mmu_off: +SYM_FUNC_START_LOCAL(mmu_off) addi r4, r3, __after_mmu_off - _start mfmsr r3 andi. r0,r3,MSR_DR|MSR_IR /* MMU enabled? */ @@ -1128,9 +1134,10 @@ mmu_off: mtspr SPRN_SRR1,r3 sync rfi +SYM_FUNC_END(mmu_off) /* We use one BAT to map up to 256M of RAM at _PAGE_OFFSET */ -initial_bats: +SYM_FUNC_START_LOCAL(initial_bats) lis r11,PAGE_OFFSET@h tophys(r8,r11) #ifdef CONFIG_SMP @@ -1146,9 +1153,10 @@ initial_bats: mtspr SPRN_IBAT0U,r11 isync blr +SYM_FUNC_END(initial_bats) #ifdef CONFIG_BOOTX_TEXT -setup_disp_bat: +SYM_FUNC_START_LOCAL(setup_disp_bat) /* * setup the display bat prepared for us in prom.c */ @@ -1164,10 +1172,11 @@ setup_disp_bat: mtspr SPRN_DBAT3L,r8 mtspr SPRN_DBAT3U,r11 blr +SYM_FUNC_END(setup_disp_bat) #endif /* CONFIG_BOOTX_TEXT */ #ifdef CONFIG_PPC_EARLY_DEBUG_CPM -setup_cpm_bat: +SYM_FUNC_START_LOCAL(setup_cpm_bat) lis r8, 0xf000 ori r8, r8, 0x002a mtspr SPRN_DBAT1L, r8 @@ -1177,10 +1186,11 @@ setup_cpm_bat: mtspr SPRN_DBAT1U, r11 blr +SYM_FUNC_END(setup_cpm_bat) #endif #ifdef CONFIG_PPC_EARLY_DEBUG_USBGECKO -setup_usbgecko_bat: +SYM_FUNC_START_LOCAL(setup_usbgecko_bat) /* prepare a BAT for early io */ #if defined(CONFIG_GAMECUBE) lis r8, 0x0c00 @@ -1199,6 +1209,7 @@ setup_usbgecko_bat: mtspr SPRN_DBAT1L, r8 mtspr SPRN_DBAT1U, r11 blr +SYM_FUNC_END(setup_usbgecko_bat) #endif .data diff --git a/arch/powerpc/kernel/swsusp_32.S b/arch/powerpc/kernel/swsusp_32.S index e0cbd63007f2..ffb79326483c 100644 --- a/arch/powerpc/kernel/swsusp_32.S +++ b/arch/powerpc/kernel/swsusp_32.S @@ -1,5 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include +#include + #include #include #include @@ -400,7 +402,7 @@ _ASM_NOKPROBE_SYMBOL(swsusp_arch_resume) /* FIXME:This construct is actually not useful since we don't shut * down the instruction MMU, we could just flip back MSR-DR on. 
*/ -turn_on_mmu: +SYM_FUNC_START_LOCAL(turn_on_mmu) mflr r4 mtsrr0 r4 mtsrr1 r3 @@ -408,4 +410,5 @@ turn_on_mmu: isync rfi _ASM_NOKPROBE_SYMBOL(turn_on_mmu) +SYM_FUNC_END(turn_on_mmu) diff --git a/arch/powerpc/kvm/fpu.S b/arch/powerpc/kvm/fpu.S index 315c94946bad..b68e7f26a81f 100644 --- a/arch/powerpc/kvm/fpu.S +++ b/arch/powerpc/kvm/fpu.S @@ -6,6 +6,8 @@ */ #include +#include + #include #include #include @@ -110,18 +112,22 @@ FPS_THREE_IN(fsel) * R8 = (double*)&param3 [load_three] * LR = instruction call function */ -fpd_load_three: +SYM_FUNC_START_LOCAL(fpd_load_three) lfd 2,0(r8) /* load param3 */ -fpd_load_two: +SYM_FUNC_START_LOCAL(fpd_load_two) lfd 1,0(r7) /* load param2 */ -fpd_load_one: +SYM_FUNC_START_LOCAL(fpd_load_one) lfd 0,0(r6) /* load param1 */ -fpd_load_none: +SYM_FUNC_START_LOCAL(fpd_load_none) lfd 3,0(r3) /* load up fpscr value */ MTFSF_L(3) lwz r6, 0(r4) /* load cr */ mtcr r6 blr +SYM_FUNC_END(fpd_load_none) +SYM_FUNC_END(fpd_load_one) +SYM_FUNC_END(fpd_load_two) +SYM_FUNC_END(fpd_load_three) /* * End of double instruction processing @@ -131,13 +137,14 @@ fpd_load_none: * R5 = (double*)&result * LR = caller of instruction call function */ -fpd_return: +SYM_FUNC_START_LOCAL(fpd_return) mfcr r6 stfd 0,0(r5) /* save result */ mffs 0 stfd 0,0(r3) /* save new fpscr value */ stw r6,0(r4) /* save new cr value */ blr +SYM_FUNC_END(fpd_return) /* * Double operation with no input operand diff --git a/arch/powerpc/platforms/52xx/lite5200_sleep.S b/arch/powerpc/platforms/52xx/lite5200_sleep.S index afee8b1515a8..0b12647e7b42 100644 --- a/arch/powerpc/platforms/52xx/lite5200_sleep.S +++ b/arch/powerpc/platforms/52xx/lite5200_sleep.S @@ -1,4 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#include + #include #include #include @@ -178,7 +180,8 @@ sram_code: /* local udelay in sram is needed */ - udelay: /* r11 - tb_ticks_per_usec, r12 - usecs, overwrites r13 */ +SYM_FUNC_START_LOCAL(udelay) + /* r11 - tb_ticks_per_usec, r12 - usecs, overwrites r13 */ mullw r12,
r12, r11 mftb r13 /* start */ add r12, r13, r12 /* end */ @@ -187,6 +190,7 @@ sram_code: cmp cr0, r13, r12 blt 1b blr +SYM_FUNC_END(udelay) sram_code_end: @@ -271,7 +275,7 @@ _ASM_NOKPROBE_SYMBOL(lite5200_wakeup) SAVE_SR(n+2, addr+2); \ SAVE_SR(n+3, addr+3); -save_regs: +SYM_FUNC_START_LOCAL(save_regs) stw r0, 0(r4) stw r1, 0x4(r4) stw r2, 0x8(r4) @@ -317,6 +321,7 @@ save_regs: SAVE_SPRN(TBRU, 0x5b) blr +SYM_FUNC_END(save_regs) /* restore registers */ @@ -336,7 +341,7 @@ save_regs: LOAD_SR(n+2, addr+2); \ LOAD_SR(n+3, addr+3); -restore_regs: +SYM_FUNC_START_LOCAL(restore_regs) lis r4, registers@h ori r4, r4, registers@l @@ -393,6 +398,7 @@ restore_regs: blr _ASM_NOKPROBE_SYMBOL(restore_regs) +SYM_FUNC_END(restore_regs) @@ -403,7 +409,7 @@ _ASM_NOKPROBE_SYMBOL(restore_regs) * Flush data cache * Do this by just reading lots of stuff into the cache. */ -flush_data_cache: +SYM_FUNC_START_LOCAL(flush_data_cache) lis r3,CONFIG_KERNEL_START@h ori r3,r3,CONFIG_KERNEL_START@l li r4,NUM_CACHE_LINES @@ -413,3 +419,4 @@ flush_data_cache: addi r3,r3,L1_CACHE_BYTES /* Next line, please */ bdnz 1b blr +SYM_FUNC_END(flush_data_cache) From d0160bd5d389da247fb5affb6a35ea393d22fedb Mon Sep 17 00:00:00 2001 From: Sathvika Vasireddy Date: Mon, 14 Nov 2022 23:27:45 +0530 Subject: [PATCH 1759/4122] powerpc/vdso: Skip objtool from running on VDSO files Do not run objtool on VDSO files, by using OBJECT_FILES_NON_STANDARD. Suggested-by: Christophe Leroy Tested-by: Naveen N. Rao Reviewed-by: Naveen N. 
Rao Reviewed-by: Christophe Leroy Acked-by: Josh Poimboeuf Signed-off-by: Sathvika Vasireddy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221114175754.1131267-8-sv@linux.ibm.com --- arch/powerpc/kernel/vdso/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile index a2e7b0ce5b19..6a977b0d8ffc 100644 --- a/arch/powerpc/kernel/vdso/Makefile +++ b/arch/powerpc/kernel/vdso/Makefile @@ -102,3 +102,5 @@ quiet_cmd_vdso64ld_and_check = VDSO64L $@ cmd_vdso64ld_and_check = $(VDSOCC) $(c_flags) $(CC64FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) -z noexecstack ; $(cmd_vdso_check) quiet_cmd_vdso64as = VDSO64A $@ cmd_vdso64as = $(VDSOCC) $(a_flags) $(CC64FLAGS) $(AS64FLAGS) -c -o $@ $< + +OBJECT_FILES_NON_STANDARD := y From efb11fdb3e1a9f694fa12b70b21e69e55ec59c36 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 14 Nov 2022 23:27:46 +0530 Subject: [PATCH 1760/4122] objtool: Fix SEGFAULT find_insn() will return NULL in case of failure. Check insn in order to avoid a kernel Oops for NULL pointer dereference. Tested-by: Naveen N. Rao Reviewed-by: Naveen N. 
Rao Acked-by: Josh Poimboeuf Acked-by: Peter Zijlstra (Intel) Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221114175754.1131267-9-sv@linux.ibm.com --- tools/objtool/check.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 43ec14c29a60..8427af808221 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -207,7 +207,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func, return false; insn = find_insn(file, func->sec, func->offset); - if (!insn->func) + if (!insn || !insn->func) return false; func_for_each_insn(file, func, insn) { From 0646c28b417b7fe307c9da72ca1c508e43b57dc0 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 14 Nov 2022 23:27:47 +0530 Subject: [PATCH 1761/4122] objtool: Use target file endianness instead of a compiled constant Some architectures like powerpc support both endianness, it's therefore not possible to fix the endianness via arch/endianness.h because there is no easy way to get the target endianness at build time. Use the endianness recorded in the file objtool is working on. Tested-by: Naveen N. Rao Reviewed-by: Naveen N. 
Rao Acked-by: Josh Poimboeuf Acked-by: Peter Zijlstra (Intel) Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221114175754.1131267-10-sv@linux.ibm.com --- .../arch/x86/include/arch/endianness.h | 9 ------ tools/objtool/check.c | 2 +- tools/objtool/include/objtool/endianness.h | 32 +++++++++---------- tools/objtool/orc_dump.c | 11 +++++-- tools/objtool/orc_gen.c | 4 +-- tools/objtool/special.c | 3 +- 6 files changed, 30 insertions(+), 31 deletions(-) delete mode 100644 tools/objtool/arch/x86/include/arch/endianness.h diff --git a/tools/objtool/arch/x86/include/arch/endianness.h b/tools/objtool/arch/x86/include/arch/endianness.h deleted file mode 100644 index 7c362527da20..000000000000 --- a/tools/objtool/arch/x86/include/arch/endianness.h +++ /dev/null @@ -1,9 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _ARCH_ENDIANNESS_H -#define _ARCH_ENDIANNESS_H - -#include - -#define __TARGET_BYTE_ORDER __LITTLE_ENDIAN - -#endif /* _ARCH_ENDIANNESS_H */ diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 8427af808221..ad5dab175701 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2100,7 +2100,7 @@ static int read_unwind_hints(struct objtool_file *file) return -1; } - cfi.cfa.offset = bswap_if_needed(hint->sp_offset); + cfi.cfa.offset = bswap_if_needed(file->elf, hint->sp_offset); cfi.type = hint->type; cfi.end = hint->end; diff --git a/tools/objtool/include/objtool/endianness.h b/tools/objtool/include/objtool/endianness.h index 10241341eff3..4d2aa9b0fe2f 100644 --- a/tools/objtool/include/objtool/endianness.h +++ b/tools/objtool/include/objtool/endianness.h @@ -2,33 +2,33 @@ #ifndef _OBJTOOL_ENDIANNESS_H #define _OBJTOOL_ENDIANNESS_H -#include #include #include - -#ifndef __TARGET_BYTE_ORDER -#error undefined arch __TARGET_BYTE_ORDER -#endif - -#if __BYTE_ORDER != __TARGET_BYTE_ORDER -#define __NEED_BSWAP 1 -#else -#define __NEED_BSWAP 0 -#endif +#include /* - * Does a byte 
swap if target endianness doesn't match the host, i.e. cross + * Does a byte swap if target file endianness doesn't match the host, i.e. cross * compilation for little endian on big endian and vice versa. * To be used for multi-byte values conversion, which are read from / about * to be written to a target native endianness ELF file. */ -#define bswap_if_needed(val) \ +static inline bool need_bswap(struct elf *elf) +{ + return (__BYTE_ORDER == __LITTLE_ENDIAN) ^ + (elf->ehdr.e_ident[EI_DATA] == ELFDATA2LSB); +} + +#define bswap_if_needed(elf, val) \ ({ \ __typeof__(val) __ret; \ + bool __need_bswap = need_bswap(elf); \ switch (sizeof(val)) { \ - case 8: __ret = __NEED_BSWAP ? bswap_64(val) : (val); break; \ - case 4: __ret = __NEED_BSWAP ? bswap_32(val) : (val); break; \ - case 2: __ret = __NEED_BSWAP ? bswap_16(val) : (val); break; \ + case 8: \ + __ret = __need_bswap ? bswap_64(val) : (val); break; \ + case 4: \ + __ret = __need_bswap ? bswap_32(val) : (val); break; \ + case 2: \ + __ret = __need_bswap ? 
bswap_16(val) : (val); break; \ default: \ BUILD_BUG(); break; \ } \ diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c index f5a8508c42d6..4f1211fec82c 100644 --- a/tools/objtool/orc_dump.c +++ b/tools/objtool/orc_dump.c @@ -76,6 +76,7 @@ int orc_dump(const char *_objname) GElf_Rela rela; GElf_Sym sym; Elf_Data *data, *symtab = NULL, *rela_orc_ip = NULL; + struct elf dummy_elf = {}; objname = _objname; @@ -94,6 +95,12 @@ int orc_dump(const char *_objname) return -1; } + if (!elf64_getehdr(elf)) { + WARN_ELF("elf64_getehdr"); + return -1; + } + memcpy(&dummy_elf.ehdr, elf64_getehdr(elf), sizeof(dummy_elf.ehdr)); + if (elf_getshdrnum(elf, &nr_sections)) { WARN_ELF("elf_getshdrnum"); return -1; @@ -198,11 +205,11 @@ int orc_dump(const char *_objname) printf(" sp:"); - print_reg(orc[i].sp_reg, bswap_if_needed(orc[i].sp_offset)); + print_reg(orc[i].sp_reg, bswap_if_needed(&dummy_elf, orc[i].sp_offset)); printf(" bp:"); - print_reg(orc[i].bp_reg, bswap_if_needed(orc[i].bp_offset)); + print_reg(orc[i].bp_reg, bswap_if_needed(&dummy_elf, orc[i].bp_offset)); printf(" type:%s end:%d\n", orc_type_name(orc[i].type), orc[i].end); diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c index dd3c64af9db2..1f22b7ebae58 100644 --- a/tools/objtool/orc_gen.c +++ b/tools/objtool/orc_gen.c @@ -97,8 +97,8 @@ static int write_orc_entry(struct elf *elf, struct section *orc_sec, /* populate ORC data */ orc = (struct orc_entry *)orc_sec->data->d_buf + idx; memcpy(orc, o, sizeof(*orc)); - orc->sp_offset = bswap_if_needed(orc->sp_offset); - orc->bp_offset = bswap_if_needed(orc->bp_offset); + orc->sp_offset = bswap_if_needed(elf, orc->sp_offset); + orc->bp_offset = bswap_if_needed(elf, orc->bp_offset); /* populate reloc for ip */ if (elf_add_reloc_to_insn(elf, ip_sec, idx * sizeof(int), R_X86_64_PC32, diff --git a/tools/objtool/special.c b/tools/objtool/special.c index e2223dd91c37..9c8d827f69af 100644 --- a/tools/objtool/special.c +++ b/tools/objtool/special.c @@ -87,7 
+87,8 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry, if (entry->feature) { unsigned short feature; - feature = bswap_if_needed(*(unsigned short *)(sec->data->d_buf + + feature = bswap_if_needed(elf, + *(unsigned short *)(sec->data->d_buf + offset + entry->feature)); arch_handle_alternative(feature, alt); From 86ea7f361537f825a699e86fdc9e49be19f128d1 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 14 Nov 2022 23:27:48 +0530 Subject: [PATCH 1762/4122] objtool: Use target file class size instead of a compiled constant In order to allow using objtool on cross-built kernels, determine size of long from elf data instead of using sizeof(long) at build time. For the time being this covers only mcount. [Sathvika Vasireddy: Rename variable "size" to "addrsize" and function "elf_class_size()" to "elf_class_addrsize()", and modify create_mcount_loc_sections() function to follow reverse christmas tree format to order local variable declarations.] Tested-by: Naveen N. Rao Reviewed-by: Naveen N. 
Rao Acked-by: Josh Poimboeuf Acked-by: Peter Zijlstra (Intel) Signed-off-by: Christophe Leroy Signed-off-by: Sathvika Vasireddy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221114175754.1131267-11-sv@linux.ibm.com --- tools/objtool/check.c | 18 ++++++++++-------- tools/objtool/elf.c | 8 ++++++-- tools/objtool/include/objtool/elf.h | 8 ++++++++ 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index ad5dab175701..b64518c7c7b4 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -852,9 +852,9 @@ static int create_ibt_endbr_seal_sections(struct objtool_file *file) static int create_mcount_loc_sections(struct objtool_file *file) { - struct section *sec; - unsigned long *loc; + int addrsize = elf_class_addrsize(file->elf); struct instruction *insn; + struct section *sec; int idx; sec = find_section_by_name(file->elf, "__mcount_loc"); @@ -871,23 +871,25 @@ static int create_mcount_loc_sections(struct objtool_file *file) list_for_each_entry(insn, &file->mcount_loc_list, call_node) idx++; - sec = elf_create_section(file->elf, "__mcount_loc", 0, sizeof(unsigned long), idx); + sec = elf_create_section(file->elf, "__mcount_loc", 0, addrsize, idx); if (!sec) return -1; + sec->sh.sh_addralign = addrsize; + idx = 0; list_for_each_entry(insn, &file->mcount_loc_list, call_node) { + void *loc; - loc = (unsigned long *)sec->data->d_buf + idx; - memset(loc, 0, sizeof(unsigned long)); + loc = sec->data->d_buf + idx; + memset(loc, 0, addrsize); - if (elf_add_reloc_to_insn(file->elf, sec, - idx * sizeof(unsigned long), + if (elf_add_reloc_to_insn(file->elf, sec, idx, R_X86_64_64, insn->sec, insn->offset)) return -1; - idx++; + idx += addrsize; } return 0; diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 7e24b09b1163..33739865735b 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -1129,6 +1129,7 @@ static struct section *elf_create_rela_reloc_section(struct elf *elf, 
struct sec { char *relocname; struct section *sec; + int addrsize = elf_class_addrsize(elf); relocname = malloc(strlen(base->name) + strlen(".rela") + 1); if (!relocname) { @@ -1138,7 +1139,10 @@ static struct section *elf_create_rela_reloc_section(struct elf *elf, struct sec strcpy(relocname, ".rela"); strcat(relocname, base->name); - sec = elf_create_section(elf, relocname, 0, sizeof(GElf_Rela), 0); + if (addrsize == sizeof(u32)) + sec = elf_create_section(elf, relocname, 0, sizeof(Elf32_Rela), 0); + else + sec = elf_create_section(elf, relocname, 0, sizeof(GElf_Rela), 0); free(relocname); if (!sec) return NULL; @@ -1147,7 +1151,7 @@ static struct section *elf_create_rela_reloc_section(struct elf *elf, struct sec sec->base = base; sec->sh.sh_type = SHT_RELA; - sec->sh.sh_addralign = 8; + sec->sh.sh_addralign = addrsize; sec->sh.sh_link = find_section_by_name(elf, ".symtab")->idx; sec->sh.sh_info = base->idx; sec->sh.sh_flags = SHF_INFO_LINK; diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index 16f4067b82ae..78b3aa2e546d 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -142,6 +142,14 @@ static inline bool has_multiple_files(struct elf *elf) return elf->num_files > 1; } +static inline int elf_class_addrsize(struct elf *elf) +{ + if (elf->ehdr.e_ident[EI_CLASS] == ELFCLASS32) + return sizeof(u32); + else + return sizeof(u64); +} + struct elf *elf_open_read(const char *name, int flags); struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr); From 280981d6994e0700abd36647b141e73059851e66 Mon Sep 17 00:00:00 2001 From: Sathvika Vasireddy Date: Mon, 14 Nov 2022 23:27:49 +0530 Subject: [PATCH 1763/4122] objtool: Add --mnop as an option to --mcount Some architectures (powerpc) may not support ftrace locations being nop'ed out at build time. 
Introduce CONFIG_HAVE_OBJTOOL_NOP_MCOUNT for objtool, as a means for architectures to enable nop'ing of ftrace locations. Add --mnop as an option to objtool --mcount, to indicate support for the same. Also, make sure that --mnop can be passed as an option to objtool only when --mcount is passed. Tested-by: Naveen N. Rao Reviewed-by: Naveen N. Rao Acked-by: Josh Poimboeuf Reviewed-by: Christophe Leroy Signed-off-by: Sathvika Vasireddy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221114175754.1131267-12-sv@linux.ibm.com --- Makefile | 4 +++- arch/x86/Kconfig | 1 + kernel/trace/Kconfig | 7 +++++++ scripts/Makefile.lib | 3 +++ tools/objtool/builtin-check.c | 14 ++++++++++++++ tools/objtool/check.c | 19 ++++++++++--------- tools/objtool/include/objtool/builtin.h | 1 + 7 files changed, 39 insertions(+), 10 deletions(-) diff --git a/Makefile b/Makefile index d148a55bfd0f..53c2b715d0bf 100644 --- a/Makefile +++ b/Makefile @@ -933,7 +933,9 @@ ifdef CONFIG_FTRACE_MCOUNT_USE_CC endif endif ifdef CONFIG_FTRACE_MCOUNT_USE_OBJTOOL - CC_FLAGS_USING += -DCC_USING_NOP_MCOUNT + ifdef CONFIG_HAVE_OBJTOOL_NOP_MCOUNT + CC_FLAGS_USING += -DCC_USING_NOP_MCOUNT + endif endif ifdef CONFIG_FTRACE_MCOUNT_USE_RECORDMCOUNT ifdef CONFIG_HAVE_C_RECORDMCOUNT diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 67745ceab0db..4be7c06a5d18 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -195,6 +195,7 @@ config X86 select HAVE_CONTEXT_TRACKING_USER_OFFSTACK if HAVE_CONTEXT_TRACKING_USER select HAVE_C_RECORDMCOUNT select HAVE_OBJTOOL_MCOUNT if HAVE_OBJTOOL + select HAVE_OBJTOOL_NOP_MCOUNT if HAVE_OBJTOOL_MCOUNT select HAVE_BUILDTIME_MCOUNT_SORT select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_CONTIGUOUS diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index e9e95c790b8e..2b782321376a 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -82,6 +82,13 @@ config HAVE_OBJTOOL_MCOUNT help Arch supports objtool --mcount +config HAVE_OBJTOOL_NOP_MCOUNT + bool + help 
+ Arch supports the objtool options --mcount with --mnop. + An architecture can select this if it wants to enable nop'ing + of ftrace locations. + config HAVE_C_RECORDMCOUNT bool help diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 3aa384cec76b..658f541c2782 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -256,6 +256,9 @@ objtool-args-$(CONFIG_HAVE_JUMP_LABEL_HACK) += --hacks=jump_label objtool-args-$(CONFIG_HAVE_NOINSTR_HACK) += --hacks=noinstr objtool-args-$(CONFIG_X86_KERNEL_IBT) += --ibt objtool-args-$(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL) += --mcount +ifdef CONFIG_FTRACE_MCOUNT_USE_OBJTOOL +objtool-args-$(CONFIG_HAVE_OBJTOOL_NOP_MCOUNT) += --mnop +endif objtool-args-$(CONFIG_UNWINDER_ORC) += --orc objtool-args-$(CONFIG_RETPOLINE) += --retpoline objtool-args-$(CONFIG_RETHUNK) += --rethunk diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 24fbe803a0d3..9bd347d3c244 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -82,6 +82,7 @@ const struct option check_options[] = { OPT_BOOLEAN(0, "dry-run", &opts.dryrun, "don't write modifications"), OPT_BOOLEAN(0, "link", &opts.link, "object is a linked object"), OPT_BOOLEAN(0, "module", &opts.module, "object is part of a kernel module"), + OPT_BOOLEAN(0, "mnop", &opts.mnop, "nop out mcount call sites"), OPT_BOOLEAN(0, "no-unreachable", &opts.no_unreachable, "skip 'unreachable instruction' warnings"), OPT_BOOLEAN(0, "sec-address", &opts.sec_address, "print section addresses in warnings"), OPT_BOOLEAN(0, "stats", &opts.stats, "print statistics"), @@ -150,6 +151,16 @@ static bool opts_valid(void) return false; } +static bool mnop_opts_valid(void) +{ + if (opts.mnop && !opts.mcount) { + ERROR("--mnop requires --mcount"); + return false; + } + + return true; +} + static bool link_opts_valid(struct objtool_file *file) { if (opts.link) @@ -198,6 +209,9 @@ int objtool_run(int argc, const char **argv) if (!file) return 1; + if 
(!mnop_opts_valid()) + return 1; + if (!link_opts_valid(file)) return 1; diff --git a/tools/objtool/check.c b/tools/objtool/check.c index b64518c7c7b4..71cf4b4ba1da 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1256,18 +1256,19 @@ static void annotate_call_site(struct objtool_file *file, if (opts.mcount && sym->fentry) { if (sibling) WARN_FUNC("Tail call to __fentry__ !?!?", insn->sec, insn->offset); + if (opts.mnop) { + if (reloc) { + reloc->type = R_NONE; + elf_write_reloc(file->elf, reloc); + } - if (reloc) { - reloc->type = R_NONE; - elf_write_reloc(file->elf, reloc); + elf_write_insn(file->elf, insn->sec, + insn->offset, insn->len, + arch_nop_insn(insn->len)); + + insn->type = INSN_NOP; } - elf_write_insn(file->elf, insn->sec, - insn->offset, insn->len, - arch_nop_insn(insn->len)); - - insn->type = INSN_NOP; - list_add_tail(&insn->call_node, &file->mcount_loc_list); return; } diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h index 42a52f1a0add..0785707c5a92 100644 --- a/tools/objtool/include/objtool/builtin.h +++ b/tools/objtool/include/objtool/builtin.h @@ -31,6 +31,7 @@ struct opts { bool backup; bool dryrun; bool link; + bool mnop; bool module; bool no_unreachable; bool sec_address; From de6fbcedf5abce4c321eeb15d7d286b79804b8b6 Mon Sep 17 00:00:00 2001 From: Sathvika Vasireddy Date: Mon, 14 Nov 2022 23:27:50 +0530 Subject: [PATCH 1764/4122] objtool: Read special sections with alts only when specific options are selected Call add_special_section_alts() only when stackval or orc or uaccess or noinstr options are passed to objtool. Tested-by: Naveen N. Rao Reviewed-by: Naveen N. 
Rao Reviewed-by: Christophe Leroy Acked-by: Josh Poimboeuf Signed-off-by: Sathvika Vasireddy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221114175754.1131267-13-sv@linux.ibm.com --- tools/objtool/check.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 71cf4b4ba1da..752a6ffd5c4c 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2392,9 +2392,11 @@ static int decode_sections(struct objtool_file *file) * Must be before add_jump_destinations(), which depends on 'func' * being set for alternatives, to enable proper sibling call detection. */ - ret = add_special_section_alts(file); - if (ret) - return ret; + if (opts.stackval || opts.orc || opts.uaccess || opts.noinstr) { + ret = add_special_section_alts(file); + if (ret) + return ret; + } ret = add_jump_destinations(file); if (ret) From c1449735211dd8c4c2d54fa0ece6890ecbd74e24 Mon Sep 17 00:00:00 2001 From: Sathvika Vasireddy Date: Mon, 14 Nov 2022 23:27:51 +0530 Subject: [PATCH 1765/4122] objtool: Use macros to define arch specific reloc types Make relocation types architecture specific. Tested-by: Naveen N. Rao Reviewed-by: Naveen N. 
Rao Reviewed-by: Christophe Leroy Acked-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Signed-off-by: Sathvika Vasireddy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221114175754.1131267-14-sv@linux.ibm.com --- tools/objtool/arch/x86/include/arch/elf.h | 2 ++ tools/objtool/check.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/objtool/arch/x86/include/arch/elf.h b/tools/objtool/arch/x86/include/arch/elf.h index 69cc4264b28a..ac14987cf687 100644 --- a/tools/objtool/arch/x86/include/arch/elf.h +++ b/tools/objtool/arch/x86/include/arch/elf.h @@ -2,5 +2,7 @@ #define _OBJTOOL_ARCH_ELF #define R_NONE R_X86_64_NONE +#define R_ABS64 R_X86_64_64 +#define R_ABS32 R_X86_64_32 #endif /* _OBJTOOL_ARCH_ELF */ diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 752a6ffd5c4c..2d7153b5d5d1 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -885,7 +885,7 @@ static int create_mcount_loc_sections(struct objtool_file *file) memset(loc, 0, addrsize); if (elf_add_reloc_to_insn(file->elf, sec, idx, - R_X86_64_64, + addrsize == sizeof(u64) ? R_ABS64 : R_ABS32, insn->sec, insn->offset)) return -1; From 4ca993d498987332ceeedee5380101b84accaf35 Mon Sep 17 00:00:00 2001 From: Sathvika Vasireddy Date: Mon, 14 Nov 2022 23:27:52 +0530 Subject: [PATCH 1766/4122] objtool: Add arch specific function arch_ftrace_match() Add architecture specific function to look for relocation records pointing to architecture specific symbols. Suggested-by: Christophe Leroy Tested-by: Naveen N. Rao Reviewed-by: Naveen N. 
Rao Reviewed-by: Christophe Leroy Acked-by: Josh Poimboeuf Signed-off-by: Sathvika Vasireddy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221114175754.1131267-15-sv@linux.ibm.com --- tools/objtool/arch/x86/decode.c | 5 +++++ tools/objtool/check.c | 2 +- tools/objtool/include/objtool/arch.h | 2 ++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 1c253b4b7ce0..af7ad09c926c 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -23,6 +23,11 @@ #include #include +int arch_ftrace_match(char *name) +{ + return !strcmp(name, "__fentry__"); +} + static int is_x86_64(const struct elf *elf) { switch (elf->ehdr.e_machine) { diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 2d7153b5d5d1..7580c66ca5c8 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2316,7 +2316,7 @@ static int classify_symbols(struct objtool_file *file) if (arch_is_rethunk(func)) func->return_thunk = true; - if (!strcmp(func->name, "__fentry__")) + if (arch_ftrace_match(func->name)) func->fentry = true; if (is_profiling_func(func->name)) diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h index beb2f3aa94ff..5149330f400f 100644 --- a/tools/objtool/include/objtool/arch.h +++ b/tools/objtool/include/objtool/arch.h @@ -69,6 +69,8 @@ struct stack_op { struct instruction; +int arch_ftrace_match(char *name); + void arch_initial_func_cfi_state(struct cfi_init_state *state); int arch_decode_instruction(struct objtool_file *file, const struct section *sec, From e52ec98c5ab18c0710ea22bf52f45e60a725adaf Mon Sep 17 00:00:00 2001 From: Sathvika Vasireddy Date: Mon, 14 Nov 2022 23:27:53 +0530 Subject: [PATCH 1767/4122] objtool/powerpc: Enable objtool to be built on ppc This patch adds [stub] implementations for required functions, in order to enable objtool build on powerpc.
[Christophe Leroy: powerpc: Add missing asm/asm.h for objtool, Use local variables for type and imm in arch_decode_instruction(), Adapt len for prefixed instructions.] Tested-by: Naveen N. Rao Reviewed-by: Naveen N. Rao Acked-by: Josh Poimboeuf Signed-off-by: Sathvika Vasireddy Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221114175754.1131267-16-sv@linux.ibm.com --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/asm.h | 7 ++ tools/objtool/arch/powerpc/Build | 2 + tools/objtool/arch/powerpc/decode.c | 85 +++++++++++++++++++ .../arch/powerpc/include/arch/cfi_regs.h | 11 +++ tools/objtool/arch/powerpc/include/arch/elf.h | 8 ++ .../arch/powerpc/include/arch/special.h | 21 +++++ tools/objtool/arch/powerpc/special.c | 19 +++++ 8 files changed, 154 insertions(+) create mode 100644 arch/powerpc/include/asm/asm.h create mode 100644 tools/objtool/arch/powerpc/Build create mode 100644 tools/objtool/arch/powerpc/decode.c create mode 100644 tools/objtool/arch/powerpc/include/arch/cfi_regs.h create mode 100644 tools/objtool/arch/powerpc/include/arch/elf.h create mode 100644 tools/objtool/arch/powerpc/include/arch/special.h create mode 100644 tools/objtool/arch/powerpc/special.c diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 699df27b0e2f..12e6c16be54e 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -238,6 +238,7 @@ config PPC select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI if PERF_EVENTS || (PPC64 && PPC_BOOK3S) select HAVE_OPTPROBES + select HAVE_OBJTOOL if PPC32 || MPROFILE_KERNEL select HAVE_PERF_EVENTS select HAVE_PERF_EVENTS_NMI if PPC64 select HAVE_PERF_REGS diff --git a/arch/powerpc/include/asm/asm.h b/arch/powerpc/include/asm/asm.h new file mode 100644 index 000000000000..86f46b604e9a --- /dev/null +++ b/arch/powerpc/include/asm/asm.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_ASM_H +#define _ASM_POWERPC_ASM_H + +#define _ASM_PTR " .long " + +#endif 
/* _ASM_POWERPC_ASM_H */ diff --git a/tools/objtool/arch/powerpc/Build b/tools/objtool/arch/powerpc/Build new file mode 100644 index 000000000000..d24d5636a5b8 --- /dev/null +++ b/tools/objtool/arch/powerpc/Build @@ -0,0 +1,2 @@ +objtool-y += decode.o +objtool-y += special.o diff --git a/tools/objtool/arch/powerpc/decode.c b/tools/objtool/arch/powerpc/decode.c new file mode 100644 index 000000000000..dcd0975cad6b --- /dev/null +++ b/tools/objtool/arch/powerpc/decode.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include +#include +#include +#include +#include +#include +#include + +unsigned long arch_dest_reloc_offset(int addend) +{ + return addend; +} + +bool arch_callee_saved_reg(unsigned char reg) +{ + return false; +} + +int arch_decode_hint_reg(u8 sp_reg, int *base) +{ + exit(-1); +} + +const char *arch_nop_insn(int len) +{ + exit(-1); +} + +const char *arch_ret_insn(int len) +{ + exit(-1); +} + +int arch_decode_instruction(struct objtool_file *file, const struct section *sec, + unsigned long offset, unsigned int maxlen, + unsigned int *len, enum insn_type *type, + unsigned long *immediate, + struct list_head *ops_list) +{ + unsigned int opcode; + enum insn_type typ; + unsigned long imm; + u32 insn; + + insn = bswap_if_needed(file->elf, *(u32 *)(sec->data->d_buf + offset)); + opcode = insn >> 26; + typ = INSN_OTHER; + imm = 0; + + if (opcode == 1) + *len = 8; + else + *len = 4; + + *type = typ; + *immediate = imm; + + return 0; +} + +unsigned long arch_jump_destination(struct instruction *insn) +{ + return insn->offset + insn->immediate; +} + +void arch_initial_func_cfi_state(struct cfi_init_state *state) +{ + int i; + + for (i = 0; i < CFI_NUM_REGS; i++) { + state->regs[i].base = CFI_UNDEFINED; + state->regs[i].offset = 0; + } + + /* initial CFA (call frame address) */ + state->cfa.base = CFI_SP; + state->cfa.offset = 0; + + /* initial LR (return address) */ + state->regs[CFI_RA].base = CFI_CFA; + state->regs[CFI_RA].offset = 
0; +} diff --git a/tools/objtool/arch/powerpc/include/arch/cfi_regs.h b/tools/objtool/arch/powerpc/include/arch/cfi_regs.h new file mode 100644 index 000000000000..59638ebeafc8 --- /dev/null +++ b/tools/objtool/arch/powerpc/include/arch/cfi_regs.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef _OBJTOOL_CFI_REGS_H +#define _OBJTOOL_CFI_REGS_H + +#define CFI_BP 1 +#define CFI_SP CFI_BP +#define CFI_RA 32 +#define CFI_NUM_REGS 33 + +#endif diff --git a/tools/objtool/arch/powerpc/include/arch/elf.h b/tools/objtool/arch/powerpc/include/arch/elf.h new file mode 100644 index 000000000000..3c8ebb7d2a6b --- /dev/null +++ b/tools/objtool/arch/powerpc/include/arch/elf.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef _OBJTOOL_ARCH_ELF +#define _OBJTOOL_ARCH_ELF + +#define R_NONE R_PPC_NONE + +#endif /* _OBJTOOL_ARCH_ELF */ diff --git a/tools/objtool/arch/powerpc/include/arch/special.h b/tools/objtool/arch/powerpc/include/arch/special.h new file mode 100644 index 000000000000..ffef9ada7133 --- /dev/null +++ b/tools/objtool/arch/powerpc/include/arch/special.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _PPC_ARCH_SPECIAL_H +#define _PPC_ARCH_SPECIAL_H + +#define EX_ENTRY_SIZE 8 +#define EX_ORIG_OFFSET 0 +#define EX_NEW_OFFSET 4 + +#define JUMP_ENTRY_SIZE 16 +#define JUMP_ORIG_OFFSET 0 +#define JUMP_NEW_OFFSET 4 +#define JUMP_KEY_OFFSET 8 + +#define ALT_ENTRY_SIZE 12 +#define ALT_ORIG_OFFSET 0 +#define ALT_NEW_OFFSET 4 +#define ALT_FEATURE_OFFSET 8 +#define ALT_ORIG_LEN_OFFSET 10 +#define ALT_NEW_LEN_OFFSET 11 + +#endif /* _PPC_ARCH_SPECIAL_H */ diff --git a/tools/objtool/arch/powerpc/special.c b/tools/objtool/arch/powerpc/special.c new file mode 100644 index 000000000000..d33868147196 --- /dev/null +++ b/tools/objtool/arch/powerpc/special.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include +#include +#include +#include + + +bool arch_support_alt_relocation(struct 
special_alt *special_alt, + struct instruction *insn, + struct reloc *reloc) +{ + exit(-1); +} + +struct reloc *arch_find_switch_table(struct objtool_file *file, + struct instruction *insn) +{ + exit(-1); +} From c984aef8c8326035570ff6e01d0ff9e79a5dfa76 Mon Sep 17 00:00:00 2001 From: Sathvika Vasireddy Date: Mon, 14 Nov 2022 23:27:54 +0530 Subject: [PATCH 1768/4122] objtool/powerpc: Add --mcount specific implementation This patch enables objtool --mcount on powerpc, and adds implementation specific to powerpc. Tested-by: Naveen N. Rao Reviewed-by: Naveen N. Rao Reviewed-by: Christophe Leroy Acked-by: Josh Poimboeuf Signed-off-by: Sathvika Vasireddy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221114175754.1131267-17-sv@linux.ibm.com --- arch/powerpc/Kconfig | 1 + tools/objtool/arch/powerpc/decode.c | 16 ++++++++++++++++ tools/objtool/arch/powerpc/include/arch/elf.h | 2 ++ 3 files changed, 19 insertions(+) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 12e6c16be54e..9c07068ba5e5 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -239,6 +239,7 @@ config PPC select HAVE_NMI if PERF_EVENTS || (PPC64 && PPC_BOOK3S) select HAVE_OPTPROBES select HAVE_OBJTOOL if PPC32 || MPROFILE_KERNEL + select HAVE_OBJTOOL_MCOUNT if HAVE_OBJTOOL select HAVE_PERF_EVENTS select HAVE_PERF_EVENTS_NMI if PPC64 select HAVE_PERF_REGS diff --git a/tools/objtool/arch/powerpc/decode.c b/tools/objtool/arch/powerpc/decode.c index dcd0975cad6b..01cade98b49e 100644 --- a/tools/objtool/arch/powerpc/decode.c +++ b/tools/objtool/arch/powerpc/decode.c @@ -9,6 +9,11 @@ #include #include +int arch_ftrace_match(char *name) +{ + return !strcmp(name, "_mcount"); +} + unsigned long arch_dest_reloc_offset(int addend) { return addend; @@ -50,6 +55,17 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec typ = INSN_OTHER; imm = 0; + switch (opcode) { + case 18: /* b[l][a] */ + if ((insn & 3) == 1) /* bl */ + typ = INSN_CALL; + + imm = 
insn & 0x3fffffc; + if (imm & 0x2000000) + imm -= 0x4000000; + break; + } + if (opcode == 1) *len = 8; else diff --git a/tools/objtool/arch/powerpc/include/arch/elf.h b/tools/objtool/arch/powerpc/include/arch/elf.h index 3c8ebb7d2a6b..73f9ae172fe5 100644 --- a/tools/objtool/arch/powerpc/include/arch/elf.h +++ b/tools/objtool/arch/powerpc/include/arch/elf.h @@ -4,5 +4,7 @@ #define _OBJTOOL_ARCH_ELF #define R_NONE R_PPC_NONE +#define R_ABS64 R_PPC64_ADDR64 +#define R_ABS32 R_PPC_ADDR32 #endif /* _OBJTOOL_ARCH_ELF */ From 16bdbae394280f1d97933d919023eccbf0b564bd Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 7 Nov 2022 13:24:55 +0100 Subject: [PATCH 1769/4122] hwrng: core - treat default_quality as a maximum and default to 1024 Most hw_random devices return entropy which is assumed to be of full quality, but driver authors don't bother setting the quality knob. Some hw_random devices return less than full quality entropy, and then driver authors set the quality knob. Therefore, the entropy crediting should be opt-out rather than opt-in per-driver, to reflect the actual reality on the ground. For example, the two Raspberry Pi RNG drivers produce full entropy randomness, and both EDK2 and U-Boot's drivers for these treat them as such. The result is that EFI then uses these numbers and passes them to Linux, and Linux credits them at boot, thereby initializing the RNG. Yet, in Linux, the quality knob was never set to anything, and so on the chance that Linux is booted without EFI, nothing is ever credited. That's annoying. The same pattern appears to repeat itself throughout various drivers. In fact, very very few drivers have bothered setting quality=1024. Looking at the git history of existing drivers and corresponding mailing list discussion, this conclusion tracks. There's been a decent amount of discussion about drivers that set quality < 1024 -- somebody read and interpreted a datasheet, or made some back of the envelope calculation somehow.
But there's been very little, if any, discussion about most drivers where the quality is just set to 1024 or unset (or set to 1000 when the authors misunderstood the API and assumed it was base-10 rather than base-2); in both cases the intent was fairly clear of, "this is a hardware random device; it's fine." So let's invert this logic. A hw_random struct's quality knob now controls the maximum quality a driver can produce, or 0 to specify 1024. Then, the module-wide switch called "default_quality" is changed to represent the maximum quality of any driver. By default it's 1024, and the quality of any particular driver is then given by: min(default_quality, rng->quality ?: 1024); This way, the user can still turn this off for weird reasons (and we can replace whatever driver-specific disabling hacks existed in the past), yet we get proper crediting for relevant RNGs. Cc: Dominik Brodowski Cc: Ard Biesheuvel Cc: Herbert Xu Signed-off-by: Jason A. Donenfeld Signed-off-by: Herbert Xu --- arch/um/drivers/random.c | 1 - drivers/char/hw_random/cavium-rng-vf.c | 1 - drivers/char/hw_random/cn10k-rng.c | 1 - drivers/char/hw_random/core.c | 9 +++------ drivers/char/hw_random/mpfs-rng.c | 1 - drivers/char/hw_random/npcm-rng.c | 1 - drivers/char/hw_random/s390-trng.c | 1 - drivers/char/hw_random/timeriomem-rng.c | 2 -- drivers/char/hw_random/virtio-rng.c | 1 - drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c | 1 - drivers/crypto/atmel-sha204a.c | 1 - drivers/crypto/caam/caamrng.c | 1 - drivers/firmware/turris-mox-rwtm.c | 1 - drivers/s390/crypto/zcrypt_api.c | 6 ------ drivers/usb/misc/chaoskey.c | 1 - include/linux/hw_random.h | 2 +- 16 files changed, 4 insertions(+), 27 deletions(-) diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c index 32b3341fe970..da985e0dc69a 100644 --- a/arch/um/drivers/random.c +++ b/arch/um/drivers/random.c @@ -82,7 +82,6 @@ static int __init rng_init (void) sigio_broken(random_fd); hwrng.name = RNG_MODULE_NAME; hwrng.read = 
rng_dev_read; - hwrng.quality = 1024; err = hwrng_register(&hwrng); if (err) { diff --git a/drivers/char/hw_random/cavium-rng-vf.c b/drivers/char/hw_random/cavium-rng-vf.c index 7c55f4cf4a8b..c99c54cd99c6 100644 --- a/drivers/char/hw_random/cavium-rng-vf.c +++ b/drivers/char/hw_random/cavium-rng-vf.c @@ -225,7 +225,6 @@ static int cavium_rng_probe_vf(struct pci_dev *pdev, return -ENOMEM; rng->ops.read = cavium_rng_read; - rng->ops.quality = 1000; pci_set_drvdata(pdev, rng); diff --git a/drivers/char/hw_random/cn10k-rng.c b/drivers/char/hw_random/cn10k-rng.c index a01e9307737c..c1193f85982c 100644 --- a/drivers/char/hw_random/cn10k-rng.c +++ b/drivers/char/hw_random/cn10k-rng.c @@ -145,7 +145,6 @@ static int cn10k_rng_probe(struct pci_dev *pdev, const struct pci_device_id *id) return -ENOMEM; rng->ops.read = cn10k_rng_read; - rng->ops.quality = 1000; rng->ops.priv = (unsigned long)rng; reset_rng_health_state(rng); diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index cc002b0c2f0c..afde685f5e0a 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -41,14 +41,14 @@ static DEFINE_MUTEX(reading_mutex); static int data_avail; static u8 *rng_buffer, *rng_fillbuf; static unsigned short current_quality; -static unsigned short default_quality; /* = 0; default to "off" */ +static unsigned short default_quality = 1024; /* default to maximum */ module_param(current_quality, ushort, 0644); MODULE_PARM_DESC(current_quality, "current hwrng entropy estimation per 1024 bits of input -- obsolete, use rng_quality instead"); module_param(default_quality, ushort, 0644); MODULE_PARM_DESC(default_quality, - "default entropy content of hwrng per 1024 bits of input"); + "default maximum entropy content of hwrng per 1024 bits of input"); static void drop_current_rng(void); static int hwrng_init(struct hwrng *rng); @@ -170,10 +170,7 @@ static int hwrng_init(struct hwrng *rng) reinit_completion(&rng->cleanup_done); skip_init: - if 
(!rng->quality) - rng->quality = default_quality; - if (rng->quality > 1024) - rng->quality = 1024; + rng->quality = min_t(u16, min_t(u16, default_quality, 1024), rng->quality ?: 1024); current_quality = rng->quality; /* obsolete */ return 0; diff --git a/drivers/char/hw_random/mpfs-rng.c b/drivers/char/hw_random/mpfs-rng.c index 5813da617a48..c6972734ae62 100644 --- a/drivers/char/hw_random/mpfs-rng.c +++ b/drivers/char/hw_random/mpfs-rng.c @@ -78,7 +78,6 @@ static int mpfs_rng_probe(struct platform_device *pdev) rng_priv->rng.read = mpfs_rng_read; rng_priv->rng.name = pdev->name; - rng_priv->rng.quality = 1024; platform_set_drvdata(pdev, rng_priv); diff --git a/drivers/char/hw_random/npcm-rng.c b/drivers/char/hw_random/npcm-rng.c index 5bf7f370f985..9903d0357e06 100644 --- a/drivers/char/hw_random/npcm-rng.c +++ b/drivers/char/hw_random/npcm-rng.c @@ -111,7 +111,6 @@ static int npcm_rng_probe(struct platform_device *pdev) priv->rng.name = pdev->name; priv->rng.read = npcm_rng_read; priv->rng.priv = (unsigned long)&pdev->dev; - priv->rng.quality = 1000; priv->clkp = (u32)(uintptr_t)of_device_get_match_data(&pdev->dev); writel(NPCM_RNG_M1ROSEL, priv->base + NPCM_RNGMODE_REG); diff --git a/drivers/char/hw_random/s390-trng.c b/drivers/char/hw_random/s390-trng.c index 795853dfc46b..cffa326ddc8d 100644 --- a/drivers/char/hw_random/s390-trng.c +++ b/drivers/char/hw_random/s390-trng.c @@ -191,7 +191,6 @@ static struct hwrng trng_hwrng_dev = { .name = "s390-trng", .data_read = trng_hwrng_data_read, .read = trng_hwrng_read, - .quality = 1024, }; diff --git a/drivers/char/hw_random/timeriomem-rng.c b/drivers/char/hw_random/timeriomem-rng.c index 8ea1fc831eb7..26f322d19a88 100644 --- a/drivers/char/hw_random/timeriomem-rng.c +++ b/drivers/char/hw_random/timeriomem-rng.c @@ -145,8 +145,6 @@ static int timeriomem_rng_probe(struct platform_device *pdev) if (!of_property_read_u32(pdev->dev.of_node, "quality", &i)) priv->rng_ops.quality = i; - else - priv->rng_ops.quality = 0; } 
else { period = pdata->period; priv->rng_ops.quality = pdata->quality; diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c index a6f3a8a2aca6..f7690e0f92ed 100644 --- a/drivers/char/hw_random/virtio-rng.c +++ b/drivers/char/hw_random/virtio-rng.c @@ -148,7 +148,6 @@ static int probe_common(struct virtio_device *vdev) .cleanup = virtio_cleanup, .priv = (unsigned long)vi, .name = vi->name, - .quality = 1000, }; vdev->priv = vi; diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c index c4b0a8b58842..e2b9b9104694 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c @@ -108,7 +108,6 @@ int sun8i_ce_hwrng_register(struct sun8i_ce_dev *ce) } ce->trng.name = "sun8i Crypto Engine TRNG"; ce->trng.read = sun8i_ce_trng_read; - ce->trng.quality = 1000; ret = hwrng_register(&ce->trng); if (ret) diff --git a/drivers/crypto/atmel-sha204a.c b/drivers/crypto/atmel-sha204a.c index a84b657598c6..c0103e7fc2e7 100644 --- a/drivers/crypto/atmel-sha204a.c +++ b/drivers/crypto/atmel-sha204a.c @@ -107,7 +107,6 @@ static int atmel_sha204a_probe(struct i2c_client *client, i2c_priv->hwrng.name = dev_name(&client->dev); i2c_priv->hwrng.read = atmel_sha204a_rng_read; - i2c_priv->hwrng.quality = 1024; ret = devm_hwrng_register(&client->dev, &i2c_priv->hwrng); if (ret) diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c index 77d048dfe5d0..1f0e82050976 100644 --- a/drivers/crypto/caam/caamrng.c +++ b/drivers/crypto/caam/caamrng.c @@ -246,7 +246,6 @@ int caam_rng_init(struct device *ctrldev) ctx->rng.cleanup = caam_cleanup; ctx->rng.read = caam_read; ctx->rng.priv = (unsigned long)ctx; - ctx->rng.quality = 1024; dev_info(ctrldev, "registering rng-caam\n"); diff --git a/drivers/firmware/turris-mox-rwtm.c b/drivers/firmware/turris-mox-rwtm.c index c2d34dc8ba46..6ea5789a89e2 100644 --- 
a/drivers/firmware/turris-mox-rwtm.c +++ b/drivers/firmware/turris-mox-rwtm.c @@ -528,7 +528,6 @@ static int turris_mox_rwtm_probe(struct platform_device *pdev) rwtm->hwrng.name = DRIVER_NAME "_hwrng"; rwtm->hwrng.read = mox_hwrng_read; rwtm->hwrng.priv = (unsigned long) rwtm; - rwtm->hwrng.quality = 1024; ret = devm_hwrng_register(dev, &rwtm->hwrng); if (ret < 0) { diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index f94b43ce9a65..4bf36e53fe3e 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -53,10 +53,6 @@ MODULE_LICENSE("GPL"); EXPORT_TRACEPOINT_SYMBOL(s390_zcrypt_req); EXPORT_TRACEPOINT_SYMBOL(s390_zcrypt_rep); -static int zcrypt_hwrng_seed = 1; -module_param_named(hwrng_seed, zcrypt_hwrng_seed, int, 0440); -MODULE_PARM_DESC(hwrng_seed, "Turn on/off hwrng auto seed, default is 1 (on)."); - DEFINE_SPINLOCK(zcrypt_list_lock); LIST_HEAD(zcrypt_card_list); @@ -2063,8 +2059,6 @@ int zcrypt_rng_device_add(void) goto out; } zcrypt_rng_buffer_index = 0; - if (!zcrypt_hwrng_seed) - zcrypt_rng_dev.quality = 0; rc = hwrng_register(&zcrypt_rng_dev); if (rc) goto out_free; diff --git a/drivers/usb/misc/chaoskey.c b/drivers/usb/misc/chaoskey.c index 87067c3d6109..6fb5140e29b9 100644 --- a/drivers/usb/misc/chaoskey.c +++ b/drivers/usb/misc/chaoskey.c @@ -200,7 +200,6 @@ static int chaoskey_probe(struct usb_interface *interface, dev->hwrng.name = dev->name ? dev->name : chaoskey_driver.name; dev->hwrng.read = chaoskey_rng_read; - dev->hwrng.quality = 1024; dev->hwrng_registered = (hwrng_register(&dev->hwrng) == 0); if (!dev->hwrng_registered) diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h index 77c2885c4c13..8a3115516a1b 100644 --- a/include/linux/hw_random.h +++ b/include/linux/hw_random.h @@ -34,7 +34,7 @@ * @priv: Private data, for use by the RNG driver. 
* @quality: Estimation of true entropy in RNG's bitstream * (in bits of entropy per 1024 bits of input; - * valid values: 1 to 1024, or 0 for unknown). + * valid values: 1 to 1024, or 0 for maximum). */ struct hwrng { const char *name; From 7cdc5e6bcd02ab45d0a2991b35861b448e355276 Mon Sep 17 00:00:00 2001 From: Tomas Marek Date: Tue, 8 Nov 2022 07:42:40 +0100 Subject: [PATCH 1770/4122] hwrng: stm32 - rename readl return value Use a more meaningful name for the readl return value variable. Link: https://lore.kernel.org/all/Y1J3QwynPFIlfrIv@loth.rohan.me.apana.org.au/ Signed-off-by: Tomas Marek Signed-off-by: Herbert Xu --- drivers/char/hw_random/stm32-rng.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/char/hw_random/stm32-rng.c b/drivers/char/hw_random/stm32-rng.c index 366edda4848b..a6731cf0627a 100644 --- a/drivers/char/hw_random/stm32-rng.c +++ b/drivers/char/hw_random/stm32-rng.c @@ -49,13 +49,13 @@ static int stm32_rng_read(struct hwrng *rng, void *data, size_t max, bool wait) /* Manage timeout which is based on timer and take */ /* care of initial delay time when enabling rng */ if (!sr && wait) { - int ret; + int err; - ret = readl_relaxed_poll_timeout_atomic(priv->base + err = readl_relaxed_poll_timeout_atomic(priv->base + RNG_SR, sr, sr, 10, 50000); - if (ret) + if (err) dev_err((struct device *)priv->rng.priv, "%s: timeout %x!\n", __func__, sr); } From 4f1c596df706c9aca662b6c214fad84047ae2a97 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Tue, 8 Nov 2022 16:29:12 +0800 Subject: [PATCH 1771/4122] crypto: ccree - Remove debugfs when platform_driver_register failed When platform_driver_register() fails, debugfs must be removed; otherwise a resource leak results. Fix it. Failed logs as follows: [ 32.606488] debugfs: Directory 'ccree' with parent '/' already present!
Fixes: 4c3f97276e15 ("crypto: ccree - introduce CryptoCell driver") Signed-off-by: Gaosheng Cui Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_driver.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/ccree/cc_driver.c b/drivers/crypto/ccree/cc_driver.c index cadead18b59e..d489c6f80892 100644 --- a/drivers/crypto/ccree/cc_driver.c +++ b/drivers/crypto/ccree/cc_driver.c @@ -651,9 +651,17 @@ static struct platform_driver ccree_driver = { static int __init ccree_init(void) { + int rc; + cc_debugfs_global_init(); - return platform_driver_register(&ccree_driver); + rc = platform_driver_register(&ccree_driver); + if (rc) { + cc_debugfs_global_fini(); + return rc; + } + + return 0; } module_init(ccree_init); From 824db5cd1ec9b95c254fc317c3999f6b53e98b12 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Thu, 10 Nov 2022 18:42:04 +0800 Subject: [PATCH 1772/4122] crypto: arm64 - Fix unused variable compilation warnings of cpu_feature The cpu feature table defined for MODULE_DEVICE_TABLE is only referenced when compiling as a module, so an unused-variable warning is emitted when the code is built in-tree (built-in). The warning can be removed by adding the __maybe_unused attribute.
Fixes: 03c9a333fef1 ("crypto: arm64/ghash - add NEON accelerated fallback for 64-bit PMULL") Fixes: ae1b83c7d572 ("crypto: arm64/sm4 - add CE implementation for GCM mode") Reported-by: kernel test robot Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu --- arch/arm64/crypto/ghash-ce-glue.c | 2 +- arch/arm64/crypto/sm4-ce-gcm-glue.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c index 15794fe21a0b..e5e9adc1fcf4 100644 --- a/arch/arm64/crypto/ghash-ce-glue.c +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -508,7 +508,7 @@ static void __exit ghash_ce_mod_exit(void) crypto_unregister_shash(&ghash_alg); } -static const struct cpu_feature ghash_cpu_feature[] = { +static const struct cpu_feature __maybe_unused ghash_cpu_feature[] = { { cpu_feature(PMULL) }, { } }; MODULE_DEVICE_TABLE(cpu, ghash_cpu_feature); diff --git a/arch/arm64/crypto/sm4-ce-gcm-glue.c b/arch/arm64/crypto/sm4-ce-gcm-glue.c index e90ea0f17beb..c450a2025ca9 100644 --- a/arch/arm64/crypto/sm4-ce-gcm-glue.c +++ b/arch/arm64/crypto/sm4-ce-gcm-glue.c @@ -271,7 +271,7 @@ static void __exit sm4_ce_gcm_exit(void) crypto_unregister_aead(&sm4_gcm_alg); } -static const struct cpu_feature sm4_ce_gcm_cpu_feature[] = { +static const struct cpu_feature __maybe_unused sm4_ce_gcm_cpu_feature[] = { { cpu_feature(PMULL) }, {} }; From 3a58c231172537f7b0e19d93ed33decd04f80eab Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 11 Nov 2022 17:59:17 +0800 Subject: [PATCH 1773/4122] crypto: cryptd - Use request context instead of stack for sub-request cryptd is buggy as it tries to use sync_skcipher without going through the proper sync_skcipher interface. In fact it doesn't even need sync_skcipher since it's already a proper skcipher and can easily access the request context instead of using something off the stack. 
Fixes: 36b3875a97b8 ("crypto: cryptd - Remove VLA usage of skcipher") Signed-off-by: Herbert Xu --- crypto/cryptd.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/crypto/cryptd.c b/crypto/cryptd.c index 668095eca0fa..ca3a40fc7da9 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -68,11 +68,12 @@ struct aead_instance_ctx { struct cryptd_skcipher_ctx { refcount_t refcnt; - struct crypto_sync_skcipher *child; + struct crypto_skcipher *child; }; struct cryptd_skcipher_request_ctx { crypto_completion_t complete; + struct skcipher_request req; }; struct cryptd_hash_ctx { @@ -227,13 +228,13 @@ static int cryptd_skcipher_setkey(struct crypto_skcipher *parent, const u8 *key, unsigned int keylen) { struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(parent); - struct crypto_sync_skcipher *child = ctx->child; + struct crypto_skcipher *child = ctx->child; - crypto_sync_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); - crypto_sync_skcipher_set_flags(child, - crypto_skcipher_get_flags(parent) & - CRYPTO_TFM_REQ_MASK); - return crypto_sync_skcipher_setkey(child, key, keylen); + crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(child, + crypto_skcipher_get_flags(parent) & + CRYPTO_TFM_REQ_MASK); + return crypto_skcipher_setkey(child, key, keylen); } static void cryptd_skcipher_complete(struct skcipher_request *req, int err) @@ -258,13 +259,13 @@ static void cryptd_skcipher_encrypt(struct crypto_async_request *base, struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req); struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); - struct crypto_sync_skcipher *child = ctx->child; - SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, child); + struct skcipher_request *subreq = &rctx->req; + struct crypto_skcipher *child = ctx->child; if (unlikely(err == -EINPROGRESS)) goto out; - skcipher_request_set_sync_tfm(subreq, child); 
+ skcipher_request_set_tfm(subreq, child); skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL); skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen, @@ -286,13 +287,13 @@ static void cryptd_skcipher_decrypt(struct crypto_async_request *base, struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req); struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); - struct crypto_sync_skcipher *child = ctx->child; - SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, child); + struct skcipher_request *subreq = &rctx->req; + struct crypto_skcipher *child = ctx->child; if (unlikely(err == -EINPROGRESS)) goto out; - skcipher_request_set_sync_tfm(subreq, child); + skcipher_request_set_tfm(subreq, child); skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL); skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen, @@ -343,9 +344,10 @@ static int cryptd_skcipher_init_tfm(struct crypto_skcipher *tfm) if (IS_ERR(cipher)) return PTR_ERR(cipher); - ctx->child = (struct crypto_sync_skcipher *)cipher; + ctx->child = cipher; crypto_skcipher_set_reqsize( - tfm, sizeof(struct cryptd_skcipher_request_ctx)); + tfm, sizeof(struct cryptd_skcipher_request_ctx) + + crypto_skcipher_reqsize(cipher)); return 0; } @@ -353,7 +355,7 @@ static void cryptd_skcipher_exit_tfm(struct crypto_skcipher *tfm) { struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); - crypto_free_sync_skcipher(ctx->child); + crypto_free_skcipher(ctx->child); } static void cryptd_skcipher_free(struct skcipher_instance *inst) @@ -931,7 +933,7 @@ struct crypto_skcipher *cryptd_skcipher_child(struct cryptd_skcipher *tfm) { struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(&tfm->base); - return &ctx->child->base; + return ctx->child; } EXPORT_SYMBOL_GPL(cryptd_skcipher_child); From cc7710d0d4ebc6998f04035cde4f32c5ddbe9d7f Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Fri, 11 Nov 
2022 18:00:36 +0800 Subject: [PATCH 1774/4122] crypto: hisilicon/qm - add missing pci_dev_put() in q_num_set() pci_get_device() will increase the reference count for the returned pci_dev. We need to use pci_dev_put() to decrease the reference count before q_num_set() returns. Fixes: c8b4b477079d ("crypto: hisilicon - add HiSilicon HPRE accelerator") Signed-off-by: Xiongfeng Wang Reviewed-by: Weili Qian Signed-off-by: Herbert Xu --- include/linux/hisi_acc_qm.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index e230c7c46110..c3618255b150 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -384,14 +384,14 @@ struct hisi_qp { static inline int q_num_set(const char *val, const struct kernel_param *kp, unsigned int device) { - struct pci_dev *pdev = pci_get_device(PCI_VENDOR_ID_HUAWEI, - device, NULL); + struct pci_dev *pdev; u32 n, q_num; int ret; if (!val) return -EINVAL; + pdev = pci_get_device(PCI_VENDOR_ID_HUAWEI, device, NULL); if (!pdev) { q_num = min_t(u32, QM_QNUM_V1, QM_QNUM_V2); pr_info("No device found currently, suppose queue number is %u\n", @@ -401,6 +401,8 @@ static inline int q_num_set(const char *val, const struct kernel_param *kp, q_num = QM_QNUM_V1; else q_num = QM_QNUM_V2; + + pci_dev_put(pdev); } ret = kstrtou32(val, 10, &n); From e6cb02bd0a52457e486a752da5db7b67f2540c16 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 11 Nov 2022 18:05:41 +0800 Subject: [PATCH 1775/4122] crypto: skcipher - Allow sync algorithms with large request contexts Some sync algorithms may require a large amount of temporary space during its operations. There is no reason why they should be limited just because some legacy users want to place all temporary data on the stack. Such algorithms can now set a flag to indicate that they need extra request context, which will cause them to be invisible to users that go through the sync_skcipher interface. 
Signed-off-by: Herbert Xu --- crypto/skcipher.c | 2 +- include/crypto/internal/skcipher.h | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/crypto/skcipher.c b/crypto/skcipher.c index 418211180cee..0ecab31cfe79 100644 --- a/crypto/skcipher.c +++ b/crypto/skcipher.c @@ -763,7 +763,7 @@ struct crypto_sync_skcipher *crypto_alloc_sync_skcipher( struct crypto_skcipher *tfm; /* Only sync algorithms allowed. */ - mask |= CRYPTO_ALG_ASYNC; + mask |= CRYPTO_ALG_ASYNC | CRYPTO_ALG_SKCIPHER_REQSIZE_LARGE; tfm = crypto_alloc_tfm(alg_name, &crypto_skcipher_type, type, mask); diff --git a/include/crypto/internal/skcipher.h b/include/crypto/internal/skcipher.h index a2339f80a615..2a97540156bb 100644 --- a/include/crypto/internal/skcipher.h +++ b/include/crypto/internal/skcipher.h @@ -14,6 +14,14 @@ #include #include +/* + * Set this if your algorithm is sync but needs a reqsize larger + * than MAX_SYNC_SKCIPHER_REQSIZE. + * + * Reuse bit that is specific to hash algorithms. + */ +#define CRYPTO_ALG_SKCIPHER_REQSIZE_LARGE CRYPTO_ALG_OPTIONAL_KEY + struct aead_request; struct rtattr; From 7bbbc9d81be588ae4fb28b5b202e4421dbfef197 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Sat, 12 Nov 2022 02:12:50 +0000 Subject: [PATCH 1776/4122] crypto: hisilicon/qm - delete redundant null assignment operations There is no security data in the pointer. It is only a value transferred as a structure. It makes no sense to zero a variable that is on the stack. So there is no need to set the pointer to NULL.
Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 363a02810a16..849dc80a7118 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -1773,7 +1773,6 @@ static void dfx_regs_uninit(struct hisi_qm *qm, dregs[i].regs = NULL; } kfree(dregs); - dregs = NULL; } /** From b40b62ed7b0ffe8eb2e6fe8bcfb47027c9a93e93 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Sat, 12 Nov 2022 02:12:51 +0000 Subject: [PATCH 1777/4122] crypto: hisilicon/qm - modify the process of regs dfx The last register logic and different register logic are combined. Use "u32" instead of 'int' in the regs function input parameter to simplify some checks. Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_main.c | 7 +- drivers/crypto/hisilicon/qm.c | 177 ++++++++++++---------- drivers/crypto/hisilicon/sec2/sec_main.c | 7 +- drivers/crypto/hisilicon/zip/zip_main.c | 7 +- include/linux/hisi_acc_qm.h | 8 +- 5 files changed, 112 insertions(+), 94 deletions(-) diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index baf1faec7046..923f9c279265 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -1101,8 +1101,7 @@ static int hpre_debugfs_init(struct hisi_qm *qm) qm->debug.sqe_mask_offset = HPRE_SQE_MASK_OFFSET; qm->debug.sqe_mask_len = HPRE_SQE_MASK_LEN; - ret = hisi_qm_diff_regs_init(qm, hpre_diff_regs, - ARRAY_SIZE(hpre_diff_regs)); + ret = hisi_qm_regs_debugfs_init(qm, hpre_diff_regs, ARRAY_SIZE(hpre_diff_regs)); if (ret) { dev_warn(dev, "Failed to init HPRE diff regs!\n"); goto debugfs_remove; @@ -1121,7 +1120,7 @@ static int hpre_debugfs_init(struct hisi_qm *qm) return 0; failed_to_create: - hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(hpre_diff_regs)); + hisi_qm_regs_debugfs_uninit(qm, 
ARRAY_SIZE(hpre_diff_regs)); debugfs_remove: debugfs_remove_recursive(qm->debug.debug_root); return ret; @@ -1129,7 +1128,7 @@ debugfs_remove: static void hpre_debugfs_exit(struct hisi_qm *qm) { - hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(hpre_diff_regs)); + hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hpre_diff_regs)); debugfs_remove_recursive(qm->debug.debug_root); } diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 849dc80a7118..441466df7c6d 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -1722,8 +1722,21 @@ static int qm_regs_show(struct seq_file *s, void *unused) DEFINE_SHOW_ATTRIBUTE(qm_regs); +static void dfx_regs_uninit(struct hisi_qm *qm, + struct dfx_diff_registers *dregs, int reg_len) +{ + int i; + + /* Setting the pointer is NULL to prevent double free */ + for (i = 0; i < reg_len; i++) { + kfree(dregs[i].regs); + dregs[i].regs = NULL; + } + kfree(dregs); +} + static struct dfx_diff_registers *dfx_regs_init(struct hisi_qm *qm, - const struct dfx_diff_registers *cregs, int reg_len) + const struct dfx_diff_registers *cregs, u32 reg_len) { struct dfx_diff_registers *diff_regs; u32 j, base_offset; @@ -1762,64 +1775,107 @@ alloc_error: return ERR_PTR(-ENOMEM); } -static void dfx_regs_uninit(struct hisi_qm *qm, - struct dfx_diff_registers *dregs, int reg_len) +static int qm_diff_regs_init(struct hisi_qm *qm, + struct dfx_diff_registers *dregs, u32 reg_len) { - int i; - - /* Setting the pointer is NULL to prevent double free */ - for (i = 0; i < reg_len; i++) { - kfree(dregs[i].regs); - dregs[i].regs = NULL; - } - kfree(dregs); -} - -/** - * hisi_qm_diff_regs_init() - Allocate memory for registers. - * @qm: device qm handle. - * @dregs: diff registers handle. - * @reg_len: diff registers region length. 
- */ -int hisi_qm_diff_regs_init(struct hisi_qm *qm, - struct dfx_diff_registers *dregs, int reg_len) -{ - if (!qm || !dregs || reg_len <= 0) - return -EINVAL; - - if (qm->fun_type != QM_HW_PF) - return 0; - - qm->debug.qm_diff_regs = dfx_regs_init(qm, qm_diff_regs, - ARRAY_SIZE(qm_diff_regs)); + qm->debug.qm_diff_regs = dfx_regs_init(qm, qm_diff_regs, ARRAY_SIZE(qm_diff_regs)); if (IS_ERR(qm->debug.qm_diff_regs)) return PTR_ERR(qm->debug.qm_diff_regs); qm->debug.acc_diff_regs = dfx_regs_init(qm, dregs, reg_len); if (IS_ERR(qm->debug.acc_diff_regs)) { - dfx_regs_uninit(qm, qm->debug.qm_diff_regs, - ARRAY_SIZE(qm_diff_regs)); + dfx_regs_uninit(qm, qm->debug.qm_diff_regs, ARRAY_SIZE(qm_diff_regs)); return PTR_ERR(qm->debug.acc_diff_regs); } return 0; } -EXPORT_SYMBOL_GPL(hisi_qm_diff_regs_init); -/** - * hisi_qm_diff_regs_uninit() - Free memory for registers. - * @qm: device qm handle. - * @reg_len: diff registers region length. - */ -void hisi_qm_diff_regs_uninit(struct hisi_qm *qm, int reg_len) +static void qm_last_regs_uninit(struct hisi_qm *qm) { - if (!qm || reg_len <= 0 || qm->fun_type != QM_HW_PF) + struct qm_debug *debug = &qm->debug; + + if (qm->fun_type == QM_HW_VF || !debug->qm_last_words) return; + kfree(debug->qm_last_words); + debug->qm_last_words = NULL; +} + +static int qm_last_regs_init(struct hisi_qm *qm) +{ + int dfx_regs_num = ARRAY_SIZE(qm_dfx_regs); + struct qm_debug *debug = &qm->debug; + int i; + + if (qm->fun_type == QM_HW_VF) + return 0; + + debug->qm_last_words = kcalloc(dfx_regs_num, sizeof(unsigned int), GFP_KERNEL); + if (!debug->qm_last_words) + return -ENOMEM; + + for (i = 0; i < dfx_regs_num; i++) { + debug->qm_last_words[i] = readl_relaxed(qm->io_base + + qm_dfx_regs[i].offset); + } + + return 0; +} + +static void qm_diff_regs_uninit(struct hisi_qm *qm, u32 reg_len) +{ dfx_regs_uninit(qm, qm->debug.acc_diff_regs, reg_len); dfx_regs_uninit(qm, qm->debug.qm_diff_regs, ARRAY_SIZE(qm_diff_regs)); } 
-EXPORT_SYMBOL_GPL(hisi_qm_diff_regs_uninit); + +/** + * hisi_qm_regs_debugfs_init() - Allocate memory for registers. + * @qm: device qm handle. + * @dregs: diff registers handle. + * @reg_len: diff registers region length. + */ +int hisi_qm_regs_debugfs_init(struct hisi_qm *qm, + struct dfx_diff_registers *dregs, u32 reg_len) +{ + int ret; + + if (!qm || !dregs) + return -EINVAL; + + if (qm->fun_type != QM_HW_PF) + return 0; + + ret = qm_last_regs_init(qm); + if (ret) { + dev_info(&qm->pdev->dev, "failed to init qm words memory!\n"); + return ret; + } + + ret = qm_diff_regs_init(qm, dregs, reg_len); + if (ret) { + qm_last_regs_uninit(qm); + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(hisi_qm_regs_debugfs_init); + +/** + * hisi_qm_regs_debugfs_uninit() - Free memory for registers. + * @qm: device qm handle. + * @reg_len: diff registers region length. + */ +void hisi_qm_regs_debugfs_uninit(struct hisi_qm *qm, u32 reg_len) +{ + if (!qm || qm->fun_type != QM_HW_PF) + return; + + qm_diff_regs_uninit(qm, reg_len); + qm_last_regs_uninit(qm); +} +EXPORT_SYMBOL_GPL(hisi_qm_regs_debugfs_uninit); /** * hisi_qm_acc_diff_regs_dump() - Dump registers's value. @@ -1829,12 +1885,12 @@ EXPORT_SYMBOL_GPL(hisi_qm_diff_regs_uninit); * @regs_len: diff registers region length. 
*/ void hisi_qm_acc_diff_regs_dump(struct hisi_qm *qm, struct seq_file *s, - struct dfx_diff_registers *dregs, int regs_len) + struct dfx_diff_registers *dregs, u32 regs_len) { u32 j, val, base_offset; int i, ret; - if (!qm || !s || !dregs || regs_len <= 0) + if (!qm || !s || !dregs) return; ret = hisi_qm_get_dfx_access(qm); @@ -3719,17 +3775,6 @@ static void hisi_qm_set_state(struct hisi_qm *qm, u8 state) writel(state, qm->io_base + QM_VF_STATE); } -static void qm_last_regs_uninit(struct hisi_qm *qm) -{ - struct qm_debug *debug = &qm->debug; - - if (qm->fun_type == QM_HW_VF || !debug->qm_last_words) - return; - - kfree(debug->qm_last_words); - debug->qm_last_words = NULL; -} - static void hisi_qm_unint_work(struct hisi_qm *qm) { destroy_workqueue(qm->wq); @@ -3760,8 +3805,6 @@ static void hisi_qm_memory_uninit(struct hisi_qm *qm) */ void hisi_qm_uninit(struct hisi_qm *qm) { - qm_last_regs_uninit(qm); - qm_cmd_uninit(qm); hisi_qm_unint_work(qm); down_write(&qm->qps_lock); @@ -6339,26 +6382,6 @@ err_destroy_idr: return ret; } -static void qm_last_regs_init(struct hisi_qm *qm) -{ - int dfx_regs_num = ARRAY_SIZE(qm_dfx_regs); - struct qm_debug *debug = &qm->debug; - int i; - - if (qm->fun_type == QM_HW_VF) - return; - - debug->qm_last_words = kcalloc(dfx_regs_num, sizeof(unsigned int), - GFP_KERNEL); - if (!debug->qm_last_words) - return; - - for (i = 0; i < dfx_regs_num; i++) { - debug->qm_last_words[i] = readl_relaxed(qm->io_base + - qm_dfx_regs[i].offset); - } -} - /** * hisi_qm_init() - Initialize configures about qm. * @qm: The qm needing init. 
@@ -6407,8 +6430,6 @@ int hisi_qm_init(struct hisi_qm *qm) qm_cmd_init(qm); atomic_set(&qm->status.flags, QM_INIT); - qm_last_regs_init(qm); - return 0; err_free_qm_memory: diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index 6eb8a16ba0a7..4e24735d95ba 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -899,8 +899,7 @@ static int sec_debugfs_init(struct hisi_qm *qm) qm->debug.sqe_mask_offset = SEC_SQE_MASK_OFFSET; qm->debug.sqe_mask_len = SEC_SQE_MASK_LEN; - ret = hisi_qm_diff_regs_init(qm, sec_diff_regs, - ARRAY_SIZE(sec_diff_regs)); + ret = hisi_qm_regs_debugfs_init(qm, sec_diff_regs, ARRAY_SIZE(sec_diff_regs)); if (ret) { dev_warn(dev, "Failed to init SEC diff regs!\n"); goto debugfs_remove; @@ -915,7 +914,7 @@ static int sec_debugfs_init(struct hisi_qm *qm) return 0; failed_to_create: - hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(sec_diff_regs)); + hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(sec_diff_regs)); debugfs_remove: debugfs_remove_recursive(sec_debugfs_root); return ret; @@ -923,7 +922,7 @@ debugfs_remove: static void sec_debugfs_exit(struct hisi_qm *qm) { - hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(sec_diff_regs)); + hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(sec_diff_regs)); debugfs_remove_recursive(qm->debug.debug_root); } diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c index c863435e8c75..1549bec3aea5 100644 --- a/drivers/crypto/hisilicon/zip/zip_main.c +++ b/drivers/crypto/hisilicon/zip/zip_main.c @@ -849,8 +849,7 @@ static int hisi_zip_debugfs_init(struct hisi_qm *qm) qm->debug.sqe_mask_offset = HZIP_SQE_MASK_OFFSET; qm->debug.sqe_mask_len = HZIP_SQE_MASK_LEN; qm->debug.debug_root = dev_d; - ret = hisi_qm_diff_regs_init(qm, hzip_diff_regs, - ARRAY_SIZE(hzip_diff_regs)); + ret = hisi_qm_regs_debugfs_init(qm, hzip_diff_regs, ARRAY_SIZE(hzip_diff_regs)); if (ret) { dev_warn(dev, "Failed to init ZIP diff 
regs!\n"); goto debugfs_remove; @@ -869,7 +868,7 @@ static int hisi_zip_debugfs_init(struct hisi_qm *qm) return 0; failed_to_create: - hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(hzip_diff_regs)); + hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hzip_diff_regs)); debugfs_remove: debugfs_remove_recursive(hzip_debugfs_root); return ret; @@ -895,7 +894,7 @@ static void hisi_zip_debug_regs_clear(struct hisi_qm *qm) static void hisi_zip_debugfs_exit(struct hisi_qm *qm) { - hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(hzip_diff_regs)); + hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hzip_diff_regs)); debugfs_remove_recursive(qm->debug.debug_root); diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index c3618255b150..be3aedaa96dc 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -471,11 +471,11 @@ int hisi_qm_sriov_disable(struct pci_dev *pdev, bool is_frozen); int hisi_qm_sriov_configure(struct pci_dev *pdev, int num_vfs); void hisi_qm_dev_err_init(struct hisi_qm *qm); void hisi_qm_dev_err_uninit(struct hisi_qm *qm); -int hisi_qm_diff_regs_init(struct hisi_qm *qm, - struct dfx_diff_registers *dregs, int reg_len); -void hisi_qm_diff_regs_uninit(struct hisi_qm *qm, int reg_len); +int hisi_qm_regs_debugfs_init(struct hisi_qm *qm, + struct dfx_diff_registers *dregs, u32 reg_len); +void hisi_qm_regs_debugfs_uninit(struct hisi_qm *qm, u32 reg_len); void hisi_qm_acc_diff_regs_dump(struct hisi_qm *qm, struct seq_file *s, - struct dfx_diff_registers *dregs, int regs_len); + struct dfx_diff_registers *dregs, u32 regs_len); pci_ers_result_t hisi_qm_dev_err_detected(struct pci_dev *pdev, pci_channel_state_t state); From 94476b2b6d60bc926a585ae62e1bf69bd22c1dff Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Sat, 12 Nov 2022 02:12:52 +0000 Subject: [PATCH 1778/4122] crypto: hisilicon/qm - split a debugfs.c from qm Considering that the qm feature and debugfs feature are independent. The code related to debugfs is getting larger and larger. 
It should be separated into its own debugfs file, so move some debugfs code from the qm file to a new file. The qm code logic is not modified, and maintainability is enhanced. Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/Makefile | 2 +- drivers/crypto/hisilicon/debugfs.c | 1097 ++++++++++++++++++++++++ drivers/crypto/hisilicon/qm.c | 1178 +------------------------- drivers/crypto/hisilicon/qm_common.h | 87 ++ 4 files changed, 1192 insertions(+), 1172 deletions(-) create mode 100644 drivers/crypto/hisilicon/debugfs.c create mode 100644 drivers/crypto/hisilicon/qm_common.h diff --git a/drivers/crypto/hisilicon/Makefile b/drivers/crypto/hisilicon/Makefile index 1e89269a2e4b..8595a5a5d228 100644 --- a/drivers/crypto/hisilicon/Makefile +++ b/drivers/crypto/hisilicon/Makefile @@ -3,6 +3,6 @@ obj-$(CONFIG_CRYPTO_DEV_HISI_HPRE) += hpre/ obj-$(CONFIG_CRYPTO_DEV_HISI_SEC) += sec/ obj-$(CONFIG_CRYPTO_DEV_HISI_SEC2) += sec2/ obj-$(CONFIG_CRYPTO_DEV_HISI_QM) += hisi_qm.o -hisi_qm-objs = qm.o sgl.o +hisi_qm-objs = qm.o sgl.o debugfs.o obj-$(CONFIG_CRYPTO_DEV_HISI_ZIP) += zip/ obj-$(CONFIG_CRYPTO_DEV_HISI_TRNG) += trng/ diff --git a/drivers/crypto/hisilicon/debugfs.c b/drivers/crypto/hisilicon/debugfs.c new file mode 100644 index 000000000000..13bec8b2d723 --- /dev/null +++ b/drivers/crypto/hisilicon/debugfs.c @@ -0,0 +1,1097 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 HiSilicon Limited. 
*/ +#include +#include "qm_common.h" + +#define QM_DFX_BASE 0x0100000 +#define QM_DFX_STATE1 0x0104000 +#define QM_DFX_STATE2 0x01040C8 +#define QM_DFX_COMMON 0x0000 +#define QM_DFX_BASE_LEN 0x5A +#define QM_DFX_STATE1_LEN 0x2E +#define QM_DFX_STATE2_LEN 0x11 +#define QM_DFX_COMMON_LEN 0xC3 +#define QM_DFX_REGS_LEN 4UL +#define QM_DBG_TMP_BUF_LEN 22 +#define CURRENT_FUN_MASK GENMASK(5, 0) +#define CURRENT_Q_MASK GENMASK(31, 16) +#define QM_SQE_ADDR_MASK GENMASK(7, 0) + +#define QM_DFX_MB_CNT_VF 0x104010 +#define QM_DFX_DB_CNT_VF 0x104020 +#define QM_DFX_SQE_CNT_VF_SQN 0x104030 +#define QM_DFX_CQE_CNT_VF_CQN 0x104040 +#define QM_DFX_QN_SHIFT 16 +#define QM_DFX_CNT_CLR_CE 0x100118 +#define QM_DBG_WRITE_LEN 1024 + +static const char * const qm_debug_file_name[] = { + [CURRENT_QM] = "current_qm", + [CURRENT_Q] = "current_q", + [CLEAR_ENABLE] = "clear_enable", +}; + +struct qm_dfx_item { + const char *name; + u32 offset; +}; + +static struct qm_dfx_item qm_dfx_files[] = { + {"err_irq", offsetof(struct qm_dfx, err_irq_cnt)}, + {"aeq_irq", offsetof(struct qm_dfx, aeq_irq_cnt)}, + {"abnormal_irq", offsetof(struct qm_dfx, abnormal_irq_cnt)}, + {"create_qp_err", offsetof(struct qm_dfx, create_qp_err_cnt)}, + {"mb_err", offsetof(struct qm_dfx, mb_err_cnt)}, +}; + +#define CNT_CYC_REGS_NUM 10 +static const struct debugfs_reg32 qm_dfx_regs[] = { + /* XXX_CNT are reading clear register */ + {"QM_ECC_1BIT_CNT ", 0x104000ull}, + {"QM_ECC_MBIT_CNT ", 0x104008ull}, + {"QM_DFX_MB_CNT ", 0x104018ull}, + {"QM_DFX_DB_CNT ", 0x104028ull}, + {"QM_DFX_SQE_CNT ", 0x104038ull}, + {"QM_DFX_CQE_CNT ", 0x104048ull}, + {"QM_DFX_SEND_SQE_TO_ACC_CNT ", 0x104050ull}, + {"QM_DFX_WB_SQE_FROM_ACC_CNT ", 0x104058ull}, + {"QM_DFX_ACC_FINISH_CNT ", 0x104060ull}, + {"QM_DFX_CQE_ERR_CNT ", 0x1040b4ull}, + {"QM_DFX_FUNS_ACTIVE_ST ", 0x200ull}, + {"QM_ECC_1BIT_INF ", 0x104004ull}, + {"QM_ECC_MBIT_INF ", 0x10400cull}, + {"QM_DFX_ACC_RDY_VLD0 ", 0x1040a0ull}, + {"QM_DFX_ACC_RDY_VLD1 ", 0x1040a4ull}, + 
{"QM_DFX_AXI_RDY_VLD ", 0x1040a8ull}, + {"QM_DFX_FF_ST0 ", 0x1040c8ull}, + {"QM_DFX_FF_ST1 ", 0x1040ccull}, + {"QM_DFX_FF_ST2 ", 0x1040d0ull}, + {"QM_DFX_FF_ST3 ", 0x1040d4ull}, + {"QM_DFX_FF_ST4 ", 0x1040d8ull}, + {"QM_DFX_FF_ST5 ", 0x1040dcull}, + {"QM_DFX_FF_ST6 ", 0x1040e0ull}, + {"QM_IN_IDLE_ST ", 0x1040e4ull}, +}; + +static const struct debugfs_reg32 qm_vf_dfx_regs[] = { + {"QM_DFX_FUNS_ACTIVE_ST ", 0x200ull}, +}; + +/* define the QM's dfx regs region and region length */ +static struct dfx_diff_registers qm_diff_regs[] = { + { + .reg_offset = QM_DFX_BASE, + .reg_len = QM_DFX_BASE_LEN, + }, { + .reg_offset = QM_DFX_STATE1, + .reg_len = QM_DFX_STATE1_LEN, + }, { + .reg_offset = QM_DFX_STATE2, + .reg_len = QM_DFX_STATE2_LEN, + }, { + .reg_offset = QM_DFX_COMMON, + .reg_len = QM_DFX_COMMON_LEN, + }, +}; + +static struct hisi_qm *file_to_qm(struct debugfs_file *file) +{ + struct qm_debug *debug = file->debug; + + return container_of(debug, struct hisi_qm, debug); +} + +static ssize_t qm_cmd_read(struct file *filp, char __user *buffer, + size_t count, loff_t *pos) +{ + char buf[QM_DBG_READ_LEN]; + int len; + + len = scnprintf(buf, QM_DBG_READ_LEN, "%s\n", + "Please echo help to cmd to get help information"); + + return simple_read_from_buffer(buffer, count, pos, buf, len); +} + +static void dump_show(struct hisi_qm *qm, void *info, + unsigned int info_size, char *info_name) +{ + struct device *dev = &qm->pdev->dev; + u8 *info_curr = info; + u32 i; +#define BYTE_PER_DW 4 + + dev_info(dev, "%s DUMP\n", info_name); + for (i = 0; i < info_size; i += BYTE_PER_DW, info_curr += BYTE_PER_DW) { + pr_info("DW%u: %02X%02X %02X%02X\n", i / BYTE_PER_DW, + *(info_curr + 3), *(info_curr + 2), *(info_curr + 1), *(info_curr)); + } +} + +static int qm_sqc_dump(struct hisi_qm *qm, const char *s) +{ + struct device *dev = &qm->pdev->dev; + struct qm_sqc *sqc, *sqc_curr; + dma_addr_t sqc_dma; + u32 qp_id; + int ret; + + if (!s) + return -EINVAL; + + ret = kstrtou32(s, 0, &qp_id); + if 
(ret || qp_id >= qm->qp_num) { + dev_err(dev, "Please input qp num (0-%u)", qm->qp_num - 1); + return -EINVAL; + } + + sqc = hisi_qm_ctx_alloc(qm, sizeof(*sqc), &sqc_dma); + if (IS_ERR(sqc)) + return PTR_ERR(sqc); + + ret = hisi_qm_mb(qm, QM_MB_CMD_SQC, sqc_dma, qp_id, 1); + if (ret) { + down_read(&qm->qps_lock); + if (qm->sqc) { + sqc_curr = qm->sqc + qp_id; + + dump_show(qm, sqc_curr, sizeof(*sqc), "SOFT SQC"); + } + up_read(&qm->qps_lock); + + goto free_ctx; + } + + dump_show(qm, sqc, sizeof(*sqc), "SQC"); + +free_ctx: + hisi_qm_ctx_free(qm, sizeof(*sqc), sqc, &sqc_dma); + return 0; +} + +static int qm_cqc_dump(struct hisi_qm *qm, const char *s) +{ + struct device *dev = &qm->pdev->dev; + struct qm_cqc *cqc, *cqc_curr; + dma_addr_t cqc_dma; + u32 qp_id; + int ret; + + if (!s) + return -EINVAL; + + ret = kstrtou32(s, 0, &qp_id); + if (ret || qp_id >= qm->qp_num) { + dev_err(dev, "Please input qp num (0-%u)", qm->qp_num - 1); + return -EINVAL; + } + + cqc = hisi_qm_ctx_alloc(qm, sizeof(*cqc), &cqc_dma); + if (IS_ERR(cqc)) + return PTR_ERR(cqc); + + ret = hisi_qm_mb(qm, QM_MB_CMD_CQC, cqc_dma, qp_id, 1); + if (ret) { + down_read(&qm->qps_lock); + if (qm->cqc) { + cqc_curr = qm->cqc + qp_id; + + dump_show(qm, cqc_curr, sizeof(*cqc), "SOFT CQC"); + } + up_read(&qm->qps_lock); + + goto free_ctx; + } + + dump_show(qm, cqc, sizeof(*cqc), "CQC"); + +free_ctx: + hisi_qm_ctx_free(qm, sizeof(*cqc), cqc, &cqc_dma); + return 0; +} + +static int qm_eqc_aeqc_dump(struct hisi_qm *qm, char *s, size_t size, + int cmd, char *name) +{ + struct device *dev = &qm->pdev->dev; + dma_addr_t xeqc_dma; + void *xeqc; + int ret; + + if (strsep(&s, " ")) { + dev_err(dev, "Please do not input extra characters!\n"); + return -EINVAL; + } + + xeqc = hisi_qm_ctx_alloc(qm, size, &xeqc_dma); + if (IS_ERR(xeqc)) + return PTR_ERR(xeqc); + + ret = hisi_qm_mb(qm, cmd, xeqc_dma, 0, 1); + if (ret) + goto err_free_ctx; + + dump_show(qm, xeqc, size, name); + +err_free_ctx: + hisi_qm_ctx_free(qm, size, 
xeqc, &xeqc_dma); + return ret; +} + +static int q_dump_param_parse(struct hisi_qm *qm, char *s, + u32 *e_id, u32 *q_id, u16 q_depth) +{ + struct device *dev = &qm->pdev->dev; + unsigned int qp_num = qm->qp_num; + char *presult; + int ret; + + presult = strsep(&s, " "); + if (!presult) { + dev_err(dev, "Please input qp number!\n"); + return -EINVAL; + } + + ret = kstrtou32(presult, 0, q_id); + if (ret || *q_id >= qp_num) { + dev_err(dev, "Please input qp num (0-%u)", qp_num - 1); + return -EINVAL; + } + + presult = strsep(&s, " "); + if (!presult) { + dev_err(dev, "Please input sqe number!\n"); + return -EINVAL; + } + + ret = kstrtou32(presult, 0, e_id); + if (ret || *e_id >= q_depth) { + dev_err(dev, "Please input sqe num (0-%u)", q_depth - 1); + return -EINVAL; + } + + if (strsep(&s, " ")) { + dev_err(dev, "Please do not input extra characters!\n"); + return -EINVAL; + } + + return 0; +} + +static int qm_sq_dump(struct hisi_qm *qm, char *s) +{ + u16 sq_depth = qm->qp_array->sq_depth; + void *sqe, *sqe_curr; + struct hisi_qp *qp; + u32 qp_id, sqe_id; + int ret; + + ret = q_dump_param_parse(qm, s, &sqe_id, &qp_id, sq_depth); + if (ret) + return ret; + + sqe = kzalloc(qm->sqe_size * sq_depth, GFP_KERNEL); + if (!sqe) + return -ENOMEM; + + qp = &qm->qp_array[qp_id]; + memcpy(sqe, qp->sqe, qm->sqe_size * sq_depth); + sqe_curr = sqe + (u32)(sqe_id * qm->sqe_size); + memset(sqe_curr + qm->debug.sqe_mask_offset, QM_SQE_ADDR_MASK, + qm->debug.sqe_mask_len); + + dump_show(qm, sqe_curr, qm->sqe_size, "SQE"); + + kfree(sqe); + + return 0; +} + +static int qm_cq_dump(struct hisi_qm *qm, char *s) +{ + struct qm_cqe *cqe_curr; + struct hisi_qp *qp; + u32 qp_id, cqe_id; + int ret; + + ret = q_dump_param_parse(qm, s, &cqe_id, &qp_id, qm->qp_array->cq_depth); + if (ret) + return ret; + + qp = &qm->qp_array[qp_id]; + cqe_curr = qp->cqe + cqe_id; + dump_show(qm, cqe_curr, sizeof(struct qm_cqe), "CQE"); + + return 0; +} + +static int qm_eq_aeq_dump(struct hisi_qm *qm, const char *s, 
+ size_t size, char *name) +{ + struct device *dev = &qm->pdev->dev; + void *xeqe; + u32 xeqe_id; + int ret; + + if (!s) + return -EINVAL; + + ret = kstrtou32(s, 0, &xeqe_id); + if (ret) + return -EINVAL; + + if (!strcmp(name, "EQE") && xeqe_id >= qm->eq_depth) { + dev_err(dev, "Please input eqe num (0-%u)", qm->eq_depth - 1); + return -EINVAL; + } else if (!strcmp(name, "AEQE") && xeqe_id >= qm->aeq_depth) { + dev_err(dev, "Please input aeqe num (0-%u)", qm->aeq_depth - 1); + return -EINVAL; + } + + down_read(&qm->qps_lock); + + if (qm->eqe && !strcmp(name, "EQE")) { + xeqe = qm->eqe + xeqe_id; + } else if (qm->aeqe && !strcmp(name, "AEQE")) { + xeqe = qm->aeqe + xeqe_id; + } else { + ret = -EINVAL; + goto err_unlock; + } + + dump_show(qm, xeqe, size, name); + +err_unlock: + up_read(&qm->qps_lock); + return ret; +} + +static int qm_dbg_help(struct hisi_qm *qm, char *s) +{ + struct device *dev = &qm->pdev->dev; + + if (strsep(&s, " ")) { + dev_err(dev, "Please do not input extra characters!\n"); + return -EINVAL; + } + + dev_info(dev, "available commands:\n"); + dev_info(dev, "sqc \n"); + dev_info(dev, "cqc \n"); + dev_info(dev, "eqc\n"); + dev_info(dev, "aeqc\n"); + dev_info(dev, "sq \n"); + dev_info(dev, "cq \n"); + dev_info(dev, "eq \n"); + dev_info(dev, "aeq \n"); + + return 0; +} + +static int qm_cmd_write_dump(struct hisi_qm *qm, const char *cmd_buf) +{ + struct device *dev = &qm->pdev->dev; + char *presult, *s, *s_tmp; + int ret; + + s = kstrdup(cmd_buf, GFP_KERNEL); + if (!s) + return -ENOMEM; + + s_tmp = s; + presult = strsep(&s, " "); + if (!presult) { + ret = -EINVAL; + goto err_buffer_free; + } + + if (!strcmp(presult, "sqc")) + ret = qm_sqc_dump(qm, s); + else if (!strcmp(presult, "cqc")) + ret = qm_cqc_dump(qm, s); + else if (!strcmp(presult, "eqc")) + ret = qm_eqc_aeqc_dump(qm, s, sizeof(struct qm_eqc), + QM_MB_CMD_EQC, "EQC"); + else if (!strcmp(presult, "aeqc")) + ret = qm_eqc_aeqc_dump(qm, s, sizeof(struct qm_aeqc), + QM_MB_CMD_AEQC, "AEQC"); + 
else if (!strcmp(presult, "sq")) + ret = qm_sq_dump(qm, s); + else if (!strcmp(presult, "cq")) + ret = qm_cq_dump(qm, s); + else if (!strcmp(presult, "eq")) + ret = qm_eq_aeq_dump(qm, s, sizeof(struct qm_eqe), "EQE"); + else if (!strcmp(presult, "aeq")) + ret = qm_eq_aeq_dump(qm, s, sizeof(struct qm_aeqe), "AEQE"); + else if (!strcmp(presult, "help")) + ret = qm_dbg_help(qm, s); + else + ret = -EINVAL; + + if (ret) + dev_info(dev, "Please echo help\n"); + +err_buffer_free: + kfree(s_tmp); + + return ret; +} + +static ssize_t qm_cmd_write(struct file *filp, const char __user *buffer, + size_t count, loff_t *pos) +{ + struct hisi_qm *qm = filp->private_data; + char *cmd_buf, *cmd_buf_tmp; + int ret; + + if (*pos) + return 0; + + ret = hisi_qm_get_dfx_access(qm); + if (ret) + return ret; + + /* Judge if the instance is being reset. */ + if (unlikely(atomic_read(&qm->status.flags) == QM_STOP)) { + ret = 0; + goto put_dfx_access; + } + + if (count > QM_DBG_WRITE_LEN) { + ret = -ENOSPC; + goto put_dfx_access; + } + + cmd_buf = memdup_user_nul(buffer, count); + if (IS_ERR(cmd_buf)) { + ret = PTR_ERR(cmd_buf); + goto put_dfx_access; + } + + cmd_buf_tmp = strchr(cmd_buf, '\n'); + if (cmd_buf_tmp) { + *cmd_buf_tmp = '\0'; + count = cmd_buf_tmp - cmd_buf + 1; + } + + ret = qm_cmd_write_dump(qm, cmd_buf); + if (ret) { + kfree(cmd_buf); + goto put_dfx_access; + } + + kfree(cmd_buf); + + ret = count; + +put_dfx_access: + hisi_qm_put_dfx_access(qm); + return ret; +} + +static const struct file_operations qm_cmd_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = qm_cmd_read, + .write = qm_cmd_write, +}; + +/** + * hisi_qm_regs_dump() - Dump registers's value. + * @s: debugfs file handle. + * @regset: accelerator registers information. + * + * Dump accelerator registers. 
+ */ +void hisi_qm_regs_dump(struct seq_file *s, struct debugfs_regset32 *regset) +{ + struct pci_dev *pdev = to_pci_dev(regset->dev); + struct hisi_qm *qm = pci_get_drvdata(pdev); + const struct debugfs_reg32 *regs = regset->regs; + int regs_len = regset->nregs; + int i, ret; + u32 val; + + ret = hisi_qm_get_dfx_access(qm); + if (ret) + return; + + for (i = 0; i < regs_len; i++) { + val = readl(regset->base + regs[i].offset); + seq_printf(s, "%s= 0x%08x\n", regs[i].name, val); + } + + hisi_qm_put_dfx_access(qm); +} +EXPORT_SYMBOL_GPL(hisi_qm_regs_dump); + +static int qm_regs_show(struct seq_file *s, void *unused) +{ + struct hisi_qm *qm = s->private; + struct debugfs_regset32 regset; + + if (qm->fun_type == QM_HW_PF) { + regset.regs = qm_dfx_regs; + regset.nregs = ARRAY_SIZE(qm_dfx_regs); + } else { + regset.regs = qm_vf_dfx_regs; + regset.nregs = ARRAY_SIZE(qm_vf_dfx_regs); + } + + regset.base = qm->io_base; + regset.dev = &qm->pdev->dev; + + hisi_qm_regs_dump(s, &regset); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(qm_regs); + +static u32 current_q_read(struct hisi_qm *qm) +{ + return readl(qm->io_base + QM_DFX_SQE_CNT_VF_SQN) >> QM_DFX_QN_SHIFT; +} + +static int current_q_write(struct hisi_qm *qm, u32 val) +{ + u32 tmp; + + if (val >= qm->debug.curr_qm_qp_num) + return -EINVAL; + + tmp = val << QM_DFX_QN_SHIFT | + (readl(qm->io_base + QM_DFX_SQE_CNT_VF_SQN) & CURRENT_FUN_MASK); + writel(tmp, qm->io_base + QM_DFX_SQE_CNT_VF_SQN); + + tmp = val << QM_DFX_QN_SHIFT | + (readl(qm->io_base + QM_DFX_CQE_CNT_VF_CQN) & CURRENT_FUN_MASK); + writel(tmp, qm->io_base + QM_DFX_CQE_CNT_VF_CQN); + + return 0; +} + +static u32 clear_enable_read(struct hisi_qm *qm) +{ + return readl(qm->io_base + QM_DFX_CNT_CLR_CE); +} + +/* rd_clr_ctrl 1 enable read clear, otherwise 0 disable it */ +static int clear_enable_write(struct hisi_qm *qm, u32 rd_clr_ctrl) +{ + if (rd_clr_ctrl > 1) + return -EINVAL; + + writel(rd_clr_ctrl, qm->io_base + QM_DFX_CNT_CLR_CE); + + return 0; +} + +static u32 
current_qm_read(struct hisi_qm *qm) +{ + return readl(qm->io_base + QM_DFX_MB_CNT_VF); +} + +static int qm_get_vf_qp_num(struct hisi_qm *qm, u32 fun_num) +{ + u32 remain_q_num, vfq_num; + u32 num_vfs = qm->vfs_num; + + vfq_num = (qm->ctrl_qp_num - qm->qp_num) / num_vfs; + if (vfq_num >= qm->max_qp_num) + return qm->max_qp_num; + + remain_q_num = (qm->ctrl_qp_num - qm->qp_num) % num_vfs; + if (vfq_num + remain_q_num <= qm->max_qp_num) + return fun_num == num_vfs ? vfq_num + remain_q_num : vfq_num; + + /* + * if vfq_num + remain_q_num > max_qp_num, the last VFs, + * each with one more queue. + */ + return fun_num + remain_q_num > num_vfs ? vfq_num + 1 : vfq_num; +} + +static int current_qm_write(struct hisi_qm *qm, u32 val) +{ + u32 tmp; + + if (val > qm->vfs_num) + return -EINVAL; + + /* According PF or VF Dev ID to calculation curr_qm_qp_num and store */ + if (!val) + qm->debug.curr_qm_qp_num = qm->qp_num; + else + qm->debug.curr_qm_qp_num = qm_get_vf_qp_num(qm, val); + + writel(val, qm->io_base + QM_DFX_MB_CNT_VF); + writel(val, qm->io_base + QM_DFX_DB_CNT_VF); + + tmp = val | + (readl(qm->io_base + QM_DFX_SQE_CNT_VF_SQN) & CURRENT_Q_MASK); + writel(tmp, qm->io_base + QM_DFX_SQE_CNT_VF_SQN); + + tmp = val | + (readl(qm->io_base + QM_DFX_CQE_CNT_VF_CQN) & CURRENT_Q_MASK); + writel(tmp, qm->io_base + QM_DFX_CQE_CNT_VF_CQN); + + return 0; +} + +static ssize_t qm_debug_read(struct file *filp, char __user *buf, + size_t count, loff_t *pos) +{ + struct debugfs_file *file = filp->private_data; + enum qm_debug_file index = file->index; + struct hisi_qm *qm = file_to_qm(file); + char tbuf[QM_DBG_TMP_BUF_LEN]; + u32 val; + int ret; + + ret = hisi_qm_get_dfx_access(qm); + if (ret) + return ret; + + mutex_lock(&file->lock); + switch (index) { + case CURRENT_QM: + val = current_qm_read(qm); + break; + case CURRENT_Q: + val = current_q_read(qm); + break; + case CLEAR_ENABLE: + val = clear_enable_read(qm); + break; + default: + goto err_input; + } + mutex_unlock(&file->lock); + 
+ hisi_qm_put_dfx_access(qm); + ret = scnprintf(tbuf, QM_DBG_TMP_BUF_LEN, "%u\n", val); + return simple_read_from_buffer(buf, count, pos, tbuf, ret); + +err_input: + mutex_unlock(&file->lock); + hisi_qm_put_dfx_access(qm); + return -EINVAL; +} + +static ssize_t qm_debug_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct debugfs_file *file = filp->private_data; + enum qm_debug_file index = file->index; + struct hisi_qm *qm = file_to_qm(file); + unsigned long val; + char tbuf[QM_DBG_TMP_BUF_LEN]; + int len, ret; + + if (*pos != 0) + return 0; + + if (count >= QM_DBG_TMP_BUF_LEN) + return -ENOSPC; + + len = simple_write_to_buffer(tbuf, QM_DBG_TMP_BUF_LEN - 1, pos, buf, + count); + if (len < 0) + return len; + + tbuf[len] = '\0'; + if (kstrtoul(tbuf, 0, &val)) + return -EFAULT; + + ret = hisi_qm_get_dfx_access(qm); + if (ret) + return ret; + + mutex_lock(&file->lock); + switch (index) { + case CURRENT_QM: + ret = current_qm_write(qm, val); + break; + case CURRENT_Q: + ret = current_q_write(qm, val); + break; + case CLEAR_ENABLE: + ret = clear_enable_write(qm, val); + break; + default: + ret = -EINVAL; + } + mutex_unlock(&file->lock); + + hisi_qm_put_dfx_access(qm); + + if (ret) + return ret; + + return count; +} + +static const struct file_operations qm_debug_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = qm_debug_read, + .write = qm_debug_write, +}; + +static void dfx_regs_uninit(struct hisi_qm *qm, + struct dfx_diff_registers *dregs, int reg_len) +{ + int i; + + /* Setting the pointer is NULL to prevent double free */ + for (i = 0; i < reg_len; i++) { + kfree(dregs[i].regs); + dregs[i].regs = NULL; + } + kfree(dregs); +} + +static struct dfx_diff_registers *dfx_regs_init(struct hisi_qm *qm, + const struct dfx_diff_registers *cregs, u32 reg_len) +{ + struct dfx_diff_registers *diff_regs; + u32 j, base_offset; + int i; + + diff_regs = kcalloc(reg_len, sizeof(*diff_regs), GFP_KERNEL); + if (!diff_regs) + return 
ERR_PTR(-ENOMEM); + + for (i = 0; i < reg_len; i++) { + if (!cregs[i].reg_len) + continue; + + diff_regs[i].reg_offset = cregs[i].reg_offset; + diff_regs[i].reg_len = cregs[i].reg_len; + diff_regs[i].regs = kcalloc(QM_DFX_REGS_LEN, cregs[i].reg_len, + GFP_KERNEL); + if (!diff_regs[i].regs) + goto alloc_error; + + for (j = 0; j < diff_regs[i].reg_len; j++) { + base_offset = diff_regs[i].reg_offset + + j * QM_DFX_REGS_LEN; + diff_regs[i].regs[j] = readl(qm->io_base + base_offset); + } + } + + return diff_regs; + +alloc_error: + while (i > 0) { + i--; + kfree(diff_regs[i].regs); + } + kfree(diff_regs); + return ERR_PTR(-ENOMEM); +} + +static int qm_diff_regs_init(struct hisi_qm *qm, + struct dfx_diff_registers *dregs, u32 reg_len) +{ + qm->debug.qm_diff_regs = dfx_regs_init(qm, qm_diff_regs, ARRAY_SIZE(qm_diff_regs)); + if (IS_ERR(qm->debug.qm_diff_regs)) + return PTR_ERR(qm->debug.qm_diff_regs); + + qm->debug.acc_diff_regs = dfx_regs_init(qm, dregs, reg_len); + if (IS_ERR(qm->debug.acc_diff_regs)) { + dfx_regs_uninit(qm, qm->debug.qm_diff_regs, ARRAY_SIZE(qm_diff_regs)); + return PTR_ERR(qm->debug.acc_diff_regs); + } + + return 0; +} + +static void qm_last_regs_uninit(struct hisi_qm *qm) +{ + struct qm_debug *debug = &qm->debug; + + if (qm->fun_type == QM_HW_VF || !debug->qm_last_words) + return; + + kfree(debug->qm_last_words); + debug->qm_last_words = NULL; +} + +static int qm_last_regs_init(struct hisi_qm *qm) +{ + int dfx_regs_num = ARRAY_SIZE(qm_dfx_regs); + struct qm_debug *debug = &qm->debug; + int i; + + if (qm->fun_type == QM_HW_VF) + return 0; + + debug->qm_last_words = kcalloc(dfx_regs_num, sizeof(unsigned int), GFP_KERNEL); + if (!debug->qm_last_words) + return -ENOMEM; + + for (i = 0; i < dfx_regs_num; i++) { + debug->qm_last_words[i] = readl_relaxed(qm->io_base + + qm_dfx_regs[i].offset); + } + + return 0; +} + +static void qm_diff_regs_uninit(struct hisi_qm *qm, u32 reg_len) +{ + dfx_regs_uninit(qm, qm->debug.acc_diff_regs, reg_len); + 
dfx_regs_uninit(qm, qm->debug.qm_diff_regs, ARRAY_SIZE(qm_diff_regs)); +} + +/** + * hisi_qm_regs_debugfs_init() - Allocate memory for registers. + * @qm: device qm handle. + * @dregs: diff registers handle. + * @reg_len: diff registers region length. + */ +int hisi_qm_regs_debugfs_init(struct hisi_qm *qm, + struct dfx_diff_registers *dregs, u32 reg_len) +{ + int ret; + + if (!qm || !dregs) + return -EINVAL; + + if (qm->fun_type != QM_HW_PF) + return 0; + + ret = qm_last_regs_init(qm); + if (ret) { + dev_info(&qm->pdev->dev, "failed to init qm words memory!\n"); + return ret; + } + + ret = qm_diff_regs_init(qm, dregs, reg_len); + if (ret) { + qm_last_regs_uninit(qm); + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(hisi_qm_regs_debugfs_init); + +/** + * hisi_qm_regs_debugfs_uninit() - Free memory for registers. + * @qm: device qm handle. + * @reg_len: diff registers region length. + */ +void hisi_qm_regs_debugfs_uninit(struct hisi_qm *qm, u32 reg_len) +{ + if (!qm || qm->fun_type != QM_HW_PF) + return; + + qm_diff_regs_uninit(qm, reg_len); + qm_last_regs_uninit(qm); +} +EXPORT_SYMBOL_GPL(hisi_qm_regs_debugfs_uninit); + +/** + * hisi_qm_acc_diff_regs_dump() - Dump registers's value. + * @qm: device qm handle. + * @s: Debugfs file handle. + * @dregs: diff registers handle. + * @regs_len: diff registers region length. 
+ */ +void hisi_qm_acc_diff_regs_dump(struct hisi_qm *qm, struct seq_file *s, + struct dfx_diff_registers *dregs, u32 regs_len) +{ + u32 j, val, base_offset; + int i, ret; + + if (!qm || !s || !dregs) + return; + + ret = hisi_qm_get_dfx_access(qm); + if (ret) + return; + + down_read(&qm->qps_lock); + for (i = 0; i < regs_len; i++) { + if (!dregs[i].reg_len) + continue; + + for (j = 0; j < dregs[i].reg_len; j++) { + base_offset = dregs[i].reg_offset + j * QM_DFX_REGS_LEN; + val = readl(qm->io_base + base_offset); + if (val != dregs[i].regs[j]) + seq_printf(s, "0x%08x = 0x%08x ---> 0x%08x\n", + base_offset, dregs[i].regs[j], val); + } + } + up_read(&qm->qps_lock); + + hisi_qm_put_dfx_access(qm); +} +EXPORT_SYMBOL_GPL(hisi_qm_acc_diff_regs_dump); + +void hisi_qm_show_last_dfx_regs(struct hisi_qm *qm) +{ + struct qm_debug *debug = &qm->debug; + struct pci_dev *pdev = qm->pdev; + u32 val; + int i; + + if (qm->fun_type == QM_HW_VF || !debug->qm_last_words) + return; + + for (i = 0; i < ARRAY_SIZE(qm_dfx_regs); i++) { + val = readl_relaxed(qm->io_base + qm_dfx_regs[i].offset); + if (debug->qm_last_words[i] != val) + pci_info(pdev, "%s \t= 0x%08x => 0x%08x\n", + qm_dfx_regs[i].name, debug->qm_last_words[i], val); + } +} + +static int qm_diff_regs_show(struct seq_file *s, void *unused) +{ + struct hisi_qm *qm = s->private; + + hisi_qm_acc_diff_regs_dump(qm, s, qm->debug.qm_diff_regs, + ARRAY_SIZE(qm_diff_regs)); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(qm_diff_regs); + +static ssize_t qm_status_read(struct file *filp, char __user *buffer, + size_t count, loff_t *pos) +{ + struct hisi_qm *qm = filp->private_data; + char buf[QM_DBG_READ_LEN]; + int val, len; + + val = atomic_read(&qm->status.flags); + len = scnprintf(buf, QM_DBG_READ_LEN, "%s\n", qm_s[val]); + + return simple_read_from_buffer(buffer, count, pos, buf, len); +} + +static const struct file_operations qm_status_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = qm_status_read, +}; + +static void 
qm_create_debugfs_file(struct hisi_qm *qm, struct dentry *dir, + enum qm_debug_file index) +{ + struct debugfs_file *file = qm->debug.files + index; + + debugfs_create_file(qm_debug_file_name[index], 0600, dir, file, + &qm_debug_fops); + + file->index = index; + mutex_init(&file->lock); + file->debug = &qm->debug; +} + +static int qm_debugfs_atomic64_set(void *data, u64 val) +{ + if (val) + return -EINVAL; + + atomic64_set((atomic64_t *)data, 0); + + return 0; +} + +static int qm_debugfs_atomic64_get(void *data, u64 *val) +{ + *val = atomic64_read((atomic64_t *)data); + + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(qm_atomic64_ops, qm_debugfs_atomic64_get, + qm_debugfs_atomic64_set, "%llu\n"); + +/** + * hisi_qm_debug_init() - Initialize qm related debugfs files. + * @qm: The qm for which we want to add debugfs files. + * + * Create qm related debugfs files. + */ +void hisi_qm_debug_init(struct hisi_qm *qm) +{ + struct dfx_diff_registers *qm_regs = qm->debug.qm_diff_regs; + struct qm_dfx *dfx = &qm->debug.dfx; + struct dentry *qm_d; + void *data; + int i; + + qm_d = debugfs_create_dir("qm", qm->debug.debug_root); + qm->debug.qm_d = qm_d; + + /* only show this in PF */ + if (qm->fun_type == QM_HW_PF) { + qm_create_debugfs_file(qm, qm->debug.debug_root, CURRENT_QM); + for (i = CURRENT_Q; i < DEBUG_FILE_NUM; i++) + qm_create_debugfs_file(qm, qm->debug.qm_d, i); + } + + if (qm_regs) + debugfs_create_file("diff_regs", 0444, qm->debug.qm_d, + qm, &qm_diff_regs_fops); + + debugfs_create_file("regs", 0444, qm->debug.qm_d, qm, &qm_regs_fops); + + debugfs_create_file("cmd", 0600, qm->debug.qm_d, qm, &qm_cmd_fops); + + debugfs_create_file("status", 0444, qm->debug.qm_d, qm, + &qm_status_fops); + for (i = 0; i < ARRAY_SIZE(qm_dfx_files); i++) { + data = (atomic64_t *)((uintptr_t)dfx + qm_dfx_files[i].offset); + debugfs_create_file(qm_dfx_files[i].name, + 0644, + qm_d, + data, + &qm_atomic64_ops); + } + + if (test_bit(QM_SUPPORT_FUNC_QOS, &qm->caps)) + hisi_qm_set_algqos_init(qm); 
+} +EXPORT_SYMBOL_GPL(hisi_qm_debug_init); + +/** + * hisi_qm_debug_regs_clear() - clear qm debug related registers. + * @qm: The qm for which we want to clear its debug registers. + */ +void hisi_qm_debug_regs_clear(struct hisi_qm *qm) +{ + const struct debugfs_reg32 *regs; + int i; + + /* clear current_qm */ + writel(0x0, qm->io_base + QM_DFX_MB_CNT_VF); + writel(0x0, qm->io_base + QM_DFX_DB_CNT_VF); + + /* clear current_q */ + writel(0x0, qm->io_base + QM_DFX_SQE_CNT_VF_SQN); + writel(0x0, qm->io_base + QM_DFX_CQE_CNT_VF_CQN); + + /* + * these registers are reading and clearing, so clear them after + * reading them. + */ + writel(0x1, qm->io_base + QM_DFX_CNT_CLR_CE); + + regs = qm_dfx_regs; + for (i = 0; i < CNT_CYC_REGS_NUM; i++) { + readl(qm->io_base + regs->offset); + regs++; + } + + /* clear clear_enable */ + writel(0x0, qm->io_base + QM_DFX_CNT_CLR_CE); +} +EXPORT_SYMBOL_GPL(hisi_qm_debug_regs_clear); diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 441466df7c6d..36d70b9f6117 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -16,6 +16,7 @@ #include #include #include +#include "qm_common.h" /* eq/aeq irq enable */ #define QM_VF_AEQ_INT_SOURCE 0x0 @@ -119,8 +120,6 @@ #define QM_SQC_VFT_NUM_SHIFT_V2 45 #define QM_SQC_VFT_NUM_MASK_v2 GENMASK(9, 0) -#define QM_DFX_CNT_CLR_CE 0x100118 - #define QM_ABNORMAL_INT_SOURCE 0x100000 #define QM_ABNORMAL_INT_MASK 0x100004 #define QM_ABNORMAL_INT_MASK_VALUE 0x7fff @@ -187,14 +186,6 @@ #define QM_VF_RESET_WAIT_TIMEOUT_US \ (QM_VF_RESET_WAIT_US * QM_VF_RESET_WAIT_CNT) -#define QM_DFX_MB_CNT_VF 0x104010 -#define QM_DFX_DB_CNT_VF 0x104020 -#define QM_DFX_SQE_CNT_VF_SQN 0x104030 -#define QM_DFX_CQE_CNT_VF_CQN 0x104040 -#define QM_DFX_QN_SHIFT 16 -#define CURRENT_FUN_MASK GENMASK(5, 0) -#define CURRENT_Q_MASK GENMASK(31, 16) - #define POLL_PERIOD 10 #define POLL_TIMEOUT 1000 #define WAIT_PERIOD_US_MAX 200 @@ -211,19 +202,15 @@ #define QMC_ALIGN(sz) ALIGN(sz, 32) 
#define QM_DBG_READ_LEN 256 -#define QM_DBG_WRITE_LEN 1024 -#define QM_DBG_TMP_BUF_LEN 22 #define QM_PCI_COMMAND_INVALID ~0 #define QM_RESET_STOP_TX_OFFSET 1 #define QM_RESET_STOP_RX_OFFSET 2 #define WAIT_PERIOD 20 #define REMOVE_WAIT_DELAY 10 -#define QM_SQE_ADDR_MASK GENMASK(7, 0) #define QM_DRIVER_REMOVING 0 #define QM_RST_SCHED 1 -#define QM_RESETTING 2 #define QM_QOS_PARAM_NUM 2 #define QM_QOS_VAL_NUM 1 #define QM_QOS_BDF_PARAM_NUM 4 @@ -250,15 +237,6 @@ #define QM_QOS_MIN_CIR_B 100 #define QM_QOS_MAX_CIR_U 6 #define QM_QOS_MAX_CIR_S 11 -#define QM_DFX_BASE 0x0100000 -#define QM_DFX_STATE1 0x0104000 -#define QM_DFX_STATE2 0x01040C8 -#define QM_DFX_COMMON 0x0000 -#define QM_DFX_BASE_LEN 0x5A -#define QM_DFX_STATE1_LEN 0x2E -#define QM_DFX_STATE2_LEN 0x11 -#define QM_DFX_COMMON_LEN 0xC3 -#define QM_DFX_REGS_LEN 4UL #define QM_AUTOSUSPEND_DELAY 3000 #define QM_MK_CQC_DW3_V1(hop_num, pg_sz, buf_sz, cqe_sz) \ @@ -368,73 +346,6 @@ static const struct hisi_qm_cap_info qm_basic_info[] = { {QM_VF_IRQ_NUM_CAP, 0x311c, 0, GENMASK(15, 0), 0x1, 0x2, 0x3}, }; -struct qm_cqe { - __le32 rsvd0; - __le16 cmd_id; - __le16 rsvd1; - __le16 sq_head; - __le16 sq_num; - __le16 rsvd2; - __le16 w7; -}; - -struct qm_eqe { - __le32 dw0; -}; - -struct qm_aeqe { - __le32 dw0; -}; - -struct qm_sqc { - __le16 head; - __le16 tail; - __le32 base_l; - __le32 base_h; - __le32 dw3; - __le16 w8; - __le16 rsvd0; - __le16 pasid; - __le16 w11; - __le16 cq_num; - __le16 w13; - __le32 rsvd1; -}; - -struct qm_cqc { - __le16 head; - __le16 tail; - __le32 base_l; - __le32 base_h; - __le32 dw3; - __le16 w8; - __le16 rsvd0; - __le16 pasid; - __le16 w11; - __le32 dw6; - __le32 rsvd1; -}; - -struct qm_eqc { - __le16 head; - __le16 tail; - __le32 base_l; - __le32 base_h; - __le32 dw3; - __le32 rsvd[2]; - __le32 dw6; -}; - -struct qm_aeqc { - __le16 head; - __le16 tail; - __le32 base_l; - __le32 base_h; - __le32 dw3; - __le32 rsvd[2]; - __le32 dw6; -}; - struct qm_mailbox { __le16 w0; __le16 queue_num; @@ 
-467,25 +378,6 @@ struct hisi_qm_hw_ops { int (*set_msi)(struct hisi_qm *qm, bool set); }; -struct qm_dfx_item { - const char *name; - u32 offset; -}; - -static struct qm_dfx_item qm_dfx_files[] = { - {"err_irq", offsetof(struct qm_dfx, err_irq_cnt)}, - {"aeq_irq", offsetof(struct qm_dfx, aeq_irq_cnt)}, - {"abnormal_irq", offsetof(struct qm_dfx, abnormal_irq_cnt)}, - {"create_qp_err", offsetof(struct qm_dfx, create_qp_err_cnt)}, - {"mb_err", offsetof(struct qm_dfx, mb_err_cnt)}, -}; - -static const char * const qm_debug_file_name[] = { - [CURRENT_QM] = "current_qm", - [CURRENT_Q] = "current_q", - [CLEAR_ENABLE] = "clear_enable", -}; - struct hisi_qm_hw_error { u32 int_msk; const char *msg; @@ -510,23 +402,6 @@ static const struct hisi_qm_hw_error qm_hw_error[] = { { /* sentinel */ } }; -/* define the QM's dfx regs region and region length */ -static struct dfx_diff_registers qm_diff_regs[] = { - { - .reg_offset = QM_DFX_BASE, - .reg_len = QM_DFX_BASE_LEN, - }, { - .reg_offset = QM_DFX_STATE1, - .reg_len = QM_DFX_STATE1_LEN, - }, { - .reg_offset = QM_DFX_STATE2, - .reg_len = QM_DFX_STATE2_LEN, - }, { - .reg_offset = QM_DFX_COMMON, - .reg_len = QM_DFX_COMMON_LEN, - }, -}; - static const char * const qm_db_timeout[] = { "sq", "cq", "eq", "aeq", }; @@ -535,10 +410,6 @@ static const char * const qm_fifo_overflow[] = { "cq", "eq", "aeq", }; -static const char * const qm_s[] = { - "init", "start", "close", "stop", -}; - static const char * const qp_s[] = { "none", "init", "start", "stop", "close", }; @@ -1439,507 +1310,7 @@ static int qm_get_vft_v2(struct hisi_qm *qm, u32 *base, u32 *number) return 0; } -static int qm_get_vf_qp_num(struct hisi_qm *qm, u32 fun_num) -{ - u32 remain_q_num, vfq_num; - u32 num_vfs = qm->vfs_num; - - vfq_num = (qm->ctrl_qp_num - qm->qp_num) / num_vfs; - if (vfq_num >= qm->max_qp_num) - return qm->max_qp_num; - - remain_q_num = (qm->ctrl_qp_num - qm->qp_num) % num_vfs; - if (vfq_num + remain_q_num <= qm->max_qp_num) - return fun_num == num_vfs ? 
vfq_num + remain_q_num : vfq_num; - - /* - * if vfq_num + remain_q_num > max_qp_num, the last VFs, - * each with one more queue. - */ - return fun_num + remain_q_num > num_vfs ? vfq_num + 1 : vfq_num; -} - -static struct hisi_qm *file_to_qm(struct debugfs_file *file) -{ - struct qm_debug *debug = file->debug; - - return container_of(debug, struct hisi_qm, debug); -} - -static u32 current_q_read(struct hisi_qm *qm) -{ - return readl(qm->io_base + QM_DFX_SQE_CNT_VF_SQN) >> QM_DFX_QN_SHIFT; -} - -static int current_q_write(struct hisi_qm *qm, u32 val) -{ - u32 tmp; - - if (val >= qm->debug.curr_qm_qp_num) - return -EINVAL; - - tmp = val << QM_DFX_QN_SHIFT | - (readl(qm->io_base + QM_DFX_SQE_CNT_VF_SQN) & CURRENT_FUN_MASK); - writel(tmp, qm->io_base + QM_DFX_SQE_CNT_VF_SQN); - - tmp = val << QM_DFX_QN_SHIFT | - (readl(qm->io_base + QM_DFX_CQE_CNT_VF_CQN) & CURRENT_FUN_MASK); - writel(tmp, qm->io_base + QM_DFX_CQE_CNT_VF_CQN); - - return 0; -} - -static u32 clear_enable_read(struct hisi_qm *qm) -{ - return readl(qm->io_base + QM_DFX_CNT_CLR_CE); -} - -/* rd_clr_ctrl 1 enable read clear, otherwise 0 disable it */ -static int clear_enable_write(struct hisi_qm *qm, u32 rd_clr_ctrl) -{ - if (rd_clr_ctrl > 1) - return -EINVAL; - - writel(rd_clr_ctrl, qm->io_base + QM_DFX_CNT_CLR_CE); - - return 0; -} - -static u32 current_qm_read(struct hisi_qm *qm) -{ - return readl(qm->io_base + QM_DFX_MB_CNT_VF); -} - -static int current_qm_write(struct hisi_qm *qm, u32 val) -{ - u32 tmp; - - if (val > qm->vfs_num) - return -EINVAL; - - /* According PF or VF Dev ID to calculation curr_qm_qp_num and store */ - if (!val) - qm->debug.curr_qm_qp_num = qm->qp_num; - else - qm->debug.curr_qm_qp_num = qm_get_vf_qp_num(qm, val); - - writel(val, qm->io_base + QM_DFX_MB_CNT_VF); - writel(val, qm->io_base + QM_DFX_DB_CNT_VF); - - tmp = val | - (readl(qm->io_base + QM_DFX_SQE_CNT_VF_SQN) & CURRENT_Q_MASK); - writel(tmp, qm->io_base + QM_DFX_SQE_CNT_VF_SQN); - - tmp = val | - (readl(qm->io_base + 
QM_DFX_CQE_CNT_VF_CQN) & CURRENT_Q_MASK); - writel(tmp, qm->io_base + QM_DFX_CQE_CNT_VF_CQN); - - return 0; -} - -static ssize_t qm_debug_read(struct file *filp, char __user *buf, - size_t count, loff_t *pos) -{ - struct debugfs_file *file = filp->private_data; - enum qm_debug_file index = file->index; - struct hisi_qm *qm = file_to_qm(file); - char tbuf[QM_DBG_TMP_BUF_LEN]; - u32 val; - int ret; - - ret = hisi_qm_get_dfx_access(qm); - if (ret) - return ret; - - mutex_lock(&file->lock); - switch (index) { - case CURRENT_QM: - val = current_qm_read(qm); - break; - case CURRENT_Q: - val = current_q_read(qm); - break; - case CLEAR_ENABLE: - val = clear_enable_read(qm); - break; - default: - goto err_input; - } - mutex_unlock(&file->lock); - - hisi_qm_put_dfx_access(qm); - ret = scnprintf(tbuf, QM_DBG_TMP_BUF_LEN, "%u\n", val); - return simple_read_from_buffer(buf, count, pos, tbuf, ret); - -err_input: - mutex_unlock(&file->lock); - hisi_qm_put_dfx_access(qm); - return -EINVAL; -} - -static ssize_t qm_debug_write(struct file *filp, const char __user *buf, - size_t count, loff_t *pos) -{ - struct debugfs_file *file = filp->private_data; - enum qm_debug_file index = file->index; - struct hisi_qm *qm = file_to_qm(file); - unsigned long val; - char tbuf[QM_DBG_TMP_BUF_LEN]; - int len, ret; - - if (*pos != 0) - return 0; - - if (count >= QM_DBG_TMP_BUF_LEN) - return -ENOSPC; - - len = simple_write_to_buffer(tbuf, QM_DBG_TMP_BUF_LEN - 1, pos, buf, - count); - if (len < 0) - return len; - - tbuf[len] = '\0'; - if (kstrtoul(tbuf, 0, &val)) - return -EFAULT; - - ret = hisi_qm_get_dfx_access(qm); - if (ret) - return ret; - - mutex_lock(&file->lock); - switch (index) { - case CURRENT_QM: - ret = current_qm_write(qm, val); - break; - case CURRENT_Q: - ret = current_q_write(qm, val); - break; - case CLEAR_ENABLE: - ret = clear_enable_write(qm, val); - break; - default: - ret = -EINVAL; - } - mutex_unlock(&file->lock); - - hisi_qm_put_dfx_access(qm); - - if (ret) - return ret; - - 
return count; -} - -static const struct file_operations qm_debug_fops = { - .owner = THIS_MODULE, - .open = simple_open, - .read = qm_debug_read, - .write = qm_debug_write, -}; - -#define CNT_CYC_REGS_NUM 10 -static const struct debugfs_reg32 qm_dfx_regs[] = { - /* XXX_CNT are reading clear register */ - {"QM_ECC_1BIT_CNT ", 0x104000ull}, - {"QM_ECC_MBIT_CNT ", 0x104008ull}, - {"QM_DFX_MB_CNT ", 0x104018ull}, - {"QM_DFX_DB_CNT ", 0x104028ull}, - {"QM_DFX_SQE_CNT ", 0x104038ull}, - {"QM_DFX_CQE_CNT ", 0x104048ull}, - {"QM_DFX_SEND_SQE_TO_ACC_CNT ", 0x104050ull}, - {"QM_DFX_WB_SQE_FROM_ACC_CNT ", 0x104058ull}, - {"QM_DFX_ACC_FINISH_CNT ", 0x104060ull}, - {"QM_DFX_CQE_ERR_CNT ", 0x1040b4ull}, - {"QM_DFX_FUNS_ACTIVE_ST ", 0x200ull}, - {"QM_ECC_1BIT_INF ", 0x104004ull}, - {"QM_ECC_MBIT_INF ", 0x10400cull}, - {"QM_DFX_ACC_RDY_VLD0 ", 0x1040a0ull}, - {"QM_DFX_ACC_RDY_VLD1 ", 0x1040a4ull}, - {"QM_DFX_AXI_RDY_VLD ", 0x1040a8ull}, - {"QM_DFX_FF_ST0 ", 0x1040c8ull}, - {"QM_DFX_FF_ST1 ", 0x1040ccull}, - {"QM_DFX_FF_ST2 ", 0x1040d0ull}, - {"QM_DFX_FF_ST3 ", 0x1040d4ull}, - {"QM_DFX_FF_ST4 ", 0x1040d8ull}, - {"QM_DFX_FF_ST5 ", 0x1040dcull}, - {"QM_DFX_FF_ST6 ", 0x1040e0ull}, - {"QM_IN_IDLE_ST ", 0x1040e4ull}, -}; - -static const struct debugfs_reg32 qm_vf_dfx_regs[] = { - {"QM_DFX_FUNS_ACTIVE_ST ", 0x200ull}, -}; - -/** - * hisi_qm_regs_dump() - Dump registers's value. - * @s: debugfs file handle. - * @regset: accelerator registers information. - * - * Dump accelerator registers. 
- */ -void hisi_qm_regs_dump(struct seq_file *s, struct debugfs_regset32 *regset) -{ - struct pci_dev *pdev = to_pci_dev(regset->dev); - struct hisi_qm *qm = pci_get_drvdata(pdev); - const struct debugfs_reg32 *regs = regset->regs; - int regs_len = regset->nregs; - int i, ret; - u32 val; - - ret = hisi_qm_get_dfx_access(qm); - if (ret) - return; - - for (i = 0; i < regs_len; i++) { - val = readl(regset->base + regs[i].offset); - seq_printf(s, "%s= 0x%08x\n", regs[i].name, val); - } - - hisi_qm_put_dfx_access(qm); -} -EXPORT_SYMBOL_GPL(hisi_qm_regs_dump); - -static int qm_regs_show(struct seq_file *s, void *unused) -{ - struct hisi_qm *qm = s->private; - struct debugfs_regset32 regset; - - if (qm->fun_type == QM_HW_PF) { - regset.regs = qm_dfx_regs; - regset.nregs = ARRAY_SIZE(qm_dfx_regs); - } else { - regset.regs = qm_vf_dfx_regs; - regset.nregs = ARRAY_SIZE(qm_vf_dfx_regs); - } - - regset.base = qm->io_base; - regset.dev = &qm->pdev->dev; - - hisi_qm_regs_dump(s, ®set); - - return 0; -} - -DEFINE_SHOW_ATTRIBUTE(qm_regs); - -static void dfx_regs_uninit(struct hisi_qm *qm, - struct dfx_diff_registers *dregs, int reg_len) -{ - int i; - - /* Setting the pointer is NULL to prevent double free */ - for (i = 0; i < reg_len; i++) { - kfree(dregs[i].regs); - dregs[i].regs = NULL; - } - kfree(dregs); -} - -static struct dfx_diff_registers *dfx_regs_init(struct hisi_qm *qm, - const struct dfx_diff_registers *cregs, u32 reg_len) -{ - struct dfx_diff_registers *diff_regs; - u32 j, base_offset; - int i; - - diff_regs = kcalloc(reg_len, sizeof(*diff_regs), GFP_KERNEL); - if (!diff_regs) - return ERR_PTR(-ENOMEM); - - for (i = 0; i < reg_len; i++) { - if (!cregs[i].reg_len) - continue; - - diff_regs[i].reg_offset = cregs[i].reg_offset; - diff_regs[i].reg_len = cregs[i].reg_len; - diff_regs[i].regs = kcalloc(QM_DFX_REGS_LEN, cregs[i].reg_len, - GFP_KERNEL); - if (!diff_regs[i].regs) - goto alloc_error; - - for (j = 0; j < diff_regs[i].reg_len; j++) { - base_offset = 
diff_regs[i].reg_offset + - j * QM_DFX_REGS_LEN; - diff_regs[i].regs[j] = readl(qm->io_base + base_offset); - } - } - - return diff_regs; - -alloc_error: - while (i > 0) { - i--; - kfree(diff_regs[i].regs); - } - kfree(diff_regs); - return ERR_PTR(-ENOMEM); -} - -static int qm_diff_regs_init(struct hisi_qm *qm, - struct dfx_diff_registers *dregs, u32 reg_len) -{ - qm->debug.qm_diff_regs = dfx_regs_init(qm, qm_diff_regs, ARRAY_SIZE(qm_diff_regs)); - if (IS_ERR(qm->debug.qm_diff_regs)) - return PTR_ERR(qm->debug.qm_diff_regs); - - qm->debug.acc_diff_regs = dfx_regs_init(qm, dregs, reg_len); - if (IS_ERR(qm->debug.acc_diff_regs)) { - dfx_regs_uninit(qm, qm->debug.qm_diff_regs, ARRAY_SIZE(qm_diff_regs)); - return PTR_ERR(qm->debug.acc_diff_regs); - } - - return 0; -} - -static void qm_last_regs_uninit(struct hisi_qm *qm) -{ - struct qm_debug *debug = &qm->debug; - - if (qm->fun_type == QM_HW_VF || !debug->qm_last_words) - return; - - kfree(debug->qm_last_words); - debug->qm_last_words = NULL; -} - -static int qm_last_regs_init(struct hisi_qm *qm) -{ - int dfx_regs_num = ARRAY_SIZE(qm_dfx_regs); - struct qm_debug *debug = &qm->debug; - int i; - - if (qm->fun_type == QM_HW_VF) - return 0; - - debug->qm_last_words = kcalloc(dfx_regs_num, sizeof(unsigned int), GFP_KERNEL); - if (!debug->qm_last_words) - return -ENOMEM; - - for (i = 0; i < dfx_regs_num; i++) { - debug->qm_last_words[i] = readl_relaxed(qm->io_base + - qm_dfx_regs[i].offset); - } - - return 0; -} - -static void qm_diff_regs_uninit(struct hisi_qm *qm, u32 reg_len) -{ - dfx_regs_uninit(qm, qm->debug.acc_diff_regs, reg_len); - dfx_regs_uninit(qm, qm->debug.qm_diff_regs, ARRAY_SIZE(qm_diff_regs)); -} - -/** - * hisi_qm_regs_debugfs_init() - Allocate memory for registers. - * @qm: device qm handle. - * @dregs: diff registers handle. - * @reg_len: diff registers region length. 
- */ -int hisi_qm_regs_debugfs_init(struct hisi_qm *qm, - struct dfx_diff_registers *dregs, u32 reg_len) -{ - int ret; - - if (!qm || !dregs) - return -EINVAL; - - if (qm->fun_type != QM_HW_PF) - return 0; - - ret = qm_last_regs_init(qm); - if (ret) { - dev_info(&qm->pdev->dev, "failed to init qm words memory!\n"); - return ret; - } - - ret = qm_diff_regs_init(qm, dregs, reg_len); - if (ret) { - qm_last_regs_uninit(qm); - return ret; - } - - return 0; -} -EXPORT_SYMBOL_GPL(hisi_qm_regs_debugfs_init); - -/** - * hisi_qm_regs_debugfs_uninit() - Free memory for registers. - * @qm: device qm handle. - * @reg_len: diff registers region length. - */ -void hisi_qm_regs_debugfs_uninit(struct hisi_qm *qm, u32 reg_len) -{ - if (!qm || qm->fun_type != QM_HW_PF) - return; - - qm_diff_regs_uninit(qm, reg_len); - qm_last_regs_uninit(qm); -} -EXPORT_SYMBOL_GPL(hisi_qm_regs_debugfs_uninit); - -/** - * hisi_qm_acc_diff_regs_dump() - Dump registers's value. - * @qm: device qm handle. - * @s: Debugfs file handle. - * @dregs: diff registers handle. - * @regs_len: diff registers region length. 
- */ -void hisi_qm_acc_diff_regs_dump(struct hisi_qm *qm, struct seq_file *s, - struct dfx_diff_registers *dregs, u32 regs_len) -{ - u32 j, val, base_offset; - int i, ret; - - if (!qm || !s || !dregs) - return; - - ret = hisi_qm_get_dfx_access(qm); - if (ret) - return; - - down_read(&qm->qps_lock); - for (i = 0; i < regs_len; i++) { - if (!dregs[i].reg_len) - continue; - - for (j = 0; j < dregs[i].reg_len; j++) { - base_offset = dregs[i].reg_offset + j * QM_DFX_REGS_LEN; - val = readl(qm->io_base + base_offset); - if (val != dregs[i].regs[j]) - seq_printf(s, "0x%08x = 0x%08x ---> 0x%08x\n", - base_offset, dregs[i].regs[j], val); - } - } - up_read(&qm->qps_lock); - - hisi_qm_put_dfx_access(qm); -} -EXPORT_SYMBOL_GPL(hisi_qm_acc_diff_regs_dump); - -static int qm_diff_regs_show(struct seq_file *s, void *unused) -{ - struct hisi_qm *qm = s->private; - - hisi_qm_acc_diff_regs_dump(qm, s, qm->debug.qm_diff_regs, - ARRAY_SIZE(qm_diff_regs)); - - return 0; -} -DEFINE_SHOW_ATTRIBUTE(qm_diff_regs); - -static ssize_t qm_cmd_read(struct file *filp, char __user *buffer, - size_t count, loff_t *pos) -{ - char buf[QM_DBG_READ_LEN]; - int len; - - len = scnprintf(buf, QM_DBG_READ_LEN, "%s\n", - "Please echo help to cmd to get help information"); - - return simple_read_from_buffer(buffer, count, pos, buf, len); -} - -static void *qm_ctx_alloc(struct hisi_qm *qm, size_t ctx_size, +void *hisi_qm_ctx_alloc(struct hisi_qm *qm, size_t ctx_size, dma_addr_t *dma_addr) { struct device *dev = &qm->pdev->dev; @@ -1959,7 +1330,7 @@ static void *qm_ctx_alloc(struct hisi_qm *qm, size_t ctx_size, return ctx_addr; } -static void qm_ctx_free(struct hisi_qm *qm, size_t ctx_size, +void hisi_qm_ctx_free(struct hisi_qm *qm, size_t ctx_size, const void *ctx_addr, dma_addr_t *dma_addr) { struct device *dev = &qm->pdev->dev; @@ -1968,21 +1339,6 @@ static void qm_ctx_free(struct hisi_qm *qm, size_t ctx_size, kfree(ctx_addr); } -static void dump_show(struct hisi_qm *qm, void *info, - unsigned int 
info_size, char *info_name) -{ - struct device *dev = &qm->pdev->dev; - u8 *info_curr = info; - u32 i; -#define BYTE_PER_DW 4 - - dev_info(dev, "%s DUMP\n", info_name); - for (i = 0; i < info_size; i += BYTE_PER_DW, info_curr += BYTE_PER_DW) { - pr_info("DW%u: %02X%02X %02X%02X\n", i / BYTE_PER_DW, - *(info_curr + 3), *(info_curr + 2), *(info_curr + 1), *(info_curr)); - } -} - static int qm_dump_sqc_raw(struct hisi_qm *qm, dma_addr_t dma_addr, u16 qp_id) { return hisi_qm_mb(qm, QM_MB_CMD_SQC, dma_addr, qp_id, 1); @@ -1993,387 +1349,6 @@ static int qm_dump_cqc_raw(struct hisi_qm *qm, dma_addr_t dma_addr, u16 qp_id) return hisi_qm_mb(qm, QM_MB_CMD_CQC, dma_addr, qp_id, 1); } -static int qm_sqc_dump(struct hisi_qm *qm, const char *s) -{ - struct device *dev = &qm->pdev->dev; - struct qm_sqc *sqc, *sqc_curr; - dma_addr_t sqc_dma; - u32 qp_id; - int ret; - - if (!s) - return -EINVAL; - - ret = kstrtou32(s, 0, &qp_id); - if (ret || qp_id >= qm->qp_num) { - dev_err(dev, "Please input qp num (0-%u)", qm->qp_num - 1); - return -EINVAL; - } - - sqc = qm_ctx_alloc(qm, sizeof(*sqc), &sqc_dma); - if (IS_ERR(sqc)) - return PTR_ERR(sqc); - - ret = qm_dump_sqc_raw(qm, sqc_dma, qp_id); - if (ret) { - down_read(&qm->qps_lock); - if (qm->sqc) { - sqc_curr = qm->sqc + qp_id; - - dump_show(qm, sqc_curr, sizeof(*sqc), "SOFT SQC"); - } - up_read(&qm->qps_lock); - - goto free_ctx; - } - - dump_show(qm, sqc, sizeof(*sqc), "SQC"); - -free_ctx: - qm_ctx_free(qm, sizeof(*sqc), sqc, &sqc_dma); - return 0; -} - -static int qm_cqc_dump(struct hisi_qm *qm, const char *s) -{ - struct device *dev = &qm->pdev->dev; - struct qm_cqc *cqc, *cqc_curr; - dma_addr_t cqc_dma; - u32 qp_id; - int ret; - - if (!s) - return -EINVAL; - - ret = kstrtou32(s, 0, &qp_id); - if (ret || qp_id >= qm->qp_num) { - dev_err(dev, "Please input qp num (0-%u)", qm->qp_num - 1); - return -EINVAL; - } - - cqc = qm_ctx_alloc(qm, sizeof(*cqc), &cqc_dma); - if (IS_ERR(cqc)) - return PTR_ERR(cqc); - - ret = qm_dump_cqc_raw(qm, 
cqc_dma, qp_id); - if (ret) { - down_read(&qm->qps_lock); - if (qm->cqc) { - cqc_curr = qm->cqc + qp_id; - - dump_show(qm, cqc_curr, sizeof(*cqc), "SOFT CQC"); - } - up_read(&qm->qps_lock); - - goto free_ctx; - } - - dump_show(qm, cqc, sizeof(*cqc), "CQC"); - -free_ctx: - qm_ctx_free(qm, sizeof(*cqc), cqc, &cqc_dma); - return 0; -} - -static int qm_eqc_aeqc_dump(struct hisi_qm *qm, char *s, size_t size, - int cmd, char *name) -{ - struct device *dev = &qm->pdev->dev; - dma_addr_t xeqc_dma; - void *xeqc; - int ret; - - if (strsep(&s, " ")) { - dev_err(dev, "Please do not input extra characters!\n"); - return -EINVAL; - } - - xeqc = qm_ctx_alloc(qm, size, &xeqc_dma); - if (IS_ERR(xeqc)) - return PTR_ERR(xeqc); - - ret = hisi_qm_mb(qm, cmd, xeqc_dma, 0, 1); - if (ret) - goto err_free_ctx; - - dump_show(qm, xeqc, size, name); - -err_free_ctx: - qm_ctx_free(qm, size, xeqc, &xeqc_dma); - return ret; -} - -static int q_dump_param_parse(struct hisi_qm *qm, char *s, - u32 *e_id, u32 *q_id, u16 q_depth) -{ - struct device *dev = &qm->pdev->dev; - unsigned int qp_num = qm->qp_num; - char *presult; - int ret; - - presult = strsep(&s, " "); - if (!presult) { - dev_err(dev, "Please input qp number!\n"); - return -EINVAL; - } - - ret = kstrtou32(presult, 0, q_id); - if (ret || *q_id >= qp_num) { - dev_err(dev, "Please input qp num (0-%u)", qp_num - 1); - return -EINVAL; - } - - presult = strsep(&s, " "); - if (!presult) { - dev_err(dev, "Please input sqe number!\n"); - return -EINVAL; - } - - ret = kstrtou32(presult, 0, e_id); - if (ret || *e_id >= q_depth) { - dev_err(dev, "Please input sqe num (0-%u)", q_depth - 1); - return -EINVAL; - } - - if (strsep(&s, " ")) { - dev_err(dev, "Please do not input extra characters!\n"); - return -EINVAL; - } - - return 0; -} - -static int qm_sq_dump(struct hisi_qm *qm, char *s) -{ - u16 sq_depth = qm->qp_array->cq_depth; - void *sqe, *sqe_curr; - struct hisi_qp *qp; - u32 qp_id, sqe_id; - int ret; - - ret = q_dump_param_parse(qm, s, &sqe_id, 
&qp_id, sq_depth); - if (ret) - return ret; - - sqe = kzalloc(qm->sqe_size * sq_depth, GFP_KERNEL); - if (!sqe) - return -ENOMEM; - - qp = &qm->qp_array[qp_id]; - memcpy(sqe, qp->sqe, qm->sqe_size * sq_depth); - sqe_curr = sqe + (u32)(sqe_id * qm->sqe_size); - memset(sqe_curr + qm->debug.sqe_mask_offset, QM_SQE_ADDR_MASK, - qm->debug.sqe_mask_len); - - dump_show(qm, sqe_curr, qm->sqe_size, "SQE"); - - kfree(sqe); - - return 0; -} - -static int qm_cq_dump(struct hisi_qm *qm, char *s) -{ - struct qm_cqe *cqe_curr; - struct hisi_qp *qp; - u32 qp_id, cqe_id; - int ret; - - ret = q_dump_param_parse(qm, s, &cqe_id, &qp_id, qm->qp_array->cq_depth); - if (ret) - return ret; - - qp = &qm->qp_array[qp_id]; - cqe_curr = qp->cqe + cqe_id; - dump_show(qm, cqe_curr, sizeof(struct qm_cqe), "CQE"); - - return 0; -} - -static int qm_eq_aeq_dump(struct hisi_qm *qm, const char *s, - size_t size, char *name) -{ - struct device *dev = &qm->pdev->dev; - void *xeqe; - u32 xeqe_id; - int ret; - - if (!s) - return -EINVAL; - - ret = kstrtou32(s, 0, &xeqe_id); - if (ret) - return -EINVAL; - - if (!strcmp(name, "EQE") && xeqe_id >= qm->eq_depth) { - dev_err(dev, "Please input eqe num (0-%u)", qm->eq_depth - 1); - return -EINVAL; - } else if (!strcmp(name, "AEQE") && xeqe_id >= qm->aeq_depth) { - dev_err(dev, "Please input aeqe num (0-%u)", qm->eq_depth - 1); - return -EINVAL; - } - - down_read(&qm->qps_lock); - - if (qm->eqe && !strcmp(name, "EQE")) { - xeqe = qm->eqe + xeqe_id; - } else if (qm->aeqe && !strcmp(name, "AEQE")) { - xeqe = qm->aeqe + xeqe_id; - } else { - ret = -EINVAL; - goto err_unlock; - } - - dump_show(qm, xeqe, size, name); - -err_unlock: - up_read(&qm->qps_lock); - return ret; -} - -static int qm_dbg_help(struct hisi_qm *qm, char *s) -{ - struct device *dev = &qm->pdev->dev; - - if (strsep(&s, " ")) { - dev_err(dev, "Please do not input extra characters!\n"); - return -EINVAL; - } - - dev_info(dev, "available commands:\n"); - dev_info(dev, "sqc \n"); - dev_info(dev, "cqc 
\n"); - dev_info(dev, "eqc\n"); - dev_info(dev, "aeqc\n"); - dev_info(dev, "sq \n"); - dev_info(dev, "cq \n"); - dev_info(dev, "eq \n"); - dev_info(dev, "aeq \n"); - - return 0; -} - -static int qm_cmd_write_dump(struct hisi_qm *qm, const char *cmd_buf) -{ - struct device *dev = &qm->pdev->dev; - char *presult, *s, *s_tmp; - int ret; - - s = kstrdup(cmd_buf, GFP_KERNEL); - if (!s) - return -ENOMEM; - - s_tmp = s; - presult = strsep(&s, " "); - if (!presult) { - ret = -EINVAL; - goto err_buffer_free; - } - - if (!strcmp(presult, "sqc")) - ret = qm_sqc_dump(qm, s); - else if (!strcmp(presult, "cqc")) - ret = qm_cqc_dump(qm, s); - else if (!strcmp(presult, "eqc")) - ret = qm_eqc_aeqc_dump(qm, s, sizeof(struct qm_eqc), - QM_MB_CMD_EQC, "EQC"); - else if (!strcmp(presult, "aeqc")) - ret = qm_eqc_aeqc_dump(qm, s, sizeof(struct qm_aeqc), - QM_MB_CMD_AEQC, "AEQC"); - else if (!strcmp(presult, "sq")) - ret = qm_sq_dump(qm, s); - else if (!strcmp(presult, "cq")) - ret = qm_cq_dump(qm, s); - else if (!strcmp(presult, "eq")) - ret = qm_eq_aeq_dump(qm, s, sizeof(struct qm_eqe), "EQE"); - else if (!strcmp(presult, "aeq")) - ret = qm_eq_aeq_dump(qm, s, sizeof(struct qm_aeqe), "AEQE"); - else if (!strcmp(presult, "help")) - ret = qm_dbg_help(qm, s); - else - ret = -EINVAL; - - if (ret) - dev_info(dev, "Please echo help\n"); - -err_buffer_free: - kfree(s_tmp); - - return ret; -} - -static ssize_t qm_cmd_write(struct file *filp, const char __user *buffer, - size_t count, loff_t *pos) -{ - struct hisi_qm *qm = filp->private_data; - char *cmd_buf, *cmd_buf_tmp; - int ret; - - if (*pos) - return 0; - - ret = hisi_qm_get_dfx_access(qm); - if (ret) - return ret; - - /* Judge if the instance is being reset. 
*/ - if (unlikely(atomic_read(&qm->status.flags) == QM_STOP)) { - ret = 0; - goto put_dfx_access; - } - - if (count > QM_DBG_WRITE_LEN) { - ret = -ENOSPC; - goto put_dfx_access; - } - - cmd_buf = memdup_user_nul(buffer, count); - if (IS_ERR(cmd_buf)) { - ret = PTR_ERR(cmd_buf); - goto put_dfx_access; - } - - cmd_buf_tmp = strchr(cmd_buf, '\n'); - if (cmd_buf_tmp) { - *cmd_buf_tmp = '\0'; - count = cmd_buf_tmp - cmd_buf + 1; - } - - ret = qm_cmd_write_dump(qm, cmd_buf); - if (ret) { - kfree(cmd_buf); - goto put_dfx_access; - } - - kfree(cmd_buf); - - ret = count; - -put_dfx_access: - hisi_qm_put_dfx_access(qm); - return ret; -} - -static const struct file_operations qm_cmd_fops = { - .owner = THIS_MODULE, - .open = simple_open, - .read = qm_cmd_read, - .write = qm_cmd_write, -}; - -static void qm_create_debugfs_file(struct hisi_qm *qm, struct dentry *dir, - enum qm_debug_file index) -{ - struct debugfs_file *file = qm->debug.files + index; - - debugfs_create_file(qm_debug_file_name[index], 0600, dir, file, - &qm_debug_fops); - - file->index = index; - mutex_init(&file->lock); - file->debug = &qm->debug; -} - static void qm_hw_error_init_v1(struct hisi_qm *qm) { writel(QM_ABNORMAL_INT_MASK_VALUE, qm->io_base + QM_ABNORMAL_INT_MASK); @@ -3155,7 +2130,7 @@ static int qm_drain_qp(struct hisi_qp *qp) return ret; } - addr = qm_ctx_alloc(qm, size, &dma_addr); + addr = hisi_qm_ctx_alloc(qm, size, &dma_addr); if (IS_ERR(addr)) { dev_err(dev, "Failed to alloc ctx for sqc and cqc!\n"); return -ENOMEM; @@ -3190,7 +2165,7 @@ static int qm_drain_qp(struct hisi_qp *qp) usleep_range(WAIT_PERIOD_US_MIN, WAIT_PERIOD_US_MAX); } - qm_ctx_free(qm, size, addr, &dma_addr); + hisi_qm_ctx_free(qm, size, addr, &dma_addr); return ret; } @@ -4173,45 +3148,6 @@ err_unlock: } EXPORT_SYMBOL_GPL(hisi_qm_stop); -static ssize_t qm_status_read(struct file *filp, char __user *buffer, - size_t count, loff_t *pos) -{ - struct hisi_qm *qm = filp->private_data; - char buf[QM_DBG_READ_LEN]; - int val, len; 
- - val = atomic_read(&qm->status.flags); - len = scnprintf(buf, QM_DBG_READ_LEN, "%s\n", qm_s[val]); - - return simple_read_from_buffer(buffer, count, pos, buf, len); -} - -static const struct file_operations qm_status_fops = { - .owner = THIS_MODULE, - .open = simple_open, - .read = qm_status_read, -}; - -static int qm_debugfs_atomic64_set(void *data, u64 val) -{ - if (val) - return -EINVAL; - - atomic64_set((atomic64_t *)data, 0); - - return 0; -} - -static int qm_debugfs_atomic64_get(void *data, u64 *val) -{ - *val = atomic64_read((atomic64_t *)data); - - return 0; -} - -DEFINE_DEBUGFS_ATTRIBUTE(qm_atomic64_ops, qm_debugfs_atomic64_get, - qm_debugfs_atomic64_set, "%llu\n"); - static void qm_hw_error_init(struct hisi_qm *qm) { if (!qm->ops->hw_error_init) { @@ -4732,7 +3668,7 @@ static const struct file_operations qm_algqos_fops = { * * Create function qos debugfs files, VF ping PF to get function qos. */ -static void hisi_qm_set_algqos_init(struct hisi_qm *qm) +void hisi_qm_set_algqos_init(struct hisi_qm *qm) { if (qm->fun_type == QM_HW_PF) debugfs_create_file("alg_qos", 0644, qm->debug.debug_root, @@ -4742,88 +3678,6 @@ static void hisi_qm_set_algqos_init(struct hisi_qm *qm) qm, &qm_algqos_fops); } -/** - * hisi_qm_debug_init() - Initialize qm related debugfs files. - * @qm: The qm for which we want to add debugfs files. - * - * Create qm related debugfs files. 
- */ -void hisi_qm_debug_init(struct hisi_qm *qm) -{ - struct dfx_diff_registers *qm_regs = qm->debug.qm_diff_regs; - struct qm_dfx *dfx = &qm->debug.dfx; - struct dentry *qm_d; - void *data; - int i; - - qm_d = debugfs_create_dir("qm", qm->debug.debug_root); - qm->debug.qm_d = qm_d; - - /* only show this in PF */ - if (qm->fun_type == QM_HW_PF) { - qm_create_debugfs_file(qm, qm->debug.debug_root, CURRENT_QM); - for (i = CURRENT_Q; i < DEBUG_FILE_NUM; i++) - qm_create_debugfs_file(qm, qm->debug.qm_d, i); - } - - if (qm_regs) - debugfs_create_file("diff_regs", 0444, qm->debug.qm_d, - qm, &qm_diff_regs_fops); - - debugfs_create_file("regs", 0444, qm->debug.qm_d, qm, &qm_regs_fops); - - debugfs_create_file("cmd", 0600, qm->debug.qm_d, qm, &qm_cmd_fops); - - debugfs_create_file("status", 0444, qm->debug.qm_d, qm, - &qm_status_fops); - for (i = 0; i < ARRAY_SIZE(qm_dfx_files); i++) { - data = (atomic64_t *)((uintptr_t)dfx + qm_dfx_files[i].offset); - debugfs_create_file(qm_dfx_files[i].name, - 0644, - qm_d, - data, - &qm_atomic64_ops); - } - - if (test_bit(QM_SUPPORT_FUNC_QOS, &qm->caps)) - hisi_qm_set_algqos_init(qm); -} -EXPORT_SYMBOL_GPL(hisi_qm_debug_init); - -/** - * hisi_qm_debug_regs_clear() - clear qm debug related registers. - * @qm: The qm for which we want to clear its debug registers. - */ -void hisi_qm_debug_regs_clear(struct hisi_qm *qm) -{ - const struct debugfs_reg32 *regs; - int i; - - /* clear current_qm */ - writel(0x0, qm->io_base + QM_DFX_MB_CNT_VF); - writel(0x0, qm->io_base + QM_DFX_DB_CNT_VF); - - /* clear current_q */ - writel(0x0, qm->io_base + QM_DFX_SQE_CNT_VF_SQN); - writel(0x0, qm->io_base + QM_DFX_CQE_CNT_VF_CQN); - - /* - * these registers are reading and clearing, so clear them after - * reading them. 
- */ - writel(0x1, qm->io_base + QM_DFX_CNT_CLR_CE); - - regs = qm_dfx_regs; - for (i = 0; i < CNT_CYC_REGS_NUM; i++) { - readl(qm->io_base + regs->offset); - regs++; - } - - /* clear clear_enable */ - writel(0x0, qm->io_base + QM_DFX_CNT_CLR_CE); -} -EXPORT_SYMBOL_GPL(hisi_qm_debug_regs_clear); - static void hisi_qm_init_vf_qos(struct hisi_qm *qm, int total_func) { int i; @@ -5462,24 +4316,6 @@ static int qm_controller_reset_done(struct hisi_qm *qm) return 0; } -static void qm_show_last_dfx_regs(struct hisi_qm *qm) -{ - struct qm_debug *debug = &qm->debug; - struct pci_dev *pdev = qm->pdev; - u32 val; - int i; - - if (qm->fun_type == QM_HW_VF || !debug->qm_last_words) - return; - - for (i = 0; i < ARRAY_SIZE(qm_dfx_regs); i++) { - val = readl_relaxed(qm->io_base + qm_dfx_regs[i].offset); - if (debug->qm_last_words[i] != val) - pci_info(pdev, "%s \t= 0x%08x => 0x%08x\n", - qm_dfx_regs[i].name, debug->qm_last_words[i], val); - } -} - static int qm_controller_reset(struct hisi_qm *qm) { struct pci_dev *pdev = qm->pdev; @@ -5495,7 +4331,7 @@ static int qm_controller_reset(struct hisi_qm *qm) return ret; } - qm_show_last_dfx_regs(qm); + hisi_qm_show_last_dfx_regs(qm); if (qm->err_ini->show_last_dfx_regs) qm->err_ini->show_last_dfx_regs(qm); diff --git a/drivers/crypto/hisilicon/qm_common.h b/drivers/crypto/hisilicon/qm_common.h new file mode 100644 index 000000000000..1406a422d455 --- /dev/null +++ b/drivers/crypto/hisilicon/qm_common.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2022 HiSilicon Limited. 
*/ +#ifndef QM_COMMON_H +#define QM_COMMON_H + +#define QM_DBG_READ_LEN 256 +#define QM_RESETTING 2 + +struct qm_cqe { + __le32 rsvd0; + __le16 cmd_id; + __le16 rsvd1; + __le16 sq_head; + __le16 sq_num; + __le16 rsvd2; + __le16 w7; +}; + +struct qm_eqe { + __le32 dw0; +}; + +struct qm_aeqe { + __le32 dw0; +}; + +struct qm_sqc { + __le16 head; + __le16 tail; + __le32 base_l; + __le32 base_h; + __le32 dw3; + __le16 w8; + __le16 rsvd0; + __le16 pasid; + __le16 w11; + __le16 cq_num; + __le16 w13; + __le32 rsvd1; +}; + +struct qm_cqc { + __le16 head; + __le16 tail; + __le32 base_l; + __le32 base_h; + __le32 dw3; + __le16 w8; + __le16 rsvd0; + __le16 pasid; + __le16 w11; + __le32 dw6; + __le32 rsvd1; +}; + +struct qm_eqc { + __le16 head; + __le16 tail; + __le32 base_l; + __le32 base_h; + __le32 dw3; + __le32 rsvd[2]; + __le32 dw6; +}; + +struct qm_aeqc { + __le16 head; + __le16 tail; + __le32 base_l; + __le32 base_h; + __le32 dw3; + __le32 rsvd[2]; + __le32 dw6; +}; + +static const char * const qm_s[] = { + "init", "start", "close", "stop", +}; + +void *hisi_qm_ctx_alloc(struct hisi_qm *qm, size_t ctx_size, + dma_addr_t *dma_addr); +void hisi_qm_ctx_free(struct hisi_qm *qm, size_t ctx_size, + const void *ctx_addr, dma_addr_t *dma_addr); +void hisi_qm_show_last_dfx_regs(struct hisi_qm *qm); +void hisi_qm_set_algqos_init(struct hisi_qm *qm); + +#endif From 9c75609842f091fa814153d21243b07988dc8ef3 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Sat, 12 Nov 2022 02:12:53 +0000 Subject: [PATCH 1779/4122] crypto: hisilicon/qm - the command dump process is modified Reduce the function complexity by using the function table in the process of dumping queues. The function input parameters are unified, and maintainability is enhanced.
Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/debugfs.c | 130 ++++++++++++++++++++--------- 1 file changed, 90 insertions(+), 40 deletions(-) diff --git a/drivers/crypto/hisilicon/debugfs.c b/drivers/crypto/hisilicon/debugfs.c index 13bec8b2d723..2cc1591949db 100644 --- a/drivers/crypto/hisilicon/debugfs.c +++ b/drivers/crypto/hisilicon/debugfs.c @@ -36,6 +36,12 @@ struct qm_dfx_item { u32 offset; }; +struct qm_cmd_dump_item { + const char *cmd; + char *info_name; + int (*dump_fn)(struct hisi_qm *qm, char *cmd, char *info_name); +}; + static struct qm_dfx_item qm_dfx_files[] = { {"err_irq", offsetof(struct qm_dfx, err_irq_cnt)}, {"aeq_irq", offsetof(struct qm_dfx, aeq_irq_cnt)}, @@ -128,7 +134,7 @@ static void dump_show(struct hisi_qm *qm, void *info, } } -static int qm_sqc_dump(struct hisi_qm *qm, const char *s) +static int qm_sqc_dump(struct hisi_qm *qm, char *s, char *name) { struct device *dev = &qm->pdev->dev; struct qm_sqc *sqc, *sqc_curr; @@ -162,14 +168,14 @@ static int qm_sqc_dump(struct hisi_qm *qm, const char *s) goto free_ctx; } - dump_show(qm, sqc, sizeof(*sqc), "SQC"); + dump_show(qm, sqc, sizeof(*sqc), name); free_ctx: hisi_qm_ctx_free(qm, sizeof(*sqc), sqc, &sqc_dma); return 0; } -static int qm_cqc_dump(struct hisi_qm *qm, const char *s) +static int qm_cqc_dump(struct hisi_qm *qm, char *s, char *name) { struct device *dev = &qm->pdev->dev; struct qm_cqc *cqc, *cqc_curr; @@ -203,26 +209,35 @@ static int qm_cqc_dump(struct hisi_qm *qm, const char *s) goto free_ctx; } - dump_show(qm, cqc, sizeof(*cqc), "CQC"); + dump_show(qm, cqc, sizeof(*cqc), name); free_ctx: hisi_qm_ctx_free(qm, sizeof(*cqc), cqc, &cqc_dma); return 0; } -static int qm_eqc_aeqc_dump(struct hisi_qm *qm, char *s, size_t size, - int cmd, char *name) +static int qm_eqc_aeqc_dump(struct hisi_qm *qm, char *s, char *name) { struct device *dev = &qm->pdev->dev; dma_addr_t xeqc_dma; + size_t size; void *xeqc; int ret; + u8 cmd; if (strsep(&s, " ")) { 
dev_err(dev, "Please do not input extra characters!\n"); return -EINVAL; } + if (!strcmp(name, "EQC")) { + cmd = QM_MB_CMD_EQC; + size = sizeof(struct qm_eqc); + } else { + cmd = QM_MB_CMD_AEQC; + size = sizeof(struct qm_aeqc); + } + xeqc = hisi_qm_ctx_alloc(qm, size, &xeqc_dma); if (IS_ERR(xeqc)) return PTR_ERR(xeqc); @@ -278,7 +293,7 @@ static int q_dump_param_parse(struct hisi_qm *qm, char *s, return 0; } -static int qm_sq_dump(struct hisi_qm *qm, char *s) +static int qm_sq_dump(struct hisi_qm *qm, char *s, char *name) { u16 sq_depth = qm->qp_array->cq_depth; void *sqe, *sqe_curr; @@ -300,14 +315,14 @@ static int qm_sq_dump(struct hisi_qm *qm, char *s) memset(sqe_curr + qm->debug.sqe_mask_offset, QM_SQE_ADDR_MASK, qm->debug.sqe_mask_len); - dump_show(qm, sqe_curr, qm->sqe_size, "SQE"); + dump_show(qm, sqe_curr, qm->sqe_size, name); kfree(sqe); return 0; } -static int qm_cq_dump(struct hisi_qm *qm, char *s) +static int qm_cq_dump(struct hisi_qm *qm, char *s, char *name) { struct qm_cqe *cqe_curr; struct hisi_qp *qp; @@ -320,15 +335,16 @@ static int qm_cq_dump(struct hisi_qm *qm, char *s) qp = &qm->qp_array[qp_id]; cqe_curr = qp->cqe + cqe_id; - dump_show(qm, cqe_curr, sizeof(struct qm_cqe), "CQE"); + dump_show(qm, cqe_curr, sizeof(struct qm_cqe), name); return 0; } -static int qm_eq_aeq_dump(struct hisi_qm *qm, const char *s, - size_t size, char *name) +static int qm_eq_aeq_dump(struct hisi_qm *qm, char *s, char *name) { struct device *dev = &qm->pdev->dev; + u16 xeq_depth; + size_t size; void *xeqe; u32 xeqe_id; int ret; @@ -340,11 +356,16 @@ static int qm_eq_aeq_dump(struct hisi_qm *qm, const char *s, if (ret) return -EINVAL; - if (!strcmp(name, "EQE") && xeqe_id >= qm->eq_depth) { - dev_err(dev, "Please input eqe num (0-%u)", qm->eq_depth - 1); - return -EINVAL; - } else if (!strcmp(name, "AEQE") && xeqe_id >= qm->aeq_depth) { - dev_err(dev, "Please input aeqe num (0-%u)", qm->eq_depth - 1); + if (!strcmp(name, "EQE")) { + xeq_depth = qm->eq_depth; + size = 
sizeof(struct qm_eqe); + } else { + xeq_depth = qm->aeq_depth; + size = sizeof(struct qm_aeqe); + } + + if (xeqe_id >= xeq_depth) { + dev_err(dev, "Please input eqe or aeqe num (0-%u)", xeq_depth - 1); return -EINVAL; } @@ -388,11 +409,47 @@ static int qm_dbg_help(struct hisi_qm *qm, char *s) return 0; } +static const struct qm_cmd_dump_item qm_cmd_dump_table[] = { + { + .cmd = "sqc", + .info_name = "SQC", + .dump_fn = qm_sqc_dump, + }, { + .cmd = "cqc", + .info_name = "CQC", + .dump_fn = qm_cqc_dump, + }, { + .cmd = "eqc", + .info_name = "EQC", + .dump_fn = qm_eqc_aeqc_dump, + }, { + .cmd = "aeqc", + .info_name = "AEQC", + .dump_fn = qm_eqc_aeqc_dump, + }, { + .cmd = "sq", + .info_name = "SQE", + .dump_fn = qm_sq_dump, + }, { + .cmd = "cq", + .info_name = "CQE", + .dump_fn = qm_cq_dump, + }, { + .cmd = "eq", + .info_name = "EQE", + .dump_fn = qm_eq_aeq_dump, + }, { + .cmd = "aeq", + .info_name = "AEQE", + .dump_fn = qm_eq_aeq_dump, + }, +}; + static int qm_cmd_write_dump(struct hisi_qm *qm, const char *cmd_buf) { struct device *dev = &qm->pdev->dev; char *presult, *s, *s_tmp; - int ret; + int table_size, i, ret; s = kstrdup(cmd_buf, GFP_KERNEL); if (!s) @@ -405,31 +462,24 @@ static int qm_cmd_write_dump(struct hisi_qm *qm, const char *cmd_buf) goto err_buffer_free; } - if (!strcmp(presult, "sqc")) - ret = qm_sqc_dump(qm, s); - else if (!strcmp(presult, "cqc")) - ret = qm_cqc_dump(qm, s); - else if (!strcmp(presult, "eqc")) - ret = qm_eqc_aeqc_dump(qm, s, sizeof(struct qm_eqc), - QM_MB_CMD_EQC, "EQC"); - else if (!strcmp(presult, "aeqc")) - ret = qm_eqc_aeqc_dump(qm, s, sizeof(struct qm_aeqc), - QM_MB_CMD_AEQC, "AEQC"); - else if (!strcmp(presult, "sq")) - ret = qm_sq_dump(qm, s); - else if (!strcmp(presult, "cq")) - ret = qm_cq_dump(qm, s); - else if (!strcmp(presult, "eq")) - ret = qm_eq_aeq_dump(qm, s, sizeof(struct qm_eqe), "EQE"); - else if (!strcmp(presult, "aeq")) - ret = qm_eq_aeq_dump(qm, s, sizeof(struct qm_aeqe), "AEQE"); - else if (!strcmp(presult, 
"help")) + if (!strcmp(presult, "help")) { ret = qm_dbg_help(qm, s); - else - ret = -EINVAL; + goto err_buffer_free; + } - if (ret) + table_size = ARRAY_SIZE(qm_cmd_dump_table); + for (i = 0; i < table_size; i++) { + if (!strcmp(presult, qm_cmd_dump_table[i].cmd)) { + ret = qm_cmd_dump_table[i].dump_fn(qm, s, + qm_cmd_dump_table[i].info_name); + break; + } + } + + if (i == table_size) { dev_info(dev, "Please echo help\n"); + ret = -EINVAL; + } err_buffer_free: kfree(s_tmp); From 2132d4efaa66388f1f79c79a920908a22464686b Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Sat, 12 Nov 2022 08:51:04 +0000 Subject: [PATCH 1780/4122] crypto: hisilicon/sec - fix spelling mistake 'ckeck' -> 'check' There are a couple of spelling mistakes in sec2. Fix them. Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sec2/sec_crypto.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index 84ae8ddd1a13..a2630ddc0294 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -2009,7 +2009,7 @@ static int sec_aead_sha512_ctx_init(struct crypto_aead *tfm) return sec_aead_ctx_init(tfm, "sha512"); } -static int sec_skcipher_cryptlen_ckeck(struct sec_ctx *ctx, +static int sec_skcipher_cryptlen_check(struct sec_ctx *ctx, struct sec_req *sreq) { u32 cryptlen = sreq->c_req.sk_req->cryptlen; @@ -2071,7 +2071,7 @@ static int sec_skcipher_param_check(struct sec_ctx *ctx, struct sec_req *sreq) } return 0; } else if (c_alg == SEC_CALG_AES || c_alg == SEC_CALG_SM4) { - return sec_skcipher_cryptlen_ckeck(ctx, sreq); + return sec_skcipher_cryptlen_check(ctx, sreq); } dev_err(dev, "skcipher algorithm error!\n"); From 75df46b598b5b46b0857ee7d2410deaf215e23d1 Mon Sep 17 00:00:00 2001 From: Wenkai Lin Date: Sat, 12 Nov 2022 08:51:05 +0000 Subject: [PATCH 1781/4122] crypto: hisilicon/sec - remove continuous blank lines Fix that put 
two or more continuous blank lines inside function. Signed-off-by: Wenkai Lin Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sec2/sec_crypto.c | 1 - drivers/crypto/hisilicon/sec2/sec_main.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index a2630ddc0294..f5bfc9755a4a 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -283,7 +283,6 @@ static int sec_bd_send(struct sec_ctx *ctx, struct sec_req *req) spin_lock_bh(&qp_ctx->req_lock); ret = hisi_qp_send(qp_ctx->qp, &req->sec_sqe); - if (ctx->fake_req_limit <= atomic_read(&qp_ctx->qp->qp_status.used) && !ret) { list_add_tail(&req->backlog_head, &qp_ctx->backlog); diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index 4e24735d95ba..93572c0d4faa 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -427,7 +427,6 @@ static void sec_set_endian(struct hisi_qm *qm) if (!IS_ENABLED(CONFIG_64BIT)) reg |= BIT(1); - if (!IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN)) reg |= BIT(0); From 5a47cb4df38bee861de37c12aaa4ef5510dd533b Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Thu, 17 Nov 2022 10:44:21 +0100 Subject: [PATCH 1782/4122] dt-bindings: arm-smmu: Add SM6350 GPU SMMUv2 SM6350 has a qcom,smmu-v2-style SMMU just for Adreno and friends. Document it. 
Signed-off-by: Konrad Dybcio Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221117094422.11000-2-konrad.dybcio@linaro.org Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index 28f5720824cd..b28c5c2b0ff2 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -94,6 +94,7 @@ properties: - qcom,sc7180-smmu-v2 - qcom,sdm630-smmu-v2 - qcom,sdm845-smmu-v2 + - qcom,sm6350-smmu-v2 - const: qcom,adreno-smmu - const: qcom,smmu-v2 - description: Qcom Adreno GPUs on Google Cheza platform @@ -346,6 +347,7 @@ allOf: compatible: contains: enum: + - qcom,sm6350-smmu-v2 - qcom,sm8150-smmu-500 - qcom,sm8250-smmu-500 then: From 3811a7283a0a07fa84ccde69b3d48115d34e79af Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Thu, 17 Nov 2022 10:44:22 +0100 Subject: [PATCH 1783/4122] iommu/arm-smmu-qcom: Add SM6350 SMMUv2 SM6350 uses a qcom,smmu-v2-style SMMU just for Adreno and friends. Add a compatible for it. 
Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20221117094422.11000-3-konrad.dybcio@linaro.org Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index c94daf88c505..91d404deb115 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -509,6 +509,7 @@ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { { .compatible = "qcom,sdm845-smmu-500", .data = &sdm845_smmu_500_data }, { .compatible = "qcom,sm6115-smmu-500", .data = &qcom_smmu_500_impl0_data}, { .compatible = "qcom,sm6125-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,sm6350-smmu-v2", .data = &qcom_smmu_v2_data }, { .compatible = "qcom,sm6350-smmu-500", .data = &qcom_smmu_500_impl0_data }, { .compatible = "qcom,sm6375-smmu-500", .data = &qcom_smmu_500_impl0_data }, { .compatible = "qcom,sm8150-smmu-500", .data = &qcom_smmu_500_impl0_data }, From 6c7b2202e4d11572ab23a89aeec49005b94bb966 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 17 Nov 2022 12:25:02 -0500 Subject: [PATCH 1784/4122] KVM: x86: avoid memslot check in NX hugepage recovery if it cannot succeed Since gfn_to_memslot() is relatively expensive, it helps to skip it if the memslot cannot possibly have dirty logging enabled. In order to do this, add to struct kvm a counter of the number of log-page memslots. While the correct value can only be read with slots_lock taken, the NX recovery thread is content with using an approximate value. Therefore, the counter is an atomic_t. Based on https://lore.kernel.org/kvm/20221027200316.2221027-2-dmatlack@google.com/ by David Matlack.
Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 22 +++++++++++++++++++--- include/linux/kvm_host.h | 5 +++++ virt/kvm/kvm_main.c | 8 ++++++++ 3 files changed, 32 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index cfff74685a25..4736d7849c60 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -6878,16 +6878,32 @@ static void kvm_recover_nx_huge_pages(struct kvm *kvm) WARN_ON_ONCE(!sp->nx_huge_page_disallowed); WARN_ON_ONCE(!sp->role.direct); - slot = gfn_to_memslot(kvm, sp->gfn); - WARN_ON_ONCE(!slot); - /* * Unaccount and do not attempt to recover any NX Huge Pages * that are being dirty tracked, as they would just be faulted * back in as 4KiB pages. The NX Huge Pages in this slot will be * recovered, along with all the other huge pages in the slot, * when dirty logging is disabled. + * + * Since gfn_to_memslot() is relatively expensive, it helps to + * skip it if it the test cannot possibly return true. On the + * other hand, if any memslot has logging enabled, chances are + * good that all of them do, in which case unaccount_nx_huge_page() + * is much cheaper than zapping the page. + * + * If a memslot update is in progress, reading an incorrect value + * of kvm->nr_memslots_dirty_logging is not a problem: if it is + * becoming zero, gfn_to_memslot() will be done unnecessarily; if + * it is becoming nonzero, the page will be zapped unnecessarily. + * Either way, this only affects efficiency in racy situations, + * and not correctness. 
*/ + slot = NULL; + if (atomic_read(&kvm->nr_memslots_dirty_logging)) { + slot = gfn_to_memslot(kvm, sp->gfn); + WARN_ON_ONCE(!slot); + } + if (slot && kvm_slot_dirty_track_enabled(slot)) unaccount_nx_huge_page(kvm, sp); else if (is_tdp_mmu_page(sp)) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index e6e66c5e56f2..6f0f389f5f9c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -722,6 +722,11 @@ struct kvm { /* The current active memslot set for each address space */ struct kvm_memslots __rcu *memslots[KVM_ADDRESS_SPACE_NUM]; struct xarray vcpu_array; + /* + * Protected by slots_lock, but can be read outside if an + * incorrect answer is acceptable. + */ + atomic_t nr_memslots_dirty_logging; /* Used to wait for completion of MMU notifiers. */ spinlock_t mn_invalidate_lock; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 43bbe4fde078..1782c4555d94 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1641,6 +1641,8 @@ static void kvm_commit_memory_region(struct kvm *kvm, const struct kvm_memory_slot *new, enum kvm_mr_change change) { + int old_flags = old ? old->flags : 0; + int new_flags = new ? new->flags : 0; /* * Update the total number of memslot pages before calling the arch * hook so that architectures can consume the result directly. @@ -1650,6 +1652,12 @@ static void kvm_commit_memory_region(struct kvm *kvm, else if (change == KVM_MR_CREATE) kvm->nr_memslot_pages += new->npages; + if ((old_flags ^ new_flags) & KVM_MEM_LOG_DIRTY_PAGES) { + int change = (new_flags & KVM_MEM_LOG_DIRTY_PAGES) ? 
1 : -1; + atomic_set(&kvm->nr_memslots_dirty_logging, + atomic_read(&kvm->nr_memslots_dirty_logging) + change); + } + kvm_arch_commit_memory_region(kvm, old, new, change); switch (change) { From 74c8e6bffbe10c4470139496f930c0b0752c85c9 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 29 Oct 2022 00:47:34 -0700 Subject: [PATCH 1785/4122] driver core: Add __alloc_size hint to devm allocators Mark the devm_*alloc()-family of allocations with appropriate __alloc_size()/__realloc_size() hints so the compiler can attempt to reason about buffer lengths from allocations. Cc: Greg Kroah-Hartman Cc: Rasmus Villemoes Cc: Thomas Gleixner Cc: Jason Gunthorpe Cc: Nishanth Menon Cc: Michael Kelley Cc: Dan Williams Cc: Won Chung Signed-off-by: Kees Cook Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20221029074734.gonna.276-kees@kernel.org --- include/linux/device.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/linux/device.h b/include/linux/device.h index 424b55df0272..5e4cd857e74f 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -197,9 +197,9 @@ void devres_remove_group(struct device *dev, void *id); int devres_release_group(struct device *dev, void *id); /* managed devm_k.alloc/kfree for device drivers */ -void *devm_kmalloc(struct device *dev, size_t size, gfp_t gfp) __malloc; +void *devm_kmalloc(struct device *dev, size_t size, gfp_t gfp) __alloc_size(2); void *devm_krealloc(struct device *dev, void *ptr, size_t size, - gfp_t gfp) __must_check; + gfp_t gfp) __must_check __realloc_size(3); __printf(3, 0) char *devm_kvasprintf(struct device *dev, gfp_t gfp, const char *fmt, va_list ap) __malloc; __printf(3, 4) char *devm_kasprintf(struct device *dev, gfp_t gfp, @@ -226,7 +226,8 @@ static inline void *devm_kcalloc(struct device *dev, void devm_kfree(struct device *dev, const void *p); char *devm_kstrdup(struct device *dev, const char *s, gfp_t gfp) __malloc; const char *devm_kstrdup_const(struct device 
*dev, const char *s, gfp_t gfp); -void *devm_kmemdup(struct device *dev, const void *src, size_t len, gfp_t gfp); +void *devm_kmemdup(struct device *dev, const void *src, size_t len, gfp_t gfp) + __realloc_size(3); unsigned long devm_get_free_pages(struct device *dev, gfp_t gfp_mask, unsigned int order); From 96d845a67b7e406cfed7880a724c8ca6121e022e Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 2 Nov 2022 08:42:15 -0700 Subject: [PATCH 1786/4122] drm/fsl-dcu: Fix return type of fsl_dcu_drm_connector_mode_valid() With clang's kernel control flow integrity (kCFI, CONFIG_CFI_CLANG), indirect call targets are validated against the expected function pointer prototype to make sure the call target is valid to help mitigate ROP attacks. If they are not identical, there is a failure at run time, which manifests as either a kernel panic or thread getting killed. A proposed warning in clang aims to catch these at compile time, which reveals: drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c:74:16: error: incompatible function pointer types initializing 'enum drm_mode_status (*)(struct drm_connector *, struct drm_display_mode *)' with an expression of type 'int (struct drm_connector *, struct drm_display_mode *)' [-Werror,-Wincompatible-function-pointer-types-strict] .mode_valid = fsl_dcu_drm_connector_mode_valid, ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1 error generated. ->mode_valid() in 'struct drm_connector_helper_funcs' expects a return type of 'enum drm_mode_status', not 'int'. Adjust the return type of fsl_dcu_drm_connector_mode_valid() to match the prototype's to resolve the warning and CFI failure. 
Link: https://github.com/ClangBuiltLinux/linux/issues/1750 Reported-by: Sami Tolvanen Signed-off-by: Nathan Chancellor Reviewed-by: Kees Cook Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221102154215.78059-1-nathan@kernel.org --- drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c index 4d4a715b429d..2c2b92324a2e 100644 --- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c +++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c @@ -60,8 +60,9 @@ static int fsl_dcu_drm_connector_get_modes(struct drm_connector *connector) return drm_panel_get_modes(fsl_connector->panel, connector); } -static int fsl_dcu_drm_connector_mode_valid(struct drm_connector *connector, - struct drm_display_mode *mode) +static enum drm_mode_status +fsl_dcu_drm_connector_mode_valid(struct drm_connector *connector, + struct drm_display_mode *mode) { if (mode->hdisplay & 0xf) return MODE_ERROR; From 0ad811cc08a937d875cbad0149c1bab17f84ba05 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 2 Nov 2022 08:56:23 -0700 Subject: [PATCH 1787/4122] drm/sti: Fix return type of sti_{dvo,hda,hdmi}_connector_mode_valid() With clang's kernel control flow integrity (kCFI, CONFIG_CFI_CLANG), indirect call targets are validated against the expected function pointer prototype to make sure the call target is valid to help mitigate ROP attacks. If they are not identical, there is a failure at run time, which manifests as either a kernel panic or thread getting killed. 
A proposed warning in clang aims to catch these at compile time, which reveals: drivers/gpu/drm/sti/sti_hda.c:637:16: error: incompatible function pointer types initializing 'enum drm_mode_status (*)(struct drm_connector *, struct drm_display_mode *)' with an expression of type 'int (struct drm_connector *, struct drm_display_mode *)' [-Werror,-Wincompatible-function-pointer-types-strict] .mode_valid = sti_hda_connector_mode_valid, ^~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/sti/sti_dvo.c:376:16: error: incompatible function pointer types initializing 'enum drm_mode_status (*)(struct drm_connector *, struct drm_display_mode *)' with an expression of type 'int (struct drm_connector *, struct drm_display_mode *)' [-Werror,-Wincompatible-function-pointer-types-strict] .mode_valid = sti_dvo_connector_mode_valid, ^~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/sti/sti_hdmi.c:1035:16: error: incompatible function pointer types initializing 'enum drm_mode_status (*)(struct drm_connector *, struct drm_display_mode *)' with an expression of type 'int (struct drm_connector *, struct drm_display_mode *)' [-Werror,-Wincompatible-function-pointer-types-strict] .mode_valid = sti_hdmi_connector_mode_valid, ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ->mode_valid() in 'struct drm_connector_helper_funcs' expects a return type of 'enum drm_mode_status', not 'int'. Adjust the return type of sti_{dvo,hda,hdmi}_connector_mode_valid() to match the prototype's to resolve the warning and CFI failure. 
Link: https://github.com/ClangBuiltLinux/linux/issues/1750 Signed-off-by: Nathan Chancellor Reviewed-by: Kees Cook Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221102155623.3042869-1-nathan@kernel.org --- drivers/gpu/drm/sti/sti_dvo.c | 5 +++-- drivers/gpu/drm/sti/sti_hda.c | 5 +++-- drivers/gpu/drm/sti/sti_hdmi.c | 5 +++-- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/sti/sti_dvo.c b/drivers/gpu/drm/sti/sti_dvo.c index b6ee8a82e656..076d5f30a09c 100644 --- a/drivers/gpu/drm/sti/sti_dvo.c +++ b/drivers/gpu/drm/sti/sti_dvo.c @@ -346,8 +346,9 @@ static int sti_dvo_connector_get_modes(struct drm_connector *connector) #define CLK_TOLERANCE_HZ 50 -static int sti_dvo_connector_mode_valid(struct drm_connector *connector, - struct drm_display_mode *mode) +static enum drm_mode_status +sti_dvo_connector_mode_valid(struct drm_connector *connector, + struct drm_display_mode *mode) { int target = mode->clock * 1000; int target_min = target - CLK_TOLERANCE_HZ; diff --git a/drivers/gpu/drm/sti/sti_hda.c b/drivers/gpu/drm/sti/sti_hda.c index 03cc401ed593..a53b5a15c2a9 100644 --- a/drivers/gpu/drm/sti/sti_hda.c +++ b/drivers/gpu/drm/sti/sti_hda.c @@ -601,8 +601,9 @@ static int sti_hda_connector_get_modes(struct drm_connector *connector) #define CLK_TOLERANCE_HZ 50 -static int sti_hda_connector_mode_valid(struct drm_connector *connector, - struct drm_display_mode *mode) +static enum drm_mode_status +sti_hda_connector_mode_valid(struct drm_connector *connector, + struct drm_display_mode *mode) { int target = mode->clock * 1000; int target_min = target - CLK_TOLERANCE_HZ; diff --git a/drivers/gpu/drm/sti/sti_hdmi.c b/drivers/gpu/drm/sti/sti_hdmi.c index cb82622877d2..09e0cadb6368 100644 --- a/drivers/gpu/drm/sti/sti_hdmi.c +++ b/drivers/gpu/drm/sti/sti_hdmi.c @@ -1004,8 +1004,9 @@ fail: #define CLK_TOLERANCE_HZ 50 -static int sti_hdmi_connector_mode_valid(struct drm_connector *connector, - struct drm_display_mode *mode) +static enum 
drm_mode_status +sti_hdmi_connector_mode_valid(struct drm_connector *connector, + struct drm_display_mode *mode) { int target = mode->clock * 1000; int target_min = target - CLK_TOLERANCE_HZ; From 089fe572a2e0a89e36a455d299d801770293d08f Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 1 Nov 2022 15:53:39 +0100 Subject: [PATCH 1788/4122] x86/hyperv: Move VMCB enlightenment definitions to hyperv-tlfs.h Move Hyper-V's VMCB enlightenment definitions to the TLFS header; the definitions come directly from the TLFS[*], not from KVM. No functional change intended. [*] https://learn.microsoft.com/en-us/virtualization/hyper-v-on-windows/tlfs/datatypes/hv_svm_enlightened_vmcb_fields [vitaly: rename VMCB_HV_ -> HV_VMCB_ to match the rest of hyperv-tlfs.h, keep svm/hyperv.h] Signed-off-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-2-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/hyperv-tlfs.h | 22 +++++++++++++++++++ arch/x86/kvm/svm/hyperv.h | 22 ------------------- arch/x86/kvm/svm/nested.c | 2 +- arch/x86/kvm/svm/svm_onhyperv.c | 2 +- arch/x86/kvm/svm/svm_onhyperv.h | 4 ++-- .../selftests/kvm/x86_64/hyperv_svm_test.c | 6 ++--- 6 files changed, 29 insertions(+), 29 deletions(-) diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 3089ec352743..245a806a9717 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -598,6 +598,28 @@ struct hv_enlightened_vmcs { #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL 0xFFFF +/* + * Hyper-V uses the software reserved 32 bytes in VMCB control area to expose + * SVM enlightenments to guests. 
+ */ +struct hv_enlightenments { + struct __packed hv_enlightenments_control { + u32 nested_flush_hypercall:1; + u32 msr_bitmap:1; + u32 enlightened_npt_tlb: 1; + u32 reserved:29; + } __packed hv_enlightenments_control; + u32 hv_vp_id; + u64 hv_vm_id; + u64 partition_assist_page; + u64 reserved; +} __packed; + +/* + * Hyper-V uses the software reserved clean bit in VMCB. + */ +#define HV_VMCB_NESTED_ENLIGHTENMENTS 31 + struct hv_partition_assist_pg { u32 tlb_lock_count; }; diff --git a/arch/x86/kvm/svm/hyperv.h b/arch/x86/kvm/svm/hyperv.h index 7d6d97968fb9..c59544cdf03b 100644 --- a/arch/x86/kvm/svm/hyperv.h +++ b/arch/x86/kvm/svm/hyperv.h @@ -10,26 +10,4 @@ #include "../hyperv.h" -/* - * Hyper-V uses the software reserved 32 bytes in VMCB - * control area to expose SVM enlightenments to guests. - */ -struct hv_enlightenments { - struct __packed hv_enlightenments_control { - u32 nested_flush_hypercall:1; - u32 msr_bitmap:1; - u32 enlightened_npt_tlb: 1; - u32 reserved:29; - } __packed hv_enlightenments_control; - u32 hv_vp_id; - u64 hv_vm_id; - u64 partition_assist_page; - u64 reserved; -} __packed; - -/* - * Hyper-V uses the software reserved clean bit in VMCB - */ -#define VMCB_HV_NESTED_ENLIGHTENMENTS VMCB_SW - #endif /* __ARCH_X86_KVM_SVM_HYPERV_H__ */ diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 3aa9184d1e4e..a2f25bffff48 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -195,7 +195,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) if (!svm->nested.force_msr_bitmap_recalc && kvm_hv_hypercall_enabled(&svm->vcpu) && hve->hv_enlightenments_control.msr_bitmap && - (svm->nested.ctl.clean & BIT(VMCB_HV_NESTED_ENLIGHTENMENTS))) + (svm->nested.ctl.clean & BIT(HV_VMCB_NESTED_ENLIGHTENMENTS))) goto set_msrpm_base_pa; if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT))) diff --git a/arch/x86/kvm/svm/svm_onhyperv.c b/arch/x86/kvm/svm/svm_onhyperv.c index 8cdc62c74a96..ed5e79392544 100644 --- 
a/arch/x86/kvm/svm/svm_onhyperv.c +++ b/arch/x86/kvm/svm/svm_onhyperv.c @@ -32,7 +32,7 @@ int svm_hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu) hve->hv_vm_id = (unsigned long)vcpu->kvm; if (!hve->hv_enlightenments_control.nested_flush_hypercall) { hve->hv_enlightenments_control.nested_flush_hypercall = 1; - vmcb_mark_dirty(to_svm(vcpu)->vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS); + vmcb_mark_dirty(to_svm(vcpu)->vmcb, HV_VMCB_NESTED_ENLIGHTENMENTS); } return 0; diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h index e2fc59380465..66e61a73caeb 100644 --- a/arch/x86/kvm/svm/svm_onhyperv.h +++ b/arch/x86/kvm/svm/svm_onhyperv.h @@ -64,7 +64,7 @@ static inline void svm_hv_vmcb_dirty_nested_enlightenments( (struct hv_enlightenments *)vmcb->control.reserved_sw; if (hve->hv_enlightenments_control.msr_bitmap) - vmcb_mark_dirty(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS); + vmcb_mark_dirty(vmcb, HV_VMCB_NESTED_ENLIGHTENMENTS); } static inline void svm_hv_update_vp_id(struct vmcb *vmcb, @@ -76,7 +76,7 @@ static inline void svm_hv_update_vp_id(struct vmcb *vmcb, if (hve->hv_vp_id != vp_index) { hve->hv_vp_id = vp_index; - vmcb_mark_dirty(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS); + vmcb_mark_dirty(vmcb, HV_VMCB_NESTED_ENLIGHTENMENTS); } } #else diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c index a380ad7bb9b3..5060fcfe1760 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c @@ -39,7 +39,7 @@ struct hv_enlightenments { /* * Hyper-V uses the software reserved clean bit in VMCB */ -#define VMCB_HV_NESTED_ENLIGHTENMENTS (1U << 31) +#define HV_VMCB_NESTED_ENLIGHTENMENTS (1U << 31) void l2_guest_code(void) { @@ -98,14 +98,14 @@ static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm) /* Intercept RDMSR 0xc0000101 without telling KVM about it */ set_bit(2 * (MSR_GS_BASE & 0x1fff), svm->msr + 0x800); /* 
Make sure HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP is set */ - vmcb->control.clean |= VMCB_HV_NESTED_ENLIGHTENMENTS; + vmcb->control.clean |= HV_VMCB_NESTED_ENLIGHTENMENTS; run_guest(vmcb, svm->vmcb_gpa); /* Make sure we don't see SVM_EXIT_MSR here so eMSR bitmap works */ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL); vmcb->save.rip += 3; /* vmcall */ /* Now tell KVM we've changed MSR-Bitmap */ - vmcb->control.clean &= ~VMCB_HV_NESTED_ENLIGHTENMENTS; + vmcb->control.clean &= ~HV_VMCB_NESTED_ENLIGHTENMENTS; run_guest(vmcb, svm->vmcb_gpa); GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR); vmcb->save.rip += 2; /* rdmsr */ From 381fc63ac0754e05d3921e9d399b89dfdfd2b2e5 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 1 Nov 2022 15:53:40 +0100 Subject: [PATCH 1789/4122] KVM: selftests: Move "struct hv_enlightenments" to x86_64/svm.h Move Hyper-V's VMCB "struct hv_enlightenments" to the svm.h header so that the struct can be referenced in "struct vmcb_control_area". Alternatively, a dedicated header for SVM+Hyper-V could be added, a la x86_64/evmcs.h, but it doesn't appear that Hyper-V will end up needing a wholesale replacement for the VMCB. No functional change intended. 
Signed-off-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-3-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- .../testing/selftests/kvm/include/x86_64/svm.h | 17 +++++++++++++++++ .../selftests/kvm/x86_64/hyperv_svm_test.c | 18 ------------------ 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/svm.h b/tools/testing/selftests/kvm/include/x86_64/svm.h index c8343ff84f7f..89ce2c6b57fe 100644 --- a/tools/testing/selftests/kvm/include/x86_64/svm.h +++ b/tools/testing/selftests/kvm/include/x86_64/svm.h @@ -58,6 +58,23 @@ enum { INTERCEPT_RDPRU, }; +struct hv_enlightenments { + struct __packed hv_enlightenments_control { + u32 nested_flush_hypercall:1; + u32 msr_bitmap:1; + u32 enlightened_npt_tlb: 1; + u32 reserved:29; + } __packed hv_enlightenments_control; + u32 hv_vp_id; + u64 hv_vm_id; + u64 partition_assist_page; + u64 reserved; +} __packed; + +/* + * Hyper-V uses the software reserved clean bit in VMCB + */ +#define HV_VMCB_NESTED_ENLIGHTENMENTS (1U << 31) struct __attribute__ ((__packed__)) vmcb_control_area { u32 intercept_cr; diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c index 5060fcfe1760..2fd64b419928 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c @@ -23,24 +23,6 @@ #define L2_GUEST_STACK_SIZE 256 -struct hv_enlightenments { - struct __packed hv_enlightenments_control { - u32 nested_flush_hypercall:1; - u32 msr_bitmap:1; - u32 enlightened_npt_tlb: 1; - u32 reserved:29; - } __packed hv_enlightenments_control; - u32 hv_vp_id; - u64 hv_vm_id; - u64 partition_assist_page; - u64 reserved; -} __packed; - -/* - * Hyper-V uses the software reserved clean bit in VMCB - */ -#define HV_VMCB_NESTED_ENLIGHTENMENTS (1U << 31) - void l2_guest_code(void) { GUEST_SYNC(3); From 
68ae7c7bc56a4504ed5efde7c2f8d6024148a35e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 1 Nov 2022 15:53:41 +0100 Subject: [PATCH 1790/4122] KVM: SVM: Add a proper field for Hyper-V VMCB enlightenments Add a union to provide hv_enlightenments side-by-side with the sw_reserved bytes that Hyper-V's enlightenments overlay. Casting sw_reserved everywhere is messy, confusing, and unnecessarily unsafe. No functional change intended. Signed-off-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-4-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/svm.h | 7 ++++++- arch/x86/kvm/svm/nested.c | 9 ++++----- arch/x86/kvm/svm/svm.h | 5 ++++- arch/x86/kvm/svm/svm_onhyperv.c | 2 +- arch/x86/kvm/svm/svm_onhyperv.h | 15 +++++++-------- tools/testing/selftests/kvm/include/x86_64/svm.h | 5 ++++- .../selftests/kvm/x86_64/hyperv_svm_test.c | 3 +-- 7 files changed, 27 insertions(+), 19 deletions(-) diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 4352b46dd20c..b37249e7c660 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -5,6 +5,8 @@ #include #include +#include + /* * 32-bit intercept words in the VMCB Control Area, starting * at Byte offset 000h. @@ -161,7 +163,10 @@ struct __attribute__ ((__packed__)) vmcb_control_area { * Offset 0x3e0, 32 bytes reserved * for use by hypervisor/software. 
*/ - u8 reserved_sw[32]; + union { + struct hv_enlightenments hv_enlightenments; + u8 reserved_sw[32]; + }; }; diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index a2f25bffff48..622fe00c3acf 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -180,8 +180,7 @@ void recalc_intercepts(struct vcpu_svm *svm) */ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) { - struct hv_enlightenments *hve = - (struct hv_enlightenments *)svm->nested.ctl.reserved_sw; + struct hv_enlightenments *hve = &svm->nested.ctl.hv_enlightenments; int i; /* @@ -370,8 +369,8 @@ void __nested_copy_vmcb_control_to_cache(struct kvm_vcpu *vcpu, /* Hyper-V extensions (Enlightened VMCB) */ if (kvm_hv_hypercall_enabled(vcpu)) { to->clean = from->clean; - memcpy(to->reserved_sw, from->reserved_sw, - sizeof(struct hv_enlightenments)); + memcpy(&to->hv_enlightenments, &from->hv_enlightenments, + sizeof(to->hv_enlightenments)); } } @@ -1488,7 +1487,7 @@ static void nested_copy_vmcb_cache_to_control(struct vmcb_control_area *dst, dst->virt_ext = from->virt_ext; dst->pause_filter_count = from->pause_filter_count; dst->pause_filter_thresh = from->pause_filter_thresh; - /* 'clean' and 'reserved_sw' are not changed by KVM */ + /* 'clean' and 'hv_enlightenments' are not changed by KVM */ } static int svm_get_nested_state(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 199a2ecef1ce..a5af367502e4 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -151,7 +151,10 @@ struct vmcb_ctrl_area_cached { u64 nested_cr3; u64 virt_ext; u32 clean; - u8 reserved_sw[32]; + union { + struct hv_enlightenments hv_enlightenments; + u8 reserved_sw[32]; + }; }; struct svm_nested_state { diff --git a/arch/x86/kvm/svm/svm_onhyperv.c b/arch/x86/kvm/svm/svm_onhyperv.c index ed5e79392544..422d00fee24a 100644 --- a/arch/x86/kvm/svm/svm_onhyperv.c +++ b/arch/x86/kvm/svm/svm_onhyperv.c @@ -26,7 +26,7 @@ int 
svm_hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu) if (!*p_hv_pa_pg) return -ENOMEM; - hve = (struct hv_enlightenments *)to_svm(vcpu)->vmcb->control.reserved_sw; + hve = &to_svm(vcpu)->vmcb->control.hv_enlightenments; hve->partition_assist_page = __pa(*p_hv_pa_pg); hve->hv_vm_id = (unsigned long)vcpu->kvm; diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h index 66e61a73caeb..5c664dd7bee2 100644 --- a/arch/x86/kvm/svm/svm_onhyperv.h +++ b/arch/x86/kvm/svm/svm_onhyperv.h @@ -17,8 +17,10 @@ int svm_hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu); static inline void svm_hv_init_vmcb(struct vmcb *vmcb) { - struct hv_enlightenments *hve = - (struct hv_enlightenments *)vmcb->control.reserved_sw; + struct hv_enlightenments *hve = &vmcb->control.hv_enlightenments; + + BUILD_BUG_ON(sizeof(vmcb->control.hv_enlightenments) != + sizeof(vmcb->control.reserved_sw)); if (npt_enabled && ms_hyperv.nested_features & HV_X64_NESTED_ENLIGHTENED_TLB) @@ -60,18 +62,15 @@ static inline void svm_hv_vmcb_dirty_nested_enlightenments( struct kvm_vcpu *vcpu) { struct vmcb *vmcb = to_svm(vcpu)->vmcb; - struct hv_enlightenments *hve = - (struct hv_enlightenments *)vmcb->control.reserved_sw; + struct hv_enlightenments *hve = &vmcb->control.hv_enlightenments; if (hve->hv_enlightenments_control.msr_bitmap) vmcb_mark_dirty(vmcb, HV_VMCB_NESTED_ENLIGHTENMENTS); } -static inline void svm_hv_update_vp_id(struct vmcb *vmcb, - struct kvm_vcpu *vcpu) +static inline void svm_hv_update_vp_id(struct vmcb *vmcb, struct kvm_vcpu *vcpu) { - struct hv_enlightenments *hve = - (struct hv_enlightenments *)vmcb->control.reserved_sw; + struct hv_enlightenments *hve = &vmcb->control.hv_enlightenments; u32 vp_index = kvm_hv_get_vpindex(vcpu); if (hve->hv_vp_id != vp_index) { diff --git a/tools/testing/selftests/kvm/include/x86_64/svm.h b/tools/testing/selftests/kvm/include/x86_64/svm.h index 89ce2c6b57fe..6e1527aa3419 100644 --- a/tools/testing/selftests/kvm/include/x86_64/svm.h +++ 
b/tools/testing/selftests/kvm/include/x86_64/svm.h @@ -123,7 +123,10 @@ struct __attribute__ ((__packed__)) vmcb_control_area { * Offset 0x3e0, 32 bytes reserved * for use by hypervisor/software. */ - u8 reserved_sw[32]; + union { + struct hv_enlightenments hv_enlightenments; + u8 reserved_sw[32]; + }; }; diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c index 2fd64b419928..8ef6a4c83cb1 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c @@ -46,8 +46,7 @@ static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm) { unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; struct vmcb *vmcb = svm->vmcb; - struct hv_enlightenments *hve = - (struct hv_enlightenments *)vmcb->control.reserved_sw; + struct hv_enlightenments *hve = &vmcb->control.hv_enlightenments; GUEST_SYNC(1); From 26b516bb39215cf60aa1fb55d0a6fd73058698fa Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 1 Nov 2022 15:53:42 +0100 Subject: [PATCH 1791/4122] x86/hyperv: KVM: Rename "hv_enlightenments" to "hv_vmcb_enlightenments" Now that KVM isn't littered with "struct hv_enlightenments" casts, rename the struct to "hv_vmcb_enlightenments" to highlight the fact that the struct is specifically for SVM's VMCB. No functional change intended. 
Signed-off-by: Sean Christopherson Reviewed-by: Michael Kelley Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-5-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/hyperv-tlfs.h | 2 +- arch/x86/include/asm/svm.h | 2 +- arch/x86/kvm/svm/nested.c | 2 +- arch/x86/kvm/svm/svm.h | 2 +- arch/x86/kvm/svm/svm_onhyperv.c | 2 +- arch/x86/kvm/svm/svm_onhyperv.h | 6 +++--- tools/testing/selftests/kvm/include/x86_64/svm.h | 4 ++-- tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c | 2 +- 8 files changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 245a806a9717..c5e0e5a06c0d 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -602,7 +602,7 @@ struct hv_enlightened_vmcs { * Hyper-V uses the software reserved 32 bytes in VMCB control area to expose * SVM enlightenments to guests. */ -struct hv_enlightenments { +struct hv_vmcb_enlightenments { struct __packed hv_enlightenments_control { u32 nested_flush_hypercall:1; u32 msr_bitmap:1; diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index b37249e7c660..cb1ee53ad3b1 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -164,7 +164,7 @@ struct __attribute__ ((__packed__)) vmcb_control_area { * for use by hypervisor/software. 
*/ union { - struct hv_enlightenments hv_enlightenments; + struct hv_vmcb_enlightenments hv_enlightenments; u8 reserved_sw[32]; }; }; diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 622fe00c3acf..cc8f47e7e294 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -180,7 +180,7 @@ void recalc_intercepts(struct vcpu_svm *svm) */ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) { - struct hv_enlightenments *hve = &svm->nested.ctl.hv_enlightenments; + struct hv_vmcb_enlightenments *hve = &svm->nested.ctl.hv_enlightenments; int i; /* diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index a5af367502e4..4826e6cc611b 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -152,7 +152,7 @@ struct vmcb_ctrl_area_cached { u64 virt_ext; u32 clean; union { - struct hv_enlightenments hv_enlightenments; + struct hv_vmcb_enlightenments hv_enlightenments; u8 reserved_sw[32]; }; }; diff --git a/arch/x86/kvm/svm/svm_onhyperv.c b/arch/x86/kvm/svm/svm_onhyperv.c index 422d00fee24a..52c73a8be72b 100644 --- a/arch/x86/kvm/svm/svm_onhyperv.c +++ b/arch/x86/kvm/svm/svm_onhyperv.c @@ -16,7 +16,7 @@ int svm_hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu) { - struct hv_enlightenments *hve; + struct hv_vmcb_enlightenments *hve; struct hv_partition_assist_pg **p_hv_pa_pg = &to_kvm_hv(vcpu->kvm)->hv_pa_pg; diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h index 5c664dd7bee2..d5cb2c62e355 100644 --- a/arch/x86/kvm/svm/svm_onhyperv.h +++ b/arch/x86/kvm/svm/svm_onhyperv.h @@ -17,7 +17,7 @@ int svm_hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu); static inline void svm_hv_init_vmcb(struct vmcb *vmcb) { - struct hv_enlightenments *hve = &vmcb->control.hv_enlightenments; + struct hv_vmcb_enlightenments *hve = &vmcb->control.hv_enlightenments; BUILD_BUG_ON(sizeof(vmcb->control.hv_enlightenments) != sizeof(vmcb->control.reserved_sw)); @@ -62,7 +62,7 @@ static inline void 
svm_hv_vmcb_dirty_nested_enlightenments( struct kvm_vcpu *vcpu) { struct vmcb *vmcb = to_svm(vcpu)->vmcb; - struct hv_enlightenments *hve = &vmcb->control.hv_enlightenments; + struct hv_vmcb_enlightenments *hve = &vmcb->control.hv_enlightenments; if (hve->hv_enlightenments_control.msr_bitmap) vmcb_mark_dirty(vmcb, HV_VMCB_NESTED_ENLIGHTENMENTS); @@ -70,7 +70,7 @@ static inline void svm_hv_vmcb_dirty_nested_enlightenments( static inline void svm_hv_update_vp_id(struct vmcb *vmcb, struct kvm_vcpu *vcpu) { - struct hv_enlightenments *hve = &vmcb->control.hv_enlightenments; + struct hv_vmcb_enlightenments *hve = &vmcb->control.hv_enlightenments; u32 vp_index = kvm_hv_get_vpindex(vcpu); if (hve->hv_vp_id != vp_index) { diff --git a/tools/testing/selftests/kvm/include/x86_64/svm.h b/tools/testing/selftests/kvm/include/x86_64/svm.h index 6e1527aa3419..483e6ae12f69 100644 --- a/tools/testing/selftests/kvm/include/x86_64/svm.h +++ b/tools/testing/selftests/kvm/include/x86_64/svm.h @@ -58,7 +58,7 @@ enum { INTERCEPT_RDPRU, }; -struct hv_enlightenments { +struct hv_vmcb_enlightenments { struct __packed hv_enlightenments_control { u32 nested_flush_hypercall:1; u32 msr_bitmap:1; @@ -124,7 +124,7 @@ struct __attribute__ ((__packed__)) vmcb_control_area { * for use by hypervisor/software. 
*/ union { - struct hv_enlightenments hv_enlightenments; + struct hv_vmcb_enlightenments hv_enlightenments; u8 reserved_sw[32]; }; }; diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c index 8ef6a4c83cb1..1c3fc38b4f15 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c @@ -46,7 +46,7 @@ static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm) { unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; struct vmcb *vmcb = svm->vmcb; - struct hv_enlightenments *hve = &vmcb->control.hv_enlightenments; + struct hv_vmcb_enlightenments *hve = &vmcb->control.hv_enlightenments; GUEST_SYNC(1); From b83237ad2167a0f9a43b909adb42623941b741b8 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 1 Nov 2022 15:53:43 +0100 Subject: [PATCH 1792/4122] KVM: x86: Rename 'enable_direct_tlbflush' to 'enable_l2_tlb_flush' To make terminology between Hyper-V-on-KVM and KVM-on-Hyper-V consistent, rename 'enable_direct_tlbflush' to 'enable_l2_tlb_flush'. The change eliminates the use of confusing 'direct' and adds the missing underscore. No functional change. 
Reviewed-by: Maxim Levitsky Reviewed-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-6-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm-x86-ops.h | 2 +- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/svm/svm_onhyperv.c | 2 +- arch/x86/kvm/svm/svm_onhyperv.h | 6 +++--- arch/x86/kvm/vmx/vmx.c | 6 +++--- arch/x86/kvm/x86.c | 6 +++--- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index ea58e67e9a67..abccd51dcfca 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -125,7 +125,7 @@ KVM_X86_OP_OPTIONAL(guest_memory_reclaimed) KVM_X86_OP(get_msr_feature) KVM_X86_OP(can_emulate_instruction) KVM_X86_OP(apic_init_signal_blocked) -KVM_X86_OP_OPTIONAL(enable_direct_tlbflush) +KVM_X86_OP_OPTIONAL(enable_l2_tlb_flush) KVM_X86_OP_OPTIONAL(migrate_timers) KVM_X86_OP(msr_filter_changed) KVM_X86_OP(complete_emulated_msr) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 598eb3b9ae44..a413f841e830 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1652,7 +1652,7 @@ struct kvm_x86_ops { void *insn, int insn_len); bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu); - int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu); + int (*enable_l2_tlb_flush)(struct kvm_vcpu *vcpu); void (*migrate_timers)(struct kvm_vcpu *vcpu); void (*msr_filter_changed)(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/svm/svm_onhyperv.c b/arch/x86/kvm/svm/svm_onhyperv.c index 52c73a8be72b..26a89d0da93e 100644 --- a/arch/x86/kvm/svm/svm_onhyperv.c +++ b/arch/x86/kvm/svm/svm_onhyperv.c @@ -14,7 +14,7 @@ #include "kvm_onhyperv.h" #include "svm_onhyperv.h" -int svm_hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu) +int svm_hv_enable_l2_tlb_flush(struct kvm_vcpu *vcpu) { struct hv_vmcb_enlightenments *hve; 
struct hv_partition_assist_pg **p_hv_pa_pg = diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h index d5cb2c62e355..45faf84476ce 100644 --- a/arch/x86/kvm/svm/svm_onhyperv.h +++ b/arch/x86/kvm/svm/svm_onhyperv.h @@ -13,7 +13,7 @@ static struct kvm_x86_ops svm_x86_ops; -int svm_hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu); +int svm_hv_enable_l2_tlb_flush(struct kvm_vcpu *vcpu); static inline void svm_hv_init_vmcb(struct vmcb *vmcb) { @@ -53,8 +53,8 @@ static inline void svm_hv_hardware_setup(void) vp_ap->nested_control.features.directhypercall = 1; } - svm_x86_ops.enable_direct_tlbflush = - svm_hv_enable_direct_tlbflush; + svm_x86_ops.enable_l2_tlb_flush = + svm_hv_enable_l2_tlb_flush; } } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index aca88524fd1e..5806ab88851e 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -527,7 +527,7 @@ static unsigned long host_idt_base; static bool __read_mostly enlightened_vmcs = true; module_param(enlightened_vmcs, bool, 0444); -static int hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu) +static int hv_enable_l2_tlb_flush(struct kvm_vcpu *vcpu) { struct hv_enlightened_vmcs *evmcs; struct hv_partition_assist_pg **p_hv_pa_pg = @@ -8520,8 +8520,8 @@ static int __init vmx_init(void) } if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH) - vmx_x86_ops.enable_direct_tlbflush - = hv_enable_direct_tlbflush; + vmx_x86_ops.enable_l2_tlb_flush + = hv_enable_l2_tlb_flush; } else { enlightened_vmcs = false; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 404325a13dc2..838ed4ea7e4d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4481,7 +4481,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) kvm_x86_ops.nested_ops->get_state(NULL, NULL, 0) : 0; break; case KVM_CAP_HYPERV_DIRECT_TLBFLUSH: - r = kvm_x86_ops.enable_direct_tlbflush != NULL; + r = kvm_x86_ops.enable_l2_tlb_flush != NULL; break; case KVM_CAP_HYPERV_ENLIGHTENED_VMCS: r = 
kvm_x86_ops.nested_ops->enable_evmcs != NULL; @@ -5494,10 +5494,10 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, } return r; case KVM_CAP_HYPERV_DIRECT_TLBFLUSH: - if (!kvm_x86_ops.enable_direct_tlbflush) + if (!kvm_x86_ops.enable_l2_tlb_flush) return -ENOTTY; - return static_call(kvm_x86_enable_direct_tlbflush)(vcpu); + return static_call(kvm_x86_enable_l2_tlb_flush)(vcpu); case KVM_CAP_HYPERV_ENFORCE_CPUID: return kvm_hv_set_enforce_cpuid(vcpu, cap->args[0]); From a789aeba419647c44d7e7320de20fea037c211d0 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 1 Nov 2022 15:53:44 +0100 Subject: [PATCH 1793/4122] KVM: VMX: Rename "vmx/evmcs.{ch}" to "vmx/hyperv.{ch}" To conform with SVM, rename VMX specific Hyper-V files from "evmcs.{ch}" to "hyperv.{ch}". While Enlightened VMCS is a lion's share of these files, some stuff (e.g. enlightened MSR bitmap, the upcoming Hyper-V L2 TLB flush, ...) goes beyond that. Reviewed-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-7-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/Makefile | 2 +- arch/x86/kvm/vmx/{evmcs.c => hyperv.c} | 3 +-- arch/x86/kvm/vmx/{evmcs.h => hyperv.h} | 8 +++++--- arch/x86/kvm/vmx/nested.c | 1 - arch/x86/kvm/vmx/vmx.c | 1 - arch/x86/kvm/vmx/vmx_ops.h | 2 +- 6 files changed, 8 insertions(+), 9 deletions(-) rename arch/x86/kvm/vmx/{evmcs.c => hyperv.c} (99%) rename arch/x86/kvm/vmx/{evmcs.h => hyperv.h} (98%) diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index b8a494b6a5ec..4cf407563fee 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -23,7 +23,7 @@ kvm-$(CONFIG_KVM_XEN) += xen.o kvm-$(CONFIG_KVM_SMM) += smm.o kvm-intel-y += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \ - vmx/evmcs.o vmx/nested.o vmx/posted_intr.o + vmx/hyperv.o vmx/nested.o vmx/posted_intr.o kvm-intel-$(CONFIG_X86_SGX_KVM) += vmx/sgx.o kvm-amd-y += svm/svm.o svm/vmenter.o svm/pmu.o 
svm/nested.o svm/avic.o svm/sev.o diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/hyperv.c similarity index 99% rename from arch/x86/kvm/vmx/evmcs.c rename to arch/x86/kvm/vmx/hyperv.c index d8b23c96d627..5e239158174e 100644 --- a/arch/x86/kvm/vmx/evmcs.c +++ b/arch/x86/kvm/vmx/hyperv.c @@ -3,9 +3,8 @@ #include #include -#include "../hyperv.h" #include "../cpuid.h" -#include "evmcs.h" +#include "hyperv.h" #include "vmcs.h" #include "vmx.h" #include "trace.h" diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/hyperv.h similarity index 98% rename from arch/x86/kvm/vmx/evmcs.h rename to arch/x86/kvm/vmx/hyperv.h index 6f746ef3c038..99a151af7a81 100644 --- a/arch/x86/kvm/vmx/evmcs.h +++ b/arch/x86/kvm/vmx/hyperv.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __KVM_X86_VMX_EVMCS_H -#define __KVM_X86_VMX_EVMCS_H +#ifndef __KVM_X86_VMX_HYPERV_H +#define __KVM_X86_VMX_HYPERV_H #include @@ -8,6 +8,8 @@ #include #include +#include "../hyperv.h" + #include "capabilities.h" #include "vmcs.h" #include "vmcs12.h" @@ -242,4 +244,4 @@ int nested_enable_evmcs(struct kvm_vcpu *vcpu, void nested_evmcs_filter_control_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); int nested_evmcs_check_controls(struct vmcs12 *vmcs12); -#endif /* __KVM_X86_VMX_EVMCS_H */ +#endif /* __KVM_X86_VMX_HYPERV_H */ diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 7924dea93678..048b2c3e3b3f 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -7,7 +7,6 @@ #include #include "cpuid.h" -#include "evmcs.h" #include "hyperv.h" #include "mmu.h" #include "nested.h" diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 5806ab88851e..cea8c07f5229 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -51,7 +51,6 @@ #include "capabilities.h" #include "cpuid.h" -#include "evmcs.h" #include "hyperv.h" #include "kvm_onhyperv.h" #include "irq.h" diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h 
index ec268df83ed6..f6f23c7397dc 100644 --- a/arch/x86/kvm/vmx/vmx_ops.h +++ b/arch/x86/kvm/vmx/vmx_ops.h @@ -6,7 +6,7 @@ #include -#include "evmcs.h" +#include "hyperv.h" #include "vmcs.h" #include "../x86.h" From e94cea0930195adee67c93140ad9f95e430b2be8 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 1 Nov 2022 15:53:45 +0100 Subject: [PATCH 1794/4122] KVM: x86: Move clearing of TLB_FLUSH_CURRENT to kvm_vcpu_flush_tlb_all() Clear KVM_REQ_TLB_FLUSH_CURRENT in kvm_vcpu_flush_tlb_all() instead of in its sole caller that processes KVM_REQ_TLB_FLUSH. Regardless of why/when kvm_vcpu_flush_tlb_all() is called, flushing "all" TLB entries also flushes "current" TLB entries. Ideally, there will never be another caller of kvm_vcpu_flush_tlb_all(), and moving the handling "requires" extra work to document the ordering requirement, but future Hyper-V paravirt TLB flushing support will add similar logic for flush "guest" (Hyper-V can flush a subset of "guest" entries). And in the Hyper-V case, KVM needs to do more than just clear the request, the queue of GPAs to flush also needs to be purged, and doing all only in the request path is undesirable as kvm_vcpu_flush_tlb_guest() does have multiple callers (though it's unlikely KVM's paravirt TLB flush will coincide with Hyper-V's paravirt TLB flush). Move the logic even though it adds extra "work" so that KVM will be consistent with how flush requests are processed when the Hyper-V support lands. No functional change intended. 
Signed-off-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-8-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 838ed4ea7e4d..7fc5508c0b4a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3399,6 +3399,9 @@ static void kvm_vcpu_flush_tlb_all(struct kvm_vcpu *vcpu) { ++vcpu->stat.tlb_flush; static_call(kvm_x86_flush_tlb_all)(vcpu); + + /* Flushing all ASIDs flushes the current ASID... */ + kvm_clear_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); } static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu) @@ -10236,12 +10239,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_mmu_sync_roots(vcpu); if (kvm_check_request(KVM_REQ_LOAD_MMU_PGD, vcpu)) kvm_mmu_load_pgd(vcpu); - if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { + + /* + * Note, the order matters here, as flushing "all" TLB entries + * also flushes the "current" TLB entries, i.e. servicing the + * flush "all" will clear any request to flush "current". + */ + if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) kvm_vcpu_flush_tlb_all(vcpu); - /* Flushing all ASIDs flushes the current ASID... */ - kvm_clear_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); - } kvm_service_local_tlb_flush_requests(vcpu); if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) { From adc43caa0a25746e1a9dabbab241abd01120dbfe Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 1 Nov 2022 15:53:46 +0100 Subject: [PATCH 1795/4122] KVM: x86: hyper-v: Resurrect dedicated KVM_REQ_HV_TLB_FLUSH flag In preparation to implementing fine-grained Hyper-V TLB flush and L2 TLB flush, resurrect dedicated KVM_REQ_HV_TLB_FLUSH request bit. As KVM_REQ_TLB_FLUSH_GUEST is a stronger operation, clear KVM_REQ_HV_TLB_FLUSH request in kvm_vcpu_flush_tlb_guest(). 
The flush itself is temporarily handled by kvm_vcpu_flush_tlb_guest(). No functional change intended. Reviewed-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-9-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/hyperv.c | 4 ++-- arch/x86/kvm/svm/svm.c | 7 +++++++ arch/x86/kvm/x86.c | 9 +++++++++ 4 files changed, 20 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a413f841e830..0b85230a0e0a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -110,6 +110,8 @@ KVM_ARCH_REQ_FLAGS(30, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_MMU_FREE_OBSOLETE_ROOTS \ KVM_ARCH_REQ_FLAGS(31, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_HV_TLB_FLUSH \ + KVM_ARCH_REQ_FLAGS(32, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define CR0_RESERVED_BITS \ (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 0adf4a437e85..3c0f639f6a05 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1870,11 +1870,11 @@ do_flush: * analyze it here, flush TLB regardless of the specified address space. 
*/ if (all_cpus) { - kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH_GUEST); + kvm_make_all_cpus_request(kvm, KVM_REQ_HV_TLB_FLUSH); } else { sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask, vcpu_mask); - kvm_make_vcpus_request_mask(kvm, KVM_REQ_TLB_FLUSH_GUEST, vcpu_mask); + kvm_make_vcpus_request_mask(kvm, KVM_REQ_HV_TLB_FLUSH, vcpu_mask); } ret_success: diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 7efc4fdaa446..4ea6ddd99899 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3722,6 +3722,13 @@ static void svm_flush_tlb_current(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + /* + * Unlike VMX, SVM doesn't provide a way to flush only NPT TLB entries. + * A TLB flush for the current ASID flushes both "host" and "guest" TLB + * entries, and thus is a superset of Hyper-V's fine grained flushing. + */ + kvm_clear_request(KVM_REQ_HV_TLB_FLUSH, vcpu); + /* * Flush only the current ASID even if the TLB flush was invoked via * kvm_flush_remote_tlbs(). Although flushing remote TLBs requires all diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7fc5508c0b4a..12e49e8566d4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3420,6 +3420,12 @@ static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu) } static_call(kvm_x86_flush_tlb_guest)(vcpu); + + /* + * Flushing all "guest" TLB is always a superset of Hyper-V's fine + * grained flushing. 
+ */ + kvm_clear_request(KVM_REQ_HV_TLB_FLUSH, vcpu); } @@ -10250,6 +10256,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_service_local_tlb_flush_requests(vcpu); + if (kvm_check_request(KVM_REQ_HV_TLB_FLUSH, vcpu)) + kvm_vcpu_flush_tlb_guest(vcpu); + if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) { vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS; r = 0; From 0823570f01989d3703751f66534a138d4fae062e Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 1 Nov 2022 15:53:47 +0100 Subject: [PATCH 1796/4122] KVM: x86: hyper-v: Introduce TLB flush fifo To allow flushing individual GVAs instead of always flushing the whole VPID a per-vCPU structure to pass the requests is needed. Use standard 'kfifo' to queue two types of entries: individual GVA (GFN + up to 4095 following GFNs in the lower 12 bits) and 'flush all'. The size of the fifo is arbitrarily set to '16'. Note, kvm_hv_flush_tlb() only queues 'flush all' entries for now and kvm_hv_vcpu_flush_tlb() doesn't actually read the fifo just resets the queue before returning -EOPNOTSUPP (which triggers full TLB flush) so the functional change is very small but the infrastructure is prepared to handle individual GVA flush requests. 
Reviewed-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-10-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 20 ++++++++++++++ arch/x86/kvm/hyperv.c | 47 +++++++++++++++++++++++++++++++++ arch/x86/kvm/hyperv.h | 15 +++++++++++ arch/x86/kvm/svm/svm.c | 2 +- arch/x86/kvm/x86.c | 11 ++++++-- 5 files changed, 92 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 0b85230a0e0a..3e35dcf40dc7 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -618,6 +619,23 @@ struct kvm_vcpu_hv_synic { bool dont_zero_synic_pages; }; +/* The maximum number of entries on the TLB flush fifo. */ +#define KVM_HV_TLB_FLUSH_FIFO_SIZE (16) +/* + * Note: the following 'magic' entry is made up by KVM to avoid putting + * anything besides GVA on the TLB flush fifo. It is theoretically possible + * to observe a request to flush 4095 PFNs starting from 0xfffffffffffff000 + * which will look identical. KVM's action to 'flush everything' instead of + * flushing these particular addresses is, however, fully legitimate as + * flushing more than requested is always OK. 
+ */ +#define KVM_HV_TLB_FLUSHALL_ENTRY ((u64)-1) + +struct kvm_vcpu_hv_tlb_flush_fifo { + spinlock_t write_lock; + DECLARE_KFIFO(entries, u64, KVM_HV_TLB_FLUSH_FIFO_SIZE); +}; + /* Hyper-V per vcpu emulation context */ struct kvm_vcpu_hv { struct kvm_vcpu *vcpu; @@ -639,6 +657,8 @@ struct kvm_vcpu_hv { u32 nested_eax; /* HYPERV_CPUID_NESTED_FEATURES.EAX */ u32 nested_ebx; /* HYPERV_CPUID_NESTED_FEATURES.EBX */ } cpuid_cache; + + struct kvm_vcpu_hv_tlb_flush_fifo tlb_flush_fifo; }; /* Xen HVM per vcpu emulation context */ diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 3c0f639f6a05..9d9a5ff2d54b 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -954,6 +955,9 @@ int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu) hv_vcpu->vp_index = vcpu->vcpu_idx; + INIT_KFIFO(hv_vcpu->tlb_flush_fifo.entries); + spin_lock_init(&hv_vcpu->tlb_flush_fifo.write_lock); + return 0; } @@ -1783,6 +1787,37 @@ static u64 kvm_get_sparse_vp_set(struct kvm *kvm, struct kvm_hv_hcall *hc, var_cnt * sizeof(*sparse_banks)); } +static void hv_tlb_flush_enqueue(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo; + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); + u64 flush_all_entry = KVM_HV_TLB_FLUSHALL_ENTRY; + + if (!hv_vcpu) + return; + + tlb_flush_fifo = &hv_vcpu->tlb_flush_fifo; + + kfifo_in_spinlocked_noirqsave(&tlb_flush_fifo->entries, &flush_all_entry, + 1, &tlb_flush_fifo->write_lock); +} + +int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo; + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); + + if (!hv_vcpu) + return -EINVAL; + + tlb_flush_fifo = &hv_vcpu->tlb_flush_fifo; + + kfifo_reset_out(&tlb_flush_fifo->entries); + + /* Precise flushing isn't implemented yet. 
*/ + return -EOPNOTSUPP; +} + static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) { struct kvm *kvm = vcpu->kvm; @@ -1791,6 +1826,8 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS); u64 valid_bank_mask; u64 sparse_banks[KVM_HV_MAX_SPARSE_VCPU_SET_BITS]; + struct kvm_vcpu *v; + unsigned long i; bool all_cpus; /* @@ -1870,10 +1907,20 @@ do_flush: * analyze it here, flush TLB regardless of the specified address space. */ if (all_cpus) { + kvm_for_each_vcpu(i, v, kvm) + hv_tlb_flush_enqueue(v); + kvm_make_all_cpus_request(kvm, KVM_REQ_HV_TLB_FLUSH); } else { sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask, vcpu_mask); + for_each_set_bit(i, vcpu_mask, KVM_MAX_VCPUS) { + v = kvm_get_vcpu(kvm, i); + if (!v) + continue; + hv_tlb_flush_enqueue(v); + } + kvm_make_vcpus_request_mask(kvm, KVM_REQ_HV_TLB_FLUSH, vcpu_mask); } diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index 1030b1b50552..f79edf9234cd 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -151,4 +151,19 @@ int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args); int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 __user *entries); +static inline void kvm_hv_vcpu_purge_flush_tlb(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo; + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); + + if (!hv_vcpu || !kvm_check_request(KVM_REQ_HV_TLB_FLUSH, vcpu)) + return; + + tlb_flush_fifo = &hv_vcpu->tlb_flush_fifo; + + kfifo_reset_out(&tlb_flush_fifo->entries); +} + +int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu); + #endif diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 4ea6ddd99899..91352d692845 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3727,7 +3727,7 @@ static void svm_flush_tlb_current(struct kvm_vcpu *vcpu) * A TLB flush for the current ASID flushes both
"host" and "guest" TLB * entries, and thus is a superset of Hyper-V's fine grained flushing. */ - kvm_clear_request(KVM_REQ_HV_TLB_FLUSH, vcpu); + kvm_hv_vcpu_purge_flush_tlb(vcpu); /* * Flush only the current ASID even if the TLB flush was invoked via diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 12e49e8566d4..72ac6bf05c8b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3425,7 +3425,7 @@ static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu) * Flushing all "guest" TLB is always a superset of Hyper-V's fine * grained flushing. */ - kvm_clear_request(KVM_REQ_HV_TLB_FLUSH, vcpu); + kvm_hv_vcpu_purge_flush_tlb(vcpu); } @@ -10256,7 +10256,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_service_local_tlb_flush_requests(vcpu); - if (kvm_check_request(KVM_REQ_HV_TLB_FLUSH, vcpu)) + /* + * Fall back to a "full" guest flush if Hyper-V's precise + * flushing fails. Note, Hyper-V's flushing is per-vCPU, but + * the flushes are considered "remote" and not "local" because + * the requests can be initiated from other vCPUs. + */ + if (kvm_check_request(KVM_REQ_HV_TLB_FLUSH, vcpu) && + kvm_hv_vcpu_flush_tlb(vcpu)) kvm_vcpu_flush_tlb_guest(vcpu); if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) { From 56b5354fd8f9173de2e1614864e5fb7bec8c50c4 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 1 Nov 2022 15:53:48 +0100 Subject: [PATCH 1797/4122] KVM: x86: hyper-v: Add helper to read hypercall data for array Move the guts of kvm_get_sparse_vp_set() to a helper so that the code for reading a guest-provided array can be reused in the future, e.g. for getting a list of virtual addresses whose TLB entries need to be flushed. Opportunistically swap the order of the data and XMM adjustment so that the XMM/gpa offsets are bundled together. No functional change intended. 
Signed-off-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-11-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/hyperv.c | 53 +++++++++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 20 deletions(-) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 9d9a5ff2d54b..3ba7e2d2fbbd 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1753,38 +1753,51 @@ struct kvm_hv_hcall { sse128_t xmm[HV_HYPERCALL_MAX_XMM_REGISTERS]; }; -static u64 kvm_get_sparse_vp_set(struct kvm *kvm, struct kvm_hv_hcall *hc, - int consumed_xmm_halves, - u64 *sparse_banks, gpa_t offset) + +static int kvm_hv_get_hc_data(struct kvm *kvm, struct kvm_hv_hcall *hc, + u16 orig_cnt, u16 cnt_cap, u64 *data, + int consumed_xmm_halves, gpa_t offset) { - u16 var_cnt; - int i; - - if (hc->var_cnt > 64) - return -EINVAL; - - /* Ignore banks that cannot possibly contain a legal VP index. */ - var_cnt = min_t(u16, hc->var_cnt, KVM_HV_MAX_SPARSE_VCPU_SET_BITS); + /* + * Preserve the original count when ignoring entries via a "cap", KVM + * still needs to validate the guest input (though the non-XMM path + * punts on the checks). + */ + u16 cnt = min(orig_cnt, cnt_cap); + int i, j; if (hc->fast) { /* * Each XMM holds two sparse banks, but do not count halves that * have already been consumed for hypercall parameters. 
*/ - if (hc->var_cnt > 2 * HV_HYPERCALL_MAX_XMM_REGISTERS - consumed_xmm_halves) + if (orig_cnt > 2 * HV_HYPERCALL_MAX_XMM_REGISTERS - consumed_xmm_halves) return HV_STATUS_INVALID_HYPERCALL_INPUT; - for (i = 0; i < var_cnt; i++) { - int j = i + consumed_xmm_halves; + + for (i = 0; i < cnt; i++) { + j = i + consumed_xmm_halves; if (j % 2) - sparse_banks[i] = sse128_hi(hc->xmm[j / 2]); + data[i] = sse128_hi(hc->xmm[j / 2]); else - sparse_banks[i] = sse128_lo(hc->xmm[j / 2]); + data[i] = sse128_lo(hc->xmm[j / 2]); } return 0; } - return kvm_read_guest(kvm, hc->ingpa + offset, sparse_banks, - var_cnt * sizeof(*sparse_banks)); + return kvm_read_guest(kvm, hc->ingpa + offset, data, + cnt * sizeof(*data)); +} + +static u64 kvm_get_sparse_vp_set(struct kvm *kvm, struct kvm_hv_hcall *hc, + u64 *sparse_banks, int consumed_xmm_halves, + gpa_t offset) +{ + if (hc->var_cnt > 64) + return -EINVAL; + + /* Cap var_cnt to ignore banks that cannot contain a legal VP index. */ + return kvm_hv_get_hc_data(kvm, hc, hc->var_cnt, KVM_HV_MAX_SPARSE_VCPU_SET_BITS, + sparse_banks, consumed_xmm_halves, offset); } static void hv_tlb_flush_enqueue(struct kvm_vcpu *vcpu) @@ -1895,7 +1908,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) if (!hc->var_cnt) goto ret_success; - if (kvm_get_sparse_vp_set(kvm, hc, 2, sparse_banks, + if (kvm_get_sparse_vp_set(kvm, hc, sparse_banks, 2, offsetof(struct hv_tlb_flush_ex, hv_vp_set.bank_contents))) return HV_STATUS_INVALID_HYPERCALL_INPUT; @@ -2006,7 +2019,7 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) if (!hc->var_cnt) goto ret_success; - if (kvm_get_sparse_vp_set(kvm, hc, 1, sparse_banks, + if (kvm_get_sparse_vp_set(kvm, hc, sparse_banks, 1, offsetof(struct hv_send_ipi_ex, vp_set.bank_contents))) return HV_STATUS_INVALID_HYPERCALL_INPUT; From 260970862c88b4130e9e12be023c7e2c2d37a966 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 1 Nov 2022 15:53:49 +0100 Subject: [PATCH 
1798/4122] KVM: x86: hyper-v: Handle HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST{,EX} calls gently Currently, HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST{,EX} calls are handled the exact same way as HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE{,EX}: by flushing the whole VPID and this is sub-optimal. Switch to handling these requests with 'flush_tlb_gva()' hooks instead. Use the newly introduced TLB flush fifo to queue the requests. Reviewed-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-12-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/hyperv.c | 113 +++++++++++++++++++++++++++++++++++------- 1 file changed, 96 insertions(+), 17 deletions(-) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 3ba7e2d2fbbd..6868c478617c 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1800,7 +1800,14 @@ static u64 kvm_get_sparse_vp_set(struct kvm *kvm, struct kvm_hv_hcall *hc, sparse_banks, consumed_xmm_halves, offset); } -static void hv_tlb_flush_enqueue(struct kvm_vcpu *vcpu) +static int kvm_hv_get_tlb_flush_entries(struct kvm *kvm, struct kvm_hv_hcall *hc, u64 entries[], + int consumed_xmm_halves, gpa_t offset) +{ + return kvm_hv_get_hc_data(kvm, hc, hc->rep_cnt, hc->rep_cnt, + entries, consumed_xmm_halves, offset); +} + +static void hv_tlb_flush_enqueue(struct kvm_vcpu *vcpu, u64 *entries, int count) { struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo; struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); @@ -1811,24 +1818,64 @@ static void hv_tlb_flush_enqueue(struct kvm_vcpu *vcpu) tlb_flush_fifo = &hv_vcpu->tlb_flush_fifo; - kfifo_in_spinlocked_noirqsave(&tlb_flush_fifo->entries, &flush_all_entry, - 1, &tlb_flush_fifo->write_lock); + spin_lock(&tlb_flush_fifo->write_lock); + + /* + * All entries should fit on the fifo leaving one free for 'flush all' + * entry in case another request comes in. In case there's not enough + * space, just put 'flush all' entry there. 
+ */ + if (count && entries && count < kfifo_avail(&tlb_flush_fifo->entries)) { + WARN_ON(kfifo_in(&tlb_flush_fifo->entries, entries, count) != count); + goto out_unlock; + } + + /* + * Note: full fifo always contains 'flush all' entry, no need to check the + * return value. + */ + kfifo_in(&tlb_flush_fifo->entries, &flush_all_entry, 1); + +out_unlock: + spin_unlock(&tlb_flush_fifo->write_lock); } int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu) { struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo; struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); + u64 entries[KVM_HV_TLB_FLUSH_FIFO_SIZE]; + int i, j, count; + gva_t gva; - if (!hv_vcpu) + if (!tdp_enabled || !hv_vcpu) return -EINVAL; tlb_flush_fifo = &hv_vcpu->tlb_flush_fifo; + count = kfifo_out(&tlb_flush_fifo->entries, entries, KVM_HV_TLB_FLUSH_FIFO_SIZE); + + for (i = 0; i < count; i++) { + if (entries[i] == KVM_HV_TLB_FLUSHALL_ENTRY) + goto out_flush_all; + + /* + * Lower 12 bits of 'address' encode the number of additional + * pages to flush. + */ + gva = entries[i] & PAGE_MASK; + for (j = 0; j < (entries[i] & ~PAGE_MASK) + 1; j++) + static_call(kvm_x86_flush_tlb_gva)(vcpu, gva + j * PAGE_SIZE); + + ++vcpu->stat.tlb_flush; + } + return 0; + +out_flush_all: kfifo_reset_out(&tlb_flush_fifo->entries); - /* Precise flushing isn't implemented yet. */ - return -EOPNOTSUPP; + /* Fall back to full flush. */ + return -ENOSPC; } static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) @@ -1837,11 +1884,21 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) struct hv_tlb_flush_ex flush_ex; struct hv_tlb_flush flush; DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS); + /* + * Normally, there can be no more than 'KVM_HV_TLB_FLUSH_FIFO_SIZE' + * entries on the TLB flush fifo. The last entry, however, needs to be + * always left free for 'flush all' entry which gets placed when + * there is not enough space to put all the requested entries. 
+ */ + u64 __tlb_flush_entries[KVM_HV_TLB_FLUSH_FIFO_SIZE - 1]; + u64 *tlb_flush_entries; u64 valid_bank_mask; u64 sparse_banks[KVM_HV_MAX_SPARSE_VCPU_SET_BITS]; struct kvm_vcpu *v; unsigned long i; bool all_cpus; + int consumed_xmm_halves = 0; + gpa_t data_offset; /* * The Hyper-V TLFS doesn't allow more than 64 sparse banks, e.g. the @@ -1857,10 +1914,12 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) flush.address_space = hc->ingpa; flush.flags = hc->outgpa; flush.processor_mask = sse128_lo(hc->xmm[0]); + consumed_xmm_halves = 1; } else { if (unlikely(kvm_read_guest(kvm, hc->ingpa, &flush, sizeof(flush)))) return HV_STATUS_INVALID_HYPERCALL_INPUT; + data_offset = sizeof(flush); } trace_kvm_hv_flush_tlb(flush.processor_mask, @@ -1884,10 +1943,12 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) flush_ex.flags = hc->outgpa; memcpy(&flush_ex.hv_vp_set, &hc->xmm[0], sizeof(hc->xmm[0])); + consumed_xmm_halves = 2; } else { if (unlikely(kvm_read_guest(kvm, hc->ingpa, &flush_ex, sizeof(flush_ex)))) return HV_STATUS_INVALID_HYPERCALL_INPUT; + data_offset = sizeof(flush_ex); } trace_kvm_hv_flush_tlb_ex(flush_ex.hv_vp_set.valid_bank_mask, @@ -1902,26 +1963,44 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) if (hc->var_cnt != hweight64(valid_bank_mask)) return HV_STATUS_INVALID_HYPERCALL_INPUT; - if (all_cpus) - goto do_flush; + if (!all_cpus) { + if (!hc->var_cnt) + goto ret_success; - if (!hc->var_cnt) - goto ret_success; + if (kvm_get_sparse_vp_set(kvm, hc, sparse_banks, + consumed_xmm_halves, data_offset)) + return HV_STATUS_INVALID_HYPERCALL_INPUT; + } - if (kvm_get_sparse_vp_set(kvm, hc, sparse_banks, 2, - offsetof(struct hv_tlb_flush_ex, - hv_vp_set.bank_contents))) - return HV_STATUS_INVALID_HYPERCALL_INPUT; + /* + * Hyper-V TLFS doesn't explicitly forbid non-empty sparse vCPU + * banks (and, thus, non-zero 'var_cnt') for the 'all vCPUs' + * case (HV_GENERIC_SET_ALL). 
Always adjust data_offset and + * consumed_xmm_halves to make sure TLB flush entries are read + * from the correct offset. + */ + data_offset += hc->var_cnt * sizeof(sparse_banks[0]); + consumed_xmm_halves += hc->var_cnt; + } + + if (hc->code == HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE || + hc->code == HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX || + hc->rep_cnt > ARRAY_SIZE(__tlb_flush_entries)) { + tlb_flush_entries = NULL; + } else { + if (kvm_hv_get_tlb_flush_entries(kvm, hc, __tlb_flush_entries, + consumed_xmm_halves, data_offset)) + return HV_STATUS_INVALID_HYPERCALL_INPUT; + tlb_flush_entries = __tlb_flush_entries; } -do_flush: /* * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we can't * analyze it here, flush TLB regardless of the specified address space. */ if (all_cpus) { kvm_for_each_vcpu(i, v, kvm) - hv_tlb_flush_enqueue(v); + hv_tlb_flush_enqueue(v, tlb_flush_entries, hc->rep_cnt); kvm_make_all_cpus_request(kvm, KVM_REQ_HV_TLB_FLUSH); } else { @@ -1931,7 +2010,7 @@ do_flush: v = kvm_get_vcpu(kvm, i); if (!v) continue; - hv_tlb_flush_enqueue(v); + hv_tlb_flush_enqueue(v, tlb_flush_entries, hc->rep_cnt); } kvm_make_vcpus_request_mask(kvm, KVM_REQ_HV_TLB_FLUSH, vcpu_mask); From f84fcb66568c0b00626f7f03e28db7d0dcba8098 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 1 Nov 2022 15:53:50 +0100 Subject: [PATCH 1799/4122] KVM: x86: hyper-v: Expose support for extended gva ranges for flush hypercalls Extended GVA ranges support bit seems to indicate whether lower 12 bits of GVA can be used to specify up to 4095 additional consecutive GVAs to flush. This is somewhat described in TLFS. Previously, KVM was handling HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST{,EX} requests by flushing the whole VPID so technically, extended GVA ranges were already supported. As such requests are handled more gently now, advertising support for extended ranges starts making sense to reduce the size of TLB flush requests. 
Reviewed-by: Maxim Levitsky Reviewed-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-13-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/hyperv-tlfs.h | 2 ++ arch/x86/kvm/hyperv.c | 1 + 2 files changed, 3 insertions(+) diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index c5e0e5a06c0d..6639979302ab 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -61,6 +61,8 @@ #define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE BIT(10) /* Support for debug MSRs available */ #define HV_FEATURE_DEBUG_MSRS_AVAILABLE BIT(11) +/* Support for extended gva ranges for flush hypercalls available */ +#define HV_FEATURE_EXT_GVA_RANGES_FLUSH BIT(14) /* * Support for returning hypercall output block via XMM * registers is available diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 6868c478617c..fca9c51891f5 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -2641,6 +2641,7 @@ int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, ent->ebx |= HV_DEBUGGING; ent->edx |= HV_X64_GUEST_DEBUGGING_AVAILABLE; ent->edx |= HV_FEATURE_DEBUG_MSRS_AVAILABLE; + ent->edx |= HV_FEATURE_EXT_GVA_RANGES_FLUSH; /* * Direct Synthetic timers only make sense with in-kernel From aee738236dca0d0870789138ec494e15d6303566 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 1 Nov 2022 15:53:51 +0100 Subject: [PATCH 1800/4122] KVM: x86: Prepare kvm_hv_flush_tlb() to handle L2's GPAs To handle L2 TLB flush requests, KVM needs to translate the specified L2 GPA to L1 GPA to read hypercall arguments from there. No functional change as KVM doesn't handle VMCALL/VMMCALL from L2 yet. 
Reviewed-by: Maxim Levitsky Reviewed-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-14-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/hyperv.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index fca9c51891f5..cb145987f5b8 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -23,6 +23,7 @@ #include "ioapic.h" #include "cpuid.h" #include "hyperv.h" +#include "mmu.h" #include "xen.h" #include @@ -1908,6 +1909,19 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) */ BUILD_BUG_ON(KVM_HV_MAX_SPARSE_VCPU_SET_BITS > 64); + /* + * 'Slow' hypercall's first parameter is the address in guest's memory + * where hypercall parameters are placed. This is either a GPA or a + * nested GPA when KVM is handling the call from L2 ('direct' TLB + * flush). Translate the address here so the memory can be uniformly + * read with kvm_read_guest(). + */ + if (!hc->fast && is_guest_mode(vcpu)) { + hc->ingpa = translate_nested_gpa(vcpu, hc->ingpa, 0, NULL); + if (unlikely(hc->ingpa == INVALID_GPA)) + return HV_STATUS_INVALID_HYPERCALL_INPUT; + } + if (hc->code == HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST || hc->code == HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE) { if (hc->fast) { From bd19c94a19b09b563a20862c651859f6e3d73847 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 1 Nov 2022 15:53:52 +0100 Subject: [PATCH 1801/4122] x86/hyperv: Introduce HV_MAX_SPARSE_VCPU_BANKS/HV_VCPUS_PER_SPARSE_BANK constants It may not be clear where the magical '64' value used in __cpumask_to_vpset() comes from. Moreover, '64' means both the maximum sparse bank number as well as the number of vCPUs per bank. Add defines to make things clear. These defines are also going to be used by KVM. No functional change. 
Reviewed-by: Maxim Levitsky Reviewed-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-15-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- include/asm-generic/hyperv-tlfs.h | 5 +++++ include/asm-generic/mshyperv.h | 11 ++++++----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h index fdce7a4cfc6f..020ca9bdbb79 100644 --- a/include/asm-generic/hyperv-tlfs.h +++ b/include/asm-generic/hyperv-tlfs.h @@ -399,6 +399,11 @@ struct hv_vpset { u64 bank_contents[]; } __packed; +/* The maximum number of sparse vCPU banks which can be encoded by 'struct hv_vpset' */ +#define HV_MAX_SPARSE_VCPU_BANKS (64) +/* The number of vCPUs in one sparse bank */ +#define HV_VCPUS_PER_SPARSE_BANK (64) + /* HvCallSendSyntheticClusterIpi hypercall */ struct hv_send_ipi { u32 vector; diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index bfb9eb9d7215..d55d2833a37b 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -211,9 +211,10 @@ static inline int __cpumask_to_vpset(struct hv_vpset *vpset, { int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1; int this_cpu = smp_processor_id(); + int max_vcpu_bank = hv_max_vp_index / HV_VCPUS_PER_SPARSE_BANK; - /* valid_bank_mask can represent up to 64 banks */ - if (hv_max_vp_index / 64 >= 64) + /* vpset.valid_bank_mask can represent up to HV_MAX_SPARSE_VCPU_BANKS banks */ + if (max_vcpu_bank >= HV_MAX_SPARSE_VCPU_BANKS) return 0; /* @@ -221,7 +222,7 @@ static inline int __cpumask_to_vpset(struct hv_vpset *vpset, * structs are not cleared between calls, we risk flushing unneeded * vCPUs otherwise. 
*/ - for (vcpu_bank = 0; vcpu_bank <= hv_max_vp_index / 64; vcpu_bank++) + for (vcpu_bank = 0; vcpu_bank <= max_vcpu_bank; vcpu_bank++) vpset->bank_contents[vcpu_bank] = 0; /* @@ -233,8 +234,8 @@ static inline int __cpumask_to_vpset(struct hv_vpset *vpset, vcpu = hv_cpu_number_to_vp_number(cpu); if (vcpu == VP_INVAL) return -1; - vcpu_bank = vcpu / 64; - vcpu_offset = vcpu % 64; + vcpu_bank = vcpu / HV_VCPUS_PER_SPARSE_BANK; + vcpu_offset = vcpu % HV_VCPUS_PER_SPARSE_BANK; __set_bit(vcpu_offset, (unsigned long *) &vpset->bank_contents[vcpu_bank]); if (vcpu_bank >= nr_bank) From ca7372aca7f4b2f1b29a9941053999d224d1e7c7 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 1 Nov 2022 15:53:53 +0100 Subject: [PATCH 1802/4122] KVM: x86: hyper-v: Use HV_MAX_SPARSE_VCPU_BANKS/HV_VCPUS_PER_SPARSE_BANK instead of raw '64' It may not be clear where the '64' limit for the maximum sparse bank number comes from; use the HV_MAX_SPARSE_VCPU_BANKS define instead. Use HV_VCPUS_PER_SPARSE_BANK in KVM_HV_MAX_SPARSE_VCPU_SET_BITS's definition. Opportunistically adjust the comment around BUILD_BUG_ON(). No functional change. 
Suggested-by: Sean Christopherson Reviewed-by: Maxim Levitsky Reviewed-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-16-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/hyperv.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index cb145987f5b8..2fceb85687c1 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -40,7 +40,7 @@ #include "irq.h" #include "fpu.h" -#define KVM_HV_MAX_SPARSE_VCPU_SET_BITS DIV_ROUND_UP(KVM_MAX_VCPUS, 64) +#define KVM_HV_MAX_SPARSE_VCPU_SET_BITS DIV_ROUND_UP(KVM_MAX_VCPUS, HV_VCPUS_PER_SPARSE_BANK) static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer, bool vcpu_kick); @@ -1793,7 +1793,7 @@ static u64 kvm_get_sparse_vp_set(struct kvm *kvm, struct kvm_hv_hcall *hc, u64 *sparse_banks, int consumed_xmm_halves, gpa_t offset) { - if (hc->var_cnt > 64) + if (hc->var_cnt > HV_MAX_SPARSE_VCPU_BANKS) return -EINVAL; /* Cap var_cnt to ignore banks that cannot contain a legal VP index. */ @@ -1902,12 +1902,11 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) gpa_t data_offset; /* - * The Hyper-V TLFS doesn't allow more than 64 sparse banks, e.g. the - * valid mask is a u64. Fail the build if KVM's max allowed number of - * vCPUs (>4096) would exceed this limit, KVM will additional changes - * for Hyper-V support to avoid setting the guest up to fail. + * The Hyper-V TLFS doesn't allow more than HV_MAX_SPARSE_VCPU_BANKS + * sparse banks. Fail the build if KVM's max allowed number of + * vCPUs (>4096) exceeds this limit. 
*/ - BUILD_BUG_ON(KVM_HV_MAX_SPARSE_VCPU_SET_BITS > 64); + BUILD_BUG_ON(KVM_HV_MAX_SPARSE_VCPU_SET_BITS > HV_MAX_SPARSE_VCPU_BANKS); /* * 'Slow' hypercall's first parameter is the address in guest's memory From b6c2c22fa7012616b3039c9f559bf01195137b9d Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 1 Nov 2022 15:53:54 +0100 Subject: [PATCH 1803/4122] KVM: x86: hyper-v: Don't use sparse_set_to_vcpu_mask() in kvm_hv_send_ipi() Get rid of on-stack allocation of vcpu_mask and optimize kvm_hv_send_ipi() for a smaller number of vCPUs in the request. When Hyper-V TLB flush is in use, HvSendSyntheticClusterIpi{,Ex} calls are not commonly used to send IPIs to a large number of vCPUs (and are rarely used in general). Introduce hv_is_vp_in_sparse_set() to directly check if the specified VP_ID is present in sparse vCPU set. Reviewed-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-17-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/hyperv.c | 42 +++++++++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 2fceb85687c1..0bfa59838e0a 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1741,6 +1741,28 @@ static void sparse_set_to_vcpu_mask(struct kvm *kvm, u64 *sparse_banks, } } +static bool hv_is_vp_in_sparse_set(u32 vp_id, u64 valid_bank_mask, u64 sparse_banks[]) +{ + int valid_bit_nr = vp_id / HV_VCPUS_PER_SPARSE_BANK; + unsigned long sbank; + + if (!test_bit(valid_bit_nr, (unsigned long *)&valid_bank_mask)) + return false; + + /* + * The index into the sparse bank is the number of preceding bits in + * the valid mask. Optimize for VMs with <64 vCPUs by skipping the + * fancy math if there can't possibly be preceding bits. 
+ */ + if (valid_bit_nr) + sbank = hweight64(valid_bank_mask & GENMASK_ULL(valid_bit_nr - 1, 0)); + else + sbank = 0; + + return test_bit(vp_id % HV_VCPUS_PER_SPARSE_BANK, + (unsigned long *)&sparse_banks[sbank]); +} + struct kvm_hv_hcall { u64 param; u64 ingpa; @@ -2035,8 +2057,8 @@ ret_success: ((u64)hc->rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET); } -static void kvm_send_ipi_to_many(struct kvm *kvm, u32 vector, - unsigned long *vcpu_bitmap) +static void kvm_hv_send_ipi_to_many(struct kvm *kvm, u32 vector, + u64 *sparse_banks, u64 valid_bank_mask) { struct kvm_lapic_irq irq = { .delivery_mode = APIC_DM_FIXED, @@ -2046,7 +2068,9 @@ static void kvm_send_ipi_to_many(struct kvm *kvm, u32 vector, unsigned long i; kvm_for_each_vcpu(i, vcpu, kvm) { - if (vcpu_bitmap && !test_bit(i, vcpu_bitmap)) + if (sparse_banks && + !hv_is_vp_in_sparse_set(kvm_hv_get_vpindex(vcpu), + valid_bank_mask, sparse_banks)) continue; /* We fail only when APIC is disabled */ @@ -2059,7 +2083,6 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) struct kvm *kvm = vcpu->kvm; struct hv_send_ipi_ex send_ipi_ex; struct hv_send_ipi send_ipi; - DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS); u64 valid_bank_mask; u64 sparse_banks[KVM_HV_MAX_SPARSE_VCPU_SET_BITS]; u32 vector; @@ -2121,13 +2144,10 @@ check_and_send_ipi: if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR)) return HV_STATUS_INVALID_HYPERCALL_INPUT; - if (all_cpus) { - kvm_send_ipi_to_many(kvm, vector, NULL); - } else { - sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask, vcpu_mask); - - kvm_send_ipi_to_many(kvm, vector, vcpu_mask); - } + if (all_cpus) + kvm_hv_send_ipi_to_many(kvm, vector, NULL, 0); + else + kvm_hv_send_ipi_to_many(kvm, vector, sparse_banks, valid_bank_mask); ret_success: return HV_STATUS_SUCCESS; From 53ca765a041d5a24650d3f01bced791be5d72df7 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 1 Nov 2022 15:53:55 +0100 Subject: [PATCH 1804/4122] KVM: x86: hyper-v: Create a 
separate fifo for L2 TLB flush To handle L2 TLB flush requests, KVM needs to use a separate fifo from regular (L1) Hyper-V TLB flush requests: e.g. when a request to flush something in L2 is made, the target vCPU can transition from L2 to L1, receive a request to flush a GVA for L1 and then try to enter L2 back. The first request needs to be processed at this point. Similarly, requests to flush GVAs in L1 must wait until L2 exits to L1. No functional change as KVM doesn't handle L2 TLB flush requests from L2 yet. Reviewed-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-18-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 8 +++++++- arch/x86/kvm/hyperv.c | 11 +++++++---- arch/x86/kvm/hyperv.h | 19 ++++++++++++++++--- 3 files changed, 30 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3e35dcf40dc7..89f9c98ff445 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -631,6 +631,12 @@ struct kvm_vcpu_hv_synic { */ #define KVM_HV_TLB_FLUSHALL_ENTRY ((u64)-1) +enum hv_tlb_flush_fifos { + HV_L1_TLB_FLUSH_FIFO, + HV_L2_TLB_FLUSH_FIFO, + HV_NR_TLB_FLUSH_FIFOS, +}; + struct kvm_vcpu_hv_tlb_flush_fifo { spinlock_t write_lock; DECLARE_KFIFO(entries, u64, KVM_HV_TLB_FLUSH_FIFO_SIZE); @@ -658,7 +664,7 @@ struct kvm_vcpu_hv { u32 nested_ebx; /* HYPERV_CPUID_NESTED_FEATURES.EBX */ } cpuid_cache; - struct kvm_vcpu_hv_tlb_flush_fifo tlb_flush_fifo; + struct kvm_vcpu_hv_tlb_flush_fifo tlb_flush_fifo[HV_NR_TLB_FLUSH_FIFOS]; }; /* Xen HVM per vcpu emulation context */ diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 0bfa59838e0a..989846310303 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -956,8 +956,10 @@ int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu) hv_vcpu->vp_index = vcpu->vcpu_idx; - INIT_KFIFO(hv_vcpu->tlb_flush_fifo.entries); - 
spin_lock_init(&hv_vcpu->tlb_flush_fifo.write_lock); + for (i = 0; i < HV_NR_TLB_FLUSH_FIFOS; i++) { + INIT_KFIFO(hv_vcpu->tlb_flush_fifo[i].entries); + spin_lock_init(&hv_vcpu->tlb_flush_fifo[i].write_lock); + } return 0; } @@ -1839,7 +1841,8 @@ static void hv_tlb_flush_enqueue(struct kvm_vcpu *vcpu, u64 *entries, int count) if (!hv_vcpu) return; - tlb_flush_fifo = &hv_vcpu->tlb_flush_fifo; + /* kvm_hv_flush_tlb() is not ready to handle requests for L2s yet */ + tlb_flush_fifo = &hv_vcpu->tlb_flush_fifo[HV_L1_TLB_FLUSH_FIFO]; spin_lock(&tlb_flush_fifo->write_lock); @@ -1874,7 +1877,7 @@ int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu) if (!tdp_enabled || !hv_vcpu) return -EINVAL; - tlb_flush_fifo = &hv_vcpu->tlb_flush_fifo; + tlb_flush_fifo = kvm_hv_get_tlb_flush_fifo(vcpu, is_guest_mode(vcpu)); count = kfifo_out(&tlb_flush_fifo->entries, entries, KVM_HV_TLB_FLUSH_FIFO_SIZE); diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index f79edf9234cd..8942e8c6c912 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -22,6 +22,7 @@ #define __ARCH_X86_KVM_HYPERV_H__ #include +#include "x86.h" /* "Hv#1" signature */ #define HYPERV_CPUID_SIGNATURE_EAX 0x31237648 @@ -151,15 +152,27 @@ int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args); int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 __user *entries); +static inline struct kvm_vcpu_hv_tlb_flush_fifo *kvm_hv_get_tlb_flush_fifo(struct kvm_vcpu *vcpu, + bool is_guest_mode) +{ + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); + int i = is_guest_mode ? 
HV_L2_TLB_FLUSH_FIFO : + HV_L1_TLB_FLUSH_FIFO; + + /* KVM does not handle L2 TLB flush requests yet */ + WARN_ON_ONCE(i != HV_L1_TLB_FLUSH_FIFO); + + return &hv_vcpu->tlb_flush_fifo[i]; +} + static inline void kvm_hv_vcpu_purge_flush_tlb(struct kvm_vcpu *vcpu) { struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo; - struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); - if (!hv_vcpu || !kvm_check_request(KVM_REQ_HV_TLB_FLUSH, vcpu)) + if (!to_hv_vcpu(vcpu) || !kvm_check_request(KVM_REQ_HV_TLB_FLUSH, vcpu)) return; - tlb_flush_fifo = &hv_vcpu->tlb_flush_fifo; + tlb_flush_fifo = kvm_hv_get_tlb_flush_fifo(vcpu, is_guest_mode(vcpu)); kfifo_reset_out(&tlb_flush_fifo->entries); } From 7d5e88d301f84a7b64602dbe3640f288223095ea Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 1 Nov 2022 15:53:56 +0100 Subject: [PATCH 1805/4122] KVM: x86: hyper-v: Use preallocated buffer in 'struct kvm_vcpu_hv' instead of on-stack 'sparse_banks' To make kvm_hv_flush_tlb() ready to handle L2 TLB flush requests, KVM needs to allow for all 64 sparse vCPU banks regardless of KVM_MAX_VCPUs as L1 may use vCPU overcommit for L2. To avoid growing on-stack allocation, make 'sparse_banks' part of per-vCPU 'struct kvm_vcpu_hv' which is allocated dynamically. Note: sparse_set_to_vcpu_mask() can't currently be used to handle L2 requests as KVM does not keep L2 VM_ID -> L2 VCPU_ID -> L1 vCPU mappings, i.e. its vp_bitmap array is still bounded by the number of L1 vCPUs and so can remain an on-stack allocation. 
Reviewed-by: Maxim Levitsky Reviewed-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-19-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 3 +++ arch/x86/kvm/hyperv.c | 6 ++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 89f9c98ff445..4596f19f927b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -665,6 +665,9 @@ struct kvm_vcpu_hv { } cpuid_cache; struct kvm_vcpu_hv_tlb_flush_fifo tlb_flush_fifo[HV_NR_TLB_FLUSH_FIFOS]; + + /* Preallocated buffer for handling hypercalls passing sparse vCPU set */ + u64 sparse_banks[HV_MAX_SPARSE_VCPU_BANKS]; }; /* Xen HVM per vcpu emulation context */ diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 989846310303..058e14564389 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1906,6 +1906,8 @@ out_flush_all: static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) { + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); + u64 *sparse_banks = hv_vcpu->sparse_banks; struct kvm *kvm = vcpu->kvm; struct hv_tlb_flush_ex flush_ex; struct hv_tlb_flush flush; @@ -1919,7 +1921,6 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) u64 __tlb_flush_entries[KVM_HV_TLB_FLUSH_FIFO_SIZE - 1]; u64 *tlb_flush_entries; u64 valid_bank_mask; - u64 sparse_banks[KVM_HV_MAX_SPARSE_VCPU_SET_BITS]; struct kvm_vcpu *v; unsigned long i; bool all_cpus; @@ -2083,11 +2084,12 @@ static void kvm_hv_send_ipi_to_many(struct kvm *kvm, u32 vector, static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) { + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); + u64 *sparse_banks = hv_vcpu->sparse_banks; struct kvm *kvm = vcpu->kvm; struct hv_send_ipi_ex send_ipi_ex; struct hv_send_ipi send_ipi; u64 valid_bank_mask; - u64 
sparse_banks[KVM_HV_MAX_SPARSE_VCPU_SET_BITS]; u32 vector; bool all_cpus; From 38edb45231832ef2aa191e2f3f77e30ad0bb4b61 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 1 Nov 2022 15:53:57 +0100 Subject: [PATCH 1806/4122] KVM: nVMX: Keep track of hv_vm_id/hv_vp_id when eVMCS is in use To handle L2 TLB flush requests, KVM needs to keep track of L2's VM_ID/ VP_IDs which are set by L1 hypervisor. 'Partition assist page' address is also needed to handle post-flush exit to L1 upon request. Reviewed-by: Maxim Levitsky Reviewed-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-20-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 6 ++++++ arch/x86/kvm/vmx/nested.c | 15 +++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4596f19f927b..63dad1e12969 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -668,6 +668,12 @@ struct kvm_vcpu_hv { /* Preallocated buffer for handling hypercalls passing sparse vCPU set */ u64 sparse_banks[HV_MAX_SPARSE_VCPU_BANKS]; + + struct { + u64 pa_page_gpa; + u64 vm_id; + u32 vp_id; + } nested; }; /* Xen HVM per vcpu emulation context */ diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 048b2c3e3b3f..cce68fd5befb 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -225,6 +225,7 @@ static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx) static inline void nested_release_evmcs(struct kvm_vcpu *vcpu) { + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); struct vcpu_vmx *vmx = to_vmx(vcpu); if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) { @@ -233,6 +234,12 @@ static inline void nested_release_evmcs(struct kvm_vcpu *vcpu) } vmx->nested.hv_evmcs_vmptr = EVMPTR_INVALID; + + if (hv_vcpu) { + hv_vcpu->nested.pa_page_gpa = INVALID_GPA; + hv_vcpu->nested.vm_id = 0; + 
hv_vcpu->nested.vp_id = 0; + } } static void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx, @@ -1557,11 +1564,19 @@ static void copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx, u32 hv_clean_fields { struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12; struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs; + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(&vmx->vcpu); /* HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE */ vmcs12->tpr_threshold = evmcs->tpr_threshold; vmcs12->guest_rip = evmcs->guest_rip; + if (unlikely(!(hv_clean_fields & + HV_VMX_ENLIGHTENED_CLEAN_FIELD_ENLIGHTENMENTSCONTROL))) { + hv_vcpu->nested.pa_page_gpa = evmcs->partition_assist_page; + hv_vcpu->nested.vm_id = evmcs->hv_vm_id; + hv_vcpu->nested.vp_id = evmcs->hv_vp_id; + } + if (unlikely(!(hv_clean_fields & HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC))) { vmcs12->guest_rsp = evmcs->guest_rsp; From e45aa2444d280747d27d4d98685d761125c4e364 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 1 Nov 2022 15:53:58 +0100 Subject: [PATCH 1807/4122] KVM: nSVM: Keep track of Hyper-V hv_vm_id/hv_vp_id Similar to nVMX, KVM needs to know L2's VM_ID/VP_ID and Partition assist page address to handle L2 TLB flush requests.
Reviewed-by: Maxim Levitsky Reviewed-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-21-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/hyperv.h | 15 +++++++++++++++ arch/x86/kvm/svm/nested.c | 2 ++ 2 files changed, 17 insertions(+) diff --git a/arch/x86/kvm/svm/hyperv.h b/arch/x86/kvm/svm/hyperv.h index c59544cdf03b..e97d80974e72 100644 --- a/arch/x86/kvm/svm/hyperv.h +++ b/arch/x86/kvm/svm/hyperv.h @@ -9,5 +9,20 @@ #include #include "../hyperv.h" +#include "svm.h" + +static inline void nested_svm_hv_update_vm_vp_ids(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + struct hv_vmcb_enlightenments *hve = &svm->nested.ctl.hv_enlightenments; + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); + + if (!hv_vcpu) + return; + + hv_vcpu->nested.pa_page_gpa = hve->partition_assist_page; + hv_vcpu->nested.vm_id = hve->hv_vm_id; + hv_vcpu->nested.vp_id = hve->hv_vp_id; +} #endif /* __ARCH_X86_KVM_SVM_HYPERV_H__ */ diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index cc8f47e7e294..aa36c349da43 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -800,6 +800,8 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa, if (kvm_vcpu_apicv_active(vcpu)) kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu); + nested_svm_hv_update_vm_vp_ids(vcpu); + return 0; } From b0c9c25e46252a576a974dd659f2396774e0dbb1 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 1 Nov 2022 15:53:59 +0100 Subject: [PATCH 1808/4122] KVM: x86: Introduce .hv_inject_synthetic_vmexit_post_tlb_flush() nested hook Hyper-V supports injecting synthetic L2->L1 exit after performing L2 TLB flush operation but the procedure is vendor specific. Introduce .hv_inject_synthetic_vmexit_post_tlb_flush nested hook for it. 
Reviewed-by: Maxim Levitsky Reviewed-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-22-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/Makefile | 3 ++- arch/x86/kvm/svm/hyperv.c | 11 +++++++++++ arch/x86/kvm/svm/hyperv.h | 2 ++ arch/x86/kvm/svm/nested.c | 1 + arch/x86/kvm/vmx/hyperv.c | 4 ++++ arch/x86/kvm/vmx/hyperv.h | 1 + arch/x86/kvm/vmx/nested.c | 1 + 8 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 arch/x86/kvm/svm/hyperv.c diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 63dad1e12969..ebf90f4f1a21 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1722,6 +1722,7 @@ struct kvm_x86_nested_ops { int (*enable_evmcs)(struct kvm_vcpu *vcpu, uint16_t *vmcs_version); uint16_t (*get_evmcs_version)(struct kvm_vcpu *vcpu); + void (*hv_inject_synthetic_vmexit_post_tlb_flush)(struct kvm_vcpu *vcpu); }; struct kvm_x86_init_ops { diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 4cf407563fee..80e3fe184d17 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -26,7 +26,8 @@ kvm-intel-y += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \ vmx/hyperv.o vmx/nested.o vmx/posted_intr.o kvm-intel-$(CONFIG_X86_SGX_KVM) += vmx/sgx.o -kvm-amd-y += svm/svm.o svm/vmenter.o svm/pmu.o svm/nested.o svm/avic.o svm/sev.o +kvm-amd-y += svm/svm.o svm/vmenter.o svm/pmu.o svm/nested.o svm/avic.o \ + svm/sev.o svm/hyperv.o ifdef CONFIG_HYPERV kvm-amd-y += svm/svm_onhyperv.o diff --git a/arch/x86/kvm/svm/hyperv.c b/arch/x86/kvm/svm/hyperv.c new file mode 100644 index 000000000000..911f51021af1 --- /dev/null +++ b/arch/x86/kvm/svm/hyperv.c @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * AMD SVM specific code for Hyper-V on KVM. + * + * Copyright 2022 Red Hat, Inc. and/or its affiliates. 
+ */ +#include "hyperv.h" + +void svm_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu) +{ +} diff --git a/arch/x86/kvm/svm/hyperv.h b/arch/x86/kvm/svm/hyperv.h index e97d80974e72..7564bdf652e4 100644 --- a/arch/x86/kvm/svm/hyperv.h +++ b/arch/x86/kvm/svm/hyperv.h @@ -25,4 +25,6 @@ static inline void nested_svm_hv_update_vm_vp_ids(struct kvm_vcpu *vcpu) hv_vcpu->nested.vp_id = hve->hv_vp_id; } +void svm_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu); + #endif /* __ARCH_X86_KVM_SVM_HYPERV_H__ */ diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index aa36c349da43..748e4de40c8f 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -1730,4 +1730,5 @@ struct kvm_x86_nested_ops svm_nested_ops = { .get_nested_state_pages = svm_get_nested_state_pages, .get_state = svm_get_nested_state, .set_state = svm_set_nested_state, + .hv_inject_synthetic_vmexit_post_tlb_flush = svm_hv_inject_synthetic_vmexit_post_tlb_flush, }; diff --git a/arch/x86/kvm/vmx/hyperv.c b/arch/x86/kvm/vmx/hyperv.c index 5e239158174e..f05464db4fdc 100644 --- a/arch/x86/kvm/vmx/hyperv.c +++ b/arch/x86/kvm/vmx/hyperv.c @@ -506,3 +506,7 @@ int nested_enable_evmcs(struct kvm_vcpu *vcpu, return 0; } + +void vmx_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu) +{ +} diff --git a/arch/x86/kvm/vmx/hyperv.h b/arch/x86/kvm/vmx/hyperv.h index 99a151af7a81..8efaffe9215b 100644 --- a/arch/x86/kvm/vmx/hyperv.h +++ b/arch/x86/kvm/vmx/hyperv.h @@ -243,5 +243,6 @@ int nested_enable_evmcs(struct kvm_vcpu *vcpu, uint16_t *vmcs_version); void nested_evmcs_filter_control_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); int nested_evmcs_check_controls(struct vmcs12 *vmcs12); +void vmx_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu); #endif /* __KVM_X86_VMX_HYPERV_H */ diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index cce68fd5befb..396712d13211 100644 --- a/arch/x86/kvm/vmx/nested.c +++ 
b/arch/x86/kvm/vmx/nested.c @@ -6995,4 +6995,5 @@ struct kvm_x86_nested_ops vmx_nested_ops = { .write_log_dirty = nested_vmx_write_pml_buffer, .enable_evmcs = nested_enable_evmcs, .get_evmcs_version = nested_get_evmcs_version, + .hv_inject_synthetic_vmexit_post_tlb_flush = vmx_hv_inject_synthetic_vmexit_post_tlb_flush, }; From 3c9eb0655fc03fb5e84f1db334ebc832d9c5ac31 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 1 Nov 2022 15:54:00 +0100 Subject: [PATCH 1809/4122] KVM: x86: hyper-v: Introduce kvm_hv_is_tlb_flush_hcall() The newly introduced helper checks whether vCPU is performing a Hyper-V TLB flush hypercall. This is required to filter out L2 TLB flush hypercalls for processing. Reviewed-by: Maxim Levitsky Reviewed-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-23-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/hyperv.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index 8942e8c6c912..5f9c76b45f46 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -177,6 +177,23 @@ static inline void kvm_hv_vcpu_purge_flush_tlb(struct kvm_vcpu *vcpu) kfifo_reset_out(&tlb_flush_fifo->entries); } +static inline bool kvm_hv_is_tlb_flush_hcall(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); + u16 code; + + if (!hv_vcpu) + return false; + + code = is_64_bit_hypercall(vcpu) ? 
kvm_rcx_read(vcpu) : + kvm_rax_read(vcpu); + + return (code == HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE || + code == HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST || + code == HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX || + code == HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX); +} + int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu); #endif From c58a318f6090efe06e6702b8882e2026f44f620e Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 1 Nov 2022 15:54:01 +0100 Subject: [PATCH 1810/4122] KVM: x86: hyper-v: L2 TLB flush Handle L2 TLB flush requests by going through all vCPUs and checking whether there are vCPUs running the same VM_ID with a VP_ID specified in the requests. Perform synthetic exit to L1 upon finish. Note, while checking VM_ID/VP_ID of running vCPUs seems to be a bit racy, we count on the fact that KVM flushes the whole L2 VPID upon transition. Also, KVM_REQ_HV_TLB_FLUSH request needs to be done upon transition between L1 and L2 to make sure all pending requests are always processed. For reference, Hyper-V TLFS refers to the feature as "Direct Virtual Flush". Note, nVMX/nSVM code does not handle VMCALL/VMMCALL from L2 yet.
Reviewed-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-24-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/hyperv.c | 80 ++++++++++++++++++++++++++++++++++++------- arch/x86/kvm/hyperv.h | 3 -- arch/x86/kvm/trace.h | 21 +++++++----- 3 files changed, 80 insertions(+), 24 deletions(-) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 058e14564389..3715a6f026a2 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -34,6 +34,7 @@ #include #include +#include #include #include "trace.h" @@ -1832,18 +1833,16 @@ static int kvm_hv_get_tlb_flush_entries(struct kvm *kvm, struct kvm_hv_hcall *hc entries, consumed_xmm_halves, offset); } -static void hv_tlb_flush_enqueue(struct kvm_vcpu *vcpu, u64 *entries, int count) +static void hv_tlb_flush_enqueue(struct kvm_vcpu *vcpu, + struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo, + u64 *entries, int count) { - struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo; struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); u64 flush_all_entry = KVM_HV_TLB_FLUSHALL_ENTRY; if (!hv_vcpu) return; - /* kvm_hv_flush_tlb() is not ready to handle requests for L2s yet */ - tlb_flush_fifo = &hv_vcpu->tlb_flush_fifo[HV_L1_TLB_FLUSH_FIFO]; - spin_lock(&tlb_flush_fifo->write_lock); /* @@ -1912,6 +1911,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) struct hv_tlb_flush_ex flush_ex; struct hv_tlb_flush flush; DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS); + struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo; /* * Normally, there can be no more than 'KVM_HV_TLB_FLUSH_FIFO_SIZE' * entries on the TLB flush fifo. 
The last entry, however, needs to be @@ -1962,7 +1962,8 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) } trace_kvm_hv_flush_tlb(flush.processor_mask, - flush.address_space, flush.flags); + flush.address_space, flush.flags, + is_guest_mode(vcpu)); valid_bank_mask = BIT_ULL(0); sparse_banks[0] = flush.processor_mask; @@ -1993,7 +1994,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) trace_kvm_hv_flush_tlb_ex(flush_ex.hv_vp_set.valid_bank_mask, flush_ex.hv_vp_set.format, flush_ex.address_space, - flush_ex.flags); + flush_ex.flags, is_guest_mode(vcpu)); valid_bank_mask = flush_ex.hv_vp_set.valid_bank_mask; all_cpus = flush_ex.hv_vp_set.format != @@ -2037,19 +2038,57 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we can't * analyze it here, flush TLB regardless of the specified address space. */ - if (all_cpus) { - kvm_for_each_vcpu(i, v, kvm) - hv_tlb_flush_enqueue(v, tlb_flush_entries, hc->rep_cnt); + if (all_cpus && !is_guest_mode(vcpu)) { + kvm_for_each_vcpu(i, v, kvm) { + tlb_flush_fifo = kvm_hv_get_tlb_flush_fifo(v, false); + hv_tlb_flush_enqueue(v, tlb_flush_fifo, + tlb_flush_entries, hc->rep_cnt); + } kvm_make_all_cpus_request(kvm, KVM_REQ_HV_TLB_FLUSH); - } else { + } else if (!is_guest_mode(vcpu)) { sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask, vcpu_mask); for_each_set_bit(i, vcpu_mask, KVM_MAX_VCPUS) { v = kvm_get_vcpu(kvm, i); if (!v) continue; - hv_tlb_flush_enqueue(v, tlb_flush_entries, hc->rep_cnt); + tlb_flush_fifo = kvm_hv_get_tlb_flush_fifo(v, false); + hv_tlb_flush_enqueue(v, tlb_flush_fifo, + tlb_flush_entries, hc->rep_cnt); + } + + kvm_make_vcpus_request_mask(kvm, KVM_REQ_HV_TLB_FLUSH, vcpu_mask); + } else { + struct kvm_vcpu_hv *hv_v; + + bitmap_zero(vcpu_mask, KVM_MAX_VCPUS); + + kvm_for_each_vcpu(i, v, kvm) { + hv_v = to_hv_vcpu(v); + + /* + * The following check races with nested 
vCPUs entering/exiting + * and/or migrating between L1's vCPUs, however the only case when + * KVM *must* flush the TLB is when the target L2 vCPU keeps + * running on the same L1 vCPU from the moment of the request until + * kvm_hv_flush_tlb() returns. TLB is fully flushed in all other + * cases, e.g. when the target L2 vCPU migrates to a different L1 + * vCPU or when the corresponding L1 vCPU temporary switches to a + * different L2 vCPU while the request is being processed. + */ + if (!hv_v || hv_v->nested.vm_id != hv_vcpu->nested.vm_id) + continue; + + if (!all_cpus && + !hv_is_vp_in_sparse_set(hv_v->nested.vp_id, valid_bank_mask, + sparse_banks)) + continue; + + __set_bit(i, vcpu_mask); + tlb_flush_fifo = kvm_hv_get_tlb_flush_fifo(v, true); + hv_tlb_flush_enqueue(v, tlb_flush_fifo, + tlb_flush_entries, hc->rep_cnt); } kvm_make_vcpus_request_mask(kvm, KVM_REQ_HV_TLB_FLUSH, vcpu_mask); @@ -2239,10 +2278,25 @@ static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result) static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result) { + u32 tlb_lock_count = 0; + int ret; + + if (hv_result_success(result) && is_guest_mode(vcpu) && + kvm_hv_is_tlb_flush_hcall(vcpu) && + kvm_read_guest(vcpu->kvm, to_hv_vcpu(vcpu)->nested.pa_page_gpa, + &tlb_lock_count, sizeof(tlb_lock_count))) + result = HV_STATUS_INVALID_HYPERCALL_INPUT; + trace_kvm_hv_hypercall_done(result); kvm_hv_hypercall_set_result(vcpu, result); ++vcpu->stat.hypercalls; - return kvm_skip_emulated_instruction(vcpu); + + ret = kvm_skip_emulated_instruction(vcpu); + + if (tlb_lock_count) + kvm_x86_ops.nested_ops->hv_inject_synthetic_vmexit_post_tlb_flush(vcpu); + + return ret; } static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index 5f9c76b45f46..7706e203ff43 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -159,9 +159,6 @@ static inline struct kvm_vcpu_hv_tlb_flush_fifo 
*kvm_hv_get_tlb_flush_fifo(struc int i = is_guest_mode ? HV_L2_TLB_FLUSH_FIFO : HV_L1_TLB_FLUSH_FIFO; - /* KVM does not handle L2 TLB flush requests yet */ - WARN_ON_ONCE(i != HV_L1_TLB_FLUSH_FIFO); - return &hv_vcpu->tlb_flush_fifo[i]; } diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index bc25589ad588..09f3392dd830 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -1547,38 +1547,41 @@ TRACE_EVENT(kvm_hv_timer_state, * Tracepoint for kvm_hv_flush_tlb. */ TRACE_EVENT(kvm_hv_flush_tlb, - TP_PROTO(u64 processor_mask, u64 address_space, u64 flags), - TP_ARGS(processor_mask, address_space, flags), + TP_PROTO(u64 processor_mask, u64 address_space, u64 flags, bool guest_mode), + TP_ARGS(processor_mask, address_space, flags, guest_mode), TP_STRUCT__entry( __field(u64, processor_mask) __field(u64, address_space) __field(u64, flags) + __field(bool, guest_mode) ), TP_fast_assign( __entry->processor_mask = processor_mask; __entry->address_space = address_space; __entry->flags = flags; + __entry->guest_mode = guest_mode; ), - TP_printk("processor_mask 0x%llx address_space 0x%llx flags 0x%llx", + TP_printk("processor_mask 0x%llx address_space 0x%llx flags 0x%llx %s", __entry->processor_mask, __entry->address_space, - __entry->flags) + __entry->flags, __entry->guest_mode ? "(L2)" : "") ); /* * Tracepoint for kvm_hv_flush_tlb_ex. 
*/ TRACE_EVENT(kvm_hv_flush_tlb_ex, - TP_PROTO(u64 valid_bank_mask, u64 format, u64 address_space, u64 flags), - TP_ARGS(valid_bank_mask, format, address_space, flags), + TP_PROTO(u64 valid_bank_mask, u64 format, u64 address_space, u64 flags, bool guest_mode), + TP_ARGS(valid_bank_mask, format, address_space, flags, guest_mode), TP_STRUCT__entry( __field(u64, valid_bank_mask) __field(u64, format) __field(u64, address_space) __field(u64, flags) + __field(bool, guest_mode) ), TP_fast_assign( @@ -1586,12 +1589,14 @@ TRACE_EVENT(kvm_hv_flush_tlb_ex, __entry->format = format; __entry->address_space = address_space; __entry->flags = flags; + __entry->guest_mode = guest_mode; ), TP_printk("valid_bank_mask 0x%llx format 0x%llx " - "address_space 0x%llx flags 0x%llx", + "address_space 0x%llx flags 0x%llx %s", __entry->valid_bank_mask, __entry->format, - __entry->address_space, __entry->flags) + __entry->address_space, __entry->flags, + __entry->guest_mode ? "(L2)" : "") ); /* From d4baf1a9a572910d7b4cd63d23bf4be89b7648bd Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 1 Nov 2022 15:54:02 +0100 Subject: [PATCH 1811/4122] KVM: x86: hyper-v: Introduce fast guest_hv_cpuid_has_l2_tlb_flush() check Introduce a helper to quickly check if KVM needs to handle VMCALL/VMMCALL from L2 in L0 to process L2 TLB flush requests. 
Reviewed-by: Maxim Levitsky Reviewed-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-25-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/hyperv.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index 7706e203ff43..bd698eb2bda1 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -174,6 +174,14 @@ static inline void kvm_hv_vcpu_purge_flush_tlb(struct kvm_vcpu *vcpu) kfifo_reset_out(&tlb_flush_fifo->entries); } +static inline bool guest_hv_cpuid_has_l2_tlb_flush(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); + + return hv_vcpu && + (hv_vcpu->cpuid_cache.nested_eax & HV_X64_NESTED_DIRECT_FLUSH); +} + static inline bool kvm_hv_is_tlb_flush_hcall(struct kvm_vcpu *vcpu) { struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); From 046f5756c49106471bc98bd32b87a62d0717ddda Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 1 Nov 2022 15:54:03 +0100 Subject: [PATCH 1812/4122] KVM: nVMX: hyper-v: Cache VP assist page in 'struct kvm_vcpu_hv' In preparation for enabling L2 TLB flush, cache VP assist page in 'struct kvm_vcpu_hv'. While at it, rename nested_enlightened_vmentry() to nested_get_evmptr() and make it return eVMCS GPA directly. No functional change intended.
Reviewed-by: Maxim Levitsky Reviewed-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-26-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/hyperv.c | 10 ++++++---- arch/x86/kvm/hyperv.h | 3 +-- arch/x86/kvm/vmx/hyperv.c | 21 +++++++-------------- arch/x86/kvm/vmx/hyperv.h | 2 +- arch/x86/kvm/vmx/nested.c | 6 +++--- 6 files changed, 20 insertions(+), 24 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ebf90f4f1a21..d1013c4f673c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -669,6 +669,8 @@ struct kvm_vcpu_hv { /* Preallocated buffer for handling hypercalls passing sparse vCPU set */ u64 sparse_banks[HV_MAX_SPARSE_VCPU_BANKS]; + struct hv_vp_assist_page vp_assist_page; + struct { u64 pa_page_gpa; u64 vm_id; diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 3715a6f026a2..ce245e37d08f 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -900,13 +900,15 @@ bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_hv_assist_page_enabled); -bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu, - struct hv_vp_assist_page *assist_page) +bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu) { - if (!kvm_hv_assist_page_enabled(vcpu)) + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); + + if (!hv_vcpu || !kvm_hv_assist_page_enabled(vcpu)) return false; + return !kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, - assist_page, sizeof(*assist_page)); + &hv_vcpu->vp_assist_page, sizeof(struct hv_vp_assist_page)); } EXPORT_SYMBOL_GPL(kvm_hv_get_assist_page); diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index bd698eb2bda1..81313e418b80 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -108,8 +108,7 @@ int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages); void 
kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu); bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu); -bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu, - struct hv_vp_assist_page *assist_page); +bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu); static inline struct kvm_vcpu_hv_stimer *to_hv_stimer(struct kvm_vcpu *vcpu, int timer_index) diff --git a/arch/x86/kvm/vmx/hyperv.c b/arch/x86/kvm/vmx/hyperv.c index f05464db4fdc..bceca1a99804 100644 --- a/arch/x86/kvm/vmx/hyperv.c +++ b/arch/x86/kvm/vmx/hyperv.c @@ -321,24 +321,17 @@ const struct evmcs_field vmcs_field_to_evmcs_1[] = { }; const unsigned int nr_evmcs_1_fields = ARRAY_SIZE(vmcs_field_to_evmcs_1); -bool nested_enlightened_vmentry(struct kvm_vcpu *vcpu, u64 *evmcs_gpa) +u64 nested_get_evmptr(struct kvm_vcpu *vcpu) { - struct hv_vp_assist_page assist_page; + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); - *evmcs_gpa = -1ull; + if (unlikely(!kvm_hv_get_assist_page(vcpu))) + return EVMPTR_INVALID; - if (unlikely(!kvm_hv_get_assist_page(vcpu, &assist_page))) - return false; + if (unlikely(!hv_vcpu->vp_assist_page.enlighten_vmentry)) + return EVMPTR_INVALID; - if (unlikely(!assist_page.enlighten_vmentry)) - return false; - - if (unlikely(!evmptr_is_valid(assist_page.current_nested_vmcs))) - return false; - - *evmcs_gpa = assist_page.current_nested_vmcs; - - return true; + return hv_vcpu->vp_assist_page.current_nested_vmcs; } uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/vmx/hyperv.h b/arch/x86/kvm/vmx/hyperv.h index 8efaffe9215b..8bf366730d33 100644 --- a/arch/x86/kvm/vmx/hyperv.h +++ b/arch/x86/kvm/vmx/hyperv.h @@ -237,7 +237,7 @@ enum nested_evmptrld_status { EVMPTRLD_ERROR, }; -bool nested_enlightened_vmentry(struct kvm_vcpu *vcpu, u64 *evmcs_gpa); +u64 nested_get_evmptr(struct kvm_vcpu *vcpu); uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu); int nested_enable_evmcs(struct kvm_vcpu *vcpu, uint16_t *vmcs_version); diff --git a/arch/x86/kvm/vmx/nested.c 
b/arch/x86/kvm/vmx/nested.c index 396712d13211..38e6cb8abe62 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -1992,7 +1992,8 @@ static enum nested_evmptrld_status nested_vmx_handle_enlightened_vmptrld( if (likely(!guest_cpuid_has_evmcs(vcpu))) return EVMPTRLD_DISABLED; - if (!nested_enlightened_vmentry(vcpu, &evmcs_gpa)) { + evmcs_gpa = nested_get_evmptr(vcpu); + if (!evmptr_is_valid(evmcs_gpa)) { nested_release_evmcs(vcpu); return EVMPTRLD_DISABLED; } @@ -5221,7 +5222,6 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) struct vcpu_vmx *vmx = to_vmx(vcpu); u32 zero = 0; gpa_t vmptr; - u64 evmcs_gpa; int r; if (!nested_vmx_check_permission(vcpu)) @@ -5247,7 +5247,7 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) * vmx->nested.hv_evmcs but this shouldn't be a problem. */ if (likely(!guest_cpuid_has_evmcs(vcpu) || - !nested_enlightened_vmentry(vcpu, &evmcs_gpa))) { + !evmptr_is_valid(nested_get_evmptr(vcpu)))) { if (vmptr == vmx->nested.current_vmptr) nested_release_vmcs12(vcpu); From c30e9bc8b606077142969a807ada42ca921e605a Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 1 Nov 2022 15:54:04 +0100 Subject: [PATCH 1813/4122] KVM: nVMX: hyper-v: Enable L2 TLB flush Enable L2 TLB flush feature on nVMX when: - Enlightened VMCS is in use. - The feature flag is enabled in eVMCS. - The feature flag is enabled in partition assist page. Perform synthetic vmexit to L1 after processing TLB flush call upon request (HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH). Note: nested_evmcs_l2_tlb_flush_enabled() uses cached VP assist page copy which gets updated from nested_vmx_handle_enlightened_vmptrld(). This is also guaranteed to happen post migration with eVMCS backed L2 running. 
Reviewed-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-27-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/hyperv-tlfs.h | 9 +++++++++ arch/x86/kvm/vmx/hyperv.c | 17 +++++++++++++++++ arch/x86/kvm/vmx/hyperv.h | 1 + arch/x86/kvm/vmx/nested.c | 20 ++++++++++++++++++++ 4 files changed, 47 insertions(+) diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 6639979302ab..b25c6792d409 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -600,6 +600,15 @@ struct hv_enlightened_vmcs { #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL 0xFFFF +/* + * Note, Hyper-V isn't actually stealing bit 28 from Intel, just abusing it by + * pairing it with architecturally impossible exit reasons. Bit 28 is set only + * on SMI exits to a SMI transfer monitor (STM) and if and only if a MTF VM-Exit + * is pending. I.e. it will never be set by hardware for non-SMI exits (there + * are only three), nor will it ever be set unless the VMM is an STM. + */ +#define HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH 0x10000031 + /* * Hyper-V uses the software reserved 32 bytes in VMCB control area to expose * SVM enlightenments to guests. 
diff --git a/arch/x86/kvm/vmx/hyperv.c b/arch/x86/kvm/vmx/hyperv.c index bceca1a99804..04a0bba58c7d 100644 --- a/arch/x86/kvm/vmx/hyperv.c +++ b/arch/x86/kvm/vmx/hyperv.c @@ -5,6 +5,7 @@ #include "../cpuid.h" #include "hyperv.h" +#include "nested.h" #include "vmcs.h" #include "vmx.h" #include "trace.h" @@ -500,6 +501,22 @@ int nested_enable_evmcs(struct kvm_vcpu *vcpu, return 0; } +bool nested_evmcs_l2_tlb_flush_enabled(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs; + + if (!hv_vcpu || !evmcs) + return false; + + if (!evmcs->hv_enlightenments_control.nested_flush_hypercall) + return false; + + return hv_vcpu->vp_assist_page.nested_control.features.directhypercall; +} + void vmx_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu) { + nested_vmx_vmexit(vcpu, HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH, 0, 0); } diff --git a/arch/x86/kvm/vmx/hyperv.h b/arch/x86/kvm/vmx/hyperv.h index 8bf366730d33..571e7929d14e 100644 --- a/arch/x86/kvm/vmx/hyperv.h +++ b/arch/x86/kvm/vmx/hyperv.h @@ -243,6 +243,7 @@ int nested_enable_evmcs(struct kvm_vcpu *vcpu, uint16_t *vmcs_version); void nested_evmcs_filter_control_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); int nested_evmcs_check_controls(struct vmcs12 *vmcs12); +bool nested_evmcs_l2_tlb_flush_enabled(struct kvm_vcpu *vcpu); void vmx_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu); #endif /* __KVM_X86_VMX_HYPERV_H */ diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 38e6cb8abe62..b28be793de29 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -1132,6 +1132,15 @@ static void nested_vmx_transition_tlb_flush(struct kvm_vcpu *vcpu, { struct vcpu_vmx *vmx = to_vmx(vcpu); + /* + * KVM_REQ_HV_TLB_FLUSH flushes entries from either L1's VP_ID or + * L2's VP_ID upon request from the guest. 
Make sure we check for + * pending entries in the right FIFO upon L1/L2 transition as these + * requests are put by other vCPUs asynchronously. + */ + if (to_hv_vcpu(vcpu) && enable_ept) + kvm_make_request(KVM_REQ_HV_TLB_FLUSH, vcpu); + /* * If vmcs12 doesn't use VPID, L1 expects linear and combined mappings * for *all* contexts to be flushed on VM-Enter/VM-Exit, i.e. it's a @@ -3267,6 +3276,12 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) static bool vmx_get_nested_state_pages(struct kvm_vcpu *vcpu) { + /* + * Note: nested_get_evmcs_page() also updates 'vp_assist_page' copy + * in 'struct kvm_vcpu_hv' in case eVMCS is in use, this is mandatory + * to make nested_evmcs_l2_tlb_flush_enabled() work correctly post + * migration. + */ if (!nested_get_evmcs_page(vcpu)) { pr_debug_ratelimited("%s: enlightened vmptrld failed\n", __func__); @@ -6144,6 +6159,11 @@ static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu, * Handle L2's bus locks in L0 directly. */ return true; + case EXIT_REASON_VMCALL: + /* Hyper-V L2 TLB flush hypercall is handled by L0 */ + return guest_hv_cpuid_has_l2_tlb_flush(vcpu) && + nested_evmcs_l2_tlb_flush_enabled(vcpu) && + kvm_hv_is_tlb_flush_hcall(vcpu); default: break; } From b415d8d417bbe5403626b74e1041101ac23d602f Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 1 Nov 2022 15:54:05 +0100 Subject: [PATCH 1814/4122] KVM: x86: Make kvm_hv_get_assist_page() return 0/-errno Convert kvm_hv_get_assist_page() to return 'int' and propagate possible errors from kvm_read_guest_cached(). 
Suggested-by: Sean Christopherson Reviewed-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-28-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/hyperv.c | 8 ++++---- arch/x86/kvm/hyperv.h | 2 +- arch/x86/kvm/vmx/hyperv.c | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index ce245e37d08f..15880da73a7b 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -900,15 +900,15 @@ bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_hv_assist_page_enabled); -bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu) +int kvm_hv_get_assist_page(struct kvm_vcpu *vcpu) { struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); if (!hv_vcpu || !kvm_hv_assist_page_enabled(vcpu)) - return false; + return -EFAULT; - return !kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, - &hv_vcpu->vp_assist_page, sizeof(struct hv_vp_assist_page)); + return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, + &hv_vcpu->vp_assist_page, sizeof(struct hv_vp_assist_page)); } EXPORT_SYMBOL_GPL(kvm_hv_get_assist_page); diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index 81313e418b80..5157622c2fb3 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -108,7 +108,7 @@ int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages); void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu); bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu); -bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu); +int kvm_hv_get_assist_page(struct kvm_vcpu *vcpu); static inline struct kvm_vcpu_hv_stimer *to_hv_stimer(struct kvm_vcpu *vcpu, int timer_index) diff --git a/arch/x86/kvm/vmx/hyperv.c b/arch/x86/kvm/vmx/hyperv.c index 04a0bba58c7d..ae03d1fe0355 100644 --- a/arch/x86/kvm/vmx/hyperv.c +++ b/arch/x86/kvm/vmx/hyperv.c @@ -326,7 +326,7 @@ u64 nested_get_evmptr(struct kvm_vcpu *vcpu) { struct 
kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); - if (unlikely(!kvm_hv_get_assist_page(vcpu))) + if (unlikely(kvm_hv_get_assist_page(vcpu))) return EVMPTR_INVALID; if (unlikely(!hv_vcpu->vp_assist_page.enlighten_vmentry)) From 3f4a812edf5cb0a50e65fbdfafdb3e688da18f16 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 1 Nov 2022 15:54:06 +0100 Subject: [PATCH 1815/4122] KVM: nSVM: hyper-v: Enable L2 TLB flush Implement Hyper-V L2 TLB flush for nSVM. The feature needs to be enabled both in extended 'nested controls' in VMCB and VP assist page. According to Hyper-V TLFS, synthetic vmexit to L1 is performed with - HV_SVM_EXITCODE_ENL exit_code. - HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH exit_info_1. Note: VP assist page is cached in 'struct kvm_vcpu_hv' so recalc_intercepts() doesn't need to read from guest's memory. KVM needs to update the cache upon each VMRUN and after svm_set_nested_state (svm_get_nested_state_pages()) to handle the case when the guest got migrated while L2 was running. Reviewed-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-29-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/hyperv-tlfs.h | 4 ++++ arch/x86/kvm/hyperv.h | 11 ++++++++++ arch/x86/kvm/svm/hyperv.c | 7 ++++++ arch/x86/kvm/svm/hyperv.h | 15 +++++++++++++ arch/x86/kvm/svm/nested.c | 35 ++++++++++++++++++++++++++++-- 5 files changed, 70 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index b25c6792d409..e3efaf6e6b62 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -631,6 +631,10 @@ struct hv_vmcb_enlightenments { */ #define HV_VMCB_NESTED_ENLIGHTENMENTS 31 +/* Synthetic VM-Exit */ +#define HV_SVM_EXITCODE_ENL 0xf0000000 +#define HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH (1) + struct hv_partition_assist_pg { u32 tlb_lock_count; }; diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h 
index 5157622c2fb3..9f96414a31c5 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -198,6 +198,17 @@ static inline bool kvm_hv_is_tlb_flush_hcall(struct kvm_vcpu *vcpu) code == HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX); } +static inline int kvm_hv_verify_vp_assist(struct kvm_vcpu *vcpu) +{ + if (!to_hv_vcpu(vcpu)) + return 0; + + if (!kvm_hv_assist_page_enabled(vcpu)) + return 0; + + return kvm_hv_get_assist_page(vcpu); +} + int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu); #endif diff --git a/arch/x86/kvm/svm/hyperv.c b/arch/x86/kvm/svm/hyperv.c index 911f51021af1..088f6429b24c 100644 --- a/arch/x86/kvm/svm/hyperv.c +++ b/arch/x86/kvm/svm/hyperv.c @@ -8,4 +8,11 @@ void svm_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu) { + struct vcpu_svm *svm = to_svm(vcpu); + + svm->vmcb->control.exit_code = HV_SVM_EXITCODE_ENL; + svm->vmcb->control.exit_code_hi = 0; + svm->vmcb->control.exit_info_1 = HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH; + svm->vmcb->control.exit_info_2 = 0; + nested_svm_vmexit(svm); } diff --git a/arch/x86/kvm/svm/hyperv.h b/arch/x86/kvm/svm/hyperv.h index 7564bdf652e4..02f4784b5d44 100644 --- a/arch/x86/kvm/svm/hyperv.h +++ b/arch/x86/kvm/svm/hyperv.h @@ -25,6 +25,21 @@ static inline void nested_svm_hv_update_vm_vp_ids(struct kvm_vcpu *vcpu) hv_vcpu->nested.vp_id = hve->hv_vp_id; } +static inline bool nested_svm_l2_tlb_flush_enabled(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + struct hv_vmcb_enlightenments *hve = &svm->nested.ctl.hv_enlightenments; + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); + + if (!hv_vcpu) + return false; + + if (!hve->hv_enlightenments_control.nested_flush_hypercall) + return false; + + return hv_vcpu->vp_assist_page.nested_control.features.directhypercall; +} + void svm_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu); #endif /* __ARCH_X86_KVM_SVM_HYPERV_H__ */ diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 748e4de40c8f..bc9cd7086fa9 
100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -150,8 +150,12 @@ void recalc_intercepts(struct vcpu_svm *svm) vmcb_clr_intercept(c, INTERCEPT_VINTR); } - /* We don't want to see VMMCALLs from a nested guest */ - vmcb_clr_intercept(c, INTERCEPT_VMMCALL); + /* + * We want to see VMMCALLs from a nested guest only when Hyper-V L2 TLB + * flush feature is enabled. + */ + if (!nested_svm_l2_tlb_flush_enabled(&svm->vcpu)) + vmcb_clr_intercept(c, INTERCEPT_VMMCALL); for (i = 0; i < MAX_INTERCEPT; i++) c->intercepts[i] |= g->intercepts[i]; @@ -473,6 +477,15 @@ static void nested_save_pending_event_to_vmcb12(struct vcpu_svm *svm, static void nested_svm_transition_tlb_flush(struct kvm_vcpu *vcpu) { + /* + * KVM_REQ_HV_TLB_FLUSH flushes entries from either L1's VP_ID or + * L2's VP_ID upon request from the guest. Make sure we check for + * pending entries in the right FIFO upon L1/L2 transition as these + * requests are put by other vCPUs asynchronously. + */ + if (to_hv_vcpu(vcpu) && npt_enabled) + kvm_make_request(KVM_REQ_HV_TLB_FLUSH, vcpu); + /* * TODO: optimize unconditional TLB flush/MMU sync. 
A partial list of * things to fix before this can be conditional: @@ -824,6 +837,13 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu) return 1; } + /* This fails when VP assist page is enabled but the supplied GPA is bogus */ + ret = kvm_hv_verify_vp_assist(vcpu); + if (ret) { + kvm_inject_gp(vcpu, 0); + return ret; + } + vmcb12_gpa = svm->vmcb->save.rax; ret = kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map); if (ret == -EINVAL) { @@ -1421,6 +1441,7 @@ static int svm_check_nested_events(struct kvm_vcpu *vcpu) int nested_svm_exit_special(struct vcpu_svm *svm) { u32 exit_code = svm->vmcb->control.exit_code; + struct kvm_vcpu *vcpu = &svm->vcpu; switch (exit_code) { case SVM_EXIT_INTR: @@ -1439,6 +1460,13 @@ int nested_svm_exit_special(struct vcpu_svm *svm) return NESTED_EXIT_HOST; break; } + case SVM_EXIT_VMMCALL: + /* Hyper-V L2 TLB flush hypercall is handled by L0 */ + if (guest_hv_cpuid_has_l2_tlb_flush(vcpu) && + nested_svm_l2_tlb_flush_enabled(vcpu) && + kvm_hv_is_tlb_flush_hcall(vcpu)) + return NESTED_EXIT_HOST; + break; default: break; } @@ -1719,6 +1747,9 @@ static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu) return false; } + if (kvm_hv_verify_vp_assist(vcpu)) + return false; + return true; } From f4de6a1fa3ee81197239603756fc5c4259e5ef1b Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 1 Nov 2022 15:54:07 +0100 Subject: [PATCH 1816/4122] KVM: x86: Expose Hyper-V L2 TLB flush feature With both nSVM and nVMX implementations in place, KVM can now expose Hyper-V L2 TLB flush feature to userspace. 
Reviewed-by: Maxim Levitsky Reviewed-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-30-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/hyperv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 15880da73a7b..2c7f2a26421e 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -2779,6 +2779,7 @@ int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, case HYPERV_CPUID_NESTED_FEATURES: ent->eax = evmcs_ver; + ent->eax |= HV_X64_NESTED_DIRECT_FLUSH; ent->eax |= HV_X64_NESTED_MSR_BITMAP; ent->ebx |= HV_X64_NESTED_EVMCS1_PERF_GLOBAL_CTRL; break; From 676a863ce605caca3a559bdb8e40f640c15f1fde Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 1 Nov 2022 15:54:08 +0100 Subject: [PATCH 1817/4122] KVM: selftests: Better XMM read/write helpers set_xmm()/get_xmm() helpers are fairly useless as they only read 64 bits from 128-bit registers. Moreover, these helpers are not used. Borrow _kvm_read_sse_reg()/_kvm_write_sse_reg() from KVM limiting them to XMM0-XMM7 for now. 
Reviewed-by: Maxim Levitsky Reviewed-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-31-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- .../selftests/kvm/include/x86_64/processor.h | 70 ++++++++++--------- 1 file changed, 36 insertions(+), 34 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index f838ac5865dc..a10f39affa45 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -603,71 +603,73 @@ static inline bool this_pmu_has(struct kvm_x86_pmu_feature feature) !this_cpu_has(feature.anti_feature); } -#define SET_XMM(__var, __xmm) \ - asm volatile("movq %0, %%"#__xmm : : "r"(__var) : #__xmm) +typedef u32 __attribute__((vector_size(16))) sse128_t; +#define __sse128_u union { sse128_t vec; u64 as_u64[2]; u32 as_u32[4]; } +#define sse128_lo(x) ({ __sse128_u t; t.vec = x; t.as_u64[0]; }) +#define sse128_hi(x) ({ __sse128_u t; t.vec = x; t.as_u64[1]; }) -static inline void set_xmm(int n, unsigned long val) +static inline void read_sse_reg(int reg, sse128_t *data) { - switch (n) { + switch (reg) { case 0: - SET_XMM(val, xmm0); + asm("movdqa %%xmm0, %0" : "=m"(*data)); break; case 1: - SET_XMM(val, xmm1); + asm("movdqa %%xmm1, %0" : "=m"(*data)); break; case 2: - SET_XMM(val, xmm2); + asm("movdqa %%xmm2, %0" : "=m"(*data)); break; case 3: - SET_XMM(val, xmm3); + asm("movdqa %%xmm3, %0" : "=m"(*data)); break; case 4: - SET_XMM(val, xmm4); + asm("movdqa %%xmm4, %0" : "=m"(*data)); break; case 5: - SET_XMM(val, xmm5); + asm("movdqa %%xmm5, %0" : "=m"(*data)); break; case 6: - SET_XMM(val, xmm6); + asm("movdqa %%xmm6, %0" : "=m"(*data)); break; case 7: - SET_XMM(val, xmm7); + asm("movdqa %%xmm7, %0" : "=m"(*data)); break; + default: + BUG(); } } -#define GET_XMM(__xmm) \ -({ \ - unsigned long __val; \ - asm volatile("movq %%"#__xmm", 
%0" : "=r"(__val)); \ - __val; \ -}) - -static inline unsigned long get_xmm(int n) +static inline void write_sse_reg(int reg, const sse128_t *data) { - assert(n >= 0 && n <= 7); - - switch (n) { + switch (reg) { case 0: - return GET_XMM(xmm0); + asm("movdqa %0, %%xmm0" : : "m"(*data)); + break; case 1: - return GET_XMM(xmm1); + asm("movdqa %0, %%xmm1" : : "m"(*data)); + break; case 2: - return GET_XMM(xmm2); + asm("movdqa %0, %%xmm2" : : "m"(*data)); + break; case 3: - return GET_XMM(xmm3); + asm("movdqa %0, %%xmm3" : : "m"(*data)); + break; case 4: - return GET_XMM(xmm4); + asm("movdqa %0, %%xmm4" : : "m"(*data)); + break; case 5: - return GET_XMM(xmm5); + asm("movdqa %0, %%xmm5" : : "m"(*data)); + break; case 6: - return GET_XMM(xmm6); + asm("movdqa %0, %%xmm6" : : "m"(*data)); + break; case 7: - return GET_XMM(xmm7); + asm("movdqa %0, %%xmm7" : : "m"(*data)); + break; + default: + BUG(); } - - /* never reached */ - return 0; } static inline void cpu_relax(void) From c05a0a71c5d0aea010af19f21ccdc0d576066790 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 1 Nov 2022 15:54:09 +0100 Subject: [PATCH 1818/4122] KVM: selftests: Move HYPERV_LINUX_OS_ID definition to a common header HYPERV_LINUX_OS_ID needs to be written to HV_X64_MSR_GUEST_OS_ID by each Hyper-V specific selftest. 
Reviewed-by: Maxim Levitsky Reviewed-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-32-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/include/x86_64/hyperv.h | 3 +++ tools/testing/selftests/kvm/x86_64/hyperv_features.c | 6 ++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86_64/hyperv.h index b66910702c0a..f0a8a93694b2 100644 --- a/tools/testing/selftests/kvm/include/x86_64/hyperv.h +++ b/tools/testing/selftests/kvm/include/x86_64/hyperv.h @@ -185,4 +185,7 @@ /* hypercall options */ #define HV_HYPERCALL_FAST_BIT BIT(16) +/* Proper HV_X64_MSR_GUEST_OS_ID value */ +#define HYPERV_LINUX_OS_ID ((u64)0x8100 << 48) + #endif /* !SELFTEST_KVM_HYPERV_H */ diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c index 2b6d455acf8a..6558cc61cf69 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c @@ -13,8 +13,6 @@ #include "processor.h" #include "hyperv.h" -#define LINUX_OS_ID ((u64)0x8100 << 48) - static inline uint8_t hypercall(u64 control, vm_vaddr_t input_address, vm_vaddr_t output_address, uint64_t *hv_status) { @@ -72,7 +70,7 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall) GUEST_ASSERT(hcall->control); - wrmsr(HV_X64_MSR_GUEST_OS_ID, LINUX_OS_ID); + wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa); if (!(hcall->control & HV_HYPERCALL_FAST_BIT)) { @@ -170,7 +168,7 @@ static void guest_test_msrs_access(void) */ msr->idx = HV_X64_MSR_GUEST_OS_ID; msr->write = 1; - msr->write_val = LINUX_OS_ID; + msr->write_val = HYPERV_LINUX_OS_ID; msr->available = 1; break; case 3: From caf4110fbaa89a20733facb062381e89961ce698 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov 
Date: Tue, 1 Nov 2022 15:54:10 +0100 Subject: [PATCH 1819/4122] KVM: selftests: Move the function doing Hyper-V hypercall to a common header All Hyper-V specific tests issuing hypercalls need this. Reviewed-by: Maxim Levitsky Reviewed-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-33-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- .../selftests/kvm/include/x86_64/hyperv.h | 19 ++++++++++++++++++ .../selftests/kvm/x86_64/hyperv_features.c | 20 +------------------ 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86_64/hyperv.h index f0a8a93694b2..7ed8f4f5f7d8 100644 --- a/tools/testing/selftests/kvm/include/x86_64/hyperv.h +++ b/tools/testing/selftests/kvm/include/x86_64/hyperv.h @@ -185,6 +185,25 @@ /* hypercall options */ #define HV_HYPERCALL_FAST_BIT BIT(16) +static inline uint8_t hyperv_hypercall(u64 control, vm_vaddr_t input_address, + vm_vaddr_t output_address, + uint64_t *hv_status) +{ + uint64_t error_code; + uint8_t vector; + + /* Note both the hypercall and the "asm safe" clobber r9-r11. 
*/ + asm volatile("mov %[output_address], %%r8\n\t" + KVM_ASM_SAFE("vmcall") + : "=a" (*hv_status), + "+c" (control), "+d" (input_address), + KVM_ASM_SAFE_OUTPUTS(vector, error_code) + : [output_address] "r"(output_address), + "a" (-EFAULT) + : "cc", "memory", "r8", KVM_ASM_SAFE_CLOBBERS); + return vector; +} + /* Proper HV_X64_MSR_GUEST_OS_ID value */ #define HYPERV_LINUX_OS_ID ((u64)0x8100 << 48) diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c index 6558cc61cf69..5ff4ff2365bb 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c @@ -13,24 +13,6 @@ #include "processor.h" #include "hyperv.h" -static inline uint8_t hypercall(u64 control, vm_vaddr_t input_address, - vm_vaddr_t output_address, uint64_t *hv_status) -{ - uint64_t error_code; - uint8_t vector; - - /* Note both the hypercall and the "asm safe" clobber r9-r11. */ - asm volatile("mov %[output_address], %%r8\n\t" - KVM_ASM_SAFE("vmcall") - : "=a" (*hv_status), - "+c" (control), "+d" (input_address), - KVM_ASM_SAFE_OUTPUTS(vector, error_code) - : [output_address] "r"(output_address), - "a" (-EFAULT) - : "cc", "memory", "r8", KVM_ASM_SAFE_CLOBBERS); - return vector; -} - struct msr_data { uint32_t idx; bool available; @@ -80,7 +62,7 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall) input = output = 0; } - vector = hypercall(hcall->control, input, output, &res); + vector = hyperv_hypercall(hcall->control, input, output, &res); if (hcall->ud_expected) { GUEST_ASSERT_2(vector == UD_VECTOR, hcall->control, vector); } else { From 998489245d8469c21f1517c4bbe192e0ef3c3374 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 1 Nov 2022 15:54:11 +0100 Subject: [PATCH 1820/4122] KVM: selftests: Hyper-V PV IPI selftest Introduce a selftest for Hyper-V PV IPI hypercalls (HvCallSendSyntheticClusterIpi, HvCallSendSyntheticClusterIpiEx). 
The test creates one 'sender' vCPU and two 'receiver' vCPUs and then issues various combinations of send IPI hypercalls in both 'normal' and 'fast' (with XMM input where necessary) mode. Later, the test checks whether IPIs were delivered to the expected destination vCPU[s]. Reviewed-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-34-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 1 + .../selftests/kvm/include/x86_64/hyperv.h | 35 +- .../selftests/kvm/x86_64/hyperv_features.c | 2 +- .../testing/selftests/kvm/x86_64/hyperv_ipi.c | 314 ++++++++++++++++++ 5 files changed, 349 insertions(+), 4 deletions(-) create mode 100644 tools/testing/selftests/kvm/x86_64/hyperv_ipi.c diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 582e2e198fbf..3b3218cb46ed 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -25,6 +25,7 @@ /x86_64/hyperv_clock /x86_64/hyperv_cpuid /x86_64/hyperv_features +/x86_64/hyperv_ipi /x86_64/hyperv_svm_test /x86_64/max_vcpuid_cap_test /x86_64/mmio_warning_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index f62dcfcda618..4095b1212f08 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -87,6 +87,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/fix_hypercall_test TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid TEST_GEN_PROGS_x86_64 += x86_64/hyperv_features +TEST_GEN_PROGS_x86_64 += x86_64/hyperv_ipi TEST_GEN_PROGS_x86_64 += x86_64/hyperv_svm_test TEST_GEN_PROGS_x86_64 += x86_64/kvm_clock_test TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86_64/hyperv.h index 7ed8f4f5f7d8..c757e4001173 
100644 --- a/tools/testing/selftests/kvm/include/x86_64/hyperv.h +++ b/tools/testing/selftests/kvm/include/x86_64/hyperv.h @@ -9,6 +9,8 @@ #ifndef SELFTEST_KVM_HYPERV_H #define SELFTEST_KVM_HYPERV_H +#include "processor.h" + #define HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS 0x40000000 #define HYPERV_CPUID_INTERFACE 0x40000001 #define HYPERV_CPUID_VERSION 0x40000002 @@ -184,10 +186,15 @@ /* hypercall options */ #define HV_HYPERCALL_FAST_BIT BIT(16) +#define HV_HYPERCALL_VARHEAD_OFFSET 17 -static inline uint8_t hyperv_hypercall(u64 control, vm_vaddr_t input_address, - vm_vaddr_t output_address, - uint64_t *hv_status) +/* + * Issue a Hyper-V hypercall. Returns exception vector raised or 0, 'hv_status' + * is set to the hypercall status (if no exception occurred). + */ +static inline uint8_t __hyperv_hypercall(u64 control, vm_vaddr_t input_address, + vm_vaddr_t output_address, + uint64_t *hv_status) { uint64_t error_code; uint8_t vector; @@ -204,6 +211,28 @@ static inline uint8_t hyperv_hypercall(u64 control, vm_vaddr_t input_address, return vector; } +/* Issue a Hyper-V hypercall and assert that it succeeded. 
*/ +static inline void hyperv_hypercall(u64 control, vm_vaddr_t input_address, + vm_vaddr_t output_address) +{ + uint64_t hv_status; + uint8_t vector; + + vector = __hyperv_hypercall(control, input_address, output_address, &hv_status); + + GUEST_ASSERT(!vector); + GUEST_ASSERT((hv_status & 0xffff) == 0); +} + +/* Write 'Fast' hypercall input 'data' to the first 'n_sse_regs' SSE regs */ +static inline void hyperv_write_xmm_input(void *data, int n_sse_regs) +{ + int i; + + for (i = 0; i < n_sse_regs; i++) + write_sse_reg(i, (sse128_t *)(data + sizeof(sse128_t) * i)); +} + /* Proper HV_X64_MSR_GUEST_OS_ID value */ #define HYPERV_LINUX_OS_ID ((u64)0x8100 << 48) diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c index 5ff4ff2365bb..3163c3e8db0a 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c @@ -62,7 +62,7 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall) input = output = 0; } - vector = hyperv_hypercall(hcall->control, input, output, &res); + vector = __hyperv_hypercall(hcall->control, input, output, &res); if (hcall->ud_expected) { GUEST_ASSERT_2(vector == UD_VECTOR, hcall->control, vector); } else { diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c b/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c new file mode 100644 index 000000000000..8b791eac7d5a --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c @@ -0,0 +1,314 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hyper-V HvCallSendSyntheticClusterIpi{,Ex} tests + * + * Copyright (C) 2022, Red Hat, Inc. 
+ * + */ + +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include +#include + +#include "kvm_util.h" +#include "hyperv.h" +#include "test_util.h" +#include "vmx.h" + +#define RECEIVER_VCPU_ID_1 2 +#define RECEIVER_VCPU_ID_2 65 + +#define IPI_VECTOR 0xfe + +static volatile uint64_t ipis_rcvd[RECEIVER_VCPU_ID_2 + 1]; + +struct hv_vpset { + u64 format; + u64 valid_bank_mask; + u64 bank_contents[2]; +}; + +enum HV_GENERIC_SET_FORMAT { + HV_GENERIC_SET_SPARSE_4K, + HV_GENERIC_SET_ALL, +}; + +/* HvCallSendSyntheticClusterIpi hypercall */ +struct hv_send_ipi { + u32 vector; + u32 reserved; + u64 cpu_mask; +}; + +/* HvCallSendSyntheticClusterIpiEx hypercall */ +struct hv_send_ipi_ex { + u32 vector; + u32 reserved; + struct hv_vpset vp_set; +}; + +static inline void hv_init(vm_vaddr_t pgs_gpa) +{ + wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); + wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa); +} + +static void receiver_code(void *hcall_page, vm_vaddr_t pgs_gpa) +{ + u32 vcpu_id; + + x2apic_enable(); + hv_init(pgs_gpa); + + vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX); + + /* Signal sender vCPU we're ready */ + ipis_rcvd[vcpu_id] = (u64)-1; + + for (;;) + asm volatile("sti; hlt; cli"); +} + +static void guest_ipi_handler(struct ex_regs *regs) +{ + u32 vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX); + + ipis_rcvd[vcpu_id]++; + wrmsr(HV_X64_MSR_EOI, 1); +} + +static inline void nop_loop(void) +{ + int i; + + for (i = 0; i < 100000000; i++) + asm volatile("nop"); +} + +static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa) +{ + struct hv_send_ipi *ipi = (struct hv_send_ipi *)hcall_page; + struct hv_send_ipi_ex *ipi_ex = (struct hv_send_ipi_ex *)hcall_page; + int stage = 1, ipis_expected[2] = {0}; + + hv_init(pgs_gpa); + GUEST_SYNC(stage++); + + /* Wait for receiver vCPUs to come up */ + while (!ipis_rcvd[RECEIVER_VCPU_ID_1] || !ipis_rcvd[RECEIVER_VCPU_ID_2]) + nop_loop(); + ipis_rcvd[RECEIVER_VCPU_ID_1] = ipis_rcvd[RECEIVER_VCPU_ID_2] = 0; + + /* 'Slow' 
HvCallSendSyntheticClusterIpi to RECEIVER_VCPU_ID_1 */ + ipi->vector = IPI_VECTOR; + ipi->cpu_mask = 1 << RECEIVER_VCPU_ID_1; + hyperv_hypercall(HVCALL_SEND_IPI, pgs_gpa, pgs_gpa + 4096); + nop_loop(); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]); + GUEST_SYNC(stage++); + /* 'Fast' HvCallSendSyntheticClusterIpi to RECEIVER_VCPU_ID_1 */ + hyperv_hypercall(HVCALL_SEND_IPI | HV_HYPERCALL_FAST_BIT, + IPI_VECTOR, 1 << RECEIVER_VCPU_ID_1); + nop_loop(); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]); + GUEST_SYNC(stage++); + + /* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_1 */ + memset(hcall_page, 0, 4096); + ipi_ex->vector = IPI_VECTOR; + ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K; + ipi_ex->vp_set.valid_bank_mask = 1 << 0; + ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1); + hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET), + pgs_gpa, pgs_gpa + 4096); + nop_loop(); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]); + GUEST_SYNC(stage++); + /* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_1 */ + hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 1); + hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT | + (1 << HV_HYPERCALL_VARHEAD_OFFSET), + IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K); + nop_loop(); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]); + GUEST_SYNC(stage++); + + /* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_2 */ + memset(hcall_page, 0, 4096); + ipi_ex->vector = IPI_VECTOR; + ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K; + ipi_ex->vp_set.valid_bank_mask = 1 << 1; + ipi_ex->vp_set.bank_contents[0] = 
BIT(RECEIVER_VCPU_ID_2 - 64); + hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET), + pgs_gpa, pgs_gpa + 4096); + nop_loop(); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ipis_expected[0]); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); + GUEST_SYNC(stage++); + /* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_2 */ + hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 1); + hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT | + (1 << HV_HYPERCALL_VARHEAD_OFFSET), + IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K); + nop_loop(); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ipis_expected[0]); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); + GUEST_SYNC(stage++); + + /* 'Slow' HvCallSendSyntheticClusterIpiEx to both RECEIVER_VCPU_ID_{1,2} */ + memset(hcall_page, 0, 4096); + ipi_ex->vector = IPI_VECTOR; + ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K; + ipi_ex->vp_set.valid_bank_mask = 1 << 1 | 1; + ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1); + ipi_ex->vp_set.bank_contents[1] = BIT(RECEIVER_VCPU_ID_2 - 64); + hyperv_hypercall(HVCALL_SEND_IPI_EX | (2 << HV_HYPERCALL_VARHEAD_OFFSET), + pgs_gpa, pgs_gpa + 4096); + nop_loop(); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); + GUEST_SYNC(stage++); + /* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to both RECEIVER_VCPU_ID_{1, 2} */ + hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 2); + hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT | + (2 << HV_HYPERCALL_VARHEAD_OFFSET), + IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K); + nop_loop(); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); + GUEST_SYNC(stage++); + + /* 'Slow' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL */ + memset(hcall_page, 0, 4096); + ipi_ex->vector = 
IPI_VECTOR; + ipi_ex->vp_set.format = HV_GENERIC_SET_ALL; + hyperv_hypercall(HVCALL_SEND_IPI_EX, pgs_gpa, pgs_gpa + 4096); + nop_loop(); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); + GUEST_SYNC(stage++); + /* + * 'XMM Fast' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL. + * Nothing to write anything to XMM regs. + */ + hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT, + IPI_VECTOR, HV_GENERIC_SET_ALL); + nop_loop(); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); + GUEST_SYNC(stage++); + + GUEST_DONE(); +} + +static void *vcpu_thread(void *arg) +{ + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)arg; + int old, r; + + r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old); + TEST_ASSERT(!r, "pthread_setcanceltype failed on vcpu_id=%u with errno=%d", + vcpu->id, r); + + vcpu_run(vcpu); + + TEST_FAIL("vCPU %u exited unexpectedly", vcpu->id); + + return NULL; +} + +static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu) +{ + void *retval; + int r; + + r = pthread_cancel(thread); + TEST_ASSERT(!r, "pthread_cancel on vcpu_id=%d failed with errno=%d", + vcpu->id, r); + + r = pthread_join(thread, &retval); + TEST_ASSERT(!r, "pthread_join on vcpu_id=%d failed with errno=%d", + vcpu->id, r); + TEST_ASSERT(retval == PTHREAD_CANCELED, + "expected retval=%p, got %p", PTHREAD_CANCELED, + retval); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vm *vm; + struct kvm_vcpu *vcpu[3]; + unsigned int exit_reason; + vm_vaddr_t hcall_page; + pthread_t threads[2]; + int stage = 1, r; + struct ucall uc; + + vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code); + + /* Hypercall input/output */ + hcall_page = vm_vaddr_alloc_pages(vm, 2); + memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize()); + + vm_init_descriptor_tables(vm); + + vcpu[1] = 
vm_vcpu_add(vm, RECEIVER_VCPU_ID_1, receiver_code); + vcpu_init_descriptor_tables(vcpu[1]); + vcpu_args_set(vcpu[1], 2, hcall_page, addr_gva2gpa(vm, hcall_page)); + vcpu_set_msr(vcpu[1], HV_X64_MSR_VP_INDEX, RECEIVER_VCPU_ID_1); + vcpu_set_hv_cpuid(vcpu[1]); + + vcpu[2] = vm_vcpu_add(vm, RECEIVER_VCPU_ID_2, receiver_code); + vcpu_init_descriptor_tables(vcpu[2]); + vcpu_args_set(vcpu[2], 2, hcall_page, addr_gva2gpa(vm, hcall_page)); + vcpu_set_msr(vcpu[2], HV_X64_MSR_VP_INDEX, RECEIVER_VCPU_ID_2); + vcpu_set_hv_cpuid(vcpu[2]); + + vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler); + + vcpu_args_set(vcpu[0], 2, hcall_page, addr_gva2gpa(vm, hcall_page)); + vcpu_set_hv_cpuid(vcpu[0]); + + r = pthread_create(&threads[0], NULL, vcpu_thread, vcpu[1]); + TEST_ASSERT(!r, "pthread_create failed errno=%d", r); + + r = pthread_create(&threads[1], NULL, vcpu_thread, vcpu[2]); + TEST_ASSERT(!r, "pthread_create failed errno=%d", errno); + + while (true) { + vcpu_run(vcpu[0]); + + exit_reason = vcpu[0]->run->exit_reason; + TEST_ASSERT(exit_reason == KVM_EXIT_IO, + "unexpected exit reason: %u (%s)", + exit_reason, exit_reason_str(exit_reason)); + + switch (get_ucall(vcpu[0], &uc)) { + case UCALL_SYNC: + TEST_ASSERT(uc.args[1] == stage, + "Unexpected stage: %ld (%d expected)\n", + uc.args[1], stage); + break; + case UCALL_DONE: + goto done; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + /* NOT REACHED */ + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + + stage++; + } + +done: + cancel_join_vcpu_thread(threads[0], vcpu[1]); + cancel_join_vcpu_thread(threads[1], vcpu[2]); + kvm_vm_free(vm); + + return r; +} From 56fc7732031d9999a35cb43b3de30d52ae30fd3f Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 1 Nov 2022 15:54:12 +0100 Subject: [PATCH 1821/4122] KVM: selftests: Fill in vm->vpages_mapped bitmap in virt_map() too Similar to vm_vaddr_alloc(), virt_map() needs to reflect the mapping in vm->vpages_mapped. 
While on it, remove unneeded code wrapping in vm_vaddr_alloc(). Reviewed-by: Andrew Jones Reviewed-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-35-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/lib/kvm_util.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 5ac8f207ed92..a3ff96ec0235 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1319,8 +1319,7 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min) virt_pg_map(vm, vaddr, paddr); - sparsebit_set(vm->vpages_mapped, - vaddr >> vm->page_shift); + sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift); } return vaddr_start; @@ -1393,6 +1392,8 @@ void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, virt_pg_map(vm, vaddr, paddr); vaddr += page_size; paddr += page_size; + + sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift); } } From 2d4a5f91837f4a75cf02010b9a52bfe52d0efd40 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 1 Nov 2022 15:54:13 +0100 Subject: [PATCH 1822/4122] KVM: selftests: Export vm_vaddr_unused_gap() to make it possible to request unmapped ranges Currently, tests can only request a new vaddr range by using vm_vaddr_alloc()/vm_vaddr_alloc_page()/vm_vaddr_alloc_pages() but these functions allocate and map physical pages too. Make it possible to request unmapped range too. 
Reviewed-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-36-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/include/kvm_util_base.h | 1 + tools/testing/selftests/kvm/lib/kvm_util.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h index 228212ede05e..c7685c7038ff 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -385,6 +385,7 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags); void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa); void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot); struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id); +vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min); vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min); vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages); vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index a3ff96ec0235..1d26a2160178 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1214,8 +1214,8 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) * TEST_ASSERT failure occurs for invalid input or no area of at least * sz unallocated bytes >= vaddr_min is available. 
*/ -static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, - vm_vaddr_t vaddr_min) +vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, + vm_vaddr_t vaddr_min) { uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift; From 8f649b57856b855f8a28724321e7ae72e31d513a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 18 Nov 2022 13:58:51 -0800 Subject: [PATCH 1823/4122] IB/hfi1: Replace 1-element array with singleton Zero-length arrays are deprecated[1] and are being replaced with flexible array members in support of the ongoing efforts to tighten the FORTIFY_SOURCE routines on memcpy(), correctly instrument array indexing with UBSAN_BOUNDS, and to globally enable -fstrict-flex-arrays=3. Replace zero-length array with flexible-array member "lvs" in struct opa_port_data_counters_msg and struct opa_port_error_counters64_msg. Additionally, the "port" member of several structs is defined as a single-element, but is only ever accessed at index 0. Replace it with a singleton so that flexible array usage is sane. This results in no differences in binary output. 
[1] https://github.com/KSPP/linux/issues/78 Link: https://lore.kernel.org/r/20221118215847.never.416-kees@kernel.org Signed-off-by: Kees Cook Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/mad.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 4146a2113a95..e5e783c45810 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -2437,9 +2437,9 @@ struct opa_port_data_counters_msg { __be64 port_vl_xmit_wait_data; __be64 port_vl_rcv_bubble; __be64 port_vl_mark_fecn; - } vls[0]; + } vls[]; /* array size defined by #bits set in vl_select_mask*/ - } port[1]; /* array size defined by #ports in attribute modifier */ + } port; }; struct opa_port_error_counters64_msg { @@ -2470,9 +2470,9 @@ struct opa_port_error_counters64_msg { u8 reserved3[7]; struct _vls_ectrs { __be64 port_vl_xmit_discards; - } vls[0]; + } vls[]; /* array size defined by #bits set in vl_select_mask */ - } port[1]; /* array size defined by #ports in attribute modifier */ + } port; }; struct opa_port_error_info_msg { @@ -2543,7 +2543,7 @@ struct opa_port_error_info_msg { u8 error_info; } __packed fm_config_ei; __u32 reserved9; - } port[1]; /* actual array size defined by #ports in attr modifier */ + } port; }; /* opa_port_error_info_msg error_info_select_mask bit definitions */ @@ -2966,7 +2966,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp, } /* Sanity check */ - response_data_size = struct_size(req, port[0].vls, num_vls); + response_data_size = struct_size(req, port.vls, num_vls); if (response_data_size > sizeof(pmp->data)) { pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; @@ -2986,7 +2986,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp, return reply((struct ib_mad_hdr *)pmp); } - rsp = &req->port[0]; + rsp = &req->port; memset(rsp, 0, sizeof(*rsp)); rsp->port_number = port; @@ -3182,7 +3182,7 @@ static int 
pma_get_opa_porterrors(struct opa_pma_mad *pmp, return reply((struct ib_mad_hdr *)pmp); } - response_data_size = struct_size(req, port[0].vls, num_vls); + response_data_size = struct_size(req, port.vls, num_vls); if (response_data_size > sizeof(pmp->data)) { pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; @@ -3201,7 +3201,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp, return reply((struct ib_mad_hdr *)pmp); } - rsp = &req->port[0]; + rsp = &req->port; ibp = to_iport(ibdev, port_num); ppd = ppd_from_ibp(ibp); @@ -3340,7 +3340,7 @@ static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp, u64 reg; req = (struct opa_port_error_info_msg *)pmp->data; - rsp = &req->port[0]; + rsp = &req->port; num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod)); num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3])); @@ -3590,7 +3590,7 @@ static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp, u32 error_info_select; req = (struct opa_port_error_info_msg *)pmp->data; - rsp = &req->port[0]; + rsp = &req->port; num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod)); num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3])); From 8e1a76493be9868fef34f977296a69769dcdfa6f Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Fri, 11 Nov 2022 21:35:37 -0500 Subject: [PATCH 1824/4122] RDMA/rxe: Remove reliable datagram support The rdma_rxe driver does not actually support the reliable datagram transport but contains a variable with RD opcodes in driver code. And this variable is never used. So remove it. 
Link: https://lore.kernel.org/r/20221112023537.432912-1-yanjun.zhu@intel.com Signed-off-by: Zhu Yanjun Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_hdr.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_hdr.h b/drivers/infiniband/sw/rxe/rxe_hdr.h index e432f9e37795..804594b76040 100644 --- a/drivers/infiniband/sw/rxe/rxe_hdr.h +++ b/drivers/infiniband/sw/rxe/rxe_hdr.h @@ -742,7 +742,6 @@ enum aeth_syndrome { AETH_NAK_INVALID_REQ = 0x61, AETH_NAK_REM_ACC_ERR = 0x62, AETH_NAK_REM_OP_ERR = 0x63, - AETH_NAK_INV_RD_REQ = 0x64, }; static inline u8 __aeth_syn(void *arg) From 7d984dac8f6bf4ebd3398af82b357e1d181ecaac Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Sun, 30 Oct 2022 03:04:33 +0000 Subject: [PATCH 1825/4122] RDMA/rxe: Fix mr->map double free rxe_mr_cleanup() which tries to free mr->map again will be called when rxe_mr_init_user() fails: CPU: 0 PID: 4917 Comm: rdma_flush_serv Kdump: loaded Not tainted 6.1.0-rc1-roce-flush+ #25 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl+0x45/0x5d panic+0x19e/0x349 end_report.part.0+0x54/0x7c kasan_report.cold+0xa/0xf rxe_mr_cleanup+0x9d/0xf0 [rdma_rxe] __rxe_cleanup+0x10a/0x1e0 [rdma_rxe] rxe_reg_user_mr+0xb7/0xd0 [rdma_rxe] ib_uverbs_reg_mr+0x26a/0x480 [ib_uverbs] ib_uverbs_handler_UVERBS_METHOD_INVOKE_WRITE+0x1a2/0x250 [ib_uverbs] ib_uverbs_cmd_verbs+0x1397/0x15a0 [ib_uverbs] This issue was firstly exposed since commit b18c7da63fcb ("RDMA/rxe: Fix memory leak in error path code") and then we fixed it in commit 8ff5f5d9d8cf ("RDMA/rxe: Prevent double freeing rxe_map_set()") but this fix was reverted together at last by commit 1e75550648da (Revert "RDMA/rxe: Create duplicate mapping tables for FMRs") Simply let rxe_mr_cleanup() always handle freeing the mr->map once it is successfully allocated. 
Fixes: 1e75550648da ("Revert "RDMA/rxe: Create duplicate mapping tables for FMRs"") Link: https://lore.kernel.org/r/1667099073-2-1-git-send-email-lizhijian@fujitsu.com Signed-off-by: Li Zhijian Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_mr.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index cd846cf82a84..b1423000e4bc 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -97,6 +97,7 @@ err2: kfree(mr->map[i]); kfree(mr->map); + mr->map = NULL; err1: return -ENOMEM; } @@ -120,7 +121,6 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova, int num_buf; void *vaddr; int err; - int i; umem = ib_umem_get(&rxe->ib_dev, start, length, access); if (IS_ERR(umem)) { @@ -159,9 +159,8 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova, if (!vaddr) { rxe_dbg_mr(mr, "Unable to get virtual address\n"); err = -ENOMEM; - goto err_cleanup_map; + goto err_release_umem; } - buf->addr = (uintptr_t)vaddr; buf->size = PAGE_SIZE; num_buf++; @@ -178,10 +177,6 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova, return 0; -err_cleanup_map: - for (i = 0; i < mr->num_map; i++) - kfree(mr->map[i]); - kfree(mr->map); err_release_umem: ib_umem_release(umem); err_out: From 8eaa6f7d569b4a22bfc1b0a3fdfeeb401feb65a4 Mon Sep 17 00:00:00 2001 From: Luoyouming Date: Tue, 8 Nov 2022 21:38:46 +0800 Subject: [PATCH 1826/4122] RDMA/hns: Fix ext_sge num error when post send In the HNS ROCE driver, the sge is divided into standard sge and extended sge. There are 2 standard sge in RC/XRC, and the UD standard sge is 0. In the scenario of RC SQ inline, if the data does not exceed 32 bytes, the standard sge will be used. If it exceeds 32 bytes, only the extended sge will be used to fill the data.
Currently, when filling the extended sge, max_gs is used directly as the number of extended sge, without subtracting the number of standard sge. This is a logic error. The new algorithm subtracts the number of standard sge from max_gs to get the actual number of extended sge. Fixes: 30b707886aeb ("RDMA/hns: Support inline data in extented sge space for RC") Link: https://lore.kernel.org/r/20221108133847.2304539-2-xuhaoyue1@hisilicon.com Signed-off-by: Luoyouming Signed-off-by: Haoyue Xu Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 1ead35fb031b..dcb59c05edfd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -188,20 +188,29 @@ static void set_atomic_seg(const struct ib_send_wr *wr, hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SGE_NUM, valid_num_sge); } +static unsigned int get_std_sge_num(struct hns_roce_qp *qp) +{ + if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_UD) + return 0; + + return HNS_ROCE_SGE_IN_WQE; +} + static int fill_ext_sge_inl_data(struct hns_roce_qp *qp, const struct ib_send_wr *wr, unsigned int *sge_idx, u32 msg_len) { struct ib_device *ibdev = &(to_hr_dev(qp->ibqp.device))->ib_dev; - unsigned int ext_sge_sz = qp->sq.max_gs * HNS_ROCE_SGE_SIZE; unsigned int left_len_in_pg; unsigned int idx = *sge_idx; + unsigned int std_sge_num; unsigned int i = 0; unsigned int len; void *addr; void *dseg; - if (msg_len > ext_sge_sz) { + std_sge_num = get_std_sge_num(qp); + if (msg_len > (qp->sq.max_gs - std_sge_num) * HNS_ROCE_SGE_SIZE) { ibdev_err(ibdev, "no enough extended sge space for inline data.\n"); return -EINVAL; From 0c5e259b06a8efc69f929ad777ea49281bb58e37 Mon Sep 17 00:00:00 2001 From: Luoyouming Date: Tue, 8 Nov 2022 21:38:47 +0800 Subject: [PATCH 1827/4122] RDMA/hns: Fix
incorrect sge nums calculation The user usually configures the number of sge through the max_send_sge parameter when creating qp, and configures the maximum size of inline data that can be sent through max_inline_data. Inline uses sge to fill data to send. Expect the following: 1) When the sge space cannot hold inline data, the sge space needs to be expanded to accommodate all inline data 2) When the sge space is enough to accommodate inline data, the upper limit of inline data can be increased so that users can send larger inline data Currently case one is not implemented. When the inline data is larger than the sge space, an error of insufficient sge space occurs. This part of the code needs to be reimplemented according to the expected rules. The calculation method of sge num is modified to take the maximum value of max_send_sge and the sge for max_inline_data to solve this problem. Fixes: 05201e01be93 ("RDMA/hns: Refactor process of setting extended sge") Fixes: 30b707886aeb ("RDMA/hns: Support inline data in extented sge space for RC") Link: https://lore.kernel.org/r/20221108133847.2304539-3-xuhaoyue1@hisilicon.com Signed-off-by: Luoyouming Signed-off-by: Haoyue Xu Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 3 + drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 12 +-- drivers/infiniband/hw/hns/hns_roce_main.c | 18 +++- drivers/infiniband/hw/hns/hns_roce_qp.c | 107 ++++++++++++++++---- include/uapi/rdma/hns-abi.h | 15 +++ 5 files changed, 125 insertions(+), 30 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 723e55a7de8d..f701cc86896b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -202,6 +202,7 @@ struct hns_roce_ucontext { struct list_head page_list; struct mutex page_mutex; struct hns_user_mmap_entry *db_mmap_entry; + u32 config; }; struct hns_roce_pd { @@ -334,6 +335,7 @@ struct hns_roce_wq { 
u32 head; u32 tail; void __iomem *db_reg; + u32 ext_sge_cnt; }; struct hns_roce_sge { @@ -635,6 +637,7 @@ struct hns_roce_qp { struct list_head rq_node; /* all recv qps are on a list */ struct list_head sq_node; /* all send qps are on a list */ struct hns_user_mmap_entry *dwqe_mmap_entry; + u32 config; }; struct hns_roce_ib_iboe { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index dcb59c05edfd..939811867249 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -188,14 +188,6 @@ static void set_atomic_seg(const struct ib_send_wr *wr, hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SGE_NUM, valid_num_sge); } -static unsigned int get_std_sge_num(struct hns_roce_qp *qp) -{ - if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_UD) - return 0; - - return HNS_ROCE_SGE_IN_WQE; -} - static int fill_ext_sge_inl_data(struct hns_roce_qp *qp, const struct ib_send_wr *wr, unsigned int *sge_idx, u32 msg_len) @@ -203,14 +195,12 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp, struct ib_device *ibdev = &(to_hr_dev(qp->ibqp.device))->ib_dev; unsigned int left_len_in_pg; unsigned int idx = *sge_idx; - unsigned int std_sge_num; unsigned int i = 0; unsigned int len; void *addr; void *dseg; - std_sge_num = get_std_sge_num(qp); - if (msg_len > (qp->sq.max_gs - std_sge_num) * HNS_ROCE_SGE_SIZE) { + if (msg_len > qp->sq.ext_sge_cnt * HNS_ROCE_SGE_SIZE) { ibdev_err(ibdev, "no enough extended sge space for inline data.\n"); return -EINVAL; diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index dcf89689a4c6..8ba68ac12388 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -354,10 +354,11 @@ static int hns_roce_alloc_uar_entry(struct ib_ucontext *uctx) static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) { - int ret; struct 
hns_roce_ucontext *context = to_hr_ucontext(uctx); - struct hns_roce_ib_alloc_ucontext_resp resp = {}; struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device); + struct hns_roce_ib_alloc_ucontext_resp resp = {}; + struct hns_roce_ib_alloc_ucontext ucmd = {}; + int ret; if (!hr_dev->active) return -EAGAIN; @@ -365,6 +366,19 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, resp.qp_tab_size = hr_dev->caps.num_qps; resp.srq_tab_size = hr_dev->caps.num_srqs; + ret = ib_copy_from_udata(&ucmd, udata, + min(udata->inlen, sizeof(ucmd))); + if (ret) + return ret; + + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + context->config = ucmd.config & HNS_ROCE_EXSGE_FLAGS; + + if (context->config & HNS_ROCE_EXSGE_FLAGS) { + resp.config |= HNS_ROCE_RSP_EXSGE_FLAGS; + resp.max_inline_data = hr_dev->caps.max_sq_inline; + } + ret = hns_roce_uar_alloc(hr_dev, &context->uar); if (ret) goto error_fail_uar_alloc; diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index f0bd82a18069..0ae335fb205c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -476,38 +476,109 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, return 0; } -static u32 get_wqe_ext_sge_cnt(struct hns_roce_qp *qp) +static u32 get_max_inline_data(struct hns_roce_dev *hr_dev, + struct ib_qp_cap *cap) { - /* GSI/UD QP only has extended sge */ - if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_UD) - return qp->sq.max_gs; - - if (qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) - return qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE; + if (cap->max_inline_data) { + cap->max_inline_data = roundup_pow_of_two(cap->max_inline_data); + return min(cap->max_inline_data, + hr_dev->caps.max_sq_inline); + } return 0; } +static void update_inline_data(struct hns_roce_qp *hr_qp, + struct ib_qp_cap *cap) +{ + u32 sge_num = hr_qp->sq.ext_sge_cnt; + + if (hr_qp->config & HNS_ROCE_EXSGE_FLAGS) { + if 
(!(hr_qp->ibqp.qp_type == IB_QPT_GSI || + hr_qp->ibqp.qp_type == IB_QPT_UD)) + sge_num = max((u32)HNS_ROCE_SGE_IN_WQE, sge_num); + + cap->max_inline_data = max(cap->max_inline_data, + sge_num * HNS_ROCE_SGE_SIZE); + } + + hr_qp->max_inline_data = cap->max_inline_data; +} + +static u32 get_sge_num_from_max_send_sge(bool is_ud_or_gsi, + u32 max_send_sge) +{ + unsigned int std_sge_num; + unsigned int min_sge; + + std_sge_num = is_ud_or_gsi ? 0 : HNS_ROCE_SGE_IN_WQE; + min_sge = is_ud_or_gsi ? 1 : 0; + return max_send_sge > std_sge_num ? (max_send_sge - std_sge_num) : + min_sge; +} + +static unsigned int get_sge_num_from_max_inl_data(bool is_ud_or_gsi, + u32 max_inline_data) +{ + unsigned int inline_sge; + + inline_sge = roundup_pow_of_two(max_inline_data) / HNS_ROCE_SGE_SIZE; + + /* + * if max_inline_data less than + * HNS_ROCE_SGE_IN_WQE * HNS_ROCE_SGE_SIZE, + * In addition to ud's mode, no need to extend sge. + */ + if (!is_ud_or_gsi && inline_sge <= HNS_ROCE_SGE_IN_WQE) + inline_sge = 0; + + return inline_sge; +} + static void set_ext_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt, struct hns_roce_qp *hr_qp, struct ib_qp_cap *cap) { + bool is_ud_or_gsi = (hr_qp->ibqp.qp_type == IB_QPT_GSI || + hr_qp->ibqp.qp_type == IB_QPT_UD); + unsigned int std_sge_num; + u32 inline_ext_sge = 0; + u32 ext_wqe_sge_cnt; u32 total_sge_cnt; - u32 wqe_sge_cnt; + + cap->max_inline_data = get_max_inline_data(hr_dev, cap); hr_qp->sge.sge_shift = HNS_ROCE_SGE_SHIFT; + std_sge_num = is_ud_or_gsi ? 0 : HNS_ROCE_SGE_IN_WQE; + ext_wqe_sge_cnt = get_sge_num_from_max_send_sge(is_ud_or_gsi, + cap->max_send_sge); - hr_qp->sq.max_gs = max(1U, cap->max_send_sge); + if (hr_qp->config & HNS_ROCE_EXSGE_FLAGS) { + inline_ext_sge = max(ext_wqe_sge_cnt, + get_sge_num_from_max_inl_data(is_ud_or_gsi, + cap->max_inline_data)); + hr_qp->sq.ext_sge_cnt = inline_ext_sge ? 
+ roundup_pow_of_two(inline_ext_sge) : 0; - wqe_sge_cnt = get_wqe_ext_sge_cnt(hr_qp); + hr_qp->sq.max_gs = max(1U, (hr_qp->sq.ext_sge_cnt + std_sge_num)); + hr_qp->sq.max_gs = min(hr_qp->sq.max_gs, hr_dev->caps.max_sq_sg); + + ext_wqe_sge_cnt = hr_qp->sq.ext_sge_cnt; + } else { + hr_qp->sq.max_gs = max(1U, cap->max_send_sge); + hr_qp->sq.max_gs = min(hr_qp->sq.max_gs, hr_dev->caps.max_sq_sg); + hr_qp->sq.ext_sge_cnt = hr_qp->sq.max_gs; + } /* If the number of extended sge is not zero, they MUST use the * space of HNS_HW_PAGE_SIZE at least. */ - if (wqe_sge_cnt) { - total_sge_cnt = roundup_pow_of_two(sq_wqe_cnt * wqe_sge_cnt); + if (ext_wqe_sge_cnt) { + total_sge_cnt = roundup_pow_of_two(sq_wqe_cnt * ext_wqe_sge_cnt); hr_qp->sge.sge_cnt = max(total_sge_cnt, (u32)HNS_HW_PAGE_SIZE / HNS_ROCE_SGE_SIZE); } + + update_inline_data(hr_qp, cap); } static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev, @@ -556,6 +627,7 @@ static int set_user_sq_size(struct hns_roce_dev *hr_dev, hr_qp->sq.wqe_shift = ucmd->log_sq_stride; hr_qp->sq.wqe_cnt = cnt; + cap->max_send_sge = hr_qp->sq.max_gs; return 0; } @@ -986,13 +1058,9 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct hns_roce_ib_create_qp *ucmd) { struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_ucontext *uctx; int ret; - if (init_attr->cap.max_inline_data > hr_dev->caps.max_sq_inline) - init_attr->cap.max_inline_data = hr_dev->caps.max_sq_inline; - - hr_qp->max_inline_data = init_attr->cap.max_inline_data; - if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) hr_qp->sq_signal_bits = IB_SIGNAL_ALL_WR; else @@ -1015,12 +1083,17 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, return ret; } + uctx = rdma_udata_to_drv_context(udata, struct hns_roce_ucontext, + ibucontext); + hr_qp->config = uctx->config; ret = set_user_sq_size(hr_dev, &init_attr->cap, hr_qp, ucmd); if (ret) ibdev_err(ibdev, "failed to set user SQ size, ret = %d.\n", 
ret); } else { + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + hr_qp->config = HNS_ROCE_EXSGE_FLAGS; ret = set_kernel_sq_size(hr_dev, &init_attr->cap, hr_qp); if (ret) ibdev_err(ibdev, diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h index f6fde06db4b4..745790ce3c26 100644 --- a/include/uapi/rdma/hns-abi.h +++ b/include/uapi/rdma/hns-abi.h @@ -85,11 +85,26 @@ struct hns_roce_ib_create_qp_resp { __aligned_u64 dwqe_mmap_key; }; +enum { + HNS_ROCE_EXSGE_FLAGS = 1 << 0, +}; + +enum { + HNS_ROCE_RSP_EXSGE_FLAGS = 1 << 0, +}; + struct hns_roce_ib_alloc_ucontext_resp { __u32 qp_tab_size; __u32 cqe_size; __u32 srq_tab_size; __u32 reserved; + __u32 config; + __u32 max_inline_data; +}; + +struct hns_roce_ib_alloc_ucontext { + __u32 config; + __u32 reserved; }; struct hns_roce_ib_alloc_pd_resp { From 5f18e9f8868c6d4eae71678e7ebd4977b7d8c8cf Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Mon, 19 Sep 2022 10:56:37 -0500 Subject: [PATCH 1828/4122] iommu/amd: Fix ivrs_acpihid cmdline parsing code The second (UID) strcmp in acpi_dev_hid_uid_match considers "0" and "00" different, which can prevent device registration. Have the AMD IOMMU driver's ivrs_acpihid parsing code remove any leading zeroes to make the UID strcmp succeed. Now users can safely specify "AMDxxxxx:00" or "AMDxxxxx:0" and expect the same behaviour. 
Fixes: ca3bf5d47cec ("iommu/amd: Introduces ivrs_acpihid kernel parameter") Signed-off-by: Kim Phillips Cc: stable@vger.kernel.org Cc: Suravee Suthikulpanit Cc: Joerg Roedel Link: https://lore.kernel.org/r/20220919155638.391481-1-kim.phillips@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/init.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 1a2d425bf568..d14da30b8706 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -3488,6 +3488,13 @@ static int __init parse_ivrs_acpihid(char *str) return 1; } + /* + * Ignore leading zeroes after ':', so e.g., AMDI0095:00 + * will match AMDI0095:0 in the second strcmp in acpi_dev_hid_uid_match + */ + while (*uid == '0' && *(uid + 1)) + uid++; + i = early_acpihid_map_size++; memcpy(early_acpihid_map[i].hid, hid, strlen(hid)); memcpy(early_acpihid_map[i].uid, uid, strlen(uid)); From 1198d2316dc4265a97d0e8445a22c7a6d17580a4 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Mon, 19 Sep 2022 10:56:38 -0500 Subject: [PATCH 1829/4122] iommu/amd: Fix ill-formed ivrs_ioapic, ivrs_hpet and ivrs_acpihid options Currently, these options cause the following libkmod error: libkmod: ERROR ../libkmod/libkmod-config.c:489 kcmdline_parse_result: \ Ignoring bad option on kernel command line while parsing module \ name: 'ivrs_xxxx[XX:XX' Fix by introducing a new parameter format for these options and throw a warning for the deprecated format. Users are still allowed to omit the PCI Segment if zero. Adding a Link: to the reason why we're modding the syntax parsing in the driver and not in libkmod. 
Fixes: ca3bf5d47cec ("iommu/amd: Introduces ivrs_acpihid kernel parameter") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/linux-modules/20200310082308.14318-2-lucas.demarchi@intel.com/ Reported-by: Kim Phillips Co-developed-by: Suravee Suthikulpanit Signed-off-by: Suravee Suthikulpanit Signed-off-by: Kim Phillips Link: https://lore.kernel.org/r/20220919155638.391481-2-kim.phillips@amd.com Signed-off-by: Joerg Roedel --- .../admin-guide/kernel-parameters.txt | 27 +++++-- drivers/iommu/amd/init.c | 77 +++++++++++++------ 2 files changed, 75 insertions(+), 29 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index a465d5242774..bb1c62314f9e 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2300,7 +2300,13 @@ Provide an override to the IOAPIC-ID<->DEVICE-ID mapping provided in the IVRS ACPI table. By default, PCI segment is 0, and can be omitted. - For example: + + For example, to map IOAPIC-ID decimal 10 to + PCI segment 0x1 and PCI device 00:14.0, + write the parameter as: + ivrs_ioapic=10@0001:00:14.0 + + Deprecated formats: * To map IOAPIC-ID decimal 10 to PCI device 00:14.0 write the parameter as: ivrs_ioapic[10]=00:14.0 @@ -2312,7 +2318,13 @@ Provide an override to the HPET-ID<->DEVICE-ID mapping provided in the IVRS ACPI table. By default, PCI segment is 0, and can be omitted. - For example: + + For example, to map HPET-ID decimal 10 to + PCI segment 0x1 and PCI device 00:14.0, + write the parameter as: + ivrs_hpet=10@0001:00:14.0 + + Deprecated formats: * To map HPET-ID decimal 0 to PCI device 00:14.0 write the parameter as: ivrs_hpet[0]=00:14.0 @@ -2323,15 +2335,20 @@ ivrs_acpihid [HW,X86-64] Provide an override to the ACPI-HID:UID<->DEVICE-ID mapping provided in the IVRS ACPI table. + By default, PCI segment is 0, and can be omitted. 
For example, to map UART-HID:UID AMD0020:0 to PCI segment 0x1 and PCI device ID 00:14.5, write the parameter as: - ivrs_acpihid[0001:00:14.5]=AMD0020:0 + ivrs_acpihid=AMD0020:0@0001:00:14.5 - By default, PCI segment is 0, and can be omitted. - For example, PCI device 00:14.5 write the parameter as: + Deprecated formats: + * To map UART-HID:UID AMD0020:0 to PCI segment is 0, + PCI device ID 00:14.5, write the parameter as: ivrs_acpihid[00:14.5]=AMD0020:0 + * To map UART-HID:UID AMD0020:0 to PCI segment 0x1 and + PCI device ID 00:14.5, write the parameter as: + ivrs_acpihid[0001:00:14.5]=AMD0020:0 js= [HW,JOY] Analog joystick See Documentation/input/joydev/joystick.rst. diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index d14da30b8706..34029d116107 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -3402,18 +3402,24 @@ static int __init parse_amd_iommu_options(char *str) static int __init parse_ivrs_ioapic(char *str) { u32 seg = 0, bus, dev, fn; - int ret, id, i; + int id, i; u32 devid; - ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn); - if (ret != 4) { - ret = sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn); - if (ret != 5) { - pr_err("Invalid command line: ivrs_ioapic%s\n", str); - return 1; - } + if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 || + sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) + goto found; + + if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 || + sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) { + pr_warn("ivrs_ioapic%s option format deprecated; use ivrs_ioapic=%d@%04x:%02x:%02x.%d instead\n", + str, id, seg, bus, dev, fn); + goto found; } + pr_err("Invalid command line: ivrs_ioapic%s\n", str); + return 1; + +found: if (early_ioapic_map_size == EARLY_MAP_SIZE) { pr_err("Early IOAPIC map overflow - ignoring ivrs_ioapic%s\n", str); @@ -3434,18 +3440,24 @@ static int __init parse_ivrs_ioapic(char *str) static int __init 
parse_ivrs_hpet(char *str) { u32 seg = 0, bus, dev, fn; - int ret, id, i; + int id, i; u32 devid; - ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn); - if (ret != 4) { - ret = sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn); - if (ret != 5) { - pr_err("Invalid command line: ivrs_hpet%s\n", str); - return 1; - } + if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 || + sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) + goto found; + + if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 || + sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) { + pr_warn("ivrs_hpet%s option format deprecated; use ivrs_hpet=%d@%04x:%02x:%02x.%d instead\n", + str, id, seg, bus, dev, fn); + goto found; } + pr_err("Invalid command line: ivrs_hpet%s\n", str); + return 1; + +found: if (early_hpet_map_size == EARLY_MAP_SIZE) { pr_err("Early HPET map overflow - ignoring ivrs_hpet%s\n", str); @@ -3466,19 +3478,36 @@ static int __init parse_ivrs_hpet(char *str) static int __init parse_ivrs_acpihid(char *str) { u32 seg = 0, bus, dev, fn; - char *hid, *uid, *p; + char *hid, *uid, *p, *addr; char acpiid[ACPIHID_UID_LEN + ACPIHID_HID_LEN] = {0}; - int ret, i; + int i; - ret = sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid); - if (ret != 4) { - ret = sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid); - if (ret != 5) { - pr_err("Invalid command line: ivrs_acpihid(%s)\n", str); - return 1; + addr = strchr(str, '@'); + if (!addr) { + if (sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid) == 4 || + sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid) == 5) { + pr_warn("ivrs_acpihid%s option format deprecated; use ivrs_acpihid=%s@%04x:%02x:%02x.%d instead\n", + str, acpiid, seg, bus, dev, fn); + goto found; } + goto not_found; } + /* We have the '@', make it the terminator to get just the acpiid */ + *addr++ = 0; + + if (sscanf(str, "=%s", acpiid) != 1) + goto not_found; + + if (sscanf(addr, "%x:%x.%x", 
&bus, &dev, &fn) == 3 || + sscanf(addr, "%x:%x:%x.%x", &seg, &bus, &dev, &fn) == 4) + goto found; + +not_found: + pr_err("Invalid command line: ivrs_acpihid%s\n", str); + return 1; + +found: p = acpiid; hid = strsep(&p, ":"); uid = p; From 73b6924cdebc899de9b719e1319aa86c6bed4acf Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Sat, 29 Oct 2022 18:35:50 +0800 Subject: [PATCH 1830/4122] iommu/mediatek: Check return value after calling platform_get_resource() platform_get_resource() may return NULL pointer, we need check its return value to avoid null-ptr-deref in resource_size(). Fixes: 42d57fc58aeb ("iommu/mediatek: Initialise/Remove for multi bank dev") Signed-off-by: Yang Yingliang Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20221029103550.3774365-1-yangyingliang@huawei.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 2ab2ecfe01f8..2d14dc846b83 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -1173,6 +1173,8 @@ static int mtk_iommu_probe(struct platform_device *pdev) banks_num = data->plat_data->banks_num; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -EINVAL; if (resource_size(res) < banks_num * MTK_IOMMU_BANK_SZ) { dev_err(dev, "banknr %d. res %pR is not enough.\n", banks_num, res); return -EINVAL; From 59a316fdc4d564dc5e811321a8b20a444fc0094c Mon Sep 17 00:00:00 2001 From: Fabien Parent Date: Wed, 2 Nov 2022 16:18:07 +0100 Subject: [PATCH 1831/4122] dt-bindings: iommu: mediatek: add binding documentation for MT8365 SoC Add IOMMU binding documentation for the MT8365 SoC. 
Signed-off-by: Fabien Parent Signed-off-by: Markus Schneider-Pargmann Reviewed-by: AngeloGioacchino Del Regno Acked-by: Krzysztof Kozlowski Reviewed-by: Yong Wu Signed-off-by: Alexandre Mergnat Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20221001-iommu-support-v6-1-be4fe8da254b@baylibre.com Signed-off-by: Joerg Roedel --- .../bindings/iommu/mediatek,iommu.yaml | 2 + .../memory/mediatek,mt8365-larb-port.h | 90 +++++++++++++++++++ 2 files changed, 92 insertions(+) create mode 100644 include/dt-bindings/memory/mediatek,mt8365-larb-port.h diff --git a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml index 839e3be0bf3c..5b6395bc10e0 100644 --- a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml +++ b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml @@ -82,6 +82,7 @@ properties: - mediatek,mt8195-iommu-vdo # generation two - mediatek,mt8195-iommu-vpp # generation two - mediatek,mt8195-iommu-infra # generation two + - mediatek,mt8365-m4u # generation two - description: mt7623 generation one items: @@ -132,6 +133,7 @@ properties: dt-binding/memory/mt8186-memory-port.h for mt8186, dt-binding/memory/mt8192-larb-port.h for mt8192. dt-binding/memory/mt8195-memory-port.h for mt8195. + dt-binding/memory/mediatek,mt8365-larb-port.h for mt8365. power-domains: maxItems: 1 diff --git a/include/dt-bindings/memory/mediatek,mt8365-larb-port.h b/include/dt-bindings/memory/mediatek,mt8365-larb-port.h new file mode 100644 index 000000000000..56d5a5dd519e --- /dev/null +++ b/include/dt-bindings/memory/mediatek,mt8365-larb-port.h @@ -0,0 +1,90 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2022 MediaTek Inc. 
+ * Author: Yong Wu + */ +#ifndef _DT_BINDINGS_MEMORY_MT8365_LARB_PORT_H_ +#define _DT_BINDINGS_MEMORY_MT8365_LARB_PORT_H_ + +#include + +#define M4U_LARB0_ID 0 +#define M4U_LARB1_ID 1 +#define M4U_LARB2_ID 2 +#define M4U_LARB3_ID 3 + +/* larb0 */ +#define M4U_PORT_DISP_OVL0 MTK_M4U_ID(M4U_LARB0_ID, 0) +#define M4U_PORT_DISP_OVL0_2L MTK_M4U_ID(M4U_LARB0_ID, 1) +#define M4U_PORT_DISP_RDMA0 MTK_M4U_ID(M4U_LARB0_ID, 2) +#define M4U_PORT_DISP_WDMA0 MTK_M4U_ID(M4U_LARB0_ID, 3) +#define M4U_PORT_DISP_RDMA1 MTK_M4U_ID(M4U_LARB0_ID, 4) +#define M4U_PORT_MDP_RDMA0 MTK_M4U_ID(M4U_LARB0_ID, 5) +#define M4U_PORT_MDP_WROT1 MTK_M4U_ID(M4U_LARB0_ID, 6) +#define M4U_PORT_MDP_WROT0 MTK_M4U_ID(M4U_LARB0_ID, 7) +#define M4U_PORT_MDP_RDMA1 MTK_M4U_ID(M4U_LARB0_ID, 8) +#define M4U_PORT_DISP_FAKE0 MTK_M4U_ID(M4U_LARB0_ID, 9) +#define M4U_PORT_APU_READ MTK_M4U_ID(M4U_LARB0_ID, 10) +#define M4U_PORT_APU_WRITE MTK_M4U_ID(M4U_LARB0_ID, 11) + +/* larb1 */ +#define M4U_PORT_VENC_RCPU MTK_M4U_ID(M4U_LARB1_ID, 0) +#define M4U_PORT_VENC_REC MTK_M4U_ID(M4U_LARB1_ID, 1) +#define M4U_PORT_VENC_BSDMA MTK_M4U_ID(M4U_LARB1_ID, 2) +#define M4U_PORT_VENC_SV_COMV MTK_M4U_ID(M4U_LARB1_ID, 3) +#define M4U_PORT_VENC_RD_COMV MTK_M4U_ID(M4U_LARB1_ID, 4) +#define M4U_PORT_VENC_NBM_RDMA MTK_M4U_ID(M4U_LARB1_ID, 5) +#define M4U_PORT_VENC_NBM_RDMA_LITE MTK_M4U_ID(M4U_LARB1_ID, 6) +#define M4U_PORT_JPGENC_Y_RDMA MTK_M4U_ID(M4U_LARB1_ID, 7) +#define M4U_PORT_JPGENC_C_RDMA MTK_M4U_ID(M4U_LARB1_ID, 8) +#define M4U_PORT_JPGENC_Q_TABLE MTK_M4U_ID(M4U_LARB1_ID, 9) +#define M4U_PORT_JPGENC_BSDMA MTK_M4U_ID(M4U_LARB1_ID, 10) +#define M4U_PORT_JPGDEC_WDMA MTK_M4U_ID(M4U_LARB1_ID, 11) +#define M4U_PORT_JPGDEC_BSDMA MTK_M4U_ID(M4U_LARB1_ID, 12) +#define M4U_PORT_VENC_NBM_WDMA MTK_M4U_ID(M4U_LARB1_ID, 13) +#define M4U_PORT_VENC_NBM_WDMA_LITE MTK_M4U_ID(M4U_LARB1_ID, 14) +#define M4U_PORT_VENC_CUR_LUMA MTK_M4U_ID(M4U_LARB1_ID, 15) +#define M4U_PORT_VENC_CUR_CHROMA MTK_M4U_ID(M4U_LARB1_ID, 16) +#define M4U_PORT_VENC_REF_LUMA 
MTK_M4U_ID(M4U_LARB1_ID, 17) +#define M4U_PORT_VENC_REF_CHROMA MTK_M4U_ID(M4U_LARB1_ID, 18) + +/* larb2 */ +#define M4U_PORT_CAM_IMGO MTK_M4U_ID(M4U_LARB2_ID, 0) +#define M4U_PORT_CAM_RRZO MTK_M4U_ID(M4U_LARB2_ID, 1) +#define M4U_PORT_CAM_AAO MTK_M4U_ID(M4U_LARB2_ID, 2) +#define M4U_PORT_CAM_LCS MTK_M4U_ID(M4U_LARB2_ID, 3) +#define M4U_PORT_CAM_ESFKO MTK_M4U_ID(M4U_LARB2_ID, 4) +#define M4U_PORT_CAM_CAM_SV0 MTK_M4U_ID(M4U_LARB2_ID, 5) +#define M4U_PORT_CAM_CAM_SV1 MTK_M4U_ID(M4U_LARB2_ID, 6) +#define M4U_PORT_CAM_LSCI MTK_M4U_ID(M4U_LARB2_ID, 7) +#define M4U_PORT_CAM_LSCI_D MTK_M4U_ID(M4U_LARB2_ID, 8) +#define M4U_PORT_CAM_AFO MTK_M4U_ID(M4U_LARB2_ID, 9) +#define M4U_PORT_CAM_SPARE MTK_M4U_ID(M4U_LARB2_ID, 10) +#define M4U_PORT_CAM_BPCI MTK_M4U_ID(M4U_LARB2_ID, 11) +#define M4U_PORT_CAM_BPCI_D MTK_M4U_ID(M4U_LARB2_ID, 12) +#define M4U_PORT_CAM_UFDI MTK_M4U_ID(M4U_LARB2_ID, 13) +#define M4U_PORT_CAM_IMGI MTK_M4U_ID(M4U_LARB2_ID, 14) +#define M4U_PORT_CAM_IMG2O MTK_M4U_ID(M4U_LARB2_ID, 15) +#define M4U_PORT_CAM_IMG3O MTK_M4U_ID(M4U_LARB2_ID, 16) +#define M4U_PORT_CAM_WPE0_I MTK_M4U_ID(M4U_LARB2_ID, 17) +#define M4U_PORT_CAM_WPE1_I MTK_M4U_ID(M4U_LARB2_ID, 18) +#define M4U_PORT_CAM_WPE_O MTK_M4U_ID(M4U_LARB2_ID, 19) +#define M4U_PORT_CAM_FD0_I MTK_M4U_ID(M4U_LARB2_ID, 20) +#define M4U_PORT_CAM_FD1_I MTK_M4U_ID(M4U_LARB2_ID, 21) +#define M4U_PORT_CAM_FD0_O MTK_M4U_ID(M4U_LARB2_ID, 22) +#define M4U_PORT_CAM_FD1_O MTK_M4U_ID(M4U_LARB2_ID, 23) + +/* larb3 */ +#define M4U_PORT_HW_VDEC_MC_EXT MTK_M4U_ID(M4U_LARB3_ID, 0) +#define M4U_PORT_HW_VDEC_UFO_EXT MTK_M4U_ID(M4U_LARB3_ID, 1) +#define M4U_PORT_HW_VDEC_PP_EXT MTK_M4U_ID(M4U_LARB3_ID, 2) +#define M4U_PORT_HW_VDEC_PRED_RD_EXT MTK_M4U_ID(M4U_LARB3_ID, 3) +#define M4U_PORT_HW_VDEC_PRED_WR_EXT MTK_M4U_ID(M4U_LARB3_ID, 4) +#define M4U_PORT_HW_VDEC_PPWRAP_EXT MTK_M4U_ID(M4U_LARB3_ID, 5) +#define M4U_PORT_HW_VDEC_TILE_EXT MTK_M4U_ID(M4U_LARB3_ID, 6) +#define M4U_PORT_HW_VDEC_VLD_EXT MTK_M4U_ID(M4U_LARB3_ID, 7) +#define 
M4U_PORT_HW_VDEC_VLD2_EXT MTK_M4U_ID(M4U_LARB3_ID, 8) +#define M4U_PORT_HW_VDEC_AVC_MV_EXT MTK_M4U_ID(M4U_LARB3_ID, 9) +#define M4U_PORT_HW_VDEC_RG_CTRL_DMA_EXT MTK_M4U_ID(M4U_LARB3_ID, 10) + +#endif From 65df7d824f82f4dd3552b5a62ae8db07f25e423f Mon Sep 17 00:00:00 2001 From: Fabien Parent Date: Wed, 2 Nov 2022 16:18:08 +0100 Subject: [PATCH 1832/4122] iommu/mediatek: add support for 6-bit encoded port IDs Until now the port ID was always encoded as 5-bit data. On MT8365, the port ID is encoded as 6-bit data. This requires adding the extra macros F_MMU_INT_ID_LARB_ID_WID_6 and F_MMU_INT_ID_PORT_ID_WID_6 in order to support 6-bit encoded port IDs. Signed-off-by: Fabien Parent Signed-off-by: Markus Schneider-Pargmann Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Yong Wu Signed-off-by: Alexandre Mergnat Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20221001-iommu-support-v6-2-be4fe8da254b@baylibre.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 2d14dc846b83..885ba5233b99 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -108,8 +108,12 @@ #define F_MMU_INT_ID_SUB_COMM_ID(a) (((a) >> 7) & 0x3) #define F_MMU_INT_ID_COMM_ID_EXT(a) (((a) >> 10) & 0x7) #define F_MMU_INT_ID_SUB_COMM_ID_EXT(a) (((a) >> 7) & 0x7) +/* Macro for 5 bits length port ID field (default) */ #define F_MMU_INT_ID_LARB_ID(a) (((a) >> 7) & 0x7) #define F_MMU_INT_ID_PORT_ID(a) (((a) >> 2) & 0x1f) +/* Macro for 6 bits length port ID field */ +#define F_MMU_INT_ID_LARB_ID_WID_6(a) (((a) >> 8) & 0x7) +#define F_MMU_INT_ID_PORT_ID_WID_6(a) (((a) >> 2) & 0x3f) #define MTK_PROTECT_PA_ALIGN 256 #define MTK_IOMMU_BANK_SZ 0x1000 @@ -139,6 +143,7 @@ #define IFA_IOMMU_PCIE_SUPPORT BIT(16) #define PGTABLE_PA_35_EN BIT(17) #define TF_PORT_TO_ADDR_MT8173 BIT(18) +#define INT_ID_PORT_WIDTH_6 BIT(19) #define 
MTK_IOMMU_HAS_FLAG_MASK(pdata, _x, mask) \ ((((pdata)->flags) & (mask)) == (_x)) @@ -441,14 +446,19 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id) fault_pa |= (u64)pa34_32 << 32; if (MTK_IOMMU_IS_TYPE(plat_data, MTK_IOMMU_TYPE_MM)) { - fault_port = F_MMU_INT_ID_PORT_ID(regval); if (MTK_IOMMU_HAS_FLAG(plat_data, HAS_SUB_COMM_2BITS)) { fault_larb = F_MMU_INT_ID_COMM_ID(regval); sub_comm = F_MMU_INT_ID_SUB_COMM_ID(regval); + fault_port = F_MMU_INT_ID_PORT_ID(regval); } else if (MTK_IOMMU_HAS_FLAG(plat_data, HAS_SUB_COMM_3BITS)) { fault_larb = F_MMU_INT_ID_COMM_ID_EXT(regval); sub_comm = F_MMU_INT_ID_SUB_COMM_ID_EXT(regval); + fault_port = F_MMU_INT_ID_PORT_ID(regval); + } else if (MTK_IOMMU_HAS_FLAG(plat_data, INT_ID_PORT_WIDTH_6)) { + fault_port = F_MMU_INT_ID_PORT_ID_WID_6(regval); + fault_larb = F_MMU_INT_ID_LARB_ID_WID_6(regval); } else { + fault_port = F_MMU_INT_ID_PORT_ID(regval); fault_larb = F_MMU_INT_ID_LARB_ID(regval); } fault_larb = data->plat_data->larbid_remap[fault_larb][sub_comm]; From 3cd0e4a34d5a9bcff90e0c104800700346e42658 Mon Sep 17 00:00:00 2001 From: Fabien Parent Date: Wed, 2 Nov 2022 16:18:09 +0100 Subject: [PATCH 1833/4122] iommu/mediatek: add support for MT8365 SoC Add IOMMU support for MT8365 SoC. 
Signed-off-by: Fabien Parent Reviewed-by: Amjad Ouled-Ameur Tested-by: Amjad Ouled-Ameur Signed-off-by: Markus Schneider-Pargmann Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Yong Wu Signed-off-by: Alexandre Mergnat Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20221001-iommu-support-v6-3-be4fe8da254b@baylibre.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 885ba5233b99..c80f33dd2d43 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -170,6 +170,7 @@ enum mtk_iommu_plat { M4U_MT8186, M4U_MT8192, M4U_MT8195, + M4U_MT8365, }; struct mtk_iommu_iova_region { @@ -1528,6 +1529,17 @@ static const struct mtk_iommu_plat_data mt8195_data_vpp = { {4, MTK_INVALID_LARBID, MTK_INVALID_LARBID, MTK_INVALID_LARBID, 6}}, }; +static const struct mtk_iommu_plat_data mt8365_data = { + .m4u_plat = M4U_MT8365, + .flags = RESET_AXI | INT_ID_PORT_WIDTH_6, + .inv_sel_reg = REG_MMU_INV_SEL_GEN1, + .banks_num = 1, + .banks_enable = {true}, + .iova_region = single_domain, + .iova_region_nr = ARRAY_SIZE(single_domain), + .larbid_remap = {{0}, {1}, {2}, {3}, {4}, {5}}, /* Linear mapping. 
*/ +}; + static const struct of_device_id mtk_iommu_of_ids[] = { { .compatible = "mediatek,mt2712-m4u", .data = &mt2712_data}, { .compatible = "mediatek,mt6779-m4u", .data = &mt6779_data}, @@ -1540,6 +1552,7 @@ static const struct of_device_id mtk_iommu_of_ids[] = { { .compatible = "mediatek,mt8195-iommu-infra", .data = &mt8195_data_infra}, { .compatible = "mediatek,mt8195-iommu-vdo", .data = &mt8195_data_vdo}, { .compatible = "mediatek,mt8195-iommu-vpp", .data = &mt8195_data_vpp}, + { .compatible = "mediatek,mt8365-m4u", .data = &mt8365_data}, {} }; From 01657bc14a3990c665375f77978631fee77b1fce Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 4 Nov 2022 19:51:43 +0000 Subject: [PATCH 1834/4122] iommu: Avoid races around device probe We currently have 3 different ways that __iommu_probe_device() may be called, but no real guarantee that multiple callers can't tread on each other, especially once asynchronous driver probe gets involved. It would likely have taken a fair bit of luck to hit this previously, but commit 57365a04c921 ("iommu: Move bus setup to IOMMU device registration") ups the odds since now it's not just omap-iommu that may trigger multiple bus_iommu_probe() calls in parallel if probing asynchronously. Add a lock to ensure we can't try to double-probe a device, and also close some possible race windows to make sure we're truly robust against trying to double-initialise a group via two different member devices. 
Reported-by: Brian Norris Signed-off-by: Robin Murphy Tested-by: Brian Norris Fixes: 57365a04c921 ("iommu: Move bus setup to IOMMU device registration") Link: https://lore.kernel.org/r/1946ef9f774851732eed78760a78ec40dbc6d178.1667591503.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 6ca377f4fbf9..7c99d8eb3182 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -306,13 +306,23 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list const struct iommu_ops *ops = dev->bus->iommu_ops; struct iommu_device *iommu_dev; struct iommu_group *group; + static DEFINE_MUTEX(iommu_probe_device_lock); int ret; if (!ops) return -ENODEV; - - if (!dev_iommu_get(dev)) - return -ENOMEM; + /* + * Serialise to avoid races between IOMMU drivers registering in + * parallel and/or the "replay" calls from ACPI/OF code via client + * driver probe. Once the latter have been cleaned up we should + * probably be able to use device_lock() here to minimise the scope, + * but for now enforcing a simple global ordering is fine. 
+ */ + mutex_lock(&iommu_probe_device_lock); + if (!dev_iommu_get(dev)) { + ret = -ENOMEM; + goto err_unlock; + } if (!try_module_get(ops->owner)) { ret = -EINVAL; @@ -333,11 +343,14 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list ret = PTR_ERR(group); goto out_release; } - iommu_group_put(group); + mutex_lock(&group->mutex); if (group_list && !group->default_domain && list_empty(&group->entry)) list_add_tail(&group->entry, group_list); + mutex_unlock(&group->mutex); + iommu_group_put(group); + mutex_unlock(&iommu_probe_device_lock); iommu_device_link(iommu_dev, dev); return 0; @@ -352,6 +365,9 @@ out_module_put: err_free: dev_iommu_free(dev); +err_unlock: + mutex_unlock(&iommu_probe_device_lock); + return ret; } @@ -1824,11 +1840,11 @@ int bus_iommu_probe(struct bus_type *bus) return ret; list_for_each_entry_safe(group, next, &group_list, entry) { + mutex_lock(&group->mutex); + /* Remove item from the list */ list_del_init(&group->entry); - mutex_lock(&group->mutex); - /* Try to allocate default domain */ probe_alloc_default_domain(bus, group); From 59bbf596791b89c7f88fdcac29dfc39c1221d25d Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Wed, 9 Nov 2022 15:28:59 +0100 Subject: [PATCH 1835/4122] iommu/s390: Make attach succeed even if the device is in error state If a zPCI device is in the error state while switching IOMMU domains zpci_register_ioat() will fail and we would end up with the device not attached to any domain. In this state since zdev->dma_table == NULL a reset via zpci_hot_reset_device() would wrongfully re-initialize the device for DMA API usage using zpci_dma_init_device(). As automatic recovery is currently disabled while attached to an IOMMU domain this only affects slot resets triggered through other means but will affect automatic recovery once we switch to using dma-iommu. 
Additionally with that switch common code expects attaching to the default domain to always work so zpci_register_ioat() should only fail if there is no chance to recover anyway, e.g. if the device has been unplugged. Improve the robustness of attach by specifically looking at the status returned by zpci_mod_fc() to determine if the device is unavailable and in this case simply ignore the error. Once the device is reset zpci_hot_reset_device() will then correctly set the domain's DMA translation tables. Signed-off-by: Niklas Schnelle Reviewed-by: Matthew Rosato Link: https://lore.kernel.org/r/20221109142903.4080275-2-schnelle@linux.ibm.com Signed-off-by: Joerg Roedel --- arch/s390/include/asm/pci.h | 2 +- arch/s390/kvm/pci.c | 6 ++++-- arch/s390/pci/pci.c | 11 ++++++----- arch/s390/pci/pci_dma.c | 3 ++- drivers/iommu/s390-iommu.c | 9 +++++++-- 5 files changed, 20 insertions(+), 11 deletions(-) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 15f8714ca9b7..07361e2fd8c5 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -221,7 +221,7 @@ void zpci_device_reserved(struct zpci_dev *zdev); bool zpci_is_device_configured(struct zpci_dev *zdev); int zpci_hot_reset_device(struct zpci_dev *zdev); -int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64); +int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64, u8 *); int zpci_unregister_ioat(struct zpci_dev *, u8); void zpci_remove_reserved_devices(void); void zpci_update_fh(struct zpci_dev *zdev, u32 fh); diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c index c50c1645c0ae..03964c0e1fdf 100644 --- a/arch/s390/kvm/pci.c +++ b/arch/s390/kvm/pci.c @@ -434,6 +434,7 @@ static void kvm_s390_pci_dev_release(struct zpci_dev *zdev) static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm) { struct zpci_dev *zdev = opaque; + u8 status; int rc; if (!zdev) @@ -486,7 +487,7 @@ static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm) /* Re-register 
the IOMMU that was already created */ rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(zdev->dma_table)); + virt_to_phys(zdev->dma_table), &status); if (rc) goto clear_gisa; @@ -516,6 +517,7 @@ static void kvm_s390_pci_unregister_kvm(void *opaque) { struct zpci_dev *zdev = opaque; struct kvm *kvm; + u8 status; if (!zdev) return; @@ -554,7 +556,7 @@ static void kvm_s390_pci_unregister_kvm(void *opaque) /* Re-register the IOMMU that was already created */ zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(zdev->dma_table)); + virt_to_phys(zdev->dma_table), &status); out: spin_lock(&kvm->arch.kzdev_list_lock); diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 73cdc5539384..a703dcd94a68 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -116,20 +116,20 @@ EXPORT_SYMBOL_GPL(pci_proc_domain); /* Modify PCI: Register I/O address translation parameters */ int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas, - u64 base, u64 limit, u64 iota) + u64 base, u64 limit, u64 iota, u8 *status) { u64 req = ZPCI_CREATE_REQ(zdev->fh, dmaas, ZPCI_MOD_FC_REG_IOAT); struct zpci_fib fib = {0}; - u8 cc, status; + u8 cc; WARN_ON_ONCE(iota & 0x3fff); fib.pba = base; fib.pal = limit; fib.iota = iota | ZPCI_IOTA_RTTO_FLAG; fib.gd = zdev->gisa; - cc = zpci_mod_fc(req, &fib, &status); + cc = zpci_mod_fc(req, &fib, status); if (cc) - zpci_dbg(3, "reg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, status); + zpci_dbg(3, "reg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, *status); return cc; } EXPORT_SYMBOL_GPL(zpci_register_ioat); @@ -764,6 +764,7 @@ EXPORT_SYMBOL_GPL(zpci_disable_device); */ int zpci_hot_reset_device(struct zpci_dev *zdev) { + u8 status; int rc; zpci_dbg(3, "rst fid:%x, fh:%x\n", zdev->fid, zdev->fh); @@ -787,7 +788,7 @@ int zpci_hot_reset_device(struct zpci_dev *zdev) if (zdev->dma_table) rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(zdev->dma_table)); + 
virt_to_phys(zdev->dma_table), &status); else rc = zpci_dma_init_device(zdev); if (rc) { diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 227cf0a62800..dee825ee7305 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -547,6 +547,7 @@ static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int zpci_dma_init_device(struct zpci_dev *zdev) { + u8 status; int rc; /* @@ -598,7 +599,7 @@ int zpci_dma_init_device(struct zpci_dev *zdev) } if (zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(zdev->dma_table))) { + virt_to_phys(zdev->dma_table), &status)) { rc = -EIO; goto free_bitmap; } diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index 7fb512bece9a..e2c886bc4376 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -98,6 +98,7 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, struct s390_domain *s390_domain = to_s390_domain(domain); struct zpci_dev *zdev = to_zpci_dev(dev); unsigned long flags; + u8 status; int cc; if (!zdev) @@ -113,8 +114,12 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, zpci_dma_exit_device(zdev); cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(s390_domain->dma_table)); - if (cc) + virt_to_phys(s390_domain->dma_table), &status); + /* + * If the device is undergoing error recovery the reset code + * will re-establish the new domain. + */ + if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL) return -EIO; zdev->dma_table = s390_domain->dma_table; From c228f5a043370ef02867e4f0aab1bdc8422500e6 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Wed, 9 Nov 2022 15:29:00 +0100 Subject: [PATCH 1836/4122] iommu/s390: Add I/O TLB ops Currently s390-iommu does an I/O TLB flush (RPCIT) for every update of the I/O translation table explicitly. For one this is wasteful since RPCIT can be skipped after a mapping operation if zdev->tlb_refresh is unset. 
Moreover we can do a single RPCIT for a range of pages including when doing lazy unmapping. Thankfully both of these optimizations can be achieved by implementing the IOMMU operations common code provides for the different types of I/O tlb flushes: * flush_iotlb_all: Flushes the I/O TLB for the entire IOVA space * iotlb_sync: Flushes the I/O TLB for a range of pages that can be gathered up, for example to implement lazy unmapping. * iotlb_sync_map: Flushes the I/O TLB after a mapping operation Signed-off-by: Niklas Schnelle Link: https://lore.kernel.org/r/20221109142903.4080275-3-schnelle@linux.ibm.com Signed-off-by: Joerg Roedel --- drivers/iommu/s390-iommu.c | 67 +++++++++++++++++++++++++++++++------- 1 file changed, 56 insertions(+), 11 deletions(-) diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index e2c886bc4376..9771bce86e94 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -199,14 +199,63 @@ static void s390_iommu_release_device(struct device *dev) __s390_iommu_detach_device(zdev); } +static void s390_iommu_flush_iotlb_all(struct iommu_domain *domain) +{ + struct s390_domain *s390_domain = to_s390_domain(domain); + struct zpci_dev *zdev; + unsigned long flags; + + spin_lock_irqsave(&s390_domain->list_lock, flags); + list_for_each_entry(zdev, &s390_domain->devices, iommu_list) { + zpci_refresh_trans((u64)zdev->fh << 32, zdev->start_dma, + zdev->end_dma - zdev->start_dma + 1); + } + spin_unlock_irqrestore(&s390_domain->list_lock, flags); +} + +static void s390_iommu_iotlb_sync(struct iommu_domain *domain, + struct iommu_iotlb_gather *gather) +{ + struct s390_domain *s390_domain = to_s390_domain(domain); + size_t size = gather->end - gather->start + 1; + struct zpci_dev *zdev; + unsigned long flags; + + /* If gather was never added to there is nothing to flush */ + if (!gather->end) + return; + + spin_lock_irqsave(&s390_domain->list_lock, flags); + list_for_each_entry(zdev, &s390_domain->devices, iommu_list) { + 
zpci_refresh_trans((u64)zdev->fh << 32, gather->start, + size); + } + spin_unlock_irqrestore(&s390_domain->list_lock, flags); +} + +static void s390_iommu_iotlb_sync_map(struct iommu_domain *domain, + unsigned long iova, size_t size) +{ + struct s390_domain *s390_domain = to_s390_domain(domain); + struct zpci_dev *zdev; + unsigned long flags; + + spin_lock_irqsave(&s390_domain->list_lock, flags); + list_for_each_entry(zdev, &s390_domain->devices, iommu_list) { + if (!zdev->tlb_refresh) + continue; + zpci_refresh_trans((u64)zdev->fh << 32, + iova, size); + } + spin_unlock_irqrestore(&s390_domain->list_lock, flags); +} + static int s390_iommu_update_trans(struct s390_domain *s390_domain, phys_addr_t pa, dma_addr_t dma_addr, unsigned long nr_pages, int flags) { phys_addr_t page_addr = pa & PAGE_MASK; - dma_addr_t start_dma_addr = dma_addr; unsigned long irq_flags, i; - struct zpci_dev *zdev; unsigned long *entry; int rc = 0; @@ -225,15 +274,6 @@ static int s390_iommu_update_trans(struct s390_domain *s390_domain, dma_addr += PAGE_SIZE; } - spin_lock(&s390_domain->list_lock); - list_for_each_entry(zdev, &s390_domain->devices, iommu_list) { - rc = zpci_refresh_trans((u64)zdev->fh << 32, - start_dma_addr, nr_pages * PAGE_SIZE); - if (rc) - break; - } - spin_unlock(&s390_domain->list_lock); - undo_cpu_trans: if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) { flags = ZPCI_PTE_INVALID; @@ -340,6 +380,8 @@ static size_t s390_iommu_unmap_pages(struct iommu_domain *domain, if (rc) return 0; + iommu_iotlb_gather_add_range(gather, iova, size); + return size; } @@ -384,6 +426,9 @@ static const struct iommu_ops s390_iommu_ops = { .detach_dev = s390_iommu_detach_device, .map_pages = s390_iommu_map_pages, .unmap_pages = s390_iommu_unmap_pages, + .flush_iotlb_all = s390_iommu_flush_iotlb_all, + .iotlb_sync = s390_iommu_iotlb_sync, + .iotlb_sync_map = s390_iommu_iotlb_sync_map, .iova_to_phys = s390_iommu_iova_to_phys, .free = s390_domain_free, } From 
2ba8336dab5fb81452aea9c21dfc870050a017f3 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Wed, 9 Nov 2022 15:29:01 +0100 Subject: [PATCH 1837/4122] iommu/s390: Use RCU to allow concurrent domain_list iteration The s390_domain->devices list is only added to when new devices are attached but is iterated through in read-only fashion for every mapping operation as well as for I/O TLB flushes and thus in performance critical code causing contention on the s390_domain->list_lock. Fortunately such a read-mostly linked list is a standard use case for RCU. This change closely follows the example for RCU protected list given in Documentation/RCU/listRCU.rst. Signed-off-by: Niklas Schnelle Link: https://lore.kernel.org/r/20221109142903.4080275-4-schnelle@linux.ibm.com Signed-off-by: Joerg Roedel --- arch/s390/include/asm/pci.h | 1 + arch/s390/pci/pci.c | 2 +- drivers/iommu/s390-iommu.c | 44 +++++++++++++++++++++++-------------- 3 files changed, 29 insertions(+), 18 deletions(-) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 07361e2fd8c5..e4c3e4e04d30 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -119,6 +119,7 @@ struct zpci_dev { struct list_head entry; /* list of all zpci_devices, needed for hotplug, etc. 
*/ struct list_head iommu_list; struct kref kref; + struct rcu_head rcu; struct hotplug_slot hotplug_slot; enum zpci_state state; diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index a703dcd94a68..ef38b1514c77 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -996,7 +996,7 @@ void zpci_release_device(struct kref *kref) break; } zpci_dbg(3, "rem fid:%x\n", zdev->fid); - kfree(zdev); + kfree_rcu(zdev, rcu); } int zpci_report_error(struct pci_dev *pdev, diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index 9771bce86e94..cf5dcbcea4e0 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -10,6 +10,8 @@ #include #include #include +#include +#include #include static const struct iommu_ops s390_iommu_ops; @@ -20,6 +22,7 @@ struct s390_domain { unsigned long *dma_table; spinlock_t dma_table_lock; spinlock_t list_lock; + struct rcu_head rcu; }; static struct s390_domain *to_s390_domain(struct iommu_domain *dom) @@ -61,18 +64,28 @@ static struct iommu_domain *s390_domain_alloc(unsigned domain_type) spin_lock_init(&s390_domain->dma_table_lock); spin_lock_init(&s390_domain->list_lock); - INIT_LIST_HEAD(&s390_domain->devices); + INIT_LIST_HEAD_RCU(&s390_domain->devices); return &s390_domain->domain; } +static void s390_iommu_rcu_free_domain(struct rcu_head *head) +{ + struct s390_domain *s390_domain = container_of(head, struct s390_domain, rcu); + + dma_cleanup_tables(s390_domain->dma_table); + kfree(s390_domain); +} + static void s390_domain_free(struct iommu_domain *domain) { struct s390_domain *s390_domain = to_s390_domain(domain); + rcu_read_lock(); WARN_ON(!list_empty(&s390_domain->devices)); - dma_cleanup_tables(s390_domain->dma_table); - kfree(s390_domain); + rcu_read_unlock(); + + call_rcu(&s390_domain->rcu, s390_iommu_rcu_free_domain); } static void __s390_iommu_detach_device(struct zpci_dev *zdev) @@ -84,7 +97,7 @@ static void __s390_iommu_detach_device(struct zpci_dev *zdev) return; 
spin_lock_irqsave(&s390_domain->list_lock, flags); - list_del_init(&zdev->iommu_list); + list_del_rcu(&zdev->iommu_list); spin_unlock_irqrestore(&s390_domain->list_lock, flags); zpci_unregister_ioat(zdev, 0); @@ -127,7 +140,7 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, zdev->s390_domain = s390_domain; spin_lock_irqsave(&s390_domain->list_lock, flags); - list_add(&zdev->iommu_list, &s390_domain->devices); + list_add_rcu(&zdev->iommu_list, &s390_domain->devices); spin_unlock_irqrestore(&s390_domain->list_lock, flags); return 0; @@ -203,14 +216,13 @@ static void s390_iommu_flush_iotlb_all(struct iommu_domain *domain) { struct s390_domain *s390_domain = to_s390_domain(domain); struct zpci_dev *zdev; - unsigned long flags; - spin_lock_irqsave(&s390_domain->list_lock, flags); - list_for_each_entry(zdev, &s390_domain->devices, iommu_list) { + rcu_read_lock(); + list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) { zpci_refresh_trans((u64)zdev->fh << 32, zdev->start_dma, zdev->end_dma - zdev->start_dma + 1); } - spin_unlock_irqrestore(&s390_domain->list_lock, flags); + rcu_read_unlock(); } static void s390_iommu_iotlb_sync(struct iommu_domain *domain, @@ -219,18 +231,17 @@ static void s390_iommu_iotlb_sync(struct iommu_domain *domain, struct s390_domain *s390_domain = to_s390_domain(domain); size_t size = gather->end - gather->start + 1; struct zpci_dev *zdev; - unsigned long flags; /* If gather was never added to there is nothing to flush */ if (!gather->end) return; - spin_lock_irqsave(&s390_domain->list_lock, flags); - list_for_each_entry(zdev, &s390_domain->devices, iommu_list) { + rcu_read_lock(); + list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) { zpci_refresh_trans((u64)zdev->fh << 32, gather->start, size); } - spin_unlock_irqrestore(&s390_domain->list_lock, flags); + rcu_read_unlock(); } static void s390_iommu_iotlb_sync_map(struct iommu_domain *domain, @@ -238,16 +249,15 @@ static void 
s390_iommu_iotlb_sync_map(struct iommu_domain *domain, { struct s390_domain *s390_domain = to_s390_domain(domain); struct zpci_dev *zdev; - unsigned long flags; - spin_lock_irqsave(&s390_domain->list_lock, flags); - list_for_each_entry(zdev, &s390_domain->devices, iommu_list) { + rcu_read_lock(); + list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) { if (!zdev->tlb_refresh) continue; zpci_refresh_trans((u64)zdev->fh << 32, iova, size); } - spin_unlock_irqrestore(&s390_domain->list_lock, flags); + rcu_read_unlock(); } static int s390_iommu_update_trans(struct s390_domain *s390_domain, From 08955af0600303455f57fe2f2a26f24f9b496b49 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Wed, 9 Nov 2022 15:29:02 +0100 Subject: [PATCH 1838/4122] iommu/s390: Optimize IOMMU table walking When invalidating existing table entries for unmap, there is no need to know the physical address beforehand, so don't do an extra walk of the IOMMU table to get it. Also, when invalidating entries, not finding an entry indicates an invalid unmap and not a lack of memory, so we don't need to undo updates in this case. Implement this by splitting s390_iommu_update_trans() into a variant for validating and one for invalidating translations. 
Signed-off-by: Niklas Schnelle Link: https://lore.kernel.org/r/20221109142903.4080275-5-schnelle@linux.ibm.com Signed-off-by: Joerg Roedel --- drivers/iommu/s390-iommu.c | 83 +++++++++++++++++++++++--------------- 1 file changed, 50 insertions(+), 33 deletions(-) diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index cf5dcbcea4e0..2b9a3e3bc606 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -260,11 +260,50 @@ static void s390_iommu_iotlb_sync_map(struct iommu_domain *domain, rcu_read_unlock(); } -static int s390_iommu_update_trans(struct s390_domain *s390_domain, - phys_addr_t pa, dma_addr_t dma_addr, - unsigned long nr_pages, int flags) +static int s390_iommu_validate_trans(struct s390_domain *s390_domain, + phys_addr_t pa, dma_addr_t dma_addr, + unsigned long nr_pages, int flags) { phys_addr_t page_addr = pa & PAGE_MASK; + unsigned long irq_flags, i; + unsigned long *entry; + int rc; + + if (!nr_pages) + return 0; + + spin_lock_irqsave(&s390_domain->dma_table_lock, irq_flags); + for (i = 0; i < nr_pages; i++) { + entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr); + if (unlikely(!entry)) { + rc = -ENOMEM; + goto undo_cpu_trans; + } + dma_update_cpu_trans(entry, page_addr, flags); + page_addr += PAGE_SIZE; + dma_addr += PAGE_SIZE; + } + spin_unlock_irqrestore(&s390_domain->dma_table_lock, irq_flags); + + return 0; + +undo_cpu_trans: + while (i-- > 0) { + dma_addr -= PAGE_SIZE; + entry = dma_walk_cpu_trans(s390_domain->dma_table, + dma_addr); + if (!entry) + break; + dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID); + } + spin_unlock_irqrestore(&s390_domain->dma_table_lock, irq_flags); + + return rc; +} + +static int s390_iommu_invalidate_trans(struct s390_domain *s390_domain, + dma_addr_t dma_addr, unsigned long nr_pages) +{ unsigned long irq_flags, i; unsigned long *entry; int rc = 0; @@ -275,28 +314,13 @@ static int s390_iommu_update_trans(struct s390_domain *s390_domain, 
spin_lock_irqsave(&s390_domain->dma_table_lock, irq_flags); for (i = 0; i < nr_pages; i++) { entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr); - if (!entry) { - rc = -ENOMEM; - goto undo_cpu_trans; + if (unlikely(!entry)) { + rc = -EINVAL; + break; } - dma_update_cpu_trans(entry, page_addr, flags); - page_addr += PAGE_SIZE; + dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID); dma_addr += PAGE_SIZE; } - -undo_cpu_trans: - if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) { - flags = ZPCI_PTE_INVALID; - while (i-- > 0) { - page_addr -= PAGE_SIZE; - dma_addr -= PAGE_SIZE; - entry = dma_walk_cpu_trans(s390_domain->dma_table, - dma_addr); - if (!entry) - break; - dma_update_cpu_trans(entry, page_addr, flags); - } - } spin_unlock_irqrestore(&s390_domain->dma_table_lock, irq_flags); return rc; @@ -308,8 +332,8 @@ static int s390_iommu_map_pages(struct iommu_domain *domain, int prot, gfp_t gfp, size_t *mapped) { struct s390_domain *s390_domain = to_s390_domain(domain); - int flags = ZPCI_PTE_VALID, rc = 0; size_t size = pgcount << __ffs(pgsize); + int flags = ZPCI_PTE_VALID, rc = 0; if (pgsize != SZ_4K) return -EINVAL; @@ -327,8 +351,8 @@ static int s390_iommu_map_pages(struct iommu_domain *domain, if (!(prot & IOMMU_WRITE)) flags |= ZPCI_TABLE_PROTECTED; - rc = s390_iommu_update_trans(s390_domain, paddr, iova, - pgcount, flags); + rc = s390_iommu_validate_trans(s390_domain, paddr, iova, + pgcount, flags); if (!rc) *mapped = size; @@ -373,20 +397,13 @@ static size_t s390_iommu_unmap_pages(struct iommu_domain *domain, { struct s390_domain *s390_domain = to_s390_domain(domain); size_t size = pgcount << __ffs(pgsize); - int flags = ZPCI_PTE_INVALID; - phys_addr_t paddr; int rc; if (WARN_ON(iova < s390_domain->domain.geometry.aperture_start || (iova + size - 1) > s390_domain->domain.geometry.aperture_end)) return 0; - paddr = s390_iommu_iova_to_phys(domain, iova); - if (!paddr) - return 0; - - rc = s390_iommu_update_trans(s390_domain, paddr, iova, - 
pgcount, flags); + rc = s390_iommu_invalidate_trans(s390_domain, iova, pgcount); if (rc) return 0; From 21c1f9021f0e7d28c3edfcc70e1ca1926ea3774e Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Wed, 9 Nov 2022 15:29:03 +0100 Subject: [PATCH 1839/4122] s390/pci: use lock-free I/O translation updates I/O translation tables on s390 use 8 byte page table entries and tables which are allocated lazily but only freed when the entire I/O translation table is torn down. Also, each IOVA can at any time only translate to one physical address. Furthermore, I/O table accesses by the IOMMU hardware are cache coherent. With a bit of care we can thus use atomic updates to manipulate the translation table without having to use a global lock at all. This is done analogously to the existing I/O translation table handling code used on Intel and AMD x86 systems. Signed-off-by: Niklas Schnelle Link: https://lore.kernel.org/r/20221109142903.4080275-6-schnelle@linux.ibm.com Signed-off-by: Joerg Roedel --- arch/s390/include/asm/pci.h | 1 - arch/s390/pci/pci_dma.c | 74 ++++++++++++++++++++++--------------- drivers/iommu/s390-iommu.c | 37 +++++++------------ 3 files changed, 58 insertions(+), 54 deletions(-) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index e4c3e4e04d30..b248694e0024 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -157,7 +157,6 @@ struct zpci_dev { /* DMA stuff */ unsigned long *dma_table; - spinlock_t dma_table_lock; int tlb_refresh; spinlock_t iommu_bitmap_lock; diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index dee825ee7305..ea478d11fbd1 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -63,37 +63,55 @@ static void dma_free_page_table(void *table) kmem_cache_free(dma_page_table_cache, table); } -static unsigned long *dma_get_seg_table_origin(unsigned long *entry) +static unsigned long *dma_get_seg_table_origin(unsigned long *rtep) { + unsigned long old_rte, rte; unsigned long 
*sto; - if (reg_entry_isvalid(*entry)) - sto = get_rt_sto(*entry); - else { + rte = READ_ONCE(*rtep); + if (reg_entry_isvalid(rte)) { + sto = get_rt_sto(rte); + } else { sto = dma_alloc_cpu_table(); if (!sto) return NULL; - set_rt_sto(entry, virt_to_phys(sto)); - validate_rt_entry(entry); - entry_clr_protected(entry); + set_rt_sto(&rte, virt_to_phys(sto)); + validate_rt_entry(&rte); + entry_clr_protected(&rte); + + old_rte = cmpxchg(rtep, ZPCI_TABLE_INVALID, rte); + if (old_rte != ZPCI_TABLE_INVALID) { + /* Somone else was faster, use theirs */ + dma_free_cpu_table(sto); + sto = get_rt_sto(old_rte); + } } return sto; } -static unsigned long *dma_get_page_table_origin(unsigned long *entry) +static unsigned long *dma_get_page_table_origin(unsigned long *step) { + unsigned long old_ste, ste; unsigned long *pto; - if (reg_entry_isvalid(*entry)) - pto = get_st_pto(*entry); - else { + ste = READ_ONCE(*step); + if (reg_entry_isvalid(ste)) { + pto = get_st_pto(ste); + } else { pto = dma_alloc_page_table(); if (!pto) return NULL; - set_st_pto(entry, virt_to_phys(pto)); - validate_st_entry(entry); - entry_clr_protected(entry); + set_st_pto(&ste, virt_to_phys(pto)); + validate_st_entry(&ste); + entry_clr_protected(&ste); + + old_ste = cmpxchg(step, ZPCI_TABLE_INVALID, ste); + if (old_ste != ZPCI_TABLE_INVALID) { + /* Somone else was faster, use theirs */ + dma_free_page_table(pto); + pto = get_st_pto(old_ste); + } } return pto; } @@ -117,19 +135,24 @@ unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr) return &pto[px]; } -void dma_update_cpu_trans(unsigned long *entry, phys_addr_t page_addr, int flags) +void dma_update_cpu_trans(unsigned long *ptep, phys_addr_t page_addr, int flags) { + unsigned long pte; + + pte = READ_ONCE(*ptep); if (flags & ZPCI_PTE_INVALID) { - invalidate_pt_entry(entry); + invalidate_pt_entry(&pte); } else { - set_pt_pfaa(entry, page_addr); - validate_pt_entry(entry); + set_pt_pfaa(&pte, page_addr); + validate_pt_entry(&pte); } if 
(flags & ZPCI_TABLE_PROTECTED) - entry_set_protected(entry); + entry_set_protected(&pte); else - entry_clr_protected(entry); + entry_clr_protected(&pte); + + xchg(ptep, pte); } static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa, @@ -137,18 +160,14 @@ static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa, { unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; phys_addr_t page_addr = (pa & PAGE_MASK); - unsigned long irq_flags; unsigned long *entry; int i, rc = 0; if (!nr_pages) return -EINVAL; - spin_lock_irqsave(&zdev->dma_table_lock, irq_flags); - if (!zdev->dma_table) { - rc = -EINVAL; - goto out_unlock; - } + if (!zdev->dma_table) + return -EINVAL; for (i = 0; i < nr_pages; i++) { entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr); @@ -173,8 +192,6 @@ undo_cpu_trans: dma_update_cpu_trans(entry, page_addr, flags); } } -out_unlock: - spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags); return rc; } @@ -558,7 +575,6 @@ int zpci_dma_init_device(struct zpci_dev *zdev) WARN_ON(zdev->s390_domain); spin_lock_init(&zdev->iommu_bitmap_lock); - spin_lock_init(&zdev->dma_table_lock); zdev->dma_table = dma_alloc_cpu_table(); if (!zdev->dma_table) { diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index 2b9a3e3bc606..ed33c6cce083 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -20,7 +20,6 @@ struct s390_domain { struct iommu_domain domain; struct list_head devices; unsigned long *dma_table; - spinlock_t dma_table_lock; spinlock_t list_lock; struct rcu_head rcu; }; @@ -62,7 +61,6 @@ static struct iommu_domain *s390_domain_alloc(unsigned domain_type) s390_domain->domain.geometry.aperture_start = 0; s390_domain->domain.geometry.aperture_end = ZPCI_TABLE_SIZE_RT - 1; - spin_lock_init(&s390_domain->dma_table_lock); spin_lock_init(&s390_domain->list_lock); INIT_LIST_HEAD_RCU(&s390_domain->devices); @@ -265,14 +263,10 @@ static int s390_iommu_validate_trans(struct s390_domain *s390_domain, 
unsigned long nr_pages, int flags) { phys_addr_t page_addr = pa & PAGE_MASK; - unsigned long irq_flags, i; unsigned long *entry; + unsigned long i; int rc; - if (!nr_pages) - return 0; - - spin_lock_irqsave(&s390_domain->dma_table_lock, irq_flags); for (i = 0; i < nr_pages; i++) { entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr); if (unlikely(!entry)) { @@ -283,7 +277,6 @@ static int s390_iommu_validate_trans(struct s390_domain *s390_domain, page_addr += PAGE_SIZE; dma_addr += PAGE_SIZE; } - spin_unlock_irqrestore(&s390_domain->dma_table_lock, irq_flags); return 0; @@ -296,7 +289,6 @@ undo_cpu_trans: break; dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID); } - spin_unlock_irqrestore(&s390_domain->dma_table_lock, irq_flags); return rc; } @@ -304,14 +296,10 @@ undo_cpu_trans: static int s390_iommu_invalidate_trans(struct s390_domain *s390_domain, dma_addr_t dma_addr, unsigned long nr_pages) { - unsigned long irq_flags, i; unsigned long *entry; + unsigned long i; int rc = 0; - if (!nr_pages) - return 0; - - spin_lock_irqsave(&s390_domain->dma_table_lock, irq_flags); for (i = 0; i < nr_pages; i++) { entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr); if (unlikely(!entry)) { @@ -321,7 +309,6 @@ static int s390_iommu_invalidate_trans(struct s390_domain *s390_domain, dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID); dma_addr += PAGE_SIZE; } - spin_unlock_irqrestore(&s390_domain->dma_table_lock, irq_flags); return rc; } @@ -363,7 +350,8 @@ static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { struct s390_domain *s390_domain = to_s390_domain(domain); - unsigned long *sto, *pto, *rto, flags; + unsigned long *rto, *sto, *pto; + unsigned long ste, pte, rte; unsigned int rtx, sx, px; phys_addr_t phys = 0; @@ -376,16 +364,17 @@ static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain, px = calc_px(iova); rto = s390_domain->dma_table; - spin_lock_irqsave(&s390_domain->dma_table_lock, flags); - if (rto && 
reg_entry_isvalid(rto[rtx])) { - sto = get_rt_sto(rto[rtx]); - if (sto && reg_entry_isvalid(sto[sx])) { - pto = get_st_pto(sto[sx]); - if (pto && pt_entry_isvalid(pto[px])) - phys = pto[px] & ZPCI_PTE_ADDR_MASK; + rte = READ_ONCE(rto[rtx]); + if (reg_entry_isvalid(rte)) { + sto = get_rt_sto(rte); + ste = READ_ONCE(sto[sx]); + if (reg_entry_isvalid(ste)) { + pto = get_st_pto(ste); + pte = READ_ONCE(pto[px]); + if (pt_entry_isvalid(pte)) + phys = pte & ZPCI_PTE_ADDR_MASK; } } - spin_unlock_irqrestore(&s390_domain->dma_table_lock, flags); return phys; } From bbc4d205d93f52ee18dfa7858d51489c0506547f Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 10 Nov 2022 16:44:07 +0100 Subject: [PATCH 1840/4122] iommu/exynos: Fix driver initialization sequence Registering a SYSMMU platform driver might directly trigger initializing IOMMU domains and performing the initial mappings. Also the IOMMU core might use the IOMMU hardware once it has been registered with the iommu_device_register() function. Ensure that all driver resources are allocated and initialized before the driver advertises its presence to the platform bus and the IOMMU subsystem. 
Signed-off-by: Marek Szyprowski Reviewed-by: Sam Protsenko Link: https://lore.kernel.org/r/20221110154407.26531-1-m.szyprowski@samsung.com Signed-off-by: Joerg Roedel --- drivers/iommu/exynos-iommu.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 45fd4850bacb..b0cde2211987 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -708,10 +708,6 @@ static int exynos_sysmmu_probe(struct platform_device *pdev) if (ret) return ret; - ret = iommu_device_register(&data->iommu, &exynos_iommu_ops, dev); - if (ret) - goto err_iommu_register; - platform_set_drvdata(pdev, data); if (PG_ENT_SHIFT < 0) { @@ -743,11 +739,13 @@ static int exynos_sysmmu_probe(struct platform_device *pdev) pm_runtime_enable(dev); + ret = iommu_device_register(&data->iommu, &exynos_iommu_ops, dev); + if (ret) + goto err_dma_set_mask; + return 0; err_dma_set_mask: - iommu_device_unregister(&data->iommu); -err_iommu_register: iommu_device_sysfs_remove(&data->iommu); return ret; } @@ -1432,12 +1430,6 @@ static int __init exynos_iommu_init(void) return -ENOMEM; } - ret = platform_driver_register(&exynos_sysmmu_driver); - if (ret) { - pr_err("%s: Failed to register driver\n", __func__); - goto err_reg_driver; - } - zero_lv2_table = kmem_cache_zalloc(lv2table_kmem_cache, GFP_KERNEL); if (zero_lv2_table == NULL) { pr_err("%s: Failed to allocate zero level2 page table\n", @@ -1446,10 +1438,16 @@ static int __init exynos_iommu_init(void) goto err_zero_lv2; } + ret = platform_driver_register(&exynos_sysmmu_driver); + if (ret) { + pr_err("%s: Failed to register driver\n", __func__); + goto err_reg_driver; + } + return 0; -err_zero_lv2: - platform_driver_unregister(&exynos_sysmmu_driver); err_reg_driver: + platform_driver_unregister(&exynos_sysmmu_driver); +err_zero_lv2: kmem_cache_destroy(lv2table_kmem_cache); return ret; } From b577f7e679b763b706032e7a65c7b3a05c5f2184 Mon Sep 
17 00:00:00 2001 From: Robin Murphy Date: Tue, 15 Nov 2022 15:26:35 +0000 Subject: [PATCH 1841/4122] iommu/mediatek-v1: Update to {map,unmap}_pages Now that the core API has a proper notion of multi-page mappings, clean up the old pgsize_bitmap hack by implementing the new interfaces instead. This also brings a slight simplification since we no longer need to worry about rolling back partial mappings on failure. Signed-off-by: Robin Murphy Acked-by: Will Deacon Link: https://lore.kernel.org/r/768e90ff0c2d61e4723049c1349d8bac58daa437.1668100209.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu_v1.c | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index 6e0e65831eb7..69682ee068d2 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -327,44 +327,42 @@ static void mtk_iommu_v1_detach_device(struct iommu_domain *domain, struct devic } static int mtk_iommu_v1_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp) + phys_addr_t paddr, size_t pgsize, size_t pgcount, + int prot, gfp_t gfp, size_t *mapped) { struct mtk_iommu_v1_domain *dom = to_mtk_domain(domain); - unsigned int page_num = size >> MT2701_IOMMU_PAGE_SHIFT; unsigned long flags; unsigned int i; u32 *pgt_base_iova = dom->pgt_va + (iova >> MT2701_IOMMU_PAGE_SHIFT); u32 pabase = (u32)paddr; - int map_size = 0; spin_lock_irqsave(&dom->pgtlock, flags); - for (i = 0; i < page_num; i++) { - if (pgt_base_iova[i]) { - memset(pgt_base_iova, 0, i * sizeof(u32)); + for (i = 0; i < pgcount; i++) { + if (pgt_base_iova[i]) break; - } pgt_base_iova[i] = pabase | F_DESC_VALID | F_DESC_NONSEC; pabase += MT2701_IOMMU_PAGE_SIZE; - map_size += MT2701_IOMMU_PAGE_SIZE; } spin_unlock_irqrestore(&dom->pgtlock, flags); - mtk_iommu_v1_tlb_flush_range(dom->data, iova, size); + *mapped = i * MT2701_IOMMU_PAGE_SIZE; + 
mtk_iommu_v1_tlb_flush_range(dom->data, iova, *mapped); - return map_size == size ? 0 : -EEXIST; + return i == pgcount ? 0 : -EEXIST; } static size_t mtk_iommu_v1_unmap(struct iommu_domain *domain, unsigned long iova, - size_t size, struct iommu_iotlb_gather *gather) + size_t pgsize, size_t pgcount, + struct iommu_iotlb_gather *gather) { struct mtk_iommu_v1_domain *dom = to_mtk_domain(domain); unsigned long flags; u32 *pgt_base_iova = dom->pgt_va + (iova >> MT2701_IOMMU_PAGE_SHIFT); - unsigned int page_num = size >> MT2701_IOMMU_PAGE_SHIFT; + size_t size = pgcount * MT2701_IOMMU_PAGE_SIZE; spin_lock_irqsave(&dom->pgtlock, flags); - memset(pgt_base_iova, 0, page_num * sizeof(u32)); + memset(pgt_base_iova, 0, pgcount * sizeof(u32)); spin_unlock_irqrestore(&dom->pgtlock, flags); mtk_iommu_v1_tlb_flush_range(dom->data, iova, size); @@ -586,13 +584,13 @@ static const struct iommu_ops mtk_iommu_v1_ops = { .release_device = mtk_iommu_v1_release_device, .def_domain_type = mtk_iommu_v1_def_domain_type, .device_group = generic_device_group, - .pgsize_bitmap = ~0UL << MT2701_IOMMU_PAGE_SHIFT, + .pgsize_bitmap = MT2701_IOMMU_PAGE_SIZE, .owner = THIS_MODULE, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = mtk_iommu_v1_attach_device, .detach_dev = mtk_iommu_v1_detach_device, - .map = mtk_iommu_v1_map, - .unmap = mtk_iommu_v1_unmap, + .map_pages = mtk_iommu_v1_map, + .unmap_pages = mtk_iommu_v1_unmap, .iova_to_phys = mtk_iommu_v1_iova_to_phys, .free = mtk_iommu_v1_domain_free, } From a05d5857cec3efef02af557dcb5ed257364356e6 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 15 Nov 2022 15:26:36 +0000 Subject: [PATCH 1842/4122] iommu/sprd: Update to {map,unmap}_pages Now that the core API has a proper notion of multi-page mappings, clean up the old pgsize_bitmap hack by implementing the new interfaces instead. This time we'll get the return values for unmaps correct too. 
Signed-off-by: Robin Murphy Acked-by: Will Deacon Link: https://lore.kernel.org/r/9026464e8380b92d10d09103e215eb4306a5df7c.1668100209.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/sprd-iommu.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c index e02793375598..219bfa11f7f4 100644 --- a/drivers/iommu/sprd-iommu.c +++ b/drivers/iommu/sprd-iommu.c @@ -271,10 +271,11 @@ static void sprd_iommu_detach_device(struct iommu_domain *domain, } static int sprd_iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp) + phys_addr_t paddr, size_t pgsize, size_t pgcount, + int prot, gfp_t gfp, size_t *mapped) { struct sprd_iommu_domain *dom = to_sprd_domain(domain); - unsigned int page_num = size >> SPRD_IOMMU_PAGE_SHIFT; + size_t size = pgcount * SPRD_IOMMU_PAGE_SIZE; unsigned long flags; unsigned int i; u32 *pgt_base_iova; @@ -296,35 +297,37 @@ static int sprd_iommu_map(struct iommu_domain *domain, unsigned long iova, pgt_base_iova = dom->pgt_va + ((iova - start) >> SPRD_IOMMU_PAGE_SHIFT); spin_lock_irqsave(&dom->pgtlock, flags); - for (i = 0; i < page_num; i++) { + for (i = 0; i < pgcount; i++) { pgt_base_iova[i] = pabase >> SPRD_IOMMU_PAGE_SHIFT; pabase += SPRD_IOMMU_PAGE_SIZE; } spin_unlock_irqrestore(&dom->pgtlock, flags); + *mapped = size; return 0; } static size_t sprd_iommu_unmap(struct iommu_domain *domain, unsigned long iova, - size_t size, struct iommu_iotlb_gather *iotlb_gather) + size_t pgsize, size_t pgcount, + struct iommu_iotlb_gather *iotlb_gather) { struct sprd_iommu_domain *dom = to_sprd_domain(domain); unsigned long flags; u32 *pgt_base_iova; - unsigned int page_num = size >> SPRD_IOMMU_PAGE_SHIFT; + size_t size = pgcount * SPRD_IOMMU_PAGE_SIZE; unsigned long start = domain->geometry.aperture_start; unsigned long end = domain->geometry.aperture_end; if (iova < start || (iova + 
size) > (end + 1)) - return -EINVAL; + return 0; pgt_base_iova = dom->pgt_va + ((iova - start) >> SPRD_IOMMU_PAGE_SHIFT); spin_lock_irqsave(&dom->pgtlock, flags); - memset(pgt_base_iova, 0, page_num * sizeof(u32)); + memset(pgt_base_iova, 0, pgcount * sizeof(u32)); spin_unlock_irqrestore(&dom->pgtlock, flags); - return 0; + return size; } static void sprd_iommu_sync_map(struct iommu_domain *domain, @@ -407,13 +410,13 @@ static const struct iommu_ops sprd_iommu_ops = { .probe_device = sprd_iommu_probe_device, .device_group = sprd_iommu_device_group, .of_xlate = sprd_iommu_of_xlate, - .pgsize_bitmap = ~0UL << SPRD_IOMMU_PAGE_SHIFT, + .pgsize_bitmap = SPRD_IOMMU_PAGE_SIZE, .owner = THIS_MODULE, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = sprd_iommu_attach_device, .detach_dev = sprd_iommu_detach_device, - .map = sprd_iommu_map, - .unmap = sprd_iommu_unmap, + .map_pages = sprd_iommu_map, + .unmap_pages = sprd_iommu_unmap, .iotlb_sync_map = sprd_iommu_sync_map, .iotlb_sync = sprd_iommu_sync, .iova_to_phys = sprd_iommu_iova_to_phys, From 85637380dad6d97071018cba6f2aa90667f716b3 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 15 Nov 2022 15:26:37 +0000 Subject: [PATCH 1843/4122] iommu/mediatek: Update to {map,unmap}_pages Update map/unmap to the new multi-page interfaces, which is dead easy since we just pass them through to io-pgtable anyway. 
Signed-off-by: Robin Murphy Acked-by: Will Deacon Link: https://lore.kernel.org/r/25b65b71e7e5d1006469aee48bab07ca87227bfa.1668100209.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index b383c8327f9c..6b8ad85b50ed 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -711,7 +711,8 @@ static void mtk_iommu_detach_device(struct iommu_domain *domain, } static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp) + phys_addr_t paddr, size_t pgsize, size_t pgcount, + int prot, gfp_t gfp, size_t *mapped) { struct mtk_iommu_domain *dom = to_mtk_domain(domain); @@ -720,17 +721,17 @@ static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova, paddr |= BIT_ULL(32); /* Synchronize with the tlb_lock */ - return dom->iop->map(dom->iop, iova, paddr, size, prot, gfp); + return dom->iop->map_pages(dom->iop, iova, paddr, pgsize, pgcount, prot, gfp, mapped); } static size_t mtk_iommu_unmap(struct iommu_domain *domain, - unsigned long iova, size_t size, + unsigned long iova, size_t pgsize, size_t pgcount, struct iommu_iotlb_gather *gather) { struct mtk_iommu_domain *dom = to_mtk_domain(domain); - iommu_iotlb_gather_add_range(gather, iova, size); - return dom->iop->unmap(dom->iop, iova, size, gather); + iommu_iotlb_gather_add_range(gather, iova, pgsize * pgcount); + return dom->iop->unmap_pages(dom->iop, iova, pgsize, pgcount, gather); } static void mtk_iommu_flush_iotlb_all(struct iommu_domain *domain) @@ -938,8 +939,8 @@ static const struct iommu_ops mtk_iommu_ops = { .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = mtk_iommu_attach_device, .detach_dev = mtk_iommu_detach_device, - .map = mtk_iommu_map, - .unmap = mtk_iommu_unmap, + .map_pages = mtk_iommu_map, + .unmap_pages = mtk_iommu_unmap, 
.flush_iotlb_all = mtk_iommu_flush_iotlb_all, .iotlb_sync = mtk_iommu_iotlb_sync, .iotlb_sync_map = mtk_iommu_sync_map, From 8b35cdcf9bf82098dd15ed02a2a51cdf5f5ca090 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 15 Nov 2022 15:26:38 +0000 Subject: [PATCH 1844/4122] iommu/msm: Update to {map,unmap}_pages Update map/unmap to the new multi-page interfaces, which is dead easy since we just pass them through to io-pgtable anyway. Signed-off-by: Robin Murphy Acked-by: Will Deacon Link: https://lore.kernel.org/r/24a8f522710ddd6bbac4da154aa28799e939ebe4.1668100209.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/msm_iommu.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index 16179a9a7283..c60624910872 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -471,14 +471,16 @@ fail: } static int msm_iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t pa, size_t len, int prot, gfp_t gfp) + phys_addr_t pa, size_t pgsize, size_t pgcount, + int prot, gfp_t gfp, size_t *mapped) { struct msm_priv *priv = to_msm_priv(domain); unsigned long flags; int ret; spin_lock_irqsave(&priv->pgtlock, flags); - ret = priv->iop->map(priv->iop, iova, pa, len, prot, GFP_ATOMIC); + ret = priv->iop->map_pages(priv->iop, iova, pa, pgsize, pgcount, prot, + GFP_ATOMIC, mapped); spin_unlock_irqrestore(&priv->pgtlock, flags); return ret; @@ -493,16 +495,18 @@ static void msm_iommu_sync_map(struct iommu_domain *domain, unsigned long iova, } static size_t msm_iommu_unmap(struct iommu_domain *domain, unsigned long iova, - size_t len, struct iommu_iotlb_gather *gather) + size_t pgsize, size_t pgcount, + struct iommu_iotlb_gather *gather) { struct msm_priv *priv = to_msm_priv(domain); unsigned long flags; + size_t ret; spin_lock_irqsave(&priv->pgtlock, flags); - len = priv->iop->unmap(priv->iop, iova, len, gather); + ret = 
priv->iop->unmap_pages(priv->iop, iova, pgsize, pgcount, gather); spin_unlock_irqrestore(&priv->pgtlock, flags); - return len; + return ret; } static phys_addr_t msm_iommu_iova_to_phys(struct iommu_domain *domain, @@ -679,8 +683,8 @@ static struct iommu_ops msm_iommu_ops = { .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = msm_iommu_attach_dev, .detach_dev = msm_iommu_detach_dev, - .map = msm_iommu_map, - .unmap = msm_iommu_unmap, + .map_pages = msm_iommu_map, + .unmap_pages = msm_iommu_unmap, /* * Nothing is needed here, the barrier to guarantee * completion of the tlb sync operation is implicitly From 0a17bbab2330aecd026696d4decc2636bd31e790 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 15 Nov 2022 15:26:39 +0000 Subject: [PATCH 1845/4122] iommu/ipmmu-vmsa: Update to {map,unmap}_pages Update map/unmap to the new multi-page interfaces, which is dead easy since we just pass them through to io-pgtable anyway. Since these are domain ops now, the domain is inherently valid (not to mention that container_of() wouldn't return NULL anyway), so garbage-collect that check in the process. 
Signed-off-by: Robin Murphy Acked-by: Will Deacon Link: https://lore.kernel.org/r/ad859ccc24720d72f8eafd03817c1fc11255ddc1.1668100209.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/ipmmu-vmsa.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 22230cc15dcd..a003bd5fc65c 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -659,22 +659,22 @@ static void ipmmu_detach_device(struct iommu_domain *io_domain, } static int ipmmu_map(struct iommu_domain *io_domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp) + phys_addr_t paddr, size_t pgsize, size_t pgcount, + int prot, gfp_t gfp, size_t *mapped) { struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); - if (!domain) - return -ENODEV; - - return domain->iop->map(domain->iop, iova, paddr, size, prot, gfp); + return domain->iop->map_pages(domain->iop, iova, paddr, pgsize, pgcount, + prot, gfp, mapped); } static size_t ipmmu_unmap(struct iommu_domain *io_domain, unsigned long iova, - size_t size, struct iommu_iotlb_gather *gather) + size_t pgsize, size_t pgcount, + struct iommu_iotlb_gather *gather) { struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); - return domain->iop->unmap(domain->iop, iova, size, gather); + return domain->iop->unmap_pages(domain->iop, iova, pgsize, pgcount, gather); } static void ipmmu_flush_iotlb_all(struct iommu_domain *io_domain) @@ -877,8 +877,8 @@ static const struct iommu_ops ipmmu_ops = { .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = ipmmu_attach_device, .detach_dev = ipmmu_detach_device, - .map = ipmmu_map, - .unmap = ipmmu_unmap, + .map_pages = ipmmu_map, + .unmap_pages = ipmmu_unmap, .flush_iotlb_all = ipmmu_flush_iotlb_all, .iotlb_sync = ipmmu_iotlb_sync, .iova_to_phys = ipmmu_iova_to_phys, From fa8ce5743039bc7ea5cb4217423efaebe381fc54 Mon Sep 17 00:00:00 2001 From: Robin 
Murphy Date: Tue, 15 Nov 2022 15:26:40 +0000 Subject: [PATCH 1846/4122] iommu/qcom: Update to {map,unmap}_pages Update map/unmap to the new multi-page interfaces, which is dead easy since we just pass them through to io-pgtable anyway. Signed-off-by: Robin Murphy Acked-by: Will Deacon Link: https://lore.kernel.org/r/ccff9a133d12ec938741720be6baf5d788b71ea0.1668100209.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/arm/arm-smmu/qcom_iommu.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c index bfd7b51eb5db..270c3d9128ba 100644 --- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c +++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c @@ -410,7 +410,8 @@ static void qcom_iommu_detach_dev(struct iommu_domain *domain, struct device *de } static int qcom_iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp) + phys_addr_t paddr, size_t pgsize, size_t pgcount, + int prot, gfp_t gfp, size_t *mapped) { int ret; unsigned long flags; @@ -421,13 +422,14 @@ static int qcom_iommu_map(struct iommu_domain *domain, unsigned long iova, return -ENODEV; spin_lock_irqsave(&qcom_domain->pgtbl_lock, flags); - ret = ops->map(ops, iova, paddr, size, prot, GFP_ATOMIC); + ret = ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, GFP_ATOMIC, mapped); spin_unlock_irqrestore(&qcom_domain->pgtbl_lock, flags); return ret; } static size_t qcom_iommu_unmap(struct iommu_domain *domain, unsigned long iova, - size_t size, struct iommu_iotlb_gather *gather) + size_t pgsize, size_t pgcount, + struct iommu_iotlb_gather *gather) { size_t ret; unsigned long flags; @@ -444,7 +446,7 @@ static size_t qcom_iommu_unmap(struct iommu_domain *domain, unsigned long iova, */ pm_runtime_get_sync(qcom_domain->iommu->dev); spin_lock_irqsave(&qcom_domain->pgtbl_lock, flags); - ret = ops->unmap(ops, iova, size, gather); + ret = 
ops->unmap_pages(ops, iova, pgsize, pgcount, gather); spin_unlock_irqrestore(&qcom_domain->pgtbl_lock, flags); pm_runtime_put_sync(qcom_domain->iommu->dev); @@ -582,8 +584,8 @@ static const struct iommu_ops qcom_iommu_ops = { .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = qcom_iommu_attach_dev, .detach_dev = qcom_iommu_detach_dev, - .map = qcom_iommu_map, - .unmap = qcom_iommu_unmap, + .map_pages = qcom_iommu_map, + .unmap_pages = qcom_iommu_unmap, .flush_iotlb_all = qcom_iommu_flush_iotlb_all, .iotlb_sync = qcom_iommu_iotlb_sync, .iova_to_phys = qcom_iommu_iova_to_phys, From 99cbb8e436344ddd0554108a3d8afb7ce5c4994e Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 15 Nov 2022 15:26:41 +0000 Subject: [PATCH 1847/4122] iommu/io-pgtable-arm: Remove map/unmap With all users now calling {map,unmap}_pages, remove the wrappers. Signed-off-by: Robin Murphy Acked-by: Will Deacon Link: https://lore.kernel.org/r/162e58e83ed42f78c3fbefe78c9b5410dd1dc412.1668100209.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/io-pgtable-arm.c | 42 ++++++++++++---------------------- 1 file changed, 15 insertions(+), 27 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 0ba817e86346..72dcdd468cf3 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -360,7 +360,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova, max_entries = ARM_LPAE_PTES_PER_TABLE(data) - map_idx_start; num_entries = min_t(int, pgcount, max_entries); ret = arm_lpae_init_pte(data, iova, paddr, prot, lvl, num_entries, ptep); - if (!ret && mapped) + if (!ret) *mapped += num_entries * size; return ret; @@ -496,13 +496,6 @@ static int arm_lpae_map_pages(struct io_pgtable_ops *ops, unsigned long iova, return ret; } -static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova, - phys_addr_t paddr, size_t size, int iommu_prot, gfp_t gfp) -{ - return 
arm_lpae_map_pages(ops, iova, paddr, size, 1, iommu_prot, gfp, - NULL); -} - static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl, arm_lpae_iopte *ptep) { @@ -682,12 +675,6 @@ static size_t arm_lpae_unmap_pages(struct io_pgtable_ops *ops, unsigned long iov data->start_level, ptep); } -static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova, - size_t size, struct iommu_iotlb_gather *gather) -{ - return arm_lpae_unmap_pages(ops, iova, size, 1, gather); -} - static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova) { @@ -799,9 +786,7 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg) data->pgd_bits = va_bits - (data->bits_per_level * (levels - 1)); data->iop.ops = (struct io_pgtable_ops) { - .map = arm_lpae_map, .map_pages = arm_lpae_map_pages, - .unmap = arm_lpae_unmap, .unmap_pages = arm_lpae_unmap_pages, .iova_to_phys = arm_lpae_iova_to_phys, }; @@ -1176,7 +1161,7 @@ static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg) int i, j; unsigned long iova; - size_t size; + size_t size, mapped; struct io_pgtable_ops *ops; selftest_running = true; @@ -1209,15 +1194,16 @@ static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg) for_each_set_bit(j, &cfg->pgsize_bitmap, BITS_PER_LONG) { size = 1UL << j; - if (ops->map(ops, iova, iova, size, IOMMU_READ | - IOMMU_WRITE | - IOMMU_NOEXEC | - IOMMU_CACHE, GFP_KERNEL)) + if (ops->map_pages(ops, iova, iova, size, 1, + IOMMU_READ | IOMMU_WRITE | + IOMMU_NOEXEC | IOMMU_CACHE, + GFP_KERNEL, &mapped)) return __FAIL(ops, i); /* Overlapping mappings */ - if (!ops->map(ops, iova, iova + size, size, - IOMMU_READ | IOMMU_NOEXEC, GFP_KERNEL)) + if (!ops->map_pages(ops, iova, iova + size, size, 1, + IOMMU_READ | IOMMU_NOEXEC, + GFP_KERNEL, &mapped)) return __FAIL(ops, i); if (ops->iova_to_phys(ops, iova + 42) != (iova + 42)) @@ -1228,11 +1214,12 @@ static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg) /* Partial unmap */ size = 1UL << 
__ffs(cfg->pgsize_bitmap); - if (ops->unmap(ops, SZ_1G + size, size, NULL) != size) + if (ops->unmap_pages(ops, SZ_1G + size, size, 1, NULL) != size) return __FAIL(ops, i); /* Remap of partial unmap */ - if (ops->map(ops, SZ_1G + size, size, size, IOMMU_READ, GFP_KERNEL)) + if (ops->map_pages(ops, SZ_1G + size, size, size, 1, + IOMMU_READ, GFP_KERNEL, &mapped)) return __FAIL(ops, i); if (ops->iova_to_phys(ops, SZ_1G + size + 42) != (size + 42)) @@ -1243,14 +1230,15 @@ static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg) for_each_set_bit(j, &cfg->pgsize_bitmap, BITS_PER_LONG) { size = 1UL << j; - if (ops->unmap(ops, iova, size, NULL) != size) + if (ops->unmap_pages(ops, iova, size, 1, NULL) != size) return __FAIL(ops, i); if (ops->iova_to_phys(ops, iova + 42)) return __FAIL(ops, i); /* Remap full block */ - if (ops->map(ops, iova, iova, size, IOMMU_WRITE, GFP_KERNEL)) + if (ops->map_pages(ops, iova, iova, size, 1, + IOMMU_WRITE, GFP_KERNEL, &mapped)) return __FAIL(ops, i); if (ops->iova_to_phys(ops, iova + 42) != (iova + 42)) From b9bf41e249f8c8bf79389cec9d29faf03f79aad2 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 15 Nov 2022 15:26:42 +0000 Subject: [PATCH 1848/4122] iommu/io-pgtable-arm-v7s: Remove map/unmap With all users now calling {map,unmap}_pages, remove the wrappers. 
Signed-off-by: Robin Murphy Acked-by: Will Deacon Link: https://lore.kernel.org/r/98481dd7e3576b74149ce2de8f217338ee1dd490.1668100209.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/io-pgtable-arm-v7s.c | 41 +++++++++++------------------- 1 file changed, 15 insertions(+), 26 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index ba3115fd0f86..75f244a3e12d 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -564,8 +564,7 @@ static int arm_v7s_map_pages(struct io_pgtable_ops *ops, unsigned long iova, iova += pgsize; paddr += pgsize; - if (mapped) - *mapped += pgsize; + *mapped += pgsize; } /* * Synchronise all PTE updates for the new mapping before there's @@ -576,12 +575,6 @@ static int arm_v7s_map_pages(struct io_pgtable_ops *ops, unsigned long iova, return ret; } -static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp) -{ - return arm_v7s_map_pages(ops, iova, paddr, size, 1, prot, gfp, NULL); -} - static void arm_v7s_free_pgtable(struct io_pgtable *iop) { struct arm_v7s_io_pgtable *data = io_pgtable_to_data(iop); @@ -764,12 +757,6 @@ static size_t arm_v7s_unmap_pages(struct io_pgtable_ops *ops, unsigned long iova return unmapped; } -static size_t arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova, - size_t size, struct iommu_iotlb_gather *gather) -{ - return arm_v7s_unmap_pages(ops, iova, size, 1, gather); -} - static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova) { @@ -842,9 +829,7 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, goto out_free_data; data->iop.ops = (struct io_pgtable_ops) { - .map = arm_v7s_map, .map_pages = arm_v7s_map_pages, - .unmap = arm_v7s_unmap, .unmap_pages = arm_v7s_unmap_pages, .iova_to_phys = arm_v7s_iova_to_phys, }; @@ -954,6 +939,7 @@ static int __init arm_v7s_do_selftests(void) }; 
unsigned int iova, size, iova_start; unsigned int i, loopnr = 0; + size_t mapped; selftest_running = true; @@ -984,15 +970,16 @@ static int __init arm_v7s_do_selftests(void) iova = 0; for_each_set_bit(i, &cfg.pgsize_bitmap, BITS_PER_LONG) { size = 1UL << i; - if (ops->map(ops, iova, iova, size, IOMMU_READ | - IOMMU_WRITE | - IOMMU_NOEXEC | - IOMMU_CACHE, GFP_KERNEL)) + if (ops->map_pages(ops, iova, iova, size, 1, + IOMMU_READ | IOMMU_WRITE | + IOMMU_NOEXEC | IOMMU_CACHE, + GFP_KERNEL, &mapped)) return __FAIL(ops); /* Overlapping mappings */ - if (!ops->map(ops, iova, iova + size, size, - IOMMU_READ | IOMMU_NOEXEC, GFP_KERNEL)) + if (!ops->map_pages(ops, iova, iova + size, size, 1, + IOMMU_READ | IOMMU_NOEXEC, GFP_KERNEL, + &mapped)) return __FAIL(ops); if (ops->iova_to_phys(ops, iova + 42) != (iova + 42)) @@ -1007,11 +994,12 @@ static int __init arm_v7s_do_selftests(void) size = 1UL << __ffs(cfg.pgsize_bitmap); while (i < loopnr) { iova_start = i * SZ_16M; - if (ops->unmap(ops, iova_start + size, size, NULL) != size) + if (ops->unmap_pages(ops, iova_start + size, size, 1, NULL) != size) return __FAIL(ops); /* Remap of partial unmap */ - if (ops->map(ops, iova_start + size, size, size, IOMMU_READ, GFP_KERNEL)) + if (ops->map_pages(ops, iova_start + size, size, size, 1, + IOMMU_READ, GFP_KERNEL, &mapped)) return __FAIL(ops); if (ops->iova_to_phys(ops, iova_start + size + 42) @@ -1025,14 +1013,15 @@ static int __init arm_v7s_do_selftests(void) for_each_set_bit(i, &cfg.pgsize_bitmap, BITS_PER_LONG) { size = 1UL << i; - if (ops->unmap(ops, iova, size, NULL) != size) + if (ops->unmap_pages(ops, iova, size, 1, NULL) != size) return __FAIL(ops); if (ops->iova_to_phys(ops, iova + 42)) return __FAIL(ops); /* Remap full block */ - if (ops->map(ops, iova, iova, size, IOMMU_WRITE, GFP_KERNEL)) + if (ops->map_pages(ops, iova, iova, size, 1, IOMMU_WRITE, + GFP_KERNEL, &mapped)) return __FAIL(ops); if (ops->iova_to_phys(ops, iova + 42) != (iova + 42)) From 
b169a180bef26679b44484ad24b7d8ae32623a10 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 15 Nov 2022 15:26:43 +0000 Subject: [PATCH 1849/4122] iommu/io-pgtable: Remove map/unmap With all users now calling {map,unmap}_pages, retire the redundant single-page callbacks. Signed-off-by: Robin Murphy Acked-by: Will Deacon Link: https://lore.kernel.org/r/a5a3cbf95c3279982e378cc43dad830322a59868.1668100209.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- include/linux/io-pgtable.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 1f068dfdb140..1b7a44b35616 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -150,9 +150,7 @@ struct io_pgtable_cfg { /** * struct io_pgtable_ops - Page table manipulation API for IOMMU drivers. * - * @map: Map a physically contiguous memory region. * @map_pages: Map a physically contiguous range of pages of the same size. - * @unmap: Unmap a physically contiguous memory region. * @unmap_pages: Unmap a range of virtually contiguous pages of the same size. * @iova_to_phys: Translate iova to physical address. * @@ -160,13 +158,9 @@ struct io_pgtable_cfg { * the same names. 
*/ struct io_pgtable_ops { - int (*map)(struct io_pgtable_ops *ops, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp); int (*map_pages)(struct io_pgtable_ops *ops, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped); - size_t (*unmap)(struct io_pgtable_ops *ops, unsigned long iova, - size_t size, struct iommu_iotlb_gather *gather); size_t (*unmap_pages)(struct io_pgtable_ops *ops, unsigned long iova, size_t pgsize, size_t pgcount, struct iommu_iotlb_gather *gather); From 6cf0981c2233f97d56938d9d61845383d6eb227c Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 18 Nov 2022 17:36:04 +0800 Subject: [PATCH 1850/4122] iommu/amd: Fix pci device refcount leak in ppr_notifier() As the comment of pci_get_domain_bus_and_slot() says, it returns a pci device with its refcount incremented; when finished using it, the caller must decrement the reference count by calling pci_dev_put(). So call it before returning from ppr_notifier() to avoid refcount leak. Fixes: daae2d25a477 ("iommu/amd: Don't copy GCR3 table root pointer") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221118093604.216371-1-yangyingliang@huawei.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu_v2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c index 6a1f02c62dff..9f7fab49a5a9 100644 --- a/drivers/iommu/amd/iommu_v2.c +++ b/drivers/iommu/amd/iommu_v2.c @@ -587,6 +587,7 @@ out_drop_state: put_device_state(dev_state); out: + pci_dev_put(pdev); return ret; } From b09b56734fae28be9332021ae3e84c9b05020fda Mon Sep 17 00:00:00 2001 From: Denis Arefev Date: Fri, 18 Nov 2022 13:42:52 +0300 Subject: [PATCH 1851/4122] iommu/amd: Check return value of mmu_notifier_register() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Added a return value check for the function mmu_notifier_register().
Return value of a function 'mmu_notifier_register' called at iommu_v2.c:642 is not checked, but it is usually checked for this function Found by Linux Verification Center (linuxtesting.org) with SVACE. Signed-off-by: Denis Arefev Link: https://lore.kernel.org/r/20221118104252.122809-1-arefev@swemel.ru [joro: Fix commit message ] Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu_v2.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c index 9f7fab49a5a9..864e4ffb6aa9 100644 --- a/drivers/iommu/amd/iommu_v2.c +++ b/drivers/iommu/amd/iommu_v2.c @@ -640,7 +640,9 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, u32 pasid, if (pasid_state->mm == NULL) goto out_free; - mmu_notifier_register(&pasid_state->mn, mm); + ret = mmu_notifier_register(&pasid_state->mn, mm); + if (ret) + goto out_free; ret = set_pasid_state(dev_state, pasid_state, pasid); if (ret) From 26d2d0594d7016dbcbce4038aa202c2858d5a944 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:25 +0000 Subject: [PATCH 1852/4122] KVM: arm64: PMU: Do not let AArch32 change the counters' top 32 bits Even when using PMUv3p5 (which implies 64bit counters), there is no way for AArch32 to write to the top 32 bits of the counters. The only way to influence these bits (other than by counting events) is by writing PMCR.P==1. Make sure we obey the architecture and preserve the top 32 bits on a counter update. 
Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221113163832.3154370-10-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index ea0c8411641f..7a945fa6dd03 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -119,13 +119,8 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) return counter; } -/** - * kvm_pmu_set_counter_value - set PMU counter value - * @vcpu: The vcpu pointer - * @select_idx: The counter index - * @val: The counter value - */ -void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) +static void kvm_pmu_set_counter(struct kvm_vcpu *vcpu, u64 select_idx, u64 val, + bool force) { u64 reg; @@ -135,12 +130,36 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) kvm_pmu_release_perf_event(&vcpu->arch.pmu.pmc[select_idx]); reg = counter_index_to_reg(select_idx); + + if (vcpu_mode_is_32bit(vcpu) && select_idx != ARMV8_PMU_CYCLE_IDX && + !force) { + /* + * Even with PMUv3p5, AArch32 cannot write to the top + * 32bit of the counters. The only possible course of + * action is to use PMCR.P, which will reset them to + * 0 (the only use of the 'force' parameter). 
+ */ + val = __vcpu_sys_reg(vcpu, reg) & GENMASK(63, 32); + val |= lower_32_bits(val); + } + __vcpu_sys_reg(vcpu, reg) = val; /* Recreate the perf event to reflect the updated sample_period */ kvm_pmu_create_perf_event(vcpu, select_idx); } +/** + * kvm_pmu_set_counter_value - set PMU counter value + * @vcpu: The vcpu pointer + * @select_idx: The counter index + * @val: The counter value + */ +void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) +{ + kvm_pmu_set_counter(vcpu, select_idx, val, false); +} + /** * kvm_pmu_release_perf_event - remove the perf event * @pmc: The PMU counter pointer @@ -533,7 +552,7 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) unsigned long mask = kvm_pmu_valid_counter_mask(vcpu); mask &= ~BIT(ARMV8_PMU_CYCLE_IDX); for_each_set_bit(i, &mask, 32) - kvm_pmu_set_counter_value(vcpu, i, 0); + kvm_pmu_set_counter(vcpu, i, 0, true); } } From 3d0dba5764b94308b8c4257ad64e383f11ce0c92 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:26 +0000 Subject: [PATCH 1853/4122] KVM: arm64: PMU: Move the ID_AA64DFR0_EL1.PMUver limit to VM creation As further patches will enable the selection of a PMU revision from userspace, sample the supported PMU revision at VM creation time, rather than building each time the ID_AA64DFR0_EL1 register is accessed. This shouldn't result in any change in behaviour. 
Reviewed-by: Reiji Watanabe Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221113163832.3154370-11-maz@kernel.org --- arch/arm64/include/asm/kvm_host.h | 4 ++++ arch/arm64/kvm/arm.c | 6 ++++++ arch/arm64/kvm/pmu-emul.c | 11 ++++++++++ arch/arm64/kvm/sys_regs.c | 36 ++++++++++++++++++++++++------- include/kvm/arm_pmu.h | 6 ++++++ 5 files changed, 55 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 45e2136322ba..cc44e3bc528d 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -163,6 +163,10 @@ struct kvm_arch { u8 pfr0_csv2; u8 pfr0_csv3; + struct { + u8 imp:4; + u8 unimp:4; + } dfr0_pmuver; /* Hypercall features firmware registers' descriptor */ struct kvm_smccc_features smccc_feat; diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 94d33e296e10..f956aab438c7 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -164,6 +164,12 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) set_default_spectre(kvm); kvm_arm_init_hypercalls(kvm); + /* + * Initialise the default PMUver before there is a chance to + * create an actual PMU. 
+ */ + kvm->arch.dfr0_pmuver.imp = kvm_arm_pmu_get_pmuver_limit(); + return ret; out_free_stage2_pgd: kvm_free_stage2_pgd(&kvm->arch.mmu); diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 7a945fa6dd03..94ca2d17a4e4 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -1047,3 +1047,14 @@ int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) return -ENXIO; } + +u8 kvm_arm_pmu_get_pmuver_limit(void) +{ + u64 tmp; + + tmp = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1); + tmp = cpuid_feature_cap_perfmon_field(tmp, + ID_AA64DFR0_EL1_PMUVer_SHIFT, + ID_AA64DFR0_EL1_PMUVer_V3P4); + return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), tmp); +} diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index f4a7c5abcbca..297b4fcbf969 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1062,6 +1062,27 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu, return true; } +static u8 vcpu_pmuver(const struct kvm_vcpu *vcpu) +{ + if (kvm_vcpu_has_pmu(vcpu)) + return vcpu->kvm->arch.dfr0_pmuver.imp; + + return vcpu->kvm->arch.dfr0_pmuver.unimp; +} + +static u8 pmuver_to_perfmon(u8 pmuver) +{ + switch (pmuver) { + case ID_AA64DFR0_EL1_PMUVer_IMP: + return ID_DFR0_PERFMON_8_0; + case ID_AA64DFR0_EL1_PMUVer_IMP_DEF: + return ID_DFR0_PERFMON_IMP_DEF; + default: + /* Anything ARMv8.1+ and NI have the same value. For now. 
*/ + return pmuver; + } +} + /* Read a sanitised cpufeature ID register by sys_reg_desc */ static u64 read_id_reg(const struct kvm_vcpu *vcpu, struct sys_reg_desc const *r) { @@ -1111,18 +1132,17 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, struct sys_reg_desc const *r /* Limit debug to ARMv8.0 */ val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer); val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), 6); - /* Limit guests to PMUv3 for ARMv8.4 */ - val = cpuid_feature_cap_perfmon_field(val, - ID_AA64DFR0_EL1_PMUVer_SHIFT, - kvm_vcpu_has_pmu(vcpu) ? ID_AA64DFR0_EL1_PMUVer_V3P4 : 0); + /* Set PMUver to the required version */ + val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer); + val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), + vcpu_pmuver(vcpu)); /* Hide SPE from guests */ val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMSVer); break; case SYS_ID_DFR0_EL1: - /* Limit guests to PMUv3 for ARMv8.4 */ - val = cpuid_feature_cap_perfmon_field(val, - ID_DFR0_PERFMON_SHIFT, - kvm_vcpu_has_pmu(vcpu) ? 
ID_DFR0_PERFMON_8_4 : 0); + val &= ~ARM64_FEATURE_MASK(ID_DFR0_PERFMON); + val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_DFR0_PERFMON), + pmuver_to_perfmon(vcpu_pmuver(vcpu))); break; } diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 96b192139a23..812f729c9108 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -89,6 +89,8 @@ void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu); vcpu->arch.pmu.events = *kvm_get_pmu_events(); \ } while (0) +u8 kvm_arm_pmu_get_pmuver_limit(void); + #else struct kvm_pmu { }; @@ -154,6 +156,10 @@ static inline u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1) static inline void kvm_pmu_update_vcpu_events(struct kvm_vcpu *vcpu) {} static inline void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu) {} static inline void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu) {} +static inline u8 kvm_arm_pmu_get_pmuver_limit(void) +{ + return 0; +} #endif From 60e651ff1f48bfdf8fec80d35510bd89ecf8c766 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:27 +0000 Subject: [PATCH 1854/4122] KVM: arm64: PMU: Allow ID_AA64DFR0_EL1.PMUver to be set from userspace Allow userspace to write ID_AA64DFR0_EL1, on the condition that only the PMUver field can be altered and be at most the one that was initially computed for the guest. 
Reviewed-by: Reiji Watanabe Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221113163832.3154370-12-maz@kernel.org --- arch/arm64/kvm/sys_regs.c | 42 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 297b4fcbf969..49585258ae6c 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1242,6 +1242,45 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu, return 0; } +static int set_id_aa64dfr0_el1(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd, + u64 val) +{ + u8 pmuver, host_pmuver; + bool valid_pmu; + + host_pmuver = kvm_arm_pmu_get_pmuver_limit(); + + /* + * Allow AA64DFR0_EL1.PMUver to be set from userspace as long + * as it doesn't promise more than what the HW gives us. We + * allow an IMPDEF PMU though, only if no PMU is supported + * (KVM backward compatibility handling). + */ + pmuver = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), val); + if ((pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF && pmuver > host_pmuver)) + return -EINVAL; + + valid_pmu = (pmuver != 0 && pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF); + + /* Make sure view register and PMU support do match */ + if (kvm_vcpu_has_pmu(vcpu) != valid_pmu) + return -EINVAL; + + /* We can only differ with PMUver, and anything else is an error */ + val ^= read_id_reg(vcpu, rd); + val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer); + if (val) + return -EINVAL; + + if (valid_pmu) + vcpu->kvm->arch.dfr0_pmuver.imp = pmuver; + else + vcpu->kvm->arch.dfr0_pmuver.unimp = pmuver; + + return 0; +} + /* * cpufeature ID register user accessors * @@ -1503,7 +1542,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_UNALLOCATED(4,7), /* CRm=5 */ - ID_SANITISED(ID_AA64DFR0_EL1), + { SYS_DESC(SYS_ID_AA64DFR0_EL1), .access = access_id_reg, + .get_user = get_id_reg, .set_user = set_id_aa64dfr0_el1, }, ID_SANITISED(ID_AA64DFR1_EL1), ID_UNALLOCATED(5,2), 
ID_UNALLOCATED(5,3), From d82e0dfdfda73f91e7282e1083a2cd7cd366ea87 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:28 +0000 Subject: [PATCH 1855/4122] KVM: arm64: PMU: Allow ID_DFR0_EL1.PerfMon to be set from userspace Allow userspace to write ID_DFR0_EL1, on the condition that only the PerfMon field can be altered and be something that is compatible with what was computed for the AArch64 view of the guest. Reviewed-by: Reiji Watanabe Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221113163832.3154370-13-maz@kernel.org --- arch/arm64/kvm/sys_regs.c | 57 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 49585258ae6c..b8ac58723459 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1070,6 +1070,19 @@ static u8 vcpu_pmuver(const struct kvm_vcpu *vcpu) return vcpu->kvm->arch.dfr0_pmuver.unimp; } +static u8 perfmon_to_pmuver(u8 perfmon) +{ + switch (perfmon) { + case ID_DFR0_PERFMON_8_0: + return ID_AA64DFR0_EL1_PMUVer_IMP; + case ID_DFR0_PERFMON_IMP_DEF: + return ID_AA64DFR0_EL1_PMUVer_IMP_DEF; + default: + /* Anything ARMv8.1+ and NI have the same value. For now. */ + return perfmon; + } +} + static u8 pmuver_to_perfmon(u8 pmuver) { switch (pmuver) { @@ -1281,6 +1294,46 @@ static int set_id_aa64dfr0_el1(struct kvm_vcpu *vcpu, return 0; } +static int set_id_dfr0_el1(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd, + u64 val) +{ + u8 perfmon, host_perfmon; + bool valid_pmu; + + host_perfmon = pmuver_to_perfmon(kvm_arm_pmu_get_pmuver_limit()); + + /* + * Allow DFR0_EL1.PerfMon to be set from userspace as long as + * it doesn't promise more than what the HW gives us on the + * AArch64 side (as everything is emulated with that), and + * that this is a PMUv3. 
+ */ + perfmon = FIELD_GET(ARM64_FEATURE_MASK(ID_DFR0_PERFMON), val); + if ((perfmon != ID_DFR0_PERFMON_IMP_DEF && perfmon > host_perfmon) || + (perfmon != 0 && perfmon < ID_DFR0_PERFMON_8_0)) + return -EINVAL; + + valid_pmu = (perfmon != 0 && perfmon != ID_DFR0_PERFMON_IMP_DEF); + + /* Make sure view register and PMU support do match */ + if (kvm_vcpu_has_pmu(vcpu) != valid_pmu) + return -EINVAL; + + /* We can only differ with PerfMon, and anything else is an error */ + val ^= read_id_reg(vcpu, rd); + val &= ~ARM64_FEATURE_MASK(ID_DFR0_PERFMON); + if (val) + return -EINVAL; + + if (valid_pmu) + vcpu->kvm->arch.dfr0_pmuver.imp = perfmon_to_pmuver(perfmon); + else + vcpu->kvm->arch.dfr0_pmuver.unimp = perfmon_to_pmuver(perfmon); + + return 0; +} + /* * cpufeature ID register user accessors * @@ -1502,7 +1555,9 @@ static const struct sys_reg_desc sys_reg_descs[] = { /* CRm=1 */ AA32_ID_SANITISED(ID_PFR0_EL1), AA32_ID_SANITISED(ID_PFR1_EL1), - AA32_ID_SANITISED(ID_DFR0_EL1), + { SYS_DESC(SYS_ID_DFR0_EL1), .access = access_id_reg, + .get_user = get_id_reg, .set_user = set_id_dfr0_el1, + .visibility = aa32_id_visibility, }, ID_HIDDEN(ID_AFR0_EL1), AA32_ID_SANITISED(ID_MMFR0_EL1), AA32_ID_SANITISED(ID_MMFR1_EL1), From 11af4c37165e36a6090172ded5d06acdf15206da Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:29 +0000 Subject: [PATCH 1856/4122] KVM: arm64: PMU: Implement PMUv3p5 long counter support PMUv3p5 (which is mandatory with ARMv8.5) comes with some extra features: - All counters are 64bit - The overflow point is controlled by the PMCR_EL0.LP bit Add the required checks in the helpers that control counter width and overflow, as well as the sysreg handling for the LP bit. A new kvm_pmu_is_3p5() helper makes it easy to spot the PMUv3p5 specific handling. 
Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221113163832.3154370-14-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 8 +++++--- arch/arm64/kvm/sys_regs.c | 4 ++++ include/kvm/arm_pmu.h | 7 +++++++ 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 94ca2d17a4e4..7e25ff73cbba 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -52,13 +52,15 @@ static u32 kvm_pmu_event_mask(struct kvm *kvm) */ static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx) { - return (select_idx == ARMV8_PMU_CYCLE_IDX); + return (select_idx == ARMV8_PMU_CYCLE_IDX || kvm_pmu_is_3p5(vcpu)); } static bool kvm_pmu_idx_has_64bit_overflow(struct kvm_vcpu *vcpu, u64 select_idx) { - return (select_idx == ARMV8_PMU_CYCLE_IDX && - __vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC); + u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0); + + return (select_idx < ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LP)) || + (select_idx == ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LC)); } static bool kvm_pmu_counter_can_chain(struct kvm_vcpu *vcpu, u64 idx) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index b8ac58723459..67eac0f747be 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -654,6 +654,8 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) | (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E); if (!kvm_supports_32bit_el0()) val |= ARMV8_PMU_PMCR_LC; + if (!kvm_pmu_is_3p5(vcpu)) + val &= ~ARMV8_PMU_PMCR_LP; __vcpu_sys_reg(vcpu, r->reg) = val; } @@ -703,6 +705,8 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, val |= p->regval & ARMV8_PMU_PMCR_MASK; if (!kvm_supports_32bit_el0()) val |= ARMV8_PMU_PMCR_LC; + if (!kvm_pmu_is_3p5(vcpu)) + val &= ~ARMV8_PMU_PMCR_LP; __vcpu_sys_reg(vcpu, PMCR_EL0) = val; kvm_pmu_handle_pmcr(vcpu, val); kvm_vcpu_pmu_restore_guest(vcpu); diff --git 
a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 812f729c9108..628775334d5e 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -89,6 +89,12 @@ void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu); vcpu->arch.pmu.events = *kvm_get_pmu_events(); \ } while (0) +/* + * Evaluates as true when emulating PMUv3p5, and false otherwise. + */ +#define kvm_pmu_is_3p5(vcpu) \ + (vcpu->kvm->arch.dfr0_pmuver.imp >= ID_AA64DFR0_EL1_PMUVer_V3P5) + u8 kvm_arm_pmu_get_pmuver_limit(void); #else @@ -153,6 +159,7 @@ static inline u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1) } #define kvm_vcpu_has_pmu(vcpu) ({ false; }) +#define kvm_pmu_is_3p5(vcpu) ({ false; }) static inline void kvm_pmu_update_vcpu_events(struct kvm_vcpu *vcpu) {} static inline void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu) {} static inline void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu) {} From 1f7c978282855d6b2abd608064004c74902e791d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:30 +0000 Subject: [PATCH 1857/4122] KVM: arm64: PMU: Allow PMUv3p5 to be exposed to the guest Now that the infrastructure is in place, bump the PMU support up to PMUv3p5. 
Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221113163832.3154370-15-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 7e25ff73cbba..be881ae67133 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -1057,6 +1057,6 @@ u8 kvm_arm_pmu_get_pmuver_limit(void) tmp = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1); tmp = cpuid_feature_cap_perfmon_field(tmp, ID_AA64DFR0_EL1_PMUVer_SHIFT, - ID_AA64DFR0_EL1_PMUVer_V3P4); + ID_AA64DFR0_EL1_PMUVer_V3P5); return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), tmp); } From 9bad925dd741408825590eccc495d073cc246de0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:31 +0000 Subject: [PATCH 1858/4122] KVM: arm64: PMU: Simplify vcpu computation on perf overflow notification The way we compute the target vcpu on getting an overflow is a bit odd, as we use the PMC array as an anchor for kvm_pmc_to_vcpu, while we could directly compute the correct address. Get rid of the intermediate step and directly compute the target vcpu. 
Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221113163832.3154370-16-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index be881ae67133..49a004660497 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -405,11 +405,8 @@ void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu) static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work) { struct kvm_vcpu *vcpu; - struct kvm_pmu *pmu; - - pmu = container_of(work, struct kvm_pmu, overflow_work); - vcpu = kvm_pmc_to_vcpu(pmu->pmc); + vcpu = container_of(work, struct kvm_vcpu, arch.pmu.overflow_work); kvm_vcpu_kick(vcpu); } From d56bdce586e7fabd2b3339f476e0e4c059b24e19 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:32 +0000 Subject: [PATCH 1859/4122] KVM: arm64: PMU: Make kvm_pmc the main data structure The PMU code has historically been torn between referencing a counter as a pair vcpu+index or as the PMC pointer. Given that it is pretty easy to go from one representation to the other, standardise on the latter which, IMHO, makes the code slightly more readable. YMMV. 
Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221113163832.3154370-17-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 174 +++++++++++++++++++------------------- 1 file changed, 87 insertions(+), 87 deletions(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 49a004660497..3295dea34f4c 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -22,9 +22,19 @@ DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available); static LIST_HEAD(arm_pmus); static DEFINE_MUTEX(arm_pmus_lock); -static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx); +static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc); static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc); +static struct kvm_vcpu *kvm_pmc_to_vcpu(const struct kvm_pmc *pmc) +{ + return container_of(pmc, struct kvm_vcpu, arch.pmu.pmc[pmc->idx]); +} + +static struct kvm_pmc *kvm_vcpu_idx_to_pmc(struct kvm_vcpu *vcpu, int cnt_idx) +{ + return &vcpu->arch.pmu.pmc[cnt_idx]; +} + static u32 kvm_pmu_event_mask(struct kvm *kvm) { unsigned int pmuver; @@ -46,38 +56,27 @@ static u32 kvm_pmu_event_mask(struct kvm *kvm) } /** - * kvm_pmu_idx_is_64bit - determine if select_idx is a 64bit counter - * @vcpu: The vcpu pointer - * @select_idx: The counter index + * kvm_pmc_is_64bit - determine if counter is 64bit + * @pmc: counter context */ -static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx) +static bool kvm_pmc_is_64bit(struct kvm_pmc *pmc) { - return (select_idx == ARMV8_PMU_CYCLE_IDX || kvm_pmu_is_3p5(vcpu)); + return (pmc->idx == ARMV8_PMU_CYCLE_IDX || + kvm_pmu_is_3p5(kvm_pmc_to_vcpu(pmc))); } -static bool kvm_pmu_idx_has_64bit_overflow(struct kvm_vcpu *vcpu, u64 select_idx) +static bool kvm_pmc_has_64bit_overflow(struct kvm_pmc *pmc) { - u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0); + u64 val = __vcpu_sys_reg(kvm_pmc_to_vcpu(pmc), PMCR_EL0); - return (select_idx < ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LP)) || - (select_idx == 
ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LC)); + return (pmc->idx < ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LP)) || + (pmc->idx == ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LC)); } -static bool kvm_pmu_counter_can_chain(struct kvm_vcpu *vcpu, u64 idx) +static bool kvm_pmu_counter_can_chain(struct kvm_pmc *pmc) { - return (!(idx & 1) && (idx + 1) < ARMV8_PMU_CYCLE_IDX && - !kvm_pmu_idx_has_64bit_overflow(vcpu, idx)); -} - -static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc) -{ - struct kvm_pmu *pmu; - struct kvm_vcpu_arch *vcpu_arch; - - pmc -= pmc->idx; - pmu = container_of(pmc, struct kvm_pmu, pmc[0]); - vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu); - return container_of(vcpu_arch, struct kvm_vcpu, arch); + return (!(pmc->idx & 1) && (pmc->idx + 1) < ARMV8_PMU_CYCLE_IDX && + !kvm_pmc_has_64bit_overflow(pmc)); } static u32 counter_index_to_reg(u64 idx) @@ -90,21 +89,12 @@ static u32 counter_index_to_evtreg(u64 idx) return (idx == ARMV8_PMU_CYCLE_IDX) ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + idx; } -/** - * kvm_pmu_get_counter_value - get PMU counter value - * @vcpu: The vcpu pointer - * @select_idx: The counter index - */ -u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) +static u64 kvm_pmu_get_pmc_value(struct kvm_pmc *pmc) { + struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); u64 counter, reg, enabled, running; - struct kvm_pmu *pmu = &vcpu->arch.pmu; - struct kvm_pmc *pmc = &pmu->pmc[select_idx]; - if (!kvm_vcpu_has_pmu(vcpu)) - return 0; - - reg = counter_index_to_reg(select_idx); + reg = counter_index_to_reg(pmc->idx); counter = __vcpu_sys_reg(vcpu, reg); /* @@ -115,25 +105,35 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) counter += perf_event_read_value(pmc->perf_event, &enabled, &running); - if (!kvm_pmu_idx_is_64bit(vcpu, select_idx)) + if (!kvm_pmc_is_64bit(pmc)) counter = lower_32_bits(counter); return counter; } -static void kvm_pmu_set_counter(struct kvm_vcpu *vcpu, u64 select_idx, u64 
val, - bool force) +/** + * kvm_pmu_get_counter_value - get PMU counter value + * @vcpu: The vcpu pointer + * @select_idx: The counter index + */ +u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) { + if (!kvm_vcpu_has_pmu(vcpu)) + return 0; + + return kvm_pmu_get_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, select_idx)); +} + +static void kvm_pmu_set_pmc_value(struct kvm_pmc *pmc, u64 val, bool force) +{ + struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); u64 reg; - if (!kvm_vcpu_has_pmu(vcpu)) - return; + kvm_pmu_release_perf_event(pmc); - kvm_pmu_release_perf_event(&vcpu->arch.pmu.pmc[select_idx]); + reg = counter_index_to_reg(pmc->idx); - reg = counter_index_to_reg(select_idx); - - if (vcpu_mode_is_32bit(vcpu) && select_idx != ARMV8_PMU_CYCLE_IDX && + if (vcpu_mode_is_32bit(vcpu) && pmc->idx != ARMV8_PMU_CYCLE_IDX && !force) { /* * Even with PMUv3p5, AArch32 cannot write to the top @@ -148,7 +148,7 @@ static void kvm_pmu_set_counter(struct kvm_vcpu *vcpu, u64 select_idx, u64 val, __vcpu_sys_reg(vcpu, reg) = val; /* Recreate the perf event to reflect the updated sample_period */ - kvm_pmu_create_perf_event(vcpu, select_idx); + kvm_pmu_create_perf_event(pmc); } /** @@ -159,7 +159,10 @@ static void kvm_pmu_set_counter(struct kvm_vcpu *vcpu, u64 select_idx, u64 val, */ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) { - kvm_pmu_set_counter(vcpu, select_idx, val, false); + if (!kvm_vcpu_has_pmu(vcpu)) + return; + + kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, select_idx), val, false); } /** @@ -181,14 +184,15 @@ static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc) * * If this counter has been configured to monitor some event, release it here. 
*/ -static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc) +static void kvm_pmu_stop_counter(struct kvm_pmc *pmc) { + struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); u64 reg, val; if (!pmc->perf_event) return; - val = kvm_pmu_get_counter_value(vcpu, pmc->idx); + val = kvm_pmu_get_pmc_value(pmc); reg = counter_index_to_reg(pmc->idx); @@ -219,11 +223,10 @@ void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu) void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) { unsigned long mask = kvm_pmu_valid_counter_mask(vcpu); - struct kvm_pmu *pmu = &vcpu->arch.pmu; int i; for_each_set_bit(i, &mask, 32) - kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]); + kvm_pmu_stop_counter(kvm_vcpu_idx_to_pmc(vcpu, i)); } /** @@ -234,10 +237,9 @@ void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) { int i; - struct kvm_pmu *pmu = &vcpu->arch.pmu; for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) - kvm_pmu_release_perf_event(&pmu->pmc[i]); + kvm_pmu_release_perf_event(kvm_vcpu_idx_to_pmc(vcpu, i)); irq_work_sync(&vcpu->arch.pmu.overflow_work); } @@ -262,9 +264,6 @@ u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu) void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val) { int i; - struct kvm_pmu *pmu = &vcpu->arch.pmu; - struct kvm_pmc *pmc; - if (!kvm_vcpu_has_pmu(vcpu)) return; @@ -272,13 +271,15 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val) return; for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) { + struct kvm_pmc *pmc; + if (!(val & BIT(i))) continue; - pmc = &pmu->pmc[i]; + pmc = kvm_vcpu_idx_to_pmc(vcpu, i); if (!pmc->perf_event) { - kvm_pmu_create_perf_event(vcpu, i); + kvm_pmu_create_perf_event(pmc); } else { perf_event_enable(pmc->perf_event); if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE) @@ -297,17 +298,17 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val) void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val) { int i; - struct kvm_pmu *pmu = &vcpu->arch.pmu; - struct kvm_pmc 
*pmc; if (!kvm_vcpu_has_pmu(vcpu) || !val) return; for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) { + struct kvm_pmc *pmc; + if (!(val & BIT(i))) continue; - pmc = &pmu->pmc[i]; + pmc = kvm_vcpu_idx_to_pmc(vcpu, i); if (pmc->perf_event) perf_event_disable(pmc->perf_event); @@ -427,6 +428,7 @@ static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu, mask &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); for_each_set_bit(i, &mask, ARMV8_PMU_CYCLE_IDX) { + struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, i); u64 type, reg; /* Filter on event type */ @@ -437,30 +439,30 @@ static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu, /* Increment this counter */ reg = __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) + 1; - if (!kvm_pmu_idx_is_64bit(vcpu, i)) + if (!kvm_pmc_is_64bit(pmc)) reg = lower_32_bits(reg); __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) = reg; /* No overflow? move on */ - if (kvm_pmu_idx_has_64bit_overflow(vcpu, i) ? reg : lower_32_bits(reg)) + if (kvm_pmc_has_64bit_overflow(pmc) ? reg : lower_32_bits(reg)) continue; /* Mark overflow */ __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i); - if (kvm_pmu_counter_can_chain(vcpu, i)) + if (kvm_pmu_counter_can_chain(pmc)) kvm_pmu_counter_increment(vcpu, BIT(i + 1), ARMV8_PMUV3_PERFCTR_CHAIN); } } /* Compute the sample period for a given counter value */ -static u64 compute_period(struct kvm_vcpu *vcpu, u64 select_idx, u64 counter) +static u64 compute_period(struct kvm_pmc *pmc, u64 counter) { u64 val; - if (kvm_pmu_idx_is_64bit(vcpu, select_idx)) { - if (!kvm_pmu_idx_has_64bit_overflow(vcpu, select_idx)) + if (kvm_pmc_is_64bit(pmc)) { + if (!kvm_pmc_has_64bit_overflow(pmc)) val = -(counter & GENMASK(31, 0)); else val = (-counter) & GENMASK(63, 0); @@ -490,7 +492,7 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, * Reset the sample period to the architectural limit, * i.e. the point where the counter overflows. 
*/ - period = compute_period(vcpu, idx, local64_read(&perf_event->count)); + period = compute_period(pmc, local64_read(&perf_event->count)); local64_set(&perf_event->hw.period_left, 0); perf_event->attr.sample_period = period; @@ -498,7 +500,7 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx); - if (kvm_pmu_counter_can_chain(vcpu, idx)) + if (kvm_pmu_counter_can_chain(pmc)) kvm_pmu_counter_increment(vcpu, BIT(idx + 1), ARMV8_PMUV3_PERFCTR_CHAIN); @@ -551,34 +553,33 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) unsigned long mask = kvm_pmu_valid_counter_mask(vcpu); mask &= ~BIT(ARMV8_PMU_CYCLE_IDX); for_each_set_bit(i, &mask, 32) - kvm_pmu_set_counter(vcpu, i, 0, true); + kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, i), 0, true); } } -static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx) +static bool kvm_pmu_counter_is_enabled(struct kvm_pmc *pmc) { + struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) && - (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx)); + (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(pmc->idx)); } /** * kvm_pmu_create_perf_event - create a perf event for a counter - * @vcpu: The vcpu pointer - * @select_idx: The number of selected counter + * @pmc: Counter context */ -static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) +static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc) { + struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); struct arm_pmu *arm_pmu = vcpu->kvm->arch.arm_pmu; - struct kvm_pmu *pmu = &vcpu->arch.pmu; - struct kvm_pmc *pmc = &pmu->pmc[select_idx]; struct perf_event *event; struct perf_event_attr attr; - u64 eventsel, counter, reg, data; + u64 eventsel, reg, data; - reg = counter_index_to_evtreg(select_idx); + reg = counter_index_to_evtreg(pmc->idx); data = __vcpu_sys_reg(vcpu, reg); - kvm_pmu_stop_counter(vcpu, pmc); + 
kvm_pmu_stop_counter(pmc); if (pmc->idx == ARMV8_PMU_CYCLE_IDX) eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES; else @@ -604,24 +605,22 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) attr.type = arm_pmu->pmu.type; attr.size = sizeof(attr); attr.pinned = 1; - attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx); + attr.disabled = !kvm_pmu_counter_is_enabled(pmc); attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0; attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0; attr.exclude_hv = 1; /* Don't count EL2 events */ attr.exclude_host = 1; /* Don't count host events */ attr.config = eventsel; - counter = kvm_pmu_get_counter_value(vcpu, select_idx); - /* * If counting with a 64bit counter, advertise it to the perf * code, carefully dealing with the initial sample period * which also depends on the overflow. */ - if (kvm_pmu_idx_is_64bit(vcpu, select_idx)) + if (kvm_pmc_is_64bit(pmc)) attr.config1 |= PERF_ATTR_CFG1_COUNTER_64BIT; - attr.sample_period = compute_period(vcpu, select_idx, counter); + attr.sample_period = compute_period(pmc, kvm_pmu_get_pmc_value(pmc)); event = perf_event_create_kernel_counter(&attr, -1, current, kvm_pmu_perf_overflow, pmc); @@ -648,6 +647,7 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, u64 select_idx) { + struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, select_idx); u64 reg, mask; if (!kvm_vcpu_has_pmu(vcpu)) @@ -657,11 +657,11 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, mask &= ~ARMV8_PMU_EVTYPE_EVENT; mask |= kvm_pmu_event_mask(vcpu->kvm); - reg = counter_index_to_evtreg(select_idx); + reg = counter_index_to_evtreg(pmc->idx); __vcpu_sys_reg(vcpu, reg) = data & mask; - kvm_pmu_create_perf_event(vcpu, select_idx); + kvm_pmu_create_perf_event(pmc); } void kvm_host_pmu_init(struct arm_pmu *pmu) From 28927f6c483d4a4c9ba8050f2a0e5af1b3557105 Mon Sep 17 00:00:00 2001 
From: AngeloGioacchino Del Regno Date: Fri, 7 Oct 2022 17:34:36 +0800 Subject: [PATCH 1860/4122] watchdog: mtk_wdt: Add support for MT6795 Helio X10 watchdog and toprgu Add support for the toprgu reset controller and watchdog for the MediaTek MT6795 SoC. Signed-off-by: AngeloGioacchino Del Regno Co-developed-by: Allen-KH Cheng Signed-off-by: Allen-KH Cheng Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20221005113517.70628-6-angelogioacchino.delregno@collabora.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/mtk_wdt.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/watchdog/mtk_wdt.c b/drivers/watchdog/mtk_wdt.c index e97787536792..5fa42b7d4b4d 100644 --- a/drivers/watchdog/mtk_wdt.c +++ b/drivers/watchdog/mtk_wdt.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include @@ -78,6 +79,10 @@ static const struct mtk_wdt_data mt2712_data = { .toprgu_sw_rst_num = MT2712_TOPRGU_SW_RST_NUM, }; +static const struct mtk_wdt_data mt6795_data = { + .toprgu_sw_rst_num = MT6795_TOPRGU_SW_RST_NUM, +}; + static const struct mtk_wdt_data mt7986_data = { .toprgu_sw_rst_num = MT7986_TOPRGU_SW_RST_NUM, }; @@ -426,6 +431,7 @@ static int mtk_wdt_resume(struct device *dev) static const struct of_device_id mtk_wdt_dt_ids[] = { { .compatible = "mediatek,mt2712-wdt", .data = &mt2712_data }, { .compatible = "mediatek,mt6589-wdt" }, + { .compatible = "mediatek,mt6795-wdt", .data = &mt6795_data }, { .compatible = "mediatek,mt7986-wdt", .data = &mt7986_data }, { .compatible = "mediatek,mt8183-wdt", .data = &mt8183_data }, { .compatible = "mediatek,mt8186-wdt", .data = &mt8186_data }, From 52f46a6aea59caf63798b2dd09050d4806398890 Mon Sep 17 00:00:00 2001 From: Runyang Chen Date: Wed, 26 Oct 2022 14:33:25 +0800 Subject: [PATCH 1861/4122] dt-bindings: watchdog: Add compatible for MediaTek MT8188 Add dt-binding documentation of watchdog for MediaTek MT8188 SoC Signed-off-by: Runyang Chen Acked-by: Krzysztof Kozlowski 
Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20221026063327.20037-2-Runyang.Chen@mediatek.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- Documentation/devicetree/bindings/watchdog/mtk-wdt.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/watchdog/mtk-wdt.txt b/Documentation/devicetree/bindings/watchdog/mtk-wdt.txt index 762c62e428ef..b900c85d4560 100644 --- a/Documentation/devicetree/bindings/watchdog/mtk-wdt.txt +++ b/Documentation/devicetree/bindings/watchdog/mtk-wdt.txt @@ -17,6 +17,7 @@ Required properties: "mediatek,mt7986-wdt", "mediatek,mt6589-wdt": for MT7986 "mediatek,mt8183-wdt": for MT8183 "mediatek,mt8186-wdt", "mediatek,mt6589-wdt": for MT8186 + "mediatek,mt8188-wdt", "mediatek,mt6589-wdt": for MT8188 "mediatek,mt8516-wdt", "mediatek,mt6589-wdt": for MT8516 "mediatek,mt8192-wdt": for MT8192 "mediatek,mt8195-wdt", "mediatek,mt6589-wdt": for MT8195 From fea58041af4c1a8ac2e8b461a772599e205d0d1f Mon Sep 17 00:00:00 2001 From: Runyang Chen Date: Wed, 26 Oct 2022 14:33:26 +0800 Subject: [PATCH 1862/4122] dt-bindings: reset: mt8188: add toprgu reset-controller header file Add toprgu reset-controller header file for MT8188 Signed-off-by: Runyang Chen Acked-by: Rob Herring Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20221026063327.20037-3-Runyang.Chen@mediatek.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- include/dt-bindings/reset/mt8188-resets.h | 36 +++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 include/dt-bindings/reset/mt8188-resets.h diff --git a/include/dt-bindings/reset/mt8188-resets.h b/include/dt-bindings/reset/mt8188-resets.h new file mode 100644 index 000000000000..377cdfda82a9 --- /dev/null +++ b/include/dt-bindings/reset/mt8188-resets.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)*/ +/* + * Copyright (c) 2022 MediaTek Inc. 
+ * Author: Runyang Chen + */ + +#ifndef _DT_BINDINGS_RESET_CONTROLLER_MT8188 +#define _DT_BINDINGS_RESET_CONTROLLER_MT8188 + +#define MT8188_TOPRGU_CONN_MCU_SW_RST 0 +#define MT8188_TOPRGU_INFRA_GRST_SW_RST 1 +#define MT8188_TOPRGU_IPU0_SW_RST 2 +#define MT8188_TOPRGU_IPU1_SW_RST 3 +#define MT8188_TOPRGU_IPU2_SW_RST 4 +#define MT8188_TOPRGU_AUD_ASRC_SW_RST 5 +#define MT8188_TOPRGU_INFRA_SW_RST 6 +#define MT8188_TOPRGU_MMSYS_SW_RST 7 +#define MT8188_TOPRGU_MFG_SW_RST 8 +#define MT8188_TOPRGU_VENC_SW_RST 9 +#define MT8188_TOPRGU_VDEC_SW_RST 10 +#define MT8188_TOPRGU_CAM_VCORE_SW_RST 11 +#define MT8188_TOPRGU_SCP_SW_RST 12 +#define MT8188_TOPRGU_APMIXEDSYS_SW_RST 13 +#define MT8188_TOPRGU_AUDIO_SW_RST 14 +#define MT8188_TOPRGU_CAMSYS_SW_RST 15 +#define MT8188_TOPRGU_MJC_SW_RST 16 +#define MT8188_TOPRGU_PERI_SW_RST 17 +#define MT8188_TOPRGU_PERI_AO_SW_RST 18 +#define MT8188_TOPRGU_PCIE_SW_RST 19 +#define MT8188_TOPRGU_ADSPSYS_SW_RST 21 +#define MT8188_TOPRGU_DPTX_SW_RST 22 +#define MT8188_TOPRGU_SPMI_MST_SW_RST 23 + +#define MT8188_TOPRGU_SW_RST_NUM 24 + +#endif /* _DT_BINDINGS_RESET_CONTROLLER_MT8188 */ From bc7313652a6370df2f73e146483abfa5a69b85cf Mon Sep 17 00:00:00 2001 From: Runyang Chen Date: Wed, 26 Oct 2022 14:33:27 +0800 Subject: [PATCH 1863/4122] watchdog: mediatek: mt8188: add wdt support Support MT8188 watchdog device. 
Signed-off-by: Runyang Chen Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Guenter Roeck Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20221026063327.20037-4-Runyang.Chen@mediatek.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/mtk_wdt.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/watchdog/mtk_wdt.c b/drivers/watchdog/mtk_wdt.c index 5fa42b7d4b4d..3e6212591e69 100644 --- a/drivers/watchdog/mtk_wdt.c +++ b/drivers/watchdog/mtk_wdt.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -95,6 +96,10 @@ static const struct mtk_wdt_data mt8186_data = { .toprgu_sw_rst_num = MT8186_TOPRGU_SW_RST_NUM, }; +static const struct mtk_wdt_data mt8188_data = { + .toprgu_sw_rst_num = MT8188_TOPRGU_SW_RST_NUM, +}; + static const struct mtk_wdt_data mt8192_data = { .toprgu_sw_rst_num = MT8192_TOPRGU_SW_RST_NUM, }; @@ -435,6 +440,7 @@ static const struct of_device_id mtk_wdt_dt_ids[] = { { .compatible = "mediatek,mt7986-wdt", .data = &mt7986_data }, { .compatible = "mediatek,mt8183-wdt", .data = &mt8183_data }, { .compatible = "mediatek,mt8186-wdt", .data = &mt8186_data }, + { .compatible = "mediatek,mt8188-wdt", .data = &mt8188_data }, { .compatible = "mediatek,mt8192-wdt", .data = &mt8192_data }, { .compatible = "mediatek,mt8195-wdt", .data = &mt8195_data }, { /* sentinel */ } From 4d1363a46cdfcc00460adf1f0fcb81bb5ba69d94 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Fri, 7 Oct 2022 17:34:34 +0800 Subject: [PATCH 1864/4122] dt-bindings: watchdog: mediatek: Convert mtk-wdt to json-schema MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert the MediaTek watchdog bindings to schema. 
The original binding only had 4 compatibles without a fallback, but there is a reset controller on the "mediatek,mt7986-wdt", "mediatek,mt8186-wdt", "mediatek,mt8188-wdt" and "mediatek,mt8195-wdt". Since there is no reset controller for the mt6589, we remove "mediatek,mt6589-wdt" as a fallback for these four compatibles. Signed-off-by: AngeloGioacchino Del Regno Co-developed-by: Allen-KH Cheng Signed-off-by: Allen-KH Cheng Reviewed-by: Rob Herring Reviewed-by: Guenter Roeck Reviewed-by: Nícolas F. R. A. Prado Link: https://lore.kernel.org/r/20221101090116.27130-6-allen-kh.cheng@mediatek.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- .../bindings/watchdog/mediatek,mtk-wdt.yaml | 78 +++++++++++++++++++ .../devicetree/bindings/watchdog/mtk-wdt.txt | 43 ---------- 2 files changed, 78 insertions(+), 43 deletions(-) create mode 100644 Documentation/devicetree/bindings/watchdog/mediatek,mtk-wdt.yaml delete mode 100644 Documentation/devicetree/bindings/watchdog/mtk-wdt.txt diff --git a/Documentation/devicetree/bindings/watchdog/mediatek,mtk-wdt.yaml b/Documentation/devicetree/bindings/watchdog/mediatek,mtk-wdt.yaml new file mode 100644 index 000000000000..b0d71fa28ac3 --- /dev/null +++ b/Documentation/devicetree/bindings/watchdog/mediatek,mtk-wdt.yaml @@ -0,0 +1,78 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/watchdog/mediatek,mtk-wdt.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: MediaTek SoCs Watchdog timer + +maintainers: + - Matthias Brugger + +description: + The watchdog supports a pre-timeout interrupt that fires + timeout-sec/2 before the expiry. 
+ +allOf: + - $ref: watchdog.yaml# + +properties: + compatible: + oneOf: + - enum: + - mediatek,mt2712-wdt + - mediatek,mt6589-wdt + - mediatek,mt7986-wdt + - mediatek,mt8183-wdt + - mediatek,mt8186-wdt + - mediatek,mt8188-wdt + - mediatek,mt8192-wdt + - mediatek,mt8195-wdt + - items: + - enum: + - mediatek,mt2701-wdt + - mediatek,mt6582-wdt + - mediatek,mt6797-wdt + - mediatek,mt7622-wdt + - mediatek,mt7623-wdt + - mediatek,mt7629-wdt + - mediatek,mt8516-wdt + - const: mediatek,mt6589-wdt + + reg: + maxItems: 1 + + interrupts: + items: + - description: Watchdog pre-timeout (bark) interrupt + + mediatek,disable-extrst: + description: Disable sending output reset signal + type: boolean + + '#reset-cells': + const: 1 + +required: + - compatible + - reg + +unevaluatedProperties: false + +examples: + - | + #include + + soc { + #address-cells = <2>; + #size-cells = <2>; + + watchdog: watchdog@10007000 { + compatible = "mediatek,mt8183-wdt"; + reg = <0 0x10007000 0 0x100>; + interrupts = ; + mediatek,disable-extrst; + timeout-sec = <10>; + #reset-cells = <1>; + }; + }; diff --git a/Documentation/devicetree/bindings/watchdog/mtk-wdt.txt b/Documentation/devicetree/bindings/watchdog/mtk-wdt.txt deleted file mode 100644 index b900c85d4560..000000000000 --- a/Documentation/devicetree/bindings/watchdog/mtk-wdt.txt +++ /dev/null @@ -1,43 +0,0 @@ -Mediatek SoCs Watchdog timer - -The watchdog supports a pre-timeout interrupt that fires timeout-sec/2 -before the expiry. 
- -Required properties: - -- compatible should contain: - "mediatek,mt2701-wdt", "mediatek,mt6589-wdt": for MT2701 - "mediatek,mt2712-wdt": for MT2712 - "mediatek,mt6582-wdt", "mediatek,mt6589-wdt": for MT6582 - "mediatek,mt6589-wdt": for MT6589 - "mediatek,mt6797-wdt", "mediatek,mt6589-wdt": for MT6797 - "mediatek,mt7622-wdt", "mediatek,mt6589-wdt": for MT7622 - "mediatek,mt7623-wdt", "mediatek,mt6589-wdt": for MT7623 - "mediatek,mt7629-wdt", "mediatek,mt6589-wdt": for MT7629 - "mediatek,mt7986-wdt", "mediatek,mt6589-wdt": for MT7986 - "mediatek,mt8183-wdt": for MT8183 - "mediatek,mt8186-wdt", "mediatek,mt6589-wdt": for MT8186 - "mediatek,mt8188-wdt", "mediatek,mt6589-wdt": for MT8188 - "mediatek,mt8516-wdt", "mediatek,mt6589-wdt": for MT8516 - "mediatek,mt8192-wdt": for MT8192 - "mediatek,mt8195-wdt", "mediatek,mt6589-wdt": for MT8195 - -- reg : Specifies base physical address and size of the registers. - -Optional properties: -- mediatek,disable-extrst: disable send output reset signal -- interrupts: Watchdog pre-timeout (bark) interrupt. -- timeout-sec: contains the watchdog timeout in seconds. -- #reset-cells: Should be 1. - -Example: - -watchdog: watchdog@10007000 { - compatible = "mediatek,mt8183-wdt", - "mediatek,mt6589-wdt"; - mediatek,disable-extrst; - reg = <0 0x10007000 0 0x100>; - interrupts = ; - timeout-sec = <10>; - #reset-cells = <1>; -}; From c389e1f5068f13aa9891fead50dc83a747426bc6 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Fri, 7 Oct 2022 17:34:35 +0800 Subject: [PATCH 1865/4122] dt-bindings: watchdog: mediatek,mtk-wdt: Add compatible for MT6795 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the mediatek,mt6795-wdt compatible. Signed-off-by: AngeloGioacchino Del Regno Co-developed-by: Allen-KH Cheng Signed-off-by: Allen-KH Cheng Acked-by: Rob Herring Reviewed-by: Guenter Roeck Reviewed-by: Nícolas F. R. A. 
Prado Link: https://lore.kernel.org/r/20221101090116.27130-7-allen-kh.cheng@mediatek.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- Documentation/devicetree/bindings/watchdog/mediatek,mtk-wdt.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/watchdog/mediatek,mtk-wdt.yaml b/Documentation/devicetree/bindings/watchdog/mediatek,mtk-wdt.yaml index b0d71fa28ac3..36606524d869 100644 --- a/Documentation/devicetree/bindings/watchdog/mediatek,mtk-wdt.yaml +++ b/Documentation/devicetree/bindings/watchdog/mediatek,mtk-wdt.yaml @@ -22,6 +22,7 @@ properties: - enum: - mediatek,mt2712-wdt - mediatek,mt6589-wdt + - mediatek,mt6795-wdt - mediatek,mt7986-wdt - mediatek,mt8183-wdt - mediatek,mt8186-wdt From 1d8e67ecf114ef4140a1df7f1581e0e2cab6739a Mon Sep 17 00:00:00 2001 From: Allen-KH Cheng Date: Fri, 7 Oct 2022 17:34:37 +0800 Subject: [PATCH 1866/4122] dt-bindings: watchdog: mediatek,mtk-wdt: Add compatible for MT8173 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the mediatek,mt8173-wdt compatible using mediatek,mt6589-wdt as fallback. Signed-off-by: Allen-KH Cheng Reviewed-by: AngeloGioacchino Del Regno Acked-by: Rob Herring Reviewed-by: Guenter Roeck Reviewed-by: Nícolas F. R. A. 
Prado Link: https://lore.kernel.org/r/20221101090116.27130-8-allen-kh.cheng@mediatek.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- Documentation/devicetree/bindings/watchdog/mediatek,mtk-wdt.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/watchdog/mediatek,mtk-wdt.yaml b/Documentation/devicetree/bindings/watchdog/mediatek,mtk-wdt.yaml index 36606524d869..b3605608410c 100644 --- a/Documentation/devicetree/bindings/watchdog/mediatek,mtk-wdt.yaml +++ b/Documentation/devicetree/bindings/watchdog/mediatek,mtk-wdt.yaml @@ -37,6 +37,7 @@ properties: - mediatek,mt7622-wdt - mediatek,mt7623-wdt - mediatek,mt7629-wdt + - mediatek,mt8173-wdt - mediatek,mt8516-wdt - const: mediatek,mt6589-wdt From 08435c2aab3aea369182bdec3c71ab78b15f8c82 Mon Sep 17 00:00:00 2001 From: Thomas Kastner Date: Wed, 19 Oct 2022 09:39:03 +0200 Subject: [PATCH 1867/4122] watchdog: Add Advantech EC watchdog driver This patch adds the 'advantech_ec_wdt' kernel module which provides WDT support for Advantech platforms with ITE based Embedded Controller. Signed-off-by: Thomas Kastner Reviewed-by: Guenter Roeck Tested-by: Thomas Kastner Link: https://lore.kernel.org/r/Y0+pl/26e3pcEUPk@EIS-S230 Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/Kconfig | 7 + drivers/watchdog/Makefile | 1 + drivers/watchdog/advantech_ec_wdt.c | 205 ++++++++++++++++++++++++++++ 3 files changed, 213 insertions(+) create mode 100644 drivers/watchdog/advantech_ec_wdt.c diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index b64bc49c7f30..0bc40b763b06 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -1055,6 +1055,13 @@ config ADVANTECH_WDT feature. More information can be found at +config ADVANTECH_EC_WDT + tristate "Advantech Embedded Controller Watchdog Timer" + depends on X86 + help + This driver supports Advantech products with ITE based Embedded Controller. 
+ It does not support Advantech products with other ECs or without EC. + config ALIM1535_WDT tristate "ALi M1535 PMU Watchdog Timer" depends on X86 && PCI diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile index d41e5f830ae7..9cbf6580f16c 100644 --- a/drivers/watchdog/Makefile +++ b/drivers/watchdog/Makefile @@ -102,6 +102,7 @@ obj-$(CONFIG_SUNPLUS_WATCHDOG) += sunplus_wdt.o # X86 (i386 + ia64 + x86_64) Architecture obj-$(CONFIG_ACQUIRE_WDT) += acquirewdt.o obj-$(CONFIG_ADVANTECH_WDT) += advantechwdt.o +obj-$(CONFIG_ADVANTECH_EC_WDT) += advantech_ec_wdt.o obj-$(CONFIG_ALIM1535_WDT) += alim1535_wdt.o obj-$(CONFIG_ALIM7101_WDT) += alim7101_wdt.o obj-$(CONFIG_EBC_C384_WDT) += ebc-c384_wdt.o diff --git a/drivers/watchdog/advantech_ec_wdt.c b/drivers/watchdog/advantech_ec_wdt.c new file mode 100644 index 000000000000..7c380f90ca58 --- /dev/null +++ b/drivers/watchdog/advantech_ec_wdt.c @@ -0,0 +1,205 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Advantech Embedded Controller Watchdog Driver + * + * This driver supports Advantech products with ITE based Embedded Controller. + * It does not support Advantech products with other ECs or without EC. + * + * Copyright (C) 2022 Advantech Europe B.V. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#define DRIVER_NAME "advantech_ec_wdt" + +/* EC IO region */ +#define EC_BASE_ADDR 0x299 +#define EC_ADDR_EXTENT 2 + +/* EC minimum IO access delay in ms */ +#define EC_MIN_DELAY 10 + +/* EC interface definitions */ +#define EC_ADDR_CMD (EC_BASE_ADDR + 1) +#define EC_ADDR_DATA EC_BASE_ADDR +#define EC_CMD_EC_PROBE 0x30 +#define EC_CMD_COMM 0x89 +#define EC_CMD_WDT_START 0x28 +#define EC_CMD_WDT_STOP 0x29 +#define EC_CMD_WDT_RESET 0x2A +#define EC_DAT_EN_DLY_H 0x58 +#define EC_DAT_EN_DLY_L 0x59 +#define EC_DAT_RST_DLY_H 0x5E +#define EC_DAT_RST_DLY_L 0x5F +#define EC_MAGIC 0x95 + +/* module parameters */ +#define MIN_TIME 1 +#define MAX_TIME 6000 /* 100 minutes */ +#define DEFAULT_TIME 60 + +static unsigned int timeout; +static ktime_t ec_timestamp; + +module_param(timeout, uint, 0); +MODULE_PARM_DESC(timeout, + "Default Watchdog timer setting (" __MODULE_STRING(DEFAULT_TIME) "s). The range is from " __MODULE_STRING(MIN_TIME) " to " __MODULE_STRING(MAX_TIME) "."); + +static void adv_ec_wdt_timing_gate(void) +{ + ktime_t time_cur, time_delta; + + /* ensure minimum delay between IO accesses*/ + time_cur = ktime_get(); + time_delta = ktime_to_ms(ktime_sub(time_cur, ec_timestamp)); + if (time_delta < EC_MIN_DELAY) { + time_delta = EC_MIN_DELAY - time_delta; + usleep_range(time_delta * 1000, (time_delta + 1) * 1000); + } + ec_timestamp = ktime_get(); +} + +static void adv_ec_wdt_outb(unsigned char value, unsigned short port) +{ + adv_ec_wdt_timing_gate(); + outb(value, port); +} + +static unsigned char adv_ec_wdt_inb(unsigned short port) +{ + adv_ec_wdt_timing_gate(); + return inb(port); +} + +static int adv_ec_wdt_ping(struct watchdog_device *wdd) +{ + adv_ec_wdt_outb(EC_CMD_WDT_RESET, EC_ADDR_CMD); + return 0; +} + +static int adv_ec_wdt_set_timeout(struct watchdog_device *wdd, unsigned int t) +{ + unsigned int val; + + /* scale time to EC 100 ms base */ + val = t * 10; + + /* reset enable 
delay, just in case it was set by BIOS etc. */ + adv_ec_wdt_outb(EC_CMD_COMM, EC_ADDR_CMD); + adv_ec_wdt_outb(EC_DAT_EN_DLY_H, EC_ADDR_DATA); + adv_ec_wdt_outb(0, EC_ADDR_DATA); + + adv_ec_wdt_outb(EC_CMD_COMM, EC_ADDR_CMD); + adv_ec_wdt_outb(EC_DAT_EN_DLY_L, EC_ADDR_DATA); + adv_ec_wdt_outb(0, EC_ADDR_DATA); + + /* set reset delay */ + adv_ec_wdt_outb(EC_CMD_COMM, EC_ADDR_CMD); + adv_ec_wdt_outb(EC_DAT_RST_DLY_H, EC_ADDR_DATA); + adv_ec_wdt_outb(val >> 8, EC_ADDR_DATA); + + adv_ec_wdt_outb(EC_CMD_COMM, EC_ADDR_CMD); + adv_ec_wdt_outb(EC_DAT_RST_DLY_L, EC_ADDR_DATA); + adv_ec_wdt_outb(val & 0xFF, EC_ADDR_DATA); + + wdd->timeout = t; + return 0; +} + +static int adv_ec_wdt_start(struct watchdog_device *wdd) +{ + adv_ec_wdt_set_timeout(wdd, wdd->timeout); + adv_ec_wdt_outb(EC_CMD_WDT_START, EC_ADDR_CMD); + + return 0; +} + +static int adv_ec_wdt_stop(struct watchdog_device *wdd) +{ + adv_ec_wdt_outb(EC_CMD_WDT_STOP, EC_ADDR_CMD); + + return 0; +} + +static const struct watchdog_info adv_ec_wdt_info = { + .identity = DRIVER_NAME, + .options = WDIOF_SETTIMEOUT | + WDIOF_MAGICCLOSE | + WDIOF_KEEPALIVEPING, +}; + +static const struct watchdog_ops adv_ec_wdt_ops = { + .owner = THIS_MODULE, + .start = adv_ec_wdt_start, + .stop = adv_ec_wdt_stop, + .ping = adv_ec_wdt_ping, + .set_timeout = adv_ec_wdt_set_timeout, +}; + +static struct watchdog_device adv_ec_wdt_dev = { + .info = &adv_ec_wdt_info, + .ops = &adv_ec_wdt_ops, + .min_timeout = MIN_TIME, + .max_timeout = MAX_TIME, + .timeout = DEFAULT_TIME, +}; + +static int adv_ec_wdt_probe(struct device *dev, unsigned int id) +{ + if (!devm_request_region(dev, EC_BASE_ADDR, EC_ADDR_EXTENT, dev_name(dev))) { + dev_err(dev, "Unable to lock port addresses (0x%X-0x%X)\n", + EC_BASE_ADDR, EC_BASE_ADDR + EC_ADDR_EXTENT); + return -EBUSY; + } + + watchdog_init_timeout(&adv_ec_wdt_dev, timeout, dev); + watchdog_stop_on_reboot(&adv_ec_wdt_dev); + watchdog_stop_on_unregister(&adv_ec_wdt_dev); + + return devm_watchdog_register_device(dev, 
&adv_ec_wdt_dev); +} + +static struct isa_driver adv_ec_wdt_driver = { + .probe = adv_ec_wdt_probe, + .driver = { + .name = DRIVER_NAME, + }, +}; + +static int __init adv_ec_wdt_init(void) +{ + unsigned int val; + + /* quick probe for EC */ + if (!request_region(EC_BASE_ADDR, EC_ADDR_EXTENT, DRIVER_NAME)) + return -EBUSY; + + adv_ec_wdt_outb(EC_CMD_EC_PROBE, EC_ADDR_CMD); + val = adv_ec_wdt_inb(EC_ADDR_DATA); + release_region(EC_BASE_ADDR, EC_ADDR_EXTENT); + + if (val != EC_MAGIC) + return -ENODEV; + + return isa_register_driver(&adv_ec_wdt_driver, 1); +} + +static void __exit adv_ec_wdt_exit(void) +{ + isa_unregister_driver(&adv_ec_wdt_driver); +} + +module_init(adv_ec_wdt_init); +module_exit(adv_ec_wdt_exit); + +MODULE_AUTHOR("Thomas Kastner "); +MODULE_DESCRIPTION("Advantech Embedded Controller Watchdog Device Driver"); +MODULE_LICENSE("GPL"); +MODULE_VERSION("20221019"); +MODULE_ALIAS("isa:" DRIVER_NAME); From b49e2a3cfb84290b878999ade1410a3edb65706c Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Thu, 20 Oct 2022 19:50:44 +0100 Subject: [PATCH 1868/4122] watchdog: at91rm9200: Remove #ifdef guards for PM related functions Use the pm_ptr() macro to handle the .suspend/.resume callbacks. This macro allows the suspend and resume functions to be automatically dropped by the compiler when CONFIG_SUSPEND is disabled, without having to use #ifdef guards. Not using #ifdef guards means that the code is always compiled independently of any Kconfig option, and thanks to that bugs and regressions are easier to catch. 
Signed-off-by: Paul Cercueil Reviewed-by: Guenter Roeck Reviewed-by: Claudiu Beznea Link: https://lore.kernel.org/r/20221020185047.1001522-2-paul@crapouillou.net Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/at91rm9200_wdt.c | 11 ++--------- drivers/watchdog/db8500_wdt.c | 9 ++------- 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/drivers/watchdog/at91rm9200_wdt.c b/drivers/watchdog/at91rm9200_wdt.c index 6d751eb8191d..5126454bb861 100644 --- a/drivers/watchdog/at91rm9200_wdt.c +++ b/drivers/watchdog/at91rm9200_wdt.c @@ -278,8 +278,6 @@ static void at91wdt_shutdown(struct platform_device *pdev) at91_wdt_stop(); } -#ifdef CONFIG_PM - static int at91wdt_suspend(struct platform_device *pdev, pm_message_t message) { at91_wdt_stop(); @@ -293,11 +291,6 @@ static int at91wdt_resume(struct platform_device *pdev) return 0; } -#else -#define at91wdt_suspend NULL -#define at91wdt_resume NULL -#endif - static const struct of_device_id at91_wdt_dt_ids[] = { { .compatible = "atmel,at91rm9200-wdt" }, { /* sentinel */ } @@ -308,8 +301,8 @@ static struct platform_driver at91wdt_driver = { .probe = at91wdt_probe, .remove = at91wdt_remove, .shutdown = at91wdt_shutdown, - .suspend = at91wdt_suspend, - .resume = at91wdt_resume, + .suspend = pm_ptr(at91wdt_suspend), + .resume = pm_ptr(at91wdt_resume), .driver = { .name = "atmel_st_watchdog", .of_match_table = at91_wdt_dt_ids, diff --git a/drivers/watchdog/db8500_wdt.c b/drivers/watchdog/db8500_wdt.c index 6ed8b63d310d..97148ac0aa54 100644 --- a/drivers/watchdog/db8500_wdt.c +++ b/drivers/watchdog/db8500_wdt.c @@ -105,7 +105,6 @@ static int db8500_wdt_probe(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM static int db8500_wdt_suspend(struct platform_device *pdev, pm_message_t state) { @@ -130,15 +129,11 @@ static int db8500_wdt_resume(struct platform_device *pdev) } return 0; } -#else -#define db8500_wdt_suspend NULL -#define db8500_wdt_resume NULL -#endif static struct 
platform_driver db8500_wdt_driver = { .probe = db8500_wdt_probe, - .suspend = db8500_wdt_suspend, - .resume = db8500_wdt_resume, + .suspend = pm_ptr(db8500_wdt_suspend), + .resume = pm_ptr(db8500_wdt_resume), .driver = { .name = "db8500_wdt", }, From d36eda79c600518fb6bc8ad9e3f2f5f201ec1fb9 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Thu, 20 Oct 2022 19:50:45 +0100 Subject: [PATCH 1869/4122] watchdog: twl4030: Remove #ifdef guards for PM related functions Use the pm_ptr() macro to handle the .suspend/.resume callbacks. This macro allows the suspend and resume functions to be automatically dropped by the compiler when CONFIG_SUSPEND is disabled, without having to use #ifdef guards. Not using #ifdef guards means that the code is always compiled independently of any Kconfig option, and thanks to that bugs and regressions are easier to catch. Signed-off-by: Paul Cercueil Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20221020185047.1001522-3-paul@crapouillou.net Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/twl4030_wdt.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/watchdog/twl4030_wdt.c b/drivers/watchdog/twl4030_wdt.c index 36b4a660928d..09d17e20f4a7 100644 --- a/drivers/watchdog/twl4030_wdt.c +++ b/drivers/watchdog/twl4030_wdt.c @@ -81,7 +81,6 @@ static int twl4030_wdt_probe(struct platform_device *pdev) return devm_watchdog_register_device(dev, wdt); } -#ifdef CONFIG_PM static int twl4030_wdt_suspend(struct platform_device *pdev, pm_message_t state) { struct watchdog_device *wdt = platform_get_drvdata(pdev); @@ -99,10 +98,6 @@ static int twl4030_wdt_resume(struct platform_device *pdev) return 0; } -#else -#define twl4030_wdt_suspend NULL -#define twl4030_wdt_resume NULL -#endif static const struct of_device_id twl_wdt_of_match[] = { { .compatible = "ti,twl4030-wdt", }, @@ -112,8 +107,8 @@ MODULE_DEVICE_TABLE(of, twl_wdt_of_match); static struct platform_driver 
twl4030_wdt_driver = { .probe = twl4030_wdt_probe, - .suspend = twl4030_wdt_suspend, - .resume = twl4030_wdt_resume, + .suspend = pm_ptr(twl4030_wdt_suspend), + .resume = pm_ptr(twl4030_wdt_resume), .driver = { .name = "twl4030_wdt", .of_match_table = twl_wdt_of_match, From 0327476d6ef32c347e1590e6215616adc847afe1 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Thu, 20 Oct 2022 19:50:46 +0100 Subject: [PATCH 1870/4122] watchdog: omap: Remove #ifdef guards for PM related functions Use the pm_ptr() macro to handle the .suspend/.resume callbacks. This macro allows the suspend and resume functions to be automatically dropped by the compiler when CONFIG_SUSPEND is disabled, without having to use #ifdef guards. Not using #ifdef guards means that the code is always compiled independently of any Kconfig option, and thanks to that bugs and regressions are easier to catch. Signed-off-by: Paul Cercueil Acked-by: Aaro Koskinen Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20221020185047.1001522-4-paul@crapouillou.net Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/omap_wdt.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/watchdog/omap_wdt.c b/drivers/watchdog/omap_wdt.c index 74d785b2b478..e75aa86f63cb 100644 --- a/drivers/watchdog/omap_wdt.c +++ b/drivers/watchdog/omap_wdt.c @@ -316,8 +316,6 @@ static int omap_wdt_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM - /* REVISIT ... not clear this is the best way to handle system suspend; and * it's very inappropriate for selective device suspend (e.g. suspending this * through sysfs rather than by stopping the watchdog daemon). 
Also, this @@ -353,11 +351,6 @@ static int omap_wdt_resume(struct platform_device *pdev) return 0; } -#else -#define omap_wdt_suspend NULL -#define omap_wdt_resume NULL -#endif - static const struct of_device_id omap_wdt_of_match[] = { { .compatible = "ti,omap3-wdt", }, {}, @@ -368,8 +361,8 @@ static struct platform_driver omap_wdt_driver = { .probe = omap_wdt_probe, .remove = omap_wdt_remove, .shutdown = omap_wdt_shutdown, - .suspend = omap_wdt_suspend, - .resume = omap_wdt_resume, + .suspend = pm_ptr(omap_wdt_suspend), + .resume = pm_ptr(omap_wdt_resume), .driver = { .name = "omap_wdt", .of_match_table = omap_wdt_of_match, From 758f46c2e67c4901ea49e684e12adb698b1b2bbd Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Thu, 20 Oct 2022 19:50:47 +0100 Subject: [PATCH 1871/4122] watchdog: kempld: Remove #ifdef guards for PM related functions Use the pm_ptr() macro to handle the .suspend/.resume callbacks. This macro allows the suspend and resume functions to be automatically dropped by the compiler when CONFIG_SUSPEND is disabled, without having to use #ifdef guards. Not using #ifdef guards means that the code is always compiled independently of any Kconfig option, and thanks to that bugs and regressions are easier to catch. 
Signed-off-by: Paul Cercueil Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20221020185047.1001522-5-paul@crapouillou.net Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/kempld_wdt.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/watchdog/kempld_wdt.c b/drivers/watchdog/kempld_wdt.c index 40bd518ed873..e6c7a2906680 100644 --- a/drivers/watchdog/kempld_wdt.c +++ b/drivers/watchdog/kempld_wdt.c @@ -75,9 +75,7 @@ struct kempld_wdt_data { struct watchdog_device wdd; unsigned int pretimeout; struct kempld_wdt_stage stage[KEMPLD_WDT_MAX_STAGES]; -#ifdef CONFIG_PM u8 pm_status_store; -#endif }; #define DEFAULT_TIMEOUT 30 /* seconds */ @@ -495,7 +493,6 @@ static int kempld_wdt_probe(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM /* Disable watchdog if it is active during suspend */ static int kempld_wdt_suspend(struct platform_device *pdev, pm_message_t message) @@ -531,18 +528,14 @@ static int kempld_wdt_resume(struct platform_device *pdev) else return kempld_wdt_stop(wdd); } -#else -#define kempld_wdt_suspend NULL -#define kempld_wdt_resume NULL -#endif static struct platform_driver kempld_wdt_driver = { .driver = { .name = "kempld-wdt", }, .probe = kempld_wdt_probe, - .suspend = kempld_wdt_suspend, - .resume = kempld_wdt_resume, + .suspend = pm_ptr(kempld_wdt_suspend), + .resume = pm_ptr(kempld_wdt_resume), }; module_platform_driver(kempld_wdt_driver); From 47c008050aec3e9a13af29dd74cd8b4c112bc07b Mon Sep 17 00:00:00 2001 From: Marcus Folkesson Date: Fri, 28 Oct 2022 09:50:19 +0200 Subject: [PATCH 1872/4122] watchdog: rn5t618: add support for read out bootstatus The PMIC does store the power-off factor internally. Read it out and report it as bootstatus. 
Signed-off-by: Marcus Folkesson Acked-by: Lee Jones Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20221028075019.2757812-1-marcus.folkesson@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/rn5t618_wdt.c | 12 ++++++++++++ include/linux/mfd/rn5t618.h | 9 +++++++++ 2 files changed, 21 insertions(+) diff --git a/drivers/watchdog/rn5t618_wdt.c b/drivers/watchdog/rn5t618_wdt.c index 6e524c8e26a8..40d8ebd8c0ac 100644 --- a/drivers/watchdog/rn5t618_wdt.c +++ b/drivers/watchdog/rn5t618_wdt.c @@ -144,6 +144,8 @@ static int rn5t618_wdt_probe(struct platform_device *pdev) struct rn5t618 *rn5t618 = dev_get_drvdata(dev->parent); struct rn5t618_wdt *wdt; int min_timeout, max_timeout; + int ret; + unsigned int val; wdt = devm_kzalloc(dev, sizeof(struct rn5t618_wdt), GFP_KERNEL); if (!wdt) @@ -160,6 +162,16 @@ static int rn5t618_wdt_probe(struct platform_device *pdev) wdt->wdt_dev.timeout = max_timeout; wdt->wdt_dev.parent = dev; + /* Read out previous power-off factor */ + ret = regmap_read(wdt->rn5t618->regmap, RN5T618_POFFHIS, &val); + if (ret) + return ret; + + if (val & RN5T618_POFFHIS_VINDET) + wdt->wdt_dev.bootstatus = WDIOF_POWERUNDER; + else if (val & RN5T618_POFFHIS_WDG) + wdt->wdt_dev.bootstatus = WDIOF_CARDRESET; + watchdog_set_drvdata(&wdt->wdt_dev, wdt); watchdog_init_timeout(&wdt->wdt_dev, timeout, dev); watchdog_set_nowayout(&wdt->wdt_dev, nowayout); diff --git a/include/linux/mfd/rn5t618.h b/include/linux/mfd/rn5t618.h index 8aa0bda1af4f..aacb6d51e99c 100644 --- a/include/linux/mfd/rn5t618.h +++ b/include/linux/mfd/rn5t618.h @@ -227,6 +227,15 @@ #define RN5T618_WATCHDOG_WDOGTIM_S 0 #define RN5T618_PWRIRQ_IR_WDOG BIT(6) +#define RN5T618_POFFHIS_PWRON BIT(0) +#define RN5T618_POFFHIS_TSHUT BIT(1) +#define RN5T618_POFFHIS_VINDET BIT(2) +#define RN5T618_POFFHIS_IODET BIT(3) +#define RN5T618_POFFHIS_CPU BIT(4) +#define RN5T618_POFFHIS_WDG BIT(5) +#define RN5T618_POFFHIS_DCLIM BIT(6) +#define RN5T618_POFFHIS_N_OE 
BIT(7) + enum { RN5T618_DCDC1, RN5T618_DCDC2, From ef9b7bf52c2f47f0a9bf988543c577b92c92d15e Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Fri, 28 Oct 2022 09:27:50 +0300 Subject: [PATCH 1873/4122] watchdog: iTCO_wdt: Set NO_REBOOT if the watchdog is not already running MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Daniel reported that the commit 1ae3e78c0820 ("watchdog: iTCO_wdt: No need to stop the timer in probe") makes the QEMU implementation of the iTCO watchdog no longer trigger a reboot when the NO_REBOOT flag is initially cleared using this option (on the QEMU command line): -global ICH9-LPC.noreboot=false The problem with the commit is that it left the unconditional setting of NO_REBOOT that is not cleared anymore when the kernel keeps pinging the watchdog (as opposed to the previous code that called iTCO_wdt_stop() that cleared it). Fix this so that we only set NO_REBOOT if the watchdog was not initially running. Fixes: 1ae3e78c0820 ("watchdog: iTCO_wdt: No need to stop the timer in probe") Reported-by: Daniel P. Berrangé Signed-off-by: Mika Westerberg Tested-by: Daniel P. Berrangé Reviewed-by: Daniel P.
Berrangé Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20221028062750.45451-1-mika.westerberg@linux.intel.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/iTCO_wdt.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/watchdog/iTCO_wdt.c b/drivers/watchdog/iTCO_wdt.c index 34693f11385f..e937b4dd28be 100644 --- a/drivers/watchdog/iTCO_wdt.c +++ b/drivers/watchdog/iTCO_wdt.c @@ -423,14 +423,18 @@ static unsigned int iTCO_wdt_get_timeleft(struct watchdog_device *wd_dev) return time_left; } -static void iTCO_wdt_set_running(struct iTCO_wdt_private *p) +/* Returns true if the watchdog was running */ +static bool iTCO_wdt_set_running(struct iTCO_wdt_private *p) { u16 val; - /* Bit 11: TCO Timer Halt -> 0 = The TCO timer is * enabled */ + /* Bit 11: TCO Timer Halt -> 0 = The TCO timer is enabled */ val = inw(TCO1_CNT(p)); - if (!(val & BIT(11))) + if (!(val & BIT(11))) { set_bit(WDOG_HW_RUNNING, &p->wddev.status); + return true; + } + return false; } /* @@ -518,9 +522,6 @@ static int iTCO_wdt_probe(struct platform_device *pdev) return -ENODEV; /* Cannot reset NO_REBOOT bit */ } - /* Set the NO_REBOOT bit to prevent later reboots, just for sure */ - p->update_no_reboot_bit(p->no_reboot_priv, true); - if (turn_SMI_watchdog_clear_off >= p->iTCO_version) { /* * Bit 13: TCO_EN -> 0 @@ -572,7 +573,13 @@ static int iTCO_wdt_probe(struct platform_device *pdev) watchdog_set_drvdata(&p->wddev, p); platform_set_drvdata(pdev, p); - iTCO_wdt_set_running(p); + if (!iTCO_wdt_set_running(p)) { + /* + * If the watchdog was not running set NO_REBOOT now to + * prevent later reboots. 
+ */ + p->update_no_reboot_bit(p->no_reboot_priv, true); + } /* Check that the heartbeat value is within it's range; if not reset to the default */ From 9ec0b7e06835b857f892feb2fe6121db1393425d Mon Sep 17 00:00:00 2001 From: Eddie James Date: Tue, 1 Nov 2022 15:53:37 -0500 Subject: [PATCH 1874/4122] watchdog: aspeed: Enable pre-timeout interrupt Enable the core pre-timeout interrupt on AST2500 and AST2600. Signed-off-by: Eddie James Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20221101205338.577427-2-eajames@linux.ibm.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/aspeed_wdt.c | 104 ++++++++++++++++++++++++++++------ 1 file changed, 88 insertions(+), 16 deletions(-) diff --git a/drivers/watchdog/aspeed_wdt.c b/drivers/watchdog/aspeed_wdt.c index 0cff2adfbfc9..86b5331bc491 100644 --- a/drivers/watchdog/aspeed_wdt.c +++ b/drivers/watchdog/aspeed_wdt.c @@ -5,11 +5,14 @@ * Joel Stanley */ +#include #include +#include #include #include #include #include +#include #include #include @@ -18,28 +21,41 @@ module_param(nowayout, bool, 0); MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); +struct aspeed_wdt_config { + u32 ext_pulse_width_mask; + u32 irq_shift; + u32 irq_mask; +}; + struct aspeed_wdt { struct watchdog_device wdd; void __iomem *base; u32 ctrl; -}; - -struct aspeed_wdt_config { - u32 ext_pulse_width_mask; + const struct aspeed_wdt_config *cfg; }; static const struct aspeed_wdt_config ast2400_config = { .ext_pulse_width_mask = 0xff, + .irq_shift = 0, + .irq_mask = 0, }; static const struct aspeed_wdt_config ast2500_config = { .ext_pulse_width_mask = 0xfffff, + .irq_shift = 12, + .irq_mask = GENMASK(31, 12), +}; + +static const struct aspeed_wdt_config ast2600_config = { + .ext_pulse_width_mask = 0xfffff, + .irq_shift = 0, + .irq_mask = GENMASK(31, 10), }; static const struct of_device_id aspeed_wdt_of_table[] = { { .compatible = 
"aspeed,ast2400-wdt", .data = &ast2400_config }, { .compatible = "aspeed,ast2500-wdt", .data = &ast2500_config }, - { .compatible = "aspeed,ast2600-wdt", .data = &ast2500_config }, + { .compatible = "aspeed,ast2600-wdt", .data = &ast2600_config }, { }, }; MODULE_DEVICE_TABLE(of, aspeed_wdt_of_table); @@ -58,6 +74,7 @@ MODULE_DEVICE_TABLE(of, aspeed_wdt_of_table); #define WDT_CTRL_RESET_SYSTEM BIT(1) #define WDT_CTRL_ENABLE BIT(0) #define WDT_TIMEOUT_STATUS 0x10 +#define WDT_TIMEOUT_STATUS_IRQ BIT(2) #define WDT_TIMEOUT_STATUS_BOOT_SECONDARY BIT(1) #define WDT_CLEAR_TIMEOUT_STATUS 0x14 #define WDT_CLEAR_TIMEOUT_AND_BOOT_CODE_SELECTION BIT(0) @@ -160,6 +177,26 @@ static int aspeed_wdt_set_timeout(struct watchdog_device *wdd, return 0; } +static int aspeed_wdt_set_pretimeout(struct watchdog_device *wdd, + unsigned int pretimeout) +{ + struct aspeed_wdt *wdt = to_aspeed_wdt(wdd); + u32 actual = pretimeout * WDT_RATE_1MHZ; + u32 s = wdt->cfg->irq_shift; + u32 m = wdt->cfg->irq_mask; + + wdd->pretimeout = pretimeout; + wdt->ctrl &= ~m; + if (pretimeout) + wdt->ctrl |= ((actual << s) & m) | WDT_CTRL_WDT_INTR; + else + wdt->ctrl &= ~WDT_CTRL_WDT_INTR; + + writel(wdt->ctrl, wdt->base + WDT_CTRL); + + return 0; +} + static int aspeed_wdt_restart(struct watchdog_device *wdd, unsigned long action, void *data) { @@ -232,6 +269,7 @@ static const struct watchdog_ops aspeed_wdt_ops = { .stop = aspeed_wdt_stop, .ping = aspeed_wdt_ping, .set_timeout = aspeed_wdt_set_timeout, + .set_pretimeout = aspeed_wdt_set_pretimeout, .restart = aspeed_wdt_restart, .owner = THIS_MODULE, }; @@ -243,10 +281,29 @@ static const struct watchdog_info aspeed_wdt_info = { .identity = KBUILD_MODNAME, }; +static const struct watchdog_info aspeed_wdt_pretimeout_info = { + .options = WDIOF_KEEPALIVEPING + | WDIOF_PRETIMEOUT + | WDIOF_MAGICCLOSE + | WDIOF_SETTIMEOUT, + .identity = KBUILD_MODNAME, +}; + +static irqreturn_t aspeed_wdt_irq(int irq, void *arg) +{ + struct watchdog_device *wdd = arg; + struct 
aspeed_wdt *wdt = to_aspeed_wdt(wdd); + u32 status = readl(wdt->base + WDT_TIMEOUT_STATUS); + + if (status & WDT_TIMEOUT_STATUS_IRQ) + watchdog_notify_pretimeout(wdd); + + return IRQ_HANDLED; +} + static int aspeed_wdt_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - const struct aspeed_wdt_config *config; const struct of_device_id *ofdid; struct aspeed_wdt *wdt; struct device_node *np; @@ -259,11 +316,33 @@ static int aspeed_wdt_probe(struct platform_device *pdev) if (!wdt) return -ENOMEM; + np = dev->of_node; + + ofdid = of_match_node(aspeed_wdt_of_table, np); + if (!ofdid) + return -EINVAL; + wdt->cfg = ofdid->data; + wdt->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(wdt->base)) return PTR_ERR(wdt->base); wdt->wdd.info = &aspeed_wdt_info; + + if (wdt->cfg->irq_mask) { + int irq = platform_get_irq_optional(pdev, 0); + + if (irq > 0) { + ret = devm_request_irq(dev, irq, aspeed_wdt_irq, + IRQF_SHARED, dev_name(dev), + wdt); + if (ret) + return ret; + + wdt->wdd.info = &aspeed_wdt_pretimeout_info; + } + } + wdt->wdd.ops = &aspeed_wdt_ops; wdt->wdd.max_hw_heartbeat_ms = WDT_MAX_TIMEOUT_MS; wdt->wdd.parent = dev; @@ -273,13 +352,6 @@ static int aspeed_wdt_probe(struct platform_device *pdev) watchdog_set_nowayout(&wdt->wdd, nowayout); - np = dev->of_node; - - ofdid = of_match_node(aspeed_wdt_of_table, np); - if (!ofdid) - return -EINVAL; - config = ofdid->data; - /* * On clock rates: * - ast2400 wdt can run at PCLK, or 1MHz @@ -331,7 +403,7 @@ static int aspeed_wdt_probe(struct platform_device *pdev) (of_device_is_compatible(np, "aspeed,ast2600-wdt"))) { u32 reg = readl(wdt->base + WDT_RESET_WIDTH); - reg &= config->ext_pulse_width_mask; + reg &= wdt->cfg->ext_pulse_width_mask; if (of_property_read_bool(np, "aspeed,ext-active-high")) reg |= WDT_ACTIVE_HIGH_MAGIC; else @@ -339,7 +411,7 @@ static int aspeed_wdt_probe(struct platform_device *pdev) writel(reg, wdt->base + WDT_RESET_WIDTH); - reg &= config->ext_pulse_width_mask; + reg 
&= wdt->cfg->ext_pulse_width_mask; if (of_property_read_bool(np, "aspeed,ext-push-pull")) reg |= WDT_PUSH_PULL_MAGIC; else @@ -349,7 +421,7 @@ static int aspeed_wdt_probe(struct platform_device *pdev) } if (!of_property_read_u32(np, "aspeed,ext-pulse-duration", &duration)) { - u32 max_duration = config->ext_pulse_width_mask + 1; + u32 max_duration = wdt->cfg->ext_pulse_width_mask + 1; if (duration == 0 || duration > max_duration) { dev_err(dev, "Invalid pulse duration: %uus\n", From 586cb1d65cc44371115600bc981626725c864029 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 16 Nov 2022 16:43:51 -0800 Subject: [PATCH 1875/4122] tools lib api: Clean up install_headers Add missing backslash that caused an install command to always appear in build output. Make the install headers more specific. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Daniel Borkmann Cc: Hao Luo Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Fastabend Cc: KP Singh Cc: Mark Rutland Cc: Martin KaFai Lau Cc: Masahiro Yamada Cc: Namhyung Kim Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Song Liu Cc: Stanislav Fomichev Cc: Stephane Eranian Cc: Yonghong Song Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20221117004356.279422-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/api/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile index 3e5ef1e0e890..3649c7f7ea65 100644 --- a/tools/lib/api/Makefile +++ b/tools/lib/api/Makefile @@ -100,12 +100,12 @@ install_lib: $(LIBFILE) cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ) install_headers: - $(call QUIET_INSTALL, headers) \ + $(call QUIET_INSTALL, libapi_headers) \ $(call do_install,cpu.h,$(prefix)/include/api,644); \ $(call do_install,debug.h,$(prefix)/include/api,644); \ $(call do_install,io.h,$(prefix)/include/api,644); \ $(call do_install,fd/array.h,$(prefix)/include/api/fd,644); \ - $(call 
do_install,fs/fs.h,$(prefix)/include/api/fs,644); + $(call do_install,fs/fs.h,$(prefix)/include/api/fs,644); \ $(call do_install,fs/tracing_path.h,$(prefix)/include/api/fs,644); install: install_lib install_headers From daa45f3f3577556801c7b0b2df85eed1289fbcb6 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 16 Nov 2022 16:43:52 -0800 Subject: [PATCH 1876/4122] tools lib bpf: Avoid install_headers make warning The perf build makes the install_headers target; however, as there is no action for this target, the following warning is always produced: make[3]: Nothing to be done for 'install_headers'. Solve this by adding a display of 'INSTALL libbpf_headers'. Signed-off-by: Ian Rogers Acked-by: Andrii Nakryiko Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Hao Luo Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Fastabend Cc: KP Singh Cc: Mark Rutland Cc: Martin KaFai Lau Cc: Masahiro Yamada Cc: Namhyung Kim Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Song Liu Cc: Stanislav Fomichev Cc: Stephane Eranian Cc: Yonghong Song Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20221117004356.279422-3-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/bpf/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 4c904ef0b47e..7f5f7d2ebe1f 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -255,6 +255,7 @@ $(INSTALL_GEN_HDRS): $(INSTALL_PFX)/%.h: $(OUTPUT)%.h $(call do_install,$<,$(prefix)/include/bpf,644) install_headers: $(BPF_GENERATED) $(INSTALL_SRC_HDRS) $(INSTALL_GEN_HDRS) + $(call QUIET_INSTALL, libbpf_headers) install_pkgconfig: $(PC_FILE) $(call QUIET_INSTALL, $(PC_FILE)) \ From 806dda31b856d83d8ec211aa9831bac5f978271e Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 16 Nov 2022 16:43:53 -0800 Subject: [PATCH 1877/4122] tools lib symbol: Clean up build output Missing @ when building libsymbol. Make the install echo specific to installing the libsymbol headers.
Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Daniel Borkmann Cc: Hao Luo Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Fastabend Cc: KP Singh Cc: Mark Rutland Cc: Martin KaFai Lau Cc: Masahiro Yamada Cc: Namhyung Kim Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Song Liu Cc: Stanislav Fomichev Cc: Stephane Eranian Cc: Yonghong Song Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20221117004356.279422-4-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/symbol/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/lib/symbol/Makefile b/tools/lib/symbol/Makefile index 4c1d6b53032d..ea8707b3442a 100644 --- a/tools/lib/symbol/Makefile +++ b/tools/lib/symbol/Makefile @@ -77,7 +77,7 @@ include $(srctree)/tools/scripts/Makefile.include all: fixdep $(LIBFILE) $(SYMBOL_IN): FORCE - $(MAKE) $(build)=libsymbol V=1 + @$(MAKE) $(build)=libsymbol $(LIBFILE): $(SYMBOL_IN) $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(SYMBOL_IN) @@ -101,7 +101,7 @@ install_lib: $(LIBFILE) cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ) install_headers: - $(call QUIET_INSTALL, headers) \ + $(call QUIET_INSTALL, libsymbol_headers) \ $(call do_install,kallsyms.h,$(prefix)/include/symbol,644); install: install_lib install_headers From e8951bfb4cb325a6b80310790dc78ac9b4a147eb Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 16 Nov 2022 16:43:54 -0800 Subject: [PATCH 1878/4122] tools lib perf: Make install_headers clearer Add libperf to the name so that this install_headers build appears different to similar targets in different libraries. 
Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Daniel Borkmann Cc: Hao Luo Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Fastabend Cc: KP Singh Cc: Mark Rutland Cc: Martin KaFai Lau Cc: Masahiro Yamada Cc: Namhyung Kim Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Song Liu Cc: Stanislav Fomichev Cc: Stephane Eranian Cc: Yonghong Song Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20221117004356.279422-5-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile index 1badc0a04676..a90fb8c6bed4 100644 --- a/tools/lib/perf/Makefile +++ b/tools/lib/perf/Makefile @@ -188,7 +188,7 @@ install_lib: libs cp -fpR $(LIBPERF_ALL) $(DESTDIR)$(libdir_SQ) install_headers: - $(call QUIET_INSTALL, headers) \ + $(call QUIET_INSTALL, libperf_headers) \ $(call do_install,include/perf/bpf_perf.h,$(prefix)/include/perf,644); \ $(call do_install,include/perf/core.h,$(prefix)/include/perf,644); \ $(call do_install,include/perf/cpumap.h,$(prefix)/include/perf,644); \ From 77dce6890a2a715b186bdc149c843571a5bb47df Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 16 Nov 2022 16:43:55 -0800 Subject: [PATCH 1879/4122] tools lib subcmd: Make install_headers clearer Add libsubcmd to the name so that this install_headers build appears different to similar targets in different libraries. 
Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Daniel Borkmann Cc: Hao Luo Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Fastabend Cc: KP Singh Cc: Mark Rutland Cc: Martin KaFai Lau Cc: Masahiro Yamada Cc: Namhyung Kim Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Song Liu Cc: Stanislav Fomichev Cc: Stephane Eranian Cc: Yonghong Song Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20221117004356.279422-6-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/subcmd/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/subcmd/Makefile b/tools/lib/subcmd/Makefile index e96566f8991c..9a316d8b89df 100644 --- a/tools/lib/subcmd/Makefile +++ b/tools/lib/subcmd/Makefile @@ -101,7 +101,7 @@ install_lib: $(LIBFILE) cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ) install_headers: - $(call QUIET_INSTALL, headers) \ + $(call QUIET_INSTALL, libsubcmd_headers) \ $(call do_install,exec-cmd.h,$(prefix)/include/subcmd,644); \ $(call do_install,help.h,$(prefix)/include/subcmd,644); \ $(call do_install,pager.h,$(prefix)/include/subcmd,644); \ From e664f31e21a2d201507704f302ab32f498871b11 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 16 Nov 2022 16:43:56 -0800 Subject: [PATCH 1880/4122] tools lib traceevent: Make install_headers clearer Add libtraceevent to the name so that this install_headers build appears different to similar targets in different libraries. Add ; after kbuffer.h install target for consistency. 
Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Daniel Borkmann Cc: Hao Luo Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Fastabend Cc: KP Singh Cc: Mark Rutland Cc: Martin KaFai Lau Cc: Masahiro Yamada Cc: Namhyung Kim Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Song Liu Cc: Stanislav Fomichev Cc: Stephane Eranian Cc: Yonghong Song Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20221117004356.279422-7-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile index c874c017c636..98dfd4badea3 100644 --- a/tools/lib/traceevent/Makefile +++ b/tools/lib/traceevent/Makefile @@ -234,11 +234,11 @@ install_pkgconfig: $(call do_install_pkgconfig_file,$(prefix)) install_headers: - $(call QUIET_INSTALL, headers) \ + $(call QUIET_INSTALL, traceevent_headers) \ $(call do_install,event-parse.h,$(includedir_SQ),644); \ $(call do_install,event-utils.h,$(includedir_SQ),644); \ $(call do_install,trace-seq.h,$(includedir_SQ),644); \ - $(call do_install,kbuffer.h,$(includedir_SQ),644) + $(call do_install,kbuffer.h,$(includedir_SQ),644); install: install_lib From f215054d749b17c56e014fdca2fcc592dac4529c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 16 Nov 2022 15:38:43 -0800 Subject: [PATCH 1881/4122] perf test: Add -w/--workload option The -w/--workload option runs a simple workload for use in testing. This adds a basic framework for running workloads, with a 'noploop' workload as an example. $ perf test -w noploop The noploop workload loops doing nothing (NOP) for one second by default. It takes an optional argument specifying the duration in seconds.
Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: German Gomez Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Zhengjun Xing Link: https://lore.kernel.org/r/20221116233854.1596378-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/Build | 2 ++ tools/perf/tests/builtin-test.c | 24 +++++++++++++++++++++ tools/perf/tests/tests.h | 22 +++++++++++++++++++ tools/perf/tests/workloads/Build | 3 +++ tools/perf/tests/workloads/noploop.c | 32 ++++++++++++++++++++++++++++ 5 files changed, 83 insertions(+) create mode 100644 tools/perf/tests/workloads/Build create mode 100644 tools/perf/tests/workloads/noploop.c diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 2064a640facb..11b69023011b 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -103,3 +103,5 @@ endif CFLAGS_attr.o += -DBINDIR="BUILD_STR($(bindir_SQ))" -DPYTHON="BUILD_STR($(PYTHON_WORD))" CFLAGS_python-use.o += -DPYTHONPATH="BUILD_STR($(OUTPUT)python)" -DPYTHON="BUILD_STR($(PYTHON_WORD))" CFLAGS_dwarf-unwind.o += -fno-optimize-sibling-calls + +perf-y += workloads/ diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 7122eae1d98d..ce641ccfcf81 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -118,6 +118,10 @@ static struct test_suite **tests[] = { arch_tests, }; +static struct test_workload *workloads[] = { + &workload__noploop, +}; + static int num_subtests(const struct test_suite *t) { int num; @@ -475,6 +479,21 @@ static int perf_test__list(int argc, const char **argv) return 0; } +static int run_workload(const char *work, int argc, const char **argv) +{ + unsigned int i = 0; + struct test_workload *twl; + + for (i = 0; i < ARRAY_SIZE(workloads); i++) { + twl = workloads[i]; + if (!strcmp(twl->name, work)) + return twl->func(argc, argv); + } + + pr_info("No workload found: %s\n", work); + return -1; +} + int 
cmd_test(int argc, const char **argv) { const char *test_usage[] = { @@ -482,12 +501,14 @@ int cmd_test(int argc, const char **argv) NULL, }; const char *skip = NULL; + const char *workload = NULL; const struct option test_options[] = { OPT_STRING('s', "skip", &skip, "tests", "tests to skip"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), OPT_BOOLEAN('F', "dont-fork", &dont_fork, "Do not fork for testcase"), + OPT_STRING('w', "workload", &workload, "work", "workload to run for testing"), OPT_END() }; const char * const test_subcommands[] = { "list", NULL }; @@ -504,6 +525,9 @@ int cmd_test(int argc, const char **argv) if (argc >= 1 && !strcmp(argv[0], "list")) return perf_test__list(argc - 1, argv + 1); + if (workload) + return run_workload(workload, argc, argv); + symbol_conf.priv_size = sizeof(int); symbol_conf.sort_by_name = true; symbol_conf.try_vmlinux_path = true; diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 5bbb8f6a48fc..d315d0d6fc97 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -180,4 +180,26 @@ int test__arch_unwind_sample(struct perf_sample *sample, DECLARE_SUITE(vectors_page); #endif +/* + * Define test workloads to be used in test suites. 
+ */ +typedef int (*workload_fnptr)(int argc, const char **argv); + +struct test_workload { + const char *name; + workload_fnptr func; +}; + +#define DECLARE_WORKLOAD(work) \ + extern struct test_workload workload__##work + +#define DEFINE_WORKLOAD(work) \ +struct test_workload workload__##work = { \ + .name = #work, \ + .func = work, \ +} + +/* The list of test workloads */ +DECLARE_WORKLOAD(noploop); + #endif /* TESTS_H */ diff --git a/tools/perf/tests/workloads/Build b/tools/perf/tests/workloads/Build new file mode 100644 index 000000000000..f98e968d4633 --- /dev/null +++ b/tools/perf/tests/workloads/Build @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 + +perf-y += noploop.o diff --git a/tools/perf/tests/workloads/noploop.c b/tools/perf/tests/workloads/noploop.c new file mode 100644 index 000000000000..940ea5910a84 --- /dev/null +++ b/tools/perf/tests/workloads/noploop.c @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <stdlib.h> +#include <signal.h> +#include <unistd.h> +#include <linux/compiler.h> +#include "../tests.h" + +static volatile sig_atomic_t done; + +static void sighandler(int sig __maybe_unused) +{ + done = 1; +} + +static int noploop(int argc, const char **argv) +{ + int sec = 1; + + if (argc > 0) + sec = atoi(argv[0]); + + signal(SIGINT, sighandler); + signal(SIGALRM, sighandler); + alarm(sec); + + while (!done) + continue; + + return 0; +} + +DEFINE_WORKLOAD(noploop); From 24e733b29f13284aac30c4d1fb9f19201951d770 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 16 Nov 2022 15:38:44 -0800 Subject: [PATCH 1882/4122] perf test: Replace pipe test workload with noploop So that it can get rid of requirement of a compiler. Also define and use more local symbols to ease future changes.
$ sudo ./perf test -v pipe 87: perf pipe recording and injection test : --- start --- test child forked, pid 748003 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.000 MB - ] 748014 748014 -1 |perf [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.000 MB - ] 99.83% perf perf [.] noploop [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.000 MB - ] 99.85% perf perf [.] noploop [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.160 MB /tmp/perf.data.2XYPdw (4007 samples) ] 99.83% perf perf [.] noploop test child finished with 0 ---- end ---- perf pipe recording and injection test: Ok Signed-off-by: Namhyung Kim Tested-by: James Clark Cc: Adrian Hunter Cc: Athira Jajeev Cc: German Gomez Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Zhengjun Xing Link: https://lore.kernel.org/r/20221116233854.1596378-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/pipe_test.sh | 55 ++++++----------------------- 1 file changed, 10 insertions(+), 45 deletions(-) diff --git a/tools/perf/tests/shell/pipe_test.sh b/tools/perf/tests/shell/pipe_test.sh index 1b32b4f28391..8dd115dd35a7 100755 --- a/tools/perf/tests/shell/pipe_test.sh +++ b/tools/perf/tests/shell/pipe_test.sh @@ -2,68 +2,33 @@ # perf pipe recording and injection test # SPDX-License-Identifier: GPL-2.0 -# skip if there's no compiler -if ! 
[ -x "$(command -v cc)" ]; then - echo "failed: no compiler, install gcc" - exit 2 -fi - -file=$(mktemp /tmp/test.file.XXXXXX) data=$(mktemp /tmp/perf.data.XXXXXX) +prog="perf test -w noploop" +task="perf" +sym="noploop" -cat < -#include -#include - -volatile int done; - -void sigalrm(int sig) { - done = 1; -} - -__attribute__((noinline)) void noploop(void) { - while (!done) - continue; -} - -int main(int argc, char *argv[]) { - int sec = 1; - - if (argc > 1) - sec = atoi(argv[1]); - - signal(SIGALRM, sigalrm); - alarm(sec); - - noploop(); - return 0; -} -EOF - - -if ! perf record -e task-clock:u -o - ${file} | perf report -i - --task | grep test.file; then +if ! perf record -e task-clock:u -o - ${prog} | perf report -i - --task | grep ${task}; then echo "cannot find the test file in the perf report" exit 1 fi -if ! perf record -e task-clock:u -o - ${file} | perf inject -b | perf report -i - | grep noploop; then +if ! perf record -e task-clock:u -o - ${prog} | perf inject -b | perf report -i - | grep ${sym}; then echo "cannot find noploop function in pipe #1" exit 1 fi -perf record -e task-clock:u -o - ${file} | perf inject -b -o ${data} -if ! perf report -i ${data} | grep noploop; then +perf record -e task-clock:u -o - ${prog} | perf inject -b -o ${data} +if ! perf report -i ${data} | grep ${sym}; then echo "cannot find noploop function in pipe #2" exit 1 fi -perf record -e task-clock:u -o ${data} ${file} -if ! perf inject -b -i ${data} | perf report -i - | grep noploop; then +perf record -e task-clock:u -o ${data} ${prog} +if ! perf inject -b -i ${data} | perf report -i - | grep ${sym}; then echo "cannot find noploop function in pipe #3" exit 1 fi -rm -f ${file} ${data} ${data}.old +rm -f ${data} ${data}.old exit 0 From 69b352927885b17f03d3ee4ee38f580699af107a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 16 Nov 2022 15:38:45 -0800 Subject: [PATCH 1883/4122] perf test: Add 'thloop' test workload The thloop is similar to noploop but runs in two threads. 
This is needed to verify perf record --per-thread to handle multi-threaded programs properly. $ perf test -w thloop It also takes an optional argument to specify runtime in seconds (default: 1). Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: German Gomez Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Zhengjun Xing Link: https://lore.kernel.org/r/20221116233854.1596378-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/builtin-test.c | 1 + tools/perf/tests/tests.h | 1 + tools/perf/tests/workloads/Build | 1 + tools/perf/tests/workloads/thloop.c | 53 +++++++++++++++++++++++++++++ 4 files changed, 56 insertions(+) create mode 100644 tools/perf/tests/workloads/thloop.c diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index ce641ccfcf81..161f38476e77 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -120,6 +120,7 @@ static struct test_suite **tests[] = { static struct test_workload *workloads[] = { &workload__noploop, + &workload__thloop, }; static int num_subtests(const struct test_suite *t) diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index d315d0d6fc97..e6edfeeadaeb 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -201,5 +201,6 @@ struct test_workload workload__##work = { \ /* The list of test workloads */ DECLARE_WORKLOAD(noploop); +DECLARE_WORKLOAD(thloop); #endif /* TESTS_H */ diff --git a/tools/perf/tests/workloads/Build b/tools/perf/tests/workloads/Build index f98e968d4633..b8964b1099c0 100644 --- a/tools/perf/tests/workloads/Build +++ b/tools/perf/tests/workloads/Build @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 perf-y += noploop.o +perf-y += thloop.o diff --git a/tools/perf/tests/workloads/thloop.c b/tools/perf/tests/workloads/thloop.c new file mode 100644 index 000000000000..29193b75717e --- /dev/null +++ b/tools/perf/tests/workloads/thloop.c @@ 
-0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <pthread.h> +#include <stdlib.h> +#include <signal.h> +#include <unistd.h> +#include <linux/compiler.h> +#include "../tests.h" + +static volatile sig_atomic_t done; +static volatile unsigned count; + +/* We want to check this symbol in perf report */ +noinline void test_loop(void); + +static void sighandler(int sig __maybe_unused) +{ + done = 1; +} + +noinline void test_loop(void) +{ + while (!done) + count++; +} + +static void *thfunc(void *arg) +{ + void (*loop_fn)(void) = arg; + + loop_fn(); + return NULL; +} + +static int thloop(int argc, const char **argv) +{ + int sec = 1; + pthread_t th; + + if (argc > 0) + sec = atoi(argv[0]); + + signal(SIGINT, sighandler); + signal(SIGALRM, sighandler); + alarm(sec); + + pthread_create(&th, NULL, thfunc, test_loop); + test_loop(); + pthread_join(th, NULL); + + return 0; +} + +DEFINE_WORKLOAD(thloop); From 0b8ff0ba2744f364e8f5fb695ae323bae0ecfd19 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 16 Nov 2022 15:38:46 -0800 Subject: [PATCH 1884/4122] perf test: Replace record test workload with thloop So that it can get rid of requirements for a compiler.
$ sudo ./perf test -v 92 92: perf record tests : --- start --- test child forked, pid 740204 Basic --per-thread mode test Basic --per-thread mode test [Success] Register capture test Register capture test [Success] Basic --system-wide mode test Basic --system-wide mode test [Success] Basic target workload test Basic target workload test [Success] test child finished with 0 ---- end ---- perf record tests: Ok Signed-off-by: Namhyung Kim Tested-by: James Clark Cc: Adrian Hunter Cc: Athira Jajeev Cc: German Gomez Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Zhengjun Xing Link: https://lore.kernel.org/r/20221116233854.1596378-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/record.sh | 59 ++------------------------------ 1 file changed, 3 insertions(+), 56 deletions(-) diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh index e93b3a8871fe..4dff89e3a3fd 100755 --- a/tools/perf/tests/shell/record.sh +++ b/tools/perf/tests/shell/record.sh @@ -9,17 +9,13 @@ shelldir=$(dirname "$0") err=0 perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) -testprog=$(mktemp /tmp/__perf_test.prog.XXXXXX) +testprog="perf test -w thloop" testsym="test_loop" cleanup() { rm -rf "${perfdata}" rm -rf "${perfdata}".old - if [ "${testprog}" != "true" ]; then - rm -f "${testprog}" - fi - trap - EXIT TERM INT } @@ -29,53 +25,6 @@ trap_cleanup() { } trap trap_cleanup EXIT TERM INT -build_test_program() { - if ! [ -x "$(command -v cc)" ]; then - # No CC found. 
Fall back to 'true' - testprog=true - testsym=true - return - fi - - echo "Build a test program" - cat < -#include -#include - -void test_loop(void) { - volatile int count = 1000000; - - while (count--) - continue; -} - -void *thfunc(void *arg) { - int forever = *(int *)arg; - - do { - test_loop(); - } while (forever); - - return NULL; -} - -int main(int argc, char *argv[]) { - pthread_t th; - int forever = 0; - - if (argc > 1) - forever = atoi(argv[1]); - - pthread_create(&th, NULL, thfunc, &forever); - test_loop(); - pthread_join(th, NULL); - - return 0; -} -EOF -} - test_per_thread() { echo "Basic --per-thread mode test" if ! perf record -o /dev/null --quiet ${testprog} 2> /dev/null @@ -96,8 +45,8 @@ test_per_thread() { return fi - # run the test program in background (forever) - ${testprog} 1 & + # run the test program in background (for 30 seconds) + ${testprog} 30 & TESTPID=$! rm -f "${perfdata}" @@ -205,8 +154,6 @@ test_workload() { echo "Basic target workload test [Success]" } -build_test_program - test_per_thread test_register_capture test_system_wide From 02c70e915967c372d80f6015ef56744285350251 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:44:07 +0100 Subject: [PATCH 1885/4122] power: supply: adp5061: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Sebastian Reichel --- drivers/power/supply/adp5061.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/power/supply/adp5061.c b/drivers/power/supply/adp5061.c index fcf8ff0bc974..840db629a46c 100644 --- a/drivers/power/supply/adp5061.c +++ b/drivers/power/supply/adp5061.c @@ -694,8 +694,7 @@ static const struct power_supply_desc adp5061_desc = { .num_properties = ARRAY_SIZE(adp5061_props), }; -static int adp5061_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int adp5061_probe(struct i2c_client *client) { struct power_supply_config psy_cfg = {}; struct adp5061_state *st; @@ -737,7 +736,7 @@ static struct i2c_driver adp5061_driver = { .driver = { .name = KBUILD_MODNAME, }, - .probe = adp5061_probe, + .probe_new = adp5061_probe, .id_table = adp5061_id, }; module_i2c_driver(adp5061_driver); From 31c050513c7ac1e455d0f1b1b6a882d96c94d5da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:44:08 +0100 Subject: [PATCH 1886/4122] power: supply: bq2415x: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Sebastian Reichel --- drivers/power/supply/bq2415x_charger.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/power/supply/bq2415x_charger.c b/drivers/power/supply/bq2415x_charger.c index 6b99e1c675b8..d2cb7431dced 100644 --- a/drivers/power/supply/bq2415x_charger.c +++ b/drivers/power/supply/bq2415x_charger.c @@ -1520,9 +1520,9 @@ static int bq2415x_power_supply_init(struct bq2415x_device *bq) } /* main bq2415x probe function */ -static int bq2415x_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int bq2415x_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); int ret; int num; char *name = NULL; @@ -1780,7 +1780,7 @@ static struct i2c_driver bq2415x_driver = { .of_match_table = of_match_ptr(bq2415x_of_match_table), .acpi_match_table = ACPI_PTR(bq2415x_i2c_acpi_match), }, - .probe = bq2415x_probe, + .probe_new = bq2415x_probe, .remove = bq2415x_remove, .id_table = bq2415x_i2c_id_table, }; From 31731754b9257a1e2ebad60f270ecbe089d2ebd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:44:09 +0100 Subject: [PATCH 1887/4122] power: supply: bq24190: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Sebastian Reichel --- drivers/power/supply/bq24190_charger.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/power/supply/bq24190_charger.c b/drivers/power/supply/bq24190_charger.c index 2274679c5ddd..2b2c3a4391c1 100644 --- a/drivers/power/supply/bq24190_charger.c +++ b/drivers/power/supply/bq24190_charger.c @@ -1767,9 +1767,9 @@ static int bq24190_get_config(struct bq24190_dev_info *bdi) return 0; } -static int bq24190_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int bq24190_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct i2c_adapter *adapter = client->adapter; struct device *dev = &client->dev; struct power_supply_config charger_cfg = {}, battery_cfg = {}; @@ -2032,7 +2032,7 @@ static const struct of_device_id bq24190_of_match[] = { MODULE_DEVICE_TABLE(of, bq24190_of_match); static struct i2c_driver bq24190_driver = { - .probe = bq24190_probe, + .probe_new = bq24190_probe, .remove = bq24190_remove, .shutdown = bq24190_shutdown, .id_table = bq24190_i2c_ids, From 924668b4ed02dc6c134f6f3b30bd4c22f8dd4a82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:44:10 +0100 Subject: [PATCH 1888/4122] power: supply: bq24257: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Sebastian Reichel --- drivers/power/supply/bq24257_charger.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/power/supply/bq24257_charger.c b/drivers/power/supply/bq24257_charger.c index a309bbedfe52..ab4c49788c58 100644 --- a/drivers/power/supply/bq24257_charger.c +++ b/drivers/power/supply/bq24257_charger.c @@ -947,9 +947,9 @@ static int bq24257_fw_probe(struct bq24257_device *bq) return 0; } -static int bq24257_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int bq24257_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct i2c_adapter *adapter = client->adapter; struct device *dev = &client->dev; const struct acpi_device_id *acpi_id; @@ -1167,7 +1167,7 @@ static struct i2c_driver bq24257_driver = { .acpi_match_table = ACPI_PTR(bq24257_acpi_match), .pm = &bq24257_pm, }, - .probe = bq24257_probe, + .probe_new = bq24257_probe, .remove = bq24257_remove, .id_table = bq24257_i2c_ids, }; From aaf5339e295baaee2b03c2dc45ac9dc8c2573a76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:44:11 +0100 Subject: [PATCH 1889/4122] power: supply: bq24735: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Sebastian Reichel --- drivers/power/supply/bq24735-charger.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/power/supply/bq24735-charger.c b/drivers/power/supply/bq24735-charger.c index 3ce36d09c017..cfca3a82d5a8 100644 --- a/drivers/power/supply/bq24735-charger.c +++ b/drivers/power/supply/bq24735-charger.c @@ -352,8 +352,7 @@ static struct bq24735_platform *bq24735_parse_dt_data(struct i2c_client *client) return pdata; } -static int bq24735_charger_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int bq24735_charger_probe(struct i2c_client *client) { int ret; struct bq24735 *charger; @@ -506,7 +505,7 @@ static struct i2c_driver bq24735_charger_driver = { .name = "bq24735-charger", .of_match_table = bq24735_match_ids, }, - .probe = bq24735_charger_probe, + .probe_new = bq24735_charger_probe, .id_table = bq24735_charger_id, }; From ed4e2c7570a7b0972f449b2961e2efb9b48849b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:44:12 +0100 Subject: [PATCH 1890/4122] power: supply: bq2515x: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Sebastian Reichel --- drivers/power/supply/bq2515x_charger.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/power/supply/bq2515x_charger.c b/drivers/power/supply/bq2515x_charger.c index 4f76ad9c2f18..da224ae8dc61 100644 --- a/drivers/power/supply/bq2515x_charger.c +++ b/drivers/power/supply/bq2515x_charger.c @@ -1078,9 +1078,9 @@ static const struct regmap_config bq25155_regmap_config = { .volatile_reg = bq2515x_volatile_register, }; -static int bq2515x_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int bq2515x_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct device *dev = &client->dev; struct bq2515x_device *bq2515x; struct power_supply_config charger_cfg = {}; @@ -1158,7 +1158,7 @@ static struct i2c_driver bq2515x_driver = { .name = "bq2515x-charger", .of_match_table = bq2515x_of_match, }, - .probe = bq2515x_probe, + .probe_new = bq2515x_probe, .id_table = bq2515x_i2c_ids, }; module_i2c_driver(bq2515x_driver); From fb94ef2efa1c3e2af52d8144b68eb9c90b0b31ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:44:13 +0100 Subject: [PATCH 1891/4122] power: supply: bq256xx: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Sebastian Reichel --- drivers/power/supply/bq256xx_charger.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/power/supply/bq256xx_charger.c b/drivers/power/supply/bq256xx_charger.c index 01ad84fd147c..db13e288e439 100644 --- a/drivers/power/supply/bq256xx_charger.c +++ b/drivers/power/supply/bq256xx_charger.c @@ -1619,9 +1619,9 @@ static int bq256xx_parse_dt(struct bq256xx_device *bq, return 0; } -static int bq256xx_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int bq256xx_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct device *dev = &client->dev; struct bq256xx_device *bq; struct power_supply_config psy_cfg = { }; @@ -1744,7 +1744,7 @@ static struct i2c_driver bq256xx_driver = { .of_match_table = bq256xx_of_match, .acpi_match_table = bq256xx_acpi_match, }, - .probe = bq256xx_probe, + .probe_new = bq256xx_probe, .id_table = bq256xx_i2c_ids, }; module_i2c_driver(bq256xx_driver); From c5cddca2351b291c8787b45cd046b1dfeb86979f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:44:14 +0100 Subject: [PATCH 1892/4122] power: supply: bq25890: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Sebastian Reichel --- drivers/power/supply/bq25890_charger.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/power/supply/bq25890_charger.c b/drivers/power/supply/bq25890_charger.c index f0362dcb935e..bfdd2213ba69 100644 --- a/drivers/power/supply/bq25890_charger.c +++ b/drivers/power/supply/bq25890_charger.c @@ -1315,8 +1315,7 @@ static int bq25890_fw_probe(struct bq25890_device *bq) return 0; } -static int bq25890_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int bq25890_probe(struct i2c_client *client) { struct device *dev = &client->dev; struct bq25890_device *bq; @@ -1515,7 +1514,7 @@ static struct i2c_driver bq25890_driver = { .acpi_match_table = ACPI_PTR(bq25890_acpi_match), .pm = &bq25890_pm, }, - .probe = bq25890_probe, + .probe_new = bq25890_probe, .remove = bq25890_remove, .shutdown = bq25890_shutdown, .id_table = bq25890_i2c_ids, From 79fc7c26602f990e49471df1d237466b9530ccdb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:44:15 +0100 Subject: [PATCH 1893/4122] power: supply: bq25980: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Sebastian Reichel --- drivers/power/supply/bq25980_charger.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/power/supply/bq25980_charger.c b/drivers/power/supply/bq25980_charger.c index 9339f5649282..a59d9762bc91 100644 --- a/drivers/power/supply/bq25980_charger.c +++ b/drivers/power/supply/bq25980_charger.c @@ -1207,9 +1207,9 @@ static int bq25980_parse_dt(struct bq25980_device *bq) return 0; } -static int bq25980_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int bq25980_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct device *dev = &client->dev; struct bq25980_device *bq; int ret; @@ -1287,7 +1287,7 @@ static struct i2c_driver bq25980_driver = { .name = "bq25980-charger", .of_match_table = bq25980_of_match, }, - .probe = bq25980_probe, + .probe_new = bq25980_probe, .id_table = bq25980_i2c_ids, }; module_i2c_driver(bq25980_driver); From 67f56c79a5723cbdd9dd7bbb1a0375895c2d122f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:44:16 +0100 Subject: [PATCH 1894/4122] power: supply: bq27xxx: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Sebastian Reichel --- drivers/power/supply/bq27xxx_battery_i2c.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/power/supply/bq27xxx_battery_i2c.c b/drivers/power/supply/bq27xxx_battery_i2c.c index 94b00bb89c17..f8768997333b 100644 --- a/drivers/power/supply/bq27xxx_battery_i2c.c +++ b/drivers/power/supply/bq27xxx_battery_i2c.c @@ -136,9 +136,9 @@ static int bq27xxx_battery_i2c_bulk_write(struct bq27xxx_device_info *di, return 0; } -static int bq27xxx_battery_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int bq27xxx_battery_i2c_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct bq27xxx_device_info *di; int ret; char *name; @@ -295,7 +295,7 @@ static struct i2c_driver bq27xxx_battery_i2c_driver = { .name = "bq27xxx-battery", .of_match_table = of_match_ptr(bq27xxx_battery_i2c_of_match_table), }, - .probe = bq27xxx_battery_i2c_probe, + .probe_new = bq27xxx_battery_i2c_probe, .remove = bq27xxx_battery_i2c_remove, .id_table = bq27xxx_i2c_id_table, }; From 433e380226799bfcd823adca1c5e1aec8e2ef72a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:44:17 +0100 Subject: [PATCH 1895/4122] power: supply: ds2782: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Sebastian Reichel --- drivers/power/supply/ds2782_battery.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/power/supply/ds2782_battery.c b/drivers/power/supply/ds2782_battery.c index d78cd05402f6..9b9619246902 100644 --- a/drivers/power/supply/ds2782_battery.c +++ b/drivers/power/supply/ds2782_battery.c @@ -368,9 +368,9 @@ static const struct ds278x_battery_ops ds278x_ops[] = { } }; -static int ds278x_battery_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int ds278x_battery_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct ds278x_platform_data *pdata = client->dev.platform_data; struct power_supply_config psy_cfg = {}; struct ds278x_info *info; @@ -458,7 +458,7 @@ static struct i2c_driver ds278x_battery_driver = { .name = "ds2782-battery", .pm = &ds278x_battery_pm_ops, }, - .probe = ds278x_battery_probe, + .probe_new = ds278x_battery_probe, .remove = ds278x_battery_remove, .id_table = ds278x_id, }; From d9cafca1f200ab4bce13a42ffae82718aba29eb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:44:18 +0100 Subject: [PATCH 1896/4122] power: supply: lp8727: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Sebastian Reichel --- drivers/power/supply/lp8727_charger.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/power/supply/lp8727_charger.c b/drivers/power/supply/lp8727_charger.c index 384a374b52c1..e6c21377d53c 100644 --- a/drivers/power/supply/lp8727_charger.c +++ b/drivers/power/supply/lp8727_charger.c @@ -540,7 +540,7 @@ static struct lp8727_platform_data *lp8727_parse_dt(struct device *dev) } #endif -static int lp8727_probe(struct i2c_client *cl, const struct i2c_device_id *id) +static int lp8727_probe(struct i2c_client *cl) { struct lp8727_chg *pchg; struct lp8727_platform_data *pdata; @@ -615,7 +615,7 @@ static struct i2c_driver lp8727_driver = { .name = "lp8727", .of_match_table = of_match_ptr(lp8727_dt_ids), }, - .probe = lp8727_probe, + .probe_new = lp8727_probe, .remove = lp8727_remove, .id_table = lp8727_ids, }; From 66d9e8fc2bacffb80300d496c54d2fc072229656 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:44:19 +0100 Subject: [PATCH 1897/4122] power: supply: ltc2941: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Sebastian Reichel --- drivers/power/supply/ltc2941-battery-gauge.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/power/supply/ltc2941-battery-gauge.c b/drivers/power/supply/ltc2941-battery-gauge.c index 657305214d68..d3fb42825983 100644 --- a/drivers/power/supply/ltc2941-battery-gauge.c +++ b/drivers/power/supply/ltc2941-battery-gauge.c @@ -439,8 +439,7 @@ static enum power_supply_property ltc294x_properties[] = { POWER_SUPPLY_PROP_CURRENT_NOW, }; -static int ltc294x_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int ltc294x_i2c_probe(struct i2c_client *client) { struct power_supply_config psy_cfg = {}; struct ltc294x_info *info; @@ -636,7 +635,7 @@ static struct i2c_driver ltc294x_driver = { .of_match_table = ltc294x_i2c_of_match, .pm = LTC294X_PM_OPS, }, - .probe = ltc294x_i2c_probe, + .probe_new = ltc294x_i2c_probe, .shutdown = ltc294x_i2c_shutdown, .id_table = ltc294x_i2c_id, }; From 97bdbe0d04b183e5680cf0a487fb0abd4de85a0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:44:20 +0100 Subject: [PATCH 1898/4122] power: supply: ltc4162-l: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Sebastian Reichel --- drivers/power/supply/ltc4162-l-charger.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/power/supply/ltc4162-l-charger.c b/drivers/power/supply/ltc4162-l-charger.c index 1a5cb4405ee3..db2bb5233570 100644 --- a/drivers/power/supply/ltc4162-l-charger.c +++ b/drivers/power/supply/ltc4162-l-charger.c @@ -819,8 +819,7 @@ static void ltc4162l_clear_interrupts(struct ltc4162l_info *info) regmap_write(info->regmap, LTC4162L_CHARGE_STATUS_ALERTS_REG, 0); } -static int ltc4162l_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int ltc4162l_probe(struct i2c_client *client) { struct i2c_adapter *adapter = client->adapter; struct device *dev = &client->dev; @@ -916,7 +915,7 @@ static const struct of_device_id ltc4162l_of_match[] = { MODULE_DEVICE_TABLE(of, ltc4162l_of_match); static struct i2c_driver ltc4162l_driver = { - .probe = ltc4162l_probe, + .probe_new = ltc4162l_probe, .alert = ltc4162l_alert, .id_table = ltc4162l_i2c_id_table, .driver = { From b17018dee05a145e428d1de12d962d25d5f8837d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:44:21 +0100 Subject: [PATCH 1899/4122] power: supply: max14656: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Sebastian Reichel --- drivers/power/supply/max14656_charger_detector.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/power/supply/max14656_charger_detector.c b/drivers/power/supply/max14656_charger_detector.c index fc36828895bf..0d0180fcfa63 100644 --- a/drivers/power/supply/max14656_charger_detector.c +++ b/drivers/power/supply/max14656_charger_detector.c @@ -234,8 +234,7 @@ static enum power_supply_property max14656_battery_props[] = { POWER_SUPPLY_PROP_MANUFACTURER, }; -static int max14656_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int max14656_probe(struct i2c_client *client) { struct i2c_adapter *adapter = client->adapter; struct device *dev = &client->dev; @@ -317,7 +316,7 @@ static struct i2c_driver max14656_i2c_driver = { .name = "max14656", .of_match_table = max14656_match_table, }, - .probe = max14656_probe, + .probe_new = max14656_probe, .id_table = max14656_id, }; module_i2c_driver(max14656_i2c_driver); From a07fca69378c26ea034826feb0011256e25ad237 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:44:22 +0100 Subject: [PATCH 1900/4122] power: supply: max17040: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Sebastian Reichel --- drivers/power/supply/max17040_battery.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/power/supply/max17040_battery.c b/drivers/power/supply/max17040_battery.c index a9aef1e8b186..d1075959dd46 100644 --- a/drivers/power/supply/max17040_battery.c +++ b/drivers/power/supply/max17040_battery.c @@ -430,9 +430,9 @@ static const struct power_supply_desc max17040_battery_desc = { .num_properties = ARRAY_SIZE(max17040_battery_props), }; -static int max17040_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int max17040_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct i2c_adapter *adapter = client->adapter; struct power_supply_config psy_cfg = {}; struct max17040_chip *chip; @@ -599,7 +599,7 @@ static struct i2c_driver max17040_i2c_driver = { .of_match_table = max17040_of_match, .pm = MAX17040_PM_OPS, }, - .probe = max17040_probe, + .probe_new = max17040_probe, .id_table = max17040_id, }; module_i2c_driver(max17040_i2c_driver); From d9ac265b57b8625e4ce1b1bd6a6baa862d51c1f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:44:23 +0100 Subject: [PATCH 1901/4122] power: supply: max17042_battery: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Hans de Goede Signed-off-by: Sebastian Reichel --- drivers/power/supply/max17042_battery.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/power/supply/max17042_battery.c b/drivers/power/supply/max17042_battery.c index ab031bbfbe78..89cabe8ed3b0 100644 --- a/drivers/power/supply/max17042_battery.c +++ b/drivers/power/supply/max17042_battery.c @@ -1031,9 +1031,9 @@ static const struct power_supply_desc max17042_no_current_sense_psy_desc = { .num_properties = ARRAY_SIZE(max17042_battery_props) - 2, }; -static int max17042_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int max17042_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct i2c_adapter *adapter = client->adapter; const struct power_supply_desc *max17042_desc = &max17042_psy_desc; struct power_supply_config psy_cfg = {}; @@ -1220,7 +1220,7 @@ static struct i2c_driver max17042_i2c_driver = { .of_match_table = of_match_ptr(max17042_dt_match), .pm = &max17042_pm_ops, }, - .probe = max17042_probe, + .probe_new = max17042_probe, .id_table = max17042_id, }; module_i2c_driver(max17042_i2c_driver); From f40ec8bc0888ee785317e0aa4ffa239f73b03d97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:44:24 +0100 Subject: [PATCH 1902/4122] power: supply: rt5033_battery: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Sebastian Reichel --- drivers/power/supply/rt5033_battery.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/power/supply/rt5033_battery.c b/drivers/power/supply/rt5033_battery.c index 736dec608ff6..5c04cf305219 100644 --- a/drivers/power/supply/rt5033_battery.c +++ b/drivers/power/supply/rt5033_battery.c @@ -112,8 +112,7 @@ static const struct power_supply_desc rt5033_battery_desc = { .num_properties = ARRAY_SIZE(rt5033_battery_props), }; -static int rt5033_battery_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int rt5033_battery_probe(struct i2c_client *client) { struct i2c_adapter *adapter = client->adapter; struct power_supply_config psy_cfg = {}; @@ -173,7 +172,7 @@ static struct i2c_driver rt5033_battery_driver = { .name = "rt5033-battery", .of_match_table = rt5033_battery_of_match, }, - .probe = rt5033_battery_probe, + .probe_new = rt5033_battery_probe, .remove = rt5033_battery_remove, .id_table = rt5033_battery_id, }; From 2adfc4370ebb745380a44c3e5418486a32a9ba67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:44:25 +0100 Subject: [PATCH 1903/4122] power: supply: rt9455: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Sebastian Reichel --- drivers/power/supply/rt9455_charger.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/power/supply/rt9455_charger.c b/drivers/power/supply/rt9455_charger.c index 72962286d704..31fb6526a1fd 100644 --- a/drivers/power/supply/rt9455_charger.c +++ b/drivers/power/supply/rt9455_charger.c @@ -1581,8 +1581,7 @@ static const struct regmap_config rt9455_regmap_config = { .cache_type = REGCACHE_RBTREE, }; -static int rt9455_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int rt9455_probe(struct i2c_client *client) { struct i2c_adapter *adapter = client->adapter; struct device *dev = &client->dev; @@ -1738,7 +1737,7 @@ MODULE_DEVICE_TABLE(acpi, rt9455_i2c_acpi_match); #endif static struct i2c_driver rt9455_driver = { - .probe = rt9455_probe, + .probe_new = rt9455_probe, .remove = rt9455_remove, .id_table = rt9455_i2c_id_table, .driver = { From ef3f6e07d55aeb32880fd24993de4efa9ec09c41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:44:26 +0100 Subject: [PATCH 1904/4122] power: supply: sbs: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Sebastian Reichel --- drivers/power/supply/sbs-charger.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/power/supply/sbs-charger.c b/drivers/power/supply/sbs-charger.c index b08f7d0c4181..75ebcbf0a788 100644 --- a/drivers/power/supply/sbs-charger.c +++ b/drivers/power/supply/sbs-charger.c @@ -162,8 +162,7 @@ static const struct power_supply_desc sbs_desc = { .get_property = sbs_get_property, }; -static int sbs_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int sbs_probe(struct i2c_client *client) { struct power_supply_config psy_cfg = {}; struct sbs_info *chip; @@ -241,7 +240,7 @@ static const struct i2c_device_id sbs_id[] = { MODULE_DEVICE_TABLE(i2c, sbs_id); static struct i2c_driver sbs_driver = { - .probe = sbs_probe, + .probe_new = sbs_probe, .id_table = sbs_id, .driver = { .name = "sbs-charger", From 02d1a40141a7b9d9cb8ef151c14e7d7aeaa56966 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:44:27 +0100 Subject: [PATCH 1905/4122] power: supply: sbs-manager: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Sebastian Reichel --- drivers/power/supply/sbs-manager.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/power/supply/sbs-manager.c b/drivers/power/supply/sbs-manager.c index 71ec8f74f835..bde977391fd4 100644 --- a/drivers/power/supply/sbs-manager.c +++ b/drivers/power/supply/sbs-manager.c @@ -315,9 +315,9 @@ static void sbsm_del_mux_adapter(void *data) i2c_mux_del_adapters(sbsm->muxc); } -static int sbsm_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int sbsm_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct i2c_adapter *adapter = client->adapter; struct sbsm_data *data; struct device *dev = &client->dev; @@ -409,7 +409,7 @@ static struct i2c_driver sbsm_driver = { .name = "sbsm", .of_match_table = of_match_ptr(sbsm_dt_ids), }, - .probe = sbsm_probe, + .probe_new = sbsm_probe, .alert = sbsm_alert, .id_table = sbsm_ids }; From 6d43a4b0b2f8ab6c9893e2673a133d1798b230e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:44:28 +0100 Subject: [PATCH 1906/4122] power: supply: smb347: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Sebastian Reichel --- drivers/power/supply/smb347-charger.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/power/supply/smb347-charger.c b/drivers/power/supply/smb347-charger.c index 996a82f8a2a1..b5f038310282 100644 --- a/drivers/power/supply/smb347-charger.c +++ b/drivers/power/supply/smb347-charger.c @@ -1528,9 +1528,9 @@ static const struct regulator_desc smb347_usb_vbus_regulator_desc = { .n_voltages = 1, }; -static int smb347_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int smb347_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct power_supply_config mains_usb_cfg = {}; struct regulator_config usb_rdev_cfg = {}; struct device *dev = &client->dev; @@ -1629,7 +1629,7 @@ static struct i2c_driver smb347_driver = { .name = "smb347", .of_match_table = smb3xx_of_match, }, - .probe = smb347_probe, + .probe_new = smb347_probe, .remove = smb347_remove, .shutdown = smb347_shutdown, .id_table = smb347_id, From 8a4f891b2a10f6bbc7bac256b2f745f03d2b7185 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:44:29 +0100 Subject: [PATCH 1907/4122] power: supply: ucs1002: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Sebastian Reichel --- drivers/power/supply/ucs1002_power.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/power/supply/ucs1002_power.c b/drivers/power/supply/ucs1002_power.c index ef673ec3db56..836d44c9fb74 100644 --- a/drivers/power/supply/ucs1002_power.c +++ b/drivers/power/supply/ucs1002_power.c @@ -532,8 +532,7 @@ static const struct regulator_desc ucs1002_regulator_descriptor = { .n_voltages = 1, }; -static int ucs1002_probe(struct i2c_client *client, - const struct i2c_device_id *dev_id) +static int ucs1002_probe(struct i2c_client *client) { struct device *dev = &client->dev; struct power_supply_config charger_config = {}; @@ -681,7 +680,7 @@ static struct i2c_driver ucs1002_driver = { .name = "ucs1002", .of_match_table = ucs1002_of_match, }, - .probe = ucs1002_probe, + .probe_new = ucs1002_probe, }; module_i2c_driver(ucs1002_driver); From 922bde5a095540fe3870245e4f0b625a20967ea4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:44:30 +0100 Subject: [PATCH 1908/4122] power: supply: z2_battery: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Sebastian Reichel --- drivers/power/supply/z2_battery.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/power/supply/z2_battery.c b/drivers/power/supply/z2_battery.c index 1897c2984860..eb01b01bf593 100644 --- a/drivers/power/supply/z2_battery.c +++ b/drivers/power/supply/z2_battery.c @@ -176,8 +176,7 @@ static int z2_batt_ps_init(struct z2_charger *charger, int props) return 0; } -static int z2_batt_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int z2_batt_probe(struct i2c_client *client) { int ret = 0; int props = 1; /* POWER_SUPPLY_PROP_PRESENT */ @@ -306,7 +305,7 @@ static struct i2c_driver z2_batt_driver = { .name = "z2-battery", .pm = Z2_BATTERY_PM_OPS }, - .probe = z2_batt_probe, + .probe_new = z2_batt_probe, .remove = z2_batt_remove, .id_table = z2_batt_id, }; From 0d2573a2b7838a4f6934c2835e6730b38df4bcc9 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 9 Nov 2022 14:30:55 +0100 Subject: [PATCH 1909/4122] modpost: Join broken long printed messages Breaking long printed messages in multiple lines makes it very hard to look up where they originated from. 
Signed-off-by: Geert Uytterhoeven Reviewed-by: Nicolas Schier Signed-off-by: Masahiro Yamada --- scripts/mod/file2alias.c | 18 +++++++----------- scripts/mod/modpost.c | 8 +++----- 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index 80d973144fde..7df23905fdf1 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -140,25 +140,22 @@ static void device_id_check(const char *modname, const char *device_id, int i; if (size % id_size || size < id_size) { - fatal("%s: sizeof(struct %s_device_id)=%lu is not a modulo " - "of the size of " - "section __mod_%s___device_table=%lu.\n" - "Fix definition of struct %s_device_id " - "in mod_devicetable.h\n", + fatal("%s: sizeof(struct %s_device_id)=%lu is not a modulo of the size of section __mod_%s___device_table=%lu.\n" + "Fix definition of struct %s_device_id in mod_devicetable.h\n", modname, device_id, id_size, device_id, size, device_id); } /* Verify last one is a terminator */ for (i = 0; i < id_size; i++ ) { if (*(uint8_t*)(symval+size-id_size+i)) { - fprintf(stderr,"%s: struct %s_device_id is %lu bytes. " - "The last of %lu is:\n", + fprintf(stderr, + "%s: struct %s_device_id is %lu bytes. The last of %lu is:\n", modname, device_id, id_size, size / id_size); for (i = 0; i < id_size; i++ ) fprintf(stderr,"0x%02x ", *(uint8_t*)(symval+size-id_size+i) ); fprintf(stderr,"\n"); - fatal("%s: struct %s_device_id is not terminated " - "with a NULL entry!\n", modname, device_id); + fatal("%s: struct %s_device_id is not terminated with a NULL entry!\n", + modname, device_id); } } } @@ -1154,8 +1151,7 @@ static int do_amba_entry(const char *filename, DEF_FIELD(symval, amba_id, mask); if ((id & mask) != id) - fatal("%s: Masked-off bit(s) of AMBA device ID are non-zero: " - "id=0x%08X, mask=0x%08X. Please fix this driver.\n", + fatal("%s: Masked-off bit(s) of AMBA device ID are non-zero: id=0x%08X, mask=0x%08X. 
Please fix this driver.\n", filename, id, mask); p += sprintf(alias, "amba:d"); diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 2c80da0220c3..56d856f2e511 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -519,9 +519,8 @@ static int parse_elf(struct elf_info *info, const char *filename) int nobits = sechdrs[i].sh_type == SHT_NOBITS; if (!nobits && sechdrs[i].sh_offset > info->size) { - fatal("%s is truncated. sechdrs[i].sh_offset=%lu > " - "sizeof(*hrd)=%zu\n", filename, - (unsigned long)sechdrs[i].sh_offset, + fatal("%s is truncated. sechdrs[i].sh_offset=%lu > sizeof(*hrd)=%zu\n", + filename, (unsigned long)sechdrs[i].sh_offset, sizeof(*hdr)); return 0; } @@ -1355,8 +1354,7 @@ static void report_extable_warnings(const char* modname, struct elf_info* elf, get_pretty_name(is_function(tosym), &to_pretty_name, &to_pretty_name_p); - warn("%s(%s+0x%lx): Section mismatch in reference" - " from the %s %s%s to the %s %s:%s%s\n", + warn("%s(%s+0x%lx): Section mismatch in reference from the %s %s%s to the %s %s:%s%s\n", modname, fromsec, (long)r->r_offset, from_pretty_name, fromsym_name, from_pretty_name_p, to_pretty_name, tosec, tosym_name, to_pretty_name_p); From 9f8fe647797a4bc049bc7cceaf3a63584678ba04 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Thu, 10 Nov 2022 11:59:05 -0800 Subject: [PATCH 1910/4122] Makefile.debug: support for -gz=zstd Make DEBUG_INFO_COMPRESSED a choice; DEBUG_INFO_COMPRESSED_NONE is the default, DEBUG_INFO_COMPRESSED_ZLIB uses zlib, DEBUG_INFO_COMPRESSED_ZSTD uses zstd. This renames the existing KConfig option DEBUG_INFO_COMPRESSED to DEBUG_INFO_COMPRESSED_ZLIB so users upgrading may need to reset the new Kconfigs. 
Some quick N=1 measurements with du, /usr/bin/time -v, and bloaty: clang-16, x86_64 defconfig plus CONFIG_DEBUG_INFO=y CONFIG_DEBUG_INFO_COMPRESSED_NONE=y: Elapsed (wall clock) time (h:mm:ss or m:ss): 0:55.43 488M vmlinux 27.6% 136Mi 0.0% 0 .debug_info 6.1% 30.2Mi 0.0% 0 .debug_str_offsets 3.5% 17.2Mi 0.0% 0 .debug_line 3.3% 16.3Mi 0.0% 0 .debug_loclists 0.9% 4.62Mi 0.0% 0 .debug_str clang-16, x86_64 defconfig plus CONFIG_DEBUG_INFO=y CONFIG_DEBUG_INFO_COMPRESSED_ZLIB=y: Elapsed (wall clock) time (h:mm:ss or m:ss): 1:00.35 385M vmlinux 21.8% 85.4Mi 0.0% 0 .debug_info 2.1% 8.26Mi 0.0% 0 .debug_str_offsets 2.1% 8.24Mi 0.0% 0 .debug_loclists 1.9% 7.48Mi 0.0% 0 .debug_line 0.5% 1.94Mi 0.0% 0 .debug_str clang-16, x86_64 defconfig plus CONFIG_DEBUG_INFO=y CONFIG_DEBUG_INFO_COMPRESSED_ZSTD=y: Elapsed (wall clock) time (h:mm:ss or m:ss): 0:59.69 373M vmlinux 21.4% 81.4Mi 0.0% 0 .debug_info 2.3% 8.85Mi 0.0% 0 .debug_loclists 1.5% 5.71Mi 0.0% 0 .debug_line 0.5% 1.95Mi 0.0% 0 .debug_str_offsets 0.4% 1.62Mi 0.0% 0 .debug_str That's only a 3.11% overall binary size savings over zlib, but at no performance regression. Link: https://maskray.me/blog/2022-09-09-zstd-compressed-debug-sections Link: https://maskray.me/blog/2022-01-23-compressed-debug-sections Suggested-by: Sedat Dilek (DHL Supply Chain) Reviewed-by: Nathan Chancellor Signed-off-by: Nick Desaulniers Signed-off-by: Masahiro Yamada --- lib/Kconfig.debug | 29 +++++++++++++++++++++++++++-- scripts/Makefile.debug | 6 +++++- 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index c3c0b077ade3..d93dbe5a1d14 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -312,8 +312,21 @@ config DEBUG_INFO_REDUCED DEBUG_INFO build and compile times are reduced too. Only works with newer gcc versions. -config DEBUG_INFO_COMPRESSED - bool "Compressed debugging information" +choice + prompt "Compressed Debug information" + help + Compress the resulting debug info. 
Results in smaller debug info sections, + but requires that consumers are able to decompress the results. + + If unsure, choose DEBUG_INFO_COMPRESSED_NONE. + +config DEBUG_INFO_COMPRESSED_NONE + bool "Don't compress debug information" + help + Don't compress debug info sections. + +config DEBUG_INFO_COMPRESSED_ZLIB + bool "Compress debugging information with zlib" depends on $(cc-option,-gz=zlib) depends on $(ld-option,--compress-debug-sections=zlib) help @@ -327,6 +340,18 @@ config DEBUG_INFO_COMPRESSED preferable to setting $KDEB_COMPRESS to "none" which would be even larger. +config DEBUG_INFO_COMPRESSED_ZSTD + bool "Compress debugging information with zstd" + depends on $(cc-option,-gz=zstd) + depends on $(ld-option,--compress-debug-sections=zstd) + help + Compress the debug information using zstd. This may provide better + compression than zlib, for about the same time costs, but requires newer + toolchain support. Requires GCC 13.0+ or Clang 16.0+, binutils 2.40+, and + zstd. + +endchoice # "Compressed Debug information" + config DEBUG_INFO_SPLIT bool "Produce split debuginfo in .dwo files" depends on $(cc-option,-gsplit-dwarf) diff --git a/scripts/Makefile.debug b/scripts/Makefile.debug index 332c486f705f..059ff38fe0cb 100644 --- a/scripts/Makefile.debug +++ b/scripts/Makefile.debug @@ -27,10 +27,14 @@ else DEBUG_RUSTFLAGS += -Cdebuginfo=2 endif -ifdef CONFIG_DEBUG_INFO_COMPRESSED +ifdef CONFIG_DEBUG_INFO_COMPRESSED_ZLIB DEBUG_CFLAGS += -gz=zlib KBUILD_AFLAGS += -gz=zlib KBUILD_LDFLAGS += --compress-debug-sections=zlib +else ifdef CONFIG_DEBUG_INFO_COMPRESSED_ZSTD +DEBUG_CFLAGS += -gz=zstd +KBUILD_AFLAGS += -gz=zstd +KBUILD_LDFLAGS += --compress-debug-sections=zstd endif KBUILD_CFLAGS += $(DEBUG_CFLAGS) From 30daacc571d1416f24abd4cc49910ff9322a8cf6 Mon Sep 17 00:00:00 2001 From: KaiLong Wang Date: Sun, 13 Nov 2022 17:29:50 +0800 Subject: [PATCH 1911/4122] modpost: fix array_size.cocci warning Fix following coccicheck warning: 
scripts/mod/sumversion.c:219:48-49: WARNING: Use ARRAY_SIZE scripts/mod/sumversion.c:156:48-49: WARNING: Use ARRAY_SIZE Signed-off-by: KaiLong Wang Signed-off-by: Masahiro Yamada --- scripts/mod/sumversion.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/mod/sumversion.c b/scripts/mod/sumversion.c index 6bf9caca0968..31066bfdba04 100644 --- a/scripts/mod/sumversion.c +++ b/scripts/mod/sumversion.c @@ -153,7 +153,7 @@ static void md4_transform(uint32_t *hash, uint32_t const *in) static inline void md4_transform_helper(struct md4_ctx *ctx) { - le32_to_cpu_array(ctx->block, sizeof(ctx->block) / sizeof(uint32_t)); + le32_to_cpu_array(ctx->block, ARRAY_SIZE(ctx->block)); md4_transform(ctx->hash, ctx->block); } @@ -216,7 +216,7 @@ static void md4_final_ascii(struct md4_ctx *mctx, char *out, unsigned int len) le32_to_cpu_array(mctx->block, (sizeof(mctx->block) - sizeof(uint64_t)) / sizeof(uint32_t)); md4_transform(mctx->hash, mctx->block); - cpu_to_le32_array(mctx->hash, sizeof(mctx->hash) / sizeof(uint32_t)); + cpu_to_le32_array(mctx->hash, ARRAY_SIZE(mctx->hash)); snprintf(out, len, "%08X%08X%08X%08X", mctx->hash[0], mctx->hash[1], mctx->hash[2], mctx->hash[3]); From 1791360cb37ff5ef797afe9006cb315ebb7e969e Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 13 Nov 2022 19:59:41 +0900 Subject: [PATCH 1912/4122] kconfig: remove unneeded variable in get_prompt_str() The variable 'accessible' is redundant. 
Signed-off-by: Masahiro Yamada --- scripts/kconfig/menu.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scripts/kconfig/menu.c b/scripts/kconfig/menu.c index 109325f31bef..b90fff833588 100644 --- a/scripts/kconfig/menu.c +++ b/scripts/kconfig/menu.c @@ -724,10 +724,8 @@ static void get_prompt_str(struct gstr *r, struct property *prop, menu = prop->menu; for (i = 0; menu != &rootmenu && i < 8; menu = menu->parent) { - bool accessible = menu_is_visible(menu); - submenu[i++] = menu; - if (location == NULL && accessible) + if (location == NULL && menu_is_visible(menu)) location = menu; } if (head && location) { From 4d980fd111237ab64705b982f61f284c2a7885e5 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 13 Nov 2022 19:59:42 +0900 Subject: [PATCH 1913/4122] kconfig: remove const qualifier from str_get() update_text() apparently edits the buffer returned by str_get(). (and there is no reason why it shouldn't) Remove 'const' qualifier and casting. Signed-off-by: Masahiro Yamada --- scripts/kconfig/lkc.h | 2 +- scripts/kconfig/mconf.c | 4 ++-- scripts/kconfig/util.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/kconfig/lkc.h b/scripts/kconfig/lkc.h index 6ac2eabe109d..e7118d62a45f 100644 --- a/scripts/kconfig/lkc.h +++ b/scripts/kconfig/lkc.h @@ -76,7 +76,7 @@ struct gstr str_new(void); void str_free(struct gstr *gs); void str_append(struct gstr *gs, const char *s); void str_printf(struct gstr *gs, const char *fmt, ...); -const char *str_get(struct gstr *gs); +char *str_get(struct gstr *gs); /* menu.c */ void _menu_init(void); diff --git a/scripts/kconfig/mconf.c b/scripts/kconfig/mconf.c index 9d3cf510562f..d7f7e1bf7dd4 100644 --- a/scripts/kconfig/mconf.c +++ b/scripts/kconfig/mconf.c @@ -440,8 +440,8 @@ again: res = get_relations_str(sym_arr, &head); set_subtitle(); - dres = show_textbox_ext("Search Results", (char *) - str_get(&res), 0, 0, keys, &vscroll, + dres = show_textbox_ext("Search Results", 
str_get(&res), 0, 0, + keys, &vscroll, &hscroll, &update_text, (void *) &data); again = false; diff --git a/scripts/kconfig/util.c b/scripts/kconfig/util.c index 29585394df71..b78f114ad48c 100644 --- a/scripts/kconfig/util.c +++ b/scripts/kconfig/util.c @@ -74,7 +74,7 @@ void str_printf(struct gstr *gs, const char *fmt, ...) } /* Retrieve value of growable string */ -const char *str_get(struct gstr *gs) +char *str_get(struct gstr *gs) { return gs->s; } From be5ea98983efe2a2c5156c3b43e35a076d5b640d Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 13 Nov 2022 19:59:43 +0900 Subject: [PATCH 1914/4122] kconfig: remove redundant (void *) cast in search_conf() The (void *) cast is redundant because the last argument of show_textbox_ext() is an opaque pointer. Signed-off-by: Masahiro Yamada --- scripts/kconfig/mconf.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/kconfig/mconf.c b/scripts/kconfig/mconf.c index d7f7e1bf7dd4..9c549683c627 100644 --- a/scripts/kconfig/mconf.c +++ b/scripts/kconfig/mconf.c @@ -441,8 +441,7 @@ again: res = get_relations_str(sym_arr, &head); set_subtitle(); dres = show_textbox_ext("Search Results", str_get(&res), 0, 0, - keys, &vscroll, - &hscroll, &update_text, (void *) + keys, &vscroll, &hscroll, &update_text, &data); again = false; for (i = 0; i < JUMP_NB && keys[i]; i++) From f8f4dc7685c72c8ef86420566a38a4f786613851 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 15 Nov 2022 03:10:55 +0900 Subject: [PATCH 1915/4122] scripts/jobserver-exec: parse the last --jobserver-auth= option In the GNU Make manual, the section "Sharing Job Slots with GNU make" says: Be aware that the MAKEFLAGS variable may contain multiple instances of the --jobserver-auth= option. Only the last instance is relevant. Take the last element of the array, not the first. 
Link: https://www.gnu.org/software/make/manual/html_node/Job-Slots.html Signed-off-by: Masahiro Yamada Reviewed-by: Nicolas Schier --- scripts/jobserver-exec | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/jobserver-exec b/scripts/jobserver-exec index 8762887a970c..4192855f5b8b 100755 --- a/scripts/jobserver-exec +++ b/scripts/jobserver-exec @@ -23,7 +23,9 @@ try: opts = [x for x in flags.split(" ") if x.startswith("--jobserver")] # Parse out R,W file descriptor numbers and set them nonblocking. - fds = opts[0].split("=", 1)[1] + # If the MAKEFLAGS variable contains multiple instances of the + # --jobserver-auth= option, the last one is relevant. + fds = opts[-1].split("=", 1)[1] reader, writer = [int(x) for x in fds.split(",", 1)] # Open a private copy of reader to avoid setting nonblocking # on an unexpecting process with the same reader fd. From 5724ac5589ad93d35d95a845222f566175c681b8 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 17 Nov 2022 10:30:33 +0900 Subject: [PATCH 1916/4122] kbuild: deb-pkg: get rid of |flex:native workaround from Build-Depends "| flex:native" was a workaround (suggested by Ben, see Link) because "MultiArch: foreign" was missing in the flex package on some old distros when commit e3a22850664f ("deb-pkg: generate correct build dependencies") was applied. It seems fixing the flex package has been completed. Get rid of the workaround. 
Link: https://lore.kernel.org/linux-kbuild/ab49b0582ef12b14b1a68877263b81813e2492a2.camel@decadent.org.uk/ Link: https://wiki.debian.org/CrossBuildPackagingGuidelines Signed-off-by: Masahiro Yamada Reviewed-by: Ben Hutchings --- scripts/package/mkdebian | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/package/mkdebian b/scripts/package/mkdebian index a3ac5a716e9f..6cf383225b8b 100755 --- a/scripts/package/mkdebian +++ b/scripts/package/mkdebian @@ -175,7 +175,7 @@ Section: kernel Priority: optional Maintainer: $maintainer Rules-Requires-Root: no -Build-Depends: bc, rsync, kmod, cpio, bison, flex | flex:native $extra_build_depends +Build-Depends: bc, rsync, kmod, cpio, bison, flex $extra_build_depends Homepage: https://www.kernel.org/ Package: $packagename-$version From 9e7726a8a08a65ed48e2749ef62ec4970bdf851f Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 1 Nov 2022 15:54:15 +0100 Subject: [PATCH 1917/4122] KVM: selftests: Hyper-V PV TLB flush selftest Introduce a selftest for Hyper-V PV TLB flush hypercalls (HvFlushVirtualAddressSpace/HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressList/HvFlushVirtualAddressListEx). The test creates one 'sender' vCPU and two 'worker' vCPU which do busy loop reading from a certain GVA checking the observed value. Sender vCPU swaps the data page with another page filled with a different value. The expectation for workers is also altered. Without TLB flush on worker vCPUs, they may continue to observe old value. To guard against accidental TLB flushes for worker vCPUs the test is repeated 100 times. Hyper-V TLB flush hypercalls are tested in both 'normal' and 'XMM fast' modes. 
Reviewed-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-38-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 1 + .../selftests/kvm/include/x86_64/hyperv.h | 1 + .../selftests/kvm/x86_64/hyperv_tlb_flush.c | 690 ++++++++++++++++++ 4 files changed, 693 insertions(+) create mode 100644 tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 3b3218cb46ed..dc7e28cf2da0 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -27,6 +27,7 @@ /x86_64/hyperv_features /x86_64/hyperv_ipi /x86_64/hyperv_svm_test +/x86_64/hyperv_tlb_flush /x86_64/max_vcpuid_cap_test /x86_64/mmio_warning_test /x86_64/monitor_mwait_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 4095b1212f08..058b15213c5d 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -89,6 +89,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid TEST_GEN_PROGS_x86_64 += x86_64/hyperv_features TEST_GEN_PROGS_x86_64 += x86_64/hyperv_ipi TEST_GEN_PROGS_x86_64 += x86_64/hyperv_svm_test +TEST_GEN_PROGS_x86_64 += x86_64/hyperv_tlb_flush TEST_GEN_PROGS_x86_64 += x86_64/kvm_clock_test TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86_64/hyperv.h index c757e4001173..ae945f740835 100644 --- a/tools/testing/selftests/kvm/include/x86_64/hyperv.h +++ b/tools/testing/selftests/kvm/include/x86_64/hyperv.h @@ -187,6 +187,7 @@ /* hypercall options */ #define HV_HYPERCALL_FAST_BIT BIT(16) #define HV_HYPERCALL_VARHEAD_OFFSET 17 +#define HV_HYPERCALL_REP_COMP_OFFSET 32 /* * Issue a Hyper-V hypercall. 
Returns exception vector raised or 0, 'hv_status' diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c new file mode 100644 index 000000000000..68f97ff720a7 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c @@ -0,0 +1,690 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hyper-V HvFlushVirtualAddress{List,Space}{,Ex} tests + * + * Copyright (C) 2022, Red Hat, Inc. + * + */ + +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include +#include +#include + +#include "kvm_util.h" +#include "processor.h" +#include "hyperv.h" +#include "test_util.h" +#include "vmx.h" + +#define WORKER_VCPU_ID_1 2 +#define WORKER_VCPU_ID_2 65 + +#define NTRY 100 +#define NTEST_PAGES 2 + +struct hv_vpset { + u64 format; + u64 valid_bank_mask; + u64 bank_contents[]; +}; + +enum HV_GENERIC_SET_FORMAT { + HV_GENERIC_SET_SPARSE_4K, + HV_GENERIC_SET_ALL, +}; + +#define HV_FLUSH_ALL_PROCESSORS BIT(0) +#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1) +#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2) +#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3) + +/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */ +struct hv_tlb_flush { + u64 address_space; + u64 flags; + u64 processor_mask; + u64 gva_list[]; +} __packed; + +/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */ +struct hv_tlb_flush_ex { + u64 address_space; + u64 flags; + struct hv_vpset hv_vp_set; + u64 gva_list[]; +} __packed; + +/* + * Pass the following info to 'workers' and 'sender' + * - Hypercall page's GVA + * - Hypercall page's GPA + * - Test pages GVA + * - GVAs of the test pages' PTEs + */ +struct test_data { + vm_vaddr_t hcall_gva; + vm_paddr_t hcall_gpa; + vm_vaddr_t test_pages; + vm_vaddr_t test_pages_pte[NTEST_PAGES]; +}; + +/* 'Worker' vCPU code checking the contents of the test page */ +static void worker_guest_code(vm_vaddr_t test_data) +{ + struct test_data *data = (struct 
test_data *)test_data; + u32 vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX); + void *exp_page = (void *)data->test_pages + PAGE_SIZE * NTEST_PAGES; + u64 *this_cpu = (u64 *)(exp_page + vcpu_id * sizeof(u64)); + u64 expected, val; + + x2apic_enable(); + wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); + + for (;;) { + cpu_relax(); + + expected = READ_ONCE(*this_cpu); + + /* + * Make sure the value in the test page is read after reading + * the expectation for the first time. Pairs with wmb() in + * prepare_to_test(). + */ + rmb(); + + val = READ_ONCE(*(u64 *)data->test_pages); + + /* + * Make sure the value in the test page is read before + * reading the expectation for the second time. Pairs with wmb() in + * post_test(). + */ + rmb(); + + /* + * '0' indicates the sender is between iterations, wait until + * the sender is ready for this vCPU to start checking again. + */ + if (!expected) + continue; + + /* + * Re-read the per-vCPU byte to ensure the sender didn't move + * onto a new iteration. + */ + if (expected != READ_ONCE(*this_cpu)) + continue; + + GUEST_ASSERT(val == expected); + } +} + +/* + * Write per-CPU info indicating what each 'worker' CPU is supposed to see in + * test page. '0' means don't check. + */ +static void set_expected_val(void *addr, u64 val, int vcpu_id) +{ + void *exp_page = addr + PAGE_SIZE * NTEST_PAGES; + + *(u64 *)(exp_page + vcpu_id * sizeof(u64)) = val; +} + +/* + * Update PTEs swapping two test pages. + * TODO: use swap()/xchg() when these are provided. + */ +static void swap_two_test_pages(vm_paddr_t pte_gva1, vm_paddr_t pte_gva2) +{ + uint64_t tmp = *(uint64_t *)pte_gva1; + + *(uint64_t *)pte_gva1 = *(uint64_t *)pte_gva2; + *(uint64_t *)pte_gva2 = tmp; +} + +/* + * TODO: replace the silly NOP loop with a proper udelay() implementation. 
+ */ +static inline void do_delay(void) +{ + int i; + + for (i = 0; i < 1000000; i++) + asm volatile("nop"); +} + +/* + * Prepare to test: 'disable' workers by setting the expectation to '0', + * clear hypercall input page and then swap two test pages. + */ +static inline void prepare_to_test(struct test_data *data) +{ + /* Clear hypercall input page */ + memset((void *)data->hcall_gva, 0, PAGE_SIZE); + + /* 'Disable' workers */ + set_expected_val((void *)data->test_pages, 0x0, WORKER_VCPU_ID_1); + set_expected_val((void *)data->test_pages, 0x0, WORKER_VCPU_ID_2); + + /* Make sure workers are 'disabled' before we swap PTEs. */ + wmb(); + + /* Make sure workers have enough time to notice */ + do_delay(); + + /* Swap test page mappings */ + swap_two_test_pages(data->test_pages_pte[0], data->test_pages_pte[1]); +} + +/* + * Finalize the test: check hypercall result, set the expected val for + * 'worker' CPUs and give them some time to test. + */ +static inline void post_test(struct test_data *data, u64 exp1, u64 exp2) +{ + /* Make sure we change the expectation after swapping PTEs */ + wmb(); + + /* Set the expectation for workers, '0' means don't test */ + set_expected_val((void *)data->test_pages, exp1, WORKER_VCPU_ID_1); + set_expected_val((void *)data->test_pages, exp2, WORKER_VCPU_ID_2); + + /* Make sure workers have enough time to test */ + do_delay(); +} + +#define TESTVAL1 0x0101010101010101 +#define TESTVAL2 0x0202020202020202 + +/* Main vCPU doing the test */ +static void sender_guest_code(vm_vaddr_t test_data) +{ + struct test_data *data = (struct test_data *)test_data; + struct hv_tlb_flush *flush = (struct hv_tlb_flush *)data->hcall_gva; + struct hv_tlb_flush_ex *flush_ex = (struct hv_tlb_flush_ex *)data->hcall_gva; + vm_paddr_t hcall_gpa = data->hcall_gpa; + int i, stage = 1; + + wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); + wrmsr(HV_X64_MSR_HYPERCALL, data->hcall_gpa); + + /* "Slow" hypercalls */ + + GUEST_SYNC(stage++); + + /* 
HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for WORKER_VCPU_ID_1 */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + flush->processor_mask = BIT(WORKER_VCPU_ID_1); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, hcall_gpa, + hcall_gpa + PAGE_SIZE); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for WORKER_VCPU_ID_1 */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + flush->processor_mask = BIT(WORKER_VCPU_ID_1); + flush->gva_list[0] = (u64)data->test_pages; + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST | + (1UL << HV_HYPERCALL_REP_COMP_OFFSET), + hcall_gpa, hcall_gpa + PAGE_SIZE); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for HV_FLUSH_ALL_PROCESSORS */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | + HV_FLUSH_ALL_PROCESSORS; + flush->processor_mask = 0; + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, hcall_gpa, + hcall_gpa + PAGE_SIZE); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for HV_FLUSH_ALL_PROCESSORS */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | + HV_FLUSH_ALL_PROCESSORS; + flush->gva_list[0] = (u64)data->test_pages; + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST | + (1UL << HV_HYPERCALL_REP_COMP_OFFSET), + hcall_gpa, hcall_gpa + PAGE_SIZE); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, + i % 2 ? 
TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for WORKER_VCPU_ID_2 */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; + flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64); + flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX | + (1 << HV_HYPERCALL_VARHEAD_OFFSET), + hcall_gpa, hcall_gpa + PAGE_SIZE); + post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for WORKER_VCPU_ID_2 */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; + flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64); + flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64); + /* bank_contents and gva_list occupy the same space, thus [1] */ + flush_ex->gva_list[1] = (u64)data->test_pages; + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX | + (1 << HV_HYPERCALL_VARHEAD_OFFSET) | + (1UL << HV_HYPERCALL_REP_COMP_OFFSET), + hcall_gpa, hcall_gpa + PAGE_SIZE); + post_test(data, 0x0, i % 2 ? 
TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for both vCPUs */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; + flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64) | + BIT_ULL(WORKER_VCPU_ID_1 / 64); + flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64); + flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX | + (2 << HV_HYPERCALL_VARHEAD_OFFSET), + hcall_gpa, hcall_gpa + PAGE_SIZE); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, + i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for both vCPUs */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; + flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_1 / 64) | + BIT_ULL(WORKER_VCPU_ID_2 / 64); + flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64); + flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64); + /* bank_contents and gva_list occupy the same space, thus [2] */ + flush_ex->gva_list[2] = (u64)data->test_pages; + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX | + (2 << HV_HYPERCALL_VARHEAD_OFFSET) | + (1UL << HV_HYPERCALL_REP_COMP_OFFSET), + hcall_gpa, hcall_gpa + PAGE_SIZE); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, + i % 2 ? 
TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for HV_GENERIC_SET_ALL */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL; + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX, + hcall_gpa, hcall_gpa + PAGE_SIZE); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, + i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for HV_GENERIC_SET_ALL */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL; + flush_ex->gva_list[0] = (u64)data->test_pages; + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX | + (1UL << HV_HYPERCALL_REP_COMP_OFFSET), + hcall_gpa, hcall_gpa + PAGE_SIZE); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, + i % 2 ? TESTVAL1 : TESTVAL2); + } + + /* "Fast" hypercalls */ + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for WORKER_VCPU_ID_1 */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush->processor_mask = BIT(WORKER_VCPU_ID_1); + hyperv_write_xmm_input(&flush->processor_mask, 1); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | + HV_HYPERCALL_FAST_BIT, 0x0, + HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for WORKER_VCPU_ID_1 */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush->processor_mask = BIT(WORKER_VCPU_ID_1); + flush->gva_list[0] = (u64)data->test_pages; + hyperv_write_xmm_input(&flush->processor_mask, 1); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST | + HV_HYPERCALL_FAST_BIT | + (1UL << HV_HYPERCALL_REP_COMP_OFFSET), + 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES); + post_test(data, i % 2 ? 
TESTVAL1 : TESTVAL2, 0x0); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for HV_FLUSH_ALL_PROCESSORS */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + hyperv_write_xmm_input(&flush->processor_mask, 1); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | + HV_HYPERCALL_FAST_BIT, 0x0, + HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | + HV_FLUSH_ALL_PROCESSORS); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, + i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for HV_FLUSH_ALL_PROCESSORS */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush->gva_list[0] = (u64)data->test_pages; + hyperv_write_xmm_input(&flush->processor_mask, 1); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST | + HV_HYPERCALL_FAST_BIT | + (1UL << HV_HYPERCALL_REP_COMP_OFFSET), 0x0, + HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | + HV_FLUSH_ALL_PROCESSORS); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, + i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for WORKER_VCPU_ID_2 */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; + flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64); + flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64); + hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX | + HV_HYPERCALL_FAST_BIT | + (1 << HV_HYPERCALL_VARHEAD_OFFSET), + 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES); + post_test(data, 0x0, i % 2 ? 
TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for WORKER_VCPU_ID_2 */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; + flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64); + flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64); + /* bank_contents and gva_list occupy the same space, thus [1] */ + flush_ex->gva_list[1] = (u64)data->test_pages; + hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX | + HV_HYPERCALL_FAST_BIT | + (1 << HV_HYPERCALL_VARHEAD_OFFSET) | + (1UL << HV_HYPERCALL_REP_COMP_OFFSET), + 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES); + post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for both vCPUs */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; + flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64) | + BIT_ULL(WORKER_VCPU_ID_1 / 64); + flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64); + flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64); + hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX | + HV_HYPERCALL_FAST_BIT | + (2 << HV_HYPERCALL_VARHEAD_OFFSET), + 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES); + post_test(data, i % 2 ? TESTVAL1 : + TESTVAL2, i % 2 ? 
TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for both vCPUs */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; + flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_1 / 64) | + BIT_ULL(WORKER_VCPU_ID_2 / 64); + flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64); + flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64); + /* bank_contents and gva_list occupy the same space, thus [2] */ + flush_ex->gva_list[2] = (u64)data->test_pages; + hyperv_write_xmm_input(&flush_ex->hv_vp_set, 3); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX | + HV_HYPERCALL_FAST_BIT | + (2 << HV_HYPERCALL_VARHEAD_OFFSET) | + (1UL << HV_HYPERCALL_REP_COMP_OFFSET), + 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, + i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for HV_GENERIC_SET_ALL */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL; + hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX | + HV_HYPERCALL_FAST_BIT, + 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, + i % 2 ? 
TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for HV_GENERIC_SET_ALL */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL; + flush_ex->gva_list[0] = (u64)data->test_pages; + hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX | + HV_HYPERCALL_FAST_BIT | + (1UL << HV_HYPERCALL_REP_COMP_OFFSET), + 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, + i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_DONE(); +} + +static void *vcpu_thread(void *arg) +{ + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)arg; + struct ucall uc; + int old; + int r; + unsigned int exit_reason; + + r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old); + TEST_ASSERT(!r, "pthread_setcanceltype failed on vcpu_id=%u with errno=%d", + vcpu->id, r); + + vcpu_run(vcpu); + exit_reason = vcpu->run->exit_reason; + + TEST_ASSERT(exit_reason == KVM_EXIT_IO, + "vCPU %u exited with unexpected exit reason %u-%s, expected KVM_EXIT_IO", + vcpu->id, exit_reason, exit_reason_str(exit_reason)); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + /* NOT REACHED */ + default: + TEST_FAIL("Unexpected ucall %lu, vCPU %d", uc.cmd, vcpu->id); + } + + return NULL; +} + +static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu) +{ + void *retval; + int r; + + r = pthread_cancel(thread); + TEST_ASSERT(!r, "pthread_cancel on vcpu_id=%d failed with errno=%d", + vcpu->id, r); + + r = pthread_join(thread, &retval); + TEST_ASSERT(!r, "pthread_join on vcpu_id=%d failed with errno=%d", + vcpu->id, r); + TEST_ASSERT(retval == PTHREAD_CANCELED, + "expected retval=%p, got %p", PTHREAD_CANCELED, + retval); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vm *vm; + struct kvm_vcpu *vcpu[3]; + unsigned int exit_reason; + 
pthread_t threads[2]; + vm_vaddr_t test_data_page, gva; + vm_paddr_t gpa; + uint64_t *pte; + struct test_data *data; + struct ucall uc; + int stage = 1, r, i; + + vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code); + + /* Test data page */ + test_data_page = vm_vaddr_alloc_page(vm); + data = (struct test_data *)addr_gva2hva(vm, test_data_page); + + /* Hypercall input/output */ + data->hcall_gva = vm_vaddr_alloc_pages(vm, 2); + data->hcall_gpa = addr_gva2gpa(vm, data->hcall_gva); + memset(addr_gva2hva(vm, data->hcall_gva), 0x0, 2 * PAGE_SIZE); + + /* + * Test pages: the first one is filled with '0x01's, the second with '0x02's + * and the test will swap their mappings. The third page keeps the indication + * about the current state of mappings. + */ + data->test_pages = vm_vaddr_alloc_pages(vm, NTEST_PAGES + 1); + for (i = 0; i < NTEST_PAGES; i++) + memset(addr_gva2hva(vm, data->test_pages + PAGE_SIZE * i), + (u8)(i + 1), PAGE_SIZE); + set_expected_val(addr_gva2hva(vm, data->test_pages), 0x0, WORKER_VCPU_ID_1); + set_expected_val(addr_gva2hva(vm, data->test_pages), 0x0, WORKER_VCPU_ID_2); + + /* + * Get PTE pointers for test pages and map them inside the guest. + * Use separate page for each PTE for simplicity. + */ + gva = vm_vaddr_unused_gap(vm, NTEST_PAGES * PAGE_SIZE, KVM_UTIL_MIN_VADDR); + for (i = 0; i < NTEST_PAGES; i++) { + pte = vm_get_page_table_entry(vm, data->test_pages + i * PAGE_SIZE); + gpa = addr_hva2gpa(vm, pte); + __virt_pg_map(vm, gva + PAGE_SIZE * i, gpa & PAGE_MASK, PG_LEVEL_4K); + data->test_pages_pte[i] = gva + (gpa & ~PAGE_MASK); + } + + /* + * Sender vCPU which performs the test: swaps test pages, sets expectation + * for 'workers' and issues TLB flush hypercalls. 
+ */ + vcpu_args_set(vcpu[0], 1, test_data_page); + vcpu_set_hv_cpuid(vcpu[0]); + + /* Create worker vCPUs which check the contents of the test pages */ + vcpu[1] = vm_vcpu_add(vm, WORKER_VCPU_ID_1, worker_guest_code); + vcpu_args_set(vcpu[1], 1, test_data_page); + vcpu_set_msr(vcpu[1], HV_X64_MSR_VP_INDEX, WORKER_VCPU_ID_1); + vcpu_set_hv_cpuid(vcpu[1]); + + vcpu[2] = vm_vcpu_add(vm, WORKER_VCPU_ID_2, worker_guest_code); + vcpu_args_set(vcpu[2], 1, test_data_page); + vcpu_set_msr(vcpu[2], HV_X64_MSR_VP_INDEX, WORKER_VCPU_ID_2); + vcpu_set_hv_cpuid(vcpu[2]); + + r = pthread_create(&threads[0], NULL, vcpu_thread, vcpu[1]); + TEST_ASSERT(!r, "pthread_create() failed"); + + r = pthread_create(&threads[1], NULL, vcpu_thread, vcpu[2]); + TEST_ASSERT(!r, "pthread_create() failed"); + + while (true) { + vcpu_run(vcpu[0]); + exit_reason = vcpu[0]->run->exit_reason; + + TEST_ASSERT(exit_reason == KVM_EXIT_IO, + "unexpected exit reason: %u (%s)", + exit_reason, exit_reason_str(exit_reason)); + + switch (get_ucall(vcpu[0], &uc)) { + case UCALL_SYNC: + TEST_ASSERT(uc.args[1] == stage, + "Unexpected stage: %ld (%d expected)\n", + uc.args[1], stage); + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + /* NOT REACHED */ + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + + stage++; + } + +done: + cancel_join_vcpu_thread(threads[0], vcpu[1]); + cancel_join_vcpu_thread(threads[1], vcpu[2]); + kvm_vm_free(vm); + + return 0; +} From 1ad51c0c0cdd5315405d1c93a345635451c245bb Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 1 Nov 2022 15:54:16 +0100 Subject: [PATCH 1918/4122] KVM: selftests: Sync 'struct hv_enlightened_vmcs' definition with hyperv-tlfs.h 'struct hv_enlightened_vmcs' definition in selftests is not '__packed' and so we rely on the compiler doing the right padding. This is not obvious so it seems beneficial to use the same definition as in kernel. 
Reviewed-by: Maxim Levitsky Reviewed-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-39-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/include/x86_64/evmcs.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/evmcs.h b/tools/testing/selftests/kvm/include/x86_64/evmcs.h index 58db74f68af2..4b6840df2979 100644 --- a/tools/testing/selftests/kvm/include/x86_64/evmcs.h +++ b/tools/testing/selftests/kvm/include/x86_64/evmcs.h @@ -41,6 +41,8 @@ struct hv_enlightened_vmcs { u16 host_gs_selector; u16 host_tr_selector; + u16 padding16_1; + u64 host_ia32_pat; u64 host_ia32_efer; @@ -159,7 +161,7 @@ struct hv_enlightened_vmcs { u64 ept_pointer; u16 virtual_processor_id; - u16 padding16[3]; + u16 padding16_2[3]; u64 padding64_2[5]; u64 guest_physical_address; @@ -195,13 +197,13 @@ struct hv_enlightened_vmcs { u64 guest_rip; u32 hv_clean_fields; - u32 hv_padding_32; + u32 padding32_1; u32 hv_synthetic_controls; struct { u32 nested_flush_hypercall:1; u32 msr_bitmap:1; u32 reserved:30; - } hv_enlightenments_control; + } __packed hv_enlightenments_control; u32 hv_vp_id; u32 padding32_2; u64 hv_vm_id; @@ -222,7 +224,7 @@ struct hv_enlightened_vmcs { u64 host_ssp; u64 host_ia32_int_ssp_table_addr; u64 padding64_6; -}; +} __packed; #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE 0 #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP BIT(0) From d7b14a868ac2122459b2d702fe05c75c48a687bf Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 1 Nov 2022 15:54:17 +0100 Subject: [PATCH 1919/4122] KVM: selftests: Sync 'struct hv_vp_assist_page' definition with hyperv-tlfs.h 'struct hv_vp_assist_page' definition doesn't match TLFS. Also, define 'struct hv_nested_enlightenments_control' and use it instead of opaque '__u64'. 
Reviewed-by: Maxim Levitsky Reviewed-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-40-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- .../selftests/kvm/include/x86_64/evmcs.h | 22 ++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/evmcs.h b/tools/testing/selftests/kvm/include/x86_64/evmcs.h index 4b6840df2979..efdc62704f27 100644 --- a/tools/testing/selftests/kvm/include/x86_64/evmcs.h +++ b/tools/testing/selftests/kvm/include/x86_64/evmcs.h @@ -20,14 +20,26 @@ extern bool enable_evmcs; +struct hv_nested_enlightenments_control { + struct { + __u32 directhypercall:1; + __u32 reserved:31; + } features; + struct { + __u32 reserved; + } hypercallControls; +} __packed; + +/* Define virtual processor assist page structure. */ struct hv_vp_assist_page { __u32 apic_assist; - __u32 reserved; - __u64 vtl_control[2]; - __u64 nested_enlightenments_control[2]; - __u32 enlighten_vmentry; + __u32 reserved1; + __u64 vtl_control[3]; + struct hv_nested_enlightenments_control nested_control; + __u8 enlighten_vmentry; + __u8 reserved2[7]; __u64 current_nested_vmcs; -}; +} __packed; struct hv_enlightened_vmcs { u32 revision_id; From e8f3d23c02d09210a980ef211b3e8a99d44cb602 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 1 Nov 2022 15:54:18 +0100 Subject: [PATCH 1920/4122] KVM: selftests: Move Hyper-V VP assist page enablement out of evmcs.h Hyper-V VP assist page is not eVMCS specific, it is also used for enlightened nSVM. Move the code to vendor neutral place. 
Reviewed-by: Maxim Levitsky Reviewed-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-41-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 1 + .../selftests/kvm/include/x86_64/evmcs.h | 40 +------------------ .../selftests/kvm/include/x86_64/hyperv.h | 31 ++++++++++++++ .../testing/selftests/kvm/lib/x86_64/hyperv.c | 21 ++++++++++ .../testing/selftests/kvm/x86_64/evmcs_test.c | 1 + 5 files changed, 56 insertions(+), 38 deletions(-) create mode 100644 tools/testing/selftests/kvm/lib/x86_64/hyperv.c diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 058b15213c5d..246d52e9df60 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -53,6 +53,7 @@ LIBKVM_STRING += lib/string_override.c LIBKVM_x86_64 += lib/x86_64/apic.c LIBKVM_x86_64 += lib/x86_64/handlers.S +LIBKVM_x86_64 += lib/x86_64/hyperv.c LIBKVM_x86_64 += lib/x86_64/memstress.c LIBKVM_x86_64 += lib/x86_64/processor.c LIBKVM_x86_64 += lib/x86_64/svm.c diff --git a/tools/testing/selftests/kvm/include/x86_64/evmcs.h b/tools/testing/selftests/kvm/include/x86_64/evmcs.h index efdc62704f27..2530b5aeb4ba 100644 --- a/tools/testing/selftests/kvm/include/x86_64/evmcs.h +++ b/tools/testing/selftests/kvm/include/x86_64/evmcs.h @@ -10,6 +10,7 @@ #define SELFTEST_KVM_EVMCS_H #include +#include "hyperv.h" #include "vmx.h" #define u16 uint16_t @@ -20,27 +21,6 @@ extern bool enable_evmcs; -struct hv_nested_enlightenments_control { - struct { - __u32 directhypercall:1; - __u32 reserved:31; - } features; - struct { - __u32 reserved; - } hypercallControls; -} __packed; - -/* Define virtual processor assist page structure. 
*/ -struct hv_vp_assist_page { - __u32 apic_assist; - __u32 reserved1; - __u64 vtl_control[3]; - struct hv_nested_enlightenments_control nested_control; - __u8 enlighten_vmentry; - __u8 reserved2[7]; - __u64 current_nested_vmcs; -} __packed; - struct hv_enlightened_vmcs { u32 revision_id; u32 abort; @@ -257,29 +237,13 @@ struct hv_enlightened_vmcs { #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ENLIGHTENMENTSCONTROL BIT(15) #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL 0xFFFF -#define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073 -#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001 -#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12 -#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \ - (~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) - extern struct hv_enlightened_vmcs *current_evmcs; -extern struct hv_vp_assist_page *current_vp_assist; int vcpu_enable_evmcs(struct kvm_vcpu *vcpu); -static inline int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist) +static inline void evmcs_enable(void) { - u64 val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) | - HV_X64_MSR_VP_ASSIST_PAGE_ENABLE; - - wrmsr(HV_X64_MSR_VP_ASSIST_PAGE, val); - - current_vp_assist = vp_assist; - enable_evmcs = true; - - return 0; } static inline int evmcs_vmptrld(uint64_t vmcs_pa, void *vmcs) diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86_64/hyperv.h index ae945f740835..ba38fa347cba 100644 --- a/tools/testing/selftests/kvm/include/x86_64/hyperv.h +++ b/tools/testing/selftests/kvm/include/x86_64/hyperv.h @@ -237,4 +237,35 @@ static inline void hyperv_write_xmm_input(void *data, int n_sse_regs) /* Proper HV_X64_MSR_GUEST_OS_ID value */ #define HYPERV_LINUX_OS_ID ((u64)0x8100 << 48) +#define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073 +#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001 +#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12 +#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \ + (~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 
1)) + +struct hv_nested_enlightenments_control { + struct { + __u32 directhypercall:1; + __u32 reserved:31; + } features; + struct { + __u32 reserved; + } hypercallControls; +} __packed; + +/* Define virtual processor assist page structure. */ +struct hv_vp_assist_page { + __u32 apic_assist; + __u32 reserved1; + __u64 vtl_control[3]; + struct hv_nested_enlightenments_control nested_control; + __u8 enlighten_vmentry; + __u8 reserved2[7]; + __u64 current_nested_vmcs; +} __packed; + +extern struct hv_vp_assist_page *current_vp_assist; + +int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist); + #endif /* !SELFTEST_KVM_HYPERV_H */ diff --git a/tools/testing/selftests/kvm/lib/x86_64/hyperv.c b/tools/testing/selftests/kvm/lib/x86_64/hyperv.c new file mode 100644 index 000000000000..32dc0afd9e5b --- /dev/null +++ b/tools/testing/selftests/kvm/lib/x86_64/hyperv.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Hyper-V specific functions. + * + * Copyright (C) 2021, Red Hat Inc. 
+ */ +#include +#include "processor.h" +#include "hyperv.h" + +int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist) +{ + uint64_t val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) | + HV_X64_MSR_VP_ASSIST_PAGE_ENABLE; + + wrmsr(HV_X64_MSR_VP_ASSIST_PAGE, val); + + current_vp_assist = vp_assist; + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c index 99bc202243d2..9007fb04343b 100644 --- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c +++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c @@ -79,6 +79,7 @@ void guest_code(struct vmx_pages *vmx_pages) GUEST_SYNC(2); enable_vp_assist(vmx_pages->vp_assist_gpa, vmx_pages->vp_assist); + evmcs_enable(); GUEST_ASSERT(vmx_pages->vmcs_gpa); GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); From cd8f11bd6bbd00565cf39c6335cf2788795fdca7 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 1 Nov 2022 15:54:19 +0100 Subject: [PATCH 1921/4122] KVM: selftests: Split off load_evmcs() from load_vmcs() In preparation to putting Hyper-V specific test pages to a dedicated struct, move eVMCS load logic from load_vmcs(). Tests call load_vmcs() directly and the only one which needs 'enlightened' version is evmcs_test so there's not much gain in having this merged. Temporarily pass both GPA and HVA to load_evmcs(). 
Reviewed-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-42-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- .../selftests/kvm/include/x86_64/evmcs.h | 10 +++++++ tools/testing/selftests/kvm/lib/x86_64/vmx.c | 28 +++++++------------ .../testing/selftests/kvm/x86_64/evmcs_test.c | 4 +-- 3 files changed, 22 insertions(+), 20 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/evmcs.h b/tools/testing/selftests/kvm/include/x86_64/evmcs.h index 2530b5aeb4ba..59b60d45b8f6 100644 --- a/tools/testing/selftests/kvm/include/x86_64/evmcs.h +++ b/tools/testing/selftests/kvm/include/x86_64/evmcs.h @@ -256,6 +256,16 @@ static inline int evmcs_vmptrld(uint64_t vmcs_pa, void *vmcs) return 0; } +static inline bool load_evmcs(uint64_t enlightened_vmcs_gpa, void *enlightened_vmcs) +{ + if (evmcs_vmptrld(enlightened_vmcs_gpa, enlightened_vmcs)) + return false; + + current_evmcs->revision_id = EVMCS_VERSION; + + return true; +} + static inline int evmcs_vmptrst(uint64_t *value) { *value = current_vp_assist->current_nested_vmcs & diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c index 3e4ea846366c..318ee4658f0b 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c @@ -171,26 +171,18 @@ bool prepare_for_vmx_operation(struct vmx_pages *vmx) bool load_vmcs(struct vmx_pages *vmx) { - if (!enable_evmcs) { - /* Load a VMCS. */ - *(uint32_t *)(vmx->vmcs) = vmcs_revision(); - if (vmclear(vmx->vmcs_gpa)) - return false; + /* Load a VMCS. */ + *(uint32_t *)(vmx->vmcs) = vmcs_revision(); + if (vmclear(vmx->vmcs_gpa)) + return false; - if (vmptrld(vmx->vmcs_gpa)) - return false; + if (vmptrld(vmx->vmcs_gpa)) + return false; - /* Setup shadow VMCS, do not load it yet. 
*/ - *(uint32_t *)(vmx->shadow_vmcs) = - vmcs_revision() | 0x80000000ul; - if (vmclear(vmx->shadow_vmcs_gpa)) - return false; - } else { - if (evmcs_vmptrld(vmx->enlightened_vmcs_gpa, - vmx->enlightened_vmcs)) - return false; - current_evmcs->revision_id = EVMCS_VERSION; - } + /* Setup shadow VMCS, do not load it yet. */ + *(uint32_t *)(vmx->shadow_vmcs) = vmcs_revision() | 0x80000000ul; + if (vmclear(vmx->shadow_vmcs_gpa)) + return false; return true; } diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c index 9007fb04343b..5a4c8b1873aa 100644 --- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c +++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c @@ -81,10 +81,10 @@ void guest_code(struct vmx_pages *vmx_pages) enable_vp_assist(vmx_pages->vp_assist_gpa, vmx_pages->vp_assist); evmcs_enable(); - GUEST_ASSERT(vmx_pages->vmcs_gpa); GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); GUEST_SYNC(3); - GUEST_ASSERT(load_vmcs(vmx_pages)); + GUEST_ASSERT(load_evmcs(vmx_pages->enlightened_vmcs_gpa, + vmx_pages->enlightened_vmcs)); GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa); GUEST_SYNC(4); From 2dc458b8622182ac4d89de793c548cf04f632801 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 1 Nov 2022 15:54:20 +0100 Subject: [PATCH 1922/4122] KVM: selftests: Create a vendor independent helper to allocate Hyper-V specific test pages There's no need to pollute VMX and SVM code with Hyper-V specific stuff and allocate Hyper-V specific test pages for all tests as only a few really need them. Create a dedicated struct and an allocation helper. 
Reviewed-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-43-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- .../selftests/kvm/include/x86_64/evmcs.h | 4 ++-- .../selftests/kvm/include/x86_64/hyperv.h | 15 +++++++++++++ .../selftests/kvm/include/x86_64/vmx.h | 8 ------- .../testing/selftests/kvm/lib/x86_64/hyperv.c | 20 +++++++++++++++++ tools/testing/selftests/kvm/lib/x86_64/vmx.c | 12 ---------- .../testing/selftests/kvm/x86_64/evmcs_test.c | 22 +++++++++---------- 6 files changed, 48 insertions(+), 33 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/evmcs.h b/tools/testing/selftests/kvm/include/x86_64/evmcs.h index 59b60d45b8f6..94d6059e9a12 100644 --- a/tools/testing/selftests/kvm/include/x86_64/evmcs.h +++ b/tools/testing/selftests/kvm/include/x86_64/evmcs.h @@ -256,9 +256,9 @@ static inline int evmcs_vmptrld(uint64_t vmcs_pa, void *vmcs) return 0; } -static inline bool load_evmcs(uint64_t enlightened_vmcs_gpa, void *enlightened_vmcs) +static inline bool load_evmcs(struct hyperv_test_pages *hv) { - if (evmcs_vmptrld(enlightened_vmcs_gpa, enlightened_vmcs)) + if (evmcs_vmptrld(hv->enlightened_vmcs_gpa, hv->enlightened_vmcs)) return false; current_evmcs->revision_id = EVMCS_VERSION; diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86_64/hyperv.h index ba38fa347cba..becdd8245e84 100644 --- a/tools/testing/selftests/kvm/include/x86_64/hyperv.h +++ b/tools/testing/selftests/kvm/include/x86_64/hyperv.h @@ -268,4 +268,19 @@ extern struct hv_vp_assist_page *current_vp_assist; int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist); +struct hyperv_test_pages { + /* VP assist page */ + void *vp_assist_hva; + uint64_t vp_assist_gpa; + void *vp_assist; + + /* Enlightened VMCS */ + void *enlightened_vmcs_hva; + uint64_t enlightened_vmcs_gpa; + void *enlightened_vmcs; +}; + +struct hyperv_test_pages 
*vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm, + vm_vaddr_t *p_hv_pages_gva); + #endif /* !SELFTEST_KVM_HYPERV_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h index e9c96b49966a..ef784bd6dfc2 100644 --- a/tools/testing/selftests/kvm/include/x86_64/vmx.h +++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h @@ -517,14 +517,6 @@ struct vmx_pages { uint64_t vmwrite_gpa; void *vmwrite; - void *vp_assist_hva; - uint64_t vp_assist_gpa; - void *vp_assist; - - void *enlightened_vmcs_hva; - uint64_t enlightened_vmcs_gpa; - void *enlightened_vmcs; - void *eptp_hva; uint64_t eptp_gpa; void *eptp; diff --git a/tools/testing/selftests/kvm/lib/x86_64/hyperv.c b/tools/testing/selftests/kvm/lib/x86_64/hyperv.c index 32dc0afd9e5b..a2fc083c65ef 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/hyperv.c +++ b/tools/testing/selftests/kvm/lib/x86_64/hyperv.c @@ -8,6 +8,26 @@ #include "processor.h" #include "hyperv.h" +struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm, + vm_vaddr_t *p_hv_pages_gva) +{ + vm_vaddr_t hv_pages_gva = vm_vaddr_alloc_page(vm); + struct hyperv_test_pages *hv = addr_gva2hva(vm, hv_pages_gva); + + /* Setup of a region of guest memory for the VP Assist page. */ + hv->vp_assist = (void *)vm_vaddr_alloc_page(vm); + hv->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->vp_assist); + hv->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->vp_assist); + + /* Setup of a region of guest memory for the enlightened VMCS. 
*/ + hv->enlightened_vmcs = (void *)vm_vaddr_alloc_page(vm); + hv->enlightened_vmcs_hva = addr_gva2hva(vm, (uintptr_t)hv->enlightened_vmcs); + hv->enlightened_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)hv->enlightened_vmcs); + + *p_hv_pages_gva = hv_pages_gva; + return hv; +} + int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist) { uint64_t val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) | diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c index 318ee4658f0b..59d97531c9b1 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c @@ -109,18 +109,6 @@ vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva) vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite); memset(vmx->vmwrite_hva, 0, getpagesize()); - /* Setup of a region of guest memory for the VP Assist page. */ - vmx->vp_assist = (void *)vm_vaddr_alloc_page(vm); - vmx->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)vmx->vp_assist); - vmx->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vp_assist); - - /* Setup of a region of guest memory for the enlightened VMCS. 
*/ - vmx->enlightened_vmcs = (void *)vm_vaddr_alloc_page(vm); - vmx->enlightened_vmcs_hva = - addr_gva2hva(vm, (uintptr_t)vmx->enlightened_vmcs); - vmx->enlightened_vmcs_gpa = - addr_gva2gpa(vm, (uintptr_t)vmx->enlightened_vmcs); - *p_vmx_gva = vmx_gva; return vmx; } diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c index 5a4c8b1873aa..74f076ba574b 100644 --- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c +++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c @@ -68,7 +68,7 @@ void l2_guest_code(void) vmcall(); } -void guest_code(struct vmx_pages *vmx_pages) +void guest_code(struct vmx_pages *vmx_pages, struct hyperv_test_pages *hv_pages) { #define L2_GUEST_STACK_SIZE 64 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; @@ -78,23 +78,22 @@ void guest_code(struct vmx_pages *vmx_pages) GUEST_SYNC(1); GUEST_SYNC(2); - enable_vp_assist(vmx_pages->vp_assist_gpa, vmx_pages->vp_assist); + enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist); evmcs_enable(); GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); GUEST_SYNC(3); - GUEST_ASSERT(load_evmcs(vmx_pages->enlightened_vmcs_gpa, - vmx_pages->enlightened_vmcs)); - GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa); + GUEST_ASSERT(load_evmcs(hv_pages)); + GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa); GUEST_SYNC(4); - GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa); + GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa); prepare_vmcs(vmx_pages, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); GUEST_SYNC(5); - GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa); + GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa); current_evmcs->revision_id = -1u; GUEST_ASSERT(vmlaunch()); current_evmcs->revision_id = EVMCS_VERSION; @@ -104,7 +103,7 @@ void guest_code(struct vmx_pages *vmx_pages) PIN_BASED_NMI_EXITING); GUEST_ASSERT(!vmlaunch()); - GUEST_ASSERT(vmptrstz() == 
vmx_pages->enlightened_vmcs_gpa); + GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa); /* * NMI forces L2->L1 exit, resuming L2 and hope that EVMCS is @@ -152,7 +151,7 @@ void guest_code(struct vmx_pages *vmx_pages) GUEST_SYNC(11); /* Try enlightened vmptrld with an incorrect GPA */ - evmcs_vmptrld(0xdeadbeef, vmx_pages->enlightened_vmcs); + evmcs_vmptrld(0xdeadbeef, hv_pages->enlightened_vmcs); GUEST_ASSERT(vmlaunch()); GUEST_ASSERT(ud_count == 1); GUEST_DONE(); @@ -199,7 +198,7 @@ static struct kvm_vcpu *save_restore_vm(struct kvm_vm *vm, int main(int argc, char *argv[]) { - vm_vaddr_t vmx_pages_gva = 0; + vm_vaddr_t vmx_pages_gva = 0, hv_pages_gva = 0; struct kvm_vcpu *vcpu; struct kvm_vm *vm; @@ -217,7 +216,8 @@ int main(int argc, char *argv[]) vcpu_enable_evmcs(vcpu); vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vcpu, 1, vmx_pages_gva); + vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva); + vcpu_args_set(vcpu, 2, vmx_pages_gva, hv_pages_gva); vm_init_descriptor_tables(vm); vcpu_init_descriptor_tables(vcpu); From 6c15c3c46520374e1a144942e5228f963f5eb2d5 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 1 Nov 2022 15:54:21 +0100 Subject: [PATCH 1923/4122] KVM: selftests: Allocate Hyper-V partition assist page In preparation to testing Hyper-V L2 TLB flush hypercalls, allocate so-called Partition assist page. 
Reviewed-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-44-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/include/x86_64/hyperv.h | 5 +++++ tools/testing/selftests/kvm/lib/x86_64/hyperv.c | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86_64/hyperv.h index becdd8245e84..9218bb5f44bf 100644 --- a/tools/testing/selftests/kvm/include/x86_64/hyperv.h +++ b/tools/testing/selftests/kvm/include/x86_64/hyperv.h @@ -274,6 +274,11 @@ struct hyperv_test_pages { uint64_t vp_assist_gpa; void *vp_assist; + /* Partition assist page */ + void *partition_assist_hva; + uint64_t partition_assist_gpa; + void *partition_assist; + /* Enlightened VMCS */ void *enlightened_vmcs_hva; uint64_t enlightened_vmcs_gpa; diff --git a/tools/testing/selftests/kvm/lib/x86_64/hyperv.c b/tools/testing/selftests/kvm/lib/x86_64/hyperv.c index a2fc083c65ef..efb7e7a1354d 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/hyperv.c +++ b/tools/testing/selftests/kvm/lib/x86_64/hyperv.c @@ -19,6 +19,11 @@ struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm, hv->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->vp_assist); hv->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->vp_assist); + /* Setup of a region of guest memory for the partition assist page. */ + hv->partition_assist = (void *)vm_vaddr_alloc_page(vm); + hv->partition_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->partition_assist); + hv->partition_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->partition_assist); + /* Setup of a region of guest memory for the enlightened VMCS. 
*/ hv->enlightened_vmcs = (void *)vm_vaddr_alloc_page(vm); hv->enlightened_vmcs_hva = addr_gva2hva(vm, (uintptr_t)hv->enlightened_vmcs); From 8fda37cf3d41f1dfb0667c4b10e3dd01d17735b8 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 1 Nov 2022 15:54:22 +0100 Subject: [PATCH 1924/4122] KVM: selftests: Stuff RAX/RCX with 'safe' values in vmmcall()/vmcall() vmmcall()/vmcall() are used to exit from L2 to L1 and no concrete hypercall ABI is currently followed. With the introduction of Hyper-V L2 TLB flush it becomes (theoretically) possible that L0 will take responsibility for handling the call and no L1 exit will happen. Prevent this by stuffing RAX (KVM ABI) and RCX (Hyper-V ABI) with 'safe' values. While at it, convert vmmcall() to 'static inline', make it set up a stack frame and move to include/x86_64/svm_util.h. Signed-off-by: Vitaly Kuznetsov Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-45-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- .../selftests/kvm/include/x86_64/processor.h | 5 ----- .../selftests/kvm/include/x86_64/svm_util.h | 14 ++++++++++++++ tools/testing/selftests/kvm/include/x86_64/vmx.h | 15 ++++++++++----- 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index a10f39affa45..5d310abe6c3f 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -677,11 +677,6 @@ static inline void cpu_relax(void) asm volatile("rep; nop" ::: "memory"); } -#define vmmcall() \ - __asm__ __volatile__( \ - "vmmcall\n" \ - ) - #define ud2() \ __asm__ __volatile__( \ "ud2\n" \ diff --git a/tools/testing/selftests/kvm/include/x86_64/svm_util.h b/tools/testing/selftests/kvm/include/x86_64/svm_util.h index 7aee6244ab6a..044f0f872ba9 100644 --- a/tools/testing/selftests/kvm/include/x86_64/svm_util.h +++ 
b/tools/testing/selftests/kvm/include/x86_64/svm_util.h @@ -32,6 +32,20 @@ struct svm_test_data { uint64_t msr_gpa; }; +static inline void vmmcall(void) +{ + /* + * Stuff RAX and RCX with "safe" values to make sure L0 doesn't handle + * it as a valid hypercall (e.g. Hyper-V L2 TLB flush) as the intended + * use of this function is to exit to L1 from L2. Clobber all other + * GPRs as L1 doesn't correctly preserve them during vmexits. + */ + __asm__ __volatile__("push %%rbp; vmmcall; pop %%rbp" + : : "a"(0xdeadbeef), "c"(0xbeefdead) + : "rbx", "rdx", "rsi", "rdi", "r8", "r9", + "r10", "r11", "r12", "r13", "r14", "r15"); +} + #define stgi() \ __asm__ __volatile__( \ "stgi\n" \ diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h index ef784bd6dfc2..5f0c0a29c556 100644 --- a/tools/testing/selftests/kvm/include/x86_64/vmx.h +++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h @@ -437,11 +437,16 @@ static inline int vmresume(void) static inline void vmcall(void) { - /* Currently, L1 destroys our GPRs during vmexits. */ - __asm__ __volatile__("push %%rbp; vmcall; pop %%rbp" : : : - "rax", "rbx", "rcx", "rdx", - "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", - "r13", "r14", "r15"); + /* + * Stuff RAX and RCX with "safe" values to make sure L0 doesn't handle + * it as a valid hypercall (e.g. Hyper-V L2 TLB flush) as the intended + * use of this function is to exit to L1 from L2. Clobber all other + * GPRs as L1 doesn't correctly preserve them during vmexits. 
+ */ + __asm__ __volatile__("push %%rbp; vmcall; pop %%rbp" + : : "a"(0xdeadbeef), "c"(0xbeefdead) + : "rbx", "rdx", "rsi", "rdi", "r8", "r9", + "r10", "r11", "r12", "r13", "r14", "r15"); } static inline int vmread(uint64_t encoding, uint64_t *value) From 75ee7505feae16bbfbed62115e04f762047c4765 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 1 Nov 2022 15:54:23 +0100 Subject: [PATCH 1925/4122] KVM: selftests: Introduce rdmsr_from_l2() and use it for MSR-Bitmap tests Hyper-V MSR-Bitmap tests do RDMSR from L2 to exit to L1. While 'evmcs_test' correctly clobbers all GPRs (which are not preserved), 'hyperv_svm_test' does not. Introduce a more generic rdmsr_from_l2() to avoid code duplication and remove hardcoding of MSRs. Do not put it in common code because it is really just a selftests bug rather than a processor feature that requires it. Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-46-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- .../testing/selftests/kvm/x86_64/evmcs_test.c | 27 +++++++------------ .../selftests/kvm/x86_64/hyperv_svm_test.c | 17 +++++++++--- 2 files changed, 23 insertions(+), 21 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c index 74f076ba574b..58fa98512c24 100644 --- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c +++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c @@ -30,22 +30,15 @@ static void guest_nmi_handler(struct ex_regs *regs) { } -/* Exits to L1 destroy GRPs! 
*/ -static inline void rdmsr_fs_base(void) +static inline void rdmsr_from_l2(uint32_t msr) { - __asm__ __volatile__ ("mov $0xc0000100, %%rcx; rdmsr" : : : - "rax", "rbx", "rcx", "rdx", - "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", - "r13", "r14", "r15"); -} -static inline void rdmsr_gs_base(void) -{ - __asm__ __volatile__ ("mov $0xc0000101, %%rcx; rdmsr" : : : - "rax", "rbx", "rcx", "rdx", - "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", - "r13", "r14", "r15"); + /* Currently, L1 doesn't preserve GPRs during vmexits. */ + __asm__ __volatile__ ("rdmsr" : : "c"(msr) : + "rax", "rbx", "rdx", "rsi", "rdi", "r8", "r9", + "r10", "r11", "r12", "r13", "r14", "r15"); } +/* Exit to L1 from L2 with RDMSR instruction */ void l2_guest_code(void) { GUEST_SYNC(7); @@ -58,11 +51,11 @@ void l2_guest_code(void) vmcall(); /* MSR-Bitmap tests */ - rdmsr_fs_base(); /* intercepted */ - rdmsr_fs_base(); /* intercepted */ - rdmsr_gs_base(); /* not intercepted */ + rdmsr_from_l2(MSR_FS_BASE); /* intercepted */ + rdmsr_from_l2(MSR_FS_BASE); /* intercepted */ + rdmsr_from_l2(MSR_GS_BASE); /* not intercepted */ vmcall(); - rdmsr_gs_base(); /* intercepted */ + rdmsr_from_l2(MSR_GS_BASE); /* intercepted */ /* Done, exit to L1 and never come back. */ vmcall(); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c index 1c3fc38b4f15..3c9a2a1b4cfd 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c @@ -23,6 +23,15 @@ #define L2_GUEST_STACK_SIZE 256 +/* Exit to L1 from L2 with RDMSR instruction */ +static inline void rdmsr_from_l2(uint32_t msr) +{ + /* Currently, L1 doesn't preserve GPRs during vmexits. 
*/ + __asm__ __volatile__ ("rdmsr" : : "c"(msr) : + "rax", "rbx", "rdx", "rsi", "rdi", "r8", "r9", + "r10", "r11", "r12", "r13", "r14", "r15"); +} + void l2_guest_code(void) { GUEST_SYNC(3); @@ -30,11 +39,11 @@ void l2_guest_code(void) vmmcall(); /* MSR-Bitmap tests */ - rdmsr(MSR_FS_BASE); /* intercepted */ - rdmsr(MSR_FS_BASE); /* intercepted */ - rdmsr(MSR_GS_BASE); /* not intercepted */ + rdmsr_from_l2(MSR_FS_BASE); /* intercepted */ + rdmsr_from_l2(MSR_FS_BASE); /* intercepted */ + rdmsr_from_l2(MSR_GS_BASE); /* not intercepted */ vmmcall(); - rdmsr(MSR_GS_BASE); /* intercepted */ + rdmsr_from_l2(MSR_GS_BASE); /* intercepted */ GUEST_SYNC(5); From 4b5d8b222bf185bda25b56de403afde7b6d3c466 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 1 Nov 2022 15:54:24 +0100 Subject: [PATCH 1926/4122] KVM: selftests: evmcs_test: Introduce L2 TLB flush test Enable Hyper-V L2 TLB flush and check that Hyper-V TLB flush hypercalls from L2 don't exit to L1 unless 'TlbLockCount' is set in the Partition assist page. 
Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-47-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- .../selftests/kvm/include/x86_64/evmcs.h | 2 + .../testing/selftests/kvm/x86_64/evmcs_test.c | 50 ++++++++++++++++++- 2 files changed, 50 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/evmcs.h b/tools/testing/selftests/kvm/include/x86_64/evmcs.h index 94d6059e9a12..901caf0e0939 100644 --- a/tools/testing/selftests/kvm/include/x86_64/evmcs.h +++ b/tools/testing/selftests/kvm/include/x86_64/evmcs.h @@ -237,6 +237,8 @@ struct hv_enlightened_vmcs { #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ENLIGHTENMENTSCONTROL BIT(15) #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL 0xFFFF +#define HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH 0x10000031 + extern struct hv_enlightened_vmcs *current_evmcs; int vcpu_enable_evmcs(struct kvm_vcpu *vcpu); diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c index 58fa98512c24..ba09d300c953 100644 --- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c +++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c @@ -16,6 +16,7 @@ #include "kvm_util.h" +#include "hyperv.h" #include "vmx.h" static int ud_count; @@ -41,6 +42,8 @@ static inline void rdmsr_from_l2(uint32_t msr) /* Exit to L1 from L2 with RDMSR instruction */ void l2_guest_code(void) { + u64 unused; + GUEST_SYNC(7); GUEST_SYNC(8); @@ -57,15 +60,31 @@ void l2_guest_code(void) vmcall(); rdmsr_from_l2(MSR_GS_BASE); /* intercepted */ + /* L2 TLB flush tests */ + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT, 0x0, + HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS); + rdmsr_from_l2(MSR_FS_BASE); + /* + * Note: hypercall status (RAX) is not preserved correctly by L1 after + * synthetic vmexit, use unchecked version. 
+ */ + __hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT, 0x0, + HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS, + &unused); + /* Done, exit to L1 and never come back. */ vmcall(); } -void guest_code(struct vmx_pages *vmx_pages, struct hyperv_test_pages *hv_pages) +void guest_code(struct vmx_pages *vmx_pages, struct hyperv_test_pages *hv_pages, + vm_vaddr_t hv_hcall_page_gpa) { #define L2_GUEST_STACK_SIZE 64 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); + wrmsr(HV_X64_MSR_HYPERCALL, hv_hcall_page_gpa); + x2apic_enable(); GUEST_SYNC(1); @@ -95,7 +114,17 @@ void guest_code(struct vmx_pages *vmx_pages, struct hyperv_test_pages *hv_pages) vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmreadz(PIN_BASED_VM_EXEC_CONTROL) | PIN_BASED_NMI_EXITING); + /* L2 TLB flush setup */ + current_evmcs->partition_assist_page = hv_pages->partition_assist_gpa; + current_evmcs->hv_enlightenments_control.nested_flush_hypercall = 1; + current_evmcs->hv_vm_id = 1; + current_evmcs->hv_vp_id = 1; + current_vp_assist->nested_control.features.directhypercall = 1; + *(u32 *)(hv_pages->partition_assist) = 0; + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_EXCEPTION_NMI); + GUEST_ASSERT_EQ((vmreadz(VM_EXIT_INTR_INFO) & 0xff), NMI_VECTOR); GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa); /* @@ -139,6 +168,18 @@ void guest_code(struct vmx_pages *vmx_pages, struct hyperv_test_pages *hv_pages) GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ); current_evmcs->guest_rip += 2; /* rdmsr */ + /* + * L2 TLB flush test. First VMCALL should be handled directly by L0, + * no VMCALL exit expected. 
+ */ + GUEST_ASSERT(!vmresume()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ); + current_evmcs->guest_rip += 2; /* rdmsr */ + /* Enable synthetic vmexit */ + *(u32 *)(hv_pages->partition_assist) = 1; + GUEST_ASSERT(!vmresume()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH); + GUEST_ASSERT(!vmresume()); GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); GUEST_SYNC(11); @@ -192,6 +233,7 @@ static struct kvm_vcpu *save_restore_vm(struct kvm_vm *vm, int main(int argc, char *argv[]) { vm_vaddr_t vmx_pages_gva = 0, hv_pages_gva = 0; + vm_vaddr_t hcall_page; struct kvm_vcpu *vcpu; struct kvm_vm *vm; @@ -205,12 +247,16 @@ int main(int argc, char *argv[]) TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)); + hcall_page = vm_vaddr_alloc_pages(vm, 1); + memset(addr_gva2hva(vm, hcall_page), 0x0, getpagesize()); + vcpu_set_hv_cpuid(vcpu); vcpu_enable_evmcs(vcpu); vcpu_alloc_vmx(vm, &vmx_pages_gva); vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva); - vcpu_args_set(vcpu, 2, vmx_pages_gva, hv_pages_gva); + vcpu_args_set(vcpu, 3, vmx_pages_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page)); + vcpu_set_msr(vcpu, HV_X64_MSR_VP_INDEX, vcpu->id); vm_init_descriptor_tables(vm); vcpu_init_descriptor_tables(vcpu); From 9c2e881945dca4904e8817acf4f0a928570bd400 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 1 Nov 2022 15:54:25 +0100 Subject: [PATCH 1927/4122] KVM: selftests: hyperv_svm_test: Introduce L2 TLB flush test Enable Hyper-V L2 TLB flush and check that Hyper-V TLB flush hypercalls from L2 don't exit to L1 unless 'TlbLockCount' is set in the Partition assist page. 
Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-48-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- .../selftests/kvm/include/x86_64/svm.h | 4 ++ .../selftests/kvm/x86_64/hyperv_svm_test.c | 59 +++++++++++++++++-- 2 files changed, 58 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/svm.h b/tools/testing/selftests/kvm/include/x86_64/svm.h index 483e6ae12f69..4803e1056055 100644 --- a/tools/testing/selftests/kvm/include/x86_64/svm.h +++ b/tools/testing/selftests/kvm/include/x86_64/svm.h @@ -76,6 +76,10 @@ struct hv_vmcb_enlightenments { */ #define HV_VMCB_NESTED_ENLIGHTENMENTS (1U << 31) +/* Synthetic VM-Exit */ +#define HV_SVM_EXITCODE_ENL 0xf0000000 +#define HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH (1) + struct __attribute__ ((__packed__)) vmcb_control_area { u32 intercept_cr; u32 intercept_dr; diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c index 3c9a2a1b4cfd..3b3cc94ba8e4 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c @@ -34,6 +34,8 @@ static inline void rdmsr_from_l2(uint32_t msr) void l2_guest_code(void) { + u64 unused; + GUEST_SYNC(3); /* Exit to L1 */ vmmcall(); @@ -47,11 +49,28 @@ void l2_guest_code(void) GUEST_SYNC(5); + /* L2 TLB flush tests */ + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | + HV_HYPERCALL_FAST_BIT, 0x0, + HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | + HV_FLUSH_ALL_PROCESSORS); + rdmsr_from_l2(MSR_FS_BASE); + /* + * Note: hypercall status (RAX) is not preserved correctly by L1 after + * synthetic vmexit, use unchecked version. + */ + __hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | + HV_HYPERCALL_FAST_BIT, 0x0, + HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | + HV_FLUSH_ALL_PROCESSORS, &unused); + /* Done, exit to L1 and never come back. 
*/ vmmcall(); } -static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm) +static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm, + struct hyperv_test_pages *hv_pages, + vm_vaddr_t pgs_gpa) { unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; struct vmcb *vmcb = svm->vmcb; @@ -59,13 +78,23 @@ static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm) GUEST_SYNC(1); - wrmsr(HV_X64_MSR_GUEST_OS_ID, (u64)0x8100 << 48); + wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); + wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa); + enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist); GUEST_ASSERT(svm->vmcb_gpa); /* Prepare for L2 execution. */ generic_svm_setup(svm, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + /* L2 TLB flush setup */ + hve->partition_assist_page = hv_pages->partition_assist_gpa; + hve->hv_enlightenments_control.nested_flush_hypercall = 1; + hve->hv_vm_id = 1; + hve->hv_vp_id = 1; + current_vp_assist->nested_control.features.directhypercall = 1; + *(u32 *)(hv_pages->partition_assist) = 0; + GUEST_SYNC(2); run_guest(vmcb, svm->vmcb_gpa); GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL); @@ -100,6 +129,20 @@ static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm) GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR); vmcb->save.rip += 2; /* rdmsr */ + + /* + * L2 TLB flush test. First VMCALL should be handled directly by L0, + * no VMCALL exit expected. 
+ */ + run_guest(vmcb, svm->vmcb_gpa); + GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR); + vmcb->save.rip += 2; /* rdmsr */ + /* Enable synthetic vmexit */ + *(u32 *)(hv_pages->partition_assist) = 1; + run_guest(vmcb, svm->vmcb_gpa); + GUEST_ASSERT(vmcb->control.exit_code == HV_SVM_EXITCODE_ENL); + GUEST_ASSERT(vmcb->control.exit_info_1 == HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH); + run_guest(vmcb, svm->vmcb_gpa); GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL); GUEST_SYNC(6); @@ -109,8 +152,8 @@ static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm) int main(int argc, char *argv[]) { - vm_vaddr_t nested_gva = 0; - + vm_vaddr_t nested_gva = 0, hv_pages_gva = 0; + vm_vaddr_t hcall_page; struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct kvm_run *run; @@ -124,7 +167,13 @@ int main(int argc, char *argv[]) vcpu_set_hv_cpuid(vcpu); run = vcpu->run; vcpu_alloc_svm(vm, &nested_gva); - vcpu_args_set(vcpu, 1, nested_gva); + vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva); + + hcall_page = vm_vaddr_alloc_pages(vm, 1); + memset(addr_gva2hva(vm, hcall_page), 0x0, getpagesize()); + + vcpu_args_set(vcpu, 3, nested_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page)); + vcpu_set_msr(vcpu, HV_X64_MSR_VP_INDEX, vcpu->id); for (stage = 1;; stage++) { vcpu_run(vcpu); From 0fa32dad1e78629cb42999dacd82489503fdf4c2 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 1 Nov 2022 15:54:26 +0100 Subject: [PATCH 1928/4122] KVM: selftests: Rename 'evmcs_test' to 'hyperv_evmcs' Conform to the rest of Hyper-V emulation selftests which have 'hyperv' prefix. Get rid of '_test' suffix as well as the purpose of this code is fairly obvious. 
Reviewed-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20221101145426.251680-49-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 2 +- tools/testing/selftests/kvm/Makefile | 2 +- .../selftests/kvm/x86_64/{evmcs_test.c => hyperv_evmcs.c} | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename tools/testing/selftests/kvm/x86_64/{evmcs_test.c => hyperv_evmcs.c} (100%) diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index dc7e28cf2da0..082855d94c72 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -16,7 +16,6 @@ /x86_64/cpuid_test /x86_64/cr4_cpuid_sync_test /x86_64/debug_regs -/x86_64/evmcs_test /x86_64/exit_on_emulation_failure_test /x86_64/fix_hypercall_test /x86_64/get_msr_index_features @@ -24,6 +23,7 @@ /x86_64/kvm_pv_test /x86_64/hyperv_clock /x86_64/hyperv_cpuid +/x86_64/hyperv_evmcs /x86_64/hyperv_features /x86_64/hyperv_ipi /x86_64/hyperv_svm_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 246d52e9df60..2275ba861e0e 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -82,11 +82,11 @@ TEST_PROGS_x86_64 += x86_64/nx_huge_pages_test.sh TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features -TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test TEST_GEN_PROGS_x86_64 += x86_64/exit_on_emulation_failure_test TEST_GEN_PROGS_x86_64 += x86_64/fix_hypercall_test TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid +TEST_GEN_PROGS_x86_64 += x86_64/hyperv_evmcs TEST_GEN_PROGS_x86_64 += x86_64/hyperv_features TEST_GEN_PROGS_x86_64 += x86_64/hyperv_ipi TEST_GEN_PROGS_x86_64 += x86_64/hyperv_svm_test diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c 
b/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c similarity index 100% rename from tools/testing/selftests/kvm/x86_64/evmcs_test.c rename to tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c From 28b4b0596343d19d140da059eee0e5c2b5328731 Mon Sep 17 00:00:00 2001 From: Long Li Date: Thu, 17 Nov 2022 13:02:56 -0800 Subject: [PATCH 1929/4122] xfs: fix incorrect i_nlink caused by inode racing The following error occurred during the fsstress test: XFS: Assertion failed: VFS_I(ip)->i_nlink >= 2, file: fs/xfs/xfs_inode.c, line: 2452 The problem was that inode race condition causes incorrect i_nlink to be written to disk, and then it is read into memory. Consider the following call graph, inodes that are marked as both XFS_IFLUSHING and XFS_IRECLAIMABLE, i_nlink will be reset to 1 and then restored to original value in xfs_reinit_inode(). Therefore, the i_nlink of directory on disk may be set to 1. xfsaild xfs_inode_item_push xfs_iflush_cluster xfs_iflush xfs_inode_to_disk xfs_iget xfs_iget_cache_hit xfs_iget_recycle xfs_reinit_inode inode_init_always xfs_reinit_inode() needs to hold the ILOCK_EXCL as it is changing internal inode state and can race with other RCU protected inode lookups. On the read side, xfs_iflush_cluster() grabs the ILOCK_SHARED while under rcu + ip->i_flags_lock, and so xfs_iflush/xfs_inode_to_disk() are protected from racing inode updates (during transactions) by that lock. Fixes: ff7bebeb91f8 ("xfs: refactor the inode recycling code") # goes further back than this Signed-off-by: Long Li Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. 
Wong --- fs/xfs/xfs_icache.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index eae7427062cf..f35e2cee5265 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -342,6 +342,9 @@ xfs_iget_recycle( trace_xfs_iget_recycle(ip); + if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) + return -EAGAIN; + /* * We need to make it look like the inode is being reclaimed to prevent * the actual reclaim workers from stomping over us while we recycle @@ -355,6 +358,7 @@ xfs_iget_recycle( ASSERT(!rwsem_is_locked(&inode->i_rwsem)); error = xfs_reinit_inode(mp, inode); + xfs_iunlock(ip, XFS_ILOCK_EXCL); if (error) { /* * Re-initializing the inode failed, and we are in deep @@ -518,6 +522,8 @@ xfs_iget_cache_hit( if (ip->i_flags & XFS_IRECLAIMABLE) { /* Drops i_flags_lock and RCU read lock. */ error = xfs_iget_recycle(pag, ip); + if (error == -EAGAIN) + goto out_skip; if (error) return error; } else { From 2a402120a8d413238999a67ebff5b7dca0e5d14c Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Wed, 16 Nov 2022 10:45:35 +0100 Subject: [PATCH 1930/4122] IB/isert: use the ISCSI_LOGIN_CURRENT_STAGE macro Use the proper macro to get the current_stage value. Link: https://lore.kernel.org/r/20221116094535.138298-1-mlombard@redhat.com Signed-off-by: Maurizio Lombardi Reviewed-by: Mike Christie Acked-by: Sagi Grimberg Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/isert/ib_isert.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index b360a1527cd1..75404885cf98 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -993,9 +993,8 @@ isert_rx_login_req(struct isert_conn *isert_conn) * login request PDU. */ login->leading_connection = (!login_req->tsih) ? 
1 : 0; - login->current_stage = - (login_req->flags & ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK) - >> 2; + login->current_stage = ISCSI_LOGIN_CURRENT_STAGE( + login_req->flags); login->version_min = login_req->min_version; login->version_max = login_req->max_version; memcpy(login->isid, login_req->isid, 6); From 5e5ff73c2e5863f93fc5fd78d178cd8f2af12464 Mon Sep 17 00:00:00 2001 From: Sai Prakash Ranjan Date: Mon, 17 Oct 2022 20:04:50 +0530 Subject: [PATCH 1931/4122] asm-generic/io: Add _RET_IP_ to MMIO trace for more accurate debug info Due to compiler optimizations like inlining, there are cases where MMIO traces using _THIS_IP_ for caller information might not be sufficient to provide accurate debug traces. 1) With optimizations (Seen with GCC): In this case, _THIS_IP_ works fine and prints the caller information since it will be inlined into the caller and we get the debug traces on who made the MMIO access, for ex: rwmmio_read: qcom_smmu_tlb_sync+0xe0/0x1b0 width=32 addr=0xffff8000087447f4 rwmmio_post_read: qcom_smmu_tlb_sync+0xe0/0x1b0 width=32 val=0x0 addr=0xffff8000087447f4 2) Without optimizations (Seen with Clang): _THIS_IP_ will not be sufficient in this case as it will print only the MMIO accessors itself which is of not much use since it is not inlined as below for example: rwmmio_read: readl+0x4/0x80 width=32 addr=0xffff8000087447f4 rwmmio_post_read: readl+0x48/0x80 width=32 val=0x4 addr=0xffff8000087447f4 So in order to handle this second case as well irrespective of the compiler optimizations, add _RET_IP_ to MMIO trace to make it provide more accurate debug information in all these scenarios. 
Before: rwmmio_read: readl+0x4/0x80 width=32 addr=0xffff8000087447f4 rwmmio_post_read: readl+0x48/0x80 width=32 val=0x4 addr=0xffff8000087447f4 After: rwmmio_read: qcom_smmu_tlb_sync+0xe0/0x1b0 -> readl+0x4/0x80 width=32 addr=0xffff8000087447f4 rwmmio_post_read: qcom_smmu_tlb_sync+0xe0/0x1b0 -> readl+0x4/0x80 width=32 val=0x0 addr=0xffff8000087447f4 Fixes: 210031971cdd ("asm-generic/io: Add logging support for MMIO accessors") Signed-off-by: Sai Prakash Ranjan Signed-off-by: Arnd Bergmann --- include/asm-generic/io.h | 80 +++++++++++++++++------------------ include/trace/events/rwmmio.h | 43 ++++++++++++------- lib/trace_readwrite.c | 16 +++---- 3 files changed, 75 insertions(+), 64 deletions(-) diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index a68f8fbf423b..4c44a29b5e8e 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -80,24 +80,24 @@ DECLARE_TRACEPOINT(rwmmio_read); DECLARE_TRACEPOINT(rwmmio_post_read); void log_write_mmio(u64 val, u8 width, volatile void __iomem *addr, - unsigned long caller_addr); + unsigned long caller_addr, unsigned long caller_addr0); void log_post_write_mmio(u64 val, u8 width, volatile void __iomem *addr, - unsigned long caller_addr); + unsigned long caller_addr, unsigned long caller_addr0); void log_read_mmio(u8 width, const volatile void __iomem *addr, - unsigned long caller_addr); + unsigned long caller_addr, unsigned long caller_addr0); void log_post_read_mmio(u64 val, u8 width, const volatile void __iomem *addr, - unsigned long caller_addr); + unsigned long caller_addr, unsigned long caller_addr0); #else static inline void log_write_mmio(u64 val, u8 width, volatile void __iomem *addr, - unsigned long caller_addr) {} + unsigned long caller_addr, unsigned long caller_addr0) {} static inline void log_post_write_mmio(u64 val, u8 width, volatile void __iomem *addr, - unsigned long caller_addr) {} + unsigned long caller_addr, unsigned long caller_addr0) {} static inline void log_read_mmio(u8 width, 
const volatile void __iomem *addr, - unsigned long caller_addr) {} + unsigned long caller_addr, unsigned long caller_addr0) {} static inline void log_post_read_mmio(u64 val, u8 width, const volatile void __iomem *addr, - unsigned long caller_addr) {} + unsigned long caller_addr, unsigned long caller_addr0) {} #endif /* CONFIG_TRACE_MMIO_ACCESS */ @@ -188,11 +188,11 @@ static inline u8 readb(const volatile void __iomem *addr) { u8 val; - log_read_mmio(8, addr, _THIS_IP_); + log_read_mmio(8, addr, _THIS_IP_, _RET_IP_); __io_br(); val = __raw_readb(addr); __io_ar(val); - log_post_read_mmio(val, 8, addr, _THIS_IP_); + log_post_read_mmio(val, 8, addr, _THIS_IP_, _RET_IP_); return val; } #endif @@ -203,11 +203,11 @@ static inline u16 readw(const volatile void __iomem *addr) { u16 val; - log_read_mmio(16, addr, _THIS_IP_); + log_read_mmio(16, addr, _THIS_IP_, _RET_IP_); __io_br(); val = __le16_to_cpu((__le16 __force)__raw_readw(addr)); __io_ar(val); - log_post_read_mmio(val, 16, addr, _THIS_IP_); + log_post_read_mmio(val, 16, addr, _THIS_IP_, _RET_IP_); return val; } #endif @@ -218,11 +218,11 @@ static inline u32 readl(const volatile void __iomem *addr) { u32 val; - log_read_mmio(32, addr, _THIS_IP_); + log_read_mmio(32, addr, _THIS_IP_, _RET_IP_); __io_br(); val = __le32_to_cpu((__le32 __force)__raw_readl(addr)); __io_ar(val); - log_post_read_mmio(val, 32, addr, _THIS_IP_); + log_post_read_mmio(val, 32, addr, _THIS_IP_, _RET_IP_); return val; } #endif @@ -234,11 +234,11 @@ static inline u64 readq(const volatile void __iomem *addr) { u64 val; - log_read_mmio(64, addr, _THIS_IP_); + log_read_mmio(64, addr, _THIS_IP_, _RET_IP_); __io_br(); val = __le64_to_cpu(__raw_readq(addr)); __io_ar(val); - log_post_read_mmio(val, 64, addr, _THIS_IP_); + log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_); return val; } #endif @@ -248,11 +248,11 @@ static inline u64 readq(const volatile void __iomem *addr) #define writeb writeb static inline void writeb(u8 value, volatile void 
__iomem *addr) { - log_write_mmio(value, 8, addr, _THIS_IP_); + log_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); __io_bw(); __raw_writeb(value, addr); __io_aw(); - log_post_write_mmio(value, 8, addr, _THIS_IP_); + log_post_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); } #endif @@ -260,11 +260,11 @@ static inline void writeb(u8 value, volatile void __iomem *addr) #define writew writew static inline void writew(u16 value, volatile void __iomem *addr) { - log_write_mmio(value, 16, addr, _THIS_IP_); + log_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); __io_bw(); __raw_writew((u16 __force)cpu_to_le16(value), addr); __io_aw(); - log_post_write_mmio(value, 16, addr, _THIS_IP_); + log_post_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); } #endif @@ -272,11 +272,11 @@ static inline void writew(u16 value, volatile void __iomem *addr) #define writel writel static inline void writel(u32 value, volatile void __iomem *addr) { - log_write_mmio(value, 32, addr, _THIS_IP_); + log_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); __io_bw(); __raw_writel((u32 __force)__cpu_to_le32(value), addr); __io_aw(); - log_post_write_mmio(value, 32, addr, _THIS_IP_); + log_post_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); } #endif @@ -285,11 +285,11 @@ static inline void writel(u32 value, volatile void __iomem *addr) #define writeq writeq static inline void writeq(u64 value, volatile void __iomem *addr) { - log_write_mmio(value, 64, addr, _THIS_IP_); + log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); __io_bw(); __raw_writeq(__cpu_to_le64(value), addr); __io_aw(); - log_post_write_mmio(value, 64, addr, _THIS_IP_); + log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); } #endif #endif /* CONFIG_64BIT */ @@ -305,9 +305,9 @@ static inline u8 readb_relaxed(const volatile void __iomem *addr) { u8 val; - log_read_mmio(8, addr, _THIS_IP_); + log_read_mmio(8, addr, _THIS_IP_, _RET_IP_); val = __raw_readb(addr); - log_post_read_mmio(val, 8, addr, _THIS_IP_); + 
log_post_read_mmio(val, 8, addr, _THIS_IP_, _RET_IP_); return val; } #endif @@ -318,9 +318,9 @@ static inline u16 readw_relaxed(const volatile void __iomem *addr) { u16 val; - log_read_mmio(16, addr, _THIS_IP_); + log_read_mmio(16, addr, _THIS_IP_, _RET_IP_); val = __le16_to_cpu(__raw_readw(addr)); - log_post_read_mmio(val, 16, addr, _THIS_IP_); + log_post_read_mmio(val, 16, addr, _THIS_IP_, _RET_IP_); return val; } #endif @@ -331,9 +331,9 @@ static inline u32 readl_relaxed(const volatile void __iomem *addr) { u32 val; - log_read_mmio(32, addr, _THIS_IP_); + log_read_mmio(32, addr, _THIS_IP_, _RET_IP_); val = __le32_to_cpu(__raw_readl(addr)); - log_post_read_mmio(val, 32, addr, _THIS_IP_); + log_post_read_mmio(val, 32, addr, _THIS_IP_, _RET_IP_); return val; } #endif @@ -344,9 +344,9 @@ static inline u64 readq_relaxed(const volatile void __iomem *addr) { u64 val; - log_read_mmio(64, addr, _THIS_IP_); + log_read_mmio(64, addr, _THIS_IP_, _RET_IP_); val = __le64_to_cpu(__raw_readq(addr)); - log_post_read_mmio(val, 64, addr, _THIS_IP_); + log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_); return val; } #endif @@ -355,9 +355,9 @@ static inline u64 readq_relaxed(const volatile void __iomem *addr) #define writeb_relaxed writeb_relaxed static inline void writeb_relaxed(u8 value, volatile void __iomem *addr) { - log_write_mmio(value, 8, addr, _THIS_IP_); + log_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); __raw_writeb(value, addr); - log_post_write_mmio(value, 8, addr, _THIS_IP_); + log_post_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); } #endif @@ -365,9 +365,9 @@ static inline void writeb_relaxed(u8 value, volatile void __iomem *addr) #define writew_relaxed writew_relaxed static inline void writew_relaxed(u16 value, volatile void __iomem *addr) { - log_write_mmio(value, 16, addr, _THIS_IP_); + log_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); __raw_writew(cpu_to_le16(value), addr); - log_post_write_mmio(value, 16, addr, _THIS_IP_); + 
log_post_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); } #endif @@ -375,9 +375,9 @@ static inline void writew_relaxed(u16 value, volatile void __iomem *addr) #define writel_relaxed writel_relaxed static inline void writel_relaxed(u32 value, volatile void __iomem *addr) { - log_write_mmio(value, 32, addr, _THIS_IP_); + log_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); __raw_writel(__cpu_to_le32(value), addr); - log_post_write_mmio(value, 32, addr, _THIS_IP_); + log_post_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); } #endif @@ -385,9 +385,9 @@ static inline void writel_relaxed(u32 value, volatile void __iomem *addr) #define writeq_relaxed writeq_relaxed static inline void writeq_relaxed(u64 value, volatile void __iomem *addr) { - log_write_mmio(value, 64, addr, _THIS_IP_); + log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); __raw_writeq(__cpu_to_le64(value), addr); - log_post_write_mmio(value, 64, addr, _THIS_IP_); + log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); } #endif diff --git a/include/trace/events/rwmmio.h b/include/trace/events/rwmmio.h index de41159216c1..a43e5dd7436b 100644 --- a/include/trace/events/rwmmio.h +++ b/include/trace/events/rwmmio.h @@ -12,12 +12,14 @@ DECLARE_EVENT_CLASS(rwmmio_rw_template, - TP_PROTO(unsigned long caller, u64 val, u8 width, volatile void __iomem *addr), + TP_PROTO(unsigned long caller, unsigned long caller0, u64 val, u8 width, + volatile void __iomem *addr), - TP_ARGS(caller, val, width, addr), + TP_ARGS(caller, caller0, val, width, addr), TP_STRUCT__entry( __field(unsigned long, caller) + __field(unsigned long, caller0) __field(unsigned long, addr) __field(u64, val) __field(u8, width) @@ -25,56 +27,64 @@ DECLARE_EVENT_CLASS(rwmmio_rw_template, TP_fast_assign( __entry->caller = caller; + __entry->caller0 = caller0; __entry->val = val; __entry->addr = (unsigned long)addr; __entry->width = width; ), - TP_printk("%pS width=%d val=%#llx addr=%#lx", - (void *)__entry->caller, __entry->width, + 
TP_printk("%pS -> %pS width=%d val=%#llx addr=%#lx", + (void *)__entry->caller0, (void *)__entry->caller, __entry->width, __entry->val, __entry->addr) ); DEFINE_EVENT(rwmmio_rw_template, rwmmio_write, - TP_PROTO(unsigned long caller, u64 val, u8 width, volatile void __iomem *addr), - TP_ARGS(caller, val, width, addr) + TP_PROTO(unsigned long caller, unsigned long caller0, u64 val, u8 width, + volatile void __iomem *addr), + TP_ARGS(caller, caller0, val, width, addr) ); DEFINE_EVENT(rwmmio_rw_template, rwmmio_post_write, - TP_PROTO(unsigned long caller, u64 val, u8 width, volatile void __iomem *addr), - TP_ARGS(caller, val, width, addr) + TP_PROTO(unsigned long caller, unsigned long caller0, u64 val, u8 width, + volatile void __iomem *addr), + TP_ARGS(caller, caller0, val, width, addr) ); TRACE_EVENT(rwmmio_read, - TP_PROTO(unsigned long caller, u8 width, const volatile void __iomem *addr), + TP_PROTO(unsigned long caller, unsigned long caller0, u8 width, + const volatile void __iomem *addr), - TP_ARGS(caller, width, addr), + TP_ARGS(caller, caller0, width, addr), TP_STRUCT__entry( __field(unsigned long, caller) + __field(unsigned long, caller0) __field(unsigned long, addr) __field(u8, width) ), TP_fast_assign( __entry->caller = caller; + __entry->caller0 = caller0; __entry->addr = (unsigned long)addr; __entry->width = width; ), - TP_printk("%pS width=%d addr=%#lx", - (void *)__entry->caller, __entry->width, __entry->addr) + TP_printk("%pS -> %pS width=%d addr=%#lx", + (void *)__entry->caller0, (void *)__entry->caller, __entry->width, __entry->addr) ); TRACE_EVENT(rwmmio_post_read, - TP_PROTO(unsigned long caller, u64 val, u8 width, const volatile void __iomem *addr), + TP_PROTO(unsigned long caller, unsigned long caller0, u64 val, u8 width, + const volatile void __iomem *addr), - TP_ARGS(caller, val, width, addr), + TP_ARGS(caller, caller0, val, width, addr), TP_STRUCT__entry( __field(unsigned long, caller) + __field(unsigned long, caller0) __field(unsigned long, 
addr) __field(u64, val) __field(u8, width) @@ -82,13 +92,14 @@ TRACE_EVENT(rwmmio_post_read, TP_fast_assign( __entry->caller = caller; + __entry->caller0 = caller0; __entry->val = val; __entry->addr = (unsigned long)addr; __entry->width = width; ), - TP_printk("%pS width=%d val=%#llx addr=%#lx", - (void *)__entry->caller, __entry->width, + TP_printk("%pS -> %pS width=%d val=%#llx addr=%#lx", + (void *)__entry->caller0, (void *)__entry->caller, __entry->width, __entry->val, __entry->addr) ); diff --git a/lib/trace_readwrite.c b/lib/trace_readwrite.c index 88637038b30c..62b4e8b3c733 100644 --- a/lib/trace_readwrite.c +++ b/lib/trace_readwrite.c @@ -14,33 +14,33 @@ #ifdef CONFIG_TRACE_MMIO_ACCESS void log_write_mmio(u64 val, u8 width, volatile void __iomem *addr, - unsigned long caller_addr) + unsigned long caller_addr, unsigned long caller_addr0) { - trace_rwmmio_write(caller_addr, val, width, addr); + trace_rwmmio_write(caller_addr, caller_addr0, val, width, addr); } EXPORT_SYMBOL_GPL(log_write_mmio); EXPORT_TRACEPOINT_SYMBOL_GPL(rwmmio_write); void log_post_write_mmio(u64 val, u8 width, volatile void __iomem *addr, - unsigned long caller_addr) + unsigned long caller_addr, unsigned long caller_addr0) { - trace_rwmmio_post_write(caller_addr, val, width, addr); + trace_rwmmio_post_write(caller_addr, caller_addr0, val, width, addr); } EXPORT_SYMBOL_GPL(log_post_write_mmio); EXPORT_TRACEPOINT_SYMBOL_GPL(rwmmio_post_write); void log_read_mmio(u8 width, const volatile void __iomem *addr, - unsigned long caller_addr) + unsigned long caller_addr, unsigned long caller_addr0) { - trace_rwmmio_read(caller_addr, width, addr); + trace_rwmmio_read(caller_addr, caller_addr0, width, addr); } EXPORT_SYMBOL_GPL(log_read_mmio); EXPORT_TRACEPOINT_SYMBOL_GPL(rwmmio_read); void log_post_read_mmio(u64 val, u8 width, const volatile void __iomem *addr, - unsigned long caller_addr) + unsigned long caller_addr, unsigned long caller_addr0) { - trace_rwmmio_post_read(caller_addr, val, width, 
addr); + trace_rwmmio_post_read(caller_addr, caller_addr0, val, width, addr); } EXPORT_SYMBOL_GPL(log_post_read_mmio); EXPORT_TRACEPOINT_SYMBOL_GPL(rwmmio_post_read); From 2d9cd957d40c3ac491b358e7cff0515bb07a3a9c Mon Sep 17 00:00:00 2001 From: Zeng Heng Date: Mon, 21 Nov 2022 10:00:29 +0800 Subject: [PATCH 1932/4122] PCI: Check for alloc failure in pci_request_irq() When kvasprintf() fails to allocate memory, it returns a NULL pointer. Return error from pci_request_irq() so we don't dereference it. [bhelgaas: commit log] Fixes: 704e8953d3e9 ("PCI/irq: Add pci_request_irq() and pci_free_irq() helpers") Link: https://lore.kernel.org/r/20221121020029.3759444-1-zengheng4@huawei.com Signed-off-by: Zeng Heng Signed-off-by: Bjorn Helgaas Reviewed-by: Christoph Hellwig --- drivers/pci/irq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pci/irq.c b/drivers/pci/irq.c index 12ecd0aaa28d..0050e8f6814e 100644 --- a/drivers/pci/irq.c +++ b/drivers/pci/irq.c @@ -44,6 +44,8 @@ int pci_request_irq(struct pci_dev *dev, unsigned int nr, irq_handler_t handler, va_start(ap, fmt); devname = kvasprintf(GFP_KERNEL, fmt, ap); va_end(ap); + if (!devname) + return -ENOMEM; ret = request_threaded_irq(pci_irq_vector(dev, nr), handler, thread_fn, irqflags, devname, dev_id); From 9b51d072da1d27e1193e84708201c48e385ad912 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Thu, 17 Nov 2022 21:15:46 +0800 Subject: [PATCH 1933/4122] RDMA/hfi: Decrease PCI device reference count in error path pci_get_device() will increase the reference count for the returned pci_dev, and also decrease the reference count for the input parameter *from* if it is not NULL. If we break out the loop in node_affinity_init() with 'dev' not NULL, we need to call pci_dev_put() to decrease the reference count. Add missing pci_dev_put() in error path. 
Fixes: c513de490f80 ("IB/hfi1: Invalid NUMA node information can cause a divide by zero") Signed-off-by: Xiongfeng Wang Link: https://lore.kernel.org/r/20221117131546.113280-1-wangxiongfeng2@huawei.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hfi1/affinity.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index 877f8e84a672..77ee77d4000f 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -177,6 +177,8 @@ out: for (node = 0; node < node_affinity.num_possible_nodes; node++) hfi1_per_node_cntr[node] = 1; + pci_dev_put(dev); + return 0; } From 8e96729fc26c8967db45a3fb7a60387619f77a22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 21 Nov 2022 18:22:36 +0100 Subject: [PATCH 1934/4122] crypto: ccree - Make cc_debugfs_global_fini() available for module init function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ccree_init() calls cc_debugfs_global_fini(), the former is an init function and the latter an exit function though. A modular build emits: WARNING: modpost: drivers/crypto/ccree/ccree.o: section mismatch in reference: init_module (section: .init.text) -> cc_debugfs_global_fini (section: .exit.text) (with CONFIG_DEBUG_SECTION_MISMATCH=y). 
Fixes: 4f1c596df706 ("crypto: ccree - Remove debugfs when platform_driver_register failed") Signed-off-by: Uwe Kleine-König Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/ccree/cc_debugfs.c b/drivers/crypto/ccree/cc_debugfs.c index 7083767602fc..8f008f024f8f 100644 --- a/drivers/crypto/ccree/cc_debugfs.c +++ b/drivers/crypto/ccree/cc_debugfs.c @@ -55,7 +55,7 @@ void __init cc_debugfs_global_init(void) cc_debugfs_dir = debugfs_create_dir("ccree", NULL); } -void __exit cc_debugfs_global_fini(void) +void cc_debugfs_global_fini(void) { debugfs_remove(cc_debugfs_dir); } From 357057ee55d3c99a5de5abe8150f7bca04f8e53b Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Thu, 17 Nov 2022 11:59:14 +0800 Subject: [PATCH 1935/4122] staging: vme_user: Fix possible UAF in tsi148_dma_list_add Smatch reports the following warning: drivers/staging/vme_user/vme_tsi148.c:1757 tsi148_dma_list_add() warn: '&entry->list' not removed from list In tsi148_dma_list_add(), the error path "goto err_dma" will not remove entry->list from list->entries, but entry will be freed, then list traversal may cause UAF. Fix by removing it from list->entries before free().
Fixes: b2383c90a9d6 ("vme: tsi148: fix first DMA item mapping") Signed-off-by: Gaosheng Cui Link: https://lore.kernel.org/r/20221117035914.2954454-1-cuigaosheng1@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vme_user/vme_tsi148.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/vme_user/vme_tsi148.c b/drivers/staging/vme_user/vme_tsi148.c index 020e0b3bce64..0171f46d1848 100644 --- a/drivers/staging/vme_user/vme_tsi148.c +++ b/drivers/staging/vme_user/vme_tsi148.c @@ -1751,6 +1751,7 @@ static int tsi148_dma_list_add(struct vme_dma_list *list, return 0; err_dma: + list_del(&entry->list); err_dest: err_source: err_align: From 2b7962bd05163f5b20fb5f933092b997debf8ed6 Mon Sep 17 00:00:00 2001 From: Brent Pappas Date: Thu, 17 Nov 2022 14:54:43 -0500 Subject: [PATCH 1936/4122] staging: gdm724x: Replace macro GDM_TTY_READY with static inline function Replace the macro GDM_TTY_READY with a static inline function to follow the Linux kernel coding style. Signed-off-by: Brent Pappas Link: https://lore.kernel.org/r/20221117195443.19616-1-bpappas@pappasbrent.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gdm724x/gdm_tty.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/staging/gdm724x/gdm_tty.c b/drivers/staging/gdm724x/gdm_tty.c index cc6d80554c98..e1a84d6020f4 100644 --- a/drivers/staging/gdm724x/gdm_tty.c +++ b/drivers/staging/gdm724x/gdm_tty.c @@ -21,7 +21,10 @@ #define MUX_TX_MAX_SIZE 2048 -#define GDM_TTY_READY(gdm) (gdm && gdm->tty_dev && gdm->port.count) +static inline bool gdm_tty_ready(struct gdm *gdm) +{ + return gdm && gdm->tty_dev && gdm->port.count; +} static struct tty_driver *gdm_driver[TTY_MAX_COUNT]; static struct gdm *gdm_table[TTY_MAX_COUNT][GDM_TTY_MINOR]; @@ -113,7 +116,7 @@ static int gdm_tty_recv_complete(void *data, { struct gdm *gdm = tty_dev->gdm[index]; - if (!GDM_TTY_READY(gdm)) { + if (!gdm_tty_ready(gdm)) { if (complete == RECV_PACKET_PROCESS_COMPLETE) 
gdm->tty_dev->recv_func(gdm->tty_dev->priv_dev, gdm_tty_recv_complete); @@ -140,7 +143,7 @@ static void gdm_tty_send_complete(void *arg) { struct gdm *gdm = arg; - if (!GDM_TTY_READY(gdm)) + if (!gdm_tty_ready(gdm)) return; tty_port_tty_wakeup(&gdm->port); @@ -154,7 +157,7 @@ static int gdm_tty_write(struct tty_struct *tty, const unsigned char *buf, int sent_len = 0; int sending_len = 0; - if (!GDM_TTY_READY(gdm)) + if (!gdm_tty_ready(gdm)) return -ENODEV; if (!len) @@ -181,7 +184,7 @@ static unsigned int gdm_tty_write_room(struct tty_struct *tty) { struct gdm *gdm = tty->driver_data; - if (!GDM_TTY_READY(gdm)) + if (!gdm_tty_ready(gdm)) return 0; return WRITE_SIZE; From 733611730676de202fade0cb73792c17d5aa9903 Mon Sep 17 00:00:00 2001 From: Umang Jain Date: Fri, 18 Nov 2022 14:12:42 +0530 Subject: [PATCH 1937/4122] Revert "staging: mmal-vchiq: Avoid use of bool in structures" This reverts commit 640e77466e69d9c28de227bc76881f5501f532ca. In commit 7967656ffbfa ("coding-style: Clarify the expectations around bool") the check to dis-allow bool structure members was removed from checkpatch.pl. It promotes bool structure members to store boolean values. This enhances code readability. 
Signed-off-by: Umang Jain Reviewed-by: Dave Stevenson Reviewed-by: Kieran Bingham Link: https://lore.kernel.org/r/20221118084244.199909-2-umang.jain@ideasonboard.com Signed-off-by: Greg Kroah-Hartman --- .../staging/vc04_services/vchiq-mmal/mmal-vchiq.c | 12 ++++++------ .../staging/vc04_services/vchiq-mmal/mmal-vchiq.h | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c b/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c index cb921c94996a..4abb6178cb9f 100644 --- a/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c +++ b/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c @@ -863,9 +863,9 @@ static int port_info_get(struct vchiq_mmal_instance *instance, goto release_msg; if (rmsg->u.port_info_get_reply.port.is_enabled == 0) - port->enabled = 0; + port->enabled = false; else - port->enabled = 1; + port->enabled = true; /* copy the values out of the message */ port->handle = rmsg->u.port_info_get_reply.port_handle; @@ -1304,7 +1304,7 @@ static int port_disable(struct vchiq_mmal_instance *instance, if (!port->enabled) return 0; - port->enabled = 0; + port->enabled = false; ret = port_action_port(instance, port, MMAL_MSG_PORT_ACTION_TYPE_DISABLE); @@ -1359,7 +1359,7 @@ static int port_enable(struct vchiq_mmal_instance *instance, if (ret) goto done; - port->enabled = 1; + port->enabled = true; if (port->buffer_cb) { /* send buffer headers to videocore */ @@ -1531,7 +1531,7 @@ int vchiq_mmal_port_connect_tunnel(struct vchiq_mmal_instance *instance, pr_err("failed disconnecting src port\n"); goto release_unlock; } - src->connected->enabled = 0; + src->connected->enabled = false; src->connected = NULL; } @@ -1799,7 +1799,7 @@ int vchiq_mmal_component_disable(struct vchiq_mmal_instance *instance, ret = disable_component(instance, component); if (ret == 0) - component->enabled = 0; + component->enabled = false; mutex_unlock(&instance->vchiq_mutex); diff --git 
a/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.h b/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.h index 6006e29232b3..6d984cf5a83a 100644 --- a/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.h +++ b/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.h @@ -48,7 +48,7 @@ typedef void (*vchiq_mmal_buffer_cb)( int status, struct mmal_buffer *buffer); struct vchiq_mmal_port { - u32 enabled:1; + bool enabled; u32 handle; u32 type; /* port type, cached to use on port info set */ u32 index; /* port index, cached to use on port info set */ @@ -83,7 +83,7 @@ struct vchiq_mmal_port { struct vchiq_mmal_component { u32 in_use:1; - u32 enabled:1; + bool enabled; u32 handle; /* VideoCore handle for component */ u32 inputs; /* Number of input ports */ u32 outputs; /* Number of output ports */ From c0012a39cf6c7197ad93da0bdba7245c094f8469 Mon Sep 17 00:00:00 2001 From: Umang Jain Date: Fri, 18 Nov 2022 14:12:43 +0530 Subject: [PATCH 1938/4122] vc04_services: mmal-vchiq: Use bool for vchiq_mmal_component.in_use In commit 7967656ffbfa ("coding-style: Clarify the expectations around bool") the check to dis-allow bool structure members was removed from checkpatch.pl. It promotes bool structure members to store boolean values. This enhances code readability. 
Signed-off-by: Umang Jain Reviewed-by: Kieran Bingham Reviewed-by: Dave Stevenson Link: https://lore.kernel.org/r/20221118084244.199909-3-umang.jain@ideasonboard.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c | 6 +++--- drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c b/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c index 4abb6178cb9f..294b184d4a49 100644 --- a/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c +++ b/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c @@ -1648,7 +1648,7 @@ int vchiq_mmal_component_init(struct vchiq_mmal_instance *instance, for (idx = 0; idx < VCHIQ_MMAL_MAX_COMPONENTS; idx++) { if (!instance->component[idx].in_use) { component = &instance->component[idx]; - component->in_use = 1; + component->in_use = true; break; } } @@ -1724,7 +1724,7 @@ release_component: destroy_component(instance, component); unlock: if (component) - component->in_use = 0; + component->in_use = false; mutex_unlock(&instance->vchiq_mutex); return ret; @@ -1747,7 +1747,7 @@ int vchiq_mmal_component_finalise(struct vchiq_mmal_instance *instance, ret = destroy_component(instance, component); - component->in_use = 0; + component->in_use = false; mutex_unlock(&instance->vchiq_mutex); diff --git a/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.h b/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.h index 6d984cf5a83a..09f030919d4e 100644 --- a/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.h +++ b/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.h @@ -82,7 +82,7 @@ struct vchiq_mmal_port { }; struct vchiq_mmal_component { - u32 in_use:1; + bool in_use; bool enabled; u32 handle; /* VideoCore handle for component */ u32 inputs; /* Number of input ports */ From f198d34759eb3d110d37bb42f6c39cd90bd0b0cb Mon Sep 17 00:00:00 2001 From: Umang Jain Date: Fri, 18 Nov 2022 
14:12:44 +0530 Subject: [PATCH 1939/4122] vc04_services: bcm2835-camera: Use bool values for mmal_fmt.remove_padding mmal_fmt.remove_padding is defined as a boolean type hence, use boolean values for it instead of 0/1 integers. This enhances code readability. Signed-off-by: Umang Jain Reviewed-by: Dave Stevenson Reviewed-by: Kieran Bingham Link: https://lore.kernel.org/r/20221118084244.199909-4-umang.jain@ideasonboard.com Signed-off-by: Greg Kroah-Hartman --- .../bcm2835-camera/bcm2835-camera.c | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/staging/vc04_services/bcm2835-camera/bcm2835-camera.c b/drivers/staging/vc04_services/bcm2835-camera/bcm2835-camera.c index fd456d1f7061..797ebe2a973a 100644 --- a/drivers/staging/vc04_services/bcm2835-camera/bcm2835-camera.c +++ b/drivers/staging/vc04_services/bcm2835-camera/bcm2835-camera.c @@ -87,21 +87,21 @@ static struct mmal_fmt formats[] = { .depth = 12, .mmal_component = COMP_CAMERA, .ybbp = 1, - .remove_padding = 1, + .remove_padding = true, }, { .fourcc = V4L2_PIX_FMT_YUYV, .mmal = MMAL_ENCODING_YUYV, .depth = 16, .mmal_component = COMP_CAMERA, .ybbp = 2, - .remove_padding = 0, + .remove_padding = false, }, { .fourcc = V4L2_PIX_FMT_RGB24, .mmal = MMAL_ENCODING_RGB24, .depth = 24, .mmal_component = COMP_CAMERA, .ybbp = 3, - .remove_padding = 0, + .remove_padding = false, }, { .fourcc = V4L2_PIX_FMT_JPEG, .flags = V4L2_FMT_FLAG_COMPRESSED, @@ -109,7 +109,7 @@ static struct mmal_fmt formats[] = { .depth = 8, .mmal_component = COMP_IMAGE_ENCODE, .ybbp = 0, - .remove_padding = 0, + .remove_padding = false, }, { .fourcc = V4L2_PIX_FMT_H264, .flags = V4L2_FMT_FLAG_COMPRESSED, @@ -117,7 +117,7 @@ static struct mmal_fmt formats[] = { .depth = 8, .mmal_component = COMP_VIDEO_ENCODE, .ybbp = 0, - .remove_padding = 0, + .remove_padding = false, }, { .fourcc = V4L2_PIX_FMT_MJPEG, .flags = V4L2_FMT_FLAG_COMPRESSED, @@ -125,63 +125,63 @@ static struct mmal_fmt formats[] = { .depth = 8, 
.mmal_component = COMP_VIDEO_ENCODE, .ybbp = 0, - .remove_padding = 0, + .remove_padding = false, }, { .fourcc = V4L2_PIX_FMT_YVYU, .mmal = MMAL_ENCODING_YVYU, .depth = 16, .mmal_component = COMP_CAMERA, .ybbp = 2, - .remove_padding = 0, + .remove_padding = false, }, { .fourcc = V4L2_PIX_FMT_VYUY, .mmal = MMAL_ENCODING_VYUY, .depth = 16, .mmal_component = COMP_CAMERA, .ybbp = 2, - .remove_padding = 0, + .remove_padding = false, }, { .fourcc = V4L2_PIX_FMT_UYVY, .mmal = MMAL_ENCODING_UYVY, .depth = 16, .mmal_component = COMP_CAMERA, .ybbp = 2, - .remove_padding = 0, + .remove_padding = false, }, { .fourcc = V4L2_PIX_FMT_NV12, .mmal = MMAL_ENCODING_NV12, .depth = 12, .mmal_component = COMP_CAMERA, .ybbp = 1, - .remove_padding = 1, + .remove_padding = true, }, { .fourcc = V4L2_PIX_FMT_BGR24, .mmal = MMAL_ENCODING_BGR24, .depth = 24, .mmal_component = COMP_CAMERA, .ybbp = 3, - .remove_padding = 0, + .remove_padding = false, }, { .fourcc = V4L2_PIX_FMT_YVU420, .mmal = MMAL_ENCODING_YV12, .depth = 12, .mmal_component = COMP_CAMERA, .ybbp = 1, - .remove_padding = 1, + .remove_padding = true, }, { .fourcc = V4L2_PIX_FMT_NV21, .mmal = MMAL_ENCODING_NV21, .depth = 12, .mmal_component = COMP_CAMERA, .ybbp = 1, - .remove_padding = 1, + .remove_padding = true, }, { .fourcc = V4L2_PIX_FMT_BGR32, .mmal = MMAL_ENCODING_BGRA, .depth = 32, .mmal_component = COMP_CAMERA, .ybbp = 4, - .remove_padding = 0, + .remove_padding = false, }, }; @@ -1147,7 +1147,7 @@ static int mmal_setup_components(struct bcm2835_mmal_dev *dev, struct vchiq_mmal_port *port = NULL, *camera_port = NULL; struct vchiq_mmal_component *encode_component = NULL; struct mmal_fmt *mfmt = get_format(f); - u32 remove_padding; + bool remove_padding; if (!mfmt) return -EINVAL; From 9a2c1d64c8eb4fab0387c0943eb6666b246f96aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:45:03 +0100 Subject: [PATCH 1940/4122] staging: most: i2c: Convert to i2c's .probe_new() MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221118224540.619276-570-uwe@kleine-koenig.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/most/i2c/i2c.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/most/i2c/i2c.c b/drivers/staging/most/i2c/i2c.c index 285a071f02be..df53a4c4f850 100644 --- a/drivers/staging/most/i2c/i2c.c +++ b/drivers/staging/most/i2c/i2c.c @@ -284,7 +284,7 @@ static irqreturn_t most_irq_handler(int irq, void *_dev) * * Register the i2c client device as a MOST interface */ -static int i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) +static int i2c_probe(struct i2c_client *client) { struct hdm_i2c *dev; int ret, i; @@ -359,7 +359,7 @@ static struct i2c_driver i2c_driver = { .driver = { .name = "hdm_i2c", }, - .probe = i2c_probe, + .probe_new = i2c_probe, .remove = i2c_remove, .id_table = i2c_id, }; From b62649822e8c03ec1319f9d33c753106b8c80bcd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:45:04 +0100 Subject: [PATCH 1941/4122] staging: olpc_dcon: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221118224540.619276-571-uwe@kleine-koenig.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/olpc_dcon/olpc_dcon.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/olpc_dcon/olpc_dcon.c b/drivers/staging/olpc_dcon/olpc_dcon.c index 4fb9b9f10799..2fba52e0bd7b 100644 --- a/drivers/staging/olpc_dcon/olpc_dcon.c +++ b/drivers/staging/olpc_dcon/olpc_dcon.c @@ -579,7 +579,7 @@ static int dcon_detect(struct i2c_client *client, struct i2c_board_info *info) return 0; } -static int dcon_probe(struct i2c_client *client, const struct i2c_device_id *id) +static int dcon_probe(struct i2c_client *client) { struct dcon_priv *dcon; int rc, i, j; @@ -779,7 +779,7 @@ static struct i2c_driver dcon_driver = { }, .class = I2C_CLASS_DDC | I2C_CLASS_HWMON, .id_table = dcon_idtable, - .probe = dcon_probe, + .probe_new = dcon_probe, .remove = dcon_remove, .detect = dcon_detect, .address_list = normal_i2c, From 9dadff066244543780e5d9ee406b3ec7af19e22c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:45:02 +0100 Subject: [PATCH 1942/4122] staging: iio: ade7854: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221118224540.619276-569-uwe@kleine-koenig.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/iio/meter/ade7854-i2c.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/staging/iio/meter/ade7854-i2c.c b/drivers/staging/iio/meter/ade7854-i2c.c index a9a06e8dda51..70f64b68f5b9 100644 --- a/drivers/staging/iio/meter/ade7854-i2c.c +++ b/drivers/staging/iio/meter/ade7854-i2c.c @@ -109,8 +109,7 @@ unlock: return ret; } -static int ade7854_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int ade7854_i2c_probe(struct i2c_client *client) { struct ade7854_state *st; struct iio_dev *indio_dev; @@ -141,7 +140,7 @@ static struct i2c_driver ade7854_i2c_driver = { .driver = { .name = "ade7854", }, - .probe = ade7854_i2c_probe, + .probe_new = ade7854_i2c_probe, .id_table = ade7854_id, }; module_i2c_driver(ade7854_i2c_driver); From ec62b4424174f41bdcedd08d12d7bed80088453d Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 22 Nov 2022 08:29:43 +0800 Subject: [PATCH 1943/4122] iommu/vt-d: Allocate pasid table in device probe path Whether or not a domain is attached to the device, the pasid table should always be valid as long as it has been probed. This moves the pasid table allocation from the domain attaching device path to device probe path and frees it in the device release path. 
Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20221118132451.114406-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 996a8b5ee5ee..6b8a24f68da8 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -2475,13 +2475,6 @@ static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev) /* PASID table is mandatory for a PCI device in scalable mode. */ if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) { - ret = intel_pasid_alloc_table(dev); - if (ret) { - dev_err(dev, "PASID table allocation failed\n"); - dmar_remove_one_dev_info(dev); - return ret; - } - /* Setup the PASID entry for requests without PASID: */ if (hw_pass_through && domain_type_is_si(domain)) ret = intel_pasid_setup_pass_through(iommu, domain, @@ -4106,7 +4099,6 @@ static void dmar_remove_one_dev_info(struct device *dev) iommu_disable_dev_iotlb(info); domain_context_clear(info); - intel_pasid_free_table(info->dev); } spin_lock_irqsave(&domain->lock, flags); @@ -4466,6 +4458,7 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) struct device_domain_info *info; struct intel_iommu *iommu; u8 bus, devfn; + int ret; iommu = device_to_iommu(dev, &bus, &devfn); if (!iommu || !iommu->iommu.ops) @@ -4509,6 +4502,16 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) dev_iommu_priv_set(dev, info); + if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) { + ret = intel_pasid_alloc_table(dev); + if (ret) { + dev_err(dev, "PASID table allocation failed\n"); + dev_iommu_priv_set(dev, NULL); + kfree(info); + return ERR_PTR(ret); + } + } + return &iommu->iommu; } @@ -4517,6 +4520,7 @@ static void intel_iommu_release_device(struct device *dev) struct device_domain_info *info = dev_iommu_priv_get(dev); 
dmar_remove_one_dev_info(dev); + intel_pasid_free_table(dev); dev_iommu_priv_set(dev, NULL); kfree(info); set_dma_ops(dev, NULL); From c7be17c2903d4acbf9aa372bfb6e2a418387fce0 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 22 Nov 2022 08:29:44 +0800 Subject: [PATCH 1944/4122] iommu/vt-d: Add device_block_translation() helper If domain attaching to device fails, the IOMMU driver should bring the device to blocking DMA state. The upper layer is expected to recover it by attaching a new domain. Use device_block_translation() in the error path of dev_attach to make the behavior specific. The difference between device_block_translation() and the previous dmar_remove_one_dev_info() is that, in the scalable mode, it is the RID2PASID entry instead of context entry being cleared. As a result, enabling PCI capabilities is moved up. Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20221118132451.114406-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 44 ++++++++++++++++++++++++++++++++----- 1 file changed, 38 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 6b8a24f68da8..6aafb86ef5c3 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -277,7 +277,7 @@ static LIST_HEAD(dmar_satc_units); #define for_each_rmrr_units(rmrr) \ list_for_each_entry(rmrr, &dmar_rmrr_units, list) -static void dmar_remove_one_dev_info(struct device *dev); +static void device_block_translation(struct device *dev); int dmar_disabled = !IS_ENABLED(CONFIG_INTEL_IOMMU_DEFAULT_ON); int intel_iommu_sm = IS_ENABLED(CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON); @@ -1400,7 +1400,7 @@ static void iommu_enable_pci_caps(struct device_domain_info *info) { struct pci_dev *pdev; - if (!info || !dev_is_pci(info->dev)) + if (!dev_is_pci(info->dev)) return; pdev = to_pci_dev(info->dev); @@ -2045,7 +2045,6 @@ static int domain_context_mapping_one(struct dmar_domain 
*domain, } else { iommu_flush_write_buffer(iommu); } - iommu_enable_pci_caps(info); ret = 0; @@ -2487,7 +2486,7 @@ static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev) dev, PASID_RID2PASID); if (ret) { dev_err(dev, "Setup RID2PASID failed\n"); - dmar_remove_one_dev_info(dev); + device_block_translation(dev); return ret; } } @@ -2495,10 +2494,12 @@ static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev) ret = domain_context_mapping(domain, dev); if (ret) { dev_err(dev, "Domain context map failed\n"); - dmar_remove_one_dev_info(dev); + device_block_translation(dev); return ret; } + iommu_enable_pci_caps(info); + return 0; } @@ -4109,6 +4110,37 @@ static void dmar_remove_one_dev_info(struct device *dev) info->domain = NULL; } +/* + * Clear the page table pointer in context or pasid table entries so that + * all DMA requests without PASID from the device are blocked. If the page + * table has been set, clean up the data structures. + */ +static void device_block_translation(struct device *dev) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + unsigned long flags; + + iommu_disable_dev_iotlb(info); + if (!dev_is_real_dma_subdevice(dev)) { + if (sm_supported(iommu)) + intel_pasid_tear_down_entry(iommu, dev, + PASID_RID2PASID, false); + else + domain_context_clear(info); + } + + if (!info->domain) + return; + + spin_lock_irqsave(&info->domain->lock, flags); + list_del(&info->link); + spin_unlock_irqrestore(&info->domain->lock, flags); + + domain_detach_iommu(info->domain, iommu); + info->domain = NULL; +} + static int md_domain_init(struct dmar_domain *domain, int guest_width) { int adjust_width; @@ -4232,7 +4264,7 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, struct device_domain_info *info = dev_iommu_priv_get(dev); if (info->domain) - dmar_remove_one_dev_info(dev); + device_block_translation(dev); } ret = 
prepare_domain_attach_device(domain, dev); From 35a99c54dd60103930db4a472dd15f232e754867 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 22 Nov 2022 08:29:45 +0800 Subject: [PATCH 1945/4122] iommu/vt-d: Add blocking domain support The Intel IOMMU hardware supports blocking DMA transactions by clearing the translation table entries. This implements a real blocking domain to avoid using an empty UNMANAGED domain. The detach_dev callback of the domain ops is not used in any path. Remove it to avoid dead code as well. Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20221118132451.114406-4-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 6aafb86ef5c3..25c772e8106f 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -278,6 +278,7 @@ static LIST_HEAD(dmar_satc_units); list_for_each_entry(rmrr, &dmar_rmrr_units, list) static void device_block_translation(struct device *dev); +static void intel_iommu_domain_free(struct iommu_domain *domain); int dmar_disabled = !IS_ENABLED(CONFIG_INTEL_IOMMU_DEFAULT_ON); int intel_iommu_sm = IS_ENABLED(CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON); @@ -4162,12 +4163,28 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) return 0; } +static int blocking_domain_attach_dev(struct iommu_domain *domain, + struct device *dev) +{ + device_block_translation(dev); + return 0; +} + +static struct iommu_domain blocking_domain = { + .ops = &(const struct iommu_domain_ops) { + .attach_dev = blocking_domain_attach_dev, + .free = intel_iommu_domain_free + } +}; + static struct iommu_domain *intel_iommu_domain_alloc(unsigned type) { struct dmar_domain *dmar_domain; struct iommu_domain *domain; switch (type) { + case IOMMU_DOMAIN_BLOCKED: + return &blocking_domain; case
IOMMU_DOMAIN_DMA: case IOMMU_DOMAIN_DMA_FQ: case IOMMU_DOMAIN_UNMANAGED: @@ -4200,7 +4217,7 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type) static void intel_iommu_domain_free(struct iommu_domain *domain) { - if (domain != &si_domain->domain) + if (domain != &si_domain->domain && domain != &blocking_domain) domain_exit(to_dmar_domain(domain)); } @@ -4274,12 +4291,6 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, return domain_add_dev_info(to_dmar_domain(domain), dev); } -static void intel_iommu_detach_device(struct iommu_domain *domain, - struct device *dev) -{ - dmar_remove_one_dev_info(dev); -} - static int intel_iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t hpa, size_t size, int iommu_prot, gfp_t gfp) @@ -4767,7 +4778,6 @@ const struct iommu_ops intel_iommu_ops = { #endif .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = intel_iommu_attach_device, - .detach_dev = intel_iommu_detach_device, .map_pages = intel_iommu_map_pages, .unmap_pages = intel_iommu_unmap_pages, .iotlb_sync_map = intel_iommu_iotlb_sync_map, From ba502132f5430d66f768569f2af32b8f268322a8 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 22 Nov 2022 08:29:46 +0800 Subject: [PATCH 1946/4122] iommu/vt-d: Rename iommu_disable_dev_iotlb() Rename iommu_disable_dev_iotlb() to iommu_disable_pci_caps() to pair with iommu_enable_pci_caps(). 
Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20221118132451.114406-5-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 25c772e8106f..a5885665ccef 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1441,7 +1441,7 @@ static void iommu_enable_pci_caps(struct device_domain_info *info) } } -static void iommu_disable_dev_iotlb(struct device_domain_info *info) +static void iommu_disable_pci_caps(struct device_domain_info *info) { struct pci_dev *pdev; @@ -4099,7 +4099,7 @@ static void dmar_remove_one_dev_info(struct device *dev) intel_pasid_tear_down_entry(iommu, info->dev, PASID_RID2PASID, false); - iommu_disable_dev_iotlb(info); + iommu_disable_pci_caps(info); domain_context_clear(info); } @@ -4122,7 +4122,7 @@ static void device_block_translation(struct device *dev) struct intel_iommu *iommu = info->iommu; unsigned long flags; - iommu_disable_dev_iotlb(info); + iommu_disable_pci_caps(info); if (!dev_is_real_dma_subdevice(dev)) { if (sm_supported(iommu)) intel_pasid_tear_down_entry(iommu, dev, From a8204479f284a9d21c22e2fd7c9f7564b5828553 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 22 Nov 2022 08:29:47 +0800 Subject: [PATCH 1947/4122] iommu/vt-d: Rename domain_add_dev_info() dmar_domain_attach_device() is more meaningful according to what this helper does. 
Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20221118132451.114406-6-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index a5885665ccef..3bd79ae238f2 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -2453,7 +2453,8 @@ static int __init si_domain_init(int hw) return 0; } -static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev) +static int dmar_domain_attach_device(struct dmar_domain *domain, + struct device *dev) { struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu; @@ -4288,7 +4289,7 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, if (ret) return ret; - return domain_add_dev_info(to_dmar_domain(domain), dev); + return dmar_domain_attach_device(to_dmar_domain(domain), dev); } static int intel_iommu_map(struct iommu_domain *domain, From b1cf1563f3b7396a2cb76b12b3bcdd7046b46372 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 22 Nov 2022 08:29:48 +0800 Subject: [PATCH 1948/4122] iommu/vt-d: Remove unnecessary domain_context_mapped() The device_domain_info::domain accurately records the domain attached to the device. It is unnecessary to check whether the context is present in the attach_dev path. Remove it to make the code neat. 
Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20221118132451.114406-7-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 47 +++---------------------------------- 1 file changed, 3 insertions(+), 44 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 3bd79ae238f2..3b37f1b3b6de 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -780,19 +780,6 @@ static void domain_flush_cache(struct dmar_domain *domain, clflush_cache_range(addr, size); } -static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn) -{ - struct context_entry *context; - int ret = 0; - - spin_lock(&iommu->lock); - context = iommu_context_addr(iommu, bus, devfn, 0); - if (context) - ret = context_present(context); - spin_unlock(&iommu->lock); - return ret; -} - static void free_context_table(struct intel_iommu *iommu) { struct context_entry *context; @@ -2097,30 +2084,6 @@ domain_context_mapping(struct dmar_domain *domain, struct device *dev) &domain_context_mapping_cb, &data); } -static int domain_context_mapped_cb(struct pci_dev *pdev, - u16 alias, void *opaque) -{ - struct intel_iommu *iommu = opaque; - - return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff); -} - -static int domain_context_mapped(struct device *dev) -{ - struct intel_iommu *iommu; - u8 bus, devfn; - - iommu = device_to_iommu(dev, &bus, &devfn); - if (!iommu) - return -ENODEV; - - if (!dev_is_pci(dev)) - return device_context_mapped(iommu, bus, devfn); - - return !pci_for_each_dma_alias(to_pci_dev(dev), - domain_context_mapped_cb, iommu); -} - /* Returns a number of VTD pages, but aligned to MM page size */ static inline unsigned long aligned_nrpages(unsigned long host_addr, size_t size) @@ -4269,6 +4232,7 @@ static int prepare_domain_attach_device(struct iommu_domain *domain, static int intel_iommu_attach_device(struct iommu_domain *domain, struct device *dev) { + 
struct device_domain_info *info = dev_iommu_priv_get(dev); int ret; if (domain->type == IOMMU_DOMAIN_UNMANAGED && @@ -4277,13 +4241,8 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, return -EPERM; } - /* normally dev is not mapped */ - if (unlikely(domain_context_mapped(dev))) { - struct device_domain_info *info = dev_iommu_priv_get(dev); - - if (info->domain) - device_block_translation(dev); - } + if (info->domain) + device_block_translation(dev); ret = prepare_domain_attach_device(domain, dev); if (ret) From e5b0feb4361a4830b9133f57ed13923d70409b69 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 22 Nov 2022 08:29:49 +0800 Subject: [PATCH 1949/4122] iommu/vt-d: Use real field for indication of first level The dmar_domain uses bit field members to indicate the behaviors. Add a bit field for using first level and remove the flags member to avoid duplication. Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20221118132451.114406-8-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 25 ++++++++++--------------- drivers/iommu/intel/iommu.h | 15 +++++---------- 2 files changed, 15 insertions(+), 25 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 3b37f1b3b6de..a3db7ac3d60c 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -383,11 +383,6 @@ static inline int domain_type_is_si(struct dmar_domain *domain) return domain->domain.type == IOMMU_DOMAIN_IDENTITY; } -static inline bool domain_use_first_level(struct dmar_domain *domain) -{ - return domain->flags & DOMAIN_FLAG_USE_FIRST_LEVEL; -} - static inline int domain_pfn_supported(struct dmar_domain *domain, unsigned long pfn) { @@ -501,7 +496,7 @@ static int domain_update_iommu_superpage(struct dmar_domain *domain, rcu_read_lock(); for_each_active_iommu(iommu, drhd) { if (iommu != skip) { - if (domain && domain_use_first_level(domain)) { + if (domain && 
domain->use_first_level) { if (!cap_fl1gp_support(iommu->cap)) mask = 0x1; } else { @@ -579,7 +574,7 @@ static void domain_update_iommu_cap(struct dmar_domain *domain) * paging and 57-bits with 5-level paging). Hence, skip bit * [N-1]. */ - if (domain_use_first_level(domain)) + if (domain->use_first_level) domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw - 1); else domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw); @@ -947,7 +942,7 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE); pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE; - if (domain_use_first_level(domain)) + if (domain->use_first_level) pteval |= DMA_FL_PTE_XD | DMA_FL_PTE_US | DMA_FL_PTE_ACCESS; if (cmpxchg64(&pte->val, 0ULL, pteval)) @@ -1498,7 +1493,7 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, if (ih) ih = 1 << 6; - if (domain_use_first_level(domain)) { + if (domain->use_first_level) { qi_flush_piotlb(iommu, did, PASID_RID2PASID, addr, pages, ih); } else { unsigned long bitmask = aligned_pages - 1; @@ -1552,7 +1547,7 @@ static inline void __mapping_notify_one(struct intel_iommu *iommu, * It's a non-present to present mapping. Only flush if caching mode * and second level. 
*/ - if (cap_caching_mode(iommu->cap) && !domain_use_first_level(domain)) + if (cap_caching_mode(iommu->cap) && !domain->use_first_level) iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1); else iommu_flush_write_buffer(iommu); @@ -1568,7 +1563,7 @@ static void intel_flush_iotlb_all(struct iommu_domain *domain) struct intel_iommu *iommu = info->iommu; u16 did = domain_id_iommu(dmar_domain, iommu); - if (domain_use_first_level(dmar_domain)) + if (dmar_domain->use_first_level) qi_flush_piotlb(iommu, did, PASID_RID2PASID, 0, -1, 0); else iommu->flush.flush_iotlb(iommu, did, 0, 0, @@ -1741,7 +1736,7 @@ static struct dmar_domain *alloc_domain(unsigned int type) domain->nid = NUMA_NO_NODE; if (first_level_by_default(type)) - domain->flags |= DOMAIN_FLAG_USE_FIRST_LEVEL; + domain->use_first_level = true; domain->has_iotlb_device = false; INIT_LIST_HEAD(&domain->devices); spin_lock_init(&domain->lock); @@ -2173,7 +2168,7 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP); attr |= DMA_FL_PTE_PRESENT; - if (domain_use_first_level(domain)) { + if (domain->use_first_level) { attr |= DMA_FL_PTE_XD | DMA_FL_PTE_US | DMA_FL_PTE_ACCESS; if (prot & DMA_PTE_WRITE) attr |= DMA_FL_PTE_DIRTY; @@ -2443,7 +2438,7 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, if (hw_pass_through && domain_type_is_si(domain)) ret = intel_pasid_setup_pass_through(iommu, domain, dev, PASID_RID2PASID); - else if (domain_use_first_level(domain)) + else if (domain->use_first_level) ret = domain_setup_first_level(iommu, domain, dev, PASID_RID2PASID); else @@ -4412,7 +4407,7 @@ static void domain_set_force_snooping(struct dmar_domain *domain) * Second level page table supports per-PTE snoop control. The * iommu_map() interface will handle this by setting SNP bit. 
*/ - if (!domain_use_first_level(domain)) { + if (!domain->use_first_level) { domain->set_pte_snp = true; return; } diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 92023dff9513..30b0d72aeb6c 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -517,14 +517,6 @@ struct context_entry { u64 hi; }; -/* - * When VT-d works in the scalable mode, it allows DMA translation to - * happen through either first level or second level page table. This - * bit marks that the DMA translation for the domain goes through the - * first level page table, otherwise, it goes through the second level. - */ -#define DOMAIN_FLAG_USE_FIRST_LEVEL BIT(1) - struct iommu_domain_info { struct intel_iommu *iommu; unsigned int refcnt; /* Refcount of devices per iommu */ @@ -541,6 +533,11 @@ struct dmar_domain { u8 iommu_coherency: 1; /* indicate coherency of iommu access */ u8 force_snooping : 1; /* Create IOPTEs with snoop control */ u8 set_pte_snp:1; + u8 use_first_level:1; /* DMA translation for the domain goes + * through the first level page table, + * otherwise, goes through the second + * level. + */ spinlock_t lock; /* Protect device tracking lists */ struct list_head devices; /* all devices' list */ @@ -550,8 +547,6 @@ struct dmar_domain { /* adjusted guest address width, 0 is level 2 30-bit */ int agaw; - - int flags; /* flags to find out type of domain */ int iommu_superpage;/* Level of superpages supported: 0 == 4KiB (no superpages), 1 == 2MiB, 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */ From 3a5154c723ba5ceb9ce374a7307e03263c03fd29 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Fri, 18 Nov 2022 18:22:20 +0000 Subject: [PATCH 1950/4122] KVM: arm64: Take a pointer to walker data in kvm_dereference_pteref() Rather than passing through the state of the KVM_PGTABLE_WALK_SHARED flag, just take a pointer to the whole walker structure instead. 
Move around struct kvm_pgtable and the RCU indirection such that the associated ifdeffery remains in one place while ensuring the walker + flags definitions precede their use. No functional change intended. Signed-off-by: Oliver Upton Acked-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221118182222.3932898-2-oliver.upton@linux.dev --- arch/arm64/include/asm/kvm_pgtable.h | 144 ++++++++++++++------------- arch/arm64/kvm/hyp/pgtable.c | 6 +- 2 files changed, 76 insertions(+), 74 deletions(-) diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index a874ce0ce7b5..f23af693e3c5 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -37,54 +37,6 @@ static inline u64 kvm_get_parange(u64 mmfr0) typedef u64 kvm_pte_t; -/* - * RCU cannot be used in a non-kernel context such as the hyp. As such, page - * table walkers used in hyp do not call into RCU and instead use other - * synchronization mechanisms (such as a spinlock). 
- */ -#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__) - -typedef kvm_pte_t *kvm_pteref_t; - -static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared) -{ - return pteref; -} - -static inline void kvm_pgtable_walk_begin(void) {} -static inline void kvm_pgtable_walk_end(void) {} - -static inline bool kvm_pgtable_walk_lock_held(void) -{ - return true; -} - -#else - -typedef kvm_pte_t __rcu *kvm_pteref_t; - -static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared) -{ - return rcu_dereference_check(pteref, !shared); -} - -static inline void kvm_pgtable_walk_begin(void) -{ - rcu_read_lock(); -} - -static inline void kvm_pgtable_walk_end(void) -{ - rcu_read_unlock(); -} - -static inline bool kvm_pgtable_walk_lock_held(void) -{ - return rcu_read_lock_held(); -} - -#endif - #define KVM_PTE_VALID BIT(0) #define KVM_PTE_ADDR_MASK GENMASK(47, PAGE_SHIFT) @@ -212,29 +164,6 @@ enum kvm_pgtable_prot { typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end, enum kvm_pgtable_prot prot); -/** - * struct kvm_pgtable - KVM page-table. - * @ia_bits: Maximum input address size, in bits. - * @start_level: Level at which the page-table walk starts. - * @pgd: Pointer to the first top-level entry of the page-table. - * @mm_ops: Memory management callbacks. - * @mmu: Stage-2 KVM MMU struct. Unused for stage-1 page-tables. - * @flags: Stage-2 page-table flags. - * @force_pte_cb: Function that returns true if page level mappings must - * be used instead of block mappings. - */ -struct kvm_pgtable { - u32 ia_bits; - u32 start_level; - kvm_pteref_t pgd; - struct kvm_pgtable_mm_ops *mm_ops; - - /* Stage-2 only */ - struct kvm_s2_mmu *mmu; - enum kvm_pgtable_stage2_flags flags; - kvm_pgtable_force_pte_cb_t force_pte_cb; -}; - /** * enum kvm_pgtable_walk_flags - Flags to control a depth-first page-table walk. 
* @KVM_PGTABLE_WALK_LEAF: Visit leaf entries, including invalid @@ -285,6 +214,79 @@ struct kvm_pgtable_walker { const enum kvm_pgtable_walk_flags flags; }; +/* + * RCU cannot be used in a non-kernel context such as the hyp. As such, page + * table walkers used in hyp do not call into RCU and instead use other + * synchronization mechanisms (such as a spinlock). + */ +#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__) + +typedef kvm_pte_t *kvm_pteref_t; + +static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walker, + kvm_pteref_t pteref) +{ + return pteref; +} + +static inline void kvm_pgtable_walk_begin(void) {} +static inline void kvm_pgtable_walk_end(void) {} + +static inline bool kvm_pgtable_walk_lock_held(void) +{ + return true; +} + +#else + +typedef kvm_pte_t __rcu *kvm_pteref_t; + +static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walker, + kvm_pteref_t pteref) +{ + return rcu_dereference_check(pteref, !(walker->flags & KVM_PGTABLE_WALK_SHARED)); +} + +static inline void kvm_pgtable_walk_begin(void) +{ + rcu_read_lock(); +} + +static inline void kvm_pgtable_walk_end(void) +{ + rcu_read_unlock(); +} + +static inline bool kvm_pgtable_walk_lock_held(void) +{ + return rcu_read_lock_held(); +} + +#endif + +/** + * struct kvm_pgtable - KVM page-table. + * @ia_bits: Maximum input address size, in bits. + * @start_level: Level at which the page-table walk starts. + * @pgd: Pointer to the first top-level entry of the page-table. + * @mm_ops: Memory management callbacks. + * @mmu: Stage-2 KVM MMU struct. Unused for stage-1 page-tables. + * @flags: Stage-2 page-table flags. + * @force_pte_cb: Function that returns true if page level mappings must + * be used instead of block mappings. 
+ */ +struct kvm_pgtable { + u32 ia_bits; + u32 start_level; + kvm_pteref_t pgd; + struct kvm_pgtable_mm_ops *mm_ops; + + /* Stage-2 only */ + struct kvm_s2_mmu *mmu; + enum kvm_pgtable_stage2_flags flags; + kvm_pgtable_force_pte_cb_t force_pte_cb; +}; + /** * kvm_pgtable_hyp_init() - Initialise a hypervisor stage-1 page-table. * @pgt: Uninitialised page-table structure to initialise. diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 5bca9610d040..b5b91a882836 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -188,7 +188,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data, kvm_pteref_t pteref, u32 level) { enum kvm_pgtable_walk_flags flags = data->walker->flags; - kvm_pte_t *ptep = kvm_dereference_pteref(pteref, flags & KVM_PGTABLE_WALK_SHARED); + kvm_pte_t *ptep = kvm_dereference_pteref(data->walker, pteref); struct kvm_pgtable_visit_ctx ctx = { .ptep = ptep, .old = READ_ONCE(*ptep), @@ -558,7 +558,7 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt) }; WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker)); - pgt->mm_ops->put_page(kvm_dereference_pteref(pgt->pgd, false)); + pgt->mm_ops->put_page(kvm_dereference_pteref(&walker, pgt->pgd)); pgt->pgd = NULL; } @@ -1241,7 +1241,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker)); pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE; - pgt->mm_ops->free_pages_exact(kvm_dereference_pteref(pgt->pgd, false), pgd_sz); + pgt->mm_ops->free_pages_exact(kvm_dereference_pteref(&walker, pgt->pgd), pgd_sz); pgt->pgd = NULL; } From b7833bf202e3068abb77c642a0843f696e9c8d38 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Fri, 18 Nov 2022 18:22:21 +0000 Subject: [PATCH 1951/4122] KVM: arm64: Don't acquire RCU read lock for exclusive table walks Marek reported a BUG resulting from the recent parallel faults changes, as the hyp stage-1 map walker 
attempted to allocate table memory while holding the RCU read lock: BUG: sleeping function called from invalid context at include/linux/sched/mm.h:274 in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 1, name: swapper/0 preempt_count: 0, expected: 0 RCU nest depth: 1, expected: 0 2 locks held by swapper/0/1: #0: ffff80000a8a44d0 (kvm_hyp_pgd_mutex){+.+.}-{3:3}, at: __create_hyp_mappings+0x80/0xc4 #1: ffff80000a927720 (rcu_read_lock){....}-{1:2}, at: kvm_pgtable_walk+0x0/0x1f4 CPU: 2 PID: 1 Comm: swapper/0 Not tainted 6.1.0-rc3+ #5918 Hardware name: Raspberry Pi 3 Model B (DT) Call trace: dump_backtrace.part.0+0xe4/0xf0 show_stack+0x18/0x40 dump_stack_lvl+0x8c/0xb8 dump_stack+0x18/0x34 __might_resched+0x178/0x220 __might_sleep+0x48/0xa0 prepare_alloc_pages+0x178/0x1a0 __alloc_pages+0x9c/0x109c alloc_page_interleave+0x1c/0xc4 alloc_pages+0xec/0x160 get_zeroed_page+0x1c/0x44 kvm_hyp_zalloc_page+0x14/0x20 hyp_map_walker+0xd4/0x134 kvm_pgtable_visitor_cb.isra.0+0x38/0x5c __kvm_pgtable_walk+0x1a4/0x220 kvm_pgtable_walk+0x104/0x1f4 kvm_pgtable_hyp_map+0x80/0xc4 __create_hyp_mappings+0x9c/0xc4 kvm_mmu_init+0x144/0x1cc kvm_arch_init+0xe4/0xef4 kvm_init+0x3c/0x3d0 arm_init+0x20/0x30 do_one_initcall+0x74/0x400 kernel_init_freeable+0x2e0/0x350 kernel_init+0x24/0x130 ret_from_fork+0x10/0x20 Since the hyp stage-1 table walkers are serialized by kvm_hyp_pgd_mutex, RCU protection really doesn't add anything. Don't acquire the RCU read lock for an exclusive walk. 
Reported-by: Marek Szyprowski Signed-off-by: Oliver Upton Acked-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221118182222.3932898-3-oliver.upton@linux.dev --- arch/arm64/include/asm/kvm_pgtable.h | 14 ++++++++------ arch/arm64/kvm/hyp/pgtable.c | 4 ++-- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index f23af693e3c5..4b6b52ebc11c 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -229,8 +229,8 @@ static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walke return pteref; } -static inline void kvm_pgtable_walk_begin(void) {} -static inline void kvm_pgtable_walk_end(void) {} +static inline void kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker) {} +static inline void kvm_pgtable_walk_end(struct kvm_pgtable_walker *walker) {} static inline bool kvm_pgtable_walk_lock_held(void) { @@ -247,14 +247,16 @@ static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walke return rcu_dereference_check(pteref, !(walker->flags & KVM_PGTABLE_WALK_SHARED)); } -static inline void kvm_pgtable_walk_begin(void) +static inline void kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker) { - rcu_read_lock(); + if (walker->flags & KVM_PGTABLE_WALK_SHARED) + rcu_read_lock(); } -static inline void kvm_pgtable_walk_end(void) +static inline void kvm_pgtable_walk_end(struct kvm_pgtable_walker *walker) { - rcu_read_unlock(); + if (walker->flags & KVM_PGTABLE_WALK_SHARED) + rcu_read_unlock(); } static inline bool kvm_pgtable_walk_lock_held(void) diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index b5b91a882836..d6f3753cb87e 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -289,9 +289,9 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size, }; int r; - kvm_pgtable_walk_begin(); + kvm_pgtable_walk_begin(walker); r 
= _kvm_pgtable_walk(pgt, &walk_data); - kvm_pgtable_walk_end(); + kvm_pgtable_walk_end(walker); return r; } From 5e806c5812e8012a83496cf96bdba266b3aec428 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Fri, 18 Nov 2022 18:22:22 +0000 Subject: [PATCH 1952/4122] KVM: arm64: Reject shared table walks in the hyp code Exclusive table walks are the only supported table walk in the hyp, as there is no construct like RCU available in the hypervisor code. Reject any attempt to do a shared table walk by returning an error and allowing the caller to clean up the mess. Suggested-by: Will Deacon Signed-off-by: Oliver Upton Acked-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221118182222.3932898-4-oliver.upton@linux.dev --- arch/arm64/include/asm/kvm_pgtable.h | 17 +++++++++++++++-- arch/arm64/kvm/hyp/pgtable.c | 5 ++++- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 4b6b52ebc11c..d5cb01f8dc06 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -229,7 +229,18 @@ static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walke return pteref; } -static inline void kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker) {} +static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker) +{ + /* + * Due to the lack of RCU (or a similar protection scheme), only + * non-shared table walkers are allowed in the hypervisor. 
+ */ + if (walker->flags & KVM_PGTABLE_WALK_SHARED) + return -EPERM; + + return 0; +} + static inline void kvm_pgtable_walk_end(struct kvm_pgtable_walker *walker) {} static inline bool kvm_pgtable_walk_lock_held(void) @@ -247,10 +258,12 @@ static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walke return rcu_dereference_check(pteref, !(walker->flags & KVM_PGTABLE_WALK_SHARED)); } -static inline void kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker) +static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker) { if (walker->flags & KVM_PGTABLE_WALK_SHARED) rcu_read_lock(); + + return 0; } static inline void kvm_pgtable_walk_end(struct kvm_pgtable_walker *walker) diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index d6f3753cb87e..58dbe0ab567f 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -289,7 +289,10 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size, }; int r; - kvm_pgtable_walk_begin(walker); + r = kvm_pgtable_walk_begin(walker); + if (r) + return r; + r = _kvm_pgtable_walk(pgt, &walk_data); kvm_pgtable_walk_end(walker); From 9907526d25c4ad8a6e3006487a544140776ba005 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Mon, 21 Nov 2022 18:44:10 -0600 Subject: [PATCH 1953/4122] RDMA/irdma: Initialize net_type before checking it The av->net_type is not initialized before it is checked in irdma_modify_qp_roce. This leads to an incorrect update to the ARP cache and QP context. RoCEv2 connections might fail as result. Set the net_type using rdma_gid_attr_network_type. 
Fixes: 80005c43d4c8 ("RDMA/irdma: Use net_type to check network type") Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Link: https://lore.kernel.org/r/20221122004410.1471-1-shiraz.saleem@intel.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/verbs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index dc3f5f3fee90..f6973ea55eda 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -1213,6 +1213,7 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, av->attrs = attr->ah_attr; rdma_gid2ip((struct sockaddr *)&av->sgid_addr, &sgid_attr->gid); rdma_gid2ip((struct sockaddr *)&av->dgid_addr, &attr->ah_attr.grh.dgid); + av->net_type = rdma_gid_attr_network_type(sgid_attr); if (av->net_type == RDMA_NETWORK_IPV6) { __be32 *daddr = av->dgid_addr.saddr_in6.sin6_addr.in6_u.u6_addr32; From a115aa00b18f7b8982b8f458149632caf64a862a Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Sat, 19 Nov 2022 15:08:34 +0800 Subject: [PATCH 1954/4122] RDMA/hns: fix memory leak in hns_roce_alloc_mr() When hns_roce_mr_enable() failed in hns_roce_alloc_mr(), mr_key is not released. Compiled test only. 
Fixes: 9b2cf76c9f05 ("RDMA/hns: Optimize PBL buffer allocation process") Signed-off-by: Zhengchao Shao Link: https://lore.kernel.org/r/20221119070834.48502-1-shaozhengchao@huawei.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hns/hns_roce_mr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 845ac7d3831f..37a5cf62f88b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -392,10 +392,10 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, return &mr->ibmr; -err_key: - free_mr_key(hr_dev, mr); err_pbl: free_mr_pbl(hr_dev, mr); +err_key: + free_mr_key(hr_dev, mr); err_free: kfree(mr); return ERR_PTR(ret); From a2430b25c31840a6dcbf95c65415d5fee2984dbc Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 19 Nov 2022 04:15:50 +0900 Subject: [PATCH 1955/4122] kbuild: add kbuild-file macro While building, installing, cleaning, Kbuild visits sub-directories and includes 'Kbuild' or 'Makefile' that exists there. Add 'kbuild-file' macro, and reuse it from scripts/Makefile.* Signed-off-by: Masahiro Yamada Reviewed-by: Nicolas Schier Reviewed-by: Alexander Lobakin Tested-by: Alexander Lobakin --- scripts/Kbuild.include | 5 +++++ scripts/Makefile.asm-generic | 6 +++--- scripts/Makefile.build | 6 +----- scripts/Makefile.clean | 5 +---- scripts/Makefile.dtbinst | 2 +- scripts/Makefile.modpost | 2 +- 6 files changed, 12 insertions(+), 14 deletions(-) diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index 2bc08ace38a3..cbe28744637b 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -40,6 +40,11 @@ escsq = $(subst $(squote),'\$(squote)',$1) # Quote a string to pass it to C files. foo => '"foo"' stringify = $(squote)$(quote)$1$(quote)$(squote) +### +# The path to Kbuild or Makefile. Kbuild has precedence over Makefile. 
+kbuild-dir = $(if $(filter /%,$(src)),$(src),$(srctree)/$(src)) +kbuild-file = $(or $(wildcard $(kbuild-dir)/Kbuild),$(kbuild-dir)/Makefile) + ### # Easy method for doing a status message kecho := : diff --git a/scripts/Makefile.asm-generic b/scripts/Makefile.asm-generic index 1d501c57f9ef..8d01b37b7677 100644 --- a/scripts/Makefile.asm-generic +++ b/scripts/Makefile.asm-generic @@ -10,15 +10,15 @@ PHONY := all all: src := $(subst /generated,,$(obj)) --include $(src)/Kbuild + +include $(srctree)/scripts/Kbuild.include +-include $(kbuild-file) # $(generic)/Kbuild lists mandatory-y. Exclude um since it is a special case. ifneq ($(SRCARCH),um) include $(srctree)/$(generic)/Kbuild endif -include $(srctree)/scripts/Kbuild.include - redundant := $(filter $(mandatory-y) $(generated-y), $(generic-y)) redundant += $(foreach f, $(generic-y), $(if $(wildcard $(srctree)/$(src)/$(f)),$(f))) redundant := $(sort $(redundant)) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 41f3602fc8de..37cf88d076e8 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -38,11 +38,7 @@ subdir-ccflags-y := include $(srctree)/scripts/Kbuild.include include $(srctree)/scripts/Makefile.compiler - -# The filename Kbuild has precedence over Makefile -kbuild-dir := $(if $(filter /%,$(src)),$(src),$(srctree)/$(src)) -include $(or $(wildcard $(kbuild-dir)/Kbuild),$(kbuild-dir)/Makefile) - +include $(kbuild-file) include $(srctree)/scripts/Makefile.lib # Do not include hostprogs rules unless needed. 
diff --git a/scripts/Makefile.clean b/scripts/Makefile.clean index 878cec648959..3649900696dd 100644 --- a/scripts/Makefile.clean +++ b/scripts/Makefile.clean @@ -9,10 +9,7 @@ PHONY := __clean __clean: include $(srctree)/scripts/Kbuild.include - -# The filename Kbuild has precedence over Makefile -kbuild-dir := $(if $(filter /%,$(src)),$(src),$(srctree)/$(src)) -include $(or $(wildcard $(kbuild-dir)/Kbuild),$(kbuild-dir)/Makefile) +include $(kbuild-file) # Figure out what we need to build from the various variables # ========================================================================== diff --git a/scripts/Makefile.dtbinst b/scripts/Makefile.dtbinst index 190d781e84f4..2ab936e4179d 100644 --- a/scripts/Makefile.dtbinst +++ b/scripts/Makefile.dtbinst @@ -15,7 +15,7 @@ __dtbs_install: include include/config/auto.conf include $(srctree)/scripts/Kbuild.include -include $(src)/Makefile +include $(kbuild-file) dtbs := $(addprefix $(dst)/, $(dtb-y) $(if $(CONFIG_OF_ALL_DTBS),$(dtb-))) subdirs := $(addprefix $(obj)/, $(subdir-y) $(subdir-m)) diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index e41dee64d429..55a72f5eb76d 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -93,7 +93,7 @@ obj := $(KBUILD_EXTMOD) src := $(obj) # Include the module's Makefile to find KBUILD_EXTRA_SYMBOLS -include $(or $(wildcard $(src)/Kbuild), $(src)/Makefile) +include $(kbuild-file) module.symvers-if-present := $(wildcard Module.symvers) output-symdump := $(KBUILD_EXTMOD)/Module.symvers From 598afa050403ddbb015ad4d9f8e6b911c3c93d33 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 19 Nov 2022 04:15:51 +0900 Subject: [PATCH 1956/4122] kbuild: warn objects shared among multiple modules If an object is shared among multiple modules, and some of them are configured as 'm', but the others as 'y', the shared object is built as modular, then linked to the modules and vmlinux. 
This is a potential issue because the expected CFLAGS are different between modules and builtins. Commit 637a642f5ca5 ("zstd: Fixing mixed module-builtin objects") reported that this could be even more fatal in some cases such as Clang LTO. That commit fixed lib/zlib/zstd_{compress,decompress}, but there are still more instances of breakage. This commit adds a W=1 warning for shared objects, so that the kbuild test robot, which provides build tests with W=1, will avoid a new breakage slipping in. Quick compile tests on v6.1-rc4 detected the following: scripts/Makefile.build:252: ./drivers/block/rnbd/Makefile: rnbd-common.o is added to multiple modules: rnbd-client rnbd-server scripts/Makefile.build:252: ./drivers/crypto/marvell/octeontx2/Makefile: cn10k_cpt.o is added to multiple modules: rvu_cptpf rvu_cptvf scripts/Makefile.build:252: ./drivers/crypto/marvell/octeontx2/Makefile: otx2_cptlf.o is added to multiple modules: rvu_cptpf rvu_cptvf scripts/Makefile.build:252: ./drivers/crypto/marvell/octeontx2/Makefile: otx2_cpt_mbox_common.o is added to multiple modules: rvu_cptpf rvu_cptvf scripts/Makefile.build:252: ./drivers/edac/Makefile: skx_common.o is added to multiple modules: i10nm_edac skx_edac scripts/Makefile.build:252: ./drivers/gpu/drm/bridge/imx/Makefile: imx-ldb-helper.o is added to multiple modules: imx8qm-ldb imx8qxp-ldb scripts/Makefile.build:252: ./drivers/mfd/Makefile: rsmu_core.o is added to multiple modules: rsmu-i2c rsmu-spi scripts/Makefile.build:252: ./drivers/mtd/tests/Makefile: mtd_test.o is added to multiple modules: mtd_nandbiterrs mtd_oobtest mtd_pagetest mtd_readtest mtd_speedtest mtd_stresstest mtd_subpagetest mtd_torturetest scripts/Makefile.build:252: ./drivers/net/dsa/ocelot/Makefile: felix.o is added to multiple modules: mscc_felix mscc_seville scripts/Makefile.build:252: ./drivers/net/ethernet/cavium/liquidio/Makefile: cn23xx_pf_device.o is added to multiple modules: liquidio liquidio_vf scripts/Makefile.build:252: 
./drivers/net/ethernet/cavium/liquidio/Makefile: cn23xx_vf_device.o is added to multiple modules: liquidio liquidio_vf scripts/Makefile.build:252: ./drivers/net/ethernet/cavium/liquidio/Makefile: cn66xx_device.o is added to multiple modules: liquidio liquidio_vf scripts/Makefile.build:252: ./drivers/net/ethernet/cavium/liquidio/Makefile: cn68xx_device.o is added to multiple modules: liquidio liquidio_vf scripts/Makefile.build:252: ./drivers/net/ethernet/cavium/liquidio/Makefile: lio_core.o is added to multiple modules: liquidio liquidio_vf scripts/Makefile.build:252: ./drivers/net/ethernet/cavium/liquidio/Makefile: lio_ethtool.o is added to multiple modules: liquidio liquidio_vf scripts/Makefile.build:252: ./drivers/net/ethernet/cavium/liquidio/Makefile: octeon_device.o is added to multiple modules: liquidio liquidio_vf scripts/Makefile.build:252: ./drivers/net/ethernet/cavium/liquidio/Makefile: octeon_droq.o is added to multiple modules: liquidio liquidio_vf scripts/Makefile.build:252: ./drivers/net/ethernet/cavium/liquidio/Makefile: octeon_mailbox.o is added to multiple modules: liquidio liquidio_vf scripts/Makefile.build:252: ./drivers/net/ethernet/cavium/liquidio/Makefile: octeon_mem_ops.o is added to multiple modules: liquidio liquidio_vf scripts/Makefile.build:252: ./drivers/net/ethernet/cavium/liquidio/Makefile: octeon_nic.o is added to multiple modules: liquidio liquidio_vf scripts/Makefile.build:252: ./drivers/net/ethernet/cavium/liquidio/Makefile: request_manager.o is added to multiple modules: liquidio liquidio_vf scripts/Makefile.build:252: ./drivers/net/ethernet/cavium/liquidio/Makefile: response_manager.o is added to multiple modules: liquidio liquidio_vf scripts/Makefile.build:252: ./drivers/net/ethernet/freescale/dpaa2/Makefile: dpaa2-mac.o is added to multiple modules: fsl-dpaa2-eth fsl-dpaa2-switch scripts/Makefile.build:252: ./drivers/net/ethernet/freescale/dpaa2/Makefile: dpmac.o is added to multiple modules: fsl-dpaa2-eth fsl-dpaa2-switch 
scripts/Makefile.build:252: ./drivers/net/ethernet/freescale/enetc/Makefile: enetc_cbdr.o is added to multiple modules: fsl-enetc fsl-enetc-vf scripts/Makefile.build:252: ./drivers/net/ethernet/freescale/enetc/Makefile: enetc_ethtool.o is added to multiple modules: fsl-enetc fsl-enetc-vf scripts/Makefile.build:252: ./drivers/net/ethernet/freescale/enetc/Makefile: enetc.o is added to multiple modules: fsl-enetc fsl-enetc-vf scripts/Makefile.build:252: ./drivers/net/ethernet/hisilicon/hns3/Makefile: hns3_common/hclge_comm_cmd.o is added to multiple modules: hclge hclgevf scripts/Makefile.build:252: ./drivers/net/ethernet/hisilicon/hns3/Makefile: hns3_common/hclge_comm_rss.o is added to multiple modules: hclge hclgevf scripts/Makefile.build:252: ./drivers/net/ethernet/hisilicon/hns3/Makefile: hns3_common/hclge_comm_tqp_stats.o is added to multiple modules: hclge hclgevf scripts/Makefile.build:252: ./drivers/net/ethernet/marvell/octeontx2/nic/Makefile: otx2_dcbnl.o is added to multiple modules: rvu_nicpf rvu_nicvf scripts/Makefile.build:252: ./drivers/net/ethernet/marvell/octeontx2/nic/Makefile: otx2_devlink.o is added to multiple modules: rvu_nicpf rvu_nicvf scripts/Makefile.build:252: ./drivers/net/ethernet/ti/Makefile: cpsw_ale.o is added to multiple modules: keystone_netcp keystone_netcp_ethss ti_cpsw ti_cpsw_new scripts/Makefile.build:252: ./drivers/net/ethernet/ti/Makefile: cpsw_ethtool.o is added to multiple modules: ti_cpsw ti_cpsw_new scripts/Makefile.build:252: ./drivers/net/ethernet/ti/Makefile: cpsw_priv.o is added to multiple modules: ti_cpsw ti_cpsw_new scripts/Makefile.build:252: ./drivers/net/ethernet/ti/Makefile: cpsw_sl.o is added to multiple modules: ti_cpsw ti_cpsw_new scripts/Makefile.build:252: ./drivers/net/ethernet/ti/Makefile: davinci_cpdma.o is added to multiple modules: ti_cpsw ti_cpsw_new ti_davinci_emac scripts/Makefile.build:252: ./drivers/platform/x86/intel/int3472/Makefile: common.o is added to multiple modules: 
intel_skl_int3472_discrete intel_skl_int3472_tps68470 scripts/Makefile.build:252: ./sound/soc/codecs/Makefile: wcd-clsh-v2.o is added to multiple modules: snd-soc-wcd9335 snd-soc-wcd934x snd-soc-wcd938x Once all the warnings are fixed, it can become an error without the W= option. Signed-off-by: Masahiro Yamada Reviewed-by: Alexander Lobakin Tested-by: Alexander Lobakin Reviewed-by: Nicolas Schier --- scripts/Makefile.build | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 37cf88d076e8..799df12b53f3 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -222,6 +222,10 @@ endif cmd_check_local_export = $(srctree)/scripts/check-local-export $@ +ifneq ($(findstring 1, $(KBUILD_EXTRA_WARN)),) +cmd_warn_shared_object = $(if $(word 2, $(modname-multi)),$(warning $(kbuild-file): $*.o is added to multiple modules: $(modname-multi))) +endif + define rule_cc_o_c $(call cmd_and_fixdep,cc_o_c) $(call cmd,gen_ksymdeps) @@ -231,6 +235,7 @@ define rule_cc_o_c $(call cmd,gen_objtooldep) $(call cmd,gen_symversions_c) $(call cmd,record_mcount) + $(call cmd,warn_shared_object) endef define rule_as_o_S @@ -239,6 +244,7 @@ define rule_as_o_S $(call cmd,check_local_export) $(call cmd,gen_objtooldep) $(call cmd,gen_symversions_S) + $(call cmd,warn_shared_object) endef # Built-in and composite module parts From 73f5fc5f884ad0c5f7d57f66303af64f9f002526 Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Mon, 21 Nov 2022 08:20:22 +0000 Subject: [PATCH 1957/4122] iommu/fsl_pamu: Fix resource leak in fsl_pamu_probe() The fsl_pamu_probe() returns directly when create_csd() failed, leaving irq and memories unreleased. Fix by jumping to error if create_csd() returns error. 
Fixes: 695093e38c3e ("iommu/fsl: Freescale PAMU driver and iommu implementation.") Signed-off-by: Yuan Can Link: https://lore.kernel.org/r/20221121082022.19091-1-yuancan@huawei.com Signed-off-by: Joerg Roedel --- drivers/iommu/fsl_pamu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c index 1b53d2da2c19..1a8c85d54123 100644 --- a/drivers/iommu/fsl_pamu.c +++ b/drivers/iommu/fsl_pamu.c @@ -868,7 +868,7 @@ static int fsl_pamu_probe(struct platform_device *pdev) ret = create_csd(ppaact_phys, mem_size, csd_port_id); if (ret) { dev_err(dev, "could not create coherence subdomain\n"); - return ret; + goto error; } } From ddacd6ef44cac60c4fb8cd1a994fb13e32c1c761 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 11 Nov 2022 15:48:21 +0100 Subject: [PATCH 1958/4122] usb: fotg210: Fix Kconfig for USB host modules The kernel robot reports a link failure when activating the FOTG210 host subdriver with =y on a system where the USB host core is a module (CONFIG_USB=m). This is a bit of a special case, so mimic the Kconfig incantations from DWC3: let the subdrivers for host or peripheral depend on the host or gadget support being =y or the same as the FOTG210 core itself. This should ensure that either: - The host (CONFIG_USB) or gadget (CONFIG_USB_GADGET) is compiled in and then the FOTG210 can be either a module or compiled in. - The host or gadget is modular, and then the FOTG210 module must be a module too, or we cannot resolve the symbols at link time. 
Reported-by: kernel test robot Link: https://lore.kernel.org/linux-usb/202211112132.0BUPGKCd-lkp@intel.com/ Cc: Arnd Bergmann Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20221111144821.113665-1-linus.walleij@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/fotg210/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/fotg210/Kconfig b/drivers/usb/fotg210/Kconfig index 534206ee0d1d..2db6ac9f8074 100644 --- a/drivers/usb/fotg210/Kconfig +++ b/drivers/usb/fotg210/Kconfig @@ -14,7 +14,7 @@ if USB_FOTG210 config USB_FOTG210_HCD bool "Faraday FOTG210 USB Host Controller support" - depends on USB + depends on USB=y || USB=USB_FOTG210 help Faraday FOTG210 is an OTG controller which can be configured as an USB2.0 host. It is designed to meet USB2.0 EHCI specification @@ -24,7 +24,7 @@ config USB_FOTG210_HCD module will be called fotg210-hcd. config USB_FOTG210_UDC - depends on USB_GADGET + depends on USB_GADGET=y || USB_GADGET=USB_FOTG210 bool "Faraday FOTG210 USB Peripheral Controller support" help Faraday USB2.0 OTG controller which can be configured as From 6d36e0e1a14ac9a382c7a157bce5354fd8b68134 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 21 Nov 2022 16:22:19 +0100 Subject: [PATCH 1959/4122] usb: USB_FOTG210 should depend on ARCH_GEMINI The Faraday Technology FOTG210 USB2 Dual Role Controller is only present on Cortina Systems Gemini SoCs. Hence add a dependency on ARCH_GEMINI, to prevent asking the user about its drivers when configuring a kernel without Cortina Systems Gemini SoC support. 
Fixes: 1dd33a9f1b95ab59 ("usb: fotg210: Collect pieces of dual mode controller") Signed-off-by: Geert Uytterhoeven Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/a989b3b798ecaf3b45f35160e30e605636d66a77.1669044086.git.geert+renesas@glider.be Signed-off-by: Greg Kroah-Hartman --- drivers/usb/fotg210/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/fotg210/Kconfig b/drivers/usb/fotg210/Kconfig index 2db6ac9f8074..2b05968735ba 100644 --- a/drivers/usb/fotg210/Kconfig +++ b/drivers/usb/fotg210/Kconfig @@ -4,6 +4,7 @@ config USB_FOTG210 tristate "Faraday FOTG210 USB2 Dual Role controller" depends on USB || USB_GADGET depends on HAS_DMA && HAS_IOMEM + depends on ARCH_GEMINI || COMPILE_TEST default ARCH_GEMINI select MFD_SYSCON help From d40eaada4209959264be63b21e18e15030db0a38 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 14 Nov 2022 12:51:58 +0100 Subject: [PATCH 1960/4122] fotg210-udc: Use dev pointer in probe and dev_messages Add a local struct device *dev pointer and use dev_err() etc to report status. 
Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20221114115201.302887-1-linus.walleij@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/fotg210/fotg210-udc.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/usb/fotg210/fotg210-udc.c b/drivers/usb/fotg210/fotg210-udc.c index 3c357ce42d3b..b3106e4b3194 100644 --- a/drivers/usb/fotg210/fotg210-udc.c +++ b/drivers/usb/fotg210/fotg210-udc.c @@ -1091,6 +1091,7 @@ int fotg210_udc_probe(struct platform_device *pdev) struct resource *res, *ires; struct fotg210_udc *fotg210 = NULL; struct fotg210_ep *_ep[FOTG210_MAX_NUM_EP]; + struct device *dev = &pdev->dev; int ret = 0; int i; @@ -1122,7 +1123,7 @@ int fotg210_udc_probe(struct platform_device *pdev) fotg210->reg = ioremap(res->start, resource_size(res)); if (fotg210->reg == NULL) { - pr_err("ioremap error.\n"); + dev_err(dev, "ioremap error\n"); goto err_alloc; } @@ -1133,8 +1134,8 @@ int fotg210_udc_probe(struct platform_device *pdev) fotg210->gadget.ops = &fotg210_gadget_ops; fotg210->gadget.max_speed = USB_SPEED_HIGH; - fotg210->gadget.dev.parent = &pdev->dev; - fotg210->gadget.dev.dma_mask = pdev->dev.dma_mask; + fotg210->gadget.dev.parent = dev; + fotg210->gadget.dev.dma_mask = dev->dma_mask; fotg210->gadget.name = udc_name; INIT_LIST_HEAD(&fotg210->gadget.ep_list); @@ -1180,15 +1181,15 @@ int fotg210_udc_probe(struct platform_device *pdev) ret = request_irq(ires->start, fotg210_irq, IRQF_SHARED, udc_name, fotg210); if (ret < 0) { - pr_err("request_irq error (%d)\n", ret); + dev_err(dev, "request_irq error (%d)\n", ret); goto err_req; } - ret = usb_add_gadget_udc(&pdev->dev, &fotg210->gadget); + ret = usb_add_gadget_udc(dev, &fotg210->gadget); if (ret) goto err_add_udc; - dev_info(&pdev->dev, "version %s\n", DRIVER_VERSION); + dev_info(dev, "version %s\n", DRIVER_VERSION); return 0; From 5f217ccd520f155c2e3b3dd95627140dd5ec947e Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 14 Nov 2022 12:51:59 
+0100 Subject: [PATCH 1961/4122] fotg210-udc: Support optional external PHY This adds support for an optional external PHY to the FOTG210 UDC driver. Tested with the GPIO VBUS PHY driver on the Gemini SoC. Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20221114115201.302887-2-linus.walleij@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/fotg210/fotg210-udc.c | 72 +++++++++++++++++++++++++++++++ drivers/usb/fotg210/fotg210-udc.h | 2 + 2 files changed, 74 insertions(+) diff --git a/drivers/usb/fotg210/fotg210-udc.c b/drivers/usb/fotg210/fotg210-udc.c index b3106e4b3194..4026103330e1 100644 --- a/drivers/usb/fotg210/fotg210-udc.c +++ b/drivers/usb/fotg210/fotg210-udc.c @@ -15,6 +15,8 @@ #include #include #include +#include +#include #include "fotg210.h" #include "fotg210-udc.h" @@ -1008,11 +1010,19 @@ static int fotg210_udc_start(struct usb_gadget *g, { struct fotg210_udc *fotg210 = gadget_to_fotg210(g); u32 value; + int ret; /* hook up the driver */ driver->driver.bus = NULL; fotg210->driver = driver; + if (!IS_ERR_OR_NULL(fotg210->phy)) { + ret = otg_set_peripheral(fotg210->phy->otg, + &fotg210->gadget); + if (ret) + dev_err(fotg210->dev, "can't bind to phy\n"); + } + /* enable device global interrupt */ value = ioread32(fotg210->reg + FOTG210_DMCR); value |= DMCR_GLINT_EN; @@ -1054,6 +1064,9 @@ static int fotg210_udc_stop(struct usb_gadget *g) struct fotg210_udc *fotg210 = gadget_to_fotg210(g); unsigned long flags; + if (!IS_ERR_OR_NULL(fotg210->phy)) + return otg_set_peripheral(fotg210->phy->otg, NULL); + spin_lock_irqsave(&fotg210->lock, flags); fotg210_init(fotg210); @@ -1069,12 +1082,50 @@ static const struct usb_gadget_ops fotg210_gadget_ops = { .udc_stop = fotg210_udc_stop, }; +/** + * fotg210_phy_event - Called by phy upon VBus event + * @nb: notifier block + * @action: phy action, is vbus connect or disconnect + * @data: the usb_gadget structure in fotg210 + * + * Called by the USB Phy when a cable connect or disconnect is sensed. 
+ * + * Returns NOTIFY_OK or NOTIFY_DONE + */ +static int fotg210_phy_event(struct notifier_block *nb, unsigned long action, + void *data) +{ + struct usb_gadget *gadget = data; + + if (!gadget) + return NOTIFY_DONE; + + switch (action) { + case USB_EVENT_VBUS: + usb_gadget_vbus_connect(gadget); + return NOTIFY_OK; + case USB_EVENT_NONE: + usb_gadget_vbus_disconnect(gadget); + return NOTIFY_OK; + default: + return NOTIFY_DONE; + } +} + +static struct notifier_block fotg210_phy_notifier = { + .notifier_call = fotg210_phy_event, +}; + int fotg210_udc_remove(struct platform_device *pdev) { struct fotg210_udc *fotg210 = platform_get_drvdata(pdev); int i; usb_del_gadget_udc(&fotg210->gadget); + if (!IS_ERR_OR_NULL(fotg210->phy)) { + usb_unregister_notifier(fotg210->phy, &fotg210_phy_notifier); + usb_put_phy(fotg210->phy); + } iounmap(fotg210->reg); free_irq(platform_get_irq(pdev, 0), fotg210); @@ -1114,6 +1165,22 @@ int fotg210_udc_probe(struct platform_device *pdev) if (fotg210 == NULL) goto err; + fotg210->dev = dev; + + fotg210->phy = devm_usb_get_phy_by_phandle(dev->parent, "usb-phy", 0); + if (IS_ERR(fotg210->phy)) { + ret = PTR_ERR(fotg210->phy); + if (ret == -EPROBE_DEFER) + goto err; + dev_info(dev, "no PHY found\n"); + fotg210->phy = NULL; + } else { + ret = usb_phy_init(fotg210->phy); + if (ret) + goto err; + dev_info(dev, "found and initialized PHY\n"); + } + for (i = 0; i < FOTG210_MAX_NUM_EP; i++) { _ep[i] = kzalloc(sizeof(struct fotg210_ep), GFP_KERNEL); if (_ep[i] == NULL) @@ -1185,6 +1252,9 @@ int fotg210_udc_probe(struct platform_device *pdev) goto err_req; } + if (!IS_ERR_OR_NULL(fotg210->phy)) + usb_register_notifier(fotg210->phy, &fotg210_phy_notifier); + ret = usb_add_gadget_udc(dev, &fotg210->gadget); if (ret) goto err_add_udc; @@ -1194,6 +1264,8 @@ int fotg210_udc_probe(struct platform_device *pdev) return 0; err_add_udc: + if (!IS_ERR_OR_NULL(fotg210->phy)) + usb_unregister_notifier(fotg210->phy, &fotg210_phy_notifier); free_irq(ires->start, 
fotg210); err_req: diff --git a/drivers/usb/fotg210/fotg210-udc.h b/drivers/usb/fotg210/fotg210-udc.h index 08c32957503b..e3067d22a895 100644 --- a/drivers/usb/fotg210/fotg210-udc.h +++ b/drivers/usb/fotg210/fotg210-udc.h @@ -234,6 +234,8 @@ struct fotg210_udc { unsigned long irq_trigger; + struct device *dev; + struct usb_phy *phy; struct usb_gadget gadget; struct usb_gadget_driver *driver; From 718a38d092ec920dd84a5e25510cc6721f527c3e Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 14 Nov 2022 12:52:00 +0100 Subject: [PATCH 1962/4122] fotg210-udc: Handle PCLK This adds optional handling of the peripheral clock PCLK. Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20221114115201.302887-3-linus.walleij@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/fotg210/fotg210-udc.c | 30 ++++++++++++++++++++++++++++-- drivers/usb/fotg210/fotg210-udc.h | 1 + 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/drivers/usb/fotg210/fotg210-udc.c b/drivers/usb/fotg210/fotg210-udc.c index 4026103330e1..de0f72ca103c 100644 --- a/drivers/usb/fotg210/fotg210-udc.c +++ b/drivers/usb/fotg210/fotg210-udc.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -1132,6 +1133,10 @@ int fotg210_udc_remove(struct platform_device *pdev) fotg210_ep_free_request(&fotg210->ep[0]->ep, fotg210->ep0_req); for (i = 0; i < FOTG210_MAX_NUM_EP; i++) kfree(fotg210->ep[i]); + + if (!IS_ERR(fotg210->pclk)) + clk_disable_unprepare(fotg210->pclk); + kfree(fotg210); return 0; @@ -1167,17 +1172,34 @@ int fotg210_udc_probe(struct platform_device *pdev) fotg210->dev = dev; + /* It's OK not to supply this clock */ + fotg210->pclk = devm_clk_get(dev, "PCLK"); + if (!IS_ERR(fotg210->pclk)) { + ret = clk_prepare_enable(fotg210->pclk); + if (ret) { + dev_err(dev, "failed to enable PCLK\n"); + return ret; + } + } else if (PTR_ERR(fotg210->pclk) == -EPROBE_DEFER) { + /* + * Percolate deferrals, for anything else, + * just live without the clocking. 
+ */ + ret = -EPROBE_DEFER; + goto err; + } + fotg210->phy = devm_usb_get_phy_by_phandle(dev->parent, "usb-phy", 0); if (IS_ERR(fotg210->phy)) { ret = PTR_ERR(fotg210->phy); if (ret == -EPROBE_DEFER) - goto err; + goto err_pclk; dev_info(dev, "no PHY found\n"); fotg210->phy = NULL; } else { ret = usb_phy_init(fotg210->phy); if (ret) - goto err; + goto err_pclk; dev_info(dev, "found and initialized PHY\n"); } @@ -1277,6 +1299,10 @@ err_map: err_alloc: for (i = 0; i < FOTG210_MAX_NUM_EP; i++) kfree(fotg210->ep[i]); +err_pclk: + if (!IS_ERR(fotg210->pclk)) + clk_disable_unprepare(fotg210->pclk); + kfree(fotg210); err: diff --git a/drivers/usb/fotg210/fotg210-udc.h b/drivers/usb/fotg210/fotg210-udc.h index e3067d22a895..fadb57ca8d78 100644 --- a/drivers/usb/fotg210/fotg210-udc.h +++ b/drivers/usb/fotg210/fotg210-udc.h @@ -231,6 +231,7 @@ struct fotg210_ep { struct fotg210_udc { spinlock_t lock; /* protect the struct */ void __iomem *reg; + struct clk *pclk; unsigned long irq_trigger; From f8b729ce97f66807f6b958e891888d0b1ed20a9e Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 14 Nov 2022 12:52:01 +0100 Subject: [PATCH 1963/4122] fotg210-udc: Get IRQ using platform_get_irq() The platform_get_irq() is necessary to use to get dynamic IRQ resolution when instantiating the device from the device tree. IRQs are not passed as resources in that case. 
Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20221114115201.302887-4-linus.walleij@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/fotg210/fotg210-udc.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/usb/fotg210/fotg210-udc.c b/drivers/usb/fotg210/fotg210-udc.c index de0f72ca103c..44dfe66e189c 100644 --- a/drivers/usb/fotg210/fotg210-udc.c +++ b/drivers/usb/fotg210/fotg210-udc.c @@ -1144,10 +1144,11 @@ int fotg210_udc_remove(struct platform_device *pdev) int fotg210_udc_probe(struct platform_device *pdev) { - struct resource *res, *ires; + struct resource *res; struct fotg210_udc *fotg210 = NULL; struct fotg210_ep *_ep[FOTG210_MAX_NUM_EP]; struct device *dev = &pdev->dev; + int irq; int ret = 0; int i; @@ -1157,9 +1158,9 @@ int fotg210_udc_probe(struct platform_device *pdev) return -ENODEV; } - ires = platform_get_resource(pdev, IORESOURCE_IRQ, 0); - if (!ires) { - pr_err("platform_get_resource IORESOURCE_IRQ error.\n"); + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + pr_err("could not get irq\n"); return -ENODEV; } @@ -1189,7 +1190,7 @@ int fotg210_udc_probe(struct platform_device *pdev) goto err; } - fotg210->phy = devm_usb_get_phy_by_phandle(dev->parent, "usb-phy", 0); + fotg210->phy = devm_usb_get_phy_by_phandle(dev, "usb-phy", 0); if (IS_ERR(fotg210->phy)) { ret = PTR_ERR(fotg210->phy); if (ret == -EPROBE_DEFER) @@ -1267,7 +1268,7 @@ int fotg210_udc_probe(struct platform_device *pdev) fotg210_disable_unplug(fotg210); - ret = request_irq(ires->start, fotg210_irq, IRQF_SHARED, + ret = request_irq(irq, fotg210_irq, IRQF_SHARED, udc_name, fotg210); if (ret < 0) { dev_err(dev, "request_irq error (%d)\n", ret); @@ -1288,7 +1289,7 @@ int fotg210_udc_probe(struct platform_device *pdev) err_add_udc: if (!IS_ERR_OR_NULL(fotg210->phy)) usb_unregister_notifier(fotg210->phy, &fotg210_phy_notifier); - free_irq(ires->start, fotg210); + free_irq(irq, fotg210); err_req: 
fotg210_ep_free_request(&fotg210->ep[0]->ep, fotg210->ep0_req); From 202f785b1863d8feef53f6489afd9abcb744e7bf Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 14 Nov 2022 21:38:04 +0100 Subject: [PATCH 1964/4122] usb: fotg210-udc: Remove a useless assignment There is no need to use an intermediate array for these memory allocations, so, axe it. While at it, turn a '== NULL' into a shorter '!' when testing memory allocation failure. Signed-off-by: Christophe JAILLET Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/deab9696fc4000499470e7ccbca7c36fca17bd4e.1668458274.git.christophe.jaillet@wanadoo.fr Signed-off-by: Greg Kroah-Hartman --- drivers/usb/fotg210/fotg210-udc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/usb/fotg210/fotg210-udc.c b/drivers/usb/fotg210/fotg210-udc.c index 44dfe66e189c..b9ea6c6d931c 100644 --- a/drivers/usb/fotg210/fotg210-udc.c +++ b/drivers/usb/fotg210/fotg210-udc.c @@ -1146,7 +1146,6 @@ int fotg210_udc_probe(struct platform_device *pdev) { struct resource *res; struct fotg210_udc *fotg210 = NULL; - struct fotg210_ep *_ep[FOTG210_MAX_NUM_EP]; struct device *dev = &pdev->dev; int irq; int ret = 0; @@ -1205,10 +1204,9 @@ int fotg210_udc_probe(struct platform_device *pdev) } for (i = 0; i < FOTG210_MAX_NUM_EP; i++) { - _ep[i] = kzalloc(sizeof(struct fotg210_ep), GFP_KERNEL); - if (_ep[i] == NULL) + fotg210->ep[i] = kzalloc(sizeof(struct fotg210_ep), GFP_KERNEL); + if (!fotg210->ep[i]) goto err_alloc; - fotg210->ep[i] = _ep[i]; } fotg210->reg = ioremap(res->start, resource_size(res)); From 488c2c67463cc704715e9d4b68c9edfcc20f299d Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Tue, 15 Nov 2022 11:31:53 +0100 Subject: [PATCH 1965/4122] MAINTAINERS: rectify entry for MICROCHIP USB251XB DRIVER Commit fff61d4ccf3d ("dt-bindings: usb: usb251xb: Convert to YAML schema") converts usb251xb.txt to usb251xb.yaml, but misses to adjust its reference in MAINTAINERS. 
Hence, ./scripts/get_maintainer.pl --self-test=patterns complains about a broken reference. Repair this file reference in MICROCHIP USB251XB DRIVER. Signed-off-by: Lukas Bulwahn Acked-by: Marek Vasut Link: https://lore.kernel.org/r/20221115103153.28502-1-lukas.bulwahn@gmail.com Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2a8c456c184e..8cb2c2b7d6cf 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13644,7 +13644,7 @@ MICROCHIP USB251XB DRIVER M: Richard Leitner L: linux-usb@vger.kernel.org S: Maintained -F: Documentation/devicetree/bindings/usb/usb251xb.txt +F: Documentation/devicetree/bindings/usb/usb251xb.yaml F: drivers/usb/misc/usb251xb.c MICROCHIP USBA UDC DRIVER From 7b462b05e47adaf11358f5c2c24db85c487b613e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 11 Nov 2022 16:57:24 +0000 Subject: [PATCH 1966/4122] usb: ftdi-elan: remove variable l Variable l is just being accumulated and it's never used anywhere else. The variable and the addition are redundant so remove it. 
Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20221111165724.557152-1-colin.i.king@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/ftdi-elan.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/usb/misc/ftdi-elan.c b/drivers/usb/misc/ftdi-elan.c index 33b35788bd0b..8ce191e3a4c0 100644 --- a/drivers/usb/misc/ftdi-elan.c +++ b/drivers/usb/misc/ftdi-elan.c @@ -1624,7 +1624,6 @@ wait:if (ftdi->disconnected > 0) { char data[30 *3 + 4]; char *d = data; int m = (sizeof(data) - 1) / 3 - 1; - int l = 0; struct u132_target *target = &ftdi->target[ed]; struct u132_command *command = &ftdi->command[ COMMAND_MASK & ftdi->command_next]; @@ -1647,7 +1646,6 @@ wait:if (ftdi->disconnected > 0) { } else if (i++ < m) { int w = sprintf(d, " %02X", *b++); d += w; - l += w; } else d += sprintf(d, " .."); } From b6ddd180e3d9f92c1e482b3cdeec7dda086b1341 Mon Sep 17 00:00:00 2001 From: Sven Peter Date: Mon, 14 Nov 2022 17:59:24 +0100 Subject: [PATCH 1967/4122] usb: typec: Check for ops->exit instead of ops->enter in altmode_exit typec_altmode_exit checks if ops->enter is not NULL but then calls ops->exit a few lines below. Fix that and check for the function pointer it's about to call instead. 
Fixes: 8a37d87d72f0 ("usb: typec: Bus type for alternate modes") Signed-off-by: Sven Peter Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20221114165924.33487-1-sven@svenpeter.dev Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/bus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/bus.c b/drivers/usb/typec/bus.c index 26ea2fdec17d..31c2a3130cad 100644 --- a/drivers/usb/typec/bus.c +++ b/drivers/usb/typec/bus.c @@ -134,7 +134,7 @@ int typec_altmode_exit(struct typec_altmode *adev) if (!adev || !adev->active) return 0; - if (!pdev->ops || !pdev->ops->enter) + if (!pdev->ops || !pdev->ops->exit) return -EOPNOTSUPP; /* Moving to USB Safe State */ From 6552ba4cd0841c23486368ed4feb2229e0abd1b3 Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Wed, 16 Nov 2022 17:06:00 +0200 Subject: [PATCH 1968/4122] dt-bindings: usb: dwc3: Add SM8550 compatible Document the SM8550 dwc3 compatible. Signed-off-by: Abel Vesa Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221116150600.3011160-1-abel.vesa@linaro.org Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/usb/qcom,dwc3.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/usb/qcom,dwc3.yaml b/Documentation/devicetree/bindings/usb/qcom,dwc3.yaml index a6e6abb4dfa9..a3f8a3f49852 100644 --- a/Documentation/devicetree/bindings/usb/qcom,dwc3.yaml +++ b/Documentation/devicetree/bindings/usb/qcom,dwc3.yaml @@ -39,6 +39,7 @@ properties: - qcom,sm8250-dwc3 - qcom,sm8350-dwc3 - qcom,sm8450-dwc3 + - qcom,sm8550-dwc3 - const: qcom,dwc3 reg: @@ -301,6 +302,7 @@ allOf: - qcom,sm8150-dwc3 - qcom,sm8250-dwc3 - qcom,sm8450-dwc3 + - qcom,sm8550-dwc3 then: properties: clocks: @@ -358,6 +360,7 @@ allOf: - qcom,sm8250-dwc3 - qcom,sm8350-dwc3 - qcom,sm8450-dwc3 + - qcom,sm8550-dwc3 then: properties: interrupts: From 0384e87e3fec735e47f1c133c796f32ef7a72a9b Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 21 Nov 
2022 14:24:16 +0800 Subject: [PATCH 1969/4122] usb: typec: tcpci: fix of node refcount leak in tcpci_register_port() I got the following report while doing device(mt6370-tcpc) load test with CONFIG_OF_UNITTEST and CONFIG_OF_DYNAMIC enabled: OF: ERROR: memory leak, expected refcount 1 instead of 2, of_node_get()/of_node_put() unbalanced - destroy cset entry: attach overlay node /i2c/pmic@34/tcpc/connector The 'fwnode' set in tcpci_parse_config() which is called in tcpci_register_port(), its node refcount is increased in device_get_named_child_node(). It needs be put while exiting, so call fwnode_handle_put() in the error path of tcpci_register_port() and in tcpci_unregister_port() to avoid leak. Fixes: 5e85a04c8c0d ("usb: typec: add fwnode to tcpc") Signed-off-by: Yang Yingliang Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/20221121062416.1026192-1-yangyingliang@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpci.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/tcpm/tcpci.c b/drivers/usb/typec/tcpm/tcpci.c index b2bfcebe218f..72f8d1e87600 100644 --- a/drivers/usb/typec/tcpm/tcpci.c +++ b/drivers/usb/typec/tcpm/tcpci.c @@ -794,8 +794,10 @@ struct tcpci *tcpci_register_port(struct device *dev, struct tcpci_data *data) return ERR_PTR(err); tcpci->port = tcpm_register_port(tcpci->dev, &tcpci->tcpc); - if (IS_ERR(tcpci->port)) + if (IS_ERR(tcpci->port)) { + fwnode_handle_put(tcpci->tcpc.fwnode); return ERR_CAST(tcpci->port); + } return tcpci; } @@ -804,6 +806,7 @@ EXPORT_SYMBOL_GPL(tcpci_register_port); void tcpci_unregister_port(struct tcpci *tcpci) { tcpm_unregister_port(tcpci->port); + fwnode_handle_put(tcpci->tcpc.fwnode); } EXPORT_SYMBOL_GPL(tcpci_unregister_port); From e99e1a7d6f88b9c54dc32671bac29f26e58bde80 Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Fri, 18 Nov 2022 19:01:16 +0800 Subject: [PATCH 1970/4122] usb: host: xhci-mtk: omit shared hcd if either root hub has no ports 
There is an error log when adding a usb3 root hub without ports: "hub 4-0:1.0: config failed, hub doesn't have any ports! (err -19)" so omit the shared hcd if either of the root hubs has no ports, but usually there is no usb3 port. Signed-off-by: Chunfeng Yun Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20221118110116.20165-1-chunfeng.yun@mediatek.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mtk.c | 72 +++++++++++++++++++++++-------------- 1 file changed, 46 insertions(+), 26 deletions(-) diff --git a/drivers/usb/host/xhci-mtk.c b/drivers/usb/host/xhci-mtk.c index 01705e559c42..cff3c4aea036 100644 --- a/drivers/usb/host/xhci-mtk.c +++ b/drivers/usb/host/xhci-mtk.c @@ -485,6 +485,7 @@ static int xhci_mtk_probe(struct platform_device *pdev) const struct hc_driver *driver; struct xhci_hcd *xhci; struct resource *res; + struct usb_hcd *usb3_hcd; struct usb_hcd *hcd; int ret = -ENODEV; int wakeup_irq; @@ -593,6 +594,7 @@ static int xhci_mtk_probe(struct platform_device *pdev) xhci = hcd_to_xhci(hcd); xhci->main_hcd = hcd; + xhci->allow_single_roothub = 1; /* * imod_interval is the interrupt moderation value in nanoseconds.
@@ -602,24 +604,29 @@ static int xhci_mtk_probe(struct platform_device *pdev) xhci->imod_interval = 5000; device_property_read_u32(dev, "imod-interval-ns", &xhci->imod_interval); - xhci->shared_hcd = usb_create_shared_hcd(driver, dev, - dev_name(dev), hcd); - if (!xhci->shared_hcd) { - ret = -ENOMEM; - goto disable_device_wakeup; - } - ret = usb_add_hcd(hcd, irq, IRQF_SHARED); if (ret) - goto put_usb3_hcd; + goto disable_device_wakeup; - if (HCC_MAX_PSA(xhci->hcc_params) >= 4 && + if (!xhci_has_one_roothub(xhci)) { + xhci->shared_hcd = usb_create_shared_hcd(driver, dev, + dev_name(dev), hcd); + if (!xhci->shared_hcd) { + ret = -ENOMEM; + goto dealloc_usb2_hcd; + } + } + + usb3_hcd = xhci_get_usb3_hcd(xhci); + if (usb3_hcd && HCC_MAX_PSA(xhci->hcc_params) >= 4 && !(xhci->quirks & XHCI_BROKEN_STREAMS)) - xhci->shared_hcd->can_do_streams = 1; + usb3_hcd->can_do_streams = 1; - ret = usb_add_hcd(xhci->shared_hcd, irq, IRQF_SHARED); - if (ret) - goto dealloc_usb2_hcd; + if (xhci->shared_hcd) { + ret = usb_add_hcd(xhci->shared_hcd, irq, IRQF_SHARED); + if (ret) + goto put_usb3_hcd; + } if (wakeup_irq > 0) { ret = dev_pm_set_dedicated_wake_irq_reverse(dev, wakeup_irq); @@ -641,13 +648,13 @@ dealloc_usb3_hcd: usb_remove_hcd(xhci->shared_hcd); xhci->shared_hcd = NULL; -dealloc_usb2_hcd: - usb_remove_hcd(hcd); - put_usb3_hcd: - xhci_mtk_sch_exit(mtk); usb_put_hcd(xhci->shared_hcd); +dealloc_usb2_hcd: + xhci_mtk_sch_exit(mtk); + usb_remove_hcd(hcd); + disable_device_wakeup: device_init_wakeup(dev, false); @@ -679,10 +686,15 @@ static int xhci_mtk_remove(struct platform_device *pdev) dev_pm_clear_wake_irq(dev); device_init_wakeup(dev, false); - usb_remove_hcd(shared_hcd); - xhci->shared_hcd = NULL; + if (shared_hcd) { + usb_remove_hcd(shared_hcd); + xhci->shared_hcd = NULL; + } usb_remove_hcd(hcd); - usb_put_hcd(shared_hcd); + + if (shared_hcd) + usb_put_hcd(shared_hcd); + usb_put_hcd(hcd); xhci_mtk_sch_exit(mtk); clk_bulk_disable_unprepare(BULK_CLKS_NUM, mtk->clks); @@ -700,13 
+712,16 @@ static int __maybe_unused xhci_mtk_suspend(struct device *dev) struct xhci_hcd_mtk *mtk = dev_get_drvdata(dev); struct usb_hcd *hcd = mtk->hcd; struct xhci_hcd *xhci = hcd_to_xhci(hcd); + struct usb_hcd *shared_hcd = xhci->shared_hcd; int ret; xhci_dbg(xhci, "%s: stop port polling\n", __func__); clear_bit(HCD_FLAG_POLL_RH, &hcd->flags); del_timer_sync(&hcd->rh_timer); - clear_bit(HCD_FLAG_POLL_RH, &xhci->shared_hcd->flags); - del_timer_sync(&xhci->shared_hcd->rh_timer); + if (shared_hcd) { + clear_bit(HCD_FLAG_POLL_RH, &shared_hcd->flags); + del_timer_sync(&shared_hcd->rh_timer); + } ret = xhci_mtk_host_disable(mtk); if (ret) @@ -718,8 +733,10 @@ static int __maybe_unused xhci_mtk_suspend(struct device *dev) restart_poll_rh: xhci_dbg(xhci, "%s: restart port polling\n", __func__); - set_bit(HCD_FLAG_POLL_RH, &xhci->shared_hcd->flags); - usb_hcd_poll_rh_status(xhci->shared_hcd); + if (shared_hcd) { + set_bit(HCD_FLAG_POLL_RH, &shared_hcd->flags); + usb_hcd_poll_rh_status(shared_hcd); + } set_bit(HCD_FLAG_POLL_RH, &hcd->flags); usb_hcd_poll_rh_status(hcd); return ret; @@ -730,6 +747,7 @@ static int __maybe_unused xhci_mtk_resume(struct device *dev) struct xhci_hcd_mtk *mtk = dev_get_drvdata(dev); struct usb_hcd *hcd = mtk->hcd; struct xhci_hcd *xhci = hcd_to_xhci(hcd); + struct usb_hcd *shared_hcd = xhci->shared_hcd; int ret; usb_wakeup_set(mtk, false); @@ -742,8 +760,10 @@ static int __maybe_unused xhci_mtk_resume(struct device *dev) goto disable_clks; xhci_dbg(xhci, "%s: restart port polling\n", __func__); - set_bit(HCD_FLAG_POLL_RH, &xhci->shared_hcd->flags); - usb_hcd_poll_rh_status(xhci->shared_hcd); + if (shared_hcd) { + set_bit(HCD_FLAG_POLL_RH, &shared_hcd->flags); + usb_hcd_poll_rh_status(shared_hcd); + } set_bit(HCD_FLAG_POLL_RH, &hcd->flags); usb_hcd_poll_rh_status(hcd); return 0; From 19c220e9ab00f50edefb9667e3101e84a5112df2 Mon Sep 17 00:00:00 2001 From: Sven Peter Date: Mon, 14 Nov 2022 18:44:46 +0100 Subject: [PATCH 1971/4122] usb: typec: 
tipd: Cleanup resources if devm_tps6598_psy_register fails We can't just return if devm_tps6598_psy_register fails since previous resources are not devres managed and have yet to be cleaned up. Fixes: 10eb0b6ac63a ("usb: typec: tps6598x: Export some power supply properties") Signed-off-by: Sven Peter Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20221114174449.34634-1-sven@svenpeter.dev Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tipd/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/tipd/core.c b/drivers/usb/typec/tipd/core.c index 2a77bab948f5..83a7a82e55f1 100644 --- a/drivers/usb/typec/tipd/core.c +++ b/drivers/usb/typec/tipd/core.c @@ -814,7 +814,7 @@ static int tps6598x_probe(struct i2c_client *client) ret = devm_tps6598_psy_register(tps); if (ret) - return ret; + goto err_role_put; tps->port = typec_register_port(&client->dev, &typec_cap); if (IS_ERR(tps->port)) { From 782c70edc4852a5d39be12377a85501546236212 Mon Sep 17 00:00:00 2001 From: Sven Peter Date: Mon, 14 Nov 2022 18:44:47 +0100 Subject: [PATCH 1972/4122] usb: typec: tipd: Fix spurious fwnode_handle_put in error path The err_role_put error path always calls fwnode_handle_put to release the fwnode. This path can be reached after probe itself has already released that fwnode though. Fix that by moving fwnode_handle_put in the happy path to the very end. 
Fixes: 18a6c866bb19 ("usb: typec: tps6598x: Add USB role switching logic") Signed-off-by: Sven Peter Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20221114174449.34634-2-sven@svenpeter.dev Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tipd/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/tipd/core.c b/drivers/usb/typec/tipd/core.c index 83a7a82e55f1..59059310ba74 100644 --- a/drivers/usb/typec/tipd/core.c +++ b/drivers/usb/typec/tipd/core.c @@ -821,7 +821,6 @@ static int tps6598x_probe(struct i2c_client *client) ret = PTR_ERR(tps->port); goto err_role_put; } - fwnode_handle_put(fwnode); if (status & TPS_STATUS_PLUG_PRESENT) { ret = tps6598x_read16(tps, TPS_REG_POWER_STATUS, &tps->pwr_status); @@ -845,6 +844,7 @@ static int tps6598x_probe(struct i2c_client *client) } i2c_set_clientdata(client, tps); + fwnode_handle_put(fwnode); return 0; From 4c8f27ba9ede0118cac9d775204f9b0ecdb877b0 Mon Sep 17 00:00:00 2001 From: Sven Peter Date: Mon, 14 Nov 2022 18:44:48 +0100 Subject: [PATCH 1973/4122] usb: typec: tipd: Fix typec_unregister_port error paths typec_unregister_port is only called for some error paths after typec_register_port was successful. Ensure it's called in all cases. 
Fixes: 92440202a880 ("usb: typec: tipd: Only update power status on IRQ") Signed-off-by: Sven Peter Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20221114174449.34634-3-sven@svenpeter.dev Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tipd/core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/usb/typec/tipd/core.c b/drivers/usb/typec/tipd/core.c index 59059310ba74..195c9c16f817 100644 --- a/drivers/usb/typec/tipd/core.c +++ b/drivers/usb/typec/tipd/core.c @@ -826,7 +826,7 @@ static int tps6598x_probe(struct i2c_client *client) ret = tps6598x_read16(tps, TPS_REG_POWER_STATUS, &tps->pwr_status); if (ret < 0) { dev_err(tps->dev, "failed to read power status: %d\n", ret); - goto err_role_put; + goto err_unregister_port; } ret = tps6598x_connect(tps, status); if (ret) @@ -839,8 +839,7 @@ static int tps6598x_probe(struct i2c_client *client) dev_name(&client->dev), tps); if (ret) { tps6598x_disconnect(tps, 0); - typec_unregister_port(tps->port); - goto err_role_put; + goto err_unregister_port; } i2c_set_clientdata(client, tps); @@ -848,6 +847,8 @@ static int tps6598x_probe(struct i2c_client *client) return 0; +err_unregister_port: + typec_unregister_port(tps->port); err_role_put: usb_role_switch_put(tps->role_sw); err_fwnode_put: From 53a256ea9596ec78a9f5dd51f2b49c2355b15d6e Mon Sep 17 00:00:00 2001 From: Sven Peter Date: Mon, 14 Nov 2022 18:44:49 +0100 Subject: [PATCH 1974/4122] usb: typec: tipd: Move tps6598x_disconnect error path to its own label While the code currently correctly calls tps6598x_disconnect before jumping to the error cleanup label it's inconsistent compared to all the other cleanup actions and prone to introduce bugs if any more resources are added. 
Signed-off-by: Sven Peter Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20221114174449.34634-4-sven@svenpeter.dev Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tipd/core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/typec/tipd/core.c b/drivers/usb/typec/tipd/core.c index 195c9c16f817..982bd2cad931 100644 --- a/drivers/usb/typec/tipd/core.c +++ b/drivers/usb/typec/tipd/core.c @@ -837,16 +837,16 @@ static int tps6598x_probe(struct i2c_client *client) irq_handler, IRQF_SHARED | IRQF_ONESHOT, dev_name(&client->dev), tps); - if (ret) { - tps6598x_disconnect(tps, 0); - goto err_unregister_port; - } + if (ret) + goto err_disconnect; i2c_set_clientdata(client, tps); fwnode_handle_put(fwnode); return 0; +err_disconnect: + tps6598x_disconnect(tps, 0); err_unregister_port: typec_unregister_port(tps->port); err_role_put: From ffbe2feac59b37c8dc536727552b4f375e1b9aec Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 18 Nov 2022 12:25:32 +0200 Subject: [PATCH 1975/4122] usb: musb: omap2430: Fix probe regression for missing resources Probe for omap2430 glue layer is now broken for interrupt resources in all cases. Commit 239071064732 ("partially Revert "usb: musb: Set the DT node on the child device"") broke probing for SoCs using ti-sysc interconnect target module as the dt node is not found. Commit a1a2b7125e10 ("of/platform: Drop static setup of IRQ resource from DT core") caused omap3 to fail with error "-ENXIO: IRQ mc not found" as the IRQ resources are no longer automatically populated from devicetree. Let's fix the issues by calling device_set_of_node_from_dev() only if the SoC has been updated to probe with ti-sysc. And for legacy SoCs, let's populate the resources manually as needed. Note that once we have updated the SoCs to probe with proper devicetree data in all cases, this is no longer needed. But doing that requires patching both devicetree and SoC code, so let's fix the probe issues first. 
Fixes: a1a2b7125e10 ("of/platform: Drop static setup of IRQ resource from DT core") Fixes: 239071064732 ("partially Revert "usb: musb: Set the DT node on the child device"") Cc: H. Nikolaus Schaller Reported-by: Sicelo Mhlongo Tested-by: Sicelo Mhlongo Signed-off-by: Tony Lindgren Link: https://lore.kernel.org/r/20221118102532.34458-1-tony@atomide.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/omap2430.c | 54 +++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c index f571a65ae6ee..476f55d1fec3 100644 --- a/drivers/usb/musb/omap2430.c +++ b/drivers/usb/musb/omap2430.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -310,6 +311,7 @@ static int omap2430_probe(struct platform_device *pdev) struct device_node *control_node; struct platform_device *control_pdev; int ret = -ENOMEM, val; + bool populate_irqs = false; if (!np) return -ENODEV; @@ -328,6 +330,18 @@ static int omap2430_probe(struct platform_device *pdev) musb->dev.dma_mask = &omap2430_dmamask; musb->dev.coherent_dma_mask = omap2430_dmamask; + /* + * Legacy SoCs using omap_device get confused if node is moved + * because of interconnect properties mixed into the node. 
+ */ + if (of_get_property(np, "ti,hwmods", NULL)) { + dev_warn(&pdev->dev, "please update to probe with ti-sysc\n"); + populate_irqs = true; + } else { + device_set_of_node_from_dev(&musb->dev, &pdev->dev); + } + of_node_put(np); + glue->dev = &pdev->dev; glue->musb = musb; glue->status = MUSB_UNKNOWN; @@ -389,6 +403,46 @@ static int omap2430_probe(struct platform_device *pdev) goto err2; } + if (populate_irqs) { + struct resource musb_res[3]; + struct resource *res; + int i = 0; + + memset(musb_res, 0, sizeof(*musb_res) * ARRAY_SIZE(musb_res)); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + goto err2; + + musb_res[i].start = res->start; + musb_res[i].end = res->end; + musb_res[i].flags = res->flags; + musb_res[i].name = res->name; + i++; + + ret = of_irq_get_byname(np, "mc"); + if (ret > 0) { + musb_res[i].start = ret; + musb_res[i].flags = IORESOURCE_IRQ; + musb_res[i].name = "mc"; + i++; + } + + ret = of_irq_get_byname(np, "dma"); + if (ret > 0) { + musb_res[i].start = ret; + musb_res[i].flags = IORESOURCE_IRQ; + musb_res[i].name = "dma"; + i++; + } + + ret = platform_device_add_resources(musb, musb_res, i); + if (ret) { + dev_err(&pdev->dev, "failed to add IRQ resources\n"); + goto err2; + } + } + ret = platform_device_add_data(musb, pdata, sizeof(*pdata)); if (ret) { dev_err(&pdev->dev, "failed to add platform_data\n"); From 3205054dc6fe2425ff24827a51fdf7cbbb528680 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Wed, 16 Nov 2022 12:04:44 +0100 Subject: [PATCH 1976/4122] usb: dwc3: improve the config dependency of USB_DWC3_XILINX A request to Manish Narani (see Link) asked for clarification of the reference to the config ARCH_VERSAL in the support of Xilinx SoCs with DesignWare Core USB3 IP. As there is no response, clean up the reference to the non-existing config symbol. While at it, follow up on Felipe Balbi's request to add the alternative COMPILE_TEST dependency. 
Link: https://lore.kernel.org/all/CAKXUXMwgWfX8+OvY0aCwRNukencwJERAZzU7p4eOLXQ2zv6rAg@mail.gmail.com/ Signed-off-by: Lukas Bulwahn Acked-by: Thinh Nguyen Link: https://lore.kernel.org/r/20221116110444.8340-1-lukas.bulwahn@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/dwc3/Kconfig b/drivers/usb/dwc3/Kconfig index 03ededa86da1..b2f72b0e75c6 100644 --- a/drivers/usb/dwc3/Kconfig +++ b/drivers/usb/dwc3/Kconfig @@ -152,11 +152,11 @@ config USB_DWC3_IMX8MP config USB_DWC3_XILINX tristate "Xilinx Platforms" - depends on (ARCH_ZYNQMP || ARCH_VERSAL) && OF + depends on (ARCH_ZYNQMP || COMPILE_TEST) && OF default USB_DWC3 help Support Xilinx SoCs with DesignWare Core USB3 IP. - This driver handles both ZynqMP and Versal SoC operations. + This driver handles ZynqMP SoC operations. Say 'Y' or 'M' if you have one such device. config USB_DWC3_AM62 From 581c848b610dbf3fe1ed4d85fd53d0743c61faba Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sun, 20 Nov 2022 15:15:09 +0100 Subject: [PATCH 1977/4122] extcon: usbc-tusb320: Update state on probe even if no IRQ pending MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently this driver triggers extcon and typec state update in its probe function, to read out current state reported by the chip and report the correct state to upper layers. This synchronization is performed correctly, but only in case the chip indicates a pending interrupt in reg09 register. This fails to cover the situation where all interrupts reported by the chip were already handled by Linux before reboot, then the system rebooted, and then Linux starts again. In this case, the TUSB320 no longer reports any interrupts in reg09, and the state update does not perform any update as it depends on that interrupt indication. 
Fix this by turning tusb320_irq_handler() into a thin wrapper around tusb320_state_update_handler(), where the latter now contains the bulk of the code of tusb320_irq_handler(), but adds a new function parameter "force_update". The "force_update" parameter can be used by the probe function to assure that the state synchronization is always performed, independent of the interrupt indicated in reg09. The interrupt handler tusb320_irq_handler() callback uses force_update=false to avoid state updates on potential spurious interrupts and retain current behavior. Fixes: 06bc4ca115cdd ("extcon: Add driver for TI TUSB320") Signed-off-by: Marek Vasut Reviewed-by: Alvin Šipraga Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/20221120141509.81012-1-marex@denx.de Signed-off-by: Greg Kroah-Hartman --- drivers/extcon/extcon-usbc-tusb320.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/extcon/extcon-usbc-tusb320.c b/drivers/extcon/extcon-usbc-tusb320.c index 2a120d8d3c27..9dfa545427ca 100644 --- a/drivers/extcon/extcon-usbc-tusb320.c +++ b/drivers/extcon/extcon-usbc-tusb320.c @@ -313,9 +313,9 @@ static void tusb320_typec_irq_handler(struct tusb320_priv *priv, u8 reg9) typec_set_pwr_opmode(port, TYPEC_PWR_MODE_USB); } -static irqreturn_t tusb320_irq_handler(int irq, void *dev_id) +static irqreturn_t tusb320_state_update_handler(struct tusb320_priv *priv, + bool force_update) { - struct tusb320_priv *priv = dev_id; unsigned int reg; if (regmap_read(priv->regmap, TUSB320_REG9, &reg)) { @@ -323,7 +323,7 @@ static irqreturn_t tusb320_irq_handler(int irq, void *dev_id) return IRQ_NONE; } - if (!(reg & TUSB320_REG9_INTERRUPT_STATUS)) + if (!force_update && !(reg & TUSB320_REG9_INTERRUPT_STATUS)) return IRQ_NONE; tusb320_extcon_irq_handler(priv, reg); @@ -340,6 +340,13 @@ static irqreturn_t tusb320_irq_handler(int irq, void *dev_id) return IRQ_HANDLED; } +static irqreturn_t tusb320_irq_handler(int irq, void *dev_id) +{ + struct
tusb320_priv *priv = dev_id; + + return tusb320_state_update_handler(priv, false); +} + static const struct regmap_config tusb320_regmap_config = { .reg_bits = 8, .val_bits = 8, @@ -466,7 +473,7 @@ static int tusb320_probe(struct i2c_client *client, return ret; /* update initial state */ - tusb320_irq_handler(client->irq, priv); + tusb320_state_update_handler(priv, true); /* Reset chip to its default state */ ret = tusb320_reset(priv); @@ -477,7 +484,7 @@ static int tusb320_probe(struct i2c_client *client, * State and polarity might change after a reset, so update * them again and make sure the interrupt status bit is cleared. */ - tusb320_irq_handler(client->irq, priv); + tusb320_state_update_handler(priv, true); ret = devm_request_threaded_irq(priv->dev, client->irq, NULL, tusb320_irq_handler, From afdc12887f2b2ecf20d065a7d81ad29824155083 Mon Sep 17 00:00:00 2001 From: Jiantao Zhang Date: Mon, 21 Nov 2022 13:08:05 +0000 Subject: [PATCH 1978/4122] USB: gadget: Fix use-after-free during usb config switch In the process of switching USB config from rndis to other config, if the hardware does not support the ->pullup callback, or the hardware encounters a low probability fault, both of them may cause the ->pullup callback to fail, which will then cause a system panic (use after free). The gadget drivers sometimes need to be unloaded regardless of the hardware's behavior. 
Analysis as follows: ======================================================================= (1) write /config/usb_gadget/g1/UDC "none" gether_disconnect+0x2c/0x1f8 rndis_disable+0x4c/0x74 composite_disconnect+0x74/0xb0 configfs_composite_disconnect+0x60/0x7c usb_gadget_disconnect+0x70/0x124 usb_gadget_unregister_driver+0xc8/0x1d8 gadget_dev_desc_UDC_store+0xec/0x1e4 (2) rm /config/usb_gadget/g1/configs/b.1/f1 rndis_deregister+0x28/0x54 rndis_free+0x44/0x7c usb_put_function+0x14/0x1c config_usb_cfg_unlink+0xc4/0xe0 configfs_unlink+0x124/0x1c8 vfs_unlink+0x114/0x1dc (3) rmdir /config/usb_gadget/g1/functions/rndis.gs4 panic+0x1fc/0x3d0 do_page_fault+0xa8/0x46c do_mem_abort+0x3c/0xac el1_sync_handler+0x40/0x78 0xffffff801138f880 rndis_close+0x28/0x34 eth_stop+0x74/0x110 dev_close_many+0x48/0x194 rollback_registered_many+0x118/0x814 unregister_netdev+0x20/0x30 gether_cleanup+0x1c/0x38 rndis_attr_release+0xc/0x14 kref_put+0x74/0xb8 configfs_rmdir+0x314/0x374 If gadget->ops->pullup() returns an error, function rndis_close() will be called, which then causes a use-after-free problem.
======================================================================= Fixes: 0a55187a1ec8 ("USB: gadget core: Issue ->disconnect() callback from usb_gadget_disconnect()") Signed-off-by: Jiantao Zhang Signed-off-by: TaoXue Link: https://lore.kernel.org/r/20221121130805.10735-1-water.zhangjiantao@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/core.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index c63c0c2cf649..bf9878e1a72a 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -734,13 +734,13 @@ int usb_gadget_disconnect(struct usb_gadget *gadget) } ret = gadget->ops->pullup(gadget, 0); - if (!ret) { + if (!ret) gadget->connected = 0; - mutex_lock(&udc_lock); - if (gadget->udc->driver) - gadget->udc->driver->disconnect(gadget); - mutex_unlock(&udc_lock); - } + + mutex_lock(&udc_lock); + if (gadget->udc->driver) + gadget->udc->driver->disconnect(gadget); + mutex_unlock(&udc_lock); out: trace_usb_gadget_disconnect(gadget, ret); From 05b2e347a58385b8b00051fef61f10a512b5aa20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:45:18 +0100 Subject: [PATCH 1979/4122] usb: typec: ucsi: stm32g0: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20221118224540.619276-585-uwe@kleine-koenig.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi_stm32g0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi_stm32g0.c b/drivers/usb/typec/ucsi/ucsi_stm32g0.c index 7b92f0c8de70..93fead0096b7 100644 --- a/drivers/usb/typec/ucsi/ucsi_stm32g0.c +++ b/drivers/usb/typec/ucsi/ucsi_stm32g0.c @@ -626,7 +626,7 @@ static int ucsi_stm32g0_probe_bootloader(struct ucsi *ucsi) return 0; } -static int ucsi_stm32g0_probe(struct i2c_client *client, const struct i2c_device_id *id) +static int ucsi_stm32g0_probe(struct i2c_client *client) { struct device *dev = &client->dev; struct ucsi_stm32g0 *g0; @@ -763,7 +763,7 @@ static struct i2c_driver ucsi_stm32g0_i2c_driver = { .of_match_table = of_match_ptr(ucsi_stm32g0_typec_of_match), .pm = pm_sleep_ptr(&ucsi_stm32g0_pm_ops), }, - .probe = ucsi_stm32g0_probe, + .probe_new = ucsi_stm32g0_probe, .remove = ucsi_stm32g0_remove, .id_table = ucsi_stm32g0_typec_i2c_devid }; From d24182b10cd5c734499c6185e9c63403ee1de5ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:45:17 +0100 Subject: [PATCH 1980/4122] usb: typec: ucsi/ucsi_ccg: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20221118224540.619276-584-uwe@kleine-koenig.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi_ccg.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi_ccg.c b/drivers/usb/typec/ucsi/ucsi_ccg.c index 835f1c4372ba..46441f1477f2 100644 --- a/drivers/usb/typec/ucsi/ucsi_ccg.c +++ b/drivers/usb/typec/ucsi/ucsi_ccg.c @@ -1338,8 +1338,7 @@ static struct attribute *ucsi_ccg_attrs[] = { }; ATTRIBUTE_GROUPS(ucsi_ccg); -static int ucsi_ccg_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int ucsi_ccg_probe(struct i2c_client *client) { struct device *dev = &client->dev; struct ucsi_ccg *uc; @@ -1482,7 +1481,7 @@ static struct i2c_driver ucsi_ccg_driver = { .dev_groups = ucsi_ccg_groups, .acpi_match_table = amd_i2c_ucsi_match, }, - .probe = ucsi_ccg_probe, + .probe_new = ucsi_ccg_probe, .remove = ucsi_ccg_remove, .id_table = ucsi_ccg_device_id, }; From f02586d70aeb491a17c179c857aa4a361760bf0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:45:16 +0100 Subject: [PATCH 1981/4122] usb: typec: tcpm/tcpci_rt1711h: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Heikki Krogerus Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20221118224540.619276-583-uwe@kleine-koenig.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpci_rt1711h.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/usb/typec/tcpm/tcpci_rt1711h.c b/drivers/usb/typec/tcpm/tcpci_rt1711h.c index 7b217c712c11..a0e9e3fe8564 100644 --- a/drivers/usb/typec/tcpm/tcpci_rt1711h.c +++ b/drivers/usb/typec/tcpm/tcpci_rt1711h.c @@ -327,8 +327,7 @@ static int rt1711h_check_revision(struct i2c_client *i2c, struct rt1711h_chip *c return ret; } -static int rt1711h_probe(struct i2c_client *client, - const struct i2c_device_id *i2c_id) +static int rt1711h_probe(struct i2c_client *client) { int ret; struct rt1711h_chip *chip; @@ -413,7 +412,7 @@ static struct i2c_driver rt1711h_i2c_driver = { .name = "rt1711h", .of_match_table = of_match_ptr(rt1711h_of_match), }, - .probe = rt1711h_probe, + .probe_new = rt1711h_probe, .remove = rt1711h_remove, .id_table = rt1711h_id, }; From c852ec1c0192ff0ce032a1f0bbf23be01c98479d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:45:15 +0100 Subject: [PATCH 1982/4122] usb: typec: tcpm/tcpci_maxim: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Guenter Roeck Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20221118224540.619276-582-uwe@kleine-koenig.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpci_maxim.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/typec/tcpm/tcpci_maxim.c b/drivers/usb/typec/tcpm/tcpci_maxim.c index 03f89e6f1a78..83e140ffcc3e 100644 --- a/drivers/usb/typec/tcpm/tcpci_maxim.c +++ b/drivers/usb/typec/tcpm/tcpci_maxim.c @@ -438,7 +438,7 @@ static int tcpci_init(struct tcpci *tcpci, struct tcpci_data *data) return -1; } -static int max_tcpci_probe(struct i2c_client *client, const struct i2c_device_id *i2c_id) +static int max_tcpci_probe(struct i2c_client *client) { int ret; struct max_tcpci_chip *chip; @@ -519,7 +519,7 @@ static struct i2c_driver max_tcpci_i2c_driver = { .name = "maxtcpc", .of_match_table = of_match_ptr(max_tcpci_of_match), }, - .probe = max_tcpci_probe, + .probe_new = max_tcpci_probe, .remove = max_tcpci_remove, .id_table = max_tcpci_id, }; From bdd0400d0f7245091ca8eff781825e93db1a135f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:45:14 +0100 Subject: [PATCH 1983/4122] usb: typec: tcpm/tcpci: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Guenter Roeck Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20221118224540.619276-581-uwe@kleine-koenig.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpci.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/usb/typec/tcpm/tcpci.c b/drivers/usb/typec/tcpm/tcpci.c index 72f8d1e87600..fe781a38dc82 100644 --- a/drivers/usb/typec/tcpm/tcpci.c +++ b/drivers/usb/typec/tcpm/tcpci.c @@ -810,8 +810,7 @@ void tcpci_unregister_port(struct tcpci *tcpci) } EXPORT_SYMBOL_GPL(tcpci_unregister_port); -static int tcpci_probe(struct i2c_client *client, - const struct i2c_device_id *i2c_id) +static int tcpci_probe(struct i2c_client *client) { struct tcpci_chip *chip; int err; @@ -881,7 +880,7 @@ static struct i2c_driver tcpci_i2c_driver = { .name = "tcpci", .of_match_table = of_match_ptr(tcpci_of_match), }, - .probe = tcpci_probe, + .probe_new = tcpci_probe, .remove = tcpci_remove, .id_table = tcpci_id, }; From 3646730ee44f42dd91619e30c917c0140853a948 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:45:13 +0100 Subject: [PATCH 1984/4122] usb: typec: tcpm/fusb302: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Heikki Krogerus Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20221118224540.619276-580-uwe@kleine-koenig.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/fusb302.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/usb/typec/tcpm/fusb302.c b/drivers/usb/typec/tcpm/fusb302.c index 721b2a548084..1ffce00d94b4 100644 --- a/drivers/usb/typec/tcpm/fusb302.c +++ b/drivers/usb/typec/tcpm/fusb302.c @@ -1677,8 +1677,7 @@ static struct fwnode_handle *fusb302_fwnode_get(struct device *dev) return fwnode; } -static int fusb302_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int fusb302_probe(struct i2c_client *client) { struct fusb302_chip *chip; struct i2c_adapter *adapter = client->adapter; @@ -1837,7 +1836,7 @@ static struct i2c_driver fusb302_driver = { .pm = &fusb302_pm_ops, .of_match_table = of_match_ptr(fusb302_dt_match), }, - .probe = fusb302_probe, + .probe_new = fusb302_probe, .remove = fusb302_remove, .id_table = fusb302_i2c_device_id, }; From b5583ea8b9ea659241e3f0cb8c9ca56f9a9630b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:45:12 +0100 Subject: [PATCH 1985/4122] usb: typec: hd3ss3220: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20221118224540.619276-579-uwe@kleine-koenig.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/hd3ss3220.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/usb/typec/hd3ss3220.c b/drivers/usb/typec/hd3ss3220.c index 2a58185fb14c..f128664cb130 100644 --- a/drivers/usb/typec/hd3ss3220.c +++ b/drivers/usb/typec/hd3ss3220.c @@ -148,8 +148,7 @@ static const struct regmap_config config = { .max_register = 0x0A, }; -static int hd3ss3220_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int hd3ss3220_probe(struct i2c_client *client) { struct typec_capability typec_cap = { }; struct hd3ss3220 *hd3ss3220; @@ -264,7 +263,7 @@ static struct i2c_driver hd3ss3220_driver = { .name = "hd3ss3220", .of_match_table = of_match_ptr(dev_ids), }, - .probe = hd3ss3220_probe, + .probe_new = hd3ss3220_probe, .remove = hd3ss3220_remove, }; From cfb8e41ae81311eef73fc50e401e93543204de3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:45:11 +0100 Subject: [PATCH 1986/4122] usb: typec: anx7411: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221118224540.619276-578-uwe@kleine-koenig.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/anx7411.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/usb/typec/anx7411.c b/drivers/usb/typec/anx7411.c index b8f3b75fd7eb..3d5edce270a4 100644 --- a/drivers/usb/typec/anx7411.c +++ b/drivers/usb/typec/anx7411.c @@ -1440,8 +1440,7 @@ static int anx7411_psy_register(struct anx7411_data *ctx) return PTR_ERR_OR_ZERO(ctx->psy); } -static int anx7411_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int anx7411_i2c_probe(struct i2c_client *client) { struct anx7411_data *plat; struct device *dev = &client->dev; @@ -1585,7 +1584,7 @@ static struct i2c_driver anx7411_driver = { .of_match_table = anx_match_table, .pm = &anx7411_pm_ops, }, - .probe = anx7411_i2c_probe, + .probe_new = anx7411_i2c_probe, .remove = anx7411_i2c_remove, .id_table = anx7411_id, From 9f7cc30769ac0681669b963ede092f12afe829b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:45:10 +0100 Subject: [PATCH 1987/4122] usb: phy: isp1301: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221118224540.619276-577-uwe@kleine-koenig.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/phy/phy-isp1301.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/usb/phy/phy-isp1301.c b/drivers/usb/phy/phy-isp1301.c index c2777a5c1f4e..f4ee14d98585 100644 --- a/drivers/usb/phy/phy-isp1301.c +++ b/drivers/usb/phy/phy-isp1301.c @@ -92,8 +92,7 @@ static int isp1301_phy_set_vbus(struct usb_phy *phy, int on) return 0; } -static int isp1301_probe(struct i2c_client *client, - const struct i2c_device_id *i2c_id) +static int isp1301_probe(struct i2c_client *client) { struct isp1301 *isp; struct usb_phy *phy; @@ -133,7 +132,7 @@ static struct i2c_driver isp1301_driver = { .name = DRV_NAME, .of_match_table = isp1301_of_match, }, - .probe = isp1301_probe, + .probe_new = isp1301_probe, .remove = isp1301_remove, .id_table = isp1301_id, }; From c3ed6965fe7400c97d806edb282ad08810acdee9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:45:09 +0100 Subject: [PATCH 1988/4122] usb: isp1301-omap: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221118224540.619276-576-uwe@kleine-koenig.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/phy/phy-isp1301-omap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/phy/phy-isp1301-omap.c b/drivers/usb/phy/phy-isp1301-omap.c index e5d3f206097c..931610b76f3d 100644 --- a/drivers/usb/phy/phy-isp1301-omap.c +++ b/drivers/usb/phy/phy-isp1301-omap.c @@ -1471,7 +1471,7 @@ isp1301_start_hnp(struct usb_otg *otg) /*-------------------------------------------------------------------------*/ static int -isp1301_probe(struct i2c_client *i2c, const struct i2c_device_id *id) +isp1301_probe(struct i2c_client *i2c) { int status; struct isp1301 *isp; @@ -1616,7 +1616,7 @@ static struct i2c_driver isp1301_driver = { .driver = { .name = "isp1301_omap", }, - .probe = isp1301_probe, + .probe_new = isp1301_probe, .remove = isp1301_remove, .id_table = isp1301_id, }; From d4468280d8bcd68a7303efcb5a404efb680de1fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:45:08 +0100 Subject: [PATCH 1989/4122] usb: usb4604: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221118224540.619276-575-uwe@kleine-koenig.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/usb4604.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/usb/misc/usb4604.c b/drivers/usb/misc/usb4604.c index 2142af9bbdec..6b5e77231efa 100644 --- a/drivers/usb/misc/usb4604.c +++ b/drivers/usb/misc/usb4604.c @@ -97,8 +97,7 @@ static int usb4604_probe(struct usb4604 *hub) return usb4604_switch_mode(hub, hub->mode); } -static int usb4604_i2c_probe(struct i2c_client *i2c, - const struct i2c_device_id *id) +static int usb4604_i2c_probe(struct i2c_client *i2c) { struct usb4604 *hub; @@ -155,7 +154,7 @@ static struct i2c_driver usb4604_i2c_driver = { .pm = pm_ptr(&usb4604_i2c_pm_ops), .of_match_table = of_match_ptr(usb4604_of_match), }, - .probe = usb4604_i2c_probe, + .probe_new = usb4604_i2c_probe, .id_table = usb4604_id, }; module_i2c_driver(usb4604_i2c_driver); From 4b1e537ad367b415f30fc37c1f4403ddd12a88d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:45:07 +0100 Subject: [PATCH 1990/4122] usb: misc: usb3503: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221118224540.619276-574-uwe@kleine-koenig.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/usb3503.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/usb/misc/usb3503.c b/drivers/usb/misc/usb3503.c index c70ca475c7c7..bd47c4437ca4 100644 --- a/drivers/usb/misc/usb3503.c +++ b/drivers/usb/misc/usb3503.c @@ -280,8 +280,7 @@ err_clk: return err; } -static int usb3503_i2c_probe(struct i2c_client *i2c, - const struct i2c_device_id *id) +static int usb3503_i2c_probe(struct i2c_client *i2c) { struct usb3503 *hub; int err; @@ -400,7 +399,7 @@ static struct i2c_driver usb3503_i2c_driver = { .pm = pm_ptr(&usb3503_i2c_pm_ops), .of_match_table = of_match_ptr(usb3503_of_match), }, - .probe = usb3503_i2c_probe, + .probe_new = usb3503_i2c_probe, .remove = usb3503_i2c_remove, .id_table = usb3503_id, }; From 907140462eb511f3d98aa89c0665da1b618d3545 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:45:06 +0100 Subject: [PATCH 1991/4122] usb: usb251xb: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Acked-by: Richard Leitner Link: https://lore.kernel.org/r/20221118224540.619276-573-uwe@kleine-koenig.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/usb251xb.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/usb/misc/usb251xb.c b/drivers/usb/misc/usb251xb.c index 54337d72bb9f..e3abe67a155d 100644 --- a/drivers/usb/misc/usb251xb.c +++ b/drivers/usb/misc/usb251xb.c @@ -699,8 +699,7 @@ static int usb251xb_probe(struct usb251xb *hub) return 0; } -static int usb251xb_i2c_probe(struct i2c_client *i2c, - const struct i2c_device_id *id) +static int usb251xb_i2c_probe(struct i2c_client *i2c) { struct usb251xb *hub; @@ -758,7 +757,7 @@ static struct i2c_driver usb251xb_i2c_driver = { .of_match_table = of_match_ptr(usb251xb_of_match), .pm = &usb251xb_pm_ops, }, - .probe = usb251xb_i2c_probe, + .probe_new = usb251xb_i2c_probe, .id_table = usb251xb_id, }; From 02a476d932287cf3096f78962ccb70d94d6203c6 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 21 Nov 2022 10:46:45 +0100 Subject: [PATCH 1992/4122] kobject: make kobject_get_ownership() take a constant kobject * The call, kobject_get_ownership(), does not modify the kobject passed into it, so make it const. This propagates down into the kobj_type function callbacks so make the kobject passed into them also const, ensuring that nothing in the kobject is being changed here. This helps make it more obvious what calls and callbacks do, and do not, modify structures passed to them. Cc: Trond Myklebust Cc: Anna Schumaker Cc: Roopa Prabhu Cc: "David S. Miller" Cc: Eric Dumazet Cc: Paolo Abeni Cc: Chuck Lever Cc: Jeff Layton Cc: linux-nfs@vger.kernel.org Cc: bridge@lists.linux-foundation.org Cc: netdev@vger.kernel.org Acked-by: Jakub Kicinski Acked-by: Rafael J. 
Wysocki Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20221121094649.1556002-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/base/class.c | 2 +- drivers/base/core.c | 8 ++++---- fs/nfs/sysfs.c | 4 ++-- include/linux/kobject.h | 8 ++++---- lib/kobject.c | 4 ++-- net/bridge/br_if.c | 2 +- net/core/net-sysfs.c | 8 ++++---- net/sunrpc/sysfs.c | 8 ++++---- 8 files changed, 22 insertions(+), 22 deletions(-) diff --git a/drivers/base/class.c b/drivers/base/class.c index 8ceafb7d0203..86ec554cfe60 100644 --- a/drivers/base/class.c +++ b/drivers/base/class.c @@ -62,7 +62,7 @@ static void class_release(struct kobject *kobj) kfree(cp); } -static const struct kobj_ns_type_operations *class_child_ns_type(struct kobject *kobj) +static const struct kobj_ns_type_operations *class_child_ns_type(const struct kobject *kobj) { struct subsys_private *cp = to_subsys_private(kobj); struct class *class = cp->class; diff --git a/drivers/base/core.c b/drivers/base/core.c index ab01828fe6c1..a79b99ecf4d8 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2335,7 +2335,7 @@ static void device_release(struct kobject *kobj) kfree(p); } -static const void *device_namespace(struct kobject *kobj) +static const void *device_namespace(const struct kobject *kobj) { const struct device *dev = kobj_to_dev(kobj); const void *ns = NULL; @@ -2346,7 +2346,7 @@ static const void *device_namespace(struct kobject *kobj) return ns; } -static void device_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid) +static void device_get_ownership(const struct kobject *kobj, kuid_t *uid, kgid_t *gid) { const struct device *dev = kobj_to_dev(kobj); @@ -2986,9 +2986,9 @@ static void class_dir_release(struct kobject *kobj) } static const -struct kobj_ns_type_operations *class_dir_child_ns_type(struct kobject *kobj) +struct kobj_ns_type_operations *class_dir_child_ns_type(const struct kobject *kobj) { - struct class_dir *dir = to_class_dir(kobj); + const struct 
class_dir *dir = to_class_dir(kobj); return dir->class->ns_type; } diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c index a6f740366963..67a87800b3a9 100644 --- a/fs/nfs/sysfs.c +++ b/fs/nfs/sysfs.c @@ -26,7 +26,7 @@ static void nfs_netns_object_release(struct kobject *kobj) } static const struct kobj_ns_type_operations *nfs_netns_object_child_ns_type( - struct kobject *kobj) + const struct kobject *kobj) { return &net_ns_type_operations; } @@ -130,7 +130,7 @@ static void nfs_netns_client_release(struct kobject *kobj) kfree(c); } -static const void *nfs_netns_client_namespace(struct kobject *kobj) +static const void *nfs_netns_client_namespace(const struct kobject *kobj) { return container_of(kobj, struct nfs_netns_client, kobject)->net; } diff --git a/include/linux/kobject.h b/include/linux/kobject.h index fc40fc81aeb1..d978dbceb50d 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -113,7 +113,7 @@ extern struct kobject * __must_check kobject_get_unless_zero( extern void kobject_put(struct kobject *kobj); extern const void *kobject_namespace(struct kobject *kobj); -extern void kobject_get_ownership(struct kobject *kobj, +extern void kobject_get_ownership(const struct kobject *kobj, kuid_t *uid, kgid_t *gid); extern char *kobject_get_path(const struct kobject *kobj, gfp_t flag); @@ -121,9 +121,9 @@ struct kobj_type { void (*release)(struct kobject *kobj); const struct sysfs_ops *sysfs_ops; const struct attribute_group **default_groups; - const struct kobj_ns_type_operations *(*child_ns_type)(struct kobject *kobj); - const void *(*namespace)(struct kobject *kobj); - void (*get_ownership)(struct kobject *kobj, kuid_t *uid, kgid_t *gid); + const struct kobj_ns_type_operations *(*child_ns_type)(const struct kobject *kobj); + const void *(*namespace)(const struct kobject *kobj); + void (*get_ownership)(const struct kobject *kobj, kuid_t *uid, kgid_t *gid); }; struct kobj_uevent_env { diff --git a/lib/kobject.c b/lib/kobject.c index 
ba1017cd67d1..26e744a46d24 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -45,7 +45,7 @@ const void *kobject_namespace(struct kobject *kobj) * representation of given kobject. Normally used to adjust ownership of * objects in a container. */ -void kobject_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid) +void kobject_get_ownership(const struct kobject *kobj, kuid_t *uid, kgid_t *gid) { *uid = GLOBAL_ROOT_UID; *gid = GLOBAL_ROOT_GID; @@ -907,7 +907,7 @@ static void kset_release(struct kobject *kobj) kfree(kset); } -static void kset_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid) +static void kset_get_ownership(const struct kobject *kobj, kuid_t *uid, kgid_t *gid) { if (kobj->parent) kobject_get_ownership(kobj->parent, uid, gid); diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 228fd5b20f10..ad13b48e3e08 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -262,7 +262,7 @@ static void release_nbp(struct kobject *kobj) kfree(p); } -static void brport_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid) +static void brport_get_ownership(const struct kobject *kobj, kuid_t *uid, kgid_t *gid) { struct net_bridge_port *p = kobj_to_brport(kobj); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index a8c5a7cd9701..9cfc80b8ed25 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1020,7 +1020,7 @@ static void rx_queue_release(struct kobject *kobj) netdev_put(queue->dev, &queue->dev_tracker); } -static const void *rx_queue_namespace(struct kobject *kobj) +static const void *rx_queue_namespace(const struct kobject *kobj) { struct netdev_rx_queue *queue = to_rx_queue(kobj); struct device *dev = &queue->dev->dev; @@ -1032,7 +1032,7 @@ static const void *rx_queue_namespace(struct kobject *kobj) return ns; } -static void rx_queue_get_ownership(struct kobject *kobj, +static void rx_queue_get_ownership(const struct kobject *kobj, kuid_t *uid, kgid_t *gid) { const struct net *net = rx_queue_namespace(kobj); @@ 
-1623,7 +1623,7 @@ static void netdev_queue_release(struct kobject *kobj) netdev_put(queue->dev, &queue->dev_tracker); } -static const void *netdev_queue_namespace(struct kobject *kobj) +static const void *netdev_queue_namespace(const struct kobject *kobj) { struct netdev_queue *queue = to_netdev_queue(kobj); struct device *dev = &queue->dev->dev; @@ -1635,7 +1635,7 @@ static const void *netdev_queue_namespace(struct kobject *kobj) return ns; } -static void netdev_queue_get_ownership(struct kobject *kobj, +static void netdev_queue_get_ownership(const struct kobject *kobj, kuid_t *uid, kgid_t *gid) { const struct net *net = netdev_queue_namespace(kobj); diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c index c1f559892ae8..1e05a2d723f4 100644 --- a/net/sunrpc/sysfs.c +++ b/net/sunrpc/sysfs.c @@ -31,7 +31,7 @@ static void rpc_sysfs_object_release(struct kobject *kobj) } static const struct kobj_ns_type_operations * -rpc_sysfs_object_child_ns_type(struct kobject *kobj) +rpc_sysfs_object_child_ns_type(const struct kobject *kobj) { return &net_ns_type_operations; } @@ -381,17 +381,17 @@ static void rpc_sysfs_xprt_release(struct kobject *kobj) kfree(xprt); } -static const void *rpc_sysfs_client_namespace(struct kobject *kobj) +static const void *rpc_sysfs_client_namespace(const struct kobject *kobj) { return container_of(kobj, struct rpc_sysfs_client, kobject)->net; } -static const void *rpc_sysfs_xprt_switch_namespace(struct kobject *kobj) +static const void *rpc_sysfs_xprt_switch_namespace(const struct kobject *kobj) { return container_of(kobj, struct rpc_sysfs_xprt_switch, kobject)->net; } -static const void *rpc_sysfs_xprt_namespace(struct kobject *kobj) +static const void *rpc_sysfs_xprt_namespace(const struct kobject *kobj) { return container_of(kobj, struct rpc_sysfs_xprt, kobject)->xprt->xprt_net; From 542aa24646ca20ccedb70829a95254ce602cdcbd Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 21 Nov 2022 10:46:46 +0100 Subject: [PATCH 1993/4122] 
kobject: make kobject_namespace take a const * kobject_namespace() should take a const *kobject as it does not modify the kobject passed to it. Change that, and the functions kobj_child_ns_ops() and kobj_ns_ops() needed to also be changed to const *. Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20221121094649.1556002-2-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/kobject.h | 2 +- include/linux/kobject_ns.h | 4 ++-- lib/kobject.c | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/linux/kobject.h b/include/linux/kobject.h index d978dbceb50d..5a2d58e10bf5 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -112,7 +112,7 @@ extern struct kobject * __must_check kobject_get_unless_zero( struct kobject *kobj); extern void kobject_put(struct kobject *kobj); -extern const void *kobject_namespace(struct kobject *kobj); +extern const void *kobject_namespace(const struct kobject *kobj); extern void kobject_get_ownership(const struct kobject *kobj, kuid_t *uid, kgid_t *gid); extern char *kobject_get_path(const struct kobject *kobj, gfp_t flag); diff --git a/include/linux/kobject_ns.h b/include/linux/kobject_ns.h index 2b5b64256cf4..be707748e7ce 100644 --- a/include/linux/kobject_ns.h +++ b/include/linux/kobject_ns.h @@ -47,8 +47,8 @@ struct kobj_ns_type_operations { int kobj_ns_type_register(const struct kobj_ns_type_operations *ops); int kobj_ns_type_registered(enum kobj_ns_type type); -const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent); -const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj); +const struct kobj_ns_type_operations *kobj_child_ns_ops(const struct kobject *parent); +const struct kobj_ns_type_operations *kobj_ns_ops(const struct kobject *kobj); bool kobj_ns_current_may_mount(enum kobj_ns_type type); void *kobj_ns_grab_current(enum kobj_ns_type type); diff --git a/lib/kobject.c b/lib/kobject.c index 
26e744a46d24..6e0bf03f4f36 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -25,7 +25,7 @@ * and thus @kobj should have a namespace tag associated with it. Returns * %NULL otherwise. */ -const void *kobject_namespace(struct kobject *kobj) +const void *kobject_namespace(const struct kobject *kobj) { const struct kobj_ns_type_operations *ns_ops = kobj_ns_ops(kobj); @@ -1039,7 +1039,7 @@ int kobj_ns_type_registered(enum kobj_ns_type type) return registered; } -const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent) +const struct kobj_ns_type_operations *kobj_child_ns_ops(const struct kobject *parent) { const struct kobj_ns_type_operations *ops = NULL; @@ -1049,7 +1049,7 @@ const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent) return ops; } -const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj) +const struct kobj_ns_type_operations *kobj_ns_ops(const struct kobject *kobj) { return kobj_child_ns_ops(kobj->parent); } From c45a88bb3f6cdaeb29d8ee98463610ad815721ab Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 21 Nov 2022 10:46:47 +0100 Subject: [PATCH 1994/4122] kobject: kset_uevent_ops: make filter() callback take a const * MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The filter() callback in struct kset_uevent_ops does not modify the kobject passed into it, so make the pointer const to enforce this restriction. When doing so, fix up all existing filter() callbacks to have the correct signature to preserve the build. Cc: Sumit Semwal Cc: linux-media@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: linaro-mm-sig@lists.linaro.org Acked-by: Rafael J. 
Wysocki Acked-by: Christian König for the changes to Link: https://lore.kernel.org/r/20221121094649.1556002-3-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/base/bus.c | 2 +- drivers/base/core.c | 4 ++-- drivers/dma-buf/dma-buf-sysfs-stats.c | 2 +- include/linux/kobject.h | 2 +- kernel/params.c | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 7ca47e5b3c1f..4ec6dbab73be 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -163,7 +163,7 @@ static struct kobj_type bus_ktype = { .release = bus_release, }; -static int bus_uevent_filter(struct kobject *kobj) +static int bus_uevent_filter(const struct kobject *kobj) { const struct kobj_type *ktype = get_ktype(kobj); diff --git a/drivers/base/core.c b/drivers/base/core.c index a79b99ecf4d8..005a2b092f3e 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2362,12 +2362,12 @@ static struct kobj_type device_ktype = { }; -static int dev_uevent_filter(struct kobject *kobj) +static int dev_uevent_filter(const struct kobject *kobj) { const struct kobj_type *ktype = get_ktype(kobj); if (ktype == &device_ktype) { - struct device *dev = kobj_to_dev(kobj); + const struct device *dev = kobj_to_dev(kobj); if (dev->bus) return 1; if (dev->class) diff --git a/drivers/dma-buf/dma-buf-sysfs-stats.c b/drivers/dma-buf/dma-buf-sysfs-stats.c index 2bba0babcb62..f69d68122b9b 100644 --- a/drivers/dma-buf/dma-buf-sysfs-stats.c +++ b/drivers/dma-buf/dma-buf-sysfs-stats.c @@ -132,7 +132,7 @@ void dma_buf_stats_teardown(struct dma_buf *dmabuf) /* Statistics files do not need to send uevents. 
*/ -static int dmabuf_sysfs_uevent_filter(struct kobject *kobj) +static int dmabuf_sysfs_uevent_filter(const struct kobject *kobj) { return 0; } diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 5a2d58e10bf5..640f59d4b3de 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -135,7 +135,7 @@ struct kobj_uevent_env { }; struct kset_uevent_ops { - int (* const filter)(struct kobject *kobj); + int (* const filter)(const struct kobject *kobj); const char *(* const name)(struct kobject *kobj); int (* const uevent)(struct kobject *kobj, struct kobj_uevent_env *env); }; diff --git a/kernel/params.c b/kernel/params.c index 5b92310425c5..d2237209ceda 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -926,7 +926,7 @@ static const struct sysfs_ops module_sysfs_ops = { .store = module_attr_store, }; -static int uevent_filter(struct kobject *kobj) +static int uevent_filter(const struct kobject *kobj) { const struct kobj_type *ktype = get_ktype(kobj); From a53d1acc978321734a8fd7388f2c050a7219ab69 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 21 Nov 2022 10:46:48 +0100 Subject: [PATCH 1995/4122] kobject: kset_uevent_ops: make name() callback take a const * The name() callback in struct kset_uevent_ops does not modify the kobject passed into it, so make the pointer const to enforce this restriction. When doing so, fix up the single existing name() callback to have the correct signature to preserve the build. Acked-by: Rafael J. 
Wysocki Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20221121094649.1556002-4-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 4 ++-- include/linux/kobject.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 005a2b092f3e..a3e14143ec0c 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2376,9 +2376,9 @@ static int dev_uevent_filter(const struct kobject *kobj) return 0; } -static const char *dev_uevent_name(struct kobject *kobj) +static const char *dev_uevent_name(const struct kobject *kobj) { - struct device *dev = kobj_to_dev(kobj); + const struct device *dev = kobj_to_dev(kobj); if (dev->bus) return dev->bus->name; diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 640f59d4b3de..58a5b75612e3 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -136,7 +136,7 @@ struct kobj_uevent_env { struct kset_uevent_ops { int (* const filter)(const struct kobject *kobj); - const char *(* const name)(struct kobject *kobj); + const char *(* const name)(const struct kobject *kobj); int (* const uevent)(struct kobject *kobj, struct kobj_uevent_env *env); }; From 9f041c5d8296b3a04cf3ead473a124fb538490dc Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 21 Nov 2022 10:46:49 +0100 Subject: [PATCH 1996/4122] driver core: pass a const * into of_device_uevent() of_device_uevent() does not modify the struct device * passed into it, so make it a const * to enforce this. Also the documentation for the function was really wrong so fix that up at the same time. 
Cc: Rob Herring Cc: Frank Rowand Cc: devicetree@vger.kernel.org Link: https://lore.kernel.org/r/20221121094649.1556002-5-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/of/device.c | 6 +++--- include/linux/of_device.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/of/device.c b/drivers/of/device.c index 8cefe5a7d04e..c674a13c3055 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -332,10 +332,10 @@ EXPORT_SYMBOL_GPL(of_device_modalias); /** * of_device_uevent - Display OF related uevent information - * @dev: Device to apply DMA configuration - * @env: Kernel object's userspace event reference + * @dev: Device to display the uevent information for + * @env: Kernel object's userspace event reference to fill up */ -void of_device_uevent(struct device *dev, struct kobj_uevent_env *env) +void of_device_uevent(const struct device *dev, struct kobj_uevent_env *env) { const char *compat, *type; struct alias_prop *app; diff --git a/include/linux/of_device.h b/include/linux/of_device.h index 1a803e4335d3..ab7d557d541d 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -35,7 +35,7 @@ extern const void *of_device_get_match_data(const struct device *dev); extern ssize_t of_device_modalias(struct device *dev, char *str, ssize_t len); extern int of_device_request_module(struct device *dev); -extern void of_device_uevent(struct device *dev, struct kobj_uevent_env *env); +extern void of_device_uevent(const struct device *dev, struct kobj_uevent_env *env); extern int of_device_uevent_modalias(struct device *dev, struct kobj_uevent_env *env); static inline struct device_node *of_cpu_device_node_get(int cpu) @@ -64,7 +64,7 @@ static inline int of_driver_match_device(struct device *dev, return 0; } -static inline void of_device_uevent(struct device *dev, +static inline void of_device_uevent(const struct device *dev, struct kobj_uevent_env *env) { } static inline const void 
*of_device_get_match_data(const struct device *dev) From 947d66b68f3c4e7cf8f3f3500807b9d2a0de28ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 11 Nov 2022 16:25:02 +0200 Subject: [PATCH 1997/4122] n_tty: Rename tail to old_tail in n_tty_read() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The local tail variable in n_tty_read() is used for one purpose, it keeps the old tail. Thus, rename it appropriately to improve code readability. Signed-off-by: Ilpo Järvinen Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/22b37499-ff9a-7fc1-f6e0-58411328d122@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_tty.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 597019690ae6..c8f56c9b1a1c 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -2130,7 +2130,7 @@ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file, ssize_t retval = 0; long timeout; bool packet; - size_t tail; + size_t old_tail; /* * Is this a continuation of a read started earler? @@ -2193,7 +2193,7 @@ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file, } packet = tty->ctrl.packet; - tail = ldata->read_tail; + old_tail = ldata->read_tail; add_wait_queue(&tty->read_wait, &wait); while (nr) { @@ -2282,7 +2282,7 @@ more_to_be_read: if (time) timeout = time; } - if (tail != ldata->read_tail) + if (old_tail != ldata->read_tail) n_tty_kick_worker(tty); up_read(&tty->termios_rwsem); From cbdf6759e5b798b35ceafbae50fb7dd2340c9751 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Tue, 15 Nov 2022 08:17:21 +0100 Subject: [PATCH 1998/4122] tty: serial: altera_jtaguart: remove flag from altera_jtaguart_rx_chars() TTY_NORMAL is the only value it contains, so remove the variable and use the constant instead. 
Cc: Tobias Klauser Signed-off-by: Jiri Slaby (SUSE) Reviewed-by: Tobias Klauser Link: https://lore.kernel.org/r/20221115071724.5185-1-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/altera_jtaguart.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/tty/serial/altera_jtaguart.c b/drivers/tty/serial/altera_jtaguart.c index aa49553fac58..8d1729711584 100644 --- a/drivers/tty/serial/altera_jtaguart.c +++ b/drivers/tty/serial/altera_jtaguart.c @@ -126,18 +126,17 @@ static void altera_jtaguart_set_termios(struct uart_port *port, static void altera_jtaguart_rx_chars(struct altera_jtaguart *pp) { struct uart_port *port = &pp->port; - unsigned char ch, flag; + unsigned char ch; unsigned long status; while ((status = readl(port->membase + ALTERA_JTAGUART_DATA_REG)) & ALTERA_JTAGUART_DATA_RVALID_MSK) { ch = status & ALTERA_JTAGUART_DATA_DATA_MSK; - flag = TTY_NORMAL; port->icount.rx++; if (uart_handle_sysrq_char(port, ch)) continue; - uart_insert_char(port, 0, 0, ch, flag); + uart_insert_char(port, 0, 0, ch, TTY_NORMAL); } tty_flip_buffer_push(&port->state->port); From 070fa1d2bf089937559183320c4066c065312665 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Tue, 15 Nov 2022 08:17:22 +0100 Subject: [PATCH 1999/4122] tty: serial: altera_jtaguart: remove unused altera_jtaguart::sigs Nothing uses struct altera_jtaguart::sigs. Remove it. 
Cc: Tobias Klauser Signed-off-by: Jiri Slaby (SUSE) Reviewed-by: Tobias Klauser Link: https://lore.kernel.org/r/20221115071724.5185-2-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/altera_jtaguart.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/tty/serial/altera_jtaguart.c b/drivers/tty/serial/altera_jtaguart.c index 8d1729711584..b83eade64b22 100644 --- a/drivers/tty/serial/altera_jtaguart.c +++ b/drivers/tty/serial/altera_jtaguart.c @@ -55,7 +55,6 @@ */ struct altera_jtaguart { struct uart_port port; - unsigned int sigs; /* Local copy of line sigs */ unsigned long imr; /* Local IMR mirror */ }; From 4e2b16a62d9975c4f6135e6a79a8e00cbad812d6 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Tue, 15 Nov 2022 08:17:23 +0100 Subject: [PATCH 2000/4122] tty: serial: altera_jtaguart: use uart_port::read_status_mask Instead of self-defined struct altera_jtaguart::imr, use preexisting uart_port::read_status_mask. Note that imr was ulong. But there is no reason for that, its values are uints. And readl/writel's are used to read it/write to it. 
Cc: Tobias Klauser Signed-off-by: Jiri Slaby (SUSE) Reviewed-by: Tobias Klauser Link: https://lore.kernel.org/r/20221115071724.5185-3-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/altera_jtaguart.c | 41 +++++++++++----------------- 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/drivers/tty/serial/altera_jtaguart.c b/drivers/tty/serial/altera_jtaguart.c index b83eade64b22..6808abd27785 100644 --- a/drivers/tty/serial/altera_jtaguart.c +++ b/drivers/tty/serial/altera_jtaguart.c @@ -55,7 +55,6 @@ */ struct altera_jtaguart { struct uart_port port; - unsigned long imr; /* Local IMR mirror */ }; static unsigned int altera_jtaguart_tx_space(struct uart_port *port, u32 *ctlp) @@ -84,29 +83,23 @@ static void altera_jtaguart_set_mctrl(struct uart_port *port, unsigned int sigs) static void altera_jtaguart_start_tx(struct uart_port *port) { - struct altera_jtaguart *pp = - container_of(port, struct altera_jtaguart, port); - - pp->imr |= ALTERA_JTAGUART_CONTROL_WE_MSK; - writel(pp->imr, port->membase + ALTERA_JTAGUART_CONTROL_REG); + port->read_status_mask |= ALTERA_JTAGUART_CONTROL_WE_MSK; + writel(port->read_status_mask, + port->membase + ALTERA_JTAGUART_CONTROL_REG); } static void altera_jtaguart_stop_tx(struct uart_port *port) { - struct altera_jtaguart *pp = - container_of(port, struct altera_jtaguart, port); - - pp->imr &= ~ALTERA_JTAGUART_CONTROL_WE_MSK; - writel(pp->imr, port->membase + ALTERA_JTAGUART_CONTROL_REG); + port->read_status_mask &= ~ALTERA_JTAGUART_CONTROL_WE_MSK; + writel(port->read_status_mask, + port->membase + ALTERA_JTAGUART_CONTROL_REG); } static void altera_jtaguart_stop_rx(struct uart_port *port) { - struct altera_jtaguart *pp = - container_of(port, struct altera_jtaguart, port); - - pp->imr &= ~ALTERA_JTAGUART_CONTROL_RE_MSK; - writel(pp->imr, port->membase + ALTERA_JTAGUART_CONTROL_REG); + port->read_status_mask &= ~ALTERA_JTAGUART_CONTROL_RE_MSK; + writel(port->read_status_mask, + port->membase + 
ALTERA_JTAGUART_CONTROL_REG); } static void altera_jtaguart_break_ctl(struct uart_port *port, int break_state) @@ -163,7 +156,7 @@ static irqreturn_t altera_jtaguart_interrupt(int irq, void *data) unsigned int isr; isr = (readl(port->membase + ALTERA_JTAGUART_CONTROL_REG) >> - ALTERA_JTAGUART_CONTROL_RI_OFF) & pp->imr; + ALTERA_JTAGUART_CONTROL_RI_OFF) & port->read_status_mask; spin_lock(&port->lock); @@ -187,8 +180,6 @@ static void altera_jtaguart_config_port(struct uart_port *port, int flags) static int altera_jtaguart_startup(struct uart_port *port) { - struct altera_jtaguart *pp = - container_of(port, struct altera_jtaguart, port); unsigned long flags; int ret; @@ -203,8 +194,9 @@ static int altera_jtaguart_startup(struct uart_port *port) spin_lock_irqsave(&port->lock, flags); /* Enable RX interrupts now */ - pp->imr = ALTERA_JTAGUART_CONTROL_RE_MSK; - writel(pp->imr, port->membase + ALTERA_JTAGUART_CONTROL_REG); + port->read_status_mask = ALTERA_JTAGUART_CONTROL_RE_MSK; + writel(port->read_status_mask, + port->membase + ALTERA_JTAGUART_CONTROL_REG); spin_unlock_irqrestore(&port->lock, flags); @@ -213,15 +205,14 @@ static int altera_jtaguart_startup(struct uart_port *port) static void altera_jtaguart_shutdown(struct uart_port *port) { - struct altera_jtaguart *pp = - container_of(port, struct altera_jtaguart, port); unsigned long flags; spin_lock_irqsave(&port->lock, flags); /* Disable all interrupts now */ - pp->imr = 0; - writel(pp->imr, port->membase + ALTERA_JTAGUART_CONTROL_REG); + port->read_status_mask = 0; + writel(port->read_status_mask, + port->membase + ALTERA_JTAGUART_CONTROL_REG); spin_unlock_irqrestore(&port->lock, flags); From 4d167f635a4d33f6b645f60c2a265f93668fdd8d Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Tue, 15 Nov 2022 08:17:24 +0100 Subject: [PATCH 2001/4122] tty: serial: altera_jtaguart: remove struct altera_jtaguart It contains only struct uart_port, so no need for another structure. 
Remove it and convert the rest to use struct uart_port directly. Cc: Tobias Klauser Signed-off-by: Jiri Slaby (SUSE) Reviewed-by: Tobias Klauser Link: https://lore.kernel.org/r/20221115071724.5185-4-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/altera_jtaguart.c | 29 +++++++++------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/drivers/tty/serial/altera_jtaguart.c b/drivers/tty/serial/altera_jtaguart.c index 6808abd27785..9f843d1cee40 100644 --- a/drivers/tty/serial/altera_jtaguart.c +++ b/drivers/tty/serial/altera_jtaguart.c @@ -50,13 +50,6 @@ #define ALTERA_JTAGUART_CONTROL_AC_MSK 0x00000400 #define ALTERA_JTAGUART_CONTROL_WSPACE_MSK 0xFFFF0000 -/* - * Local per-uart structure. - */ -struct altera_jtaguart { - struct uart_port port; -}; - static unsigned int altera_jtaguart_tx_space(struct uart_port *port, u32 *ctlp) { u32 ctl = readl(port->membase + ALTERA_JTAGUART_CONTROL_REG); @@ -115,9 +108,8 @@ static void altera_jtaguart_set_termios(struct uart_port *port, tty_termios_copy_hw(termios, old); } -static void altera_jtaguart_rx_chars(struct altera_jtaguart *pp) +static void altera_jtaguart_rx_chars(struct uart_port *port) { - struct uart_port *port = &pp->port; unsigned char ch; unsigned long status; @@ -134,9 +126,8 @@ static void altera_jtaguart_rx_chars(struct altera_jtaguart *pp) tty_flip_buffer_push(&port->state->port); } -static void altera_jtaguart_tx_chars(struct altera_jtaguart *pp) +static void altera_jtaguart_tx_chars(struct uart_port *port) { - struct uart_port *port = &pp->port; unsigned int count; u8 ch; @@ -151,8 +142,6 @@ static void altera_jtaguart_tx_chars(struct altera_jtaguart *pp) static irqreturn_t altera_jtaguart_interrupt(int irq, void *data) { struct uart_port *port = data; - struct altera_jtaguart *pp = - container_of(port, struct altera_jtaguart, port); unsigned int isr; isr = (readl(port->membase + ALTERA_JTAGUART_CONTROL_REG) >> @@ -161,9 +150,9 @@ static irqreturn_t 
altera_jtaguart_interrupt(int irq, void *data) spin_lock(&port->lock); if (isr & ALTERA_JTAGUART_CONTROL_RE_MSK) - altera_jtaguart_rx_chars(pp); + altera_jtaguart_rx_chars(port); if (isr & ALTERA_JTAGUART_CONTROL_WE_MSK) - altera_jtaguart_tx_chars(pp); + altera_jtaguart_tx_chars(port); spin_unlock(&port->lock); @@ -265,7 +254,7 @@ static const struct uart_ops altera_jtaguart_ops = { }; #define ALTERA_JTAGUART_MAXPORTS 1 -static struct altera_jtaguart altera_jtaguart_ports[ALTERA_JTAGUART_MAXPORTS]; +static struct uart_port altera_jtaguart_ports[ALTERA_JTAGUART_MAXPORTS]; #if defined(CONFIG_SERIAL_ALTERA_JTAGUART_CONSOLE) @@ -308,7 +297,7 @@ static void altera_jtaguart_console_putc(struct uart_port *port, unsigned char c static void altera_jtaguart_console_write(struct console *co, const char *s, unsigned int count) { - struct uart_port *port = &(altera_jtaguart_ports + co->index)->port; + struct uart_port *port = &altera_jtaguart_ports[co->index]; uart_console_write(port, s, count, altera_jtaguart_console_putc); } @@ -320,7 +309,7 @@ static int __init altera_jtaguart_console_setup(struct console *co, if (co->index < 0 || co->index >= ALTERA_JTAGUART_MAXPORTS) return -EINVAL; - port = &altera_jtaguart_ports[co->index].port; + port = &altera_jtaguart_ports[co->index]; if (port->membase == NULL) return -ENODEV; return 0; @@ -400,7 +389,7 @@ static int altera_jtaguart_probe(struct platform_device *pdev) if (i >= ALTERA_JTAGUART_MAXPORTS) return -EINVAL; - port = &altera_jtaguart_ports[i].port; + port = &altera_jtaguart_ports[i]; res_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (res_mem) @@ -444,7 +433,7 @@ static int altera_jtaguart_remove(struct platform_device *pdev) if (i == -1) i = 0; - port = &altera_jtaguart_ports[i].port; + port = &altera_jtaguart_ports[i]; uart_remove_one_port(&altera_jtaguart_driver, port); iounmap(port->membase); From 94cdb9f33698478b0e7062586633c42c6158a786 Mon Sep 17 00:00:00 2001 From: Jiamei Xie Date: Thu, 17 Nov 2022 18:32:37 
+0800 Subject: [PATCH 2002/4122] serial: amba-pl011: avoid SBSA UART accessing DMACR register Chapter "B Generic UART" in "ARM Server Base System Architecture" [1] documentation describes a generic UART interface. Such generic UART does not support DMA. In current code, sbsa_uart_pops and amba_pl011_pops share the same stop_rx operation, which will invoke pl011_dma_rx_stop, leading to an access of the DMACR register. This commit adds a using_rx_dma check in pl011_dma_rx_stop to avoid the access to DMACR register for SBSA UARTs, which do not support DMA. When the kernel enables DMA engine with "CONFIG_DMA_ENGINE=y", Linux SBSA PL011 driver will access PL011 DMACR register in some functions. For most real SBSA PL011 hardware implementations, the DMACR write behaviour will be ignored. So these DMACR operations will not cause obvious problems. But for some virtual SBSA PL011 hardware, like Xen virtual SBSA PL011 (vpl011) device, the behaviour might be different. Xen vpl011 emulation will inject a data abort to guest, when guest is accessing an unimplemented UART register. As Xen VPL011 is SBSA compatible, it will not implement DMACR register. So when the Linux SBSA PL011 driver accesses the DMACR register, it will get an unhandled data abort fault and the application will get a segmentation fault: Unhandled fault at 0xffffffc00944d048 Mem abort info: ESR = 0x96000000 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x00: ttbr address size fault Data abort info: ISV = 0, ISS = 0x00000000 CM = 0, WnR = 0 swapper pgtable: 4k pages, 39-bit VAs, pgdp=0000000020e2e000 [ffffffc00944d048] pgd=100000003ffff803, p4d=100000003ffff803, pud=100000003ffff803, pmd=100000003fffa803, pte=006800009c090f13 Internal error: ttbr address size fault: 96000000 [#1] PREEMPT SMP ...
Call trace: pl011_stop_rx+0x70/0x80 tty_port_shutdown+0x7c/0xb4 tty_port_close+0x60/0xcc uart_close+0x34/0x8c tty_release+0x144/0x4c0 __fput+0x78/0x220 ____fput+0x1c/0x30 task_work_run+0x88/0xc0 do_notify_resume+0x8d0/0x123c el0_svc+0xa8/0xc0 el0t_64_sync_handler+0xa4/0x130 el0t_64_sync+0x1a0/0x1a4 Code: b9000083 b901f001 794038a0 8b000042 (b9000041) ---[ end trace 83dd93df15c3216f ]--- note: bootlogd[132] exited with preempt_count 1 /etc/rcS.d/S07bootlogd: line 47: 132 Segmentation fault start-stop-daemon This has been discussed in the Xen community, and we think it should fix this in Linux. See [2] for more information. [1] https://developer.arm.com/documentation/den0094/c/?lang=en [2] https://lists.xenproject.org/archives/html/xen-devel/2022-11/msg00543.html Fixes: 0dd1e247fd39 (drivers: PL011: add support for the ARM SBSA generic UART) Signed-off-by: Jiamei Xie Reviewed-by: Andre Przywara Link: https://lore.kernel.org/r/20221117103237.86856-1-jiamei.xie@arm.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/amba-pl011.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index 6d8552506091..6b9deb4211b5 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -1044,6 +1044,9 @@ static void pl011_dma_rx_callback(void *data) */ static inline void pl011_dma_rx_stop(struct uart_amba_port *uap) { + if (!uap->using_rx_dma) + return; + /* FIXME. Just disable the DMA enable */ uap->dmacr &= ~UART011_RXDMAE; pl011_write(uap->dmacr, uap, REG_DMACR); From 032d5a71ed378ffc6a2d41a187d8488a4f9fe415 Mon Sep 17 00:00:00 2001 From: delisun Date: Thu, 10 Nov 2022 10:01:08 +0800 Subject: [PATCH 2003/4122] serial: pl011: Do not clear RX FIFO & RX interrupt in unthrottle. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clearing the RX FIFO will cause data loss. 
Copy the pl011_enable_interrupts implementation, and remove the clear interrupt and FIFO part of the code. Fixes: 211565b10099 ("serial: pl011: UPSTAT_AUTORTS requires .throttle/unthrottle") Signed-off-by: delisun Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221110020108.7700-1-delisun@pateo.com.cn Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/amba-pl011.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index 6b9deb4211b5..d75c39f4622b 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -1830,8 +1830,17 @@ static void pl011_enable_interrupts(struct uart_amba_port *uap) static void pl011_unthrottle_rx(struct uart_port *port) { struct uart_amba_port *uap = container_of(port, struct uart_amba_port, port); + unsigned long flags; - pl011_enable_interrupts(uap); + spin_lock_irqsave(&uap->port.lock, flags); + + uap->im = UART011_RTIM; + if (!pl011_dma_rx_running(uap)) + uap->im |= UART011_RXIM; + + pl011_write(uap->im, uap, REG_IMSC); + + spin_unlock_irqrestore(&uap->port.lock, flags); } static int pl011_startup(struct uart_port *port) From b30e66863c60bb5c7da2151a65afb2ee419b9df4 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 11 Nov 2022 15:28:38 -0600 Subject: [PATCH 2004/4122] dt-bindings: serial: xlnx,opb-uartlite: Drop 'contains' from 'xlnx,use-parity' 'contains' applies to arrays, but 'xlnx,use-parity' is a scalar. So drop 'contains' from the 'if' schema.
Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20221111212838.4103828-1-robh@kernel.org Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/serial/xlnx,opb-uartlite.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/serial/xlnx,opb-uartlite.yaml b/Documentation/devicetree/bindings/serial/xlnx,opb-uartlite.yaml index f7617b88c7c3..2f4390e8d4e8 100644 --- a/Documentation/devicetree/bindings/serial/xlnx,opb-uartlite.yaml +++ b/Documentation/devicetree/bindings/serial/xlnx,opb-uartlite.yaml @@ -67,8 +67,7 @@ allOf: - if: properties: xlnx,use-parity: - contains: - const: 1 + const: 1 then: required: - xlnx,odd-parity From 0d114e9ff940ebad8e88267013bf96c605a6b336 Mon Sep 17 00:00:00 2001 From: Valentin Caron Date: Fri, 18 Nov 2022 18:06:02 +0100 Subject: [PATCH 2005/4122] serial: stm32: move dma_request_chan() before clk_prepare_enable() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If dma_request_chan() returns a PROBE_DEFER error, clk_disable_unprepare() will be called and USART clock will be disabled. But early console can be still active on the same USART. While moving dma_request_chan() before clk_prepare_enable(), the clock won't be taken in case of a DMA PROBE_DEFER error, and so it doesn't need to be disabled. Then USART is still clocked for early console. 
Fixes: a7770a4bfcf4 ("serial: stm32: defer probe for dma devices") Reported-by: Uwe Kleine-König Signed-off-by: Valentin Caron Link: https://lore.kernel.org/r/20221118170602.1057863-1-valentin.caron@foss.st.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/stm32-usart.c | 47 ++++++++++++++++---------------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c index 24def72b2565..a1490033aa16 100644 --- a/drivers/tty/serial/stm32-usart.c +++ b/drivers/tty/serial/stm32-usart.c @@ -1680,22 +1680,10 @@ static int stm32_usart_serial_probe(struct platform_device *pdev) if (!stm32port->info) return -EINVAL; - ret = stm32_usart_init_port(stm32port, pdev); - if (ret) - return ret; - - if (stm32port->wakeup_src) { - device_set_wakeup_capable(&pdev->dev, true); - ret = dev_pm_set_wake_irq(&pdev->dev, stm32port->port.irq); - if (ret) - goto err_deinit_port; - } - stm32port->rx_ch = dma_request_chan(&pdev->dev, "rx"); - if (PTR_ERR(stm32port->rx_ch) == -EPROBE_DEFER) { - ret = -EPROBE_DEFER; - goto err_wakeirq; - } + if (PTR_ERR(stm32port->rx_ch) == -EPROBE_DEFER) + return -EPROBE_DEFER; + /* Fall back in interrupt mode for any non-deferral error */ if (IS_ERR(stm32port->rx_ch)) stm32port->rx_ch = NULL; @@ -1709,6 +1697,17 @@ static int stm32_usart_serial_probe(struct platform_device *pdev) if (IS_ERR(stm32port->tx_ch)) stm32port->tx_ch = NULL; + ret = stm32_usart_init_port(stm32port, pdev); + if (ret) + goto err_dma_tx; + + if (stm32port->wakeup_src) { + device_set_wakeup_capable(&pdev->dev, true); + ret = dev_pm_set_wake_irq(&pdev->dev, stm32port->port.irq); + if (ret) + goto err_deinit_port; + } + if (stm32port->rx_ch && stm32_usart_of_dma_rx_probe(stm32port, pdev)) { /* Fall back in interrupt mode */ dma_release_channel(stm32port->rx_ch); @@ -1745,19 +1744,11 @@ err_port: pm_runtime_set_suspended(&pdev->dev); pm_runtime_put_noidle(&pdev->dev); - if (stm32port->tx_ch) { + if 
(stm32port->tx_ch) stm32_usart_of_dma_tx_remove(stm32port, pdev); - dma_release_channel(stm32port->tx_ch); - } - if (stm32port->rx_ch) stm32_usart_of_dma_rx_remove(stm32port, pdev); -err_dma_rx: - if (stm32port->rx_ch) - dma_release_channel(stm32port->rx_ch); - -err_wakeirq: if (stm32port->wakeup_src) dev_pm_clear_wake_irq(&pdev->dev); @@ -1767,6 +1758,14 @@ err_deinit_port: stm32_usart_deinit_port(stm32port); +err_dma_tx: + if (stm32port->tx_ch) + dma_release_channel(stm32port->tx_ch); + +err_dma_rx: + if (stm32port->rx_ch) + dma_release_channel(stm32port->rx_ch); + return ret; } From 24ce048b0d4d4d8542c26459e53be4b7840d374c Mon Sep 17 00:00:00 2001 From: Paul Fulghum Date: Tue, 15 Nov 2022 09:38:32 -0800 Subject: [PATCH 2006/4122] tty: synclink_gt: unwind actions in error path of net device open Resent again, last attempt still altered the plain text. Zhengchao Shao identified by inspection bugs in the error path of hdlcdev_open() in synclink_gt.c The function did not fully unwind actions in the error path. The use of try_module_get()/module_put() is unnecessary, potentially hazardous and is removed. The synclink_gt driver is already pinned any point the net device is registered, a requirement for calling this entry point. The call hdlc_open() to init the generic HDLC layer is moved to after driver level init/checks and proper rollback of previous actions is added. This is a more sensible ordering as the most common error paths are at the driver level and the driver level rollbacks require less processing than hdlc_open()/hdlc_close(). This has been tested with supported hardware. 
Signed-off-by: Paul Fulghum Link: https://lore.kernel.org/r/7599F007-8985-4469-BE00-52BD1530210E@microgate.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/synclink_gt.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/tty/synclink_gt.c b/drivers/tty/synclink_gt.c index 25e9befdda3a..72b76cdde534 100644 --- a/drivers/tty/synclink_gt.c +++ b/drivers/tty/synclink_gt.c @@ -1433,16 +1433,8 @@ static int hdlcdev_open(struct net_device *dev) int rc; unsigned long flags; - if (!try_module_get(THIS_MODULE)) - return -EBUSY; - DBGINFO(("%s hdlcdev_open\n", dev->name)); - /* generic HDLC layer open processing */ - rc = hdlc_open(dev); - if (rc) - return rc; - /* arbitrate between network and tty opens */ spin_lock_irqsave(&info->netlock, flags); if (info->port.count != 0 || info->netcount != 0) { @@ -1461,6 +1453,16 @@ static int hdlcdev_open(struct net_device *dev) return rc; } + /* generic HDLC layer open processing */ + rc = hdlc_open(dev); + if (rc) { + shutdown(info); + spin_lock_irqsave(&info->netlock, flags); + info->netcount = 0; + spin_unlock_irqrestore(&info->netlock, flags); + return rc; + } + /* assert RTS and DTR, apply hardware settings */ info->signals |= SerialSignal_RTS | SerialSignal_DTR; program_hw(info); @@ -1506,7 +1508,6 @@ static int hdlcdev_close(struct net_device *dev) info->netcount=0; spin_unlock_irqrestore(&info->netlock, flags); - module_put(THIS_MODULE); return 0; } From 96e8298945010d4b0a0c21841566401848a42afc Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 14 Nov 2022 17:18:13 +0200 Subject: [PATCH 2007/4122] serdev: Replace poll loop by readx_poll_timeout() macro The readx_poll_timeout() consolidates the necessary code under macro. Replace current code with it. 
Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221114151813.37294-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/serdev.h | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/include/linux/serdev.h b/include/linux/serdev.h index 66f624fc618c..5f6bfe4f6d95 100644 --- a/include/linux/serdev.h +++ b/include/linux/serdev.h @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -279,18 +280,10 @@ static inline bool serdev_device_get_cts(struct serdev_device *serdev) static inline int serdev_device_wait_for_cts(struct serdev_device *serdev, bool state, int timeout_ms) { - unsigned long timeout; bool signal; - timeout = jiffies + msecs_to_jiffies(timeout_ms); - while (time_is_after_jiffies(timeout)) { - signal = serdev_device_get_cts(serdev); - if (signal == state) - return 0; - usleep_range(1000, 2000); - } - - return -ETIMEDOUT; + return readx_poll_timeout(serdev_device_get_cts, serdev, signal, signal == state, + 2000, timeout_ms * 1000); } static inline int serdev_device_set_rts(struct serdev_device *serdev, bool enable) From 4f5cb8c5e9151c678fc2be533c070c6f7522940e Mon Sep 17 00:00:00 2001 From: Sherry Sun Date: Thu, 10 Nov 2022 19:38:57 +0800 Subject: [PATCH 2008/4122] tty: serial: fsl_lpuart: enable wakeup source for lpuart LPUART supports both synchronous wakeup and asynchronous wakeup(wakeup the system when the UART clocks are shut-off), the synchronous wakeup is configured by UARTCTRL_RIE interrupt, and the asynchronous wakeup is configured by UARTBAUD_RXEDGIE interrupt. Add lpuart_uport_is_active() to determine if the uart port needs to get into the suspend states, also add lpuart_suspend_noirq() and lpuart_resume_noirq() to enable and disable the wakeup irq bits if the uart port needs to be set as wakeup source. 
When lpuart is used in DMA mode, it still needs to switch to CPU mode in .suspend() to enable the CPU interrupts RIE and RXEDGIE as wakeup sources. After the system resumes, DMA needs to be set up again; .resume() shares the HW setup code with .startup(), so factor the common code out into helpers such as lpuart32_hw_setup(). Signed-off-by: Sherry Sun Link: https://lore.kernel.org/r/20221110113859.8485-2-sherry.sun@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/fsl_lpuart.c | 287 ++++++++++++++++++++++---------- 1 file changed, 203 insertions(+), 84 deletions(-) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index bd685491eead..07c1524ef008 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -1627,10 +1628,23 @@ err: sport->lpuart_dma_rx_use = false; } +static void lpuart_hw_setup(struct lpuart_port *sport) +{ + unsigned long flags; + + spin_lock_irqsave(&sport->port.lock, flags); + + lpuart_setup_watermark_enable(sport); + + lpuart_rx_dma_startup(sport); + lpuart_tx_dma_startup(sport); + + spin_unlock_irqrestore(&sport->port.lock, flags); +} + static int lpuart_startup(struct uart_port *port) { struct lpuart_port *sport = container_of(port, struct lpuart_port, port); - unsigned long flags; unsigned char temp; /* determine FIFO size and enable FIFO mode */ @@ -1644,15 +1658,7 @@ static int lpuart_startup(struct uart_port *port) UARTPFIFO_FIFOSIZE_MASK); lpuart_request_dma(sport); - - spin_lock_irqsave(&sport->port.lock, flags); - - lpuart_setup_watermark_enable(sport); - - lpuart_rx_dma_startup(sport); - lpuart_tx_dma_startup(sport); - - spin_unlock_irqrestore(&sport->port.lock, flags); + lpuart_hw_setup(sport); return 0; } @@ -1675,10 +1681,25 @@ static void lpuart32_configure(struct lpuart_port *sport) lpuart32_write(&sport->port, temp, UARTCTRL); } +static void lpuart32_hw_setup(struct lpuart_port *sport) +{ 
+ unsigned long flags; + + spin_lock_irqsave(&sport->port.lock, flags); + + lpuart32_setup_watermark_enable(sport); + + lpuart_rx_dma_startup(sport); + lpuart_tx_dma_startup(sport); + + lpuart32_configure(sport); + + spin_unlock_irqrestore(&sport->port.lock, flags); +} + static int lpuart32_startup(struct uart_port *port) { struct lpuart_port *sport = container_of(port, struct lpuart_port, port); - unsigned long flags; unsigned long temp; /* determine FIFO size */ @@ -1703,17 +1724,8 @@ static int lpuart32_startup(struct uart_port *port) } lpuart_request_dma(sport); + lpuart32_hw_setup(sport); - spin_lock_irqsave(&sport->port.lock, flags); - - lpuart32_setup_watermark_enable(sport); - - lpuart_rx_dma_startup(sport); - lpuart_tx_dma_startup(sport); - - lpuart32_configure(sport); - - spin_unlock_irqrestore(&sport->port.lock, flags); return 0; } @@ -2766,97 +2778,204 @@ static int lpuart_remove(struct platform_device *pdev) return 0; } +static void serial_lpuart_enable_wakeup(struct lpuart_port *sport, bool on) +{ + unsigned int val, baud; + + if (lpuart_is_32(sport)) { + val = lpuart32_read(&sport->port, UARTCTRL); + baud = lpuart32_read(&sport->port, UARTBAUD); + if (on) { + /* set rx_watermark to 0 in wakeup source mode */ + lpuart32_write(&sport->port, 0, UARTWATER); + val |= UARTCTRL_RIE; + /* clear RXEDGIF flag before enable RXEDGIE interrupt */ + lpuart32_write(&sport->port, UARTSTAT_RXEDGIF, UARTSTAT); + baud |= UARTBAUD_RXEDGIE; + } else { + val &= ~UARTCTRL_RIE; + baud &= ~UARTBAUD_RXEDGIE; + } + lpuart32_write(&sport->port, val, UARTCTRL); + lpuart32_write(&sport->port, baud, UARTBAUD); + } else { + val = readb(sport->port.membase + UARTCR2); + if (on) + val |= UARTCR2_RIE; + else + val &= ~UARTCR2_RIE; + writeb(val, sport->port.membase + UARTCR2); + } +} + +static bool lpuart_uport_is_active(struct lpuart_port *sport) +{ + struct tty_port *port = &sport->port.state->port; + struct tty_struct *tty; + struct device *tty_dev; + int may_wake = 0; + + tty = 
tty_port_tty_get(port); + if (tty) { + tty_dev = tty->dev; + may_wake = device_may_wakeup(tty_dev); + tty_kref_put(tty); + } + + if ((tty_port_initialized(port) && may_wake) || + (!console_suspend_enabled && uart_console(&sport->port))) + return true; + + return false; +} + +static int __maybe_unused lpuart_suspend_noirq(struct device *dev) +{ + struct lpuart_port *sport = dev_get_drvdata(dev); + bool irq_wake = irqd_is_wakeup_set(irq_get_irq_data(sport->port.irq)); + + if (lpuart_uport_is_active(sport)) + serial_lpuart_enable_wakeup(sport, !!irq_wake); + + pinctrl_pm_select_sleep_state(dev); + + return 0; +} + +static int __maybe_unused lpuart_resume_noirq(struct device *dev) +{ + struct lpuart_port *sport = dev_get_drvdata(dev); + unsigned int val; + + pinctrl_pm_select_default_state(dev); + + if (lpuart_uport_is_active(sport)) { + serial_lpuart_enable_wakeup(sport, false); + + /* clear the wakeup flags */ + if (lpuart_is_32(sport)) { + val = lpuart32_read(&sport->port, UARTSTAT); + lpuart32_write(&sport->port, val, UARTSTAT); + } + } + + return 0; +} + static int __maybe_unused lpuart_suspend(struct device *dev) { struct lpuart_port *sport = dev_get_drvdata(dev); - unsigned long temp; - bool irq_wake; - - if (lpuart_is_32(sport)) { - /* disable Rx/Tx and interrupts */ - temp = lpuart32_read(&sport->port, UARTCTRL); - temp &= ~(UARTCTRL_TE | UARTCTRL_TIE | UARTCTRL_TCIE); - lpuart32_write(&sport->port, temp, UARTCTRL); - } else { - /* disable Rx/Tx and interrupts */ - temp = readb(sport->port.membase + UARTCR2); - temp &= ~(UARTCR2_TE | UARTCR2_TIE | UARTCR2_TCIE); - writeb(temp, sport->port.membase + UARTCR2); - } + unsigned long temp, flags; uart_suspend_port(&lpuart_reg, &sport->port); - /* uart_suspend_port() might set wakeup flag */ - irq_wake = irqd_is_wakeup_set(irq_get_irq_data(sport->port.irq)); + if (lpuart_uport_is_active(sport)) { + spin_lock_irqsave(&sport->port.lock, flags); + if (lpuart_is_32(sport)) { + /* disable Rx/Tx and interrupts */ + temp = 
lpuart32_read(&sport->port, UARTCTRL); + temp &= ~(UARTCTRL_TE | UARTCTRL_TIE | UARTCTRL_TCIE); + lpuart32_write(&sport->port, temp, UARTCTRL); + } else { + /* disable Rx/Tx and interrupts */ + temp = readb(sport->port.membase + UARTCR2); + temp &= ~(UARTCR2_TE | UARTCR2_TIE | UARTCR2_TCIE); + writeb(temp, sport->port.membase + UARTCR2); + } + spin_unlock_irqrestore(&sport->port.lock, flags); - if (sport->lpuart_dma_rx_use) { - /* - * EDMA driver during suspend will forcefully release any - * non-idle DMA channels. If port wakeup is enabled or if port - * is console port or 'no_console_suspend' is set the Rx DMA - * cannot resume as expected, hence gracefully release the - * Rx DMA path before suspend and start Rx DMA path on resume. - */ - if (irq_wake) { + if (sport->lpuart_dma_rx_use) { + /* + * EDMA driver during suspend will forcefully release any + * non-idle DMA channels. If port wakeup is enabled or if port + * is console port or 'no_console_suspend' is set the Rx DMA + * cannot resume as expected, hence gracefully release the + * Rx DMA path before suspend and start Rx DMA path on resume. 
+ */ del_timer_sync(&sport->lpuart_timer); lpuart_dma_rx_free(&sport->port); + + /* Disable Rx DMA to use UART port as wakeup source */ + spin_lock_irqsave(&sport->port.lock, flags); + if (lpuart_is_32(sport)) { + temp = lpuart32_read(&sport->port, UARTBAUD); + lpuart32_write(&sport->port, temp & ~UARTBAUD_RDMAE, + UARTBAUD); + } else { + writeb(readb(sport->port.membase + UARTCR5) & + ~UARTCR5_RDMAS, sport->port.membase + UARTCR5); + } + spin_unlock_irqrestore(&sport->port.lock, flags); } - /* Disable Rx DMA to use UART port as wakeup source */ - if (lpuart_is_32(sport)) { - temp = lpuart32_read(&sport->port, UARTBAUD); - lpuart32_write(&sport->port, temp & ~UARTBAUD_RDMAE, - UARTBAUD); - } else { - writeb(readb(sport->port.membase + UARTCR5) & - ~UARTCR5_RDMAS, sport->port.membase + UARTCR5); + if (sport->lpuart_dma_tx_use) { + spin_lock_irqsave(&sport->port.lock, flags); + if (lpuart_is_32(sport)) { + temp = lpuart32_read(&sport->port, UARTBAUD); + temp &= ~UARTBAUD_TDMAE; + lpuart32_write(&sport->port, temp, UARTBAUD); + } else { + temp = readb(sport->port.membase + UARTCR5); + temp &= ~UARTCR5_TDMAS; + writeb(temp, sport->port.membase + UARTCR5); + } + spin_unlock_irqrestore(&sport->port.lock, flags); + sport->dma_tx_in_progress = false; + dmaengine_terminate_all(sport->dma_tx_chan); } } - if (sport->lpuart_dma_tx_use) { - sport->dma_tx_in_progress = false; - dmaengine_terminate_all(sport->dma_tx_chan); - } - - if (sport->port.suspended && !irq_wake) - lpuart_disable_clks(sport); - return 0; } +static void lpuart_console_fixup(struct lpuart_port *sport) +{ + struct tty_port *port = &sport->port.state->port; + struct uart_port *uport = &sport->port; + struct ktermios termios; + + /* i.MX7ULP enter VLLS mode that lpuart module power off and registers + * all lost no matter the port is wakeup source. + * For console port, console baud rate setting lost and print messy + * log when enable the console port as wakeup source. 
To avoid the + * issue happen, user should not enable uart port as wakeup source + * in VLLS mode, or restore console setting here. + */ + if (is_imx7ulp_lpuart(sport) && lpuart_uport_is_active(sport) && + console_suspend_enabled && uart_console(&sport->port)) { + + mutex_lock(&port->mutex); + memset(&termios, 0, sizeof(struct ktermios)); + termios.c_cflag = uport->cons->cflag; + if (port->tty && termios.c_cflag == 0) + termios = port->tty->termios; + uport->ops->set_termios(uport, &termios, NULL); + mutex_unlock(&port->mutex); + } +} + static int __maybe_unused lpuart_resume(struct device *dev) { struct lpuart_port *sport = dev_get_drvdata(dev); - bool irq_wake = irqd_is_wakeup_set(irq_get_irq_data(sport->port.irq)); - if (sport->port.suspended && !irq_wake) - lpuart_enable_clks(sport); - - if (lpuart_is_32(sport)) - lpuart32_setup_watermark_enable(sport); - else - lpuart_setup_watermark_enable(sport); - - if (sport->lpuart_dma_rx_use) { - if (irq_wake) { - if (!lpuart_start_rx_dma(sport)) - rx_dma_timer_init(sport); - else - sport->lpuart_dma_rx_use = false; - } + if (lpuart_uport_is_active(sport)) { + if (lpuart_is_32(sport)) + lpuart32_hw_setup(sport); + else + lpuart_hw_setup(sport); } - lpuart_tx_dma_startup(sport); - - if (lpuart_is_32(sport)) - lpuart32_configure(sport); - + lpuart_console_fixup(sport); uart_resume_port(&lpuart_reg, &sport->port); return 0; } -static SIMPLE_DEV_PM_OPS(lpuart_pm_ops, lpuart_suspend, lpuart_resume); +static const struct dev_pm_ops lpuart_pm_ops = { + SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(lpuart_suspend_noirq, + lpuart_resume_noirq) + SET_SYSTEM_SLEEP_PM_OPS(lpuart_suspend, lpuart_resume) +}; static struct platform_driver lpuart_driver = { .probe = lpuart_probe, From 43543e6f539b3e646348c253059f75e27d63c94d Mon Sep 17 00:00:00 2001 From: Sherry Sun Date: Thu, 10 Nov 2022 19:38:58 +0800 Subject: [PATCH 2009/4122] tty: serial: fsl_lpuart: Add runtime pm support Add runtime pm support to manage the lpuart clock. 
Signed-off-by: Sherry Sun Link: https://lore.kernel.org/r/20221110113859.8485-3-sherry.sun@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/fsl_lpuart.c | 60 +++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index 07c1524ef008..9e04728bcc0b 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -233,6 +234,7 @@ /* Rx DMA timeout in ms, which is used to calculate Rx ring buffer size */ #define DMA_RX_TIMEOUT (10) +#define UART_AUTOSUSPEND_TIMEOUT 3000 #define DRIVER_NAME "fsl-lpuart" #define DEV_NAME "ttyLP" @@ -793,6 +795,20 @@ static void lpuart32_start_tx(struct uart_port *port) } } +static void +lpuart_uart_pm(struct uart_port *port, unsigned int state, unsigned int oldstate) +{ + switch (state) { + case UART_PM_STATE_OFF: + pm_runtime_mark_last_busy(port->dev); + pm_runtime_put_autosuspend(port->dev); + break; + default: + pm_runtime_get_sync(port->dev); + break; + } +} + /* return TIOCSER_TEMT when transmitter is not busy */ static unsigned int lpuart_tx_empty(struct uart_port *port) { @@ -2229,6 +2245,7 @@ static const struct uart_ops lpuart_pops = { .startup = lpuart_startup, .shutdown = lpuart_shutdown, .set_termios = lpuart_set_termios, + .pm = lpuart_uart_pm, .type = lpuart_type, .request_port = lpuart_request_port, .release_port = lpuart_release_port, @@ -2253,6 +2270,7 @@ static const struct uart_ops lpuart32_pops = { .startup = lpuart32_startup, .shutdown = lpuart32_shutdown, .set_termios = lpuart32_set_termios, + .pm = lpuart_uart_pm, .type = lpuart_type, .request_port = lpuart_request_port, .release_port = lpuart_release_port, @@ -2733,6 +2751,11 @@ static int lpuart_probe(struct platform_device *pdev) handler = lpuart_int; } + pm_runtime_use_autosuspend(&pdev->dev); + pm_runtime_set_autosuspend_delay(&pdev->dev, 
UART_AUTOSUSPEND_TIMEOUT); + pm_runtime_set_active(&pdev->dev); + pm_runtime_enable(&pdev->dev); + ret = lpuart_global_reset(sport); if (ret) goto failed_reset; @@ -2757,6 +2780,9 @@ failed_irq_request: failed_attach_port: failed_get_rs485: failed_reset: + pm_runtime_disable(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); + pm_runtime_dont_use_autosuspend(&pdev->dev); lpuart_disable_clks(sport); return ret; } @@ -2775,9 +2801,30 @@ static int lpuart_remove(struct platform_device *pdev) if (sport->dma_rx_chan) dma_release_channel(sport->dma_rx_chan); + pm_runtime_disable(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); + pm_runtime_dont_use_autosuspend(&pdev->dev); return 0; } +static int __maybe_unused lpuart_runtime_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct lpuart_port *sport = platform_get_drvdata(pdev); + + lpuart_disable_clks(sport); + + return 0; +}; + +static int __maybe_unused lpuart_runtime_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct lpuart_port *sport = platform_get_drvdata(pdev); + + return lpuart_enable_clks(sport); +}; + static void serial_lpuart_enable_wakeup(struct lpuart_port *sport, bool on) { unsigned int val, baud; @@ -2923,6 +2970,10 @@ static int __maybe_unused lpuart_suspend(struct device *dev) sport->dma_tx_in_progress = false; dmaengine_terminate_all(sport->dma_tx_chan); } + } else if (pm_runtime_active(sport->port.dev)) { + lpuart_disable_clks(sport); + pm_runtime_disable(sport->port.dev); + pm_runtime_set_suspended(sport->port.dev); } return 0; @@ -2957,12 +3008,19 @@ static void lpuart_console_fixup(struct lpuart_port *sport) static int __maybe_unused lpuart_resume(struct device *dev) { struct lpuart_port *sport = dev_get_drvdata(dev); + int ret; if (lpuart_uport_is_active(sport)) { if (lpuart_is_32(sport)) lpuart32_hw_setup(sport); else lpuart_hw_setup(sport); + } else if (pm_runtime_active(sport->port.dev)) { + ret = 
lpuart_enable_clks(sport); + if (ret) + return ret; + pm_runtime_set_active(sport->port.dev); + pm_runtime_enable(sport->port.dev); } lpuart_console_fixup(sport); @@ -2972,6 +3030,8 @@ static int __maybe_unused lpuart_resume(struct device *dev) } static const struct dev_pm_ops lpuart_pm_ops = { + SET_RUNTIME_PM_OPS(lpuart_runtime_suspend, + lpuart_runtime_resume, NULL) SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(lpuart_suspend_noirq, lpuart_resume_noirq) SET_SYSTEM_SLEEP_PM_OPS(lpuart_suspend, lpuart_resume) From 22cf92bb3908e1bbc22b03371e9e67e7bd455e0f Mon Sep 17 00:00:00 2001 From: Sherry Sun Date: Thu, 10 Nov 2022 19:38:59 +0800 Subject: [PATCH 2010/4122] tty: serial: fsl_lpuart: Use pm_ptr() to avoid need to make pm __maybe_unused Use pm_ptr() to remove the need to mark the pm functions as __maybe_unused when the kernel is built without CONFIG_PM support. Signed-off-by: Sherry Sun Link: https://lore.kernel.org/r/20221110113859.8485-4-sherry.sun@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/fsl_lpuart.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index 9e04728bcc0b..c1c8aa3e0fac 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -2807,7 +2807,7 @@ static int lpuart_remove(struct platform_device *pdev) return 0; } -static int __maybe_unused lpuart_runtime_suspend(struct device *dev) +static int lpuart_runtime_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct lpuart_port *sport = platform_get_drvdata(pdev); @@ -2817,7 +2817,7 @@ static int __maybe_unused lpuart_runtime_suspend(struct device *dev) return 0; }; -static int __maybe_unused lpuart_runtime_resume(struct device *dev) +static int lpuart_runtime_resume(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct lpuart_port *sport = platform_get_drvdata(pdev); @@ -2876,7 +2876,7 @@ static 
bool lpuart_uport_is_active(struct lpuart_port *sport) return false; } -static int __maybe_unused lpuart_suspend_noirq(struct device *dev) +static int lpuart_suspend_noirq(struct device *dev) { struct lpuart_port *sport = dev_get_drvdata(dev); bool irq_wake = irqd_is_wakeup_set(irq_get_irq_data(sport->port.irq)); @@ -2889,7 +2889,7 @@ static int __maybe_unused lpuart_suspend_noirq(struct device *dev) return 0; } -static int __maybe_unused lpuart_resume_noirq(struct device *dev) +static int lpuart_resume_noirq(struct device *dev) { struct lpuart_port *sport = dev_get_drvdata(dev); unsigned int val; @@ -2909,7 +2909,7 @@ static int __maybe_unused lpuart_resume_noirq(struct device *dev) return 0; } -static int __maybe_unused lpuart_suspend(struct device *dev) +static int lpuart_suspend(struct device *dev) { struct lpuart_port *sport = dev_get_drvdata(dev); unsigned long temp, flags; @@ -3005,7 +3005,7 @@ static void lpuart_console_fixup(struct lpuart_port *sport) } } -static int __maybe_unused lpuart_resume(struct device *dev) +static int lpuart_resume(struct device *dev) { struct lpuart_port *sport = dev_get_drvdata(dev); int ret; @@ -3030,11 +3030,11 @@ static int __maybe_unused lpuart_resume(struct device *dev) } static const struct dev_pm_ops lpuart_pm_ops = { - SET_RUNTIME_PM_OPS(lpuart_runtime_suspend, + RUNTIME_PM_OPS(lpuart_runtime_suspend, lpuart_runtime_resume, NULL) - SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(lpuart_suspend_noirq, + NOIRQ_SYSTEM_SLEEP_PM_OPS(lpuart_suspend_noirq, lpuart_resume_noirq) - SET_SYSTEM_SLEEP_PM_OPS(lpuart_suspend, lpuart_resume) + SYSTEM_SLEEP_PM_OPS(lpuart_suspend, lpuart_resume) }; static struct platform_driver lpuart_driver = { @@ -3043,7 +3043,7 @@ static struct platform_driver lpuart_driver = { .driver = { .name = "fsl-lpuart", .of_match_table = lpuart_dt_ids, - .pm = &lpuart_pm_ops, + .pm = pm_ptr(&lpuart_pm_ops), }, }; From 8be3a7bf773700534a6e8f87f6ed2ed111254be5 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Tue, 22 Nov 2022 
19:45:59 +0800 Subject: [PATCH 2011/4122] serial: pch: Fix PCI device refcount leak in pch_request_dma() As comment of pci_get_slot() says, it returns a pci_device with its refcount increased. The caller must decrement the reference count by calling pci_dev_put(). Since 'dma_dev' is only used to filter the channel in filter(), we can call pci_dev_put() before exiting from pch_request_dma(). Add the missing pci_dev_put() for the normal and error path. Fixes: 3c6a483275f4 ("Serial: EG20T: add PCH_UART driver") Signed-off-by: Xiongfeng Wang Link: https://lore.kernel.org/r/20221122114559.27692-1-wangxiongfeng2@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/pch_uart.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/tty/serial/pch_uart.c b/drivers/tty/serial/pch_uart.c index c76719c0f453..3d54a43768cd 100644 --- a/drivers/tty/serial/pch_uart.c +++ b/drivers/tty/serial/pch_uart.c @@ -694,6 +694,7 @@ static void pch_request_dma(struct uart_port *port) if (!chan) { dev_err(priv->port.dev, "%s:dma_request_channel FAILS(Tx)\n", __func__); + pci_dev_put(dma_dev); return; } priv->chan_tx = chan; @@ -710,6 +711,7 @@ static void pch_request_dma(struct uart_port *port) __func__); dma_release_channel(priv->chan_tx); priv->chan_tx = NULL; + pci_dev_put(dma_dev); return; } @@ -717,6 +719,8 @@ static void pch_request_dma(struct uart_port *port) priv->rx_buf_virt = dma_alloc_coherent(port->dev, port->fifosize, &priv->rx_buf_dma, GFP_KERNEL); priv->chan_rx = chan; + + pci_dev_put(dma_dev); } static void pch_dma_rx_complete(void *arg) From 05f5747414c6ecb8a7f9b0c1dc10bcffa6dfb5ba Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 21 Nov 2022 19:15:18 +0100 Subject: [PATCH 2012/4122] PCI/portdrv: Set PCIE_PORT_SERVICE_HP for Root and Downstream Ports only It is reported that on some systems pciehp binds to an Upstream Port and attempts to operate it which causes devices below the Port to disappear from the bus. 
This happens because acpiphp sets dev->is_hotplug_bridge for that Port (after receiving a Device Check notification on it from the platform firmware via ACPI) during the enumeration of PCI devices. get_port_device_capability() sees that dev->is_hotplug_bridge is set and adds PCIE_PORT_SERVICE_HP to Port services (which allows pciehp to bind to the Port in question) without consulting the PCIe type, which should be either Root Port or Downstream Port for the hotplug capability to be present. Per PCIe r6.0, sec 7.5.3.2, the Slot Implemented bit is only valid for Downstream Ports (including Root Ports), and PCIe hotplug depends on the Slot Capabilities / Control / Status registers. Make get_port_device_capability() more robust by adding a PCIe type check to it before adding PCIE_PORT_SERVICE_HP to Port services which helps to avoid the problem. [bhelgaas: add spec citation] Suggested-by: Lukas Wunner Link: https://lore.kernel.org/r/4786090.31r3eYUQgx@kreacher Reported-by: Rodrigo Vivi Signed-off-by: Rafael J. Wysocki Signed-off-by: Bjorn Helgaas Reviewed-by: Lukas Wunner --- drivers/pci/pcie/portdrv_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index 1ac7fec47d6f..98f0126aaa90 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -209,6 +209,8 @@ static int get_port_device_capability(struct pci_dev *dev) int services = 0; if (dev->is_hotplug_bridge && + (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT || + pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM) && (pcie_ports_native || host->native_pcie_hotplug)) { services |= PCIE_PORT_SERVICE_HP; From c63a3be76df678b173c59f1d5dc19a21b2d1c753 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Mon, 21 Nov 2022 19:16:57 +0100 Subject: [PATCH 2013/4122] PCI: acpiphp: Avoid setting is_hotplug_bridge for PCIe Upstream Ports It is reported that on some systems pciehp binds to an Upstream Port and attempts to operate it which causes devices below the Port to disappear from the bus. This happens because acpiphp sets dev->is_hotplug_bridge for that Port (after receiving a Device Check notification on it from the platform firmware via ACPI) during the enumeration of PCI devices. get_port_device_capability() sees that dev->is_hotplug_bridge is set and adds PCIE_PORT_SERVICE_HP to Port services, which allows pciehp to bind to the Port in question. Even though this particular problem can be addressed by making the portdrv_core checks more robust, it also causes power management to work differently on the affected systems which generally is not desirable (PCIe Ports with dev->is_hotplug_bridge set have to pass additional tests to be allowed to go into the D3hot/cold power states which affects runtime PM of devices below these Ports). For this reason, amend check_hotplug_bridge() with a PCIe type check to prevent it from setting dev->is_hotplug_bridge for Upstream Ports. Suggested-by: Lukas Wunner Link: https://lore.kernel.org/r/2262230.ElGaqSPkdT@kreacher Reported-by: Rodrigo Vivi Tested-by: Rodrigo Vivi Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Bjorn Helgaas Reviewed-by: Rodrigo Vivi Reviewed-by: Lukas Wunner --- drivers/pci/hotplug/acpiphp_glue.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 6efa3d8db9a5..5b1f271c6034 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -411,6 +411,14 @@ static void check_hotplug_bridge(struct acpiphp_slot *slot, struct pci_dev *dev) if (dev->is_hotplug_bridge) return; + /* + * In the PCIe case, only Root Ports and Downstream Ports are capable of + * accommodating hotplug devices, so avoid marking Upstream Ports as + * "hotplug bridges". + */ + if (pci_is_pcie(dev) && pci_pcie_type(dev) == PCI_EXP_TYPE_UPSTREAM) + return; + list_for_each_entry(func, &slot->funcs, sibling) { if (PCI_FUNC(dev->devfn) == func->function) { dev->is_hotplug_bridge = 1; From cb6562c380832a930ffd1722ac9d479b454aed4e Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 22 Nov 2022 15:37:39 -0400 Subject: [PATCH 2014/4122] RDMA/rxe: Do not NULL deref on debugging failure path Correct the mistake, mr is obviously NULL in this code path. 
Fixes: 2778b72b1df0 ("RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in rxe_mr.c") Link: https://lore.kernel.org/r/Y3eeJW0AdyJYhYyQ@kili Reported-by: Dan Carpenter Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index b1423000e4bc..b7c9ff1ddf0e 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -519,7 +519,7 @@ int rxe_invalidate_mr(struct rxe_qp *qp, u32 key) mr = rxe_pool_get_index(&rxe->mr_pool, key >> 8); if (!mr) { - rxe_dbg_mr(mr, "No MR for key %#x\n", key); + rxe_dbg_qp(qp, "No MR for key %#x\n", key); ret = -EINVAL; goto err; } From f67376d801499f4fa0838c18c1efcad8840e550d Mon Sep 17 00:00:00 2001 From: Zhang Xiaoxu Date: Tue, 22 Nov 2022 23:14:37 +0800 Subject: [PATCH 2015/4122] RDMA/rxe: Fix NULL-ptr-deref in rxe_qp_do_cleanup() when socket create failed There is a null-ptr-deref when mount.cifs over rdma: BUG: KASAN: null-ptr-deref in rxe_qp_do_cleanup+0x2f3/0x360 [rdma_rxe] Read of size 8 at addr 0000000000000018 by task mount.cifs/3046 CPU: 2 PID: 3046 Comm: mount.cifs Not tainted 6.1.0-rc5+ #62 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1.fc3 Call Trace: dump_stack_lvl+0x34/0x44 kasan_report+0xad/0x130 rxe_qp_do_cleanup+0x2f3/0x360 [rdma_rxe] execute_in_process_context+0x25/0x90 __rxe_cleanup+0x101/0x1d0 [rdma_rxe] rxe_create_qp+0x16a/0x180 [rdma_rxe] create_qp.part.0+0x27d/0x340 ib_create_qp_kernel+0x73/0x160 rdma_create_qp+0x100/0x230 _smbd_get_connection+0x752/0x20f0 smbd_get_connection+0x21/0x40 cifs_get_tcp_session+0x8ef/0xda0 mount_get_conns+0x60/0x750 cifs_mount+0x103/0xd00 cifs_smb3_do_mount+0x1dd/0xcb0 smb3_get_tree+0x1d5/0x300 vfs_get_tree+0x41/0xf0 path_mount+0x9b3/0xdd0 __x64_sys_mount+0x190/0x1d0 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 The root cause of the issue is the socket create failed in 
rxe_qp_init_req(). So move the sk_dst_reset() call in rxe_qp_do_cleanup() after the NULL ptr check. Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/20221122151437.1057671-1-zhangxiaoxu5@huawei.com Signed-off-by: Zhang Xiaoxu Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_qp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 46f6c74ce00e..ab72db68b58f 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -817,12 +817,12 @@ static void rxe_qp_do_cleanup(struct work_struct *work) if (qp->resp.mr) rxe_put(qp->resp.mr); - if (qp_type(qp) == IB_QPT_RC) - sk_dst_reset(qp->sk->sk); - free_rd_atomic_resources(qp); if (qp->sk) { + if (qp_type(qp) == IB_QPT_RC) + sk_dst_reset(qp->sk->sk); + kernel_sock_shutdown(qp->sk, SHUT_RDWR); sock_release(qp->sk); } From 9676f40618df9f8e1ab681486021d6c0df86c5fa Mon Sep 17 00:00:00 2001 From: Ian Cowan Date: Sat, 12 Nov 2022 09:28:57 -0500 Subject: [PATCH 2016/4122] PCI: shpchp: Remove unused get_mode1_ECC_cap callback The ->get_mode1_ECC_cap callback in the shpchp_hpc_ops struct is never called, so remove it. [bhelgaas: squash] Link: https://lore.kernel.org/r/20221112142859.319733-2-ian@linux.cowan.aero Link: https://lore.kernel.org/r/20221112142859.319733-3-ian@linux.cowan.aero Link: https://lore.kernel.org/r/20221112142859.319733-4-ian@linux.cowan.aero Signed-off-by: Ian Cowan Signed-off-by: Bjorn Helgaas --- drivers/pci/hotplug/TODO | 3 --- drivers/pci/hotplug/shpchp.h | 1 - drivers/pci/hotplug/shpchp_hpc.c | 18 ------------------ 3 files changed, 22 deletions(-) diff --git a/drivers/pci/hotplug/TODO b/drivers/pci/hotplug/TODO index 88f217c82b4f..fdb8dd6ea24d 100644 --- a/drivers/pci/hotplug/TODO +++ b/drivers/pci/hotplug/TODO @@ -58,9 +58,6 @@ shpchp: pciehp with commit 82a9e79ef132 ("PCI: pciehp: remove hpc_ops"). 
Clarify if there was a specific reason not to apply the same change to shpchp. -* The ->get_mode1_ECC_cap callback in shpchp_hpc_ops is never invoked. - Why was it introduced? Can it be removed? - * The hardirq handler shpc_isr() queues events on a workqueue. It can be simplified by converting it to threaded IRQ handling. Use pciehp as a template. diff --git a/drivers/pci/hotplug/shpchp.h b/drivers/pci/hotplug/shpchp.h index 6e85885b554c..3a97f455336e 100644 --- a/drivers/pci/hotplug/shpchp.h +++ b/drivers/pci/hotplug/shpchp.h @@ -311,7 +311,6 @@ struct hpc_ops { int (*get_latch_status)(struct slot *slot, u8 *status); int (*get_adapter_status)(struct slot *slot, u8 *status); int (*get_adapter_speed)(struct slot *slot, enum pci_bus_speed *speed); - int (*get_mode1_ECC_cap)(struct slot *slot, u8 *mode); int (*get_prog_int)(struct slot *slot, u8 *prog_int); int (*query_power_fault)(struct slot *slot); void (*green_led_on)(struct slot *slot); diff --git a/drivers/pci/hotplug/shpchp_hpc.c b/drivers/pci/hotplug/shpchp_hpc.c index bd7557ca4910..48e4daefc44a 100644 --- a/drivers/pci/hotplug/shpchp_hpc.c +++ b/drivers/pci/hotplug/shpchp_hpc.c @@ -489,23 +489,6 @@ static int hpc_get_adapter_speed(struct slot *slot, enum pci_bus_speed *value) return retval; } -static int hpc_get_mode1_ECC_cap(struct slot *slot, u8 *mode) -{ - int retval = 0; - struct controller *ctrl = slot->ctrl; - u16 sec_bus_status = shpc_readw(ctrl, SEC_BUS_CONFIG); - u8 pi = shpc_readb(ctrl, PROG_INTERFACE); - - if (pi == 2) { - *mode = (sec_bus_status & 0x0100) >> 8; - } else { - retval = -1; - } - - ctrl_dbg(ctrl, "Mode 1 ECC cap = %d\n", *mode); - return retval; -} - static int hpc_query_power_fault(struct slot *slot) { struct controller *ctrl = slot->ctrl; @@ -900,7 +883,6 @@ static const struct hpc_ops shpchp_hpc_ops = { .get_adapter_status = hpc_get_adapter_status, .get_adapter_speed = hpc_get_adapter_speed, - .get_mode1_ECC_cap = hpc_get_mode1_ECC_cap, .get_prog_int = hpc_get_prog_int, 
.query_power_fault = hpc_query_power_fault, From 198dd8aedee6a7d2de0dfa739f9a008a938f6848 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 23 Nov 2022 12:40:11 +1100 Subject: [PATCH 2017/4122] xfs: punching delalloc extents on write failure is racy xfs_buffered_write_iomap_end() has a comment about the safety of punching delalloc extents based holding the IOLOCK_EXCL. This comment is wrong, and punching delalloc extents is not race free. When we punch out a delalloc extent after a write failure in xfs_buffered_write_iomap_end(), we punch out the page cache with truncate_pagecache_range() before we punch out the delalloc extents. At this point, we only hold the IOLOCK_EXCL, so there is nothing stopping mmap() write faults racing with this cleanup operation, reinstantiating a folio over the range we are about to punch and hence requiring the delalloc extent to be kept. If this race condition is hit, we can end up with a dirty page in the page cache that has no delalloc extent or space reservation backing it. This leads to bad things happening at writeback time. To avoid this race condition, we need the page cache truncation to be atomic w.r.t. the extent manipulation. We can do this by holding the mapping->invalidate_lock exclusively across this operation - this will prevent new pages from being inserted into the page cache whilst we are removing the pages and the backing extent and space reservation. Taking the mapping->invalidate_lock exclusively in the buffered write IO path is safe - it naturally nests inside the IOLOCK (see truncate and fallocate paths). iomap_zero_range() can be called from under the mapping->invalidate_lock (from the truncate path via either xfs_zero_eof() or xfs_truncate_page(), but iomap_zero_iter() will not instantiate new delalloc pages (because it skips holes) and hence will not ever need to punch out delalloc extents on failure. 
Fix the locking issue, and clean up the code logic a little to avoid unnecessary work if we didn't allocate the delalloc extent or wrote the entire region we allocated. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- fs/xfs/xfs_iomap.c | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 5cea069a38b4..a2e45ea1b0cb 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1147,6 +1147,10 @@ xfs_buffered_write_iomap_end( written = 0; } + /* If we didn't reserve the blocks, we're not allowed to punch them. */ + if (!(iomap->flags & IOMAP_F_NEW)) + return 0; + /* * start_fsb refers to the first unused block after a short write. If * nothing was written, round offset down to point at the first block in @@ -1158,27 +1162,28 @@ xfs_buffered_write_iomap_end( start_fsb = XFS_B_TO_FSB(mp, offset + written); end_fsb = XFS_B_TO_FSB(mp, offset + length); + /* Nothing to do if we've written the entire delalloc extent */ + if (start_fsb >= end_fsb) + return 0; + /* - * Trim delalloc blocks if they were allocated by this write and we - * didn't manage to write the whole range. - * - * We don't need to care about racing delalloc as we hold i_mutex - * across the reserve/allocate/unreserve calls. If there are delalloc - * blocks in the range, they are ours. + * Lock the mapping to avoid races with page faults re-instantiating + * folios and dirtying them via ->page_mkwrite between the page cache + * truncation and the delalloc extent removal. Failing to do this can + * leave dirty pages with no space reservation in the cache. 
*/ - if ((iomap->flags & IOMAP_F_NEW) && start_fsb < end_fsb) { - truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb), - XFS_FSB_TO_B(mp, end_fsb) - 1); + filemap_invalidate_lock(inode->i_mapping); + truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb), + XFS_FSB_TO_B(mp, end_fsb) - 1); - error = xfs_bmap_punch_delalloc_range(ip, start_fsb, - end_fsb - start_fsb); - if (error && !xfs_is_shutdown(mp)) { - xfs_alert(mp, "%s: unable to clean up ino %lld", - __func__, ip->i_ino); - return error; - } + error = xfs_bmap_punch_delalloc_range(ip, start_fsb, + end_fsb - start_fsb); + filemap_invalidate_unlock(inode->i_mapping); + if (error && !xfs_is_shutdown(mp)) { + xfs_alert(mp, "%s: unable to clean up ino %lld", + __func__, ip->i_ino); + return error; } - return 0; } From b71f889c18ada210a97aa3eb5e00c0de552234c6 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 23 Nov 2022 12:40:12 +1100 Subject: [PATCH 2018/4122] xfs: use byte ranges for write cleanup ranges xfs_buffered_write_iomap_end() currently converts the byte ranges passed to it to filesystem blocks to pass them to the bmap code to punch out delalloc blocks, but then has to convert filesystem blocks back to byte ranges for page cache truncate. We're about to make the page cache truncate go away and replace it with a page cache walk, so having to convert everything to/from/to filesystem blocks is messy and error-prone. It is much easier to pass around byte ranges and convert to page indexes and/or filesystem blocks only where those units are needed. In preparation for the page cache walk being added, add a helper that converts byte ranges to filesystem blocks and calls xfs_bmap_punch_delalloc_range() and convert xfs_buffered_write_iomap_end() to calculate limits in byte ranges. Signed-off-by: Dave Chinner Reviewed-by: Darrick J.
Wong --- fs/xfs/xfs_iomap.c | 40 +++++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index a2e45ea1b0cb..7bb55dbc19d3 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1120,6 +1120,20 @@ out_unlock: return error; } +static int +xfs_buffered_write_delalloc_punch( + struct inode *inode, + loff_t start_byte, + loff_t end_byte) +{ + struct xfs_mount *mp = XFS_M(inode->i_sb); + xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, start_byte); + xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, end_byte); + + return xfs_bmap_punch_delalloc_range(XFS_I(inode), start_fsb, + end_fsb - start_fsb); +} + static int xfs_buffered_write_iomap_end( struct inode *inode, @@ -1129,10 +1143,9 @@ xfs_buffered_write_iomap_end( unsigned flags, struct iomap *iomap) { - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - xfs_fileoff_t start_fsb; - xfs_fileoff_t end_fsb; + struct xfs_mount *mp = XFS_M(inode->i_sb); + loff_t start_byte; + loff_t end_byte; int error = 0; if (iomap->type != IOMAP_DELALLOC) @@ -1157,13 +1170,13 @@ xfs_buffered_write_iomap_end( * the range. */ if (unlikely(!written)) - start_fsb = XFS_B_TO_FSBT(mp, offset); + start_byte = round_down(offset, mp->m_sb.sb_blocksize); else - start_fsb = XFS_B_TO_FSB(mp, offset + written); - end_fsb = XFS_B_TO_FSB(mp, offset + length); + start_byte = round_up(offset + written, mp->m_sb.sb_blocksize); + end_byte = round_up(offset + length, mp->m_sb.sb_blocksize); /* Nothing to do if we've written the entire delalloc extent */ - if (start_fsb >= end_fsb) + if (start_byte >= end_byte) return 0; /* @@ -1173,15 +1186,12 @@ xfs_buffered_write_iomap_end( * leave dirty pages with no space reservation in the cache. 
*/ filemap_invalidate_lock(inode->i_mapping); - truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb), - XFS_FSB_TO_B(mp, end_fsb) - 1); - - error = xfs_bmap_punch_delalloc_range(ip, start_fsb, - end_fsb - start_fsb); + truncate_pagecache_range(inode, start_byte, end_byte - 1); + error = xfs_buffered_write_delalloc_punch(inode, start_byte, end_byte); filemap_invalidate_unlock(inode->i_mapping); if (error && !xfs_is_shutdown(mp)) { - xfs_alert(mp, "%s: unable to clean up ino %lld", - __func__, ip->i_ino); + xfs_alert(mp, "%s: unable to clean up ino 0x%llx", + __func__, XFS_I(inode)->i_ino); return error; } return 0; From 9c7babf94a0d686b552e53aded8d4703d1b8b92b Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 23 Nov 2022 12:44:38 +1100 Subject: [PATCH 2019/4122] xfs,iomap: move delalloc punching to iomap Because that's what Christoph wants for this error handling path only XFS uses. It requires a new iomap export for handling errors over delalloc ranges. This is basically the XFS code as it stands, but even though Christoph wants this as iomap functionality, we still have to call it from the filesystem specific ->iomap_end callback, and call into the iomap code with yet another filesystem specific callback to punch the delalloc extent within the defined ranges. Signed-off-by: Dave Chinner Reviewed-by: Darrick J.
Wong --- fs/iomap/buffered-io.c | 60 ++++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_iomap.c | 47 ++++++--------------------------- include/linux/iomap.h | 4 +++ 3 files changed, 72 insertions(+), 39 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 91ee0b308e13..734b761a1e4a 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -832,6 +832,66 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i, } EXPORT_SYMBOL_GPL(iomap_file_buffered_write); +/* + * When a short write occurs, the filesystem may need to remove reserved space + * that was allocated in ->iomap_begin from it's ->iomap_end method. For + * filesystems that use delayed allocation, we need to punch out delalloc + * extents from the range that are not dirty in the page cache. As the write can + * race with page faults, there can be dirty pages over the delalloc extent + * outside the range of a short write but still within the delalloc extent + * allocated for this iomap. + * + * This function uses [start_byte, end_byte) intervals (i.e. open ended) to + * simplify range iterations, but converts them back to {offset,len} tuples for + * the punch callback. + */ +int iomap_file_buffered_write_punch_delalloc(struct inode *inode, + struct iomap *iomap, loff_t pos, loff_t length, + ssize_t written, + int (*punch)(struct inode *inode, loff_t pos, loff_t length)) +{ + loff_t start_byte; + loff_t end_byte; + int blocksize = i_blocksize(inode); + int error = 0; + + if (iomap->type != IOMAP_DELALLOC) + return 0; + + /* If we didn't reserve the blocks, we're not allowed to punch them. */ + if (!(iomap->flags & IOMAP_F_NEW)) + return 0; + + /* + * start_byte refers to the first unused block after a short write. If + * nothing was written, round offset down to point at the first block in + * the range. 
+ */ + if (unlikely(!written)) + start_byte = round_down(pos, blocksize); + else + start_byte = round_up(pos + written, blocksize); + end_byte = round_up(pos + length, blocksize); + + /* Nothing to do if we've written the entire delalloc extent */ + if (start_byte >= end_byte) + return 0; + + /* + * Lock the mapping to avoid races with page faults re-instantiating + * folios and dirtying them via ->page_mkwrite between the page cache + * truncation and the delalloc extent removal. Failing to do this can + * leave dirty pages with no space reservation in the cache. + */ + filemap_invalidate_lock(inode->i_mapping); + truncate_pagecache_range(inode, start_byte, end_byte - 1); + error = punch(inode, start_byte, end_byte - start_byte); + filemap_invalidate_unlock(inode->i_mapping); + + return error; +} +EXPORT_SYMBOL_GPL(iomap_file_buffered_write_punch_delalloc); + static loff_t iomap_unshare_iter(struct iomap_iter *iter) { struct iomap *iomap = &iter->iomap; diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 7bb55dbc19d3..ea96e8a34868 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1123,12 +1123,12 @@ out_unlock: static int xfs_buffered_write_delalloc_punch( struct inode *inode, - loff_t start_byte, - loff_t end_byte) + loff_t offset, + loff_t length) { struct xfs_mount *mp = XFS_M(inode->i_sb); - xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, start_byte); - xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, end_byte); + xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, offset); + xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + length); return xfs_bmap_punch_delalloc_range(XFS_I(inode), start_fsb, end_fsb - start_fsb); @@ -1143,13 +1143,9 @@ xfs_buffered_write_iomap_end( unsigned flags, struct iomap *iomap) { - struct xfs_mount *mp = XFS_M(inode->i_sb); - loff_t start_byte; - loff_t end_byte; - int error = 0; - if (iomap->type != IOMAP_DELALLOC) - return 0; + struct xfs_mount *mp = XFS_M(inode->i_sb); + int error; /* * Behave as if the write failed if drop writes 
is enabled. Set the NEW @@ -1160,35 +1156,8 @@ xfs_buffered_write_iomap_end( written = 0; } - /* If we didn't reserve the blocks, we're not allowed to punch them. */ - if (!(iomap->flags & IOMAP_F_NEW)) - return 0; - - /* - * start_fsb refers to the first unused block after a short write. If - * nothing was written, round offset down to point at the first block in - * the range. - */ - if (unlikely(!written)) - start_byte = round_down(offset, mp->m_sb.sb_blocksize); - else - start_byte = round_up(offset + written, mp->m_sb.sb_blocksize); - end_byte = round_up(offset + length, mp->m_sb.sb_blocksize); - - /* Nothing to do if we've written the entire delalloc extent */ - if (start_byte >= end_byte) - return 0; - - /* - * Lock the mapping to avoid races with page faults re-instantiating - * folios and dirtying them via ->page_mkwrite between the page cache - * truncation and the delalloc extent removal. Failing to do this can - * leave dirty pages with no space reservation in the cache. - */ - filemap_invalidate_lock(inode->i_mapping); - truncate_pagecache_range(inode, start_byte, end_byte - 1); - error = xfs_buffered_write_delalloc_punch(inode, start_byte, end_byte); - filemap_invalidate_unlock(inode->i_mapping); + error = iomap_file_buffered_write_punch_delalloc(inode, iomap, offset, + length, written, &xfs_buffered_write_delalloc_punch); if (error && !xfs_is_shutdown(mp)) { xfs_alert(mp, "%s: unable to clean up ino 0x%llx", __func__, XFS_I(inode)->i_ino); diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 238a03087e17..0698c4b8ce0e 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -226,6 +226,10 @@ static inline const struct iomap *iomap_iter_srcmap(const struct iomap_iter *i) ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from, const struct iomap_ops *ops); +int iomap_file_buffered_write_punch_delalloc(struct inode *inode, + struct iomap *iomap, loff_t pos, loff_t length, ssize_t written, + int (*punch)(struct 
inode *inode, loff_t pos, loff_t length)); + int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops); void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops); bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count); From 9124a26401483bf2b13a99cb4317dce3f677060f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 29 Sep 2022 01:58:59 -0700 Subject: [PATCH 2020/4122] kunit/fortify: Validate __alloc_size attribute results Validate the effect of the __alloc_size attribute on allocators. If the compiler doesn't support __builtin_dynamic_object_size(), skip the associated tests. (For GCC, just remove the "--make_options" line below...) $ ./tools/testing/kunit/kunit.py run --arch x86_64 \ --kconfig_add CONFIG_FORTIFY_SOURCE=y \ --make_options LLVM=1 fortify ... [15:16:30] ================== fortify (10 subtests) =================== [15:16:30] [PASSED] known_sizes_test [15:16:30] [PASSED] control_flow_split_test [15:16:30] [PASSED] alloc_size_kmalloc_const_test [15:16:30] [PASSED] alloc_size_kmalloc_dynamic_test [15:16:30] [PASSED] alloc_size_vmalloc_const_test [15:16:30] [PASSED] alloc_size_vmalloc_dynamic_test [15:16:30] [PASSED] alloc_size_kvmalloc_const_test [15:16:30] [PASSED] alloc_size_kvmalloc_dynamic_test [15:16:30] [PASSED] alloc_size_devm_kmalloc_const_test [15:16:30] [PASSED] alloc_size_devm_kmalloc_dynamic_test [15:16:30] ===================== [PASSED] fortify ===================== [15:16:30] ============================================================ [15:16:30] Testing complete. 
Ran 10 tests: passed: 10 [15:16:31] Elapsed time: 8.348s total, 0.002s configuring, 6.923s building, 1.075s running For earlier GCC prior to version 12, the dynamic tests will be skipped: [15:18:59] ================== fortify (10 subtests) =================== [15:18:59] [PASSED] known_sizes_test [15:18:59] [PASSED] control_flow_split_test [15:18:59] [PASSED] alloc_size_kmalloc_const_test [15:18:59] [SKIPPED] alloc_size_kmalloc_dynamic_test [15:18:59] [PASSED] alloc_size_vmalloc_const_test [15:18:59] [SKIPPED] alloc_size_vmalloc_dynamic_test [15:18:59] [PASSED] alloc_size_kvmalloc_const_test [15:18:59] [SKIPPED] alloc_size_kvmalloc_dynamic_test [15:18:59] [PASSED] alloc_size_devm_kmalloc_const_test [15:18:59] [SKIPPED] alloc_size_devm_kmalloc_dynamic_test [15:18:59] ===================== [PASSED] fortify ===================== [15:18:59] ============================================================ [15:18:59] Testing complete. Ran 10 tests: passed: 6, skipped: 4 [15:18:59] Elapsed time: 11.965s total, 0.002s configuring, 10.540s building, 1.068s running Cc: David Gow Cc: linux-hardening@vger.kernel.org Signed-off-by: Kees Cook --- lib/Makefile | 1 + lib/fortify_kunit.c | 255 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 256 insertions(+) diff --git a/lib/Makefile b/lib/Makefile index 322178b9f7fb..2f0454b931dc 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -378,6 +378,7 @@ CFLAGS_overflow_kunit.o = $(call cc-disable-warning, tautological-constant-out-o obj-$(CONFIG_OVERFLOW_KUNIT_TEST) += overflow_kunit.o CFLAGS_stackinit_kunit.o += $(call cc-disable-warning, switch-unreachable) obj-$(CONFIG_STACKINIT_KUNIT_TEST) += stackinit_kunit.o +CFLAGS_fortify_kunit.o += $(call cc-disable-warning, unsequenced) obj-$(CONFIG_FORTIFY_KUNIT_TEST) += fortify_kunit.o obj-$(CONFIG_STRSCPY_KUNIT_TEST) += strscpy_kunit.o obj-$(CONFIG_SIPHASH_KUNIT_TEST) += siphash_kunit.o diff --git a/lib/fortify_kunit.c b/lib/fortify_kunit.c index 409af07f340a..c8c33cbaae9e 100644 --- 
a/lib/fortify_kunit.c +++ b/lib/fortify_kunit.c @@ -16,7 +16,10 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include +#include #include +#include static const char array_of_10[] = "this is 10"; static const char *ptr_of_11 = "this is 11!"; @@ -60,9 +63,261 @@ static void control_flow_split_test(struct kunit *test) KUNIT_EXPECT_EQ(test, want_minus_one(pick), SIZE_MAX); } +#define KUNIT_EXPECT_BOS(test, p, expected, name) \ + KUNIT_EXPECT_EQ_MSG(test, __builtin_object_size(p, 1), \ + expected, \ + "__alloc_size() not working with __bos on " name "\n") + +#if !__has_builtin(__builtin_dynamic_object_size) +#define KUNIT_EXPECT_BDOS(test, p, expected, name) \ + /* Silence "unused variable 'expected'" warning. */ \ + KUNIT_EXPECT_EQ(test, expected, expected) +#else +#define KUNIT_EXPECT_BDOS(test, p, expected, name) \ + KUNIT_EXPECT_EQ_MSG(test, __builtin_dynamic_object_size(p, 1), \ + expected, \ + "__alloc_size() not working with __bdos on " name "\n") +#endif + +/* If the execpted size is a constant value, __bos can see it. */ +#define check_const(_expected, alloc, free) do { \ + size_t expected = (_expected); \ + void *p = alloc; \ + KUNIT_EXPECT_TRUE_MSG(test, p != NULL, #alloc " failed?!\n"); \ + KUNIT_EXPECT_BOS(test, p, expected, #alloc); \ + KUNIT_EXPECT_BDOS(test, p, expected, #alloc); \ + free; \ +} while (0) + +/* If the execpted size is NOT a constant value, __bos CANNOT see it. */ +#define check_dynamic(_expected, alloc, free) do { \ + size_t expected = (_expected); \ + void *p = alloc; \ + KUNIT_EXPECT_TRUE_MSG(test, p != NULL, #alloc " failed?!\n"); \ + KUNIT_EXPECT_BOS(test, p, SIZE_MAX, #alloc); \ + KUNIT_EXPECT_BDOS(test, p, expected, #alloc); \ + free; \ +} while (0) + +/* Assortment of constant-value kinda-edge cases. */ +#define CONST_TEST_BODY(TEST_alloc) do { \ + /* Special-case vmalloc()-family to skip 0-sized allocs. 
*/ \ + if (strcmp(#TEST_alloc, "TEST_vmalloc") != 0) \ + TEST_alloc(check_const, 0, 0); \ + TEST_alloc(check_const, 1, 1); \ + TEST_alloc(check_const, 128, 128); \ + TEST_alloc(check_const, 1023, 1023); \ + TEST_alloc(check_const, 1025, 1025); \ + TEST_alloc(check_const, 4096, 4096); \ + TEST_alloc(check_const, 4097, 4097); \ +} while (0) + +static volatile size_t zero_size; +static volatile size_t unknown_size = 50; + +#if !__has_builtin(__builtin_dynamic_object_size) +#define DYNAMIC_TEST_BODY(TEST_alloc) \ + kunit_skip(test, "Compiler is missing __builtin_dynamic_object_size() support\n") +#else +#define DYNAMIC_TEST_BODY(TEST_alloc) do { \ + size_t size = unknown_size; \ + \ + /* \ + * Expected size is "size" in each test, before it is then \ + * internally incremented in each test. Requires we disable \ + * -Wunsequenced. \ + */ \ + TEST_alloc(check_dynamic, size, size++); \ + /* Make sure incrementing actually happened. */ \ + KUNIT_EXPECT_NE(test, size, unknown_size); \ +} while (0) +#endif + +#define DEFINE_ALLOC_SIZE_TEST_PAIR(allocator) \ +static void alloc_size_##allocator##_const_test(struct kunit *test) \ +{ \ + CONST_TEST_BODY(TEST_##allocator); \ +} \ +static void alloc_size_##allocator##_dynamic_test(struct kunit *test) \ +{ \ + DYNAMIC_TEST_BODY(TEST_##allocator); \ +} + +#define TEST_kmalloc(checker, expected_size, alloc_size) do { \ + gfp_t gfp = GFP_KERNEL | __GFP_NOWARN; \ + void *orig; \ + size_t len; \ + \ + checker(expected_size, kmalloc(alloc_size, gfp), \ + kfree(p)); \ + checker(expected_size, \ + kmalloc_node(alloc_size, gfp, NUMA_NO_NODE), \ + kfree(p)); \ + checker(expected_size, kzalloc(alloc_size, gfp), \ + kfree(p)); \ + checker(expected_size, \ + kzalloc_node(alloc_size, gfp, NUMA_NO_NODE), \ + kfree(p)); \ + checker(expected_size, kcalloc(1, alloc_size, gfp), \ + kfree(p)); \ + checker(expected_size, kcalloc(alloc_size, 1, gfp), \ + kfree(p)); \ + checker(expected_size, \ + kcalloc_node(1, alloc_size, gfp, NUMA_NO_NODE), \ + 
kfree(p)); \ + checker(expected_size, \ + kcalloc_node(alloc_size, 1, gfp, NUMA_NO_NODE), \ + kfree(p)); \ + checker(expected_size, kmalloc_array(1, alloc_size, gfp), \ + kfree(p)); \ + checker(expected_size, kmalloc_array(alloc_size, 1, gfp), \ + kfree(p)); \ + checker(expected_size, \ + kmalloc_array_node(1, alloc_size, gfp, NUMA_NO_NODE), \ + kfree(p)); \ + checker(expected_size, \ + kmalloc_array_node(alloc_size, 1, gfp, NUMA_NO_NODE), \ + kfree(p)); \ + checker(expected_size, __kmalloc(alloc_size, gfp), \ + kfree(p)); \ + checker(expected_size, \ + __kmalloc_node(alloc_size, gfp, NUMA_NO_NODE), \ + kfree(p)); \ + \ + orig = kmalloc(alloc_size, gfp); \ + KUNIT_EXPECT_TRUE(test, orig != NULL); \ + checker((expected_size) * 2, \ + krealloc(orig, (alloc_size) * 2, gfp), \ + kfree(p)); \ + orig = kmalloc(alloc_size, gfp); \ + KUNIT_EXPECT_TRUE(test, orig != NULL); \ + checker((expected_size) * 2, \ + krealloc_array(orig, 1, (alloc_size) * 2, gfp), \ + kfree(p)); \ + orig = kmalloc(alloc_size, gfp); \ + KUNIT_EXPECT_TRUE(test, orig != NULL); \ + checker((expected_size) * 2, \ + krealloc_array(orig, (alloc_size) * 2, 1, gfp), \ + kfree(p)); \ + \ + len = 11; \ + /* Using memdup() with fixed size, so force unknown length. */ \ + if (!__builtin_constant_p(expected_size)) \ + len += zero_size; \ + checker(len, kmemdup("hello there", len, gfp), kfree(p)); \ +} while (0) +DEFINE_ALLOC_SIZE_TEST_PAIR(kmalloc) + +/* Sizes are in pages, not bytes. */ +#define TEST_vmalloc(checker, expected_pages, alloc_pages) do { \ + gfp_t gfp = GFP_KERNEL | __GFP_NOWARN; \ + checker((expected_pages) * PAGE_SIZE, \ + vmalloc((alloc_pages) * PAGE_SIZE), vfree(p)); \ + checker((expected_pages) * PAGE_SIZE, \ + vzalloc((alloc_pages) * PAGE_SIZE), vfree(p)); \ + checker((expected_pages) * PAGE_SIZE, \ + __vmalloc((alloc_pages) * PAGE_SIZE, gfp), vfree(p)); \ +} while (0) +DEFINE_ALLOC_SIZE_TEST_PAIR(vmalloc) + +/* Sizes are in pages (and open-coded for side-effects), not bytes. 
*/ +#define TEST_kvmalloc(checker, expected_pages, alloc_pages) do { \ + gfp_t gfp = GFP_KERNEL | __GFP_NOWARN; \ + size_t prev_size; \ + void *orig; \ + \ + checker((expected_pages) * PAGE_SIZE, \ + kvmalloc((alloc_pages) * PAGE_SIZE, gfp), \ + vfree(p)); \ + checker((expected_pages) * PAGE_SIZE, \ + kvmalloc_node((alloc_pages) * PAGE_SIZE, gfp, NUMA_NO_NODE), \ + vfree(p)); \ + checker((expected_pages) * PAGE_SIZE, \ + kvzalloc((alloc_pages) * PAGE_SIZE, gfp), \ + vfree(p)); \ + checker((expected_pages) * PAGE_SIZE, \ + kvzalloc_node((alloc_pages) * PAGE_SIZE, gfp, NUMA_NO_NODE), \ + vfree(p)); \ + checker((expected_pages) * PAGE_SIZE, \ + kvcalloc(1, (alloc_pages) * PAGE_SIZE, gfp), \ + vfree(p)); \ + checker((expected_pages) * PAGE_SIZE, \ + kvcalloc((alloc_pages) * PAGE_SIZE, 1, gfp), \ + vfree(p)); \ + checker((expected_pages) * PAGE_SIZE, \ + kvmalloc_array(1, (alloc_pages) * PAGE_SIZE, gfp), \ + vfree(p)); \ + checker((expected_pages) * PAGE_SIZE, \ + kvmalloc_array((alloc_pages) * PAGE_SIZE, 1, gfp), \ + vfree(p)); \ + \ + prev_size = (expected_pages) * PAGE_SIZE; \ + orig = kvmalloc(prev_size, gfp); \ + KUNIT_EXPECT_TRUE(test, orig != NULL); \ + checker(((expected_pages) * PAGE_SIZE) * 2, \ + kvrealloc(orig, prev_size, \ + ((alloc_pages) * PAGE_SIZE) * 2, gfp), \ + kvfree(p)); \ +} while (0) +DEFINE_ALLOC_SIZE_TEST_PAIR(kvmalloc) + +#define TEST_devm_kmalloc(checker, expected_size, alloc_size) do { \ + gfp_t gfp = GFP_KERNEL | __GFP_NOWARN; \ + const char dev_name[] = "fortify-test"; \ + struct device *dev; \ + void *orig; \ + size_t len; \ + \ + /* Create dummy device for devm_kmalloc()-family tests. 
*/ \ + dev = root_device_register(dev_name); \ + KUNIT_ASSERT_FALSE_MSG(test, IS_ERR(dev), \ + "Cannot register test device\n"); \ + \ + checker(expected_size, devm_kmalloc(dev, alloc_size, gfp), \ + devm_kfree(dev, p)); \ + checker(expected_size, devm_kzalloc(dev, alloc_size, gfp), \ + devm_kfree(dev, p)); \ + checker(expected_size, \ + devm_kmalloc_array(dev, 1, alloc_size, gfp), \ + devm_kfree(dev, p)); \ + checker(expected_size, \ + devm_kmalloc_array(dev, alloc_size, 1, gfp), \ + devm_kfree(dev, p)); \ + checker(expected_size, \ + devm_kcalloc(dev, 1, alloc_size, gfp), \ + devm_kfree(dev, p)); \ + checker(expected_size, \ + devm_kcalloc(dev, alloc_size, 1, gfp), \ + devm_kfree(dev, p)); \ + \ + orig = devm_kmalloc(dev, alloc_size, gfp); \ + KUNIT_EXPECT_TRUE(test, orig != NULL); \ + checker((expected_size) * 2, \ + devm_krealloc(dev, orig, (alloc_size) * 2, gfp), \ + devm_kfree(dev, p)); \ + \ + len = 4; \ + /* Using memdup() with fixed size, so force unknown length. */ \ + if (!__builtin_constant_p(expected_size)) \ + len += zero_size; \ + checker(len, devm_kmemdup(dev, "Ohai", len, gfp), \ + devm_kfree(dev, p)); \ + \ + device_unregister(dev); \ +} while (0) +DEFINE_ALLOC_SIZE_TEST_PAIR(devm_kmalloc) + static struct kunit_case fortify_test_cases[] = { KUNIT_CASE(known_sizes_test), KUNIT_CASE(control_flow_split_test), + KUNIT_CASE(alloc_size_kmalloc_const_test), + KUNIT_CASE(alloc_size_kmalloc_dynamic_test), + KUNIT_CASE(alloc_size_vmalloc_const_test), + KUNIT_CASE(alloc_size_vmalloc_dynamic_test), + KUNIT_CASE(alloc_size_kvmalloc_const_test), + KUNIT_CASE(alloc_size_kvmalloc_dynamic_test), + KUNIT_CASE(alloc_size_devm_kmalloc_const_test), + KUNIT_CASE(alloc_size_devm_kmalloc_dynamic_test), {} }; From 1307c5d33cce8a41dd77c2571e4df65a5b627feb Mon Sep 17 00:00:00 2001 From: Gabriel Somlo Date: Tue, 22 Nov 2022 15:04:26 -0500 Subject: [PATCH 2021/4122] serial: altera_uart: fix locking in polling mode Since altera_uart_interrupt() may also be called from a poll 
timer in "serving_softirq" context, use spin_[lock_irqsave|unlock_irqrestore] variants, which are appropriate for both softirq and hardware interrupt contexts. Fixes: 2f8b9c15cd88 ("altera_uart: Add support for polling mode (IRQ-less)") Signed-off-by: Gabriel Somlo Link: https://lore.kernel.org/r/20221122200426.888349-1-gsomlo@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/altera_uart.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/altera_uart.c b/drivers/tty/serial/altera_uart.c index 316074bb23e9..9ce3d24af536 100644 --- a/drivers/tty/serial/altera_uart.c +++ b/drivers/tty/serial/altera_uart.c @@ -259,16 +259,17 @@ static irqreturn_t altera_uart_interrupt(int irq, void *data) { struct uart_port *port = data; struct altera_uart *pp = container_of(port, struct altera_uart, port); + unsigned long flags; unsigned int isr; isr = altera_uart_readl(port, ALTERA_UART_STATUS_REG) & pp->imr; - spin_lock(&port->lock); + spin_lock_irqsave(&port->lock, flags); if (isr & ALTERA_UART_STATUS_RRDY_MSK) altera_uart_rx_chars(port); if (isr & ALTERA_UART_STATUS_TRDY_MSK) altera_uart_tx_chars(port); - spin_unlock(&port->lock); + spin_unlock_irqrestore(&port->lock, flags); return IRQ_RETVAL(isr); } From 1a6ec673fb627c26e2267ca0a03849f91dbd9b40 Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Wed, 23 Nov 2022 06:12:12 +0000 Subject: [PATCH 2022/4122] serial: sunsab: Fix error handling in sunsab_init() The sunsab_init() returns the platform_driver_register() directly without checking its return value, if platform_driver_register() failed, the allocated sunsab_ports is leaked. Fix by free sunsab_ports and set it to NULL when platform_driver_register() failed. 
Fixes: c4d37215a824 ("[SERIAL] sunsab: Convert to of_driver framework.") Signed-off-by: Yuan Can Link: https://lore.kernel.org/r/20221123061212.52593-1-yuancan@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sunsab.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/sunsab.c b/drivers/tty/serial/sunsab.c index 94db67f21abf..48b39fdb0397 100644 --- a/drivers/tty/serial/sunsab.c +++ b/drivers/tty/serial/sunsab.c @@ -1131,7 +1131,13 @@ static int __init sunsab_init(void) } } - return platform_driver_register(&sab_driver); + err = platform_driver_register(&sab_driver); + if (err) { + kfree(sunsab_ports); + sunsab_ports = NULL; + } + + return err; } static void __exit sunsab_exit(void) From 8682ab0eea89c300ebb120c02ead3999ca5560a8 Mon Sep 17 00:00:00 2001 From: Sherry Sun Date: Wed, 23 Nov 2022 10:36:19 +0800 Subject: [PATCH 2023/4122] tty: serial: fsl_lpuart: switch to new dmaengine_terminate_* API Convert dmaengine_terminate_all() calls to synchronous and asynchronous versions where appropriate. 
Signed-off-by: Sherry Sun Link: https://lore.kernel.org/r/20221123023619.30173-1-sherry.sun@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/fsl_lpuart.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index c1c8aa3e0fac..5e69fb73f570 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -583,7 +583,7 @@ static void lpuart_flush_buffer(struct uart_port *port) sport->dma_tx_nents, DMA_TO_DEVICE); sport->dma_tx_in_progress = false; } - dmaengine_terminate_all(chan); + dmaengine_terminate_async(chan); } if (lpuart_is_32(sport)) { @@ -1327,7 +1327,7 @@ static void lpuart_dma_rx_free(struct uart_port *port) struct lpuart_port, port); struct dma_chan *chan = sport->dma_rx_chan; - dmaengine_terminate_all(chan); + dmaengine_terminate_sync(chan); dma_unmap_sg(chan->device->dev, &sport->rx_sgl, 1, DMA_FROM_DEVICE); kfree(sport->rx_ring.buf); sport->rx_ring.tail = 0; @@ -1757,7 +1757,7 @@ static void lpuart_dma_shutdown(struct lpuart_port *sport) if (wait_event_interruptible_timeout(sport->dma_wait, !sport->dma_tx_in_progress, msecs_to_jiffies(300)) <= 0) { sport->dma_tx_in_progress = false; - dmaengine_terminate_all(sport->dma_tx_chan); + dmaengine_terminate_sync(sport->dma_tx_chan); } sport->lpuart_dma_tx_use = false; } @@ -2968,7 +2968,7 @@ static int lpuart_suspend(struct device *dev) } spin_unlock_irqrestore(&sport->port.lock, flags); sport->dma_tx_in_progress = false; - dmaengine_terminate_all(sport->dma_tx_chan); + dmaengine_terminate_sync(sport->dma_tx_chan); } } else if (pm_runtime_active(sport->port.dev)) { lpuart_disable_clks(sport); From 94ec165c9f98189ce9aa50cfcb7181ba23f92eb7 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Wed, 23 Nov 2022 09:27:35 +0100 Subject: [PATCH 2024/4122] serial: atmel: cleanup atmel_start+stop_tx() Define local variables holding information about whether pdc or dma is used in the HW. 
These are retested several times by calls to atmel_use_pdc_tx() and atmel_use_dma_tx(). So to make the code more readable, simply cache the values. This is also a preparatory patch for the next one (where is_pdc is used once more in atmel_stop_tx()). Cc: Richard Genoud Cc: Nicolas Ferre Cc: Alexandre Belloni Cc: Claudiu Beznea Cc: linux-arm-kernel@lists.infradead.org Reported-by: Michael Walle Signed-off-by: Jiri Slaby (SUSE) Link: https://lore.kernel.org/r/20221123082736.24566-1-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/atmel_serial.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index 4ca04676c406..65f63dccfd72 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -552,8 +552,9 @@ static u_int atmel_get_mctrl(struct uart_port *port) static void atmel_stop_tx(struct uart_port *port) { struct atmel_uart_port *atmel_port = to_atmel_uart_port(port); + bool is_pdc = atmel_use_pdc_tx(port); - if (atmel_use_pdc_tx(port)) { + if (is_pdc) { /* disable PDC transmit */ atmel_uart_writel(port, ATMEL_PDC_PTCR, ATMEL_PDC_TXTDIS); } @@ -572,7 +573,6 @@ static void atmel_stop_tx(struct uart_port *port) if (atmel_uart_is_half_duplex(port)) if (!atomic_read(&atmel_port->tasklet_shutdown)) atmel_start_rx(port); - } /* @@ -581,20 +581,22 @@ static void atmel_stop_tx(struct uart_port *port) static void atmel_start_tx(struct uart_port *port) { struct atmel_uart_port *atmel_port = to_atmel_uart_port(port); + bool is_pdc = atmel_use_pdc_tx(port); + bool is_dma = is_pdc || atmel_use_dma_tx(port); - if (atmel_use_pdc_tx(port) && (atmel_uart_readl(port, ATMEL_PDC_PTSR) + if (is_pdc && (atmel_uart_readl(port, ATMEL_PDC_PTSR) & ATMEL_PDC_TXTEN)) /* The transmitter is already running. 
Yes, we really need this.*/ return; - if (atmel_use_pdc_tx(port) || atmel_use_dma_tx(port)) - if (atmel_uart_is_half_duplex(port)) - atmel_stop_rx(port); + if (is_dma && atmel_uart_is_half_duplex(port)) + atmel_stop_rx(port); - if (atmel_use_pdc_tx(port)) + if (is_pdc) { /* re-enable PDC transmit */ atmel_uart_writel(port, ATMEL_PDC_PTCR, ATMEL_PDC_TXTEN); + } /* Enable interrupts */ atmel_uart_writel(port, ATMEL_US_IER, atmel_port->tx_done_mask); From 6373ab4dfee731deec62b4452ea641611feff9b3 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Wed, 23 Nov 2022 09:27:36 +0100 Subject: [PATCH 2025/4122] serial: atmel: don't stop the transmitter when doing PIO Writing ATMEL_US_TXDIS to ATMEL_US_CR makes the transmitter NOT send the just queued character. This means when the character is the last one and uart calls ops->stop_tx(), the character is not sent at all. The usart datasheet is not very specific on this; it just says the transmitter is stopped. But apparently, the character is dropped. So we should stop the transmitter only for DMA and PDC transfers to not send any more characters. For PIO, this is unexpected and deviates from other drivers. In particular, the below referenced commit broke TX as it added a call to ->stop_tx() after the very last character written to the transmitter. So fix this by limiting the write of ATMEL_US_TXDIS to DMA transfers only. Even there, I don't know if it is correctly implemented. Are all the queued characters sent once ->start_tx() is called? Anyone tested flow control -- be it hard (RTSCTS) or the soft (XOFF/XON) one? 
Fixes: 2d141e683e9a ("tty: serial: use uart_port_tx() helper") Cc: Richard Genoud Cc: Nicolas Ferre Cc: Alexandre Belloni Cc: Claudiu Beznea Cc: linux-arm-kernel@lists.infradead.org Reported-by: Michael Walle Signed-off-by: Jiri Slaby (SUSE) Link: https://lore.kernel.org/r/20221123082736.24566-2-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/atmel_serial.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index 65f63dccfd72..f1c06e12efa0 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -553,19 +553,22 @@ static void atmel_stop_tx(struct uart_port *port) { struct atmel_uart_port *atmel_port = to_atmel_uart_port(port); bool is_pdc = atmel_use_pdc_tx(port); + bool is_dma = is_pdc || atmel_use_dma_tx(port); if (is_pdc) { /* disable PDC transmit */ atmel_uart_writel(port, ATMEL_PDC_PTCR, ATMEL_PDC_TXTDIS); } - /* - * Disable the transmitter. - * This is mandatory when DMA is used, otherwise the DMA buffer - * is fully transmitted. - */ - atmel_uart_writel(port, ATMEL_US_CR, ATMEL_US_TXDIS); - atmel_port->tx_stopped = true; + if (is_dma) { + /* + * Disable the transmitter. + * This is mandatory when DMA is used, otherwise the DMA buffer + * is fully transmitted. 
+ */ + atmel_uart_writel(port, ATMEL_US_CR, ATMEL_US_TXDIS); + atmel_port->tx_stopped = true; + } /* Disable interrupts */ atmel_uart_writel(port, ATMEL_US_IDR, atmel_port->tx_done_mask); @@ -601,9 +604,11 @@ static void atmel_start_tx(struct uart_port *port) /* Enable interrupts */ atmel_uart_writel(port, ATMEL_US_IER, atmel_port->tx_done_mask); - /* re-enable the transmitter */ - atmel_uart_writel(port, ATMEL_US_CR, ATMEL_US_TXEN); - atmel_port->tx_stopped = false; + if (is_dma) { + /* re-enable the transmitter */ + atmel_uart_writel(port, ATMEL_US_CR, ATMEL_US_TXEN); + atmel_port->tx_stopped = false; + } } /* From fb491d5500a7ca551e49bc32d9b19d226023f68d Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Fri, 11 Nov 2022 18:06:27 +0100 Subject: [PATCH 2026/4122] KVM: s390: pv: asynchronous destroy for reboot Until now, destroying a protected guest was an entirely synchronous operation that could potentially take a very long time, depending on the size of the guest, due to the time needed to clean up the address space from protected pages. This patch implements an asynchronous destroy mechanism, that allows a protected guest to reboot significantly faster than previously. This is achieved by clearing the pages of the old guest in background. In case of reboot, the new guest will be able to run in the same address space almost immediately. The old protected guest is then only destroyed when all of its memory has been destroyed or otherwise made non protected. Two new PV commands are added for the KVM_S390_PV_COMMAND ioctl: KVM_PV_ASYNC_CLEANUP_PREPARE: set aside the current protected VM for later asynchronous teardown. The current KVM VM will then continue immediately as non-protected. If a protected VM had already been set aside for asynchronous teardown, but without starting the teardown process, this call will fail. There can be at most one VM set aside at any time. 
Once it is set aside, the protected VM only exists in the context of the Ultravisor, it is not associated with the KVM VM anymore. Its protected CPUs have already been destroyed, but not its memory. This command can be issued again immediately after starting KVM_PV_ASYNC_CLEANUP_PERFORM, without having to wait for completion. KVM_PV_ASYNC_CLEANUP_PERFORM: tears down the protected VM previously set aside using KVM_PV_ASYNC_CLEANUP_PREPARE. Ideally the KVM_PV_ASYNC_CLEANUP_PERFORM PV command should be issued by userspace from a separate thread. If a fatal signal is received (or if the process terminates naturally), the command will terminate immediately without completing. All protected VMs whose teardown was interrupted will be put in the need_cleanup list. The rest of the normal KVM teardown process will take care of properly cleaning up all remaining protected VMs, including the ones on the need_cleanup list. Signed-off-by: Claudio Imbrenda Reviewed-by: Nico Boehr Reviewed-by: Janosch Frank Reviewed-by: Steffen Eiden Link: https://lore.kernel.org/r/20221111170632.77622-2-imbrenda@linux.ibm.com Message-Id: <20221111170632.77622-2-imbrenda@linux.ibm.com> Signed-off-by: Janosch Frank --- arch/s390/include/asm/kvm_host.h | 2 + arch/s390/kvm/kvm-s390.c | 49 ++++- arch/s390/kvm/kvm-s390.h | 3 + arch/s390/kvm/pv.c | 295 +++++++++++++++++++++++++++++-- include/uapi/linux/kvm.h | 2 + 5 files changed, 333 insertions(+), 18 deletions(-) diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 21f1339a4197..d67ce719d16a 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -942,6 +942,8 @@ struct kvm_s390_pv { unsigned long stor_base; void *stor_var; bool dumping; + void *set_aside; + struct list_head need_cleanup; struct mmu_notifier mmu_notifier; }; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index bd6e0201bfe5..f0abaaf7eea4 100644 --- a/arch/s390/kvm/kvm-s390.c +++ 
b/arch/s390/kvm/kvm-s390.c @@ -209,6 +209,8 @@ unsigned int diag9c_forwarding_hz; module_param(diag9c_forwarding_hz, uint, 0644); MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off"); +static int async_destroy; + /* * For now we handle at most 16 double words as this is what the s390 base * kernel handles and stores in the prefix page. If we ever need to go beyond @@ -2504,9 +2506,13 @@ static int kvm_s390_pv_dmp(struct kvm *kvm, struct kvm_pv_cmd *cmd, static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) { + const bool need_lock = (cmd->cmd != KVM_PV_ASYNC_CLEANUP_PERFORM); + void __user *argp = (void __user *)cmd->data; int r = 0; u16 dummy; - void __user *argp = (void __user *)cmd->data; + + if (need_lock) + mutex_lock(&kvm->lock); switch (cmd->cmd) { case KVM_PV_ENABLE: { @@ -2540,6 +2546,31 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); break; } + case KVM_PV_ASYNC_CLEANUP_PREPARE: + r = -EINVAL; + if (!kvm_s390_pv_is_protected(kvm) || !async_destroy) + break; + + r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc); + /* + * If a CPU could not be destroyed, destroy VM will also fail. + * There is no point in trying to destroy it. Instead return + * the rc and rrc from the first CPU that failed destroying. + */ + if (r) + break; + r = kvm_s390_pv_set_aside(kvm, &cmd->rc, &cmd->rrc); + + /* no need to block service interrupts any more */ + clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); + break; + case KVM_PV_ASYNC_CLEANUP_PERFORM: + r = -EINVAL; + if (!async_destroy) + break; + /* kvm->lock must not be held; this is asserted inside the function. 
*/ + r = kvm_s390_pv_deinit_aside_vm(kvm, &cmd->rc, &cmd->rrc); + break; case KVM_PV_DISABLE: { r = -EINVAL; if (!kvm_s390_pv_is_protected(kvm)) @@ -2553,7 +2584,7 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) */ if (r) break; - r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc); + r = kvm_s390_pv_deinit_cleanup_all(kvm, &cmd->rc, &cmd->rrc); /* no need to block service interrupts any more */ clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); @@ -2703,6 +2734,9 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) default: r = -ENOTTY; } + if (need_lock) + mutex_unlock(&kvm->lock); + return r; } @@ -2907,9 +2941,8 @@ long kvm_arch_vm_ioctl(struct file *filp, r = -EINVAL; break; } - mutex_lock(&kvm->lock); + /* must be called without kvm->lock */ r = kvm_s390_handle_pv(kvm, &args); - mutex_unlock(&kvm->lock); if (copy_to_user(argp, &args, sizeof(args))) { r = -EFAULT; break; @@ -3228,6 +3261,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm_s390_vsie_init(kvm); if (use_gisa) kvm_s390_gisa_init(kvm); + INIT_LIST_HEAD(&kvm->arch.pv.need_cleanup); + kvm->arch.pv.set_aside = NULL; KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid); return 0; @@ -3272,11 +3307,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm) /* * We are already at the end of life and kvm->lock is not taken. * This is ok as the file descriptor is closed by now and nobody - * can mess with the pv state. To avoid lockdep_assert_held from - * complaining we do not use kvm_s390_pv_is_protected. + * can mess with the pv state. */ - if (kvm_s390_pv_get_handle(kvm)) - kvm_s390_pv_deinit_vm(kvm, &rc, &rrc); + kvm_s390_pv_deinit_cleanup_all(kvm, &rc, &rrc); /* * Remove the mmu notifier only when the whole KVM VM is torn down, * and only if one was registered to begin with. 
If the VM is diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index a60d1e5c44cd..826754937ae4 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -244,6 +244,9 @@ static inline u32 kvm_s390_get_gisa_desc(struct kvm *kvm) /* implemented in pv.c */ int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc); int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc); +int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc); +int kvm_s390_pv_deinit_aside_vm(struct kvm *kvm, u16 *rc, u16 *rrc); +int kvm_s390_pv_deinit_cleanup_all(struct kvm *kvm, u16 *rc, u16 *rrc); int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc); int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc); int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc, diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index 48c4f57d5d76..5f958fcf6283 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -18,6 +18,29 @@ #include #include "kvm-s390.h" +/** + * struct pv_vm_to_be_destroyed - Represents a protected VM that needs to + * be destroyed + * + * @list: list head for the list of leftover VMs + * @old_gmap_table: the gmap table of the leftover protected VM + * @handle: the handle of the leftover protected VM + * @stor_var: pointer to the variable storage of the leftover protected VM + * @stor_base: address of the base storage of the leftover protected VM + * + * Represents a protected VM that is still registered with the Ultravisor, + * but which does not correspond any longer to an active KVM VM. It should + * be destroyed at some point later, either asynchronously or when the + * process terminates. 
+ */ +struct pv_vm_to_be_destroyed { + struct list_head list; + unsigned long old_gmap_table; + u64 handle; + void *stor_var; + unsigned long stor_base; +}; + static void kvm_s390_clear_pv_state(struct kvm *kvm) { kvm->arch.pv.handle = 0; @@ -161,7 +184,143 @@ out_err: return -ENOMEM; } -/* this should not fail, but if it does, we must not free the donated memory */ +/** + * kvm_s390_pv_dispose_one_leftover - Clean up one leftover protected VM. + * @kvm: the KVM that was associated with this leftover protected VM + * @leftover: details about the leftover protected VM that needs a clean up + * @rc: the RC code of the Destroy Secure Configuration UVC + * @rrc: the RRC code of the Destroy Secure Configuration UVC + * + * Destroy one leftover protected VM. + * On success, kvm->mm->context.protected_count will be decremented atomically + * and all other resources used by the VM will be freed. + * + * Return: 0 in case of success, otherwise 1 + */ +static int kvm_s390_pv_dispose_one_leftover(struct kvm *kvm, + struct pv_vm_to_be_destroyed *leftover, + u16 *rc, u16 *rrc) +{ + int cc; + + cc = uv_cmd_nodata(leftover->handle, UVC_CMD_DESTROY_SEC_CONF, rc, rrc); + KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY LEFTOVER VM: rc %x rrc %x", *rc, *rrc); + WARN_ONCE(cc, "protvirt destroy leftover vm failed rc %x rrc %x", *rc, *rrc); + if (cc) + return cc; + /* + * Intentionally leak unusable memory. If the UVC fails, the memory + * used for the VM and its metadata is permanently unusable. + * This can only happen in case of a serious KVM or hardware bug; it + * is not expected to happen in normal operation. + */ + free_pages(leftover->stor_base, get_order(uv_info.guest_base_stor_len)); + free_pages(leftover->old_gmap_table, CRST_ALLOC_ORDER); + vfree(leftover->stor_var); + atomic_dec(&kvm->mm->context.protected_count); + return 0; +} + +/** + * kvm_s390_destroy_lower_2g - Destroy the first 2GB of protected guest memory. + * @kvm: the VM whose memory is to be cleared. 
+ * + * Destroy the first 2GB of guest memory, to avoid prefix issues after reboot. + * The CPUs of the protected VM need to be destroyed beforehand. + */ +static void kvm_s390_destroy_lower_2g(struct kvm *kvm) +{ + const unsigned long pages_2g = SZ_2G / PAGE_SIZE; + struct kvm_memory_slot *slot; + unsigned long len; + int srcu_idx; + + srcu_idx = srcu_read_lock(&kvm->srcu); + + /* Take the memslot containing guest absolute address 0 */ + slot = gfn_to_memslot(kvm, 0); + /* Clear all slots or parts thereof that are below 2GB */ + while (slot && slot->base_gfn < pages_2g) { + len = min_t(u64, slot->npages, pages_2g - slot->base_gfn) * PAGE_SIZE; + s390_uv_destroy_range(kvm->mm, slot->userspace_addr, slot->userspace_addr + len); + /* Take the next memslot */ + slot = gfn_to_memslot(kvm, slot->base_gfn + slot->npages); + } + + srcu_read_unlock(&kvm->srcu, srcu_idx); +} + +/** + * kvm_s390_pv_set_aside - Set aside a protected VM for later teardown. + * @kvm: the VM + * @rc: return value for the RC field of the UVCB + * @rrc: return value for the RRC field of the UVCB + * + * Set aside the protected VM for a subsequent teardown. The VM will be able + * to continue immediately as a non-secure VM, and the information needed to + * properly tear down the protected VM is set aside. If another protected VM + * was already set aside without starting its teardown, this function will + * fail. + * The CPUs of the protected VM need to be destroyed beforehand. + * + * Context: kvm->lock needs to be held + * + * Return: 0 in case of success, -EINVAL if another protected VM was already set + * aside, -ENOMEM if the system ran out of memory. + */ +int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc) +{ + struct pv_vm_to_be_destroyed *priv; + + lockdep_assert_held(&kvm->lock); + /* + * If another protected VM was already prepared for teardown, refuse. + * A normal deinitialization has to be performed instead. 
+ */ + if (kvm->arch.pv.set_aside) + return -EINVAL; + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->stor_var = kvm->arch.pv.stor_var; + priv->stor_base = kvm->arch.pv.stor_base; + priv->handle = kvm_s390_pv_get_handle(kvm); + priv->old_gmap_table = (unsigned long)kvm->arch.gmap->table; + WRITE_ONCE(kvm->arch.gmap->guest_handle, 0); + if (s390_replace_asce(kvm->arch.gmap)) { + kfree(priv); + return -ENOMEM; + } + + kvm_s390_destroy_lower_2g(kvm); + kvm_s390_clear_pv_state(kvm); + kvm->arch.pv.set_aside = priv; + + *rc = UVC_RC_EXECUTED; + *rrc = 42; + return 0; +} + +/** + * kvm_s390_pv_deinit_vm - Deinitialize the current protected VM + * @kvm: the KVM whose protected VM needs to be deinitialized + * @rc: the RC code of the UVC + * @rrc: the RRC code of the UVC + * + * Deinitialize the current protected VM. This function will destroy and + * cleanup the current protected VM, but it will not cleanup the guest + * memory. This function should only be called when the protected VM has + * just been created and therefore does not have any guest memory, or when + * the caller cleans up the guest memory separately. + * + * This function should not fail, but if it does, the donated memory must + * not be freed. 
+ * + * Context: kvm->lock needs to be held + * + * Return: 0 in case of success, otherwise -EIO + */ int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc) { int cc; @@ -169,15 +328,6 @@ int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc) cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), UVC_CMD_DESTROY_SEC_CONF, rc, rrc); WRITE_ONCE(kvm->arch.gmap->guest_handle, 0); - /* - * if the mm still has a mapping, make all its pages accessible - * before destroying the guest - */ - if (mmget_not_zero(kvm->mm)) { - s390_uv_destroy_range(kvm->mm, 0, TASK_SIZE); - mmput(kvm->mm); - } - if (!cc) { atomic_dec(&kvm->mm->context.protected_count); kvm_s390_pv_dealloc_vm(kvm); @@ -191,6 +341,131 @@ int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc) return cc ? -EIO : 0; } +/** + * kvm_s390_pv_deinit_cleanup_all - Clean up all protected VMs associated + * with a specific KVM. + * @kvm: the KVM to be cleaned up + * @rc: the RC code of the first failing UVC + * @rrc: the RRC code of the first failing UVC + * + * This function will clean up all protected VMs associated with a KVM. + * This includes the active one, the one prepared for deinitialization with + * kvm_s390_pv_set_aside, and any still pending in the need_cleanup list. + * + * Context: kvm->lock needs to be held unless being called from + * kvm_arch_destroy_vm. 
+ * + * Return: 0 if all VMs are successfully cleaned up, otherwise -EIO + */ +int kvm_s390_pv_deinit_cleanup_all(struct kvm *kvm, u16 *rc, u16 *rrc) +{ + struct pv_vm_to_be_destroyed *cur; + bool need_zap = false; + u16 _rc, _rrc; + int cc = 0; + + /* Make sure the counter does not reach 0 before calling s390_uv_destroy_range */ + atomic_inc(&kvm->mm->context.protected_count); + + *rc = 1; + /* If the current VM is protected, destroy it */ + if (kvm_s390_pv_get_handle(kvm)) { + cc = kvm_s390_pv_deinit_vm(kvm, rc, rrc); + need_zap = true; + } + + /* If a previous protected VM was set aside, put it in the need_cleanup list */ + if (kvm->arch.pv.set_aside) { + list_add(kvm->arch.pv.set_aside, &kvm->arch.pv.need_cleanup); + kvm->arch.pv.set_aside = NULL; + } + + /* Cleanup all protected VMs in the need_cleanup list */ + while (!list_empty(&kvm->arch.pv.need_cleanup)) { + cur = list_first_entry(&kvm->arch.pv.need_cleanup, typeof(*cur), list); + need_zap = true; + if (kvm_s390_pv_dispose_one_leftover(kvm, cur, &_rc, &_rrc)) { + cc = 1; + /* + * Only return the first error rc and rrc, so make + * sure it is not overwritten. All destroys will + * additionally be reported via KVM_UV_EVENT(). + */ + if (*rc == UVC_RC_EXECUTED) { + *rc = _rc; + *rrc = _rrc; + } + } + list_del(&cur->list); + kfree(cur); + } + + /* + * If the mm still has a mapping, try to mark all its pages as + * accessible. The counter should not reach zero before this + * cleanup has been performed. + */ + if (need_zap && mmget_not_zero(kvm->mm)) { + s390_uv_destroy_range(kvm->mm, 0, TASK_SIZE); + mmput(kvm->mm); + } + + /* Now the counter can safely reach 0 */ + atomic_dec(&kvm->mm->context.protected_count); + return cc ? -EIO : 0; +} + +/** + * kvm_s390_pv_deinit_aside_vm - Teardown a previously set aside protected VM. 
+ * @kvm: the VM previously associated with the protected VM + * @rc: return value for the RC field of the UVCB + * @rrc: return value for the RRC field of the UVCB + * + * Tear down the protected VM that had been previously prepared for teardown + * using kvm_s390_pv_set_aside. Ideally this should be called by + * userspace asynchronously from a separate thread. + * + * Context: kvm->lock must not be held. + * + * Return: 0 in case of success, -EINVAL if no protected VM had been + * prepared for asynchronous teardown, -EIO in case of other errors. + */ +int kvm_s390_pv_deinit_aside_vm(struct kvm *kvm, u16 *rc, u16 *rrc) +{ + struct pv_vm_to_be_destroyed *p; + int ret = 0; + + lockdep_assert_not_held(&kvm->lock); + mutex_lock(&kvm->lock); + p = kvm->arch.pv.set_aside; + kvm->arch.pv.set_aside = NULL; + mutex_unlock(&kvm->lock); + if (!p) + return -EINVAL; + + /* When a fatal signal is received, stop immediately */ + if (s390_uv_destroy_range_interruptible(kvm->mm, 0, TASK_SIZE_MAX)) + goto done; + if (kvm_s390_pv_dispose_one_leftover(kvm, p, rc, rrc)) + ret = -EIO; + kfree(p); + p = NULL; +done: + /* + * p is not NULL if we aborted because of a fatal signal, in which + * case queue the leftover for later cleanup. 
+ */ + if (p) { + mutex_lock(&kvm->lock); + list_add(&p->list, &kvm->arch.pv.need_cleanup); + mutex_unlock(&kvm->lock); + /* Did not finish, but pretend things went well */ + *rc = UVC_RC_EXECUTED; + *rrc = 42; + } + return ret; +} + static void kvm_s390_pv_mmu_notifier_release(struct mmu_notifier *subscription, struct mm_struct *mm) { diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 0d5d4419139a..b3701b23ca18 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1740,6 +1740,8 @@ enum pv_cmd_id { KVM_PV_UNSHARE_ALL, KVM_PV_INFO, KVM_PV_DUMP, + KVM_PV_ASYNC_CLEANUP_PREPARE, + KVM_PV_ASYNC_CLEANUP_PERFORM, }; struct kvm_pv_cmd { From d9459922a15ce7a20a85b38a976494ac7f445732 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Fri, 11 Nov 2022 18:06:28 +0100 Subject: [PATCH 2027/4122] KVM: s390: pv: api documentation for asynchronous destroy Add documentation for the new commands added to the KVM_S390_PV_COMMAND ioctl. Signed-off-by: Claudio Imbrenda Reviewed-by: Nico Boehr Reviewed-by: Steffen Eiden Reviewed-by: Janosch Frank Link: https://lore.kernel.org/r/20221111170632.77622-3-imbrenda@linux.ibm.com Message-Id: <20221111170632.77622-3-imbrenda@linux.ibm.com> Signed-off-by: Janosch Frank --- Documentation/virt/kvm/api.rst | 41 ++++++++++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index eee9f857a986..9175d41e8081 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -5163,10 +5163,13 @@ KVM_PV_ENABLE ===== ============================= KVM_PV_DISABLE - Deregister the VM from the Ultravisor and reclaim the memory that - had been donated to the Ultravisor, making it usable by the kernel - again. All registered VCPUs are converted back to non-protected - ones. 
+ Deregister the VM from the Ultravisor and reclaim the memory that had + been donated to the Ultravisor, making it usable by the kernel again. + All registered VCPUs are converted back to non-protected ones. If a + previous protected VM had been prepared for asynchronous teardown with + KVM_PV_ASYNC_CLEANUP_PREPARE and not subsequently torn down with + KVM_PV_ASYNC_CLEANUP_PERFORM, it will be torn down in this call + together with the current protected VM. KVM_PV_VM_SET_SEC_PARMS Pass the image header from VM memory to the Ultravisor in @@ -5289,6 +5292,36 @@ KVM_PV_DUMP authentication tag all of which are needed to decrypt the dump at a later time. +KVM_PV_ASYNC_CLEANUP_PREPARE + :Capability: KVM_CAP_S390_PROTECTED_ASYNC_DISABLE + + Prepare the current protected VM for asynchronous teardown. Most + resources used by the current protected VM will be set aside for a + subsequent asynchronous teardown. The current protected VM will then + resume execution immediately as non-protected. There can be at most + one protected VM prepared for asynchronous teardown at any time. If + a protected VM had already been prepared for teardown without + subsequently calling KVM_PV_ASYNC_CLEANUP_PERFORM, this call will + fail. In that case, the userspace process should issue a normal + KVM_PV_DISABLE. The resources set aside with this call will need to + be cleaned up with a subsequent call to KVM_PV_ASYNC_CLEANUP_PERFORM + or KVM_PV_DISABLE, otherwise they will be cleaned up when KVM + terminates. KVM_PV_ASYNC_CLEANUP_PREPARE can be called again as soon + as cleanup starts, i.e. before KVM_PV_ASYNC_CLEANUP_PERFORM finishes. + +KVM_PV_ASYNC_CLEANUP_PERFORM + :Capability: KVM_CAP_S390_PROTECTED_ASYNC_DISABLE + + Tear down the protected VM previously prepared for teardown with + KVM_PV_ASYNC_CLEANUP_PREPARE. The resources that had been set aside + will be freed during the execution of this command. This PV command + should ideally be issued by userspace from a separate thread. 
If a + fatal signal is received (or the process terminates naturally), the + command will terminate immediately without completing, and the normal + KVM shutdown procedure will take care of cleaning up all remaining + protected VMs, including the ones whose teardown was interrupted by + process termination. + 4.126 KVM_XEN_HVM_SET_ATTR -------------------------- From 8c516b25d6e9c70e6d76627932b14b0ef03a82c4 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Fri, 11 Nov 2022 18:06:29 +0100 Subject: [PATCH 2028/4122] KVM: s390: pv: add KVM_CAP_S390_PROTECTED_ASYNC_DISABLE Add KVM_CAP_S390_PROTECTED_ASYNC_DISABLE to signal that the KVM_PV_ASYNC_CLEANUP_PREPARE and KVM_PV_ASYNC_CLEANUP_PERFORM commands for the KVM_S390_PV_COMMAND ioctl are available. Signed-off-by: Claudio Imbrenda Reviewed-by: Nico Boehr Reviewed-by: Steffen Eiden Reviewed-by: Janosch Frank Link: https://lore.kernel.org/r/20221111170632.77622-4-imbrenda@linux.ibm.com Message-Id: <20221111170632.77622-4-imbrenda@linux.ibm.com> Signed-off-by: Janosch Frank --- arch/s390/kvm/kvm-s390.c | 3 +++ include/uapi/linux/kvm.h | 1 + 2 files changed, 4 insertions(+) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index f0abaaf7eea4..b6cc7d2935c0 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -618,6 +618,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_BPB: r = test_facility(82); break; + case KVM_CAP_S390_PROTECTED_ASYNC_DISABLE: + r = async_destroy && is_prot_virt_host(); + break; case KVM_CAP_S390_PROTECTED: r = is_prot_virt_host(); break; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index b3701b23ca18..d3f86a280858 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1178,6 +1178,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_S390_ZPCI_OP 221 #define KVM_CAP_S390_CPU_TOPOLOGY 222 #define KVM_CAP_DIRTY_LOG_RING_ACQ_REL 223 +#define KVM_CAP_S390_PROTECTED_ASYNC_DISABLE 224 #ifdef KVM_CAP_IRQ_ROUTING 
From afe20eb8df9108e4be9bdec88a4f90d2de863ca2 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Fri, 11 Nov 2022 18:06:30 +0100 Subject: [PATCH 2029/4122] KVM: s390: pv: avoid export before import if possible If the appropriate UV feature bit is set, there is no need to perform an export before import. The misc feature indicates, among other things, that importing a shared page from a different protected VM will automatically also transfer its ownership. Signed-off-by: Claudio Imbrenda Reviewed-by: Nico Boehr Reviewed-by: Janosch Frank Reviewed-by: Steffen Eiden Link: https://lore.kernel.org/r/20221111170632.77622-5-imbrenda@linux.ibm.com Message-Id: <20221111170632.77622-5-imbrenda@linux.ibm.com> Signed-off-by: Janosch Frank --- arch/s390/kernel/uv.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index f9810d2a267c..9f18a4af9c13 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -255,6 +255,13 @@ static int make_secure_pte(pte_t *ptep, unsigned long addr, */ static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm) { + /* + * The misc feature indicates, among other things, that importing a + * shared page from a different protected VM will automatically also + * transfer its ownership. + */ + if (test_bit_inv(BIT_UV_FEAT_MISC, &uv_info.uv_feature_indications)) + return false; if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED) return false; return atomic_read(&mm->context.protected_count) > 1; From f7866f582b1c9d80d1a3bd0953170185668c52ca Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Fri, 11 Nov 2022 18:06:31 +0100 Subject: [PATCH 2030/4122] KVM: s390: pv: support for Destroy fast UVC Add support for the Destroy Secure Configuration Fast Ultravisor call, and take advantage of it for asynchronous destroy. When supported, the protected guest is destroyed immediately using the new UVC, leaving only the memory to be cleaned up asynchronously. 
Signed-off-by: Claudio Imbrenda Reviewed-by: Nico Boehr Reviewed-by: Janosch Frank Reviewed-by: Steffen Eiden Link: https://lore.kernel.org/r/20221111170632.77622-6-imbrenda@linux.ibm.com Message-Id: <20221111170632.77622-6-imbrenda@linux.ibm.com> Signed-off-by: Janosch Frank --- arch/s390/include/asm/uv.h | 10 +++++++ arch/s390/kvm/pv.c | 61 +++++++++++++++++++++++++++++++++----- 2 files changed, 63 insertions(+), 8 deletions(-) diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index be3ef9dd6972..28a9ad57b6f1 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -34,6 +34,7 @@ #define UVC_CMD_INIT_UV 0x000f #define UVC_CMD_CREATE_SEC_CONF 0x0100 #define UVC_CMD_DESTROY_SEC_CONF 0x0101 +#define UVC_CMD_DESTROY_SEC_CONF_FAST 0x0102 #define UVC_CMD_CREATE_SEC_CPU 0x0120 #define UVC_CMD_DESTROY_SEC_CPU 0x0121 #define UVC_CMD_CONV_TO_SEC_STOR 0x0200 @@ -81,6 +82,7 @@ enum uv_cmds_inst { BIT_UVC_CMD_UNSHARE_ALL = 20, BIT_UVC_CMD_PIN_PAGE_SHARED = 21, BIT_UVC_CMD_UNPIN_PAGE_SHARED = 22, + BIT_UVC_CMD_DESTROY_SEC_CONF_FAST = 23, BIT_UVC_CMD_DUMP_INIT = 24, BIT_UVC_CMD_DUMP_CONFIG_STOR_STATE = 25, BIT_UVC_CMD_DUMP_CPU = 26, @@ -230,6 +232,14 @@ struct uv_cb_nodata { u64 reserved20[4]; } __packed __aligned(8); +/* Destroy Configuration Fast */ +struct uv_cb_destroy_fast { + struct uv_cb_header header; + u64 reserved08[2]; + u64 handle; + u64 reserved20[5]; +} __packed __aligned(8); + /* Set Shared Access */ struct uv_cb_share { struct uv_cb_header header; diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index 5f958fcf6283..e032ebbf51b9 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -203,6 +203,9 @@ static int kvm_s390_pv_dispose_one_leftover(struct kvm *kvm, { int cc; + /* It used the destroy-fast UVC, nothing left to do here */ + if (!leftover->handle) + goto done_fast; cc = uv_cmd_nodata(leftover->handle, UVC_CMD_DESTROY_SEC_CONF, rc, rrc); KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY LEFTOVER VM: rc %x rrc %x", *rc, 
*rrc); WARN_ONCE(cc, "protvirt destroy leftover vm failed rc %x rrc %x", *rc, *rrc); @@ -217,6 +220,7 @@ static int kvm_s390_pv_dispose_one_leftover(struct kvm *kvm, free_pages(leftover->stor_base, get_order(uv_info.guest_base_stor_len)); free_pages(leftover->old_gmap_table, CRST_ALLOC_ORDER); vfree(leftover->stor_var); +done_fast: atomic_dec(&kvm->mm->context.protected_count); return 0; } @@ -250,6 +254,36 @@ static void kvm_s390_destroy_lower_2g(struct kvm *kvm) srcu_read_unlock(&kvm->srcu, srcu_idx); } +static int kvm_s390_pv_deinit_vm_fast(struct kvm *kvm, u16 *rc, u16 *rrc) +{ + struct uv_cb_destroy_fast uvcb = { + .header.cmd = UVC_CMD_DESTROY_SEC_CONF_FAST, + .header.len = sizeof(uvcb), + .handle = kvm_s390_pv_get_handle(kvm), + }; + int cc; + + cc = uv_call_sched(0, (u64)&uvcb); + if (rc) + *rc = uvcb.header.rc; + if (rrc) + *rrc = uvcb.header.rrc; + WRITE_ONCE(kvm->arch.gmap->guest_handle, 0); + KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM FAST: rc %x rrc %x", + uvcb.header.rc, uvcb.header.rrc); + WARN_ONCE(cc, "protvirt destroy vm fast failed handle %llx rc %x rrc %x", + kvm_s390_pv_get_handle(kvm), uvcb.header.rc, uvcb.header.rrc); + /* Intended memory leak on "impossible" error */ + if (!cc) + kvm_s390_pv_dealloc_vm(kvm); + return cc ? -EIO : 0; +} + +static inline bool is_destroy_fast_available(void) +{ + return test_bit_inv(BIT_UVC_CMD_DESTROY_SEC_CONF_FAST, uv_info.inst_calls_list); +} + /** * kvm_s390_pv_set_aside - Set aside a protected VM for later teardown. 
* @kvm: the VM @@ -271,6 +305,7 @@ static void kvm_s390_destroy_lower_2g(struct kvm *kvm) int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc) { struct pv_vm_to_be_destroyed *priv; + int res = 0; lockdep_assert_held(&kvm->lock); /* @@ -283,14 +318,21 @@ int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc) if (!priv) return -ENOMEM; - priv->stor_var = kvm->arch.pv.stor_var; - priv->stor_base = kvm->arch.pv.stor_base; - priv->handle = kvm_s390_pv_get_handle(kvm); - priv->old_gmap_table = (unsigned long)kvm->arch.gmap->table; - WRITE_ONCE(kvm->arch.gmap->guest_handle, 0); - if (s390_replace_asce(kvm->arch.gmap)) { + if (is_destroy_fast_available()) { + res = kvm_s390_pv_deinit_vm_fast(kvm, rc, rrc); + } else { + priv->stor_var = kvm->arch.pv.stor_var; + priv->stor_base = kvm->arch.pv.stor_base; + priv->handle = kvm_s390_pv_get_handle(kvm); + priv->old_gmap_table = (unsigned long)kvm->arch.gmap->table; + WRITE_ONCE(kvm->arch.gmap->guest_handle, 0); + if (s390_replace_asce(kvm->arch.gmap)) + res = -ENOMEM; + } + + if (res) { kfree(priv); - return -ENOMEM; + return res; } kvm_s390_destroy_lower_2g(kvm); @@ -471,6 +513,7 @@ static void kvm_s390_pv_mmu_notifier_release(struct mmu_notifier *subscription, { struct kvm *kvm = container_of(subscription, struct kvm, arch.pv.mmu_notifier); u16 dummy; + int r; /* * No locking is needed since this is the last thread of the last user of this @@ -479,7 +522,9 @@ static void kvm_s390_pv_mmu_notifier_release(struct mmu_notifier *subscription, * unregistered. This means that if this notifier runs, then the * struct kvm is still valid. 
*/ - kvm_s390_cpus_from_pv(kvm, &dummy, &dummy); + r = kvm_s390_cpus_from_pv(kvm, &dummy, &dummy); + if (!r && is_destroy_fast_available() && kvm_s390_pv_get_handle(kvm)) + kvm_s390_pv_deinit_vm_fast(kvm, &dummy, &dummy); } static const struct mmu_notifier_ops kvm_s390_pv_mmu_notifier_ops = { From cc726886079febfa2384d77486d7f3a11f951ea9 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Fri, 11 Nov 2022 18:06:32 +0100 Subject: [PATCH 2031/4122] KVM: s390: pv: module parameter to fence asynchronous destroy Add the module parameter "async_destroy", to allow the asynchronous destroy mechanism to be switched off. This might be useful for debugging purposes. The parameter is enabled by default since the feature is opt-in anyway. Signed-off-by: Claudio Imbrenda Reviewed-by: Janosch Frank Reviewed-by: Steffen Eiden Reviewed-by: Nico Boehr Link: https://lore.kernel.org/r/20221111170632.77622-7-imbrenda@linux.ibm.com Message-Id: <20221111170632.77622-7-imbrenda@linux.ibm.com> Signed-off-by: Janosch Frank --- arch/s390/kvm/kvm-s390.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index b6cc7d2935c0..8a0c884bf737 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -209,7 +209,13 @@ unsigned int diag9c_forwarding_hz; module_param(diag9c_forwarding_hz, uint, 0644); MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off"); -static int async_destroy; +/* + * allow asynchronous deinit for protected guests; enable by default since + * the feature is opt-in anyway + */ +static int async_destroy = 1; +module_param(async_destroy, int, 0444); +MODULE_PARM_DESC(async_destroy, "Asynchronous destroy for protected guests"); /* * For now we handle at most 16 double words as this is what the s390 base From dbec280045f8ca568de2a88ac4712a35f82f2cb1 Mon Sep 17 00:00:00 2001 From: Nico Boehr Date: Fri, 18 Nov 2022 11:04:29 +0100 Subject: [PATCH 2032/4122] 
s390/vfio-ap: GISA: sort out physical vs virtual pointers usage Fix virtual vs physical address confusion (which currently are the same) for the GISA when enabling the IRQ. Signed-off-by: Nico Boehr Reviewed-by: Halil Pasic Reviewed-by: Claudio Imbrenda Link: https://lore.kernel.org/r/20221118100429.70453-1-nrb@linux.ibm.com Message-Id: <20221118100429.70453-1-nrb@linux.ibm.com> Signed-off-by: Janosch Frank --- drivers/s390/crypto/vfio_ap_ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index 0b4cc8c597ae..205a00105858 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -429,7 +429,7 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, aqic_gisa.isc = nisc; aqic_gisa.ir = 1; - aqic_gisa.gisa = (uint64_t)gisa >> 4; + aqic_gisa.gisa = virt_to_phys(gisa) >> 4; status = ap_aqic(q->apqn, aqic_gisa, h_nib); switch (status.response_code) { From 99b63f55dc514a357c2ecf25e9aab149879329f0 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 18 Nov 2022 16:11:33 +0100 Subject: [PATCH 2033/4122] KVM: s390: remove unused gisa_clear_ipm_gisc() function clang warns about an unused function: arch/s390/kvm/interrupt.c:317:20: error: unused function 'gisa_clear_ipm_gisc' [-Werror,-Wunused-function] static inline void gisa_clear_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc) Remove gisa_clear_ipm_gisc(), since it is unused and get rid of this warning. 
Signed-off-by: Heiko Carstens Reviewed-by: Thomas Huth Link: https://lore.kernel.org/r/20221118151133.2974602-1-hca@linux.ibm.com Signed-off-by: Christian Borntraeger Signed-off-by: Janosch Frank --- arch/s390/kvm/interrupt.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index ab569faf0df2..1dae78deddf2 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -314,11 +314,6 @@ static inline u8 gisa_get_ipm(struct kvm_s390_gisa *gisa) return READ_ONCE(gisa->ipm); } -static inline void gisa_clear_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc) -{ - clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa); -} - static inline int gisa_tac_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc) { return test_and_clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa); From a39818a3fb2bf12ae945a7c5fba8c5d9048a0e96 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 23 Nov 2022 21:26:10 +1100 Subject: [PATCH 2034/4122] objtool/powerpc: Implement arch_pc_relative_reloc() Provide an implementation for arch_pc_relative_reloc(). It is needed to pass the build once 61c6065ef7ec ("objtool: Allow !PC relative relocations") is merged. Signed-off-by: Michael Ellerman --- tools/objtool/arch/powerpc/decode.c | 9 +++++++++ tools/objtool/include/objtool/arch.h | 2 ++ 2 files changed, 11 insertions(+) diff --git a/tools/objtool/arch/powerpc/decode.c b/tools/objtool/arch/powerpc/decode.c index 01cade98b49e..9c653805a08a 100644 --- a/tools/objtool/arch/powerpc/decode.c +++ b/tools/objtool/arch/powerpc/decode.c @@ -82,6 +82,15 @@ unsigned long arch_jump_destination(struct instruction *insn) return insn->offset + insn->immediate; } +bool arch_pc_relative_reloc(struct reloc *reloc) +{ + /* + * The powerpc build only allows certain relocation types, see + * relocs_check.sh, and none of those accepted are PC relative. 
+ */ + return false; +} + void arch_initial_func_cfi_state(struct cfi_init_state *state) { int i; diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h index 5149330f400f..4ecb480131c7 100644 --- a/tools/objtool/include/objtool/arch.h +++ b/tools/objtool/include/objtool/arch.h @@ -95,4 +95,6 @@ bool arch_is_rethunk(struct symbol *sym); int arch_rewrite_retpolines(struct objtool_file *file); +bool arch_pc_relative_reloc(struct reloc *reloc); + #endif /* _ARCH_H */ From 41522f7442905814c654dbe2ca7b8d3605c7e0cc Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 16 Nov 2022 15:38:47 -0800 Subject: [PATCH 2035/4122] perf test: Add 'leafloop' test workload The leafloop workload is to run an infinite loop in the test_leaf function. This is needed for the ARM fp callgraph test to verify if it gets the correct callchains. $ perf test -w leafloop Committer notes: Add a: -U_FORTIFY_SOURCE to the leafloop CFLAGS as the main perf flags set it and it requires building with optimization, and this new test has a -O0. 
Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: German Gomez Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Zhengjun Xing Link: https://lore.kernel.org/r/20221116233854.1596378-6-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/builtin-test.c | 1 + tools/perf/tests/tests.h | 1 + tools/perf/tests/workloads/Build | 3 +++ tools/perf/tests/workloads/leafloop.c | 34 +++++++++++++++++++++++++++ 4 files changed, 39 insertions(+) create mode 100644 tools/perf/tests/workloads/leafloop.c diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 161f38476e77..0ed5ac452f6e 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -121,6 +121,7 @@ static struct test_suite **tests[] = { static struct test_workload *workloads[] = { &workload__noploop, &workload__thloop, + &workload__leafloop, }; static int num_subtests(const struct test_suite *t) diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index e6edfeeadaeb..86804dd6452b 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -202,5 +202,6 @@ struct test_workload workload__##work = { \ /* The list of test workloads */ DECLARE_WORKLOAD(noploop); DECLARE_WORKLOAD(thloop); +DECLARE_WORKLOAD(leafloop); #endif /* TESTS_H */ diff --git a/tools/perf/tests/workloads/Build b/tools/perf/tests/workloads/Build index b8964b1099c0..03dc675a4a7c 100644 --- a/tools/perf/tests/workloads/Build +++ b/tools/perf/tests/workloads/Build @@ -2,3 +2,6 @@ perf-y += noploop.o perf-y += thloop.o +perf-y += leafloop.o + +CFLAGS_leafloop.o = -g -O0 -fno-inline -fno-omit-frame-pointer -U_FORTIFY_SOURCE diff --git a/tools/perf/tests/workloads/leafloop.c b/tools/perf/tests/workloads/leafloop.c new file mode 100644 index 000000000000..1bf5cc97649b --- /dev/null +++ b/tools/perf/tests/workloads/leafloop.c @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include 
+#include +#include "../tests.h" + +/* We want to check these symbols in perf script */ +noinline void leaf(volatile int b); +noinline void parent(volatile int b); + +static volatile int a; + +noinline void leaf(volatile int b) +{ + for (;;) + a += b; +} + +noinline void parent(volatile int b) +{ + leaf(b); +} + +static int leafloop(int argc, const char **argv) +{ + int c = 1; + + if (argc > 0) + c = atoi(argv[0]); + + parent(c); + return 0; +} + +DEFINE_WORKLOAD(leafloop); From 7cf0b4a73a4a4f36bb4ef53d066b811b7621c635 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 16 Nov 2022 15:38:48 -0800 Subject: [PATCH 2036/4122] perf test: Replace arm callgraph fp test workload with leafloop So that it can get rid of requirement of a compiler. Reviewed-by: Leo Yan Signed-off-by: Namhyung Kim Tested-by: James Clark Cc: Adrian Hunter Cc: Athira Jajeev Cc: German Gomez Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Zhengjun Xing Link: https://lore.kernel.org/r/20221116233854.1596378-7-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/tests/shell/test_arm_callgraph_fp.sh | 34 ++----------------- 1 file changed, 3 insertions(+), 31 deletions(-) diff --git a/tools/perf/tests/shell/test_arm_callgraph_fp.sh b/tools/perf/tests/shell/test_arm_callgraph_fp.sh index ec108d45d3c6..e61d8deaa0c4 100755 --- a/tools/perf/tests/shell/test_arm_callgraph_fp.sh +++ b/tools/perf/tests/shell/test_arm_callgraph_fp.sh @@ -4,44 +4,16 @@ lscpu | grep -q "aarch64" || exit 2 -if ! 
[ -x "$(command -v cc)" ]; then - echo "failed: no compiler, install gcc" - exit 2 -fi - PERF_DATA=$(mktemp /tmp/__perf_test.perf.data.XXXXX) -TEST_PROGRAM_SOURCE=$(mktemp /tmp/test_program.XXXXX.c) -TEST_PROGRAM=$(mktemp /tmp/test_program.XXXXX) +TEST_PROGRAM="perf test -w leafloop" cleanup_files() { rm -f $PERF_DATA - rm -f $TEST_PROGRAM_SOURCE - rm -f $TEST_PROGRAM } trap cleanup_files exit term int -cat << EOF > $TEST_PROGRAM_SOURCE -int a = 0; -void leaf(void) { - for (;;) - a += a; -} -void parent(void) { - leaf(); -} -int main(void) { - parent(); - return 0; -} -EOF - -echo " + Compiling test program ($TEST_PROGRAM)..." - -CFLAGS="-g -O0 -fno-inline -fno-omit-frame-pointer" -cc $CFLAGS $TEST_PROGRAM_SOURCE -o $TEST_PROGRAM || exit 1 - # Add a 1 second delay to skip samples that are not in the leaf() function perf record -o $PERF_DATA --call-graph fp -e cycles//u -D 1000 --user-callchains -- $TEST_PROGRAM 2> /dev/null & PID=$! @@ -58,11 +30,11 @@ wait $PID # program # 728 leaf # 753 parent -# 76c main +# 76c leafloop # ... perf script -i $PERF_DATA -F comm,ip,sym | head -n4 perf script -i $PERF_DATA -F comm,ip,sym | head -n4 | \ awk '{ if ($2 != "") sym[i++] = $2 } END { if (sym[0] != "leaf" || sym[1] != "parent" || - sym[2] != "main") exit 1 }' + sym[2] != "leafloop") exit 1 }' From 39281709a6e2301ac4c6ac7015c7793392ca2dfe Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 16 Nov 2022 15:38:49 -0800 Subject: [PATCH 2037/4122] perf test: Add 'sqrtloop' test workload The sqrtloop creates a child process to run an infinite loop calling sqrt() with rand(). This is needed for ARM SPE fork test. $ perf test -w sqrtloop It can take an optional argument to specify how long it will run in seconds (default: 1). Committer notes: Explicitly ignored the sqrt() return to fix the build on systems where the compiler complains it isn't being used. And added a sqrtloop specific CFLAGS to disable optimizations to make this a bit more robust wrt dead code elimination. 
Doing that a -U_FORTIFY_SOURCE needs to be added, as -O0 is incompatible with it. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: German Gomez Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Zhengjun Xing Link: https://lore.kernel.org/r/20221116233854.1596378-8-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/builtin-test.c | 1 + tools/perf/tests/tests.h | 1 + tools/perf/tests/workloads/Build | 2 ++ tools/perf/tests/workloads/sqrtloop.c | 45 +++++++++++++++++++++++++++ 4 files changed, 49 insertions(+) create mode 100644 tools/perf/tests/workloads/sqrtloop.c diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 0ed5ac452f6e..9acb7a93eeb9 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -122,6 +122,7 @@ static struct test_workload *workloads[] = { &workload__noploop, &workload__thloop, &workload__leafloop, + &workload__sqrtloop, }; static int num_subtests(const struct test_suite *t) diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 86804dd6452b..18c40319e67c 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -203,5 +203,6 @@ struct test_workload workload__##work = { \ DECLARE_WORKLOAD(noploop); DECLARE_WORKLOAD(thloop); DECLARE_WORKLOAD(leafloop); +DECLARE_WORKLOAD(sqrtloop); #endif /* TESTS_H */ diff --git a/tools/perf/tests/workloads/Build b/tools/perf/tests/workloads/Build index 03dc675a4a7c..2312a338f01c 100644 --- a/tools/perf/tests/workloads/Build +++ b/tools/perf/tests/workloads/Build @@ -3,5 +3,7 @@ perf-y += noploop.o perf-y += thloop.o perf-y += leafloop.o +perf-y += sqrtloop.o +CFLAGS_sqrtloop.o = -g -O0 -fno-inline -U_FORTIFY_SOURCE CFLAGS_leafloop.o = -g -O0 -fno-inline -fno-omit-frame-pointer -U_FORTIFY_SOURCE diff --git a/tools/perf/tests/workloads/sqrtloop.c b/tools/perf/tests/workloads/sqrtloop.c new file mode 100644 index 
000000000000..ccc94c6a6676 --- /dev/null +++ b/tools/perf/tests/workloads/sqrtloop.c @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include +#include +#include +#include +#include +#include +#include "../tests.h" + +static volatile sig_atomic_t done; + +static void sighandler(int sig __maybe_unused) +{ + done = 1; +} + +static int __sqrtloop(int sec) +{ + signal(SIGALRM, sighandler); + alarm(sec); + + while (!done) + (void)sqrt(rand()); + return 0; +} + +static int sqrtloop(int argc, const char **argv) +{ + int sec = 1; + + if (argc > 0) + sec = atoi(argv[0]); + + switch (fork()) { + case 0: + return __sqrtloop(sec); + case -1: + return -1; + default: + wait(NULL); + } + return 0; +} + +DEFINE_WORKLOAD(sqrtloop); From e011979ec4c3482b68be05e2fdf98fefce4cd75b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 16 Nov 2022 15:38:50 -0800 Subject: [PATCH 2038/4122] perf test: Replace arm spe fork test workload with sqrtloop So that it can get rid of requirement of a compiler. I've also removed killall as it'll kill perf process now and run the test workload for 10 sec instead. Signed-off-by: Namhyung Kim Tested-by: James Clark Tested-by: Leo Yan Cc: Adrian Hunter Cc: Athira Jajeev Cc: German Gomez Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Zhengjun Xing Link: https://lore.kernel.org/r/20221116233854.1596378-9-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/test_arm_spe_fork.sh | 44 +-------------------- 1 file changed, 1 insertion(+), 43 deletions(-) diff --git a/tools/perf/tests/shell/test_arm_spe_fork.sh b/tools/perf/tests/shell/test_arm_spe_fork.sh index c920d3583d30..da810e1b2b9e 100755 --- a/tools/perf/tests/shell/test_arm_spe_fork.sh +++ b/tools/perf/tests/shell/test_arm_spe_fork.sh @@ -11,14 +11,7 @@ skip_if_no_arm_spe_event() { skip_if_no_arm_spe_event || exit 2 -# skip if there's no compiler -if ! 
[ -x "$(command -v cc)" ]; then - echo "failed: no compiler, install gcc" - exit 2 -fi - -TEST_PROGRAM_SOURCE=$(mktemp /tmp/__perf_test.program.XXXXX.c) -TEST_PROGRAM=$(mktemp /tmp/__perf_test.program.XXXXX) +TEST_PROGRAM="perf test -w sqrtloop 10" PERF_DATA=$(mktemp /tmp/__perf_test.perf.data.XXXXX) PERF_RECORD_LOG=$(mktemp /tmp/__perf_test.log.XXXXX) @@ -27,43 +20,10 @@ cleanup_files() echo "Cleaning up files..." rm -f ${PERF_RECORD_LOG} rm -f ${PERF_DATA} - rm -f ${TEST_PROGRAM_SOURCE} - rm -f ${TEST_PROGRAM} } trap cleanup_files exit term int -# compile test program -cat << EOF > $TEST_PROGRAM_SOURCE -#include -#include -#include -#include -#include - -int workload() { - while (1) - sqrt(rand()); - return 0; -} - -int main() { - switch (fork()) { - case 0: - return workload(); - case -1: - return 1; - default: - wait(NULL); - } - return 0; -} -EOF - -echo "Compiling test program..." -CFLAGS="-lm" -cc $TEST_PROGRAM_SOURCE $CFLAGS -o $TEST_PROGRAM || exit 1 - echo "Recording workload..." perf record -o ${PERF_DATA} -e arm_spe/period=65536/ -vvv -- $TEST_PROGRAM > ${PERF_RECORD_LOG} 2>&1 & PERFPID=$! @@ -78,8 +38,6 @@ echo Log lines after 1 second = $log1 kill $PERFPID wait $PERFPID -# test program may leave an orphan process running the workload -killall $(basename $TEST_PROGRAM) if [ "$log0" = "$log1" ]; then From a104f0ea99d846df19aad8a5476eb9bc39fa42ca Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 16 Nov 2022 15:38:51 -0800 Subject: [PATCH 2039/4122] perf test: Add 'brstack' test workload The brstack is to run different kinds of branches repeatedly. This is necessary for brstack test case to verify if it has correct branch info. $ perf test -w brstack I renamed the internal functions to have brstack_ prefix as it's too generic name. Add a -U_FORTIFY_SOURCE to the brstack CFLAGS, as the main perf flags set it and it requires building with optimization, and this new test has a -O0. 
Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: German Gomez Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Zhengjun Xing Link: https://lore.kernel.org/r/20221116233854.1596378-10-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/builtin-test.c | 1 + tools/perf/tests/tests.h | 1 + tools/perf/tests/workloads/Build | 2 ++ tools/perf/tests/workloads/brstack.c | 40 ++++++++++++++++++++++++++++ 4 files changed, 44 insertions(+) create mode 100644 tools/perf/tests/workloads/brstack.c diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 9acb7a93eeb9..69fa56939309 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -123,6 +123,7 @@ static struct test_workload *workloads[] = { &workload__thloop, &workload__leafloop, &workload__sqrtloop, + &workload__brstack, }; static int num_subtests(const struct test_suite *t) diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 18c40319e67c..dc96f59cac2e 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -204,5 +204,6 @@ DECLARE_WORKLOAD(noploop); DECLARE_WORKLOAD(thloop); DECLARE_WORKLOAD(leafloop); DECLARE_WORKLOAD(sqrtloop); +DECLARE_WORKLOAD(brstack); #endif /* TESTS_H */ diff --git a/tools/perf/tests/workloads/Build b/tools/perf/tests/workloads/Build index 2312a338f01c..ae06a5538b17 100644 --- a/tools/perf/tests/workloads/Build +++ b/tools/perf/tests/workloads/Build @@ -4,6 +4,8 @@ perf-y += noploop.o perf-y += thloop.o perf-y += leafloop.o perf-y += sqrtloop.o +perf-y += brstack.o CFLAGS_sqrtloop.o = -g -O0 -fno-inline -U_FORTIFY_SOURCE CFLAGS_leafloop.o = -g -O0 -fno-inline -fno-omit-frame-pointer -U_FORTIFY_SOURCE +CFLAGS_brstack.o = -g -O0 -fno-inline -U_FORTIFY_SOURCE diff --git a/tools/perf/tests/workloads/brstack.c b/tools/perf/tests/workloads/brstack.c new file mode 100644 index 000000000000..0b60bd37b9d1 --- /dev/null +++ 
b/tools/perf/tests/workloads/brstack.c @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include +#include "../tests.h" + +#define BENCH_RUNS 999999 + +static volatile int cnt; + +static void brstack_bar(void) { +} /* return */ + +static void brstack_foo(void) { + brstack_bar(); /* call */ +} /* return */ + +static void brstack_bench(void) { + void (*brstack_foo_ind)(void) = brstack_foo; + + if ((cnt++) % 3) /* branch (cond) */ + brstack_foo(); /* call */ + brstack_bar(); /* call */ + brstack_foo_ind(); /* call (ind) */ +} + +static int brstack(int argc, const char **argv) +{ + int num_loops = BENCH_RUNS; + + if (argc > 0) + num_loops = atoi(argv[0]); + + while (1) { + if ((cnt++) > num_loops) + break; + brstack_bench();/* call */ + } /* branch (uncond) */ + return 0; +} + +DEFINE_WORKLOAD(brstack); From 7bc1dd96cf48e1b44773698e7c97481f5f455f6c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 16 Nov 2022 15:38:52 -0800 Subject: [PATCH 2040/4122] perf test: Replace brstack test workload So that it can get rid of requirement of a compiler. Also rename the symbols to match with the perf test workload. Signed-off-by: Namhyung Kim Tested-by: James Clark Acked-by: German Gomez Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Zhengjun Xing Link: https://lore.kernel.org/r/20221116233854.1596378-11-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/test_brstack.sh | 66 +++++--------------------- 1 file changed, 12 insertions(+), 54 deletions(-) diff --git a/tools/perf/tests/shell/test_brstack.sh b/tools/perf/tests/shell/test_brstack.sh index d7ff5c4b4da4..5856639b565b 100755 --- a/tools/perf/tests/shell/test_brstack.sh +++ b/tools/perf/tests/shell/test_brstack.sh @@ -4,13 +4,6 @@ # SPDX-License-Identifier: GPL-2.0 # German Gomez , 2022 -# we need a C compiler to build the test programs -# so bail if none is found -if ! 
[ -x "$(command -v cc)" ]; then - echo "failed: no compiler, install gcc" - exit 2 -fi - # skip the test if the hardware doesn't support branch stack sampling # and if the architecture doesn't support filter types: any,save_type,u if ! perf record -o- --no-buildid --branch-filter any,save_type,u -- true > /dev/null 2>&1 ; then @@ -19,6 +12,7 @@ if ! perf record -o- --no-buildid --branch-filter any,save_type,u -- true > /dev fi TMPDIR=$(mktemp -d /tmp/__perf_test.program.XXXXX) +TESTPROG="perf test -w brstack" cleanup() { rm -rf $TMPDIR @@ -26,57 +20,24 @@ cleanup() { trap cleanup exit term int -gen_test_program() { - # generate test program - cat << EOF > $1 -#define BENCH_RUNS 999999 -int cnt; -void bar(void) { -} /* return */ -void foo(void) { - bar(); /* call */ -} /* return */ -void bench(void) { - void (*foo_ind)(void) = foo; - if ((cnt++) % 3) /* branch (cond) */ - foo(); /* call */ - bar(); /* call */ - foo_ind(); /* call (ind) */ -} -int main(void) -{ - int cnt = 0; - while (1) { - if ((cnt++) > BENCH_RUNS) - break; - bench(); /* call */ - } /* branch (uncond) */ - return 0; -} -EOF -} - test_user_branches() { echo "Testing user branch stack sampling" - gen_test_program "$TEMPDIR/program.c" - cc -fno-inline -g "$TEMPDIR/program.c" -o $TMPDIR/a.out - - perf record -o $TMPDIR/perf.data --branch-filter any,save_type,u -- $TMPDIR/a.out > /dev/null 2>&1 + perf record -o $TMPDIR/perf.data --branch-filter any,save_type,u -- ${TESTPROG} > /dev/null 2>&1 perf script -i $TMPDIR/perf.data --fields brstacksym | xargs -n1 > $TMPDIR/perf.script # example of branch entries: - # foo+0x14/bar+0x40/P/-/-/0/CALL + # brstack_foo+0x14/brstack_bar+0x40/P/-/-/0/CALL set -x - egrep -m1 "^bench\+[^ ]*/foo\+[^ ]*/IND_CALL$" $TMPDIR/perf.script - egrep -m1 "^foo\+[^ ]*/bar\+[^ ]*/CALL$" $TMPDIR/perf.script - egrep -m1 "^bench\+[^ ]*/foo\+[^ ]*/CALL$" $TMPDIR/perf.script - egrep -m1 "^bench\+[^ ]*/bar\+[^ ]*/CALL$" $TMPDIR/perf.script - egrep -m1 "^bar\+[^ ]*/foo\+[^ ]*/RET$" 
$TMPDIR/perf.script - egrep -m1 "^foo\+[^ ]*/bench\+[^ ]*/RET$" $TMPDIR/perf.script - egrep -m1 "^bench\+[^ ]*/bench\+[^ ]*/COND$" $TMPDIR/perf.script - egrep -m1 "^main\+[^ ]*/main\+[^ ]*/UNCOND$" $TMPDIR/perf.script + egrep -m1 "^brstack_bench\+[^ ]*/brstack_foo\+[^ ]*/IND_CALL$" $TMPDIR/perf.script + egrep -m1 "^brstack_foo\+[^ ]*/brstack_bar\+[^ ]*/CALL$" $TMPDIR/perf.script + egrep -m1 "^brstack_bench\+[^ ]*/brstack_foo\+[^ ]*/CALL$" $TMPDIR/perf.script + egrep -m1 "^brstack_bench\+[^ ]*/brstack_bar\+[^ ]*/CALL$" $TMPDIR/perf.script + egrep -m1 "^brstack_bar\+[^ ]*/brstack_foo\+[^ ]*/RET$" $TMPDIR/perf.script + egrep -m1 "^brstack_foo\+[^ ]*/brstack_bench\+[^ ]*/RET$" $TMPDIR/perf.script + egrep -m1 "^brstack_bench\+[^ ]*/brstack_bench\+[^ ]*/COND$" $TMPDIR/perf.script + egrep -m1 "^brstack\+[^ ]*/brstack\+[^ ]*/UNCOND$" $TMPDIR/perf.script set +x # some branch types are still not being tested: @@ -91,10 +52,7 @@ test_filter() { echo "Testing branch stack filtering permutation ($filter,$expect)" - gen_test_program "$TEMPDIR/program.c" - cc -fno-inline -g "$TEMPDIR/program.c" -o $TMPDIR/a.out - - perf record -o $TMPDIR/perf.data --branch-filter $filter,save_type,u -- $TMPDIR/a.out > /dev/null 2>&1 + perf record -o $TMPDIR/perf.data --branch-filter $filter,save_type,u -- ${TESTPROG} > /dev/null 2>&1 perf script -i $TMPDIR/perf.data --fields brstack | xargs -n1 > $TMPDIR/perf.script # fail if we find any branch type that doesn't match any of the expected ones From 3dfc01fe9d12a1e832f49deab37279faa8a9ebc8 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 16 Nov 2022 15:38:53 -0800 Subject: [PATCH 2041/4122] perf test: Add 'datasym' test workload The datasym workload is to check if perf mem command gets the data addresses precisely. This is needed for data symbol test. $ perf test -w datasym I had to keep the buf1 in the data section, otherwise it could end up in the BSS and was mmaped as a separate //anon region, then it was not symbolized at all. 
It needs to be fixed separately. Committer notes: Add a -U _FORTIFY_SOURCE to the datasym CFLAGS, as the main perf flags set it and it requires building with optimization, and this new test has a -O0. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: German Gomez Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Zhengjun Xing Link: https://lore.kernel.org/r/20221116233854.1596378-12-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/builtin-test.c | 1 + tools/perf/tests/tests.h | 1 + tools/perf/tests/workloads/Build | 2 ++ tools/perf/tests/workloads/datasym.c | 24 ++++++++++++++++++++++++ 4 files changed, 28 insertions(+) create mode 100644 tools/perf/tests/workloads/datasym.c diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 69fa56939309..4c6ae59a4dfd 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -124,6 +124,7 @@ static struct test_workload *workloads[] = { &workload__leafloop, &workload__sqrtloop, &workload__brstack, + &workload__datasym, }; static int num_subtests(const struct test_suite *t) diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index dc96f59cac2e..e15f24cfc909 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -205,5 +205,6 @@ DECLARE_WORKLOAD(thloop); DECLARE_WORKLOAD(leafloop); DECLARE_WORKLOAD(sqrtloop); DECLARE_WORKLOAD(brstack); +DECLARE_WORKLOAD(datasym); #endif /* TESTS_H */ diff --git a/tools/perf/tests/workloads/Build b/tools/perf/tests/workloads/Build index ae06a5538b17..a1f34d5861e3 100644 --- a/tools/perf/tests/workloads/Build +++ b/tools/perf/tests/workloads/Build @@ -5,7 +5,9 @@ perf-y += thloop.o perf-y += leafloop.o perf-y += sqrtloop.o perf-y += brstack.o +perf-y += datasym.o CFLAGS_sqrtloop.o = -g -O0 -fno-inline -U_FORTIFY_SOURCE CFLAGS_leafloop.o = -g -O0 -fno-inline -fno-omit-frame-pointer -U_FORTIFY_SOURCE CFLAGS_brstack.o = 
-g -O0 -fno-inline -U_FORTIFY_SOURCE +CFLAGS_datasym.o = -g -O0 -fno-inline -U_FORTIFY_SOURCE diff --git a/tools/perf/tests/workloads/datasym.c b/tools/perf/tests/workloads/datasym.c new file mode 100644 index 000000000000..ddd40bc63448 --- /dev/null +++ b/tools/perf/tests/workloads/datasym.c @@ -0,0 +1,24 @@ +#include +#include "../tests.h" + +typedef struct _buf { + char data1; + char reserved[55]; + char data2; +} buf __attribute__((aligned(64))); + +static buf buf1 = { + /* to have this in the data section */ + .reserved[0] = 1, +}; + +static int datasym(int argc __maybe_unused, const char **argv __maybe_unused) +{ + for (;;) { + buf1.data1++; + buf1.data2 += buf1.data1; + } + return 0; +} + +DEFINE_WORKLOAD(datasym); From 0b77fe474696aaaa592d52c7316c135401337aec Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 16 Nov 2022 15:38:54 -0800 Subject: [PATCH 2042/4122] perf test: Replace data symbol test workload with datasym So that it can get rid of requirement of a compiler. $ sudo ./perf test -v 109 109: Test data symbol : --- start --- test child forked, pid 844526 Recording workload... [ perf record: Woken up 2 times to write data ] [ perf record: Captured and wrote 0.354 MB /tmp/__perf_test.perf.data.GFeZO (4847 samples) ] Cleaning up files... 
test child finished with 0 ---- end ---- Test data symbol: Ok Signed-off-by: Namhyung Kim Tested-by: James Clark Cc: Adrian Hunter Cc: Athira Jajeev Cc: German Gomez Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Zhengjun Xing Link: https://lore.kernel.org/r/20221116233854.1596378-13-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/test_data_symbol.sh | 29 +--------------------- 1 file changed, 1 insertion(+), 28 deletions(-) diff --git a/tools/perf/tests/shell/test_data_symbol.sh b/tools/perf/tests/shell/test_data_symbol.sh index cd6eb54d235d..d871e6c743ef 100755 --- a/tools/perf/tests/shell/test_data_symbol.sh +++ b/tools/perf/tests/shell/test_data_symbol.sh @@ -11,13 +11,7 @@ skip_if_no_mem_event() { skip_if_no_mem_event || exit 2 -# skip if there's no compiler -if ! [ -x "$(command -v cc)" ]; then - echo "skip: no compiler, install gcc" - exit 2 -fi - -TEST_PROGRAM=$(mktemp /tmp/__perf_test.program.XXXXX) +TEST_PROGRAM="perf test -w datasym" PERF_DATA=$(mktemp /tmp/__perf_test.perf.data.XXXXX) check_result() { @@ -45,31 +39,10 @@ cleanup_files() { echo "Cleaning up files..." rm -f ${PERF_DATA} - rm -f ${TEST_PROGRAM} } trap cleanup_files exit term int -# compile test program -echo "Compiling test program..." -cat << EOF | cc -o ${TEST_PROGRAM} -x c - -typedef struct _buf { - char data1; - char reserved[55]; - char data2; -} buf __attribute__((aligned(64))); - -static buf buf1; - -int main(void) { - for (;;) { - buf1.data1++; - buf1.data2 += buf1.data1; - } - return 0; -} -EOF - echo "Recording workload..." 
# perf mem/c2c internally uses IBS PMU on AMD CPU which doesn't support From a3720e969c6de39210809ca9aaebec81919d6c6c Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 16 Nov 2022 14:46:31 -0800 Subject: [PATCH 2043/4122] perf build: Fix LIBTRACEEVENT_DYNAMIC The tools/lib includes fixes break LIBTRACEEVENT_DYNAMIC as the makefile erroneously had dependencies on building libtraceevent even when not linking with it. This change fixes the issues with LIBTRACEEVENT_DYNAMIC by making the built files optional. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andrii Nakryiko Cc: Jiri Olsa Cc: Mark Rutland Cc: Masahiro Yamada Cc: Namhyung Kim Cc: Nick Desaulniers Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Stephane Eranian Cc: bpf@vger.kernel.org Link: http://lore.kernel.org/lkml/20221116224631.207631-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 25 ++++++++++++++++++++++--- tools/perf/util/setup.py | 3 ++- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 8c0df762fb02..a17a6ea85e81 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -242,8 +242,10 @@ sub-make: fixdep else # force_fixdep LIBAPI_DIR = $(srctree)/tools/lib/api/ +ifndef LIBTRACEEVENT_DYNAMIC LIBTRACEEVENT_DIR = $(srctree)/tools/lib/traceevent/ LIBTRACEEVENT_PLUGINS_DIR = $(LIBTRACEEVENT_DIR)/plugins +endif LIBBPF_DIR = $(srctree)/tools/lib/bpf/ LIBSUBCMD_DIR = $(srctree)/tools/lib/subcmd/ LIBSYMBOL_DIR = $(srctree)/tools/lib/symbol/ @@ -293,6 +295,7 @@ SCRIPT_SH += perf-iostat.sh grep-libs = $(filter -l%,$(1)) strip-libs = $(filter-out -l%,$(1)) +ifndef LIBTRACEEVENT_DYNAMIC ifneq ($(OUTPUT),) LIBTRACEEVENT_OUTPUT = $(abspath $(OUTPUT))/libtraceevent else @@ -306,13 +309,16 @@ LIBTRACEEVENT = $(LIBTRACEEVENT_OUTPUT)/libtraceevent.a export LIBTRACEEVENT LIBTRACEEVENT_DYNAMIC_LIST = $(LIBTRACEEVENT_PLUGINS_OUTPUT)/libtraceevent-dynamic-list CFLAGS += 
-I$(LIBTRACEEVENT_OUTPUT)/include - # # The static build has no dynsym table, so this does not work for # static build. Looks like linker starts to scream about that now # (in Fedora 26) so we need to switch it off for static build. DYNAMIC_LIST_LDFLAGS = -Xlinker --dynamic-list=$(LIBTRACEEVENT_DYNAMIC_LIST) LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS = $(if $(findstring -static,$(LDFLAGS)),,$(DYNAMIC_LIST_LDFLAGS)) +else +LIBTRACEEVENT_DYNAMIC_LIST = +LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS = +endif ifneq ($(OUTPUT),) LIBAPI_OUTPUT = $(abspath $(OUTPUT))/libapi @@ -375,7 +381,11 @@ export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP python-clean := $(call QUIET_CLEAN, python) $(RM) -r $(PYTHON_EXTBUILD) $(OUTPUT)python/perf*.so PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources) +ifndef LIBTRACEEVENT_DYNAMIC PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) $(LIBAPI) +else +PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBAPI) +endif SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH)) @@ -785,9 +795,12 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc $(LIBPERF) \ $(LIBSUBCMD) \ $(LIBSYMBOL) \ - $(LIBTRACEEVENT) \ bpf-skel +ifndef LIBTRACEEVENT_DYNAMIC +prepare: $(LIBTRACEEVENT) +endif + $(OUTPUT)%.o: %.c prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ @@ -843,6 +856,7 @@ endif $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h) +ifndef LIBTRACEEVENT_DYNAMIC LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) 'EXTRA_CFLAGS=$(EXTRA_CFLAGS)' 'LDFLAGS=$(filter-out -static,$(LDFLAGS))' $(LIBTRACEEVENT): FORCE | $(LIBTRACEEVENT_OUTPUT) @@ -872,6 +886,7 @@ install-traceevent-plugins: libtraceevent_plugins $(Q)$(MAKE) -C $(LIBTRACEEVENT_PLUGINS_DIR) O=$(LIBTRACEEVENT_PLUGINS_OUTPUT) \ DESTDIR=$(LIBTRACEEVENT_PLUGINS_DESTDIR) prefix= \ $(LIBTRACEEVENT_FLAGS) install +endif $(LIBAPI): FORCE | $(LIBAPI_OUTPUT) $(Q)$(MAKE) -C $(LIBAPI_DIR) O=$(LIBAPI_OUTPUT) \ @@ -1152,7 +1167,7 @@ 
endif # BUILD_BPF_SKEL bpf-skel-clean: $(call QUIET_CLEAN, bpf-skel) $(RM) -r $(SKEL_TMP_OUT) $(SKELETONS) -clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBSYMBOL)-clean $(LIBPERF)-clean fixdep-clean python-clean bpf-skel-clean tests-coresight-targets-clean libtraceevent_plugins-clean +clean:: $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBSYMBOL)-clean $(LIBPERF)-clean fixdep-clean python-clean bpf-skel-clean tests-coresight-targets-clean $(call QUIET_CLEAN, core-objs) $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive $(OUTPUT)perf-iostat $(LANG_BINDINGS) $(Q)find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete $(Q)$(RM) $(OUTPUT).config-detected @@ -1192,6 +1207,10 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea $(call QUIET_CLEAN, Documentation) \ $(MAKE) -C $(DOC_DIR) O=$(OUTPUT) clean >/dev/null +ifndef LIBTRACEEVENT_DYNAMIC +clean:: $(LIBTRACEEVENT)-clean libtraceevent_plugins-clean +endif + # # To provide FEATURE-DUMP into $(FEATURE_DUMP_COPY) # file if defined, with no further action. 
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 5b1e6468d5e8..43e7ca40b2ec 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -77,7 +77,8 @@ perf = Extension('perf', include_dirs = ['util/include'], libraries = extra_libraries, extra_compile_args = cflags, - extra_objects = [libtraceevent, libapikfs, libperf], + extra_objects = [ x for x in [libtraceevent, libapikfs, libperf] + if x is not None], ) setup(name='perf', From e5c6109f4813246aa21e2c441e3cde549efa1f18 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 14 Nov 2022 13:07:22 -0800 Subject: [PATCH 2044/4122] perf list: Reorganize to use callbacks to allow honouring command line options MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rather than controlling the list output with passed flags, add callbacks that are called when an event or metric is encountered. State is passed to the callback so that command line options can be respected; alternatively, the callbacks can be changed. Fix a few bugs: - wordwrap to columns metric descriptions and expressions; - remove unnecessary whitespace after PMU event names; - the metric filter is a glob but matched using strstr which will always fail, switch to using a proper globmatch; - the detail flag gives details for extra kernel PMU events like branch-instructions. In metricgroup.c switch from struct mep being a rbtree of metricgroups containing a list of metrics, to the tree directly containing all the metrics. In general the alias for a name is passed to the print routine rather than being contained in the name with OR. 
Committer notes: Check the asprintf() return to address this on fedora 36: util/print-events.c: In function ‘print_sdt_events’: util/print-events.c:183:33: error: ignoring return value of ‘asprintf’ declared with attribute ‘warn_unused_result’ [-Werror=unused-result] 183 | asprintf(&evt_name, "%s@%s(%.12s)", sdt_name->s, path, bid); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors $ gcc --version | head -1 gcc (GCC) 12.2.1 20220819 (Red Hat 12.2.1-2) $ Fix ps.pmu_glob setting when dealing with *:* events; it was being left with a freed pointer that then at the end of cmd_list() would be double freed. Check if pmu_name is NULL in default_print_event() before calling strglobmatch(pmu_name, ...) to avoid a segfault. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Caleb Biggers Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Stephane Eranian Cc: Weilin Wang Cc: Xin Gao Cc: Xing Zhengjun Link: http://lore.kernel.org/lkml/20221114210723.2749751-10-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-list.c | 333 ++++++++++++++++++++++++++----- tools/perf/util/metricgroup.c | 251 +++++++----------------- tools/perf/util/metricgroup.h | 4 +- tools/perf/util/pmu.c | 145 +++++--------- tools/perf/util/pmu.h | 5 +- tools/perf/util/print-events.c | 348 ++++++++++++++++++--------------- tools/perf/util/print-events.h | 42 ++-- 7 files changed, 620 insertions(+), 508 deletions(-) diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index cc84ced6da26..0c84fdb3ad37 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -15,31 +15,240 @@ #include "util/pmu-hybrid.h" #include "util/debug.h" #include "util/metricgroup.h" +#include "util/string2.h" +#include "util/strlist.h" #include #include #include -static bool 
desc_flag = true; -static bool details_flag; +/** + * struct print_state - State and configuration passed to the default_print + * functions. + */ +struct print_state { + /** + * @pmu_glob: Optionally restrict PMU and metric matching to PMU or + * debugfs subsystem name. + */ + char *pmu_glob; + /** @event_glob: Optional pattern matching glob. */ + char *event_glob; + /** @name_only: Print event or metric names only. */ + bool name_only; + /** @desc: Print the event or metric description. */ + bool desc; + /** @long_desc: Print longer event or metric description. */ + bool long_desc; + /** @deprecated: Print deprecated events or metrics. */ + bool deprecated; + /** + * @detailed: Print extra information on the perf event such as names + * and expressions used internally by events. + */ + bool detailed; + /** @metrics: Controls printing of metric and metric groups. */ + bool metrics; + /** @metricgroups: Controls printing of metric and metric groups. */ + bool metricgroups; + /** @last_topic: The last printed event topic. */ + char *last_topic; + /** @last_metricgroups: The last printed metric group. */ + char *last_metricgroups; + /** @visited_metrics: Metrics that are printed to avoid duplicates. */ + struct strlist *visited_metrics; +}; + +static void default_print_start(void *ps) +{ + struct print_state *print_state = ps; + + if (!print_state->name_only && pager_in_use()) + printf("\nList of pre-defined events (to be used in -e or -M):\n\n"); +} + +static void default_print_end(void *print_state __maybe_unused) {} + +static void wordwrap(const char *s, int start, int max, int corr) +{ + int column = start; + int n; + + while (*s) { + int wlen = strcspn(s, " \t"); + + if (column + wlen >= max && column > start) { + printf("\n%*s", start, ""); + column = start + corr; + } + n = printf("%s%.*s", column > start ? 
" " : "", wlen, s); + if (n <= 0) + break; + s += wlen; + column += n; + s = skip_spaces(s); + } +} + +static void default_print_event(void *ps, const char *pmu_name, const char *topic, + const char *event_name, const char *event_alias, + const char *scale_unit __maybe_unused, + bool deprecated, const char *event_type_desc, + const char *desc, const char *long_desc, + const char *encoding_desc, + const char *metric_name, const char *metric_expr) +{ + struct print_state *print_state = ps; + int pos; + + if (deprecated && !print_state->deprecated) + return; + + if (print_state->pmu_glob && pmu_name && !strglobmatch(pmu_name, print_state->pmu_glob)) + return; + + if (print_state->event_glob && + (!event_name || !strglobmatch(event_name, print_state->event_glob)) && + (!event_alias || !strglobmatch(event_alias, print_state->event_glob)) && + (!topic || !strglobmatch_nocase(topic, print_state->event_glob))) + return; + + if (print_state->name_only) { + if (event_alias && strlen(event_alias)) + printf("%s ", event_alias); + else + printf("%s ", event_name); + return; + } + + if (strcmp(print_state->last_topic, topic ?: "")) { + if (topic) + printf("\n%s:\n", topic); + free(print_state->last_topic); + print_state->last_topic = strdup(topic ?: ""); + } + + if (event_alias && strlen(event_alias)) + pos = printf(" %s OR %s", event_name, event_alias); + else + pos = printf(" %s", event_name); + + if (!topic && event_type_desc) { + for (; pos < 53; pos++) + putchar(' '); + printf("[%s]\n", event_type_desc); + } else + putchar('\n'); + + if (desc && print_state->desc) { + printf("%*s", 8, "["); + wordwrap(desc, 8, pager_get_columns(), 0); + printf("]\n"); + } + + if (long_desc && print_state->long_desc) { + printf("%*s", 8, "["); + wordwrap(long_desc, 8, pager_get_columns(), 0); + printf("]\n"); + } + + if (print_state->detailed && encoding_desc) { + printf("%*s%s", 8, "", encoding_desc); + if (metric_name) + printf(" MetricName: %s", metric_name); + if (metric_expr) + printf(" 
MetricExpr: %s", metric_expr); + putchar('\n'); + } +} + +static void default_print_metric(void *ps, + const char *group, + const char *name, + const char *desc, + const char *long_desc, + const char *expr, + const char *unit __maybe_unused) +{ + struct print_state *print_state = ps; + + if (print_state->event_glob && + (!print_state->metrics || !name || !strglobmatch(name, print_state->event_glob)) && + (!print_state->metricgroups || !group || !strglobmatch(group, print_state->event_glob))) + return; + + if (!print_state->name_only && !print_state->last_metricgroups) { + if (print_state->metricgroups) { + printf("\nMetric Groups:\n"); + if (!print_state->metrics) + putchar('\n'); + } else { + printf("\nMetrics:\n\n"); + } + } + if (!print_state->last_metricgroups || + strcmp(print_state->last_metricgroups, group ?: "")) { + if (group && print_state->metricgroups) { + if (print_state->name_only) + printf("%s ", group); + else if (print_state->metrics) + printf("\n%s:\n", group); + else + printf("%s\n", group); + } + free(print_state->last_metricgroups); + print_state->last_metricgroups = strdup(group ?: ""); + } + if (!print_state->metrics) + return; + + if (print_state->name_only) { + if (print_state->metrics && + !strlist__has_entry(print_state->visited_metrics, name)) { + printf("%s ", name); + strlist__add(print_state->visited_metrics, name); + } + return; + } + printf(" %s\n", name); + + if (desc && print_state->desc) { + printf("%*s", 8, "["); + wordwrap(desc, 8, pager_get_columns(), 0); + printf("]\n"); + } + if (long_desc && print_state->long_desc) { + printf("%*s", 8, "["); + wordwrap(long_desc, 8, pager_get_columns(), 0); + printf("]\n"); + } + if (expr && print_state->detailed) { + printf("%*s", 8, "["); + wordwrap(expr, 8, pager_get_columns(), 0); + printf("]\n"); + } +} int cmd_list(int argc, const char **argv) { int i, ret = 0; - bool raw_dump = false; - bool long_desc_flag = false; - bool deprecated = false; - char *pmu_name = NULL; + struct 
print_state ps = {}; + struct print_callbacks print_cb = { + .print_start = default_print_start, + .print_end = default_print_end, + .print_event = default_print_event, + .print_metric = default_print_metric, + }; const char *hybrid_name = NULL; const char *unit_name = NULL; struct option list_options[] = { - OPT_BOOLEAN(0, "raw-dump", &raw_dump, "Dump raw events"), - OPT_BOOLEAN('d', "desc", &desc_flag, + OPT_BOOLEAN(0, "raw-dump", &ps.name_only, "Dump raw events"), + OPT_BOOLEAN('d', "desc", &ps.desc, "Print extra event descriptions. --no-desc to not print."), - OPT_BOOLEAN('v', "long-desc", &long_desc_flag, + OPT_BOOLEAN('v', "long-desc", &ps.long_desc, "Print longer event descriptions."), - OPT_BOOLEAN(0, "details", &details_flag, + OPT_BOOLEAN(0, "details", &ps.detailed, "Print information on the perf event names and expressions used internally by events."), - OPT_BOOLEAN(0, "deprecated", &deprecated, + OPT_BOOLEAN(0, "deprecated", &ps.deprecated, "Print deprecated events."), OPT_STRING(0, "cputype", &hybrid_name, "hybrid cpu type", "Limit PMU or metric printing to the given hybrid PMU (e.g. 
core or atom)."), @@ -63,20 +272,28 @@ int cmd_list(int argc, const char **argv) setup_pager(); - if (!raw_dump && pager_in_use()) - printf("\nList of pre-defined events (to be used in -e or -M):\n\n"); + if (!ps.name_only) + setup_pager(); + ps.desc = !ps.long_desc; + ps.last_topic = strdup(""); + assert(ps.last_topic); + ps.visited_metrics = strlist__new(NULL, NULL); + assert(ps.visited_metrics); if (unit_name) - pmu_name = strdup(unit_name); + ps.pmu_glob = strdup(unit_name); else if (hybrid_name) { - pmu_name = perf_pmu__hybrid_type_to_pmu(hybrid_name); - if (!pmu_name) + ps.pmu_glob = perf_pmu__hybrid_type_to_pmu(hybrid_name); + if (!ps.pmu_glob) pr_warning("WARNING: hybrid cputype is not supported!\n"); } + print_cb.print_start(&ps); + if (argc == 0) { - print_events(NULL, raw_dump, !desc_flag, long_desc_flag, - details_flag, deprecated, pmu_name); + ps.metrics = true; + ps.metricgroups = true; + print_events(&print_cb, &ps); goto out; } @@ -84,31 +301,35 @@ int cmd_list(int argc, const char **argv) char *sep, *s; if (strcmp(argv[i], "tracepoint") == 0) - print_tracepoint_events(NULL, NULL, raw_dump); + print_tracepoint_events(&print_cb, &ps); else if (strcmp(argv[i], "hw") == 0 || strcmp(argv[i], "hardware") == 0) - print_symbol_events(NULL, PERF_TYPE_HARDWARE, - event_symbols_hw, PERF_COUNT_HW_MAX, raw_dump); + print_symbol_events(&print_cb, &ps, PERF_TYPE_HARDWARE, + event_symbols_hw, PERF_COUNT_HW_MAX); else if (strcmp(argv[i], "sw") == 0 || strcmp(argv[i], "software") == 0) { - print_symbol_events(NULL, PERF_TYPE_SOFTWARE, - event_symbols_sw, PERF_COUNT_SW_MAX, raw_dump); - print_tool_events(NULL, raw_dump); + print_symbol_events(&print_cb, &ps, PERF_TYPE_SOFTWARE, + event_symbols_sw, PERF_COUNT_SW_MAX); + print_tool_events(&print_cb, &ps); } else if (strcmp(argv[i], "cache") == 0 || strcmp(argv[i], "hwcache") == 0) - print_hwcache_events(NULL, raw_dump); + print_hwcache_events(&print_cb, &ps); else if (strcmp(argv[i], "pmu") == 0) - 
print_pmu_events(NULL, raw_dump, !desc_flag, - long_desc_flag, details_flag, - deprecated, pmu_name); + print_pmu_events(&print_cb, &ps); else if (strcmp(argv[i], "sdt") == 0) - print_sdt_events(NULL, NULL, raw_dump); - else if (strcmp(argv[i], "metric") == 0 || strcmp(argv[i], "metrics") == 0) - metricgroup__print(true, false, NULL, raw_dump, details_flag, pmu_name); - else if (strcmp(argv[i], "metricgroup") == 0 || strcmp(argv[i], "metricgroups") == 0) - metricgroup__print(false, true, NULL, raw_dump, details_flag, pmu_name); - else if ((sep = strchr(argv[i], ':')) != NULL) { + print_sdt_events(&print_cb, &ps); + else if (strcmp(argv[i], "metric") == 0 || strcmp(argv[i], "metrics") == 0) { + ps.metricgroups = false; + ps.metrics = true; + metricgroup__print(&print_cb, &ps); + } else if (strcmp(argv[i], "metricgroup") == 0 || + strcmp(argv[i], "metricgroups") == 0) { + ps.metricgroups = true; + ps.metrics = false; + metricgroup__print(&print_cb, &ps); + } else if ((sep = strchr(argv[i], ':')) != NULL) { int sep_idx; + char *old_pmu_glob = ps.pmu_glob; sep_idx = sep - argv[i]; s = strdup(argv[i]); @@ -118,34 +339,42 @@ int cmd_list(int argc, const char **argv) } s[sep_idx] = '\0'; - print_tracepoint_events(s, s + sep_idx + 1, raw_dump); - print_sdt_events(s, s + sep_idx + 1, raw_dump); - metricgroup__print(true, true, s, raw_dump, details_flag, pmu_name); + ps.pmu_glob = s; + ps.event_glob = s + sep_idx + 1; + print_tracepoint_events(&print_cb, &ps); + print_sdt_events(&print_cb, &ps); + ps.metrics = true; + ps.metricgroups = true; + metricgroup__print(&print_cb, &ps); free(s); + ps.pmu_glob = old_pmu_glob; } else { if (asprintf(&s, "*%s*", argv[i]) < 0) { printf("Critical: Not enough memory! 
Trying to continue...\n"); continue; } - print_symbol_events(s, PERF_TYPE_HARDWARE, - event_symbols_hw, PERF_COUNT_HW_MAX, raw_dump); - print_symbol_events(s, PERF_TYPE_SOFTWARE, - event_symbols_sw, PERF_COUNT_SW_MAX, raw_dump); - print_tool_events(s, raw_dump); - print_hwcache_events(s, raw_dump); - print_pmu_events(s, raw_dump, !desc_flag, - long_desc_flag, - details_flag, - deprecated, - pmu_name); - print_tracepoint_events(NULL, s, raw_dump); - print_sdt_events(NULL, s, raw_dump); - metricgroup__print(true, true, s, raw_dump, details_flag, pmu_name); + ps.event_glob = s; + print_symbol_events(&print_cb, &ps, PERF_TYPE_HARDWARE, + event_symbols_hw, PERF_COUNT_HW_MAX); + print_symbol_events(&print_cb, &ps, PERF_TYPE_SOFTWARE, + event_symbols_sw, PERF_COUNT_SW_MAX); + print_tool_events(&print_cb, &ps); + print_hwcache_events(&print_cb, &ps); + print_pmu_events(&print_cb, &ps); + print_tracepoint_events(&print_cb, &ps); + print_sdt_events(&print_cb, &ps); + ps.metrics = true; + ps.metricgroups = true; + metricgroup__print(&print_cb, &ps); free(s); } } out: - free(pmu_name); + print_cb.print_end(&ps); + free(ps.pmu_glob); + free(ps.last_topic); + free(ps.last_metricgroups); + strlist__delete(ps.visited_metrics); return ret; } diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index cf9e2452d322..6eac7a60ed27 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -12,6 +12,7 @@ #include "strbuf.h" #include "pmu.h" #include "pmu-hybrid.h" +#include "print-events.h" #include "expr.h" #include "rblist.h" #include @@ -353,56 +354,41 @@ static bool match_pe_metric(const struct pmu_event *pe, const char *metric) match_metric(pe->metric_name, metric); } +/** struct mep - RB-tree node for building printing information. */ struct mep { + /** nd - RB-tree element. */ struct rb_node nd; - const char *name; - struct strlist *metrics; + /** @metric_group: Owned metric group name, separated others with ';'. 
*/ + char *metric_group; + const char *metric_name; + const char *metric_desc; + const char *metric_long_desc; + const char *metric_expr; + const char *metric_unit; }; static int mep_cmp(struct rb_node *rb_node, const void *entry) { struct mep *a = container_of(rb_node, struct mep, nd); struct mep *b = (struct mep *)entry; + int ret; - return strcmp(a->name, b->name); + ret = strcmp(a->metric_group, b->metric_group); + if (ret) + return ret; + + return strcmp(a->metric_name, b->metric_name); } -static struct rb_node *mep_new(struct rblist *rl __maybe_unused, - const void *entry) +static struct rb_node *mep_new(struct rblist *rl __maybe_unused, const void *entry) { struct mep *me = malloc(sizeof(struct mep)); if (!me) return NULL; - memcpy(me, entry, sizeof(struct mep)); - me->name = strdup(me->name); - if (!me->name) - goto out_me; - me->metrics = strlist__new(NULL, NULL); - if (!me->metrics) - goto out_name; - return &me->nd; -out_name: - zfree(&me->name); -out_me: - free(me); - return NULL; -} -static struct mep *mep_lookup(struct rblist *groups, const char *name) -{ - struct rb_node *nd; - struct mep me = { - .name = name - }; - nd = rblist__find(groups, &me); - if (nd) - return container_of(nd, struct mep, nd); - rblist__add_node(groups, &me); - nd = rblist__find(groups, &me); - if (nd) - return container_of(nd, struct mep, nd); - return NULL; + memcpy(me, entry, sizeof(struct mep)); + return &me->nd; } static void mep_delete(struct rblist *rl __maybe_unused, @@ -410,102 +396,61 @@ static void mep_delete(struct rblist *rl __maybe_unused, { struct mep *me = container_of(nd, struct mep, nd); - strlist__delete(me->metrics); - zfree(&me->name); + zfree(&me->metric_group); free(me); } -static void metricgroup__print_strlist(struct strlist *metrics, bool raw) +static struct mep *mep_lookup(struct rblist *groups, const char *metric_group, + const char *metric_name) { - struct str_node *sn; - int n = 0; - - strlist__for_each_entry (sn, metrics) { - if (raw) - 
printf("%s%s", n > 0 ? " " : "", sn->s); - else - printf(" %s\n", sn->s); - n++; + struct rb_node *nd; + struct mep me = { + .metric_group = strdup(metric_group), + .metric_name = metric_name, + }; + nd = rblist__find(groups, &me); + if (nd) { + free(me.metric_group); + return container_of(nd, struct mep, nd); } - if (raw) - putchar('\n'); + rblist__add_node(groups, &me); + nd = rblist__find(groups, &me); + if (nd) + return container_of(nd, struct mep, nd); + return NULL; } -static int metricgroup__print_pmu_event(const struct pmu_event *pe, - bool metricgroups, char *filter, - bool raw, bool details, - struct rblist *groups, - struct strlist *metriclist) +static int metricgroup__add_to_mep_groups(const struct pmu_event *pe, + struct rblist *groups) { const char *g; char *omg, *mg; - g = pe->metric_group; - if (!g && pe->metric_name) { - if (pe->name) - return 0; - g = "No_group"; - } - - if (!g) - return 0; - - mg = strdup(g); - + mg = strdup(pe->metric_group ?: "No_group"); if (!mg) return -ENOMEM; omg = mg; while ((g = strsep(&mg, ";")) != NULL) { struct mep *me; - char *s; g = skip_spaces(g); - if (*g == 0) - g = "No_group"; - if (filter && !strstr(g, filter)) - continue; - if (raw) - s = (char *)pe->metric_name; - else { - if (asprintf(&s, "%s\n%*s%s]", - pe->metric_name, 8, "[", pe->desc) < 0) - return -1; - if (details) { - if (asprintf(&s, "%s\n%*s%s]", - s, 8, "[", pe->metric_expr) < 0) - return -1; - } + if (strlen(g)) + me = mep_lookup(groups, g, pe->metric_name); + else + me = mep_lookup(groups, "No_group", pe->metric_name); + + if (me) { + me->metric_desc = pe->desc; + me->metric_long_desc = pe->long_desc; + me->metric_expr = pe->metric_expr; + me->metric_unit = pe->unit; } - - if (!s) - continue; - - if (!metricgroups) { - strlist__add(metriclist, s); - } else { - me = mep_lookup(groups, g); - if (!me) - continue; - strlist__add(me->metrics, s); - } - - if (!raw) - free(s); } free(omg); return 0; } -struct metricgroup_print_sys_idata { - struct 
strlist *metriclist; - char *filter; - struct rblist *groups; - bool metricgroups; - bool raw; - bool details; -}; - struct metricgroup_iter_data { pmu_event_iter_fn fn; void *data; @@ -528,61 +473,26 @@ static int metricgroup__sys_event_iter(const struct pmu_event *pe, return d->fn(pe, table, d->data); } - return 0; } -static int metricgroup__print_sys_event_iter(const struct pmu_event *pe, - const struct pmu_events_table *table __maybe_unused, - void *data) +static int metricgroup__add_to_mep_groups_callback(const struct pmu_event *pe, + const struct pmu_events_table *table __maybe_unused, + void *vdata) { - struct metricgroup_print_sys_idata *d = data; + struct rblist *groups = vdata; - return metricgroup__print_pmu_event(pe, d->metricgroups, d->filter, d->raw, - d->details, d->groups, d->metriclist); -} - -struct metricgroup_print_data { - const char *pmu_name; - struct strlist *metriclist; - char *filter; - struct rblist *groups; - bool metricgroups; - bool raw; - bool details; -}; - -static int metricgroup__print_callback(const struct pmu_event *pe, - const struct pmu_events_table *table __maybe_unused, - void *vdata) -{ - struct metricgroup_print_data *data = vdata; - const char *pmu = pe->pmu ?: "cpu"; - - if (!pe->metric_expr) + if (!pe->metric_name) return 0; - if (data->pmu_name && strcmp(data->pmu_name, pmu)) - return 0; - - return metricgroup__print_pmu_event(pe, data->metricgroups, data->filter, - data->raw, data->details, data->groups, - data->metriclist); + return metricgroup__add_to_mep_groups(pe, groups); } -void metricgroup__print(bool metrics, bool metricgroups, char *filter, - bool raw, bool details, const char *pmu_name) +void metricgroup__print(const struct print_callbacks *print_cb, void *print_state) { struct rblist groups; - struct rb_node *node, *next; - struct strlist *metriclist = NULL; const struct pmu_events_table *table; - - if (!metricgroups) { - metriclist = strlist__new(NULL, NULL); - if (!metriclist) - return; - } + struct 
rb_node *node, *next; rblist__init(&groups); groups.node_new = mep_new; @@ -590,56 +500,31 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, groups.node_delete = mep_delete; table = pmu_events_table__find(); if (table) { - struct metricgroup_print_data data = { - .pmu_name = pmu_name, - .metriclist = metriclist, - .metricgroups = metricgroups, - .filter = filter, - .raw = raw, - .details = details, - .groups = &groups, - }; - pmu_events_table_for_each_event(table, - metricgroup__print_callback, - &data); + metricgroup__add_to_mep_groups_callback, + &groups); } { struct metricgroup_iter_data data = { - .fn = metricgroup__print_sys_event_iter, - .data = (void *) &(struct metricgroup_print_sys_idata){ - .metriclist = metriclist, - .metricgroups = metricgroups, - .filter = filter, - .raw = raw, - .details = details, - .groups = &groups, - }, + .fn = metricgroup__add_to_mep_groups_callback, + .data = &groups, }; - pmu_for_each_sys_event(metricgroup__sys_event_iter, &data); } - if (!filter || !rblist__empty(&groups)) { - if (metricgroups && !raw) - printf("\nMetric Groups:\n\n"); - else if (metrics && !raw) - printf("\nMetrics:\n\n"); - } - for (node = rb_first_cached(&groups.entries); node; node = next) { struct mep *me = container_of(node, struct mep, nd); - if (metricgroups) - printf("%s%s%s", me->name, metrics && !raw ? ":" : "", raw ? 
" " : "\n"); - if (metrics) - metricgroup__print_strlist(me->metrics, raw); + print_cb->print_metric(print_state, + me->metric_group, + me->metric_name, + me->metric_desc, + me->metric_long_desc, + me->metric_expr, + me->metric_unit); next = rb_next(node); rblist__remove_node(&groups, node); } - if (!metricgroups) - metricgroup__print_strlist(metriclist, raw); - strlist__delete(metriclist); } static const char *code_characters = ",-=@"; diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h index 732d3a0d3334..0013cf582173 100644 --- a/tools/perf/util/metricgroup.h +++ b/tools/perf/util/metricgroup.h @@ -10,6 +10,7 @@ struct evlist; struct evsel; struct option; +struct print_callbacks; struct rblist; struct cgroup; @@ -78,8 +79,7 @@ int metricgroup__parse_groups_test(struct evlist *evlist, bool metric_no_merge, struct rblist *metric_events); -void metricgroup__print(bool metrics, bool groups, char *filter, - bool raw, bool details, const char *pmu_name); +void metricgroup__print(const struct print_callbacks *print_cb, void *print_state); bool metricgroup__has_metric(const char *metric); int arch_get_runtimeparam(const struct pmu_event *pe __maybe_unused); void metricgroup__rblist_exit(struct rblist *metric_events); diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 075c82dd1347..e9a4f31926bf 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -23,6 +23,7 @@ #include "evsel.h" #include "pmu.h" #include "parse-events.h" +#include "print-events.h" #include "header.h" #include "string2.h" #include "strbuf.h" @@ -1579,13 +1580,6 @@ static char *format_alias(char *buf, int len, const struct perf_pmu *pmu, return buf; } -static char *format_alias_or(char *buf, int len, const struct perf_pmu *pmu, - const struct perf_pmu_alias *alias) -{ - snprintf(buf, len, "%s OR %s/%s/", alias->name, pmu->name, alias->name); - return buf; -} - /** Struct for ordering events as output in perf list. */ struct sevent { /** PMU for event. 
*/ @@ -1629,7 +1623,7 @@ static int cmp_sevent(const void *a, const void *b) /* Order CPU core events to be first */ if (as->is_cpu != bs->is_cpu) - return bs->is_cpu - as->is_cpu; + return as->is_cpu ? -1 : 1; /* Order by PMU name. */ a_pmu_name = as->pmu->name ?: ""; @@ -1642,27 +1636,6 @@ static int cmp_sevent(const void *a, const void *b) return strcmp(a_name, b_name); } -static void wordwrap(char *s, int start, int max, int corr) -{ - int column = start; - int n; - - while (*s) { - int wlen = strcspn(s, " \t"); - - if (column + wlen >= max && column > start) { - printf("\n%*s", start, ""); - column = start + corr; - } - n = printf("%s%.*s", column > start ? " " : "", wlen, s); - if (n <= 0) - break; - s += wlen; - column += n; - s = skip_spaces(s); - } -} - bool is_pmu_core(const char *name) { return !strcmp(name, "cpu") || is_arm_pmu_core(name); @@ -1685,24 +1658,19 @@ static bool pmu_alias_is_duplicate(struct sevent *alias_a, return strcmp(a_pmu_name, b_pmu_name) == 0; } -void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, - bool long_desc, bool details_flag, bool deprecated, - const char *pmu_name) +void print_pmu_events(const struct print_callbacks *print_cb, void *print_state) { struct perf_pmu *pmu; - struct perf_pmu_alias *alias; + struct perf_pmu_alias *event; char buf[1024]; int printed = 0; int len, j; struct sevent *aliases; - int numdesc = 0; - int columns = pager_get_columns(); - char *topic = NULL; pmu = NULL; len = 0; while ((pmu = perf_pmu__scan(pmu)) != NULL) { - list_for_each_entry(alias, &pmu->aliases, list) + list_for_each_entry(event, &pmu->aliases, list) len++; if (pmu->selectable) len++; @@ -1715,32 +1683,15 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, pmu = NULL; j = 0; while ((pmu = perf_pmu__scan(pmu)) != NULL) { - bool is_cpu; + bool is_cpu = is_pmu_core(pmu->name) || perf_pmu__is_hybrid(pmu->name); - if (pmu_name && pmu->name && strcmp(pmu_name, pmu->name)) - continue; 
- - is_cpu = is_pmu_core(pmu->name) || perf_pmu__is_hybrid(pmu->name); - - list_for_each_entry(alias, &pmu->aliases, list) { - if (alias->deprecated && !deprecated) - continue; - - if (event_glob != NULL && - !(strglobmatch_nocase(alias->name, event_glob) || - (!is_cpu && - strglobmatch_nocase(alias->name, event_glob)) || - (alias->topic && - strglobmatch_nocase(alias->topic, event_glob)))) - continue; - - aliases[j].event = alias; + list_for_each_entry(event, &pmu->aliases, list) { + aliases[j].event = event; aliases[j].pmu = pmu; aliases[j].is_cpu = is_cpu; j++; } - if (pmu->selectable && - (event_glob == NULL || strglobmatch(pmu->name, event_glob))) { + if (pmu->selectable) { aliases[j].event = NULL; aliases[j].pmu = pmu; aliases[j].is_cpu = is_cpu; @@ -1750,7 +1701,12 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, len = j; qsort(aliases, len, sizeof(struct sevent), cmp_sevent); for (j = 0; j < len; j++) { - char *name, *desc; + const char *name, *alias = NULL, *scale_unit = NULL, + *desc = NULL, *long_desc = NULL, + *encoding_desc = NULL, *topic = NULL, + *metric_name = NULL, *metric_expr = NULL; + bool deprecated = false; + size_t buf_used; /* Skip duplicates */ if (j > 0 && pmu_alias_is_duplicate(&aliases[j], &aliases[j - 1])) @@ -1758,48 +1714,51 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, if (!aliases[j].event) { /* A selectable event. 
*/ - snprintf(buf, sizeof(buf), "%s//", aliases[j].pmu->name); + buf_used = snprintf(buf, sizeof(buf), "%s//", aliases[j].pmu->name) + 1; name = buf; - } else if (aliases[j].event->desc) { - name = aliases[j].event->name; } else { - if (!name_only && aliases[j].is_cpu) { - name = format_alias_or(buf, sizeof(buf), aliases[j].pmu, - aliases[j].event); + if (aliases[j].event->desc) { + name = aliases[j].event->name; + buf_used = 0; } else { name = format_alias(buf, sizeof(buf), aliases[j].pmu, aliases[j].event); + if (aliases[j].is_cpu) { + alias = name; + name = aliases[j].event->name; + } + buf_used = strlen(buf) + 1; } - } - if (name_only) { - printf("%s ", name); - continue; - } - printed++; - if (!aliases[j].event || !aliases[j].event->desc || quiet_flag) { - printf(" %-50s [Kernel PMU event]\n", name); - continue; - } - if (numdesc++ == 0) - printf("\n"); - if (aliases[j].event->topic && (!topic || - strcmp(topic, aliases[j].event->topic))) { - printf("%s%s:\n", topic ? "\n" : "", aliases[j].event->topic); + if (strlen(aliases[j].event->unit) || aliases[j].event->scale != 1.0) { + scale_unit = buf + buf_used; + buf_used += snprintf(buf + buf_used, sizeof(buf) - buf_used, + "%G%s", aliases[j].event->scale, + aliases[j].event->unit) + 1; + } + desc = aliases[j].event->desc; + long_desc = aliases[j].event->long_desc; topic = aliases[j].event->topic; + encoding_desc = buf + buf_used; + buf_used += snprintf(buf + buf_used, sizeof(buf) - buf_used, + "%s/%s/", aliases[j].pmu->name, + aliases[j].event->str) + 1; + metric_name = aliases[j].event->metric_name; + metric_expr = aliases[j].event->metric_expr; + deprecated = aliases[j].event->deprecated; } - printf(" %-50s\n", name); - printf("%*s", 8, "["); - desc = long_desc ? 
aliases[j].event->long_desc : aliases[j].event->desc; - wordwrap(desc, 8, columns, 0); - printf("]\n"); - if (details_flag) { - printf("%*s%s/%s/ ", 8, "", aliases[j].pmu->name, aliases[j].event->str); - if (aliases[j].event->metric_name) - printf(" MetricName: %s", aliases[j].event->metric_name); - if (aliases[j].event->metric_expr) - printf(" MetricExpr: %s", aliases[j].event->metric_expr); - putchar('\n'); - } + print_cb->print_event(print_state, + aliases[j].pmu->name, + topic, + name, + alias, + scale_unit, + deprecated, + "Kernel PMU event", + desc, + long_desc, + encoding_desc, + metric_name, + metric_expr); } if (printed && pager_in_use()) printf("\n"); diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index ee02e1ef9187..69ca0004f94f 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -12,6 +12,7 @@ struct evsel_config_term; struct perf_cpu_map; +struct print_callbacks; enum { PERF_PMU_FORMAT_VALUE_CONFIG, @@ -225,9 +226,7 @@ void perf_pmu__del_formats(struct list_head *formats); struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu); bool is_pmu_core(const char *name); -void print_pmu_events(const char *event_glob, bool name_only, bool quiet, - bool long_desc, bool details_flag, - bool deprecated, const char *pmu_name); +void print_pmu_events(const struct print_callbacks *print_cb, void *print_state); bool pmu_have_event(const char *pname, const char *name); int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, ...) __scanf(3, 4); diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c index d53dba033597..2646ae18d9f9 100644 --- a/tools/perf/util/print-events.c +++ b/tools/perf/util/print-events.c @@ -28,6 +28,7 @@ #define MAX_NAME_LEN 100 +/** Strings corresponding to enum perf_type_id. 
*/ static const char * const event_type_descriptors[] = { "Hardware event", "Software event", @@ -55,11 +56,9 @@ static const struct event_symbol event_symbols_tool[PERF_TOOL_MAX] = { /* * Print the events from /tracing/events */ -void print_tracepoint_events(const char *subsys_glob, - const char *event_glob, bool name_only) +void print_tracepoint_events(const struct print_callbacks *print_cb, void *print_state) { struct dirent **sys_namelist = NULL; - bool printed = false; int sys_items = tracing_events__scandir_alphasort(&sys_namelist); for (int i = 0; i < sys_items; i++) { @@ -73,10 +72,6 @@ void print_tracepoint_events(const char *subsys_glob, !strcmp(sys_dirent->d_name, "..")) continue; - if (subsys_glob != NULL && - !strglobmatch(sys_dirent->d_name, subsys_glob)) - continue; - dir_path = get_events_file(sys_dirent->d_name); if (!dir_path) continue; @@ -94,41 +89,41 @@ void print_tracepoint_events(const char *subsys_glob, if (tp_event_has_id(dir_path, evt_dirent) != 0) continue; - if (event_glob != NULL && - !strglobmatch(evt_dirent->d_name, event_glob)) - continue; - snprintf(evt_path, MAXPATHLEN, "%s:%s", sys_dirent->d_name, evt_dirent->d_name); - if (name_only) - printf("%s ", evt_path); - else { - printf(" %-50s [%s]\n", evt_path, - event_type_descriptors[PERF_TYPE_TRACEPOINT]); - } - printed = true; + print_cb->print_event(print_state, + /*topic=*/NULL, + /*pmu_name=*/NULL, + evt_path, + /*event_alias=*/NULL, + /*scale_unit=*/NULL, + /*deprecated=*/false, + "Tracepoint event", + /*desc=*/NULL, + /*long_desc=*/NULL, + /*encoding_desc=*/NULL, + /*metric_name=*/NULL, + /*metric_expr=*/NULL); } free(dir_path); free(evt_namelist); } free(sys_namelist); - if (printed && pager_in_use()) - printf("\n"); } -void print_sdt_events(const char *subsys_glob, const char *event_glob, - bool name_only) +void print_sdt_events(const struct print_callbacks *print_cb, void *print_state) { - struct probe_cache *pcache; - struct probe_cache_entry *ent; struct strlist *bidlist, 
*sdtlist; - struct strlist_config cfg = {.dont_dupstr = true}; - struct str_node *nd, *nd2; - char *buf, *path, *ptr = NULL; - bool show_detail = false; - int ret; + struct str_node *bid_nd, *sdt_name, *next_sdt_name; + const char *last_sdt_name = NULL; - sdtlist = strlist__new(NULL, &cfg); + /* + * The implicitly sorted sdtlist will hold the tracepoint name followed + * by @. If the tracepoint name is unique (determined by + * looking at the adjacent nodes) the @ is dropped otherwise + * the executable path and buildid are added to the name. + */ + sdtlist = strlist__new(NULL, NULL); if (!sdtlist) { pr_debug("Failed to allocate new strlist for SDT\n"); return; @@ -138,65 +133,78 @@ void print_sdt_events(const char *subsys_glob, const char *event_glob, pr_debug("Failed to get buildids: %d\n", errno); return; } - strlist__for_each_entry(nd, bidlist) { - pcache = probe_cache__new(nd->s, NULL); + strlist__for_each_entry(bid_nd, bidlist) { + struct probe_cache *pcache; + struct probe_cache_entry *ent; + + pcache = probe_cache__new(bid_nd->s, NULL); if (!pcache) continue; list_for_each_entry(ent, &pcache->entries, node) { - if (!ent->sdt) - continue; - if (subsys_glob && - !strglobmatch(ent->pev.group, subsys_glob)) - continue; - if (event_glob && - !strglobmatch(ent->pev.event, event_glob)) - continue; - ret = asprintf(&buf, "%s:%s@%s", ent->pev.group, - ent->pev.event, nd->s); - if (ret > 0) - strlist__add(sdtlist, buf); + char buf[1024]; + + snprintf(buf, sizeof(buf), "%s:%s@%s", + ent->pev.group, ent->pev.event, bid_nd->s); + strlist__add(sdtlist, buf); } probe_cache__delete(pcache); } strlist__delete(bidlist); - strlist__for_each_entry(nd, sdtlist) { - buf = strchr(nd->s, '@'); - if (buf) - *(buf++) = '\0'; - if (name_only) { - printf("%s ", nd->s); - continue; - } - nd2 = strlist__next(nd); - if (nd2) { - ptr = strchr(nd2->s, '@'); - if (ptr) - *ptr = '\0'; - if (strcmp(nd->s, nd2->s) == 0) - show_detail = true; - } - if (show_detail) { - path = 
build_id_cache__origname(buf); - ret = asprintf(&buf, "%s@%s(%.12s)", nd->s, path, buf); - if (ret > 0) { - printf(" %-50s [%s]\n", buf, "SDT event"); - free(buf); + strlist__for_each_entry(sdt_name, sdtlist) { + bool show_detail = false; + char *bid = strchr(sdt_name->s, '@'); + char *evt_name = NULL; + + if (bid) + *(bid++) = '\0'; + + if (last_sdt_name && !strcmp(last_sdt_name, sdt_name->s)) { + show_detail = true; + } else { + next_sdt_name = strlist__next(sdt_name); + if (next_sdt_name) { + char *bid2 = strchr(next_sdt_name->s, '@'); + + if (bid2) + *bid2 = '\0'; + if (strcmp(sdt_name->s, next_sdt_name->s) == 0) + show_detail = true; + if (bid2) + *bid2 = '@'; } - free(path); - } else - printf(" %-50s [%s]\n", nd->s, "SDT event"); - if (nd2) { - if (strcmp(nd->s, nd2->s) != 0) - show_detail = false; - if (ptr) - *ptr = '@'; } + last_sdt_name = sdt_name->s; + + if (show_detail) { + char *path = build_id_cache__origname(bid); + + if (path) { + if (asprintf(&evt_name, "%s@%s(%.12s)", sdt_name->s, path, bid) < 0) + evt_name = NULL; + free(path); + } + } + print_cb->print_event(print_state, + /*topic=*/NULL, + /*pmu_name=*/NULL, + evt_name ?: sdt_name->s, + /*event_alias=*/NULL, + /*deprecated=*/false, + /*scale_unit=*/NULL, + "SDT event", + /*desc=*/NULL, + /*long_desc=*/NULL, + /*encoding_desc=*/NULL, + /*metric_name=*/NULL, + /*metric_expr=*/NULL); + + free(evt_name); } strlist__delete(sdtlist); } -int print_hwcache_events(const char *event_glob, bool name_only) +int print_hwcache_events(const struct print_callbacks *print_cb, void *print_state) { struct strlist *evt_name_list = strlist__new(NULL, NULL); struct str_node *nd; @@ -216,9 +224,6 @@ int print_hwcache_events(const char *event_glob, bool name_only) char name[64]; __evsel__hw_cache_type_op_res_name(type, op, i, name, sizeof(name)); - if (event_glob != NULL && !strglobmatch(name, event_glob)) - continue; - if (!perf_pmu__has_hybrid()) { if (is_event_supported(PERF_TYPE_HW_CACHE, type | (op << 8) | (i << 
16))) @@ -240,55 +245,47 @@ int print_hwcache_events(const char *event_glob, bool name_only) } strlist__for_each_entry(nd, evt_name_list) { - if (name_only) { - printf("%s ", nd->s); - continue; - } - printf(" %-50s [%s]\n", nd->s, event_type_descriptors[PERF_TYPE_HW_CACHE]); + print_cb->print_event(print_state, + "cache", + /*pmu_name=*/NULL, + nd->s, + /*event_alias=*/NULL, + /*scale_unit=*/NULL, + /*deprecated=*/false, + event_type_descriptors[PERF_TYPE_HW_CACHE], + /*desc=*/NULL, + /*long_desc=*/NULL, + /*encoding_desc=*/NULL, + /*metric_name=*/NULL, + /*metric_expr=*/NULL); } - if (!strlist__empty(evt_name_list) && pager_in_use()) - printf("\n"); - strlist__delete(evt_name_list); return 0; } -static void print_tool_event(const struct event_symbol *syms, const char *event_glob, - bool name_only) +void print_tool_events(const struct print_callbacks *print_cb, void *print_state) { - if (syms->symbol == NULL) - return; - - if (event_glob && !(strglobmatch(syms->symbol, event_glob) || - (syms->alias && strglobmatch(syms->alias, event_glob)))) - return; - - if (name_only) - printf("%s ", syms->symbol); - else { - char name[MAX_NAME_LEN]; - - if (syms->alias && strlen(syms->alias)) - snprintf(name, MAX_NAME_LEN, "%s OR %s", syms->symbol, syms->alias); - else - strlcpy(name, syms->symbol, MAX_NAME_LEN); - printf(" %-50s [%s]\n", name, "Tool event"); + // Start at 1 because the first enum entry means no tool event. + for (int i = 1; i < PERF_TOOL_MAX; ++i) { + print_cb->print_event(print_state, + "tool", + /*pmu_name=*/NULL, + event_symbols_tool[i].symbol, + event_symbols_tool[i].alias, + /*scale_unit=*/NULL, + /*deprecated=*/false, + "Tool event", + /*desc=*/NULL, + /*long_desc=*/NULL, + /*encoding_desc=*/NULL, + /*metric_name=*/NULL, + /*metric_expr=*/NULL); } } -void print_tool_events(const char *event_glob, bool name_only) -{ - // Start at 1 because the first enum entry means no tool event. 
- for (int i = 1; i < PERF_TOOL_MAX; ++i) - print_tool_event(event_symbols_tool + i, event_glob, name_only); - - if (pager_in_use()) - printf("\n"); -} - -void print_symbol_events(const char *event_glob, unsigned int type, - struct event_symbol *syms, unsigned int max, - bool name_only) +void print_symbol_events(const struct print_callbacks *print_cb, void *print_state, + unsigned int type, const struct event_symbol *syms, + unsigned int max) { struct strlist *evt_name_list = strlist__new(NULL, NULL); struct str_node *nd; @@ -305,10 +302,6 @@ void print_symbol_events(const char *event_glob, unsigned int type, if (syms[i].symbol == NULL) continue; - if (event_glob != NULL && !(strglobmatch(syms[i].symbol, event_glob) || - (syms[i].alias && strglobmatch(syms[i].alias, event_glob)))) - continue; - if (!is_event_supported(type, i)) continue; @@ -322,63 +315,92 @@ void print_symbol_events(const char *event_glob, unsigned int type, } strlist__for_each_entry(nd, evt_name_list) { - if (name_only) { - printf("%s ", nd->s); - continue; - } - printf(" %-50s [%s]\n", nd->s, event_type_descriptors[type]); - } - if (!strlist__empty(evt_name_list) && pager_in_use()) - printf("\n"); + char *alias = strstr(nd->s, " OR "); + if (alias) { + *alias = '\0'; + alias += 4; + } + print_cb->print_event(print_state, + /*topic=*/NULL, + /*pmu_name=*/NULL, + nd->s, + alias, + /*scale_unit=*/NULL, + /*deprecated=*/false, + event_type_descriptors[type], + /*desc=*/NULL, + /*long_desc=*/NULL, + /*encoding_desc=*/NULL, + /*metric_name=*/NULL, + /*metric_expr=*/NULL); + } strlist__delete(evt_name_list); } /* * Print the help text for the event symbols: */ -void print_events(const char *event_glob, bool name_only, bool quiet_flag, - bool long_desc, bool details_flag, bool deprecated, - const char *pmu_name) +void print_events(const struct print_callbacks *print_cb, void *print_state) { - print_symbol_events(event_glob, PERF_TYPE_HARDWARE, - event_symbols_hw, PERF_COUNT_HW_MAX, name_only); + 
print_symbol_events(print_cb, print_state, PERF_TYPE_HARDWARE, + event_symbols_hw, PERF_COUNT_HW_MAX); + print_symbol_events(print_cb, print_state, PERF_TYPE_SOFTWARE, + event_symbols_sw, PERF_COUNT_SW_MAX); - print_symbol_events(event_glob, PERF_TYPE_SOFTWARE, - event_symbols_sw, PERF_COUNT_SW_MAX, name_only); - print_tool_events(event_glob, name_only); + print_tool_events(print_cb, print_state); - print_hwcache_events(event_glob, name_only); + print_hwcache_events(print_cb, print_state); - print_pmu_events(event_glob, name_only, quiet_flag, long_desc, - details_flag, deprecated, pmu_name); + print_pmu_events(print_cb, print_state); - if (event_glob != NULL) - return; + print_cb->print_event(print_state, + /*topic=*/NULL, + /*pmu_name=*/NULL, + "rNNN", + /*event_alias=*/NULL, + /*scale_unit=*/NULL, + /*deprecated=*/false, + event_type_descriptors[PERF_TYPE_RAW], + /*desc=*/NULL, + /*long_desc=*/NULL, + /*encoding_desc=*/NULL, + /*metric_name=*/NULL, + /*metric_expr=*/NULL); - if (!name_only) { - printf(" %-50s [%s]\n", - "rNNN", - event_type_descriptors[PERF_TYPE_RAW]); - printf(" %-50s [%s]\n", - "cpu/t1=v1[,t2=v2,t3 ...]/modifier", - event_type_descriptors[PERF_TYPE_RAW]); - if (pager_in_use()) - printf(" (see 'man perf-list' on how to encode it)\n\n"); + print_cb->print_event(print_state, + /*topic=*/NULL, + /*pmu_name=*/NULL, + "cpu/t1=v1[,t2=v2,t3 ...]/modifier", + /*event_alias=*/NULL, + /*scale_unit=*/NULL, + /*deprecated=*/false, + event_type_descriptors[PERF_TYPE_RAW], + "(see 'man perf-list' on how to encode it)", + /*long_desc=*/NULL, + /*encoding_desc=*/NULL, + /*metric_name=*/NULL, + /*metric_expr=*/NULL); - printf(" %-50s [%s]\n", - "mem:[/len][:access]", - event_type_descriptors[PERF_TYPE_BREAKPOINT]); - if (pager_in_use()) - printf("\n"); - } + print_cb->print_event(print_state, + /*topic=*/NULL, + /*pmu_name=*/NULL, + "mem:[/len][:access]", + /*scale_unit=*/NULL, + /*event_alias=*/NULL, + /*deprecated=*/false, + 
event_type_descriptors[PERF_TYPE_BREAKPOINT], + /*desc=*/NULL, + /*long_desc=*/NULL, + /*encoding_desc=*/NULL, + /*metric_name=*/NULL, + /*metric_expr=*/NULL); - print_tracepoint_events(NULL, NULL, name_only); + print_tracepoint_events(print_cb, print_state); - print_sdt_events(NULL, NULL, name_only); + print_sdt_events(print_cb, print_state); - metricgroup__print(true, true, NULL, name_only, details_flag, - pmu_name); + metricgroup__print(print_cb, print_state); - print_libpfm_events(name_only, long_desc); + print_libpfm_events(print_cb, print_state); } diff --git a/tools/perf/util/print-events.h b/tools/perf/util/print-events.h index 1da9910d83a6..c237e53c4487 100644 --- a/tools/perf/util/print-events.h +++ b/tools/perf/util/print-events.h @@ -2,21 +2,39 @@ #ifndef __PERF_PRINT_EVENTS_H #define __PERF_PRINT_EVENTS_H +#include #include struct event_symbol; -void print_events(const char *event_glob, bool name_only, bool quiet_flag, - bool long_desc, bool details_flag, bool deprecated, - const char *pmu_name); -int print_hwcache_events(const char *event_glob, bool name_only); -void print_sdt_events(const char *subsys_glob, const char *event_glob, - bool name_only); -void print_symbol_events(const char *event_glob, unsigned int type, - struct event_symbol *syms, unsigned int max, - bool name_only); -void print_tool_events(const char *event_glob, bool name_only); -void print_tracepoint_events(const char *subsys_glob, const char *event_glob, - bool name_only); +struct print_callbacks { + void (*print_start)(void *print_state); + void (*print_end)(void *print_state); + void (*print_event)(void *print_state, const char *topic, + const char *pmu_name, + const char *event_name, const char *event_alias, + const char *scale_unit, + bool deprecated, const char *event_type_desc, + const char *desc, const char *long_desc, + const char *encoding_desc, + const char *metric_name, const char *metric_expr); + void (*print_metric)(void *print_state, + const char *group, + const char 
*name, + const char *desc, + const char *long_desc, + const char *expr, + const char *unit); +}; + +/** Print all events, the default when no options are specified. */ +void print_events(const struct print_callbacks *print_cb, void *print_state); +int print_hwcache_events(const struct print_callbacks *print_cb, void *print_state); +void print_sdt_events(const struct print_callbacks *print_cb, void *print_state); +void print_symbol_events(const struct print_callbacks *print_cb, void *print_state, + unsigned int type, const struct event_symbol *syms, + unsigned int max); +void print_tool_events(const struct print_callbacks *print_cb, void *print_state); +void print_tracepoint_events(const struct print_callbacks *print_cb, void *print_state); #endif /* __PERF_PRINT_EVENTS_H */ From 6ed249441a7d3ead8e81cc926e68d5e7ae031032 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 17 Nov 2022 13:43:37 -0300 Subject: [PATCH 2045/4122] perf list: Add JSON output option Output events and metrics in a JSON format by overriding the print callbacks. Currently other command line options aren't supported and metrics are repeated once per metric group. 
Committer testing: $ perf list cache List of pre-defined events (to be used in -e or -M): L1-dcache-load-misses [Hardware cache event] L1-dcache-loads [Hardware cache event] L1-dcache-prefetches [Hardware cache event] L1-icache-load-misses [Hardware cache event] L1-icache-loads [Hardware cache event] branch-load-misses [Hardware cache event] branch-loads [Hardware cache event] dTLB-load-misses [Hardware cache event] dTLB-loads [Hardware cache event] iTLB-load-misses [Hardware cache event] iTLB-loads [Hardware cache event] $ perf list --json cache [ { "Unit": "cache", "EventName": "L1-dcache-load-misses", "EventType": "Hardware cache event" }, { "Unit": "cache", "EventName": "L1-dcache-loads", "EventType": "Hardware cache event" }, { "Unit": "cache", "EventName": "L1-dcache-prefetches", "EventType": "Hardware cache event" }, { "Unit": "cache", "EventName": "L1-icache-load-misses", "EventType": "Hardware cache event" }, { "Unit": "cache", "EventName": "L1-icache-loads", "EventType": "Hardware cache event" }, { "Unit": "cache", "EventName": "branch-load-misses", "EventType": "Hardware cache event" }, { "Unit": "cache", "EventName": "branch-loads", "EventType": "Hardware cache event" }, { "Unit": "cache", "EventName": "dTLB-load-misses", "EventType": "Hardware cache event" }, { "Unit": "cache", "EventName": "dTLB-loads", "EventType": "Hardware cache event" }, { "Unit": "cache", "EventName": "iTLB-load-misses", "EventType": "Hardware cache event" }, { "Unit": "cache", "EventName": "iTLB-loads", "EventType": "Hardware cache event" } ] $ Signed-off-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Caleb Biggers Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Stephane Eranian Cc: Weilin Wang Cc: Xin Gao Cc: Xing Zhengjun Link: http://lore.kernel.org/lkml/20221114210723.2749751-11-irogers@google.com 
Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-list.txt | 4 + tools/perf/builtin-list.c | 308 +++++++++++++++++++------ 2 files changed, 245 insertions(+), 67 deletions(-) diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 44a819af573d..c5a3cb0f57c7 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -43,6 +43,10 @@ Print deprecated events. By default the deprecated events are hidden. Print PMU events and metrics limited to the specific PMU name. (e.g. --unit cpu, --unit msr, --unit cpu_core, --unit cpu_atom) +-j:: +--json:: +Output in JSON format. + [[EVENT_MODIFIERS]] EVENT MODIFIERS --------------- diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 0c84fdb3ad37..84fa2d050eac 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -19,6 +19,8 @@ #include "util/strlist.h" #include #include +#include +#include #include /** @@ -228,10 +230,176 @@ static void default_print_metric(void *ps, } } +struct json_print_state { + /** Should a separator be printed prior to the next item? */ + bool need_sep; +}; + +static void json_print_start(void *print_state __maybe_unused) +{ + printf("[\n"); +} + +static void json_print_end(void *ps) +{ + struct json_print_state *print_state = ps; + + printf("%s]\n", print_state->need_sep ? "\n" : ""); +} + +static void fix_escape_printf(const char *fmt, ...) 
+{ + va_list args; + char buf[2048]; + size_t buf_pos = 0; + + va_start(args, fmt); + for (size_t fmt_pos = 0; fmt_pos < strlen(fmt); fmt_pos++) { + switch (fmt[fmt_pos]) { + case '%': { + const char *s = va_arg(args, const char*); + + fmt_pos++; + assert(fmt[fmt_pos] == 's'); + for (size_t s_pos = 0; s_pos < strlen(s); s_pos++) { + switch (s[s_pos]) { + case '\\': + __fallthrough; + case '\"': + buf[buf_pos++] = '\\'; + assert(buf_pos < sizeof(buf)); + __fallthrough; + default: + buf[buf_pos++] = s[s_pos]; + assert(buf_pos < sizeof(buf)); + break; + } + } + break; + } + default: + buf[buf_pos++] = fmt[fmt_pos]; + assert(buf_pos < sizeof(buf)); + break; + } + } + va_end(args); + buf[buf_pos] = '\0'; + fputs(buf, stdout); +} + +static void json_print_event(void *ps, const char *pmu_name, const char *topic, + const char *event_name, const char *event_alias, + const char *scale_unit, + bool deprecated, const char *event_type_desc, + const char *desc, const char *long_desc, + const char *encoding_desc, + const char *metric_name, const char *metric_expr) +{ + struct json_print_state *print_state = ps; + bool need_sep = false; + + printf("%s{\n", print_state->need_sep ? ",\n" : ""); + print_state->need_sep = true; + if (pmu_name) { + fix_escape_printf("\t\"Unit\": \"%s\"", pmu_name); + need_sep = true; + } + if (topic) { + fix_escape_printf("%s\t\"Topic\": \"%s\"", need_sep ? ",\n" : "", topic); + need_sep = true; + } + if (event_name) { + fix_escape_printf("%s\t\"EventName\": \"%s\"", need_sep ? ",\n" : "", event_name); + need_sep = true; + } + if (event_alias && strlen(event_alias)) { + fix_escape_printf("%s\t\"EventAlias\": \"%s\"", need_sep ? ",\n" : "", event_alias); + need_sep = true; + } + if (scale_unit && strlen(scale_unit)) { + fix_escape_printf("%s\t\"ScaleUnit\": \"%s\"", need_sep ? ",\n" : "", + scale_unit); + need_sep = true; + } + if (event_type_desc) { + fix_escape_printf("%s\t\"EventType\": \"%s\"", need_sep ? 
",\n" : "", + event_type_desc); + need_sep = true; + } + if (deprecated) { + fix_escape_printf("%s\t\"Deprecated\": \"%s\"", need_sep ? ",\n" : "", + deprecated ? "1" : "0"); + need_sep = true; + } + if (desc) { + fix_escape_printf("%s\t\"BriefDescription\": \"%s\"", need_sep ? ",\n" : "", desc); + need_sep = true; + } + if (long_desc) { + fix_escape_printf("%s\t\"PublicDescription\": \"%s\"", need_sep ? ",\n" : "", + long_desc); + need_sep = true; + } + if (encoding_desc) { + fix_escape_printf("%s\t\"Encoding\": \"%s\"", need_sep ? ",\n" : "", encoding_desc); + need_sep = true; + } + if (metric_name) { + fix_escape_printf("%s\t\"MetricName\": \"%s\"", need_sep ? ",\n" : "", metric_name); + need_sep = true; + } + if (metric_expr) { + fix_escape_printf("%s\t\"MetricExpr\": \"%s\"", need_sep ? ",\n" : "", metric_expr); + need_sep = true; + } + printf("%s}", need_sep ? "\n" : ""); +} + +static void json_print_metric(void *ps __maybe_unused, const char *group, + const char *name, const char *desc, + const char *long_desc, const char *expr, + const char *unit) +{ + struct json_print_state *print_state = ps; + bool need_sep = false; + + printf("%s{\n", print_state->need_sep ? ",\n" : ""); + print_state->need_sep = true; + if (group) { + fix_escape_printf("\t\"MetricGroup\": \"%s\"", group); + need_sep = true; + } + if (name) { + fix_escape_printf("%s\t\"MetricName\": \"%s\"", need_sep ? ",\n" : "", name); + need_sep = true; + } + if (expr) { + fix_escape_printf("%s\t\"MetricExpr\": \"%s\"", need_sep ? ",\n" : "", expr); + need_sep = true; + } + if (unit) { + fix_escape_printf("%s\t\"ScaleUnit\": \"%s\"", need_sep ? ",\n" : "", unit); + need_sep = true; + } + if (desc) { + fix_escape_printf("%s\t\"BriefDescription\": \"%s\"", need_sep ? ",\n" : "", desc); + need_sep = true; + } + if (long_desc) { + fix_escape_printf("%s\t\"PublicDescription\": \"%s\"", need_sep ? ",\n" : "", + long_desc); + need_sep = true; + } + printf("%s}", need_sep ? 
"\n" : ""); +} + int cmd_list(int argc, const char **argv) { int i, ret = 0; - struct print_state ps = {}; + struct print_state default_ps = {}; + struct print_state json_ps = {}; + void *ps = &default_ps; struct print_callbacks print_cb = { .print_start = default_print_start, .print_end = default_print_end, @@ -240,15 +408,17 @@ int cmd_list(int argc, const char **argv) }; const char *hybrid_name = NULL; const char *unit_name = NULL; + bool json = false; struct option list_options[] = { - OPT_BOOLEAN(0, "raw-dump", &ps.name_only, "Dump raw events"), - OPT_BOOLEAN('d', "desc", &ps.desc, + OPT_BOOLEAN(0, "raw-dump", &default_ps.name_only, "Dump raw events"), + OPT_BOOLEAN('j', "json", &json, "JSON encode events and metrics"), + OPT_BOOLEAN('d', "desc", &default_ps.desc, "Print extra event descriptions. --no-desc to not print."), - OPT_BOOLEAN('v', "long-desc", &ps.long_desc, + OPT_BOOLEAN('v', "long-desc", &default_ps.long_desc, "Print longer event descriptions."), - OPT_BOOLEAN(0, "details", &ps.detailed, + OPT_BOOLEAN(0, "details", &default_ps.detailed, "Print information on the perf event names and expressions used internally by events."), - OPT_BOOLEAN(0, "deprecated", &ps.deprecated, + OPT_BOOLEAN(0, "deprecated", &default_ps.deprecated, "Print deprecated events."), OPT_STRING(0, "cputype", &hybrid_name, "hybrid cpu type", "Limit PMU or metric printing to the given hybrid PMU (e.g. 
core or atom)."), @@ -272,28 +442,37 @@ int cmd_list(int argc, const char **argv) setup_pager(); - if (!ps.name_only) + if (!default_ps.name_only) setup_pager(); - ps.desc = !ps.long_desc; - ps.last_topic = strdup(""); - assert(ps.last_topic); - ps.visited_metrics = strlist__new(NULL, NULL); - assert(ps.visited_metrics); - if (unit_name) - ps.pmu_glob = strdup(unit_name); - else if (hybrid_name) { - ps.pmu_glob = perf_pmu__hybrid_type_to_pmu(hybrid_name); - if (!ps.pmu_glob) - pr_warning("WARNING: hybrid cputype is not supported!\n"); + if (json) { + print_cb = (struct print_callbacks){ + .print_start = json_print_start, + .print_end = json_print_end, + .print_event = json_print_event, + .print_metric = json_print_metric, + }; + ps = &json_ps; + } else { + default_ps.desc = !default_ps.long_desc; + default_ps.last_topic = strdup(""); + assert(default_ps.last_topic); + default_ps.visited_metrics = strlist__new(NULL, NULL); + assert(default_ps.visited_metrics); + if (unit_name) + default_ps.pmu_glob = strdup(unit_name); + else if (hybrid_name) { + default_ps.pmu_glob = perf_pmu__hybrid_type_to_pmu(hybrid_name); + if (!default_ps.pmu_glob) + pr_warning("WARNING: hybrid cputype is not supported!\n"); + } } - - print_cb.print_start(&ps); + print_cb.print_start(ps); if (argc == 0) { - ps.metrics = true; - ps.metricgroups = true; - print_events(&print_cb, &ps); + default_ps.metrics = true; + default_ps.metricgroups = true; + print_events(&print_cb, ps); goto out; } @@ -301,80 +480,75 @@ int cmd_list(int argc, const char **argv) char *sep, *s; if (strcmp(argv[i], "tracepoint") == 0) - print_tracepoint_events(&print_cb, &ps); + print_tracepoint_events(&print_cb, ps); else if (strcmp(argv[i], "hw") == 0 || strcmp(argv[i], "hardware") == 0) - print_symbol_events(&print_cb, &ps, PERF_TYPE_HARDWARE, + print_symbol_events(&print_cb, ps, PERF_TYPE_HARDWARE, event_symbols_hw, PERF_COUNT_HW_MAX); else if (strcmp(argv[i], "sw") == 0 || strcmp(argv[i], "software") == 0) { - 
print_symbol_events(&print_cb, &ps, PERF_TYPE_SOFTWARE, + print_symbol_events(&print_cb, ps, PERF_TYPE_SOFTWARE, event_symbols_sw, PERF_COUNT_SW_MAX); - print_tool_events(&print_cb, &ps); + print_tool_events(&print_cb, ps); } else if (strcmp(argv[i], "cache") == 0 || strcmp(argv[i], "hwcache") == 0) - print_hwcache_events(&print_cb, &ps); + print_hwcache_events(&print_cb, ps); else if (strcmp(argv[i], "pmu") == 0) - print_pmu_events(&print_cb, &ps); + print_pmu_events(&print_cb, ps); else if (strcmp(argv[i], "sdt") == 0) - print_sdt_events(&print_cb, &ps); + print_sdt_events(&print_cb, ps); else if (strcmp(argv[i], "metric") == 0 || strcmp(argv[i], "metrics") == 0) { - ps.metricgroups = false; - ps.metrics = true; - metricgroup__print(&print_cb, &ps); + default_ps.metricgroups = false; + default_ps.metrics = true; + metricgroup__print(&print_cb, ps); } else if (strcmp(argv[i], "metricgroup") == 0 || strcmp(argv[i], "metricgroups") == 0) { - ps.metricgroups = true; - ps.metrics = false; - metricgroup__print(&print_cb, &ps); + default_ps.metricgroups = true; + default_ps.metrics = false; + metricgroup__print(&print_cb, ps); } else if ((sep = strchr(argv[i], ':')) != NULL) { - int sep_idx; - char *old_pmu_glob = ps.pmu_glob; + char *old_pmu_glob = default_ps.pmu_glob; - sep_idx = sep - argv[i]; - s = strdup(argv[i]); - if (s == NULL) { + default_ps.event_glob = strdup(argv[i]); + if (!default_ps.event_glob) { ret = -1; goto out; } - s[sep_idx] = '\0'; - ps.pmu_glob = s; - ps.event_glob = s + sep_idx + 1; - print_tracepoint_events(&print_cb, &ps); - print_sdt_events(&print_cb, &ps); - ps.metrics = true; - ps.metricgroups = true; - metricgroup__print(&print_cb, &ps); - free(s); - ps.pmu_glob = old_pmu_glob; + print_tracepoint_events(&print_cb, ps); + print_sdt_events(&print_cb, ps); + default_ps.metrics = true; + default_ps.metricgroups = true; + metricgroup__print(&print_cb, ps); + zfree(&default_ps.event_glob); + default_ps.pmu_glob = old_pmu_glob; } else { if 
(asprintf(&s, "*%s*", argv[i]) < 0) { printf("Critical: Not enough memory! Trying to continue...\n"); continue; } - ps.event_glob = s; - print_symbol_events(&print_cb, &ps, PERF_TYPE_HARDWARE, + default_ps.event_glob = s; + print_symbol_events(&print_cb, ps, PERF_TYPE_HARDWARE, event_symbols_hw, PERF_COUNT_HW_MAX); - print_symbol_events(&print_cb, &ps, PERF_TYPE_SOFTWARE, + print_symbol_events(&print_cb, ps, PERF_TYPE_SOFTWARE, event_symbols_sw, PERF_COUNT_SW_MAX); - print_tool_events(&print_cb, &ps); - print_hwcache_events(&print_cb, &ps); - print_pmu_events(&print_cb, &ps); - print_tracepoint_events(&print_cb, &ps); - print_sdt_events(&print_cb, &ps); - ps.metrics = true; - ps.metricgroups = true; - metricgroup__print(&print_cb, &ps); + print_tool_events(&print_cb, ps); + print_hwcache_events(&print_cb, ps); + print_pmu_events(&print_cb, ps); + print_tracepoint_events(&print_cb, ps); + print_sdt_events(&print_cb, ps); + default_ps.metrics = true; + default_ps.metricgroups = true; + metricgroup__print(&print_cb, ps); free(s); } } out: - print_cb.print_end(&ps); - free(ps.pmu_glob); - free(ps.last_topic); - free(ps.last_metricgroups); - strlist__delete(ps.visited_metrics); + print_cb.print_end(ps); + free(default_ps.pmu_glob); + free(default_ps.last_topic); + free(default_ps.last_metricgroups); + strlist__delete(default_ps.visited_metrics); return ret; } From eadcab4c7a66e1df03d32da0db55d89fd9343fcc Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Mon, 21 Nov 2022 07:52:33 +0000 Subject: [PATCH 2046/4122] perf trace: Use macro RAW_SYSCALL_ARGS_NUM to replace number This patch defines a macro RAW_SYSCALL_ARGS_NUM to replace the open coded number '6'. 
Signed-off-by: Leo Yan Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20221121075237.127706-2-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 3257da5cad23..22008a31684b 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -88,6 +88,8 @@ # define F_LINUX_SPECIFIC_BASE 1024 #endif +#define RAW_SYSCALL_ARGS_NUM 6 + /* * strtoul: Go from a string to a value, i.e. for msr: MSR_FS_BASE to 0xc0000100 */ @@ -108,7 +110,7 @@ struct syscall_fmt { const char *sys_enter, *sys_exit; } bpf_prog_name; - struct syscall_arg_fmt arg[6]; + struct syscall_arg_fmt arg[RAW_SYSCALL_ARGS_NUM]; u8 nr_args; bool errpid; bool timeout; @@ -1229,7 +1231,7 @@ struct syscall { */ struct bpf_map_syscall_entry { bool enabled; - u16 string_args_len[6]; + u16 string_args_len[RAW_SYSCALL_ARGS_NUM]; }; /* @@ -1661,7 +1663,7 @@ static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args) { int idx; - if (nr_args == 6 && sc->fmt && sc->fmt->nr_args != 0) + if (nr_args == RAW_SYSCALL_ARGS_NUM && sc->fmt && sc->fmt->nr_args != 0) nr_args = sc->fmt->nr_args; sc->arg_fmt = calloc(nr_args, sizeof(*sc->arg_fmt)); @@ -1812,7 +1814,8 @@ static int trace__read_syscall_info(struct trace *trace, int id) sc->tp_format = trace_event__tp_format("syscalls", tp_name); } - if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? 6 : sc->tp_format->format.nr_fields)) + if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? 
+ RAW_SYSCALL_ARGS_NUM : sc->tp_format->format.nr_fields)) return -ENOMEM; if (IS_ERR(sc->tp_format)) From d4223e1776c30b2ce8d0e6eaadcbf696e60fca3c Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Mon, 21 Nov 2022 07:52:34 +0000 Subject: [PATCH 2047/4122] perf trace: Return error if a system call doesn't exist When a system call is not detected, the reason is either because the system call ID is out of scope or failure to find the corresponding path in the sysfs, trace__read_syscall_info() returns zero. Finally, without returning an error value it introduces confusion for the caller. This patch lets the function trace__read_syscall_info() to return -EEXIST when a system call doesn't exist. Fixes: b8b1033fcaa091d8 ("perf trace: Mark syscall ids that are not allocated to avoid unnecessary error messages") Signed-off-by: Leo Yan Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: bpf@vger.kernel.org Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221121075237.127706-3-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 22008a31684b..bd5513b15cde 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1796,11 +1796,11 @@ static int trace__read_syscall_info(struct trace *trace, int id) #endif sc = trace->syscalls.table + id; if (sc->nonexistent) - return 0; + return -EEXIST; if (name == NULL) { sc->nonexistent = true; - return 0; + return -EEXIST; } sc->name = name; From 03e9a5d8eb552a1bf692a9c8a5ecd50f4e428006 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Mon, 21 Nov 2022 07:52:35 +0000 Subject: [PATCH 2048/4122] perf trace: Handle failure when trace point folder is missed On Arm64 a case is perf tools fails to find the corresponding trace point folder for system calls listed in the table 'syscalltbl_arm64', e.g. 
the generated system call table contains "lookup_dcookie" but we cannot find the matching trace point folder for it. We need to figure out whether there is an issue with the generated system call table; on the other hand, we need to handle the case where the trace point folder is missing under sysfs, so this patch sets the flag syscall::nonexistent to true and returns the error from trace__read_syscall_info(). Another problem is that trace__syscall_info() returns two different values if a system call doesn't exist: the first time trace__syscall_info() is called it returns NULL when the system call doesn't exist, but if trace__syscall_info() is called again for the same missing system call, it returns a pointer to the syscall. trace__syscall_info() checks the condition 'syscalls.table[id].name == NULL', but the name is assigned on the first invocation even if the system call is not found. So checking the system call's name in trace__syscall_info() is not the right thing to do; this patch simply checks the flag syscall::nonexistent to decide whether a system call exists or not, so that trace__syscall_info() returns a consistent result (NULL) if a system call doesn't exist. 
Fixes: b8b1033fcaa091d8 ("perf trace: Mark syscall ids that are not allocated to avoid unnecessary error messages") Signed-off-by: Leo Yan Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: bpf@vger.kernel.org Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221121075237.127706-4-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index bd5513b15cde..071e7598391f 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1814,13 +1814,19 @@ static int trace__read_syscall_info(struct trace *trace, int id) sc->tp_format = trace_event__tp_format("syscalls", tp_name); } + /* + * Fails to read trace point format via sysfs node, so the trace point + * doesn't exist. Set the 'nonexistent' flag as true. + */ + if (IS_ERR(sc->tp_format)) { + sc->nonexistent = true; + return PTR_ERR(sc->tp_format); + } + if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? 
RAW_SYSCALL_ARGS_NUM : sc->tp_format->format.nr_fields)) return -ENOMEM; - if (IS_ERR(sc->tp_format)) - return PTR_ERR(sc->tp_format); - sc->args = sc->tp_format->format.fields; /* * We need to check and discard the first variable '__syscall_nr' @@ -2137,11 +2143,8 @@ static struct syscall *trace__syscall_info(struct trace *trace, (err = trace__read_syscall_info(trace, id)) != 0) goto out_cant_read; - if (trace->syscalls.table[id].name == NULL) { - if (trace->syscalls.table[id].nonexistent) - return NULL; + if (trace->syscalls.table && trace->syscalls.table[id].nonexistent) goto out_cant_read; - } return &trace->syscalls.table[id]; From 9bc427a0613da358f56fe499c690d97ce5d1af26 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Mon, 21 Nov 2022 07:52:36 +0000 Subject: [PATCH 2049/4122] perf augmented_raw_syscalls: Remove unused variable 'syscall' The local variable 'syscall' is not used anymore, remove it. Signed-off-by: Leo Yan Tested-by: Arnaldo Carvalho de Melo Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20221121075237.127706-5-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/examples/bpf/augmented_raw_syscalls.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c index 7dc24c9173a7..4203f92c063b 100644 --- a/tools/perf/examples/bpf/augmented_raw_syscalls.c +++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c @@ -389,7 +389,6 @@ int sys_enter(struct syscall_enter_args *args) * initial, non-augmented raw_syscalls:sys_enter payload. 
*/ unsigned int len = sizeof(augmented_args->args); - struct syscall *syscall; if (pid_filter__has(&pids_filtered, getpid())) return 0; From 8daf87f5922730468c98ae588573386042bc2992 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Mon, 21 Nov 2022 07:52:37 +0000 Subject: [PATCH 2050/4122] perf trace: Remove unused bpf map 'syscalls' augmented_raw_syscalls.c defines the bpf map 'syscalls' which is initialized by perf tool in user space to indicate which system calls are enabled for tracing, on the other flip eBPF program relies on the map to filter out the trace events which are not enabled. The map also includes a field 'string_args_len[6]' which presents the string length if the corresponding argument is a string type. Now the map 'syscalls' is not used, bpf program doesn't use it as filter anymore, this is replaced by using the function bpf_tail_call() and PROG_ARRAY syscalls map. And we don't need to explicitly set the string length anymore, bpf_probe_read_str() is smart to copy the string and return string length. Therefore, it's safe to remove the bpf map 'syscalls'. To consolidate the code, this patch removes the definition of map 'syscalls' from augmented_raw_syscalls.c and drops code for using the map in the perf trace. Note, since function trace__set_ev_qualifier_bpf_filter() is removed, calling trace__init_syscall_bpf_progs() from it is also removed. We don't need to worry it because trace__init_syscall_bpf_progs() is still invoked from trace__init_syscalls_bpf_prog_array_maps() for initialization the system call's bpf program callback. 
After: # perf trace -e examples/bpf/augmented_raw_syscalls.c,open* --max-events 10 perf stat --quiet sleep 0.001 openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3 openat(AT_FDCWD, "/lib/aarch64-linux-gnu/libm.so.6", O_RDONLY|O_CLOEXEC) = 3 openat(AT_FDCWD, "/lib/aarch64-linux-gnu/libelf.so.1", O_RDONLY|O_CLOEXEC) = 3 openat(AT_FDCWD, "/lib/aarch64-linux-gnu/libdw.so.1", O_RDONLY|O_CLOEXEC) = 3 openat(AT_FDCWD, "/lib/aarch64-linux-gnu/libunwind.so.8", O_RDONLY|O_CLOEXEC) = 3 openat(AT_FDCWD, "/lib/aarch64-linux-gnu/libunwind-aarch64.so.8", O_RDONLY|O_CLOEXEC) = 3 openat(AT_FDCWD, "/lib/aarch64-linux-gnu/libcrypto.so.3", O_RDONLY|O_CLOEXEC) = 3 openat(AT_FDCWD, "/lib/aarch64-linux-gnu/libslang.so.2", O_RDONLY|O_CLOEXEC) = 3 openat(AT_FDCWD, "/lib/aarch64-linux-gnu/libperl.so.5.34", O_RDONLY|O_CLOEXEC) = 3 openat(AT_FDCWD, "/lib/aarch64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3 # perf trace -e examples/bpf/augmented_raw_syscalls.c --max-events 10 perf stat --quiet sleep 0.001 ... 
[continued]: execve()) = 0 brk(NULL) = 0xaaaab1d28000 faccessat(-100, "/etc/ld.so.preload", 4) = -1 ENOENT (No such file or directory) openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3 close(3) = 0 openat(AT_FDCWD, "/lib/aarch64-linux-gnu/libm.so.6", O_RDONLY|O_CLOEXEC) = 3 read(3, 0xfffff33f70d0, 832) = 832 munmap(0xffffb5519000, 28672) = 0 munmap(0xffffb55b7000, 32880) = 0 mprotect(0xffffb55a6000, 61440, PROT_NONE) = 0 Signed-off-by: Leo Yan Tested-by: Arnaldo Carvalho de Melo Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20221121075237.127706-6-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 101 ------------------ .../examples/bpf/augmented_raw_syscalls.c | 17 --- 2 files changed, 118 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 071e7598391f..543c379d2a57 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -122,7 +122,6 @@ struct trace { struct syscalltbl *sctbl; struct { struct syscall *table; - struct bpf_map *map; struct { // per syscall BPF_MAP_TYPE_PROG_ARRAY struct bpf_map *sys_enter, *sys_exit; @@ -1224,16 +1223,6 @@ struct syscall { struct syscall_arg_fmt *arg_fmt; }; -/* - * Must match what is in the BPF program: - * - * tools/perf/examples/bpf/augmented_raw_syscalls.c - */ -struct bpf_map_syscall_entry { - bool enabled; - u16 string_args_len[RAW_SYSCALL_ARGS_NUM]; -}; - /* * We need to have this 'calculated' boolean because in some cases we really * don't know what is the duration of a syscall, for instance, when we start @@ -3259,7 +3248,6 @@ static void trace__set_bpf_map_filtered_pids(struct trace *trace) static void trace__set_bpf_map_syscalls(struct trace *trace) { - trace->syscalls.map = trace__find_bpf_map_by_name(trace, "syscalls"); trace->syscalls.prog_array.sys_enter = 
trace__find_bpf_map_by_name(trace, "syscalls_sys_enter"); trace->syscalls.prog_array.sys_exit = trace__find_bpf_map_by_name(trace, "syscalls_sys_exit"); } @@ -3339,80 +3327,6 @@ static int trace__bpf_prog_sys_exit_fd(struct trace *trace, int id) return sc ? bpf_program__fd(sc->bpf_prog.sys_exit) : bpf_program__fd(trace->syscalls.unaugmented_prog); } -static void trace__init_bpf_map_syscall_args(struct trace *trace, int id, struct bpf_map_syscall_entry *entry) -{ - struct syscall *sc = trace__syscall_info(trace, NULL, id); - int arg = 0; - - if (sc == NULL) - goto out; - - for (; arg < sc->nr_args; ++arg) { - entry->string_args_len[arg] = 0; - if (sc->arg_fmt[arg].scnprintf == SCA_FILENAME) { - /* Should be set like strace -s strsize */ - entry->string_args_len[arg] = PATH_MAX; - } - } -out: - for (; arg < 6; ++arg) - entry->string_args_len[arg] = 0; -} -static int trace__set_ev_qualifier_bpf_filter(struct trace *trace) -{ - int fd = bpf_map__fd(trace->syscalls.map); - struct bpf_map_syscall_entry value = { - .enabled = !trace->not_ev_qualifier, - }; - int err = 0; - size_t i; - - for (i = 0; i < trace->ev_qualifier_ids.nr; ++i) { - int key = trace->ev_qualifier_ids.entries[i]; - - if (value.enabled) { - trace__init_bpf_map_syscall_args(trace, key, &value); - trace__init_syscall_bpf_progs(trace, key); - } - - err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST); - if (err) - break; - } - - return err; -} - -static int __trace__init_syscalls_bpf_map(struct trace *trace, bool enabled) -{ - int fd = bpf_map__fd(trace->syscalls.map); - struct bpf_map_syscall_entry value = { - .enabled = enabled, - }; - int err = 0, key; - - for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) { - if (enabled) - trace__init_bpf_map_syscall_args(trace, key, &value); - - err = bpf_map_update_elem(fd, &key, &value, BPF_ANY); - if (err) - break; - } - - return err; -} - -static int trace__init_syscalls_bpf_map(struct trace *trace) -{ - bool enabled = true; - - if 
(trace->ev_qualifier_ids.nr) - enabled = trace->not_ev_qualifier; - - return __trace__init_syscalls_bpf_map(trace, enabled); -} - static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace, struct syscall *sc) { struct tep_format_field *field, *candidate_field; @@ -3627,16 +3541,6 @@ static void trace__set_bpf_map_syscalls(struct trace *trace __maybe_unused) { } -static int trace__set_ev_qualifier_bpf_filter(struct trace *trace __maybe_unused) -{ - return 0; -} - -static int trace__init_syscalls_bpf_map(struct trace *trace __maybe_unused) -{ - return 0; -} - static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace __maybe_unused, const char *name __maybe_unused) { @@ -3670,8 +3574,6 @@ static bool trace__only_augmented_syscalls_evsels(struct trace *trace) static int trace__set_ev_qualifier_filter(struct trace *trace) { - if (trace->syscalls.map) - return trace__set_ev_qualifier_bpf_filter(trace); if (trace->syscalls.events.sys_enter) return trace__set_ev_qualifier_tp_filter(trace); return 0; @@ -4045,9 +3947,6 @@ static int trace__run(struct trace *trace, int argc, const char **argv) if (err < 0) goto out_error_mem; - if (trace->syscalls.map) - trace__init_syscalls_bpf_map(trace); - if (trace->syscalls.prog_array.sys_enter) trace__init_syscalls_bpf_prog_array_maps(trace); diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c index 4203f92c063b..9a03189d33d3 100644 --- a/tools/perf/examples/bpf/augmented_raw_syscalls.c +++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c @@ -37,23 +37,6 @@ struct __augmented_syscalls__ { __uint(max_entries, __NR_CPUS__); } __augmented_syscalls__ SEC(".maps"); -/* - * string_args_len: one per syscall arg, 0 means not a string or don't copy it, - * PATH_MAX for copying everything, any other value to limit - * it a la 'strace -s strsize'. 
- */ -struct syscall { - bool enabled; - __u16 string_args_len[6]; -}; - -struct syscalls { - __uint(type, BPF_MAP_TYPE_ARRAY); - __type(key, int); - __type(value, struct syscall); - __uint(max_entries, 512); -} syscalls SEC(".maps"); - /* * What to augment at entry? * From 7d54a4acd8c1de3ea70d31424757dcdb7f0a231a Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Mon, 21 Nov 2022 15:57:47 +0530 Subject: [PATCH 2051/4122] perf test: Skip watchpoint tests if no watchpoints available On IBM Power9, perf watchpoint tests fail since no hardware breakpoints are available. Detect this by checking the error returned by perf_event_open() and skip the tests in that case. Reported-by: Disha Goel Signed-off-by: Naveen N. Rao Acked-by: Ian Rogers Reviewed-by: Kajol Jain Tested-by: Kajol Jain Link: https://lore.kernel.org/r/20221121102747.208289-1-naveen.n.rao@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo Cc: Ravi Bangoria Cc: Arnaldo Carvalho de Melo Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-kernel@vger.kernel.org Cc: linux-perf-users@vger.kernel.org --- tools/perf/tests/wp.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tools/perf/tests/wp.c b/tools/perf/tests/wp.c index 56455da30341..cc8719609b19 100644 --- a/tools/perf/tests/wp.c +++ b/tools/perf/tests/wp.c @@ -59,8 +59,10 @@ static int __event(int wp_type, void *wp_addr, unsigned long wp_len) get__perf_event_attr(&attr, wp_type, wp_addr, wp_len); fd = sys_perf_event_open(&attr, 0, -1, -1, perf_event_open_cloexec_flag()); - if (fd < 0) + if (fd < 0) { + fd = -errno; pr_debug("failed opening event %x\n", attr.bp_type); + } return fd; } @@ -77,7 +79,7 @@ static int test__wp_ro(struct test_suite *test __maybe_unused, fd = __event(HW_BREAKPOINT_R, (void *)&data1, sizeof(data1)); if (fd < 0) - return -1; + return fd == -ENODEV ? 
TEST_SKIP : -1; tmp = data1; WP_TEST_ASSERT_VAL(fd, "RO watchpoint", 1); @@ -101,7 +103,7 @@ static int test__wp_wo(struct test_suite *test __maybe_unused, fd = __event(HW_BREAKPOINT_W, (void *)&data1, sizeof(data1)); if (fd < 0) - return -1; + return fd == -ENODEV ? TEST_SKIP : -1; tmp = data1; WP_TEST_ASSERT_VAL(fd, "WO watchpoint", 0); @@ -126,7 +128,7 @@ static int test__wp_rw(struct test_suite *test __maybe_unused, fd = __event(HW_BREAKPOINT_R | HW_BREAKPOINT_W, (void *)&data1, sizeof(data1)); if (fd < 0) - return -1; + return fd == -ENODEV ? TEST_SKIP : -1; tmp = data1; WP_TEST_ASSERT_VAL(fd, "RW watchpoint", 1); @@ -150,7 +152,7 @@ static int test__wp_modify(struct test_suite *test __maybe_unused, int subtest _ fd = __event(HW_BREAKPOINT_W, (void *)&data1, sizeof(data1)); if (fd < 0) - return -1; + return fd == -ENODEV ? TEST_SKIP : -1; data1 = tmp; WP_TEST_ASSERT_VAL(fd, "Modify watchpoint", 1); From 19030564ab116757e3270a567fd9d5b20b411d74 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 18 Nov 2022 16:27:50 -0800 Subject: [PATCH 2052/4122] perf inject: Set PERF_RECORD_MISC_BUILD_ID_SIZE With perf inject -b, it synthesizes build-id event for DSOs. But it missed to set the size and resulted in having trailing zeros. As perf record sets the size in write_build_id(), let's set the size here as well. 
Signed-off-by: Namhyung Kim Acked-by: Adrian Hunter Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221119002750.1568027-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/synthetic-events.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index cccd293b5312..0645795ff080 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -2218,8 +2218,9 @@ int perf_event__synthesize_build_id(struct perf_tool *tool, struct dso *pos, u16 len = pos->long_name_len + 1; len = PERF_ALIGN(len, NAME_ALIGN); memcpy(&ev.build_id.build_id, pos->bid.data, sizeof(pos->bid.data)); + ev.build_id.size = pos->bid.size; ev.build_id.header.type = PERF_RECORD_HEADER_BUILD_ID; - ev.build_id.header.misc = misc; + ev.build_id.header.misc = misc | PERF_RECORD_MISC_BUILD_ID_SIZE; ev.build_id.pid = machine->pid; ev.build_id.header.size = sizeof(ev.build_id) + len; memcpy(&ev.build_id.filename, pos->long_name, pos->long_name_len); From 2e9f5bda2f036aa38312fbeb99ff7e19c0221578 Mon Sep 17 00:00:00 2001 From: Michael Petlan Date: Tue, 22 Nov 2022 09:31:21 +0100 Subject: [PATCH 2053/4122] perf test: Fix record test on KVM guests Using precise flag with br_inst_retired.near_call causes the test fail on KVM guests, even when the guests have PMU forwarding enabled and the event itself is supported. Remove the precise flag in order to make the test work on KVM guests. 
Signed-off-by: Michael Petlan Acked-by: Ian Rogers Link: https://lore.kernel.org/r/20221122083121.6012-1-mpetlan@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/record.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh index 4dff89e3a3fd..4fbc74805d52 100755 --- a/tools/perf/tests/shell/record.sh +++ b/tools/perf/tests/shell/record.sh @@ -83,7 +83,7 @@ test_register_capture() { echo "Register capture test [Skipped missing registers]" return fi - if ! perf record -o - --intr-regs=di,r8,dx,cx -e br_inst_retired.near_call:p \ + if ! perf record -o - --intr-regs=di,r8,dx,cx -e br_inst_retired.near_call \ -c 1000 --per-thread ${testprog} 2> /dev/null \ | perf script -F ip,sym,iregs -i - 2> /dev/null \ | grep -q "DI:" From 7b7c22ccdf275018f715af95b7d052e8d4c6c690 Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 21 Nov 2022 11:30:18 +0000 Subject: [PATCH 2054/4122] MAINTAINERS: Update John Garry's email address for arm64 perf tooling Update my address. Signed-off-by: John Garry Acked-by: Will Deacon Cc: Ian Rogers Link: https://lore.kernel.org/r/20221121113018.1899426-1-john.g.garry@oracle.com Signed-off-by: Arnaldo Carvalho de Melo --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2585e7edc335..170f1763557a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16200,7 +16200,7 @@ F: tools/lib/perf/ F: tools/perf/ PERFORMANCE EVENTS TOOLING ARM64 -R: John Garry +R: John Garry R: Will Deacon R: James Clark R: Mike Leach From c66a36af7ba3a628453da1d91f42ee64fb36ea5a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 18 Nov 2022 11:01:09 -0800 Subject: [PATCH 2055/4122] perf lock contention: Do not use BPF task local storage It caused some troubles when a lock inside kmalloc is contended because task local storage would allocate memory using kmalloc. 
It'd create a recursion and even crash on my system. There could be a couple of workarounds but I think the simplest one is to use a pre-allocated hash map. We could fix the task local storage to use the safe BPF allocator, but that takes time, so let's make this change until that actually happens. Signed-off-by: Namhyung Kim Acked-by: Martin KaFai Lau Cc: Adrian Hunter Cc: Blake Jones Cc: Chris Li Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Song Liu Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20221118190109.1512674-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf_lock_contention.c | 1 + .../perf/util/bpf_skel/lock_contention.bpf.c | 34 ++++++++++++------- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c index 0deec1178778..4db9ad3d50c4 100644 --- a/tools/perf/util/bpf_lock_contention.c +++ b/tools/perf/util/bpf_lock_contention.c @@ -39,6 +39,7 @@ int lock_contention_prepare(struct lock_contention *con) bpf_map__set_value_size(skel->maps.stacks, con->max_stack * sizeof(u64)); bpf_map__set_max_entries(skel->maps.stacks, con->map_nr_entries); bpf_map__set_max_entries(skel->maps.lock_stat, con->map_nr_entries); + bpf_map__set_max_entries(skel->maps.tstamp, con->map_nr_entries); if (target__has_cpu(target)) ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus); diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c index 1bb8628e7c9f..9681cb59b0df 100644 --- a/tools/perf/util/bpf_skel/lock_contention.bpf.c +++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c @@ -40,10 +40,10 @@ struct { /* maintain timestamp at the beginning of contention */ struct { - __uint(type, BPF_MAP_TYPE_TASK_STORAGE); - __uint(map_flags, BPF_F_NO_PREALLOC); + __uint(type, BPF_MAP_TYPE_HASH); __type(key, int); __type(value, struct tstamp_data); + __uint(max_entries, MAX_ENTRIES); } 
tstamp SEC(".maps"); /* actual lock contention statistics */ @@ -103,18 +103,28 @@ static inline int can_record(void) SEC("tp_btf/contention_begin") int contention_begin(u64 *ctx) { - struct task_struct *curr; + __u32 pid; struct tstamp_data *pelem; if (!enabled || !can_record()) return 0; - curr = bpf_get_current_task_btf(); - pelem = bpf_task_storage_get(&tstamp, curr, NULL, - BPF_LOCAL_STORAGE_GET_F_CREATE); - if (!pelem || pelem->lock) + pid = bpf_get_current_pid_tgid(); + pelem = bpf_map_lookup_elem(&tstamp, &pid); + if (pelem && pelem->lock) return 0; + if (pelem == NULL) { + struct tstamp_data zero = {}; + + bpf_map_update_elem(&tstamp, &pid, &zero, BPF_ANY); + pelem = bpf_map_lookup_elem(&tstamp, &pid); + if (pelem == NULL) { + lost++; + return 0; + } + } + pelem->timestamp = bpf_ktime_get_ns(); pelem->lock = (__u64)ctx[0]; pelem->flags = (__u32)ctx[1]; @@ -128,7 +138,7 @@ int contention_begin(u64 *ctx) SEC("tp_btf/contention_end") int contention_end(u64 *ctx) { - struct task_struct *curr; + __u32 pid; struct tstamp_data *pelem; struct contention_key key; struct contention_data *data; @@ -137,8 +147,8 @@ int contention_end(u64 *ctx) if (!enabled) return 0; - curr = bpf_get_current_task_btf(); - pelem = bpf_task_storage_get(&tstamp, curr, NULL, 0); + pid = bpf_get_current_pid_tgid(); + pelem = bpf_map_lookup_elem(&tstamp, &pid); if (!pelem || pelem->lock != ctx[0]) return 0; @@ -156,7 +166,7 @@ int contention_end(u64 *ctx) }; bpf_map_update_elem(&lock_stat, &key, &first, BPF_NOEXIST); - pelem->lock = 0; + bpf_map_delete_elem(&tstamp, &pid); return 0; } @@ -169,7 +179,7 @@ int contention_end(u64 *ctx) if (data->min_time > duration) data->min_time = duration; - pelem->lock = 0; + bpf_map_delete_elem(&tstamp, &pid); return 0; } From 8749c27895a369a99e4a21709b3e3bec4785778f Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 23 Sep 2022 22:39:13 +0800 Subject: [PATCH 2056/4122] habanalabs: fix return value check in hl_fw_get_sec_attest_data() If 
hl_cpu_accessible_dma_pool_alloc() fails, we should check 'req_cpu_addr', fix it. Fixes: 0c88760f8f5e ("habanalabs/gaudi2: add secured attestation info uapi") Signed-off-by: Yang Yingliang Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 2de6a9bd564d..f18e53bbba6b 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -2983,7 +2983,7 @@ static int hl_fw_get_sec_attest_data(struct hl_device *hdev, u32 packet_id, void int rc; req_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, size, &req_dma_addr); - if (!data) { + if (!req_cpu_addr) { dev_err(hdev->dev, "Failed to allocate DMA memory for CPU-CP packet %u\n", packet_id); return -ENOMEM; From a925d90b365aa38565191857bddc3c12d80fda96 Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Tue, 23 Aug 2022 15:14:14 +0300 Subject: [PATCH 2057/4122] habanalabs: allow control device open during reset Monitoring apps would like to query device state at any time so we should allow it also during reset because it doesn't involve accessing the h/w. 
Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 22 +++++++++++++++++++ drivers/misc/habanalabs/common/habanalabs.h | 2 ++ .../misc/habanalabs/common/habanalabs_drv.c | 4 ++-- 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 233d8b46c831..1aaaa2004e34 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -355,6 +355,28 @@ bool hl_device_operational(struct hl_device *hdev, } } +bool hl_ctrl_device_operational(struct hl_device *hdev, + enum hl_device_status *status) +{ + enum hl_device_status current_status; + + current_status = hl_device_status(hdev); + if (status) + *status = current_status; + + switch (current_status) { + case HL_DEVICE_STATUS_MALFUNCTION: + return false; + case HL_DEVICE_STATUS_IN_RESET: + case HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE: + case HL_DEVICE_STATUS_NEEDS_RESET: + case HL_DEVICE_STATUS_OPERATIONAL: + case HL_DEVICE_STATUS_IN_DEVICE_CREATION: + default: + return true; + } +} + static void hpriv_release(struct kref *ref) { u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0}; diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 58c95b13be69..2ffb8378f565 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -3496,6 +3496,8 @@ int hl_device_open(struct inode *inode, struct file *filp); int hl_device_open_ctrl(struct inode *inode, struct file *filp); bool hl_device_operational(struct hl_device *hdev, enum hl_device_status *status); +bool hl_ctrl_device_operational(struct hl_device *hdev, + enum hl_device_status *status); enum hl_device_status hl_device_status(struct hl_device *hdev); int hl_device_set_debug_mode(struct hl_device *hdev, struct hl_ctx *ctx, bool enable); int hl_hw_queues_create(struct 
hl_device *hdev); diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c index 112632afe7d5..3ee44ea58d5c 100644 --- a/drivers/misc/habanalabs/common/habanalabs_drv.c +++ b/drivers/misc/habanalabs/common/habanalabs_drv.c @@ -270,9 +270,9 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp) mutex_lock(&hdev->fpriv_ctrl_list_lock); - if (!hl_device_operational(hdev, NULL)) { + if (!hl_ctrl_device_operational(hdev, NULL)) { dev_dbg_ratelimited(hdev->dev_ctrl, - "Can't open %s because it is disabled or in reset\n", + "Can't open %s because it is disabled\n", dev_name(hdev->dev_ctrl)); rc = -EPERM; goto out_err; From ea73ef14ddf93b8b1ae6ce1963846f43a81bb510 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Thu, 22 Sep 2022 12:30:32 +0300 Subject: [PATCH 2058/4122] habanalabs: Use simplified API for p2p dist calc Use the simplified API that calculates distance between two devices. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c index ef28f3b37b93..99b1d6ce26ae 100644 --- a/drivers/misc/habanalabs/common/memory.c +++ b/drivers/misc/habanalabs/common/memory.c @@ -1689,7 +1689,7 @@ static int hl_dmabuf_attach(struct dma_buf *dmabuf, hl_dmabuf = dmabuf->priv; hdev = hl_dmabuf->ctx->hdev; - rc = pci_p2pdma_distance_many(hdev->pdev, &attachment->dev, 1, true); + rc = pci_p2pdma_distance(hdev->pdev, attachment->dev, true); if (rc < 0) attachment->peer2peer = false; From 52d5e5469526216bcc418f26a2796d5af6226023 Mon Sep 17 00:00:00 2001 From: Dani Liberman Date: Mon, 19 Sep 2022 18:51:59 +0300 Subject: [PATCH 2059/4122] habanalabs: refactor razwi event notification This event notification was compatible only with gaudi, where razwi and page fault happens together. To make it compatible with all ASICs, this refactor contains: 1. 
Razwi notification will only notify about razwi info. New notification will be added in future patch, to retrieve data about page fault error. 2. Changed razwi info structure to support all ASICs. Signed-off-by: Dani Liberman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 22 +++++++ drivers/misc/habanalabs/common/habanalabs.h | 31 ++-------- .../misc/habanalabs/common/habanalabs_drv.c | 2 +- .../misc/habanalabs/common/habanalabs_ioctl.c | 12 +--- drivers/misc/habanalabs/gaudi/gaudi.c | 60 +++++++++---------- include/uapi/misc/habanalabs.h | 45 +++++++++----- 6 files changed, 88 insertions(+), 84 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 1aaaa2004e34..30ddaaae67e5 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -2253,3 +2253,25 @@ inline void hl_wreg(struct hl_device *hdev, u32 reg, u32 val) { writel(val, hdev->rmmio + reg); } + +void hl_capture_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_of_engines, + u8 flags) +{ + if (num_of_engines > HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR) { + dev_err(hdev->dev, + "Number of possible razwi initiators (%u) exceeded limit (%u)\n", + num_of_engines, HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR); + return; + } + + /* In case it's the first razwi since the device was opened, capture its parameters */ + if (atomic_cmpxchg(&hdev->captured_err_info.razwi_info_recorded, 0, 1)) + return; + + hdev->captured_err_info.razwi.timestamp = ktime_to_ns(ktime_get()); + hdev->captured_err_info.razwi.addr = addr; + hdev->captured_err_info.razwi.num_of_possible_engines = num_of_engines; + memcpy(&hdev->captured_err_info.razwi.engine_id[0], &engine_id[0], + num_of_engines * sizeof(u16)); + hdev->captured_err_info.razwi.flags = flags; +} diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 2ffb8378f565..cdc50c2c4de8 
100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -2925,30 +2925,6 @@ struct cs_timeout_info { u64 seq; }; -/** - * struct razwi_info - info about last razwi error occurred. - * @timestamp: razwi timestamp. - * @write_enable: if set writing to razwi parameters in the structure is enabled. - * otherwise - disabled, so the first (root cause) razwi will not be overwritten. - * @addr: address that caused razwi. - * @engine_id_1: engine id of the razwi initiator, if it was initiated by engine that does - * not have engine id it will be set to U16_MAX. - * @engine_id_2: second engine id of razwi initiator. Might happen that razwi have 2 possible - * engines which one them caused the razwi. In that case, it will contain the - * second possible engine id, otherwise it will be set to U16_MAX. - * @non_engine_initiator: in case the initiator of the razwi does not have engine id. - * @type: cause of razwi, page fault or access error, otherwise it will be set to U8_MAX. - */ -struct razwi_info { - ktime_t timestamp; - atomic_t write_enable; - u64 addr; - u16 engine_id_1; - u16 engine_id_2; - u8 non_engine_initiator; - u8 type; -}; - #define MAX_QMAN_STREAMS_INFO 4 #define OPCODE_INFO_MAX_ADDR_SIZE 8 /** @@ -2985,11 +2961,14 @@ struct undefined_opcode_info { * struct hl_error_info - holds information collected during an error. * @cs_timeout: CS timeout error information. * @razwi: razwi information. + * @razwi_info_recorded: if set writing to razwi information is enabled. + * otherwise - disabled, so the first (root cause) razwi will not be overwritten. 
* @undef_opcode: undefined opcode information */ struct hl_error_info { struct cs_timeout_info cs_timeout; - struct razwi_info razwi; + struct hl_info_razwi_event razwi; + atomic_t razwi_info_recorded; struct undefined_opcode_info undef_opcode; }; @@ -3800,6 +3779,8 @@ hl_mmap_mem_buf_alloc(struct hl_mem_mgr *mmg, struct hl_mmap_mem_buf_behavior *behavior, gfp_t gfp, void *args); __printf(2, 3) void hl_engine_data_sprintf(struct engines_data *e, const char *fmt, ...); +void hl_capture_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_of_engines, + u8 flags); #ifdef CONFIG_DEBUG_FS diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c index 3ee44ea58d5c..d87434b9bc16 100644 --- a/drivers/misc/habanalabs/common/habanalabs_drv.c +++ b/drivers/misc/habanalabs/common/habanalabs_drv.c @@ -212,7 +212,7 @@ int hl_device_open(struct inode *inode, struct file *filp) hl_debugfs_add_file(hpriv); atomic_set(&hdev->captured_err_info.cs_timeout.write_enable, 1); - atomic_set(&hdev->captured_err_info.razwi.write_enable, 1); + atomic_set(&hdev->captured_err_info.razwi_info_recorded, 0); hdev->captured_err_info.undef_opcode.write_enable = true; hdev->open_counter++; diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index 43afe40966e5..6aef4e24d122 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -603,20 +603,14 @@ static int razwi_info(struct hl_fpriv *hpriv, struct hl_info_args *args) { struct hl_device *hdev = hpriv->hdev; u32 max_size = args->return_size; - struct hl_info_razwi_event info = {0}; + struct hl_info_razwi_event *info = &hdev->captured_err_info.razwi; void __user *out = (void __user *) (uintptr_t) args->return_pointer; if ((!max_size) || (!out)) return -EINVAL; - info.timestamp = ktime_to_ns(hdev->captured_err_info.razwi.timestamp); - info.addr = 
hdev->captured_err_info.razwi.addr; - info.engine_id_1 = hdev->captured_err_info.razwi.engine_id_1; - info.engine_id_2 = hdev->captured_err_info.razwi.engine_id_2; - info.no_engine_id = hdev->captured_err_info.razwi.non_engine_initiator; - info.error_type = hdev->captured_err_info.razwi.type; - - return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0; + return copy_to_user(out, info, min_t(size_t, max_size, sizeof(struct hl_info_razwi_event))) + ? -EFAULT : 0; } static int undefined_opcode_info(struct hl_fpriv *hpriv, struct hl_info_args *args) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 92560414e843..f856ac51fde1 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -6505,8 +6505,8 @@ event_not_supported: } static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y, - bool is_write, s32 *engine_id_1, - s32 *engine_id_2) + bool is_write, u16 *engine_id_1, + u16 *engine_id_2) { u32 dma_id[2], dma_offset, err_cause[2], mask, i; @@ -6603,7 +6603,7 @@ unknown_initiator: } static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write, - u32 *engine_id_1, u32 *engine_id_2) + u16 *engine_id_1, u16 *engine_id_2) { u32 val, x_y, axi_id; @@ -6719,8 +6719,8 @@ static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool i return "unknown initiator"; } -static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u32 *engine_id_1, - u32 *engine_id_2) +static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_id_1, + u16 *engine_id_2, bool *is_read, bool *is_write) { if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) { @@ -6728,6 +6728,7 @@ static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u32 *engine_i "RAZWI event caused by illegal write of %s\n", gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2)); WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 
0); + *is_write = true; } if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) { @@ -6735,10 +6736,11 @@ static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u32 *engine_i "RAZWI event caused by illegal read of %s\n", gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2)); WREG32(mmMMU_UP_RAZWI_READ_VLD, 0); + *is_read = true; } } -static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u8 *type) +static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr) { struct gaudi_device *gaudi = hdev->asic_specific; u32 val; @@ -6753,8 +6755,6 @@ static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr *addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA); dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr); - *type = HL_RAZWI_PAGE_FAULT; - WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0); } @@ -6765,7 +6765,6 @@ static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr *addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA); dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr); - *type = HL_RAZWI_MMU_ACCESS_ERROR; WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0); } @@ -7302,46 +7301,41 @@ static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *e static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type, bool razwi) { - u32 engine_id_1, engine_id_2; + bool is_read = false, is_write = false; + u16 engine_id[2], num_of_razwi_eng = 0; char desc[64] = ""; u64 razwi_addr = 0; - u8 razwi_type; - int rc; + u8 razwi_flags = 0; /* * Init engine id by default as not valid and only if razwi initiated from engine with * engine id it will get valid value. 
- * Init razwi type to default, will be changed only if razwi caused by page fault of - * MMU access error */ - engine_id_1 = U16_MAX; - engine_id_2 = U16_MAX; - razwi_type = U8_MAX; + engine_id[0] = HL_RAZWI_NA_ENG_ID; + engine_id[1] = HL_RAZWI_NA_ENG_ID; gaudi_get_event_desc(event_type, desc, sizeof(desc)); dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n", event_type, desc); if (razwi) { - gaudi_print_and_get_razwi_info(hdev, &engine_id_1, &engine_id_2); - gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, &razwi_type); + gaudi_print_and_get_razwi_info(hdev, &engine_id[0], &engine_id[1], &is_read, + &is_write); + gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr); - /* In case it's the first razwi, save its parameters*/ - rc = atomic_cmpxchg(&hdev->captured_err_info.razwi.write_enable, 1, 0); - if (rc) { - hdev->captured_err_info.razwi.timestamp = ktime_get(); - hdev->captured_err_info.razwi.addr = razwi_addr; - hdev->captured_err_info.razwi.engine_id_1 = engine_id_1; - hdev->captured_err_info.razwi.engine_id_2 = engine_id_2; - /* - * If first engine id holds non valid value the razwi initiator - * does not have engine id - */ - hdev->captured_err_info.razwi.non_engine_initiator = - (engine_id_1 == U16_MAX); - hdev->captured_err_info.razwi.type = razwi_type; + if (is_read) + razwi_flags |= HL_RAZWI_READ; + if (is_write) + razwi_flags |= HL_RAZWI_WRITE; + if (engine_id[0] != HL_RAZWI_NA_ENG_ID) { + if (engine_id[1] != HL_RAZWI_NA_ENG_ID) + num_of_razwi_eng = 2; + else + num_of_razwi_eng = 1; } + + hl_capture_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng, razwi_flags); } } diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index e00ebe05097d..d6f84cb35e3d 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -1071,31 +1071,44 @@ struct hl_info_cs_timeout_event { __u64 seq; }; -#define HL_RAZWI_PAGE_FAULT 0 -#define HL_RAZWI_MMU_ACCESS_ERROR 1 +#define HL_RAZWI_NA_ENG_ID U16_MAX 
+#define HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR 128 +#define HL_RAZWI_READ BIT(0) +#define HL_RAZWI_WRITE BIT(1) +#define HL_RAZWI_LBW BIT(2) +#define HL_RAZWI_HBW BIT(3) +#define HL_RAZWI_RR BIT(4) +#define HL_RAZWI_ADDR_DEC BIT(5) /** * struct hl_info_razwi_event - razwi information. * @timestamp: timestamp of razwi. * @addr: address which accessing it caused razwi. - * @engine_id_1: engine id of the razwi initiator, if it was initiated by engine that does not - * have engine id it will be set to U16_MAX. - * @engine_id_2: second engine id of razwi initiator. Might happen that razwi have 2 possible - * engines which one them caused the razwi. In that case, it will contain the - * second possible engine id, otherwise it will be set to U16_MAX. - * @no_engine_id: if razwi initiator does not have engine id, this field will be set to 1, - * otherwise 0. - * @error_type: cause of razwi, page fault or access error, otherwise it will be set to U8_MAX. - * @pad: padding to 64 bit. + * @engine_id: engine id of the razwi initiator, if it was initiated by engine that does not + * have engine id it will be set to HL_RAZWI_NA_ENG_ID. If there are several possible + * engines which caused the razwi, it will hold all of them. + * @num_of_possible_engines: contains number of possible engine ids. In some asics, razwi indication + * might be common for several engines and there is no way to get the + * exact engine. In this case, engine_id array will be filled with all + * possible engines that caused this razwi. Also, there might be a possibility + * in gaudi, where we don't have an indication of a specific engine, in that case + * the value of this parameter will be zero.
+ * @flags: bitmask for additional data: HL_RAZWI_READ - razwi caused by read operation + * HL_RAZWI_WRITE - razwi caused by write operation + * HL_RAZWI_LBW - razwi caused by lbw fabric transaction + * HL_RAZWI_HBW - razwi caused by hbw fabric transaction + * HL_RAZWI_RR - razwi caused by range register + * HL_RAZWI_ADDR_DEC - razwi caused by address decode error + * Note: this data is not supported by all asics, in that case the relevant bits will not + * be set. */ struct hl_info_razwi_event { __s64 timestamp; __u64 addr; - __u16 engine_id_1; - __u16 engine_id_2; - __u8 no_engine_id; - __u8 error_type; - __u8 pad[2]; + __u16 engine_id[HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR]; + __u16 num_of_possible_engines; + __u8 flags; + __u8 pad[5]; }; #define MAX_QMAN_STREAMS_INFO 4 From 0502df9bbea0bd0e77c4a283e4cc34801038899a Mon Sep 17 00:00:00 2001 From: Bharat Jauhari Date: Tue, 27 Sep 2022 14:38:38 +0300 Subject: [PATCH 2060/4122] habanalabs: use lower_32_bits() This fixes sparse warning on doing cast to 32-bits Signed-off-by: Bharat Jauhari Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/habanalabs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index cdc50c2c4de8..f4b3fa4b0976 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -2528,7 +2528,7 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val); break; \ (val) = __elbi_read; \ } else {\ - (val) = RREG32((u32)(addr)); \ + (val) = RREG32(lower_32_bits(addr)); \ } \ if (cond) \ break; \ @@ -2539,7 +2539,7 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val); break; \ (val) = __elbi_read; \ } else {\ - (val) = RREG32((u32)(addr)); \ + (val) = RREG32(lower_32_bits(addr)); \ } \ break; \ } \ @@ -2594,7 +2594,7 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val); if (__rc) \ break; \ } else { \ - 
__read_val = RREG32((u32)(addr_arr)[__arr_idx]); \ + __read_val = RREG32(lower_32_bits(addr_arr[__arr_idx])); \ } \ if (__read_val == (expected_val)) \ __elem_bitmask &= ~BIT_ULL(__arr_idx); \ From 6d1c567f2ac66391edf5423247f27c82f6b82d86 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Thu, 22 Sep 2022 15:25:46 +0300 Subject: [PATCH 2061/4122] habanalabs/gaudi2: fix module ID for RAZWI handling RAZWI is optionally handled as part of the generic QM SEI error handling, but it always uses PDMA as the module ID. Fix it to use the suitable module ID according to the specific event. Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi2/gaudi2.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index 65e6cae6100a..b3685978f6ae 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -7602,6 +7602,7 @@ static void _gaudi2_handle_qm_sei_err(struct hl_device *hdev, u64 qman_base) static void gaudi2_handle_qm_sei_err(struct hl_device *hdev, u16 event_type, struct hl_eq_razwi_info *razwi_info) { + enum razwi_event_sources module; u64 qman_base; u8 index; @@ -7611,9 +7612,11 @@ static void gaudi2_handle_qm_sei_err(struct hl_device *hdev, u16 event_type, qman_base = mmDCORE0_TPC0_QM_BASE + (index / NUM_OF_TPC_PER_DCORE) * DCORE_OFFSET + (index % NUM_OF_TPC_PER_DCORE) * DCORE_TPC_OFFSET; + module = RAZWI_TPC; break; case GAUDI2_EVENT_TPC24_AXI_ERR_RSP: qman_base = mmDCORE0_TPC6_QM_BASE; + module = RAZWI_TPC; break; case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE: case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE: @@ -7623,16 +7626,19 @@ static void gaudi2_handle_qm_sei_err(struct hl_device *hdev, u16 event_type, (GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE); qman_base = mmDCORE0_MME_QM_BASE + index * DCORE_OFFSET; + module = RAZWI_MME; 
break; case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP: case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP: index = event_type - GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP; qman_base = mmPDMA0_QM_BASE + index * PDMA_OFFSET; + module = RAZWI_PDMA; break; case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE: case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE: index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE; qman_base = mmROT0_QM_BASE + index * ROT_OFFSET; + module = RAZWI_ROT; break; default: return; @@ -7647,7 +7653,7 @@ static void gaudi2_handle_qm_sei_err(struct hl_device *hdev, u16 event_type, /* check if RAZWI happened */ if (razwi_info) - gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_PDMA, 0, 0, razwi_info); + gaudi2_ack_module_razwi_event_handler(hdev, module, 0, 0, razwi_info); } static void gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type) From dd600db47ba60c3c69d4d24c73c43133c6040118 Mon Sep 17 00:00:00 2001 From: Dani Liberman Date: Sun, 18 Sep 2022 21:37:31 +0300 Subject: [PATCH 2062/4122] habanalabs: add page fault info uapi Only the first page fault will be saved. Besides the address which caused the page fault, the driver captures all of the mmu user mappings. User can retrieve this data via the new uapi (new opcode in INFO ioctl). 
Signed-off-by: Dani Liberman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 58 +++++++++++++++++++ drivers/misc/habanalabs/common/habanalabs.h | 22 ++++++- .../misc/habanalabs/common/habanalabs_drv.c | 1 + .../misc/habanalabs/common/habanalabs_ioctl.c | 42 ++++++++++++++ drivers/misc/habanalabs/gaudi/gaudi.c | 2 + include/uapi/misc/habanalabs.h | 31 ++++++++++ 6 files changed, 155 insertions(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 30ddaaae67e5..5dc6c77b4721 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -12,6 +12,7 @@ #include #include +#include #include @@ -2199,6 +2200,8 @@ void hl_device_fini(struct hl_device *hdev) hl_mmu_fini(hdev); + vfree(hdev->captured_err_info.pgf_info.user_mappings); + hl_eq_fini(hdev, &hdev->event_queue); kfree(hdev->shadow_cs_queue); @@ -2275,3 +2278,58 @@ void hl_capture_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_ num_of_engines * sizeof(u16)); hdev->captured_err_info.razwi.flags = flags; } +static void hl_capture_user_mappings(struct hl_device *hdev) +{ + struct page_fault_info *pgf_info = &hdev->captured_err_info.pgf_info; + struct hl_vm_hash_node *hnode; + struct hl_userptr *userptr; + struct hl_ctx *ctx; + u32 map_idx = 0; + int i; + + ctx = hl_get_compute_ctx(hdev); + if (!ctx) { + dev_err(hdev->dev, "Can't get user context for user mappings\n"); + return; + } + + mutex_lock(&ctx->mem_hash_lock); + hash_for_each(ctx->mem_hash, i, hnode, node) + pgf_info->num_of_user_mappings++; + + if (!pgf_info->num_of_user_mappings) + goto finish; + + /* In case we already allocated in previous session, need to release it before + * allocating new buffer. 
+ */ + vfree(pgf_info->user_mappings); + pgf_info->user_mappings = + vmalloc(pgf_info->num_of_user_mappings * sizeof(struct hl_user_mapping)); + if (!pgf_info->user_mappings) { + pgf_info->num_of_user_mappings = 0; + goto finish; + } + + hash_for_each(ctx->mem_hash, i, hnode, node) { + userptr = hnode->ptr; + pgf_info->user_mappings[map_idx].dev_va = hnode->vaddr; + pgf_info->user_mappings[map_idx].size = userptr->size; + map_idx++; + } +finish: + mutex_unlock(&ctx->mem_hash_lock); + hl_ctx_put(ctx); +} + +void hl_capture_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_pmmu) +{ + /* Capture only the first page fault */ + if (atomic_cmpxchg(&hdev->captured_err_info.pgf_info_recorded, 0, 1)) + return; + + hdev->captured_err_info.pgf_info.pgf.timestamp = ktime_to_ns(ktime_get()); + hdev->captured_err_info.pgf_info.pgf.addr = addr; + hdev->captured_err_info.pgf_info.pgf.engine_id = eng_id; + hl_capture_user_mappings(hdev); +} diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index f4b3fa4b0976..1489240d5a3a 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -2957,19 +2957,38 @@ struct undefined_opcode_info { bool write_enable; }; +/** + * struct page_fault_info - info about page fault + * @pgf: page fault information. + * @user_mappings: buffer containing user mappings. + * @num_of_user_mappings: number of user mappings. + */ +struct page_fault_info { + struct hl_page_fault_info pgf; + struct hl_user_mapping *user_mappings; + u64 num_of_user_mappings; +}; + /** * struct hl_error_info - holds information collected during an error. * @cs_timeout: CS timeout error information. * @razwi: razwi information. * @razwi_info_recorded: if set writing to razwi information is enabled. - * otherwise - disabled, so the first (root cause) razwi will not be overwritten.
+ * otherwise - disabled, so the first (root cause) razwi will not be + * overwritten. * @undef_opcode: undefined opcode information + * @pgf_info: page fault information. + * @pgf_info_recorded: if set writing to page fault information is enabled. + * otherwise - disabled, so the first (root cause) page fault will not be + * overwritten. */ struct hl_error_info { struct cs_timeout_info cs_timeout; struct hl_info_razwi_event razwi; atomic_t razwi_info_recorded; struct undefined_opcode_info undef_opcode; + struct page_fault_info pgf_info; + atomic_t pgf_info_recorded; }; /** @@ -3781,6 +3800,7 @@ hl_mmap_mem_buf_alloc(struct hl_mem_mgr *mmg, __printf(2, 3) void hl_engine_data_sprintf(struct engines_data *e, const char *fmt, ...); void hl_capture_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_of_engines, u8 flags); +void hl_capture_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_pmmu); #ifdef CONFIG_DEBUG_FS diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c index d87434b9bc16..714994725224 100644 --- a/drivers/misc/habanalabs/common/habanalabs_drv.c +++ b/drivers/misc/habanalabs/common/habanalabs_drv.c @@ -213,6 +213,7 @@ int hl_device_open(struct inode *inode, struct file *filp) atomic_set(&hdev->captured_err_info.cs_timeout.write_enable, 1); atomic_set(&hdev->captured_err_info.razwi_info_recorded, 0); + atomic_set(&hdev->captured_err_info.pgf_info_recorded, 0); hdev->captured_err_info.undef_opcode.write_enable = true; hdev->open_counter++; diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index 6aef4e24d122..cac2c7fb14f1 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -778,6 +778,42 @@ static int engine_status_info(struct hl_fpriv *hpriv, struct hl_info_args *args) return rc; } +static int page_fault_info(struct hl_fpriv *hpriv, struct 
hl_info_args *args) +{ + struct hl_device *hdev = hpriv->hdev; + u32 max_size = args->return_size; + struct hl_page_fault_info *info = &hdev->captured_err_info.pgf_info.pgf; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + + if ((!max_size) || (!out)) + return -EINVAL; + + return copy_to_user(out, info, min_t(size_t, max_size, sizeof(struct hl_page_fault_info))) + ? -EFAULT : 0; +} + +static int user_mappings_info(struct hl_fpriv *hpriv, struct hl_info_args *args) +{ + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + u32 user_buf_size = args->return_size; + struct hl_device *hdev = hpriv->hdev; + struct page_fault_info *pgf_info; + u64 actual_size; + + pgf_info = &hdev->captured_err_info.pgf_info; + args->array_size = pgf_info->num_of_user_mappings; + + if (!out) + return -EINVAL; + + actual_size = pgf_info->num_of_user_mappings * sizeof(struct hl_user_mapping); + if (user_buf_size < actual_size) + return -ENOMEM; + + return copy_to_user(out, pgf_info->user_mappings, min_t(size_t, user_buf_size, actual_size)) + ? 
-EFAULT : 0; +} + static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, struct device *dev) { @@ -837,6 +873,12 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, case HL_INFO_GET_EVENTS: return events_info(hpriv, args); + case HL_INFO_PAGE_FAULT_EVENT: + return page_fault_info(hpriv, args); + + case HL_INFO_USER_MAPPINGS: + return user_mappings_info(hpriv, args); + default: break; } diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index f856ac51fde1..1a99f7be8b60 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -6755,6 +6755,8 @@ static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr *addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA); dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr); + hl_capture_page_fault(hdev, *addr, 0, true); + WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0); } diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index d6f84cb35e3d..2b794f54e2ed 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -778,6 +778,9 @@ enum hl_server_type { * HL_INFO_UNREGISTER_EVENTFD - Unregister eventfd * HL_INFO_GET_EVENTS - Retrieve the last occurred events * HL_INFO_UNDEFINED_OPCODE_EVENT - Retrieve last undefined opcode error information. + * HL_INFO_ENGINE_STATUS - Retrieve the status of all the h/w engines in the asic. + * HL_INFO_PAGE_FAULT_EVENT - Retrieve parameters of captured page fault. + * HL_INFO_USER_MAPPINGS - Retrieve user mappings, captured after page fault event. 
*/ #define HL_INFO_HW_IP_INFO 0 #define HL_INFO_HW_EVENTS 1 @@ -809,6 +812,8 @@ enum hl_server_type { #define HL_INFO_GET_EVENTS 30 #define HL_INFO_UNDEFINED_OPCODE_EVENT 31 #define HL_INFO_ENGINE_STATUS 32 +#define HL_INFO_PAGE_FAULT_EVENT 33 +#define HL_INFO_USER_MAPPINGS 34 #define HL_INFO_VERSION_MAX_LEN 128 #define HL_INFO_CARD_NAME_MAX_LEN 16 @@ -1187,6 +1192,29 @@ struct hl_info_sec_attest { __u8 pad0[2]; }; +/** + * struct hl_page_fault_info - page fault information. + * @timestamp: timestamp of page fault. + * @addr: address which accessing it caused page fault. + * @engine_id: engine id which caused the page fault, supported only in gaudi3. + */ +struct hl_page_fault_info { + __s64 timestamp; + __u64 addr; + __u16 engine_id; + __u8 pad[6]; +}; + +/** + * struct hl_user_mapping - user mapping information. + * @dev_va: device virtual address. + * @size: virtual address mapping size. + */ +struct hl_user_mapping { + __u64 dev_va; + __u64 size; +}; + enum gaudi_dcores { HL_GAUDI_WS_DCORE, HL_GAUDI_WN_DCORE, @@ -1213,6 +1241,8 @@ enum gaudi_dcores { * needed, hence updating this variable so user will know the exact amount * of bytes copied by the kernel to the buffer. * @sec_attest_nonce: Nonce number used for attestation report. + * @array_size: Number of array members copied to user buffer. + * Relevant for HL_INFO_USER_MAPPINGS info ioctl. * @pad: Padding to 64 bit. */ struct hl_info_args { @@ -1228,6 +1258,7 @@ struct hl_info_args { __u32 eventfd; __u32 user_buffer_actual_size; __u32 sec_attest_nonce; + __u32 array_size; }; __u32 pad; From 189b203ebbea181d678b4d8bf3547eb78c8ae44a Mon Sep 17 00:00:00 2001 From: Dafna Hirschfeld Date: Wed, 28 Sep 2022 11:38:00 +0300 Subject: [PATCH 2063/4122] habanalabs: replace 'pf' to 'prefetch' pf was an abbreviation for prefetch but because pf already stands for 'physical function', we decided to change it to 'prefetch'. 
Signed-off-by: Dafna Hirschfeld Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 14 ++++++------- drivers/misc/habanalabs/common/habanalabs.h | 8 ++++---- drivers/misc/habanalabs/common/mmu/mmu.c | 22 ++++++++++----------- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 5dc6c77b4721..bf675cf39f71 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -783,8 +783,8 @@ static int device_early_init(struct hl_device *hdev) goto free_cs_cmplt_wq; } - hdev->pf_wq = alloc_workqueue("hl-prefetch", WQ_UNBOUND, 0); - if (!hdev->pf_wq) { + hdev->prefetch_wq = alloc_workqueue("hl-prefetch", WQ_UNBOUND, 0); + if (!hdev->prefetch_wq) { dev_err(hdev->dev, "Failed to allocate MMU prefetch workqueue\n"); rc = -ENOMEM; goto free_ts_free_wq; @@ -794,7 +794,7 @@ static int device_early_init(struct hl_device *hdev) GFP_KERNEL); if (!hdev->hl_chip_info) { rc = -ENOMEM; - goto free_pf_wq; + goto free_prefetch_wq; } rc = hl_mmu_if_set_funcs(hdev); @@ -833,8 +833,8 @@ free_cb_mgr: hl_mem_mgr_fini(&hdev->kernel_mem_mgr); free_chip_info: kfree(hdev->hl_chip_info); -free_pf_wq: - destroy_workqueue(hdev->pf_wq); +free_prefetch_wq: + destroy_workqueue(hdev->prefetch_wq); free_ts_free_wq: destroy_workqueue(hdev->ts_free_obj_wq); free_cs_cmplt_wq: @@ -877,7 +877,7 @@ static void device_early_fini(struct hl_device *hdev) kfree(hdev->hl_chip_info); - destroy_workqueue(hdev->pf_wq); + destroy_workqueue(hdev->prefetch_wq); destroy_workqueue(hdev->ts_free_obj_wq); destroy_workqueue(hdev->cs_cmplt_wq); destroy_workqueue(hdev->eq_wq); @@ -1076,7 +1076,7 @@ static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_r hl_cs_rollback_all(hdev, skip_wq_flush); /* flush the MMU prefetch workqueue */ - flush_workqueue(hdev->pf_wq); + flush_workqueue(hdev->prefetch_wq); /* Release all pending 
user interrupts, each pending user interrupt * holds a reference to user context diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 1489240d5a3a..6d8ce4a1dbb1 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -2811,7 +2811,7 @@ struct hl_mmu_funcs { /** * struct hl_prefetch_work - prefetch work structure handler - * @pf_work: actual work struct. + * @prefetch_work: actual work struct. * @ctx: compute context. * @va: virtual address to pre-fetch. * @size: pre-fetch size. @@ -2819,7 +2819,7 @@ struct hl_mmu_funcs { * @asid: ASID for maintenance operation. */ struct hl_prefetch_work { - struct work_struct pf_work; + struct work_struct prefetch_work; struct hl_ctx *ctx; u64 va; u64 size; @@ -3060,7 +3060,7 @@ struct hl_reset_info { * @cs_cmplt_wq: work queue of CS completions for executing work in process * context. * @ts_free_obj_wq: work queue for timestamp registration objects release. - * @pf_wq: work queue for MMU pre-fetch operations. + * @prefetch_wq: work queue for MMU pre-fetch operations. * @kernel_ctx: Kernel driver context structure. * @kernel_queues: array of hl_hw_queue. * @cs_mirror_list: CS mirror list for TDR. 
@@ -3231,7 +3231,7 @@ struct hl_device { struct workqueue_struct *eq_wq; struct workqueue_struct *cs_cmplt_wq; struct workqueue_struct *ts_free_obj_wq; - struct workqueue_struct *pf_wq; + struct workqueue_struct *prefetch_wq; struct hl_ctx *kernel_ctx; struct hl_hw_queue *kernel_queues; struct list_head cs_mirror_list; diff --git a/drivers/misc/habanalabs/common/mmu/mmu.c b/drivers/misc/habanalabs/common/mmu/mmu.c index cf8946266615..589179f8cd41 100644 --- a/drivers/misc/habanalabs/common/mmu/mmu.c +++ b/drivers/misc/habanalabs/common/mmu/mmu.c @@ -699,7 +699,7 @@ int hl_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard, static void hl_mmu_prefetch_work_function(struct work_struct *work) { - struct hl_prefetch_work *pfw = container_of(work, struct hl_prefetch_work, pf_work); + struct hl_prefetch_work *pfw = container_of(work, struct hl_prefetch_work, prefetch_work); struct hl_ctx *ctx = pfw->ctx; struct hl_device *hdev = ctx->hdev; @@ -723,25 +723,25 @@ put_ctx: int hl_mmu_prefetch_cache_range(struct hl_ctx *ctx, u32 flags, u32 asid, u64 va, u64 size) { - struct hl_prefetch_work *handle_pf_work; + struct hl_prefetch_work *handle_prefetch_work; - handle_pf_work = kmalloc(sizeof(*handle_pf_work), GFP_KERNEL); - if (!handle_pf_work) + handle_prefetch_work = kmalloc(sizeof(*handle_prefetch_work), GFP_KERNEL); + if (!handle_prefetch_work) return -ENOMEM; - INIT_WORK(&handle_pf_work->pf_work, hl_mmu_prefetch_work_function); - handle_pf_work->ctx = ctx; - handle_pf_work->va = va; - handle_pf_work->size = size; - handle_pf_work->flags = flags; - handle_pf_work->asid = asid; + INIT_WORK(&handle_prefetch_work->prefetch_work, hl_mmu_prefetch_work_function); + handle_prefetch_work->ctx = ctx; + handle_prefetch_work->va = va; + handle_prefetch_work->size = size; + handle_prefetch_work->flags = flags; + handle_prefetch_work->asid = asid; /* * as actual prefetch is done in a WQ we must get the context (and put it * at the end of the work function) */ 
hl_ctx_get(ctx); - queue_work(ctx->hdev->pf_wq, &handle_pf_work->pf_work); + queue_work(ctx->hdev->prefetch_wq, &handle_prefetch_work->prefetch_work); return 0; } From 16448d644404351e685466ab14e7e043ad67673c Mon Sep 17 00:00:00 2001 From: Koby Elbaz Date: Wed, 28 Sep 2022 15:56:13 +0300 Subject: [PATCH 2064/4122] habanalabs/gaudi2: remove privileged MME clock configuration Privileged MME clock configuration is removed as it is done by the f/w. Signed-off-by: Koby Elbaz Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi2/gaudi2.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index b3685978f6ae..cb048920ffc8 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -4535,7 +4535,7 @@ static void gaudi2_init_mme_acc(struct hl_device *hdev, u32 reg_base) static void gaudi2_init_dcore_mme(struct hl_device *hdev, int dcore_id, bool config_qman_only) { - u32 queue_id_base, reg_base, clk_en_addr = 0; + u32 queue_id_base, reg_base; switch (dcore_id) { case 0: @@ -4543,23 +4543,18 @@ static void gaudi2_init_dcore_mme(struct hl_device *hdev, int dcore_id, break; case 1: queue_id_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0; - clk_en_addr = mmDCORE1_MME_CTRL_LO_QM_SLV_CLK_EN; break; case 2: queue_id_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0; break; case 3: queue_id_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0; - clk_en_addr = mmDCORE3_MME_CTRL_LO_QM_SLV_CLK_EN; break; default: dev_err(hdev->dev, "Invalid dcore id %u\n", dcore_id); return; } - if (clk_en_addr && !(hdev->fw_components & FW_TYPE_BOOT_CPU)) - WREG32(clk_en_addr, 0x1); - if (!config_qman_only) { reg_base = gaudi2_mme_acc_blocks_bases[dcore_id]; gaudi2_init_mme_acc(hdev, reg_base); From 5731b6e6f08a4a3adf944fd0436f77f3e9ce1725 Mon Sep 17 00:00:00 2001 From: Tal Cohen Date: Wed, 28 Sep 2022 18:33:19 +0300 Subject: [PATCH 2065/4122] habanalabs/gaudi2: 
add device unavailable notification Device unavailable notifies the user that there isn't an option to retrieve debug information from the device. When a critical device error occurs and the f/w performs the device reset, a device unavailable notification shall be sent to the user process. Signed-off-by: Tal Cohen Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi2/gaudi2.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index cb048920ffc8..e9c4ec429bae 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -8576,7 +8576,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent { u32 ctl, reset_flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY; struct gaudi2_device *gaudi2 = hdev->asic_specific; - bool reset_required = false, skip_reset = false; + bool reset_required = false, skip_reset = false, is_critical = false; int index, sbte_index; u64 event_mask = 0; u16 event_type; @@ -8602,6 +8602,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; reset_required = gaudi2_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); + is_critical = eq_entry->ecc_data.is_critical; break; case GAUDI2_EVENT_TPC0_QM ... 
GAUDI2_EVENT_PDMA1_QM: @@ -8976,9 +8977,16 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent return; reset_device: - if (hdev->hard_reset_on_fw_events) { + if (hdev->asic_prop.fw_security_enabled && is_critical) { + reset_flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW; + + /* notify on device unavailable while the reset triggered by fw */ + event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET | + HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE); hl_device_reset(hdev, reset_flags); + } else if (hdev->hard_reset_on_fw_events) { event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; + hl_device_reset(hdev, reset_flags); } else { if (!gaudi2_irq_map_table[event_type].msg) hl_fw_unmask_irq(hdev, event_type); From 3a83ebc521b2e57af070b5667c60ac2d50347658 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Fri, 30 Sep 2022 14:09:32 +0300 Subject: [PATCH 2066/4122] habanalabs: skip idle status check if reset on device release If reset upon device release is enabled, there is no need to check the device idle status in hpriv_release(), because device is going to be reset in any case. 
Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index bf675cf39f71..e60ed0c8a9db 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -398,16 +398,14 @@ static void hpriv_release(struct kref *ref) mutex_destroy(&hpriv->ctx_lock); mutex_destroy(&hpriv->restore_phase_mutex); - if ((!hdev->pldm) && (hdev->pdev) && - (!hdev->asic_funcs->is_device_idle(hdev, - idle_mask, - HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL))) { - dev_err(hdev->dev, - "device not idle after user context is closed (0x%llx_%llx)\n", - idle_mask[1], idle_mask[0]); + /* No need for idle status check if device is going to be reset in any case */ + if (!hdev->reset_upon_device_release && hdev->pdev && !hdev->pldm) + device_is_idle = hdev->asic_funcs->is_device_idle(hdev, idle_mask, + HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL); - device_is_idle = false; - } + if (!device_is_idle) + dev_err(hdev->dev, "device not idle after user context is closed (0x%llx_%llx)\n", + idle_mask[1], idle_mask[0]); /* We need to remove the user from the list to make sure the reset process won't * try to kill the user process. Because, if we got here, it means there are no From 51236cd95e7bcea41e57fb2cf238312be21dcf58 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Fri, 30 Sep 2022 14:19:21 +0300 Subject: [PATCH 2067/4122] habanalabs: allow unregistering eventfd when device non-operational Unregistering eventfd is for releasing host resources and doesn't involve an access to the device. As such, there is no reason to disallow it when device isn't operational. 
Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/habanalabs_ioctl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index cac2c7fb14f1..5ce5c42e2731 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -879,6 +879,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, case HL_INFO_USER_MAPPINGS: return user_mappings_info(hpriv, args); + case HL_INFO_UNREGISTER_EVENTFD: + return eventfd_unregister(hpriv, args); + default: break; } @@ -935,9 +938,6 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, case HL_INFO_REGISTER_EVENTFD: return eventfd_register(hpriv, args); - case HL_INFO_UNREGISTER_EVENTFD: - return eventfd_unregister(hpriv, args); - case HL_INFO_ENGINE_STATUS: return engine_status_info(hpriv, args); From 1eebb259290b1be5398fec953bdd7923a5cbf33e Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Fri, 30 Sep 2022 14:36:27 +0300 Subject: [PATCH 2068/4122] habanalabs: move reset workqueue to be under hl_device 'struct hl_device_reset_work' is used as a wrapper for the reset work and its parameters, including the reset workqueue on which it runs. In a future commit, another reset related work with similar parameters is going to be added, but it won't use the reset workqueue. As in any case there is a single reset workqueue, and to allow the reuse of this structure, move the reset workqueue to 'struct hl_device'. 
Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 15 ++++++--------- drivers/misc/habanalabs/common/habanalabs.h | 12 ++++++------ 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index e60ed0c8a9db..e9b373a8cdad 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -684,9 +684,8 @@ static void device_hard_reset_pending(struct work_struct *work) "Could not reset device. will try again in %u seconds", HL_PENDING_RESET_PER_SEC); - queue_delayed_work(device_reset_work->wq, - &device_reset_work->reset_work, - msecs_to_jiffies(HL_PENDING_RESET_PER_SEC * 1000)); + queue_delayed_work(hdev->reset_wq, &device_reset_work->reset_work, + msecs_to_jiffies(HL_PENDING_RESET_PER_SEC * 1000)); } } @@ -801,9 +800,8 @@ static int device_early_init(struct hl_device *hdev) hl_mem_mgr_init(hdev->dev, &hdev->kernel_mem_mgr); - hdev->device_reset_work.wq = - create_singlethread_workqueue("hl_device_reset"); - if (!hdev->device_reset_work.wq) { + hdev->reset_wq = create_singlethread_workqueue("hl_device_reset"); + if (!hdev->reset_wq) { rc = -ENOMEM; dev_err(hdev->dev, "Failed to create device reset WQ\n"); goto free_cb_mgr; @@ -879,7 +877,7 @@ static void device_early_fini(struct hl_device *hdev) destroy_workqueue(hdev->ts_free_obj_wq); destroy_workqueue(hdev->cs_cmplt_wq); destroy_workqueue(hdev->eq_wq); - destroy_workqueue(hdev->device_reset_work.wq); + destroy_workqueue(hdev->reset_wq); for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) destroy_workqueue(hdev->cq_wq[i]); @@ -1460,8 +1458,7 @@ again: * Because the reset function can't run from heartbeat work, * we need to call the reset function from a dedicated work. 
*/ - queue_delayed_work(hdev->device_reset_work.wq, - &hdev->device_reset_work.reset_work, 0); + queue_delayed_work(hdev->reset_wq, &hdev->device_reset_work.reset_work, 0); return 0; } diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 6d8ce4a1dbb1..4913197c433e 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -2682,17 +2682,15 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val); struct hwmon_chip_info; /** - * struct hl_device_reset_work - reset workqueue task wrapper. - * @wq: work queue for device reset procedure. + * struct hl_device_reset_work - reset work wrapper. * @reset_work: reset work to be done. * @hdev: habanalabs device structure. * @flags: reset flags. */ struct hl_device_reset_work { - struct workqueue_struct *wq; - struct delayed_work reset_work; - struct hl_device *hdev; - u32 flags; + struct delayed_work reset_work; + struct hl_device *hdev; + u32 flags; }; /** @@ -3061,6 +3059,7 @@ struct hl_reset_info { * context. * @ts_free_obj_wq: work queue for timestamp registration objects release. * @prefetch_wq: work queue for MMU pre-fetch operations. + * @reset_wq: work queue for device reset procedure. * @kernel_ctx: Kernel driver context structure. * @kernel_queues: array of hl_hw_queue. * @cs_mirror_list: CS mirror list for TDR. @@ -3232,6 +3231,7 @@ struct hl_device { struct workqueue_struct *cs_cmplt_wq; struct workqueue_struct *ts_free_obj_wq; struct workqueue_struct *prefetch_wq; + struct workqueue_struct *reset_wq; struct hl_ctx *kernel_ctx; struct hl_hw_queue *kernel_queues; struct list_head cs_mirror_list; From 17f3f42af2bcddc38ff08b355e007f3b6d5ce70c Mon Sep 17 00:00:00 2001 From: Dani Liberman Date: Thu, 29 Sep 2022 10:21:28 +0300 Subject: [PATCH 2069/4122] habanalabs: handle HBM MMU when capturing page fault data In case of HBM MMU page fault, capture its relevant mappings. 
Signed-off-by: Dani Liberman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 29 ++++++++++++++++++------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index e9b373a8cdad..b8b32285720d 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -2273,15 +2273,20 @@ void hl_capture_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_ num_of_engines * sizeof(u16)); hdev->captured_err_info.razwi.flags = flags; } -static void hl_capture_user_mappings(struct hl_device *hdev) +static void hl_capture_user_mappings(struct hl_device *hdev, bool is_pmmu) { struct page_fault_info *pgf_info = &hdev->captured_err_info.pgf_info; + struct hl_vm_phys_pg_pack *phys_pg_pack = NULL; struct hl_vm_hash_node *hnode; struct hl_userptr *userptr; + enum vm_type *vm_type; struct hl_ctx *ctx; u32 map_idx = 0; int i; + /* Reset previous session count*/ + pgf_info->num_of_user_mappings = 0; + ctx = hl_get_compute_ctx(hdev); if (!ctx) { dev_err(hdev->dev, "Can't get user context for user mappings\n"); @@ -2290,7 +2295,7 @@ static void hl_capture_user_mappings(struct hl_device *hdev) mutex_lock(&ctx->mem_hash_lock); hash_for_each(ctx->mem_hash, i, hnode, node) - pgf_info->num_of_user_mappings++; + pgf_info->num_of_user_mappings++; if (!pgf_info->num_of_user_mappings) goto finish; @@ -2300,17 +2305,25 @@ static void hl_capture_user_mappings(struct hl_device *hdev) */ vfree(pgf_info->user_mappings); pgf_info->user_mappings = - vmalloc(pgf_info->num_of_user_mappings * sizeof(struct hl_user_mapping)); + vzalloc(pgf_info->num_of_user_mappings * sizeof(struct hl_user_mapping)); if (!pgf_info->user_mappings) { pgf_info->num_of_user_mappings = 0; goto finish; } hash_for_each(ctx->mem_hash, i, hnode, node) { - userptr = hnode->ptr; - pgf_info->user_mappings[map_idx].dev_va = hnode->vaddr; - 
pgf_info->user_mappings[map_idx].size = userptr->size; - map_idx++; + vm_type = hnode->ptr; + if ((*vm_type == VM_TYPE_USERPTR) && (is_pmmu)) { + userptr = hnode->ptr; + pgf_info->user_mappings[map_idx].dev_va = hnode->vaddr; + pgf_info->user_mappings[map_idx].size = userptr->size; + map_idx++; + } else if ((*vm_type == VM_TYPE_PHYS_PACK) && (!is_pmmu)) { + phys_pg_pack = hnode->ptr; + pgf_info->user_mappings[map_idx].dev_va = hnode->vaddr; + pgf_info->user_mappings[map_idx].size = phys_pg_pack->total_size; + map_idx++; + } } finish: mutex_unlock(&ctx->mem_hash_lock); @@ -2326,5 +2339,5 @@ void hl_capture_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is hdev->captured_err_info.pgf_info.pgf.timestamp = ktime_to_ns(ktime_get()); hdev->captured_err_info.pgf_info.pgf.addr = addr; hdev->captured_err_info.pgf_info.pgf.engine_id = eng_id; - hl_capture_user_mappings(hdev); + hl_capture_user_mappings(hdev, is_pmmu); } From 15ac503cdc0d9a1275d82a926c673359cf69ebef Mon Sep 17 00:00:00 2001 From: Dani Liberman Date: Wed, 28 Sep 2022 22:14:55 +0300 Subject: [PATCH 2070/4122] habanalabs/gaudi2: capture RAZWI information Added function to calculate possible engines which caused RAZWI (read-only zero, write ignored), from a given router id or module index. When getting RAZWI via PSOC IP, first the router id is calculated and then the possible engines that caused the RAZWI are calculated. There is a possibility that the RAZWI initiator is not an engine. In that case, it will not be included in possible engines as it doesn't have an engine id. RAZWI information is captured when receiving event from engine or via PSOC IP. 
Signed-off-by: Dani Liberman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi2/gaudi2.c | 255 ++++++++++++++++++++++-- include/uapi/misc/habanalabs.h | 4 + 2 files changed, 242 insertions(+), 17 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index e9c4ec429bae..1058c8a0e644 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -128,6 +128,8 @@ #define GAUDI2_VDEC_MSIX_ENTRIES (GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM - \ GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 1) +#define ENGINE_ID_DCORE_OFFSET (GAUDI2_DCORE1_ENGINE_ID_EDMA_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0) + enum hl_pmmu_fatal_cause { LATENCY_RD_OUT_FIFO_OVERRUN, LATENCY_WR_OUT_FIFO_OVERRUN, @@ -7092,9 +7094,12 @@ static void gaudi2_handle_qman_err_generic(struct hl_device *hdev, const char *q static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev, u64 rtr_mstr_if_base_addr, bool is_write, char *name, - bool read_razwi_regs, struct hl_eq_razwi_info *razwi_info) + bool read_razwi_regs, struct hl_eq_razwi_info *razwi_info, + enum gaudi2_engine_id id) { u32 razwi_hi, razwi_lo, razwi_xy; + u16 eng_id = id; + u8 rd_wr_flag; if (is_write) { if (read_razwi_regs) { @@ -7106,6 +7111,7 @@ static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev, razwi_lo = le32_to_cpu(razwi_info->hbw.rr_aw_razwi_lo_reg); razwi_xy = le32_to_cpu(razwi_info->hbw.rr_aw_razwi_id_reg); } + rd_wr_flag = HL_RAZWI_WRITE; } else { if (read_razwi_regs) { razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HI); @@ -7116,8 +7122,12 @@ static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev, razwi_lo = le32_to_cpu(razwi_info->hbw.rr_ar_razwi_lo_reg); razwi_xy = le32_to_cpu(razwi_info->hbw.rr_ar_razwi_id_reg); } + rd_wr_flag = HL_RAZWI_READ; } + hl_capture_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &eng_id, 1, + rd_wr_flag | HL_RAZWI_HBW); + 
dev_err_ratelimited(hdev->dev, "%s-RAZWI SHARED RR HBW %s error, address %#llx, Initiator coordinates 0x%x\n", name, is_write ? "WR" : "RD", (u64)razwi_hi << 32 | razwi_lo, razwi_xy); @@ -7125,9 +7135,12 @@ static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev, static void gaudi2_razwi_rr_lbw_shared_printf_info(struct hl_device *hdev, u64 rtr_mstr_if_base_addr, bool is_write, char *name, - bool read_razwi_regs, struct hl_eq_razwi_info *razwi_info) + bool read_razwi_regs, struct hl_eq_razwi_info *razwi_info, + enum gaudi2_engine_id id) { u32 razwi_addr, razwi_xy; + u16 eng_id = id; + u8 rd_wr_flag; if (is_write) { if (read_razwi_regs) { @@ -7138,9 +7151,7 @@ static void gaudi2_razwi_rr_lbw_shared_printf_info(struct hl_device *hdev, razwi_xy = le32_to_cpu(razwi_info->lbw.rr_aw_razwi_id_reg); } - dev_err_ratelimited(hdev->dev, - "%s-RAZWI SHARED RR LBW WR error, mstr_if 0x%llx, captured address 0x%x, Initiator coordinates 0x%x\n", - name, rtr_mstr_if_base_addr, razwi_addr, razwi_xy); + rd_wr_flag = HL_RAZWI_WRITE; } else { if (read_razwi_regs) { razwi_addr = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI); @@ -7150,9 +7161,57 @@ static void gaudi2_razwi_rr_lbw_shared_printf_info(struct hl_device *hdev, razwi_xy = le32_to_cpu(razwi_info->lbw.rr_ar_razwi_id_reg); } - dev_err_ratelimited(hdev->dev, - "%s-RAZWI SHARED RR LBW AR error, mstr_if 0x%llx, captured address 0x%x Initiator coordinates 0x%x\n", - name, rtr_mstr_if_base_addr, razwi_addr, razwi_xy); + rd_wr_flag = HL_RAZWI_READ; + } + + hl_capture_razwi(hdev, razwi_addr, &eng_id, 1, rd_wr_flag | HL_RAZWI_LBW); + dev_err_ratelimited(hdev->dev, + "%s-RAZWI SHARED RR LBW %s error, mstr_if 0x%llx, captured address 0x%x Initiator coordinates 0x%x\n", + name, is_write ? 
"WR" : "RD", rtr_mstr_if_base_addr, razwi_addr, + razwi_xy); +} + +static enum gaudi2_engine_id gaudi2_razwi_calc_engine_id(struct hl_device *hdev, + enum razwi_event_sources module, u8 module_idx) +{ + switch (module) { + case RAZWI_TPC: + if (module_idx == (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES)) + return GAUDI2_DCORE0_ENGINE_ID_TPC_6; + return (((module_idx / NUM_OF_TPC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) + + (module_idx % NUM_OF_TPC_PER_DCORE) + + (GAUDI2_DCORE0_ENGINE_ID_TPC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0)); + + case RAZWI_MME: + return ((GAUDI2_DCORE0_ENGINE_ID_MME - GAUDI2_DCORE0_ENGINE_ID_EDMA_0) + + (module_idx * ENGINE_ID_DCORE_OFFSET)); + + case RAZWI_EDMA: + return (((module_idx / NUM_OF_EDMA_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) + + (module_idx % NUM_OF_EDMA_PER_DCORE)); + + case RAZWI_PDMA: + return (GAUDI2_ENGINE_ID_PDMA_0 + module_idx); + + case RAZWI_NIC: + return (GAUDI2_ENGINE_ID_NIC0_0 + (NIC_NUMBER_OF_QM_PER_MACRO * module_idx)); + + case RAZWI_DEC: + if (module_idx == 8) + return GAUDI2_PCIE_ENGINE_ID_DEC_0; + + if (module_idx == 9) + return GAUDI2_PCIE_ENGINE_ID_DEC_1; + ; + return (((module_idx / NUM_OF_DEC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) + + (module_idx % NUM_OF_DEC_PER_DCORE) + + (GAUDI2_DCORE0_ENGINE_ID_DEC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0)); + + case RAZWI_ROT: + return GAUDI2_ENGINE_ID_ROT_0 + module_idx; + + default: + return GAUDI2_ENGINE_ID_SIZE; } } @@ -7165,7 +7224,7 @@ static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev, u8 module_sub_idx, struct hl_eq_razwi_info *razwi_info) { bool via_sft = false, read_razwi_regs = false; - u32 rtr_id, dcore_id, dcore_rtr_id, sft_id; + u32 rtr_id, dcore_id, dcore_rtr_id, sft_id, eng_id; u64 rtr_mstr_if_base_addr; u32 hbw_shrd_aw = 0, hbw_shrd_ar = 0; u32 lbw_shrd_aw = 0, lbw_shrd_ar = 0; @@ -7299,9 +7358,11 @@ dump_info: if (!hbw_shrd_aw && !hbw_shrd_ar && !lbw_shrd_aw && !lbw_shrd_ar) return; + eng_id = gaudi2_razwi_calc_engine_id(hdev, module, module_idx); if 
(hbw_shrd_aw) { gaudi2_razwi_rr_hbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, true, - initiator_name, read_razwi_regs, razwi_info); + initiator_name, read_razwi_regs, razwi_info, + eng_id); /* Clear event indication */ if (read_razwi_regs) @@ -7310,7 +7371,8 @@ dump_info: if (hbw_shrd_ar) { gaudi2_razwi_rr_hbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, false, - initiator_name, read_razwi_regs, razwi_info); + initiator_name, read_razwi_regs, razwi_info, + eng_id); /* Clear event indication */ if (read_razwi_regs) @@ -7319,7 +7381,8 @@ dump_info: if (lbw_shrd_aw) { gaudi2_razwi_rr_lbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, true, - initiator_name, read_razwi_regs, razwi_info); + initiator_name, read_razwi_regs, razwi_info, + eng_id); /* Clear event indication */ if (read_razwi_regs) @@ -7328,7 +7391,8 @@ dump_info: if (lbw_shrd_ar) { gaudi2_razwi_rr_lbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, false, - initiator_name, read_razwi_regs, razwi_info); + initiator_name, read_razwi_regs, razwi_info, + eng_id); /* Clear event indication */ if (read_razwi_regs) @@ -7450,25 +7514,175 @@ static const char *gaudi2_get_initiators_name(u32 rtr_id) } } +static u16 gaudi2_get_razwi_initiators(u32 rtr_id, u16 *engines) +{ + switch (rtr_id) { + case DCORE0_RTR0: + engines[0] = GAUDI2_DCORE0_ENGINE_ID_DEC_0; + engines[1] = GAUDI2_DCORE0_ENGINE_ID_DEC_1; + engines[2] = GAUDI2_PCIE_ENGINE_ID_DEC_0; + engines[3] = GAUDI2_PCIE_ENGINE_ID_DEC_1; + engines[4] = GAUDI2_DCORE0_ENGINE_ID_TPC_6; + engines[5] = GAUDI2_ENGINE_ID_PDMA_0; + engines[6] = GAUDI2_ENGINE_ID_PDMA_1; + engines[7] = GAUDI2_ENGINE_ID_PCIE; + engines[8] = GAUDI2_DCORE0_ENGINE_ID_EDMA_0; + engines[9] = GAUDI2_DCORE1_ENGINE_ID_EDMA_0; + engines[10] = GAUDI2_ENGINE_ID_PSOC; + return 11; + + case DCORE0_RTR1: + engines[0] = GAUDI2_DCORE0_ENGINE_ID_TPC_0; + engines[1] = GAUDI2_DCORE0_ENGINE_ID_TPC_1; + return 2; + + case DCORE0_RTR2: + engines[0] = GAUDI2_DCORE0_ENGINE_ID_TPC_2; + engines[1] = 
GAUDI2_DCORE0_ENGINE_ID_TPC_3; + return 2; + + case DCORE0_RTR3: + engines[0] = GAUDI2_DCORE0_ENGINE_ID_TPC_4; + engines[1] = GAUDI2_DCORE0_ENGINE_ID_TPC_5; + return 2; + + case DCORE0_RTR4: + case DCORE0_RTR5: + case DCORE0_RTR6: + case DCORE0_RTR7: + engines[0] = GAUDI2_DCORE0_ENGINE_ID_MME; + return 1; + + case DCORE1_RTR0: + case DCORE1_RTR1: + case DCORE1_RTR2: + case DCORE1_RTR3: + engines[0] = GAUDI2_DCORE1_ENGINE_ID_MME; + return 1; + + case DCORE1_RTR4: + engines[0] = GAUDI2_DCORE1_ENGINE_ID_TPC_4; + engines[1] = GAUDI2_DCORE1_ENGINE_ID_TPC_5; + return 2; + + case DCORE1_RTR5: + engines[0] = GAUDI2_DCORE1_ENGINE_ID_TPC_2; + engines[1] = GAUDI2_DCORE1_ENGINE_ID_TPC_3; + return 2; + + case DCORE1_RTR6: + engines[0] = GAUDI2_DCORE1_ENGINE_ID_TPC_0; + engines[1] = GAUDI2_DCORE1_ENGINE_ID_TPC_1; + return 2; + + case DCORE1_RTR7: + engines[0] = GAUDI2_DCORE1_ENGINE_ID_DEC_0; + engines[1] = GAUDI2_DCORE1_ENGINE_ID_DEC_1; + engines[2] = GAUDI2_ENGINE_ID_NIC0_0; + engines[3] = GAUDI2_ENGINE_ID_NIC1_0; + engines[4] = GAUDI2_ENGINE_ID_NIC2_0; + engines[5] = GAUDI2_ENGINE_ID_NIC3_0; + engines[6] = GAUDI2_ENGINE_ID_NIC4_0; + engines[7] = GAUDI2_ENGINE_ID_ARC_FARM; + engines[8] = GAUDI2_ENGINE_ID_KDMA; + engines[9] = GAUDI2_DCORE0_ENGINE_ID_EDMA_1; + engines[10] = GAUDI2_DCORE1_ENGINE_ID_EDMA_1; + return 11; + + case DCORE2_RTR0: + engines[0] = GAUDI2_DCORE2_ENGINE_ID_DEC_0; + engines[1] = GAUDI2_DCORE2_ENGINE_ID_DEC_1; + engines[2] = GAUDI2_ENGINE_ID_NIC5_0; + engines[3] = GAUDI2_ENGINE_ID_NIC6_0; + engines[4] = GAUDI2_ENGINE_ID_NIC7_0; + engines[5] = GAUDI2_ENGINE_ID_NIC8_0; + engines[6] = GAUDI2_DCORE2_ENGINE_ID_EDMA_0; + engines[7] = GAUDI2_DCORE3_ENGINE_ID_EDMA_0; + engines[8] = GAUDI2_ENGINE_ID_ROT_0; + return 9; + + case DCORE2_RTR1: + engines[0] = GAUDI2_DCORE2_ENGINE_ID_TPC_4; + engines[1] = GAUDI2_DCORE2_ENGINE_ID_TPC_5; + return 2; + + case DCORE2_RTR2: + engines[0] = GAUDI2_DCORE2_ENGINE_ID_TPC_2; + engines[1] = GAUDI2_DCORE2_ENGINE_ID_TPC_3; + return 2; + + 
case DCORE2_RTR3: + engines[0] = GAUDI2_DCORE2_ENGINE_ID_TPC_0; + engines[1] = GAUDI2_DCORE2_ENGINE_ID_TPC_1; + return 2; + + case DCORE2_RTR4: + case DCORE2_RTR5: + case DCORE2_RTR6: + case DCORE2_RTR7: + engines[0] = GAUDI2_DCORE2_ENGINE_ID_MME; + return 1; + case DCORE3_RTR0: + case DCORE3_RTR1: + case DCORE3_RTR2: + case DCORE3_RTR3: + engines[0] = GAUDI2_DCORE3_ENGINE_ID_MME; + return 1; + case DCORE3_RTR4: + engines[0] = GAUDI2_DCORE3_ENGINE_ID_TPC_0; + engines[1] = GAUDI2_DCORE3_ENGINE_ID_TPC_1; + return 2; + case DCORE3_RTR5: + engines[0] = GAUDI2_DCORE3_ENGINE_ID_TPC_2; + engines[1] = GAUDI2_DCORE3_ENGINE_ID_TPC_3; + return 2; + case DCORE3_RTR6: + engines[0] = GAUDI2_DCORE3_ENGINE_ID_TPC_4; + engines[1] = GAUDI2_DCORE3_ENGINE_ID_TPC_5; + return 2; + case DCORE3_RTR7: + engines[0] = GAUDI2_DCORE3_ENGINE_ID_DEC_0; + engines[1] = GAUDI2_DCORE3_ENGINE_ID_DEC_1; + engines[2] = GAUDI2_ENGINE_ID_NIC9_0; + engines[3] = GAUDI2_ENGINE_ID_NIC10_0; + engines[4] = GAUDI2_ENGINE_ID_NIC11_0; + engines[5] = GAUDI2_DCORE2_ENGINE_ID_EDMA_1; + engines[6] = GAUDI2_DCORE3_ENGINE_ID_EDMA_1; + engines[7] = GAUDI2_ENGINE_ID_ROT_1; + engines[8] = GAUDI2_ENGINE_ID_ROT_0; + return 9; + default: + return 0; + } +} + static void gaudi2_razwi_unmapped_addr_hbw_printf_info(struct hl_device *hdev, u32 rtr_id, u64 rtr_ctrl_base_addr, bool is_write) { + u16 engines[HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR], num_of_eng; u32 razwi_hi, razwi_lo; + u8 rd_wr_flag; + + num_of_eng = gaudi2_get_razwi_initiators(rtr_id, &engines[0]); if (is_write) { razwi_hi = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_ADDR_HI); razwi_lo = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_ADDR_LO); + rd_wr_flag = HL_RAZWI_WRITE; /* Clear set indication */ WREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_SET, 0x1); } else { razwi_hi = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_ADDR_HI); razwi_lo = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_ADDR_LO); + rd_wr_flag = HL_RAZWI_READ; /* Clear set indication */ 
WREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_SET, 0x1); } + hl_capture_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &engines[0], num_of_eng, + rd_wr_flag | HL_RAZWI_HBW); dev_err_ratelimited(hdev->dev, "RAZWI PSOC unmapped HBW %s error, rtr id %u, address %#llx\n", is_write ? "WR" : "RD", rtr_id, (u64)razwi_hi << 32 | razwi_lo); @@ -7480,20 +7694,27 @@ static void gaudi2_razwi_unmapped_addr_hbw_printf_info(struct hl_device *hdev, u static void gaudi2_razwi_unmapped_addr_lbw_printf_info(struct hl_device *hdev, u32 rtr_id, u64 rtr_ctrl_base_addr, bool is_write) { + u16 engines[HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR], num_of_eng; u32 razwi_addr; + u8 rd_wr_flag; + + num_of_eng = gaudi2_get_razwi_initiators(rtr_id, &engines[0]); if (is_write) { razwi_addr = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_ADDR); + rd_wr_flag = HL_RAZWI_WRITE; /* Clear set indication */ WREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_SET, 0x1); } else { razwi_addr = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_ADDR); + rd_wr_flag = HL_RAZWI_READ; /* Clear set indication */ WREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_SET, 0x1); } + hl_capture_razwi(hdev, razwi_addr, &engines[0], num_of_eng, rd_wr_flag | HL_RAZWI_LBW); dev_err_ratelimited(hdev->dev, "RAZWI PSOC unmapped LBW %s error, rtr id %u, address %#x\n", is_write ? 
"WR" : "RD", rtr_id, razwi_addr); @@ -7974,28 +8195,28 @@ static void gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(struct hl_device *hdev) razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED; if (RREG32(razwi_happened_addr)) { gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE", true, - NULL); + NULL, GAUDI2_ENGINE_ID_PCIE); WREG32(razwi_happened_addr, 0x1); } razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED; if (RREG32(razwi_happened_addr)) { gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE", true, - NULL); + NULL, GAUDI2_ENGINE_ID_PCIE); WREG32(razwi_happened_addr, 0x1); } razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED; if (RREG32(razwi_happened_addr)) { gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE", true, - NULL); + NULL, GAUDI2_ENGINE_ID_PCIE); WREG32(razwi_happened_addr, 0x1); } razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED; if (RREG32(razwi_happened_addr)) { gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE", true, - NULL); + NULL, GAUDI2_ENGINE_ID_PCIE); WREG32(razwi_happened_addr, 0x1); } } diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 2b794f54e2ed..a4ceee681898 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -597,6 +597,10 @@ enum gaudi2_engine_id { GAUDI2_ENGINE_ID_NIC10_1, GAUDI2_ENGINE_ID_NIC11_0, GAUDI2_ENGINE_ID_NIC11_1, + GAUDI2_ENGINE_ID_PCIE, + GAUDI2_ENGINE_ID_PSOC, + GAUDI2_ENGINE_ID_ARC_FARM, + GAUDI2_ENGINE_ID_KDMA, GAUDI2_ENGINE_ID_SIZE }; From 4f11694f27582fa0875c4be7d133e0ae88ad36f8 Mon Sep 17 00:00:00 2001 From: Dani Liberman Date: Thu, 29 Sep 2022 10:28:36 +0300 Subject: [PATCH 2071/4122] habanalabs/gaudi2: capture page fault data Capture page fault data when it happens. 
Signed-off-by: Dani Liberman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi2/gaudi2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index 1058c8a0e644..a4e3586f1a12 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -8286,6 +8286,7 @@ static void gaudi2_handle_page_error(struct hl_device *hdev, u64 mmu_base, bool dev_err_ratelimited(hdev->dev, "%s page fault on va 0x%llx\n", is_pmmu ? "PMMU" : "HMMU", addr); + hl_capture_page_fault(hdev, addr, 0, is_pmmu); WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE), 0); } From 27cd39afde454ca8f9a438cfc84d676e96b36bd7 Mon Sep 17 00:00:00 2001 From: Tal Cohen Date: Mon, 3 Oct 2022 13:55:50 +0300 Subject: [PATCH 2072/4122] habanalabs: verify no zero event is sent The event notifier mechanism should not raise an empty event (event equals zero). Signed-off-by: Tal Cohen Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 5 +++++ drivers/misc/habanalabs/gaudi/gaudi.c | 4 +++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index b8b32285720d..9b54d1df5302 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -1746,6 +1746,11 @@ void hl_notifier_event_send_all(struct hl_device *hdev, u64 event_mask) { struct hl_fpriv *hpriv; + if (!event_mask) { + dev_warn(hdev->dev, "Skip sending zero event"); + return; + } + mutex_lock(&hdev->fpriv_list_lock); list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 1a99f7be8b60..337123f73501 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -7945,7 +7945,9 @@ reset_device: /* 
despite reset doesn't execute. a notification on * occurred event needs to be sent here */ - hl_notifier_event_send_all(hdev, event_mask); + if (event_mask) + hl_notifier_event_send_all(hdev, event_mask); + if (reset_required) hl_device_reset(hdev, flags); else From dc8d243caea8056bd2580b0f1703fe019d3b4419 Mon Sep 17 00:00:00 2001 From: Dilip Puri Date: Wed, 12 Oct 2022 11:06:48 +0300 Subject: [PATCH 2073/4122] habanalabs/gaudi2: unsecure CBU_EARLY_BRESP registers NIC ARCs need to have access to CBU_EARLY_BRESP, hence we unsecure those registers. Signed-off-by: Dilip Puri Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi2/gaudi2_security.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2_security.c b/drivers/misc/habanalabs/gaudi2/gaudi2_security.c index c6906fb14229..768c2f3dc900 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2_security.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2_security.c @@ -1764,6 +1764,7 @@ static const struct range gaudi2_pb_nic0_qm_arc_aux0_unsecured_regs[] = { {mmNIC0_QM_ARC_AUX0_CLUSTER_NUM, mmNIC0_QM_ARC_AUX0_WAKE_UP_EVENT}, {mmNIC0_QM_ARC_AUX0_ARC_RST_REQ, mmNIC0_QM_ARC_AUX0_CID_OFFSET_7}, {mmNIC0_QM_ARC_AUX0_SCRATCHPAD_0, mmNIC0_QM_ARC_AUX0_INFLIGHT_LBU_RD_CNT}, + {mmNIC0_QM_ARC_AUX0_CBU_EARLY_BRESP_EN, mmNIC0_QM_ARC_AUX0_CBU_EARLY_BRESP_EN}, {mmNIC0_QM_ARC_AUX0_LBU_EARLY_BRESP_EN, mmNIC0_QM_ARC_AUX0_LBU_EARLY_BRESP_EN}, {mmNIC0_QM_ARC_AUX0_DCCM_QUEUE_BASE_ADDR_0, mmNIC0_QM_ARC_AUX0_DCCM_QUEUE_ALERT_MSG}, {mmNIC0_QM_ARC_AUX0_DCCM_Q_PUSH_FIFO_CNT, mmNIC0_QM_ARC_AUX0_QMAN_ARC_CQ_SHADOW_CI}, From 24fdfb359cadd222de8ba9d2d6a3f4dfc514878a Mon Sep 17 00:00:00 2001 From: Ohad Sharabi Date: Tue, 18 Oct 2022 08:51:33 +0300 Subject: [PATCH 2074/4122] habanalabs: fix using freed pointer The code uses the pointer for trace purposes (without actually dereferencing it) but still gets a static analysis warning. This patch eliminates the warning. 
Signed-off-by: Ohad Sharabi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 9b54d1df5302..dd01be5c4ba3 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -135,6 +135,9 @@ static void hl_asic_dma_free_common(struct hl_device *hdev, size_t size, void *c dma_addr_t dma_handle, enum dma_alloc_type alloc_type, const char *caller) { + /* this is needed to avoid warning on using freed pointer */ + u64 store_cpu_addr = (u64) (uintptr_t) cpu_addr; + switch (alloc_type) { case DMA_ALLOC_COHERENT: hdev->asic_funcs->asic_dma_free_coherent(hdev, size, cpu_addr, dma_handle); @@ -147,7 +150,7 @@ static void hl_asic_dma_free_common(struct hl_device *hdev, size_t size, void *c break; } - trace_habanalabs_dma_free(hdev->dev, (u64) (uintptr_t) cpu_addr, dma_handle, size, caller); + trace_habanalabs_dma_free(hdev->dev, store_cpu_addr, dma_handle, size, caller); } void *hl_asic_dma_alloc_coherent_caller(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle, From e325d5dbf34500fd42d5847d5b8c4e097f8030af Mon Sep 17 00:00:00 2001 From: Ohad Sharabi Date: Wed, 14 Sep 2022 08:53:29 +0300 Subject: [PATCH 2075/4122] habanalabs: allow setting HBM BAR to other regions Up until now the use-case in the driver was that the HBM is accessed using the HBM BAR, yet the BAR sometimes cannot cover the whole HBM and so we needed to set the BAR to another HBM offset. Now we are facing the need to access other PCI memory regions that can be covered by the HBM BAR. To answer that we are allowing the caller to determine if the HBM BAR needs to be set or not regardless of the PCI memory region. 
Signed-off-by: Ohad Sharabi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 29 ++++++++++++--------- drivers/misc/habanalabs/common/habanalabs.h | 2 ++ 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index dd01be5c4ba3..0026fe42b3d2 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -32,6 +32,7 @@ enum dma_alloc_type { * @hdev: pointer to habanalabs device structure. * @addr: the address the caller wants to access. * @region: the PCI region. + * @new_bar_region_base: the new BAR region base address. * * @return: the old BAR base address on success, U64_MAX for failure. * The caller should set it back to the old address after use. @@ -41,7 +42,8 @@ enum dma_alloc_type { * This function can be called also if the bar doesn't need to be set, * in that case it just won't change the base. */ -static u64 hl_set_dram_bar(struct hl_device *hdev, u64 addr, struct pci_mem_region *region) +static u64 hl_set_dram_bar(struct hl_device *hdev, u64 addr, struct pci_mem_region *region, + u64 *new_bar_region_base) { struct asic_fixed_properties *prop = &hdev->asic_prop; u64 bar_base_addr, old_base; @@ -55,27 +57,28 @@ static u64 hl_set_dram_bar(struct hl_device *hdev, u64 addr, struct pci_mem_regi old_base = hdev->asic_funcs->set_dram_bar_base(hdev, bar_base_addr); /* in case of success we need to update the new BAR base */ - if (old_base != U64_MAX) - region->region_base = bar_base_addr; + if ((old_base != U64_MAX) && new_bar_region_base) + *new_bar_region_base = bar_base_addr; return old_base; } -static int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val, - enum debugfs_access_type acc_type, enum pci_region region_type) +int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val, + enum debugfs_access_type acc_type, enum pci_region region_type, bool 
set_dram_bar) { struct pci_mem_region *region = &hdev->pci_mem_region[region_type]; + u64 old_base = 0, rc, new_bar_region_base = 0; void __iomem *acc_addr; - u64 old_base = 0, rc; - if (region_type == PCI_REGION_DRAM) { - old_base = hl_set_dram_bar(hdev, addr, region); + if (set_dram_bar) { + old_base = hl_set_dram_bar(hdev, addr, region, &new_bar_region_base); if (old_base == U64_MAX) return -EIO; } - acc_addr = hdev->pcie_bar[region->bar_id] + addr - region->region_base + - region->offset_in_bar; + acc_addr = hdev->pcie_bar[region->bar_id] + region->offset_in_bar + + (addr - new_bar_region_base); + switch (acc_type) { case DEBUGFS_READ8: *val = readb(acc_addr); @@ -97,8 +100,8 @@ static int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val break; } - if (region_type == PCI_REGION_DRAM) { - rc = hl_set_dram_bar(hdev, old_base, region); + if (set_dram_bar) { + rc = hl_set_dram_bar(hdev, old_base, region, NULL); if (rc == U64_MAX) return -EIO; } @@ -283,7 +286,7 @@ int hl_access_dev_mem(struct hl_device *hdev, enum pci_region region_type, case PCI_REGION_SRAM: case PCI_REGION_DRAM: return hl_access_sram_dram_region(hdev, addr, val, acc_type, - region_type); + region_type, (region_type == PCI_REGION_DRAM)); default: return -EFAULT; } diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 4913197c433e..c8347eac09ed 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -3486,6 +3486,8 @@ void hl_asic_dma_pool_free_caller(struct hl_device *hdev, void *vaddr, dma_addr_ int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir); void hl_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir); +int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val, + enum debugfs_access_type acc_type, enum pci_region region_type, bool set_dram_bar); int 
hl_access_cfg_region(struct hl_device *hdev, u64 addr, u64 *val, enum debugfs_access_type acc_type); int hl_access_dev_mem(struct hl_device *hdev, enum pci_region region_type, From 5ad06bb1d2c073c8b071016226fb9ebe2163e660 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Thu, 20 Oct 2022 11:29:03 +0300 Subject: [PATCH 2076/4122] habanalabs/gaudi2: remove configurations to access the MSI-X doorbell The virtual MSI-X doorbell is supported now in F/W, so all configurations to access the PCIE_DBI MSI-X doorbell can be removed. Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi2/gaudi2.c | 34 +++---------------------- 1 file changed, 3 insertions(+), 31 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index a4e3586f1a12..9208f69dd7f8 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -4473,23 +4473,9 @@ static void gaudi2_init_sm(struct hl_device *hdev) reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1); WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val); - /* Init CQ0 DB */ - /* Configure the monitor to trigger MSI-X interrupt */ - /* TODO: - * Remove the if statement when virtual MSI-X doorbell is supported in simulator (SW-93022) - * and in F/W (SW-93024). 
- */ - if (!hdev->pdev || hdev->asic_prop.fw_security_enabled) { - u64 msix_db_reg = CFG_BASE + mmPCIE_DBI_MSIX_DOORBELL_OFF; - - WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0, lower_32_bits(msix_db_reg)); - WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0, upper_32_bits(msix_db_reg)); - } else { - WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0, - lower_32_bits(gaudi2->virt_msix_db_dma_addr)); - WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0, - upper_32_bits(gaudi2->virt_msix_db_dma_addr)); - } + /* Init CQ0 DB - configure the monitor to trigger MSI-X interrupt */ + WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0, lower_32_bits(gaudi2->virt_msix_db_dma_addr)); + WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0, upper_32_bits(gaudi2->virt_msix_db_dma_addr)); WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0, GAUDI2_IRQ_NUM_COMPLETION); for (i = 0 ; i < GAUDI2_RESERVED_CQ_NUMBER ; i++) { @@ -4657,20 +4643,6 @@ static void gaudi2_init_vdec_brdg_ctrl(struct hl_device *hdev, u64 base_addr, u3 { u32 sob_id; - /* TODO: - * Remove when virtual MSI-X doorbell is supported in simulator (SW-93022) and in F/W - * (SW-93024). 
- */ - if (!hdev->pdev || hdev->asic_prop.fw_security_enabled) { - u32 interrupt_id = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 2 * decoder_id; - - WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_AWADDR, mmPCIE_DBI_MSIX_DOORBELL_OFF); - WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_WDATA, interrupt_id); - WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_AWADDR, mmPCIE_DBI_MSIX_DOORBELL_OFF); - WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_WDATA, interrupt_id + 1); - return; - } - /* VCMD normal interrupt */ sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id; WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_AWADDR, From 6bcb2d05a59b3534821a194f8642808ae56f2d10 Mon Sep 17 00:00:00 2001 From: Dani Liberman Date: Wed, 19 Oct 2022 20:24:55 +0300 Subject: [PATCH 2077/4122] habanalabs: fix user mappings calculation in case of page fault As there are 2 types of user mappings, pmmu and hmmu, calculate only the relevant mappings for the requested type. Signed-off-by: Dani Liberman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 0026fe42b3d2..0e88396744a1 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -2305,8 +2305,13 @@ static void hl_capture_user_mappings(struct hl_device *hdev, bool is_pmmu) } mutex_lock(&ctx->mem_hash_lock); - hash_for_each(ctx->mem_hash, i, hnode, node) - pgf_info->num_of_user_mappings++; + hash_for_each(ctx->mem_hash, i, hnode, node) { + vm_type = hnode->ptr; + if (((*vm_type == VM_TYPE_USERPTR) && is_pmmu) || + ((*vm_type == VM_TYPE_PHYS_PACK) && !is_pmmu)) + pgf_info->num_of_user_mappings++; + + } if (!pgf_info->num_of_user_mappings) goto finish; From d1e0ac37ed41e581c030a8fffe4ad1d0bb987872 Mon Sep 17 00:00:00 2001 From: Ohad Sharabi Date: Sun, 23 Oct 2022 14:46:08 +0300 Subject: [PATCH 2078/4122] habanalabs: avoid 
divide by zero in device utilization Currently there is no verification whether the divisor is legal. Signed-off-by: Ohad Sharabi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 0e88396744a1..b71303ba11d0 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -987,11 +987,16 @@ static void device_late_fini(struct hl_device *hdev) int hl_device_utilization(struct hl_device *hdev, u32 *utilization) { - u64 max_power, curr_power, dc_power, dividend; + u64 max_power, curr_power, dc_power, dividend, divisor; int rc; max_power = hdev->max_power; dc_power = hdev->asic_prop.dc_power_default; + divisor = max_power - dc_power; + if (!divisor) { + dev_warn(hdev->dev, "device utilization is not supported\n"); + return -EOPNOTSUPP; + } rc = hl_fw_cpucp_power_get(hdev, &curr_power); if (rc) @@ -1000,7 +1005,7 @@ int hl_device_utilization(struct hl_device *hdev, u32 *utilization) curr_power = clamp(curr_power, dc_power, max_power); dividend = (curr_power - dc_power) * 100; - *utilization = (u32) div_u64(dividend, (max_power - dc_power)); + *utilization = (u32) div_u64(dividend, divisor); return 0; } From a88a6f5f5cdfce21aaf988370287e0e78970c8ad Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Fri, 30 Sep 2022 15:08:13 +0300 Subject: [PATCH 2079/4122] habanalabs: add support for graceful hard reset Calling hl_device_reset() for a hard reset will lead to a quite immediate device reset and to killing user process. For resets that follow errors, it disables the option to debug the errors on both the device side and the user application side. This patch adds a 'graceful hard reset' option and a new hl_device_cond_reset() function. 
Under some conditions, mainly if there is no user process or if he is not registered to driver notifications, this function will execute hard reset as usual. Otherwise, the reset will be postponed and a notification will be sent to user, to let him perform post-error actions and then to release the device, after which reset will take place. If device is not released by user in some defined time, a watchdog work will execute the reset in any case. Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 141 ++++++++++++++++++-- drivers/misc/habanalabs/common/habanalabs.h | 14 +- 2 files changed, 140 insertions(+), 15 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index b71303ba11d0..bcd959924971 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -16,7 +16,9 @@ #include -#define HL_RESET_DELAY_USEC 10000 /* 10ms */ +#define HL_RESET_DELAY_USEC 10000 /* 10ms */ + +#define HL_DEVICE_RELEASE_WATCHDOG_TIMEOUT_SEC 5 enum dma_alloc_type { DMA_ALLOC_COHERENT, @@ -387,7 +389,7 @@ bool hl_ctrl_device_operational(struct hl_device *hdev, static void hpriv_release(struct kref *ref) { u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0}; - bool device_is_idle = true; + bool reset_device, device_is_idle = true; struct hl_fpriv *hpriv; struct hl_device *hdev; @@ -404,14 +406,20 @@ static void hpriv_release(struct kref *ref) mutex_destroy(&hpriv->ctx_lock); mutex_destroy(&hpriv->restore_phase_mutex); - /* No need for idle status check if device is going to be reset in any case */ - if (!hdev->reset_upon_device_release && hdev->pdev && !hdev->pldm) + /* Device should be reset if reset-upon-device-release is enabled, or if there is a pending + * reset that waits for device release. 
+ */ + reset_device = hdev->reset_upon_device_release || hdev->reset_info.watchdog_active; + + /* Unless device is reset in any case, check idle status and reset if device is not idle */ + if (!reset_device && hdev->pdev && !hdev->pldm) device_is_idle = hdev->asic_funcs->is_device_idle(hdev, idle_mask, HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL); - - if (!device_is_idle) + if (!device_is_idle) { dev_err(hdev->dev, "device not idle after user context is closed (0x%llx_%llx)\n", idle_mask[1], idle_mask[0]); + reset_device = true; + } /* We need to remove the user from the list to make sure the reset process won't * try to kill the user process. Because, if we got here, it means there are no @@ -426,9 +434,10 @@ static void hpriv_release(struct kref *ref) list_del(&hpriv->dev_node); mutex_unlock(&hdev->fpriv_list_lock); - if (!device_is_idle || hdev->reset_upon_device_release) { + if (reset_device) { hl_device_reset(hdev, HL_DRV_RESET_DEV_RELEASE); } else { + /* Scrubbing is handled within hl_device_reset(), so here need to do it directly */ int rc = hdev->asic_funcs->scrub_device_mem(hdev); if (rc) @@ -695,6 +704,20 @@ static void device_hard_reset_pending(struct work_struct *work) } } +static void device_release_watchdog_func(struct work_struct *work) +{ + struct hl_device_reset_work *device_release_watchdog_work = + container_of(work, struct hl_device_reset_work, reset_work.work); + struct hl_device *hdev = device_release_watchdog_work->hdev; + u32 flags; + + dev_dbg(hdev->dev, "Device wasn't released in time. 
Initiate device reset.\n"); + + flags = device_release_watchdog_work->flags | HL_DRV_RESET_FROM_WD_THR; + + hl_device_reset(hdev, flags); +} + /* * device_early_init - do some early initialization for the habanalabs device * @@ -813,11 +836,14 @@ static int device_early_init(struct hl_device *hdev) goto free_cb_mgr; } - INIT_DELAYED_WORK(&hdev->device_reset_work.reset_work, - device_hard_reset_pending); + INIT_DELAYED_WORK(&hdev->device_reset_work.reset_work, device_hard_reset_pending); hdev->device_reset_work.hdev = hdev; hdev->device_fini_pending = 0; + INIT_DELAYED_WORK(&hdev->device_release_watchdog_work.reset_work, + device_release_watchdog_func); + hdev->device_release_watchdog_work.hdev = hdev; + mutex_init(&hdev->send_cpu_message_lock); mutex_init(&hdev->debug_lock); INIT_LIST_HEAD(&hdev->cs_mirror_list); @@ -1367,8 +1393,8 @@ static void handle_reset_trigger(struct hl_device *hdev, u32 flags) int hl_device_reset(struct hl_device *hdev, u32 flags) { bool hard_reset, from_hard_reset_thread, fw_reset, hard_instead_soft = false, - reset_upon_device_release = false, schedule_hard_reset = false, - skip_wq_flush, delay_reset; + reset_upon_device_release = false, schedule_hard_reset = false, delay_reset, + from_dev_release, from_watchdog_thread; u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0}; struct hl_ctx *ctx; int i, rc; @@ -1381,8 +1407,9 @@ int hl_device_reset(struct hl_device *hdev, u32 flags) hard_reset = !!(flags & HL_DRV_RESET_HARD); from_hard_reset_thread = !!(flags & HL_DRV_RESET_FROM_RESET_THR); fw_reset = !!(flags & HL_DRV_RESET_BYPASS_REQ_TO_FW); - skip_wq_flush = !!(flags & HL_DRV_RESET_DEV_RELEASE); + from_dev_release = !!(flags & HL_DRV_RESET_DEV_RELEASE); delay_reset = !!(flags & HL_DRV_RESET_DELAY); + from_watchdog_thread = !!(flags & HL_DRV_RESET_FROM_WD_THR); if (!hard_reset && !hdev->asic_prop.supports_compute_reset) { hard_instead_soft = true; @@ -1439,6 +1466,23 @@ do_reset: spin_unlock(&hdev->reset_info.lock); + /* Cancel the device 
release watchdog work if required. + * In case of reset-upon-device-release while the release watchdog work is + * scheduled, do hard-reset instead of compute-reset. + */ + if ((hard_reset || from_dev_release) && hdev->reset_info.watchdog_active) { + hdev->reset_info.watchdog_active = 0; + if (!from_watchdog_thread) + cancel_delayed_work_sync( + &hdev->device_release_watchdog_work.reset_work); + + if (from_dev_release) { + flags |= HL_DRV_RESET_HARD; + flags &= ~HL_DRV_RESET_DEV_RELEASE; + hard_reset = true; + } + } + if (delay_reset) usleep_range(HL_RESET_DELAY_USEC, HL_RESET_DELAY_USEC << 1); @@ -1474,7 +1518,7 @@ again: return 0; } - cleanup_resources(hdev, hard_reset, fw_reset, skip_wq_flush); + cleanup_resources(hdev, hard_reset, fw_reset, from_dev_release); kill_processes: if (hard_reset) { @@ -1735,6 +1779,73 @@ out_err: return rc; } +/* + * hl_device_cond_reset() - conditionally reset the device. + * @hdev: pointer to habanalabs device structure. + * @reset_flags: reset flags. + * @event_mask: events to notify user about. + * + * Conditionally reset the device, or alternatively schedule a watchdog work to reset the device + * unless another reset precedes it. 
+ */ +int hl_device_cond_reset(struct hl_device *hdev, u32 flags, u64 event_mask) +{ + struct hl_ctx *ctx = NULL; + + /* Device release watchdog is only for hard reset */ + if (!(flags & HL_DRV_RESET_HARD) && hdev->asic_prop.allow_inference_soft_reset) + goto device_reset; + + /* F/W reset cannot be postponed */ + if (flags & HL_DRV_RESET_BYPASS_REQ_TO_FW) + goto device_reset; + + /* Device release watchdog is relevant only if user exists and gets a reset notification */ + if (!(event_mask & HL_NOTIFIER_EVENT_DEVICE_RESET)) { + dev_err(hdev->dev, "Resetting device without a reset indication to user\n"); + goto device_reset; + } + + ctx = hl_get_compute_ctx(hdev); + if (!ctx || !ctx->hpriv->notifier_event.eventfd) + goto device_reset; + + /* Schedule the device release watchdog work unless reset is already in progress or if the + * work is already scheduled. + */ + spin_lock(&hdev->reset_info.lock); + if (hdev->reset_info.in_reset) { + spin_unlock(&hdev->reset_info.lock); + goto device_reset; + } + + if (hdev->reset_info.watchdog_active) + goto out; + + hdev->device_release_watchdog_work.flags = flags; + dev_dbg(hdev->dev, "Device is going to be reset in %u sec unless being released\n", + hdev->device_release_watchdog_timeout_sec); + schedule_delayed_work(&hdev->device_release_watchdog_work.reset_work, + msecs_to_jiffies(hdev->device_release_watchdog_timeout_sec * 1000)); + hdev->reset_info.watchdog_active = 1; +out: + spin_unlock(&hdev->reset_info.lock); + + hl_notifier_event_send_all(hdev, event_mask); + + hl_ctx_put(ctx); + + return 0; + +device_reset: + if (event_mask) + hl_notifier_event_send_all(hdev, event_mask); + if (ctx) + hl_ctx_put(ctx); + + return hl_device_reset(hdev, flags); +} + static void hl_notifier_event_send(struct hl_notifier_event *notifier_event, u64 event_mask) { mutex_lock(¬ifier_event->lock); @@ -1932,6 +2043,8 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) hdev->asic_funcs->state_dump_init(hdev); + 
hdev->device_release_watchdog_timeout_sec = HL_DEVICE_RELEASE_WATCHDOG_TIMEOUT_SEC; + hdev->memory_scrub_val = MEM_SCRUB_DEFAULT_VAL; hl_debugfs_add_device(hdev); @@ -2152,6 +2265,8 @@ void hl_device_fini(struct hl_device *hdev) } } + cancel_delayed_work_sync(&hdev->device_release_watchdog_work.reset_work); + /* Disable PCI access from device F/W so it won't send us additional * interrupts. We disable MSI/MSI-X at the halt_engines function and we * can't have the F/W sending us interrupts after that. We need to diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index c8347eac09ed..bfaaa9daa750 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -191,6 +191,9 @@ enum hl_mmu_enablement { * * - HL_DRV_RESET_DELAY * Set if a delay should be added before the reset + * + * - HL_DRV_RESET_FROM_WD_THR + * Set if the caller is the device release watchdog thread */ #define HL_DRV_RESET_HARD (1 << 0) @@ -201,6 +204,7 @@ enum hl_mmu_enablement { #define HL_DRV_RESET_BYPASS_REQ_TO_FW (1 << 5) #define HL_DRV_RESET_FW_FATAL_ERR (1 << 6) #define HL_DRV_RESET_DELAY (1 << 7) +#define HL_DRV_RESET_FROM_WD_THR (1 << 8) /* * Security @@ -3009,6 +3013,7 @@ struct hl_error_info { * same cause. * @skip_reset_on_timeout: Skip device reset if CS has timed out, wait for it to * complete instead. + * @watchdog_active: true if a device release watchdog work is scheduled. */ struct hl_reset_info { spinlock_t lock; @@ -3019,12 +3024,11 @@ struct hl_reset_info { u8 in_compute_reset; u8 needs_reset; u8 hard_reset_pending; - u8 curr_reset_cause; u8 prev_reset_trigger; u8 reset_trigger_repeated; - u8 skip_reset_on_timeout; + u8 watchdog_active; }; /** @@ -3040,6 +3044,8 @@ struct hl_reset_info { * @dev_ctrl: related kernel device structure for the control device * @work_heartbeat: delayed work for CPU-CP is-alive check. 
* @device_reset_work: delayed work which performs hard reset + * @device_release_watchdog_work: watchdog work that performs hard reset if user doesn't release + * device upon certain error cases. * @asic_name: ASIC specific name. * @asic_type: ASIC specific type. * @completion_queue: array of hl_cq. @@ -3149,6 +3155,7 @@ struct hl_reset_info { * indicates which decoder engines are binned-out * @edma_binning: contains mask of edma engines that is received from the f/w which * indicates which edma engines are binned-out + * @device_release_watchdog_timeout_sec: device release watchdog timeout value in seconds. * @id: device minor. * @id_control: minor of the control device. * @cdev_idx: char device index. Used for setting its name. @@ -3218,6 +3225,7 @@ struct hl_device { struct device *dev_ctrl; struct delayed_work work_heartbeat; struct hl_device_reset_work device_reset_work; + struct hl_device_reset_work device_release_watchdog_work; char asic_name[HL_STR_MAX]; char status[HL_DEV_STS_MAX][HL_STR_MAX]; enum hl_asic_type asic_type; @@ -3312,6 +3320,7 @@ struct hl_device { u32 high_pll; u32 decoder_binning; u32 edma_binning; + u32 device_release_watchdog_timeout_sec; u16 id; u16 id_control; u16 cdev_idx; @@ -3551,6 +3560,7 @@ void hl_device_fini(struct hl_device *hdev); int hl_device_suspend(struct hl_device *hdev); int hl_device_resume(struct hl_device *hdev); int hl_device_reset(struct hl_device *hdev, u32 flags); +int hl_device_cond_reset(struct hl_device *hdev, u32 flags, u64 event_mask); void hl_hpriv_get(struct hl_fpriv *hpriv); int hl_hpriv_put(struct hl_fpriv *hpriv); int hl_device_utilization(struct hl_device *hdev, u32 *utilization); From 11669b58fa1cee8442ae31ad4ba71398729727b5 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Fri, 30 Sep 2022 16:37:41 +0300 Subject: [PATCH 2080/4122] habanalabs: add an option to control watchdog timeout via debugfs Add an option to control the timeout value for the driver's watchdog of the reset process. 
The timeout represents the amount of time the user has to close his process once he gets a device reset notification from the driver. Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- Documentation/ABI/testing/debugfs-driver-habanalabs | 7 +++++++ drivers/misc/habanalabs/common/debugfs.c | 5 +++++ 2 files changed, 12 insertions(+) diff --git a/Documentation/ABI/testing/debugfs-driver-habanalabs b/Documentation/ABI/testing/debugfs-driver-habanalabs index c915bf17b293..85f6d04f528b 100644 --- a/Documentation/ABI/testing/debugfs-driver-habanalabs +++ b/Documentation/ABI/testing/debugfs-driver-habanalabs @@ -91,6 +91,13 @@ Description: Enables the root user to set the device to specific state. Valid values are "disable", "enable", "suspend", "resume". User can read this property to see the valid values +What: /sys/kernel/debug/habanalabs/hl/device_release_watchdog_timeout +Date: Oct 2022 +KernelVersion: 6.2 +Contact: ttayar@habana.ai +Description: The watchdog timeout value in seconds for a device release upon + certain error cases, after which the device is reset. 
+ What: /sys/kernel/debug/habanalabs/hl/dma_size Date: Apr 2021 KernelVersion: 5.13 diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c index 48d3ec8b5c82..945c0e6758ca 100644 --- a/drivers/misc/habanalabs/common/debugfs.c +++ b/drivers/misc/habanalabs/common/debugfs.c @@ -1769,6 +1769,11 @@ void hl_debugfs_add_device(struct hl_device *hdev) dev_entry, &hl_timeout_locked_fops); + debugfs_create_u32("device_release_watchdog_timeout", + 0644, + dev_entry->root, + &hdev->device_release_watchdog_timeout_sec); + for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) { debugfs_create_file(hl_debugfs_list[i].name, 0444, From 5b8873b39c5d4ee93e382389b199d553b38b19f3 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Fri, 30 Sep 2022 16:43:47 +0300 Subject: [PATCH 2081/4122] habanalabs/gaudi: use graceful hard reset for F/W events Use graceful hard reset for F/W events on Gaudi device that require a device reset. Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi/gaudi.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 337123f73501..3dfb9ecf7db3 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -7942,16 +7942,14 @@ reset_device: reset_required = false; } - /* despite reset doesn't execute. 
a notification on - * occurred event needs to be sent here - */ - if (event_mask) - hl_notifier_event_send_all(hdev, event_mask); - - if (reset_required) - hl_device_reset(hdev, flags); - else + if (reset_required) { + hl_device_cond_reset(hdev, flags, event_mask); + } else { hl_fw_unmask_irq(hdev, event_type); + /* Notification on occurred event needs to be sent although reset is not executed */ + if (event_mask) + hl_notifier_event_send_all(hdev, event_mask); + } } static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size) From d1ce7e5ea140bb01d8c6faded09b9264bb83f722 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Fri, 30 Sep 2022 16:57:54 +0300 Subject: [PATCH 2082/4122] habanalabs/gaudi2: use graceful hard reset for F/W events Use graceful hard reset for F/W events on Gaudi2 device that require a device reset. While at it, do a small refactor of the checks and function calls, to simplify it and to avoid code duplication. Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi2/gaudi2.c | 27 +++++++++---------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index 9208f69dd7f8..22f5445fe71c 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -8768,9 +8768,9 @@ static void hl_arc_event_handle(struct hl_device *hdev, static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) { - u32 ctl, reset_flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY; - struct gaudi2_device *gaudi2 = hdev->asic_specific; bool reset_required = false, skip_reset = false, is_critical = false; + struct gaudi2_device *gaudi2 = hdev->asic_specific; + u32 ctl, reset_flags = HL_DRV_RESET_HARD; int index, sbte_index; u64 event_mask = 0; u16 event_type; @@ -9158,7 +9158,9 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct 
hl_eq_entry *eq_ent event_type); } - if ((gaudi2_irq_map_table[event_type].reset || reset_required) && !skip_reset) + if ((gaudi2_irq_map_table[event_type].reset || reset_required) && !skip_reset && + (hdev->hard_reset_on_fw_events || + (hdev->asic_prop.fw_security_enabled && is_critical))) goto reset_device; /* Send unmask irq only for interrupts not classified as MSG */ @@ -9172,22 +9174,13 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent reset_device: if (hdev->asic_prop.fw_security_enabled && is_critical) { - reset_flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW; - - /* notify on device unavailable while the reset triggered by fw */ - event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET | - HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE); - hl_device_reset(hdev, reset_flags); - } else if (hdev->hard_reset_on_fw_events) { - event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; - hl_device_reset(hdev, reset_flags); + reset_flags |= HL_DRV_RESET_BYPASS_REQ_TO_FW; + event_mask |= HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE; } else { - if (!gaudi2_irq_map_table[event_type].msg) - hl_fw_unmask_irq(hdev, event_type); + reset_flags |= HL_DRV_RESET_DELAY; } - - if (event_mask) - hl_notifier_event_send_all(hdev, event_mask); + event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; + hl_device_cond_reset(hdev, reset_flags, event_mask); } static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val) From 1b363adc7fbe37c4b6c18864c1f7043d85b4af6e Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Fri, 30 Sep 2022 17:02:19 +0300 Subject: [PATCH 2083/4122] habanalabs: use graceful hard reset for CS timeouts Use graceful hard reset when detecting a CS timeout that requires a device reset. 
Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../misc/habanalabs/common/command_submission.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index fa05770865c6..f1c69c8ed74a 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -798,7 +798,7 @@ out: static void cs_timedout(struct work_struct *work) { struct hl_device *hdev; - u64 event_mask; + u64 event_mask = 0x0; int rc; struct hl_cs *cs = container_of(work, struct hl_cs, work_tdr.work); @@ -830,11 +830,7 @@ static void cs_timedout(struct work_struct *work) if (rc) { hdev->captured_err_info.cs_timeout.timestamp = ktime_get(); hdev->captured_err_info.cs_timeout.seq = cs->sequence; - - event_mask = device_reset ? (HL_NOTIFIER_EVENT_CS_TIMEOUT | - HL_NOTIFIER_EVENT_DEVICE_RESET) : HL_NOTIFIER_EVENT_CS_TIMEOUT; - - hl_notifier_event_send_all(hdev, event_mask); + event_mask |= HL_NOTIFIER_EVENT_CS_TIMEOUT; } switch (cs->type) { @@ -869,8 +865,12 @@ static void cs_timedout(struct work_struct *work) cs_put(cs); - if (device_reset) - hl_device_reset(hdev, HL_DRV_RESET_TDR); + if (device_reset) { + event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; + hl_device_cond_reset(hdev, HL_DRV_RESET_TDR, event_mask); + } else if (event_mask) { + hl_notifier_event_send_all(hdev, event_mask); + } } static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, From 4a9c6e2cdf2b4128f5204b9cf14e3a788a8511df Mon Sep 17 00:00:00 2001 From: Tal Cohen Date: Tue, 18 Oct 2022 17:35:06 +0300 Subject: [PATCH 2084/4122] habanalabs: no consecutive err when user context is enabled Consecutive error protects a device reset loop from being triggered due to h/w issues and enters the device into an unavailable state. 
When the user may cause the error, an unavailable state will prevent the user from running its workloads. This commit prevents entering the consecutive error state when a user context is enabled. Signed-off-by: Tal Cohen Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index bcd959924971..61ddcb1ce508 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -1320,6 +1320,10 @@ static void handle_reset_trigger(struct hl_device *hdev, u32 flags) { u32 cur_reset_trigger = HL_RESET_TRIGGER_DEFAULT; + /* No consecutive mechanism when user context exists */ + if (hdev->is_compute_ctx_active) + return; + /* * 'reset cause' is being updated here, because getting here * means that it's the 1st time and the last time we're here From 679e968908a4997d02c2a7df294e97b066f9149f Mon Sep 17 00:00:00 2001 From: farah kassabri Date: Tue, 20 Sep 2022 11:48:40 +0300 Subject: [PATCH 2085/4122] habanalabs: zero ts registration buff when allocated To avoid memory corruption in kernel memory while using timestamp registration nodes, zero the kernel buff memory when it is allocated. 
Signed-off-by: farah kassabri Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c index 99b1d6ce26ae..541e1b6a2176 100644 --- a/drivers/misc/habanalabs/common/memory.c +++ b/drivers/misc/habanalabs/common/memory.c @@ -2109,7 +2109,7 @@ static int hl_ts_alloc_buf(struct hl_mmap_mem_buf *buf, gfp_t gfp, void *args) /* Allocate the internal kernel buffer */ size = num_elements * sizeof(struct hl_user_pending_interrupt); - p = vmalloc(size); + p = vzalloc(size); if (!p) goto free_user_buff; From fc69aa8640f8baf9c1246c17ca858bab9aea98b0 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Mon, 24 Oct 2022 01:14:18 +0300 Subject: [PATCH 2086/4122] habanalabs: fix PCIe access to SRAM via debugfs hl_access_sram_dram_region() uses a region base which is set within the hl_set_dram_bar() function. However, for SRAM access this function is not called, and we end up with a wrong value of region base and with a bad calculated address. Fix it by initializing the region base value independently of whether hl_set_dram_bar() is called or not. 
Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 61ddcb1ce508..cb8ecc17bba1 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -69,17 +69,17 @@ int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val, enum debugfs_access_type acc_type, enum pci_region region_type, bool set_dram_bar) { struct pci_mem_region *region = &hdev->pci_mem_region[region_type]; - u64 old_base = 0, rc, new_bar_region_base = 0; + u64 old_base = 0, rc, bar_region_base = region->region_base; void __iomem *acc_addr; if (set_dram_bar) { - old_base = hl_set_dram_bar(hdev, addr, region, &new_bar_region_base); + old_base = hl_set_dram_bar(hdev, addr, region, &bar_region_base); if (old_base == U64_MAX) return -EIO; } acc_addr = hdev->pcie_bar[region->bar_id] + region->offset_in_bar + - (addr - new_bar_region_base); + (addr - bar_region_base); switch (acc_type) { case DEBUGFS_READ8: From bdfef91e7c9c2bae083ce1965f53115d88329773 Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Wed, 19 Oct 2022 14:05:18 +0300 Subject: [PATCH 2087/4122] habanalabs: add warning print upon a PCI error In order to know if the driver catches PCI errors correctly, we need to print a warning for each error. 
Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/habanalabs_drv.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c index 714994725224..e82af8989700 100644 --- a/drivers/misc/habanalabs/common/habanalabs_drv.c +++ b/drivers/misc/habanalabs/common/habanalabs_drv.c @@ -595,15 +595,16 @@ hl_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t state) switch (state) { case pci_channel_io_normal: + dev_warn(hdev->dev, "PCI normal state error detected\n"); return PCI_ERS_RESULT_CAN_RECOVER; case pci_channel_io_frozen: - dev_warn(hdev->dev, "frozen state error detected\n"); + dev_warn(hdev->dev, "PCI frozen state error detected\n"); result = PCI_ERS_RESULT_NEED_RESET; break; case pci_channel_io_perm_failure: - dev_warn(hdev->dev, "failure state error detected\n"); + dev_warn(hdev->dev, "PCI failure state error detected\n"); result = PCI_ERS_RESULT_DISCONNECT; break; @@ -639,6 +640,10 @@ static void hl_pci_err_resume(struct pci_dev *pdev) */ static pci_ers_result_t hl_pci_err_slot_reset(struct pci_dev *pdev) { + struct hl_device *hdev = pci_get_drvdata(pdev); + + dev_warn(hdev->dev, "PCI slot reset detected\n"); + return PCI_ERS_RESULT_RECOVERED; } From 306206985a4bcfc12b45596d56c7bd8ba6f0f6b1 Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Wed, 26 Oct 2022 18:20:49 +0300 Subject: [PATCH 2088/4122] habanalabs: remove redundant gaudi2_sec asic type As Gaudi2 has a single PCI id, the secured asic type is redundant. 
Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 3 --- drivers/misc/habanalabs/common/habanalabs.h | 2 -- drivers/misc/habanalabs/common/mmu/mmu.c | 1 - drivers/misc/habanalabs/common/sysfs.c | 2 -- drivers/misc/habanalabs/gaudi2/gaudi2.c | 2 +- 5 files changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index cb8ecc17bba1..3ea1ee1ec8ef 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -748,9 +748,6 @@ static int device_early_init(struct hl_device *hdev) gaudi2_set_asic_funcs(hdev); strscpy(hdev->asic_name, "GAUDI2", sizeof(hdev->asic_name)); break; - case ASIC_GAUDI2_SEC: - gaudi2_set_asic_funcs(hdev); - strscpy(hdev->asic_name, "GAUDI2 SEC", sizeof(hdev->asic_name)); break; default: dev_err(hdev->dev, "Unrecognized ASIC type %d\n", diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index bfaaa9daa750..7d191f388953 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -1192,7 +1192,6 @@ struct hl_dec { * @ASIC_GAUDI: Gaudi device (HL-2000). * @ASIC_GAUDI_SEC: Gaudi secured device (HL-2000). * @ASIC_GAUDI2: Gaudi2 device. - * @ASIC_GAUDI2_SEC: Gaudi2 secured device. 
*/ enum hl_asic_type { ASIC_INVALID, @@ -1200,7 +1199,6 @@ enum hl_asic_type { ASIC_GAUDI, ASIC_GAUDI_SEC, ASIC_GAUDI2, - ASIC_GAUDI2_SEC, }; struct hl_cs_parser; diff --git a/drivers/misc/habanalabs/common/mmu/mmu.c b/drivers/misc/habanalabs/common/mmu/mmu.c index 589179f8cd41..67d3e70cf571 100644 --- a/drivers/misc/habanalabs/common/mmu/mmu.c +++ b/drivers/misc/habanalabs/common/mmu/mmu.c @@ -635,7 +635,6 @@ int hl_mmu_if_set_funcs(struct hl_device *hdev) hl_mmu_v1_set_funcs(hdev, &hdev->mmu_func[MMU_DR_PGT]); break; case ASIC_GAUDI2: - case ASIC_GAUDI2_SEC: /* MMUs in Gaudi2 are always host resident */ hl_mmu_v2_hr_set_funcs(hdev, &hdev->mmu_func[MMU_HR_PGT]); break; diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c index 36e9814139d1..c924fc994bd9 100644 --- a/drivers/misc/habanalabs/common/sysfs.c +++ b/drivers/misc/habanalabs/common/sysfs.c @@ -248,8 +248,6 @@ static ssize_t device_type_show(struct device *dev, case ASIC_GAUDI2: str = "GAUDI2"; break; - case ASIC_GAUDI2_SEC: - str = "GAUDI2 SEC"; break; default: dev_err(hdev->dev, "Unrecognized ASIC type %d\n", diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index 22f5445fe71c..03f8cf9bb136 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -3969,7 +3969,7 @@ static void gaudi2_init_firmware_loader(struct hl_device *hdev) fw_loader->sram_bar_id = SRAM_CFG_BAR_ID; fw_loader->dram_bar_id = DRAM_BAR_ID; - if (hdev->asic_type == ASIC_GAUDI2 || hdev->asic_type == ASIC_GAUDI2_SEC) + if (hdev->asic_type == ASIC_GAUDI2) fw_loader->cpu_timeout = GAUDI2_CPU_TIMEOUT_USEC; else /* ASIC_GAUDI2_FPGA */ fw_loader->cpu_timeout = GAUDI2_FPGA_CPU_TIMEOUT; From 841cd2d7658d92e09354640c1887797f0da3d444 Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Wed, 26 Oct 2022 16:20:45 +0300 Subject: [PATCH 2089/4122] habanalabs/gaudi2: add PCI revision 2 support Add support for Gaudi2 Device with 
PCI revision 2. Functionality is exactly the same as revision 1, the only difference is device name exposed to user. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 4 +++ drivers/misc/habanalabs/common/habanalabs.h | 2 ++ .../misc/habanalabs/common/habanalabs_drv.c | 26 +++++++++++++------ .../misc/habanalabs/common/habanalabs_ioctl.c | 6 +++-- drivers/misc/habanalabs/common/mmu/mmu.c | 1 + drivers/misc/habanalabs/common/sysfs.c | 2 ++ drivers/misc/habanalabs/gaudi2/gaudi2.c | 6 +---- drivers/misc/habanalabs/gaudi2/gaudi2P.h | 2 -- .../include/hw_ip/pci/pci_general.h | 7 +++++ include/uapi/misc/habanalabs.h | 7 +++++ 10 files changed, 46 insertions(+), 17 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 3ea1ee1ec8ef..35ed494fcfdf 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -748,6 +748,10 @@ static int device_early_init(struct hl_device *hdev) gaudi2_set_asic_funcs(hdev); strscpy(hdev->asic_name, "GAUDI2", sizeof(hdev->asic_name)); break; + case ASIC_GAUDI2B: + gaudi2_set_asic_funcs(hdev); + strscpy(hdev->asic_name, "GAUDI2B", sizeof(hdev->asic_name)); + break; break; default: dev_err(hdev->dev, "Unrecognized ASIC type %d\n", diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 7d191f388953..e391e7951fb7 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -1192,6 +1192,7 @@ struct hl_dec { * @ASIC_GAUDI: Gaudi device (HL-2000). * @ASIC_GAUDI_SEC: Gaudi secured device (HL-2000). * @ASIC_GAUDI2: Gaudi2 device. + * @ASIC_GAUDI2B: Gaudi2B device. 
*/ enum hl_asic_type { ASIC_INVALID, @@ -1199,6 +1200,7 @@ enum hl_asic_type { ASIC_GAUDI, ASIC_GAUDI_SEC, ASIC_GAUDI2, + ASIC_GAUDI2B, }; struct hl_cs_parser; diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c index e82af8989700..7815c60df54e 100644 --- a/drivers/misc/habanalabs/common/habanalabs_drv.c +++ b/drivers/misc/habanalabs/common/habanalabs_drv.c @@ -9,6 +9,7 @@ #define pr_fmt(fmt) "habanalabs: " fmt #include "habanalabs.h" +#include "../include/hw_ip/pci/pci_general.h" #include #include @@ -74,16 +75,17 @@ MODULE_DEVICE_TABLE(pci, ids); /* * get_asic_type - translate device id to asic type * - * @device: id of the PCI device + * @hdev: pointer to habanalabs device structure. * - * Translate device id to asic type. + * Translate device id and revision id to asic type. * In case of unidentified device, return -1 */ -static enum hl_asic_type get_asic_type(u16 device) +static enum hl_asic_type get_asic_type(struct hl_device *hdev) { - enum hl_asic_type asic_type; + struct pci_dev *pdev = hdev->pdev; + enum hl_asic_type asic_type = ASIC_INVALID; - switch (device) { + switch (pdev->device) { case PCI_IDS_GOYA: asic_type = ASIC_GOYA; break; @@ -94,10 +96,18 @@ static enum hl_asic_type get_asic_type(u16 device) asic_type = ASIC_GAUDI_SEC; break; case PCI_IDS_GAUDI2: - asic_type = ASIC_GAUDI2; + switch (pdev->revision) { + case REV_ID_A: + asic_type = ASIC_GAUDI2; + break; + case REV_ID_B: + asic_type = ASIC_GAUDI2B; + break; + default: + break; + } break; default: - asic_type = ASIC_INVALID; break; } @@ -416,7 +426,7 @@ static int create_hdev(struct hl_device **dev, struct pci_dev *pdev) /* First, we must find out which ASIC are we handling. 
This is needed * to configure the behavior of the driver (kernel parameters) */ - hdev->asic_type = get_asic_type(pdev->device); + hdev->asic_type = get_asic_type(hdev); if (hdev->asic_type == ASIC_INVALID) { dev_err(&pdev->dev, "Unsupported ASIC\n"); rc = -ENODEV; diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index 5ce5c42e2731..ee43017eb563 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -10,10 +10,11 @@ #include #include "habanalabs.h" -#include #include -#include +#include +#include #include +#include #include static u32 hl_debug_struct_size[HL_DEBUG_OP_TIMESTAMP + 1] = { @@ -105,6 +106,7 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args) hw_ip.edma_enabled_mask = prop->edma_enabled_mask; hw_ip.server_type = prop->server_type; hw_ip.security_enabled = prop->fw_security_enabled; + hw_ip.revision_id = hdev->pdev->revision; return copy_to_user(out, &hw_ip, min((size_t) size, sizeof(hw_ip))) ? 
-EFAULT : 0; diff --git a/drivers/misc/habanalabs/common/mmu/mmu.c b/drivers/misc/habanalabs/common/mmu/mmu.c index 67d3e70cf571..2c1005f74cf4 100644 --- a/drivers/misc/habanalabs/common/mmu/mmu.c +++ b/drivers/misc/habanalabs/common/mmu/mmu.c @@ -635,6 +635,7 @@ int hl_mmu_if_set_funcs(struct hl_device *hdev) hl_mmu_v1_set_funcs(hdev, &hdev->mmu_func[MMU_DR_PGT]); break; case ASIC_GAUDI2: + case ASIC_GAUDI2B: /* MMUs in Gaudi2 are always host resident */ hl_mmu_v2_hr_set_funcs(hdev, &hdev->mmu_func[MMU_HR_PGT]); break; diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c index c924fc994bd9..735d8bed0066 100644 --- a/drivers/misc/habanalabs/common/sysfs.c +++ b/drivers/misc/habanalabs/common/sysfs.c @@ -248,6 +248,8 @@ static ssize_t device_type_show(struct device *dev, case ASIC_GAUDI2: str = "GAUDI2"; break; + case ASIC_GAUDI2B: + str = "GAUDI2B"; break; default: dev_err(hdev->dev, "Unrecognized ASIC type %d\n", diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index 03f8cf9bb136..f21b68be6d20 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -3968,11 +3968,7 @@ static void gaudi2_init_firmware_loader(struct hl_device *hdev) fw_loader->skip_bmc = false; fw_loader->sram_bar_id = SRAM_CFG_BAR_ID; fw_loader->dram_bar_id = DRAM_BAR_ID; - - if (hdev->asic_type == ASIC_GAUDI2) - fw_loader->cpu_timeout = GAUDI2_CPU_TIMEOUT_USEC; - else /* ASIC_GAUDI2_FPGA */ - fw_loader->cpu_timeout = GAUDI2_FPGA_CPU_TIMEOUT; + fw_loader->cpu_timeout = GAUDI2_CPU_TIMEOUT_USEC; /* here we update initial values for few specific dynamic regs (as * before reading the first descriptor from FW those value has to be diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2P.h b/drivers/misc/habanalabs/gaudi2/gaudi2P.h index a99c348bbf39..b4383c199bbb 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2P.h +++ b/drivers/misc/habanalabs/gaudi2/gaudi2P.h @@ -23,8 +23,6 
@@ #define GAUDI2_CPU_TIMEOUT_USEC 30000000 /* 30s */ -#define GAUDI2_FPGA_CPU_TIMEOUT 100000000 /* 100s */ - #define NUMBER_OF_PDMA_QUEUES 2 #define NUMBER_OF_EDMA_QUEUES 8 #define NUMBER_OF_MME_QUEUES 4 diff --git a/drivers/misc/habanalabs/include/hw_ip/pci/pci_general.h b/drivers/misc/habanalabs/include/hw_ip/pci/pci_general.h index d232081d4e0f..f5d497dc9bdc 100644 --- a/drivers/misc/habanalabs/include/hw_ip/pci/pci_general.h +++ b/drivers/misc/habanalabs/include/hw_ip/pci/pci_general.h @@ -20,4 +20,11 @@ #define PCI_CONFIG_ELBI_STS_MASK (PCI_CONFIG_ELBI_STS_ERR | \ PCI_CONFIG_ELBI_STS_DONE) +enum hl_revision_id { + /* PCI revision ID 0 is not legal */ + REV_ID_INVALID = 0x00, + REV_ID_A = 0x01, + REV_ID_B = 0x02, +}; + #endif /* INCLUDE_PCI_GENERAL_H_ */ diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index a4ceee681898..58343998bd63 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -868,6 +868,7 @@ enum hl_server_type { * @number_of_user_interrupts: The number of interrupts that are available to the userspace * application to use. Relevant for Gaudi2 and later. * @device_mem_alloc_default_page_size: default page size used in device memory allocation. + * @revision_id: PCI revision ID of the ASIC. */ struct hl_info_hw_ip_info { __u64 sram_base_address; @@ -898,6 +899,12 @@ struct hl_info_hw_ip_info { __u16 pad2; __u64 reserved4; __u64 device_mem_alloc_default_page_size; + __u64 reserved5; + __u64 reserved6; + __u32 reserved7; + __u8 reserved8; + __u8 revision_id; + __u8 pad[2]; }; struct hl_info_dram_usage { From cb5fb665f30388cf8cb9becae86dcb84ace0ca88 Mon Sep 17 00:00:00 2001 From: Dani Liberman Date: Sun, 30 Oct 2022 13:08:37 +0200 Subject: [PATCH 2090/4122] habanalabs/gaudi: add razwi notify event Each time razwi (read-only zero, write ignore) happens, besides capturing its data, also notify the user about it. 
Signed-off-by: Dani Liberman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 8 +++++ drivers/misc/habanalabs/common/habanalabs.h | 2 ++ drivers/misc/habanalabs/gaudi/gaudi.c | 37 +++++++++++---------- include/uapi/misc/habanalabs.h | 2 ++ 4 files changed, 31 insertions(+), 18 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 35ed494fcfdf..d1a609589558 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -2409,6 +2409,14 @@ void hl_capture_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_ num_of_engines * sizeof(u16)); hdev->captured_err_info.razwi.flags = flags; } + +void hl_handle_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_of_engines, + u8 flags, u64 *event_mask) +{ + hl_capture_razwi(hdev, addr, engine_id, num_of_engines, flags); + *event_mask |= HL_NOTIFIER_EVENT_RAZWI; +} + static void hl_capture_user_mappings(struct hl_device *hdev, bool is_pmmu) { struct page_fault_info *pgf_info = &hdev->captured_err_info.pgf_info; diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index e391e7951fb7..d9335f3769b8 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -3812,6 +3812,8 @@ hl_mmap_mem_buf_alloc(struct hl_mem_mgr *mmg, __printf(2, 3) void hl_engine_data_sprintf(struct engines_data *e, const char *fmt, ...); void hl_capture_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_of_engines, u8 flags); +void hl_handle_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_of_engines, + u8 flags, u64 *event_mask); void hl_capture_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_pmmu); #ifdef CONFIG_DEBUG_FS diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 3dfb9ecf7db3..035865cb097c 
100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -7301,7 +7301,7 @@ static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *e } static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type, - bool razwi) + bool razwi, u64 *event_mask) { bool is_read = false, is_write = false; u16 engine_id[2], num_of_razwi_eng = 0; @@ -7337,7 +7337,8 @@ static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type, num_of_razwi_eng = 1; } - hl_capture_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng, razwi_flags); + hl_handle_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng, razwi_flags, + event_mask); } } @@ -7675,7 +7676,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR: case GAUDI_EVENT_MMU_DERR: case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR: - gaudi_print_irq_info(hdev, event_type, true); + gaudi_print_irq_info(hdev, event_type, true, &event_mask); gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; @@ -7685,7 +7686,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr case GAUDI_EVENT_AXI_ECC: case GAUDI_EVENT_L2_RAM_ECC: case GAUDI_EVENT_PLL0 ... 
GAUDI_EVENT_PLL17: - gaudi_print_irq_info(hdev, event_type, false); + gaudi_print_irq_info(hdev, event_type, false, &event_mask); fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; goto reset_device; @@ -7694,7 +7695,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr case GAUDI_EVENT_HBM1_SPI_0: case GAUDI_EVENT_HBM2_SPI_0: case GAUDI_EVENT_HBM3_SPI_0: - gaudi_print_irq_info(hdev, event_type, false); + gaudi_print_irq_info(hdev, event_type, false, &event_mask); gaudi_hbm_read_interrupts(hdev, gaudi_hbm_event_to_dev(event_type), &eq_entry->hbm_ecc_data); @@ -7706,7 +7707,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr case GAUDI_EVENT_HBM1_SPI_1: case GAUDI_EVENT_HBM2_SPI_1: case GAUDI_EVENT_HBM3_SPI_1: - gaudi_print_irq_info(hdev, event_type, false); + gaudi_print_irq_info(hdev, event_type, false, &event_mask); gaudi_hbm_read_interrupts(hdev, gaudi_hbm_event_to_dev(event_type), &eq_entry->hbm_ecc_data); @@ -7728,7 +7729,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr * if the event is a TPC Assertion or a "real" TPC DEC. */ event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT; - gaudi_print_irq_info(hdev, event_type, true); + gaudi_print_irq_info(hdev, event_type, true, &event_mask); reset_required = gaudi_tpc_read_interrupts(hdev, tpc_dec_event_to_tpc_id(event_type), "AXI_SLV_DEC_Error"); @@ -7753,7 +7754,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr case GAUDI_EVENT_TPC5_KRN_ERR: case GAUDI_EVENT_TPC6_KRN_ERR: case GAUDI_EVENT_TPC7_KRN_ERR: - gaudi_print_irq_info(hdev, event_type, true); + gaudi_print_irq_info(hdev, event_type, true, &event_mask); reset_required = gaudi_tpc_read_interrupts(hdev, tpc_krn_event_to_tpc_id(event_type), "KRN_ERR"); @@ -7792,7 +7793,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr case GAUDI_EVENT_HBM_0_SERR ... 
GAUDI_EVENT_HBM_3_SERR: fallthrough; case GAUDI_EVENT_MMU_SERR: - gaudi_print_irq_info(hdev, event_type, true); + gaudi_print_irq_info(hdev, event_type, true, &event_mask); gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); hl_fw_unmask_irq(hdev, event_type); event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; @@ -7802,14 +7803,14 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr case GAUDI_EVENT_CPU_AXI_SPLITTER: case GAUDI_EVENT_PSOC_AXI_DEC: case GAUDI_EVENT_PSOC_PRSTN_FALL: - gaudi_print_irq_info(hdev, event_type, true); + gaudi_print_irq_info(hdev, event_type, true, &event_mask); hl_fw_unmask_irq(hdev, event_type); event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI_EVENT_MMU_PAGE_FAULT: case GAUDI_EVENT_MMU_WR_PERM: - gaudi_print_irq_info(hdev, event_type, true); + gaudi_print_irq_info(hdev, event_type, true, &event_mask); hl_fw_unmask_irq(hdev, event_type); event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; @@ -7838,14 +7839,14 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr case GAUDI_EVENT_NIC4_QM1: case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE: case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM: - gaudi_print_irq_info(hdev, event_type, true); + gaudi_print_irq_info(hdev, event_type, true, &event_mask); gaudi_handle_qman_err(hdev, event_type, &event_mask); hl_fw_unmask_irq(hdev, event_type); event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET); break; case GAUDI_EVENT_RAZWI_OR_ADC_SW: - gaudi_print_irq_info(hdev, event_type, true); + gaudi_print_irq_info(hdev, event_type, true, &event_mask); event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; goto reset_device; @@ -7858,7 +7859,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr case GAUDI_EVENT_TPC6_BMON_SPMU: case GAUDI_EVENT_TPC7_BMON_SPMU: case GAUDI_EVENT_DMA_BM_CH0 ... 
GAUDI_EVENT_DMA_BM_CH7: - gaudi_print_irq_info(hdev, event_type, false); + gaudi_print_irq_info(hdev, event_type, false, &event_mask); hl_fw_unmask_irq(hdev, event_type); event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; @@ -7870,7 +7871,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr break; case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3: - gaudi_print_irq_info(hdev, event_type, false); + gaudi_print_irq_info(hdev, event_type, false, &event_mask); gaudi_print_sm_sei_info(hdev, event_type, &eq_entry->sm_sei_data); rc = hl_state_dump(hdev); @@ -7899,18 +7900,18 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr break; case GAUDI_EVENT_DEV_RESET_REQ: - gaudi_print_irq_info(hdev, event_type, false); + gaudi_print_irq_info(hdev, event_type, false, &event_mask); event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; goto reset_device; case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC: - gaudi_print_irq_info(hdev, event_type, false); + gaudi_print_irq_info(hdev, event_type, false, &event_mask); gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err); event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; goto reset_device; case GAUDI_EVENT_FW_ALIVE_S: - gaudi_print_irq_info(hdev, event_type, false); + gaudi_print_irq_info(hdev, event_type, false, &event_mask); gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive); event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; goto reset_device; diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 58343998bd63..7747e19e81fe 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -721,6 +721,7 @@ enum hl_server_type { * HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE - Indicates device is unavailable * HL_NOTIFIER_EVENT_USER_ENGINE_ERR - Indicates device engine in error state * HL_NOTIFIER_EVENT_GENERAL_HW_ERR - Indicates device HW error + * HL_NOTIFIER_EVENT_RAZWI - Indicates razwi happened */ #define 
HL_NOTIFIER_EVENT_TPC_ASSERT (1ULL << 0) #define HL_NOTIFIER_EVENT_UNDEFINED_OPCODE (1ULL << 1) @@ -729,6 +730,7 @@ enum hl_server_type { #define HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE (1ULL << 4) #define HL_NOTIFIER_EVENT_USER_ENGINE_ERR (1ULL << 5) #define HL_NOTIFIER_EVENT_GENERAL_HW_ERR (1ULL << 6) +#define HL_NOTIFIER_EVENT_RAZWI (1ULL << 7) /* Opcode for management ioctl * From cd21701cde33123fc53c6401192219ba14832da3 Mon Sep 17 00:00:00 2001 From: Dani Liberman Date: Thu, 27 Oct 2022 20:38:26 +0300 Subject: [PATCH 2091/4122] habanalabs: use single threaded WQ for event handling Creating event queue workqueue using alloc_workqueue made it run in multi threaded mode, which caused parallel dumping of events as well as parallel events notifying to user, causing logs with multiple events to be out of order. Fixed by creating event queue workqueue as single threaded work queue. Signed-off-by: Dani Liberman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index d1a609589558..65bb40f81901 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -787,7 +787,7 @@ static int device_early_init(struct hl_device *hdev) } } - hdev->eq_wq = alloc_workqueue("hl-events", WQ_UNBOUND, 0); + hdev->eq_wq = create_singlethread_workqueue("hl-events"); if (hdev->eq_wq == NULL) { dev_err(hdev->dev, "Failed to allocate EQ workqueue\n"); rc = -ENOMEM; From aff6354afd1f9eae1e10658c157c26e316806f56 Mon Sep 17 00:00:00 2001 From: Dani Liberman Date: Mon, 31 Oct 2022 11:44:45 +0200 Subject: [PATCH 2092/4122] habanalabs/gaudi: add page fault notify event Each time page fault happens, besides capturing its data, also notify the user about it. 
Signed-off-by: Dani Liberman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 9 +++++++++ drivers/misc/habanalabs/common/habanalabs.h | 2 ++ drivers/misc/habanalabs/gaudi/gaudi.c | 6 +++--- include/uapi/misc/habanalabs.h | 2 ++ 4 files changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 65bb40f81901..31818121ef4d 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -2490,3 +2490,12 @@ void hl_capture_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is hdev->captured_err_info.pgf_info.pgf.engine_id = eng_id; hl_capture_user_mappings(hdev, is_pmmu); } + +void hl_handle_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_pmmu, + u64 *event_mask) +{ + hl_capture_page_fault(hdev, addr, eng_id, is_pmmu); + + if (event_mask) + *event_mask |= HL_NOTIFIER_EVENT_PAGE_FAULT; +} diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index d9335f3769b8..0781b8698f74 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -3815,6 +3815,8 @@ void hl_capture_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_ void hl_handle_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_of_engines, u8 flags, u64 *event_mask); void hl_capture_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_pmmu); +void hl_handle_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_pmmu, + u64 *event_mask); #ifdef CONFIG_DEBUG_FS diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 035865cb097c..cbe1daf5a793 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -6740,7 +6740,7 @@ static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_i } 
} -static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr) +static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u64 *event_mask) { struct gaudi_device *gaudi = hdev->asic_specific; u32 val; @@ -6755,7 +6755,7 @@ static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr *addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA); dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr); - hl_capture_page_fault(hdev, *addr, 0, true); + hl_handle_page_fault(hdev, *addr, 0, true, event_mask); WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0); } @@ -7323,7 +7323,7 @@ static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type, if (razwi) { gaudi_print_and_get_razwi_info(hdev, &engine_id[0], &engine_id[1], &is_read, &is_write); - gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr); + gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, event_mask); if (is_read) razwi_flags |= HL_RAZWI_READ; diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 7747e19e81fe..e50cb71df081 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -722,6 +722,7 @@ enum hl_server_type { * HL_NOTIFIER_EVENT_USER_ENGINE_ERR - Indicates device engine in error state * HL_NOTIFIER_EVENT_GENERAL_HW_ERR - Indicates device HW error * HL_NOTIFIER_EVENT_RAZWI - Indicates razwi happened + * HL_NOTIFIER_EVENT_PAGE_FAULT - Indicates page fault happened */ #define HL_NOTIFIER_EVENT_TPC_ASSERT (1ULL << 0) #define HL_NOTIFIER_EVENT_UNDEFINED_OPCODE (1ULL << 1) @@ -731,6 +732,7 @@ enum hl_server_type { #define HL_NOTIFIER_EVENT_USER_ENGINE_ERR (1ULL << 5) #define HL_NOTIFIER_EVENT_GENERAL_HW_ERR (1ULL << 6) #define HL_NOTIFIER_EVENT_RAZWI (1ULL << 7) +#define HL_NOTIFIER_EVENT_PAGE_FAULT (1ULL << 8) /* Opcode for management ioctl * From 91bd822448e57a55d12dc0461909b5c585485a6c Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Sun, 30 Oct 2022 15:10:13 +0200 Subject: [PATCH 
2093/4122] habanalabs/gaudi2: implement fp32 not supported event Due to binning, Gaudi2 does not always support fp32. We add support for such an event in case fp32 is used by the user in such a device. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi2/gaudi2.c | 5 +++++ drivers/misc/habanalabs/include/gaudi2/gaudi2_async_events.h | 1 + .../include/gaudi2/gaudi2_async_ids_map_extended.h | 4 +++- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index f21b68be6d20..77bdbab41e6c 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -9148,6 +9148,11 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; + case GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED: + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; + is_critical = true; + break; + default: if (gaudi2_irq_map_table[event_type].valid) dev_err_ratelimited(hdev->dev, "Cannot find handler for event %d\n", diff --git a/drivers/misc/habanalabs/include/gaudi2/gaudi2_async_events.h b/drivers/misc/habanalabs/include/gaudi2/gaudi2_async_events.h index 34406770a76a..305b576222e6 100644 --- a/drivers/misc/habanalabs/include/gaudi2/gaudi2_async_events.h +++ b/drivers/misc/habanalabs/include/gaudi2/gaudi2_async_events.h @@ -957,6 +957,7 @@ enum gaudi2_async_event_id { GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG0 = 1317, GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1 = 1318, GAUDI2_EVENT_ARC_DCCM_FULL = 1319, + GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED = 1320, GAUDI2_EVENT_SIZE, }; diff --git a/drivers/misc/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h b/drivers/misc/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h index 5bd4383c9f2c..d510cb10c883 100644 --- a/drivers/misc/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h +++ 
b/drivers/misc/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 * - * Copyright 2018-2021 HabanaLabs, Ltd. + * Copyright 2018-2022 HabanaLabs, Ltd. * All Rights Reserved. * */ @@ -2663,6 +2663,8 @@ static struct gaudi2_async_events_ids_map gaudi2_irq_map_table[] = { .msg = 1, .reset = 0, .name = "STATUS_NIC11_ENG1" }, { .fc_id = 1319, .cpu_id = 625, .valid = 1, .msg = 1, .reset = 0, .name = "ARC_DCCM_FULL" }, + { .fc_id = 1320, .cpu_id = 626, .valid = 1, + .msg = 1, .reset = 1, .name = "FP32_NOT_SUPPORTED" }, }; #endif /* __GAUDI2_ASYNC_IDS_MAP_EVENTS_EXT_H_ */ From 413bdb176eaa7d02c979a3c738738aea91fe6ed7 Mon Sep 17 00:00:00 2001 From: Dani Liberman Date: Sun, 30 Oct 2022 14:46:19 +0200 Subject: [PATCH 2094/4122] habanalabs/gaudi2: add razwi notify event Each time razwi (read as zero, write ignored) event happens, besides capturing its data, also notify the user about it. Signed-off-by: Dani Liberman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 4 +- drivers/misc/habanalabs/gaudi2/gaudi2.c | 140 +++++++++++++----------- 2 files changed, 82 insertions(+), 62 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 31818121ef4d..708db0f48ee0 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -2414,7 +2414,9 @@ void hl_handle_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_o u8 flags, u64 *event_mask) { hl_capture_razwi(hdev, addr, engine_id, num_of_engines, flags); - *event_mask |= HL_NOTIFIER_EVENT_RAZWI; + + if (event_mask) + *event_mask |= HL_NOTIFIER_EVENT_RAZWI; } static void hl_capture_user_mappings(struct hl_device *hdev, bool is_pmmu) diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index 77bdbab41e6c..59940c8df2d2 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ 
b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -7063,7 +7063,7 @@ static void gaudi2_handle_qman_err_generic(struct hl_device *hdev, const char *q static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev, u64 rtr_mstr_if_base_addr, bool is_write, char *name, bool read_razwi_regs, struct hl_eq_razwi_info *razwi_info, - enum gaudi2_engine_id id) + enum gaudi2_engine_id id, u64 *event_mask) { u32 razwi_hi, razwi_lo, razwi_xy; u16 eng_id = id; @@ -7093,8 +7093,8 @@ static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev, rd_wr_flag = HL_RAZWI_READ; } - hl_capture_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &eng_id, 1, - rd_wr_flag | HL_RAZWI_HBW); + hl_handle_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &eng_id, 1, + rd_wr_flag | HL_RAZWI_HBW, event_mask); dev_err_ratelimited(hdev->dev, "%s-RAZWI SHARED RR HBW %s error, address %#llx, Initiator coordinates 0x%x\n", @@ -7104,7 +7104,7 @@ static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev, static void gaudi2_razwi_rr_lbw_shared_printf_info(struct hl_device *hdev, u64 rtr_mstr_if_base_addr, bool is_write, char *name, bool read_razwi_regs, struct hl_eq_razwi_info *razwi_info, - enum gaudi2_engine_id id) + enum gaudi2_engine_id id, u64 *event_mask) { u32 razwi_addr, razwi_xy; u16 eng_id = id; @@ -7132,7 +7132,7 @@ static void gaudi2_razwi_rr_lbw_shared_printf_info(struct hl_device *hdev, rd_wr_flag = HL_RAZWI_READ; } - hl_capture_razwi(hdev, razwi_addr, &eng_id, 1, rd_wr_flag | HL_RAZWI_LBW); + hl_handle_razwi(hdev, razwi_addr, &eng_id, 1, rd_wr_flag | HL_RAZWI_LBW, event_mask); dev_err_ratelimited(hdev->dev, "%s-RAZWI SHARED RR LBW %s error, mstr_if 0x%llx, captured address 0x%x Initiator coordinates 0x%x\n", name, is_write ? 
"WR" : "RD", rtr_mstr_if_base_addr, razwi_addr, @@ -7189,7 +7189,8 @@ static enum gaudi2_engine_id gaudi2_razwi_calc_engine_id(struct hl_device *hdev, */ static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev, enum razwi_event_sources module, u8 module_idx, - u8 module_sub_idx, struct hl_eq_razwi_info *razwi_info) + u8 module_sub_idx, struct hl_eq_razwi_info *razwi_info, + u64 *event_mask) { bool via_sft = false, read_razwi_regs = false; u32 rtr_id, dcore_id, dcore_rtr_id, sft_id, eng_id; @@ -7330,7 +7331,7 @@ dump_info: if (hbw_shrd_aw) { gaudi2_razwi_rr_hbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, true, initiator_name, read_razwi_regs, razwi_info, - eng_id); + eng_id, event_mask); /* Clear event indication */ if (read_razwi_regs) @@ -7340,7 +7341,7 @@ dump_info: if (hbw_shrd_ar) { gaudi2_razwi_rr_hbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, false, initiator_name, read_razwi_regs, razwi_info, - eng_id); + eng_id, event_mask); /* Clear event indication */ if (read_razwi_regs) @@ -7350,7 +7351,7 @@ dump_info: if (lbw_shrd_aw) { gaudi2_razwi_rr_lbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, true, initiator_name, read_razwi_regs, razwi_info, - eng_id); + eng_id, event_mask); /* Clear event indication */ if (read_razwi_regs) @@ -7360,7 +7361,7 @@ dump_info: if (lbw_shrd_ar) { gaudi2_razwi_rr_lbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, false, initiator_name, read_razwi_regs, razwi_info, - eng_id); + eng_id, event_mask); /* Clear event indication */ if (read_razwi_regs) @@ -7376,38 +7377,42 @@ static void gaudi2_check_if_razwi_happened(struct hl_device *hdev) /* check all TPCs */ for (mod_idx = 0 ; mod_idx < (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1) ; mod_idx++) { if (prop->tpc_enabled_mask & BIT(mod_idx)) - gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, mod_idx, 0, NULL); + gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, mod_idx, 0, NULL, + NULL); } /* check all MMEs */ for (mod_idx = 0 ; mod_idx < 
(NUM_OF_MME_PER_DCORE * NUM_OF_DCORES) ; mod_idx++) for (sub_mod = MME_WAP0 ; sub_mod < MME_INITIATORS_MAX ; sub_mod++) gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mod_idx, - sub_mod, NULL); + sub_mod, NULL, NULL); /* check all EDMAs */ for (mod_idx = 0 ; mod_idx < (NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES) ; mod_idx++) if (prop->edma_enabled_mask & BIT(mod_idx)) - gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, mod_idx, 0, NULL); + gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, mod_idx, 0, NULL, + NULL); /* check all PDMAs */ for (mod_idx = 0 ; mod_idx < NUM_OF_PDMA ; mod_idx++) - gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_PDMA, mod_idx, 0, NULL); + gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_PDMA, mod_idx, 0, NULL, + NULL); /* check all NICs */ for (mod_idx = 0 ; mod_idx < NIC_NUMBER_OF_PORTS ; mod_idx++) if (hdev->nic_ports_mask & BIT(mod_idx)) gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_NIC, mod_idx >> 1, 0, - NULL); + NULL, NULL); /* check all DECs */ for (mod_idx = 0 ; mod_idx < NUMBER_OF_DEC ; mod_idx++) if (prop->decoder_enabled_mask & BIT(mod_idx)) - gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, mod_idx, 0, NULL); + gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, mod_idx, 0, NULL, + NULL); /* check all ROTs */ for (mod_idx = 0 ; mod_idx < NUM_OF_ROT ; mod_idx++) - gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, mod_idx, 0, NULL); + gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, mod_idx, 0, NULL, NULL); } static const char *gaudi2_get_initiators_name(u32 rtr_id) @@ -7625,7 +7630,8 @@ static u16 gaudi2_get_razwi_initiators(u32 rtr_id, u16 *engines) } static void gaudi2_razwi_unmapped_addr_hbw_printf_info(struct hl_device *hdev, u32 rtr_id, - u64 rtr_ctrl_base_addr, bool is_write) + u64 rtr_ctrl_base_addr, bool is_write, + u64 *event_mask) { u16 engines[HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR], num_of_eng; u32 razwi_hi, razwi_lo; @@ -7649,8 +7655,8 @@ static void 
gaudi2_razwi_unmapped_addr_hbw_printf_info(struct hl_device *hdev, u WREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_SET, 0x1); } - hl_capture_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &engines[0], num_of_eng, - rd_wr_flag | HL_RAZWI_HBW); + hl_handle_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &engines[0], num_of_eng, + rd_wr_flag | HL_RAZWI_HBW, event_mask); dev_err_ratelimited(hdev->dev, "RAZWI PSOC unmapped HBW %s error, rtr id %u, address %#llx\n", is_write ? "WR" : "RD", rtr_id, (u64)razwi_hi << 32 | razwi_lo); @@ -7660,7 +7666,8 @@ static void gaudi2_razwi_unmapped_addr_hbw_printf_info(struct hl_device *hdev, u } static void gaudi2_razwi_unmapped_addr_lbw_printf_info(struct hl_device *hdev, u32 rtr_id, - u64 rtr_ctrl_base_addr, bool is_write) + u64 rtr_ctrl_base_addr, bool is_write, + u64 *event_mask) { u16 engines[HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR], num_of_eng; u32 razwi_addr; @@ -7682,7 +7689,8 @@ static void gaudi2_razwi_unmapped_addr_lbw_printf_info(struct hl_device *hdev, u WREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_SET, 0x1); } - hl_capture_razwi(hdev, razwi_addr, &engines[0], num_of_eng, rd_wr_flag | HL_RAZWI_LBW); + hl_handle_razwi(hdev, razwi_addr, &engines[0], num_of_eng, rd_wr_flag | HL_RAZWI_LBW, + event_mask); dev_err_ratelimited(hdev->dev, "RAZWI PSOC unmapped LBW %s error, rtr id %u, address %#x\n", is_write ? 
"WR" : "RD", rtr_id, razwi_addr); @@ -7692,7 +7700,7 @@ static void gaudi2_razwi_unmapped_addr_lbw_printf_info(struct hl_device *hdev, u } /* PSOC RAZWI interrupt occurs only when trying to access a bad address */ -static void gaudi2_ack_psoc_razwi_event_handler(struct hl_device *hdev) +static void gaudi2_ack_psoc_razwi_event_handler(struct hl_device *hdev, u64 *event_mask) { u32 hbw_aw_set, hbw_ar_set, lbw_aw_set, lbw_ar_set, rtr_id, dcore_id, dcore_rtr_id, xy, razwi_mask_info, razwi_intr = 0; @@ -7746,19 +7754,19 @@ static void gaudi2_ack_psoc_razwi_event_handler(struct hl_device *hdev) if (hbw_aw_set) gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_id, - rtr_ctrl_base_addr, true); + rtr_ctrl_base_addr, true, event_mask); if (hbw_ar_set) gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_id, - rtr_ctrl_base_addr, false); + rtr_ctrl_base_addr, false, event_mask); if (lbw_aw_set) gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_id, - rtr_ctrl_base_addr, true); + rtr_ctrl_base_addr, true, event_mask); if (lbw_ar_set) gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_id, - rtr_ctrl_base_addr, false); + rtr_ctrl_base_addr, false, event_mask); clear: /* Clear Interrupts only on pldm or if f/w doesn't handle interrupts */ @@ -7784,7 +7792,7 @@ static void _gaudi2_handle_qm_sei_err(struct hl_device *hdev, u64 qman_base) } static void gaudi2_handle_qm_sei_err(struct hl_device *hdev, u16 event_type, - struct hl_eq_razwi_info *razwi_info) + struct hl_eq_razwi_info *razwi_info, u64 *event_mask) { enum razwi_event_sources module; u64 qman_base; @@ -7837,7 +7845,7 @@ static void gaudi2_handle_qm_sei_err(struct hl_device *hdev, u16 event_type, /* check if RAZWI happened */ if (razwi_info) - gaudi2_ack_module_razwi_event_handler(hdev, module, 0, 0, razwi_info); + gaudi2_ack_module_razwi_event_handler(hdev, module, 0, 0, razwi_info, event_mask); } static void gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type) @@ -8003,7 +8011,8 @@ static void 
gaudi2_handle_cpu_sei_err(struct hl_device *hdev) } static void gaudi2_handle_rot_err(struct hl_device *hdev, u8 rot_index, - struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause) + struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause, + u64 *event_mask) { u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data); int i; @@ -8015,11 +8024,12 @@ static void gaudi2_handle_rot_err(struct hl_device *hdev, u8 rot_index, /* check if RAZWI happened */ gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, rot_index, 0, - &razwi_with_intr_cause->razwi_info); + &razwi_with_intr_cause->razwi_info, event_mask); } static void gaudi2_tpc_ack_interrupts(struct hl_device *hdev, u8 tpc_index, char *interrupt_name, - struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause) + struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause, + u64 *event_mask) { u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data); int i; @@ -8031,11 +8041,11 @@ static void gaudi2_tpc_ack_interrupts(struct hl_device *hdev, u8 tpc_index, char /* check if RAZWI happened */ gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, tpc_index, 0, - &razwi_with_intr_cause->razwi_info); + &razwi_with_intr_cause->razwi_info, event_mask); } static void gaudi2_handle_dec_err(struct hl_device *hdev, u8 dec_index, const char *interrupt_name, - struct hl_eq_razwi_info *razwi_info) + struct hl_eq_razwi_info *razwi_info, u64 *event_mask) { u32 sts_addr, sts_val, sts_clr_val = 0; int i; @@ -8061,14 +8071,15 @@ static void gaudi2_handle_dec_err(struct hl_device *hdev, u8 dec_index, const ch } /* check if RAZWI happened */ - gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, dec_index, 0, razwi_info); + gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, dec_index, 0, razwi_info, + event_mask); /* Write 1 clear errors */ WREG32(sts_addr, sts_clr_val); } static void gaudi2_handle_mme_err(struct hl_device *hdev, u8 mme_index, const char *interrupt_name, 
- struct hl_eq_razwi_info *razwi_info) + struct hl_eq_razwi_info *razwi_info, u64 *event_mask) { u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0; int i; @@ -8088,7 +8099,8 @@ static void gaudi2_handle_mme_err(struct hl_device *hdev, u8 mme_index, const ch /* check if RAZWI happened */ for (i = MME_WRITE ; i < MME_INITIATORS_MAX ; i++) - gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, i, razwi_info); + gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, i, razwi_info, + event_mask); WREG32(sts_clr_addr, sts_clr_val); } @@ -8105,7 +8117,7 @@ static void gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u8 mme_index, u8 } static void gaudi2_handle_mme_wap_err(struct hl_device *hdev, u8 mme_index, - struct hl_eq_razwi_info *razwi_info) + struct hl_eq_razwi_info *razwi_info, u64 *event_mask) { u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0; int i; @@ -8125,8 +8137,10 @@ static void gaudi2_handle_mme_wap_err(struct hl_device *hdev, u8 mme_index, } /* check if RAZWI happened on WAP0/1 */ - gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP0, razwi_info); - gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP1, razwi_info); + gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP0, razwi_info, + event_mask); + gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP1, razwi_info, + event_mask); WREG32(sts_clr_addr, sts_clr_val); } @@ -8156,40 +8170,41 @@ static void gaudi2_handle_dma_core_event(struct hl_device *hdev, u64 intr_cause_ gaudi2_dma_core_interrupts_cause[i]); } -static void gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(struct hl_device *hdev) +static void gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(struct hl_device *hdev, u64 *event_mask) { u32 mstr_if_base_addr = mmPCIE_MSTR_RR_MSTR_IF_RR_SHRD_HBW_BASE, razwi_happened_addr; razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED; if (RREG32(razwi_happened_addr)) { 
gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE", true, - NULL, GAUDI2_ENGINE_ID_PCIE); + NULL, GAUDI2_ENGINE_ID_PCIE, event_mask); WREG32(razwi_happened_addr, 0x1); } razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED; if (RREG32(razwi_happened_addr)) { gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE", true, - NULL, GAUDI2_ENGINE_ID_PCIE); + NULL, GAUDI2_ENGINE_ID_PCIE, event_mask); WREG32(razwi_happened_addr, 0x1); } razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED; if (RREG32(razwi_happened_addr)) { gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE", true, - NULL, GAUDI2_ENGINE_ID_PCIE); + NULL, GAUDI2_ENGINE_ID_PCIE, event_mask); WREG32(razwi_happened_addr, 0x1); } razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED; if (RREG32(razwi_happened_addr)) { gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE", true, - NULL, GAUDI2_ENGINE_ID_PCIE); + NULL, GAUDI2_ENGINE_ID_PCIE, event_mask); WREG32(razwi_happened_addr, 0x1); } } -static void gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u64 intr_cause_data) +static void gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u64 intr_cause_data, + u64 *event_mask) { int i; @@ -8204,7 +8219,7 @@ static void gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u64 intr_cau case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK: break; case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK: - gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev); + gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev, event_mask); break; } } @@ -8818,29 +8833,30 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP: case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP: reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; - gaudi2_handle_qm_sei_err(hdev, event_type, &eq_entry->razwi_info); + 
gaudi2_handle_qm_sei_err(hdev, event_type, &eq_entry->razwi_info, &event_mask); event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE: case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE: index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE; - gaudi2_handle_rot_err(hdev, index, &eq_entry->razwi_with_intr_cause); - gaudi2_handle_qm_sei_err(hdev, event_type, NULL); + gaudi2_handle_rot_err(hdev, index, &eq_entry->razwi_with_intr_cause, &event_mask); + gaudi2_handle_qm_sei_err(hdev, event_type, NULL, &event_mask); event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP: index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP; gaudi2_tpc_ack_interrupts(hdev, index, "AXI_ERR_RSP", - &eq_entry->razwi_with_intr_cause); - gaudi2_handle_qm_sei_err(hdev, event_type, NULL); + &eq_entry->razwi_with_intr_cause, &event_mask); + gaudi2_handle_qm_sei_err(hdev, event_type, NULL, &event_mask); event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... 
GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE: index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE; - gaudi2_handle_dec_err(hdev, index, "AXI_ERR_RESPONSE", &eq_entry->razwi_info); + gaudi2_handle_dec_err(hdev, index, "AXI_ERR_RESPONSE", &eq_entry->razwi_info, + &event_mask); event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; @@ -8871,7 +8887,8 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent case GAUDI2_EVENT_TPC24_KERNEL_ERR: index = (event_type - GAUDI2_EVENT_TPC0_KERNEL_ERR) / (GAUDI2_EVENT_TPC1_KERNEL_ERR - GAUDI2_EVENT_TPC0_KERNEL_ERR); - gaudi2_tpc_ack_interrupts(hdev, index, "KRN_ERR", &eq_entry->razwi_with_intr_cause); + gaudi2_tpc_ack_interrupts(hdev, index, "KRN_ERR", &eq_entry->razwi_with_intr_cause, + &event_mask); event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; @@ -8887,7 +8904,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent case GAUDI2_EVENT_DEC9_SPI: index = (event_type - GAUDI2_EVENT_DEC0_SPI) / (GAUDI2_EVENT_DEC1_SPI - GAUDI2_EVENT_DEC0_SPI); - gaudi2_handle_dec_err(hdev, index, "SPI", &eq_entry->razwi_info); + gaudi2_handle_dec_err(hdev, index, "SPI", &eq_entry->razwi_info, &event_mask); event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; @@ -8899,8 +8916,8 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent (GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE); gaudi2_handle_mme_err(hdev, index, - "CTRL_AXI_ERROR_RESPONSE", &eq_entry->razwi_info); - gaudi2_handle_qm_sei_err(hdev, event_type, NULL); + "CTRL_AXI_ERROR_RESPONSE", &eq_entry->razwi_info, &event_mask); + gaudi2_handle_qm_sei_err(hdev, event_type, NULL, &event_mask); event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; @@ -8911,7 +8928,8 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent index = (event_type - GAUDI2_EVENT_MME0_QMAN_SW_ERROR) / (GAUDI2_EVENT_MME1_QMAN_SW_ERROR - 
GAUDI2_EVENT_MME0_QMAN_SW_ERROR); - gaudi2_handle_mme_err(hdev, index, "QMAN_SW_ERROR", &eq_entry->razwi_info); + gaudi2_handle_mme_err(hdev, index, "QMAN_SW_ERROR", &eq_entry->razwi_info, + &event_mask); event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; @@ -8922,7 +8940,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent index = (event_type - GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID) / (GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID - GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID); - gaudi2_handle_mme_wap_err(hdev, index, &eq_entry->razwi_info); + gaudi2_handle_mme_wap_err(hdev, index, &eq_entry->razwi_info, &event_mask); event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; @@ -8941,7 +8959,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent case GAUDI2_EVENT_PCIE_ADDR_DEC_ERR: gaudi2_print_pcie_addr_dec_info(hdev, - le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); + le64_to_cpu(eq_entry->intr_cause.intr_cause_data), &event_mask); reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; @@ -8970,7 +8988,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent break; case GAUDI2_EVENT_PSOC63_RAZWI_OR_PID_MIN_MAX_INTERRUPT: - gaudi2_ack_psoc_razwi_event_handler(hdev); + gaudi2_ack_psoc_razwi_event_handler(hdev, &event_mask); event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; From 3daa64eea1fb219c8cfb3bb6948dc2993652e201 Mon Sep 17 00:00:00 2001 From: farah kassabri Date: Thu, 22 Sep 2022 14:24:35 +0300 Subject: [PATCH 2095/4122] habanalabs: fix firmware descriptor copy operation This is needed to allow adding more data to the lkd_fw_comms_desc structure. 
Signed-off-by: farah kassabri Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 28 +++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index f18e53bbba6b..01c4ffba6e97 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -12,6 +12,7 @@ #include #include #include +#include #define FW_FILE_MAX_SIZE 0x1400000 /* maximum size of 20MB */ @@ -1988,10 +1989,11 @@ static int hl_fw_dynamic_read_and_validate_descriptor(struct hl_device *hdev, struct fw_load_mgr *fw_loader) { struct lkd_fw_comms_desc *fw_desc; + void __iomem *src, *temp_fw_desc; struct pci_mem_region *region; struct fw_response *response; + u16 fw_data_size; enum pci_region region_id; - void __iomem *src; int rc; fw_desc = &fw_loader->dynamic_loader.comm_desc; @@ -2018,9 +2020,29 @@ static int hl_fw_dynamic_read_and_validate_descriptor(struct hl_device *hdev, fw_loader->dynamic_loader.fw_desc_valid = false; src = hdev->pcie_bar[region->bar_id] + region->offset_in_bar + response->ram_offset; - memcpy_fromio(fw_desc, src, sizeof(struct lkd_fw_comms_desc)); - return hl_fw_dynamic_validate_descriptor(hdev, fw_loader, fw_desc); + /* + * We do the copy of the fw descriptor in 2 phases: + * 1. copy the header + data info according to our lkd_fw_comms_desc definition. + * then we're able to read the actual data size provided by fw. + * this is needed for cases where data in descriptor was changed(add/remove) + * in embedded specs header file before updating lkd copy of the header file + * 2. 
copy descriptor to temporary buffer with aligned size and send it to validation + */ + memcpy_fromio(fw_desc, src, sizeof(struct lkd_fw_comms_desc)); + fw_data_size = le16_to_cpu(fw_desc->header.size); + + temp_fw_desc = vzalloc(sizeof(struct comms_desc_header) + fw_data_size); + if (!temp_fw_desc) + return -ENOMEM; + + memcpy_fromio(temp_fw_desc, src, sizeof(struct comms_desc_header) + fw_data_size); + + rc = hl_fw_dynamic_validate_descriptor(hdev, fw_loader, + (struct lkd_fw_comms_desc *) temp_fw_desc); + vfree(temp_fw_desc); + + return rc; } /** From b829e01025f8936bb85bdc39cbd1faddcca290d0 Mon Sep 17 00:00:00 2001 From: Ohad Sharabi Date: Sun, 6 Nov 2022 09:26:01 +0200 Subject: [PATCH 2096/4122] habanalabs: skip events info ioctl if not supported Some ASICs haven't yet implemented this functionality and so the ioctl call should fail and the user should be notified of the reason. Signed-off-by: Ohad Sharabi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/habanalabs_ioctl.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index ee43017eb563..b6abfa7761a7 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -123,6 +123,10 @@ static int hw_events_info(struct hl_device *hdev, bool aggregate, return -EINVAL; arr = hdev->asic_funcs->get_events_stat(hdev, aggregate, &size); + if (!arr) { + dev_err(hdev->dev, "Events info not supported\n"); + return -EOPNOTSUPP; + } return copy_to_user(out, arr, min(max_size, size)) ? 
-EFAULT : 0; } From a63de89bee7ff01dc184fbe289eade5b5ab5f49a Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Sun, 6 Nov 2022 12:07:03 +0200 Subject: [PATCH 2097/4122] habanalabs/gaudi2: classify power/thermal events as info As power and thermal envelope events are purely informative and do not indicate an error, we reduce the print level to info only. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi2/gaudi2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index 59940c8df2d2..61960fa059e0 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -6828,6 +6828,7 @@ static inline bool is_info_event(u32 event) { switch (event) { case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE: + case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S ... GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E: return true; default: return false; From d3027f4a625063c18becd6953b4a2a273033b071 Mon Sep 17 00:00:00 2001 From: Dani Liberman Date: Mon, 31 Oct 2022 23:04:14 +0200 Subject: [PATCH 2098/4122] habanalabs/gaudi2: add page fault notify event Each time page fault happens, besides capturing its data, also notify the user about it. 
Signed-off-by: Dani Liberman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi2/gaudi2.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index 61960fa059e0..65c9b535aa69 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -8253,7 +8253,8 @@ static void gaudi2_handle_hif_fatal(struct hl_device *hdev, u16 event_type, u64 } } -static void gaudi2_handle_page_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu) +static void gaudi2_handle_page_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu, + u64 *event_mask) { u32 valid, val; u64 addr; @@ -8270,7 +8271,7 @@ static void gaudi2_handle_page_error(struct hl_device *hdev, u64 mmu_base, bool dev_err_ratelimited(hdev->dev, "%s page fault on va 0x%llx\n", is_pmmu ? "PMMU" : "HMMU", addr); - hl_capture_page_fault(hdev, addr, 0, is_pmmu); + hl_handle_page_fault(hdev, addr, 0, is_pmmu, event_mask); WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE), 0); } @@ -8296,7 +8297,7 @@ static void gaudi2_handle_access_error(struct hl_device *hdev, u64 mmu_base, boo } static void gaudi2_handle_mmu_spi_sei_generic(struct hl_device *hdev, const char *mmu_name, - u64 mmu_base, bool is_pmmu) + u64 mmu_base, bool is_pmmu, u64 *event_mask) { u32 spi_sei_cause, interrupt_clr = 0x0; int i; @@ -8309,7 +8310,7 @@ static void gaudi2_handle_mmu_spi_sei_generic(struct hl_device *hdev, const char mmu_name, gaudi2_mmu_spi_sei[i].cause); if (i == 0) - gaudi2_handle_page_error(hdev, mmu_base, is_pmmu); + gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, event_mask); else if (i == 1) gaudi2_handle_access_error(hdev, mmu_base, is_pmmu); @@ -8381,7 +8382,7 @@ static bool gaudi2_handle_sm_err(struct hl_device *hdev, u8 sm_index) return reset; } -static void gaudi2_handle_mmu_spi_sei_err(struct hl_device *hdev, u16 event_type) 
+static void gaudi2_handle_mmu_spi_sei_err(struct hl_device *hdev, u16 event_type, u64 *event_mask) { bool is_pmmu = false; char desc[32]; @@ -8439,7 +8440,7 @@ static void gaudi2_handle_mmu_spi_sei_err(struct hl_device *hdev, u16 event_type return; } - gaudi2_handle_mmu_spi_sei_generic(hdev, desc, mmu_base, is_pmmu); + gaudi2_handle_mmu_spi_sei_generic(hdev, desc, mmu_base, is_pmmu, event_mask); } @@ -8969,7 +8970,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP: case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR: case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0: - gaudi2_handle_mmu_spi_sei_err(hdev, event_type); + gaudi2_handle_mmu_spi_sei_err(hdev, event_type, &event_mask); reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; @@ -10206,7 +10207,7 @@ static void gaudi2_ack_mmu_error(struct hl_device *hdev, u64 mmu_id) if (gaudi2_get_mmu_base(hdev, mmu_id, &mmu_base)) return; - gaudi2_handle_page_error(hdev, mmu_base, is_pmmu); + gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, NULL); gaudi2_handle_access_error(hdev, mmu_base, is_pmmu); } From 5f8981d699ed33017ff2212ec17f6cde89212756 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Mon, 7 Nov 2022 16:20:03 +0200 Subject: [PATCH 2099/4122] habanalabs: fix print for out-of-sync and pkt-failure events Add missing le32_to_cpu() conversions, and use %d for the value returned from atomic_read(). 
Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi/gaudi.c | 4 ++-- drivers/misc/habanalabs/gaudi2/gaudi2.c | 8 ++++---- drivers/misc/habanalabs/goya/goya.c | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index cbe1daf5a793..7b93f0d26dd0 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -7347,8 +7347,8 @@ static void gaudi_print_out_of_sync_info(struct hl_device *hdev, { struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ]; - dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n", - sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci)); + dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n", + le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci)); } static void gaudi_print_fw_alive_info(struct hl_device *hdev, diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index 65c9b535aa69..bdb5782afb7e 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -8684,8 +8684,8 @@ static void gaudi2_print_out_of_sync_info(struct hl_device *hdev, { struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ]; - dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n", - sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci)); + dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n", + le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci)); } static void gaudi2_handle_pcie_p2p_msix(struct hl_device *hdev) @@ -8751,8 +8751,8 @@ static void gaudi2_print_cpu_pkt_failure_info(struct hl_device *hdev, struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ]; dev_warn(hdev->dev, - "FW reported sanity 
check failure, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n", - sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci)); + "FW reported sanity check failure, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n", + le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci)); } static void hl_arc_event_handle(struct hl_device *hdev, diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 5ef9e3ca97a6..0f083fcf81a6 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -4475,8 +4475,8 @@ static void goya_print_out_of_sync_info(struct hl_device *hdev, { struct hl_hw_queue *q = &hdev->kernel_queues[GOYA_QUEUE_ID_CPU_PQ]; - dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n", - sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci)); + dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n", + le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci)); } static void goya_print_irq_info(struct hl_device *hdev, u16 event_type, From fe3e88c9470ceb2ea67651aa397f29e80453eed1 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Mon, 7 Nov 2022 16:34:32 +0200 Subject: [PATCH 2100/4122] habanalabs/gaudi: fix print for firmware-alive event Add missing le{32,64}_to_cpu conversions. Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi/gaudi.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 7b93f0d26dd0..9f5e208701ba 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -7356,9 +7356,10 @@ static void gaudi_print_fw_alive_info(struct hl_device *hdev, { dev_err(hdev->dev, "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n", - (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ? 
- "Minor" : "Critical", fw_alive->process_id, - fw_alive->thread_id, fw_alive->uptime_seconds); + (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ? "Minor" : "Critical", + le32_to_cpu(fw_alive->process_id), + le32_to_cpu(fw_alive->thread_id), + le64_to_cpu(fw_alive->uptime_seconds)); } static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type, From 24c983c88f5e7865de972e3b395baf2c237485ca Mon Sep 17 00:00:00 2001 From: farah kassabri Date: Tue, 8 Nov 2022 13:23:17 +0200 Subject: [PATCH 2101/4122] habanalabs/gaudi2: remove redundant firmware version check Firmware 1.7 is the first official firmware, so no need to check if we are running a version below it. Signed-off-by: farah kassabri Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi2/gaudi2.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index bdb5782afb7e..36f0ea1100bb 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -10358,10 +10358,9 @@ int gaudi2_send_device_activity(struct hl_device *hdev, bool open) { struct gaudi2_device *gaudi2 = hdev->asic_specific; - if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q) || hdev->fw_major_version < 37) + if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) return 0; - /* TODO: add check for FW version using minor ver once it's known */ return hl_fw_send_device_activity(hdev, open); } From 2c77ec14c2db228f76a74e9123aecbb5b8c994f5 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Thu, 20 Oct 2022 12:05:09 +0300 Subject: [PATCH 2102/4122] habanalabs/gaudi2: don't enable entries in the MSIX_GW table User should use the virtual MSI-X doorbell to generate interrupts from the device, so there is no need to enable entries in the MSIX_GW table. 
Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi2/gaudi2.c | 26 ------------------------- 1 file changed, 26 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index 36f0ea1100bb..d5efec347bc1 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -4695,30 +4695,6 @@ static void gaudi2_init_dec(struct hl_device *hdev) } } -static void gaudi2_init_msix_gw_table(struct hl_device *hdev) -{ - u32 first_reg_offset, last_reg_offset, msix_gw_table_base; - u8 first_bit, last_bit; - int i; - - msix_gw_table_base = mmPCIE_WRAP_MSIX_GW_TABLE_0; - first_reg_offset = (GAUDI2_IRQ_NUM_USER_FIRST >> 5) << 2; - first_bit = GAUDI2_IRQ_NUM_USER_FIRST % 32; - last_reg_offset = (GAUDI2_IRQ_NUM_USER_LAST >> 5) << 2; - last_bit = GAUDI2_IRQ_NUM_USER_LAST % 32; - - if (first_reg_offset == last_reg_offset) { - WREG32(msix_gw_table_base + first_reg_offset, GENMASK(last_bit, first_bit)); - return; - } - - WREG32(msix_gw_table_base + first_reg_offset, GENMASK(31, first_bit)); - WREG32(msix_gw_table_base + last_reg_offset, GENMASK(last_bit, 0)); - - for (i = first_reg_offset + 4; i < last_reg_offset ; i += 4) - WREG32(msix_gw_table_base + i, 0xFFFFFFFF); -} - static int gaudi2_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 stlb_base, u32 asid, u64 phys_addr) { @@ -5232,8 +5208,6 @@ static int gaudi2_hw_init(struct hl_device *hdev) return rc; } - gaudi2_init_msix_gw_table(hdev); - gaudi2_init_scrambler_hbm(hdev); gaudi2_init_kdma(hdev); From 9c604af0c9d4efe4f308761229186768b3f3a6a9 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Thu, 20 Oct 2022 14:40:16 +0300 Subject: [PATCH 2103/4122] habanalabs/gaudi2: return to reset upon SM SEI BRESP error Due to a H/W issue in the LBW path to the PCIE_DBI MSI-X doorbell, there were false sporadic error responses in SM when it was configured to write to there, and hence no reset was 
done as part of handling the relevant event. Now that the virtual MSI-X doorbell is used, such errors in SM are not expected and reset shouldn't be skipped. Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi2/gaudi2.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index d5efec347bc1..f0f2f77f56de 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -8300,11 +8300,10 @@ static void gaudi2_handle_mmu_spi_sei_generic(struct hl_device *hdev, const char WREG32(mmu_base + MMU_INTERRUPT_CLR_OFFSET, interrupt_clr); } -static bool gaudi2_handle_sm_err(struct hl_device *hdev, u8 sm_index) +static void gaudi2_handle_sm_err(struct hl_device *hdev, u8 sm_index) { u32 sei_cause_addr, sei_cause_val, sei_cause_cause, sei_cause_log; u32 cq_intr_addr, cq_intr_val, cq_intr_queue_index; - bool reset = true; int i; sei_cause_addr = mmDCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE + DCORE_OFFSET * sm_index; @@ -8329,10 +8328,6 @@ static bool gaudi2_handle_sm_err(struct hl_device *hdev, u8 sm_index) gaudi2_sm_sei_cause[i].cause_name, gaudi2_sm_sei_cause[i].log_name, sei_cause_log & gaudi2_sm_sei_cause[i].log_mask); - - /* Due to a potential H/W issue, do not reset upon BRESP errors */ - if (i == 2) - reset = false; break; } @@ -8352,8 +8347,6 @@ static bool gaudi2_handle_sm_err(struct hl_device *hdev, u8 sm_index) /* Clear CQ_INTR */ WREG32(cq_intr_addr, 0); } - - return reset; } static void gaudi2_handle_mmu_spi_sei_err(struct hl_device *hdev, u16 event_type, u64 *event_mask) @@ -8755,8 +8748,8 @@ static void hl_arc_event_handle(struct hl_device *hdev, static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) { - bool reset_required = false, skip_reset = false, is_critical = false; struct gaudi2_device *gaudi2 = hdev->asic_specific; + bool 
reset_required = false, is_critical = false; u32 ctl, reset_flags = HL_DRV_RESET_HARD; int index, sbte_index; u64 event_mask = 0; @@ -9113,7 +9106,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent case GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_SM3_AXI_ERROR_RESPONSE: index = event_type - GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE; - skip_reset = !gaudi2_handle_sm_err(hdev, index); + gaudi2_handle_sm_err(hdev, index); event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; @@ -9153,9 +9146,9 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent event_type); } - if ((gaudi2_irq_map_table[event_type].reset || reset_required) && !skip_reset && - (hdev->hard_reset_on_fw_events || - (hdev->asic_prop.fw_security_enabled && is_critical))) + if ((gaudi2_irq_map_table[event_type].reset || reset_required) && + (hdev->hard_reset_on_fw_events || + (hdev->asic_prop.fw_security_enabled && is_critical))) goto reset_device; /* Send unmask irq only for interrupts not classified as MSG */ From bc8e4bae70237f4671e07f83bcfb726eb14d86ed Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Wed, 9 Nov 2022 18:08:38 +0200 Subject: [PATCH 2104/4122] habanalabs: reset device if still in use when released If the device file is released while a context is still held, it won't be possible to reopen it until the context is eventually released. If that doesn't happen, only a device reset will revert it back to an operational state, i.e. need to wait for a CS timeout or an error, or to wait for an external intervention of injecting a reset via sysfs. At this stage, after the device was released by user, context is held either because of CS which were left running on the device and are not relevant anymore, or due to missing cleanup steps from user side. All of this is in any case handled in the device reset flow, so initiate the reset at this point instead of waiting for it. 
Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 708db0f48ee0..49640c8ca910 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -504,9 +504,10 @@ static int hl_device_release(struct inode *inode, struct file *filp) hdev->compute_ctx_in_release = 1; - if (!hl_hpriv_put(hpriv)) - dev_notice(hdev->dev, - "User process closed FD but device still in use\n"); + if (!hl_hpriv_put(hpriv)) { + dev_notice(hdev->dev, "User process closed FD but device still in use\n"); + hl_device_reset(hdev, HL_DRV_RESET_HARD); + } hdev->last_open_session_duration_jif = jiffies - hdev->last_successful_open_jif; From f69c3e460a614cba8939f7c623f7b77f0bcb3584 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Thu, 10 Nov 2022 17:05:24 +0200 Subject: [PATCH 2105/4122] habanalabs: check schedule_hard_reset correctly schedule_hard_reset can be true only if we didn't do hard-reset. Therefore, no point of checking it in case hard_reset is true. 
Signed-off-by: Oded Gabbay Reviewed-by: Tomer Tayar --- drivers/misc/habanalabs/common/device.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 49640c8ca910..0650e511a0f5 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -1737,18 +1737,19 @@ kill_processes: * the device will be operational although it shouldn't be */ hdev->asic_funcs->enable_events_from_fw(hdev); - } else if (!reset_upon_device_release) { - hdev->reset_info.compute_reset_cnt++; - } + } else { + if (!reset_upon_device_release) + hdev->reset_info.compute_reset_cnt++; - if (schedule_hard_reset) { - dev_info(hdev->dev, "Performing hard reset scheduled during compute reset\n"); - flags = hdev->reset_info.hard_reset_schedule_flags; - hdev->reset_info.hard_reset_schedule_flags = 0; - hdev->disabled = true; - hard_reset = true; - handle_reset_trigger(hdev, flags); - goto again; + if (schedule_hard_reset) { + dev_info(hdev->dev, "Performing hard reset scheduled during compute reset\n"); + flags = hdev->reset_info.hard_reset_schedule_flags; + hdev->reset_info.hard_reset_schedule_flags = 0; + hdev->disabled = true; + hard_reset = true; + handle_reset_trigger(hdev, flags); + goto again; + } } return 0; From b585daa89d572210a94c7f11a746bd5489017003 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Thu, 10 Nov 2022 17:24:02 +0200 Subject: [PATCH 2106/4122] habanalabs: extend process wait timeout in device fini Processes that use our device are likely to use at the same time other devices such as remote storage. In case our device is removed and a user process is still using the device, we need to kill the user process. However, if that process has a thread waiting for i/o to complete on remote storage, for example, the process won't terminate. Let's give it enough time to terminate before giving up. 
Signed-off-by: Oded Gabbay Reviewed-by: Tomer Tayar --- drivers/misc/habanalabs/common/device.c | 6 ++++-- drivers/misc/habanalabs/common/habanalabs.h | 11 ++++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 0650e511a0f5..63d0cb7087e8 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -2300,14 +2300,16 @@ void hl_device_fini(struct hl_device *hdev) */ dev_info(hdev->dev, "Waiting for all processes to exit (timeout of %u seconds)", - HL_PENDING_RESET_LONG_SEC); + HL_WAIT_PROCESS_KILL_ON_DEVICE_FINI); - rc = device_kill_open_processes(hdev, HL_PENDING_RESET_LONG_SEC, false); + hdev->process_kill_trial_cnt = 0; + rc = device_kill_open_processes(hdev, HL_WAIT_PROCESS_KILL_ON_DEVICE_FINI, false); if (rc) { dev_crit(hdev->dev, "Failed to kill all open processes\n"); device_disable_open_processes(hdev, false); } + hdev->process_kill_trial_cnt = 0; rc = device_kill_open_processes(hdev, 0, true); if (rc) { dev_crit(hdev->dev, "Failed to kill all control device open processes\n"); diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 0781b8698f74..e7f89868428d 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -50,9 +50,14 @@ struct hl_fpriv; #define HL_MMAP_OFFSET_VALUE_MASK (0x1FFFFFFFFFFFull >> PAGE_SHIFT) #define HL_MMAP_OFFSET_VALUE_GET(off) (off & HL_MMAP_OFFSET_VALUE_MASK) -#define HL_PENDING_RESET_PER_SEC 10 -#define HL_PENDING_RESET_MAX_TRIALS 60 /* 10 minutes */ -#define HL_PENDING_RESET_LONG_SEC 60 +#define HL_PENDING_RESET_PER_SEC 10 +#define HL_PENDING_RESET_MAX_TRIALS 60 /* 10 minutes */ +#define HL_PENDING_RESET_LONG_SEC 60 +/* + * In device fini, wait 10 minutes for user processes to be terminated after we kill them. 
+ * This is needed to prevent situation of clearing resources while user processes are still alive. + */ +#define HL_WAIT_PROCESS_KILL_ON_DEVICE_FINI 600 #define HL_HARD_RESET_MAX_TIMEOUT 120 #define HL_PLDM_HARD_RESET_MAX_TIMEOUT (HL_HARD_RESET_MAX_TIMEOUT * 3) From 18cd948204fffa61660d3f8454fc9d275c1f6c94 Mon Sep 17 00:00:00 2001 From: farah kassabri Date: Tue, 8 Nov 2022 15:24:33 +0200 Subject: [PATCH 2107/4122] habanalabs/gaudi2: change memory scrub mechanism Currently the scrubbing mechanism used the EDMA engines by directly setting the engine core registers to scrub a chunk of memory. Due to a sporadic failure with this mechanism, it was decided to initiate the engines via its QMAN using LIN-DMA packets. Signed-off-by: farah kassabri Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi2/gaudi2.c | 135 +++++++++++++++--------- 1 file changed, 86 insertions(+), 49 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index f0f2f77f56de..c14e63164a84 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -9171,34 +9171,74 @@ reset_device: hl_device_cond_reset(hdev, reset_flags, event_mask); } -static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val) +static int gaudi2_memset_memory_chunk_using_edma_qm(struct hl_device *hdev, + struct packet_lin_dma *lin_dma_pkt, dma_addr_t pkt_dma_addr, + u32 hw_queue_id, u32 size, u64 addr, u32 val) { - struct asic_fixed_properties *prop = &hdev->asic_prop; - u64 comp_addr, cur_addr = addr, end_addr = addr + size; - u32 chunk_size, busy, dcore, edma_idx, sob_offset, sob_addr, comp_val, edma_commit; - u32 old_mmubp, mmubp; + u32 ctl, pkt_size; int rc = 0; - sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4; - sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset; - comp_addr = CFG_BASE + sob_addr; - comp_val = 
FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) | - FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1); + ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA); + ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_MEMSET_MASK, 1); + ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_WRCOMP_MASK, 1); + ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 1); - edma_commit = FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_LIN_MASK, 1) | - FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_MEM_SET_MASK, 1) | - FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_WR_COMP_EN_MASK, 1); - mmubp = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_MASK, 1) | - FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_MASK, 1); + lin_dma_pkt->ctl = cpu_to_le32(ctl); + lin_dma_pkt->src_addr = cpu_to_le64(val); + lin_dma_pkt->dst_addr = cpu_to_le64(addr); + lin_dma_pkt->tsize = cpu_to_le32(size); + + pkt_size = sizeof(struct packet_lin_dma); + + rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr); + if (rc) + dev_err(hdev->dev, "Failed to send lin dma packet to H/W queue %d\n", + hw_queue_id); + + return rc; +} + +static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val) +{ + u32 edma_queues_id[] = {GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0, + GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0, + GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0, + GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0}; + u32 chunk_size, dcore, edma_idx, sob_offset, sob_addr, comp_val, + old_mmubp, mmubp, num_of_pkts, busy, pkt_size; + u64 comp_addr, cur_addr = addr, end_addr = addr + size; + struct asic_fixed_properties *prop = &hdev->asic_prop; + void *lin_dma_pkts_arr; + dma_addr_t pkt_dma_addr; + int rc = 0, dma_num = 0; if (prop->edma_enabled_mask == 0) { dev_info(hdev->dev, "non of the EDMA engines is enabled - skip dram scrubbing\n"); return -EIO; } + sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4; + sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset; + comp_addr = CFG_BASE + sob_addr; + comp_val = 
FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) | + FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1); + mmubp = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_MASK, 1) | + FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_MASK, 1); + + /* Calculate how many lin dma pkts we'll need */ + num_of_pkts = div64_u64(round_up(size, SZ_2G), SZ_2G); + pkt_size = sizeof(struct packet_lin_dma); + + lin_dma_pkts_arr = hl_asic_dma_alloc_coherent(hdev, pkt_size * num_of_pkts, + &pkt_dma_addr, GFP_KERNEL); + if (!lin_dma_pkts_arr) + return -ENOMEM; + /* * set mmu bypass for the scrubbing - all ddmas are configured the same so save * only the first one to restore later + * also set the sob addr for all edma cores for completion. + * set QM as trusted to allow it to access physical address with MMU bp. */ old_mmubp = RREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP); for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { @@ -9211,17 +9251,22 @@ static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 siz WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + edma_offset, mmubp); + WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset, + lower_32_bits(comp_addr)); + WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset, + upper_32_bits(comp_addr)); + WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset, + comp_val); + gaudi2_qman_set_test_mode(hdev, + edma_queues_id[dcore] + 4 * edma_idx, true); } } - while (cur_addr < end_addr) { - int dma_num = 0; + WREG32(sob_addr, 0); - WREG32(sob_addr, 0); + while (cur_addr < end_addr) { for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) { - u32 edma_offset = dcore * DCORE_OFFSET + - edma_idx * DCORE_EDMA_OFFSET; u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx; if (!(prop->edma_enabled_mask & BIT(edma_bit))) @@ -9229,41 +9274,26 @@ static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 siz chunk_size = 
min_t(u64, SZ_2G, end_addr - cur_addr); - WREG32(mmDCORE0_EDMA0_CORE_CTX_SRC_BASE_LO + edma_offset, - lower_32_bits(val)); - WREG32(mmDCORE0_EDMA0_CORE_CTX_SRC_BASE_HI + edma_offset, - upper_32_bits(val)); - - WREG32(mmDCORE0_EDMA0_CORE_CTX_DST_BASE_LO + edma_offset, - lower_32_bits(cur_addr)); - WREG32(mmDCORE0_EDMA0_CORE_CTX_DST_BASE_HI + edma_offset, - upper_32_bits(cur_addr)); - - WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset, - lower_32_bits(comp_addr)); - WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset, - upper_32_bits(comp_addr)); - WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset, - comp_val); - - WREG32(mmDCORE0_EDMA0_CORE_CTX_DST_TSIZE_0 + edma_offset, - chunk_size); - WREG32(mmDCORE0_EDMA0_CORE_CTX_COMMIT + edma_offset, edma_commit); + rc = gaudi2_memset_memory_chunk_using_edma_qm(hdev, + (struct packet_lin_dma *)lin_dma_pkts_arr + dma_num, + pkt_dma_addr + dma_num * pkt_size, + edma_queues_id[dcore] + edma_idx * 4, + chunk_size, cur_addr, val); + if (rc) + goto end; dma_num++; - cur_addr += chunk_size; - if (cur_addr == end_addr) - goto poll; + break; } } -poll: - rc = hl_poll_timeout(hdev, sob_addr, busy, (busy == dma_num), 1000, 1000000); - if (rc) { - dev_err(hdev->dev, "DMA Timeout during HBM scrubbing\n"); - goto end; - } + } + + rc = hl_poll_timeout(hdev, sob_addr, busy, (busy == dma_num), 1000, 1000000); + if (rc) { + dev_err(hdev->dev, "DMA Timeout during HBM scrubbing\n"); + goto end; } end: for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { @@ -9275,10 +9305,17 @@ end: continue; WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + edma_offset, old_mmubp); + WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset, 0); + WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset, 0); + WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset, 0); + gaudi2_qman_set_test_mode(hdev, + edma_queues_id[dcore] + 4 * edma_idx, false); } } WREG32(sob_addr, 0); + hl_asic_dma_free_coherent(hdev, pkt_size * 
num_of_pkts, lin_dma_pkts_arr, pkt_dma_addr); + return rc; } From 01907ba5252164ca6bf0de670660cd94d77c378c Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Sun, 23 Oct 2022 12:55:21 +0300 Subject: [PATCH 2108/4122] habanalabs: increase the size of busy engines mask Increase the size of the busy engines mask in 'struct hl_info_hw_idle', for future ASICs with more than 128 engines. Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 9 +++++---- include/uapi/misc/habanalabs.h | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 63d0cb7087e8..f5864893237c 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -416,8 +416,9 @@ static void hpriv_release(struct kref *ref) device_is_idle = hdev->asic_funcs->is_device_idle(hdev, idle_mask, HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL); if (!device_is_idle) { - dev_err(hdev->dev, "device not idle after user context is closed (0x%llx_%llx)\n", - idle_mask[1], idle_mask[0]); + dev_err(hdev->dev, + "device not idle after user context is closed (0x%llx_%llx_%llx_%llx)\n", + idle_mask[3], idle_mask[2], idle_mask[1], idle_mask[0]); reset_device = true; } @@ -1661,8 +1662,8 @@ kill_processes: /* If device is not idle fail the reset process */ if (!hdev->asic_funcs->is_device_idle(hdev, idle_mask, HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) { - dev_err(hdev->dev, "device is not idle (mask 0x%llx_%llx) after reset\n", - idle_mask[1], idle_mask[0]); + dev_err(hdev->dev, "device is not idle (mask 0x%llx_%llx_%llx_%llx) after reset\n", + idle_mask[3], idle_mask[2], idle_mask[1], idle_mask[0]); rc = -EIO; goto out_err; } diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index e50cb71df081..3b995e841eb8 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -916,7 +916,7 @@ 
struct hl_info_dram_usage { __u64 ctx_dram_mem; }; -#define HL_BUSY_ENGINES_MASK_EXT_SIZE 2 +#define HL_BUSY_ENGINES_MASK_EXT_SIZE 4 struct hl_info_hw_idle { __u32 is_idle; From 5908560a7f14171d1100f4a357deda659dc26868 Mon Sep 17 00:00:00 2001 From: Marco Pagani Date: Wed, 16 Nov 2022 14:41:25 +0100 Subject: [PATCH 2109/4122] habanalabs: added return value check for hl_fw_dynamic_send_clear_cmd() The clang-analyzer reported a warning: "Value stored to 'rc' is never read". The return value check for the first hl_fw_dynamic_send_clear_cmd() call in hl_fw_dynamic_send_protocol_cmd() appears to be missing. Signed-off-by: Marco Pagani Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 01c4ffba6e97..c0909d76d6eb 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -1783,6 +1783,8 @@ int hl_fw_dynamic_send_protocol_cmd(struct hl_device *hdev, /* first send clear command to clean former commands */ rc = hl_fw_dynamic_send_clear_cmd(hdev, fw_loader); + if (rc) + return rc; /* send the actual command */ hl_fw_dynamic_send_cmd(hdev, fw_loader, cmd, size); From 6825b5f81f273fcc1ec61e7e203b0ea40d9987fc Mon Sep 17 00:00:00 2001 From: Marco Pagani Date: Wed, 23 Nov 2022 09:56:39 +0100 Subject: [PATCH 2110/4122] habanalabs/gaudi2: added memset for the cq_size register The clang-analyzer reported a warning: "Value stored to 'cq_size_addr' is never read". The cq_size register of dcore0 is not being zeroed using gaudi2_memset_device_lbw(), along with the other cq_* registers, even though the corresponding cq_size_addr variable is set. 
Signed-off-by: Marco Pagani Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi2/gaudi2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index c14e63164a84..a33a9072fca4 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -9386,6 +9386,7 @@ static void gaudi2_restore_user_sm_registers(struct hl_device *hdev) gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0); gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0); gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0); + gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0); cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + DCORE_OFFSET; cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + DCORE_OFFSET; From b3ad31f33982497dbc7a66a9d3013b1ac6985dfe Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 11 Nov 2022 09:31:28 +0800 Subject: [PATCH 2111/4122] soundwire: intel: start using hw_ops Before introducing new hardware with completely different register spaces and programming sequences, we need to abstract some of the existing routines in hw_ops that will be platform-specific. For now we only use the 'cnl' ops - after the first Intel platform with SoundWire capabilities. Rather than one big intrusive patch, hw_ops are introduced in this patch to show the dependencies between drivers. Follow-up patches will introduce callbacks for debugfs, power and bus management. 
Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20221111013135.38289-2-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/intel.c | 38 +++++++++++++++++++++++------ drivers/soundwire/intel.h | 3 +++ drivers/soundwire/intel_init.c | 1 + include/linux/soundwire/sdw_intel.h | 15 ++++++++++++ sound/soc/sof/intel/hda.c | 2 ++ 5 files changed, 51 insertions(+), 8 deletions(-) diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c index b9cb7e31ddb3..f88319f8ded4 100644 --- a/drivers/soundwire/intel.c +++ b/drivers/soundwire/intel.c @@ -745,10 +745,10 @@ static int intel_free_stream(struct sdw_intel *sdw, * bank switch routines */ -static int intel_pre_bank_switch(struct sdw_bus *bus) +static int intel_pre_bank_switch(struct sdw_intel *sdw) { - struct sdw_cdns *cdns = bus_to_cdns(bus); - struct sdw_intel *sdw = cdns_to_intel(cdns); + struct sdw_cdns *cdns = &sdw->cdns; + struct sdw_bus *bus = &cdns->bus; /* Write to register only for multi-link */ if (!bus->multi_link) @@ -759,10 +759,10 @@ static int intel_pre_bank_switch(struct sdw_bus *bus) return 0; } -static int intel_post_bank_switch(struct sdw_bus *bus) +static int intel_post_bank_switch(struct sdw_intel *sdw) { - struct sdw_cdns *cdns = bus_to_cdns(bus); - struct sdw_intel *sdw = cdns_to_intel(cdns); + struct sdw_cdns *cdns = &sdw->cdns; + struct sdw_bus *bus = &cdns->bus; void __iomem *shim = sdw->link_res->shim; int sync_reg, ret; @@ -1422,6 +1422,28 @@ static int intel_stop_bus(struct sdw_intel *sdw, bool clock_stop) return 0; } +const struct sdw_intel_hw_ops sdw_intel_cnl_hw_ops = { + .pre_bank_switch = intel_pre_bank_switch, + .post_bank_switch = intel_post_bank_switch, +}; +EXPORT_SYMBOL_NS(sdw_intel_cnl_hw_ops, SOUNDWIRE_INTEL); + +static int generic_pre_bank_switch(struct sdw_bus *bus) +{ + struct sdw_cdns *cdns = bus_to_cdns(bus); + struct sdw_intel *sdw = cdns_to_intel(cdns); + + return 
sdw->link_res->hw_ops->pre_bank_switch(sdw); +} + +static int generic_post_bank_switch(struct sdw_bus *bus) +{ + struct sdw_cdns *cdns = bus_to_cdns(bus); + struct sdw_intel *sdw = cdns_to_intel(cdns); + + return sdw->link_res->hw_ops->post_bank_switch(sdw); +} + static int sdw_master_read_intel_prop(struct sdw_bus *bus) { struct sdw_master_prop *prop = &bus->prop; @@ -1477,8 +1499,8 @@ static struct sdw_master_ops sdw_intel_ops = { .xfer_msg_defer = cdns_xfer_msg_defer, .reset_page_addr = cdns_reset_page_addr, .set_bus_conf = cdns_bus_conf, - .pre_bank_switch = intel_pre_bank_switch, - .post_bank_switch = intel_post_bank_switch, + .pre_bank_switch = generic_pre_bank_switch, + .post_bank_switch = generic_post_bank_switch, .read_ping_status = cdns_read_ping_status, }; diff --git a/drivers/soundwire/intel.h b/drivers/soundwire/intel.h index cd93a44dba9a..3170df76b411 100644 --- a/drivers/soundwire/intel.h +++ b/drivers/soundwire/intel.h @@ -7,6 +7,7 @@ /** * struct sdw_intel_link_res - Soundwire Intel link resource structure, * typically populated by the controller driver. 
+ * @hw_ops: platform-specific ops * @mmio_base: mmio base of SoundWire registers * @registers: Link IO registers base * @shim: Audio shim pointer @@ -22,6 +23,8 @@ * @list: used to walk-through all masters exposed by the same controller */ struct sdw_intel_link_res { + const struct sdw_intel_hw_ops *hw_ops; + void __iomem *mmio_base; /* not strictly needed, useful for debug */ void __iomem *registers; void __iomem *shim; diff --git a/drivers/soundwire/intel_init.c b/drivers/soundwire/intel_init.c index d091513919df..1e6d74b3e773 100644 --- a/drivers/soundwire/intel_init.c +++ b/drivers/soundwire/intel_init.c @@ -60,6 +60,7 @@ static struct sdw_intel_link_dev *intel_link_dev_register(struct sdw_intel_res * /* Add link information used in the driver probe */ link = &ldev->link_res; + link->hw_ops = res->hw_ops; link->mmio_base = res->mmio_base; link->registers = res->mmio_base + SDW_LINK_BASE + (SDW_LINK_SIZE * link_id); diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index 2e9fd91572d4..2dbe34b41ef1 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -233,6 +233,7 @@ struct sdw_intel_ctx { * struct sdw_intel_res - Soundwire Intel global resource structure, * typically populated by the DSP driver * + * @hw_ops: abstraction for platform ops * @count: link count * @mmio_base: mmio base of SoundWire registers * @irq: interrupt number @@ -249,6 +250,7 @@ struct sdw_intel_ctx { * @alh_base: sdw alh base. */ struct sdw_intel_res { + const struct sdw_intel_hw_ops *hw_ops; int count; void __iomem *mmio_base; int irq; @@ -292,4 +294,17 @@ irqreturn_t sdw_intel_thread(int irq, void *dev_id); #define SDW_INTEL_QUIRK_MASK_BUS_DISABLE BIT(1) +struct sdw_intel; + +/* struct intel_sdw_hw_ops - SoundWire ops for Intel platforms. 
+ * @pre_bank_switch: helper for bus management + * @post_bank_switch: helper for bus management + */ +struct sdw_intel_hw_ops { + int (*pre_bank_switch)(struct sdw_intel *sdw); + int (*post_bank_switch)(struct sdw_intel *sdw); +}; + +extern const struct sdw_intel_hw_ops sdw_intel_cnl_hw_ops; + #endif diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c index 1188ec51816b..3d6254489056 100644 --- a/sound/soc/sof/intel/hda.c +++ b/sound/soc/sof/intel/hda.c @@ -188,6 +188,7 @@ static int hda_sdw_probe(struct snd_sof_dev *sdev) memset(&res, 0, sizeof(res)); + res.hw_ops = &sdw_intel_cnl_hw_ops; res.mmio_base = sdev->bar[HDA_DSP_BAR]; res.shim_base = hdev->desc->sdw_shim_base; res.alh_base = hdev->desc->sdw_alh_base; @@ -1694,3 +1695,4 @@ MODULE_IMPORT_NS(SND_SOC_SOF_HDA_AUDIO_CODEC_I915); MODULE_IMPORT_NS(SND_SOC_SOF_XTENSA); MODULE_IMPORT_NS(SND_INTEL_SOUNDWIRE_ACPI); MODULE_IMPORT_NS(SOUNDWIRE_INTEL_INIT); +MODULE_IMPORT_NS(SOUNDWIRE_INTEL); From fb2dc6a0a5f885233d632b1e92be9c0be977b0dc Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 11 Nov 2022 09:31:29 +0800 Subject: [PATCH 2112/4122] soundwire: intel: add debugfs callbacks in hw_ops No functionality change, only add indirection for debugfs helpers. 
Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20221111013135.38289-3-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/intel.c | 7 +++++-- drivers/soundwire/intel.h | 16 ++++++++++++++++ include/linux/soundwire/sdw_intel.h | 5 +++++ 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c index f88319f8ded4..914f2fb43721 100644 --- a/drivers/soundwire/intel.c +++ b/drivers/soundwire/intel.c @@ -1423,6 +1423,9 @@ static int intel_stop_bus(struct sdw_intel *sdw, bool clock_stop) } const struct sdw_intel_hw_ops sdw_intel_cnl_hw_ops = { + .debugfs_init = intel_debugfs_init, + .debugfs_exit = intel_debugfs_exit, + .pre_bank_switch = intel_pre_bank_switch, .post_bank_switch = intel_post_bank_switch, }; @@ -1614,7 +1617,7 @@ int intel_link_startup(struct auxiliary_device *auxdev) goto err_power_up; } - intel_debugfs_init(sdw); + sdw_intel_debugfs_init(sdw); /* start bus */ ret = intel_start_bus(sdw); @@ -1685,7 +1688,7 @@ static void intel_link_remove(struct auxiliary_device *auxdev) * SDW_INTEL_CLK_STOP_NOT_ALLOWED */ if (!bus->prop.hw_disabled) { - intel_debugfs_exit(sdw); + sdw_intel_debugfs_exit(sdw); sdw_cdns_enable_interrupt(cdns, false); } sdw_bus_master_delete(bus); diff --git a/drivers/soundwire/intel.h b/drivers/soundwire/intel.h index 3170df76b411..5548b8451d01 100644 --- a/drivers/soundwire/intel.h +++ b/drivers/soundwire/intel.h @@ -61,4 +61,20 @@ struct sdw_intel_link_dev { #define auxiliary_dev_to_sdw_intel_link_dev(auxiliary_dev) \ container_of(auxiliary_dev, struct sdw_intel_link_dev, auxdev) +#define SDW_INTEL_CHECK_OPS(sdw, cb) ((sdw) && (sdw)->link_res && (sdw)->link_res->hw_ops && \ + (sdw)->link_res->hw_ops->cb) +#define SDW_INTEL_OPS(sdw, cb) ((sdw)->link_res->hw_ops->cb) + +static inline void sdw_intel_debugfs_init(struct sdw_intel *sdw) +{ + if (SDW_INTEL_CHECK_OPS(sdw, debugfs_init)) + 
SDW_INTEL_OPS(sdw, debugfs_init)(sdw); +} + +static inline void sdw_intel_debugfs_exit(struct sdw_intel *sdw) +{ + if (SDW_INTEL_CHECK_OPS(sdw, debugfs_exit)) + SDW_INTEL_OPS(sdw, debugfs_exit)(sdw); +} + #endif /* __SDW_INTEL_LOCAL_H */ diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index 2dbe34b41ef1..211924e4ebf2 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -297,10 +297,15 @@ irqreturn_t sdw_intel_thread(int irq, void *dev_id); struct sdw_intel; /* struct intel_sdw_hw_ops - SoundWire ops for Intel platforms. + * @debugfs_init: initialize all debugfs capabilities + * @debugfs_exit: close and cleanup debugfs capabilities * @pre_bank_switch: helper for bus management * @post_bank_switch: helper for bus management */ struct sdw_intel_hw_ops { + void (*debugfs_init)(struct sdw_intel *sdw); + void (*debugfs_exit)(struct sdw_intel *sdw); + int (*pre_bank_switch)(struct sdw_intel *sdw); int (*post_bank_switch)(struct sdw_intel *sdw); }; From b6234bcc6589a0719ec91d810114c0b556a5b88b Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 11 Nov 2022 09:31:30 +0800 Subject: [PATCH 2113/4122] soundwire: intel: add register_dai callback in hw_ops No functionality change, only add indirection for DAI registration helper. 
Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20221111013135.38289-4-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/intel.c | 4 +++- drivers/soundwire/intel.h | 7 +++++++ include/linux/soundwire/sdw_intel.h | 3 +++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c index 914f2fb43721..0496eb0d6084 100644 --- a/drivers/soundwire/intel.c +++ b/drivers/soundwire/intel.c @@ -1426,6 +1426,8 @@ const struct sdw_intel_hw_ops sdw_intel_cnl_hw_ops = { .debugfs_init = intel_debugfs_init, .debugfs_exit = intel_debugfs_exit, + .register_dai = intel_register_dai, + .pre_bank_switch = intel_pre_bank_switch, .post_bank_switch = intel_post_bank_switch, }; @@ -1611,7 +1613,7 @@ int intel_link_startup(struct auxiliary_device *auxdev) goto err_init; /* Register DAIs */ - ret = intel_register_dai(sdw); + ret = sdw_intel_register_dai(sdw); if (ret) { dev_err(dev, "DAI registration failed: %d\n", ret); goto err_power_up; diff --git a/drivers/soundwire/intel.h b/drivers/soundwire/intel.h index 5548b8451d01..0521cab311a3 100644 --- a/drivers/soundwire/intel.h +++ b/drivers/soundwire/intel.h @@ -77,4 +77,11 @@ static inline void sdw_intel_debugfs_exit(struct sdw_intel *sdw) SDW_INTEL_OPS(sdw, debugfs_exit)(sdw); } +static inline int sdw_intel_register_dai(struct sdw_intel *sdw) +{ + if (SDW_INTEL_CHECK_OPS(sdw, register_dai)) + return SDW_INTEL_OPS(sdw, register_dai)(sdw); + return -ENOTSUPP; +} + #endif /* __SDW_INTEL_LOCAL_H */ diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index 211924e4ebf2..5be63d4fe62e 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -299,6 +299,7 @@ struct sdw_intel; /* struct intel_sdw_hw_ops - SoundWire ops for Intel platforms. 
* @debugfs_init: initialize all debugfs capabilities * @debugfs_exit: close and cleanup debugfs capabilities + * @register_dai: read all PDI information and register DAIs * @pre_bank_switch: helper for bus management * @post_bank_switch: helper for bus management */ @@ -306,6 +307,8 @@ struct sdw_intel_hw_ops { void (*debugfs_init)(struct sdw_intel *sdw); void (*debugfs_exit)(struct sdw_intel *sdw); + int (*register_dai)(struct sdw_intel *sdw); + int (*pre_bank_switch)(struct sdw_intel *sdw); int (*post_bank_switch)(struct sdw_intel *sdw); }; From 3db0c5a6a2832c7b4b40676299e4bbbe1a96bc8b Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 11 Nov 2022 09:31:31 +0800 Subject: [PATCH 2114/4122] soundwire: intel: add bus management callbacks in hw_ops No functionality change, only add indirection for bus management helpers. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20221111013135.38289-5-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/intel.c | 24 ++++++++++++-------- drivers/soundwire/intel.h | 34 +++++++++++++++++++++++++++++ include/linux/soundwire/sdw_intel.h | 11 ++++++++++ 3 files changed, 60 insertions(+), 9 deletions(-) diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c index 0496eb0d6084..6d2fdf3a01fd 100644 --- a/drivers/soundwire/intel.c +++ b/drivers/soundwire/intel.c @@ -1428,6 +1428,12 @@ const struct sdw_intel_hw_ops sdw_intel_cnl_hw_ops = { .register_dai = intel_register_dai, + .check_clock_stop = intel_check_clock_stop, + .start_bus = intel_start_bus, + .start_bus_after_reset = intel_start_bus_after_reset, + .start_bus_after_clock_stop = intel_start_bus_after_clock_stop, + .stop_bus = intel_stop_bus, + .pre_bank_switch = intel_pre_bank_switch, .post_bank_switch = intel_post_bank_switch, }; @@ -1622,7 +1628,7 @@ int intel_link_startup(struct auxiliary_device *auxdev) sdw_intel_debugfs_init(sdw); /* start bus */ - ret = 
intel_start_bus(sdw); + ret = sdw_intel_start_bus(sdw); if (ret) { dev_err(dev, "bus start failed: %d\n", ret); goto err_power_up; @@ -1850,7 +1856,7 @@ static int __maybe_unused intel_suspend(struct device *dev) return 0; } - ret = intel_stop_bus(sdw, false); + ret = sdw_intel_stop_bus(sdw, false); if (ret < 0) { dev_err(dev, "%s: cannot stop bus: %d\n", __func__, ret); return ret; @@ -1876,14 +1882,14 @@ static int __maybe_unused intel_suspend_runtime(struct device *dev) clock_stop_quirks = sdw->link_res->clock_stop_quirks; if (clock_stop_quirks & SDW_INTEL_CLK_STOP_TEARDOWN) { - ret = intel_stop_bus(sdw, false); + ret = sdw_intel_stop_bus(sdw, false); if (ret < 0) { dev_err(dev, "%s: cannot stop bus during teardown: %d\n", __func__, ret); return ret; } } else if (clock_stop_quirks & SDW_INTEL_CLK_STOP_BUS_RESET || !clock_stop_quirks) { - ret = intel_stop_bus(sdw, true); + ret = sdw_intel_stop_bus(sdw, true); if (ret < 0) { dev_err(dev, "%s: cannot stop bus during clock_stop: %d\n", __func__, ret); @@ -1941,7 +1947,7 @@ static int __maybe_unused intel_resume(struct device *dev) */ sdw_clear_slave_status(bus, SDW_UNATTACH_REQUEST_MASTER_RESET); - ret = intel_start_bus(sdw); + ret = sdw_intel_start_bus(sdw); if (ret < 0) { dev_err(dev, "cannot start bus during resume\n"); intel_link_power_down(sdw); @@ -1995,7 +2001,7 @@ static int __maybe_unused intel_resume_runtime(struct device *dev) */ sdw_clear_slave_status(bus, SDW_UNATTACH_REQUEST_MASTER_RESET); - ret = intel_start_bus(sdw); + ret = sdw_intel_start_bus(sdw); if (ret < 0) { dev_err(dev, "%s: cannot start bus after teardown: %d\n", __func__, ret); intel_link_power_down(sdw); @@ -2010,7 +2016,7 @@ static int __maybe_unused intel_resume_runtime(struct device *dev) return ret; } - ret = intel_start_bus_after_reset(sdw); + ret = sdw_intel_start_bus_after_reset(sdw); if (ret < 0) { dev_err(dev, "%s: cannot start bus after reset: %d\n", __func__, ret); intel_link_power_down(sdw); @@ -2018,7 +2024,7 @@ static int 
__maybe_unused intel_resume_runtime(struct device *dev) } } else if (!clock_stop_quirks) { - intel_check_clock_stop(sdw); + sdw_intel_check_clock_stop(sdw); ret = intel_link_power_up(sdw); if (ret) { @@ -2026,7 +2032,7 @@ static int __maybe_unused intel_resume_runtime(struct device *dev) return ret; } - ret = intel_start_bus_after_clock_stop(sdw); + ret = sdw_intel_start_bus_after_clock_stop(sdw); if (ret < 0) { dev_err(dev, "%s: cannot start bus after clock stop: %d\n", __func__, ret); intel_link_power_down(sdw); diff --git a/drivers/soundwire/intel.h b/drivers/soundwire/intel.h index 0521cab311a3..99a2d875a331 100644 --- a/drivers/soundwire/intel.h +++ b/drivers/soundwire/intel.h @@ -84,4 +84,38 @@ static inline int sdw_intel_register_dai(struct sdw_intel *sdw) return -ENOTSUPP; } +static inline void sdw_intel_check_clock_stop(struct sdw_intel *sdw) +{ + if (SDW_INTEL_CHECK_OPS(sdw, check_clock_stop)) + SDW_INTEL_OPS(sdw, check_clock_stop)(sdw); +} + +static inline int sdw_intel_start_bus(struct sdw_intel *sdw) +{ + if (SDW_INTEL_CHECK_OPS(sdw, start_bus)) + return SDW_INTEL_OPS(sdw, start_bus)(sdw); + return -ENOTSUPP; +} + +static inline int sdw_intel_start_bus_after_reset(struct sdw_intel *sdw) +{ + if (SDW_INTEL_CHECK_OPS(sdw, start_bus_after_reset)) + return SDW_INTEL_OPS(sdw, start_bus_after_reset)(sdw); + return -ENOTSUPP; +} + +static inline int sdw_intel_start_bus_after_clock_stop(struct sdw_intel *sdw) +{ + if (SDW_INTEL_CHECK_OPS(sdw, start_bus_after_clock_stop)) + return SDW_INTEL_OPS(sdw, start_bus_after_clock_stop)(sdw); + return -ENOTSUPP; +} + +static inline int sdw_intel_stop_bus(struct sdw_intel *sdw, bool clock_stop) +{ + if (SDW_INTEL_CHECK_OPS(sdw, stop_bus)) + return SDW_INTEL_OPS(sdw, stop_bus)(sdw, clock_stop); + return -ENOTSUPP; +} + #endif /* __SDW_INTEL_LOCAL_H */ diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index 5be63d4fe62e..cee61bc9af8a 100644 --- a/include/linux/soundwire/sdw_intel.h +++ 
b/include/linux/soundwire/sdw_intel.h @@ -300,6 +300,11 @@ struct sdw_intel; * @debugfs_init: initialize all debugfs capabilities * @debugfs_exit: close and cleanup debugfs capabilities * @register_dai: read all PDI information and register DAIs + * @check_clock_stop: throw error message if clock is not stopped. + * @start_bus: normal start + * @start_bus_after_reset: start after reset + * @start_bus_after_clock_stop: start after mode0 clock stop + * @stop_bus: stop all bus * @pre_bank_switch: helper for bus management * @post_bank_switch: helper for bus management */ @@ -309,6 +314,12 @@ struct sdw_intel_hw_ops { int (*register_dai)(struct sdw_intel *sdw); + void (*check_clock_stop)(struct sdw_intel *sdw); + int (*start_bus)(struct sdw_intel *sdw); + int (*start_bus_after_reset)(struct sdw_intel *sdw); + int (*start_bus_after_clock_stop)(struct sdw_intel *sdw); + int (*stop_bus)(struct sdw_intel *sdw, bool clock_stop); + int (*pre_bank_switch)(struct sdw_intel *sdw); int (*post_bank_switch)(struct sdw_intel *sdw); }; From 49c9ff45991a5a62e040c8b43c89a9ab38a0a91f Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 11 Nov 2022 09:31:32 +0800 Subject: [PATCH 2115/4122] soundwire: intel: add link power management callbacks in hw_ops No functionality change, only add indirection for link power management helpers. 
Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20221111013135.38289-6-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/intel.c | 23 +++++++++++++---------- drivers/soundwire/intel.h | 14 ++++++++++++++ include/linux/soundwire/sdw_intel.h | 5 +++++ 3 files changed, 32 insertions(+), 10 deletions(-) diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c index 6d2fdf3a01fd..2320f1b8a2d1 100644 --- a/drivers/soundwire/intel.c +++ b/drivers/soundwire/intel.c @@ -1434,6 +1434,9 @@ const struct sdw_intel_hw_ops sdw_intel_cnl_hw_ops = { .start_bus_after_clock_stop = intel_start_bus_after_clock_stop, .stop_bus = intel_stop_bus, + .link_power_up = intel_link_power_up, + .link_power_down = intel_link_power_down, + .pre_bank_switch = intel_pre_bank_switch, .post_bank_switch = intel_post_bank_switch, }; @@ -1614,7 +1617,7 @@ int intel_link_startup(struct auxiliary_device *auxdev) bus->multi_link = multi_link; /* Initialize shim, controller */ - ret = intel_link_power_up(sdw); + ret = sdw_intel_link_power_up(sdw); if (ret) goto err_init; @@ -1679,7 +1682,7 @@ int intel_link_startup(struct auxiliary_device *auxdev) return 0; err_power_up: - intel_link_power_down(sdw); + sdw_intel_link_power_down(sdw); err_init: return ret; } @@ -1935,7 +1938,7 @@ static int __maybe_unused intel_resume(struct device *dev) pm_runtime_idle(dev); } - ret = intel_link_power_up(sdw); + ret = sdw_intel_link_power_up(sdw); if (ret) { dev_err(dev, "%s failed: %d\n", __func__, ret); return ret; @@ -1950,7 +1953,7 @@ static int __maybe_unused intel_resume(struct device *dev) ret = sdw_intel_start_bus(sdw); if (ret < 0) { dev_err(dev, "cannot start bus during resume\n"); - intel_link_power_down(sdw); + sdw_intel_link_power_down(sdw); return ret; } @@ -1989,7 +1992,7 @@ static int __maybe_unused intel_resume_runtime(struct device *dev) clock_stop_quirks = sdw->link_res->clock_stop_quirks; if 
(clock_stop_quirks & SDW_INTEL_CLK_STOP_TEARDOWN) { - ret = intel_link_power_up(sdw); + ret = sdw_intel_link_power_up(sdw); if (ret) { dev_err(dev, "%s: power_up failed after teardown: %d\n", __func__, ret); return ret; @@ -2004,13 +2007,13 @@ static int __maybe_unused intel_resume_runtime(struct device *dev) ret = sdw_intel_start_bus(sdw); if (ret < 0) { dev_err(dev, "%s: cannot start bus after teardown: %d\n", __func__, ret); - intel_link_power_down(sdw); + sdw_intel_link_power_down(sdw); return ret; } } else if (clock_stop_quirks & SDW_INTEL_CLK_STOP_BUS_RESET) { - ret = intel_link_power_up(sdw); + ret = sdw_intel_link_power_up(sdw); if (ret) { dev_err(dev, "%s: power_up failed after bus reset: %d\n", __func__, ret); return ret; @@ -2019,14 +2022,14 @@ static int __maybe_unused intel_resume_runtime(struct device *dev) ret = sdw_intel_start_bus_after_reset(sdw); if (ret < 0) { dev_err(dev, "%s: cannot start bus after reset: %d\n", __func__, ret); - intel_link_power_down(sdw); + sdw_intel_link_power_down(sdw); return ret; } } else if (!clock_stop_quirks) { sdw_intel_check_clock_stop(sdw); - ret = intel_link_power_up(sdw); + ret = sdw_intel_link_power_up(sdw); if (ret) { dev_err(dev, "%s: power_up failed: %d\n", __func__, ret); return ret; @@ -2035,7 +2038,7 @@ static int __maybe_unused intel_resume_runtime(struct device *dev) ret = sdw_intel_start_bus_after_clock_stop(sdw); if (ret < 0) { dev_err(dev, "%s: cannot start bus after clock stop: %d\n", __func__, ret); - intel_link_power_down(sdw); + sdw_intel_link_power_down(sdw); return ret; } } else { diff --git a/drivers/soundwire/intel.h b/drivers/soundwire/intel.h index 99a2d875a331..0f63e7584132 100644 --- a/drivers/soundwire/intel.h +++ b/drivers/soundwire/intel.h @@ -118,4 +118,18 @@ static inline int sdw_intel_stop_bus(struct sdw_intel *sdw, bool clock_stop) return -ENOTSUPP; } +static inline int sdw_intel_link_power_up(struct sdw_intel *sdw) +{ + if (SDW_INTEL_CHECK_OPS(sdw, link_power_up)) + return 
SDW_INTEL_OPS(sdw, link_power_up)(sdw); + return -ENOTSUPP; +} + +static inline int sdw_intel_link_power_down(struct sdw_intel *sdw) +{ + if (SDW_INTEL_CHECK_OPS(sdw, link_power_down)) + return SDW_INTEL_OPS(sdw, link_power_down)(sdw); + return -ENOTSUPP; +} + #endif /* __SDW_INTEL_LOCAL_H */ diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index cee61bc9af8a..81430201b8b9 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -305,6 +305,8 @@ struct sdw_intel; * @start_bus_after_reset: start after reset * @start_bus_after_clock_stop: start after mode0 clock stop * @stop_bus: stop all bus + * @link_power_up: power-up using chip-specific helpers + * @link_power_down: power-down with chip-specific helpers * @pre_bank_switch: helper for bus management * @post_bank_switch: helper for bus management */ @@ -320,6 +322,9 @@ struct sdw_intel_hw_ops { int (*start_bus_after_clock_stop)(struct sdw_intel *sdw); int (*stop_bus)(struct sdw_intel *sdw, bool clock_stop); + int (*link_power_up)(struct sdw_intel *sdw); + int (*link_power_down)(struct sdw_intel *sdw); + int (*pre_bank_switch)(struct sdw_intel *sdw); int (*post_bank_switch)(struct sdw_intel *sdw); }; From 36e3b385f35a33a10b792ec46350dd87d79e84dd Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 11 Nov 2022 09:31:33 +0800 Subject: [PATCH 2116/4122] soundwire: intel: add in-band wake callbacks in hw_ops No functionality change, only add indirection for in-band wake management helpers. 
Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20221111013135.38289-7-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/intel.c | 11 +++++++---- drivers/soundwire/intel.h | 13 +++++++++++++ include/linux/soundwire/sdw_intel.h | 5 +++++ 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c index 2320f1b8a2d1..ea6479b4010d 100644 --- a/drivers/soundwire/intel.c +++ b/drivers/soundwire/intel.c @@ -1437,6 +1437,9 @@ const struct sdw_intel_hw_ops sdw_intel_cnl_hw_ops = { .link_power_up = intel_link_power_up, .link_power_down = intel_link_power_down, + .shim_check_wake = intel_shim_check_wake, + .shim_wake = intel_shim_wake, + .pre_bank_switch = intel_pre_bank_switch, .post_bank_switch = intel_post_bank_switch, }; @@ -1720,11 +1723,11 @@ int intel_link_process_wakeen_event(struct auxiliary_device *auxdev) return 0; } - if (!intel_shim_check_wake(sdw)) + if (!sdw_intel_shim_check_wake(sdw)) return 0; /* disable WAKEEN interrupt ASAP to prevent interrupt flood */ - intel_shim_wake(sdw, false); + sdw_intel_shim_wake(sdw, false); /* * resume the Master, which will generate a bus reset and result in @@ -1852,7 +1855,7 @@ static int __maybe_unused intel_suspend(struct device *dev) */ dev_err(dev, "%s: invalid config: parent is suspended\n", __func__); } else { - intel_shim_wake(sdw, false); + sdw_intel_shim_wake(sdw, false); } } @@ -1987,7 +1990,7 @@ static int __maybe_unused intel_resume_runtime(struct device *dev) } /* unconditionally disable WAKEEN interrupt */ - intel_shim_wake(sdw, false); + sdw_intel_shim_wake(sdw, false); clock_stop_quirks = sdw->link_res->clock_stop_quirks; diff --git a/drivers/soundwire/intel.h b/drivers/soundwire/intel.h index 0f63e7584132..9ac3397757a0 100644 --- a/drivers/soundwire/intel.h +++ b/drivers/soundwire/intel.h @@ -132,4 +132,17 @@ static inline int 
sdw_intel_link_power_down(struct sdw_intel *sdw) return -ENOTSUPP; } +static inline int sdw_intel_shim_check_wake(struct sdw_intel *sdw) +{ + if (SDW_INTEL_CHECK_OPS(sdw, shim_check_wake)) + return SDW_INTEL_OPS(sdw, shim_check_wake)(sdw); + return -ENOTSUPP; +} + +static inline void sdw_intel_shim_wake(struct sdw_intel *sdw, bool wake_enable) +{ + if (SDW_INTEL_CHECK_OPS(sdw, shim_wake)) + SDW_INTEL_OPS(sdw, shim_wake)(sdw, wake_enable); +} + #endif /* __SDW_INTEL_LOCAL_H */ diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index 81430201b8b9..0942cd464095 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -307,6 +307,8 @@ struct sdw_intel; * @stop_bus: stop all bus * @link_power_up: power-up using chip-specific helpers * @link_power_down: power-down with chip-specific helpers + * @shim_check_wake: check if a wake was received + * @shim_wake: enable/disable in-band wake management * @pre_bank_switch: helper for bus management * @post_bank_switch: helper for bus management */ @@ -325,6 +327,9 @@ struct sdw_intel_hw_ops { int (*link_power_up)(struct sdw_intel *sdw); int (*link_power_down)(struct sdw_intel *sdw); + int (*shim_check_wake)(struct sdw_intel *sdw); + void (*shim_wake)(struct sdw_intel *sdw, bool wake_enable); + int (*pre_bank_switch)(struct sdw_intel *sdw); int (*post_bank_switch)(struct sdw_intel *sdw); }; From 7cbf00bd4142cd88ac7ecbc4ea7b917a220cb721 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 11 Nov 2022 09:31:34 +0800 Subject: [PATCH 2117/4122] soundwire: intel: split auxdevice to different file The auxdevice layer is completely generic, it should be split from intel.c which is only geared to the 'cnl' hw_ops now. 
Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20221111013135.38289-8-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/Makefile | 2 +- drivers/soundwire/intel.c | 657 --------------------------- drivers/soundwire/intel.h | 11 +- drivers/soundwire/intel_auxdevice.c | 678 ++++++++++++++++++++++++++++ drivers/soundwire/intel_auxdevice.h | 18 + drivers/soundwire/intel_init.c | 1 + 6 files changed, 700 insertions(+), 667 deletions(-) create mode 100644 drivers/soundwire/intel_auxdevice.c create mode 100644 drivers/soundwire/intel_auxdevice.h diff --git a/drivers/soundwire/Makefile b/drivers/soundwire/Makefile index 986776787b9e..ca97414ada70 100644 --- a/drivers/soundwire/Makefile +++ b/drivers/soundwire/Makefile @@ -20,7 +20,7 @@ soundwire-cadence-y := cadence_master.o obj-$(CONFIG_SOUNDWIRE_CADENCE) += soundwire-cadence.o #Intel driver -soundwire-intel-y := intel.o intel_init.o dmi-quirks.o +soundwire-intel-y := intel.o intel_auxdevice.o intel_init.o dmi-quirks.o obj-$(CONFIG_SOUNDWIRE_INTEL) += soundwire-intel.o #Qualcomm driver diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c index ea6479b4010d..bc9c50bacc49 100644 --- a/drivers/soundwire/intel.c +++ b/drivers/soundwire/intel.c @@ -8,10 +8,7 @@ #include #include #include -#include -#include #include -#include #include #include #include @@ -22,27 +19,6 @@ #include "bus.h" #include "intel.h" -/* IDA min selected to avoid conflicts with HDaudio/iDISP SDI values */ -#define INTEL_DEV_NUM_IDA_MIN 4 - -#define INTEL_MASTER_SUSPEND_DELAY_MS 3000 -#define INTEL_MASTER_RESET_ITERATIONS 10 - -/* - * debug/config flags for the Intel SoundWire Master. - * - * Since we may have multiple masters active, we can have up to 8 - * flags reused in each byte, with master0 using the ls-byte, etc. 
- */ - -#define SDW_INTEL_MASTER_DISABLE_PM_RUNTIME BIT(0) -#define SDW_INTEL_MASTER_DISABLE_CLOCK_STOP BIT(1) -#define SDW_INTEL_MASTER_DISABLE_PM_RUNTIME_IDLE BIT(2) -#define SDW_INTEL_MASTER_DISABLE_MULTI_LINK BIT(3) - -static int md_flags; -module_param_named(sdw_md_flags, md_flags, int, 0444); -MODULE_PARM_DESC(sdw_md_flags, "SoundWire Intel Master device flags (0x0 all off)"); enum intel_pdi_type { INTEL_PDI_IN = 0, @@ -1445,636 +1421,3 @@ const struct sdw_intel_hw_ops sdw_intel_cnl_hw_ops = { }; EXPORT_SYMBOL_NS(sdw_intel_cnl_hw_ops, SOUNDWIRE_INTEL); -static int generic_pre_bank_switch(struct sdw_bus *bus) -{ - struct sdw_cdns *cdns = bus_to_cdns(bus); - struct sdw_intel *sdw = cdns_to_intel(cdns); - - return sdw->link_res->hw_ops->pre_bank_switch(sdw); -} - -static int generic_post_bank_switch(struct sdw_bus *bus) -{ - struct sdw_cdns *cdns = bus_to_cdns(bus); - struct sdw_intel *sdw = cdns_to_intel(cdns); - - return sdw->link_res->hw_ops->post_bank_switch(sdw); -} - -static int sdw_master_read_intel_prop(struct sdw_bus *bus) -{ - struct sdw_master_prop *prop = &bus->prop; - struct fwnode_handle *link; - char name[32]; - u32 quirk_mask; - - /* Find master handle */ - snprintf(name, sizeof(name), - "mipi-sdw-link-%d-subproperties", bus->link_id); - - link = device_get_named_child_node(bus->dev, name); - if (!link) { - dev_err(bus->dev, "Master node %s not found\n", name); - return -EIO; - } - - fwnode_property_read_u32(link, - "intel-sdw-ip-clock", - &prop->mclk_freq); - - /* the values reported by BIOS are the 2x clock, not the bus clock */ - prop->mclk_freq /= 2; - - fwnode_property_read_u32(link, - "intel-quirk-mask", - &quirk_mask); - - if (quirk_mask & SDW_INTEL_QUIRK_MASK_BUS_DISABLE) - prop->hw_disabled = true; - - prop->quirks = SDW_MASTER_QUIRKS_CLEAR_INITIAL_CLASH | - SDW_MASTER_QUIRKS_CLEAR_INITIAL_PARITY; - - return 0; -} - -static int intel_prop_read(struct sdw_bus *bus) -{ - /* Initialize with default handler to read all DisCo properties */ - 
sdw_master_read_prop(bus); - - /* read Intel-specific properties */ - sdw_master_read_intel_prop(bus); - - return 0; -} - -static struct sdw_master_ops sdw_intel_ops = { - .read_prop = intel_prop_read, - .override_adr = sdw_dmi_override_adr, - .xfer_msg = cdns_xfer_msg, - .xfer_msg_defer = cdns_xfer_msg_defer, - .reset_page_addr = cdns_reset_page_addr, - .set_bus_conf = cdns_bus_conf, - .pre_bank_switch = generic_pre_bank_switch, - .post_bank_switch = generic_post_bank_switch, - .read_ping_status = cdns_read_ping_status, -}; - -/* - * probe and init (aux_dev_id argument is required by function prototype but not used) - */ -static int intel_link_probe(struct auxiliary_device *auxdev, - const struct auxiliary_device_id *aux_dev_id) - -{ - struct device *dev = &auxdev->dev; - struct sdw_intel_link_dev *ldev = auxiliary_dev_to_sdw_intel_link_dev(auxdev); - struct sdw_intel *sdw; - struct sdw_cdns *cdns; - struct sdw_bus *bus; - int ret; - - sdw = devm_kzalloc(dev, sizeof(*sdw), GFP_KERNEL); - if (!sdw) - return -ENOMEM; - - cdns = &sdw->cdns; - bus = &cdns->bus; - - sdw->instance = auxdev->id; - sdw->link_res = &ldev->link_res; - cdns->dev = dev; - cdns->registers = sdw->link_res->registers; - cdns->instance = sdw->instance; - cdns->msg_count = 0; - - bus->link_id = auxdev->id; - bus->dev_num_ida_min = INTEL_DEV_NUM_IDA_MIN; - bus->clk_stop_timeout = 1; - - sdw_cdns_probe(cdns); - - /* Set ops */ - bus->ops = &sdw_intel_ops; - - /* set driver data, accessed by snd_soc_dai_get_drvdata() */ - auxiliary_set_drvdata(auxdev, cdns); - - /* use generic bandwidth allocation algorithm */ - sdw->cdns.bus.compute_params = sdw_compute_params; - - /* avoid resuming from pm_runtime suspend if it's not required */ - dev_pm_set_driver_flags(dev, DPM_FLAG_SMART_SUSPEND); - - ret = sdw_bus_master_add(bus, dev, dev->fwnode); - if (ret) { - dev_err(dev, "sdw_bus_master_add fail: %d\n", ret); - return ret; - } - - if (bus->prop.hw_disabled) - dev_info(dev, - "SoundWire master %d is 
disabled, will be ignored\n", - bus->link_id); - /* - * Ignore BIOS err_threshold, it's a really bad idea when dealing - * with multiple hardware synchronized links - */ - bus->prop.err_threshold = 0; - - return 0; -} - -int intel_link_startup(struct auxiliary_device *auxdev) -{ - struct device *dev = &auxdev->dev; - struct sdw_cdns *cdns = auxiliary_get_drvdata(auxdev); - struct sdw_intel *sdw = cdns_to_intel(cdns); - struct sdw_bus *bus = &cdns->bus; - int link_flags; - bool multi_link; - u32 clock_stop_quirks; - int ret; - - if (bus->prop.hw_disabled) { - dev_info(dev, - "SoundWire master %d is disabled, ignoring\n", - sdw->instance); - return 0; - } - - link_flags = md_flags >> (bus->link_id * 8); - multi_link = !(link_flags & SDW_INTEL_MASTER_DISABLE_MULTI_LINK); - if (!multi_link) { - dev_dbg(dev, "Multi-link is disabled\n"); - } else { - /* - * hardware-based synchronization is required regardless - * of the number of segments used by a stream: SSP-based - * synchronization is gated by gsync when the multi-master - * mode is set. 
- */ - bus->hw_sync_min_links = 1; - } - bus->multi_link = multi_link; - - /* Initialize shim, controller */ - ret = sdw_intel_link_power_up(sdw); - if (ret) - goto err_init; - - /* Register DAIs */ - ret = sdw_intel_register_dai(sdw); - if (ret) { - dev_err(dev, "DAI registration failed: %d\n", ret); - goto err_power_up; - } - - sdw_intel_debugfs_init(sdw); - - /* start bus */ - ret = sdw_intel_start_bus(sdw); - if (ret) { - dev_err(dev, "bus start failed: %d\n", ret); - goto err_power_up; - } - - /* Enable runtime PM */ - if (!(link_flags & SDW_INTEL_MASTER_DISABLE_PM_RUNTIME)) { - pm_runtime_set_autosuspend_delay(dev, - INTEL_MASTER_SUSPEND_DELAY_MS); - pm_runtime_use_autosuspend(dev); - pm_runtime_mark_last_busy(dev); - - pm_runtime_set_active(dev); - pm_runtime_enable(dev); - } - - clock_stop_quirks = sdw->link_res->clock_stop_quirks; - if (clock_stop_quirks & SDW_INTEL_CLK_STOP_NOT_ALLOWED) { - /* - * To keep the clock running we need to prevent - * pm_runtime suspend from happening by increasing the - * reference count. - * This quirk is specified by the parent PCI device in - * case of specific latency requirements. It will have - * no effect if pm_runtime is disabled by the user via - * a module parameter for testing purposes. - */ - pm_runtime_get_noresume(dev); - } - - /* - * The runtime PM status of Slave devices is "Unsupported" - * until they report as ATTACHED. If they don't, e.g. because - * there are no Slave devices populated or if the power-on is - * delayed or dependent on a power switch, the Master will - * remain active and prevent its parent from suspending. - * - * Conditionally force the pm_runtime core to re-evaluate the - * Master status in the absence of any Slave activity. A quirk - * is provided to e.g. deal with Slaves that may be powered on - * with a delay. A more complete solution would require the - * definition of Master properties. 
- */ - if (!(link_flags & SDW_INTEL_MASTER_DISABLE_PM_RUNTIME_IDLE)) - pm_runtime_idle(dev); - - sdw->startup_done = true; - return 0; - -err_power_up: - sdw_intel_link_power_down(sdw); -err_init: - return ret; -} - -static void intel_link_remove(struct auxiliary_device *auxdev) -{ - struct sdw_cdns *cdns = auxiliary_get_drvdata(auxdev); - struct sdw_intel *sdw = cdns_to_intel(cdns); - struct sdw_bus *bus = &cdns->bus; - - /* - * Since pm_runtime is already disabled, we don't decrease - * the refcount when the clock_stop_quirk is - * SDW_INTEL_CLK_STOP_NOT_ALLOWED - */ - if (!bus->prop.hw_disabled) { - sdw_intel_debugfs_exit(sdw); - sdw_cdns_enable_interrupt(cdns, false); - } - sdw_bus_master_delete(bus); -} - -int intel_link_process_wakeen_event(struct auxiliary_device *auxdev) -{ - struct device *dev = &auxdev->dev; - struct sdw_intel *sdw; - struct sdw_bus *bus; - - sdw = auxiliary_get_drvdata(auxdev); - bus = &sdw->cdns.bus; - - if (bus->prop.hw_disabled || !sdw->startup_done) { - dev_dbg(dev, "SoundWire master %d is disabled or not-started, ignoring\n", - bus->link_id); - return 0; - } - - if (!sdw_intel_shim_check_wake(sdw)) - return 0; - - /* disable WAKEEN interrupt ASAP to prevent interrupt flood */ - sdw_intel_shim_wake(sdw, false); - - /* - * resume the Master, which will generate a bus reset and result in - * Slaves re-attaching and be re-enumerated. The SoundWire physical - * device which generated the wake will trigger an interrupt, which - * will in turn cause the corresponding Linux Slave device to be - * resumed and the Slave codec driver to check the status. 
- */ - pm_request_resume(dev); - - return 0; -} - -/* - * PM calls - */ - -static int intel_resume_child_device(struct device *dev, void *data) -{ - int ret; - struct sdw_slave *slave = dev_to_sdw_dev(dev); - - if (!slave->probed) { - dev_dbg(dev, "skipping device, no probed driver\n"); - return 0; - } - if (!slave->dev_num_sticky) { - dev_dbg(dev, "skipping device, never detected on bus\n"); - return 0; - } - - ret = pm_request_resume(dev); - if (ret < 0) - dev_err(dev, "%s: pm_request_resume failed: %d\n", __func__, ret); - - return ret; -} - -static int __maybe_unused intel_pm_prepare(struct device *dev) -{ - struct sdw_cdns *cdns = dev_get_drvdata(dev); - struct sdw_intel *sdw = cdns_to_intel(cdns); - struct sdw_bus *bus = &cdns->bus; - u32 clock_stop_quirks; - int ret; - - if (bus->prop.hw_disabled || !sdw->startup_done) { - dev_dbg(dev, "SoundWire master %d is disabled or not-started, ignoring\n", - bus->link_id); - return 0; - } - - clock_stop_quirks = sdw->link_res->clock_stop_quirks; - - if (pm_runtime_suspended(dev) && - pm_runtime_suspended(dev->parent) && - ((clock_stop_quirks & SDW_INTEL_CLK_STOP_BUS_RESET) || - !clock_stop_quirks)) { - /* - * if we've enabled clock stop, and the parent is suspended, the SHIM registers - * are not accessible and the shim wake cannot be disabled. - * The only solution is to resume the entire bus to full power - */ - - /* - * If any operation in this block fails, we keep going since we don't want - * to prevent system suspend from happening and errors should be recoverable - * on resume. - */ - - /* - * first resume the device for this link. This will also by construction - * resume the PCI parent device. - */ - ret = pm_request_resume(dev); - if (ret < 0) { - dev_err(dev, "%s: pm_request_resume failed: %d\n", __func__, ret); - return 0; - } - - /* - * Continue resuming the entire bus (parent + child devices) to exit - * the clock stop mode. If there are no devices connected on this link - * this is a no-op. 
- * The resume to full power could have been implemented with a .prepare - * step in SoundWire codec drivers. This would however require a lot - * of code to handle an Intel-specific corner case. It is simpler in - * practice to add a loop at the link level. - */ - ret = device_for_each_child(bus->dev, NULL, intel_resume_child_device); - - if (ret < 0) - dev_err(dev, "%s: intel_resume_child_device failed: %d\n", __func__, ret); - } - - return 0; -} - -static int __maybe_unused intel_suspend(struct device *dev) -{ - struct sdw_cdns *cdns = dev_get_drvdata(dev); - struct sdw_intel *sdw = cdns_to_intel(cdns); - struct sdw_bus *bus = &cdns->bus; - u32 clock_stop_quirks; - int ret; - - if (bus->prop.hw_disabled || !sdw->startup_done) { - dev_dbg(dev, "SoundWire master %d is disabled or not-started, ignoring\n", - bus->link_id); - return 0; - } - - if (pm_runtime_suspended(dev)) { - dev_dbg(dev, "pm_runtime status: suspended\n"); - - clock_stop_quirks = sdw->link_res->clock_stop_quirks; - - if ((clock_stop_quirks & SDW_INTEL_CLK_STOP_BUS_RESET) || - !clock_stop_quirks) { - - if (pm_runtime_suspended(dev->parent)) { - /* - * paranoia check: this should not happen with the .prepare - * resume to full power - */ - dev_err(dev, "%s: invalid config: parent is suspended\n", __func__); - } else { - sdw_intel_shim_wake(sdw, false); - } - } - - return 0; - } - - ret = sdw_intel_stop_bus(sdw, false); - if (ret < 0) { - dev_err(dev, "%s: cannot stop bus: %d\n", __func__, ret); - return ret; - } - - return 0; -} - -static int __maybe_unused intel_suspend_runtime(struct device *dev) -{ - struct sdw_cdns *cdns = dev_get_drvdata(dev); - struct sdw_intel *sdw = cdns_to_intel(cdns); - struct sdw_bus *bus = &cdns->bus; - u32 clock_stop_quirks; - int ret; - - if (bus->prop.hw_disabled || !sdw->startup_done) { - dev_dbg(dev, "SoundWire master %d is disabled or not-started, ignoring\n", - bus->link_id); - return 0; - } - - clock_stop_quirks = sdw->link_res->clock_stop_quirks; - - if 
(clock_stop_quirks & SDW_INTEL_CLK_STOP_TEARDOWN) { - ret = sdw_intel_stop_bus(sdw, false); - if (ret < 0) { - dev_err(dev, "%s: cannot stop bus during teardown: %d\n", - __func__, ret); - return ret; - } - } else if (clock_stop_quirks & SDW_INTEL_CLK_STOP_BUS_RESET || !clock_stop_quirks) { - ret = sdw_intel_stop_bus(sdw, true); - if (ret < 0) { - dev_err(dev, "%s: cannot stop bus during clock_stop: %d\n", - __func__, ret); - return ret; - } - } else { - dev_err(dev, "%s clock_stop_quirks %x unsupported\n", - __func__, clock_stop_quirks); - ret = -EINVAL; - } - - return ret; -} - -static int __maybe_unused intel_resume(struct device *dev) -{ - struct sdw_cdns *cdns = dev_get_drvdata(dev); - struct sdw_intel *sdw = cdns_to_intel(cdns); - struct sdw_bus *bus = &cdns->bus; - int link_flags; - int ret; - - if (bus->prop.hw_disabled || !sdw->startup_done) { - dev_dbg(dev, "SoundWire master %d is disabled or not-started, ignoring\n", - bus->link_id); - return 0; - } - - link_flags = md_flags >> (bus->link_id * 8); - - if (pm_runtime_suspended(dev)) { - dev_dbg(dev, "pm_runtime status was suspended, forcing active\n"); - - /* follow required sequence from runtime_pm.rst */ - pm_runtime_disable(dev); - pm_runtime_set_active(dev); - pm_runtime_mark_last_busy(dev); - pm_runtime_enable(dev); - - link_flags = md_flags >> (bus->link_id * 8); - - if (!(link_flags & SDW_INTEL_MASTER_DISABLE_PM_RUNTIME_IDLE)) - pm_runtime_idle(dev); - } - - ret = sdw_intel_link_power_up(sdw); - if (ret) { - dev_err(dev, "%s failed: %d\n", __func__, ret); - return ret; - } - - /* - * make sure all Slaves are tagged as UNATTACHED and provide - * reason for reinitialization - */ - sdw_clear_slave_status(bus, SDW_UNATTACH_REQUEST_MASTER_RESET); - - ret = sdw_intel_start_bus(sdw); - if (ret < 0) { - dev_err(dev, "cannot start bus during resume\n"); - sdw_intel_link_power_down(sdw); - return ret; - } - - /* - * after system resume, the pm_runtime suspend() may kick in - * during the enumeration, before 
any children device force the - * master device to remain active. Using pm_runtime_get() - * routines is not really possible, since it'd prevent the - * master from suspending. - * A reasonable compromise is to update the pm_runtime - * counters and delay the pm_runtime suspend by several - * seconds, by when all enumeration should be complete. - */ - pm_runtime_mark_last_busy(dev); - - return 0; -} - -static int __maybe_unused intel_resume_runtime(struct device *dev) -{ - struct sdw_cdns *cdns = dev_get_drvdata(dev); - struct sdw_intel *sdw = cdns_to_intel(cdns); - struct sdw_bus *bus = &cdns->bus; - u32 clock_stop_quirks; - int ret; - - if (bus->prop.hw_disabled || !sdw->startup_done) { - dev_dbg(dev, "SoundWire master %d is disabled or not-started, ignoring\n", - bus->link_id); - return 0; - } - - /* unconditionally disable WAKEEN interrupt */ - sdw_intel_shim_wake(sdw, false); - - clock_stop_quirks = sdw->link_res->clock_stop_quirks; - - if (clock_stop_quirks & SDW_INTEL_CLK_STOP_TEARDOWN) { - ret = sdw_intel_link_power_up(sdw); - if (ret) { - dev_err(dev, "%s: power_up failed after teardown: %d\n", __func__, ret); - return ret; - } - - /* - * make sure all Slaves are tagged as UNATTACHED and provide - * reason for reinitialization - */ - sdw_clear_slave_status(bus, SDW_UNATTACH_REQUEST_MASTER_RESET); - - ret = sdw_intel_start_bus(sdw); - if (ret < 0) { - dev_err(dev, "%s: cannot start bus after teardown: %d\n", __func__, ret); - sdw_intel_link_power_down(sdw); - return ret; - } - - - } else if (clock_stop_quirks & SDW_INTEL_CLK_STOP_BUS_RESET) { - ret = sdw_intel_link_power_up(sdw); - if (ret) { - dev_err(dev, "%s: power_up failed after bus reset: %d\n", __func__, ret); - return ret; - } - - ret = sdw_intel_start_bus_after_reset(sdw); - if (ret < 0) { - dev_err(dev, "%s: cannot start bus after reset: %d\n", __func__, ret); - sdw_intel_link_power_down(sdw); - return ret; - } - } else if (!clock_stop_quirks) { - - sdw_intel_check_clock_stop(sdw); - - ret = 
sdw_intel_link_power_up(sdw); - if (ret) { - dev_err(dev, "%s: power_up failed: %d\n", __func__, ret); - return ret; - } - - ret = sdw_intel_start_bus_after_clock_stop(sdw); - if (ret < 0) { - dev_err(dev, "%s: cannot start bus after clock stop: %d\n", __func__, ret); - sdw_intel_link_power_down(sdw); - return ret; - } - } else { - dev_err(dev, "%s: clock_stop_quirks %x unsupported\n", - __func__, clock_stop_quirks); - ret = -EINVAL; - } - - return ret; -} - -static const struct dev_pm_ops intel_pm = { - .prepare = intel_pm_prepare, - SET_SYSTEM_SLEEP_PM_OPS(intel_suspend, intel_resume) - SET_RUNTIME_PM_OPS(intel_suspend_runtime, intel_resume_runtime, NULL) -}; - -static const struct auxiliary_device_id intel_link_id_table[] = { - { .name = "soundwire_intel.link" }, - {}, -}; -MODULE_DEVICE_TABLE(auxiliary, intel_link_id_table); - -static struct auxiliary_driver sdw_intel_drv = { - .probe = intel_link_probe, - .remove = intel_link_remove, - .driver = { - /* auxiliary_driver_register() sets .name to be the modname */ - .pm = &intel_pm, - }, - .id_table = intel_link_id_table -}; -module_auxiliary_driver(sdw_intel_drv); - -MODULE_LICENSE("Dual BSD/GPL"); -MODULE_DESCRIPTION("Intel Soundwire Link Driver"); diff --git a/drivers/soundwire/intel.h b/drivers/soundwire/intel.h index 9ac3397757a0..de9883313c8f 100644 --- a/drivers/soundwire/intel.h +++ b/drivers/soundwire/intel.h @@ -50,16 +50,9 @@ struct sdw_intel { #endif }; -int intel_link_startup(struct auxiliary_device *auxdev); -int intel_link_process_wakeen_event(struct auxiliary_device *auxdev); +#define cdns_to_intel(_cdns) container_of(_cdns, struct sdw_intel, cdns) -struct sdw_intel_link_dev { - struct auxiliary_device auxdev; - struct sdw_intel_link_res link_res; -}; - -#define auxiliary_dev_to_sdw_intel_link_dev(auxiliary_dev) \ - container_of(auxiliary_dev, struct sdw_intel_link_dev, auxdev) +#define INTEL_MASTER_RESET_ITERATIONS 10 #define SDW_INTEL_CHECK_OPS(sdw, cb) ((sdw) && (sdw)->link_res && 
(sdw)->link_res->hw_ops && \ (sdw)->link_res->hw_ops->cb) diff --git a/drivers/soundwire/intel_auxdevice.c b/drivers/soundwire/intel_auxdevice.c new file mode 100644 index 000000000000..96c6b2112feb --- /dev/null +++ b/drivers/soundwire/intel_auxdevice.c @@ -0,0 +1,678 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) +// Copyright(c) 2015-22 Intel Corporation. + +/* + * Soundwire Intel Manager Driver + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cadence_master.h" +#include "bus.h" +#include "intel.h" +#include "intel_auxdevice.h" + +/* IDA min selected to avoid conflicts with HDaudio/iDISP SDI values */ +#define INTEL_DEV_NUM_IDA_MIN 4 + +#define INTEL_MASTER_SUSPEND_DELAY_MS 3000 + +/* + * debug/config flags for the Intel SoundWire Master. + * + * Since we may have multiple masters active, we can have up to 8 + * flags reused in each byte, with master0 using the ls-byte, etc. + */ + +#define SDW_INTEL_MASTER_DISABLE_PM_RUNTIME BIT(0) +#define SDW_INTEL_MASTER_DISABLE_CLOCK_STOP BIT(1) +#define SDW_INTEL_MASTER_DISABLE_PM_RUNTIME_IDLE BIT(2) +#define SDW_INTEL_MASTER_DISABLE_MULTI_LINK BIT(3) + +static int md_flags; +module_param_named(sdw_md_flags, md_flags, int, 0444); +MODULE_PARM_DESC(sdw_md_flags, "SoundWire Intel Master device flags (0x0 all off)"); + +static int generic_pre_bank_switch(struct sdw_bus *bus) +{ + struct sdw_cdns *cdns = bus_to_cdns(bus); + struct sdw_intel *sdw = cdns_to_intel(cdns); + + return sdw->link_res->hw_ops->pre_bank_switch(sdw); +} + +static int generic_post_bank_switch(struct sdw_bus *bus) +{ + struct sdw_cdns *cdns = bus_to_cdns(bus); + struct sdw_intel *sdw = cdns_to_intel(cdns); + + return sdw->link_res->hw_ops->post_bank_switch(sdw); +} + +static int sdw_master_read_intel_prop(struct sdw_bus *bus) +{ + struct sdw_master_prop *prop = &bus->prop; + struct fwnode_handle *link; + char name[32]; + u32 quirk_mask; + + /* Find 
master handle */ + snprintf(name, sizeof(name), + "mipi-sdw-link-%d-subproperties", bus->link_id); + + link = device_get_named_child_node(bus->dev, name); + if (!link) { + dev_err(bus->dev, "Master node %s not found\n", name); + return -EIO; + } + + fwnode_property_read_u32(link, + "intel-sdw-ip-clock", + &prop->mclk_freq); + + /* the values reported by BIOS are the 2x clock, not the bus clock */ + prop->mclk_freq /= 2; + + fwnode_property_read_u32(link, + "intel-quirk-mask", + &quirk_mask); + + if (quirk_mask & SDW_INTEL_QUIRK_MASK_BUS_DISABLE) + prop->hw_disabled = true; + + prop->quirks = SDW_MASTER_QUIRKS_CLEAR_INITIAL_CLASH | + SDW_MASTER_QUIRKS_CLEAR_INITIAL_PARITY; + + return 0; +} + +static int intel_prop_read(struct sdw_bus *bus) +{ + /* Initialize with default handler to read all DisCo properties */ + sdw_master_read_prop(bus); + + /* read Intel-specific properties */ + sdw_master_read_intel_prop(bus); + + return 0; +} + +static struct sdw_master_ops sdw_intel_ops = { + .read_prop = intel_prop_read, + .override_adr = sdw_dmi_override_adr, + .xfer_msg = cdns_xfer_msg, + .xfer_msg_defer = cdns_xfer_msg_defer, + .reset_page_addr = cdns_reset_page_addr, + .set_bus_conf = cdns_bus_conf, + .pre_bank_switch = generic_pre_bank_switch, + .post_bank_switch = generic_post_bank_switch, + .read_ping_status = cdns_read_ping_status, +}; + +/* + * probe and init (aux_dev_id argument is required by function prototype but not used) + */ +static int intel_link_probe(struct auxiliary_device *auxdev, + const struct auxiliary_device_id *aux_dev_id) + +{ + struct device *dev = &auxdev->dev; + struct sdw_intel_link_dev *ldev = auxiliary_dev_to_sdw_intel_link_dev(auxdev); + struct sdw_intel *sdw; + struct sdw_cdns *cdns; + struct sdw_bus *bus; + int ret; + + sdw = devm_kzalloc(dev, sizeof(*sdw), GFP_KERNEL); + if (!sdw) + return -ENOMEM; + + cdns = &sdw->cdns; + bus = &cdns->bus; + + sdw->instance = auxdev->id; + sdw->link_res = &ldev->link_res; + cdns->dev = dev; + 
cdns->registers = sdw->link_res->registers; + cdns->instance = sdw->instance; + cdns->msg_count = 0; + + bus->link_id = auxdev->id; + bus->dev_num_ida_min = INTEL_DEV_NUM_IDA_MIN; + bus->clk_stop_timeout = 1; + + sdw_cdns_probe(cdns); + + /* Set ops */ + bus->ops = &sdw_intel_ops; + + /* set driver data, accessed by snd_soc_dai_get_drvdata() */ + auxiliary_set_drvdata(auxdev, cdns); + + /* use generic bandwidth allocation algorithm */ + sdw->cdns.bus.compute_params = sdw_compute_params; + + /* avoid resuming from pm_runtime suspend if it's not required */ + dev_pm_set_driver_flags(dev, DPM_FLAG_SMART_SUSPEND); + + ret = sdw_bus_master_add(bus, dev, dev->fwnode); + if (ret) { + dev_err(dev, "sdw_bus_master_add fail: %d\n", ret); + return ret; + } + + if (bus->prop.hw_disabled) + dev_info(dev, + "SoundWire master %d is disabled, will be ignored\n", + bus->link_id); + /* + * Ignore BIOS err_threshold, it's a really bad idea when dealing + * with multiple hardware synchronized links + */ + bus->prop.err_threshold = 0; + + return 0; +} + +int intel_link_startup(struct auxiliary_device *auxdev) +{ + struct device *dev = &auxdev->dev; + struct sdw_cdns *cdns = auxiliary_get_drvdata(auxdev); + struct sdw_intel *sdw = cdns_to_intel(cdns); + struct sdw_bus *bus = &cdns->bus; + int link_flags; + bool multi_link; + u32 clock_stop_quirks; + int ret; + + if (bus->prop.hw_disabled) { + dev_info(dev, + "SoundWire master %d is disabled, ignoring\n", + sdw->instance); + return 0; + } + + link_flags = md_flags >> (bus->link_id * 8); + multi_link = !(link_flags & SDW_INTEL_MASTER_DISABLE_MULTI_LINK); + if (!multi_link) { + dev_dbg(dev, "Multi-link is disabled\n"); + } else { + /* + * hardware-based synchronization is required regardless + * of the number of segments used by a stream: SSP-based + * synchronization is gated by gsync when the multi-master + * mode is set. 
+ */ + bus->hw_sync_min_links = 1; + } + bus->multi_link = multi_link; + + /* Initialize shim, controller */ + ret = sdw_intel_link_power_up(sdw); + if (ret) + goto err_init; + + /* Register DAIs */ + ret = sdw_intel_register_dai(sdw); + if (ret) { + dev_err(dev, "DAI registration failed: %d\n", ret); + goto err_power_up; + } + + sdw_intel_debugfs_init(sdw); + + /* start bus */ + ret = sdw_intel_start_bus(sdw); + if (ret) { + dev_err(dev, "bus start failed: %d\n", ret); + goto err_power_up; + } + + /* Enable runtime PM */ + if (!(link_flags & SDW_INTEL_MASTER_DISABLE_PM_RUNTIME)) { + pm_runtime_set_autosuspend_delay(dev, + INTEL_MASTER_SUSPEND_DELAY_MS); + pm_runtime_use_autosuspend(dev); + pm_runtime_mark_last_busy(dev); + + pm_runtime_set_active(dev); + pm_runtime_enable(dev); + } + + clock_stop_quirks = sdw->link_res->clock_stop_quirks; + if (clock_stop_quirks & SDW_INTEL_CLK_STOP_NOT_ALLOWED) { + /* + * To keep the clock running we need to prevent + * pm_runtime suspend from happening by increasing the + * reference count. + * This quirk is specified by the parent PCI device in + * case of specific latency requirements. It will have + * no effect if pm_runtime is disabled by the user via + * a module parameter for testing purposes. + */ + pm_runtime_get_noresume(dev); + } + + /* + * The runtime PM status of Slave devices is "Unsupported" + * until they report as ATTACHED. If they don't, e.g. because + * there are no Slave devices populated or if the power-on is + * delayed or dependent on a power switch, the Master will + * remain active and prevent its parent from suspending. + * + * Conditionally force the pm_runtime core to re-evaluate the + * Master status in the absence of any Slave activity. A quirk + * is provided to e.g. deal with Slaves that may be powered on + * with a delay. A more complete solution would require the + * definition of Master properties. 
+ */ + if (!(link_flags & SDW_INTEL_MASTER_DISABLE_PM_RUNTIME_IDLE)) + pm_runtime_idle(dev); + + sdw->startup_done = true; + return 0; + +err_power_up: + sdw_intel_link_power_down(sdw); +err_init: + return ret; +} + +static void intel_link_remove(struct auxiliary_device *auxdev) +{ + struct sdw_cdns *cdns = auxiliary_get_drvdata(auxdev); + struct sdw_intel *sdw = cdns_to_intel(cdns); + struct sdw_bus *bus = &cdns->bus; + + /* + * Since pm_runtime is already disabled, we don't decrease + * the refcount when the clock_stop_quirk is + * SDW_INTEL_CLK_STOP_NOT_ALLOWED + */ + if (!bus->prop.hw_disabled) { + sdw_intel_debugfs_exit(sdw); + sdw_cdns_enable_interrupt(cdns, false); + } + sdw_bus_master_delete(bus); +} + +int intel_link_process_wakeen_event(struct auxiliary_device *auxdev) +{ + struct device *dev = &auxdev->dev; + struct sdw_intel *sdw; + struct sdw_bus *bus; + + sdw = auxiliary_get_drvdata(auxdev); + bus = &sdw->cdns.bus; + + if (bus->prop.hw_disabled || !sdw->startup_done) { + dev_dbg(dev, "SoundWire master %d is disabled or not-started, ignoring\n", + bus->link_id); + return 0; + } + + if (!sdw_intel_shim_check_wake(sdw)) + return 0; + + /* disable WAKEEN interrupt ASAP to prevent interrupt flood */ + sdw_intel_shim_wake(sdw, false); + + /* + * resume the Master, which will generate a bus reset and result in + * Slaves re-attaching and be re-enumerated. The SoundWire physical + * device which generated the wake will trigger an interrupt, which + * will in turn cause the corresponding Linux Slave device to be + * resumed and the Slave codec driver to check the status. 
+ */ + pm_request_resume(dev); + + return 0; +} + +/* + * PM calls + */ + +static int intel_resume_child_device(struct device *dev, void *data) +{ + int ret; + struct sdw_slave *slave = dev_to_sdw_dev(dev); + + if (!slave->probed) { + dev_dbg(dev, "skipping device, no probed driver\n"); + return 0; + } + if (!slave->dev_num_sticky) { + dev_dbg(dev, "skipping device, never detected on bus\n"); + return 0; + } + + ret = pm_request_resume(dev); + if (ret < 0) + dev_err(dev, "%s: pm_request_resume failed: %d\n", __func__, ret); + + return ret; +} + +static int __maybe_unused intel_pm_prepare(struct device *dev) +{ + struct sdw_cdns *cdns = dev_get_drvdata(dev); + struct sdw_intel *sdw = cdns_to_intel(cdns); + struct sdw_bus *bus = &cdns->bus; + u32 clock_stop_quirks; + int ret; + + if (bus->prop.hw_disabled || !sdw->startup_done) { + dev_dbg(dev, "SoundWire master %d is disabled or not-started, ignoring\n", + bus->link_id); + return 0; + } + + clock_stop_quirks = sdw->link_res->clock_stop_quirks; + + if (pm_runtime_suspended(dev) && + pm_runtime_suspended(dev->parent) && + ((clock_stop_quirks & SDW_INTEL_CLK_STOP_BUS_RESET) || + !clock_stop_quirks)) { + /* + * if we've enabled clock stop, and the parent is suspended, the SHIM registers + * are not accessible and the shim wake cannot be disabled. + * The only solution is to resume the entire bus to full power + */ + + /* + * If any operation in this block fails, we keep going since we don't want + * to prevent system suspend from happening and errors should be recoverable + * on resume. + */ + + /* + * first resume the device for this link. This will also by construction + * resume the PCI parent device. + */ + ret = pm_request_resume(dev); + if (ret < 0) { + dev_err(dev, "%s: pm_request_resume failed: %d\n", __func__, ret); + return 0; + } + + /* + * Continue resuming the entire bus (parent + child devices) to exit + * the clock stop mode. If there are no devices connected on this link + * this is a no-op. 
+ * The resume to full power could have been implemented with a .prepare + * step in SoundWire codec drivers. This would however require a lot + * of code to handle an Intel-specific corner case. It is simpler in + * practice to add a loop at the link level. + */ + ret = device_for_each_child(bus->dev, NULL, intel_resume_child_device); + + if (ret < 0) + dev_err(dev, "%s: intel_resume_child_device failed: %d\n", __func__, ret); + } + + return 0; +} + +static int __maybe_unused intel_suspend(struct device *dev) +{ + struct sdw_cdns *cdns = dev_get_drvdata(dev); + struct sdw_intel *sdw = cdns_to_intel(cdns); + struct sdw_bus *bus = &cdns->bus; + u32 clock_stop_quirks; + int ret; + + if (bus->prop.hw_disabled || !sdw->startup_done) { + dev_dbg(dev, "SoundWire master %d is disabled or not-started, ignoring\n", + bus->link_id); + return 0; + } + + if (pm_runtime_suspended(dev)) { + dev_dbg(dev, "pm_runtime status: suspended\n"); + + clock_stop_quirks = sdw->link_res->clock_stop_quirks; + + if ((clock_stop_quirks & SDW_INTEL_CLK_STOP_BUS_RESET) || + !clock_stop_quirks) { + + if (pm_runtime_suspended(dev->parent)) { + /* + * paranoia check: this should not happen with the .prepare + * resume to full power + */ + dev_err(dev, "%s: invalid config: parent is suspended\n", __func__); + } else { + sdw_intel_shim_wake(sdw, false); + } + } + + return 0; + } + + ret = sdw_intel_stop_bus(sdw, false); + if (ret < 0) { + dev_err(dev, "%s: cannot stop bus: %d\n", __func__, ret); + return ret; + } + + return 0; +} + +static int __maybe_unused intel_suspend_runtime(struct device *dev) +{ + struct sdw_cdns *cdns = dev_get_drvdata(dev); + struct sdw_intel *sdw = cdns_to_intel(cdns); + struct sdw_bus *bus = &cdns->bus; + u32 clock_stop_quirks; + int ret; + + if (bus->prop.hw_disabled || !sdw->startup_done) { + dev_dbg(dev, "SoundWire master %d is disabled or not-started, ignoring\n", + bus->link_id); + return 0; + } + + clock_stop_quirks = sdw->link_res->clock_stop_quirks; + + if 
(clock_stop_quirks & SDW_INTEL_CLK_STOP_TEARDOWN) { + ret = sdw_intel_stop_bus(sdw, false); + if (ret < 0) { + dev_err(dev, "%s: cannot stop bus during teardown: %d\n", + __func__, ret); + return ret; + } + } else if (clock_stop_quirks & SDW_INTEL_CLK_STOP_BUS_RESET || !clock_stop_quirks) { + ret = sdw_intel_stop_bus(sdw, true); + if (ret < 0) { + dev_err(dev, "%s: cannot stop bus during clock_stop: %d\n", + __func__, ret); + return ret; + } + } else { + dev_err(dev, "%s clock_stop_quirks %x unsupported\n", + __func__, clock_stop_quirks); + ret = -EINVAL; + } + + return ret; +} + +static int __maybe_unused intel_resume(struct device *dev) +{ + struct sdw_cdns *cdns = dev_get_drvdata(dev); + struct sdw_intel *sdw = cdns_to_intel(cdns); + struct sdw_bus *bus = &cdns->bus; + int link_flags; + int ret; + + if (bus->prop.hw_disabled || !sdw->startup_done) { + dev_dbg(dev, "SoundWire master %d is disabled or not-started, ignoring\n", + bus->link_id); + return 0; + } + + link_flags = md_flags >> (bus->link_id * 8); + + if (pm_runtime_suspended(dev)) { + dev_dbg(dev, "pm_runtime status was suspended, forcing active\n"); + + /* follow required sequence from runtime_pm.rst */ + pm_runtime_disable(dev); + pm_runtime_set_active(dev); + pm_runtime_mark_last_busy(dev); + pm_runtime_enable(dev); + + link_flags = md_flags >> (bus->link_id * 8); + + if (!(link_flags & SDW_INTEL_MASTER_DISABLE_PM_RUNTIME_IDLE)) + pm_runtime_idle(dev); + } + + ret = sdw_intel_link_power_up(sdw); + if (ret) { + dev_err(dev, "%s failed: %d\n", __func__, ret); + return ret; + } + + /* + * make sure all Slaves are tagged as UNATTACHED and provide + * reason for reinitialization + */ + sdw_clear_slave_status(bus, SDW_UNATTACH_REQUEST_MASTER_RESET); + + ret = sdw_intel_start_bus(sdw); + if (ret < 0) { + dev_err(dev, "cannot start bus during resume\n"); + sdw_intel_link_power_down(sdw); + return ret; + } + + /* + * after system resume, the pm_runtime suspend() may kick in + * during the enumeration, before 
any children device force the + * master device to remain active. Using pm_runtime_get() + * routines is not really possible, since it'd prevent the + * master from suspending. + * A reasonable compromise is to update the pm_runtime + * counters and delay the pm_runtime suspend by several + * seconds, by when all enumeration should be complete. + */ + pm_runtime_mark_last_busy(dev); + + return 0; +} + +static int __maybe_unused intel_resume_runtime(struct device *dev) +{ + struct sdw_cdns *cdns = dev_get_drvdata(dev); + struct sdw_intel *sdw = cdns_to_intel(cdns); + struct sdw_bus *bus = &cdns->bus; + u32 clock_stop_quirks; + int ret; + + if (bus->prop.hw_disabled || !sdw->startup_done) { + dev_dbg(dev, "SoundWire master %d is disabled or not-started, ignoring\n", + bus->link_id); + return 0; + } + + /* unconditionally disable WAKEEN interrupt */ + sdw_intel_shim_wake(sdw, false); + + clock_stop_quirks = sdw->link_res->clock_stop_quirks; + + if (clock_stop_quirks & SDW_INTEL_CLK_STOP_TEARDOWN) { + ret = sdw_intel_link_power_up(sdw); + if (ret) { + dev_err(dev, "%s: power_up failed after teardown: %d\n", __func__, ret); + return ret; + } + + /* + * make sure all Slaves are tagged as UNATTACHED and provide + * reason for reinitialization + */ + sdw_clear_slave_status(bus, SDW_UNATTACH_REQUEST_MASTER_RESET); + + ret = sdw_intel_start_bus(sdw); + if (ret < 0) { + dev_err(dev, "%s: cannot start bus after teardown: %d\n", __func__, ret); + sdw_intel_link_power_down(sdw); + return ret; + } + + } else if (clock_stop_quirks & SDW_INTEL_CLK_STOP_BUS_RESET) { + ret = sdw_intel_link_power_up(sdw); + if (ret) { + dev_err(dev, "%s: power_up failed after bus reset: %d\n", __func__, ret); + return ret; + } + + ret = sdw_intel_start_bus_after_reset(sdw); + if (ret < 0) { + dev_err(dev, "%s: cannot start bus after reset: %d\n", __func__, ret); + sdw_intel_link_power_down(sdw); + return ret; + } + } else if (!clock_stop_quirks) { + + sdw_intel_check_clock_stop(sdw); + + ret = 
sdw_intel_link_power_up(sdw); + if (ret) { + dev_err(dev, "%s: power_up failed: %d\n", __func__, ret); + return ret; + } + + ret = sdw_intel_start_bus_after_clock_stop(sdw); + if (ret < 0) { + dev_err(dev, "%s: cannot start bus after clock stop: %d\n", __func__, ret); + sdw_intel_link_power_down(sdw); + return ret; + } + } else { + dev_err(dev, "%s: clock_stop_quirks %x unsupported\n", + __func__, clock_stop_quirks); + ret = -EINVAL; + } + + return ret; +} + +static const struct dev_pm_ops intel_pm = { + .prepare = intel_pm_prepare, + SET_SYSTEM_SLEEP_PM_OPS(intel_suspend, intel_resume) + SET_RUNTIME_PM_OPS(intel_suspend_runtime, intel_resume_runtime, NULL) +}; + +static const struct auxiliary_device_id intel_link_id_table[] = { + { .name = "soundwire_intel.link" }, + {}, +}; +MODULE_DEVICE_TABLE(auxiliary, intel_link_id_table); + +static struct auxiliary_driver sdw_intel_drv = { + .probe = intel_link_probe, + .remove = intel_link_remove, + .driver = { + /* auxiliary_driver_register() sets .name to be the modname */ + .pm = &intel_pm, + }, + .id_table = intel_link_id_table +}; +module_auxiliary_driver(sdw_intel_drv); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("Intel Soundwire Link Driver"); diff --git a/drivers/soundwire/intel_auxdevice.h b/drivers/soundwire/intel_auxdevice.h new file mode 100644 index 000000000000..a00ecde95563 --- /dev/null +++ b/drivers/soundwire/intel_auxdevice.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */ +/* Copyright(c) 2015-2022 Intel Corporation. 
*/ + +#ifndef __SDW_INTEL_AUXDEVICE_H +#define __SDW_INTEL_AUXDEVICE_H + +int intel_link_startup(struct auxiliary_device *auxdev); +int intel_link_process_wakeen_event(struct auxiliary_device *auxdev); + +struct sdw_intel_link_dev { + struct auxiliary_device auxdev; + struct sdw_intel_link_res link_res; +}; + +#define auxiliary_dev_to_sdw_intel_link_dev(auxiliary_dev) \ + container_of(auxiliary_dev, struct sdw_intel_link_dev, auxdev) + +#endif /* __SDW_INTEL_AUXDEVICE_H */ diff --git a/drivers/soundwire/intel_init.c b/drivers/soundwire/intel_init.c index 1e6d74b3e773..b3a8db7c3b77 100644 --- a/drivers/soundwire/intel_init.c +++ b/drivers/soundwire/intel_init.c @@ -17,6 +17,7 @@ #include #include "cadence_master.h" #include "intel.h" +#include "intel_auxdevice.h" static void intel_link_dev_release(struct device *dev) { From 56fb517775f4d71dbca2b1fb3562276138361072 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Fri, 18 Nov 2022 15:08:33 +0200 Subject: [PATCH 2118/4122] habanalabs: fix rc when new CPUCP opcodes are not supported When the new CPUCP opcodes are not supported and a CPUCP packet fails, the return value is the F/W error response which is a positive value. If this packet is sent from IOCTL and the positive value is used, the IOCTL will not be considered as unsuccessful. 
Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index c0909d76d6eb..cf8147e43833 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -324,6 +324,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, if (!prop->supports_advanced_cpucp_rc) { dev_dbg(hdev->dev, "F/W ERROR %d for CPU packet %d\n", rc, opcode); + rc = -EIO; goto scrub_descriptor; } From 0abcae8b48850e0f488d0eb7232323d93bdc4b13 Mon Sep 17 00:00:00 2001 From: Dafna Hirschfeld Date: Sun, 30 Oct 2022 16:49:42 +0200 Subject: [PATCH 2119/4122] habanalabs: add RMWREG32_SHIFTED to set a val within a mask This is similar to RMWREG32, but the given 'val' is already shifted according to the mask. This allows several 'ORed' vals and masks to be set at once The patch also fixes wrong usage of RMWREG32 by replacing it with RMWREG32_SHIFTED Signed-off-by: Dafna Hirschfeld Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/habanalabs.h | 10 +++------- drivers/misc/habanalabs/gaudi2/gaudi2.c | 6 +++--- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index e7f89868428d..0329a0980bb7 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -2498,13 +2498,9 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val); #define WREG32_AND(reg, and) WREG32_P(reg, 0, and) #define WREG32_OR(reg, or) WREG32_P(reg, or, ~(or)) -#define RMWREG32(reg, val, mask) \ - do { \ - u32 tmp_ = RREG32(reg); \ - tmp_ &= ~(mask); \ - tmp_ |= ((val) << __ffs(mask)); \ - WREG32(reg, tmp_); \ - } while (0) +#define RMWREG32_SHIFTED(reg, val, mask) 
WREG32_P(reg, val, ~(mask)) + +#define RMWREG32(reg, val, mask) RMWREG32_SHIFTED(reg, (val) << __ffs(mask), mask) #define RREG32_MASK(reg, mask) ((RREG32(reg) & mask) >> __ffs(mask)) diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index a33a9072fca4..e793fb2bdcbe 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -5052,7 +5052,7 @@ static int gaudi2_pci_mmu_init(struct hl_device *hdev) mmu_base = mmPMMU_HBW_MMU_BASE; stlb_base = mmPMMU_HBW_STLB_BASE; - RMWREG32(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, + RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, (0 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_SHIFT) | (5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_SHIFT) | (4 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_SHIFT) | @@ -5068,7 +5068,7 @@ static int gaudi2_pci_mmu_init(struct hl_device *hdev) if (PAGE_SIZE == SZ_64K) { /* Set page sizes to 64K on hop5 and 16M on hop4 + enable 8 bit hops */ - RMWREG32(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET, + RMWREG32_SHIFTED(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET, FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK, 4) | FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK, 3) | FIELD_PREP( @@ -5116,7 +5116,7 @@ static int gaudi2_dcore_hmmu_init(struct hl_device *hdev, int dcore_id, RMWREG32(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET, 5 /* 64MB */, MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK); - RMWREG32(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, + RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK, 0) | FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK, 3) | FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK, 3) | From 408c46bd6eb7a4e2fb9fd686218e4a13b9de844c Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Mon, 14 
Nov 2022 13:26:21 +0200 Subject: [PATCH 2120/4122] habanalabs: print context refcount value if hard reset fails Failing to kill a user process during a hard reset can be due to a reference to the user context which isn't released. To make it easier to understand if this is the reason for the failure and not something else, add a print of the context refcount value. Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index f5864893237c..926f230def56 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -696,10 +696,22 @@ static void device_hard_reset_pending(struct work_struct *work) flags = device_reset_work->flags | HL_DRV_RESET_FROM_RESET_THR; rc = hl_device_reset(hdev, flags); + if ((rc == -EBUSY) && !hdev->device_fini_pending) { - dev_info(hdev->dev, - "Could not reset device. will try again in %u seconds", - HL_PENDING_RESET_PER_SEC); + struct hl_ctx *ctx = hl_get_compute_ctx(hdev); + + if (ctx) { + /* The read refcount value should subtracted by one, because the read is + * protected with hl_get_compute_ctx(). + */ + dev_info(hdev->dev, + "Could not reset device (compute_ctx refcount %u). will try again in %u seconds", + kref_read(&ctx->refcount) - 1, HL_PENDING_RESET_PER_SEC); + hl_ctx_put(ctx); + } else { + dev_info(hdev->dev, "Could not reset device. 
will try again in %u seconds", + HL_PENDING_RESET_PER_SEC); + } queue_delayed_work(hdev->reset_wq, &device_reset_work->reset_work, msecs_to_jiffies(HL_PENDING_RESET_PER_SEC * 1000)); From 1f615120fc9d24a8df7f14b0d1e79f3402330855 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Wed, 16 Nov 2022 13:14:02 +0200 Subject: [PATCH 2121/4122] habanalabs: don't put context in hl_encaps_handle_do_release_sob() hl_encaps_handle_do_release_sob() can be called only when the last reference to the context object is released and hl_ctx_do_release() is initiated, and therefore it shouldn't call hl_ctx_put(). Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/context.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/misc/habanalabs/common/context.c b/drivers/misc/habanalabs/common/context.c index 2f4620b7990c..ba6675960203 100644 --- a/drivers/misc/habanalabs/common/context.c +++ b/drivers/misc/habanalabs/common/context.c @@ -39,7 +39,6 @@ static void hl_encaps_handle_do_release_sob(struct kref *ref) idr_remove(&mgr->handles, handle->id); spin_unlock(&mgr->lock); - hl_ctx_put(handle->ctx); kfree(handle); } From 893afb248c7a1f24d17719a5e5f4fe4174ecb60c Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Thu, 17 Nov 2022 15:22:31 +0200 Subject: [PATCH 2122/4122] habanalabs: clear non-released encapsulated signals Reserved encapsulated signals which were not released hold the context refcount, leading to a failure when killing the user process on device reset or device fini. Add the release of these left signals in the CS roll-back process. 
Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../habanalabs/common/command_submission.c | 46 ++++++++++++---- drivers/misc/habanalabs/common/context.c | 53 +++++++++++-------- drivers/misc/habanalabs/common/habanalabs.h | 3 +- 3 files changed, 71 insertions(+), 31 deletions(-) diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index f1c69c8ed74a..ea0e5101c10e 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -742,13 +742,11 @@ static void cs_do_release(struct kref *ref) */ if (hl_cs_cmpl->encaps_signals) kref_put(&hl_cs_cmpl->encaps_sig_hdl->refcount, - hl_encaps_handle_do_release); + hl_encaps_release_handle_and_put_ctx); } - if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT) - && cs->encaps_signals) - kref_put(&cs->encaps_sig_hdl->refcount, - hl_encaps_handle_do_release); + if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT) && cs->encaps_signals) + kref_put(&cs->encaps_sig_hdl->refcount, hl_encaps_release_handle_and_put_ctx); out: /* Must be called before hl_ctx_put because inside we use ctx to get @@ -1011,6 +1009,34 @@ static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs) hl_complete_job(hdev, job); } +/* + * release_reserved_encaps_signals() - release reserved encapsulated signals. + * @hdev: pointer to habanalabs device structure + * + * Release reserved encapsulated signals which weren't un-reserved, or for which a CS with + * encapsulated signals wasn't submitted and thus weren't released as part of CS roll-back. + * For these signals need also to put the refcount of the H/W SOB which was taken at the + * reservation. 
+ */ +static void release_reserved_encaps_signals(struct hl_device *hdev) +{ + struct hl_ctx *ctx = hl_get_compute_ctx(hdev); + struct hl_cs_encaps_sig_handle *handle; + struct hl_encaps_signals_mgr *mgr; + u32 id; + + if (!ctx) + return; + + mgr = &ctx->sig_mgr; + + idr_for_each_entry(&mgr->handles, handle, id) + if (handle->cs_seq == ULLONG_MAX) + kref_put(&handle->refcount, hl_encaps_release_handle_and_put_sob_ctx); + + hl_ctx_put(ctx); +} + void hl_cs_rollback_all(struct hl_device *hdev, bool skip_wq_flush) { int i; @@ -1039,6 +1065,8 @@ void hl_cs_rollback_all(struct hl_device *hdev, bool skip_wq_flush) } force_complete_multi_cs(hdev); + + release_reserved_encaps_signals(hdev); } static void @@ -2001,6 +2029,8 @@ static int cs_ioctl_reserve_signals(struct hl_fpriv *hpriv, */ handle->pre_sob_val = prop->next_sob_val - handle->count; + handle->cs_seq = ULLONG_MAX; + *signals_count = prop->next_sob_val; hdev->asic_funcs->hw_queues_unlock(hdev); @@ -2350,10 +2380,8 @@ put_cs: /* We finished with the CS in this function, so put the ref */ cs_put(cs); free_cs_chunk_array: - if (!wait_cs_submitted && cs_encaps_signals && handle_found && - is_wait_cs) - kref_put(&encaps_sig_hdl->refcount, - hl_encaps_handle_do_release); + if (!wait_cs_submitted && cs_encaps_signals && handle_found && is_wait_cs) + kref_put(&encaps_sig_hdl->refcount, hl_encaps_release_handle_and_put_ctx); kfree(cs_chunk_array); out: return rc; diff --git a/drivers/misc/habanalabs/common/context.c b/drivers/misc/habanalabs/common/context.c index ba6675960203..9c8b1b37b510 100644 --- a/drivers/misc/habanalabs/common/context.c +++ b/drivers/misc/habanalabs/common/context.c @@ -9,37 +9,46 @@ #include -void hl_encaps_handle_do_release(struct kref *ref) +static void encaps_handle_do_release(struct hl_cs_encaps_sig_handle *handle, bool put_hw_sob, + bool put_ctx) { - struct hl_cs_encaps_sig_handle *handle = - container_of(ref, struct hl_cs_encaps_sig_handle, refcount); struct hl_encaps_signals_mgr *mgr = 
&handle->ctx->sig_mgr; + if (put_hw_sob) + hw_sob_put(handle->hw_sob); + spin_lock(&mgr->lock); idr_remove(&mgr->handles, handle->id); spin_unlock(&mgr->lock); - hl_ctx_put(handle->ctx); + if (put_ctx) + hl_ctx_put(handle->ctx); + kfree(handle); } -static void hl_encaps_handle_do_release_sob(struct kref *ref) +void hl_encaps_release_handle_and_put_ctx(struct kref *ref) { struct hl_cs_encaps_sig_handle *handle = - container_of(ref, struct hl_cs_encaps_sig_handle, refcount); - struct hl_encaps_signals_mgr *mgr = &handle->ctx->sig_mgr; + container_of(ref, struct hl_cs_encaps_sig_handle, refcount); - /* if we're here, then there was a signals reservation but cs with - * encaps signals wasn't submitted, so need to put refcount - * to hw_sob taken at the reservation. - */ - hw_sob_put(handle->hw_sob); + encaps_handle_do_release(handle, false, true); +} - spin_lock(&mgr->lock); - idr_remove(&mgr->handles, handle->id); - spin_unlock(&mgr->lock); +static void hl_encaps_release_handle_and_put_sob(struct kref *ref) +{ + struct hl_cs_encaps_sig_handle *handle = + container_of(ref, struct hl_cs_encaps_sig_handle, refcount); - kfree(handle); + encaps_handle_do_release(handle, true, false); +} + +void hl_encaps_release_handle_and_put_sob_ctx(struct kref *ref) +{ + struct hl_cs_encaps_sig_handle *handle = + container_of(ref, struct hl_cs_encaps_sig_handle, refcount); + + encaps_handle_do_release(handle, true, true); } static void hl_encaps_sig_mgr_init(struct hl_encaps_signals_mgr *mgr) @@ -48,8 +57,7 @@ static void hl_encaps_sig_mgr_init(struct hl_encaps_signals_mgr *mgr) idr_init(&mgr->handles); } -static void hl_encaps_sig_mgr_fini(struct hl_device *hdev, - struct hl_encaps_signals_mgr *mgr) +static void hl_encaps_sig_mgr_fini(struct hl_device *hdev, struct hl_encaps_signals_mgr *mgr) { struct hl_cs_encaps_sig_handle *handle; struct idr *idp; @@ -57,11 +65,14 @@ static void hl_encaps_sig_mgr_fini(struct hl_device *hdev, idp = &mgr->handles; + /* The IDR is expected to be empty 
at this stage, because any left signal should have been + * released as part of CS roll-back. + */ if (!idr_is_empty(idp)) { - dev_warn(hdev->dev, "device released while some encaps signals handles are still allocated\n"); + dev_warn(hdev->dev, + "device released while some encaps signals handles are still allocated\n"); idr_for_each_entry(idp, handle, id) - kref_put(&handle->refcount, - hl_encaps_handle_do_release_sob); + kref_put(&handle->refcount, hl_encaps_release_handle_and_put_sob); } idr_destroy(&mgr->handles); diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 0329a0980bb7..e2527d976ee0 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -3775,7 +3775,8 @@ void hl_sysfs_add_dev_vrm_attr(struct hl_device *hdev, struct attribute_group *d void hw_sob_get(struct hl_hw_sob *hw_sob); void hw_sob_put(struct hl_hw_sob *hw_sob); -void hl_encaps_handle_do_release(struct kref *ref); +void hl_encaps_release_handle_and_put_ctx(struct kref *ref); +void hl_encaps_release_handle_and_put_sob_ctx(struct kref *ref); void hl_hw_queue_encaps_sig_set_sob_info(struct hl_device *hdev, struct hl_cs *cs, struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl); From 1b18cf33d6ce63a9f5fe3764d7b20c4738dd1245 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Thu, 17 Nov 2022 18:57:49 +0200 Subject: [PATCH 2123/4122] habanalabs: make print of engines idle mask more readable The engines idle mask was increased to be an array of 4 u64 entries. To make the print of this mask more readable, remove the "0x" prefix, and zero-pad each u64 to 16 bytes if either it isn't zero or if any of the higher-order u64's is not zero. 
Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 27 +++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 926f230def56..87ab329e65d4 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -386,6 +386,23 @@ bool hl_ctrl_device_operational(struct hl_device *hdev, } } +static void print_idle_status_mask(struct hl_device *hdev, const char *message, + u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE]) +{ + u32 pad_width[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {}; + + BUILD_BUG_ON(HL_BUSY_ENGINES_MASK_EXT_SIZE != 4); + + pad_width[3] = idle_mask[3] ? 16 : 0; + pad_width[2] = idle_mask[2] || pad_width[3] ? 16 : 0; + pad_width[1] = idle_mask[1] || pad_width[2] ? 16 : 0; + pad_width[0] = idle_mask[0] || pad_width[1] ? 16 : 0; + + dev_err(hdev->dev, "%s (mask %0*llx_%0*llx_%0*llx_%0*llx)\n", + message, pad_width[3], idle_mask[3], pad_width[2], idle_mask[2], + pad_width[1], idle_mask[1], pad_width[0], idle_mask[0]); +} + static void hpriv_release(struct kref *ref) { u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0}; @@ -416,9 +433,8 @@ static void hpriv_release(struct kref *ref) device_is_idle = hdev->asic_funcs->is_device_idle(hdev, idle_mask, HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL); if (!device_is_idle) { - dev_err(hdev->dev, - "device not idle after user context is closed (0x%llx_%llx_%llx_%llx)\n", - idle_mask[3], idle_mask[2], idle_mask[1], idle_mask[0]); + print_idle_status_mask(hdev, "device is not idle after user context is closed", + idle_mask); reset_device = true; } @@ -1673,9 +1689,8 @@ kill_processes: /* If device is not idle fail the reset process */ if (!hdev->asic_funcs->is_device_idle(hdev, idle_mask, - HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) { - dev_err(hdev->dev, "device is not idle (mask 0x%llx_%llx_%llx_%llx) after reset\n", - 
idle_mask[3], idle_mask[2], idle_mask[1], idle_mask[0]); + HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) { + print_idle_status_mask(hdev, "device is not idle after reset", idle_mask); rc = -EIO; goto out_err; } From 5354a2a0018345774ab2517fc2fe107a6cd894fa Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Tue, 22 Nov 2022 09:59:27 +0200 Subject: [PATCH 2124/4122] habanalabs: fail driver load if EEPROM errors detected In case EEPROM is not burned, firmware sets default EEPROM values. As this is not valid in production, driver should fail load upon any EEPROM error reported by firmware. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 23 ++++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index cf8147e43833..228b92278e48 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -617,16 +617,12 @@ static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val, if (sts_val & CPU_BOOT_DEV_STS0_ENABLED) dev_dbg(hdev->dev, "Device status0 %#x\n", sts_val); - /* All warnings should go here in order not to reach the unknown error validation */ if (err_val & CPU_BOOT_ERR0_EEPROM_FAIL) { - dev_warn(hdev->dev, - "Device boot warning - EEPROM failure detected, default settings applied\n"); - /* This is a warning so we don't want it to disable the - * device - */ - err_val &= ~CPU_BOOT_ERR0_EEPROM_FAIL; + dev_err(hdev->dev, "Device boot error - EEPROM failure detected\n"); + err_exists = true; } + /* All warnings should go here in order not to reach the unknown error validation */ if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED) { dev_warn(hdev->dev, "Device boot warning - Skipped DRAM initialization\n"); @@ -2532,7 +2528,7 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev, struct fw_load_mgr *fw_loader) { struct cpu_dyn_regs 
*dyn_regs; - int rc; + int rc, fw_error_rc; dev_info(hdev->dev, "Loading %sfirmware to device, may take some time...\n", @@ -2632,14 +2628,17 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev, hl_fw_dynamic_update_linux_interrupt_if(hdev); - return 0; - protocol_err: - if (fw_loader->dynamic_loader.fw_desc_valid) - fw_read_errors(hdev, le32_to_cpu(dyn_regs->cpu_boot_err0), + if (fw_loader->dynamic_loader.fw_desc_valid) { + fw_error_rc = fw_read_errors(hdev, le32_to_cpu(dyn_regs->cpu_boot_err0), le32_to_cpu(dyn_regs->cpu_boot_err1), le32_to_cpu(dyn_regs->cpu_boot_dev_sts0), le32_to_cpu(dyn_regs->cpu_boot_dev_sts1)); + + if (fw_error_rc) + return fw_error_rc; + } + return rc; } From 19a17a9fb486b2961dbd7f3fff0d79a144c9a3b6 Mon Sep 17 00:00:00 2001 From: Ohad Sharabi Date: Sun, 20 Nov 2022 15:12:26 +0200 Subject: [PATCH 2125/4122] habanalabs: fix VA range calculation Current implementation is fixing the page size to PAGE_SIZE whereas the input page size may be different. Signed-off-by: Ohad Sharabi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/memory.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c index 541e1b6a2176..7c5c18be294a 100644 --- a/drivers/misc/habanalabs/common/memory.c +++ b/drivers/misc/habanalabs/common/memory.c @@ -2508,24 +2508,20 @@ static int va_range_init(struct hl_device *hdev, struct hl_va_range **va_ranges, /* * PAGE_SIZE alignment - * it is the callers responsibility to align the addresses if the + * it is the caller's responsibility to align the addresses if the * page size is not a power of 2 */ if (is_power_of_2(page_size)) { - if (start & (PAGE_SIZE - 1)) { - start &= PAGE_MASK; - start += PAGE_SIZE; - } + start = round_up(start, page_size); /* * The end of the range is inclusive, hence we need to align it * to the end of the last full page in the range. 
For example if * end = 0x3ff5 with page size 0x1000, we need to align it to - * 0x2fff. The remainig 0xff5 bytes do not form a full page. + * 0x2fff. The remaining 0xff5 bytes do not form a full page. */ - if ((end + 1) & (PAGE_SIZE - 1)) - end = ((end + 1) & PAGE_MASK) - 1; + end = round_down(end + 1, page_size) - 1; } if (start >= end) { From b8a83e600bdde93e7da41ea3204b2b3832a3c99b Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Sun, 13 Nov 2022 22:12:42 +0300 Subject: [PATCH 2126/4122] dt-bindings: imx6q-pcie: Fix clock names for imx6sx and imx8mq Originally as it was defined in the legacy bindings the pcie_inbound_axi and pcie_aux clock names were supposed to be used in the fsl,imx6sx-pcie and fsl,imx8mq-pcie devices respectively. But the bindings conversion has been done incorrectly, so now the fourth clock name is defined as "pcie_inbound_axi for imx6sx-pcie, pcie_aux for imx8mq-pcie", which is completely wrong. Let's fix that by conditionally applying the clock-names constraints based on the compatible string content. Link: https://lore.kernel.org/r/20221113191301.5526-2-Sergey.Semin@baikalelectronics.ru Fixes: 751ca492f131 ("dt-bindings: PCI: imx6: convert the imx pcie controller to dtschema") Signed-off-by: Serge Semin Signed-off-by: Lorenzo Pieralisi Reviewed-by: Rob Herring Acked-by: Alexander Stein --- .../bindings/pci/fsl,imx6q-pcie.yaml | 46 +++++++++++++++++-- 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml index 376e739bcad4..49b4f7a32e71 100644 --- a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml +++ b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml @@ -14,9 +14,6 @@ description: |+ This PCIe host controller is based on the Synopsys DesignWare PCIe IP and thus inherits all the common properties defined in snps,dw-pcie.yaml. 
-allOf: - - $ref: /schemas/pci/snps,dw-pcie.yaml# - properties: compatible: enum: @@ -61,7 +58,7 @@ properties: - const: pcie - const: pcie_bus - const: pcie_phy - - const: pcie_inbound_axi for imx6sx-pcie, pcie_aux for imx8mq-pcie + - enum: [ pcie_inbound_axi, pcie_aux ] num-lanes: const: 1 @@ -175,6 +172,47 @@ required: - clocks - clock-names +allOf: + - $ref: /schemas/pci/snps,dw-pcie.yaml# + - if: + properties: + compatible: + contains: + const: fsl,imx6sx-pcie + then: + properties: + clock-names: + items: + - {} + - {} + - {} + - const: pcie_inbound_axi + - if: + properties: + compatible: + contains: + const: fsl,imx8mq-pcie + then: + properties: + clock-names: + items: + - {} + - {} + - {} + - const: pcie_aux + - if: + properties: + compatible: + not: + contains: + enum: + - fsl,imx6sx-pcie + - fsl,imx8mq-pcie + then: + properties: + clock-names: + maxItems: 3 + unevaluatedProperties: false examples: From 4cf4b9b70ab2785461190c08a3542d2d74c28b46 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Sun, 13 Nov 2022 22:12:43 +0300 Subject: [PATCH 2127/4122] dt-bindings: visconti-pcie: Fix interrupts array max constraints In accordance with the way the device DT-node is actually defined in arch/arm64/boot/dts/toshiba/tmpv7708.dtsi and the way the device is probed by the DW PCIe driver there are two IRQs it actually has. It's MSI IRQ the DT-bindings lack. Let's extend the interrupts property constraints then and fix the schema example so one would be acceptable by the actual device DT-bindings. 
Link: https://lore.kernel.org/r/20221113191301.5526-3-Sergey.Semin@baikalelectronics.ru Fixes: 17c1b16340f0 ("dt-bindings: pci: Add DT binding for Toshiba Visconti PCIe controller") Signed-off-by: Serge Semin Signed-off-by: Lorenzo Pieralisi Acked-by: Rob Herring Acked-by: Nobuhiro Iwamatsu --- .../devicetree/bindings/pci/toshiba,visconti-pcie.yaml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/pci/toshiba,visconti-pcie.yaml b/Documentation/devicetree/bindings/pci/toshiba,visconti-pcie.yaml index 48ed227fc5b9..53da2edd7c9a 100644 --- a/Documentation/devicetree/bindings/pci/toshiba,visconti-pcie.yaml +++ b/Documentation/devicetree/bindings/pci/toshiba,visconti-pcie.yaml @@ -36,7 +36,7 @@ properties: - const: mpu interrupts: - maxItems: 1 + maxItems: 2 clocks: items: @@ -94,8 +94,9 @@ examples: #interrupt-cells = <1>; ranges = <0x81000000 0 0x40000000 0 0x40000000 0 0x00010000>, <0x82000000 0 0x50000000 0 0x50000000 0 0x20000000>; - interrupts = ; - interrupt-names = "intr"; + interrupts = , + ; + interrupt-names = "msi", "intr"; interrupt-map-mask = <0 0 0 7>; interrupt-map = <0 0 0 1 &gic GIC_SPI 215 IRQ_TYPE_LEVEL_HIGH From 057646a5db2f8873efba90eeffd165c2525b413f Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Sun, 13 Nov 2022 22:12:44 +0300 Subject: [PATCH 2128/4122] dt-bindings: PCI: dwc: Detach common RP/EP DT bindings Currently both DW PCIe Root Port and End-point DT bindings are defined as separate schemas. Carefully looking at them, at the hardware reference manuals and seeing there is a generic part of the driver used by the both RP and EP drivers we can greatly simplify the DW PCIe controller bindings by moving some of the properties into the common DT schema. It concerns the PERST GPIO control, number of lanes, number of iATU windows and CDM check properties. 
They will be defined in the snps,dw-pcie-common.yaml schema which will be referenced in the DW PCIe Root Port and End-point DT bindings in order to evaluate the properties common to both of these controllers. The rest of properties like reg{,-names}, clock{s,-names}, reset{s,-names}, etc will be consolidated there in one of the next commits. Link: https://lore.kernel.org/r/20221113191301.5526-4-Sergey.Semin@baikalelectronics.ru Signed-off-by: Serge Semin Signed-off-by: Lorenzo Pieralisi Reviewed-by: Rob Herring --- .../bindings/pci/snps,dw-pcie-common.yaml | 76 +++++++++++++++++++ .../bindings/pci/snps,dw-pcie-ep.yaml | 31 +------- .../devicetree/bindings/pci/snps,dw-pcie.yaml | 33 +------- 3 files changed, 78 insertions(+), 62 deletions(-) create mode 100644 Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml new file mode 100644 index 000000000000..554c2804c608 --- /dev/null +++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml @@ -0,0 +1,76 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/pci/snps,dw-pcie-common.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Synopsys DWC PCIe RP/EP controller + +maintainers: + - Jingoo Han + - Gustavo Pimentel + +description: + Generic Synopsys DesignWare PCIe Root Port and Endpoint controller + properties. + +select: false + +properties: + reset-gpio: + deprecated: true + description: + Reference to the GPIO-controlled PERST# signal. It is used to reset all + the peripheral devices available on the PCIe bus. + maxItems: 1 + + reset-gpios: + description: + Reference to the GPIO-controlled PERST# signal. It is used to reset all + the peripheral devices available on the PCIe bus. + maxItems: 1 + + num-lanes: + description: + Number of PCIe link lanes to use. 
Can be omitted if the already brought + up link is supposed to be preserved. + maximum: 16 + + num-ob-windows: + $ref: /schemas/types.yaml#/definitions/uint32 + deprecated: true + description: + Number of outbound address translation windows. This parameter can be + auto-detected based on the iATU memory writability. So there is no + point in having a dedicated DT-property for it. + maximum: 256 + + num-ib-windows: + $ref: /schemas/types.yaml#/definitions/uint32 + deprecated: true + description: + Number of inbound address translation windows. In the same way as + for the outbound AT windows, this parameter can be auto-detected based + on the iATU memory writability. There is no point having a dedicated + DT-property for it either. + maximum: 256 + + num-viewport: + $ref: /schemas/types.yaml#/definitions/uint32 + deprecated: true + description: + Number of outbound view ports configured in hardware. It's the same as + the number of outbound AT windows. + maximum: 256 + + snps,enable-cdm-check: + $ref: /schemas/types.yaml#/definitions/flag + description: + Enable automatic checking of CDM (Configuration Dependent Module) + registers for data corruption. CDM registers include standard PCIe + configuration space registers, Port Logic registers, DMA and iATU + registers. This feature has been available since DWC PCIe v4.80a. + +additionalProperties: true + +... 
diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml index b78535040f04..eae60901d60e 100644 --- a/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml +++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml @@ -15,6 +15,7 @@ description: | allOf: - $ref: /schemas/pci/pci-ep.yaml# + - $ref: /schemas/pci/snps,dw-pcie-common.yaml# properties: compatible: @@ -36,36 +37,6 @@ properties: items: enum: [dbi, dbi2, config, atu, addr_space, link, atu_dma, appl] - reset-gpio: - description: GPIO pin number of PERST# signal - maxItems: 1 - deprecated: true - - reset-gpios: - description: GPIO controlled connection to PERST# signal - maxItems: 1 - - snps,enable-cdm-check: - type: boolean - description: | - This is a boolean property and if present enables - automatic checking of CDM (Configuration Dependent Module) registers - for data corruption. CDM registers include standard PCIe configuration - space registers, Port Logic registers, DMA and iATU (internal Address - Translation Unit) registers. 
- - num-ib-windows: - $ref: /schemas/types.yaml#/definitions/uint32 - maximum: 256 - description: number of inbound address translation windows - deprecated: true - - num-ob-windows: - $ref: /schemas/types.yaml#/definitions/uint32 - maximum: 256 - description: number of outbound address translation windows - deprecated: true - required: - reg - reg-names diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml index 7287d395e1b6..505b01e0a034 100644 --- a/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml +++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml @@ -15,6 +15,7 @@ description: | allOf: - $ref: /schemas/pci/pci-bus.yaml# + - $ref: /schemas/pci/snps,dw-pcie-common.yaml# properties: compatible: @@ -37,44 +38,12 @@ properties: enum: [ dbi, dbi2, config, atu, atu_dma, app, appl, elbi, mgmt, ctrl, parf, cfg, link, ulreg, smu, mpu, apb, phy ] - num-lanes: - description: | - number of lanes to use (this property should be specified unless - the link is brought already up in firmware) - maximum: 16 - - reset-gpio: - description: GPIO pin number of PERST# signal - maxItems: 1 - deprecated: true - - reset-gpios: - description: GPIO controlled connection to PERST# signal - maxItems: 1 - interrupts: true interrupt-names: true clocks: true - snps,enable-cdm-check: - type: boolean - description: | - This is a boolean property and if present enables - automatic checking of CDM (Configuration Dependent Module) registers - for data corruption. CDM registers include standard PCIe configuration - space registers, Port Logic registers, DMA and iATU (internal Address - Translation Unit) registers. - - num-viewport: - $ref: /schemas/types.yaml#/definitions/uint32 - maximum: 256 - description: | - number of view ports configured in hardware. If a platform - does not specify it, the driver autodetects it. 
- deprecated: true - additionalProperties: true required: From b9fe9985aee2cb62814671b883b9cbfa1c941ab3 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Sun, 13 Nov 2022 22:12:45 +0300 Subject: [PATCH 2129/4122] dt-bindings: PCI: dwc: Remove bus node from the examples It's absolutely redundant seeing by default each node is embedded into its own example-X node with address and size cells set to 1. Link: https://lore.kernel.org/r/20221113191301.5526-5-Sergey.Semin@baikalelectronics.ru Signed-off-by: Serge Semin Signed-off-by: Lorenzo Pieralisi Reviewed-by: Rob Herring --- .../bindings/pci/snps,dw-pcie-ep.yaml | 16 ++++----- .../devicetree/bindings/pci/snps,dw-pcie.yaml | 35 ++++++++++--------- 2 files changed, 24 insertions(+), 27 deletions(-) diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml index eae60901d60e..7d05dcba419b 100644 --- a/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml +++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml @@ -46,14 +46,10 @@ additionalProperties: true examples: - | - bus { - #address-cells = <1>; - #size-cells = <1>; - pcie-ep@dfd00000 { - compatible = "snps,dw-pcie-ep"; - reg = <0xdfc00000 0x0001000>, /* IP registers 1 */ - <0xdfc01000 0x0001000>, /* IP registers 2 */ - <0xd0000000 0x2000000>; /* Configuration space */ - reg-names = "dbi", "dbi2", "addr_space"; - }; + pcie-ep@dfd00000 { + compatible = "snps,dw-pcie-ep"; + reg = <0xdfc00000 0x0001000>, /* IP registers 1 */ + <0xdfc01000 0x0001000>, /* IP registers 2 */ + <0xd0000000 0x2000000>; /* Configuration space */ + reg-names = "dbi", "dbi2", "addr_space"; }; diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml index 505b01e0a034..3fdc80453a85 100644 --- a/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml +++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml @@ -53,21 +53,22 @@ required: 
examples: - | - bus { - #address-cells = <1>; - #size-cells = <1>; - pcie@dfc00000 { - device_type = "pci"; - compatible = "snps,dw-pcie"; - reg = <0xdfc00000 0x0001000>, /* IP registers */ - <0xd0000000 0x0002000>; /* Configuration space */ - reg-names = "dbi", "config"; - #address-cells = <3>; - #size-cells = <2>; - ranges = <0x81000000 0 0x00000000 0xde000000 0 0x00010000>, - <0x82000000 0 0xd0400000 0xd0400000 0 0x0d000000>; - interrupts = <25>, <24>; - #interrupt-cells = <1>; - num-lanes = <1>; - }; + pcie@dfc00000 { + compatible = "snps,dw-pcie"; + device_type = "pci"; + reg = <0xdfc00000 0x0001000>, /* IP registers */ + <0xd0000000 0x0002000>; /* Configuration space */ + reg-names = "dbi", "config"; + #address-cells = <3>; + #size-cells = <2>; + ranges = <0x81000000 0 0x00000000 0xde000000 0 0x00010000>, + <0x82000000 0 0xd0400000 0xd0400000 0 0x0d000000>; + bus-range = <0x0 0xff>; + + interrupts = <25>, <24>; + #interrupt-cells = <1>; + + reset-gpios = <&port0 0 1>; + + num-lanes = <1>; }; From 875596361910711f3e7ba6314075d867e4b74fd1 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Sun, 13 Nov 2022 22:12:46 +0300 Subject: [PATCH 2130/4122] dt-bindings: PCI: dwc: Add phys/phy-names common properties It's normal to have the DW PCIe RP/EP DT-nodes equipped with the explicit PHY phandle references. There can be up to 16 PHYs attached in accordance with the maximum number of supported PCIe lanes. Let's extend the common DW PCIe controller schema with the 'phys' and 'phy-names' properties definition. There are two types of PHY names defined: preferred generic names '^pcie[0-9]+$' and non-preferred vendor-specific names '^pcie([0-9]+|-?phy[0-9]*)?$' so to match the names currently supported by the DW PCIe platform drivers ("pcie": meson; "pciephy": qcom, imx6; "pcie-phy": uniphier, rockchip, spear13xx; "pcie": intel-gw; "pcie-phy%d": keystone, dra7xx; "pcie": histb, etc).
Link: https://lore.kernel.org/r/20221113191301.5526-6-Sergey.Semin@baikalelectronics.ru Signed-off-by: Serge Semin Signed-off-by: Lorenzo Pieralisi Reviewed-by: Rob Herring --- .../bindings/pci/snps,dw-pcie-common.yaml | 24 +++++++++++++++++++ .../bindings/pci/snps,dw-pcie-ep.yaml | 3 +++ .../devicetree/bindings/pci/snps,dw-pcie.yaml | 3 +++ 3 files changed, 30 insertions(+) diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml index 554c2804c608..91d24e400dfc 100644 --- a/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml +++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml @@ -17,6 +17,30 @@ description: select: false properties: + phys: + description: + There can be up to the number of possible lanes PHYs specified placed in + the phandle array in the line-based order. Obviously each the specified + PHYs are supposed to be able to work in the PCIe mode with a speed + implied by the DWC PCIe controller they are attached to. + minItems: 1 + maxItems: 16 + + phy-names: + minItems: 1 + maxItems: 16 + oneOf: + - description: Generic PHY names + items: + pattern: '^pcie[0-9]+$' + - description: + Vendor-specific PHY names. Consider using the generic + names above for new bindings. 
+ items: + oneOf: + - pattern: '^pcie(-?phy[0-9]*)?$' + - pattern: '^p2u-[0-7]$' + reset-gpio: deprecated: true description: diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml index 7d05dcba419b..dcd521aed213 100644 --- a/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml +++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml @@ -52,4 +52,7 @@ examples: <0xdfc01000 0x0001000>, /* IP registers 2 */ <0xd0000000 0x2000000>; /* Configuration space */ reg-names = "dbi", "dbi2", "addr_space"; + + phys = <&pcie_phy0>, <&pcie_phy1>, <&pcie_phy2>, <&pcie_phy3>; + phy-names = "pcie0", "pcie1", "pcie2", "pcie3"; }; diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml index 3fdc80453a85..d9512f7f7124 100644 --- a/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml +++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml @@ -70,5 +70,8 @@ examples: reset-gpios = <&port0 0 1>; + phys = <&pcie_phy>; + phy-names = "pcie"; + num-lanes = <1>; }; From eaa9d886528730bcd7213f0b22c8dd468460f495 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Sun, 13 Nov 2022 22:12:47 +0300 Subject: [PATCH 2131/4122] dt-bindings: PCI: dwc: Add max-link-speed common property In accordance with [1] DW PCIe controllers support up to Gen5 link speed. Let's add the max-link-speed property upper bound to 5 then. The DT bindings of the particular devices are expected to setup more strict constraint on that parameter. [1] Synopsys DesignWare Cores PCI Express Controller Databook, Version 5.40a, March 2019, p. 
27 Link: https://lore.kernel.org/r/20221113191301.5526-7-Sergey.Semin@baikalelectronics.ru Signed-off-by: Serge Semin Signed-off-by: Lorenzo Pieralisi Reviewed-by: Rob Herring --- Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml | 3 +++ Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml | 2 ++ Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml | 1 + 3 files changed, 6 insertions(+) diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml index 91d24e400dfc..e63c21783fc1 100644 --- a/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml +++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml @@ -54,6 +54,9 @@ properties: the peripheral devices available on the PCIe bus. maxItems: 1 + max-link-speed: + maximum: 5 + num-lanes: description: Number of PCIe link lanes to use. Can be omitted if the already brought diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml index dcd521aed213..fc3b5d4ac245 100644 --- a/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml +++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml @@ -55,4 +55,6 @@ examples: phys = <&pcie_phy0>, <&pcie_phy1>, <&pcie_phy2>, <&pcie_phy3>; phy-names = "pcie0", "pcie1", "pcie2", "pcie3"; + + max-link-speed = <3>; }; diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml index d9512f7f7124..e787b9727589 100644 --- a/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml +++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml @@ -74,4 +74,5 @@ examples: phy-names = "pcie"; num-lanes = <1>; + max-link-speed = <3>; }; From f133396e2d0063d589362122da659fe047643384 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Sun, 13 Nov 2022 22:12:48 +0300 Subject: [PATCH 2132/4122] dt-bindings: PCI: dwc: Apply generic 
schema for generic device only Having the generic compatible strings constraints with the 'any'+'generic string' semantic implicitly encourages either to add new DW PCIe-based DT-bindings with the generic compatible string attached or just forget about adding new DT-bindings since the corresponding DT-node will be evaluated anyway. Moreover having that semantic implemented in the generic DT-schema causes the DT-validation tool to apply the schema twice: first by implicit compatible-string-based selection and second by means of the 'allOf: [ $ref ]' statement. Let's fix all of that by dropping the compatible property constraints and selecting the generic DT-schema only for the purely generic DW PCIe DT-nodes. The latter is required since there is a driver for such devices. (Though there are no such DT-nodes currently defined in the kernel DT sources.) Link: https://lore.kernel.org/r/20221113191301.5526-8-Sergey.Semin@baikalelectronics.ru Signed-off-by: Serge Semin Signed-off-by: Lorenzo Pieralisi Reviewed-by: Rob Herring --- .../devicetree/bindings/pci/snps,dw-pcie-ep.yaml | 16 ++++++++++------ .../devicetree/bindings/pci/snps,dw-pcie.yaml | 16 ++++++++++------ 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml index fc3b5d4ac245..d04001248b53 100644 --- a/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml +++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml @@ -13,16 +13,20 @@ maintainers: description: | Synopsys DesignWare PCIe host controller endpoint +# Please create a separate DT-schema for your DWC PCIe Endpoint controller +# and make sure it's assigned with the vendor-specific compatible string.
+select: + properties: + compatible: + const: snps,dw-pcie-ep + required: + - compatible + allOf: - $ref: /schemas/pci/pci-ep.yaml# - $ref: /schemas/pci/snps,dw-pcie-common.yaml# properties: - compatible: - anyOf: - - {} - - const: snps,dw-pcie-ep - reg: description: | It should contain Data Bus Interface (dbi) and config registers for all @@ -38,9 +42,9 @@ properties: enum: [dbi, dbi2, config, atu, addr_space, link, atu_dma, appl] required: + - compatible - reg - reg-names - - compatible additionalProperties: true diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml index e787b9727589..85861b71d9ff 100644 --- a/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml +++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml @@ -13,16 +13,20 @@ maintainers: description: | Synopsys DesignWare PCIe host controller +# Please create a separate DT-schema for your DWC PCIe Root Port controller +# and make sure it's assigned with the vendor-specific compatible string. +select: + properties: + compatible: + const: snps,dw-pcie + required: + - compatible + allOf: - $ref: /schemas/pci/pci-bus.yaml# - $ref: /schemas/pci/snps,dw-pcie-common.yaml# properties: - compatible: - anyOf: - - {} - - const: snps,dw-pcie - reg: description: | It should contain Data Bus Interface (dbi) and config registers for all @@ -47,9 +51,9 @@ properties: additionalProperties: true required: + - compatible - reg - reg-names - - compatible examples: - | From 12f7936c7a0e0c40069ff12ddfd091a29da6e77c Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Sun, 13 Nov 2022 22:12:49 +0300 Subject: [PATCH 2133/4122] dt-bindings: PCI: dwc: Add max-functions EP property In accordance with [1] the CX_NFUNC IP-core synthesize parameter is responsible for the number of physical functions to support in the EP mode. Its upper limit is 32. Let's use it to constrain the number of PCIe functions the DW PCIe EP DT-nodes can advertise. 
[1] Synopsys DesignWare Cores PCI Express Controller Databook - DWC PCIe Endpoint, Version 5.40a, March 2019, p. 887. Link: https://lore.kernel.org/r/20221113191301.5526-9-Sergey.Semin@baikalelectronics.ru Signed-off-by: Serge Semin Signed-off-by: Lorenzo Pieralisi Reviewed-by: Rob Herring --- Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml index d04001248b53..71dd19ae1060 100644 --- a/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml +++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml @@ -41,6 +41,9 @@ properties: items: enum: [dbi, dbi2, config, atu, addr_space, link, atu_dma, appl] + max-functions: + maximum: 32 + required: - compatible - reg @@ -61,4 +64,5 @@ examples: phy-names = "pcie0", "pcie1", "pcie2", "pcie3"; max-link-speed = <3>; + max-functions = /bits/ 8 <4>; }; From 35486813c41b3a5229b4987857ff597704feda21 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Sun, 13 Nov 2022 22:12:50 +0300 Subject: [PATCH 2134/4122] dt-bindings: PCI: dwc: Add interrupts/interrupt-names common properties Currently the 'interrupts' and 'interrupt-names' properties are defined being too generic to really describe any actual IRQ interface. Moreover the DW PCIe End-point devices are left with no IRQ signals. All of that can be fixed by adding the IRQ-related properties to the common DW PCIe DT-schemas in accordance with the hardware reference manual. The DW PCIe common DT-schema will contain the generic properties definitions with just a number of entries per property, while the DW PCIe RP/EP-specific schemas will have the particular number of items and the generic resource names listed. 
Note since there are DW PCI-based vendor-specific DT-bindings with the custom names assigned to the same IRQ resources we have no much choice but to add them to the generic DT-schemas in order to have the schemas being applicable for such devices. These names are marked as vendor-specific and should be avoided being used in new bindings in favor of the generic names. Link: https://lore.kernel.org/r/20221113191301.5526-10-Sergey.Semin@baikalelectronics.ru Signed-off-by: Serge Semin Signed-off-by: Lorenzo Pieralisi Reviewed-by: Rob Herring --- .../bindings/pci/snps,dw-pcie-common.yaml | 19 ++++ .../bindings/pci/snps,dw-pcie-ep.yaml | 52 +++++++++++ .../devicetree/bindings/pci/snps,dw-pcie.yaml | 88 ++++++++++++++++++- 3 files changed, 157 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml index e63c21783fc1..4646fb14e817 100644 --- a/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml +++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml @@ -17,6 +17,25 @@ description: select: false properties: + interrupts: + description: + There are two main sub-blocks which are normally capable of + generating interrupts. It's System Information Interface and MSI + interface. While the former one has some common for the Host and + Endpoint controllers IRQ-signals, the later interface is obviously + Root Complex specific since it's responsible for the incoming MSI + messages signalling. The System Information IRQ signals are mainly + responsible for reporting the generic PCIe hierarchy and Root + Complex events like VPD IO request, general AER, PME, Hot-plug, link + bandwidth change, link equalization request, INTx asserted/deasserted + Message detection, embedded DMA Tx/Rx/Error. 
+ minItems: 1 + maxItems: 26 + + interrupt-names: + minItems: 1 + maxItems: 26 + phys: description: There can be up to the number of possible lanes PHYs specified placed in diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml index 71dd19ae1060..7d3f8fc8b7b4 100644 --- a/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml +++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml @@ -41,6 +41,55 @@ properties: items: enum: [dbi, dbi2, config, atu, addr_space, link, atu_dma, appl] + interrupts: + description: + There is no mandatory IRQ signals for the normal controller functioning, + but in addition to the native set the platforms may have a link- or + PM-related IRQs specified. + minItems: 1 + maxItems: 20 + + interrupt-names: + minItems: 1 + maxItems: 20 + items: + oneOf: + - description: + Controller request to read or write virtual product data + from/to the VPD capability registers. + const: vpd + - description: + Link Equalization Request flag is set in the Link Status 2 + register (applicable if the corresponding IRQ is enabled in + the Link Control 3 register). + const: l_eq + - description: + Indicates that the eDMA Tx/Rx transfer is complete or that an + error has occurred on the corresponding channel. eDMA can have + eight Tx (Write) and Rx (Read) eDMA channels thus supporting up + to 16 IRQ signals all together. Write eDMA channels shall go + first in the ordered row as per default edma_int[*] bus setup. + pattern: '^dma([0-9]|1[0-5])?$' + - description: + PCIe protocol correctable error or a Data Path protection + correctable error is detected by the automotive/safety + feature. + const: sft_ce + - description: + Indicates that the internal safety mechanism has detected an + uncorrectable error. + const: sft_ue + - description: + Application-specific IRQ raised depending on the vendor-specific + events basis. 
+ const: app + - description: + Vendor-specific IRQ names. Consider using the generic names above + for new bindings. + oneOf: + - description: See native "app" IRQ for details + enum: [ intr ] + max-functions: maximum: 32 @@ -60,6 +109,9 @@ examples: <0xd0000000 0x2000000>; /* Configuration space */ reg-names = "dbi", "dbi2", "addr_space"; + interrupts = <23>, <24>; + interrupt-names = "dma0", "dma1"; + phys = <&pcie_phy0>, <&pcie_phy1>, <&pcie_phy2>, <&pcie_phy3>; phy-names = "pcie0", "pcie1", "pcie2", "pcie3"; diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml index 85861b71d9ff..fa1db57b2b97 100644 --- a/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml +++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml @@ -42,9 +42,92 @@ properties: enum: [ dbi, dbi2, config, atu, atu_dma, app, appl, elbi, mgmt, ctrl, parf, cfg, link, ulreg, smu, mpu, apb, phy ] - interrupts: true + interrupts: + description: + DWC PCIe Root Port/Complex specific IRQ signals. At least MSI interrupt + signal is supposed to be specified for the host controller. + minItems: 1 + maxItems: 26 - interrupt-names: true + interrupt-names: + minItems: 1 + maxItems: 26 + items: + oneOf: + - description: + Controller request to read or write virtual product data + from/to the VPD capability registers. + const: vpd + - description: + Link Equalization Request flag is set in the Link Status 2 + register (applicable if the corresponding IRQ is enabled in + the Link Control 3 register). + const: l_eq + - description: + Indicates that the eDMA Tx/Rx transfer is complete or that an + error has occurred on the corresponding channel. eDMA can have + eight Tx (Write) and Rx (Read) eDMA channels thus supporting up + to 16 IRQ signals all together. Write eDMA channels shall go + first in the ordered row as per default edma_int[*] bus setup. 
+ pattern: '^dma([0-9]|1[0-5])?$' + - description: + PCIe protocol correctable error or a Data Path protection + correctable error is detected by the automotive/safety + feature. + const: sft_ce + - description: + Indicates that the internal safety mechanism has detected an + uncorrectable error. + const: sft_ue + - description: + Application-specific IRQ raised depending on the vendor-specific + events basis. + const: app + - description: + DSP AXI MSI Interrupt detected. It gets de-asserted when there is + no more MSI interrupt pending. The interrupt is relevant to the + iMSI-RX - Integrated MSI Receiver (AXI bridge). + const: msi + - description: + Legacy A/B/C/D interrupt signal. Basically it's triggered by + receiving a Assert_INT{A,B,C,D}/Desassert_INT{A,B,C,D} message + from the downstream device. + pattern: "^int(a|b|c|d)$" + - description: + Error condition detected and a flag is set in the Root Error Status + register of the AER capability. It's asserted when the RC + internally generated an error or an error message is received by + the RC. + const: aer + - description: + PME message is received by the port. That means having the PME + status bit set in the Root Status register (the event is + supposed to be unmasked in the Root Control register). + const: pme + - description: + Hot-plug event is detected. That is a bit has been set in the + Slot Status register and the corresponding event is enabled in + the Slot Control register. + const: hp + - description: + Link Autonomous Bandwidth Status flag has been set in the Link + Status register (the event is supposed to be unmasked in the + Link Control register). + const: bw_au + - description: + Bandwidth Management Status flag has been set in the Link + Status register (the event is supposed to be unmasked in the + Link Control register). + const: bw_mg + - description: + Vendor-specific IRQ names. Consider using the generic names above + for new bindings. 
+ oneOf: + - description: See native "app" IRQ for details + enum: [ intr ] + allOf: + - contains: + const: msi clocks: true @@ -70,6 +153,7 @@ examples: bus-range = <0x0 0xff>; interrupts = <25>, <24>; + interrupt-names = "msi", "hp"; #interrupt-cells = <1>; reset-gpios = <&port0 0 1>; From 4cc13eedb892c53f3d61fb5a1f6d57724541441a Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Sun, 13 Nov 2022 22:12:51 +0300 Subject: [PATCH 2135/4122] dt-bindings: PCI: dwc: Add reg/reg-names common properties Even though only a more-or-less limited set of the CSR spaces can be defined for each DW PCIe controller, the generic DT-schema currently doesn't specify many limitations on the reg-space names used for one or another range. In order to prevent the vendor-specific controller schemas further deviation from the generic interface let's fix that by introducing the reg-names definition in the common DW PCIe DT-schemas and preserving the generic "reg" and "reg-names" properties in there. New DW PCIe device DT-bindings are encouraged to use the generic set of the CSR spaces defined in the generic DW PCIe RP/EP DT-bindings, while the already available vendor-specific DT-bindings can still apply the common DT-schemas. Note the number of reg/reg-names items needs to be changed in the DW PCIe EP DT-schema since along with the "dbi" CSRs space these arrays can have "dbi2", "addr_space", "atu", etc ranges. Also note since there are DW PCIe-based vendor-specific DT-bindings with the custom names assigned to the same CSR resources we don't have much choice but to add them to the generic DT-schemas in order to have the schemas being applicable for such devices. These names are marked as vendor-specific and should be avoided being used in new bindings in favor of the generic names.
Link: https://lore.kernel.org/r/20221113191301.5526-11-Sergey.Semin@baikalelectronics.ru Signed-off-by: Serge Semin Signed-off-by: Lorenzo Pieralisi Reviewed-by: Rob Herring --- .../bindings/pci/snps,dw-pcie-common.yaml | 22 +++++ .../bindings/pci/snps,dw-pcie-ep.yaml | 82 +++++++++++++++++-- .../devicetree/bindings/pci/snps,dw-pcie.yaml | 78 ++++++++++++++++-- 3 files changed, 169 insertions(+), 13 deletions(-) diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml index 4646fb14e817..13c41cd50e54 100644 --- a/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml +++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml @@ -17,6 +17,28 @@ description: select: false properties: + reg: + description: + DWC PCIe CSR space is normally accessed over the dedicated Data Bus + Interface - DBI. In accordance with the reference manual the register + configuration space belongs to the Configuration-Dependent Module (CDM) + and is split up into several sub-parts Standard PCIe configuration + space, Port Logic Registers (PL), Shadow Config-space Registers, + iATU/eDMA registers. The particular sub-space is selected by the + CDM/ELBI (dbi_cs) and CS2 (dbi_cs2) signals (selector bits). Such + configuration provides a flexible interface for the system engineers to + either map the particular space at a desired MMIO address or just leave + them in a contiguous memory space if pure Native or AXI Bridge DBI access + is selected. Note the PCIe CFG-space, PL and Shadow registers are + specific for each activated function, while the rest of the sub-spaces + are common for all of them (if there are more than one). 
+ minItems: 2 + maxItems: 6 + + reg-names: + minItems: 2 + maxItems: 6 + interrupts: description: There are two main sub-blocks which are normally capable of diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml index 7d3f8fc8b7b4..f4d7eb2dec4d 100644 --- a/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml +++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml @@ -28,18 +28,86 @@ allOf: properties: reg: - description: | - It should contain Data Bus Interface (dbi) and config registers for all - versions. - For designware core version >= 4.80, it may contain ATU address space. + description: + DBI, DBI2 reg-spaces and outbound memory window are required for the + normal controller functioning. iATU memory IO region is also required + if the space is unrolled (IP-core version >= 4.80a). minItems: 2 - maxItems: 4 + maxItems: 5 reg-names: minItems: 2 - maxItems: 4 + maxItems: 5 items: - enum: [dbi, dbi2, config, atu, addr_space, link, atu_dma, appl] + oneOf: + - description: + Basic DWC PCIe controller configuration-space accessible over + the DBI interface. This memory space is either activated with + CDM/ELBI = 0 and CS2 = 0 or is a contiguous memory region + with all spaces. Note iATU/eDMA CSRs are indirectly accessible + via the PL viewports on the DWC PCIe controllers older than + v4.80a. + const: dbi + - description: + Shadow DWC PCIe config-space registers. This space is selected + by setting CDM/ELBI = 0 and CS2 = 1. This is an intermix of + the PCI-SIG PCIe CFG-space with the shadow registers for some + PCI Header space, PCI Standard and Extended Structures. It's + mainly relevant for the end-point controller configuration, + but still there are some shadow registers available for the + Root Port mode too. + const: dbi2 + - description: + External Local Bus registers. It's an application-dependent + registers normally defined by the platform engineers. 
The space + can be selected by setting CDM/ELBI = 1 and CS2 = 0 wires or can + be accessed over some platform-specific means (for instance + as a part of a system controller). + enum: [ elbi, app ] + - description: + iATU/eDMA registers common for all device functions. It's an + unrolled memory space with the internal Address Translation + Unit and Enhanced DMA, which is selected by setting CDM/ELBI = 1 + and CS2 = 1. For IP-core releases prior to v4.80a, these registers + have been programmed via an indirect addressing scheme using a + set of viewport CSRs mapped into the PL space. Note iATU is + normally mapped to the 0x0 address of this region, while eDMA + is available at 0x80000 base address. + const: atu + - description: + Platform-specific eDMA registers. Some platforms may have eDMA + CSRs mapped in a non-standard base address. The registers offset + can be changed or the MS/LS-bits of the address can be attached + in an additional RTL block before the MEM-IO transactions reach + the DW PCIe slave interface. + const: dma + - description: + PHY/PCS configuration registers. Some platforms can have the + PCS and PHY CSRs accessible over a dedicated memory mapped + region, but mainly these registers are indirectly accessible + either by means of the embedded PHY viewport schema or by some + platform-specific method. + const: phy + - description: + Outbound iATU-capable memory-region which will be used to + generate various application-specific traffic on the PCIe bus + hierarchy. Its usage scenario depends on the endpoint + functionality, for instance it can be used to create MSI(X) + messages. + const: addr_space + - description: + Vendor-specific CSR names. Consider using the generic names above + for new bindings. + oneOf: + - description: See native 'elbi/app' CSR region for details. + enum: [ link, appl ] + - description: See native 'atu' CSR region for details.
+ enum: [ atu_dma ] + allOf: + - contains: + const: dbi + - contains: + const: addr_space interrupts: description: diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml index fa1db57b2b97..59d3bbb5883a 100644 --- a/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml +++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml @@ -28,10 +28,10 @@ allOf: properties: reg: - description: | - It should contain Data Bus Interface (dbi) and config registers for all - versions. - For designware core version >= 4.80, it may contain ATU address space. + description: + At least DBI reg-space and peripheral devices CFG-space outbound window + are required for the normal controller work. iATU memory IO region is + also required if the space is unrolled (IP-core version >= 4.80a). minItems: 2 maxItems: 5 @@ -39,8 +39,74 @@ properties: minItems: 2 maxItems: 5 items: - enum: [ dbi, dbi2, config, atu, atu_dma, app, appl, elbi, mgmt, ctrl, - parf, cfg, link, ulreg, smu, mpu, apb, phy ] + oneOf: + - description: + Basic DWC PCIe controller configuration-space accessible over + the DBI interface. This memory space is either activated with + CDM/ELBI = 0 and CS2 = 0 or is a contiguous memory region + with all spaces. Note iATU/eDMA CSRs are indirectly accessible + via the PL viewports on the DWC PCIe controllers older than + v4.80a. + const: dbi + - description: + Shadow DWC PCIe config-space registers. This space is selected + by setting CDM/ELBI = 0 and CS2 = 1. This is an intermix of + the PCI-SIG PCIe CFG-space with the shadow registers for some + PCI Header space, PCI Standard and Extended Structures. It's + mainly relevant for the end-point controller configuration, + but still there are some shadow registers available for the + Root Port mode too. + const: dbi2 + - description: + External Local Bus registers. It's an application-dependent + registers normally defined by the platform engineers. 
The space + can be selected by setting CDM/ELBI = 1 and CS2 = 0 wires or can + be accessed over some platform-specific means (for instance + as a part of a system controller). + enum: [ elbi, app ] + - description: + iATU/eDMA registers common for all device functions. It's an + unrolled memory space with the internal Address Translation + Unit and Enhanced DMA, which is selected by setting CDM/ELBI = 1 + and CS2 = 1. For IP-core releases prior to v4.80a, these registers + have been programmed via an indirect addressing scheme using a + set of viewport CSRs mapped into the PL space. Note iATU is + normally mapped to the 0x0 address of this region, while eDMA + is available at 0x80000 base address. + const: atu + - description: + Platform-specific eDMA registers. Some platforms may have eDMA + CSRs mapped in a non-standard base address. The registers offset + can be changed or the MS/LS-bits of the address can be attached + in an additional RTL block before the MEM-IO transactions reach + the DW PCIe slave interface. + const: dma + - description: + PHY/PCS configuration registers. Some platforms can have the + PCS and PHY CSRs accessible over a dedicated memory mapped + region, but mainly these registers are indirectly accessible + either by means of the embedded PHY viewport schema or by some + platform-specific method. + const: phy + - description: + Outbound iATU-capable memory-region which will be used to access + the peripheral PCIe devices configuration space. + const: config + - description: + Vendor-specific CSR names. Consider using the generic names above + for new bindings. + oneOf: + - description: See native 'elbi/app' CSR region for details. + enum: [ apb, mgmt, link, ulreg, appl ] + - description: See native 'atu' CSR region for details. + enum: [ atu_dma ] + - description: Syscon-related CSR regions.
+ enum: [ smu, mpu ] + allOf: + - contains: + const: dbi + - contains: + const: config interrupts: description: From bd9504af9169131156e753a6e47de34ad7a97b7d Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Sun, 13 Nov 2022 22:12:52 +0300 Subject: [PATCH 2136/4122] dt-bindings: PCI: dwc: Add clocks/resets common properties DW PCIe RP/EP reference manuals explicitly define all the clocks and reset requirements in [1] and [2]. Seeing the DW PCIe vendor-specific DT-bindings have already started assigning random names to the same set of the clocks and resets lines, let's define generic name sets and add them to the DW PCIe common DT-schema. Note since there are DW PCI-based vendor-specific DT-bindings with the custom names assigned to the same clocks and resets resources we have not much choice but to add them to the generic DT-schemas in order to have the schemas being applicable for such devices. These names are marked as vendor-specific and should be avoided being used in new bindings in favor of the generic names. [1] Synopsys DesignWare Cores PCI Express Controller Databook - DWC PCIe Root Port, Version 5.40a, March 2019, p.55 - 78. [2] Synopsys DesignWare Cores PCI Express Controller Databook - DWC PCIe Endpoint, Version 5.40a, March 2019, p.58 - 81.
Link: https://lore.kernel.org/r/20221113191301.5526-12-Sergey.Semin@baikalelectronics.ru Signed-off-by: Serge Semin Signed-off-by: Lorenzo Pieralisi Reviewed-by: Rob Herring --- .../bindings/pci/snps,dw-pcie-common.yaml | 120 ++++++++++++++++++ .../bindings/pci/snps,dw-pcie-ep.yaml | 6 + .../devicetree/bindings/pci/snps,dw-pcie.yaml | 2 - 3 files changed, 126 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml index 13c41cd50e54..4d9efcea3859 100644 --- a/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml +++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml @@ -58,6 +58,126 @@ properties: minItems: 1 maxItems: 26 + clocks: + description: + DWC PCIe reference manual explicitly defines a set of the clocks required + to get the controller working correctly. In general all of them can + be divided into two groups':' application and core clocks. Note the + platforms may have some of the clock sources unspecified in case if the + corresponding domains are fed up from a common clock source. + minItems: 1 + maxItems: 7 + + clock-names: + minItems: 1 + maxItems: 7 + items: + oneOf: + - description: + Data Bus Interface (DBI) clock. Clock signal for the AXI-bus + interface of the Configuration-Dependent Module, which is + basically the set of the controller CSRs. + const: dbi + - description: + Application AXI-bus Master interface clock. Basically this is + a clock for the controller DMA interface (PCI-to-CPU). + const: mstr + - description: + Application AXI-bus Slave interface clock. This is a clock for + the CPU-to-PCI memory IO interface. + const: slv + - description: + Controller Core-PCS PIPE interface clock. It's normally + supplied by an external PCS-PHY. + const: pipe + - description: + Controller Primary clock. It's assumed that all controller input + signals (except resets) are synchronous to this clock. 
+ const: core + - description: + Auxiliary clock for the controller PMC domain. The controller + partitioning implies having some parts to operate with this + clock in some power management states. + const: aux + - description: + Generic reference clock. In case if there are several + interfaces fed up with a common clock source it's advisable to + define it with this name (for instance pipe, core and aux can + be connected to a single source of the periodic signal). + const: ref + - description: + Clock for the PHY registers interface. Originally this is + a PHY-viewport-based interface, but some platforms may have + specifically designed one. + const: phy_reg + - description: + Vendor-specific clock names. Consider using the generic names + above for new bindings. + oneOf: + - description: See native 'dbi' clock for details + enum: [ pcie, pcie_apb_sys, aclk_dbi ] + - description: See native 'mstr/slv' clock for details + enum: [ pcie_bus, pcie_inbound_axi, pcie_aclk, aclk_mst, aclk_slv ] + - description: See native 'pipe' clock for details + enum: [ pcie_phy, pcie_phy_ref, link ] + - description: See native 'aux' clock for details + enum: [ pcie_aux ] + - description: See native 'ref' clock for details. + enum: [ gio ] + - description: See native 'phy_reg' clock for details + enum: [ pcie_apb_phy, pclk ] + + resets: + description: + DWC PCIe reference manual explicitly defines a set of the reset + signals required to be de-asserted to properly activate the controller + sub-parts. All of these signals can be divided into two sub-groups':' + application and core resets with respect to the main sub-domains they + are supposed to reset. Note the platforms may have some of these signals + unspecified in case if they are automatically handled or aggregated into + a comprehensive control module.
+ minItems: 1 + maxItems: 10 + + reset-names: + minItems: 1 + maxItems: 10 + items: + oneOf: + - description: Data Bus Interface (DBI) domain reset + const: dbi + - description: AXI-bus Master interface reset + const: mstr + - description: AXI-bus Slave interface reset + const: slv + - description: Application-dependent interface reset + const: app + - description: Controller Non-sticky CSR flags reset + const: non-sticky + - description: Controller sticky CSR flags reset + const: sticky + - description: PIPE-interface (Core-PCS) logic reset + const: pipe + - description: + Controller primary reset (resets everything except PMC module) + const: core + - description: PCS/PHY block reset + const: phy + - description: PMC hot reset signal + const: hot + - description: Cold reset signal + const: pwr + - description: + Vendor-specific reset names. Consider using the generic names + above for new bindings. + oneOf: + - description: See native 'app' reset for details + enum: [ apps, gio, apb ] + - description: See native 'phy' reset for details + enum: [ pciephy, link ] + - description: See native 'pwr' reset for details + enum: [ turnoff ] + phys: description: There can be up to the number of possible lanes PHYs specified placed in diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml index f4d7eb2dec4d..8fc2151691a4 100644 --- a/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml +++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml @@ -180,6 +180,12 @@ examples: interrupts = <23>, <24>; interrupt-names = "dma0", "dma1"; + clocks = <&sys_clk 12>, <&sys_clk 24>; + clock-names = "dbi", "ref"; + + resets = <&sys_rst 12>, <&sys_rst 24>; + reset-names = "dbi", "phy"; + phys = <&pcie_phy0>, <&pcie_phy1>, <&pcie_phy2>, <&pcie_phy3>; phy-names = "pcie0", "pcie1", "pcie2", "pcie3"; diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml 
b/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml index 59d3bbb5883a..c62c8fe517ae 100644 --- a/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml +++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml @@ -195,8 +195,6 @@ properties: - contains: const: msi - clocks: true - additionalProperties: true required: From 4a8972542a6d1eee81c7cc27699b0a47f6a6619e Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Sun, 13 Nov 2022 22:12:53 +0300 Subject: [PATCH 2137/4122] dt-bindings: PCI: dwc: Add dma-coherent property DW PCIe EP/RP AXI- and TRGT1-master interfaces are responsible for the application memory access. They are used by the RP/EP PCIe buses (MWr/MWr TLPs emitted by the peripheral PCIe devices) and the eDMA block. Since all of them mainly involve the system memory and basically mean DMA we can expect the corresponding platforms can be designed in a way to make sure the transactions are cache-coherent. As such the DW PCIe DT-nodes can have the 'dma-coherent' property specified. Let's permit it in the DT-bindings then. Link: https://lore.kernel.org/r/20221113191301.5526-13-Sergey.Semin@baikalelectronics.ru Signed-off-by: Serge Semin Signed-off-by: Lorenzo Pieralisi Reviewed-by: Rob Herring --- Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml index 4d9efcea3859..d87e13496834 100644 --- a/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml +++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml @@ -259,6 +259,8 @@ properties: configuration space registers, Port Logic registers, DMA and iATU registers. This feature has been available since DWC PCIe v4.80a. + dma-coherent: true + additionalProperties: true ... 
From 98b59129cb9f43a37bb92a577145f29ca54353a7 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Sun, 13 Nov 2022 22:12:54 +0300 Subject: [PATCH 2138/4122] dt-bindings: PCI: dwc: Apply common schema to Rockchip DW PCIe nodes As the DT-bindings description states the Rockchip PCIe controller is based on the DW PCIe RP IP-core thus its DT-nodes are supposed to be compatible with the common DW PCIe controller schema. Let's make sure they are evaluated against it by referring to the snps,dw-pcie.yaml schema in the allOf sub-schemas composition. Link: https://lore.kernel.org/r/20221113191301.5526-14-Sergey.Semin@baikalelectronics.ru Signed-off-by: Serge Semin Signed-off-by: Lorenzo Pieralisi Reviewed-by: Rob Herring --- Documentation/devicetree/bindings/pci/rockchip-dw-pcie.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/pci/rockchip-dw-pcie.yaml b/Documentation/devicetree/bindings/pci/rockchip-dw-pcie.yaml index bc0a9d1db750..2be72ae1169f 100644 --- a/Documentation/devicetree/bindings/pci/rockchip-dw-pcie.yaml +++ b/Documentation/devicetree/bindings/pci/rockchip-dw-pcie.yaml @@ -14,10 +14,10 @@ maintainers: description: |+ RK3568 SoC PCIe host controller is based on the Synopsys DesignWare PCIe IP and thus inherits all the common properties defined in - designware-pcie.txt. + snps,dw-pcie.yaml. allOf: - - $ref: /schemas/pci/pci-bus.yaml# + - $ref: /schemas/pci/snps,dw-pcie.yaml# properties: compatible: From ce27c4e61f2dcc41d13f54cbecbd3a4b15db86c8 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Sun, 13 Nov 2022 22:12:55 +0300 Subject: [PATCH 2139/4122] dt-bindings: PCI: dwc: Add Baikal-T1 PCIe Root Port bindings Baikal-T1 SoC is equipped with DWC PCIe v4.60a Root Port controller, which link can be trained to work on up to Gen.3 speed over up to x4 lanes. 
The controller is supposed to be fed up with four clock sources: DBI peripheral clock, AXI application Tx/Rx clocks and external PHY/core reference clock generating the 100MHz signal. In addition to that the platform provides a way to reset each part of the controller: sticky/non-sticky bits, host controller core, PIPE interface, PCS/PHY and Hot/Power reset signal. The Root Port controller is equipped with multiple IRQ lines like MSI, system AER, PME, HP, Bandwidth change, Link equalization request and eDMA ones. The registers space is accessed over the DBI interface. There can be no more than four inbound or outbound iATU windows configured. Link: https://lore.kernel.org/r/20221113191301.5526-15-Sergey.Semin@baikalelectronics.ru Signed-off-by: Serge Semin Signed-off-by: Lorenzo Pieralisi Reviewed-by: Rob Herring --- .../bindings/pci/baikal,bt1-pcie.yaml | 168 ++++++++++++++++++ 1 file changed, 168 insertions(+) create mode 100644 Documentation/devicetree/bindings/pci/baikal,bt1-pcie.yaml diff --git a/Documentation/devicetree/bindings/pci/baikal,bt1-pcie.yaml b/Documentation/devicetree/bindings/pci/baikal,bt1-pcie.yaml new file mode 100644 index 000000000000..8eaa07ae9774 --- /dev/null +++ b/Documentation/devicetree/bindings/pci/baikal,bt1-pcie.yaml @@ -0,0 +1,168 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/pci/baikal,bt1-pcie.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Baikal-T1 PCIe Root Port Controller + +maintainers: + - Serge Semin + +description: + Embedded into Baikal-T1 SoC Root Complex controller with a single port + activated. It's based on the DWC RC PCIe v4.60a IP-core, which is configured + to have just a single Root Port function and is capable of establishing the + link up to Gen.3 speed on x4 lanes. It doesn't have embedded clock and reset + control module, so the proper interface initialization is supposed to be + performed by software.
There are four in- and four outbound iATU regions + which can be used to emit all required TLP types on the PCIe bus. + +allOf: + - $ref: /schemas/pci/snps,dw-pcie.yaml# + +properties: + compatible: + const: baikal,bt1-pcie + + reg: + description: + DBI, DBI2 and at least 4KB outbound iATU-capable region for the + peripheral devices CFG-space access. + maxItems: 3 + + reg-names: + items: + - const: dbi + - const: dbi2 + - const: config + + interrupts: + description: + MSI, AER, PME, Hot-plug, Link Bandwidth Management, Link Equalization + request and eight Read/Write eDMA IRQ lines are available. + maxItems: 14 + + interrupt-names: + items: + - const: dma0 + - const: dma1 + - const: dma2 + - const: dma3 + - const: dma4 + - const: dma5 + - const: dma6 + - const: dma7 + - const: msi + - const: aer + - const: pme + - const: hp + - const: bw_mg + - const: l_eq + + clocks: + description: + DBI (attached to the APB bus), AXI-bus master and slave interfaces + are fed up by the dedicated application clocks. A common reference + clock signal is supposed to be attached to the corresponding Ref-pad + of the SoC. It will be redistributed amongst the controller core + sub-modules (pipe, core, aux, etc). + maxItems: 4 + + clock-names: + items: + - const: dbi + - const: mstr + - const: slv + - const: ref + + resets: + description: + A comprehensive controller reset logic is supposed to be implemented + by software, so almost all the possible application and core reset + signals are exposed via the system CCU module. + maxItems: 9 + + reset-names: + items: + - const: mstr + - const: slv + - const: pwr + - const: hot + - const: phy + - const: core + - const: pipe + - const: sticky + - const: non-sticky + + baikal,bt1-syscon: + $ref: /schemas/types.yaml#/definitions/phandle + description: + Phandle to the Baikal-T1 System Controller DT node. It's required to + access some additional PM, Reset-related and LTSSM signals.
+ + num-lanes: + maximum: 4 + + max-link-speed: + maximum: 3 + +required: + - compatible + - reg + - reg-names + - interrupts + - interrupt-names + +unevaluatedProperties: false + +examples: + - | + #include + #include + + pcie@1f052000 { + compatible = "baikal,bt1-pcie"; + device_type = "pci"; + reg = <0x1f052000 0x1000>, <0x1f053000 0x1000>, <0x1bdbf000 0x1000>; + reg-names = "dbi", "dbi2", "config"; + #address-cells = <3>; + #size-cells = <2>; + ranges = <0x81000000 0 0x00000000 0x1bdb0000 0 0x00008000>, + <0x82000000 0 0x20000000 0x08000000 0 0x13db0000>; + bus-range = <0x0 0xff>; + + interrupts = , + , + , + , + , + , + , + , + , + , + , + , + , + ; + interrupt-names = "dma0", "dma1", "dma2", "dma3", + "dma4", "dma5", "dma6", "dma7", + "msi", "aer", "pme", "hp", "bw_mg", + "l_eq"; + + clocks = <&ccu_sys 1>, <&ccu_axi 6>, <&ccu_axi 7>, <&clk_pcie>; + clock-names = "dbi", "mstr", "slv", "ref"; + + resets = <&ccu_axi 6>, <&ccu_axi 7>, <&ccu_sys 7>, <&ccu_sys 10>, + <&ccu_sys 4>, <&ccu_sys 6>, <&ccu_sys 5>, <&ccu_sys 8>, + <&ccu_sys 9>; + reset-names = "mstr", "slv", "pwr", "hot", "phy", "core", "pipe", + "sticky", "non-sticky"; + + reset-gpios = <&port0 0 GPIO_ACTIVE_LOW>; + + num-lanes = <4>; + max-link-speed = <3>; + }; +... From 8522e17d4cab47b35d43943ca13d677e76ab01b7 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Sun, 13 Nov 2022 22:12:56 +0300 Subject: [PATCH 2140/4122] PCI: dwc: Introduce dma-ranges property support for RC-host In accordance with the generic PCIe Root Port DT-bindings the "dma-ranges" property has the same format as the "ranges" property. The only difference is in their semantics. The "dma-ranges" property describes the PCIe-to-CPU memory mapping in opposite to the CPU-to-PCIe mapping of the "ranges" property. 
Even though the DW PCIe controllers are normally equipped with the internal Address Translation Unit which inbound and outbound tables can be used to implement both properties semantics, it was surprising for me to discover that the host-related part of the DW PCIe driver currently supports the "ranges" property only while the "dma-ranges" windows are just ignored. Having the "dma-ranges" supported in the driver would be very handy for the platforms, that don't tolerate the 1:1 CPU-PCIe memory mapping and require a customized PCIe memory layout. So let's fix that by introducing the "dma-ranges" property support. First of all we suggest to rename the dw_pcie_prog_inbound_atu() method to dw_pcie_prog_ep_inbound_atu() and create a new version of the dw_pcie_prog_inbound_atu() function. Thus we'll have two methods for the RC and EP controllers respectively in the same way as it has been developed for the outbound ATU setup methods. Secondly aside with the memory window index and type the new dw_pcie_prog_inbound_atu() function will accept CPU address, PCIe address and size as its arguments. These parameters define the PCIe and CPU memory ranges which will be used to setup the respective inbound ATU mapping. The passed parameters need to be verified against the ATU ranges constraints in the same way as it is done for the outbound ranges. Finally the DMA-ranges detected for the PCIe controller need to be converted to the inbound ATU entries during the host controller initialization procedure. It will be done in the framework of the dw_pcie_iatu_setup() method. Note before setting the inbound ranges up we need to disable all the inbound ATU entries in order to prevent unexpected PCIe TLPs translations defined by some third party software like bootloaders. 
Link: https://lore.kernel.org/r/20221113191301.5526-16-Sergey.Semin@baikalelectronics.ru Signed-off-by: Serge Semin Signed-off-by: Lorenzo Pieralisi Reviewed-by: Rob Herring Reviewed-by: Manivannan Sadhasivam --- .../pci/controller/dwc/pcie-designware-ep.c | 4 +- .../pci/controller/dwc/pcie-designware-host.c | 32 ++++++++++- drivers/pci/controller/dwc/pcie-designware.c | 56 ++++++++++++++++++- drivers/pci/controller/dwc/pcie-designware.h | 6 +- 4 files changed, 89 insertions(+), 9 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c index efc6c6360e28..40d0056b2f56 100644 --- a/drivers/pci/controller/dwc/pcie-designware-ep.c +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c @@ -171,8 +171,8 @@ static int dw_pcie_ep_inbound_atu(struct dw_pcie_ep *ep, u8 func_no, int type, return -EINVAL; } - ret = dw_pcie_prog_inbound_atu(pci, func_no, free_win, type, - cpu_addr, bar); + ret = dw_pcie_prog_ep_inbound_atu(pci, func_no, free_win, type, + cpu_addr, bar); if (ret < 0) { dev_err(pci->dev, "Failed to program IB window\n"); return ret; diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c index 39f3b37d4033..ea923c25e12d 100644 --- a/drivers/pci/controller/dwc/pcie-designware-host.c +++ b/drivers/pci/controller/dwc/pcie-designware-host.c @@ -643,12 +643,15 @@ static int dw_pcie_iatu_setup(struct dw_pcie_rp *pp) } /* - * Ensure all outbound windows are disabled before proceeding with - * the MEM/IO ranges setups. + * Ensure all out/inbound windows are disabled before proceeding with + * the MEM/IO (dma-)ranges setups. 
*/ for (i = 0; i < pci->num_ob_windows; i++) dw_pcie_disable_atu(pci, PCIE_ATU_REGION_DIR_OB, i); + for (i = 0; i < pci->num_ib_windows; i++) + dw_pcie_disable_atu(pci, PCIE_ATU_REGION_DIR_IB, i); + i = 0; resource_list_for_each_entry(entry, &pp->bridge->windows) { if (resource_type(entry->res) != IORESOURCE_MEM) @@ -685,9 +688,32 @@ static int dw_pcie_iatu_setup(struct dw_pcie_rp *pp) } if (pci->num_ob_windows <= i) - dev_warn(pci->dev, "Resources exceed number of ATU entries (%d)\n", + dev_warn(pci->dev, "Ranges exceed outbound iATU size (%d)\n", pci->num_ob_windows); + i = 0; + resource_list_for_each_entry(entry, &pp->bridge->dma_ranges) { + if (resource_type(entry->res) != IORESOURCE_MEM) + continue; + + if (pci->num_ib_windows <= i) + break; + + ret = dw_pcie_prog_inbound_atu(pci, i++, PCIE_ATU_TYPE_MEM, + entry->res->start, + entry->res->start - entry->offset, + resource_size(entry->res)); + if (ret) { + dev_err(pci->dev, "Failed to set DMA range %pr\n", + entry->res); + return ret; + } + } + + if (pci->num_ib_windows <= i) + dev_warn(pci->dev, "Dma-ranges exceed inbound iATU size (%u)\n", + pci->num_ib_windows); + return 0; } diff --git a/drivers/pci/controller/dwc/pcie-designware.c b/drivers/pci/controller/dwc/pcie-designware.c index 432aead68d1f..7f1fb764897d 100644 --- a/drivers/pci/controller/dwc/pcie-designware.c +++ b/drivers/pci/controller/dwc/pcie-designware.c @@ -393,8 +393,60 @@ static inline void dw_pcie_writel_atu_ib(struct dw_pcie *pci, u32 index, u32 reg dw_pcie_writel_atu(pci, PCIE_ATU_REGION_DIR_IB, index, reg, val); } -int dw_pcie_prog_inbound_atu(struct dw_pcie *pci, u8 func_no, int index, - int type, u64 cpu_addr, u8 bar) +int dw_pcie_prog_inbound_atu(struct dw_pcie *pci, int index, int type, + u64 cpu_addr, u64 pci_addr, u64 size) +{ + u64 limit_addr = pci_addr + size - 1; + u32 retries, val; + + if ((limit_addr & ~pci->region_limit) != (pci_addr & ~pci->region_limit) || + !IS_ALIGNED(cpu_addr, pci->region_align) || + 
!IS_ALIGNED(pci_addr, pci->region_align) || !size) { + return -EINVAL; + } + + dw_pcie_writel_atu_ib(pci, index, PCIE_ATU_LOWER_BASE, + lower_32_bits(pci_addr)); + dw_pcie_writel_atu_ib(pci, index, PCIE_ATU_UPPER_BASE, + upper_32_bits(pci_addr)); + + dw_pcie_writel_atu_ib(pci, index, PCIE_ATU_LIMIT, + lower_32_bits(limit_addr)); + if (dw_pcie_ver_is_ge(pci, 460A)) + dw_pcie_writel_atu_ib(pci, index, PCIE_ATU_UPPER_LIMIT, + upper_32_bits(limit_addr)); + + dw_pcie_writel_atu_ib(pci, index, PCIE_ATU_LOWER_TARGET, + lower_32_bits(cpu_addr)); + dw_pcie_writel_atu_ib(pci, index, PCIE_ATU_UPPER_TARGET, + upper_32_bits(cpu_addr)); + + val = type; + if (upper_32_bits(limit_addr) > upper_32_bits(pci_addr) && + dw_pcie_ver_is_ge(pci, 460A)) + val |= PCIE_ATU_INCREASE_REGION_SIZE; + dw_pcie_writel_atu_ib(pci, index, PCIE_ATU_REGION_CTRL1, val); + dw_pcie_writel_atu_ib(pci, index, PCIE_ATU_REGION_CTRL2, PCIE_ATU_ENABLE); + + /* + * Make sure ATU enable takes effect before any subsequent config + * and I/O accesses. 
+ */ + for (retries = 0; retries < LINK_WAIT_MAX_IATU_RETRIES; retries++) { + val = dw_pcie_readl_atu_ib(pci, index, PCIE_ATU_REGION_CTRL2); + if (val & PCIE_ATU_ENABLE) + return 0; + + mdelay(LINK_WAIT_IATU); + } + + dev_err(pci->dev, "Inbound iATU is not being enabled\n"); + + return -ETIMEDOUT; +} + +int dw_pcie_prog_ep_inbound_atu(struct dw_pcie *pci, u8 func_no, int index, + int type, u64 cpu_addr, u8 bar) { u32 retries, val; diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h index a871ae7eb59e..37801bbce854 100644 --- a/drivers/pci/controller/dwc/pcie-designware.h +++ b/drivers/pci/controller/dwc/pcie-designware.h @@ -346,8 +346,10 @@ int dw_pcie_prog_outbound_atu(struct dw_pcie *pci, int index, int type, u64 cpu_addr, u64 pci_addr, u64 size); int dw_pcie_prog_ep_outbound_atu(struct dw_pcie *pci, u8 func_no, int index, int type, u64 cpu_addr, u64 pci_addr, u64 size); -int dw_pcie_prog_inbound_atu(struct dw_pcie *pci, u8 func_no, int index, - int type, u64 cpu_addr, u8 bar); +int dw_pcie_prog_inbound_atu(struct dw_pcie *pci, int index, int type, + u64 cpu_addr, u64 pci_addr, u64 size); +int dw_pcie_prog_ep_inbound_atu(struct dw_pcie *pci, u8 func_no, int index, + int type, u64 cpu_addr, u8 bar); void dw_pcie_disable_atu(struct dw_pcie *pci, u32 dir, int index); void dw_pcie_setup(struct dw_pcie *pci); void dw_pcie_iatu_detect(struct dw_pcie *pci); From 7f9e982dc4fcf7b4bc7e9dc8a9f344395fc125b8 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Sun, 13 Nov 2022 22:12:57 +0300 Subject: [PATCH 2141/4122] PCI: dwc: Introduce generic controller capabilities interface Since in addition to the already available iATU unrolled mapping we are about to add a few more DW PCIe platform-specific capabilities (CDM-check and generic clocks/resets resources) let's add a generic interface to set and get the flags indicating their availability. The new interface shall improve maintainability of the platform-specific code. 
Link: https://lore.kernel.org/r/20221113191301.5526-17-Sergey.Semin@baikalelectronics.ru Signed-off-by: Serge Semin Signed-off-by: Lorenzo Pieralisi Reviewed-by: Rob Herring Reviewed-by: Manivannan Sadhasivam --- drivers/pci/controller/dwc/pcie-designware.c | 11 ++++++----- drivers/pci/controller/dwc/pcie-designware.h | 12 +++++++++++- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-designware.c b/drivers/pci/controller/dwc/pcie-designware.c index 7f1fb764897d..b9cc4b00e5fe 100644 --- a/drivers/pci/controller/dwc/pcie-designware.c +++ b/drivers/pci/controller/dwc/pcie-designware.c @@ -211,7 +211,7 @@ void dw_pcie_write_dbi2(struct dw_pcie *pci, u32 reg, size_t size, u32 val) static inline void __iomem *dw_pcie_select_atu(struct dw_pcie *pci, u32 dir, u32 index) { - if (pci->iatu_unroll_enabled) + if (dw_pcie_cap_is(pci, IATU_UNROLL)) return pci->atu_base + PCIE_ATU_UNROLL_BASE(dir, index); dw_pcie_writel_dbi(pci, PCIE_ATU_VIEWPORT, dir | index); @@ -591,7 +591,7 @@ static void dw_pcie_iatu_detect_regions(struct dw_pcie *pci) u32 val, min, dir; u64 max; - if (pci->iatu_unroll_enabled) { + if (dw_pcie_cap_is(pci, IATU_UNROLL)) { max_region = min((int)pci->atu_size / 512, 256); } else { dw_pcie_writel_dbi(pci, PCIE_ATU_VIEWPORT, 0xFF); @@ -641,8 +641,9 @@ void dw_pcie_iatu_detect(struct dw_pcie *pci) { struct platform_device *pdev = to_platform_device(pci->dev); - pci->iatu_unroll_enabled = dw_pcie_iatu_unroll_enabled(pci); - if (pci->iatu_unroll_enabled) { + if (dw_pcie_iatu_unroll_enabled(pci)) { + dw_pcie_cap_set(pci, IATU_UNROLL); + if (!pci->atu_base) { struct resource *res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "atu"); @@ -664,7 +665,7 @@ void dw_pcie_iatu_detect(struct dw_pcie *pci) dw_pcie_iatu_detect_regions(pci); - dev_info(pci->dev, "iATU unroll: %s\n", pci->iatu_unroll_enabled ? + dev_info(pci->dev, "iATU unroll: %s\n", dw_pcie_cap_is(pci, IATU_UNROLL) ? 
"enabled" : "disabled"); dev_info(pci->dev, "iATU regions: %u ob, %u ib, align %uK, limit %lluG\n", diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h index 37801bbce854..c6dddacee3b1 100644 --- a/drivers/pci/controller/dwc/pcie-designware.h +++ b/drivers/pci/controller/dwc/pcie-designware.h @@ -12,6 +12,7 @@ #define _PCIE_DESIGNWARE_H #include +#include #include #include #include @@ -43,6 +44,15 @@ (__dw_pcie_ver_cmp(_pci, _ver, ==) && \ __dw_pcie_ver_cmp(_pci, TYPE_ ## _type, >=)) +/* DWC PCIe controller capabilities */ +#define DW_PCIE_CAP_IATU_UNROLL 1 + +#define dw_pcie_cap_is(_pci, _cap) \ + test_bit(DW_PCIE_CAP_ ## _cap, &(_pci)->caps) + +#define dw_pcie_cap_set(_pci, _cap) \ + set_bit(DW_PCIE_CAP_ ## _cap, &(_pci)->caps) + /* Parameters for the waiting for link up routine */ #define LINK_WAIT_MAX_RETRIES 10 #define LINK_WAIT_USLEEP_MIN 90000 @@ -317,10 +327,10 @@ struct dw_pcie { const struct dw_pcie_ops *ops; u32 version; u32 type; + unsigned long caps; int num_lanes; int link_gen; u8 n_fts[2]; - bool iatu_unroll_enabled: 1; }; #define to_dw_pcie_from_pp(port) container_of((port), struct dw_pcie, pp) From ef8c58877fe77c7807777f61f59cffaee89881f7 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Sun, 13 Nov 2022 22:12:58 +0300 Subject: [PATCH 2142/4122] PCI: dwc: Introduce generic resources getter Currently the DW PCIe Root Port and Endpoint CSR spaces are retrieved in the separate parts of the DW PCIe core driver. It doesn't really make sense since the both controller types have identical set of the core CSR regions: DBI, DBI CS2 and iATU/eDMA. Thus we can simplify the DW PCIe Host and EP initialization methods by moving the platform-specific registers space getting and mapping into a common method. It gets to be even more justified seeing the CSRs base address pointers are preserved in the common DW PCIe descriptor. 
Note all the OF-based common DW PCIe settings initialization will be moved to the new method too in order to have a single function for all the generic platform properties handling in single place. A nice side-effect of this change is that the pcie-designware-host.c and pcie-designware-ep.c drivers are cleaned up from all the direct dw_pcie storage modification, which makes the DW PCIe core, Root Port and Endpoint modules more coherent. Link: https://lore.kernel.org/r/20221113191301.5526-18-Sergey.Semin@baikalelectronics.ru Signed-off-by: Serge Semin Signed-off-by: Lorenzo Pieralisi Reviewed-by: Rob Herring --- .../pci/controller/dwc/pcie-designware-ep.c | 25 +------ .../pci/controller/dwc/pcie-designware-host.c | 15 +--- drivers/pci/controller/dwc/pcie-designware.c | 75 ++++++++++++++----- drivers/pci/controller/dwc/pcie-designware.h | 3 + 4 files changed, 65 insertions(+), 53 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c index 40d0056b2f56..d06654895eba 100644 --- a/drivers/pci/controller/dwc/pcie-designware-ep.c +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c @@ -13,8 +13,6 @@ #include #include -#include "../../pci.h" - void dw_pcie_ep_linkup(struct dw_pcie_ep *ep) { struct pci_epc *epc = ep->epc; @@ -711,23 +709,9 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep) INIT_LIST_HEAD(&ep->func_list); - if (!pci->dbi_base) { - res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dbi"); - pci->dbi_base = devm_pci_remap_cfg_resource(dev, res); - if (IS_ERR(pci->dbi_base)) - return PTR_ERR(pci->dbi_base); - } - - if (!pci->dbi_base2) { - res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dbi2"); - if (!res) { - pci->dbi_base2 = pci->dbi_base + SZ_4K; - } else { - pci->dbi_base2 = devm_pci_remap_cfg_resource(dev, res); - if (IS_ERR(pci->dbi_base2)) - return PTR_ERR(pci->dbi_base2); - } - } + ret = dw_pcie_get_resources(pci); + if (ret) + return ret; res = 
platform_get_resource_byname(pdev, IORESOURCE_MEM, "addr_space"); if (!res) @@ -756,9 +740,6 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep) return -ENOMEM; ep->outbound_addr = addr; - if (pci->link_gen < 1) - pci->link_gen = of_pci_get_max_link_speed(np); - epc = devm_pci_epc_create(dev, &epc_ops); if (IS_ERR(epc)) { dev_err(dev, "Failed to create epc device\n"); diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c index ea923c25e12d..3ab6ae3712c4 100644 --- a/drivers/pci/controller/dwc/pcie-designware-host.c +++ b/drivers/pci/controller/dwc/pcie-designware-host.c @@ -16,7 +16,6 @@ #include #include -#include "../../pci.h" #include "pcie-designware.h" static struct pci_ops dw_pcie_ops; @@ -395,6 +394,10 @@ int dw_pcie_host_init(struct dw_pcie_rp *pp) raw_spin_lock_init(&pp->lock); + ret = dw_pcie_get_resources(pci); + if (ret) + return ret; + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "config"); if (res) { pp->cfg0_size = resource_size(res); @@ -408,13 +411,6 @@ int dw_pcie_host_init(struct dw_pcie_rp *pp) return -ENODEV; } - if (!pci->dbi_base) { - res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dbi"); - pci->dbi_base = devm_pci_remap_cfg_resource(dev, res); - if (IS_ERR(pci->dbi_base)) - return PTR_ERR(pci->dbi_base); - } - bridge = devm_pci_alloc_host_bridge(dev, 0); if (!bridge) return -ENOMEM; @@ -429,9 +425,6 @@ int dw_pcie_host_init(struct dw_pcie_rp *pp) pp->io_base = pci_pio_to_address(win->res->start); } - if (pci->link_gen < 1) - pci->link_gen = of_pci_get_max_link_speed(np); - /* Set default bus ops */ bridge->ops = &dw_pcie_ops; bridge->child_ops = &dw_child_pcie_ops; diff --git a/drivers/pci/controller/dwc/pcie-designware.c b/drivers/pci/controller/dwc/pcie-designware.c index b9cc4b00e5fe..393e64ecccd3 100644 --- a/drivers/pci/controller/dwc/pcie-designware.c +++ b/drivers/pci/controller/dwc/pcie-designware.c @@ -11,6 +11,7 @@ #include #include #include +#include 
#include #include #include @@ -19,6 +20,59 @@ #include "../../pci.h" #include "pcie-designware.h" +int dw_pcie_get_resources(struct dw_pcie *pci) +{ + struct platform_device *pdev = to_platform_device(pci->dev); + struct device_node *np = dev_of_node(pci->dev); + struct resource *res; + + if (!pci->dbi_base) { + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dbi"); + pci->dbi_base = devm_pci_remap_cfg_resource(pci->dev, res); + if (IS_ERR(pci->dbi_base)) + return PTR_ERR(pci->dbi_base); + } + + /* DBI2 is mainly useful for the endpoint controller */ + if (!pci->dbi_base2) { + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dbi2"); + if (res) { + pci->dbi_base2 = devm_pci_remap_cfg_resource(pci->dev, res); + if (IS_ERR(pci->dbi_base2)) + return PTR_ERR(pci->dbi_base2); + } else { + pci->dbi_base2 = pci->dbi_base + SZ_4K; + } + } + + /* For non-unrolled iATU/eDMA platforms this range will be ignored */ + if (!pci->atu_base) { + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "atu"); + if (res) { + pci->atu_size = resource_size(res); + pci->atu_base = devm_ioremap_resource(pci->dev, res); + if (IS_ERR(pci->atu_base)) + return PTR_ERR(pci->atu_base); + } else { + pci->atu_base = pci->dbi_base + DEFAULT_DBI_ATU_OFFSET; + } + } + + /* Set a default value suitable for at most 8 in and 8 out windows */ + if (!pci->atu_size) + pci->atu_size = SZ_4K; + + if (pci->link_gen < 1) + pci->link_gen = of_pci_get_max_link_speed(np); + + of_property_read_u32(np, "num-lanes", &pci->num_lanes); + + if (of_property_read_bool(np, "snps,enable-cdm-check")) + dw_pcie_cap_set(pci, CDM_CHECK); + + return 0; +} + void dw_pcie_version_detect(struct dw_pcie *pci) { u32 ver; @@ -639,25 +693,8 @@ static void dw_pcie_iatu_detect_regions(struct dw_pcie *pci) void dw_pcie_iatu_detect(struct dw_pcie *pci) { - struct platform_device *pdev = to_platform_device(pci->dev); - if (dw_pcie_iatu_unroll_enabled(pci)) { dw_pcie_cap_set(pci, IATU_UNROLL); - - if (!pci->atu_base) { 
- struct resource *res = - platform_get_resource_byname(pdev, IORESOURCE_MEM, "atu"); - if (res) { - pci->atu_size = resource_size(res); - pci->atu_base = devm_ioremap_resource(pci->dev, res); - } - if (!pci->atu_base || IS_ERR(pci->atu_base)) - pci->atu_base = pci->dbi_base + DEFAULT_DBI_ATU_OFFSET; - } - - if (!pci->atu_size) - /* Pick a minimal default, enough for 8 in and 8 out windows */ - pci->atu_size = SZ_4K; } else { pci->atu_base = pci->dbi_base + PCIE_ATU_VIEWPORT_BASE; pci->atu_size = PCIE_ATU_VIEWPORT_SIZE; @@ -675,7 +712,6 @@ void dw_pcie_iatu_detect(struct dw_pcie *pci) void dw_pcie_setup(struct dw_pcie *pci) { - struct device_node *np = pci->dev->of_node; u32 val; if (pci->link_gen > 0) @@ -703,14 +739,13 @@ void dw_pcie_setup(struct dw_pcie *pci) val |= PORT_LINK_DLL_LINK_EN; dw_pcie_writel_dbi(pci, PCIE_PORT_LINK_CONTROL, val); - if (of_property_read_bool(np, "snps,enable-cdm-check")) { + if (dw_pcie_cap_is(pci, CDM_CHECK)) { val = dw_pcie_readl_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS); val |= PCIE_PL_CHK_REG_CHK_REG_CONTINUOUS | PCIE_PL_CHK_REG_CHK_REG_START; dw_pcie_writel_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS, val); } - of_property_read_u32(np, "num-lanes", &pci->num_lanes); if (!pci->num_lanes) { dev_dbg(pci->dev, "Using h/w default number of lanes\n"); return; diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h index c6dddacee3b1..081f169e6021 100644 --- a/drivers/pci/controller/dwc/pcie-designware.h +++ b/drivers/pci/controller/dwc/pcie-designware.h @@ -46,6 +46,7 @@ /* DWC PCIe controller capabilities */ #define DW_PCIE_CAP_IATU_UNROLL 1 +#define DW_PCIE_CAP_CDM_CHECK 2 #define dw_pcie_cap_is(_pci, _cap) \ test_bit(DW_PCIE_CAP_ ## _cap, &(_pci)->caps) @@ -338,6 +339,8 @@ struct dw_pcie { #define to_dw_pcie_from_ep(endpoint) \ container_of((endpoint), struct dw_pcie, ep) +int dw_pcie_get_resources(struct dw_pcie *pci); + void dw_pcie_version_detect(struct dw_pcie *pci); u8 
dw_pcie_find_capability(struct dw_pcie *pci, u8 cap); From 9f67ecdd9579228d656192a4b6e951c757085db8 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Sun, 13 Nov 2022 22:12:59 +0300 Subject: [PATCH 2143/4122] PCI: dwc: Combine iATU detection procedures Since the iATU CSR region is now retrieved in the DW PCIe resources getter there is no much benefits in the iATU detection procedures splitting up. Therefore let's join the iATU unroll/viewport detection procedure with the rest of the iATU parameters detection code. The resultant method will be as coherent as before, while the redundant functions will be eliminated thus producing more readable code. Link: https://lore.kernel.org/r/20221113191301.5526-19-Sergey.Semin@baikalelectronics.ru Signed-off-by: Serge Semin Signed-off-by: Lorenzo Pieralisi Reviewed-by: Rob Herring Reviewed-by: Manivannan Sadhasivam --- drivers/pci/controller/dwc/pcie-designware.c | 39 +++++--------------- 1 file changed, 10 insertions(+), 29 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-designware.c b/drivers/pci/controller/dwc/pcie-designware.c index 393e64ecccd3..e979fb8f3cee 100644 --- a/drivers/pci/controller/dwc/pcie-designware.c +++ b/drivers/pci/controller/dwc/pcie-designware.c @@ -628,26 +628,21 @@ static void dw_pcie_link_set_max_speed(struct dw_pcie *pci, u32 link_gen) } -static bool dw_pcie_iatu_unroll_enabled(struct dw_pcie *pci) -{ - u32 val; - - val = dw_pcie_readl_dbi(pci, PCIE_ATU_VIEWPORT); - if (val == 0xffffffff) - return true; - - return false; -} - -static void dw_pcie_iatu_detect_regions(struct dw_pcie *pci) +void dw_pcie_iatu_detect(struct dw_pcie *pci) { int max_region, ob, ib; u32 val, min, dir; u64 max; - if (dw_pcie_cap_is(pci, IATU_UNROLL)) { + val = dw_pcie_readl_dbi(pci, PCIE_ATU_VIEWPORT); + if (val == 0xFFFFFFFF) { + dw_pcie_cap_set(pci, IATU_UNROLL); + max_region = min((int)pci->atu_size / 512, 256); } else { + pci->atu_base = pci->dbi_base + PCIE_ATU_VIEWPORT_BASE; + pci->atu_size = 
PCIE_ATU_VIEWPORT_SIZE; + dw_pcie_writel_dbi(pci, PCIE_ATU_VIEWPORT, 0xFF); max_region = dw_pcie_readl_dbi(pci, PCIE_ATU_VIEWPORT) + 1; } @@ -689,23 +684,9 @@ static void dw_pcie_iatu_detect_regions(struct dw_pcie *pci) pci->num_ib_windows = ib; pci->region_align = 1 << fls(min); pci->region_limit = (max << 32) | (SZ_4G - 1); -} -void dw_pcie_iatu_detect(struct dw_pcie *pci) -{ - if (dw_pcie_iatu_unroll_enabled(pci)) { - dw_pcie_cap_set(pci, IATU_UNROLL); - } else { - pci->atu_base = pci->dbi_base + PCIE_ATU_VIEWPORT_BASE; - pci->atu_size = PCIE_ATU_VIEWPORT_SIZE; - } - - dw_pcie_iatu_detect_regions(pci); - - dev_info(pci->dev, "iATU unroll: %s\n", dw_pcie_cap_is(pci, IATU_UNROLL) ? - "enabled" : "disabled"); - - dev_info(pci->dev, "iATU regions: %u ob, %u ib, align %uK, limit %lluG\n", + dev_info(pci->dev, "iATU: unroll %s, %u ob, %u ib, align %uK, limit %lluG\n", + dw_pcie_cap_is(pci, IATU_UNROLL) ? "T" : "F", pci->num_ob_windows, pci->num_ib_windows, pci->region_align / SZ_1K, (pci->region_limit + 1) / SZ_1G); } From ef69f852a9784017e646e50e3efc715dac7e3fc4 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Sun, 13 Nov 2022 22:13:00 +0300 Subject: [PATCH 2144/4122] PCI: dwc: Introduce generic platform clocks and resets Currently almost each platform driver uses its own resets and clocks naming in order to get the corresponding descriptors. It makes the code harder to maintain and comprehend especially seeing the DWC PCIe core main resets and clocks signals set hasn't changed much for about at least one major IP-core release. 
So in order to organize things around these signals we suggest creating a generic interface for them in accordance with the naming introduced in the DWC PCIe IP-core reference manual: Application clocks: - "dbi" - data bus interface clock (on some DWC PCIe platforms it's referred to as "pclk", "pcie", "sys", "ahb", "cfg", "iface", "gio", "reg", "pcie_apb_sys"); - "mstr" - AXI-bus master interface clock (some DWC PCIe glue drivers refer to this clock as "port", "bus", "pcie_bus", "bus_master/master_bus/axi_m", "pcie_aclk"); - "slv" - AXI-bus slave interface clock (also called "port", "bus", "pcie_bus", "bus_slave/slave_bus/axi_s", "pcie_aclk", "pcie_inbound_axi"). Core clocks: - "pipe" - core-PCS PIPE interface clock coming from external PHY (it's normally named by the platform drivers as just "pipe"); - "core" - primary clock of the controller (none of the platform drivers declare such a clock but in accordance with the ref. manual the devices may have it separately specified); - "aux" - auxiliary PMC domain clock (it is named by some platforms as "pcie_aux" and just "aux"); - "ref" - Generic reference clock (it is a generic clock source, which can be used as a signal source for multiple interfaces, some platforms call it "ref", "general", "pcie_phy", "pcie_phy_ref"). Application resets: - "dbi" - Data-bus interface reset (it's CSR interface clock and is normally called "apb" though technically it's not APB but DWC PCIe-specific interface); - "mstr" - AXI-bus master reset (some platforms call it "port", "apps", "bus", "axi_m"); - "slv" - AXI-bus slave reset (some platforms call it "port", "apps", "bus", "axi_s").
Core resets: - "non-sticky" - non-sticky CSR flags reset; - "sticky" - sticky CSR flags reset; - "pipe" - PIPE-interface (Core-PCS) logic reset (some platforms call it just "pipe"); - "core" - controller primary reset (resets everything except PMC module, some platforms refer to this signal as "soft", "pci"); - "phy" - PCS/PHY block reset (strictly speaking it is normally connected to the input of an external block, but the reference manual says it must be available for the PMC to work correctly, some existing platforms call it "pciephy", "phy", "link"); - "hot" - PMC hot reset signal (also called "sleep"); - "pwr" - cold reset signal (can be referred to as "pwr", "turnoff"). Bus reset: - "perst" - PCIe standard signal used to reset the PCIe peripheral devices. As you can see each platform uses its own naming for basically the same set of the signals. In the framework of this commit we suggest adding a set of the clocks and reset signals resources, corresponding names and identifiers for each denoted entity. At the current stage the platforms will be able to use the provided infrastructure to automatically request all these resources and manipulate them in the Host/EP init callbacks. Alas it isn't that easy to create a common cold/hot reset procedure due to too many platform-specifics in the procedure, like the external flags exposure and the delays requirement.
Link: https://lore.kernel.org/r/20221113191301.5526-20-Sergey.Semin@baikalelectronics.ru Signed-off-by: Serge Semin Signed-off-by: Lorenzo Pieralisi --- drivers/pci/controller/dwc/pcie-designware.c | 91 ++++++++++++++++++++ drivers/pci/controller/dwc/pcie-designware.h | 42 +++++++++ 2 files changed, 133 insertions(+) diff --git a/drivers/pci/controller/dwc/pcie-designware.c b/drivers/pci/controller/dwc/pcie-designware.c index e979fb8f3cee..6d5d619ab2e9 100644 --- a/drivers/pci/controller/dwc/pcie-designware.c +++ b/drivers/pci/controller/dwc/pcie-designware.c @@ -10,7 +10,9 @@ #include #include +#include #include +#include #include #include #include @@ -20,11 +22,89 @@ #include "../../pci.h" #include "pcie-designware.h" +static const char * const dw_pcie_app_clks[DW_PCIE_NUM_APP_CLKS] = { + [DW_PCIE_DBI_CLK] = "dbi", + [DW_PCIE_MSTR_CLK] = "mstr", + [DW_PCIE_SLV_CLK] = "slv", +}; + +static const char * const dw_pcie_core_clks[DW_PCIE_NUM_CORE_CLKS] = { + [DW_PCIE_PIPE_CLK] = "pipe", + [DW_PCIE_CORE_CLK] = "core", + [DW_PCIE_AUX_CLK] = "aux", + [DW_PCIE_REF_CLK] = "ref", +}; + +static const char * const dw_pcie_app_rsts[DW_PCIE_NUM_APP_RSTS] = { + [DW_PCIE_DBI_RST] = "dbi", + [DW_PCIE_MSTR_RST] = "mstr", + [DW_PCIE_SLV_RST] = "slv", +}; + +static const char * const dw_pcie_core_rsts[DW_PCIE_NUM_CORE_RSTS] = { + [DW_PCIE_NON_STICKY_RST] = "non-sticky", + [DW_PCIE_STICKY_RST] = "sticky", + [DW_PCIE_CORE_RST] = "core", + [DW_PCIE_PIPE_RST] = "pipe", + [DW_PCIE_PHY_RST] = "phy", + [DW_PCIE_HOT_RST] = "hot", + [DW_PCIE_PWR_RST] = "pwr", +}; + +static int dw_pcie_get_clocks(struct dw_pcie *pci) +{ + int i, ret; + + for (i = 0; i < DW_PCIE_NUM_APP_CLKS; i++) + pci->app_clks[i].id = dw_pcie_app_clks[i]; + + for (i = 0; i < DW_PCIE_NUM_CORE_CLKS; i++) + pci->core_clks[i].id = dw_pcie_core_clks[i]; + + ret = devm_clk_bulk_get_optional(pci->dev, DW_PCIE_NUM_APP_CLKS, + pci->app_clks); + if (ret) + return ret; + + return devm_clk_bulk_get_optional(pci->dev, 
DW_PCIE_NUM_CORE_CLKS, + pci->core_clks); +} + +static int dw_pcie_get_resets(struct dw_pcie *pci) +{ + int i, ret; + + for (i = 0; i < DW_PCIE_NUM_APP_RSTS; i++) + pci->app_rsts[i].id = dw_pcie_app_rsts[i]; + + for (i = 0; i < DW_PCIE_NUM_CORE_RSTS; i++) + pci->core_rsts[i].id = dw_pcie_core_rsts[i]; + + ret = devm_reset_control_bulk_get_optional_shared(pci->dev, + DW_PCIE_NUM_APP_RSTS, + pci->app_rsts); + if (ret) + return ret; + + ret = devm_reset_control_bulk_get_optional_exclusive(pci->dev, + DW_PCIE_NUM_CORE_RSTS, + pci->core_rsts); + if (ret) + return ret; + + pci->pe_rst = devm_gpiod_get_optional(pci->dev, "reset", GPIOD_OUT_HIGH); + if (IS_ERR(pci->pe_rst)) + return PTR_ERR(pci->pe_rst); + + return 0; +} + int dw_pcie_get_resources(struct dw_pcie *pci) { struct platform_device *pdev = to_platform_device(pci->dev); struct device_node *np = dev_of_node(pci->dev); struct resource *res; + int ret; if (!pci->dbi_base) { res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dbi"); @@ -62,6 +142,17 @@ int dw_pcie_get_resources(struct dw_pcie *pci) if (!pci->atu_size) pci->atu_size = SZ_4K; + /* LLDD is supposed to manually switch the clocks and resets state */ + if (dw_pcie_cap_is(pci, REQ_RES)) { + ret = dw_pcie_get_clocks(pci); + if (ret) + return ret; + + ret = dw_pcie_get_resets(pci); + if (ret) + return ret; + } + if (pci->link_gen < 1) pci->link_gen = of_pci_get_max_link_speed(np); diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h index 081f169e6021..393dfb931df6 100644 --- a/drivers/pci/controller/dwc/pcie-designware.h +++ b/drivers/pci/controller/dwc/pcie-designware.h @@ -13,10 +13,13 @@ #include #include +#include #include +#include #include #include #include +#include #include #include @@ -45,6 +48,7 @@ __dw_pcie_ver_cmp(_pci, TYPE_ ## _type, >=)) /* DWC PCIe controller capabilities */ +#define DW_PCIE_CAP_REQ_RES 0 #define DW_PCIE_CAP_IATU_UNROLL 1 #define DW_PCIE_CAP_CDM_CHECK 2 @@ -233,6 
+237,39 @@ enum dw_pcie_device_mode { DW_PCIE_RC_TYPE, }; +enum dw_pcie_app_clk { + DW_PCIE_DBI_CLK, + DW_PCIE_MSTR_CLK, + DW_PCIE_SLV_CLK, + DW_PCIE_NUM_APP_CLKS +}; + +enum dw_pcie_core_clk { + DW_PCIE_PIPE_CLK, + DW_PCIE_CORE_CLK, + DW_PCIE_AUX_CLK, + DW_PCIE_REF_CLK, + DW_PCIE_NUM_CORE_CLKS +}; + +enum dw_pcie_app_rst { + DW_PCIE_DBI_RST, + DW_PCIE_MSTR_RST, + DW_PCIE_SLV_RST, + DW_PCIE_NUM_APP_RSTS +}; + +enum dw_pcie_core_rst { + DW_PCIE_NON_STICKY_RST, + DW_PCIE_STICKY_RST, + DW_PCIE_CORE_RST, + DW_PCIE_PIPE_RST, + DW_PCIE_PHY_RST, + DW_PCIE_HOT_RST, + DW_PCIE_PWR_RST, + DW_PCIE_NUM_CORE_RSTS +}; + struct dw_pcie_host_ops { int (*host_init)(struct dw_pcie_rp *pp); void (*host_deinit)(struct dw_pcie_rp *pp); @@ -332,6 +369,11 @@ struct dw_pcie { int num_lanes; int link_gen; u8 n_fts[2]; + struct clk_bulk_data app_clks[DW_PCIE_NUM_APP_CLKS]; + struct clk_bulk_data core_clks[DW_PCIE_NUM_CORE_CLKS]; + struct reset_control_bulk_data app_rsts[DW_PCIE_NUM_APP_RSTS]; + struct reset_control_bulk_data core_rsts[DW_PCIE_NUM_CORE_RSTS]; + struct gpio_desc *pe_rst; }; #define to_dw_pcie_from_pp(port) container_of((port), struct dw_pcie, pp) From ba6ed462dcf41a83b36eb9a74a8c4720040f9762 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Sun, 13 Nov 2022 22:13:01 +0300 Subject: [PATCH 2145/4122] PCI: dwc: Add Baikal-T1 PCIe controller support Baikal-T1 SoC is equipped with DWC PCIe v4.60a host controller. It can be trained to work up to Gen.3 speed over up to x4 lanes. The host controller is attached to the DW PCIe 3.0 PCS via the PIPE-4 interface, which in its turn is connected to the DWC 10G PHY. The whole system is supposed to be fed up with four clock sources: DBI peripheral clock, AXI application clocks and external PHY/core reference clock generating the 100MHz signal. In addition to that the platform provide a way to reset each part of the controller: sticky/non-sticky bits, host controller core, PIPE interface, PCS/PHY and Hot/Power reset signal. 
The driver also provides a way to handle the GPIO-based PERST# signal. Note due to the Baikal-T1 MMIO peculiarity we have to implement the DBI interface accessors which make sure the IO operations are dword-aligned. Link: https://lore.kernel.org/r/20221113191301.5526-21-Sergey.Semin@baikalelectronics.ru Signed-off-by: Serge Semin Signed-off-by: Lorenzo Pieralisi --- drivers/pci/controller/dwc/Kconfig | 9 + drivers/pci/controller/dwc/Makefile | 1 + drivers/pci/controller/dwc/pcie-bt1.c | 643 ++++++++++++++++++++++++++ 3 files changed, 653 insertions(+) create mode 100644 drivers/pci/controller/dwc/pcie-bt1.c diff --git a/drivers/pci/controller/dwc/Kconfig b/drivers/pci/controller/dwc/Kconfig index 62ce3abf0f19..771b8b146623 100644 --- a/drivers/pci/controller/dwc/Kconfig +++ b/drivers/pci/controller/dwc/Kconfig @@ -222,6 +222,15 @@ config PCIE_ARTPEC6_EP Enables support for the PCIe controller in the ARTPEC-6 SoC to work in endpoint mode. This uses the DesignWare core. +config PCIE_BT1 + tristate "Baikal-T1 PCIe controller" + depends on MIPS_BAIKAL_T1 || COMPILE_TEST + depends on PCI_MSI_IRQ_DOMAIN + select PCIE_DW_HOST + help + Enables support for the PCIe controller in the Baikal-T1 SoC to work + in host mode. It's based on the Synopsys DWC PCIe v4.60a IP-core. 
+ config PCIE_ROCKCHIP_DW_HOST bool "Rockchip DesignWare PCIe controller" select PCIE_DW diff --git a/drivers/pci/controller/dwc/Makefile b/drivers/pci/controller/dwc/Makefile index 8ba7b67f5e50..bf5c311875a1 100644 --- a/drivers/pci/controller/dwc/Makefile +++ b/drivers/pci/controller/dwc/Makefile @@ -3,6 +3,7 @@ obj-$(CONFIG_PCIE_DW) += pcie-designware.o obj-$(CONFIG_PCIE_DW_HOST) += pcie-designware-host.o obj-$(CONFIG_PCIE_DW_EP) += pcie-designware-ep.o obj-$(CONFIG_PCIE_DW_PLAT) += pcie-designware-plat.o +obj-$(CONFIG_PCIE_BT1) += pcie-bt1.o obj-$(CONFIG_PCI_DRA7XX) += pci-dra7xx.o obj-$(CONFIG_PCI_EXYNOS) += pci-exynos.o obj-$(CONFIG_PCIE_FU740) += pcie-fu740.o diff --git a/drivers/pci/controller/dwc/pcie-bt1.c b/drivers/pci/controller/dwc/pcie-bt1.c new file mode 100644 index 000000000000..3346770e6654 --- /dev/null +++ b/drivers/pci/controller/dwc/pcie-bt1.c @@ -0,0 +1,643 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2021 BAIKAL ELECTRONICS, JSC + * + * Authors: + * Vadim Vlasov + * Serge Semin + * + * Baikal-T1 PCIe controller driver + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pcie-designware.h" + +/* Baikal-T1 System CCU control registers */ +#define BT1_CCU_PCIE_CLKC 0x140 +#define BT1_CCU_PCIE_REQ_PCS_CLK BIT(16) +#define BT1_CCU_PCIE_REQ_MAC_CLK BIT(17) +#define BT1_CCU_PCIE_REQ_PIPE_CLK BIT(18) + +#define BT1_CCU_PCIE_RSTC 0x144 +#define BT1_CCU_PCIE_REQ_LINK_RST BIT(13) +#define BT1_CCU_PCIE_REQ_SMLH_RST BIT(14) +#define BT1_CCU_PCIE_REQ_PHY_RST BIT(16) +#define BT1_CCU_PCIE_REQ_CORE_RST BIT(24) +#define BT1_CCU_PCIE_REQ_STICKY_RST BIT(26) +#define BT1_CCU_PCIE_REQ_NSTICKY_RST BIT(27) + +#define BT1_CCU_PCIE_PMSC 0x148 +#define BT1_CCU_PCIE_LTSSM_STATE_MASK GENMASK(5, 0) +#define BT1_CCU_PCIE_LTSSM_DET_QUIET 0x00 +#define BT1_CCU_PCIE_LTSSM_DET_ACT 0x01 +#define BT1_CCU_PCIE_LTSSM_POLL_ACT 0x02 +#define 
BT1_CCU_PCIE_LTSSM_POLL_COMP 0x03 +#define BT1_CCU_PCIE_LTSSM_POLL_CONF 0x04 +#define BT1_CCU_PCIE_LTSSM_PRE_DET_QUIET 0x05 +#define BT1_CCU_PCIE_LTSSM_DET_WAIT 0x06 +#define BT1_CCU_PCIE_LTSSM_CFG_LNKWD_START 0x07 +#define BT1_CCU_PCIE_LTSSM_CFG_LNKWD_ACEPT 0x08 +#define BT1_CCU_PCIE_LTSSM_CFG_LNNUM_WAIT 0x09 +#define BT1_CCU_PCIE_LTSSM_CFG_LNNUM_ACEPT 0x0a +#define BT1_CCU_PCIE_LTSSM_CFG_COMPLETE 0x0b +#define BT1_CCU_PCIE_LTSSM_CFG_IDLE 0x0c +#define BT1_CCU_PCIE_LTSSM_RCVR_LOCK 0x0d +#define BT1_CCU_PCIE_LTSSM_RCVR_SPEED 0x0e +#define BT1_CCU_PCIE_LTSSM_RCVR_RCVRCFG 0x0f +#define BT1_CCU_PCIE_LTSSM_RCVR_IDLE 0x10 +#define BT1_CCU_PCIE_LTSSM_L0 0x11 +#define BT1_CCU_PCIE_LTSSM_L0S 0x12 +#define BT1_CCU_PCIE_LTSSM_L123_SEND_IDLE 0x13 +#define BT1_CCU_PCIE_LTSSM_L1_IDLE 0x14 +#define BT1_CCU_PCIE_LTSSM_L2_IDLE 0x15 +#define BT1_CCU_PCIE_LTSSM_L2_WAKE 0x16 +#define BT1_CCU_PCIE_LTSSM_DIS_ENTRY 0x17 +#define BT1_CCU_PCIE_LTSSM_DIS_IDLE 0x18 +#define BT1_CCU_PCIE_LTSSM_DISABLE 0x19 +#define BT1_CCU_PCIE_LTSSM_LPBK_ENTRY 0x1a +#define BT1_CCU_PCIE_LTSSM_LPBK_ACTIVE 0x1b +#define BT1_CCU_PCIE_LTSSM_LPBK_EXIT 0x1c +#define BT1_CCU_PCIE_LTSSM_LPBK_EXIT_TOUT 0x1d +#define BT1_CCU_PCIE_LTSSM_HOT_RST_ENTRY 0x1e +#define BT1_CCU_PCIE_LTSSM_HOT_RST 0x1f +#define BT1_CCU_PCIE_LTSSM_RCVR_EQ0 0x20 +#define BT1_CCU_PCIE_LTSSM_RCVR_EQ1 0x21 +#define BT1_CCU_PCIE_LTSSM_RCVR_EQ2 0x22 +#define BT1_CCU_PCIE_LTSSM_RCVR_EQ3 0x23 +#define BT1_CCU_PCIE_SMLH_LINKUP BIT(6) +#define BT1_CCU_PCIE_RDLH_LINKUP BIT(7) +#define BT1_CCU_PCIE_PM_LINKSTATE_L0S BIT(8) +#define BT1_CCU_PCIE_PM_LINKSTATE_L1 BIT(9) +#define BT1_CCU_PCIE_PM_LINKSTATE_L2 BIT(10) +#define BT1_CCU_PCIE_L1_PENDING BIT(12) +#define BT1_CCU_PCIE_REQ_EXIT_L1 BIT(14) +#define BT1_CCU_PCIE_LTSSM_RCVR_EQ BIT(15) +#define BT1_CCU_PCIE_PM_DSTAT_MASK GENMASK(18, 16) +#define BT1_CCU_PCIE_PM_PME_EN BIT(20) +#define BT1_CCU_PCIE_PM_PME_STATUS BIT(21) +#define BT1_CCU_PCIE_AUX_PM_EN BIT(22) +#define BT1_CCU_PCIE_AUX_PWR_DET BIT(23) 
+#define BT1_CCU_PCIE_WAKE_DET BIT(24) +#define BT1_CCU_PCIE_TURNOFF_REQ BIT(30) +#define BT1_CCU_PCIE_TURNOFF_ACK BIT(31) + +#define BT1_CCU_PCIE_GENC 0x14c +#define BT1_CCU_PCIE_LTSSM_EN BIT(1) +#define BT1_CCU_PCIE_DBI2_MODE BIT(2) +#define BT1_CCU_PCIE_MGMT_EN BIT(3) +#define BT1_CCU_PCIE_RXLANE_FLIP_EN BIT(16) +#define BT1_CCU_PCIE_TXLANE_FLIP_EN BIT(17) +#define BT1_CCU_PCIE_SLV_XFER_PEND BIT(24) +#define BT1_CCU_PCIE_RCV_XFER_PEND BIT(25) +#define BT1_CCU_PCIE_DBI_XFER_PEND BIT(26) +#define BT1_CCU_PCIE_DMA_XFER_PEND BIT(27) + +#define BT1_CCU_PCIE_LTSSM_LINKUP(_pmsc) \ +({ \ + int __state = FIELD_GET(BT1_CCU_PCIE_LTSSM_STATE_MASK, _pmsc); \ + __state >= BT1_CCU_PCIE_LTSSM_L0 && __state <= BT1_CCU_PCIE_LTSSM_L2_WAKE; \ +}) + +/* Baikal-T1 PCIe specific control registers */ +#define BT1_PCIE_AXI2MGM_LANENUM 0xd04 +#define BT1_PCIE_AXI2MGM_LANESEL_MASK GENMASK(3, 0) + +#define BT1_PCIE_AXI2MGM_ADDRCTL 0xd08 +#define BT1_PCIE_AXI2MGM_PHYREG_ADDR_MASK GENMASK(20, 0) +#define BT1_PCIE_AXI2MGM_READ_FLAG BIT(29) +#define BT1_PCIE_AXI2MGM_DONE BIT(30) +#define BT1_PCIE_AXI2MGM_BUSY BIT(31) + +#define BT1_PCIE_AXI2MGM_WRITEDATA 0xd0c +#define BT1_PCIE_AXI2MGM_WDATA GENMASK(15, 0) + +#define BT1_PCIE_AXI2MGM_READDATA 0xd10 +#define BT1_PCIE_AXI2MGM_RDATA GENMASK(15, 0) + +/* Generic Baikal-T1 PCIe interface resources */ +#define BT1_PCIE_NUM_APP_CLKS ARRAY_SIZE(bt1_pcie_app_clks) +#define BT1_PCIE_NUM_CORE_CLKS ARRAY_SIZE(bt1_pcie_core_clks) +#define BT1_PCIE_NUM_APP_RSTS ARRAY_SIZE(bt1_pcie_app_rsts) +#define BT1_PCIE_NUM_CORE_RSTS ARRAY_SIZE(bt1_pcie_core_rsts) + +/* PCIe bus setup delays and timeouts */ +#define BT1_PCIE_RST_DELAY_MS 100 +#define BT1_PCIE_RUN_DELAY_US 100 +#define BT1_PCIE_REQ_DELAY_US 1 +#define BT1_PCIE_REQ_TIMEOUT_US 1000 +#define BT1_PCIE_LNK_DELAY_US 1000 +#define BT1_PCIE_LNK_TIMEOUT_US 1000000 + +static const enum dw_pcie_app_clk bt1_pcie_app_clks[] = { + DW_PCIE_DBI_CLK, DW_PCIE_MSTR_CLK, DW_PCIE_SLV_CLK, +}; + +static const enum 
dw_pcie_core_clk bt1_pcie_core_clks[] = { + DW_PCIE_REF_CLK, +}; + +static const enum dw_pcie_app_rst bt1_pcie_app_rsts[] = { + DW_PCIE_MSTR_RST, DW_PCIE_SLV_RST, +}; + +static const enum dw_pcie_core_rst bt1_pcie_core_rsts[] = { + DW_PCIE_NON_STICKY_RST, DW_PCIE_STICKY_RST, DW_PCIE_CORE_RST, + DW_PCIE_PIPE_RST, DW_PCIE_PHY_RST, DW_PCIE_HOT_RST, DW_PCIE_PWR_RST, +}; + +struct bt1_pcie { + struct dw_pcie dw; + struct platform_device *pdev; + struct regmap *sys_regs; +}; +#define to_bt1_pcie(_dw) container_of(_dw, struct bt1_pcie, dw) + +/* + * Baikal-T1 MMIO space must be read/written by the dword-aligned + * instructions. Note the methods are optimized to have the dword operations + * performed with minimum overhead as the most frequently used ones. + */ +static int bt1_pcie_read_mmio(void __iomem *addr, int size, u32 *val) +{ + unsigned int ofs = (uintptr_t)addr & 0x3; + + if (!IS_ALIGNED((uintptr_t)addr, size)) + return -EINVAL; + + *val = readl(addr - ofs) >> ofs * BITS_PER_BYTE; + if (size == 4) { + return 0; + } else if (size == 2) { + *val &= 0xffff; + return 0; + } else if (size == 1) { + *val &= 0xff; + return 0; + } + + return -EINVAL; +} + +static int bt1_pcie_write_mmio(void __iomem *addr, int size, u32 val) +{ + unsigned int ofs = (uintptr_t)addr & 0x3; + u32 tmp, mask; + + if (!IS_ALIGNED((uintptr_t)addr, size)) + return -EINVAL; + + if (size == 4) { + writel(val, addr); + return 0; + } else if (size == 2 || size == 1) { + mask = GENMASK(size * BITS_PER_BYTE - 1, 0); + tmp = readl(addr - ofs) & ~(mask << ofs * BITS_PER_BYTE); + tmp |= (val & mask) << ofs * BITS_PER_BYTE; + writel(tmp, addr - ofs); + return 0; + } + + return -EINVAL; +} + +static u32 bt1_pcie_read_dbi(struct dw_pcie *pci, void __iomem *base, u32 reg, + size_t size) +{ + int ret; + u32 val; + + ret = bt1_pcie_read_mmio(base + reg, size, &val); + if (ret) { + dev_err(pci->dev, "Read DBI address failed\n"); + return ~0U; + } + + return val; +} + +static void bt1_pcie_write_dbi(struct 
dw_pcie *pci, void __iomem *base, u32 reg, + size_t size, u32 val) +{ + int ret; + + ret = bt1_pcie_write_mmio(base + reg, size, val); + if (ret) + dev_err(pci->dev, "Write DBI address failed\n"); +} + +static void bt1_pcie_write_dbi2(struct dw_pcie *pci, void __iomem *base, u32 reg, + size_t size, u32 val) +{ + struct bt1_pcie *btpci = to_bt1_pcie(pci); + int ret; + + regmap_update_bits(btpci->sys_regs, BT1_CCU_PCIE_GENC, + BT1_CCU_PCIE_DBI2_MODE, BT1_CCU_PCIE_DBI2_MODE); + + ret = bt1_pcie_write_mmio(base + reg, size, val); + if (ret) + dev_err(pci->dev, "Write DBI2 address failed\n"); + + regmap_update_bits(btpci->sys_regs, BT1_CCU_PCIE_GENC, + BT1_CCU_PCIE_DBI2_MODE, 0); +} + +static int bt1_pcie_start_link(struct dw_pcie *pci) +{ + struct bt1_pcie *btpci = to_bt1_pcie(pci); + u32 val; + int ret; + + /* + * Enable LTSSM and make sure it was able to establish both PHY and + * data links. This procedure shall work fine to reach 2.5 GT/s speed. + */ + regmap_update_bits(btpci->sys_regs, BT1_CCU_PCIE_GENC, + BT1_CCU_PCIE_LTSSM_EN, BT1_CCU_PCIE_LTSSM_EN); + + ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_PMSC, val, + (val & BT1_CCU_PCIE_SMLH_LINKUP), + BT1_PCIE_LNK_DELAY_US, BT1_PCIE_LNK_TIMEOUT_US); + if (ret) { + dev_err(pci->dev, "LTSSM failed to set PHY link up\n"); + return ret; + } + + ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_PMSC, val, + (val & BT1_CCU_PCIE_RDLH_LINKUP), + BT1_PCIE_LNK_DELAY_US, BT1_PCIE_LNK_TIMEOUT_US); + if (ret) { + dev_err(pci->dev, "LTSSM failed to set data link up\n"); + return ret; + } + + /* + * Activate direct speed change after the link is established in an + * attempt to reach a higher bus performance (up to Gen.3 - 8.0 GT/s). + * This is required at least to get 8.0 GT/s speed. 
+ */ + val = dw_pcie_readl_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL); + val |= PORT_LOGIC_SPEED_CHANGE; + dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, val); + + ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_PMSC, val, + BT1_CCU_PCIE_LTSSM_LINKUP(val), + BT1_PCIE_LNK_DELAY_US, BT1_PCIE_LNK_TIMEOUT_US); + if (ret) + dev_err(pci->dev, "LTSSM failed to get into L0 state\n"); + + return ret; +} + +static void bt1_pcie_stop_link(struct dw_pcie *pci) +{ + struct bt1_pcie *btpci = to_bt1_pcie(pci); + + regmap_update_bits(btpci->sys_regs, BT1_CCU_PCIE_GENC, + BT1_CCU_PCIE_LTSSM_EN, 0); +} + +static const struct dw_pcie_ops bt1_pcie_ops = { + .read_dbi = bt1_pcie_read_dbi, + .write_dbi = bt1_pcie_write_dbi, + .write_dbi2 = bt1_pcie_write_dbi2, + .start_link = bt1_pcie_start_link, + .stop_link = bt1_pcie_stop_link, +}; + +static struct pci_ops bt1_pci_ops = { + .map_bus = dw_pcie_own_conf_map_bus, + .read = pci_generic_config_read32, + .write = pci_generic_config_write32, +}; + +static int bt1_pcie_get_resources(struct bt1_pcie *btpci) +{ + struct device *dev = btpci->dw.dev; + int i; + + /* DBI access is supposed to be performed by the dword-aligned IOs */ + btpci->dw.pp.bridge->ops = &bt1_pci_ops; + + /* These CSRs are in MMIO so we won't check the regmap-methods status */ + btpci->sys_regs = + syscon_regmap_lookup_by_phandle(dev->of_node, "baikal,bt1-syscon"); + if (IS_ERR(btpci->sys_regs)) + return dev_err_probe(dev, PTR_ERR(btpci->sys_regs), + "Failed to get syscon\n"); + + /* Make sure all the required resources have been specified */ + for (i = 0; i < BT1_PCIE_NUM_APP_CLKS; i++) { + if (!btpci->dw.app_clks[bt1_pcie_app_clks[i]].clk) { + dev_err(dev, "App clocks set is incomplete\n"); + return -ENOENT; + } + } + + for (i = 0; i < BT1_PCIE_NUM_CORE_CLKS; i++) { + if (!btpci->dw.core_clks[bt1_pcie_core_clks[i]].clk) { + dev_err(dev, "Core clocks set is incomplete\n"); + return -ENOENT; + } + } + + for (i = 0; i < BT1_PCIE_NUM_APP_RSTS; i++) { + if 
(!btpci->dw.app_rsts[bt1_pcie_app_rsts[i]].rstc) { + dev_err(dev, "App resets set is incomplete\n"); + return -ENOENT; + } + } + + for (i = 0; i < BT1_PCIE_NUM_CORE_RSTS; i++) { + if (!btpci->dw.core_rsts[bt1_pcie_core_rsts[i]].rstc) { + dev_err(dev, "Core resets set is incomplete\n"); + return -ENOENT; + } + } + + return 0; +} + +static void bt1_pcie_full_stop_bus(struct bt1_pcie *btpci, bool init) +{ + struct device *dev = btpci->dw.dev; + struct dw_pcie *pci = &btpci->dw; + int ret; + + /* Disable LTSSM for sure */ + regmap_update_bits(btpci->sys_regs, BT1_CCU_PCIE_GENC, + BT1_CCU_PCIE_LTSSM_EN, 0); + + /* + * Application reset controls are trigger-based so assert the core + * resets only. + */ + ret = reset_control_bulk_assert(DW_PCIE_NUM_CORE_RSTS, pci->core_rsts); + if (ret) + dev_err(dev, "Failed to assert core resets\n"); + + /* + * Clocks are disabled by default at least in accordance with the clk + * enable counter value on init stage. + */ + if (!init) { + clk_bulk_disable_unprepare(DW_PCIE_NUM_CORE_CLKS, pci->core_clks); + + clk_bulk_disable_unprepare(DW_PCIE_NUM_APP_CLKS, pci->app_clks); + } + + /* The peripheral devices are unavailable anyway so reset them too */ + gpiod_set_value_cansleep(pci->pe_rst, 1); + + /* Make sure all the resets are settled */ + msleep(BT1_PCIE_RST_DELAY_MS); +} + +/* + * Implements the cold reset procedure in accordance with the reference manual + * and available PM signals. 
+ */ +static int bt1_pcie_cold_start_bus(struct bt1_pcie *btpci) +{ + struct device *dev = btpci->dw.dev; + struct dw_pcie *pci = &btpci->dw; + u32 val; + int ret; + + /* First get out of the Power/Hot reset state */ + ret = reset_control_deassert(pci->core_rsts[DW_PCIE_PWR_RST].rstc); + if (ret) { + dev_err(dev, "Failed to deassert PHY reset\n"); + return ret; + } + + ret = reset_control_deassert(pci->core_rsts[DW_PCIE_HOT_RST].rstc); + if (ret) { + dev_err(dev, "Failed to deassert hot reset\n"); + goto err_assert_pwr_rst; + } + + /* Wait for the PM-core to stop requesting the PHY reset */ + ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_RSTC, val, + !(val & BT1_CCU_PCIE_REQ_PHY_RST), + BT1_PCIE_REQ_DELAY_US, BT1_PCIE_REQ_TIMEOUT_US); + if (ret) { + dev_err(dev, "Timed out waiting for PM to stop PHY resetting\n"); + goto err_assert_hot_rst; + } + + ret = reset_control_deassert(pci->core_rsts[DW_PCIE_PHY_RST].rstc); + if (ret) { + dev_err(dev, "Failed to deassert PHY reset\n"); + goto err_assert_hot_rst; + } + + /* Clocks can be now enabled, but the ref one is crucial at this stage */ + ret = clk_bulk_prepare_enable(DW_PCIE_NUM_APP_CLKS, pci->app_clks); + if (ret) { + dev_err(dev, "Failed to enable app clocks\n"); + goto err_assert_phy_rst; + } + + ret = clk_bulk_prepare_enable(DW_PCIE_NUM_CORE_CLKS, pci->core_clks); + if (ret) { + dev_err(dev, "Failed to enable ref clocks\n"); + goto err_disable_app_clk; + } + + /* Wait for the PM to stop requesting the controller core reset */ + ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_RSTC, val, + !(val & BT1_CCU_PCIE_REQ_CORE_RST), + BT1_PCIE_REQ_DELAY_US, BT1_PCIE_REQ_TIMEOUT_US); + if (ret) { + dev_err(dev, "Timed out waiting for PM to stop core resetting\n"); + goto err_disable_core_clk; + } + + /* PCS-PIPE interface and controller core can be now activated */ + ret = reset_control_deassert(pci->core_rsts[DW_PCIE_PIPE_RST].rstc); + if (ret) { + dev_err(dev, "Failed to deassert PIPE 
reset\n"); + goto err_disable_core_clk; + } + + ret = reset_control_deassert(pci->core_rsts[DW_PCIE_CORE_RST].rstc); + if (ret) { + dev_err(dev, "Failed to deassert core reset\n"); + goto err_assert_pipe_rst; + } + + /* It's recommended to reset the core and application logic together */ + ret = reset_control_bulk_reset(DW_PCIE_NUM_APP_RSTS, pci->app_rsts); + if (ret) { + dev_err(dev, "Failed to reset app domain\n"); + goto err_assert_core_rst; + } + + /* Sticky/Non-sticky CSR flags can be now unreset too */ + ret = reset_control_deassert(pci->core_rsts[DW_PCIE_STICKY_RST].rstc); + if (ret) { + dev_err(dev, "Failed to deassert sticky reset\n"); + goto err_assert_core_rst; + } + + ret = reset_control_deassert(pci->core_rsts[DW_PCIE_NON_STICKY_RST].rstc); + if (ret) { + dev_err(dev, "Failed to deassert non-sticky reset\n"); + goto err_assert_sticky_rst; + } + + /* Activate the PCIe bus peripheral devices */ + gpiod_set_value_cansleep(pci->pe_rst, 0); + + /* Make sure the state is settled (LTSSM is still disabled though) */ + usleep_range(BT1_PCIE_RUN_DELAY_US, BT1_PCIE_RUN_DELAY_US + 100); + + return 0; + +err_assert_sticky_rst: + reset_control_assert(pci->core_rsts[DW_PCIE_STICKY_RST].rstc); + +err_assert_core_rst: + reset_control_assert(pci->core_rsts[DW_PCIE_CORE_RST].rstc); + +err_assert_pipe_rst: + reset_control_assert(pci->core_rsts[DW_PCIE_PIPE_RST].rstc); + +err_disable_core_clk: + clk_bulk_disable_unprepare(DW_PCIE_NUM_CORE_CLKS, pci->core_clks); + +err_disable_app_clk: + clk_bulk_disable_unprepare(DW_PCIE_NUM_APP_CLKS, pci->app_clks); + +err_assert_phy_rst: + reset_control_assert(pci->core_rsts[DW_PCIE_PHY_RST].rstc); + +err_assert_hot_rst: + reset_control_assert(pci->core_rsts[DW_PCIE_HOT_RST].rstc); + +err_assert_pwr_rst: + reset_control_assert(pci->core_rsts[DW_PCIE_PWR_RST].rstc); + + return ret; +} + +static int bt1_pcie_host_init(struct dw_pcie_rp *pp) +{ + struct dw_pcie *pci = to_dw_pcie_from_pp(pp); + struct bt1_pcie *btpci = to_bt1_pcie(pci); + 
int ret; + + ret = bt1_pcie_get_resources(btpci); + if (ret) + return ret; + + bt1_pcie_full_stop_bus(btpci, true); + + return bt1_pcie_cold_start_bus(btpci); +} + +static void bt1_pcie_host_deinit(struct dw_pcie_rp *pp) +{ + struct dw_pcie *pci = to_dw_pcie_from_pp(pp); + struct bt1_pcie *btpci = to_bt1_pcie(pci); + + bt1_pcie_full_stop_bus(btpci, false); +} + +static const struct dw_pcie_host_ops bt1_pcie_host_ops = { + .host_init = bt1_pcie_host_init, + .host_deinit = bt1_pcie_host_deinit, +}; + +static struct bt1_pcie *bt1_pcie_create_data(struct platform_device *pdev) +{ + struct bt1_pcie *btpci; + + btpci = devm_kzalloc(&pdev->dev, sizeof(*btpci), GFP_KERNEL); + if (!btpci) + return ERR_PTR(-ENOMEM); + + btpci->pdev = pdev; + + platform_set_drvdata(pdev, btpci); + + return btpci; +} + +static int bt1_pcie_add_port(struct bt1_pcie *btpci) +{ + struct device *dev = &btpci->pdev->dev; + int ret; + + btpci->dw.version = DW_PCIE_VER_460A; + btpci->dw.dev = dev; + btpci->dw.ops = &bt1_pcie_ops; + + btpci->dw.pp.num_vectors = MAX_MSI_IRQS; + btpci->dw.pp.ops = &bt1_pcie_host_ops; + + dw_pcie_cap_set(&btpci->dw, REQ_RES); + + ret = dw_pcie_host_init(&btpci->dw.pp); + + return dev_err_probe(dev, ret, "Failed to initialize DWC PCIe host\n"); +} + +static void bt1_pcie_del_port(struct bt1_pcie *btpci) +{ + dw_pcie_host_deinit(&btpci->dw.pp); +} + +static int bt1_pcie_probe(struct platform_device *pdev) +{ + struct bt1_pcie *btpci; + + btpci = bt1_pcie_create_data(pdev); + if (IS_ERR(btpci)) + return PTR_ERR(btpci); + + return bt1_pcie_add_port(btpci); +} + +static int bt1_pcie_remove(struct platform_device *pdev) +{ + struct bt1_pcie *btpci = platform_get_drvdata(pdev); + + bt1_pcie_del_port(btpci); + + return 0; +} + +static const struct of_device_id bt1_pcie_of_match[] = { + { .compatible = "baikal,bt1-pcie" }, + {}, +}; +MODULE_DEVICE_TABLE(of, bt1_pcie_of_match); + +static struct platform_driver bt1_pcie_driver = { + .probe = bt1_pcie_probe, + .remove = 
bt1_pcie_remove, + .driver = { + .name = "bt1-pcie", + .of_match_table = bt1_pcie_of_match, + }, +}; +module_platform_driver(bt1_pcie_driver); + +MODULE_AUTHOR("Serge Semin "); +MODULE_DESCRIPTION("Baikal-T1 PCIe driver"); +MODULE_LICENSE("GPL"); From 9298804840457c29c7e115f3a87bec406c262c81 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Wed, 2 Nov 2022 10:10:08 -0400 Subject: [PATCH 2146/4122] PCI: endpoint: pci-epf-vntb: Clean up kernel_doc warning Cleanup warning found by scripts/kernel-doc. Consolidate terms: - host, host1 to HOST - vhost, vHost, Vhost, VHOST2 to VHOST Link: https://lore.kernel.org/r/20221102141014.1025893-2-Frank.Li@nxp.com Signed-off-by: Frank Li Signed-off-by: Lorenzo Pieralisi Acked-by: Manivannan Sadhasivam --- drivers/pci/endpoint/functions/pci-epf-vntb.c | 83 ++++++++++++------- 1 file changed, 54 insertions(+), 29 deletions(-) diff --git a/drivers/pci/endpoint/functions/pci-epf-vntb.c b/drivers/pci/endpoint/functions/pci-epf-vntb.c index 0ea85e1d292e..c0115bcb3b5e 100644 --- a/drivers/pci/endpoint/functions/pci-epf-vntb.c +++ b/drivers/pci/endpoint/functions/pci-epf-vntb.c @@ -11,7 +11,7 @@ * Author: Kishon Vijay Abraham I */ -/** +/* * +------------+ +---------------------------------------+ * | | | | * +------------+ | +--------------+ @@ -156,12 +156,14 @@ static struct pci_epf_header epf_ntb_header = { }; /** - * epf_ntb_link_up() - Raise link_up interrupt to Virtual Host + * epf_ntb_link_up() - Raise link_up interrupt to Virtual Host (VHOST) * @ntb: NTB device that facilitates communication between HOST and VHOST * @link_up: true or false indicating Link is UP or Down * * Once NTB function in HOST invoke ntb_link_enable(), - * this NTB function driver will trigger a link event to vhost. + * this NTB function driver will trigger a link event to VHOST. 
+ * + * Returns: Zero for success, or an error code in case of failure */ static int epf_ntb_link_up(struct epf_ntb *ntb, bool link_up) { @@ -175,9 +177,9 @@ static int epf_ntb_link_up(struct epf_ntb *ntb, bool link_up) } /** - * epf_ntb_configure_mw() - Configure the Outbound Address Space for vhost - * to access the memory window of host - * @ntb: NTB device that facilitates communication between host and vhost + * epf_ntb_configure_mw() - Configure the Outbound Address Space for VHOST + * to access the memory window of HOST + * @ntb: NTB device that facilitates communication between HOST and VHOST * @mw: Index of the memory window (either 0, 1, 2 or 3) * * EP Outbound Window @@ -194,7 +196,9 @@ static int epf_ntb_link_up(struct epf_ntb *ntb, bool link_up) * | | | | * | | | | * +--------+ +-----------+ - * VHost PCI EP + * VHOST PCI EP + * + * Returns: Zero for success, or an error code in case of failure */ static int epf_ntb_configure_mw(struct epf_ntb *ntb, u32 mw) { @@ -219,7 +223,7 @@ static int epf_ntb_configure_mw(struct epf_ntb *ntb, u32 mw) /** * epf_ntb_teardown_mw() - Teardown the configured OB ATU - * @ntb: NTB device that facilitates communication between HOST and vHOST + * @ntb: NTB device that facilitates communication between HOST and VHOST * @mw: Index of the memory window (either 0, 1, 2 or 3) * * Teardown the configured OB ATU configured in epf_ntb_configure_mw() using @@ -234,12 +238,12 @@ static void epf_ntb_teardown_mw(struct epf_ntb *ntb, u32 mw) } /** - * epf_ntb_cmd_handler() - Handle commands provided by the NTB Host + * epf_ntb_cmd_handler() - Handle commands provided by the NTB HOST * @work: work_struct for the epf_ntb_epc * * Workqueue function that gets invoked for the two epf_ntb_epc * periodically (once every 5ms) to see if it has received any commands - * from NTB host. The host can send commands to configure doorbell or + * from NTB HOST. 
The HOST can send commands to configure doorbell or * configure memory window or to update link status. */ static void epf_ntb_cmd_handler(struct work_struct *work) @@ -321,8 +325,8 @@ reset_handler: /** * epf_ntb_config_sspad_bar_clear() - Clear Config + Self scratchpad BAR - * @ntb_epc: EPC associated with one of the HOST which holds peer's outbound - * address. + * @ntb: EPC associated with one of the HOST which holds peer's outbound + * address. * * Clear BAR0 of EP CONTROLLER 1 which contains the HOST1's config and * self scratchpad region (removes inbound ATU configuration). While BAR0 is @@ -331,8 +335,10 @@ reset_handler: * used for self scratchpad from epf_ntb_bar[BAR_CONFIG]. * * Please note the self scratchpad region and config region is combined to - * a single region and mapped using the same BAR. Also note HOST2's peer - * scratchpad is HOST1's self scratchpad. + * a single region and mapped using the same BAR. Also note VHOST's peer + * scratchpad is HOST's self scratchpad. + * + * Returns: void */ static void epf_ntb_config_sspad_bar_clear(struct epf_ntb *ntb) { @@ -347,13 +353,15 @@ static void epf_ntb_config_sspad_bar_clear(struct epf_ntb *ntb) /** * epf_ntb_config_sspad_bar_set() - Set Config + Self scratchpad BAR - * @ntb: NTB device that facilitates communication between HOST and vHOST + * @ntb: NTB device that facilitates communication between HOST and VHOST * - * Map BAR0 of EP CONTROLLER 1 which contains the HOST1's config and + * Map BAR0 of EP CONTROLLER which contains the VHOST's config and * self scratchpad region. * * Please note the self scratchpad region and config region is combined to * a single region and mapped using the same BAR. 
+ * + * Returns: Zero for success, or an error code in case of failure */ static int epf_ntb_config_sspad_bar_set(struct epf_ntb *ntb) { @@ -380,7 +388,7 @@ static int epf_ntb_config_sspad_bar_set(struct epf_ntb *ntb) /** * epf_ntb_config_spad_bar_free() - Free the physical memory associated with * config + scratchpad region - * @ntb: NTB device that facilitates communication between HOST and vHOST + * @ntb: NTB device that facilitates communication between HOST and VHOST */ static void epf_ntb_config_spad_bar_free(struct epf_ntb *ntb) { @@ -393,11 +401,13 @@ static void epf_ntb_config_spad_bar_free(struct epf_ntb *ntb) /** * epf_ntb_config_spad_bar_alloc() - Allocate memory for config + scratchpad * region - * @ntb: NTB device that facilitates communication between HOST1 and HOST2 + * @ntb: NTB device that facilitates communication between HOST and VHOST * * Allocate the Local Memory mentioned in the above diagram. The size of * CONFIG REGION is sizeof(struct epf_ntb_ctrl) and size of SCRATCHPAD REGION * is obtained from "spad-count" configfs entry. + * + * Returns: Zero for success, or an error code in case of failure */ static int epf_ntb_config_spad_bar_alloc(struct epf_ntb *ntb) { @@ -465,11 +475,13 @@ static int epf_ntb_config_spad_bar_alloc(struct epf_ntb *ntb) } /** - * epf_ntb_configure_interrupt() - Configure MSI/MSI-X capaiblity - * @ntb: NTB device that facilitates communication between HOST and vHOST + * epf_ntb_configure_interrupt() - Configure MSI/MSI-X capability + * @ntb: NTB device that facilitates communication between HOST and VHOST * * Configure MSI/MSI-X capability for each interface with number of * interrupts equal to "db_count" configfs entry. 
+ * + * Returns: Zero for success, or an error code in case of failure */ static int epf_ntb_configure_interrupt(struct epf_ntb *ntb) { @@ -511,7 +523,9 @@ static int epf_ntb_configure_interrupt(struct epf_ntb *ntb) /** * epf_ntb_db_bar_init() - Configure Doorbell window BARs - * @ntb: NTB device that facilitates communication between HOST and vHOST + * @ntb: NTB device that facilitates communication between HOST and VHOST + * + * Returns: Zero for success, or an error code in case of failure */ static int epf_ntb_db_bar_init(struct epf_ntb *ntb) { @@ -566,7 +580,7 @@ static void epf_ntb_mw_bar_clear(struct epf_ntb *ntb, int num_mws); /** * epf_ntb_db_bar_clear() - Clear doorbell BAR and free memory * allocated in peer's outbound address space - * @ntb: NTB device that facilitates communication between HOST and vHOST + * @ntb: NTB device that facilitates communication between HOST and VHOST */ static void epf_ntb_db_bar_clear(struct epf_ntb *ntb) { @@ -582,8 +596,9 @@ static void epf_ntb_db_bar_clear(struct epf_ntb *ntb) /** * epf_ntb_mw_bar_init() - Configure Memory window BARs - * @ntb: NTB device that facilitates communication between HOST and vHOST + * @ntb: NTB device that facilitates communication between HOST and VHOST * + * Returns: Zero for success, or an error code in case of failure */ static int epf_ntb_mw_bar_init(struct epf_ntb *ntb) { @@ -639,7 +654,7 @@ err_alloc_mem: /** * epf_ntb_mw_bar_clear() - Clear Memory window BARs - * @ntb: NTB device that facilitates communication between HOST and vHOST + * @ntb: NTB device that facilitates communication between HOST and VHOST */ static void epf_ntb_mw_bar_clear(struct epf_ntb *ntb, int num_mws) { @@ -662,7 +677,7 @@ static void epf_ntb_mw_bar_clear(struct epf_ntb *ntb, int num_mws) /** * epf_ntb_epc_destroy() - Cleanup NTB EPC interface - * @ntb: NTB device that facilitates communication between HOST and vHOST + * @ntb: NTB device that facilitates communication between HOST and VHOST * * Wrapper for 
epf_ntb_epc_destroy_interface() to cleanup all the NTB interfaces */ @@ -675,7 +690,9 @@ static void epf_ntb_epc_destroy(struct epf_ntb *ntb) /** * epf_ntb_init_epc_bar() - Identify BARs to be used for each of the NTB * constructs (scratchpad region, doorbell, memorywindow) - * @ntb: NTB device that facilitates communication between HOST and vHOST + * @ntb: NTB device that facilitates communication between HOST and VHOST + * + * Returns: Zero for success, or an error code in case of failure */ static int epf_ntb_init_epc_bar(struct epf_ntb *ntb) { @@ -716,11 +733,13 @@ static int epf_ntb_init_epc_bar(struct epf_ntb *ntb) /** * epf_ntb_epc_init() - Initialize NTB interface - * @ntb: NTB device that facilitates communication between HOST and vHOST2 + * @ntb: NTB device that facilitates communication between HOST and VHOST * * Wrapper to initialize a particular EPC interface and start the workqueue - * to check for commands from host. This function will write to the + * to check for commands from HOST. This function will write to the * EP controller HW for configuring it. + * + * Returns: Zero for success, or an error code in case of failure */ static int epf_ntb_epc_init(struct epf_ntb *ntb) { @@ -787,7 +806,7 @@ err_config_interrupt: /** * epf_ntb_epc_cleanup() - Cleanup all NTB interfaces - * @ntb: NTB device that facilitates communication between HOST1 and HOST2 + * @ntb: NTB device that facilitates communication between HOST and VHOST * * Wrapper to cleanup all NTB interfaces. */ @@ -951,6 +970,8 @@ static const struct config_item_type ntb_group_type = { * * Add configfs directory specific to NTB. 
This directory will hold * NTB specific properties like db_count, spad_count, num_mws etc., + * + * Returns: Pointer to config_group */ static struct config_group *epf_ntb_add_cfs(struct pci_epf *epf, struct config_group *group) @@ -1292,6 +1313,8 @@ static struct pci_driver vntb_pci_driver = { * Invoked when a primary interface or secondary interface is bound to EPC * device. This function will succeed only when EPC is bound to both the * interfaces. + * + * Returns: Zero for success, or an error code in case of failure */ static int epf_ntb_bind(struct pci_epf *epf) { @@ -1377,6 +1400,8 @@ static struct pci_epf_ops epf_ntb_ops = { * * Probe NTB function driver when endpoint function bus detects a NTB * endpoint function. + * + * Returns: Zero for success, or an error code in case of failure */ static int epf_ntb_probe(struct pci_epf *epf) { From 1d118fed348f65bcc08e9bfb947085c276d05b52 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Wed, 2 Nov 2022 10:10:09 -0400 Subject: [PATCH 2147/4122] PCI: endpoint: pci-epf-vntb: Fix struct epf_ntb_ctrl indentation Align the indentation of struct epf_ntb_ctrl with other structs in the driver. 
Link: https://lore.kernel.org/r/20221102141014.1025893-3-Frank.Li@nxp.com Signed-off-by: Frank Li Signed-off-by: Lorenzo Pieralisi --- drivers/pci/endpoint/functions/pci-epf-vntb.c | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/pci/endpoint/functions/pci-epf-vntb.c b/drivers/pci/endpoint/functions/pci-epf-vntb.c index c0115bcb3b5e..1863006cc36c 100644 --- a/drivers/pci/endpoint/functions/pci-epf-vntb.c +++ b/drivers/pci/endpoint/functions/pci-epf-vntb.c @@ -99,20 +99,20 @@ enum epf_ntb_bar { * NTB Driver NTB Driver */ struct epf_ntb_ctrl { - u32 command; - u32 argument; - u16 command_status; - u16 link_status; - u32 topology; - u64 addr; - u64 size; - u32 num_mws; - u32 reserved; - u32 spad_offset; - u32 spad_count; - u32 db_entry_size; - u32 db_data[MAX_DB_COUNT]; - u32 db_offset[MAX_DB_COUNT]; + u32 command; + u32 argument; + u16 command_status; + u16 link_status; + u32 topology; + u64 addr; + u64 size; + u32 num_mws; + u32 reserved; + u32 spad_offset; + u32 spad_count; + u32 db_entry_size; + u32 db_data[MAX_DB_COUNT]; + u32 db_offset[MAX_DB_COUNT]; } __packed; struct epf_ntb { From 8818039f959b2efc0d6f2cb101f8061332f0c77e Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Mon, 21 Nov 2022 09:48:45 -0500 Subject: [PATCH 2148/4122] kbuild: add ability to make source rpm buildable using koji MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Changes: - added new target 'srcrpm-pkg' to generate source rpm - added required build tools to spec file - removed locally compiled host tools to force their re-compile Signed-off-by: Ivan Vecera Signed-off-by: Jonathan Toppins Acked-by: Íñigo Huguet Tested-by: Ivan Vecera Signed-off-by: Masahiro Yamada --- scripts/Makefile.package | 10 ++++++++++ scripts/package/mkspec | 7 +++++++ 2 files changed, 17 insertions(+) diff --git a/scripts/Makefile.package b/scripts/Makefile.package index 8bbcced67c22..1290f1c631fb 100644 --- 
a/scripts/Makefile.package +++ b/scripts/Makefile.package @@ -62,6 +62,16 @@ rpm-pkg: +rpmbuild $(RPMOPTS) --target $(UTS_MACHINE)-linux -ta $(KERNELPATH).tar.gz \ --define='_smp_mflags %{nil}' +# srcrpm-pkg +# --------------------------------------------------------------------------- +PHONY += srcrpm-pkg +srcrpm-pkg: + $(MAKE) clean + $(CONFIG_SHELL) $(MKSPEC) >$(objtree)/kernel.spec + $(call cmd,src_tar,$(KERNELPATH),kernel.spec) + +rpmbuild $(RPMOPTS) --target $(UTS_MACHINE)-linux -ts $(KERNELPATH).tar.gz \ + --define='_smp_mflags %{nil}' --define='_srcrpmdir $(srctree)' + # binrpm-pkg # --------------------------------------------------------------------------- PHONY += binrpm-pkg diff --git a/scripts/package/mkspec b/scripts/package/mkspec index 70392fd2fd29..dda00a948a01 100755 --- a/scripts/package/mkspec +++ b/scripts/package/mkspec @@ -33,6 +33,8 @@ EXCLUDES="$RCS_TAR_IGNORE --exclude=*vmlinux* --exclude=*.mod \ --exclude=*.o --exclude=*.ko --exclude=*.cmd --exclude=Documentation \ --exclude=.config.old --exclude=.missing-syscalls.d --exclude=*.s" +test -n "$LOCALVERSION" && MAKE="$MAKE LOCALVERSION=$LOCALVERSION" + # We can label the here-doc lines for conditional output to the spec file # # Labels: @@ -49,6 +51,9 @@ sed -e '/^DEL/d' -e 's/^\t*//' < Date: Wed, 2 Nov 2022 10:10:10 -0400 Subject: [PATCH 2149/4122] PCI: endpoint: pci-epf-vntb: Fix call pci_epc_mem_free_addr() in error path Replace pci_epc_mem_free_addr() with pci_epf_free_space() in the error handle path to match pci_epf_alloc_space(). 
Link: https://lore.kernel.org/r/20221102141014.1025893-4-Frank.Li@nxp.com Fixes: e35f56bb0330 ("PCI: endpoint: Support NTB transfer between RC and EP") Signed-off-by: Frank Li Signed-off-by: Lorenzo Pieralisi --- drivers/pci/endpoint/functions/pci-epf-vntb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/endpoint/functions/pci-epf-vntb.c b/drivers/pci/endpoint/functions/pci-epf-vntb.c index 1863006cc36c..191924a83454 100644 --- a/drivers/pci/endpoint/functions/pci-epf-vntb.c +++ b/drivers/pci/endpoint/functions/pci-epf-vntb.c @@ -571,7 +571,7 @@ static int epf_ntb_db_bar_init(struct epf_ntb *ntb) return ret; err_alloc_peer_mem: - pci_epc_mem_free_addr(ntb->epf->epc, epf_bar->phys_addr, mw_addr, epf_bar->size); + pci_epf_free_space(ntb->epf, mw_addr, barno, 0); return -1; } From 03d426ae5426caf46cb96534ca77fa50a018dd3a Mon Sep 17 00:00:00 2001 From: Frank Li Date: Wed, 2 Nov 2022 10:10:11 -0400 Subject: [PATCH 2150/4122] PCI: endpoint: pci-epf-vntb: Remove unused epf_db_phy struct member epf_db_phy member in struct epf_ntb is not used, remove it. 
Link: https://lore.kernel.org/r/20221102141014.1025893-5-Frank.Li@nxp.com Signed-off-by: Frank Li Signed-off-by: Lorenzo Pieralisi Acked-by: Manivannan Sadhasivam --- drivers/pci/endpoint/functions/pci-epf-vntb.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/pci/endpoint/functions/pci-epf-vntb.c b/drivers/pci/endpoint/functions/pci-epf-vntb.c index 191924a83454..ee66101cb5c4 100644 --- a/drivers/pci/endpoint/functions/pci-epf-vntb.c +++ b/drivers/pci/endpoint/functions/pci-epf-vntb.c @@ -136,7 +136,6 @@ struct epf_ntb { struct epf_ntb_ctrl *reg; - phys_addr_t epf_db_phy; void __iomem *epf_db; phys_addr_t vpci_mw_phy[MAX_MW]; From 2b35c886556a24f1531edf38a4dab53bbbea4db4 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Wed, 2 Nov 2022 10:10:12 -0400 Subject: [PATCH 2151/4122] PCI: endpoint: pci-epf-vntb: Replace hardcoded 4 with sizeof(u32) NTB spad entry item size is sizeof(u32), replace hardcoded 4 with it. Link: https://lore.kernel.org/r/20221102141014.1025893-6-Frank.Li@nxp.com Signed-off-by: Frank Li Signed-off-by: Lorenzo Pieralisi Acked-by: Manivannan Sadhasivam --- drivers/pci/endpoint/functions/pci-epf-vntb.c | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/pci/endpoint/functions/pci-epf-vntb.c b/drivers/pci/endpoint/functions/pci-epf-vntb.c index ee66101cb5c4..54616281da9e 100644 --- a/drivers/pci/endpoint/functions/pci-epf-vntb.c +++ b/drivers/pci/endpoint/functions/pci-epf-vntb.c @@ -257,12 +257,12 @@ static void epf_ntb_cmd_handler(struct work_struct *work) ntb = container_of(work, struct epf_ntb, cmd_handler.work); for (i = 1; i < ntb->db_count; i++) { - if (readl(ntb->epf_db + i * 4)) { - if (readl(ntb->epf_db + i * 4)) + if (readl(ntb->epf_db + i * sizeof(u32))) { + if (readl(ntb->epf_db + i * sizeof(u32))) ntb->db |= 1 << (i - 1); ntb_db_event(&ntb->ntb, i); - writel(0, ntb->epf_db + i * 4); + writel(0, ntb->epf_db + i * sizeof(u32)); } } @@ -433,7 +433,7 @@ static int 
epf_ntb_config_spad_bar_alloc(struct epf_ntb *ntb) spad_count = ntb->spad_count; ctrl_size = sizeof(struct epf_ntb_ctrl); - spad_size = 2 * spad_count * 4; + spad_size = 2 * spad_count * sizeof(u32); if (!align) { ctrl_size = roundup_pow_of_two(ctrl_size); @@ -463,7 +463,7 @@ static int epf_ntb_config_spad_bar_alloc(struct epf_ntb *ntb) ctrl->num_mws = ntb->num_mws; ntb->spad_size = spad_size; - ctrl->db_entry_size = 4; + ctrl->db_entry_size = sizeof(u32); for (i = 0; i < ntb->db_count; i++) { ntb->reg->db_data[i] = 1 + i; @@ -535,7 +535,7 @@ static int epf_ntb_db_bar_init(struct epf_ntb *ntb) struct pci_epf_bar *epf_bar; void __iomem *mw_addr; enum pci_barno barno; - size_t size = 4 * ntb->db_count; + size_t size = sizeof(u32) * ntb->db_count; epc_features = pci_epc_get_features(ntb->epf->epc, ntb->epf->func_no, @@ -1121,11 +1121,11 @@ static int vntb_epf_link_enable(struct ntb_dev *ntb, static u32 vntb_epf_spad_read(struct ntb_dev *ndev, int idx) { struct epf_ntb *ntb = ntb_ndev(ndev); - int off = ntb->reg->spad_offset, ct = ntb->reg->spad_count * 4; + int off = ntb->reg->spad_offset, ct = ntb->reg->spad_count * sizeof(u32); u32 val; void __iomem *base = ntb->reg; - val = readl(base + off + ct + idx * 4); + val = readl(base + off + ct + idx * sizeof(u32)); return val; } @@ -1133,10 +1133,10 @@ static int vntb_epf_spad_write(struct ntb_dev *ndev, int idx, u32 val) { struct epf_ntb *ntb = ntb_ndev(ndev); struct epf_ntb_ctrl *ctrl = ntb->reg; - int off = ctrl->spad_offset, ct = ctrl->spad_count * 4; + int off = ctrl->spad_offset, ct = ctrl->spad_count * sizeof(u32); void __iomem *base = ntb->reg; - writel(val, base + off + ct + idx * 4); + writel(val, base + off + ct + idx * sizeof(u32)); return 0; } @@ -1148,7 +1148,7 @@ static u32 vntb_epf_peer_spad_read(struct ntb_dev *ndev, int pidx, int idx) void __iomem *base = ntb->reg; u32 val; - val = readl(base + off + idx * 4); + val = readl(base + off + idx * sizeof(u32)); return val; } @@ -1159,7 +1159,7 @@ static int 
vntb_epf_peer_spad_write(struct ntb_dev *ndev, int pidx, int idx, u32 int off = ctrl->spad_offset; void __iomem *base = ntb->reg; - writel(val, base + off + idx * 4); + writel(val, base + off + idx * sizeof(u32)); return 0; } From 01dcec6d57ce62d535b2016fc4a617627fff506d Mon Sep 17 00:00:00 2001 From: Frank Li Date: Wed, 2 Nov 2022 10:10:13 -0400 Subject: [PATCH 2152/4122] PCI: endpoint: pci-epf-vntb: Fix sparse build warning for epf_db Use epf_db[i] dereference instead of readl() because epf_db is in memory allocated by dma_alloc_coherent(), not I/O. Remove useless/duplicated readl() in the process. Link: https://lore.kernel.org/r/20221102141014.1025893-7-Frank.Li@nxp.com Signed-off-by: Frank Li Signed-off-by: Lorenzo Pieralisi --- drivers/pci/endpoint/functions/pci-epf-vntb.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/pci/endpoint/functions/pci-epf-vntb.c b/drivers/pci/endpoint/functions/pci-epf-vntb.c index 54616281da9e..f896846ed970 100644 --- a/drivers/pci/endpoint/functions/pci-epf-vntb.c +++ b/drivers/pci/endpoint/functions/pci-epf-vntb.c @@ -136,7 +136,7 @@ struct epf_ntb { struct epf_ntb_ctrl *reg; - void __iomem *epf_db; + u32 *epf_db; phys_addr_t vpci_mw_phy[MAX_MW]; void __iomem *vpci_mw_addr[MAX_MW]; @@ -257,12 +257,10 @@ static void epf_ntb_cmd_handler(struct work_struct *work) ntb = container_of(work, struct epf_ntb, cmd_handler.work); for (i = 1; i < ntb->db_count; i++) { - if (readl(ntb->epf_db + i * sizeof(u32))) { - if (readl(ntb->epf_db + i * sizeof(u32))) - ntb->db |= 1 << (i - 1); - + if (ntb->epf_db[i]) { + ntb->db |= 1 << (i - 1); ntb_db_event(&ntb->ntb, i); - writel(0, ntb->epf_db + i * sizeof(u32)); + ntb->epf_db[i] = 0; } } From 5f697b25009ccfebede5e42c6693c4b18de11b37 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Wed, 2 Nov 2022 10:10:14 -0400 Subject: [PATCH 2153/4122] PCI: endpoint: pci-epf-vntb: Fix sparse ntb->reg build warning pci-epf-vntb.c:1128:33: sparse: expected void [noderef] __iomem 
*base pci-epf-vntb.c:1128:33: sparse: got struct epf_ntb_ctrl *reg Add __iomem type cast in vntb_epf_peer_spad_read() and vntb_epf_peer_spad_write(). Link: https://lore.kernel.org/r/20221102141014.1025893-8-Frank.Li@nxp.com Reported-by: kernel test robot Signed-off-by: Frank Li Signed-off-by: Lorenzo Pieralisi Acked-by: Manivannan Sadhasivam --- drivers/pci/endpoint/functions/pci-epf-vntb.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pci/endpoint/functions/pci-epf-vntb.c b/drivers/pci/endpoint/functions/pci-epf-vntb.c index f896846ed970..04698e7995a5 100644 --- a/drivers/pci/endpoint/functions/pci-epf-vntb.c +++ b/drivers/pci/endpoint/functions/pci-epf-vntb.c @@ -1121,7 +1121,7 @@ static u32 vntb_epf_spad_read(struct ntb_dev *ndev, int idx) struct epf_ntb *ntb = ntb_ndev(ndev); int off = ntb->reg->spad_offset, ct = ntb->reg->spad_count * sizeof(u32); u32 val; - void __iomem *base = ntb->reg; + void __iomem *base = (void __iomem *)ntb->reg; val = readl(base + off + ct + idx * sizeof(u32)); return val; @@ -1132,7 +1132,7 @@ static int vntb_epf_spad_write(struct ntb_dev *ndev, int idx, u32 val) struct epf_ntb *ntb = ntb_ndev(ndev); struct epf_ntb_ctrl *ctrl = ntb->reg; int off = ctrl->spad_offset, ct = ctrl->spad_count * sizeof(u32); - void __iomem *base = ntb->reg; + void __iomem *base = (void __iomem *)ntb->reg; writel(val, base + off + ct + idx * sizeof(u32)); return 0; @@ -1143,7 +1143,7 @@ static u32 vntb_epf_peer_spad_read(struct ntb_dev *ndev, int pidx, int idx) struct epf_ntb *ntb = ntb_ndev(ndev); struct epf_ntb_ctrl *ctrl = ntb->reg; int off = ctrl->spad_offset; - void __iomem *base = ntb->reg; + void __iomem *base = (void __iomem *)ntb->reg; u32 val; val = readl(base + off + idx * sizeof(u32)); @@ -1155,7 +1155,7 @@ static int vntb_epf_peer_spad_write(struct ntb_dev *ndev, int pidx, int idx, u32 struct epf_ntb *ntb = ntb_ndev(ndev); struct epf_ntb_ctrl *ctrl = ntb->reg; int off = ctrl->spad_offset; - void __iomem *base 
= ntb->reg; + void __iomem *base = (void __iomem *)ntb->reg; writel(val, base + off + idx * sizeof(u32)); return 0; From 24b3e3dd9c9c742a4dd18e71b6963f9e7ab72911 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 11 Nov 2022 19:14:56 -0800 Subject: [PATCH 2154/4122] kernfs: fix all kernel-doc warnings and multiple typos Fix kernel-doc warnings. Many of these are about a function's return value, so use the kernel-doc Return: format to fix those Use % prefix on numeric constant values. dir.c: fix typos/spellos file.c fix typo: s/taret/target/ Fix all of these kernel-doc warnings: dir.c:305: warning: missing initial short description on line: * kernfs_name_hash dir.c:137: warning: No description found for return value of 'kernfs_path_from_node_locked' dir.c:196: warning: No description found for return value of 'kernfs_name' dir.c:224: warning: No description found for return value of 'kernfs_path_from_node' dir.c:292: warning: No description found for return value of 'kernfs_get_parent' dir.c:312: warning: No description found for return value of 'kernfs_name_hash' dir.c:404: warning: No description found for return value of 'kernfs_unlink_sibling' dir.c:588: warning: No description found for return value of 'kernfs_node_from_dentry' dir.c:806: warning: No description found for return value of 'kernfs_find_ns' dir.c:879: warning: No description found for return value of 'kernfs_find_and_get_ns' dir.c:904: warning: No description found for return value of 'kernfs_walk_and_get_ns' dir.c:927: warning: No description found for return value of 'kernfs_create_root' dir.c:996: warning: No description found for return value of 'kernfs_root_to_node' dir.c:1016: warning: No description found for return value of 'kernfs_create_dir_ns' dir.c:1048: warning: No description found for return value of 'kernfs_create_empty_dir' dir.c:1306: warning: No description found for return value of 'kernfs_next_descendant_post' dir.c:1568: warning: No description found for return value of 
'kernfs_remove_self' dir.c:1630: warning: No description found for return value of 'kernfs_remove_by_name_ns' dir.c:1667: warning: No description found for return value of 'kernfs_rename_ns' file.c:66: warning: No description found for return value of 'of_on' file.c:88: warning: No description found for return value of 'kernfs_deref_open_node_locked' file.c:1036: warning: No description found for return value of '__kernfs_create_file' inode.c:100: warning: No description found for return value of 'kernfs_setattr' mount.c:160: warning: No description found for return value of 'kernfs_root_from_sb' mount.c:198: warning: No description found for return value of 'kernfs_node_dentry' mount.c:302: warning: No description found for return value of 'kernfs_super_ns' mount.c:318: warning: No description found for return value of 'kernfs_get_tree' symlink.c:28: warning: No description found for return value of 'kernfs_create_link' Signed-off-by: Randy Dunlap Cc: Greg Kroah-Hartman Cc: Tejun Heo Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20221112031456.22980-1-rdunlap@infradead.org Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 82 ++++++++++++++++++++++--------------- fs/kernfs/file.c | 18 ++++---- fs/kernfs/inode.c | 8 ++-- fs/kernfs/kernfs-internal.h | 2 +- fs/kernfs/mount.c | 10 +++-- fs/kernfs/symlink.c | 2 +- 6 files changed, 74 insertions(+), 48 deletions(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 77d7a3a28057..935ef8cb02b2 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -125,9 +125,9 @@ static struct kernfs_node *kernfs_common_ancestor(struct kernfs_node *a, * kn_to: /n1/n2/n3 [depth=3] * result: /../.. * - * [3] when @kn_to is NULL result will be "(null)" + * [3] when @kn_to is %NULL result will be "(null)" * - * Returns the length of the full path. If the full length is equal to or + * Return: the length of the full path. 
If the full length is equal to or * greater than @buflen, @buf contains the truncated path with the trailing * '\0'. On error, -errno is returned. */ @@ -185,10 +185,12 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to, * @buflen: size of @buf * * Copies the name of @kn into @buf of @buflen bytes. The behavior is - * similar to strlcpy(). It returns the length of @kn's name and if @buf - * isn't long enough, it's filled upto @buflen-1 and nul terminated. + * similar to strlcpy(). * - * Fills buffer with "(null)" if @kn is NULL. + * Fills buffer with "(null)" if @kn is %NULL. + * + * Return: the length of @kn's name and if @buf isn't long enough, + * it's filled up to @buflen-1 and nul terminated. * * This function can be called from any context. */ @@ -215,7 +217,7 @@ int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen) * path (which includes '..'s) as needed to reach from @from to @to is * returned. * - * Returns the length of the full path. If the full length is equal to or + * Return: the length of the full path. If the full length is equal to or * greater than @buflen, @buf contains the truncated path with the trailing * '\0'. On error, -errno is returned. */ @@ -287,6 +289,8 @@ out: * * Determines @kn's parent, pins and returns it. This function can be * called from any context. 
+ * + * Return: parent node of @kn */ struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn) { @@ -302,11 +306,11 @@ struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn) } /** - * kernfs_name_hash + * kernfs_name_hash - calculate hash of @ns + @name * @name: Null terminated string to hash * @ns: Namespace tag to hash * - * Returns 31 bit hash of ns + name (so it fits in an off_t ) + * Return: 31-bit hash of ns + name (so it fits in an off_t) */ static unsigned int kernfs_name_hash(const char *name, const void *ns) { @@ -354,8 +358,8 @@ static int kernfs_sd_compare(const struct kernfs_node *left, * Locking: * kernfs_rwsem held exclusive * - * RETURNS: - * 0 on susccess -EEXIST on failure. + * Return: + * %0 on success, -EEXIST on failure. */ static int kernfs_link_sibling(struct kernfs_node *kn) { @@ -394,8 +398,10 @@ static int kernfs_link_sibling(struct kernfs_node *kn) * @kn: kernfs_node of interest * * Try to unlink @kn from its sibling rbtree which starts from - * kn->parent->dir.children. Returns %true if @kn was actually - * removed, %false if @kn wasn't on the rbtree. + * kn->parent->dir.children. + * + * Return: %true if @kn was actually removed, + * %false if @kn wasn't on the rbtree. * * Locking: * kernfs_rwsem held exclusive @@ -419,10 +425,10 @@ static bool kernfs_unlink_sibling(struct kernfs_node *kn) * @kn: kernfs_node to get an active reference to * * Get an active reference of @kn. This function is noop if @kn - * is NULL. + * is %NULL. * - * RETURNS: - * Pointer to @kn on success, NULL on failure. + * Return: + * Pointer to @kn on success, %NULL on failure. */ struct kernfs_node *kernfs_get_active(struct kernfs_node *kn) { @@ -442,7 +448,7 @@ struct kernfs_node *kernfs_get_active(struct kernfs_node *kn) * @kn: kernfs_node to put an active reference to * * Put an active reference to @kn. This function is noop if @kn - * is NULL. + * is %NULL. 
*/ void kernfs_put_active(struct kernfs_node *kn) { @@ -464,7 +470,7 @@ void kernfs_put_active(struct kernfs_node *kn) * kernfs_drain - drain kernfs_node * @kn: kernfs_node to drain * - * Drain existing usages and nuke all existing mmaps of @kn. Mutiple + * Drain existing usages and nuke all existing mmaps of @kn. Multiple * removers may invoke this function concurrently on @kn and all will * return after draining is complete. */ @@ -577,7 +583,7 @@ EXPORT_SYMBOL_GPL(kernfs_put); * kernfs_node_from_dentry - determine kernfs_node associated with a dentry * @dentry: the dentry in question * - * Return the kernfs_node associated with @dentry. If @dentry is not a + * Return: the kernfs_node associated with @dentry. If @dentry is not a * kernfs one, %NULL is returned. * * While the returned kernfs_node will stay accessible as long as @dentry @@ -684,8 +690,8 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent, * @id's lower 32bits encode ino and upper gen. If the gen portion is * zero, all generations are matched. * - * RETURNS: - * NULL on failure. Return a kernfs node with reference counter incremented + * Return: %NULL on failure, + * otherwise a kernfs node with reference counter incremented. */ struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root, u64 id) @@ -733,8 +739,8 @@ err_unlock: * function increments nlink of the parent's inode if @kn is a * directory and link into the children list of the parent. * - * RETURNS: - * 0 on success, -EEXIST if entry with the given name already + * Return: + * %0 on success, -EEXIST if entry with the given name already * exists. */ int kernfs_add_one(struct kernfs_node *kn) @@ -797,8 +803,9 @@ out_unlock: * @name: name to look for * @ns: the namespace tag to use * - * Look for kernfs_node with name @name under @parent. Returns pointer to - * the found kernfs_node on success, %NULL on failure. + * Look for kernfs_node with name @name under @parent. 
+ * + * Return: pointer to the found kernfs_node on success, %NULL on failure. */ static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent, const unsigned char *name, @@ -871,8 +878,9 @@ static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent, * @ns: the namespace tag to use * * Look for kernfs_node with name @name under @parent and get a reference - * if found. This function may sleep and returns pointer to the found - * kernfs_node on success, %NULL on failure. + * if found. This function may sleep. + * + * Return: pointer to the found kernfs_node on success, %NULL on failure. */ struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent, const char *name, const void *ns) @@ -896,8 +904,9 @@ EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns); * @ns: the namespace tag to use * * Look for kernfs_node with path @path under @parent and get a reference - * if found. This function may sleep and returns pointer to the found - * kernfs_node on success, %NULL on failure. + * if found. This function may sleep. + * + * Return: pointer to the found kernfs_node on success, %NULL on failure. */ struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent, const char *path, const void *ns) @@ -919,7 +928,7 @@ struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent, * @flags: KERNFS_ROOT_* flags * @priv: opaque data associated with the new directory * - * Returns the root of the new hierarchy on success, ERR_PTR() value on + * Return: the root of the new hierarchy on success, ERR_PTR() value on * failure. 
*/ struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops, @@ -991,6 +1000,8 @@ void kernfs_destroy_root(struct kernfs_root *root) /** * kernfs_root_to_node - return the kernfs_node associated with a kernfs_root * @root: root to use to lookup + * + * Return: @root's kernfs_node */ struct kernfs_node *kernfs_root_to_node(struct kernfs_root *root) { @@ -1007,7 +1018,7 @@ struct kernfs_node *kernfs_root_to_node(struct kernfs_root *root) * @priv: opaque data associated with the new directory * @ns: optional namespace tag of the directory * - * Returns the created node on success, ERR_PTR() value on failure. + * Return: the created node on success, ERR_PTR() value on failure. */ struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, umode_t mode, @@ -1041,7 +1052,7 @@ struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, * @parent: parent in which to create a new directory * @name: name of the new directory * - * Returns the created node on success, ERR_PTR() value on failure. + * Return: the created node on success, ERR_PTR() value on failure. */ struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent, const char *name) @@ -1300,6 +1311,8 @@ static struct kernfs_node *kernfs_leftmost_descendant(struct kernfs_node *pos) * Find the next descendant to visit for post-order traversal of @root's * descendants. @root is included in the iteration and the last node to be * visited. + * + * Return: the next descendant to visit or %NULL when done. */ static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos, struct kernfs_node *root) @@ -1563,6 +1576,8 @@ void kernfs_unbreak_active_protection(struct kernfs_node *kn) * the whole kernfs_ops which won the arbitration. This can be used to * guarantee, for example, all concurrent writes to a "delete" file to * finish only after the whole operation is complete. + * + * Return: %true if @kn is removed by this call, otherwise %false. 
*/ bool kernfs_remove_self(struct kernfs_node *kn) { @@ -1623,7 +1638,8 @@ bool kernfs_remove_self(struct kernfs_node *kn) * @ns: namespace tag of the kernfs_node to remove * * Look for the kernfs_node with @name and @ns under @parent and remove it. - * Returns 0 on success, -ENOENT if such entry doesn't exist. + * + * Return: %0 on success, -ENOENT if such entry doesn't exist. */ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, const void *ns) @@ -1661,6 +1677,8 @@ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, * @new_parent: new parent to put @sd under * @new_name: new name * @new_ns: new namespace tag + * + * Return: %0 on success, -errno on failure. */ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, const char *new_name, const void *new_ns) diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index 9ab6c92e02da..e4a50e4ff0d2 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -33,7 +33,7 @@ struct kernfs_open_node { * pending queue is implemented as a singly linked list of kernfs_nodes. * The list is terminated with the self pointer so that whether a * kernfs_node is on the list or not can be determined by testing the next - * pointer for NULL. + * pointer for %NULL. */ #define KERNFS_NOTIFY_EOL ((void *)&kernfs_notify_list) @@ -59,8 +59,10 @@ static inline struct mutex *kernfs_open_file_mutex_lock(struct kernfs_node *kn) } /** - * of_on - Return the kernfs_open_node of the specified kernfs_open_file - * @of: taret kernfs_open_file + * of_on - Get the kernfs_open_node of the specified kernfs_open_file + * @of: target kernfs_open_file + * + * Return: the kernfs_open_node of the kernfs_open_file */ static struct kernfs_open_node *of_on(struct kernfs_open_file *of) { @@ -82,6 +84,8 @@ static struct kernfs_open_node *of_on(struct kernfs_open_file *of) * outside RCU read-side critical section. * * The caller needs to make sure that kernfs_open_file_mutex is held. 
+ * + * Return: @kn->attr.open when kernfs_open_file_mutex is held. */ static struct kernfs_open_node * kernfs_deref_open_node_locked(struct kernfs_node *kn) @@ -548,11 +552,11 @@ out_unlock: * If @kn->attr.open exists, increment its reference count; otherwise, * create one. @of is chained to the files list. * - * LOCKING: + * Locking: * Kernel thread context (may sleep). * - * RETURNS: - * 0 on success, -errno on failure. + * Return: + * %0 on success, -errno on failure. */ static int kernfs_get_open_node(struct kernfs_node *kn, struct kernfs_open_file *of) @@ -1024,7 +1028,7 @@ const struct file_operations kernfs_file_fops = { * @ns: optional namespace tag of the file * @key: lockdep key for the file's active_ref, %NULL to disable lockdep * - * Returns the created node on success, ERR_PTR() value on error. + * Return: the created node on success, ERR_PTR() value on error. */ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, const char *name, diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c index 74f3453f4639..eac0f210299a 100644 --- a/fs/kernfs/inode.c +++ b/fs/kernfs/inode.c @@ -94,7 +94,7 @@ int __kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr) * @kn: target node * @iattr: iattr to set * - * Returns 0 on success, -errno on failure. + * Return: %0 on success, -errno on failure. */ int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr) { @@ -239,11 +239,11 @@ static void kernfs_init_inode(struct kernfs_node *kn, struct inode *inode) * allocated and basics are initialized. New inode is returned * locked. * - * LOCKING: + * Locking: * Kernel thread context (may sleep). * - * RETURNS: - * Pointer to allocated inode on success, NULL on failure. + * Return: + * Pointer to allocated inode on success, %NULL on failure. 
*/ struct inode *kernfs_get_inode(struct super_block *sb, struct kernfs_node *kn) { diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h index fc5821effd97..9046d9f39e63 100644 --- a/fs/kernfs/kernfs-internal.h +++ b/fs/kernfs/kernfs-internal.h @@ -58,7 +58,7 @@ struct kernfs_root { * kernfs_root - find out the kernfs_root a kernfs_node belongs to * @kn: kernfs_node of interest * - * Return the kernfs_root @kn belongs to. + * Return: the kernfs_root @kn belongs to. */ static inline struct kernfs_root *kernfs_root(struct kernfs_node *kn) { diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index d0859f72d2d6..e08e8d999807 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -153,7 +153,7 @@ static const struct export_operations kernfs_export_ops = { * kernfs_root_from_sb - determine kernfs_root associated with a super_block * @sb: the super_block in question * - * Return the kernfs_root associated with @sb. If @sb is not a kernfs one, + * Return: the kernfs_root associated with @sb. If @sb is not a kernfs one, * %NULL is returned. */ struct kernfs_root *kernfs_root_from_sb(struct super_block *sb) @@ -167,7 +167,7 @@ struct kernfs_root *kernfs_root_from_sb(struct super_block *sb) * find the next ancestor in the path down to @child, where @parent was the * ancestor whose descendant we want to find. * - * Say the path is /a/b/c/d. @child is d, @parent is NULL. We return the root + * Say the path is /a/b/c/d. @child is d, @parent is %NULL. We return the root * node. If @parent is b, then we return the node for c. * Passing in d as @parent is not ok. 
*/ @@ -192,6 +192,8 @@ static struct kernfs_node *find_next_ancestor(struct kernfs_node *child, * kernfs_node_dentry - get a dentry for the given kernfs_node * @kn: kernfs_node for which a dentry is needed * @sb: the kernfs super_block + * + * Return: the dentry pointer */ struct dentry *kernfs_node_dentry(struct kernfs_node *kn, struct super_block *sb) @@ -296,7 +298,7 @@ static int kernfs_set_super(struct super_block *sb, struct fs_context *fc) * kernfs_super_ns - determine the namespace tag of a kernfs super_block * @sb: super_block of interest * - * Return the namespace tag associated with kernfs super_block @sb. + * Return: the namespace tag associated with kernfs super_block @sb. */ const void *kernfs_super_ns(struct super_block *sb) { @@ -313,6 +315,8 @@ const void *kernfs_super_ns(struct super_block *sb) * implementation, which should set the specified ->@fs_type and ->@flags, and * specify the hierarchy and namespace tag to mount via ->@root and ->@ns, * respectively. + * + * Return: %0 on success, -errno on failure. */ int kernfs_get_tree(struct fs_context *fc) { diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c index 0ab13824822f..45371a70caa7 100644 --- a/fs/kernfs/symlink.c +++ b/fs/kernfs/symlink.c @@ -19,7 +19,7 @@ * @name: name of the symlink * @target: target node for the symlink to point to * - * Returns the created node on success, ERR_PTR() value on error. + * Return: the created node on success, ERR_PTR() value on error. * Ownership of the link matches ownership of the target. */ struct kernfs_node *kernfs_create_link(struct kernfs_node *parent, From 40eb28dc17f87cfac69d7755447039e92ac5fbda Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 22 Nov 2022 15:35:57 +0200 Subject: [PATCH 2155/4122] device property: Get rid of __PROPERTY_ENTRY_ARRAY_EL*SIZE*() First of all, _ELEMENT_SIZE() repeats existing sizeof_field() macro. Second, usage of _ARRAY_ELSIZE_LEN() adds unnecessary indirection to the data layout. 
It's more understandable when the data structure is placed explicitly. That said, get rid of those macros by replacing them with the existing helper and explicit data structure layout. Signed-off-by: Andy Shevchenko Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/20221122133600.49897-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/property.h | 34 ++++++++++++++-------------------- 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/include/linux/property.h b/include/linux/property.h index 83674f968a8f..04a6b1433cd9 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -12,6 +12,7 @@ #include #include +#include #include struct device; @@ -303,24 +304,14 @@ struct property_entry { * crafted to avoid gcc-4.4.4's problems with initialization of anon unions * and structs. */ - -#define __PROPERTY_ENTRY_ELEMENT_SIZE(_elem_) \ - sizeof(((struct property_entry *)NULL)->value._elem_[0]) - -#define __PROPERTY_ENTRY_ARRAY_ELSIZE_LEN(_name_, _elsize_, _Type_, \ - _val_, _len_) \ -(struct property_entry) { \ - .name = _name_, \ - .length = (_len_) * (_elsize_), \ - .type = DEV_PROP_##_Type_, \ - { .pointer = _val_ }, \ +#define __PROPERTY_ENTRY_ARRAY_LEN(_name_, _elem_, _Type_, _val_, _len_) \ +(struct property_entry) { \ + .name = _name_, \ + .length = (_len_) * sizeof_field(struct property_entry, value._elem_[0]), \ + .type = DEV_PROP_##_Type_, \ + { .pointer = _val_ }, \ } -#define __PROPERTY_ENTRY_ARRAY_LEN(_name_, _elem_, _Type_, _val_, _len_)\ - __PROPERTY_ENTRY_ARRAY_ELSIZE_LEN(_name_, \ - __PROPERTY_ENTRY_ELEMENT_SIZE(_elem_), \ - _Type_, _val_, _len_) - #define PROPERTY_ENTRY_U8_ARRAY_LEN(_name_, _val_, _len_) \ __PROPERTY_ENTRY_ARRAY_LEN(_name_, u8_data, U8, _val_, _len_) #define PROPERTY_ENTRY_U16_ARRAY_LEN(_name_, _val_, _len_) \ @@ -332,9 +323,12 @@ struct property_entry { #define PROPERTY_ENTRY_STRING_ARRAY_LEN(_name_, _val_, _len_) \ __PROPERTY_ENTRY_ARRAY_LEN(_name_, str, STRING, 
_val_, _len_) #define PROPERTY_ENTRY_REF_ARRAY_LEN(_name_, _val_, _len_) \ - __PROPERTY_ENTRY_ARRAY_ELSIZE_LEN(_name_, \ - sizeof(struct software_node_ref_args), \ - REF, _val_, _len_) +(struct property_entry) { \ + .name = _name_, \ + .length = (_len_) * sizeof(struct software_node_ref_args), \ + .type = DEV_PROP_REF, \ + { .pointer = _val_ }, \ +} #define PROPERTY_ENTRY_U8_ARRAY(_name_, _val_) \ PROPERTY_ENTRY_U8_ARRAY_LEN(_name_, _val_, ARRAY_SIZE(_val_)) @@ -352,7 +346,7 @@ struct property_entry { #define __PROPERTY_ENTRY_ELEMENT(_name_, _elem_, _Type_, _val_) \ (struct property_entry) { \ .name = _name_, \ - .length = __PROPERTY_ENTRY_ELEMENT_SIZE(_elem_), \ + .length = sizeof_field(struct property_entry, value._elem_[0]), \ .is_inline = true, \ .type = DEV_PROP_##_Type_, \ { .value = { ._elem_[0] = _val_ } }, \ From c6c76563bd13871739539e20fd3116159e491f5b Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 22 Nov 2022 15:35:58 +0200 Subject: [PATCH 2156/4122] device property: Move PROPERTY_ENTRY_BOOL() a bit down Let's order ARRAY and non-ARRAY macros in the same way. The PROPERTY_ENTRY_BOOL() is special, move it a bit down in the code so it won't break ordering of the rest. 
Signed-off-by: Andy Shevchenko Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/20221122133600.49897-2-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/property.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/include/linux/property.h b/include/linux/property.h index 04a6b1433cd9..a1f846a15113 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -322,6 +322,7 @@ struct property_entry { __PROPERTY_ENTRY_ARRAY_LEN(_name_, u64_data, U64, _val_, _len_) #define PROPERTY_ENTRY_STRING_ARRAY_LEN(_name_, _val_, _len_) \ __PROPERTY_ENTRY_ARRAY_LEN(_name_, str, STRING, _val_, _len_) + #define PROPERTY_ENTRY_REF_ARRAY_LEN(_name_, _val_, _len_) \ (struct property_entry) { \ .name = _name_, \ @@ -340,7 +341,7 @@ struct property_entry { PROPERTY_ENTRY_U64_ARRAY_LEN(_name_, _val_, ARRAY_SIZE(_val_)) #define PROPERTY_ENTRY_STRING_ARRAY(_name_, _val_) \ PROPERTY_ENTRY_STRING_ARRAY_LEN(_name_, _val_, ARRAY_SIZE(_val_)) -#define PROPERTY_ENTRY_REF_ARRAY(_name_, _val_) \ +#define PROPERTY_ENTRY_REF_ARRAY(_name_, _val_) \ PROPERTY_ENTRY_REF_ARRAY_LEN(_name_, _val_, ARRAY_SIZE(_val_)) #define __PROPERTY_ENTRY_ELEMENT(_name_, _elem_, _Type_, _val_) \ @@ -363,12 +364,6 @@ struct property_entry { #define PROPERTY_ENTRY_STRING(_name_, _val_) \ __PROPERTY_ENTRY_ELEMENT(_name_, str, STRING, _val_) -#define PROPERTY_ENTRY_BOOL(_name_) \ -(struct property_entry) { \ - .name = _name_, \ - .is_inline = true, \ -} - #define PROPERTY_ENTRY_REF(_name_, _ref_, ...) 
\ (struct property_entry) { \ .name = _name_, \ @@ -377,9 +372,14 @@ struct property_entry { { .pointer = &SOFTWARE_NODE_REFERENCE(_ref_, ##__VA_ARGS__), }, \ } +#define PROPERTY_ENTRY_BOOL(_name_) \ +(struct property_entry) { \ + .name = _name_, \ + .is_inline = true, \ +} + struct property_entry * property_entries_dup(const struct property_entry *properties); - void property_entries_free(const struct property_entry *properties); bool device_dma_supported(const struct device *dev); From 4d57b4f215e8ba86c36540eaccd3b17bc5ee39c0 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 22 Nov 2022 15:35:59 +0200 Subject: [PATCH 2157/4122] device property: Rename goto label to be more precise In the fwnode_property_match_string() the goto label out has an additional task. Rename the label to be more precise about what is going to happen when jumping to it. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221122133600.49897-3-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/property.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/base/property.c b/drivers/base/property.c index f7b5aa8fcf28..ed74083c179d 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -482,12 +482,13 @@ int fwnode_property_match_string(const struct fwnode_handle *fwnode, ret = fwnode_property_read_string_array(fwnode, propname, values, nval); if (ret < 0) - goto out; + goto out_free; ret = match_string(values, nval, string); if (ret < 0) ret = -ENODATA; -out: + +out_free: kfree(values); return ret; } From 9dc5f12f95e00fb0e0500ea4a8da0c0f29e718cb Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 22 Nov 2022 15:36:00 +0200 Subject: [PATCH 2158/4122] device property: Add a blank line in Kconfig of tests Seems the blank line to separate entries in Kconfig was missing. Add it. 
Signed-off-by: Andy Shevchenko Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/20221122133600.49897-4-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/test/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/base/test/Kconfig b/drivers/base/test/Kconfig index 2f3fa31a948e..610a1ba7a467 100644 --- a/drivers/base/test/Kconfig +++ b/drivers/base/test/Kconfig @@ -8,6 +8,7 @@ config TEST_ASYNC_DRIVER_PROBE The module name will be test_async_driver_probe.ko If unsure say N. + config DRIVER_PE_KUNIT_TEST bool "KUnit Tests for property entry API" if !KUNIT_ALL_TESTS depends on KUNIT=y From 6865788f5ad998f261e37f6b029d61d3bb7dc373 Mon Sep 17 00:00:00 2001 From: Andra Paraschiv Date: Tue, 8 Nov 2022 20:59:12 +0200 Subject: [PATCH 2159/4122] MAINTAINERS: Update entries from the Nitro Enclaves section Update the list of maintainers for the Nitro Enclaves project. Alex (lexnv@) is not working at Amazon anymore and there will be the same case for me starting with 2023. Add a reference to the mailing list of the Nitro Enclaves development team. 
Signed-off-by: Andra Paraschiv Acked-by: Alexandru Vasile Link: https://lore.kernel.org/r/20221108185912.15792-1-andraprs@amazon.com Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 38bae79369a3..33c76a218c0c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14569,10 +14569,9 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/dinguyen/linux.git F: arch/nios2/ NITRO ENCLAVES (NE) -M: Andra Paraschiv -M: Alexandru Vasile M: Alexandru Ciobotaru L: linux-kernel@vger.kernel.org +L: The AWS Nitro Enclaves Team S: Supported W: https://aws.amazon.com/ec2/nitro/nitro-enclaves/ F: Documentation/virt/ne_overview.rst From 83f47eea742c1152c237398fc040ceba04fc5d76 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Wed, 16 Nov 2022 14:47:34 +0200 Subject: [PATCH 2160/4122] mei: add timeout to send When the driver wakes up the firmware from the low power state, it is sending a memory ready message. The send is done via synchronous/blocking function to ensure that firmware is in ready state. However, in case of firmware undergoing reset the send might block forever. To address this issue a timeout is added to blocking write command on the internal bus. Introduce the __mei_cl_send_timeout function to use instead of __mei_cl_send in cases where timeout is required. The mei_cl_write has only two callers and there is no need to split it into two functions. 
Signed-off-by: Alexander Usyskin Link: https://lore.kernel.org/r/20221116124735.2493847-2-alexander.usyskin@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/bus-fixup.c | 7 +++++-- drivers/misc/mei/bus.c | 22 +++++++++++++++++++++- drivers/misc/mei/client.c | 20 ++++++++++++++++---- drivers/misc/mei/client.h | 2 +- drivers/misc/mei/main.c | 2 +- drivers/misc/mei/mei_dev.h | 2 ++ 6 files changed, 46 insertions(+), 9 deletions(-) diff --git a/drivers/misc/mei/bus-fixup.c b/drivers/misc/mei/bus-fixup.c index 71fbf0bc8453..90023c34666e 100644 --- a/drivers/misc/mei/bus-fixup.c +++ b/drivers/misc/mei/bus-fixup.c @@ -188,17 +188,20 @@ static int mei_fwver(struct mei_cl_device *cldev) return ret; } +#define GFX_MEMORY_READY_TIMEOUT 200 /* timeout in milliseconds */ + static int mei_gfx_memory_ready(struct mei_cl_device *cldev) { struct mkhi_gfx_mem_ready req = {0}; - unsigned int mode = MEI_CL_IO_TX_INTERNAL; + unsigned int mode = MEI_CL_IO_TX_INTERNAL | MEI_CL_IO_TX_BLOCKING; req.hdr.group_id = MKHI_GROUP_ID_GFX; req.hdr.command = MKHI_GFX_MEMORY_READY_CMD_REQ; req.flags = MKHI_GFX_MEM_READY_PXP_ALLOWED; dev_dbg(&cldev->dev, "Sending memory ready command\n"); - return __mei_cl_send(cldev->cl, (u8 *)&req, sizeof(req), 0, mode); + return __mei_cl_send_timeout(cldev->cl, (u8 *)&req, sizeof(req), 0, + mode, GFX_MEMORY_READY_TIMEOUT); } static void mei_mkhi_fix(struct mei_cl_device *cldev) diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index 46aa3554e97b..fdb5f7331695 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -33,6 +33,26 @@ */ ssize_t __mei_cl_send(struct mei_cl *cl, const u8 *buf, size_t length, u8 vtag, unsigned int mode) +{ + return __mei_cl_send_timeout(cl, buf, length, vtag, mode, MAX_SCHEDULE_TIMEOUT); +} + +/** + * __mei_cl_send_timeout - internal client send (write) + * + * @cl: host client + * @buf: buffer to send + * @length: buffer length + * @vtag: virtual tag + * @mode: sending mode + * @timeout: send 
timeout in milliseconds. + * effective only for blocking writes: the MEI_CL_IO_TX_BLOCKING mode bit is set. + * set timeout to the MAX_SCHEDULE_TIMEOUT to maixum allowed wait. + * + * Return: written size bytes or < 0 on error + */ +ssize_t __mei_cl_send_timeout(struct mei_cl *cl, const u8 *buf, size_t length, u8 vtag, + unsigned int mode, unsigned long timeout) { struct mei_device *bus; struct mei_cl_cb *cb; @@ -101,7 +121,7 @@ ssize_t __mei_cl_send(struct mei_cl *cl, const u8 *buf, size_t length, u8 vtag, cb->blocking = !!(mode & MEI_CL_IO_TX_BLOCKING); memcpy(cb->buf.data, buf, length); - rets = mei_cl_write(cl, cb); + rets = mei_cl_write(cl, cb, timeout); out: mutex_unlock(&bus->device_lock); diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c index 0b2fbe1335a7..b4c104907ce2 100644 --- a/drivers/misc/mei/client.c +++ b/drivers/misc/mei/client.c @@ -1926,10 +1926,13 @@ err: * * @cl: host client * @cb: write callback with filled data + * @timeout: send timeout in milliseconds. + * effective only for blocking writes: the cb->blocking is set. + * set timeout to the MAX_SCHEDULE_TIMEOUT to maixum allowed wait. * * Return: number of bytes sent on success, <0 on failure. 
*/ -ssize_t mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb) +ssize_t mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb, unsigned long timeout) { struct mei_device *dev; struct mei_msg_data *buf; @@ -2056,11 +2059,20 @@ out: if (blocking && cl->writing_state != MEI_WRITE_COMPLETE) { mutex_unlock(&dev->device_lock); - rets = wait_event_interruptible(cl->tx_wait, - cl->writing_state == MEI_WRITE_COMPLETE || - (!mei_cl_is_connected(cl))); + rets = wait_event_interruptible_timeout(cl->tx_wait, + cl->writing_state == MEI_WRITE_COMPLETE || + (!mei_cl_is_connected(cl)), + msecs_to_jiffies(timeout)); mutex_lock(&dev->device_lock); + /* clean all queue on timeout as something fatal happened */ + if (rets == 0) { + rets = -ETIME; + mei_io_tx_list_free_cl(&dev->write_list, cl, NULL); + mei_io_tx_list_free_cl(&dev->write_waiting_list, cl, NULL); + } /* wait_event_interruptible returns -ERESTARTSYS */ + if (rets > 0) + rets = 0; if (rets) { if (signal_pending(current)) rets = -EINTR; diff --git a/drivers/misc/mei/client.h b/drivers/misc/mei/client.h index 418056fb1489..9052860bcfe0 100644 --- a/drivers/misc/mei/client.h +++ b/drivers/misc/mei/client.h @@ -246,7 +246,7 @@ int mei_cl_connect(struct mei_cl *cl, struct mei_me_client *me_cl, int mei_cl_irq_connect(struct mei_cl *cl, struct mei_cl_cb *cb, struct list_head *cmpl_list); int mei_cl_read_start(struct mei_cl *cl, size_t length, const struct file *fp); -ssize_t mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb); +ssize_t mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb, unsigned long timeout); int mei_cl_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb, struct list_head *cmpl_list); diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c index 930887e7e38d..632d4ae21e46 100644 --- a/drivers/misc/mei/main.c +++ b/drivers/misc/mei/main.c @@ -383,7 +383,7 @@ static ssize_t mei_write(struct file *file, const char __user *ubuf, goto out; } - rets = mei_cl_write(cl, cb); + rets = mei_cl_write(cl, cb, 
MAX_SCHEDULE_TIMEOUT); out: mutex_unlock(&dev->device_lock); return rets; diff --git a/drivers/misc/mei/mei_dev.h b/drivers/misc/mei/mei_dev.h index 6bb3e1ba9ded..c1618a44c5a4 100644 --- a/drivers/misc/mei/mei_dev.h +++ b/drivers/misc/mei/mei_dev.h @@ -373,6 +373,8 @@ void mei_cl_bus_rescan_work(struct work_struct *work); void mei_cl_bus_dev_fixup(struct mei_cl_device *dev); ssize_t __mei_cl_send(struct mei_cl *cl, const u8 *buf, size_t length, u8 vtag, unsigned int mode); +ssize_t __mei_cl_send_timeout(struct mei_cl *cl, const u8 *buf, size_t length, u8 vtag, + unsigned int mode, unsigned long timeout); ssize_t __mei_cl_recv(struct mei_cl *cl, u8 *buf, size_t length, u8 *vtag, unsigned int mode, unsigned long timeout); bool mei_cl_bus_rx_event(struct mei_cl *cl); From 0ef77698b85603d21453daf32ae70f76ae62ccae Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Wed, 16 Nov 2022 14:47:35 +0200 Subject: [PATCH 2161/4122] mei: bus-fixup: change pxp mode only if message was sent Move the PXP mode state machine to SETUP mode only if the memory ready message was sent successfully to the firmware. Leave it in INIT mode otherwise, to allow trying to send the message again later. 
Signed-off-by: Alexander Usyskin Link: https://lore.kernel.org/r/20221116124735.2493847-3-alexander.usyskin@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/bus-fixup.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/misc/mei/bus-fixup.c b/drivers/misc/mei/bus-fixup.c index 90023c34666e..6df7679d9739 100644 --- a/drivers/misc/mei/bus-fixup.c +++ b/drivers/misc/mei/bus-fixup.c @@ -266,12 +266,13 @@ static void mei_gsc_mkhi_fix_ver(struct mei_cl_device *cldev) if (cldev->bus->pxp_mode == MEI_DEV_PXP_INIT) { ret = mei_gfx_memory_ready(cldev); - if (ret < 0) + if (ret < 0) { dev_err(&cldev->dev, "memory ready command failed %d\n", ret); - else + } else { dev_dbg(&cldev->dev, "memory ready command sent\n"); + cldev->bus->pxp_mode = MEI_DEV_PXP_SETUP; + } /* we go to reset after that */ - cldev->bus->pxp_mode = MEI_DEV_PXP_SETUP; goto out; } From c002f04c0bc79ec00d4beb75fb631d5bf37419bd Mon Sep 17 00:00:00 2001 From: Eli Billauer Date: Thu, 17 Nov 2022 09:18:25 +0200 Subject: [PATCH 2162/4122] char: xillybus: Fix trivial bug with mutex @unit_mutex protects @unit from being freed, so obviously it should be released after @unit is used, and not before. This is a follow-up to commit 282a4b71816b ("char: xillybus: Prevent use-after-free due to race condition") which ensures, among others, the protection of @private_data after @unit_mutex has been released. 
Reported-by: Hyunwoo Kim Signed-off-by: Eli Billauer Link: https://lore.kernel.org/r/20221117071825.3942-1-eli.billauer@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/char/xillybus/xillybus_class.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/char/xillybus/xillybus_class.c b/drivers/char/xillybus/xillybus_class.c index 0f238648dcfe..e9a288e61c15 100644 --- a/drivers/char/xillybus/xillybus_class.c +++ b/drivers/char/xillybus/xillybus_class.c @@ -227,14 +227,15 @@ int xillybus_find_inode(struct inode *inode, break; } - mutex_unlock(&unit_mutex); - - if (!unit) + if (!unit) { + mutex_unlock(&unit_mutex); return -ENODEV; + } *private_data = unit->private_data; *index = minor - unit->lowest_minor; + mutex_unlock(&unit_mutex); return 0; } EXPORT_SYMBOL(xillybus_find_inode); From adc40221bf676f3e722d135889a7b913b4162dc2 Mon Sep 17 00:00:00 2001 From: Yuma Ueda Date: Fri, 18 Nov 2022 22:36:31 +0900 Subject: [PATCH 2163/4122] scripts/kallsyms.c Make the comment up-to-date with current implementation The comment in scripts/kallsyms.c describing the usage of scripts/kallsyms does not reflect the latest implementation. Fix the comment to be equivalent to what the usage() function prints. Signed-off-by: Yuma Ueda Reviewed-by: Miguel Ojeda Link: https://lore.kernel.org/r/20221118133631.4554-1-cyan@0x00a1e9.dev Signed-off-by: Greg Kroah-Hartman --- scripts/kallsyms.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index 03fa07ad45d9..46d1afaaf4cd 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -5,7 +5,8 @@ * This software may be used and distributed according to the terms * of the GNU General Public License, incorporated herein by reference. 
* - * Usage: nm -n vmlinux | scripts/kallsyms [--all-symbols] > symbols.S + * Usage: kallsyms [--all-symbols] [--absolute-percpu] + * [--base-relative] in.map > out.S * * Table compression uses all the unused char codes on the symbols and * maps these to the most used substrings (tokens). For instance, it might From e6278a5445780c71cc3dfc6ceda2875838eac8e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Tue, 22 Nov 2022 14:46:43 +0100 Subject: [PATCH 2164/4122] virtio_console: Introduce an ID allocator for virtual console numbers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a virtio console port is initialized, it is registered as an hvc console using a virtual console number. If a KVM guest is started with multiple virtio console devices, the same vtermno (or virtual console number) can be used to allocate different hvc consoles, which leads to various communication problems later on. This is also reported in debugfs : # grep vtermno /sys/kernel/debug/virtio-ports/* /sys/kernel/debug/virtio-ports/vport1p1:console_vtermno: 1 /sys/kernel/debug/virtio-ports/vport2p1:console_vtermno: 1 /sys/kernel/debug/virtio-ports/vport3p1:console_vtermno: 2 /sys/kernel/debug/virtio-ports/vport4p1:console_vtermno: 3 Replace the next_vtermno global with an ID allocator and start the allocation at 1 as it is today. Also recycle IDs when a console port is removed. 
Signed-off-by: Cédric Le Goater Reviewed-by: Thomas Huth Link: https://lore.kernel.org/r/20221122134643.376184-1-clg@kaod.org Signed-off-by: Greg Kroah-Hartman --- drivers/char/virtio_console.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 9fa3c76a267f..6a821118d553 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -48,22 +49,11 @@ struct ports_driver_data { /* List of all the devices we're handling */ struct list_head portdevs; - /* - * This is used to keep track of the number of hvc consoles - * spawned by this driver. This number is given as the first - * argument to hvc_alloc(). To correctly map an initial - * console spawned via hvc_instantiate to the console being - * hooked up via hvc_alloc, we need to pass the same vtermno. - * - * We also just assume the first console being initialised was - * the first one that got used as the initial console. - */ - unsigned int next_vtermno; - /* All the console devices handled by this driver */ struct list_head consoles; }; -static struct ports_driver_data pdrvdata = { .next_vtermno = 1}; + +static struct ports_driver_data pdrvdata; static DEFINE_SPINLOCK(pdrvdata_lock); static DECLARE_COMPLETION(early_console_added); @@ -89,6 +79,8 @@ struct console { u32 vtermno; }; +static DEFINE_IDA(vtermno_ida); + struct port_buffer { char *buf; @@ -1244,18 +1236,21 @@ static int init_port_console(struct port *port) * pointers. The final argument is the output buffer size: we * can do any size, so we put PAGE_SIZE here. 
*/ - port->cons.vtermno = pdrvdata.next_vtermno; + ret = ida_alloc_min(&vtermno_ida, 1, GFP_KERNEL); + if (ret < 0) + return ret; + port->cons.vtermno = ret; port->cons.hvc = hvc_alloc(port->cons.vtermno, 0, &hv_ops, PAGE_SIZE); if (IS_ERR(port->cons.hvc)) { ret = PTR_ERR(port->cons.hvc); dev_err(port->dev, "error %d allocating hvc for port\n", ret); port->cons.hvc = NULL; + ida_free(&vtermno_ida, port->cons.vtermno); return ret; } spin_lock_irq(&pdrvdata_lock); - pdrvdata.next_vtermno++; list_add_tail(&port->cons.list, &pdrvdata.consoles); spin_unlock_irq(&pdrvdata_lock); port->guest_connected = true; @@ -1532,6 +1527,7 @@ static void unplug_port(struct port *port) list_del(&port->cons.list); spin_unlock_irq(&pdrvdata_lock); hvc_remove(port->cons.hvc); + ida_free(&vtermno_ida, port->cons.vtermno); } remove_port_data(port); From fbfc4ca465a1f8d81bf2d67d95bf7fc67c3cf0c2 Mon Sep 17 00:00:00 2001 From: Patrick Delaunay Date: Fri, 18 Nov 2022 06:39:20 +0000 Subject: [PATCH 2165/4122] nvmem: stm32: move STM32MP15_BSEC_NUM_LOWER in config Support STM32MP15_BSEC_NUM_LOWER in stm32 romem config to prepare the next SoC in STM32MP family. 
Signed-off-by: Patrick Delaunay Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221118063932.6418-2-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/stm32-romem.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/nvmem/stm32-romem.c b/drivers/nvmem/stm32-romem.c index 354be526897f..d93baee01d7b 100644 --- a/drivers/nvmem/stm32-romem.c +++ b/drivers/nvmem/stm32-romem.c @@ -22,16 +22,15 @@ /* shadow registers offest */ #define STM32MP15_BSEC_DATA0 0x200 -/* 32 (x 32-bits) lower shadow registers */ -#define STM32MP15_BSEC_NUM_LOWER 32 - struct stm32_romem_cfg { int size; + u8 lower; }; struct stm32_romem_priv { void __iomem *base; struct nvmem_config cfg; + u8 lower; }; static int stm32_romem_read(void *context, unsigned int offset, void *buf, @@ -85,7 +84,7 @@ static int stm32_bsec_read(void *context, unsigned int offset, void *buf, for (i = roffset; (i < roffset + rbytes); i += 4) { u32 otp = i >> 2; - if (otp < STM32MP15_BSEC_NUM_LOWER) { + if (otp < priv->lower) { /* read lower data from shadow registers */ val = readl_relaxed( priv->base + STM32MP15_BSEC_DATA0 + i); @@ -159,6 +158,8 @@ static int stm32_romem_probe(struct platform_device *pdev) priv->cfg.priv = priv; priv->cfg.owner = THIS_MODULE; + priv->lower = 0; + cfg = (const struct stm32_romem_cfg *) of_match_device(dev->driver->of_match_table, dev)->data; if (!cfg) { @@ -167,6 +168,7 @@ static int stm32_romem_probe(struct platform_device *pdev) priv->cfg.reg_read = stm32_romem_read; } else { priv->cfg.size = cfg->size; + priv->lower = cfg->lower; priv->cfg.reg_read = stm32_bsec_read; priv->cfg.reg_write = stm32_bsec_write; } @@ -174,8 +176,17 @@ static int stm32_romem_probe(struct platform_device *pdev) return PTR_ERR_OR_ZERO(devm_nvmem_register(dev, &priv->cfg)); } +/* + * STM32MP15 BSEC OTP regions: 4096 OTP bits (with 3072 effective bits) + * => 96 x 32-bits data words + * - Lower: 1K bits, 2:1 
redundancy, incremental bit programming + * => 32 (x 32-bits) lower shadow registers = words 0 to 31 + * - Upper: 2K bits, ECC protection, word programming only + * => 64 (x 32-bits) = words 32 to 95 + */ static const struct stm32_romem_cfg stm32mp15_bsec_cfg = { - .size = 384, /* 96 x 32-bits data words */ + .size = 384, + .lower = 32, }; static const struct of_device_id stm32_romem_of_match[] = { From d61784e6410f3df2028e6eb91b06ffed37a660e0 Mon Sep 17 00:00:00 2001 From: Patrick Delaunay Date: Fri, 18 Nov 2022 06:39:21 +0000 Subject: [PATCH 2166/4122] nvmem: stm32: add warning when upper OTPs are updated As the upper OTPs are ECC protected, they support only one 32 bits word programming. For a second modification of this word, these ECC become invalid and this OTP will be no more accessible, the shadowed value is invalid. This patch adds a warning to indicate an upper OTP update, because this operation is dangerous as OTP is not locked by the driver after the first update to avoid a second update. 
Signed-off-by: Patrick Delaunay Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221118063932.6418-3-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/stm32-romem.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvmem/stm32-romem.c b/drivers/nvmem/stm32-romem.c index d93baee01d7b..bb8aa72ba2f9 100644 --- a/drivers/nvmem/stm32-romem.c +++ b/drivers/nvmem/stm32-romem.c @@ -132,6 +132,9 @@ static int stm32_bsec_write(void *context, unsigned int offset, void *buf, } } + if (offset + bytes >= priv->lower * 4) + dev_warn(dev, "Update of upper OTPs with ECC protection (word programming, only once)\n"); + return 0; } From a3816a7d7c097c1da46aad5f5d1e229b607dce04 Mon Sep 17 00:00:00 2001 From: Patrick Delaunay Date: Fri, 18 Nov 2022 06:39:22 +0000 Subject: [PATCH 2167/4122] nvmem: stm32: add nvmem type attribute Inform NVMEM framework of type attribute for stm32-romem as NVMEM_TYPE_OTP so userspace is able to know how the data is stored in BSEC. Signed-off-by: Patrick Delaunay Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221118063932.6418-4-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/stm32-romem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvmem/stm32-romem.c b/drivers/nvmem/stm32-romem.c index bb8aa72ba2f9..6de565639d5f 100644 --- a/drivers/nvmem/stm32-romem.c +++ b/drivers/nvmem/stm32-romem.c @@ -160,6 +160,7 @@ static int stm32_romem_probe(struct platform_device *pdev) priv->cfg.dev = dev; priv->cfg.priv = priv; priv->cfg.owner = THIS_MODULE; + priv->cfg.type = NVMEM_TYPE_OTP; priv->lower = 0; From 107548adf89d581d1271cb2c81858bc793930306 Mon Sep 17 00:00:00 2001 From: Patrick Delaunay Date: Fri, 18 Nov 2022 06:39:23 +0000 Subject: [PATCH 2168/4122] dt-bindings: nvmem: add new stm32mp13 compatible for stm32-romem Add a new compatible for stm32mp13 support. 
Acked-by: Rob Herring Signed-off-by: Patrick Delaunay Reviewed-by: Fabrice Gasnier Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221118063932.6418-5-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/nvmem/st,stm32-romem.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/nvmem/st,stm32-romem.yaml b/Documentation/devicetree/bindings/nvmem/st,stm32-romem.yaml index 448a2678dc62..16f4cad2fa55 100644 --- a/Documentation/devicetree/bindings/nvmem/st,stm32-romem.yaml +++ b/Documentation/devicetree/bindings/nvmem/st,stm32-romem.yaml @@ -22,6 +22,7 @@ properties: compatible: enum: - st,stm32f4-otp + - st,stm32mp13-bsec - st,stm32mp15-bsec reg: From 06aac0e11960a7ddccc1888326b5906d017e0f24 Mon Sep 17 00:00:00 2001 From: Jiangshan Yi Date: Fri, 18 Nov 2022 06:39:24 +0000 Subject: [PATCH 2169/4122] nvmem: stm32: fix spelling typo in comment Fix spelling typo in comment. Reported-by: k2ci Signed-off-by: Jiangshan Yi Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221118063932.6418-6-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/stm32-romem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvmem/stm32-romem.c b/drivers/nvmem/stm32-romem.c index 6de565639d5f..d1d03c2ad081 100644 --- a/drivers/nvmem/stm32-romem.c +++ b/drivers/nvmem/stm32-romem.c @@ -19,7 +19,7 @@ #define STM32_SMC_WRITE_SHADOW 0x03 #define STM32_SMC_READ_OTP 0x04 -/* shadow registers offest */ +/* shadow registers offset */ #define STM32MP15_BSEC_DATA0 0x200 struct stm32_romem_cfg { From 42b868b032901075f8e9bdcd8d700cb9c0d03ba5 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Fri, 18 Nov 2022 06:39:25 +0000 Subject: [PATCH 2170/4122] dt-bindings: nvmem: Fix example Despite not being listed nor required within the top level nvmem yaml file, the "compatible" property is mandatory and is actually enforced by all the 
nvmem provider bindings. Unfortunately, the lack of compatible in the nvmem.yaml top level description file led to the example not matching anything and thus not being checked at all. Let's pick a compatible almost randomly (one which is already used with the qfprom label) to make the example at least valid from a semantic point of view and to get it checked. Signed-off-by: Miquel Raynal Acked-by: Rob Herring Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221118063932.6418-7-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/nvmem/nvmem.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/nvmem/nvmem.yaml b/Documentation/devicetree/bindings/nvmem/nvmem.yaml index 1eb22dba364c..0455506fc30f 100644 --- a/Documentation/devicetree/bindings/nvmem/nvmem.yaml +++ b/Documentation/devicetree/bindings/nvmem/nvmem.yaml @@ -67,6 +67,7 @@ examples: #include qfprom: eeprom@700000 { + compatible = "qcom,msm8974-qfprom", "qcom,qfprom"; #address-cells = <1>; #size-cells = <1>; reg = <0x00700000 0x100000>; From fb817c4ef63e8cfb6e77ae4a2875ae854c80708f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 18 Nov 2022 06:39:26 +0000 Subject: [PATCH 2171/4122] nvmem: Kconfig: Fix spelling mistake "controlls" -> "controls" There is a spelling mistake in a Kconfig description. Fix it. Signed-off-by: Colin Ian King Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221118063932.6418-8-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvmem/Kconfig b/drivers/nvmem/Kconfig index ec8a49c04003..755f551426b5 100644 --- a/drivers/nvmem/Kconfig +++ b/drivers/nvmem/Kconfig @@ -164,7 +164,7 @@ config NVMEM_MICROCHIP_OTPC depends on ARCH_AT91 || COMPILE_TEST help This driver enable the OTP controller available on Microchip SAMA7G5 - SoCs. 
It controlls the access to the OTP memory connected to it. + SoCs. It controls the access to the OTP memory connected to it. config NVMEM_MTK_EFUSE tristate "Mediatek SoCs EFUSE support" From ada84d07af6097b2addd18262668ce6cb9e15206 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Fri, 18 Nov 2022 06:39:27 +0000 Subject: [PATCH 2172/4122] nvmem: u-boot-env: add Broadcom format support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Broadcom uses U-Boot for a lot of their bcmbca family chipsets. They decided to store U-Boot environment data inside U-Boot partition and to use a custom header (with "uEnv" magic and env data length). Add support for Broadcom's specific binding and their custom format. Ref: 6b0584c19d87 ("dt-bindings: nvmem: u-boot,env: add Broadcom's variant binding") Signed-off-by: Rafał Miłecki Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221118063932.6418-9-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/u-boot-env.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/nvmem/u-boot-env.c b/drivers/nvmem/u-boot-env.c index 4fdbdccebda1..29b1d87a3c51 100644 --- a/drivers/nvmem/u-boot-env.c +++ b/drivers/nvmem/u-boot-env.c @@ -16,6 +16,7 @@ enum u_boot_env_format { U_BOOT_FORMAT_SINGLE, U_BOOT_FORMAT_REDUNDANT, + U_BOOT_FORMAT_BROADCOM, }; struct u_boot_env { @@ -40,6 +41,13 @@ struct u_boot_env_image_redundant { uint8_t data[]; } __packed; +struct u_boot_env_image_broadcom { + __le32 magic; + __le32 len; + __le32 crc32; + uint8_t data[0]; +} __packed; + static int u_boot_env_read(void *context, unsigned int offset, void *val, size_t bytes) { @@ -138,6 +146,11 @@ static int u_boot_env_parse(struct u_boot_env *priv) crc32_data_offset = offsetof(struct u_boot_env_image_redundant, data); data_offset = offsetof(struct u_boot_env_image_redundant, data); break; + case U_BOOT_FORMAT_BROADCOM: + crc32_offset = 
offsetof(struct u_boot_env_image_broadcom, crc32); + crc32_data_offset = offsetof(struct u_boot_env_image_broadcom, data); + data_offset = offsetof(struct u_boot_env_image_broadcom, data); + break; } crc32 = le32_to_cpu(*(__le32 *)(buf + crc32_offset)); crc32_data_len = priv->mtd->size - crc32_data_offset; @@ -202,6 +215,7 @@ static const struct of_device_id u_boot_env_of_match_table[] = { { .compatible = "u-boot,env", .data = (void *)U_BOOT_FORMAT_SINGLE, }, { .compatible = "u-boot,env-redundant-bool", .data = (void *)U_BOOT_FORMAT_REDUNDANT, }, { .compatible = "u-boot,env-redundant-count", .data = (void *)U_BOOT_FORMAT_REDUNDANT, }, + { .compatible = "brcm,env", .data = (void *)U_BOOT_FORMAT_BROADCOM, }, {}, }; From 27dfc44e1ba30d2d49675e21918bf4b3b3b59fa6 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Fri, 18 Nov 2022 06:39:28 +0000 Subject: [PATCH 2173/4122] dt-bindings: nvmem: Introduce the nvmem-layout container The nvmem devices description works like this: * Most cases (EEPROM & co): eeprom@x { compatible = ""; ... }; * MTD case: flash@y { compatible = ""; ... otp { compatible = "user-otp"; /* or "factory-otp" */ ... }; }; In the former case, the nvmem device is "eeprom@x", while in the latter case the nvmem device is "otp". Nvmem devices can produce nvmem cells. The current way to describe nvmem cells is to locate them by providing their static byte and bit offset and length. This information is stored in subnodes of the nvmem device. It is now a fact that such a description does not fit more advanced use cases where the location or the size of the cells may vary. There are currently three known situations which require being described differently: Kontron's SL28 VPD, ONIE's TLV table and U-Boot's environment variables. Hence, we need a way to describe the parsers that must be used in order to make the dynamic discovery of the nvmem cells. This new description must fit both use cases (the generic situation and the MTD case). 
Let's create in both cases a container node named nvmem-layout whose content will depend on the parser. Right now nvmem-layout.yaml is "empty", but references to additional layout parser bindings will be inserted in the near future. The final goal being something that looks like: * Most cases (EEPROM & co): eeprom@x { compatible = ""; ... nvmem-layout { compatible = ""; ... }; }; * MTD case: flash@y { compatible = ""; ... otp { compatible = "user-otp"; /* or "factory-otp" */ ... nvmem-layout { compatible = ""; ... }; }; }; Signed-off-by: Miquel Raynal Reviewed-by: Rob Herring Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221118063932.6418-10-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- .../bindings/nvmem/layouts/nvmem-layout.yaml | 30 +++++++++++++++++++ .../devicetree/bindings/nvmem/nvmem.yaml | 7 +++++ 2 files changed, 37 insertions(+) create mode 100644 Documentation/devicetree/bindings/nvmem/layouts/nvmem-layout.yaml diff --git a/Documentation/devicetree/bindings/nvmem/layouts/nvmem-layout.yaml b/Documentation/devicetree/bindings/nvmem/layouts/nvmem-layout.yaml new file mode 100644 index 000000000000..ecc7c37cbc1f --- /dev/null +++ b/Documentation/devicetree/bindings/nvmem/layouts/nvmem-layout.yaml @@ -0,0 +1,30 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/nvmem/layouts/nvmem-layout.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: NVMEM (Non Volatile Memory) layouts + +maintainers: + - Srinivas Kandagatla + - Michael Walle + - Miquel Raynal + +description: | + Most NVMEM layouts are static and thus do not require additional description + besides the bytes/bits offset and length. Other layouts can be less statically + define and might require dynamic reading of the NVMEM device in order to + perform their parsing. The nvmem-layout container is here to describe these. 
+ +properties: + compatible: true + + '#address-cells': false + + '#size-cells': false + +required: + - compatible + +unevaluatedProperties: false diff --git a/Documentation/devicetree/bindings/nvmem/nvmem.yaml b/Documentation/devicetree/bindings/nvmem/nvmem.yaml index 0455506fc30f..75bb93dda9df 100644 --- a/Documentation/devicetree/bindings/nvmem/nvmem.yaml +++ b/Documentation/devicetree/bindings/nvmem/nvmem.yaml @@ -39,6 +39,13 @@ properties: when it's driven low (logical '0') to allow writing. maxItems: 1 + nvmem-layout: + $ref: /schemas/nvmem/layouts/nvmem-layout.yaml + description: + Alternative to the statically defined nvmem cells, this + container may reference more advanced (dynamic) layout + parsers. + patternProperties: "@[0-9a-f]+(,[0-7])?$": type: object From ca104926184db10d486e91e64cec725e6c2bd8ae Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Fri, 18 Nov 2022 06:39:29 +0000 Subject: [PATCH 2174/4122] dt-bindings: eeprom: Inherit from nvmem.yaml EEPROMs can be nvmem providers. Let's make all EEPROM bindings reference nvmem.yaml as they should, so that nvmem cells and layout parsers can be safely described within the EEPROM nodes. 
Signed-off-by: Miquel Raynal Reviewed-by: Rob Herring Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221118063932.6418-11-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/eeprom/at24.yaml | 5 ++++- Documentation/devicetree/bindings/eeprom/at25.yaml | 1 + .../devicetree/bindings/eeprom/microchip,93lc46b.yaml | 1 + 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/eeprom/at24.yaml b/Documentation/devicetree/bindings/eeprom/at24.yaml index d14e0accbda8..84af0d5f52aa 100644 --- a/Documentation/devicetree/bindings/eeprom/at24.yaml +++ b/Documentation/devicetree/bindings/eeprom/at24.yaml @@ -10,6 +10,9 @@ title: I2C EEPROMs compatible with Atmel's AT24 maintainers: - Bartosz Golaszewski +allOf: + - $ref: /schemas/nvmem/nvmem.yaml + select: properties: compatible: @@ -183,7 +186,7 @@ required: - compatible - reg -additionalProperties: false +unevaluatedProperties: false examples: - | diff --git a/Documentation/devicetree/bindings/eeprom/at25.yaml b/Documentation/devicetree/bindings/eeprom/at25.yaml index 8b1c997caac1..0f5a8ef996d3 100644 --- a/Documentation/devicetree/bindings/eeprom/at25.yaml +++ b/Documentation/devicetree/bindings/eeprom/at25.yaml @@ -104,6 +104,7 @@ required: allOf: - $ref: /schemas/spi/spi-peripheral-props.yaml# + - $ref: /schemas/nvmem/nvmem.yaml - if: properties: compatible: diff --git a/Documentation/devicetree/bindings/eeprom/microchip,93lc46b.yaml b/Documentation/devicetree/bindings/eeprom/microchip,93lc46b.yaml index 0c2f5ddb79c5..64cfd971c9c5 100644 --- a/Documentation/devicetree/bindings/eeprom/microchip,93lc46b.yaml +++ b/Documentation/devicetree/bindings/eeprom/microchip,93lc46b.yaml @@ -47,6 +47,7 @@ required: allOf: - $ref: /schemas/spi/spi-peripheral-props.yaml# + - $ref: /schemas/nvmem/nvmem.yaml unevaluatedProperties: false From e33cfae0eff66dca02ed482cf3ef01bb61f84fb0 Mon Sep 17 00:00:00 2001 From: Michael Walle 
Date: Fri, 18 Nov 2022 06:39:30 +0000 Subject: [PATCH 2175/4122] dt-bindings: nvmem: add YAML schema for the sl28 vpd layout Add a schema for the NVMEM layout on Kontron's sl28 boards. Signed-off-by: Michael Walle Signed-off-by: Miquel Raynal Reviewed-by: Rob Herring Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221118063932.6418-12-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- .../nvmem/layouts/kontron,sl28-vpd.yaml | 64 +++++++++++++++++++ .../bindings/nvmem/layouts/nvmem-layout.yaml | 3 + 2 files changed, 67 insertions(+) create mode 100644 Documentation/devicetree/bindings/nvmem/layouts/kontron,sl28-vpd.yaml diff --git a/Documentation/devicetree/bindings/nvmem/layouts/kontron,sl28-vpd.yaml b/Documentation/devicetree/bindings/nvmem/layouts/kontron,sl28-vpd.yaml new file mode 100644 index 000000000000..c713e23819f1 --- /dev/null +++ b/Documentation/devicetree/bindings/nvmem/layouts/kontron,sl28-vpd.yaml @@ -0,0 +1,64 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/nvmem/layouts/kontron,sl28-vpd.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: NVMEM layout of the Kontron SMARC-sAL28 vital product data + +maintainers: + - Michael Walle + +description: + The vital product data (VPD) of the sl28 boards contains a serial + number and a base MAC address. The actual MAC addresses for the + on-board ethernet devices are derived from this base MAC address by + adding an offset. + +select: false + +properties: + compatible: + const: kontron,sl28-vpd + + serial-number: + type: object + description: The board's serial number + + additionalProperties: false + + base-mac-address: + type: object + description: + Base MAC address for all on-module network interfaces. The first + argument of the phandle will be treated as an offset. 
+ + properties: + "#nvmem-cell-cells": + const: 1 + + additionalProperties: false + +required: + - compatible + +additionalProperties: false + +examples: + - | + otp-1 { + compatible = "user-otp"; + + nvmem-layout { + compatible = "kontron,sl28-vpd"; + + serial_number: serial-number { + }; + + base_mac_address: base-mac-address { + #nvmem-cell-cells = <1>; + }; + }; + }; + +... diff --git a/Documentation/devicetree/bindings/nvmem/layouts/nvmem-layout.yaml b/Documentation/devicetree/bindings/nvmem/layouts/nvmem-layout.yaml index ecc7c37cbc1f..f64ea2fa362d 100644 --- a/Documentation/devicetree/bindings/nvmem/layouts/nvmem-layout.yaml +++ b/Documentation/devicetree/bindings/nvmem/layouts/nvmem-layout.yaml @@ -17,6 +17,9 @@ description: | define and might require dynamic reading of the NVMEM device in order to perform their parsing. The nvmem-layout container is here to describe these. +oneOf: + - $ref: kontron,sl28-vpd.yaml + properties: compatible: true From b6c88f10e8bb20f0ccaabe4c0a4ac3c6c1fb8768 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Fri, 18 Nov 2022 06:39:31 +0000 Subject: [PATCH 2176/4122] dt-bindings: vendor-prefixes: Add ONIE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As described on their website (see link below), "The Open Network Install Environment (ONIE) is an open source initiative that defines an open “install environment” for modern networking hardware." It is not a proper corporation per-se but rather more a group which tries to spread the use of open source standards in the networking hardware world. 
Link: https://opencomputeproject.github.io/onie/ Signed-off-by: Miquel Raynal Acked-by: Rob Herring Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221118063932.6418-13-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/vendor-prefixes.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml index 6e323a380294..65a74026cf2b 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.yaml +++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml @@ -927,6 +927,8 @@ patternProperties: description: One Laptop Per Child "^oneplus,.*": description: OnePlus Technology (Shenzhen) Co., Ltd. + "^onie,.*": + description: Open Network Install Environment group "^onion,.*": description: Onion Corporation "^onnn,.*": From ce9c0b06abc44d51e81ce36b83a81e960034f3ee Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Fri, 18 Nov 2022 06:39:32 +0000 Subject: [PATCH 2177/4122] dt-bindings: nvmem: add YAML schema for the ONIE tlv layout Add a schema for the ONIE tlv NVMEM layout that can be found on any ONIE compatible networking device. Describe all the possible NVMEM cells that can be produced by this layout parser. 
Signed-off-by: Miquel Raynal Reviewed-by: Rob Herring Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221118063932.6418-14-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- .../bindings/nvmem/layouts/nvmem-layout.yaml | 1 + .../nvmem/layouts/onie,tlv-layout.yaml | 147 ++++++++++++++++++ 2 files changed, 148 insertions(+) create mode 100644 Documentation/devicetree/bindings/nvmem/layouts/onie,tlv-layout.yaml diff --git a/Documentation/devicetree/bindings/nvmem/layouts/nvmem-layout.yaml b/Documentation/devicetree/bindings/nvmem/layouts/nvmem-layout.yaml index f64ea2fa362d..8512ee538c4c 100644 --- a/Documentation/devicetree/bindings/nvmem/layouts/nvmem-layout.yaml +++ b/Documentation/devicetree/bindings/nvmem/layouts/nvmem-layout.yaml @@ -19,6 +19,7 @@ description: | oneOf: - $ref: kontron,sl28-vpd.yaml + - $ref: onie,tlv-layout.yaml properties: compatible: true diff --git a/Documentation/devicetree/bindings/nvmem/layouts/onie,tlv-layout.yaml b/Documentation/devicetree/bindings/nvmem/layouts/onie,tlv-layout.yaml new file mode 100644 index 000000000000..5a0e7671aa3f --- /dev/null +++ b/Documentation/devicetree/bindings/nvmem/layouts/onie,tlv-layout.yaml @@ -0,0 +1,147 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/nvmem/layouts/onie,tlv-layout.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: NVMEM layout of the ONIE tlv table + +maintainers: + - Miquel Raynal + +description: + Modern networking hardware implementing the Open Compute Project ONIE + infrastructure shall provide a non-volatile memory with a table whose the + content is well specified and gives many information about the manufacturer + (name, country of manufacture, etc) as well as device caracteristics (serial + number, hardware version, mac addresses, etc). The underlaying device type + (flash, EEPROM,...) is not specified. 
The exact location of each value is also + dynamic and should be discovered at run time because it depends on the + parameters the manufacturer decided to embed. + +select: false + +properties: + compatible: + const: onie,tlv-layout + + product-name: + type: object + additionalProperties: false + + part-number: + type: object + additionalProperties: false + + serial-number: + type: object + additionalProperties: false + + mac-address: + type: object + description: + Base MAC address for all on-module network interfaces. The first + argument of the phandle will be treated as an offset. + + properties: + "#nvmem-cell-cells": + const: 1 + + additionalProperties: false + + manufacture-date: + type: object + additionalProperties: false + + device-version: + type: object + additionalProperties: false + + label-revision: + type: object + additionalProperties: false + + platforn-name: + type: object + additionalProperties: false + + onie-version: + type: object + additionalProperties: false + + num-macs: + type: object + additionalProperties: false + + manufacturer: + type: object + additionalProperties: false + + country-code: + type: object + additionalProperties: false + + vendor: + type: object + additionalProperties: false + + diag-version: + type: object + additionalProperties: false + + service-tag: + type: object + additionalProperties: false + + vendor-extension: + type: object + additionalProperties: false + +required: + - compatible + +additionalProperties: false + +examples: + - | + spi { + #address-cells = <1>; + #size-cells = <0>; + + eeprom@56 { + compatible = "atmel,24c64"; + read-only; + reg = <0x56>; + + nvmem-layout { + compatible = "onie,tlv-layout"; + + serial-number { + }; + }; + }; + }; + + - | + spi { + #address-cells = <1>; + #size-cells = <0>; + + flash@0 { + compatible = "m25p80", "jedec,spi-nor"; + reg = <0>; + + otp { + compatible = "user-otp"; + + nvmem-layout { + compatible = "onie,tlv-layout"; + + mac-address { + #nvmem-cell-cells = <1>; + 
}; + }; + }; + }; + }; +... From 97e1a5309190aca528c7e12697a898bda793a460 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 18 Nov 2022 06:52:35 +0000 Subject: [PATCH 2178/4122] slimbus: qcom-ngd-ctrl: check for device runtime PM status during ISR Slimbus core interrupt is getting fired after suspend. At this point ADSP slimbus hardware is off with gated clocks which is leading to an unclocked access when HLOS slimbus tries to read the interrupt status register in the ISR. Co-developed-by: Chandana Kishori Chiluveru Signed-off-by: Chandana Kishori Chiluveru Signed-off-by: Krzysztof Kozlowski Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221118065246.6835-2-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/slimbus/qcom-ngd-ctrl.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/slimbus/qcom-ngd-ctrl.c b/drivers/slimbus/qcom-ngd-ctrl.c index 76c5e446d243..964adf77b51b 100644 --- a/drivers/slimbus/qcom-ngd-ctrl.c +++ b/drivers/slimbus/qcom-ngd-ctrl.c @@ -763,7 +763,14 @@ static irqreturn_t qcom_slim_ngd_interrupt(int irq, void *d) { struct qcom_slim_ngd_ctrl *ctrl = d; void __iomem *base = ctrl->ngd->base; - u32 stat = readl(base + NGD_INT_STAT); + u32 stat; + + if (pm_runtime_suspended(ctrl->ctrl.dev)) { + dev_warn_once(ctrl->dev, "Interrupt received while suspended\n"); + return IRQ_NONE; + } + + stat = readl(base + NGD_INT_STAT); if ((stat & NGD_INT_MSG_BUF_CONTE) || (stat & NGD_INT_MSG_TX_INVAL) || (stat & NGD_INT_DEV_ERR) || From 63c60a226c908f46589b57b5bcb220ca82a76cc5 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 18 Nov 2022 06:52:36 +0000 Subject: [PATCH 2179/4122] slimbus: qcom-ngd-ctrl: drop PM runtime counter on transfer error paths If transfer in qcom_slim_ngd_xfer_msg_sync() fails, we need to drop the PM runtime usage counter to have it balanced.
Signed-off-by: Krzysztof Kozlowski Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221118065246.6835-3-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/slimbus/qcom-ngd-ctrl.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/slimbus/qcom-ngd-ctrl.c b/drivers/slimbus/qcom-ngd-ctrl.c index 964adf77b51b..d48e58ca5d58 100644 --- a/drivers/slimbus/qcom-ngd-ctrl.c +++ b/drivers/slimbus/qcom-ngd-ctrl.c @@ -919,21 +919,29 @@ static int qcom_slim_ngd_xfer_msg_sync(struct slim_controller *ctrl, DECLARE_COMPLETION_ONSTACK(done); int ret, timeout; - pm_runtime_get_sync(ctrl->dev); + ret = pm_runtime_get_sync(ctrl->dev); + if (ret < 0) + goto pm_put; txn->comp = &done; ret = qcom_slim_ngd_xfer_msg(ctrl, txn); if (ret) - return ret; + goto pm_put; timeout = wait_for_completion_timeout(&done, HZ); if (!timeout) { dev_err(ctrl->dev, "TX timed out:MC:0x%x,mt:0x%x", txn->mc, txn->mt); - return -ETIMEDOUT; + ret = -ETIMEDOUT; + goto pm_put; } return 0; + +pm_put: + pm_runtime_put(ctrl->dev); + + return ret; } static int qcom_slim_ngd_enable_stream(struct slim_stream_runtime *rt) From 434d25728171aa72ed1b1c4d248527cbf6b6c99b Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 18 Nov 2022 06:52:37 +0000 Subject: [PATCH 2180/4122] slimbus: stream: handle unsupported bitrates for presence rate Handle errors of getting presence rate for unsupported stream bitrate, instead of sending -EINVAL in change content message. 
Signed-off-by: Krzysztof Kozlowski Reviewed-by: Konrad Dybcio Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221118065246.6835-4-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/slimbus/stream.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/slimbus/stream.c b/drivers/slimbus/stream.c index 73a2aa362957..7e9c818e66c1 100644 --- a/drivers/slimbus/stream.c +++ b/drivers/slimbus/stream.c @@ -204,7 +204,7 @@ int slim_stream_prepare(struct slim_stream_runtime *rt, { struct slim_controller *ctrl = rt->dev->ctrl; struct slim_port *port; - int num_ports, i, port_id; + int num_ports, i, port_id, prrate; if (rt->ports) { dev_err(&rt->dev->dev, "Stream already Prepared\n"); @@ -221,6 +221,13 @@ int slim_stream_prepare(struct slim_stream_runtime *rt, rt->bps = cfg->bps; rt->direction = cfg->direction; + prrate = slim_get_prate_code(cfg->rate); + if (prrate < 0) { + dev_err(&rt->dev->dev, "Cannot get presence rate for rate %d Hz\n", + cfg->rate); + return prrate; + } + if (cfg->rate % ctrl->a_framer->superfreq) { /* * data rate not exactly multiple of super frame, @@ -241,7 +248,7 @@ int slim_stream_prepare(struct slim_stream_runtime *rt, port = &rt->ports[i]; port->state = SLIM_PORT_DISCONNECTED; port->id = port_id; - port->ch.prrate = slim_get_prate_code(cfg->rate); + port->ch.prrate = prrate; port->ch.id = cfg->chs[i]; port->ch.data_fmt = SLIM_CH_DATA_FMT_NOT_DEFINED; port->ch.aux_fmt = SLIM_CH_AUX_FMT_NOT_APPLICABLE; From 4594cb4b76c6cf9acf81d3044c6a6817ed4a3781 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 18 Nov 2022 06:52:38 +0000 Subject: [PATCH 2181/4122] slimbus: qcom-ngd-ctrl: add support for 44.1 Khz frequency Add support for 44.1Khz frequency by dynamically calculating the slimbus parameters instead of statically defining them. 
Co-developed-by: Prudhvi Yarlagadda Signed-off-by: Prudhvi Yarlagadda Signed-off-by: Krzysztof Kozlowski Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221118065246.6835-5-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/slimbus/qcom-ngd-ctrl.c | 64 ++++++++++++++++++++++++++++++--- 1 file changed, 59 insertions(+), 5 deletions(-) diff --git a/drivers/slimbus/qcom-ngd-ctrl.c b/drivers/slimbus/qcom-ngd-ctrl.c index d48e58ca5d58..22720ad4c22d 100644 --- a/drivers/slimbus/qcom-ngd-ctrl.c +++ b/drivers/slimbus/qcom-ngd-ctrl.c @@ -944,6 +944,54 @@ pm_put: return ret; } +static int qcom_slim_calc_coef(struct slim_stream_runtime *rt, int *exp) +{ + struct slim_controller *ctrl = rt->dev->ctrl; + int coef; + + if (rt->ratem * ctrl->a_framer->superfreq < rt->rate) + rt->ratem++; + + coef = rt->ratem; + *exp = 0; + + /* + * CRM = Cx(2^E) is the formula we are using. + * Here C is the coffecient and E is the exponent. + * CRM is the Channel Rate Multiplier. + * Coefficeint should be either 1 or 3 and exponenet + * should be an integer between 0 to 9, inclusive. + */ + while (1) { + while ((coef & 0x1) != 0x1) { + coef >>= 1; + *exp = *exp + 1; + } + + if (coef <= 3) + break; + + coef++; + } + + /* + * we rely on the coef value (1 or 3) to set a bit + * in the slimbus message packet. This bit is + * BIT(5) which is the segment rate coefficient. 
+ */ + if (coef == 1) { + if (*exp > 9) + return -EIO; + coef = 0; + } else { + if (*exp > 8) + return -EIO; + coef = 1; + } + + return coef; +} + static int qcom_slim_ngd_enable_stream(struct slim_stream_runtime *rt) { struct slim_device *sdev = rt->dev; @@ -967,16 +1015,22 @@ static int qcom_slim_ngd_enable_stream(struct slim_stream_runtime *rt) struct slim_port *port = &rt->ports[i]; if (txn.msg->num_bytes == 0) { - int seg_interval = SLIM_SLOTS_PER_SUPERFRAME/rt->ratem; - int exp; + int exp = 0, coef = 0; wbuf[txn.msg->num_bytes++] = sdev->laddr; wbuf[txn.msg->num_bytes] = rt->bps >> 2 | (port->ch.aux_fmt << 6); - /* Data channel segment interval not multiple of 3 */ - exp = seg_interval % 3; - if (exp) + /* calculate coef dynamically */ + coef = qcom_slim_calc_coef(rt, &exp); + if (coef < 0) { + dev_err(&sdev->dev, + "%s: error calculating coef %d\n", __func__, + coef); + return -EIO; + } + + if (coef) wbuf[txn.msg->num_bytes] |= BIT(5); txn.msg->num_bytes++; From a82b1ec34e9bea94058f429560e311e5ca634356 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 18 Nov 2022 06:52:39 +0000 Subject: [PATCH 2182/4122] slimbus: stream: add checks for invalid unprepare/disable usage slim_disable_stream() and slim_stream_unprepare() are exported, so add sanity checks preventing unmatched/invalid calls. 
Signed-off-by: Krzysztof Kozlowski Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221118065246.6835-6-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/slimbus/stream.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/slimbus/stream.c b/drivers/slimbus/stream.c index 7e9c818e66c1..1d6b38657917 100644 --- a/drivers/slimbus/stream.c +++ b/drivers/slimbus/stream.c @@ -414,6 +414,9 @@ int slim_stream_disable(struct slim_stream_runtime *stream) struct slim_controller *ctrl = stream->dev->ctrl; int ret, i; + if (!stream->ports || !stream->num_ports) + return -EINVAL; + if (ctrl->disable_stream) ctrl->disable_stream(stream); @@ -445,6 +448,9 @@ int slim_stream_unprepare(struct slim_stream_runtime *stream) { int i; + if (!stream->ports || !stream->num_ports) + return -EINVAL; + for (i = 0; i < stream->num_ports; i++) slim_disconnect_port(stream, &stream->ports[i]); From 8c8112d7c442579cca821836bbcc46b747ceca74 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 18 Nov 2022 06:52:40 +0000 Subject: [PATCH 2183/4122] slimbus: qcom-ctrl: drop unneeded qcom,apq8064-slim compatible Bindings require usage of fallback "qcom,slim" compatible, so "qcom,apq8064-slim" is redundant. 
Signed-off-by: Krzysztof Kozlowski Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221118065246.6835-7-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/slimbus/qcom-ctrl.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/slimbus/qcom-ctrl.c b/drivers/slimbus/qcom-ctrl.c index c0c4f895d76e..bb106eab8ae2 100644 --- a/drivers/slimbus/qcom-ctrl.c +++ b/drivers/slimbus/qcom-ctrl.c @@ -718,7 +718,6 @@ static const struct dev_pm_ops qcom_slim_dev_pm_ops = { static const struct of_device_id qcom_slim_dt_match[] = { { .compatible = "qcom,slim", }, - { .compatible = "qcom,apq8064-slim", }, {} }; From 3d58b933c9eb2da5745c485bc7008d29c0eaddac Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 18 Nov 2022 06:52:41 +0000 Subject: [PATCH 2184/4122] slimbus: qcom-ctrl: use devm_platform_ioremap_resource_byname() Simplify the code with devm_platform_ioremap_resource_byname(). Signed-off-by: Krzysztof Kozlowski Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221118065246.6835-8-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/slimbus/qcom-ctrl.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/slimbus/qcom-ctrl.c b/drivers/slimbus/qcom-ctrl.c index bb106eab8ae2..400b7b385a44 100644 --- a/drivers/slimbus/qcom-ctrl.c +++ b/drivers/slimbus/qcom-ctrl.c @@ -488,7 +488,6 @@ static int qcom_slim_probe(struct platform_device *pdev) { struct qcom_slim_ctrl *ctrl; struct slim_controller *sctrl; - struct resource *slim_mem; int ret, ver; ctrl = devm_kzalloc(&pdev->dev, sizeof(*ctrl), GFP_KERNEL); @@ -519,8 +518,7 @@ static int qcom_slim_probe(struct platform_device *pdev) platform_set_drvdata(pdev, ctrl); dev_set_drvdata(ctrl->dev, ctrl); - slim_mem = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ctrl"); - ctrl->base = devm_ioremap_resource(ctrl->dev, slim_mem); + ctrl->base = devm_platform_ioremap_resource_byname(pdev, "ctrl"); if 
(IS_ERR(ctrl->base)) return PTR_ERR(ctrl->base); From 1d01bcb4659cfca87d92075b87cdadb0a9897d14 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 18 Nov 2022 06:52:42 +0000 Subject: [PATCH 2185/4122] slimbus: qcom-ngd-ctrl: use devm_platform_get_and_ioremap_resource() Simplify the code with devm_platform_get_and_ioremap_resource(). Signed-off-by: Krzysztof Kozlowski Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221118065246.6835-9-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/slimbus/qcom-ngd-ctrl.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/slimbus/qcom-ngd-ctrl.c b/drivers/slimbus/qcom-ngd-ctrl.c index 22720ad4c22d..a6f3b6860d8f 100644 --- a/drivers/slimbus/qcom-ngd-ctrl.c +++ b/drivers/slimbus/qcom-ngd-ctrl.c @@ -1597,7 +1597,6 @@ static int qcom_slim_ngd_ctrl_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct qcom_slim_ngd_ctrl *ctrl; - struct resource *res; int ret; struct pdr_service *pds; @@ -1607,8 +1606,7 @@ static int qcom_slim_ngd_ctrl_probe(struct platform_device *pdev) dev_set_drvdata(dev, ctrl); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - ctrl->base = devm_ioremap_resource(dev, res); + ctrl->base = devm_platform_get_and_ioremap_resource(pdev, 0, NULL); if (IS_ERR(ctrl->base)) return PTR_ERR(ctrl->base); From 319a538d618fea33434387c4502361bb8f047e11 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 18 Nov 2022 06:52:43 +0000 Subject: [PATCH 2186/4122] slimbus: qcom-ngd-ctrl: reinit the reconf completion flag Reinitialize the reconf completion flag when ngd registers are not retained or when enumeration is lost for ngd.
Signed-off-by: Krzysztof Kozlowski Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221118065246.6835-10-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/slimbus/qcom-ngd-ctrl.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/slimbus/qcom-ngd-ctrl.c b/drivers/slimbus/qcom-ngd-ctrl.c index a6f3b6860d8f..77aa6d26476c 100644 --- a/drivers/slimbus/qcom-ngd-ctrl.c +++ b/drivers/slimbus/qcom-ngd-ctrl.c @@ -1205,6 +1205,12 @@ static int qcom_slim_ngd_power_up(struct qcom_slim_ngd_ctrl *ctrl) return 0; } + /* + * Reinitialize only when registers are not retained or when enumeration + * is lost for ngd. + */ + reinit_completion(&ctrl->reconf); + writel_relaxed(DEF_NGD_INT_MASK, ngd->base + NGD_INT_EN); rx_msgq = readl_relaxed(ngd->base + NGD_RX_MSGQ_CFG); From c53627f83abc9fb4ac5cbd9dc1c898008bcc2ac8 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 18 Nov 2022 06:52:44 +0000 Subject: [PATCH 2187/4122] dt-bindings: slimbus: convert bus description to DT schema Convert the SLIMbus bus description bindings to DT Schema. 
Signed-off-by: Krzysztof Kozlowski Reviewed-by: Rob Herring Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221118065246.6835-11-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/slimbus/bus.txt | 60 ------------ .../bindings/slimbus/slim-ngd-qcom-ctrl.txt | 2 - .../bindings/slimbus/slim-qcom-ctrl.txt | 3 - .../devicetree/bindings/slimbus/slimbus.yaml | 95 +++++++++++++++++++ 4 files changed, 95 insertions(+), 65 deletions(-) delete mode 100644 Documentation/devicetree/bindings/slimbus/bus.txt create mode 100644 Documentation/devicetree/bindings/slimbus/slimbus.yaml diff --git a/Documentation/devicetree/bindings/slimbus/bus.txt b/Documentation/devicetree/bindings/slimbus/bus.txt deleted file mode 100644 index bbe871f82a8b..000000000000 --- a/Documentation/devicetree/bindings/slimbus/bus.txt +++ /dev/null @@ -1,60 +0,0 @@ -SLIM(Serial Low Power Interchip Media Bus) bus - -SLIMbus is a 2-wire bus, and is used to communicate with peripheral -components like audio-codec. - -Required property for SLIMbus controller node: -- compatible - name of SLIMbus controller - -Child nodes: -Every SLIMbus controller node can contain zero or more child nodes -representing slave devices on the bus. Every SLIMbus slave device is -uniquely determined by the enumeration address containing 4 fields: -Manufacturer ID, Product code, Device index, and Instance value for -the device. -If child node is not present and it is instantiated after device -discovery (slave device reporting itself present). - -In some cases it may be necessary to describe non-probeable device -details such as non-standard ways of powering up a device. In -such cases, child nodes for those devices will be present as -slaves of the SLIMbus controller, as detailed below. - -Required property for SLIMbus child node if it is present: -- reg - Should be ('Device index', 'Instance ID') from SLIMbus - Enumeration Address. 
- Device Index Uniquely identifies multiple Devices within - a single Component. - Instance ID Is for the cases where multiple Devices of the - same type or Class are attached to the bus. - -- compatible -"slimMID,PID". The textual representation of Manufacturer ID, - Product Code, shall be in lower case hexadecimal with leading - zeroes suppressed - -Optional property for SLIMbus child node if it is present: -- slim-ifc-dev - Should be phandle to SLIMBus Interface device. - Required for devices which deal with streams. - -SLIMbus example for Qualcomm's slimbus manager component: - - slim@28080000 { - compatible = "qcom,apq8064-slim", "qcom,slim"; - reg = <0x28080000 0x2000>, - interrupts = <0 33 0>; - clocks = <&lcc SLIMBUS_SRC>, <&lcc AUDIO_SLIMBUS_CLK>; - clock-names = "iface", "core"; - #address-cells = <2>; - #size-cell = <0>; - - codec_ifd: ifd@0,0{ - compatible = "slim217,60"; - reg = <0 0>; - }; - - codec: wcd9310@1,0{ - compatible = "slim217,60"; - reg = <1 0>; - slim-ifc-dev = <&codec_ifd>; - }; - }; diff --git a/Documentation/devicetree/bindings/slimbus/slim-ngd-qcom-ctrl.txt b/Documentation/devicetree/bindings/slimbus/slim-ngd-qcom-ctrl.txt index e94a2ad3a710..7c3d9eb6af5d 100644 --- a/Documentation/devicetree/bindings/slimbus/slim-ngd-qcom-ctrl.txt +++ b/Documentation/devicetree/bindings/slimbus/slim-ngd-qcom-ctrl.txt @@ -5,8 +5,6 @@ with SLIMBus slaves directly over the bus using messaging interface and communicating with master component residing on ADSP for bandwidth and data-channel management -Please refer to slimbus/bus.txt for details of the common SLIMBus bindings. 
- - compatible: Usage: required Value type: diff --git a/Documentation/devicetree/bindings/slimbus/slim-qcom-ctrl.txt b/Documentation/devicetree/bindings/slimbus/slim-qcom-ctrl.txt index 922dcb8ff24a..6d955e129f90 100644 --- a/Documentation/devicetree/bindings/slimbus/slim-qcom-ctrl.txt +++ b/Documentation/devicetree/bindings/slimbus/slim-qcom-ctrl.txt @@ -4,9 +4,6 @@ master component. Required properties: - - #address-cells - refer to Documentation/devicetree/bindings/slimbus/bus.txt - - #size-cells - refer to Documentation/devicetree/bindings/slimbus/bus.txt - - reg : Offset and length of the register region(s) for the device - reg-names : Register region name(s) referenced in reg above Required register resource entries are: diff --git a/Documentation/devicetree/bindings/slimbus/slimbus.yaml b/Documentation/devicetree/bindings/slimbus/slimbus.yaml new file mode 100644 index 000000000000..22513fb7c59a --- /dev/null +++ b/Documentation/devicetree/bindings/slimbus/slimbus.yaml @@ -0,0 +1,95 @@ +# SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/slimbus/slimbus.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: SLIM (Serial Low Power Interchip Media) bus + +maintainers: + - Srinivas Kandagatla + +description: + SLIMbus is a 2-wire bus, and is used to communicate with peripheral + components like audio-codec. + +properties: + $nodename: + pattern: "^slim(@.*|-[0-9a-f])*$" + + "#address-cells": + const: 2 + + "#size-cells": + const: 0 + +patternProperties: + "^.*@[0-9a-f]+,[0-9a-f]+$": + type: object + description: | + Every SLIMbus controller node can contain zero or more child nodes + representing slave devices on the bus. Every SLIMbus slave device is + uniquely determined by the enumeration address containing 4 fields:: + Manufacturer ID, Product code, Device index, and Instance value for the + device. 
+ + If child node is not present and it is instantiated after device + discovery (slave device reporting itself present). + + In some cases it may be necessary to describe non-probeable device + details such as non-standard ways of powering up a device. In such cases, + child nodes for those devices will be present as slaves of the SLIMbus + controller. + + properties: + compatible: + pattern: "^slim[0-9a-f]+,[0-9a-f]+$" + + reg: + maxItems: 1 + description: | + Pair of (device index, instande ID), where:: + - Device index, which uniquely identifies multiple devices within a + single component. + - Instance ID, can be used for the cases where multiple devices of + the same type or class are attached to the bus. + + required: + - compatible + - reg + + additionalProperties: true + +required: + - "#address-cells" + - "#size-cells" + +additionalProperties: true + +examples: + - | + #include + #include + #include + + soc { + #address-cells = <1>; + #size-cells = <1>; + ranges; + + slim@28080000 { + compatible = "qcom,apq8064-slim", "qcom,slim"; + reg = <0x28080000 0x2000>, <0x80207c 4>; + reg-names = "ctrl", "slew"; + interrupts = ; + clocks = <&lcc SLIMBUS_SRC>, <&lcc AUDIO_SLIMBUS_CLK>; + clock-names = "iface", "core"; + #address-cells = <2>; + #size-cells = <0>; + + audio-codec@1,0 { + compatible = "slim217,60"; + reg = <1 0>; + }; + }; + }; From 717bd3dfc4ced0a12bc177b267a0c6fd23eec620 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 18 Nov 2022 06:52:45 +0000 Subject: [PATCH 2188/4122] dt-bindings: slimbus: qcom,slim: convert to DT schema Convert the Qualcomm SoC SLIMbus controller bindings to DT Schema. 
Signed-off-by: Krzysztof Kozlowski Reviewed-by: Rob Herring Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221118065246.6835-12-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- .../bindings/slimbus/qcom,slim.yaml | 86 +++++++++++++++++++ .../bindings/slimbus/slim-qcom-ctrl.txt | 36 -------- 2 files changed, 86 insertions(+), 36 deletions(-) create mode 100644 Documentation/devicetree/bindings/slimbus/qcom,slim.yaml delete mode 100644 Documentation/devicetree/bindings/slimbus/slim-qcom-ctrl.txt diff --git a/Documentation/devicetree/bindings/slimbus/qcom,slim.yaml b/Documentation/devicetree/bindings/slimbus/qcom,slim.yaml new file mode 100644 index 000000000000..883bda58ca97 --- /dev/null +++ b/Documentation/devicetree/bindings/slimbus/qcom,slim.yaml @@ -0,0 +1,86 @@ +# SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/slimbus/qcom,slim.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm SoC SLIMbus controller + +maintainers: + - Krzysztof Kozlowski + - Srinivas Kandagatla + +description: + SLIMbus controller used when applications processor controls SLIMbus master + component. 
+ +allOf: + - $ref: slimbus.yaml# + +properties: + compatible: + items: + - enum: + - qcom,apq8064-slim + - const: qcom,slim + + reg: + items: + - description: Physical address of controller register blocks + - description: SLEW RATE register + + reg-names: + items: + - const: ctrl + - const: slew + + clocks: + items: + - description: Interface clock for this controller + - description: Interrupt for controller core's BAM + + clock-names: + items: + - const: iface + - const: core + + interrupts: + maxItems: 1 + +required: + - compatible + - reg + - reg-names + - clocks + - clock-names + - interrupts + +unevaluatedProperties: false + +examples: + - | + #include + #include + #include + + soc { + #address-cells = <1>; + #size-cells = <1>; + ranges; + + slim@28080000 { + compatible = "qcom,apq8064-slim", "qcom,slim"; + reg = <0x28080000 0x2000>, <0x80207c 4>; + reg-names = "ctrl", "slew"; + interrupts = ; + clocks = <&lcc SLIMBUS_SRC>, <&lcc AUDIO_SLIMBUS_CLK>; + clock-names = "iface", "core"; + #address-cells = <2>; + #size-cells = <0>; + + audio-codec@1,0 { + compatible = "slim217,60"; + reg = <1 0>; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/slimbus/slim-qcom-ctrl.txt b/Documentation/devicetree/bindings/slimbus/slim-qcom-ctrl.txt deleted file mode 100644 index 6d955e129f90..000000000000 --- a/Documentation/devicetree/bindings/slimbus/slim-qcom-ctrl.txt +++ /dev/null @@ -1,36 +0,0 @@ -Qualcomm SLIMbus controller -This controller is used if applications processor driver controls SLIMbus -master component. - -Required properties: - - - reg : Offset and length of the register region(s) for the device - - reg-names : Register region name(s) referenced in reg above - Required register resource entries are: - "ctrl": Physical address of controller register blocks - "slew": required for "qcom,apq8064-slim" SOC. - - compatible : should be "qcom,-slim" for SOC specific compatible - followed by "qcom,slim" for fallback. 
- - interrupts : Interrupt number used by this controller - - clocks : Interface and core clocks used by this SLIMbus controller - - clock-names : Required clock-name entries are: - "iface" : Interface clock for this controller - "core" : Interrupt for controller core's BAM - -Example: - - slim@28080000 { - compatible = "qcom,apq8064-slim", "qcom,slim"; - reg = <0x28080000 0x2000>, <0x80207C 4>; - reg-names = "ctrl", "slew"; - interrupts = <0 33 0>; - clocks = <&lcc SLIMBUS_SRC>, <&lcc AUDIO_SLIMBUS_CLK>; - clock-names = "iface", "core"; - #address-cells = <2>; - #size-cell = <0>; - - wcd9310: audio-codec@1,0{ - compatible = "slim217,60"; - reg = <1 0>; - }; - }; From 5f115bb92a631b01bee7ca2310c6c353a770656e Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 18 Nov 2022 06:52:46 +0000 Subject: [PATCH 2189/4122] dt-bindings: slimbus: qcom,slim-ngd: convert to DT schema Convert the Qualcomm SoC SLIMBus Non Generic Device (NGD) controller bindings to DT Schema. During conversion add iommus already present in DTS and extend the example based on SDM845. 
Signed-off-by: Krzysztof Kozlowski Reviewed-by: Rob Herring Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221118065246.6835-13-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- .../bindings/slimbus/qcom,slim-ngd.yaml | 120 ++++++++++++++++++ .../bindings/slimbus/slim-ngd-qcom-ctrl.txt | 82 ------------ 2 files changed, 120 insertions(+), 82 deletions(-) create mode 100644 Documentation/devicetree/bindings/slimbus/qcom,slim-ngd.yaml delete mode 100644 Documentation/devicetree/bindings/slimbus/slim-ngd-qcom-ctrl.txt diff --git a/Documentation/devicetree/bindings/slimbus/qcom,slim-ngd.yaml b/Documentation/devicetree/bindings/slimbus/qcom,slim-ngd.yaml new file mode 100644 index 000000000000..abf61c15246e --- /dev/null +++ b/Documentation/devicetree/bindings/slimbus/qcom,slim-ngd.yaml @@ -0,0 +1,120 @@ +# SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/slimbus/qcom,slim-ngd.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm SoC SLIMBus Non Generic Device (NGD) Controller + +maintainers: + - Krzysztof Kozlowski + - Srinivas Kandagatla + +description: + SLIMBus NGD controller is a light-weight driver responsible for communicating + with SLIMBus slaves directly over the bus using messaging interface and + communicating with master component residing on ADSP for bandwidth and + data-channel management + +properties: + compatible: + enum: + - qcom,slim-ngd-v1.5.0 # for MSM8996 + - qcom,slim-ngd-v2.1.0 # for SDM845 + + reg: + maxItems: 1 + + "#address-cells": + const: 1 + + "#size-cells": + const: 0 + + dmas: + maxItems: 2 + + dma-names: + items: + - const: rx + - const: tx + + interrupts: + maxItems: 1 + + iommus: + maxItems: 1 + +patternProperties: + "^slim@[0-9a-f]+$": + type: object + $ref: slimbus.yaml# + description: + Each subnode represents an instance of NGD + + properties: + reg: + maxItems: 1 + + unevaluatedProperties: false + +required: + - 
compatible + - reg + - "#address-cells" + - "#size-cells" + - dmas + - dma-names + - interrupts + +additionalProperties: false + +examples: + - | + #include + #include + + slim-ngd@171c0000 { + compatible = "qcom,slim-ngd-v2.1.0"; + reg = <0x171c0000 0x2c000>; + interrupts = ; + + dmas = <&slimbam 3>, <&slimbam 4>; + dma-names = "rx", "tx"; + iommus = <&apps_smmu 0x1806 0x0>; + #address-cells = <1>; + #size-cells = <0>; + + slim@1 { + reg = <1>; + #address-cells = <2>; + #size-cells = <0>; + + codec@1,0 { + compatible = "slim217,250"; + reg = <1 0>; + slim-ifc-dev = <&wcd9340_ifd>; + + #sound-dai-cells = <1>; + + interrupts-extended = <&tlmm 54 IRQ_TYPE_LEVEL_HIGH>; + interrupt-controller; + #interrupt-cells = <1>; + + #clock-cells = <0>; + clock-frequency = <9600000>; + clock-output-names = "mclk"; + qcom,micbias1-microvolt = <1800000>; + qcom,micbias2-microvolt = <1800000>; + qcom,micbias3-microvolt = <1800000>; + qcom,micbias4-microvolt = <1800000>; + + #address-cells = <1>; + #size-cells = <1>; + + reset-gpios = <&tlmm 64 GPIO_ACTIVE_HIGH>; + + /* Rest of the WCD9340 codec */ + }; + }; + }; diff --git a/Documentation/devicetree/bindings/slimbus/slim-ngd-qcom-ctrl.txt b/Documentation/devicetree/bindings/slimbus/slim-ngd-qcom-ctrl.txt deleted file mode 100644 index 7c3d9eb6af5d..000000000000 --- a/Documentation/devicetree/bindings/slimbus/slim-ngd-qcom-ctrl.txt +++ /dev/null @@ -1,82 +0,0 @@ -Qualcomm SLIMBus Non Generic Device (NGD) Controller binding - -SLIMBus NGD controller is a light-weight driver responsible for communicating -with SLIMBus slaves directly over the bus using messaging interface and -communicating with master component residing on ADSP for bandwidth and -data-channel management - -- compatible: - Usage: required - Value type: - Definition: must be "qcom,slim-ngd-v.." - must be one of the following. 
- "qcom,slim-ngd-v1.5.0" for MSM8996 - "qcom,slim-ngd-v2.1.0" for SDM845 - -- reg: - Usage: required - Value type: - Definition: must specify the base address and size of the controller - register space. -- dmas - Usage: required - Value type: - Definition: List of rx and tx dma channels - -- dma-names - Usage: required - Value type: - Definition: must be "rx" and "tx". - -- interrupts: - Usage: required - Value type: - Definition: must list controller IRQ. - -#address-cells - Usage: required - Value type: - Definition: Should be 1, reflecting the instance id of ngd. - -#size-cells - Usage: required - Value type: - Definition: Should be 0 - -= NGD Devices -Each subnode represents an instance of NGD, must contain the following -properties: - -- reg: - Usage: required - Value type: - Definition: Should be instance id of ngd. - -#address-cells - Usage: required - Refer to slimbus/bus.txt for details of the common SLIMBus bindings. - -#size-cells - Usage: required - Refer to slimbus/bus.txt for details of the common SLIMBus bindings. - -= EXAMPLE - -slim@91c0000 { - compatible = "qcom,slim-ngd-v1.5.0"; - reg = <0x91c0000 0x2c000>; - interrupts = <0 163 0>; - dmas = <&slimbam 3>, <&slimbam 4>; - dma-names = "rx", "tx"; - #address-cells = <1>; - #size-cells = <0>; - ngd@1 { - reg = <1>; - #address-cells = <1>; - #size-cells = <1>; - codec@1 { - compatible = "slim217,1a0"; - reg = <1 0>; - }; - }; -}; From b9bf27386dddab16bc455124c54cbeea6cb9a1ca Mon Sep 17 00:00:00 2001 From: Osama Muhammad Date: Thu, 10 Nov 2022 02:50:54 +0500 Subject: [PATCH 2190/4122] Accessiblity: speakup_soft: specifying the default driver parameters among the module params This is an enhancement which allows to specify the default driver parameters among the module parameters. Adding default variables to the speakup_soft module allows to easily set that at boot, rather than setting the sys variables after boot. 
More details can be found here: https://github.com/linux-speakup/speakup/issues/7 Signed-off-by: Osama Muhammad Reviewed-by: Samuel Thibault Link: https://lore.kernel.org/r/20221109215108.7933-2-osmtendev@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/accessibility/speakup/speakup_soft.c | 57 ++++++++++++++------ 1 file changed, 42 insertions(+), 15 deletions(-) diff --git a/drivers/accessibility/speakup/speakup_soft.c b/drivers/accessibility/speakup/speakup_soft.c index 28c8f60370cf..6d446824677b 100644 --- a/drivers/accessibility/speakup/speakup_soft.c +++ b/drivers/accessibility/speakup/speakup_soft.c @@ -33,21 +33,30 @@ static struct miscdevice synth_device, synthu_device; static int init_pos; static int misc_registered; -static struct var_t vars[] = { - /* DIRECT is put first so that module_param_named can access it easily */ - { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } }, - { CAPS_START, .u.s = {"\x01+3p" } }, - { CAPS_STOP, .u.s = {"\x01-3p" } }, - { PAUSE, .u.n = {"\x01P" } }, - { RATE, .u.n = {"\x01%ds", 2, 0, 9, 0, 0, NULL } }, - { PITCH, .u.n = {"\x01%dp", 5, 0, 9, 0, 0, NULL } }, - { INFLECTION, .u.n = {"\x01%dr", 5, 0, 9, 0, 0, NULL } }, - { VOL, .u.n = {"\x01%dv", 5, 0, 9, 0, 0, NULL } }, - { TONE, .u.n = {"\x01%dx", 1, 0, 2, 0, 0, NULL } }, - { PUNCT, .u.n = {"\x01%db", 0, 0, 3, 0, 0, NULL } }, - { VOICE, .u.n = {"\x01%do", 0, 0, 7, 0, 0, NULL } }, - { FREQUENCY, .u.n = {"\x01%df", 5, 0, 9, 0, 0, NULL } }, +enum default_vars_id { + DIRECT_ID = 0, CAPS_START_ID, CAPS_STOP_ID, + PAUSE_ID, RATE_ID, PITCH_ID, INFLECTION_ID, + VOL_ID, TONE_ID, PUNCT_ID, VOICE_ID, + FREQUENCY_ID, V_LAST_VAR_ID, + NB_ID +}; + + +static struct var_t vars[NB_ID] = { + + [DIRECT_ID] = { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } }, + [CAPS_START_ID] = { CAPS_START, .u.s = {"\x01+3p" } }, + [CAPS_STOP_ID] = { CAPS_STOP, .u.s = {"\x01-3p" } }, + [PAUSE_ID] = { PAUSE, .u.n = {"\x01P" } }, + [RATE_ID] = { RATE, .u.n = {"\x01%ds", 2, 0, 9, 0, 0, NULL } }, + [PITCH_ID] 
= { PITCH, .u.n = {"\x01%dp", 5, 0, 9, 0, 0, NULL } }, + [INFLECTION_ID] = { INFLECTION, .u.n = {"\x01%dr", 5, 0, 9, 0, 0, NULL } }, + [VOL_ID] = { VOL, .u.n = {"\x01%dv", 5, 0, 9, 0, 0, NULL } }, + [TONE_ID] = { TONE, .u.n = {"\x01%dx", 1, 0, 2, 0, 0, NULL } }, + [PUNCT_ID] = { PUNCT, .u.n = {"\x01%db", 0, 0, 3, 0, 0, NULL } }, + [VOICE_ID] = { VOICE, .u.n = {"\x01%do", 0, 0, 7, 0, 0, NULL } }, + [FREQUENCY_ID] = { FREQUENCY, .u.n = {"\x01%df", 5, 0, 9, 0, 0, NULL } }, V_LAST_VAR }; @@ -451,10 +460,28 @@ static int softsynth_adjust(struct spk_synth *synth, struct st_var_header *var) } module_param_named(start, synth_soft.startup, short, 0444); -module_param_named(direct, vars[0].u.n.default_val, int, 0444); +module_param_named(direct, vars[DIRECT_ID].u.n.default_val, int, 0444); +module_param_named(rate, vars[RATE_ID].u.n.default_val, int, 0444); +module_param_named(pitch, vars[PITCH_ID].u.n.default_val, int, 0444); +module_param_named(inflection, vars[INFLECTION_ID].u.n.default_val, int, 0444); +module_param_named(vol, vars[VOL_ID].u.n.default_val, int, 0444); +module_param_named(tone, vars[TONE_ID].u.n.default_val, int, 0444); +module_param_named(punct, vars[PUNCT_ID].u.n.default_val, int, 0444); +module_param_named(voice, vars[VOICE_ID].u.n.default_val, int, 0444); +module_param_named(frequency, vars[FREQUENCY_ID].u.n.default_val, int, 0444); + + MODULE_PARM_DESC(start, "Start the synthesizer once it is loaded."); MODULE_PARM_DESC(direct, "Set the direct variable on load."); +MODULE_PARM_DESC(rate, "Sets the rate of the synthesizer."); +MODULE_PARM_DESC(pitch, "Sets the pitch of the synthesizer."); +MODULE_PARM_DESC(inflection, "Sets the inflection of the synthesizer."); +MODULE_PARM_DESC(vol, "Sets the volume of the speech synthesizer."); +MODULE_PARM_DESC(tone, "Sets the tone of the speech synthesizer."); +MODULE_PARM_DESC(punct, "Sets the amount of punctuation spoken by the synthesizer."); +MODULE_PARM_DESC(voice, "Sets the voice used by the synthesizer."); 
+MODULE_PARM_DESC(frequency, "Sets the frequency of speech synthesizer."); module_spk_synth(synth_soft); From 5e3e27f040ec3611db14a5efe23c9108831c38be Mon Sep 17 00:00:00 2001 From: Osama Muhammad Date: Thu, 10 Nov 2022 02:50:55 +0500 Subject: [PATCH 2191/4122] Accessiblity: speakup_apollo: specifying the default driver parameters among the module params This is an enhancement which allows to specify the default driver parameters among the module parameters. Adding default variables to the speakup_apollo module allows to easily set that at boot, rather than setting the sys variables after boot. More details can be found here: https://github.com/linux-speakup/speakup/issues/7 Signed-off-by: Osama Muhammad Reviewed-by: Samuel Thibault Link: https://lore.kernel.org/r/20221109215108.7933-3-osmtendev@gmail.com Signed-off-by: Greg Kroah-Hartman --- .../accessibility/speakup/speakup_apollo.c | 46 +++++++++++++++---- 1 file changed, 37 insertions(+), 9 deletions(-) diff --git a/drivers/accessibility/speakup/speakup_apollo.c b/drivers/accessibility/speakup/speakup_apollo.c index c84a7e0864b7..d2fbb3f57221 100644 --- a/drivers/accessibility/speakup/speakup_apollo.c +++ b/drivers/accessibility/speakup/speakup_apollo.c @@ -24,15 +24,28 @@ static void do_catch_up(struct spk_synth *synth); -static struct var_t vars[] = { - { CAPS_START, .u.s = {"cap, " } }, - { CAPS_STOP, .u.s = {"" } }, - { RATE, .u.n = {"@W%d", 6, 1, 9, 0, 0, NULL } }, - { PITCH, .u.n = {"@F%x", 10, 0, 15, 0, 0, NULL } }, - { VOL, .u.n = {"@A%x", 10, 0, 15, 0, 0, NULL } }, - { VOICE, .u.n = {"@V%d", 1, 1, 6, 0, 0, NULL } }, - { LANG, .u.n = {"@=%d,", 1, 1, 4, 0, 0, NULL } }, - { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } }, + + +enum default_vars_id { + CAPS_START_ID = 0, CAPS_STOP_ID, + RATE_ID, PITCH_ID, + VOL_ID, VOICE_ID, LANG_ID, + DIRECT_ID, V_LAST_VAR_ID, + NB_ID +}; + + + + +static struct var_t vars[NB_ID] = { + [CAPS_START_ID] = { CAPS_START, .u.s = {"cap, " } }, + [CAPS_STOP_ID] = { CAPS_STOP, .u.s 
= {"" } }, + [RATE_ID] = { RATE, .u.n = {"@W%d", 6, 1, 9, 0, 0, NULL } }, + [PITCH_ID] = { PITCH, .u.n = {"@F%x", 10, 0, 15, 0, 0, NULL } }, + [VOL_ID] = { VOL, .u.n = {"@A%x", 10, 0, 15, 0, 0, NULL } }, + [VOICE_ID] = { VOICE, .u.n = {"@V%d", 1, 1, 6, 0, 0, NULL } }, + [LANG_ID] = { LANG, .u.n = {"@=%d,", 1, 1, 4, 0, 0, NULL } }, + [DIRECT_ID] = { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } }, V_LAST_VAR }; @@ -193,10 +206,25 @@ static void do_catch_up(struct spk_synth *synth) module_param_named(ser, synth_apollo.ser, int, 0444); module_param_named(dev, synth_apollo.dev_name, charp, 0444); module_param_named(start, synth_apollo.startup, short, 0444); +module_param_named(rate, vars[RATE_ID].u.n.default_val, int, 0444); +module_param_named(pitch, vars[PITCH_ID].u.n.default_val, int, 0444); +module_param_named(vol, vars[VOL_ID].u.n.default_val, int, 0444); +module_param_named(voice, vars[VOICE_ID].u.n.default_val, int, 0444); +module_param_named(lang, vars[LANG_ID].u.n.default_val, int, 0444); +module_param_named(direct, vars[DIRECT_ID].u.n.default_val, int, 0444); + MODULE_PARM_DESC(ser, "Set the serial port for the synthesizer (0-based)."); MODULE_PARM_DESC(dev, "Set the device e.g. 
ttyUSB0, for the synthesizer."); MODULE_PARM_DESC(start, "Start the synthesizer once it is loaded."); +MODULE_PARM_DESC(rate, "Set the rate variable on load."); +MODULE_PARM_DESC(pitch, "Set the pitch variable on load."); +MODULE_PARM_DESC(vol, "Set the vol variable on load."); +MODULE_PARM_DESC(voice, "Set the voice variable on load."); +MODULE_PARM_DESC(lang, "Set the lang variable on load."); +MODULE_PARM_DESC(direct, "Set the direct variable on load."); + + module_spk_synth(synth_apollo); From d5dab7ff97b8370d8bf406d9be2113b1df7d2f0c Mon Sep 17 00:00:00 2001 From: Osama Muhammad Date: Thu, 10 Nov 2022 02:50:56 +0500 Subject: [PATCH 2192/4122] Accessibility: speakup_audptr: specifying the default driver parameters among the module params This is an enhancement which allows specifying the default driver parameters among the module parameters. Adding default variables to the speakup_audptr module makes it easy to set them at boot, rather than setting the sys variables after boot. More details can be found here: https://github.com/linux-speakup/speakup/issues/7 Signed-off-by: Osama Muhammad Reviewed-by: Samuel Thibault Link: https://lore.kernel.org/r/20221109215108.7933-4-osmtendev@gmail.com Signed-off-by: Greg Kroah-Hartman --- .../accessibility/speakup/speakup_audptr.c | 42 +++++++++++++++---- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/drivers/accessibility/speakup/speakup_audptr.c b/drivers/accessibility/speakup/speakup_audptr.c index 4d16d60db9b2..55813f3e40ff 100644 --- a/drivers/accessibility/speakup/speakup_audptr.c +++ b/drivers/accessibility/speakup/speakup_audptr.c @@ -19,15 +19,24 @@ static int synth_probe(struct spk_synth *synth); static void synth_flush(struct spk_synth *synth); -static struct var_t vars[] = { - { CAPS_START, .u.s = {"\x05[f99]" } }, - { CAPS_STOP, .u.s = {"\x05[f80]" } }, - { RATE, .u.n = {"\x05[r%d]", 10, 0, 20, 100, -10, NULL } }, - { PITCH, .u.n = {"\x05[f%d]", 80, 39, 4500, 0, 0, NULL } }, - { VOL, .u.n = 
{"\x05[g%d]", 21, 0, 40, 0, 0, NULL } }, - { TONE, .u.n = {"\x05[s%d]", 9, 0, 63, 0, 0, NULL } }, - { PUNCT, .u.n = {"\x05[A%c]", 0, 0, 3, 0, 0, "nmsa" } }, - { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } }, + +enum default_vars_id { + CAPS_START_ID = 0, CAPS_STOP_ID, + RATE_ID, PITCH_ID, + VOL_ID, TONE_ID, PUNCT_ID, + DIRECT_ID, V_LAST_VAR_ID, + NB_ID +}; + +static struct var_t vars[NB_ID] = { + [CAPS_START_ID] = { CAPS_START, .u.s = {"\x05[f99]" } }, + [CAPS_STOP_ID] = { CAPS_STOP, .u.s = {"\x05[f80]" } }, + [RATE_ID] = { RATE, .u.n = {"\x05[r%d]", 10, 0, 20, 100, -10, NULL } }, + [PITCH_ID] = { PITCH, .u.n = {"\x05[f%d]", 80, 39, 4500, 0, 0, NULL } }, + [VOL_ID] = { VOL, .u.n = {"\x05[g%d]", 21, 0, 40, 0, 0, NULL } }, + [TONE_ID] = { TONE, .u.n = {"\x05[s%d]", 9, 0, 63, 0, 0, NULL } }, + [PUNCT_ID] = { PUNCT, .u.n = {"\x05[A%c]", 0, 0, 3, 0, 0, "nmsa" } }, + [DIRECT_ID] = { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } }, V_LAST_VAR }; @@ -158,10 +167,25 @@ static int synth_probe(struct spk_synth *synth) module_param_named(ser, synth_audptr.ser, int, 0444); module_param_named(dev, synth_audptr.dev_name, charp, 0444); module_param_named(start, synth_audptr.startup, short, 0444); +module_param_named(rate, vars[RATE_ID].u.n.default_val, int, 0444); +module_param_named(pitch, vars[PITCH_ID].u.n.default_val, int, 0444); +module_param_named(vol, vars[VOL_ID].u.n.default_val, int, 0444); +module_param_named(tone, vars[TONE_ID].u.n.default_val, int, 0444); +module_param_named(punct, vars[PUNCT_ID].u.n.default_val, int, 0444); +module_param_named(direct, vars[DIRECT_ID].u.n.default_val, int, 0444); + + MODULE_PARM_DESC(ser, "Set the serial port for the synthesizer (0-based)."); MODULE_PARM_DESC(dev, "Set the device e.g. 
ttyUSB0, for the synthesizer."); MODULE_PARM_DESC(start, "Start the synthesizer once it is loaded."); +MODULE_PARM_DESC(rate, "Set the rate variable on load."); +MODULE_PARM_DESC(pitch, "Set the pitch variable on load."); +MODULE_PARM_DESC(vol, "Set the vol variable on load."); +MODULE_PARM_DESC(tone, "Set the tone variable on load."); +MODULE_PARM_DESC(punct, "Set the punct variable on load."); +MODULE_PARM_DESC(direct, "Set the direct variable on load."); + module_spk_synth(synth_audptr); From 95892c4e70f391955dcd69dd25bd6be8f888590f Mon Sep 17 00:00:00 2001 From: Osama Muhammad Date: Thu, 10 Nov 2022 02:50:57 +0500 Subject: [PATCH 2193/4122] Accessiblity: speakup_bns: specifying the default driver parameters among the module params This is an enhancement which allows to specify the default driver parameters among the module parameters. Adding default variables to the speakup_bns module allows to easily set that at boot, rather than setting the sys variables after boot. More details can be found here: https://github.com/linux-speakup/speakup/issues/7 Signed-off-by: Osama Muhammad Reviewed-by: Samuel Thibault Link: https://lore.kernel.org/r/20221109215108.7933-5-osmtendev@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/accessibility/speakup/speakup_bns.c | 36 ++++++++++++++++----- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/drivers/accessibility/speakup/speakup_bns.c b/drivers/accessibility/speakup/speakup_bns.c index b8103eb117b8..60507756499c 100644 --- a/drivers/accessibility/speakup/speakup_bns.c +++ b/drivers/accessibility/speakup/speakup_bns.c @@ -16,14 +16,23 @@ #define SYNTH_CLEAR 0x18 #define PROCSPEECH '\r' -static struct var_t vars[] = { - { CAPS_START, .u.s = {"\x05\x31\x32P" } }, - { CAPS_STOP, .u.s = {"\x05\x38P" } }, - { RATE, .u.n = {"\x05%dE", 8, 1, 16, 0, 0, NULL } }, - { PITCH, .u.n = {"\x05%dP", 8, 0, 16, 0, 0, NULL } }, - { VOL, .u.n = {"\x05%dV", 8, 0, 16, 0, 0, NULL } }, - { TONE, .u.n = {"\x05%dT", 8, 0, 16, 0, 0, 
NULL } }, - { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } }, + +enum default_vars_id { + CAPS_START_ID = 0, CAPS_STOP_ID, + RATE_ID, PITCH_ID, + VOL_ID, TONE_ID, + DIRECT_ID, V_LAST_VAR_ID, + NB_ID +}; + +static struct var_t vars[NB_ID] = { + [CAPS_START_ID] = { CAPS_START, .u.s = {"\x05\x31\x32P" } }, + [CAPS_STOP_ID] = { CAPS_STOP, .u.s = {"\x05\x38P" } }, + [RATE_ID] = { RATE, .u.n = {"\x05%dE", 8, 1, 16, 0, 0, NULL } }, + [PITCH_ID] = { PITCH, .u.n = {"\x05%dP", 8, 0, 16, 0, 0, NULL } }, + [VOL_ID] = { VOL, .u.n = {"\x05%dV", 8, 0, 16, 0, 0, NULL } }, + [TONE_ID] = { TONE, .u.n = {"\x05%dT", 8, 0, 16, 0, 0, NULL } }, + [DIRECT_ID] = { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } }, V_LAST_VAR }; @@ -113,10 +122,21 @@ static struct spk_synth synth_bns = { module_param_named(ser, synth_bns.ser, int, 0444); module_param_named(dev, synth_bns.dev_name, charp, 0444); module_param_named(start, synth_bns.startup, short, 0444); +module_param_named(rate, vars[RATE_ID].u.n.default_val, int, 0444); +module_param_named(pitch, vars[PITCH_ID].u.n.default_val, int, 0444); +module_param_named(vol, vars[VOL_ID].u.n.default_val, int, 0444); +module_param_named(tone, vars[TONE_ID].u.n.default_val, int, 0444); +module_param_named(direct, vars[DIRECT_ID].u.n.default_val, int, 0444); + MODULE_PARM_DESC(ser, "Set the serial port for the synthesizer (0-based)."); MODULE_PARM_DESC(dev, "Set the device e.g. 
ttyUSB0, for the synthesizer."); MODULE_PARM_DESC(start, "Start the synthesizer once it is loaded."); +MODULE_PARM_DESC(rate, "Set the rate variable on load."); +MODULE_PARM_DESC(pitch, "Set the pitch variable on load."); +MODULE_PARM_DESC(vol, "Set the vol variable on load."); +MODULE_PARM_DESC(tone, "Set the tone variable on load."); +MODULE_PARM_DESC(direct, "Set the direct variable on load."); module_spk_synth(synth_bns); From f613f00eb046970f20f4abe1ab8b26c1424a4f5a Mon Sep 17 00:00:00 2001 From: Osama Muhammad Date: Thu, 10 Nov 2022 02:50:58 +0500 Subject: [PATCH 2194/4122] Accessibility: speakup_decext: specifying the default driver parameters among the module params This is an enhancement which allows specifying the default driver parameters among the module parameters. Adding default variables to the speakup_decext module makes it easy to set them at boot, rather than setting the sys variables after boot. More details can be found here: https://github.com/linux-speakup/speakup/issues/7 Signed-off-by: Osama Muhammad Reviewed-by: Samuel Thibault Link: https://lore.kernel.org/r/20221109215108.7933-6-osmtendev@gmail.com Signed-off-by: Greg Kroah-Hartman --- .../accessibility/speakup/speakup_decext.c | 44 ++++++++++++++----- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/drivers/accessibility/speakup/speakup_decext.c b/drivers/accessibility/speakup/speakup_decext.c index eaebf62300a4..271bcf279bf9 100644 --- a/drivers/accessibility/speakup/speakup_decext.c +++ b/drivers/accessibility/speakup/speakup_decext.c @@ -38,16 +38,25 @@ static void synth_flush(struct spk_synth *synth); static int in_escape; -static struct var_t vars[] = { - { CAPS_START, .u.s = {"[:dv ap 222]" } }, - { CAPS_STOP, .u.s = {"[:dv ap 100]" } }, - { RATE, .u.n = {"[:ra %d]", 7, 0, 9, 150, 25, NULL } }, - { PITCH, .u.n = {"[:dv ap %d]", 100, 0, 100, 0, 0, NULL } }, - { INFLECTION, .u.n = {"[:dv pr %d] ", 100, 0, 10000, 0, 0, NULL } }, - { VOL, .u.n = {"[:dv gv %d]", 13, 0, 
16, 0, 5, NULL } }, - { PUNCT, .u.n = {"[:pu %c]", 0, 0, 2, 0, 0, "nsa" } }, - { VOICE, .u.n = {"[:n%c]", 0, 0, 9, 0, 0, "phfdburwkv" } }, - { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } }, + +enum default_vars_id { + CAPS_START_ID = 0, CAPS_STOP_ID, + RATE_ID, PITCH_ID, INFLECTION_ID, + VOL_ID, PUNCT_ID, VOICE_ID, + DIRECT_ID, V_LAST_ID, + NB_ID, +}; + +static struct var_t vars[NB_ID] = { + [CAPS_START_ID] = { CAPS_START, .u.s = {"[:dv ap 222]" } }, + [CAPS_STOP_ID] = { CAPS_STOP, .u.s = {"[:dv ap 100]" } }, + [RATE_ID] = { RATE, .u.n = {"[:ra %d]", 7, 0, 9, 150, 25, NULL } }, + [PITCH_ID] = { PITCH, .u.n = {"[:dv ap %d]", 100, 0, 100, 0, 0, NULL } }, + [INFLECTION_ID] = { INFLECTION, .u.n = {"[:dv pr %d] ", 100, 0, 10000, 0, 0, NULL } }, + [VOL_ID] = { VOL, .u.n = {"[:dv gv %d]", 13, 0, 16, 0, 5, NULL } }, + [PUNCT_ID] = { PUNCT, .u.n = {"[:pu %c]", 0, 0, 2, 0, 0, "nsa" } }, + [VOICE_ID] = { VOICE, .u.n = {"[:n%c]", 0, 0, 9, 0, 0, "phfdburwkv" } }, + [DIRECT_ID] = { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } }, V_LAST_VAR }; @@ -225,10 +234,25 @@ static void synth_flush(struct spk_synth *synth) module_param_named(ser, synth_decext.ser, int, 0444); module_param_named(dev, synth_decext.dev_name, charp, 0444); module_param_named(start, synth_decext.startup, short, 0444); +module_param_named(rate, vars[RATE_ID].u.n.default_val, int, 0444); +module_param_named(pitch, vars[PITCH_ID].u.n.default_val, int, 0444); +module_param_named(inflection, vars[INFLECTION_ID].u.n.default_val, int, 0444); +module_param_named(vol, vars[VOL_ID].u.n.default_val, int, 0444); +module_param_named(punct, vars[PUNCT_ID].u.n.default_val, int, 0444); +module_param_named(voice, vars[VOICE_ID].u.n.default_val, int, 0444); +module_param_named(direct, vars[DIRECT_ID].u.n.default_val, int, 0444); + MODULE_PARM_DESC(ser, "Set the serial port for the synthesizer (0-based)."); MODULE_PARM_DESC(dev, "Set the device e.g. 
ttyUSB0, for the synthesizer."); MODULE_PARM_DESC(start, "Start the synthesizer once it is loaded."); +MODULE_PARM_DESC(rate, "Set the rate variable on load."); +MODULE_PARM_DESC(pitch, "Set the pitch variable on load."); +MODULE_PARM_DESC(inflection, "Set the inflection variable on load."); +MODULE_PARM_DESC(vol, "Set the vol variable on load."); +MODULE_PARM_DESC(punct, "Set the punct variable on load."); +MODULE_PARM_DESC(voice, "Set the voice variable on load."); +MODULE_PARM_DESC(direct, "Set the direct variable on load."); module_spk_synth(synth_decext); From b75cfeb116de5894a2e7cc6f8bcf53c4910a7c6f Mon Sep 17 00:00:00 2001 From: Osama Muhammad Date: Thu, 10 Nov 2022 02:50:59 +0500 Subject: [PATCH 2195/4122] Accessibility: speakup_decpc: specifying the default driver parameters among the module params This is an enhancement which allows specifying the default driver parameters among the module parameters. Adding default variables to the speakup_decpc module makes it easy to set them at boot, rather than setting the sys variables after boot. 
More details can be found here: https://github.com/linux-speakup/speakup/issues/7 Signed-off-by: Osama Muhammad Reviewed-by: Samuel Thibault Link: https://lore.kernel.org/r/20221109215108.7933-7-osmtendev@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/accessibility/speakup/speakup_decpc.c | 48 +++++++++++++++---- 1 file changed, 38 insertions(+), 10 deletions(-) diff --git a/drivers/accessibility/speakup/speakup_decpc.c b/drivers/accessibility/speakup/speakup_decpc.c index dec314dee214..083ca9265805 100644 --- a/drivers/accessibility/speakup/speakup_decpc.c +++ b/drivers/accessibility/speakup/speakup_decpc.c @@ -134,16 +134,27 @@ static int synth_portlist[] = { 0x340, 0x350, 0x240, 0x250, 0 }; static int in_escape, is_flushing; static int dt_stat, dma_state; -static struct var_t vars[] = { - { CAPS_START, .u.s = {"[:dv ap 200]" } }, - { CAPS_STOP, .u.s = {"[:dv ap 100]" } }, - { RATE, .u.n = {"[:ra %d]", 9, 0, 18, 150, 25, NULL } }, - { PITCH, .u.n = {"[:dv ap %d]", 80, 0, 100, 20, 0, NULL } }, - { INFLECTION, .u.n = {"[:dv pr %d] ", 100, 0, 10000, 0, 0, NULL } }, - { VOL, .u.n = {"[:vo se %d]", 5, 0, 9, 5, 10, NULL } }, - { PUNCT, .u.n = {"[:pu %c]", 0, 0, 2, 0, 0, "nsa" } }, - { VOICE, .u.n = {"[:n%c]", 0, 0, 9, 0, 0, "phfdburwkv" } }, - { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } }, + +enum default_vars_id { + CAPS_START_ID = 0, CAPS_STOP_ID, + RATE_ID, PITCH_ID, INFLECTION_ID, + VOL_ID, PUNCT_ID, VOICE_ID, + DIRECT_ID, V_LAST_VAR_ID, + NB_ID, +}; + + + +static struct var_t vars[NB_ID] = { + [CAPS_START_ID] = { CAPS_START, .u.s = {"[:dv ap 200]" } }, + [CAPS_STOP_ID] = { CAPS_STOP, .u.s = {"[:dv ap 100]" } }, + [RATE_ID] = { RATE, .u.n = {"[:ra %d]", 9, 0, 18, 150, 25, NULL } }, + [PITCH_ID] = { PITCH, .u.n = {"[:dv ap %d]", 80, 0, 100, 20, 0, NULL } }, + [INFLECTION_ID] = { INFLECTION, .u.n = {"[:dv pr %d] ", 100, 0, 10000, 0, 0, NULL } }, + [VOL_ID] = { VOL, .u.n = {"[:vo se %d]", 5, 0, 9, 5, 10, NULL } }, + [PUNCT_ID] = { PUNCT, .u.n = {"[:pu %c]", 
0, 0, 2, 0, 0, "nsa" } }, + [VOICE_ID] = { VOICE, .u.n = {"[:n%c]", 0, 0, 9, 0, 0, "phfdburwkv" } }, + [DIRECT_ID] = { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } }, V_LAST_VAR }; @@ -483,8 +494,25 @@ static void dtpc_release(struct spk_synth *synth) } module_param_named(start, synth_dec_pc.startup, short, 0444); +module_param_named(rate, vars[RATE_ID].u.n.default_val, int, 0444); +module_param_named(pitch, vars[PITCH_ID].u.n.default_val, int, 0444); +module_param_named(inflection, vars[INFLECTION_ID].u.n.default_val, int, 0444); +module_param_named(vol, vars[VOL_ID].u.n.default_val, int, 0444); +module_param_named(punct, vars[PUNCT_ID].u.n.default_val, int, 0444); +module_param_named(voice, vars[VOICE_ID].u.n.default_val, int, 0444); +module_param_named(direct, vars[DIRECT_ID].u.n.default_val, int, 0444); + + + MODULE_PARM_DESC(start, "Start the synthesizer once it is loaded."); +MODULE_PARM_DESC(rate, "Set the rate variable on load."); +MODULE_PARM_DESC(pitch, "Set the pitch variable on load."); +MODULE_PARM_DESC(inflection, "Set the inflection variable on load."); +MODULE_PARM_DESC(vol, "Set the vol variable on load."); +MODULE_PARM_DESC(punct, "Set the punct variable on load."); +MODULE_PARM_DESC(voice, "Set the voice variable on load."); +MODULE_PARM_DESC(direct, "Set the direct variable on load."); module_spk_synth(synth_dec_pc); From 44d3e977dd361f4491bd5adc31f32ca13243703b Mon Sep 17 00:00:00 2001 From: Osama Muhammad Date: Thu, 10 Nov 2022 02:51:00 +0500 Subject: [PATCH 2196/4122] Accessibility: speakup_dectlk: specifying the default driver parameters among the module params This is an enhancement which allows specifying the default driver parameters among the module parameters. Adding default variables to the speakup_dectlk module makes it easy to set them at boot, rather than setting the sys variables after boot. 
More details can be found here: https://github.com/linux-speakup/speakup/issues/7 Signed-off-by: Osama Muhammad Reviewed-by: Samuel Thibault Link: https://lore.kernel.org/r/20221109215108.7933-8-osmtendev@gmail.com Signed-off-by: Greg Kroah-Hartman --- .../accessibility/speakup/speakup_dectlk.c | 45 ++++++++++++++----- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/drivers/accessibility/speakup/speakup_dectlk.c b/drivers/accessibility/speakup/speakup_dectlk.c index 2a7e8d727904..56334405d865 100644 --- a/drivers/accessibility/speakup/speakup_dectlk.c +++ b/drivers/accessibility/speakup/speakup_dectlk.c @@ -40,16 +40,24 @@ static int is_flushing; static DEFINE_SPINLOCK(flush_lock); static DECLARE_WAIT_QUEUE_HEAD(flush); -static struct var_t vars[] = { - { CAPS_START, .u.s = {"[:dv ap 160] " } }, - { CAPS_STOP, .u.s = {"[:dv ap 100 ] " } }, - { RATE, .u.n = {"[:ra %d] ", 180, 75, 650, 0, 0, NULL } }, - { PITCH, .u.n = {"[:dv ap %d] ", 122, 50, 350, 0, 0, NULL } }, - { INFLECTION, .u.n = {"[:dv pr %d] ", 100, 0, 10000, 0, 0, NULL } }, - { VOL, .u.n = {"[:dv g5 %d] ", 86, 60, 86, 0, 0, NULL } }, - { PUNCT, .u.n = {"[:pu %c] ", 0, 0, 2, 0, 0, "nsa" } }, - { VOICE, .u.n = {"[:n%c] ", 0, 0, 9, 0, 0, "phfdburwkv" } }, - { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } }, +enum default_vars_id { + CAPS_START_ID = 0, CAPS_STOP_ID, + RATE_ID, PITCH_ID, INFLECTION_ID, + VOL_ID, PUNCT_ID, VOICE_ID, + DIRECT_ID, V_LAST_VAR_ID, + NB_ID, +}; + +static struct var_t vars[NB_ID] = { + [CAPS_START_ID] = { CAPS_START, .u.s = {"[:dv ap 160] " } }, + [CAPS_STOP_ID] = { CAPS_STOP, .u.s = {"[:dv ap 100 ] " } }, + [RATE_ID] = { RATE, .u.n = {"[:ra %d] ", 180, 75, 650, 0, 0, NULL } }, + [PITCH_ID] = { PITCH, .u.n = {"[:dv ap %d] ", 122, 50, 350, 0, 0, NULL } }, + [INFLECTION_ID] = { INFLECTION, .u.n = {"[:dv pr %d] ", 100, 0, 10000, 0, 0, NULL } }, + [VOL_ID] = { VOL, .u.n = {"[:dv g5 %d] ", 86, 60, 86, 0, 0, NULL } }, + [PUNCT_ID] = { PUNCT, .u.n = {"[:pu %c] ", 0, 0, 2, 0, 0, 
"nsa" } }, + [VOICE_ID] = { VOICE, .u.n = {"[:n%c] ", 0, 0, 9, 0, 0, "phfdburwkv" } }, + [DIRECT_ID] = { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } }, V_LAST_VAR }; @@ -306,10 +314,27 @@ static void synth_flush(struct spk_synth *synth) module_param_named(ser, synth_dectlk.ser, int, 0444); module_param_named(dev, synth_dectlk.dev_name, charp, 0444); module_param_named(start, synth_dectlk.startup, short, 0444); +module_param_named(rate, vars[RATE_ID].u.n.default_val, int, 0444); +module_param_named(pitch, vars[PITCH_ID].u.n.default_val, int, 0444); +module_param_named(inflection, vars[INFLECTION_ID].u.n.default_val, int, 0444); +module_param_named(vol, vars[VOL_ID].u.n.default_val, int, 0444); +module_param_named(punct, vars[PUNCT_ID].u.n.default_val, int, 0444); +module_param_named(voice, vars[VOICE_ID].u.n.default_val, int, 0444); +module_param_named(direct, vars[DIRECT_ID].u.n.default_val, int, 0444); + + MODULE_PARM_DESC(ser, "Set the serial port for the synthesizer (0-based)."); MODULE_PARM_DESC(dev, "Set the device e.g. ttyUSB0, for the synthesizer."); MODULE_PARM_DESC(start, "Start the synthesizer once it is loaded."); +MODULE_PARM_DESC(rate, "Set the rate variable on load."); +MODULE_PARM_DESC(pitch, "Set the pitch variable on load."); +MODULE_PARM_DESC(inflection, "Set the inflection variable on load."); +MODULE_PARM_DESC(vol, "Set the vol variable on load."); +MODULE_PARM_DESC(punct, "Set the punct variable on load."); +MODULE_PARM_DESC(voice, "Set the voice variable on load."); +MODULE_PARM_DESC(direct, "Set the direct variable on load."); + module_spk_synth(synth_dectlk); From 251ca7da3087361d25bfd05e23ebd711e82ccf20 Mon Sep 17 00:00:00 2001 From: Osama Muhammad Date: Thu, 10 Nov 2022 02:51:01 +0500 Subject: [PATCH 2197/4122] Accessiblity: speakup_dtlk: specifying the default driver parameters among the module params This is an enhancement which allows to specify the default driver parameters among the module parameters. 
Adding default variables to the speakup_dtlk module allows to easily set that at boot, rather than setting the sys variables after boot. More details can be found here: https://github.com/linux-speakup/speakup/issues/7 Signed-off-by: Osama Muhammad Reviewed-by: Samuel Thibault Link: https://lore.kernel.org/r/20221109215108.7933-9-osmtendev@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/accessibility/speakup/speakup_dtlk.c | 50 +++++++++++++++----- 1 file changed, 39 insertions(+), 11 deletions(-) diff --git a/drivers/accessibility/speakup/speakup_dtlk.c b/drivers/accessibility/speakup/speakup_dtlk.c index 6f01e010aaf4..fa826568937b 100644 --- a/drivers/accessibility/speakup/speakup_dtlk.c +++ b/drivers/accessibility/speakup/speakup_dtlk.c @@ -37,17 +37,27 @@ static unsigned int synth_portlist[] = { static u_char synth_status; -static struct var_t vars[] = { - { CAPS_START, .u.s = {"\x01+35p" } }, - { CAPS_STOP, .u.s = {"\x01-35p" } }, - { RATE, .u.n = {"\x01%ds", 8, 0, 9, 0, 0, NULL } }, - { PITCH, .u.n = {"\x01%dp", 50, 0, 99, 0, 0, NULL } }, - { VOL, .u.n = {"\x01%dv", 5, 0, 9, 0, 0, NULL } }, - { TONE, .u.n = {"\x01%dx", 1, 0, 2, 0, 0, NULL } }, - { PUNCT, .u.n = {"\x01%db", 7, 0, 15, 0, 0, NULL } }, - { VOICE, .u.n = {"\x01%do", 0, 0, 7, 0, 0, NULL } }, - { FREQUENCY, .u.n = {"\x01%df", 5, 0, 9, 0, 0, NULL } }, - { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } }, +enum default_vars_id { + CAPS_START_ID = 0, CAPS_STOP_ID, + RATE_ID, PITCH_ID, + VOL_ID, TONE_ID, PUNCT_ID, + VOICE_ID, FREQUENCY_ID, + DIRECT_ID, V_LAST_VAR_ID, + NB_ID, +}; + + +static struct var_t vars[NB_ID] = { + [CAPS_START_ID] = { CAPS_START, .u.s = {"\x01+35p" } }, + [CAPS_STOP_ID] = { CAPS_STOP, .u.s = {"\x01-35p" } }, + [RATE_ID] = { RATE, .u.n = {"\x01%ds", 8, 0, 9, 0, 0, NULL } }, + [PITCH_ID] = { PITCH, .u.n = {"\x01%dp", 50, 0, 99, 0, 0, NULL } }, + [VOL_ID] = { VOL, .u.n = {"\x01%dv", 5, 0, 9, 0, 0, NULL } }, + [TONE_ID] = { TONE, .u.n = {"\x01%dx", 1, 0, 2, 0, 0, NULL } }, + 
[PUNCT_ID] = { PUNCT, .u.n = {"\x01%db", 7, 0, 15, 0, 0, NULL } }, + [VOICE_ID] = { VOICE, .u.n = {"\x01%do", 0, 0, 7, 0, 0, NULL } }, + [FREQUENCY_ID] = { FREQUENCY, .u.n = {"\x01%df", 5, 0, 9, 0, 0, NULL } }, + [DIRECT_ID] = { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } }, V_LAST_VAR }; @@ -376,9 +386,27 @@ static void dtlk_release(struct spk_synth *synth) module_param_hw_named(port, port_forced, int, ioport, 0444); module_param_named(start, synth_dtlk.startup, short, 0444); +module_param_named(rate, vars[RATE_ID].u.n.default_val, int, 0444); +module_param_named(pitch, vars[PITCH_ID].u.n.default_val, int, 0444); +module_param_named(vol, vars[VOL_ID].u.n.default_val, int, 0444); +module_param_named(tone, vars[TONE_ID].u.n.default_val, int, 0444); +module_param_named(punct, vars[PUNCT_ID].u.n.default_val, int, 0444); +module_param_named(voice, vars[VOICE_ID].u.n.default_val, int, 0444); +module_param_named(frequency, vars[FREQUENCY_ID].u.n.default_val, int, 0444); +module_param_named(direct, vars[DIRECT_ID].u.n.default_val, int, 0444); + MODULE_PARM_DESC(port, "Set the port for the synthesizer (override probing)."); MODULE_PARM_DESC(start, "Start the synthesizer once it is loaded."); +MODULE_PARM_DESC(rate, "Set the rate variable on load."); +MODULE_PARM_DESC(pitch, "Set the pitch variable on load."); +MODULE_PARM_DESC(vol, "Set the vol variable on load."); +MODULE_PARM_DESC(tone, "Set the tone variable on load."); +MODULE_PARM_DESC(punct, "Set the punct variable on load."); +MODULE_PARM_DESC(voice, "Set the voice variable on load."); +MODULE_PARM_DESC(frequency, "Set the frequency variable on load."); +MODULE_PARM_DESC(direct, "Set the direct variable on load."); + module_spk_synth(synth_dtlk); From 81188dd195788a1b314570c5d901a88de2179cf4 Mon Sep 17 00:00:00 2001 From: Osama Muhammad Date: Thu, 10 Nov 2022 02:51:02 +0500 Subject: [PATCH 2198/4122] Accessiblity: speakup_dummy: specifying the default driver parameters among the module params This is an enhancement 
which allows to specify the default driver parameters among the module parameters. Adding default variables to the speakup_dummy module allows to easily set that at boot, rather than setting the sys variables after boot. More details can be found here: https://github.com/linux-speakup/speakup/issues/7 Signed-off-by: Osama Muhammad Reviewed-by: Samuel Thibault Link: https://lore.kernel.org/r/20221109215108.7933-10-osmtendev@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/accessibility/speakup/speakup_dummy.c | 53 +++++++++++++++---- 1 file changed, 42 insertions(+), 11 deletions(-) diff --git a/drivers/accessibility/speakup/speakup_dummy.c b/drivers/accessibility/speakup/speakup_dummy.c index 56419dbb28d3..52b2c5d44576 100644 --- a/drivers/accessibility/speakup/speakup_dummy.c +++ b/drivers/accessibility/speakup/speakup_dummy.c @@ -18,17 +18,30 @@ #define DRV_VERSION "2.11" #define SYNTH_CLEAR '!' -static struct var_t vars[] = { - { CAPS_START, .u.s = {"CAPS_START\n" } }, - { CAPS_STOP, .u.s = {"CAPS_STOP\n" } }, - { PAUSE, .u.s = {"PAUSE\n"} }, - { RATE, .u.n = {"RATE %d\n", 8, 1, 16, 0, 0, NULL } }, - { PITCH, .u.n = {"PITCH %d\n", 8, 0, 16, 0, 0, NULL } }, - { INFLECTION, .u.n = {"INFLECTION %d\n", 8, 0, 16, 0, 0, NULL } }, - { VOL, .u.n = {"VOL %d\n", 8, 0, 16, 0, 0, NULL } }, - { TONE, .u.n = {"TONE %d\n", 8, 0, 16, 0, 0, NULL } }, - { PUNCT, .u.n = {"PUNCT %d\n", 0, 0, 3, 0, 0, NULL } }, - { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } }, + +enum default_vars_id { + CAPS_START_ID = 0, CAPS_STOP_ID, + PAUSE_ID, + RATE_ID, PITCH_ID, INFLECTION_ID, + VOL_ID, TONE_ID, PUNCT_ID, + DIRECT_ID, V_LAST_VAR_ID, + NB_ID +}; + + + + +static struct var_t vars[NB_ID] = { + [CAPS_START_ID] = { CAPS_START, .u.s = {"CAPS_START\n" } }, + [CAPS_STOP_ID] = { CAPS_STOP, .u.s = {"CAPS_STOP\n" } }, + [PAUSE_ID] = { PAUSE, .u.s = {"PAUSE\n"} }, + [RATE_ID] = { RATE, .u.n = {"RATE %d\n", 8, 1, 16, 0, 0, NULL } }, + [PITCH_ID] = { PITCH, .u.n = {"PITCH %d\n", 8, 0, 16, 0, 0, 
NULL } }, + [INFLECTION_ID] = { INFLECTION, .u.n = {"INFLECTION %d\n", 8, 0, 16, 0, 0, NULL } }, + [VOL_ID] = { VOL, .u.n = {"VOL %d\n", 8, 0, 16, 0, 0, NULL } }, + [TONE_ID] = { TONE, .u.n = {"TONE %d\n", 8, 0, 16, 0, 0, NULL } }, + [PUNCT_ID] = { PUNCT, .u.n = {"PUNCT %d\n", 0, 0, 3, 0, 0, NULL } }, + [DIRECT_ID] = { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } }, V_LAST_VAR }; @@ -129,10 +142,28 @@ static struct spk_synth synth_dummy = { module_param_named(ser, synth_dummy.ser, int, 0444); module_param_named(dev, synth_dummy.dev_name, charp, 0444); module_param_named(start, synth_dummy.startup, short, 0444); +module_param_named(rate, vars[RATE_ID].u.n.default_val, int, 0444); +module_param_named(pitch, vars[PITCH_ID].u.n.default_val, int, 0444); +module_param_named(inflection, vars[INFLECTION_ID].u.n.default_val, int, 0444); +module_param_named(vol, vars[VOL_ID].u.n.default_val, int, 0444); +module_param_named(tone, vars[TONE_ID].u.n.default_val, int, 0444); +module_param_named(punct, vars[PUNCT_ID].u.n.default_val, int, 0444); +module_param_named(direct, vars[DIRECT_ID].u.n.default_val, int, 0444); + + + MODULE_PARM_DESC(ser, "Set the serial port for the synthesizer (0-based)."); MODULE_PARM_DESC(dev, "Set the device e.g. 
ttyUSB0, for the synthesizer."); MODULE_PARM_DESC(start, "Start the synthesizer once it is loaded."); +MODULE_PARM_DESC(rate, "Set the rate variable on load."); +MODULE_PARM_DESC(pitch, "Set the pitch variable on load."); +MODULE_PARM_DESC(inflection, "Set the inflection variable on load."); +MODULE_PARM_DESC(vol, "Set the vol variable on load."); +MODULE_PARM_DESC(tone, "Set the tone variable on load."); +MODULE_PARM_DESC(punct, "Set the punct variable on load."); +MODULE_PARM_DESC(direct, "Set the direct variable on load."); + module_spk_synth(synth_dummy); From 031c122f8950f939b715781dadea0b7659f1ea3b Mon Sep 17 00:00:00 2001 From: Osama Muhammad Date: Thu, 10 Nov 2022 02:51:03 +0500 Subject: [PATCH 2199/4122] Accessiblity: speakup_keypc: specifying the default driver parameters among the module params This is an enhancement which allows to specify the default driver parameters among the module parameters. Adding default variables to the speakup_keypc module allows to easily set that at boot, rather than setting the sys variables after boot. 
More details can be found here: https://github.com/linux-speakup/speakup/issues/7 Signed-off-by: Osama Muhammad Reviewed-by: Samuel Thibault Link: https://lore.kernel.org/r/20221109215108.7933-11-osmtendev@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/accessibility/speakup/speakup_keypc.c | 29 +++++++++++++++---- 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/drivers/accessibility/speakup/speakup_keypc.c b/drivers/accessibility/speakup/speakup_keypc.c index f61b62f1ea4d..9356f6379560 100644 --- a/drivers/accessibility/speakup/speakup_keypc.c +++ b/drivers/accessibility/speakup/speakup_keypc.c @@ -33,12 +33,21 @@ static int synth_port; static int port_forced; static unsigned int synth_portlist[] = { 0x2a8, 0 }; -static struct var_t vars[] = { - { CAPS_START, .u.s = {"[f130]" } }, - { CAPS_STOP, .u.s = {"[f90]" } }, - { RATE, .u.n = {"\04%c ", 8, 0, 10, 81, -8, NULL } }, - { PITCH, .u.n = {"[f%d]", 5, 0, 9, 40, 10, NULL } }, - { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } }, + +enum default_vars_id { + CAPS_START_ID = 0, CAPS_STOP_ID, + RATE_ID, PITCH_ID, + DIRECT_ID, V_LAST_VAR_ID, + NB_ID +}; + + +static struct var_t vars[NB_ID] = { + [CAPS_START_ID] = { CAPS_START, .u.s = {"[f130]" } }, + [CAPS_STOP_ID] = { CAPS_STOP, .u.s = {"[f90]" } }, + [RATE_ID] = { RATE, .u.n = {"\04%c ", 8, 0, 10, 81, -8, NULL } }, + [PITCH_ID] = { PITCH, .u.n = {"[f%d]", 5, 0, 9, 40, 10, NULL } }, + [DIRECT_ID] = { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } }, V_LAST_VAR }; @@ -305,9 +314,17 @@ static void keynote_release(struct spk_synth *synth) module_param_hw_named(port, port_forced, int, ioport, 0444); module_param_named(start, synth_keypc.startup, short, 0444); +module_param_named(rate, vars[RATE_ID].u.n.default_val, int, 0444); +module_param_named(pitch, vars[PITCH_ID].u.n.default_val, int, 0444); +module_param_named(direct, vars[DIRECT_ID].u.n.default_val, int, 0444); MODULE_PARM_DESC(port, "Set the port for the synthesizer (override probing)."); 
MODULE_PARM_DESC(start, "Start the synthesizer once it is loaded."); +MODULE_PARM_DESC(rate, "Set the rate variable on load."); +MODULE_PARM_DESC(pitch, "Set the pitch variable on load."); +MODULE_PARM_DESC(direct, "Set the direct variable on load."); + + module_spk_synth(synth_keypc); From 88dce45b3282647ac0b2916d4820956cd239a1e9 Mon Sep 17 00:00:00 2001 From: Osama Muhammad Date: Thu, 10 Nov 2022 02:51:04 +0500 Subject: [PATCH 2200/4122] Accessiblity: speakup_ltlk: specifying the default driver parameters among the module params This is an enhancement which allows to specify the default driver parameters among the module parameters. Adding default variables to the speakup_ltlk module allows to easily set that at boot, rather than setting the sys variables after boot. More details can be found here: https://github.com/linux-speakup/speakup/issues/7 Signed-off-by: Osama Muhammad Reviewed-by: Samuel Thibault Link: https://lore.kernel.org/r/20221109215108.7933-12-osmtendev@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/accessibility/speakup/speakup_ltlk.c | 53 ++++++++++++++++---- 1 file changed, 42 insertions(+), 11 deletions(-) diff --git a/drivers/accessibility/speakup/speakup_ltlk.c b/drivers/accessibility/speakup/speakup_ltlk.c index f885cfaa27c8..1e279ae143bf 100644 --- a/drivers/accessibility/speakup/speakup_ltlk.c +++ b/drivers/accessibility/speakup/speakup_ltlk.c @@ -18,17 +18,28 @@ static int synth_probe(struct spk_synth *synth); -static struct var_t vars[] = { - { CAPS_START, .u.s = {"\x01+35p" } }, - { CAPS_STOP, .u.s = {"\x01-35p" } }, - { RATE, .u.n = {"\x01%ds", 8, 0, 9, 0, 0, NULL } }, - { PITCH, .u.n = {"\x01%dp", 50, 0, 99, 0, 0, NULL } }, - { VOL, .u.n = {"\x01%dv", 5, 0, 9, 0, 0, NULL } }, - { TONE, .u.n = {"\x01%dx", 1, 0, 2, 0, 0, NULL } }, - { PUNCT, .u.n = {"\x01%db", 7, 0, 15, 0, 0, NULL } }, - { VOICE, .u.n = {"\x01%do", 0, 0, 7, 0, 0, NULL } }, - { FREQUENCY, .u.n = {"\x01%df", 5, 0, 9, 0, 0, NULL } }, - { DIRECT, .u.n = {NULL, 0, 
0, 1, 0, 0, NULL } }, + +enum default_vars_id { + CAPS_START_ID = 0, CAPS_STOP_ID, + RATE_ID, PITCH_ID, + VOL_ID, TONE_ID, PUNCT_ID, + VOICE_ID, FREQUENCY_ID, + DIRECT_ID, V_LAST_VAR_ID, + NB_ID +}; + + +static struct var_t vars[NB_ID] = { + [CAPS_START_ID] = { CAPS_START, .u.s = {"\x01+35p" } }, + [CAPS_STOP_ID] = { CAPS_STOP, .u.s = {"\x01-35p" } }, + [RATE_ID] = { RATE, .u.n = {"\x01%ds", 8, 0, 9, 0, 0, NULL } }, + [PITCH_ID] = { PITCH, .u.n = {"\x01%dp", 50, 0, 99, 0, 0, NULL } }, + [VOL_ID] = { VOL, .u.n = {"\x01%dv", 5, 0, 9, 0, 0, NULL } }, + [TONE_ID] = { TONE, .u.n = {"\x01%dx", 1, 0, 2, 0, 0, NULL } }, + [PUNCT_ID] = { PUNCT, .u.n = {"\x01%db", 7, 0, 15, 0, 0, NULL } }, + [VOICE_ID] = { VOICE, .u.n = {"\x01%do", 0, 0, 7, 0, 0, NULL } }, + [FREQUENCY_ID] = { FREQUENCY, .u.n = {"\x01%df", 5, 0, 9, 0, 0, NULL } }, + [DIRECT_ID] = { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } }, V_LAST_VAR }; @@ -160,10 +171,30 @@ static int synth_probe(struct spk_synth *synth) module_param_named(ser, synth_ltlk.ser, int, 0444); module_param_named(dev, synth_ltlk.dev_name, charp, 0444); module_param_named(start, synth_ltlk.startup, short, 0444); +module_param_named(rate, vars[RATE_ID].u.n.default_val, int, 0444); +module_param_named(pitch, vars[PITCH_ID].u.n.default_val, int, 0444); +module_param_named(vol, vars[VOL_ID].u.n.default_val, int, 0444); +module_param_named(tone, vars[TONE_ID].u.n.default_val, int, 0444); +module_param_named(punct, vars[PUNCT_ID].u.n.default_val, int, 0444); +module_param_named(voice, vars[VOICE_ID].u.n.default_val, int, 0444); +module_param_named(frequency, vars[FREQUENCY_ID].u.n.default_val, int, 0444); +module_param_named(direct, vars[DIRECT_ID].u.n.default_val, int, 0444); + + + MODULE_PARM_DESC(ser, "Set the serial port for the synthesizer (0-based)."); MODULE_PARM_DESC(dev, "Set the device e.g. 
ttyUSB0, for the synthesizer."); MODULE_PARM_DESC(start, "Start the synthesizer once it is loaded."); +MODULE_PARM_DESC(rate, "Set the rate variable on load."); +MODULE_PARM_DESC(pitch, "Set the pitch variable on load."); +MODULE_PARM_DESC(vol, "Set the vol variable on load."); +MODULE_PARM_DESC(tone, "Set the tone variable on load."); +MODULE_PARM_DESC(punct, "Set the punct variable on load."); +MODULE_PARM_DESC(voice, "Set the voice variable on load."); +MODULE_PARM_DESC(frequency, "Set the frequency variable on load."); +MODULE_PARM_DESC(direct, "Set the direct variable on load."); + module_spk_synth(synth_ltlk); From 9744f41fe0879eb971a5cab950f7022ac49c0a92 Mon Sep 17 00:00:00 2001 From: Osama Muhammad Date: Thu, 10 Nov 2022 02:51:05 +0500 Subject: [PATCH 2201/4122] Accessiblity: speakup_spkout: specifying the default driver parameters among the module params This is an enhancement which allows to specify the default driver parameters among the module parameters. Adding default variables to the speakup_spkout module allows to easily set that at boot, rather than setting the sys variables after boot. 
More details can be found here: https://github.com/linux-speakup/speakup/issues/7 Signed-off-by: Osama Muhammad Reviewed-by: Samuel Thibault Link: https://lore.kernel.org/r/20221109215108.7933-13-osmtendev@gmail.com Signed-off-by: Greg Kroah-Hartman --- .../accessibility/speakup/speakup_spkout.c | 43 +++++++++++++++---- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/drivers/accessibility/speakup/speakup_spkout.c b/drivers/accessibility/speakup/speakup_spkout.c index 5e3bb3aa98b6..d3f26095b0ee 100644 --- a/drivers/accessibility/speakup/speakup_spkout.c +++ b/drivers/accessibility/speakup/speakup_spkout.c @@ -18,15 +18,26 @@ static void synth_flush(struct spk_synth *synth); -static struct var_t vars[] = { - { CAPS_START, .u.s = {"\x05P+" } }, - { CAPS_STOP, .u.s = {"\x05P-" } }, - { RATE, .u.n = {"\x05R%d", 7, 0, 9, 0, 0, NULL } }, - { PITCH, .u.n = {"\x05P%d", 3, 0, 9, 0, 0, NULL } }, - { VOL, .u.n = {"\x05V%d", 9, 0, 9, 0, 0, NULL } }, - { TONE, .u.n = {"\x05T%c", 8, 0, 25, 65, 0, NULL } }, - { PUNCT, .u.n = {"\x05M%c", 0, 0, 3, 0, 0, "nsma" } }, - { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } }, + + +enum default_vars_id { + CAPS_START_ID = 0, CAPS_STOP_ID, + RATE_ID, PITCH_ID, + VOL_ID, TONE_ID, PUNCT_ID, + DIRECT_ID, V_LAST_VAR_ID, + NB_ID +}; + + +static struct var_t vars[NB_ID] = { + [CAPS_START_ID] = { CAPS_START, .u.s = {"\x05P+" } }, + [CAPS_STOP_ID] = { CAPS_STOP, .u.s = {"\x05P-" } }, + [RATE_ID] = { RATE, .u.n = {"\x05R%d", 7, 0, 9, 0, 0, NULL } }, + [PITCH_ID] = { PITCH, .u.n = {"\x05P%d", 3, 0, 9, 0, 0, NULL } }, + [VOL_ID] = { VOL, .u.n = {"\x05V%d", 9, 0, 9, 0, 0, NULL } }, + [TONE_ID] = { TONE, .u.n = {"\x05T%c", 8, 0, 25, 65, 0, NULL } }, + [PUNCT_ID] = { PUNCT, .u.n = {"\x05M%c", 0, 0, 3, 0, 0, "nsma" } }, + [DIRECT_ID] = { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } }, V_LAST_VAR }; @@ -124,10 +135,24 @@ static void synth_flush(struct spk_synth *synth) module_param_named(ser, synth_spkout.ser, int, 0444); module_param_named(dev, 
synth_spkout.dev_name, charp, 0444); module_param_named(start, synth_spkout.startup, short, 0444); +module_param_named(rate, vars[RATE_ID].u.n.default_val, int, 0444); +module_param_named(vol, vars[VOL_ID].u.n.default_val, int, 0444); +module_param_named(tone, vars[TONE_ID].u.n.default_val, int, 0444); +module_param_named(punct, vars[PUNCT_ID].u.n.default_val, int, 0444); +module_param_named(direct, vars[DIRECT_ID].u.n.default_val, int, 0444); + + MODULE_PARM_DESC(ser, "Set the serial port for the synthesizer (0-based)."); MODULE_PARM_DESC(dev, "Set the device e.g. ttyUSB0, for the synthesizer."); MODULE_PARM_DESC(start, "Start the synthesizer once it is loaded."); +MODULE_PARM_DESC(rate, "Set the rate variable on load."); +MODULE_PARM_DESC(vol, "Set the vol variable on load."); +MODULE_PARM_DESC(tone, "Set the tone variable on load."); +MODULE_PARM_DESC(punct, "Set the punct variable on load."); +MODULE_PARM_DESC(direct, "Set the direct variable on load."); + + module_spk_synth(synth_spkout); From dfa6c10c89f8e9d001d649201bd5a5d821c23c36 Mon Sep 17 00:00:00 2001 From: Osama Muhammad Date: Thu, 10 Nov 2022 02:51:06 +0500 Subject: [PATCH 2202/4122] Accessiblity: speakup_txprt: specifying the default driver parameters among the module params This is an enhancement which allows to specify the default driver parameters among the module parameters. Adding a default variables to the speakup_txprt module allows to easily set that at boot, rather than setting the sys variables after boot.
More details can be found here: https://github.com/linux-speakup/speakup/issues/7 Signed-off-by: Osama Muhammad Link: https://lore.kernel.org/r/20221109215108.7933-14-osmtendev@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/accessibility/speakup/speakup_txprt.c | 45 +++++++++++++++---- 1 file changed, 37 insertions(+), 8 deletions(-) diff --git a/drivers/accessibility/speakup/speakup_txprt.c b/drivers/accessibility/speakup/speakup_txprt.c index 9e781347f7eb..4d0a0d4c41f0 100644 --- a/drivers/accessibility/speakup/speakup_txprt.c +++ b/drivers/accessibility/speakup/speakup_txprt.c @@ -16,14 +16,29 @@ #define SYNTH_CLEAR 0x18 #define PROCSPEECH '\r' /* process speech char */ -static struct var_t vars[] = { - { CAPS_START, .u.s = {"\x05P8" } }, - { CAPS_STOP, .u.s = {"\x05P5" } }, - { RATE, .u.n = {"\x05R%d", 5, 0, 9, 0, 0, NULL } }, - { PITCH, .u.n = {"\x05P%d", 5, 0, 9, 0, 0, NULL } }, - { VOL, .u.n = {"\x05V%d", 5, 0, 9, 0, 0, NULL } }, - { TONE, .u.n = {"\x05T%c", 12, 0, 25, 61, 0, NULL } }, - { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } }, + + + +enum default_vars_id { + CAPS_START_ID = 0, CAPS_STOP_ID, + RATE_ID, PITCH_ID, + VOL_ID, TONE_ID, + DIRECT_ID, V_LAST_VAR_ID, + NB_ID +}; + + + + + +static struct var_t vars[NB_ID] = { + [CAPS_START_ID] = { CAPS_START, .u.s = {"\x05P8" } }, + [CAPS_STOP_ID] = { CAPS_STOP, .u.s = {"\x05P5" } }, + [RATE_ID] = { RATE, .u.n = {"\x05R%d", 5, 0, 9, 0, 0, NULL } }, + [PITCH_ID] = { PITCH, .u.n = {"\x05P%d", 5, 0, 9, 0, 0, NULL } }, + [VOL_ID] = { VOL, .u.n = {"\x05V%d", 5, 0, 9, 0, 0, NULL } }, + [TONE_ID] = { TONE, .u.n = {"\x05T%c", 12, 0, 25, 61, 0, NULL } }, + [DIRECT_ID] = { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } }, V_LAST_VAR }; @@ -112,10 +127,24 @@ static struct spk_synth synth_txprt = { module_param_named(ser, synth_txprt.ser, int, 0444); module_param_named(dev, synth_txprt.dev_name, charp, 0444); module_param_named(start, synth_txprt.startup, short, 0444); +module_param_named(rate, 
vars[RATE_ID].u.n.default_val, int, 0444); +module_param_named(pitch, vars[PITCH_ID].u.n.default_val, int, 0444); +module_param_named(vol, vars[VOL_ID].u.n.default_val, int, 0444); +module_param_named(tone, vars[TONE_ID].u.n.default_val, int, 0444); +module_param_named(direct, vars[DIRECT_ID].u.n.default_val, int, 0444); + + MODULE_PARM_DESC(ser, "Set the serial port for the synthesizer (0-based)."); MODULE_PARM_DESC(dev, "Set the device e.g. ttyUSB0, for the synthesizer."); MODULE_PARM_DESC(start, "Start the synthesizer once it is loaded."); +MODULE_PARM_DESC(rate, "Set the rate variable on load."); +MODULE_PARM_DESC(pitch, "Set the pitch variable on load."); +MODULE_PARM_DESC(vol, "Set the vol variable on load."); +MODULE_PARM_DESC(tone, "Set the tone variable on load."); +MODULE_PARM_DESC(direct, "Set the direct variable on load."); + + module_spk_synth(synth_txprt); From cf0b4652d6acaed768b81b153872476a10b7db3b Mon Sep 17 00:00:00 2001 From: Osama Muhammad Date: Thu, 10 Nov 2022 02:51:07 +0500 Subject: [PATCH 2203/4122] Accessiblity: speakup_acntpc: specifying the default driver parameters among the module params This is an enhancement which allows to specify the default driver parameters among the module parameters. Adding default variables to the speakup_acntpc module allows to easily set that at boot, rather than setting the sys variables after boot. 
More details can be found here: https://github.com/linux-speakup/speakup/issues/7 Signed-off-by: Osama Muhammad Reviewed-by: Samuel Thibault Link: https://lore.kernel.org/r/20221109215108.7933-15-osmtendev@gmail.com Signed-off-by: Greg Kroah-Hartman --- .../accessibility/speakup/speakup_acntpc.c | 38 +++++++++++++++---- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/drivers/accessibility/speakup/speakup_acntpc.c b/drivers/accessibility/speakup/speakup_acntpc.c index a55b60754eb1..a27e6bbf05da 100644 --- a/drivers/accessibility/speakup/speakup_acntpc.c +++ b/drivers/accessibility/speakup/speakup_acntpc.c @@ -34,14 +34,23 @@ static int synth_port_control; static int port_forced; static unsigned int synth_portlist[] = { 0x2a8, 0 }; -static struct var_t vars[] = { - { CAPS_START, .u.s = {"\033P8" } }, - { CAPS_STOP, .u.s = {"\033P5" } }, - { RATE, .u.n = {"\033R%c", 9, 0, 17, 0, 0, "0123456789abcdefgh" } }, - { PITCH, .u.n = {"\033P%d", 5, 0, 9, 0, 0, NULL } }, - { VOL, .u.n = {"\033A%d", 5, 0, 9, 0, 0, NULL } }, - { TONE, .u.n = {"\033V%d", 5, 0, 9, 0, 0, NULL } }, - { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } }, +enum default_vars_id { + CAPS_START_ID = 0, CAPS_STOP_ID, + RATE_ID, PITCH_ID, + VOL_ID, TONE_ID, + DIRECT_ID, V_LAST_VAR_ID, + NB_ID +}; + + +static struct var_t vars[NB_ID] = { + [CAPS_START_ID] = { CAPS_START, .u.s = {"\033P8" } }, + [CAPS_STOP_ID] = { CAPS_STOP, .u.s = {"\033P5" } }, + [RATE_ID] = { RATE, .u.n = {"\033R%c", 9, 0, 17, 0, 0, "0123456789abcdefgh" } }, + [PITCH_ID] = { PITCH, .u.n = {"\033P%d", 5, 0, 9, 0, 0, NULL } }, + [VOL_ID] = { VOL, .u.n = {"\033A%d", 5, 0, 9, 0, 0, NULL } }, + [TONE_ID] = { TONE, .u.n = {"\033V%d", 5, 0, 9, 0, 0, NULL } }, + [DIRECT_ID] = { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } }, V_LAST_VAR }; @@ -305,9 +314,22 @@ static void accent_release(struct spk_synth *synth) module_param_hw_named(port, port_forced, int, ioport, 0444); module_param_named(start, synth_acntpc.startup, short, 0444); 
+module_param_named(rate, vars[RATE_ID].u.n.default_val, int, 0444); +module_param_named(pitch, vars[PITCH_ID].u.n.default_val, int, 0444); +module_param_named(vol, vars[VOL_ID].u.n.default_val, int, 0444); +module_param_named(tone, vars[TONE_ID].u.n.default_val, int, 0444); +module_param_named(direct, vars[DIRECT_ID].u.n.default_val, int, 0444); + + MODULE_PARM_DESC(port, "Set the port for the synthesizer (override probing)."); MODULE_PARM_DESC(start, "Start the synthesizer once it is loaded."); +MODULE_PARM_DESC(rate, "Set the rate variable on load."); +MODULE_PARM_DESC(pitch, "Set the pitch variable on load."); +MODULE_PARM_DESC(vol, "Set the vol variable on load."); +MODULE_PARM_DESC(tone, "Set the tone variable on load."); +MODULE_PARM_DESC(direct, "Set the direct variable on load."); + module_spk_synth(synth_acntpc); From a606dd6253b8de8dc81a1112ab702edfe7a98d72 Mon Sep 17 00:00:00 2001 From: Osama Muhammad Date: Thu, 10 Nov 2022 02:51:08 +0500 Subject: [PATCH 2204/4122] Accessiblity: speakup_acntsa: specifying the default driver parameters among the module params This is an enhancement which allows to specify the default driver parameters among the module parameters. Adding default variables to the speakup_acntsa module allows to easily set that at boot, rather than setting the sys variables after boot. 
More details can be found here: https://github.com/linux-speakup/speakup/issues/7 Signed-off-by: Osama Muhammad Reviewed-by: Samuel Thibault Link: https://lore.kernel.org/r/20221109215108.7933-16-osmtendev@gmail.com Signed-off-by: Greg Kroah-Hartman --- .../accessibility/speakup/speakup_acntsa.c | 37 +++++++++++++++---- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/drivers/accessibility/speakup/speakup_acntsa.c b/drivers/accessibility/speakup/speakup_acntsa.c index 2697c51ed6b5..26bb9f9399d3 100644 --- a/drivers/accessibility/speakup/speakup_acntsa.c +++ b/drivers/accessibility/speakup/speakup_acntsa.c @@ -19,14 +19,24 @@ static int synth_probe(struct spk_synth *synth); -static struct var_t vars[] = { - { CAPS_START, .u.s = {"\033P8" } }, - { CAPS_STOP, .u.s = {"\033P5" } }, - { RATE, .u.n = {"\033R%c", 9, 0, 17, 0, 0, "0123456789abcdefgh" } }, - { PITCH, .u.n = {"\033P%d", 5, 0, 9, 0, 0, NULL } }, - { VOL, .u.n = {"\033A%d", 9, 0, 9, 0, 0, NULL } }, - { TONE, .u.n = {"\033V%d", 5, 0, 9, 0, 0, NULL } }, - { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } }, + +enum default_vars_id { + CAPS_START_ID = 0, CAPS_STOP_ID, + RATE_ID, PITCH_ID, + VOL_ID, TONE_ID, + DIRECT_ID, V_LAST_VAR_ID, + NB_ID +}; + + +static struct var_t vars[NB_ID] = { + [CAPS_START_ID] = { CAPS_START, .u.s = {"\033P8" } }, + [CAPS_STOP_ID] = { CAPS_STOP, .u.s = {"\033P5" } }, + [RATE_ID] = { RATE, .u.n = {"\033R%c", 9, 0, 17, 0, 0, "0123456789abcdefgh" } }, + [PITCH_ID] = { PITCH, .u.n = {"\033P%d", 5, 0, 9, 0, 0, NULL } }, + [VOL_ID] = { VOL, .u.n = {"\033A%d", 9, 0, 9, 0, 0, NULL } }, + [TONE_ID] = { TONE, .u.n = {"\033V%d", 5, 0, 9, 0, 0, NULL } }, + [DIRECT_ID] = { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } }, V_LAST_VAR }; @@ -129,10 +139,21 @@ static int synth_probe(struct spk_synth *synth) module_param_named(ser, synth_acntsa.ser, int, 0444); module_param_named(dev, synth_acntsa.dev_name, charp, 0444); module_param_named(start, synth_acntsa.startup, short, 0444); 
+module_param_named(rate, vars[RATE_ID].u.n.default_val, int, 0444); +module_param_named(pitch, vars[PITCH_ID].u.n.default_val, int, 0444); +module_param_named(vol, vars[VOL_ID].u.n.default_val, int, 0444); +module_param_named(tone, vars[TONE_ID].u.n.default_val, int, 0444); +module_param_named(direct, vars[DIRECT_ID].u.n.default_val, int, 0444); MODULE_PARM_DESC(ser, "Set the serial port for the synthesizer (0-based)."); MODULE_PARM_DESC(dev, "Set the device e.g. ttyUSB0, for the synthesizer."); MODULE_PARM_DESC(start, "Start the synthesizer once it is loaded."); +MODULE_PARM_DESC(rate, "Set the rate variable on load."); +MODULE_PARM_DESC(pitch, "Set the pitch variable on load."); +MODULE_PARM_DESC(vol, "Set the vol variable on load."); +MODULE_PARM_DESC(tone, "Set the tone variable on load."); +MODULE_PARM_DESC(direct, "Set the direct variable on load."); + module_spk_synth(synth_acntsa); From f43241aafedb1c7d72825cea1a521cef183ee61d Mon Sep 17 00:00:00 2001 From: Mushahid Hussain Date: Tue, 15 Nov 2022 15:05:29 +0500 Subject: [PATCH 2205/4122] accessibility: speakup: Specify spk_vars among module parameters This is an enhancement which allows setting default variables for speakup module at the boot rather than setting the sys variables after the boot. 
Signed-off-by: Mushahid Hussain Reviewed-by: Samuel Thibault Link: https://lore.kernel.org/r/20221115100530.91174-2-mushi.shar@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/accessibility/speakup/main.c | 60 ++++++++++++++++++++++------ 1 file changed, 47 insertions(+), 13 deletions(-) diff --git a/drivers/accessibility/speakup/main.c b/drivers/accessibility/speakup/main.c index 73db0cb44fc7..1df9a27a5214 100644 --- a/drivers/accessibility/speakup/main.c +++ b/drivers/accessibility/speakup/main.c @@ -1268,20 +1268,28 @@ int spk_set_key_info(const u_char *key_info, u_char *k_buffer) return 0; } -static struct var_t spk_vars[] = { +enum spk_vars_id { + BELL_POS_ID = 0, SPELL_DELAY_ID, ATTRIB_BLEEP_ID, + BLEEPS_ID, BLEEP_TIME_ID, PUNC_LEVEL_ID, + READING_PUNC_ID, CURSOR_TIME_ID, SAY_CONTROL_ID, + SAY_WORD_CTL_ID, NO_INTERRUPT_ID, KEY_ECHO_ID, + V_LAST_VAR_ID, NB_ID +}; + +static struct var_t spk_vars[NB_ID] = { /* bell must be first to set high limit */ - {BELL_POS, .u.n = {NULL, 0, 0, 0, 0, 0, NULL} }, - {SPELL_DELAY, .u.n = {NULL, 0, 0, 4, 0, 0, NULL} }, - {ATTRIB_BLEEP, .u.n = {NULL, 1, 0, 3, 0, 0, NULL} }, - {BLEEPS, .u.n = {NULL, 3, 0, 3, 0, 0, NULL} }, - {BLEEP_TIME, .u.n = {NULL, 30, 1, 200, 0, 0, NULL} }, - {PUNC_LEVEL, .u.n = {NULL, 1, 0, 4, 0, 0, NULL} }, - {READING_PUNC, .u.n = {NULL, 1, 0, 4, 0, 0, NULL} }, - {CURSOR_TIME, .u.n = {NULL, 120, 50, 600, 0, 0, NULL} }, - {SAY_CONTROL, TOGGLE_0}, - {SAY_WORD_CTL, TOGGLE_0}, - {NO_INTERRUPT, TOGGLE_0}, - {KEY_ECHO, .u.n = {NULL, 1, 0, 2, 0, 0, NULL} }, + [BELL_POS_ID] = { BELL_POS, .u.n = {NULL, 0, 0, 0, 0, 0, NULL} }, + [SPELL_DELAY_ID] = { SPELL_DELAY, .u.n = {NULL, 0, 0, 4, 0, 0, NULL} }, + [ATTRIB_BLEEP_ID] = { ATTRIB_BLEEP, .u.n = {NULL, 1, 0, 3, 0, 0, NULL} }, + [BLEEPS_ID] = { BLEEPS, .u.n = {NULL, 3, 0, 3, 0, 0, NULL} }, + [BLEEP_TIME_ID] = { BLEEP_TIME, .u.n = {NULL, 30, 1, 200, 0, 0, NULL} }, + [PUNC_LEVEL_ID] = { PUNC_LEVEL, .u.n = {NULL, 1, 0, 4, 0, 0, NULL} }, + [READING_PUNC_ID] = { 
READING_PUNC, .u.n = {NULL, 1, 0, 4, 0, 0, NULL} }, + [CURSOR_TIME_ID] = { CURSOR_TIME, .u.n = {NULL, 120, 50, 600, 0, 0, NULL} }, + [SAY_CONTROL_ID] = { SAY_CONTROL, TOGGLE_0}, + [SAY_WORD_CTL_ID] = {SAY_WORD_CTL, TOGGLE_0}, + [NO_INTERRUPT_ID] = { NO_INTERRUPT, TOGGLE_0}, + [KEY_ECHO_ID] = { KEY_ECHO, .u.n = {NULL, 1, 0, 2, 0, 0, NULL} }, V_LAST_VAR }; @@ -2453,5 +2461,31 @@ out: return err; } +module_param_named(bell_pos, spk_vars[BELL_POS_ID].u.n.default_val, int, 0444); +module_param_named(spell_delay, spk_vars[SPELL_DELAY_ID].u.n.default_val, int, 0444); +module_param_named(attrib_bleep, spk_vars[ATTRIB_BLEEP_ID].u.n.default_val, int, 0444); +module_param_named(bleeps, spk_vars[BLEEPS_ID].u.n.default_val, int, 0444); +module_param_named(bleep_time, spk_vars[BLEEP_TIME_ID].u.n.default_val, int, 0444); +module_param_named(punc_level, spk_vars[PUNC_LEVEL_ID].u.n.default_val, int, 0444); +module_param_named(reading_punc, spk_vars[READING_PUNC_ID].u.n.default_val, int, 0444); +module_param_named(cursor_time, spk_vars[CURSOR_TIME_ID].u.n.default_val, int, 0444); +module_param_named(say_control, spk_vars[SAY_CONTROL_ID].u.n.default_val, int, 0444); +module_param_named(say_word_ctl, spk_vars[SAY_WORD_CTL_ID].u.n.default_val, int, 0444); +module_param_named(no_interrupt, spk_vars[NO_INTERRUPT_ID].u.n.default_val, int, 0444); +module_param_named(key_echo, spk_vars[KEY_ECHO_ID].u.n.default_val, int, 0444); + +MODULE_PARM_DESC(bell_pos, "This works much like a typewriter bell. If for example 72 is echoed to bell_pos, it will beep the PC speaker when typing on a line past character 72."); +MODULE_PARM_DESC(spell_delay, "This controls how fast a word is spelled when speakup's spell word review command is pressed."); +MODULE_PARM_DESC(attrib_bleep, "Beeps the PC speaker when there is an attribute change such as background color when using speakup review commands.
One = on, zero = off."); +MODULE_PARM_DESC(bleeps, "This controls whether one hears beeps through the PC speaker when using speakup review commands."); +MODULE_PARM_DESC(bleep_time, "This controls the duration of the PC speaker beeps speakup produces."); +MODULE_PARM_DESC(punc_level, "Controls the level of punctuation spoken as the screen is displayed, not reviewed."); +MODULE_PARM_DESC(reading_punc, "It controls the level of punctuation when reviewing the screen with speakup's screen review commands."); +MODULE_PARM_DESC(cursor_time, "This controls cursor delay when using arrow keys."); +MODULE_PARM_DESC(say_control, "This controls if speakup speaks shift, alt and control when those keys are pressed or not."); +MODULE_PARM_DESC(say_word_ctl, "Sets the say_word_ctl on load."); +MODULE_PARM_DESC(no_interrupt, "Controls if typing interrupts output from speakup."); +MODULE_PARM_DESC(key_echo, "Controls if speakup speaks keys when they are typed. One = on zero = off or don't echo keys."); + module_init(speakup_init); module_exit(speakup_exit); From 72b8ec15f97bbabbb2379c87cb5f7a9dc63f0d88 Mon Sep 17 00:00:00 2001 From: Mushahid Hussain Date: Tue, 15 Nov 2022 15:05:30 +0500 Subject: [PATCH 2206/4122] accessibility: speakup: phonetic spelling while arrowing letter by letter This patch includes an enhancement requested frequently on the mailing list.[1][2] It adds a variable, cur_phonetic in the spk_vars, which can be set as a module parameter, as well as in /sys/speakup/cur_phonetic. This patch also documents cur_phonetic as a sysfs attribute in sysfs-driver-speakup. When cur_phonetic=1, it causes speakup to speak letters phonetically if paused on the character while arrowing through a word. When a user does not set cur_phonetic to any value, the default value for it would be 0.
[1]: https://github.com/linux-speakup/speakup/issues/6 [2]: https://github.com/linux-speakup/speakup/issues/5 since V1: - removed unnecessary lines Signed-off-by: Mushahid Hussain Reviewed-by: Samuel Thibault Link: https://lore.kernel.org/r/20221115100530.91174-3-mushi.shar@gmail.com Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/stable/sysfs-driver-speakup | 9 +++++++++ drivers/accessibility/speakup/kobjects.c | 3 +++ drivers/accessibility/speakup/main.c | 14 +++++++++++--- drivers/accessibility/speakup/speakup.h | 1 + drivers/accessibility/speakup/spk_types.h | 2 +- drivers/accessibility/speakup/varhandlers.c | 1 + 6 files changed, 26 insertions(+), 4 deletions(-) diff --git a/Documentation/ABI/stable/sysfs-driver-speakup b/Documentation/ABI/stable/sysfs-driver-speakup index dc2a6ba1674b..bcb6831aa114 100644 --- a/Documentation/ABI/stable/sysfs-driver-speakup +++ b/Documentation/ABI/stable/sysfs-driver-speakup @@ -35,6 +35,15 @@ Description: This controls cursor delay when using arrow keys. When a characters. Set this to a higher value to adjust for the delay and better synchronisation between cursor position and speech. +What: /sys/accessibility/speakup/cur_phonetic +KernelVersion: 6.2 +Contact: speakup@linux-speakup.org +Description: This allows speakup to speak letters phonetically when arrowing through + a word letter by letter. This doesn't affect the spelling when typing + the characters. When cur_phonetic=1, speakup will speak characters + phonetically when arrowing over a letter. When cur_phonetic=0, speakup + will speak letters normally. 
+ What: /sys/accessibility/speakup/delimiters KernelVersion: 2.6 Contact: speakup@linux-speakup.org diff --git a/drivers/accessibility/speakup/kobjects.c b/drivers/accessibility/speakup/kobjects.c index 41ae24ab5d08..a7522d409802 100644 --- a/drivers/accessibility/speakup/kobjects.c +++ b/drivers/accessibility/speakup/kobjects.c @@ -914,6 +914,8 @@ static struct kobj_attribute say_word_ctl_attribute = __ATTR(say_word_ctl, 0644, spk_var_show, spk_var_store); static struct kobj_attribute spell_delay_attribute = __ATTR(spell_delay, 0644, spk_var_show, spk_var_store); +static struct kobj_attribute cur_phonetic_attribute = + __ATTR(cur_phonetic, 0644, spk_var_show, spk_var_store); /* * These attributes are i18n related. @@ -967,6 +969,7 @@ static struct attribute *main_attrs[] = { &say_control_attribute.attr, &say_word_ctl_attribute.attr, &spell_delay_attribute.attr, + &cur_phonetic_attribute.attr, NULL, }; diff --git a/drivers/accessibility/speakup/main.c b/drivers/accessibility/speakup/main.c index 1df9a27a5214..4733fd6334ab 100644 --- a/drivers/accessibility/speakup/main.c +++ b/drivers/accessibility/speakup/main.c @@ -65,6 +65,7 @@ int spk_key_echo, spk_say_word_ctl; int spk_say_ctrl, spk_bell_pos; short spk_punc_mask; int spk_punc_level, spk_reading_punc; +int spk_cur_phonetic; char spk_str_caps_start[MAXVARLEN + 1] = "\0"; char spk_str_caps_stop[MAXVARLEN + 1] = "\0"; char spk_str_pause[MAXVARLEN + 1] = "\0"; @@ -1273,7 +1274,7 @@ enum spk_vars_id { BLEEPS_ID, BLEEP_TIME_ID, PUNC_LEVEL_ID, READING_PUNC_ID, CURSOR_TIME_ID, SAY_CONTROL_ID, SAY_WORD_CTL_ID, NO_INTERRUPT_ID, KEY_ECHO_ID, - V_LAST_VAR_ID, NB_ID + CUR_PHONETIC_ID, V_LAST_VAR_ID, NB_ID }; static struct var_t spk_vars[NB_ID] = { @@ -1290,6 +1291,7 @@ static struct var_t spk_vars[NB_ID] = { [SAY_WORD_CTL_ID] = {SAY_WORD_CTL, TOGGLE_0}, [NO_INTERRUPT_ID] = { NO_INTERRUPT, TOGGLE_0}, [KEY_ECHO_ID] = { KEY_ECHO, .u.n = {NULL, 1, 0, 2, 0, 0, NULL} }, + [CUR_PHONETIC_ID] = { CUR_PHONETIC, .u.n = {NULL, 0, 0, 1, 
0, 0, NULL} }, V_LAST_VAR }; @@ -1720,8 +1722,12 @@ static void cursor_done(struct timer_list *unused) speakup_win_say(vc); else if (is_cursor == 1 || is_cursor == 4) say_line_from_to(vc, 0, vc->vc_cols, 0); - else - say_char(vc); + else { + if (spk_cur_phonetic == 1) + say_phonetic_char(vc); + else + say_char(vc); + } spk_keydown = 0; is_cursor = 0; out: @@ -2473,6 +2479,7 @@ module_param_named(say_control, spk_vars[SAY_CONTROL_ID].u.n.default_val, int, 0 module_param_named(say_word_ctl, spk_vars[SAY_WORD_CTL_ID].u.n.default_val, int, 0444); module_param_named(no_interrupt, spk_vars[NO_INTERRUPT_ID].u.n.default_val, int, 0444); module_param_named(key_echo, spk_vars[KEY_ECHO_ID].u.n.default_val, int, 0444); +module_param_named(cur_phonetic, spk_vars[CUR_PHONETIC_ID].u.n.default_val, int, 0444); MODULE_PARM_DESC(bell_pos, "This works much like a typewriter bell. If for example 72 is echoed to bell_pos, it will beep the PC speaker when typing on a line past character 72."); MODULE_PARM_DESC(spell_delay, "This controls how fast a word is spelled when speakup's spell word review command is pressed."); @@ -2486,6 +2493,7 @@ MODULE_PARM_DESC(say_control, "This controls if speakup speaks shift, alt and co MODULE_PARM_DESC(say_word_ctl, "Sets thw say_word_ctl on load."); MODULE_PARM_DESC(no_interrupt, "Controls if typing interrupts output from speakup."); MODULE_PARM_DESC(key_echo, "Controls if speakup speaks keys when they are typed. One = on zero = off or don't echo keys."); +MODULE_PARM_DESC(cur_phonetic, "Controls if speakup speaks letters phonetically during navigation. 
One = on zero = off or don't speak phonetically."); module_init(speakup_init); module_exit(speakup_exit); diff --git a/drivers/accessibility/speakup/speakup.h b/drivers/accessibility/speakup/speakup.h index 33594f5a7983..364fde99749e 100644 --- a/drivers/accessibility/speakup/speakup.h +++ b/drivers/accessibility/speakup/speakup.h @@ -105,6 +105,7 @@ extern int spk_no_intr, spk_say_ctrl, spk_say_word_ctl, spk_punc_level; extern int spk_reading_punc, spk_attrib_bleep, spk_bleeps; extern int spk_bleep_time, spk_bell_pos; extern int spk_spell_delay, spk_key_echo; +extern int spk_cur_phonetic; extern short spk_punc_mask; extern short spk_pitch_shift, synth_flags; extern bool spk_quiet_boot; diff --git a/drivers/accessibility/speakup/spk_types.h b/drivers/accessibility/speakup/spk_types.h index 3a14d39bf896..08011518a28a 100644 --- a/drivers/accessibility/speakup/spk_types.h +++ b/drivers/accessibility/speakup/spk_types.h @@ -49,7 +49,7 @@ enum var_id_t { RATE, PITCH, VOL, TONE, PUNCT, VOICE, FREQUENCY, LANG, DIRECT, PAUSE, CAPS_START, CAPS_STOP, CHARTAB, INFLECTION, FLUSH, - MAXVARS + CUR_PHONETIC, MAXVARS }; typedef int (*special_func)(struct vc_data *vc, u_char type, u_char ch, diff --git a/drivers/accessibility/speakup/varhandlers.c b/drivers/accessibility/speakup/varhandlers.c index e1c9f42e39f0..462f8d879053 100644 --- a/drivers/accessibility/speakup/varhandlers.c +++ b/drivers/accessibility/speakup/varhandlers.c @@ -48,6 +48,7 @@ static struct st_var_header var_headers[] = { { "chartab", CHARTAB, VAR_PROC, NULL, NULL }, { "direct", DIRECT, VAR_NUM, NULL, NULL }, { "pause", PAUSE, VAR_STRING, spk_str_pause, NULL }, + { "cur_phonetic", CUR_PHONETIC, VAR_NUM, &spk_cur_phonetic, NULL }, }; static struct st_var_header *var_ptrs[MAXVARS] = { NULL, NULL, NULL }; From 7610615e8cdb3f6f5bbd9d8e7a5d8a63e3cabf2e Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Sat, 19 Nov 2022 11:57:21 +0800 Subject: [PATCH 2207/4122] test_firmware: fix memory leak in test_firmware_init() 
When misc_register() failed in test_firmware_init(), the memory pointed by test_fw_config->name is not released. The memory leak information is as follows: unreferenced object 0xffff88810a34cb00 (size 32): comm "insmod", pid 7952, jiffies 4294948236 (age 49.060s) hex dump (first 32 bytes): 74 65 73 74 2d 66 69 72 6d 77 61 72 65 2e 62 69 test-firmware.bi 6e 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 n............... backtrace: [] __kmalloc_node_track_caller+0x4b/0xc0 [] kstrndup+0x46/0xc0 [] __test_firmware_config_init+0x29/0x380 [test_firmware] [] 0xffffffffa040f068 [] do_one_initcall+0x141/0x780 [] do_init_module+0x1c3/0x630 [] load_module+0x623e/0x76a0 [] __do_sys_finit_module+0x181/0x240 [] do_syscall_64+0x39/0xb0 [] entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: c92316bf8e94 ("test_firmware: add batched firmware tests") Signed-off-by: Zhengchao Shao Acked-by: Luis Chamberlain Link: https://lore.kernel.org/r/20221119035721.18268-1-shaozhengchao@huawei.com Signed-off-by: Greg Kroah-Hartman --- lib/test_firmware.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/test_firmware.c b/lib/test_firmware.c index 0c714cdd51ef..e207bc08820d 100644 --- a/lib/test_firmware.c +++ b/lib/test_firmware.c @@ -1491,6 +1491,7 @@ static int __init test_firmware_init(void) rc = misc_register(&test_fw_misc_device); if (rc) { + __test_firmware_config_free(); kfree(test_fw_config); pr_err("could not register misc device: %d\n", rc); return rc; From a4cb1004aeed2ab893a058fad00a5b41a12c4691 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 11 Nov 2022 22:59:29 +0800 Subject: [PATCH 2208/4122] misc: ocxl: fix possible name leak in ocxl_file_register_afu() If device_register() returns error in ocxl_file_register_afu(), the name allocated by dev_set_name() need be freed. As comment of device_register() says, it should use put_device() to give up the reference in the error path. 
So fix this by calling put_device(), then the name can be freed in kobject_cleanup(), and info is freed in info_release(). Fixes: 75ca758adbaf ("ocxl: Create a clear delineation between ocxl backend & frontend") Signed-off-by: Yang Yingliang Acked-by: Andrew Donnellan Acked-by: Frederic Barrat Link: https://lore.kernel.org/r/20221111145929.2429271-1-yangyingliang@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ocxl/file.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c index d46dba2df5a1..452d5777a0e4 100644 --- a/drivers/misc/ocxl/file.c +++ b/drivers/misc/ocxl/file.c @@ -541,8 +541,11 @@ int ocxl_file_register_afu(struct ocxl_afu *afu) goto err_put; rc = device_register(&info->dev); - if (rc) - goto err_put; + if (rc) { + free_minor(info); + put_device(&info->dev); + return rc; + } rc = ocxl_sysfs_register_afu(info); if (rc) From 27158c72678b39ee01cc01de1aba6b51c71abe2f Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 21 Nov 2022 23:43:39 +0800 Subject: [PATCH 2209/4122] ocxl: fix pci device refcount leak when calling get_function_0() get_function_0() calls pci_get_domain_bus_and_slot(); as its comment says, it returns a pci device with its refcount incremented, so after using it, pci_dev_put() needs to be called. Get the device reference when get_function_0() is not called, so pci_dev_put() can be called unconditionally in the error path and by callers. Also add a comment above get_dvsec_vendor0() to tell callers to call pci_dev_put(). 
Fixes: 87db7579ebd5 ("ocxl: control via sysfs whether the FPGA is reloaded on a link reset") Suggested-by: Andrew Donnellan Signed-off-by: Yang Yingliang Acked-by: Andrew Donnellan Link: https://lore.kernel.org/r/20221121154339.4088935-1-yangyingliang@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ocxl/config.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c index e401a51596b9..92ab49705f64 100644 --- a/drivers/misc/ocxl/config.c +++ b/drivers/misc/ocxl/config.c @@ -193,6 +193,18 @@ static int read_dvsec_vendor(struct pci_dev *dev) return 0; } +/** + * get_dvsec_vendor0() - Find a related PCI device (function 0) + * @dev: PCI device to match + * @dev0: The PCI device (function 0) found + * @out_pos: The position of PCI device (function 0) + * + * Returns 0 on success, negative on failure. + * + * NOTE: If it's successful, the reference of dev0 is increased, + * so after using it, the callers must call pci_dev_put() to give + * up the reference. 
+ */ static int get_dvsec_vendor0(struct pci_dev *dev, struct pci_dev **dev0, int *out_pos) { @@ -202,10 +214,14 @@ static int get_dvsec_vendor0(struct pci_dev *dev, struct pci_dev **dev0, dev = get_function_0(dev); if (!dev) return -1; + } else { + dev = pci_dev_get(dev); } pos = find_dvsec(dev, OCXL_DVSEC_VENDOR_ID); - if (!pos) + if (!pos) { + pci_dev_put(dev); return -1; + } *dev0 = dev; *out_pos = pos; return 0; @@ -222,6 +238,7 @@ int ocxl_config_get_reset_reload(struct pci_dev *dev, int *val) pci_read_config_dword(dev0, pos + OCXL_DVSEC_VENDOR_RESET_RELOAD, &reset_reload); + pci_dev_put(dev0); *val = !!(reset_reload & BIT(0)); return 0; } @@ -243,6 +260,7 @@ int ocxl_config_set_reset_reload(struct pci_dev *dev, int val) reset_reload &= ~BIT(0); pci_write_config_dword(dev0, pos + OCXL_DVSEC_VENDOR_RESET_RELOAD, reset_reload); + pci_dev_put(dev0); return 0; } From e68a558fb2af06daa38f86dad25061ddd90ab131 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Fri, 18 Nov 2022 02:22:06 +0100 Subject: [PATCH 2210/4122] speakup: Fix building as extmod MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit spk_priv_keyinfo.h should be opened from the speakup directory. When building as extmod we should thus open it from the module directory rather than the main Linux source. 
Signed-off-by: Samuel Thibault  Link: https://lore.kernel.org/r/20221118012206.j6hq6b6nfx2jhqg6@begin Signed-off-by: Greg Kroah-Hartman --- drivers/accessibility/speakup/Makefile | 4 +++- drivers/accessibility/speakup/makemapdata.c | 7 +++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/accessibility/speakup/Makefile b/drivers/accessibility/speakup/Makefile index ba69b0803d42..6f6a83565c0d 100644 --- a/drivers/accessibility/speakup/Makefile +++ b/drivers/accessibility/speakup/Makefile @@ -40,7 +40,9 @@ hostprogs += makemapdata makemapdata-objs := makemapdata.o quiet_cmd_mkmap = MKMAP $@ - cmd_mkmap = TOPDIR=$(srctree) $(obj)/makemapdata > $@ + cmd_mkmap = TOPDIR=$(srctree) \ + SPKDIR=$(if $(KBUILD_EXTMOD),$(KBUILD_EXTMOD),$(srctree)/drivers/accessibility/speakup) \ + $(obj)/makemapdata > $@ $(obj)/mapdata.h: $(obj)/makemapdata $(call cmd,mkmap) diff --git a/drivers/accessibility/speakup/makemapdata.c b/drivers/accessibility/speakup/makemapdata.c index 81db9ebf1fff..d7d41bb9b05f 100644 --- a/drivers/accessibility/speakup/makemapdata.c +++ b/drivers/accessibility/speakup/makemapdata.c @@ -51,12 +51,15 @@ main(int argc, char *argv[]) { int value, i; struct st_key *this; - const char *dir_name; + const char *dir_name, *spk_dir_name; char *cp; dir_name = getenv("TOPDIR"); if (!dir_name) dir_name = "."; + spk_dir_name = getenv("SPKDIR"); + if (!spk_dir_name) + spk_dir_name = "drivers/accessibility/speakup"; bzero(key_table, sizeof(key_table)); add_key("shift", 1, is_shift); add_key("altgr", 2, is_shift); @@ -83,7 +86,7 @@ main(int argc, char *argv[]) add_key(def_name, value, is_input); } - open_input(dir_name, "drivers/accessibility/speakup/spk_priv_keyinfo.h"); + open_input(spk_dir_name, "spk_priv_keyinfo.h"); while (get_define()) { if (strlen(def_val) > 5) { //if (def_val[0] == '(') From 1dbb4f0235a450f22e518124cbf9b922802ce38f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 16 Nov 2022 18:29:56 +0200 Subject: [PATCH 2211/4122] virt: 
acrn: Mark the uuid field as unused After the commits for userspace (see Link tags below) the uuid field is not being used in the ACRN code. Update the kernel to reflect these changes, i.e. do the following: - adding a comment explaining that it's not used anymore - replacing the specific type by a raw buffer - updating the example code accordingly The advertised field confused users and has actually never been used. So the wrong part here is that the kernel puts something which userspace never used and hence this may confuse a reader of this code. Note that there is only a single tool that had been prepared a year ago for these forthcoming changes in the kernel. Link: https://github.com/projectacrn/acrn-hypervisor/commit/da0d24326ed6 Link: https://github.com/projectacrn/acrn-hypervisor/commit/bb0327e70097 Signed-off-by: Andy Shevchenko Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20221116162956.72658-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/acrn.h | 5 ++--- samples/acrn/vm-sample.c | 3 --- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/include/uapi/linux/acrn.h b/include/uapi/linux/acrn.h index ccf47ed92500..7b714c1902eb 100644 --- a/include/uapi/linux/acrn.h +++ b/include/uapi/linux/acrn.h @@ -12,7 +12,6 @@ #define _UAPI_ACRN_H #include <linux/types.h> -#include <linux/uuid.h> #define ACRN_IO_REQUEST_MAX 16 @@ -186,7 +185,7 @@ struct acrn_ioreq_notify { * @reserved0: Reserved and must be 0 * @vcpu_num: Number of vCPU in the VM. Return from hypervisor. * @reserved1: Reserved and must be 0 - * @uuid: UUID of the VM. Pass to hypervisor directly. + * @uuid: Empty space never to be used again (used to be UUID of the VM) * @vm_flag: Flag of the VM creating. Pass to hypervisor directly. * @ioreq_buf: Service VM GPA of I/O request buffer. Pass to * hypervisor directly. 
@@ -198,7 +197,7 @@ struct acrn_vm_creation { __u16 reserved0; __u16 vcpu_num; __u16 reserved1; - guid_t uuid; + __u8 uuid[16]; __u64 vm_flag; __u64 ioreq_buf; __u64 cpu_affinity; diff --git a/samples/acrn/vm-sample.c b/samples/acrn/vm-sample.c index b2dad47a77a0..7abd68b20153 100644 --- a/samples/acrn/vm-sample.c +++ b/samples/acrn/vm-sample.c @@ -29,8 +29,6 @@ static struct acrn_io_request *io_req_buf = (struct acrn_io_request *)io_request __u16 vcpu_num; __u16 vmid; -/* POST_STANDARD_VM_UUID1, refer to https://github.com/projectacrn/acrn-hypervisor/blob/master/hypervisor/include/common/vm_uuids.h */ -guid_t vm_uuid = GUID_INIT(0x385479d2, 0xd625, 0xe811, 0x86, 0x4e, 0xcb, 0x7a, 0x18, 0xb3, 0x46, 0x43); int hsm_fd; int is_running = 1; @@ -63,7 +61,6 @@ int main(int argc, char **argv) } hsm_fd = open("/dev/acrn_hsm", O_RDWR|O_CLOEXEC); - memcpy(&create_vm.uuid, &vm_uuid, 16); create_vm.ioreq_buf = (__u64)io_req_buf; ret = ioctl(hsm_fd, ACRN_IOCTL_CREATE_VM, &create_vm); printf("Created VM! [%d]\n", ret); From fd2c930cf6a5b9176382c15f9acb1996e76e25ad Mon Sep 17 00:00:00 2001 From: ruanjinjie Date: Thu, 17 Nov 2022 14:47:25 +0800 Subject: [PATCH 2212/4122] misc: tifm: fix possible memory leak in tifm_7xx1_switch_media() If device_register() returns error in tifm_7xx1_switch_media(), name of kobject which is allocated in dev_set_name() called in device_add() is leaked. Never directly free @dev after calling device_register(), even if it returned an error! Always use put_device() to give up the reference initialized. 
Fixes: 2428a8fe2261 ("tifm: move common device management tasks from tifm_7xx1 to tifm_core") Signed-off-by: ruanjinjie Link: https://lore.kernel.org/r/20221117064725.3478402-1-ruanjinjie@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/tifm_7xx1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/tifm_7xx1.c b/drivers/misc/tifm_7xx1.c index 017c2f7d6287..7dd86a9858ab 100644 --- a/drivers/misc/tifm_7xx1.c +++ b/drivers/misc/tifm_7xx1.c @@ -190,7 +190,7 @@ static void tifm_7xx1_switch_media(struct work_struct *work) spin_unlock_irqrestore(&fm->lock, flags); } if (sock) - tifm_free_device(&sock->dev); + put_device(&sock->dev); } spin_lock_irqsave(&fm->lock, flags); } From 643a16a0eb1d6ac23744bb6e90a00fc21148a9dc Mon Sep 17 00:00:00 2001 From: Zheng Wang Date: Thu, 10 Nov 2022 11:50:33 +0800 Subject: [PATCH 2213/4122] misc: sgi-gru: fix use-after-free error in gru_set_context_option, gru_fault and gru_handle_user_call_os In some bad situations, the gts may be freed when the gru_check_chiplet_assignment check fails. The call chain can be gru_unload_context->gru_free_gru_context->gts_drop and finally kfree. However, the caller does not know whether the gts has been freed and uses it afterwards. This will trigger a use-after-free bug. Fix it by introducing a return value to see if it's in the error path or not. Free the gts in the caller if the gru_check_chiplet_assignment check failed. 
Fixes: 55484c45dbec ("gru: allow users to specify gru chiplet 2") Signed-off-by: Zheng Wang Acked-by: Dimitri Sivanich Link: https://lore.kernel.org/r/20221110035033.19498-1-zyytlz.wz@163.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/sgi-gru/grufault.c | 13 +++++++++++-- drivers/misc/sgi-gru/grumain.c | 22 ++++++++++++++++++---- drivers/misc/sgi-gru/grutables.h | 2 +- 3 files changed, 30 insertions(+), 7 deletions(-) diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c index d7ef61e602ed..b836936e9747 100644 --- a/drivers/misc/sgi-gru/grufault.c +++ b/drivers/misc/sgi-gru/grufault.c @@ -648,6 +648,7 @@ int gru_handle_user_call_os(unsigned long cb) if ((cb & (GRU_HANDLE_STRIDE - 1)) || ucbnum >= GRU_NUM_CB) return -EINVAL; +again: gts = gru_find_lock_gts(cb); if (!gts) return -EINVAL; @@ -656,7 +657,11 @@ int gru_handle_user_call_os(unsigned long cb) if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) goto exit; - gru_check_context_placement(gts); + if (gru_check_context_placement(gts)) { + gru_unlock_gts(gts); + gru_unload_context(gts, 1); + goto again; + } /* * CCH may contain stale data if ts_force_cch_reload is set. @@ -874,7 +879,11 @@ int gru_set_context_option(unsigned long arg) } else { gts->ts_user_blade_id = req.val1; gts->ts_user_chiplet_id = req.val0; - gru_check_context_placement(gts); + if (gru_check_context_placement(gts)) { + gru_unlock_gts(gts); + gru_unload_context(gts, 1); + return ret; + } } break; case sco_gseg_owner: diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c index 6706ef3c5977..4eb4b9455139 100644 --- a/drivers/misc/sgi-gru/grumain.c +++ b/drivers/misc/sgi-gru/grumain.c @@ -716,9 +716,10 @@ static int gru_check_chiplet_assignment(struct gru_state *gru, * chiplet. Misassignment can occur if the process migrates to a different * blade or if the user changes the selected blade/chiplet. 
*/ -void gru_check_context_placement(struct gru_thread_state *gts) +int gru_check_context_placement(struct gru_thread_state *gts) { struct gru_state *gru; + int ret = 0; /* * If the current task is the context owner, verify that the @@ -726,15 +727,23 @@ void gru_check_context_placement(struct gru_thread_state *gts) * references. Pthread apps use non-owner references to the CBRs. */ gru = gts->ts_gru; + /* + * If gru or gts->ts_tgid_owner isn't initialized properly, return + * success to indicate that the caller does not need to unload the + * gru context.The caller is responsible for their inspection and + * reinitialization if needed. + */ if (!gru || gts->ts_tgid_owner != current->tgid) - return; + return ret; if (!gru_check_chiplet_assignment(gru, gts)) { STAT(check_context_unload); - gru_unload_context(gts, 1); + ret = -EINVAL; } else if (gru_retarget_intr(gts)) { STAT(check_context_retarget_intr); } + + return ret; } @@ -934,7 +943,12 @@ again: mutex_lock(&gts->ts_ctxlock); preempt_disable(); - gru_check_context_placement(gts); + if (gru_check_context_placement(gts)) { + preempt_enable(); + mutex_unlock(&gts->ts_ctxlock); + gru_unload_context(gts, 1); + return VM_FAULT_NOPAGE; + } if (!gts->ts_gru) { STAT(load_user_context); diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h index 8c52776db234..640daf1994df 100644 --- a/drivers/misc/sgi-gru/grutables.h +++ b/drivers/misc/sgi-gru/grutables.h @@ -632,7 +632,7 @@ extern int gru_user_flush_tlb(unsigned long arg); extern int gru_user_unload_context(unsigned long arg); extern int gru_get_exception_detail(unsigned long arg); extern int gru_set_context_option(unsigned long address); -extern void gru_check_context_placement(struct gru_thread_state *gts); +extern int gru_check_context_placement(struct gru_thread_state *gts); extern int gru_cpu_fault_map_id(void); extern struct vm_area_struct *gru_find_vma(unsigned long vaddr); extern void gru_flush_all_tlb(struct gru_state *gru); From 
7198cf0f1ca90581f27452664216a662ad72aed5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:43:48 +0100 Subject: [PATCH 2214/4122] misc: lis3lv02d/lis3lv02d_i2c: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221118224540.619276-495-uwe@kleine-koenig.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/lis3lv02d/lis3lv02d_i2c.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/misc/lis3lv02d/lis3lv02d_i2c.c b/drivers/misc/lis3lv02d/lis3lv02d_i2c.c index d7daa01fe7ca..7071412d6bf6 100644 --- a/drivers/misc/lis3lv02d/lis3lv02d_i2c.c +++ b/drivers/misc/lis3lv02d/lis3lv02d_i2c.c @@ -100,8 +100,7 @@ static const struct of_device_id lis3lv02d_i2c_dt_ids[] = { MODULE_DEVICE_TABLE(of, lis3lv02d_i2c_dt_ids); #endif -static int lis3lv02d_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int lis3lv02d_i2c_probe(struct i2c_client *client) { int ret = 0; struct lis3lv02d_platform_data *pdata = client->dev.platform_data; @@ -263,7 +262,7 @@ static struct i2c_driver lis3lv02d_i2c_driver = { .pm = &lis3_pm_ops, .of_match_table = of_match_ptr(lis3lv02d_i2c_dt_ids), }, - .probe = lis3lv02d_i2c_probe, + .probe_new = lis3lv02d_i2c_probe, .remove = lis3lv02d_i2c_remove, .id_table = lis3lv02d_id, }; From 59ee8ca4eeda35d850e4c81ec3065dba10023842 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:43:41 +0100 Subject: [PATCH 2215/4122] misc: eeprom/eeprom: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221118224540.619276-488-uwe@kleine-koenig.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/eeprom/eeprom.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/misc/eeprom/eeprom.c b/drivers/misc/eeprom/eeprom.c index 8a841a75d893..32611100d5cd 100644 --- a/drivers/misc/eeprom/eeprom.c +++ b/drivers/misc/eeprom/eeprom.c @@ -141,8 +141,7 @@ static int eeprom_detect(struct i2c_client *client, struct i2c_board_info *info) return 0; } -static int eeprom_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int eeprom_probe(struct i2c_client *client) { struct i2c_adapter *adapter = client->adapter; struct eeprom_data *data; @@ -197,7 +196,7 @@ static struct i2c_driver eeprom_driver = { .driver = { .name = "eeprom", }, - .probe = eeprom_probe, + .probe_new = eeprom_probe, .remove = eeprom_remove, .id_table = eeprom_id, From 8427bd8bdee8f35797cade56fe173fbea990e38c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:43:49 +0100 Subject: [PATCH 2216/4122] misc: tsl2550: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221118224540.619276-496-uwe@kleine-koenig.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/tsl2550.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/misc/tsl2550.c b/drivers/misc/tsl2550.c index 1652fb9b3856..6c62b94e0acd 100644 --- a/drivers/misc/tsl2550.c +++ b/drivers/misc/tsl2550.c @@ -331,8 +331,7 @@ static int tsl2550_init_client(struct i2c_client *client) */ static struct i2c_driver tsl2550_driver; -static int tsl2550_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int tsl2550_probe(struct i2c_client *client) { struct i2c_adapter *adapter = client->adapter; struct tsl2550_data *data; @@ -438,7 +437,7 @@ static struct i2c_driver tsl2550_driver = { .of_match_table = tsl2550_of_match, .pm = TSL2550_PM_OPS, }, - .probe = tsl2550_probe, + .probe_new = tsl2550_probe, .remove = tsl2550_remove, .id_table = tsl2550_id, }; From 327e1ad186d91b12b6ece0b21178c07edef01806 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:43:47 +0100 Subject: [PATCH 2217/4122] misc: isl29020: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221118224540.619276-494-uwe@kleine-koenig.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/isl29020.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/misc/isl29020.c b/drivers/misc/isl29020.c index c6f2a94f501a..3be02093368c 100644 --- a/drivers/misc/isl29020.c +++ b/drivers/misc/isl29020.c @@ -151,8 +151,7 @@ static int als_set_default_config(struct i2c_client *client) return 0; } -static int isl29020_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int isl29020_probe(struct i2c_client *client) { int res; @@ -215,7 +214,7 @@ static struct i2c_driver isl29020_driver = { .name = "isl29020", .pm = ISL29020_PM_OPS, }, - .probe = isl29020_probe, + .probe_new = isl29020_probe, .remove = isl29020_remove, .id_table = isl29020_id, }; From 99b0cb3f5f8d67f4552c24d9b0aa6cda38f558aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:43:43 +0100 Subject: [PATCH 2218/4122] misc: eeprom/max6875: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221118224540.619276-490-uwe@kleine-koenig.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/eeprom/max6875.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/misc/eeprom/max6875.c b/drivers/misc/eeprom/max6875.c index 6bd4f4339af4..79cf8afcef2e 100644 --- a/drivers/misc/eeprom/max6875.c +++ b/drivers/misc/eeprom/max6875.c @@ -130,8 +130,7 @@ static const struct bin_attribute user_eeprom_attr = { .read = max6875_read, }; -static int max6875_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int max6875_probe(struct i2c_client *client) { struct i2c_adapter *adapter = client->adapter; struct max6875_data *data; @@ -193,7 +192,7 @@ static struct i2c_driver max6875_driver = { .driver = { .name = "max6875", }, - .probe = max6875_probe, + .probe_new = max6875_probe, .remove = max6875_remove, .id_table = max6875_id, }; From 654700c9fc2860d33d57b42fd39cae2310dbc2ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:43:44 +0100 Subject: [PATCH 2219/4122] misc: hmc6352: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221118224540.619276-491-uwe@kleine-koenig.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/hmc6352.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/misc/hmc6352.c b/drivers/misc/hmc6352.c index 42b9adef28a3..8967940ecd1e 100644 --- a/drivers/misc/hmc6352.c +++ b/drivers/misc/hmc6352.c @@ -101,8 +101,7 @@ static const struct attribute_group m_compass_gr = { .attrs = mid_att_compass }; -static int hmc6352_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int hmc6352_probe(struct i2c_client *client) { int res; @@ -132,7 +131,7 @@ static struct i2c_driver hmc6352_driver = { .driver = { .name = "hmc6352", }, - .probe = hmc6352_probe, + .probe_new = hmc6352_probe, .remove = hmc6352_remove, .id_table = hmc6352_id, }; From 9c18dad44dc1de202a69c8ccef983e6070740acd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:43:45 +0100 Subject: [PATCH 2220/4122] misc: ics932s401: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221118224540.619276-492-uwe@kleine-koenig.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ics932s401.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/misc/ics932s401.c b/drivers/misc/ics932s401.c index 1cb71df966a4..12108a7b9b40 100644 --- a/drivers/misc/ics932s401.c +++ b/drivers/misc/ics932s401.c @@ -89,8 +89,7 @@ struct ics932s401_data { u8 regs[NUM_REGS]; }; -static int ics932s401_probe(struct i2c_client *client, - const struct i2c_device_id *id); +static int ics932s401_probe(struct i2c_client *client); static int ics932s401_detect(struct i2c_client *client, struct i2c_board_info *info); static void ics932s401_remove(struct i2c_client *client); @@ -106,7 +105,7 @@ static struct i2c_driver ics932s401_driver = { .driver = { .name = "ics932s401", }, - .probe = ics932s401_probe, + .probe_new = ics932s401_probe, .remove = ics932s401_remove, .id_table = ics932s401_id, .detect = ics932s401_detect, @@ -429,8 +428,7 @@ static int ics932s401_detect(struct i2c_client *client, return 0; } -static int ics932s401_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int ics932s401_probe(struct i2c_client *client) { struct ics932s401_data *data; int err; From db687ce71845aeb639be7452f4d8a272cf190cd1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:43:46 +0100 Subject: [PATCH 2221/4122] misc: isl29003: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221118224540.619276-493-uwe@kleine-koenig.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/isl29003.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/misc/isl29003.c b/drivers/misc/isl29003.c index 8ab61be79c76..aeda2fa89e61 100644 --- a/drivers/misc/isl29003.c +++ b/drivers/misc/isl29003.c @@ -374,8 +374,7 @@ static int isl29003_init_client(struct i2c_client *client) * I2C layer */ -static int isl29003_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int isl29003_probe(struct i2c_client *client) { struct i2c_adapter *adapter = client->adapter; struct isl29003_data *data; @@ -460,7 +459,7 @@ static struct i2c_driver isl29003_driver = { .name = ISL29003_DRV_NAME, .pm = ISL29003_PM_OPS, }, - .probe = isl29003_probe, + .probe_new = isl29003_probe, .remove = isl29003_remove, .id_table = isl29003_id, }; From 244179dbe11e707a0ef596246a9b80327492fc35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:43:42 +0100 Subject: [PATCH 2222/4122] misc: eeprom/idt_89hpesx: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221118224540.619276-489-uwe@kleine-koenig.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/eeprom/idt_89hpesx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/misc/eeprom/idt_89hpesx.c b/drivers/misc/eeprom/idt_89hpesx.c index bb3ed352b95f..4e07ee9cb500 100644 --- a/drivers/misc/eeprom/idt_89hpesx.c +++ b/drivers/misc/eeprom/idt_89hpesx.c @@ -1366,7 +1366,7 @@ static void idt_remove_dbgfs_files(struct idt_89hpesx_dev *pdev) /* * idt_probe() - IDT 89HPESx driver probe() callback method */ -static int idt_probe(struct i2c_client *client, const struct i2c_device_id *id) +static int idt_probe(struct i2c_client *client) { struct idt_89hpesx_dev *pdev; int ret; @@ -1556,7 +1556,7 @@ static struct i2c_driver idt_driver = { .name = IDT_NAME, .of_match_table = idt_of_match, }, - .probe = idt_probe, + .probe_new = idt_probe, .remove = idt_remove, .id_table = idt_ids, }; From 6757c6480d7f34cb272d28339dfac096b94c8638 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:43:38 +0100 Subject: [PATCH 2223/4122] misc: apds990x: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221118224540.619276-485-uwe@kleine-koenig.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/apds990x.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/misc/apds990x.c b/drivers/misc/apds990x.c index e2100cc42ce8..0024503ea6db 100644 --- a/drivers/misc/apds990x.c +++ b/drivers/misc/apds990x.c @@ -1051,8 +1051,7 @@ static const struct attribute_group apds990x_attribute_group[] = { {.attrs = sysfs_attrs_ctrl }, }; -static int apds990x_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int apds990x_probe(struct i2c_client *client) { struct apds990x_chip *chip; int err; @@ -1272,7 +1271,7 @@ static struct i2c_driver apds990x_driver = { .name = "apds990x", .pm = &apds990x_pm_ops, }, - .probe = apds990x_probe, + .probe_new = apds990x_probe, .remove = apds990x_remove, .id_table = apds990x_id, }; From 9f28b675c160519c79daed9f73bc38ab3d6c9015 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:43:37 +0100 Subject: [PATCH 2224/4122] misc: apds9802als: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221118224540.619276-484-uwe@kleine-koenig.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/apds9802als.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/misc/apds9802als.c b/drivers/misc/apds9802als.c index a32431f4b370..0526c55d5cd5 100644 --- a/drivers/misc/apds9802als.c +++ b/drivers/misc/apds9802als.c @@ -212,8 +212,7 @@ static int als_set_default_config(struct i2c_client *client) return ret_val; } -static int apds9802als_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int apds9802als_probe(struct i2c_client *client) { int res; struct als_data *data; @@ -297,7 +296,7 @@ static struct i2c_driver apds9802als_driver = { .name = DRIVER_NAME, .pm = APDS9802ALS_PM_OPS, }, - .probe = apds9802als_probe, + .probe_new = apds9802als_probe, .remove = apds9802als_remove, .id_table = apds9802als_id, }; From 781edb0530a1009f89e7888726ca87b255d2526b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:43:39 +0100 Subject: [PATCH 2225/4122] misc: bh1770glc: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221118224540.619276-486-uwe@kleine-koenig.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/bh1770glc.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/misc/bh1770glc.c b/drivers/misc/bh1770glc.c index d0dfa674414c..bedbe0efb330 100644 --- a/drivers/misc/bh1770glc.c +++ b/drivers/misc/bh1770glc.c @@ -1162,8 +1162,7 @@ static const struct attribute_group bh1770_attribute_group = { .attrs = sysfs_attrs }; -static int bh1770_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int bh1770_probe(struct i2c_client *client) { struct bh1770_chip *chip; int err; @@ -1379,7 +1378,7 @@ static struct i2c_driver bh1770_driver = { .name = "bh1770glc", .pm = &bh1770_pm_ops, }, - .probe = bh1770_probe, + .probe_new = bh1770_probe, .remove = bh1770_remove, .id_table = bh1770_id, }; From 3127a86a3702bd3a2ff43503d49919d666739ae8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:43:40 +0100 Subject: [PATCH 2226/4122] misc: ds1682: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221118224540.619276-487-uwe@kleine-koenig.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ds1682.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/misc/ds1682.c b/drivers/misc/ds1682.c index 0698ddc5f4d5..d517eed32971 100644 --- a/drivers/misc/ds1682.c +++ b/drivers/misc/ds1682.c @@ -200,8 +200,7 @@ static const struct bin_attribute ds1682_eeprom_attr = { /* * Called when a ds1682 device is matched with this driver */ -static int ds1682_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int ds1682_probe(struct i2c_client *client) { int rc; @@ -251,7 +250,7 @@ static struct i2c_driver ds1682_driver = { .name = "ds1682", .of_match_table = ds1682_of_match, }, - .probe = ds1682_probe, + .probe_new = ds1682_probe, .remove = ds1682_remove, .id_table = ds1682_id, }; From 7b51161696e803fd5f9ad55b20a64c2df313f95c Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 17 Nov 2022 15:06:36 +0800 Subject: [PATCH 2227/4122] firmware: raspberrypi: fix possible memory leak in rpi_firmware_probe() In rpi_firmware_probe(), if mbox_request_channel() fails, the 'fw' will not be freed through rpi_firmware_delete(), fix this leak by calling kfree() in the error path. 
Fixes: 1e7c57355a3b ("firmware: raspberrypi: Keep count of all consumers") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221117070636.3849773-1-yangyingliang@huawei.com Acked-by: Joel Savitz Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/raspberrypi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/firmware/raspberrypi.c b/drivers/firmware/raspberrypi.c index 4b8978b254f9..dba315f675bc 100644 --- a/drivers/firmware/raspberrypi.c +++ b/drivers/firmware/raspberrypi.c @@ -272,6 +272,7 @@ static int rpi_firmware_probe(struct platform_device *pdev) int ret = PTR_ERR(fw->chan); if (ret != -EPROBE_DEFER) dev_err(dev, "Failed to get mbox channel: %d\n", ret); + kfree(fw); return ret; } From ab760791c0cfbb1d7a668f46a135264f56c8f018 Mon Sep 17 00:00:00 2001 From: D Scott Phillips Date: Mon, 14 Nov 2022 13:22:12 -0800 Subject: [PATCH 2228/4122] char: misc: Increase the maximum number of dynamic misc devices to 1048448 On AmpereOne, 128 dynamic misc devices are not enough for the per-cpu coresight_tmc devices. Switch the dynamic minors allocator to an ida and add logic to allocate in the ranges [0..127] and [256..1048575], leaving [128..255] for static misc devices. Dynamic allocations start from 127 growing downwards and then increasing from 256, so device numbering for the first 128 devices remains the same as before.
Signed-off-by: D Scott Phillips Link: https://lore.kernel.org/r/20221114212212.9279-1-scott@os.amperecomputing.com Signed-off-by: Greg Kroah-Hartman --- drivers/char/misc.c | 41 ++++++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/drivers/char/misc.c b/drivers/char/misc.c index cba19bfdc44d..05727f0daa6b 100644 --- a/drivers/char/misc.c +++ b/drivers/char/misc.c @@ -61,7 +61,29 @@ static DEFINE_MUTEX(misc_mtx); * Assigned numbers, used for dynamic minors */ #define DYNAMIC_MINORS 128 /* like dynamic majors */ -static DECLARE_BITMAP(misc_minors, DYNAMIC_MINORS); +static DEFINE_IDA(misc_minors_ida); + +static int misc_minor_alloc(void) +{ + int ret; + + ret = ida_alloc_max(&misc_minors_ida, DYNAMIC_MINORS - 1, GFP_KERNEL); + if (ret >= 0) { + ret = DYNAMIC_MINORS - ret - 1; + } else { + ret = ida_alloc_range(&misc_minors_ida, MISC_DYNAMIC_MINOR + 1, + MINORMASK, GFP_KERNEL); + } + return ret; +} + +static void misc_minor_free(int minor) +{ + if (minor < DYNAMIC_MINORS) + ida_free(&misc_minors_ida, DYNAMIC_MINORS - minor - 1); + else if (minor > MISC_DYNAMIC_MINOR) + ida_free(&misc_minors_ida, minor); +} #ifdef CONFIG_PROC_FS static void *misc_seq_start(struct seq_file *seq, loff_t *pos) @@ -183,14 +205,13 @@ int misc_register(struct miscdevice *misc) mutex_lock(&misc_mtx); if (is_dynamic) { - int i = find_first_zero_bit(misc_minors, DYNAMIC_MINORS); + int i = misc_minor_alloc(); - if (i >= DYNAMIC_MINORS) { + if (i < 0) { err = -EBUSY; goto out; } - misc->minor = DYNAMIC_MINORS - i - 1; - set_bit(i, misc_minors); + misc->minor = i; } else { struct miscdevice *c; @@ -209,10 +230,7 @@ int misc_register(struct miscdevice *misc) misc, misc->groups, "%s", misc->name); if (IS_ERR(misc->this_device)) { if (is_dynamic) { - int i = DYNAMIC_MINORS - misc->minor - 1; - - if (i < DYNAMIC_MINORS && i >= 0) - clear_bit(i, misc_minors); + misc_minor_free(misc->minor); misc->minor = MISC_DYNAMIC_MINOR; } err = 
PTR_ERR(misc->this_device); @@ -240,16 +258,13 @@ EXPORT_SYMBOL(misc_register); void misc_deregister(struct miscdevice *misc) { - int i = DYNAMIC_MINORS - misc->minor - 1; - if (WARN_ON(list_empty(&misc->list))) return; mutex_lock(&misc_mtx); list_del(&misc->list); device_destroy(misc_class, MKDEV(MISC_MAJOR, misc->minor)); - if (i < DYNAMIC_MINORS && i >= 0) - clear_bit(i, misc_minors); + misc_minor_free(misc->minor); mutex_unlock(&misc_mtx); } EXPORT_SYMBOL(misc_deregister); From fa1ba41c17cd786925720bc1a9554d6c6624923d Mon Sep 17 00:00:00 2001 From: Peng Wu Date: Tue, 15 Nov 2022 09:11:38 +0000 Subject: [PATCH 2229/4122] firmware: google: fix a NULL vs IS_ERR() check in cbmem_entry_probe() The devm_memremap() function returns error pointers on error, it doesn't return NULL. Fixes: 19d54020883c ("firmware: google: Implement cbmem in sysfs driver") Signed-off-by: Peng Wu Reviewed-by: Guenter Roeck Reviewed-by: Jack Rosenthal Link: https://lore.kernel.org/r/20221115091138.51614-1-wupeng58@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/google/cbmem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/google/cbmem.c b/drivers/firmware/google/cbmem.c index 685f3070ce9d..88e587ba1e0d 100644 --- a/drivers/firmware/google/cbmem.c +++ b/drivers/firmware/google/cbmem.c @@ -106,8 +106,8 @@ static int cbmem_entry_probe(struct coreboot_device *dev) entry->mem_file_buf = devm_memremap(&dev->dev, dev->cbmem_entry.address, dev->cbmem_entry.entry_size, MEMREMAP_WB); - if (!entry->mem_file_buf) - return -ENOMEM; + if (IS_ERR(entry->mem_file_buf)) + return PTR_ERR(entry->mem_file_buf); entry->size = dev->cbmem_entry.entry_size; From 61c80d1c3833e196256fb060382db94f24d3d9a7 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 11 Nov 2022 22:54:39 +0800 Subject: [PATCH 2230/4122] cxl: fix possible null-ptr-deref in cxl_guest_init_afu|adapter() If device_register() fails in cxl_register_afu|adapter(), the device is not added, 
device_unregister() can not be called in the error path, otherwise it will cause a null-ptr-deref because of removing not added device. As comment of device_register() says, it should use put_device() to give up the reference in the error path. So split device_unregister() into device_del() and put_device(), then goes to put dev when register fails. Fixes: 14baf4d9c739 ("cxl: Add guest-specific code") Signed-off-by: Yang Yingliang Acked-by: Andrew Donnellan Acked-by: Frederic Barrat Link: https://lore.kernel.org/r/20221111145440.2426970-1-yangyingliang@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/cxl/guest.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/misc/cxl/guest.c b/drivers/misc/cxl/guest.c index 375f692ae9d6..fb95a2d5cef4 100644 --- a/drivers/misc/cxl/guest.c +++ b/drivers/misc/cxl/guest.c @@ -965,10 +965,10 @@ int cxl_guest_init_afu(struct cxl *adapter, int slice, struct device_node *afu_n * if it returns an error! */ if ((rc = cxl_register_afu(afu))) - goto err_put1; + goto err_put_dev; if ((rc = cxl_sysfs_afu_add(afu))) - goto err_put1; + goto err_del_dev; /* * pHyp doesn't expose the programming models supported by the @@ -984,7 +984,7 @@ int cxl_guest_init_afu(struct cxl *adapter, int slice, struct device_node *afu_n afu->modes_supported = CXL_MODE_DIRECTED; if ((rc = cxl_afu_select_best_mode(afu))) - goto err_put2; + goto err_remove_sysfs; adapter->afu[afu->slice] = afu; @@ -1004,10 +1004,12 @@ int cxl_guest_init_afu(struct cxl *adapter, int slice, struct device_node *afu_n return 0; -err_put2: +err_remove_sysfs: cxl_sysfs_afu_remove(afu); -err_put1: - device_unregister(&afu->dev); +err_del_dev: + device_del(&afu->dev); +err_put_dev: + put_device(&afu->dev); free = false; guest_release_serr_irq(afu); err2: @@ -1141,18 +1143,20 @@ struct cxl *cxl_guest_init_adapter(struct device_node *np, struct platform_devic * even if it returns an error! 
*/ if ((rc = cxl_register_adapter(adapter))) - goto err_put1; + goto err_put_dev; if ((rc = cxl_sysfs_adapter_add(adapter))) - goto err_put1; + goto err_del_dev; /* release the context lock as the adapter is configured */ cxl_adapter_context_unlock(adapter); return adapter; -err_put1: - device_unregister(&adapter->dev); +err_del_dev: + device_del(&adapter->dev); +err_put_dev: + put_device(&adapter->dev); free = false; cxl_guest_remove_chardev(adapter); err1: From 02cd3032b154fa02fdf90e7467abaeed889330b2 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 11 Nov 2022 22:54:40 +0800 Subject: [PATCH 2231/4122] cxl: fix possible null-ptr-deref in cxl_pci_init_afu|adapter() If device_register() fails in cxl_pci_afu|adapter(), the device is not added, device_unregister() can not be called in the error path, otherwise it will cause a null-ptr-deref because of removing not added device. As comment of device_register() says, it should use put_device() to give up the reference in the error path. So split device_unregister() into device_del() and put_device(), then goes to put dev when register fails. Fixes: f204e0b8cedd ("cxl: Driver code for powernv PCIe based cards for userspace access") Signed-off-by: Yang Yingliang Acked-by: Frederic Barrat Acked-by: Andrew Donnellan Link: https://lore.kernel.org/r/20221111145440.2426970-2-yangyingliang@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/cxl/pci.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 3de0aea62ade..6d495d641c95 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -1164,10 +1164,10 @@ static int pci_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev) * if it returns an error! 
*/ if ((rc = cxl_register_afu(afu))) - goto err_put1; + goto err_put_dev; if ((rc = cxl_sysfs_afu_add(afu))) - goto err_put1; + goto err_del_dev; adapter->afu[afu->slice] = afu; @@ -1176,10 +1176,12 @@ static int pci_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev) return 0; -err_put1: +err_del_dev: + device_del(&afu->dev); +err_put_dev: pci_deconfigure_afu(afu); cxl_debugfs_afu_remove(afu); - device_unregister(&afu->dev); + put_device(&afu->dev); return rc; err_free_native: @@ -1667,23 +1669,25 @@ static struct cxl *cxl_pci_init_adapter(struct pci_dev *dev) * even if it returns an error! */ if ((rc = cxl_register_adapter(adapter))) - goto err_put1; + goto err_put_dev; if ((rc = cxl_sysfs_adapter_add(adapter))) - goto err_put1; + goto err_del_dev; /* Release the context lock as adapter is configured */ cxl_adapter_context_unlock(adapter); return adapter; -err_put1: +err_del_dev: + device_del(&adapter->dev); +err_put_dev: /* This should mirror cxl_remove_adapter, except without the * sysfs parts */ cxl_debugfs_adapter_remove(adapter); cxl_deconfigure_adapter(adapter); - device_unregister(&adapter->dev); + put_device(&adapter->dev); return ERR_PTR(rc); err_release: From 2613cc29c5723881ca603b1a3b50f0107010d5d6 Mon Sep 17 00:00:00 2001 From: Pierre Gondois Date: Wed, 16 Nov 2022 10:49:58 +0100 Subject: [PATCH 2232/4122] cacheinfo: Remove of_node_put() for fw_token fw_token is used for DT/ACPI systems to identify CPUs sharing caches. For DT based systems, fw_token is set to a pointer to a DT node. commit 3da72e18371c ("cacheinfo: Decrement refcount in cache_setup_of_node()") doesn't increment the refcount of fw_token anymore in cache_setup_of_node(). fw_token is indeed used as a token and not as a (struct device_node*), so no reference to fw_token should be kept. However, [1] is triggered when hotplugging a CPU multiple times since cache_shared_cpu_map_remove() decrements the refcount to fw_token at each CPU unplugging, eventually reaching 0. 
Remove of_node_put() for fw_token in cache_shared_cpu_map_remove(). [1] ------------[ cut here ]------------ refcount_t: saturated; leaking memory. WARNING: CPU: 4 PID: 32 at lib/refcount.c:22 refcount_warn_saturate (lib/refcount.c:22 (discriminator 3)) Modules linked in: CPU: 4 PID: 32 Comm: cpuhp/4 Tainted: G W 6.1.0-rc1-14091-g9fdf2ca7b9c8 #76 Hardware name: ARM LTD ARM Juno Development Platform/ARM Juno Development Platform, BIOS EDK II Oct 31 2022 pstate: 600000c5 (nZCv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : refcount_warn_saturate (lib/refcount.c:22 (discriminator 3)) lr : refcount_warn_saturate (lib/refcount.c:22 (discriminator 3)) [...] Call trace: [...] of_node_release (drivers/of/dynamic.c:335) kobject_put (lib/kobject.c:677 lib/kobject.c:704 ./include/linux/kref.h:65 lib/kobject.c:721) of_node_put (drivers/of/dynamic.c:49) free_cache_attributes.part.0 (drivers/base/cacheinfo.c:712) cacheinfo_cpu_pre_down (drivers/base/cacheinfo.c:718) cpuhp_invoke_callback (kernel/cpu.c:247 (discriminator 4)) cpuhp_thread_fun (kernel/cpu.c:785) smpboot_thread_fn (kernel/smpboot.c:164 (discriminator 3)) kthread (kernel/kthread.c:376) ret_from_fork (arch/arm64/kernel/entry.S:861) ---[ end trace 0000000000000000 ]--- Fixes: 3da72e18371c ("cacheinfo: Decrement refcount in cache_setup_of_node()") Reported-by: Geert Uytterhoeven Reported-by: Marek Szyprowski Tested-by: Geert Uytterhoeven Tested-by: Sudeep Holla Reviewed-by: Sudeep Holla Signed-off-by: Pierre Gondois Link: https://lore.kernel.org/r/20221116094958.2141072-1-pierre.gondois@arm.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/cacheinfo.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index 04317cde800c..950b22cdb5f7 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -317,8 +317,6 @@ static void cache_shared_cpu_map_remove(unsigned int cpu) cpumask_clear_cpu(cpu, &sib_leaf->shared_cpu_map); cpumask_clear_cpu(sibling, 
&this_leaf->shared_cpu_map); } - if (of_have_populated_dt()) - of_node_put(this_leaf->fw_token); } } From 20228a1d5a55e7db0c6720840f2c7d2b48c55f69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Tue, 20 Sep 2022 13:28:07 +0200 Subject: [PATCH 2233/4122] iio: adc: ad_sigma_delta: do not use internal iio_dev lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop 'mlock' usage by making use of iio_device_claim_direct_mode(). This change actually makes sure we cannot do a single conversion while buffering is enabled. Note there was a potential race in the previous code since we were only acquiring the lock after checking if the buffer is enabled. Fixes: af3008485ea0 ("iio:adc: Add common code for ADI Sigma Delta devices") Signed-off-by: Nuno Sá Reviewed-by: Miquel Raynal Cc: #No rush as race is very old. Link: https://lore.kernel.org/r/20220920112821.975359-2-nuno.sa@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad_sigma_delta.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/iio/adc/ad_sigma_delta.c b/drivers/iio/adc/ad_sigma_delta.c index 261a9a6b45e1..d8570f620785 100644 --- a/drivers/iio/adc/ad_sigma_delta.c +++ b/drivers/iio/adc/ad_sigma_delta.c @@ -281,10 +281,10 @@ int ad_sigma_delta_single_conversion(struct iio_dev *indio_dev, unsigned int data_reg; int ret = 0; - if (iio_buffer_enabled(indio_dev)) - return -EBUSY; + ret = iio_device_claim_direct_mode(indio_dev); + if (ret) + return ret; - mutex_lock(&indio_dev->mlock); ad_sigma_delta_set_channel(sigma_delta, chan->address); spi_bus_lock(sigma_delta->spi->master); @@ -323,7 +323,7 @@ out: ad_sigma_delta_set_mode(sigma_delta, AD_SD_MODE_IDLE); sigma_delta->bus_locked = false; spi_bus_unlock(sigma_delta->spi->master); - mutex_unlock(&indio_dev->mlock); + iio_device_release_direct_mode(indio_dev); if (ret) return ret; From 2a22b40aea42dd516e669257ab5faf10396c2fad Mon Sep 17 00:00:00 2001 From: Yuan Can Date:
Wed, 21 Sep 2022 02:39:15 +0000 Subject: [PATCH 2234/4122] iio: accel: bma400: Switch to use dev_err_probe() helper In the probe path, dev_err() can be replaced with dev_err_probe() which will check if the error code is -EPROBE_DEFER and print the error name. Signed-off-by: Yuan Can Link: https://lore.kernel.org/r/20220921023915.47300-1-yuancan@huawei.com Signed-off-by: Jonathan Cameron --- drivers/iio/accel/bma400_core.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/iio/accel/bma400_core.c b/drivers/iio/accel/bma400_core.c index ad8fce3e08cd..e8de88e6cfb9 100644 --- a/drivers/iio/accel/bma400_core.c +++ b/drivers/iio/accel/bma400_core.c @@ -886,14 +886,10 @@ static int bma400_init(struct bma400_data *data) ret = devm_regulator_bulk_get(data->dev, ARRAY_SIZE(data->regulators), data->regulators); - if (ret) { - if (ret != -EPROBE_DEFER) - dev_err(data->dev, - "Failed to get regulators: %d\n", - ret); + if (ret) + return dev_err_probe(data->dev, ret, "Failed to get regulators: %d\n", + ret); - return ret; - } ret = regulator_bulk_enable(ARRAY_SIZE(data->regulators), data->regulators); if (ret) { From 2aebc223fc7ce613140ef2b64ca3c0f1a4f458cb Mon Sep 17 00:00:00 2001 From: Crt Mori Date: Thu, 22 Sep 2022 10:13:22 +0200 Subject: [PATCH 2235/4122] iio: temperature: mlx90632 Add runtime powermanagement modes The sensor can operate in lower power modes and even make measurements when in those lower powered modes. The decision was taken that if measurement is not requested within 2 seconds the sensor will remain in SLEEP_STEP power mode, where measurements are triggered on request with setting the start of measurement bit (SOB). In this mode the measurements are taking a bit longer because we need to start it and complete it. Currently, in continuous mode we read ready data and this mode is activated if sensor measurement is requested within 2 seconds.
The suspend timeout is increased to 6 seconds (instead of 3 before), because that enables more measurements in lower power mode (SLEEP_STEP), with the lowest refresh rate (2 seconds). Signed-off-by: Crt Mori Link: https://lore.kernel.org/r/be405068f081f2d518843897b13cd0289c280b5d.1663834141.git.cmo@melexis.com Signed-off-by: Jonathan Cameron --- drivers/iio/temperature/mlx90632.c | 377 +++++++++++++++++++++++------ 1 file changed, 309 insertions(+), 68 deletions(-) diff --git a/drivers/iio/temperature/mlx90632.c b/drivers/iio/temperature/mlx90632.c index f6dec0e5f097..71130d237a69 100644 --- a/drivers/iio/temperature/mlx90632.c +++ b/drivers/iio/temperature/mlx90632.c @@ -6,11 +6,14 @@ * * Driver for the Melexis MLX90632 I2C 16-bit IR thermopile sensor */ +#include #include +#include #include #include #include #include +#include #include #include #include @@ -55,6 +58,12 @@ #define MLX90632_EE_Ha 0x2481 /* Ha customer calib value reg 16bit */ #define MLX90632_EE_Hb 0x2482 /* Hb customer calib value reg 16bit */ +#define MLX90632_EE_MEDICAL_MEAS1 0x24E1 /* Medical measurement 1 16bit */ +#define MLX90632_EE_MEDICAL_MEAS2 0x24E2 /* Medical measurement 2 16bit */ +#define MLX90632_EE_EXTENDED_MEAS1 0x24F1 /* Extended measurement 1 16bit */ +#define MLX90632_EE_EXTENDED_MEAS2 0x24F2 /* Extended measurement 2 16bit */ +#define MLX90632_EE_EXTENDED_MEAS3 0x24F3 /* Extended measurement 3 16bit */ + /* Register addresses - volatile */ #define MLX90632_REG_I2C_ADDR 0x3000 /* Chip I2C address register */ @@ -62,13 +71,16 @@ #define MLX90632_REG_CONTROL 0x3001 /* Control Register address */ #define MLX90632_CFG_PWR_MASK GENMASK(2, 1) /* PowerMode Mask */ #define MLX90632_CFG_MTYP_MASK GENMASK(8, 4) /* Meas select Mask */ +#define MLX90632_CFG_SOB_MASK BIT(11) /* PowerModes statuses */ #define MLX90632_PWR_STATUS(ctrl_val) (ctrl_val << 1) #define MLX90632_PWR_STATUS_HALT MLX90632_PWR_STATUS(0) /* hold */ -#define MLX90632_PWR_STATUS_SLEEP_STEP MLX90632_PWR_STATUS(1) /* sleep 
step*/ +#define MLX90632_PWR_STATUS_SLEEP_STEP MLX90632_PWR_STATUS(1) /* sleep step */ #define MLX90632_PWR_STATUS_STEP MLX90632_PWR_STATUS(2) /* step */ -#define MLX90632_PWR_STATUS_CONTINUOUS MLX90632_PWR_STATUS(3) /* continuous*/ +#define MLX90632_PWR_STATUS_CONTINUOUS MLX90632_PWR_STATUS(3) /* continuous */ + +#define MLX90632_EE_RR GENMASK(10, 8) /* Only Refresh Rate bits */ /* Measurement types */ #define MLX90632_MTYP_MEDICAL 0 @@ -116,8 +128,9 @@ #define MLX90632_REF_12 12LL /* ResCtrlRef value of Ch 1 or Ch 2 */ #define MLX90632_REF_3 12LL /* ResCtrlRef value of Channel 3 */ #define MLX90632_MAX_MEAS_NUM 31 /* Maximum measurements in list */ -#define MLX90632_SLEEP_DELAY_MS 3000 /* Autosleep delay */ +#define MLX90632_SLEEP_DELAY_MS 6000 /* Autosleep delay */ #define MLX90632_EXTENDED_LIMIT 27000 /* Extended mode raw value limit */ +#define MLX90632_MEAS_MAX_TIME 2000 /* Max measurement time in ms for the lowest refresh rate */ /** * struct mlx90632_data - private data for the MLX90632 device @@ -130,6 +143,9 @@ * @object_ambient_temperature: Ambient temperature at object (might differ of * the ambient temperature of sensor. 
* @regulator: Regulator of the device + * @powerstatus: Current POWER status of the device + * @interaction_ts: Timestamp of the last temperature read that is used + * for power management in jiffies */ struct mlx90632_data { struct i2c_client *client; @@ -139,6 +155,8 @@ struct mlx90632_data { u8 mtyp; u32 object_ambient_temperature; struct regulator *regulator; + int powerstatus; + unsigned long interaction_ts; }; static const struct regmap_range mlx90632_volatile_reg_range[] = { @@ -158,6 +176,8 @@ static const struct regmap_range mlx90632_read_reg_range[] = { regmap_reg_range(MLX90632_EE_VERSION, MLX90632_EE_Ka), regmap_reg_range(MLX90632_EE_CTRL, MLX90632_EE_I2C_ADDR), regmap_reg_range(MLX90632_EE_Ha, MLX90632_EE_Hb), + regmap_reg_range(MLX90632_EE_MEDICAL_MEAS1, MLX90632_EE_MEDICAL_MEAS2), + regmap_reg_range(MLX90632_EE_EXTENDED_MEAS1, MLX90632_EE_EXTENDED_MEAS3), regmap_reg_range(MLX90632_REG_I2C_ADDR, MLX90632_REG_CONTROL), regmap_reg_range(MLX90632_REG_I2C_CMD, MLX90632_REG_I2C_CMD), regmap_reg_range(MLX90632_REG_STATUS, MLX90632_REG_STATUS), @@ -198,16 +218,38 @@ static const struct regmap_config mlx90632_regmap = { static s32 mlx90632_pwr_set_sleep_step(struct regmap *regmap) { - return regmap_update_bits(regmap, MLX90632_REG_CONTROL, - MLX90632_CFG_PWR_MASK, - MLX90632_PWR_STATUS_SLEEP_STEP); + struct mlx90632_data *data = + iio_priv(dev_get_drvdata(regmap_get_device(regmap))); + s32 ret; + + if (data->powerstatus == MLX90632_PWR_STATUS_SLEEP_STEP) + return 0; + + ret = regmap_write_bits(regmap, MLX90632_REG_CONTROL, MLX90632_CFG_PWR_MASK, + MLX90632_PWR_STATUS_SLEEP_STEP); + if (ret < 0) + return ret; + + data->powerstatus = MLX90632_PWR_STATUS_SLEEP_STEP; + return ret; } static s32 mlx90632_pwr_continuous(struct regmap *regmap) { - return regmap_update_bits(regmap, MLX90632_REG_CONTROL, - MLX90632_CFG_PWR_MASK, - MLX90632_PWR_STATUS_CONTINUOUS); + struct mlx90632_data *data = + iio_priv(dev_get_drvdata(regmap_get_device(regmap))); + s32 ret; + + if 
(data->powerstatus == MLX90632_PWR_STATUS_CONTINUOUS) + return 0; + + ret = regmap_write_bits(regmap, MLX90632_REG_CONTROL, MLX90632_CFG_PWR_MASK, + MLX90632_PWR_STATUS_CONTINUOUS); + if (ret < 0) + return ret; + + data->powerstatus = MLX90632_PWR_STATUS_CONTINUOUS; + return ret; } /** @@ -219,6 +261,63 @@ static void mlx90632_reset_delay(void) usleep_range(150, 200); } +static int mlx90632_get_measurement_time(struct regmap *regmap, u16 meas) +{ + unsigned int reg; + int ret; + + ret = regmap_read(regmap, meas, &reg); + if (ret < 0) + return ret; + + return MLX90632_MEAS_MAX_TIME >> FIELD_GET(MLX90632_EE_RR, reg); +} + +static int mlx90632_calculate_dataset_ready_time(struct mlx90632_data *data) +{ + unsigned int refresh_time; + int ret; + + if (data->mtyp == MLX90632_MTYP_MEDICAL) { + ret = mlx90632_get_measurement_time(data->regmap, + MLX90632_EE_MEDICAL_MEAS1); + if (ret < 0) + return ret; + + refresh_time = ret; + + ret = mlx90632_get_measurement_time(data->regmap, + MLX90632_EE_MEDICAL_MEAS2); + if (ret < 0) + return ret; + + refresh_time += ret; + } else { + ret = mlx90632_get_measurement_time(data->regmap, + MLX90632_EE_EXTENDED_MEAS1); + if (ret < 0) + return ret; + + refresh_time = ret; + + ret = mlx90632_get_measurement_time(data->regmap, + MLX90632_EE_EXTENDED_MEAS2); + if (ret < 0) + return ret; + + refresh_time += ret; + + ret = mlx90632_get_measurement_time(data->regmap, + MLX90632_EE_EXTENDED_MEAS3); + if (ret < 0) + return ret; + + refresh_time += ret; + } + + return refresh_time; +} + /** * mlx90632_perform_measurement() - Trigger and retrieve current measurement cycle * @data: pointer to mlx90632_data object containing regmap information @@ -249,26 +348,75 @@ static int mlx90632_perform_measurement(struct mlx90632_data *data) return (reg_status & MLX90632_STAT_CYCLE_POS) >> 2; } -static int mlx90632_set_meas_type(struct regmap *regmap, u8 type) +/** + * mlx90632_perform_measurement_burst() - Trigger and retrieve current measurement + * cycle in step
sleep mode + * @data: pointer to mlx90632_data object containing regmap information + * + * Perform a measurement and return 2 as measurement cycle position reported + * by sensor. This is a blocking function for amount dependent on the sensor + * refresh rate. + */ +static int mlx90632_perform_measurement_burst(struct mlx90632_data *data) { + unsigned int reg_status; int ret; - if ((type != MLX90632_MTYP_MEDICAL) && (type != MLX90632_MTYP_EXTENDED)) - return -EINVAL; + ret = regmap_write_bits(data->regmap, MLX90632_REG_CONTROL, + MLX90632_CFG_SOB_MASK, MLX90632_CFG_SOB_MASK); + if (ret < 0) + return ret; - ret = regmap_write(regmap, MLX90632_REG_I2C_CMD, MLX90632_RESET_CMD); + ret = mlx90632_calculate_dataset_ready_time(data); + if (ret < 0) + return ret; + + msleep(ret); /* Wait minimum time for dataset to be ready */ + + ret = regmap_read_poll_timeout(data->regmap, MLX90632_REG_STATUS, + reg_status, + (reg_status & MLX90632_STAT_BUSY) == 0, + 10000, 100 * 10000); + if (ret < 0) { + dev_err(&data->client->dev, "data not ready"); + return -ETIMEDOUT; + } + + return 2; +} + +static int mlx90632_set_meas_type(struct mlx90632_data *data, u8 type) +{ + int current_powerstatus; + int ret; + + if (data->mtyp == type) + return 0; + + current_powerstatus = data->powerstatus; + ret = mlx90632_pwr_continuous(data->regmap); + if (ret < 0) + return ret; + + ret = regmap_write(data->regmap, MLX90632_REG_I2C_CMD, MLX90632_RESET_CMD); if (ret < 0) return ret; mlx90632_reset_delay(); - ret = regmap_write_bits(regmap, MLX90632_REG_CONTROL, + ret = regmap_update_bits(data->regmap, MLX90632_REG_CONTROL, (MLX90632_CFG_MTYP_MASK | MLX90632_CFG_PWR_MASK), (MLX90632_MTYP_STATUS(type) | MLX90632_PWR_STATUS_HALT)); if (ret < 0) return ret; - return mlx90632_pwr_continuous(regmap); + data->mtyp = type; + data->powerstatus = MLX90632_PWR_STATUS_HALT; + + if (current_powerstatus == MLX90632_PWR_STATUS_SLEEP_STEP) + return mlx90632_pwr_set_sleep_step(data->regmap); + + return 
mlx90632_pwr_continuous(data->regmap); } static int mlx90632_channel_new_select(int perform_ret, uint8_t *channel_new, @@ -355,11 +503,30 @@ static int mlx90632_read_all_channel(struct mlx90632_data *data, s32 ret, measurement; mutex_lock(&data->lock); - measurement = mlx90632_perform_measurement(data); - if (measurement < 0) { - ret = measurement; + ret = mlx90632_set_meas_type(data, MLX90632_MTYP_MEDICAL); + if (ret < 0) + goto read_unlock; + + switch (data->powerstatus) { + case MLX90632_PWR_STATUS_CONTINUOUS: + measurement = mlx90632_perform_measurement(data); + if (measurement < 0) { + ret = measurement; + goto read_unlock; + } + break; + case MLX90632_PWR_STATUS_SLEEP_STEP: + measurement = mlx90632_perform_measurement_burst(data); + if (measurement < 0) { + ret = measurement; + goto read_unlock; + } + break; + default: + ret = -EOPNOTSUPP; goto read_unlock; } + ret = mlx90632_read_ambient_raw(data->regmap, ambient_new_raw, ambient_old_raw); if (ret < 0) @@ -441,14 +608,20 @@ static int mlx90632_read_all_channel_extended(struct mlx90632_data *data, s16 *o s32 ret, meas; mutex_lock(&data->lock); - ret = mlx90632_set_meas_type(data->regmap, MLX90632_MTYP_EXTENDED); + ret = mlx90632_set_meas_type(data, MLX90632_MTYP_EXTENDED); if (ret < 0) goto read_unlock; - ret = read_poll_timeout(mlx90632_perform_measurement, meas, meas == 19, - 50000, 800000, false, data); - if (ret != 0) - goto read_unlock; + if (data->powerstatus == MLX90632_PWR_STATUS_CONTINUOUS) { + ret = read_poll_timeout(mlx90632_perform_measurement, meas, meas == 19, + 50000, 800000, false, data); + if (ret) + goto read_unlock; + } else if (data->powerstatus == MLX90632_PWR_STATUS_SLEEP_STEP) { + ret = mlx90632_perform_measurement_burst(data); + if (ret < 0) + goto read_unlock; + } ret = mlx90632_read_object_raw_extended(data->regmap, object_new_raw); if (ret < 0) @@ -457,8 +630,6 @@ static int mlx90632_read_all_channel_extended(struct mlx90632_data *data, s16 *o ret = 
mlx90632_read_ambient_raw_extended(data->regmap, ambient_new_raw, ambient_old_raw); read_unlock: - (void) mlx90632_set_meas_type(data->regmap, MLX90632_MTYP_MEDICAL); - mutex_unlock(&data->lock); return ret; } @@ -743,12 +914,47 @@ static int mlx90632_calc_ambient_dsp105(struct mlx90632_data *data, int *val) return ret; } +/** + * mlx90632_pm_interraction_wakeup() - Measure time between user interactions to change powermode + * @data: pointer to mlx90632_data object containing interaction_ts information + * + * Switch to continuous mode when interaction is faster than MLX90632_MEAS_MAX_TIME. Update the + * interaction_ts for each function call with the jiffies to enable measurement between function + * calls. Initial value of the interaction_ts needs to be set before this function call. + */ +static int mlx90632_pm_interraction_wakeup(struct mlx90632_data *data) +{ + unsigned long now; + int ret; + + now = jiffies; + if (time_in_range(now, data->interaction_ts, + data->interaction_ts + + msecs_to_jiffies(MLX90632_MEAS_MAX_TIME + 100))) { + if (data->powerstatus == MLX90632_PWR_STATUS_SLEEP_STEP) { + ret = mlx90632_pwr_continuous(data->regmap); + if (ret < 0) + return ret; + } + } + + data->interaction_ts = now; + + return 0; +} + static int mlx90632_read_raw(struct iio_dev *indio_dev, struct iio_chan_spec const *channel, int *val, int *val2, long mask) { struct mlx90632_data *data = iio_priv(indio_dev); int ret; + int cr; + + pm_runtime_get_sync(&data->client->dev); + ret = mlx90632_pm_interraction_wakeup(data); + if (ret < 0) + goto mlx90632_read_raw_pm; switch (mask) { case IIO_CHAN_INFO_PROCESSED: @@ -756,16 +962,22 @@ static int mlx90632_read_raw(struct iio_dev *indio_dev, case IIO_MOD_TEMP_AMBIENT: ret = mlx90632_calc_ambient_dsp105(data, val); if (ret < 0) - return ret; - return IIO_VAL_INT; + goto mlx90632_read_raw_pm; + + ret = IIO_VAL_INT; + break; case IIO_MOD_TEMP_OBJECT: ret = mlx90632_calc_object_dsp105(data, val); if (ret < 0) - return ret; - return 
IIO_VAL_INT; + goto mlx90632_read_raw_pm; + + ret = IIO_VAL_INT; + break; default: - return -EINVAL; + ret = -EINVAL; + break; } + break; case IIO_CHAN_INFO_CALIBEMISSIVITY: if (data->emissivity == 1000) { *val = 1; @@ -774,13 +986,21 @@ static int mlx90632_read_raw(struct iio_dev *indio_dev, *val = 0; *val2 = data->emissivity * 1000; } - return IIO_VAL_INT_PLUS_MICRO; + ret = IIO_VAL_INT_PLUS_MICRO; + break; case IIO_CHAN_INFO_CALIBAMBIENT: *val = data->object_ambient_temperature; - return IIO_VAL_INT; + ret = IIO_VAL_INT; + break; default: - return -EINVAL; + ret = -EINVAL; + break; } + +mlx90632_read_raw_pm: + pm_runtime_mark_last_busy(&data->client->dev); + pm_runtime_put_autosuspend(&data->client->dev); + return ret; } static int mlx90632_write_raw(struct iio_dev *indio_dev, @@ -826,11 +1046,18 @@ static const struct iio_info mlx90632_info = { .write_raw = mlx90632_write_raw, }; -static int mlx90632_sleep(struct mlx90632_data *data) +static void mlx90632_sleep(void *_data) +{ + struct mlx90632_data *data = _data; + + mlx90632_pwr_set_sleep_step(data->regmap); +} + +static int mlx90632_suspend(struct mlx90632_data *data) { regcache_mark_dirty(data->regmap); - dev_dbg(&data->client->dev, "Requesting sleep"); + dev_dbg(&data->client->dev, "Requesting suspend"); return mlx90632_pwr_set_sleep_step(data->regmap); } @@ -902,6 +1129,7 @@ static int mlx90632_probe(struct i2c_client *client, mlx90632->client = client; mlx90632->regmap = regmap; mlx90632->mtyp = MLX90632_MTYP_MEDICAL; + mlx90632->powerstatus = MLX90632_PWR_STATUS_HALT; mutex_init(&mlx90632->lock); indio_dev->name = id->name; @@ -933,6 +1161,13 @@ static int mlx90632_probe(struct i2c_client *client, return ret; } + ret = devm_add_action_or_reset(&client->dev, mlx90632_sleep, mlx90632); + if (ret < 0) { + dev_err(&client->dev, "Failed to setup low power cleanup action %d\n", + ret); + return ret; + } + ret = regmap_read(mlx90632->regmap, MLX90632_EE_VERSION, &read); if (ret < 0) { dev_err(&client->dev, 
"read of version failed: %d\n", ret); @@ -961,32 +1196,17 @@ static int mlx90632_probe(struct i2c_client *client, mlx90632->emissivity = 1000; mlx90632->object_ambient_temperature = 25000; /* 25 degrees milliCelsius */ + mlx90632->interaction_ts = jiffies; /* Set initial value */ - pm_runtime_disable(&client->dev); - ret = pm_runtime_set_active(&client->dev); - if (ret < 0) { - mlx90632_sleep(mlx90632); - return ret; - } - pm_runtime_enable(&client->dev); + pm_runtime_get_noresume(&client->dev); + pm_runtime_set_active(&client->dev); + + devm_pm_runtime_enable(&client->dev); pm_runtime_set_autosuspend_delay(&client->dev, MLX90632_SLEEP_DELAY_MS); pm_runtime_use_autosuspend(&client->dev); + pm_runtime_put_autosuspend(&client->dev); - return iio_device_register(indio_dev); -} - -static void mlx90632_remove(struct i2c_client *client) -{ - struct iio_dev *indio_dev = i2c_get_clientdata(client); - struct mlx90632_data *data = iio_priv(indio_dev); - - iio_device_unregister(indio_dev); - - pm_runtime_disable(&client->dev); - pm_runtime_set_suspended(&client->dev); - pm_runtime_put_noidle(&client->dev); - - mlx90632_sleep(data); + return devm_iio_device_register(&client->dev, indio_dev); } static const struct i2c_device_id mlx90632_id[] = { @@ -1001,33 +1221,54 @@ static const struct of_device_id mlx90632_of_match[] = { }; MODULE_DEVICE_TABLE(of, mlx90632_of_match); -static int __maybe_unused mlx90632_pm_suspend(struct device *dev) +static int mlx90632_pm_suspend(struct device *dev) { - struct iio_dev *indio_dev = i2c_get_clientdata(to_i2c_client(dev)); - struct mlx90632_data *data = iio_priv(indio_dev); + struct mlx90632_data *data = iio_priv(dev_get_drvdata(dev)); + int ret; - return mlx90632_sleep(data); + ret = mlx90632_suspend(data); + if (ret < 0) + return ret; + + ret = regulator_disable(data->regulator); + if (ret < 0) + dev_err(regmap_get_device(data->regmap), + "Failed to disable power regulator: %d\n", ret); + + return ret; } -static int __maybe_unused 
mlx90632_pm_resume(struct device *dev) +static int mlx90632_pm_resume(struct device *dev) { - struct iio_dev *indio_dev = i2c_get_clientdata(to_i2c_client(dev)); - struct mlx90632_data *data = iio_priv(indio_dev); + struct mlx90632_data *data = iio_priv(dev_get_drvdata(dev)); + int ret; + + ret = mlx90632_enable_regulator(data); + if (ret < 0) + return ret; return mlx90632_wakeup(data); } -static UNIVERSAL_DEV_PM_OPS(mlx90632_pm_ops, mlx90632_pm_suspend, - mlx90632_pm_resume, NULL); +static int mlx90632_pm_runtime_suspend(struct device *dev) +{ + struct mlx90632_data *data = iio_priv(dev_get_drvdata(dev)); + + return mlx90632_pwr_set_sleep_step(data->regmap); +} + +const struct dev_pm_ops mlx90632_pm_ops = { + SYSTEM_SLEEP_PM_OPS(mlx90632_pm_suspend, mlx90632_pm_resume) + RUNTIME_PM_OPS(mlx90632_pm_runtime_suspend, NULL, NULL) +}; static struct i2c_driver mlx90632_driver = { .driver = { .name = "mlx90632", .of_match_table = mlx90632_of_match, - .pm = &mlx90632_pm_ops, + .pm = pm_ptr(&mlx90632_pm_ops), }, .probe = mlx90632_probe, - .remove = mlx90632_remove, .id_table = mlx90632_id, }; module_i2c_driver(mlx90632_driver); From eff07b20700a55e5b39e76960934d6eeb56a12d9 Mon Sep 17 00:00:00 2001 From: Crt Mori Date: Thu, 22 Sep 2022 10:13:23 +0200 Subject: [PATCH 2236/4122] iio: temperature: mlx90632 Read sampling frequency Allow users to read sensor sampling frequency to better plan the application measurement requests. 
Signed-off-by: Crt Mori Link: https://lore.kernel.org/r/0bd6d6d665b4bd39e4565f6f44cb1bdc03386e23.1663834141.git.cmo@melexis.com Signed-off-by: Jonathan Cameron --- drivers/iio/temperature/mlx90632.c | 59 ++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/drivers/iio/temperature/mlx90632.c b/drivers/iio/temperature/mlx90632.c index 71130d237a69..081803940261 100644 --- a/drivers/iio/temperature/mlx90632.c +++ b/drivers/iio/temperature/mlx90632.c @@ -81,6 +81,9 @@ #define MLX90632_PWR_STATUS_CONTINUOUS MLX90632_PWR_STATUS(3) /* continuous */ #define MLX90632_EE_RR GENMASK(10, 8) /* Only Refresh Rate bits */ +#define MLX90632_REFRESH_RATE(ee_val) FIELD_GET(MLX90632_EE_RR, ee_val) + /* Extract Refresh Rate from ee register */ +#define MLX90632_REFRESH_RATE_STATUS(refresh_rate) (refresh_rate << 8) /* Measurement types */ #define MLX90632_MTYP_MEDICAL 0 @@ -914,6 +917,32 @@ static int mlx90632_calc_ambient_dsp105(struct mlx90632_data *data, int *val) return ret; } +static int mlx90632_get_refresh_rate(struct mlx90632_data *data, + int *refresh_rate) +{ + unsigned int meas1; + int ret; + + ret = regmap_read(data->regmap, MLX90632_EE_MEDICAL_MEAS1, &meas1); + if (ret < 0) + return ret; + + *refresh_rate = MLX90632_REFRESH_RATE(meas1); + + return ret; +} + +static const int mlx90632_freqs[][2] = { + {0, 500000}, + {1, 0}, + {2, 0}, + {4, 0}, + {8, 0}, + {16, 0}, + {32, 0}, + {64, 0} +}; + /** * mlx90632_pm_interraction_wakeup() - Measure time between user interactions to change powermode * @data: pointer to mlx90632_data object containing interaction_ts information @@ -992,6 +1021,15 @@ static int mlx90632_read_raw(struct iio_dev *indio_dev, *val = data->object_ambient_temperature; ret = IIO_VAL_INT; break; + case IIO_CHAN_INFO_SAMP_FREQ: + ret = mlx90632_get_refresh_rate(data, &cr); + if (ret < 0) + goto mlx90632_read_raw_pm; + + *val = mlx90632_freqs[cr][0]; + *val2 = mlx90632_freqs[cr][1]; + ret = IIO_VAL_INT_PLUS_MICRO; + break; default: ret = 
-EINVAL; break; @@ -1025,12 +1063,30 @@ static int mlx90632_write_raw(struct iio_dev *indio_dev, } } +static int mlx90632_read_avail(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + const int **vals, int *type, int *length, + long mask) +{ + switch (mask) { + case IIO_CHAN_INFO_SAMP_FREQ: + *vals = (int *)mlx90632_freqs; + *type = IIO_VAL_INT_PLUS_MICRO; + *length = 2 * ARRAY_SIZE(mlx90632_freqs); + return IIO_AVAIL_LIST; + default: + return -EINVAL; + } +} + static const struct iio_chan_spec mlx90632_channels[] = { { .type = IIO_TEMP, .modified = 1, .channel2 = IIO_MOD_TEMP_AMBIENT, .info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED), + .info_mask_shared_by_all = BIT(IIO_CHAN_INFO_SAMP_FREQ), + .info_mask_shared_by_all_available = BIT(IIO_CHAN_INFO_SAMP_FREQ), }, { .type = IIO_TEMP, @@ -1038,12 +1094,15 @@ static const struct iio_chan_spec mlx90632_channels[] = { .channel2 = IIO_MOD_TEMP_OBJECT, .info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED) | BIT(IIO_CHAN_INFO_CALIBEMISSIVITY) | BIT(IIO_CHAN_INFO_CALIBAMBIENT), + .info_mask_shared_by_all = BIT(IIO_CHAN_INFO_SAMP_FREQ), + .info_mask_shared_by_all_available = BIT(IIO_CHAN_INFO_SAMP_FREQ), }, }; static const struct iio_info mlx90632_info = { .read_raw = mlx90632_read_raw, .write_raw = mlx90632_write_raw, + .read_avail = mlx90632_read_avail, }; static void mlx90632_sleep(void *_data) From 4e6151403631255828a5530d9d6233caedcd2976 Mon Sep 17 00:00:00 2001 From: Crt Mori Date: Thu, 22 Sep 2022 10:13:24 +0200 Subject: [PATCH 2237/4122] iio: temperature: mlx90632 Change return value of sensor measurement channel The current EINVAL value is more applicable to embedded library, where user can actually put the fixed value to the sensor. In case of the driver if the value of the channel is invalid it is better to inform userspace that Channel was out of range as that implies more to internal driver error than invalid input. 
It also makes for easier debugging of where the error comes from during the development. Signed-off-by: Crt Mori Link: https://lore.kernel.org/r/565d4df2592d751dc0f40908f2569b7c9af8e56e.1663834141.git.cmo@melexis.com Signed-off-by: Jonathan Cameron --- drivers/iio/temperature/mlx90632.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/temperature/mlx90632.c b/drivers/iio/temperature/mlx90632.c index 081803940261..224db7513baa 100644 --- a/drivers/iio/temperature/mlx90632.c +++ b/drivers/iio/temperature/mlx90632.c @@ -435,7 +435,7 @@ static int mlx90632_channel_new_select(int perform_ret, uint8_t *channel_new, *channel_old = 1; break; default: - return -EINVAL; + return -ECHRNG; } return 0; From 8cf5f0329128efdfe18f12a8697752d39821fbdf Mon Sep 17 00:00:00 2001 From: Marcus Folkesson Date: Thu, 22 Sep 2022 21:46:39 +0200 Subject: [PATCH 2238/4122] iio: adc: mcp3911: add support to set PGA Add support for setting the Programmable Gain Amplifiers by adjusting the scale value. 
Signed-off-by: Marcus Folkesson Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220922194639.1118971-1-marcus.folkesson@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/mcp3911.c | 104 +++++++++++++++++++++++++++++--------- 1 file changed, 80 insertions(+), 24 deletions(-) diff --git a/drivers/iio/adc/mcp3911.c b/drivers/iio/adc/mcp3911.c index 76b334f5ac61..974c5bd923a6 100644 --- a/drivers/iio/adc/mcp3911.c +++ b/drivers/iio/adc/mcp3911.c @@ -29,6 +29,8 @@ #define MCP3911_REG_MOD 0x06 #define MCP3911_REG_PHASE 0x07 #define MCP3911_REG_GAIN 0x09 +#define MCP3911_GAIN_MASK(ch) (GENMASK(2, 0) << 3 * ch) +#define MCP3911_GAIN_VAL(ch, val) ((val << 3 * ch) & MCP3911_GAIN_MASK(ch)) #define MCP3911_REG_STATUSCOM 0x0a #define MCP3911_STATUSCOM_DRHIZ BIT(12) @@ -60,8 +62,10 @@ #define MCP3911_REG_MASK GENMASK(4, 1) #define MCP3911_NUM_CHANNELS 2 +#define MCP3911_NUM_SCALES 6 static const int mcp3911_osr_table[] = { 32, 64, 128, 256, 512, 1024, 2048, 4096 }; +static u32 mcp3911_scale_table[MCP3911_NUM_SCALES][2]; struct mcp3911 { struct spi_device *spi; @@ -70,6 +74,7 @@ struct mcp3911 { struct clk *clki; u32 dev_addr; struct iio_trigger *trig; + u32 gain[MCP3911_NUM_CHANNELS]; struct { u32 channels[MCP3911_NUM_CHANNELS]; s64 ts __aligned(8); @@ -146,6 +151,11 @@ static int mcp3911_read_avail(struct iio_dev *indio_dev, *vals = mcp3911_osr_table; *length = ARRAY_SIZE(mcp3911_osr_table); return IIO_AVAIL_LIST; + case IIO_CHAN_INFO_SCALE: + *type = IIO_VAL_INT_PLUS_NANO; + *vals = (int *)mcp3911_scale_table; + *length = ARRAY_SIZE(mcp3911_scale_table) * 2; + return IIO_AVAIL_LIST; default: return -EINVAL; } @@ -190,29 +200,9 @@ static int mcp3911_read_raw(struct iio_dev *indio_dev, break; case IIO_CHAN_INFO_SCALE: - if (adc->vref) { - ret = regulator_get_voltage(adc->vref); - if (ret < 0) { - dev_err(indio_dev->dev.parent, - "failed to get vref voltage: %d\n", - ret); - goto out; - } - - *val = ret / 1000; - } else { - *val = MCP3911_INT_VREF_MV; - 
} - - /* - * For 24bit Conversion - * Raw = ((Voltage)/(Vref) * 2^23 * Gain * 1.5 - * Voltage = Raw * (Vref)/(2^23 * Gain * 1.5) - */ - - /* val2 = (2^23 * 1.5) */ - *val2 = 12582912; - ret = IIO_VAL_FRACTIONAL; + *val = mcp3911_scale_table[ilog2(adc->gain[channel->channel])][0]; + *val2 = mcp3911_scale_table[ilog2(adc->gain[channel->channel])][1]; + ret = IIO_VAL_INT_PLUS_NANO; break; } @@ -230,6 +220,18 @@ static int mcp3911_write_raw(struct iio_dev *indio_dev, mutex_lock(&adc->lock); switch (mask) { + case IIO_CHAN_INFO_SCALE: + for (int i = 0; i < MCP3911_NUM_SCALES; i++) { + if (val == mcp3911_scale_table[i][0] && + val2 == mcp3911_scale_table[i][1]) { + + adc->gain[channel->channel] = BIT(i); + ret = mcp3911_update(adc, MCP3911_REG_GAIN, + MCP3911_GAIN_MASK(channel->channel), + MCP3911_GAIN_VAL(channel->channel, i), 1); + } + } + break; case IIO_CHAN_INFO_OFFSET: if (val2 != 0) { ret = -EINVAL; @@ -265,6 +267,44 @@ out: return ret; } +static int mcp3911_calc_scale_table(struct mcp3911 *adc) +{ + u32 ref = MCP3911_INT_VREF_MV; + u32 div; + int ret; + u64 tmp; + + if (adc->vref) { + ret = regulator_get_voltage(adc->vref); + if (ret < 0) { + dev_err(&adc->spi->dev, + "failed to get vref voltage: %d\n", + ret); + return ret; + } + + ref = ret / 1000; + } + + /* + * For 24-bit Conversion + * Raw = ((Voltage)/(Vref) * 2^23 * Gain * 1.5 + * Voltage = Raw * (Vref)/(2^23 * Gain * 1.5) + * + * ref = Reference voltage + * div = (2^23 * 1.5 * gain) = 12582912 * gain + */ + for (int i = 0; i < MCP3911_NUM_SCALES; i++) { + div = 12582912 * BIT(i); + tmp = div_s64((s64)ref * 1000000000LL, div); + + mcp3911_scale_table[i][0] = 0; + mcp3911_scale_table[i][1] = tmp; + } + + return 0; +} + #define MCP3911_CHAN(idx) { \ .type = IIO_VOLTAGE, \ .indexed = 1, \ @@ -274,8 +314,10 @@ out: .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | \ BIT(IIO_CHAN_INFO_OFFSET) | \ BIT(IIO_CHAN_INFO_SCALE), \ - .info_mask_shared_by_type_available = \ + .info_mask_shared_by_type_available = \ 
BIT(IIO_CHAN_INFO_OVERSAMPLING_RATIO), \ + .info_mask_separate_available = \ + BIT(IIO_CHAN_INFO_SCALE), \ .scan_type = { \ .sign = 's', \ .realbits = 24, \ @@ -482,6 +524,20 @@ static int mcp3911_probe(struct spi_device *spi) if (ret) return ret; + ret = mcp3911_calc_scale_table(adc); + if (ret) + return ret; + + /* Set gain to 1 for all channels */ + for (int i = 0; i < MCP3911_NUM_CHANNELS; i++) { + adc->gain[i] = 1; + ret = mcp3911_update(adc, MCP3911_REG_GAIN, + MCP3911_GAIN_MASK(i), + MCP3911_GAIN_VAL(i, 0), 1); + if (ret) + return ret; + } + indio_dev->name = spi_get_device_id(spi)->name; indio_dev->modes = INDIO_DIRECT_MODE; indio_dev->info = &mcp3911_info; From 3f4033a811bcd1a1f077ce5297488a5c4dd30eb1 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 22 Sep 2022 11:58:48 +0000 Subject: [PATCH 2239/4122] iio: filter: admv8818: close potential out-of-bounds read in __admv8818_read_[h|l]pf_freq() ADMV8818_SW_IN_WR0_MSK and ADMV8818_SW_OUT_WR0_MSK have 3 bits, which means a length of 8, but freq_range_hpf and freq_range_lpf array size is 4, may end up reading 4 elements beyond the end of those arrays. Check value first before access freq_range_hpf and freq_range_lpf to harden against the hardware allowing out of range values. 
Signed-off-by: Wei Yongjun Reviewed-by: Antoniu Miclaus Link: https://lore.kernel.org/r/20220922115848.1800021-1-weiyongjun@huaweicloud.com Signed-off-by: Jonathan Cameron --- drivers/iio/filter/admv8818.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/filter/admv8818.c b/drivers/iio/filter/admv8818.c index 68de45fe21b4..fe8d46cb7f1d 100644 --- a/drivers/iio/filter/admv8818.c +++ b/drivers/iio/filter/admv8818.c @@ -265,7 +265,7 @@ static int __admv8818_read_hpf_freq(struct admv8818_state *st, u64 *hpf_freq) return ret; hpf_band = FIELD_GET(ADMV8818_SW_IN_WR0_MSK, data); - if (!hpf_band) { + if (!hpf_band || hpf_band > 4) { *hpf_freq = 0; return ret; } @@ -303,7 +303,7 @@ static int __admv8818_read_lpf_freq(struct admv8818_state *st, u64 *lpf_freq) return ret; lpf_band = FIELD_GET(ADMV8818_SW_OUT_WR0_MSK, data); - if (!lpf_band) { + if (!lpf_band || lpf_band > 4) { *lpf_freq = 0; return ret; } From e21b5b1f26694a4498ca11a15e09ccc0a72abb81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A5rten=20Lindahl?= Date: Mon, 26 Sep 2022 11:18:59 +0200 Subject: [PATCH 2240/4122] iio: light: vcnl4000: Preserve conf bits when toggle power MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As the vcnl4040 and vcnl4200 chip uses runtime power management for turning the ambient light and proximity sensors on/off, it overwrites the entire register each time. In ALS_CONF register bit fields ALS_IT, ALS_PERS, ALS_INT_EN are overwritten. In PS_CONF1 register bit fields PS_DUTY, PS_PERS, PS_IT, PS_HD, and PS_INT are overwritten. Add functions for preserving the affected bit fields when changing power state. 
Signed-off-by: Mårten Lindahl Link: https://lore.kernel.org/r/20220926091900.1724105-2-marten.lindahl@axis.com Signed-off-by: Jonathan Cameron --- drivers/iio/light/vcnl4000.c | 56 +++++++++++++++++++++++++++++++++--- 1 file changed, 52 insertions(+), 4 deletions(-) diff --git a/drivers/iio/light/vcnl4000.c b/drivers/iio/light/vcnl4000.c index f6c83ecaad8b..bc8c974cd8b8 100644 --- a/drivers/iio/light/vcnl4000.c +++ b/drivers/iio/light/vcnl4000.c @@ -74,6 +74,9 @@ #define VCNL4000_PROX_EN BIT(1) /* start proximity measurement */ #define VCNL4000_SELF_TIMED_EN BIT(0) /* start self-timed measurement */ +#define VCNL4040_ALS_CONF_ALS_SHUTDOWN BIT(0) +#define VCNL4040_PS_CONF1_PS_SHUTDOWN BIT(0) + /* Bit masks for interrupt registers. */ #define VCNL4010_INT_THR_SEL BIT(0) /* Select threshold interrupt source */ #define VCNL4010_INT_THR_EN BIT(1) /* Threshold interrupt type */ @@ -188,16 +191,61 @@ static int vcnl4000_init(struct vcnl4000_data *data) return data->chip_spec->set_power_state(data, true); }; -static int vcnl4200_set_power_state(struct vcnl4000_data *data, bool on) +static ssize_t vcnl4000_write_als_enable(struct vcnl4000_data *data, bool en) { - u16 val = on ? 
0 /* power on */ : 1 /* shut down */; int ret; - ret = i2c_smbus_write_word_data(data->client, VCNL4200_AL_CONF, val); + mutex_lock(&data->vcnl4000_lock); + + ret = i2c_smbus_read_word_data(data->client, VCNL4200_AL_CONF); + if (ret < 0) + goto out; + + if (en) + ret &= ~VCNL4040_ALS_CONF_ALS_SHUTDOWN; + else + ret |= VCNL4040_ALS_CONF_ALS_SHUTDOWN; + + ret = i2c_smbus_write_word_data(data->client, VCNL4200_AL_CONF, ret); + +out: + mutex_unlock(&data->vcnl4000_lock); + + return ret; +} + +static ssize_t vcnl4000_write_ps_enable(struct vcnl4000_data *data, bool en) +{ + int ret; + + mutex_lock(&data->vcnl4000_lock); + + ret = i2c_smbus_read_word_data(data->client, VCNL4200_PS_CONF1); + if (ret < 0) + goto out; + + if (en) + ret &= ~VCNL4040_PS_CONF1_PS_SHUTDOWN; + else + ret |= VCNL4040_PS_CONF1_PS_SHUTDOWN; + + ret = i2c_smbus_write_word_data(data->client, VCNL4200_PS_CONF1, ret); + +out: + mutex_unlock(&data->vcnl4000_lock); + + return ret; +} + +static int vcnl4200_set_power_state(struct vcnl4000_data *data, bool on) +{ + int ret; + + ret = vcnl4000_write_als_enable(data, on); if (ret < 0) return ret; - ret = i2c_smbus_write_word_data(data->client, VCNL4200_PS_CONF1, val); + ret = vcnl4000_write_ps_enable(data, on); if (ret < 0) return ret; From 85e2c6a23f851f65b2b14c1d87685168be620f87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A5rten=20Lindahl?= Date: Mon, 26 Sep 2022 11:19:00 +0200 Subject: [PATCH 2241/4122] iio: light: vcnl4000: Add ps_it attributes for vcnl4040 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add read/write attribute for proximity integration time, and read attribute for available proximity integration times for the vcnl4040 chip. 
Signed-off-by: Mårten Lindahl Link: https://lore.kernel.org/r/20220926091900.1724105-3-marten.lindahl@axis.com Signed-off-by: Jonathan Cameron --- drivers/iio/light/vcnl4000.c | 131 ++++++++++++++++++++++++++++++++++- 1 file changed, 128 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/vcnl4000.c b/drivers/iio/light/vcnl4000.c index bc8c974cd8b8..fdb3922ae4ac 100644 --- a/drivers/iio/light/vcnl4000.c +++ b/drivers/iio/light/vcnl4000.c @@ -17,6 +17,7 @@ * interrupts (VCNL4040, VCNL4200) */ +#include #include #include #include @@ -76,6 +77,7 @@ #define VCNL4040_ALS_CONF_ALS_SHUTDOWN BIT(0) #define VCNL4040_PS_CONF1_PS_SHUTDOWN BIT(0) +#define VCNL4040_PS_CONF2_PS_IT GENMASK(3, 1) /* Proximity integration time */ /* Bit masks for interrupt registers. */ #define VCNL4010_INT_THR_SEL BIT(0) /* Select threshold interrupt source */ @@ -104,6 +106,17 @@ static const int vcnl4010_prox_sampling_frequency[][2] = { {250, 0}, }; +static const int vcnl4040_ps_it_times[][2] = { + {0, 100}, + {0, 150}, + {0, 200}, + {0, 250}, + {0, 300}, + {0, 350}, + {0, 400}, + {0, 800}, +}; + #define VCNL4000_SLEEP_DELAY_MS 2000 /* before we enter pm_runtime_suspend */ enum vcnl4000_device_ids { @@ -470,6 +483,57 @@ static int vcnl4000_set_pm_runtime_state(struct vcnl4000_data *data, bool on) return ret; } +static int vcnl4040_read_ps_it(struct vcnl4000_data *data, int *val, int *val2) +{ + int ret; + + ret = i2c_smbus_read_word_data(data->client, VCNL4200_PS_CONF1); + if (ret < 0) + return ret; + + ret = FIELD_GET(VCNL4040_PS_CONF2_PS_IT, ret); + + if (ret >= ARRAY_SIZE(vcnl4040_ps_it_times)) + return -EINVAL; + + *val = vcnl4040_ps_it_times[ret][0]; + *val2 = vcnl4040_ps_it_times[ret][1]; + + return 0; +} + +static ssize_t vcnl4040_write_ps_it(struct vcnl4000_data *data, int val) +{ + unsigned int i; + int ret, index = -1; + u16 regval; + + for (i = 0; i < ARRAY_SIZE(vcnl4040_ps_it_times); i++) { + if (val == vcnl4040_ps_it_times[i][1]) { + index = i; + break; + } + } + + if 
(index < 0) + return -EINVAL; + + mutex_lock(&data->vcnl4000_lock); + + ret = i2c_smbus_read_word_data(data->client, VCNL4200_PS_CONF1); + if (ret < 0) + goto out; + + regval = (ret & ~VCNL4040_PS_CONF2_PS_IT) | + FIELD_PREP(VCNL4040_PS_CONF2_PS_IT, index); + ret = i2c_smbus_write_word_data(data->client, VCNL4200_PS_CONF1, + regval); + +out: + mutex_unlock(&data->vcnl4000_lock); + return ret; +} + static int vcnl4000_read_raw(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, int *val, int *val2, long mask) @@ -506,6 +570,47 @@ static int vcnl4000_read_raw(struct iio_dev *indio_dev, *val = 0; *val2 = data->al_scale; return IIO_VAL_INT_PLUS_MICRO; + case IIO_CHAN_INFO_INT_TIME: + if (chan->type != IIO_PROXIMITY) + return -EINVAL; + ret = vcnl4040_read_ps_it(data, val, val2); + if (ret < 0) + return ret; + return IIO_VAL_INT_PLUS_MICRO; + default: + return -EINVAL; + } +} + +static int vcnl4040_write_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + int val, int val2, long mask) +{ + struct vcnl4000_data *data = iio_priv(indio_dev); + + switch (mask) { + case IIO_CHAN_INFO_INT_TIME: + if (val != 0) + return -EINVAL; + if (chan->type != IIO_PROXIMITY) + return -EINVAL; + return vcnl4040_write_ps_it(data, val2); + default: + return -EINVAL; + } +} + +static int vcnl4040_read_avail(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + const int **vals, int *type, int *length, + long mask) +{ + switch (mask) { + case IIO_CHAN_INFO_INT_TIME: + *vals = (int *)vcnl4040_ps_it_times; + *type = IIO_VAL_INT_PLUS_MICRO; + *length = 2 * ARRAY_SIZE(vcnl4040_ps_it_times); + return IIO_AVAIL_LIST; default: return -EINVAL; } @@ -844,6 +949,20 @@ static const struct iio_chan_spec vcnl4010_channels[] = { IIO_CHAN_SOFT_TIMESTAMP(1), }; +static const struct iio_chan_spec vcnl4040_channels[] = { + { + .type = IIO_LIGHT, + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | + BIT(IIO_CHAN_INFO_SCALE), + }, { + .type = IIO_PROXIMITY, + .info_mask_separate 
= BIT(IIO_CHAN_INFO_RAW) | + BIT(IIO_CHAN_INFO_INT_TIME), + .info_mask_separate_available = BIT(IIO_CHAN_INFO_INT_TIME), + .ext_info = vcnl4000_ext_info, + } +}; + static const struct iio_info vcnl4000_info = { .read_raw = vcnl4000_read_raw, }; @@ -858,6 +977,12 @@ static const struct iio_info vcnl4010_info = { .write_event_config = vcnl4010_write_event_config, }; +static const struct iio_info vcnl4040_info = { + .read_raw = vcnl4000_read_raw, + .write_raw = vcnl4040_write_raw, + .read_avail = vcnl4040_read_avail, +}; + static const struct vcnl4000_chip_spec vcnl4000_chip_spec_cfg[] = { [VCNL4000] = { .prod = "VCNL4000", @@ -887,9 +1012,9 @@ static const struct vcnl4000_chip_spec vcnl4000_chip_spec_cfg[] = { .measure_light = vcnl4200_measure_light, .measure_proximity = vcnl4200_measure_proximity, .set_power_state = vcnl4200_set_power_state, - .channels = vcnl4000_channels, - .num_channels = ARRAY_SIZE(vcnl4000_channels), - .info = &vcnl4000_info, + .channels = vcnl4040_channels, + .num_channels = ARRAY_SIZE(vcnl4040_channels), + .info = &vcnl4040_info, .irq_support = false, }, [VCNL4200] = { From 55e00b871ce2fc5689af7991638e5889dcd2ea7a Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 27 Sep 2022 14:48:41 +0800 Subject: [PATCH 2242/4122] iio: multiplexer: Switch to use dev_err_probe() helper In the probe path, dev_err() can be replaced with dev_err_probe() which will check if error code is -EPROBE_DEFER and prints the error name. It also sets the defer probe reason which can be checked later through debugfs. It's more simple in error path. 
Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20220927064841.319291-1-yangyingliang@huawei.com Signed-off-by: Jonathan Cameron --- drivers/iio/multiplexer/iio-mux.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/iio/multiplexer/iio-mux.c b/drivers/iio/multiplexer/iio-mux.c index 93558fddfa9b..edd8c69f6d2e 100644 --- a/drivers/iio/multiplexer/iio-mux.c +++ b/drivers/iio/multiplexer/iio-mux.c @@ -416,11 +416,9 @@ static int mux_probe(struct platform_device *pdev) } mux->control = devm_mux_control_get(dev, NULL); - if (IS_ERR(mux->control)) { - if (PTR_ERR(mux->control) != -EPROBE_DEFER) - dev_err(dev, "failed to get control-mux\n"); - return PTR_ERR(mux->control); - } + if (IS_ERR(mux->control)) + return dev_err_probe(dev, PTR_ERR(mux->control), + "failed to get control-mux\n"); i = 0; for (state = 0; state < all_children; state++) { From 4eb61e1a3338d0f8c9f7a28b72f3289cc92133c6 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 21 Sep 2022 16:36:16 +0000 Subject: [PATCH 2243/4122] iio: adc: ti-ads131e08: Silence no spi_device_id warnings SPI devices use the spi_device_id for module autoloading even on systems using device tree, after commit 5fa6863ba692 ("spi: Check we have a spi_device_id for each DT compatible"), kernel warns as follows since the spi_device_id is missing: SPI driver ads131e08 has no spi_device_id for ti,ads131e04 SPI driver ads131e08 has no spi_device_id for ti,ads131e06 Add spi_device_id entries to silence the warnings, and ensure driver module autoloading works. 
Signed-off-by: Wei Yongjun Link: https://lore.kernel.org/r/20220921163620.805879-2-weiyongjun@huaweicloud.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ti-ads131e08.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/iio/adc/ti-ads131e08.c b/drivers/iio/adc/ti-ads131e08.c index 5235a93f28bc..fcfc46254313 100644 --- a/drivers/iio/adc/ti-ads131e08.c +++ b/drivers/iio/adc/ti-ads131e08.c @@ -807,6 +807,8 @@ static int ads131e08_probe(struct spi_device *spi) int ret; info = device_get_match_data(&spi->dev); + if (!info) + info = (void *)spi_get_device_id(spi)->driver_data; if (!info) { dev_err(&spi->dev, "failed to get match data\n"); return -ENODEV; @@ -926,12 +928,21 @@ static const struct of_device_id ads131e08_of_match[] = { }; MODULE_DEVICE_TABLE(of, ads131e08_of_match); +static const struct spi_device_id ads131e08_ids[] = { + { "ads131e04", (kernel_ulong_t)&ads131e08_info_tbl[ads131e04] }, + { "ads131e06", (kernel_ulong_t)&ads131e08_info_tbl[ads131e06] }, + { "ads131e08", (kernel_ulong_t)&ads131e08_info_tbl[ads131e08] }, + {} +}; +MODULE_DEVICE_TABLE(spi, ads131e08_ids); + static struct spi_driver ads131e08_driver = { .driver = { .name = "ads131e08", .of_match_table = ads131e08_of_match, }, .probe = ads131e08_probe, + .id_table = ads131e08_ids, }; module_spi_driver(ads131e08_driver); From 35dab731c4d2b1e71827a56826ef60f53e97fc32 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 21 Sep 2022 16:36:17 +0000 Subject: [PATCH 2244/4122] iio: accel: sca3300: Silence no spi_device_id warning SPI devices use the spi_device_id for module autoloading even on systems using device tree, after commit 5fa6863ba692 ("spi: Check we have a spi_device_id for each DT compatible"), kernel warns as follows since the spi_device_id is missing: SPI driver sca3300 has no spi_device_id for murata,scl3300 Add spi_device_id entries to silence the warning, and ensure driver module autoloading works. 
Signed-off-by: Wei Yongjun Reviewed-by: Tomas Melin Link: https://lore.kernel.org/r/20220921163620.805879-3-weiyongjun@huaweicloud.com Signed-off-by: Jonathan Cameron --- drivers/iio/accel/sca3300.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/iio/accel/sca3300.c b/drivers/iio/accel/sca3300.c index eaa0c9cfda44..306482b70fad 100644 --- a/drivers/iio/accel/sca3300.c +++ b/drivers/iio/accel/sca3300.c @@ -679,12 +679,20 @@ static const struct of_device_id sca3300_dt_ids[] = { }; MODULE_DEVICE_TABLE(of, sca3300_dt_ids); +static const struct spi_device_id sca3300_ids[] = { + { "sca3300" }, + { "scl3300" }, + {} +}; +MODULE_DEVICE_TABLE(spi, sca3300_ids); + static struct spi_driver sca3300_driver = { - .driver = { + .driver = { .name = SCA3300_ALIAS, .of_match_table = sca3300_dt_ids, }, - .probe = sca3300_probe, + .probe = sca3300_probe, + .id_table = sca3300_ids, }; module_spi_driver(sca3300_driver); From 283026528e0ee8ea89a60f9addaf0f2eda167c42 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 21 Sep 2022 16:36:18 +0000 Subject: [PATCH 2245/4122] iio: adc: ad9467: Silence no spi_device_id warnings SPI devices use the spi_device_id for module autoloading even on systems using device tree, after commit 5fa6863ba692 ("spi: Check we have a spi_device_id for each DT compatible"), kernel warns as follows since the spi_device_id is missing: SPI driver ad9467 has no spi_device_id for adi,ad9265 SPI driver ad9467 has no spi_device_id for adi,ad9434 Add spi_device_id entries to silence the warnings, and ensure driver module autoloading works. 
Signed-off-by: Wei Yongjun Link: https://lore.kernel.org/r/20220921163620.805879-4-weiyongjun@huaweicloud.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad9467.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/iio/adc/ad9467.c b/drivers/iio/adc/ad9467.c index 7534572f7475..0621cf59d614 100644 --- a/drivers/iio/adc/ad9467.c +++ b/drivers/iio/adc/ad9467.c @@ -387,6 +387,8 @@ static int ad9467_probe(struct spi_device *spi) int ret; info = of_device_get_match_data(&spi->dev); + if (!info) + info = (void *)spi_get_device_id(spi)->driver_data; if (!info) return -ENODEV; @@ -447,12 +449,21 @@ static const struct of_device_id ad9467_of_match[] = { }; MODULE_DEVICE_TABLE(of, ad9467_of_match); +static const struct spi_device_id ad9467_ids[] = { + { "ad9265", (kernel_ulong_t)&ad9467_chip_tbl[ID_AD9265] }, + { "ad9434", (kernel_ulong_t)&ad9467_chip_tbl[ID_AD9434] }, + { "ad9467", (kernel_ulong_t)&ad9467_chip_tbl[ID_AD9467] }, + {} +}; +MODULE_DEVICE_TABLE(spi, ad9467_ids); + static struct spi_driver ad9467_driver = { .driver = { .name = "ad9467", .of_match_table = ad9467_of_match, }, .probe = ad9467_probe, + .id_table = ad9467_ids, }; module_spi_driver(ad9467_driver); From 935779eac00aaae02fa61e12c81775e165b11164 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 21 Sep 2022 16:36:19 +0000 Subject: [PATCH 2246/4122] iio: adc: ad7192: Silence no spi_device_id warnings SPI devices use the spi_device_id for module autoloading even on systems using device tree, after commit 5fa6863ba692 ("spi: Check we have a spi_device_id for each DT compatible"), kernel warns as follows since the spi_device_id is missing: SPI driver ad7192 has no spi_device_id for adi,ad7190 SPI driver ad7192 has no spi_device_id for adi,ad7193 SPI driver ad7192 has no spi_device_id for adi,ad7195 Add spi_device_id entries to silence the warnings, and ensure driver module autoloading works. 
Signed-off-by: Wei Yongjun Link: https://lore.kernel.org/r/20220921163620.805879-5-weiyongjun@huaweicloud.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7192.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/iio/adc/ad7192.c b/drivers/iio/adc/ad7192.c index d71977be7d22..f5067173deb6 100644 --- a/drivers/iio/adc/ad7192.c +++ b/drivers/iio/adc/ad7192.c @@ -1037,6 +1037,8 @@ static int ad7192_probe(struct spi_device *spi) st->int_vref_mv = ret / 1000; st->chip_info = of_device_get_match_data(&spi->dev); + if (!st->chip_info) + st->chip_info = (void *)spi_get_device_id(spi)->driver_data; indio_dev->name = st->chip_info->name; indio_dev->modes = INDIO_DIRECT_MODE; @@ -1098,12 +1100,22 @@ static const struct of_device_id ad7192_of_match[] = { }; MODULE_DEVICE_TABLE(of, ad7192_of_match); +static const struct spi_device_id ad7192_ids[] = { + { "ad7190", (kernel_ulong_t)&ad7192_chip_info_tbl[ID_AD7190] }, + { "ad7192", (kernel_ulong_t)&ad7192_chip_info_tbl[ID_AD7192] }, + { "ad7193", (kernel_ulong_t)&ad7192_chip_info_tbl[ID_AD7193] }, + { "ad7195", (kernel_ulong_t)&ad7192_chip_info_tbl[ID_AD7195] }, + {} +}; +MODULE_DEVICE_TABLE(spi, ad7192_ids); + static struct spi_driver ad7192_driver = { .driver = { .name = "ad7192", .of_match_table = ad7192_of_match, }, .probe = ad7192_probe, + .id_table = ad7192_ids, }; module_spi_driver(ad7192_driver); From 3a258747a01f1f21fd4c10a07499bde684f8ca2a Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 21 Sep 2022 16:36:20 +0000 Subject: [PATCH 2247/4122] iio: adc: ad7124: Silence no spi_device_id warnings SPI devices use the spi_device_id for module autoloading even on systems using device tree, after commit 5fa6863ba692 ("spi: Check we have a spi_device_id for each DT compatible"), kernel warns as follows since the spi_device_id is missing: SPI driver ad7124 has no spi_device_id for adi,ad7124-4 SPI driver ad7124 has no spi_device_id for adi,ad7124-8 Add spi_device_id entries to silence the 
warnings, and ensure driver module autoloading works. Signed-off-by: Wei Yongjun Link: https://lore.kernel.org/r/20220921163620.805879-6-weiyongjun@huaweicloud.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7124.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/iio/adc/ad7124.c b/drivers/iio/adc/ad7124.c index 4088786e1026..050a2fbf5c49 100644 --- a/drivers/iio/adc/ad7124.c +++ b/drivers/iio/adc/ad7124.c @@ -944,6 +944,8 @@ static int ad7124_probe(struct spi_device *spi) int i, ret; info = of_device_get_match_data(&spi->dev); + if (!info) + info = (void *)spi_get_device_id(spi)->driver_data; if (!info) return -ENODEV; @@ -1021,12 +1023,20 @@ static const struct of_device_id ad7124_of_match[] = { }; MODULE_DEVICE_TABLE(of, ad7124_of_match); +static const struct spi_device_id ad71124_ids[] = { + { "ad7124-4", (kernel_ulong_t)&ad7124_chip_info_tbl[ID_AD7124_4] }, + { "ad7124-8", (kernel_ulong_t)&ad7124_chip_info_tbl[ID_AD7124_8] }, + {} +}; +MODULE_DEVICE_TABLE(spi, ad71124_ids); + static struct spi_driver ad71124_driver = { .driver = { .name = "ad7124", .of_match_table = ad7124_of_match, }, .probe = ad7124_probe, + .id_table = ad71124_ids, }; module_spi_driver(ad71124_driver); From 8f347c565df4e8dd2c862a48a3056bfe59d315e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Tue, 4 Oct 2022 15:48:54 +0200 Subject: [PATCH 2248/4122] iio: adc: ad799x: do not use internal iio_dev lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 'mlock' was being grabbed when setting the device frequency. In order to not introduce any functional change a new lock is added. With that in mind, the lock also needs to be grabbed in the places where 'mlock' is since it was also being used to protect st->config against the current device state. On the other places the lock was being used, we can just drop it since we are only doing one i2c bus read/write which is already safe. 
While at it, properly include "mutex.h" for mutex related APIs. Signed-off-by: Nuno Sá Link: https://lore.kernel.org/r/20221004134909.1692021-2-nuno.sa@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad799x.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/iio/adc/ad799x.c b/drivers/iio/adc/ad799x.c index 6dbe9d5e08a2..4730d8d0f4c3 100644 --- a/drivers/iio/adc/ad799x.c +++ b/drivers/iio/adc/ad799x.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -125,6 +126,8 @@ struct ad799x_state { const struct ad799x_chip_config *chip_config; struct regulator *reg; struct regulator *vref; + /* lock to protect against multiple access to the device */ + struct mutex lock; unsigned id; u16 config; @@ -290,7 +293,9 @@ static int ad799x_read_raw(struct iio_dev *indio_dev, ret = iio_device_claim_direct_mode(indio_dev); if (ret) return ret; + mutex_lock(&st->lock); ret = ad799x_scan_direct(st, chan->scan_index); + mutex_unlock(&st->lock); iio_device_release_direct_mode(indio_dev); if (ret < 0) @@ -351,7 +356,8 @@ static ssize_t ad799x_write_frequency(struct device *dev, if (ret) return ret; - mutex_lock(&indio_dev->mlock); + mutex_lock(&st->lock); + ret = i2c_smbus_read_byte_data(st->client, AD7998_CYCLE_TMR_REG); if (ret < 0) goto error_ret_mutex; @@ -373,7 +379,7 @@ static ssize_t ad799x_write_frequency(struct device *dev, ret = len; error_ret_mutex: - mutex_unlock(&indio_dev->mlock); + mutex_unlock(&st->lock); return ret; } @@ -407,6 +413,8 @@ static int ad799x_write_event_config(struct iio_dev *indio_dev, if (ret) return ret; + mutex_lock(&st->lock); + if (state) st->config |= BIT(chan->scan_index) << AD799X_CHANNEL_SHIFT; else @@ -418,6 +426,7 @@ static int ad799x_write_event_config(struct iio_dev *indio_dev, st->config &= ~AD7998_ALERT_EN; ret = ad799x_write_config(st, st->config); + mutex_unlock(&st->lock); iio_device_release_direct_mode(indio_dev); return ret; } @@ -454,11 +463,9 @@ 
static int ad799x_write_event_value(struct iio_dev *indio_dev, if (val < 0 || val > GENMASK(chan->scan_type.realbits - 1, 0)) return -EINVAL; - mutex_lock(&indio_dev->mlock); ret = i2c_smbus_write_word_swapped(st->client, ad799x_threshold_reg(chan, dir, info), val << chan->scan_type.shift); - mutex_unlock(&indio_dev->mlock); return ret; } @@ -473,10 +480,8 @@ static int ad799x_read_event_value(struct iio_dev *indio_dev, int ret; struct ad799x_state *st = iio_priv(indio_dev); - mutex_lock(&indio_dev->mlock); ret = i2c_smbus_read_word_swapped(st->client, ad799x_threshold_reg(chan, dir, info)); - mutex_unlock(&indio_dev->mlock); if (ret < 0) return ret; *val = (ret >> chan->scan_type.shift) & @@ -863,6 +868,9 @@ static int ad799x_probe(struct i2c_client *client, if (ret) goto error_cleanup_ring; } + + mutex_init(&st->lock); + ret = iio_device_register(indio_dev); if (ret) goto error_cleanup_ring; From ed3aa67167bed8825993e6483c6dbeae607c1ff9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Tue, 4 Oct 2022 15:48:55 +0200 Subject: [PATCH 2249/4122] iio: adc: axp288_adc: do not use internal iio_dev lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The iio_device lock is only meant for internal use. Hence define a device local lock to protect against concurrent accesses. While at it, properly include "mutex.h" for mutex related APIs. 
Signed-off-by: Nuno Sá Link: https://lore.kernel.org/r/20221004134909.1692021-3-nuno.sa@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/axp288_adc.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/axp288_adc.c b/drivers/iio/adc/axp288_adc.c index 580361bd9849..49fff1cabd0d 100644 --- a/drivers/iio/adc/axp288_adc.c +++ b/drivers/iio/adc/axp288_adc.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -50,6 +51,8 @@ enum axp288_adc_id { struct axp288_adc_info { int irq; struct regmap *regmap; + /* lock to protect against multiple access to the device */ + struct mutex lock; bool ts_enabled; }; @@ -161,7 +164,7 @@ static int axp288_adc_read_raw(struct iio_dev *indio_dev, int ret; struct axp288_adc_info *info = iio_priv(indio_dev); - mutex_lock(&indio_dev->mlock); + mutex_lock(&info->lock); switch (mask) { case IIO_CHAN_INFO_RAW: if (axp288_adc_set_ts(info, AXP288_ADC_TS_CURRENT_ON_ONDEMAND, @@ -178,7 +181,7 @@ static int axp288_adc_read_raw(struct iio_dev *indio_dev, default: ret = -EINVAL; } - mutex_unlock(&indio_dev->mlock); + mutex_unlock(&info->lock); return ret; } @@ -289,6 +292,8 @@ static int axp288_adc_probe(struct platform_device *pdev) if (ret < 0) return ret; + mutex_init(&info->lock); + return devm_iio_device_register(&pdev->dev, indio_dev); } From 7dde7ec2a84d598eb755883540d48a5ee73948ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Tue, 4 Oct 2022 15:48:56 +0200 Subject: [PATCH 2250/4122] iio: adc: imx7d_adc: do not use internal iio_dev lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The iio_device lock is only meant for internal use. Hence define a device local lock to protect against concurrent accesses. While at it, properly include "mutex.h" for mutex related APIs. 
Signed-off-by: Nuno Sá Reviewed-by: Haibo Chen Link: https://lore.kernel.org/r/20221004134909.1692021-4-nuno.sa@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/imx7d_adc.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/iio/adc/imx7d_adc.c b/drivers/iio/adc/imx7d_adc.c index 86caff1d006b..22da81bac97f 100644 --- a/drivers/iio/adc/imx7d_adc.c +++ b/drivers/iio/adc/imx7d_adc.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -108,7 +109,8 @@ struct imx7d_adc { struct device *dev; void __iomem *regs; struct clk *clk; - + /* lock to protect against multiple access to the device */ + struct mutex lock; u32 vref_uv; u32 value; u32 channel; @@ -293,7 +295,7 @@ static int imx7d_adc_read_raw(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_RAW: - mutex_lock(&indio_dev->mlock); + mutex_lock(&info->lock); reinit_completion(&info->completion); channel = chan->channel & 0x03; @@ -303,16 +305,16 @@ static int imx7d_adc_read_raw(struct iio_dev *indio_dev, ret = wait_for_completion_interruptible_timeout (&info->completion, IMX7D_ADC_TIMEOUT); if (ret == 0) { - mutex_unlock(&indio_dev->mlock); + mutex_unlock(&info->lock); return -ETIMEDOUT; } if (ret < 0) { - mutex_unlock(&indio_dev->mlock); + mutex_unlock(&info->lock); return ret; } *val = info->value; - mutex_unlock(&indio_dev->mlock); + mutex_unlock(&info->lock); return IIO_VAL_INT; case IIO_CHAN_INFO_SCALE: @@ -531,6 +533,8 @@ static int imx7d_adc_probe(struct platform_device *pdev) if (ret) return ret; + mutex_init(&info->lock); + ret = devm_iio_device_register(dev, indio_dev); if (ret) { dev_err(&pdev->dev, "Couldn't register the device.\n"); From 98c4fb93d1d448db191eea795a40072dc61da07d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Tue, 4 Oct 2022 15:48:57 +0200 Subject: [PATCH 2251/4122] iio: adc: lpc32xx_adc: do not use internal iio_dev lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit The iio_device lock is only meant for internal use. Hence define a device local lock to protect against concurrent accesses. While at it, properly include "mutex.h" for mutex related APIs. Signed-off-by: Nuno Sá Link: https://lore.kernel.org/r/20221004134909.1692021-5-nuno.sa@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/lpc32xx_adc.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/iio/adc/lpc32xx_adc.c b/drivers/iio/adc/lpc32xx_adc.c index b56ce15255cf..732c924a976d 100644 --- a/drivers/iio/adc/lpc32xx_adc.c +++ b/drivers/iio/adc/lpc32xx_adc.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -49,6 +50,8 @@ struct lpc32xx_adc_state { struct clk *clk; struct completion completion; struct regulator *vref; + /* lock to protect against multiple access to the device */ + struct mutex lock; u32 value; }; @@ -64,10 +67,10 @@ static int lpc32xx_read_raw(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_RAW: - mutex_lock(&indio_dev->mlock); + mutex_lock(&st->lock); ret = clk_prepare_enable(st->clk); if (ret) { - mutex_unlock(&indio_dev->mlock); + mutex_unlock(&st->lock); return ret; } /* Measurement setup */ @@ -80,7 +83,7 @@ static int lpc32xx_read_raw(struct iio_dev *indio_dev, wait_for_completion(&st->completion); /* set by ISR */ clk_disable_unprepare(st->clk); *val = st->value; - mutex_unlock(&indio_dev->mlock); + mutex_unlock(&st->lock); return IIO_VAL_INT; @@ -201,6 +204,8 @@ static int lpc32xx_adc_probe(struct platform_device *pdev) iodev->modes = INDIO_DIRECT_MODE; iodev->num_channels = ARRAY_SIZE(lpc32xx_adc_iio_channels); + mutex_init(&st->lock); + retval = devm_iio_device_register(&pdev->dev, iodev); if (retval) return retval; From da8091f8acfa953ac55e2aa8d4218e49b18206a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Tue, 4 Oct 2022 15:48:58 +0200 Subject: [PATCH 2252/4122] iio: adc: ltc2497-core: do not use internal
iio_dev lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The iio_device lock is only meant for internal use. Hence define a device local lock to protect against concurrent accesses. While at it, properly include "mutex.h" for mutex related APIs. Signed-off-by: Nuno Sá Link: https://lore.kernel.org/r/20221004134909.1692021-6-nuno.sa@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ltc2497-core.c | 7 +++++-- drivers/iio/adc/ltc2497.h | 2 ++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/ltc2497-core.c b/drivers/iio/adc/ltc2497-core.c index f52d37af4d1f..996f6cbbed3c 100644 --- a/drivers/iio/adc/ltc2497-core.c +++ b/drivers/iio/adc/ltc2497-core.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include "ltc2497.h" @@ -81,9 +82,9 @@ static int ltc2497core_read_raw(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_RAW: - mutex_lock(&indio_dev->mlock); + mutex_lock(&ddata->lock); ret = ltc2497core_read(ddata, chan->address, val); - mutex_unlock(&indio_dev->mlock); + mutex_unlock(&ddata->lock); if (ret < 0) return ret; @@ -214,6 +215,8 @@ int ltc2497core_probe(struct device *dev, struct iio_dev *indio_dev) ddata->addr_prev = LTC2497_CONFIG_DEFAULT; ddata->time_prev = ktime_get(); + mutex_init(&ddata->lock); + ret = iio_device_register(indio_dev); if (ret < 0) goto err_array_unregister; diff --git a/drivers/iio/adc/ltc2497.h b/drivers/iio/adc/ltc2497.h index e023de0d88c4..781519b52475 100644 --- a/drivers/iio/adc/ltc2497.h +++ b/drivers/iio/adc/ltc2497.h @@ -12,6 +12,8 @@ struct ltc2497_chip_info { struct ltc2497core_driverdata { struct regulator *ref; ktime_t time_prev; + /* lock to protect against multiple access to the device */ + struct mutex lock; const struct ltc2497_chip_info *chip_info; u8 addr_prev; int (*result_and_measure)(struct ltc2497core_driverdata *ddata, From d0c09264f1a64ba8435acfaa70380f61239fc0d4 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Nuno=20S=C3=A1?= Date: Tue, 4 Oct 2022 15:48:59 +0200 Subject: [PATCH 2253/4122] iio: adc: meson_saradc: do not use internal iio_dev lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The iio_device lock is only meant for internal use. Hence define a device local lock to protect against concurrent accesses. While at it, properly include "mutex.h" for mutex related APIs. Signed-off-by: Nuno Sá Reviewed-by: Martin Blumenstingl Link: https://lore.kernel.org/r/20221004134909.1692021-7-nuno.sa@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/meson_saradc.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/iio/adc/meson_saradc.c b/drivers/iio/adc/meson_saradc.c index 1a68b099d323..85b6826cc10c 100644 --- a/drivers/iio/adc/meson_saradc.c +++ b/drivers/iio/adc/meson_saradc.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -276,6 +277,8 @@ struct meson_sar_adc_priv { struct clk *adc_div_clk; struct clk_divider clk_div; struct completion done; + /* lock to protect against multiple access to the device */ + struct mutex lock; int calibbias; int calibscale; struct regmap *tsc_regmap; @@ -486,7 +489,7 @@ static int meson_sar_adc_lock(struct iio_dev *indio_dev) struct meson_sar_adc_priv *priv = iio_priv(indio_dev); int val, ret; - mutex_lock(&indio_dev->mlock); + mutex_lock(&priv->lock); if (priv->param->has_bl30_integration) { /* prevent BL30 from using the SAR ADC while we are using it */ @@ -504,7 +507,7 @@ static int meson_sar_adc_lock(struct iio_dev *indio_dev) !(val & MESON_SAR_ADC_DELAY_BL30_BUSY), 1, 10000); if (ret) { - mutex_unlock(&indio_dev->mlock); + mutex_unlock(&priv->lock); return ret; } } @@ -521,7 +524,7 @@ static void meson_sar_adc_unlock(struct iio_dev *indio_dev) regmap_update_bits(priv->regmap, MESON_SAR_ADC_DELAY, MESON_SAR_ADC_DELAY_KERNEL_BUSY, 0); - mutex_unlock(&indio_dev->mlock); + mutex_unlock(&priv->lock); } 
static void meson_sar_adc_clear_fifo(struct iio_dev *indio_dev) @@ -1250,6 +1253,8 @@ static int meson_sar_adc_probe(struct platform_device *pdev) if (ret) goto err; + mutex_init(&priv->lock); + ret = meson_sar_adc_hw_enable(indio_dev); if (ret) goto err; From bb690935df8dd8f97612b422c669bd1a5fe87096 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Tue, 4 Oct 2022 15:49:00 +0200 Subject: [PATCH 2254/4122] iio: adc: rockchip_saradc: do not use internal iio_dev lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The iio_device lock is only meant for internal use. Hence define a device local lock to protect against concurrent accesses. While at it, properly include "mutex.h" for mutex related APIs. Signed-off-by: Nuno Sá Acked-by: Heiko Stuebner Link: https://lore.kernel.org/r/20221004134909.1692021-8-nuno.sa@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/rockchip_saradc.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/iio/adc/rockchip_saradc.c b/drivers/iio/adc/rockchip_saradc.c index b87ea7148b58..79448c5ffc2a 100644 --- a/drivers/iio/adc/rockchip_saradc.c +++ b/drivers/iio/adc/rockchip_saradc.c @@ -5,6 +5,7 @@ */ #include +#include #include #include #include @@ -49,6 +50,8 @@ struct rockchip_saradc { struct clk *clk; struct completion completion; struct regulator *vref; + /* lock to protect against multiple access to the device */ + struct mutex lock; int uv_vref; struct reset_control *reset; const struct rockchip_saradc_data *data; @@ -94,17 +97,17 @@ static int rockchip_saradc_read_raw(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_RAW: - mutex_lock(&indio_dev->mlock); + mutex_lock(&info->lock); ret = rockchip_saradc_conversion(info, chan); if (ret) { rockchip_saradc_power_down(info); - mutex_unlock(&indio_dev->mlock); + mutex_unlock(&info->lock); return ret; } *val = info->last_val; - mutex_unlock(&indio_dev->mlock); + 
mutex_unlock(&info->lock); return IIO_VAL_INT; case IIO_CHAN_INFO_SCALE: *val = info->uv_vref / 1000; @@ -270,7 +273,7 @@ static irqreturn_t rockchip_saradc_trigger_handler(int irq, void *p) int ret; int i, j = 0; - mutex_lock(&i_dev->mlock); + mutex_lock(&info->lock); for_each_set_bit(i, i_dev->active_scan_mask, i_dev->masklength) { const struct iio_chan_spec *chan = &i_dev->channels[i]; @@ -287,7 +290,7 @@ static irqreturn_t rockchip_saradc_trigger_handler(int irq, void *p) iio_push_to_buffers_with_timestamp(i_dev, &data, iio_get_time_ns(i_dev)); out: - mutex_unlock(&i_dev->mlock); + mutex_unlock(&info->lock); iio_trigger_notify_done(i_dev->trig); @@ -478,6 +481,8 @@ static int rockchip_saradc_probe(struct platform_device *pdev) if (ret) return ret; + mutex_init(&info->lock); + return devm_iio_device_register(&pdev->dev, indio_dev); } From 8433aa3591afff6f4dc641a2e274e6171f66a4b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Tue, 4 Oct 2022 15:49:01 +0200 Subject: [PATCH 2255/4122] iio: adc: sc27xx_adc: do not use internal iio_dev lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The iio_device lock is only meant for internal use. Hence define a device local lock to protect against concurrent accesses. While at it, properly include "mutex.h" for mutex related APIs. 
Signed-off-by: Nuno Sá Link: https://lore.kernel.org/r/20221004134909.1692021-9-nuno.sa@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/sc27xx_adc.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/iio/adc/sc27xx_adc.c b/drivers/iio/adc/sc27xx_adc.c index f8421cbba8fa..ff1fc329bb9b 100644 --- a/drivers/iio/adc/sc27xx_adc.c +++ b/drivers/iio/adc/sc27xx_adc.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -83,6 +84,8 @@ struct sc27xx_adc_data { struct device *dev; struct regulator *volref; struct regmap *regmap; + /* lock to protect against multiple access to the device */ + struct mutex lock; /* * One hardware spinlock to synchronize between the multiple * subsystems which will access the unique ADC controller. @@ -664,9 +667,9 @@ static int sc27xx_adc_read_raw(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_RAW: - mutex_lock(&indio_dev->mlock); + mutex_lock(&data->lock); ret = sc27xx_adc_read(data, chan->channel, scale, &tmp); - mutex_unlock(&indio_dev->mlock); + mutex_unlock(&data->lock); if (ret) return ret; @@ -675,10 +678,10 @@ static int sc27xx_adc_read_raw(struct iio_dev *indio_dev, return IIO_VAL_INT; case IIO_CHAN_INFO_PROCESSED: - mutex_lock(&indio_dev->mlock); + mutex_lock(&data->lock); ret = sc27xx_adc_read_processed(data, chan->channel, scale, &tmp); - mutex_unlock(&indio_dev->mlock); + mutex_unlock(&data->lock); if (ret) return ret; @@ -934,6 +937,9 @@ static int sc27xx_adc_probe(struct platform_device *pdev) indio_dev->info = &sc27xx_info; indio_dev->channels = sc27xx_channels; indio_dev->num_channels = ARRAY_SIZE(sc27xx_channels); + + mutex_init(&sc27xx_data->lock); + ret = devm_iio_device_register(dev, indio_dev); if (ret) dev_err(dev, "could not register iio (ADC)"); From f2bdea865e776b571557035eac2e8afde8ea7844 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Tue, 4 Oct 2022 15:49:02 +0200 Subject: [PATCH 2256/4122] iio: adc: 
vf610_adc: add helper function to read samples MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a precursor change to make it simpler to remove the 'mlock' usage. Having the code in its own helper function also makes it easier to read the error paths. Signed-off-by: Nuno Sá Reviewed-by: Haibo Chen Link: https://lore.kernel.org/r/20221004134909.1692021-10-nuno.sa@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/vf610_adc.c | 94 +++++++++++++++++++++---------------- 1 file changed, 54 insertions(+), 40 deletions(-) diff --git a/drivers/iio/adc/vf610_adc.c b/drivers/iio/adc/vf610_adc.c index c6b16cf6e367..a6f9182d7766 100644 --- a/drivers/iio/adc/vf610_adc.c +++ b/drivers/iio/adc/vf610_adc.c @@ -622,6 +622,58 @@ static const struct attribute_group vf610_attribute_group = { .attrs = vf610_attributes, }; +static int vf610_read_sample(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, int *val) +{ + struct vf610_adc *info = iio_priv(indio_dev); + unsigned int hc_cfg; + int ret; + + mutex_lock(&indio_dev->mlock); + if (iio_buffer_enabled(indio_dev)) { + ret = -EBUSY; + goto out_unlock; + } + + reinit_completion(&info->completion); + hc_cfg = VF610_ADC_ADCHC(chan->channel); + hc_cfg |= VF610_ADC_AIEN; + writel(hc_cfg, info->regs + VF610_REG_ADC_HC0); + ret = wait_for_completion_interruptible_timeout(&info->completion, + VF610_ADC_TIMEOUT); + if (ret == 0) { + ret = -ETIMEDOUT; + goto out_unlock; + } + + if (ret < 0) + goto out_unlock; + + switch (chan->type) { + case IIO_VOLTAGE: + *val = info->value; + break; + case IIO_TEMP: + /* + * Calculate in degree Celsius times 1000 + * Using the typical sensor slope of 1.84 mV/°C + * and VREFH_ADC at 3.3V, V at 25°C of 699 mV + */ + *val = 25000 - ((int)info->value - VF610_VTEMP25_3V3) * + 1000000 / VF610_TEMP_SLOPE_COEFF; + + break; + default: + ret = -EINVAL; + break; + } + +out_unlock: + mutex_unlock(&indio_dev->mlock); + + return ret; +} + static int
vf610_read_raw(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, int *val, @@ -629,53 +681,15 @@ static int vf610_read_raw(struct iio_dev *indio_dev, long mask) { struct vf610_adc *info = iio_priv(indio_dev); - unsigned int hc_cfg; long ret; switch (mask) { case IIO_CHAN_INFO_RAW: case IIO_CHAN_INFO_PROCESSED: - mutex_lock(&indio_dev->mlock); - if (iio_buffer_enabled(indio_dev)) { - mutex_unlock(&indio_dev->mlock); - return -EBUSY; - } - - reinit_completion(&info->completion); - hc_cfg = VF610_ADC_ADCHC(chan->channel); - hc_cfg |= VF610_ADC_AIEN; - writel(hc_cfg, info->regs + VF610_REG_ADC_HC0); - ret = wait_for_completion_interruptible_timeout - (&info->completion, VF610_ADC_TIMEOUT); - if (ret == 0) { - mutex_unlock(&indio_dev->mlock); - return -ETIMEDOUT; - } - if (ret < 0) { - mutex_unlock(&indio_dev->mlock); + ret = vf610_read_sample(indio_dev, chan, val); + if (ret < 0) return ret; - } - switch (chan->type) { - case IIO_VOLTAGE: - *val = info->value; - break; - case IIO_TEMP: - /* - * Calculate in degree Celsius times 1000 - * Using the typical sensor slope of 1.84 mV/°C - * and VREFH_ADC at 3.3V, V at 25°C of 699 mV - */ - *val = 25000 - ((int)info->value - VF610_VTEMP25_3V3) * - 1000000 / VF610_TEMP_SLOPE_COEFF; - - break; - default: - mutex_unlock(&indio_dev->mlock); - return -EINVAL; - } - - mutex_unlock(&indio_dev->mlock); return IIO_VAL_INT; case IIO_CHAN_INFO_SCALE: From 4e15cad8dbf9991d430c31166040575bf972b179 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Tue, 4 Oct 2022 15:49:03 +0200 Subject: [PATCH 2257/4122] iio: adc: vf610_adc: do not use internal iio_dev lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to drop the internal lock usage we needed two different things: 1) The first place where 'mlock' was being used was a typical case where iio_device_claim_direct_mode() fits perfectly.
2) In the second case, it was being used to prevent concurrent accesses to the device and shared data but nothing was being enforced with regard to buffering (i.e., there was nothing preventing the conversion mode from being changed while buffering). Hence, in this case, a new lock was introduced in the state structure. Note that the goal is not to introduce any functional change and that is the reason why a new lock was introduced to guarantee 2). While at it, properly include "mutex.h" for mutex related APIs. Signed-off-by: Nuno Sá Reviewed-by: Haibo Chen Link: https://lore.kernel.org/r/20221004134909.1692021-11-nuno.sa@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/vf610_adc.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/iio/adc/vf610_adc.c b/drivers/iio/adc/vf610_adc.c index a6f9182d7766..ae31aafd2653 100644 --- a/drivers/iio/adc/vf610_adc.c +++ b/drivers/iio/adc/vf610_adc.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -156,6 +157,9 @@ struct vf610_adc { void __iomem *regs; struct clk *clk; + /* lock to protect against multiple access to the device */ + struct mutex lock; + u32 vref_uv; u32 value; struct regulator *vref; @@ -467,11 +471,11 @@ static int vf610_set_conversion_mode(struct iio_dev *indio_dev, { struct vf610_adc *info = iio_priv(indio_dev); - mutex_lock(&indio_dev->mlock); + mutex_lock(&info->lock); info->adc_feature.conv_mode = mode; vf610_adc_calculate_rates(info); vf610_adc_hw_init(info); - mutex_unlock(&indio_dev->mlock); + mutex_unlock(&info->lock); return 0; } @@ -629,12 +633,11 @@ static int vf610_read_sample(struct iio_dev *indio_dev, unsigned int hc_cfg; int ret; - mutex_lock(&indio_dev->mlock); - if (iio_buffer_enabled(indio_dev)) { - ret = -EBUSY; - goto out_unlock; - } + ret = iio_device_claim_direct_mode(indio_dev); + if (ret) + return ret; + mutex_lock(&info->lock); reinit_completion(&info->completion); hc_cfg = VF610_ADC_ADCHC(chan->channel);
hc_cfg |= VF610_ADC_AIEN; @@ -669,7 +672,8 @@ static int vf610_read_sample(struct iio_dev *indio_dev, } out_unlock: - mutex_unlock(&indio_dev->mlock); + mutex_unlock(&info->lock); + iio_device_release_direct_mode(indio_dev); return ret; } @@ -892,6 +896,8 @@ static int vf610_adc_probe(struct platform_device *pdev) goto error_iio_device_register; } + mutex_init(&info->lock); + ret = iio_device_register(indio_dev); if (ret) { dev_err(&pdev->dev, "Couldn't register the device.\n"); From d711a5a7eff4855deb0e2c2663e679dc205e2d9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Tue, 4 Oct 2022 15:49:04 +0200 Subject: [PATCH 2258/4122] iio: common: scmi_iio: do not use internal iio_dev lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The iio_device lock is only meant for internal use. Hence define a device local lock to protect against concurrent accesses. While at it, properly include "mutex.h" for mutex related APIs. Signed-off-by: Nuno Sá Link: https://lore.kernel.org/r/20221004134909.1692021-12-nuno.sa@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/common/scmi_sensors/scmi_iio.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/iio/common/scmi_sensors/scmi_iio.c b/drivers/iio/common/scmi_sensors/scmi_iio.c index 54ccf19ab2bb..d92f7f651f7b 100644 --- a/drivers/iio/common/scmi_sensors/scmi_iio.c +++ b/drivers/iio/common/scmi_sensors/scmi_iio.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -27,6 +28,8 @@ struct scmi_iio_priv { struct scmi_protocol_handle *ph; const struct scmi_sensor_info *sensor_info; struct iio_dev *indio_dev; + /* lock to protect against multiple access to the device */ + struct mutex lock; /* adding one additional channel for timestamp */ s64 iio_buf[SCMI_IIO_NUM_OF_AXIS + 1]; struct notifier_block sensor_update_nb; @@ -198,13 +201,14 @@ static int scmi_iio_write_raw(struct iio_dev *iio_dev, struct 
iio_chan_spec const *chan, int val, int val2, long mask) { + struct scmi_iio_priv *sensor = iio_priv(iio_dev); int err; switch (mask) { case IIO_CHAN_INFO_SAMP_FREQ: - mutex_lock(&iio_dev->mlock); + mutex_lock(&sensor->lock); err = scmi_iio_set_odr_val(iio_dev, val, val2); - mutex_unlock(&iio_dev->mlock); + mutex_unlock(&sensor->lock); return err; default: return -EINVAL; @@ -586,6 +590,7 @@ scmi_alloc_iiodev(struct scmi_device *sdev, sensor->sensor_info = sensor_info; sensor->sensor_update_nb.notifier_call = scmi_iio_sensor_update_cb; sensor->indio_dev = iiodev; + mutex_init(&sensor->lock); /* adding one additional channel for timestamp */ iiodev->num_channels = sensor_info->num_axis + 1; From 3cc36cabc669fffa7f04931e3dd25dc47314ec06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Tue, 4 Oct 2022 15:49:05 +0200 Subject: [PATCH 2259/4122] iio: gyro: itg3200_core: do not use internal iio_dev lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The iio_device lock is only meant for internal use. Hence define a device local lock to protect against concurrent accesses. While at it, properly include "mutex.h" for mutex related APIs. 
Signed-off-by: Nuno Sá Link: https://lore.kernel.org/r/20221004134909.1692021-13-nuno.sa@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/gyro/itg3200_core.c | 10 +++++++--- include/linux/iio/gyro/itg3200.h | 2 ++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/iio/gyro/itg3200_core.c b/drivers/iio/gyro/itg3200_core.c index 421501584587..74ca22468496 100644 --- a/drivers/iio/gyro/itg3200_core.c +++ b/drivers/iio/gyro/itg3200_core.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -131,6 +132,7 @@ static int itg3200_write_raw(struct iio_dev *indio_dev, int val2, long mask) { + struct itg3200 *st = iio_priv(indio_dev); int ret; u8 t; @@ -139,11 +141,11 @@ static int itg3200_write_raw(struct iio_dev *indio_dev, if (val == 0 || val2 != 0) return -EINVAL; - mutex_lock(&indio_dev->mlock); + mutex_lock(&st->lock); ret = itg3200_read_reg_8(indio_dev, ITG3200_REG_DLPF, &t); if (ret) { - mutex_unlock(&indio_dev->mlock); + mutex_unlock(&st->lock); return ret; } t = ((t & ITG3200_DLPF_CFG_MASK) ? 
1000u : 8000u) / val - 1; @@ -152,7 +154,7 @@ static int itg3200_write_raw(struct iio_dev *indio_dev, ITG3200_REG_SAMPLE_RATE_DIV, t); - mutex_unlock(&indio_dev->mlock); + mutex_unlock(&st->lock); return ret; default: @@ -336,6 +338,8 @@ static int itg3200_probe(struct i2c_client *client, if (ret) goto error_remove_trigger; + mutex_init(&st->lock); + ret = iio_device_register(indio_dev); if (ret) goto error_remove_trigger; diff --git a/include/linux/iio/gyro/itg3200.h b/include/linux/iio/gyro/itg3200.h index a602fe7b84fa..74b6d1cadc86 100644 --- a/include/linux/iio/gyro/itg3200.h +++ b/include/linux/iio/gyro/itg3200.h @@ -102,6 +102,8 @@ struct itg3200 { struct i2c_client *i2c; struct iio_trigger *trig; struct iio_mount_matrix orientation; + /* lock to protect against multiple access to the device */ + struct mutex lock; }; enum ITG3200_SCAN_INDEX { From 4b0c44bdb72e99f17b600ba5ba9acc81cf67e335 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Wed, 5 Oct 2022 10:50:43 +0200 Subject: [PATCH 2260/4122] dt-bindings: iio: dac: change ad5766 maintainer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As the maintainer email no longer exists, change it to myself. 
Signed-off-by: Nuno Sá Acked-by: Rob Herring Link: https://lore.kernel.org/r/20221005085044.204701-1-nuno.sa@analog.com Signed-off-by: Jonathan Cameron --- Documentation/devicetree/bindings/iio/dac/adi,ad5766.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/iio/dac/adi,ad5766.yaml b/Documentation/devicetree/bindings/iio/dac/adi,ad5766.yaml index 29bd16dab546..3c8784a54d2c 100644 --- a/Documentation/devicetree/bindings/iio/dac/adi,ad5766.yaml +++ b/Documentation/devicetree/bindings/iio/dac/adi,ad5766.yaml @@ -8,7 +8,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Analog Devices AD5766 DAC device driver maintainers: - - Cristian Pop + - Nuno Sá description: | Bindings for the Analog Devices AD5766 current DAC device. Datasheet can be From 8add74e75ea2a2356d5b4579cacbaef7f4828e35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Wed, 5 Oct 2022 10:50:44 +0200 Subject: [PATCH 2261/4122] dt-bindings: iio: frequency: change admv4420 maintainer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As the maintainer email no longer exists, change it to myself. 
Signed-off-by: Nuno Sá Acked-by: Rob Herring Link: https://lore.kernel.org/r/20221005085044.204701-2-nuno.sa@analog.com Signed-off-by: Jonathan Cameron --- .../devicetree/bindings/iio/frequency/adi,admv4420.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/iio/frequency/adi,admv4420.yaml b/Documentation/devicetree/bindings/iio/frequency/adi,admv4420.yaml index da7fe85ec92e..071cda64865d 100644 --- a/Documentation/devicetree/bindings/iio/frequency/adi,admv4420.yaml +++ b/Documentation/devicetree/bindings/iio/frequency/adi,admv4420.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: ADMV4420 K Band Downconverter maintainers: - - Cristian Pop + - Nuno Sá description: The ADMV4420 is a highly integrated, double balanced, active From d26b79732d27ff45c83109e2484b96dcac223355 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 4 Oct 2022 13:56:35 +0200 Subject: [PATCH 2262/4122] dt-bindings: iio: addac: adi,ad74413r: use spi-peripheral-props.yaml Reference the spi-peripheral-props.yaml schema to allow using all properties typical for SPI-connected devices, even these which device bindings author did not tried yet. While changing additionalProperties->unevaluatedProperties, put it in typical place, just before example DTS. 
Signed-off-by: Krzysztof Kozlowski Acked-by: Rob Herring Link: https://lore.kernel.org/r/20221004115642.63749-1-krzysztof.kozlowski@linaro.org Signed-off-by: Jonathan Cameron --- .../devicetree/bindings/iio/addac/adi,ad74413r.yaml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/iio/addac/adi,ad74413r.yaml b/Documentation/devicetree/bindings/iio/addac/adi,ad74413r.yaml index 03bb90a7f4f8..2e8dad9278f2 100644 --- a/Documentation/devicetree/bindings/iio/addac/adi,ad74413r.yaml +++ b/Documentation/devicetree/bindings/iio/addac/adi,ad74413r.yaml @@ -58,8 +58,6 @@ required: - spi-cpol - refin-supply -additionalProperties: false - patternProperties: "^channel@[0-3]$": type: object @@ -103,6 +101,11 @@ patternProperties: required: - reg +allOf: + - $ref: /schemas/spi/spi-peripheral-props.yaml# + +unevaluatedProperties: false + examples: - | #include From 1b96d663d97da5205526d7020dad5258b88baee2 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 4 Oct 2022 13:56:36 +0200 Subject: [PATCH 2263/4122] dt-bindings: iio: addac: adi,ad74413r: improve example Improve example by: dropping unrelated properties (status and cs-gpios) and using generic node name. 
Signed-off-by: Krzysztof Kozlowski Acked-by: Rob Herring Link: https://lore.kernel.org/r/20221004115642.63749-2-krzysztof.kozlowski@linaro.org Signed-off-by: Jonathan Cameron --- .../devicetree/bindings/iio/addac/adi,ad74413r.yaml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/iio/addac/adi,ad74413r.yaml b/Documentation/devicetree/bindings/iio/addac/adi,ad74413r.yaml index 2e8dad9278f2..58b3ae14ccaa 100644 --- a/Documentation/devicetree/bindings/iio/addac/adi,ad74413r.yaml +++ b/Documentation/devicetree/bindings/iio/addac/adi,ad74413r.yaml @@ -116,10 +116,7 @@ examples: #address-cells = <1>; #size-cells = <0>; - cs-gpios = <&gpio 17 GPIO_ACTIVE_LOW>; - status = "okay"; - - ad74413r@0 { + addac@0 { compatible = "adi,ad74413r"; reg = <0>; spi-max-frequency = <1000000>; From 00407a680e86db4d2fee43250849b7fa205507d3 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 4 Oct 2022 13:56:37 +0200 Subject: [PATCH 2264/4122] dt-bindings: iio: frequency: use spi-peripheral-props.yaml For devices connectable by SPI bus (e.g. already using "spi-max-frequency" property), reference the "spi-peripheral-props.yaml" schema to allow using all SPI device properties, even these which device bindings author did not tried yet. While changing additionalProperties->unevaluatedProperties, put it in typical place, just before example DTS. 
Signed-off-by: Krzysztof Kozlowski Acked-by: Rob Herring Link: https://lore.kernel.org/r/20221004115642.63749-3-krzysztof.kozlowski@linaro.org Signed-off-by: Jonathan Cameron --- .../devicetree/bindings/iio/frequency/adi,adf4350.yaml | 7 +++++-- .../devicetree/bindings/iio/frequency/adi,admv1013.yaml | 5 ++++- .../devicetree/bindings/iio/frequency/adi,admv1014.yaml | 5 ++++- .../devicetree/bindings/iio/frequency/adi,admv4420.yaml | 6 +++++- .../devicetree/bindings/iio/frequency/adi,adrf6780.yaml | 5 ++++- 5 files changed, 22 insertions(+), 6 deletions(-) diff --git a/Documentation/devicetree/bindings/iio/frequency/adi,adf4350.yaml b/Documentation/devicetree/bindings/iio/frequency/adi,adf4350.yaml index d7f20b8518e0..43cbf27114c7 100644 --- a/Documentation/devicetree/bindings/iio/frequency/adi,adf4350.yaml +++ b/Documentation/devicetree/bindings/iio/frequency/adi,adf4350.yaml @@ -160,13 +160,16 @@ properties: 2: +2dBm 3: +5dBm -additionalProperties: false - required: - compatible - reg - clocks +allOf: + - $ref: /schemas/spi/spi-peripheral-props.yaml# + +unevaluatedProperties: false + examples: - | spi { diff --git a/Documentation/devicetree/bindings/iio/frequency/adi,admv1013.yaml b/Documentation/devicetree/bindings/iio/frequency/adi,admv1013.yaml index 23f1f3b55abb..fc813bcb6532 100644 --- a/Documentation/devicetree/bindings/iio/frequency/adi,admv1013.yaml +++ b/Documentation/devicetree/bindings/iio/frequency/adi,admv1013.yaml @@ -70,7 +70,10 @@ required: - clock-names - vcm-supply -additionalProperties: false +allOf: + - $ref: /schemas/spi/spi-peripheral-props.yaml# + +unevaluatedProperties: false examples: - | diff --git a/Documentation/devicetree/bindings/iio/frequency/adi,admv1014.yaml b/Documentation/devicetree/bindings/iio/frequency/adi,admv1014.yaml index 2716c1e8fe31..ab86daa2c56e 100644 --- a/Documentation/devicetree/bindings/iio/frequency/adi,admv1014.yaml +++ b/Documentation/devicetree/bindings/iio/frequency/adi,admv1014.yaml @@ -104,7 +104,10 @@ 
required: - clock-names - vcm-supply -additionalProperties: false +allOf: + - $ref: /schemas/spi/spi-peripheral-props.yaml# + +unevaluatedProperties: false examples: - | diff --git a/Documentation/devicetree/bindings/iio/frequency/adi,admv4420.yaml b/Documentation/devicetree/bindings/iio/frequency/adi,admv4420.yaml index 071cda64865d..64f2352aac3d 100644 --- a/Documentation/devicetree/bindings/iio/frequency/adi,admv4420.yaml +++ b/Documentation/devicetree/bindings/iio/frequency/adi,admv4420.yaml @@ -37,7 +37,11 @@ required: - compatible - reg -additionalProperties: false + +allOf: + - $ref: /schemas/spi/spi-peripheral-props.yaml# + +unevaluatedProperties: false examples: - | diff --git a/Documentation/devicetree/bindings/iio/frequency/adi,adrf6780.yaml b/Documentation/devicetree/bindings/iio/frequency/adi,adrf6780.yaml index 3a8ea93f4e0c..f11391ab4b62 100644 --- a/Documentation/devicetree/bindings/iio/frequency/adi,adrf6780.yaml +++ b/Documentation/devicetree/bindings/iio/frequency/adi,adrf6780.yaml @@ -113,7 +113,10 @@ required: - clocks - clock-names -additionalProperties: false +allOf: + - $ref: /schemas/spi/spi-peripheral-props.yaml# + +unevaluatedProperties: false examples: - | From d29c7f8ce90d549727d7a07b6096615f26b38e85 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 4 Oct 2022 13:56:38 +0200 Subject: [PATCH 2265/4122] dt-bindings: iio: gyroscope: use spi-peripheral-props.yaml For devices connectable by SPI bus (e.g. already using "spi-max-frequency" property), reference the "spi-peripheral-props.yaml" schema to allow using all SPI device properties, even these which device bindings author did not tried yet. 
Signed-off-by: Krzysztof Kozlowski Acked-by: Rob Herring Link: https://lore.kernel.org/r/20221004115642.63749-4-krzysztof.kozlowski@linaro.org Signed-off-by: Jonathan Cameron --- .../devicetree/bindings/iio/gyroscope/adi,adxrs290.yaml | 5 ++++- .../devicetree/bindings/iio/gyroscope/nxp,fxas21002c.yaml | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/iio/gyroscope/adi,adxrs290.yaml b/Documentation/devicetree/bindings/iio/gyroscope/adi,adxrs290.yaml index 662ec59ca0af..0ae2464b9bc4 100644 --- a/Documentation/devicetree/bindings/iio/gyroscope/adi,adxrs290.yaml +++ b/Documentation/devicetree/bindings/iio/gyroscope/adi,adxrs290.yaml @@ -38,7 +38,10 @@ required: - spi-cpol - spi-cpha -additionalProperties: false +allOf: + - $ref: /schemas/spi/spi-peripheral-props.yaml# + +unevaluatedProperties: false examples: - | diff --git a/Documentation/devicetree/bindings/iio/gyroscope/nxp,fxas21002c.yaml b/Documentation/devicetree/bindings/iio/gyroscope/nxp,fxas21002c.yaml index 3f57a1b813e6..2c900e9dddc6 100644 --- a/Documentation/devicetree/bindings/iio/gyroscope/nxp,fxas21002c.yaml +++ b/Documentation/devicetree/bindings/iio/gyroscope/nxp,fxas21002c.yaml @@ -56,7 +56,10 @@ required: - compatible - reg -additionalProperties: false +allOf: + - $ref: /schemas/spi/spi-peripheral-props.yaml# + +unevaluatedProperties: false examples: - | From acce6052cdf9f9fc03f2668f63a1d9e42dc8a424 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 4 Oct 2022 13:56:39 +0200 Subject: [PATCH 2266/4122] dt-bindings: iio: imu: adi,adis16475: use spi-peripheral-props.yaml MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For devices connectable by SPI bus (e.g. already using "spi-max-frequency" property), reference the "spi-peripheral-props.yaml" schema to allow using all SPI device properties, even these which device bindings author did not tried yet. 
Signed-off-by: Krzysztof Kozlowski Acked-by: Rob Herring Reviewed-by: Nuno Sá Link: https://lore.kernel.org/r/20221004115642.63749-5-krzysztof.kozlowski@linaro.org Signed-off-by: Jonathan Cameron --- Documentation/devicetree/bindings/iio/imu/adi,adis16475.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/iio/imu/adi,adis16475.yaml b/Documentation/devicetree/bindings/iio/imu/adi,adis16475.yaml index a7574210175a..5dbfae80bb28 100644 --- a/Documentation/devicetree/bindings/iio/imu/adi,adis16475.yaml +++ b/Documentation/devicetree/bindings/iio/imu/adi,adis16475.yaml @@ -79,6 +79,7 @@ required: - spi-cpol allOf: + - $ref: /schemas/spi/spi-peripheral-props.yaml# - if: properties: compatible: @@ -107,7 +108,7 @@ allOf: dependencies: adi,sync-mode: [ clocks ] -additionalProperties: false +unevaluatedProperties: false examples: - | From 87748b25c65a0fda5f605b7cf07cf9b79b257308 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 4 Oct 2022 13:56:40 +0200 Subject: [PATCH 2267/4122] dt-bindings: iio: pressure: use spi-peripheral-props.yaml For devices connectable by SPI bus (e.g. already using "spi-max-frequency" property), reference the "spi-peripheral-props.yaml" schema to allow using all SPI device properties, even these which device bindings author did not tried yet. 
Signed-off-by: Krzysztof Kozlowski Acked-by: Rob Herring Link: https://lore.kernel.org/r/20221004115642.63749-6-krzysztof.kozlowski@linaro.org Signed-off-by: Jonathan Cameron --- .../devicetree/bindings/iio/pressure/meas,ms5611.yaml | 5 ++++- .../devicetree/bindings/iio/pressure/murata,zpa2326.yaml | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/iio/pressure/meas,ms5611.yaml b/Documentation/devicetree/bindings/iio/pressure/meas,ms5611.yaml index 4f06707450bf..7fed750fa3ff 100644 --- a/Documentation/devicetree/bindings/iio/pressure/meas,ms5611.yaml +++ b/Documentation/devicetree/bindings/iio/pressure/meas,ms5611.yaml @@ -30,7 +30,10 @@ required: - compatible - reg -additionalProperties: false +allOf: + - $ref: /schemas/spi/spi-peripheral-props.yaml# + +unevaluatedProperties: false examples: - | diff --git a/Documentation/devicetree/bindings/iio/pressure/murata,zpa2326.yaml b/Documentation/devicetree/bindings/iio/pressure/murata,zpa2326.yaml index d6103be03460..c33640ddde58 100644 --- a/Documentation/devicetree/bindings/iio/pressure/murata,zpa2326.yaml +++ b/Documentation/devicetree/bindings/iio/pressure/murata,zpa2326.yaml @@ -33,7 +33,10 @@ required: - compatible - reg -additionalProperties: false +allOf: + - $ref: /schemas/spi/spi-peripheral-props.yaml# + +unevaluatedProperties: false examples: - | From e91d40216890874791ee43ef40cc883ba412a84f Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 4 Oct 2022 13:56:41 +0200 Subject: [PATCH 2268/4122] dt-bindings: iio: proximity: ams,as3935: use spi-peripheral-props.yaml For devices connectable by SPI bus (e.g. already using "spi-max-frequency" property), reference the "spi-peripheral-props.yaml" schema to allow using all SPI device properties, even these which device bindings author did not tried yet. 
Signed-off-by: Krzysztof Kozlowski Acked-by: Rob Herring Link: https://lore.kernel.org/r/20221004115642.63749-7-krzysztof.kozlowski@linaro.org Signed-off-by: Jonathan Cameron --- .../devicetree/bindings/iio/proximity/ams,as3935.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/iio/proximity/ams,as3935.yaml b/Documentation/devicetree/bindings/iio/proximity/ams,as3935.yaml index 7fcba5d6d508..710d3b9a86d9 100644 --- a/Documentation/devicetree/bindings/iio/proximity/ams,as3935.yaml +++ b/Documentation/devicetree/bindings/iio/proximity/ams,as3935.yaml @@ -49,7 +49,10 @@ required: - spi-cpha - interrupts -additionalProperties: false +allOf: + - $ref: /schemas/spi/spi-peripheral-props.yaml# + +unevaluatedProperties: false examples: - | From 85250a2400ad43542d02b46b6e269343ea835331 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 4 Oct 2022 13:56:42 +0200 Subject: [PATCH 2269/4122] dt-bindings: iio: resolver: adi,ad2s90: use spi-peripheral-props.yaml For devices connectable by SPI bus (e.g. already using "spi-max-frequency" property), reference the "spi-peripheral-props.yaml" schema to allow using all SPI device properties, even these which device bindings author did not tried yet. While changing additionalProperties->unevaluatedProperties, put it in typical place, just before example DTS. 
Signed-off-by: Krzysztof Kozlowski Acked-by: Rob Herring Link: https://lore.kernel.org/r/20221004115642.63749-8-krzysztof.kozlowski@linaro.org Signed-off-by: Jonathan Cameron --- .../devicetree/bindings/iio/resolver/adi,ad2s90.yaml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/iio/resolver/adi,ad2s90.yaml b/Documentation/devicetree/bindings/iio/resolver/adi,ad2s90.yaml index 81e4bdfc17c4..b24e5a202a48 100644 --- a/Documentation/devicetree/bindings/iio/resolver/adi,ad2s90.yaml +++ b/Documentation/devicetree/bindings/iio/resolver/adi,ad2s90.yaml @@ -33,8 +33,6 @@ properties: spi-cpha: true -additionalProperties: false - required: - compatible - reg @@ -43,6 +41,11 @@ dependencies: spi-cpol: [ spi-cpha ] spi-cpha: [ spi-cpol ] +allOf: + - $ref: /schemas/spi/spi-peripheral-props.yaml# + +unevaluatedProperties: false + examples: - | spi { From a44ef7c4609724e2f395d8a24d4a863cd860bbba Mon Sep 17 00:00:00 2001 From: Ibrahim Tilki Date: Mon, 3 Oct 2022 13:59:01 +0300 Subject: [PATCH 2270/4122] iio: adc: add max11410 adc driver Adding support for max11410 24-bit, 1.9ksps delta-sigma adc which has 3 differential reference and 10 differential channel inputs. Inputs and references can be buffered internally. Inputs can also be amplified with internal PGA. Device has four digital filter modes: FIR50/60, FIR50, FIR60 and SINC4. FIR 50Hz and 60Hz rejections can be enabled/disabled separately. Digital filter selection affects sampling frequency range so driver has to consider the configured filter when configuring sampling frequency. 
Signed-off-by: Ibrahim Tilki Link: https://lore.kernel.org/r/20221003105903.229-2-Ibrahim.Tilki@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/Kconfig | 13 + drivers/iio/adc/Makefile | 1 + drivers/iio/adc/max11410.c | 1050 ++++++++++++++++++++++++++++++++++++ 3 files changed, 1064 insertions(+) create mode 100644 drivers/iio/adc/max11410.c diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index 791612ca6012..544986fd456d 100644 --- a/drivers/iio/adc/Kconfig +++ b/drivers/iio/adc/Kconfig @@ -667,6 +667,19 @@ config MAX11205 To compile this driver as a module, choose M here: the module will be called max11205. +config MAX11410 + tristate "Analog Devices MAX11410 ADC driver" + depends on SPI + select REGMAP_SPI + select IIO_BUFFER + select IIO_TRIGGER + select IIO_TRIGGERED_BUFFER + help + Say yes here to build support for Analog Devices MAX11410 ADCs. + + To compile this driver as a module, choose M here: the module will be + called max11410. + config MAX1241 tristate "Maxim max1241 ADC driver" depends on SPI_MASTER diff --git a/drivers/iio/adc/Makefile b/drivers/iio/adc/Makefile index 46caba7a010c..4d803413425c 100644 --- a/drivers/iio/adc/Makefile +++ b/drivers/iio/adc/Makefile @@ -62,6 +62,7 @@ obj-$(CONFIG_MAX1027) += max1027.o obj-$(CONFIG_MAX11100) += max11100.o obj-$(CONFIG_MAX1118) += max1118.o obj-$(CONFIG_MAX11205) += max11205.o +obj-$(CONFIG_MAX11410) += max11410.o obj-$(CONFIG_MAX1241) += max1241.o obj-$(CONFIG_MAX1363) += max1363.o obj-$(CONFIG_MAX9611) += max9611.o diff --git a/drivers/iio/adc/max11410.c b/drivers/iio/adc/max11410.c new file mode 100644 index 000000000000..8cd566367187 --- /dev/null +++ b/drivers/iio/adc/max11410.c @@ -0,0 +1,1050 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * MAX11410 SPI ADC driver + * + * Copyright 2022 Analog Devices Inc. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#define MAX11410_REG_CONV_START 0x01 +#define MAX11410_CONV_TYPE_SINGLE 0x00 +#define MAX11410_CONV_TYPE_CONTINUOUS 0x01 +#define MAX11410_REG_CAL_START 0x03 +#define MAX11410_CAL_START_SELF 0x00 +#define MAX11410_CAL_START_PGA 0x01 +#define MAX11410_REG_GPIO_CTRL(ch) ((ch) ? 0x05 : 0x04) +#define MAX11410_GPIO_INTRB 0xC1 +#define MAX11410_REG_FILTER 0x08 +#define MAX11410_FILTER_RATE_MASK GENMASK(3, 0) +#define MAX11410_FILTER_RATE_MAX 0x0F +#define MAX11410_FILTER_LINEF_MASK GENMASK(5, 4) +#define MAX11410_FILTER_50HZ BIT(5) +#define MAX11410_FILTER_60HZ BIT(4) +#define MAX11410_REG_CTRL 0x09 +#define MAX11410_CTRL_REFSEL_MASK GENMASK(2, 0) +#define MAX11410_CTRL_VREFN_BUF_BIT BIT(3) +#define MAX11410_CTRL_VREFP_BUF_BIT BIT(4) +#define MAX11410_CTRL_FORMAT_BIT BIT(5) +#define MAX11410_CTRL_UNIPOLAR_BIT BIT(6) +#define MAX11410_REG_MUX_CTRL0 0x0B +#define MAX11410_REG_PGA 0x0E +#define MAX11410_PGA_GAIN_MASK GENMASK(2, 0) +#define MAX11410_PGA_SIG_PATH_MASK GENMASK(5, 4) +#define MAX11410_PGA_SIG_PATH_BUFFERED 0x00 +#define MAX11410_PGA_SIG_PATH_BYPASS 0x01 +#define MAX11410_PGA_SIG_PATH_PGA 0x02 +#define MAX11410_REG_DATA0 0x30 +#define MAX11410_REG_STATUS 0x38 +#define MAX11410_STATUS_CONV_READY_BIT BIT(0) +#define MAX11410_STATUS_CAL_READY_BIT BIT(2) + +#define MAX11410_REFSEL_AVDD_AGND 0x03 +#define MAX11410_REFSEL_MAX 0x06 +#define MAX11410_SIG_PATH_MAX 0x02 +#define MAX11410_CHANNEL_INDEX_MAX 0x0A +#define MAX11410_AINP_AVDD 0x0A +#define MAX11410_AINN_GND 0x0A + +#define MAX11410_CONVERSION_TIMEOUT_MS 2000 +#define MAX11410_CALIB_TIMEOUT_MS 2000 + +#define MAX11410_SCALE_AVAIL_SIZE 8 + +enum max11410_filter { + MAX11410_FILTER_FIR5060, + MAX11410_FILTER_FIR50, + MAX11410_FILTER_FIR60, + MAX11410_FILTER_SINC4, +}; + +static const u8 max11410_sampling_len[] = { + [MAX11410_FILTER_FIR5060] = 
5, + [MAX11410_FILTER_FIR50] = 6, + [MAX11410_FILTER_FIR60] = 6, + [MAX11410_FILTER_SINC4] = 10, +}; + +static const int max11410_sampling_rates[4][10][2] = { + [MAX11410_FILTER_FIR5060] = { + { 1, 100000 }, + { 2, 100000 }, + { 4, 200000 }, + { 8, 400000 }, + { 16, 800000 } + }, + [MAX11410_FILTER_FIR50] = { + { 1, 300000 }, + { 2, 700000 }, + { 5, 300000 }, + { 10, 700000 }, + { 21, 300000 }, + { 40 } + }, + [MAX11410_FILTER_FIR60] = { + { 1, 300000 }, + { 2, 700000 }, + { 5, 300000 }, + { 10, 700000 }, + { 21, 300000 }, + { 40 } + }, + [MAX11410_FILTER_SINC4] = { + { 4 }, + { 10 }, + { 20 }, + { 40 }, + { 60 }, + { 120 }, + { 240 }, + { 480 }, + { 960 }, + { 1920 } + } +}; + +struct max11410_channel_config { + u32 settling_time_us; + u32 *scale_avail; + u8 refsel; + u8 sig_path; + u8 gain; + bool bipolar; + bool buffered_vrefp; + bool buffered_vrefn; +}; + +struct max11410_state { + struct spi_device *spi_dev; + struct iio_trigger *trig; + struct completion completion; + struct mutex lock; /* Prevent changing channel config during sampling */ + struct regmap *regmap; + struct regulator *avdd; + struct regulator *vrefp[3]; + struct regulator *vrefn[3]; + struct max11410_channel_config *channels; + int irq; + struct { + u32 data __aligned(IIO_DMA_MINALIGN); + s64 ts __aligned(8); + } scan; +}; + +static const struct iio_chan_spec chanspec_template = { + .type = IIO_VOLTAGE, + .indexed = 1, + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | + BIT(IIO_CHAN_INFO_SCALE) | + BIT(IIO_CHAN_INFO_OFFSET), + .info_mask_shared_by_all = BIT(IIO_CHAN_INFO_SAMP_FREQ), + .info_mask_shared_by_all_available = BIT(IIO_CHAN_INFO_SAMP_FREQ), + .scan_type = { + .sign = 's', + .realbits = 24, + .storagebits = 32, + .endianness = IIO_LE, + }, +}; + +static unsigned int max11410_reg_size(unsigned int reg) +{ + /* Registers from 0x00 to 0x10 are 1 byte, the rest are 3 bytes long. */ + return reg <= 0x10 ? 
1 : 3; +} + +static int max11410_write_reg(struct max11410_state *st, unsigned int reg, + unsigned int val) +{ + /* This driver only needs to write 8-bit registers */ + if (max11410_reg_size(reg) != 1) + return -EINVAL; + + return regmap_write(st->regmap, reg, val); +} + +static int max11410_read_reg(struct max11410_state *st, unsigned int reg, + int *val) +{ + int ret; + + if (max11410_reg_size(reg) == 3) { + ret = regmap_bulk_read(st->regmap, reg, &st->scan.data, 3); + if (ret) + return ret; + + *val = get_unaligned_be24(&st->scan.data); + return 0; + } + + return regmap_read(st->regmap, reg, val); +} + +static struct regulator *max11410_get_vrefp(struct max11410_state *st, + u8 refsel) +{ + refsel = refsel % 4; + if (refsel == 3) + return st->avdd; + + return st->vrefp[refsel]; +} + +static struct regulator *max11410_get_vrefn(struct max11410_state *st, + u8 refsel) +{ + if (refsel > 2) + return NULL; + + return st->vrefn[refsel]; +} + +static const struct regmap_config regmap_config = { + .reg_bits = 8, + .val_bits = 8, + .max_register = 0x39, +}; + +static ssize_t max11410_notch_en_show(struct device *dev, + struct device_attribute *devattr, + char *buf) +{ + struct iio_dev *indio_dev = dev_get_drvdata(dev); + struct max11410_state *state = iio_priv(indio_dev); + struct iio_dev_attr *iio_attr = to_iio_dev_attr(devattr); + unsigned int val; + int ret; + + ret = max11410_read_reg(state, MAX11410_REG_FILTER, &val); + if (ret) + return ret; + + switch (iio_attr->address) { + case 0: + val = !FIELD_GET(MAX11410_FILTER_50HZ, val); + break; + case 1: + val = !FIELD_GET(MAX11410_FILTER_60HZ, val); + break; + case 2: + val = FIELD_GET(MAX11410_FILTER_LINEF_MASK, val) == 3; + break; + default: + return -EINVAL; + } + + return sysfs_emit(buf, "%d\n", val); +} + +static ssize_t max11410_notch_en_store(struct device *dev, + struct device_attribute *devattr, + const char *buf, size_t count) +{ + struct iio_dev_attr *iio_attr = to_iio_dev_attr(devattr); + struct iio_dev 
*indio_dev = dev_get_drvdata(dev); + struct max11410_state *state = iio_priv(indio_dev); + unsigned int filter_bits; + bool enable; + int ret; + + ret = kstrtobool(buf, &enable); + if (ret) + return ret; + + switch (iio_attr->address) { + case 0: + filter_bits = MAX11410_FILTER_50HZ; + break; + case 1: + filter_bits = MAX11410_FILTER_60HZ; + break; + case 2: + default: + filter_bits = MAX11410_FILTER_50HZ | MAX11410_FILTER_60HZ; + enable = !enable; + break; + } + + if (enable) + ret = regmap_clear_bits(state->regmap, MAX11410_REG_FILTER, + filter_bits); + else + ret = regmap_set_bits(state->regmap, MAX11410_REG_FILTER, + filter_bits); + + if (ret) + return ret; + + return count; +} + +static ssize_t in_voltage_filter2_notch_center_show(struct device *dev, + struct device_attribute *devattr, + char *buf) +{ + struct iio_dev *indio_dev = dev_get_drvdata(dev); + struct max11410_state *state = iio_priv(indio_dev); + int ret, reg, rate, filter; + + ret = regmap_read(state->regmap, MAX11410_REG_FILTER, &reg); + if (ret) + return ret; + + rate = FIELD_GET(MAX11410_FILTER_RATE_MASK, reg); + rate = clamp_val(rate, 0, + max11410_sampling_len[MAX11410_FILTER_SINC4] - 1); + filter = max11410_sampling_rates[MAX11410_FILTER_SINC4][rate][0]; + + return sysfs_emit(buf, "%d\n", filter); +} + +static IIO_CONST_ATTR(in_voltage_filter0_notch_center, "50"); +static IIO_CONST_ATTR(in_voltage_filter1_notch_center, "60"); +static IIO_DEVICE_ATTR_RO(in_voltage_filter2_notch_center, 2); + +static IIO_DEVICE_ATTR(in_voltage_filter0_notch_en, 0644, + max11410_notch_en_show, max11410_notch_en_store, 0); +static IIO_DEVICE_ATTR(in_voltage_filter1_notch_en, 0644, + max11410_notch_en_show, max11410_notch_en_store, 1); +static IIO_DEVICE_ATTR(in_voltage_filter2_notch_en, 0644, + max11410_notch_en_show, max11410_notch_en_store, 2); + +static struct attribute *max11410_attributes[] = { + &iio_const_attr_in_voltage_filter0_notch_center.dev_attr.attr, +
&iio_const_attr_in_voltage_filter1_notch_center.dev_attr.attr, + &iio_dev_attr_in_voltage_filter2_notch_center.dev_attr.attr, + &iio_dev_attr_in_voltage_filter0_notch_en.dev_attr.attr, + &iio_dev_attr_in_voltage_filter1_notch_en.dev_attr.attr, + &iio_dev_attr_in_voltage_filter2_notch_en.dev_attr.attr, + NULL +}; + +static const struct attribute_group max11410_attribute_group = { + .attrs = max11410_attributes, +}; + +static int max11410_set_input_mux(struct max11410_state *st, u8 ainp, u8 ainn) +{ + if (ainp > MAX11410_CHANNEL_INDEX_MAX || + ainn > MAX11410_CHANNEL_INDEX_MAX) + return -EINVAL; + + return max11410_write_reg(st, MAX11410_REG_MUX_CTRL0, + (ainp << 4) | ainn); +} + +static int max11410_configure_channel(struct max11410_state *st, + struct iio_chan_spec const *chan) +{ + struct max11410_channel_config cfg = st->channels[chan->address]; + unsigned int regval; + int ret; + + if (chan->differential) + ret = max11410_set_input_mux(st, chan->channel, chan->channel2); + else + ret = max11410_set_input_mux(st, chan->channel, + MAX11410_AINN_GND); + + if (ret) + return ret; + + regval = FIELD_PREP(MAX11410_CTRL_VREFP_BUF_BIT, cfg.buffered_vrefp) | + FIELD_PREP(MAX11410_CTRL_VREFN_BUF_BIT, cfg.buffered_vrefn) | + FIELD_PREP(MAX11410_CTRL_REFSEL_MASK, cfg.refsel) | + FIELD_PREP(MAX11410_CTRL_UNIPOLAR_BIT, cfg.bipolar ? 
0 : 1); + /* The mask has to cover every field prepared in regval. */ + ret = regmap_update_bits(st->regmap, MAX11410_REG_CTRL, + MAX11410_CTRL_REFSEL_MASK | + MAX11410_CTRL_VREFP_BUF_BIT | + MAX11410_CTRL_VREFN_BUF_BIT | + MAX11410_CTRL_UNIPOLAR_BIT, regval); + if (ret) + return ret; + + regval = FIELD_PREP(MAX11410_PGA_SIG_PATH_MASK, cfg.sig_path) | + FIELD_PREP(MAX11410_PGA_GAIN_MASK, cfg.gain); + ret = regmap_write(st->regmap, MAX11410_REG_PGA, regval); + if (ret) + return ret; + + if (cfg.settling_time_us) + fsleep(cfg.settling_time_us); + + return 0; +} + +static int max11410_sample(struct max11410_state *st, int *sample_raw, + struct iio_chan_spec const *chan) +{ + int val, ret; + + ret = max11410_configure_channel(st, chan); + if (ret) + return ret; + + if (st->irq > 0) + reinit_completion(&st->completion); + + /* Start Conversion */ + ret = max11410_write_reg(st, MAX11410_REG_CONV_START, + MAX11410_CONV_TYPE_SINGLE); + if (ret) + return ret; + + if (st->irq > 0) { + /* Wait for an interrupt. */ + ret = wait_for_completion_timeout(&st->completion, + msecs_to_jiffies(MAX11410_CONVERSION_TIMEOUT_MS)); + if (!ret) + return -ETIMEDOUT; + } else { + int ret2; + + /* + * Wait for status register Conversion Ready flag. + * read_poll_timeout() stores each max11410_read_reg() result in + * ret, so keep its own timeout status in ret2 and check both. + */ + ret2 = read_poll_timeout(max11410_read_reg, ret, + ret || (val & MAX11410_STATUS_CONV_READY_BIT), + 5000, MAX11410_CONVERSION_TIMEOUT_MS * 1000, + true, st, MAX11410_REG_STATUS, &val); + if (ret2) + return ret2; + if (ret) + return ret; + } + + /* Read ADC Data */ + return max11410_read_reg(st, MAX11410_REG_DATA0, sample_raw); +} + +static int max11410_get_scale(struct max11410_state *state, + struct max11410_channel_config cfg) +{ + struct regulator *vrefp, *vrefn; + int scale; + + vrefp = max11410_get_vrefp(state, cfg.refsel); + + scale = regulator_get_voltage(vrefp) / 1000; + vrefn = max11410_get_vrefn(state, cfg.refsel); + if (vrefn) + scale -= regulator_get_voltage(vrefn) / 1000; + + if (cfg.bipolar) + scale *= 2; + + return scale >> cfg.gain; +} + +static int max11410_read_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + int
*val, int *val2, long info) +{ + struct max11410_state *state = iio_priv(indio_dev); + struct max11410_channel_config cfg = state->channels[chan->address]; + int ret, reg_val, filter, rate; + + switch (info) { + case IIO_CHAN_INFO_SCALE: + *val = max11410_get_scale(state, cfg); + *val2 = chan->scan_type.realbits; + return IIO_VAL_FRACTIONAL_LOG2; + case IIO_CHAN_INFO_OFFSET: + if (cfg.bipolar) + *val = -BIT(chan->scan_type.realbits - 1); + else + *val = 0; + + return IIO_VAL_INT; + case IIO_CHAN_INFO_RAW: + ret = iio_device_claim_direct_mode(indio_dev); + if (ret) + return ret; + + mutex_lock(&state->lock); + + ret = max11410_sample(state, &reg_val, chan); + + mutex_unlock(&state->lock); + + iio_device_release_direct_mode(indio_dev); + + if (ret) + return ret; + + *val = reg_val; + + return IIO_VAL_INT; + case IIO_CHAN_INFO_SAMP_FREQ: + ret = regmap_read(state->regmap, MAX11410_REG_FILTER, &reg_val); + if (ret) + return ret; + + filter = FIELD_GET(MAX11410_FILTER_LINEF_MASK, reg_val); + rate = reg_val & MAX11410_FILTER_RATE_MASK; + if (rate >= max11410_sampling_len[filter]) + rate = max11410_sampling_len[filter] - 1; + + *val = max11410_sampling_rates[filter][rate][0]; + *val2 = max11410_sampling_rates[filter][rate][1]; + + return IIO_VAL_INT_PLUS_MICRO; + } + return -EINVAL; +} + +static int max11410_write_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + int val, int val2, long mask) +{ + struct max11410_state *st = iio_priv(indio_dev); + int i, ret, reg_val, filter, gain; + u32 *scale_avail; + + switch (mask) { + case IIO_CHAN_INFO_SCALE: + scale_avail = st->channels[chan->address].scale_avail; + if (!scale_avail) + return -EOPNOTSUPP; + + /* Accept values in range 0.000001 <= scale < 1.000000 */ + if (val != 0 || val2 == 0) + return -EINVAL; + + ret = iio_device_claim_direct_mode(indio_dev); + if (ret) + return ret; + + /* Convert from INT_PLUS_MICRO to FRACTIONAL_LOG2 */ + val2 = val2 * DIV_ROUND_CLOSEST(BIT(24), 1000000); + val2 =
DIV_ROUND_CLOSEST(scale_avail[0], val2); + gain = order_base_2(val2); + + st->channels[chan->address].gain = clamp_val(gain, 0, 7); + + iio_device_release_direct_mode(indio_dev); + + return 0; + case IIO_CHAN_INFO_SAMP_FREQ: + ret = iio_device_claim_direct_mode(indio_dev); + if (ret) + return ret; + + mutex_lock(&st->lock); + + ret = regmap_read(st->regmap, MAX11410_REG_FILTER, &reg_val); + if (ret) + goto out; + + filter = FIELD_GET(MAX11410_FILTER_LINEF_MASK, reg_val); + + for (i = 0; i < max11410_sampling_len[filter]; ++i) { + if (val == max11410_sampling_rates[filter][i][0] && + val2 == max11410_sampling_rates[filter][i][1]) + break; + } + if (i == max11410_sampling_len[filter]) { + ret = -EINVAL; + goto out; + } + + ret = regmap_write_bits(st->regmap, MAX11410_REG_FILTER, + MAX11410_FILTER_RATE_MASK, i); + +out: + mutex_unlock(&st->lock); + iio_device_release_direct_mode(indio_dev); + + return ret; + default: + return -EINVAL; + } +} + +static int max11410_read_avail(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + const int **vals, int *type, int *length, + long info) +{ + struct max11410_state *st = iio_priv(indio_dev); + struct max11410_channel_config cfg; + int ret, reg_val, filter; + + switch (info) { + case IIO_CHAN_INFO_SAMP_FREQ: + ret = regmap_read(st->regmap, MAX11410_REG_FILTER, &reg_val); + if (ret) + return ret; + + filter = FIELD_GET(MAX11410_FILTER_LINEF_MASK, reg_val); + + *vals = (const int *)max11410_sampling_rates[filter]; + *length = max11410_sampling_len[filter] * 2; + *type = IIO_VAL_INT_PLUS_MICRO; + + return IIO_AVAIL_LIST; + case IIO_CHAN_INFO_SCALE: + cfg = st->channels[chan->address]; + + if (!cfg.scale_avail) + return -EINVAL; + + *vals = cfg.scale_avail; + *length = MAX11410_SCALE_AVAIL_SIZE * 2; + *type = IIO_VAL_FRACTIONAL_LOG2; + + return IIO_AVAIL_LIST; + } + return -EINVAL; +} + +static const struct iio_info max11410_info = { + .read_raw = max11410_read_raw, + .write_raw = max11410_write_raw, + .read_avail =
max11410_read_avail, + .attrs = &max11410_attribute_group, +}; + +static irqreturn_t max11410_trigger_handler(int irq, void *p) +{ + struct iio_poll_func *pf = p; + struct iio_dev *indio_dev = pf->indio_dev; + struct max11410_state *st = iio_priv(indio_dev); + int ret; + + ret = max11410_read_reg(st, MAX11410_REG_DATA0, &st->scan.data); + if (ret) { + dev_err(&indio_dev->dev, "cannot read data\n"); + goto out; + } + + iio_push_to_buffers_with_timestamp(indio_dev, &st->scan, + iio_get_time_ns(indio_dev)); + +out: + iio_trigger_notify_done(indio_dev->trig); + + return IRQ_HANDLED; +} + +static int max11410_buffer_postenable(struct iio_dev *indio_dev) +{ + struct max11410_state *st = iio_priv(indio_dev); + int scan_ch, ret; + + scan_ch = ffs(*indio_dev->active_scan_mask) - 1; + + ret = max11410_configure_channel(st, &indio_dev->channels[scan_ch]); + if (ret) + return ret; + + /* Start continuous conversion. */ + return max11410_write_reg(st, MAX11410_REG_CONV_START, + MAX11410_CONV_TYPE_CONTINUOUS); +} + +static int max11410_buffer_predisable(struct iio_dev *indio_dev) +{ + struct max11410_state *st = iio_priv(indio_dev); + + /* Stop continuous conversion. 
*/ + return max11410_write_reg(st, MAX11410_REG_CONV_START, + MAX11410_CONV_TYPE_SINGLE); +} + +static const struct iio_buffer_setup_ops max11410_buffer_ops = { + .postenable = &max11410_buffer_postenable, + .predisable = &max11410_buffer_predisable, + .validate_scan_mask = &iio_validate_scan_mask_onehot, +}; + +static const struct iio_trigger_ops max11410_trigger_ops = { + .validate_device = iio_trigger_validate_own_device, +}; + +static irqreturn_t max11410_interrupt(int irq, void *dev_id) +{ + struct iio_dev *indio_dev = dev_id; + struct max11410_state *st = iio_priv(indio_dev); + + if (iio_buffer_enabled(indio_dev)) + iio_trigger_poll_chained(st->trig); + else + complete(&st->completion); + + return IRQ_HANDLED; +}; + +static int max11410_parse_channels(struct max11410_state *st, + struct iio_dev *indio_dev) +{ + struct iio_chan_spec chanspec = chanspec_template; + struct device *dev = &st->spi_dev->dev; + struct max11410_channel_config *cfg; + struct iio_chan_spec *channels; + struct fwnode_handle *child; + u32 reference, sig_path; + const char *node_name; + u32 inputs[2], scale; + unsigned int num_ch; + int chan_idx = 0; + int ret, i; + + num_ch = device_get_child_node_count(dev); + if (num_ch == 0) + return dev_err_probe(&indio_dev->dev, -ENODEV, + "FW has no channels defined\n"); + + /* Reserve space for soft timestamp channel */ + num_ch++; + channels = devm_kcalloc(dev, num_ch, sizeof(*channels), GFP_KERNEL); + if (!channels) + return -ENOMEM; + + st->channels = devm_kcalloc(dev, num_ch, sizeof(*st->channels), + GFP_KERNEL); + if (!st->channels) + return -ENOMEM; + + device_for_each_child_node(dev, child) { + node_name = fwnode_get_name(child); + if (fwnode_property_present(child, "diff-channels")) { + ret = fwnode_property_read_u32_array(child, + "diff-channels", + inputs, + ARRAY_SIZE(inputs)); + + chanspec.differential = 1; + } else { + ret = fwnode_property_read_u32(child, "reg", &inputs[0]); + + inputs[1] = 0; + chanspec.differential = 0; + } + if 
(ret) { + fwnode_handle_put(child); + return ret; + } + + if (inputs[0] > MAX11410_CHANNEL_INDEX_MAX || + inputs[1] > MAX11410_CHANNEL_INDEX_MAX) { + fwnode_handle_put(child); + return dev_err_probe(&indio_dev->dev, -EINVAL, + "Invalid channel index for %s, should be less than %d\n", + node_name, + MAX11410_CHANNEL_INDEX_MAX + 1); + } + + cfg = &st->channels[chan_idx]; + + reference = MAX11410_REFSEL_AVDD_AGND; + fwnode_property_read_u32(child, "adi,reference", &reference); + if (reference > MAX11410_REFSEL_MAX) { + fwnode_handle_put(child); + return dev_err_probe(&indio_dev->dev, -EINVAL, + "Invalid adi,reference value for %s, should be less than %d.\n", + node_name, MAX11410_REFSEL_MAX + 1); + } + + if (!max11410_get_vrefp(st, reference) || + (!max11410_get_vrefn(st, reference) && reference <= 2)) { + fwnode_handle_put(child); + return dev_err_probe(&indio_dev->dev, -EINVAL, + "Invalid VREF configuration for %s, either specify corresponding VREF regulators or change adi,reference property.\n", + node_name); + } + + sig_path = MAX11410_PGA_SIG_PATH_BUFFERED; + fwnode_property_read_u32(child, "adi,input-mode", &sig_path); + if (sig_path > MAX11410_SIG_PATH_MAX) { + fwnode_handle_put(child); + return dev_err_probe(&indio_dev->dev, -EINVAL, + "Invalid adi,input-mode value for %s, should be less than %d.\n", + node_name, MAX11410_SIG_PATH_MAX + 1); + } + + fwnode_property_read_u32(child, "settling-time-us", + &cfg->settling_time_us); + cfg->bipolar = fwnode_property_read_bool(child, "bipolar"); + cfg->buffered_vrefp = fwnode_property_read_bool(child, "adi,buffered-vrefp"); + cfg->buffered_vrefn = fwnode_property_read_bool(child, "adi,buffered-vrefn"); + cfg->refsel = reference; + cfg->sig_path = sig_path; + cfg->gain = 0; + + /* Enable scale_available property if input mode is PGA */ + if (sig_path == MAX11410_PGA_SIG_PATH_PGA) { + __set_bit(IIO_CHAN_INFO_SCALE, + &chanspec.info_mask_separate_available); + cfg->scale_avail = devm_kcalloc(dev, MAX11410_SCALE_AVAIL_SIZE 
* 2, + sizeof(*cfg->scale_avail), + GFP_KERNEL); + if (!cfg->scale_avail) { + fwnode_handle_put(child); + return -ENOMEM; + } + + scale = max11410_get_scale(st, *cfg); + for (i = 0; i < MAX11410_SCALE_AVAIL_SIZE; i++) { + cfg->scale_avail[2 * i] = scale >> i; + cfg->scale_avail[2 * i + 1] = chanspec.scan_type.realbits; + } + } else { + __clear_bit(IIO_CHAN_INFO_SCALE, + &chanspec.info_mask_separate_available); + } + + chanspec.address = chan_idx; + chanspec.scan_index = chan_idx; + chanspec.channel = inputs[0]; + chanspec.channel2 = inputs[1]; + + channels[chan_idx] = chanspec; + chan_idx++; + } + + channels[chan_idx] = (struct iio_chan_spec)IIO_CHAN_SOFT_TIMESTAMP(chan_idx); + + indio_dev->num_channels = chan_idx + 1; + indio_dev->channels = channels; + + return 0; +} + +static void max11410_disable_reg(void *reg) +{ + regulator_disable(reg); +} + +static int max11410_init_vref(struct device *dev, + struct regulator **vref, + const char *id) +{ + struct regulator *reg; + int ret; + + reg = devm_regulator_get_optional(dev, id); + if (PTR_ERR(reg) == -ENODEV) { + *vref = NULL; + return 0; + } else if (IS_ERR(reg)) { + return PTR_ERR(reg); + } + ret = regulator_enable(reg); + if (ret) + return dev_err_probe(dev, ret, + "Failed to enable regulator %s\n", id); + + *vref = reg; + return devm_add_action_or_reset(dev, max11410_disable_reg, reg); +} + +static int max11410_calibrate(struct max11410_state *st, u32 cal_type) +{ + int ret, ret2, val; + + ret = max11410_write_reg(st, MAX11410_REG_CAL_START, cal_type); + if (ret) + return ret; + + /* + * Wait for status register Calibration Ready flag. + * read_poll_timeout() stores each max11410_read_reg() result in ret, + * so keep its own timeout status in ret2 and report whichever failed. + */ + ret2 = read_poll_timeout(max11410_read_reg, ret, + ret || (val & MAX11410_STATUS_CAL_READY_BIT), + 50000, MAX11410_CALIB_TIMEOUT_MS * 1000, true, + st, MAX11410_REG_STATUS, &val); + if (ret2) + return ret2; + + return ret; +} + +static int max11410_self_calibrate(struct max11410_state *st) +{ + int ret, i; + + ret = regmap_write_bits(st->regmap, MAX11410_REG_FILTER, + MAX11410_FILTER_RATE_MASK, +
FIELD_PREP(MAX11410_FILTER_RATE_MASK, + MAX11410_FILTER_RATE_MAX)); + if (ret) + return ret; + + ret = max11410_calibrate(st, MAX11410_CAL_START_SELF); + if (ret) + return ret; + + ret = regmap_write_bits(st->regmap, MAX11410_REG_PGA, + MAX11410_PGA_SIG_PATH_MASK, + FIELD_PREP(MAX11410_PGA_SIG_PATH_MASK, + MAX11410_PGA_SIG_PATH_PGA)); + if (ret) + return ret; + + /* PGA calibrations */ + for (i = 1; i < 8; ++i) { + ret = regmap_write_bits(st->regmap, MAX11410_REG_PGA, + MAX11410_PGA_GAIN_MASK, i); + if (ret) + return ret; + + ret = max11410_calibrate(st, MAX11410_CAL_START_PGA); + if (ret) + return ret; + } + + /* Cleanup */ + ret = regmap_write_bits(st->regmap, MAX11410_REG_PGA, + MAX11410_PGA_GAIN_MASK, 0); + if (ret) + return ret; + + ret = regmap_write_bits(st->regmap, MAX11410_REG_FILTER, + MAX11410_FILTER_RATE_MASK, 0); + if (ret) + return ret; + + return regmap_write_bits(st->regmap, MAX11410_REG_PGA, + MAX11410_PGA_SIG_PATH_MASK, + FIELD_PREP(MAX11410_PGA_SIG_PATH_MASK, + MAX11410_PGA_SIG_PATH_BUFFERED)); +} + +static int max11410_probe(struct spi_device *spi) +{ + const char *vrefp_regs[] = { "vref0p", "vref1p", "vref2p" }; + const char *vrefn_regs[] = { "vref0n", "vref1n", "vref2n" }; + struct device *dev = &spi->dev; + struct max11410_state *st; + struct iio_dev *indio_dev; + int ret, irqs[2]; + int i; + + indio_dev = devm_iio_device_alloc(dev, sizeof(*st)); + if (!indio_dev) + return -ENOMEM; + + st = iio_priv(indio_dev); + st->spi_dev = spi; + init_completion(&st->completion); + mutex_init(&st->lock); + + indio_dev->name = "max11410"; + indio_dev->modes = INDIO_DIRECT_MODE; + indio_dev->info = &max11410_info; + + st->regmap = devm_regmap_init_spi(spi, &regmap_config); + if (IS_ERR(st->regmap)) + return dev_err_probe(dev, PTR_ERR(st->regmap), + "regmap initialization failed\n"); + + ret = max11410_init_vref(dev, &st->avdd, "avdd"); + if (ret) + return ret; + + for (i = 0; i < ARRAY_SIZE(vrefp_regs); i++) { + ret = max11410_init_vref(dev, &st->vrefp[i],
vrefp_regs[i]); + if (ret) + return ret; + + ret = max11410_init_vref(dev, &st->vrefn[i], vrefn_regs[i]); + if (ret) + return ret; + } + + /* + * Regulators must be configured before parsing channels for + * validating "adi,reference" property of each channel. + */ + ret = max11410_parse_channels(st, indio_dev); + if (ret) + return ret; + + irqs[0] = fwnode_irq_get_byname(dev_fwnode(dev), "gpio0"); + irqs[1] = fwnode_irq_get_byname(dev_fwnode(dev), "gpio1"); + + if (irqs[0] > 0) { + st->irq = irqs[0]; + ret = regmap_write(st->regmap, MAX11410_REG_GPIO_CTRL(0), + MAX11410_GPIO_INTRB); + } else if (irqs[1] > 0) { + st->irq = irqs[1]; + ret = regmap_write(st->regmap, MAX11410_REG_GPIO_CTRL(1), + MAX11410_GPIO_INTRB); + } else if (spi->irq > 0) { + return dev_err_probe(dev, -ENODEV, + "no interrupt name specified"); + } + + if (ret) + return ret; + + ret = regmap_set_bits(st->regmap, MAX11410_REG_CTRL, + MAX11410_CTRL_FORMAT_BIT); + if (ret) + return ret; + + ret = devm_iio_triggered_buffer_setup(dev, indio_dev, NULL, + &max11410_trigger_handler, + &max11410_buffer_ops); + if (ret) + return ret; + + if (st->irq > 0) { + st->trig = devm_iio_trigger_alloc(dev, "%s-dev%d", + indio_dev->name, + iio_device_id(indio_dev)); + if (!st->trig) + return -ENOMEM; + + st->trig->ops = &max11410_trigger_ops; + ret = devm_iio_trigger_register(dev, st->trig); + if (ret) + return ret; + + ret = devm_request_threaded_irq(dev, st->irq, NULL, + &max11410_interrupt, + IRQF_ONESHOT, "max11410", + indio_dev); + if (ret) + return ret; + } + + ret = max11410_self_calibrate(st); + if (ret) + return dev_err_probe(dev, ret, + "cannot perform device self calibration\n"); + + return devm_iio_device_register(dev, indio_dev); +} + +static const struct of_device_id max11410_spi_of_id[] = { + { .compatible = "adi,max11410" }, + { } +}; +MODULE_DEVICE_TABLE(of, max11410_spi_of_id); + +static const struct spi_device_id max11410_id[] = { + { "max11410" }, + { } +}; +MODULE_DEVICE_TABLE(spi, max11410_id); + 
+static struct spi_driver max11410_driver = { + .driver = { + .name = "max11410", + .of_match_table = max11410_spi_of_id, + }, + .probe = max11410_probe, + .id_table = max11410_id, +}; +module_spi_driver(max11410_driver); + +MODULE_AUTHOR("David Jung "); +MODULE_AUTHOR("Ibrahim Tilki "); +MODULE_DESCRIPTION("Analog Devices MAX11410 ADC"); +MODULE_LICENSE("GPL"); From 089ec09f4973898c60c6781c448384bf828e7f45 Mon Sep 17 00:00:00 2001 From: Ibrahim Tilki Date: Mon, 3 Oct 2022 13:59:02 +0300 Subject: [PATCH 2271/4122] dt-bindings: iio: adc: add adi,max11410.yaml Adding devicetree binding documentation for max11410 adc. Signed-off-by: Ibrahim Tilki Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221003105903.229-3-Ibrahim.Tilki@analog.com Signed-off-by: Jonathan Cameron --- .../bindings/iio/adc/adi,max11410.yaml | 177 ++++++++++++++++++ 1 file changed, 177 insertions(+) create mode 100644 Documentation/devicetree/bindings/iio/adc/adi,max11410.yaml diff --git a/Documentation/devicetree/bindings/iio/adc/adi,max11410.yaml b/Documentation/devicetree/bindings/iio/adc/adi,max11410.yaml new file mode 100644 index 000000000000..53f9feff137b --- /dev/null +++ b/Documentation/devicetree/bindings/iio/adc/adi,max11410.yaml @@ -0,0 +1,177 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +# Copyright 2022 Analog Devices Inc. +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/iio/adc/adi,max11410.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Analog Devices MAX11410 ADC device driver + +maintainers: + - Ibrahim Tilki + +description: | + Bindings for the Analog Devices MAX11410 ADC device. 
Datasheet can be + found here: + https://datasheets.maximintegrated.com/en/ds/MAX11410.pdf + +properties: + compatible: + enum: + - adi,max11410 + + reg: + maxItems: 1 + + interrupts: + minItems: 1 + maxItems: 2 + + interrupt-names: + description: Name of the gpio pin of max11410 used for IRQ + minItems: 1 + items: + - enum: [gpio0, gpio1] + - const: gpio1 + + '#address-cells': + const: 1 + + '#size-cells': + const: 0 + + avdd-supply: + description: Optional avdd supply. Used as reference when no explicit reference supplied. + + vref0p-supply: + description: vref0p supply can be used as reference for conversion. + + vref1p-supply: + description: vref1p supply can be used as reference for conversion. + + vref2p-supply: + description: vref2p supply can be used as reference for conversion. + + vref0n-supply: + description: vref0n supply can be used as reference for conversion. + + vref1n-supply: + description: vref1n supply can be used as reference for conversion. + + vref2n-supply: + description: vref2n supply can be used as reference for conversion. + + spi-max-frequency: + maximum: 8000000 + +patternProperties: + "^channel(@[0-9])?$": + $ref: adc.yaml + type: object + description: Represents the external channels which are connected to the ADC. + + properties: + reg: + description: The channel number in single-ended mode. + minimum: 0 + maximum: 9 + + adi,reference: + description: | + Select the reference source to use when converting on + the specific channel. Valid values are: + 0: VREF0P/VREF0N + 1: VREF1P/VREF1N + 2: VREF2P/VREF2N + 3: AVDD/AGND + 4: VREF0P/AGND + 5: VREF1P/AGND + 6: VREF2P/AGND + If this field is left empty, AVDD/AGND is selected. + $ref: /schemas/types.yaml#/definitions/uint32 + enum: [0, 1, 2, 3, 4, 5, 6] + default: 3 + + adi,input-mode: + description: | + Select signal path of input channels. 
Valid values are: + 0: Buffered, low-power, unity-gain path (default) + 1: Bypass path + 2: PGA path + $ref: /schemas/types.yaml#/definitions/uint32 + enum: [0, 1, 2] + default: 0 + + diff-channels: true + + bipolar: true + + settling-time-us: true + + adi,buffered-vrefp: + description: Enable buffered mode for positive reference. + type: boolean + + adi,buffered-vrefn: + description: Enable buffered mode for negative reference. + type: boolean + + required: + - reg + + additionalProperties: false + +required: + - compatible + - reg + +allOf: + - $ref: /schemas/spi/spi-peripheral-props.yaml# + +unevaluatedProperties: false + +examples: + - | + #include + spi { + #address-cells = <1>; + #size-cells = <0>; + + adc@0 { + reg = <0>; + compatible = "adi,max11410"; + spi-max-frequency = <8000000>; + + interrupt-parent = <&gpio>; + interrupts = <25 IRQ_TYPE_EDGE_FALLING>; + interrupt-names = "gpio1"; + + avdd-supply = <&adc_avdd>; + + vref1p-supply = <&adc_vref1p>; + vref1n-supply = <&adc_vref1n>; + + #address-cells = <1>; + #size-cells = <0>; + + channel@0 { + reg = <0>; + }; + + channel@1 { + reg = <1>; + diff-channels = <2 3>; + adi,reference = <1>; + bipolar; + settling-time-us = <100000>; + }; + + channel@2 { + reg = <2>; + diff-channels = <7 9>; + adi,reference = <5>; + adi,input-mode = <2>; + settling-time-us = <50000>; + }; + }; + }; From 8d2caf8b98f17f44f76a002ced7c7ce6425652bc Mon Sep 17 00:00:00 2001 From: Ibrahim Tilki Date: Mon, 3 Oct 2022 13:59:03 +0300 Subject: [PATCH 2272/4122] Documentation: ABI: testing: add max11410 doc Adding documentation for Analog Devices max11410 adc userspace sysfs. 
Signed-off-by: Ibrahim Tilki Link: https://lore.kernel.org/r/20221003105903.229-4-Ibrahim.Tilki@analog.com Signed-off-by: Jonathan Cameron --- .../ABI/testing/sysfs-bus-iio-adc-max11410 | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-bus-iio-adc-max11410 diff --git a/Documentation/ABI/testing/sysfs-bus-iio-adc-max11410 b/Documentation/ABI/testing/sysfs-bus-iio-adc-max11410 new file mode 100644 index 000000000000..2a53c6b37360 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-iio-adc-max11410 @@ -0,0 +1,13 @@ +What: /sys/bus/iio/devices/iio:deviceX/in_voltage_filterY_notch_en +Date: September 2022 +KernelVersion: 6.0 +Contact: linux-iio@vger.kernel.org +Description: + Enable or disable a notch filter. + +What: /sys/bus/iio/devices/iio:deviceX/in_voltage_filterY_notch_center +Date: September 2022 +KernelVersion: 6.0 +Contact: linux-iio@vger.kernel.org +Description: + Center frequency of the notch filter in Hz. From c1404d1b659fe3d7d13bdbd59e5161ab508dc101 Mon Sep 17 00:00:00 2001 From: ChiaEn Wu Date: Tue, 11 Oct 2022 12:05:45 +0800 Subject: [PATCH 2273/4122] iio: adc: mt6370: Add MediaTek MT6370 support MediaTek MT6370 is a SubPMIC consisting of a single cell battery charger with ADC monitoring, RGB LEDs, dual channel flashlight, WLED backlight driver, display bias voltage supply, one general purpose LDO, and the USB Type-C & PD controller complies with the latest USB Type-C and PD standards. Add support for the MT6370 ADC driver for system monitoring, including charger current, voltage, and temperature. 
Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Andy Shevchenko Signed-off-by: ChiaEn Wu Link: https://lore.kernel.org/r/81ec58ae89030e48508d6810396de2679c40d26c.1665488982.git.chiaen_wu@richtek.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/Kconfig | 12 ++ drivers/iio/adc/Makefile | 1 + drivers/iio/adc/mt6370-adc.c | 305 +++++++++++++++++++++++++++++++++++ 3 files changed, 318 insertions(+) create mode 100644 drivers/iio/adc/mt6370-adc.c diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index 544986fd456d..5386d862def9 100644 --- a/drivers/iio/adc/Kconfig +++ b/drivers/iio/adc/Kconfig @@ -765,6 +765,18 @@ config MEDIATEK_MT6360_ADC is used in smartphones and tablets and supports a 11 channel general purpose ADC. +config MEDIATEK_MT6370_ADC + tristate "MediaTek MT6370 ADC driver" + depends on MFD_MT6370 + help + Say yes here to enable MediaTek MT6370 ADC support. + + This ADC driver provides 9 channels for system monitoring (charger + current, voltage, and temperature). + + This driver can also be built as a module. If so, the module + will be called "mt6370-adc". 
+ config MEDIATEK_MT6577_AUXADC tristate "MediaTek AUXADC driver" depends on ARCH_MEDIATEK || COMPILE_TEST diff --git a/drivers/iio/adc/Makefile b/drivers/iio/adc/Makefile index 4d803413425c..1571e891828e 100644 --- a/drivers/iio/adc/Makefile +++ b/drivers/iio/adc/Makefile @@ -70,6 +70,7 @@ obj-$(CONFIG_MCP320X) += mcp320x.o obj-$(CONFIG_MCP3422) += mcp3422.o obj-$(CONFIG_MCP3911) += mcp3911.o obj-$(CONFIG_MEDIATEK_MT6360_ADC) += mt6360-adc.o +obj-$(CONFIG_MEDIATEK_MT6370_ADC) += mt6370-adc.o obj-$(CONFIG_MEDIATEK_MT6577_AUXADC) += mt6577_auxadc.o obj-$(CONFIG_MEN_Z188_ADC) += men_z188_adc.o obj-$(CONFIG_MESON_SARADC) += meson_saradc.o diff --git a/drivers/iio/adc/mt6370-adc.c b/drivers/iio/adc/mt6370-adc.c new file mode 100644 index 000000000000..bc62e5a9d50d --- /dev/null +++ b/drivers/iio/adc/mt6370-adc.c @@ -0,0 +1,305 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2022 Richtek Technology Corp. + * + * Author: ChiaEn Wu + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define MT6370_REG_CHG_CTRL3 0x113 +#define MT6370_REG_CHG_CTRL7 0x117 +#define MT6370_REG_CHG_ADC 0x121 +#define MT6370_REG_ADC_DATA_H 0x14C + +#define MT6370_ADC_START_MASK BIT(0) +#define MT6370_ADC_IN_SEL_MASK GENMASK(7, 4) +#define MT6370_AICR_ICHG_MASK GENMASK(7, 2) + +#define MT6370_AICR_100_mA 0x0 +#define MT6370_AICR_150_mA 0x1 +#define MT6370_AICR_200_mA 0x2 +#define MT6370_AICR_250_mA 0x3 +#define MT6370_AICR_300_mA 0x4 +#define MT6370_AICR_350_mA 0x5 + +#define MT6370_ICHG_100_mA 0x0 +#define MT6370_ICHG_200_mA 0x1 +#define MT6370_ICHG_300_mA 0x2 +#define MT6370_ICHG_400_mA 0x3 +#define MT6370_ICHG_500_mA 0x4 +#define MT6370_ICHG_600_mA 0x5 +#define MT6370_ICHG_700_mA 0x6 +#define MT6370_ICHG_800_mA 0x7 + +#define ADC_CONV_TIME_MS 35 +#define ADC_CONV_POLLING_TIME_US 1000 + +struct mt6370_adc_data { + struct device *dev; + struct regmap *regmap; + /* + * This mutex lock is for 
preventing the different ADC channels + * from being read at the same time. + */ + struct mutex adc_lock; +}; + +static int mt6370_adc_read_channel(struct mt6370_adc_data *priv, int chan, + unsigned long addr, int *val) +{ + unsigned int reg_val; + __be16 be_val; + int ret; + + mutex_lock(&priv->adc_lock); + + reg_val = MT6370_ADC_START_MASK | + FIELD_PREP(MT6370_ADC_IN_SEL_MASK, addr); + ret = regmap_write(priv->regmap, MT6370_REG_CHG_ADC, reg_val); + if (ret) + goto adc_unlock; + + msleep(ADC_CONV_TIME_MS); + + ret = regmap_read_poll_timeout(priv->regmap, + MT6370_REG_CHG_ADC, reg_val, + !(reg_val & MT6370_ADC_START_MASK), + ADC_CONV_POLLING_TIME_US, + ADC_CONV_TIME_MS * MILLI * 3); + if (ret) { + dev_err(priv->dev, "Failed to read ADC register (%d)\n", ret); + goto adc_unlock; + } + + ret = regmap_raw_read(priv->regmap, MT6370_REG_ADC_DATA_H, + &be_val, sizeof(be_val)); + if (ret) + goto adc_unlock; + + *val = be16_to_cpu(be_val); + ret = IIO_VAL_INT; + +adc_unlock: + mutex_unlock(&priv->adc_lock); + + return ret; +} + +static int mt6370_adc_read_scale(struct mt6370_adc_data *priv, + int chan, int *val1, int *val2) +{ + unsigned int reg_val; + int ret; + + switch (chan) { + case MT6370_CHAN_VBAT: + case MT6370_CHAN_VSYS: + case MT6370_CHAN_CHG_VDDP: + *val1 = 5; + return IIO_VAL_INT; + case MT6370_CHAN_IBUS: + ret = regmap_read(priv->regmap, MT6370_REG_CHG_CTRL3, &reg_val); + if (ret) + return ret; + + reg_val = FIELD_GET(MT6370_AICR_ICHG_MASK, reg_val); + switch (reg_val) { + case MT6370_AICR_100_mA: + case MT6370_AICR_150_mA: + case MT6370_AICR_200_mA: + case MT6370_AICR_250_mA: + case MT6370_AICR_300_mA: + case MT6370_AICR_350_mA: + *val1 = 3350; + break; + default: + *val1 = 5000; + break; + } + + *val2 = 100; + + return IIO_VAL_FRACTIONAL; + case MT6370_CHAN_IBAT: + ret = regmap_read(priv->regmap, MT6370_REG_CHG_CTRL7, &reg_val); + if (ret) + return ret; + + reg_val = FIELD_GET(MT6370_AICR_ICHG_MASK, reg_val); + switch (reg_val) { + case MT6370_ICHG_100_mA: +
case MT6370_ICHG_200_mA: + case MT6370_ICHG_300_mA: + case MT6370_ICHG_400_mA: + *val1 = 2375; + break; + case MT6370_ICHG_500_mA: + case MT6370_ICHG_600_mA: + case MT6370_ICHG_700_mA: + case MT6370_ICHG_800_mA: + *val1 = 2680; + break; + default: + *val1 = 5000; + break; + } + + *val2 = 100; + + return IIO_VAL_FRACTIONAL; + case MT6370_CHAN_VBUSDIV5: + *val1 = 25; + return IIO_VAL_INT; + case MT6370_CHAN_VBUSDIV2: + *val1 = 10; + return IIO_VAL_INT; + case MT6370_CHAN_TS_BAT: + *val1 = 25; + *val2 = 10000; + return IIO_VAL_FRACTIONAL; + case MT6370_CHAN_TEMP_JC: + *val1 = 2000; + return IIO_VAL_INT; + default: + return -EINVAL; + } +} + +static int mt6370_adc_read_offset(struct mt6370_adc_data *priv, + int chan, int *val) +{ + *val = -20; + + return IIO_VAL_INT; +} + +static int mt6370_adc_read_raw(struct iio_dev *iio_dev, + const struct iio_chan_spec *chan, + int *val, int *val2, long mask) +{ + struct mt6370_adc_data *priv = iio_priv(iio_dev); + + switch (mask) { + case IIO_CHAN_INFO_RAW: + return mt6370_adc_read_channel(priv, chan->channel, + chan->address, val); + case IIO_CHAN_INFO_SCALE: + return mt6370_adc_read_scale(priv, chan->channel, val, val2); + case IIO_CHAN_INFO_OFFSET: + return mt6370_adc_read_offset(priv, chan->channel, val); + default: + return -EINVAL; + } +} + +static const char * const mt6370_channel_labels[MT6370_CHAN_MAX] = { + [MT6370_CHAN_VBUSDIV5] = "vbusdiv5", + [MT6370_CHAN_VBUSDIV2] = "vbusdiv2", + [MT6370_CHAN_VSYS] = "vsys", + [MT6370_CHAN_VBAT] = "vbat", + [MT6370_CHAN_TS_BAT] = "ts_bat", + [MT6370_CHAN_IBUS] = "ibus", + [MT6370_CHAN_IBAT] = "ibat", + [MT6370_CHAN_CHG_VDDP] = "chg_vddp", + [MT6370_CHAN_TEMP_JC] = "temp_jc", +}; + +static int mt6370_adc_read_label(struct iio_dev *iio_dev, + struct iio_chan_spec const *chan, char *label) +{ + return sysfs_emit(label, "%s\n", mt6370_channel_labels[chan->channel]); +} + +static const struct iio_info mt6370_adc_iio_info = { + .read_raw = mt6370_adc_read_raw, + .read_label = 
mt6370_adc_read_label, +}; + +#define MT6370_ADC_CHAN(_idx, _type, _addr, _extra_info) { \ + .type = _type, \ + .channel = MT6370_CHAN_##_idx, \ + .address = _addr, \ + .scan_index = MT6370_CHAN_##_idx, \ + .indexed = 1, \ + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | \ + BIT(IIO_CHAN_INFO_SCALE) | \ + _extra_info, \ +} + +static const struct iio_chan_spec mt6370_adc_channels[] = { + MT6370_ADC_CHAN(VBUSDIV5, IIO_VOLTAGE, 1, 0), + MT6370_ADC_CHAN(VBUSDIV2, IIO_VOLTAGE, 2, 0), + MT6370_ADC_CHAN(VSYS, IIO_VOLTAGE, 3, 0), + MT6370_ADC_CHAN(VBAT, IIO_VOLTAGE, 4, 0), + MT6370_ADC_CHAN(TS_BAT, IIO_VOLTAGE, 6, 0), + MT6370_ADC_CHAN(IBUS, IIO_CURRENT, 8, 0), + MT6370_ADC_CHAN(IBAT, IIO_CURRENT, 9, 0), + MT6370_ADC_CHAN(CHG_VDDP, IIO_VOLTAGE, 11, 0), + MT6370_ADC_CHAN(TEMP_JC, IIO_TEMP, 12, BIT(IIO_CHAN_INFO_OFFSET)), +}; + +static int mt6370_adc_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct mt6370_adc_data *priv; + struct iio_dev *indio_dev; + struct regmap *regmap; + int ret; + + regmap = dev_get_regmap(pdev->dev.parent, NULL); + if (!regmap) + return dev_err_probe(dev, -ENODEV, "Failed to get regmap\n"); + + indio_dev = devm_iio_device_alloc(dev, sizeof(*priv)); + if (!indio_dev) + return -ENOMEM; + + priv = iio_priv(indio_dev); + priv->dev = dev; + priv->regmap = regmap; + mutex_init(&priv->adc_lock); + + ret = regmap_write(priv->regmap, MT6370_REG_CHG_ADC, 0); + if (ret) + return dev_err_probe(dev, ret, "Failed to reset ADC\n"); + + indio_dev->name = "mt6370-adc"; + indio_dev->info = &mt6370_adc_iio_info; + indio_dev->modes = INDIO_DIRECT_MODE; + indio_dev->channels = mt6370_adc_channels; + indio_dev->num_channels = ARRAY_SIZE(mt6370_adc_channels); + + return devm_iio_device_register(dev, indio_dev); +} + +static const struct of_device_id mt6370_adc_of_id[] = { + { .compatible = "mediatek,mt6370-adc", }, + {} +}; +MODULE_DEVICE_TABLE(of, mt6370_adc_of_id); + +static struct platform_driver mt6370_adc_driver = { + .driver = { + 
.name = "mt6370-adc", + .of_match_table = mt6370_adc_of_id, + }, + .probe = mt6370_adc_probe, +}; +module_platform_driver(mt6370_adc_driver); + +MODULE_AUTHOR("ChiaEn Wu "); +MODULE_DESCRIPTION("MT6370 ADC Driver"); +MODULE_LICENSE("GPL v2"); From cd83c5c10036a2a156d725725daf3409832c8a24 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 10 Oct 2022 13:07:26 +0200 Subject: [PATCH 2274/4122] iio: imu: st_lsm6dsx: introduce st_lsm6dsx_device_set_enable routine Introduce st_lsm6dsx_device_set_enable utility routine and remove duplicated code used to enable/disable sensors Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/e3fbe5d4a3bed41130908669f745f78c8505cf47.1665399959.git.lorenzo@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h | 11 +++++++++++ drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c | 14 +++----------- drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c | 14 ++------------ 3 files changed, 16 insertions(+), 23 deletions(-) diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h index 6b57d47be69e..d3a4e21bc114 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h @@ -509,6 +509,17 @@ st_lsm6dsx_get_mount_matrix(const struct iio_dev *iio_dev, return &hw->orientation; } +static inline int +st_lsm6dsx_device_set_enable(struct st_lsm6dsx_sensor *sensor, bool enable) +{ + if (sensor->id == ST_LSM6DSX_ID_EXT0 || + sensor->id == ST_LSM6DSX_ID_EXT1 || + sensor->id == ST_LSM6DSX_ID_EXT2) + return st_lsm6dsx_shub_set_enable(sensor, enable); + + return st_lsm6dsx_sensor_set_enable(sensor, enable); +} + static const struct iio_chan_spec_ext_info __maybe_unused st_lsm6dsx_accel_ext_info[] = { IIO_MOUNT_MATRIX(IIO_SHARED_BY_ALL, st_lsm6dsx_get_mount_matrix), diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c index e49f2d120ed3..48fe6a45671b 100644 --- 
a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c @@ -673,17 +673,9 @@ int st_lsm6dsx_update_fifo(struct st_lsm6dsx_sensor *sensor, bool enable) goto out; } - if (sensor->id == ST_LSM6DSX_ID_EXT0 || - sensor->id == ST_LSM6DSX_ID_EXT1 || - sensor->id == ST_LSM6DSX_ID_EXT2) { - err = st_lsm6dsx_shub_set_enable(sensor, enable); - if (err < 0) - goto out; - } else { - err = st_lsm6dsx_sensor_set_enable(sensor, enable); - if (err < 0) - goto out; - } + err = st_lsm6dsx_device_set_enable(sensor, enable); + if (err < 0) + goto out; err = st_lsm6dsx_set_fifo_odr(sensor, enable); if (err < 0) diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c index f8bbb005718e..6af2e905c161 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c @@ -2317,12 +2317,7 @@ static int st_lsm6dsx_suspend(struct device *dev) continue; } - if (sensor->id == ST_LSM6DSX_ID_EXT0 || - sensor->id == ST_LSM6DSX_ID_EXT1 || - sensor->id == ST_LSM6DSX_ID_EXT2) - err = st_lsm6dsx_shub_set_enable(sensor, false); - else - err = st_lsm6dsx_sensor_set_enable(sensor, false); + err = st_lsm6dsx_device_set_enable(sensor, false); if (err < 0) return err; @@ -2353,12 +2348,7 @@ static int st_lsm6dsx_resume(struct device *dev) if (!(hw->suspend_mask & BIT(sensor->id))) continue; - if (sensor->id == ST_LSM6DSX_ID_EXT0 || - sensor->id == ST_LSM6DSX_ID_EXT1 || - sensor->id == ST_LSM6DSX_ID_EXT2) - err = st_lsm6dsx_shub_set_enable(sensor, true); - else - err = st_lsm6dsx_sensor_set_enable(sensor, true); + err = st_lsm6dsx_device_set_enable(sensor, true); if (err < 0) return err; From 6cafcdb19ee4a0a073e4a8b0828698f8816c800c Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Wed, 12 Oct 2022 16:21:59 +0200 Subject: [PATCH 2275/4122] dt-bindings: iio: adc: stm32-adc: add stm32mp13 compatibles Add st,stm32mp13-adc-core and st,stm32mp13-adc compatibles to support 
STM32MP13 SoC. On STM32MP13x, each ADC peripheral has a single ADC block. These ADC peripherals, ADC1 and ADC2, are fully independent. Main characteristics of STM32MP13x ADC: - One interrupt line per ADC - 6 to 12 bits resolution - 19 channels ADC2 instance supports two extra internal channels VDDCPU and VDDQ_DDR. Add "vddcpu" and "vddq_ddr" internal channels names to the reserved labels list. Signed-off-by: Olivier Moysan Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20221012142205.13041-3-olivier.moysan@foss.st.com Signed-off-by: Jonathan Cameron --- .../bindings/iio/adc/st,stm32-adc.yaml | 68 ++++++++++++++++++- 1 file changed, 65 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml b/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml index fa8da42cb1e6..05265f381fde 100644 --- a/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml +++ b/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml @@ -27,6 +27,7 @@ properties: - st,stm32f4-adc-core - st,stm32h7-adc-core - st,stm32mp1-adc-core + - st,stm32mp13-adc-core reg: maxItems: 1 @@ -37,6 +38,7 @@ properties: - stm32f4 and stm32h7 share a common ADC interrupt line. - stm32mp1 has two separate interrupt lines, one for each ADC within ADC block. + - stm32mp13 has an interrupt line per ADC block. 
minItems: 1 maxItems: 2 @@ -180,6 +182,33 @@ allOf: maximum: 36000000 default: 36000000 + - if: + properties: + compatible: + contains: + const: st,stm32mp13-adc-core + + then: + properties: + clocks: + minItems: 1 + maxItems: 2 + + clock-names: + items: + - const: bus + - const: adc + minItems: 1 + + interrupts: + items: + - description: ADC interrupt line + + st,max-clk-rate-hz: + minimum: 150000 + maximum: 75000000 + default: 75000000 + additionalProperties: false required: @@ -208,6 +237,7 @@ patternProperties: - st,stm32f4-adc - st,stm32h7-adc - st,stm32mp1-adc + - st,stm32mp13-adc reg: description: | @@ -229,7 +259,7 @@ patternProperties: interrupts: description: | IRQ Line for the ADC instance. Valid values are: - - 0 for adc@0 + - 0 for adc@0 (single adc for stm32mp13) - 1 for adc@100 - 2 for adc@200 (stm32f4 only) maxItems: 1 @@ -250,13 +280,14 @@ patternProperties: assigned-resolution-bits: description: | Resolution (bits) to use for conversions: - - can be 6, 8, 10 or 12 on stm32f4 + - can be 6, 8, 10 or 12 on stm32f4 and stm32mp13 - can be 8, 10, 12, 14 or 16 on stm32h7 and stm32mp1 st,adc-channels: description: | List of single-ended channels muxed for this ADC. It can have up to: - 16 channels, numbered from 0 to 15 (for in0..in15) on stm32f4 + - 19 channels, numbered from 0 to 18 (for in0..in18) on stm32mp13. - 20 channels, numbered from 0 to 19 (for in0..in19) on stm32h7 and stm32mp1. $ref: /schemas/types.yaml#/definitions/uint32-array @@ -322,7 +353,7 @@ patternProperties: label: description: | Unique name to identify which channel this is. - Reserved label names "vddcore", "vrefint" and "vbat" + Reserved label names "vddcore", "vddcpu", "vddq_ddr", "vrefint" and "vbat" are used to identify internal channels with matching names. 
diff-channels: @@ -419,6 +450,37 @@ patternProperties: items: minimum: 40 + + - if: + properties: + compatible: + contains: + const: st,stm32mp13-adc + + then: + properties: + reg: + const: 0x0 + + interrupts: + const: 0 + + assigned-resolution-bits: + enum: [6, 8, 10, 12] + default: 12 + + st,adc-channels: + minItems: 1 + maxItems: 19 + items: + minimum: 0 + maximum: 18 + + st,min-sample-time-nsecs: + minItems: 1 + maxItems: 19 + items: + minimum: 40 additionalProperties: false required: From cf0fb80ae1675179620d964d629b43ca57219ced Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Wed, 12 Oct 2022 16:22:00 +0200 Subject: [PATCH 2276/4122] iio: adc: stm32-adc: add stm32mp13 support Add STM32 ADC support for STM32MP13x SOCs family. On STM32MP13x, each ADC peripheral has a single ADC block. These ADC peripherals, ADC1 and ADC2, are fully independent. This introduces changes in common registers handling. Some features such as boost mode, channel preselection and linear calibration are not supported by the STM32MP13x ADC. Add diversity management for these features. The STM32MP13x ADC introduces registers and bitfield variants on existing features such as calibration factors and internal channels. Add register diversity management. Add also support for new internal channels VDDCPU and VDDQ_DDR. 
Signed-off-by: Olivier Moysan Reviewed-by: Fabrice Gasnier Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221012142205.13041-4-olivier.moysan@foss.st.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/stm32-adc-core.c | 30 ++++- drivers/iio/adc/stm32-adc-core.h | 30 +++++ drivers/iio/adc/stm32-adc.c | 210 +++++++++++++++++++++++++++---- 3 files changed, 239 insertions(+), 31 deletions(-) diff --git a/drivers/iio/adc/stm32-adc-core.c b/drivers/iio/adc/stm32-adc-core.c index 81d5db91c67b..48f02dcc81c1 100644 --- a/drivers/iio/adc/stm32-adc-core.c +++ b/drivers/iio/adc/stm32-adc-core.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "stm32-adc-core.h" @@ -306,8 +307,8 @@ out: static const struct stm32_adc_common_regs stm32f4_adc_common_regs = { .csr = STM32F4_ADC_CSR, .ccr = STM32F4_ADC_CCR, - .eoc_msk = { STM32F4_EOC1, STM32F4_EOC2, STM32F4_EOC3}, - .ovr_msk = { STM32F4_OVR1, STM32F4_OVR2, STM32F4_OVR3}, + .eoc_msk = { STM32F4_EOC1, STM32F4_EOC2, STM32F4_EOC3 }, + .ovr_msk = { STM32F4_OVR1, STM32F4_OVR2, STM32F4_OVR3 }, .ier = STM32F4_ADC_CR1, .eocie_msk = STM32F4_EOCIE, }; @@ -316,8 +317,18 @@ static const struct stm32_adc_common_regs stm32f4_adc_common_regs = { static const struct stm32_adc_common_regs stm32h7_adc_common_regs = { .csr = STM32H7_ADC_CSR, .ccr = STM32H7_ADC_CCR, - .eoc_msk = { STM32H7_EOC_MST, STM32H7_EOC_SLV}, - .ovr_msk = { STM32H7_OVR_MST, STM32H7_OVR_SLV}, + .eoc_msk = { STM32H7_EOC_MST, STM32H7_EOC_SLV }, + .ovr_msk = { STM32H7_OVR_MST, STM32H7_OVR_SLV }, + .ier = STM32H7_ADC_IER, + .eocie_msk = STM32H7_EOCIE, +}; + +/* STM32MP13 common registers definitions */ +static const struct stm32_adc_common_regs stm32mp13_adc_common_regs = { + .csr = STM32H7_ADC_CSR, + .ccr = STM32H7_ADC_CCR, + .eoc_msk = { STM32H7_EOC_MST }, + .ovr_msk = { STM32H7_OVR_MST }, .ier = STM32H7_ADC_IER, .eocie_msk = STM32H7_EOCIE, }; @@ -868,6 +879,14 @@ static const struct stm32_adc_priv_cfg stm32mp1_adc_priv_cfg = { .num_irqs = 2, }; 
+static const struct stm32_adc_priv_cfg stm32mp13_adc_priv_cfg = { + .regs = &stm32mp13_adc_common_regs, + .clk_sel = stm32h7_adc_clk_sel, + .max_clk_rate_hz = 75 * HZ_PER_MHZ, + .ipid = STM32MP13_IPIDR_NUMBER, + .num_irqs = 1, +}; + static const struct of_device_id stm32_adc_of_match[] = { { .compatible = "st,stm32f4-adc-core", @@ -878,6 +897,9 @@ static const struct of_device_id stm32_adc_of_match[] = { }, { .compatible = "st,stm32mp1-adc-core", .data = (void *)&stm32mp1_adc_priv_cfg + }, { + .compatible = "st,stm32mp13-adc-core", + .data = (void *)&stm32mp13_adc_priv_cfg }, { }, }; diff --git a/drivers/iio/adc/stm32-adc-core.h b/drivers/iio/adc/stm32-adc-core.h index 2118ef63843d..9d6dfa1c03fa 100644 --- a/drivers/iio/adc/stm32-adc-core.h +++ b/drivers/iio/adc/stm32-adc-core.h @@ -112,6 +112,11 @@ #define STM32MP1_ADC_IPDR 0x3F8 #define STM32MP1_ADC_SIDR 0x3FC +/* STM32MP13 - Registers for each ADC instance */ +#define STM32MP13_ADC_DIFSEL 0xB0 +#define STM32MP13_ADC_CALFACT 0xB4 +#define STM32MP13_ADC2_OR 0xC8 + /* STM32H7 - common registers for all ADC instances */ #define STM32H7_ADC_CSR (STM32_ADCX_COMN_OFFSET + 0x00) #define STM32H7_ADC_CCR (STM32_ADCX_COMN_OFFSET + 0x08) @@ -161,6 +166,9 @@ enum stm32h7_adc_dmngt { STM32H7_DMNGT_DMA_CIRC, /* DMA circular mode */ }; +/* STM32H7_ADC_DIFSEL - bit fields */ +#define STM32H7_DIFSEL_MASK GENMASK(19, 0) + /* STM32H7_ADC_CALFACT - bit fields */ #define STM32H7_CALFACT_D_SHIFT 16 #define STM32H7_CALFACT_D_MASK GENMASK(26, 16) @@ -210,7 +218,29 @@ enum stm32h7_adc_dmngt { /* STM32MP1_ADC_SIDR - bit fields */ #define STM32MP1_SIDR_MASK GENMASK(31, 0) +/* STM32MP13_ADC_CFGR specific bit fields */ +#define STM32MP13_DMAEN BIT(0) +#define STM32MP13_DMACFG BIT(1) +#define STM32MP13_DFSDMCFG BIT(2) +#define STM32MP13_RES_SHIFT 3 +#define STM32MP13_RES_MASK GENMASK(4, 3) + +/* STM32MP13_ADC_DIFSEL - bit fields */ +#define STM32MP13_DIFSEL_MASK GENMASK(18, 0) + +/* STM32MP13_ADC_CALFACT - bit fields */ +#define 
STM32MP13_CALFACT_D_SHIFT 16 +#define STM32MP13_CALFACT_D_MASK GENMASK(22, 16) +#define STM32MP13_CALFACT_S_SHIFT 0 +#define STM32MP13_CALFACT_S_MASK GENMASK(6, 0) + +/* STM32MP13_ADC2_OR - bit fields */ +#define STM32MP13_OP2 BIT(2) +#define STM32MP13_OP1 BIT(1) +#define STM32MP13_OP0 BIT(0) + #define STM32MP15_IPIDR_NUMBER 0x00110005 +#define STM32MP13_IPIDR_NUMBER 0x00110006 /** * struct stm32_adc_common - stm32 ADC driver common data (for all instances) diff --git a/drivers/iio/adc/stm32-adc.c b/drivers/iio/adc/stm32-adc.c index 3cda529f081d..d36c024526f2 100644 --- a/drivers/iio/adc/stm32-adc.c +++ b/drivers/iio/adc/stm32-adc.c @@ -82,6 +82,8 @@ enum stm32_adc_extsel { enum stm32_adc_int_ch { STM32_ADC_INT_CH_NONE = -1, STM32_ADC_INT_CH_VDDCORE, + STM32_ADC_INT_CH_VDDCPU, + STM32_ADC_INT_CH_VDDQ_DDR, STM32_ADC_INT_CH_VREFINT, STM32_ADC_INT_CH_VBAT, STM32_ADC_INT_CH_NB, @@ -99,6 +101,8 @@ struct stm32_adc_ic { static const struct stm32_adc_ic stm32_adc_ic[STM32_ADC_INT_CH_NB] = { { "vddcore", STM32_ADC_INT_CH_VDDCORE }, + { "vddcpu", STM32_ADC_INT_CH_VDDCPU }, + { "vddq_ddr", STM32_ADC_INT_CH_VDDQ_DDR }, { "vrefint", STM32_ADC_INT_CH_VREFINT }, { "vbat", STM32_ADC_INT_CH_VBAT }, }; @@ -160,9 +164,14 @@ struct stm32_adc_vrefint { * @exten: trigger control register & bitfield * @extsel: trigger selection register & bitfield * @res: resolution selection register & bitfield + * @difsel: differential mode selection register & bitfield + * @calfact_s: single-ended calibration factors register & bitfield + * @calfact_d: differential calibration factors register & bitfield * @smpr: smpr1 & smpr2 registers offset array * @smp_bits: smpr1 & smpr2 index and bitfields - * @or_vdd: option register & vddcore bitfield + * @or_vddcore: option register & vddcore bitfield + * @or_vddcpu: option register & vddcpu bitfield + * @or_vddq_ddr: option register & vddq_ddr bitfield * @ccr_vbat: common register & vbat bitfield * @ccr_vref: common register & vrefint bitfield */ @@ -176,9 
+185,14 @@ struct stm32_adc_regspec { const struct stm32_adc_regs exten; const struct stm32_adc_regs extsel; const struct stm32_adc_regs res; + const struct stm32_adc_regs difsel; + const struct stm32_adc_regs calfact_s; + const struct stm32_adc_regs calfact_d; const u32 smpr[2]; const struct stm32_adc_regs *smp_bits; - const struct stm32_adc_regs or_vdd; + const struct stm32_adc_regs or_vddcore; + const struct stm32_adc_regs or_vddcpu; + const struct stm32_adc_regs or_vddq_ddr; const struct stm32_adc_regs ccr_vbat; const struct stm32_adc_regs ccr_vref; }; @@ -192,6 +206,9 @@ struct stm32_adc; * @trigs: external trigger sources * @clk_required: clock is required * @has_vregready: vregready status flag presence + * @has_boostmode: boost mode support flag + * @has_linearcal: linear calibration support flag + * @has_presel: channel preselection support flag * @prepare: optional prepare routine (power-up, enable) * @start_conv: routine to start conversions * @stop_conv: routine to stop conversions @@ -206,6 +223,9 @@ struct stm32_adc_cfg { struct stm32_adc_trig_info *trigs; bool clk_required; bool has_vregready; + bool has_boostmode; + bool has_linearcal; + bool has_presel; int (*prepare)(struct iio_dev *); void (*start_conv)(struct iio_dev *, bool dma); void (*stop_conv)(struct iio_dev *); @@ -312,6 +332,13 @@ static const struct stm32_adc_info stm32h7_adc_info = { .num_res = ARRAY_SIZE(stm32h7_adc_resolutions), }; +/* stm32mp13 can have up to 19 channels */ +static const struct stm32_adc_info stm32mp13_adc_info = { + .max_channels = 19, + .resolutions = stm32f4_adc_resolutions, + .num_res = ARRAY_SIZE(stm32f4_adc_resolutions), +}; + /* * stm32f4_sq - describe regular sequence registers * - L: sequence len (register & bit field) @@ -497,10 +524,45 @@ static const struct stm32_adc_regspec stm32h7_adc_regspec = { .extsel = { STM32H7_ADC_CFGR, STM32H7_EXTSEL_MASK, STM32H7_EXTSEL_SHIFT }, .res = { STM32H7_ADC_CFGR, STM32H7_RES_MASK, STM32H7_RES_SHIFT }, + .difsel = { 
STM32H7_ADC_DIFSEL, STM32H7_DIFSEL_MASK}, + .calfact_s = { STM32H7_ADC_CALFACT, STM32H7_CALFACT_S_MASK, + STM32H7_CALFACT_S_SHIFT }, + .calfact_d = { STM32H7_ADC_CALFACT, STM32H7_CALFACT_D_MASK, + STM32H7_CALFACT_D_SHIFT }, .smpr = { STM32H7_ADC_SMPR1, STM32H7_ADC_SMPR2 }, .smp_bits = stm32h7_smp_bits, }; +/* STM32MP13 programmable sampling time (ADC clock cycles, rounded down) */ +static const unsigned int stm32mp13_adc_smp_cycles[STM32_ADC_MAX_SMP + 1] = { + 2, 6, 12, 24, 47, 92, 247, 640, +}; + +static const struct stm32_adc_regspec stm32mp13_adc_regspec = { + .dr = STM32H7_ADC_DR, + .ier_eoc = { STM32H7_ADC_IER, STM32H7_EOCIE }, + .ier_ovr = { STM32H7_ADC_IER, STM32H7_OVRIE }, + .isr_eoc = { STM32H7_ADC_ISR, STM32H7_EOC }, + .isr_ovr = { STM32H7_ADC_ISR, STM32H7_OVR }, + .sqr = stm32h7_sq, + .exten = { STM32H7_ADC_CFGR, STM32H7_EXTEN_MASK, STM32H7_EXTEN_SHIFT }, + .extsel = { STM32H7_ADC_CFGR, STM32H7_EXTSEL_MASK, + STM32H7_EXTSEL_SHIFT }, + .res = { STM32H7_ADC_CFGR, STM32MP13_RES_MASK, STM32MP13_RES_SHIFT }, + .difsel = { STM32MP13_ADC_DIFSEL, STM32MP13_DIFSEL_MASK}, + .calfact_s = { STM32MP13_ADC_CALFACT, STM32MP13_CALFACT_S_MASK, + STM32MP13_CALFACT_S_SHIFT }, + .calfact_d = { STM32MP13_ADC_CALFACT, STM32MP13_CALFACT_D_MASK, + STM32MP13_CALFACT_D_SHIFT }, + .smpr = { STM32H7_ADC_SMPR1, STM32H7_ADC_SMPR2 }, + .smp_bits = stm32h7_smp_bits, + .or_vddcore = { STM32MP13_ADC2_OR, STM32MP13_OP0 }, + .or_vddcpu = { STM32MP13_ADC2_OR, STM32MP13_OP1 }, + .or_vddq_ddr = { STM32MP13_ADC2_OR, STM32MP13_OP2 }, + .ccr_vbat = { STM32H7_ADC_CCR, STM32H7_VBATEN }, + .ccr_vref = { STM32H7_ADC_CCR, STM32H7_VREFEN }, +}; + static const struct stm32_adc_regspec stm32mp1_adc_regspec = { .dr = STM32H7_ADC_DR, .ier_eoc = { STM32H7_ADC_IER, STM32H7_EOCIE }, @@ -512,9 +574,14 @@ static const struct stm32_adc_regspec stm32mp1_adc_regspec = { .extsel = { STM32H7_ADC_CFGR, STM32H7_EXTSEL_MASK, STM32H7_EXTSEL_SHIFT }, .res = { STM32H7_ADC_CFGR, STM32H7_RES_MASK, STM32H7_RES_SHIFT }, + 
.difsel = { STM32H7_ADC_DIFSEL, STM32H7_DIFSEL_MASK}, + .calfact_s = { STM32H7_ADC_CALFACT, STM32H7_CALFACT_S_MASK, + STM32H7_CALFACT_S_SHIFT }, + .calfact_d = { STM32H7_ADC_CALFACT, STM32H7_CALFACT_D_MASK, + STM32H7_CALFACT_D_SHIFT }, .smpr = { STM32H7_ADC_SMPR1, STM32H7_ADC_SMPR2 }, .smp_bits = stm32h7_smp_bits, - .or_vdd = { STM32MP1_ADC2_OR, STM32MP1_VDDCOREEN }, + .or_vddcore = { STM32MP1_ADC2_OR, STM32MP1_VDDCOREEN }, .ccr_vbat = { STM32H7_ADC_CCR, STM32H7_VBATEN }, .ccr_vref = { STM32H7_ADC_CCR, STM32H7_VREFEN }, }; @@ -675,8 +742,18 @@ static void stm32_adc_int_ch_enable(struct iio_dev *indio_dev) switch (i) { case STM32_ADC_INT_CH_VDDCORE: dev_dbg(&indio_dev->dev, "Enable VDDCore\n"); - stm32_adc_set_bits(adc, adc->cfg->regs->or_vdd.reg, - adc->cfg->regs->or_vdd.mask); + stm32_adc_set_bits(adc, adc->cfg->regs->or_vddcore.reg, + adc->cfg->regs->or_vddcore.mask); + break; + case STM32_ADC_INT_CH_VDDCPU: + dev_dbg(&indio_dev->dev, "Enable VDDCPU\n"); + stm32_adc_set_bits(adc, adc->cfg->regs->or_vddcpu.reg, + adc->cfg->regs->or_vddcpu.mask); + break; + case STM32_ADC_INT_CH_VDDQ_DDR: + dev_dbg(&indio_dev->dev, "Enable VDDQ_DDR\n"); + stm32_adc_set_bits(adc, adc->cfg->regs->or_vddq_ddr.reg, + adc->cfg->regs->or_vddq_ddr.mask); break; case STM32_ADC_INT_CH_VREFINT: dev_dbg(&indio_dev->dev, "Enable VREFInt\n"); @@ -702,8 +779,16 @@ static void stm32_adc_int_ch_disable(struct stm32_adc *adc) switch (i) { case STM32_ADC_INT_CH_VDDCORE: - stm32_adc_clr_bits(adc, adc->cfg->regs->or_vdd.reg, - adc->cfg->regs->or_vdd.mask); + stm32_adc_clr_bits(adc, adc->cfg->regs->or_vddcore.reg, + adc->cfg->regs->or_vddcore.mask); + break; + case STM32_ADC_INT_CH_VDDCPU: + stm32_adc_clr_bits(adc, adc->cfg->regs->or_vddcpu.reg, + adc->cfg->regs->or_vddcpu.mask); + break; + case STM32_ADC_INT_CH_VDDQ_DDR: + stm32_adc_clr_bits(adc, adc->cfg->regs->or_vddq_ddr.reg, + adc->cfg->regs->or_vddq_ddr.mask); break; case STM32_ADC_INT_CH_VREFINT: stm32_adc_clr_bits_common(adc, 
adc->cfg->regs->ccr_vref.reg, @@ -801,6 +886,7 @@ static void stm32h7_adc_stop_conv(struct iio_dev *indio_dev) if (ret) dev_warn(&indio_dev->dev, "stop failed\n"); + /* STM32H7_DMNGT_MASK covers STM32MP13_DMAEN & STM32MP13_DMACFG */ stm32_adc_clr_bits(adc, STM32H7_ADC_CFGR, STM32H7_DMNGT_MASK); } @@ -811,6 +897,17 @@ static void stm32h7_adc_irq_clear(struct iio_dev *indio_dev, u32 msk) stm32_adc_set_bits(adc, adc->cfg->regs->isr_eoc.reg, msk); } +static void stm32mp13_adc_start_conv(struct iio_dev *indio_dev, bool dma) +{ + struct stm32_adc *adc = iio_priv(indio_dev); + + if (dma) + stm32_adc_set_bits(adc, STM32H7_ADC_CFGR, + STM32MP13_DMAEN | STM32MP13_DMACFG); + + stm32_adc_set_bits(adc, STM32H7_ADC_CR, STM32H7_ADSTART); +} + static int stm32h7_adc_exit_pwr_down(struct iio_dev *indio_dev) { struct stm32_adc *adc = iio_priv(indio_dev); @@ -821,7 +918,8 @@ static int stm32h7_adc_exit_pwr_down(struct iio_dev *indio_dev) stm32_adc_clr_bits(adc, STM32H7_ADC_CR, STM32H7_DEEPPWD); stm32_adc_set_bits(adc, STM32H7_ADC_CR, STM32H7_ADVREGEN); - if (adc->common->rate > STM32H7_BOOST_CLKRATE) + if (adc->cfg->has_boostmode && + adc->common->rate > STM32H7_BOOST_CLKRATE) stm32_adc_set_bits(adc, STM32H7_ADC_CR, STM32H7_BOOST); /* Wait for startup time */ @@ -843,7 +941,8 @@ static int stm32h7_adc_exit_pwr_down(struct iio_dev *indio_dev) static void stm32h7_adc_enter_pwr_down(struct stm32_adc *adc) { - stm32_adc_clr_bits(adc, STM32H7_ADC_CR, STM32H7_BOOST); + if (adc->cfg->has_boostmode) + stm32_adc_clr_bits(adc, STM32H7_ADC_CR, STM32H7_BOOST); /* Setting DEEPPWD disables ADC vreg and clears ADVREGEN */ stm32_adc_set_bits(adc, STM32H7_ADC_CR, STM32H7_DEEPPWD); @@ -901,6 +1000,9 @@ static int stm32h7_adc_read_selfcalib(struct iio_dev *indio_dev) int i, ret; u32 lincalrdyw_mask, val; + if (!adc->cfg->has_linearcal) + goto skip_linearcal; + /* Read linearity calibration */ lincalrdyw_mask = STM32H7_LINCALRDYW6; for (i = STM32H7_LINCALFACT_NUM - 1; i >= 0; i--) { @@ -923,12 +1025,13 
@@ static int stm32h7_adc_read_selfcalib(struct iio_dev *indio_dev) lincalrdyw_mask >>= 1; } +skip_linearcal: /* Read offset calibration */ - val = stm32_adc_readl(adc, STM32H7_ADC_CALFACT); - adc->cal.calfact_s = (val & STM32H7_CALFACT_S_MASK); - adc->cal.calfact_s >>= STM32H7_CALFACT_S_SHIFT; - adc->cal.calfact_d = (val & STM32H7_CALFACT_D_MASK); - adc->cal.calfact_d >>= STM32H7_CALFACT_D_SHIFT; + val = stm32_adc_readl(adc, adc->cfg->regs->calfact_s.reg); + adc->cal.calfact_s = (val & adc->cfg->regs->calfact_s.mask); + adc->cal.calfact_s >>= adc->cfg->regs->calfact_s.shift; + adc->cal.calfact_d = (val & adc->cfg->regs->calfact_d.mask); + adc->cal.calfact_d >>= adc->cfg->regs->calfact_d.shift; adc->cal.calibrated = true; return 0; @@ -945,9 +1048,12 @@ static int stm32h7_adc_restore_selfcalib(struct iio_dev *indio_dev) int i, ret; u32 lincalrdyw_mask, val; - val = (adc->cal.calfact_s << STM32H7_CALFACT_S_SHIFT) | - (adc->cal.calfact_d << STM32H7_CALFACT_D_SHIFT); - stm32_adc_writel(adc, STM32H7_ADC_CALFACT, val); + val = (adc->cal.calfact_s << adc->cfg->regs->calfact_s.shift) | + (adc->cal.calfact_d << adc->cfg->regs->calfact_d.shift); + stm32_adc_writel(adc, adc->cfg->regs->calfact_s.reg, val); + + if (!adc->cfg->has_linearcal) + return 0; lincalrdyw_mask = STM32H7_LINCALRDYW6; for (i = STM32H7_LINCALFACT_NUM - 1; i >= 0; i--) { @@ -1016,11 +1122,14 @@ static int stm32h7_adc_selfcalib(struct iio_dev *indio_dev) { struct stm32_adc *adc = iio_priv(indio_dev); int ret; + u32 msk = STM32H7_ADCALDIF; u32 val; if (adc->cal.calibrated) return true; + if (adc->cfg->has_linearcal) + msk |= STM32H7_ADCALLIN; /* ADC must be disabled for calibration */ stm32h7_adc_disable(indio_dev); @@ -1029,8 +1138,7 @@ static int stm32h7_adc_selfcalib(struct iio_dev *indio_dev) * - Offset calibration for single ended inputs * - No linearity calibration (do it later, before reading it) */ - stm32_adc_clr_bits(adc, STM32H7_ADC_CR, STM32H7_ADCALDIF); - stm32_adc_clr_bits(adc, STM32H7_ADC_CR, 
STM32H7_ADCALLIN); + stm32_adc_clr_bits(adc, STM32H7_ADC_CR, msk); /* Start calibration, then wait for completion */ stm32_adc_set_bits(adc, STM32H7_ADC_CR, STM32H7_ADCAL); @@ -1048,8 +1156,7 @@ static int stm32h7_adc_selfcalib(struct iio_dev *indio_dev) * - Linearity calibration (needs to be done only once for single/diff) * will run simultaneously with offset calibration. */ - stm32_adc_set_bits(adc, STM32H7_ADC_CR, - STM32H7_ADCALDIF | STM32H7_ADCALLIN); + stm32_adc_set_bits(adc, STM32H7_ADC_CR, msk); stm32_adc_set_bits(adc, STM32H7_ADC_CR, STM32H7_ADCAL); ret = stm32_adc_readl_poll_timeout(STM32H7_ADC_CR, val, !(val & STM32H7_ADCAL), 100, @@ -1060,8 +1167,7 @@ static int stm32h7_adc_selfcalib(struct iio_dev *indio_dev) } out: - stm32_adc_clr_bits(adc, STM32H7_ADC_CR, - STM32H7_ADCALDIF | STM32H7_ADCALLIN); + stm32_adc_clr_bits(adc, STM32H7_ADC_CR, msk); return ret; } @@ -1093,7 +1199,7 @@ static int stm32h7_adc_prepare(struct iio_dev *indio_dev) stm32_adc_int_ch_enable(indio_dev); - stm32_adc_writel(adc, STM32H7_ADC_DIFSEL, adc->difsel); + stm32_adc_writel(adc, adc->cfg->regs->difsel.reg, adc->difsel); ret = stm32h7_adc_enable(indio_dev); if (ret) @@ -1107,7 +1213,8 @@ static int stm32h7_adc_prepare(struct iio_dev *indio_dev) if (ret) goto disable; - stm32_adc_writel(adc, STM32H7_ADC_PCSEL, adc->pcsel); + if (adc->cfg->has_presel) + stm32_adc_writel(adc, STM32H7_ADC_PCSEL, adc->pcsel); return 0; @@ -1125,7 +1232,8 @@ static void stm32h7_adc_unprepare(struct iio_dev *indio_dev) { struct stm32_adc *adc = iio_priv(indio_dev); - stm32_adc_writel(adc, STM32H7_ADC_PCSEL, 0); + if (adc->cfg->has_presel) + stm32_adc_writel(adc, STM32H7_ADC_PCSEL, 0); stm32h7_adc_disable(indio_dev); stm32_adc_int_ch_disable(adc); stm32h7_adc_enter_pwr_down(adc); @@ -1857,7 +1965,7 @@ static void stm32_adc_chan_init_one(struct iio_dev *indio_dev, adc->pcsel |= BIT(chan->channel); if (differential) { /* pre-build diff channels mask */ - adc->difsel |= BIT(chan->channel); + adc->difsel |= 
BIT(chan->channel) & adc->cfg->regs->difsel.mask; /* Also add negative input to pre-selected channels */ adc->pcsel |= BIT(chan->channel2); } @@ -1998,6 +2106,35 @@ static int stm32_adc_populate_int_ch(struct iio_dev *indio_dev, const char *ch_n for (i = 0; i < STM32_ADC_INT_CH_NB; i++) { if (!strncmp(stm32_adc_ic[i].name, ch_name, STM32_ADC_CH_SZ)) { + /* Check internal channel availability */ + switch (i) { + case STM32_ADC_INT_CH_VDDCORE: + if (!adc->cfg->regs->or_vddcore.reg) + dev_warn(&indio_dev->dev, + "%s channel not available\n", ch_name); + break; + case STM32_ADC_INT_CH_VDDCPU: + if (!adc->cfg->regs->or_vddcpu.reg) + dev_warn(&indio_dev->dev, + "%s channel not available\n", ch_name); + break; + case STM32_ADC_INT_CH_VDDQ_DDR: + if (!adc->cfg->regs->or_vddq_ddr.reg) + dev_warn(&indio_dev->dev, + "%s channel not available\n", ch_name); + break; + case STM32_ADC_INT_CH_VREFINT: + if (!adc->cfg->regs->ccr_vref.reg) + dev_warn(&indio_dev->dev, + "%s channel not available\n", ch_name); + break; + case STM32_ADC_INT_CH_VBAT: + if (!adc->cfg->regs->ccr_vbat.reg) + dev_warn(&indio_dev->dev, + "%s channel not available\n", ch_name); + break; + } + if (stm32_adc_ic[i].idx != STM32_ADC_INT_CH_VREFINT) { adc->int_ch[i] = chan; break; @@ -2435,6 +2572,9 @@ static const struct stm32_adc_cfg stm32h7_adc_cfg = { .regs = &stm32h7_adc_regspec, .adc_info = &stm32h7_adc_info, .trigs = stm32h7_adc_trigs, + .has_boostmode = true, + .has_linearcal = true, + .has_presel = true, .start_conv = stm32h7_adc_start_conv, .stop_conv = stm32h7_adc_stop_conv, .prepare = stm32h7_adc_prepare, @@ -2448,6 +2588,9 @@ static const struct stm32_adc_cfg stm32mp1_adc_cfg = { .adc_info = &stm32h7_adc_info, .trigs = stm32h7_adc_trigs, .has_vregready = true, + .has_boostmode = true, + .has_linearcal = true, + .has_presel = true, .start_conv = stm32h7_adc_start_conv, .stop_conv = stm32h7_adc_stop_conv, .prepare = stm32h7_adc_prepare, @@ -2457,10 +2600,23 @@ static const struct stm32_adc_cfg 
stm32mp1_adc_cfg = { .ts_vrefint_ns = 4300, }; +static const struct stm32_adc_cfg stm32mp13_adc_cfg = { + .regs = &stm32mp13_adc_regspec, + .adc_info = &stm32mp13_adc_info, + .trigs = stm32h7_adc_trigs, + .start_conv = stm32mp13_adc_start_conv, + .stop_conv = stm32h7_adc_stop_conv, + .prepare = stm32h7_adc_prepare, + .unprepare = stm32h7_adc_unprepare, + .smp_cycles = stm32mp13_adc_smp_cycles, + .irq_clear = stm32h7_adc_irq_clear, +}; + static const struct of_device_id stm32_adc_of_match[] = { { .compatible = "st,stm32f4-adc", .data = (void *)&stm32f4_adc_cfg }, { .compatible = "st,stm32h7-adc", .data = (void *)&stm32h7_adc_cfg }, { .compatible = "st,stm32mp1-adc", .data = (void *)&stm32mp1_adc_cfg }, + { .compatible = "st,stm32mp13-adc", .data = (void *)&stm32mp13_adc_cfg }, {}, }; MODULE_DEVICE_TABLE(of, stm32_adc_of_match); From 7cb2303dd02487cc18bc9ff0fc1338e8c78ae846 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Wed, 12 Oct 2022 16:22:01 +0200 Subject: [PATCH 2277/4122] iio: adc: stm32: manage min sampling time on all internal channels Force minimum sampling time for all internal channels according to datasheet requirement. This value can be increased through DT st,min-sample-time-ns property. 
Signed-off-by: Olivier Moysan Reviewed-by: Andy Shevchenko Reviewed-by: Fabrice Gasnier Link: https://lore.kernel.org/r/20221012142205.13041-5-olivier.moysan@foss.st.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/stm32-adc.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/drivers/iio/adc/stm32-adc.c b/drivers/iio/adc/stm32-adc.c index d36c024526f2..8d03d21a33d6 100644 --- a/drivers/iio/adc/stm32-adc.c +++ b/drivers/iio/adc/stm32-adc.c @@ -215,7 +215,7 @@ struct stm32_adc; * @unprepare: optional unprepare routine (disable, power-down) * @irq_clear: routine to clear irqs * @smp_cycles: programmable sampling time (ADC clock cycles) - * @ts_vrefint_ns: vrefint minimum sampling time in ns + * @ts_int_ch: pointer to array of internal channels minimum sampling time in ns */ struct stm32_adc_cfg { const struct stm32_adc_regspec *regs; @@ -232,7 +232,7 @@ struct stm32_adc_cfg { void (*unprepare)(struct iio_dev *); void (*irq_clear)(struct iio_dev *indio_dev, u32 msk); const unsigned int *smp_cycles; - const unsigned int ts_vrefint_ns; + const unsigned int *ts_int_ch; }; /** @@ -1910,14 +1910,15 @@ static void stm32_adc_smpr_init(struct stm32_adc *adc, int channel, u32 smp_ns) { const struct stm32_adc_regs *smpr = &adc->cfg->regs->smp_bits[channel]; u32 period_ns, shift = smpr->shift, mask = smpr->mask; - unsigned int smp, r = smpr->reg; + unsigned int i, smp, r = smpr->reg; /* - * For vrefint channel, ensure that the sampling time cannot + * For internal channels, ensure that the sampling time cannot * be lower than the one specified in the datasheet */ - if (channel == adc->int_ch[STM32_ADC_INT_CH_VREFINT]) - smp_ns = max(smp_ns, adc->cfg->ts_vrefint_ns); + for (i = 0; i < STM32_ADC_INT_CH_NB; i++) + if (channel == adc->int_ch[i] && adc->int_ch[i] != STM32_ADC_INT_CH_NONE) + smp_ns = max(smp_ns, adc->cfg->ts_int_ch[i]); /* Determine sampling time (ADC clock cycles) */ period_ns = NSEC_PER_SEC / adc->common->rate; @@ 
-2568,6 +2569,9 @@ static const struct stm32_adc_cfg stm32f4_adc_cfg = { .irq_clear = stm32f4_adc_irq_clear, }; +const unsigned int stm32_adc_min_ts_h7[] = { 0, 0, 0, 4300, 9000 }; +static_assert(ARRAY_SIZE(stm32_adc_min_ts_h7) == STM32_ADC_INT_CH_NB); + static const struct stm32_adc_cfg stm32h7_adc_cfg = { .regs = &stm32h7_adc_regspec, .adc_info = &stm32h7_adc_info, @@ -2581,8 +2585,12 @@ static const struct stm32_adc_cfg stm32h7_adc_cfg = { .unprepare = stm32h7_adc_unprepare, .smp_cycles = stm32h7_adc_smp_cycles, .irq_clear = stm32h7_adc_irq_clear, + .ts_int_ch = stm32_adc_min_ts_h7, }; +const unsigned int stm32_adc_min_ts_mp1[] = { 100, 100, 100, 4300, 9800 }; +static_assert(ARRAY_SIZE(stm32_adc_min_ts_mp1) == STM32_ADC_INT_CH_NB); + static const struct stm32_adc_cfg stm32mp1_adc_cfg = { .regs = &stm32mp1_adc_regspec, .adc_info = &stm32h7_adc_info, @@ -2597,9 +2605,12 @@ static const struct stm32_adc_cfg stm32mp1_adc_cfg = { .unprepare = stm32h7_adc_unprepare, .smp_cycles = stm32h7_adc_smp_cycles, .irq_clear = stm32h7_adc_irq_clear, - .ts_vrefint_ns = 4300, + .ts_int_ch = stm32_adc_min_ts_mp1, }; +const unsigned int stm32_adc_min_ts_mp13[] = { 100, 0, 0, 4300, 9800 }; +static_assert(ARRAY_SIZE(stm32_adc_min_ts_mp13) == STM32_ADC_INT_CH_NB); + static const struct stm32_adc_cfg stm32mp13_adc_cfg = { .regs = &stm32mp13_adc_regspec, .adc_info = &stm32mp13_adc_info, @@ -2610,6 +2621,7 @@ static const struct stm32_adc_cfg stm32mp13_adc_cfg = { .unprepare = stm32h7_adc_unprepare, .smp_cycles = stm32mp13_adc_smp_cycles, .irq_clear = stm32h7_adc_irq_clear, + .ts_int_ch = stm32_adc_min_ts_mp13, }; static const struct of_device_id stm32_adc_of_match[] = { From 0a8565425afd8ba0e1a0ea73e21da119ee6dacea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Wed, 12 Oct 2022 17:16:17 +0200 Subject: [PATCH 2278/4122] iio: core: introduce iio_device_{claim|release}_buffer_mode() APIs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 
8bit These APIs are analogous to iio_device_claim_direct_mode() and iio_device_release_direct_mode() but, as the name suggests, with the logic flipped. While this looks odd enough, it will have at least two users (in following changes) and it will be important to move the IIO mlock to the private struct. Signed-off-by: Nuno Sá Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221012151620.1725215-2-nuno.sa@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-core.c | 38 +++++++++++++++++++++++++++++++++ include/linux/iio/iio.h | 2 ++ 2 files changed, 40 insertions(+) diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index 151ff3993354..b2b7bd27adc7 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -2083,6 +2083,44 @@ void iio_device_release_direct_mode(struct iio_dev *indio_dev) } EXPORT_SYMBOL_GPL(iio_device_release_direct_mode); +/** + * iio_device_claim_buffer_mode - Keep device in buffer mode + * @indio_dev: the iio_dev associated with the device + * + * If the device is in buffer mode it is guaranteed to stay + * that way until iio_device_release_buffer_mode() is called. + * + * Use with iio_device_release_buffer_mode(). + * + * Returns: 0 on success, -EBUSY on failure. + */ +int iio_device_claim_buffer_mode(struct iio_dev *indio_dev) +{ + mutex_lock(&indio_dev->mlock); + + if (iio_buffer_enabled(indio_dev)) + return 0; + + mutex_unlock(&indio_dev->mlock); + return -EBUSY; +} +EXPORT_SYMBOL_GPL(iio_device_claim_buffer_mode); + +/** + * iio_device_release_buffer_mode - releases claim on buffer mode + * @indio_dev: the iio_dev associated with the device + * + * Release the claim. Device is no longer guaranteed to stay + * in buffer mode. + * + * Use with iio_device_claim_buffer_mode(). 
+ */ +void iio_device_release_buffer_mode(struct iio_dev *indio_dev) +{ + mutex_unlock(&indio_dev->mlock); +} +EXPORT_SYMBOL_GPL(iio_device_release_buffer_mode); + /** * iio_device_get_current_mode() - helper function providing read-only access to * the opaque @currentmode variable diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index f0ec8a5e5a7a..9d3bd6379eb8 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -629,6 +629,8 @@ int __devm_iio_device_register(struct device *dev, struct iio_dev *indio_dev, int iio_push_event(struct iio_dev *indio_dev, u64 ev_code, s64 timestamp); int iio_device_claim_direct_mode(struct iio_dev *indio_dev); void iio_device_release_direct_mode(struct iio_dev *indio_dev); +int iio_device_claim_buffer_mode(struct iio_dev *indio_dev); +void iio_device_release_buffer_mode(struct iio_dev *indio_dev); extern struct bus_type iio_bus_type; From 1555790c86286933dc674fffe9c1104250d093ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Wed, 12 Oct 2022 17:16:18 +0200 Subject: [PATCH 2279/4122] iio: health: max30100: do not use internal iio_dev lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pattern used in this device does not quite fit in the iio_device_claim_direct_mode() typical usage. In this case, iio_buffer_enabled() was being used not to prevent the raw access but to allow it. Hence, let's make use of the new iio_device_claim_buffer_mode() API to make sure we stay in buffered mode during the complete read. Note that we are shadowing the error code returned by iio_device_claim_buffer_mode() so that we keep the original one (-EAGAIN). The reason is that some userspace stack might already be relying on this particular code so that we are not taking chances and leave it alone. 
Signed-off-by: Nuno Sá Link: https://lore.kernel.org/r/20221012151620.1725215-3-nuno.sa@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/health/max30100.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/iio/health/max30100.c b/drivers/iio/health/max30100.c index 2cca5e0519f8..3aa5d037a1c3 100644 --- a/drivers/iio/health/max30100.c +++ b/drivers/iio/health/max30100.c @@ -387,18 +387,21 @@ static int max30100_read_raw(struct iio_dev *indio_dev, * Temperature reading can only be acquired while engine * is running */ - mutex_lock(&indio_dev->mlock); - - if (!iio_buffer_enabled(indio_dev)) + if (iio_device_claim_buffer_mode(indio_dev)) { + /* + * Replacing -EBUSY or other error code + * returned by iio_device_claim_buffer_mode() + * because user space may rely on the current + * one. + */ ret = -EAGAIN; - else { + } else { ret = max30100_get_temp(data, val); if (!ret) ret = IIO_VAL_INT; + iio_device_release_buffer_mode(indio_dev); } - - mutex_unlock(&indio_dev->mlock); break; case IIO_CHAN_INFO_SCALE: *val = 1; /* 0.0625 */ From 6b701cda3632c9cffaea6f79c5fe638800c8f7f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Wed, 12 Oct 2022 17:16:19 +0200 Subject: [PATCH 2280/4122] iio: health: max30102: do not use internal iio_dev lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pattern used in this device does not quite fit in the iio_device_claim_direct_mode() typical usage. In this case, we want to know if we are in buffered mode or not to know if the device is powered (buffer mode) or not. And depending on that max30102_get_temp() will power on the device if needed. Hence, in order to keep the same functionality, we try to: 1. 
Claim Buffered mode; 2: If 1) succeeds call max30102_get_temp() without powering on the device; 3: Release Buffered mode; 4: If 1) fails, Claim Direct mode; 5: If 4) succeeds call max30102_get_temp() with powering on the device; 6: Release Direct mode; 7: If 4) fails, go to 1) and try again. This dance between buffered and direct mode is not particularly pretty (as well as the loop introduced by the goto statement) but it does allow us to get rid of the mlock usage while keeping the same behavior. Signed-off-by: Nuno Sá Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221012151620.1725215-4-nuno.sa@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/health/max30102.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/iio/health/max30102.c b/drivers/iio/health/max30102.c index 437298a29f2d..66df4aaa31a7 100644 --- a/drivers/iio/health/max30102.c +++ b/drivers/iio/health/max30102.c @@ -477,12 +477,23 @@ static int max30102_read_raw(struct iio_dev *indio_dev, * Temperature reading can only be acquired when not in * shutdown; leave shutdown briefly when buffer not running */ - mutex_lock(&indio_dev->mlock); - if (!iio_buffer_enabled(indio_dev)) +any_mode_retry: + if (iio_device_claim_buffer_mode(indio_dev)) { + /* + * This one is a *bit* hacky. If we cannot claim buffer + * mode, then try direct mode so that we make sure + * things cannot concurrently change. And we just keep + * trying until we get one of the modes... 
+ */ + if (iio_device_claim_direct_mode(indio_dev)) + goto any_mode_retry; + ret = max30102_get_temp(data, val, true); - else + iio_device_release_direct_mode(indio_dev); + } else { ret = max30102_get_temp(data, val, false); - mutex_unlock(&indio_dev->mlock); + iio_device_release_buffer_mode(indio_dev); + } if (ret) return ret; From 16afe125b53f88b855d2713c8ba253d905dcf3cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Wed, 12 Oct 2022 17:16:20 +0200 Subject: [PATCH 2281/4122] iio: core: move 'mlock' to 'struct iio_dev_opaque' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that there are no more users accessing 'mlock' directly, we can move it to the iio_dev private structure. Hence, it's now explicit that new drivers should not directly use this lock. Signed-off-by: Nuno Sá Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221012151620.1725215-5-nuno.sa@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/TODO | 3 --- drivers/iio/industrialio-buffer.c | 29 +++++++++++++++++------------ drivers/iio/industrialio-core.c | 26 +++++++++++++++----------- drivers/iio/industrialio-event.c | 4 ++-- drivers/iio/industrialio-trigger.c | 12 ++++++------ include/linux/iio/iio-opaque.h | 2 ++ include/linux/iio/iio.h | 3 --- 7 files changed, 42 insertions(+), 37 deletions(-) diff --git a/drivers/iio/TODO b/drivers/iio/TODO index 7d7326b7085a..2ace27d1ac62 100644 --- a/drivers/iio/TODO +++ b/drivers/iio/TODO @@ -7,9 +7,6 @@ tree - ABI Documentation - Audit driviers/iio/staging/Documentation -- Replace iio_dev->mlock by either a local lock or use -iio_claim_direct.(Requires analysis of the purpose of the lock.) - - Converting drivers from device tree centric to more generic property handlers. 
diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c index 228598b82a2f..9cd7db549fcb 100644 --- a/drivers/iio/industrialio-buffer.c +++ b/drivers/iio/industrialio-buffer.c @@ -507,13 +507,14 @@ static ssize_t iio_scan_el_store(struct device *dev, int ret; bool state; struct iio_dev *indio_dev = dev_to_iio_dev(dev); + struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct iio_dev_attr *this_attr = to_iio_dev_attr(attr); struct iio_buffer *buffer = this_attr->buffer; ret = kstrtobool(buf, &state); if (ret < 0) return ret; - mutex_lock(&indio_dev->mlock); + mutex_lock(&iio_dev_opaque->mlock); if (iio_buffer_is_active(buffer)) { ret = -EBUSY; goto error_ret; @@ -532,7 +533,7 @@ static ssize_t iio_scan_el_store(struct device *dev, } error_ret: - mutex_unlock(&indio_dev->mlock); + mutex_unlock(&iio_dev_opaque->mlock); return ret < 0 ? ret : len; @@ -554,6 +555,7 @@ static ssize_t iio_scan_el_ts_store(struct device *dev, { int ret; struct iio_dev *indio_dev = dev_to_iio_dev(dev); + struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct iio_buffer *buffer = to_iio_dev_attr(attr)->buffer; bool state; @@ -561,14 +563,14 @@ static ssize_t iio_scan_el_ts_store(struct device *dev, if (ret < 0) return ret; - mutex_lock(&indio_dev->mlock); + mutex_lock(&iio_dev_opaque->mlock); if (iio_buffer_is_active(buffer)) { ret = -EBUSY; goto error_ret; } buffer->scan_timestamp = state; error_ret: - mutex_unlock(&indio_dev->mlock); + mutex_unlock(&iio_dev_opaque->mlock); return ret ? 
ret : len; } @@ -642,6 +644,7 @@ static ssize_t length_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { struct iio_dev *indio_dev = dev_to_iio_dev(dev); + struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct iio_buffer *buffer = to_iio_dev_attr(attr)->buffer; unsigned int val; int ret; @@ -653,7 +656,7 @@ static ssize_t length_store(struct device *dev, struct device_attribute *attr, if (val == buffer->length) return len; - mutex_lock(&indio_dev->mlock); + mutex_lock(&iio_dev_opaque->mlock); if (iio_buffer_is_active(buffer)) { ret = -EBUSY; } else { @@ -665,7 +668,7 @@ static ssize_t length_store(struct device *dev, struct device_attribute *attr, if (buffer->length && buffer->length < buffer->watermark) buffer->watermark = buffer->length; out: - mutex_unlock(&indio_dev->mlock); + mutex_unlock(&iio_dev_opaque->mlock); return ret ? ret : len; } @@ -1256,7 +1259,7 @@ int iio_update_buffers(struct iio_dev *indio_dev, return -EINVAL; mutex_lock(&iio_dev_opaque->info_exist_lock); - mutex_lock(&indio_dev->mlock); + mutex_lock(&iio_dev_opaque->mlock); if (insert_buffer && iio_buffer_is_active(insert_buffer)) insert_buffer = NULL; @@ -1277,7 +1280,7 @@ int iio_update_buffers(struct iio_dev *indio_dev, ret = __iio_update_buffers(indio_dev, insert_buffer, remove_buffer); out_unlock: - mutex_unlock(&indio_dev->mlock); + mutex_unlock(&iio_dev_opaque->mlock); mutex_unlock(&iio_dev_opaque->info_exist_lock); return ret; @@ -1296,6 +1299,7 @@ static ssize_t enable_store(struct device *dev, struct device_attribute *attr, int ret; bool requested_state; struct iio_dev *indio_dev = dev_to_iio_dev(dev); + struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct iio_buffer *buffer = to_iio_dev_attr(attr)->buffer; bool inlist; @@ -1303,7 +1307,7 @@ static ssize_t enable_store(struct device *dev, struct device_attribute *attr, if (ret < 0) return ret; - mutex_lock(&indio_dev->mlock); + 
mutex_lock(&iio_dev_opaque->mlock); /* Find out if it is in the list */ inlist = iio_buffer_is_active(buffer); @@ -1317,7 +1321,7 @@ static ssize_t enable_store(struct device *dev, struct device_attribute *attr, ret = __iio_update_buffers(indio_dev, NULL, buffer); done: - mutex_unlock(&indio_dev->mlock); + mutex_unlock(&iio_dev_opaque->mlock); return (ret < 0) ? ret : len; } @@ -1334,6 +1338,7 @@ static ssize_t watermark_store(struct device *dev, const char *buf, size_t len) { struct iio_dev *indio_dev = dev_to_iio_dev(dev); + struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct iio_buffer *buffer = to_iio_dev_attr(attr)->buffer; unsigned int val; int ret; @@ -1344,7 +1349,7 @@ static ssize_t watermark_store(struct device *dev, if (!val) return -EINVAL; - mutex_lock(&indio_dev->mlock); + mutex_lock(&iio_dev_opaque->mlock); if (val > buffer->length) { ret = -EINVAL; @@ -1358,7 +1363,7 @@ static ssize_t watermark_store(struct device *dev, buffer->watermark = val; out: - mutex_unlock(&indio_dev->mlock); + mutex_unlock(&iio_dev_opaque->mlock); return ret ? 
ret : len; } diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index b2b7bd27adc7..52e690f031cb 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -285,16 +285,16 @@ int iio_device_set_clock(struct iio_dev *indio_dev, clockid_t clock_id) struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); const struct iio_event_interface *ev_int = iio_dev_opaque->event_interface; - ret = mutex_lock_interruptible(&indio_dev->mlock); + ret = mutex_lock_interruptible(&iio_dev_opaque->mlock); if (ret) return ret; if ((ev_int && iio_event_enabled(ev_int)) || iio_buffer_enabled(indio_dev)) { - mutex_unlock(&indio_dev->mlock); + mutex_unlock(&iio_dev_opaque->mlock); return -EBUSY; } iio_dev_opaque->clock_id = clock_id; - mutex_unlock(&indio_dev->mlock); + mutex_unlock(&iio_dev_opaque->mlock); return 0; } @@ -1674,7 +1674,7 @@ struct iio_dev *iio_device_alloc(struct device *parent, int sizeof_priv) indio_dev->dev.type = &iio_device_type; indio_dev->dev.bus = &iio_bus_type; device_initialize(&indio_dev->dev); - mutex_init(&indio_dev->mlock); + mutex_init(&iio_dev_opaque->mlock); mutex_init(&iio_dev_opaque->info_exist_lock); INIT_LIST_HEAD(&iio_dev_opaque->channel_attr_list); @@ -1696,7 +1696,7 @@ struct iio_dev *iio_device_alloc(struct device *parent, int sizeof_priv) INIT_LIST_HEAD(&iio_dev_opaque->ioctl_handlers); lockdep_register_key(&iio_dev_opaque->mlock_key); - lockdep_set_class(&indio_dev->mlock, &iio_dev_opaque->mlock_key); + lockdep_set_class(&iio_dev_opaque->mlock, &iio_dev_opaque->mlock_key); return indio_dev; } @@ -2058,10 +2058,12 @@ EXPORT_SYMBOL_GPL(__devm_iio_device_register); */ int iio_device_claim_direct_mode(struct iio_dev *indio_dev) { - mutex_lock(&indio_dev->mlock); + struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); + + mutex_lock(&iio_dev_opaque->mlock); if (iio_buffer_enabled(indio_dev)) { - mutex_unlock(&indio_dev->mlock); + mutex_unlock(&iio_dev_opaque->mlock); 
return -EBUSY; } return 0; @@ -2079,7 +2081,7 @@ EXPORT_SYMBOL_GPL(iio_device_claim_direct_mode); */ void iio_device_release_direct_mode(struct iio_dev *indio_dev) { - mutex_unlock(&indio_dev->mlock); + mutex_unlock(&to_iio_dev_opaque(indio_dev)->mlock); } EXPORT_SYMBOL_GPL(iio_device_release_direct_mode); @@ -2096,12 +2098,14 @@ EXPORT_SYMBOL_GPL(iio_device_release_direct_mode); */ int iio_device_claim_buffer_mode(struct iio_dev *indio_dev) { - mutex_lock(&indio_dev->mlock); + struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); + + mutex_lock(&iio_dev_opaque->mlock); if (iio_buffer_enabled(indio_dev)) return 0; - mutex_unlock(&indio_dev->mlock); + mutex_unlock(&iio_dev_opaque->mlock); return -EBUSY; } EXPORT_SYMBOL_GPL(iio_device_claim_buffer_mode); @@ -2117,7 +2121,7 @@ EXPORT_SYMBOL_GPL(iio_device_claim_buffer_mode); */ void iio_device_release_buffer_mode(struct iio_dev *indio_dev) { - mutex_unlock(&indio_dev->mlock); + mutex_unlock(&to_iio_dev_opaque(indio_dev)->mlock); } EXPORT_SYMBOL_GPL(iio_device_release_buffer_mode); diff --git a/drivers/iio/industrialio-event.c b/drivers/iio/industrialio-event.c index 3d78da2531a9..1a26393a7c0c 100644 --- a/drivers/iio/industrialio-event.c +++ b/drivers/iio/industrialio-event.c @@ -198,7 +198,7 @@ static int iio_event_getfd(struct iio_dev *indio_dev) if (ev_int == NULL) return -ENODEV; - fd = mutex_lock_interruptible(&indio_dev->mlock); + fd = mutex_lock_interruptible(&iio_dev_opaque->mlock); if (fd) return fd; @@ -219,7 +219,7 @@ static int iio_event_getfd(struct iio_dev *indio_dev) } unlock: - mutex_unlock(&indio_dev->mlock); + mutex_unlock(&iio_dev_opaque->mlock); return fd; } diff --git a/drivers/iio/industrialio-trigger.c b/drivers/iio/industrialio-trigger.c index 6885a186fe27..a2f3cc2f65ef 100644 --- a/drivers/iio/industrialio-trigger.c +++ b/drivers/iio/industrialio-trigger.c @@ -120,12 +120,12 @@ int iio_trigger_set_immutable(struct iio_dev *indio_dev, struct iio_trigger *tri return -EINVAL; 
iio_dev_opaque = to_iio_dev_opaque(indio_dev); - mutex_lock(&indio_dev->mlock); + mutex_lock(&iio_dev_opaque->mlock); WARN_ON(iio_dev_opaque->trig_readonly); indio_dev->trig = iio_trigger_get(trig); iio_dev_opaque->trig_readonly = true; - mutex_unlock(&indio_dev->mlock); + mutex_unlock(&iio_dev_opaque->mlock); return 0; } @@ -438,16 +438,16 @@ static ssize_t current_trigger_store(struct device *dev, struct iio_trigger *trig; int ret; - mutex_lock(&indio_dev->mlock); + mutex_lock(&iio_dev_opaque->mlock); if (iio_dev_opaque->currentmode == INDIO_BUFFER_TRIGGERED) { - mutex_unlock(&indio_dev->mlock); + mutex_unlock(&iio_dev_opaque->mlock); return -EBUSY; } if (iio_dev_opaque->trig_readonly) { - mutex_unlock(&indio_dev->mlock); + mutex_unlock(&iio_dev_opaque->mlock); return -EPERM; } - mutex_unlock(&indio_dev->mlock); + mutex_unlock(&iio_dev_opaque->mlock); trig = iio_trigger_acquire_by_name(buf); if (oldtrig == trig) { diff --git a/include/linux/iio/iio-opaque.h b/include/linux/iio/iio-opaque.h index d1f8b30a7c8b..5aec3945555b 100644 --- a/include/linux/iio/iio-opaque.h +++ b/include/linux/iio/iio-opaque.h @@ -11,6 +11,7 @@ * checked by device drivers but should be considered * read-only as this is a core internal bit * @driver_module: used to make it harder to undercut users + * @mlock: lock used to prevent simultaneous device state changes * @mlock_key: lockdep class for iio_dev lock * @info_exist_lock: lock to prevent use during removal * @trig_readonly: mark the current trigger immutable @@ -43,6 +44,7 @@ struct iio_dev_opaque { int currentmode; int id; struct module *driver_module; + struct mutex mlock; struct lock_class_key mlock_key; struct mutex info_exist_lock; bool trig_readonly; diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 9d3bd6379eb8..8e0afaaa3f75 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -548,8 +548,6 @@ struct iio_buffer_setup_ops { * and owner * @buffer: [DRIVER] any buffer present * @scan_bytes: 
[INTERN] num bytes captured to be fed to buffer demux - * @mlock: [INTERN] lock used to prevent simultaneous device state - * changes * @available_scan_masks: [DRIVER] optional array of allowed bitmasks * @masklength: [INTERN] the length of the mask established from * channels @@ -574,7 +572,6 @@ struct iio_dev { struct iio_buffer *buffer; int scan_bytes; - struct mutex mlock; const unsigned long *available_scan_masks; unsigned masklength; From 0c3a333524a3e5ba4b6c7b2638faef8420cfdb2a Mon Sep 17 00:00:00 2001 From: Rajat Khandelwal Date: Mon, 10 Oct 2022 23:07:20 +0530 Subject: [PATCH 2282/4122] iio: pressure: mpl115: Implementing low power mode by shutdown gpio MPL115 supports shutdown gpio which can be used to set the state to low power mode. Power from all internal circuits and registers is removed. This is done by pulling the SHDN pin to low. This patch enables runtime PM on MPL115 to increase power savings. According to spec., a wakeup time period of ~5 ms exists between waking up and actually communicating with the device. This is implemented using sleep delay. 
Signed-off-by: Rajat Khandelwal Link: https://lore.kernel.org/r/20221010173720.568916-1-rajat.khandelwal@linux.intel.com Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/mpl115.c | 62 ++++++++++++++++++++++++++++++- drivers/iio/pressure/mpl115.h | 5 +++ drivers/iio/pressure/mpl115_i2c.c | 1 + drivers/iio/pressure/mpl115_spi.c | 1 + 4 files changed, 68 insertions(+), 1 deletion(-) diff --git a/drivers/iio/pressure/mpl115.c b/drivers/iio/pressure/mpl115.c index 5bf5b9abe6f1..02ea38c8a3e4 100644 --- a/drivers/iio/pressure/mpl115.c +++ b/drivers/iio/pressure/mpl115.c @@ -4,12 +4,13 @@ * * Copyright (c) 2014 Peter Meerwald * - * TODO: shutdown pin + * TODO: synchronization with system suspend */ #include #include #include +#include #include "mpl115.h" @@ -27,6 +28,7 @@ struct mpl115_data { s16 a0; s16 b1, b2; s16 c12; + struct gpio_desc *shutdown; const struct mpl115_ops *ops; }; @@ -102,16 +104,24 @@ static int mpl115_read_raw(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_PROCESSED: + pm_runtime_get_sync(data->dev); ret = mpl115_comp_pressure(data, val, val2); if (ret < 0) return ret; + pm_runtime_mark_last_busy(data->dev); + pm_runtime_put_autosuspend(data->dev); + return IIO_VAL_INT_PLUS_MICRO; case IIO_CHAN_INFO_RAW: + pm_runtime_get_sync(data->dev); /* temperature -5.35 C / LSB, 472 LSB is 25 C */ ret = mpl115_read_temp(data); if (ret < 0) return ret; + pm_runtime_mark_last_busy(data->dev); + pm_runtime_put_autosuspend(data->dev); *val = ret >> 6; + return IIO_VAL_INT; case IIO_CHAN_INFO_OFFSET: *val = -605; @@ -168,6 +178,8 @@ int mpl115_probe(struct device *dev, const char *name, if (ret) return ret; + dev_set_drvdata(dev, indio_dev); + ret = data->ops->read(data->dev, MPL115_A0); if (ret < 0) return ret; @@ -185,10 +197,58 @@ int mpl115_probe(struct device *dev, const char *name, return ret; data->c12 = ret; + data->shutdown = devm_gpiod_get_optional(dev, "shutdown", + GPIOD_OUT_LOW); + if (IS_ERR(data->shutdown)) + return 
dev_err_probe(dev, PTR_ERR(data->shutdown), + "cannot get shutdown gpio\n"); + + if (data->shutdown) { + /* Enable runtime PM */ + pm_runtime_get_noresume(dev); + pm_runtime_set_active(dev); + pm_runtime_enable(dev); + + /* + * As the device takes 3 ms to come up with a fresh + * reading after power-on and 5 ms to actually power-on, + * do not shut it down unnecessarily. Set autosuspend to + * 2000 ms. + */ + pm_runtime_set_autosuspend_delay(dev, 2000); + pm_runtime_use_autosuspend(dev); + pm_runtime_put(dev); + + dev_dbg(dev, "low-power mode enabled"); + } else + dev_dbg(dev, "low-power mode disabled"); + return devm_iio_device_register(dev, indio_dev); } EXPORT_SYMBOL_NS_GPL(mpl115_probe, IIO_MPL115); +static int mpl115_runtime_suspend(struct device *dev) +{ + struct mpl115_data *data = iio_priv(dev_get_drvdata(dev)); + + gpiod_set_value(data->shutdown, 1); + + return 0; +} + +static int mpl115_runtime_resume(struct device *dev) +{ + struct mpl115_data *data = iio_priv(dev_get_drvdata(dev)); + + gpiod_set_value(data->shutdown, 0); + usleep_range(5000, 6000); + + return 0; +} + +EXPORT_NS_RUNTIME_DEV_PM_OPS(mpl115_dev_pm_ops, mpl115_runtime_suspend, + mpl115_runtime_resume, NULL, IIO_MPL115); + MODULE_AUTHOR("Peter Meerwald "); MODULE_DESCRIPTION("Freescale MPL115 pressure/temperature driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/iio/pressure/mpl115.h b/drivers/iio/pressure/mpl115.h index 57d55eb8e661..78a0068a17bb 100644 --- a/drivers/iio/pressure/mpl115.h +++ b/drivers/iio/pressure/mpl115.h @@ -6,6 +6,8 @@ * Copyright (c) 2016 Akinobu Mita */ +#include + #ifndef _MPL115_H_ #define _MPL115_H_ @@ -18,4 +20,7 @@ struct mpl115_ops { int mpl115_probe(struct device *dev, const char *name, const struct mpl115_ops *ops); +/*PM ops */ +extern const struct dev_pm_ops mpl115_dev_pm_ops; + #endif diff --git a/drivers/iio/pressure/mpl115_i2c.c b/drivers/iio/pressure/mpl115_i2c.c index 099ab1c6832c..555bda1146fb 100644 --- a/drivers/iio/pressure/mpl115_i2c.c +++ 
b/drivers/iio/pressure/mpl115_i2c.c @@ -53,6 +53,7 @@ MODULE_DEVICE_TABLE(i2c, mpl115_i2c_id); static struct i2c_driver mpl115_i2c_driver = { .driver = { .name = "mpl115", + .pm = pm_ptr(&mpl115_dev_pm_ops), }, .probe = mpl115_i2c_probe, .id_table = mpl115_i2c_id, diff --git a/drivers/iio/pressure/mpl115_spi.c b/drivers/iio/pressure/mpl115_spi.c index 7feec87e2704..58d218fd90dc 100644 --- a/drivers/iio/pressure/mpl115_spi.c +++ b/drivers/iio/pressure/mpl115_spi.c @@ -92,6 +92,7 @@ MODULE_DEVICE_TABLE(spi, mpl115_spi_ids); static struct spi_driver mpl115_spi_driver = { .driver = { .name = "mpl115", + .pm = pm_ptr(&mpl115_dev_pm_ops), }, .probe = mpl115_spi_probe, .id_table = mpl115_spi_ids, From 687c8848c642c093c190bb182e4a6ac2ed86b4eb Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 25 Sep 2022 16:57:16 +0100 Subject: [PATCH 2283/4122] iio: accel: fxls8962af: Use new EXPORT_NS_GPL_DEV_PM_OPS() Using this macro allows the compiler to remove unused structures and callbacks if we are not building with CONFIG_PM* without needing __maybe_unused markings. Signed-off-by: Jonathan Cameron Cc: Sean Nyekjaer -- Switched to Paul Cercueil's more flexible implementation of the macro. Dropped Sean's Ack given new form of macro being used. 
Acked-by: Sean Nyekjaer Link: https://lore.kernel.org/r/20220925155719.3316280-3-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/accel/fxls8962af-core.c | 16 +++++++--------- drivers/iio/accel/fxls8962af-i2c.c | 2 +- drivers/iio/accel/fxls8962af-spi.c | 2 +- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/drivers/iio/accel/fxls8962af-core.c b/drivers/iio/accel/fxls8962af-core.c index 8874d6d61725..bf259db281f5 100644 --- a/drivers/iio/accel/fxls8962af-core.c +++ b/drivers/iio/accel/fxls8962af-core.c @@ -1241,7 +1241,7 @@ int fxls8962af_core_probe(struct device *dev, struct regmap *regmap, int irq) } EXPORT_SYMBOL_NS_GPL(fxls8962af_core_probe, IIO_FXLS8962AF); -static int __maybe_unused fxls8962af_runtime_suspend(struct device *dev) +static int fxls8962af_runtime_suspend(struct device *dev) { struct fxls8962af_data *data = iio_priv(dev_get_drvdata(dev)); int ret; @@ -1255,14 +1255,14 @@ static int __maybe_unused fxls8962af_runtime_suspend(struct device *dev) return 0; } -static int __maybe_unused fxls8962af_runtime_resume(struct device *dev) +static int fxls8962af_runtime_resume(struct device *dev) { struct fxls8962af_data *data = iio_priv(dev_get_drvdata(dev)); return fxls8962af_active(data); } -static int __maybe_unused fxls8962af_suspend(struct device *dev) +static int fxls8962af_suspend(struct device *dev) { struct iio_dev *indio_dev = dev_get_drvdata(dev); struct fxls8962af_data *data = iio_priv(indio_dev); @@ -1283,7 +1283,7 @@ static int __maybe_unused fxls8962af_suspend(struct device *dev) return 0; } -static int __maybe_unused fxls8962af_resume(struct device *dev) +static int fxls8962af_resume(struct device *dev) { struct iio_dev *indio_dev = dev_get_drvdata(dev); struct fxls8962af_data *data = iio_priv(indio_dev); @@ -1300,12 +1300,10 @@ static int __maybe_unused fxls8962af_resume(struct device *dev) return 0; } -const struct dev_pm_ops fxls8962af_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(fxls8962af_suspend, fxls8962af_resume) - 
SET_RUNTIME_PM_OPS(fxls8962af_runtime_suspend, - fxls8962af_runtime_resume, NULL) +EXPORT_NS_GPL_DEV_PM_OPS(fxls8962af_pm_ops, IIO_FXLS8962AF) = { + SYSTEM_SLEEP_PM_OPS(fxls8962af_suspend, fxls8962af_resume) + RUNTIME_PM_OPS(fxls8962af_runtime_suspend, fxls8962af_runtime_resume, NULL) }; -EXPORT_SYMBOL_NS_GPL(fxls8962af_pm_ops, IIO_FXLS8962AF); MODULE_AUTHOR("Sean Nyekjaer "); MODULE_DESCRIPTION("NXP FXLS8962AF/FXLS8964AF accelerometer driver"); diff --git a/drivers/iio/accel/fxls8962af-i2c.c b/drivers/iio/accel/fxls8962af-i2c.c index 8fbadfea1620..22640eaebac7 100644 --- a/drivers/iio/accel/fxls8962af-i2c.c +++ b/drivers/iio/accel/fxls8962af-i2c.c @@ -45,7 +45,7 @@ static struct i2c_driver fxls8962af_driver = { .driver = { .name = "fxls8962af_i2c", .of_match_table = fxls8962af_of_match, - .pm = &fxls8962af_pm_ops, + .pm = pm_ptr(&fxls8962af_pm_ops), }, .probe_new = fxls8962af_probe, .id_table = fxls8962af_id, diff --git a/drivers/iio/accel/fxls8962af-spi.c b/drivers/iio/accel/fxls8962af-spi.c index 885b3ab7fcb5..a0d192211839 100644 --- a/drivers/iio/accel/fxls8962af-spi.c +++ b/drivers/iio/accel/fxls8962af-spi.c @@ -44,7 +44,7 @@ MODULE_DEVICE_TABLE(spi, fxls8962af_spi_id_table); static struct spi_driver fxls8962af_driver = { .driver = { .name = "fxls8962af_spi", - .pm = &fxls8962af_pm_ops, + .pm = pm_ptr(&fxls8962af_pm_ops), .of_match_table = fxls8962af_spi_of_match, }, .probe = fxls8962af_probe, From 02e082c433c65f06f4cb359688993377c1d7b6d1 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 25 Sep 2022 16:57:17 +0100 Subject: [PATCH 2284/4122] iio: gyro: fxas210002c: Move exports to IIO_FXAS210002C namespace. Includes using EXPORT_NS_GPL_DEV_PM_OPS() and the simplifications that brings by allowing the compiler to remove unused struct dev_pm_ops and callbacks without needing explicit __maybe_unused markings. 
Signed-off-by: Jonathan Cameron Cc: Rui Miguel Silva Signed-off-by: Jonathan Cameron -- Dropped Rui's tag on basis this is rather different from v1 due to the different macro implementation. v2: Switch to Paul's more flexible approach to EXPORT_NS_GPL_DEV_PM_OPS() Acked-by: Rui Miguel Silva Link: https://lore.kernel.org/r/20220925155719.3316280-4-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/gyro/fxas21002c_core.c | 21 ++++++++++----------- drivers/iio/gyro/fxas21002c_i2c.c | 3 ++- drivers/iio/gyro/fxas21002c_spi.c | 3 ++- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/iio/gyro/fxas21002c_core.c b/drivers/iio/gyro/fxas21002c_core.c index a36d71d9e3ea..3ea1d4613080 100644 --- a/drivers/iio/gyro/fxas21002c_core.c +++ b/drivers/iio/gyro/fxas21002c_core.c @@ -998,7 +998,7 @@ pm_disable: return ret; } -EXPORT_SYMBOL_GPL(fxas21002c_core_probe); +EXPORT_SYMBOL_NS_GPL(fxas21002c_core_probe, IIO_FXAS21002C); void fxas21002c_core_remove(struct device *dev) { @@ -1009,9 +1009,9 @@ void fxas21002c_core_remove(struct device *dev) pm_runtime_disable(dev); pm_runtime_set_suspended(dev); } -EXPORT_SYMBOL_GPL(fxas21002c_core_remove); +EXPORT_SYMBOL_NS_GPL(fxas21002c_core_remove, IIO_FXAS21002C); -static int __maybe_unused fxas21002c_suspend(struct device *dev) +static int fxas21002c_suspend(struct device *dev) { struct fxas21002c_data *data = iio_priv(dev_get_drvdata(dev)); @@ -1021,7 +1021,7 @@ static int __maybe_unused fxas21002c_suspend(struct device *dev) return 0; } -static int __maybe_unused fxas21002c_resume(struct device *dev) +static int fxas21002c_resume(struct device *dev) { struct fxas21002c_data *data = iio_priv(dev_get_drvdata(dev)); int ret; @@ -1033,26 +1033,25 @@ static int __maybe_unused fxas21002c_resume(struct device *dev) return fxas21002c_mode_set(data, data->prev_mode); } -static int __maybe_unused fxas21002c_runtime_suspend(struct device *dev) +static int fxas21002c_runtime_suspend(struct device *dev) { struct 
fxas21002c_data *data = iio_priv(dev_get_drvdata(dev)); return fxas21002c_mode_set(data, FXAS21002C_MODE_READY); } -static int __maybe_unused fxas21002c_runtime_resume(struct device *dev) +static int fxas21002c_runtime_resume(struct device *dev) { struct fxas21002c_data *data = iio_priv(dev_get_drvdata(dev)); return fxas21002c_mode_set(data, FXAS21002C_MODE_ACTIVE); } -const struct dev_pm_ops fxas21002c_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(fxas21002c_suspend, fxas21002c_resume) - SET_RUNTIME_PM_OPS(fxas21002c_runtime_suspend, - fxas21002c_runtime_resume, NULL) +EXPORT_NS_GPL_DEV_PM_OPS(fxas21002c_pm_ops, IIO_FXAS21002C) = { + SYSTEM_SLEEP_PM_OPS(fxas21002c_suspend, fxas21002c_resume) + RUNTIME_PM_OPS(fxas21002c_runtime_suspend, fxas21002c_runtime_resume, + NULL) }; -EXPORT_SYMBOL_GPL(fxas21002c_pm_ops); MODULE_AUTHOR("Rui Miguel Silva "); MODULE_LICENSE("GPL v2"); diff --git a/drivers/iio/gyro/fxas21002c_i2c.c b/drivers/iio/gyro/fxas21002c_i2c.c index 13bb52c594d1..9e2d0f34a672 100644 --- a/drivers/iio/gyro/fxas21002c_i2c.c +++ b/drivers/iio/gyro/fxas21002c_i2c.c @@ -53,7 +53,7 @@ MODULE_DEVICE_TABLE(of, fxas21002c_i2c_of_match); static struct i2c_driver fxas21002c_i2c_driver = { .driver = { .name = "fxas21002c_i2c", - .pm = &fxas21002c_pm_ops, + .pm = pm_ptr(&fxas21002c_pm_ops), .of_match_table = fxas21002c_i2c_of_match, }, .probe_new = fxas21002c_i2c_probe, @@ -65,3 +65,4 @@ module_i2c_driver(fxas21002c_i2c_driver); MODULE_AUTHOR("Rui Miguel Silva "); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("FXAS21002C I2C Gyro driver"); +MODULE_IMPORT_NS(IIO_FXAS21002C); diff --git a/drivers/iio/gyro/fxas21002c_spi.c b/drivers/iio/gyro/fxas21002c_spi.c index c3ac169facf9..4f633826547c 100644 --- a/drivers/iio/gyro/fxas21002c_spi.c +++ b/drivers/iio/gyro/fxas21002c_spi.c @@ -54,7 +54,7 @@ MODULE_DEVICE_TABLE(of, fxas21002c_spi_of_match); static struct spi_driver fxas21002c_spi_driver = { .driver = { .name = "fxas21002c_spi", - .pm = &fxas21002c_pm_ops, + .pm = 
pm_ptr(&fxas21002c_pm_ops), .of_match_table = fxas21002c_spi_of_match, }, .probe = fxas21002c_spi_probe, @@ -66,3 +66,4 @@ module_spi_driver(fxas21002c_spi_driver); MODULE_AUTHOR("Rui Miguel Silva "); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("FXAS21002C SPI Gyro driver"); +MODULE_IMPORT_NS(IIO_FXAS21002C); From ef5a5ef29c8f70ca640d785b7173101417c78d24 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 25 Sep 2022 16:57:18 +0100 Subject: [PATCH 2285/4122] iio: imu: inv_icm42600: Move exports to IIO_ICM42600 namespace As these exports are only relevant to core module and users in the bus specific modules, move them out of the main kernel namespace. Includes using EXPORT_NS_GPL_DEV_PM_OPS() and the simplifications that brings by allowing the compiler to remove unused struct dev_pm_ops and callbacks without needing explicit __maybe_unused markings. Signed-off-by: Jonathan Cameron Cc: Jean-Baptiste Maneyrol -- v2: Switch to Paul's more flexible version of the EXPORT_NS_GPL_DEV_PM_OPS() Acked-by: Jean-Baptiste Maneyrol Link: https://lore.kernel.org/r/20220925155719.3316280-5-jic23@kernel.org Signed-off-by: Jonathan Cameron --- .../iio/imu/inv_icm42600/inv_icm42600_core.c | 21 +++++++++---------- .../iio/imu/inv_icm42600/inv_icm42600_i2c.c | 3 ++- .../iio/imu/inv_icm42600/inv_icm42600_spi.c | 3 ++- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c index ca85fccc9839..b63c5dab1a56 100644 --- a/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c +++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c @@ -41,7 +41,7 @@ const struct regmap_config inv_icm42600_regmap_config = { .ranges = inv_icm42600_regmap_ranges, .num_ranges = ARRAY_SIZE(inv_icm42600_regmap_ranges), }; -EXPORT_SYMBOL_GPL(inv_icm42600_regmap_config); +EXPORT_SYMBOL_NS_GPL(inv_icm42600_regmap_config, IIO_ICM42600); struct inv_icm42600_hw { uint8_t whoami; @@ -660,13 +660,13 @@ int 
inv_icm42600_core_probe(struct regmap *regmap, int chip, int irq, return devm_add_action_or_reset(dev, inv_icm42600_disable_pm, dev); } -EXPORT_SYMBOL_GPL(inv_icm42600_core_probe); +EXPORT_SYMBOL_NS_GPL(inv_icm42600_core_probe, IIO_ICM42600); /* * Suspend saves sensors state and turns everything off. * Check first if runtime suspend has not already done the job. */ -static int __maybe_unused inv_icm42600_suspend(struct device *dev) +static int inv_icm42600_suspend(struct device *dev) { struct inv_icm42600_state *st = dev_get_drvdata(dev); int ret; @@ -706,7 +706,7 @@ out_unlock: * System resume gets the system back on and restores the sensors state. * Manually put runtime power management in system active state. */ -static int __maybe_unused inv_icm42600_resume(struct device *dev) +static int inv_icm42600_resume(struct device *dev) { struct inv_icm42600_state *st = dev_get_drvdata(dev); int ret; @@ -739,7 +739,7 @@ out_unlock: } /* Runtime suspend will turn off sensors that are enabled by iio devices. */ -static int __maybe_unused inv_icm42600_runtime_suspend(struct device *dev) +static int inv_icm42600_runtime_suspend(struct device *dev) { struct inv_icm42600_state *st = dev_get_drvdata(dev); int ret; @@ -761,7 +761,7 @@ error_unlock: } /* Sensors are enabled by iio devices, no need to turn them back on here. 
*/ -static int __maybe_unused inv_icm42600_runtime_resume(struct device *dev) +static int inv_icm42600_runtime_resume(struct device *dev) { struct inv_icm42600_state *st = dev_get_drvdata(dev); int ret; @@ -774,12 +774,11 @@ static int __maybe_unused inv_icm42600_runtime_resume(struct device *dev) return ret; } -const struct dev_pm_ops inv_icm42600_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(inv_icm42600_suspend, inv_icm42600_resume) - SET_RUNTIME_PM_OPS(inv_icm42600_runtime_suspend, - inv_icm42600_runtime_resume, NULL) +EXPORT_NS_GPL_DEV_PM_OPS(inv_icm42600_pm_ops, IIO_ICM42600) = { + SYSTEM_SLEEP_PM_OPS(inv_icm42600_suspend, inv_icm42600_resume) + RUNTIME_PM_OPS(inv_icm42600_runtime_suspend, + inv_icm42600_runtime_resume, NULL) }; -EXPORT_SYMBOL_GPL(inv_icm42600_pm_ops); MODULE_AUTHOR("InvenSense, Inc."); MODULE_DESCRIPTION("InvenSense ICM-426xx device driver"); diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_i2c.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_i2c.c index d4a692b838d0..4f96989ddf4a 100644 --- a/drivers/iio/imu/inv_icm42600/inv_icm42600_i2c.c +++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_i2c.c @@ -93,7 +93,7 @@ static struct i2c_driver inv_icm42600_driver = { .driver = { .name = "inv-icm42600-i2c", .of_match_table = inv_icm42600_of_matches, - .pm = &inv_icm42600_pm_ops, + .pm = pm_ptr(&inv_icm42600_pm_ops), }, .probe_new = inv_icm42600_probe, }; @@ -102,3 +102,4 @@ module_i2c_driver(inv_icm42600_driver); MODULE_AUTHOR("InvenSense, Inc."); MODULE_DESCRIPTION("InvenSense ICM-426xx I2C driver"); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(IIO_ICM42600); diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_spi.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_spi.c index e6305e5fa975..486b46e53113 100644 --- a/drivers/iio/imu/inv_icm42600/inv_icm42600_spi.c +++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_spi.c @@ -89,7 +89,7 @@ static struct spi_driver inv_icm42600_driver = { .driver = { .name = "inv-icm42600-spi", .of_match_table = 
inv_icm42600_of_matches, - .pm = &inv_icm42600_pm_ops, + .pm = pm_ptr(&inv_icm42600_pm_ops), }, .probe = inv_icm42600_probe, }; @@ -98,3 +98,4 @@ module_spi_driver(inv_icm42600_driver); MODULE_AUTHOR("InvenSense, Inc."); MODULE_DESCRIPTION("InvenSense ICM-426xx SPI driver"); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(IIO_ICM42600); From 62bfa12c87ac7468a69e81ea870eb68b0adabf37 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 25 Sep 2022 16:57:19 +0100 Subject: [PATCH 2286/4122] iio: imu: inv_mpu: Move exports to IIO_MPU6050 namespace As these exports are only relevant to core module and users in the bus specific modules, move them out of the main kernel namespace. Includes using EXPORT_NS_GPL_DEV_PM_OPS() and the simplifications that brings by allowing the compiler to remove unused struct dev_pm_ops and callbacks without needing explicit __maybe_unused markings. Signed-off-by: Jonathan Cameron Cc: Jean-Baptiste Maneyrol Cc: Linus Walleij -- Dropped Linus' tag as the new patch is significantly different. v2: Switch to more flexible version of EXPORT* macro from Paul. 
Acked-by: Jean-Baptiste Maneyrol Link: https://lore.kernel.org/r/20220925155719.3316280-6-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/imu/inv_mpu6050/inv_mpu_core.c | 17 ++++++++--------- drivers/iio/imu/inv_mpu6050/inv_mpu_i2c.c | 3 ++- drivers/iio/imu/inv_mpu6050/inv_mpu_spi.c | 3 ++- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c index 86fbbe904050..8a129120b73d 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c @@ -1653,9 +1653,9 @@ error_power_off: inv_mpu6050_set_power_itg(st, false); return result; } -EXPORT_SYMBOL_GPL(inv_mpu_core_probe); +EXPORT_SYMBOL_NS_GPL(inv_mpu_core_probe, IIO_MPU6050); -static int __maybe_unused inv_mpu_resume(struct device *dev) +static int inv_mpu_resume(struct device *dev) { struct iio_dev *indio_dev = dev_get_drvdata(dev); struct inv_mpu6050_state *st = iio_priv(indio_dev); @@ -1687,7 +1687,7 @@ out_unlock: return result; } -static int __maybe_unused inv_mpu_suspend(struct device *dev) +static int inv_mpu_suspend(struct device *dev) { struct iio_dev *indio_dev = dev_get_drvdata(dev); struct inv_mpu6050_state *st = iio_priv(indio_dev); @@ -1730,7 +1730,7 @@ out_unlock: return result; } -static int __maybe_unused inv_mpu_runtime_suspend(struct device *dev) +static int inv_mpu_runtime_suspend(struct device *dev) { struct inv_mpu6050_state *st = iio_priv(dev_get_drvdata(dev)); unsigned int sensors; @@ -1755,7 +1755,7 @@ out_unlock: return ret; } -static int __maybe_unused inv_mpu_runtime_resume(struct device *dev) +static int inv_mpu_runtime_resume(struct device *dev) { struct inv_mpu6050_state *st = iio_priv(dev_get_drvdata(dev)); int ret; @@ -1767,11 +1767,10 @@ static int __maybe_unused inv_mpu_runtime_resume(struct device *dev) return inv_mpu6050_set_power_itg(st, true); } -const struct dev_pm_ops inv_mpu_pmops = { - 
SET_SYSTEM_SLEEP_PM_OPS(inv_mpu_suspend, inv_mpu_resume) - SET_RUNTIME_PM_OPS(inv_mpu_runtime_suspend, inv_mpu_runtime_resume, NULL) +EXPORT_NS_GPL_DEV_PM_OPS(inv_mpu_pmops, IIO_MPU6050) = { + SYSTEM_SLEEP_PM_OPS(inv_mpu_suspend, inv_mpu_resume) + RUNTIME_PM_OPS(inv_mpu_runtime_suspend, inv_mpu_runtime_resume, NULL) }; -EXPORT_SYMBOL_GPL(inv_mpu_pmops); MODULE_AUTHOR("Invensense Corporation"); MODULE_DESCRIPTION("Invensense device MPU6050 driver"); diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_i2c.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_i2c.c index 14255a918eb1..7a8d60a5afa9 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_i2c.c +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_i2c.c @@ -267,7 +267,7 @@ static struct i2c_driver inv_mpu_driver = { .of_match_table = inv_of_match, .acpi_match_table = inv_acpi_match, .name = "inv-mpu6050-i2c", - .pm = &inv_mpu_pmops, + .pm = pm_ptr(&inv_mpu_pmops), }, }; @@ -276,3 +276,4 @@ module_i2c_driver(inv_mpu_driver); MODULE_AUTHOR("Invensense Corporation"); MODULE_DESCRIPTION("Invensense device MPU6050 driver"); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(IIO_MPU6050); diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_spi.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_spi.c index e6107b0cc38f..89f46c2f213d 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_spi.c +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_spi.c @@ -154,7 +154,7 @@ static struct spi_driver inv_mpu_driver = { .of_match_table = inv_of_match, .acpi_match_table = inv_acpi_match, .name = "inv-mpu6000-spi", - .pm = &inv_mpu_pmops, + .pm = pm_ptr(&inv_mpu_pmops), }, }; @@ -163,3 +163,4 @@ module_spi_driver(inv_mpu_driver); MODULE_AUTHOR("Adriana Reus "); MODULE_DESCRIPTION("Invensense device MPU6000 driver"); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(IIO_MPU6050); From 1ccef2e6e9205e209ad958d2e591bcca60981007 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Fri, 19 Aug 2022 22:19:01 +0300 Subject: [PATCH 2287/4122] iio: adc: ad7192: Simplify using devm_regulator_get_enable() Use 
devm_regulator_get_enable() instead of open coded get, enable, add-action-to-disable-at-detach - pattern. Also drop the seemingly unused struct member 'dvdd'. Signed-off-by: Matti Vaittinen Link: https://lore.kernel.org/r/9719c445c095d3d308e2fc9f4f93294f5806c41c.1660934107.git.mazziesaccount@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7192.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/drivers/iio/adc/ad7192.c b/drivers/iio/adc/ad7192.c index f5067173deb6..55a6ab591016 100644 --- a/drivers/iio/adc/ad7192.c +++ b/drivers/iio/adc/ad7192.c @@ -177,7 +177,6 @@ struct ad7192_chip_info { struct ad7192_state { const struct ad7192_chip_info *chip_info; struct regulator *avdd; - struct regulator *dvdd; struct clk *mclk; u16 int_vref_mv; u32 fclk; @@ -1015,19 +1014,9 @@ static int ad7192_probe(struct spi_device *spi) if (ret) return ret; - st->dvdd = devm_regulator_get(&spi->dev, "dvdd"); - if (IS_ERR(st->dvdd)) - return PTR_ERR(st->dvdd); - - ret = regulator_enable(st->dvdd); - if (ret) { - dev_err(&spi->dev, "Failed to enable specified DVdd supply\n"); - return ret; - } - - ret = devm_add_action_or_reset(&spi->dev, ad7192_reg_disable, st->dvdd); + ret = devm_regulator_get_enable(&spi->dev, "dvdd"); if (ret) - return ret; + return dev_err_probe(&spi->dev, ret, "Failed to enable specified DVdd supply\n"); ret = regulator_get_voltage(st->avdd); if (ret < 0) { From 08f75f180db06566d134b1cac1d8a4b8f266761e Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Fri, 19 Aug 2022 22:19:17 +0300 Subject: [PATCH 2288/4122] iio: dac: ltc2688: Simplify using devm_regulator_*get_enable() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use devm_regulator_bulk_get_enable() instead of open coded bulk-get, bulk-enable, add-action-to-disable-at-detach - pattern. 
Signed-off-by: Matti Vaittinen Reviewed-by: Nuno Sá Link: https://lore.kernel.org/r/a29493f594c84b3bd852e462bbd3e591a8575a27.1660934107.git.mazziesaccount@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/dac/ltc2688.c | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/drivers/iio/dac/ltc2688.c b/drivers/iio/dac/ltc2688.c index 28bdde2d3088..fc8eb53c65be 100644 --- a/drivers/iio/dac/ltc2688.c +++ b/drivers/iio/dac/ltc2688.c @@ -84,7 +84,6 @@ struct ltc2688_chan { struct ltc2688_state { struct spi_device *spi; struct regmap *regmap; - struct regulator_bulk_data regulators[2]; struct ltc2688_chan channels[LTC2688_DAC_CHANNELS]; struct iio_chan_spec *iio_chan; /* lock to protect against multiple access to the device and shared data */ @@ -902,13 +901,6 @@ static int ltc2688_setup(struct ltc2688_state *st, struct regulator *vref) LTC2688_CONFIG_EXT_REF); } -static void ltc2688_disable_regulators(void *data) -{ - struct ltc2688_state *st = data; - - regulator_bulk_disable(ARRAY_SIZE(st->regulators), st->regulators); -} - static void ltc2688_disable_regulator(void *regulator) { regulator_disable(regulator); @@ -965,6 +957,7 @@ static const struct iio_info ltc2688_info = { static int ltc2688_probe(struct spi_device *spi) { + static const char * const regulators[] = { "vcc", "iovcc" }; struct ltc2688_state *st; struct iio_dev *indio_dev; struct regulator *vref_reg; @@ -988,21 +981,11 @@ static int ltc2688_probe(struct spi_device *spi) return dev_err_probe(dev, PTR_ERR(st->regmap), "Failed to init regmap"); - st->regulators[0].supply = "vcc"; - st->regulators[1].supply = "iovcc"; - ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(st->regulators), - st->regulators); - if (ret) - return dev_err_probe(dev, ret, "Failed to get regulators\n"); - - ret = regulator_bulk_enable(ARRAY_SIZE(st->regulators), st->regulators); + ret = devm_regulator_bulk_get_enable(dev, ARRAY_SIZE(regulators), + regulators); if (ret) return dev_err_probe(dev, 
ret, "Failed to enable regulators\n"); - ret = devm_add_action_or_reset(dev, ltc2688_disable_regulators, st); - if (ret) - return ret; - vref_reg = devm_regulator_get_optional(dev, "vref"); if (IS_ERR(vref_reg)) { if (PTR_ERR(vref_reg) != -ENODEV) From 2c620883a46b6e2299cf9e4b4683e5b3cc67a61e Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Fri, 19 Aug 2022 22:19:31 +0300 Subject: [PATCH 2289/4122] iio: gyro: bmg160_core: Simplify using devm_regulator_*get_enable() Use devm_regulator_bulk_get_enable() instead of open coded bulk-get, bulk-enable, add-action-to-disable-at-detach - pattern. Signed-off-by: Matti Vaittinen Link: https://lore.kernel.org/r/3fd11489356b1c73a3d7b4bd9dec7e12c9fe8788.1660934107.git.mazziesaccount@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/gyro/bmg160_core.c | 24 +++--------------------- 1 file changed, 3 insertions(+), 21 deletions(-) diff --git a/drivers/iio/gyro/bmg160_core.c b/drivers/iio/gyro/bmg160_core.c index cedd9f02ea21..0e2eb0e98235 100644 --- a/drivers/iio/gyro/bmg160_core.c +++ b/drivers/iio/gyro/bmg160_core.c @@ -93,7 +93,6 @@ struct bmg160_data { struct regmap *regmap; - struct regulator_bulk_data regulators[2]; struct iio_trigger *dready_trig; struct iio_trigger *motion_trig; struct iio_mount_matrix orientation; @@ -1067,16 +1066,10 @@ static const char *bmg160_match_acpi_device(struct device *dev) return dev_name(dev); } -static void bmg160_disable_regulators(void *d) -{ - struct bmg160_data *data = d; - - regulator_bulk_disable(ARRAY_SIZE(data->regulators), data->regulators); -} - int bmg160_core_probe(struct device *dev, struct regmap *regmap, int irq, const char *name) { + static const char * const regulators[] = { "vdd", "vddio" }; struct bmg160_data *data; struct iio_dev *indio_dev; int ret; @@ -1090,22 +1083,11 @@ int bmg160_core_probe(struct device *dev, struct regmap *regmap, int irq, data->irq = irq; data->regmap = regmap; - data->regulators[0].supply = "vdd"; - data->regulators[1].supply = "vddio"; 
- ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(data->regulators), - data->regulators); + ret = devm_regulator_bulk_get_enable(dev, ARRAY_SIZE(regulators), + regulators); if (ret) return dev_err_probe(dev, ret, "Failed to get regulators\n"); - ret = regulator_bulk_enable(ARRAY_SIZE(data->regulators), - data->regulators); - if (ret) - return ret; - - ret = devm_add_action_or_reset(dev, bmg160_disable_regulators, data); - if (ret) - return ret; - ret = iio_read_mount_matrix(dev, &data->orientation); if (ret) return ret; From 6900cdbfb99e8600572da340576509297a684a3a Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Fri, 19 Aug 2022 22:19:46 +0300 Subject: [PATCH 2290/4122] iio: imu: st_lsm6dsx: Simplify using devm_regulator_*get_enable() Use devm_regulator_bulk_get_enable() instead of open coded bulk-get, bulk-enable, add-action-to-disable-at-detach - pattern. A functional change (which seems like a bugfix) is that if regulator_bulk_get fails, the enable is not attempted. Signed-off-by: Matti Vaittinen Link: https://lore.kernel.org/r/876e58428cec056d51070e49eff559e2d7c23b12.1660934107.git.mazziesaccount@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h | 2 -- drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c | 30 ++++---------------- 2 files changed, 5 insertions(+), 27 deletions(-) diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h index d3a4e21bc114..07ad8027de73 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h @@ -374,7 +374,6 @@ struct st_lsm6dsx_sensor { * struct st_lsm6dsx_hw - ST IMU MEMS hw instance * @dev: Pointer to instance of struct device (I2C or SPI). * @regmap: Register map of the device. - * @regulators: VDD/VDDIO voltage regulators. * @irq: Device interrupt line (I2C or SPI). * @fifo_lock: Mutex to prevent concurrent access to the hw FIFO. * @conf_lock: Mutex to prevent concurrent FIFO configuration update. 
@@ -397,7 +396,6 @@ struct st_lsm6dsx_sensor { struct st_lsm6dsx_hw { struct device *dev; struct regmap *regmap; - struct regulator_bulk_data regulators[2]; int irq; struct mutex fifo_lock; diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c index 6af2e905c161..fe5fa08b68ac 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c @@ -2177,36 +2177,20 @@ static int st_lsm6dsx_irq_setup(struct st_lsm6dsx_hw *hw) static int st_lsm6dsx_init_regulators(struct device *dev) { - struct st_lsm6dsx_hw *hw = dev_get_drvdata(dev); + /* vdd-vddio power regulators */ + static const char * const regulators[] = { "vdd", "vddio" }; int err; - /* vdd-vddio power regulators */ - hw->regulators[0].supply = "vdd"; - hw->regulators[1].supply = "vddio"; - err = devm_regulator_bulk_get(dev, ARRAY_SIZE(hw->regulators), - hw->regulators); + err = devm_regulator_bulk_get_enable(dev, ARRAY_SIZE(regulators), + regulators); if (err) - return dev_err_probe(dev, err, "failed to get regulators\n"); - - err = regulator_bulk_enable(ARRAY_SIZE(hw->regulators), - hw->regulators); - if (err) { - dev_err(dev, "failed to enable regulators: %d\n", err); - return err; - } + return dev_err_probe(dev, err, "failed to enable regulators\n"); msleep(50); return 0; } -static void st_lsm6dsx_chip_uninit(void *data) -{ - struct st_lsm6dsx_hw *hw = data; - - regulator_bulk_disable(ARRAY_SIZE(hw->regulators), hw->regulators); -} - int st_lsm6dsx_probe(struct device *dev, int irq, int hw_id, struct regmap *regmap) { @@ -2230,10 +2214,6 @@ int st_lsm6dsx_probe(struct device *dev, int irq, int hw_id, if (err) return err; - err = devm_add_action_or_reset(dev, st_lsm6dsx_chip_uninit, hw); - if (err) - return err; - hw->buff = devm_kzalloc(dev, ST_LSM6DSX_BUFF_SIZE, GFP_KERNEL); if (!hw->buff) return -ENOMEM; From 7ff0ad35aa6f70fe32fee33bc6e0aa356af99f76 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Fri, 19 Aug 
2022 22:20:03 +0300 Subject: [PATCH 2291/4122] iio: adc: ad7476: simplify using devm_regulator_get_enable() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop open-coded pattern: 'devm_regulator_get(), regulator_enable(), add_action_or_reset(regulator_disable)' and use the devm_regulator_get_enable() Signed-off-by: Matti Vaittinen Acked-by: Nuno Sá Link: https://lore.kernel.org/r/33070d66b9b976acac1cee5570facef9278b6b61.1660934107.git.mazziesaccount@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7476.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/iio/adc/ad7476.c b/drivers/iio/adc/ad7476.c index 94776f696290..80aebed47d1f 100644 --- a/drivers/iio/adc/ad7476.c +++ b/drivers/iio/adc/ad7476.c @@ -368,16 +368,7 @@ static int ad7476_probe(struct spi_device *spi) } if (st->chip_info->has_vdrive) { - reg = devm_regulator_get(&spi->dev, "vdrive"); - if (IS_ERR(reg)) - return PTR_ERR(reg); - - ret = regulator_enable(reg); - if (ret) - return ret; - - ret = devm_add_action_or_reset(&spi->dev, ad7476_reg_disable, - reg); + ret = devm_regulator_get_enable(&spi->dev, "vdrive"); if (ret) return ret; } From fd5b6c48ec3345d5c243e283fbdb69618ada2be8 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Fri, 19 Aug 2022 22:20:18 +0300 Subject: [PATCH 2292/4122] iio: adc: ad7606: simplify using devm_regulator_get_enable() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop open-coded pattern: 'devm_regulator_get(), regulator_enable(), add_action_or_reset(regulator_disable)' and use the devm_regulator_get_enable() and drop the pointer to the regulator. This simplifies code and makes it less tempting to add manual control for the regulator which is also controlled by devm. Whilst here also switch to dev_err_probe() to provide more information if a deferred probe occurs. 
Signed-off-by: Matti Vaittinen Acked-by: Nuno Sá Link: https://lore.kernel.org/r/521c52f5a9bdc2db04d5775b36df4b233ae338da.1660934107.git.mazziesaccount@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7606.c | 22 +++------------------- drivers/iio/adc/ad7606.h | 2 -- 2 files changed, 3 insertions(+), 21 deletions(-) diff --git a/drivers/iio/adc/ad7606.c b/drivers/iio/adc/ad7606.c index ba24f99523e0..dd6b603f65ea 100644 --- a/drivers/iio/adc/ad7606.c +++ b/drivers/iio/adc/ad7606.c @@ -557,13 +557,6 @@ static const struct iio_trigger_ops ad7606_trigger_ops = { .validate_device = iio_trigger_validate_own_device, }; -static void ad7606_regulator_disable(void *data) -{ - struct ad7606_state *st = data; - - regulator_disable(st->reg); -} - int ad7606_probe(struct device *dev, int irq, void __iomem *base_address, const char *name, unsigned int id, const struct ad7606_bus_ops *bops) @@ -589,19 +582,10 @@ int ad7606_probe(struct device *dev, int irq, void __iomem *base_address, st->scale_avail = ad7606_scale_avail; st->num_scales = ARRAY_SIZE(ad7606_scale_avail); - st->reg = devm_regulator_get(dev, "avcc"); - if (IS_ERR(st->reg)) - return PTR_ERR(st->reg); - - ret = regulator_enable(st->reg); - if (ret) { - dev_err(dev, "Failed to enable specified AVcc supply\n"); - return ret; - } - - ret = devm_add_action_or_reset(dev, ad7606_regulator_disable, st); + ret = devm_regulator_get_enable(dev, "avcc"); if (ret) - return ret; + return dev_err_probe(dev, ret, + "Failed to enable specified AVcc supply\n"); st->chip_info = &ad7606_chip_info_tbl[id]; diff --git a/drivers/iio/adc/ad7606.h b/drivers/iio/adc/ad7606.h index 2dc4f599f9df..0c6a88cc4695 100644 --- a/drivers/iio/adc/ad7606.h +++ b/drivers/iio/adc/ad7606.h @@ -62,7 +62,6 @@ struct ad7606_chip_info { * struct ad7606_state - driver instance specific data * @dev pointer to kernel device * @chip_info entry in the table of chips that describes this device - * @reg regulator info for the power supply of the device * 
@bops bus operations (SPI or parallel) * @range voltage range selection, selects which scale to apply * @oversampling oversampling selection @@ -92,7 +91,6 @@ struct ad7606_chip_info { struct ad7606_state { struct device *dev; const struct ad7606_chip_info *chip_info; - struct regulator *reg; const struct ad7606_bus_ops *bops; unsigned int range[16]; unsigned int oversampling; From f55c8e6696425ef22b7d08fc05b920d3a1a5dbf6 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Fri, 19 Aug 2022 22:20:32 +0300 Subject: [PATCH 2293/4122] iio: adc: max1241: simplify using devm_regulator_get_enable() Drop open-coded pattern: 'devm_regulator_get(), regulator_enable(), add_action_or_reset(regulator_disable)' and use the devm_regulator_get_enable() and drop the pointer to the regulator. This simplifies code and makes it less tempting to add manual control for the regulator which is also controlled by devm. Signed-off-by: Matti Vaittinen Acked-by: Alexandru Lazar Link: https://lore.kernel.org/r/7c759bf6c06e72ae70bffeebc1939d9903427278.1660934107.git.mazziesaccount@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/max1241.c | 28 +++------------------------- 1 file changed, 3 insertions(+), 25 deletions(-) diff --git a/drivers/iio/adc/max1241.c b/drivers/iio/adc/max1241.c index a815ad1f6913..500bb09ab19b 100644 --- a/drivers/iio/adc/max1241.c +++ b/drivers/iio/adc/max1241.c @@ -22,7 +22,6 @@ enum max1241_id { struct max1241 { struct spi_device *spi; struct mutex lock; - struct regulator *vdd; struct regulator *vref; struct gpio_desc *shutdown; @@ -110,17 +109,6 @@ static const struct iio_info max1241_info = { .read_raw = max1241_read_raw, }; -static void max1241_disable_vdd_action(void *data) -{ - struct max1241 *adc = data; - struct device *dev = &adc->spi->dev; - int err; - - err = regulator_disable(adc->vdd); - if (err) - dev_err(dev, "could not disable vdd regulator.\n"); -} - static void max1241_disable_vref_action(void *data) { struct max1241 *adc = data; @@ 
-147,20 +135,10 @@ static int max1241_probe(struct spi_device *spi) adc->spi = spi; mutex_init(&adc->lock); - adc->vdd = devm_regulator_get(dev, "vdd"); - if (IS_ERR(adc->vdd)) - return dev_err_probe(dev, PTR_ERR(adc->vdd), - "failed to get vdd regulator\n"); - - ret = regulator_enable(adc->vdd); + ret = devm_regulator_get_enable(dev, "vdd"); if (ret) - return ret; - - ret = devm_add_action_or_reset(dev, max1241_disable_vdd_action, adc); - if (ret) { - dev_err(dev, "could not set up vdd regulator cleanup action\n"); - return ret; - } + return dev_err_probe(dev, ret, + "failed to get/enable vdd regulator\n"); adc->vref = devm_regulator_get(dev, "vref"); if (IS_ERR(adc->vref)) From 3dfa1d4f63baf41774bbc77467729de39e0ac8a6 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Fri, 19 Aug 2022 22:20:47 +0300 Subject: [PATCH 2294/4122] iio: adc: max1363: simplify using devm_regulator_get_enable() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop open-coded pattern: 'devm_regulator_get(), regulator_enable(), add_action_or_reset(regulator_disable)' and use the devm_regulator_get_enable() and drop the pointer to the regulator. This simplifies code and makes it less tempting to add manual control for the regulator which is also controlled by devm. Signed-off-by: Matti Vaittinen Acked-by: Nuno Sá Link: https://lore.kernel.org/r/1286ea127d190e5708a0aaff271819b2d3f8802d.1660934107.git.mazziesaccount@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/max1363.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/iio/adc/max1363.c b/drivers/iio/adc/max1363.c index a28cf86cdce8..42d3479cefb7 100644 --- a/drivers/iio/adc/max1363.c +++ b/drivers/iio/adc/max1363.c @@ -148,7 +148,6 @@ struct max1363_chip_info { * @chip_info: chip model specific constants, available modes, etc. 
* @current_mode: the scan mode of this chip * @requestedmask: a valid requested set of channels - * @reg: supply regulator * @lock: lock to ensure state is consistent * @monitor_on: whether monitor mode is enabled * @monitor_speed: parameter corresponding to device monitor speed setting @@ -168,7 +167,6 @@ struct max1363_state { const struct max1363_chip_info *chip_info; const struct max1363_mode *current_mode; u32 requestedmask; - struct regulator *reg; struct mutex lock; /* Using monitor modes and buffer at the same time is @@ -1597,15 +1595,7 @@ static int max1363_probe(struct i2c_client *client, st = iio_priv(indio_dev); mutex_init(&st->lock); - st->reg = devm_regulator_get(&client->dev, "vcc"); - if (IS_ERR(st->reg)) - return PTR_ERR(st->reg); - - ret = regulator_enable(st->reg); - if (ret) - return ret; - - ret = devm_add_action_or_reset(&client->dev, max1363_reg_disable, st->reg); + ret = devm_regulator_get_enable(&client->dev, "vcc"); if (ret) return ret; From 563746c26db6c203d30bed2daa301416b27b77fb Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Fri, 19 Aug 2022 22:21:02 +0300 Subject: [PATCH 2295/4122] iio: amplifier: hmc425a: simplify using devm_regulator_get_enable() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop open-coded pattern: 'devm_regulator_get(), regulator_enable(), add_action_or_reset(regulator_disable)' and use the devm_regulator_get_enable() and drop the pointer to the regulator. This simplifies code and makes it less tempting to add manual control for the regulator which is also controlled by devm. 
Signed-off-by: Matti Vaittinen Acked-by: Nuno Sá Link: https://lore.kernel.org/r/8b1193fdefb231a6d721e2bded52c48e56039c20.1660934107.git.mazziesaccount@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/amplifiers/hmc425a.c | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/drivers/iio/amplifiers/hmc425a.c b/drivers/iio/amplifiers/hmc425a.c index ce80e0c916f4..108f0f1685ef 100644 --- a/drivers/iio/amplifiers/hmc425a.c +++ b/drivers/iio/amplifiers/hmc425a.c @@ -34,7 +34,6 @@ struct hmc425a_chip_info { }; struct hmc425a_state { - struct regulator *reg; struct mutex lock; /* protect sensor state */ struct hmc425a_chip_info *chip_info; struct gpio_descs *gpios; @@ -162,13 +161,6 @@ static const struct of_device_id hmc425a_of_match[] = { }; MODULE_DEVICE_TABLE(of, hmc425a_of_match); -static void hmc425a_reg_disable(void *data) -{ - struct hmc425a_state *st = data; - - regulator_disable(st->reg); -} - static struct hmc425a_chip_info hmc425a_chip_info_tbl[] = { [ID_HMC425A] = { .name = "hmc425a", @@ -211,14 +203,7 @@ static int hmc425a_probe(struct platform_device *pdev) return -ENODEV; } - st->reg = devm_regulator_get(&pdev->dev, "vcc-supply"); - if (IS_ERR(st->reg)) - return PTR_ERR(st->reg); - - ret = regulator_enable(st->reg); - if (ret) - return ret; - ret = devm_add_action_or_reset(&pdev->dev, hmc425a_reg_disable, st); + ret = devm_regulator_get_enable(&pdev->dev, "vcc-supply"); if (ret) return ret; From fd4fc88d0b6fc452571152d4d3b2fc8078be3825 Mon Sep 17 00:00:00 2001 From: Mitja Spes Date: Fri, 21 Oct 2022 15:58:22 +0200 Subject: [PATCH 2296/4122] dt-bindings: iio: pressure: meas,ms5611: add max SPI frequency to the example Added max SPI frequency setting to the example. 
Signed-off-by: Mitja Spes Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221021135827.1444793-4-mitja@lxnav.com Signed-off-by: Jonathan Cameron --- Documentation/devicetree/bindings/iio/pressure/meas,ms5611.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/iio/pressure/meas,ms5611.yaml b/Documentation/devicetree/bindings/iio/pressure/meas,ms5611.yaml index 7fed750fa3ff..21e6ddb7f41e 100644 --- a/Documentation/devicetree/bindings/iio/pressure/meas,ms5611.yaml +++ b/Documentation/devicetree/bindings/iio/pressure/meas,ms5611.yaml @@ -55,6 +55,7 @@ examples: compatible = "meas,ms5611"; reg = <0>; vdd-supply = <&ldo_3v3_gnss>; + spi-max-frequency = <20000000>; }; }; ... From 36a4df5003c91663bd2d68a3cf452c0495ce583c Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Fri, 21 Oct 2022 13:41:14 +0300 Subject: [PATCH 2297/4122] dt-bindings: iio: adc: add AD4130 AD4130-8 is an ultra-low power, high precision, measurement solution for low bandwidth battery operated applications. The fully integrated AFE (Analog Front-End) includes a multiplexer for up to 16 single-ended or 8 differential inputs, PGA (Programmable Gain Amplifier), 24-bit Sigma-Delta ADC, on-chip reference and oscillator, selectable filter options, smart sequencer, sensor biasing and excitation options, diagnostics, and a FIFO buffer. 
Signed-off-by: Cosmin Tanislav Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20221021104115.1812486-2-demonsingur@gmail.com Signed-off-by: Jonathan Cameron --- .../bindings/iio/adc/adi,ad4130.yaml | 259 ++++++++++++++++++ MAINTAINERS | 7 + 2 files changed, 266 insertions(+) create mode 100644 Documentation/devicetree/bindings/iio/adc/adi,ad4130.yaml diff --git a/Documentation/devicetree/bindings/iio/adc/adi,ad4130.yaml b/Documentation/devicetree/bindings/iio/adc/adi,ad4130.yaml new file mode 100644 index 000000000000..28ebd38b9db4 --- /dev/null +++ b/Documentation/devicetree/bindings/iio/adc/adi,ad4130.yaml @@ -0,0 +1,259 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +# Copyright 2022 Analog Devices Inc. +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/iio/adc/adi,ad4130.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Analog Devices AD4130 ADC device driver + +maintainers: + - Cosmin Tanislav + +description: | + Bindings for the Analog Devices AD4130 ADC. Datasheet can be found here: + https://www.analog.com/media/en/technical-documentation/data-sheets/AD4130-8.pdf + +properties: + compatible: + enum: + - adi,ad4130 + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + description: phandle to the master clock (mclk) + + clock-names: + items: + - const: mclk + + interrupts: + maxItems: 1 + + interrupt-names: + description: | + Specify which interrupt pin should be configured as Data Ready / FIFO + interrupt. + Default if not supplied is int. + enum: + - int + - clk + - p2 + - dout + + '#address-cells': + const: 1 + + '#size-cells': + const: 0 + + '#clock-cells': + const: 0 + + clock-output-names: + maxItems: 1 + + refin1-supply: + description: refin1 supply. Can be used as reference for conversion. + + refin2-supply: + description: refin2 supply. Can be used as reference for conversion. + + avdd-supply: + description: AVDD voltage supply. Can be used as reference for conversion. 
+ + iovdd-supply: + description: IOVDD voltage supply. Used for the chip interface. + + spi-max-frequency: + maximum: 5000000 + + adi,ext-clk-freq-hz: + description: Specify the frequency of the external clock. + enum: [76800, 153600] + default: 76800 + + adi,bipolar: + description: Specify if the device should be used in bipolar mode. + type: boolean + + adi,vbias-pins: + description: Analog inputs to apply a voltage bias of (AVDD − AVSS) / 2 to. + $ref: /schemas/types.yaml#/definitions/uint32-array + minItems: 1 + maxItems: 16 + items: + minimum: 0 + maximum: 15 + +required: + - compatible + - reg + - interrupts + +patternProperties: + "^channel@([0-9a-f])$": + type: object + $ref: adc.yaml + unevaluatedProperties: false + + properties: + reg: + description: The channel number. + minimum: 0 + maximum: 15 + + diff-channels: + description: | + Besides the analog inputs available, internal inputs can be used. + 16: Internal temperature sensor. + 17: AVSS + 18: Internal reference + 19: DGND + 20: (AVDD − AVSS)/6+ + 21: (AVDD − AVSS)/6- + 22: (IOVDD − DGND)/6+ + 23: (IOVDD − DGND)/6- + 24: (ALDO − AVSS)/6+ + 25: (ALDO − AVSS)/6- + 26: (DLDO − DGND)/6+ + 27: (DLDO − DGND)/6- + 28: V_MV_P + 29: V_MV_M + items: + minimum: 0 + maximum: 29 + + adi,reference-select: + description: | + Select the reference source to use when converting on the + specific channel. Valid values are: + 0: REFIN1(+)/REFIN1(−) + 1: REFIN2(+)/REFIN2(−) + 2: REFOUT/AVSS (Internal reference) + 3: AVDD/AVSS + If not specified, REFIN1 is used. + $ref: /schemas/types.yaml#/definitions/uint32 + enum: [0, 1, 2, 3] + default: 0 + + adi,excitation-pin-0: + description: | + Analog input to apply excitation current to while the channel + is active. + $ref: /schemas/types.yaml#/definitions/uint32 + minimum: 0 + maximum: 15 + default: 0 + + adi,excitation-pin-1: + description: | + Analog input to apply excitation current to while this channel + is active. 
+ $ref: /schemas/types.yaml#/definitions/uint32 + minimum: 0 + maximum: 15 + default: 0 + + adi,excitation-current-0-nanoamp: + description: | + Excitation current in nanoamps to be applied to pin specified in + adi,excitation-pin-0 while this channel is active. + enum: [0, 100, 10000, 20000, 50000, 100000, 150000, 200000] + default: 0 + + adi,excitation-current-1-nanoamp: + description: | + Excitation current in nanoamps to be applied to pin specified in + adi,excitation-pin-1 while this channel is active. + enum: [0, 100, 10000, 20000, 50000, 100000, 150000, 200000] + default: 0 + + adi,burnout-current-nanoamp: + description: | + Burnout current in nanoamps to be applied for this channel. + enum: [0, 500, 2000, 4000] + default: 0 + + adi,buffered-positive: + description: Enable buffered mode for positive input. + type: boolean + + adi,buffered-negative: + description: Enable buffered mode for negative input. + type: boolean + + required: + - reg + - diff-channels + +additionalProperties: false + +examples: + - | + #include + + spi { + #address-cells = <1>; + #size-cells = <0>; + + adc@0 { + compatible = "adi,ad4130"; + reg = <0>; + + #address-cells = <1>; + #size-cells = <0>; + + spi-max-frequency = <5000000>; + interrupts = <27 IRQ_TYPE_EDGE_FALLING>; + interrupt-parent = <&gpio>; + + channel@0 { + reg = <0>; + + adi,reference-select = <2>; + + /* AIN8, AIN9 */ + diff-channels = <8 9>; + }; + + channel@1 { + reg = <1>; + + adi,reference-select = <2>; + + /* AIN10, AIN11 */ + diff-channels = <10 11>; + }; + + channel@2 { + reg = <2>; + + adi,reference-select = <2>; + + /* Temperature Sensor, DGND */ + diff-channels = <16 19>; + }; + + channel@3 { + reg = <3>; + + adi,reference-select = <2>; + + /* Internal reference, DGND */ + diff-channels = <18 19>; + }; + + channel@4 { + reg = <4>; + + adi,reference-select = <2>; + + /* DGND, DGND */ + diff-channels = <19 19>; + }; + }; + }; diff --git a/MAINTAINERS b/MAINTAINERS index 379945f82a64..5cb67803af96 100644 --- 
a/MAINTAINERS +++ b/MAINTAINERS @@ -1117,6 +1117,13 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git F: drivers/net/amt.c +ANALOG DEVICES INC AD4130 DRIVER +M: Cosmin Tanislav +L: linux-iio@vger.kernel.org +S: Supported +W: http://ez.analog.com/community/linux-device-drivers +F: Documentation/devicetree/bindings/iio/adc/adi,ad4130.yaml + ANALOG DEVICES INC AD7192 DRIVER M: Alexandru Tachici L: linux-iio@vger.kernel.org From 62094060cf3acaf52e277457d807ea753269b89e Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Fri, 21 Oct 2022 13:41:15 +0300 Subject: [PATCH 2298/4122] iio: adc: ad4130: add AD4130 driver AD4130-8 is an ultra-low power, high precision, measurement solution for low bandwidth battery operated applications. The fully integrated AFE (Analog Front-End) includes a multiplexer for up to 16 single-ended or 8 differential inputs, PGA (Programmable Gain Amplifier), 24-bit Sigma-Delta ADC, on-chip reference and oscillator, selectable filter options, smart sequencer, sensor biasing and excitation options, diagnostics, and a FIFO buffer. 
Signed-off-by: Cosmin Tanislav Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221021104115.1812486-3-demonsingur@gmail.com Signed-off-by: Jonathan Cameron --- .../ABI/testing/sysfs-bus-iio-adc-ad4130 | 36 + MAINTAINERS | 2 + drivers/iio/adc/Kconfig | 14 + drivers/iio/adc/Makefile | 1 + drivers/iio/adc/ad4130.c | 2100 +++++++++++++++++ 5 files changed, 2153 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-bus-iio-adc-ad4130 create mode 100644 drivers/iio/adc/ad4130.c diff --git a/Documentation/ABI/testing/sysfs-bus-iio-adc-ad4130 b/Documentation/ABI/testing/sysfs-bus-iio-adc-ad4130 new file mode 100644 index 000000000000..d9555751d21c --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-iio-adc-ad4130 @@ -0,0 +1,36 @@ +What: /sys/bus/iio/devices/iio:deviceX/in_voltage-voltage_filter_mode_available +KernelVersion: 6.2 +Contact: linux-iio@vger.kernel.org +Description: + Reading returns a list with the possible filter modes. + "sinc4" - Sinc 4. Excellent noise performance. Long 1st + conversion time. No natural 50/60Hz rejection. + "sinc4+sinc1" - Sinc4 + averaging by 8. Low 1st conversion time. + "sinc3" - Sinc3. Moderate 1st conversion time. Good noise + performance. + "sinc3+rej60" - Sinc3 + 60Hz rejection. At a sampling frequency + of 50Hz, achieves simultaneous 50Hz and 60Hz + rejection. + "sinc3+sinc1" - Sinc3 + averaging by 8. Low 1st conversion time. + Best used with a sampling frequency of at least + 216.19Hz. + "sinc3+pf1" - Sinc3 + Post Filter 1. + 53dB rejection @ 50Hz, 58dB rejection @ 60Hz. + "sinc3+pf2" - Sinc3 + Post Filter 2. + 70dB rejection @ 50Hz, 70dB rejection @ 60Hz. + "sinc3+pf3" - Sinc3 + Post Filter 3. + 99dB rejection @ 50Hz, 103dB rejection @ 60Hz. + "sinc3+pf4" - Sinc3 + Post Filter 4. + 103dB rejection @ 50Hz, 109dB rejection @ 60Hz. 
+ +What: /sys/bus/iio/devices/iio:deviceX/in_voltageY-voltageZ_filter_mode +KernelVersion: 6.2 +Contact: linux-iio@vger.kernel.org +Description: + Set the filter mode of the differential channel. When the filter + mode changes, the in_voltageY-voltageZ_sampling_frequency and + in_voltageY-voltageZ_sampling_frequency_available attributes + might also change to accommodate the new filter mode. + If the current sampling frequency is out of range for the new + filter mode, the sampling frequency will be changed to the + closest valid one. diff --git a/MAINTAINERS b/MAINTAINERS index 5cb67803af96..ecec0bb6b09e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1122,7 +1122,9 @@ M: Cosmin Tanislav L: linux-iio@vger.kernel.org S: Supported W: http://ez.analog.com/community/linux-device-drivers +F: Documentation/ABI/testing/sysfs-bus-iio-adc-ad4130 F: Documentation/devicetree/bindings/iio/adc/adi,ad4130.yaml +F: drivers/iio/adc/ad4130.c ANALOG DEVICES INC AD7192 DRIVER M: Alexandru Tachici diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index 5386d862def9..8d719fbb6acc 100644 --- a/drivers/iio/adc/Kconfig +++ b/drivers/iio/adc/Kconfig @@ -21,6 +21,20 @@ config AD_SIGMA_DELTA select IIO_BUFFER select IIO_TRIGGERED_BUFFER +config AD4130 + tristate "Analog Device AD4130 ADC Driver" + depends on SPI + select IIO_BUFFER + select IIO_KFIFO_BUF + select REGMAP_SPI + depends on COMMON_CLK + help + Say yes here to build support for Analog Devices AD4130-8 SPI analog + to digital converters (ADC). + + To compile this driver as a module, choose M here: the module will be + called ad4130. 
+ config AD7091R5 tristate "Analog Devices AD7091R5 ADC Driver" depends on I2C diff --git a/drivers/iio/adc/Makefile b/drivers/iio/adc/Makefile index 1571e891828e..4ef41a7dfac6 100644 --- a/drivers/iio/adc/Makefile +++ b/drivers/iio/adc/Makefile @@ -6,6 +6,7 @@ # When adding new entries keep the list in alphabetical order obj-$(CONFIG_AB8500_GPADC) += ab8500-gpadc.o obj-$(CONFIG_AD_SIGMA_DELTA) += ad_sigma_delta.o +obj-$(CONFIG_AD4130) += ad4130.o obj-$(CONFIG_AD7091R5) += ad7091r5.o ad7091r-base.o obj-$(CONFIG_AD7124) += ad7124.o obj-$(CONFIG_AD7192) += ad7192.o diff --git a/drivers/iio/adc/ad4130.c b/drivers/iio/adc/ad4130.c new file mode 100644 index 000000000000..9a4d0043d797 --- /dev/null +++ b/drivers/iio/adc/ad4130.c @@ -0,0 +1,2100 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2022 Analog Devices, Inc. + * Author: Cosmin Tanislav + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#define AD4130_NAME "ad4130" + +#define AD4130_COMMS_READ_MASK BIT(6) + +#define AD4130_STATUS_REG 0x00 + +#define AD4130_ADC_CONTROL_REG 0x01 +#define AD4130_ADC_CONTROL_BIPOLAR_MASK BIT(14) +#define AD4130_ADC_CONTROL_INT_REF_VAL_MASK BIT(13) +#define AD4130_INT_REF_2_5V 2500000 +#define AD4130_INT_REF_1_25V 1250000 +#define AD4130_ADC_CONTROL_CSB_EN_MASK BIT(9) +#define AD4130_ADC_CONTROL_INT_REF_EN_MASK BIT(8) +#define AD4130_ADC_CONTROL_MODE_MASK GENMASK(5, 2) +#define AD4130_ADC_CONTROL_MCLK_SEL_MASK GENMASK(1, 0) +#define AD4130_MCLK_FREQ_76_8KHZ 76800 +#define AD4130_MCLK_FREQ_153_6KHZ 153600 + +#define AD4130_DATA_REG 0x02 + +#define AD4130_IO_CONTROL_REG 0x03 +#define AD4130_IO_CONTROL_INT_PIN_SEL_MASK GENMASK(9, 8) +#define AD4130_IO_CONTROL_GPIO_DATA_MASK GENMASK(7, 4) +#define AD4130_IO_CONTROL_GPIO_CTRL_MASK GENMASK(3, 0) + +#define AD4130_VBIAS_REG 0x04 + 
+#define AD4130_ID_REG 0x05 + +#define AD4130_ERROR_REG 0x06 + +#define AD4130_ERROR_EN_REG 0x07 + +#define AD4130_MCLK_COUNT_REG 0x08 + +#define AD4130_CHANNEL_X_REG(x) (0x09 + (x)) +#define AD4130_CHANNEL_EN_MASK BIT(23) +#define AD4130_CHANNEL_SETUP_MASK GENMASK(22, 20) +#define AD4130_CHANNEL_AINP_MASK GENMASK(17, 13) +#define AD4130_CHANNEL_AINM_MASK GENMASK(12, 8) +#define AD4130_CHANNEL_IOUT1_MASK GENMASK(7, 4) +#define AD4130_CHANNEL_IOUT2_MASK GENMASK(3, 0) + +#define AD4130_CONFIG_X_REG(x) (0x19 + (x)) +#define AD4130_CONFIG_IOUT1_VAL_MASK GENMASK(15, 13) +#define AD4130_CONFIG_IOUT2_VAL_MASK GENMASK(12, 10) +#define AD4130_CONFIG_BURNOUT_MASK GENMASK(9, 8) +#define AD4130_CONFIG_REF_BUFP_MASK BIT(7) +#define AD4130_CONFIG_REF_BUFM_MASK BIT(6) +#define AD4130_CONFIG_REF_SEL_MASK GENMASK(5, 4) +#define AD4130_CONFIG_PGA_MASK GENMASK(3, 1) + +#define AD4130_FILTER_X_REG(x) (0x21 + (x)) +#define AD4130_FILTER_MODE_MASK GENMASK(15, 12) +#define AD4130_FILTER_SELECT_MASK GENMASK(10, 0) +#define AD4130_FILTER_SELECT_MIN 1 + +#define AD4130_OFFSET_X_REG(x) (0x29 + (x)) + +#define AD4130_GAIN_X_REG(x) (0x31 + (x)) + +#define AD4130_MISC_REG 0x39 + +#define AD4130_FIFO_CONTROL_REG 0x3a +#define AD4130_FIFO_CONTROL_HEADER_MASK BIT(18) +#define AD4130_FIFO_CONTROL_MODE_MASK GENMASK(17, 16) +#define AD4130_FIFO_CONTROL_WM_INT_EN_MASK BIT(9) +#define AD4130_FIFO_CONTROL_WM_MASK GENMASK(7, 0) +#define AD4130_WATERMARK_256 0 + +#define AD4130_FIFO_STATUS_REG 0x3b + +#define AD4130_FIFO_THRESHOLD_REG 0x3c + +#define AD4130_FIFO_DATA_REG 0x3d +#define AD4130_FIFO_SIZE 256 +#define AD4130_FIFO_MAX_SAMPLE_SIZE 3 + +#define AD4130_MAX_ANALOG_PINS 16 +#define AD4130_MAX_CHANNELS 16 +#define AD4130_MAX_DIFF_INPUTS 30 +#define AD4130_MAX_GPIOS 4 +#define AD4130_MAX_ODR 2400 +#define AD4130_MAX_PGA 8 +#define AD4130_MAX_SETUPS 8 + +#define AD4130_AIN2_P1 0x2 +#define AD4130_AIN3_P2 0x3 + +#define AD4130_RESET_BUF_SIZE 8 +#define AD4130_RESET_SLEEP_US (160 * MICRO / 
AD4130_MCLK_FREQ_76_8KHZ) + +#define AD4130_INVALID_SLOT -1 + +static const unsigned int ad4130_reg_size[] = { + [AD4130_STATUS_REG] = 1, + [AD4130_ADC_CONTROL_REG] = 2, + [AD4130_DATA_REG] = 3, + [AD4130_IO_CONTROL_REG] = 2, + [AD4130_VBIAS_REG] = 2, + [AD4130_ID_REG] = 1, + [AD4130_ERROR_REG] = 2, + [AD4130_ERROR_EN_REG] = 2, + [AD4130_MCLK_COUNT_REG] = 1, + [AD4130_CHANNEL_X_REG(0) ... AD4130_CHANNEL_X_REG(AD4130_MAX_CHANNELS - 1)] = 3, + [AD4130_CONFIG_X_REG(0) ... AD4130_CONFIG_X_REG(AD4130_MAX_SETUPS - 1)] = 2, + [AD4130_FILTER_X_REG(0) ... AD4130_FILTER_X_REG(AD4130_MAX_SETUPS - 1)] = 3, + [AD4130_OFFSET_X_REG(0) ... AD4130_OFFSET_X_REG(AD4130_MAX_SETUPS - 1)] = 3, + [AD4130_GAIN_X_REG(0) ... AD4130_GAIN_X_REG(AD4130_MAX_SETUPS - 1)] = 3, + [AD4130_MISC_REG] = 2, + [AD4130_FIFO_CONTROL_REG] = 3, + [AD4130_FIFO_STATUS_REG] = 1, + [AD4130_FIFO_THRESHOLD_REG] = 3, + [AD4130_FIFO_DATA_REG] = 3, +}; + +enum ad4130_int_ref_val { + AD4130_INT_REF_VAL_2_5V, + AD4130_INT_REF_VAL_1_25V, +}; + +enum ad4130_mclk_sel { + AD4130_MCLK_76_8KHZ, + AD4130_MCLK_76_8KHZ_OUT, + AD4130_MCLK_76_8KHZ_EXT, + AD4130_MCLK_153_6KHZ_EXT, +}; + +enum ad4130_int_pin_sel { + AD4130_INT_PIN_INT, + AD4130_INT_PIN_CLK, + AD4130_INT_PIN_P2, + AD4130_INT_PIN_DOUT, +}; + +enum ad4130_iout { + AD4130_IOUT_OFF, + AD4130_IOUT_10000NA, + AD4130_IOUT_20000NA, + AD4130_IOUT_50000NA, + AD4130_IOUT_100000NA, + AD4130_IOUT_150000NA, + AD4130_IOUT_200000NA, + AD4130_IOUT_100NA, + AD4130_IOUT_MAX +}; + +enum ad4130_burnout { + AD4130_BURNOUT_OFF, + AD4130_BURNOUT_500NA, + AD4130_BURNOUT_2000NA, + AD4130_BURNOUT_4000NA, + AD4130_BURNOUT_MAX +}; + +enum ad4130_ref_sel { + AD4130_REF_REFIN1, + AD4130_REF_REFIN2, + AD4130_REF_REFOUT_AVSS, + AD4130_REF_AVDD_AVSS, + AD4130_REF_SEL_MAX +}; + +enum ad4130_fifo_mode { + AD4130_FIFO_MODE_DISABLED = 0b00, + AD4130_FIFO_MODE_WM = 0b01, +}; + +enum ad4130_mode { + AD4130_MODE_CONTINUOUS = 0b0000, + AD4130_MODE_IDLE = 0b0100, +}; + +enum ad4130_filter_mode { + 
AD4130_FILTER_SINC4, + AD4130_FILTER_SINC4_SINC1, + AD4130_FILTER_SINC3, + AD4130_FILTER_SINC3_REJ60, + AD4130_FILTER_SINC3_SINC1, + AD4130_FILTER_SINC3_PF1, + AD4130_FILTER_SINC3_PF2, + AD4130_FILTER_SINC3_PF3, + AD4130_FILTER_SINC3_PF4, +}; + +enum ad4130_pin_function { + AD4130_PIN_FN_NONE, + AD4130_PIN_FN_SPECIAL = BIT(0), + AD4130_PIN_FN_DIFF = BIT(1), + AD4130_PIN_FN_EXCITATION = BIT(2), + AD4130_PIN_FN_VBIAS = BIT(3), +}; + +struct ad4130_setup_info { + unsigned int iout0_val; + unsigned int iout1_val; + unsigned int burnout; + unsigned int pga; + unsigned int fs; + u32 ref_sel; + enum ad4130_filter_mode filter_mode; + bool ref_bufp; + bool ref_bufm; +}; + +struct ad4130_slot_info { + struct ad4130_setup_info setup; + unsigned int enabled_channels; + unsigned int channels; +}; + +struct ad4130_chan_info { + struct ad4130_setup_info setup; + u32 iout0; + u32 iout1; + int slot; + bool enabled; + bool initialized; +}; + +struct ad4130_filter_config { + enum ad4130_filter_mode filter_mode; + unsigned int odr_div; + unsigned int fs_max; + enum iio_available_type samp_freq_avail_type; + int samp_freq_avail_len; + int samp_freq_avail[3][2]; +}; + +struct ad4130_state { + struct regmap *regmap; + struct spi_device *spi; + struct clk *mclk; + struct regulator_bulk_data regulators[4]; + u32 irq_trigger; + u32 inv_irq_trigger; + + /* + * Synchronize access to members of the driver state, and ensure + * atomicity of consecutive regmap operations.
+ */ + struct mutex lock; + struct completion completion; + + struct iio_chan_spec chans[AD4130_MAX_CHANNELS]; + struct ad4130_chan_info chans_info[AD4130_MAX_CHANNELS]; + struct ad4130_slot_info slots_info[AD4130_MAX_SETUPS]; + enum ad4130_pin_function pins_fn[AD4130_MAX_ANALOG_PINS]; + u32 vbias_pins[AD4130_MAX_ANALOG_PINS]; + u32 num_vbias_pins; + int scale_tbls[AD4130_REF_SEL_MAX][AD4130_MAX_PGA][2]; + struct gpio_chip gc; + struct clk_hw int_clk_hw; + + u32 int_pin_sel; + u32 int_ref_uv; + u32 mclk_sel; + bool int_ref_en; + bool bipolar; + + unsigned int num_enabled_channels; + unsigned int effective_watermark; + unsigned int watermark; + + struct spi_message fifo_msg; + struct spi_transfer fifo_xfer[2]; + + /* + * DMA (thus cache coherency maintenance) requires any transfer + * buffers to live in their own cache lines. As the use of these + * buffers is synchronous, all of the buffers used for DMA in this + * driver may share a cache line. + */ + u8 reset_buf[AD4130_RESET_BUF_SIZE] __aligned(IIO_DMA_MINALIGN); + u8 reg_write_tx_buf[4]; + u8 reg_read_tx_buf[1]; + u8 reg_read_rx_buf[3]; + u8 fifo_tx_buf[2]; + u8 fifo_rx_buf[AD4130_FIFO_SIZE * + AD4130_FIFO_MAX_SAMPLE_SIZE]; +}; + +static const char * const ad4130_int_pin_names[] = { + [AD4130_INT_PIN_INT] = "int", + [AD4130_INT_PIN_CLK] = "clk", + [AD4130_INT_PIN_P2] = "p2", + [AD4130_INT_PIN_DOUT] = "dout", +}; + +static const unsigned int ad4130_iout_current_na_tbl[AD4130_IOUT_MAX] = { + [AD4130_IOUT_OFF] = 0, + [AD4130_IOUT_100NA] = 100, + [AD4130_IOUT_10000NA] = 10000, + [AD4130_IOUT_20000NA] = 20000, + [AD4130_IOUT_50000NA] = 50000, + [AD4130_IOUT_100000NA] = 100000, + [AD4130_IOUT_150000NA] = 150000, + [AD4130_IOUT_200000NA] = 200000, +}; + +static const unsigned int ad4130_burnout_current_na_tbl[AD4130_BURNOUT_MAX] = { + [AD4130_BURNOUT_OFF] = 0, + [AD4130_BURNOUT_500NA] = 500, + [AD4130_BURNOUT_2000NA] = 2000, + [AD4130_BURNOUT_4000NA] = 4000, +}; + +#define AD4130_VARIABLE_ODR_CONFIG(_filter_mode, 
_odr_div, _fs_max) \ +{ \ + .filter_mode = (_filter_mode), \ + .odr_div = (_odr_div), \ + .fs_max = (_fs_max), \ + .samp_freq_avail_type = IIO_AVAIL_RANGE, \ + .samp_freq_avail = { \ + { AD4130_MAX_ODR, (_odr_div) * (_fs_max) }, \ + { AD4130_MAX_ODR, (_odr_div) * (_fs_max) }, \ + { AD4130_MAX_ODR, (_odr_div) }, \ + }, \ +} + +#define AD4130_FIXED_ODR_CONFIG(_filter_mode, _odr_div) \ +{ \ + .filter_mode = (_filter_mode), \ + .odr_div = (_odr_div), \ + .fs_max = AD4130_FILTER_SELECT_MIN, \ + .samp_freq_avail_type = IIO_AVAIL_LIST, \ + .samp_freq_avail_len = 1, \ + .samp_freq_avail = { \ + { AD4130_MAX_ODR, (_odr_div) }, \ + }, \ +} + +static const struct ad4130_filter_config ad4130_filter_configs[] = { + AD4130_VARIABLE_ODR_CONFIG(AD4130_FILTER_SINC4, 1, 10), + AD4130_VARIABLE_ODR_CONFIG(AD4130_FILTER_SINC4_SINC1, 11, 10), + AD4130_VARIABLE_ODR_CONFIG(AD4130_FILTER_SINC3, 1, 2047), + AD4130_VARIABLE_ODR_CONFIG(AD4130_FILTER_SINC3_REJ60, 1, 2047), + AD4130_VARIABLE_ODR_CONFIG(AD4130_FILTER_SINC3_SINC1, 10, 2047), + AD4130_FIXED_ODR_CONFIG(AD4130_FILTER_SINC3_PF1, 92), + AD4130_FIXED_ODR_CONFIG(AD4130_FILTER_SINC3_PF2, 100), + AD4130_FIXED_ODR_CONFIG(AD4130_FILTER_SINC3_PF3, 124), + AD4130_FIXED_ODR_CONFIG(AD4130_FILTER_SINC3_PF4, 148), +}; + +static const char * const ad4130_filter_modes_str[] = { + [AD4130_FILTER_SINC4] = "sinc4", + [AD4130_FILTER_SINC4_SINC1] = "sinc4+sinc1", + [AD4130_FILTER_SINC3] = "sinc3", + [AD4130_FILTER_SINC3_REJ60] = "sinc3+rej60", + [AD4130_FILTER_SINC3_SINC1] = "sinc3+sinc1", + [AD4130_FILTER_SINC3_PF1] = "sinc3+pf1", + [AD4130_FILTER_SINC3_PF2] = "sinc3+pf2", + [AD4130_FILTER_SINC3_PF3] = "sinc3+pf3", + [AD4130_FILTER_SINC3_PF4] = "sinc3+pf4", +}; + +static int ad4130_get_reg_size(struct ad4130_state *st, unsigned int reg, + unsigned int *size) +{ + if (reg >= ARRAY_SIZE(ad4130_reg_size)) + return -EINVAL; + + *size = ad4130_reg_size[reg]; + + return 0; +} + +static unsigned int ad4130_data_reg_size(struct ad4130_state *st) +{ + unsigned 
int data_reg_size; + int ret; + + ret = ad4130_get_reg_size(st, AD4130_DATA_REG, &data_reg_size); + if (ret) + return 0; + + return data_reg_size; +} + +static unsigned int ad4130_resolution(struct ad4130_state *st) +{ + return ad4130_data_reg_size(st) * BITS_PER_BYTE; +} + +static int ad4130_reg_write(void *context, unsigned int reg, unsigned int val) +{ + struct ad4130_state *st = context; + unsigned int size; + int ret; + + ret = ad4130_get_reg_size(st, reg, &size); + if (ret) + return ret; + + st->reg_write_tx_buf[0] = reg; + + switch (size) { + case 3: + put_unaligned_be24(val, &st->reg_write_tx_buf[1]); + break; + case 2: + put_unaligned_be16(val, &st->reg_write_tx_buf[1]); + break; + case 1: + st->reg_write_tx_buf[1] = val; + break; + default: + return -EINVAL; + } + + return spi_write(st->spi, st->reg_write_tx_buf, size + 1); +} + +static int ad4130_reg_read(void *context, unsigned int reg, unsigned int *val) +{ + struct ad4130_state *st = context; + struct spi_transfer t[] = { + { + .tx_buf = st->reg_read_tx_buf, + .len = sizeof(st->reg_read_tx_buf), + }, + { + .rx_buf = st->reg_read_rx_buf, + }, + }; + unsigned int size; + int ret; + + ret = ad4130_get_reg_size(st, reg, &size); + if (ret) + return ret; + + st->reg_read_tx_buf[0] = AD4130_COMMS_READ_MASK | reg; + t[1].len = size; + + ret = spi_sync_transfer(st->spi, t, ARRAY_SIZE(t)); + if (ret) + return ret; + + switch (size) { + case 3: + *val = get_unaligned_be24(st->reg_read_rx_buf); + break; + case 2: + *val = get_unaligned_be16(st->reg_read_rx_buf); + break; + case 1: + *val = st->reg_read_rx_buf[0]; + break; + default: + return -EINVAL; + } + + return 0; +} + +static const struct regmap_config ad4130_regmap_config = { + .reg_read = ad4130_reg_read, + .reg_write = ad4130_reg_write, +}; + +static int ad4130_gpio_init_valid_mask(struct gpio_chip *gc, + unsigned long *valid_mask, + unsigned int ngpios) +{ + struct ad4130_state *st = gpiochip_get_data(gc); + unsigned int i; + + /* + * Output-only GPIO 
functionality is available on pins AIN2 through + * AIN5. If these pins are used for anything else, do not expose them. + */ + for (i = 0; i < ngpios; i++) { + unsigned int pin = i + AD4130_AIN2_P1; + bool valid = st->pins_fn[pin] == AD4130_PIN_FN_NONE; + + __assign_bit(i, valid_mask, valid); + } + + return 0; +} + +static int ad4130_gpio_get_direction(struct gpio_chip *gc, unsigned int offset) +{ + return GPIO_LINE_DIRECTION_OUT; +} + +static void ad4130_gpio_set(struct gpio_chip *gc, unsigned int offset, + int value) +{ + struct ad4130_state *st = gpiochip_get_data(gc); + unsigned int mask = FIELD_PREP(AD4130_IO_CONTROL_GPIO_DATA_MASK, + BIT(offset)); + + regmap_update_bits(st->regmap, AD4130_IO_CONTROL_REG, mask, + value ? mask : 0); +} + +static int ad4130_set_mode(struct ad4130_state *st, enum ad4130_mode mode) +{ + return regmap_update_bits(st->regmap, AD4130_ADC_CONTROL_REG, + AD4130_ADC_CONTROL_MODE_MASK, + FIELD_PREP(AD4130_ADC_CONTROL_MODE_MASK, mode)); +} + +static int ad4130_set_watermark_interrupt_en(struct ad4130_state *st, bool en) +{ + return regmap_update_bits(st->regmap, AD4130_FIFO_CONTROL_REG, + AD4130_FIFO_CONTROL_WM_INT_EN_MASK, + FIELD_PREP(AD4130_FIFO_CONTROL_WM_INT_EN_MASK, en)); +} + +static unsigned int ad4130_watermark_reg_val(unsigned int val) +{ + if (val == AD4130_FIFO_SIZE) + val = AD4130_WATERMARK_256; + + return val; +} + +static int ad4130_set_fifo_mode(struct ad4130_state *st, + enum ad4130_fifo_mode mode) +{ + return regmap_update_bits(st->regmap, AD4130_FIFO_CONTROL_REG, + AD4130_FIFO_CONTROL_MODE_MASK, + FIELD_PREP(AD4130_FIFO_CONTROL_MODE_MASK, mode)); +} + +static void ad4130_push_fifo_data(struct iio_dev *indio_dev) +{ + struct ad4130_state *st = iio_priv(indio_dev); + unsigned int data_reg_size = ad4130_data_reg_size(st); + unsigned int transfer_len = st->effective_watermark * data_reg_size; + unsigned int set_size = st->num_enabled_channels * data_reg_size; + unsigned int i; + int ret; + + st->fifo_tx_buf[1] = 
ad4130_watermark_reg_val(st->effective_watermark); + st->fifo_xfer[1].len = transfer_len; + + ret = spi_sync(st->spi, &st->fifo_msg); + if (ret) + return; + + for (i = 0; i < transfer_len; i += set_size) + iio_push_to_buffers(indio_dev, &st->fifo_rx_buf[i]); +} + +static irqreturn_t ad4130_irq_handler(int irq, void *private) +{ + struct iio_dev *indio_dev = private; + struct ad4130_state *st = iio_priv(indio_dev); + + if (iio_buffer_enabled(indio_dev)) + ad4130_push_fifo_data(indio_dev); + else + complete(&st->completion); + + return IRQ_HANDLED; +} + +static int ad4130_find_slot(struct ad4130_state *st, + struct ad4130_setup_info *target_setup_info, + unsigned int *slot, bool *overwrite) +{ + unsigned int i; + + *slot = AD4130_INVALID_SLOT; + *overwrite = false; + + for (i = 0; i < AD4130_MAX_SETUPS; i++) { + struct ad4130_slot_info *slot_info = &st->slots_info[i]; + + /* Immediately accept a matching setup info. */ + if (!memcmp(target_setup_info, &slot_info->setup, + sizeof(*target_setup_info))) { + *slot = i; + return 0; + } + + /* Ignore all setups which are used by enabled channels. */ + if (slot_info->enabled_channels) + continue; + + /* Find the least used slot. 
*/ + if (*slot == AD4130_INVALID_SLOT || + slot_info->channels < st->slots_info[*slot].channels) + *slot = i; + } + + if (*slot == AD4130_INVALID_SLOT) + return -EINVAL; + + *overwrite = true; + + return 0; +} + +static void ad4130_unlink_channel(struct ad4130_state *st, unsigned int channel) +{ + struct ad4130_chan_info *chan_info = &st->chans_info[channel]; + struct ad4130_slot_info *slot_info = &st->slots_info[chan_info->slot]; + + chan_info->slot = AD4130_INVALID_SLOT; + slot_info->channels--; +} + +static int ad4130_unlink_slot(struct ad4130_state *st, unsigned int slot) +{ + unsigned int i; + + for (i = 0; i < AD4130_MAX_CHANNELS; i++) { + struct ad4130_chan_info *chan_info = &st->chans_info[i]; + + if (!chan_info->initialized || chan_info->slot != slot) + continue; + + ad4130_unlink_channel(st, i); + } + + return 0; +} + +static int ad4130_link_channel_slot(struct ad4130_state *st, + unsigned int channel, unsigned int slot) +{ + struct ad4130_slot_info *slot_info = &st->slots_info[slot]; + struct ad4130_chan_info *chan_info = &st->chans_info[channel]; + int ret; + + ret = regmap_update_bits(st->regmap, AD4130_CHANNEL_X_REG(channel), + AD4130_CHANNEL_SETUP_MASK, + FIELD_PREP(AD4130_CHANNEL_SETUP_MASK, slot)); + if (ret) + return ret; + + chan_info->slot = slot; + slot_info->channels++; + + return 0; +} + +static int ad4130_write_slot_setup(struct ad4130_state *st, + unsigned int slot, + struct ad4130_setup_info *setup_info) +{ + unsigned int val; + int ret; + + val = FIELD_PREP(AD4130_CONFIG_IOUT1_VAL_MASK, setup_info->iout0_val) | + FIELD_PREP(AD4130_CONFIG_IOUT1_VAL_MASK, setup_info->iout1_val) | + FIELD_PREP(AD4130_CONFIG_BURNOUT_MASK, setup_info->burnout) | + FIELD_PREP(AD4130_CONFIG_REF_BUFP_MASK, setup_info->ref_bufp) | + FIELD_PREP(AD4130_CONFIG_REF_BUFM_MASK, setup_info->ref_bufm) | + FIELD_PREP(AD4130_CONFIG_REF_SEL_MASK, setup_info->ref_sel) | + FIELD_PREP(AD4130_CONFIG_PGA_MASK, setup_info->pga); + + ret = regmap_write(st->regmap, 
AD4130_CONFIG_X_REG(slot), val); + if (ret) + return ret; + + val = FIELD_PREP(AD4130_FILTER_MODE_MASK, setup_info->filter_mode) | + FIELD_PREP(AD4130_FILTER_SELECT_MASK, setup_info->fs); + + ret = regmap_write(st->regmap, AD4130_FILTER_X_REG(slot), val); + if (ret) + return ret; + + memcpy(&st->slots_info[slot].setup, setup_info, sizeof(*setup_info)); + + return 0; +} + +static int ad4130_write_channel_setup(struct ad4130_state *st, + unsigned int channel, bool on_enable) +{ + struct ad4130_chan_info *chan_info = &st->chans_info[channel]; + struct ad4130_setup_info *setup_info = &chan_info->setup; + bool overwrite; + int slot; + int ret; + + /* + * The following cases need to be handled. + * + * 1. Enabled and linked channel with setup changes: + * - Find a slot. If not possible, return error. + * - Unlink channel from current slot. + * - If the slot has channels linked to it, unlink all channels, and + * write the new setup to it. + * - Link channel to new slot. + * + * 2. Soon to be enabled and unlinked channel: + * - Find a slot. If not possible, return error. + * - If the slot has channels linked to it, unlink all channels, and + * write the new setup to it. + * - Link channel to the slot. + * + * 3. Disabled and linked channel with setup changes: + * - Unlink channel from current slot. + * + * 4. Soon to be enabled and linked channel: + * 5. Disabled and unlinked channel with setup changes: + * - Do nothing. 
+ */ + + /* Case 4 */ + if (on_enable && chan_info->slot != AD4130_INVALID_SLOT) + return 0; + + if (!on_enable && !chan_info->enabled) { + if (chan_info->slot != AD4130_INVALID_SLOT) + /* Case 3 */ + ad4130_unlink_channel(st, channel); + + /* Cases 3 & 5 */ + return 0; + } + + /* Cases 1 & 2 */ + ret = ad4130_find_slot(st, setup_info, &slot, &overwrite); + if (ret) + return ret; + + if (chan_info->slot != AD4130_INVALID_SLOT) + /* Case 1 */ + ad4130_unlink_channel(st, channel); + + if (overwrite) { + ret = ad4130_unlink_slot(st, slot); + if (ret) + return ret; + + ret = ad4130_write_slot_setup(st, slot, setup_info); + if (ret) + return ret; + } + + return ad4130_link_channel_slot(st, channel, slot); +} + +static int ad4130_set_channel_enable(struct ad4130_state *st, + unsigned int channel, bool status) +{ + struct ad4130_chan_info *chan_info = &st->chans_info[channel]; + struct ad4130_slot_info *slot_info; + int ret; + + if (chan_info->enabled == status) + return 0; + + if (status) { + ret = ad4130_write_channel_setup(st, channel, true); + if (ret) + return ret; + } + + slot_info = &st->slots_info[chan_info->slot]; + + ret = regmap_update_bits(st->regmap, AD4130_CHANNEL_X_REG(channel), + AD4130_CHANNEL_EN_MASK, + FIELD_PREP(AD4130_CHANNEL_EN_MASK, status)); + if (ret) + return ret; + + slot_info->enabled_channels += status ? 1 : -1; + chan_info->enabled = status; + + return 0; +} + +/* + * Table 58. FILTER_MODE_n bits and Filter Types of the datasheet describes + * the relation between filter mode, ODR and FS. + * + * Notice that the max ODR of each filter mode is not necessarily the + * absolute max ODR supported by the chip. + * + * The ODR divider is not explicitly specified, but it can be deduced based + * on the ODR range of each filter mode. + * + * For example, for Sinc4+Sinc1, max ODR is 218.18. That means that the + * absolute max ODR is divided by 11 to achieve the max ODR of this filter + * mode. 
+ * + * The formulas for converting between ODR and FS for a specific filter + * mode can be deduced from the same table. + * + * Notice that FS = 1 actually means max ODR, and that ODR decreases by + * (maximum ODR / maximum FS) for each increment of FS. + * + * odr = MAX_ODR / odr_div * (1 - (fs - 1) / fs_max) <=> + * odr = MAX_ODR * (1 - (fs - 1) / fs_max) / odr_div <=> + * odr = MAX_ODR * (1 - (fs - 1) / fs_max) / odr_div <=> + * odr = MAX_ODR * (fs_max - fs + 1) / (fs_max * odr_div) + * (used in ad4130_fs_to_freq) + * + * For the opposite formula, FS can be extracted from the last one. + * + * MAX_ODR * (fs_max - fs + 1) = fs_max * odr_div * odr <=> + * fs_max - fs + 1 = fs_max * odr_div * odr / MAX_ODR <=> + * fs = 1 + fs_max - fs_max * odr_div * odr / MAX_ODR + * (used in ad4130_freq_to_fs) + */ + +static void ad4130_freq_to_fs(enum ad4130_filter_mode filter_mode, + int val, int val2, unsigned int *fs) +{ + const struct ad4130_filter_config *filter_config = + &ad4130_filter_configs[filter_mode]; + u64 dividend, divisor; + int temp; + + dividend = filter_config->fs_max * filter_config->odr_div * + ((u64)val * NANO + val2); + divisor = (u64)AD4130_MAX_ODR * NANO; + + temp = AD4130_FILTER_SELECT_MIN + filter_config->fs_max - + DIV64_U64_ROUND_CLOSEST(dividend, divisor); + + if (temp < AD4130_FILTER_SELECT_MIN) + temp = AD4130_FILTER_SELECT_MIN; + else if (temp > filter_config->fs_max) + temp = filter_config->fs_max; + + *fs = temp; +} + +static void ad4130_fs_to_freq(enum ad4130_filter_mode filter_mode, + unsigned int fs, int *val, int *val2) +{ + const struct ad4130_filter_config *filter_config = + &ad4130_filter_configs[filter_mode]; + unsigned int dividend, divisor; + u64 temp; + + dividend = (filter_config->fs_max - fs + AD4130_FILTER_SELECT_MIN) * + AD4130_MAX_ODR; + divisor = filter_config->fs_max * filter_config->odr_div; + + temp = div_u64((u64)dividend * NANO, divisor); + *val = div_u64_rem(temp, NANO, val2); +} + +static int 
ad4130_set_filter_mode(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, + unsigned int val) +{ + struct ad4130_state *st = iio_priv(indio_dev); + unsigned int channel = chan->scan_index; + struct ad4130_chan_info *chan_info = &st->chans_info[channel]; + struct ad4130_setup_info *setup_info = &chan_info->setup; + enum ad4130_filter_mode old_filter_mode; + int freq_val, freq_val2; + unsigned int old_fs; + int ret = 0; + + mutex_lock(&st->lock); + if (setup_info->filter_mode == val) + goto out; + + old_fs = setup_info->fs; + old_filter_mode = setup_info->filter_mode; + + /* + * When switching between filter modes, try to match the ODR as + * close as possible. To do this, convert the current FS into ODR + * using the old filter mode, then convert it back into FS using + * the new filter mode. + */ + ad4130_fs_to_freq(setup_info->filter_mode, setup_info->fs, + &freq_val, &freq_val2); + + ad4130_freq_to_fs(val, freq_val, freq_val2, &setup_info->fs); + + setup_info->filter_mode = val; + + ret = ad4130_write_channel_setup(st, channel, false); + if (ret) { + setup_info->fs = old_fs; + setup_info->filter_mode = old_filter_mode; + } + + out: + mutex_unlock(&st->lock); + + return ret; +} + +static int ad4130_get_filter_mode(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan) +{ + struct ad4130_state *st = iio_priv(indio_dev); + unsigned int channel = chan->scan_index; + struct ad4130_setup_info *setup_info = &st->chans_info[channel].setup; + enum ad4130_filter_mode filter_mode; + + mutex_lock(&st->lock); + filter_mode = setup_info->filter_mode; + mutex_unlock(&st->lock); + + return filter_mode; +} + +static const struct iio_enum ad4130_filter_mode_enum = { + .items = ad4130_filter_modes_str, + .num_items = ARRAY_SIZE(ad4130_filter_modes_str), + .set = ad4130_set_filter_mode, + .get = ad4130_get_filter_mode, +}; + +static const struct iio_chan_spec_ext_info ad4130_filter_mode_ext_info[] = { + IIO_ENUM("filter_mode", IIO_SEPARATE, 
&ad4130_filter_mode_enum), + IIO_ENUM_AVAILABLE("filter_mode", IIO_SHARED_BY_TYPE, + &ad4130_filter_mode_enum), + { } +}; + +static const struct iio_chan_spec ad4130_channel_template = { + .type = IIO_VOLTAGE, + .indexed = 1, + .differential = 1, + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | + BIT(IIO_CHAN_INFO_SCALE) | + BIT(IIO_CHAN_INFO_OFFSET) | + BIT(IIO_CHAN_INFO_SAMP_FREQ), + .info_mask_separate_available = BIT(IIO_CHAN_INFO_SCALE) | + BIT(IIO_CHAN_INFO_SAMP_FREQ), + .ext_info = ad4130_filter_mode_ext_info, + .scan_type = { + .sign = 'u', + .endianness = IIO_BE, + }, +}; + +static int ad4130_set_channel_pga(struct ad4130_state *st, unsigned int channel, + int val, int val2) +{ + struct ad4130_chan_info *chan_info = &st->chans_info[channel]; + struct ad4130_setup_info *setup_info = &chan_info->setup; + unsigned int pga, old_pga; + int ret = 0; + + for (pga = 0; pga < AD4130_MAX_PGA; pga++) + if (val == st->scale_tbls[setup_info->ref_sel][pga][0] && + val2 == st->scale_tbls[setup_info->ref_sel][pga][1]) + break; + + if (pga == AD4130_MAX_PGA) + return -EINVAL; + + mutex_lock(&st->lock); + if (pga == setup_info->pga) + goto out; + + old_pga = setup_info->pga; + setup_info->pga = pga; + + ret = ad4130_write_channel_setup(st, channel, false); + if (ret) + setup_info->pga = old_pga; + +out: + mutex_unlock(&st->lock); + + return ret; +} + +static int ad4130_set_channel_freq(struct ad4130_state *st, + unsigned int channel, int val, int val2) +{ + struct ad4130_chan_info *chan_info = &st->chans_info[channel]; + struct ad4130_setup_info *setup_info = &chan_info->setup; + unsigned int fs, old_fs; + int ret = 0; + + mutex_lock(&st->lock); + old_fs = setup_info->fs; + + ad4130_freq_to_fs(setup_info->filter_mode, val, val2, &fs); + + if (fs == setup_info->fs) + goto out; + + setup_info->fs = fs; + + ret = ad4130_write_channel_setup(st, channel, false); + if (ret) + setup_info->fs = old_fs; + +out: + mutex_unlock(&st->lock); + + return ret; +} + +static int 
_ad4130_read_sample(struct iio_dev *indio_dev, unsigned int channel, + int *val) +{ + struct ad4130_state *st = iio_priv(indio_dev); + int ret; + + ret = ad4130_set_channel_enable(st, channel, true); + if (ret) + return ret; + + reinit_completion(&st->completion); + + ret = ad4130_set_mode(st, AD4130_MODE_CONTINUOUS); + if (ret) + return ret; + + ret = wait_for_completion_timeout(&st->completion, + msecs_to_jiffies(1000)); + if (!ret) + return -ETIMEDOUT; + + ret = ad4130_set_mode(st, AD4130_MODE_IDLE); + if (ret) + return ret; + + ret = regmap_read(st->regmap, AD4130_DATA_REG, val); + if (ret) + return ret; + + ret = ad4130_set_channel_enable(st, channel, false); + if (ret) + return ret; + + return IIO_VAL_INT; +} + +static int ad4130_read_sample(struct iio_dev *indio_dev, unsigned int channel, + int *val) +{ + struct ad4130_state *st = iio_priv(indio_dev); + int ret; + + ret = iio_device_claim_direct_mode(indio_dev); + if (ret) + return ret; + + mutex_lock(&st->lock); + ret = _ad4130_read_sample(indio_dev, channel, val); + mutex_unlock(&st->lock); + + iio_device_release_direct_mode(indio_dev); + + return ret; +} + +static int ad4130_read_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + int *val, int *val2, long info) +{ + struct ad4130_state *st = iio_priv(indio_dev); + unsigned int channel = chan->scan_index; + struct ad4130_setup_info *setup_info = &st->chans_info[channel].setup; + + switch (info) { + case IIO_CHAN_INFO_RAW: + return ad4130_read_sample(indio_dev, channel, val); + case IIO_CHAN_INFO_SCALE: + mutex_lock(&st->lock); + *val = st->scale_tbls[setup_info->ref_sel][setup_info->pga][0]; + *val2 = st->scale_tbls[setup_info->ref_sel][setup_info->pga][1]; + mutex_unlock(&st->lock); + + return IIO_VAL_INT_PLUS_NANO; + case IIO_CHAN_INFO_OFFSET: + *val = st->bipolar ? 
-BIT(chan->scan_type.realbits - 1) : 0; + + return IIO_VAL_INT; + case IIO_CHAN_INFO_SAMP_FREQ: + mutex_lock(&st->lock); + ad4130_fs_to_freq(setup_info->filter_mode, setup_info->fs, + val, val2); + mutex_unlock(&st->lock); + + return IIO_VAL_INT_PLUS_NANO; + default: + return -EINVAL; + } +} + +static int ad4130_read_avail(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + const int **vals, int *type, int *length, + long info) +{ + struct ad4130_state *st = iio_priv(indio_dev); + unsigned int channel = chan->scan_index; + struct ad4130_setup_info *setup_info = &st->chans_info[channel].setup; + const struct ad4130_filter_config *filter_config; + + switch (info) { + case IIO_CHAN_INFO_SCALE: + *vals = (int *)st->scale_tbls[setup_info->ref_sel]; + *length = ARRAY_SIZE(st->scale_tbls[setup_info->ref_sel]) * 2; + + *type = IIO_VAL_INT_PLUS_NANO; + + return IIO_AVAIL_LIST; + case IIO_CHAN_INFO_SAMP_FREQ: + mutex_lock(&st->lock); + filter_config = &ad4130_filter_configs[setup_info->filter_mode]; + mutex_unlock(&st->lock); + + *vals = (int *)filter_config->samp_freq_avail; + *length = filter_config->samp_freq_avail_len * 2; + *type = IIO_VAL_FRACTIONAL; + + return filter_config->samp_freq_avail_type; + default: + return -EINVAL; + } +} + +static int ad4130_write_raw_get_fmt(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + long info) +{ + switch (info) { + case IIO_CHAN_INFO_SCALE: + case IIO_CHAN_INFO_SAMP_FREQ: + return IIO_VAL_INT_PLUS_NANO; + default: + return -EINVAL; + } +} + +static int ad4130_write_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + int val, int val2, long info) +{ + struct ad4130_state *st = iio_priv(indio_dev); + unsigned int channel = chan->scan_index; + + switch (info) { + case IIO_CHAN_INFO_SCALE: + return ad4130_set_channel_pga(st, channel, val, val2); + case IIO_CHAN_INFO_SAMP_FREQ: + return ad4130_set_channel_freq(st, channel, val, val2); + default: + return -EINVAL; + } +} + +static int 
ad4130_reg_access(struct iio_dev *indio_dev, unsigned int reg, + unsigned int writeval, unsigned int *readval) +{ + struct ad4130_state *st = iio_priv(indio_dev); + + if (readval) + return regmap_read(st->regmap, reg, readval); + + return regmap_write(st->regmap, reg, writeval); +} + +static int ad4130_update_scan_mode(struct iio_dev *indio_dev, + const unsigned long *scan_mask) +{ + struct ad4130_state *st = iio_priv(indio_dev); + unsigned int channel; + unsigned int val = 0; + int ret; + + mutex_lock(&st->lock); + + for_each_set_bit(channel, scan_mask, indio_dev->num_channels) { + ret = ad4130_set_channel_enable(st, channel, true); + if (ret) + goto out; + + val++; + } + + st->num_enabled_channels = val; + +out: + mutex_unlock(&st->lock); + + return 0; +} + +static int ad4130_set_fifo_watermark(struct iio_dev *indio_dev, unsigned int val) +{ + struct ad4130_state *st = iio_priv(indio_dev); + unsigned int eff; + int ret; + + if (val > AD4130_FIFO_SIZE) + return -EINVAL; + + eff = val * st->num_enabled_channels; + if (eff > AD4130_FIFO_SIZE) + /* + * Always set watermark to a multiple of the number of + * enabled channels to avoid making the FIFO unaligned. 
+ */ + eff = rounddown(AD4130_FIFO_SIZE, st->num_enabled_channels); + + mutex_lock(&st->lock); + + ret = regmap_update_bits(st->regmap, AD4130_FIFO_CONTROL_REG, + AD4130_FIFO_CONTROL_WM_MASK, + FIELD_PREP(AD4130_FIFO_CONTROL_WM_MASK, + ad4130_watermark_reg_val(eff))); + if (ret) + goto out; + + st->effective_watermark = eff; + st->watermark = val; + +out: + mutex_unlock(&st->lock); + + return ret; +} + +static const struct iio_info ad4130_info = { + .read_raw = ad4130_read_raw, + .read_avail = ad4130_read_avail, + .write_raw_get_fmt = ad4130_write_raw_get_fmt, + .write_raw = ad4130_write_raw, + .update_scan_mode = ad4130_update_scan_mode, + .hwfifo_set_watermark = ad4130_set_fifo_watermark, + .debugfs_reg_access = ad4130_reg_access, +}; + +static int ad4130_buffer_postenable(struct iio_dev *indio_dev) +{ + struct ad4130_state *st = iio_priv(indio_dev); + int ret; + + mutex_lock(&st->lock); + + ret = ad4130_set_watermark_interrupt_en(st, true); + if (ret) + goto out; + + ret = irq_set_irq_type(st->spi->irq, st->inv_irq_trigger); + if (ret) + goto out; + + ret = ad4130_set_fifo_mode(st, AD4130_FIFO_MODE_WM); + if (ret) + goto out; + + ret = ad4130_set_mode(st, AD4130_MODE_CONTINUOUS); + +out: + mutex_unlock(&st->lock); + + return ret; +} + +static int ad4130_buffer_predisable(struct iio_dev *indio_dev) +{ + struct ad4130_state *st = iio_priv(indio_dev); + unsigned int i; + int ret; + + mutex_lock(&st->lock); + + ret = ad4130_set_mode(st, AD4130_MODE_IDLE); + if (ret) + goto out; + + ret = irq_set_irq_type(st->spi->irq, st->irq_trigger); + if (ret) + goto out; + + ret = ad4130_set_fifo_mode(st, AD4130_FIFO_MODE_DISABLED); + if (ret) + goto out; + + ret = ad4130_set_watermark_interrupt_en(st, false); + if (ret) + goto out; + + /* + * update_scan_mode() is not called in the disable path, disable all + * channels here. 
+ */ + for (i = 0; i < indio_dev->num_channels; i++) { + ret = ad4130_set_channel_enable(st, i, false); + if (ret) + goto out; + } + +out: + mutex_unlock(&st->lock); + + return ret; +} + +static const struct iio_buffer_setup_ops ad4130_buffer_ops = { + .postenable = ad4130_buffer_postenable, + .predisable = ad4130_buffer_predisable, +}; + +static ssize_t hwfifo_watermark_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct ad4130_state *st = iio_priv(dev_to_iio_dev(dev)); + unsigned int val; + + mutex_lock(&st->lock); + val = st->watermark; + mutex_unlock(&st->lock); + + return sysfs_emit(buf, "%d\n", val); +} + +static ssize_t hwfifo_enabled_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct ad4130_state *st = iio_priv(dev_to_iio_dev(dev)); + unsigned int val; + int ret; + + ret = regmap_read(st->regmap, AD4130_FIFO_CONTROL_REG, &val); + if (ret) + return ret; + + val = FIELD_GET(AD4130_FIFO_CONTROL_MODE_MASK, val); + + return sysfs_emit(buf, "%d\n", val != AD4130_FIFO_MODE_DISABLED); +} + +static ssize_t hwfifo_watermark_min_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return sysfs_emit(buf, "%s\n", "1"); +} + +static ssize_t hwfifo_watermark_max_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return sysfs_emit(buf, "%s\n", __stringify(AD4130_FIFO_SIZE)); +} + +static IIO_DEVICE_ATTR_RO(hwfifo_watermark_min, 0); +static IIO_DEVICE_ATTR_RO(hwfifo_watermark_max, 0); +static IIO_DEVICE_ATTR_RO(hwfifo_watermark, 0); +static IIO_DEVICE_ATTR_RO(hwfifo_enabled, 0); + +static const struct attribute *ad4130_fifo_attributes[] = { + &iio_dev_attr_hwfifo_watermark_min.dev_attr.attr, + &iio_dev_attr_hwfifo_watermark_max.dev_attr.attr, + &iio_dev_attr_hwfifo_watermark.dev_attr.attr, + &iio_dev_attr_hwfifo_enabled.dev_attr.attr, + NULL +}; + +static int _ad4130_find_table_index(const unsigned int *tbl, size_t len, + unsigned int val) +{ + unsigned int i; + + for 
(i = 0; i < len; i++) + if (tbl[i] == val) + return i; + + return -EINVAL; +} + +#define ad4130_find_table_index(table, val) \ + _ad4130_find_table_index(table, ARRAY_SIZE(table), val) + +static int ad4130_get_ref_voltage(struct ad4130_state *st, + enum ad4130_ref_sel ref_sel) +{ + switch (ref_sel) { + case AD4130_REF_REFIN1: + return regulator_get_voltage(st->regulators[2].consumer); + case AD4130_REF_REFIN2: + return regulator_get_voltage(st->regulators[3].consumer); + case AD4130_REF_AVDD_AVSS: + return regulator_get_voltage(st->regulators[0].consumer); + case AD4130_REF_REFOUT_AVSS: + return st->int_ref_uv; + default: + return -EINVAL; + } +} + +static int ad4130_parse_fw_setup(struct ad4130_state *st, + struct fwnode_handle *child, + struct ad4130_setup_info *setup_info) +{ + struct device *dev = &st->spi->dev; + u32 tmp; + int ret; + + tmp = 0; + fwnode_property_read_u32(child, "adi,excitation-current-0-nanoamp", &tmp); + ret = ad4130_find_table_index(ad4130_iout_current_na_tbl, tmp); + if (ret < 0) + return dev_err_probe(dev, ret, + "Invalid excitation current %unA\n", tmp); + setup_info->iout0_val = ret; + + tmp = 0; + fwnode_property_read_u32(child, "adi,excitation-current-1-nanoamp", &tmp); + ret = ad4130_find_table_index(ad4130_iout_current_na_tbl, tmp); + if (ret < 0) + return dev_err_probe(dev, ret, + "Invalid excitation current %unA\n", tmp); + setup_info->iout1_val = ret; + + tmp = 0; + fwnode_property_read_u32(child, "adi,burnout-current-nanoamp", &tmp); + ret = ad4130_find_table_index(ad4130_burnout_current_na_tbl, tmp); + if (ret < 0) + return dev_err_probe(dev, ret, + "Invalid burnout current %unA\n", tmp); + setup_info->burnout = ret; + + setup_info->ref_bufp = fwnode_property_read_bool(child, "adi,buffered-positive"); + setup_info->ref_bufm = fwnode_property_read_bool(child, "adi,buffered-negative"); + + setup_info->ref_sel = AD4130_REF_REFIN1; + fwnode_property_read_u32(child, "adi,reference-select", + &setup_info->ref_sel); + if 
(setup_info->ref_sel >= AD4130_REF_SEL_MAX) + return dev_err_probe(dev, -EINVAL, + "Invalid reference selected %u\n", + setup_info->ref_sel); + + if (setup_info->ref_sel == AD4130_REF_REFOUT_AVSS) + st->int_ref_en = true; + + ret = ad4130_get_ref_voltage(st, setup_info->ref_sel); + if (ret < 0) + return dev_err_probe(dev, ret, "Cannot use reference %u\n", + setup_info->ref_sel); + + return 0; +} + +/* + * Validate one differential input. Inputs at or above AD4130_MAX_ANALOG_PINS + * are accepted as-is; analog pins are rejected when reserved as + * AD4130_PIN_FN_SPECIAL and otherwise flagged with AD4130_PIN_FN_DIFF. + */ +static int ad4130_validate_diff_channel(struct ad4130_state *st, u32 pin) +{ + struct device *dev = &st->spi->dev; + + if (pin >= AD4130_MAX_DIFF_INPUTS) + return dev_err_probe(dev, -EINVAL, + "Invalid differential channel %u\n", pin); + + if (pin >= AD4130_MAX_ANALOG_PINS) + return 0; + + if (st->pins_fn[pin] == AD4130_PIN_FN_SPECIAL) + return dev_err_probe(dev, -EINVAL, + "Pin %u already used with fn %u\n", pin, + st->pins_fn[pin]); + + st->pins_fn[pin] |= AD4130_PIN_FN_DIFF; + + return 0; +} + +static int ad4130_validate_diff_channels(struct ad4130_state *st, + u32 *pins, unsigned int len) +{ + unsigned int i; + int ret; + + for (i = 0; i < len; i++) { + ret = ad4130_validate_diff_channel(st, pins[i]); + if (ret) + return ret; + } + + return 0; +} + +static int ad4130_validate_excitation_pin(struct ad4130_state *st, u32 pin) +{ + struct device *dev = &st->spi->dev; + + if (pin >= AD4130_MAX_ANALOG_PINS) + return dev_err_probe(dev, -EINVAL, + "Invalid excitation pin %u\n", pin); + + if (st->pins_fn[pin] == AD4130_PIN_FN_SPECIAL) + return dev_err_probe(dev, -EINVAL, + "Pin %u already used with fn %u\n", pin, + st->pins_fn[pin]); + + st->pins_fn[pin] |= AD4130_PIN_FN_EXCITATION; + + return 0; +} + +static int ad4130_validate_vbias_pin(struct ad4130_state *st, u32 pin) +{ + struct device *dev = &st->spi->dev; + + if (pin >= AD4130_MAX_ANALOG_PINS) + return dev_err_probe(dev, -EINVAL, "Invalid vbias pin %u\n", + pin); + + if (st->pins_fn[pin] == AD4130_PIN_FN_SPECIAL) + return dev_err_probe(dev, -EINVAL, + "Pin %u already used with fn %u\n", pin, + st->pins_fn[pin]); + + 
st->pins_fn[pin] |= AD4130_PIN_FN_VBIAS; + + return 0; +} + +static int ad4130_validate_vbias_pins(struct ad4130_state *st, + u32 *pins, unsigned int len) +{ + unsigned int i; + int ret; + + for (i = 0; i < st->num_vbias_pins; i++) { + ret = ad4130_validate_vbias_pin(st, pins[i]); + if (ret) + return ret; + } + + return 0; +} + +static int ad4130_parse_fw_channel(struct iio_dev *indio_dev, + struct fwnode_handle *child) +{ + struct ad4130_state *st = iio_priv(indio_dev); + unsigned int resolution = ad4130_resolution(st); + unsigned int index = indio_dev->num_channels++; + struct device *dev = &st->spi->dev; + struct ad4130_chan_info *chan_info; + struct iio_chan_spec *chan; + u32 pins[2]; + int ret; + + if (index >= AD4130_MAX_CHANNELS) + return dev_err_probe(dev, -EINVAL, "Too many channels\n"); + + chan = &st->chans[index]; + chan_info = &st->chans_info[index]; + + *chan = ad4130_channel_template; + chan->scan_type.realbits = resolution; + chan->scan_type.storagebits = resolution; + chan->scan_index = index; + + chan_info->slot = AD4130_INVALID_SLOT; + chan_info->setup.fs = AD4130_FILTER_SELECT_MIN; + chan_info->initialized = true; + + ret = fwnode_property_read_u32_array(child, "diff-channels", pins, + ARRAY_SIZE(pins)); + if (ret) + return ret; + + ret = ad4130_validate_diff_channels(st, pins, ARRAY_SIZE(pins)); + if (ret) + return ret; + + chan->channel = pins[0]; + chan->channel2 = pins[1]; + + ret = ad4130_parse_fw_setup(st, child, &chan_info->setup); + if (ret) + return ret; + + fwnode_property_read_u32(child, "adi,excitation-pin-0", + &chan_info->iout0); + if (chan_info->setup.iout0_val != AD4130_IOUT_OFF) { + ret = ad4130_validate_excitation_pin(st, chan_info->iout0); + if (ret) + return ret; + } + + fwnode_property_read_u32(child, "adi,excitation-pin-1", + &chan_info->iout1); + if (chan_info->setup.iout1_val != AD4130_IOUT_OFF) { + ret = ad4130_validate_excitation_pin(st, chan_info->iout1); + if (ret) + return ret; + } + + return 0; +} + +static int 
ad4130_parse_fw_children(struct iio_dev *indio_dev) +{ + struct ad4130_state *st = iio_priv(indio_dev); + struct device *dev = &st->spi->dev; + struct fwnode_handle *child; + int ret; + + indio_dev->channels = st->chans; + + device_for_each_child_node(dev, child) { + ret = ad4130_parse_fw_channel(indio_dev, child); + if (ret) { + fwnode_handle_put(child); + return ret; + } + } + + return 0; +} + +static int ad4310_parse_fw(struct iio_dev *indio_dev) +{ + struct ad4130_state *st = iio_priv(indio_dev); + struct device *dev = &st->spi->dev; + u32 ext_clk_freq = AD4130_MCLK_FREQ_76_8KHZ; + unsigned int i; + int avdd_uv; + int irq; + int ret; + + st->mclk = devm_clk_get_optional(dev, "mclk"); + if (IS_ERR(st->mclk)) + return dev_err_probe(dev, PTR_ERR(st->mclk), + "Failed to get mclk\n"); + + st->int_pin_sel = AD4130_INT_PIN_INT; + + for (i = 0; i < ARRAY_SIZE(ad4130_int_pin_names); i++) { + irq = fwnode_irq_get_byname(dev_fwnode(dev), + ad4130_int_pin_names[i]); + if (irq > 0) { + st->int_pin_sel = i; + break; + } + } + + if (st->int_pin_sel == AD4130_INT_PIN_DOUT) + return dev_err_probe(dev, -EINVAL, + "Cannot use DOUT as interrupt pin\n"); + + if (st->int_pin_sel == AD4130_INT_PIN_P2) + st->pins_fn[AD4130_AIN3_P2] = AD4130_PIN_FN_SPECIAL; + + device_property_read_u32(dev, "adi,ext-clk-freq-hz", &ext_clk_freq); + if (ext_clk_freq != AD4130_MCLK_FREQ_153_6KHZ && + ext_clk_freq != AD4130_MCLK_FREQ_76_8KHZ) + return dev_err_probe(dev, -EINVAL, + "Invalid external clock frequency %u\n", + ext_clk_freq); + + if (st->mclk && ext_clk_freq == AD4130_MCLK_FREQ_153_6KHZ) + st->mclk_sel = AD4130_MCLK_153_6KHZ_EXT; + else if (st->mclk) + st->mclk_sel = AD4130_MCLK_76_8KHZ_EXT; + else + st->mclk_sel = AD4130_MCLK_76_8KHZ; + + if (st->int_pin_sel == AD4130_INT_PIN_CLK && + st->mclk_sel != AD4130_MCLK_76_8KHZ) + return dev_err_probe(dev, -EINVAL, + "Invalid clock %u for interrupt pin %u\n", + st->mclk_sel, st->int_pin_sel); + + st->int_ref_uv = AD4130_INT_REF_2_5V; + + /* + * When 
the AVDD supply is set to below 2.5V the internal reference of + * 1.25V should be selected. + * See datasheet page 37, section ADC REFERENCE. + */ + avdd_uv = regulator_get_voltage(st->regulators[0].consumer); + if (avdd_uv > 0 && avdd_uv < AD4130_INT_REF_2_5V) + st->int_ref_uv = AD4130_INT_REF_1_25V; + + st->bipolar = device_property_read_bool(dev, "adi,bipolar"); + + ret = device_property_count_u32(dev, "adi,vbias-pins"); + if (ret > 0) { + if (ret > AD4130_MAX_ANALOG_PINS) + return dev_err_probe(dev, -EINVAL, + "Too many vbias pins %u\n", ret); + + st->num_vbias_pins = ret; + + ret = device_property_read_u32_array(dev, "adi,vbias-pins", + st->vbias_pins, + st->num_vbias_pins); + if (ret) + return dev_err_probe(dev, ret, + "Failed to read vbias pins\n"); + + ret = ad4130_validate_vbias_pins(st, st->vbias_pins, + st->num_vbias_pins); + if (ret) + return ret; + } + + ret = ad4130_parse_fw_children(indio_dev); + if (ret) + return ret; + + return 0; +} + +static void ad4130_fill_scale_tbls(struct ad4130_state *st) +{ + unsigned int pow = ad4130_resolution(st) - st->bipolar; + unsigned int i, j; + + for (i = 0; i < AD4130_REF_SEL_MAX; i++) { + int ret; + u64 nv; + + ret = ad4130_get_ref_voltage(st, i); + if (ret < 0) + continue; + + nv = (u64)ret * NANO; + + for (j = 0; j < AD4130_MAX_PGA; j++) + st->scale_tbls[i][j][1] = div_u64(nv >> (pow + j), MILLI); + } +} + +static void ad4130_clk_disable_unprepare(void *clk) +{ + clk_disable_unprepare(clk); +} + +static int ad4130_set_mclk_sel(struct ad4130_state *st, + enum ad4130_mclk_sel mclk_sel) +{ + return regmap_update_bits(st->regmap, AD4130_ADC_CONTROL_REG, + AD4130_ADC_CONTROL_MCLK_SEL_MASK, + FIELD_PREP(AD4130_ADC_CONTROL_MCLK_SEL_MASK, + mclk_sel)); +} + +static unsigned long ad4130_int_clk_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + return AD4130_MCLK_FREQ_76_8KHZ; +} + +static int ad4130_int_clk_is_enabled(struct clk_hw *hw) +{ + struct ad4130_state *st = container_of(hw, struct ad4130_state, 
int_clk_hw); + + return st->mclk_sel == AD4130_MCLK_76_8KHZ_OUT; +} + +static int ad4130_int_clk_prepare(struct clk_hw *hw) +{ + struct ad4130_state *st = container_of(hw, struct ad4130_state, int_clk_hw); + int ret; + + ret = ad4130_set_mclk_sel(st, AD4130_MCLK_76_8KHZ_OUT); + if (ret) + return ret; + + st->mclk_sel = AD4130_MCLK_76_8KHZ_OUT; + + return 0; +} + +static void ad4130_int_clk_unprepare(struct clk_hw *hw) +{ + struct ad4130_state *st = container_of(hw, struct ad4130_state, int_clk_hw); + int ret; + + ret = ad4130_set_mclk_sel(st, AD4130_MCLK_76_8KHZ); + if (ret) + return; + + st->mclk_sel = AD4130_MCLK_76_8KHZ; +} + +static const struct clk_ops ad4130_int_clk_ops = { + .recalc_rate = ad4130_int_clk_recalc_rate, + .is_enabled = ad4130_int_clk_is_enabled, + .prepare = ad4130_int_clk_prepare, + .unprepare = ad4130_int_clk_unprepare, +}; + +/* + * Register the internal clock as a clock provider. Nothing is registered + * (and 0 is returned) when the CLK pin is used as interrupt pin, when the + * selected clock is not AD4130_MCLK_76_8KHZ, or when there is no OF node. + */ +static int ad4130_setup_int_clk(struct ad4130_state *st) +{ + struct device *dev = &st->spi->dev; + struct device_node *of_node = dev_of_node(dev); + /* Zero-initialize so the fields not assigned below are not stack garbage. */ + struct clk_init_data init = {}; + const char *clk_name; + struct clk *clk; + + if (st->int_pin_sel == AD4130_INT_PIN_CLK || + st->mclk_sel != AD4130_MCLK_76_8KHZ) + return 0; + + if (!of_node) + return 0; + + clk_name = of_node->name; + of_property_read_string(of_node, "clock-output-names", &clk_name); + + init.name = clk_name; + init.ops = &ad4130_int_clk_ops; + + st->int_clk_hw.init = &init; + clk = devm_clk_register(dev, &st->int_clk_hw); + if (IS_ERR(clk)) + return PTR_ERR(clk); + + return of_clk_add_provider(of_node, of_clk_src_simple_get, clk); +} + +static int ad4130_setup(struct iio_dev *indio_dev) +{ + struct ad4130_state *st = iio_priv(indio_dev); + struct device *dev = &st->spi->dev; + unsigned int int_ref_val; + unsigned long rate = AD4130_MCLK_FREQ_76_8KHZ; + unsigned int val; + unsigned int i; + int ret; + + if (st->mclk_sel == AD4130_MCLK_153_6KHZ_EXT) + rate = AD4130_MCLK_FREQ_153_6KHZ; + + ret = clk_set_rate(st->mclk, rate); + if (ret) + return ret; + + ret = 
clk_prepare_enable(st->mclk); + if (ret) + return ret; + + ret = devm_add_action_or_reset(dev, ad4130_clk_disable_unprepare, + st->mclk); + if (ret) + return ret; + + if (st->int_ref_uv == AD4130_INT_REF_2_5V) + int_ref_val = AD4130_INT_REF_VAL_2_5V; + else + int_ref_val = AD4130_INT_REF_VAL_1_25V; + + /* Switch to SPI 4-wire mode. */ + val = FIELD_PREP(AD4130_ADC_CONTROL_CSB_EN_MASK, 1); + val |= FIELD_PREP(AD4130_ADC_CONTROL_BIPOLAR_MASK, st->bipolar); + val |= FIELD_PREP(AD4130_ADC_CONTROL_INT_REF_EN_MASK, st->int_ref_en); + val |= FIELD_PREP(AD4130_ADC_CONTROL_MODE_MASK, AD4130_MODE_IDLE); + val |= FIELD_PREP(AD4130_ADC_CONTROL_MCLK_SEL_MASK, st->mclk_sel); + val |= FIELD_PREP(AD4130_ADC_CONTROL_INT_REF_VAL_MASK, int_ref_val); + + ret = regmap_write(st->regmap, AD4130_ADC_CONTROL_REG, val); + if (ret) + return ret; + + /* + * Configure all GPIOs for output. If configured, the interrupt function + * of P2 takes priority over the GPIO out function. + */ + val = AD4130_IO_CONTROL_GPIO_CTRL_MASK; + val |= FIELD_PREP(AD4130_IO_CONTROL_INT_PIN_SEL_MASK, st->int_pin_sel); + + ret = regmap_write(st->regmap, AD4130_IO_CONTROL_REG, val); + if (ret) + return ret; + + val = 0; + for (i = 0; i < st->num_vbias_pins; i++) + val |= BIT(st->vbias_pins[i]); + + ret = regmap_write(st->regmap, AD4130_VBIAS_REG, val); + if (ret) + return ret; + + ret = regmap_update_bits(st->regmap, AD4130_FIFO_CONTROL_REG, + AD4130_FIFO_CONTROL_HEADER_MASK, 0); + if (ret) + return ret; + + /* FIFO watermark interrupt starts out as enabled, disable it. */ + ret = ad4130_set_watermark_interrupt_en(st, false); + if (ret) + return ret; + + /* Setup channels. 
*/ + for (i = 0; i < indio_dev->num_channels; i++) { + struct ad4130_chan_info *chan_info = &st->chans_info[i]; + struct iio_chan_spec *chan = &st->chans[i]; + unsigned int val; + + val = FIELD_PREP(AD4130_CHANNEL_AINP_MASK, chan->channel) | + FIELD_PREP(AD4130_CHANNEL_AINM_MASK, chan->channel2) | + FIELD_PREP(AD4130_CHANNEL_IOUT1_MASK, chan_info->iout0) | + FIELD_PREP(AD4130_CHANNEL_IOUT2_MASK, chan_info->iout1); + + ret = regmap_write(st->regmap, AD4130_CHANNEL_X_REG(i), val); + if (ret) + return ret; + } + + return 0; +} + +static int ad4130_soft_reset(struct ad4130_state *st) +{ + int ret; + + ret = spi_write(st->spi, st->reset_buf, sizeof(st->reset_buf)); + if (ret) + return ret; + + fsleep(AD4130_RESET_SLEEP_US); + + return 0; +} + +static void ad4130_disable_regulators(void *data) +{ + struct ad4130_state *st = data; + + regulator_bulk_disable(ARRAY_SIZE(st->regulators), st->regulators); +} + +static int ad4130_probe(struct spi_device *spi) +{ + struct device *dev = &spi->dev; + struct iio_dev *indio_dev; + struct ad4130_state *st; + int ret; + + indio_dev = devm_iio_device_alloc(dev, sizeof(*st)); + if (!indio_dev) + return -ENOMEM; + + st = iio_priv(indio_dev); + + memset(st->reset_buf, 0xff, sizeof(st->reset_buf)); + init_completion(&st->completion); + mutex_init(&st->lock); + st->spi = spi; + + /* + * Xfer: [ XFR1 ] [ XFR2 ] + * Master: 0x7D N ...................... + * Slave: ...... DATA1 DATA2 ... 
DATAN + */ + st->fifo_tx_buf[0] = AD4130_COMMS_READ_MASK | AD4130_FIFO_DATA_REG; + st->fifo_xfer[0].tx_buf = st->fifo_tx_buf; + st->fifo_xfer[0].len = sizeof(st->fifo_tx_buf); + st->fifo_xfer[1].rx_buf = st->fifo_rx_buf; + spi_message_init_with_transfers(&st->fifo_msg, st->fifo_xfer, + ARRAY_SIZE(st->fifo_xfer)); + + indio_dev->name = AD4130_NAME; + indio_dev->modes = INDIO_DIRECT_MODE; + indio_dev->info = &ad4130_info; + + st->regmap = devm_regmap_init(dev, NULL, st, &ad4130_regmap_config); + if (IS_ERR(st->regmap)) + return PTR_ERR(st->regmap); + + st->regulators[0].supply = "avdd"; + st->regulators[1].supply = "iovdd"; + st->regulators[2].supply = "refin1"; + st->regulators[3].supply = "refin2"; + + ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(st->regulators), + st->regulators); + if (ret) + return dev_err_probe(dev, ret, "Failed to get regulators\n"); + + ret = regulator_bulk_enable(ARRAY_SIZE(st->regulators), st->regulators); + if (ret) + return dev_err_probe(dev, ret, "Failed to enable regulators\n"); + + ret = devm_add_action_or_reset(dev, ad4130_disable_regulators, st); + if (ret) + return dev_err_probe(dev, ret, + "Failed to add regulators disable action\n"); + + ret = ad4130_soft_reset(st); + if (ret) + return ret; + + ret = ad4310_parse_fw(indio_dev); + if (ret) + return ret; + + ret = ad4130_setup(indio_dev); + if (ret) + return ret; + + ret = ad4130_setup_int_clk(st); + if (ret) + return ret; + + ad4130_fill_scale_tbls(st); + + st->gc.owner = THIS_MODULE; + st->gc.label = AD4130_NAME; + st->gc.base = -1; + st->gc.ngpio = AD4130_MAX_GPIOS; + st->gc.parent = dev; + st->gc.can_sleep = true; + st->gc.init_valid_mask = ad4130_gpio_init_valid_mask; + st->gc.get_direction = ad4130_gpio_get_direction; + st->gc.set = ad4130_gpio_set; + + ret = devm_gpiochip_add_data(dev, &st->gc, st); + if (ret) + return ret; + + ret = devm_iio_kfifo_buffer_setup_ext(dev, indio_dev, + &ad4130_buffer_ops, + ad4130_fifo_attributes); + if (ret) + return ret; + + ret = 
devm_request_threaded_irq(dev, spi->irq, NULL, + ad4130_irq_handler, IRQF_ONESHOT, + indio_dev->name, indio_dev); + if (ret) + return dev_err_probe(dev, ret, "Failed to request irq\n"); + + /* + * When the chip enters FIFO mode, IRQ polarity is inverted. + * When the chip exits FIFO mode, IRQ polarity returns to normal. + * See datasheet pages: 65, FIFO Watermark Interrupt section, + * and 71, Bit Descriptions for STATUS Register, RDYB. + * Cache the normal and inverted IRQ triggers to set them when + * entering and exiting FIFO mode. + */ + st->irq_trigger = irq_get_trigger_type(spi->irq); + if (st->irq_trigger & IRQF_TRIGGER_RISING) + st->inv_irq_trigger = IRQF_TRIGGER_FALLING; + else if (st->irq_trigger & IRQF_TRIGGER_FALLING) + st->inv_irq_trigger = IRQF_TRIGGER_RISING; + else + return dev_err_probe(dev, -EINVAL, "Invalid irq flags: %u\n", + st->irq_trigger); + + return devm_iio_device_register(dev, indio_dev); +} + +static const struct of_device_id ad4130_of_match[] = { + { + .compatible = "adi,ad4130", + }, + { } +}; +MODULE_DEVICE_TABLE(of, ad4130_of_match); + +static struct spi_driver ad4130_driver = { + .driver = { + .name = AD4130_NAME, + .of_match_table = ad4130_of_match, + }, + .probe = ad4130_probe, +}; +module_spi_driver(ad4130_driver); + +MODULE_AUTHOR("Cosmin Tanislav "); +MODULE_DESCRIPTION("Analog Devices AD4130 SPI driver"); +MODULE_LICENSE("GPL"); From e54ec96c7201507db1b2c053068cda32f3c42cc7 Mon Sep 17 00:00:00 2001 From: Deepak R Varma Date: Mon, 17 Oct 2022 12:02:30 +0530 Subject: [PATCH 2299/4122] staging: iio: frequency: ad9834: merge unnecessary split lines Improve code readability by merging unnecessary split lines that are well within the code-style guidelines post merge. 
Signed-off-by: Deepak R Varma Acked-by: Julia Lawall Link: https://lore.kernel.org/r/Y0z2/qFe3kW96MTs@debian-BULLSEYE-live-builder-AMD64 Signed-off-by: Jonathan Cameron --- drivers/staging/iio/frequency/ad9834.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/staging/iio/frequency/ad9834.c b/drivers/staging/iio/frequency/ad9834.c index 2b4267a87e65..285df0e489a6 100644 --- a/drivers/staging/iio/frequency/ad9834.c +++ b/drivers/staging/iio/frequency/ad9834.c @@ -331,11 +331,9 @@ static IIO_DEV_ATTR_PHASE(0, 1, 0200, NULL, ad9834_write, AD9834_REG_PHASE1); static IIO_DEV_ATTR_PHASESYMBOL(0, 0200, NULL, ad9834_write, AD9834_PSEL); static IIO_CONST_ATTR_PHASE_SCALE(0, "0.0015339808"); /* 2PI/2^12 rad*/ -static IIO_DEV_ATTR_PINCONTROL_EN(0, 0200, NULL, - ad9834_write, AD9834_PIN_SW); +static IIO_DEV_ATTR_PINCONTROL_EN(0, 0200, NULL, ad9834_write, AD9834_PIN_SW); static IIO_DEV_ATTR_OUT_ENABLE(0, 0200, NULL, ad9834_write, AD9834_RESET); -static IIO_DEV_ATTR_OUTY_ENABLE(0, 1, 0200, NULL, - ad9834_write, AD9834_OPBITEN); +static IIO_DEV_ATTR_OUTY_ENABLE(0, 1, 0200, NULL, ad9834_write, AD9834_OPBITEN); static IIO_DEV_ATTR_OUT_WAVETYPE(0, 0, ad9834_store_wavetype, 0); static IIO_DEV_ATTR_OUT_WAVETYPE(0, 1, ad9834_store_wavetype, 1); From e9b96e18cf47133e51f4cb67d9dd9d163abc5f6d Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 16 Oct 2022 17:33:56 +0100 Subject: [PATCH 2300/4122] iio: accel: adxl367: Use devm_regulator_bulk_get_enable() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This driver only turns the power on at probe and off via a custom devm_add_action_or_reset() callback. The new devm_regulator_bulk_get_enable() replaces this boilerplate code. 
Signed-off-by: Jonathan Cameron Cc: Cosmin Tanislav Reviewed-by: Matti Vaittinen Reviewed-by: Nuno Sá Link: https://lore.kernel.org/r/20221016163409.320197-2-jic23@kernel.org --- drivers/iio/accel/adxl367.c | 28 ++++------------------------ 1 file changed, 4 insertions(+), 24 deletions(-) diff --git a/drivers/iio/accel/adxl367.c b/drivers/iio/accel/adxl367.c index 7c7d78040793..d7af3a006a44 100644 --- a/drivers/iio/accel/adxl367.c +++ b/drivers/iio/accel/adxl367.c @@ -160,8 +160,6 @@ struct adxl367_state { struct device *dev; struct regmap *regmap; - struct regulator_bulk_data regulators[2]; - /* * Synchronize access to members of driver state, and ensure atomicity * of consecutive regmap operations. @@ -1487,16 +1485,10 @@ static int adxl367_setup(struct adxl367_state *st) return adxl367_set_measure_en(st, true); } -static void adxl367_disable_regulators(void *data) -{ - struct adxl367_state *st = data; - - regulator_bulk_disable(ARRAY_SIZE(st->regulators), st->regulators); -} - int adxl367_probe(struct device *dev, const struct adxl367_ops *ops, void *context, struct regmap *regmap, int irq) { + static const char * const regulator_names[] = { "vdd", "vddio" }; struct iio_dev *indio_dev; struct adxl367_state *st; int ret; @@ -1520,25 +1512,13 @@ int adxl367_probe(struct device *dev, const struct adxl367_ops *ops, indio_dev->info = &adxl367_info; indio_dev->modes = INDIO_DIRECT_MODE; - st->regulators[0].supply = "vdd"; - st->regulators[1].supply = "vddio"; - - ret = devm_regulator_bulk_get(st->dev, ARRAY_SIZE(st->regulators), - st->regulators); + ret = devm_regulator_bulk_get_enable(st->dev, + ARRAY_SIZE(regulator_names), + regulator_names); if (ret) return dev_err_probe(st->dev, ret, "Failed to get regulators\n"); - ret = regulator_bulk_enable(ARRAY_SIZE(st->regulators), st->regulators); - if (ret) - return dev_err_probe(st->dev, ret, - "Failed to enable regulators\n"); - - ret = devm_add_action_or_reset(st->dev, adxl367_disable_regulators, st); - if (ret) - 
return dev_err_probe(st->dev, ret, - "Failed to add regulators disable action\n"); - ret = regmap_write(st->regmap, ADXL367_REG_RESET, ADXL367_RESET_CODE); if (ret) return ret; From 5b30e739ceaf37aa5817195a20cd1fa7a4607623 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 16 Oct 2022 17:33:58 +0100 Subject: [PATCH 2301/4122] iio: accel: fxls8962af: Use devm_regulator_get_enable() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This driver only turns the power on at probe and off via a custom devm_add_action_or_reset() callback. The new devm_regulator_get_enable() replaces this boilerplate code. Signed-off-by: Jonathan Cameron Cc: Sean Nyekjaer Reviewed-by: Sean Nyekjaer Reviewed-by: Matti Vaittinen Reviewed-by: Nuno Sá Link: https://lore.kernel.org/r/20221016163409.320197-4-jic23@kernel.org --- drivers/iio/accel/fxls8962af-core.c | 24 +++--------------------- 1 file changed, 3 insertions(+), 21 deletions(-) diff --git a/drivers/iio/accel/fxls8962af-core.c b/drivers/iio/accel/fxls8962af-core.c index bf259db281f5..0d672b1469e8 100644 --- a/drivers/iio/accel/fxls8962af-core.c +++ b/drivers/iio/accel/fxls8962af-core.c @@ -159,7 +159,6 @@ struct fxls8962af_chip_info { struct fxls8962af_data { struct regmap *regmap; const struct fxls8962af_chip_info *chip_info; - struct regulator *vdd_reg; struct { __le16 channels[3]; s64 ts __aligned(8); @@ -1051,13 +1050,6 @@ static irqreturn_t fxls8962af_interrupt(int irq, void *p) return IRQ_NONE; } -static void fxls8962af_regulator_disable(void *data_ptr) -{ - struct fxls8962af_data *data = data_ptr; - - regulator_disable(data->vdd_reg); -} - static void fxls8962af_pm_disable(void *dev_ptr) { struct device *dev = dev_ptr; @@ -1171,20 +1163,10 @@ int fxls8962af_core_probe(struct device *dev, struct regmap *regmap, int irq) if (ret) return ret; - data->vdd_reg = devm_regulator_get(dev, "vdd"); - if (IS_ERR(data->vdd_reg)) - return dev_err_probe(dev, PTR_ERR(data->vdd_reg), - "Failed 
to get vdd regulator\n"); - - ret = regulator_enable(data->vdd_reg); - if (ret) { - dev_err(dev, "Failed to enable vdd regulator: %d\n", ret); - return ret; - } - - ret = devm_add_action_or_reset(dev, fxls8962af_regulator_disable, data); + ret = devm_regulator_get_enable(dev, "vdd"); if (ret) - return ret; + return dev_err_probe(dev, ret, + "Failed to get vdd regulator\n"); ret = regmap_read(data->regmap, FXLS8962AF_WHO_AM_I, &reg); if (ret) From 1fa4ff1f249983b38394ee79379e028c1c501ae5 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 16 Oct 2022 17:33:59 +0100 Subject: [PATCH 2302/4122] iio: accel: kxcjk-1013: Use devm_regulator_bulk_get_enable() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This driver only turns the power on at probe and off via a custom devm_add_action_or_reset() callback. The new devm_regulator_bulk_get_enable() replaces this boilerplate code. Signed-off-by: Jonathan Cameron Reviewed-by: Matti Vaittinen Reviewed-by: Nuno Sá Link: https://lore.kernel.org/r/20221016163409.320197-5-jic23@kernel.org --- drivers/iio/accel/kxcjk-1013.c | 25 ++++--------------------- 1 file changed, 4 insertions(+), 21 deletions(-) diff --git a/drivers/iio/accel/kxcjk-1013.c b/drivers/iio/accel/kxcjk-1013.c index adc66b3615c0..e626b6fa8a36 100644 --- a/drivers/iio/accel/kxcjk-1013.c +++ b/drivers/iio/accel/kxcjk-1013.c @@ -241,7 +241,6 @@ enum kxcjk1013_axis { }; struct kxcjk1013_data { - struct regulator_bulk_data regulators[2]; struct i2c_client *client; struct iio_trigger *dready_trig; struct iio_trigger *motion_trig; @@ -1425,16 +1424,10 @@ static const char *kxcjk1013_match_acpi_device(struct device *dev, return dev_name(dev); } -static void kxcjk1013_disable_regulators(void *d) -{ - struct kxcjk1013_data *data = d; - - regulator_bulk_disable(ARRAY_SIZE(data->regulators), data->regulators); -} - static int kxcjk1013_probe(struct i2c_client *client, const struct i2c_device_id *id) { + static const char * const
regulator_names[] = { "vdd", "vddio" }; struct kxcjk1013_data *data; struct iio_dev *indio_dev; struct kxcjk_1013_platform_data *pdata; @@ -1461,22 +1454,12 @@ static int kxcjk1013_probe(struct i2c_client *client, return ret; } - data->regulators[0].supply = "vdd"; - data->regulators[1].supply = "vddio"; - ret = devm_regulator_bulk_get(&client->dev, ARRAY_SIZE(data->regulators), - data->regulators); + ret = devm_regulator_bulk_get_enable(&client->dev, + ARRAY_SIZE(regulator_names), + regulator_names); if (ret) return dev_err_probe(&client->dev, ret, "Failed to get regulators\n"); - ret = regulator_bulk_enable(ARRAY_SIZE(data->regulators), - data->regulators); - if (ret) - return ret; - - ret = devm_add_action_or_reset(&client->dev, kxcjk1013_disable_regulators, data); - if (ret) - return ret; - /* * A typical delay of 10ms is required for powering up * according to the data sheets of supported chips. From eefa008b1677244d73ba47920172be36570c87c1 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 16 Oct 2022 17:34:00 +0100 Subject: [PATCH 2303/4122] iio: accel: msa311: Use devm_regulator_get_enable() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This driver only turns the power on at probe and off via a custom devm_add_action_or_reset() callback. The new devm_regulator_get_enable() replaces this boilerplate code. 
Signed-off-by: Jonathan Cameron Cc: Dmitry Rokosov Reviewed-by: Dmitry Rokosov Reviewed-by: Nuno Sá Link: https://lore.kernel.org/r/20221016163409.320197-6-jic23@kernel.org --- drivers/iio/accel/msa311.c | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/drivers/iio/accel/msa311.c b/drivers/iio/accel/msa311.c index 2fded3759171..af94d3adf6d8 100644 --- a/drivers/iio/accel/msa311.c +++ b/drivers/iio/accel/msa311.c @@ -351,7 +351,6 @@ static const struct regmap_config msa311_regmap_config = { * @chip_name: Chip name in the format "msa311-%02x" % partid * @new_data_trig: Optional NEW_DATA interrupt driven trigger used * to notify external consumers a new sample is ready - * @vdd: Optional external voltage regulator for the device power supply */ struct msa311_priv { struct regmap *regs; @@ -362,7 +361,6 @@ struct msa311_priv { char *chip_name; struct iio_trigger *new_data_trig; - struct regulator *vdd; }; enum msa311_si { @@ -1146,11 +1144,6 @@ static void msa311_powerdown(void *msa311) msa311_set_pwr_mode(msa311, MSA311_PWR_MODE_SUSPEND); } -static void msa311_vdd_disable(void *vdd) -{ - regulator_disable(vdd); -} - static int msa311_probe(struct i2c_client *i2c) { struct device *dev = &i2c->dev; @@ -1173,19 +1166,9 @@ static int msa311_probe(struct i2c_client *i2c) mutex_init(&msa311->lock); - msa311->vdd = devm_regulator_get(dev, "vdd"); - if (IS_ERR(msa311->vdd)) - return dev_err_probe(dev, PTR_ERR(msa311->vdd), - "can't get vdd supply\n"); - - err = regulator_enable(msa311->vdd); + err = devm_regulator_get_enable(dev, "vdd"); if (err) - return dev_err_probe(dev, err, "can't enable vdd supply\n"); - - err = devm_add_action_or_reset(dev, msa311_vdd_disable, msa311->vdd); - if (err) - return dev_err_probe(dev, err, - "can't add vdd disable action\n"); + return dev_err_probe(dev, err, "can't get vdd supply\n"); err = msa311_check_partid(msa311); if (err) From 72ce527c7bb599ac1d64ec5393f66a673b75de6b Mon Sep 17 00:00:00 2001 From: 
Jonathan Cameron Date: Sun, 16 Oct 2022 17:34:01 +0100 Subject: [PATCH 2304/4122] iio: cdc: ad7150: Use devm_regulator_get_enable() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This driver only turns the power on at probe and off via a custom devm_add_action_or_reset() callback. The new devm_regulator_get_enable() replaces this boilerplate code. Signed-off-by: Jonathan Cameron Reviewed-by: Matti Vaittinen Reviewed-by: Nuno Sá Link: https://lore.kernel.org/r/20221016163409.320197-7-jic23@kernel.org --- drivers/iio/cdc/ad7150.c | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/drivers/iio/cdc/ad7150.c b/drivers/iio/cdc/ad7150.c index ebe112b4618b..1113745890ca 100644 --- a/drivers/iio/cdc/ad7150.c +++ b/drivers/iio/cdc/ad7150.c @@ -536,19 +536,11 @@ static const struct iio_info ad7150_info_no_irq = { .read_raw = &ad7150_read_raw, }; -static void ad7150_reg_disable(void *data) -{ - struct regulator *reg = data; - - regulator_disable(reg); -} - static int ad7150_probe(struct i2c_client *client, const struct i2c_device_id *id) { struct ad7150_chip_info *chip; struct iio_dev *indio_dev; - struct regulator *reg; int ret; indio_dev = devm_iio_device_alloc(&client->dev, sizeof(*chip)); @@ -563,15 +555,7 @@ static int ad7150_probe(struct i2c_client *client, indio_dev->modes = INDIO_DIRECT_MODE; - reg = devm_regulator_get(&client->dev, "vdd"); - if (IS_ERR(reg)) - return PTR_ERR(reg); - - ret = regulator_enable(reg); - if (ret) - return ret; - - ret = devm_add_action_or_reset(&client->dev, ad7150_reg_disable, reg); + ret = devm_regulator_get_enable(&client->dev, "vdd"); if (ret) return ret; From 9e855d77b1ec57704d23e25761a97e6e64abed66 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 16 Oct 2022 17:34:02 +0100 Subject: [PATCH 2305/4122] iio: st_sensors: core and lsm9ds0 switch to devm_regulator_bulk_get_enable() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit These drivers only turn the power on at probe and off via a custom devm_add_action_or_reset() callback. The two regulators were handled separately so also switch to bulk registration. The new devm_regulator_bulk_get_enable() replaces all this boilerplate code. Signed-off-by: Jonathan Cameron Cc: Linus Walleij Cc: Andy Shevchenko Reviewed-by: Matti Vaittinen Reviewed-by: Nuno Sá Link: https://lore.kernel.org/r/20221016163409.320197-8-jic23@kernel.org --- .../iio/common/st_sensors/st_sensors_core.c | 39 ++--------- drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_core.c | 65 ++----------------- include/linux/iio/common/st_sensors.h | 4 -- 3 files changed, 14 insertions(+), 94 deletions(-) diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c index 35720c64fea8..c77d7bdcc121 100644 --- a/drivers/iio/common/st_sensors/st_sensors_core.c +++ b/drivers/iio/common/st_sensors/st_sensors_core.c @@ -219,47 +219,22 @@ int st_sensors_set_axis_enable(struct iio_dev *indio_dev, u8 axis_enable) } EXPORT_SYMBOL_NS(st_sensors_set_axis_enable, IIO_ST_SENSORS); -static void st_reg_disable(void *reg) -{ - regulator_disable(reg); -} int st_sensors_power_enable(struct iio_dev *indio_dev) { - struct st_sensor_data *pdata = iio_priv(indio_dev); + static const char * const regulator_names[] = { "vdd", "vddio" }; struct device *parent = indio_dev->dev.parent; int err; /* Regulators not mandatory, but if requested we should enable them.
*/ - pdata->vdd = devm_regulator_get(parent, "vdd"); - if (IS_ERR(pdata->vdd)) - return dev_err_probe(&indio_dev->dev, PTR_ERR(pdata->vdd), - "unable to get Vdd supply\n"); - - err = regulator_enable(pdata->vdd); - if (err != 0) { - dev_warn(&indio_dev->dev, - "Failed to enable specified Vdd supply\n"); - return err; - } - - err = devm_add_action_or_reset(parent, st_reg_disable, pdata->vdd); + err = devm_regulator_bulk_get_enable(parent, + ARRAY_SIZE(regulator_names), + regulator_names); if (err) - return err; + return dev_err_probe(&indio_dev->dev, err, + "unable to enable supplies\n"); - pdata->vdd_io = devm_regulator_get(parent, "vddio"); - if (IS_ERR(pdata->vdd_io)) - return dev_err_probe(&indio_dev->dev, PTR_ERR(pdata->vdd_io), - "unable to get Vdd_IO supply\n"); - - err = regulator_enable(pdata->vdd_io); - if (err != 0) { - dev_warn(&indio_dev->dev, - "Failed to enable specified Vdd_IO supply\n"); - return err; - } - - return devm_add_action_or_reset(parent, st_reg_disable, pdata->vdd_io); + return 0; } EXPORT_SYMBOL_NS(st_sensors_power_enable, IIO_ST_SENSORS); diff --git a/drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_core.c b/drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_core.c index ae7bc815382f..e887b45cdbcd 100644 --- a/drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_core.c +++ b/drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_core.c @@ -18,58 +18,6 @@ #include "st_lsm9ds0.h" -static int st_lsm9ds0_power_enable(struct device *dev, struct st_lsm9ds0 *lsm9ds0) -{ - int ret; - - /* Regulators not mandatory, but if requested we should enable them. 
*/ - lsm9ds0->vdd = devm_regulator_get(dev, "vdd"); - if (IS_ERR(lsm9ds0->vdd)) - return dev_err_probe(dev, PTR_ERR(lsm9ds0->vdd), - "unable to get Vdd supply\n"); - - ret = regulator_enable(lsm9ds0->vdd); - if (ret) { - dev_warn(dev, "Failed to enable specified Vdd supply\n"); - return ret; - } - - lsm9ds0->vdd_io = devm_regulator_get(dev, "vddio"); - if (IS_ERR(lsm9ds0->vdd_io)) { - regulator_disable(lsm9ds0->vdd); - return dev_err_probe(dev, PTR_ERR(lsm9ds0->vdd_io), - "unable to get Vdd_IO supply\n"); - } - ret = regulator_enable(lsm9ds0->vdd_io); - if (ret) { - dev_warn(dev, "Failed to enable specified Vdd_IO supply\n"); - regulator_disable(lsm9ds0->vdd); - return ret; - } - - return 0; -} - -static void st_lsm9ds0_power_disable(void *data) -{ - struct st_lsm9ds0 *lsm9ds0 = data; - - regulator_disable(lsm9ds0->vdd_io); - regulator_disable(lsm9ds0->vdd); -} - -static int devm_st_lsm9ds0_power_enable(struct st_lsm9ds0 *lsm9ds0) -{ - struct device *dev = lsm9ds0->dev; - int ret; - - ret = st_lsm9ds0_power_enable(dev, lsm9ds0); - if (ret) - return ret; - - return devm_add_action_or_reset(dev, st_lsm9ds0_power_disable, lsm9ds0); -} - static int st_lsm9ds0_probe_accel(struct st_lsm9ds0 *lsm9ds0, struct regmap *regmap) { const struct st_sensor_settings *settings; @@ -92,8 +40,6 @@ static int st_lsm9ds0_probe_accel(struct st_lsm9ds0 *lsm9ds0, struct regmap *reg data->sensor_settings = (struct st_sensor_settings *)settings; data->irq = lsm9ds0->irq; data->regmap = regmap; - data->vdd = lsm9ds0->vdd; - data->vdd_io = lsm9ds0->vdd_io; return st_accel_common_probe(lsm9ds0->accel); } @@ -120,19 +66,22 @@ static int st_lsm9ds0_probe_magn(struct st_lsm9ds0 *lsm9ds0, struct regmap *regm data->sensor_settings = (struct st_sensor_settings *)settings; data->irq = lsm9ds0->irq; data->regmap = regmap; - data->vdd = lsm9ds0->vdd; - data->vdd_io = lsm9ds0->vdd_io; return st_magn_common_probe(lsm9ds0->magn); } int st_lsm9ds0_probe(struct st_lsm9ds0 *lsm9ds0, struct regmap *regmap) { 
+ struct device *dev = lsm9ds0->dev; + static const char * const regulator_names[] = { "vdd", "vddio" }; int ret; - ret = devm_st_lsm9ds0_power_enable(lsm9ds0); + /* Regulators not mandatory, but if requested we should enable them. */ + ret = devm_regulator_bulk_get_enable(dev, ARRAY_SIZE(regulator_names), + regulator_names); if (ret) - return ret; + return dev_err_probe(dev, ret, + "unable to enable Vdd supply\n"); /* Setup accelerometer device */ ret = st_lsm9ds0_probe_accel(lsm9ds0, regmap); diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index db4a1b260348..f5f3ee57bc70 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -224,8 +224,6 @@ struct st_sensor_settings { * @mount_matrix: The mounting matrix of the sensor. * @sensor_settings: Pointer to the specific sensor settings in use. * @current_fullscale: Maximum range of measure by the sensor. - * @vdd: Pointer to sensor's Vdd power supply - * @vdd_io: Pointer to sensor's Vdd-IO power supply * @regmap: Pointer to specific sensor regmap configuration. * @enabled: Status of the sensor (false->off, true->on). * @odr: Output data rate of the sensor [Hz]. @@ -244,8 +242,6 @@ struct st_sensor_data { struct iio_mount_matrix mount_matrix; struct st_sensor_settings *sensor_settings; struct st_sensor_fullscale_avl *current_fullscale; - struct regulator *vdd; - struct regulator *vdd_io; struct regmap *regmap; bool enabled; From d86186a6e0247394c70239713994df5e2c66220c Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 16 Oct 2022 17:34:03 +0100 Subject: [PATCH 2306/4122] iio: frequency: ad9523: Use devm_regulator_get_enable() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This driver only turns the power on at probe and off via a custom devm_add_action_or_reset() callback. The new devm_regulator_get_enable() replaces this boilerplate code. 
Note that in event of an error on the devm_regulator_get() the driver would have continued without enabling the regulator which is probably not a good idea. So here we handle any error as a reason to fail the probe(). In theory this may expose breakage on a platform that was previously papered over but it seems low risk. Signed-off-by: Jonathan Cameron Cc: Michael Hennerich Reviewed-by: Matti Vaittinen Reviewed-by: Nuno Sá Link: https://lore.kernel.org/r/20221016163409.320197-9-jic23@kernel.org --- drivers/iio/frequency/ad9523.c | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/drivers/iio/frequency/ad9523.c b/drivers/iio/frequency/ad9523.c index 97662ca1ca96..b391c6e27ab0 100644 --- a/drivers/iio/frequency/ad9523.c +++ b/drivers/iio/frequency/ad9523.c @@ -265,7 +265,6 @@ enum { struct ad9523_state { struct spi_device *spi; - struct regulator *reg; struct ad9523_platform_data *pdata; struct iio_chan_spec ad9523_channels[AD9523_NUM_CHAN]; struct gpio_desc *pwrdown_gpio; @@ -969,13 +968,6 @@ static int ad9523_setup(struct iio_dev *indio_dev) return 0; } -static void ad9523_reg_disable(void *data) -{ - struct regulator *reg = data; - - regulator_disable(reg); -} - static int ad9523_probe(struct spi_device *spi) { struct ad9523_platform_data *pdata = spi->dev.platform_data; @@ -996,17 +988,9 @@ static int ad9523_probe(struct spi_device *spi) mutex_init(&st->lock); - st->reg = devm_regulator_get(&spi->dev, "vcc"); - if (!IS_ERR(st->reg)) { - ret = regulator_enable(st->reg); - if (ret) - return ret; - - ret = devm_add_action_or_reset(&spi->dev, ad9523_reg_disable, - st->reg); - if (ret) - return ret; - } + ret = devm_regulator_get_enable(&spi->dev, "vcc"); + if (ret) + return ret; st->pwrdown_gpio = devm_gpiod_get_optional(&spi->dev, "powerdown", GPIOD_OUT_HIGH); From 2c97f7b404b8610ebca645d60a6ed1e68d08fbe3 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 16 Oct 2022 17:34:04 +0100 Subject: [PATCH 2307/4122] iio: 
humidity: hts221: Use devm_regulator_get_enable() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This driver only turns the power on at probe and off via a custom devm_add_action_or_reset() callback. The new devm_regulator_get_enable() replaces this boilerplate code. Signed-off-by: Jonathan Cameron Acked-by: Lorenzo Bianconi Reviewed-by: Matti Vaittinen Reviewed-by: Nuno Sá Link: https://lore.kernel.org/r/20221016163409.320197-10-jic23@kernel.org --- drivers/iio/humidity/hts221.h | 2 -- drivers/iio/humidity/hts221_core.c | 27 ++++----------------------- 2 files changed, 4 insertions(+), 25 deletions(-) diff --git a/drivers/iio/humidity/hts221.h b/drivers/iio/humidity/hts221.h index cf3d8d2dccd6..721359e226cb 100644 --- a/drivers/iio/humidity/hts221.h +++ b/drivers/iio/humidity/hts221.h @@ -13,7 +13,6 @@ #define HTS221_DEV_NAME "hts221" #include -#include enum hts221_sensor_type { HTS221_SENSOR_H, @@ -30,7 +29,6 @@ struct hts221_hw { const char *name; struct device *dev; struct regmap *regmap; - struct regulator *vdd; struct iio_trigger *trig; int irq; diff --git a/drivers/iio/humidity/hts221_core.c b/drivers/iio/humidity/hts221_core.c index 517158307d8c..2a413da87b76 100644 --- a/drivers/iio/humidity/hts221_core.c +++ b/drivers/iio/humidity/hts221_core.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include "hts221.h" @@ -549,33 +550,17 @@ static const unsigned long hts221_scan_masks[] = {0x3, 0x0}; static int hts221_init_regulators(struct device *dev) { - struct iio_dev *iio_dev = dev_get_drvdata(dev); - struct hts221_hw *hw = iio_priv(iio_dev); int err; - hw->vdd = devm_regulator_get(dev, "vdd"); - if (IS_ERR(hw->vdd)) - return dev_err_probe(dev, PTR_ERR(hw->vdd), - "failed to get vdd regulator\n"); - - err = regulator_enable(hw->vdd); - if (err) { - dev_err(dev, "failed to enable vdd regulator: %d\n", err); - return err; - } + err = devm_regulator_get_enable(dev, "vdd"); + if (err) + return
dev_err_probe(dev, err, "failed to get vdd regulator\n"); msleep(50); return 0; } -static void hts221_chip_uninit(void *data) -{ - struct hts221_hw *hw = data; - - regulator_disable(hw->vdd); -} - int hts221_probe(struct device *dev, int irq, const char *name, struct regmap *regmap) { @@ -600,10 +585,6 @@ int hts221_probe(struct device *dev, int irq, const char *name, if (err) return err; - err = devm_add_action_or_reset(dev, hts221_chip_uninit, hw); - if (err) - return err; - err = hts221_check_whoami(hw); if (err < 0) return err; From c437c977c1d2116e0f3667de7222544f348032e4 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 16 Oct 2022 17:34:05 +0100 Subject: [PATCH 2308/4122] iio: light: ltr501: Use devm_regulator_bulk_get_enable() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This driver only turns the power for some regulators on at probe and off via a custom devm_add_action_or_reset() callback. The new devm_regulator_bulk_get_enable() replaces all this boilerplate code. 
Signed-off-by: Jonathan Cameron Reviewed-by: Matti Vaittinen Reviewed-by: Nuno Sá Link: https://lore.kernel.org/r/20221016163409.320197-11-jic23@kernel.org --- drivers/iio/light/ltr501.c | 27 ++++----------------------- 1 file changed, 4 insertions(+), 23 deletions(-) diff --git a/drivers/iio/light/ltr501.c b/drivers/iio/light/ltr501.c index 74a1ccda8b9c..453b845ef265 100644 --- a/drivers/iio/light/ltr501.c +++ b/drivers/iio/light/ltr501.c @@ -153,7 +153,6 @@ struct ltr501_chip_info { struct ltr501_data { struct i2c_client *client; - struct regulator_bulk_data regulators[2]; struct mutex lock_als, lock_ps; const struct ltr501_chip_info *chip_info; u8 als_contr, ps_contr; @@ -1415,13 +1414,6 @@ static const struct regmap_config ltr501_regmap_config = { .volatile_reg = ltr501_is_volatile_reg, }; -static void ltr501_disable_regulators(void *d) -{ - struct ltr501_data *data = d; - - regulator_bulk_disable(ARRAY_SIZE(data->regulators), data->regulators); -} - static int ltr501_powerdown(struct ltr501_data *data) { return ltr501_write_contr(data, data->als_contr & @@ -1443,6 +1435,7 @@ static const char *ltr501_match_acpi_device(struct device *dev, int *chip_idx) static int ltr501_probe(struct i2c_client *client, const struct i2c_device_id *id) { + static const char * const regulator_names[] = { "vdd", "vddio" }; struct ltr501_data *data; struct iio_dev *indio_dev; struct regmap *regmap; @@ -1466,25 +1459,13 @@ static int ltr501_probe(struct i2c_client *client, mutex_init(&data->lock_als); mutex_init(&data->lock_ps); - data->regulators[0].supply = "vdd"; - data->regulators[1].supply = "vddio"; - ret = devm_regulator_bulk_get(&client->dev, - ARRAY_SIZE(data->regulators), - data->regulators); + ret = devm_regulator_bulk_get_enable(&client->dev, + ARRAY_SIZE(regulator_names), + regulator_names); if (ret) return dev_err_probe(&client->dev, ret, "Failed to get regulators\n"); - ret = regulator_bulk_enable(ARRAY_SIZE(data->regulators), - data->regulators); - if (ret) - return 
ret; - - ret = devm_add_action_or_reset(&client->dev, - ltr501_disable_regulators, data); - if (ret) - return ret; - data->reg_it = devm_regmap_field_alloc(&client->dev, regmap, reg_field_it); if (IS_ERR(data->reg_it)) { From b620be5f32621b953313056c396894818d5a2ed9 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 16 Oct 2022 17:34:06 +0100 Subject: [PATCH 2309/4122] iio: light: noa1305: Use devm_regulator_get_enable() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This driver only turns the power on at probe and off via a custom devm_add_action_or_reset() callback. The new devm_regulator_get_enable() replaces this boilerplate code. Signed-off-by: Jonathan Cameron Cc: Martyn Welch Reviewed-by: Matti Vaittinen Reviewed-by: Nuno Sá Link: https://lore.kernel.org/r/20221016163409.320197-12-jic23@kernel.org --- drivers/iio/light/noa1305.c | 26 +++----------------------- 1 file changed, 3 insertions(+), 23 deletions(-) diff --git a/drivers/iio/light/noa1305.c b/drivers/iio/light/noa1305.c index ee81fe083e4c..be3536b390fc 100644 --- a/drivers/iio/light/noa1305.c +++ b/drivers/iio/light/noa1305.c @@ -46,7 +46,6 @@ struct noa1305_priv { struct i2c_client *client; struct regmap *regmap; - struct regulator *vin_reg; }; static int noa1305_measure(struct noa1305_priv *priv) @@ -187,13 +186,6 @@ static const struct regmap_config noa1305_regmap_config = { .writeable_reg = noa1305_writable_reg, }; -static void noa1305_reg_remove(void *data) -{ - struct noa1305_priv *priv = data; - - regulator_disable(priv->vin_reg); -} - static int noa1305_probe(struct i2c_client *client, const struct i2c_device_id *id) { @@ -216,23 +208,11 @@ static int noa1305_probe(struct i2c_client *client, priv = iio_priv(indio_dev); - priv->vin_reg = devm_regulator_get(&client->dev, "vin"); - if (IS_ERR(priv->vin_reg)) - return dev_err_probe(&client->dev, PTR_ERR(priv->vin_reg), + ret = devm_regulator_get_enable(&client->dev, "vin"); + if (ret) + return 
dev_err_probe(&client->dev, ret, "get regulator vin failed\n"); - ret = regulator_enable(priv->vin_reg); - if (ret) { - dev_err(&client->dev, "enable regulator vin failed\n"); - return ret; - } - - ret = devm_add_action_or_reset(&client->dev, noa1305_reg_remove, priv); - if (ret) { - dev_err(&client->dev, "addition of devm action failed\n"); - return ret; - } - i2c_set_clientdata(client, indio_dev); priv->client = client; priv->regmap = regmap; From 1db96143df6b81a402e9b88d08f04374f1353ed0 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 16 Oct 2022 17:34:07 +0100 Subject: [PATCH 2310/4122] iio: proximity: sx_common: Use devm_regulator_bulk_get_enable() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This driver only turns the power for some regulators on at probe and off via a custom devm_add_action_or_reset() callback. The new devm_regulator_bulk_get_enable() replaces all this boilerplate code. Signed-off-by: Jonathan Cameron Cc: Gwendal Grignou Reviewed-by: Stephen Boyd Reviewed-by: Matti Vaittinen Reviewed-by: Nuno Sá Link: https://lore.kernel.org/r/20221016163409.320197-13-jic23@kernel.org --- drivers/iio/proximity/sx_common.c | 23 +++-------------------- drivers/iio/proximity/sx_common.h | 2 -- 2 files changed, 3 insertions(+), 22 deletions(-) diff --git a/drivers/iio/proximity/sx_common.c b/drivers/iio/proximity/sx_common.c index d70a6b4f0bf8..eba9256730ec 100644 --- a/drivers/iio/proximity/sx_common.c +++ b/drivers/iio/proximity/sx_common.c @@ -424,13 +424,6 @@ static const struct iio_buffer_setup_ops sx_common_buffer_setup_ops = { .postdisable = sx_common_buffer_postdisable, }; -static void sx_common_regulator_disable(void *_data) -{ - struct sx_common_data *data = _data; - - regulator_bulk_disable(ARRAY_SIZE(data->supplies), data->supplies); -} - #define SX_COMMON_SOFT_RESET 0xde static int sx_common_init_device(struct device *dev, struct iio_dev *indio_dev) @@ -474,6 +467,7 @@ int sx_common_probe(struct 
i2c_client *client, const struct sx_common_chip_info *chip_info, const struct regmap_config *regmap_config) { + static const char * const regulator_names[] = { "vdd", "svdd" }; struct device *dev = &client->dev; struct iio_dev *indio_dev; struct sx_common_data *data; @@ -487,8 +481,6 @@ int sx_common_probe(struct i2c_client *client, data->chip_info = chip_info; data->client = client; - data->supplies[0].supply = "vdd"; - data->supplies[1].supply = "svdd"; mutex_init(&data->mutex); init_completion(&data->completion); @@ -497,23 +489,14 @@ int sx_common_probe(struct i2c_client *client, return dev_err_probe(dev, PTR_ERR(data->regmap), "Could init register map\n"); - ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(data->supplies), - data->supplies); + ret = devm_regulator_bulk_get_enable(dev, ARRAY_SIZE(regulator_names), + regulator_names); if (ret) return dev_err_probe(dev, ret, "Unable to get regulators\n"); - ret = regulator_bulk_enable(ARRAY_SIZE(data->supplies), data->supplies); - if (ret) - return dev_err_probe(dev, ret, "Unable to enable regulators\n"); - /* Must wait for Tpor time after initial power up */ usleep_range(1000, 1100); - ret = devm_add_action_or_reset(dev, sx_common_regulator_disable, data); - if (ret) - return dev_err_probe(dev, ret, - "Unable to register regulators deleter\n"); - ret = data->chip_info->ops.check_whoami(dev, indio_dev); if (ret) return dev_err_probe(dev, ret, "error reading WHOAMI\n"); diff --git a/drivers/iio/proximity/sx_common.h b/drivers/iio/proximity/sx_common.h index 5d3edeb75f4e..49d4517103b0 100644 --- a/drivers/iio/proximity/sx_common.h +++ b/drivers/iio/proximity/sx_common.h @@ -102,7 +102,6 @@ struct sx_common_chip_info { * @trig: IIO trigger object. * @regmap: Register map. * @num_default_regs: Number of default registers to set at init. - * @supplies: Power supplies object. * @chan_prox_stat: Last reading of the proximity status for each channel. * We only send an event to user space when this changes. 
* @trigger_enabled: True when the device trigger is enabled. @@ -120,7 +119,6 @@ struct sx_common_data { struct iio_trigger *trig; struct regmap *regmap; - struct regulator_bulk_data supplies[2]; unsigned long chan_prox_stat; bool trigger_enabled; From ecff5cb471b5dc20b996378c0533bd5f34e85c89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 23 Oct 2022 15:22:40 +0200 Subject: [PATCH 2311/4122] iio: accel: adxl367: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221023132302.911644-2-u.kleine-koenig@pengutronix.de Signed-off-by: Jonathan Cameron --- drivers/iio/accel/adxl367_i2c.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/accel/adxl367_i2c.c b/drivers/iio/accel/adxl367_i2c.c index 3606efa25835..070aad724abd 100644 --- a/drivers/iio/accel/adxl367_i2c.c +++ b/drivers/iio/accel/adxl367_i2c.c @@ -41,8 +41,7 @@ static const struct adxl367_ops adxl367_i2c_ops = { .read_fifo = adxl367_i2c_read_fifo, }; -static int adxl367_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int adxl367_i2c_probe(struct i2c_client *client) { struct adxl367_i2c_state *st; struct regmap *regmap; @@ -78,7 +77,7 @@ static struct i2c_driver adxl367_i2c_driver = { .name = "adxl367_i2c", .of_match_table = adxl367_of_match, }, - .probe = adxl367_i2c_probe, + .probe_new = adxl367_i2c_probe, .id_table = adxl367_i2c_id, }; From a9e38f1ecc461c43aa26849394c552e38593ad38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 23 Oct 2022 15:22:45 +0200 Subject: [PATCH 2312/4122] iio: accel: da311: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use 
of the i2c_device_id * parameter so it can be trivially converted. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221023132302.911644-7-u.kleine-koenig@pengutronix.de Signed-off-by: Jonathan Cameron --- drivers/iio/accel/da311.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/accel/da311.c b/drivers/iio/accel/da311.c index ec4e29d260f7..080335fa2ad6 100644 --- a/drivers/iio/accel/da311.c +++ b/drivers/iio/accel/da311.c @@ -217,8 +217,7 @@ static void da311_disable(void *client) da311_enable(client, false); } -static int da311_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int da311_probe(struct i2c_client *client) { int ret; struct iio_dev *indio_dev; @@ -279,7 +278,7 @@ static struct i2c_driver da311_driver = { .name = "da311", .pm = pm_sleep_ptr(&da311_pm_ops), }, - .probe = da311_probe, + .probe_new = da311_probe, .id_table = da311_i2c_id, }; From 72907238b193be0fc18f2859b7f4c6741b73aedc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 23 Oct 2022 15:22:46 +0200 Subject: [PATCH 2313/4122] iio: accel: dmard06: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221023132302.911644-8-u.kleine-koenig@pengutronix.de Signed-off-by: Jonathan Cameron --- drivers/iio/accel/dmard06.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/accel/dmard06.c b/drivers/iio/accel/dmard06.c index 4b69c8530f5e..7390509aaac0 100644 --- a/drivers/iio/accel/dmard06.c +++ b/drivers/iio/accel/dmard06.c @@ -125,8 +125,7 @@ static const struct iio_info dmard06_info = { .read_raw = dmard06_read_raw, }; -static int dmard06_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int dmard06_probe(struct i2c_client *client) { int ret; struct iio_dev *indio_dev; @@ -218,7 +217,7 @@ static const struct of_device_id dmard06_of_match[] = { MODULE_DEVICE_TABLE(of, dmard06_of_match); static struct i2c_driver dmard06_driver = { - .probe = dmard06_probe, + .probe_new = dmard06_probe, .id_table = dmard06_id, .driver = { .name = DMARD06_DRV_NAME, From d61f79d383f548df8b3b9da58ab687c7db1a93c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 23 Oct 2022 15:22:47 +0200 Subject: [PATCH 2314/4122] iio: accel: dmard09: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221023132302.911644-9-u.kleine-koenig@pengutronix.de Signed-off-by: Jonathan Cameron --- drivers/iio/accel/dmard09.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/accel/dmard09.c b/drivers/iio/accel/dmard09.c index cb0246ca72f3..4b7a537f617d 100644 --- a/drivers/iio/accel/dmard09.c +++ b/drivers/iio/accel/dmard09.c @@ -88,8 +88,7 @@ static const struct iio_info dmard09_info = { .read_raw = dmard09_read_raw, }; -static int dmard09_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int dmard09_probe(struct i2c_client *client) { int ret; struct iio_dev *indio_dev; @@ -136,7 +135,7 @@ static struct i2c_driver dmard09_driver = { .driver = { .name = DMARD09_DRV_NAME }, - .probe = dmard09_probe, + .probe_new = dmard09_probe, .id_table = dmard09_id, }; From 5019025fc3a9869c438ad33ab18a4fdad46a996d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 23 Oct 2022 15:22:48 +0200 Subject: [PATCH 2315/4122] iio: accel: dmard10: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221023132302.911644-10-u.kleine-koenig@pengutronix.de Signed-off-by: Jonathan Cameron --- drivers/iio/accel/dmard10.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/accel/dmard10.c b/drivers/iio/accel/dmard10.c index 8ac62ec0a04a..07e68aed8a13 100644 --- a/drivers/iio/accel/dmard10.c +++ b/drivers/iio/accel/dmard10.c @@ -175,8 +175,7 @@ static void dmard10_shutdown_cleanup(void *client) dmard10_shutdown(client); } -static int dmard10_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int dmard10_probe(struct i2c_client *client) { int ret; struct iio_dev *indio_dev; @@ -242,7 +241,7 @@ static struct i2c_driver dmard10_driver = { .name = "dmard10", .pm = pm_sleep_ptr(&dmard10_pm_ops), }, - .probe = dmard10_probe, + .probe_new = dmard10_probe, .id_table = dmard10_i2c_id, }; From 2ba423fefb1c87d080e3a7964ccfa7e929ed6f07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 23 Oct 2022 15:22:50 +0200 Subject: [PATCH 2316/4122] iio: accel: kxsd9: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221023132302.911644-12-u.kleine-koenig@pengutronix.de Signed-off-by: Jonathan Cameron --- drivers/iio/accel/kxsd9-i2c.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/accel/kxsd9-i2c.c b/drivers/iio/accel/kxsd9-i2c.c index 61346ea8ef19..6b3683ddce36 100644 --- a/drivers/iio/accel/kxsd9-i2c.c +++ b/drivers/iio/accel/kxsd9-i2c.c @@ -10,8 +10,7 @@ #include "kxsd9.h" -static int kxsd9_i2c_probe(struct i2c_client *i2c, - const struct i2c_device_id *id) +static int kxsd9_i2c_probe(struct i2c_client *i2c) { static const struct regmap_config config = { .reg_bits = 8, @@ -55,7 +54,7 @@ static struct i2c_driver kxsd9_i2c_driver = { .of_match_table = kxsd9_of_match, .pm = pm_ptr(&kxsd9_dev_pm_ops), }, - .probe = kxsd9_i2c_probe, + .probe_new = kxsd9_i2c_probe, .remove = kxsd9_i2c_remove, .id_table = kxsd9_i2c_id, }; From ded7a4f8f90286e63eefcd32443ac911d926e118 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 23 Oct 2022 15:22:51 +0200 Subject: [PATCH 2317/4122] iio: accel: mc3230: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221023132302.911644-13-u.kleine-koenig@pengutronix.de Signed-off-by: Jonathan Cameron --- drivers/iio/accel/mc3230.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/accel/mc3230.c b/drivers/iio/accel/mc3230.c index 2462000e0519..efc21871de42 100644 --- a/drivers/iio/accel/mc3230.c +++ b/drivers/iio/accel/mc3230.c @@ -106,8 +106,7 @@ static const struct iio_info mc3230_info = { .read_raw = mc3230_read_raw, }; -static int mc3230_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int mc3230_probe(struct i2c_client *client) { int ret; struct iio_dev *indio_dev; @@ -191,7 +190,7 @@ static struct i2c_driver mc3230_driver = { .name = "mc3230", .pm = pm_sleep_ptr(&mc3230_pm_ops), }, - .probe = mc3230_probe, + .probe_new = mc3230_probe, .remove = mc3230_remove, .id_table = mc3230_i2c_id, }; From 76403ea698adffb6661e083545f21e50fef5b3ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 23 Oct 2022 15:22:53 +0200 Subject: [PATCH 2318/4122] iio: accel: mma7660: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221023132302.911644-15-u.kleine-koenig@pengutronix.de Signed-off-by: Jonathan Cameron --- drivers/iio/accel/mma7660.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/accel/mma7660.c b/drivers/iio/accel/mma7660.c index 85829990bbad..b279ca4dcdc0 100644 --- a/drivers/iio/accel/mma7660.c +++ b/drivers/iio/accel/mma7660.c @@ -169,8 +169,7 @@ static const struct iio_info mma7660_info = { .attrs = &mma7660_attribute_group, }; -static int mma7660_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int mma7660_probe(struct i2c_client *client) { int ret; struct iio_dev *indio_dev; @@ -267,7 +266,7 @@ static struct i2c_driver mma7660_driver = { .of_match_table = mma7660_of_match, .acpi_match_table = mma7660_acpi_id, }, - .probe = mma7660_probe, + .probe_new = mma7660_probe, .remove = mma7660_remove, .id_table = mma7660_i2c_id, }; From a97d9d95a188d21f5d1cbedbbc7c990b00bf5744 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 23 Oct 2022 15:22:57 +0200 Subject: [PATCH 2319/4122] iio: accel: mxc4005: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221023132302.911644-19-u.kleine-koenig@pengutronix.de Signed-off-by: Jonathan Cameron --- drivers/iio/accel/mxc4005.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/accel/mxc4005.c b/drivers/iio/accel/mxc4005.c index df600d2917c0..b146fc82738f 100644 --- a/drivers/iio/accel/mxc4005.c +++ b/drivers/iio/accel/mxc4005.c @@ -385,8 +385,7 @@ static int mxc4005_chip_init(struct mxc4005_data *data) return 0; } -static int mxc4005_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int mxc4005_probe(struct i2c_client *client) { struct mxc4005_data *data; struct iio_dev *indio_dev; @@ -489,7 +488,7 @@ static struct i2c_driver mxc4005_driver = { .name = MXC4005_DRV_NAME, .acpi_match_table = ACPI_PTR(mxc4005_acpi_match), }, - .probe = mxc4005_probe, + .probe_new = mxc4005_probe, .id_table = mxc4005_id, }; From 76e4a118c85a7e93ff5264ff8d92f76f675be352 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 23 Oct 2022 15:22:58 +0200 Subject: [PATCH 2320/4122] iio: accel: mxc6255: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221023132302.911644-20-u.kleine-koenig@pengutronix.de Signed-off-by: Jonathan Cameron --- drivers/iio/accel/mxc6255.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/accel/mxc6255.c b/drivers/iio/accel/mxc6255.c index 9aeeadc420d3..aa2e660545f8 100644 --- a/drivers/iio/accel/mxc6255.c +++ b/drivers/iio/accel/mxc6255.c @@ -113,8 +113,7 @@ static const struct regmap_config mxc6255_regmap_config = { .readable_reg = mxc6255_is_readable_reg, }; -static int mxc6255_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int mxc6255_probe(struct i2c_client *client) { struct mxc6255_data *data; struct iio_dev *indio_dev; @@ -184,7 +183,7 @@ static struct i2c_driver mxc6255_driver = { .name = MXC6255_DRV_NAME, .acpi_match_table = ACPI_PTR(mxc6255_acpi_match), }, - .probe = mxc6255_probe, + .probe_new = mxc6255_probe, .id_table = mxc6255_id, }; From 3d8a49ebd6576b7803181e0a9fa4e7930f011f1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 23 Oct 2022 15:22:59 +0200 Subject: [PATCH 2321/4122] iio: accel: stk8312: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221023132302.911644-21-u.kleine-koenig@pengutronix.de Signed-off-by: Jonathan Cameron --- drivers/iio/accel/stk8312.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/accel/stk8312.c b/drivers/iio/accel/stk8312.c index 7b1d6fb692b3..68f680db7505 100644 --- a/drivers/iio/accel/stk8312.c +++ b/drivers/iio/accel/stk8312.c @@ -498,8 +498,7 @@ static const struct iio_buffer_setup_ops stk8312_buffer_setup_ops = { .postdisable = stk8312_buffer_postdisable, }; -static int stk8312_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int stk8312_probe(struct i2c_client *client) { int ret; struct iio_dev *indio_dev; @@ -645,7 +644,7 @@ static struct i2c_driver stk8312_driver = { .name = STK8312_DRIVER_NAME, .pm = pm_sleep_ptr(&stk8312_pm_ops), }, - .probe = stk8312_probe, + .probe_new = stk8312_probe, .remove = stk8312_remove, .id_table = stk8312_i2c_id, }; From d18e70608e01a5845fd75e7ec1956ce422b07184 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 23 Oct 2022 15:23:00 +0200 Subject: [PATCH 2322/4122] iio: accel: stk8ba50: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221023132302.911644-22-u.kleine-koenig@pengutronix.de Signed-off-by: Jonathan Cameron --- drivers/iio/accel/stk8ba50.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/accel/stk8ba50.c b/drivers/iio/accel/stk8ba50.c index 2f5e4ab2a6e7..44f6e0fbdfcc 100644 --- a/drivers/iio/accel/stk8ba50.c +++ b/drivers/iio/accel/stk8ba50.c @@ -379,8 +379,7 @@ static const struct iio_buffer_setup_ops stk8ba50_buffer_setup_ops = { .postdisable = stk8ba50_buffer_postdisable, }; -static int stk8ba50_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int stk8ba50_probe(struct i2c_client *client) { int ret; struct iio_dev *indio_dev; @@ -544,7 +543,7 @@ static struct i2c_driver stk8ba50_driver = { .pm = pm_sleep_ptr(&stk8ba50_pm_ops), .acpi_match_table = ACPI_PTR(stk8ba50_acpi_id), }, - .probe = stk8ba50_probe, + .probe_new = stk8ba50_probe, .remove = stk8ba50_remove, .id_table = stk8ba50_i2c_id, }; From 9492c00d83ee8b2b141e29acbcd3e56dd11aa436 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 23 Oct 2022 15:23:01 +0200 Subject: [PATCH 2323/4122] iio: accel: st_magn: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221023132302.911644-23-u.kleine-koenig@pengutronix.de Signed-off-by: Jonathan Cameron --- drivers/iio/magnetometer/st_magn_i2c.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/magnetometer/st_magn_i2c.c b/drivers/iio/magnetometer/st_magn_i2c.c index c5d8c303db4e..b4098d3b3813 100644 --- a/drivers/iio/magnetometer/st_magn_i2c.c +++ b/drivers/iio/magnetometer/st_magn_i2c.c @@ -54,8 +54,7 @@ static const struct of_device_id st_magn_of_match[] = { }; MODULE_DEVICE_TABLE(of, st_magn_of_match); -static int st_magn_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int st_magn_i2c_probe(struct i2c_client *client) { const struct st_sensor_settings *settings; struct st_sensor_data *mdata; @@ -107,7 +106,7 @@ static struct i2c_driver st_magn_driver = { .name = "st-magn-i2c", .of_match_table = st_magn_of_match, }, - .probe = st_magn_i2c_probe, + .probe_new = st_magn_i2c_probe, .id_table = st_magn_id_table, }; module_i2c_driver(st_magn_driver); From 2ca0b16f6ce23cf586c27a9139cf426b3f64d242 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 23 Oct 2022 15:23:02 +0200 Subject: [PATCH 2324/4122] iio: accel: vl6180: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221023132302.911644-24-u.kleine-koenig@pengutronix.de Signed-off-by: Jonathan Cameron --- drivers/iio/light/vl6180.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/vl6180.c b/drivers/iio/light/vl6180.c index d47a4f6f4e87..8b56df26c59e 100644 --- a/drivers/iio/light/vl6180.c +++ b/drivers/iio/light/vl6180.c @@ -493,8 +493,7 @@ static int vl6180_init(struct vl6180_data *data) return vl6180_hold(data, false); } -static int vl6180_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int vl6180_probe(struct i2c_client *client) { struct vl6180_data *data; struct iio_dev *indio_dev; @@ -539,7 +538,7 @@ static struct i2c_driver vl6180_driver = { .name = VL6180_DRV_NAME, .of_match_table = vl6180_of_match, }, - .probe = vl6180_probe, + .probe_new = vl6180_probe, .id_table = vl6180_id, }; From a95ccebde759f1f6a6070819cd43dce1ad06750d Mon Sep 17 00:00:00 2001 From: Crt Mori Date: Mon, 24 Oct 2022 12:22:05 +0200 Subject: [PATCH 2325/4122] iio: temperature: mlx90632 Style alignment for the driver Changing and aligning the overall style of the driver with the recent reviews. There is no functional change, only type generalization and moving to the reverse Christmas tree for variable declarations. 
Reviewed-by: Andy Shevchenko Signed-off-by: Crt Mori Link: https://lore.kernel.org/r/d59aad00891c1a64e044a0f5bc7d40e42d47e9c7.1666606912.git.cmo@melexis.com Signed-off-by: Jonathan Cameron --- drivers/iio/temperature/mlx90632.c | 65 +++++++++++++++++------------- 1 file changed, 36 insertions(+), 29 deletions(-) diff --git a/drivers/iio/temperature/mlx90632.c b/drivers/iio/temperature/mlx90632.c index 224db7513baa..a17fe5f4967a 100644 --- a/drivers/iio/temperature/mlx90632.c +++ b/drivers/iio/temperature/mlx90632.c @@ -219,11 +219,11 @@ static const struct regmap_config mlx90632_regmap = { .cache_type = REGCACHE_RBTREE, }; -static s32 mlx90632_pwr_set_sleep_step(struct regmap *regmap) +static int mlx90632_pwr_set_sleep_step(struct regmap *regmap) { struct mlx90632_data *data = iio_priv(dev_get_drvdata(regmap_get_device(regmap))); - s32 ret; + int ret; if (data->powerstatus == MLX90632_PWR_STATUS_SLEEP_STEP) return 0; @@ -234,14 +234,14 @@ static s32 mlx90632_pwr_set_sleep_step(struct regmap *regmap) return ret; data->powerstatus = MLX90632_PWR_STATUS_SLEEP_STEP; - return ret; + return 0; } -static s32 mlx90632_pwr_continuous(struct regmap *regmap) +static int mlx90632_pwr_continuous(struct regmap *regmap) { struct mlx90632_data *data = iio_priv(dev_get_drvdata(regmap_get_device(regmap))); - s32 ret; + int ret; if (data->powerstatus == MLX90632_PWR_STATUS_CONTINUOUS) return 0; @@ -252,7 +252,7 @@ static s32 mlx90632_pwr_continuous(struct regmap *regmap) return ret; data->powerstatus = MLX90632_PWR_STATUS_CONTINUOUS; - return ret; + return 0; } /** @@ -444,8 +444,8 @@ static int mlx90632_channel_new_select(int perform_ret, uint8_t *channel_new, static int mlx90632_read_ambient_raw(struct regmap *regmap, s16 *ambient_new_raw, s16 *ambient_old_raw) { - int ret; unsigned int read_tmp; + int ret; ret = regmap_read(regmap, MLX90632_RAM_3(1), &read_tmp); if (ret < 0) @@ -464,11 +464,11 @@ static int mlx90632_read_object_raw(struct regmap *regmap, int 
perform_measurement_ret, s16 *object_new_raw, s16 *object_old_raw) { - int ret; unsigned int read_tmp; - s16 read; - u8 channel = 0; u8 channel_old = 0; + u8 channel = 0; + s16 read; + int ret; ret = mlx90632_channel_new_select(perform_measurement_ret, &channel, &channel_old); @@ -503,7 +503,8 @@ static int mlx90632_read_all_channel(struct mlx90632_data *data, s16 *ambient_new_raw, s16 *ambient_old_raw, s16 *object_new_raw, s16 *object_old_raw) { - s32 ret, measurement; + s32 measurement; + int ret; mutex_lock(&data->lock); ret = mlx90632_set_meas_type(data, MLX90632_MTYP_MEDICAL); @@ -512,24 +513,24 @@ static int mlx90632_read_all_channel(struct mlx90632_data *data, switch (data->powerstatus) { case MLX90632_PWR_STATUS_CONTINUOUS: - measurement = mlx90632_perform_measurement(data); - if (measurement < 0) { - ret = measurement; + ret = mlx90632_perform_measurement(data); + if (ret < 0) goto read_unlock; - } + break; case MLX90632_PWR_STATUS_SLEEP_STEP: - measurement = mlx90632_perform_measurement_burst(data); - if (measurement < 0) { - ret = measurement; + ret = mlx90632_perform_measurement_burst(data); + if (ret < 0) goto read_unlock; - } + break; default: ret = -EOPNOTSUPP; goto read_unlock; } + measurement = ret; /* If we came here ret holds the measurement position */ + ret = mlx90632_read_ambient_raw(data->regmap, ambient_new_raw, ambient_old_raw); if (ret < 0) @@ -615,15 +616,21 @@ static int mlx90632_read_all_channel_extended(struct mlx90632_data *data, s16 *o if (ret < 0) goto read_unlock; - if (data->powerstatus == MLX90632_PWR_STATUS_CONTINUOUS) { + switch (data->powerstatus) { + case MLX90632_PWR_STATUS_CONTINUOUS: ret = read_poll_timeout(mlx90632_perform_measurement, meas, meas == 19, 50000, 800000, false, data); if (ret) goto read_unlock; - } else if (data->powerstatus == MLX90632_PWR_STATUS_SLEEP_STEP) { + break; + case MLX90632_PWR_STATUS_SLEEP_STEP: ret = mlx90632_perform_measurement_burst(data); if (ret < 0) goto read_unlock; + break; + default: + 
ret = -EOPNOTSUPP; + goto read_unlock; } ret = mlx90632_read_object_raw_extended(data->regmap, object_new_raw); @@ -640,9 +647,9 @@ read_unlock: static int mlx90632_read_ee_register(struct regmap *regmap, u16 reg_lsb, s32 *reg_value) { - s32 ret; unsigned int read; u32 value; + int ret; ret = regmap_read(regmap, reg_lsb, &read); if (ret < 0) @@ -806,12 +813,12 @@ static s32 mlx90632_calc_temp_object_extended(s64 object, s64 ambient, s64 refle static int mlx90632_calc_object_dsp105(struct mlx90632_data *data, int *val) { - s32 ret; + s16 ambient_new_raw, ambient_old_raw, object_new_raw, object_old_raw; s32 Ea, Eb, Fa, Fb, Ga; unsigned int read_tmp; - s16 Ha, Hb, Gb, Ka; - s16 ambient_new_raw, ambient_old_raw, object_new_raw, object_old_raw; s64 object, ambient; + s16 Ha, Hb, Gb, Ka; + int ret; ret = mlx90632_read_ee_register(data->regmap, MLX90632_EE_Ea, &Ea); if (ret < 0) @@ -885,11 +892,11 @@ static int mlx90632_calc_object_dsp105(struct mlx90632_data *data, int *val) static int mlx90632_calc_ambient_dsp105(struct mlx90632_data *data, int *val) { - s32 ret; + s16 ambient_new_raw, ambient_old_raw; unsigned int read_tmp; s32 PT, PR, PG, PO; + int ret; s16 Gb; - s16 ambient_new_raw, ambient_old_raw; ret = mlx90632_read_ee_register(data->regmap, MLX90632_EE_P_R, &PR); if (ret < 0) @@ -1164,11 +1171,11 @@ static int mlx90632_enable_regulator(struct mlx90632_data *data) static int mlx90632_probe(struct i2c_client *client, const struct i2c_device_id *id) { - struct iio_dev *indio_dev; struct mlx90632_data *mlx90632; + struct iio_dev *indio_dev; struct regmap *regmap; - int ret; unsigned int read; + int ret; indio_dev = devm_iio_device_alloc(&client->dev, sizeof(*mlx90632)); if (!indio_dev) { From f7626504432a6d30178bc6946d50a9d4a1bb4e31 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 24 Oct 2022 18:52:50 +0800 Subject: [PATCH 2326/4122] iio: trigger: sysfs: rename error label in iio_sysfs_trigger_probe() Rename error label in iio_sysfs_trigger_probe() to make 
it more readable. Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221024105250.873394-1-yangyingliang@huawei.com Signed-off-by: Jonathan Cameron --- drivers/iio/trigger/iio-trig-sysfs.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/iio/trigger/iio-trig-sysfs.c b/drivers/iio/trigger/iio-trig-sysfs.c index d6c5e9644738..63ce01ac2036 100644 --- a/drivers/iio/trigger/iio-trig-sysfs.c +++ b/drivers/iio/trigger/iio-trig-sysfs.c @@ -138,18 +138,18 @@ static int iio_sysfs_trigger_probe(int id) } if (foundit) { ret = -EINVAL; - goto out1; + goto err_unlock; } t = kmalloc(sizeof(*t), GFP_KERNEL); if (t == NULL) { ret = -ENOMEM; - goto out1; + goto err_unlock; } t->id = id; t->trig = iio_trigger_alloc(&iio_sysfs_trig_dev, "sysfstrig%d", id); if (!t->trig) { ret = -ENOMEM; - goto free_t; + goto err_free_sys_trig; } t->trig->dev.groups = iio_sysfs_trigger_attr_groups; @@ -159,17 +159,17 @@ static int iio_sysfs_trigger_probe(int id) ret = iio_trigger_register(t->trig); if (ret) - goto out2; + goto err_free_trig; list_add(&t->l, &iio_sysfs_trig_list); __module_get(THIS_MODULE); mutex_unlock(&iio_sysfs_trig_list_mut); return 0; -out2: +err_free_trig: iio_trigger_free(t->trig); -free_t: +err_free_sys_trig: kfree(t); -out1: +err_unlock: mutex_unlock(&iio_sysfs_trig_list_mut); return ret; } From b52e2f19f80240365d7eaa3fdd320afcf14cf4c0 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Mon, 24 Oct 2022 15:40:11 +0300 Subject: [PATCH 2327/4122] dt-bindings: iio: Add KX022A accelerometer KX022A is a 3-axis accelerometer from ROHM/Kionix. The sensor features include variable ODRs, I2C and SPI control, FIFO/LIFO with watermark IRQ, tap/motion detection, wake-up & back-to-sleep events, four acceleration ranges (2, 4, 8 and 16g) and probably some other cool features. Add the basic device tree description for the accelerometer. 
Only basic accelerometer features are considered as of now - new properties may or may not be needed in the future when the rest of the features are supported. Signed-off-by: Matti Vaittinen Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/06f8e1ab29d02ed216db10091a269df4b6abad9a.1666614295.git.mazziesaccount@gmail.com Signed-off-by: Jonathan Cameron --- .../bindings/iio/accel/kionix,kx022a.yaml | 65 +++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 Documentation/devicetree/bindings/iio/accel/kionix,kx022a.yaml diff --git a/Documentation/devicetree/bindings/iio/accel/kionix,kx022a.yaml b/Documentation/devicetree/bindings/iio/accel/kionix,kx022a.yaml new file mode 100644 index 000000000000..986df1a6ff0a --- /dev/null +++ b/Documentation/devicetree/bindings/iio/accel/kionix,kx022a.yaml @@ -0,0 +1,65 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/iio/accel/kionix,kx022a.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: ROHM/Kionix KX022A Accelerometer + +maintainers: + - Matti Vaittinen <mazziesaccount@gmail.com> + +description: | + KX022A is a 3-axis accelerometer supporting +/- 2G, 4G, 8G and 16G ranges, + output data-rates from 0.78Hz to 1600Hz and hardware FIFO buffering. + KX022A can be accessed either via I2C or SPI. + +properties: + compatible: + const: kionix,kx022a + + reg: + maxItems: 1 + + interrupts: + minItems: 1 + maxItems: 2 + + interrupt-names: + minItems: 1 + items: + - enum: [INT1, INT2] + - const: INT2 + + vdd-supply: true + io-vdd-supply: true + + mount-matrix: + description: | + an optional 3x3 mounting rotation matrix. 
+ +required: + - compatible + - reg + - interrupts + +additionalProperties: false + +examples: + - | + #include <dt-bindings/interrupt-controller/irq.h> + i2c { + #address-cells = <1>; + #size-cells = <0>; + accel@1f { + compatible = "kionix,kx022a"; + reg = <0x1f>; + + interrupt-parent = <&gpio1>; + interrupts = <29 IRQ_TYPE_LEVEL_LOW>; + interrupt-names = "INT1"; + + io-vdd-supply = <&iovdd>; + vdd-supply = <&vdd>; + }; + }; From 7c1d1677b3227c6b18ac999f2b84778baa280b8f Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Mon, 24 Oct 2022 15:40:29 +0300 Subject: [PATCH 2328/4122] iio: accel: Support Kionix/ROHM KX022A accelerometer KX022A is a 3-axis accelerometer from ROHM/Kionix. The sensor features include variable ODRs, I2C and SPI control, FIFO/LIFO with watermark IRQ, tap/motion detection, wake-up & back-to-sleep events, four acceleration ranges (2, 4, 8 and 16g), and probably some other cool features. Add support for the basic accelerometer features such as getting the acceleration data via IIO (raw reads, triggered buffer [data-ready] or using the WMI IRQ). Important things to be added include the double-tap, motion detection and wake-up as well as the runtime power management. 
Signed-off-by: Matti Vaittinen Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/758b00d6aea0a6431a5a3a78d557d449c113b21e.1666614295.git.mazziesaccount@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/accel/Kconfig | 21 + drivers/iio/accel/Makefile | 3 + drivers/iio/accel/kionix-kx022a-i2c.c | 51 ++ drivers/iio/accel/kionix-kx022a-spi.c | 58 ++ drivers/iio/accel/kionix-kx022a.c | 1142 +++++++++++++++++++++++++ drivers/iio/accel/kionix-kx022a.h | 82 ++ 6 files changed, 1357 insertions(+) create mode 100644 drivers/iio/accel/kionix-kx022a-i2c.c create mode 100644 drivers/iio/accel/kionix-kx022a-spi.c create mode 100644 drivers/iio/accel/kionix-kx022a.c create mode 100644 drivers/iio/accel/kionix-kx022a.h diff --git a/drivers/iio/accel/Kconfig b/drivers/iio/accel/Kconfig index ffac66db7ac9..03ac410c162e 100644 --- a/drivers/iio/accel/Kconfig +++ b/drivers/iio/accel/Kconfig @@ -409,6 +409,27 @@ config IIO_ST_ACCEL_SPI_3AXIS To compile this driver as a module, choose M here. The module will be called st_accel_spi. +config IIO_KX022A + tristate + +config IIO_KX022A_SPI + tristate "Kionix KX022A tri-axis digital accelerometer SPI interface" + depends on SPI + select IIO_KX022A + select REGMAP_SPI + help + Enable support for the Kionix KX022A digital tri-axis + accelerometer connected to SPI interface. + +config IIO_KX022A_I2C + tristate "Kionix KX022A tri-axis digital accelerometer I2C interface" + depends on I2C + select IIO_KX022A + select REGMAP_I2C + help + Enable support for the Kionix KX022A digital tri-axis + accelerometer connected to I2C interface. 
+ config KXSD9 tristate "Kionix KXSD9 Accelerometer Driver" select IIO_BUFFER diff --git a/drivers/iio/accel/Makefile b/drivers/iio/accel/Makefile index 5e45b5fa5ab5..311ead9c3ef1 100644 --- a/drivers/iio/accel/Makefile +++ b/drivers/iio/accel/Makefile @@ -40,6 +40,9 @@ obj-$(CONFIG_FXLS8962AF) += fxls8962af-core.o obj-$(CONFIG_FXLS8962AF_I2C) += fxls8962af-i2c.o obj-$(CONFIG_FXLS8962AF_SPI) += fxls8962af-spi.o obj-$(CONFIG_HID_SENSOR_ACCEL_3D) += hid-sensor-accel-3d.o +obj-$(CONFIG_IIO_KX022A) += kionix-kx022a.o +obj-$(CONFIG_IIO_KX022A_I2C) += kionix-kx022a-i2c.o +obj-$(CONFIG_IIO_KX022A_SPI) += kionix-kx022a-spi.o obj-$(CONFIG_KXCJK1013) += kxcjk-1013.o obj-$(CONFIG_KXSD9) += kxsd9.o obj-$(CONFIG_KXSD9_SPI) += kxsd9-spi.o diff --git a/drivers/iio/accel/kionix-kx022a-i2c.c b/drivers/iio/accel/kionix-kx022a-i2c.c new file mode 100644 index 000000000000..e6fd02d931b6 --- /dev/null +++ b/drivers/iio/accel/kionix-kx022a-i2c.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2022 ROHM Semiconductors + * + * ROHM/KIONIX KX022A accelerometer driver + */ + +#include +#include +#include +#include + +#include "kionix-kx022a.h" + +static int kx022a_i2c_probe(struct i2c_client *i2c) +{ + struct device *dev = &i2c->dev; + struct regmap *regmap; + + if (!i2c->irq) { + dev_err(dev, "No IRQ configured\n"); + return -EINVAL; + } + + regmap = devm_regmap_init_i2c(i2c, &kx022a_regmap); + if (IS_ERR(regmap)) + return dev_err_probe(dev, PTR_ERR(regmap), + "Failed to initialize Regmap\n"); + + return kx022a_probe_internal(dev); +} + +static const struct of_device_id kx022a_of_match[] = { + { .compatible = "kionix,kx022a", }, + { } +}; +MODULE_DEVICE_TABLE(of, kx022a_of_match); + +static struct i2c_driver kx022a_i2c_driver = { + .driver = { + .name = "kx022a-i2c", + .of_match_table = kx022a_of_match, + }, + .probe_new = kx022a_i2c_probe, +}; +module_i2c_driver(kx022a_i2c_driver); + +MODULE_DESCRIPTION("ROHM/Kionix KX022A accelerometer driver"); 
+MODULE_AUTHOR("Matti Vaittinen "); +MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(IIO_KX022A); diff --git a/drivers/iio/accel/kionix-kx022a-spi.c b/drivers/iio/accel/kionix-kx022a-spi.c new file mode 100644 index 000000000000..9cd047f7b346 --- /dev/null +++ b/drivers/iio/accel/kionix-kx022a-spi.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2022 ROHM Semiconductors + * + * ROHM/KIONIX KX022A accelerometer driver + */ + +#include +#include +#include +#include + +#include "kionix-kx022a.h" + +static int kx022a_spi_probe(struct spi_device *spi) +{ + struct device *dev = &spi->dev; + struct regmap *regmap; + + if (!spi->irq) { + dev_err(dev, "No IRQ configured\n"); + return -EINVAL; + } + + regmap = devm_regmap_init_spi(spi, &kx022a_regmap); + if (IS_ERR(regmap)) + return dev_err_probe(dev, PTR_ERR(regmap), + "Failed to initialize Regmap\n"); + + return kx022a_probe_internal(dev); +} + +static const struct spi_device_id kx022a_id[] = { + { "kx022a" }, + { } +}; +MODULE_DEVICE_TABLE(spi, kx022a_id); + +static const struct of_device_id kx022a_of_match[] = { + { .compatible = "kionix,kx022a", }, + { } +}; +MODULE_DEVICE_TABLE(of, kx022a_of_match); + +static struct spi_driver kx022a_spi_driver = { + .driver = { + .name = "kx022a-spi", + .of_match_table = kx022a_of_match, + }, + .probe = kx022a_spi_probe, + .id_table = kx022a_id, +}; +module_spi_driver(kx022a_spi_driver); + +MODULE_DESCRIPTION("ROHM/Kionix kx022A accelerometer driver"); +MODULE_AUTHOR("Matti Vaittinen "); +MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(IIO_KX022A); diff --git a/drivers/iio/accel/kionix-kx022a.c b/drivers/iio/accel/kionix-kx022a.c new file mode 100644 index 000000000000..50de7f9a1cc7 --- /dev/null +++ b/drivers/iio/accel/kionix-kx022a.c @@ -0,0 +1,1142 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2022 ROHM Semiconductors + * + * ROHM/KIONIX KX022A accelerometer driver + */ + +#include +#include +#include +#include +#include +#include +#include 
+#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "kionix-kx022a.h" + +/* + * The KX022A has FIFO which can store 43 samples of HiRes data from 3 + * channels. This equals to 43 (samples) * 3 (channels) * 2 (bytes/sample) to + * 258 bytes of sample data. The quirk to know is that the amount of bytes in + * the FIFO is advertised via 8 bit register (max value 255). The thing to note + * is that full 258 bytes of data is indicated using the max value 255. + */ +#define KX022A_FIFO_LENGTH 43 +#define KX022A_FIFO_FULL_VALUE 255 +#define KX022A_SOFT_RESET_WAIT_TIME_US (5 * USEC_PER_MSEC) +#define KX022A_SOFT_RESET_TOTAL_WAIT_TIME_US (500 * USEC_PER_MSEC) + +/* 3 axis, 2 bytes of data for each of the axis */ +#define KX022A_FIFO_SAMPLES_SIZE_BYTES 6 +#define KX022A_FIFO_MAX_BYTES \ + (KX022A_FIFO_LENGTH * KX022A_FIFO_SAMPLES_SIZE_BYTES) + +enum { + KX022A_STATE_SAMPLE, + KX022A_STATE_FIFO, +}; + +/* Regmap configs */ +static const struct regmap_range kx022a_volatile_ranges[] = { + { + .range_min = KX022A_REG_XHP_L, + .range_max = KX022A_REG_COTR, + }, { + .range_min = KX022A_REG_TSCP, + .range_max = KX022A_REG_INT_REL, + }, { + /* The reset bit will be cleared by sensor */ + .range_min = KX022A_REG_CNTL2, + .range_max = KX022A_REG_CNTL2, + }, { + .range_min = KX022A_REG_BUF_STATUS_1, + .range_max = KX022A_REG_BUF_READ, + }, +}; + +static const struct regmap_access_table kx022a_volatile_regs = { + .yes_ranges = &kx022a_volatile_ranges[0], + .n_yes_ranges = ARRAY_SIZE(kx022a_volatile_ranges), +}; + +static const struct regmap_range kx022a_precious_ranges[] = { + { + .range_min = KX022A_REG_INT_REL, + .range_max = KX022A_REG_INT_REL, + }, +}; + +static const struct regmap_access_table kx022a_precious_regs = { + .yes_ranges = &kx022a_precious_ranges[0], + .n_yes_ranges = ARRAY_SIZE(kx022a_precious_ranges), +}; + +/* + * The HW does not set WHO_AM_I reg as read-only but we don't want to write it + * so we still 
include it in the read-only ranges. + */ +static const struct regmap_range kx022a_read_only_ranges[] = { + { + .range_min = KX022A_REG_XHP_L, + .range_max = KX022A_REG_INT_REL, + }, { + .range_min = KX022A_REG_BUF_STATUS_1, + .range_max = KX022A_REG_BUF_STATUS_2, + }, { + .range_min = KX022A_REG_BUF_READ, + .range_max = KX022A_REG_BUF_READ, + }, +}; + +static const struct regmap_access_table kx022a_ro_regs = { + .no_ranges = &kx022a_read_only_ranges[0], + .n_no_ranges = ARRAY_SIZE(kx022a_read_only_ranges), +}; + +static const struct regmap_range kx022a_write_only_ranges[] = { + { + .range_min = KX022A_REG_BTS_WUF_TH, + .range_max = KX022A_REG_BTS_WUF_TH, + }, { + .range_min = KX022A_REG_MAN_WAKE, + .range_max = KX022A_REG_MAN_WAKE, + }, { + .range_min = KX022A_REG_SELF_TEST, + .range_max = KX022A_REG_SELF_TEST, + }, { + .range_min = KX022A_REG_BUF_CLEAR, + .range_max = KX022A_REG_BUF_CLEAR, + }, +}; + +static const struct regmap_access_table kx022a_wo_regs = { + .no_ranges = &kx022a_write_only_ranges[0], + .n_no_ranges = ARRAY_SIZE(kx022a_write_only_ranges), +}; + +static const struct regmap_range kx022a_noinc_read_ranges[] = { + { + .range_min = KX022A_REG_BUF_READ, + .range_max = KX022A_REG_BUF_READ, + }, +}; + +static const struct regmap_access_table kx022a_nir_regs = { + .yes_ranges = &kx022a_noinc_read_ranges[0], + .n_yes_ranges = ARRAY_SIZE(kx022a_noinc_read_ranges), +}; + +const struct regmap_config kx022a_regmap = { + .reg_bits = 8, + .val_bits = 8, + .volatile_table = &kx022a_volatile_regs, + .rd_table = &kx022a_wo_regs, + .wr_table = &kx022a_ro_regs, + .rd_noinc_table = &kx022a_nir_regs, + .precious_table = &kx022a_precious_regs, + .max_register = KX022A_MAX_REGISTER, + .cache_type = REGCACHE_RBTREE, +}; +EXPORT_SYMBOL_NS_GPL(kx022a_regmap, IIO_KX022A); + +struct kx022a_data { + struct regmap *regmap; + struct iio_trigger *trig; + struct device *dev; + struct iio_mount_matrix orientation; + int64_t timestamp, old_timestamp; + + int irq; + int inc_reg; + 
int ien_reg; + + unsigned int g_range; + unsigned int state; + unsigned int odr_ns; + + bool trigger_enabled; + /* + * Prevent toggling the sensor stby/active state (PC1 bit) in the + * middle of a configuration, or when the fifo is enabled. Also, + * protect the data stored/retrieved from this structure from + * concurrent accesses. + */ + struct mutex mutex; + u8 watermark; + + /* 3 x 16bit accel data + timestamp */ + __le16 buffer[8] __aligned(IIO_DMA_MINALIGN); + struct { + __le16 channels[3]; + s64 ts __aligned(8); + } scan; +}; + +static const struct iio_mount_matrix * +kx022a_get_mount_matrix(const struct iio_dev *idev, + const struct iio_chan_spec *chan) +{ + struct kx022a_data *data = iio_priv(idev); + + return &data->orientation; +} + +enum { + AXIS_X, + AXIS_Y, + AXIS_Z, + AXIS_MAX +}; + +static const unsigned long kx022a_scan_masks[] = { + BIT(AXIS_X) | BIT(AXIS_Y) | BIT(AXIS_Z), 0 +}; + +static const struct iio_chan_spec_ext_info kx022a_ext_info[] = { + IIO_MOUNT_MATRIX(IIO_SHARED_BY_TYPE, kx022a_get_mount_matrix), + { } +}; + +#define KX022A_ACCEL_CHAN(axis, index) \ +{ \ + .type = IIO_ACCEL, \ + .modified = 1, \ + .channel2 = IIO_MOD_##axis, \ + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), \ + .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE) | \ + BIT(IIO_CHAN_INFO_SAMP_FREQ), \ + .info_mask_shared_by_type_available = \ + BIT(IIO_CHAN_INFO_SCALE) | \ + BIT(IIO_CHAN_INFO_SAMP_FREQ), \ + .ext_info = kx022a_ext_info, \ + .address = KX022A_REG_##axis##OUT_L, \ + .scan_index = index, \ + .scan_type = { \ + .sign = 's', \ + .realbits = 16, \ + .storagebits = 16, \ + .endianness = IIO_LE, \ + }, \ +} + +static const struct iio_chan_spec kx022a_channels[] = { + KX022A_ACCEL_CHAN(X, 0), + KX022A_ACCEL_CHAN(Y, 1), + KX022A_ACCEL_CHAN(Z, 2), + IIO_CHAN_SOFT_TIMESTAMP(3), +}; + +/* + * The sensor HW can support ODR up to 1600 Hz, which is beyond what most of the + * Linux CPUs can handle without dropping samples. 
Also, the low power mode is + * not available for higher sample rates. Thus, the driver only supports 200 Hz + * and slower ODRs. The slowest is 0.78 Hz. + */ +static const int kx022a_accel_samp_freq_table[][2] = { + { 0, 780000 }, + { 1, 563000 }, + { 3, 125000 }, + { 6, 250000 }, + { 12, 500000 }, + { 25, 0 }, + { 50, 0 }, + { 100, 0 }, + { 200, 0 }, +}; + +static const unsigned int kx022a_odrs[] = { + 1282051282, + 639795266, + 320 * MEGA, + 160 * MEGA, + 80 * MEGA, + 40 * MEGA, + 20 * MEGA, + 10 * MEGA, + 5 * MEGA, +}; + +/* + * range is typically +-2G/4G/8G/16G, distributed over the amount of bits. + * The scale table can be calculated using + * (range / 2^bits) * g = (range / 2^bits) * 9.80665 m/s^2 + * => KX022A uses 16 bit (HiRes mode - assume the low 8 bits are zeroed + * in low-power mode(?) ) + * => +/-2G => 4 / 2^16 * 9,80665 * 10^6 (to scale to micro) + * => +/-2G - 598.550415 + * +/-4G - 1197.10083 + * +/-8G - 2394.20166 + * +/-16G - 4788.40332 + */ +static const int kx022a_scale_table[][2] = { + { 598, 550415 }, + { 1197, 100830 }, + { 2394, 201660 }, + { 4788, 403320 }, +}; + +static int kx022a_read_avail(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + const int **vals, int *type, int *length, + long mask) +{ + switch (mask) { + case IIO_CHAN_INFO_SAMP_FREQ: + *vals = (const int *)kx022a_accel_samp_freq_table; + *length = ARRAY_SIZE(kx022a_accel_samp_freq_table) * + ARRAY_SIZE(kx022a_accel_samp_freq_table[0]); + *type = IIO_VAL_INT_PLUS_MICRO; + return IIO_AVAIL_LIST; + case IIO_CHAN_INFO_SCALE: + *vals = (const int *)kx022a_scale_table; + *length = ARRAY_SIZE(kx022a_scale_table) * + ARRAY_SIZE(kx022a_scale_table[0]); + *type = IIO_VAL_INT_PLUS_MICRO; + return IIO_AVAIL_LIST; + default: + return -EINVAL; + } +} + +#define KX022A_DEFAULT_PERIOD_NS (20 * NSEC_PER_MSEC) + +static void kx022a_reg2freq(unsigned int val, int *val1, int *val2) +{ + *val1 = kx022a_accel_samp_freq_table[val & KX022A_MASK_ODR][0]; + *val2 = 
kx022a_accel_samp_freq_table[val & KX022A_MASK_ODR][1]; +} + +static void kx022a_reg2scale(unsigned int val, unsigned int *val1, + unsigned int *val2) +{ + val &= KX022A_MASK_GSEL; + val >>= KX022A_GSEL_SHIFT; + + *val1 = kx022a_scale_table[val][0]; + *val2 = kx022a_scale_table[val][1]; +} + +static int kx022a_turn_on_off_unlocked(struct kx022a_data *data, bool on) +{ + int ret; + + if (on) + ret = regmap_set_bits(data->regmap, KX022A_REG_CNTL, + KX022A_MASK_PC1); + else + ret = regmap_clear_bits(data->regmap, KX022A_REG_CNTL, + KX022A_MASK_PC1); + if (ret) + dev_err(data->dev, "Turn %s fail %d\n", str_on_off(on), ret); + + return ret; + +} + +static int kx022a_turn_off_lock(struct kx022a_data *data) +{ + int ret; + + mutex_lock(&data->mutex); + ret = kx022a_turn_on_off_unlocked(data, false); + if (ret) + mutex_unlock(&data->mutex); + + return ret; +} + +static int kx022a_turn_on_unlock(struct kx022a_data *data) +{ + int ret; + + ret = kx022a_turn_on_off_unlocked(data, true); + mutex_unlock(&data->mutex); + + return ret; +} + +static int kx022a_write_raw(struct iio_dev *idev, + struct iio_chan_spec const *chan, + int val, int val2, long mask) +{ + struct kx022a_data *data = iio_priv(idev); + int ret, n; + + /* + * We should not allow changing scale or frequency when FIFO is running + * as it will mess the timestamp/scale for samples existing in the + * buffer. If this turns out to be an issue we can later change logic + * to internally flush the fifo before reconfiguring so the samples in + * fifo keep matching the freq/scale settings. (Such setup could cause + * issues if users trust the watermark to be reached within known + * time-limit). 
+ */ + ret = iio_device_claim_direct_mode(idev); + if (ret) + return ret; + + switch (mask) { + case IIO_CHAN_INFO_SAMP_FREQ: + n = ARRAY_SIZE(kx022a_accel_samp_freq_table); + + while (n--) + if (val == kx022a_accel_samp_freq_table[n][0] && + val2 == kx022a_accel_samp_freq_table[n][1]) + break; + if (n < 0) { + ret = -EINVAL; + goto unlock_out; + } + ret = kx022a_turn_off_lock(data); + if (ret) + break; + + ret = regmap_update_bits(data->regmap, + KX022A_REG_ODCNTL, + KX022A_MASK_ODR, n); + data->odr_ns = kx022a_odrs[n]; + kx022a_turn_on_unlock(data); + break; + case IIO_CHAN_INFO_SCALE: + n = ARRAY_SIZE(kx022a_scale_table); + + while (n-- > 0) + if (val == kx022a_scale_table[n][0] && + val2 == kx022a_scale_table[n][1]) + break; + if (n < 0) { + ret = -EINVAL; + goto unlock_out; + } + + ret = kx022a_turn_off_lock(data); + if (ret) + break; + + ret = regmap_update_bits(data->regmap, KX022A_REG_CNTL, + KX022A_MASK_GSEL, + n << KX022A_GSEL_SHIFT); + kx022a_turn_on_unlock(data); + break; + default: + ret = -EINVAL; + break; + } + +unlock_out: + iio_device_release_direct_mode(idev); + + return ret; +} + +static int kx022a_fifo_set_wmi(struct kx022a_data *data) +{ + u8 threshold; + + threshold = data->watermark; + + return regmap_update_bits(data->regmap, KX022A_REG_BUF_CNTL1, + KX022A_MASK_WM_TH, threshold); +} + +static int kx022a_get_axis(struct kx022a_data *data, + struct iio_chan_spec const *chan, + int *val) +{ + int ret; + + ret = regmap_bulk_read(data->regmap, chan->address, &data->buffer[0], + sizeof(__le16)); + if (ret) + return ret; + + *val = le16_to_cpu(data->buffer[0]); + + return IIO_VAL_INT; +} + +static int kx022a_read_raw(struct iio_dev *idev, + struct iio_chan_spec const *chan, + int *val, int *val2, long mask) +{ + struct kx022a_data *data = iio_priv(idev); + unsigned int regval; + int ret; + + switch (mask) { + case IIO_CHAN_INFO_RAW: + ret = iio_device_claim_direct_mode(idev); + if (ret) + return ret; + + mutex_lock(&data->mutex); + ret = 
kx022a_get_axis(data, chan, val); + mutex_unlock(&data->mutex); + + iio_device_release_direct_mode(idev); + + return ret; + + case IIO_CHAN_INFO_SAMP_FREQ: + ret = regmap_read(data->regmap, KX022A_REG_ODCNTL, &regval); + if (ret) + return ret; + + if ((regval & KX022A_MASK_ODR) > + ARRAY_SIZE(kx022a_accel_samp_freq_table)) { + dev_err(data->dev, "Invalid ODR\n"); + return -EINVAL; + } + + kx022a_reg2freq(regval, val, val2); + + return IIO_VAL_INT_PLUS_MICRO; + + case IIO_CHAN_INFO_SCALE: + ret = regmap_read(data->regmap, KX022A_REG_CNTL, &regval); + if (ret < 0) + return ret; + + kx022a_reg2scale(regval, val, val2); + + return IIO_VAL_INT_PLUS_MICRO; + } + + return -EINVAL; +}; + +static int kx022a_validate_trigger(struct iio_dev *idev, + struct iio_trigger *trig) +{ + struct kx022a_data *data = iio_priv(idev); + + if (data->trig != trig) + return -EINVAL; + + return 0; +} + +static int kx022a_set_watermark(struct iio_dev *idev, unsigned int val) +{ + struct kx022a_data *data = iio_priv(idev); + + if (val > KX022A_FIFO_LENGTH) + val = KX022A_FIFO_LENGTH; + + mutex_lock(&data->mutex); + data->watermark = val; + mutex_unlock(&data->mutex); + + return 0; +} + +static ssize_t hwfifo_enabled_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct iio_dev *idev = dev_to_iio_dev(dev); + struct kx022a_data *data = iio_priv(idev); + bool state; + + mutex_lock(&data->mutex); + state = data->state; + mutex_unlock(&data->mutex); + + return sysfs_emit(buf, "%d\n", state); +} + +static ssize_t hwfifo_watermark_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct iio_dev *idev = dev_to_iio_dev(dev); + struct kx022a_data *data = iio_priv(idev); + int wm; + + mutex_lock(&data->mutex); + wm = data->watermark; + mutex_unlock(&data->mutex); + + return sysfs_emit(buf, "%d\n", wm); +} + +static IIO_DEVICE_ATTR_RO(hwfifo_enabled, 0); +static IIO_DEVICE_ATTR_RO(hwfifo_watermark, 0); + +static const struct attribute 
*kx022a_fifo_attributes[] = { + &iio_dev_attr_hwfifo_watermark.dev_attr.attr, + &iio_dev_attr_hwfifo_enabled.dev_attr.attr, + NULL +}; + +static int kx022a_drop_fifo_contents(struct kx022a_data *data) +{ + /* + * We must clear the old time-stamp to avoid computing the timestamps + * based on samples acquired when buffer was last enabled. + * + * We don't need to protect the timestamp as long as we are only + * called from fifo-disable where we can guarantee the sensor is not + * triggering interrupts and where the mutex is locked to prevent the + * user-space access. + */ + data->timestamp = 0; + + return regmap_write(data->regmap, KX022A_REG_BUF_CLEAR, 0x0); +} + +static int __kx022a_fifo_flush(struct iio_dev *idev, unsigned int samples, + bool irq) +{ + struct kx022a_data *data = iio_priv(idev); + struct device *dev = regmap_get_device(data->regmap); + __le16 buffer[KX022A_FIFO_LENGTH * 3]; + uint64_t sample_period; + int count, fifo_bytes; + bool renable = false; + int64_t tstamp; + int ret, i; + + ret = regmap_read(data->regmap, KX022A_REG_BUF_STATUS_1, &fifo_bytes); + if (ret) { + dev_err(dev, "Error reading buffer status\n"); + return ret; + } + + /* Let's not overflow if we for some reason get bogus value from i2c */ + if (fifo_bytes == KX022A_FIFO_FULL_VALUE) + fifo_bytes = KX022A_FIFO_MAX_BYTES; + + if (fifo_bytes % KX022A_FIFO_SAMPLES_SIZE_BYTES) + dev_warn(data->dev, "Bad FIFO alignment. Data may be corrupt\n"); + + count = fifo_bytes / KX022A_FIFO_SAMPLES_SIZE_BYTES; + if (!count) + return 0; + + /* + * If we are being called from IRQ handler we know the stored timestamp + * is fairly accurate for the last stored sample. Otherwise, if we are + * called as a result of a read operation from userspace and hence + * before the watermark interrupt was triggered, take a timestamp + * now. We can fall anywhere in between two samples so the error in this + * case is at most one sample period. 
+ */ + if (!irq) { + /* + * We need to have the IRQ disabled or we risk messing up + * the timestamps. If we are run from IRQ, then the + * IRQF_ONESHOT has us covered - but if we are run by the + * user-space read we need to disable the IRQ to be on a safe + * side. We do this using synchronous disable so that if the + * IRQ thread is being run on other CPU we wait for it to be + * finished. + */ + disable_irq(data->irq); + renable = true; + + data->old_timestamp = data->timestamp; + data->timestamp = iio_get_time_ns(idev); + } + + /* + * Approximate timestamps for each of the sample based on the sampling + * frequency, timestamp for last sample and number of samples. + * + * We'd better not use the current bandwidth settings to compute the + * sample period. The real sample rate varies with the device and + * small variation adds when we store a large number of samples. + * + * To avoid this issue we compute the actual sample period ourselves + * based on the timestamp delta between the last two flush operations. + */ + if (data->old_timestamp) { + sample_period = data->timestamp - data->old_timestamp; + do_div(sample_period, count); + } else { + sample_period = data->odr_ns; + } + tstamp = data->timestamp - (count - 1) * sample_period; + + if (samples && count > samples) { + /* + * Here we leave some old samples to the buffer. We need to + * adjust the timestamp to match the first sample in the buffer + * or we will miscalculate the sample_period at next round. 
+ */ + data->timestamp -= (count - samples) * sample_period; + count = samples; + } + + fifo_bytes = count * KX022A_FIFO_SAMPLES_SIZE_BYTES; + ret = regmap_noinc_read(data->regmap, KX022A_REG_BUF_READ, + &buffer[0], fifo_bytes); + if (ret) + goto renable_out; + + for (i = 0; i < count; i++) { + __le16 *sam = &buffer[i * 3]; + __le16 *chs; + int bit; + + chs = &data->scan.channels[0]; + for_each_set_bit(bit, idev->active_scan_mask, AXIS_MAX) + chs[bit] = sam[bit]; + + iio_push_to_buffers_with_timestamp(idev, &data->scan, tstamp); + + tstamp += sample_period; + } + + ret = count; + +renable_out: + if (renable) + enable_irq(data->irq); + + return ret; +} + +static int kx022a_fifo_flush(struct iio_dev *idev, unsigned int samples) +{ + struct kx022a_data *data = iio_priv(idev); + int ret; + + mutex_lock(&data->mutex); + ret = __kx022a_fifo_flush(idev, samples, false); + mutex_unlock(&data->mutex); + + return ret; +} + +static const struct iio_info kx022a_info = { + .read_raw = &kx022a_read_raw, + .write_raw = &kx022a_write_raw, + .read_avail = &kx022a_read_avail, + + .validate_trigger = kx022a_validate_trigger, + .hwfifo_set_watermark = kx022a_set_watermark, + .hwfifo_flush_to_buffer = kx022a_fifo_flush, +}; + +static int kx022a_set_drdy_irq(struct kx022a_data *data, bool en) +{ + if (en) + return regmap_set_bits(data->regmap, KX022A_REG_CNTL, + KX022A_MASK_DRDY); + + return regmap_clear_bits(data->regmap, KX022A_REG_CNTL, + KX022A_MASK_DRDY); +} + +static int kx022a_prepare_irq_pin(struct kx022a_data *data) +{ + /* Enable IRQ1 pin. 
Set polarity to active low */ + int mask = KX022A_MASK_IEN | KX022A_MASK_IPOL | + KX022A_MASK_ITYP; + int val = KX022A_MASK_IEN | KX022A_IPOL_LOW | + KX022A_ITYP_LEVEL; + int ret; + + ret = regmap_update_bits(data->regmap, data->inc_reg, mask, val); + if (ret) + return ret; + + /* We enable WMI to IRQ pin only at buffer_enable */ + mask = KX022A_MASK_INS2_DRDY; + + return regmap_set_bits(data->regmap, data->ien_reg, mask); +} + +static int kx022a_fifo_disable(struct kx022a_data *data) +{ + int ret = 0; + + ret = kx022a_turn_off_lock(data); + if (ret) + return ret; + + ret = regmap_clear_bits(data->regmap, data->ien_reg, KX022A_MASK_WMI); + if (ret) + goto unlock_out; + + ret = regmap_clear_bits(data->regmap, KX022A_REG_BUF_CNTL2, + KX022A_MASK_BUF_EN); + if (ret) + goto unlock_out; + + data->state &= ~KX022A_STATE_FIFO; + + kx022a_drop_fifo_contents(data); + + return kx022a_turn_on_unlock(data); + +unlock_out: + mutex_unlock(&data->mutex); + + return ret; +} + +static int kx022a_buffer_predisable(struct iio_dev *idev) +{ + struct kx022a_data *data = iio_priv(idev); + + if (iio_device_get_current_mode(idev) == INDIO_BUFFER_TRIGGERED) + return 0; + + return kx022a_fifo_disable(data); +} + +static int kx022a_fifo_enable(struct kx022a_data *data) +{ + int ret; + + ret = kx022a_turn_off_lock(data); + if (ret) + return ret; + + /* Update watermark to HW */ + ret = kx022a_fifo_set_wmi(data); + if (ret) + goto unlock_out; + + /* Enable buffer */ + ret = regmap_set_bits(data->regmap, KX022A_REG_BUF_CNTL2, + KX022A_MASK_BUF_EN); + if (ret) + goto unlock_out; + + data->state |= KX022A_STATE_FIFO; + ret = regmap_set_bits(data->regmap, data->ien_reg, + KX022A_MASK_WMI); + if (ret) + goto unlock_out; + + return kx022a_turn_on_unlock(data); + +unlock_out: + mutex_unlock(&data->mutex); + + return ret; +} + +static int kx022a_buffer_postenable(struct iio_dev *idev) +{ + struct kx022a_data *data = iio_priv(idev); + + /* + * If we use data-ready trigger, then the IRQ masks should be 
handled by + * trigger enable and the hardware buffer is not used but we just update + * results to the IIO fifo when data-ready triggers. + */ + if (iio_device_get_current_mode(idev) == INDIO_BUFFER_TRIGGERED) + return 0; + + return kx022a_fifo_enable(data); +} + +static const struct iio_buffer_setup_ops kx022a_buffer_ops = { + .postenable = kx022a_buffer_postenable, + .predisable = kx022a_buffer_predisable, +}; + +static irqreturn_t kx022a_trigger_handler(int irq, void *p) +{ + struct iio_poll_func *pf = p; + struct iio_dev *idev = pf->indio_dev; + struct kx022a_data *data = iio_priv(idev); + int ret; + + ret = regmap_bulk_read(data->regmap, KX022A_REG_XOUT_L, data->buffer, + KX022A_FIFO_SAMPLES_SIZE_BYTES); + if (ret < 0) + goto err_read; + + iio_push_to_buffers_with_timestamp(idev, data->buffer, pf->timestamp); +err_read: + iio_trigger_notify_done(idev->trig); + + return IRQ_HANDLED; +} + +/* Get timestamps and wake the thread if we need to read data */ +static irqreturn_t kx022a_irq_handler(int irq, void *private) +{ + struct iio_dev *idev = private; + struct kx022a_data *data = iio_priv(idev); + + data->old_timestamp = data->timestamp; + data->timestamp = iio_get_time_ns(idev); + + if (data->state & KX022A_STATE_FIFO || data->trigger_enabled) + return IRQ_WAKE_THREAD; + + return IRQ_NONE; +} + +/* + * WMI and data-ready IRQs are acked when results are read. If we add + * TILT/WAKE or other IRQs - then we may need to implement the acking + * (which is racy). 
+ */ +static irqreturn_t kx022a_irq_thread_handler(int irq, void *private) +{ + struct iio_dev *idev = private; + struct kx022a_data *data = iio_priv(idev); + irqreturn_t ret = IRQ_NONE; + + mutex_lock(&data->mutex); + + if (data->trigger_enabled) { + iio_trigger_poll_chained(data->trig); + ret = IRQ_HANDLED; + } + + if (data->state & KX022A_STATE_FIFO) { + int ok; + + ok = __kx022a_fifo_flush(idev, KX022A_FIFO_LENGTH, true); + if (ok > 0) + ret = IRQ_HANDLED; + } + + mutex_unlock(&data->mutex); + + return ret; +} + +static int kx022a_trigger_set_state(struct iio_trigger *trig, + bool state) +{ + struct kx022a_data *data = iio_trigger_get_drvdata(trig); + int ret = 0; + + mutex_lock(&data->mutex); + + if (data->trigger_enabled == state) + goto unlock_out; + + if (data->state & KX022A_STATE_FIFO) { + dev_warn(data->dev, "Can't set trigger when FIFO enabled\n"); + ret = -EBUSY; + goto unlock_out; + } + + ret = kx022a_turn_on_off_unlocked(data, false); + if (ret) + goto unlock_out; + + data->trigger_enabled = state; + ret = kx022a_set_drdy_irq(data, state); + if (ret) + goto unlock_out; + + ret = kx022a_turn_on_off_unlocked(data, true); + +unlock_out: + mutex_unlock(&data->mutex); + + return ret; +} + +static const struct iio_trigger_ops kx022a_trigger_ops = { + .set_trigger_state = kx022a_trigger_set_state, +}; + +static int kx022a_chip_init(struct kx022a_data *data) +{ + int ret, val; + + /* Reset the sensor */ + ret = regmap_write(data->regmap, KX022A_REG_CNTL2, KX022A_MASK_SRST); + if (ret) + return ret; + + /* + * I've seen I2C read failures if we poll too fast after the sensor + * reset. Slight delay gives I2C block the time to recover. + */ + msleep(1); + + ret = regmap_read_poll_timeout(data->regmap, KX022A_REG_CNTL2, val, + !(val & KX022A_MASK_SRST), + KX022A_SOFT_RESET_WAIT_TIME_US, + KX022A_SOFT_RESET_TOTAL_WAIT_TIME_US); + if (ret) { + dev_err(data->dev, "Sensor reset %s\n", + val & KX022A_MASK_SRST ? 
"timeout" : "fail#"); + return ret; + } + + ret = regmap_reinit_cache(data->regmap, &kx022a_regmap); + if (ret) { + dev_err(data->dev, "Failed to reinit reg cache\n"); + return ret; + } + + /* set data res 16bit */ + ret = regmap_set_bits(data->regmap, KX022A_REG_BUF_CNTL2, + KX022A_MASK_BRES16); + if (ret) { + dev_err(data->dev, "Failed to set data resolution\n"); + return ret; + } + + return kx022a_prepare_irq_pin(data); +} + +int kx022a_probe_internal(struct device *dev) +{ + static const char * const regulator_names[] = {"io-vdd", "vdd"}; + struct iio_trigger *indio_trig; + struct fwnode_handle *fwnode; + struct kx022a_data *data; + struct regmap *regmap; + unsigned int chip_id; + struct iio_dev *idev; + int ret, irq; + char *name; + + regmap = dev_get_regmap(dev, NULL); + if (!regmap) { + dev_err(dev, "no regmap\n"); + return -EINVAL; + } + + fwnode = dev_fwnode(dev); + if (!fwnode) + return -ENODEV; + + idev = devm_iio_device_alloc(dev, sizeof(*data)); + if (!idev) + return -ENOMEM; + + data = iio_priv(idev); + + /* + * VDD is the analog and digital domain voltage supply and + * IO_VDD is the digital I/O voltage supply. 
+ */ + ret = devm_regulator_bulk_get_enable(dev, ARRAY_SIZE(regulator_names), + regulator_names); + if (ret && ret != -ENODEV) + return dev_err_probe(dev, ret, "failed to enable regulator\n"); + + ret = regmap_read(regmap, KX022A_REG_WHO, &chip_id); + if (ret) + return dev_err_probe(dev, ret, "Failed to access sensor\n"); + + if (chip_id != KX022A_ID) { + dev_err(dev, "unsupported device 0x%x\n", chip_id); + return -EINVAL; + } + + irq = fwnode_irq_get_byname(fwnode, "INT1"); + if (irq > 0) { + data->inc_reg = KX022A_REG_INC1; + data->ien_reg = KX022A_REG_INC4; + } else { + irq = fwnode_irq_get_byname(fwnode, "INT2"); + if (irq <= 0) + return dev_err_probe(dev, irq, "No suitable IRQ\n"); + + data->inc_reg = KX022A_REG_INC5; + data->ien_reg = KX022A_REG_INC6; + } + + data->regmap = regmap; + data->dev = dev; + data->irq = irq; + data->odr_ns = KX022A_DEFAULT_PERIOD_NS; + mutex_init(&data->mutex); + + idev->channels = kx022a_channels; + idev->num_channels = ARRAY_SIZE(kx022a_channels); + idev->name = "kx022-accel"; + idev->info = &kx022a_info; + idev->modes = INDIO_DIRECT_MODE | INDIO_BUFFER_SOFTWARE; + idev->available_scan_masks = kx022a_scan_masks; + + /* Read the mounting matrix, if present */ + ret = iio_read_mount_matrix(dev, &data->orientation); + if (ret) + return ret; + + /* The sensor must be turned off for configuration */ + ret = kx022a_turn_off_lock(data); + if (ret) + return ret; + + ret = kx022a_chip_init(data); + if (ret) { + mutex_unlock(&data->mutex); + return ret; + } + + ret = kx022a_turn_on_unlock(data); + if (ret) + return ret; + + ret = devm_iio_triggered_buffer_setup_ext(dev, idev, + &iio_pollfunc_store_time, + kx022a_trigger_handler, + IIO_BUFFER_DIRECTION_IN, + &kx022a_buffer_ops, + kx022a_fifo_attributes); + + if (ret) + return dev_err_probe(data->dev, ret, + "iio_triggered_buffer_setup_ext FAIL\n"); + indio_trig = devm_iio_trigger_alloc(dev, "%sdata-rdy-dev%d", idev->name, + iio_device_id(idev)); + if (!indio_trig) + return -ENOMEM; + + 
data->trig = indio_trig; + + indio_trig->ops = &kx022a_trigger_ops; + iio_trigger_set_drvdata(indio_trig, data); + + /* + * No need to check for NULL. request_threaded_irq() defaults to + * dev_name() should the alloc fail. + */ + name = devm_kasprintf(data->dev, GFP_KERNEL, "%s-kx022a", + dev_name(data->dev)); + + ret = devm_request_threaded_irq(data->dev, irq, kx022a_irq_handler, + &kx022a_irq_thread_handler, + IRQF_ONESHOT, name, idev); + if (ret) + return dev_err_probe(data->dev, ret, "Could not request IRQ\n"); + + + ret = devm_iio_trigger_register(dev, indio_trig); + if (ret) + return dev_err_probe(data->dev, ret, + "Trigger registration failed\n"); + + ret = devm_iio_device_register(data->dev, idev); + if (ret < 0) + return dev_err_probe(dev, ret, + "Unable to register iio device\n"); + + return ret; +} +EXPORT_SYMBOL_NS_GPL(kx022a_probe_internal, IIO_KX022A); + +MODULE_DESCRIPTION("ROHM/Kionix KX022A accelerometer driver"); +MODULE_AUTHOR("Matti Vaittinen "); +MODULE_LICENSE("GPL"); diff --git a/drivers/iio/accel/kionix-kx022a.h b/drivers/iio/accel/kionix-kx022a.h new file mode 100644 index 000000000000..12424649d438 --- /dev/null +++ b/drivers/iio/accel/kionix-kx022a.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2022 ROHM Semiconductors + * + * ROHM/KIONIX KX022A accelerometer driver + */ + +#ifndef _KX022A_H_ +#define _KX022A_H_ + +#include +#include + +#define KX022A_REG_WHO 0x0f +#define KX022A_ID 0xc8 + +#define KX022A_REG_CNTL2 0x19 +#define KX022A_MASK_SRST BIT(7) +#define KX022A_REG_CNTL 0x18 +#define KX022A_MASK_PC1 BIT(7) +#define KX022A_MASK_RES BIT(6) +#define KX022A_MASK_DRDY BIT(5) +#define KX022A_MASK_GSEL GENMASK(4, 3) +#define KX022A_GSEL_SHIFT 3 +#define KX022A_GSEL_2 0x0 +#define KX022A_GSEL_4 BIT(3) +#define KX022A_GSEL_8 BIT(4) +#define KX022A_GSEL_16 GENMASK(4, 3) + +#define KX022A_REG_INS2 0x13 +#define KX022A_MASK_INS2_DRDY BIT(4) +#define KX122_MASK_INS2_WMI BIT(5) + +#define 
KX022A_REG_XHP_L 0x0 +#define KX022A_REG_XOUT_L 0x06 +#define KX022A_REG_YOUT_L 0x08 +#define KX022A_REG_ZOUT_L 0x0a +#define KX022A_REG_COTR 0x0c +#define KX022A_REG_TSCP 0x10 +#define KX022A_REG_INT_REL 0x17 + +#define KX022A_REG_ODCNTL 0x1b + +#define KX022A_REG_BTS_WUF_TH 0x31 +#define KX022A_REG_MAN_WAKE 0x2c + +#define KX022A_REG_BUF_CNTL1 0x3a +#define KX022A_MASK_WM_TH GENMASK(6, 0) +#define KX022A_REG_BUF_CNTL2 0x3b +#define KX022A_MASK_BUF_EN BIT(7) +#define KX022A_MASK_BRES16 BIT(6) +#define KX022A_REG_BUF_STATUS_1 0x3c +#define KX022A_REG_BUF_STATUS_2 0x3d +#define KX022A_REG_BUF_CLEAR 0x3e +#define KX022A_REG_BUF_READ 0x3f +#define KX022A_MASK_ODR GENMASK(3, 0) +#define KX022A_ODR_SHIFT 3 +#define KX022A_FIFO_MAX_WMI_TH 41 + +#define KX022A_REG_INC1 0x1c +#define KX022A_REG_INC5 0x20 +#define KX022A_REG_INC6 0x21 +#define KX022A_MASK_IEN BIT(5) +#define KX022A_MASK_IPOL BIT(4) +#define KX022A_IPOL_LOW 0 +#define KX022A_IPOL_HIGH KX022A_MASK_IPOL1 +#define KX022A_MASK_ITYP BIT(3) +#define KX022A_ITYP_PULSE KX022A_MASK_ITYP +#define KX022A_ITYP_LEVEL 0 + +#define KX022A_REG_INC4 0x1f +#define KX022A_MASK_WMI BIT(5) + +#define KX022A_REG_SELF_TEST 0x60 +#define KX022A_MAX_REGISTER 0x60 + +struct device; + +int kx022a_probe_internal(struct device *dev); +extern const struct regmap_config kx022a_regmap; + +#endif From d269e0d60761e9d2a6ca2adb9b3b44323a2f55cb Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Mon, 24 Oct 2022 15:40:58 +0300 Subject: [PATCH 2329/4122] MAINTAINERS: Add KX022A maintainer entry Add maintainer entry for ROHM/Kionix KX022A accelerometer sensor driver. 
Signed-off-by: Matti Vaittinen Link: https://lore.kernel.org/r/7895435f7fd31a3b576fc6a59b01eb3202c85d36.1666614295.git.mazziesaccount@gmail.com Signed-off-by: Jonathan Cameron --- MAINTAINERS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index ecec0bb6b09e..be373dfae4dd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11450,6 +11450,12 @@ F: drivers/mfd/khadas-mcu.c F: include/linux/mfd/khadas-mcu.h F: drivers/thermal/khadas_mcu_fan.c +KIONIX/ROHM KX022A ACCELEROMETER +M: Matti Vaittinen +L: linux-iio@vger.kernel.org +S: Supported +F: drivers/iio/accel/kionix-kx022a* + KMEMLEAK M: Catalin Marinas S: Maintained From 6abcb19e0a6a371b5dafe4473ea886a6891a5cd5 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 4 Nov 2022 09:31:48 +0000 Subject: [PATCH 2330/4122] iio: adc: ad4130: Fix spelling mistake "diffreential" -> "differential" There is a spelling mistake in an error message. Fix it. Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20221104093148.167765-1-colin.i.king@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad4130.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/ad4130.c b/drivers/iio/adc/ad4130.c index 9a4d0043d797..ae1a4dd5c40a 100644 --- a/drivers/iio/adc/ad4130.c +++ b/drivers/iio/adc/ad4130.c @@ -1480,7 +1480,7 @@ static int ad4130_validate_diff_channel(struct ad4130_state *st, u32 pin) if (pin >= AD4130_MAX_DIFF_INPUTS) return dev_err_probe(dev, -EINVAL, - "Invalid diffreential channel %u\n", pin); + "Invalid differential channel %u\n", pin); if (pin >= AD4130_MAX_ANALOG_PINS) return 0; From 2cc64a23c4e26107887af23a62f8ba3c79ff7ab5 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Mon, 3 Oct 2022 11:12:04 +0300 Subject: [PATCH 2331/4122] iio: Add IIO_STATIC_CONST_DEVICE_ATTR Add IIO_STATIC_CONST_DEVICE_ATTR macro for creating a read-only iio_dev_attr which returns a constant value. 
This macro is intended to be used when replacing the IIO_CONST_ATTR - attributes for triggered buffers because the triggered buffer attributes must be of type iio_dev_attr. Signed-off-by: Matti Vaittinen Link: https://lore.kernel.org/r/8dd853dd0ef8eb40cb980cc6f6e7a43166de3afb.1664782676.git.mazziesaccount@gmail.com Signed-off-by: Jonathan Cameron --- include/linux/iio/sysfs.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/include/linux/iio/sysfs.h b/include/linux/iio/sysfs.h index e51fba66de4b..de5bb125815c 100644 --- a/include/linux/iio/sysfs.h +++ b/include/linux/iio/sysfs.h @@ -97,6 +97,17 @@ struct iio_const_attr { = { .string = _string, \ .dev_attr = __ATTR(_name, S_IRUGO, iio_read_const_attr, NULL)} +#define IIO_STATIC_CONST_DEVICE_ATTR(_name, _string) \ + static ssize_t iio_const_dev_attr_show_##_name( \ + struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ + { \ + return sysfs_emit(buf, "%s\n", _string); \ + } \ + static IIO_DEVICE_ATTR(_name, 0444, \ + iio_const_dev_attr_show_##_name, NULL, 0) + /* Generic attributes of onetype or another */ /** From f0ab171b80d49a2a47b88f79de38ca64c7d641d4 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Mon, 3 Oct 2022 11:12:25 +0300 Subject: [PATCH 2332/4122] iio: adxl367: Use IIO_STATIC_CONST_DEVICE_ATTR() Slightly simplify by dropping open-coded constant data iio_dev_attr functions and using the IIO_STATIC_CONST_DEVICE_ATTR() instead. 
Signed-off-by: Matti Vaittinen Link: https://lore.kernel.org/r/c96c55363b8f36017ef7f18fdfe810cd3990e2a4.1664782676.git.mazziesaccount@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/accel/adxl367.c | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/drivers/iio/accel/adxl367.c b/drivers/iio/accel/adxl367.c index d7af3a006a44..d8e98f65f25d 100644 --- a/drivers/iio/accel/adxl367.c +++ b/drivers/iio/accel/adxl367.c @@ -1183,22 +1183,9 @@ static ssize_t adxl367_get_fifo_watermark(struct device *dev, return sysfs_emit(buf, "%d\n", fifo_watermark); } -static ssize_t hwfifo_watermark_min_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - return sysfs_emit(buf, "%s\n", "1"); -} - -static ssize_t hwfifo_watermark_max_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - return sysfs_emit(buf, "%s\n", __stringify(ADXL367_FIFO_MAX_WATERMARK)); -} - -static IIO_DEVICE_ATTR_RO(hwfifo_watermark_min, 0); -static IIO_DEVICE_ATTR_RO(hwfifo_watermark_max, 0); +IIO_STATIC_CONST_DEVICE_ATTR(hwfifo_watermark_min, "1"); +IIO_STATIC_CONST_DEVICE_ATTR(hwfifo_watermark_max, + __stringify(ADXL367_FIFO_MAX_WATERMARK)); static IIO_DEVICE_ATTR(hwfifo_watermark, 0444, adxl367_get_fifo_watermark, NULL, 0); static IIO_DEVICE_ATTR(hwfifo_enabled, 0444, From f7e6804509eeb27accc6625150db3f02d0b93ad7 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Mon, 3 Oct 2022 11:12:48 +0300 Subject: [PATCH 2333/4122] iio: adxl372: Use IIO_STATIC_CONST_DEVICE_ATTR() Slightly simplify by dropping open-coded constant data iio_dev_attr functions and using the IIO_STATIC_CONST_DEVICE_ATTR() instead. 
Signed-off-by: Matti Vaittinen Link: https://lore.kernel.org/r/3632af8849ef101c54ec1f739596f22c773b067c.1664782676.git.mazziesaccount@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/accel/adxl372.c | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/drivers/iio/accel/adxl372.c b/drivers/iio/accel/adxl372.c index bc53af809d5d..90e1d726b9c5 100644 --- a/drivers/iio/accel/adxl372.c +++ b/drivers/iio/accel/adxl372.c @@ -998,22 +998,9 @@ static ssize_t adxl372_get_fifo_watermark(struct device *dev, return sprintf(buf, "%d\n", st->watermark); } -static ssize_t hwfifo_watermark_min_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - return sysfs_emit(buf, "%s\n", "1"); -} - -static ssize_t hwfifo_watermark_max_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - return sysfs_emit(buf, "%s\n", __stringify(ADXL372_FIFO_SIZE)); -} - -static IIO_DEVICE_ATTR_RO(hwfifo_watermark_min, 0); -static IIO_DEVICE_ATTR_RO(hwfifo_watermark_max, 0); +IIO_STATIC_CONST_DEVICE_ATTR(hwfifo_watermark_min, "1"); +IIO_STATIC_CONST_DEVICE_ATTR(hwfifo_watermark_max, + __stringify(ADXL372_FIFO_SIZE)); static IIO_DEVICE_ATTR(hwfifo_watermark, 0444, adxl372_get_fifo_watermark, NULL, 0); static IIO_DEVICE_ATTR(hwfifo_enabled, 0444, From c02b2a5166a58823e8a335c486ee71ce4f22f2b7 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Mon, 3 Oct 2022 11:13:10 +0300 Subject: [PATCH 2334/4122] iio: bmc150-accel-core: Use IIO_STATIC_CONST_DEVICE_ATTR() Slightly simplify by dropping open-coded constant data iio_dev_attr functions and using the IIO_STATIC_CONST_DEVICE_ATTR() instead. 
Signed-off-by: Matti Vaittinen Link: https://lore.kernel.org/r/b1b0d662705b43d68594639fb7719bc431101f79.1664782676.git.mazziesaccount@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/accel/bmc150-accel-core.c | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/drivers/iio/accel/bmc150-accel-core.c b/drivers/iio/accel/bmc150-accel-core.c index 92f8b139acce..b4a077944896 100644 --- a/drivers/iio/accel/bmc150-accel-core.c +++ b/drivers/iio/accel/bmc150-accel-core.c @@ -925,22 +925,9 @@ static const struct iio_chan_spec_ext_info bmc150_accel_ext_info[] = { { } }; -static ssize_t hwfifo_watermark_min_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - return sysfs_emit(buf, "%s\n", "1"); -} - -static ssize_t hwfifo_watermark_max_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - return sysfs_emit(buf, "%s\n", __stringify(BMC150_ACCEL_FIFO_LENGTH)); -} - -static IIO_DEVICE_ATTR_RO(hwfifo_watermark_min, 0); -static IIO_DEVICE_ATTR_RO(hwfifo_watermark_max, 0); +IIO_STATIC_CONST_DEVICE_ATTR(hwfifo_watermark_min, "1"); +IIO_STATIC_CONST_DEVICE_ATTR(hwfifo_watermark_max, + __stringify(BMC150_ACCEL_FIFO_LENGTH)); static IIO_DEVICE_ATTR(hwfifo_enabled, S_IRUGO, bmc150_accel_get_fifo_state, NULL, 0); static IIO_DEVICE_ATTR(hwfifo_watermark, S_IRUGO, From c1531e3ad43f5e66cd04ae19644343b9222204d2 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Mon, 3 Oct 2022 11:13:31 +0300 Subject: [PATCH 2335/4122] iio: at91-sama5d2_adc: Use IIO_STATIC_CONST_DEVICE_ATTR() Slightly simplify by dropping open-coded constant data iio_dev_attr functions and using the IIO_STATIC_CONST_DEVICE_ATTR() instead. 
Signed-off-by: Matti Vaittinen Tested-by: Claudiu Beznea Link: https://lore.kernel.org/r/4476a4ce852febb3eb863878e66751c787195b18.1664782676.git.mazziesaccount@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/at91-sama5d2_adc.c | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/drivers/iio/adc/at91-sama5d2_adc.c b/drivers/iio/adc/at91-sama5d2_adc.c index 33e251552214..aa2e1640c4a3 100644 --- a/drivers/iio/adc/at91-sama5d2_adc.c +++ b/drivers/iio/adc/at91-sama5d2_adc.c @@ -2193,26 +2193,13 @@ static ssize_t at91_adc_get_watermark(struct device *dev, return scnprintf(buf, PAGE_SIZE, "%d\n", st->dma_st.watermark); } -static ssize_t hwfifo_watermark_min_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - return sysfs_emit(buf, "%s\n", "2"); -} - -static ssize_t hwfifo_watermark_max_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - return sysfs_emit(buf, "%s\n", AT91_HWFIFO_MAX_SIZE_STR); -} - static IIO_DEVICE_ATTR(hwfifo_enabled, 0444, at91_adc_get_fifo_state, NULL, 0); static IIO_DEVICE_ATTR(hwfifo_watermark, 0444, at91_adc_get_watermark, NULL, 0); -static IIO_DEVICE_ATTR_RO(hwfifo_watermark_min, 0); -static IIO_DEVICE_ATTR_RO(hwfifo_watermark_max, 0); + +IIO_STATIC_CONST_DEVICE_ATTR(hwfifo_watermark_min, "2"); +IIO_STATIC_CONST_DEVICE_ATTR(hwfifo_watermark_max, AT91_HWFIFO_MAX_SIZE_STR); static const struct attribute *at91_adc_fifo_attributes[] = { &iio_dev_attr_hwfifo_watermark_min.dev_attr.attr, From 0a33755c4b01ed62a6d025cb585928304f9653d7 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Mon, 3 Oct 2022 11:13:53 +0300 Subject: [PATCH 2336/4122] iio: Don't silently expect attribute types The iio_triggered_buffer_setup_ext() and the devm_iio_kfifo_buffer_setup_ext() were changed by commit 15097c7a1adc ("iio: buffer: wrap all buffer attributes into iio_dev_attr") to silently expect that all attributes given in buffer_attrs array are device-attributes. 
This expectation was not forced by the API - and some drivers did register attributes created by IIO_CONST_ATTR(). When using IIO_CONST_ATTRs the added attribute "wrapping" does not copy the pointer to the stored string constant, and when the sysfs file is read the kernel will access an invalid location. Change the function signatures to expect an array of iio_dev_attrs to avoid similar errors in the future. Merge conflict resolved whilst applying due to patch crossing with two new drivers (kx022a accelerometer and ad4130 ADC). Signed-off-by: Matti Vaittinen Tested-by: Claudiu Beznea Link: https://lore.kernel.org/r/63f54787a684eb1232f1c5d275a09c786987fe4a.1664782676.git.mazziesaccount@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/accel/adxl367.c | 10 +++++----- drivers/iio/accel/adxl372.c | 10 +++++----- drivers/iio/accel/bmc150-accel-core.c | 12 ++++++------ drivers/iio/accel/kionix-kx022a.c | 6 +++--- drivers/iio/adc/ad4130.c | 10 +++++----- drivers/iio/adc/at91-sama5d2_adc.c | 12 ++++++------ drivers/iio/buffer/industrialio-buffer-dmaengine.c | 4 ++-- drivers/iio/buffer/industrialio-triggered-buffer.c | 4 ++-- drivers/iio/buffer/kfifo_buf.c | 2 +- .../common/cros_ec_sensors/cros_ec_sensors_core.c | 6 +++--- drivers/iio/common/hid-sensors/hid-sensor-trigger.c | 8 ++++---- drivers/iio/industrialio-buffer.c | 11 +++++++---- include/linux/iio/buffer_impl.h | 2 +- include/linux/iio/kfifo_buf.h | 3 ++- include/linux/iio/triggered_buffer.h | 6 +++--- 15 files changed, 55 insertions(+), 51 deletions(-) diff --git a/drivers/iio/accel/adxl367.c b/drivers/iio/accel/adxl367.c index d8e98f65f25d..90b7ae6d42b7 100644 --- a/drivers/iio/accel/adxl367.c +++ b/drivers/iio/accel/adxl367.c @@ -1191,11 +1191,11 @@ static IIO_DEVICE_ATTR(hwfifo_watermark, 0444, static IIO_DEVICE_ATTR(hwfifo_enabled, 0444, adxl367_get_fifo_enabled, NULL, 0); -static const struct attribute *adxl367_fifo_attributes[] = { - &iio_dev_attr_hwfifo_watermark_min.dev_attr.attr, - 
&iio_dev_attr_hwfifo_watermark_max.dev_attr.attr, - &iio_dev_attr_hwfifo_watermark.dev_attr.attr, - &iio_dev_attr_hwfifo_enabled.dev_attr.attr, +static const struct iio_dev_attr *adxl367_fifo_attributes[] = { + &iio_dev_attr_hwfifo_watermark_min, + &iio_dev_attr_hwfifo_watermark_max, + &iio_dev_attr_hwfifo_watermark, + &iio_dev_attr_hwfifo_enabled, NULL, }; diff --git a/drivers/iio/accel/adxl372.c b/drivers/iio/accel/adxl372.c index 90e1d726b9c5..c4193286eb05 100644 --- a/drivers/iio/accel/adxl372.c +++ b/drivers/iio/accel/adxl372.c @@ -1006,11 +1006,11 @@ static IIO_DEVICE_ATTR(hwfifo_watermark, 0444, static IIO_DEVICE_ATTR(hwfifo_enabled, 0444, adxl372_get_fifo_enabled, NULL, 0); -static const struct attribute *adxl372_fifo_attributes[] = { - &iio_dev_attr_hwfifo_watermark_min.dev_attr.attr, - &iio_dev_attr_hwfifo_watermark_max.dev_attr.attr, - &iio_dev_attr_hwfifo_watermark.dev_attr.attr, - &iio_dev_attr_hwfifo_enabled.dev_attr.attr, +static const struct iio_dev_attr *adxl372_fifo_attributes[] = { + &iio_dev_attr_hwfifo_watermark_min, + &iio_dev_attr_hwfifo_watermark_max, + &iio_dev_attr_hwfifo_watermark, + &iio_dev_attr_hwfifo_enabled, NULL, }; diff --git a/drivers/iio/accel/bmc150-accel-core.c b/drivers/iio/accel/bmc150-accel-core.c index b4a077944896..110591804b4c 100644 --- a/drivers/iio/accel/bmc150-accel-core.c +++ b/drivers/iio/accel/bmc150-accel-core.c @@ -933,11 +933,11 @@ static IIO_DEVICE_ATTR(hwfifo_enabled, S_IRUGO, static IIO_DEVICE_ATTR(hwfifo_watermark, S_IRUGO, bmc150_accel_get_fifo_watermark, NULL, 0); -static const struct attribute *bmc150_accel_fifo_attributes[] = { - &iio_dev_attr_hwfifo_watermark_min.dev_attr.attr, - &iio_dev_attr_hwfifo_watermark_max.dev_attr.attr, - &iio_dev_attr_hwfifo_watermark.dev_attr.attr, - &iio_dev_attr_hwfifo_enabled.dev_attr.attr, +static const struct iio_dev_attr *bmc150_accel_fifo_attributes[] = { + &iio_dev_attr_hwfifo_watermark_min, + &iio_dev_attr_hwfifo_watermark_max, + &iio_dev_attr_hwfifo_watermark, + 
&iio_dev_attr_hwfifo_enabled, NULL, }; @@ -1665,7 +1665,7 @@ int bmc150_accel_core_probe(struct device *dev, struct regmap *regmap, int irq, enum bmc150_type type, const char *name, bool block_supported) { - const struct attribute **fifo_attrs; + const struct iio_dev_attr **fifo_attrs; struct bmc150_accel_data *data; struct iio_dev *indio_dev; int ret; diff --git a/drivers/iio/accel/kionix-kx022a.c b/drivers/iio/accel/kionix-kx022a.c index 50de7f9a1cc7..f866859855cd 100644 --- a/drivers/iio/accel/kionix-kx022a.c +++ b/drivers/iio/accel/kionix-kx022a.c @@ -575,9 +575,9 @@ static ssize_t hwfifo_watermark_show(struct device *dev, static IIO_DEVICE_ATTR_RO(hwfifo_enabled, 0); static IIO_DEVICE_ATTR_RO(hwfifo_watermark, 0); -static const struct attribute *kx022a_fifo_attributes[] = { - &iio_dev_attr_hwfifo_watermark.dev_attr.attr, - &iio_dev_attr_hwfifo_enabled.dev_attr.attr, +static const struct iio_dev_attr *kx022a_fifo_attributes[] = { + &iio_dev_attr_hwfifo_watermark, + &iio_dev_attr_hwfifo_enabled, NULL }; diff --git a/drivers/iio/adc/ad4130.c b/drivers/iio/adc/ad4130.c index ae1a4dd5c40a..38394341fd6e 100644 --- a/drivers/iio/adc/ad4130.c +++ b/drivers/iio/adc/ad4130.c @@ -1380,11 +1380,11 @@ static IIO_DEVICE_ATTR_RO(hwfifo_watermark_max, 0); static IIO_DEVICE_ATTR_RO(hwfifo_watermark, 0); static IIO_DEVICE_ATTR_RO(hwfifo_enabled, 0); -static const struct attribute *ad4130_fifo_attributes[] = { - &iio_dev_attr_hwfifo_watermark_min.dev_attr.attr, - &iio_dev_attr_hwfifo_watermark_max.dev_attr.attr, - &iio_dev_attr_hwfifo_watermark.dev_attr.attr, - &iio_dev_attr_hwfifo_enabled.dev_attr.attr, +static const struct iio_dev_attr *ad4130_fifo_attributes[] = { + &iio_dev_attr_hwfifo_watermark_min, + &iio_dev_attr_hwfifo_watermark_max, + &iio_dev_attr_hwfifo_watermark, + &iio_dev_attr_hwfifo_enabled, NULL }; diff --git a/drivers/iio/adc/at91-sama5d2_adc.c b/drivers/iio/adc/at91-sama5d2_adc.c index aa2e1640c4a3..a00b8316a12f 100644 --- a/drivers/iio/adc/at91-sama5d2_adc.c 
+++ b/drivers/iio/adc/at91-sama5d2_adc.c @@ -2201,11 +2201,11 @@ static IIO_DEVICE_ATTR(hwfifo_watermark, 0444, IIO_STATIC_CONST_DEVICE_ATTR(hwfifo_watermark_min, "2"); IIO_STATIC_CONST_DEVICE_ATTR(hwfifo_watermark_max, AT91_HWFIFO_MAX_SIZE_STR); -static const struct attribute *at91_adc_fifo_attributes[] = { - &iio_dev_attr_hwfifo_watermark_min.dev_attr.attr, - &iio_dev_attr_hwfifo_watermark_max.dev_attr.attr, - &iio_dev_attr_hwfifo_watermark.dev_attr.attr, - &iio_dev_attr_hwfifo_enabled.dev_attr.attr, +static const struct iio_dev_attr *at91_adc_fifo_attributes[] = { + &iio_dev_attr_hwfifo_watermark_min, + &iio_dev_attr_hwfifo_watermark_max, + &iio_dev_attr_hwfifo_watermark, + &iio_dev_attr_hwfifo_enabled, NULL, }; @@ -2222,7 +2222,7 @@ static int at91_adc_buffer_and_trigger_init(struct device *dev, struct iio_dev *indio) { struct at91_adc_state *st = iio_priv(indio); - const struct attribute **fifo_attrs; + const struct iio_dev_attr **fifo_attrs; int ret; if (st->selected_trig->hw_trig) diff --git a/drivers/iio/buffer/industrialio-buffer-dmaengine.c b/drivers/iio/buffer/industrialio-buffer-dmaengine.c index f744b62a636a..5f85ba38e6f6 100644 --- a/drivers/iio/buffer/industrialio-buffer-dmaengine.c +++ b/drivers/iio/buffer/industrialio-buffer-dmaengine.c @@ -142,8 +142,8 @@ static ssize_t iio_dmaengine_buffer_get_length_align(struct device *dev, static IIO_DEVICE_ATTR(length_align_bytes, 0444, iio_dmaengine_buffer_get_length_align, NULL, 0); -static const struct attribute *iio_dmaengine_buffer_attrs[] = { - &iio_dev_attr_length_align_bytes.dev_attr.attr, +static const struct iio_dev_attr *iio_dmaengine_buffer_attrs[] = { + &iio_dev_attr_length_align_bytes, NULL, }; diff --git a/drivers/iio/buffer/industrialio-triggered-buffer.c b/drivers/iio/buffer/industrialio-triggered-buffer.c index 8d4fc97d1005..c7671b1f5ead 100644 --- a/drivers/iio/buffer/industrialio-triggered-buffer.c +++ b/drivers/iio/buffer/industrialio-triggered-buffer.c @@ -41,7 +41,7 @@ int 
iio_triggered_buffer_setup_ext(struct iio_dev *indio_dev, irqreturn_t (*thread)(int irq, void *p), enum iio_buffer_direction direction, const struct iio_buffer_setup_ops *setup_ops, - const struct attribute **buffer_attrs) + const struct iio_dev_attr **buffer_attrs) { struct iio_buffer *buffer; int ret; @@ -110,7 +110,7 @@ int devm_iio_triggered_buffer_setup_ext(struct device *dev, irqreturn_t (*thread)(int irq, void *p), enum iio_buffer_direction direction, const struct iio_buffer_setup_ops *ops, - const struct attribute **buffer_attrs) + const struct iio_dev_attr **buffer_attrs) { int ret; diff --git a/drivers/iio/buffer/kfifo_buf.c b/drivers/iio/buffer/kfifo_buf.c index 35d8b4077376..05b285f0eb22 100644 --- a/drivers/iio/buffer/kfifo_buf.c +++ b/drivers/iio/buffer/kfifo_buf.c @@ -270,7 +270,7 @@ static struct iio_buffer *devm_iio_kfifo_allocate(struct device *dev) int devm_iio_kfifo_buffer_setup_ext(struct device *dev, struct iio_dev *indio_dev, const struct iio_buffer_setup_ops *setup_ops, - const struct attribute **buffer_attrs) + const struct iio_dev_attr **buffer_attrs) { struct iio_buffer *buffer; diff --git a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c index 05a28d353e34..943e9e14d1e9 100644 --- a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c +++ b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c @@ -172,9 +172,9 @@ static ssize_t hwfifo_watermark_max_show(struct device *dev, static IIO_DEVICE_ATTR_RO(hwfifo_watermark_max, 0); -static const struct attribute *cros_ec_sensor_fifo_attributes[] = { - &iio_dev_attr_hwfifo_timeout.dev_attr.attr, - &iio_dev_attr_hwfifo_watermark_max.dev_attr.attr, +static const struct iio_dev_attr *cros_ec_sensor_fifo_attributes[] = { + &iio_dev_attr_hwfifo_timeout, + &iio_dev_attr_hwfifo_watermark_max, NULL, }; diff --git a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c index 
1151434038d4..ad8910e6ad59 100644 --- a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c +++ b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c @@ -75,9 +75,9 @@ static IIO_DEVICE_ATTR(hwfifo_timeout, 0644, static IIO_DEVICE_ATTR(hwfifo_enabled, 0444, _hid_sensor_get_fifo_state, NULL, 0); -static const struct attribute *hid_sensor_fifo_attributes[] = { - &iio_dev_attr_hwfifo_timeout.dev_attr.attr, - &iio_dev_attr_hwfifo_enabled.dev_attr.attr, +static const struct iio_dev_attr *hid_sensor_fifo_attributes[] = { + &iio_dev_attr_hwfifo_timeout, + &iio_dev_attr_hwfifo_enabled, NULL, }; @@ -231,7 +231,7 @@ static const struct iio_trigger_ops hid_sensor_trigger_ops = { int hid_sensor_setup_trigger(struct iio_dev *indio_dev, const char *name, struct hid_sensor_common *attrb) { - const struct attribute **fifo_attrs; + const struct iio_dev_attr **fifo_attrs; int ret; struct iio_trigger *trig; diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c index 9cd7db549fcb..80c78bd6bbef 100644 --- a/drivers/iio/industrialio-buffer.c +++ b/drivers/iio/industrialio-buffer.c @@ -1605,6 +1605,7 @@ static int __iio_buffer_alloc_sysfs_and_mask(struct iio_buffer *buffer, { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct iio_dev_attr *p; + const struct iio_dev_attr *id_attr; struct attribute **attr; int ret, i, attrn, scan_el_attrcount, buffer_attrcount; const struct iio_chan_spec *channels; @@ -1614,6 +1615,7 @@ static int __iio_buffer_alloc_sysfs_and_mask(struct iio_buffer *buffer, while (buffer->attrs[buffer_attrcount] != NULL) buffer_attrcount++; } + buffer_attrcount += ARRAY_SIZE(iio_buffer_attrs); scan_el_attrcount = 0; INIT_LIST_HEAD(&buffer->buffer_attr_list); @@ -1656,7 +1658,7 @@ static int __iio_buffer_alloc_sysfs_and_mask(struct iio_buffer *buffer, } } - attrn = buffer_attrcount + scan_el_attrcount + ARRAY_SIZE(iio_buffer_attrs); + attrn = buffer_attrcount + scan_el_attrcount; attr = kcalloc(attrn + 1, sizeof(*attr), 
GFP_KERNEL); if (!attr) { ret = -ENOMEM; @@ -1671,10 +1673,11 @@ static int __iio_buffer_alloc_sysfs_and_mask(struct iio_buffer *buffer, attr[2] = &dev_attr_watermark_ro.attr; if (buffer->attrs) - memcpy(&attr[ARRAY_SIZE(iio_buffer_attrs)], buffer->attrs, - sizeof(struct attribute *) * buffer_attrcount); + for (i = 0, id_attr = buffer->attrs[i]; + (id_attr = buffer->attrs[i]); i++) + attr[ARRAY_SIZE(iio_buffer_attrs) + i] = + (struct attribute *)&id_attr->dev_attr.attr; - buffer_attrcount += ARRAY_SIZE(iio_buffer_attrs); buffer->buffer_group.attrs = attr; for (i = 0; i < buffer_attrcount; i++) { diff --git a/include/linux/iio/buffer_impl.h b/include/linux/iio/buffer_impl.h index e2ca8ea23e19..89c3fd7c29ca 100644 --- a/include/linux/iio/buffer_impl.h +++ b/include/linux/iio/buffer_impl.h @@ -123,7 +123,7 @@ struct iio_buffer { struct attribute_group buffer_group; /* @attrs: Standard attributes of the buffer. */ - const struct attribute **attrs; + const struct iio_dev_attr **attrs; /* @demux_bounce: Buffer for doing gather from incoming scan. 
*/ void *demux_bounce; diff --git a/include/linux/iio/kfifo_buf.h b/include/linux/iio/kfifo_buf.h index 8a83fb58232d..22874da0c8be 100644 --- a/include/linux/iio/kfifo_buf.h +++ b/include/linux/iio/kfifo_buf.h @@ -5,6 +5,7 @@ struct iio_buffer; struct iio_buffer_setup_ops; struct iio_dev; +struct iio_dev_attr; struct device; struct iio_buffer *iio_kfifo_allocate(void); @@ -13,7 +14,7 @@ void iio_kfifo_free(struct iio_buffer *r); int devm_iio_kfifo_buffer_setup_ext(struct device *dev, struct iio_dev *indio_dev, const struct iio_buffer_setup_ops *setup_ops, - const struct attribute **buffer_attrs); + const struct iio_dev_attr **buffer_attrs); #define devm_iio_kfifo_buffer_setup(dev, indio_dev, setup_ops) \ devm_iio_kfifo_buffer_setup_ext((dev), (indio_dev), (setup_ops), NULL) diff --git a/include/linux/iio/triggered_buffer.h b/include/linux/iio/triggered_buffer.h index 7490b05fc5b2..29e1fe146879 100644 --- a/include/linux/iio/triggered_buffer.h +++ b/include/linux/iio/triggered_buffer.h @@ -5,8 +5,8 @@ #include #include -struct attribute; struct iio_dev; +struct iio_dev_attr; struct iio_buffer_setup_ops; int iio_triggered_buffer_setup_ext(struct iio_dev *indio_dev, @@ -14,7 +14,7 @@ int iio_triggered_buffer_setup_ext(struct iio_dev *indio_dev, irqreturn_t (*thread)(int irq, void *p), enum iio_buffer_direction direction, const struct iio_buffer_setup_ops *setup_ops, - const struct attribute **buffer_attrs); + const struct iio_dev_attr **buffer_attrs); void iio_triggered_buffer_cleanup(struct iio_dev *indio_dev); #define iio_triggered_buffer_setup(indio_dev, h, thread, setup_ops) \ @@ -28,7 +28,7 @@ int devm_iio_triggered_buffer_setup_ext(struct device *dev, irqreturn_t (*thread)(int irq, void *p), enum iio_buffer_direction direction, const struct iio_buffer_setup_ops *ops, - const struct attribute **buffer_attrs); + const struct iio_dev_attr **buffer_attrs); #define devm_iio_triggered_buffer_setup(dev, indio_dev, h, thread, setup_ops) \ 
devm_iio_triggered_buffer_setup_ext((dev), (indio_dev), (h), (thread), \ From 26bfb581931a020eaf560612f01009b3e179db0a Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 16 Oct 2022 18:09:46 +0100 Subject: [PATCH 2337/4122] iio: adc: cc10001: Add local struct device *dev variable to avoid repetition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are lots of uses of this in probe() and we are about to introduce some more, so add a local variable to simplify this. Signed-off-by: Jonathan Cameron Reviewed-by: Nuno Sá Link: https://lore.kernel.org/r/20221016170950.387751-2-jic23@kernel.org --- drivers/iio/adc/cc10001_adc.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/iio/adc/cc10001_adc.c b/drivers/iio/adc/cc10001_adc.c index e16ac935693b..eeaea1362ed1 100644 --- a/drivers/iio/adc/cc10001_adc.c +++ b/drivers/iio/adc/cc10001_adc.c @@ -307,14 +307,15 @@ static int cc10001_adc_channel_init(struct iio_dev *indio_dev, static int cc10001_adc_probe(struct platform_device *pdev) { - struct device_node *node = pdev->dev.of_node; + struct device *dev = &pdev->dev; + struct device_node *node = dev->of_node; struct cc10001_adc_device *adc_dev; unsigned long adc_clk_rate; struct iio_dev *indio_dev; unsigned long channel_map; int ret; - indio_dev = devm_iio_device_alloc(&pdev->dev, sizeof(*adc_dev)); + indio_dev = devm_iio_device_alloc(dev, sizeof(*adc_dev)); if (indio_dev == NULL) return -ENOMEM; @@ -326,7 +327,7 @@ static int cc10001_adc_probe(struct platform_device *pdev) channel_map &= ~ret; } - adc_dev->reg = devm_regulator_get(&pdev->dev, "vref"); + adc_dev->reg = devm_regulator_get(dev, "vref"); if (IS_ERR(adc_dev->reg)) return PTR_ERR(adc_dev->reg); @@ -334,7 +335,7 @@ static int cc10001_adc_probe(struct platform_device *pdev) if (ret) return ret; - indio_dev->name = dev_name(&pdev->dev); + indio_dev->name = dev_name(dev); indio_dev->info = &cc10001_adc_info; 
indio_dev->modes = INDIO_DIRECT_MODE; @@ -344,23 +345,23 @@ static int cc10001_adc_probe(struct platform_device *pdev) goto err_disable_reg; } - adc_dev->adc_clk = devm_clk_get(&pdev->dev, "adc"); + adc_dev->adc_clk = devm_clk_get(dev, "adc"); if (IS_ERR(adc_dev->adc_clk)) { - dev_err(&pdev->dev, "failed to get the clock\n"); + dev_err(dev, "failed to get the clock\n"); ret = PTR_ERR(adc_dev->adc_clk); goto err_disable_reg; } ret = clk_prepare_enable(adc_dev->adc_clk); if (ret) { - dev_err(&pdev->dev, "failed to enable the clock\n"); + dev_err(dev, "failed to enable the clock\n"); goto err_disable_reg; } adc_clk_rate = clk_get_rate(adc_dev->adc_clk); if (!adc_clk_rate) { ret = -EINVAL; - dev_err(&pdev->dev, "null clock rate!\n"); + dev_err(dev, "null clock rate!\n"); goto err_disable_clk; } From dc0ba516d103532b7f289b20119374fe3797f81b Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 16 Oct 2022 18:09:47 +0100 Subject: [PATCH 2338/4122] iio: adc: cc10001: Add devm_add_action_or_reset() to disable regulator. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As the voltage of this regulator is queried, we cannot use the devm_regulator_get_enable() call and have to roll our own disable. 
Signed-off-by: Jonathan Cameron Reviewed-by: Nuno Sá Link: https://lore.kernel.org/r/20221016170950.387751-3-jic23@kernel.org --- drivers/iio/adc/cc10001_adc.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/iio/adc/cc10001_adc.c b/drivers/iio/adc/cc10001_adc.c index eeaea1362ed1..4f42ceb40ded 100644 --- a/drivers/iio/adc/cc10001_adc.c +++ b/drivers/iio/adc/cc10001_adc.c @@ -305,6 +305,11 @@ static int cc10001_adc_channel_init(struct iio_dev *indio_dev, return 0; } +static void cc10001_reg_disable(void *priv) +{ + regulator_disable(priv); +} + static int cc10001_adc_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -335,27 +340,28 @@ static int cc10001_adc_probe(struct platform_device *pdev) if (ret) return ret; + ret = devm_add_action_or_reset(dev, cc10001_reg_disable, adc_dev->reg); + if (ret) + return ret; + indio_dev->name = dev_name(dev); indio_dev->info = &cc10001_adc_info; indio_dev->modes = INDIO_DIRECT_MODE; adc_dev->reg_base = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(adc_dev->reg_base)) { - ret = PTR_ERR(adc_dev->reg_base); - goto err_disable_reg; - } + if (IS_ERR(adc_dev->reg_base)) + return PTR_ERR(adc_dev->reg_base); adc_dev->adc_clk = devm_clk_get(dev, "adc"); if (IS_ERR(adc_dev->adc_clk)) { dev_err(dev, "failed to get the clock\n"); - ret = PTR_ERR(adc_dev->adc_clk); - goto err_disable_reg; + return PTR_ERR(adc_dev->adc_clk); } ret = clk_prepare_enable(adc_dev->adc_clk); if (ret) { dev_err(dev, "failed to enable the clock\n"); - goto err_disable_reg; + return ret; } adc_clk_rate = clk_get_rate(adc_dev->adc_clk); @@ -400,8 +406,6 @@ err_cleanup_buffer: iio_triggered_buffer_cleanup(indio_dev); err_disable_clk: clk_disable_unprepare(adc_dev->adc_clk); -err_disable_reg: - regulator_disable(adc_dev->reg); return ret; } @@ -414,7 +418,6 @@ static int cc10001_adc_remove(struct platform_device *pdev) iio_device_unregister(indio_dev); 
iio_triggered_buffer_cleanup(indio_dev); clk_disable_unprepare(adc_dev->adc_clk); - regulator_disable(adc_dev->reg); return 0; } From c247e0d8c0d50793f459d2a7997d2f8f2105c973 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 16 Oct 2022 18:09:48 +0100 Subject: [PATCH 2339/4122] iio: adc: cc10001: Use devm_clk_get_enabled() to avoid boilerplate. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As this driver just enables clock in probe() and disables in remove() we can use this new function to replace boilerplate and simplify error paths. Signed-off-by: Jonathan Cameron Reviewed-by: Nuno Sá Link: https://lore.kernel.org/r/20221016170950.387751-4-jic23@kernel.org --- drivers/iio/adc/cc10001_adc.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/drivers/iio/adc/cc10001_adc.c b/drivers/iio/adc/cc10001_adc.c index 4f42ceb40ded..1a3a5e0a52f7 100644 --- a/drivers/iio/adc/cc10001_adc.c +++ b/drivers/iio/adc/cc10001_adc.c @@ -352,23 +352,16 @@ static int cc10001_adc_probe(struct platform_device *pdev) if (IS_ERR(adc_dev->reg_base)) return PTR_ERR(adc_dev->reg_base); - adc_dev->adc_clk = devm_clk_get(dev, "adc"); + adc_dev->adc_clk = devm_clk_get_enabled(dev, "adc"); if (IS_ERR(adc_dev->adc_clk)) { - dev_err(dev, "failed to get the clock\n"); + dev_err(dev, "failed to get/enable the clock\n"); return PTR_ERR(adc_dev->adc_clk); } - ret = clk_prepare_enable(adc_dev->adc_clk); - if (ret) { - dev_err(dev, "failed to enable the clock\n"); - return ret; - } - adc_clk_rate = clk_get_rate(adc_dev->adc_clk); if (!adc_clk_rate) { - ret = -EINVAL; dev_err(dev, "null clock rate!\n"); - goto err_disable_clk; + return -EINVAL; } adc_dev->eoc_delay_ns = NSEC_PER_SEC / adc_clk_rate; @@ -385,14 +378,14 @@ static int cc10001_adc_probe(struct platform_device *pdev) /* Setup the ADC channels available on the device */ ret = cc10001_adc_channel_init(indio_dev, channel_map); if (ret < 0) - goto 
err_disable_clk; + return ret; mutex_init(&adc_dev->lock); ret = iio_triggered_buffer_setup(indio_dev, NULL, &cc10001_adc_trigger_h, NULL); if (ret < 0) - goto err_disable_clk; + return ret; ret = iio_device_register(indio_dev); if (ret < 0) @@ -404,8 +397,6 @@ static int cc10001_adc_probe(struct platform_device *pdev) err_cleanup_buffer: iio_triggered_buffer_cleanup(indio_dev); -err_disable_clk: - clk_disable_unprepare(adc_dev->adc_clk); return ret; } @@ -417,7 +408,6 @@ static int cc10001_adc_remove(struct platform_device *pdev) cc10001_adc_power_down(adc_dev); iio_device_unregister(indio_dev); iio_triggered_buffer_cleanup(indio_dev); - clk_disable_unprepare(adc_dev->adc_clk); return 0; } From a43d5155b9455d38c4b3e4656131d79af61a2978 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 16 Oct 2022 18:09:49 +0100 Subject: [PATCH 2340/4122] iio: adc: cc10001: Use devm_ to call device power down. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is presumably safe to call the powerdown whether or not we are in the commented shared state (the driver always did this). The power down was previously out of order wrt the probe() function, so moving to devm_ will ensure it occurs after the userspace interfaces are removed. 
Signed-off-by: Jonathan Cameron Reviewed-by: Nuno Sá Link: https://lore.kernel.org/r/20221016170950.387751-5-jic23@kernel.org --- drivers/iio/adc/cc10001_adc.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/cc10001_adc.c b/drivers/iio/adc/cc10001_adc.c index 1a3a5e0a52f7..aecad89c00ff 100644 --- a/drivers/iio/adc/cc10001_adc.c +++ b/drivers/iio/adc/cc10001_adc.c @@ -310,6 +310,11 @@ static void cc10001_reg_disable(void *priv) regulator_disable(priv); } +static void cc10001_pd_cb(void *priv) +{ + cc10001_adc_power_down(priv); +} + static int cc10001_adc_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -375,6 +380,9 @@ static int cc10001_adc_probe(struct platform_device *pdev) if (adc_dev->shared) cc10001_adc_power_up(adc_dev); + ret = devm_add_action_or_reset(dev, cc10001_pd_cb, adc_dev); + if (ret) + return ret; /* Setup the ADC channels available on the device */ ret = cc10001_adc_channel_init(indio_dev, channel_map); if (ret < 0) @@ -405,7 +413,6 @@ static int cc10001_adc_remove(struct platform_device *pdev) struct iio_dev *indio_dev = platform_get_drvdata(pdev); struct cc10001_adc_device *adc_dev = iio_priv(indio_dev); - cc10001_adc_power_down(adc_dev); iio_device_unregister(indio_dev); iio_triggered_buffer_cleanup(indio_dev); From c5269fe908635c67c1eb4876df625efcfb156827 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 16 Oct 2022 18:09:50 +0100 Subject: [PATCH 2341/4122] iio: adc: cc10001: Switch remaining IIO calls in probe to devm_ forms. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As everything else is now handled by devm managed releases the triggered buffer setup and IIO device registration can also be moved over to their devm forms allowing dropping of remove(). Only user of drvdata associated with the struct device was the remove function, so also drop the platform_set_drvdata() call. 
Signed-off-by: Jonathan Cameron Reviewed-by: Nuno Sá Link: https://lore.kernel.org/r/20221016170950.387751-6-jic23@kernel.org --- drivers/iio/adc/cc10001_adc.c | 28 +++------------------------- 1 file changed, 3 insertions(+), 25 deletions(-) diff --git a/drivers/iio/adc/cc10001_adc.c b/drivers/iio/adc/cc10001_adc.c index aecad89c00ff..2cde4b44fc6e 100644 --- a/drivers/iio/adc/cc10001_adc.c +++ b/drivers/iio/adc/cc10001_adc.c @@ -390,33 +390,12 @@ static int cc10001_adc_probe(struct platform_device *pdev) mutex_init(&adc_dev->lock); - ret = iio_triggered_buffer_setup(indio_dev, NULL, - &cc10001_adc_trigger_h, NULL); + ret = devm_iio_triggered_buffer_setup(dev, indio_dev, NULL, + &cc10001_adc_trigger_h, NULL); if (ret < 0) return ret; - ret = iio_device_register(indio_dev); - if (ret < 0) - goto err_cleanup_buffer; - - platform_set_drvdata(pdev, indio_dev); - - return 0; - -err_cleanup_buffer: - iio_triggered_buffer_cleanup(indio_dev); - return ret; -} - -static int cc10001_adc_remove(struct platform_device *pdev) -{ - struct iio_dev *indio_dev = platform_get_drvdata(pdev); - struct cc10001_adc_device *adc_dev = iio_priv(indio_dev); - - iio_device_unregister(indio_dev); - iio_triggered_buffer_cleanup(indio_dev); - - return 0; + return devm_iio_device_register(dev, indio_dev); } static const struct of_device_id cc10001_adc_dt_ids[] = { @@ -431,7 +410,6 @@ static struct platform_driver cc10001_adc_driver = { .of_match_table = cc10001_adc_dt_ids, }, .probe = cc10001_adc_probe, - .remove = cc10001_adc_remove, }; module_platform_driver(cc10001_adc_driver); From 306935570f23e25bff22a3273bc6f92c8f13c910 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 27 Oct 2022 10:34:10 -0400 Subject: [PATCH 2342/4122] dt-bindings: iio: adc: qcom,spmi-vadc: simplify compatible enum The second compatible item in oneOf is just an enum, not a list. 
Signed-off-by: Krzysztof Kozlowski Acked-by: Rob Herring Link: https://lore.kernel.org/r/20221027143411.277980-1-krzysztof.kozlowski@linaro.org Signed-off-by: Jonathan Cameron --- .../devicetree/bindings/iio/adc/qcom,spmi-vadc.yaml | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/Documentation/devicetree/bindings/iio/adc/qcom,spmi-vadc.yaml b/Documentation/devicetree/bindings/iio/adc/qcom,spmi-vadc.yaml index 8bac0c4120dd..a848df37db06 100644 --- a/Documentation/devicetree/bindings/iio/adc/qcom,spmi-vadc.yaml +++ b/Documentation/devicetree/bindings/iio/adc/qcom,spmi-vadc.yaml @@ -22,13 +22,11 @@ properties: - items: - const: qcom,pms405-adc - const: qcom,spmi-adc-rev2 - - - items: - - enum: - - qcom,spmi-vadc - - qcom,spmi-adc5 - - qcom,spmi-adc-rev2 - - qcom,spmi-adc7 + - enum: + - qcom,spmi-vadc + - qcom,spmi-adc5 + - qcom,spmi-adc-rev2 + - qcom,spmi-adc7 reg: description: VADC base address in the SPMI PMIC register map From ea4b79e98ae0aedc0ebbe4adbb8f73b6c34f21d7 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 27 Oct 2022 10:34:11 -0400 Subject: [PATCH 2343/4122] dt-bindings: iio: adc: qcom,spmi-vadc: extend example Cleanup existing example (generic node name for spmi, use 4-space indentation) and add example for ADCv7 copied from Documentation/devicetree/bindings/thermal/qcom-spmi-adc-tm5.yaml. 
Signed-off-by: Krzysztof Kozlowski Acked-by: Rob Herring Link: https://lore.kernel.org/r/20221027143411.277980-2-krzysztof.kozlowski@linaro.org Signed-off-by: Jonathan Cameron --- .../bindings/iio/adc/qcom,spmi-vadc.yaml | 90 ++++++++++++------- 1 file changed, 60 insertions(+), 30 deletions(-) diff --git a/Documentation/devicetree/bindings/iio/adc/qcom,spmi-vadc.yaml b/Documentation/devicetree/bindings/iio/adc/qcom,spmi-vadc.yaml index a848df37db06..f1522196042d 100644 --- a/Documentation/devicetree/bindings/iio/adc/qcom,spmi-vadc.yaml +++ b/Documentation/devicetree/bindings/iio/adc/qcom,spmi-vadc.yaml @@ -236,42 +236,72 @@ additionalProperties: false examples: - | - spmi_bus { - #address-cells = <1>; - #size-cells = <0>; - /* VADC node */ - pmic_vadc: adc@3100 { - compatible = "qcom,spmi-vadc"; - reg = <0x3100>; - interrupts = <0x0 0x31 0x0 0x1>; + spmi { #address-cells = <1>; #size-cells = <0>; - #io-channel-cells = <1>; + /* VADC node */ + pmic_vadc: adc@3100 { + compatible = "qcom,spmi-vadc"; + reg = <0x3100>; + interrupts = <0x0 0x31 0x0 0x1>; + #address-cells = <1>; + #size-cells = <0>; + #io-channel-cells = <1>; - /* Channel node */ - adc-chan@39 { - reg = <0x39>; - qcom,decimation = <512>; - qcom,ratiometric; - qcom,hw-settle-time = <200>; - qcom,avg-samples = <1>; - qcom,pre-scaling = <1 3>; - }; + /* Channel node */ + adc-chan@39 { + reg = <0x39>; + qcom,decimation = <512>; + qcom,ratiometric; + qcom,hw-settle-time = <200>; + qcom,avg-samples = <1>; + qcom,pre-scaling = <1 3>; + }; - adc-chan@9 { - reg = <0x9>; - }; + adc-chan@9 { + reg = <0x9>; + }; - adc-chan@a { - reg = <0xa>; - }; + adc-chan@a { + reg = <0xa>; + }; - adc-chan@e { - reg = <0xe>; - }; + adc-chan@e { + reg = <0xe>; + }; - adc-chan@f { - reg = <0xf>; + adc-chan@f { + reg = <0xf>; + }; + }; + }; + + - | + #include + #include + #include + + spmi { + #address-cells = <1>; + #size-cells = <0>; + adc@3100 { + reg = <0x3100>; + compatible = "qcom,spmi-adc7"; + #address-cells = <1>; + 
#size-cells = <0>; + #io-channel-cells = <1>; + + /* Other properties are omitted */ + xo-therm@44 { + reg = ; + qcom,ratiometric; + qcom,hw-settle-time = <200>; + }; + + conn-therm@47 { + reg = ; + qcom,ratiometric; + qcom,hw-settle-time = <200>; + }; }; - }; }; From 2cfb2180c3e8002719234c43b88b040e4f89396f Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 28 Oct 2022 13:23:42 +0200 Subject: [PATCH 2344/4122] iio: imu: st_lsm6dsx: introduce sw trigger support There are some hw configuration where irq0 and/or irq1 pins are not connected to the SPI or I2C/I3C controller. In order to avoid polling the output register introduce iio-sw trigger support when irq line is not available (or hw FIFO is not supported). Suggested-by: Mario Tesi Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/93ae6ff1150b531a9d7a4d3d1b1adb8383613717.1666955685.git.lorenzo@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h | 3 +- drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c | 78 ++++++++++++++++++++ drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_shub.c | 4 +- 3 files changed, 81 insertions(+), 4 deletions(-) diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h index 07ad8027de73..6399b0bb6f67 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h @@ -424,7 +424,7 @@ struct st_lsm6dsx_hw { struct { __le16 channels[3]; s64 ts __aligned(8); - } scan[3]; + } scan[ST_LSM6DSX_ID_MAX]; }; static __maybe_unused const struct iio_event_spec st_lsm6dsx_event = { @@ -456,6 +456,7 @@ int st_lsm6dsx_read_tagged_fifo(struct st_lsm6dsx_hw *hw); int st_lsm6dsx_check_odr(struct st_lsm6dsx_sensor *sensor, u32 odr, u8 *val); int st_lsm6dsx_shub_probe(struct st_lsm6dsx_hw *hw, const char *name); int st_lsm6dsx_shub_set_enable(struct st_lsm6dsx_sensor *sensor, bool enable); +int st_lsm6dsx_shub_read_output(struct st_lsm6dsx_hw *hw, u8 *data, int len); int st_lsm6dsx_set_page(struct 
st_lsm6dsx_hw *hw, bool enable); static inline int diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c index fe5fa08b68ac..73fd5f038375 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c @@ -53,6 +53,8 @@ #include #include #include +#include +#include #include #include #include @@ -2117,6 +2119,32 @@ static irqreturn_t st_lsm6dsx_handler_thread(int irq, void *private) return fifo_len || event ? IRQ_HANDLED : IRQ_NONE; } +static irqreturn_t st_lsm6dsx_sw_trigger_handler_thread(int irq, + void *private) +{ + struct iio_poll_func *pf = private; + struct iio_dev *iio_dev = pf->indio_dev; + struct st_lsm6dsx_sensor *sensor = iio_priv(iio_dev); + struct st_lsm6dsx_hw *hw = sensor->hw; + + if (sensor->id == ST_LSM6DSX_ID_EXT0 || + sensor->id == ST_LSM6DSX_ID_EXT1 || + sensor->id == ST_LSM6DSX_ID_EXT2) + st_lsm6dsx_shub_read_output(hw, + (u8 *)hw->scan[sensor->id].channels, + sizeof(hw->scan[sensor->id].channels)); + else + st_lsm6dsx_read_locked(hw, iio_dev->channels[0].address, + hw->scan[sensor->id].channels, + sizeof(hw->scan[sensor->id].channels)); + + iio_push_to_buffers_with_timestamp(iio_dev, &hw->scan[sensor->id], + iio_get_time_ns(iio_dev)); + iio_trigger_notify_done(iio_dev->trig); + + return IRQ_HANDLED; +} + static int st_lsm6dsx_irq_setup(struct st_lsm6dsx_hw *hw) { struct st_sensors_platform_data *pdata; @@ -2175,6 +2203,46 @@ static int st_lsm6dsx_irq_setup(struct st_lsm6dsx_hw *hw) return 0; } +static int st_lsm6dsx_sw_buffer_preenable(struct iio_dev *iio_dev) +{ + struct st_lsm6dsx_sensor *sensor = iio_priv(iio_dev); + + return st_lsm6dsx_device_set_enable(sensor, true); +} + +static int st_lsm6dsx_sw_buffer_postdisable(struct iio_dev *iio_dev) +{ + struct st_lsm6dsx_sensor *sensor = iio_priv(iio_dev); + + return st_lsm6dsx_device_set_enable(sensor, false); +} + +static const struct iio_buffer_setup_ops st_lsm6dsx_sw_buffer_ops = { + .preenable = 
st_lsm6dsx_sw_buffer_preenable, + .postdisable = st_lsm6dsx_sw_buffer_postdisable, +}; + +static int st_lsm6dsx_sw_buffers_setup(struct st_lsm6dsx_hw *hw) +{ + int i; + + for (i = 0; i < ST_LSM6DSX_ID_MAX; i++) { + int err; + + if (!hw->iio_devs[i]) + continue; + + err = devm_iio_triggered_buffer_setup(hw->dev, + hw->iio_devs[i], NULL, + st_lsm6dsx_sw_trigger_handler_thread, + &st_lsm6dsx_sw_buffer_ops); + if (err) + return err; + } + + return 0; +} + static int st_lsm6dsx_init_regulators(struct device *dev) { /* vdd-vddio power regulators */ @@ -2255,6 +2323,16 @@ int st_lsm6dsx_probe(struct device *dev, int irq, int hw_id, return err; } + if (!hw->irq || !hw->settings->fifo_ops.read_fifo) { + /* + * Rely on sw triggers (e.g. hr-timers) if irq pin is not + * connected or if the device does not support HW FIFO + */ + err = st_lsm6dsx_sw_buffers_setup(hw); + if (err) + return err; + } + err = iio_read_mount_matrix(hw->dev, &hw->orientation); if (err) return err; diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_shub.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_shub.c index 99562ba85ee4..f2b64b4956a3 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_shub.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_shub.c @@ -170,9 +170,7 @@ static void st_lsm6dsx_shub_wait_complete(struct st_lsm6dsx_hw *hw) * * Read st_lsm6dsx i2c controller register */ -static int -st_lsm6dsx_shub_read_output(struct st_lsm6dsx_hw *hw, u8 *data, - int len) +int st_lsm6dsx_shub_read_output(struct st_lsm6dsx_hw *hw, u8 *data, int len) { const struct st_lsm6dsx_shub_settings *hub_settings; int err; From a1c6d631ff12cd41201c3ab824f3c4db66621c13 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 31 Oct 2022 12:41:25 +0100 Subject: [PATCH 2345/4122] iio: imu: st_lsm6dsx: add support to LSM6DSV Add support to STM LSM6DSV (accelerometer and gyroscope) Mems sensor. 
Datasheet: https://www.st.com/resource/en/datasheet/lsm6dsv.pdf Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/aad879e7af0fe583bbf043a2b93e32fade79fa19.1667216004.git.lorenzo@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/imu/st_lsm6dsx/Kconfig | 2 +- drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h | 2 + .../iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c | 2 +- drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c | 202 ++++++++++++++++++ drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c | 5 + drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_spi.c | 5 + 6 files changed, 216 insertions(+), 2 deletions(-) diff --git a/drivers/iio/imu/st_lsm6dsx/Kconfig b/drivers/iio/imu/st_lsm6dsx/Kconfig index 2ed2b3f40c0b..1c68bac94bce 100644 --- a/drivers/iio/imu/st_lsm6dsx/Kconfig +++ b/drivers/iio/imu/st_lsm6dsx/Kconfig @@ -13,7 +13,7 @@ config IIO_ST_LSM6DSX sensor. Supported devices: lsm6ds3, lsm6ds3h, lsm6dsl, lsm6dsm, ism330dlc, lsm6dso, lsm6dsox, asm330lhh, asm330lhhx, lsm6dsr, lsm6ds3tr-c, ism330dhcx, lsm6dsrx, lsm6ds0, lsm6dsop, lsm6dstx, - the accelerometer/gyroscope of lsm9ds1 and lsm6dst. + lsm6dsv, the accelerometer/gyroscope of lsm9ds1 and lsm6dst. To compile this driver as a module, choose M here: the module will be called st_lsm6dsx. 
diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h index 6399b0bb6f67..ab61895cf072 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h @@ -33,6 +33,7 @@ #define ST_LSM6DSOP_DEV_NAME "lsm6dsop" #define ST_ASM330LHHX_DEV_NAME "asm330lhhx" #define ST_LSM6DSTX_DEV_NAME "lsm6dstx" +#define ST_LSM6DSV_DEV_NAME "lsm6dsv" enum st_lsm6dsx_hw_id { ST_LSM6DS3_ID, @@ -53,6 +54,7 @@ enum st_lsm6dsx_hw_id { ST_LSM6DSOP_ID, ST_ASM330LHHX_ID, ST_LSM6DSTX_ID, + ST_LSM6DSV_ID, ST_LSM6DSX_MAX_ID, }; diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c index 48fe6a45671b..7dd5205aea5b 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c @@ -15,7 +15,7 @@ * value of the decimation factor and ODR set for each FIFO data set. * * LSM6DSO/LSM6DSOX/ASM330LHH/ASM330LHHX/LSM6DSR/LSM6DSRX/ISM330DHCX/ - * LSM6DST/LSM6DSOP/LSM6DSTX: + * LSM6DST/LSM6DSOP/LSM6DSTX/LSM6DSV: * The FIFO buffer can be configured to store data from gyroscope and * accelerometer. Each sample is queued with a tag (1B) indicating data * source (gyroscope, accelerometer, hw timer). 
diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c index 73fd5f038375..5e716a5071fd 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c @@ -1162,6 +1162,208 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = { .wakeup_src_x_mask = BIT(2), }, }, + { + .reset = { + .addr = 0x12, + .mask = BIT(0), + }, + .boot = { + .addr = 0x12, + .mask = BIT(7), + }, + .bdu = { + .addr = 0x12, + .mask = BIT(6), + }, + .id = { + { + .hw_id = ST_LSM6DSV_ID, + .name = ST_LSM6DSV_DEV_NAME, + .wai = 0x70, + }, + }, + .channels = { + [ST_LSM6DSX_ID_ACC] = { + .chan = st_lsm6dsx_acc_channels, + .len = ARRAY_SIZE(st_lsm6dsx_acc_channels), + }, + [ST_LSM6DSX_ID_GYRO] = { + .chan = st_lsm6dsx_gyro_channels, + .len = ARRAY_SIZE(st_lsm6dsx_gyro_channels), + }, + }, + .drdy_mask = { + .addr = 0x13, + .mask = BIT(3), + }, + .odr_table = { + [ST_LSM6DSX_ID_ACC] = { + .reg = { + .addr = 0x10, + .mask = GENMASK(3, 0), + }, + .odr_avl[0] = { 7500, 0x02 }, + .odr_avl[1] = { 15000, 0x03 }, + .odr_avl[2] = { 30000, 0x04 }, + .odr_avl[3] = { 60000, 0x05 }, + .odr_avl[4] = { 120000, 0x06 }, + .odr_avl[5] = { 240000, 0x07 }, + .odr_avl[6] = { 480000, 0x08 }, + .odr_avl[7] = { 960000, 0x09 }, + .odr_len = 8, + }, + [ST_LSM6DSX_ID_GYRO] = { + .reg = { + .addr = 0x11, + .mask = GENMASK(3, 0), + }, + .odr_avl[0] = { 7500, 0x02 }, + .odr_avl[1] = { 15000, 0x03 }, + .odr_avl[2] = { 30000, 0x04 }, + .odr_avl[3] = { 60000, 0x05 }, + .odr_avl[4] = { 120000, 0x06 }, + .odr_avl[5] = { 240000, 0x07 }, + .odr_avl[6] = { 480000, 0x08 }, + .odr_avl[7] = { 960000, 0x09 }, + .odr_len = 8, + }, + }, + .fs_table = { + [ST_LSM6DSX_ID_ACC] = { + .reg = { + .addr = 0x17, + .mask = GENMASK(1, 0), + }, + .fs_avl[0] = { IIO_G_TO_M_S_2(61000), 0x0 }, + .fs_avl[1] = { IIO_G_TO_M_S_2(122000), 0x1 }, + .fs_avl[2] = { IIO_G_TO_M_S_2(244000), 0x2 }, + .fs_avl[3] = { IIO_G_TO_M_S_2(488000), 0x3 }, + 
.fs_len = 4, + }, + [ST_LSM6DSX_ID_GYRO] = { + .reg = { + .addr = 0x15, + .mask = GENMASK(3, 0), + }, + .fs_avl[0] = { IIO_DEGREE_TO_RAD(8750000), 0x1 }, + .fs_avl[1] = { IIO_DEGREE_TO_RAD(17500000), 0x2 }, + .fs_avl[2] = { IIO_DEGREE_TO_RAD(35000000), 0x3 }, + .fs_avl[3] = { IIO_DEGREE_TO_RAD(70000000), 0x4 }, + .fs_len = 4, + }, + }, + .irq_config = { + .irq1 = { + .addr = 0x0d, + .mask = BIT(3), + }, + .irq2 = { + .addr = 0x0e, + .mask = BIT(3), + }, + .lir = { + .addr = 0x56, + .mask = BIT(0), + }, + .irq1_func = { + .addr = 0x5e, + .mask = BIT(5), + }, + .irq2_func = { + .addr = 0x5f, + .mask = BIT(5), + }, + .hla = { + .addr = 0x03, + .mask = BIT(4), + }, + .od = { + .addr = 0x03, + .mask = BIT(3), + }, + }, + .batch = { + [ST_LSM6DSX_ID_ACC] = { + .addr = 0x09, + .mask = GENMASK(3, 0), + }, + [ST_LSM6DSX_ID_GYRO] = { + .addr = 0x09, + .mask = GENMASK(7, 4), + }, + }, + .fifo_ops = { + .update_fifo = st_lsm6dsx_update_fifo, + .read_fifo = st_lsm6dsx_read_tagged_fifo, + .fifo_th = { + .addr = 0x07, + .mask = GENMASK(7, 0), + }, + .fifo_diff = { + .addr = 0x1b, + .mask = GENMASK(8, 0), + }, + .max_size = 512, + .th_wl = 1, + }, + .ts_settings = { + .timer_en = { + .addr = 0x50, + .mask = BIT(6), + }, + .decimator = { + .addr = 0x0a, + .mask = GENMASK(7, 6), + }, + .freq_fine = 0x4f, + }, + .shub_settings = { + .page_mux = { + .addr = 0x01, + .mask = BIT(6), + }, + .master_en = { + .sec_page = true, + .addr = 0x14, + .mask = BIT(2), + }, + .pullup_en = { + .addr = 0x03, + .mask = BIT(6), + }, + .aux_sens = { + .addr = 0x14, + .mask = GENMASK(1, 0), + }, + .wr_once = { + .addr = 0x14, + .mask = BIT(6), + }, + .num_ext_dev = 3, + .shub_out = { + .sec_page = true, + .addr = 0x02, + }, + .slv0_addr = 0x15, + .dw_slv0_addr = 0x21, + .batch_en = BIT(3), + }, + .event_settings = { + .enable_reg = { + .addr = 0x50, + .mask = BIT(7), + }, + .wakeup_reg = { + .addr = 0x5b, + .mask = GENMASK(5, 0), + }, + .wakeup_src_reg = 0x45, + .wakeup_src_status_mask = BIT(3), + 
.wakeup_src_z_mask = BIT(0), + .wakeup_src_y_mask = BIT(1), + .wakeup_src_x_mask = BIT(2), + }, + }, }; int st_lsm6dsx_set_page(struct st_lsm6dsx_hw *hw, bool enable) diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c index 307c8c436862..239c8920a31f 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c @@ -109,6 +109,10 @@ static const struct of_device_id st_lsm6dsx_i2c_of_match[] = { .compatible = "st,lsm6dstx", .data = (void *)ST_LSM6DSTX_ID, }, + { + .compatible = "st,lsm6dsv", + .data = (void *)ST_LSM6DSV_ID, + }, {}, }; MODULE_DEVICE_TABLE(of, st_lsm6dsx_i2c_of_match); @@ -132,6 +136,7 @@ static const struct i2c_device_id st_lsm6dsx_i2c_id_table[] = { { ST_LSM6DSOP_DEV_NAME, ST_LSM6DSOP_ID }, { ST_ASM330LHHX_DEV_NAME, ST_ASM330LHHX_ID }, { ST_LSM6DSTX_DEV_NAME, ST_LSM6DSTX_ID }, + { ST_LSM6DSV_DEV_NAME, ST_LSM6DSV_ID }, {}, }; MODULE_DEVICE_TABLE(i2c, st_lsm6dsx_i2c_id_table); diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_spi.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_spi.c index 6a4eecf4bb05..66705ef16ed0 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_spi.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_spi.c @@ -109,6 +109,10 @@ static const struct of_device_id st_lsm6dsx_spi_of_match[] = { .compatible = "st,lsm6dstx", .data = (void *)ST_LSM6DSTX_ID, }, + { + .compatible = "st,lsm6dsv", + .data = (void *)ST_LSM6DSV_ID, + }, {}, }; MODULE_DEVICE_TABLE(of, st_lsm6dsx_spi_of_match); @@ -132,6 +136,7 @@ static const struct spi_device_id st_lsm6dsx_spi_id_table[] = { { ST_LSM6DSOP_DEV_NAME, ST_LSM6DSOP_ID }, { ST_ASM330LHHX_DEV_NAME, ST_ASM330LHHX_ID }, { ST_LSM6DSTX_DEV_NAME, ST_LSM6DSTX_ID }, + { ST_LSM6DSV_DEV_NAME, ST_LSM6DSV_ID }, {}, }; MODULE_DEVICE_TABLE(spi, st_lsm6dsx_spi_id_table); From 9b4901528f57fe88c7e0a0bfdf69d8edcaad46fb Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 31 Oct 2022 12:41:26 +0100 Subject: [PATCH 2346/4122] 
dt-bindings: iio: imu: st_lsm6dsx: add lsm6dsv device bindings Introduce device bindings for LSM6DSV IMU sensor. Signed-off-by: Lorenzo Bianconi Acked-by: Rob Herring Link: https://lore.kernel.org/r/c34cea4468dc26050dff812f47e1a21f8e544758.1667216004.git.lorenzo@kernel.org Signed-off-by: Jonathan Cameron --- Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml b/Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml index fe1e02e5d7b3..e7349a3275dd 100644 --- a/Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml +++ b/Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml @@ -32,6 +32,7 @@ properties: - st,lsm6dsrx - st,lsm6dst - st,lsm6dsop + - st,lsm6dsv - items: - const: st,asm330lhhx - const: st,lsm6dsr From 75347e30f142521c140ba1f5011d4fb175c1406b Mon Sep 17 00:00:00 2001 From: Ramona Bolboaca Date: Mon, 31 Oct 2022 12:51:27 +0200 Subject: [PATCH 2347/4122] drivers: iio: accel: Use warning if invalid device id is detected Use warning instead of failing driver probe if invalid device id is detected for ADXL355 device. 
Signed-off-by: Ramona Bolboaca Link: https://lore.kernel.org/r/20221031105129.47740-2-ramona.bolboaca@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/accel/adxl355_core.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/iio/accel/adxl355_core.c b/drivers/iio/accel/adxl355_core.c index 4bc648eac8b2..dd08253d66d0 100644 --- a/drivers/iio/accel/adxl355_core.c +++ b/drivers/iio/accel/adxl355_core.c @@ -262,10 +262,8 @@ static int adxl355_setup(struct adxl355_data *data) if (ret) return ret; - if (regval != ADXL355_PARTID_VAL) { - dev_err(data->dev, "Invalid DEV ID 0x%02x\n", regval); - return -ENODEV; - } + if (regval != ADXL355_PARTID_VAL) + dev_warn(data->dev, "Invalid DEV ID 0x%02x\n", regval); /* * Perform a software reset to make sure the device is in a consistent From b311d2e170761fbf73184d236f53187e3db8bd14 Mon Sep 17 00:00:00 2001 From: Ramona Bolboaca Date: Mon, 31 Oct 2022 12:51:28 +0200 Subject: [PATCH 2348/4122] dt-bindings: iio: accel: Add docs for ADXL359 Update ADXL355 existing documentation with documentation for ADXL359 device. 
Signed-off-by: Ramona Bolboaca Acked-by: Rob Herring Link: https://lore.kernel.org/r/20221031105129.47740-3-ramona.bolboaca@analog.com Signed-off-by: Jonathan Cameron --- .../devicetree/bindings/iio/accel/adi,adxl355.yaml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/iio/accel/adi,adxl355.yaml b/Documentation/devicetree/bindings/iio/accel/adi,adxl355.yaml index 14b487088ab4..6b03c4efbb08 100644 --- a/Documentation/devicetree/bindings/iio/accel/adi,adxl355.yaml +++ b/Documentation/devicetree/bindings/iio/accel/adi,adxl355.yaml @@ -4,20 +4,22 @@ $id: http://devicetree.org/schemas/iio/accel/adi,adxl355.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Analog Devices ADXL355 3-Axis, Low noise MEMS Accelerometer +title: Analog Devices ADXL355 and ADXL359 3-Axis, Low noise MEMS Accelerometers maintainers: - Puranjay Mohan description: | - Analog Devices ADXL355 3-Axis, Low noise MEMS Accelerometer that supports - both I2C & SPI interfaces + Analog Devices ADXL355 and ADXL359 3-Axis, Low noise MEMS Accelerometers that + support both I2C & SPI interfaces https://www.analog.com/en/products/adxl355.html + https://www.analog.com/en/products/adxl359.html properties: compatible: enum: - adi,adxl355 + - adi,adxl359 reg: maxItems: 1 From d3532d69757f9c6a73155d73bd9b94b64b12ef72 Mon Sep 17 00:00:00 2001 From: Ramona Bolboaca Date: Mon, 31 Oct 2022 12:51:29 +0200 Subject: [PATCH 2349/4122] drivers: iio: accel: Add support for ADXL359 device Add support for ADXL359 device in already existing ADXL355 driver. 
Datasheet: https://www.analog.com/media/en/technical-documentation/data-sheets/adxl359.pdf Signed-off-by: Ramona Bolboaca Link: https://lore.kernel.org/r/20221031105129.47740-4-ramona.bolboaca@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/accel/adxl355.h | 20 +++++++- drivers/iio/accel/adxl355_core.c | 87 +++++++++++++++++++++++++------- drivers/iio/accel/adxl355_i2c.c | 22 ++++++-- drivers/iio/accel/adxl355_spi.c | 19 +++++-- 4 files changed, 121 insertions(+), 27 deletions(-) diff --git a/drivers/iio/accel/adxl355.h b/drivers/iio/accel/adxl355.h index 6dd49b13e4fd..061e66dc7057 100644 --- a/drivers/iio/accel/adxl355.h +++ b/drivers/iio/accel/adxl355.h @@ -10,12 +10,30 @@ #include +enum adxl355_device_type { + ADXL355, + ADXL359, +}; + +struct adxl355_fractional_type { + int integer; + int decimal; +}; + struct device; +struct adxl355_chip_info { + const char *name; + u8 part_id; + struct adxl355_fractional_type accel_scale; + struct adxl355_fractional_type temp_offset; +}; + extern const struct regmap_access_table adxl355_readable_regs_tbl; extern const struct regmap_access_table adxl355_writeable_regs_tbl; +extern const struct adxl355_chip_info adxl35x_chip_info[]; int adxl355_core_probe(struct device *dev, struct regmap *regmap, - const char *name); + const struct adxl355_chip_info *chip_info); #endif /* _ADXL355_H_ */ diff --git a/drivers/iio/accel/adxl355_core.c b/drivers/iio/accel/adxl355_core.c index dd08253d66d0..0c9225d18fb2 100644 --- a/drivers/iio/accel/adxl355_core.c +++ b/drivers/iio/accel/adxl355_core.c @@ -60,6 +60,7 @@ #define ADXL355_DEVID_AD_VAL 0xAD #define ADXL355_DEVID_MST_VAL 0x1D #define ADXL355_PARTID_VAL 0xED +#define ADXL359_PARTID_VAL 0xE9 #define ADXL355_RESET_CODE 0x52 static const struct regmap_range adxl355_read_reg_range[] = { @@ -83,6 +84,60 @@ const struct regmap_access_table adxl355_writeable_regs_tbl = { }; EXPORT_SYMBOL_NS_GPL(adxl355_writeable_regs_tbl, IIO_ADXL355); +const struct adxl355_chip_info 
adxl35x_chip_info[] = { + [ADXL355] = { + .name = "adxl355", + .part_id = ADXL355_PARTID_VAL, + /* + * At +/- 2g with 20-bit resolution, scale is given in datasheet + * as 3.9ug/LSB = 0.0000039 * 9.80665 = 0.00003824593 m/s^2. + */ + .accel_scale = { + .integer = 0, + .decimal = 38245, + }, + /* + * The datasheet defines an intercept of 1885 LSB at 25 degC + * and a slope of -9.05 LSB/C. The following formula can be used + * to find the temperature: + * Temp = ((RAW - 1885)/(-9.05)) + 25 but this doesn't follow + * the format of the IIO which is Temp = (RAW + OFFSET) * SCALE. + * Hence using some rearranging we get the scale as -110.497238 + * and offset as -2111.25. + */ + .temp_offset = { + .integer = -2111, + .decimal = 250000, + }, + }, + [ADXL359] = { + .name = "adxl359", + .part_id = ADXL359_PARTID_VAL, + /* + * At +/- 10g with 20-bit resolution, scale is given in datasheet + * as 19.5ug/LSB = 0.0000195 * 9.80665 = 0.00019122967 m/s^2. + */ + .accel_scale = { + .integer = 0, + .decimal = 191229, + }, + /* + * The datasheet defines an intercept of 1852 LSB at 25 degC + * and a slope of -9.05 LSB/C. The following formula can be used + * to find the temperature: + * Temp = ((RAW - 1852)/(-9.05)) + 25 but this doesn't follow + * the format of the IIO which is Temp = (RAW + OFFSET) * SCALE. + * Hence using some rearranging we get the scale as -110.497238 + * and offset as -2079.25. 
+ */ + .temp_offset = { + .integer = -2079, + .decimal = 250000, + }, + }, +}; +EXPORT_SYMBOL_NS_GPL(adxl35x_chip_info, IIO_ADXL355); + enum adxl355_op_mode { ADXL355_MEASUREMENT, ADXL355_STANDBY, @@ -162,6 +217,7 @@ static const struct adxl355_chan_info adxl355_chans[] = { }; struct adxl355_data { + const struct adxl355_chip_info *chip_info; struct regmap *regmap; struct device *dev; struct mutex lock; /* lock to protect op_mode */ @@ -456,33 +512,25 @@ static int adxl355_read_raw(struct iio_dev *indio_dev, case IIO_CHAN_INFO_SCALE: switch (chan->type) { - /* - * The datasheet defines an intercept of 1885 LSB at 25 degC - * and a slope of -9.05 LSB/C. The following formula can be used - * to find the temperature: - * Temp = ((RAW - 1885)/(-9.05)) + 25 but this doesn't follow - * the format of the IIO which is Temp = (RAW + OFFSET) * SCALE. - * Hence using some rearranging we get the scale as -110.497238 - * and offset as -2111.25. - */ case IIO_TEMP: + /* + * Temperature scale is -110.497238. + * See the detailed explanation in adxl35x_chip_info + * definition above. + */ *val = -110; *val2 = 497238; return IIO_VAL_INT_PLUS_MICRO; - /* - * At +/- 2g with 20-bit resolution, scale is given in datasheet - * as 3.9ug/LSB = 0.0000039 * 9.80665 = 0.00003824593 m/s^2. 
- */ case IIO_ACCEL: - *val = 0; - *val2 = 38245; + *val = data->chip_info->accel_scale.integer; + *val2 = data->chip_info->accel_scale.decimal; return IIO_VAL_INT_PLUS_NANO; default: return -EINVAL; } case IIO_CHAN_INFO_OFFSET: - *val = -2111; - *val2 = 250000; + *val = data->chip_info->temp_offset.integer; + *val2 = data->chip_info->temp_offset.decimal; return IIO_VAL_INT_PLUS_MICRO; case IIO_CHAN_INFO_CALIBBIAS: *val = sign_extend32(data->calibbias[chan->address], 15); @@ -705,7 +753,7 @@ static int adxl355_probe_trigger(struct iio_dev *indio_dev, int irq) } int adxl355_core_probe(struct device *dev, struct regmap *regmap, - const char *name) + const struct adxl355_chip_info *chip_info) { struct adxl355_data *data; struct iio_dev *indio_dev; @@ -720,9 +768,10 @@ int adxl355_core_probe(struct device *dev, struct regmap *regmap, data->regmap = regmap; data->dev = dev; data->op_mode = ADXL355_STANDBY; + data->chip_info = chip_info; mutex_init(&data->lock); - indio_dev->name = name; + indio_dev->name = chip_info->name; indio_dev->info = &adxl355_info; indio_dev->modes = INDIO_DIRECT_MODE; indio_dev->channels = adxl355_channels; diff --git a/drivers/iio/accel/adxl355_i2c.c b/drivers/iio/accel/adxl355_i2c.c index f67d57921c81..6cde5ccac06b 100644 --- a/drivers/iio/accel/adxl355_i2c.c +++ b/drivers/iio/accel/adxl355_i2c.c @@ -23,6 +23,20 @@ static const struct regmap_config adxl355_i2c_regmap_config = { static int adxl355_i2c_probe(struct i2c_client *client) { struct regmap *regmap; + const struct adxl355_chip_info *chip_data; + const struct i2c_device_id *adxl355; + + chip_data = device_get_match_data(&client->dev); + if (!chip_data) { + adxl355 = to_i2c_driver(client->dev.driver)->id_table; + if (!adxl355) + return -EINVAL; + + chip_data = (void *)i2c_match_id(adxl355, client)->driver_data; + + if (!chip_data) + return -EINVAL; + } regmap = devm_regmap_init_i2c(client, &adxl355_i2c_regmap_config); if (IS_ERR(regmap)) { @@ -32,17 +46,19 @@ static int 
adxl355_i2c_probe(struct i2c_client *client) return PTR_ERR(regmap); } - return adxl355_core_probe(&client->dev, regmap, client->name); + return adxl355_core_probe(&client->dev, regmap, chip_data); } static const struct i2c_device_id adxl355_i2c_id[] = { - { "adxl355", 0 }, + { "adxl355", (kernel_ulong_t)&adxl35x_chip_info[ADXL355] }, + { "adxl359", (kernel_ulong_t)&adxl35x_chip_info[ADXL359] }, { } }; MODULE_DEVICE_TABLE(i2c, adxl355_i2c_id); static const struct of_device_id adxl355_of_match[] = { - { .compatible = "adi,adxl355" }, + { .compatible = "adi,adxl355", .data = &adxl35x_chip_info[ADXL355] }, + { .compatible = "adi,adxl359", .data = &adxl35x_chip_info[ADXL359] }, { } }; MODULE_DEVICE_TABLE(of, adxl355_of_match); diff --git a/drivers/iio/accel/adxl355_spi.c b/drivers/iio/accel/adxl355_spi.c index 5fe986ae03f6..fc99534d91ff 100644 --- a/drivers/iio/accel/adxl355_spi.c +++ b/drivers/iio/accel/adxl355_spi.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "adxl355.h" @@ -24,9 +25,17 @@ static const struct regmap_config adxl355_spi_regmap_config = { static int adxl355_spi_probe(struct spi_device *spi) { - const struct spi_device_id *id = spi_get_device_id(spi); + const struct adxl355_chip_info *chip_data; struct regmap *regmap; + chip_data = device_get_match_data(&spi->dev); + if (!chip_data) { + chip_data = (void *)spi_get_device_id(spi)->driver_data; + + if (!chip_data) + return -EINVAL; + } + regmap = devm_regmap_init_spi(spi, &adxl355_spi_regmap_config); if (IS_ERR(regmap)) { dev_err(&spi->dev, "Error initializing spi regmap: %ld\n", @@ -35,17 +44,19 @@ static int adxl355_spi_probe(struct spi_device *spi) return PTR_ERR(regmap); } - return adxl355_core_probe(&spi->dev, regmap, id->name); + return adxl355_core_probe(&spi->dev, regmap, chip_data); } static const struct spi_device_id adxl355_spi_id[] = { - { "adxl355", 0 }, + { "adxl355", (kernel_ulong_t)&adxl35x_chip_info[ADXL355] }, + { "adxl359", (kernel_ulong_t)&adxl35x_chip_info[ADXL359] }, 
{ } }; MODULE_DEVICE_TABLE(spi, adxl355_spi_id); static const struct of_device_id adxl355_of_match[] = { - { .compatible = "adi,adxl355" }, + { .compatible = "adi,adxl355", .data = &adxl35x_chip_info[ADXL355] }, + { .compatible = "adi,adxl359", .data = &adxl35x_chip_info[ADXL359] }, { } }; MODULE_DEVICE_TABLE(of, adxl355_of_match); From ed81d3de8eacf70c96f7012e4e16257efc2e12a9 Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Sun, 6 Nov 2022 21:02:33 +0700 Subject: [PATCH 2350/4122] Documentation: ad4130: format list of in_voltage-voltage_filter_mode_available modes Sphinx reports two warnings on sysfs documentation for AD4130 driver: Documentation/ABI/testing/sysfs-bus-iio-adc-ad4130:2: WARNING: Unexpected indentation. Documentation/ABI/testing/sysfs-bus-iio-adc-ad4130:2: WARNING: Block quote ends without a blank line; unexpected unindent. These are due to misformatting of sinc* modes list. Format it with bullet list. Since each entry spans multiple lines, separate each with a blank line. Fixes: 62094060cf3a ("iio: adc: ad4130: add AD4130 driver") Signed-off-by: Bagas Sanjaya Link: https://lore.kernel.org/r/20221106140233.74112-1-bagasdotme@gmail.com Signed-off-by: Jonathan Cameron --- .../ABI/testing/sysfs-bus-iio-adc-ad4130 | 48 +++++++++++-------- 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-bus-iio-adc-ad4130 b/Documentation/ABI/testing/sysfs-bus-iio-adc-ad4130 index d9555751d21c..f24ed6687e90 100644 --- a/Documentation/ABI/testing/sysfs-bus-iio-adc-ad4130 +++ b/Documentation/ABI/testing/sysfs-bus-iio-adc-ad4130 @@ -3,25 +3,35 @@ KernelVersion: 6.2 Contact: linux-iio@vger.kernel.org Description: Reading returns a list with the possible filter modes. - "sinc4" - Sinc 4. Excellent noise performance. Long 1st - conversion time. No natural 50/60Hz rejection. - "sinc4+sinc1" - Sinc4 + averaging by 8. Low 1st conversion time. - "sinc3" - Sinc3. Moderate 1st conversion time. Good noise - performance. 
- "sinc3+rej60" - Sinc3 + 60Hz rejection. At a sampling frequency - of 50Hz, achieves simultaneous 50Hz and 60Hz - rejection. - "sinc3+sinc1" - Sinc3 + averaging by 8. Low 1st conversion time. - Best used with a sampling frequency of at least - 216.19Hz. - "sinc3+pf1" - Sinc3 + Post Filter 1. - 53dB rejection @ 50Hz, 58dB rejection @ 60Hz. - "sinc3+pf2" - Sinc3 + Post Filter 2. - 70dB rejection @ 50Hz, 70dB rejection @ 60Hz. - "sinc3+pf3" - Sinc3 + Post Filter 3. - 99dB rejection @ 50Hz, 103dB rejection @ 60Hz. - "sinc3+pf4" - Sinc3 + Post Filter 4. - 103dB rejection @ 50Hz, 109dB rejection @ 60Hz. + + * "sinc4" - Sinc 4. Excellent noise performance. Long + 1st conversion time. No natural 50/60Hz rejection. + + * "sinc4+sinc1" - Sinc4 + averaging by 8. Low 1st conversion + time. + + * "sinc3" - Sinc3. Moderate 1st conversion time. + Good noise performance. + + * "sinc3+rej60" - Sinc3 + 60Hz rejection. At a sampling + frequency of 50Hz, achieves simultaneous 50Hz and 60Hz + rejection. + + * "sinc3+sinc1" - Sinc3 + averaging by 8. Low 1st conversion + time. Best used with a sampling frequency of at least + 216.19Hz. + + * "sinc3+pf1" - Sinc3 + Post Filter 1. 53dB rejection @ + 50Hz, 58dB rejection @ 60Hz. + + * "sinc3+pf2" - Sinc3 + Post Filter 2. 70dB rejection @ + 50Hz, 70dB rejection @ 60Hz. + + * "sinc3+pf3" - Sinc3 + Post Filter 3. 99dB rejection @ + 50Hz, 103dB rejection @ 60Hz. + + * "sinc3+pf4" - Sinc3 + Post Filter 4. 103dB rejection @ + 50Hz, 109dB rejection @ 60Hz. What: /sys/bus/iio/devices/iio:deviceX/in_voltageY-voltageZ_filter_mode KernelVersion: 6.2 From 5e0176213949724fbe9a8e4a39817edce337b8a0 Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Thu, 3 Nov 2022 15:00:29 +0200 Subject: [PATCH 2351/4122] iio: temperature: ltc2983: make bulk write buffer DMA-safe regmap_bulk_write() does not guarantee implicit DMA-safety, even though the current implementation duplicates the given buffer. Do not rely on it. 
Fixes: f110f3188e56 ("iio: temperature: Add support for LTC2983") Signed-off-by: Cosmin Tanislav Link: https://lore.kernel.org/r/20221103130041.2153295-2-demonsingur@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/temperature/ltc2983.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/iio/temperature/ltc2983.c b/drivers/iio/temperature/ltc2983.c index a60ccf183687..1117991ca2ab 100644 --- a/drivers/iio/temperature/ltc2983.c +++ b/drivers/iio/temperature/ltc2983.c @@ -209,6 +209,7 @@ struct ltc2983_data { * Holds the converted temperature */ __be32 temp __aligned(IIO_DMA_MINALIGN); + __be32 chan_val; }; struct ltc2983_sensor { @@ -313,19 +314,18 @@ static int __ltc2983_fault_handler(const struct ltc2983_data *st, return 0; } -static int __ltc2983_chan_assign_common(const struct ltc2983_data *st, +static int __ltc2983_chan_assign_common(struct ltc2983_data *st, const struct ltc2983_sensor *sensor, u32 chan_val) { u32 reg = LTC2983_CHAN_START_ADDR(sensor->chan); - __be32 __chan_val; chan_val |= LTC2983_CHAN_TYPE(sensor->type); dev_dbg(&st->spi->dev, "Assign reg:0x%04X, val:0x%08X\n", reg, chan_val); - __chan_val = cpu_to_be32(chan_val); - return regmap_bulk_write(st->regmap, reg, &__chan_val, - sizeof(__chan_val)); + st->chan_val = cpu_to_be32(chan_val); + return regmap_bulk_write(st->regmap, reg, &st->chan_val, + sizeof(st->chan_val)); } static int __ltc2983_chan_custom_sensor_assign(struct ltc2983_data *st, From 4ecee36077698bc5c759571106cd29321a886735 Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Thu, 3 Nov 2022 15:00:30 +0200 Subject: [PATCH 2352/4122] dt-bindings: iio: temperature: ltc2983: add default values Binding properties should have default values to let the reader know if they should change it. Add them based on driver logic. 
Signed-off-by: Cosmin Tanislav Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20221103130041.2153295-3-demonsingur@gmail.com Signed-off-by: Jonathan Cameron --- .../devicetree/bindings/iio/temperature/adi,ltc2983.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml index 722781aa4697..82667adc85b1 100644 --- a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml +++ b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml @@ -34,6 +34,7 @@ properties: cases. An extra delay can be configured using this property. The value is rounded to nearest 100us. maximum: 255 + default: 0 adi,filter-notch-freq: description: @@ -45,6 +46,7 @@ properties: $ref: /schemas/types.yaml#/definitions/uint32 minimum: 0 maximum: 2 + default: 0 '#address-cells': const: 1 @@ -104,6 +106,7 @@ patternProperties: This property set's the pulsed current value applied during open-circuit detect. enum: [10, 100, 500, 1000] + default: 10 adi,cold-junction-handle: description: @@ -163,6 +166,7 @@ patternProperties: cycles, this property will assume different predefined values on each cycle. Just set the value of the first cycle (1l). enum: [10, 20, 40, 80] + default: 10 adi,ideal-factor-value: description: @@ -170,6 +174,7 @@ patternProperties: be multiplied by 1000000 to remove the fractional part. For more information look at table 20 of the datasheet. $ref: /schemas/types.yaml#/definitions/uint32 + default: 0 "^rtd@": type: object @@ -207,6 +212,7 @@ patternProperties: property to 5 means 4 wires with Kelvin Rsense. $ref: /schemas/types.yaml#/definitions/uint32 enum: [2, 3, 4, 5] + default: 2 adi,rsense-share: description: @@ -226,6 +232,7 @@ patternProperties: This property controls the magnitude of the excitation current applied to the RTD. 
enum: [5, 10, 25, 50, 100, 250, 500, 1000] + default: 5 adi,rtd-curve: description: @@ -234,6 +241,7 @@ patternProperties: $ref: /schemas/types.yaml#/definitions/uint32 minimum: 0 maximum: 3 + default: 0 adi,custom-rtd: description: @@ -308,6 +316,7 @@ patternProperties: $ref: /schemas/types.yaml#/definitions/uint32 enum: [0, 250, 500, 1000, 5000, 10000, 25000, 50000, 100000, 250000, 500000, 1000000] + default: 0 adi,custom-thermistor: description: From d449fb5146b9ba93cb72a0676c3da3dd7a1743a9 Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Thu, 3 Nov 2022 15:00:31 +0200 Subject: [PATCH 2353/4122] dt-bindings: iio: temperature: ltc2983: use hex for sensor address Addresses should be in hex, fix it. Although the driver initially specified 1-20, it can be made free-range since the address is supposed to match reg, onto which we can impose restrictions based on the compatible property value. Signed-off-by: Cosmin Tanislav Acked-by: Rob Herring Link: https://lore.kernel.org/r/20221103130041.2153295-4-demonsingur@gmail.com Signed-off-by: Jonathan Cameron --- .../devicetree/bindings/iio/temperature/adi,ltc2983.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml index 82667adc85b1..29f6fa5e2529 100644 --- a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml +++ b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml @@ -55,7 +55,7 @@ properties: const: 0 patternProperties: - "@([1-9]|1[0-9]|20)$": + "@([0-9a-f]+)$": type: object properties: From 828a6c2252babdb79c7870080e872729ea90df17 Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Thu, 3 Nov 2022 15:00:32 +0200 Subject: [PATCH 2354/4122] dt-bindings: iio: temperature: ltc2983: remove quotations from phandle ref Not needed, so why have them? 
Signed-off-by: Cosmin Tanislav Acked-by: Rob Herring Link: https://lore.kernel.org/r/20221103130041.2153295-5-demonsingur@gmail.com Signed-off-by: Jonathan Cameron --- .../devicetree/bindings/iio/temperature/adi,ltc2983.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml index 29f6fa5e2529..6b3a20448f78 100644 --- a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml +++ b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml @@ -112,7 +112,7 @@ patternProperties: description: Phandle which points to a sensor object responsible for measuring the thermocouple cold junction temperature. - $ref: "/schemas/types.yaml#/definitions/phandle" + $ref: /schemas/types.yaml#/definitions/phandle adi,custom-thermocouple: description: @@ -204,7 +204,7 @@ patternProperties: adi,rsense-handle: description: Phandle pointing to a rsense object associated with this RTD. - $ref: "/schemas/types.yaml#/definitions/phandle" + $ref: /schemas/types.yaml#/definitions/phandle adi,number-of-wires: description: @@ -288,7 +288,7 @@ patternProperties: description: Phandle pointing to a rsense object associated with this thermistor. - $ref: "/schemas/types.yaml#/definitions/phandle" + $ref: /schemas/types.yaml#/definitions/phandle adi,single-ended: description: From 6cf75e90c5cc268d47ea67bfdf05961c1439dee4 Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Thu, 3 Nov 2022 15:00:33 +0200 Subject: [PATCH 2355/4122] dt-bindings: iio: temperature: ltc2983: describe matrix items Give a little bit of information on what each item in the matrix is supposed to be. Also, some matrices put the 'minItems' and 'maxItems' keywords in the wrong level. They should be on the same level as the 'items' keyword. Fix it. 
Signed-off-by: Cosmin Tanislav Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20221103130041.2153295-6-demonsingur@gmail.com Signed-off-by: Jonathan Cameron --- .../bindings/iio/temperature/adi,ltc2983.yaml | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml index 6b3a20448f78..4f26b337c957 100644 --- a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml +++ b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml @@ -126,8 +126,9 @@ patternProperties: minItems: 3 maxItems: 64 items: - minItems: 2 - maxItems: 2 + items: + - description: Voltage point in nV, signed. + - description: Temperature point in uK. "^diode@": type: object @@ -249,12 +250,12 @@ patternProperties: resistance(ohm)-temperature(K). The entries added here are in uohm and uK. For more details values look at table 74 and 75. $ref: /schemas/types.yaml#/definitions/uint64-matrix + minItems: 3 + maxItems: 64 items: - minItems: 3 - maxItems: 64 items: - minItems: 2 - maxItems: 2 + - description: Resistance point in uOhms. + - description: Temperature point in uK. required: - adi,rsense-handle @@ -328,8 +329,9 @@ patternProperties: minItems: 3 maxItems: 64 items: - minItems: 2 - maxItems: 2 + items: + - description: Resistance point in uOhms. + - description: Temperature point in uK. adi,custom-steinhart: description: @@ -338,9 +340,8 @@ patternProperties: Steinhart sensors the coefficients are given in the raw format. Look at table 82 for more information. 
$ref: /schemas/types.yaml#/definitions/uint32-array - items: - minItems: 6 - maxItems: 6 + minItems: 6 + maxItems: 6 required: - adi,rsense-handle From a2ebc8d281de63797fea5188f2aaa402cdb63cf1 Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Thu, 3 Nov 2022 15:00:34 +0200 Subject: [PATCH 2356/4122] dt-bindings: iio: temperature: ltc2983: require custom sensor tables The driver will error out when a custom sensor type is used but a custom sensor table is not provided. Require it in the binding too. Signed-off-by: Cosmin Tanislav Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20221103130041.2153295-7-demonsingur@gmail.com Signed-off-by: Jonathan Cameron --- .../bindings/iio/temperature/adi,ltc2983.yaml | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml index 4f26b337c957..bbac5f5cfbb3 100644 --- a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml +++ b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml @@ -130,6 +130,15 @@ patternProperties: - description: Voltage point in nV, signed. - description: Temperature point in uK. 
+ allOf: + - if: + properties: + adi,sensor-type: + const: 9 + then: + required: + - adi,custom-thermocouple + "^diode@": type: object description: @@ -263,6 +272,15 @@ patternProperties: dependencies: adi,current-rotate: [ "adi,rsense-share" ] + allOf: + - if: + properties: + adi,sensor-type: + const: 18 + then: + required: + - adi,custom-rtd + "^thermistor@": type: object description: @@ -349,6 +367,22 @@ patternProperties: dependencies: adi,current-rotate: [ "adi,rsense-share" ] + allOf: + - if: + properties: + adi,sensor-type: + const: 26 + then: + required: + - adi,custom-steinhart + - if: + properties: + adi,sensor-type: + const: 27 + then: + required: + - adi,custom-thermistor + "^adc@": type: object description: Represents a channel which is being used as a direct adc. From b3805fc3dcf35512266bddf8c785eb82a24325bc Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Thu, 3 Nov 2022 15:00:35 +0200 Subject: [PATCH 2357/4122] dt-bindings: iio: temperature: ltc2983: require 4 wire rtd for current rotate The driver will error out when current rotation is enabled but the RTD is not 4-wire. Require it in the binding too. Signed-off-by: Cosmin Tanislav Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20221103130041.2153295-8-demonsingur@gmail.com Signed-off-by: Jonathan Cameron --- .../bindings/iio/temperature/adi,ltc2983.yaml | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml index bbac5f5cfbb3..b603219fb0c9 100644 --- a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml +++ b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml @@ -230,13 +230,6 @@ patternProperties: resistor is used for multiple 2-, 3-, and/or 4-wire RTDs. 
type: boolean - adi,current-rotate: - description: - Boolean property which enables excitation current rotation to - automatically remove parasitic thermocouple effects. Note that - this property is not allowed for 2- and 3-wire RTDs. - type: boolean - adi,excitation-current-microamp: description: This property controls the magnitude of the excitation current @@ -269,10 +262,22 @@ patternProperties: required: - adi,rsense-handle - dependencies: - adi,current-rotate: [ "adi,rsense-share" ] - allOf: + - if: + properties: + adi,number-of-wires: + const: 4 + then: + properties: + adi,current-rotate: + description: + Whether to enable excitation current rotation to automatically + remove parasitic thermocouple effects. + type: boolean + + dependencies: + adi,current-rotate: [ "adi,rsense-share" ] + - if: properties: adi,sensor-type: From ee7e336c48d3f8f5bef748b202792290d0fa0c1e Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Thu, 3 Nov 2022 15:00:36 +0200 Subject: [PATCH 2358/4122] dt-bindings: iio: temperature: ltc2983: change default excitation for custom thermistors Excitation cannot be set to auto-range when using a custom thermistor or Steinhart sensor type. Default it to 1000nA to match the driver and remove the auto-range value from the enum. 
Signed-off-by: Cosmin Tanislav Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20221103130041.2153295-9-demonsingur@gmail.com Signed-off-by: Jonathan Cameron --- .../bindings/iio/temperature/adi,ltc2983.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml index b603219fb0c9..1b6a11f2aa9d 100644 --- a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml +++ b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml @@ -378,6 +378,11 @@ patternProperties: adi,sensor-type: const: 26 then: + properties: + adi,excitation-current-nanoamp: + enum: [250, 500, 1000, 5000, 10000, 25000, 50000, 100000, + 250000, 500000, 1000000] + default: 1000 required: - adi,custom-steinhart - if: @@ -385,6 +390,11 @@ patternProperties: adi,sensor-type: const: 27 then: + properties: + adi,excitation-current-nanoamp: + enum: [250, 500, 1000, 5000, 10000, 25000, 50000, 100000, + 250000, 500000, 1000000] + default: 1000 required: - adi,custom-thermistor From 95027f5b3999b1b47a407ae4aa7bee18591164d1 Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Thu, 3 Nov 2022 15:00:37 +0200 Subject: [PATCH 2359/4122] dt-bindings: iio: temperature: ltc2983: refine descriptions Some descriptions are too verbose, while others are too succinct. Rewrite them all. 
Signed-off-by: Cosmin Tanislav Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20221103130041.2153295-10-demonsingur@gmail.com Signed-off-by: Jonathan Cameron --- .../bindings/iio/temperature/adi,ltc2983.yaml | 163 +++++++----------- 1 file changed, 65 insertions(+), 98 deletions(-) diff --git a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml index 1b6a11f2aa9d..676801b036cf 100644 --- a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml +++ b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml @@ -25,21 +25,16 @@ properties: maxItems: 1 adi,mux-delay-config-us: - description: - The LTC2983 performs 2 or 3 internal conversion cycles per temperature - result. Each conversion cycle is performed with different excitation and - input multiplexer configurations. Prior to each conversion, these - excitation circuits and input switch configurations are changed and an - internal 1ms delay ensures settling prior to the conversion cycle in most - cases. An extra delay can be configured using this property. The value is - rounded to nearest 100us. + description: | + Extra delay prior to each conversion, in addition to the internal 1ms + delay, for the multiplexer to switch input configurations and + excitation values. maximum: 255 default: 0 adi,filter-notch-freq: description: - Set's the default setting of the digital filter. The default is - simultaneous 50/60Hz rejection. + Notch frequency of the digital filter. 0 - 50/60Hz rejection 1 - 60Hz rejection 2 - 50Hz rejection @@ -57,17 +52,18 @@ properties: patternProperties: "@([0-9a-f]+)$": type: object + description: Sensor. properties: reg: description: - The channel number. It can be connected to one of the 20 channels of - the device. + Channel number. Connects the sensor to the channel with this number + of the device. 
minimum: 1 maximum: 20 adi,sensor-type: - description: Identifies the type of sensor connected to the device. + description: Type of sensor connected to the device. $ref: /schemas/types.yaml#/definitions/uint32 required: @@ -76,9 +72,7 @@ patternProperties: "^thermocouple@": type: object - description: - Represents a thermocouple sensor which is connected to one of the device - channels. + description: Thermocouple sensor. properties: adi,sensor-type: @@ -97,31 +91,24 @@ patternProperties: maximum: 9 adi,single-ended: - description: - Boolean property which set's the thermocouple as single-ended. + description: Whether the sensor is single-ended. type: boolean adi,sensor-oc-current-microamp: - description: - This property set's the pulsed current value applied during - open-circuit detect. + description: Pulsed current value applied during open-circuit detect. enum: [10, 100, 500, 1000] default: 10 adi,cold-junction-handle: description: - Phandle which points to a sensor object responsible for measuring - the thermocouple cold junction temperature. + Sensor responsible for measuring the thermocouple cold junction + temperature. $ref: /schemas/types.yaml#/definitions/phandle adi,custom-thermocouple: description: - This is a table, where each entry should be a pair of - voltage(mv)-temperature(K). The entries must be given in nv and uK - so that, the original values must be multiplied by 1000000. For - more details look at table 69 and 70. - Note should be signed, but dtc doesn't currently maintain the - sign. + Used for digitizing custom thermocouples. + See Page 59 of the datasheet. $ref: /schemas/types.yaml#/definitions/uint64-matrix minItems: 3 maxItems: 64 @@ -141,55 +128,50 @@ patternProperties: "^diode@": type: object - description: - Represents a diode sensor which is connected to one of the device - channels. + description: Diode sensor. properties: adi,sensor-type: - description: Identifies the sensor as a diode. + description: Sensor type for diodes. 
$ref: /schemas/types.yaml#/definitions/uint32 const: 28 adi,single-ended: - description: Boolean property which set's the diode as single-ended. + description: Whether the sensor is single-ended. type: boolean adi,three-conversion-cycles: description: - Boolean property which set's three conversion cycles removing - parasitic resistance effects between the LTC2983 and the diode. + Whether to use three conversion cycles to remove parasitic + resistance between the device and the diode. type: boolean adi,average-on: description: - Boolean property which enables a running average of the diode - temperature reading. This reduces the noise when the diode is used - as a cold junction temperature element on an isothermal block - where temperatures change slowly. + Whether to use a running average of the diode temperature + reading to reduce the noise when the diode is used as a cold + junction temperature element on an isothermal block where + temperatures change slowly. type: boolean adi,excitation-current-microamp: description: - This property controls the magnitude of the excitation current - applied to the diode. Depending on the number of conversions - cycles, this property will assume different predefined values on - each cycle. Just set the value of the first cycle (1l). + Magnitude of the 1l excitation current applied to the diode. + 4l excitation current will be 4 times this value, and 8l + excitation current will be 8 times this value. enum: [10, 20, 40, 80] default: 10 adi,ideal-factor-value: description: - This property sets the diode ideality factor. The real value must - be multiplied by 1000000 to remove the fractional part. For more - information look at table 20 of the datasheet. + Diode ideality factor. + Set this property to 1000000 times the real value. $ref: /schemas/types.yaml#/definitions/uint32 default: 0 "^rtd@": type: object - description: - Represents a rtd sensor which is connected to one of the device channels. 
+ description: RTD sensor. 
properties: reg: @@ -212,35 +194,35 @@ patternProperties: maximum: 18 adi,rsense-handle: - description: - Phandle pointing to a rsense object associated with this RTD. + description: Associated sense resistor sensor. $ref: /schemas/types.yaml#/definitions/phandle adi,number-of-wires: description: - Identifies the number of wires used by the RTD. Setting this - property to 5 means 4 wires with Kelvin Rsense. + Number of wires used by the RTD. + 5 means 4 wires with Kelvin sense resistor. $ref: /schemas/types.yaml#/definitions/uint32 enum: [2, 3, 4, 5] default: 2 adi,rsense-share: description: - Boolean property which enables Rsense sharing, where one sense - resistor is used for multiple 2-, 3-, and/or 4-wire RTDs. + Whether to enable sense resistor sharing, where one sense + resistor is used by multiple sensors. type: boolean adi,excitation-current-microamp: - description: - This property controls the magnitude of the excitation current - applied to the RTD. + description: Excitation current applied to the RTD. enum: [5, 10, 25, 50, 100, 250, 500, 1000] default: 5 adi,rtd-curve: - description: - This property set the RTD curve used and the corresponding - Callendar-VanDusen constants. Look at table 30 of the datasheet. + description: | + RTD curve and the corresponding Callendar-VanDusen constants. + 0 - European + 1 - American + 2 - Japanese + 3 - ITS-90 $ref: /schemas/types.yaml#/definitions/uint32 minimum: 0 maximum: 3 @@ -248,9 +230,8 @@ patternProperties: adi,custom-rtd: description: - This is a table, where each entry should be a pair of - resistance(ohm)-temperature(K). The entries added here are in uohm - and uK. For more details values look at table 74 and 75. + Used for digitizing custom RTDs. + See Page 62 of the datasheet. 
$ref: /schemas/types.yaml#/definitions/uint64-matrix minItems: 3 maxItems: 64 @@ -288,9 +269,7 @@ patternProperties: "^thermistor@": type: object - description: - Represents a thermistor sensor which is connected to one of the device - channels. + description: Thermistor sensor. properties: adi,sensor-type: @@ -309,34 +288,29 @@ patternProperties: maximum: 27 adi,rsense-handle: - description: - Phandle pointing to a rsense object associated with this - thermistor. + description: Associated sense resistor sensor. $ref: /schemas/types.yaml#/definitions/phandle adi,single-ended: - description: - Boolean property which set's the thermistor as single-ended. + description: Whether the sensor is single-ended. type: boolean adi,rsense-share: description: - Boolean property which enables Rsense sharing, where one sense - resistor is used for multiple thermistors. Note that this property - is ignored if adi,single-ended is set. + Whether to enable sense resistor sharing, where one sense + resistor is used by multiple sensors. type: boolean adi,current-rotate: description: - Boolean property which enables excitation current rotation to - automatically remove parasitic thermocouple effects. + Whether to enable excitation current rotation to automatically + remove parasitic thermocouple effects. type: boolean adi,excitation-current-nanoamp: description: - This property controls the magnitude of the excitation current - applied to the thermistor. Value 0 set's the sensor in auto-range - mode. + Excitation current applied to the thermistor. + 0 sets the sensor in auto-range mode. $ref: /schemas/types.yaml#/definitions/uint32 enum: [0, 250, 500, 1000, 5000, 10000, 25000, 50000, 100000, 250000, 500000, 1000000] @@ -344,10 +318,8 @@ patternProperties: adi,custom-thermistor: description: - This is a table, where each entry should be a pair of - resistance(ohm)-temperature(K). The entries added here are in uohm - and uK only for custom thermistors. 
For more details look at table - 78 and 79. + Used for digitizing custom thermistors. + See Page 65 of the datasheet. $ref: /schemas/types.yaml#/definitions/uint64-matrix minItems: 3 maxItems: 64 @@ -358,10 +330,9 @@ patternProperties: adi,custom-steinhart: description: - Steinhart-Hart coefficients are also supported and can - be programmed into the device memory using this property. For - Steinhart sensors the coefficients are given in the raw - format. Look at table 82 for more information. + Steinhart-Hart coefficients in raw format, used for digitizing + custom thermistors. + See Page 68 of the datasheet. $ref: /schemas/types.yaml#/definitions/uint32-array minItems: 6 maxItems: 6 @@ -400,23 +371,21 @@ patternProperties: "^adc@": type: object - description: Represents a channel which is being used as a direct adc. + description: Direct ADC sensor. properties: adi,sensor-type: - description: Identifies the sensor as a direct adc. + description: Sensor type for direct ADC sensors. $ref: /schemas/types.yaml#/definitions/uint32 const: 30 adi,single-ended: - description: Boolean property which set's the adc as single-ended. + description: Whether the sensor is single-ended. type: boolean "^rsense@": type: object - description: - Represents a rsense which is connected to one of the device channels. - Rsense are used by thermistors and RTD's. + description: Sense resistor sensor. properties: reg: @@ -424,14 +393,12 @@ patternProperties: maximum: 20 adi,sensor-type: - description: Identifies the sensor as a rsense. + description: Sensor type sense resistor sensors. $ref: /schemas/types.yaml#/definitions/uint32 const: 29 adi,rsense-val-milli-ohms: - description: - Sets the value of the sense resistor. Look at table 20 of the - datasheet for information. + description: Value of the sense resistor. 
required: - adi,rsense-val-milli-ohms From 93144097f51bf01591da5b8f8e37b6c616a0b23a Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Thu, 3 Nov 2022 15:00:38 +0200 Subject: [PATCH 2360/4122] dt-bindings: iio: temperature: ltc2983: describe broken mux delay property The 'adi,mux-delay-config-us' property is broken. It was supposed to be in us, but the value is actually written directly to the register. Describe the fact that it is broken and how it actually works. Signed-off-by: Cosmin Tanislav Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20221103130041.2153295-11-demonsingur@gmail.com Signed-off-by: Jonathan Cameron --- .../devicetree/bindings/iio/temperature/adi,ltc2983.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml index 676801b036cf..467e165e9b0b 100644 --- a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml +++ b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml @@ -29,6 +29,9 @@ properties: Extra delay prior to each conversion, in addition to the internal 1ms delay, for the multiplexer to switch input configurations and excitation values. + + This property is supposed to be in microseconds, but to maintain + compatibility, this value will be multiplied by 100 before usage. maximum: 255 default: 0 From cf738c544da333f6a8b7927f848ea388d7e58421 Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Thu, 3 Nov 2022 15:00:39 +0200 Subject: [PATCH 2361/4122] dt-bindings: iio: temperature: ltc2983: use generic node name in example Examples should use the generic IIO node name of temperature-sensor. Fix it. 
Signed-off-by: Cosmin Tanislav Link: https://lore.kernel.org/r/20221103130041.2153295-12-demonsingur@gmail.com Signed-off-by: Jonathan Cameron --- .../devicetree/bindings/iio/temperature/adi,ltc2983.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml index 467e165e9b0b..0e8333260a44 100644 --- a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml +++ b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml @@ -420,7 +420,7 @@ examples: #address-cells = <1>; #size-cells = <0>; - sensor_ltc2983: ltc2983@0 { + temperature-sensor@0 { compatible = "adi,ltc2983"; reg = <0>; From d24052695057eb5254d9fedcb5494428f23d3ecb Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Thu, 3 Nov 2022 15:00:40 +0200 Subject: [PATCH 2362/4122] dt-bindings: iio: temperature: ltc2983: support more parts Add support for the following parts: * LTC2984 * LTC2986 * LTM2985 The LTC2984 is a variant of the LTC2983 with EEPROM. The LTC2986 is a variant of the LTC2983 with only 10 channels, EEPROM and support for active analog temperature sensors. The LTM2985 is software-compatible with the LTC2986. 
Signed-off-by: Cosmin Tanislav Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20221103130041.2153295-13-demonsingur@gmail.com Signed-off-by: Jonathan Cameron --- .../bindings/iio/temperature/adi,ltc2983.yaml | 60 +++++++++++++++++-- 1 file changed, 56 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml index 0e8333260a44..44f8b0672f53 100644 --- a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml +++ b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml @@ -4,19 +4,30 @@ $id: http://devicetree.org/schemas/iio/temperature/adi,ltc2983.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Analog Devices LTC2983 Multi-sensor Temperature system +title: Analog Devices LTC2983, LTC2986, LTM2985 Multi-sensor Temperature system maintainers: - Nuno Sá description: | - Analog Devices LTC2983 Multi-Sensor Digital Temperature Measurement System + Analog Devices LTC2983, LTC2984, LTC2986, LTM2985 Multi-Sensor Digital + Temperature Measurement Systems + https://www.analog.com/media/en/technical-documentation/data-sheets/2983fc.pdf + https://www.analog.com/media/en/technical-documentation/data-sheets/2984fb.pdf + https://www.analog.com/media/en/technical-documentation/data-sheets/29861fa.pdf + https://www.analog.com/media/en/technical-documentation/data-sheets/ltm2985.pdf properties: compatible: - enum: - - adi,ltc2983 + oneOf: + - enum: + - adi,ltc2983 + - adi,ltc2986 + - adi,ltm2985 + - items: + - const: adi,ltc2984 + - const: adi,ltc2983 reg: maxItems: 1 @@ -386,6 +397,35 @@ patternProperties: description: Whether the sensor is single-ended. type: boolean + "^temp@": + type: object + description: Active analog temperature sensor. + + properties: + adi,sensor-type: + description: Sensor type for active analog temperature sensors. 
+ $ref: /schemas/types.yaml#/definitions/uint32 + const: 31 + + adi,single-ended: + description: Whether the sensor is single-ended. + type: boolean + + adi,custom-temp: + description: + Used for digitizing active analog temperature sensors. + See Page 67 of the LTM2985 datasheet. + $ref: /schemas/types.yaml#/definitions/uint64-matrix + minItems: 3 + maxItems: 64 + items: + items: + - description: Voltage point in nV, signed. + - description: Temperature point in uK. + + required: + - adi,custom-temp + "^rsense@": type: object description: Sense resistor sensor. @@ -413,6 +453,18 @@ required: additionalProperties: false +allOf: + - if: + properties: + compatible: + contains: + enum: + - adi,ltc2983 + - adi,ltc2984 + then: + patternProperties: + "^temp@": false + examples: - | #include From 6f7cadcf664b04df3b2e9b9c6d65bf626aa1b411 Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Thu, 3 Nov 2022 15:00:41 +0200 Subject: [PATCH 2363/4122] iio: temperature: ltc2983: support more parts Add support for the following parts: * LTC2984 * LTC2986 * LTM2985 The LTC2984 is a variant of the LTC2983 with EEPROM. The LTC2986 is a variant of the LTC2983 with only 10 channels, EEPROM and support for active analog temperature sensors. The LTM2985 is software-compatible with the LTC2986. 
Signed-off-by: Cosmin Tanislav Link: https://lore.kernel.org/r/20221103130041.2153295-14-demonsingur@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/temperature/ltc2983.c | 183 ++++++++++++++++++++++++++++-- 1 file changed, 176 insertions(+), 7 deletions(-) diff --git a/drivers/iio/temperature/ltc2983.c b/drivers/iio/temperature/ltc2983.c index 1117991ca2ab..fcb96c44d954 100644 --- a/drivers/iio/temperature/ltc2983.c +++ b/drivers/iio/temperature/ltc2983.c @@ -25,9 +25,12 @@ #define LTC2983_STATUS_REG 0x0000 #define LTC2983_TEMP_RES_START_REG 0x0010 #define LTC2983_TEMP_RES_END_REG 0x005F +#define LTC2983_EEPROM_KEY_REG 0x00B0 +#define LTC2983_EEPROM_READ_STATUS_REG 0x00D0 #define LTC2983_GLOBAL_CONFIG_REG 0x00F0 #define LTC2983_MULT_CHANNEL_START_REG 0x00F4 #define LTC2983_MULT_CHANNEL_END_REG 0x00F7 +#define LTC2986_EEPROM_STATUS_REG 0x00F9 #define LTC2983_MUX_CONFIG_REG 0x00FF #define LTC2983_CHAN_ASSIGN_START_REG 0x0200 #define LTC2983_CHAN_ASSIGN_END_REG 0x024F @@ -35,13 +38,21 @@ #define LTC2983_CUST_SENS_TBL_END_REG 0x03CF #define LTC2983_DIFFERENTIAL_CHAN_MIN 2 -#define LTC2983_MAX_CHANNELS_NR 20 #define LTC2983_MIN_CHANNELS_NR 1 #define LTC2983_SLEEP 0x97 #define LTC2983_CUSTOM_STEINHART_SIZE 24 #define LTC2983_CUSTOM_SENSOR_ENTRY_SZ 6 #define LTC2983_CUSTOM_STEINHART_ENTRY_SZ 4 +#define LTC2983_EEPROM_KEY 0xA53C0F5A +#define LTC2983_EEPROM_WRITE_CMD 0x15 +#define LTC2983_EEPROM_READ_CMD 0x16 +#define LTC2983_EEPROM_STATUS_FAILURE_MASK GENMASK(3, 1) +#define LTC2983_EEPROM_READ_FAILURE_MASK GENMASK(7, 0) + +#define LTC2983_EEPROM_WRITE_TIME_MS 2600 +#define LTC2983_EEPROM_READ_TIME_MS 20 + #define LTC2983_CHAN_START_ADDR(chan) \ (((chan - 1) * 4) + LTC2983_CHAN_ASSIGN_START_REG) #define LTC2983_CHAN_RES_ADDR(chan) \ @@ -171,6 +182,7 @@ enum { LTC2983_SENSOR_DIODE = 28, LTC2983_SENSOR_SENSE_RESISTOR = 29, LTC2983_SENSOR_DIRECT_ADC = 30, + LTC2983_SENSOR_ACTIVE_TEMP = 31, }; #define to_thermocouple(_sensor) \ @@ -191,7 +203,17 @@ enum { #define 
to_adc(_sensor) \ container_of(_sensor, struct ltc2983_adc, sensor) +#define to_temp(_sensor) \ + container_of(_sensor, struct ltc2983_temp, sensor) + +struct ltc2983_chip_info { + unsigned int max_channels_nr; + bool has_temp; + bool has_eeprom; +}; + struct ltc2983_data { + const struct ltc2983_chip_info *info; struct regmap *regmap; struct spi_device *spi; struct mutex lock; @@ -210,6 +232,7 @@ struct ltc2983_data { */ __be32 temp __aligned(IIO_DMA_MINALIGN); __be32 chan_val; + __be32 eeprom_key; }; struct ltc2983_sensor { @@ -272,6 +295,12 @@ struct ltc2983_adc { bool single_ended; }; +struct ltc2983_temp { + struct ltc2983_sensor sensor; + struct ltc2983_custom_sensor *custom; + bool single_ended; +}; + /* * Convert to Q format numbers. These number's are integers where * the number of integer and fractional bits are specified. The resolution @@ -606,6 +635,22 @@ static int ltc2983_adc_assign_chan(struct ltc2983_data *st, return __ltc2983_chan_assign_common(st, sensor, chan_val); } +static int ltc2983_temp_assign_chan(struct ltc2983_data *st, + const struct ltc2983_sensor *sensor) +{ + struct ltc2983_temp *temp = to_temp(sensor); + u32 chan_val; + int ret; + + chan_val = LTC2983_ADC_SINGLE_ENDED(temp->single_ended); + + ret = __ltc2983_chan_custom_sensor_assign(st, temp->custom, &chan_val); + if (ret) + return ret; + + return __ltc2983_chan_assign_common(st, sensor, chan_val); +} + static struct ltc2983_sensor * ltc2983_thermocouple_new(const struct fwnode_handle *child, struct ltc2983_data *st, const struct ltc2983_sensor *sensor) @@ -771,10 +816,10 @@ ltc2983_rtd_new(const struct fwnode_handle *child, struct ltc2983_data *st, if (rtd->sensor_config & LTC2983_RTD_4_WIRE_MASK) { /* 4-wire */ u8 min = LTC2983_DIFFERENTIAL_CHAN_MIN, - max = LTC2983_MAX_CHANNELS_NR; + max = st->info->max_channels_nr; if (rtd->sensor_config & LTC2983_RTD_ROTATION_MASK) - max = LTC2983_MAX_CHANNELS_NR - 1; + max = st->info->max_channels_nr - 1; if (((rtd->sensor_config & 
LTC2983_RTD_KELVIN_R_SENSE_MASK) == LTC2983_RTD_KELVIN_R_SENSE_MASK) && @@ -1143,6 +1188,38 @@ static struct ltc2983_sensor *ltc2983_adc_new(struct fwnode_handle *child, return &adc->sensor; } +static struct ltc2983_sensor *ltc2983_temp_new(struct fwnode_handle *child, + struct ltc2983_data *st, + const struct ltc2983_sensor *sensor) +{ + struct ltc2983_temp *temp; + + temp = devm_kzalloc(&st->spi->dev, sizeof(*temp), GFP_KERNEL); + if (!temp) + return ERR_PTR(-ENOMEM); + + if (fwnode_property_read_bool(child, "adi,single-ended")) + temp->single_ended = true; + + if (!temp->single_ended && + sensor->chan < LTC2983_DIFFERENTIAL_CHAN_MIN) { + dev_err(&st->spi->dev, "Invalid chan:%d for differential temp\n", + sensor->chan); + return ERR_PTR(-EINVAL); + } + + temp->custom = __ltc2983_custom_sensor_new(st, child, "adi,custom-temp", + false, 4096, true); + if (IS_ERR(temp->custom)) + return ERR_CAST(temp->custom); + + /* set common parameters */ + temp->sensor.assign_chan = ltc2983_temp_assign_chan; + temp->sensor.fault_handler = ltc2983_common_fault_handler; + + return &temp->sensor; +} + static int ltc2983_chan_read(struct ltc2983_data *st, const struct ltc2983_sensor *sensor, int *val) { @@ -1302,10 +1379,10 @@ static int ltc2983_parse_dt(struct ltc2983_data *st) /* check if we have a valid channel */ if (sensor.chan < LTC2983_MIN_CHANNELS_NR || - sensor.chan > LTC2983_MAX_CHANNELS_NR) { + sensor.chan > st->info->max_channels_nr) { ret = -EINVAL; dev_err(dev, "chan:%d must be from %u to %u\n", sensor.chan, - LTC2983_MIN_CHANNELS_NR, LTC2983_MAX_CHANNELS_NR); + LTC2983_MIN_CHANNELS_NR, st->info->max_channels_nr); goto put_child; } else if (channel_avail_mask & BIT(sensor.chan)) { ret = -EINVAL; @@ -1345,6 +1422,9 @@ static int ltc2983_parse_dt(struct ltc2983_data *st) st->iio_channels--; } else if (sensor.type == LTC2983_SENSOR_DIRECT_ADC) { st->sensors[chan] = ltc2983_adc_new(child, st, &sensor); + } else if (st->info->has_temp && + sensor.type == 
LTC2983_SENSOR_ACTIVE_TEMP) { + st->sensors[chan] = ltc2983_temp_new(child, st, &sensor); } else { dev_err(dev, "Unknown sensor type %d\n", sensor.type); ret = -EINVAL; @@ -1371,6 +1451,45 @@ put_child: return ret; } +static int ltc2983_eeprom_cmd(struct ltc2983_data *st, unsigned int cmd, + unsigned int wait_time, unsigned int status_reg, + unsigned long status_fail_mask) +{ + unsigned long time; + unsigned int val; + int ret; + + ret = regmap_bulk_write(st->regmap, LTC2983_EEPROM_KEY_REG, + &st->eeprom_key, sizeof(st->eeprom_key)); + if (ret) + return ret; + + reinit_completion(&st->completion); + + ret = regmap_write(st->regmap, LTC2983_STATUS_REG, + LTC2983_STATUS_START(true) | cmd); + if (ret) + return ret; + + time = wait_for_completion_timeout(&st->completion, + msecs_to_jiffies(wait_time)); + if (!time) { + dev_err(&st->spi->dev, "EEPROM command timed out\n"); + return -ETIMEDOUT; + } + + ret = regmap_read(st->regmap, status_reg, &val); + if (ret) + return ret; + + if (val & status_fail_mask) { + dev_err(&st->spi->dev, "EEPROM command failed: 0x%02X\n", val); + return -EINVAL; + } + + return 0; +} + static int ltc2983_setup(struct ltc2983_data *st, bool assign_iio) { u32 iio_chan_t = 0, iio_chan_v = 0, chan, iio_idx = 0, status; @@ -1396,6 +1515,15 @@ static int ltc2983_setup(struct ltc2983_data *st, bool assign_iio) if (ret) return ret; + if (st->info->has_eeprom && !assign_iio) { + ret = ltc2983_eeprom_cmd(st, LTC2983_EEPROM_READ_CMD, + LTC2983_EEPROM_READ_TIME_MS, + LTC2983_EEPROM_READ_STATUS_REG, + LTC2983_EEPROM_READ_FAILURE_MASK); + if (!ret) + return 0; + } + for (chan = 0; chan < st->num_channels; chan++) { u32 chan_type = 0, *iio_chan; @@ -1435,9 +1563,13 @@ static int ltc2983_setup(struct ltc2983_data *st, bool assign_iio) static const struct regmap_range ltc2983_reg_ranges[] = { regmap_reg_range(LTC2983_STATUS_REG, LTC2983_STATUS_REG), regmap_reg_range(LTC2983_TEMP_RES_START_REG, LTC2983_TEMP_RES_END_REG), + 
regmap_reg_range(LTC2983_EEPROM_KEY_REG, LTC2983_EEPROM_KEY_REG), + regmap_reg_range(LTC2983_EEPROM_READ_STATUS_REG, + LTC2983_EEPROM_READ_STATUS_REG), regmap_reg_range(LTC2983_GLOBAL_CONFIG_REG, LTC2983_GLOBAL_CONFIG_REG), regmap_reg_range(LTC2983_MULT_CHANNEL_START_REG, LTC2983_MULT_CHANNEL_END_REG), + regmap_reg_range(LTC2986_EEPROM_STATUS_REG, LTC2986_EEPROM_STATUS_REG), regmap_reg_range(LTC2983_MUX_CONFIG_REG, LTC2983_MUX_CONFIG_REG), regmap_reg_range(LTC2983_CHAN_ASSIGN_START_REG, LTC2983_CHAN_ASSIGN_END_REG), @@ -1482,6 +1614,12 @@ static int ltc2983_probe(struct spi_device *spi) st = iio_priv(indio_dev); + st->info = device_get_match_data(&spi->dev); + if (!st->info) + st->info = (void *)spi_get_device_id(spi)->driver_data; + if (!st->info) + return -ENODEV; + st->regmap = devm_regmap_init_spi(spi, &ltc2983_regmap_config); if (IS_ERR(st->regmap)) { dev_err(&spi->dev, "Failed to initialize regmap\n"); @@ -1491,6 +1629,7 @@ static int ltc2983_probe(struct spi_device *spi) mutex_init(&st->lock); init_completion(&st->completion); st->spi = spi; + st->eeprom_key = cpu_to_be32(LTC2983_EEPROM_KEY); spi_set_drvdata(spi, st); ret = ltc2983_parse_dt(st); @@ -1524,6 +1663,15 @@ static int ltc2983_probe(struct spi_device *spi) return ret; } + if (st->info->has_eeprom) { + ret = ltc2983_eeprom_cmd(st, LTC2983_EEPROM_WRITE_CMD, + LTC2983_EEPROM_WRITE_TIME_MS, + LTC2986_EEPROM_STATUS_REG, + LTC2983_EEPROM_STATUS_FAILURE_MASK); + if (ret) + return ret; + } + indio_dev->name = name; indio_dev->num_channels = st->iio_channels; indio_dev->channels = st->iio_chan; @@ -1554,14 +1702,35 @@ static int ltc2983_suspend(struct device *dev) static DEFINE_SIMPLE_DEV_PM_OPS(ltc2983_pm_ops, ltc2983_suspend, ltc2983_resume); +static const struct ltc2983_chip_info ltc2983_chip_info_data = { + .max_channels_nr = 20, +}; + +static const struct ltc2983_chip_info ltc2984_chip_info_data = { + .max_channels_nr = 20, + .has_eeprom = true, +}; + +static const struct ltc2983_chip_info
ltc2986_chip_info_data = { + .max_channels_nr = 10, + .has_temp = true, + .has_eeprom = true, +}; + static const struct spi_device_id ltc2983_id_table[] = { - { "ltc2983" }, + { "ltc2983", (kernel_ulong_t)&ltc2983_chip_info_data }, + { "ltc2984", (kernel_ulong_t)&ltc2984_chip_info_data }, + { "ltc2986", (kernel_ulong_t)&ltc2986_chip_info_data }, + { "ltm2985", (kernel_ulong_t)&ltc2986_chip_info_data }, {}, }; MODULE_DEVICE_TABLE(spi, ltc2983_id_table); static const struct of_device_id ltc2983_of_match[] = { - { .compatible = "adi,ltc2983" }, + { .compatible = "adi,ltc2983", .data = &ltc2983_chip_info_data }, + { .compatible = "adi,ltc2984", .data = &ltc2984_chip_info_data }, + { .compatible = "adi,ltc2986", .data = &ltc2986_chip_info_data }, + { .compatible = "adi,ltm2985", .data = &ltc2986_chip_info_data }, {}, }; MODULE_DEVICE_TABLE(of, ltc2983_of_match); From bcf22afd2ce0b8bd4fad50bfda92e8cbbc483c72 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sat, 5 Nov 2022 12:51:07 +0000 Subject: [PATCH 2364/4122] iio: temperature: mlx90632: Add error handling for devm_pm_runtime_enable() This call can fail so handling is necessary even if it is very unlikely.
Reported-by: coverity-bot Addresses-Coverity-ID: 1527134 ("Error handling issues") Fixes: 2aebc223fc7c ("iio: temperature: mlx90632 Add runtime powermanagement modes") Signed-off-by: Jonathan Cameron Acked-off-by: Crt Mori Link: https://lore.kernel.org/r/20221105125108.383193-2-jic23@kernel.org --- drivers/iio/temperature/mlx90632.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/iio/temperature/mlx90632.c b/drivers/iio/temperature/mlx90632.c index a17fe5f4967a..7572ae3f8432 100644 --- a/drivers/iio/temperature/mlx90632.c +++ b/drivers/iio/temperature/mlx90632.c @@ -1267,7 +1267,10 @@ static int mlx90632_probe(struct i2c_client *client, pm_runtime_get_noresume(&client->dev); pm_runtime_set_active(&client->dev); - devm_pm_runtime_enable(&client->dev); + ret = devm_pm_runtime_enable(&client->dev); + if (ret) + return ret; + pm_runtime_set_autosuspend_delay(&client->dev, MLX90632_SLEEP_DELAY_MS); pm_runtime_use_autosuspend(&client->dev); pm_runtime_put_autosuspend(&client->dev); From 99043ba702243f69d1853bd8aeca01e22836ede3 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sat, 5 Nov 2022 12:51:08 +0000 Subject: [PATCH 2365/4122] iio: temperature: mlx90632: Add missing static marking on devm_pm_ops Only used within this file, so should be marked static. 
Fixes: 2aebc223fc7c ("iio: temperature: mlx90632 Add runtime powermanagement modes") Signed-off-by: Jonathan Cameron Acked-off-by: Crt Mori Link: https://lore.kernel.org/r/20221105125108.383193-3-jic23@kernel.org --- drivers/iio/temperature/mlx90632.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/temperature/mlx90632.c b/drivers/iio/temperature/mlx90632.c index 7572ae3f8432..f1f5ebc145b1 100644 --- a/drivers/iio/temperature/mlx90632.c +++ b/drivers/iio/temperature/mlx90632.c @@ -1326,7 +1326,7 @@ static int mlx90632_pm_runtime_suspend(struct device *dev) return mlx90632_pwr_set_sleep_step(data->regmap); } -const struct dev_pm_ops mlx90632_pm_ops = { +static const struct dev_pm_ops mlx90632_pm_ops = { SYSTEM_SLEEP_PM_OPS(mlx90632_pm_suspend, mlx90632_pm_resume) RUNTIME_PM_OPS(mlx90632_pm_runtime_suspend, NULL, NULL) }; From c486b7019398a7a451631d5d99b67480768bfd25 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Sat, 5 Nov 2022 15:51:57 -0700 Subject: [PATCH 2366/4122] iio: proximity: sx9360: Add a new ACPI hardware ID From https://treexy.com/products/driver-fusion/database/sensors/semtech/sx9360-proximity/ sx9360 SAR sensor can be presented with ACPI ID SAMM0208. Whilst this does not appear to be an official ACPI ID, it is in the wild (perhaps due to a typo on the SAM PnP ID). 
Reported-by: Jordi Torres Signed-off-by: Gwendal Grignou Link: https://lore.kernel.org/r/20221105225157.10081-1-gwendal@chromium.org Signed-off-by: Jonathan Cameron --- drivers/iio/proximity/sx9360.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/proximity/sx9360.c b/drivers/iio/proximity/sx9360.c index 7fa2213d23ba..6e19d22e6a01 100644 --- a/drivers/iio/proximity/sx9360.c +++ b/drivers/iio/proximity/sx9360.c @@ -865,6 +865,7 @@ static DEFINE_SIMPLE_DEV_PM_OPS(sx9360_pm_ops, sx9360_suspend, sx9360_resume); static const struct acpi_device_id sx9360_acpi_match[] = { { "STH9360", SX9360_WHOAMI_VALUE }, + { "SAMM0208", SX9360_WHOAMI_VALUE }, { } }; MODULE_DEVICE_TABLE(acpi, sx9360_acpi_match); From 46975081f75105de5a6bda05af90323a4894d2be Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sun, 6 Nov 2022 15:36:53 +0100 Subject: [PATCH 2367/4122] iio: imu: st_lsm6dsx: add support to LSM6DSV16X Add support to STM LSM6DSV16X (accelerometer and gyroscope) Mems sensor. The LSM6DSV16X sensor can use LSM6DSV as fallback device since it implements all the LSM6DSV16X features currently implemented in st_lsm6dsx. Datasheet: https://www.st.com/resource/en/datasheet/lsm6dsv16x.pdf Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/284b251f861dff30c399e5736a843c8e3a497249.1667745215.git.lorenzo@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/imu/st_lsm6dsx/Kconfig | 3 ++- drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h | 2 ++ drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c | 6 +++++- drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c | 5 +++++ drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_spi.c | 5 +++++ 5 files changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/iio/imu/st_lsm6dsx/Kconfig b/drivers/iio/imu/st_lsm6dsx/Kconfig index 1c68bac94bce..37d02e0fc227 100644 --- a/drivers/iio/imu/st_lsm6dsx/Kconfig +++ b/drivers/iio/imu/st_lsm6dsx/Kconfig @@ -13,7 +13,8 @@ config IIO_ST_LSM6DSX sensor. 
Supported devices: lsm6ds3, lsm6ds3h, lsm6dsl, lsm6dsm, ism330dlc, lsm6dso, lsm6dsox, asm330lhh, asm330lhhx, lsm6dsr, lsm6ds3tr-c, ism330dhcx, lsm6dsrx, lsm6ds0, lsm6dsop, lsm6dstx, - lsm6dsv, the accelerometer/gyroscope of lsm9ds1 and lsm6dst. + lsm6dsv, lsm6dsv16x, the accelerometer/gyroscope of lsm9ds1 + and lsm6dst. To compile this driver as a module, choose M here: the module will be called st_lsm6dsx. diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h index ab61895cf072..683cfadcf62e 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h @@ -34,6 +34,7 @@ #define ST_ASM330LHHX_DEV_NAME "asm330lhhx" #define ST_LSM6DSTX_DEV_NAME "lsm6dstx" #define ST_LSM6DSV_DEV_NAME "lsm6dsv" +#define ST_LSM6DSV16X_DEV_NAME "lsm6dsv16x" enum st_lsm6dsx_hw_id { ST_LSM6DS3_ID, @@ -55,6 +56,7 @@ enum st_lsm6dsx_hw_id { ST_ASM330LHHX_ID, ST_LSM6DSTX_ID, ST_LSM6DSV_ID, + ST_LSM6DSV16X_ID, ST_LSM6DSX_MAX_ID, }; diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c index 5e716a5071fd..acb6101aec5d 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c @@ -27,7 +27,7 @@ * - FIFO size: 4KB * * - LSM6DSO/LSM6DSOX/ASM330LHH/ASM330LHHX/LSM6DSR/ISM330DHCX/LSM6DST/LSM6DSOP/ - * LSM6DSTX: + * LSM6DSTX/LSM6DSV/LSM6DSV16X: * - Accelerometer/Gyroscope supported ODR [Hz]: 12.5, 26, 52, 104, 208, 416, * 833 * - Accelerometer supported full-scale [g]: +-2/+-4/+-8/+-16 @@ -1180,6 +1180,10 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = { .hw_id = ST_LSM6DSV_ID, .name = ST_LSM6DSV_DEV_NAME, .wai = 0x70, + }, { + .hw_id = ST_LSM6DSV16X_ID, + .name = ST_LSM6DSV16X_DEV_NAME, + .wai = 0x70, }, }, .channels = { diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c index 239c8920a31f..0faf1b4c11af 100644 --- 
a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c @@ -113,6 +113,10 @@ static const struct of_device_id st_lsm6dsx_i2c_of_match[] = { .compatible = "st,lsm6dsv", .data = (void *)ST_LSM6DSV_ID, }, + { + .compatible = "st,lsm6dsv16x", + .data = (void *)ST_LSM6DSV16X_ID, + }, {}, }; MODULE_DEVICE_TABLE(of, st_lsm6dsx_i2c_of_match); @@ -137,6 +141,7 @@ static const struct i2c_device_id st_lsm6dsx_i2c_id_table[] = { { ST_ASM330LHHX_DEV_NAME, ST_ASM330LHHX_ID }, { ST_LSM6DSTX_DEV_NAME, ST_LSM6DSTX_ID }, { ST_LSM6DSV_DEV_NAME, ST_LSM6DSV_ID }, + { ST_LSM6DSV16X_DEV_NAME, ST_LSM6DSV16X_ID }, {}, }; MODULE_DEVICE_TABLE(i2c, st_lsm6dsx_i2c_id_table); diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_spi.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_spi.c index 66705ef16ed0..57597aaa2a92 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_spi.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_spi.c @@ -113,6 +113,10 @@ static const struct of_device_id st_lsm6dsx_spi_of_match[] = { .compatible = "st,lsm6dsv", .data = (void *)ST_LSM6DSV_ID, }, + { + .compatible = "st,lsm6dsv16x", + .data = (void *)ST_LSM6DSV16X_ID, + }, {}, }; MODULE_DEVICE_TABLE(of, st_lsm6dsx_spi_of_match); @@ -137,6 +141,7 @@ static const struct spi_device_id st_lsm6dsx_spi_id_table[] = { { ST_ASM330LHHX_DEV_NAME, ST_ASM330LHHX_ID }, { ST_LSM6DSTX_DEV_NAME, ST_LSM6DSTX_ID }, { ST_LSM6DSV_DEV_NAME, ST_LSM6DSV_ID }, + { ST_LSM6DSV16X_DEV_NAME, ST_LSM6DSV16X_ID }, {}, }; MODULE_DEVICE_TABLE(spi, st_lsm6dsx_spi_id_table); From 03e8373c070daf14eed069783e3cf343470ca5cd Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sun, 6 Nov 2022 15:36:54 +0100 Subject: [PATCH 2368/4122] dt-bindings: iio: imu: st_lsm6dsx: add lsm6dsv16x Add device bindings for lsm6dsv16x IMU sensor. Use lsm6dsv as fallback device for lsm6dsv16x since it implements all the features currently supported by lsm6dsv16x. 
Signed-off-by: Lorenzo Bianconi Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/8d10a63ec6abd22863ab25addd8c2f578dbc9cd9.1667745215.git.lorenzo@kernel.org Signed-off-by: Jonathan Cameron --- Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml b/Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml index e7349a3275dd..5933270799f5 100644 --- a/Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml +++ b/Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml @@ -39,6 +39,9 @@ properties: - items: - const: st,lsm6dstx - const: st,lsm6dst + - items: + - const: st,lsm6dsv16x + - const: st,lsm6dsv reg: maxItems: 1 From 84aea36299a411f46d4372dc2d51c037ae70341a Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 11 Nov 2022 15:28:46 -0600 Subject: [PATCH 2369/4122] dt-bindings: iio: dac: adi,ad5758: Drop 'contains' from 'adi,dc-dc-mode' 'contains' applies to arrays, but 'adi,dc-dc-mode' is a scalar. So drop 'contains' from the 'if' schema. 
Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20221111212846.4104059-1-robh@kernel.org Signed-off-by: Jonathan Cameron --- Documentation/devicetree/bindings/iio/dac/adi,ad5758.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/iio/dac/adi,ad5758.yaml b/Documentation/devicetree/bindings/iio/dac/adi,ad5758.yaml index e49e7556175d..4e508bfcc9d8 100644 --- a/Documentation/devicetree/bindings/iio/dac/adi,ad5758.yaml +++ b/Documentation/devicetree/bindings/iio/dac/adi,ad5758.yaml @@ -102,8 +102,7 @@ allOf: - if: properties: adi,dc-dc-mode: - contains: - enum: [1, 3] + enum: [1, 3] then: properties: adi,range-microvolt: false From 6809ec97686f00b667468f7e8e582a6e3d50169b Mon Sep 17 00:00:00 2001 From: Jagan Teki Date: Tue, 8 Nov 2022 09:43:53 +0530 Subject: [PATCH 2370/4122] dt-bindings: iio: adc: rockchip-saradc: Add saradc for rv1126 Add saradc compatible string for rockchip rv1126. Cc: linux-iio@vger.kernel.org Cc: Jonathan Cameron Signed-off-by: Jagan Teki Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221108041400.157052-4-jagan@edgeble.ai Signed-off-by: Jonathan Cameron --- Documentation/devicetree/bindings/iio/adc/rockchip-saradc.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/iio/adc/rockchip-saradc.yaml b/Documentation/devicetree/bindings/iio/adc/rockchip-saradc.yaml index e512a14e41b4..da50b529c157 100644 --- a/Documentation/devicetree/bindings/iio/adc/rockchip-saradc.yaml +++ b/Documentation/devicetree/bindings/iio/adc/rockchip-saradc.yaml @@ -22,6 +22,7 @@ properties: - rockchip,rk3328-saradc - rockchip,rk3568-saradc - rockchip,rv1108-saradc + - rockchip,rv1126-saradc - const: rockchip,rk3399-saradc reg: From c896b9f0920fdcb9a815014ca1917522ba2c9b9a Mon Sep 17 00:00:00 2001 From: Jay Greco Date: Thu, 10 Nov 2022 19:29:32 +0000 Subject: [PATCH 2371/4122] iio: imu: inv_icm42600: Add support for icm42631 Add the required WHOAMI and 
device_id definitions to support the icm42631. Signed-off-by: Jay Greco Acked-by: Jean-Baptiste Maneyrol Link: https://lore.kernel.org/r/20221110192933.13616-2-grecojay@amazon.com Signed-off-by: Jonathan Cameron --- drivers/iio/imu/inv_icm42600/inv_icm42600.h | 2 ++ drivers/iio/imu/inv_icm42600/inv_icm42600_core.c | 5 +++++ drivers/iio/imu/inv_icm42600/inv_icm42600_i2c.c | 3 +++ drivers/iio/imu/inv_icm42600/inv_icm42600_spi.c | 3 +++ 4 files changed, 13 insertions(+) diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600.h b/drivers/iio/imu/inv_icm42600/inv_icm42600.h index 3d91469beccb..0e290c807b0f 100644 --- a/drivers/iio/imu/inv_icm42600/inv_icm42600.h +++ b/drivers/iio/imu/inv_icm42600/inv_icm42600.h @@ -22,6 +22,7 @@ enum inv_icm42600_chip { INV_CHIP_ICM42602, INV_CHIP_ICM42605, INV_CHIP_ICM42622, + INV_CHIP_ICM42631, INV_CHIP_NB, }; @@ -303,6 +304,7 @@ struct inv_icm42600_state { #define INV_ICM42600_WHOAMI_ICM42602 0x41 #define INV_ICM42600_WHOAMI_ICM42605 0x42 #define INV_ICM42600_WHOAMI_ICM42622 0x46 +#define INV_ICM42600_WHOAMI_ICM42631 0x5C /* User bank 1 (MSB 0x10) */ #define INV_ICM42600_REG_SENSOR_CONFIG0 0x1003 diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c index b63c5dab1a56..7b3a2a0dc2cb 100644 --- a/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c +++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c @@ -87,6 +87,11 @@ static const struct inv_icm42600_hw inv_icm42600_hw[INV_CHIP_NB] = { .name = "icm42622", .conf = &inv_icm42600_default_conf, }, + [INV_CHIP_ICM42631] = { + .whoami = INV_ICM42600_WHOAMI_ICM42631, + .name = "icm42631", + .conf = &inv_icm42600_default_conf, + }, }; const struct iio_mount_matrix * diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_i2c.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_i2c.c index 4f96989ddf4a..eb2681ad375f 100644 --- a/drivers/iio/imu/inv_icm42600/inv_icm42600_i2c.c +++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_i2c.c @@ -84,6 +84,9 @@ 
static const struct of_device_id inv_icm42600_of_matches[] = { }, { .compatible = "invensense,icm42622", .data = (void *)INV_CHIP_ICM42622, + }, { + .compatible = "invensense,icm42631", + .data = (void *)INV_CHIP_ICM42631, }, {} }; diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_spi.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_spi.c index 486b46e53113..6be4ac794937 100644 --- a/drivers/iio/imu/inv_icm42600/inv_icm42600_spi.c +++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_spi.c @@ -80,6 +80,9 @@ static const struct of_device_id inv_icm42600_of_matches[] = { }, { .compatible = "invensense,icm42622", .data = (void *)INV_CHIP_ICM42622, + }, { + .compatible = "invensense,icm42631", + .data = (void *)INV_CHIP_ICM42631, }, {} }; From 7b4452381a966de33707718aa8d79672cc8caa2f Mon Sep 17 00:00:00 2001 From: Jay Greco Date: Thu, 10 Nov 2022 19:29:33 +0000 Subject: [PATCH 2372/4122] dt-bindings: iio: imu: Add inv_icm42600 documentation Update the required documentation for the icm42631. Signed-off-by: Jay Greco Acked-by: Krzysztof Kozlowski Acked-by: Jean-Baptiste Maneyrol Link: https://lore.kernel.org/r/20221110192933.13616-3-grecojay@amazon.com Signed-off-by: Jonathan Cameron --- .../devicetree/bindings/iio/imu/invensense,icm42600.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/iio/imu/invensense,icm42600.yaml b/Documentation/devicetree/bindings/iio/imu/invensense,icm42600.yaml index 488349755c99..13c9abdd3131 100644 --- a/Documentation/devicetree/bindings/iio/imu/invensense,icm42600.yaml +++ b/Documentation/devicetree/bindings/iio/imu/invensense,icm42600.yaml @@ -31,6 +31,7 @@ properties: - invensense,icm42602 - invensense,icm42605 - invensense,icm42622 + - invensense,icm42631 reg: maxItems: 1 From 5ae34494cf37ae6383f4bd674f343ef20833024d Mon Sep 17 00:00:00 2001 From: Subhajit Ghosh Date: Thu, 10 Nov 2022 18:12:41 +0800 Subject: [PATCH 2373/4122] iio: light: apds9960: Fix iio_event_spec structures There is only one 
interrupt enable option for both ALS low and high thresholds, and one for both Proximity low and high thresholds. Signed-off-by: Subhajit Ghosh Reviewed-by: Matt Ranostay Link: https://lore.kernel.org/r/20221110101241.10576-1-subhajit.ghosh@vixtechnology.com Signed-off-by: Jonathan Cameron --- drivers/iio/light/apds9960.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/iio/light/apds9960.c b/drivers/iio/light/apds9960.c index b62c139baf41..997aa01ecc11 100644 --- a/drivers/iio/light/apds9960.c +++ b/drivers/iio/light/apds9960.c @@ -223,14 +223,16 @@ static const struct iio_event_spec apds9960_pxs_event_spec[] = { { .type = IIO_EV_TYPE_THRESH, .dir = IIO_EV_DIR_RISING, - .mask_separate = BIT(IIO_EV_INFO_VALUE) | - BIT(IIO_EV_INFO_ENABLE), + .mask_separate = BIT(IIO_EV_INFO_VALUE), }, { .type = IIO_EV_TYPE_THRESH, .dir = IIO_EV_DIR_FALLING, - .mask_separate = BIT(IIO_EV_INFO_VALUE) | - BIT(IIO_EV_INFO_ENABLE), + .mask_separate = BIT(IIO_EV_INFO_VALUE), + }, + { + .type = IIO_EV_TYPE_THRESH, + .mask_separate = BIT(IIO_EV_INFO_ENABLE), }, }; @@ -238,14 +240,16 @@ static const struct iio_event_spec apds9960_als_event_spec[] = { { .type = IIO_EV_TYPE_THRESH, .dir = IIO_EV_DIR_RISING, - .mask_separate = BIT(IIO_EV_INFO_VALUE) | - BIT(IIO_EV_INFO_ENABLE), + .mask_separate = BIT(IIO_EV_INFO_VALUE), }, { .type = IIO_EV_TYPE_THRESH, .dir = IIO_EV_DIR_FALLING, - .mask_separate = BIT(IIO_EV_INFO_VALUE) | - BIT(IIO_EV_INFO_ENABLE), + .mask_separate = BIT(IIO_EV_INFO_VALUE), + }, + { + .type = IIO_EV_TYPE_THRESH, + .mask_separate = BIT(IIO_EV_INFO_ENABLE), }, }; From 572cc583c92e7b113a0d0f650b02d80505920eef Mon Sep 17 00:00:00 2001 From: Deepak R Varma Date: Sun, 13 Nov 2022 10:58:20 +0530 Subject: [PATCH 2374/4122] staging: iio: meter: replace ternary operator by if condition Replace ternary operator by simple if based evaluation of the return value. Issue identified using coccicheck. 
Signed-off-by: Deepak R Varma Link: https://lore.kernel.org/r/Y3CAdCa17WdWDYUa@qemulion Signed-off-by: Jonathan Cameron --- drivers/staging/iio/meter/ade7854-i2c.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/staging/iio/meter/ade7854-i2c.c b/drivers/staging/iio/meter/ade7854-i2c.c index a9a06e8dda51..71b67dd3c8e9 100644 --- a/drivers/staging/iio/meter/ade7854-i2c.c +++ b/drivers/staging/iio/meter/ade7854-i2c.c @@ -61,7 +61,10 @@ static int ade7854_i2c_write_reg(struct device *dev, unlock: mutex_unlock(&st->buf_lock); - return ret < 0 ? ret : 0; + if (ret < 0) + return ret; + + return 0; } static int ade7854_i2c_read_reg(struct device *dev, From 10c4539d1d5581646997d7f9b430319bcb9245b1 Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Mon, 14 Nov 2022 15:36:49 +0200 Subject: [PATCH 2375/4122] iio: adc: ad4130: depend on GPIOLIB Fixes undefined references to 'gpiochip_get_data' and 'devm_gpiochip_add_data_with_key'. Signed-off-by: Cosmin Tanislav Reported-by: kernel test robot Fixes: 62094060cf3a ("iio: adc: ad4130: add AD4130 driver") Link: https://lore.kernel.org/r/20221114133649.1737027-1-cosmin.tanislav@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index 8d719fbb6acc..63f80d747cbd 100644 --- a/drivers/iio/adc/Kconfig +++ b/drivers/iio/adc/Kconfig @@ -24,6 +24,7 @@ config AD_SIGMA_DELTA config AD4130 tristate "Analog Device AD4130 ADC Driver" depends on SPI + depends on GPIOLIB select IIO_BUFFER select IIO_KFIFO_BUF select REGMAP_SPI From d94fbd9231a2b4062cf5972629999402ea7dd6fa Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 14 Nov 2022 10:25:34 +0100 Subject: [PATCH 2376/4122] iio: imu: st_lsm6dsx: fix LSM6DSV sensor description Fix sensor ODR description for LSM6DSV/LSM6DSV16X Fixes: a1c6d631ff12 ("iio: imu: st_lsm6dsx: add support to LSM6DSV") Signed-off-by: Lorenzo Bianconi Link: 
https://lore.kernel.org/r/653e14bf79bb88d8581d2bc42da2f784caaf3776.1668417770.git.lorenzo@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c index acb6101aec5d..b680682f9833 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c @@ -27,13 +27,20 @@ * - FIFO size: 4KB * * - LSM6DSO/LSM6DSOX/ASM330LHH/ASM330LHHX/LSM6DSR/ISM330DHCX/LSM6DST/LSM6DSOP/ - * LSM6DSTX/LSM6DSV/LSM6DSV16X: + * LSM6DSTX: * - Accelerometer/Gyroscope supported ODR [Hz]: 12.5, 26, 52, 104, 208, 416, * 833 * - Accelerometer supported full-scale [g]: +-2/+-4/+-8/+-16 * - Gyroscope supported full-scale [dps]: +-125/+-245/+-500/+-1000/+-2000 * - FIFO size: 3KB * + * - LSM6DSV/LSM6DSV16X: + * - Accelerometer/Gyroscope supported ODR [Hz]: 7.5, 15, 30, 60, 120, 240, + * 480, 960 + * - Accelerometer supported full-scale [g]: +-2/+-4/+-8/+-16 + * - Gyroscope supported full-scale [dps]: +-125/+-250/+-500/+-1000/+-2000 + * - FIFO size: 3KB + * * - LSM9DS1/LSM6DS0: * - Accelerometer supported ODR [Hz]: 10, 50, 119, 238, 476, 952 * - Accelerometer supported full-scale [g]: +-2/+-4/+-8/+-16 From 2c5c45e236c4706a261042f67766b0486c72d603 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:36:23 +0100 Subject: [PATCH 2377/4122] iio: accel: adxl372_i2c: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-50-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/accel/adxl372_i2c.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/accel/adxl372_i2c.c b/drivers/iio/accel/adxl372_i2c.c index 4efb70a5fe40..e5f310ea65ff 100644 --- a/drivers/iio/accel/adxl372_i2c.c +++ b/drivers/iio/accel/adxl372_i2c.c @@ -18,9 +18,9 @@ static const struct regmap_config adxl372_regmap_config = { .readable_noinc_reg = adxl372_readable_noinc_reg, }; -static int adxl372_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int adxl372_i2c_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct regmap *regmap; unsigned int regval; int ret; @@ -58,7 +58,7 @@ static struct i2c_driver adxl372_i2c_driver = { .name = "adxl372_i2c", .of_match_table = adxl372_of_match, }, - .probe = adxl372_i2c_probe, + .probe_new = adxl372_i2c_probe, .id_table = adxl372_i2c_id, }; From c476246bd5abb1f6d40054d83fea3f1a407ccb11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:36:24 +0100 Subject: [PATCH 2378/4122] iio: accel: bma180: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-51-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/accel/bma180.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/accel/bma180.c b/drivers/iio/accel/bma180.c index d03fc3400f94..eb697eeb4301 100644 --- a/drivers/iio/accel/bma180.c +++ b/drivers/iio/accel/bma180.c @@ -921,9 +921,9 @@ static const struct iio_trigger_ops bma180_trigger_ops = { .reenable = bma180_trig_reen, }; -static int bma180_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int bma180_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct device *dev = &client->dev; struct bma180_data *data; struct iio_dev *indio_dev; @@ -1134,7 +1134,7 @@ static struct i2c_driver bma180_driver = { .pm = pm_sleep_ptr(&bma180_pm_ops), .of_match_table = bma180_of_match, }, - .probe = bma180_probe, + .probe_new = bma180_probe, .remove = bma180_remove, .id_table = bma180_ids, }; From e599500f96fb51417320b8b51ae4c36868672b13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:36:25 +0100 Subject: [PATCH 2379/4122] iio: accel: bma400: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-52-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/accel/bma400_i2c.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/accel/bma400_i2c.c b/drivers/iio/accel/bma400_i2c.c index 1ba2a982ea73..688b06dae669 100644 --- a/drivers/iio/accel/bma400_i2c.c +++ b/drivers/iio/accel/bma400_i2c.c @@ -13,9 +13,9 @@ #include "bma400.h" -static int bma400_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int bma400_i2c_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct regmap *regmap; regmap = devm_regmap_init_i2c(client, &bma400_regmap_config); @@ -44,7 +44,7 @@ static struct i2c_driver bma400_i2c_driver = { .name = "bma400", .of_match_table = bma400_of_i2c_match, }, - .probe = bma400_i2c_probe, + .probe_new = bma400_i2c_probe, .id_table = bma400_i2c_ids, }; From a141d225f6d62809d2b6a6c916929a8bbaaaaad5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:36:26 +0100 Subject: [PATCH 2380/4122] iio: accel: bmc150: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-53-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/accel/bmc150-accel-i2c.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/accel/bmc150-accel-i2c.c b/drivers/iio/accel/bmc150-accel-i2c.c index be8cc598b88e..509cab2bd694 100644 --- a/drivers/iio/accel/bmc150-accel-i2c.c +++ b/drivers/iio/accel/bmc150-accel-i2c.c @@ -171,9 +171,9 @@ static void bmc150_acpi_dual_accel_probe(struct i2c_client *client) {} static void bmc150_acpi_dual_accel_remove(struct i2c_client *client) {} #endif -static int bmc150_accel_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int bmc150_accel_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct regmap *regmap; const char *name = NULL; enum bmc150_type type = BOSCH_UNKNOWN; @@ -269,7 +269,7 @@ static struct i2c_driver bmc150_accel_driver = { .acpi_match_table = ACPI_PTR(bmc150_accel_acpi_match), .pm = &bmc150_accel_pm_ops, }, - .probe = bmc150_accel_probe, + .probe_new = bmc150_accel_probe, .remove = bmc150_accel_remove, .id_table = bmc150_accel_id, }; From a8fab44897dd959c01a88a806fd4252a29c62055 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:36:27 +0100 Subject: [PATCH 2381/4122] iio: accel: da280: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-54-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/accel/da280.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/accel/da280.c b/drivers/iio/accel/da280.c index 04e9c5678964..38a7d811610e 100644 --- a/drivers/iio/accel/da280.c +++ b/drivers/iio/accel/da280.c @@ -105,9 +105,9 @@ static void da280_disable(void *client) da280_enable(client, false); } -static int da280_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int da280_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); int ret; struct iio_dev *indio_dev; struct da280_data *data; @@ -184,7 +184,7 @@ static struct i2c_driver da280_driver = { .acpi_match_table = ACPI_PTR(da280_acpi_match), .pm = pm_sleep_ptr(&da280_pm_ops), }, - .probe = da280_probe, + .probe_new = da280_probe, .id_table = da280_i2c_id, }; From 09cec0835243b17316995bce30532f5ad51db355 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:36:28 +0100 Subject: [PATCH 2382/4122] iio: accel: kxcjk-1013: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-55-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/accel/kxcjk-1013.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/accel/kxcjk-1013.c b/drivers/iio/accel/kxcjk-1013.c index e626b6fa8a36..98da4bda22df 100644 --- a/drivers/iio/accel/kxcjk-1013.c +++ b/drivers/iio/accel/kxcjk-1013.c @@ -1424,9 +1424,9 @@ static const char *kxcjk1013_match_acpi_device(struct device *dev, return dev_name(dev); } -static int kxcjk1013_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int kxcjk1013_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); static const char * const regulator_names[] = { "vdd", "vddio" }; struct kxcjk1013_data *data; struct iio_dev *indio_dev; @@ -1732,7 +1732,7 @@ static struct i2c_driver kxcjk1013_driver = { .of_match_table = kxcjk1013_of_match, .pm = &kxcjk1013_pm_ops, }, - .probe = kxcjk1013_probe, + .probe_new = kxcjk1013_probe, .remove = kxcjk1013_remove, .id_table = kxcjk1013_id, }; From 17b9a72f4a11c64a12d7cd4a7951ba44c5f2f374 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:36:29 +0100 Subject: [PATCH 2383/4122] iio: accel: mma7455_i2c: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-56-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/accel/mma7455_i2c.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/accel/mma7455_i2c.c b/drivers/iio/accel/mma7455_i2c.c index c63b321b01cd..a3864dbe2761 100644 --- a/drivers/iio/accel/mma7455_i2c.c +++ b/drivers/iio/accel/mma7455_i2c.c @@ -10,9 +10,9 @@ #include "mma7455.h" -static int mma7455_i2c_probe(struct i2c_client *i2c, - const struct i2c_device_id *id) +static int mma7455_i2c_probe(struct i2c_client *i2c) { + const struct i2c_device_id *id = i2c_client_get_device_id(i2c); struct regmap *regmap; const char *name = NULL; @@ -46,7 +46,7 @@ static const struct of_device_id mma7455_of_match[] = { MODULE_DEVICE_TABLE(of, mma7455_of_match); static struct i2c_driver mma7455_i2c_driver = { - .probe = mma7455_i2c_probe, + .probe_new = mma7455_i2c_probe, .remove = mma7455_i2c_remove, .id_table = mma7455_i2c_ids, .driver = { From 12491d35551df69709777bd7769e1e33641943cc Mon Sep 17 00:00:00 2001 From: Angel Iglesias Date: Sun, 13 Nov 2022 18:54:46 +0100 Subject: [PATCH 2384/4122] iio: pressure: bmp280: convert to i2c's .probe_new() Use i2c_client_get_device_id() to get the i2c_device_id* parameter in the .probe_new() callback. 
Signed-off-by: Angel Iglesias Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/5dcaa389ea2ffe7050091b07a3bc4b0c1c9d586b.1668361368.git.ang.iglesiasg@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/bmp280-i2c.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/pressure/bmp280-i2c.c b/drivers/iio/pressure/bmp280-i2c.c index 0c27211f3ea0..14eab086d24a 100644 --- a/drivers/iio/pressure/bmp280-i2c.c +++ b/drivers/iio/pressure/bmp280-i2c.c @@ -5,11 +5,11 @@ #include "bmp280.h" -static int bmp280_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int bmp280_i2c_probe(struct i2c_client *client) { struct regmap *regmap; const struct regmap_config *regmap_config; + const struct i2c_device_id *id = i2c_client_get_device_id(client); switch (id->driver_data) { case BMP180_CHIP_ID: @@ -65,7 +65,7 @@ static struct i2c_driver bmp280_i2c_driver = { .of_match_table = bmp280_of_i2c_match, .pm = pm_ptr(&bmp280_dev_pm_ops), }, - .probe = bmp280_i2c_probe, + .probe_new = bmp280_i2c_probe, .id_table = bmp280_i2c_id, }; module_i2c_driver(bmp280_i2c_driver); From 226d6dbac88336f37b1fae76aabe6fa610e9b872 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:36:30 +0100 Subject: [PATCH 2385/4122] iio: accel: mma8452: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Hans de Goede Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-57-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/accel/mma8452.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/accel/mma8452.c b/drivers/iio/accel/mma8452.c index 3ba28c2ff68a..f97fb68e3a71 100644 --- a/drivers/iio/accel/mma8452.c +++ b/drivers/iio/accel/mma8452.c @@ -1545,9 +1545,9 @@ static const struct of_device_id mma8452_dt_ids[] = { }; MODULE_DEVICE_TABLE(of, mma8452_dt_ids); -static int mma8452_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int mma8452_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct mma8452_data *data; struct iio_dev *indio_dev; int ret; @@ -1846,7 +1846,7 @@ static struct i2c_driver mma8452_driver = { .of_match_table = mma8452_dt_ids, .pm = &mma8452_pm_ops, }, - .probe = mma8452_probe, + .probe_new = mma8452_probe, .remove = mma8452_remove, .id_table = mma8452_id, }; From 6dae5d11e29d2935e7995d05468d72660dde5f7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:36:31 +0100 Subject: [PATCH 2386/4122] iio: accel: mma9551: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-58-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/accel/mma9551.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/accel/mma9551.c b/drivers/iio/accel/mma9551.c index f7a793f4a8e3..aa4f5842859e 100644 --- a/drivers/iio/accel/mma9551.c +++ b/drivers/iio/accel/mma9551.c @@ -446,9 +446,9 @@ static const char *mma9551_match_acpi_device(struct device *dev) return dev_name(dev); } -static int mma9551_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int mma9551_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct mma9551_data *data; struct iio_dev *indio_dev; const char *name = NULL; @@ -607,7 +607,7 @@ static struct i2c_driver mma9551_driver = { .acpi_match_table = ACPI_PTR(mma9551_acpi_match), .pm = pm_ptr(&mma9551_pm_ops), }, - .probe = mma9551_probe, + .probe_new = mma9551_probe, .remove = mma9551_remove, .id_table = mma9551_id, }; From 50434e4605d66a55e7422cb24804c88b57cfb2bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:36:32 +0100 Subject: [PATCH 2387/4122] iio: accel: mma9553: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-59-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/accel/mma9553.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/accel/mma9553.c b/drivers/iio/accel/mma9553.c index 2da0e005b13e..0af578ef9d3d 100644 --- a/drivers/iio/accel/mma9553.c +++ b/drivers/iio/accel/mma9553.c @@ -1073,9 +1073,9 @@ static const char *mma9553_match_acpi_device(struct device *dev) return dev_name(dev); } -static int mma9553_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int mma9553_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct mma9553_data *data; struct iio_dev *indio_dev; const char *name = NULL; @@ -1246,7 +1246,7 @@ static struct i2c_driver mma9553_driver = { .acpi_match_table = ACPI_PTR(mma9553_acpi_match), .pm = pm_ptr(&mma9553_pm_ops), }, - .probe = mma9553_probe, + .probe_new = mma9553_probe, .remove = mma9553_remove, .id_table = mma9553_id, }; From fe44f0738dd142a173e1b62e13b59d134abca003 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:36:33 +0100 Subject: [PATCH 2388/4122] iio: adc: ad7091r5: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-60-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7091r5.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/adc/ad7091r5.c b/drivers/iio/adc/ad7091r5.c index 47f5763023a4..7d6709da1005 100644 --- a/drivers/iio/adc/ad7091r5.c +++ b/drivers/iio/adc/ad7091r5.c @@ -69,9 +69,9 @@ static const struct ad7091r_chip_info ad7091r5_chip_info_noirq = { .vref_mV = 2500, }; -static int ad7091r5_i2c_probe(struct i2c_client *i2c, - const struct i2c_device_id *id) +static int ad7091r5_i2c_probe(struct i2c_client *i2c) { + const struct i2c_device_id *id = i2c_client_get_device_id(i2c); const struct ad7091r_chip_info *chip_info; struct regmap *map = devm_regmap_init_i2c(i2c, &ad7091r_regmap_config); @@ -103,7 +103,7 @@ static struct i2c_driver ad7091r5_driver = { .name = "ad7091r5", .of_match_table = ad7091r5_dt_ids, }, - .probe = ad7091r5_i2c_probe, + .probe_new = ad7091r5_i2c_probe, .id_table = ad7091r5_i2c_ids, }; module_i2c_driver(ad7091r5_driver); From 3ff5dd78f6b7e5aa73ddbc8f609f5ba53d1d49bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:36:34 +0100 Subject: [PATCH 2389/4122] iio: adc: ad7291: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-61-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7291.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/adc/ad7291.c b/drivers/iio/adc/ad7291.c index e9129dac762f..3dd0105f63d7 100644 --- a/drivers/iio/adc/ad7291.c +++ b/drivers/iio/adc/ad7291.c @@ -465,9 +465,9 @@ static void ad7291_reg_disable(void *reg) regulator_disable(reg); } -static int ad7291_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int ad7291_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct ad7291_chip_info *chip; struct iio_dev *indio_dev; int ret; @@ -553,7 +553,7 @@ static struct i2c_driver ad7291_driver = { .name = KBUILD_MODNAME, .of_match_table = ad7291_of_match, }, - .probe = ad7291_probe, + .probe_new = ad7291_probe, .id_table = ad7291_id, }; module_i2c_driver(ad7291_driver); From 28ae41885ab2bec935a61a9c811713df6e70043e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:36:35 +0100 Subject: [PATCH 2390/4122] iio: adc: ad799x: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-62-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad799x.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/adc/ad799x.c b/drivers/iio/adc/ad799x.c index 4730d8d0f4c3..8f0a3a35e727 100644 --- a/drivers/iio/adc/ad799x.c +++ b/drivers/iio/adc/ad799x.c @@ -775,9 +775,9 @@ static const struct ad799x_chip_info ad799x_chip_info_tbl[] = { }, }; -static int ad799x_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int ad799x_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); int ret; int extra_config = 0; struct ad799x_state *st; @@ -968,7 +968,7 @@ static struct i2c_driver ad799x_driver = { .name = "ad799x", .pm = pm_sleep_ptr(&ad799x_pm_ops), }, - .probe = ad799x_probe, + .probe_new = ad799x_probe, .remove = ad799x_remove, .id_table = ad799x_id, }; From 203a5e83dd3f7ea4d0d827563a317bd656d2f579 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:36:36 +0100 Subject: [PATCH 2391/4122] iio: adc: ina2xx-adc: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-63-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ina2xx-adc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/adc/ina2xx-adc.c b/drivers/iio/adc/ina2xx-adc.c index 910e7e965fc4..38d9d7b2313e 100644 --- a/drivers/iio/adc/ina2xx-adc.c +++ b/drivers/iio/adc/ina2xx-adc.c @@ -946,9 +946,9 @@ static int ina2xx_init(struct ina2xx_chip_info *chip, unsigned int config) return ina2xx_set_calibration(chip); } -static int ina2xx_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int ina2xx_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct ina2xx_chip_info *chip; struct iio_dev *indio_dev; unsigned int val; @@ -1090,7 +1090,7 @@ static struct i2c_driver ina2xx_driver = { .name = KBUILD_MODNAME, .of_match_table = ina2xx_of_match, }, - .probe = ina2xx_probe, + .probe_new = ina2xx_probe, .remove = ina2xx_remove, .id_table = ina2xx_id, }; From 7b8c4fa43583ea1a7e60658ad23302608fe19ce9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:36:37 +0100 Subject: [PATCH 2392/4122] iio: adc: ltc2471: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-64-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ltc2471.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/adc/ltc2471.c b/drivers/iio/adc/ltc2471.c index 0e0fe881a8e6..eeb2945829eb 100644 --- a/drivers/iio/adc/ltc2471.c +++ b/drivers/iio/adc/ltc2471.c @@ -99,9 +99,9 @@ static const struct iio_info ltc2471_info = { .read_raw = ltc2471_read_raw, }; -static int ltc2471_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int ltc2471_i2c_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct iio_dev *indio_dev; struct ltc2471_data *data; int ret; @@ -146,7 +146,7 @@ static struct i2c_driver ltc2471_i2c_driver = { .driver = { .name = "ltc2471", }, - .probe = ltc2471_i2c_probe, + .probe_new = ltc2471_i2c_probe, .id_table = ltc2471_i2c_id, }; From 11e67cc9675f1289556e1524fa7717cc95282487 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:36:38 +0100 Subject: [PATCH 2393/4122] iio: adc: ltc2485: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-65-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ltc2485.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/adc/ltc2485.c b/drivers/iio/adc/ltc2485.c index 37c762f8218c..6a23427344ec 100644 --- a/drivers/iio/adc/ltc2485.c +++ b/drivers/iio/adc/ltc2485.c @@ -89,9 +89,9 @@ static const struct iio_info ltc2485_info = { .read_raw = ltc2485_read_raw, }; -static int ltc2485_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int ltc2485_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct iio_dev *indio_dev; struct ltc2485_data *data; int ret; @@ -133,7 +133,7 @@ static struct i2c_driver ltc2485_driver = { .driver = { .name = "ltc2485", }, - .probe = ltc2485_probe, + .probe_new = ltc2485_probe, .id_table = ltc2485_id, }; module_i2c_driver(ltc2485_driver); From 3a79844428c39fa54a30ec11eaad5a8e577ef706 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:36:39 +0100 Subject: [PATCH 2394/4122] iio: adc: ltc2497: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-66-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ltc2497.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/adc/ltc2497.c b/drivers/iio/adc/ltc2497.c index 556f10dfb502..17370c5eb6fe 100644 --- a/drivers/iio/adc/ltc2497.c +++ b/drivers/iio/adc/ltc2497.c @@ -94,9 +94,9 @@ static int ltc2497_result_and_measure(struct ltc2497core_driverdata *ddata, return ret; } -static int ltc2497_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int ltc2497_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); const struct ltc2497_chip_info *chip_info; struct iio_dev *indio_dev; struct ltc2497_driverdata *st; @@ -165,7 +165,7 @@ static struct i2c_driver ltc2497_driver = { .name = "ltc2497", .of_match_table = ltc2497_of_match, }, - .probe = ltc2497_probe, + .probe_new = ltc2497_probe, .remove = ltc2497_remove, .id_table = ltc2497_id, }; From a69e45a411ea3ae0bbec474e4303c4281f3f8376 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:36:40 +0100 Subject: [PATCH 2395/4122] iio: adc: max1363: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-67-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/max1363.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/adc/max1363.c b/drivers/iio/adc/max1363.c index 42d3479cefb7..73b783b430d7 100644 --- a/drivers/iio/adc/max1363.c +++ b/drivers/iio/adc/max1363.c @@ -1579,9 +1579,9 @@ static void max1363_reg_disable(void *reg) regulator_disable(reg); } -static int max1363_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int max1363_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); int ret; struct max1363_state *st; struct iio_dev *indio_dev; @@ -1718,7 +1718,7 @@ static struct i2c_driver max1363_driver = { .name = "max1363", .of_match_table = max1363_of_match, }, - .probe = max1363_probe, + .probe_new = max1363_probe, .id_table = max1363_id, }; module_i2c_driver(max1363_driver); From d59ecbc48a1c74cfe6bc2d4ff503fb3e7455d19f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:36:41 +0100 Subject: [PATCH 2396/4122] iio: adc: max9611: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Acked-by: Jacopo Mondi Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-68-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/max9611.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/adc/max9611.c b/drivers/iio/adc/max9611.c index f982f00303dc..cb7f4785423a 100644 --- a/drivers/iio/adc/max9611.c +++ b/drivers/iio/adc/max9611.c @@ -510,8 +510,7 @@ static const struct of_device_id max9611_of_table[] = { }; MODULE_DEVICE_TABLE(of, max9611_of_table); -static int max9611_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int max9611_probe(struct i2c_client *client) { const char * const shunt_res_prop = "shunt-resistor-micro-ohms"; struct max9611_dev *max9611; @@ -557,7 +556,7 @@ static struct i2c_driver max9611_driver = { .name = DRIVER_NAME, .of_match_table = max9611_of_table, }, - .probe = max9611_probe, + .probe_new = max9611_probe, }; module_i2c_driver(max9611_driver); From 6168215dbfea1c1d42a7aff145efbbcbdb5d6051 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:36:42 +0100 Subject: [PATCH 2397/4122] iio: adc: mcp3422: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-69-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/mcp3422.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/adc/mcp3422.c b/drivers/iio/adc/mcp3422.c index da353dcb1e9d..ada844c3f7ec 100644 --- a/drivers/iio/adc/mcp3422.c +++ b/drivers/iio/adc/mcp3422.c @@ -330,9 +330,9 @@ static const struct iio_info mcp3422_info = { .attrs = &mcp3422_attribute_group, }; -static int mcp3422_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int mcp3422_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct iio_dev *indio_dev; struct mcp3422 *adc; int err; @@ -417,7 +417,7 @@ static struct i2c_driver mcp3422_driver = { .name = "mcp3422", .of_match_table = mcp3422_of_match, }, - .probe = mcp3422_probe, + .probe_new = mcp3422_probe, .id_table = mcp3422_id, }; module_i2c_driver(mcp3422_driver); From 4b50867f6a328c13d40223937c34b4146eafd883 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:36:43 +0100 Subject: [PATCH 2398/4122] iio: adc: ti-adc081c: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-70-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ti-adc081c.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/adc/ti-adc081c.c b/drivers/iio/adc/ti-adc081c.c index bd48b073e720..c663dc59d459 100644 --- a/drivers/iio/adc/ti-adc081c.c +++ b/drivers/iio/adc/ti-adc081c.c @@ -152,9 +152,9 @@ static void adc081c_reg_disable(void *reg) regulator_disable(reg); } -static int adc081c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int adc081c_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct iio_dev *iio; struct adc081c *adc; const struct adcxx1c_model *model; @@ -235,7 +235,7 @@ static struct i2c_driver adc081c_driver = { .of_match_table = adc081c_of_match, .acpi_match_table = adc081c_acpi_match, }, - .probe = adc081c_probe, + .probe_new = adc081c_probe, .id_table = adc081c_id, }; module_i2c_driver(adc081c_driver); From 0143ce1039f70e2656865d75c163ed4dbf40db30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:36:44 +0100 Subject: [PATCH 2399/4122] iio: adc: ti-ads1015: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-71-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ti-ads1015.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/adc/ti-ads1015.c b/drivers/iio/adc/ti-ads1015.c index 8bceba694026..56af5e148802 100644 --- a/drivers/iio/adc/ti-ads1015.c +++ b/drivers/iio/adc/ti-ads1015.c @@ -974,9 +974,9 @@ static int ads1015_set_conv_mode(struct ads1015_data *data, int mode) mode << ADS1015_CFG_MOD_SHIFT); } -static int ads1015_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int ads1015_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); const struct ads1015_chip_data *chip; struct iio_dev *indio_dev; struct ads1015_data *data; @@ -1195,7 +1195,7 @@ static struct i2c_driver ads1015_driver = { .of_match_table = ads1015_of_match, .pm = &ads1015_pm_ops, }, - .probe = ads1015_probe, + .probe_new = ads1015_probe, .remove = ads1015_remove, .id_table = ads1015_id, }; From 7558eaa9727dbf2efd41d29101d1639975cf778e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:36:45 +0100 Subject: [PATCH 2400/4122] iio: cdc: ad7150: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-72-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/cdc/ad7150.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/cdc/ad7150.c b/drivers/iio/cdc/ad7150.c index 1113745890ca..79aeb0aaea67 100644 --- a/drivers/iio/cdc/ad7150.c +++ b/drivers/iio/cdc/ad7150.c @@ -536,9 +536,9 @@ static const struct iio_info ad7150_info_no_irq = { .read_raw = &ad7150_read_raw, }; -static int ad7150_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int ad7150_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct ad7150_chip_info *chip; struct iio_dev *indio_dev; int ret; @@ -647,7 +647,7 @@ static struct i2c_driver ad7150_driver = { .name = "ad7150", .of_match_table = ad7150_of_match, }, - .probe = ad7150_probe, + .probe_new = ad7150_probe, .id_table = ad7150_id, }; module_i2c_driver(ad7150_driver); From 9b1cd21eafc34beb50194a060e1cd3902b763a82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:36:46 +0100 Subject: [PATCH 2401/4122] iio: cdc: ad7746: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-73-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/cdc/ad7746.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/cdc/ad7746.c b/drivers/iio/cdc/ad7746.c index b266f5328140..6f68651ce1d5 100644 --- a/drivers/iio/cdc/ad7746.c +++ b/drivers/iio/cdc/ad7746.c @@ -717,9 +717,9 @@ static const struct iio_info ad7746_info = { .write_raw = ad7746_write_raw, }; -static int ad7746_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int ad7746_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct device *dev = &client->dev; struct ad7746_chip_info *chip; struct iio_dev *indio_dev; @@ -810,7 +810,7 @@ static struct i2c_driver ad7746_driver = { .name = KBUILD_MODNAME, .of_match_table = ad7746_of_match, }, - .probe = ad7746_probe, + .probe_new = ad7746_probe, .id_table = ad7746_id, }; module_i2c_driver(ad7746_driver); From 89d63224e20c8409afb553d334d38bb2675757ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:36:47 +0100 Subject: [PATCH 2402/4122] iio: chemical: ams-iaq-core: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-74-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/chemical/ams-iaq-core.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/chemical/ams-iaq-core.c b/drivers/iio/chemical/ams-iaq-core.c index 97be3669c554..0a0fbcdd4469 100644 --- a/drivers/iio/chemical/ams-iaq-core.c +++ b/drivers/iio/chemical/ams-iaq-core.c @@ -135,8 +135,7 @@ static const struct iio_info ams_iaqcore_info = { .read_raw = ams_iaqcore_read_raw, }; -static int ams_iaqcore_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int ams_iaqcore_probe(struct i2c_client *client) { struct iio_dev *indio_dev; struct ams_iaqcore_data *data; @@ -180,7 +179,7 @@ static struct i2c_driver ams_iaqcore_driver = { .name = "ams-iaq-core", .of_match_table = ams_iaqcore_dt_ids, }, - .probe = ams_iaqcore_probe, + .probe_new = ams_iaqcore_probe, .id_table = ams_iaqcore_id, }; module_i2c_driver(ams_iaqcore_driver); From e9c812ca569f1314f0a9c0420cb17c9db0a55457 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:36:48 +0100 Subject: [PATCH 2403/4122] iio: chemical: atlas-ezo-sensor: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-75-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/chemical/atlas-ezo-sensor.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/chemical/atlas-ezo-sensor.c b/drivers/iio/chemical/atlas-ezo-sensor.c index bbcf5a59c1f4..307c3488f4bd 100644 --- a/drivers/iio/chemical/atlas-ezo-sensor.c +++ b/drivers/iio/chemical/atlas-ezo-sensor.c @@ -201,9 +201,9 @@ static const struct of_device_id atlas_ezo_dt_ids[] = { }; MODULE_DEVICE_TABLE(of, atlas_ezo_dt_ids); -static int atlas_ezo_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int atlas_ezo_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); const struct atlas_ezo_device *chip; struct atlas_ezo_data *data; struct iio_dev *indio_dev; @@ -238,7 +238,7 @@ static struct i2c_driver atlas_ezo_driver = { .name = ATLAS_EZO_DRV_NAME, .of_match_table = atlas_ezo_dt_ids, }, - .probe = atlas_ezo_probe, + .probe_new = atlas_ezo_probe, .id_table = atlas_ezo_id, }; module_i2c_driver(atlas_ezo_driver); From 50c359245069f7e64880fe01b06d40954df71d9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:36:49 +0100 Subject: [PATCH 2404/4122] iio: chemical: atlas-sensor: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-76-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/chemical/atlas-sensor.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/chemical/atlas-sensor.c b/drivers/iio/chemical/atlas-sensor.c index 7cac77a931c7..024657bc59e1 100644 --- a/drivers/iio/chemical/atlas-sensor.c +++ b/drivers/iio/chemical/atlas-sensor.c @@ -608,9 +608,9 @@ static const struct of_device_id atlas_dt_ids[] = { }; MODULE_DEVICE_TABLE(of, atlas_dt_ids); -static int atlas_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int atlas_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct atlas_data *data; struct atlas_device *chip; struct iio_trigger *trig; @@ -767,7 +767,7 @@ static struct i2c_driver atlas_driver = { .of_match_table = atlas_dt_ids, .pm = pm_ptr(&atlas_pm_ops), }, - .probe = atlas_probe, + .probe_new = atlas_probe, .remove = atlas_remove, .id_table = atlas_id, }; From 5aa377658458cb7c4ea20dab7405b795a5cc8a11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:36:50 +0100 Subject: [PATCH 2405/4122] iio: chemical: bme680_i2c: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-77-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/chemical/bme680_i2c.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/chemical/bme680_i2c.c b/drivers/iio/chemical/bme680_i2c.c index 20f2c20b6b02..61b12079858d 100644 --- a/drivers/iio/chemical/bme680_i2c.c +++ b/drivers/iio/chemical/bme680_i2c.c @@ -17,9 +17,9 @@ #include "bme680.h" -static int bme680_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int bme680_i2c_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct regmap *regmap; const char *name = NULL; @@ -52,7 +52,7 @@ static struct i2c_driver bme680_i2c_driver = { .name = "bme680_i2c", .of_match_table = bme680_of_i2c_match, }, - .probe = bme680_i2c_probe, + .probe_new = bme680_i2c_probe, .id_table = bme680_i2c_id, }; module_i2c_driver(bme680_i2c_driver); From 684e57366e68e4ba2abc5ab56041b4d6534f28ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:36:51 +0100 Subject: [PATCH 2406/4122] iio: chemical: ccs811: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-78-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/chemical/ccs811.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/chemical/ccs811.c b/drivers/iio/chemical/ccs811.c index ba4045e20303..6ead80c08924 100644 --- a/drivers/iio/chemical/ccs811.c +++ b/drivers/iio/chemical/ccs811.c @@ -401,9 +401,9 @@ static int ccs811_reset(struct i2c_client *client) return 0; } -static int ccs811_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int ccs811_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct iio_dev *indio_dev; struct ccs811_data *data; int ret; @@ -567,7 +567,7 @@ static struct i2c_driver ccs811_driver = { .name = "ccs811", .of_match_table = ccs811_dt_ids, }, - .probe = ccs811_probe, + .probe_new = ccs811_probe, .remove = ccs811_remove, .id_table = ccs811_id, }; From 2f2adc666335cad150b720a6b19cec33464e2680 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:36:52 +0100 Subject: [PATCH 2407/4122] iio: chemical: scd4x: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-79-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/chemical/scd4x.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/chemical/scd4x.c b/drivers/iio/chemical/scd4x.c index 54066532ea45..f7ed9455b3c8 100644 --- a/drivers/iio/chemical/scd4x.c +++ b/drivers/iio/chemical/scd4x.c @@ -615,7 +615,7 @@ out: return IRQ_HANDLED; } -static int scd4x_probe(struct i2c_client *client, const struct i2c_device_id *id) +static int scd4x_probe(struct i2c_client *client) { static const unsigned long scd4x_scan_masks[] = { 0x07, 0x00 }; struct device *dev = &client->dev; @@ -690,7 +690,7 @@ static struct i2c_driver scd4x_i2c_driver = { .of_match_table = scd4x_dt_ids, .pm = pm_sleep_ptr(&scd4x_pm_ops), }, - .probe = scd4x_probe, + .probe_new = scd4x_probe, }; module_i2c_driver(scd4x_i2c_driver); From 67eba68bfccbdf6598200d70f80971e454d1ee2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:36:53 +0100 Subject: [PATCH 2408/4122] iio: chemical: sgp30: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-80-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/chemical/sgp30.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/chemical/sgp30.c b/drivers/iio/chemical/sgp30.c index e2c13c78c7e0..9d0c68485b63 100644 --- a/drivers/iio/chemical/sgp30.c +++ b/drivers/iio/chemical/sgp30.c @@ -496,9 +496,9 @@ static const struct of_device_id sgp_dt_ids[] = { { } }; -static int sgp_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int sgp_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct device *dev = &client->dev; struct iio_dev *indio_dev; struct sgp_data *data; @@ -575,7 +575,7 @@ static struct i2c_driver sgp_driver = { .name = "sgp30", .of_match_table = sgp_dt_ids, }, - .probe = sgp_probe, + .probe_new = sgp_probe, .remove = sgp_remove, .id_table = sgp_id, }; From 07eda54d92f7ae7439a77903b9a43bfe563ebf21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:36:54 +0100 Subject: [PATCH 2409/4122] iio: chemical: sgp40: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-81-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/chemical/sgp40.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/chemical/sgp40.c b/drivers/iio/chemical/sgp40.c index 8a56394cea4e..c0ea01300908 100644 --- a/drivers/iio/chemical/sgp40.c +++ b/drivers/iio/chemical/sgp40.c @@ -311,9 +311,9 @@ static const struct iio_info sgp40_info = { .write_raw = sgp40_write_raw, }; -static int sgp40_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int sgp40_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct device *dev = &client->dev; struct iio_dev *indio_dev; struct sgp40_data *data; @@ -368,7 +368,7 @@ static struct i2c_driver sgp40_driver = { .name = "sgp40", .of_match_table = sgp40_dt_ids, }, - .probe = sgp40_probe, + .probe_new = sgp40_probe, .id_table = sgp40_id, }; module_i2c_driver(sgp40_driver); From 0d8535ee52f0392f6293e564e3ff4522d6c37cfd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:36:55 +0100 Subject: [PATCH 2410/4122] iio: chemical: vz89x: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-82-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/chemical/vz89x.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/chemical/vz89x.c b/drivers/iio/chemical/vz89x.c index e7e1c74a351e..d4604f7ccd1e 100644 --- a/drivers/iio/chemical/vz89x.c +++ b/drivers/iio/chemical/vz89x.c @@ -348,9 +348,9 @@ static const struct of_device_id vz89x_dt_ids[] = { }; MODULE_DEVICE_TABLE(of, vz89x_dt_ids); -static int vz89x_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int vz89x_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct device *dev = &client->dev; struct iio_dev *indio_dev; struct vz89x_data *data; @@ -402,7 +402,7 @@ static struct i2c_driver vz89x_driver = { .name = "vz89x", .of_match_table = vz89x_dt_ids, }, - .probe = vz89x_probe, + .probe_new = vz89x_probe, .id_table = vz89x_id, }; module_i2c_driver(vz89x_driver); From 16fb97c4aacf5db311f4fb54745f852d1183e662 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:36:56 +0100 Subject: [PATCH 2411/4122] iio: dac: ad5064: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-83-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/dac/ad5064.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/dac/ad5064.c b/drivers/iio/dac/ad5064.c index 4447b8811827..f01249c1ba93 100644 --- a/drivers/iio/dac/ad5064.c +++ b/drivers/iio/dac/ad5064.c @@ -993,9 +993,9 @@ static int ad5064_i2c_write(struct ad5064_state *st, unsigned int cmd, return 0; } -static int ad5064_i2c_probe(struct i2c_client *i2c, - const struct i2c_device_id *id) +static int ad5064_i2c_probe(struct i2c_client *i2c) { + const struct i2c_device_id *id = i2c_client_get_device_id(i2c); return ad5064_probe(&i2c->dev, id->driver_data, id->name, ad5064_i2c_write); } @@ -1056,7 +1056,7 @@ static struct i2c_driver ad5064_i2c_driver = { .driver = { .name = "ad5064", }, - .probe = ad5064_i2c_probe, + .probe_new = ad5064_i2c_probe, .id_table = ad5064_i2c_ids, }; From 4b2b4370a8459982d78d5716361a7a51089fecbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:36:57 +0100 Subject: [PATCH 2412/4122] iio: dac: ad5380: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-84-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/dac/ad5380.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/dac/ad5380.c b/drivers/iio/dac/ad5380.c index a81bfa47a221..64b4519f8f5e 100644 --- a/drivers/iio/dac/ad5380.c +++ b/drivers/iio/dac/ad5380.c @@ -546,9 +546,9 @@ static inline void ad5380_spi_unregister_driver(void) #if IS_ENABLED(CONFIG_I2C) -static int ad5380_i2c_probe(struct i2c_client *i2c, - const struct i2c_device_id *id) +static int ad5380_i2c_probe(struct i2c_client *i2c) { + const struct i2c_device_id *id = i2c_client_get_device_id(i2c); struct regmap *regmap; regmap = devm_regmap_init_i2c(i2c, &ad5380_regmap_config); @@ -589,7 +589,7 @@ static struct i2c_driver ad5380_i2c_driver = { .driver = { .name = "ad5380", }, - .probe = ad5380_i2c_probe, + .probe_new = ad5380_i2c_probe, .remove = ad5380_i2c_remove, .id_table = ad5380_i2c_ids, }; From 94e5ddbaac1ea9b5bda64af53d8a57b19be890ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:36:58 +0100 Subject: [PATCH 2413/4122] iio: dac: ad5446: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-85-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/dac/ad5446.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/dac/ad5446.c b/drivers/iio/dac/ad5446.c index 7324065d3782..aa3130b33456 100644 --- a/drivers/iio/dac/ad5446.c +++ b/drivers/iio/dac/ad5446.c @@ -568,9 +568,9 @@ static const struct ad5446_chip_info ad5446_i2c_chip_info[] = { }, }; -static int ad5446_i2c_probe(struct i2c_client *i2c, - const struct i2c_device_id *id) +static int ad5446_i2c_probe(struct i2c_client *i2c) { + const struct i2c_device_id *id = i2c_client_get_device_id(i2c); return ad5446_probe(&i2c->dev, id->name, &ad5446_i2c_chip_info[id->driver_data]); } @@ -595,7 +595,7 @@ static struct i2c_driver ad5446_i2c_driver = { .driver = { .name = "ad5446", }, - .probe = ad5446_i2c_probe, + .probe_new = ad5446_i2c_probe, .remove = ad5446_i2c_remove, .id_table = ad5446_i2c_ids, }; From a17c748115cb1f95675b322e955763cd9bb628ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:36:59 +0100 Subject: [PATCH 2414/4122] iio: dac: ad5593r: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-86-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/dac/ad5593r.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/dac/ad5593r.c b/drivers/iio/dac/ad5593r.c index 8e5e014e0c28..d311567ab324 100644 --- a/drivers/iio/dac/ad5593r.c +++ b/drivers/iio/dac/ad5593r.c @@ -99,9 +99,9 @@ static const struct ad5592r_rw_ops ad5593r_rw_ops = { .gpio_read = ad5593r_gpio_read, }; -static int ad5593r_i2c_probe(struct i2c_client *i2c, - const struct i2c_device_id *id) +static int ad5593r_i2c_probe(struct i2c_client *i2c) { + const struct i2c_device_id *id = i2c_client_get_device_id(i2c); if (!i2c_check_functionality(i2c->adapter, I2C_FUNC_SMBUS_BYTE | I2C_FUNC_I2C)) return -EOPNOTSUPP; @@ -138,7 +138,7 @@ static struct i2c_driver ad5593r_driver = { .of_match_table = ad5593r_of_match, .acpi_match_table = ad5593r_acpi_match, }, - .probe = ad5593r_i2c_probe, + .probe_new = ad5593r_i2c_probe, .remove = ad5593r_i2c_remove, .id_table = ad5593r_i2c_ids, }; From 92cd05a0968d60b3d76dc8d3a5880b25b1af96b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:00 +0100 Subject: [PATCH 2415/4122] iio: dac: ad5696-i2c: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-87-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/dac/ad5696-i2c.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/dac/ad5696-i2c.c b/drivers/iio/dac/ad5696-i2c.c index aa36cbf0137c..160e80cf9135 100644 --- a/drivers/iio/dac/ad5696-i2c.c +++ b/drivers/iio/dac/ad5696-i2c.c @@ -58,9 +58,9 @@ static int ad5686_i2c_write(struct ad5686_state *st, return (ret != 3) ? -EIO : 0; } -static int ad5686_i2c_probe(struct i2c_client *i2c, - const struct i2c_device_id *id) +static int ad5686_i2c_probe(struct i2c_client *i2c) { + const struct i2c_device_id *id = i2c_client_get_device_id(i2c); return ad5686_probe(&i2c->dev, id->driver_data, id->name, ad5686_i2c_write, ad5686_i2c_read); } @@ -113,7 +113,7 @@ static struct i2c_driver ad5686_i2c_driver = { .name = "ad5696", .of_match_table = ad5686_of_match, }, - .probe = ad5686_i2c_probe, + .probe_new = ad5686_i2c_probe, .remove = ad5686_i2c_remove, .id_table = ad5686_i2c_id, }; From 53f46605c3656c181e49df0d5fdba014cffa498e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:01 +0100 Subject: [PATCH 2416/4122] iio: dac: ds4424: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-88-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/dac/ds4424.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/dac/ds4424.c b/drivers/iio/dac/ds4424.c index 3e17a681174e..a16a6a934d9d 100644 --- a/drivers/iio/dac/ds4424.c +++ b/drivers/iio/dac/ds4424.c @@ -213,9 +213,9 @@ static const struct iio_info ds4424_info = { .write_raw = ds4424_write_raw, }; -static int ds4424_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int ds4424_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct ds4424_data *data; struct iio_dev *indio_dev; int ret; @@ -312,7 +312,7 @@ static struct i2c_driver ds4424_driver = { .of_match_table = ds4424_of_match, .pm = pm_sleep_ptr(&ds4424_pm_ops), }, - .probe = ds4424_probe, + .probe_new = ds4424_probe, .remove = ds4424_remove, .id_table = ds4424_id, }; From 44d7a03c98f466c1567a8cad84908ce6a9d1f2c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:02 +0100 Subject: [PATCH 2417/4122] iio: dac: m62332: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-89-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/dac/m62332.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/dac/m62332.c b/drivers/iio/dac/m62332.c index 5a812f87970c..b692459b0f23 100644 --- a/drivers/iio/dac/m62332.c +++ b/drivers/iio/dac/m62332.c @@ -176,8 +176,7 @@ static const struct iio_chan_spec m62332_channels[M62332_CHANNELS] = { M62332_CHANNEL(1) }; -static int m62332_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int m62332_probe(struct i2c_client *client) { struct m62332_data *data; struct iio_dev *indio_dev; @@ -239,7 +238,7 @@ static struct i2c_driver m62332_driver = { .name = "m62332", .pm = pm_sleep_ptr(&m62332_pm_ops), }, - .probe = m62332_probe, + .probe_new = m62332_probe, .remove = m62332_remove, .id_table = m62332_id, }; From 090515edf46dc1f2ff7dbc78a60ba7692ff8420f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:03 +0100 Subject: [PATCH 2418/4122] iio: dac: max517: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-90-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/dac/max517.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/dac/max517.c b/drivers/iio/dac/max517.c index 373ce6ff83b7..25967c39365d 100644 --- a/drivers/iio/dac/max517.c +++ b/drivers/iio/dac/max517.c @@ -141,9 +141,9 @@ static const struct iio_chan_spec max517_channels[] = { MAX517_CHANNEL(7), }; -static int max517_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int max517_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct max517_data *data; struct iio_dev *indio_dev; struct max517_platform_data *platform_data = client->dev.platform_data; @@ -203,7 +203,7 @@ static struct i2c_driver max517_driver = { .name = MAX517_DRV_NAME, .pm = pm_sleep_ptr(&max517_pm_ops), }, - .probe = max517_probe, + .probe_new = max517_probe, .id_table = max517_id, }; module_i2c_driver(max517_driver); From 62b001dad803e40274db2c712dd4dc0d3a4a37d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:04 +0100 Subject: [PATCH 2419/4122] iio: dac: max5821: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-91-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/dac/max5821.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/dac/max5821.c b/drivers/iio/dac/max5821.c index e001b594d5b1..23da345b9250 100644 --- a/drivers/iio/dac/max5821.c +++ b/drivers/iio/dac/max5821.c @@ -300,9 +300,9 @@ static void max5821_regulator_disable(void *reg) regulator_disable(reg); } -static int max5821_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int max5821_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct max5821_data *data; struct iio_dev *indio_dev; u32 tmp; @@ -377,7 +377,7 @@ static struct i2c_driver max5821_driver = { .of_match_table = max5821_of_match, .pm = pm_sleep_ptr(&max5821_pm_ops), }, - .probe = max5821_probe, + .probe_new = max5821_probe, .id_table = max5821_id, }; module_i2c_driver(max5821_driver); From 818fe4546521f5e836ab02eeb6533bfd6aee6829 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:05 +0100 Subject: [PATCH 2420/4122] iio: dac: mcp4725: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-92-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/dac/mcp4725.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/dac/mcp4725.c b/drivers/iio/dac/mcp4725.c index 446d1a8fe4be..46bf758760f8 100644 --- a/drivers/iio/dac/mcp4725.c +++ b/drivers/iio/dac/mcp4725.c @@ -369,9 +369,9 @@ static int mcp4725_probe_dt(struct device *dev, return 0; } -static int mcp4725_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int mcp4725_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct mcp4725_data *data; struct iio_dev *indio_dev; struct mcp4725_platform_data *pdata, pdata_dt; @@ -524,7 +524,7 @@ static struct i2c_driver mcp4725_driver = { .of_match_table = mcp4725_of_match, .pm = pm_sleep_ptr(&mcp4725_pm_ops), }, - .probe = mcp4725_probe, + .probe_new = mcp4725_probe, .remove = mcp4725_remove, .id_table = mcp4725_id, }; From 3de8dd69fad2c2ee7ca623d78fb2806d7b8b5a80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:06 +0100 Subject: [PATCH 2421/4122] iio: dac: ti-dac5571: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Reviewed-by: Laurent Pinchart Link: https://lore.kernel.org/r/20221118224540.619276-93-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/dac/ti-dac5571.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/dac/ti-dac5571.c b/drivers/iio/dac/ti-dac5571.c index 3210e3098f9a..40191947fea3 100644 --- a/drivers/iio/dac/ti-dac5571.c +++ b/drivers/iio/dac/ti-dac5571.c @@ -306,9 +306,9 @@ static const struct iio_info dac5571_info = { .write_raw_get_fmt = dac5571_write_raw_get_fmt, }; -static int dac5571_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int dac5571_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct device *dev = &client->dev; const struct dac5571_spec *spec; struct dac5571_data *data; @@ -426,7 +426,7 @@ static struct i2c_driver dac5571_driver = { .name = "ti-dac5571", .of_match_table = dac5571_of_id, }, - .probe = dac5571_probe, + .probe_new = dac5571_probe, .remove = dac5571_remove, .id_table = dac5571_id, }; From 840ef016c202fc196c91f020750fab08c9a0779f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:07 +0100 Subject: [PATCH 2422/4122] iio: gyro: bmg160_i2c: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-94-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/gyro/bmg160_i2c.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/gyro/bmg160_i2c.c b/drivers/iio/gyro/bmg160_i2c.c index 908ccc385254..2b019ee5b2eb 100644 --- a/drivers/iio/gyro/bmg160_i2c.c +++ b/drivers/iio/gyro/bmg160_i2c.c @@ -13,9 +13,9 @@ static const struct regmap_config bmg160_regmap_i2c_conf = { .max_register = 0x3f }; -static int bmg160_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int bmg160_i2c_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct regmap *regmap; const char *name = NULL; @@ -70,7 +70,7 @@ static struct i2c_driver bmg160_i2c_driver = { .of_match_table = bmg160_of_match, .pm = &bmg160_pm_ops, }, - .probe = bmg160_i2c_probe, + .probe_new = bmg160_i2c_probe, .remove = bmg160_i2c_remove, .id_table = bmg160_i2c_id, }; From 8312841060b8bc4b463dcf6a2cb8cc2452c6ac8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:08 +0100 Subject: [PATCH 2423/4122] iio: gyro: itg3200_core: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-95-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/gyro/itg3200_core.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/gyro/itg3200_core.c b/drivers/iio/gyro/itg3200_core.c index 74ca22468496..ceacd863d3ea 100644 --- a/drivers/iio/gyro/itg3200_core.c +++ b/drivers/iio/gyro/itg3200_core.c @@ -295,8 +295,7 @@ static const struct iio_info itg3200_info = { static const unsigned long itg3200_available_scan_masks[] = { 0xffffffff, 0x0 }; -static int itg3200_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int itg3200_probe(struct i2c_client *client) { int ret; struct itg3200 *st; @@ -406,7 +405,7 @@ static struct i2c_driver itg3200_driver = { .pm = pm_sleep_ptr(&itg3200_pm_ops), }, .id_table = itg3200_id, - .probe = itg3200_probe, + .probe_new = itg3200_probe, .remove = itg3200_remove, }; From b97db5284e9ae169de79f78c3f09514d661407a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:09 +0100 Subject: [PATCH 2424/4122] iio: gyro: mpu3050-i2c: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-96-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/gyro/mpu3050-i2c.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/gyro/mpu3050-i2c.c b/drivers/iio/gyro/mpu3050-i2c.c index 12e3afa9dd11..2116798226bf 100644 --- a/drivers/iio/gyro/mpu3050-i2c.c +++ b/drivers/iio/gyro/mpu3050-i2c.c @@ -32,9 +32,9 @@ static int mpu3050_i2c_bypass_deselect(struct i2c_mux_core *mux, u32 chan_id) return 0; } -static int mpu3050_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int mpu3050_i2c_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct regmap *regmap; const char *name; struct mpu3050 *mpu3050; @@ -108,7 +108,7 @@ static const struct of_device_id mpu3050_i2c_of_match[] = { MODULE_DEVICE_TABLE(of, mpu3050_i2c_of_match); static struct i2c_driver mpu3050_i2c_driver = { - .probe = mpu3050_i2c_probe, + .probe_new = mpu3050_i2c_probe, .remove = mpu3050_i2c_remove, .id_table = mpu3050_i2c_id, .driver = { From 1522b453c153cd93fe295a6dbd589f0f62fb168b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:10 +0100 Subject: [PATCH 2425/4122] iio: gyro: st_gyro_i2c: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-97-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/gyro/st_gyro_i2c.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/gyro/st_gyro_i2c.c b/drivers/iio/gyro/st_gyro_i2c.c index 8c7af42b6558..797a1c6a0402 100644 --- a/drivers/iio/gyro/st_gyro_i2c.c +++ b/drivers/iio/gyro/st_gyro_i2c.c @@ -58,8 +58,7 @@ static const struct of_device_id st_gyro_of_match[] = { }; MODULE_DEVICE_TABLE(of, st_gyro_of_match); -static int st_gyro_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int st_gyro_i2c_probe(struct i2c_client *client) { const struct st_sensor_settings *settings; struct st_sensor_data *gdata; @@ -112,7 +111,7 @@ static struct i2c_driver st_gyro_driver = { .name = "st-gyro-i2c", .of_match_table = st_gyro_of_match, }, - .probe = st_gyro_i2c_probe, + .probe_new = st_gyro_i2c_probe, .id_table = st_gyro_id_table, }; module_i2c_driver(st_gyro_driver); From d4764a4045b14a938594a082662b780c2c8eec78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:11 +0100 Subject: [PATCH 2426/4122] iio: health: afe4404: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-98-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/health/afe4404.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/health/afe4404.c b/drivers/iio/health/afe4404.c index 8fca787b2524..658dfc1a346c 100644 --- a/drivers/iio/health/afe4404.c +++ b/drivers/iio/health/afe4404.c @@ -459,8 +459,7 @@ static int afe4404_resume(struct device *dev) static DEFINE_SIMPLE_DEV_PM_OPS(afe4404_pm_ops, afe4404_suspend, afe4404_resume); -static int afe4404_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int afe4404_probe(struct i2c_client *client) { struct iio_dev *indio_dev; struct afe4404_data *afe; @@ -608,7 +607,7 @@ static struct i2c_driver afe4404_i2c_driver = { .of_match_table = afe4404_of_match, .pm = pm_sleep_ptr(&afe4404_pm_ops), }, - .probe = afe4404_probe, + .probe_new = afe4404_probe, .remove = afe4404_remove, .id_table = afe4404_ids, }; From 3ef7e6e019c66beb6e4c8749a70d09d26326be8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:12 +0100 Subject: [PATCH 2427/4122] iio: health: max30100: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-99-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/health/max30100.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/health/max30100.c b/drivers/iio/health/max30100.c index 3aa5d037a1c3..a80fa9852c22 100644 --- a/drivers/iio/health/max30100.c +++ b/drivers/iio/health/max30100.c @@ -417,8 +417,7 @@ static const struct iio_info max30100_info = { .read_raw = max30100_read_raw, }; -static int max30100_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int max30100_probe(struct i2c_client *client) { struct max30100_data *data; struct iio_dev *indio_dev; @@ -500,7 +499,7 @@ static struct i2c_driver max30100_driver = { .name = MAX30100_DRV_NAME, .of_match_table = max30100_dt_ids, }, - .probe = max30100_probe, + .probe_new = max30100_probe, .remove = max30100_remove, .id_table = max30100_id, }; From 15818f0890133372fb7eb875841dbcdb8c8ed592 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:13 +0100 Subject: [PATCH 2428/4122] iio: health: max30102: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-100-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/health/max30102.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/health/max30102.c b/drivers/iio/health/max30102.c index 66df4aaa31a7..7edcf9e05687 100644 --- a/drivers/iio/health/max30102.c +++ b/drivers/iio/health/max30102.c @@ -513,9 +513,9 @@ static const struct iio_info max30102_info = { .read_raw = max30102_read_raw, }; -static int max30102_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int max30102_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct max30102_data *data; struct iio_dev *indio_dev; int ret; @@ -631,7 +631,7 @@ static struct i2c_driver max30102_driver = { .name = MAX30102_DRV_NAME, .of_match_table = max30102_dt_ids, }, - .probe = max30102_probe, + .probe_new = max30102_probe, .remove = max30102_remove, .id_table = max30102_id, }; From 1b1a60e70b0cd7ec46df424f6e7d342585c446b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:14 +0100 Subject: [PATCH 2429/4122] iio: humidity: am2315: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-101-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/humidity/am2315.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/humidity/am2315.c b/drivers/iio/humidity/am2315.c index 4a39f1019347..f246516bd45e 100644 --- a/drivers/iio/humidity/am2315.c +++ b/drivers/iio/humidity/am2315.c @@ -218,8 +218,7 @@ static const struct iio_info am2315_info = { .read_raw = am2315_read_raw, }; -static int am2315_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int am2315_probe(struct i2c_client *client) { int ret; struct iio_dev *indio_dev; @@ -263,7 +262,7 @@ static struct i2c_driver am2315_driver = { .driver = { .name = "am2315", }, - .probe = am2315_probe, + .probe_new = am2315_probe, .id_table = am2315_i2c_id, }; From c5f1c4fdbfa2ac6ca247bf5f32fb4c35ba0b1ec5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:15 +0100 Subject: [PATCH 2430/4122] iio: humidity: hdc100x: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-102-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/humidity/hdc100x.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/humidity/hdc100x.c b/drivers/iio/humidity/hdc100x.c index 47f8e8ef56d6..49a950d739e4 100644 --- a/drivers/iio/humidity/hdc100x.c +++ b/drivers/iio/humidity/hdc100x.c @@ -351,8 +351,7 @@ static const struct iio_info hdc100x_info = { .attrs = &hdc100x_attribute_group, }; -static int hdc100x_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int hdc100x_probe(struct i2c_client *client) { struct iio_dev *indio_dev; struct hdc100x_data *data; @@ -429,7 +428,7 @@ static struct i2c_driver hdc100x_driver = { .of_match_table = hdc100x_dt_ids, .acpi_match_table = hdc100x_acpi_match, }, - .probe = hdc100x_probe, + .probe_new = hdc100x_probe, .id_table = hdc100x_id, }; module_i2c_driver(hdc100x_driver); From 0a78deeb639a464c9160f6bba912c97567f18b17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:16 +0100 Subject: [PATCH 2431/4122] iio: humidity: hdc2010: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-103-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/humidity/hdc2010.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/humidity/hdc2010.c b/drivers/iio/humidity/hdc2010.c index d6858ccb056e..c8fddd612e06 100644 --- a/drivers/iio/humidity/hdc2010.c +++ b/drivers/iio/humidity/hdc2010.c @@ -251,8 +251,7 @@ static const struct iio_info hdc2010_info = { .attrs = &hdc2010_attribute_group, }; -static int hdc2010_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int hdc2010_probe(struct i2c_client *client) { struct iio_dev *indio_dev; struct hdc2010_data *data; @@ -339,7 +338,7 @@ static struct i2c_driver hdc2010_driver = { .name = "hdc2010", .of_match_table = hdc2010_dt_ids, }, - .probe = hdc2010_probe, + .probe_new = hdc2010_probe, .remove = hdc2010_remove, .id_table = hdc2010_id, }; From 7b64a83ce6f94bbb91d91a66555f17bf3e9bc085 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:17 +0100 Subject: [PATCH 2432/4122] iio: humidity: hts221_i2c: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-104-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/humidity/hts221_i2c.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/humidity/hts221_i2c.c b/drivers/iio/humidity/hts221_i2c.c index afbc611f7712..d81869423cf0 100644 --- a/drivers/iio/humidity/hts221_i2c.c +++ b/drivers/iio/humidity/hts221_i2c.c @@ -25,8 +25,7 @@ static const struct regmap_config hts221_i2c_regmap_config = { .read_flag_mask = HTS221_I2C_AUTO_INCREMENT, }; -static int hts221_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int hts221_i2c_probe(struct i2c_client *client) { struct regmap *regmap; @@ -66,7 +65,7 @@ static struct i2c_driver hts221_driver = { .of_match_table = hts221_i2c_of_match, .acpi_match_table = ACPI_PTR(hts221_acpi_match), }, - .probe = hts221_i2c_probe, + .probe_new = hts221_i2c_probe, .id_table = hts221_i2c_id_table, }; module_i2c_driver(hts221_driver); From 1a144b6320f32feec3e54dbbda8b4c58b69f86c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:18 +0100 Subject: [PATCH 2433/4122] iio: humidity: htu21: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-105-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/humidity/htu21.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/humidity/htu21.c b/drivers/iio/humidity/htu21.c index fd9e2565f8a2..8411a9f3e828 100644 --- a/drivers/iio/humidity/htu21.c +++ b/drivers/iio/humidity/htu21.c @@ -177,9 +177,9 @@ static const struct iio_info htu21_info = { .attrs = &htu21_attribute_group, }; -static int htu21_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int htu21_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct ms_ht_dev *dev_data; struct iio_dev *indio_dev; int ret; @@ -244,7 +244,7 @@ static const struct of_device_id htu21_of_match[] = { MODULE_DEVICE_TABLE(of, htu21_of_match); static struct i2c_driver htu21_driver = { - .probe = htu21_probe, + .probe_new = htu21_probe, .id_table = htu21_id, .driver = { .name = "htu21", From e18594f6d87f8f7f2623263eb230139382e1a080 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:19 +0100 Subject: [PATCH 2434/4122] iio: humidity: si7005: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-106-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/humidity/si7005.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/humidity/si7005.c b/drivers/iio/humidity/si7005.c index 160b3d92df61..fa1faf168c8d 100644 --- a/drivers/iio/humidity/si7005.c +++ b/drivers/iio/humidity/si7005.c @@ -123,8 +123,7 @@ static const struct iio_info si7005_info = { .read_raw = si7005_read_raw, }; -static int si7005_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int si7005_probe(struct i2c_client *client) { struct iio_dev *indio_dev; struct si7005_data *data; @@ -174,7 +173,7 @@ static struct i2c_driver si7005_driver = { .driver = { .name = "si7005", }, - .probe = si7005_probe, + .probe_new = si7005_probe, .id_table = si7005_id, }; module_i2c_driver(si7005_driver); From e6b610550e2c17b9eb83b10109462fc88f9a6367 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:20 +0100 Subject: [PATCH 2435/4122] iio: humidity: si7020: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-107-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/humidity/si7020.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/humidity/si7020.c b/drivers/iio/humidity/si7020.c index ab6537f136ba..3e50592e8e68 100644 --- a/drivers/iio/humidity/si7020.c +++ b/drivers/iio/humidity/si7020.c @@ -103,8 +103,7 @@ static const struct iio_info si7020_info = { .read_raw = si7020_read_raw, }; -static int si7020_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int si7020_probe(struct i2c_client *client) { struct iio_dev *indio_dev; struct i2c_client **data; @@ -156,7 +155,7 @@ static struct i2c_driver si7020_driver = { .name = "si7020", .of_match_table = si7020_dt_ids, }, - .probe = si7020_probe, + .probe_new = si7020_probe, .id_table = si7020_id, }; From 97202c55041806ba3fcddd6ca1b467154b5812cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:21 +0100 Subject: [PATCH 2436/4122] iio: imu: bmi160/bmi160_i2c: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-108-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/imu/bmi160/bmi160_i2c.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/imu/bmi160/bmi160_i2c.c b/drivers/iio/imu/bmi160/bmi160_i2c.c index d93f4fa2ad55..2ca907d396a0 100644 --- a/drivers/iio/imu/bmi160/bmi160_i2c.c +++ b/drivers/iio/imu/bmi160/bmi160_i2c.c @@ -15,9 +15,9 @@ #include "bmi160.h" -static int bmi160_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int bmi160_i2c_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct regmap *regmap; const char *name; @@ -60,7 +60,7 @@ static struct i2c_driver bmi160_i2c_driver = { .acpi_match_table = bmi160_acpi_match, .of_match_table = bmi160_of_match, }, - .probe = bmi160_i2c_probe, + .probe_new = bmi160_i2c_probe, .id_table = bmi160_i2c_id, }; module_i2c_driver(bmi160_i2c_driver); From 4fee985aaf3afaa4ce908342a854439fd8480196 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:22 +0100 Subject: [PATCH 2437/4122] iio: imu: fxos8700_i2c: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-109-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/imu/fxos8700_i2c.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/imu/fxos8700_i2c.c b/drivers/iio/imu/fxos8700_i2c.c index 40a570325b0a..a74a15fda8cb 100644 --- a/drivers/iio/imu/fxos8700_i2c.c +++ b/drivers/iio/imu/fxos8700_i2c.c @@ -18,9 +18,9 @@ #include "fxos8700.h" -static int fxos8700_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int fxos8700_i2c_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct regmap *regmap; const char *name = NULL; @@ -60,7 +60,7 @@ static struct i2c_driver fxos8700_i2c_driver = { .acpi_match_table = ACPI_PTR(fxos8700_acpi_match), .of_match_table = fxos8700_of_match, }, - .probe = fxos8700_i2c_probe, + .probe_new = fxos8700_i2c_probe, .id_table = fxos8700_i2c_id, }; module_i2c_driver(fxos8700_i2c_driver); From 4f218ae01c5c365f78f97e5d5b2c8cb15b503fe3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:23 +0100 Subject: [PATCH 2438/4122] iio: imu: inv_mpu6050: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Acked-by: Jean-Baptiste Maneyrol Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-110-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/imu/inv_mpu6050/inv_mpu_i2c.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_i2c.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_i2c.c index 7a8d60a5afa9..2f2da4cb7321 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_i2c.c +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_i2c.c @@ -91,13 +91,12 @@ static int inv_mpu_i2c_aux_setup(struct iio_dev *indio_dev) /** * inv_mpu_probe() - probe function. * @client: i2c client. - * @id: i2c device id. * * Returns 0 on success, a negative error code otherwise. */ -static int inv_mpu_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int inv_mpu_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); const void *match; struct inv_mpu6050_state *st; int result; @@ -260,7 +259,7 @@ static const struct acpi_device_id inv_acpi_match[] = { MODULE_DEVICE_TABLE(acpi, inv_acpi_match); static struct i2c_driver inv_mpu_driver = { - .probe = inv_mpu_probe, + .probe_new = inv_mpu_probe, .remove = inv_mpu_remove, .id_table = inv_mpu_id, .driver = { From 4bf718bc3b7f07f82895d4e56a2af93b9a89f43a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:24 +0100 Subject: [PATCH 2439/4122] iio: imu: kmx61: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-111-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/imu/kmx61.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/imu/kmx61.c b/drivers/iio/imu/kmx61.c index b10c0dcac0bb..e692dfeeda44 100644 --- a/drivers/iio/imu/kmx61.c +++ b/drivers/iio/imu/kmx61.c @@ -1276,9 +1276,9 @@ static struct iio_trigger *kmx61_trigger_setup(struct kmx61_data *data, return trig; } -static int kmx61_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int kmx61_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); int ret; struct kmx61_data *data; const char *name = NULL; @@ -1517,7 +1517,7 @@ static struct i2c_driver kmx61_driver = { .acpi_match_table = ACPI_PTR(kmx61_acpi_match), .pm = pm_ptr(&kmx61_pm_ops), }, - .probe = kmx61_probe, + .probe_new = kmx61_probe, .remove = kmx61_remove, .id_table = kmx61_id, }; From b7dbc0aecb2f4ca38475f0ef9d1b6ab6137be599 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:25 +0100 Subject: [PATCH 2440/4122] iio: imu: st_lsm6dsx: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-112-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c index 0faf1b4c11af..3570fac1b612 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c @@ -21,9 +21,9 @@ static const struct regmap_config st_lsm6dsx_i2c_regmap_config = { .val_bits = 8, }; -static int st_lsm6dsx_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int st_lsm6dsx_i2c_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); int hw_id = id->driver_data; struct regmap *regmap; @@ -152,7 +152,7 @@ static struct i2c_driver st_lsm6dsx_driver = { .pm = pm_sleep_ptr(&st_lsm6dsx_pm_ops), .of_match_table = st_lsm6dsx_i2c_of_match, }, - .probe = st_lsm6dsx_i2c_probe, + .probe_new = st_lsm6dsx_i2c_probe, .id_table = st_lsm6dsx_i2c_id_table, }; module_i2c_driver(st_lsm6dsx_driver); From 39c7d963d54a4dee2c6d95a4c5461600b26ca295 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:26 +0100 Subject: [PATCH 2441/4122] iio: light: adjd_s311: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-113-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/light/adjd_s311.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/adjd_s311.c b/drivers/iio/light/adjd_s311.c index 6b33975c8d73..210a90f44c53 100644 --- a/drivers/iio/light/adjd_s311.c +++ b/drivers/iio/light/adjd_s311.c @@ -233,8 +233,7 @@ static const struct iio_info adjd_s311_info = { .write_raw = adjd_s311_write_raw, }; -static int adjd_s311_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int adjd_s311_probe(struct i2c_client *client) { struct adjd_s311_data *data; struct iio_dev *indio_dev; @@ -271,7 +270,7 @@ static struct i2c_driver adjd_s311_driver = { .driver = { .name = ADJD_S311_DRV_NAME, }, - .probe = adjd_s311_probe, + .probe_new = adjd_s311_probe, .id_table = adjd_s311_id, }; module_i2c_driver(adjd_s311_driver); From fd63b0a46acf4b00ff69e6c60ecdd3c78a68fc71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:27 +0100 Subject: [PATCH 2442/4122] iio: light: adux1020: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-114-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/light/adux1020.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/adux1020.c b/drivers/iio/light/adux1020.c index 9aa28695e6f1..606075350d01 100644 --- a/drivers/iio/light/adux1020.c +++ b/drivers/iio/light/adux1020.c @@ -774,8 +774,7 @@ static int adux1020_chip_init(struct adux1020_data *data) ADUX1020_MODE_INT_MASK, ADUX1020_MODE_INT_DISABLE); } -static int adux1020_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int adux1020_probe(struct i2c_client *client) { struct adux1020_data *data; struct iio_dev *indio_dev; @@ -838,7 +837,7 @@ static struct i2c_driver adux1020_driver = { .name = ADUX1020_DRV_NAME, .of_match_table = adux1020_of_match, }, - .probe = adux1020_probe, + .probe_new = adux1020_probe, .id_table = adux1020_id, }; module_i2c_driver(adux1020_driver); From ad428de325d27964f1c471ae496232df89b22885 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:28 +0100 Subject: [PATCH 2443/4122] iio: light: al3010: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: David Heidelberg Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-115-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/light/al3010.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/al3010.c b/drivers/iio/light/al3010.c index ce5363845b22..69cc723e2ac4 100644 --- a/drivers/iio/light/al3010.c +++ b/drivers/iio/light/al3010.c @@ -164,8 +164,7 @@ static const struct iio_info al3010_info = { .attrs = &al3010_attribute_group, }; -static int al3010_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int al3010_probe(struct i2c_client *client) { struct al3010_data *data; struct iio_dev *indio_dev; @@ -230,7 +229,7 @@ static struct i2c_driver al3010_driver = { .of_match_table = al3010_of_match, .pm = pm_sleep_ptr(&al3010_pm_ops), }, - .probe = al3010_probe, + .probe_new = al3010_probe, .id_table = al3010_id, }; module_i2c_driver(al3010_driver); From a76c90301f86107240eebb404d9881f16344f779 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:29 +0100 Subject: [PATCH 2444/4122] iio: light: al3320a: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-116-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/light/al3320a.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/al3320a.c b/drivers/iio/light/al3320a.c index bc99179728ed..9ff28bbf34bb 100644 --- a/drivers/iio/light/al3320a.c +++ b/drivers/iio/light/al3320a.c @@ -187,8 +187,7 @@ static const struct iio_info al3320a_info = { .attrs = &al3320a_attribute_group, }; -static int al3320a_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int al3320a_probe(struct i2c_client *client) { struct al3320a_data *data; struct iio_dev *indio_dev; @@ -254,7 +253,7 @@ static struct i2c_driver al3320a_driver = { .of_match_table = al3320a_of_match, .pm = pm_sleep_ptr(&al3320a_pm_ops), }, - .probe = al3320a_probe, + .probe_new = al3320a_probe, .id_table = al3320a_id, }; From debe8c7568242ae7e49803c6bffc56362e43bfd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:30 +0100 Subject: [PATCH 2445/4122] iio: light: apds9300: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-117-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/light/apds9300.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/apds9300.c b/drivers/iio/light/apds9300.c index b70f2681bcb3..15dfb753734f 100644 --- a/drivers/iio/light/apds9300.c +++ b/drivers/iio/light/apds9300.c @@ -398,8 +398,7 @@ static irqreturn_t apds9300_interrupt_handler(int irq, void *private) return IRQ_HANDLED; } -static int apds9300_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int apds9300_probe(struct i2c_client *client) { struct apds9300_data *data; struct iio_dev *indio_dev; @@ -505,7 +504,7 @@ static struct i2c_driver apds9300_driver = { .name = APDS9300_DRV_NAME, .pm = pm_sleep_ptr(&apds9300_pm_ops), }, - .probe = apds9300_probe, + .probe_new = apds9300_probe, .remove = apds9300_remove, .id_table = apds9300_id, }; From 783964ae9752541356e40f47b677069151543c80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:31 +0100 Subject: [PATCH 2446/4122] iio: light: apds9960: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Acked-by: Matt Ranostay Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-118-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/light/apds9960.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/apds9960.c b/drivers/iio/light/apds9960.c index 997aa01ecc11..ee6acc6a36ee 100644 --- a/drivers/iio/light/apds9960.c +++ b/drivers/iio/light/apds9960.c @@ -988,8 +988,7 @@ static int apds9960_chip_init(struct apds9960_data *data) return apds9960_set_powermode(data, 1); } -static int apds9960_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int apds9960_probe(struct i2c_client *client) { struct apds9960_data *data; struct iio_dev *indio_dev; @@ -1132,7 +1131,7 @@ static struct i2c_driver apds9960_driver = { .pm = &apds9960_pm_ops, .acpi_match_table = apds9960_acpi_match, }, - .probe = apds9960_probe, + .probe_new = apds9960_probe, .remove = apds9960_remove, .id_table = apds9960_id, }; From 058d6333ea4fe069cce36adb12d6a6341ddcb540 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:32 +0100 Subject: [PATCH 2447/4122] iio: light: bh1750: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-119-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/light/bh1750.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/bh1750.c b/drivers/iio/light/bh1750.c index 3e92820bc820..390c5b3ad4f6 100644 --- a/drivers/iio/light/bh1750.c +++ b/drivers/iio/light/bh1750.c @@ -228,9 +228,9 @@ static const struct iio_chan_spec bh1750_channels[] = { } }; -static int bh1750_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int bh1750_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); int ret, usec; struct bh1750_data *data; struct iio_dev *indio_dev; @@ -320,7 +320,7 @@ static struct i2c_driver bh1750_driver = { .of_match_table = bh1750_of_match, .pm = pm_sleep_ptr(&bh1750_pm_ops), }, - .probe = bh1750_probe, + .probe_new = bh1750_probe, .remove = bh1750_remove, .id_table = bh1750_id, From ed5c6b1ce4e1d412f9a97973348c7b8f3e12025a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:33 +0100 Subject: [PATCH 2448/4122] iio: light: bh1780: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-120-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/light/bh1780.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/bh1780.c b/drivers/iio/light/bh1780.c index 90bca392b262..da9039e5a839 100644 --- a/drivers/iio/light/bh1780.c +++ b/drivers/iio/light/bh1780.c @@ -141,8 +141,7 @@ static const struct iio_chan_spec bh1780_channels[] = { } }; -static int bh1780_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int bh1780_probe(struct i2c_client *client) { int ret; struct bh1780_data *bh1780; @@ -270,7 +269,7 @@ static const struct of_device_id of_bh1780_match[] = { MODULE_DEVICE_TABLE(of, of_bh1780_match); static struct i2c_driver bh1780_driver = { - .probe = bh1780_probe, + .probe_new = bh1780_probe, .remove = bh1780_remove, .id_table = bh1780_id, .driver = { From f8232aad7609a946d4bbb76b747cb9d73b8180f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:34 +0100 Subject: [PATCH 2449/4122] iio: light: cm3232: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-121-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/light/cm3232.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/cm3232.c b/drivers/iio/light/cm3232.c index 5214cd014cf8..43e492f5051d 100644 --- a/drivers/iio/light/cm3232.c +++ b/drivers/iio/light/cm3232.c @@ -325,9 +325,9 @@ static const struct iio_info cm3232_info = { .attrs = &cm3232_attribute_group, }; -static int cm3232_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int cm3232_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct cm3232_chip *chip; struct iio_dev *indio_dev; int ret; @@ -417,7 +417,7 @@ static struct i2c_driver cm3232_driver = { .pm = pm_sleep_ptr(&cm3232_pm_ops), }, .id_table = cm3232_id, - .probe = cm3232_probe, + .probe_new = cm3232_probe, .remove = cm3232_remove, }; From ebbcdb1a9ddec05b37347d3fbf537e1741876901 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:35 +0100 Subject: [PATCH 2450/4122] iio: light: cm3323: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-122-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/light/cm3323.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/cm3323.c b/drivers/iio/light/cm3323.c index fd9a8c27de2e..e5ce7d0fd272 100644 --- a/drivers/iio/light/cm3323.c +++ b/drivers/iio/light/cm3323.c @@ -214,8 +214,7 @@ static const struct iio_info cm3323_info = { .attrs = &cm3323_attribute_group, }; -static int cm3323_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int cm3323_probe(struct i2c_client *client) { struct cm3323_data *data; struct iio_dev *indio_dev; @@ -267,7 +266,7 @@ static struct i2c_driver cm3323_driver = { .name = CM3323_DRV_NAME, .of_match_table = cm3323_of_match, }, - .probe = cm3323_probe, + .probe_new = cm3323_probe, .id_table = cm3323_id, }; From 31ceb2f5b4fbd7dd46d7d679b990e9195f1b3db6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:36 +0100 Subject: [PATCH 2451/4122] iio: light: cm36651: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-123-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/light/cm36651.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/cm36651.c b/drivers/iio/light/cm36651.c index 6615c98b601c..1707dbf2ce26 100644 --- a/drivers/iio/light/cm36651.c +++ b/drivers/iio/light/cm36651.c @@ -618,9 +618,9 @@ static const struct iio_info cm36651_info = { .attrs = &cm36651_attribute_group, }; -static int cm36651_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int cm36651_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct cm36651_data *cm36651; struct iio_dev *indio_dev; int ret; @@ -730,7 +730,7 @@ static struct i2c_driver cm36651_driver = { .name = "cm36651", .of_match_table = cm36651_of_match, }, - .probe = cm36651_probe, + .probe_new = cm36651_probe, .remove = cm36651_remove, .id_table = cm36651_id, }; From b3f8e22e737e7991a3754675c7418215cdb461b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:37 +0100 Subject: [PATCH 2452/4122] iio: light: gp2ap002: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-124-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/light/gp2ap002.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/gp2ap002.c b/drivers/iio/light/gp2ap002.c index 8000fa347344..c0430db0038a 100644 --- a/drivers/iio/light/gp2ap002.c +++ b/drivers/iio/light/gp2ap002.c @@ -425,8 +425,7 @@ static struct regmap_bus gp2ap002_regmap_bus = { .reg_write = gp2ap002_regmap_i2c_write, }; -static int gp2ap002_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int gp2ap002_probe(struct i2c_client *client) { struct gp2ap002 *gp2ap002; struct iio_dev *indio_dev; @@ -711,7 +710,7 @@ static struct i2c_driver gp2ap002_driver = { .of_match_table = gp2ap002_of_match, .pm = pm_ptr(&gp2ap002_dev_pm_ops), }, - .probe = gp2ap002_probe, + .probe_new = gp2ap002_probe, .remove = gp2ap002_remove, .id_table = gp2ap002_id_table, }; From a969195001bcf0d8baa5363fc8a1b4d3fc5aff0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:38 +0100 Subject: [PATCH 2453/4122] iio: light: gp2ap020a00f: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-125-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/light/gp2ap020a00f.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/gp2ap020a00f.c b/drivers/iio/light/gp2ap020a00f.c index 826439299e8b..a5bf9da0d2f3 100644 --- a/drivers/iio/light/gp2ap020a00f.c +++ b/drivers/iio/light/gp2ap020a00f.c @@ -1467,9 +1467,9 @@ static const struct iio_buffer_setup_ops gp2ap020a00f_buffer_setup_ops = { .predisable = &gp2ap020a00f_buffer_predisable, }; -static int gp2ap020a00f_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int gp2ap020a00f_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct gp2ap020a00f_data *data; struct iio_dev *indio_dev; struct regmap *regmap; @@ -1609,7 +1609,7 @@ static struct i2c_driver gp2ap020a00f_driver = { .name = GP2A_I2C_NAME, .of_match_table = gp2ap020a00f_of_match, }, - .probe = gp2ap020a00f_probe, + .probe_new = gp2ap020a00f_probe, .remove = gp2ap020a00f_remove, .id_table = gp2ap020a00f_id, }; From ee6e0241f854b23f2177b05a6cdd538bcfe8d9cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:39 +0100 Subject: [PATCH 2454/4122] iio: light: isl29018: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-126-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/light/isl29018.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/isl29018.c b/drivers/iio/light/isl29018.c index b36f8b7ca68e..141845fb47f9 100644 --- a/drivers/iio/light/isl29018.c +++ b/drivers/iio/light/isl29018.c @@ -711,9 +711,9 @@ static void isl29018_disable_regulator_action(void *_data) pr_err("failed to disable isl29018's VCC regulator!\n"); } -static int isl29018_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int isl29018_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct isl29018_chip *chip; struct iio_dev *indio_dev; int err; @@ -865,7 +865,7 @@ static struct i2c_driver isl29018_driver = { .pm = pm_sleep_ptr(&isl29018_pm_ops), .of_match_table = isl29018_of_match, }, - .probe = isl29018_probe, + .probe_new = isl29018_probe, .id_table = isl29018_id, }; module_i2c_driver(isl29018_driver); From 2086bafffdf7200238ba887a4ac9bdba0c4503e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:40 +0100 Subject: [PATCH 2455/4122] iio: light: isl29028: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-127-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/light/isl29028.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/isl29028.c b/drivers/iio/light/isl29028.c index 32d58e18f26d..bcf3a556e41a 100644 --- a/drivers/iio/light/isl29028.c +++ b/drivers/iio/light/isl29028.c @@ -565,9 +565,9 @@ static const struct regmap_config isl29028_regmap_config = { .cache_type = REGCACHE_RBTREE, }; -static int isl29028_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int isl29028_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct isl29028_chip *chip; struct iio_dev *indio_dev; int ret; @@ -698,7 +698,7 @@ static struct i2c_driver isl29028_driver = { .pm = pm_ptr(&isl29028_pm_ops), .of_match_table = isl29028_of_match, }, - .probe = isl29028_probe, + .probe_new = isl29028_probe, .remove = isl29028_remove, .id_table = isl29028_id, }; From 3059126a449c8c62c6b766da7744f853cce6ad36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:41 +0100 Subject: [PATCH 2456/4122] iio: light: isl29125: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-128-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/light/isl29125.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/isl29125.c b/drivers/iio/light/isl29125.c index c199e63cce82..b4bd656ca169 100644 --- a/drivers/iio/light/isl29125.c +++ b/drivers/iio/light/isl29125.c @@ -241,8 +241,7 @@ static const struct iio_buffer_setup_ops isl29125_buffer_setup_ops = { .predisable = isl29125_buffer_predisable, }; -static int isl29125_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int isl29125_probe(struct i2c_client *client) { struct isl29125_data *data; struct iio_dev *indio_dev; @@ -338,7 +337,7 @@ static struct i2c_driver isl29125_driver = { .name = ISL29125_DRV_NAME, .pm = pm_sleep_ptr(&isl29125_pm_ops), }, - .probe = isl29125_probe, + .probe_new = isl29125_probe, .remove = isl29125_remove, .id_table = isl29125_id, }; From dd97aab4f02c72d053335424752ee648a572ff81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:42 +0100 Subject: [PATCH 2457/4122] iio: light: jsa1212: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-129-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/light/jsa1212.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/jsa1212.c b/drivers/iio/light/jsa1212.c index 57ce6d75966c..d3834d0a0635 100644 --- a/drivers/iio/light/jsa1212.c +++ b/drivers/iio/light/jsa1212.c @@ -308,8 +308,7 @@ static const struct regmap_config jsa1212_regmap_config = { .volatile_reg = jsa1212_is_volatile_reg, }; -static int jsa1212_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int jsa1212_probe(struct i2c_client *client) { struct jsa1212_data *data; struct iio_dev *indio_dev; @@ -441,7 +440,7 @@ static struct i2c_driver jsa1212_driver = { .pm = pm_sleep_ptr(&jsa1212_pm_ops), .acpi_match_table = ACPI_PTR(jsa1212_acpi_match), }, - .probe = jsa1212_probe, + .probe_new = jsa1212_probe, .remove = jsa1212_remove, .id_table = jsa1212_id, }; From 958f5a0de0f67289e6bc0fa7850056b2af0c14f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:43 +0100 Subject: [PATCH 2458/4122] iio: light: ltr501: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-130-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/light/ltr501.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/ltr501.c b/drivers/iio/light/ltr501.c index 453b845ef265..bdbd918213e4 100644 --- a/drivers/iio/light/ltr501.c +++ b/drivers/iio/light/ltr501.c @@ -1432,9 +1432,9 @@ static const char *ltr501_match_acpi_device(struct device *dev, int *chip_idx) return dev_name(dev); } -static int ltr501_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int ltr501_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); static const char * const regulator_names[] = { "vdd", "vddio" }; struct ltr501_data *data; struct iio_dev *indio_dev; @@ -1641,7 +1641,7 @@ static struct i2c_driver ltr501_driver = { .pm = pm_sleep_ptr(&ltr501_pm_ops), .acpi_match_table = ACPI_PTR(ltr_acpi_match), }, - .probe = ltr501_probe, + .probe_new = ltr501_probe, .remove = ltr501_remove, .id_table = ltr501_id, }; From f0a6f7674ef29e01e8db2986f534476f60aac3cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:44 +0100 Subject: [PATCH 2459/4122] iio: light: lv0104cs: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-131-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/light/lv0104cs.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/lv0104cs.c b/drivers/iio/light/lv0104cs.c index c2aef88f4e63..c041fa0faa5d 100644 --- a/drivers/iio/light/lv0104cs.c +++ b/drivers/iio/light/lv0104cs.c @@ -474,8 +474,7 @@ static const struct iio_chan_spec lv0104cs_channels[] = { }, }; -static int lv0104cs_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int lv0104cs_probe(struct i2c_client *client) { struct iio_dev *indio_dev; struct lv0104cs_private *lv0104cs; @@ -521,7 +520,7 @@ static struct i2c_driver lv0104cs_i2c_driver = { .name = "lv0104cs", }, .id_table = lv0104cs_id, - .probe = lv0104cs_probe, + .probe_new = lv0104cs_probe, }; module_i2c_driver(lv0104cs_i2c_driver); From 0978ef36b24cb10937417e94d49d5a3f1092e3d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:45 +0100 Subject: [PATCH 2460/4122] iio: light: max44000: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-132-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/light/max44000.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/max44000.c b/drivers/iio/light/max44000.c index 85689dffbcbf..5dcabc43a30e 100644 --- a/drivers/iio/light/max44000.c +++ b/drivers/iio/light/max44000.c @@ -523,8 +523,7 @@ out_unlock: return IRQ_HANDLED; } -static int max44000_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int max44000_probe(struct i2c_client *client) { struct max44000_data *data; struct iio_dev *indio_dev; @@ -617,7 +616,7 @@ static struct i2c_driver max44000_driver = { .name = MAX44000_DRV_NAME, .acpi_match_table = ACPI_PTR(max44000_acpi_match), }, - .probe = max44000_probe, + .probe_new = max44000_probe, .id_table = max44000_id, }; From c49135d99f5285a3e781a50ee7111f68150d5cd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:47 +0100 Subject: [PATCH 2461/4122] iio: light: noa1305: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-134-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/light/noa1305.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/noa1305.c b/drivers/iio/light/noa1305.c index be3536b390fc..eaf548d4649e 100644 --- a/drivers/iio/light/noa1305.c +++ b/drivers/iio/light/noa1305.c @@ -186,8 +186,7 @@ static const struct regmap_config noa1305_regmap_config = { .writeable_reg = noa1305_writable_reg, }; -static int noa1305_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int noa1305_probe(struct i2c_client *client) { struct noa1305_priv *priv; struct iio_dev *indio_dev; @@ -279,7 +278,7 @@ static struct i2c_driver noa1305_driver = { .name = NOA1305_DRIVER_NAME, .of_match_table = noa1305_of_match, }, - .probe = noa1305_probe, + .probe_new = noa1305_probe, .id_table = noa1305_ids, }; From 06d10073283e5b301bd850a9ded6f592fb74a803 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:48 +0100 Subject: [PATCH 2462/4122] iio: light: opt3001: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-135-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/light/opt3001.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/opt3001.c b/drivers/iio/light/opt3001.c index a26d1c3f9543..ec4f5c2369c4 100644 --- a/drivers/iio/light/opt3001.c +++ b/drivers/iio/light/opt3001.c @@ -735,8 +735,7 @@ out: return IRQ_HANDLED; } -static int opt3001_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int opt3001_probe(struct i2c_client *client) { struct device *dev = &client->dev; @@ -835,7 +834,7 @@ static const struct of_device_id opt3001_of_match[] = { MODULE_DEVICE_TABLE(of, opt3001_of_match); static struct i2c_driver opt3001_driver = { - .probe = opt3001_probe, + .probe_new = opt3001_probe, .remove = opt3001_remove, .id_table = opt3001_id, From e16302076d8a42b760878fe603593e2c59106948 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:49 +0100 Subject: [PATCH 2463/4122] iio: light: pa12203001: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-136-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/light/pa12203001.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/pa12203001.c b/drivers/iio/light/pa12203001.c index 3cb2de51f4aa..15a666f15c27 100644 --- a/drivers/iio/light/pa12203001.c +++ b/drivers/iio/light/pa12203001.c @@ -338,8 +338,7 @@ out: return ret; } -static int pa12203001_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int pa12203001_probe(struct i2c_client *client) { struct pa12203001_data *data; struct iio_dev *indio_dev; @@ -475,7 +474,7 @@ static struct i2c_driver pa12203001_driver = { .pm = &pa12203001_pm_ops, .acpi_match_table = ACPI_PTR(pa12203001_acpi_match), }, - .probe = pa12203001_probe, + .probe_new = pa12203001_probe, .remove = pa12203001_remove, .id_table = pa12203001_id, From 314ba3b4208b8fdc66ca6a94dac778ab673444e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:50 +0100 Subject: [PATCH 2464/4122] iio: light: rpr0521: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-137-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/light/rpr0521.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/rpr0521.c b/drivers/iio/light/rpr0521.c index d1c16dd76058..668e444f6049 100644 --- a/drivers/iio/light/rpr0521.c +++ b/drivers/iio/light/rpr0521.c @@ -927,8 +927,7 @@ static const struct regmap_config rpr0521_regmap_config = { .volatile_reg = rpr0521_is_volatile_reg, }; -static int rpr0521_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int rpr0521_probe(struct i2c_client *client) { struct rpr0521_data *data; struct iio_dev *indio_dev; @@ -1122,7 +1121,7 @@ static struct i2c_driver rpr0521_driver = { .pm = pm_ptr(&rpr0521_pm_ops), .acpi_match_table = ACPI_PTR(rpr0521_acpi_match), }, - .probe = rpr0521_probe, + .probe_new = rpr0521_probe, .remove = rpr0521_remove, .id_table = rpr0521_id, }; From 122b0c0ba505a2faebb883bbfc178449d47c4fba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:51 +0100 Subject: [PATCH 2465/4122] iio: light: si1133: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-138-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/light/si1133.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/si1133.c b/drivers/iio/light/si1133.c index f8c9b2cc322e..a08fbc8f5adb 100644 --- a/drivers/iio/light/si1133.c +++ b/drivers/iio/light/si1133.c @@ -990,9 +990,9 @@ static int si1133_validate_ids(struct iio_dev *iio_dev) return 0; } -static int si1133_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int si1133_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct si1133_data *data; struct iio_dev *iio_dev; int err; @@ -1064,7 +1064,7 @@ static struct i2c_driver si1133_driver = { .driver = { .name = "si1133", }, - .probe = si1133_probe, + .probe_new = si1133_probe, .id_table = si1133_ids, }; From fb006652dc597b2807bab56d25fe3b3404f87e53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:52 +0100 Subject: [PATCH 2466/4122] iio: light: si1145: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-139-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/light/si1145.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/si1145.c b/drivers/iio/light/si1145.c index e8f6cdf26f22..f7126235f94c 100644 --- a/drivers/iio/light/si1145.c +++ b/drivers/iio/light/si1145.c @@ -1269,9 +1269,9 @@ static int si1145_probe_trigger(struct iio_dev *indio_dev) return 0; } -static int si1145_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int si1145_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct si1145_data *data; struct iio_dev *indio_dev; u8 part_id, rev_id, seq_id; @@ -1352,7 +1352,7 @@ static struct i2c_driver si1145_driver = { .driver = { .name = "si1145", }, - .probe = si1145_probe, + .probe_new = si1145_probe, .id_table = si1145_ids, }; From c3ff326a9d3c809b493775bd65c07dccb73258cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:53 +0100 Subject: [PATCH 2467/4122] iio: light: st_uvis25_i2c: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-140-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/light/st_uvis25_i2c.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/st_uvis25_i2c.c b/drivers/iio/light/st_uvis25_i2c.c index c982b0b255cf..2160e87bb498 100644 --- a/drivers/iio/light/st_uvis25_i2c.c +++ b/drivers/iio/light/st_uvis25_i2c.c @@ -25,8 +25,7 @@ static const struct regmap_config st_uvis25_i2c_regmap_config = { .read_flag_mask = UVIS25_I2C_AUTO_INCREMENT, }; -static int st_uvis25_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int st_uvis25_i2c_probe(struct i2c_client *client) { struct regmap *regmap; @@ -58,7 +57,7 @@ static struct i2c_driver st_uvis25_driver = { .pm = pm_sleep_ptr(&st_uvis25_pm_ops), .of_match_table = st_uvis25_i2c_of_match, }, - .probe = st_uvis25_i2c_probe, + .probe_new = st_uvis25_i2c_probe, .id_table = st_uvis25_i2c_id_table, }; module_i2c_driver(st_uvis25_driver); From 9046d80dce04c65c92ea5550f220a2de236e3ff0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:54 +0100 Subject: [PATCH 2468/4122] iio: light: stk3310: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-141-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/light/stk3310.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/stk3310.c b/drivers/iio/light/stk3310.c index 7b8e0da6aabc..48ae6ff0015e 100644 --- a/drivers/iio/light/stk3310.c +++ b/drivers/iio/light/stk3310.c @@ -586,8 +586,7 @@ out: return IRQ_HANDLED; } -static int stk3310_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int stk3310_probe(struct i2c_client *client) { int ret; struct iio_dev *indio_dev; @@ -715,7 +714,7 @@ static struct i2c_driver stk3310_driver = { .pm = pm_sleep_ptr(&stk3310_pm_ops), .acpi_match_table = ACPI_PTR(stk3310_acpi_id), }, - .probe = stk3310_probe, + .probe_new = stk3310_probe, .remove = stk3310_remove, .id_table = stk3310_i2c_id, }; From b30cfdeb9f7fe03103472c4c79e65e382935082a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:55 +0100 Subject: [PATCH 2469/4122] iio: light: tcs3414: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-142-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/light/tcs3414.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/tcs3414.c b/drivers/iio/light/tcs3414.c index 3951536022b3..5100732fbaf0 100644 --- a/drivers/iio/light/tcs3414.c +++ b/drivers/iio/light/tcs3414.c @@ -279,8 +279,7 @@ static void tcs3414_powerdown_cleanup(void *data) tcs3414_powerdown(data); } -static int tcs3414_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int tcs3414_probe(struct i2c_client *client) { struct tcs3414_data *data; struct iio_dev *indio_dev; @@ -374,7 +373,7 @@ static struct i2c_driver tcs3414_driver = { .name = TCS3414_DRV_NAME, .pm = pm_sleep_ptr(&tcs3414_pm_ops), }, - .probe = tcs3414_probe, + .probe_new = tcs3414_probe, .id_table = tcs3414_id, }; module_i2c_driver(tcs3414_driver); From e498cc544eae9b6264c9a535f8784aafe5788965 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:56 +0100 Subject: [PATCH 2470/4122] iio: light: tcs3472: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-143-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/light/tcs3472.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/tcs3472.c b/drivers/iio/light/tcs3472.c index db17fec634be..6187c5487916 100644 --- a/drivers/iio/light/tcs3472.c +++ b/drivers/iio/light/tcs3472.c @@ -442,8 +442,7 @@ static const struct iio_info tcs3472_info = { .attrs = &tcs3472_attribute_group, }; -static int tcs3472_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int tcs3472_probe(struct i2c_client *client) { struct tcs3472_data *data; struct iio_dev *indio_dev; @@ -610,7 +609,7 @@ static struct i2c_driver tcs3472_driver = { .name = TCS3472_DRV_NAME, .pm = pm_sleep_ptr(&tcs3472_pm_ops), }, - .probe = tcs3472_probe, + .probe_new = tcs3472_probe, .remove = tcs3472_remove, .id_table = tcs3472_id, }; From 74cd01b3f9041ef3e44cc043b8bb10f79ec52c43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:57 +0100 Subject: [PATCH 2471/4122] iio: light: tsl2563: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-144-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/light/tsl2563.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/tsl2563.c b/drivers/iio/light/tsl2563.c index 951f35ef3f41..d0e42b73203a 100644 --- a/drivers/iio/light/tsl2563.c +++ b/drivers/iio/light/tsl2563.c @@ -699,8 +699,7 @@ static const struct iio_info tsl2563_info = { .write_event_config = &tsl2563_write_interrupt_config, }; -static int tsl2563_probe(struct i2c_client *client, - const struct i2c_device_id *device_id) +static int tsl2563_probe(struct i2c_client *client) { struct iio_dev *indio_dev; struct tsl2563_chip *chip; @@ -880,7 +879,7 @@ static struct i2c_driver tsl2563_i2c_driver = { .of_match_table = tsl2563_of_match, .pm = pm_sleep_ptr(&tsl2563_pm_ops), }, - .probe = tsl2563_probe, + .probe_new = tsl2563_probe, .remove = tsl2563_remove, .id_table = tsl2563_id, }; From dcc484a21108441f7970c4813b1223154dd23ea6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:58 +0100 Subject: [PATCH 2472/4122] iio: light: tsl2583: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-145-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/light/tsl2583.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/tsl2583.c b/drivers/iio/light/tsl2583.c index 7bcb5c718922..a05f1c0453d1 100644 --- a/drivers/iio/light/tsl2583.c +++ b/drivers/iio/light/tsl2583.c @@ -809,8 +809,7 @@ static const struct iio_info tsl2583_info = { .write_raw = tsl2583_write_raw, }; -static int tsl2583_probe(struct i2c_client *clientp, - const struct i2c_device_id *idp) +static int tsl2583_probe(struct i2c_client *clientp) { int ret; struct tsl2583_chip *chip; @@ -943,7 +942,7 @@ static struct i2c_driver tsl2583_driver = { .of_match_table = tsl2583_of_match, }, .id_table = tsl2583_idtable, - .probe = tsl2583_probe, + .probe_new = tsl2583_probe, .remove = tsl2583_remove, }; module_i2c_driver(tsl2583_driver); From e723b95135b1510873c832eaf8361e6b8d76bfb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:37:59 +0100 Subject: [PATCH 2473/4122] iio: light: tsl2772: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-146-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/light/tsl2772.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/tsl2772.c b/drivers/iio/light/tsl2772.c index dd9051f1cc1a..ad50baa0202c 100644 --- a/drivers/iio/light/tsl2772.c +++ b/drivers/iio/light/tsl2772.c @@ -1750,9 +1750,9 @@ static const struct tsl2772_chip_info tsl2772_chip_info_tbl[] = { }, }; -static int tsl2772_probe(struct i2c_client *clientp, - const struct i2c_device_id *id) +static int tsl2772_probe(struct i2c_client *clientp) { + const struct i2c_device_id *id = i2c_client_get_device_id(clientp); struct iio_dev *indio_dev; struct tsl2772_chip *chip; int ret; @@ -1931,7 +1931,7 @@ static struct i2c_driver tsl2772_driver = { .pm = &tsl2772_pm_ops, }, .id_table = tsl2772_idtable, - .probe = tsl2772_probe, + .probe_new = tsl2772_probe, }; module_i2c_driver(tsl2772_driver); From 081f6b9dc055f9ccb19a86346b80df691972120b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:00 +0100 Subject: [PATCH 2474/4122] iio: light: tsl4531: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-147-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/light/tsl4531.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/tsl4531.c b/drivers/iio/light/tsl4531.c index 090038fed889..d95397eb1526 100644 --- a/drivers/iio/light/tsl4531.c +++ b/drivers/iio/light/tsl4531.c @@ -160,8 +160,7 @@ static int tsl4531_check_id(struct i2c_client *client) } } -static int tsl4531_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int tsl4531_probe(struct i2c_client *client) { struct tsl4531_data *data; struct iio_dev *indio_dev; @@ -238,7 +237,7 @@ static struct i2c_driver tsl4531_driver = { .name = TSL4531_DRV_NAME, .pm = pm_sleep_ptr(&tsl4531_pm_ops), }, - .probe = tsl4531_probe, + .probe_new = tsl4531_probe, .remove = tsl4531_remove, .id_table = tsl4531_id, }; From 3ed2b14de4d36a9335e93798bf966c521df42a95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:01 +0100 Subject: [PATCH 2475/4122] iio: light: us5182d: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-148-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/light/us5182d.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/us5182d.c b/drivers/iio/light/us5182d.c index 3e652d7f3b0e..8b2a0c99c8e6 100644 --- a/drivers/iio/light/us5182d.c +++ b/drivers/iio/light/us5182d.c @@ -832,8 +832,7 @@ static irqreturn_t us5182d_irq_thread_handler(int irq, void *private) return IRQ_HANDLED; } -static int us5182d_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int us5182d_probe(struct i2c_client *client) { struct us5182d_data *data; struct iio_dev *indio_dev; @@ -975,7 +974,7 @@ static struct i2c_driver us5182d_driver = { .of_match_table = us5182d_of_match, .acpi_match_table = ACPI_PTR(us5182d_acpi_match), }, - .probe = us5182d_probe, + .probe_new = us5182d_probe, .remove = us5182d_remove, .id_table = us5182d_id, From e61295e0d7cf6de3d7cc25e7f387ffe021f0b0a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:02 +0100 Subject: [PATCH 2476/4122] iio: light: vcnl4000: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-149-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/light/vcnl4000.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/vcnl4000.c b/drivers/iio/light/vcnl4000.c index fdb3922ae4ac..cc1a2062e76d 100644 --- a/drivers/iio/light/vcnl4000.c +++ b/drivers/iio/light/vcnl4000.c @@ -1180,9 +1180,9 @@ static int vcnl4010_probe_trigger(struct iio_dev *indio_dev) return devm_iio_trigger_register(&client->dev, trigger); } -static int vcnl4000_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int vcnl4000_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct vcnl4000_data *data; struct iio_dev *indio_dev; int ret; @@ -1326,7 +1326,7 @@ static struct i2c_driver vcnl4000_driver = { .pm = pm_ptr(&vcnl4000_pm_ops), .of_match_table = vcnl_4000_of_match, }, - .probe = vcnl4000_probe, + .probe_new = vcnl4000_probe, .id_table = vcnl4000_id, .remove = vcnl4000_remove, }; From 9da43dcc54a19fe40e3aa7dc03fbca3f8c0395ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:03 +0100 Subject: [PATCH 2477/4122] iio: light: vcnl4035: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-150-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/light/vcnl4035.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/vcnl4035.c b/drivers/iio/light/vcnl4035.c index 3ed37f6057fb..84148b944000 100644 --- a/drivers/iio/light/vcnl4035.c +++ b/drivers/iio/light/vcnl4035.c @@ -539,8 +539,7 @@ static int vcnl4035_probe_trigger(struct iio_dev *indio_dev) return ret; } -static int vcnl4035_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int vcnl4035_probe(struct i2c_client *client) { struct vcnl4035_data *data; struct iio_dev *indio_dev; @@ -668,7 +667,7 @@ static struct i2c_driver vcnl4035_driver = { .pm = pm_ptr(&vcnl4035_pm_ops), .of_match_table = vcnl4035_of_match, }, - .probe = vcnl4035_probe, + .probe_new = vcnl4035_probe, .remove = vcnl4035_remove, .id_table = vcnl4035_id, }; From e465524d64d11fbd25a9eb65ce4e77b965979291 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:04 +0100 Subject: [PATCH 2478/4122] iio: light: veml6030: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-151-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/light/veml6030.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/veml6030.c b/drivers/iio/light/veml6030.c index 9a7800cdfee2..e7d2d5d177d4 100644 --- a/drivers/iio/light/veml6030.c +++ b/drivers/iio/light/veml6030.c @@ -786,8 +786,7 @@ static int veml6030_hw_init(struct iio_dev *indio_dev) return ret; } -static int veml6030_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int veml6030_probe(struct i2c_client *client) { int ret; struct veml6030_data *data; @@ -893,7 +892,7 @@ static struct i2c_driver veml6030_driver = { .of_match_table = veml6030_of_match, .pm = pm_ptr(&veml6030_pm_ops), }, - .probe = veml6030_probe, + .probe_new = veml6030_probe, .id_table = veml6030_id, }; module_i2c_driver(veml6030_driver); From f90b8694f355566c2ff70564942d4932d76e28e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:05 +0100 Subject: [PATCH 2479/4122] iio: light: veml6070: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-152-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/light/veml6070.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/veml6070.c b/drivers/iio/light/veml6070.c index cfa4e9e7c803..ee76a68deb24 100644 --- a/drivers/iio/light/veml6070.c +++ b/drivers/iio/light/veml6070.c @@ -135,8 +135,7 @@ static const struct iio_info veml6070_info = { .read_raw = veml6070_read_raw, }; -static int veml6070_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int veml6070_probe(struct i2c_client *client) { struct veml6070_data *data; struct iio_dev *indio_dev; @@ -199,7 +198,7 @@ static struct i2c_driver veml6070_driver = { .driver = { .name = VEML6070_DRV_NAME, }, - .probe = veml6070_probe, + .probe_new = veml6070_probe, .remove = veml6070_remove, .id_table = veml6070_id, }; From 6811c7a52f025ab50f53b3107e6071132a8fe1f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:06 +0100 Subject: [PATCH 2480/4122] iio: light: zopt2201: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-153-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/light/zopt2201.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/zopt2201.c b/drivers/iio/light/zopt2201.c index e0bc9df9c88b..e3bac8b56380 100644 --- a/drivers/iio/light/zopt2201.c +++ b/drivers/iio/light/zopt2201.c @@ -501,8 +501,7 @@ static const struct iio_info zopt2201_info = { .attrs = &zopt2201_attribute_group, }; -static int zopt2201_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int zopt2201_probe(struct i2c_client *client) { struct zopt2201_data *data; struct iio_dev *indio_dev; @@ -555,7 +554,7 @@ static struct i2c_driver zopt2201_driver = { .driver = { .name = ZOPT2201_DRV_NAME, }, - .probe = zopt2201_probe, + .probe_new = zopt2201_probe, .id_table = zopt2201_id, }; From 149a5043b6f9b100471a1fdbdc22a395602fadd1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:07 +0100 Subject: [PATCH 2481/4122] iio: magnetometer: ak8974: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-154-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/magnetometer/ak8974.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/magnetometer/ak8974.c b/drivers/iio/magnetometer/ak8974.c index 7ec9ab3beb45..45abdcce6bc0 100644 --- a/drivers/iio/magnetometer/ak8974.c +++ b/drivers/iio/magnetometer/ak8974.c @@ -814,8 +814,7 @@ static const struct regmap_config ak8974_regmap_config = { .precious_reg = ak8974_precious_reg, }; -static int ak8974_probe(struct i2c_client *i2c, - const struct i2c_device_id *id) +static int ak8974_probe(struct i2c_client *i2c) { struct iio_dev *indio_dev; struct ak8974 *ak8974; @@ -1047,7 +1046,7 @@ static struct i2c_driver ak8974_driver = { .pm = pm_ptr(&ak8974_dev_pm_ops), .of_match_table = ak8974_of_match, }, - .probe = ak8974_probe, + .probe_new = ak8974_probe, .remove = ak8974_remove, .id_table = ak8974_id, }; From f64eecea19ba974f03e3614ebcc3749d9a24da77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:08 +0100 Subject: [PATCH 2482/4122] iio: magnetometer: ak8975: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-155-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/magnetometer/ak8975.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/magnetometer/ak8975.c b/drivers/iio/magnetometer/ak8975.c index caf03a2a98a5..924b481a3034 100644 --- a/drivers/iio/magnetometer/ak8975.c +++ b/drivers/iio/magnetometer/ak8975.c @@ -876,9 +876,9 @@ static irqreturn_t ak8975_handle_trigger(int irq, void *p) return IRQ_HANDLED; } -static int ak8975_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int ak8975_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct ak8975_data *data; struct iio_dev *indio_dev; struct gpio_desc *eoc_gpiod; @@ -1110,7 +1110,7 @@ static struct i2c_driver ak8975_driver = { .of_match_table = ak8975_of_match, .acpi_match_table = ak_acpi_match, }, - .probe = ak8975_probe, + .probe_new = ak8975_probe, .remove = ak8975_remove, .id_table = ak8975_id, }; From f23215c1e5a1255ed2097afe3e564dec96f73911 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:09 +0100 Subject: [PATCH 2483/4122] iio: magnetometer: bmc150_magn_i2c: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-156-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/magnetometer/bmc150_magn_i2c.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/magnetometer/bmc150_magn_i2c.c b/drivers/iio/magnetometer/bmc150_magn_i2c.c index 570deaa87836..44b8960eea17 100644 --- a/drivers/iio/magnetometer/bmc150_magn_i2c.c +++ b/drivers/iio/magnetometer/bmc150_magn_i2c.c @@ -16,9 +16,9 @@ #include "bmc150_magn.h" -static int bmc150_magn_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int bmc150_magn_i2c_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct regmap *regmap; const char *name = NULL; @@ -71,7 +71,7 @@ static struct i2c_driver bmc150_magn_driver = { .acpi_match_table = ACPI_PTR(bmc150_magn_acpi_match), .pm = &bmc150_magn_pm_ops, }, - .probe = bmc150_magn_i2c_probe, + .probe_new = bmc150_magn_i2c_probe, .remove = bmc150_magn_i2c_remove, .id_table = bmc150_magn_i2c_id, }; From bcf73c37f7c7e7c1d0cac6ebea36cfd7bf626727 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:10 +0100 Subject: [PATCH 2484/4122] iio: magnetometer: hmc5843: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-157-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/magnetometer/hmc5843_i2c.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/magnetometer/hmc5843_i2c.c b/drivers/iio/magnetometer/hmc5843_i2c.c index 18a13dd51296..7ef2b1d56289 100644 --- a/drivers/iio/magnetometer/hmc5843_i2c.c +++ b/drivers/iio/magnetometer/hmc5843_i2c.c @@ -52,9 +52,9 @@ static const struct regmap_config hmc5843_i2c_regmap_config = { .cache_type = REGCACHE_RBTREE, }; -static int hmc5843_i2c_probe(struct i2c_client *cli, - const struct i2c_device_id *id) +static int hmc5843_i2c_probe(struct i2c_client *cli) { + const struct i2c_device_id *id = i2c_client_get_device_id(cli); struct regmap *regmap = devm_regmap_init_i2c(cli, &hmc5843_i2c_regmap_config); if (IS_ERR(regmap)) @@ -95,7 +95,7 @@ static struct i2c_driver hmc5843_driver = { .of_match_table = hmc5843_of_match, }, .id_table = hmc5843_id, - .probe = hmc5843_i2c_probe, + .probe_new = hmc5843_i2c_probe, .remove = hmc5843_i2c_remove, }; module_i2c_driver(hmc5843_driver); From cee51403c696aac364fd991bc3a95a90722678b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:11 +0100 Subject: [PATCH 2485/4122] iio: magnetometer: mag3110: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-158-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/magnetometer/mag3110.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/magnetometer/mag3110.c b/drivers/iio/magnetometer/mag3110.c index b870ad803862..661176a885ad 100644 --- a/drivers/iio/magnetometer/mag3110.c +++ b/drivers/iio/magnetometer/mag3110.c @@ -469,9 +469,9 @@ static const struct iio_info mag3110_info = { static const unsigned long mag3110_scan_masks[] = {0x7, 0xf, 0}; -static int mag3110_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int mag3110_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct mag3110_data *data; struct iio_dev *indio_dev; int ret; @@ -641,7 +641,7 @@ static struct i2c_driver mag3110_driver = { .of_match_table = mag3110_of_match, .pm = pm_sleep_ptr(&mag3110_pm_ops), }, - .probe = mag3110_probe, + .probe_new = mag3110_probe, .remove = mag3110_remove, .id_table = mag3110_id, }; From e7a45a76351b6b5163af4a581ab46fdad137d62a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:12 +0100 Subject: [PATCH 2486/4122] iio: magnetometer: mmc35240: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-159-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/magnetometer/mmc35240.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/magnetometer/mmc35240.c b/drivers/iio/magnetometer/mmc35240.c index 186edfcda0b7..756dadbad106 100644 --- a/drivers/iio/magnetometer/mmc35240.c +++ b/drivers/iio/magnetometer/mmc35240.c @@ -481,8 +481,7 @@ static const struct regmap_config mmc35240_regmap_config = { .num_reg_defaults = ARRAY_SIZE(mmc35240_reg_defaults), }; -static int mmc35240_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int mmc35240_probe(struct i2c_client *client) { struct mmc35240_data *data; struct iio_dev *indio_dev; @@ -576,7 +575,7 @@ static struct i2c_driver mmc35240_driver = { .pm = pm_sleep_ptr(&mmc35240_pm_ops), .acpi_match_table = ACPI_PTR(mmc35240_acpi_match), }, - .probe = mmc35240_probe, + .probe_new = mmc35240_probe, .id_table = mmc35240_id, }; From fcd969675290cd3cbe911b350502332be58f100b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:13 +0100 Subject: [PATCH 2487/4122] iio: magnetometer: yamaha-yas530: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-160-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/magnetometer/yamaha-yas530.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/magnetometer/yamaha-yas530.c b/drivers/iio/magnetometer/yamaha-yas530.c index 801c760feb4d..753717158b07 100644 --- a/drivers/iio/magnetometer/yamaha-yas530.c +++ b/drivers/iio/magnetometer/yamaha-yas530.c @@ -1384,9 +1384,9 @@ static const struct yas5xx_chip_info yas5xx_chip_info_tbl[] = { }, }; -static int yas5xx_probe(struct i2c_client *i2c, - const struct i2c_device_id *id) +static int yas5xx_probe(struct i2c_client *i2c) { + const struct i2c_device_id *id = i2c_client_get_device_id(i2c); struct iio_dev *indio_dev; struct device *dev = &i2c->dev; struct yas5xx *yas5xx; @@ -1605,7 +1605,7 @@ static struct i2c_driver yas5xx_driver = { .of_match_table = yas5xx_of_match, .pm = pm_ptr(&yas5xx_dev_pm_ops), }, - .probe = yas5xx_probe, + .probe_new = yas5xx_probe, .remove = yas5xx_remove, .id_table = yas5xx_id, }; From b11df8374c0d0581474e0b8f74a7ba8f4a10be7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:14 +0100 Subject: [PATCH 2488/4122] iio: potentiometer: ad5272: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-161-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/potentiometer/ad5272.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/potentiometer/ad5272.c b/drivers/iio/potentiometer/ad5272.c index ed5fc0b50fe9..aa140d632101 100644 --- a/drivers/iio/potentiometer/ad5272.c +++ b/drivers/iio/potentiometer/ad5272.c @@ -158,9 +158,9 @@ static int ad5272_reset(struct ad5272_data *data) return 0; } -static int ad5272_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int ad5272_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct device *dev = &client->dev; struct iio_dev *indio_dev; struct ad5272_data *data; @@ -218,7 +218,7 @@ static struct i2c_driver ad5272_driver = { .name = "ad5272", .of_match_table = ad5272_dt_ids, }, - .probe = ad5272_probe, + .probe_new = ad5272_probe, .id_table = ad5272_id, }; From 705f1ce22b967b8fd7fc3cb0de31830ec9eda3c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:15 +0100 Subject: [PATCH 2489/4122] iio: potentiometer: ds1803: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-162-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/potentiometer/ds1803.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/iio/potentiometer/ds1803.c b/drivers/iio/potentiometer/ds1803.c index 5c212ed7a931..0b5e475807cb 100644 --- a/drivers/iio/potentiometer/ds1803.c +++ b/drivers/iio/potentiometer/ds1803.c @@ -202,8 +202,9 @@ static const struct iio_info ds1803_info = { .read_avail = ds1803_read_avail, }; -static int ds1803_probe(struct i2c_client *client, const struct i2c_device_id *id) +static int ds1803_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct device *dev = &client->dev; struct ds1803_data *data; struct iio_dev *indio_dev; @@ -251,7 +252,7 @@ static struct i2c_driver ds1803_driver = { .name = "ds1803", .of_match_table = ds1803_dt_ids, }, - .probe = ds1803_probe, + .probe_new = ds1803_probe, .id_table = ds1803_id, }; From e005024112dd3800e78d056facf958fc6451ba10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:16 +0100 Subject: [PATCH 2490/4122] iio: potentiometer: max5432: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-163-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/potentiometer/max5432.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/potentiometer/max5432.c b/drivers/iio/potentiometer/max5432.c index aed3b6ab82a2..94ef27ef3fb5 100644 --- a/drivers/iio/potentiometer/max5432.c +++ b/drivers/iio/potentiometer/max5432.c @@ -85,8 +85,7 @@ static const struct iio_info max5432_info = { .write_raw = max5432_write_raw, }; -static int max5432_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int max5432_probe(struct i2c_client *client) { struct device *dev = &client->dev; struct iio_dev *indio_dev; @@ -124,7 +123,7 @@ static struct i2c_driver max5432_driver = { .name = "max5432", .of_match_table = max5432_dt_ids, }, - .probe = max5432_probe, + .probe_new = max5432_probe, }; module_i2c_driver(max5432_driver); From 4c1142cc436842eefffcd800ca41f8327bf61f88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:17 +0100 Subject: [PATCH 2491/4122] iio: potentiometer: tpl0102: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-164-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/potentiometer/tpl0102.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/potentiometer/tpl0102.c b/drivers/iio/potentiometer/tpl0102.c index d996dc367fb7..a3465b413b0c 100644 --- a/drivers/iio/potentiometer/tpl0102.c +++ b/drivers/iio/potentiometer/tpl0102.c @@ -120,9 +120,9 @@ static const struct iio_info tpl0102_info = { .write_raw = tpl0102_write_raw, }; -static int tpl0102_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int tpl0102_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct device *dev = &client->dev; struct tpl0102_data *data; struct iio_dev *indio_dev; @@ -161,7 +161,7 @@ static struct i2c_driver tpl0102_driver = { .driver = { .name = "tpl0102", }, - .probe = tpl0102_probe, + .probe_new = tpl0102_probe, .id_table = tpl0102_id, }; From 7d8b3e100abeeff40f54853ce82bc1d900cb2bf4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:18 +0100 Subject: [PATCH 2492/4122] iio: potentiostat: lmp91000: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-165-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/potentiostat/lmp91000.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/potentiostat/lmp91000.c b/drivers/iio/potentiostat/lmp91000.c index 5ec7060d31d9..b82f093f1e6a 100644 --- a/drivers/iio/potentiostat/lmp91000.c +++ b/drivers/iio/potentiostat/lmp91000.c @@ -292,8 +292,7 @@ static const struct iio_buffer_setup_ops lmp91000_buffer_setup_ops = { .predisable = lmp91000_buffer_predisable, }; -static int lmp91000_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int lmp91000_probe(struct i2c_client *client) { struct device *dev = &client->dev; struct lmp91000_data *data; @@ -417,7 +416,7 @@ static struct i2c_driver lmp91000_driver = { .name = LMP91000_DRV_NAME, .of_match_table = lmp91000_of_match, }, - .probe = lmp91000_probe, + .probe_new = lmp91000_probe, .remove = lmp91000_remove, .id_table = lmp91000_id, }; From 576306ab4ceb16a032b5f67aeb635f385390193c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:19 +0100 Subject: [PATCH 2493/4122] iio: pressure: abp060mg: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-166-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/abp060mg.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/pressure/abp060mg.c b/drivers/iio/pressure/abp060mg.c index e1c3bdb371ee..c0140779366a 100644 --- a/drivers/iio/pressure/abp060mg.c +++ b/drivers/iio/pressure/abp060mg.c @@ -174,9 +174,9 @@ static void abp060mg_init_device(struct iio_dev *indio_dev, unsigned long id) state->offset -= ABP060MG_NUM_COUNTS >> 1; } -static int abp060mg_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int abp060mg_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct iio_dev *indio_dev; struct abp_state *state; unsigned long cfg_id = id->driver_data; @@ -255,7 +255,7 @@ static struct i2c_driver abp060mg_driver = { .driver = { .name = "abp060mg", }, - .probe = abp060mg_probe, + .probe_new = abp060mg_probe, .id_table = abp060mg_id_table, }; module_i2c_driver(abp060mg_driver); From 046ae105f266aae218f76dc6321eebebeead6b94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:21 +0100 Subject: [PATCH 2494/4122] iio: pressure: dlhl60d: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-168-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/dlhl60d.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/pressure/dlhl60d.c b/drivers/iio/pressure/dlhl60d.c index f0b0d198c6d4..43650b048d62 100644 --- a/drivers/iio/pressure/dlhl60d.c +++ b/drivers/iio/pressure/dlhl60d.c @@ -282,9 +282,9 @@ static irqreturn_t dlh_interrupt(int irq, void *private) return IRQ_HANDLED; }; -static int dlh_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int dlh_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct dlh_state *st; struct iio_dev *indio_dev; int ret; @@ -362,7 +362,7 @@ static struct i2c_driver dlh_driver = { .name = "dlhl60d", .of_match_table = dlh_of_match, }, - .probe = dlh_probe, + .probe_new = dlh_probe, .id_table = dlh_id, }; module_i2c_driver(dlh_driver); From 92a54a29cf39f8f18632f9af43aa5baf4ff805bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:22 +0100 Subject: [PATCH 2495/4122] iio: pressure: dps310: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-169-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/dps310.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/pressure/dps310.c b/drivers/iio/pressure/dps310.c index 984a3f511a1a..2af275a24ff9 100644 --- a/drivers/iio/pressure/dps310.c +++ b/drivers/iio/pressure/dps310.c @@ -827,9 +827,9 @@ static const struct iio_info dps310_info = { .write_raw = dps310_write_raw, }; -static int dps310_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int dps310_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct dps310_data *data; struct iio_dev *iio; int rc; @@ -887,7 +887,7 @@ static struct i2c_driver dps310_driver = { .name = DPS310_DEV_NAME, .acpi_match_table = dps310_acpi_match, }, - .probe = dps310_probe, + .probe_new = dps310_probe, .id_table = dps310_id, }; module_i2c_driver(dps310_driver); From 3d5f5d599f61a2ab99768471cf56c61b1751f98f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:23 +0100 Subject: [PATCH 2496/4122] iio: pressure: hp03: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-170-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/hp03.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/pressure/hp03.c b/drivers/iio/pressure/hp03.c index 9538118c9648..bd1f71a99cfa 100644 --- a/drivers/iio/pressure/hp03.c +++ b/drivers/iio/pressure/hp03.c @@ -208,9 +208,9 @@ static const struct iio_info hp03_info = { .read_raw = &hp03_read_raw, }; -static int hp03_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int hp03_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct device *dev = &client->dev; struct iio_dev *indio_dev; struct hp03_priv *priv; @@ -282,7 +282,7 @@ static struct i2c_driver hp03_driver = { .name = "hp03", .of_match_table = hp03_of_match, }, - .probe = hp03_probe, + .probe_new = hp03_probe, .id_table = hp03_id, }; module_i2c_driver(hp03_driver); From 240c69e09d3f77673653f56867aba2c69a66264d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:24 +0100 Subject: [PATCH 2497/4122] iio: pressure: hp206c: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-171-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/hp206c.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/pressure/hp206c.c b/drivers/iio/pressure/hp206c.c index 986b7a59712e..b6d2ff464341 100644 --- a/drivers/iio/pressure/hp206c.c +++ b/drivers/iio/pressure/hp206c.c @@ -352,9 +352,9 @@ static const struct iio_info hp206c_info = { .write_raw = hp206c_write_raw, }; -static int hp206c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int hp206c_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct iio_dev *indio_dev; struct hp206c_data *data; int ret; @@ -409,7 +409,7 @@ MODULE_DEVICE_TABLE(acpi, hp206c_acpi_match); #endif static struct i2c_driver hp206c_driver = { - .probe = hp206c_probe, + .probe_new = hp206c_probe, .id_table = hp206c_id, .driver = { .name = "hp206c", From 7a5da8b28618426a81f53f496de3a83d47df2634 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:25 +0100 Subject: [PATCH 2498/4122] iio: pressure: icp10100: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Acked-by: Jean-Baptiste Maneyrol Link: https://lore.kernel.org/r/20221118224540.619276-172-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/icp10100.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/pressure/icp10100.c b/drivers/iio/pressure/icp10100.c index b62f28585db5..407cf25ea0e3 100644 --- a/drivers/iio/pressure/icp10100.c +++ b/drivers/iio/pressure/icp10100.c @@ -530,8 +530,7 @@ static void icp10100_pm_disable(void *data) pm_runtime_disable(dev); } -static int icp10100_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int icp10100_probe(struct i2c_client *client) { struct iio_dev *indio_dev; struct icp10100_state *st; @@ -649,7 +648,7 @@ static struct i2c_driver icp10100_driver = { .pm = pm_ptr(&icp10100_pm), .of_match_table = icp10100_of_match, }, - .probe = icp10100_probe, + .probe_new = icp10100_probe, .id_table = icp10100_id, }; module_i2c_driver(icp10100_driver); From 8afce858762fc767dbd52d313adf058098332aaf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:26 +0100 Subject: [PATCH 2499/4122] iio: pressure: mpl115_i2c: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-173-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/mpl115_i2c.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/pressure/mpl115_i2c.c b/drivers/iio/pressure/mpl115_i2c.c index 555bda1146fb..ade4dd854ddf 100644 --- a/drivers/iio/pressure/mpl115_i2c.c +++ b/drivers/iio/pressure/mpl115_i2c.c @@ -35,9 +35,9 @@ static const struct mpl115_ops mpl115_i2c_ops = { .write = mpl115_i2c_write, }; -static int mpl115_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int mpl115_i2c_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_WORD_DATA)) return -EOPNOTSUPP; @@ -55,7 +55,7 @@ static struct i2c_driver mpl115_i2c_driver = { .name = "mpl115", .pm = pm_ptr(&mpl115_dev_pm_ops), }, - .probe = mpl115_i2c_probe, + .probe_new = mpl115_i2c_probe, .id_table = mpl115_i2c_id, }; module_i2c_driver(mpl115_i2c_driver); From 0586ce78a70c2792c4dfd415d1a427ea447bebd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:27 +0100 Subject: [PATCH 2500/4122] iio: pressure: mpl3115: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-174-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/mpl3115.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/pressure/mpl3115.c b/drivers/iio/pressure/mpl3115.c index 2f22aba61e4d..72e811a5c96e 100644 --- a/drivers/iio/pressure/mpl3115.c +++ b/drivers/iio/pressure/mpl3115.c @@ -230,9 +230,9 @@ static const struct iio_info mpl3115_info = { .read_raw = &mpl3115_read_raw, }; -static int mpl3115_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int mpl3115_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct mpl3115_data *data; struct iio_dev *indio_dev; int ret; @@ -335,7 +335,7 @@ static struct i2c_driver mpl3115_driver = { .of_match_table = mpl3115_of_match, .pm = pm_sleep_ptr(&mpl3115_pm_ops), }, - .probe = mpl3115_probe, + .probe_new = mpl3115_probe, .remove = mpl3115_remove, .id_table = mpl3115_id, }; From eba7dcb65337dc2b081508317e10d0f1187121a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:28 +0100 Subject: [PATCH 2501/4122] iio: pressure: ms5611_i2c: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-175-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/ms5611_i2c.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/pressure/ms5611_i2c.c b/drivers/iio/pressure/ms5611_i2c.c index b681a4183909..caf882497656 100644 --- a/drivers/iio/pressure/ms5611_i2c.c +++ b/drivers/iio/pressure/ms5611_i2c.c @@ -79,9 +79,9 @@ static int ms5611_i2c_read_adc_temp_and_pressure(struct ms5611_state *st, return ms5611_i2c_read_adc(st, pressure); } -static int ms5611_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int ms5611_i2c_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct ms5611_state *st; struct iio_dev *indio_dev; @@ -130,7 +130,7 @@ static struct i2c_driver ms5611_driver = { .of_match_table = ms5611_i2c_matches, }, .id_table = ms5611_id, - .probe = ms5611_i2c_probe, + .probe_new = ms5611_i2c_probe, .remove = ms5611_i2c_remove, }; module_i2c_driver(ms5611_driver); From f80ccc71c3f38d55662a6cfdec5a9e0907f6dfb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:29 +0100 Subject: [PATCH 2502/4122] iio: pressure: ms5637: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-176-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/ms5637.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/pressure/ms5637.c b/drivers/iio/pressure/ms5637.c index 70c70019142a..c4981b29dccb 100644 --- a/drivers/iio/pressure/ms5637.c +++ b/drivers/iio/pressure/ms5637.c @@ -142,9 +142,9 @@ static const struct iio_info ms5637_info = { .attrs = &ms5637_attribute_group, }; -static int ms5637_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int ms5637_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); const struct ms_tp_data *data; struct ms_tp_dev *dev_data; struct iio_dev *indio_dev; @@ -238,7 +238,7 @@ static const struct of_device_id ms5637_of_match[] = { MODULE_DEVICE_TABLE(of, ms5637_of_match); static struct i2c_driver ms5637_driver = { - .probe = ms5637_probe, + .probe_new = ms5637_probe, .id_table = ms5637_id, .driver = { .name = "ms5637", From a52833a526b5e38be1aaf3e8f2e74a21bc679cb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:30 +0100 Subject: [PATCH 2503/4122] iio: pressure: st_pressure_i2c: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-177-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/st_pressure_i2c.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/pressure/st_pressure_i2c.c b/drivers/iio/pressure/st_pressure_i2c.c index 58fede861891..f2c3bb568d16 100644 --- a/drivers/iio/pressure/st_pressure_i2c.c +++ b/drivers/iio/pressure/st_pressure_i2c.c @@ -76,8 +76,7 @@ static const struct i2c_device_id st_press_id_table[] = { }; MODULE_DEVICE_TABLE(i2c, st_press_id_table); -static int st_press_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int st_press_i2c_probe(struct i2c_client *client) { const struct st_sensor_settings *settings; struct st_sensor_data *press_data; @@ -117,7 +116,7 @@ static struct i2c_driver st_press_driver = { .of_match_table = st_press_of_match, .acpi_match_table = ACPI_PTR(st_press_acpi_match), }, - .probe = st_press_i2c_probe, + .probe_new = st_press_i2c_probe, .id_table = st_press_id_table, }; module_i2c_driver(st_press_driver); From aa9b3321735d42d3a2a2e58ce25be5586396c8a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:31 +0100 Subject: [PATCH 2504/4122] iio: pressure: t5403: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-178-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/t5403.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/pressure/t5403.c b/drivers/iio/pressure/t5403.c index 685fcf65334f..2fbf14aff033 100644 --- a/drivers/iio/pressure/t5403.c +++ b/drivers/iio/pressure/t5403.c @@ -208,9 +208,9 @@ static const struct iio_info t5403_info = { .attrs = &t5403_attribute_group, }; -static int t5403_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int t5403_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct t5403_data *data; struct iio_dev *indio_dev; int ret; @@ -260,7 +260,7 @@ static struct i2c_driver t5403_driver = { .driver = { .name = "t5403", }, - .probe = t5403_probe, + .probe_new = t5403_probe, .id_table = t5403_id, }; module_i2c_driver(t5403_driver); From 1bee48f48a69013cdd5ed5fdbff1e0410a18a381 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:32 +0100 Subject: [PATCH 2505/4122] iio: pressure: zpa2326_i2c: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-179-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/zpa2326_i2c.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/pressure/zpa2326_i2c.c b/drivers/iio/pressure/zpa2326_i2c.c index f26dd8cbb387..ade465014be1 100644 --- a/drivers/iio/pressure/zpa2326_i2c.c +++ b/drivers/iio/pressure/zpa2326_i2c.c @@ -38,9 +38,9 @@ static unsigned int zpa2326_i2c_hwid(const struct i2c_client *client) (ZPA2326_SA0(client->addr) << ZPA2326_DEVICE_ID_SA0_SHIFT)); } -static int zpa2326_probe_i2c(struct i2c_client *client, - const struct i2c_device_id *i2c_id) +static int zpa2326_probe_i2c(struct i2c_client *client) { + const struct i2c_device_id *i2c_id = i2c_client_get_device_id(client); struct regmap *regmap; regmap = devm_regmap_init_i2c(client, &zpa2326_regmap_i2c_config); @@ -76,7 +76,7 @@ static struct i2c_driver zpa2326_i2c_driver = { .of_match_table = zpa2326_i2c_matches, .pm = ZPA2326_PM_OPS, }, - .probe = zpa2326_probe_i2c, + .probe_new = zpa2326_probe_i2c, .remove = zpa2326_remove_i2c, .id_table = zpa2326_i2c_ids, }; From 9d6f774dd4ff150ce7a5bb2bf64918b3b72cda11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:33 +0100 Subject: [PATCH 2506/4122] iio: proximity: isl29501: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-180-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/proximity/isl29501.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/proximity/isl29501.c b/drivers/iio/proximity/isl29501.c index 5b6ea783795d..7b8f40b7ccf3 100644 --- a/drivers/iio/proximity/isl29501.c +++ b/drivers/iio/proximity/isl29501.c @@ -949,8 +949,7 @@ static irqreturn_t isl29501_trigger_handler(int irq, void *p) return IRQ_HANDLED; } -static int isl29501_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int isl29501_probe(struct i2c_client *client) { struct iio_dev *indio_dev; struct isl29501_private *isl29501; @@ -1009,7 +1008,7 @@ static struct i2c_driver isl29501_driver = { .name = "isl29501", }, .id_table = isl29501_id, - .probe = isl29501_probe, + .probe_new = isl29501_probe, }; module_i2c_driver(isl29501_driver); From 5c5c482e02bcd09bf113853b7520ad121dba081b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:34 +0100 Subject: [PATCH 2507/4122] iio: proximity: mb1232: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-181-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/proximity/mb1232.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/proximity/mb1232.c b/drivers/iio/proximity/mb1232.c index 0bca5f74de68..e70cac8240af 100644 --- a/drivers/iio/proximity/mb1232.c +++ b/drivers/iio/proximity/mb1232.c @@ -180,9 +180,9 @@ static const struct iio_info mb1232_info = { .read_raw = mb1232_read_raw, }; -static int mb1232_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int mb1232_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct iio_dev *indio_dev; struct mb1232_data *data; int ret; @@ -264,7 +264,7 @@ static struct i2c_driver mb1232_driver = { .name = "maxbotix-mb1232", .of_match_table = of_mb1232_match, }, - .probe = mb1232_probe, + .probe_new = mb1232_probe, .id_table = mb1232_id, }; module_i2c_driver(mb1232_driver); From 94c80ea7677c9d1886673a4f6764e19c5309b11c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:35 +0100 Subject: [PATCH 2508/4122] iio: proximity: pulsedlight-lidar-lite-v2: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-182-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/proximity/pulsedlight-lidar-lite-v2.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c b/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c index 791a33d5286c..c9eead01a031 100644 --- a/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c +++ b/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c @@ -253,8 +253,7 @@ static const struct iio_info lidar_info = { .read_raw = lidar_read_raw, }; -static int lidar_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int lidar_probe(struct i2c_client *client) { struct lidar_data *data; struct iio_dev *indio_dev; @@ -366,7 +365,7 @@ static struct i2c_driver lidar_driver = { .of_match_table = lidar_dt_ids, .pm = pm_ptr(&lidar_pm_ops), }, - .probe = lidar_probe, + .probe_new = lidar_probe, .remove = lidar_remove, .id_table = lidar_id, }; From 576eb2b091084425eea6edd36d4407d139387d6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:36 +0100 Subject: [PATCH 2509/4122] iio: proximity: rfd77402: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-183-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/proximity/rfd77402.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/proximity/rfd77402.c b/drivers/iio/proximity/rfd77402.c index cb80b3c9d073..44f72b78bd50 100644 --- a/drivers/iio/proximity/rfd77402.c +++ b/drivers/iio/proximity/rfd77402.c @@ -257,8 +257,7 @@ static void rfd77402_disable(void *client) rfd77402_powerdown(client); } -static int rfd77402_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int rfd77402_probe(struct i2c_client *client) { struct rfd77402_data *data; struct iio_dev *indio_dev; @@ -319,7 +318,7 @@ static struct i2c_driver rfd77402_driver = { .name = RFD77402_DRV_NAME, .pm = pm_sleep_ptr(&rfd77402_pm_ops), }, - .probe = rfd77402_probe, + .probe_new = rfd77402_probe, .id_table = rfd77402_id, }; From 038b0fac0abe2f5d9beee2511434f231a58f41fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:37 +0100 Subject: [PATCH 2510/4122] iio: proximity: srf08: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-184-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/proximity/srf08.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/proximity/srf08.c b/drivers/iio/proximity/srf08.c index 7ed11339c31e..61866d0440f7 100644 --- a/drivers/iio/proximity/srf08.c +++ b/drivers/iio/proximity/srf08.c @@ -443,9 +443,9 @@ static const struct iio_info srf02_info = { .read_raw = srf08_read_raw, }; -static int srf08_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int srf08_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct iio_dev *indio_dev; struct srf08_data *data; int ret; @@ -549,7 +549,7 @@ static struct i2c_driver srf08_driver = { .name = "srf08", .of_match_table = of_srf08_match, }, - .probe = srf08_probe, + .probe_new = srf08_probe, .id_table = srf08_id, }; module_i2c_driver(srf08_driver); From 7660d32e6f9608b5bea7d066c74d63fc7666c510 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:38 +0100 Subject: [PATCH 2511/4122] iio: proximity: sx9500: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-185-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/proximity/sx9500.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/proximity/sx9500.c b/drivers/iio/proximity/sx9500.c index d4670864ddc7..8794e75e5bf9 100644 --- a/drivers/iio/proximity/sx9500.c +++ b/drivers/iio/proximity/sx9500.c @@ -901,8 +901,7 @@ static void sx9500_gpio_probe(struct i2c_client *client, } } -static int sx9500_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int sx9500_probe(struct i2c_client *client) { int ret; struct iio_dev *indio_dev; @@ -1056,7 +1055,7 @@ static struct i2c_driver sx9500_driver = { .of_match_table = of_match_ptr(sx9500_of_match), .pm = pm_sleep_ptr(&sx9500_pm_ops), }, - .probe = sx9500_probe, + .probe_new = sx9500_probe, .remove = sx9500_remove, .id_table = sx9500_id, }; From 160c7140bb25a1921c4f3845bb15423721bcc0fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:39 +0100 Subject: [PATCH 2512/4122] iio: temperature: mlx90614: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Acked-by: Crt Mori Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-186-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/temperature/mlx90614.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/temperature/mlx90614.c b/drivers/iio/temperature/mlx90614.c index 8eb0f962ed25..909fadb62349 100644 --- a/drivers/iio/temperature/mlx90614.c +++ b/drivers/iio/temperature/mlx90614.c @@ -537,9 +537,9 @@ static int mlx90614_probe_num_ir_sensors(struct i2c_client *client) return (ret & MLX90614_CONFIG_DUAL_MASK) ? 1 : 0; } -static int mlx90614_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int mlx90614_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct iio_dev *indio_dev; struct mlx90614_data *data; int ret; @@ -675,7 +675,7 @@ static struct i2c_driver mlx90614_driver = { .of_match_table = mlx90614_of_match, .pm = pm_ptr(&mlx90614_pm_ops), }, - .probe = mlx90614_probe, + .probe_new = mlx90614_probe, .remove = mlx90614_remove, .id_table = mlx90614_id, }; From 40fbb59b9c132e401b7487ab2792e6dcd3f14809 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:40 +0100 Subject: [PATCH 2513/4122] iio: temperature: mlx90632: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Acked-by: Crt Mori Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-187-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/temperature/mlx90632.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/temperature/mlx90632.c b/drivers/iio/temperature/mlx90632.c index f1f5ebc145b1..753b7a4ccfdd 100644 --- a/drivers/iio/temperature/mlx90632.c +++ b/drivers/iio/temperature/mlx90632.c @@ -1168,9 +1168,9 @@ static int mlx90632_enable_regulator(struct mlx90632_data *data) return ret; } -static int mlx90632_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int mlx90632_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct mlx90632_data *mlx90632; struct iio_dev *indio_dev; struct regmap *regmap; @@ -1337,7 +1337,7 @@ static struct i2c_driver mlx90632_driver = { .of_match_table = mlx90632_of_match, .pm = pm_ptr(&mlx90632_pm_ops), }, - .probe = mlx90632_probe, + .probe_new = mlx90632_probe, .id_table = mlx90632_id, }; module_i2c_driver(mlx90632_driver); From 89824f4cb65e7a7174cb24e4c0835e4c37f74a21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:41 +0100 Subject: [PATCH 2514/4122] iio: temperature: tmp006: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-188-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/temperature/tmp006.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/temperature/tmp006.c b/drivers/iio/temperature/tmp006.c index 706a760f30b4..cdf08477e63f 100644 --- a/drivers/iio/temperature/tmp006.c +++ b/drivers/iio/temperature/tmp006.c @@ -212,8 +212,7 @@ static void tmp006_powerdown_cleanup(void *dev) tmp006_power(dev, false); } -static int tmp006_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int tmp006_probe(struct i2c_client *client) { struct iio_dev *indio_dev; struct tmp006_data *data; @@ -284,7 +283,7 @@ static struct i2c_driver tmp006_driver = { .name = "tmp006", .pm = pm_sleep_ptr(&tmp006_pm_ops), }, - .probe = tmp006_probe, + .probe_new = tmp006_probe, .id_table = tmp006_id, }; module_i2c_driver(tmp006_driver); From f1e7d53ddfc309149521a2812c3ffcd54eb67b04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:42 +0100 Subject: [PATCH 2515/4122] iio: temperature: tmp007: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-189-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/temperature/tmp007.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/temperature/tmp007.c b/drivers/iio/temperature/tmp007.c index f3420d8a0e35..8d27aa3bdd6d 100644 --- a/drivers/iio/temperature/tmp007.c +++ b/drivers/iio/temperature/tmp007.c @@ -446,9 +446,9 @@ static void tmp007_powerdown_action_cb(void *priv) tmp007_powerdown(data); } -static int tmp007_probe(struct i2c_client *client, - const struct i2c_device_id *tmp007_id) +static int tmp007_probe(struct i2c_client *client) { + const struct i2c_device_id *tmp007_id = i2c_client_get_device_id(client); struct tmp007_data *data; struct iio_dev *indio_dev; int ret; @@ -574,7 +574,7 @@ static struct i2c_driver tmp007_driver = { .of_match_table = tmp007_of_match, .pm = pm_sleep_ptr(&tmp007_pm_ops), }, - .probe = tmp007_probe, + .probe_new = tmp007_probe, .id_table = tmp007_id, }; module_i2c_driver(tmp007_driver); From d7c9422850aad0e57316f74c2f9cdfc18be39459 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:43 +0100 Subject: [PATCH 2516/4122] iio: temperature: tsys01: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-190-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/temperature/tsys01.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/temperature/tsys01.c b/drivers/iio/temperature/tsys01.c index 60d58ec5b063..30b268ba82cc 100644 --- a/drivers/iio/temperature/tsys01.c +++ b/drivers/iio/temperature/tsys01.c @@ -176,8 +176,7 @@ static int tsys01_probe(struct iio_dev *indio_dev, struct device *dev) return devm_iio_device_register(dev, indio_dev); } -static int tsys01_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int tsys01_i2c_probe(struct i2c_client *client) { struct tsys01_dev *dev_data; struct iio_dev *indio_dev; @@ -219,7 +218,7 @@ static const struct of_device_id tsys01_of_match[] = { MODULE_DEVICE_TABLE(of, tsys01_of_match); static struct i2c_driver tsys01_driver = { - .probe = tsys01_i2c_probe, + .probe_new = tsys01_i2c_probe, .id_table = tsys01_id, .driver = { .name = "tsys01", From 6a9113195fff8e48d8898cdab82c96bb77780228 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:38:44 +0100 Subject: [PATCH 2517/4122] iio: temperature: tsys02d: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221118224540.619276-191-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/iio/temperature/tsys02d.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/temperature/tsys02d.c b/drivers/iio/temperature/tsys02d.c index 49c275e4f510..cdefe046ab17 100644 --- a/drivers/iio/temperature/tsys02d.c +++ b/drivers/iio/temperature/tsys02d.c @@ -121,9 +121,9 @@ static const struct iio_info tsys02d_info = { .attrs = &tsys02d_attribute_group, }; -static int tsys02d_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int tsys02d_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct ms_ht_dev *dev_data; struct iio_dev *indio_dev; int ret; @@ -174,7 +174,7 @@ static const struct i2c_device_id tsys02d_id[] = { MODULE_DEVICE_TABLE(i2c, tsys02d_id); static struct i2c_driver tsys02d_driver = { - .probe = tsys02d_probe, + .probe_new = tsys02d_probe, .id_table = tsys02d_id, .driver = { .name = "tsys02d", From 8282ef72a6e5e075f7f2f2f905b2a1adcc0bdcc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:45:00 +0100 Subject: [PATCH 2518/4122] staging: iio: adt7316: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221118224540.619276-567-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/staging/iio/addac/adt7316-i2c.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/staging/iio/addac/adt7316-i2c.c b/drivers/staging/iio/addac/adt7316-i2c.c index 5543cc909707..7e3d1a6f30ba 100644 --- a/drivers/staging/iio/addac/adt7316-i2c.c +++ b/drivers/staging/iio/addac/adt7316-i2c.c @@ -93,9 +93,9 @@ static int adt7316_i2c_multi_write(void *client, u8 reg, u8 count, u8 *data) * device probe and remove */ -static int adt7316_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int adt7316_i2c_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct adt7316_bus bus = { .client = client, .irq = client->irq, @@ -138,7 +138,7 @@ static struct i2c_driver adt7316_driver = { .of_match_table = adt7316_of_match, .pm = ADT7316_PM_OPS, }, - .probe = adt7316_i2c_probe, + .probe_new = adt7316_i2c_probe, .id_table = adt7316_i2c_id, }; module_i2c_driver(adt7316_driver); From 67ab4155b2391b7901c83c69a6dec97a4e0f709d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:45:01 +0100 Subject: [PATCH 2519/4122] staging: iio: ad5933: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221118224540.619276-568-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/staging/iio/impedance-analyzer/ad5933.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/staging/iio/impedance-analyzer/ad5933.c b/drivers/staging/iio/impedance-analyzer/ad5933.c index f177b20f0f2d..b3152f7153fb 100644 --- a/drivers/staging/iio/impedance-analyzer/ad5933.c +++ b/drivers/staging/iio/impedance-analyzer/ad5933.c @@ -674,9 +674,9 @@ static void ad5933_clk_disable(void *data) clk_disable_unprepare(st->mclk); } -static int ad5933_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int ad5933_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); int ret; struct ad5933_state *st; struct iio_dev *indio_dev; @@ -781,7 +781,7 @@ static struct i2c_driver ad5933_driver = { .name = "ad5933", .of_match_table = ad5933_of_match, }, - .probe = ad5933_probe, + .probe_new = ad5933_probe, .id_table = ad5933_id, }; module_i2c_driver(ad5933_driver); From 9fe1614f96af91ffd876da529cf9d667992e73a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:45:02 +0100 Subject: [PATCH 2520/4122] staging: iio: ade7854: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221118224540.619276-569-uwe@kleine-koenig.org Signed-off-by: Jonathan Cameron --- drivers/staging/iio/meter/ade7854-i2c.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/staging/iio/meter/ade7854-i2c.c b/drivers/staging/iio/meter/ade7854-i2c.c index 71b67dd3c8e9..572d714eb0dd 100644 --- a/drivers/staging/iio/meter/ade7854-i2c.c +++ b/drivers/staging/iio/meter/ade7854-i2c.c @@ -112,8 +112,7 @@ unlock: return ret; } -static int ade7854_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int ade7854_i2c_probe(struct i2c_client *client) { struct ade7854_state *st; struct iio_dev *indio_dev; @@ -144,7 +143,7 @@ static struct i2c_driver ade7854_i2c_driver = { .driver = { .name = "ade7854", }, - .probe = ade7854_i2c_probe, + .probe_new = ade7854_i2c_probe, .id_table = ade7854_id, }; module_i2c_driver(ade7854_i2c_driver); From 9ee95ae4cffd04e0773f16118b343104bab80634 Mon Sep 17 00:00:00 2001 From: Rajat Khandelwal Date: Tue, 22 Nov 2022 23:23:00 +0530 Subject: [PATCH 2521/4122] iio: temperature: Add driver support for Maxim MAX30208 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Maxim MAX30208 is a digital temperature sensor with 0.1°C accuracy. Add support for max30208 driver in iio subsystem. 
Datasheet: https://datasheets.maximintegrated.com/en/ds/MAX30208.pdf Signed-off-by: Rajat Khandelwal Link: https://lore.kernel.org/r/20221122175300.800956-1-rajat.khandelwal@linux.intel.com Signed-off-by: Jonathan Cameron --- MAINTAINERS | 6 + drivers/iio/temperature/Kconfig | 10 ++ drivers/iio/temperature/Makefile | 1 + drivers/iio/temperature/max30208.c | 252 +++++++++++++++++++++++++++++ 4 files changed, 269 insertions(+) create mode 100644 drivers/iio/temperature/max30208.c diff --git a/MAINTAINERS b/MAINTAINERS index be373dfae4dd..f1e1ac23a1c6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12518,6 +12518,12 @@ S: Maintained F: Documentation/devicetree/bindings/regulator/maxim,max20086.yaml F: drivers/regulator/max20086-regulator.c +MAXIM MAX30208 TEMPERATURE SENSOR DRIVER +M: Rajat Khandelwal +L: linux-iio@vger.kernel.org +S: Maintained +F: drivers/iio/temperature/max30208.c + MAXIM MAX77650 PMIC MFD DRIVER M: Bartosz Golaszewski L: linux-kernel@vger.kernel.org diff --git a/drivers/iio/temperature/Kconfig b/drivers/iio/temperature/Kconfig index e8ed849e3b76..ed384f33e0c7 100644 --- a/drivers/iio/temperature/Kconfig +++ b/drivers/iio/temperature/Kconfig @@ -128,6 +128,16 @@ config TSYS02D This driver can also be built as a module. If so, the module will be called tsys02d. +config MAX30208 + tristate "Maxim MAX30208 digital temperature sensor" + depends on I2C + help + If you say yes here you get support for Maxim MAX30208 + digital temperature sensor connected via I2C. + + This driver can also be built as a module. If so, the module + will be called max30208. 
+ config MAX31856 tristate "MAX31856 thermocouple sensor" depends on SPI diff --git a/drivers/iio/temperature/Makefile b/drivers/iio/temperature/Makefile index dd08e562ffe0..dfec8c6d3019 100644 --- a/drivers/iio/temperature/Makefile +++ b/drivers/iio/temperature/Makefile @@ -7,6 +7,7 @@ obj-$(CONFIG_IQS620AT_TEMP) += iqs620at-temp.o obj-$(CONFIG_LTC2983) += ltc2983.o obj-$(CONFIG_HID_SENSOR_TEMP) += hid-sensor-temperature.o obj-$(CONFIG_MAXIM_THERMOCOUPLE) += maxim_thermocouple.o +obj-$(CONFIG_MAX30208) += max30208.o obj-$(CONFIG_MAX31856) += max31856.o obj-$(CONFIG_MAX31865) += max31865.o obj-$(CONFIG_MLX90614) += mlx90614.o diff --git a/drivers/iio/temperature/max30208.c b/drivers/iio/temperature/max30208.c new file mode 100644 index 000000000000..c85c21474711 --- /dev/null +++ b/drivers/iio/temperature/max30208.c @@ -0,0 +1,252 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* + * Copyright (c) Rajat Khandelwal + * + * Maxim MAX30208 digital temperature sensor with 0.1°C accuracy + * (7-bit I2C slave address (0x50 - 0x53)) + */ + +#include +#include +#include +#include +#include +#include + +#define MAX30208_STATUS 0x00 +#define MAX30208_STATUS_TEMP_RDY BIT(0) +#define MAX30208_INT_ENABLE 0x01 +#define MAX30208_INT_ENABLE_TEMP_RDY BIT(0) + +#define MAX30208_FIFO_OVF_CNTR 0x06 +#define MAX30208_FIFO_DATA_CNTR 0x07 +#define MAX30208_FIFO_DATA 0x08 + +#define MAX30208_FIFO_CONFIG 0x0a +#define MAX30208_FIFO_CONFIG_RO BIT(1) + +#define MAX30208_SYSTEM_CTRL 0x0c +#define MAX30208_SYSTEM_CTRL_RESET 0x01 + +#define MAX30208_TEMP_SENSOR_SETUP 0x14 +#define MAX30208_TEMP_SENSOR_SETUP_CONV BIT(0) + +struct max30208_data { + struct i2c_client *client; + struct iio_dev *indio_dev; + struct mutex lock; /* Lock to prevent concurrent reads of temperature readings */ +}; + +static const struct iio_chan_spec max30208_channels[] = { + { + .type = IIO_TEMP, + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | BIT(IIO_CHAN_INFO_SCALE), + }, +}; + +/** + * max30208_request() - Request 
a reading + * @data: Struct comprising member elements of the device + * + * Requests a reading from the device and waits until the conversion is ready. + */ +static int max30208_request(struct max30208_data *data) +{ + /* + * Sensor can take up to 500 ms to respond so execute a total of + * 10 retries to give the device sufficient time. + */ + int retries = 10; + u8 regval; + int ret; + + ret = i2c_smbus_read_byte_data(data->client, MAX30208_TEMP_SENSOR_SETUP); + if (ret < 0) + return ret; + + regval = ret | MAX30208_TEMP_SENSOR_SETUP_CONV; + + ret = i2c_smbus_write_byte_data(data->client, MAX30208_TEMP_SENSOR_SETUP, regval); + if (ret) + return ret; + + while (retries--) { + ret = i2c_smbus_read_byte_data(data->client, MAX30208_STATUS); + if (ret < 0) + return ret; + + if (ret & MAX30208_STATUS_TEMP_RDY) + return 0; + + msleep(50); + } + dev_err(&data->client->dev, "Temperature conversion failed\n"); + + return -ETIMEDOUT; +} + +static int max30208_update_temp(struct max30208_data *data) +{ + u8 data_count; + int ret; + + mutex_lock(&data->lock); + + ret = max30208_request(data); + if (ret) + goto unlock; + + ret = i2c_smbus_read_byte_data(data->client, MAX30208_FIFO_OVF_CNTR); + if (ret < 0) + goto unlock; + else if (!ret) { + ret = i2c_smbus_read_byte_data(data->client, MAX30208_FIFO_DATA_CNTR); + if (ret < 0) + goto unlock; + + data_count = ret; + } else + data_count = 1; + + while (data_count) { + ret = i2c_smbus_read_word_swapped(data->client, MAX30208_FIFO_DATA); + if (ret < 0) + goto unlock; + + data_count--; + } + +unlock: + mutex_unlock(&data->lock); + return ret; +} + +/** + * max30208_config_setup() - Set up FIFO configuration register + * @data: Struct comprising member elements of the device + * + * Sets the rollover bit to '1' to enable overwriting FIFO during overflow. 
+ */ +static int max30208_config_setup(struct max30208_data *data) +{ + u8 regval; + int ret; + + ret = i2c_smbus_read_byte_data(data->client, MAX30208_FIFO_CONFIG); + if (ret < 0) + return ret; + + regval = ret | MAX30208_FIFO_CONFIG_RO; + + ret = i2c_smbus_write_byte_data(data->client, MAX30208_FIFO_CONFIG, regval); + if (ret) + return ret; + + return 0; +} + +static int max30208_read(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + int *val, int *val2, long mask) +{ + struct max30208_data *data = iio_priv(indio_dev); + int ret; + + switch (mask) { + case IIO_CHAN_INFO_RAW: + ret = max30208_update_temp(data); + if (ret < 0) + return ret; + + *val = sign_extend32(ret, 15); + return IIO_VAL_INT; + + case IIO_CHAN_INFO_SCALE: + *val = 5; + return IIO_VAL_INT; + + default: + return -EINVAL; + } +} + +static const struct iio_info max30208_info = { + .read_raw = max30208_read, +}; + +static int max30208_probe(struct i2c_client *i2c) +{ + struct device *dev = &i2c->dev; + struct max30208_data *data; + struct iio_dev *indio_dev; + int ret; + + indio_dev = devm_iio_device_alloc(dev, sizeof(*data)); + if (!indio_dev) + return -ENOMEM; + + data = iio_priv(indio_dev); + data->client = i2c; + mutex_init(&data->lock); + + indio_dev->name = "max30208"; + indio_dev->channels = max30208_channels; + indio_dev->num_channels = ARRAY_SIZE(max30208_channels); + indio_dev->info = &max30208_info; + indio_dev->modes = INDIO_DIRECT_MODE; + + ret = i2c_smbus_write_byte_data(data->client, MAX30208_SYSTEM_CTRL, + MAX30208_SYSTEM_CTRL_RESET); + if (ret) { + dev_err(dev, "Failure in performing reset\n"); + return ret; + } + + msleep(50); + + ret = max30208_config_setup(data); + if (ret) + return ret; + + ret = devm_iio_device_register(dev, indio_dev); + if (ret) { + dev_err(dev, "Failed to register IIO device\n"); + return ret; + } + + return 0; +} + +static const struct i2c_device_id max30208_id_table[] = { + { "max30208" }, + { } +}; +MODULE_DEVICE_TABLE(i2c, 
max30208_id_table); + +static const struct acpi_device_id max30208_acpi_match[] = { + { "MAX30208" }, + { } +}; +MODULE_DEVICE_TABLE(acpi, max30208_acpi_match); + +static const struct of_device_id max30208_of_match[] = { + { .compatible = "maxim,max30208" }, + { } +}; +MODULE_DEVICE_TABLE(of, max30208_of_match); + +static struct i2c_driver max30208_driver = { + .driver = { + .name = "max30208", + .of_match_table = max30208_of_match, + .acpi_match_table = max30208_acpi_match, + }, + .probe_new = max30208_probe, + .id_table = max30208_id_table, +}; +module_i2c_driver(max30208_driver); + +MODULE_AUTHOR("Rajat Khandelwal "); +MODULE_DESCRIPTION("Maxim MAX30208 digital temperature sensor"); +MODULE_LICENSE("GPL"); From 1140f96cd4af9bcf0065b0295c4486e1dba8426e Mon Sep 17 00:00:00 2001 From: Edmund Berenson Date: Tue, 22 Nov 2022 17:14:37 +0100 Subject: [PATCH 2522/4122] dt-bindings: iio: adc: ad7923: adjust documentation - The ad7927 is fully compatible with ad7928 driver, add documentation for device. - ad7923 and ad7924 are treated the same in the driver, show the relationship in the documentation. Suggested-by: Lukasz Zemla Signed-off-by: Edmund Berenson Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221122161437.18937-1-edmund.berenson@emlix.com Signed-off-by: Jonathan Cameron --- .../bindings/iio/adc/adi,ad7923.yaml | 26 ++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/Documentation/devicetree/bindings/iio/adc/adi,ad7923.yaml b/Documentation/devicetree/bindings/iio/adc/adi,ad7923.yaml index 07f9d1c09c7d..85148338c597 100644 --- a/Documentation/devicetree/bindings/iio/adc/adi,ad7923.yaml +++ b/Documentation/devicetree/bindings/iio/adc/adi,ad7923.yaml @@ -11,7 +11,7 @@ maintainers: description: | Analog Devices AD7904, AD7914, AD7923, AD7924 4 Channel ADCs, and AD7908, - AD7918, AD7928 8 Channels ADCs. + AD7918, AD7927, AD7928 8 Channels ADCs. 
Specifications about the part can be found at: https://www.analog.com/media/en/technical-documentation/data-sheets/AD7923.pdf @@ -20,14 +20,22 @@ description: | properties: compatible: - enum: - - adi,ad7904 - - adi,ad7914 - - adi,ad7923 - - adi,ad7924 - - adi,ad7908 - - adi,ad7918 - - adi,ad7928 + oneOf: + - enum: + - adi,ad7904 + - adi,ad7908 + - adi,ad7914 + - adi,ad7918 + - adi,ad7923 + - adi,ad7928 + - const: adi,ad7924 + deprecated: true + - items: + - const: adi,ad7924 + - const: adi,ad7923 + - items: + - const: adi,ad7927 + - const: adi,ad7928 reg: maxItems: 1 From 99b43a15915543484d7538cb32f49901e802628a Mon Sep 17 00:00:00 2001 From: Ibrahim Tilki Date: Tue, 22 Nov 2022 14:47:18 +0300 Subject: [PATCH 2523/4122] iio: adc: max11410: fix incomplete vref buffer mask VREFP bit was missing from channel configuration mask and VREFN bit was included twice instead which fails to enable positive reference buffer when requested by a channel. Channels that don't enable vrefp buffer were not affected. Fixes: a44ef7c46097 ("iio: adc: add max11410 adc driver") Reported-by: kernel test robot Reported-by: Julia Lawall Signed-off-by: Ibrahim Tilki Link: https://lore.kernel.org/r/20221122114718.17557-1-Ibrahim.Tilki@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/max11410.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/max11410.c b/drivers/iio/adc/max11410.c index 8cd566367187..fdc9f03135b5 100644 --- a/drivers/iio/adc/max11410.c +++ b/drivers/iio/adc/max11410.c @@ -370,7 +370,7 @@ static int max11410_configure_channel(struct max11410_state *st, FIELD_PREP(MAX11410_CTRL_UNIPOLAR_BIT, cfg.bipolar ? 
0 : 1); ret = regmap_update_bits(st->regmap, MAX11410_REG_CTRL, MAX11410_CTRL_REFSEL_MASK | - MAX11410_CTRL_VREFN_BUF_BIT | + MAX11410_CTRL_VREFP_BUF_BIT | MAX11410_CTRL_VREFN_BUF_BIT | MAX11410_CTRL_UNIPOLAR_BIT, regval); if (ret) From 99c05e4283a19a02a256f14100ca4ec3b2da3f62 Mon Sep 17 00:00:00 2001 From: Ramona Bolboaca Date: Tue, 22 Nov 2022 10:27:49 +0200 Subject: [PATCH 2524/4122] iio: adis: add '__adis_enable_irq()' implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add '__adis_enable_irq()' implementation which is the unlocked version of 'adis_enable_irq()'. Call '__adis_enable_irq()' instead of 'adis_enable_irq()' from '__adis_initial_startup()' to keep the expected unlocked functionality. This fix is needed to remove a deadlock for all devices which are using 'adis_initial_startup()'. The deadlock occurs because the same mutex is acquired twice, without releasing it. The mutex is acquired once inside 'adis_initial_startup()', before calling '__adis_initial_startup()', and once inside 'adis_enable_irq()', which is called by '__adis_initial_startup()'. The deadlock is removed by calling '__adis_enable_irq()', instead of 'adis_enable_irq()' from within '__adis_initial_startup()'. 
Fixes: b600bd7eb3335 ("iio: adis: do not disabe IRQs in 'adis_init()'") Signed-off-by: Ramona Bolboaca Reviewed-by: Nuno Sá Link: https://lore.kernel.org/r/20221122082757.449452-2-ramona.bolboaca@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/imu/adis.c | 28 ++++++++++------------------ include/linux/iio/imu/adis.h | 13 ++++++++++++- 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/drivers/iio/imu/adis.c b/drivers/iio/imu/adis.c index f7fcfd04f659..bc40240b29e2 100644 --- a/drivers/iio/imu/adis.c +++ b/drivers/iio/imu/adis.c @@ -270,23 +270,19 @@ EXPORT_SYMBOL_NS(adis_debugfs_reg_access, IIO_ADISLIB); #endif /** - * adis_enable_irq() - Enable or disable data ready IRQ + * __adis_enable_irq() - Enable or disable data ready IRQ (unlocked) * @adis: The adis device * @enable: Whether to enable the IRQ * * Returns 0 on success, negative error code otherwise */ -int adis_enable_irq(struct adis *adis, bool enable) +int __adis_enable_irq(struct adis *adis, bool enable) { - int ret = 0; + int ret; u16 msc; - mutex_lock(&adis->state_lock); - - if (adis->data->enable_irq) { - ret = adis->data->enable_irq(adis, enable); - goto out_unlock; - } + if (adis->data->enable_irq) + return adis->data->enable_irq(adis, enable); if (adis->data->unmasked_drdy) { if (enable) @@ -294,12 +290,12 @@ int adis_enable_irq(struct adis *adis, bool enable) else disable_irq(adis->spi->irq); - goto out_unlock; + return 0; } ret = __adis_read_reg_16(adis, adis->data->msc_ctrl_reg, &msc); if (ret) - goto out_unlock; + return ret; msc |= ADIS_MSC_CTRL_DATA_RDY_POL_HIGH; msc &= ~ADIS_MSC_CTRL_DATA_RDY_DIO2; @@ -308,13 +304,9 @@ int adis_enable_irq(struct adis *adis, bool enable) else msc &= ~ADIS_MSC_CTRL_DATA_RDY_EN; - ret = __adis_write_reg_16(adis, adis->data->msc_ctrl_reg, msc); - -out_unlock: - mutex_unlock(&adis->state_lock); - return ret; + return __adis_write_reg_16(adis, adis->data->msc_ctrl_reg, msc); } -EXPORT_SYMBOL_NS(adis_enable_irq, IIO_ADISLIB); 
+EXPORT_SYMBOL_NS(__adis_enable_irq, IIO_ADISLIB); /** * __adis_check_status() - Check the device for error conditions (unlocked) @@ -445,7 +437,7 @@ int __adis_initial_startup(struct adis *adis) * with 'IRQF_NO_AUTOEN' anyways. */ if (!adis->data->unmasked_drdy) - adis_enable_irq(adis, false); + __adis_enable_irq(adis, false); if (!adis->data->prod_id_reg) return 0; diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index 515ca09764fe..bcbefb757475 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -402,9 +402,20 @@ static inline int adis_update_bits_base(struct adis *adis, unsigned int reg, __adis_update_bits_base(adis, reg, mask, val, sizeof(val)); \ }) -int adis_enable_irq(struct adis *adis, bool enable); int __adis_check_status(struct adis *adis); int __adis_initial_startup(struct adis *adis); +int __adis_enable_irq(struct adis *adis, bool enable); + +static inline int adis_enable_irq(struct adis *adis, bool enable) +{ + int ret; + + mutex_lock(&adis->state_lock); + ret = __adis_enable_irq(adis, enable); + mutex_unlock(&adis->state_lock); + + return ret; +} static inline int adis_check_status(struct adis *adis) { From 594ff4c49e9dd7aba84d82052643c1e4b09b8788 Mon Sep 17 00:00:00 2001 From: Ramona Bolboaca Date: Tue, 22 Nov 2022 10:27:50 +0200 Subject: [PATCH 2525/4122] iio: accel: adis16201: Call '__adis_initial_startup()' in probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Call '__adis_initial_startup()' instead of its locked variant in 'adis16201_probe()'. The locks are not needed at this point. 
Signed-off-by: Ramona Bolboaca Reviewed-by: Nuno Sá Link: https://lore.kernel.org/r/20221122082757.449452-3-ramona.bolboaca@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/accel/adis16201.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/accel/adis16201.c b/drivers/iio/accel/adis16201.c index dfb8e2e5bdf5..d054721859b3 100644 --- a/drivers/iio/accel/adis16201.c +++ b/drivers/iio/accel/adis16201.c @@ -281,7 +281,7 @@ static int adis16201_probe(struct spi_device *spi) if (ret) return ret; - ret = adis_initial_startup(st); + ret = __adis_initial_startup(st); if (ret) return ret; From 09f8360f3a8843733d9e54960405c902982062f4 Mon Sep 17 00:00:00 2001 From: Ramona Bolboaca Date: Tue, 22 Nov 2022 10:27:51 +0200 Subject: [PATCH 2526/4122] iio: accel: adis16209: Call '__adis_initial_startup()' in probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Call '__adis_initial_startup()' instead of its locked variant in 'adis16209_probe()'. The locks are not needed at this point. 
Signed-off-by: Ramona Bolboaca Reviewed-by: Nuno Sá Link: https://lore.kernel.org/r/20221122082757.449452-4-ramona.bolboaca@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/accel/adis16209.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/accel/adis16209.c b/drivers/iio/accel/adis16209.c index 5a9c6e2296f1..0035e4f4db63 100644 --- a/drivers/iio/accel/adis16209.c +++ b/drivers/iio/accel/adis16209.c @@ -291,7 +291,7 @@ static int adis16209_probe(struct spi_device *spi) if (ret) return ret; - ret = adis_initial_startup(st); + ret = __adis_initial_startup(st); if (ret) return ret; From c5de7d4c026f4c4310e6cb1e06e1ae0bd57cd838 Mon Sep 17 00:00:00 2001 From: Ramona Bolboaca Date: Tue, 22 Nov 2022 10:27:52 +0200 Subject: [PATCH 2527/4122] iio: gyro: adis16136: Call '__adis_initial_startup()' in probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Call '__adis_initial_startup()' instead of its locked variant in 'adis16136_probe()'. The locks are not needed at this point. 
Signed-off-by: Ramona Bolboaca Reviewed-by: Nuno Sá Link: https://lore.kernel.org/r/20221122082757.449452-5-ramona.bolboaca@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/gyro/adis16136.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/gyro/adis16136.c b/drivers/iio/gyro/adis16136.c index 71295709f2b9..c95cf41be34b 100644 --- a/drivers/iio/gyro/adis16136.c +++ b/drivers/iio/gyro/adis16136.c @@ -429,7 +429,7 @@ static int adis16136_initial_setup(struct iio_dev *indio_dev) uint16_t prod_id; int ret; - ret = adis_initial_startup(&adis16136->adis); + ret = __adis_initial_startup(&adis16136->adis); if (ret) return ret; From 2647f0e4d397fa5f80c78370d514b05a2110f987 Mon Sep 17 00:00:00 2001 From: Ramona Bolboaca Date: Tue, 22 Nov 2022 10:27:53 +0200 Subject: [PATCH 2528/4122] iio: gyro: adis16260: Call '__adis_initial_startup()' in probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Call '__adis_initial_startup()' instead of its locked variant in 'adis16260_probe()'. The locks are not needed at this point. 
Signed-off-by: Ramona Bolboaca Reviewed-by: Nuno Sá Link: https://lore.kernel.org/r/20221122082757.449452-6-ramona.bolboaca@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/gyro/adis16260.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/gyro/adis16260.c b/drivers/iio/gyro/adis16260.c index eaf57bd339ed..112d635b7dfd 100644 --- a/drivers/iio/gyro/adis16260.c +++ b/drivers/iio/gyro/adis16260.c @@ -395,7 +395,7 @@ static int adis16260_probe(struct spi_device *spi) return ret; /* Get the device into a sane initial state */ - ret = adis_initial_startup(&adis16260->adis); + ret = __adis_initial_startup(&adis16260->adis); if (ret) return ret; From 40fd61b0698c1ecf73f8da61b461937b7d91ee26 Mon Sep 17 00:00:00 2001 From: Ramona Bolboaca Date: Tue, 22 Nov 2022 10:27:54 +0200 Subject: [PATCH 2529/4122] iio: imu: adis16400: Call '__adis_initial_startup()' in probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Call '__adis_initial_startup()' instead of its locked variant in 'adis16400_probe()'. The locks are not needed at this point. 
Signed-off-by: Ramona Bolboaca Reviewed-by: Nuno Sá Link: https://lore.kernel.org/r/20221122082757.449452-7-ramona.bolboaca@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/imu/adis16400.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/imu/adis16400.c b/drivers/iio/imu/adis16400.c index 17bb0c40a149..c02fc35dceb4 100644 --- a/drivers/iio/imu/adis16400.c +++ b/drivers/iio/imu/adis16400.c @@ -445,7 +445,7 @@ static int adis16400_initial_setup(struct iio_dev *indio_dev) st->adis.spi->mode = SPI_MODE_3; spi_setup(st->adis.spi); - ret = adis_initial_startup(&st->adis); + ret = __adis_initial_startup(&st->adis); if (ret) return ret; From f3b0ab42a875d3c4b05311bd1ebcc0322a55c30c Mon Sep 17 00:00:00 2001 From: Ramona Bolboaca Date: Tue, 22 Nov 2022 10:27:55 +0200 Subject: [PATCH 2530/4122] staging: iio: accel: adis16203: Call '__adis_initial_startup()' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Call '__adis_initial_startup()' instead of its locked variant in 'adis16203_probe()'. The locks are not needed at this point. 
Signed-off-by: Ramona Bolboaca Reviewed-by: Nuno Sá Link: https://lore.kernel.org/r/20221122082757.449452-8-ramona.bolboaca@analog.com Signed-off-by: Jonathan Cameron --- drivers/staging/iio/accel/adis16203.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/iio/accel/adis16203.c b/drivers/staging/iio/accel/adis16203.c index 62d5397ff1f9..c0e4c9266b5f 100644 --- a/drivers/staging/iio/accel/adis16203.c +++ b/drivers/staging/iio/accel/adis16203.c @@ -285,7 +285,7 @@ static int adis16203_probe(struct spi_device *spi) return ret; /* Get the device into a sane initial state */ - ret = adis_initial_startup(st); + ret = __adis_initial_startup(st); if (ret) return ret; From 60105b59cc12813267b922fa5969e38f9e8f9b50 Mon Sep 17 00:00:00 2001 From: Ramona Bolboaca Date: Tue, 22 Nov 2022 10:27:56 +0200 Subject: [PATCH 2531/4122] staging: iio: accel: adis16240: Call '__adis_initial_startup()' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Call '__adis_initial_startup()' instead of its locked variant in 'adis16240_probe()'. The locks are not needed at this point. 
Signed-off-by: Ramona Bolboaca Reviewed-by: Nuno Sá Link: https://lore.kernel.org/r/20221122082757.449452-9-ramona.bolboaca@analog.com Signed-off-by: Jonathan Cameron --- drivers/staging/iio/accel/adis16240.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/iio/accel/adis16240.c b/drivers/staging/iio/accel/adis16240.c index bca857eef92e..337492785f04 100644 --- a/drivers/staging/iio/accel/adis16240.c +++ b/drivers/staging/iio/accel/adis16240.c @@ -414,7 +414,7 @@ static int adis16240_probe(struct spi_device *spi) return ret; /* Get the device into a sane initial state */ - ret = adis_initial_startup(st); + ret = __adis_initial_startup(st); if (ret) return ret; From c613afc1f257e1e3229b8dcade43a104a26541c8 Mon Sep 17 00:00:00 2001 From: Ramona Bolboaca Date: Tue, 22 Nov 2022 10:27:57 +0200 Subject: [PATCH 2532/4122] iio: imu: adis: Remove adis_initial_startup function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove adis_initial_startup function since it is not used anymore. 
Signed-off-by: Ramona Bolboaca Reviewed-by: Nuno Sá Link: https://lore.kernel.org/r/20221122082757.449452-10-ramona.bolboaca@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/imu/adis.h | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index bcbefb757475..dc9ea299e088 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -428,18 +428,6 @@ static inline int adis_check_status(struct adis *adis) return ret; } -/* locked version of __adis_initial_startup() */ -static inline int adis_initial_startup(struct adis *adis) -{ - int ret; - - mutex_lock(&adis->state_lock); - ret = __adis_initial_startup(adis); - mutex_unlock(&adis->state_lock); - - return ret; -} - static inline void adis_dev_lock(struct adis *adis) { mutex_lock(&adis->state_lock); From 8aa2e715ca65757e0eb625862491f95e8baf0328 Mon Sep 17 00:00:00 2001 From: Minghao Chi Date: Tue, 22 Nov 2022 09:35:33 +0800 Subject: [PATCH 2533/4122] iio: use devm_platform_get_and_ioremap_resource() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert platform_get_resource(), devm_ioremap_resource() to a single call to devm_platform_get_and_ioremap_resource(), as this is exactly what this function does. 
Signed-off-by: Minghao Chi Signed-off-by: ye xingchen Reviewed-by: Nuno Sá Link: https://lore.kernel.org/r/202211220935338446115@zte.com.cn Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7606_par.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iio/adc/ad7606_par.c b/drivers/iio/adc/ad7606_par.c index b912b4df9b56..d8408052262e 100644 --- a/drivers/iio/adc/ad7606_par.c +++ b/drivers/iio/adc/ad7606_par.c @@ -57,8 +57,7 @@ static int ad7606_par_probe(struct platform_device *pdev) if (irq < 0) return irq; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - addr = devm_ioremap_resource(&pdev->dev, res); + addr = devm_platform_get_and_ioremap_resource(pdev, 0, &res); if (IS_ERR(addr)) return PTR_ERR(addr); From 980389d06d08442fad0139874bff455c76125e47 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 18 Nov 2022 13:32:08 +0100 Subject: [PATCH 2534/4122] iio: addac: ad74413r: fix integer promotion bug in ad74413_get_input_current_offset() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The constant AD74413R_ADC_RESULT_MAX is defined via GENMASK, so its type is "unsigned long". Hence in the expression voltage_offset * AD74413R_ADC_RESULT_MAX, voltage_offset is first promoted to unsigned long, and since it may be negative, that results in a garbage value. For example, when range is AD74413R_ADC_RANGE_5V_BI_DIR, voltage_offset is -2500 and voltage_range is 5000, so the RHS of this assignment is, depending on sizeof(long), either 826225UL or 3689348814709142UL, which after truncation to int then results in either 826225 or 1972216214 being the output from in_currentX_offset. Casting to int avoids that promotion and results in the correct -32767 output. 
Signed-off-by: Rasmus Villemoes Fixes: fea251b6a5db (iio: addac: add AD74413R driver) Reviewed-by: Nuno Sá Link: https://lore.kernel.org/r/20221118123209.1658420-1-linux@rasmusvillemoes.dk Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/addac/ad74413r.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/addac/ad74413r.c b/drivers/iio/addac/ad74413r.c index 899bcd83f40b..e0e130ba9d3e 100644 --- a/drivers/iio/addac/ad74413r.c +++ b/drivers/iio/addac/ad74413r.c @@ -691,7 +691,7 @@ static int ad74413_get_input_current_offset(struct ad74413r_state *st, if (ret) return ret; - *val = voltage_offset * AD74413R_ADC_RESULT_MAX / voltage_range; + *val = voltage_offset * (int)AD74413R_ADC_RESULT_MAX / voltage_range; return IIO_VAL_INT; } From 0e69ba0dd56700b173100984f8a89fe4605591a5 Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Thu, 17 Nov 2022 10:09:15 +0200 Subject: [PATCH 2535/4122] dt-bindings: iio: addac: add AD74115 The AD74115H is a single-channel, software-configurable, input and output device for industrial control applications. The AD74115H provides a wide range of use cases, integrated on a single chip. These use cases include analog output, analog input, digital output, digital input, resistance temperature detector (RTD), and thermocouple measurement capability. The AD74115H also has an integrated HART modem. A serial peripheral interface (SPI) is used to handle all communications to the device, including communications with the HART modem. The digital input and digital outputs can be accessed via the SPI or the general-purpose input and output (GPIO) pins to support higher speed data rates. The device features a 16-bit, sigma-delta analog-to-digital converter (ADC) and a 14-bit digital-to-analog converter (DAC). The AD74115H contains a high accuracy 2.5 V on-chip reference that can be used as the DAC and ADC reference. 
Signed-off-by: Cosmin Tanislav Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20221117080916.411766-2-cosmin.tanislav@analog.com Signed-off-by: Jonathan Cameron --- .../bindings/iio/addac/adi,ad74115.yaml | 373 ++++++++++++++++++ MAINTAINERS | 7 + 2 files changed, 380 insertions(+) create mode 100644 Documentation/devicetree/bindings/iio/addac/adi,ad74115.yaml diff --git a/Documentation/devicetree/bindings/iio/addac/adi,ad74115.yaml b/Documentation/devicetree/bindings/iio/addac/adi,ad74115.yaml new file mode 100644 index 000000000000..72d2e910f206 --- /dev/null +++ b/Documentation/devicetree/bindings/iio/addac/adi,ad74115.yaml @@ -0,0 +1,373 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/iio/addac/adi,ad74115.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Analog Devices AD74115H device + +maintainers: + - Cosmin Tanislav + +description: | + The AD74115H is a single-channel software configurable input/output + device for industrial control applications. It contains functionality for + analog output, analog input, digital output, digital input, resistance + temperature detector, and thermocouple measurements integrated into a single + chip solution with an SPI interface. The device features a 16-bit ADC and a + 14-bit DAC. + + https://www.analog.com/en/products/ad74115h.html + +properties: + compatible: + enum: + - adi,ad74115h + + reg: + maxItems: 1 + + spi-max-frequency: + maximum: 24000000 + + spi-cpol: true + + reset-gpios: true + + interrupts: + minItems: 1 + maxItems: 2 + + interrupt-names: + minItems: 1 + maxItems: 2 + items: + enum: + - adc_rdy + - alert + + avdd-supply: true + avcc-supply: true + dvcc-supply: true + dovdd-supply: true + refin-supply: true + + adi,ch-func: + $ref: /schemas/types.yaml#/definitions/uint32 + description: | + Channel function. 
+ 0 - High impedance + 1 - Voltage output + 2 - Current output + 3 - Voltage input + 4 - Current input, externally-powered + 5 - Current input, loop-powered + 6 - Resistance input + 7 - RTD measure + 8 - Digital input logic + 9 - Digital input, loop-powered + 10 - Current output with HART + 11 - Current input, externally-powered, with HART + 12 - Current input, loop-powered, with HART + minimum: 0 + maximum: 12 + default: 0 + + adi,conv2-mux: + $ref: /schemas/types.yaml#/definitions/uint32 + description: | + Input node for ADC conversion 2. + 0 - SENSE_EXT1 to AGND_SENSE + 1 - SENSE_EXT2 to AGND_SENSE + 2 - SENSE_EXT2 to SENSE_EXT1 + 3 - AGND to AGND + minimum: 0 + maximum: 3 + default: 0 + + adi,conv2-range-microvolt: + description: Conversion range for ADC conversion 2. + oneOf: + - items: + - enum: [-2500000, 0] + - const: 2500000 + - items: + - enum: [-12000000, 0] + - const: 12000000 + - items: + - const: -2500000 + - const: 0 + - items: + - const: -104000 + - const: 104000 + - items: + - const: 0 + - const: 625000 + + adi,sense-agnd-buffer-low-power: + type: boolean + description: + Whether to enable low-power buffered mode for the AGND sense pin. + + adi,lf-buffer-low-power: + type: boolean + description: + Whether to enable low-power buffered mode for the low-side filtered + sense pin. + + adi,hf-buffer-low-power: + type: boolean + description: + Whether to enable low-power buffered mode for the high-side filtered + sense pin. + + adi,ext2-buffer-low-power: + type: boolean + description: Whether to enable low-power buffered mode for the EXT2 pin. + + adi,ext1-buffer-low-power: + type: boolean + description: Whether to enable low-power buffered mode for the EXT1 pin. + + adi,comparator-invert: + type: boolean + description: Whether to invert the comparator output. 
+ + adi,digital-input-sink-range-high: + $ref: /schemas/types.yaml#/definitions/uint32 + description: | + When not present, the digital input range is from 0 to 3700uA in steps + of 120uA, with a ~2k series resistance. + When present, the digital input range is from 0 to 7400uA in steps + of 240uA, with a ~1k series resistance. + + adi,digital-input-sink-microamp: + description: Sink current in digital input mode. + minimum: 0 + maximum: 3700 + default: 0 + + adi,digital-input-debounce-mode-counter-reset: + type: boolean + description: | + When not present, a counter increments when the signal is asserted + and decrements when the signal is de-asserted. + When present, a counter increments while the signal is asserted and + resets when the signal de-asserts. + + adi,digital-input-unbuffered: + type: boolean + description: Whether to leave digital input signals unbuffered. + + adi,digital-input-short-circuit-detection: + type: boolean + description: Whether to detect digital input short circuits. + + adi,digital-input-open-circuit-detection: + type: boolean + description: Whether to detect digital input open circuits. + + adi,digital-input-threshold-mode-fixed: + type: boolean + description: | + When not present, the digital input threshold range is -0.96 * AVDD + to AVDD. + When present, the threshold range is fixed from -19V to 30V. + + adi,dac-bipolar: + type: boolean + description: | + When not present, the DAC operates in the 0V to 12V range. + When present, the DAC operates in the -12V to 12V range. + + adi,charge-pump: + type: boolean + description: Whether to enable the internal charge pump. + + adi,dac-hart-slew: + type: boolean + description: Whether to use a HART-compatible slew rate. + + adi,dac-current-limit-low: + type: boolean + description: | + When not present, the DAC short-circuit current limit is 32mA in + either source or sink for VOUT and 4mA sink for IOUT. + When present, the limit is 16mA in either source or sink for VOUT, + 1mA sink for IOUT. 
+ + adi,4-wire-rtd: + type: boolean + description: | + When not present, the ADC should be used for measuring 3-wire RTDs. + When present, the ADC should be used for measuring 4-wire RTDs. + + adi,3-wire-rtd-excitation-swap: + type: boolean + description: Whether to swap the excitation for 3-wire RTD. + + adi,rtd-excitation-current-microamp: + description: Excitation current to apply to RTD. + enum: [250, 500, 750, 1000] + default: 250 + + adi,ext1-burnout: + type: boolean + description: Whether to enable burnout current for EXT1. + + adi,ext1-burnout-current-nanoamp: + $ref: /schemas/types.yaml#/definitions/uint32 + description: + Burnout current in nanoamps to be applied to EXT1. + enum: [0, 50, 500, 1000, 10000] + default: 0 + + adi,ext1-burnout-current-polarity-sourcing: + type: boolean + description: | + When not present, the burnout current polarity for EXT1 is sinking. + When present, the burnout current polarity for EXT1 is sourcing. + + adi,ext2-burnout: + type: boolean + description: Whether to enable burnout current for EXT2. + + adi,ext2-burnout-current-nanoamp: + $ref: /schemas/types.yaml#/definitions/uint32 + description: Burnout current in nanoamps to be applied to EXT2. + enum: [0, 50, 500, 1000, 10000] + default: 0 + + adi,ext2-burnout-current-polarity-sourcing: + type: boolean + description: | + When not present, the burnout current polarity for EXT2 is sinking. + When present, the burnout current polarity for EXT2 is sourcing. + + adi,viout-burnout: + type: boolean + description: Whether to enable burnout current for VIOUT. + + adi,viout-burnout-current-nanoamp: + $ref: /schemas/types.yaml#/definitions/uint32 + description: Burnout current in nanoamps to be applied to VIOUT. + enum: [0, 1000, 10000] + default: 0 + + adi,viout-burnout-current-polarity-sourcing: + type: boolean + description: | + When not present, the burnout current polarity for VIOUT is sinking. + When present, the burnout current polarity for VIOUT is sourcing. 
+ + adi,gpio0-mode: + $ref: /schemas/types.yaml#/definitions/uint32 + description: | + GPIO functions. + 0 - Disabled + 1 - Logic I/O + 2 - Comparator output + 3 - Control HART CD + 4 - Monitor HART CD + 5 - Monitor HART EOM status + minimum: 0 + maximum: 5 + default: 0 + + adi,gpio1-mode: + $ref: /schemas/types.yaml#/definitions/uint32 + description: | + GPIO functions. + 0 - Disabled + 1 - Logic I/O + 2 - Drive external digital output FET + 3 - Control HART RXD + 4 - Monitor HART RXD + 5 - Monitor HART SOM status + minimum: 0 + maximum: 5 + default: 0 + + adi,gpio2-mode: + $ref: /schemas/types.yaml#/definitions/uint32 + description: | + GPIO functions. + 0 - Disabled + 1 - Logic I/O + 2 - Drive internal digital output FET + 3 - Control HART TXD + 4 - Monitor HART TXD + 5 - Monitor HART TX complete status + minimum: 0 + maximum: 5 + default: 0 + + adi,gpio3-mode: + $ref: /schemas/types.yaml#/definitions/uint32 + description: | + GPIO functions. + 0 - Disabled + 1 - Logic I/O + 2 - High impedance + 3 - Control HART RTS + 4 - Monitor HART RTS + 5 - Monitor HART CD complete status + minimum: 0 + maximum: 5 + default: 0 + +required: + - compatible + - reg + - spi-cpol + - avdd-supply + +allOf: + - $ref: /schemas/spi/spi-peripheral-props.yaml# + - if: + required: + - adi,digital-input-sink-range-high + then: + properties: + adi,digital-input-sink-microamp: + maximum: 7400 + +unevaluatedProperties: false + +examples: + - | + #include + #include + + spi { + #address-cells = <1>; + #size-cells = <0>; + + addac@0 { + compatible = "adi,ad74115h"; + reg = <0>; + + spi-max-frequency = <12000000>; + spi-cpol; + + reset-gpios = <&gpio 27 GPIO_ACTIVE_LOW>; + + interrupt-parent = <&gpio>; + interrupts = <26 IRQ_TYPE_EDGE_FALLING>; + interrupt-names = "adc_rdy"; + + avdd-supply = <&ad74115_avdd>; + + adi,ch-func = <1>; + adi,conv2-mux = <2>; + adi,conv2-range-microvolt = <(-12000000) 12000000>; + + adi,gpio0-mode = <1>; + adi,gpio1-mode = <1>; + adi,gpio2-mode = <1>; + 
adi,gpio3-mode = <1>; + + adi,dac-bipolar; + }; + }; +... diff --git a/MAINTAINERS b/MAINTAINERS index f1e1ac23a1c6..d1ba33eb0cec 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1175,6 +1175,13 @@ W: https://ez.analog.com/linux-software-drivers F: Documentation/devicetree/bindings/iio/adc/adi,ad7780.yaml F: drivers/iio/adc/ad7780.c +ANALOG DEVICES INC AD74115 DRIVER +M: Cosmin Tanislav +L: linux-iio@vger.kernel.org +S: Supported +W: http://ez.analog.com/community/linux-device-drivers +F: Documentation/devicetree/bindings/iio/addac/adi,ad74115.yaml + ANALOG DEVICES INC AD74413R DRIVER M: Cosmin Tanislav L: linux-iio@vger.kernel.org From 48ea75598db2b38e4a4b5738d65f226be717bd59 Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Thu, 17 Nov 2022 10:09:16 +0200 Subject: [PATCH 2536/4122] iio: addac: add AD74115 driver The AD74115H is a single-channel, software-configurable, input and output device for industrial control applications. The AD74115H provides a wide range of use cases, integrated on a single chip. These use cases include analog output, analog input, digital output, digital input, resistance temperature detector (RTD), and thermocouple measurement capability. The AD74115H also has an integrated HART modem. A serial peripheral interface (SPI) is used to handle all communications to the device, including communications with the HART modem. The digital input and digital outputs can be accessed via the SPI or the general-purpose input and output (GPIO) pins to support higher speed data rates. The device features a 16-bit, sigma-delta analog-to-digital converter (ADC) and a 14-bit digital-to-analog converter (DAC). The AD74115H contains a high accuracy 2.5 V on-chip reference that can be used as the DAC and ADC reference. 
Signed-off-by: Cosmin Tanislav Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20221117080916.411766-3-cosmin.tanislav@analog.com Signed-off-by: Jonathan Cameron --- MAINTAINERS | 1 + drivers/iio/addac/Kconfig | 14 + drivers/iio/addac/Makefile | 1 + drivers/iio/addac/ad74115.c | 1947 +++++++++++++++++++++++++++++++++++ 4 files changed, 1963 insertions(+) create mode 100644 drivers/iio/addac/ad74115.c diff --git a/MAINTAINERS b/MAINTAINERS index d1ba33eb0cec..860075c493dd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1181,6 +1181,7 @@ L: linux-iio@vger.kernel.org S: Supported W: http://ez.analog.com/community/linux-device-drivers F: Documentation/devicetree/bindings/iio/addac/adi,ad74115.yaml +F: drivers/iio/addac/ad74115.c ANALOG DEVICES INC AD74413R DRIVER M: Cosmin Tanislav diff --git a/drivers/iio/addac/Kconfig b/drivers/iio/addac/Kconfig index fcf6d2269bfc..2843fcb70e24 100644 --- a/drivers/iio/addac/Kconfig +++ b/drivers/iio/addac/Kconfig @@ -5,6 +5,20 @@ menu "Analog to digital and digital to analog converters" +config AD74115 + tristate "Analog Devices AD74115H driver" + depends on GPIOLIB && SPI + select CRC8 + select IIO_BUFFER + select IIO_TRIGGERED_BUFFER + select REGMAP_SPI + help + Say yes here to build support for Analog Devices AD74115H + single-channel software configurable input/output solution. + + To compile this driver as a module, choose M here: the + module will be called ad74115. 
+ config AD74413R tristate "Analog Devices AD74412R/AD74413R driver" depends on GPIOLIB && SPI diff --git a/drivers/iio/addac/Makefile b/drivers/iio/addac/Makefile index 17de20ef0d8e..577777276e43 100644 --- a/drivers/iio/addac/Makefile +++ b/drivers/iio/addac/Makefile @@ -4,5 +4,6 @@ # # When adding new entries keep the list in alphabetical order +obj-$(CONFIG_AD74115) += ad74115.o obj-$(CONFIG_AD74413R) += ad74413r.o obj-$(CONFIG_STX104) += stx104.o diff --git a/drivers/iio/addac/ad74115.c b/drivers/iio/addac/ad74115.c new file mode 100644 index 000000000000..383b92e7b682 --- /dev/null +++ b/drivers/iio/addac/ad74115.c @@ -0,0 +1,1947 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022 Analog Devices, Inc. + * Author: Cosmin Tanislav + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +#define AD74115_NAME "ad74115" + +#define AD74115_CH_FUNC_SETUP_REG 0x01 + +#define AD74115_ADC_CONFIG_REG 0x02 +#define AD74115_ADC_CONFIG_CONV2_RATE_MASK GENMASK(15, 13) +#define AD74115_ADC_CONFIG_CONV1_RATE_MASK GENMASK(12, 10) +#define AD74115_ADC_CONFIG_CONV2_RANGE_MASK GENMASK(9, 7) +#define AD74115_ADC_CONFIG_CONV1_RANGE_MASK GENMASK(6, 4) + +#define AD74115_PWR_OPTIM_CONFIG_REG 0x03 + +#define AD74115_DIN_CONFIG1_REG 0x04 +#define AD74115_DIN_COMPARATOR_EN_MASK BIT(13) +#define AD74115_DIN_SINK_MASK GENMASK(11, 7) +#define AD74115_DIN_DEBOUNCE_MASK GENMASK(4, 0) + +#define AD74115_DIN_CONFIG2_REG 0x05 +#define AD74115_COMP_THRESH_MASK GENMASK(6, 0) + +#define AD74115_OUTPUT_CONFIG_REG 0x06 +#define AD74115_OUTPUT_SLEW_EN_MASK GENMASK(6, 5) +#define AD74115_OUTPUT_SLEW_LIN_STEP_MASK GENMASK(4, 3) +#define AD74115_OUTPUT_SLEW_LIN_RATE_MASK GENMASK(2, 1) + +#define AD74115_RTD3W4W_CONFIG_REG 0x07 + +#define AD74115_BURNOUT_CONFIG_REG 0x0a +#define AD74115_BURNOUT_EXT2_EN_MASK BIT(10) +#define AD74115_BURNOUT_EXT1_EN_MASK BIT(5) 
+#define AD74115_BURNOUT_VIOUT_EN_MASK BIT(0) + +#define AD74115_DAC_CODE_REG 0x0b + +#define AD74115_DAC_ACTIVE_REG 0x0d + +#define AD74115_GPIO_CONFIG_X_REG(x) (0x35 + (x)) +#define AD74115_GPIO_CONFIG_GPI_DATA BIT(5) +#define AD74115_GPIO_CONFIG_GPO_DATA BIT(4) +#define AD74115_GPIO_CONFIG_SELECT_MASK GENMASK(2, 0) + +#define AD74115_CHARGE_PUMP_REG 0x3a + +#define AD74115_ADC_CONV_CTRL_REG 0x3b +#define AD74115_ADC_CONV_SEQ_MASK GENMASK(13, 12) + +#define AD74115_DIN_COMP_OUT_REG 0x40 + +#define AD74115_LIVE_STATUS_REG 0x42 +#define AD74115_ADC_DATA_RDY_MASK BIT(3) + +#define AD74115_READ_SELECT_REG 0x64 + +#define AD74115_CMD_KEY_REG 0x78 +#define AD74115_CMD_KEY_RESET1 0x15fa +#define AD74115_CMD_KEY_RESET2 0xaf51 + +#define AD74115_CRC_POLYNOMIAL 0x7 +DECLARE_CRC8_TABLE(ad74115_crc8_table); + +#define AD74115_ADC_CODE_MAX ((int)GENMASK(15, 0)) +#define AD74115_ADC_CODE_HALF (AD74115_ADC_CODE_MAX / 2) + +#define AD74115_DAC_VOLTAGE_MAX 12000 +#define AD74115_DAC_CURRENT_MAX 25 +#define AD74115_DAC_CODE_MAX ((int)GENMASK(13, 0)) +#define AD74115_DAC_CODE_HALF (AD74115_DAC_CODE_MAX / 2) + +#define AD74115_COMP_THRESH_MAX 98 + +#define AD74115_SENSE_RESISTOR_OHMS 100 +#define AD74115_REF_RESISTOR_OHMS 2100 + +#define AD74115_DIN_SINK_LOW_STEP 120 +#define AD74115_DIN_SINK_HIGH_STEP 240 +#define AD74115_DIN_SINK_MAX 31 + +#define AD74115_FRAME_SIZE 4 +#define AD74115_GPIO_NUM 4 + +#define AD74115_CONV_TIME_US 1000000 + +enum ad74115_dac_ch { + AD74115_DAC_CH_MAIN, + AD74115_DAC_CH_COMPARATOR, +}; + +enum ad74115_adc_ch { + AD74115_ADC_CH_CONV1, + AD74115_ADC_CH_CONV2, + AD74115_ADC_CH_NUM +}; + +enum ad74115_ch_func { + AD74115_CH_FUNC_HIGH_IMPEDANCE, + AD74115_CH_FUNC_VOLTAGE_OUTPUT, + AD74115_CH_FUNC_CURRENT_OUTPUT, + AD74115_CH_FUNC_VOLTAGE_INPUT, + AD74115_CH_FUNC_CURRENT_INPUT_EXT_POWER, + AD74115_CH_FUNC_CURRENT_INPUT_LOOP_POWER, + AD74115_CH_FUNC_2_WIRE_RESISTANCE_INPUT, + AD74115_CH_FUNC_3_4_WIRE_RESISTANCE_INPUT, + AD74115_CH_FUNC_DIGITAL_INPUT_LOGIC, + 
AD74115_CH_FUNC_DIGITAL_INPUT_LOOP_POWER, + AD74115_CH_FUNC_CURRENT_OUTPUT_HART, + AD74115_CH_FUNC_CURRENT_INPUT_EXT_POWER_HART, + AD74115_CH_FUNC_CURRENT_INPUT_LOOP_POWER_HART, + AD74115_CH_FUNC_MAX = AD74115_CH_FUNC_CURRENT_INPUT_LOOP_POWER_HART, + AD74115_CH_FUNC_NUM +}; + +enum ad74115_adc_range { + AD74115_ADC_RANGE_12V, + AD74115_ADC_RANGE_12V_BIPOLAR, + AD74115_ADC_RANGE_2_5V_BIPOLAR, + AD74115_ADC_RANGE_2_5V_NEG, + AD74115_ADC_RANGE_2_5V, + AD74115_ADC_RANGE_0_625V, + AD74115_ADC_RANGE_104MV_BIPOLAR, + AD74115_ADC_RANGE_12V_OTHER, + AD74115_ADC_RANGE_MAX = AD74115_ADC_RANGE_12V_OTHER, + AD74115_ADC_RANGE_NUM +}; + +enum ad74115_adc_conv_seq { + AD74115_ADC_CONV_SEQ_STANDBY = 0b00, + AD74115_ADC_CONV_SEQ_SINGLE = 0b01, + AD74115_ADC_CONV_SEQ_CONTINUOUS = 0b10, +}; + +enum ad74115_din_threshold_mode { + AD74115_DIN_THRESHOLD_MODE_AVDD, + AD74115_DIN_THRESHOLD_MODE_FIXED, + AD74115_DIN_THRESHOLD_MODE_MAX = AD74115_DIN_THRESHOLD_MODE_FIXED, +}; + +enum ad74115_slew_mode { + AD74115_SLEW_MODE_DISABLED, + AD74115_SLEW_MODE_LINEAR, + AD74115_SLEW_MODE_HART, +}; + +enum ad74115_slew_step { + AD74115_SLEW_STEP_0_8_PERCENT, + AD74115_SLEW_STEP_1_5_PERCENT, + AD74115_SLEW_STEP_6_1_PERCENT, + AD74115_SLEW_STEP_22_2_PERCENT, +}; + +enum ad74115_slew_rate { + AD74115_SLEW_RATE_4KHZ, + AD74115_SLEW_RATE_64KHZ, + AD74115_SLEW_RATE_150KHZ, + AD74115_SLEW_RATE_240KHZ, +}; + +enum ad74115_gpio_config { + AD74115_GPIO_CONFIG_OUTPUT_BUFFERED = 0b010, + AD74115_GPIO_CONFIG_INPUT = 0b011, +}; + +enum ad74115_gpio_mode { + AD74115_GPIO_MODE_LOGIC = 1, + AD74115_GPIO_MODE_SPECIAL = 2, +}; + +struct ad74115_channels { + struct iio_chan_spec *channels; + unsigned int num_channels; +}; + +struct ad74115_state { + struct spi_device *spi; + struct regmap *regmap; + struct iio_trigger *trig; + struct regulator *avdd; + + /* + * Synchronize consecutive operations when doing a one-shot + * conversion and when updating the ADC samples SPI message. 
+ */ + struct mutex lock; + struct gpio_chip gc; + struct gpio_chip comp_gc; + int irq; + + unsigned int avdd_mv; + unsigned long gpio_valid_mask; + bool dac_bipolar; + bool dac_hart_slew; + bool rtd_mode_4_wire; + enum ad74115_ch_func ch_func; + enum ad74115_din_threshold_mode din_threshold_mode; + + struct completion adc_data_completion; + struct spi_message adc_samples_msg; + struct spi_transfer adc_samples_xfer[AD74115_ADC_CH_NUM + 1]; + + /* + * DMA (thus cache coherency maintenance) requires the + * transfer buffers to live in their own cache lines. + */ + u8 reg_tx_buf[AD74115_FRAME_SIZE] __aligned(IIO_DMA_MINALIGN); + u8 reg_rx_buf[AD74115_FRAME_SIZE]; + u8 adc_samples_tx_buf[AD74115_FRAME_SIZE * AD74115_ADC_CH_NUM]; + u8 adc_samples_rx_buf[AD74115_FRAME_SIZE * AD74115_ADC_CH_NUM]; +}; + +struct ad74115_fw_prop { + const char *name; + bool is_boolean; + bool negate; + unsigned int max; + unsigned int reg; + unsigned int mask; + const unsigned int *lookup_tbl; + unsigned int lookup_tbl_len; +}; + +#define AD74115_FW_PROP(_name, _max, _reg, _mask) \ +{ \ + .name = (_name), \ + .max = (_max), \ + .reg = (_reg), \ + .mask = (_mask), \ +} + +#define AD74115_FW_PROP_TBL(_name, _tbl, _reg, _mask) \ +{ \ + .name = (_name), \ + .reg = (_reg), \ + .mask = (_mask), \ + .lookup_tbl = (_tbl), \ + .lookup_tbl_len = ARRAY_SIZE(_tbl), \ +} + +#define AD74115_FW_PROP_BOOL(_name, _reg, _mask) \ +{ \ + .name = (_name), \ + .is_boolean = true, \ + .reg = (_reg), \ + .mask = (_mask), \ +} + +#define AD74115_FW_PROP_BOOL_NEG(_name, _reg, _mask) \ +{ \ + .name = (_name), \ + .is_boolean = true, \ + .negate = true, \ + .reg = (_reg), \ + .mask = (_mask), \ +} + +static const int ad74115_dac_rate_tbl[] = { + 0, + 4 * 8, + 4 * 15, + 4 * 61, + 4 * 222, + 64 * 8, + 64 * 15, + 64 * 61, + 64 * 222, + 150 * 8, + 150 * 15, + 150 * 61, + 150 * 222, + 240 * 8, + 240 * 15, + 240 * 61, + 240 * 222, +}; + +static const unsigned int ad74115_dac_rate_step_tbl[][3] = { + { 
AD74115_SLEW_MODE_DISABLED }, + { AD74115_SLEW_MODE_LINEAR, AD74115_SLEW_STEP_0_8_PERCENT, AD74115_SLEW_RATE_4KHZ }, + { AD74115_SLEW_MODE_LINEAR, AD74115_SLEW_STEP_1_5_PERCENT, AD74115_SLEW_RATE_4KHZ }, + { AD74115_SLEW_MODE_LINEAR, AD74115_SLEW_STEP_6_1_PERCENT, AD74115_SLEW_RATE_4KHZ }, + { AD74115_SLEW_MODE_LINEAR, AD74115_SLEW_STEP_22_2_PERCENT, AD74115_SLEW_RATE_4KHZ }, + { AD74115_SLEW_MODE_LINEAR, AD74115_SLEW_STEP_0_8_PERCENT, AD74115_SLEW_RATE_64KHZ }, + { AD74115_SLEW_MODE_LINEAR, AD74115_SLEW_STEP_1_5_PERCENT, AD74115_SLEW_RATE_64KHZ }, + { AD74115_SLEW_MODE_LINEAR, AD74115_SLEW_STEP_6_1_PERCENT, AD74115_SLEW_RATE_64KHZ }, + { AD74115_SLEW_MODE_LINEAR, AD74115_SLEW_STEP_22_2_PERCENT, AD74115_SLEW_RATE_64KHZ }, + { AD74115_SLEW_MODE_LINEAR, AD74115_SLEW_STEP_0_8_PERCENT, AD74115_SLEW_RATE_150KHZ }, + { AD74115_SLEW_MODE_LINEAR, AD74115_SLEW_STEP_1_5_PERCENT, AD74115_SLEW_RATE_150KHZ }, + { AD74115_SLEW_MODE_LINEAR, AD74115_SLEW_STEP_6_1_PERCENT, AD74115_SLEW_RATE_150KHZ }, + { AD74115_SLEW_MODE_LINEAR, AD74115_SLEW_STEP_22_2_PERCENT, AD74115_SLEW_RATE_150KHZ }, + { AD74115_SLEW_MODE_LINEAR, AD74115_SLEW_STEP_0_8_PERCENT, AD74115_SLEW_RATE_240KHZ }, + { AD74115_SLEW_MODE_LINEAR, AD74115_SLEW_STEP_1_5_PERCENT, AD74115_SLEW_RATE_240KHZ }, + { AD74115_SLEW_MODE_LINEAR, AD74115_SLEW_STEP_6_1_PERCENT, AD74115_SLEW_RATE_240KHZ }, + { AD74115_SLEW_MODE_LINEAR, AD74115_SLEW_STEP_22_2_PERCENT, AD74115_SLEW_RATE_240KHZ }, +}; + +static const unsigned int ad74115_dac_slew_rate_hz_tbl[] = { + 4000, 64000, 150000, 240000 +}; + +static const unsigned int ad74115_rtd_excitation_current_ua_tbl[] = { + 250, 500, 750, 1000 +}; + +static const unsigned int ad74115_burnout_current_na_tbl[] = { + 0, 50, 0, 500, 1000, 0, 10000, 0 +}; + +static const unsigned int ad74115_viout_burnout_current_na_tbl[] = { + 0, 0, 0, 0, 1000, 0, 10000, 0 +}; + +static const unsigned int ad74115_gpio_mode_tbl[] = { + 0, 0, 0, 1, 2, 3, 4, 5 +}; + +static const unsigned int 
ad74115_adc_conv_rate_tbl[] = { + 10, 20, 1200, 4800, 9600 +}; + +static const unsigned int ad74115_debounce_tbl[] = { + 0, 13, 18, 24, 32, 42, 56, 75, + 100, 130, 180, 240, 320, 420, 560, 750, + 1000, 1300, 1800, 2400, 3200, 4200, 5600, 7500, + 10000, 13000, 18000, 24000, 32000, 42000, 56000, 75000, +}; + +static const unsigned int ad74115_adc_ch_data_regs_tbl[] = { + [AD74115_ADC_CH_CONV1] = 0x44, + [AD74115_ADC_CH_CONV2] = 0x46, +}; + +static const unsigned int ad74115_adc_ch_en_bit_tbl[] = { + [AD74115_ADC_CH_CONV1] = BIT(0), + [AD74115_ADC_CH_CONV2] = BIT(1), +}; + +static const bool ad74115_adc_bipolar_tbl[AD74115_ADC_RANGE_NUM] = { + [AD74115_ADC_RANGE_12V_BIPOLAR] = true, + [AD74115_ADC_RANGE_2_5V_BIPOLAR] = true, + [AD74115_ADC_RANGE_104MV_BIPOLAR] = true, +}; + +static const unsigned int ad74115_adc_conv_mul_tbl[AD74115_ADC_RANGE_NUM] = { + [AD74115_ADC_RANGE_12V] = 12000, + [AD74115_ADC_RANGE_12V_BIPOLAR] = 24000, + [AD74115_ADC_RANGE_2_5V_BIPOLAR] = 5000, + [AD74115_ADC_RANGE_2_5V_NEG] = 2500, + [AD74115_ADC_RANGE_2_5V] = 2500, + [AD74115_ADC_RANGE_0_625V] = 625, + [AD74115_ADC_RANGE_104MV_BIPOLAR] = 208, + [AD74115_ADC_RANGE_12V_OTHER] = 12000, +}; + +static const unsigned int ad74115_adc_gain_tbl[AD74115_ADC_RANGE_NUM][2] = { + [AD74115_ADC_RANGE_12V] = { 5, 24 }, + [AD74115_ADC_RANGE_12V_BIPOLAR] = { 5, 24 }, + [AD74115_ADC_RANGE_2_5V_BIPOLAR] = { 1, 1 }, + [AD74115_ADC_RANGE_2_5V_NEG] = { 1, 1 }, + [AD74115_ADC_RANGE_2_5V] = { 1, 1 }, + [AD74115_ADC_RANGE_0_625V] = { 4, 1 }, + [AD74115_ADC_RANGE_104MV_BIPOLAR] = { 24, 1 }, + [AD74115_ADC_RANGE_12V_OTHER] = { 5, 24 }, +}; + +static const int ad74115_adc_range_tbl[AD74115_ADC_RANGE_NUM][2] = { + [AD74115_ADC_RANGE_12V] = { 0, 12000000 }, + [AD74115_ADC_RANGE_12V_BIPOLAR] = { -12000000, 12000000 }, + [AD74115_ADC_RANGE_2_5V_BIPOLAR] = { -2500000, 2500000 }, + [AD74115_ADC_RANGE_2_5V_NEG] = { -2500000, 0 }, + [AD74115_ADC_RANGE_2_5V] = { 0, 2500000 }, + [AD74115_ADC_RANGE_0_625V] = { 0, 625000 }, + 
[AD74115_ADC_RANGE_104MV_BIPOLAR] = { -104000, 104000 }, + [AD74115_ADC_RANGE_12V_OTHER] = { 0, 12000000 }, +}; + +static int _ad74115_find_tbl_index(const unsigned int *tbl, unsigned int tbl_len, + unsigned int val, unsigned int *index) +{ + unsigned int i; + + for (i = 0; i < tbl_len; i++) + if (val == tbl[i]) { + *index = i; + return 0; + } + + return -EINVAL; +} + +#define ad74115_find_tbl_index(tbl, val, index) \ + _ad74115_find_tbl_index(tbl, ARRAY_SIZE(tbl), val, index) + +static int ad74115_crc(u8 *buf) +{ + return crc8(ad74115_crc8_table, buf, 3, 0); +} + +static void ad74115_format_reg_write(u8 reg, u16 val, u8 *buf) +{ + buf[0] = reg; + put_unaligned_be16(val, &buf[1]); + buf[3] = ad74115_crc(buf); +} + +static int ad74115_reg_write(void *context, unsigned int reg, unsigned int val) +{ + struct ad74115_state *st = context; + + ad74115_format_reg_write(reg, val, st->reg_tx_buf); + + return spi_write(st->spi, st->reg_tx_buf, AD74115_FRAME_SIZE); +} + +static int ad74115_crc_check(struct ad74115_state *st, u8 *buf) +{ + struct device *dev = &st->spi->dev; + u8 expected_crc = ad74115_crc(buf); + + if (buf[3] != expected_crc) { + dev_err(dev, "Bad CRC %02x for %02x%02x%02x, expected %02x\n", + buf[3], buf[0], buf[1], buf[2], expected_crc); + return -EINVAL; + } + + return 0; +} + +static int ad74115_reg_read(void *context, unsigned int reg, unsigned int *val) +{ + struct ad74115_state *st = context; + struct spi_transfer reg_read_xfer[] = { + { + .tx_buf = st->reg_tx_buf, + .len = sizeof(st->reg_tx_buf), + .cs_change = 1, + }, + { + .rx_buf = st->reg_rx_buf, + .len = sizeof(st->reg_rx_buf), + }, + }; + int ret; + + ad74115_format_reg_write(AD74115_READ_SELECT_REG, reg, st->reg_tx_buf); + + ret = spi_sync_transfer(st->spi, reg_read_xfer, ARRAY_SIZE(reg_read_xfer)); + if (ret) + return ret; + + ret = ad74115_crc_check(st, st->reg_rx_buf); + if (ret) + return ret; + + *val = get_unaligned_be16(&st->reg_rx_buf[1]); + + return 0; +} + +static const struct 
regmap_config ad74115_regmap_config = { + .reg_bits = 8, + .val_bits = 16, + .reg_read = ad74115_reg_read, + .reg_write = ad74115_reg_write, +}; + +static int ad74115_gpio_config_set(struct ad74115_state *st, unsigned int offset, + enum ad74115_gpio_config cfg) +{ + return regmap_update_bits(st->regmap, AD74115_GPIO_CONFIG_X_REG(offset), + AD74115_GPIO_CONFIG_SELECT_MASK, + FIELD_PREP(AD74115_GPIO_CONFIG_SELECT_MASK, cfg)); +} + +static int ad74115_gpio_init_valid_mask(struct gpio_chip *gc, + unsigned long *valid_mask, + unsigned int ngpios) +{ + struct ad74115_state *st = gpiochip_get_data(gc); + + *valid_mask = st->gpio_valid_mask; + + return 0; +} + +static int ad74115_gpio_get_direction(struct gpio_chip *gc, unsigned int offset) +{ + struct ad74115_state *st = gpiochip_get_data(gc); + unsigned int val; + int ret; + + ret = regmap_read(st->regmap, AD74115_GPIO_CONFIG_X_REG(offset), &val); + if (ret) + return ret; + + return FIELD_GET(AD74115_GPIO_CONFIG_SELECT_MASK, val) == AD74115_GPIO_CONFIG_INPUT; +} + +static int ad74115_gpio_direction_input(struct gpio_chip *gc, unsigned int offset) +{ + struct ad74115_state *st = gpiochip_get_data(gc); + + return ad74115_gpio_config_set(st, offset, AD74115_GPIO_CONFIG_INPUT); +} + +/* + * Configure GPIO @offset as a buffered output and program its initial level. + * + * The select field and the GPO_DATA bit live in the same GPIO_CONFIG_X + * register, so both are updated in a single regmap_update_bits() call; the + * requested @value is therefore in place when the pin starts driving instead + * of whatever GPO_DATA held before. + * + * Returns the result of regmap_update_bits(). + */ +static int ad74115_gpio_direction_output(struct gpio_chip *gc, unsigned int offset, + int value) +{ + struct ad74115_state *st = gpiochip_get_data(gc); + unsigned int mask = AD74115_GPIO_CONFIG_SELECT_MASK | AD74115_GPIO_CONFIG_GPO_DATA; + unsigned int cfg; + + cfg = FIELD_PREP(AD74115_GPIO_CONFIG_SELECT_MASK, AD74115_GPIO_CONFIG_OUTPUT_BUFFERED); + /* Normalize to 0/1 so only the single GPO_DATA bit is affected. */ + cfg |= FIELD_PREP(AD74115_GPIO_CONFIG_GPO_DATA, !!value); + + return regmap_update_bits(st->regmap, AD74115_GPIO_CONFIG_X_REG(offset), mask, cfg); +} + +static int ad74115_gpio_get(struct gpio_chip *gc, unsigned int offset) +{ + struct ad74115_state *st = gpiochip_get_data(gc); + unsigned int val; + int ret; + + ret = regmap_read(st->regmap, AD74115_GPIO_CONFIG_X_REG(offset), &val); + if (ret) + return ret; + + return FIELD_GET(AD74115_GPIO_CONFIG_GPI_DATA, val); +} + +static void ad74115_gpio_set(struct gpio_chip *gc, unsigned int offset, int value) +{ + struct ad74115_state *st = gpiochip_get_data(gc); + struct device *dev = &st->spi->dev; + int ret; + 
ret = regmap_update_bits(st->regmap, AD74115_GPIO_CONFIG_X_REG(offset), + AD74115_GPIO_CONFIG_GPO_DATA, + FIELD_PREP(AD74115_GPIO_CONFIG_GPO_DATA, value)); + if (ret) + dev_err(dev, "Failed to set GPIO %u output value, err: %d\n", + offset, ret); +} + +/* + * Program the digital input comparator debounce setting. + * + * @val is rounded up to the first entry of ad74115_debounce_tbl that is not + * smaller than it; requests above the largest entry are clamped to the last + * entry. The register field holds the table index, not the requested value + * itself, so the index is what gets written to DIN_DEBOUNCE. + * + * Returns the result of regmap_update_bits(). + */ +static int ad74115_set_comp_debounce(struct ad74115_state *st, unsigned int val) +{ + unsigned int len = ARRAY_SIZE(ad74115_debounce_tbl); + unsigned int i; + + for (i = 0; i < len; i++) + if (val <= ad74115_debounce_tbl[i]) + break; + + /* No entry is large enough: fall back to the longest setting. */ + if (i == len) + i = len - 1; + + return regmap_update_bits(st->regmap, AD74115_DIN_CONFIG1_REG, + AD74115_DIN_DEBOUNCE_MASK, + FIELD_PREP(AD74115_DIN_DEBOUNCE_MASK, i)); +} + +static int ad74115_comp_gpio_get_direction(struct gpio_chip *chip, + unsigned int offset) +{ + return GPIO_LINE_DIRECTION_IN; +} + +static int ad74115_comp_gpio_set_config(struct gpio_chip *chip, + unsigned int offset, + unsigned long config) +{ + struct ad74115_state *st = gpiochip_get_data(chip); + u32 param = pinconf_to_config_param(config); + u32 arg = pinconf_to_config_argument(config); + + switch (param) { + case PIN_CONFIG_INPUT_DEBOUNCE: + return ad74115_set_comp_debounce(st, arg); + default: + return -ENOTSUPP; + } +} + +static int ad74115_comp_gpio_get(struct gpio_chip *chip, unsigned int offset) +{ + struct ad74115_state *st = gpiochip_get_data(chip); + unsigned int val; + int ret; + + ret = regmap_read(st->regmap, AD74115_DIN_COMP_OUT_REG, &val); + if (ret) + return ret; + + return !!val; +} + +static irqreturn_t ad74115_trigger_handler(int irq, void *p) +{ + struct iio_poll_func *pf = p; + struct iio_dev *indio_dev = pf->indio_dev; + struct ad74115_state *st = iio_priv(indio_dev); + int ret; + + ret = spi_sync(st->spi, &st->adc_samples_msg); + if (ret) + goto out; + + iio_push_to_buffers(indio_dev, st->adc_samples_rx_buf); + +out: + iio_trigger_notify_done(indio_dev->trig); + + return IRQ_HANDLED; +} + +static irqreturn_t ad74115_adc_data_interrupt(int irq, void *data) +{ + struct iio_dev 
*indio_dev = data; + struct ad74115_state *st = iio_priv(indio_dev); + + if (iio_buffer_enabled(indio_dev)) + iio_trigger_poll(st->trig); + else + complete(&st->adc_data_completion); + + return IRQ_HANDLED; +} + +static int ad74115_set_adc_ch_en(struct ad74115_state *st, + enum ad74115_adc_ch channel, bool status) +{ + unsigned int mask = ad74115_adc_ch_en_bit_tbl[channel]; + + return regmap_update_bits(st->regmap, AD74115_ADC_CONV_CTRL_REG, mask, + status ? mask : 0); +} + +static int ad74115_set_adc_conv_seq(struct ad74115_state *st, + enum ad74115_adc_conv_seq conv_seq) +{ + return regmap_update_bits(st->regmap, AD74115_ADC_CONV_CTRL_REG, + AD74115_ADC_CONV_SEQ_MASK, + FIELD_PREP(AD74115_ADC_CONV_SEQ_MASK, conv_seq)); +} + +static int ad74115_update_scan_mode(struct iio_dev *indio_dev, + const unsigned long *active_scan_mask) +{ + struct ad74115_state *st = iio_priv(indio_dev); + struct spi_transfer *xfer = st->adc_samples_xfer; + u8 *rx_buf = st->adc_samples_rx_buf; + u8 *tx_buf = st->adc_samples_tx_buf; + unsigned int i; + int ret = 0; + + mutex_lock(&st->lock); + + spi_message_init(&st->adc_samples_msg); + + for_each_clear_bit(i, active_scan_mask, AD74115_ADC_CH_NUM) { + ret = ad74115_set_adc_ch_en(st, i, false); + if (ret) + goto out; + } + + /* + * The read select register is used to select which register's value + * will be sent by the slave on the next SPI frame. + * + * Create an SPI message that, on each step, writes to the read select + * register to select the ADC result of the next enabled channel, and + * reads the ADC result of the previous enabled channel. 
+ * + * Example: + * W: [WCH1] [WCH2] [WCH2] [WCH3] [ ] + * R: [ ] [RCH1] [RCH2] [RCH3] [RCH4] + */ + for_each_set_bit(i, active_scan_mask, AD74115_ADC_CH_NUM) { + ret = ad74115_set_adc_ch_en(st, i, true); + if (ret) + goto out; + + if (xfer == st->adc_samples_xfer) + xfer->rx_buf = NULL; + else + xfer->rx_buf = rx_buf; + + xfer->tx_buf = tx_buf; + xfer->len = AD74115_FRAME_SIZE; + xfer->cs_change = 1; + + ad74115_format_reg_write(AD74115_READ_SELECT_REG, + ad74115_adc_ch_data_regs_tbl[i], tx_buf); + + spi_message_add_tail(xfer, &st->adc_samples_msg); + + tx_buf += AD74115_FRAME_SIZE; + if (xfer != st->adc_samples_xfer) + rx_buf += AD74115_FRAME_SIZE; + xfer++; + } + + xfer->rx_buf = rx_buf; + xfer->tx_buf = NULL; + xfer->len = AD74115_FRAME_SIZE; + xfer->cs_change = 0; + + spi_message_add_tail(xfer, &st->adc_samples_msg); + +out: + mutex_unlock(&st->lock); + + return ret; +} + +static int ad74115_buffer_postenable(struct iio_dev *indio_dev) +{ + struct ad74115_state *st = iio_priv(indio_dev); + + return ad74115_set_adc_conv_seq(st, AD74115_ADC_CONV_SEQ_CONTINUOUS); +} + +static int ad74115_buffer_predisable(struct iio_dev *indio_dev) +{ + struct ad74115_state *st = iio_priv(indio_dev); + unsigned int i; + int ret; + + mutex_lock(&st->lock); + + ret = ad74115_set_adc_conv_seq(st, AD74115_ADC_CONV_SEQ_STANDBY); + if (ret) + goto out; + + /* + * update_scan_mode() is not called in the disable path, disable all + * channels here. 
+ */ + for (i = 0; i < AD74115_ADC_CH_NUM; i++) { + ret = ad74115_set_adc_ch_en(st, i, false); + if (ret) + goto out; + } + +out: + mutex_unlock(&st->lock); + + return ret; +} + +static const struct iio_buffer_setup_ops ad74115_buffer_ops = { + .postenable = &ad74115_buffer_postenable, + .predisable = &ad74115_buffer_predisable, +}; + +static const struct iio_trigger_ops ad74115_trigger_ops = { + .validate_device = iio_trigger_validate_own_device, +}; + +static int ad74115_get_adc_rate(struct ad74115_state *st, + enum ad74115_adc_ch channel, int *val) +{ + unsigned int i; + int ret; + + ret = regmap_read(st->regmap, AD74115_ADC_CONFIG_REG, &i); + if (ret) + return ret; + + if (channel == AD74115_ADC_CH_CONV1) + i = FIELD_GET(AD74115_ADC_CONFIG_CONV1_RATE_MASK, i); + else + i = FIELD_GET(AD74115_ADC_CONFIG_CONV2_RATE_MASK, i); + + *val = ad74115_adc_conv_rate_tbl[i]; + + return IIO_VAL_INT; +} + +static int _ad74115_get_adc_code(struct ad74115_state *st, + enum ad74115_adc_ch channel, int *val) +{ + unsigned int uval; + int ret; + + reinit_completion(&st->adc_data_completion); + + ret = ad74115_set_adc_ch_en(st, channel, true); + if (ret) + return ret; + + ret = ad74115_set_adc_conv_seq(st, AD74115_ADC_CONV_SEQ_SINGLE); + if (ret) + return ret; + + if (st->irq) { + ret = wait_for_completion_timeout(&st->adc_data_completion, + msecs_to_jiffies(1000)); + if (!ret) + return -ETIMEDOUT; + } else { + unsigned int regval, wait_time; + int rate; + + ret = ad74115_get_adc_rate(st, channel, &rate); + if (ret < 0) + return ret; + + wait_time = DIV_ROUND_CLOSEST(AD74115_CONV_TIME_US, rate); + + ret = regmap_read_poll_timeout(st->regmap, AD74115_LIVE_STATUS_REG, + regval, regval & AD74115_ADC_DATA_RDY_MASK, + wait_time, 5 * wait_time); + if (ret) + return ret; + + /* + * The ADC_DATA_RDY bit is W1C. + * See datasheet page 98, Table 62. Bit Descriptions for + * LIVE_STATUS. 
+ * Although the datasheet mentions that the bit will auto-clear + * when writing to the ADC_CONV_CTRL register, this does not + * seem to happen. + */ + ret = regmap_write_bits(st->regmap, AD74115_LIVE_STATUS_REG, + AD74115_ADC_DATA_RDY_MASK, + FIELD_PREP(AD74115_ADC_DATA_RDY_MASK, 1)); + if (ret) + return ret; + } + + ret = regmap_read(st->regmap, ad74115_adc_ch_data_regs_tbl[channel], &uval); + if (ret) + return ret; + + ret = ad74115_set_adc_conv_seq(st, AD74115_ADC_CONV_SEQ_STANDBY); + if (ret) + return ret; + + ret = ad74115_set_adc_ch_en(st, channel, false); + if (ret) + return ret; + + *val = uval; + + return IIO_VAL_INT; +} + +static int ad74115_get_adc_code(struct iio_dev *indio_dev, + enum ad74115_adc_ch channel, int *val) +{ + struct ad74115_state *st = iio_priv(indio_dev); + int ret; + + ret = iio_device_claim_direct_mode(indio_dev); + if (ret) + return ret; + + mutex_lock(&st->lock); + ret = _ad74115_get_adc_code(st, channel, val); + mutex_unlock(&st->lock); + + iio_device_release_direct_mode(indio_dev); + + return ret; +} + +static int ad74115_adc_code_to_resistance(int code, int *val, int *val2) +{ + if (code == AD74115_ADC_CODE_MAX) + code--; + + *val = code * AD74115_REF_RESISTOR_OHMS; + *val2 = AD74115_ADC_CODE_MAX - code; + + return IIO_VAL_FRACTIONAL; +} + +static int ad74115_set_dac_code(struct ad74115_state *st, + enum ad74115_dac_ch channel, int val) +{ + if (val < 0) + return -EINVAL; + + if (channel == AD74115_DAC_CH_COMPARATOR) { + if (val > AD74115_COMP_THRESH_MAX) + return -EINVAL; + + return regmap_update_bits(st->regmap, AD74115_DIN_CONFIG2_REG, + AD74115_COMP_THRESH_MASK, + FIELD_PREP(AD74115_COMP_THRESH_MASK, val)); + } + + if (val > AD74115_DAC_CODE_MAX) + return -EINVAL; + + return regmap_write(st->regmap, AD74115_DAC_CODE_REG, val); +} + +static int ad74115_get_dac_code(struct ad74115_state *st, + enum ad74115_dac_ch channel, int *val) +{ + unsigned int uval; + int ret; + + if (channel == AD74115_DAC_CH_COMPARATOR) + return 
-EINVAL; + + ret = regmap_read(st->regmap, AD74115_DAC_ACTIVE_REG, &uval); + if (ret) + return ret; + + *val = uval; + + return IIO_VAL_INT; +} + +static int ad74115_set_adc_rate(struct ad74115_state *st, + enum ad74115_adc_ch channel, int val) +{ + unsigned int i; + int ret; + + ret = ad74115_find_tbl_index(ad74115_adc_conv_rate_tbl, val, &i); + if (ret) + return ret; + + if (channel == AD74115_ADC_CH_CONV1) + return regmap_update_bits(st->regmap, AD74115_ADC_CONFIG_REG, + AD74115_ADC_CONFIG_CONV1_RATE_MASK, + FIELD_PREP(AD74115_ADC_CONFIG_CONV1_RATE_MASK, i)); + + return regmap_update_bits(st->regmap, AD74115_ADC_CONFIG_REG, + AD74115_ADC_CONFIG_CONV2_RATE_MASK, + FIELD_PREP(AD74115_ADC_CONFIG_CONV2_RATE_MASK, i)); +} + +static int ad74115_get_dac_rate(struct ad74115_state *st, int *val) +{ + unsigned int i, en_val, step_val, rate_val, tmp; + int ret; + + ret = regmap_read(st->regmap, AD74115_OUTPUT_CONFIG_REG, &tmp); + if (ret) + return ret; + + en_val = FIELD_GET(AD74115_OUTPUT_SLEW_EN_MASK, tmp); + step_val = FIELD_GET(AD74115_OUTPUT_SLEW_LIN_STEP_MASK, tmp); + rate_val = FIELD_GET(AD74115_OUTPUT_SLEW_LIN_RATE_MASK, tmp); + + for (i = 0; i < ARRAY_SIZE(ad74115_dac_rate_step_tbl); i++) + if (en_val == ad74115_dac_rate_step_tbl[i][0] && + step_val == ad74115_dac_rate_step_tbl[i][1] && + rate_val == ad74115_dac_rate_step_tbl[i][2]) + break; + + if (i == ARRAY_SIZE(ad74115_dac_rate_step_tbl)) + return -EINVAL; + + *val = ad74115_dac_rate_tbl[i]; + + return IIO_VAL_INT; +} + +static int ad74115_set_dac_rate(struct ad74115_state *st, int val) +{ + unsigned int i, en_val, step_val, rate_val, mask, tmp; + int ret; + + ret = ad74115_find_tbl_index(ad74115_dac_rate_tbl, val, &i); + if (ret) + return ret; + + en_val = ad74115_dac_rate_step_tbl[i][0]; + step_val = ad74115_dac_rate_step_tbl[i][1]; + rate_val = ad74115_dac_rate_step_tbl[i][2]; + + mask = AD74115_OUTPUT_SLEW_EN_MASK; + mask |= AD74115_OUTPUT_SLEW_LIN_STEP_MASK; + mask |= AD74115_OUTPUT_SLEW_LIN_RATE_MASK; + 
+ tmp = FIELD_PREP(AD74115_OUTPUT_SLEW_EN_MASK, en_val); + tmp |= FIELD_PREP(AD74115_OUTPUT_SLEW_LIN_STEP_MASK, step_val); + tmp |= FIELD_PREP(AD74115_OUTPUT_SLEW_LIN_RATE_MASK, rate_val); + + return regmap_update_bits(st->regmap, AD74115_OUTPUT_CONFIG_REG, mask, tmp); +} + +static int ad74115_get_dac_scale(struct ad74115_state *st, + struct iio_chan_spec const *chan, + int *val, int *val2) +{ + if (chan->channel == AD74115_DAC_CH_MAIN) { + if (chan->type == IIO_VOLTAGE) { + *val = AD74115_DAC_VOLTAGE_MAX; + + if (st->dac_bipolar) + *val *= 2; + + } else { + *val = AD74115_DAC_CURRENT_MAX; + } + + *val2 = AD74115_DAC_CODE_MAX; + } else { + if (st->din_threshold_mode == AD74115_DIN_THRESHOLD_MODE_AVDD) { + *val = 196 * st->avdd_mv; + *val2 = 10 * AD74115_COMP_THRESH_MAX; + } else { + *val = 49000; + *val2 = AD74115_COMP_THRESH_MAX; + } + } + + return IIO_VAL_FRACTIONAL; +} + +static int ad74115_get_dac_offset(struct ad74115_state *st, + struct iio_chan_spec const *chan, int *val) +{ + if (chan->channel == AD74115_DAC_CH_MAIN) { + if (chan->type == IIO_VOLTAGE && st->dac_bipolar) + *val = -AD74115_DAC_CODE_HALF; + else + *val = 0; + } else { + if (st->din_threshold_mode == AD74115_DIN_THRESHOLD_MODE_AVDD) + *val = -48; + else + *val = -38; + } + + return IIO_VAL_INT; +} + +static int ad74115_get_adc_range(struct ad74115_state *st, + enum ad74115_adc_ch channel, unsigned int *val) +{ + int ret; + + ret = regmap_read(st->regmap, AD74115_ADC_CONFIG_REG, val); + if (ret) + return ret; + + if (channel == AD74115_ADC_CH_CONV1) + *val = FIELD_GET(AD74115_ADC_CONFIG_CONV1_RANGE_MASK, *val); + else + *val = FIELD_GET(AD74115_ADC_CONFIG_CONV2_RANGE_MASK, *val); + + return 0; +} + +static int ad74115_get_adc_resistance_scale(struct ad74115_state *st, + unsigned int range, + int *val, int *val2) +{ + *val = ad74115_adc_gain_tbl[range][1] * AD74115_REF_RESISTOR_OHMS; + *val2 = ad74115_adc_gain_tbl[range][0]; + + if (ad74115_adc_bipolar_tbl[range]) + *val2 *= 
AD74115_ADC_CODE_HALF; + else + *val2 *= AD74115_ADC_CODE_MAX; + + return IIO_VAL_FRACTIONAL; +} + +static int ad74115_get_adc_scale(struct ad74115_state *st, + struct iio_chan_spec const *chan, + int *val, int *val2) +{ + unsigned int range; + int ret; + + ret = ad74115_get_adc_range(st, chan->channel, &range); + if (ret) + return ret; + + if (chan->type == IIO_RESISTANCE) + return ad74115_get_adc_resistance_scale(st, range, val, val2); + + *val = ad74115_adc_conv_mul_tbl[range]; + *val2 = AD74115_ADC_CODE_MAX; + + if (chan->type == IIO_CURRENT) + *val2 *= AD74115_SENSE_RESISTOR_OHMS; + + return IIO_VAL_FRACTIONAL; +} + +static int ad74115_get_adc_resistance_offset(struct ad74115_state *st, + unsigned int range, + int *val, int *val2) +{ + unsigned int d = 10 * AD74115_REF_RESISTOR_OHMS + * ad74115_adc_gain_tbl[range][1]; + + *val = 5; + + if (ad74115_adc_bipolar_tbl[range]) + *val -= AD74115_ADC_CODE_HALF; + + *val *= d; + + if (!st->rtd_mode_4_wire) { + /* Add 0.2 Ohm to the final result for 3-wire RTD. 
*/ + unsigned int v = 2 * ad74115_adc_gain_tbl[range][0]; + + if (ad74115_adc_bipolar_tbl[range]) + v *= AD74115_ADC_CODE_HALF; + else + v *= AD74115_ADC_CODE_MAX; + + *val += v; + } + + *val2 = d; + + return IIO_VAL_FRACTIONAL; +} + +static int ad74115_get_adc_offset(struct ad74115_state *st, + struct iio_chan_spec const *chan, + int *val, int *val2) +{ + unsigned int range; + int ret; + + ret = ad74115_get_adc_range(st, chan->channel, &range); + if (ret) + return ret; + + if (chan->type == IIO_RESISTANCE) + return ad74115_get_adc_resistance_offset(st, range, val, val2); + + if (ad74115_adc_bipolar_tbl[range]) + *val = -AD74115_ADC_CODE_HALF; + else if (range == AD74115_ADC_RANGE_2_5V_NEG) + *val = -AD74115_ADC_CODE_MAX; + else + *val = 0; + + return IIO_VAL_INT; +} + +static int ad74115_read_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + int *val, int *val2, long info) +{ + struct ad74115_state *st = iio_priv(indio_dev); + int ret; + + switch (info) { + case IIO_CHAN_INFO_RAW: + if (chan->output) + return ad74115_get_dac_code(st, chan->channel, val); + + return ad74115_get_adc_code(indio_dev, chan->channel, val); + case IIO_CHAN_INFO_PROCESSED: + ret = ad74115_get_adc_code(indio_dev, chan->channel, val); + if (ret) + return ret; + + return ad74115_adc_code_to_resistance(*val, val, val2); + case IIO_CHAN_INFO_SCALE: + if (chan->output) + return ad74115_get_dac_scale(st, chan, val, val2); + + return ad74115_get_adc_scale(st, chan, val, val2); + case IIO_CHAN_INFO_OFFSET: + if (chan->output) + return ad74115_get_dac_offset(st, chan, val); + + return ad74115_get_adc_offset(st, chan, val, val2); + case IIO_CHAN_INFO_SAMP_FREQ: + if (chan->output) + return ad74115_get_dac_rate(st, val); + + return ad74115_get_adc_rate(st, chan->channel, val); + default: + return -EINVAL; + } +} + +static int ad74115_write_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, int val, int val2, + long info) +{ + struct ad74115_state *st = 
iio_priv(indio_dev); + + switch (info) { + case IIO_CHAN_INFO_RAW: + if (!chan->output) + return -EINVAL; + + return ad74115_set_dac_code(st, chan->channel, val); + case IIO_CHAN_INFO_SAMP_FREQ: + if (chan->output) + return ad74115_set_dac_rate(st, val); + + return ad74115_set_adc_rate(st, chan->channel, val); + default: + return -EINVAL; + } +} + +static int ad74115_read_avail(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + const int **vals, int *type, int *length, long info) +{ + switch (info) { + case IIO_CHAN_INFO_SAMP_FREQ: + if (chan->output) { + *vals = ad74115_dac_rate_tbl; + *length = ARRAY_SIZE(ad74115_dac_rate_tbl); + } else { + *vals = ad74115_adc_conv_rate_tbl; + *length = ARRAY_SIZE(ad74115_adc_conv_rate_tbl); + } + + *type = IIO_VAL_INT; + + return IIO_AVAIL_LIST; + default: + return -EINVAL; + } +} + +static int ad74115_reg_access(struct iio_dev *indio_dev, unsigned int reg, + unsigned int writeval, unsigned int *readval) +{ + struct ad74115_state *st = iio_priv(indio_dev); + + if (readval) + return regmap_read(st->regmap, reg, readval); + + return regmap_write(st->regmap, reg, writeval); +} + +static const struct iio_info ad74115_info = { + .read_raw = ad74115_read_raw, + .write_raw = ad74115_write_raw, + .read_avail = ad74115_read_avail, + .update_scan_mode = ad74115_update_scan_mode, + .debugfs_reg_access = ad74115_reg_access, +}; + +#define AD74115_DAC_CHANNEL(_type, index) \ + { \ + .type = (_type), \ + .channel = (index), \ + .indexed = 1, \ + .output = 1, \ + .scan_index = -1, \ + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) \ + | BIT(IIO_CHAN_INFO_SCALE) \ + | BIT(IIO_CHAN_INFO_OFFSET), \ + } + +#define _AD74115_ADC_CHANNEL(_type, index, extra_mask_separate) \ + { \ + .type = (_type), \ + .channel = (index), \ + .indexed = 1, \ + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) \ + | BIT(IIO_CHAN_INFO_SAMP_FREQ) \ + | (extra_mask_separate), \ + .info_mask_separate_available = \ + BIT(IIO_CHAN_INFO_SAMP_FREQ), \ + .scan_index = 
index, \ + .scan_type = { \ + .sign = 'u', \ + .realbits = 16, \ + .storagebits = 32, \ + .shift = 8, \ + .endianness = IIO_BE, \ + }, \ + } + +#define AD74115_ADC_CHANNEL(_type, index) \ + _AD74115_ADC_CHANNEL(_type, index, BIT(IIO_CHAN_INFO_SCALE) \ + | BIT(IIO_CHAN_INFO_OFFSET)) + +static struct iio_chan_spec ad74115_voltage_input_channels[] = { + AD74115_ADC_CHANNEL(IIO_VOLTAGE, AD74115_ADC_CH_CONV1), + AD74115_ADC_CHANNEL(IIO_VOLTAGE, AD74115_ADC_CH_CONV2), +}; + +static struct iio_chan_spec ad74115_voltage_output_channels[] = { + AD74115_DAC_CHANNEL(IIO_VOLTAGE, AD74115_DAC_CH_MAIN), + AD74115_ADC_CHANNEL(IIO_CURRENT, AD74115_ADC_CH_CONV1), + AD74115_ADC_CHANNEL(IIO_VOLTAGE, AD74115_ADC_CH_CONV2), +}; + +static struct iio_chan_spec ad74115_current_input_channels[] = { + AD74115_ADC_CHANNEL(IIO_CURRENT, AD74115_ADC_CH_CONV1), + AD74115_ADC_CHANNEL(IIO_VOLTAGE, AD74115_ADC_CH_CONV2), +}; + +static struct iio_chan_spec ad74115_current_output_channels[] = { + AD74115_DAC_CHANNEL(IIO_CURRENT, AD74115_DAC_CH_MAIN), + AD74115_ADC_CHANNEL(IIO_VOLTAGE, AD74115_ADC_CH_CONV1), + AD74115_ADC_CHANNEL(IIO_VOLTAGE, AD74115_ADC_CH_CONV2), +}; + +static struct iio_chan_spec ad74115_2_wire_resistance_input_channels[] = { + _AD74115_ADC_CHANNEL(IIO_RESISTANCE, AD74115_ADC_CH_CONV1, + BIT(IIO_CHAN_INFO_PROCESSED)), + AD74115_ADC_CHANNEL(IIO_VOLTAGE, AD74115_ADC_CH_CONV2), +}; + +static struct iio_chan_spec ad74115_3_4_wire_resistance_input_channels[] = { + AD74115_ADC_CHANNEL(IIO_RESISTANCE, AD74115_ADC_CH_CONV1), + AD74115_ADC_CHANNEL(IIO_VOLTAGE, AD74115_ADC_CH_CONV2), +}; + +static struct iio_chan_spec ad74115_digital_input_logic_channels[] = { + AD74115_DAC_CHANNEL(IIO_VOLTAGE, AD74115_DAC_CH_COMPARATOR), + AD74115_ADC_CHANNEL(IIO_VOLTAGE, AD74115_ADC_CH_CONV1), + AD74115_ADC_CHANNEL(IIO_VOLTAGE, AD74115_ADC_CH_CONV2), +}; + +static struct iio_chan_spec ad74115_digital_input_loop_channels[] = { + AD74115_DAC_CHANNEL(IIO_CURRENT, AD74115_DAC_CH_MAIN), + 
AD74115_DAC_CHANNEL(IIO_VOLTAGE, AD74115_DAC_CH_COMPARATOR), + AD74115_ADC_CHANNEL(IIO_VOLTAGE, AD74115_ADC_CH_CONV1), + AD74115_ADC_CHANNEL(IIO_VOLTAGE, AD74115_ADC_CH_CONV2), +}; + +#define _AD74115_CHANNELS(_channels) \ + { \ + .channels = _channels, \ + .num_channels = ARRAY_SIZE(_channels), \ + } + +#define AD74115_CHANNELS(name) \ + _AD74115_CHANNELS(ad74115_ ## name ## _channels) + +static const struct ad74115_channels ad74115_channels_map[AD74115_CH_FUNC_NUM] = { + [AD74115_CH_FUNC_HIGH_IMPEDANCE] = AD74115_CHANNELS(voltage_input), + [AD74115_CH_FUNC_VOLTAGE_INPUT] = AD74115_CHANNELS(voltage_input), + + [AD74115_CH_FUNC_VOLTAGE_OUTPUT] = AD74115_CHANNELS(voltage_output), + + [AD74115_CH_FUNC_CURRENT_INPUT_EXT_POWER] = AD74115_CHANNELS(current_input), + [AD74115_CH_FUNC_CURRENT_INPUT_LOOP_POWER] = AD74115_CHANNELS(current_input), + [AD74115_CH_FUNC_CURRENT_INPUT_EXT_POWER_HART] = AD74115_CHANNELS(current_input), + [AD74115_CH_FUNC_CURRENT_INPUT_LOOP_POWER_HART] = AD74115_CHANNELS(current_input), + + [AD74115_CH_FUNC_CURRENT_OUTPUT] = AD74115_CHANNELS(current_output), + [AD74115_CH_FUNC_CURRENT_OUTPUT_HART] = AD74115_CHANNELS(current_output), + + [AD74115_CH_FUNC_2_WIRE_RESISTANCE_INPUT] = AD74115_CHANNELS(2_wire_resistance_input), + [AD74115_CH_FUNC_3_4_WIRE_RESISTANCE_INPUT] = AD74115_CHANNELS(3_4_wire_resistance_input), + + [AD74115_CH_FUNC_DIGITAL_INPUT_LOGIC] = AD74115_CHANNELS(digital_input_logic), + + [AD74115_CH_FUNC_DIGITAL_INPUT_LOOP_POWER] = AD74115_CHANNELS(digital_input_loop), +}; + +#define AD74115_GPIO_MODE_FW_PROP(i) \ +{ \ + .name = "adi,gpio" __stringify(i) "-mode", \ + .reg = AD74115_GPIO_CONFIG_X_REG(i), \ + .mask = AD74115_GPIO_CONFIG_SELECT_MASK, \ + .lookup_tbl = ad74115_gpio_mode_tbl, \ + .lookup_tbl_len = ARRAY_SIZE(ad74115_gpio_mode_tbl), \ +} + +static const struct ad74115_fw_prop ad74115_gpio_mode_fw_props[] = { + AD74115_GPIO_MODE_FW_PROP(0), + AD74115_GPIO_MODE_FW_PROP(1), + AD74115_GPIO_MODE_FW_PROP(2), + 
AD74115_GPIO_MODE_FW_PROP(3), +}; + +static const struct ad74115_fw_prop ad74115_din_threshold_mode_fw_prop = + AD74115_FW_PROP_BOOL("adi,digital-input-threshold-mode-fixed", + AD74115_DIN_CONFIG2_REG, BIT(7)); + +static const struct ad74115_fw_prop ad74115_dac_bipolar_fw_prop = + AD74115_FW_PROP_BOOL("adi,dac-bipolar", AD74115_OUTPUT_CONFIG_REG, BIT(7)); + +static const struct ad74115_fw_prop ad74115_ch_func_fw_prop = + AD74115_FW_PROP("adi,ch-func", AD74115_CH_FUNC_MAX, + AD74115_CH_FUNC_SETUP_REG, GENMASK(3, 0)); + +static const struct ad74115_fw_prop ad74115_rtd_mode_fw_prop = + AD74115_FW_PROP_BOOL("adi,4-wire-rtd", AD74115_RTD3W4W_CONFIG_REG, BIT(3)); + +static const struct ad74115_fw_prop ad74115_din_range_fw_prop = + AD74115_FW_PROP_BOOL("adi,digital-input-sink-range-high", + AD74115_DIN_CONFIG1_REG, BIT(12)); + +static const struct ad74115_fw_prop ad74115_ext2_burnout_current_fw_prop = + AD74115_FW_PROP_TBL("adi,ext2-burnout-current-nanoamp", + ad74115_burnout_current_na_tbl, + AD74115_BURNOUT_CONFIG_REG, GENMASK(14, 12)); + +static const struct ad74115_fw_prop ad74115_ext1_burnout_current_fw_prop = + AD74115_FW_PROP_TBL("adi,ext1-burnout-current-nanoamp", + ad74115_burnout_current_na_tbl, + AD74115_BURNOUT_CONFIG_REG, GENMASK(9, 7)); + +static const struct ad74115_fw_prop ad74115_viout_burnout_current_fw_prop = + AD74115_FW_PROP_TBL("adi,viout-burnout-current-nanoamp", + ad74115_viout_burnout_current_na_tbl, + AD74115_BURNOUT_CONFIG_REG, GENMASK(4, 2)); + +static const struct ad74115_fw_prop ad74115_fw_props[] = { + AD74115_FW_PROP("adi,conv2-mux", 3, + AD74115_ADC_CONFIG_REG, GENMASK(3, 2)), + + AD74115_FW_PROP_BOOL_NEG("adi,sense-agnd-buffer-low-power", + AD74115_PWR_OPTIM_CONFIG_REG, BIT(4)), + AD74115_FW_PROP_BOOL_NEG("adi,lf-buffer-low-power", + AD74115_PWR_OPTIM_CONFIG_REG, BIT(3)), + AD74115_FW_PROP_BOOL_NEG("adi,hf-buffer-low-power", + AD74115_PWR_OPTIM_CONFIG_REG, BIT(2)), + AD74115_FW_PROP_BOOL_NEG("adi,ext2-buffer-low-power", + 
AD74115_PWR_OPTIM_CONFIG_REG, BIT(1)), + AD74115_FW_PROP_BOOL_NEG("adi,ext1-buffer-low-power", + AD74115_PWR_OPTIM_CONFIG_REG, BIT(0)), + + AD74115_FW_PROP_BOOL("adi,comparator-invert", + AD74115_DIN_CONFIG1_REG, BIT(14)), + AD74115_FW_PROP_BOOL("adi,digital-input-debounce-mode-counter-reset", + AD74115_DIN_CONFIG1_REG, BIT(6)), + + AD74115_FW_PROP_BOOL("adi,digital-input-unbuffered", + AD74115_DIN_CONFIG2_REG, BIT(10)), + AD74115_FW_PROP_BOOL("adi,digital-input-short-circuit-detection", + AD74115_DIN_CONFIG2_REG, BIT(9)), + AD74115_FW_PROP_BOOL("adi,digital-input-open-circuit-detection", + AD74115_DIN_CONFIG2_REG, BIT(8)), + + AD74115_FW_PROP_BOOL("adi,dac-current-limit-low", + AD74115_OUTPUT_CONFIG_REG, BIT(0)), + + AD74115_FW_PROP_BOOL("adi,3-wire-rtd-excitation-swap", + AD74115_RTD3W4W_CONFIG_REG, BIT(2)), + AD74115_FW_PROP_TBL("adi,rtd-excitation-current-microamp", + ad74115_rtd_excitation_current_ua_tbl, + AD74115_RTD3W4W_CONFIG_REG, GENMASK(1, 0)), + + AD74115_FW_PROP_BOOL("adi,ext2-burnout-current-polarity-sourcing", + AD74115_BURNOUT_CONFIG_REG, BIT(11)), + AD74115_FW_PROP_BOOL("adi,ext1-burnout-current-polarity-sourcing", + AD74115_BURNOUT_CONFIG_REG, BIT(6)), + AD74115_FW_PROP_BOOL("adi,viout-burnout-current-polarity-sourcing", + AD74115_BURNOUT_CONFIG_REG, BIT(1)), + + AD74115_FW_PROP_BOOL("adi,charge-pump", + AD74115_CHARGE_PUMP_REG, BIT(0)), +}; + +static int ad74115_apply_fw_prop(struct ad74115_state *st, + const struct ad74115_fw_prop *prop, u32 *retval) +{ + struct device *dev = &st->spi->dev; + u32 val = 0; + int ret; + + if (prop->is_boolean) { + val = device_property_read_bool(dev, prop->name); + } else { + ret = device_property_read_u32(dev, prop->name, &val); + if (ret && prop->lookup_tbl) + val = prop->lookup_tbl[0]; + } + + *retval = val; + + if (prop->negate) + val = !val; + + if (prop->lookup_tbl) + ret = _ad74115_find_tbl_index(prop->lookup_tbl, + prop->lookup_tbl_len, val, &val); + else if (prop->max && val > prop->max) + ret = -EINVAL; 
+ else + ret = 0; + + if (ret) + return dev_err_probe(dev, -EINVAL, + "Invalid value %u for prop %s\n", + val, prop->name); + + WARN(!prop->mask, "Prop %s mask is empty\n", prop->name); + + val = (val << __ffs(prop->mask)) & prop->mask; + + return regmap_update_bits(st->regmap, prop->reg, prop->mask, val); +} + +static int ad74115_setup_adc_conv2_range(struct ad74115_state *st) +{ + unsigned int tbl_len = ARRAY_SIZE(ad74115_adc_range_tbl); + const char *prop_name = "adi,conv2-range-microvolt"; + s32 vals[2] = { + ad74115_adc_range_tbl[0][0], + ad74115_adc_range_tbl[0][1], + }; + struct device *dev = &st->spi->dev; + unsigned int i; + + device_property_read_u32_array(dev, prop_name, vals, 2); + + for (i = 0; i < tbl_len; i++) + if (vals[0] == ad74115_adc_range_tbl[i][0] && + vals[1] == ad74115_adc_range_tbl[i][1]) + break; + + if (i == tbl_len) + return dev_err_probe(dev, -EINVAL, + "Invalid value %d, %d for prop %s\n", + vals[0], vals[1], prop_name); + + return regmap_update_bits(st->regmap, AD74115_ADC_CONFIG_REG, + AD74115_ADC_CONFIG_CONV2_RANGE_MASK, + FIELD_PREP(AD74115_ADC_CONFIG_CONV2_RANGE_MASK, i)); +} + +static int ad74115_setup_iio_channels(struct iio_dev *indio_dev) +{ + struct ad74115_state *st = iio_priv(indio_dev); + struct device *dev = &st->spi->dev; + struct iio_chan_spec *channels; + + channels = devm_kcalloc(dev, sizeof(*channels), + indio_dev->num_channels, GFP_KERNEL); + if (!channels) + return -ENOMEM; + + indio_dev->channels = channels; + + memcpy(channels, ad74115_channels_map[st->ch_func].channels, + sizeof(*channels) * ad74115_channels_map[st->ch_func].num_channels); + + if (channels[0].output && channels[0].channel == AD74115_DAC_CH_MAIN && + channels[0].type == IIO_VOLTAGE && !st->dac_hart_slew) { + channels[0].info_mask_separate |= BIT(IIO_CHAN_INFO_SAMP_FREQ); + channels[0].info_mask_separate_available |= BIT(IIO_CHAN_INFO_SAMP_FREQ); + } + + return 0; +} + +static int ad74115_setup_gpio_chip(struct ad74115_state *st) +{ + struct 
device *dev = &st->spi->dev; + + if (!st->gpio_valid_mask) + return 0; + + st->gc = (struct gpio_chip) { + .owner = THIS_MODULE, + .label = AD74115_NAME, + .base = -1, + .ngpio = AD74115_GPIO_NUM, + .parent = dev, + .can_sleep = true, + .init_valid_mask = ad74115_gpio_init_valid_mask, + .get_direction = ad74115_gpio_get_direction, + .direction_input = ad74115_gpio_direction_input, + .direction_output = ad74115_gpio_direction_output, + .get = ad74115_gpio_get, + .set = ad74115_gpio_set, + }; + + return devm_gpiochip_add_data(dev, &st->gc, st); +} + +static int ad74115_setup_comp_gpio_chip(struct ad74115_state *st) +{ + struct device *dev = &st->spi->dev; + u32 val; + int ret; + + ret = regmap_read(st->regmap, AD74115_DIN_CONFIG1_REG, &val); + if (ret) + return ret; + + if (!(val & AD74115_DIN_COMPARATOR_EN_MASK)) + return 0; + + st->comp_gc = (struct gpio_chip) { + .owner = THIS_MODULE, + .label = AD74115_NAME, + .base = -1, + .ngpio = 1, + .parent = dev, + .can_sleep = true, + .get_direction = ad74115_comp_gpio_get_direction, + .get = ad74115_comp_gpio_get, + .set_config = ad74115_comp_gpio_set_config, + }; + + return devm_gpiochip_add_data(dev, &st->comp_gc, st); +} + +static int ad74115_setup(struct iio_dev *indio_dev) +{ + struct ad74115_state *st = iio_priv(indio_dev); + struct device *dev = &st->spi->dev; + u32 val, din_range_high; + unsigned int i; + int ret; + + ret = ad74115_apply_fw_prop(st, &ad74115_ch_func_fw_prop, &val); + if (ret) + return ret; + + indio_dev->num_channels += ad74115_channels_map[val].num_channels; + st->ch_func = val; + + ret = ad74115_setup_adc_conv2_range(st); + if (ret) + return ret; + + val = device_property_read_bool(dev, "adi,dac-hart-slew"); + if (val) { + st->dac_hart_slew = val; + + ret = regmap_update_bits(st->regmap, AD74115_OUTPUT_CONFIG_REG, + AD74115_OUTPUT_SLEW_EN_MASK, + FIELD_PREP(AD74115_OUTPUT_SLEW_EN_MASK, + AD74115_SLEW_MODE_HART)); + if (ret) + return ret; + } + + ret = ad74115_apply_fw_prop(st, 
&ad74115_din_range_fw_prop, + &din_range_high); + if (ret) + return ret; + + ret = device_property_read_u32(dev, "adi,digital-input-sink-microamp", &val); + if (!ret) { + if (din_range_high) + val = DIV_ROUND_CLOSEST(val, AD74115_DIN_SINK_LOW_STEP); + else + val = DIV_ROUND_CLOSEST(val, AD74115_DIN_SINK_HIGH_STEP); + + if (val > AD74115_DIN_SINK_MAX) + val = AD74115_DIN_SINK_MAX; + + ret = regmap_update_bits(st->regmap, AD74115_DIN_CONFIG1_REG, + AD74115_DIN_SINK_MASK, + FIELD_PREP(AD74115_DIN_SINK_MASK, val)); + if (ret) + return ret; + } + + ret = ad74115_apply_fw_prop(st, &ad74115_din_threshold_mode_fw_prop, &val); + if (ret) + return ret; + + if (val == AD74115_DIN_THRESHOLD_MODE_AVDD) { + ret = regulator_get_voltage(st->avdd); + if (ret < 0) + return ret; + + st->avdd_mv = ret / 1000; + } + + st->din_threshold_mode = val; + + ret = ad74115_apply_fw_prop(st, &ad74115_dac_bipolar_fw_prop, &val); + if (ret) + return ret; + + st->dac_bipolar = val; + + ret = ad74115_apply_fw_prop(st, &ad74115_rtd_mode_fw_prop, &val); + if (ret) + return ret; + + st->rtd_mode_4_wire = val; + + ret = ad74115_apply_fw_prop(st, &ad74115_ext2_burnout_current_fw_prop, &val); + if (ret) + return ret; + + if (val) { + ret = regmap_update_bits(st->regmap, AD74115_BURNOUT_CONFIG_REG, + AD74115_BURNOUT_EXT2_EN_MASK, + FIELD_PREP(AD74115_BURNOUT_EXT2_EN_MASK, 1)); + if (ret) + return ret; + } + + ret = ad74115_apply_fw_prop(st, &ad74115_ext1_burnout_current_fw_prop, &val); + if (ret) + return ret; + + if (val) { + ret = regmap_update_bits(st->regmap, AD74115_BURNOUT_CONFIG_REG, + AD74115_BURNOUT_EXT1_EN_MASK, + FIELD_PREP(AD74115_BURNOUT_EXT1_EN_MASK, 1)); + if (ret) + return ret; + } + + ret = ad74115_apply_fw_prop(st, &ad74115_viout_burnout_current_fw_prop, &val); + if (ret) + return ret; + + if (val) { + ret = regmap_update_bits(st->regmap, AD74115_BURNOUT_CONFIG_REG, + AD74115_BURNOUT_VIOUT_EN_MASK, + FIELD_PREP(AD74115_BURNOUT_VIOUT_EN_MASK, 1)); + if (ret) + return ret; + } + + for (i = 
0; i < AD74115_GPIO_NUM; i++) { + ret = ad74115_apply_fw_prop(st, &ad74115_gpio_mode_fw_props[i], &val); + if (ret) + return ret; + + if (val == AD74115_GPIO_MODE_LOGIC) + st->gpio_valid_mask |= BIT(i); + } + + for (i = 0; i < ARRAY_SIZE(ad74115_fw_props); i++) { + ret = ad74115_apply_fw_prop(st, &ad74115_fw_props[i], &val); + if (ret) + return ret; + } + + ret = ad74115_setup_gpio_chip(st); + if (ret) + return ret; + + ret = ad74115_setup_comp_gpio_chip(st); + if (ret) + return ret; + + return ad74115_setup_iio_channels(indio_dev); +} + +static int ad74115_reset(struct ad74115_state *st) +{ + struct device *dev = &st->spi->dev; + struct gpio_desc *reset_gpio; + int ret; + + reset_gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH); + if (IS_ERR(reset_gpio)) + return dev_err_probe(dev, PTR_ERR(reset_gpio), + "Failed to find reset GPIO\n"); + + if (reset_gpio) { + fsleep(100); + + gpiod_set_value_cansleep(reset_gpio, 0); + } else { + ret = regmap_write(st->regmap, AD74115_CMD_KEY_REG, + AD74115_CMD_KEY_RESET1); + if (ret) + return ret; + + ret = regmap_write(st->regmap, AD74115_CMD_KEY_REG, + AD74115_CMD_KEY_RESET2); + if (ret) + return ret; + } + + fsleep(1000); + + return 0; +} + +static void ad74115_regulator_disable(void *data) +{ + regulator_disable(data); +} + +static int ad74115_setup_trigger(struct iio_dev *indio_dev) +{ + struct ad74115_state *st = iio_priv(indio_dev); + struct device *dev = &st->spi->dev; + int ret; + + st->irq = fwnode_irq_get_byname(dev_fwnode(dev), "adc_rdy"); + + if (st->irq == -EPROBE_DEFER) + return -EPROBE_DEFER; + + if (st->irq < 0) { + st->irq = 0; + return 0; + } + + ret = devm_request_irq(dev, st->irq, ad74115_adc_data_interrupt, + 0, AD74115_NAME, indio_dev); + if (ret) + return ret; + + st->trig = devm_iio_trigger_alloc(dev, "%s-dev%d", AD74115_NAME, + iio_device_id(indio_dev)); + if (!st->trig) + return -ENOMEM; + + st->trig->ops = &ad74115_trigger_ops; + iio_trigger_set_drvdata(st->trig, st); + + ret = 
devm_iio_trigger_register(dev, st->trig); + if (ret) + return ret; + + indio_dev->trig = iio_trigger_get(st->trig); + + return 0; +} + +static int ad74115_probe(struct spi_device *spi) +{ + static const char * const regulator_names[] = { + "avcc", "dvcc", "dovdd", "refin", + }; + struct device *dev = &spi->dev; + struct ad74115_state *st; + struct iio_dev *indio_dev; + int ret; + + indio_dev = devm_iio_device_alloc(dev, sizeof(*st)); + if (!indio_dev) + return -ENOMEM; + + st = iio_priv(indio_dev); + + st->spi = spi; + mutex_init(&st->lock); + init_completion(&st->adc_data_completion); + + indio_dev->name = AD74115_NAME; + indio_dev->modes = INDIO_DIRECT_MODE; + indio_dev->info = &ad74115_info; + + st->avdd = devm_regulator_get(dev, "avdd"); + if (IS_ERR(st->avdd)) + return PTR_ERR(st->avdd); + + ret = regulator_enable(st->avdd); + if (ret) { + dev_err(dev, "Failed to enable avdd regulator\n"); + return ret; + } + + ret = devm_add_action_or_reset(dev, ad74115_regulator_disable, st->avdd); + if (ret) + return ret; + + ret = devm_regulator_bulk_get_enable(dev, ARRAY_SIZE(regulator_names), + regulator_names); + if (ret) + return ret; + + st->regmap = devm_regmap_init(dev, NULL, st, &ad74115_regmap_config); + if (IS_ERR(st->regmap)) + return PTR_ERR(st->regmap); + + ret = ad74115_reset(st); + if (ret) + return ret; + + ret = ad74115_setup(indio_dev); + if (ret) + return ret; + + ret = ad74115_setup_trigger(indio_dev); + if (ret) + return ret; + + ret = devm_iio_triggered_buffer_setup(dev, indio_dev, NULL, + ad74115_trigger_handler, + &ad74115_buffer_ops); + if (ret) + return ret; + + return devm_iio_device_register(dev, indio_dev); +} + +static int ad74115_unregister_driver(struct spi_driver *spi) +{ + spi_unregister_driver(spi); + + return 0; +} + +static int __init ad74115_register_driver(struct spi_driver *spi) +{ + crc8_populate_msb(ad74115_crc8_table, AD74115_CRC_POLYNOMIAL); + + return spi_register_driver(spi); +} + +static const struct spi_device_id 
ad74115_spi_id[] = { + { "ad74115h" }, + { } +}; + +MODULE_DEVICE_TABLE(spi, ad74115_spi_id); + +static const struct of_device_id ad74115_dt_id[] = { + { .compatible = "adi,ad74115h" }, + { } +}; +MODULE_DEVICE_TABLE(of, ad74115_dt_id); + +static struct spi_driver ad74115_driver = { + .driver = { + .name = "ad74115", + .of_match_table = ad74115_dt_id, + }, + .probe = ad74115_probe, + .id_table = ad74115_spi_id, +}; + +module_driver(ad74115_driver, + ad74115_register_driver, ad74115_unregister_driver); + +MODULE_AUTHOR("Cosmin Tanislav "); +MODULE_DESCRIPTION("Analog Devices AD74115 ADDAC"); +MODULE_LICENSE("GPL"); From f35e1ee9cb5d617efeb0f3695ff65169eb2b9cdd Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 16 Nov 2022 14:40:03 +0100 Subject: [PATCH 2537/4122] iio: imu: st_lsm6dsx: add support to LSM6DSO16IS Add support to STM LSM6DSO16IS (accelerometer and gyroscope) Mems sensor. Datasheet: https://www.st.com/resource/en/datasheet/lsm6dso16is.pdf Tested-by: Mario Tesi Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/df6a9d4653cd69f7204190f8b6a9b618fd48bd23.1668605631.git.lorenzo@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/imu/st_lsm6dsx/Kconfig | 4 +- drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h | 2 + drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c | 128 ++++++++++++++++++- drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c | 5 + drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_spi.c | 5 + 5 files changed, 141 insertions(+), 3 deletions(-) diff --git a/drivers/iio/imu/st_lsm6dsx/Kconfig b/drivers/iio/imu/st_lsm6dsx/Kconfig index 37d02e0fc227..0096035728cd 100644 --- a/drivers/iio/imu/st_lsm6dsx/Kconfig +++ b/drivers/iio/imu/st_lsm6dsx/Kconfig @@ -13,8 +13,8 @@ config IIO_ST_LSM6DSX sensor. Supported devices: lsm6ds3, lsm6ds3h, lsm6dsl, lsm6dsm, ism330dlc, lsm6dso, lsm6dsox, asm330lhh, asm330lhhx, lsm6dsr, lsm6ds3tr-c, ism330dhcx, lsm6dsrx, lsm6ds0, lsm6dsop, lsm6dstx, - lsm6dsv, lsm6dsv16x, the accelerometer/gyroscope of lsm9ds1 - and lsm6dst. 
+ lsm6dsv, lsm6dsv16x, lsm6dso16is, the accelerometer/gyroscope + of lsm9ds1 and lsm6dst. To compile this driver as a module, choose M here: the module will be called st_lsm6dsx. diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h index 683cfadcf62e..abf14a2ce0e9 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h @@ -35,6 +35,7 @@ #define ST_LSM6DSTX_DEV_NAME "lsm6dstx" #define ST_LSM6DSV_DEV_NAME "lsm6dsv" #define ST_LSM6DSV16X_DEV_NAME "lsm6dsv16x" +#define ST_LSM6DSO16IS_DEV_NAME "lsm6dso16is" enum st_lsm6dsx_hw_id { ST_LSM6DS3_ID, @@ -57,6 +58,7 @@ enum st_lsm6dsx_hw_id { ST_LSM6DSTX_ID, ST_LSM6DSV_ID, ST_LSM6DSV16X_ID, + ST_LSM6DSO16IS_ID, ST_LSM6DSX_MAX_ID, }; diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c index b680682f9833..57a79bf35bba 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c @@ -27,7 +27,7 @@ * - FIFO size: 4KB * * - LSM6DSO/LSM6DSOX/ASM330LHH/ASM330LHHX/LSM6DSR/ISM330DHCX/LSM6DST/LSM6DSOP/ - * LSM6DSTX: + * LSM6DSTX/LSM6DSO16IS: * - Accelerometer/Gyroscope supported ODR [Hz]: 12.5, 26, 52, 104, 208, 416, * 833 * - Accelerometer supported full-scale [g]: +-2/+-4/+-8/+-16 @@ -1375,6 +1375,132 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = { .wakeup_src_x_mask = BIT(2), }, }, + { + .reset = { + .addr = 0x12, + .mask = BIT(0), + }, + .boot = { + .addr = 0x12, + .mask = BIT(7), + }, + .bdu = { + .addr = 0x12, + .mask = BIT(6), + }, + .id = { + { + .hw_id = ST_LSM6DSO16IS_ID, + .name = ST_LSM6DSO16IS_DEV_NAME, + .wai = 0x22, + }, + }, + .channels = { + [ST_LSM6DSX_ID_ACC] = { + .chan = st_lsm6dsx_acc_channels, + .len = ARRAY_SIZE(st_lsm6dsx_acc_channels), + }, + [ST_LSM6DSX_ID_GYRO] = { + .chan = st_lsm6dsx_gyro_channels, + .len = ARRAY_SIZE(st_lsm6dsx_gyro_channels), + }, + }, + .odr_table = { + [ST_LSM6DSX_ID_ACC] = { 
+ .reg = { + .addr = 0x10, + .mask = GENMASK(7, 4), + }, + .odr_avl[0] = { 12500, 0x01 }, + .odr_avl[1] = { 26000, 0x02 }, + .odr_avl[2] = { 52000, 0x03 }, + .odr_avl[3] = { 104000, 0x04 }, + .odr_avl[4] = { 208000, 0x05 }, + .odr_avl[5] = { 416000, 0x06 }, + .odr_avl[6] = { 833000, 0x07 }, + .odr_len = 7, + }, + [ST_LSM6DSX_ID_GYRO] = { + .reg = { + .addr = 0x11, + .mask = GENMASK(7, 4), + }, + .odr_avl[0] = { 12500, 0x01 }, + .odr_avl[1] = { 26000, 0x02 }, + .odr_avl[2] = { 52000, 0x03 }, + .odr_avl[3] = { 104000, 0x04 }, + .odr_avl[4] = { 208000, 0x05 }, + .odr_avl[5] = { 416000, 0x06 }, + .odr_avl[6] = { 833000, 0x07 }, + .odr_len = 7, + }, + }, + .fs_table = { + [ST_LSM6DSX_ID_ACC] = { + .reg = { + .addr = 0x10, + .mask = GENMASK(3, 2), + }, + .fs_avl[0] = { IIO_G_TO_M_S_2(61000), 0x0 }, + .fs_avl[1] = { IIO_G_TO_M_S_2(122000), 0x2 }, + .fs_avl[2] = { IIO_G_TO_M_S_2(244000), 0x3 }, + .fs_avl[3] = { IIO_G_TO_M_S_2(488000), 0x1 }, + .fs_len = 4, + }, + [ST_LSM6DSX_ID_GYRO] = { + .reg = { + .addr = 0x11, + .mask = GENMASK(3, 2), + }, + .fs_avl[0] = { IIO_DEGREE_TO_RAD(8750000), 0x0 }, + .fs_avl[1] = { IIO_DEGREE_TO_RAD(17500000), 0x1 }, + .fs_avl[2] = { IIO_DEGREE_TO_RAD(35000000), 0x2 }, + .fs_avl[3] = { IIO_DEGREE_TO_RAD(70000000), 0x3 }, + .fs_len = 4, + }, + }, + .irq_config = { + .hla = { + .addr = 0x12, + .mask = BIT(5), + }, + .od = { + .addr = 0x12, + .mask = BIT(4), + }, + }, + .shub_settings = { + .page_mux = { + .addr = 0x01, + .mask = BIT(6), + }, + .master_en = { + .sec_page = true, + .addr = 0x14, + .mask = BIT(2), + }, + .pullup_en = { + .sec_page = true, + .addr = 0x14, + .mask = BIT(3), + }, + .aux_sens = { + .addr = 0x14, + .mask = GENMASK(1, 0), + }, + .wr_once = { + .addr = 0x14, + .mask = BIT(6), + }, + .num_ext_dev = 3, + .shub_out = { + .sec_page = true, + .addr = 0x02, + }, + .slv0_addr = 0x15, + .dw_slv0_addr = 0x21, + }, + }, }; int st_lsm6dsx_set_page(struct st_lsm6dsx_hw *hw, bool enable) diff --git 
a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c index 3570fac1b612..c34ccc85e4c7 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c @@ -117,6 +117,10 @@ static const struct of_device_id st_lsm6dsx_i2c_of_match[] = { .compatible = "st,lsm6dsv16x", .data = (void *)ST_LSM6DSV16X_ID, }, + { + .compatible = "st,lsm6dso16is", + .data = (void *)ST_LSM6DSO16IS_ID, + }, {}, }; MODULE_DEVICE_TABLE(of, st_lsm6dsx_i2c_of_match); @@ -142,6 +146,7 @@ static const struct i2c_device_id st_lsm6dsx_i2c_id_table[] = { { ST_LSM6DSTX_DEV_NAME, ST_LSM6DSTX_ID }, { ST_LSM6DSV_DEV_NAME, ST_LSM6DSV_ID }, { ST_LSM6DSV16X_DEV_NAME, ST_LSM6DSV16X_ID }, + { ST_LSM6DSO16IS_DEV_NAME, ST_LSM6DSO16IS_ID }, {}, }; MODULE_DEVICE_TABLE(i2c, st_lsm6dsx_i2c_id_table); diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_spi.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_spi.c index 57597aaa2a92..24d5e51a8662 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_spi.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_spi.c @@ -117,6 +117,10 @@ static const struct of_device_id st_lsm6dsx_spi_of_match[] = { .compatible = "st,lsm6dsv16x", .data = (void *)ST_LSM6DSV16X_ID, }, + { + .compatible = "st,lsm6dso16is", + .data = (void *)ST_LSM6DSO16IS_ID, + }, {}, }; MODULE_DEVICE_TABLE(of, st_lsm6dsx_spi_of_match); @@ -142,6 +146,7 @@ static const struct spi_device_id st_lsm6dsx_spi_id_table[] = { { ST_LSM6DSTX_DEV_NAME, ST_LSM6DSTX_ID }, { ST_LSM6DSV_DEV_NAME, ST_LSM6DSV_ID }, { ST_LSM6DSV16X_DEV_NAME, ST_LSM6DSV16X_ID }, + { ST_LSM6DSO16IS_DEV_NAME, ST_LSM6DSO16IS_ID }, {}, }; MODULE_DEVICE_TABLE(spi, st_lsm6dsx_spi_id_table); From 1842fff0f7b603e587e0fe45804d9ebfba329e38 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 16 Nov 2022 14:40:04 +0100 Subject: [PATCH 2538/4122] dt-bindings: iio: imu: st_lsm6dsx: add lsm6dso16is Add device bindings for lsm6dso16is IMU sensor. 
Signed-off-by: Lorenzo Bianconi Acked-by: Rob Herring Link: https://lore.kernel.org/r/55b15b3e8453a12edcf8195ef9c9243a76f87096.1668605631.git.lorenzo@kernel.org Signed-off-by: Jonathan Cameron --- Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml b/Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml index 5933270799f5..07d5aee7e442 100644 --- a/Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml +++ b/Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml @@ -33,6 +33,7 @@ properties: - st,lsm6dst - st,lsm6dsop - st,lsm6dsv + - st,lsm6dso16is - items: - const: st,asm330lhhx - const: st,lsm6dsr From 58ae95f9671ddf98082e021ee42643d126912e71 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Mon, 31 Oct 2022 19:24:54 +0100 Subject: [PATCH 2539/4122] dt-bindings: iio/adc: qcom,spmi-iadc: use double compatibles As in other bindings, let's use specific compatibles together with the fallback compatible. Adjust the bindings for it. 
Signed-off-by: Luca Weiss Acked-by: Rob Herring Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221031182456.952648-1-luca@z3ntu.xyz Signed-off-by: Jonathan Cameron --- .../devicetree/bindings/iio/adc/qcom,spmi-iadc.yaml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/iio/adc/qcom,spmi-iadc.yaml b/Documentation/devicetree/bindings/iio/adc/qcom,spmi-iadc.yaml index 2a94db688830..fa855baa368c 100644 --- a/Documentation/devicetree/bindings/iio/adc/qcom,spmi-iadc.yaml +++ b/Documentation/devicetree/bindings/iio/adc/qcom,spmi-iadc.yaml @@ -18,7 +18,10 @@ description: | properties: compatible: - const: qcom,spmi-iadc + items: + - enum: + - qcom,pm8941-iadc + - const: qcom,spmi-iadc reg: description: IADC base address and length in the SPMI PMIC register map @@ -50,7 +53,7 @@ examples: #address-cells = <1>; #size-cells = <0>; pmic_iadc: adc@3600 { - compatible = "qcom,spmi-iadc"; + compatible = "qcom,pm8941-iadc", "qcom,spmi-iadc"; reg = <0x3600>; interrupts = <0x0 0x36 0x0 IRQ_TYPE_EDGE_RISING>; qcom,external-resistor-micro-ohms = <10000>; From 7b2366008125b0849dcbd18afbcb33cbc30c3477 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 15 Nov 2022 10:55:15 +0100 Subject: [PATCH 2540/4122] iio: addac: ad74413r: add spi_device_id table Silence the run-time warning SPI driver ad74413r has no spi_device_id for adi,ad74412r Signed-off-by: Rasmus Villemoes Link: https://lore.kernel.org/r/20221115095517.1008632-2-linux@rasmusvillemoes.dk Signed-off-by: Jonathan Cameron --- drivers/iio/addac/ad74413r.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/iio/addac/ad74413r.c b/drivers/iio/addac/ad74413r.c index e0e130ba9d3e..29b3a5775f23 100644 --- a/drivers/iio/addac/ad74413r.c +++ b/drivers/iio/addac/ad74413r.c @@ -1305,6 +1305,15 @@ static int ad74413r_probe(struct spi_device *spi) st->spi = spi; st->dev = &spi->dev; st->chip_info = device_get_match_data(&spi->dev); + 
if (!st->chip_info) { + const struct spi_device_id *id = spi_get_device_id(spi); + if (id) + st->chip_info = + (struct ad74413r_chip_info *)id->driver_data; + if (!st->chip_info) + return -EINVAL; + } + mutex_init(&st->lock); init_completion(&st->adc_data_completion); @@ -1457,12 +1466,20 @@ static const struct of_device_id ad74413r_dt_id[] = { }; MODULE_DEVICE_TABLE(of, ad74413r_dt_id); +static const struct spi_device_id ad74413r_spi_id[] = { + { .name = "ad74412r", .driver_data = (kernel_ulong_t)&ad74412r_chip_info_data }, + { .name = "ad74413r", .driver_data = (kernel_ulong_t)&ad74413r_chip_info_data }, + {} +}; +MODULE_DEVICE_TABLE(spi, ad74413r_spi_id); + static struct spi_driver ad74413r_driver = { .driver = { .name = "ad74413r", .of_match_table = ad74413r_dt_id, }, .probe = ad74413r_probe, + .id_table = ad74413r_spi_id, }; module_driver(ad74413r_driver, From 6aaf7045697aa93589bea1b33e751814b0776991 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 15 Nov 2022 10:55:16 +0100 Subject: [PATCH 2541/4122] dt-bindings: iio: ad74413r: add optional reset-gpios The ad74412 and ad74413 devices have an active-low reset pin. Add a binding allowing one to specify a gpio tied to that. Signed-off-by: Rasmus Villemoes Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221115095517.1008632-3-linux@rasmusvillemoes.dk Signed-off-by: Jonathan Cameron --- Documentation/devicetree/bindings/iio/addac/adi,ad74413r.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/iio/addac/adi,ad74413r.yaml b/Documentation/devicetree/bindings/iio/addac/adi,ad74413r.yaml index 58b3ae14ccaa..9eb3ecc8bbc8 100644 --- a/Documentation/devicetree/bindings/iio/addac/adi,ad74413r.yaml +++ b/Documentation/devicetree/bindings/iio/addac/adi,ad74413r.yaml @@ -51,6 +51,9 @@ properties: Shunt (sense) resistor value in micro-Ohms. 
default: 100000000 + reset-gpios: + maxItems: 1 + required: - compatible - reg @@ -129,6 +132,7 @@ examples: interrupts = <26 IRQ_TYPE_EDGE_FALLING>; refin-supply = <&ad74413r_refin>; + reset-gpios = <&gpio2 6 GPIO_ACTIVE_LOW>; channel@0 { reg = <0>; From f237cf1914e24ebba88670ecc36e8209c888c9a2 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 15 Nov 2022 10:55:17 +0100 Subject: [PATCH 2542/4122] iio: addac: ad74413r: add support for reset-gpio We have a board where the reset pin of the ad74412 is connected to a gpio, but also pulled low (i.e., asserted) by default. Hence to get the chip out of reset, the driver needs to know about that gpio and deassert the reset signal before attempting to communicate with the chip. When a reset-gpio is given in device tree, use that instead of the software reset. According to the data sheet, the two methods are functionally equivalent. Signed-off-by: Rasmus Villemoes Link: https://lore.kernel.org/r/20221115095517.1008632-4-linux@rasmusvillemoes.dk Signed-off-by: Jonathan Cameron --- drivers/iio/addac/ad74413r.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/iio/addac/ad74413r.c b/drivers/iio/addac/ad74413r.c index 29b3a5775f23..61030053cbea 100644 --- a/drivers/iio/addac/ad74413r.c +++ b/drivers/iio/addac/ad74413r.c @@ -71,6 +71,7 @@ struct ad74413r_state { struct regmap *regmap; struct device *dev; struct iio_trigger *trig; + struct gpio_desc *reset_gpio; size_t adc_active_channels; struct spi_message adc_samples_msg; @@ -393,6 +394,13 @@ static int ad74413r_reset(struct ad74413r_state *st) { int ret; + if (st->reset_gpio) { + gpiod_set_value_cansleep(st->reset_gpio, 1); + fsleep(50); + gpiod_set_value_cansleep(st->reset_gpio, 0); + return 0; + } + ret = regmap_write(st->regmap, AD74413R_REG_CMD_KEY, AD74413R_CMD_KEY_RESET1); if (ret) @@ -1322,6 +1330,10 @@ static int ad74413r_probe(struct spi_device *spi) if (IS_ERR(st->regmap)) return PTR_ERR(st->regmap); + st->reset_gpio = 
devm_gpiod_get_optional(st->dev, "reset", GPIOD_OUT_LOW); + if (IS_ERR(st->reset_gpio)) + return PTR_ERR(st->reset_gpio); + st->refin_reg = devm_regulator_get(st->dev, "refin"); if (IS_ERR(st->refin_reg)) return dev_err_probe(st->dev, PTR_ERR(st->refin_reg), From 9d901e356c8d3640940cb72ff7969f03e85c2e95 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Tue, 15 Nov 2022 11:31:21 +0100 Subject: [PATCH 2543/4122] iio: adc: stm32-adc: smart calibration support Add smart calibration support for STM32MP1. - STM32MP15x: both linear & offset calibration are supported - STM32MP13x: Only offset calibration is supported Linear calibration: Linear calibration is SoC dependent and does not change over time so it can be done only once. Linear calibration may have already been done in u-boot. Skip calibration execution if calibration data are already available. Save calibration factors in private data and restore them from private data on next ADC start. Offset calibration: This calibration may vary over time, depending on temperature or voltage. Run offset single-ended and differential calibration on each ADC start, as it is not time consuming. This calibration does not need to be saved. So, remove calfact_s and calfact_d value and bitfields that are no longer used. 
Signed-off-by: Olivier Moysan Reviewed-by: Fabrice Gasnier Link: https://lore.kernel.org/r/20221115103124.70074-2-olivier.moysan@foss.st.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/stm32-adc-core.h | 1 + drivers/iio/adc/stm32-adc.c | 108 +++++++++++++++---------------- 2 files changed, 53 insertions(+), 56 deletions(-) diff --git a/drivers/iio/adc/stm32-adc-core.h b/drivers/iio/adc/stm32-adc-core.h index 9d6dfa1c03fa..73b2c2e91c08 100644 --- a/drivers/iio/adc/stm32-adc-core.h +++ b/drivers/iio/adc/stm32-adc-core.h @@ -142,6 +142,7 @@ #define STM32H7_LINCALRDYW3 BIT(24) #define STM32H7_LINCALRDYW2 BIT(23) #define STM32H7_LINCALRDYW1 BIT(22) +#define STM32H7_LINCALRDYW_MASK GENMASK(27, 22) #define STM32H7_ADCALLIN BIT(16) #define STM32H7_BOOST BIT(8) #define STM32H7_ADSTP BIT(4) diff --git a/drivers/iio/adc/stm32-adc.c b/drivers/iio/adc/stm32-adc.c index 8d03d21a33d6..2b2b55eb130e 100644 --- a/drivers/iio/adc/stm32-adc.c +++ b/drivers/iio/adc/stm32-adc.c @@ -119,16 +119,12 @@ struct stm32_adc_trig_info { /** * struct stm32_adc_calib - optional adc calibration data - * @calfact_s: Calibration offset for single ended channels - * @calfact_d: Calibration offset in differential * @lincalfact: Linearity calibration factor - * @calibrated: Indicates calibration status + * @lincal_saved: Indicates that linear calibration factors are saved */ struct stm32_adc_calib { - u32 calfact_s; - u32 calfact_d; u32 lincalfact[STM32H7_LINCALFACT_NUM]; - bool calibrated; + bool lincal_saved; }; /** @@ -165,8 +161,6 @@ struct stm32_adc_vrefint { * @extsel: trigger selection register & bitfield * @res: resolution selection register & bitfield * @difsel: differential mode selection register & bitfield - * @calfact_s: single-ended calibration factors register & bitfield - * @calfact_d: differential calibration factors register & bitfield * @smpr: smpr1 & smpr2 registers offset array * @smp_bits: smpr1 & smpr2 index and bitfields * @or_vddcore: option register & vddcore bitfield 
@@ -186,8 +180,6 @@ struct stm32_adc_regspec { const struct stm32_adc_regs extsel; const struct stm32_adc_regs res; const struct stm32_adc_regs difsel; - const struct stm32_adc_regs calfact_s; - const struct stm32_adc_regs calfact_d; const u32 smpr[2]; const struct stm32_adc_regs *smp_bits; const struct stm32_adc_regs or_vddcore; @@ -525,10 +517,6 @@ static const struct stm32_adc_regspec stm32h7_adc_regspec = { STM32H7_EXTSEL_SHIFT }, .res = { STM32H7_ADC_CFGR, STM32H7_RES_MASK, STM32H7_RES_SHIFT }, .difsel = { STM32H7_ADC_DIFSEL, STM32H7_DIFSEL_MASK}, - .calfact_s = { STM32H7_ADC_CALFACT, STM32H7_CALFACT_S_MASK, - STM32H7_CALFACT_S_SHIFT }, - .calfact_d = { STM32H7_ADC_CALFACT, STM32H7_CALFACT_D_MASK, - STM32H7_CALFACT_D_SHIFT }, .smpr = { STM32H7_ADC_SMPR1, STM32H7_ADC_SMPR2 }, .smp_bits = stm32h7_smp_bits, }; @@ -550,10 +538,6 @@ static const struct stm32_adc_regspec stm32mp13_adc_regspec = { STM32H7_EXTSEL_SHIFT }, .res = { STM32H7_ADC_CFGR, STM32MP13_RES_MASK, STM32MP13_RES_SHIFT }, .difsel = { STM32MP13_ADC_DIFSEL, STM32MP13_DIFSEL_MASK}, - .calfact_s = { STM32MP13_ADC_CALFACT, STM32MP13_CALFACT_S_MASK, - STM32MP13_CALFACT_S_SHIFT }, - .calfact_d = { STM32MP13_ADC_CALFACT, STM32MP13_CALFACT_D_MASK, - STM32MP13_CALFACT_D_SHIFT }, .smpr = { STM32H7_ADC_SMPR1, STM32H7_ADC_SMPR2 }, .smp_bits = stm32h7_smp_bits, .or_vddcore = { STM32MP13_ADC2_OR, STM32MP13_OP0 }, @@ -575,10 +559,6 @@ static const struct stm32_adc_regspec stm32mp1_adc_regspec = { STM32H7_EXTSEL_SHIFT }, .res = { STM32H7_ADC_CFGR, STM32H7_RES_MASK, STM32H7_RES_SHIFT }, .difsel = { STM32H7_ADC_DIFSEL, STM32H7_DIFSEL_MASK}, - .calfact_s = { STM32H7_ADC_CALFACT, STM32H7_CALFACT_S_MASK, - STM32H7_CALFACT_S_SHIFT }, - .calfact_d = { STM32H7_ADC_CALFACT, STM32H7_CALFACT_D_MASK, - STM32H7_CALFACT_D_SHIFT }, .smpr = { STM32H7_ADC_SMPR1, STM32H7_ADC_SMPR2 }, .smp_bits = stm32h7_smp_bits, .or_vddcore = { STM32MP1_ADC2_OR, STM32MP1_VDDCOREEN }, @@ -1000,9 +980,6 @@ static int stm32h7_adc_read_selfcalib(struct 
iio_dev *indio_dev) int i, ret; u32 lincalrdyw_mask, val; - if (!adc->cfg->has_linearcal) - goto skip_linearcal; - /* Read linearity calibration */ lincalrdyw_mask = STM32H7_LINCALRDYW6; for (i = STM32H7_LINCALFACT_NUM - 1; i >= 0; i--) { @@ -1024,15 +1001,7 @@ static int stm32h7_adc_read_selfcalib(struct iio_dev *indio_dev) lincalrdyw_mask >>= 1; } - -skip_linearcal: - /* Read offset calibration */ - val = stm32_adc_readl(adc, adc->cfg->regs->calfact_s.reg); - adc->cal.calfact_s = (val & adc->cfg->regs->calfact_s.mask); - adc->cal.calfact_s >>= adc->cfg->regs->calfact_s.shift; - adc->cal.calfact_d = (val & adc->cfg->regs->calfact_d.mask); - adc->cal.calfact_d >>= adc->cfg->regs->calfact_d.shift; - adc->cal.calibrated = true; + adc->cal.lincal_saved = true; return 0; } @@ -1048,13 +1017,6 @@ static int stm32h7_adc_restore_selfcalib(struct iio_dev *indio_dev) int i, ret; u32 lincalrdyw_mask, val; - val = (adc->cal.calfact_s << adc->cfg->regs->calfact_s.shift) | - (adc->cal.calfact_d << adc->cfg->regs->calfact_d.shift); - stm32_adc_writel(adc, adc->cfg->regs->calfact_s.reg, val); - - if (!adc->cfg->has_linearcal) - return 0; - lincalrdyw_mask = STM32H7_LINCALRDYW6; for (i = STM32H7_LINCALFACT_NUM - 1; i >= 0; i--) { /* @@ -1116,19 +1078,20 @@ static int stm32h7_adc_restore_selfcalib(struct iio_dev *indio_dev) /** * stm32h7_adc_selfcalib() - Procedure to calibrate ADC * @indio_dev: IIO device instance + * @do_lincal: linear calibration request flag * Note: Must be called once ADC is out of power down. + * + * Run offset calibration unconditionally. + * Run linear calibration if requested & supported. 
*/ -static int stm32h7_adc_selfcalib(struct iio_dev *indio_dev) +static int stm32h7_adc_selfcalib(struct iio_dev *indio_dev, int do_lincal) { struct stm32_adc *adc = iio_priv(indio_dev); int ret; u32 msk = STM32H7_ADCALDIF; u32 val; - if (adc->cal.calibrated) - return true; - - if (adc->cfg->has_linearcal) + if (adc->cfg->has_linearcal && do_lincal) msk |= STM32H7_ADCALLIN; /* ADC must be disabled for calibration */ stm32h7_adc_disable(indio_dev); @@ -1172,6 +1135,33 @@ out: return ret; } +/** + * stm32h7_adc_check_selfcalib() - Check linear calibration status + * @indio_dev: IIO device instance + * + * Used to check if linear calibration has been done. + * Return true if linear calibration factors are already saved in private data + * or if a linear calibration has been done at boot stage. + */ +static int stm32h7_adc_check_selfcalib(struct iio_dev *indio_dev) +{ + struct stm32_adc *adc = iio_priv(indio_dev); + u32 val; + + if (adc->cal.lincal_saved) + return true; + + /* + * Check if linear calibration factors are available in ADC registers, + * by checking that all LINCALRDYWx bits are set. + */ + val = stm32_adc_readl(adc, STM32H7_ADC_CR) & STM32H7_LINCALRDYW_MASK; + if (val == STM32H7_LINCALRDYW_MASK) + return true; + + return false; +} + /** * stm32h7_adc_prepare() - Leave power down mode to enable ADC. * @indio_dev: IIO device instance @@ -1186,16 +1176,20 @@ out: static int stm32h7_adc_prepare(struct iio_dev *indio_dev) { struct stm32_adc *adc = iio_priv(indio_dev); - int calib, ret; + int lincal_done = false; + int ret; ret = stm32h7_adc_exit_pwr_down(indio_dev); if (ret) return ret; - ret = stm32h7_adc_selfcalib(indio_dev); + if (adc->cfg->has_linearcal) + lincal_done = stm32h7_adc_check_selfcalib(indio_dev); + + /* Always run offset calibration. 
Run linear calibration only once */ + ret = stm32h7_adc_selfcalib(indio_dev, !lincal_done); if (ret < 0) goto pwr_dwn; - calib = ret; stm32_adc_int_ch_enable(indio_dev); @@ -1205,13 +1199,15 @@ static int stm32h7_adc_prepare(struct iio_dev *indio_dev) if (ret) goto ch_disable; - /* Either restore or read calibration result for future reference */ - if (calib) - ret = stm32h7_adc_restore_selfcalib(indio_dev); - else - ret = stm32h7_adc_read_selfcalib(indio_dev); - if (ret) - goto disable; + if (adc->cfg->has_linearcal) { + if (!adc->cal.lincal_saved) + ret = stm32h7_adc_read_selfcalib(indio_dev); + else + ret = stm32h7_adc_restore_selfcalib(indio_dev); + + if (ret) + goto disable; + } if (adc->cfg->has_presel) stm32_adc_writel(adc, STM32H7_ADC_PCSEL, adc->pcsel); From 2206732b9ad1be8c59f714f2912539bf95cf2961 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Tue, 15 Nov 2022 11:31:22 +0100 Subject: [PATCH 2544/4122] iio: adc: stm32-adc: improve calibration error log Add more information in calibration error log to differentiate single-ended and differential calibration. 
Signed-off-by: Olivier Moysan Reviewed-by: Fabrice Gasnier Link: https://lore.kernel.org/r/20221115103124.70074-3-olivier.moysan@foss.st.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/stm32-adc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/stm32-adc.c b/drivers/iio/adc/stm32-adc.c index 2b2b55eb130e..65dd45537505 100644 --- a/drivers/iio/adc/stm32-adc.c +++ b/drivers/iio/adc/stm32-adc.c @@ -1109,7 +1109,7 @@ static int stm32h7_adc_selfcalib(struct iio_dev *indio_dev, int do_lincal) !(val & STM32H7_ADCAL), 100, STM32H7_ADC_CALIB_TIMEOUT_US); if (ret) { - dev_err(&indio_dev->dev, "calibration failed\n"); + dev_err(&indio_dev->dev, "calibration (single-ended) error %d\n", ret); goto out; } @@ -1125,7 +1125,8 @@ static int stm32h7_adc_selfcalib(struct iio_dev *indio_dev, int do_lincal) !(val & STM32H7_ADCAL), 100, STM32H7_ADC_CALIB_TIMEOUT_US); if (ret) { - dev_err(&indio_dev->dev, "calibration failed\n"); + dev_err(&indio_dev->dev, "calibration (diff%s) error %d\n", + (msk & STM32H7_ADCALLIN) ? "+linear" : "", ret); goto out; } From 51bcacc6fce8db7085c8652069b68123c6c1897c Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Tue, 15 Nov 2022 11:31:23 +0100 Subject: [PATCH 2545/4122] iio: adc: stm32-adc: add debugfs to read raw calibration result Add debugfs to read linear ADC STM32 self calibration results. 
Signed-off-by: Fabrice Gasnier Signed-off-by: Olivier Moysan Reviewed-by: Fabrice Gasnier Link: https://lore.kernel.org/r/20221115103124.70074-4-olivier.moysan@foss.st.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/stm32-adc.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/iio/adc/stm32-adc.c b/drivers/iio/adc/stm32-adc.c index 65dd45537505..45d4e79f8e55 100644 --- a/drivers/iio/adc/stm32-adc.c +++ b/drivers/iio/adc/stm32-adc.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include @@ -1879,6 +1880,23 @@ static const struct iio_chan_spec_ext_info stm32_adc_ext_info[] = { {}, }; +static void stm32_adc_debugfs_init(struct iio_dev *indio_dev) +{ + struct stm32_adc *adc = iio_priv(indio_dev); + struct dentry *d = iio_get_debugfs_dentry(indio_dev); + struct stm32_adc_calib *cal = &adc->cal; + char buf[16]; + unsigned int i; + + if (!adc->cfg->has_linearcal) + return; + + for (i = 0; i < STM32H7_LINCALFACT_NUM; i++) { + snprintf(buf, sizeof(buf), "lincalfact%d", i + 1); + debugfs_create_u32(buf, 0444, d, &cal->lincalfact[i]); + } +} + static int stm32_adc_fw_get_resolution(struct iio_dev *indio_dev) { struct device *dev = &indio_dev->dev; @@ -2465,6 +2483,9 @@ static int stm32_adc_probe(struct platform_device *pdev) pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); + if (IS_ENABLED(CONFIG_DEBUG_FS)) + stm32_adc_debugfs_init(indio_dev); + return 0; err_hw_stop: @@ -2493,6 +2514,7 @@ static int stm32_adc_remove(struct platform_device *pdev) struct stm32_adc *adc = iio_priv(indio_dev); pm_runtime_get_sync(&pdev->dev); + /* iio_device_unregister() also removes debugfs entries */ iio_device_unregister(indio_dev); stm32_adc_hw_stop(&pdev->dev); pm_runtime_disable(&pdev->dev); From e2af60f5900c6ade53477b494ffb54690eee11f5 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 15 Nov 2022 14:23:23 +0100 Subject: [PATCH 2546/4122] iio: adc128s052: add proper .data members in adc128_of_match table Prior to 
commit bd5d54e4d49d ("iio: adc128s052: add ACPI _HID AANT1280"), the driver unconditionally used spi_get_device_id() to get the index into the adc128_config array. However, with that commit, OF-based boards now incorrectly treat all supported sensors as if they are an adc128s052, because all the .data members of the adc128_of_match table are implicitly 0. Our board, which has an adc122s021, thus exposes 8 channels whereas it really only has two. Fixes: bd5d54e4d49d ("iio: adc128s052: add ACPI _HID AANT1280") Signed-off-by: Rasmus Villemoes Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221115132324.1078169-1-linux@rasmusvillemoes.dk Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ti-adc128s052.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/iio/adc/ti-adc128s052.c b/drivers/iio/adc/ti-adc128s052.c index 622fd384983c..b3d5b9b7255b 100644 --- a/drivers/iio/adc/ti-adc128s052.c +++ b/drivers/iio/adc/ti-adc128s052.c @@ -181,13 +181,13 @@ static int adc128_probe(struct spi_device *spi) } static const struct of_device_id adc128_of_match[] = { - { .compatible = "ti,adc128s052", }, - { .compatible = "ti,adc122s021", }, - { .compatible = "ti,adc122s051", }, - { .compatible = "ti,adc122s101", }, - { .compatible = "ti,adc124s021", }, - { .compatible = "ti,adc124s051", }, - { .compatible = "ti,adc124s101", }, + { .compatible = "ti,adc128s052", .data = (void*)0L, }, + { .compatible = "ti,adc122s021", .data = (void*)1L, }, + { .compatible = "ti,adc122s051", .data = (void*)1L, }, + { .compatible = "ti,adc122s101", .data = (void*)1L, }, + { .compatible = "ti,adc124s021", .data = (void*)2L, }, + { .compatible = "ti,adc124s051", .data = (void*)2L, }, + { .compatible = "ti,adc124s101", .data = (void*)2L, }, { /* sentinel */ }, }; MODULE_DEVICE_TABLE(of, adc128_of_match); From 32abe97b48fb1532c752495dab3f8255db8f5c2a Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Tue, 15 Nov 2022 12:41:28 +0000 Subject: [PATCH 
2547/4122] dt-bindings: iio: adc: renesas,rzg2l-adc: Document RZ/Five SoC The ADC block on the RZ/Five SoC is identical to one found on the RZ/G2UL SoC. "renesas,r9a07g043-adc" compatible string will be used on the RZ/Five SoC so to make this clear, update the comment to include RZ/Five SoC. No driver changes are required as generic compatible string "renesas,rzg2l-adc" will be used as a fallback on RZ/Five SoC. Signed-off-by: Lad Prabhakar Acked-by: Rob Herring Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20221115124128.1183144-1-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Jonathan Cameron --- .../devicetree/bindings/iio/adc/renesas,rzg2l-adc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/iio/adc/renesas,rzg2l-adc.yaml b/Documentation/devicetree/bindings/iio/adc/renesas,rzg2l-adc.yaml index 61c6157cf5a9..8b743742a5f9 100644 --- a/Documentation/devicetree/bindings/iio/adc/renesas,rzg2l-adc.yaml +++ b/Documentation/devicetree/bindings/iio/adc/renesas,rzg2l-adc.yaml @@ -19,7 +19,7 @@ properties: compatible: items: - enum: - - renesas,r9a07g043-adc # RZ/G2UL + - renesas,r9a07g043-adc # RZ/G2UL and RZ/Five - renesas,r9a07g044-adc # RZ/G2L - renesas,r9a07g054-adc # RZ/V2L - const: renesas,rzg2l-adc From a0666f7f0f9470bfe64ab60324bc0a7830ce9035 Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Tue, 15 Nov 2022 17:16:53 +0200 Subject: [PATCH 2548/4122] dt-bindings: iio: temperature: ltc2983: drop $ref for -nanoamp properties Currently there are -nanoamp properties both with and without a $ref. dt-schema has been modified to handle it as a standard unit, but the change has been reverted since there were still occurrences of -nanoamp properties with a $ref. Remove this since it's the only occurrence left. 
Signed-off-by: Cosmin Tanislav Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221115151653.393559-1-cosmin.tanislav@analog.com Signed-off-by: Jonathan Cameron --- .../devicetree/bindings/iio/temperature/adi,ltc2983.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml index 44f8b0672f53..b69813f281da 100644 --- a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml +++ b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml @@ -325,7 +325,6 @@ patternProperties: description: Excitation current applied to the thermistor. 0 sets the sensor in auto-range mode. - $ref: /schemas/types.yaml#/definitions/uint32 enum: [0, 250, 500, 1000, 5000, 10000, 25000, 50000, 100000, 250000, 500000, 1000000] default: 0 From e0736b1bb71b21faeebf85efc725d01c7ecf2ebe Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Tue, 15 Nov 2022 17:19:55 +0200 Subject: [PATCH 2549/4122] dt-bindings: iio: adc: ad4130: use spi-peripheral-props.yaml Reference the "spi-peripheral-props.yaml" schema to allow using all SPI device properties. 
Signed-off-by: Cosmin Tanislav Acked-by: Rob Herring Link: https://lore.kernel.org/r/20221115151955.394030-1-cosmin.tanislav@analog.com Signed-off-by: Jonathan Cameron --- Documentation/devicetree/bindings/iio/adc/adi,ad4130.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/iio/adc/adi,ad4130.yaml b/Documentation/devicetree/bindings/iio/adc/adi,ad4130.yaml index 28ebd38b9db4..d00690a8d3fb 100644 --- a/Documentation/devicetree/bindings/iio/adc/adi,ad4130.yaml +++ b/Documentation/devicetree/bindings/iio/adc/adi,ad4130.yaml @@ -190,7 +190,10 @@ patternProperties: - reg - diff-channels -additionalProperties: false +allOf: + - $ref: /schemas/spi/spi-peripheral-props.yaml# + +unevaluatedProperties: false examples: - | From 1407438a7ad513a9dd5c70bc996200f97960584c Mon Sep 17 00:00:00 2001 From: Antoniu Miclaus Date: Tue, 15 Nov 2022 13:00:40 +0200 Subject: [PATCH 2550/4122] dt-bindings: iio: frequency: add adf4377 doc Add device tree bindings for the ADF4377 driver. 
Signed-off-by: Antoniu Miclaus Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221115110041.71495-1-antoniu.miclaus@analog.com Signed-off-by: Jonathan Cameron --- .../bindings/iio/frequency/adi,adf4377.yaml | 92 +++++++++++++++++++ MAINTAINERS | 8 ++ 2 files changed, 100 insertions(+) create mode 100644 Documentation/devicetree/bindings/iio/frequency/adi,adf4377.yaml diff --git a/Documentation/devicetree/bindings/iio/frequency/adi,adf4377.yaml b/Documentation/devicetree/bindings/iio/frequency/adi,adf4377.yaml new file mode 100644 index 000000000000..aa6a3193b4e0 --- /dev/null +++ b/Documentation/devicetree/bindings/iio/frequency/adi,adf4377.yaml @@ -0,0 +1,92 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/iio/frequency/adi,adf4377.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: ADF4377 Microwave Wideband Synthesizer with Integrated VCO + +maintainers: + - Antoniu Miclaus + - Dragos Bogdan + +description: | + The ADF4377 is a high performance, ultralow jitter, dual output integer-N + phased locked loop (PLL) with integrated voltage controlled oscillator (VCO) + ideally suited for data converter and mixed signal front end (MxFE) clock + applications. + + https://www.analog.com/en/products/adf4377.html + +properties: + compatible: + enum: + - adi,adf4377 + - adi,adf4378 + + reg: + maxItems: 1 + + spi-max-frequency: + maximum: 10000000 + + clocks: + maxItems: 1 + + clock-names: + description: + External clock that provides reference input frequency. + items: + - const: ref_in + + chip-enable-gpios: + description: + GPIO that controls the Chip Enable Pin. + maxItems: 1 + + clk1-enable-gpios: + description: + GPIO that controls the Enable Clock 1 Output Buffer Pin. + maxItems: 1 + + clk2-enable-gpios: + description: + GPIO that controls the Enable Clock 2 Output Buffer Pin. 
+ maxItems: 1 + + adi,muxout-select: + description: + On chip multiplexer output selection. + high_z - MUXOUT Pin set to high-Z. + lock_detect - MUXOUT Pin set to lock detector output. + muxout_low - MUXOUT Pin set to low. + f_div_rclk_2 - MUXOUT Pin set to fDIV_RCLK/2. + f_div_nclk_2 - MUXOUT Pin set to fDIV_NCLK/2. + muxout_high - MUXOUT Pin set to high. + enum: [high_z, lock_detect, muxout_low, f_div_rclk_2, f_div_nclk_2, muxout_high] + +required: + - compatible + - reg + - clocks + - clock-names + +allOf: + - $ref: /schemas/spi/spi-peripheral-props.yaml# + +unevaluatedProperties: false + +examples: + - | + spi { + #address-cells = <1>; + #size-cells = <0>; + frequency@0 { + compatible = "adi,adf4377"; + reg = <0>; + spi-max-frequency = <10000000>; + clocks = <&adf4377_ref_in>; + clock-names = "ref_in"; + }; + }; +... diff --git a/MAINTAINERS b/MAINTAINERS index 860075c493dd..82e23ae055e1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1206,6 +1206,14 @@ W: https://ez.analog.com/linux-software-drivers F: Documentation/devicetree/bindings/iio/amplifiers/adi,ada4250.yaml F: drivers/iio/amplifiers/ada4250.c +ANALOG DEVICES INC ADF4377 DRIVER +M: Antoniu Miclaus +L: linux-iio@vger.kernel.org +S: Supported +W: https://ez.analog.com/linux-software-drivers +F: Documentation/devicetree/bindings/iio/frequency/adi,adf4377.yaml +F: drivers/iio/frequency/adf4377.c + ANALOG DEVICES INC ADGS1408 DRIVER M: Mircea Caprioru S: Supported From eda549e2e52496d0d374ce457f514a4f14172aa5 Mon Sep 17 00:00:00 2001 From: Antoniu Miclaus Date: Tue, 15 Nov 2022 13:00:41 +0200 Subject: [PATCH 2551/4122] iio: frequency: adf4377: add support for ADF4377 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ADF4377 is a high performance, ultralow jitter, dual output integer-N phased locked loop (PLL) with integrated voltage controlled oscillator (VCO) ideally suited for data converter and mixed signal front end (MxFE) clock applications. 
Datasheet: https://www.analog.com/media/en/technical-documentation/data-sheets/adf4377.pdf Signed-off-by: Antoniu Miclaus Reviewed-by: Nuno Sá Link: https://lore.kernel.org/r/20221115110041.71495-2-antoniu.miclaus@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/frequency/Kconfig | 10 + drivers/iio/frequency/Makefile | 1 + drivers/iio/frequency/adf4377.c | 994 ++++++++++++++++++++++++++++++++ 3 files changed, 1005 insertions(+) create mode 100644 drivers/iio/frequency/adf4377.c diff --git a/drivers/iio/frequency/Kconfig b/drivers/iio/frequency/Kconfig index f3702f36436c..9e85dfa58508 100644 --- a/drivers/iio/frequency/Kconfig +++ b/drivers/iio/frequency/Kconfig @@ -50,6 +50,16 @@ config ADF4371 To compile this driver as a module, choose M here: the module will be called adf4371. +config ADF4377 + tristate "Analog Devices ADF4377 Microwave Wideband Synthesizer" + depends on SPI && COMMON_CLK + help + Say yes here to build support for Analog Devices ADF4377 Microwave + Wideband Synthesizer. + + To compile this driver as a module, choose M here: the + module will be called adf4377. + config ADMV1013 tristate "Analog Devices ADMV1013 Microwave Upconverter" depends on SPI && COMMON_CLK diff --git a/drivers/iio/frequency/Makefile b/drivers/iio/frequency/Makefile index 48add732f1d3..b616c29b4a08 100644 --- a/drivers/iio/frequency/Makefile +++ b/drivers/iio/frequency/Makefile @@ -7,6 +7,7 @@ obj-$(CONFIG_AD9523) += ad9523.o obj-$(CONFIG_ADF4350) += adf4350.o obj-$(CONFIG_ADF4371) += adf4371.o +obj-$(CONFIG_ADF4377) += adf4377.o obj-$(CONFIG_ADMV1013) += admv1013.o obj-$(CONFIG_ADMV1014) += admv1014.o obj-$(CONFIG_ADMV4420) += admv4420.o diff --git a/drivers/iio/frequency/adf4377.c b/drivers/iio/frequency/adf4377.c new file mode 100644 index 000000000000..26abecbd51e0 --- /dev/null +++ b/drivers/iio/frequency/adf4377.c @@ -0,0 +1,994 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * ADF4377 driver + * + * Copyright 2022 Analog Devices Inc. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/* ADF4377 REG0000 Map */ +#define ADF4377_0000_SOFT_RESET_R_MSK BIT(7) +#define ADF4377_0000_LSB_FIRST_R_MSK BIT(6) +#define ADF4377_0000_ADDRESS_ASC_R_MSK BIT(5) +#define ADF4377_0000_SDO_ACTIVE_R_MSK BIT(4) +#define ADF4377_0000_SDO_ACTIVE_MSK BIT(3) +#define ADF4377_0000_ADDRESS_ASC_MSK BIT(2) +#define ADF4377_0000_LSB_FIRST_MSK BIT(1) +#define ADF4377_0000_SOFT_RESET_MSK BIT(0) + +/* ADF4377 REG0000 Bit Definition */ +#define ADF4377_0000_SDO_ACTIVE_SPI_3W 0x0 +#define ADF4377_0000_SDO_ACTIVE_SPI_4W 0x1 + +#define ADF4377_0000_ADDR_ASC_AUTO_DECR 0x0 +#define ADF4377_0000_ADDR_ASC_AUTO_INCR 0x1 + +#define ADF4377_0000_LSB_FIRST_MSB 0x0 +#define ADF4377_0000_LSB_FIRST_LSB 0x1 + +#define ADF4377_0000_SOFT_RESET_N_OP 0x0 +#define ADF4377_0000_SOFT_RESET_EN 0x1 + +/* ADF4377 REG0001 Map */ +#define ADF4377_0001_SINGLE_INSTR_MSK BIT(7) +#define ADF4377_0001_MASTER_RB_CTRL_MSK BIT(5) + +/* ADF4377 REG0003 Bit Definition */ +#define ADF4377_0003_CHIP_TYPE 0x06 + +/* ADF4377 REG0004 Bit Definition */ +#define ADF4377_0004_PRODUCT_ID_LSB 0x0005 + +/* ADF4377 REG0005 Bit Definition */ +#define ADF4377_0005_PRODUCT_ID_MSB 0x0005 + +/* ADF4377 REG000A Map */ +#define ADF4377_000A_SCRATCHPAD_MSK GENMASK(7, 0) + +/* ADF4377 REG000C Bit Definition */ +#define ADF4377_000C_VENDOR_ID_LSB 0x56 + +/* ADF4377 REG000D Bit Definition */ +#define ADF4377_000D_VENDOR_ID_MSB 0x04 + +/* ADF4377 REG000F Bit Definition */ +#define ADF4377_000F_R00F_RSV1_MSK GENMASK(7, 0) + +/* ADF4377 REG0010 Map*/ +#define ADF4377_0010_N_INT_LSB_MSK GENMASK(7, 0) + +/* ADF4377 REG0011 Map*/ +#define ADF4377_0011_EN_AUTOCAL_MSK BIT(7) +#define ADF4377_0011_EN_RDBLR_MSK BIT(6) +#define ADF4377_0011_DCLK_DIV2_MSK GENMASK(5, 4) +#define ADF4377_0011_N_INT_MSB_MSK GENMASK(3, 0) + +/* ADF4377 REG0011 Bit Definition */ +#define ADF4377_0011_DCLK_DIV2_1 
0x0 +#define ADF4377_0011_DCLK_DIV2_2 0x1 +#define ADF4377_0011_DCLK_DIV2_4 0x2 +#define ADF4377_0011_DCLK_DIV2_8 0x3 + +/* ADF4377 REG0012 Map*/ +#define ADF4377_0012_CLKOUT_DIV_MSK GENMASK(7, 6) +#define ADF4377_0012_R_DIV_MSK GENMASK(5, 0) + +/* ADF4377 REG0012 Bit Definition */ +#define ADF4377_0012_CLKOUT_DIV_1 0x0 +#define ADF4377_0012_CLKOUT_DIV_2 0x1 +#define ADF4377_0012_CLKOUT_DIV_4 0x2 +#define ADF4377_0012_CLKOUT_DIV_8 0x3 + +/* ADF4377 REG0013 Map */ +#define ADF4377_0013_M_VCO_CORE_MSK GENMASK(5, 4) +#define ADF4377_0013_VCO_BIAS_MSK GENMASK(3, 0) + +/* ADF4377 REG0013 Bit Definition */ +#define ADF4377_0013_M_VCO_0 0x0 +#define ADF4377_0013_M_VCO_1 0x1 +#define ADF4377_0013_M_VCO_2 0x2 +#define ADF4377_0013_M_VCO_3 0x3 + +/* ADF4377 REG0014 Map */ +#define ADF4377_0014_M_VCO_BAND_MSK GENMASK(7, 0) + +/* ADF4377 REG0015 Map */ +#define ADF4377_0015_BLEED_I_LSB_MSK GENMASK(7, 6) +#define ADF4377_0015_BLEED_POL_MSK BIT(5) +#define ADF4377_0015_EN_BLEED_MSK BIT(4) +#define ADF4377_0015_CP_I_MSK GENMASK(3, 0) + +/* ADF4377 REG0015 Bit Definition */ +#define ADF4377_CURRENT_SINK 0x0 +#define ADF4377_CURRENT_SOURCE 0x1 + +#define ADF4377_0015_CP_0MA7 0x0 +#define ADF4377_0015_CP_0MA9 0x1 +#define ADF4377_0015_CP_1MA1 0x2 +#define ADF4377_0015_CP_1MA3 0x3 +#define ADF4377_0015_CP_1MA4 0x4 +#define ADF4377_0015_CP_1MA8 0x5 +#define ADF4377_0015_CP_2MA2 0x6 +#define ADF4377_0015_CP_2MA5 0x7 +#define ADF4377_0015_CP_2MA9 0x8 +#define ADF4377_0015_CP_3MA6 0x9 +#define ADF4377_0015_CP_4MA3 0xA +#define ADF4377_0015_CP_5MA0 0xB +#define ADF4377_0015_CP_5MA7 0xC +#define ADF4377_0015_CP_7MA2 0xD +#define ADF4377_0015_CP_8MA6 0xE +#define ADF4377_0015_CP_10MA1 0xF + +/* ADF4377 REG0016 Map */ +#define ADF4377_0016_BLEED_I_MSB_MSK GENMASK(7, 0) + +/* ADF4377 REG0017 Map */ +#define ADF4377_0016_INV_CLKOUT_MSK BIT(7) +#define ADF4377_0016_N_DEL_MSK GENMASK(6, 0) + +/* ADF4377 REG0018 Map */ +#define ADF4377_0018_CMOS_OV_MSK BIT(7) +#define ADF4377_0018_R_DEL_MSK 
GENMASK(6, 0) + +/* ADF4377 REG0018 Bit Definition */ +#define ADF4377_0018_1V8_LOGIC 0x0 +#define ADF4377_0018_3V3_LOGIC 0x1 + +/* ADF4377 REG0019 Map */ +#define ADF4377_0019_CLKOUT2_OP_MSK GENMASK(7, 6) +#define ADF4377_0019_CLKOUT1_OP_MSK GENMASK(5, 4) +#define ADF4377_0019_PD_CLK_MSK BIT(3) +#define ADF4377_0019_PD_RDET_MSK BIT(2) +#define ADF4377_0019_PD_ADC_MSK BIT(1) +#define ADF4377_0019_PD_CALADC_MSK BIT(0) + +/* ADF4377 REG0019 Bit Definition */ +#define ADF4377_0019_CLKOUT_320MV 0x0 +#define ADF4377_0019_CLKOUT_420MV 0x1 +#define ADF4377_0019_CLKOUT_530MV 0x2 +#define ADF4377_0019_CLKOUT_640MV 0x3 + +/* ADF4377 REG001A Map */ +#define ADF4377_001A_PD_ALL_MSK BIT(7) +#define ADF4377_001A_PD_RDIV_MSK BIT(6) +#define ADF4377_001A_PD_NDIV_MSK BIT(5) +#define ADF4377_001A_PD_VCO_MSK BIT(4) +#define ADF4377_001A_PD_LD_MSK BIT(3) +#define ADF4377_001A_PD_PFDCP_MSK BIT(2) +#define ADF4377_001A_PD_CLKOUT1_MSK BIT(1) +#define ADF4377_001A_PD_CLKOUT2_MSK BIT(0) + +/* ADF4377 REG001B Map */ +#define ADF4377_001B_EN_LOL_MSK BIT(7) +#define ADF4377_001B_LDWIN_PW_MSK BIT(6) +#define ADF4377_001B_EN_LDWIN_MSK BIT(5) +#define ADF4377_001B_LD_COUNT_MSK GENMASK(4, 0) + +/* ADF4377 REG001B Bit Definition */ +#define ADF4377_001B_LDWIN_PW_NARROW 0x0 +#define ADF4377_001B_LDWIN_PW_WIDE 0x1 + +/* ADF4377 REG001C Map */ +#define ADF4377_001C_EN_DNCLK_MSK BIT(7) +#define ADF4377_001C_EN_DRCLK_MSK BIT(6) +#define ADF4377_001C_RST_LD_MSK BIT(2) +#define ADF4377_001C_R01C_RSV1_MSK BIT(0) + +/* ADF4377 REG001C Bit Definition */ +#define ADF4377_001C_RST_LD_INACTIVE 0x0 +#define ADF4377_001C_RST_LD_ACTIVE 0x1 + +#define ADF4377_001C_R01C_RSV1 0x1 + +/* ADF4377 REG001D Map */ +#define ADF4377_001D_MUXOUT_MSK GENMASK(7, 4) +#define ADF4377_001D_EN_CPTEST_MSK BIT(2) +#define ADF4377_001D_CP_DOWN_MSK BIT(1) +#define ADF4377_001D_CP_UP_MSK BIT(0) + +#define ADF4377_001D_EN_CPTEST_OFF 0x0 +#define ADF4377_001D_EN_CPTEST_ON 0x1 + +#define ADF4377_001D_CP_DOWN_OFF 0x0 +#define 
ADF4377_001D_CP_DOWN_ON 0x1 + +#define ADF4377_001D_CP_UP_OFF 0x0 +#define ADF4377_001D_CP_UP_ON 0x1 + +/* ADF4377 REG001F Map */ +#define ADF4377_001F_BST_REF_MSK BIT(7) +#define ADF4377_001F_FILT_REF_MSK BIT(6) +#define ADF4377_001F_REF_SEL_MSK BIT(5) +#define ADF4377_001F_R01F_RSV1_MSK GENMASK(4, 0) + +/* ADF4377 REG001F Bit Definition */ +#define ADF4377_001F_BST_LARGE_REF_IN 0x0 +#define ADF4377_001F_BST_SMALL_REF_IN 0x1 + +#define ADF4377_001F_FILT_REF_OFF 0x0 +#define ADF4377_001F_FILT_REF_ON 0x1 + +#define ADF4377_001F_REF_SEL_DMA 0x0 +#define ADF4377_001F_REF_SEL_LNA 0x1 + +#define ADF4377_001F_R01F_RSV1 0x7 + +/* ADF4377 REG0020 Map */ +#define ADF4377_0020_RST_SYS_MSK BIT(4) +#define ADF4377_0020_EN_ADC_CLK_MSK BIT(3) +#define ADF4377_0020_R020_RSV1_MSK BIT(0) + +/* ADF4377 REG0021 Bit Definition */ +#define ADF4377_0021_R021_RSV1 0xD3 + +/* ADF4377 REG0022 Bit Definition */ +#define ADF4377_0022_R022_RSV1 0x32 + +/* ADF4377 REG0023 Map */ +#define ADF4377_0023_CAT_CT_SEL BIT(7) +#define ADF4377_0023_R023_RSV1_MSK GENMASK(6, 0) + +/* ADF4377 REG0023 Bit Definition */ +#define ADF4377_0023_R023_RSV1 0x18 + +/* ADF4377 REG0024 Map */ +#define ADF4377_0024_DCLK_MODE_MSK BIT(2) + +/* ADF4377 REG0025 Map */ +#define ADF4377_0025_CLKODIV_DB_MSK BIT(7) +#define ADF4377_0025_DCLK_DB_MSK BIT(6) +#define ADF4377_0025_R025_RSV1_MSK GENMASK(5, 0) + +/* ADF4377 REG0025 Bit Definition */ +#define ADF4377_0025_R025_RSV1 0x16 + +/* ADF4377 REG0026 Map */ +#define ADF4377_0026_VCO_BAND_DIV_MSK GENMASK(7, 0) + +/* ADF4377 REG0027 Map */ +#define ADF4377_0027_SYNTH_LOCK_TO_LSB_MSK GENMASK(7, 0) + +/* ADF4377 REG0028 Map */ +#define ADF4377_0028_O_VCO_DB_MSK BIT(7) +#define ADF4377_0028_SYNTH_LOCK_TO_MSB_MSK GENMASK(6, 0) + +/* ADF4377 REG0029 Map */ +#define ADF4377_0029_VCO_ALC_TO_LSB_MSK GENMASK(7, 0) + +/* ADF4377 REG002A Map */ +#define ADF4377_002A_DEL_CTRL_DB_MSK BIT(7) +#define ADF4377_002A_VCO_ALC_TO_MSB_MSK GENMASK(6, 0) + +/* ADF4377 REG002C Map */ +#define 
ADF4377_002C_R02C_RSV1 0xC0 + +/* ADF4377 REG002D Map */ +#define ADF4377_002D_ADC_CLK_DIV_MSK GENMASK(7, 0) + +/* ADF4377 REG002E Map */ +#define ADF4377_002E_EN_ADC_CNV_MSK BIT(7) +#define ADF4377_002E_EN_ADC_MSK BIT(1) +#define ADF4377_002E_ADC_A_CONV_MSK BIT(0) + +/* ADF4377 REG002E Bit Definition */ +#define ADF4377_002E_ADC_A_CONV_ADC_ST_CNV 0x0 +#define ADF4377_002E_ADC_A_CONV_VCO_CALIB 0x1 + +/* ADF4377 REG002F Map */ +#define ADF4377_002F_DCLK_DIV1_MSK GENMASK(1, 0) + +/* ADF4377 REG002F Bit Definition */ +#define ADF4377_002F_DCLK_DIV1_1 0x0 +#define ADF4377_002F_DCLK_DIV1_2 0x1 +#define ADF4377_002F_DCLK_DIV1_8 0x2 +#define ADF4377_002F_DCLK_DIV1_32 0x3 + +/* ADF4377 REG0031 Bit Definition */ +#define ADF4377_0031_R031_RSV1 0x09 + +/* ADF4377 REG0032 Map */ +#define ADF4377_0032_ADC_CLK_SEL_MSK BIT(6) +#define ADF4377_0032_R032_RSV1_MSK GENMASK(5, 0) + +/* ADF4377 REG0032 Bit Definition */ +#define ADF4377_0032_ADC_CLK_SEL_N_OP 0x0 +#define ADF4377_0032_ADC_CLK_SEL_SPI_CLK 0x1 + +#define ADF4377_0032_R032_RSV1 0x9 + +/* ADF4377 REG0033 Bit Definition */ +#define ADF4377_0033_R033_RSV1 0x18 + +/* ADF4377 REG0034 Bit Definition */ +#define ADF4377_0034_R034_RSV1 0x08 + +/* ADF4377 REG003A Bit Definition */ +#define ADF4377_003A_R03A_RSV1 0x5D + +/* ADF4377 REG003B Bit Definition */ +#define ADF4377_003B_R03B_RSV1 0x2B + +/* ADF4377 REG003D Map */ +#define ADF4377_003D_O_VCO_BAND_MSK BIT(3) +#define ADF4377_003D_O_VCO_CORE_MSK BIT(2) +#define ADF4377_003D_O_VCO_BIAS_MSK BIT(1) + +/* ADF4377 REG003D Bit Definition */ +#define ADF4377_003D_O_VCO_BAND_VCO_CALIB 0x0 +#define ADF4377_003D_O_VCO_BAND_M_VCO 0x1 + +#define ADF4377_003D_O_VCO_CORE_VCO_CALIB 0x0 +#define ADF4377_003D_O_VCO_CORE_M_VCO 0x1 + +#define ADF4377_003D_O_VCO_BIAS_VCO_CALIB 0x0 +#define ADF4377_003D_O_VCO_BIAS_M_VCO 0x1 + +/* ADF4377 REG0042 Map */ +#define ADF4377_0042_R042_RSV1 0x05 + +/* ADF4377 REG0045 Map */ +#define ADF4377_0045_ADC_ST_CNV_MSK BIT(0) + +/* ADF4377 REG0049 Map */ 
+#define ADF4377_0049_EN_CLK2_MSK BIT(7) +#define ADF4377_0049_EN_CLK1_MSK BIT(6) +#define ADF4377_0049_REF_OK_MSK BIT(3) +#define ADF4377_0049_ADC_BUSY_MSK BIT(2) +#define ADF4377_0049_FSM_BUSY_MSK BIT(1) +#define ADF4377_0049_LOCKED_MSK BIT(0) + +/* ADF4377 REG004B Map */ +#define ADF4377_004B_VCO_CORE_MSK GENMASK(1, 0) + +/* ADF4377 REG004C Map */ +#define ADF4377_004C_CHIP_TEMP_LSB_MSK GENMASK(7, 0) + +/* ADF4377 REG004D Map */ +#define ADF4377_004D_CHIP_TEMP_MSB_MSK BIT(0) + +/* ADF4377 REG004F Map */ +#define ADF4377_004F_VCO_BAND_MSK GENMASK(7, 0) + +/* ADF4377 REG0051 Map */ +#define ADF4377_0051_VCO_BIAS_MSK GENMASK(3, 0) + +/* ADF4377 REG0054 Map */ +#define ADF4377_0054_CHIP_VERSION_MSK GENMASK(7, 0) + +/* Specifications */ +#define ADF4377_SPI_READ_CMD BIT(7) +#define ADF4377_MAX_VCO_FREQ (12800ULL * HZ_PER_MHZ) +#define ADF4377_MIN_VCO_FREQ (6400ULL * HZ_PER_MHZ) +#define ADF4377_MAX_REFIN_FREQ (1000 * HZ_PER_MHZ) +#define ADF4377_MIN_REFIN_FREQ (10 * HZ_PER_MHZ) +#define ADF4377_MAX_FREQ_PFD (500 * HZ_PER_MHZ) +#define ADF4377_MIN_FREQ_PFD (3 * HZ_PER_MHZ) +#define ADF4377_MAX_CLKPN_FREQ ADF4377_MAX_VCO_FREQ +#define ADF4377_MIN_CLKPN_FREQ (ADF4377_MIN_VCO_FREQ / 8) +#define ADF4377_FREQ_PFD_80MHZ (80 * HZ_PER_MHZ) +#define ADF4377_FREQ_PFD_125MHZ (125 * HZ_PER_MHZ) +#define ADF4377_FREQ_PFD_160MHZ (160 * HZ_PER_MHZ) +#define ADF4377_FREQ_PFD_250MHZ (250 * HZ_PER_MHZ) +#define ADF4377_FREQ_PFD_320MHZ (320 * HZ_PER_MHZ) + +enum { + ADF4377_FREQ, +}; + +enum muxout_select_mode { + ADF4377_MUXOUT_HIGH_Z = 0x0, + ADF4377_MUXOUT_LKDET = 0x1, + ADF4377_MUXOUT_LOW = 0x2, + ADF4377_MUXOUT_DIV_RCLK_2 = 0x4, + ADF4377_MUXOUT_DIV_NCLK_2 = 0x5, + ADF4377_MUXOUT_HIGH = 0x8, +}; + +struct adf4377_state { + struct spi_device *spi; + struct regmap *regmap; + struct clk *clkin; + /* Protect against concurrent accesses to the device and data content */ + struct mutex lock; + struct notifier_block nb; + /* Reference Divider */ + unsigned int ref_div_factor; + /* PFD 
Frequency */ + unsigned int f_pfd; + /* Input Reference Clock */ + unsigned int clkin_freq; + /* CLKOUT Divider */ + u8 clkout_div_sel; + /* Feedback Divider (N) */ + u16 n_int; + u16 synth_lock_timeout; + u16 vco_alc_timeout; + u16 adc_clk_div; + u16 vco_band_div; + u8 dclk_div1; + u8 dclk_div2; + u8 dclk_mode; + unsigned int f_div_rclk; + enum muxout_select_mode muxout_select; + struct gpio_desc *gpio_ce; + struct gpio_desc *gpio_enclk1; + struct gpio_desc *gpio_enclk2; + u8 buf[2] __aligned(IIO_DMA_MINALIGN); +}; + +static const char * const adf4377_muxout_modes[] = { + [ADF4377_MUXOUT_HIGH_Z] = "high_z", + [ADF4377_MUXOUT_LKDET] = "lock_detect", + [ADF4377_MUXOUT_LOW] = "muxout_low", + [ADF4377_MUXOUT_DIV_RCLK_2] = "f_div_rclk_2", + [ADF4377_MUXOUT_DIV_NCLK_2] = "f_div_nclk_2", + [ADF4377_MUXOUT_HIGH] = "muxout_high", +}; + +static const struct reg_sequence adf4377_reg_defaults[] = { + { 0x42, ADF4377_0042_R042_RSV1 }, + { 0x3B, ADF4377_003B_R03B_RSV1 }, + { 0x3A, ADF4377_003A_R03A_RSV1 }, + { 0x34, ADF4377_0034_R034_RSV1 }, + { 0x33, ADF4377_0033_R033_RSV1 }, + { 0x32, ADF4377_0032_R032_RSV1 }, + { 0x31, ADF4377_0031_R031_RSV1 }, + { 0x2C, ADF4377_002C_R02C_RSV1 }, + { 0x25, ADF4377_0025_R025_RSV1 }, + { 0x23, ADF4377_0023_R023_RSV1 }, + { 0x22, ADF4377_0022_R022_RSV1 }, + { 0x21, ADF4377_0021_R021_RSV1 }, + { 0x1f, ADF4377_001F_R01F_RSV1 }, + { 0x1c, ADF4377_001C_R01C_RSV1 }, +}; + +static const struct regmap_config adf4377_regmap_config = { + .reg_bits = 16, + .val_bits = 8, + .read_flag_mask = BIT(7), + .max_register = 0x54, +}; + +static int adf4377_reg_access(struct iio_dev *indio_dev, + unsigned int reg, + unsigned int write_val, + unsigned int *read_val) +{ + struct adf4377_state *st = iio_priv(indio_dev); + + if (read_val) + return regmap_read(st->regmap, reg, read_val); + + return regmap_write(st->regmap, reg, write_val); +} + +static const struct iio_info adf4377_info = { + .debugfs_reg_access = &adf4377_reg_access, +}; + +static int 
adf4377_soft_reset(struct adf4377_state *st) +{ + unsigned int read_val; + int ret; + + ret = regmap_update_bits(st->regmap, 0x0, ADF4377_0000_SOFT_RESET_MSK | + ADF4377_0000_SOFT_RESET_R_MSK, + FIELD_PREP(ADF4377_0000_SOFT_RESET_MSK, 1) | + FIELD_PREP(ADF4377_0000_SOFT_RESET_R_MSK, 1)); + if (ret) + return ret; + + /* Wait until both reset bits written above (SOFT_RESET and SOFT_RESET_R) read back as 0. */ + return regmap_read_poll_timeout(st->regmap, 0x0, read_val, + !(read_val & (ADF4377_0000_SOFT_RESET_MSK | + ADF4377_0000_SOFT_RESET_R_MSK)), 200, 200 * 100); +} + +static int adf4377_get_freq(struct adf4377_state *st, u64 *freq) +{ + unsigned int ref_div_factor, n_int; + u64 clkin_freq; + int ret; + + mutex_lock(&st->lock); + ret = regmap_read(st->regmap, 0x12, &ref_div_factor); + if (ret) + goto exit; + + ret = regmap_bulk_read(st->regmap, 0x10, st->buf, sizeof(st->buf)); + if (ret) + goto exit; + + clkin_freq = clk_get_rate(st->clkin); + ref_div_factor = FIELD_GET(ADF4377_0012_R_DIV_MSK, ref_div_factor); + n_int = FIELD_GET(ADF4377_0010_N_INT_LSB_MSK | ADF4377_0011_N_INT_MSB_MSK, + get_unaligned_le16(&st->buf)); + + *freq = div_u64(clkin_freq, ref_div_factor) * n_int; +exit: + mutex_unlock(&st->lock); + + return ret; +} + +static int adf4377_set_freq(struct adf4377_state *st, u64 freq) +{ + unsigned int read_val; + u64 f_vco; + int ret; + + mutex_lock(&st->lock); + + if (freq > ADF4377_MAX_CLKPN_FREQ || freq < ADF4377_MIN_CLKPN_FREQ) { + ret = -EINVAL; + goto exit; + } + + ret = regmap_update_bits(st->regmap, 0x1C, ADF4377_001C_EN_DNCLK_MSK | + ADF4377_001C_EN_DRCLK_MSK, + FIELD_PREP(ADF4377_001C_EN_DNCLK_MSK, 1) | + FIELD_PREP(ADF4377_001C_EN_DRCLK_MSK, 1)); + if (ret) + goto exit; + + ret = regmap_update_bits(st->regmap, 0x11, ADF4377_0011_EN_AUTOCAL_MSK | + ADF4377_0011_DCLK_DIV2_MSK, + FIELD_PREP(ADF4377_0011_EN_AUTOCAL_MSK, 1) | + FIELD_PREP(ADF4377_0011_DCLK_DIV2_MSK, st->dclk_div2)); + if (ret) + goto exit; + + ret = regmap_update_bits(st->regmap, 0x2E, ADF4377_002E_EN_ADC_CNV_MSK | + ADF4377_002E_EN_ADC_MSK | + ADF4377_002E_ADC_A_CONV_MSK, +
FIELD_PREP(ADF4377_002E_EN_ADC_CNV_MSK, 1) | + FIELD_PREP(ADF4377_002E_EN_ADC_MSK, 1) | + FIELD_PREP(ADF4377_002E_ADC_A_CONV_MSK, + ADF4377_002E_ADC_A_CONV_VCO_CALIB)); + if (ret) + goto exit; + + ret = regmap_update_bits(st->regmap, 0x20, ADF4377_0020_EN_ADC_CLK_MSK, + FIELD_PREP(ADF4377_0020_EN_ADC_CLK_MSK, 1)); + if (ret) + goto exit; + + ret = regmap_update_bits(st->regmap, 0x2F, ADF4377_002F_DCLK_DIV1_MSK, + FIELD_PREP(ADF4377_002F_DCLK_DIV1_MSK, st->dclk_div1)); + if (ret) + goto exit; + + ret = regmap_update_bits(st->regmap, 0x24, ADF4377_0024_DCLK_MODE_MSK, + FIELD_PREP(ADF4377_0024_DCLK_MODE_MSK, st->dclk_mode)); + if (ret) + goto exit; + + ret = regmap_write(st->regmap, 0x27, + FIELD_PREP(ADF4377_0027_SYNTH_LOCK_TO_LSB_MSK, + st->synth_lock_timeout)); + if (ret) + goto exit; + + ret = regmap_update_bits(st->regmap, 0x28, ADF4377_0028_SYNTH_LOCK_TO_MSB_MSK, + FIELD_PREP(ADF4377_0028_SYNTH_LOCK_TO_MSB_MSK, + st->synth_lock_timeout >> 8)); + if (ret) + goto exit; + + ret = regmap_write(st->regmap, 0x29, + FIELD_PREP(ADF4377_0029_VCO_ALC_TO_LSB_MSK, + st->vco_alc_timeout)); + if (ret) + goto exit; + + ret = regmap_update_bits(st->regmap, 0x2A, ADF4377_002A_VCO_ALC_TO_MSB_MSK, + FIELD_PREP(ADF4377_002A_VCO_ALC_TO_MSB_MSK, + st->vco_alc_timeout >> 8)); + if (ret) + goto exit; + + ret = regmap_write(st->regmap, 0x26, + FIELD_PREP(ADF4377_0026_VCO_BAND_DIV_MSK, st->vco_band_div)); + if (ret) + goto exit; + + ret = regmap_write(st->regmap, 0x2D, + FIELD_PREP(ADF4377_002D_ADC_CLK_DIV_MSK, st->adc_clk_div)); + if (ret) + goto exit; + + st->clkout_div_sel = 0; + + f_vco = freq; + + while (f_vco < ADF4377_MIN_VCO_FREQ) { + f_vco <<= 1; + st->clkout_div_sel++; + } + + st->n_int = div_u64(freq, st->f_pfd); + + ret = regmap_update_bits(st->regmap, 0x11, ADF4377_0011_EN_RDBLR_MSK | + ADF4377_0011_N_INT_MSB_MSK, + FIELD_PREP(ADF4377_0011_EN_RDBLR_MSK, 0) | + FIELD_PREP(ADF4377_0011_N_INT_MSB_MSK, st->n_int >> 8)); + if (ret) + goto exit; + + ret = 
regmap_update_bits(st->regmap, 0x12, ADF4377_0012_R_DIV_MSK | + ADF4377_0012_CLKOUT_DIV_MSK, + FIELD_PREP(ADF4377_0012_CLKOUT_DIV_MSK, st->clkout_div_sel) | + FIELD_PREP(ADF4377_0012_R_DIV_MSK, st->ref_div_factor)); + if (ret) + goto exit; + + ret = regmap_write(st->regmap, 0x10, + FIELD_PREP(ADF4377_0010_N_INT_LSB_MSK, st->n_int)); + if (ret) + goto exit; + + ret = regmap_read_poll_timeout(st->regmap, 0x49, read_val, + !(read_val & (ADF4377_0049_FSM_BUSY_MSK)), 200, 200 * 100); + if (ret) + goto exit; + + /* Disable EN_DNCLK, EN_DRCLK */ + ret = regmap_update_bits(st->regmap, 0x1C, ADF4377_001C_EN_DNCLK_MSK | + ADF4377_001C_EN_DRCLK_MSK, + FIELD_PREP(ADF4377_001C_EN_DNCLK_MSK, 0) | + FIELD_PREP(ADF4377_001C_EN_DRCLK_MSK, 0)); + if (ret) + goto exit; + + /* Disable EN_ADC_CLK */ + ret = regmap_update_bits(st->regmap, 0x20, ADF4377_0020_EN_ADC_CLK_MSK, + FIELD_PREP(ADF4377_0020_EN_ADC_CLK_MSK, 0)); + if (ret) + goto exit; + + /* Set output Amplitude */ + ret = regmap_update_bits(st->regmap, 0x19, ADF4377_0019_CLKOUT2_OP_MSK | + ADF4377_0019_CLKOUT1_OP_MSK, + FIELD_PREP(ADF4377_0019_CLKOUT1_OP_MSK, + ADF4377_0019_CLKOUT_420MV) | + FIELD_PREP(ADF4377_0019_CLKOUT2_OP_MSK, + ADF4377_0019_CLKOUT_420MV)); + +exit: + mutex_unlock(&st->lock); + + return ret; +} + +static void adf4377_gpio_init(struct adf4377_state *st) +{ + if (st->gpio_ce) { + gpiod_set_value(st->gpio_ce, 1); + + /* Delay for SPI register bits to settle to their power-on reset state */ + fsleep(200); + } + + if (st->gpio_enclk1) + gpiod_set_value(st->gpio_enclk1, 1); + + if (st->gpio_enclk2) + gpiod_set_value(st->gpio_enclk2, 1); +} + +static int adf4377_init(struct adf4377_state *st) +{ + struct spi_device *spi = st->spi; + int ret; + + adf4377_gpio_init(st); + + ret = adf4377_soft_reset(st); + if (ret) { + dev_err(&spi->dev, "Failed to soft reset.\n"); + return ret; + } + + ret = regmap_multi_reg_write(st->regmap, adf4377_reg_defaults, + ARRAY_SIZE(adf4377_reg_defaults)); + if (ret) { + 
dev_err(&spi->dev, "Failed to set default registers.\n"); + return ret; + } + + ret = regmap_update_bits(st->regmap, 0x00, + ADF4377_0000_SDO_ACTIVE_MSK | ADF4377_0000_SDO_ACTIVE_R_MSK, + FIELD_PREP(ADF4377_0000_SDO_ACTIVE_MSK, + ADF4377_0000_SDO_ACTIVE_SPI_4W) | + FIELD_PREP(ADF4377_0000_SDO_ACTIVE_R_MSK, + ADF4377_0000_SDO_ACTIVE_SPI_4W)); + if (ret) { + dev_err(&spi->dev, "Failed to set 4-Wire Operation.\n"); + return ret; + } + + st->clkin_freq = clk_get_rate(st->clkin); + + /* Power Up */ + ret = regmap_write(st->regmap, 0x1a, + FIELD_PREP(ADF4377_001A_PD_ALL_MSK, 0) | + FIELD_PREP(ADF4377_001A_PD_RDIV_MSK, 0) | + FIELD_PREP(ADF4377_001A_PD_NDIV_MSK, 0) | + FIELD_PREP(ADF4377_001A_PD_VCO_MSK, 0) | + FIELD_PREP(ADF4377_001A_PD_LD_MSK, 0) | + FIELD_PREP(ADF4377_001A_PD_PFDCP_MSK, 0) | + FIELD_PREP(ADF4377_001A_PD_CLKOUT1_MSK, 0) | + FIELD_PREP(ADF4377_001A_PD_CLKOUT2_MSK, 0)); + if (ret) { + dev_err(&spi->dev, "Failed to set power down registers.\n"); + return ret; + } + + /* Set Mux Output */ + ret = regmap_update_bits(st->regmap, 0x1D, + ADF4377_001D_MUXOUT_MSK, + FIELD_PREP(ADF4377_001D_MUXOUT_MSK, st->muxout_select)); + if (ret) + return ret; + + /* Compute PFD */ + st->ref_div_factor = 0; + do { + st->ref_div_factor++; + st->f_pfd = st->clkin_freq / st->ref_div_factor; + } while (st->f_pfd > ADF4377_MAX_FREQ_PFD); + + if (st->f_pfd > ADF4377_MAX_FREQ_PFD || st->f_pfd < ADF4377_MIN_FREQ_PFD) + return -EINVAL; + + st->f_div_rclk = st->f_pfd; + + if (st->f_pfd <= ADF4377_FREQ_PFD_80MHZ) { + st->dclk_div1 = ADF4377_002F_DCLK_DIV1_1; + st->dclk_div2 = ADF4377_0011_DCLK_DIV2_1; + st->dclk_mode = 0; + } else if (st->f_pfd <= ADF4377_FREQ_PFD_125MHZ) { + st->dclk_div1 = ADF4377_002F_DCLK_DIV1_1; + st->dclk_div2 = ADF4377_0011_DCLK_DIV2_1; + st->dclk_mode = 1; + } else if (st->f_pfd <= ADF4377_FREQ_PFD_160MHZ) { + st->dclk_div1 = ADF4377_002F_DCLK_DIV1_2; + st->dclk_div2 = ADF4377_0011_DCLK_DIV2_1; + st->dclk_mode = 0; + st->f_div_rclk /= 2; + } else if (st->f_pfd 
<= ADF4377_FREQ_PFD_250MHZ) { + st->dclk_div1 = ADF4377_002F_DCLK_DIV1_2; + st->dclk_div2 = ADF4377_0011_DCLK_DIV2_1; + st->dclk_mode = 1; + st->f_div_rclk /= 2; + } else if (st->f_pfd <= ADF4377_FREQ_PFD_320MHZ) { + st->dclk_div1 = ADF4377_002F_DCLK_DIV1_2; + st->dclk_div2 = ADF4377_0011_DCLK_DIV2_2; + st->dclk_mode = 0; + st->f_div_rclk /= 4; + } else { + st->dclk_div1 = ADF4377_002F_DCLK_DIV1_2; + st->dclk_div2 = ADF4377_0011_DCLK_DIV2_2; + st->dclk_mode = 1; + st->f_div_rclk /= 4; + } + + st->synth_lock_timeout = DIV_ROUND_UP(st->f_div_rclk, 50000); + st->vco_alc_timeout = DIV_ROUND_UP(st->f_div_rclk, 20000); + st->vco_band_div = DIV_ROUND_UP(st->f_div_rclk, 150000 * 16 * (1 << st->dclk_mode)); + st->adc_clk_div = DIV_ROUND_UP((st->f_div_rclk / 400000 - 2), 4); + + return 0; +} + +static ssize_t adf4377_read(struct iio_dev *indio_dev, uintptr_t private, + const struct iio_chan_spec *chan, char *buf) +{ + struct adf4377_state *st = iio_priv(indio_dev); + u64 val = 0; + int ret; + + switch ((u32)private) { + case ADF4377_FREQ: + ret = adf4377_get_freq(st, &val); + if (ret) + return ret; + + return sysfs_emit(buf, "%llu\n", val); + default: + return -EINVAL; + } +} + +static ssize_t adf4377_write(struct iio_dev *indio_dev, uintptr_t private, + const struct iio_chan_spec *chan, const char *buf, + size_t len) +{ + struct adf4377_state *st = iio_priv(indio_dev); + unsigned long long freq; + int ret; + + switch ((u32)private) { + case ADF4377_FREQ: + ret = kstrtoull(buf, 10, &freq); + if (ret) + return ret; + + ret = adf4377_set_freq(st, freq); + if (ret) + return ret; + + return len; + default: + return -EINVAL; + } +} + +#define _ADF4377_EXT_INFO(_name, _shared, _ident) { \ + .name = _name, \ + .read = adf4377_read, \ + .write = adf4377_write, \ + .private = _ident, \ + .shared = _shared, \ + } + +static const struct iio_chan_spec_ext_info adf4377_ext_info[] = { + /* + * Usually we use IIO_CHAN_INFO_FREQUENCY, but there are + * values > 2^32 in order to support the 
entire frequency range + * in Hz. + */ + _ADF4377_EXT_INFO("frequency", IIO_SEPARATE, ADF4377_FREQ), + { } +}; + +static const struct iio_chan_spec adf4377_channels[] = { + { + .type = IIO_ALTVOLTAGE, + .indexed = 1, + .output = 1, + .channel = 0, + .ext_info = adf4377_ext_info, + }, +}; + +/* + * Acquire the "ref_in" clock and the optional chip-enable / clk1-enable / + * clk2-enable GPIOs, then read the optional "adi,muxout-select" property + * (falls back to ADF4377_MUXOUT_HIGH_Z when the property is absent). + * Returns 0 on success or a negative errno. + */ +static int adf4377_properties_parse(struct adf4377_state *st) +{ + struct spi_device *spi = st->spi; + const char *str; + int ret; + + st->clkin = devm_clk_get_enabled(&spi->dev, "ref_in"); + if (IS_ERR(st->clkin)) + return dev_err_probe(&spi->dev, PTR_ERR(st->clkin), + "failed to get the reference input clock\n"); + + st->gpio_ce = devm_gpiod_get_optional(&st->spi->dev, "chip-enable", + GPIOD_OUT_LOW); + if (IS_ERR(st->gpio_ce)) + return dev_err_probe(&spi->dev, PTR_ERR(st->gpio_ce), + "failed to get the CE GPIO\n"); + + st->gpio_enclk1 = devm_gpiod_get_optional(&st->spi->dev, "clk1-enable", + GPIOD_OUT_LOW); + if (IS_ERR(st->gpio_enclk1)) + return dev_err_probe(&spi->dev, PTR_ERR(st->gpio_enclk1), + "failed to get the ENCLK1 GPIO\n"); + + st->gpio_enclk2 = devm_gpiod_get_optional(&st->spi->dev, "clk2-enable", + GPIOD_OUT_LOW); + if (IS_ERR(st->gpio_enclk2)) + return dev_err_probe(&spi->dev, PTR_ERR(st->gpio_enclk2), + "failed to get the ENCLK2 GPIO\n"); + + ret = device_property_read_string(&spi->dev, "adi,muxout-select", &str); + if (ret) { + st->muxout_select = ADF4377_MUXOUT_HIGH_Z; + } else { + /* NOTE(review): adf4377_muxout_modes[] has unset (NULL) slots; confirm match_string() does not stop at the first one. */ + ret = match_string(adf4377_muxout_modes, ARRAY_SIZE(adf4377_muxout_modes), str); + if (ret < 0) + return ret; + + st->muxout_select = ret; + } + + return 0; +} + +static int adf4377_freq_change(struct notifier_block *nb, unsigned long action, void *data) +{ + struct adf4377_state *st = container_of(nb, struct adf4377_state, nb); + int ret; + + if (action == POST_RATE_CHANGE) { + mutex_lock(&st->lock); + ret = notifier_from_errno(adf4377_init(st)); + mutex_unlock(&st->lock); + return ret; + } + + return NOTIFY_OK; +} + +static int adf4377_probe(struct spi_device *spi) +{ + struct iio_dev
*indio_dev; + struct regmap *regmap; + struct adf4377_state *st; + int ret; + + indio_dev = devm_iio_device_alloc(&spi->dev, sizeof(*st)); + if (!indio_dev) + return -ENOMEM; + + regmap = devm_regmap_init_spi(spi, &adf4377_regmap_config); + if (IS_ERR(regmap)) + return PTR_ERR(regmap); + + st = iio_priv(indio_dev); + + indio_dev->info = &adf4377_info; + indio_dev->name = "adf4377"; + indio_dev->channels = adf4377_channels; + indio_dev->num_channels = ARRAY_SIZE(adf4377_channels); + + st->regmap = regmap; + st->spi = spi; + mutex_init(&st->lock); + + ret = adf4377_properties_parse(st); + if (ret) + return ret; + + st->nb.notifier_call = adf4377_freq_change; + ret = devm_clk_notifier_register(&spi->dev, st->clkin, &st->nb); + if (ret) + return ret; + + ret = adf4377_init(st); + if (ret) + return ret; + + return devm_iio_device_register(&spi->dev, indio_dev); +} + +static const struct spi_device_id adf4377_id[] = { + { "adf4377", 0 }, + {} +}; +MODULE_DEVICE_TABLE(spi, adf4377_id); + +static const struct of_device_id adf4377_of_match[] = { + { .compatible = "adi,adf4377" }, + {} +}; +MODULE_DEVICE_TABLE(of, adf4377_of_match); + +static struct spi_driver adf4377_driver = { + .driver = { + .name = "adf4377", + .of_match_table = adf4377_of_match, + }, + .probe = adf4377_probe, + .id_table = adf4377_id, +}; +module_spi_driver(adf4377_driver); + +MODULE_AUTHOR("Antoniu Miclaus "); +MODULE_DESCRIPTION("Analog Devices ADF4377"); +MODULE_LICENSE("GPL"); From 78a01feb4024ffb6c6321e45dc2bfcafb2d1d1e5 Mon Sep 17 00:00:00 2001 From: Zheng Yejian Date: Tue, 25 Oct 2022 15:39:23 +0000 Subject: [PATCH 2552/4122] ftrace: Clean comments related to FTRACE_OPS_FL_PER_CPU Commit b3a88803ac5b ("ftrace: Kill FTRACE_OPS_FL_PER_CPU") didn't completely remove the comments related to FTRACE_OPS_FL_PER_CPU. 
Link: https://lkml.kernel.org/r/20221025153923.1995973-1-zhengyejian1@huawei.com Fixes: b3a88803ac5b ("ftrace: Kill FTRACE_OPS_FL_PER_CPU") Signed-off-by: Zheng Yejian Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 33236241f236..65a5d36463e0 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -163,7 +163,7 @@ static void ftrace_sync_ipi(void *data) static ftrace_func_t ftrace_ops_get_list_func(struct ftrace_ops *ops) { /* - * If this is a dynamic, RCU, or per CPU ops, or we force list func, + * If this is a dynamic or RCU ops, or we force list func, * then it needs to call the list anyway. */ if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_RCU) || @@ -3071,8 +3071,6 @@ out: /* * Dynamic ops may be freed, we must make sure that all * callers are done before leaving this function. - * The same goes for freeing the per_cpu data of the per_cpu - * ops. */ if (ops->flags & FTRACE_OPS_FL_DYNAMIC) { /* @@ -7519,8 +7517,6 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, /* * Check the following for each ops before calling their func: * if RCU flag is set, then rcu_is_watching() must be true - * if PER_CPU is set, then ftrace_function_local_disable() - * must be false * Otherwise test if the ip matches the ops filter * * If any of the above fails then the op->func() is not executed. @@ -7570,8 +7566,8 @@ NOKPROBE_SYMBOL(arch_ftrace_ops_list_func); /* * If there's only one function registered but it does not support - * recursion, needs RCU protection and/or requires per cpu handling, then - * this function will be called by the mcount trampoline. + * recursion, needs RCU protection, then this function will be called + * by the mcount trampoline. 
*/ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs) From 8230f27b1ccc4b8976c137e3d6d690f9d4ffca8d Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Fri, 14 Oct 2022 08:04:56 -0400 Subject: [PATCH 2553/4122] tracing: Add __cpumask to denote a trace event field that is a cpumask_t The trace events have a __bitmask field that can be used for anything that requires bitmasks. Although currently it is only used for CPU masks, it could be used in the future for any type of bitmasks. There is some user space tooling that wants to know if a field is a CPU mask and not just some random unsigned long bitmask. Introduce "__cpumask()" helper functions that work the same as the current __bitmask() helpers but display in the format file: field:__data_loc cpumask_t *[] mask; offset:36; size:4; signed:0; Instead of: field:__data_loc unsigned long[] mask; offset:32; size:4; signed:0; The main difference is the type. Instead of "unsigned long" it is "cpumask_t *". Note, this type field needs to be a real type in the __dynamic_array() logic that both __cpumask and __bitmask use, but the comparison field requires it to be a scalar type whereas cpumask_t is a structure (non-scalar). But everything works when making it a pointer. Valentin added changes to remove the need of passing in "nr_bits" and the __cpumask will always use nr_cpumask_bits as its size.
Link: https://lkml.kernel.org/r/20221014080456.1d32b989@rorschach.local.home Requested-by: Valentin Schneider Reviewed-by: Valentin Schneider Signed-off-by: Valentin Schneider Signed-off-by: Steven Rostedt (Google) --- include/trace/bpf_probe.h | 6 ++++ include/trace/perf.h | 6 ++++ include/trace/stages/stage1_struct_define.h | 6 ++++ include/trace/stages/stage2_data_offsets.h | 6 ++++ include/trace/stages/stage3_trace_output.h | 6 ++++ include/trace/stages/stage4_event_fields.h | 6 ++++ include/trace/stages/stage5_get_offsets.h | 6 ++++ include/trace/stages/stage6_event_callback.h | 20 ++++++++++++ include/trace/stages/stage7_class_define.h | 2 ++ samples/trace_events/trace-events-sample.c | 2 +- samples/trace_events/trace-events-sample.h | 34 +++++++++++++++----- 11 files changed, 91 insertions(+), 9 deletions(-) diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h index 6a13220d2d27..155c495b89ea 100644 --- a/include/trace/bpf_probe.h +++ b/include/trace/bpf_probe.h @@ -21,6 +21,9 @@ #undef __get_bitmask #define __get_bitmask(field) (char *)__get_dynamic_array(field) +#undef __get_cpumask +#define __get_cpumask(field) (char *)__get_dynamic_array(field) + #undef __get_sockaddr #define __get_sockaddr(field) ((struct sockaddr *)__get_dynamic_array(field)) @@ -40,6 +43,9 @@ #undef __get_rel_bitmask #define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field) +#undef __get_rel_cpumask +#define __get_rel_cpumask(field) (char *)__get_rel_dynamic_array(field) + #undef __get_rel_sockaddr #define __get_rel_sockaddr(field) ((struct sockaddr *)__get_rel_dynamic_array(field)) diff --git a/include/trace/perf.h b/include/trace/perf.h index 5800d13146c3..8f3bf1e17707 100644 --- a/include/trace/perf.h +++ b/include/trace/perf.h @@ -21,6 +21,9 @@ #undef __get_bitmask #define __get_bitmask(field) (char *)__get_dynamic_array(field) +#undef __get_cpumask +#define __get_cpumask(field) (char *)__get_dynamic_array(field) + #undef __get_sockaddr #define 
__get_sockaddr(field) ((struct sockaddr *)__get_dynamic_array(field)) @@ -41,6 +44,9 @@ #undef __get_rel_bitmask #define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field) +#undef __get_rel_cpumask +#define __get_rel_cpumask(field) (char *)__get_rel_dynamic_array(field) + #undef __get_rel_sockaddr #define __get_rel_sockaddr(field) ((struct sockaddr *)__get_rel_dynamic_array(field)) diff --git a/include/trace/stages/stage1_struct_define.h b/include/trace/stages/stage1_struct_define.h index 1b7bab60434c..69e0dae453bf 100644 --- a/include/trace/stages/stage1_struct_define.h +++ b/include/trace/stages/stage1_struct_define.h @@ -32,6 +32,9 @@ #undef __bitmask #define __bitmask(item, nr_bits) __dynamic_array(char, item, -1) +#undef __cpumask +#define __cpumask(item) __dynamic_array(char, item, -1) + #undef __sockaddr #define __sockaddr(field, len) __dynamic_array(u8, field, len) @@ -47,6 +50,9 @@ #undef __rel_bitmask #define __rel_bitmask(item, nr_bits) __rel_dynamic_array(char, item, -1) +#undef __rel_cpumask +#define __rel_cpumask(item) __rel_dynamic_array(char, item, -1) + #undef __rel_sockaddr #define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len) diff --git a/include/trace/stages/stage2_data_offsets.h b/include/trace/stages/stage2_data_offsets.h index 1b7a8f764fdd..469b6a64293d 100644 --- a/include/trace/stages/stage2_data_offsets.h +++ b/include/trace/stages/stage2_data_offsets.h @@ -38,6 +38,9 @@ #undef __bitmask #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) +#undef __cpumask +#define __cpumask(item) __dynamic_array(unsigned long, item, -1) + #undef __sockaddr #define __sockaddr(field, len) __dynamic_array(u8, field, len) @@ -53,5 +56,8 @@ #undef __rel_bitmask #define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1) +#undef __rel_cpumask +#define __rel_cpumask(item) __rel_dynamic_array(unsigned long, item, -1) + #undef __rel_sockaddr #define __rel_sockaddr(field, len) 
__rel_dynamic_array(u8, field, len) diff --git a/include/trace/stages/stage3_trace_output.h b/include/trace/stages/stage3_trace_output.h index e3b183e9d18e..66374df61ed3 100644 --- a/include/trace/stages/stage3_trace_output.h +++ b/include/trace/stages/stage3_trace_output.h @@ -42,6 +42,9 @@ trace_print_bitmask_seq(p, __bitmask, __bitmask_size); \ }) +#undef __get_cpumask +#define __get_cpumask(field) __get_bitmask(field) + #undef __get_rel_bitmask #define __get_rel_bitmask(field) \ ({ \ @@ -51,6 +54,9 @@ trace_print_bitmask_seq(p, __bitmask, __bitmask_size); \ }) +#undef __get_rel_cpumask +#define __get_rel_cpumask(field) __get_rel_bitmask(field) + #undef __get_sockaddr #define __get_sockaddr(field) ((struct sockaddr *)__get_dynamic_array(field)) diff --git a/include/trace/stages/stage4_event_fields.h b/include/trace/stages/stage4_event_fields.h index a8fb25f39a99..f2990d22313c 100644 --- a/include/trace/stages/stage4_event_fields.h +++ b/include/trace/stages/stage4_event_fields.h @@ -46,6 +46,9 @@ #undef __bitmask #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) +#undef __cpumask +#define __cpumask(item) __dynamic_array(cpumask_t *, item, -1) + #undef __sockaddr #define __sockaddr(field, len) __dynamic_array(u8, field, len) @@ -64,5 +67,8 @@ #undef __rel_bitmask #define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1) +#undef __rel_cpumask +#define __rel_cpumask(item) __rel_dynamic_array(cpumask_t *, item, -1) + #undef __rel_sockaddr #define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len) diff --git a/include/trace/stages/stage5_get_offsets.h b/include/trace/stages/stage5_get_offsets.h index fba4c24ed9e6..ac5c24d3beeb 100644 --- a/include/trace/stages/stage5_get_offsets.h +++ b/include/trace/stages/stage5_get_offsets.h @@ -82,10 +82,16 @@ #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, \ __bitmask_size_in_longs(nr_bits)) +#undef __cpumask +#define __cpumask(item) 
__bitmask(item, nr_cpumask_bits) + #undef __rel_bitmask #define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, \ __bitmask_size_in_longs(nr_bits)) +#undef __rel_cpumask +#define __rel_cpumask(item) __rel_bitmask(item, nr_cpumask_bits) + #undef __sockaddr #define __sockaddr(field, len) __dynamic_array(u8, field, len) diff --git a/include/trace/stages/stage6_event_callback.h b/include/trace/stages/stage6_event_callback.h index 3c554a585320..49c32394b53f 100644 --- a/include/trace/stages/stage6_event_callback.h +++ b/include/trace/stages/stage6_event_callback.h @@ -57,6 +57,16 @@ #define __assign_bitmask(dst, src, nr_bits) \ memcpy(__get_bitmask(dst), (src), __bitmask_size_in_bytes(nr_bits)) +#undef __cpumask +#define __cpumask(item) __dynamic_array(unsigned long, item, -1) + +#undef __get_cpumask +#define __get_cpumask(field) (char *)__get_dynamic_array(field) + +#undef __assign_cpumask +#define __assign_cpumask(dst, src) \ + memcpy(__get_cpumask(dst), (src), __bitmask_size_in_bytes(nr_cpumask_bits)) + #undef __sockaddr #define __sockaddr(field, len) __dynamic_array(u8, field, len) @@ -98,6 +108,16 @@ #define __assign_rel_bitmask(dst, src, nr_bits) \ memcpy(__get_rel_bitmask(dst), (src), __bitmask_size_in_bytes(nr_bits)) +#undef __rel_cpumask +#define __rel_cpumask(item) __rel_dynamic_array(unsigned long, item, -1) + +#undef __get_rel_cpumask +#define __get_rel_cpumask(field) (char *)__get_rel_dynamic_array(field) + +#undef __assign_rel_cpumask +#define __assign_rel_cpumask(dst, src) \ + memcpy(__get_rel_cpumask(dst), (src), __bitmask_size_in_bytes(nr_cpumask_bits)) + #undef __rel_sockaddr #define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len) diff --git a/include/trace/stages/stage7_class_define.h b/include/trace/stages/stage7_class_define.h index 8a7ec24c246d..8795429f388b 100644 --- a/include/trace/stages/stage7_class_define.h +++ b/include/trace/stages/stage7_class_define.h @@ -13,11 +13,13 @@ #undef __get_dynamic_array_len 
#undef __get_str #undef __get_bitmask +#undef __get_cpumask #undef __get_sockaddr #undef __get_rel_dynamic_array #undef __get_rel_dynamic_array_len #undef __get_rel_str #undef __get_rel_bitmask +#undef __get_rel_cpumask #undef __get_rel_sockaddr #undef __print_array #undef __print_hex_dump diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c index 608c4ae3b08a..ecc7db237f2e 100644 --- a/samples/trace_events/trace-events-sample.c +++ b/samples/trace_events/trace-events-sample.c @@ -50,7 +50,7 @@ static void do_simple_thread_func(int cnt, const char *fmt, ...) trace_foo_with_template_print("I have to be different", cnt); - trace_foo_rel_loc("Hello __rel_loc", cnt, bitmask); + trace_foo_rel_loc("Hello __rel_loc", cnt, bitmask, current->cpus_ptr); } static void simple_thread_func(int cnt) diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h index 1a92226202fc..fb4548a44153 100644 --- a/samples/trace_events/trace-events-sample.h +++ b/samples/trace_events/trace-events-sample.h @@ -200,6 +200,16 @@ * * __assign_bitmask(target_cpus, cpumask_bits(bar), nr_cpumask_bits); * + * __cpumask: This is pretty much the same as __bitmask but is specific for + * CPU masks. The type displayed to the user via the format files will + * be "cpumask_t" such that user space may deal with them differently + * if they choose to do so, and the number of bits is always set to nr_cpumask_bits. + * + * __cpumask(target_cpus) + * + * To assign a cpumask, use the __assign_cpumask() helper macro. + * + * __assign_cpumask(target_cpus, cpumask_bits(bar)); * * fast_assign: This is a C like function that is used to store the items * into the ring buffer. A special variable called "__entry" will be the @@ -212,8 +222,8 @@ * This is also used to print out the data from the trace files. * Again, the __entry macro is used to access the data from the ring buffer. 
* - * Note, __dynamic_array, __string, and __bitmask require special helpers - * to access the data. + * Note, __dynamic_array, __string, __bitmask and __cpumask require special + * helpers to access the data. * * For __dynamic_array(int, foo, bar) use __get_dynamic_array(foo) * Use __get_dynamic_array_len(foo) to get the length of the array @@ -226,6 +236,8 @@ * * For __bitmask(target_cpus, nr_cpumask_bits) use __get_bitmask(target_cpus) * + * For __cpumask(target_cpus) use __get_cpumask(target_cpus) + * * * Note, that for both the assign and the printk, __entry is the handler * to the data structure in the ring buffer, and is defined by the @@ -288,6 +300,7 @@ TRACE_EVENT(foo_bar, __dynamic_array(int, list, __length_of(lst)) __string( str, string ) __bitmask( cpus, num_possible_cpus() ) + __cpumask( cpum ) __vstring( vstr, fmt, va ) ), @@ -299,9 +312,10 @@ TRACE_EVENT(foo_bar, __assign_str(str, string); __assign_vstr(vstr, fmt, va); __assign_bitmask(cpus, cpumask_bits(mask), num_possible_cpus()); + __assign_cpumask(cpum, cpumask_bits(mask)); ), - TP_printk("foo %s %d %s %s %s %s (%s) %s", __entry->foo, __entry->bar, + TP_printk("foo %s %d %s %s %s %s (%s) (%s) %s", __entry->foo, __entry->bar, /* * Notice here the use of some helper functions. 
This includes: @@ -345,7 +359,8 @@ TRACE_EVENT(foo_bar, __print_array(__get_dynamic_array(list), __get_dynamic_array_len(list) / sizeof(int), sizeof(int)), - __get_str(str), __get_bitmask(cpus), __get_str(vstr)) + __get_str(str), __get_bitmask(cpus), __get_cpumask(cpus), + __get_str(vstr)) ); /* @@ -542,15 +557,16 @@ DEFINE_EVENT_PRINT(foo_template, foo_with_template_print, TRACE_EVENT(foo_rel_loc, - TP_PROTO(const char *foo, int bar, unsigned long *mask), + TP_PROTO(const char *foo, int bar, unsigned long *mask, const cpumask_t *cpus), - TP_ARGS(foo, bar, mask), + TP_ARGS(foo, bar, mask, cpus), TP_STRUCT__entry( __rel_string( foo, foo ) __field( int, bar ) __rel_bitmask( bitmask, BITS_PER_BYTE * sizeof(unsigned long) ) + __rel_cpumask( cpumask ) ), TP_fast_assign( @@ -558,10 +574,12 @@ TRACE_EVENT(foo_rel_loc, __entry->bar = bar; __assign_rel_bitmask(bitmask, mask, BITS_PER_BYTE * sizeof(unsigned long)); + __assign_rel_cpumask(cpumask, cpus); ), - TP_printk("foo_rel_loc %s, %d, %s", __get_rel_str(foo), __entry->bar, - __get_rel_bitmask(bitmask)) + TP_printk("foo_rel_loc %s, %d, %s, %s", __get_rel_str(foo), __entry->bar, + __get_rel_bitmask(bitmask), + __get_rel_cpumask(cpumask)) ); #endif From a01fdc897fa56ffb596d0a0cd7ea2ab3bd8398c5 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 20 Oct 2022 21:00:56 -0400 Subject: [PATCH 2554/4122] tracing: Add trace_trigger kernel command line option Allow triggers to be enabled at kernel boot up. For example: trace_trigger="sched_switch.stacktrace if prev_state == 2" The above will enable the stacktrace trigger on top of the sched_switch event and only trigger if its prev_state is 2 (TASK_UNINTERRUPTIBLE). Then at boot up, a stacktrace will trigger and be recorded in the tracing ring buffer every time the sched_switch happens where the previous state is TASK_UNINTERRUPTIBLE. 
Another useful trigger would be "traceoff" which can stop tracing on an event if a field of the event matches a certain value defined by the filter ("if" statement). Link: https://lore.kernel.org/linux-trace-kernel/20221020210056.0d8d0a5b@gandalf.local.home Signed-off-by: Steven Rostedt (Google) --- .../admin-guide/kernel-parameters.txt | 19 +++++ kernel/trace/trace_events.c | 72 ++++++++++++++++++- 2 files changed, 89 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index a465d5242774..ccf91a4bf113 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -6257,6 +6257,25 @@ See also Documentation/trace/ftrace.rst "trace options" section. + trace_trigger=[trigger-list] + [FTRACE] Add an event trigger on specific events. + Set a trigger on top of a specific event, with an optional + filter. + + The format is "trace_trigger=<event>.<trigger>[ if <filter>],..." + Where more than one trigger may be specified that are comma delimited. + + For example: + + trace_trigger="sched_switch.stacktrace if prev_state == 2" + + The above will enable the "stacktrace" trigger on the "sched_switch" + event but only trigger it if the "prev_state" of the "sched_switch" + event is "2" (TASK_UNINTERRUPTIBLE). + + See also "Event triggers" in Documentation/trace/events.rst + + traceoff_on_warning [FTRACE] enable this option to disable tracing when a warning is hit. This turns off "tracing_on". 
Tracing can diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index f71ea6e79b3c..3bfaf560ecc4 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2796,6 +2796,44 @@ trace_create_new_event(struct trace_event_call *call, return file; } +#ifdef CONFIG_HIST_TRIGGERS +#define MAX_BOOT_TRIGGERS 32 + +static struct boot_triggers { + const char *event; + char *trigger; +} bootup_triggers[MAX_BOOT_TRIGGERS]; + +static char bootup_trigger_buf[COMMAND_LINE_SIZE]; +static int nr_boot_triggers; + +static __init int setup_trace_triggers(char *str) +{ + char *trigger; + char *buf; + int i; + + strlcpy(bootup_trigger_buf, str, COMMAND_LINE_SIZE); + ring_buffer_expanded = true; + disable_tracing_selftest("running event triggers"); + + buf = bootup_trigger_buf; + for (i = 0; i < MAX_BOOT_TRIGGERS; i++) { + trigger = strsep(&buf, ","); + if (!trigger) + break; + bootup_triggers[i].event = strsep(&trigger, "."); + bootup_triggers[i].trigger = strsep(&trigger, "."); + if (!bootup_triggers[i].trigger) + break; + } + + nr_boot_triggers = i; + return 1; +} +__setup("trace_trigger=", setup_trace_triggers); +#endif + /* Add an event to a trace directory */ static int __trace_add_new_event(struct trace_event_call *call, struct trace_array *tr) @@ -2812,6 +2850,28 @@ __trace_add_new_event(struct trace_event_call *call, struct trace_array *tr) return event_define_fields(call); } +#ifdef CONFIG_HIST_TRIGGERS +static void trace_early_triggers(struct trace_event_file *file, const char *name) +{ + int ret; + int i; + + for (i = 0; i < nr_boot_triggers; i++) { + if (strcmp(name, bootup_triggers[i].event)) + continue; + mutex_lock(&event_mutex); + ret = trigger_process_regex(file, bootup_triggers[i].trigger); + mutex_unlock(&event_mutex); + if (ret) + pr_err("Failed to register trigger '%s' on event %s\n", + bootup_triggers[i].trigger, + bootup_triggers[i].event); + } +} +#else +static inline void trace_early_triggers(struct trace_event_file *file, 
const char *name) { } +#endif + /* * Just create a descriptor for early init. A descriptor is required * for enabling events at boot. We want to enable events before @@ -2822,12 +2882,19 @@ __trace_early_add_new_event(struct trace_event_call *call, struct trace_array *tr) { struct trace_event_file *file; + int ret; file = trace_create_new_event(call, tr); if (!file) return -ENOMEM; - return event_define_fields(call); + ret = event_define_fields(call); + if (ret) + return ret; + + trace_early_triggers(file, trace_event_name(call)); + + return 0; } struct ftrace_module_file_ops; @@ -3735,6 +3802,8 @@ static __init int event_trace_enable(void) list_add(&call->list, &ftrace_events); } + register_trigger_cmds(); + /* * We need the top trace array to have a working set of trace * points at early init, before the debug files and directories @@ -3749,7 +3818,6 @@ static __init int event_trace_enable(void) register_event_cmds(); - register_trigger_cmds(); return 0; } From 04aabc32fb677f91d676fd306bca1043805e78d5 Mon Sep 17 00:00:00 2001 From: Song Chen Date: Thu, 20 Oct 2022 22:06:51 +0800 Subject: [PATCH 2555/4122] ring_buffer: Remove unused "event" parameter After commit a389d86f7fd0 ("ring-buffer: Have nested events still record running time stamp"), the "event" parameter is no longer used in either ring_buffer_unlock_commit() or rb_commit(). Best to remove it. 
Link: https://lkml.kernel.org/r/1666274811-24138-1-git-send-email-chensong_2000@189.cn Signed-off-by: Song Chen Signed-off-by: Steven Rostedt (Google) --- include/linux/ring_buffer.h | 3 +-- kernel/trace/ring_buffer.c | 12 +++++------- kernel/trace/ring_buffer_benchmark.c | 2 +- kernel/trace/trace.c | 2 +- 4 files changed, 8 insertions(+), 11 deletions(-) diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 3c7d295746f6..782e14f62201 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -113,8 +113,7 @@ void ring_buffer_change_overwrite(struct trace_buffer *buffer, int val); struct ring_buffer_event *ring_buffer_lock_reserve(struct trace_buffer *buffer, unsigned long length); -int ring_buffer_unlock_commit(struct trace_buffer *buffer, - struct ring_buffer_event *event); +int ring_buffer_unlock_commit(struct trace_buffer *buffer); int ring_buffer_write(struct trace_buffer *buffer, unsigned long length, void *data); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index b21bf14bae9b..843818ee4814 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3180,8 +3180,7 @@ static inline void rb_event_discard(struct ring_buffer_event *event) event->time_delta = 1; } -static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, - struct ring_buffer_event *event) +static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer) { local_inc(&cpu_buffer->entries); rb_end_commit(cpu_buffer); @@ -3383,15 +3382,14 @@ void ring_buffer_nest_end(struct trace_buffer *buffer) * * Must be paired with ring_buffer_lock_reserve. 
*/ -int ring_buffer_unlock_commit(struct trace_buffer *buffer, - struct ring_buffer_event *event) +int ring_buffer_unlock_commit(struct trace_buffer *buffer) { struct ring_buffer_per_cpu *cpu_buffer; int cpu = raw_smp_processor_id(); cpu_buffer = buffer->buffers[cpu]; - rb_commit(cpu_buffer, event); + rb_commit(cpu_buffer); rb_wakeups(buffer, cpu_buffer); @@ -3977,7 +3975,7 @@ int ring_buffer_write(struct trace_buffer *buffer, memcpy(body, data, length); - rb_commit(cpu_buffer, event); + rb_commit(cpu_buffer); rb_wakeups(buffer, cpu_buffer); @@ -5998,7 +5996,7 @@ static __init int rb_write_something(struct rb_test_data *data, bool nested) } out: - ring_buffer_unlock_commit(data->buffer, event); + ring_buffer_unlock_commit(data->buffer); return 0; } diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index 78e576575b79..aef34673d79d 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c @@ -258,7 +258,7 @@ static void ring_buffer_producer(void) hit++; entry = ring_buffer_event_data(event); *entry = smp_processor_id(); - ring_buffer_unlock_commit(buffer, event); + ring_buffer_unlock_commit(buffer); } } end_time = ktime_get(); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5cfc95a52bc3..5c97dbef741b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -999,7 +999,7 @@ __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *ev /* ring_buffer_unlock_commit() enables preemption */ preempt_enable_notrace(); } else - ring_buffer_unlock_commit(buffer, event); + ring_buffer_unlock_commit(buffer); } /** From b179d48b6aab21f3999f5006685ea4254c0618a9 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Thu, 17 Nov 2022 14:46:18 +0100 Subject: [PATCH 2556/4122] tracing/osnoise: Add osnoise/options file Add the tracing/osnoise/options file to control osnoise/timerlat tracer features. 
It is a single file to contain multiple features, similar to the sched/features file. Reading the file displays a list of options. Writing the OPTION_NAME enables it, writing NO_OPTION_NAME disables it. DEFAULTS is a special option that resets the options to the default ones. It uses a bitmask to keep track of the status of the option. When needed, we can add a list of static keys, but for now it does not justify the memory increase. Link: https://lkml.kernel.org/r/f8d34aefdb225d2603fcb4c02a120832a0cd3339.1668692096.git.bristot@kernel.org Cc: Daniel Bristot de Oliveira Cc: Masami Hiramatsu Cc: Jonathan Corbet Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_osnoise.c | 170 +++++++++++++++++++++++++++++++++++ 1 file changed, 170 insertions(+) diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index 4300c5dc4e5d..17b77fe3950b 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -48,6 +48,19 @@ #define DEFAULT_TIMERLAT_PERIOD 1000 /* 1ms */ #define DEFAULT_TIMERLAT_PRIO 95 /* FIFO 95 */ +/* + * osnoise/options entries. + */ +enum osnoise_options_index { + OSN_DEFAULTS = 0, + OSN_MAX +}; + +static const char * const osnoise_options_str[OSN_MAX] = { "DEFAULTS" }; + +#define OSN_DEFAULT_OPTIONS 0 +unsigned long osnoise_options = OSN_DEFAULT_OPTIONS; + /* * trace_array of the enabled osnoise/timerlat instances. */ @@ -1860,6 +1873,150 @@ static void osnoise_init_hotplug_support(void) } #endif /* CONFIG_HOTPLUG_CPU */ +/* + * seq file functions for the osnoise/options file. 
+ */ +static void *s_options_start(struct seq_file *s, loff_t *pos) +{ + int option = *pos; + + mutex_lock(&interface_lock); + + if (option >= OSN_MAX) + return NULL; + + return pos; +} + +static void *s_options_next(struct seq_file *s, void *v, loff_t *pos) +{ + int option = ++(*pos); + + if (option >= OSN_MAX) + return NULL; + + return pos; +} + +static int s_options_show(struct seq_file *s, void *v) +{ + loff_t *pos = v; + int option = *pos; + + if (option == OSN_DEFAULTS) { + if (osnoise_options == OSN_DEFAULT_OPTIONS) + seq_printf(s, "%s", osnoise_options_str[option]); + else + seq_printf(s, "NO_%s", osnoise_options_str[option]); + goto out; + } + + if (test_bit(option, &osnoise_options)) + seq_printf(s, "%s", osnoise_options_str[option]); + else + seq_printf(s, "NO_%s", osnoise_options_str[option]); + +out: + if (option != OSN_MAX) + seq_puts(s, " "); + + return 0; +} + +static void s_options_stop(struct seq_file *s, void *v) +{ + seq_puts(s, "\n"); + mutex_unlock(&interface_lock); +} + +static const struct seq_operations osnoise_options_seq_ops = { + .start = s_options_start, + .next = s_options_next, + .show = s_options_show, + .stop = s_options_stop +}; + +static int osnoise_options_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &osnoise_options_seq_ops); +}; + +/** + * osnoise_options_write - Write function for "options" entry + * @filp: The active open file structure + * @ubuf: The user buffer that contains the value to write + * @cnt: The maximum number of bytes to write to "file" + * @ppos: The current position in @file + * + * Writing the option name sets the option, writing the "NO_" + * prefix in front of the option name disables it. + * + * Writing "DEFAULTS" resets the option values to the default ones. 
+ */ +static ssize_t osnoise_options_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + int running, option, enable, retval; + char buf[256], *option_str; + + if (cnt >= 256) + return -EINVAL; + + if (copy_from_user(buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + if (strncmp(buf, "NO_", 3)) { + option_str = strstrip(buf); + enable = true; + } else { + option_str = strstrip(&buf[3]); + enable = false; + } + + option = match_string(osnoise_options_str, OSN_MAX, option_str); + if (option < 0) + return -EINVAL; + + /* + * trace_types_lock is taken to avoid concurrency on start/stop. + */ + mutex_lock(&trace_types_lock); + running = osnoise_has_registered_instances(); + if (running) + stop_per_cpu_kthreads(); + + mutex_lock(&interface_lock); + /* + * avoid CPU hotplug operations that might read options. + */ + cpus_read_lock(); + + retval = cnt; + + if (enable) { + if (option == OSN_DEFAULTS) + osnoise_options = OSN_DEFAULT_OPTIONS; + else + set_bit(option, &osnoise_options); + } else { + if (option == OSN_DEFAULTS) + retval = -EINVAL; + else + clear_bit(option, &osnoise_options); + } + + cpus_read_unlock(); + mutex_unlock(&interface_lock); + + if (running) + start_per_cpu_kthreads(); + mutex_unlock(&trace_types_lock); + + return retval; +} + /* * osnoise_cpus_read - Read function for reading the "cpus" file * @filp: The active open file structure @@ -2042,6 +2199,14 @@ static const struct file_operations cpus_fops = { .llseek = generic_file_llseek, }; +static const struct file_operations osnoise_options_fops = { + .open = osnoise_options_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, + .write = osnoise_options_write +}; + #ifdef CONFIG_TIMERLAT_TRACER #ifdef CONFIG_STACKTRACE static int init_timerlat_stack_tracefs(struct dentry *top_dir) @@ -2128,6 +2293,11 @@ static int init_tracefs(void) if (!tmp) goto err; + tmp = trace_create_file("options", TRACE_MODE_WRITE, top_dir, NULL, + 
&osnoise_options_fops); + if (!tmp) + goto err; + ret = init_timerlat_tracefs(top_dir); if (ret) goto err; From 30838fcd81078d078b10209bc18a6357ba4dd5fa Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Thu, 17 Nov 2022 14:46:19 +0100 Subject: [PATCH 2557/4122] tracing/osnoise: Add OSNOISE_WORKLOAD option The osnoise tracer is not only a tracer, and a set of tracepoints, but also a workload dispatcher. In preparation for having other workloads, e.g., in user-space, add an option to avoid dispatching the workload. By not dispatching the workload, the osnoise: tracepoints become generic events to measure the execution time of *any* task on Linux. For example: # cd /sys/kernel/tracing/ # cat osnoise/options DEFAULTS OSNOISE_WORKLOAD # echo NO_OSNOISE_WORKLOAD > osnoise/options # cat osnoise/options NO_DEFAULTS NO_OSNOISE_WORKLOAD # echo osnoise > set_event # echo osnoise > current_tracer # tail -8 trace make-94722 [002] d..3. 1371.794507: thread_noise: make:94722 start 1371.794302286 duration 200897 ns sh-121042 [020] d..3. 1371.794534: thread_noise: sh:121042 start 1371.781610976 duration 8943683 ns make-121097 [005] d..3. 1371.794542: thread_noise: make:121097 start 1371.794481522 duration 60444 ns <...>-40 [005] d..3. 1371.794550: thread_noise: migration/5:40 start 1371.794542256 duration 7154 ns -0 [018] dNh2. 1371.794554: irq_noise: reschedule:253 start 1371.794553547 duration 40 ns -0 [018] dNh2. 1371.794561: irq_noise: local_timer:236 start 1371.794556222 duration 4890 ns -0 [018] .Ns2. 1371.794563: softirq_noise: SCHED:7 start 1371.794561803 duration 992 ns -0 [018] d..3. 1371.794566: thread_noise: swapper/18:0 start 1371.781368110 duration 13191798 ns In preparation for the rtla exec_time tracer/tool and rtla osnoise --user option. 
Link: https://lkml.kernel.org/r/f5cfbd37aefd419eefe9243b4d2fc38ed5753fe4.1668692096.git.bristot@kernel.org Cc: Daniel Bristot de Oliveira Cc: Masami Hiramatsu Cc: Jonathan Corbet Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_osnoise.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index 17b77fe3950b..3f10dd1f2f1c 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -53,12 +53,13 @@ */ enum osnoise_options_index { OSN_DEFAULTS = 0, + OSN_WORKLOAD, OSN_MAX }; -static const char * const osnoise_options_str[OSN_MAX] = { "DEFAULTS" }; +static const char * const osnoise_options_str[OSN_MAX] = { "DEFAULTS", "OSNOISE_WORKLOAD" }; -#define OSN_DEFAULT_OPTIONS 0 +#define OSN_DEFAULT_OPTIONS 0x2 unsigned long osnoise_options = OSN_DEFAULT_OPTIONS; /* @@ -1186,11 +1187,12 @@ trace_sched_switch_callback(void *data, bool preempt, unsigned int prev_state) { struct osnoise_variables *osn_var = this_cpu_osn_var(); + int workload = test_bit(OSN_WORKLOAD, &osnoise_options); - if (p->pid != osn_var->pid) + if ((p->pid != osn_var->pid) || !workload) thread_exit(osn_var, p); - if (n->pid != osn_var->pid) + if ((n->pid != osn_var->pid) || !workload) thread_entry(osn_var, n); } @@ -1723,9 +1725,16 @@ static void stop_kthread(unsigned int cpu) struct task_struct *kthread; kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread; - if (kthread) + if (kthread) { kthread_stop(kthread); - per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL; + per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL; + } else { + if (!test_bit(OSN_WORKLOAD, &osnoise_options)) { + per_cpu(per_cpu_osnoise_var, cpu).sampling = false; + barrier(); + return; + } + } } /* @@ -1759,6 +1768,13 @@ static int start_kthread(unsigned int cpu) snprintf(comm, 24, "timerlat/%d", cpu); main = timerlat_main; } else { + /* if no workload, just return */ 
+ if (!test_bit(OSN_WORKLOAD, &osnoise_options)) { + per_cpu(per_cpu_osnoise_var, cpu).sampling = true; + barrier(); + return 0; + } + snprintf(comm, 24, "osnoise/%d", cpu); } From 67543cd6b8eee53959e624b9ce420ca4d47be0c8 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Thu, 17 Nov 2022 14:46:20 +0100 Subject: [PATCH 2558/4122] Documentation/osnoise: Add osnoise/options documentation Add the documentation about the osnoise/options file, along with an explanation about the OSNOISE_WORKLOAD option. Link: https://lkml.kernel.org/r/777af8f3d87beedd304805f98eff6c8291d64226.1668692096.git.bristot@kernel.org Cc: Daniel Bristot de Oliveira Cc: Masami Hiramatsu Cc: Jonathan Corbet Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (Google) --- Documentation/trace/osnoise-tracer.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Documentation/trace/osnoise-tracer.rst b/Documentation/trace/osnoise-tracer.rst index 963def9f97c6..3c675ed82b27 100644 --- a/Documentation/trace/osnoise-tracer.rst +++ b/Documentation/trace/osnoise-tracer.rst @@ -109,6 +109,11 @@ The tracer has a set of options inside the osnoise directory, they are: - tracing_threshold: the minimum delta between two time() reads to be considered as noise, in us. When set to 0, the default value will be used, which is currently 5 us. + - osnoise/options: a set of on/off options that can be enabled by + writing the option name to the file or disabled by writing the option + name preceded with the 'NO_' prefix. For example, writing + NO_OSNOISE_WORKLOAD disables the OSNOISE_WORKLOAD option. The + special DEFAULTS option resets all options to the default value. Additional Tracing ------------------ @@ -150,3 +155,10 @@ tracepoints is smaller than eight us reported in the sample_threshold. The reason roots in the overhead of the entry and exit code that happens before and after any interference execution. 
This justifies the dual approach: measuring thread and tracing. + +Running osnoise tracer without workload +--------------------------------------- + +By enabling the osnoise tracer with the NO_OSNOISE_WORKLOAD option set, +the osnoise: tracepoints serve to measure the execution time of +any type of Linux task, free from the interference of other tasks. From 9430cd62b6ccdf1f06915cc06561f0e364809604 Mon Sep 17 00:00:00 2001 From: Chuang Wang Date: Mon, 21 Nov 2022 16:08:31 +0800 Subject: [PATCH 2559/4122] tracing/perf: Use strndup_user instead of kzalloc/strncpy_from_user This patch uses strndup_user instead of kzalloc + strncpy_from_user, which makes the code more concise. Link: https://lkml.kernel.org/r/20221121080831.707409-1-nashuiliang@gmail.com Signed-off-by: Chuang Wang Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_event_perf.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 61e3a2620fa3..05e791241812 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -251,16 +251,12 @@ int perf_kprobe_init(struct perf_event *p_event, bool is_retprobe) struct trace_event_call *tp_event; if (p_event->attr.kprobe_func) { - func = kzalloc(KSYM_NAME_LEN, GFP_KERNEL); - if (!func) - return -ENOMEM; - ret = strncpy_from_user( - func, u64_to_user_ptr(p_event->attr.kprobe_func), - KSYM_NAME_LEN); - if (ret == KSYM_NAME_LEN) - ret = -E2BIG; - if (ret < 0) - goto out; + func = strndup_user(u64_to_user_ptr(p_event->attr.kprobe_func), + KSYM_NAME_LEN); + if (IS_ERR(func)) { + ret = PTR_ERR(func); + return (ret == -EINVAL) ? 
-E2BIG : ret; + } if (func[0] == '\0') { kfree(func); From a76d4648a0bbd624a1322c15073b5032a5135f01 Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Tue, 22 Nov 2022 17:14:56 +0800 Subject: [PATCH 2560/4122] tracing: Make tracepoint_print_iter static After change in commit 4239174570da ("tracing: Make tracepoint_printk a static_key"), this symbol is not used outside of the file, so mark it static. Link: https://lkml.kernel.org/r/20221122091456.72055-1-xiujianfeng@huawei.com Signed-off-by: Xiu Jianfeng Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 2 +- kernel/trace/trace.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5c97dbef741b..93a75a97118f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -85,7 +85,7 @@ void __init disable_tracing_selftest(const char *reason) #endif /* Pipe tracepoints to printk */ -struct trace_iterator *tracepoint_print_iter; +static struct trace_iterator *tracepoint_print_iter; int tracepoint_printk; static bool tracepoint_printk_stop_on_boot __initdata; static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index d42e24507152..48643f07bc01 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1942,8 +1942,6 @@ static inline void tracer_hardirqs_on(unsigned long a0, unsigned long a1) { } static inline void tracer_hardirqs_off(unsigned long a0, unsigned long a1) { } #endif -extern struct trace_iterator *tracepoint_print_iter; - /* * Reset the state of the trace_iterator so that it can read consumed data. 
* Normally, the trace_iterator is used for reading the data when it is not From 96e6122cb79616c622ae0d025eb9f981120b568d Mon Sep 17 00:00:00 2001 From: Zheng Yejian Date: Thu, 10 Nov 2022 10:03:19 +0800 Subject: [PATCH 2561/4122] tracing: Optimize event type allocation with IDA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After commit 060fa5c83e67 ("tracing/events: reuse trace event ids after overflow"), trace events with dynamic type are linked up in list 'ftrace_event_list' through field 'trace_event.list'. Then when max event type number used up, it's possible to reuse type number of some freed one by traversing 'ftrace_event_list'. As instead, using IDA to manage available type numbers can make codes simpler and then the field 'trace_event.list' can be dropped. Since 'struct trace_event' is used in static tracepoints, drop 'trace_event.list' can make vmlinux smaller. Local test with about 2000 tracepoints, vmlinux reduced about 64KB: before:-rwxrwxr-x 1 root root 76669448 Nov 8 17:14 vmlinux after: -rwxrwxr-x 1 root root 76604176 Nov 8 17:15 vmlinux Link: https://lkml.kernel.org/r/20221110020319.1259291-1-zhengyejian1@huawei.com Signed-off-by: Zheng Yejian Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/trace_events.h | 1 - kernel/trace/trace_output.c | 66 +++++++++--------------------------- 2 files changed, 16 insertions(+), 51 deletions(-) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 20749bd9db71..bb2053246d6a 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -136,7 +136,6 @@ struct trace_event_functions { struct trace_event { struct hlist_node node; - struct list_head list; int type; struct trace_event_functions *funcs; }; diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 67f47ea27921..f0ba97121345 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ 
-11,6 +11,7 @@ #include #include #include +#include #include "trace_output.h" @@ -21,8 +22,6 @@ DECLARE_RWSEM(trace_event_sem); static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly; -static int next_event_type = __TRACE_LAST_TYPE; - enum print_line_t trace_print_bputs_msg_only(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; @@ -688,38 +687,23 @@ struct trace_event *ftrace_find_event(int type) return NULL; } -static LIST_HEAD(ftrace_event_list); +static DEFINE_IDA(trace_event_ida); -static int trace_search_list(struct list_head **list) +static void free_trace_event_type(int type) { - struct trace_event *e = NULL, *iter; - int next = __TRACE_LAST_TYPE; + if (type >= __TRACE_LAST_TYPE) + ida_free(&trace_event_ida, type); +} - if (list_empty(&ftrace_event_list)) { - *list = &ftrace_event_list; - return next; - } +static int alloc_trace_event_type(void) +{ + int next; - /* - * We used up all possible max events, - * lets see if somebody freed one. - */ - list_for_each_entry(iter, &ftrace_event_list, list) { - if (iter->type != next) { - e = iter; - break; - } - next++; - } - - /* Did we used up all 65 thousand events??? 
*/ - if (next > TRACE_EVENT_TYPE_MAX) + /* Skip static defined type numbers */ + next = ida_alloc_range(&trace_event_ida, __TRACE_LAST_TYPE, + TRACE_EVENT_TYPE_MAX, GFP_KERNEL); + if (next < 0) return 0; - - if (e) - *list = &e->list; - else - *list = &ftrace_event_list; return next; } @@ -761,28 +745,10 @@ int register_trace_event(struct trace_event *event) if (WARN_ON(!event->funcs)) goto out; - INIT_LIST_HEAD(&event->list); - if (!event->type) { - struct list_head *list = NULL; - - if (next_event_type > TRACE_EVENT_TYPE_MAX) { - - event->type = trace_search_list(&list); - if (!event->type) - goto out; - - } else { - - event->type = next_event_type++; - list = &ftrace_event_list; - } - - if (WARN_ON(ftrace_find_event(event->type))) + event->type = alloc_trace_event_type(); + if (!event->type) goto out; - - list_add_tail(&event->list, list); - } else if (WARN(event->type > __TRACE_LAST_TYPE, "Need to add type to trace.h")) { goto out; @@ -819,7 +785,7 @@ EXPORT_SYMBOL_GPL(register_trace_event); int __unregister_trace_event(struct trace_event *event) { hlist_del(&event->node); - list_del(&event->list); + free_trace_event_type(event->type); return 0; } From bd604f3db49c5b21171abea0414a2020dcbf2646 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 22 Nov 2022 18:09:05 -0500 Subject: [PATCH 2562/4122] ftrace: Avoid needless updates of the ftrace function call Song Shuai reported: The list func (ftrace_ops_list_func) will be patched first before the transition between old and new calls are set, which fixed the race described in this commit `59338f75`. While ftrace_trace_function changes from the list func to a ftrace_ops func, like unregistering the klp_ops to leave the only global_ops in ftrace_ops_list, the ftrace_[regs]_call will be replaced with the list func although it already exists. So there should be a condition to avoid this. And suggested using another variable to keep track of what the ftrace function is set to. 
But this could be simplified by using a helper function that does the same with a static variable. Link: https://lore.kernel.org/lkml/20221026132039.2236233-1-suagrfillet@gmail.com/ Link: https://lore.kernel.org/linux-trace-kernel/20221122180905.737b6f52@gandalf.local.home Reported-by: Song Shuai Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 65a5d36463e0..d04552c0c275 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2763,6 +2763,19 @@ void __weak ftrace_arch_code_modify_post_process(void) { } +static int update_ftrace_func(ftrace_func_t func) +{ + static ftrace_func_t save_func; + + /* Avoid updating if it hasn't changed */ + if (func == save_func) + return 0; + + save_func = func; + + return ftrace_update_ftrace_func(func); +} + void ftrace_modify_all_code(int command) { int update = command & FTRACE_UPDATE_TRACE_FUNC; @@ -2783,7 +2796,7 @@ void ftrace_modify_all_code(int command) * traced. */ if (update) { - err = ftrace_update_ftrace_func(ftrace_ops_list_func); + err = update_ftrace_func(ftrace_ops_list_func); if (FTRACE_WARN_ON(err)) return; } @@ -2799,7 +2812,7 @@ void ftrace_modify_all_code(int command) /* If irqs are disabled, we are in stop machine */ if (!irqs_disabled()) smp_call_function(ftrace_sync_ipi, NULL, 1); - err = ftrace_update_ftrace_func(ftrace_trace_function); + err = update_ftrace_func(ftrace_trace_function); if (FTRACE_WARN_ON(err)) return; } From d57d98fef46fead01d954afa1b585405b617a4e4 Mon Sep 17 00:00:00 2001 From: Justin Tee Date: Tue, 15 Nov 2022 17:19:21 -0800 Subject: [PATCH 2563/4122] scsi: lpfc: Update lpfc version to 14.2.0.9 Update lpfc version to 14.2.0.9. Signed-off-by: Justin Tee Link: https://lore.kernel.org/r/20221116011921.105995-7-justintee8345@gmail.com Signed-off-by: Martin K. 
Petersen --- drivers/scsi/lpfc/lpfc_version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h index 378eba7b09d9..41a1128f8651 100644 --- a/drivers/scsi/lpfc/lpfc_version.h +++ b/drivers/scsi/lpfc/lpfc_version.h @@ -20,7 +20,7 @@ * included with this package. * *******************************************************************/ -#define LPFC_DRIVER_VERSION "14.2.0.8" +#define LPFC_DRIVER_VERSION "14.2.0.9" #define LPFC_DRIVER_NAME "lpfc" /* Used for SLI 2/3 */ From cdd9344e00b4fe3a4683a0ee58826c7a5ce778e0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 13 Nov 2022 21:33:59 +0100 Subject: [PATCH 2564/4122] scsi: lpfc: Remove linux/msi.h include Nothing in this file needs anything from linux/msi.h Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20221113202428.436270297@linutronix.de Cc: James Smart Cc: Dick Kennedy Cc: "James E.J. Bottomley" Cc: "Martin K. Petersen" Cc: linux-scsi@vger.kernel.org Reviewed-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_init.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index a119c06742b8..9d595d37d6ca 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #include #include From 7870d24817890bccee98db0718acececd6399d04 Mon Sep 17 00:00:00 2001 From: Anastasia Kovaleva Date: Mon, 14 Nov 2022 13:24:58 +0300 Subject: [PATCH 2565/4122] scsi: target: core: Send max transfer length in blocks A MAXIMUM TRANSFER LENGTH value indicates the maximum transfer length in logical blocks that the device server accepts for a single command. Fix function sending the length in sectors instead of blocks. This patch also removes the special casing for fileio in block_size_store since this logic is now unified in spc_emulate_evpd_b0() for all backends.
Reviewed-by: Konstantin Shelekhin Reviewed-by: Dmitriy Bogdanov Signed-off-by: Anastasia Kovaleva Link: https://lore.kernel.org/r/20221114102500.88892-2-a.kovaleva@yadro.com Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/target/target_core_configfs.c | 2 -- drivers/target/target_core_file.c | 1 - drivers/target/target_core_spc.c | 6 +++++- include/target/target_core_base.h | 1 - 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index b8a5c8d6cfde..611b0424e305 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -1102,8 +1102,6 @@ static ssize_t block_size_store(struct config_item *item, } da->block_size = val; - if (da->max_bytes_per_io) - da->hw_max_sectors = da->max_bytes_per_io / val; pr_debug("dev[%p]: SE Device block_size changed to %u\n", da->da_dev, val); diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index 28aa643be5d5..f9aed9fa8ced 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -193,7 +193,6 @@ static int fd_configure_device(struct se_device *dev) } dev->dev_attrib.hw_block_size = fd_dev->fd_block_size; - dev->dev_attrib.max_bytes_per_io = FD_MAX_BYTES; dev->dev_attrib.hw_max_sectors = FD_MAX_BYTES / fd_dev->fd_block_size; dev->dev_attrib.hw_queue_depth = FD_MAX_DEVICE_QUEUE_DEPTH; diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c index 621a460ba234..fcc7b10a7ae3 100644 --- a/drivers/target/target_core_spc.c +++ b/drivers/target/target_core_spc.c @@ -515,6 +515,7 @@ spc_emulate_evpd_b0(struct se_cmd *cmd, unsigned char *buf) struct se_device *dev = cmd->se_dev; u32 mtl = 0; int have_tp = 0, opt, min; + u32 io_max_blocks; /* * Following spc3r22 section 6.5.3 Block Limits VPD page, when @@ -553,7 +554,10 @@ spc_emulate_evpd_b0(struct se_cmd *cmd, unsigned char *buf) mtl = 
(cmd->se_tfo->max_data_sg_nents * PAGE_SIZE) / dev->dev_attrib.block_size; } - put_unaligned_be32(min_not_zero(mtl, dev->dev_attrib.hw_max_sectors), &buf[8]); + io_max_blocks = mult_frac(dev->dev_attrib.hw_max_sectors, + dev->dev_attrib.hw_block_size, + dev->dev_attrib.block_size); + put_unaligned_be32(min_not_zero(mtl, io_max_blocks), &buf[8]); /* * Set OPTIMAL TRANSFER LENGTH diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 0c1e43980985..12c9ba16217e 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -712,7 +712,6 @@ struct se_dev_attrib { u32 unmap_granularity; u32 unmap_granularity_alignment; u32 max_write_same_len; - u32 max_bytes_per_io; struct se_device *da_dev; struct config_group da_group; }; From 9375031ee40b66c8fd2fc24d5fbea47b69f53de6 Mon Sep 17 00:00:00 2001 From: Anastasia Kovaleva Date: Mon, 14 Nov 2022 13:24:59 +0300 Subject: [PATCH 2566/4122] scsi: target: core: Make hw_max_sectors store the sectors amount in blocks By default, hw_max_sectors stores its value in 512 blocks in iblock, despite the fact that the block size can be 4096 bytes. Change hw_max_sectors to store the number of sectors in hw_block_size blocks. Reviewed-by: Konstantin Shelekhin Reviewed-by: Dmitriy Bogdanov Signed-off-by: Anastasia Kovaleva Link: https://lore.kernel.org/r/20221114102500.88892-3-a.kovaleva@yadro.com Reviewed-by: Mike Christie Signed-off-by: Martin K. 
Petersen --- drivers/target/target_core_iblock.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 8351c974cee3..2a704926edb9 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -124,7 +124,9 @@ static int iblock_configure_device(struct se_device *dev) q = bdev_get_queue(bd); dev->dev_attrib.hw_block_size = bdev_logical_block_size(bd); - dev->dev_attrib.hw_max_sectors = queue_max_hw_sectors(q); + dev->dev_attrib.hw_max_sectors = mult_frac(queue_max_hw_sectors(q), + SECTOR_SIZE, + dev->dev_attrib.hw_block_size); dev->dev_attrib.hw_queue_depth = q->nr_requests; /* From 689d94ec208cfdf95101d99319cb4bdc5f55774d Mon Sep 17 00:00:00 2001 From: Anastasia Kovaleva Date: Mon, 14 Nov 2022 13:25:00 +0300 Subject: [PATCH 2567/4122] scsi: target: core: Change the way target_xcopy_do_work() sets restriction on max I/O To determine how many blocks to send in one command, the minimum value is selected from the hw_max_sectors of both devices. In target_xcopy_do_work, hw_max_sectors are used as blocks, not sectors; it also ignores the fact that sectors can be of different sizes, for example 512 and 4096 bytes. Because of this, a number of blocks can be transmitted that the device will not be able to accept. Change the selection of max transmission size into bytes. Reviewed-by: Konstantin Shelekhin Reviewed-by: Dmitriy Bogdanov Signed-off-by: Anastasia Kovaleva Link: https://lore.kernel.org/r/20221114102500.88892-4-a.kovaleva@yadro.com Reviewed-by: Mike Christie Signed-off-by: Martin K.
Petersen --- drivers/target/target_core_xcopy.c | 97 ++++++++++++++++-------------- drivers/target/target_core_xcopy.h | 2 +- 2 files changed, 54 insertions(+), 45 deletions(-) diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index edf522208285..49eaee022ef1 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -582,11 +582,11 @@ static int target_xcopy_read_source( struct xcopy_op *xop, struct se_device *src_dev, sector_t src_lba, - u32 src_sectors) + u32 src_bytes) { struct xcopy_pt_cmd xpt_cmd; struct se_cmd *se_cmd = &xpt_cmd.se_cmd; - u32 length = (src_sectors * src_dev->dev_attrib.block_size); + u32 transfer_length_block = src_bytes / src_dev->dev_attrib.block_size; int rc; unsigned char cdb[16]; bool remote_port = (xop->op_origin == XCOL_DEST_RECV_OP); @@ -597,11 +597,11 @@ static int target_xcopy_read_source( memset(&cdb[0], 0, 16); cdb[0] = READ_16; put_unaligned_be64(src_lba, &cdb[2]); - put_unaligned_be32(src_sectors, &cdb[10]); - pr_debug("XCOPY: Built READ_16: LBA: %llu Sectors: %u Length: %u\n", - (unsigned long long)src_lba, src_sectors, length); + put_unaligned_be32(transfer_length_block, &cdb[10]); + pr_debug("XCOPY: Built READ_16: LBA: %llu Blocks: %u Length: %u\n", + (unsigned long long)src_lba, transfer_length_block, src_bytes); - __target_init_cmd(se_cmd, &xcopy_pt_tfo, &xcopy_pt_sess, length, + __target_init_cmd(se_cmd, &xcopy_pt_tfo, &xcopy_pt_sess, src_bytes, DMA_FROM_DEVICE, 0, &xpt_cmd.sense_buffer[0], 0); rc = target_xcopy_setup_pt_cmd(&xpt_cmd, xop, src_dev, &cdb[0], @@ -627,11 +627,11 @@ static int target_xcopy_write_destination( struct xcopy_op *xop, struct se_device *dst_dev, sector_t dst_lba, - u32 dst_sectors) + u32 dst_bytes) { struct xcopy_pt_cmd xpt_cmd; struct se_cmd *se_cmd = &xpt_cmd.se_cmd; - u32 length = (dst_sectors * dst_dev->dev_attrib.block_size); + u32 transfer_length_block = dst_bytes / dst_dev->dev_attrib.block_size; int rc; unsigned char 
cdb[16]; bool remote_port = (xop->op_origin == XCOL_SOURCE_RECV_OP); @@ -642,11 +642,11 @@ static int target_xcopy_write_destination( memset(&cdb[0], 0, 16); cdb[0] = WRITE_16; put_unaligned_be64(dst_lba, &cdb[2]); - put_unaligned_be32(dst_sectors, &cdb[10]); - pr_debug("XCOPY: Built WRITE_16: LBA: %llu Sectors: %u Length: %u\n", - (unsigned long long)dst_lba, dst_sectors, length); + put_unaligned_be32(transfer_length_block, &cdb[10]); + pr_debug("XCOPY: Built WRITE_16: LBA: %llu Blocks: %u Length: %u\n", + (unsigned long long)dst_lba, transfer_length_block, dst_bytes); - __target_init_cmd(se_cmd, &xcopy_pt_tfo, &xcopy_pt_sess, length, + __target_init_cmd(se_cmd, &xcopy_pt_tfo, &xcopy_pt_sess, dst_bytes, DMA_TO_DEVICE, 0, &xpt_cmd.sense_buffer[0], 0); rc = target_xcopy_setup_pt_cmd(&xpt_cmd, xop, dst_dev, &cdb[0], @@ -670,9 +670,10 @@ static void target_xcopy_do_work(struct work_struct *work) struct se_cmd *ec_cmd = xop->xop_se_cmd; struct se_device *src_dev, *dst_dev; sector_t src_lba, dst_lba, end_lba; - unsigned int max_sectors; + unsigned long long max_bytes, max_bytes_src, max_bytes_dst, max_blocks; int rc = 0; - unsigned short nolb, max_nolb, copied_nolb = 0; + unsigned short nolb; + unsigned int copied_bytes = 0; sense_reason_t sense_rc; sense_rc = target_parse_xcopy_cmd(xop); @@ -691,23 +692,31 @@ static void target_xcopy_do_work(struct work_struct *work) nolb = xop->nolb; end_lba = src_lba + nolb; /* - * Break up XCOPY I/O into hw_max_sectors sized I/O based on the - * smallest max_sectors between src_dev + dev_dev, or + * Break up XCOPY I/O into hw_max_sectors * hw_block_size sized + * I/O based on the smallest max_bytes between src_dev + dst_dev */ - max_sectors = min(src_dev->dev_attrib.hw_max_sectors, - dst_dev->dev_attrib.hw_max_sectors); - max_sectors = min_t(u32, max_sectors, XCOPY_MAX_SECTORS); + max_bytes_src = (unsigned long long) src_dev->dev_attrib.hw_max_sectors * + src_dev->dev_attrib.hw_block_size; + max_bytes_dst = (unsigned long long) 
dst_dev->dev_attrib.hw_max_sectors * + dst_dev->dev_attrib.hw_block_size; - max_nolb = min_t(u16, max_sectors, ((u16)(~0U))); + max_bytes = min_t(u64, max_bytes_src, max_bytes_dst); + max_bytes = min_t(u64, max_bytes, XCOPY_MAX_BYTES); - pr_debug("target_xcopy_do_work: nolb: %hu, max_nolb: %hu end_lba: %llu\n", - nolb, max_nolb, (unsigned long long)end_lba); - pr_debug("target_xcopy_do_work: Starting src_lba: %llu, dst_lba: %llu\n", + /* + * Using shift instead of the division because otherwise GCC + * generates __udivdi3 that is missing on i386 + */ + max_blocks = max_bytes >> ilog2(src_dev->dev_attrib.block_size); + + pr_debug("%s: nolb: %u, max_blocks: %llu end_lba: %llu\n", __func__, + nolb, max_blocks, (unsigned long long)end_lba); + pr_debug("%s: Starting src_lba: %llu, dst_lba: %llu\n", __func__, (unsigned long long)src_lba, (unsigned long long)dst_lba); - while (src_lba < end_lba) { - unsigned short cur_nolb = min(nolb, max_nolb); - u32 cur_bytes = cur_nolb * src_dev->dev_attrib.block_size; + while (nolb) { + u32 cur_bytes = min_t(u64, max_bytes, nolb * src_dev->dev_attrib.block_size); + unsigned short cur_nolb = cur_bytes / src_dev->dev_attrib.block_size; if (cur_bytes != xop->xop_data_bytes) { /* @@ -724,43 +733,43 @@ static void target_xcopy_do_work(struct work_struct *work) xop->xop_data_bytes = cur_bytes; } - pr_debug("target_xcopy_do_work: Calling read src_dev: %p src_lba: %llu," - " cur_nolb: %hu\n", src_dev, (unsigned long long)src_lba, cur_nolb); + pr_debug("%s: Calling read src_dev: %p src_lba: %llu, cur_nolb: %hu\n", + __func__, src_dev, (unsigned long long)src_lba, cur_nolb); - rc = target_xcopy_read_source(ec_cmd, xop, src_dev, src_lba, cur_nolb); + rc = target_xcopy_read_source(ec_cmd, xop, src_dev, src_lba, cur_bytes); if (rc < 0) goto out; - src_lba += cur_nolb; - pr_debug("target_xcopy_do_work: Incremented READ src_lba to %llu\n", + src_lba += cur_bytes / src_dev->dev_attrib.block_size; + pr_debug("%s: Incremented READ src_lba to %llu\n", 
__func__, (unsigned long long)src_lba); - pr_debug("target_xcopy_do_work: Calling write dst_dev: %p dst_lba: %llu," - " cur_nolb: %hu\n", dst_dev, (unsigned long long)dst_lba, cur_nolb); + pr_debug("%s: Calling write dst_dev: %p dst_lba: %llu, cur_nolb: %u\n", + __func__, dst_dev, (unsigned long long)dst_lba, cur_nolb); rc = target_xcopy_write_destination(ec_cmd, xop, dst_dev, - dst_lba, cur_nolb); + dst_lba, cur_bytes); if (rc < 0) goto out; - dst_lba += cur_nolb; - pr_debug("target_xcopy_do_work: Incremented WRITE dst_lba to %llu\n", + dst_lba += cur_bytes / dst_dev->dev_attrib.block_size; + pr_debug("%s: Incremented WRITE dst_lba to %llu\n", __func__, (unsigned long long)dst_lba); - copied_nolb += cur_nolb; - nolb -= cur_nolb; + copied_bytes += cur_bytes; + nolb -= cur_bytes / src_dev->dev_attrib.block_size; } xcopy_pt_undepend_remotedev(xop); target_free_sgl(xop->xop_data_sg, xop->xop_data_nents); kfree(xop); - pr_debug("target_xcopy_do_work: Final src_lba: %llu, dst_lba: %llu\n", + pr_debug("%s: Final src_lba: %llu, dst_lba: %llu\n", __func__, (unsigned long long)src_lba, (unsigned long long)dst_lba); - pr_debug("target_xcopy_do_work: Blocks copied: %hu, Bytes Copied: %u\n", - copied_nolb, copied_nolb * dst_dev->dev_attrib.block_size); + pr_debug("%s: Blocks copied: %u, Bytes Copied: %u\n", __func__, + copied_bytes / dst_dev->dev_attrib.block_size, copied_bytes); - pr_debug("target_xcopy_do_work: Setting X-COPY GOOD status -> sending response\n"); + pr_debug("%s: Setting X-COPY GOOD status -> sending response\n", __func__); target_complete_cmd(ec_cmd, SAM_STAT_GOOD); return; @@ -776,8 +785,8 @@ out: err_free: kfree(xop); - pr_warn_ratelimited("target_xcopy_do_work: rc: %d, sense: %u, XCOPY operation failed\n", - rc, sense_rc); + pr_warn_ratelimited("%s: rc: %d, sense: %u, XCOPY operation failed\n", + __func__, rc, sense_rc); target_complete_cmd_with_sense(ec_cmd, SAM_STAT_CHECK_CONDITION, sense_rc); } diff --git a/drivers/target/target_core_xcopy.h 
b/drivers/target/target_core_xcopy.h index e5f20005179a..0aad7dc65895 100644 --- a/drivers/target/target_core_xcopy.h +++ b/drivers/target/target_core_xcopy.h @@ -5,7 +5,7 @@ #define XCOPY_TARGET_DESC_LEN 32 #define XCOPY_SEGMENT_DESC_LEN 28 #define XCOPY_NAA_IEEE_REGEX_LEN 16 -#define XCOPY_MAX_SECTORS 4096 +#define XCOPY_MAX_BYTES 16777216 /* 16 MB */ /* * SPC4r37 6.4.6.1 From fec1b2fa62c162d03f5dcd7b03e3c89d3116d49f Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Tue, 15 Nov 2022 13:56:38 +0100 Subject: [PATCH 2568/4122] scsi: target: iscsi: Fix a race condition between login_work and the login thread In case a malicious initiator sends some random data immediately after a login PDU, the iscsi_target_sk_data_ready() callback will schedule the login_work and, at the same time, the negotiation may end without clearing the LOGIN_FLAGS_INITIAL_PDU flag (because no additional PDU exchanges are required to complete the login). The login has been completed but the login_work function will find the LOGIN_FLAGS_INITIAL_PDU flag set and will never stop rescheduling itself; at this point, if the initiator drops the connection, the iscsit_conn structure will be freed, login_work will dereference a released socket structure and the kernel crashes. BUG: kernel NULL pointer dereference, address: 0000000000000230 PF: supervisor write access in kernel mode PF: error_code(0x0002) - not-present page Workqueue: events iscsi_target_do_login_rx [iscsi_target_mod] RIP: 0010:_raw_read_lock_bh+0x15/0x30 Call trace: iscsi_target_do_login_rx+0x75/0x3f0 [iscsi_target_mod] process_one_work+0x1e8/0x3c0 Fix this bug by forcing login_work to stop after the login has been completed and the socket callbacks have been restored. Add a comment to clarify the return values of iscsi_target_do_login(). Signed-off-by: Maurizio Lombardi Link: https://lore.kernel.org/r/20221115125638.102517-1-mlombard@redhat.com Reviewed-by: Mike Christie Signed-off-by: Martin K.
Petersen --- drivers/target/iscsi/iscsi_target_nego.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c index f2919319ad38..ff49c8f3fe24 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -1018,6 +1018,13 @@ static int iscsi_target_handle_csg_one(struct iscsit_conn *conn, struct iscsi_lo return 0; } +/* + * RETURN VALUE: + * + * 1 = Login successful + * -1 = Login failed + * 0 = More PDU exchanges required + */ static int iscsi_target_do_login(struct iscsit_conn *conn, struct iscsi_login *login) { int pdu_count = 0; @@ -1363,12 +1370,13 @@ int iscsi_target_start_negotiation( ret = -1; if (ret < 0) { - cancel_delayed_work_sync(&conn->login_work); iscsi_target_restore_sock_callbacks(conn); iscsi_remove_failed_auth_entry(conn); } - if (ret != 0) + if (ret != 0) { + cancel_delayed_work_sync(&conn->login_work); iscsi_target_nego_release(conn); + } return ret; } From a72629b5cdbc43e28a4a19b0fce8d17c582c4db4 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Mon, 21 Nov 2022 10:27:03 +0100 Subject: [PATCH 2569/4122] scsi: target: core: Fix hard lockup when executing a compare-and-write command While handling an I/O completion for the compare portion of a COMPARE_AND_WRITE command, it may happen that the compare_and_write_callback function submits new bio structs while still in softirq context. Low level drivers like md raid5 do not expect their make_request call to be used in softirq context, they call into schedule() and create a deadlocked system. 
__schedule at ffffffff873a0807 schedule at ffffffff873a0cc5 raid5_get_active_stripe at ffffffffc0875744 [raid456] raid5_make_request at ffffffffc0875a50 [raid456] md_handle_request at ffffffff8713b9f9 md_make_request at ffffffff8713bacb generic_make_request at ffffffff86e6f14b submit_bio at ffffffff86e6f27c iblock_submit_bios at ffffffffc0b4e4dc [target_core_iblock] iblock_execute_rw at ffffffffc0b4f3ce [target_core_iblock] __target_execute_cmd at ffffffffc1090079 [target_core_mod] compare_and_write_callback at ffffffffc1093602 [target_core_mod] target_cmd_interrupted at ffffffffc108d1ec [target_core_mod] target_complete_cmd_with_sense at ffffffffc108d27c [target_core_mod] iblock_complete_cmd at ffffffffc0b4e23a [target_core_iblock] dm_io_dec_pending at ffffffffc00db29e [dm_mod] clone_endio at ffffffffc00dbf07 [dm_mod] raid5_align_endio at ffffffffc086d6c2 [raid456] blk_update_request at ffffffff86e6d950 scsi_end_request at ffffffff87063d48 scsi_io_completion at ffffffff87063ee8 blk_complete_reqs at ffffffff86e77b05 __softirqentry_text_start at ffffffff876000d7 This problem appears to be an issue between target_cmd_interrupted() and compare_and_write_callback(). target_cmd_interrupted() calls the se_cmd's transport_complete_callback function pointer if the se_cmd is being stopped or aborted, and CMD_T_ABORTED was set on the se_cmd. When calling compare_and_write_callback(), the success parameter was set to false. target_cmd_interrupted() seems to expect this means the callback will do cleanup that does not require a process context. But compare_and_write_callback() ignores the parameter if there was I/O done for the compare part of COMPARE_AND_WRITE. Since there was data, the function continued on, passed the compare, and issued a write while ignoring the value of the success parameter. The submit of a bio for the write portion of the COMPARE_AND_WRITE then causes schedule to be unsafely called from the softirq context. 
Fix the bug in compare_and_write_callback by jumping to the out label if success == "false", after checking if we have been called by transport_generic_request_failure(); The command is being aborted or stopped so there is no need to submit the write bio for the write part of the COMPARE_AND_WRITE command. Signed-off-by: Maurizio Lombardi Link: https://lore.kernel.org/r/20221121092703.316489-1-mlombard@redhat.com Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/target/target_core_sbc.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index 1cd41e3834bb..7536ca797606 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -446,12 +446,22 @@ static sense_reason_t compare_and_write_callback(struct se_cmd *cmd, bool succes sense_reason_t ret = TCM_NO_SENSE; int i; - /* - * Handle early failure in transport_generic_request_failure(), - * which will not have taken ->caw_sem yet.. - */ - if (!success && (!cmd->t_data_sg || !cmd->t_bidi_data_sg)) - return TCM_NO_SENSE; + if (!success) { + /* + * Handle early failure in transport_generic_request_failure(), + * which will not have taken ->caw_sem yet.. + */ + if (!cmd->t_data_sg || !cmd->t_bidi_data_sg) + return TCM_NO_SENSE; + + /* + * The command has been stopped or aborted so + * we don't have to perform the write operation. + */ + WARN_ON(!(cmd->transport_state & + (CMD_T_ABORTED | CMD_T_STOP))); + goto out; + } /* * Handle special case for zero-length COMPARE_AND_WRITE */ From 0c26a2d7c98039e913e63f9250fde738a3f88a60 Mon Sep 17 00:00:00 2001 From: Wenchao Hao Date: Tue, 22 Nov 2022 18:11:05 +0000 Subject: [PATCH 2570/4122] scsi: iscsi: Rename iscsi_set_param() to iscsi_if_set_param() There are two iscsi_set_param() functions defined in libiscsi.c and scsi_transport_iscsi.c respectively which is confusing. 
Rename the one in scsi_transport_iscsi.c to iscsi_if_set_param(). Signed-off-by: Wenchao Hao Link: https://lore.kernel.org/r/20221122181105.4123935-1-haowenchao@huawei.com Reviewed-by: Mike Christie Reviewed-by: Lee Duncan Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_transport_iscsi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index cd3db9684e52..c3fe5ecfee59 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2988,7 +2988,7 @@ iscsi_if_destroy_conn(struct iscsi_transport *transport, struct iscsi_uevent *ev } static int -iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev) +iscsi_if_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev) { char *data = (char*)ev + sizeof(*ev); struct iscsi_cls_conn *conn; @@ -3941,7 +3941,7 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) err = -EINVAL; break; case ISCSI_UEVENT_SET_PARAM: - err = iscsi_set_param(transport, ev); + err = iscsi_if_set_param(transport, ev); break; case ISCSI_UEVENT_CREATE_CONN: case ISCSI_UEVENT_DESTROY_CONN: From ec9780e48c77f469c339b53940ef0c5eacc8b9d2 Mon Sep 17 00:00:00 2001 From: Wenchao Hao Date: Wed, 23 Nov 2022 20:21:36 +0800 Subject: [PATCH 2571/4122] scsi: core: Increase scsi_device's iodone_cnt in scsi_timeout() If a SCSI command times out and is going to be aborted, we should increase the iodone_cnt of the related scsi_device. Otherwise the iodone_cnt would be smaller than iorequest_cnt. Increasing iodone_cnt in scsi_timeout() would not cause a double accounting issue. Brief analysis follows: - We add the iodone_cnt when BLK_EH_DONE is returned in scsi_timeout(). The related command's timeout event would not happen. - If the abort succeeds and the command is not retried, the command would be completed with scsi_finish_command() which would not increase iodone_cnt. 
- If the abort succeeds and the command is retried, it would be requeued. A scsi_dispatch_cmd() would be called and iorequest_cnt would be increased again. - If the abort fails, the error handler successfully recovers the device, and the command is not retried, the command would be completed with scsi_finish_command() which would not increase iodone_cnt. - If the abort fails, the error handler successfully recovers the device, and the command is retried, the iorequest_cnt would be increased again. Signed-off-by: Wenchao Hao Link: https://lore.kernel.org/r/20221123122137.150776-2-haowenchao@huawei.com Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_error.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index be2a70c5ac6d..613d5aeb1e3c 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -354,6 +354,7 @@ enum blk_eh_timer_return scsi_timeout(struct request *req) */ if (test_and_set_bit(SCMD_STATE_COMPLETE, &scmd->state)) return BLK_EH_DONE; + atomic_inc(&scmd->device->iodone_cnt); if (scsi_abort_command(scmd) != SUCCESS) { set_host_byte(scmd, DID_TIME_OUT); scsi_eh_scmd_add(scmd); From cfee29ffb45b1c9798011b19d454637d1b0fe87d Mon Sep 17 00:00:00 2001 From: Wenchao Hao Date: Wed, 23 Nov 2022 20:21:37 +0800 Subject: [PATCH 2572/4122] scsi: core: Do not increase scsi_device's iorequest_cnt if dispatch failed If scsi_dispatch_cmd() failed, the SCSI command was not sent to the target. scsi_queue_rq() would return BLK_STS_RESOURCE if scsi_dispatch_cmd() failed, and the related request would be requeued. The timeout of this request would not fire, so no one would increase iodone_cnt. Signed-off-by: Wenchao Hao Link: https://lore.kernel.org/r/20221123122137.150776-3-haowenchao@huawei.com Reviewed-by: Mike Christie Signed-off-by: Martin K.
Petersen --- drivers/scsi/scsi_lib.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index ec890865abae..a29d87e57430 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1464,8 +1464,6 @@ static int scsi_dispatch_cmd(struct scsi_cmnd *cmd) struct Scsi_Host *host = cmd->device->host; int rtn = 0; - atomic_inc(&cmd->device->iorequest_cnt); - /* check if the device is still usable */ if (unlikely(cmd->device->sdev_state == SDEV_DEL)) { /* in SDEV_DEL we error all commands. DID_NO_CONNECT @@ -1764,6 +1762,7 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx, goto out_dec_host_busy; } + atomic_inc(&cmd->device->iorequest_cnt); return BLK_STS_OK; out_dec_host_busy: From 9c9ff300e0de07475796495d86f449340d454a0c Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Tue, 22 Nov 2022 01:57:51 +0000 Subject: [PATCH 2573/4122] scsi: hpsa: Fix possible memory leak in hpsa_init_one() The hpda_alloc_ctlr_info() allocates h and its field reply_map. However, in hpsa_init_one(), if alloc_percpu() failed, the hpsa_init_one() jumps to clean1 directly, which frees h and leaks the h->reply_map. Fix by calling hpda_free_ctlr_info() to release h->reply_map and h instead of freeing h directly. Fixes: 8b834bff1b73 ("scsi: hpsa: fix selection of reply queue") Signed-off-by: Yuan Can Link: https://lore.kernel.org/r/20221122015751.87284-1-yuancan@huawei.com Reviewed-by: Ming Lei Signed-off-by: Martin K.
Petersen --- drivers/scsi/hpsa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index f8e832b1bc46..e5cbc97a5ea4 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -8925,7 +8925,7 @@ clean1: /* wq/aer/h */ destroy_workqueue(h->monitor_ctlr_wq); h->monitor_ctlr_wq = NULL; } - kfree(h); + hpda_free_ctlr_info(h); return rc; } From 8ac813f7e663bcf03e09291517359111d0cf9785 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:35:45 +0100 Subject: [PATCH 2574/4122] gpio: max732x: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. Signed-off-by: Uwe Kleine-König Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-max732x.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpio-max732x.c b/drivers/gpio/gpio-max732x.c index da6972117030..68e982cdee73 100644 --- a/drivers/gpio/gpio-max732x.c +++ b/drivers/gpio/gpio-max732x.c @@ -608,9 +608,9 @@ static struct max732x_platform_data *of_gpio_max732x(struct device *dev) return pdata; } -static int max732x_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int max732x_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct max732x_platform_data *pdata; struct device_node *node; struct max732x_chip *chip; @@ -707,7 +707,7 @@ static struct i2c_driver max732x_driver = { .name = "max732x", .of_match_table = of_match_ptr(max732x_of_table), }, - .probe = max732x_probe, + .probe_new = max732x_probe, .id_table = max732x_id, }; From 1287341c1980e0cf9eb19bdd370405d755392f2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:35:46 +0100 Subject: [PATCH 2575/4122] gpio: pca953x: Convert to i2c's 
.probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. Signed-off-by: Uwe Kleine-König Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-pca953x.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index ebe1943b85dd..342800527c14 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -1049,9 +1049,9 @@ out: return ret; } -static int pca953x_probe(struct i2c_client *client, - const struct i2c_device_id *i2c_id) +static int pca953x_probe(struct i2c_client *client) { + const struct i2c_device_id *i2c_id = i2c_client_get_device_id(client); struct pca953x_platform_data *pdata; struct pca953x_chip *chip; int irq_base = 0; @@ -1375,7 +1375,7 @@ static struct i2c_driver pca953x_driver = { .of_match_table = pca953x_dt_ids, .acpi_match_table = pca953x_acpi_ids, }, - .probe = pca953x_probe, + .probe_new = pca953x_probe, .remove = pca953x_remove, .id_table = pca953x_id, }; From 7963ba02b2d1de681ba1ee33060db42eb4cf4c07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:35:47 +0100 Subject: [PATCH 2576/4122] gpio: pcf857x: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-pcf857x.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpio-pcf857x.c b/drivers/gpio/gpio-pcf857x.c index e98ea47d7237..cec2f2c78255 100644 --- a/drivers/gpio/gpio-pcf857x.c +++ b/drivers/gpio/gpio-pcf857x.c @@ -247,9 +247,9 @@ static const struct irq_chip pcf857x_irq_chip = { /*-------------------------------------------------------------------------*/ -static int pcf857x_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int pcf857x_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct pcf857x_platform_data *pdata = dev_get_platdata(&client->dev); struct device_node *np = client->dev.of_node; struct pcf857x *gpio; @@ -422,7 +422,7 @@ static struct i2c_driver pcf857x_driver = { .name = "pcf857x", .of_match_table = of_match_ptr(pcf857x_of_table), }, - .probe = pcf857x_probe, + .probe_new = pcf857x_probe, .remove = pcf857x_remove, .shutdown = pcf857x_shutdown, .id_table = pcf857x_id, From 61119786de40f61b8843aa57217b678361763d67 Mon Sep 17 00:00:00 2001 From: XueBing Chen Date: Fri, 17 Jun 2022 23:50:19 +0800 Subject: [PATCH 2577/4122] KVM: PPC: Use __func__ to get function's name Prefer using '"%s...", __func__' to get current function's name in output messages. 
Signed-off-by: XueBing Chen Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/13b2c857.beb.181725bad35.Coremail.chenxuebing@jari.cn --- arch/powerpc/kvm/book3s_64_mmu_hv.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index e9744b41a226..351ff0f89b00 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -1202,7 +1202,7 @@ static int resize_hpt_allocate(struct kvm_resize_hpt *resize) if (rc < 0) return rc; - resize_hpt_debug(resize, "resize_hpt_allocate(): HPT @ 0x%lx\n", + resize_hpt_debug(resize, "%s(): HPT @ 0x%lx\n", __func__, resize->hpt.virt); return 0; @@ -1443,7 +1443,7 @@ static void resize_hpt_prepare_work(struct work_struct *work) */ mutex_unlock(&kvm->arch.mmu_setup_lock); - resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n", + resize_hpt_debug(resize, "%s(): order = %d\n", __func__, resize->order); err = resize_hpt_allocate(resize); @@ -1887,8 +1887,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, i, v, r, tmp); if (ret != H_SUCCESS) { - pr_err("kvm_htab_write ret %ld i=%ld v=%lx " - "r=%lx\n", ret, i, v, r); + pr_err("%s ret %ld i=%ld v=%lx r=%lx\n", __func__, ret, i, v, r); goto out; } if (!mmu_ready && is_vrma_hpte(v)) { From 392a58f1eaab0c90b80d7ba4a03dbf6eaaeabe60 Mon Sep 17 00:00:00 2001 From: Zhang Jiaming Date: Thu, 23 Jun 2022 18:20:31 +0800 Subject: [PATCH 2578/4122] KVM: PPC: Book3S HV: XIVE: Fix spelling mistakes Change 'subsquent' to 'subsequent'. Change 'accross' to 'across'. 
Signed-off-by: Zhang Jiaming Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220623102031.15359-1-jiaming@nfschina.com --- arch/powerpc/kvm/book3s_xive.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 4ca23644f752..b4b680f2d853 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -539,7 +539,7 @@ static int xive_vm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) if (irq == XICS_IPI || irq == 0) { /* * This barrier orders the setting of xc->cppr vs. - * subsquent test of xc->mfrr done inside + * subsequent test of xc->mfrr done inside * scan_interrupts and push_pending_to_hw */ smp_mb(); @@ -563,7 +563,7 @@ static int xive_vm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) /* * This barrier orders both setting of in_eoi above vs, * subsequent test of guest_priority, and the setting - * of xc->cppr vs. subsquent test of xc->mfrr done inside + * of xc->cppr vs. subsequent test of xc->mfrr done inside * scan_interrupts and push_pending_to_hw */ smp_mb(); @@ -2392,7 +2392,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr) /* * Now, we select a target if we have one. If we don't we * leave the interrupt untargetted. It means that an interrupt - * can become "untargetted" accross migration if it was masked + * can become "untargetted" across migration if it was masked * by set_xive() but there is little we can do about it. */ From 6fa1efeaa6671fb7339a6c62ceeec19e8e787963 Mon Sep 17 00:00:00 2001 From: Deming Wang Date: Sun, 3 Jul 2022 13:29:32 -0400 Subject: [PATCH 2579/4122] KVM: PPC: Book3s: Use arg->size directly in kvm_vm_ioctl_create_spapr_tce() The size variable is just a copy of args->size, neither size nor args are modified, so just use args->size directly.
Signed-off-by: Deming Wang [mpe: Reword change log] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220703172932.11329-1-wangdeming@inspur.com --- arch/powerpc/kvm/book3s_64_vio.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 40864373ef87..95e738ef9062 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -294,14 +294,14 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, struct kvmppc_spapr_tce_table *stt = NULL; struct kvmppc_spapr_tce_table *siter; struct mm_struct *mm = kvm->mm; - unsigned long npages, size = args->size; + unsigned long npages; int ret; if (!args->size || args->page_shift < 12 || args->page_shift > 34 || (args->offset + args->size > (ULLONG_MAX >> args->page_shift))) return -EINVAL; - npages = kvmppc_tce_pages(size); + npages = kvmppc_tce_pages(args->size); ret = account_locked_vm(mm, kvmppc_stt_pages(npages), true); if (ret) return ret; @@ -314,7 +314,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, stt->liobn = args->liobn; stt->page_shift = args->page_shift; stt->offset = args->offset; - stt->size = size; + stt->size = args->size; stt->kvm = kvm; mutex_init(&stt->alloc_lock); INIT_LIST_HEAD_RCU(&stt->iommu_tables); From a96b20758b23be7e9f693218908228d6100c3c26 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 9 Jul 2022 17:56:43 +0200 Subject: [PATCH 2580/4122] KVM: PPC: Book3S HV: Use the bitmap API to allocate bitmaps Use bitmap_zalloc()/bitmap_free() instead of hand-writing them. It is less verbose and it improves the semantic. 
Signed-off-by: Christophe JAILLET Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/52e843a460bc374973149b8da0bd04f9761b80b7.1657382184.git.christophe.jaillet@wanadoo.fr --- arch/powerpc/kvm/book3s_hv_uvmem.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c index e2f11f9c3f2a..1d67baa5557a 100644 --- a/arch/powerpc/kvm/book3s_hv_uvmem.c +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c @@ -1190,8 +1190,7 @@ int kvmppc_uvmem_init(void) pfn_first = res->start >> PAGE_SHIFT; pfn_last = pfn_first + (resource_size(res) >> PAGE_SHIFT); - kvmppc_uvmem_bitmap = kcalloc(BITS_TO_LONGS(pfn_last - pfn_first), - sizeof(unsigned long), GFP_KERNEL); + kvmppc_uvmem_bitmap = bitmap_zalloc(pfn_last - pfn_first, GFP_KERNEL); if (!kvmppc_uvmem_bitmap) { ret = -ENOMEM; goto out_unmap; @@ -1215,5 +1214,5 @@ void kvmppc_uvmem_free(void) memunmap_pages(&kvmppc_uvmem_pgmap); release_mem_region(kvmppc_uvmem_pgmap.range.start, range_len(&kvmppc_uvmem_pgmap.range)); - kfree(kvmppc_uvmem_bitmap); + bitmap_free(kvmppc_uvmem_bitmap); } From 8daa9c1dc9b4a3422801017ca46d935073dc14c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:40:23 +0100 Subject: [PATCH 2581/4122] macintosh/ams-i2c: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221118224540.619276-290-uwe@kleine-koenig.org --- drivers/macintosh/ams/ams-i2c.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/macintosh/ams/ams-i2c.c b/drivers/macintosh/ams/ams-i2c.c index 3ded340699fb..a4a1035eb412 100644 --- a/drivers/macintosh/ams/ams-i2c.c +++ b/drivers/macintosh/ams/ams-i2c.c @@ -56,8 +56,7 @@ enum ams_i2c_cmd { AMS_CMD_START, }; -static int ams_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id); +static int ams_i2c_probe(struct i2c_client *client); static void ams_i2c_remove(struct i2c_client *client); static const struct i2c_device_id ams_id[] = { @@ -70,7 +69,7 @@ static struct i2c_driver ams_i2c_driver = { .driver = { .name = "ams", }, - .probe = ams_i2c_probe, + .probe_new = ams_i2c_probe, .remove = ams_i2c_remove, .id_table = ams_id, }; @@ -155,8 +154,7 @@ static void ams_i2c_get_xyz(s8 *x, s8 *y, s8 *z) *z = ams_i2c_read(AMS_DATAZ); } -static int ams_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int ams_i2c_probe(struct i2c_client *client) { int vmaj, vmin; int result; From 0424113fed923a2fcb699b5f3aa335d16e092f3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:40:24 +0100 Subject: [PATCH 2582/4122] macintosh/therm_adt746x: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221118224540.619276-291-uwe@kleine-koenig.org --- drivers/macintosh/therm_adt746x.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/macintosh/therm_adt746x.c b/drivers/macintosh/therm_adt746x.c index b004ea2a1102..8f5db9093c9a 100644 --- a/drivers/macintosh/therm_adt746x.c +++ b/drivers/macintosh/therm_adt746x.c @@ -464,9 +464,9 @@ static void thermostat_remove_files(struct thermostat *th) } -static int probe_thermostat(struct i2c_client *client, - const struct i2c_device_id *id) +static int probe_thermostat(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct device_node *np = client->dev.of_node; struct thermostat* th; const __be32 *prop; @@ -598,7 +598,7 @@ static struct i2c_driver thermostat_driver = { .driver = { .name = "therm_adt746x", }, - .probe = probe_thermostat, + .probe_new = probe_thermostat, .remove = remove_thermostat, .id_table = therm_adt746x_id, }; From dc9be0735c3e245fe60775307cf7842b1f9b45a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:40:25 +0100 Subject: [PATCH 2583/4122] macintosh/therm_windtunnel: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221118224540.619276-292-uwe@kleine-koenig.org --- drivers/macintosh/therm_windtunnel.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/macintosh/therm_windtunnel.c b/drivers/macintosh/therm_windtunnel.c index b8228ca40454..22b15efcc025 100644 --- a/drivers/macintosh/therm_windtunnel.c +++ b/drivers/macintosh/therm_windtunnel.c @@ -411,8 +411,9 @@ static const struct i2c_device_id therm_windtunnel_id[] = { MODULE_DEVICE_TABLE(i2c, therm_windtunnel_id); static int -do_probe(struct i2c_client *cl, const struct i2c_device_id *id) +do_probe(struct i2c_client *cl) { + const struct i2c_device_id *id = i2c_client_get_device_id(cl); struct i2c_adapter *adapter = cl->adapter; int ret = 0; @@ -441,7 +442,7 @@ static struct i2c_driver g4fan_driver = { .driver = { .name = "therm_windtunnel", }, - .probe = do_probe, + .probe_new = do_probe, .remove = do_remove, .id_table = therm_windtunnel_id, }; From 9d533bdf4a582f037327f1a38ed8cf689d67cab4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:40:26 +0100 Subject: [PATCH 2584/4122] macintosh/windfarm_ad7417_sensor: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221118224540.619276-293-uwe@kleine-koenig.org --- drivers/macintosh/windfarm_ad7417_sensor.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/macintosh/windfarm_ad7417_sensor.c b/drivers/macintosh/windfarm_ad7417_sensor.c index c5c54a4ce91f..33b4723d235e 100644 --- a/drivers/macintosh/windfarm_ad7417_sensor.c +++ b/drivers/macintosh/windfarm_ad7417_sensor.c @@ -229,8 +229,7 @@ static void wf_ad7417_init_chip(struct wf_ad7417_priv *pv) pv->config = config; } -static int wf_ad7417_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int wf_ad7417_probe(struct i2c_client *client) { struct wf_ad7417_priv *pv; const struct mpu_data *mpu; @@ -321,7 +320,7 @@ static struct i2c_driver wf_ad7417_driver = { .name = "wf_ad7417", .of_match_table = wf_ad7417_of_id, }, - .probe = wf_ad7417_probe, + .probe_new = wf_ad7417_probe, .remove = wf_ad7417_remove, .id_table = wf_ad7417_id, }; From 472e4c61d2bb4977ade8e2491953954bf9723563 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:40:27 +0100 Subject: [PATCH 2585/4122] macintosh/windfarm_fcu_controls: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221118224540.619276-294-uwe@kleine-koenig.org --- drivers/macintosh/windfarm_fcu_controls.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/macintosh/windfarm_fcu_controls.c b/drivers/macintosh/windfarm_fcu_controls.c index c5b1ca5bcd73..e027d889d7e8 100644 --- a/drivers/macintosh/windfarm_fcu_controls.c +++ b/drivers/macintosh/windfarm_fcu_controls.c @@ -514,8 +514,7 @@ static int wf_fcu_init_chip(struct wf_fcu_priv *pv) return 0; } -static int wf_fcu_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int wf_fcu_probe(struct i2c_client *client) { struct wf_fcu_priv *pv; @@ -590,7 +589,7 @@ static struct i2c_driver wf_fcu_driver = { .name = "wf_fcu", .of_match_table = wf_fcu_of_id, }, - .probe = wf_fcu_probe, + .probe_new = wf_fcu_probe, .remove = wf_fcu_remove, .id_table = wf_fcu_id, }; From 51a9e1755cdd8b127191030d15b74b97f7d3ce75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:40:28 +0100 Subject: [PATCH 2586/4122] macintosh/windfarm_lm75_sensor: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221118224540.619276-295-uwe@kleine-koenig.org --- drivers/macintosh/windfarm_lm75_sensor.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/macintosh/windfarm_lm75_sensor.c b/drivers/macintosh/windfarm_lm75_sensor.c index 204661c8e918..24f0a444d312 100644 --- a/drivers/macintosh/windfarm_lm75_sensor.c +++ b/drivers/macintosh/windfarm_lm75_sensor.c @@ -87,9 +87,9 @@ static const struct wf_sensor_ops wf_lm75_ops = { .owner = THIS_MODULE, }; -static int wf_lm75_probe(struct i2c_client *client, - const struct i2c_device_id *id) -{ +static int wf_lm75_probe(struct i2c_client *client) +{ + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct wf_lm75_sensor *lm; int rc, ds1775; const char *name, *loc; @@ -177,7 +177,7 @@ static struct i2c_driver wf_lm75_driver = { .name = "wf_lm75", .of_match_table = wf_lm75_of_id, }, - .probe = wf_lm75_probe, + .probe_new = wf_lm75_probe, .remove = wf_lm75_remove, .id_table = wf_lm75_id, }; From 0e2211b3373ea718d2161bcc360cd4d9a3bcebc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:40:29 +0100 Subject: [PATCH 2587/4122] macintosh/windfarm_lm87_sensor: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221118224540.619276-296-uwe@kleine-koenig.org --- drivers/macintosh/windfarm_lm87_sensor.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/macintosh/windfarm_lm87_sensor.c b/drivers/macintosh/windfarm_lm87_sensor.c index 40d25463346e..f37a32c2070c 100644 --- a/drivers/macintosh/windfarm_lm87_sensor.c +++ b/drivers/macintosh/windfarm_lm87_sensor.c @@ -95,8 +95,7 @@ static const struct wf_sensor_ops wf_lm87_ops = { .owner = THIS_MODULE, }; -static int wf_lm87_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int wf_lm87_probe(struct i2c_client *client) { struct wf_lm87_sensor *lm; const char *name = NULL, *loc; @@ -173,7 +172,7 @@ static struct i2c_driver wf_lm87_driver = { .name = "wf_lm87", .of_match_table = wf_lm87_of_id, }, - .probe = wf_lm87_probe, + .probe_new = wf_lm87_probe, .remove = wf_lm87_remove, .id_table = wf_lm87_id, }; From 2d7a9d780444c8f31ee6af522a92a99492d9eeb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:40:30 +0100 Subject: [PATCH 2588/4122] macintosh/windfarm_max6690_sensor: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221118224540.619276-297-uwe@kleine-koenig.org --- drivers/macintosh/windfarm_max6690_sensor.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/macintosh/windfarm_max6690_sensor.c b/drivers/macintosh/windfarm_max6690_sensor.c index c0d404ebc792..6c5ab657b6b3 100644 --- a/drivers/macintosh/windfarm_max6690_sensor.c +++ b/drivers/macintosh/windfarm_max6690_sensor.c @@ -60,8 +60,7 @@ static const struct wf_sensor_ops wf_max6690_ops = { .owner = THIS_MODULE, }; -static int wf_max6690_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int wf_max6690_probe(struct i2c_client *client) { const char *name, *loc; struct wf_6690_sensor *max; @@ -129,7 +128,7 @@ static struct i2c_driver wf_max6690_driver = { .name = "wf_max6690", .of_match_table = wf_max6690_of_id, }, - .probe = wf_max6690_probe, + .probe_new = wf_max6690_probe, .remove = wf_max6690_remove, .id_table = wf_max6690_id, }; From d05921a09a5a72805a1d669dce0fcbd66df86237 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:40:31 +0100 Subject: [PATCH 2589/4122] macintosh/windfarm_smu_sat: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221118224540.619276-298-uwe@kleine-koenig.org --- drivers/macintosh/windfarm_smu_sat.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/macintosh/windfarm_smu_sat.c b/drivers/macintosh/windfarm_smu_sat.c index be5d4593db93..ebc4256a9e4a 100644 --- a/drivers/macintosh/windfarm_smu_sat.c +++ b/drivers/macintosh/windfarm_smu_sat.c @@ -189,8 +189,7 @@ static const struct wf_sensor_ops wf_sat_ops = { .owner = THIS_MODULE, }; -static int wf_sat_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int wf_sat_probe(struct i2c_client *client) { struct device_node *dev = client->dev.of_node; struct wf_sat *sat; @@ -349,7 +348,7 @@ static struct i2c_driver wf_sat_driver = { .name = "wf_smu_sat", .of_match_table = wf_sat_of_id, }, - .probe = wf_sat_probe, + .probe_new = wf_sat_probe, .remove = wf_sat_remove, .id_table = wf_sat_id, }; From e0acfdd13474815696595206e11169736b4bca9d Mon Sep 17 00:00:00 2001 From: Haowen Bai Date: Thu, 17 Mar 2022 10:32:39 +0800 Subject: [PATCH 2590/4122] macintosh/windfarm_pm81: Fix warning comparing pointer to 0 Avoid pointer type value compared with 0 to make code clear. 
Signed-off-by: Haowen Bai Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1647484359-12402-1-git-send-email-baihaowen@meizu.com --- drivers/macintosh/windfarm_pm81.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/macintosh/windfarm_pm81.c b/drivers/macintosh/windfarm_pm81.c index e0f4743f21cc..257fb2c695c5 100644 --- a/drivers/macintosh/windfarm_pm81.c +++ b/drivers/macintosh/windfarm_pm81.c @@ -401,7 +401,7 @@ static void wf_smu_create_cpu_fans(void) /* First, locate the PID params in SMU SBD */ hdr = smu_get_sdb_partition(SMU_SDB_CPUPIDDATA_ID, NULL); - if (hdr == 0) { + if (!hdr) { printk(KERN_WARNING "windfarm: CPU PID fan config not found " "max fan speed\n"); goto fail; @@ -705,7 +705,7 @@ static int wf_init_pm(void) const struct smu_sdbp_header *hdr; hdr = smu_get_sdb_partition(SMU_SDB_SENSORTREE_ID, NULL); - if (hdr != 0) { + if (hdr) { struct smu_sdbp_sensortree *st = (struct smu_sdbp_sensortree *)&hdr[1]; wf_smu_mach_model = st->model_id; From 2f59562c140d3119328f869126e8e593a99a392f Mon Sep 17 00:00:00 2001 From: Haowen Bai Date: Thu, 17 Mar 2022 10:35:54 +0800 Subject: [PATCH 2591/4122] macintosh/adb: Fix warning comparing pointer to 0 Avoid pointer type value compared with 0 to make code clear. 
Signed-off-by: Haowen Bai Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1647484554-13258-1-git-send-email-baihaowen@meizu.com --- drivers/macintosh/adb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c index 1bbb9ca08d40..23bd0c77ac1a 100644 --- a/drivers/macintosh/adb.c +++ b/drivers/macintosh/adb.c @@ -478,7 +478,7 @@ adb_register(int default_id, int handler_id, struct adb_ids *ids, if ((adb_handler[i].original_address == default_id) && (!handler_id || (handler_id == adb_handler[i].handler_id) || try_handler_change(i, handler_id))) { - if (adb_handler[i].handler != 0) { + if (adb_handler[i].handler) { pr_err("Two handlers for ADB device %d\n", default_id); continue; @@ -673,7 +673,7 @@ static int adb_open(struct inode *inode, struct file *file) goto out; } state = kmalloc(sizeof(struct adbdev_state), GFP_KERNEL); - if (state == 0) { + if (!state) { ret = -ENOMEM; goto out; } From 88316944c3b3aa3ce3249c51689ef1621049df9d Mon Sep 17 00:00:00 2001 From: Haowen Bai Date: Thu, 17 Mar 2022 10:42:33 +0800 Subject: [PATCH 2592/4122] macintosh/windfarm_pm91: Fix warning comparing pointer to 0 Avoid pointer type value compared with 0 to make code clear. 
Signed-off-by: Haowen Bai Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1647484953-15249-1-git-send-email-baihaowen@meizu.com --- drivers/macintosh/windfarm_pm91.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/macintosh/windfarm_pm91.c b/drivers/macintosh/windfarm_pm91.c index c8535855360d..120a9cfba0c5 100644 --- a/drivers/macintosh/windfarm_pm91.c +++ b/drivers/macintosh/windfarm_pm91.c @@ -150,7 +150,7 @@ static void wf_smu_create_cpu_fans(void) /* First, locate the PID params in SMU SBD */ hdr = smu_get_sdb_partition(SMU_SDB_CPUPIDDATA_ID, NULL); - if (hdr == 0) { + if (!hdr) { printk(KERN_WARNING "windfarm: CPU PID fan config not found " "max fan speed\n"); goto fail; From a823307bf0a3b79b27eea916bf6499ba4377cdf9 Mon Sep 17 00:00:00 2001 From: Haowen Bai Date: Thu, 17 Mar 2022 10:57:02 +0800 Subject: [PATCH 2593/4122] macintosh/windfarm_pm121: Fix warning comparing pointer to 0 Avoid pointer type value compared with 0 to make code clear. 
Signed-off-by: Haowen Bai Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1647485822-16717-1-git-send-email-baihaowen@meizu.com --- drivers/macintosh/windfarm_pm121.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/macintosh/windfarm_pm121.c b/drivers/macintosh/windfarm_pm121.c index 36312f163aac..82500417ebee 100644 --- a/drivers/macintosh/windfarm_pm121.c +++ b/drivers/macintosh/windfarm_pm121.c @@ -651,7 +651,7 @@ static void pm121_create_cpu_fans(void) /* First, locate the PID params in SMU SBD */ hdr = smu_get_sdb_partition(SMU_SDB_CPUPIDDATA_ID, NULL); - if (hdr == 0) { + if (!hdr) { printk(KERN_WARNING "pm121: CPU PID fan config not found.\n"); goto fail; } @@ -970,7 +970,7 @@ static int pm121_init_pm(void) const struct smu_sdbp_header *hdr; hdr = smu_get_sdb_partition(SMU_SDB_SENSORTREE_ID, NULL); - if (hdr != 0) { + if (hdr) { struct smu_sdbp_sensortree *st = (struct smu_sdbp_sensortree *)&hdr[1]; pm121_mach_model = st->model_id; From fc21ed8f26d980428f9b4e08e0fb72c7f7ffc9b8 Mon Sep 17 00:00:00 2001 From: Haowen Bai Date: Thu, 17 Mar 2022 17:24:49 +0800 Subject: [PATCH 2594/4122] macintosh/macio-adb: Fix warning comparing pointer to 0 Avoid pointer type value compared with 0 to make code clear. 
Signed-off-by: Haowen Bai Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1647509089-4280-1-git-send-email-baihaowen@meizu.com --- drivers/macintosh/macio-adb.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/macintosh/macio-adb.c b/drivers/macintosh/macio-adb.c index 9b63bd2551c6..3721402582b4 100644 --- a/drivers/macintosh/macio-adb.c +++ b/drivers/macintosh/macio-adb.c @@ -100,7 +100,7 @@ int macio_init(void) unsigned int irq; adbs = of_find_compatible_node(NULL, "adb", "chrp,adb0"); - if (adbs == 0) + if (!adbs) return -ENXIO; if (of_address_to_resource(adbs, 0, &r)) { @@ -183,7 +183,7 @@ static int macio_send_request(struct adb_request *req, int sync) req->reply_len = 0; spin_lock_irqsave(&macio_lock, flags); - if (current_req != 0) { + if (current_req) { last_req->next = req; last_req = req; } else { @@ -213,7 +213,8 @@ static irqreturn_t macio_adb_interrupt(int irq, void *arg) spin_lock(&macio_lock); if (in_8(&adb->intr.r) & TAG) { handled = 1; - if ((req = current_req) != 0) { + req = current_req; + if (req) { /* put the current request in */ for (i = 0; i < req->nbytes; ++i) out_8(&adb->data[i].r, req->data[i]); From 27f9690a81d7acf185b78be8d03d4b3a243116b1 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Mon, 21 Mar 2022 20:28:00 +1100 Subject: [PATCH 2595/4122] macintosh/via-pmu: Avoid compiler warnings when CONFIG_PROC_FS is disabled drivers/macintosh/via-pmu.c:897:12: warning: 'pmu_battery_proc_show' defined but not used [-Wunused-function] static int pmu_battery_proc_show(struct seq_file *m, void *v) ^~~~~~~~~~~~~~~~~~~~~ drivers/macintosh/via-pmu.c:871:12: warning: 'pmu_irqstats_proc_show' defined but not used [-Wunused-function] static int pmu_irqstats_proc_show(struct seq_file *m, void *v) ^~~~~~~~~~~~~~~~~~~~~~ drivers/macintosh/via-pmu.c:860:12: warning: 'pmu_info_proc_show' defined but not used [-Wunused-function] static int pmu_info_proc_show(struct seq_file *m, void *v) ^~~~~~~~~~~~~~~~~~ Add 
some #ifdefs to avoid unused code warnings when CONFIG_PROC_FS is disabled. Reported-by: Randy Dunlap Suggested-by: Christophe Leroy Signed-off-by: Finn Thain Tested-by: Randy Dunlap Acked-by: Randy Dunlap Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/0c11c0770fc4ec7e80a4b2e0ffce1055b792cfdb.1647854880.git.fthain@linux-m68k.org --- drivers/macintosh/via-pmu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c index 49657962d892..e0cb8daf4f08 100644 --- a/drivers/macintosh/via-pmu.c +++ b/drivers/macintosh/via-pmu.c @@ -203,9 +203,11 @@ static int init_pmu(void); static void pmu_start(void); static irqreturn_t via_pmu_interrupt(int irq, void *arg); static irqreturn_t gpio1_interrupt(int irq, void *arg); +#ifdef CONFIG_PROC_FS static int pmu_info_proc_show(struct seq_file *m, void *v); static int pmu_irqstats_proc_show(struct seq_file *m, void *v); static int pmu_battery_proc_show(struct seq_file *m, void *v); +#endif static void pmu_pass_intr(unsigned char *data, int len); static const struct proc_ops pmu_options_proc_ops; @@ -852,6 +854,7 @@ query_battery_state(void) 2, PMU_SMART_BATTERY_STATE, pmu_cur_battery+1); } +#ifdef CONFIG_PROC_FS static int pmu_info_proc_show(struct seq_file *m, void *v) { seq_printf(m, "PMU driver version : %d\n", PMU_DRIVER_VERSION); @@ -972,6 +975,7 @@ static const struct proc_ops pmu_options_proc_ops = { .proc_release = single_release, .proc_write = pmu_options_proc_write, }; +#endif #ifdef CONFIG_ADB /* Send an ADB command */ From a0542d2c45a64162e63ad2d80684e57de0566271 Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Thu, 16 Jun 2022 19:04:24 +0200 Subject: [PATCH 2596/4122] macintosh/via-pmu-backlight: Use backlight helper backlight_properties.fb_blank is deprecated. The states it represents are handled by other properties; but instead of accessing those properties directly, drivers should use the helpers provided by backlight.h. 
Instead of retrieving the backlight brightness in struct backlight_properties manually, and then checking whether the backlight should be on at all, use backlight_get_brightness() which does all this and insulates this from future changes. Signed-off-by: Stephen Kitt Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220616170425.1346081-1-steve@sk2.org --- drivers/macintosh/via-pmu-backlight.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/macintosh/via-pmu-backlight.c b/drivers/macintosh/via-pmu-backlight.c index 2194016122d2..c2d87e7fa85b 100644 --- a/drivers/macintosh/via-pmu-backlight.c +++ b/drivers/macintosh/via-pmu-backlight.c @@ -71,12 +71,7 @@ static int pmu_backlight_get_level_brightness(int level) static int __pmu_backlight_update_status(struct backlight_device *bd) { struct adb_request req; - int level = bd->props.brightness; - - - if (bd->props.power != FB_BLANK_UNBLANK || - bd->props.fb_blank != FB_BLANK_UNBLANK) - level = 0; + int level = backlight_get_brightness(bd); if (level > 0) { int pmulevel = pmu_backlight_get_level_brightness(level); From 2dfcace75e1e1dfbd89af63fce1bfe8aebe38427 Mon Sep 17 00:00:00 2001 From: Li zeming Date: Thu, 7 Jul 2022 09:53:52 +0800 Subject: [PATCH 2597/4122] macintosh/ams/ams: Add header file macro definition Add header file macro definition. 
Signed-off-by: Li zeming [mpe: Add endif comment] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220707015352.3391-1-zeming@nfschina.com --- drivers/macintosh/ams/ams.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/macintosh/ams/ams.h b/drivers/macintosh/ams/ams.h index 935bdd9cd9a6..2c159c8844c1 100644 --- a/drivers/macintosh/ams/ams.h +++ b/drivers/macintosh/ams/ams.h @@ -1,4 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _AMS_H +#define _AMS_H + #include #include #include @@ -69,3 +72,5 @@ extern int ams_i2c_init(struct device_node *np); extern int ams_input_init(void); extern void ams_input_exit(void); + +#endif /* _AMS_H */ From e3e528d29d13c01289f382a0d3ddb5312ac3dae3 Mon Sep 17 00:00:00 2001 From: Li zeming Date: Thu, 7 Jul 2022 09:59:49 +0800 Subject: [PATCH 2598/4122] macintosh/windfarm_pid: Add header file macro definition I think the header file could avoid redefinition errors at compile time by adding macro definitions. Signed-off-by: Li zeming [mpe: Add endif comment] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220707015949.3733-1-zeming@nfschina.com --- drivers/macintosh/windfarm_pid.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/macintosh/windfarm_pid.h b/drivers/macintosh/windfarm_pid.h index 83f747dbeafc..335613a200fb 100644 --- a/drivers/macintosh/windfarm_pid.h +++ b/drivers/macintosh/windfarm_pid.h @@ -1,4 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _WINDFARM_PID_H +#define _WINDFARM_PID_H + /* * Windfarm PowerMac thermal control. 
Generic PID helpers * @@ -82,3 +85,5 @@ struct wf_cpu_pid_state { extern void wf_cpu_pid_init(struct wf_cpu_pid_state *st, struct wf_cpu_pid_param *param); extern s32 wf_cpu_pid_run(struct wf_cpu_pid_state *st, s32 power, s32 temp); + +#endif /* _WINDFARM_PID_H */ From 3aa16303dc98b7b8baa2adbc3210fd513ec0e810 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 16 Sep 2022 22:16:38 +0800 Subject: [PATCH 2599/4122] macintosh: Switch to use for_each_child_of_node() macro Use for_each_child_of_node() macro instead of open coding it. No functional change. Signed-off-by: Yang Yingliang Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220916141638.685575-1-yangyingliang@huawei.com --- drivers/macintosh/windfarm_smu_controls.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/macintosh/windfarm_smu_controls.c b/drivers/macintosh/windfarm_smu_controls.c index e9957ad49a2a..bdd92b27da2a 100644 --- a/drivers/macintosh/windfarm_smu_controls.c +++ b/drivers/macintosh/windfarm_smu_controls.c @@ -266,12 +266,11 @@ static int __init smu_controls_init(void) return -ENODEV; /* Look for RPM fans */ - for (fans = NULL; (fans = of_get_next_child(smu, fans)) != NULL;) + for_each_child_of_node(smu, fans) if (of_node_name_eq(fans, "rpm-fans") || of_device_is_compatible(fans, "smu-rpm-fans")) break; - for (fan = NULL; - fans && (fan = of_get_next_child(fans, fan)) != NULL;) { + for_each_child_of_node(fans, fan) { struct smu_fan_control *fct; fct = smu_fan_create(fan, 0); @@ -286,11 +285,10 @@ static int __init smu_controls_init(void) /* Look for PWM fans */ - for (fans = NULL; (fans = of_get_next_child(smu, fans)) != NULL;) + for_each_child_of_node(smu, fans) if (of_node_name_eq(fans, "pwm-fans")) break; - for (fan = NULL; - fans && (fan = of_get_next_child(fans, fan)) != NULL;) { + for_each_child_of_node(fans, fan) { struct smu_fan_control *fct; fct = smu_fan_create(fan, 1); From 5ca86eae55a2f006e6c1edd2029b2cacb6979515 Mon Sep 17 
00:00:00 2001 From: Yang Yingliang Date: Fri, 4 Nov 2022 11:25:51 +0800 Subject: [PATCH 2600/4122] macintosh: fix possible memory leak in macio_add_one_device() After commit 1fa5ae857bb1 ("driver core: get rid of struct device's bus_id string array"), the name of device is allocated dynamically. It needs to be freed when of_device_register() fails. Call put_device() to give up the reference that's taken in device_initialize(), so that it can be freed in kobject_cleanup() when the refcount hits 0. macio device is freed in macio_release_dev(), so the kfree() can be removed. Fixes: 1fa5ae857bb1 ("driver core: get rid of struct device's bus_id string array") Signed-off-by: Yang Yingliang Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221104032551.1075335-1-yangyingliang@huawei.com --- drivers/macintosh/macio_asic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/macintosh/macio_asic.c b/drivers/macintosh/macio_asic.c index 1ec1e5984563..3bc1f374e657 100644 --- a/drivers/macintosh/macio_asic.c +++ b/drivers/macintosh/macio_asic.c @@ -424,7 +424,7 @@ static struct macio_dev * macio_add_one_device(struct macio_chip *chip, if (of_device_register(&dev->ofdev) != 0) { printk(KERN_DEBUG"macio: device registration error for %s!\n", dev_name(&dev->ofdev.dev)); - kfree(dev); + put_device(&dev->ofdev.dev); return NULL; } From dbaa3105736d4d73063ea0a3b01cd7fafce924e6 Mon Sep 17 00:00:00 2001 From: Xie Shaowen Date: Tue, 2 Aug 2022 15:41:48 +0800 Subject: [PATCH 2601/4122] macintosh/macio-adb: check the return value of ioremap() The function ioremap() in macio_init() can fail, so its return value should be checked. 
Fixes: 36874579dbf4c ("[PATCH] powerpc: macio-adb build fix") Reported-by: Hacash Robot Signed-off-by: Xie Shaowen Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220802074148.3213659-1-studentxswpy@163.com --- drivers/macintosh/macio-adb.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/macintosh/macio-adb.c b/drivers/macintosh/macio-adb.c index 3721402582b4..55a9f8c3a150 100644 --- a/drivers/macintosh/macio-adb.c +++ b/drivers/macintosh/macio-adb.c @@ -108,6 +108,10 @@ int macio_init(void) return -ENXIO; } adb = ioremap(r.start, sizeof(struct adb_regs)); + if (!adb) { + of_node_put(adbs); + return -ENOMEM; + } out_8(&adb->ctrl.r, 0); out_8(&adb->intr.r, 0); From 5836947613ef33d311b4eff6a32d019580a214f5 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 29 Jan 2022 08:16:04 +0100 Subject: [PATCH 2602/4122] powerpc/52xx: Fix a resource leak in an error handling path The error handling path of mpc52xx_lpbfifo_probe() has a request_irq() that is not balanced by a corresponding free_irq(). Add the missing call, as already done in the remove function. 
Fixes: 3c9059d79f5e ("powerpc/5200: add LocalPlus bus FIFO device driver") Signed-off-by: Christophe JAILLET Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/dec1496d46ccd5311d0f6e9f9ca4238be11bf6a6.1643440531.git.christophe.jaillet@wanadoo.fr --- arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c index 48038aaedbd3..2875c206ac0f 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c @@ -531,6 +531,7 @@ static int mpc52xx_lpbfifo_probe(struct platform_device *op) err_bcom_rx_irq: bcom_gen_bd_rx_release(lpbfifo.bcom_rx_task); err_bcom_rx: + free_irq(lpbfifo.irq, &lpbfifo); err_irq: iounmap(lpbfifo.regs); lpbfifo.regs = NULL; From e75d07bd8303588c33e6f1f180a9081fb58c872e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 9 Mar 2022 10:29:50 +0100 Subject: [PATCH 2603/4122] powerpc: Remove find_current_mm_pte() Last usage of find_current_mm_pte() was removed by commit 15759cb054ef ("powerpc/perf/callchain: Use __get_user_pages_fast in read_user_stack_slow") Remove it. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/ec79f462a3bfa8365b7df505e574d5d85246bc68.1646818177.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/pte-walk.h | 25 ------------------------- arch/powerpc/mm/book3s64/pgtable.c | 4 ++-- 2 files changed, 2 insertions(+), 27 deletions(-) diff --git a/arch/powerpc/include/asm/pte-walk.h b/arch/powerpc/include/asm/pte-walk.h index 714a35f0d425..73c22c579a79 100644 --- a/arch/powerpc/include/asm/pte-walk.h +++ b/arch/powerpc/include/asm/pte-walk.h @@ -60,29 +60,4 @@ static inline phys_addr_t ppc_find_vmap_phys(unsigned long addr) return pa; } -/* - * This is what we should always use. Any other lockless page table lookup needs - * careful audit against THP split. 
- */ -static inline pte_t *find_current_mm_pte(pgd_t *pgdir, unsigned long ea, - bool *is_thp, unsigned *hshift) -{ - pte_t *pte; - - VM_WARN(!arch_irqs_disabled(), "%s called with irq enabled\n", __func__); - VM_WARN(pgdir != current->mm->pgd, - "%s lock less page table lookup called on wrong mm\n", __func__); - pte = __find_linux_pte(pgdir, ea, is_thp, hshift); - -#if defined(CONFIG_DEBUG_VM) && \ - !(defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)) - /* - * We should not find huge page if these configs are not enabled. - */ - if (hshift) - WARN_ON(*hshift); -#endif - return pte; -} - #endif /* _ASM_POWERPC_PTE_WALK_H */ diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index f6151a589298..85c84e89e3ea 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -100,14 +100,14 @@ static void do_serialize(void *arg) } /* - * Serialize against find_current_mm_pte which does lock-less + * Serialize against __find_linux_pte() which does lock-less * lookup in page tables with local interrupts disabled. For huge pages * it casts pmd_t to pte_t. Since format of pte_t is different from * pmd_t we want to prevent transit from pmd pointing to page table * to pmd pointing to huge page (and back) while interrupts are disabled. * We clear pmd to possibly replace it with page table pointer in * different code paths. So make sure we wait for the parallel - * find_current_mm_pte to finish. + * __find_linux_pte() to finish. */ void serialize_against_pte_lookup(struct mm_struct *mm) { From 4562bffb83b88e61ea9c9912e50efbd5a941f0b3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 23 Nov 2022 22:19:18 -0800 Subject: [PATCH 2604/4122] powerpc/mpc52xx_lpbfifo: fix all kernel-doc warnings Fix multiple kernel-doc warnings in mpc52xx_lpbfifo.c: arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c:377: warning: expecting prototype for mpc52xx_lpbfifo_bcom_poll(). 
Prototype was for mpc52xx_lpbfifo_poll() instead mpc52xx_lpbfifo.c:221: warning: No description found for return value of 'mpc52xx_lpbfifo_irq' mpc52xx_lpbfifo.c:327: warning: No description found for return value of 'mpc52xx_lpbfifo_bcom_irq' mpc52xx_lpbfifo.c:398: warning: No description found for return value of 'mpc52xx_lpbfifo_submit' mpc52xx_lpbfifo.c:64: warning: Function parameter or member 'req' not described in 'mpc52xx_lpbfifo_kick' mpc52xx_lpbfifo.c:220: warning: contents before sections mpc52xx_lpbfifo.c:223: warning: Function parameter or member 'irq' not described in 'mpc52xx_lpbfifo_irq' mpc52xx_lpbfifo.c:223: warning: Function parameter or member 'dev_id' not described in 'mpc52xx_lpbfifo_irq' mpc52xx_lpbfifo.c:328: warning: contents before sections mpc52xx_lpbfifo.c:331: warning: Function parameter or member 'irq' not described in 'mpc52xx_lpbfifo_bcom_irq' mpc52xx_lpbfifo.c:331: warning: Function parameter or member 'dev_id' not described in 'mpc52xx_lpbfifo_bcom_irq' Reported-by: kernel test robot Signed-off-by: Randy Dunlap Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221124061918.1967-1-rdunlap@infradead.org --- arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c index 2875c206ac0f..6d1dd6e87478 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c @@ -59,6 +59,8 @@ static struct mpc52xx_lpbfifo lpbfifo; /** * mpc52xx_lpbfifo_kick - Trigger the next block of data to be transferred + * + * @req: Pointer to request structure */ static void mpc52xx_lpbfifo_kick(struct mpc52xx_lpbfifo_request *req) { @@ -178,6 +180,8 @@ static void mpc52xx_lpbfifo_kick(struct mpc52xx_lpbfifo_request *req) /** * mpc52xx_lpbfifo_irq - IRQ handler for LPB FIFO + * @irq: IRQ number to be handled + * @dev_id: device ID cookie 
* * On transmit, the dma completion irq triggers before the fifo completion * triggers. Handle the dma completion here instead of the LPB FIFO Bestcomm @@ -216,6 +220,8 @@ static void mpc52xx_lpbfifo_kick(struct mpc52xx_lpbfifo_request *req) * or nested spinlock condition. The out path is non-trivial, so * extra fiddling is done to make sure all paths lead to the same * outbound code. + * + * Return: irqreturn code (%IRQ_HANDLED) */ static irqreturn_t mpc52xx_lpbfifo_irq(int irq, void *dev_id) { @@ -320,8 +326,12 @@ static irqreturn_t mpc52xx_lpbfifo_irq(int irq, void *dev_id) /** * mpc52xx_lpbfifo_bcom_irq - IRQ handler for LPB FIFO Bestcomm task + * @irq: IRQ number to be handled + * @dev_id: device ID cookie * * Only used when receiving data. + * + * Return: irqreturn code (%IRQ_HANDLED) */ static irqreturn_t mpc52xx_lpbfifo_bcom_irq(int irq, void *dev_id) { @@ -372,7 +382,7 @@ static irqreturn_t mpc52xx_lpbfifo_bcom_irq(int irq, void *dev_id) } /** - * mpc52xx_lpbfifo_bcom_poll - Poll for DMA completion + * mpc52xx_lpbfifo_poll - Poll for DMA completion */ void mpc52xx_lpbfifo_poll(void) { @@ -393,6 +403,8 @@ EXPORT_SYMBOL(mpc52xx_lpbfifo_poll); /** * mpc52xx_lpbfifo_submit - Submit an LPB FIFO transfer request. * @req: Pointer to request structure + * + * Return: %0 on success, -errno code on error */ int mpc52xx_lpbfifo_submit(struct mpc52xx_lpbfifo_request *req) { From 932c6dea4f32f7d71488137c475b60a77e56bb2a Mon Sep 17 00:00:00 2001 From: Deming Wang Date: Wed, 13 Apr 2022 06:55:07 -0400 Subject: [PATCH 2605/4122] powerpc/xive: remove unused parameter The parameter xc to xive_cleanup_single_escalation() is unused, so we can remove it. 
Signed-off-by: Deming Wang [mpe: Reword change log, unwrap lines < 90 columns] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220413105507.1729-1-wangdeming@inspur.com --- arch/powerpc/kvm/book3s_xive.c | 6 ++---- arch/powerpc/kvm/book3s_xive.h | 3 +-- arch/powerpc/kvm/book3s_xive_native.c | 3 +-- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 4ca23644f752..d64b2dcc0e7f 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -1785,8 +1785,7 @@ void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu) * stale_p (because it has no easy way to address it). Hence we have * to adjust stale_p before shutting down the interrupt. */ -void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, - struct kvmppc_xive_vcpu *xc, int irq) +void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, int irq) { struct irq_data *d = irq_get_irq_data(irq); struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); @@ -1827,8 +1826,7 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { if (xc->esc_virq[i]) { if (kvmppc_xive_has_single_escalation(xc->xive)) - xive_cleanup_single_escalation(vcpu, xc, - xc->esc_virq[i]); + xive_cleanup_single_escalation(vcpu, xc->esc_virq[i]); free_irq(xc->esc_virq[i], vcpu); irq_dispose_mapping(xc->esc_virq[i]); kfree(xc->esc_virq_names[i]); diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h index 1e48f72e8aa5..62bf39f53783 100644 --- a/arch/powerpc/kvm/book3s_xive.h +++ b/arch/powerpc/kvm/book3s_xive.h @@ -299,8 +299,7 @@ int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio); int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio, bool single_escalation); struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type); -void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, - struct kvmppc_xive_vcpu *xc, int 
irq); +void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, int irq); int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp); int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr); bool kvmppc_xive_check_save_restore(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 5271c33fe79e..4f566bea5e10 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -93,8 +93,7 @@ void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) /* Free the escalation irq */ if (xc->esc_virq[i]) { if (kvmppc_xive_has_single_escalation(xc->xive)) - xive_cleanup_single_escalation(vcpu, xc, - xc->esc_virq[i]); + xive_cleanup_single_escalation(vcpu, xc->esc_virq[i]); free_irq(xc->esc_virq[i], vcpu); irq_dispose_mapping(xc->esc_virq[i]); kfree(xc->esc_virq_names[i]); From 37195edebf479b94f1e20c2a83a29e4beebe7ff5 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 21 May 2022 13:11:32 +0200 Subject: [PATCH 2606/4122] cxl: fix typo in comment Spelling mistake (triple letters) in comment. Detected with the help of Coccinelle. Signed-off-by: Julia Lawall Acked-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220521111145.81697-82-Julia.Lawall@inria.fr --- include/misc/cxl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/misc/cxl.h b/include/misc/cxl.h index 0410412de16b..d8044299d654 100644 --- a/include/misc/cxl.h +++ b/include/misc/cxl.h @@ -30,7 +30,7 @@ unsigned int cxl_pci_to_cfg_record(struct pci_dev *dev); /* * Context lifetime overview: * - * An AFU context may be inited and then started and stoppped multiple times + * An AFU context may be inited and then started and stopped multiple times * before it's released. ie. 
* - cxl_dev_context_init() * - cxl_start_context() From 1d09697ff22908ae487fc8c4fbde1811732be523 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Sun, 5 Jun 2022 10:00:38 +0400 Subject: [PATCH 2607/4122] cxl: Fix refcount leak in cxl_calc_capp_routing of_get_next_parent() returns a node pointer with refcount incremented, we should use of_node_put() on it when it is not needed anymore. This function only calls of_node_put() in normal path, missing it in the error path. Add missing of_node_put() to avoid refcount leak. Fixes: f24be42aab37 ("cxl: Add psl9 specific code") Signed-off-by: Miaoqian Lin Acked-by: Andrew Donnellan Acked-by: Frederic Barrat Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220605060038.62217-1-linmq006@gmail.com --- drivers/misc/cxl/pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 3de0aea62ade..62385a529d86 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -387,6 +387,7 @@ int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid, rc = get_phb_index(np, phb_index); if (rc) { pr_err("cxl: invalid phb index\n"); + of_node_put(np); return rc; } From f949ccee1dde970bc77dc871b4f0b5e651577344 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 11 Nov 2022 22:54:39 +0800 Subject: [PATCH 2608/4122] cxl: fix possible null-ptr-deref in cxl_guest_init_afu|adapter() If device_register() fails in cxl_register_afu|adapter(), the device is not added, device_unregister() can not be called in the error path, otherwise it will cause a null-ptr-deref because of removing not added device. As comment of device_register() says, it should use put_device() to give up the reference in the error path. So split device_unregister() into device_del() and put_device(), then goes to put dev when register fails. 
Fixes: 14baf4d9c739 ("cxl: Add guest-specific code") Signed-off-by: Yang Yingliang Acked-by: Andrew Donnellan Acked-by: Frederic Barrat Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221111145440.2426970-1-yangyingliang@huawei.com --- drivers/misc/cxl/guest.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/misc/cxl/guest.c b/drivers/misc/cxl/guest.c index 375f692ae9d6..fb95a2d5cef4 100644 --- a/drivers/misc/cxl/guest.c +++ b/drivers/misc/cxl/guest.c @@ -965,10 +965,10 @@ int cxl_guest_init_afu(struct cxl *adapter, int slice, struct device_node *afu_n * if it returns an error! */ if ((rc = cxl_register_afu(afu))) - goto err_put1; + goto err_put_dev; if ((rc = cxl_sysfs_afu_add(afu))) - goto err_put1; + goto err_del_dev; /* * pHyp doesn't expose the programming models supported by the @@ -984,7 +984,7 @@ int cxl_guest_init_afu(struct cxl *adapter, int slice, struct device_node *afu_n afu->modes_supported = CXL_MODE_DIRECTED; if ((rc = cxl_afu_select_best_mode(afu))) - goto err_put2; + goto err_remove_sysfs; adapter->afu[afu->slice] = afu; @@ -1004,10 +1004,12 @@ int cxl_guest_init_afu(struct cxl *adapter, int slice, struct device_node *afu_n return 0; -err_put2: +err_remove_sysfs: cxl_sysfs_afu_remove(afu); -err_put1: - device_unregister(&afu->dev); +err_del_dev: + device_del(&afu->dev); +err_put_dev: + put_device(&afu->dev); free = false; guest_release_serr_irq(afu); err2: @@ -1141,18 +1143,20 @@ struct cxl *cxl_guest_init_adapter(struct device_node *np, struct platform_devic * even if it returns an error! 
*/ if ((rc = cxl_register_adapter(adapter))) - goto err_put1; + goto err_put_dev; if ((rc = cxl_sysfs_adapter_add(adapter))) - goto err_put1; + goto err_del_dev; /* release the context lock as the adapter is configured */ cxl_adapter_context_unlock(adapter); return adapter; -err_put1: - device_unregister(&adapter->dev); +err_del_dev: + device_del(&adapter->dev); +err_put_dev: + put_device(&adapter->dev); free = false; cxl_guest_remove_chardev(adapter); err1: From 8bf03f557d6c6e108cf47bea32f4a68e276e1157 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 11 Nov 2022 22:54:40 +0800 Subject: [PATCH 2609/4122] cxl: fix possible null-ptr-deref in cxl_pci_init_afu|adapter() If device_register() fails in cxl_pci_afu|adapter(), the device is not added, device_unregister() can not be called in the error path, otherwise it will cause a null-ptr-deref because of removing not added device. As comment of device_register() says, it should use put_device() to give up the reference in the error path. So split device_unregister() into device_del() and put_device(), then goes to put dev when register fails. Fixes: f204e0b8cedd ("cxl: Driver code for powernv PCIe based cards for userspace access") Signed-off-by: Yang Yingliang Acked-by: Andrew Donnellan Acked-by: Frederic Barrat Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221111145440.2426970-2-yangyingliang@huawei.com --- drivers/misc/cxl/pci.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 62385a529d86..0ff944860dda 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -1165,10 +1165,10 @@ static int pci_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev) * if it returns an error! 
*/ if ((rc = cxl_register_afu(afu))) - goto err_put1; + goto err_put_dev; if ((rc = cxl_sysfs_afu_add(afu))) - goto err_put1; + goto err_del_dev; adapter->afu[afu->slice] = afu; @@ -1177,10 +1177,12 @@ static int pci_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev) return 0; -err_put1: +err_del_dev: + device_del(&afu->dev); +err_put_dev: pci_deconfigure_afu(afu); cxl_debugfs_afu_remove(afu); - device_unregister(&afu->dev); + put_device(&afu->dev); return rc; err_free_native: @@ -1668,23 +1670,25 @@ static struct cxl *cxl_pci_init_adapter(struct pci_dev *dev) * even if it returns an error! */ if ((rc = cxl_register_adapter(adapter))) - goto err_put1; + goto err_put_dev; if ((rc = cxl_sysfs_adapter_add(adapter))) - goto err_put1; + goto err_del_dev; /* Release the context lock as adapter is configured */ cxl_adapter_context_unlock(adapter); return adapter; -err_put1: +err_del_dev: + device_del(&adapter->dev); +err_put_dev: /* This should mirror cxl_remove_adapter, except without the * sysfs parts */ cxl_debugfs_adapter_remove(adapter); cxl_deconfigure_adapter(adapter); - device_unregister(&adapter->dev); + put_device(&adapter->dev); return ERR_PTR(rc); err_release: From 295faa17722a11cac8dbf51e4c9f9405a5e07ef1 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 11 Nov 2022 22:59:29 +0800 Subject: [PATCH 2610/4122] ocxl: fix possible name leak in ocxl_file_register_afu() If device_register() returns error in ocxl_file_register_afu(), the name allocated by dev_set_name() need be freed. As comment of device_register() says, it should use put_device() to give up the reference in the error path. So fix this by calling put_device(), then the name can be freed in kobject_cleanup(), and info is freed in info_release(). 
Fixes: 75ca758adbaf ("ocxl: Create a clear delineation between ocxl backend & frontend") Signed-off-by: Yang Yingliang Acked-by: Andrew Donnellan Acked-by: Frederic Barrat Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221111145929.2429271-1-yangyingliang@huawei.com --- drivers/misc/ocxl/file.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c index d46dba2df5a1..452d5777a0e4 100644 --- a/drivers/misc/ocxl/file.c +++ b/drivers/misc/ocxl/file.c @@ -541,8 +541,11 @@ int ocxl_file_register_afu(struct ocxl_afu *afu) goto err_put; rc = device_register(&info->dev); - if (rc) - goto err_put; + if (rc) { + free_minor(info); + put_device(&info->dev); + return rc; + } rc = ocxl_sysfs_register_afu(info); if (rc) From 5f58cad1e4c65bebee34292696c6d2105eeb2027 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 21 Nov 2022 23:43:39 +0800 Subject: [PATCH 2611/4122] ocxl: fix pci device refcount leak when calling get_function_0() get_function_0() calls pci_get_domain_bus_and_slot(), as comment says, it returns a pci device with refcount increment, so after using it, pci_dev_put() needs be called. Get the device reference when get_function_0() is not called, so pci_dev_put() can be called in the error path and callers unconditionally. And add comment above get_dvsec_vendor0() to tell callers to call pci_dev_put(). 
Fixes: 87db7579ebd5 ("ocxl: control via sysfs whether the FPGA is reloaded on a link reset") Suggested-by: Andrew Donnellan Signed-off-by: Yang Yingliang Acked-by: Frederic Barrat Acked-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221121154339.4088935-1-yangyingliang@huawei.com --- drivers/misc/ocxl/config.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c index e401a51596b9..92ab49705f64 100644 --- a/drivers/misc/ocxl/config.c +++ b/drivers/misc/ocxl/config.c @@ -193,6 +193,18 @@ static int read_dvsec_vendor(struct pci_dev *dev) return 0; } +/** + * get_dvsec_vendor0() - Find a related PCI device (function 0) + * @dev: PCI device to match + * @dev0: The PCI device (function 0) found + * @out_pos: The position of PCI device (function 0) + * + * Returns 0 on success, negative on failure. + * + * NOTE: If it's successful, the reference of dev0 is increased, + * so after using it, the callers must call pci_dev_put() to give + * up the reference. 
+ */ static int get_dvsec_vendor0(struct pci_dev *dev, struct pci_dev **dev0, int *out_pos) { @@ -202,10 +214,14 @@ static int get_dvsec_vendor0(struct pci_dev *dev, struct pci_dev **dev0, dev = get_function_0(dev); if (!dev) return -1; + } else { + dev = pci_dev_get(dev); } pos = find_dvsec(dev, OCXL_DVSEC_VENDOR_ID); - if (!pos) + if (!pos) { + pci_dev_put(dev); return -1; + } *dev0 = dev; *out_pos = pos; return 0; @@ -222,6 +238,7 @@ int ocxl_config_get_reset_reload(struct pci_dev *dev, int *val) pci_read_config_dword(dev0, pos + OCXL_DVSEC_VENDOR_RESET_RELOAD, &reset_reload); + pci_dev_put(dev0); *val = !!(reset_reload & BIT(0)); return 0; } @@ -243,6 +260,7 @@ int ocxl_config_set_reset_reload(struct pci_dev *dev, int val) reset_reload &= ~BIT(0); pci_write_config_dword(dev0, pos + OCXL_DVSEC_VENDOR_RESET_RELOAD, reset_reload); + pci_dev_put(dev0); return 0; } From 14b5d59a261b1947db287b3b52f4bb1dc496dede Mon Sep 17 00:00:00 2001 From: Deming Wang Date: Fri, 1 Jul 2022 05:45:53 -0400 Subject: [PATCH 2612/4122] powerpc/pseries: Fix formatting to make code look more beautiful Operators should be separated by spaces in tce_buildmulti_pSeriesLP() Signed-off-by: Deming Wang Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220701094553.1722-1-wangdeming@inspur.com --- arch/powerpc/platforms/pseries/iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 561adac69022..c74b71d4733d 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -248,7 +248,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, * Set up the page with TCE data, looping through and setting * the values. 
*/ - limit = min_t(long, npages, 4096/TCE_ENTRY_SIZE); + limit = min_t(long, npages, 4096 / TCE_ENTRY_SIZE); for (l = 0; l < limit; l++) { tcep[l] = cpu_to_be64(proto_tce | rpn << tceshift); From 7af82ff90a2b0690c2c45818fcce4c4ac3b187f3 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Tue, 9 Aug 2022 16:24:25 +0530 Subject: [PATCH 2613/4122] powerpc/ftrace: Ignore weak functions Extend commit b39181f7c6907d ("ftrace: Add FTRACE_MCOUNT_MAX_OFFSET to avoid adding weak function") to ppc32 and ppc64 -mprofile-kernel by defining FTRACE_MCOUNT_MAX_OFFSET. For ppc64 -mprofile-kernel ABI, we can have two instructions at function entry for TOC setup followed by 'mflr r0' and 'bl _mcount'. So, the mcount location is at most the 4th instruction in a function. For ppc32, mcount location is always the 3rd instruction in a function, preceded by 'mflr r0' and 'stw r0,4(r1)'. With this patch, and with ppc64le_guest_defconfig and some ftrace/bpf config items enabled: # grep __ftrace_invalid_address available_filter_functions | wc -l 79 Signed-off-by: Naveen N. 
Rao Acked-by: Steven Rostedt (Google) Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220809105425.424045-1-naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/include/asm/ftrace.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 3cee7115441b..ade406dc6504 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -10,6 +10,13 @@ #define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR +/* Ignore unused weak functions which will have larger offsets */ +#ifdef CONFIG_MPROFILE_KERNEL +#define FTRACE_MCOUNT_MAX_OFFSET 12 +#elif defined(CONFIG_PPC32) +#define FTRACE_MCOUNT_MAX_OFFSET 8 +#endif + #ifndef __ASSEMBLY__ extern void _mcount(void); From addebe1cfa71eb29caa9d5c6bc719171e4e76414 Mon Sep 17 00:00:00 2001 From: Nicholas Miehlbradt Date: Wed, 10 Aug 2022 04:03:21 +0000 Subject: [PATCH 2614/4122] docs: powerpc: add POWER9 and POWER10 to CPU families Add POWER9 and POWER10 to CPU families and list Radix MMU. 
Signed-off-by: Nicholas Miehlbradt Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220810040321.375396-1-nicholas@linux.ibm.com --- Documentation/powerpc/cpu_families.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/Documentation/powerpc/cpu_families.rst b/Documentation/powerpc/cpu_families.rst index 9b84e045e713..eb7e60649b43 100644 --- a/Documentation/powerpc/cpu_families.rst +++ b/Documentation/powerpc/cpu_families.rst @@ -10,6 +10,7 @@ Book3S (aka sPAPR) ------------------ - Hash MMU (except 603 and e300) +- Radix MMU (POWER9 and later) - Software loaded TLB (603 and e300) - Selectable Software loaded TLB in addition to hash MMU (755, 7450, e600) - Mix of 32 & 64 bit:: @@ -100,6 +101,18 @@ Book3S (aka sPAPR) v +--------------+ | POWER8 | + +--------------+ + | + | + v + +--------------+ + | POWER9 | + +--------------+ + | + | + v + +--------------+ + | POWER10 | +--------------+ From ff8fae94e26f5cd2779ceda0ee6d714a10501abd Mon Sep 17 00:00:00 2001 From: Shaomin Deng Date: Sun, 4 Sep 2022 11:51:02 -0400 Subject: [PATCH 2615/4122] drivers/ps3: Fix double word in comments Drop the repeated word "when" in comments. Signed-off-by: Shaomin Deng Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220904155102.26957-1-dengshaomin@cdjrlc.com --- drivers/ps3/ps3-lpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ps3/ps3-lpm.c b/drivers/ps3/ps3-lpm.c index 65512b6cc6fd..200ad8751860 100644 --- a/drivers/ps3/ps3-lpm.c +++ b/drivers/ps3/ps3-lpm.c @@ -1066,7 +1066,7 @@ EXPORT_SYMBOL_GPL(ps3_disable_pm_interrupts); * instance, specified by one of enum ps3_lpm_tb_type. * @tb_cache: Optional user supplied buffer to use as the trace buffer cache. * If NULL, the driver will allocate and manage an internal buffer. - * Unused when when @tb_type is PS3_LPM_TB_TYPE_NONE. + * Unused when @tb_type is PS3_LPM_TB_TYPE_NONE. 
* @tb_cache_size: The size in bytes of the user supplied @tb_cache buffer. * Unused when @tb_cache is NULL or @tb_type is PS3_LPM_TB_TYPE_NONE. */ From b86cf14f240e002e001fd4f2bf49114c7836fd5c Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 9 Sep 2022 15:23:12 +1000 Subject: [PATCH 2616/4122] powerpc: add compile-time support for lbarx, lharx ISA v2.06 (POWER7 and up) as well as e6500 support lbarx and lharx. Add a compile option that allows code to use it, and add support in cmpxchg and xchg 8 and 16 bit values without shifting and masking. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220909052312.63916-1-npiggin@gmail.com --- arch/powerpc/Kconfig | 3 + arch/powerpc/include/asm/cmpxchg.h | 231 ++++++++++++++++++++++++- arch/powerpc/lib/sstep.c | 21 +-- arch/powerpc/platforms/Kconfig.cputype | 5 + 4 files changed, 249 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 699df27b0e2f..4fd4924f6d50 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -293,6 +293,9 @@ config PPC_BARRIER_NOSPEC default y depends on PPC_BOOK3S_64 || PPC_E500 +config PPC_HAS_LBARX_LHARX + bool + config EARLY_PRINTK bool default y diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h index 05f246c0e36e..d0ea0571e79a 100644 --- a/arch/powerpc/include/asm/cmpxchg.h +++ b/arch/powerpc/include/asm/cmpxchg.h @@ -77,10 +77,76 @@ u32 __cmpxchg_##type##sfx(volatile void *p, u32 old, u32 new) \ * the previous value stored there. */ +#ifndef CONFIG_PPC_HAS_LBARX_LHARX XCHG_GEN(u8, _local, "memory"); XCHG_GEN(u8, _relaxed, "cc"); XCHG_GEN(u16, _local, "memory"); XCHG_GEN(u16, _relaxed, "cc"); +#else +static __always_inline unsigned long +__xchg_u8_local(volatile void *p, unsigned long val) +{ + unsigned long prev; + + __asm__ __volatile__( +"1: lbarx %0,0,%2 # __xchg_u8_local\n" +" stbcx. 
%3,0,%2 \n" +" bne- 1b" + : "=&r" (prev), "+m" (*(volatile unsigned char *)p) + : "r" (p), "r" (val) + : "cc", "memory"); + + return prev; +} + +static __always_inline unsigned long +__xchg_u8_relaxed(u8 *p, unsigned long val) +{ + unsigned long prev; + + __asm__ __volatile__( +"1: lbarx %0,0,%2 # __xchg_u8_relaxed\n" +" stbcx. %3,0,%2\n" +" bne- 1b" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (val) + : "cc"); + + return prev; +} + +static __always_inline unsigned long +__xchg_u16_local(volatile void *p, unsigned long val) +{ + unsigned long prev; + + __asm__ __volatile__( +"1: lharx %0,0,%2 # __xchg_u16_local\n" +" sthcx. %3,0,%2\n" +" bne- 1b" + : "=&r" (prev), "+m" (*(volatile unsigned short *)p) + : "r" (p), "r" (val) + : "cc", "memory"); + + return prev; +} + +static __always_inline unsigned long +__xchg_u16_relaxed(u16 *p, unsigned long val) +{ + unsigned long prev; + + __asm__ __volatile__( +"1: lharx %0,0,%2 # __xchg_u16_relaxed\n" +" sthcx. %3,0,%2\n" +" bne- 1b" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (val) + : "cc"); + + return prev; +} +#endif static __always_inline unsigned long __xchg_u32_local(volatile void *p, unsigned long val) @@ -198,11 +264,12 @@ __xchg_relaxed(void *ptr, unsigned long x, unsigned int size) (__typeof__(*(ptr))) __xchg_relaxed((ptr), \ (unsigned long)_x_, sizeof(*(ptr))); \ }) + /* * Compare and exchange - if *p == old, set it to new, * and return the old value of *p. 
*/ - +#ifndef CONFIG_PPC_HAS_LBARX_LHARX CMPXCHG_GEN(u8, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory"); CMPXCHG_GEN(u8, _local, , , "memory"); CMPXCHG_GEN(u8, _acquire, , PPC_ACQUIRE_BARRIER, "memory"); @@ -211,6 +278,168 @@ CMPXCHG_GEN(u16, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory"); CMPXCHG_GEN(u16, _local, , , "memory"); CMPXCHG_GEN(u16, _acquire, , PPC_ACQUIRE_BARRIER, "memory"); CMPXCHG_GEN(u16, _relaxed, , , "cc"); +#else +static __always_inline unsigned long +__cmpxchg_u8(volatile unsigned char *p, unsigned long old, unsigned long new) +{ + unsigned int prev; + + __asm__ __volatile__ ( + PPC_ATOMIC_ENTRY_BARRIER +"1: lbarx %0,0,%2 # __cmpxchg_u8\n" +" cmpw 0,%0,%3\n" +" bne- 2f\n" +" stbcx. %4,0,%2\n" +" bne- 1b" + PPC_ATOMIC_EXIT_BARRIER + "\n\ +2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc", "memory"); + + return prev; +} + +static __always_inline unsigned long +__cmpxchg_u8_local(volatile unsigned char *p, unsigned long old, + unsigned long new) +{ + unsigned int prev; + + __asm__ __volatile__ ( +"1: lbarx %0,0,%2 # __cmpxchg_u8_local\n" +" cmpw 0,%0,%3\n" +" bne- 2f\n" +" stbcx. %4,0,%2\n" +" bne- 1b\n" +"2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc", "memory"); + + return prev; +} + +static __always_inline unsigned long +__cmpxchg_u8_relaxed(u8 *p, unsigned long old, unsigned long new) +{ + unsigned long prev; + + __asm__ __volatile__ ( +"1: lbarx %0,0,%2 # __cmpxchg_u8_relaxed\n" +" cmpw 0,%0,%3\n" +" bne- 2f\n" +" stbcx. %4,0,%2\n" +" bne- 1b\n" +"2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc"); + + return prev; +} + +static __always_inline unsigned long +__cmpxchg_u8_acquire(u8 *p, unsigned long old, unsigned long new) +{ + unsigned long prev; + + __asm__ __volatile__ ( +"1: lbarx %0,0,%2 # __cmpxchg_u8_acquire\n" +" cmpw 0,%0,%3\n" +" bne- 2f\n" +" stbcx. 
%4,0,%2\n" +" bne- 1b\n" + PPC_ACQUIRE_BARRIER +"2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc", "memory"); + + return prev; +} + +static __always_inline unsigned long +__cmpxchg_u16(volatile unsigned short *p, unsigned long old, unsigned long new) +{ + unsigned int prev; + + __asm__ __volatile__ ( + PPC_ATOMIC_ENTRY_BARRIER +"1: lharx %0,0,%2 # __cmpxchg_u16\n" +" cmpw 0,%0,%3\n" +" bne- 2f\n" +" sthcx. %4,0,%2\n" +" bne- 1b\n" + PPC_ATOMIC_EXIT_BARRIER +"2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc", "memory"); + + return prev; +} + +static __always_inline unsigned long +__cmpxchg_u16_local(volatile unsigned short *p, unsigned long old, + unsigned long new) +{ + unsigned int prev; + + __asm__ __volatile__ ( +"1: lharx %0,0,%2 # __cmpxchg_u16_local\n" +" cmpw 0,%0,%3\n" +" bne- 2f\n" +" sthcx. %4,0,%2\n" +" bne- 1b\n" +"2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc", "memory"); + + return prev; +} + +static __always_inline unsigned long +__cmpxchg_u16_relaxed(u16 *p, unsigned long old, unsigned long new) +{ + unsigned long prev; + + __asm__ __volatile__ ( +"1: lharx %0,0,%2 # __cmpxchg_u16_relaxed\n" +" cmpw 0,%0,%3\n" +" bne- 2f\n" +" sthcx. %4,0,%2\n" +" bne- 1b\n" +"2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc"); + + return prev; +} + +static __always_inline unsigned long +__cmpxchg_u16_acquire(u16 *p, unsigned long old, unsigned long new) +{ + unsigned long prev; + + __asm__ __volatile__ ( +"1: lharx %0,0,%2 # __cmpxchg_u16_acquire\n" +" cmpw 0,%0,%3\n" +" bne- 2f\n" +" sthcx.
%4,0,%2\n" +" bne- 1b\n" + PPC_ACQUIRE_BARRIER +"2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc", "memory"); + + return prev; +} +#endif static __always_inline unsigned long __cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 398b5694aeb7..38158b77a801 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -2284,15 +2284,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, op->type = MKOP(STCX, 0, 4); break; -#ifdef __powerpc64__ - case 84: /* ldarx */ - op->type = MKOP(LARX, 0, 8); - break; - - case 214: /* stdcx. */ - op->type = MKOP(STCX, 0, 8); - break; - +#ifdef CONFIG_PPC_HAS_LBARX_LHARX case 52: /* lbarx */ op->type = MKOP(LARX, 0, 1); break; @@ -2308,6 +2300,15 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 726: /* sthcx. */ op->type = MKOP(STCX, 0, 2); break; +#endif +#ifdef __powerpc64__ + case 84: /* ldarx */ + op->type = MKOP(LARX, 0, 8); + break; + + case 214: /* stdcx. 
*/ + op->type = MKOP(STCX, 0, 8); + break; case 276: /* lqarx */ if (!((rd & 1) || rd == ra || rd == rb)) @@ -3334,7 +3335,7 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op) err = 0; val = 0; switch (size) { -#ifdef __powerpc64__ +#ifdef CONFIG_PPC_HAS_LBARX_LHARX case 1: __get_user_asmx(val, ea, err, "lbarx"); break; diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 0c4eed9aea80..7bac213b4125 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -135,6 +135,7 @@ config GENERIC_CPU depends on PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN select ARCH_HAS_FAST_MULTIPLIER select PPC_64S_HASH_MMU + select PPC_HAS_LBARX_LHARX config POWERPC_CPU bool "Generic 32 bits powerpc" @@ -160,17 +161,20 @@ config POWER7_CPU depends on PPC_BOOK3S_64 select ARCH_HAS_FAST_MULTIPLIER select PPC_64S_HASH_MMU + select PPC_HAS_LBARX_LHARX config POWER8_CPU bool "POWER8" depends on PPC_BOOK3S_64 select ARCH_HAS_FAST_MULTIPLIER select PPC_64S_HASH_MMU + select PPC_HAS_LBARX_LHARX config POWER9_CPU bool "POWER9" depends on PPC_BOOK3S_64 select ARCH_HAS_FAST_MULTIPLIER + select PPC_HAS_LBARX_LHARX config POWER10_CPU bool "POWER10" @@ -184,6 +188,7 @@ config E5500_CPU config E6500_CPU bool "Freescale e6500" depends on PPC64 && PPC_E500 + select PPC_HAS_LBARX_LHARX config 405_CPU bool "40x family" From d87a233717da400792fa601b29fa74a7d28e03c2 Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Sun, 11 Sep 2022 16:43:44 +0800 Subject: [PATCH 2617/4122] powerpc/pasemi: Add __init/__exit annotations to module init/exit funcs Add missing __init/__exit annotations to module init/exit funcs. 
Signed-off-by: Xiu Jianfeng Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220911084344.196353-1-xiujianfeng@huawei.com --- arch/powerpc/platforms/pasemi/gpio_mdio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pasemi/gpio_mdio.c b/arch/powerpc/platforms/pasemi/gpio_mdio.c index bf300167ad6b..913b77b92cea 100644 --- a/arch/powerpc/platforms/pasemi/gpio_mdio.c +++ b/arch/powerpc/platforms/pasemi/gpio_mdio.c @@ -294,7 +294,7 @@ static struct platform_driver gpio_mdio_driver = }, }; -static int gpio_mdio_init(void) +static int __init gpio_mdio_init(void) { struct device_node *np; @@ -314,7 +314,7 @@ static int gpio_mdio_init(void) } module_init(gpio_mdio_init); -static void gpio_mdio_exit(void) +static void __exit gpio_mdio_exit(void) { platform_driver_unregister(&gpio_mdio_driver); if (gpio_regs) From 2223552256dfc48435e0699dbe1e9b8d2cd56b06 Mon Sep 17 00:00:00 2001 From: Disha Goel Date: Fri, 16 Sep 2022 16:27:35 +0530 Subject: [PATCH 2618/4122] powerpc/kvm: Remove unused macros from asm-offset The kvm code was refactored to convert some of kvm assembly routines to C. This includes commits which moved code path for the kvm guest entry/exit for p7/8 from assembly to C. As part of the code changes, usage of some of the macros was removed. But definitions still exist in the assembly files. Commits are listed below: Commit 2e1ae9cd56f8 ("KVM: PPC: Book3S HV: Implement radix prefetch workaround by disabling MMU") Commit 9769a7fd79b6 ("KVM: PPC: Book3S HV: Remove radix guest support from P7/8 path") Commit fae5c9f3664b ("KVM: PPC: Book3S HV: remove ISA v3.0 and v3.1 support from P7/8 path") Commit 57dc0eed73ca ("KVM: PPC: Book3S HV P9: Implement PMU save/restore in C") Many of the asm-offset macro definitions were not removed at the time. This patch fixes that by removing the unused macros.
Signed-off-by: Disha Goel Reviewed-by: Nicholas Piggin Reviewed-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220916105736.268153-2-disgoel@linux.vnet.ibm.com --- arch/powerpc/kernel/asm-offsets.c | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 4ce2a4aa3985..b4b661f631f5 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -418,21 +418,18 @@ int main(void) /* book3s */ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - OFFSET(KVM_TLB_SETS, kvm, arch.tlb_sets); OFFSET(KVM_SDR1, kvm, arch.sdr1); OFFSET(KVM_HOST_LPID, kvm, arch.host_lpid); OFFSET(KVM_HOST_LPCR, kvm, arch.host_lpcr); OFFSET(KVM_HOST_SDR1, kvm, arch.host_sdr1); OFFSET(KVM_ENABLED_HCALLS, kvm, arch.enabled_hcalls); OFFSET(KVM_VRMA_SLB_V, kvm, arch.vrma_slb_v); - OFFSET(KVM_RADIX, kvm, arch.radix); OFFSET(KVM_SECURE_GUEST, kvm, arch.secure_guest); OFFSET(VCPU_DSISR, kvm_vcpu, arch.shregs.dsisr); OFFSET(VCPU_DAR, kvm_vcpu, arch.shregs.dar); OFFSET(VCPU_VPA, kvm_vcpu, arch.vpa.pinned_addr); OFFSET(VCPU_VPA_DIRTY, kvm_vcpu, arch.vpa.dirty); OFFSET(VCPU_HEIR, kvm_vcpu, arch.emul_inst); - OFFSET(VCPU_NESTED, kvm_vcpu, arch.nested); OFFSET(VCPU_CPU, kvm_vcpu, cpu); OFFSET(VCPU_THREAD_CPU, kvm_vcpu, arch.thread_cpu); #endif @@ -449,16 +446,12 @@ int main(void) OFFSET(VCPU_DABRX, kvm_vcpu, arch.dabrx); OFFSET(VCPU_DAWR0, kvm_vcpu, arch.dawr0); OFFSET(VCPU_DAWRX0, kvm_vcpu, arch.dawrx0); - OFFSET(VCPU_DAWR1, kvm_vcpu, arch.dawr1); - OFFSET(VCPU_DAWRX1, kvm_vcpu, arch.dawrx1); OFFSET(VCPU_CIABR, kvm_vcpu, arch.ciabr); OFFSET(VCPU_HFLAGS, kvm_vcpu, arch.hflags); OFFSET(VCPU_DEC_EXPIRES, kvm_vcpu, arch.dec_expires); OFFSET(VCPU_PENDING_EXC, kvm_vcpu, arch.pending_exceptions); OFFSET(VCPU_CEDED, kvm_vcpu, arch.ceded); OFFSET(VCPU_PRODDED, kvm_vcpu, arch.prodded); - OFFSET(VCPU_IRQ_PENDING, kvm_vcpu, arch.irq_pending); - OFFSET(VCPU_DBELL_REQ, kvm_vcpu, 
arch.doorbell_request); OFFSET(VCPU_MMCR, kvm_vcpu, arch.mmcr); OFFSET(VCPU_MMCRA, kvm_vcpu, arch.mmcra); OFFSET(VCPU_MMCRS, kvm_vcpu, arch.mmcrs); @@ -486,8 +479,6 @@ int main(void) OFFSET(VCPU_TCSCR, kvm_vcpu, arch.tcscr); OFFSET(VCPU_ACOP, kvm_vcpu, arch.acop); OFFSET(VCPU_WORT, kvm_vcpu, arch.wort); - OFFSET(VCPU_TID, kvm_vcpu, arch.tid); - OFFSET(VCPU_PSSCR, kvm_vcpu, arch.psscr); OFFSET(VCPU_HFSCR, kvm_vcpu, arch.hfscr); OFFSET(VCORE_ENTRY_EXIT, kvmppc_vcore, entry_exit_map); OFFSET(VCORE_IN_GUEST, kvmppc_vcore, in_guest); @@ -582,8 +573,6 @@ int main(void) HSTATE_FIELD(HSTATE_HWTHREAD_STATE, hwthread_state); HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu); HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore); - HSTATE_FIELD(HSTATE_XIVE_TIMA_PHYS, xive_tima_phys); - HSTATE_FIELD(HSTATE_XIVE_TIMA_VIRT, xive_tima_virt); HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi); HSTATE_FIELD(HSTATE_PTID, ptid); HSTATE_FIELD(HSTATE_FAKE_SUSPEND, fake_suspend); @@ -594,9 +583,6 @@ int main(void) HSTATE_FIELD(HSTATE_SDAR, host_mmcr[4]); HSTATE_FIELD(HSTATE_MMCR2, host_mmcr[5]); HSTATE_FIELD(HSTATE_SIER, host_mmcr[6]); - HSTATE_FIELD(HSTATE_MMCR3, host_mmcr[7]); - HSTATE_FIELD(HSTATE_SIER2, host_mmcr[8]); - HSTATE_FIELD(HSTATE_SIER3, host_mmcr[9]); HSTATE_FIELD(HSTATE_PMC1, host_pmc[0]); HSTATE_FIELD(HSTATE_PMC2, host_pmc[1]); HSTATE_FIELD(HSTATE_PMC3, host_pmc[2]); @@ -672,17 +658,6 @@ int main(void) OFFSET(VCPU_HOST_MAS6, kvm_vcpu, arch.host_mas6); #endif -#ifdef CONFIG_KVM_XICS - DEFINE(VCPU_XIVE_SAVED_STATE, offsetof(struct kvm_vcpu, - arch.xive_saved_state)); - DEFINE(VCPU_XIVE_CAM_WORD, offsetof(struct kvm_vcpu, - arch.xive_cam_word)); - DEFINE(VCPU_XIVE_PUSHED, offsetof(struct kvm_vcpu, arch.xive_pushed)); - DEFINE(VCPU_XIVE_ESC_ON, offsetof(struct kvm_vcpu, arch.xive_esc_on)); - DEFINE(VCPU_XIVE_ESC_RADDR, offsetof(struct kvm_vcpu, arch.xive_esc_raddr)); - DEFINE(VCPU_XIVE_ESC_VADDR, offsetof(struct kvm_vcpu, arch.xive_esc_vaddr)); -#endif - #ifdef CONFIG_KVM_EXIT_TIMING 
 OFFSET(VCPU_TIMING_EXIT_TBU, kvm_vcpu, arch.timing_exit.tv32.tbu); OFFSET(VCPU_TIMING_EXIT_TBL, kvm_vcpu, arch.timing_exit.tv32.tbl); From 4ac9d3187cc7ccba25f76a3faef3e08a366f77b9 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Fri, 16 Sep 2022 16:27:36 +0530 Subject: [PATCH 2619/4122] powerpc/kvm: Remove unused references for MMCR3/SIER2/SIER3 registers Commit 57dc0eed73ca ("KVM: PPC: Book3S HV P9: Implement PMU save/restore in C") removed the PMU save/restore functions from assembly code and implemented these functions in C, for power9 and later platforms. After the code refactoring, Performance Monitoring Unit (PMU) registers became part of "p9_host_os_sprs" structure and now this structure is used to save/restore pmu host registers, for power9 and later platforms. But we still have old unused register references. Patch removes unused host_mmcr references for Monitor Mode Control Register 3 (MMCR3)/ Sampled Instruction Event Register 2 (SIER2)/ SIER3 registers from "struct kvmppc_host_state".
Fixes: 57dc0eed73ca ("KVM: PPC: Book3S HV P9: Implement PMU save/restore in C") Signed-off-by: Kajol Jain Reviewed-by: Nicholas Piggin Reviewed-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220916105736.268153-3-disgoel@linux.vnet.ibm.com --- arch/powerpc/include/asm/kvm_book3s_asm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index c8882d9b86c2..a36797938620 100644 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h @@ -105,7 +105,7 @@ struct kvmppc_host_state { void __iomem *xive_tima_virt; u32 saved_xirr; u64 dabr; - u64 host_mmcr[10]; /* MMCR 0,1,A, SIAR, SDAR, MMCR2, SIER, MMCR3, SIER2/3 */ + u64 host_mmcr[7]; /* MMCR 0,1,A, SIAR, SDAR, MMCR2, SIER */ u32 host_pmc[8]; u64 host_purr; u64 host_spurr; From 1c4a4a4c8410be4a231a58b23e7a30923ff954ac Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 16 Sep 2022 15:15:04 +0100 Subject: [PATCH 2620/4122] powerpc/xmon: Fix -Wswitch-unreachable warning in bpt_cmds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When building with automatic stack variable initialization, GCC 12 complains about variables defined outside of switch case statements. Move the variable into the case that uses it, which silences the warning: arch/powerpc/xmon/xmon.c: In function ‘bpt_cmds’: arch/powerpc/xmon/xmon.c:1529:13: warning: statement will never be executed [-Wswitch-unreachable] 1529 | int mode; | ^~~~ Fixes: 09b6c1129f89 ("powerpc/xmon: Fix compile error with PPC_8xx=y") Signed-off-by: Gustavo A. R. 
Silva Reviewed-by: Kees Cook Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/YySE6FHiOcbWWR+9@work --- arch/powerpc/xmon/xmon.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index f51c882bf902..e34d7809f6c9 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1525,9 +1525,9 @@ bpt_cmds(void) cmd = inchar(); switch (cmd) { - static const char badaddr[] = "Only kernel addresses are permitted for breakpoints\n"; - int mode; - case 'd': /* bd - hardware data breakpoint */ + case 'd': { /* bd - hardware data breakpoint */ + static const char badaddr[] = "Only kernel addresses are permitted for breakpoints\n"; + int mode; if (xmon_is_ro) { printf(xmon_ro_msg); break; @@ -1560,6 +1560,7 @@ bpt_cmds(void) force_enable_xmon(); break; + } case 'i': /* bi - hardware instr breakpoint */ if (xmon_is_ro) { From 1892e87a3e9170146549779622cb844582f1e2bb Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 26 Sep 2022 23:03:25 -0700 Subject: [PATCH 2621/4122] powerpc/warp: switch to using gpiod API This switches PIKA Warp away from legacy gpio API and to newer gpiod API, so that we can eventually deprecate the former. Because LEDs are normally driven by leds-gpio driver, but the platform code also wants to access the LEDs during thermal shutdown, and gpiod API does not allow locating GPIO without requesting it, the platform code is now responsible for locating GPIOs through device tree and requesting them. It then constructs platform data for leds-gpio platform device and registers it. This allows platform code to retain access to LED GPIO descriptors and use them when needed. 
Signed-off-by: Dmitry Torokhov Acked-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/YzKSLcrYmV5kjyeX@google.com --- arch/powerpc/boot/dts/warp.dts | 4 +- arch/powerpc/platforms/44x/warp.c | 105 ++++++++++++++++++++++++++---- 2 files changed, 94 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/boot/dts/warp.dts b/arch/powerpc/boot/dts/warp.dts index b4f32740870e..aa62d08e97c2 100644 --- a/arch/powerpc/boot/dts/warp.dts +++ b/arch/powerpc/boot/dts/warp.dts @@ -258,14 +258,12 @@ }; power-leds { - compatible = "gpio-leds"; + compatible = "warp-power-leds"; green { gpios = <&GPIO1 0 0>; - default-state = "keep"; }; red { gpios = <&GPIO1 1 0>; - default-state = "keep"; }; }; diff --git a/arch/powerpc/platforms/44x/warp.c b/arch/powerpc/platforms/44x/warp.c index f03432ef010b..cefa313c09f0 100644 --- a/arch/powerpc/platforms/44x/warp.c +++ b/arch/powerpc/platforms/44x/warp.c @@ -5,15 +5,17 @@ * Copyright (c) 2008-2009 PIKA Technologies * Sean MacLennan */ +#include #include #include #include +#include #include #include #include #include #include -#include +#include #include #include @@ -92,8 +94,6 @@ static int __init warp_post_info(void) static LIST_HEAD(dtm_shutdown_list); static void __iomem *dtm_fpga; -static unsigned green_led, red_led; - struct dtm_shutdown { struct list_head list; @@ -101,7 +101,6 @@ struct dtm_shutdown { void *arg; }; - int pika_dtm_register_shutdown(void (*func)(void *arg), void *arg) { struct dtm_shutdown *shutdown; @@ -132,6 +131,35 @@ int pika_dtm_unregister_shutdown(void (*func)(void *arg), void *arg) return -EINVAL; } +#define WARP_GREEN_LED 0 +#define WARP_RED_LED 1 + +static struct gpio_led warp_gpio_led_pins[] = { + [WARP_GREEN_LED] = { + .name = "green", + .default_state = LEDS_DEFSTATE_KEEP, + .gpiod = NULL, /* to be filled by pika_setup_leds() */ + }, + [WARP_RED_LED] = { + .name = "red", + .default_state = LEDS_DEFSTATE_KEEP, + .gpiod = NULL, /* to be filled by pika_setup_leds() */ + }, 
+}; + +static struct gpio_led_platform_data warp_gpio_led_data = { + .leds = warp_gpio_led_pins, + .num_leds = ARRAY_SIZE(warp_gpio_led_pins), +}; + +static struct platform_device warp_gpio_leds = { + .name = "leds-gpio", + .id = -1, + .dev = { + .platform_data = &warp_gpio_led_data, + }, +}; + static irqreturn_t temp_isr(int irq, void *context) { struct dtm_shutdown *shutdown; @@ -139,7 +167,7 @@ static irqreturn_t temp_isr(int irq, void *context) local_irq_disable(); - gpio_set_value(green_led, 0); + gpiod_set_value(warp_gpio_led_pins[WARP_GREEN_LED].gpiod, 0); /* Run through the shutdown list. */ list_for_each_entry(shutdown, &dtm_shutdown_list, list) @@ -153,7 +181,7 @@ static irqreturn_t temp_isr(int irq, void *context) out_be32(dtm_fpga + 0x14, reset); } - gpio_set_value(red_led, value); + gpiod_set_value(warp_gpio_led_pins[WARP_RED_LED].gpiod, value); value ^= 1; mdelay(500); } @@ -162,25 +190,78 @@ static irqreturn_t temp_isr(int irq, void *context) return IRQ_HANDLED; } +/* + * Because green and red power LEDs are normally driven by leds-gpio driver, + * but in case of critical temperature shutdown we want to drive them + * ourselves, we acquire both and then create leds-gpio platform device + * ourselves, instead of doing it through device tree. This way we can still + * keep access to the gpios and use them when needed. 
+ */ static int pika_setup_leds(void) { struct device_node *np, *child; + struct gpio_desc *gpio; + struct gpio_led *led; + int led_count = 0; + int error; + int i; - np = of_find_compatible_node(NULL, NULL, "gpio-leds"); + np = of_find_compatible_node(NULL, NULL, "warp-power-leds"); if (!np) { printk(KERN_ERR __FILE__ ": Unable to find leds\n"); return -ENOENT; } - for_each_child_of_node(np, child) - if (of_node_name_eq(child, "green")) - green_led = of_get_gpio(child, 0); - else if (of_node_name_eq(child, "red")) - red_led = of_get_gpio(child, 0); + for_each_child_of_node(np, child) { + for (i = 0; i < ARRAY_SIZE(warp_gpio_led_pins); i++) { + led = &warp_gpio_led_pins[i]; + + if (!of_node_name_eq(child, led->name)) + continue; + + if (led->gpiod) { + printk(KERN_ERR __FILE__ ": %s led has already been defined\n", + led->name); + continue; + } + + gpio = fwnode_gpiod_get_index(of_fwnode_handle(child), + NULL, 0, GPIOD_ASIS, + led->name); + error = PTR_ERR_OR_ZERO(gpio); + if (error) { + printk(KERN_ERR __FILE__ ": Failed to get %s led gpio: %d\n", + led->name, error); + of_node_put(child); + goto err_cleanup_pins; + } + + led->gpiod = gpio; + led_count++; + } + } of_node_put(np); + /* Skip device registration if no leds have been defined */ + if (led_count) { + error = platform_device_register(&warp_gpio_leds); + if (error) { + printk(KERN_ERR __FILE__ ": Unable to add leds-gpio: %d\n", + error); + goto err_cleanup_pins; + } + } + return 0; + +err_cleanup_pins: + for (i = 0; i < ARRAY_SIZE(warp_gpio_led_pins); i++) { + led = &warp_gpio_led_pins[i]; + gpiod_put(led->gpiod); + led->gpiod = NULL; + } + return error; } static void pika_setup_critical_temp(struct device_node *np, From 4e87bd14e501030619d1bad29b3ec1f947f84fc4 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 27 Sep 2022 12:23:58 -0700 Subject: [PATCH 2622/4122] powerpc/sgy_cts1000: convert to using gpiod API and facelift This patch converts the driver to newer gpiod API, and away from 
OF-specific legacy gpio API that we want to stop using. While at it, let's address a few more issues: - switch to using dev_info()/pr_info() and friends - cancel work when unbinding the driver Note that the original code handled halt GPIO polarity incorrectly: in halt callback, when line polarity is "low" it would set trigger to "1" and drive halt line high, which is counter to the annotation. gpiod API will drive such line low. However I do not see any DTSes in mainline that have a DT node with "sgy,gpio-halt" compatible. Signed-off-by: Dmitry Torokhov Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/YzNNznewTyCJiGFz@google.com --- arch/powerpc/platforms/85xx/sgy_cts1000.c | 132 +++++++++------------- 1 file changed, 53 insertions(+), 79 deletions(-) diff --git a/arch/powerpc/platforms/85xx/sgy_cts1000.c b/arch/powerpc/platforms/85xx/sgy_cts1000.c index e14d1b74d4e4..751395cbf022 100644 --- a/arch/powerpc/platforms/85xx/sgy_cts1000.c +++ b/arch/powerpc/platforms/85xx/sgy_cts1000.c @@ -7,10 +7,13 @@ * Copyright 2012 by Servergy, Inc. 
*/ +#define pr_fmt(fmt) "gpio-halt: " fmt + +#include #include #include +#include #include -#include #include #include #include @@ -18,7 +21,8 @@ #include -static struct device_node *halt_node; +static struct gpio_desc *halt_gpio; +static int halt_irq; static const struct of_device_id child_match[] = { { @@ -36,23 +40,10 @@ static DECLARE_WORK(gpio_halt_wq, gpio_halt_wfn); static void __noreturn gpio_halt_cb(void) { - enum of_gpio_flags flags; - int trigger, gpio; - - if (!halt_node) - panic("No reset GPIO information was provided in DT\n"); - - gpio = of_get_gpio_flags(halt_node, 0, &flags); - - if (!gpio_is_valid(gpio)) - panic("Provided GPIO is invalid\n"); - - trigger = (flags == OF_GPIO_ACTIVE_LOW); - - printk(KERN_INFO "gpio-halt: triggering GPIO.\n"); + pr_info("triggering GPIO.\n"); /* Probably wont return */ - gpio_set_value(gpio, trigger); + gpiod_set_value(halt_gpio, 1); panic("Halt failed\n"); } @@ -61,95 +52,78 @@ static void __noreturn gpio_halt_cb(void) * to handle the shutdown/poweroff. */ static irqreturn_t gpio_halt_irq(int irq, void *__data) { - printk(KERN_INFO "gpio-halt: shutdown due to power button IRQ.\n"); + struct platform_device *pdev = __data; + + dev_info(&pdev->dev, "scheduling shutdown due to power button IRQ\n"); schedule_work(&gpio_halt_wq); return IRQ_HANDLED; }; -static int gpio_halt_probe(struct platform_device *pdev) +static int __gpio_halt_probe(struct platform_device *pdev, + struct device_node *halt_node) { - enum of_gpio_flags flags; - struct device_node *node = pdev->dev.of_node; - struct device_node *child_node; - int gpio, err, irq; - int trigger; + int err; - if (!node) - return -ENODEV; - - /* If there's no matching child, this isn't really an error */ - child_node = of_find_matching_node(node, child_match); - if (!child_node) - return 0; - - /* Technically we could just read the first one, but punish - * DT writers for invalid form. 
*/ - if (of_gpio_count(child_node) != 1) { - err = -EINVAL; - goto err_put; - } - - /* Get the gpio number relative to the dynamic base. */ - gpio = of_get_gpio_flags(child_node, 0, &flags); - if (!gpio_is_valid(gpio)) { - err = -EINVAL; - goto err_put; - } - - err = gpio_request(gpio, "gpio-halt"); + halt_gpio = fwnode_gpiod_get_index(of_fwnode_handle(halt_node), + NULL, 0, GPIOD_OUT_LOW, "gpio-halt"); + err = PTR_ERR_OR_ZERO(halt_gpio); if (err) { - printk(KERN_ERR "gpio-halt: error requesting GPIO %d.\n", - gpio); - goto err_put; + dev_err(&pdev->dev, "failed to request halt GPIO: %d\n", err); + return err; } - trigger = (flags == OF_GPIO_ACTIVE_LOW); - - gpio_direction_output(gpio, !trigger); - /* Now get the IRQ which tells us when the power button is hit */ - irq = irq_of_parse_and_map(child_node, 0); - err = request_irq(irq, gpio_halt_irq, IRQF_TRIGGER_RISING | - IRQF_TRIGGER_FALLING, "gpio-halt", child_node); + halt_irq = irq_of_parse_and_map(halt_node, 0); + err = request_irq(halt_irq, gpio_halt_irq, + IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING, + "gpio-halt", pdev); if (err) { - printk(KERN_ERR "gpio-halt: error requesting IRQ %d for " - "GPIO %d.\n", irq, gpio); - gpio_free(gpio); - goto err_put; + dev_err(&pdev->dev, "failed to request IRQ %d: %d\n", + halt_irq, err); + gpiod_put(halt_gpio); + halt_gpio = NULL; + return err; } /* Register our halt function */ ppc_md.halt = gpio_halt_cb; pm_power_off = gpio_halt_cb; - printk(KERN_INFO "gpio-halt: registered GPIO %d (%d trigger, %d" - " irq).\n", gpio, trigger, irq); + dev_info(&pdev->dev, "registered halt GPIO, irq: %d\n", halt_irq); - halt_node = child_node; return 0; +} -err_put: - of_node_put(child_node); - return err; +static int gpio_halt_probe(struct platform_device *pdev) +{ + struct device_node *halt_node; + int ret; + + if (!pdev->dev.of_node) + return -ENODEV; + + /* If there's no matching child, this isn't really an error */ + halt_node = of_find_matching_node(pdev->dev.of_node, child_match); 
+ if (!halt_node) + return -ENODEV; + + ret = __gpio_halt_probe(pdev, halt_node); + of_node_put(halt_node); + + return ret; } static int gpio_halt_remove(struct platform_device *pdev) { - if (halt_node) { - int gpio = of_get_gpio(halt_node, 0); - int irq = irq_of_parse_and_map(halt_node, 0); + free_irq(halt_irq, pdev); + cancel_work_sync(&gpio_halt_wq); - free_irq(irq, halt_node); + ppc_md.halt = NULL; + pm_power_off = NULL; - ppc_md.halt = NULL; - pm_power_off = NULL; - - gpio_free(gpio); - - of_node_put(halt_node); - halt_node = NULL; - } + gpiod_put(halt_gpio); + halt_gpio = NULL; return 0; } From f2c45962cc618c12f69fd46e6ebc20b9cd7f15ac Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 28 Sep 2022 08:29:00 +0200 Subject: [PATCH 2623/4122] powerpc/8xx: Simplify pte_update() with 16k pages While looking at code generated for code patching, I saw that pte_clear generated: 2d8: 38 a0 00 00 li r5,0 2dc: 38 e0 10 00 li r7,4096 2e0: 39 00 20 00 li r8,8192 2e4: 39 40 30 00 li r10,12288 2e8: 90 a9 00 00 stw r5,0(r9) 2ec: 90 e9 00 04 stw r7,4(r9) 2f0: 91 09 00 08 stw r8,8(r9) 2f4: 91 49 00 0c stw r10,12(r9) With 16k pages, only the first entry is used by the kernel, so no need to adapt the address of other entries. Only duplicate the first entry for hardware. 
Now it is: 2cc: 39 40 00 00 li r10,0 2d0: 91 49 00 00 stw r10,0(r9) 2d4: 91 49 00 04 stw r10,4(r9) 2d8: 91 49 00 08 stw r10,8(r9) 2dc: 91 49 00 0c stw r10,12(r9) Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/65f76300de07091a59a042a3db2d0ce9b939a05c.1664346532.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/pgtable.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 0d40b33184eb..0e861e59b769 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -256,8 +256,14 @@ static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, p num = number_of_cells_per_pte(pmd, new, huge); - for (i = 0; i < num; i++, entry++, new += SZ_4K) - *entry = new; + for (i = 0; i < num; i += PAGE_SIZE / SZ_4K, new += PAGE_SIZE) { + *entry++ = new; + if (IS_ENABLED(CONFIG_PPC_16K_PAGES) && num != 1) { + *entry++ = new; + *entry++ = new; + *entry++ = new; + } + } return old; } From 0b4721815c5328e08c3acdee4a53890e012d830b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 28 Sep 2022 08:29:22 +0200 Subject: [PATCH 2624/4122] powerpc/8xx: Reverse order entries are written by __set_pte_at() Currently, with 16k pages __set_pte_at() writes table entries in reverse order: 294: 91 49 00 0c stw r10,12(r9) 298: 91 49 00 08 stw r10,8(r9) 29c: 91 49 00 04 stw r10,4(r9) 2a0: 91 49 00 00 stw r10,0(r9) Although there should be no impact at all as it stays in a single cacheline, reverse the writing in a more natural order.
 288: 91 49 00 00 stw r10,0(r9) 28c: 91 49 00 04 stw r10,4(r9) 290: 91 49 00 08 stw r10,8(r9) 294: 91 49 00 0c stw r10,12(r9) Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/67c3b5d44edfec054234ea9b4d05fc4b4f7f8a0e.1664346554.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index d9067dfc531c..69c3a050a3d8 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -183,7 +183,7 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, * cases, and 32-bit non-hash with 32-bit PTEs. */ #if defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_16K_PAGES) - ptep->pte = ptep->pte1 = ptep->pte2 = ptep->pte3 = pte_val(pte); + ptep->pte3 = ptep->pte2 = ptep->pte1 = ptep->pte = pte_val(pte); #else *ptep = pte; #endif From 5825603f67bc5ff445a1847302884154f0afa627 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Fri, 30 Sep 2022 16:20:12 +0930 Subject: [PATCH 2625/4122] powerpc/microwatt: Add litesd This is the register layout of the litesd peripheral for the fusesoc based Microwatt SoC. It requires a description of the system clock, which is hardcoded to 100MHz.
Signed-off-by: Joel Stanley Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220930065012.2860577-1-joel@jms.id.au --- arch/powerpc/boot/dts/microwatt.dts | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/arch/powerpc/boot/dts/microwatt.dts b/arch/powerpc/boot/dts/microwatt.dts index b69db1d275cd..269e930b3b0b 100644 --- a/arch/powerpc/boot/dts/microwatt.dts +++ b/arch/powerpc/boot/dts/microwatt.dts @@ -21,6 +21,14 @@ reg = <0x00000000 0x00000000 0x00000000 0x10000000>; }; + clocks { + sys_clk: litex_sys_clk { + #clock-cells = <0>; + compatible = "fixed-clock"; + clock-frequency = <100000000>; + }; + }; + cpus { #size-cells = <0x00>; #address-cells = <0x01>; @@ -141,6 +149,20 @@ litex,slot-size = <0x800>; interrupts = <0x11 0x1>; }; + + mmc@8040000 { + compatible = "litex,mmc"; + reg = <0x8042800 0x800 + 0x8041000 0x800 + 0x8040800 0x800 + 0x8042000 0x800 + 0x8041800 0x800>; + reg-names = "phy", "core", "reader", "writer", "irq"; + bus-width = <4>; + interrupts = <0x13 1>; + cap-sd-highspeed; + clocks = <&sys_clk>; + }; }; chosen { From 3e65412709293d5fb65249408e8e801b23b72635 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 6 Oct 2022 14:20:18 +1100 Subject: [PATCH 2626/4122] powerpc: Make instruction dump work with scripts/decodecode Matt reported that scripts/decodecode doesn't work for the instruction dump in the powerpc oops output. Although there are scripts around that can decode it, it would be preferable if the standard in-tree script worked. All other arches prefix the instruction dump with "Code:", and that's what the script looks for, so use that. The script then works as expected: $ CROSS_COMPILE=powerpc64le-linux-gnu- ./scripts/decodecode Code: fbc1fff0 f821ffc1 7c7d1b78 7c9c2378 ebc30028 7fdff378 48000018 60000000 60000000 ebff0008 7c3ef840 41820048 <815f0060> e93f0000 5529077c 7d295378 ^D All code ======== 0: f0 ff c1 fb std r30,-16(r1) 4: c1 ff 21 f8 stdu r1,-64(r1) 8: 78 1b 7d 7c mr r29,r3 ... 
Note that the script doesn't cope well with printk timestamps or printk caller info. Reported-by: Matthew Wilcox Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221006032019.1128624-1-mpe@ellerman.id.au --- arch/powerpc/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 67da147fe34d..3372b5c21168 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1359,7 +1359,7 @@ static void show_instructions(struct pt_regs *regs) unsigned long nip = regs->nip; unsigned long pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int)); - printk("Instruction dump:"); + printk("Code:"); /* * If we were executing with the MMU off for instructions, adjust pc From d90bb7b4fdaff3f2fa68c7af85de2ce9e70189b1 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 6 Oct 2022 14:20:19 +1100 Subject: [PATCH 2627/4122] powerpc: Print instruction dump on a single line Although the previous commit made the powerpc instruction dump usable with scripts/decodecode, there are still some problems. Because the dump is split across multiple lines, the script doesn't cope with printk timestamps or caller info. That can be fixed by printing the entire dump on one line, eg: [ 12.016307][ T112] --- interrupt: c00 [ 12.016605][ T112] Code: 4b7aae15 60000000 3d22016e 3c62ffec 39291160 38639bc0 e8890000 4b7aadf9 60000000 4bfffee8 7c0802a6 60000000 <0fe00000> 60420000 3c4c008f 384268a0 [ 12.017655][ T112] ---[ end trace 0000000000000000 ]--- That output can then be piped directly into scripts/decodecode and interpreted correctly. Printing the dump on a single line does produce a very long line, about 173 characters. That is still shorter than x86, which prints nearly 200 characters even without timestamps etc. All consoles I'm aware of will wrap the line if it's too long, so the length should not be a functional problem. 
If anything it should help on consoles like VGA by using less vertical space. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221006032019.1128624-2-mpe@ellerman.id.au --- arch/powerpc/kernel/process.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 3372b5c21168..e3e1feaa536a 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1359,7 +1359,7 @@ static void show_instructions(struct pt_regs *regs) unsigned long nip = regs->nip; unsigned long pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int)); - printk("Code:"); + printk("Code: "); /* * If we were executing with the MMU off for instructions, adjust pc @@ -1373,9 +1373,6 @@ static void show_instructions(struct pt_regs *regs) for (i = 0; i < NR_INSN_TO_PRINT; i++) { int instr; - if (!(i % 8)) - pr_cont("\n"); - if (!__kernel_text_address(pc) || get_kernel_nofault(instr, (const void *)pc)) { pr_cont("XXXXXXXX "); From f985adaf2ff934ec869b32ca1f7f97e2825e3a49 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 6 Oct 2022 20:56:53 +1000 Subject: [PATCH 2628/4122] powerpc: remove the last remnants of cputime_t cputime_t was a core kernel type, removed by commits ed5c8c854f2b..b672592f0221. As explained in commit b672592f0221 ("sched/cputime: Remove generic asm headers"), the final cleanup is for the arch to provide cputime_to_nsec[s](). Commit ade7667a981b ("powerpc: Add cputime_to_nsecs()") did that, but just didn't remove the then-unused cputime_to_usecs(), cputime_t type, and associated remnants.
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221006105653.115829-1-npiggin@gmail.com --- arch/powerpc/include/asm/cputime.h | 17 +---------------- arch/powerpc/kernel/time.c | 23 ++--------------------- 2 files changed, 3 insertions(+), 37 deletions(-) diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h index 431ae2343022..4961fb38e438 100644 --- a/arch/powerpc/include/asm/cputime.h +++ b/arch/powerpc/include/asm/cputime.h @@ -21,23 +21,8 @@ #include #include -typedef u64 __nocast cputime_t; -typedef u64 __nocast cputime64_t; - -#define cmpxchg_cputime(ptr, old, new) cmpxchg(ptr, old, new) - #ifdef __KERNEL__ -/* - * Convert cputime <-> microseconds - */ -extern u64 __cputime_usec_factor; - -static inline unsigned long cputime_to_usecs(const cputime_t ct) -{ - return mulhdu((__force u64) ct, __cputime_usec_factor); -} - -#define cputime_to_nsecs(cputime) tb_to_ns((__force u64)cputime) +#define cputime_to_nsecs(cputime) tb_to_ns(cputime) /* * PPC64 uses PACA which is task independent for storing accounting data while diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index a2ab397065c6..d68de3618741 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -130,7 +130,7 @@ unsigned long tb_ticks_per_jiffy; unsigned long tb_ticks_per_usec = 100; /* sane default */ EXPORT_SYMBOL(tb_ticks_per_usec); unsigned long tb_ticks_per_sec; -EXPORT_SYMBOL(tb_ticks_per_sec); /* for cputime_t conversions */ +EXPORT_SYMBOL(tb_ticks_per_sec); /* for cputime conversions */ DEFINE_SPINLOCK(rtc_lock); EXPORT_SYMBOL_GPL(rtc_lock); @@ -150,21 +150,6 @@ EXPORT_SYMBOL_GPL(ppc_tb_freq); bool tb_invalid; #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -/* - * Factor for converting from cputime_t (timebase ticks) to - * microseconds. This is stored as 0.64 fixed-point binary fraction. 
- */ -u64 __cputime_usec_factor; -EXPORT_SYMBOL(__cputime_usec_factor); - -static void calc_cputime_factors(void) -{ - struct div_result res; - - div128_by_32(1000000, 0, tb_ticks_per_sec, &res); - __cputime_usec_factor = res.result_low; -} - /* * Read the SPURR on systems that have it, otherwise the PURR, * or if that doesn't exist return the timebase value passed in. @@ -369,10 +354,7 @@ void vtime_flush(struct task_struct *tsk) acct->hardirq_time = 0; acct->softirq_time = 0; } - -#else /* ! CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ -#define calc_cputime_factors() -#endif +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ void __delay(unsigned long loops) { @@ -914,7 +896,6 @@ void __init time_init(void) tb_ticks_per_jiffy = ppc_tb_freq / HZ; tb_ticks_per_sec = ppc_tb_freq; tb_ticks_per_usec = ppc_tb_freq / 1000000; - calc_cputime_factors(); /* * Compute scale factor for sched_clock. From 2cb1dfac6f792f9e4a092793215f0d26e9f8d5b2 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 9 Oct 2022 12:49:50 +0200 Subject: [PATCH 2629/4122] powerpc/sysdev: Remove some duplicate prefix in some messages At the beginning of the file, we have: #define pr_fmt(fmt) "xive: " fmt So, there is no need to duplicate "XIVE:" in debug and error messages. 
For the record, these useless prefixes were added in commit 5af50993850a ("KVM: PPC: Book3S HV: Native usage of the XIVE interrupt controller") Signed-off-by: Christophe JAILLET Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/7b8b5915a2c7c1616b33e8433ebe0a0bf07070a2.1665312579.git.christophe.jaillet@wanadoo.fr --- arch/powerpc/sysdev/xive/native.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index 3925825954bc..19d880ebc5e6 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -535,13 +535,13 @@ static bool __init xive_parse_provisioning(struct device_node *np) static void __init xive_native_setup_pools(void) { /* Allocate a pool big enough */ - pr_debug("XIVE: Allocating VP block for pool size %u\n", nr_cpu_ids); + pr_debug("Allocating VP block for pool size %u\n", nr_cpu_ids); xive_pool_vps = xive_native_alloc_vp_block(nr_cpu_ids); if (WARN_ON(xive_pool_vps == XIVE_INVALID_VP)) - pr_err("XIVE: Failed to allocate pool VP, KVM might not function\n"); + pr_err("Failed to allocate pool VP, KVM might not function\n"); - pr_debug("XIVE: Pool VPs allocated at 0x%x for %u max CPUs\n", + pr_debug("Pool VPs allocated at 0x%x for %u max CPUs\n", xive_pool_vps, nr_cpu_ids); } From 579aee9fc594af94c242068c011b0233563d4bbf Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Mon, 10 Oct 2022 16:57:21 +1100 Subject: [PATCH 2630/4122] powerpc: suppress some linker warnings in recent linker versions This is a follow on from commit 0d362be5b142 ("Makefile: link with -z noexecstack --no-warn-rwx-segments") for arch/powerpc/boot to address warnings like: ld: warning: opal-calls.o: missing .note.GNU-stack section implies executable stack ld: NOTE: This behaviour is deprecated and will be removed in a future version of the linker ld: warning: arch/powerpc/boot/zImage.epapr has a LOAD segment with RWX permissions This fixes issue
https://github.com/linuxppc/issues/issues/417 Signed-off-by: Stephen Rothwell Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221010165721.106267e6@canb.auug.org.au --- arch/powerpc/boot/wrapper | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index 5bdd4dd20bbb..a8a87d7667f4 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -215,6 +215,11 @@ ld_version() }' } +ld_is_lld() +{ + ${CROSS}ld -V 2>&1 | grep -q LLD +} + # Do not include PT_INTERP segment when linking pie. Non-pie linking # just ignores this option. LD_VERSION=$(${CROSS}ld --version | ld_version) @@ -223,6 +228,14 @@ if [ "$LD_VERSION" -ge "$LD_NO_DL_MIN_VERSION" ] ; then nodl="--no-dynamic-linker" fi +# suppress some warnings in recent ld versions +nowarn="-z noexecstack" +if ! ld_is_lld; then + if [ "$LD_VERSION" -ge "$(echo 2.39 | ld_version)" ]; then + nowarn="$nowarn --no-warn-rwx-segments" + fi +fi + platformo=$object/"$platform".o lds=$object/zImage.lds ext=strip @@ -504,7 +517,7 @@ if [ "$platform" != "miboot" ]; then text_start="-Ttext $link_address" fi #link everything - ${CROSS}ld -m $format -T $lds $text_start $pie $nodl $rodynamic $notext -o "$ofile" $map \ + ${CROSS}ld -m $format -T $lds $text_start $pie $nodl $nowarn $rodynamic $notext -o "$ofile" $map \ $platformo $tmp $object/wrapper.a rm $tmp fi From 8b49670f3bb3f10cd4d5a6dca17f5a31b173ecdc Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 17 Oct 2022 11:23:33 +0800 Subject: [PATCH 2631/4122] powerpc/xive: add missing iounmap() in error path in xive_spapr_populate_irq_data() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If remapping 'data->trig_page' fails, the 'data->eoi_mmio' need be unmapped before returning from xive_spapr_populate_irq_data(). 
Fixes: eac1e731b59e ("powerpc/xive: guest exploitation of the XIVE interrupt controller") Signed-off-by: Yang Yingliang Reviewed-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221017032333.1852406-1-yangyingliang@huawei.com --- arch/powerpc/sysdev/xive/spapr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index e2c8f93b535b..e45419264391 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -439,6 +439,7 @@ static int xive_spapr_populate_irq_data(u32 hw_irq, struct xive_irq_data *data) data->trig_mmio = ioremap(data->trig_page, 1u << data->esb_shift); if (!data->trig_mmio) { + iounmap(data->eoi_mmio); pr_err("Failed to map trigger page for irq 0x%x\n", hw_irq); return -ENOMEM; } From 16a3f41ff3322830683d3ccc14d77736829c61bf Mon Sep 17 00:00:00 2001 From: ruanjinjie Date: Wed, 19 Oct 2022 14:34:14 +0800 Subject: [PATCH 2632/4122] powerpc/mpic_msgr: fix cast removes address space of expression warnings When build Linux kernel, encounter the following warnings: ./arch/powerpc/sysdev/mpic_msgr.c:230:38: warning: cast removes address space '__iomem' of expression ./arch/powerpc/sysdev/mpic_msgr.c:230:27: warning: incorrect type in assignment (different address spaces) The data type of msgr->mer and msgr->base are 'u32 __iomem *', but converted to 'u32 *' and 'u8 *' directly and cause above warnings, now instead of using a type cast, change the size of the pointer offset to fix these warnings. 
Signed-off-by: ruanjinjie Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221019063414.3758087-1-ruanjinjie@huawei.com --- arch/powerpc/sysdev/mpic_msgr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/sysdev/mpic_msgr.c b/arch/powerpc/sysdev/mpic_msgr.c index a439e33eae06..d75064fb7d12 100644 --- a/arch/powerpc/sysdev/mpic_msgr.c +++ b/arch/powerpc/sysdev/mpic_msgr.c @@ -20,7 +20,7 @@ #define MPIC_MSGR_REGISTERS_PER_BLOCK 4 #define MPIC_MSGR_STRIDE 0x10 -#define MPIC_MSGR_MER_OFFSET 0x100 +#define MPIC_MSGR_MER_OFFSET (0x100 / sizeof(u32)) #define MSGR_INUSE 0 #define MSGR_FREE 1 @@ -234,7 +234,7 @@ static int mpic_msgr_probe(struct platform_device *dev) reg_number = block_number * MPIC_MSGR_REGISTERS_PER_BLOCK + i; msgr->base = msgr_block_addr + i * MPIC_MSGR_STRIDE; - msgr->mer = (u32 *)((u8 *)msgr->base + MPIC_MSGR_MER_OFFSET); + msgr->mer = msgr->base + MPIC_MSGR_MER_OFFSET; msgr->in_use = MSGR_FREE; msgr->num = i; raw_spin_lock_init(&msgr->lock); From 2fa9482334b0593b7edc371a13c0cca81daaa89e Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 20 Oct 2022 22:58:57 +0530 Subject: [PATCH 2633/4122] powerpc/kprobes: Remove preempt disable around call to get_kprobe() in arch_prepare_kprobe() arch_prepare_kprobe() is called from register_kprobe() via prepare_kprobe(), or through register_aggr_kprobe(), both with the kprobe_mutex held. Per the comment for get_kprobe(): /* * This routine is called either: * - under the 'kprobe_mutex' - during kprobe_[un]register(). * OR * - with preemption disabled - from architecture specific code. */ As such, there is no need to disable preemption around the call to get_kprobe(). Drop the same. Reported-by: Nicholas Piggin Signed-off-by: Naveen N. 
Rao Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1043d06a0affed83a4a46dd29466e72820ee215d.1666262278.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/kernel/kprobes.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index bd7b1a035459..88f42de681e1 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -158,9 +158,7 @@ int arch_prepare_kprobe(struct kprobe *p) printk("Cannot register a kprobe on the second word of prefixed instruction\n"); ret = -EINVAL; } - preempt_disable(); prev = get_kprobe(p->addr - 1); - preempt_enable_no_resched(); /* * When prev is a ftrace-based kprobe, we don't have an insn, and it From 04ec5d5782fb346c291a05a2efe59483d8ada4c4 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 20 Oct 2022 22:58:58 +0530 Subject: [PATCH 2634/4122] powerpc/kprobes: Have optimized_callback() use preempt_enable() Similar to x86 commit 2e62024c265aa6 ("kprobes/x86: Use preempt_enable() in optimized_callback()"), change powerpc optprobes to use preempt_enable() rather than preempt_enable_no_resched() since powerpc also removed irq disabling for optprobes in commit f72180cc93a2c6 ("powerpc/kprobes: Do not disable interrupts for optprobes and kprobes_on_ftrace"). Reported-by: Nicholas Piggin Signed-off-by: Naveen N. 
Rao Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1885bab182626c33d9bf6421f430abf924c521a5.1666262278.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/kernel/optprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index 3b1c2236cbee..004fae2044a3 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -112,7 +112,7 @@ static void optimized_callback(struct optimized_kprobe *op, __this_cpu_write(current_kprobe, NULL); } - preempt_enable_no_resched(); + preempt_enable(); } NOKPROBE_SYMBOL(optimized_callback); From 266b1991a433cd55bb86a933216b3f6762737d47 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 20 Oct 2022 22:58:59 +0530 Subject: [PATCH 2635/4122] powerpc/kprobes: Use preempt_enable() rather than the no_resched variant preempt_enable_no_resched() is just the same as preempt_enable() when we are in a irqs disabled context. kprobe_handler() and the post/fault handlers are all called with irqs disabled. As such, convert those to just use preempt_enable(). Reported-by: Nicholas Piggin Signed-off-by: Naveen N. 
Rao Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/72639f75fe66f931ec8c2165276ffbfb0fe1006f.1666262278.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/kernel/kprobes.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 88f42de681e1..86ca5a61ea9a 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -369,7 +369,7 @@ int kprobe_handler(struct pt_regs *regs) if (ret > 0) { restore_previous_kprobe(kcb); - preempt_enable_no_resched(); + preempt_enable(); return 1; } } @@ -382,7 +382,7 @@ int kprobe_handler(struct pt_regs *regs) if (p->pre_handler && p->pre_handler(p, regs)) { /* handler changed execution path, so skip ss setup */ reset_current_kprobe(); - preempt_enable_no_resched(); + preempt_enable(); return 1; } @@ -395,7 +395,7 @@ int kprobe_handler(struct pt_regs *regs) kcb->kprobe_status = KPROBE_HIT_SSDONE; reset_current_kprobe(); - preempt_enable_no_resched(); + preempt_enable(); return 1; } } @@ -404,7 +404,7 @@ int kprobe_handler(struct pt_regs *regs) return 1; no_kprobe: - preempt_enable_no_resched(); + preempt_enable(); return ret; } NOKPROBE_SYMBOL(kprobe_handler); @@ -490,7 +490,7 @@ int kprobe_post_handler(struct pt_regs *regs) } reset_current_kprobe(); out: - preempt_enable_no_resched(); + preempt_enable(); /* * if somebody else is singlestepping across a probe point, msr @@ -529,7 +529,7 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr) restore_previous_kprobe(kcb); else reset_current_kprobe(); - preempt_enable_no_resched(); + preempt_enable(); break; case KPROBE_HIT_ACTIVE: case KPROBE_HIT_SSDONE: From 04757c5e21ea17615b66f45e38f1cab32a7a0654 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 21 Oct 2022 09:45:45 +0100 Subject: [PATCH 2636/4122] selftests/powerpc: Fix spelling mistake "mmaping" -> "mmapping" There is a spelling mistake in a perror message. 
Fix it. Signed-off-by: Colin Ian King Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221021084545.65973-1-colin.i.king@gmail.com --- tools/testing/selftests/powerpc/ptrace/core-pkey.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c index bbc05ffc5860..1a70a96f0bfe 100644 --- a/tools/testing/selftests/powerpc/ptrace/core-pkey.c +++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c @@ -329,7 +329,7 @@ static int parent(struct shared_info *info, pid_t pid) core = mmap(NULL, core_size, PROT_READ, MAP_PRIVATE, fd, 0); if (core == (void *) -1) { - perror("Error mmaping core file"); + perror("Error mmapping core file"); ret = TEST_FAIL; goto out; } From ad8284ead833379fc57d90e50dbae1352b116c2b Mon Sep 17 00:00:00 2001 From: Shaomin Deng Date: Sat, 29 Oct 2022 05:46:43 -0400 Subject: [PATCH 2637/4122] selftests/powerpc: Remove repeated word in comments Remove the repeated word "not" in comments. 
Signed-off-by: Shaomin Deng Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221029094643.5595-1-dengshaomin@cdjrlc.com --- tools/testing/selftests/powerpc/include/pkeys.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/powerpc/include/pkeys.h b/tools/testing/selftests/powerpc/include/pkeys.h index 3312cb1b058d..51729d9a7111 100644 --- a/tools/testing/selftests/powerpc/include/pkeys.h +++ b/tools/testing/selftests/powerpc/include/pkeys.h @@ -24,7 +24,7 @@ #undef PKEY_DISABLE_EXECUTE #define PKEY_DISABLE_EXECUTE 0x4 -/* Older versions of libc do not not define this */ +/* Older versions of libc do not define this */ #ifndef SEGV_PKUERR #define SEGV_PKUERR 4 #endif From f668027521561d1071ccf54500c82a58a1918b2b Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Mon, 24 Oct 2022 15:13:46 +1100 Subject: [PATCH 2638/4122] powerpc/8xx: Fix warning in hw_breakpoint_handler() In hw_breakpoint_handler(), ea is set by wp_get_instr_detail() except for 8xx, leading the variable to be passed uninitialised to wp_check_constraints(). This is safe as wp_check_constraints() returns early without using ea, so just set it to make the compiler happy. 
Signed-off-by: Russell Currey Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221024041346.103608-1-ruscur@russell.cc --- arch/powerpc/kernel/hw_breakpoint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 8db1a15d7acb..e1b4e70c8fd0 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -646,7 +646,7 @@ int hw_breakpoint_handler(struct die_args *args) ppc_inst_t instr = ppc_inst(0); int type = 0; int size = 0; - unsigned long ea; + unsigned long ea = 0; /* Disable breakpoints during exception handling */ hw_breakpoint_disable(); From afa1cda4097077e37639ca7098c2147e1885b2df Mon Sep 17 00:00:00 2001 From: Bo Liu Date: Mon, 31 Oct 2022 02:37:06 -0400 Subject: [PATCH 2639/4122] powerpc/pseries/eeh: Fix some kernel-doc warnings Fixes the following W=1 kernel build warning(s): arch/powerpc/platforms/pseries/eeh_pseries.c:163: warning: Function parameter or member 'config_addr' not described in 'pseries_eeh_phb_reset' arch/powerpc/platforms/pseries/eeh_pseries.c:163: warning: Excess function parameter 'config_adddr' description in 'pseries_eeh_phb_reset' arch/powerpc/platforms/pseries/eeh_pseries.c:198: warning: Function parameter or member 'config_addr' not described in 'pseries_eeh_phb_configure_bridge' arch/powerpc/platforms/pseries/eeh_pseries.c:198: warning: Excess function parameter 'config_adddr' description in 'pseries_eeh_phb_configure_bridge' Signed-off-by: Bo Liu Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221031063706.2770-1-liubo03@inspur.com --- arch/powerpc/platforms/pseries/eeh_pseries.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 8e40ccac0f44..ea890037843c 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ 
b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -154,7 +154,7 @@ static int pseries_eeh_get_pe_config_addr(struct pci_dn *pdn) /** * pseries_eeh_phb_reset - Reset the specified PHB * @phb: PCI controller - * @config_adddr: the associated config address + * @config_addr: the associated config address * @option: reset option * * Reset the specified PHB/PE @@ -188,7 +188,7 @@ static int pseries_eeh_phb_reset(struct pci_controller *phb, int config_addr, in /** * pseries_eeh_phb_configure_bridge - Configure PCI bridges in the indicated PE * @phb: PCI controller - * @config_adddr: the associated config address + * @config_addr: the associated config address * * The function will be called to reconfigure the bridges included * in the specified PE so that the mulfunctional PE would be recovered From 59dc2d94bc12dac53a5d2368ad97ca24e7cc5682 Mon Sep 17 00:00:00 2001 From: Chen Lifu Date: Thu, 3 Nov 2022 15:01:22 +0800 Subject: [PATCH 2640/4122] powerpc/powermac: Fix symbol not declared warnings 1. ppc_override_l2cr and ppc_override_l2cr_value are only used in l2cr_init() function, remove them and use *l2cr directly. 2. has_l2cache is not used outside of the file, so mark it static and do not initialise statics to 0. Fixes the following warnings: arch/powerpc/platforms/powermac/setup.c:73:5: warning: symbol 'ppc_override_l2cr' was not declared. Should it be static? arch/powerpc/platforms/powermac/setup.c:74:5: warning: symbol 'ppc_override_l2cr_value' was not declared. Should it be static? arch/powerpc/platforms/powermac/setup.c:75:5: warning: symbol 'has_l2cache' was not declared. Should it be static?
Signed-off-by: Chen Lifu Reviewed-by: Christophe Leroy [mpe: Unwrap printk string] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221103070122.340773-1-chenlifu@huawei.com --- arch/powerpc/platforms/powermac/setup.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index 04daa7f0a03c..4f7ee885a78f 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -70,9 +70,7 @@ #undef SHOW_GATWICK_IRQS -int ppc_override_l2cr = 0; -int ppc_override_l2cr_value; -int has_l2cache = 0; +static int has_l2cache; int pmac_newworld; @@ -236,22 +234,16 @@ static void __init l2cr_init(void) const unsigned int *l2cr = of_get_property(np, "l2cr-value", NULL); if (l2cr) { - ppc_override_l2cr = 1; - ppc_override_l2cr_value = *l2cr; _set_L2CR(0); - _set_L2CR(ppc_override_l2cr_value); + _set_L2CR(*l2cr); + pr_info("L2CR overridden (0x%x), backside cache is %s\n", + *l2cr, ((*l2cr) & 0x80000000) ? + "enabled" : "disabled"); } of_node_put(np); break; } } - - if (ppc_override_l2cr) - printk(KERN_INFO "L2CR overridden (0x%x), " - "backside cache is %s\n", - ppc_override_l2cr_value, - (ppc_override_l2cr_value & 0x80000000) - ? "enabled" : "disabled"); } #endif From 2330757e0be0acad88852e211dcd6106390a729b Mon Sep 17 00:00:00 2001 From: Nayna Jain Date: Sun, 6 Nov 2022 15:58:34 -0500 Subject: [PATCH 2641/4122] powerpc/pseries: fix the object owners enum value in plpks driver OS_VAR_LINUX enum in PLPKS driver should be 0x02 instead of 0x01. 
Fixes: 2454a7af0f2a ("powerpc/pseries: define driver for Platform KeyStore") Signed-off-by: Nayna Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221106205839.600442-2-nayna@linux.ibm.com --- arch/powerpc/platforms/pseries/plpks.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/plpks.h b/arch/powerpc/platforms/pseries/plpks.h index c6a291367bb1..275ccd86bfb5 100644 --- a/arch/powerpc/platforms/pseries/plpks.h +++ b/arch/powerpc/platforms/pseries/plpks.h @@ -17,7 +17,7 @@ #define WORLDREADABLE 0x08000000 #define SIGNEDUPDATE 0x01000000 -#define PLPKS_VAR_LINUX 0x01 +#define PLPKS_VAR_LINUX 0x02 #define PLPKS_VAR_COMMON 0x04 struct plpks_var { From af223e1728c448073d1e12fe464bf344310edeba Mon Sep 17 00:00:00 2001 From: Nayna Jain Date: Sun, 6 Nov 2022 15:58:35 -0500 Subject: [PATCH 2642/4122] powerpc/pseries: Fix the H_CALL error code in PLPKS driver PAPR Spec defines H_P1 actually as H_PARAMETER and maps H_ABORTED to a different numerical value. Fix the error codes as per PAPR Specification. 
Fixes: 2454a7af0f2a ("powerpc/pseries: define driver for Platform KeyStore") Signed-off-by: Nayna Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221106205839.600442-3-nayna@linux.ibm.com --- arch/powerpc/include/asm/hvcall.h | 3 +-- arch/powerpc/platforms/pseries/plpks.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 8abae463f6c1..95fd7f9485d5 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -79,7 +79,7 @@ #define H_NOT_ENOUGH_RESOURCES -44 #define H_R_STATE -45 #define H_RESCINDED -46 -#define H_P1 -54 +#define H_ABORTED -54 #define H_P2 -55 #define H_P3 -56 #define H_P4 -57 @@ -100,7 +100,6 @@ #define H_COP_HW -74 #define H_STATE -75 #define H_IN_USE -77 -#define H_ABORTED -78 #define H_UNSUPPORTED_FLAG_START -256 #define H_UNSUPPORTED_FLAG_END -511 #define H_MULTI_THREADS_ACTIVE -9005 diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c index f4b5b5a64db3..32ce4d780d8f 100644 --- a/arch/powerpc/platforms/pseries/plpks.c +++ b/arch/powerpc/platforms/pseries/plpks.c @@ -75,7 +75,7 @@ static int pseries_status_to_err(int rc) case H_FUNCTION: err = -ENXIO; break; - case H_P1: + case H_PARAMETER: case H_P2: case H_P3: case H_P4: From bb8e4c7cb759b90a04f2e94056b50288ff46a0ed Mon Sep 17 00:00:00 2001 From: Nayna Jain Date: Sun, 6 Nov 2022 15:58:36 -0500 Subject: [PATCH 2643/4122] powerpc/pseries: Return -EIO instead of -EINTR for H_ABORTED error Some commands for eg. "cat" might continue to retry on encountering EINTR. This is not expected for original error code H_ABORTED. Map H_ABORTED to more relevant Linux error code EIO. 
Fixes: 2454a7af0f2a ("powerpc/pseries: define driver for Platform KeyStore") Signed-off-by: Nayna Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221106205839.600442-4-nayna@linux.ibm.com --- arch/powerpc/platforms/pseries/plpks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c index 32ce4d780d8f..cbea447122ca 100644 --- a/arch/powerpc/platforms/pseries/plpks.c +++ b/arch/powerpc/platforms/pseries/plpks.c @@ -111,7 +111,7 @@ static int pseries_status_to_err(int rc) err = -EEXIST; break; case H_ABORTED: - err = -EINTR; + err = -EIO; break; default: err = -EINVAL; From 8888ea772972323362660e9a1339175294664a6c Mon Sep 17 00:00:00 2001 From: Nayna Jain Date: Sun, 6 Nov 2022 15:58:37 -0500 Subject: [PATCH 2644/4122] powerpc/pseries: cleanup error logs in plpks driver Logging H_CALL return codes in PLPKS driver are easy to confuse with Linux error codes. Let the caller of the function log the converted linux error code. 
Signed-off-by: Nayna Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221106205839.600442-5-nayna@linux.ibm.com --- arch/powerpc/platforms/pseries/plpks.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c index cbea447122ca..72d9debf18c0 100644 --- a/arch/powerpc/platforms/pseries/plpks.c +++ b/arch/powerpc/platforms/pseries/plpks.c @@ -312,10 +312,6 @@ int plpks_write_var(struct plpks_var var) if (!rc) rc = plpks_confirm_object_flushed(label, auth); - if (rc) - pr_err("Failed to write variable %s for component %s with error %d\n", - var.name, var.component, rc); - rc = pseries_status_to_err(rc); kfree(label); out: @@ -350,10 +346,6 @@ int plpks_remove_var(char *component, u8 varos, struct plpks_var_name vname) if (!rc) rc = plpks_confirm_object_flushed(label, auth); - if (rc) - pr_err("Failed to remove variable %s for component %s with error %d\n", - vname.name, component, rc); - rc = pseries_status_to_err(rc); kfree(label); out: @@ -395,8 +387,6 @@ static int plpks_read_var(u8 consumer, struct plpks_var *var) maxobjsize); if (rc != H_SUCCESS) { - pr_err("Failed to read variable %s for component %s with error %d\n", - var->name, var->component, rc); rc = pseries_status_to_err(rc); goto out_free_output; } From 212dd5cfbee7815f3c665a51c501701edb881599 Mon Sep 17 00:00:00 2001 From: Nayna Jain Date: Sun, 6 Nov 2022 15:58:38 -0500 Subject: [PATCH 2645/4122] powerpc/pseries: replace kmalloc with kzalloc in PLPKS driver Replace kmalloc with kzalloc in construct_auth() function to default initialize structure with zeroes. 
Signed-off-by: Nayna Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221106205839.600442-6-nayna@linux.ibm.com --- arch/powerpc/platforms/pseries/plpks.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c index 72d9debf18c0..e8c02735b702 100644 --- a/arch/powerpc/platforms/pseries/plpks.c +++ b/arch/powerpc/platforms/pseries/plpks.c @@ -162,19 +162,15 @@ static struct plpks_auth *construct_auth(u8 consumer) if (consumer > PKS_OS_OWNER) return ERR_PTR(-EINVAL); - auth = kmalloc(struct_size(auth, password, maxpwsize), GFP_KERNEL); + auth = kzalloc(struct_size(auth, password, maxpwsize), GFP_KERNEL); if (!auth) return ERR_PTR(-ENOMEM); auth->version = 1; auth->consumer = consumer; - auth->rsvd0 = 0; - auth->rsvd1 = 0; - if (consumer == PKS_FW_OWNER || consumer == PKS_BOOTLOADER_OWNER) { - auth->passwordlength = 0; + if (consumer == PKS_FW_OWNER || consumer == PKS_BOOTLOADER_OWNER) return auth; - } memcpy(auth->password, ospassword, ospasswordlength); From 1f622f3f80cbf8999ff5955a2fcfbd801a1f32e0 Mon Sep 17 00:00:00 2001 From: Nayna Jain Date: Sun, 6 Nov 2022 15:58:39 -0500 Subject: [PATCH 2646/4122] powerpc/pseries: fix plpks_read_var() code for different consumers Even though plpks_read_var() is currently called to read variables owned by different consumers, it internally supports only OS consumer. Fix plpks_read_var() to handle different consumers correctly. 
Fixes: 2454a7af0f2a ("powerpc/pseries: define driver for Platform KeyStore") Signed-off-by: Nayna Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221106205839.600442-7-nayna@linux.ibm.com --- arch/powerpc/platforms/pseries/plpks.c | 28 +++++++++++++++++--------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c index e8c02735b702..4edd1585e245 100644 --- a/arch/powerpc/platforms/pseries/plpks.c +++ b/arch/powerpc/platforms/pseries/plpks.c @@ -354,22 +354,24 @@ static int plpks_read_var(u8 consumer, struct plpks_var *var) { unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 }; struct plpks_auth *auth; - struct label *label; + struct label *label = NULL; u8 *output; int rc; if (var->namelen > MAX_NAME_SIZE) return -EINVAL; - auth = construct_auth(PKS_OS_OWNER); + auth = construct_auth(consumer); if (IS_ERR(auth)) return PTR_ERR(auth); - label = construct_label(var->component, var->os, var->name, - var->namelen); - if (IS_ERR(label)) { - rc = PTR_ERR(label); - goto out_free_auth; + if (consumer == PKS_OS_OWNER) { + label = construct_label(var->component, var->os, var->name, + var->namelen); + if (IS_ERR(label)) { + rc = PTR_ERR(label); + goto out_free_auth; + } } output = kzalloc(maxobjsize, GFP_KERNEL); @@ -378,9 +380,15 @@ static int plpks_read_var(u8 consumer, struct plpks_var *var) goto out_free_label; } - rc = plpar_hcall(H_PKS_READ_OBJECT, retbuf, virt_to_phys(auth), - virt_to_phys(label), label->size, virt_to_phys(output), - maxobjsize); + if (consumer == PKS_OS_OWNER) + rc = plpar_hcall(H_PKS_READ_OBJECT, retbuf, virt_to_phys(auth), + virt_to_phys(label), label->size, virt_to_phys(output), + maxobjsize); + else + rc = plpar_hcall(H_PKS_READ_OBJECT, retbuf, virt_to_phys(auth), + virt_to_phys(var->name), var->namelen, virt_to_phys(output), + maxobjsize); + if (rc != H_SUCCESS) { rc = pseries_status_to_err(rc); From 
a9ffb8ee7b65a468474d6a2be7e9cca4b8f8ea5f Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Fri, 18 Nov 2022 17:40:29 +0800 Subject: [PATCH 2647/4122] powerpc: Use "grep -E" instead of "egrep" The latest version of grep claims that egrep is now obsolete, so the build now emits warnings that look like: egrep: warning: egrep is obsolescent; using grep -E. Fix this up by changing the affected file to use "grep -E" instead. sed -i "s/egrep/grep -E/g" `grep egrep -rwl arch/powerpc` Here are the steps to install the latest grep: wget http://ftp.gnu.org/gnu/grep/grep-3.8.tar.gz tar xf grep-3.8.tar.gz cd grep-3.8 && ./configure && make sudo make install export PATH=/usr/local/bin:$PATH Signed-off-by: Tiezhu Yang Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1668764429-11540-1-git-send-email-yangtiezhu@loongson.cn --- arch/powerpc/boot/wrapper | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index a8a87d7667f4..af04cea82b94 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -594,7 +594,7 @@ ps3) # reached, then enter the system reset vector of the partially decompressed # image. No warning is issued. rm -f "$odir"/{otheros,otheros-too-big}.bld - size=$(${CROSS}nm --no-sort --radix=d "$ofile" | egrep ' _end$' | cut -d' ' -f1) + size=$(${CROSS}nm --no-sort --radix=d "$ofile" | grep -E ' _end$' | cut -d' ' -f1) bld="otheros.bld" if [ $size -gt $((0x1000000)) ]; then bld="otheros-too-big.bld" From 15792642db6946890416a6d1616b03ab25c26fa7 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 23 Nov 2022 10:01:54 -0800 Subject: [PATCH 2648/4122] perf stat: Fix cgroup display in JSON output The 'else' keyword was missing after the JSON output mode check. 
Fixes: 41cb875242e71bf1 ("perf stat: Split print_cgroup() function") Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221123180208.2068936-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index f5501760ff2e..46e90f0bb423 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -175,7 +175,7 @@ static void print_cgroup(struct perf_stat_config *config, struct cgroup *cgrp) if (config->json_output) print_cgroup_json(config, cgrp_name); - if (config->csv_output) + else if (config->csv_output) print_cgroup_csv(config, cgrp_name); else print_cgroup_std(config, cgrp_name); From 6d74ed369d4342bb7d4fecbc1cde6061b5bb5604 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 23 Nov 2022 10:01:55 -0800 Subject: [PATCH 2649/4122] perf stat: Move summary prefix printing logic in CSV output It matches to the prefix (interval timestamp), so better to have them together. No functional change intended. 
Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221123180208.2068936-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 46e90f0bb423..d86f2f8e020d 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -713,11 +713,6 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int nl = config->metric_only ? new_line_metric : new_line_std; } - if (!config->no_csv_summary && config->csv_output && - config->summary && !config->interval && !config->metric_only) { - fprintf(config->output, "%16s%s", "summary", config->csv_sep); - } - if (run == 0 || ena == 0 || counter->counts->scaled == -1) { if (config->metric_only) { pm(config, &os, NULL, "", "", 0); @@ -828,8 +823,13 @@ static void print_counter_aggrdata(struct perf_stat_config *config, ena = aggr->counts.ena; run = aggr->counts.run; - if (prefix && !metric_only) - fprintf(output, "%s", prefix); + if (!metric_only) { + if (prefix) + fprintf(output, "%s", prefix); + else if (config->summary && config->csv_output && + !config->no_csv_summary && !config->interval) + fprintf(output, "%16s%s", "summary", config->csv_sep); + } uval = val * counter->scale; From 8e55ae24c08fd5bf39b632df72122a7c2591c03c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 23 Nov 2022 10:01:56 -0800 Subject: [PATCH 2650/4122] perf stat: Do not align time prefix in CSV output We don't care about the alignment in the CSV output as it's intended for machine processing. Let's get rid of it to make the output more compact. 
Before: # perf stat -a --summary -I 1 -x, true 0.001149309,219.20,msec,cpu-clock,219322251,100.00,219.200,CPUs utilized 0.001149309,144,,context-switches,219241902,100.00,656.935,/sec 0.001149309,38,,cpu-migrations,219173705,100.00,173.358,/sec 0.001149309,61,,page-faults,219093635,100.00,278.285,/sec 0.001149309,10679310,,cycles,218746228,100.00,0.049,GHz 0.001149309,6288296,,instructions,218589869,100.00,0.59,insn per cycle 0.001149309,1386904,,branches,218428851,100.00,6.327,M/sec 0.001149309,56863,,branch-misses,218219951,100.00,4.10,of all branches summary,219.20,msec,cpu-clock,219322251,100.00,20.025,CPUs utilized summary,144,,context-switches,219241902,100.00,656.935,/sec summary,38,,cpu-migrations,219173705,100.00,173.358,/sec summary,61,,page-faults,219093635,100.00,278.285,/sec summary,10679310,,cycles,218746228,100.00,0.049,GHz summary,6288296,,instructions,218589869,100.00,0.59,insn per cycle summary,1386904,,branches,218428851,100.00,6.327,M/sec summary,56863,,branch-misses,218219951,100.00,4.10,of all branches After: 0.001148449,224.75,msec,cpu-clock,224870589,100.00,224.747,CPUs utilized 0.001148449,176,,context-switches,224775564,100.00,783.103,/sec 0.001148449,38,,cpu-migrations,224707428,100.00,169.079,/sec 0.001148449,61,,page-faults,224629326,100.00,271.416,/sec 0.001148449,12172071,,cycles,224266368,100.00,0.054,GHz 0.001148449,6901907,,instructions,224108764,100.00,0.57,insn per cycle 0.001148449,1515655,,branches,223946693,100.00,6.744,M/sec 0.001148449,70027,,branch-misses,223735385,100.00,4.62,of all branches summary,224.75,msec,cpu-clock,224870589,100.00,21.066,CPUs utilized summary,176,,context-switches,224775564,100.00,783.103,/sec summary,38,,cpu-migrations,224707428,100.00,169.079,/sec summary,61,,page-faults,224629326,100.00,271.416,/sec summary,12172071,,cycles,224266368,100.00,0.054,GHz summary,6901907,,instructions,224108764,100.00,0.57,insn per cycle summary,1515655,,branches,223946693,100.00,6.744,M/sec 
summary,70027,,branch-misses,223735385,100.00,4.62,of all branches Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221123180208.2068936-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index d86f2f8e020d..15c88b9b5aa3 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -828,7 +828,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config, fprintf(output, "%s", prefix); else if (config->summary && config->csv_output && !config->no_csv_summary && !config->interval) - fprintf(output, "%16s%s", "summary", config->csv_sep); + fprintf(output, "%s%s", "summary", config->csv_sep); } uval = val * counter->scale; @@ -1078,9 +1078,12 @@ static void prepare_interval(struct perf_stat_config *config, if (config->iostat_run) return; - if (!config->json_output) - sprintf(prefix, "%6lu.%09lu%s", (unsigned long) ts->tv_sec, + if (config->csv_output) + sprintf(prefix, "%lu.%09lu%s", (unsigned long) ts->tv_sec, ts->tv_nsec, config->csv_sep); + else if (!config->json_output) + sprintf(prefix, "%6lu.%09lu ", (unsigned long) ts->tv_sec, + ts->tv_nsec); else if (!config->metric_only) sprintf(prefix, "{\"interval\" : %lu.%09lu, ", (unsigned long) ts->tv_sec, ts->tv_nsec); From a7ec1dd2d744208ca814ebde8c97f94e041625ef Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 23 Nov 2022 10:01:57 -0800 Subject: [PATCH 2651/4122] perf stat: Use scnprintf() in prepare_interval() It should not use sprintf() anymore. Let's pass the buffer size and use the safer scnprintf() instead. 
Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221123180208.2068936-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 15c88b9b5aa3..744b7a40f59a 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -1073,23 +1073,23 @@ static void print_metric_headers(struct perf_stat_config *config, } static void prepare_interval(struct perf_stat_config *config, - char *prefix, struct timespec *ts) + char *prefix, size_t len, struct timespec *ts) { if (config->iostat_run) return; if (config->csv_output) - sprintf(prefix, "%lu.%09lu%s", (unsigned long) ts->tv_sec, - ts->tv_nsec, config->csv_sep); + scnprintf(prefix, len, "%lu.%09lu%s", + (unsigned long) ts->tv_sec, ts->tv_nsec, config->csv_sep); else if (!config->json_output) - sprintf(prefix, "%6lu.%09lu ", (unsigned long) ts->tv_sec, - ts->tv_nsec); + scnprintf(prefix, len, "%6lu.%09lu ", + (unsigned long) ts->tv_sec, ts->tv_nsec); else if (!config->metric_only) - sprintf(prefix, "{\"interval\" : %lu.%09lu, ", (unsigned long) - ts->tv_sec, ts->tv_nsec); + scnprintf(prefix, len, "{\"interval\" : %lu.%09lu, ", + (unsigned long) ts->tv_sec, ts->tv_nsec); else - sprintf(prefix, "{\"interval\" : %lu.%09lu}", (unsigned long) - ts->tv_sec, ts->tv_nsec); + scnprintf(prefix, len, "{\"interval\" : %lu.%09lu}", + (unsigned long) ts->tv_sec, ts->tv_nsec); } static void print_header_interval_std(struct perf_stat_config *config, @@ -1390,7 +1390,7 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf if (interval) { prefix = buf; - prepare_interval(config, prefix, ts); + prepare_interval(config, buf, sizeof(buf), 
ts); } print_header(config, _target, evlist, argc, argv); From f123b2d84ecec9a3c551e745f43d36bba1d69e1c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 23 Nov 2022 10:01:58 -0800 Subject: [PATCH 2652/4122] perf stat: Remove prefix argument in print_metric_headers() It always passes a whitespace to the function, thus we can just add it to the function body. Furthermore, it's only used in the normal output mode. Well, actually CSV used it but it doesn't need to since we don't care about the indentation or alignment in the CSV output. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221123180208.2068936-6-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 744b7a40f59a..deed6ccf072f 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -996,10 +996,9 @@ static void print_no_aggr_metric(struct perf_stat_config *config, } static void print_metric_headers_std(struct perf_stat_config *config, - const char *prefix, bool no_indent) + bool no_indent) { - if (prefix) - fprintf(config->output, "%s", prefix); + fputc(' ', config->output); if (!no_indent) { int len = aggr_header_lens[config->aggr_mode]; @@ -1012,11 +1011,8 @@ static void print_metric_headers_std(struct perf_stat_config *config, } static void print_metric_headers_csv(struct perf_stat_config *config, - const char *prefix, bool no_indent __maybe_unused) { - if (prefix) - fprintf(config->output, "%s", prefix); if (config->interval) fputs("time,", config->output); if (!config->iostat_run) @@ -1024,7 +1020,6 @@ static void print_metric_headers_csv(struct perf_stat_config *config, } static void 
print_metric_headers_json(struct perf_stat_config *config, - const char *prefix __maybe_unused, bool no_indent __maybe_unused) { if (config->interval) @@ -1032,8 +1027,7 @@ static void print_metric_headers_json(struct perf_stat_config *config, } static void print_metric_headers(struct perf_stat_config *config, - struct evlist *evlist, - const char *prefix, bool no_indent) + struct evlist *evlist, bool no_indent) { struct evsel *counter; struct outstate os = { @@ -1047,11 +1041,11 @@ static void print_metric_headers(struct perf_stat_config *config, }; if (config->json_output) - print_metric_headers_json(config, prefix, no_indent); + print_metric_headers_json(config, no_indent); else if (config->csv_output) - print_metric_headers_csv(config, prefix, no_indent); + print_metric_headers_csv(config, no_indent); else - print_metric_headers_std(config, prefix, no_indent); + print_metric_headers_std(config, no_indent); if (config->iostat_run) iostat_print_header_prefix(config); @@ -1132,7 +1126,7 @@ static void print_header_interval_std(struct perf_stat_config *config, } if (config->metric_only) - print_metric_headers(config, evlist, " ", true); + print_metric_headers(config, evlist, true); else fprintf(output, " %*s %*s events\n", COUNTS_LEN, "counts", config->unit_width, "unit"); @@ -1168,7 +1162,7 @@ static void print_header_std(struct perf_stat_config *config, fprintf(output, ":\n\n"); if (config->metric_only) - print_metric_headers(config, evlist, " ", false); + print_metric_headers(config, evlist, false); } static void print_header_csv(struct perf_stat_config *config, @@ -1178,7 +1172,7 @@ static void print_header_csv(struct perf_stat_config *config, const char **argv __maybe_unused) { if (config->metric_only) - print_metric_headers(config, evlist, " ", true); + print_metric_headers(config, evlist, true); } static void print_header_json(struct perf_stat_config *config, struct target *_target __maybe_unused, @@ -1187,7 +1181,7 @@ static void print_header_json(struct 
perf_stat_config *config, const char **argv __maybe_unused) { if (config->metric_only) - print_metric_headers(config, evlist, " ", true); + print_metric_headers(config, evlist, true); } static void print_header(struct perf_stat_config *config, From ce551ec923445b821893bddfb13c116d7e8fe454 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 23 Nov 2022 10:01:59 -0800 Subject: [PATCH 2653/4122] perf stat: Remove metric_only argument in print_counter_aggrdata() It already passes the stat_config argument, then it can find the value in the config. No need to pass it separately. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221123180208.2068936-7-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index deed6ccf072f..b8432c0a0ec3 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -804,7 +804,7 @@ static void uniquify_counter(struct perf_stat_config *config, struct evsel *coun static void print_counter_aggrdata(struct perf_stat_config *config, struct evsel *counter, int s, - char *prefix, bool metric_only) + char *prefix) { FILE *output = config->output; u64 ena, run, val; @@ -813,6 +813,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config, struct perf_stat_aggr *aggr = &ps->aggr[s]; struct aggr_cpu_id id = config->aggr_map->map[s]; double avg = aggr->counts.val; + bool metric_only = config->metric_only; if (counter->supported && aggr->nr == 0) return; @@ -875,7 +876,6 @@ static void print_aggr(struct perf_stat_config *config, struct evlist *evlist, char *prefix) { - bool metric_only = config->metric_only; struct evsel *counter; int s; @@ -893,8 +893,7 @@ static 
void print_aggr(struct perf_stat_config *config, if (counter->merged_stat) continue; - print_counter_aggrdata(config, counter, s, prefix, - metric_only); + print_counter_aggrdata(config, counter, s, prefix); } print_metric_end(config); } @@ -904,7 +903,6 @@ static void print_aggr_cgroup(struct perf_stat_config *config, struct evlist *evlist, char *prefix) { - bool metric_only = config->metric_only; struct evsel *counter, *evsel; struct cgroup *cgrp = NULL; int s; @@ -928,8 +926,7 @@ static void print_aggr_cgroup(struct perf_stat_config *config, if (counter->cgrp != cgrp) continue; - print_counter_aggrdata(config, counter, s, prefix, - metric_only); + print_counter_aggrdata(config, counter, s, prefix); } print_metric_end(config); } @@ -939,7 +936,6 @@ static void print_aggr_cgroup(struct perf_stat_config *config, static void print_counter(struct perf_stat_config *config, struct evsel *counter, char *prefix) { - bool metric_only = config->metric_only; int s; /* AGGR_THREAD doesn't have config->aggr_get_id */ @@ -950,8 +946,7 @@ static void print_counter(struct perf_stat_config *config, return; for (s = 0; s < config->aggr_map->nr; s++) { - print_counter_aggrdata(config, counter, s, prefix, - metric_only); + print_counter_aggrdata(config, counter, s, prefix); } } @@ -1339,7 +1334,7 @@ static void print_percore(struct perf_stat_config *config, if (found) continue; - print_counter_aggrdata(config, counter, s, prefix, metric_only); + print_counter_aggrdata(config, counter, s, prefix); core_map->map[c++] = core_id; } From 991991ab99635d9e368f9671fa8c30ec1113042c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 23 Nov 2022 10:02:00 -0800 Subject: [PATCH 2654/4122] perf stat: Pass const char *prefix to display routines This is a minor cleanup and preparation for the later change. 
Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221123180208.2068936-8-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/iostat.h | 2 +- tools/perf/util/stat-display.c | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tools/perf/util/iostat.h b/tools/perf/util/iostat.h index 23c1c46a331a..c22688f87cb2 100644 --- a/tools/perf/util/iostat.h +++ b/tools/perf/util/iostat.h @@ -28,7 +28,7 @@ enum iostat_mode_t { extern enum iostat_mode_t iostat_mode; -typedef void (*iostat_print_counter_t)(struct perf_stat_config *, struct evsel *, char *); +typedef void (*iostat_print_counter_t)(struct perf_stat_config *, struct evsel *, const char *); int iostat_prepare(struct evlist *evlist, struct perf_stat_config *config); int iostat_parse(const struct option *opt, const char *str, diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index b8432c0a0ec3..d2894a519d61 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -675,7 +675,7 @@ static bool is_mixed_hw_group(struct evsel *counter) static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int nr, struct evsel *counter, double uval, - char *prefix, u64 run, u64 ena, double noise, + const char *prefix, u64 run, u64 ena, double noise, struct runtime_stat *st, int map_idx) { struct perf_stat_output_ctx out; @@ -804,7 +804,7 @@ static void uniquify_counter(struct perf_stat_config *config, struct evsel *coun static void print_counter_aggrdata(struct perf_stat_config *config, struct evsel *counter, int s, - char *prefix) + const char *prefix) { FILE *output = config->output; u64 ena, run, val; @@ -843,7 +843,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config, static void print_metric_begin(struct 
perf_stat_config *config, struct evlist *evlist, - char *prefix, int aggr_idx, + const char *prefix, int aggr_idx, struct cgroup *cgrp) { struct perf_stat_aggr *aggr; @@ -874,7 +874,7 @@ static void print_metric_end(struct perf_stat_config *config) static void print_aggr(struct perf_stat_config *config, struct evlist *evlist, - char *prefix) + const char *prefix) { struct evsel *counter; int s; @@ -901,7 +901,7 @@ static void print_aggr(struct perf_stat_config *config, static void print_aggr_cgroup(struct perf_stat_config *config, struct evlist *evlist, - char *prefix) + const char *prefix) { struct evsel *counter, *evsel; struct cgroup *cgrp = NULL; @@ -934,7 +934,7 @@ static void print_aggr_cgroup(struct perf_stat_config *config, } static void print_counter(struct perf_stat_config *config, - struct evsel *counter, char *prefix) + struct evsel *counter, const char *prefix) { int s; @@ -952,7 +952,7 @@ static void print_counter(struct perf_stat_config *config, static void print_no_aggr_metric(struct perf_stat_config *config, struct evlist *evlist, - char *prefix) + const char *prefix) { int all_idx; struct perf_cpu cpu; @@ -1301,7 +1301,7 @@ static void print_footer(struct perf_stat_config *config) } static void print_percore(struct perf_stat_config *config, - struct evsel *counter, char *prefix) + struct evsel *counter, const char *prefix) { bool metric_only = config->metric_only; FILE *output = config->output; @@ -1345,7 +1345,7 @@ static void print_percore(struct perf_stat_config *config, } static void print_cgroup_counter(struct perf_stat_config *config, struct evlist *evlist, - char *prefix) + const char *prefix) { struct cgroup *cgrp = NULL; struct evsel *counter; From 92ccf7f11d68fa55dc82cb7ae01043b3691918cf Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 23 Nov 2022 10:02:01 -0800 Subject: [PATCH 2655/4122] perf stat: Use 'struct outstate' in evlist__print_counters() This is a preparation for the later cleanup. No functional changes intended. 
Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221123180208.2068936-9-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index d2894a519d61..70aebf359e16 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -1372,13 +1372,16 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf bool metric_only = config->metric_only; int interval = config->interval; struct evsel *counter; - char buf[64], *prefix = NULL; + char buf[64]; + struct outstate os = { + .fh = config->output, + }; if (config->iostat_run) evlist->selected = evlist__first(evlist); if (interval) { - prefix = buf; + os.prefix = buf; prepare_interval(config, buf, sizeof(buf), ts); } @@ -1390,35 +1393,35 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf case AGGR_SOCKET: case AGGR_NODE: if (config->cgroup_list) - print_aggr_cgroup(config, evlist, prefix); + print_aggr_cgroup(config, evlist, os.prefix); else - print_aggr(config, evlist, prefix); + print_aggr(config, evlist, os.prefix); break; case AGGR_THREAD: case AGGR_GLOBAL: if (config->iostat_run) { - iostat_print_counters(evlist, config, ts, prefix = buf, + iostat_print_counters(evlist, config, ts, buf, print_counter); } else if (config->cgroup_list) { - print_cgroup_counter(config, evlist, prefix); + print_cgroup_counter(config, evlist, os.prefix); } else { - print_metric_begin(config, evlist, prefix, + print_metric_begin(config, evlist, os.prefix, /*aggr_idx=*/0, /*cgrp=*/NULL); evlist__for_each_entry(evlist, counter) { - print_counter(config, counter, prefix); + print_counter(config, counter, 
os.prefix); } print_metric_end(config); } break; case AGGR_NONE: if (metric_only) - print_no_aggr_metric(config, evlist, prefix); + print_no_aggr_metric(config, evlist, os.prefix); else { evlist__for_each_entry(evlist, counter) { if (counter->percore) - print_percore(config, counter, prefix); + print_percore(config, counter, os.prefix); else - print_counter(config, counter, prefix); + print_counter(config, counter, os.prefix); } } break; From 922ae948c429a9b396761f2071eebf6a19688dd2 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 23 Nov 2022 10:02:02 -0800 Subject: [PATCH 2656/4122] perf stat: Pass 'struct outstate' to print_metric_begin() It passes prefix and cgroup pointers but the outstate already has them. Let's pass the outstate pointer instead. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221123180208.2068936-10-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 50 +++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 70aebf359e16..3ed63061d6f8 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -843,8 +843,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config, static void print_metric_begin(struct perf_stat_config *config, struct evlist *evlist, - const char *prefix, int aggr_idx, - struct cgroup *cgrp) + struct outstate *os, int aggr_idx) { struct perf_stat_aggr *aggr; struct aggr_cpu_id id; @@ -853,15 +852,15 @@ static void print_metric_begin(struct perf_stat_config *config, if (!config->metric_only) return; - if (prefix) - fprintf(config->output, "%s", prefix); + if (os->prefix) + fprintf(config->output, "%s", os->prefix); evsel = evlist__first(evlist); id = 
config->aggr_map->map[aggr_idx]; aggr = &evsel->stats->aggr[aggr_idx]; aggr_printout(config, evsel, id, aggr->nr); - print_cgroup(config, cgrp); + print_cgroup(config, os->cgrp); } static void print_metric_end(struct perf_stat_config *config) @@ -877,6 +876,9 @@ static void print_aggr(struct perf_stat_config *config, const char *prefix) { struct evsel *counter; + struct outstate os = { + .prefix = prefix, + }; int s; if (!config->aggr_map || !config->aggr_get_id) @@ -887,7 +889,7 @@ static void print_aggr(struct perf_stat_config *config, * Without each counter has its own line. */ for (s = 0; s < config->aggr_map->nr; s++) { - print_metric_begin(config, evlist, prefix, s, /*cgrp=*/NULL); + print_metric_begin(config, evlist, &os, s); evlist__for_each_entry(evlist, counter) { if (counter->merged_stat) @@ -904,26 +906,28 @@ static void print_aggr_cgroup(struct perf_stat_config *config, const char *prefix) { struct evsel *counter, *evsel; - struct cgroup *cgrp = NULL; + struct outstate os = { + .prefix = prefix, + }; int s; if (!config->aggr_map || !config->aggr_get_id) return; evlist__for_each_entry(evlist, evsel) { - if (cgrp == evsel->cgrp) + if (os.cgrp == evsel->cgrp) continue; - cgrp = evsel->cgrp; + os.cgrp = evsel->cgrp; for (s = 0; s < config->aggr_map->nr; s++) { - print_metric_begin(config, evlist, prefix, s, cgrp); + print_metric_begin(config, evlist, &os, s); evlist__for_each_entry(evlist, counter) { if (counter->merged_stat) continue; - if (counter->cgrp != cgrp) + if (counter->cgrp != os.cgrp) continue; print_counter_aggrdata(config, counter, s, prefix); @@ -956,6 +960,9 @@ static void print_no_aggr_metric(struct perf_stat_config *config, { int all_idx; struct perf_cpu cpu; + struct outstate os = { + .prefix = prefix, + }; perf_cpu_map__for_each_cpu(cpu, all_idx, evlist->core.user_requested_cpus) { struct evsel *counter; @@ -973,8 +980,7 @@ static void print_no_aggr_metric(struct perf_stat_config *config, id = aggr_cpu_id__cpu(cpu, /*data=*/NULL); if 
(first) { - print_metric_begin(config, evlist, prefix, - counter_idx, /*cgrp=*/NULL); + print_metric_begin(config, evlist, &os, counter_idx); first = false; } val = ps->aggr[counter_idx].counts.val; @@ -1347,22 +1353,23 @@ static void print_percore(struct perf_stat_config *config, static void print_cgroup_counter(struct perf_stat_config *config, struct evlist *evlist, const char *prefix) { - struct cgroup *cgrp = NULL; struct evsel *counter; + struct outstate os = { + .prefix = prefix, + }; evlist__for_each_entry(evlist, counter) { - if (cgrp != counter->cgrp) { - if (cgrp != NULL) + if (os.cgrp != counter->cgrp) { + if (os.cgrp != NULL) print_metric_end(config); - cgrp = counter->cgrp; - print_metric_begin(config, evlist, prefix, - /*aggr_idx=*/0, cgrp); + os.cgrp = counter->cgrp; + print_metric_begin(config, evlist, &os, /*aggr_idx=*/0); } print_counter(config, counter, prefix); } - if (cgrp) + if (os.cgrp) print_metric_end(config); } @@ -1405,8 +1412,7 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf } else if (config->cgroup_list) { print_cgroup_counter(config, evlist, os.prefix); } else { - print_metric_begin(config, evlist, os.prefix, - /*aggr_idx=*/0, /*cgrp=*/NULL); + print_metric_begin(config, evlist, &os, /*aggr_idx=*/0); evlist__for_each_entry(evlist, counter) { print_counter(config, counter, os.prefix); } From e7f4da312259e61877ae8e26d216993c4128bddc Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 23 Nov 2022 10:02:03 -0800 Subject: [PATCH 2657/4122] perf stat: Pass struct outstate to printout() The printout() takes a lot of arguments and sets an outstate with the value. Instead, we can fill the outstate first and then pass it to reduce the number of arguments. 
Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221123180208.2068936-11-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 38 ++++++++++++++++------------------ 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 3ed63061d6f8..dd190f71e933 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -673,22 +673,15 @@ static bool is_mixed_hw_group(struct evsel *counter) return false; } -static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int nr, - struct evsel *counter, double uval, - const char *prefix, u64 run, u64 ena, double noise, +static void printout(struct perf_stat_config *config, struct outstate *os, + double uval, u64 run, u64 ena, double noise, struct runtime_stat *st, int map_idx) { struct perf_stat_output_ctx out; - struct outstate os = { - .fh = config->output, - .prefix = prefix ? prefix : "", - .id = id, - .nr = nr, - .evsel = counter, - }; print_metric_t pm; new_line_t nl; bool ok = true; + struct evsel *counter = os->evsel; if (config->csv_output) { static const int aggr_fields[AGGR_MAX] = { @@ -704,7 +697,7 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int pm = config->metric_only ? print_metric_only_csv : print_metric_csv; nl = config->metric_only ? new_line_metric : new_line_csv; - os.nfields = 3 + aggr_fields[config->aggr_mode] + (counter->cgrp ? 1 : 0); + os->nfields = 3 + aggr_fields[config->aggr_mode] + (counter->cgrp ? 1 : 0); } else if (config->json_output) { pm = config->metric_only ? print_metric_only_json : print_metric_json; nl = config->metric_only ? 
new_line_metric : new_line_json; @@ -715,7 +708,7 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int if (run == 0 || ena == 0 || counter->counts->scaled == -1) { if (config->metric_only) { - pm(config, &os, NULL, "", "", 0); + pm(config, os, NULL, "", "", 0); return; } @@ -732,11 +725,11 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int out.print_metric = pm; out.new_line = nl; - out.ctx = &os; + out.ctx = os; out.force_header = false; if (!config->metric_only) { - abs_printout(config, id, nr, counter, uval, ok); + abs_printout(config, os->id, os->nr, counter, uval, ok); print_noise(config, counter, noise, /*before_metric=*/true); print_running(config, run, ena, /*before_metric=*/true); @@ -814,6 +807,13 @@ static void print_counter_aggrdata(struct perf_stat_config *config, struct aggr_cpu_id id = config->aggr_map->map[s]; double avg = aggr->counts.val; bool metric_only = config->metric_only; + struct outstate os = { + .fh = config->output, + .prefix = prefix ? 
prefix : "", + .id = id, + .nr = aggr->nr, + .evsel = counter, + }; if (counter->supported && aggr->nr == 0) return; @@ -834,8 +834,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config, uval = val * counter->scale; - printout(config, id, aggr->nr, counter, uval, - prefix, run, ena, avg, &rt_stat, s); + printout(config, &os, uval, run, ena, avg, &rt_stat, s); if (!metric_only) fputc('\n', output); @@ -971,14 +970,14 @@ static void print_no_aggr_metric(struct perf_stat_config *config, evlist__for_each_entry(evlist, counter) { u64 ena, run, val; double uval; - struct aggr_cpu_id id; struct perf_stat_evsel *ps = counter->stats; int counter_idx = perf_cpu_map__idx(evsel__cpus(counter), cpu); if (counter_idx < 0) continue; - id = aggr_cpu_id__cpu(cpu, /*data=*/NULL); + os.evsel = counter; + os.id = aggr_cpu_id__cpu(cpu, /*data=*/NULL); if (first) { print_metric_begin(config, evlist, &os, counter_idx); first = false; @@ -988,8 +987,7 @@ static void print_no_aggr_metric(struct perf_stat_config *config, run = ps->aggr[counter_idx].counts.run; uval = val * counter->scale; - printout(config, id, 0, counter, uval, prefix, - run, ena, 1.0, &rt_stat, counter_idx); + printout(config, &os, uval, run, ena, 1.0, &rt_stat, counter_idx); } if (!first) print_metric_end(config); From 01577597493dc8bded8a5880fbf84a6d5bf13f1b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 23 Nov 2022 10:02:04 -0800 Subject: [PATCH 2658/4122] perf stat: Do not pass runtime_stat to printout() It always passes a pointer to rt_stat as it's the only one. Let's not pass it and directly refer to it in the printout().
Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221123180208.2068936-12-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index dd190f71e933..cdf4ca7f6e3a 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -674,8 +674,7 @@ static bool is_mixed_hw_group(struct evsel *counter) } static void printout(struct perf_stat_config *config, struct outstate *os, - double uval, u64 run, u64 ena, double noise, - struct runtime_stat *st, int map_idx) + double uval, u64 run, u64 ena, double noise, int map_idx) { struct perf_stat_output_ctx out; print_metric_t pm; @@ -737,7 +736,7 @@ static void printout(struct perf_stat_config *config, struct outstate *os, if (ok) { perf_stat__print_shadow_stats(config, counter, uval, map_idx, - &out, &config->metric_events, st); + &out, &config->metric_events, &rt_stat); } else { pm(config, &os, /*color=*/NULL, /*format=*/NULL, /*unit=*/"", /*val=*/0); } @@ -834,7 +833,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config, uval = val * counter->scale; - printout(config, &os, uval, run, ena, avg, &rt_stat, s); + printout(config, &os, uval, run, ena, avg, s); if (!metric_only) fputc('\n', output); @@ -987,7 +986,7 @@ static void print_no_aggr_metric(struct perf_stat_config *config, run = ps->aggr[counter_idx].counts.run; uval = val * counter->scale; - printout(config, &os, uval, run, ena, 1.0, &rt_stat, counter_idx); + printout(config, &os, uval, run, ena, 1.0, counter_idx); } if (!first) print_metric_end(config); From 5f334d88c25e0dbdbc199ad38becc5cc5aa33081 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 23 Nov 2022 10:02:05 -0800 Subject: 
[PATCH 2659/4122] perf stat: Pass through 'struct outstate' Now most of the print functions take a pointer to the struct outstate. We have one in the evlist__print_counters() and pass it through the child functions. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221123180208.2068936-13-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/iostat.c | 4 +- tools/perf/util/iostat.c | 3 +- tools/perf/util/iostat.h | 4 +- tools/perf/util/stat-display.c | 102 +++++++++++++----------------- 4 files changed, 50 insertions(+), 63 deletions(-) diff --git a/tools/perf/arch/x86/util/iostat.c b/tools/perf/arch/x86/util/iostat.c index 404de795ec0b..7eb0a7b00b95 100644 --- a/tools/perf/arch/x86/util/iostat.c +++ b/tools/perf/arch/x86/util/iostat.c @@ -449,7 +449,7 @@ void iostat_print_metric(struct perf_stat_config *config, struct evsel *evsel, void iostat_print_counters(struct evlist *evlist, struct perf_stat_config *config, struct timespec *ts, - char *prefix, iostat_print_counter_t print_cnt_cb) + char *prefix, iostat_print_counter_t print_cnt_cb, void *arg) { void *perf_device = NULL; struct evsel *counter = evlist__first(evlist); @@ -464,7 +464,7 @@ void iostat_print_counters(struct evlist *evlist, iostat_prefix(evlist, config, prefix, ts); fprintf(config->output, "\n%s", prefix); } - print_cnt_cb(config, counter, prefix); + print_cnt_cb(config, counter, arg); } fputc('\n', config->output); } diff --git a/tools/perf/util/iostat.c b/tools/perf/util/iostat.c index 57dd49da28fe..b770bd473af7 100644 --- a/tools/perf/util/iostat.c +++ b/tools/perf/util/iostat.c @@ -48,6 +48,7 @@ __weak void iostat_print_counters(struct evlist *evlist __maybe_unused, struct perf_stat_config *config __maybe_unused, struct timespec *ts __maybe_unused, char *prefix __maybe_unused, - 
iostat_print_counter_t print_cnt_cb __maybe_unused) + iostat_print_counter_t print_cnt_cb __maybe_unused, + void *arg __maybe_unused) { } diff --git a/tools/perf/util/iostat.h b/tools/perf/util/iostat.h index c22688f87cb2..a4e7299c5c2f 100644 --- a/tools/perf/util/iostat.h +++ b/tools/perf/util/iostat.h @@ -28,7 +28,7 @@ enum iostat_mode_t { extern enum iostat_mode_t iostat_mode; -typedef void (*iostat_print_counter_t)(struct perf_stat_config *, struct evsel *, const char *); +typedef void (*iostat_print_counter_t)(struct perf_stat_config *, struct evsel *, void *); int iostat_prepare(struct evlist *evlist, struct perf_stat_config *config); int iostat_parse(const struct option *opt, const char *str, @@ -42,6 +42,6 @@ void iostat_print_metric(struct perf_stat_config *config, struct evsel *evsel, struct perf_stat_output_ctx *out); void iostat_print_counters(struct evlist *evlist, struct perf_stat_config *config, struct timespec *ts, - char *prefix, iostat_print_counter_t print_cnt_cb); + char *prefix, iostat_print_counter_t print_cnt_cb, void *arg); #endif /* _IOSTAT_H */ diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index cdf4ca7f6e3a..335627e8542d 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -796,7 +796,7 @@ static void uniquify_counter(struct perf_stat_config *config, struct evsel *coun static void print_counter_aggrdata(struct perf_stat_config *config, struct evsel *counter, int s, - const char *prefix) + struct outstate *os) { FILE *output = config->output; u64 ena, run, val; @@ -806,13 +806,10 @@ static void print_counter_aggrdata(struct perf_stat_config *config, struct aggr_cpu_id id = config->aggr_map->map[s]; double avg = aggr->counts.val; bool metric_only = config->metric_only; - struct outstate os = { - .fh = config->output, - .prefix = prefix ? 
prefix : "", - .id = id, - .nr = aggr->nr, - .evsel = counter, - }; + + os->id = id; + os->nr = aggr->nr; + os->evsel = counter; if (counter->supported && aggr->nr == 0) return; @@ -824,8 +821,8 @@ static void print_counter_aggrdata(struct perf_stat_config *config, run = aggr->counts.run; if (!metric_only) { - if (prefix) - fprintf(output, "%s", prefix); + if (os->prefix) + fprintf(output, "%s", os->prefix); else if (config->summary && config->csv_output && !config->no_csv_summary && !config->interval) fprintf(output, "%s%s", "summary", config->csv_sep); @@ -833,7 +830,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config, uval = val * counter->scale; - printout(config, &os, uval, run, ena, avg, s); + printout(config, os, uval, run, ena, avg, s); if (!metric_only) fputc('\n', output); @@ -871,12 +868,9 @@ static void print_metric_end(struct perf_stat_config *config) static void print_aggr(struct perf_stat_config *config, struct evlist *evlist, - const char *prefix) + struct outstate *os) { struct evsel *counter; - struct outstate os = { - .prefix = prefix, - }; int s; if (!config->aggr_map || !config->aggr_get_id) @@ -887,13 +881,13 @@ static void print_aggr(struct perf_stat_config *config, * Without each counter has its own line. 
*/ for (s = 0; s < config->aggr_map->nr; s++) { - print_metric_begin(config, evlist, &os, s); + print_metric_begin(config, evlist, os, s); evlist__for_each_entry(evlist, counter) { if (counter->merged_stat) continue; - print_counter_aggrdata(config, counter, s, prefix); + print_counter_aggrdata(config, counter, s, os); } print_metric_end(config); } @@ -901,34 +895,31 @@ static void print_aggr(struct perf_stat_config *config, static void print_aggr_cgroup(struct perf_stat_config *config, struct evlist *evlist, - const char *prefix) + struct outstate *os) { struct evsel *counter, *evsel; - struct outstate os = { - .prefix = prefix, - }; int s; if (!config->aggr_map || !config->aggr_get_id) return; evlist__for_each_entry(evlist, evsel) { - if (os.cgrp == evsel->cgrp) + if (os->cgrp == evsel->cgrp) continue; - os.cgrp = evsel->cgrp; + os->cgrp = evsel->cgrp; for (s = 0; s < config->aggr_map->nr; s++) { - print_metric_begin(config, evlist, &os, s); + print_metric_begin(config, evlist, os, s); evlist__for_each_entry(evlist, counter) { if (counter->merged_stat) continue; - if (counter->cgrp != os.cgrp) + if (counter->cgrp != os->cgrp) continue; - print_counter_aggrdata(config, counter, s, prefix); + print_counter_aggrdata(config, counter, s, os); } print_metric_end(config); } @@ -936,7 +927,7 @@ static void print_aggr_cgroup(struct perf_stat_config *config, } static void print_counter(struct perf_stat_config *config, - struct evsel *counter, const char *prefix) + struct evsel *counter, struct outstate *os) { int s; @@ -948,19 +939,16 @@ static void print_counter(struct perf_stat_config *config, return; for (s = 0; s < config->aggr_map->nr; s++) { - print_counter_aggrdata(config, counter, s, prefix); + print_counter_aggrdata(config, counter, s, os); } } static void print_no_aggr_metric(struct perf_stat_config *config, struct evlist *evlist, - const char *prefix) + struct outstate *os) { int all_idx; struct perf_cpu cpu; - struct outstate os = { - .prefix = prefix, - }; 
perf_cpu_map__for_each_cpu(cpu, all_idx, evlist->core.user_requested_cpus) { struct evsel *counter; @@ -975,10 +963,10 @@ static void print_no_aggr_metric(struct perf_stat_config *config, if (counter_idx < 0) continue; - os.evsel = counter; - os.id = aggr_cpu_id__cpu(cpu, /*data=*/NULL); + os->evsel = counter; + os->id = aggr_cpu_id__cpu(cpu, /*data=*/NULL); if (first) { - print_metric_begin(config, evlist, &os, counter_idx); + print_metric_begin(config, evlist, os, counter_idx); first = false; } val = ps->aggr[counter_idx].counts.val; @@ -986,7 +974,7 @@ static void print_no_aggr_metric(struct perf_stat_config *config, run = ps->aggr[counter_idx].counts.run; uval = val * counter->scale; - printout(config, &os, uval, run, ena, 1.0, counter_idx); + printout(config, os, uval, run, ena, 1.0, counter_idx); } if (!first) print_metric_end(config); @@ -1304,7 +1292,7 @@ static void print_footer(struct perf_stat_config *config) } static void print_percore(struct perf_stat_config *config, - struct evsel *counter, const char *prefix) + struct evsel *counter, struct outstate *os) { bool metric_only = config->metric_only; FILE *output = config->output; @@ -1315,7 +1303,7 @@ static void print_percore(struct perf_stat_config *config, return; if (config->percore_show_thread) - return print_counter(config, counter, prefix); + return print_counter(config, counter, os); core_map = cpu_aggr_map__empty_new(config->aggr_map->nr); if (core_map == NULL) { @@ -1337,7 +1325,7 @@ static void print_percore(struct perf_stat_config *config, if (found) continue; - print_counter_aggrdata(config, counter, s, prefix); + print_counter_aggrdata(config, counter, s, os); core_map->map[c++] = core_id; } @@ -1348,30 +1336,28 @@ static void print_percore(struct perf_stat_config *config, } static void print_cgroup_counter(struct perf_stat_config *config, struct evlist *evlist, - const char *prefix) + struct outstate *os) { struct evsel *counter; - struct outstate os = { - .prefix = prefix, - }; 
evlist__for_each_entry(evlist, counter) { - if (os.cgrp != counter->cgrp) { - if (os.cgrp != NULL) + if (os->cgrp != counter->cgrp) { + if (os->cgrp != NULL) print_metric_end(config); - os.cgrp = counter->cgrp; - print_metric_begin(config, evlist, &os, /*aggr_idx=*/0); + os->cgrp = counter->cgrp; + print_metric_begin(config, evlist, os, /*aggr_idx=*/0); } - print_counter(config, counter, prefix); + print_counter(config, counter, os); } - if (os.cgrp) + if (os->cgrp) print_metric_end(config); } void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config, - struct target *_target, struct timespec *ts, int argc, const char **argv) + struct target *_target, struct timespec *ts, + int argc, const char **argv) { bool metric_only = config->metric_only; int interval = config->interval; @@ -1397,34 +1383,34 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf case AGGR_SOCKET: case AGGR_NODE: if (config->cgroup_list) - print_aggr_cgroup(config, evlist, os.prefix); + print_aggr_cgroup(config, evlist, &os); else - print_aggr(config, evlist, os.prefix); + print_aggr(config, evlist, &os); break; case AGGR_THREAD: case AGGR_GLOBAL: if (config->iostat_run) { iostat_print_counters(evlist, config, ts, buf, - print_counter); + (iostat_print_counter_t)print_counter, &os); } else if (config->cgroup_list) { - print_cgroup_counter(config, evlist, os.prefix); + print_cgroup_counter(config, evlist, &os); } else { print_metric_begin(config, evlist, &os, /*aggr_idx=*/0); evlist__for_each_entry(evlist, counter) { - print_counter(config, counter, os.prefix); + print_counter(config, counter, &os); } print_metric_end(config); } break; case AGGR_NONE: if (metric_only) - print_no_aggr_metric(config, evlist, os.prefix); + print_no_aggr_metric(config, evlist, &os); else { evlist__for_each_entry(evlist, counter) { if (counter->percore) - print_percore(config, counter, os.prefix); + print_percore(config, counter, &os); else - print_counter(config, 
counter, os.prefix); + print_counter(config, counter, &os); } } break; From ab6baaae273572909f1e957fc27a9459fc95dd8c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 23 Nov 2022 10:02:06 -0800 Subject: [PATCH 2660/4122] perf stat: Fix JSON output in metric-only mode It generated a broken JSON output when aggregation mode or cgroup is used with --metric-only option. Also get rid of the header line and make the output single line for each entry. It needs to know whether the current metric is the first one or not. So add 'first' field in the outstate and mark it false after printing. Before: # perf stat -a -j --metric-only true {"unit" : "GHz"}{"unit" : "insn per cycle"}{"unit" : "branch-misses of all branches"} {{"metric-value" : "0.797"}{"metric-value" : "1.65"}{"metric-value" : "0.89"} ^ # perf stat -a -j --metric-only --per-socket true {"unit" : "GHz"}{"unit" : "insn per cycle"}{"unit" : "branch-misses of all branches"} {"socket" : "S0", "aggregate-number" : 8, {"metric-value" : "0.295"}{"metric-value" : "1.88"}{"metric-value" : "0.64"} ^ After: # perf stat -a -j --metric-only true {"GHz" : "0.990", "insn per cycle" : "2.06", "branch-misses of all branches" : "0.59"} # perf stat -a -j --metric-only --per-socket true {"socket" : "S0", "aggregate-number" : 8, "GHz" : "0.439", "insn per cycle" : "2.14", "branch-misses of all branches" : "0.51"} Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221123180208.2068936-14-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 42 +++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 335627e8542d..43640115454c 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -279,9 
+279,6 @@ static void print_aggr_id_json(struct perf_stat_config *config, { FILE *output = config->output; - if (!config->interval) - fputc('{', output); - switch (config->aggr_mode) { case AGGR_CORE: fprintf(output, "\"core\" : \"S%d-D%d-C%d\", \"aggregate-number\" : %d, ", @@ -335,6 +332,7 @@ static void aggr_printout(struct perf_stat_config *config, struct outstate { FILE *fh; bool newline; + bool first; const char *prefix; int nfields; int nr; @@ -491,6 +489,7 @@ static void print_metric_only(struct perf_stat_config *config, color_snprintf(str, sizeof(str), color ?: "", fmt, val); fprintf(out, "%*s ", mlen, str); + os->first = false; } static void print_metric_only_csv(struct perf_stat_config *config __maybe_unused, @@ -512,6 +511,7 @@ static void print_metric_only_csv(struct perf_stat_config *config __maybe_unused ends++; *ends = 0; fprintf(out, "%s%s", vals, config->csv_sep); + os->first = false; } static void print_metric_only_json(struct perf_stat_config *config __maybe_unused, @@ -532,7 +532,8 @@ static void print_metric_only_json(struct perf_stat_config *config __maybe_unuse while (isdigit(*ends) || *ends == '.') ends++; *ends = 0; - fprintf(out, "{\"metric-value\" : \"%s\"}", vals); + fprintf(out, "%s\"%s\" : \"%s\"", os->first ? 
"" : ", ", unit, vals); + os->first = false; } static void new_line_metric(struct perf_stat_config *config __maybe_unused, @@ -561,7 +562,7 @@ static void print_metric_header(struct perf_stat_config *config, unit = fixunit(tbuf, os->evsel, unit); if (config->json_output) - fprintf(os->fh, "{\"unit\" : \"%s\"}", unit); + return; else if (config->csv_output) fprintf(os->fh, "%s%s", unit, config->csv_sep); else @@ -821,6 +822,8 @@ static void print_counter_aggrdata(struct perf_stat_config *config, run = aggr->counts.run; if (!metric_only) { + if (config->json_output) + fputc('{', output); if (os->prefix) fprintf(output, "%s", os->prefix); else if (config->summary && config->csv_output && @@ -844,9 +847,12 @@ static void print_metric_begin(struct perf_stat_config *config, struct aggr_cpu_id id; struct evsel *evsel; + os->first = true; if (!config->metric_only) return; + if (config->json_output) + fputc('{', config->output); if (os->prefix) fprintf(config->output, "%s", os->prefix); @@ -855,7 +861,7 @@ static void print_metric_begin(struct perf_stat_config *config, aggr = &evsel->stats->aggr[aggr_idx]; aggr_printout(config, evsel, id, aggr->nr); - print_cgroup(config, os->cgrp); + print_cgroup(config, os->cgrp ? 
: evsel->cgrp); } static void print_metric_end(struct perf_stat_config *config) @@ -863,6 +869,8 @@ static void print_metric_end(struct perf_stat_config *config) if (!config->metric_only) return; + if (config->json_output) + fputc('}', config->output); fputc('\n', config->output); } @@ -1005,11 +1013,9 @@ static void print_metric_headers_csv(struct perf_stat_config *config, fputs(aggr_header_csv[config->aggr_mode], config->output); } -static void print_metric_headers_json(struct perf_stat_config *config, +static void print_metric_headers_json(struct perf_stat_config *config __maybe_unused, bool no_indent __maybe_unused) { - if (config->interval) - fputs("{\"unit\" : \"sec\"}", config->output); } static void print_metric_headers(struct perf_stat_config *config, @@ -1049,7 +1055,9 @@ static void print_metric_headers(struct perf_stat_config *config, &config->metric_events, &rt_stat); } - fputc('\n', config->output); + + if (!config->json_output) + fputc('\n', config->output); } static void prepare_interval(struct perf_stat_config *config, @@ -1058,17 +1066,14 @@ static void prepare_interval(struct perf_stat_config *config, if (config->iostat_run) return; - if (config->csv_output) + if (config->json_output) + scnprintf(prefix, len, "\"interval\" : %lu.%09lu, ", + (unsigned long) ts->tv_sec, ts->tv_nsec); + else if (config->csv_output) scnprintf(prefix, len, "%lu.%09lu%s", (unsigned long) ts->tv_sec, ts->tv_nsec, config->csv_sep); - else if (!config->json_output) - scnprintf(prefix, len, "%6lu.%09lu ", - (unsigned long) ts->tv_sec, ts->tv_nsec); - else if (!config->metric_only) - scnprintf(prefix, len, "{\"interval\" : %lu.%09lu, ", - (unsigned long) ts->tv_sec, ts->tv_nsec); else - scnprintf(prefix, len, "{\"interval\" : %lu.%09lu}", + scnprintf(prefix, len, "%6lu.%09lu ", (unsigned long) ts->tv_sec, ts->tv_nsec); } @@ -1365,6 +1370,7 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf char buf[64]; struct outstate os = { .fh = 
config->output, + .first = true, }; if (config->iostat_run) From c4b41b83c25073c09bfcc4e5ec496c9dd316656b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 23 Nov 2022 10:02:07 -0800 Subject: [PATCH 2661/4122] perf stat: Rename "aggregate-number" to "cpu-count" in JSON As the JSON output has been broken for a little while, I guess there are not many users. Let's rename the field to a more intuitive one. :) Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221123180208.2068936-15-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 43640115454c..7a39a1a7261d 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -281,19 +281,19 @@ static void print_aggr_id_json(struct perf_stat_config *config, switch (config->aggr_mode) { case AGGR_CORE: - fprintf(output, "\"core\" : \"S%d-D%d-C%d\", \"aggregate-number\" : %d, ", + fprintf(output, "\"core\" : \"S%d-D%d-C%d\", \"cpu-count\" : %d, ", id.socket, id.die, id.core, nr); break; case AGGR_DIE: - fprintf(output, "\"die\" : \"S%d-D%d\", \"aggregate-number\" : %d, ", + fprintf(output, "\"die\" : \"S%d-D%d\", \"cpu-count\" : %d, ", id.socket, id.die, nr); break; case AGGR_SOCKET: - fprintf(output, "\"socket\" : \"S%d\", \"aggregate-number\" : %d, ", + fprintf(output, "\"socket\" : \"S%d\", \"cpu-count\" : %d, ", id.socket, nr); break; case AGGR_NODE: - fprintf(output, "\"node\" : \"N%d\", \"aggregate-number\" : %d, ", + fprintf(output, "\"node\" : \"N%d\", \"cpu-count\" : %d, ", id.node, nr); break; case AGGR_NONE: From 765d4e497fc51c64b50e5947d0b63bc3ccbd83d3 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 23 Nov 2022 10:02:08 -0800 Subject: [PATCH
2662/4122] perf stat: Tidy up JSON metric-only output when no metrics It printed empty strings for each metric. I guess it's needed for CSV output to match the column number. We could just ignore the empty metrics in JSON but it ended up with a broken JSON object with a trailing comma. So I added a dummy '"metric-value" : "none"' part. To do that, it needs to pass struct outstate to print_metric_end() to check if any metric value is printed or not. Before: # perf stat -aj --metric-only --per-socket --for-each-cgroup system.slice true {"socket" : "S0", "cpu-count" : 8, "cgroup" : "system.slice", "" : "", "" : "", "" : "", "" : "", "" : "", "" : "", "" : "", "" : ""} After: # perf stat -aj --metric-only --per-socket --for-each-cgroup system.slice true {"socket" : "S0", "cpu-count" : 8, "cgroup" : "system.slice", "metric-value" : "none"} Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221123180208.2068936-16-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 7a39a1a7261d..847acdb5dc40 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -532,6 +532,8 @@ static void print_metric_only_json(struct perf_stat_config *config __maybe_unuse while (isdigit(*ends) || *ends == '.') ends++; *ends = 0; + if (!unit[0] || !vals[0]) + return; fprintf(out, "%s\"%s\" : \"%s\"", os->first ? "" : ", ", unit, vals); os->first = false; } @@ -864,14 +866,19 @@ static void print_metric_begin(struct perf_stat_config *config, print_cgroup(config, os->cgrp ? 
: evsel->cgrp); } -static void print_metric_end(struct perf_stat_config *config) +static void print_metric_end(struct perf_stat_config *config, struct outstate *os) { + FILE *output = config->output; + if (!config->metric_only) return; - if (config->json_output) - fputc('}', config->output); - fputc('\n', config->output); + if (config->json_output) { + if (os->first) + fputs("\"metric-value\" : \"none\"", output); + fputc('}', output); + } + fputc('\n', output); } static void print_aggr(struct perf_stat_config *config, @@ -897,7 +904,7 @@ static void print_aggr(struct perf_stat_config *config, print_counter_aggrdata(config, counter, s, os); } - print_metric_end(config); + print_metric_end(config, os); } } @@ -929,7 +936,7 @@ static void print_aggr_cgroup(struct perf_stat_config *config, print_counter_aggrdata(config, counter, s, os); } - print_metric_end(config); + print_metric_end(config, os); } } } @@ -985,7 +992,7 @@ static void print_no_aggr_metric(struct perf_stat_config *config, printout(config, os, uval, run, ena, 1.0, counter_idx); } if (!first) - print_metric_end(config); + print_metric_end(config, os); } } @@ -1348,7 +1355,7 @@ static void print_cgroup_counter(struct perf_stat_config *config, struct evlist evlist__for_each_entry(evlist, counter) { if (os->cgrp != counter->cgrp) { if (os->cgrp != NULL) - print_metric_end(config); + print_metric_end(config, os); os->cgrp = counter->cgrp; print_metric_begin(config, evlist, os, /*aggr_idx=*/0); @@ -1357,7 +1364,7 @@ static void print_cgroup_counter(struct perf_stat_config *config, struct evlist print_counter(config, counter, os); } if (os->cgrp) - print_metric_end(config); + print_metric_end(config, os); } void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config, @@ -1405,7 +1412,7 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf evlist__for_each_entry(evlist, counter) { print_counter(config, counter, &os); } - print_metric_end(config); + 
print_metric_end(config, &os); } break; case AGGR_NONE: From eafcbb6838b67ae67bcab3f794dff593250a71bb Mon Sep 17 00:00:00 2001 From: Zhengjun Xing Date: Thu, 24 Nov 2022 11:14:38 +0800 Subject: [PATCH 2663/4122] perf vendor events intel: Add core event list for Alderlake-N Alderlake-N only has E-core, it has been moved to non-hybrid code path on the kernel side, so add the cpuid for Alderlake-N separately. Add core event list for Alderlake-N, it is based on the ADL gracemont v1.16 JSON file. https://github.com/intel/perfmon/tree/main/ADL/events/ Reviewed-by: Kan Liang Signed-off-by: Xing Zhengjun Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221124031441.110134-1-zhengjun.xing@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/x86/alderlaken/cache.json | 330 +++++++++++ .../arch/x86/alderlaken/floating-point.json | 18 + .../arch/x86/alderlaken/frontend.json | 26 + .../arch/x86/alderlaken/memory.json | 81 +++ .../pmu-events/arch/x86/alderlaken/other.json | 38 ++ .../arch/x86/alderlaken/pipeline.json | 533 ++++++++++++++++++ .../arch/x86/alderlaken/virtual-memory.json | 47 ++ tools/perf/pmu-events/arch/x86/mapfile.csv | 3 +- 8 files changed, 1075 insertions(+), 1 deletion(-) create mode 100644 tools/perf/pmu-events/arch/x86/alderlaken/cache.json create mode 100644 tools/perf/pmu-events/arch/x86/alderlaken/floating-point.json create mode 100644 tools/perf/pmu-events/arch/x86/alderlaken/frontend.json create mode 100644 tools/perf/pmu-events/arch/x86/alderlaken/memory.json create mode 100644 tools/perf/pmu-events/arch/x86/alderlaken/other.json create mode 100644 tools/perf/pmu-events/arch/x86/alderlaken/pipeline.json create mode 100644 tools/perf/pmu-events/arch/x86/alderlaken/virtual-memory.json diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/cache.json b/tools/perf/pmu-events/arch/x86/alderlaken/cache.json new file mode 100644 
index 000000000000..043445ae14a8 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/alderlaken/cache.json @@ -0,0 +1,330 @@ +[ + { + "BriefDescription": "Counts the number of cacheable memory requests that miss in the LLC. Counts on a per core basis.", + "EventCode": "0x2e", + "EventName": "LONGEST_LAT_CACHE.MISS", + "PublicDescription": "Counts the number of cacheable memory requests that miss in the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the platform has an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.", + "SampleAfterValue": "200003", + "UMask": "0x41" + }, + { + "BriefDescription": "Counts the number of cacheable memory requests that access the LLC. Counts on a per core basis.", + "EventCode": "0x2e", + "EventName": "LONGEST_LAT_CACHE.REFERENCE", + "PublicDescription": "Counts the number of cacheable memory requests that access the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the platform has an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. 
Counts on a per core basis.", + "SampleAfterValue": "200003", + "UMask": "0x4f" + }, + { + "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the L2, LLC, DRAM or MMIO (Non-DRAM).", + "EventCode": "0x34", + "EventName": "MEM_BOUND_STALLS.IFETCH", + "PublicDescription": "Counts the number of cycles the core is stalled due to an instruction cache or translation lookaside buffer (TLB) miss which hit in the L2, LLC, DRAM or MMIO (Non-DRAM).", + "SampleAfterValue": "200003", + "UMask": "0x38" + }, + { + "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in DRAM or MMIO (Non-DRAM).", + "EventCode": "0x34", + "EventName": "MEM_BOUND_STALLS.IFETCH_DRAM_HIT", + "PublicDescription": "Counts the number of cycles the core is stalled due to an instruction cache or translation lookaside buffer (TLB) miss which hit in DRAM or MMIO (non-DRAM).", + "SampleAfterValue": "200003", + "UMask": "0x20" + }, + { + "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the L2 cache.", + "EventCode": "0x34", + "EventName": "MEM_BOUND_STALLS.IFETCH_L2_HIT", + "PublicDescription": "Counts the number of cycles the core is stalled due to an instruction cache or Translation Lookaside Buffer (TLB) miss which hit in the L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0x8" + }, + { + "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the LLC or other core with HITE/F/M.", + "EventCode": "0x34", + "EventName": "MEM_BOUND_STALLS.IFETCH_LLC_HIT", + "PublicDescription": "Counts the number of cycles the core is stalled due to an instruction cache or Translation Lookaside Buffer (TLB) miss which hit in the Last Level Cache (LLC) or other core with HITE/F/M.", + "SampleAfterValue": "200003", + "UMask": "0x10" + }, + { + 
"BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in the L2, LLC, DRAM or MMIO (Non-DRAM).", + "EventCode": "0x34", + "EventName": "MEM_BOUND_STALLS.LOAD", + "SampleAfterValue": "200003", + "UMask": "0x7" + }, + { + "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in DRAM or MMIO (Non-DRAM).", + "EventCode": "0x34", + "EventName": "MEM_BOUND_STALLS.LOAD_DRAM_HIT", + "SampleAfterValue": "200003", + "UMask": "0x4" + }, + { + "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load which hit in the L2 cache.", + "EventCode": "0x34", + "EventName": "MEM_BOUND_STALLS.LOAD_L2_HIT", + "SampleAfterValue": "200003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load which hit in the LLC or other core with HITE/F/M.", + "EventCode": "0x34", + "EventName": "MEM_BOUND_STALLS.LOAD_LLC_HIT", + "PublicDescription": "Counts the number of cycles the core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.", + "SampleAfterValue": "200003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts the number of load uops retired that hit in DRAM.", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_UOPS_RETIRED.DRAM_HIT", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x80" + }, + { + "BriefDescription": "Counts the number of load uops retired that hit in the L2 cache.", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts the number of load uops retired that hit in the L3 cache.", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x4" + }, + { + "BriefDescription": "Counts 
the number of cycles that uops are blocked for any of the following reasons: load buffer, store buffer or RSV full.", + "EventCode": "0x04", + "EventName": "MEM_SCHEDULER_BLOCK.ALL", + "SampleAfterValue": "20003", + "UMask": "0x7" + }, + { + "BriefDescription": "Counts the number of cycles that uops are blocked due to a load buffer full condition.", + "EventCode": "0x04", + "EventName": "MEM_SCHEDULER_BLOCK.LD_BUF", + "SampleAfterValue": "20003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts the number of cycles that uops are blocked due to an RSV full condition.", + "EventCode": "0x04", + "EventName": "MEM_SCHEDULER_BLOCK.RSV", + "SampleAfterValue": "20003", + "UMask": "0x4" + }, + { + "BriefDescription": "Counts the number of cycles that uops are blocked due to a store buffer full condition.", + "EventCode": "0x04", + "EventName": "MEM_SCHEDULER_BLOCK.ST_BUF", + "SampleAfterValue": "20003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of load uops retired.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.ALL_LOADS", + "PEBS": "1", + "PublicDescription": "Counts the total number of load uops retired.", + "SampleAfterValue": "200003", + "UMask": "0x81" + }, + { + "BriefDescription": "Counts the number of store uops retired.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.ALL_STORES", + "PEBS": "1", + "PublicDescription": "Counts the total number of store uops retired.", + "SampleAfterValue": "200003", + "UMask": "0x82" + }, + { + "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 128 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). 
Only counts with PEBS enabled.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_128", + "MSRIndex": "0x3F6", + "MSRValue": "0x80", + "PEBS": "2", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 128 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 16 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_16", + "MSRIndex": "0x3F6", + "MSRValue": "0x10", + "PEBS": "2", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 16 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 256 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_256", + "MSRIndex": "0x3F6", + "MSRValue": "0x100", + "PEBS": "2", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 256 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. 
If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 32 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_32", + "MSRIndex": "0x3F6", + "MSRValue": "0x20", + "PEBS": "2", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 32 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 4 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_4", + "MSRIndex": "0x3F6", + "MSRValue": "0x4", + "PEBS": "2", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 4 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 512 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). 
Only counts with PEBS enabled.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_512", + "MSRIndex": "0x3F6", + "MSRValue": "0x200", + "PEBS": "2", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 512 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 64 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_64", + "MSRIndex": "0x3F6", + "MSRValue": "0x40", + "PEBS": "2", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 64 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 8 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_8", + "MSRIndex": "0x3F6", + "MSRValue": "0x8", + "PEBS": "2", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 8 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. 
If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of retired split load uops.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x41" + }, + { + "BriefDescription": "Counts the number of store uops retired. Counts with or without PEBS enabled.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.STORE_LATENCY", + "PEBS": "2", + "PublicDescription": "Counts the number of store uops retired. Counts with or without PEBS enabled. If PEBS is enabled and a PEBS record is generated, will populate PEBS Latency and PEBS Data Source fields accordingly.", + "SampleAfterValue": "1000003", + "UMask": "0x6" + }, + { + "BriefDescription": "Counts demand data reads that were supplied by the L3 cache.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_DATA_RD.L3_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F803C0001", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop was sent, the snoop hit, and modified data was forwarded.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10003C0001", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop was sent, the snoop hit, but no data was forwarded.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x4003C0001", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop was sent, the snoop hit, and
non-modified data was forwarded.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x8003C0001", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were supplied by the L3 cache.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_RFO.L3_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F803C0002", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were supplied by the L3 cache where a snoop was sent, the snoop hit, and modified data was forwarded.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10003C0002", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to instruction cache misses.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.ICACHE", + "SampleAfterValue": "1000003", + "UMask": "0x20" + } +] diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/floating-point.json b/tools/perf/pmu-events/arch/x86/alderlaken/floating-point.json new file mode 100644 index 000000000000..30e8ca3c1485 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/alderlaken/floating-point.json @@ -0,0 +1,18 @@ +[ + { + "BriefDescription": "Counts the number of floating point operations retired that required microcode assist.", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.FP_ASSIST", + "PublicDescription": "Counts the number of floating point operations retired that required microcode assist, which is not a reflection of the number of FP operations, instructions or uops.", + "SampleAfterValue": "20003", + "UMask": "0x4" + }, + { + 
"BriefDescription": "Counts the number of floating point divide uops retired (x87 and SSE, including x87 sqrt).", + "EventCode": "0xc2", + "EventName": "UOPS_RETIRED.FPDIV", + "PEBS": "1", + "SampleAfterValue": "2000003", + "UMask": "0x8" + } +] diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/frontend.json b/tools/perf/pmu-events/arch/x86/alderlaken/frontend.json new file mode 100644 index 000000000000..36898bab2bba --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/alderlaken/frontend.json @@ -0,0 +1,26 @@ +[ + { + "BriefDescription": "Counts the total number of BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.", + "EventCode": "0xe6", + "EventName": "BACLEARS.ANY", + "PublicDescription": "Counts the total number of BACLEARS, which occur when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend. Includes BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of requests to the instruction cache for one or more bytes of a cache line.", + "EventCode": "0x80", + "EventName": "ICACHE.ACCESSES", + "PublicDescription": "Counts the total number of requests to the instruction cache. The event only counts new cache line accesses, so that multiple back to back fetches to the exact same cache line or byte chunk count as one. Specifically, the event counts when accesses from sequential code crosses the cache line boundary, or when a branch target is moved to a new line or to a non-sequential byte chunk of the same line.", + "SampleAfterValue": "200003", + "UMask": "0x3" + }, + { + "BriefDescription": "Counts the number of instruction cache misses.", + "EventCode": "0x80", + "EventName": "ICACHE.MISSES", + "PublicDescription": "Counts the number of missed requests to the instruction cache. 
The event only counts new cache line accesses, so that multiple back to back fetches to the exact same cache line and byte chunk count as one. Specifically, the event counts when accesses from sequential code crosses the cache line boundary, or when a branch target is moved to a new line or to a non-sequential byte chunk of the same line.", + "SampleAfterValue": "200003", + "UMask": "0x2" + } +] diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/memory.json b/tools/perf/pmu-events/arch/x86/alderlaken/memory.json new file mode 100644 index 000000000000..f84bf8c43495 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/alderlaken/memory.json @@ -0,0 +1,81 @@ +[ + { + "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to any number of reasons, including an L1 miss, WCB full, pagewalk, store address block or store data block, on a load that retires.", + "EventCode": "0x05", + "EventName": "LD_HEAD.ANY_AT_RET", + "SampleAfterValue": "1000003", + "UMask": "0xff" + }, + { + "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to a core bound stall including a store address match, a DTLB miss or a page walk that detains the load from retiring.", + "EventCode": "0x05", + "EventName": "LD_HEAD.L1_BOUND_AT_RET", + "SampleAfterValue": "1000003", + "UMask": "0xf4" + }, + { + "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to other block cases.", + "EventCode": "0x05", + "EventName": "LD_HEAD.OTHER_AT_RET", + "PublicDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to other block cases such as pipeline conflicts, fences, etc.", + "SampleAfterValue": "1000003", + "UMask": "0xc0" + }, + { + "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are 
both stalled due to a pagewalk.", + "EventCode": "0x05", + "EventName": "LD_HEAD.PGWALK_AT_RET", + "SampleAfterValue": "1000003", + "UMask": "0xa0" + }, + { + "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a store address match.", + "EventCode": "0x05", + "EventName": "LD_HEAD.ST_ADDR_AT_RET", + "SampleAfterValue": "1000003", + "UMask": "0x84" + }, + { + "BriefDescription": "Counts the number of machine clears due to memory ordering caused by a snoop from an external agent. Does not count internally generated machine clears such as those due to memory disambiguation.", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.MEMORY_ORDERING", + "SampleAfterValue": "20003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_DATA_RD.L3_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F84400001", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_LOCAL", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F84400001", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_RFO.L3_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F84400002", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_RFO.L3_MISS_LOCAL", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F84400002", + 
"SampleAfterValue": "100003", + "UMask": "0x1" + } +] diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/other.json b/tools/perf/pmu-events/arch/x86/alderlaken/other.json new file mode 100644 index 000000000000..6336de61f628 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/alderlaken/other.json @@ -0,0 +1,38 @@ +[ + { + "BriefDescription": "Counts modified writebacks from L1 cache and L2 cache that have any type of response.", + "EventCode": "0xB7", + "EventName": "OCR.COREWB_M.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10008", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand data reads that have any type of response.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10001", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that have any type of response.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10002", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts streaming stores that have any type of response.", + "EventCode": "0xB7", + "EventName": "OCR.STREAMING_WR.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10800", + "SampleAfterValue": "100003", + "UMask": "0x1" + } +] diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/pipeline.json b/tools/perf/pmu-events/arch/x86/alderlaken/pipeline.json new file mode 100644 index 000000000000..fa53ff11a509 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/alderlaken/pipeline.json @@ -0,0 +1,533 @@ +[ + { + "BriefDescription": "Counts the total number of branch instructions retired for all branch types.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES", + "PEBS": "1", + "PublicDescription": "Counts the total number 
of instructions in which the instruction pointer (IP) of the processor is resteered due to a branch instruction and the branch instruction successfully retires. All branch type instructions are accounted for.", + "SampleAfterValue": "200003" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.NEAR_CALL", + "Deprecated": "1", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.CALL", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xf9" + }, + { + "BriefDescription": "Counts the number of retired JCC (Jump on Conditional Code) branch instructions retired, includes both taken and not taken branches.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.COND", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x7e" + }, + { + "BriefDescription": "Counts the number of taken JCC (Jump on Conditional Code) branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.COND_TAKEN", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfe" + }, + { + "BriefDescription": "Counts the number of far branch instructions retired, includes far jump, far call and return, and interrupt call and return.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.FAR_BRANCH", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xbf" + }, + { + "BriefDescription": "Counts the number of near indirect JMP and near indirect CALL branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.INDIRECT", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xeb" + }, + { + "BriefDescription": "Counts the number of near indirect CALL branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.INDIRECT_CALL", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfb" + }, + { + "BriefDescription": "This event is deprecated. 
Refer to new event BR_INST_RETIRED.INDIRECT_CALL", + "Deprecated": "1", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.IND_CALL", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfb" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.COND", + "Deprecated": "1", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.JCC", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x7e" + }, + { + "BriefDescription": "Counts the number of near CALL branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.NEAR_CALL", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xf9" + }, + { + "BriefDescription": "Counts the number of near RET branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.NEAR_RETURN", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xf7" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.INDIRECT", + "Deprecated": "1", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.NON_RETURN_IND", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xeb" + }, + { + "BriefDescription": "Counts the number of near relative CALL branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.REL_CALL", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfd" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.NEAR_RETURN", + "Deprecated": "1", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.RETURN", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xf7" + }, + { + "BriefDescription": "This event is deprecated. 
Refer to new event BR_INST_RETIRED.COND_TAKEN", + "Deprecated": "1", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.TAKEN_JCC", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfe" + }, + { + "BriefDescription": "Counts the total number of mispredicted branch instructions retired for all branch types.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", + "PEBS": "1", + "PublicDescription": "Counts the total number of mispredicted branch instructions retired. All branch type instructions are accounted for. Prediction of the branch target address enables the processor to begin executing instructions before the non-speculative execution path is known. The branch prediction unit (BPU) predicts the target address based on the instruction pointer (IP) of the branch and on the execution path through which execution reached this IP. A branch misprediction occurs when the prediction is wrong, and results in discarding all instructions executed in the speculative path and re-fetching from the correct path.", + "SampleAfterValue": "200003" + }, + { + "BriefDescription": "Counts the number of mispredicted JCC (Jump on Conditional Code) branch instructions retired.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.COND", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x7e" + }, + { + "BriefDescription": "Counts the number of mispredicted taken JCC (Jump on Conditional Code) branch instructions retired.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.COND_TAKEN", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfe" + }, + { + "BriefDescription": "Counts the number of mispredicted near indirect JMP and near indirect CALL branch instructions retired.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.INDIRECT", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xeb" + }, + { + "BriefDescription": "Counts the number of mispredicted near indirect CALL branch instructions retired.", + 
"EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.INDIRECT_CALL", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfb" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.INDIRECT_CALL", + "Deprecated": "1", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.IND_CALL", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfb" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.COND", + "Deprecated": "1", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.JCC", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x7e" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.INDIRECT", + "Deprecated": "1", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.NON_RETURN_IND", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xeb" + }, + { + "BriefDescription": "Counts the number of mispredicted near RET branch instructions retired.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.RETURN", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xf7" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.COND_TAKEN", + "Deprecated": "1", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.TAKEN_JCC", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfe" + }, + { + "BriefDescription": "Counts the number of unhalted core clock cycles. (Fixed event)", + "EventName": "CPU_CLK_UNHALTED.CORE", + "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. The core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. 
This event uses fixed counter 1.", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts the number of unhalted core clock cycles.", + "EventCode": "0x3c", + "EventName": "CPU_CLK_UNHALTED.CORE_P", + "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. The core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses a programmable general purpose performance counter.", + "SampleAfterValue": "2000003" + }, + { + "BriefDescription": "Counts the number of unhalted reference clock cycles at TSC frequency. (Fixed event)", + "EventName": "CPU_CLK_UNHALTED.REF_TSC", + "PublicDescription": "Counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. This event is not affected by core frequency changes and increments at a fixed frequency that is also used for the Time Stamp Counter (TSC). This event uses fixed counter 2.", + "SampleAfterValue": "2000003", + "UMask": "0x3" + }, + { + "BriefDescription": "Counts the number of unhalted reference clock cycles at TSC frequency.", + "EventCode": "0x3c", + "EventName": "CPU_CLK_UNHALTED.REF_TSC_P", + "PublicDescription": "Counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. This event is not affected by core frequency changes and increments at a fixed frequency that is also used for the Time Stamp Counter (TSC). This event uses a programmable general purpose performance counter.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of unhalted core clock cycles. 
(Fixed event)", + "EventName": "CPU_CLK_UNHALTED.THREAD", + "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. The core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses fixed counter 1.", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts the number of unhalted core clock cycles.", + "EventCode": "0x3c", + "EventName": "CPU_CLK_UNHALTED.THREAD_P", + "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. The core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses a programmable general purpose performance counter.", + "SampleAfterValue": "2000003" + }, + { + "BriefDescription": "Counts the total number of instructions retired. (Fixed event)", + "EventName": "INST_RETIRED.ANY", + "PEBS": "1", + "PublicDescription": "Counts the total number of instructions that retired. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. This event continues counting during hardware interrupts, traps, and inside interrupt handlers. This event uses fixed counter 0.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the total number of instructions retired.", + "EventCode": "0xc0", + "EventName": "INST_RETIRED.ANY_P", + "PEBS": "1", + "PublicDescription": "Counts the total number of instructions that retired. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. This event continues counting during hardware interrupts, traps, and inside interrupt handlers. 
This event uses a programmable general purpose performance counter.", + "SampleAfterValue": "2000003" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event LD_BLOCKS.ADDRESS_ALIAS", + "Deprecated": "1", + "EventCode": "0x03", + "EventName": "LD_BLOCKS.4K_ALIAS", + "PEBS": "1", + "SampleAfterValue": "1000003", + "UMask": "0x4" + }, + { + "BriefDescription": "Counts the number of retired loads that are blocked because it initially appears to be store forward blocked, but subsequently is shown not to be blocked based on 4K alias check.", + "EventCode": "0x03", + "EventName": "LD_BLOCKS.ADDRESS_ALIAS", + "PEBS": "1", + "SampleAfterValue": "1000003", + "UMask": "0x4" + }, + { + "BriefDescription": "Counts the number of retired loads that are blocked because its address exactly matches an older store whose data is not ready.", + "EventCode": "0x03", + "EventName": "LD_BLOCKS.DATA_UNKNOWN", + "PEBS": "1", + "SampleAfterValue": "1000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of machine clears due to memory ordering in which an internal load passes an older store within the same CPU.", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.DISAMBIGUATION", + "SampleAfterValue": "20003", + "UMask": "0x8" + }, + { + "BriefDescription": "Counts the number of machine clears due to memory renaming.", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.MRN_NUKE", + "SampleAfterValue": "1000003", + "UMask": "0x80" + }, + { + "BriefDescription": "Counts the number of machine clears due to a page fault. Counts both I-Side and D-Side (Loads/Stores) page faults. 
A page fault occurs when either the page is not present, or an access violation occurs.", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.PAGE_FAULT", + "SampleAfterValue": "20003", + "UMask": "0x20" + }, + { + "BriefDescription": "Counts the number of machine clears that flush the pipeline and restart the machine with the use of microcode due to SMC, MEMORY_ORDERING, FP_ASSISTS, PAGE_FAULT, DISAMBIGUATION, and FPC_VIRTUAL_TRAP.", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.SLOW", + "SampleAfterValue": "20003", + "UMask": "0x6f" + }, + { + "BriefDescription": "Counts the number of machine clears due to program modifying data (self modifying code) within 1K of a recently fetched code page.", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.SMC", + "SampleAfterValue": "20003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of issue slots not consumed by the backend due to a micro-sequencer (MS) scoreboard, which stalls the front-end from issuing from the UROM until a specified older uop retires.", + "EventCode": "0x75", + "EventName": "SERIALIZATION.NON_C01_MS_SCB", + "PublicDescription": "Counts the number of issue slots not consumed by the backend due to a micro-sequencer (MS) scoreboard, which stalls the front-end from issuing from the UROM until a specified older uop retires. The most commonly executed instruction with an MS scoreboard is PAUSE.", + "SampleAfterValue": "200003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear.", + "EventCode": "0x73", + "EventName": "TOPDOWN_BAD_SPECULATION.ALL", + "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. Only issue slots wasted due to fast nukes such as memory ordering nukes are counted. 
Other nukes are not accounted for. Counts all issue slots blocked during this recovery window including relevant microcode flows and while uops are not yet available in the instruction queue (IQ) even if an FE_bound event occurs during this period. Also includes the issue slots that were consumed by the backend but were thrown away because they were younger than the mispredict or machine clear.", + "SampleAfterValue": "1000003" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to fast nukes such as memory ordering and memory disambiguation machine clears.", + "EventCode": "0x73", + "EventName": "TOPDOWN_BAD_SPECULATION.FASTNUKE", + "SampleAfterValue": "1000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a machine clear (nuke) of any kind including memory ordering and memory disambiguation.", + "EventCode": "0x73", + "EventName": "TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS", + "SampleAfterValue": "1000003", + "UMask": "0x3" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to branch mispredicts.", + "EventCode": "0x73", + "EventName": "TOPDOWN_BAD_SPECULATION.MISPREDICT", + "SampleAfterValue": "1000003", + "UMask": "0x4" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to a machine clear (nuke).", + "EventCode": "0x73", + "EventName": "TOPDOWN_BAD_SPECULATION.NUKE", + "SampleAfterValue": "1000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the total number of issue slots every cycle that were not consumed by the backend due to backend stalls.", + "EventCode": "0x74", + "EventName": "TOPDOWN_BE_BOUND.ALL", + "SampleAfterValue": "1000003" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not 
consumed by the backend due to certain allocation restrictions.", + "EventCode": "0x74", + "EventName": "TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS", + "SampleAfterValue": "1000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to memory reservation stalls in which a scheduler is not able to accept uops.", + "EventCode": "0x74", + "EventName": "TOPDOWN_BE_BOUND.MEM_SCHEDULER", + "SampleAfterValue": "1000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to IEC or FPC RAT stalls, which can be due to FIQ or IEC reservation stalls in which the integer, floating point or SIMD scheduler is not able to accept uops.", + "EventCode": "0x74", + "EventName": "TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER", + "SampleAfterValue": "1000003", + "UMask": "0x8" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to the physical register file unable to accept an entry (marble stalls).", + "EventCode": "0x74", + "EventName": "TOPDOWN_BE_BOUND.REGISTER", + "SampleAfterValue": "1000003", + "UMask": "0x20" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to the reorder buffer being full (ROB stalls).", + "EventCode": "0x74", + "EventName": "TOPDOWN_BE_BOUND.REORDER_BUFFER", + "SampleAfterValue": "1000003", + "UMask": "0x40" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to scoreboards from the instruction queue (IQ), jump execution unit (JEU), or microcode sequencer (MS).", + "EventCode": "0x74", + "EventName": "TOPDOWN_BE_BOUND.SERIALIZATION", + "SampleAfterValue": "1000003", + "UMask": "0x10" + }, + { + "BriefDescription": "Counts the total number of issue slots every cycle that were not consumed by the backend due to 
frontend stalls.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.ALL", + "SampleAfterValue": "1000003" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BACLEARS.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.BRANCH_DETECT", + "PublicDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend. Includes BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.", + "SampleAfterValue": "1000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BTCLEARS.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.BRANCH_RESTEER", + "PublicDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BTCLEARS, which occurs when the Branch Target Buffer (BTB) predicts a taken branch.", + "SampleAfterValue": "1000003", + "UMask": "0x40" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to the microcode sequencer (MS).", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.CISC", + "SampleAfterValue": "1000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to decode stalls.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.DECODE", + "SampleAfterValue": "1000003", + "UMask": "0x8" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.", + "EventCode": "0x71", + "EventName": 
"TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH", + "SampleAfterValue": "1000003", + "UMask": "0x8d" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to latency related stalls including BACLEARs, BTCLEARs, ITLB misses, and ICache misses.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.FRONTEND_LATENCY", + "SampleAfterValue": "1000003", + "UMask": "0x72" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to ITLB misses.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.ITLB", + "PublicDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to Instruction Translation Lookaside Buffer (ITLB) misses.", + "SampleAfterValue": "1000003", + "UMask": "0x10" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to other common frontend stalls not categorized.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.OTHER", + "SampleAfterValue": "1000003", + "UMask": "0x80" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to wrong predecodes.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.PREDECODE", + "SampleAfterValue": "1000003", + "UMask": "0x4" + }, + { + "BriefDescription": "Counts the total number of consumed retirement slots.", + "EventCode": "0xc2", + "EventName": "TOPDOWN_RETIRING.ALL", + "PEBS": "1", + "SampleAfterValue": "1000003" + }, + { + "BriefDescription": "Counts the total number of uops retired.", + "EventCode": "0xc2", + "EventName": "UOPS_RETIRED.ALL", + "PEBS": "1", + "SampleAfterValue": "2000003" + }, + { + "BriefDescription": "Counts the number of integer divide uops retired.", + "EventCode": "0xc2", + "EventName": "UOPS_RETIRED.IDIV", + "PEBS": "1", + "SampleAfterValue": "2000003", + "UMask": "0x10" + }, + { + 
"BriefDescription": "Counts the number of uops that are from complex flows issued by the micro-sequencer (MS).", + "EventCode": "0xc2", + "EventName": "UOPS_RETIRED.MS", + "PEBS": "1", + "PublicDescription": "Counts the number of uops that are from complex flows issued by the Microcode Sequencer (MS). This includes uops from flows due to complex instructions, faults, assists, and inserted flows.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of x87 uops retired, includes those in MS flows.", + "EventCode": "0xc2", + "EventName": "UOPS_RETIRED.X87", + "PEBS": "1", + "SampleAfterValue": "2000003", + "UMask": "0x2" + } +] diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/virtual-memory.json b/tools/perf/pmu-events/arch/x86/alderlaken/virtual-memory.json new file mode 100644 index 000000000000..67fd640f790e --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/alderlaken/virtual-memory.json @@ -0,0 +1,47 @@ +[ + { + "BriefDescription": "Counts the number of page walks completed due to load DTLB misses to any page size.", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts the number of page walks completed due to loads (including SW prefetches) whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size. Includes page walks that page fault.", + "SampleAfterValue": "200003", + "UMask": "0xe" + }, + { + "BriefDescription": "Counts the number of page walks completed due to store DTLB misses to any page size.", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts the number of page walks completed due to stores whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size. 
Includes page walks that page fault.", + "SampleAfterValue": "2000003", + "UMask": "0xe" + }, + { + "BriefDescription": "Counts the number of page walks initiated by an instruction fetch that missed the first and second level TLBs.", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.MISS_CAUSED_WALK", + "SampleAfterValue": "1000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of page walks due to an instruction fetch that miss the PDE (Page Directory Entry) cache.", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.PDE_CACHE_MISS", + "SampleAfterValue": "2000003", + "UMask": "0x80" + }, + { + "BriefDescription": "Counts the number of page walks completed due to instruction fetch misses to any page size.", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts the number of page walks completed due to instruction fetches whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size. 
Includes page walks that page fault.", + "SampleAfterValue": "200003", + "UMask": "0xe" + }, + { + "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a DTLB miss.", + "EventCode": "0x05", + "EventName": "LD_HEAD.DTLB_MISS_AT_RET", + "SampleAfterValue": "1000003", + "UMask": "0x90" + } +] diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index 5e609b876790..78af105ca236 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -1,5 +1,6 @@ Family-model,Version,Filename,EventType -GenuineIntel-6-(97|9A|B7|BA|BE|BF),v1.15,alderlake,core +GenuineIntel-6-(97|9A|B7|BA|BF),v1.15,alderlake,core +GenuineIntel-6-BE,v1.16,alderlaken,core GenuineIntel-6-(1C|26|27|35|36),v4,bonnell,core GenuineIntel-6-(3D|47),v26,broadwell,core GenuineIntel-6-56,v23,broadwellde,core From a6a29bcf596141f95fc0f9756ba68de31ba1f46c Mon Sep 17 00:00:00 2001 From: Zhengjun Xing Date: Thu, 24 Nov 2022 11:14:39 +0800 Subject: [PATCH 2664/4122] perf vendor events intel: Add uncore event list for Alderlake-N Add JSON uncore events for Alderlake-N Based on JSON list v1.16: https://github.com/intel/perfmon/tree/main/ADL/events/ Reviewed-by: Kan Liang Signed-off-by: Xing Zhengjun Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221124031441.110134-2-zhengjun.xing@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/x86/alderlaken/uncore-memory.json | 175 ++++++++++++++++++ .../arch/x86/alderlaken/uncore-other.json | 33 ++++ 2 files changed, 208 insertions(+) create mode 100644 tools/perf/pmu-events/arch/x86/alderlaken/uncore-memory.json create mode 100644 tools/perf/pmu-events/arch/x86/alderlaken/uncore-other.json diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/uncore-memory.json 
b/tools/perf/pmu-events/arch/x86/alderlaken/uncore-memory.json new file mode 100644 index 000000000000..2ccd9cf96957 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/alderlaken/uncore-memory.json @@ -0,0 +1,175 @@ +[ + { + "BriefDescription": "Counts every 64B read request entering the Memory Controller 0 to DRAM (sum of all channels).", + "EventName": "UNC_MC0_RDCAS_COUNT_FREERUN", + "PerPkg": "1", + "PublicDescription": "Counts every 64B read request entering the Memory Controller 0 to DRAM (sum of all channels).", + "Unit": "iMC" + }, + { + "BriefDescription": "Counts every 64B write request entering the Memory Controller 0 to DRAM (sum of all channels). Each write request counts as a new request incrementing this counter. However, same cache line write requests (both full and partial) are combined to a single 64 byte data transfer to DRAM.", + "EventName": "UNC_MC0_WRCAS_COUNT_FREERUN", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Counts every 64B read request entering the Memory Controller 1 to DRAM (sum of all channels).", + "EventName": "UNC_MC1_RDCAS_COUNT_FREERUN", + "PerPkg": "1", + "PublicDescription": "Counts every 64B read entering the Memory Controller 1 to DRAM (sum of all channels).", + "Unit": "iMC" + }, + { + "BriefDescription": "Counts every 64B write request entering the Memory Controller 1 to DRAM (sum of all channels). Each write request counts as a new request incrementing this counter. 
However, same cache line write requests (both full and partial) are combined to a single 64 byte data transfer to DRAM.", + "EventName": "UNC_MC1_WRCAS_COUNT_FREERUN", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "ACT command for a read request sent to DRAM", + "EventCode": "0x24", + "EventName": "UNC_M_ACT_COUNT_RD", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "ACT command sent to DRAM", + "EventCode": "0x26", + "EventName": "UNC_M_ACT_COUNT_TOTAL", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "ACT command for a write request sent to DRAM", + "EventCode": "0x25", + "EventName": "UNC_M_ACT_COUNT_WR", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Read CAS command sent to DRAM", + "EventCode": "0x22", + "EventName": "UNC_M_CAS_COUNT_RD", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Write CAS command sent to DRAM", + "EventCode": "0x23", + "EventName": "UNC_M_CAS_COUNT_WR", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Number of clocks", + "EventCode": "0x01", + "EventName": "UNC_M_CLOCKTICKS", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "incoming read request page status is Page Empty", + "EventCode": "0x1D", + "EventName": "UNC_M_DRAM_PAGE_EMPTY_RD", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "incoming write request page status is Page Empty", + "EventCode": "0x20", + "EventName": "UNC_M_DRAM_PAGE_EMPTY_WR", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "incoming read request page status is Page Hit", + "EventCode": "0x1C", + "EventName": "UNC_M_DRAM_PAGE_HIT_RD", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "incoming write request page status is Page Hit", + "EventCode": "0x1F", + "EventName": "UNC_M_DRAM_PAGE_HIT_WR", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "incoming read request page status is Page Miss", + "EventCode": "0x1E", + "EventName": 
"UNC_M_DRAM_PAGE_MISS_RD", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "incoming write request page status is Page Miss", + "EventCode": "0x21", + "EventName": "UNC_M_DRAM_PAGE_MISS_WR", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Any Rank at Hot state", + "EventCode": "0x19", + "EventName": "UNC_M_DRAM_THERMAL_HOT", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Any Rank at Warm state", + "EventCode": "0x1A", + "EventName": "UNC_M_DRAM_THERMAL_WARM", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Incoming read prefetch request from IA.", + "EventCode": "0x0A", + "EventName": "UNC_M_PREFETCH_RD", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "PRE command sent to DRAM due to page table idle timer expiration", + "EventCode": "0x28", + "EventName": "UNC_M_PRE_COUNT_IDLE", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "PRE command sent to DRAM for a read/write request", + "EventCode": "0x27", + "EventName": "UNC_M_PRE_COUNT_PAGE_MISS", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Incoming VC0 read request", + "EventCode": "0x02", + "EventName": "UNC_M_VC0_REQUESTS_RD", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Incoming VC0 write request", + "EventCode": "0x03", + "EventName": "UNC_M_VC0_REQUESTS_WR", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Incoming VC1 read request", + "EventCode": "0x04", + "EventName": "UNC_M_VC1_REQUESTS_RD", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Incoming VC1 write request", + "EventCode": "0x05", + "EventName": "UNC_M_VC1_REQUESTS_WR", + "PerPkg": "1", + "Unit": "iMC" + } +] diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/uncore-other.json b/tools/perf/pmu-events/arch/x86/alderlaken/uncore-other.json new file mode 100644 index 000000000000..f9e7777cd2be --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/alderlaken/uncore-other.json @@ 
-0,0 +1,33 @@ +[ + { + "BriefDescription": "Number of requests allocated in Coherency Tracker.", + "EventCode": "0x84", + "EventName": "UNC_ARB_COH_TRK_REQUESTS.ALL", + "PerPkg": "1", + "UMask": "0x1", + "Unit": "ARB" + }, + { + "BriefDescription": "Each cycle counts number of all outgoing valid entries in ReqTrk. Such entry is defined as valid from its allocation in ReqTrk till deallocation. Accounts for Coherent and non-coherent traffic.", + "EventCode": "0x80", + "EventName": "UNC_ARB_TRK_OCCUPANCY.ALL", + "PerPkg": "1", + "UMask": "0x1", + "Unit": "ARB" + }, + { + "BriefDescription": "Counts the number of coherent and non-coherent requests initiated by IA cores, processor graphic units, or LLC.", + "EventCode": "0x81", + "EventName": "UNC_ARB_TRK_REQUESTS.ALL", + "PerPkg": "1", + "UMask": "0x1", + "Unit": "ARB" + }, + { + "BriefDescription": "This 48-bit fixed counter counts the UCLK cycles.", + "EventCode": "0xff", + "EventName": "UNC_CLOCK.SOCKET", + "PerPkg": "1", + "Unit": "CLOCK" + } +] From 2bb3fbad4c3b3b5b6d5ac537b6ab404443fc5224 Mon Sep 17 00:00:00 2001 From: Zhengjun Xing Date: Thu, 24 Nov 2022 11:14:40 +0800 Subject: [PATCH 2665/4122] perf vendor events intel: Add metrics for Alderlake-N Add JSON metrics for Alderlake-N to perf. It only included E-core metrics. 
E-core metrics based on E-core TMA v2.2 (E-core_TMA_Metrics.csv) It is downloaded from: https://github.com/intel/perfmon/ Reviewed-by: Kan Liang Signed-off-by: Xing Zhengjun Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221124031441.110134-3-zhengjun.xing@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/x86/alderlaken/adln-metrics.json | 583 ++++++++++++++++++ 1 file changed, 583 insertions(+) create mode 100644 tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json b/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json new file mode 100644 index 000000000000..c57e9f325fb0 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json @@ -0,0 +1,583 @@ +[ + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to frontend stalls.", + "MetricExpr": "TOPDOWN_FE_BOUND.ALL / SLOTS", + "MetricGroup": "TopdownL1", + "MetricName": "tma_frontend_bound", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to latency related stalls including BACLEARs, BTCLEARs, ITLB misses, and ICache misses.", + "MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_LATENCY / SLOTS", + "MetricGroup": "TopdownL2;tma_frontend_bound_group", + "MetricName": "tma_frontend_latency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to instruction cache misses.", + "MetricExpr": "TOPDOWN_FE_BOUND.ICACHE / SLOTS", + "MetricGroup": "TopdownL3;tma_frontend_latency_group", + "MetricName": "tma_icache", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to Instruction Translation Lookaside Buffer (ITLB) 
misses.", + "MetricExpr": "TOPDOWN_FE_BOUND.ITLB / SLOTS", + "MetricGroup": "TopdownL3;tma_frontend_latency_group", + "MetricName": "tma_itlb", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend", + "MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_DETECT / SLOTS", + "MetricGroup": "TopdownL3;tma_frontend_latency_group", + "MetricName": "tma_branch_detect", + "PublicDescription": "Counts the number of issue slots that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend. Includes BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to BTCLEARS, which occurs when the Branch Target Buffer (BTB) predicts a taken branch.", + "MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_RESTEER / SLOTS", + "MetricGroup": "TopdownL3;tma_frontend_latency_group", + "MetricName": "tma_branch_resteer", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.", + "MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH / SLOTS", + "MetricGroup": "TopdownL2;tma_frontend_bound_group", + "MetricName": "tma_frontend_bandwidth", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to the microcode sequencer (MS).", + "MetricExpr": "TOPDOWN_FE_BOUND.CISC / SLOTS", + "MetricGroup": "TopdownL3;tma_frontend_bandwidth_group", + "MetricName": 
"tma_cisc", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to decode stalls.", + "MetricExpr": "TOPDOWN_FE_BOUND.DECODE / SLOTS", + "MetricGroup": "TopdownL3;tma_frontend_bandwidth_group", + "MetricName": "tma_decode", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to wrong predecodes.", + "MetricExpr": "TOPDOWN_FE_BOUND.PREDECODE / SLOTS", + "MetricGroup": "TopdownL3;tma_frontend_bandwidth_group", + "MetricName": "tma_predecode", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to other common frontend stalls not categorized.", + "MetricExpr": "TOPDOWN_FE_BOUND.OTHER / SLOTS", + "MetricGroup": "TopdownL3;tma_frontend_bandwidth_group", + "MetricName": "tma_other_fb", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear", + "MetricExpr": "(SLOTS - (TOPDOWN_FE_BOUND.ALL + TOPDOWN_BE_BOUND.ALL + TOPDOWN_RETIRING.ALL)) / SLOTS", + "MetricGroup": "TopdownL1", + "MetricName": "tma_bad_speculation", + "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. Only issue slots wasted due to fast nukes such as memory ordering nukes are counted. Other nukes are not accounted for. Counts all issue slots blocked during this recovery window including relevant microcode flows and while uops are not yet available in the instruction queue (IQ). 
Also includes the issue slots that were consumed by the backend but were thrown away because they were younger than the mispredict or machine clear.", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to branch mispredicts.", + "MetricExpr": "TOPDOWN_BAD_SPECULATION.MISPREDICT / SLOTS", + "MetricGroup": "TopdownL2;tma_bad_speculation_group", + "MetricName": "tma_branch_mispredicts", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a machine clear (nuke) of any kind including memory ordering and memory disambiguation.", + "MetricExpr": "TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS / SLOTS", + "MetricGroup": "TopdownL2;tma_bad_speculation_group", + "MetricName": "tma_machine_clears", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to a machine clear (slow nuke).", + "MetricExpr": "TOPDOWN_BAD_SPECULATION.NUKE / SLOTS", + "MetricGroup": "TopdownL3;tma_machine_clears_group", + "MetricName": "tma_nuke", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to SMC. ", + "MetricExpr": "tma_nuke * (MACHINE_CLEARS.SMC / MACHINE_CLEARS.SLOW)", + "MetricGroup": "TopdownL4;tma_nuke_group", + "MetricName": "tma_smc", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to memory ordering. ", + "MetricExpr": "tma_nuke * (MACHINE_CLEARS.MEMORY_ORDERING / MACHINE_CLEARS.SLOW)", + "MetricGroup": "TopdownL4;tma_nuke_group", + "MetricName": "tma_memory_ordering", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to FP assists. 
", + "MetricExpr": "tma_nuke * (MACHINE_CLEARS.FP_ASSIST / MACHINE_CLEARS.SLOW)", + "MetricGroup": "TopdownL4;tma_nuke_group", + "MetricName": "tma_fp_assist", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to memory disambiguation. ", + "MetricExpr": "tma_nuke * (MACHINE_CLEARS.DISAMBIGUATION / MACHINE_CLEARS.SLOW)", + "MetricGroup": "TopdownL4;tma_nuke_group", + "MetricName": "tma_disambiguation", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to page faults. ", + "MetricExpr": "tma_nuke * (MACHINE_CLEARS.PAGE_FAULT / MACHINE_CLEARS.SLOW)", + "MetricGroup": "TopdownL4;tma_nuke_group", + "MetricName": "tma_page_fault", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to a machine clear classified as a fast nuke due to memory ordering, memory disambiguation and memory renaming.", + "MetricExpr": "TOPDOWN_BAD_SPECULATION.FASTNUKE / SLOTS", + "MetricGroup": "TopdownL3;tma_machine_clears_group", + "MetricName": "tma_fast_nuke", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls", + "MetricExpr": "TOPDOWN_BE_BOUND.ALL / SLOTS", + "MetricGroup": "TopdownL1", + "MetricName": "tma_backend_bound", + "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls. Note that uops must be available for consumption in order for this event to count. If a uop is not available (IQ is empty), this event will not count. The rest of these subevents count backend stalls, in cycles, due to an outstanding request which is memory bound vs core bound. 
The subevents are not slot based events and therefore can not be precisely added or subtracted from the Backend_Bound_Aux subevents which are slot based.", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles due to backend bound stalls that are core execution bound and not attributed to outstanding demand load or store stalls. ", + "MetricExpr": "max(0, tma_backend_bound - tma_load_store_bound)", + "MetricGroup": "TopdownL2;tma_backend_bound_group", + "MetricName": "tma_core_bound", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles the core is stalled due to stores or loads. ", + "MetricExpr": "min((TOPDOWN_BE_BOUND.ALL / SLOTS), (LD_HEAD.ANY_AT_RET / CLKS) + tma_store_bound)", + "MetricGroup": "TopdownL2;tma_backend_bound_group", + "MetricName": "tma_load_store_bound", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles the core is stalled due to store buffer full.", + "MetricExpr": "tma_mem_scheduler * (MEM_SCHEDULER_BLOCK.ST_BUF / MEM_SCHEDULER_BLOCK.ALL)", + "MetricGroup": "TopdownL3;tma_load_store_bound_group", + "MetricName": "tma_store_bound", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a load block.", + "MetricExpr": "LD_HEAD.L1_BOUND_AT_RET / CLKS", + "MetricGroup": "TopdownL3;tma_load_store_bound_group", + "MetricName": "tma_l1_bound", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a store forward block.", + "MetricExpr": "LD_HEAD.ST_ADDR_AT_RET / CLKS", + "MetricGroup": "TopdownL4;tma_l1_bound_group", + "MetricName": "tma_store_fwd", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a first level TLB miss.", + "MetricExpr": "LD_HEAD.DTLB_MISS_AT_RET 
/ CLKS", + "MetricGroup": "TopdownL4;tma_l1_bound_group", + "MetricName": "tma_stlb_hit", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a second level TLB miss requiring a page walk.", + "MetricExpr": "LD_HEAD.PGWALK_AT_RET / CLKS", + "MetricGroup": "TopdownL4;tma_l1_bound_group", + "MetricName": "tma_stlb_miss", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a number of other load blocks.", + "MetricExpr": "LD_HEAD.OTHER_AT_RET / CLKS", + "MetricGroup": "TopdownL4;tma_l1_bound_group", + "MetricName": "tma_other_l1", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the L2 Cache.", + "MetricExpr": "(MEM_BOUND_STALLS.LOAD_L2_HIT / CLKS) - (MEM_BOUND_STALLS_AT_RET_CORRECTION * MEM_BOUND_STALLS.LOAD_L2_HIT / MEM_BOUND_STALLS.LOAD)", + "MetricGroup": "TopdownL3;tma_load_store_bound_group", + "MetricName": "tma_l2_bound", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.", + "MetricExpr": "(MEM_BOUND_STALLS.LOAD_LLC_HIT / CLKS) - (MEM_BOUND_STALLS_AT_RET_CORRECTION * MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_BOUND_STALLS.LOAD)", + "MetricGroup": "TopdownL3;tma_load_store_bound_group", + "MetricName": "tma_l3_bound", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in DRAM or MMIO (Non-DRAM).", + "MetricExpr": "(MEM_BOUND_STALLS.LOAD_DRAM_HIT / CLKS) - (MEM_BOUND_STALLS_AT_RET_CORRECTION * MEM_BOUND_STALLS.LOAD_DRAM_HIT / MEM_BOUND_STALLS.LOAD)", + "MetricGroup": "TopdownL3;tma_load_store_bound_group", + "MetricName": "tma_dram_bound", + "ScaleUnit": "100%" + 
}, + { + "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hits in the L2, LLC, DRAM or MMIO (Non-DRAM) but could not be correctly attributed or cycles in which the load miss is waiting on a request buffer.", + "MetricExpr": "max(0, tma_load_store_bound - (tma_store_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_dram_bound))", + "MetricGroup": "TopdownL3;tma_load_store_bound_group", + "MetricName": "tma_other_load_store", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls", + "MetricExpr": "tma_backend_bound", + "MetricGroup": "TopdownL1", + "MetricName": "tma_backend_bound_aux", + "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls. Note that UOPS must be available for consumption in order for this event to count. If a uop is not available (IQ is empty), this event will not count. All of these subevents count backend stalls, in slots, due to a resource limitation. These are not cycle based events and therefore can not be precisely added or subtracted from the Backend_Bound subevents which are cycle based. These subevents are supplementary to Backend_Bound and can be used to analyze results from a resource perspective at allocation. ", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls", + "MetricExpr": "tma_backend_bound", + "MetricGroup": "TopdownL2;tma_backend_bound_aux_group", + "MetricName": "tma_resource_bound", + "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls. Note that uops must be available for consumption in order for this event to count. If a uop is not available (IQ is empty), this event will not count. 
", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to memory reservation stalls in which a scheduler is not able to accept uops.", + "MetricExpr": "TOPDOWN_BE_BOUND.MEM_SCHEDULER / SLOTS", + "MetricGroup": "TopdownL3;tma_resource_bound_group", + "MetricName": "tma_mem_scheduler", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles, relative to the number of mem_scheduler slots, in which uops are blocked due to store buffer full", + "MetricExpr": "tma_mem_scheduler * (MEM_SCHEDULER_BLOCK.ST_BUF / MEM_SCHEDULER_BLOCK.ALL)", + "MetricGroup": "TopdownL4;tma_mem_scheduler_group", + "MetricName": "tma_st_buffer", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles, relative to the number of mem_scheduler slots, in which uops are blocked due to load buffer full", + "MetricExpr": "tma_mem_scheduler * MEM_SCHEDULER_BLOCK.LD_BUF / MEM_SCHEDULER_BLOCK.ALL", + "MetricGroup": "TopdownL4;tma_mem_scheduler_group", + "MetricName": "tma_ld_buffer", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles, relative to the number of mem_scheduler slots, in which uops are blocked due to RSV full relative ", + "MetricExpr": "tma_mem_scheduler * MEM_SCHEDULER_BLOCK.RSV / MEM_SCHEDULER_BLOCK.ALL", + "MetricGroup": "TopdownL4;tma_mem_scheduler_group", + "MetricName": "tma_rsv", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to IEC or FPC RAT stalls, which can be due to FIQ or IEC reservation stalls in which the integer, floating point or SIMD scheduler is not able to accept uops.", + "MetricExpr": "TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER / SLOTS", + "MetricGroup": "TopdownL3;tma_resource_bound_group", + "MetricName": "tma_non_mem_scheduler", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed 
by the backend due to the physical register file unable to accept an entry (marble stalls).", + "MetricExpr": "TOPDOWN_BE_BOUND.REGISTER / SLOTS", + "MetricGroup": "TopdownL3;tma_resource_bound_group", + "MetricName": "tma_register", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to the reorder buffer being full (ROB stalls).", + "MetricExpr": "TOPDOWN_BE_BOUND.REORDER_BUFFER / SLOTS", + "MetricGroup": "TopdownL3;tma_resource_bound_group", + "MetricName": "tma_reorder_buffer", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to certain allocation restrictions.", + "MetricExpr": "TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS / SLOTS", + "MetricGroup": "TopdownL3;tma_resource_bound_group", + "MetricName": "tma_alloc_restriction", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to scoreboards from the instruction queue (IQ), jump execution unit (JEU), or microcode sequencer (MS).", + "MetricExpr": "TOPDOWN_BE_BOUND.SERIALIZATION / SLOTS", + "MetricGroup": "TopdownL3;tma_resource_bound_group", + "MetricName": "tma_serialization", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that result in retirement slots. ", + "MetricExpr": "TOPDOWN_RETIRING.ALL / SLOTS", + "MetricGroup": "TopdownL1", + "MetricName": "tma_retiring", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of uops that are not from the microsequencer. 
", + "MetricExpr": "(TOPDOWN_RETIRING.ALL - UOPS_RETIRED.MS) / SLOTS", + "MetricGroup": "TopdownL2;tma_retiring_group", + "MetricName": "tma_base", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of floating point operations per uop with all default weighting.", + "MetricExpr": "UOPS_RETIRED.FPDIV / SLOTS", + "MetricGroup": "TopdownL3;tma_base_group", + "MetricName": "tma_fp_uops", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of uops retired excluding ms and fp div uops.", + "MetricExpr": "(TOPDOWN_RETIRING.ALL - UOPS_RETIRED.MS - UOPS_RETIRED.FPDIV) / SLOTS", + "MetricGroup": "TopdownL3;tma_base_group", + "MetricName": "tma_other_ret", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of uops that are from the complex flows issued by the micro-sequencer (MS)", + "MetricExpr": "UOPS_RETIRED.MS / SLOTS", + "MetricGroup": "TopdownL2;tma_retiring_group", + "MetricName": "tma_ms_uops", + "PublicDescription": "Counts the number of uops that are from the complex flows issued by the micro-sequencer (MS). 
This includes uops from flows due to complex instructions, faults, assists, and inserted flows.", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "", + "MetricExpr": "CPU_CLK_UNHALTED.CORE", + "MetricName": "CLKS" + }, + { + "BriefDescription": "", + "MetricExpr": "CPU_CLK_UNHALTED.CORE_P", + "MetricName": "CLKS_P" + }, + { + "BriefDescription": "", + "MetricExpr": "5 * CLKS", + "MetricName": "SLOTS" + }, + { + "BriefDescription": "Instructions Per Cycle", + "MetricExpr": "INST_RETIRED.ANY / CLKS", + "MetricName": "IPC" + }, + { + "BriefDescription": "Cycles Per Instruction", + "MetricExpr": "CLKS / INST_RETIRED.ANY", + "MetricName": "CPI" + }, + { + "BriefDescription": "Uops Per Instruction", + "MetricExpr": "UOPS_RETIRED.ALL / INST_RETIRED.ANY", + "MetricName": "UPI" + }, + { + "BriefDescription": "Percentage of total non-speculative loads with a store forward or unknown store address block", + "MetricExpr": "100 * LD_BLOCKS.DATA_UNKNOWN / MEM_UOPS_RETIRED.ALL_LOADS", + "MetricName": "Store_Fwd_Blocks" + }, + { + "BriefDescription": "Percentage of total non-speculative loads with an address aliasing block", + "MetricExpr": "100 * LD_BLOCKS.4K_ALIAS / MEM_UOPS_RETIRED.ALL_LOADS", + "MetricName": "Address_Alias_Blocks" + }, + { + "BriefDescription": "Percentage of total non-speculative loads that are splits", + "MetricExpr": "100 * MEM_UOPS_RETIRED.SPLIT_LOADS / MEM_UOPS_RETIRED.ALL_LOADS", + "MetricName": "Load_Splits" + }, + { + "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)", + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", + "MetricName": "IpBranch" + }, + { + "BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)", + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.CALL", + "MetricName": "IpCall" + }, + { + "BriefDescription": "Instructions per Load", + "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_LOADS", + "MetricName": "IpLoad" + }, + { +
"BriefDescription": "Instructions per Store", + "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_STORES", + "MetricName": "IpStore" + }, + { + "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction", + "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES", + "MetricName": "IpMispredict" + }, + { + "BriefDescription": "Instructions per Far Branch", + "MetricExpr": "INST_RETIRED.ANY / (BR_INST_RETIRED.FAR_BRANCH / 2)", + "MetricName": "IpFarBranch" + }, + { + "BriefDescription": "Ratio of all branches which mispredict", + "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.ALL_BRANCHES", + "MetricName": "Branch_Mispredict_Ratio" + }, + { + "BriefDescription": "Ratio between Mispredicted branches and unknown branches", + "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BACLEARS.ANY", + "MetricName": "Branch_Mispredict_to_Unknown_Branch_Ratio" + }, + { + "BriefDescription": "Percentage of all uops which are ucode ops", + "MetricExpr": "100 * UOPS_RETIRED.MS / UOPS_RETIRED.ALL", + "MetricName": "Microcode_Uop_Ratio" + }, + { + "BriefDescription": "Percentage of all uops which are FPDiv uops", + "MetricExpr": "100 * UOPS_RETIRED.FPDIV / UOPS_RETIRED.ALL", + "MetricName": "FPDiv_Uop_Ratio" + }, + { + "BriefDescription": "Percentage of all uops which are IDiv uops", + "MetricExpr": "100 * UOPS_RETIRED.IDIV / UOPS_RETIRED.ALL", + "MetricName": "IDiv_Uop_Ratio" + }, + { + "BriefDescription": "Percentage of all uops which are x87 uops", + "MetricExpr": "100 * UOPS_RETIRED.X87 / UOPS_RETIRED.ALL", + "MetricName": "X87_Uop_Ratio" + }, + { + "BriefDescription": "Average Frequency Utilization relative nominal frequency", + "MetricExpr": "CLKS / CPU_CLK_UNHALTED.REF_TSC", + "MetricName": "Turbo_Utilization" + }, + { + "BriefDescription": "Fraction of cycles spent in Kernel mode", + "MetricExpr": "cpu@CPU_CLK_UNHALTED.CORE@k / CPU_CLK_UNHALTED.CORE", + "MetricName": "Kernel_Utilization" + }, + { + "BriefDescription": "Average CPU 
Utilization", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC", + "MetricName": "CPU_Utilization" + }, + { + "BriefDescription": "Cycle cost per L2 hit", + "MetricExpr": "MEM_BOUND_STALLS.LOAD_L2_HIT / MEM_LOAD_UOPS_RETIRED.L2_HIT", + "MetricName": "Cycles_per_Demand_Load_L2_Hit" + }, + { + "BriefDescription": "Cycle cost per LLC hit", + "MetricExpr": "MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_LOAD_UOPS_RETIRED.L3_HIT", + "MetricName": "Cycles_per_Demand_Load_L3_Hit" + }, + { + "BriefDescription": "Cycle cost per DRAM hit", + "MetricExpr": "MEM_BOUND_STALLS.LOAD_DRAM_HIT / MEM_LOAD_UOPS_RETIRED.DRAM_HIT", + "MetricName": "Cycles_per_Demand_Load_DRAM_Hit" + }, + { + "BriefDescription": "Percent of instruction miss cost that hit in the L2", + "MetricExpr": "100 * MEM_BOUND_STALLS.IFETCH_L2_HIT / (MEM_BOUND_STALLS.IFETCH)", + "MetricName": "Inst_Miss_Cost_L2Hit_Percent" + }, + { + "BriefDescription": "Percent of instruction miss cost that hit in the L3", + "MetricExpr": "100 * MEM_BOUND_STALLS.IFETCH_LLC_HIT / (MEM_BOUND_STALLS.IFETCH)", + "MetricName": "Inst_Miss_Cost_L3Hit_Percent" + }, + { + "BriefDescription": "Percent of instruction miss cost that hit in DRAM", + "MetricExpr": "100 * MEM_BOUND_STALLS.IFETCH_DRAM_HIT / (MEM_BOUND_STALLS.IFETCH)", + "MetricName": "Inst_Miss_Cost_DRAMHit_Percent" + }, + { + "BriefDescription": "load ops retired per 1000 instruction", + "MetricExpr": "1000 * MEM_UOPS_RETIRED.ALL_LOADS / INST_RETIRED.ANY", + "MetricName": "MemLoadPKI" + }, + { + "BriefDescription": "C1 residency percent per core", + "MetricExpr": "cstate_core@c1\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C1_Core_Residency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "C6 residency percent per core", + "MetricExpr": "cstate_core@c6\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C6_Core_Residency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "C7 residency percent per core", + "MetricExpr": "cstate_core@c7\\-residency@ / 
TSC", + "MetricGroup": "Power", + "MetricName": "C7_Core_Residency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "C2 residency percent per package", + "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C2_Pkg_Residency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "C3 residency percent per package", + "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C3_Pkg_Residency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "C6 residency percent per package", + "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C6_Pkg_Residency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "C7 residency percent per package", + "MetricExpr": "cstate_pkg@c7\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C7_Pkg_Residency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "C8 residency percent per package", + "MetricExpr": "cstate_pkg@c8\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C8_Pkg_Residency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "C9 residency percent per package", + "MetricExpr": "cstate_pkg@c9\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C9_Pkg_Residency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "C10 residency percent per package", + "MetricExpr": "cstate_pkg@c10\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C10_Pkg_Residency", + "ScaleUnit": "100%" + } +] From 4c12f41a14d6c4dd2b4e387eaea249cee68bc01a Mon Sep 17 00:00:00 2001 From: Zhengjun Xing Date: Thu, 24 Nov 2022 11:14:41 +0800 Subject: [PATCH 2666/4122] perf vendor events intel: Update events and metrics for alderlake Update JSON events and metrics for alderlake to perf. 
Based on ADL JSON event list v1.16: https://github.com/intel/perfmon/tree/main/ADL/events Generate the event list and metrics with the converter scripts: https://github.com/intel/perfmon/pull/32 Reviewed-by: Kan Liang Signed-off-by: Xing Zhengjun Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221124031441.110134-4-zhengjun.xing@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/x86/alderlake/adl-metrics.json | 73 +- .../pmu-events/arch/x86/alderlake/cache.json | 1503 +++++------- .../arch/x86/alderlake/floating-point.json | 91 +- .../arch/x86/alderlake/frontend.json | 224 +- .../pmu-events/arch/x86/alderlake/memory.json | 460 ++-- .../pmu-events/arch/x86/alderlake/other.json | 172 +- .../arch/x86/alderlake/pipeline.json | 2008 ++++++----------- .../arch/x86/alderlake/uncore-memory.json | 247 +- .../arch/x86/alderlake/uncore-other.json | 97 +- .../arch/x86/alderlake/virtual-memory.json | 223 +- tools/perf/pmu-events/arch/x86/mapfile.csv | 2 +- 11 files changed, 1967 insertions(+), 3133 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json index e06d26ad5138..edf440e9359a 100644 --- a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json +++ b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json @@ -1287,14 +1287,14 @@ }, { "BriefDescription": "Average CPU Utilization", - "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC", "MetricGroup": "HPC;Summary", "MetricName": "CPU_Utilization", "Unit": "cpu_core" }, { "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]", - "MetricExpr": "Turbo_Utilization * msr@tsc@ / 1000000000 / duration_time", + "MetricExpr": "Turbo_Utilization * TSC / 1000000000 / duration_time", "MetricGroup": "Power;Summary", "MetricName": 
"Average_Frequency", "Unit": "cpu_core" @@ -1337,18 +1337,25 @@ }, { "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]", - "MetricExpr": "64 * (arb@event\\=0x81\\,umask\\=0x1@ + arb@event\\=0x84\\,umask\\=0x1@) / 1000000 / duration_time / 1000", + "MetricExpr": "64 * (UNC_ARB_TRK_REQUESTS.ALL + UNC_ARB_COH_TRK_REQUESTS.ALL) / 1000000 / duration_time / 1000", "MetricGroup": "HPC;Mem;MemoryBW;SoC", "MetricName": "DRAM_BW_Use", "Unit": "cpu_core" }, { "BriefDescription": "Average number of parallel requests to external memory. Accounts for all requests", - "MetricExpr": "UNC_ARB_TRK_OCCUPANCY.ALL / arb@event\\=0x81\\,umask\\=0x1@", + "MetricExpr": "UNC_ARB_TRK_OCCUPANCY.ALL / UNC_ARB_TRK_REQUESTS.ALL", "MetricGroup": "Mem;SoC", "MetricName": "MEM_Parallel_Requests", "Unit": "cpu_core" }, + { + "BriefDescription": "Socket actual clocks when any core is active on that socket", + "MetricExpr": "UNC_CLOCK.SOCKET", + "MetricGroup": "SoC", + "MetricName": "Socket_CLKS", + "Unit": "cpu_core" + }, { "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]", "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u", @@ -1356,6 +1363,12 @@ "MetricName": "IpFarBranch", "Unit": "cpu_core" }, + { + "BriefDescription": "Uncore frequency per die [GHZ]", + "MetricExpr": "Socket_CLKS / #num_dies / duration_time / 1000000000", + "MetricGroup": "SoC", + "MetricName": "UNCORE_FREQ" + }, { "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to frontend stalls.", "MetricExpr": "TOPDOWN_FE_BOUND.ALL / SLOTS", @@ -1902,7 +1915,7 @@ }, { "BriefDescription": "Average CPU Utilization", - "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC", "MetricName": "CPU_Utilization", "Unit": "cpu_atom" }, @@ -1950,62 +1963,72 
@@ }, { "BriefDescription": "C1 residency percent per core", - "MetricExpr": "(cstate_core@c1\\-residency@ / msr@tsc@) * 100", + "MetricExpr": "cstate_core@c1\\-residency@ / TSC", "MetricGroup": "Power", - "MetricName": "C1_Core_Residency" + "MetricName": "C1_Core_Residency", + "ScaleUnit": "100%" }, { "BriefDescription": "C6 residency percent per core", - "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", + "MetricExpr": "cstate_core@c6\\-residency@ / TSC", "MetricGroup": "Power", - "MetricName": "C6_Core_Residency" + "MetricName": "C6_Core_Residency", + "ScaleUnit": "100%" }, { "BriefDescription": "C7 residency percent per core", - "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", + "MetricExpr": "cstate_core@c7\\-residency@ / TSC", "MetricGroup": "Power", - "MetricName": "C7_Core_Residency" + "MetricName": "C7_Core_Residency", + "ScaleUnit": "100%" }, { "BriefDescription": "C2 residency percent per package", - "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", + "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC", "MetricGroup": "Power", - "MetricName": "C2_Pkg_Residency" + "MetricName": "C2_Pkg_Residency", + "ScaleUnit": "100%" }, { "BriefDescription": "C3 residency percent per package", - "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", + "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC", "MetricGroup": "Power", - "MetricName": "C3_Pkg_Residency" + "MetricName": "C3_Pkg_Residency", + "ScaleUnit": "100%" }, { "BriefDescription": "C6 residency percent per package", - "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", + "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC", "MetricGroup": "Power", - "MetricName": "C6_Pkg_Residency" + "MetricName": "C6_Pkg_Residency", + "ScaleUnit": "100%" }, { "BriefDescription": "C7 residency percent per package", - "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", + "MetricExpr": "cstate_pkg@c7\\-residency@ / TSC", "MetricGroup": "Power", - "MetricName": 
"C7_Pkg_Residency" + "MetricName": "C7_Pkg_Residency", + "ScaleUnit": "100%" }, { "BriefDescription": "C8 residency percent per package", - "MetricExpr": "(cstate_pkg@c8\\-residency@ / msr@tsc@) * 100", + "MetricExpr": "cstate_pkg@c8\\-residency@ / TSC", "MetricGroup": "Power", - "MetricName": "C8_Pkg_Residency" + "MetricName": "C8_Pkg_Residency", + "ScaleUnit": "100%" }, { "BriefDescription": "C9 residency percent per package", - "MetricExpr": "(cstate_pkg@c9\\-residency@ / msr@tsc@) * 100", + "MetricExpr": "cstate_pkg@c9\\-residency@ / TSC", "MetricGroup": "Power", - "MetricName": "C9_Pkg_Residency" + "MetricName": "C9_Pkg_Residency", + "ScaleUnit": "100%" }, { "BriefDescription": "C10 residency percent per package", - "MetricExpr": "(cstate_pkg@c10\\-residency@ / msr@tsc@) * 100", + "MetricExpr": "cstate_pkg@c10\\-residency@ / TSC", "MetricGroup": "Power", - "MetricName": "C10_Pkg_Residency" + "MetricName": "C10_Pkg_Residency", + "ScaleUnit": "100%" } ] diff --git a/tools/perf/pmu-events/arch/x86/alderlake/cache.json b/tools/perf/pmu-events/arch/x86/alderlake/cache.json index 2cc62d2779d2..adc9887b8ae0 100644 --- a/tools/perf/pmu-events/arch/x86/alderlake/cache.json +++ b/tools/perf/pmu-events/arch/x86/alderlake/cache.json @@ -1,403 +1,851 @@ [ + { + "BriefDescription": "L1D.HWPF_MISS", + "EventCode": "0x51", + "EventName": "L1D.HWPF_MISS", + "SampleAfterValue": "1000003", + "UMask": "0x20", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of cache lines replaced in L1 data cache.", + "EventCode": "0x51", + "EventName": "L1D.REPLACEMENT", + "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailability.", + "EventCode": "0x48", + "EventName": 
"L1D_PEND_MISS.FB_FULL", + "PublicDescription": "Counts number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailability.", + "CounterMask": "1", + "EdgeDetect": "1", + "EventCode": "0x48", + "EventName": "L1D_PEND_MISS.FB_FULL_PERIODS", + "PublicDescription": "Counts number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event L1D_PEND_MISS.L2_STALLS", + "Deprecated": "1", + "EventCode": "0x48", + "EventName": "L1D_PEND_MISS.L2_STALL", + "SampleAfterValue": "1000003", + "UMask": "0x4", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Number of cycles a demand request has waited due to L1D due to lack of L2 resources.", + "EventCode": "0x48", + "EventName": "L1D_PEND_MISS.L2_STALLS", + "PublicDescription": "Counts number of cycles a demand request has waited due to L1D due to lack of L2 resources. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.", + "SampleAfterValue": "1000003", + "UMask": "0x4", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Number of L1D misses that are outstanding", + "EventCode": "0x48", + "EventName": "L1D_PEND_MISS.PENDING", + "PublicDescription": "Counts number of L1D misses that are outstanding in each cycle, that is each cycle the number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. 
The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch. Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Cycles with L1D load Misses outstanding.", + "CounterMask": "1", + "EventCode": "0x48", + "EventName": "L1D_PEND_MISS.PENDING_CYCLES", + "PublicDescription": "Counts duration of L1D miss outstanding in cycles.", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "L2 cache lines filling L2", + "EventCode": "0x25", + "EventName": "L2_LINES_IN.ALL", + "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.", + "SampleAfterValue": "100003", + "UMask": "0x1f", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Cache lines that have been L2 hardware prefetched but not used by demand accesses", + "EventCode": "0x26", + "EventName": "L2_LINES_OUT.USELESS_HWPF", + "PublicDescription": "Counts the number of cache lines that have been prefetched by the L2 hardware prefetcher but not used by demand access when evicted from the L2 cache", + "SampleAfterValue": "200003", + "UMask": "0x4", + "Unit": "cpu_core" + }, + { + "BriefDescription": "All accesses to L2 cache[This event is alias to L2_RQSTS.REFERENCES]", + "EventCode": "0x24", + "EventName": "L2_REQUEST.ALL", + "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. 
True-miss excludes misses that were merged with ongoing L2 misses.[This event is alias to L2_RQSTS.REFERENCES]", + "SampleAfterValue": "200003", + "UMask": "0xff", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Read requests with true-miss in L2 cache.[This event is alias to L2_RQSTS.MISS]", + "EventCode": "0x24", + "EventName": "L2_REQUEST.MISS", + "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses.[This event is alias to L2_RQSTS.MISS]", + "SampleAfterValue": "200003", + "UMask": "0x3f", + "Unit": "cpu_core" + }, + { + "BriefDescription": "L2 code requests", + "EventCode": "0x24", + "EventName": "L2_RQSTS.ALL_CODE_RD", + "PublicDescription": "Counts the total number of L2 code requests.", + "SampleAfterValue": "200003", + "UMask": "0xe4", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Demand Data Read access L2 cache", + "EventCode": "0x24", + "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD", + "PublicDescription": "Counts Demand Data Read requests accessing the L2 cache. These requests may hit or miss L2 cache. True-miss exclude misses that were merged with ongoing L2 misses. An access is counted once.", + "SampleAfterValue": "200003", + "UMask": "0xe1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Demand requests that miss L2 cache", + "EventCode": "0x24", + "EventName": "L2_RQSTS.ALL_DEMAND_MISS", + "PublicDescription": "Counts demand requests that miss L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0x27", + "Unit": "cpu_core" + }, + { + "BriefDescription": "L2_RQSTS.ALL_HWPF", + "EventCode": "0x24", + "EventName": "L2_RQSTS.ALL_HWPF", + "SampleAfterValue": "200003", + "UMask": "0xf0", + "Unit": "cpu_core" + }, + { + "BriefDescription": "RFO requests to L2 cache.", + "EventCode": "0x24", + "EventName": "L2_RQSTS.ALL_RFO", + "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. 
L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.", + "SampleAfterValue": "200003", + "UMask": "0xe2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "L2 cache hits when fetching instructions, code reads.", + "EventCode": "0x24", + "EventName": "L2_RQSTS.CODE_RD_HIT", + "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.", + "SampleAfterValue": "200003", + "UMask": "0xc4", + "Unit": "cpu_core" + }, + { + "BriefDescription": "L2 cache misses when fetching instructions", + "EventCode": "0x24", + "EventName": "L2_RQSTS.CODE_RD_MISS", + "PublicDescription": "Counts L2 cache misses when fetching instructions.", + "SampleAfterValue": "200003", + "UMask": "0x24", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Demand Data Read requests that hit L2 cache", + "EventCode": "0x24", + "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", + "PublicDescription": "Counts the number of demand Data Read requests initiated by load instructions that hit L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0xc1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Demand Data Read miss L2 cache", + "EventCode": "0x24", + "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS", + "PublicDescription": "Counts demand Data Read requests with true-miss in the L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. An access is counted once.", + "SampleAfterValue": "200003", + "UMask": "0x21", + "Unit": "cpu_core" + }, + { + "BriefDescription": "L2_RQSTS.HWPF_MISS", + "EventCode": "0x24", + "EventName": "L2_RQSTS.HWPF_MISS", + "SampleAfterValue": "200003", + "UMask": "0x30", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Read requests with true-miss in L2 cache.[This event is alias to L2_REQUEST.MISS]", + "EventCode": "0x24", + "EventName": "L2_RQSTS.MISS", + "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. 
True-miss excludes L2 misses that were merged with ongoing L2 misses.[This event is alias to L2_REQUEST.MISS]", + "SampleAfterValue": "200003", + "UMask": "0x3f", + "Unit": "cpu_core" + }, + { + "BriefDescription": "All accesses to L2 cache[This event is alias to L2_REQUEST.ALL]", + "EventCode": "0x24", + "EventName": "L2_RQSTS.REFERENCES", + "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses.[This event is alias to L2_REQUEST.ALL]", + "SampleAfterValue": "200003", + "UMask": "0xff", + "Unit": "cpu_core" + }, + { + "BriefDescription": "RFO requests that hit L2 cache.", + "EventCode": "0x24", + "EventName": "L2_RQSTS.RFO_HIT", + "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0xc2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "RFO requests that miss L2 cache", + "EventCode": "0x24", + "EventName": "L2_RQSTS.RFO_MISS", + "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0x22", + "Unit": "cpu_core" + }, + { + "BriefDescription": "SW prefetch requests that hit L2 cache.", + "EventCode": "0x24", + "EventName": "L2_RQSTS.SWPF_HIT", + "PublicDescription": "Counts Software prefetch requests that hit the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full.", + "SampleAfterValue": "200003", + "UMask": "0xc8", + "Unit": "cpu_core" + }, + { + "BriefDescription": "SW prefetch requests that miss L2 cache.", + "EventCode": "0x24", + "EventName": "L2_RQSTS.SWPF_MISS", + "PublicDescription": "Counts Software prefetch requests that miss the L2 cache. 
Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full.", + "SampleAfterValue": "200003", + "UMask": "0x28", + "Unit": "cpu_core" + }, { "BriefDescription": "Counts the number of cacheable memory requests that miss in the LLC. Counts on a per core basis.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", "EventCode": "0x2e", "EventName": "LONGEST_LAT_CACHE.MISS", - "PEBScounters": "0,1,2,3,4,5", + "PublicDescription": "Counts the number of cacheable memory requests that miss in the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the platform has an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.", "SampleAfterValue": "200003", - "Speculative": "1", "UMask": "0x41", "Unit": "cpu_atom" }, + { + "BriefDescription": "Core-originated cacheable requests that missed L3 (Except hardware prefetches to the L3)", + "EventCode": "0x2e", + "EventName": "LONGEST_LAT_CACHE.MISS", + "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3.", + "SampleAfterValue": "100003", + "UMask": "0x41", + "Unit": "cpu_core" + }, { "BriefDescription": "Counts the number of cacheable memory requests that access the LLC. Counts on a per core basis.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", "EventCode": "0x2e", "EventName": "LONGEST_LAT_CACHE.REFERENCE", - "PEBScounters": "0,1,2,3,4,5", + "PublicDescription": "Counts the number of cacheable memory requests that access the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. 
If the platform has an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.", "SampleAfterValue": "200003", - "Speculative": "1", "UMask": "0x4f", "Unit": "cpu_atom" }, + { + "BriefDescription": "Core-originated cacheable requests that refer to L3 (Except hardware prefetches to the L3)", + "EventCode": "0x2e", + "EventName": "LONGEST_LAT_CACHE.REFERENCE", + "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3.", + "SampleAfterValue": "100003", + "UMask": "0x4f", + "Unit": "cpu_core" + }, { "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the L2, LLC, DRAM or MMIO (Non-DRAM).", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", "EventCode": "0x34", "EventName": "MEM_BOUND_STALLS.IFETCH", - "PEBScounters": "0,1,2,3,4,5", + "PublicDescription": "Counts the number of cycles the core is stalled due to an instruction cache or translation lookaside buffer (TLB) miss which hit in the L2, LLC, DRAM or MMIO (Non-DRAM).", "SampleAfterValue": "200003", - "Speculative": "1", "UMask": "0x38", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in DRAM or MMIO (Non-DRAM).", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", "EventCode": "0x34", "EventName": "MEM_BOUND_STALLS.IFETCH_DRAM_HIT", - "PEBScounters": "0,1,2,3,4,5", + "PublicDescription": "Counts the number of cycles the core is stalled due to an instruction cache or translation lookaside buffer (TLB) miss which hit in DRAM or MMIO (non-DRAM).", "SampleAfterValue": "200003", - "Speculative": "1", "UMask": "0x20", "Unit": "cpu_atom" 
}, { "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the L2 cache.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", "EventCode": "0x34", "EventName": "MEM_BOUND_STALLS.IFETCH_L2_HIT", - "PEBScounters": "0,1,2,3,4,5", + "PublicDescription": "Counts the number of cycles the core is stalled due to an instruction cache or Translation Lookaside Buffer (TLB) miss which hit in the L2 cache.", "SampleAfterValue": "200003", - "Speculative": "1", "UMask": "0x8", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the LLC or other core with HITE/F/M.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", "EventCode": "0x34", "EventName": "MEM_BOUND_STALLS.IFETCH_LLC_HIT", - "PEBScounters": "0,1,2,3,4,5", + "PublicDescription": "Counts the number of cycles the core is stalled due to an instruction cache or Translation Lookaside Buffer (TLB) miss which hit in the Last Level Cache (LLC) or other core with HITE/F/M.", "SampleAfterValue": "200003", - "Speculative": "1", "UMask": "0x10", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in the L2, LLC, DRAM or MMIO (Non-DRAM).", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", "EventCode": "0x34", "EventName": "MEM_BOUND_STALLS.LOAD", - "PEBScounters": "0,1,2,3,4,5", "SampleAfterValue": "200003", - "Speculative": "1", "UMask": "0x7", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in DRAM or MMIO (Non-DRAM).", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", "EventCode": "0x34", "EventName": "MEM_BOUND_STALLS.LOAD_DRAM_HIT", - "PEBScounters": "0,1,2,3,4,5", "SampleAfterValue": "200003", - "Speculative": "1", "UMask": "0x4", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the 
number of cycles the core is stalled due to a demand load which hit in the L2 cache.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", "EventCode": "0x34", "EventName": "MEM_BOUND_STALLS.LOAD_L2_HIT", - "PEBScounters": "0,1,2,3,4,5", "SampleAfterValue": "200003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load which hit in the LLC or other core with HITE/F/M.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", "EventCode": "0x34", "EventName": "MEM_BOUND_STALLS.LOAD_LLC_HIT", - "PEBScounters": "0,1,2,3,4,5", + "PublicDescription": "Counts the number of cycles the core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.", "SampleAfterValue": "200003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_atom" }, + { + "BriefDescription": "Retired load instructions.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_INST_RETIRED.ALL_LOADS", + "PEBS": "1", + "PublicDescription": "Counts all retired load instructions. 
This event accounts for SW prefetch instructions of PREFETCHNTA or PREFETCHT0/1/2 or PREFETCHW.", + "SampleAfterValue": "1000003", + "UMask": "0x81", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired store instructions.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_INST_RETIRED.ALL_STORES", + "PEBS": "1", + "PublicDescription": "Counts all retired store instructions.", + "SampleAfterValue": "1000003", + "UMask": "0x82", + "Unit": "cpu_core" + }, + { + "BriefDescription": "All retired memory instructions.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_INST_RETIRED.ANY", + "PEBS": "1", + "PublicDescription": "Counts all retired memory instructions - loads and stores.", + "SampleAfterValue": "1000003", + "UMask": "0x83", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired load instructions with locked access.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_INST_RETIRED.LOCK_LOADS", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with locked access.", + "SampleAfterValue": "100007", + "UMask": "0x21", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired load instructions that split across a cacheline boundary.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_INST_RETIRED.SPLIT_LOADS", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions that split across a cacheline boundary.", + "SampleAfterValue": "100003", + "UMask": "0x41", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired store instructions that split across a cacheline boundary.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_INST_RETIRED.SPLIT_STORES", + "PEBS": "1", + "PublicDescription": "Counts retired store instructions that split across a cacheline boundary.", + "SampleAfterValue": "100003", + "UMask": "0x42", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired load instructions that miss the STLB.", + "Data_LA": "1", + "EventCode": "0xd0", + 
"EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS", + "PEBS": "1", + "PublicDescription": "Number of retired load instructions that (start a) miss in the 2nd-level TLB (STLB).", + "SampleAfterValue": "100003", + "UMask": "0x11", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired store instructions that miss the STLB.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES", + "PEBS": "1", + "PublicDescription": "Number of retired store instructions that (start a) miss in the 2nd-level TLB (STLB).", + "SampleAfterValue": "100003", + "UMask": "0x12", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Completed demand load uops that miss the L1 d-cache.", + "EventCode": "0x43", + "EventName": "MEM_LOAD_COMPLETED.L1_MISS_ANY", + "PublicDescription": "Number of completed demand load requests that missed the L1 data cache including shadow misses (FB hits, merge to an ongoing L1D miss)", + "SampleAfterValue": "1000003", + "UMask": "0xfd", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired load instructions whose data sources were HitM responses from shared L3", + "Data_LA": "1", + "EventCode": "0xd2", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions whose data sources were HitM responses from shared L3.", + "SampleAfterValue": "20011", + "UMask": "0x4", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache", + "Data_LA": "1", + "EventCode": "0xd2", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache.", + "SampleAfterValue": "20011", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired load instructions whose data sources were HitM responses from shared L3", + "Data_LA": "1", + 
"EventCode": "0xd2", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions whose data sources were HitM responses from shared L3.", + "SampleAfterValue": "20011", + "UMask": "0x4", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired load instructions whose data sources were L3 hit and cross-core snoop missed in on-pkg core cache.", + "Data_LA": "1", + "EventCode": "0xd2", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS", + "PEBS": "1", + "PublicDescription": "Counts the retired load instructions whose data sources were L3 hit and cross-core snoop missed in on-pkg core cache.", + "SampleAfterValue": "20011", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired load instructions whose data sources were hits in L3 without snoops required", + "Data_LA": "1", + "EventCode": "0xd2", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions whose data sources were hits in L3 without snoops required.", + "SampleAfterValue": "100003", + "UMask": "0x8", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache", + "Data_LA": "1", + "EventCode": "0xd2", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache.", + "SampleAfterValue": "20011", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from local dram", + "Data_LA": "1", + "EventCode": "0xd3", + "EventName": "MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM", + "PEBS": "1", + "PublicDescription": "Retired load instructions which data sources missed L3 but serviced from local DRAM.", + "SampleAfterValue": "100007", + "UMask": "0x1", + "Unit": 
"cpu_core" + }, + { + "BriefDescription": "Retired instructions with at least 1 uncacheable load or lock.", + "Data_LA": "1", + "EventCode": "0xd4", + "EventName": "MEM_LOAD_MISC_RETIRED.UC", + "PEBS": "1", + "PublicDescription": "Retired instructions with at least one load to uncacheable memory-type, or at least one cache-line split locked access (Bus Lock).", + "SampleAfterValue": "100007", + "UMask": "0x4", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Number of completed demand load requests that missed the L1, but hit the FB(fill buffer), because a preceding miss to the same cacheline initiated the line to be brought into L1, but data is not yet ready in L1.", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_RETIRED.FB_HIT", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready.", + "SampleAfterValue": "100007", + "UMask": "0x40", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired load instructions with L1 cache hits as data sources", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_RETIRED.L1_HIT", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. 
This event includes all SW prefetches and lock instructions regardless of the data source.", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired load instructions missed L1 cache as data sources", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_RETIRED.L1_MISS", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache.", + "SampleAfterValue": "200003", + "UMask": "0x8", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired load instructions with L2 cache hits as data sources", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_RETIRED.L2_HIT", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with L2 cache hits as data sources.", + "SampleAfterValue": "200003", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired load instructions missed L2 cache as data sources", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_RETIRED.L2_MISS", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions missed L2 cache as data sources.", + "SampleAfterValue": "100021", + "UMask": "0x10", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired load instructions with L3 cache hits as data sources", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_RETIRED.L3_HIT", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L3 cache.", + "SampleAfterValue": "100021", + "UMask": "0x4", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired load instructions missed L3 cache as data sources", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_RETIRED.L3_MISS", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L3 cache.", + "SampleAfterValue": "50021", + "UMask": "0x20", + "Unit": "cpu_core" + }, 
{ "BriefDescription": "Counts the number of load uops retired that hit in DRAM.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", "Data_LA": "1", "EventCode": "0xd1", "EventName": "MEM_LOAD_UOPS_RETIRED.DRAM_HIT", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", "SampleAfterValue": "200003", "UMask": "0x80", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of load uops retired that hit in the L2 cache.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", "Data_LA": "1", "EventCode": "0xd1", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", "SampleAfterValue": "200003", "UMask": "0x2", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of load uops retired that hit in the L3 cache.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", "Data_LA": "1", "EventCode": "0xd1", "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", "SampleAfterValue": "200003", "UMask": "0x4", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of cycles that uops are blocked for any of the following reasons: load buffer, store buffer or RSV full.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", "EventCode": "0x04", "EventName": "MEM_SCHEDULER_BLOCK.ALL", - "PEBScounters": "0,1,2,3,4,5", "SampleAfterValue": "20003", - "Speculative": "1", "UMask": "0x7", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of cycles that uops are blocked due to a load buffer full condition.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", "EventCode": "0x04", "EventName": "MEM_SCHEDULER_BLOCK.LD_BUF", - "PEBScounters": "0,1,2,3,4,5", "SampleAfterValue": "20003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of cycles that uops are blocked due to an RSV full condition.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", "EventCode": "0x04", "EventName": "MEM_SCHEDULER_BLOCK.RSV", - 
"PEBScounters": "0,1,2,3,4,5", "SampleAfterValue": "20003", - "Speculative": "1", "UMask": "0x4", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of cycles that uops are blocked due to a store buffer full condition.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", "EventCode": "0x04", "EventName": "MEM_SCHEDULER_BLOCK.ST_BUF", - "PEBScounters": "0,1,2,3,4,5", "SampleAfterValue": "20003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_atom" }, + { + "BriefDescription": "MEM_STORE_RETIRED.L2_HIT", + "EventCode": "0x44", + "EventName": "MEM_STORE_RETIRED.L2_HIT", + "SampleAfterValue": "200003", + "UMask": "0x1", + "Unit": "cpu_core" + }, { "BriefDescription": "Counts the number of load uops retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.ALL_LOADS", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", + "PublicDescription": "Counts the total number of load uops retired.", "SampleAfterValue": "200003", "UMask": "0x81", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of store uops retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.ALL_STORES", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", + "PublicDescription": "Counts the total number of store uops retired.", "SampleAfterValue": "200003", "UMask": "0x82", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 128 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). 
Only counts with PEBS enabled.", - "CollectPEBSRecord": "2", - "Counter": "0,1", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_128", - "L1_Hit_Indication": "1", "MSRIndex": "0x3F6", "MSRValue": "0x80", "PEBS": "2", - "PEBScounters": "0,1", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 128 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", - "TakenAlone": "1", "UMask": "0x5", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 16 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", - "CollectPEBSRecord": "2", - "Counter": "0,1", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_16", - "L1_Hit_Indication": "1", "MSRIndex": "0x3F6", "MSRValue": "0x10", "PEBS": "2", - "PEBScounters": "0,1", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 16 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", - "TakenAlone": "1", "UMask": "0x5", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 256 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). 
Only counts with PEBS enabled.", - "CollectPEBSRecord": "2", - "Counter": "0,1", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_256", - "L1_Hit_Indication": "1", "MSRIndex": "0x3F6", "MSRValue": "0x100", "PEBS": "2", - "PEBScounters": "0,1", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 256 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", - "TakenAlone": "1", "UMask": "0x5", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 32 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", - "CollectPEBSRecord": "2", - "Counter": "0,1", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_32", - "L1_Hit_Indication": "1", "MSRIndex": "0x3F6", "MSRValue": "0x20", "PEBS": "2", - "PEBScounters": "0,1", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 32 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", - "TakenAlone": "1", "UMask": "0x5", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 4 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). 
Only counts with PEBS enabled.", - "CollectPEBSRecord": "2", - "Counter": "0,1", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_4", - "L1_Hit_Indication": "1", "MSRIndex": "0x3F6", "MSRValue": "0x4", "PEBS": "2", - "PEBScounters": "0,1", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 4 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", - "TakenAlone": "1", "UMask": "0x5", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 512 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", - "CollectPEBSRecord": "2", - "Counter": "0,1", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_512", - "L1_Hit_Indication": "1", "MSRIndex": "0x3F6", "MSRValue": "0x200", "PEBS": "2", - "PEBScounters": "0,1", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 512 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", - "TakenAlone": "1", "UMask": "0x5", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 64 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). 
Only counts with PEBS enabled.", - "CollectPEBSRecord": "2", - "Counter": "0,1", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_64", - "L1_Hit_Indication": "1", "MSRIndex": "0x3F6", "MSRValue": "0x40", "PEBS": "2", - "PEBScounters": "0,1", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 64 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", - "TakenAlone": "1", "UMask": "0x5", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 8 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", - "CollectPEBSRecord": "2", - "Counter": "0,1", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_8", - "L1_Hit_Indication": "1", "MSRIndex": "0x3F6", "MSRValue": "0x8", "PEBS": "2", - "PEBScounters": "0,1", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 8 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", - "TakenAlone": "1", "UMask": "0x5", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of retired split load uops.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", "SampleAfterValue": "200003", "UMask": "0x41", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of stores uops retired. 
Counts with or without PEBS enabled.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.STORE_LATENCY", - "L1_Hit_Indication": "1", "PEBS": "2", - "PEBScounters": "0,1,2,3,4,5", + "PublicDescription": "Counts the number of stores uops retired. Counts with or without PEBS enabled. If PEBS is enabled and a PEBS record is generated, will populate PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", "UMask": "0x6", "Unit": "cpu_atom" }, + { + "BriefDescription": "Retired memory uops for any access", + "EventCode": "0xe5", + "EventName": "MEM_UOP_RETIRED.ANY", + "PublicDescription": "Number of retired micro-operations (uops) for load or store memory accesses", + "SampleAfterValue": "1000003", + "UMask": "0x3", + "Unit": "cpu_core" + }, { "BriefDescription": "Counts demand data reads that were supplied by the L3 cache.", - "Counter": "0,1,2,3,4,5", "EventCode": "0xB7", "EventName": "OCR.DEMAND_DATA_RD.L3_HIT", "MSRIndex": "0x1a6,0x1a7", @@ -408,7 +856,6 @@ }, { "BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop was sent, the snoop hit, and modified data was forwarded.", - "Counter": "0,1,2,3,4,5", "EventCode": "0xB7", "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM", "MSRIndex": "0x1a6,0x1a7", @@ -417,9 +864,18 @@ "UMask": "0x1", "Unit": "cpu_atom" }, + { + "BriefDescription": "Counts demand data reads that resulted in a snoop hit in another cores caches, data forwarding is required as the data is modified.", + "EventCode": "0x2A,0x2B", + "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10003C0001", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_core" + }, { "BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop was sent, the snoop hit, but no data was forwarded.", - "Counter": "0,1,2,3,4,5", "EventCode": 
"0xB7", "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD", "MSRIndex": "0x1a6,0x1a7", @@ -430,7 +886,6 @@ }, { "BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop was sent, the snoop hit, and non-modified data was forwarded.", - "Counter": "0,1,2,3,4,5", "EventCode": "0xB7", "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", @@ -439,9 +894,18 @@ "UMask": "0x1", "Unit": "cpu_atom" }, + { + "BriefDescription": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "EventCode": "0x2A,0x2B", + "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x8003C0001", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_core" + }, { "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were supplied by the L3 cache.", - "Counter": "0,1,2,3,4,5", "EventCode": "0xB7", "EventName": "OCR.DEMAND_RFO.L3_HIT", "MSRIndex": "0x1a6,0x1a7", @@ -452,7 +916,6 @@ }, { "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were supplied by the L3 cache where a snoop was sent, the snoop hit, and modified data was forwarded.", - "Counter": "0,1,2,3,4,5", "EventCode": "0xB7", "EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM", "MSRIndex": "0x1a6,0x1a7", @@ -461,740 +924,8 @@ "UMask": "0x1", "Unit": "cpu_atom" }, - { - "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to instruction cache misses.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x71", - "EventName": "TOPDOWN_FE_BOUND.ICACHE", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x20", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "L1D.HWPF_MISS", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "EventCode": 
"0x51", - "EventName": "L1D.HWPF_MISS", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x20", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Counts the number of cache lines replaced in L1 data cache.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "EventCode": "0x51", - "EventName": "L1D.REPLACEMENT", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "100003", - "Speculative": "1", - "UMask": "0x1", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailability.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "EventCode": "0x48", - "EventName": "L1D_PEND_MISS.FB_FULL", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x2", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailablability.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "CounterMask": "1", - "EdgeDetect": "1", - "EventCode": "0x48", - "EventName": "L1D_PEND_MISS.FB_FULL_PERIODS", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x2", - "Unit": "cpu_core" - }, - { - "BriefDescription": "This event is deprecated. 
Refer to new event L1D_PEND_MISS.L2_STALLS", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "EventCode": "0x48", - "EventName": "L1D_PEND_MISS.L2_STALL", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x4", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Number of cycles a demand request has waited due to L1D due to lack of L2 resources.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "EventCode": "0x48", - "EventName": "L1D_PEND_MISS.L2_STALLS", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x4", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Number of L1D misses that are outstanding", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "EventCode": "0x48", - "EventName": "L1D_PEND_MISS.PENDING", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x1", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Cycles with L1D load Misses outstanding.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "CounterMask": "1", - "EventCode": "0x48", - "EventName": "L1D_PEND_MISS.PENDING_CYCLES", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x1", - "Unit": "cpu_core" - }, - { - "BriefDescription": "L2 cache lines filling L2", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "EventCode": "0x25", - "EventName": "L2_LINES_IN.ALL", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "100003", - "Speculative": "1", - "UMask": "0x1f", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Cache lines that have been L2 hardware prefetched but not used by demand accesses", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "EventCode": "0x26", - "EventName": "L2_LINES_OUT.USELESS_HWPF", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0x4", - "Unit": "cpu_core" - }, - { - "BriefDescription": "All accesses to L2 
cache[This event is alias to L2_RQSTS.REFERENCES]", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "EventCode": "0x24", - "EventName": "L2_REQUEST.ALL", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0xff", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Read requests with true-miss in L2 cache.[This event is alias to L2_RQSTS.MISS]", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "EventCode": "0x24", - "EventName": "L2_REQUEST.MISS", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0x3f", - "Unit": "cpu_core" - }, - { - "BriefDescription": "L2 code requests", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "EventCode": "0x24", - "EventName": "L2_RQSTS.ALL_CODE_RD", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0xe4", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Demand Data Read access L2 cache", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "EventCode": "0x24", - "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0xe1", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Demand requests that miss L2 cache", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "EventCode": "0x24", - "EventName": "L2_RQSTS.ALL_DEMAND_MISS", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0x27", - "Unit": "cpu_core" - }, - { - "BriefDescription": "L2_RQSTS.ALL_HWPF", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "EventCode": "0x24", - "EventName": "L2_RQSTS.ALL_HWPF", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0xf0", - "Unit": "cpu_core" - }, - { - "BriefDescription": "RFO requests to L2 cache.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "EventCode": "0x24", - "EventName": "L2_RQSTS.ALL_RFO", - 
"PEBScounters": "0,1,2,3", - "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0xe2", - "Unit": "cpu_core" - }, - { - "BriefDescription": "L2 cache hits when fetching instructions, code reads.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "EventCode": "0x24", - "EventName": "L2_RQSTS.CODE_RD_HIT", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0xc4", - "Unit": "cpu_core" - }, - { - "BriefDescription": "L2 cache misses when fetching instructions", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "EventCode": "0x24", - "EventName": "L2_RQSTS.CODE_RD_MISS", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0x24", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Demand Data Read requests that hit L2 cache", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "EventCode": "0x24", - "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0xc1", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Demand Data Read miss L2 cache", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "EventCode": "0x24", - "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0x21", - "Unit": "cpu_core" - }, - { - "BriefDescription": "L2_RQSTS.HWPF_MISS", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "EventCode": "0x24", - "EventName": "L2_RQSTS.HWPF_MISS", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0x30", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Read requests with true-miss in L2 cache.[This event is alias to L2_REQUEST.MISS]", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "EventCode": "0x24", - "EventName": "L2_RQSTS.MISS", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": 
"0x3f", - "Unit": "cpu_core" - }, - { - "BriefDescription": "All accesses to L2 cache[This event is alias to L2_REQUEST.ALL]", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "EventCode": "0x24", - "EventName": "L2_RQSTS.REFERENCES", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0xff", - "Unit": "cpu_core" - }, - { - "BriefDescription": "RFO requests that hit L2 cache.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "EventCode": "0x24", - "EventName": "L2_RQSTS.RFO_HIT", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0xc2", - "Unit": "cpu_core" - }, - { - "BriefDescription": "RFO requests that miss L2 cache", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "EventCode": "0x24", - "EventName": "L2_RQSTS.RFO_MISS", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0x22", - "Unit": "cpu_core" - }, - { - "BriefDescription": "SW prefetch requests that hit L2 cache.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "EventCode": "0x24", - "EventName": "L2_RQSTS.SWPF_HIT", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0xc8", - "Unit": "cpu_core" - }, - { - "BriefDescription": "SW prefetch requests that miss L2 cache.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "EventCode": "0x24", - "EventName": "L2_RQSTS.SWPF_MISS", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0x28", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Core-originated cacheable requests that missed L3 (Except hardware prefetches to the L3)", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", - "EventCode": "0x2e", - "EventName": "LONGEST_LAT_CACHE.MISS", - "PEBScounters": "0,1,2,3,4,5,6,7", - "SampleAfterValue": "100003", - "Speculative": "1", - "UMask": "0x41", - "Unit": "cpu_core" - }, - { - "BriefDescription": 
"Core-originated cacheable requests that refer to L3 (Except hardware prefetches to the L3)", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", - "EventCode": "0x2e", - "EventName": "LONGEST_LAT_CACHE.REFERENCE", - "PEBScounters": "0,1,2,3,4,5,6,7", - "SampleAfterValue": "100003", - "Speculative": "1", - "UMask": "0x4f", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Retired load instructions.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "Data_LA": "1", - "EventCode": "0xd0", - "EventName": "MEM_INST_RETIRED.ALL_LOADS", - "PEBS": "1", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "1000003", - "UMask": "0x81", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Retired store instructions.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "Data_LA": "1", - "EventCode": "0xd0", - "EventName": "MEM_INST_RETIRED.ALL_STORES", - "L1_Hit_Indication": "1", - "PEBS": "1", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "1000003", - "UMask": "0x82", - "Unit": "cpu_core" - }, - { - "BriefDescription": "All retired memory instructions.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "Data_LA": "1", - "EventCode": "0xd0", - "EventName": "MEM_INST_RETIRED.ANY", - "L1_Hit_Indication": "1", - "PEBS": "1", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "1000003", - "UMask": "0x83", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Retired load instructions with locked access.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "Data_LA": "1", - "EventCode": "0xd0", - "EventName": "MEM_INST_RETIRED.LOCK_LOADS", - "PEBS": "1", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "100007", - "UMask": "0x21", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Retired load instructions that split across a cacheline boundary.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "Data_LA": "1", - "EventCode": "0xd0", - "EventName": "MEM_INST_RETIRED.SPLIT_LOADS", - "PEBS": "1", - "PEBScounters": "0,1,2,3", - 
"SampleAfterValue": "100003", - "UMask": "0x41", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Retired store instructions that split across a cacheline boundary.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "Data_LA": "1", - "EventCode": "0xd0", - "EventName": "MEM_INST_RETIRED.SPLIT_STORES", - "L1_Hit_Indication": "1", - "PEBS": "1", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "100003", - "UMask": "0x42", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Retired load instructions that miss the STLB.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "Data_LA": "1", - "EventCode": "0xd0", - "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS", - "PEBS": "1", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "100003", - "UMask": "0x11", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Retired store instructions that miss the STLB.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "Data_LA": "1", - "EventCode": "0xd0", - "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES", - "L1_Hit_Indication": "1", - "PEBS": "1", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "100003", - "UMask": "0x12", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Completed demand load uops that miss the L1 d-cache.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "EventCode": "0x43", - "EventName": "MEM_LOAD_COMPLETED.L1_MISS_ANY", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0xfd", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Retired load instructions whose data sources were HitM responses from shared L3", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "Data_LA": "1", - "EventCode": "0xd2", - "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD", - "PEBS": "1", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "20011", - "UMask": "0x4", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core 
cache", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "Data_LA": "1", - "EventCode": "0xd2", - "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT", - "PEBS": "1", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "20011", - "UMask": "0x2", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Retired load instructions whose data sources were HitM responses from shared L3", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "Data_LA": "1", - "EventCode": "0xd2", - "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM", - "PEBS": "1", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "20011", - "UMask": "0x4", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Retired load instructions whose data sources were L3 hit and cross-core snoop missed in on-pkg core cache.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "Data_LA": "1", - "EventCode": "0xd2", - "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS", - "PEBS": "1", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "20011", - "UMask": "0x1", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Retired load instructions whose data sources were hits in L3 without snoops required", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "Data_LA": "1", - "EventCode": "0xd2", - "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE", - "PEBS": "1", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "100003", - "UMask": "0x8", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "Data_LA": "1", - "EventCode": "0xd2", - "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD", - "PEBS": "1", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "20011", - "UMask": "0x2", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from local dram", - "Counter": "0,1,2,3", - "Data_LA": "1", - "EventCode": "0xd3", - 
"EventName": "MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "100007", - "UMask": "0x1", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Retired instructions with at least 1 uncacheable load or lock.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "Data_LA": "1", - "EventCode": "0xd4", - "EventName": "MEM_LOAD_MISC_RETIRED.UC", - "PEBS": "1", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "100007", - "UMask": "0x4", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Number of completed demand load requests that missed the L1, but hit the FB(fill buffer), because a preceding miss to the same cacheline initiated the line to be brought into L1, but data is not yet ready in L1.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "Data_LA": "1", - "EventCode": "0xd1", - "EventName": "MEM_LOAD_RETIRED.FB_HIT", - "PEBS": "1", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "100007", - "UMask": "0x40", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Retired load instructions with L1 cache hits as data sources", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "Data_LA": "1", - "EventCode": "0xd1", - "EventName": "MEM_LOAD_RETIRED.L1_HIT", - "PEBS": "1", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "1000003", - "UMask": "0x1", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Retired load instructions missed L1 cache as data sources", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "Data_LA": "1", - "EventCode": "0xd1", - "EventName": "MEM_LOAD_RETIRED.L1_MISS", - "PEBS": "1", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "200003", - "UMask": "0x8", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Retired load instructions with L2 cache hits as data sources", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "Data_LA": "1", - "EventCode": "0xd1", - "EventName": "MEM_LOAD_RETIRED.L2_HIT", - "PEBS": "1", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "200003", - 
"UMask": "0x2", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Retired load instructions missed L2 cache as data sources", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "Data_LA": "1", - "EventCode": "0xd1", - "EventName": "MEM_LOAD_RETIRED.L2_MISS", - "PEBS": "1", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "100021", - "UMask": "0x10", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Retired load instructions with L3 cache hits as data sources", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "Data_LA": "1", - "EventCode": "0xd1", - "EventName": "MEM_LOAD_RETIRED.L3_HIT", - "PEBS": "1", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "100021", - "UMask": "0x4", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Retired load instructions missed L3 cache as data sources", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "Data_LA": "1", - "EventCode": "0xd1", - "EventName": "MEM_LOAD_RETIRED.L3_MISS", - "PEBS": "1", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "50021", - "UMask": "0x20", - "Unit": "cpu_core" - }, - { - "BriefDescription": "MEM_STORE_RETIRED.L2_HIT", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "EventCode": "0x44", - "EventName": "MEM_STORE_RETIRED.L2_HIT", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "200003", - "UMask": "0x1", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Retired memory uops for any access", - "Counter": "0,1,2,3,4,5,6,7", - "EventCode": "0xe5", - "EventName": "MEM_UOP_RETIRED.ANY", - "PEBScounters": "0,1,2,3,4,5,6,7", - "SampleAfterValue": "1000003", - "UMask": "0x3", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Counts demand data reads that resulted in a snoop hit in another cores caches, data forwarding is required as the data is modified.", - "Counter": "0,1,2,3,4,5,6,7", - "EventCode": "0x2A,0x2B", - "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x10003C0001", - "SampleAfterValue": "100003", - "UMask": 
"0x1", - "Unit": "cpu_core" - }, - { - "BriefDescription": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", - "Counter": "0,1,2,3,4,5,6,7", - "EventCode": "0x2A,0x2B", - "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", - "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x8003C0001", - "SampleAfterValue": "100003", - "UMask": "0x1", - "Unit": "cpu_core" - }, { "BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that resulted in a snoop hit in another cores caches, data forwarding is required as the data is modified.", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0x2A,0x2B", "EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM", "MSRIndex": "0x1a6,0x1a7", @@ -1205,139 +936,111 @@ }, { "BriefDescription": "OFFCORE_REQUESTS.ALL_REQUESTS", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "OFFCORE_REQUESTS.ALL_REQUESTS", - "PEBScounters": "0,1,2,3", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x80", "Unit": "cpu_core" }, { "BriefDescription": "Demand and prefetch data reads", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "OFFCORE_REQUESTS.DATA_RD", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x8", "Unit": "cpu_core" }, { "BriefDescription": "Demand Data Read requests sent to uncore", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts the Demand Data Read requests sent to uncore. 
Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "This event is deprecated. Refer to new event OFFCORE_REQUESTS_OUTSTANDING.DATA_RD", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", + "Deprecated": "1", "Errata": "ADL038", "EventCode": "0x20", "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", - "PEBScounters": "0,1,2,3", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x8", "Unit": "cpu_core" }, { "BriefDescription": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "CounterMask": "1", "Errata": "ADL038", "EventCode": "0x20", "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", - "PEBScounters": "0,1,2,3", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x8", "Unit": "cpu_core" }, { "BriefDescription": "For every cycle where the core is waiting on at least 1 outstanding Demand RFO request, increments by 1.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "CounterMask": "1", "EventCode": "0x20", "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", - "PEBScounters": "0,1,2,3", + "PublicDescription": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x4", "Unit": "cpu_core" }, { "BriefDescription": "OFFCORE_REQUESTS_OUTSTANDING.DATA_RD", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "Errata": "ADL038", "EventCode": "0x20", "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DATA_RD", - "PEBScounters": "0,1,2,3", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x8", "Unit": "cpu_core" }, { "BriefDescription": "Number of PREFETCHNTA instructions executed.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x40", "EventName": "SW_PREFETCH_ACCESS.NTA", - "PEBScounters": "0,1,2,3", + 
"PublicDescription": "Counts the number of PREFETCHNTA instructions executed.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Number of PREFETCHW instructions executed.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x40", "EventName": "SW_PREFETCH_ACCESS.PREFETCHW", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts the number of PREFETCHW instructions executed.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x8", "Unit": "cpu_core" }, { "BriefDescription": "Number of PREFETCHT0 instructions executed.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x40", "EventName": "SW_PREFETCH_ACCESS.T0", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts the number of PREFETCHT0 instructions executed.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x40", "EventName": "SW_PREFETCH_ACCESS.T1_T2", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts the number of PREFETCHT1 or PREFETCHT2 instructions executed.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x4", "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to instruction cache misses.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.ICACHE", + "SampleAfterValue": "1000003", + "UMask": "0x20", + "Unit": "cpu_atom" } ] diff --git a/tools/perf/pmu-events/arch/x86/alderlake/floating-point.json b/tools/perf/pmu-events/arch/x86/alderlake/floating-point.json index 48a4605fc057..3eb7cab9b431 100644 --- a/tools/perf/pmu-events/arch/x86/alderlake/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/alderlake/floating-point.json @@ -1,165 +1,124 @@ [ - { - "BriefDescription": 
"Counts the number of floating point operations retired that required microcode assist.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc3", - "EventName": "MACHINE_CLEARS.FP_ASSIST", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "20003", - "Speculative": "1", - "UMask": "0x4", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of floating point divide uops retired (x87 and SSE, including x87 sqrt).", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc2", - "EventName": "UOPS_RETIRED.FPDIV", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "2000003", - "UMask": "0x8", - "Unit": "cpu_atom" - }, { "BriefDescription": "ARITH.FPDIV_ACTIVE", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "1", "EventCode": "0xb0", "EventName": "ARITH.FPDIV_ACTIVE", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Counts all microcode FP assists.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc1", "EventName": "ASSISTS.FP", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts all microcode Floating Point assists.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "ASSISTS.SSE_AVX_MIX", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc1", "EventName": "ASSISTS.SSE_AVX_MIX", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x10", "Unit": "cpu_core" }, { "BriefDescription": "FP_ARITH_DISPATCHED.PORT_0", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xb3", "EventName": "FP_ARITH_DISPATCHED.PORT_0", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": 
"FP_ARITH_DISPATCHED.PORT_1", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xb3", "EventName": "FP_ARITH_DISPATCHED.PORT_1", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "FP_ARITH_DISPATCHED.PORT_5", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xb3", "EventName": "FP_ARITH_DISPATCHED.PORT_5", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x4", "Unit": "cpu_core" }, { "BriefDescription": "Counts number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc7", "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. 
The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", "SampleAfterValue": "100003", "UMask": "0x4", "Unit": "cpu_core" }, { "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc7", "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", "SampleAfterValue": "100003", "UMask": "0x8", "Unit": "cpu_core" }, { "BriefDescription": "Counts number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. 
FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc7", "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", "SampleAfterValue": "100003", "UMask": "0x10", "Unit": "cpu_core" }, { "BriefDescription": "Counts number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc7", "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. 
DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", "SampleAfterValue": "100003", "UMask": "0x20", "Unit": "cpu_core" }, { "BriefDescription": "Counts number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc7", "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", "SampleAfterValue": "100003", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Counts number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. 
FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc7", "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", "SampleAfterValue": "100003", "UMask": "0x2", "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of floating point operations retired that required microcode assist.", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.FP_ASSIST", + "PublicDescription": "Counts the number of floating point operations retired that required microcode assist, which is not a reflection of the number of FP operations, instructions or uops.", + "SampleAfterValue": "20003", + "UMask": "0x4", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of floating point divide uops retired (x87 and SSE, including x87 sqrt).", + "EventCode": "0xc2", + "EventName": "UOPS_RETIRED.FPDIV", + "PEBS": "1", + "SampleAfterValue": "2000003", + "UMask": "0x8", + "Unit": "cpu_atom" } ] diff --git a/tools/perf/pmu-events/arch/x86/alderlake/frontend.json b/tools/perf/pmu-events/arch/x86/alderlake/frontend.json index da1a7ba0e568..250cd128b674 100644 --- a/tools/perf/pmu-events/arch/x86/alderlake/frontend.json +++ b/tools/perf/pmu-events/arch/x86/alderlake/frontend.json @@ -1,536 +1,416 @@ [ { "BriefDescription": "Counts the total number of BACLEARS due to all branch types including conditional and unconditional jumps, 
returns, and indirect branches.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", "EventCode": "0xe6", "EventName": "BACLEARS.ANY", - "PEBScounters": "0,1,2,3,4,5", + "PublicDescription": "Counts the total number of BACLEARS, which occur when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend. Includes BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_atom" }, - { - "BriefDescription": "Counts the number of requests to the instruction cache for one or more bytes of a cache line.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x80", - "EventName": "ICACHE.ACCESSES", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0x3", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of instruction cache misses.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x80", - "EventName": "ICACHE.MISSES", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0x2", - "Unit": "cpu_atom" - }, { "BriefDescription": "Stalls caused by changing prefix length of the instruction.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x87", "EventName": "DECODE.LCP", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. 
This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.", "SampleAfterValue": "500009", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Cycles the Microcode Sequencer is busy.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x87", "EventName": "DECODE.MS_BUSY", - "PEBScounters": "0,1,2,3", "SampleAfterValue": "500009", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "DSB-to-MITE switch true penalty cycles.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x61", "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Decode Stream Buffer (DSB) is a Uop-cache that holds translations of previously fetched instructions that were decoded by the legacy x86 decode pipeline (MITE). This event counts fetch penalty cycles when a transition occurs from DSB to MITE.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "Retired Instructions who experienced DSB miss.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", "EventName": "FRONTEND_RETIRED.ANY_DSB_MISS", "MSRIndex": "0x3F7", "MSRValue": "0x1", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.", "SampleAfterValue": "100007", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Retired Instructions who experienced a critical DSB miss.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", "EventName": "FRONTEND_RETIRED.DSB_MISS", "MSRIndex": "0x3F7", "MSRValue": "0x11", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Number of retired Instructions that experienced a critical DSB (Decode stream buffer i.e. 
the decoded instruction-cache) miss. Critical means stalls were exposed to the back-end as a result of the DSB miss.", "SampleAfterValue": "100007", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Retired Instructions who experienced iTLB true miss.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", "EventName": "FRONTEND_RETIRED.ITLB_MISS", "MSRIndex": "0x3F7", "MSRValue": "0x14", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss.", "SampleAfterValue": "100007", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", "EventName": "FRONTEND_RETIRED.L1I_MISS", "MSRIndex": "0x3F7", "MSRValue": "0x12", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts retired Instructions who experienced Instruction L1 Cache true miss.", "SampleAfterValue": "100007", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", "EventName": "FRONTEND_RETIRED.L2_MISS", "MSRIndex": "0x3F7", "MSRValue": "0x13", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts retired Instructions who experienced Instruction L2 Cache true miss.", "SampleAfterValue": "100007", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Retired instructions after front-end starvation of at least 1 cycle", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", "EventName": "FRONTEND_RETIRED.LATENCY_GE_1", "MSRIndex": "0x3F7", "MSRValue": "0x600106", "PEBS": "1", - "PEBScounters": 
"0,1,2,3,4,5,6,7", + "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 1 cycle which was not interrupted by a back-end stall.", "SampleAfterValue": "100007", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", "EventName": "FRONTEND_RETIRED.LATENCY_GE_128", "MSRIndex": "0x3F7", "MSRValue": "0x608006", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.", "SampleAfterValue": "100007", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", "EventName": "FRONTEND_RETIRED.LATENCY_GE_16", "MSRIndex": "0x3F7", "MSRValue": "0x601006", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. 
During this period the front-end delivered no uops.", "SampleAfterValue": "100007", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Retired instructions after front-end starvation of at least 2 cycles", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", "EventName": "FRONTEND_RETIRED.LATENCY_GE_2", "MSRIndex": "0x3F7", "MSRValue": "0x600206", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 2 cycles which was not interrupted by a back-end stall.", "SampleAfterValue": "100007", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", "EventName": "FRONTEND_RETIRED.LATENCY_GE_256", "MSRIndex": "0x3F7", "MSRValue": "0x610006", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.", "SampleAfterValue": "100007", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1", "MSRIndex": "0x3F7", "MSRValue": "0x100206", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end 
had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.", "SampleAfterValue": "100007", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", "EventName": "FRONTEND_RETIRED.LATENCY_GE_32", "MSRIndex": "0x3F7", "MSRValue": "0x602006", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.", "SampleAfterValue": "100007", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", "EventName": "FRONTEND_RETIRED.LATENCY_GE_4", "MSRIndex": "0x3F7", "MSRValue": "0x600406", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.", "SampleAfterValue": "100007", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", "EventName": "FRONTEND_RETIRED.LATENCY_GE_512", "MSRIndex": "0x3F7", "MSRValue": "0x620006", "PEBS": "1", - 
"PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.", "SampleAfterValue": "100007", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", "EventName": "FRONTEND_RETIRED.LATENCY_GE_64", "MSRIndex": "0x3F7", "MSRValue": "0x604006", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall.", "SampleAfterValue": "100007", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 8 cycles which was not interrupted by a back-end stall.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", "EventName": "FRONTEND_RETIRED.LATENCY_GE_8", "MSRIndex": "0x3F7", "MSRValue": "0x600806", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. 
During this period the front-end delivered no uops.", "SampleAfterValue": "100007", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "FRONTEND_RETIRED.MS_FLOWS", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", "EventName": "FRONTEND_RETIRED.MS_FLOWS", "MSRIndex": "0x3F7", "MSRValue": "0x8", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "100007", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", "EventName": "FRONTEND_RETIRED.STLB_MISS", "MSRIndex": "0x3F7", "MSRValue": "0x15", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.", "SampleAfterValue": "100007", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "FRONTEND_RETIRED.UNKNOWN_BRANCH", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", "EventName": "FRONTEND_RETIRED.UNKNOWN_BRANCH", "MSRIndex": "0x3F7", "MSRValue": "0x17", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "100007", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of requests to the instruction cache for one or more bytes of a cache line.", + "EventCode": "0x80", + "EventName": "ICACHE.ACCESSES", + "PublicDescription": "Counts the total number of requests to the instruction cache. The event only counts new cache line accesses, so that multiple back to back fetches to the exact same cache line or byte chunk count as one. 
Specifically, the event counts when accesses from sequential code crosses the cache line boundary, or when a branch target is moved to a new line or to a non-sequential byte chunk of the same line.", + "SampleAfterValue": "200003", + "UMask": "0x3", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of instruction cache misses.", + "EventCode": "0x80", + "EventName": "ICACHE.MISSES", + "PublicDescription": "Counts the number of missed requests to the instruction cache. The event only counts new cache line accesses, so that multiple back to back fetches to the exact same cache line and byte chunk count as one. Specifically, the event counts when accesses from sequential code crosses the cache line boundary, or when a branch target is moved to a new line or to a non-sequential byte chunk of the same line.", + "SampleAfterValue": "200003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, { "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "ICACHE_DATA.STALLS", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. 
The decode pipeline works at a 32 Byte granularity.", "SampleAfterValue": "500009", - "Speculative": "1", "UMask": "0x4", "Unit": "cpu_core" }, { "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x83", "EventName": "ICACHE_TAG.STALLS", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss.", "SampleAfterValue": "200003", - "Speculative": "1", "UMask": "0x4", "Unit": "cpu_core" }, { "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "CounterMask": "1", "EventCode": "0x79", "EventName": "IDQ.DSB_CYCLES_ANY", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x8", "Unit": "cpu_core" }, { "BriefDescription": "Cycles DSB is delivering optimal number of Uops", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "CounterMask": "6", "EventCode": "0x79", "EventName": "IDQ.DSB_CYCLES_OK", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. 
During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x8", "Unit": "cpu_core" }, { "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x79", "EventName": "IDQ.DSB_UOPS", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x8", "Unit": "cpu_core" }, { "BriefDescription": "Cycles MITE is delivering any Uop", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "CounterMask": "1", "EventCode": "0x79", "EventName": "IDQ.MITE_CYCLES_ANY", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x4", "Unit": "cpu_core" }, { "BriefDescription": "Cycles MITE is delivering optimal number of Uops", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "CounterMask": "6", "EventCode": "0x79", "EventName": "IDQ.MITE_CYCLES_OK", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. 
During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x4", "Unit": "cpu_core" }, { "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x79", "EventName": "IDQ.MITE_UOPS", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x4", "Unit": "cpu_core" }, { "BriefDescription": "Cycles when uops are being delivered to IDQ while MS is busy", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "CounterMask": "1", "EventCode": "0x79", "EventName": "IDQ.MS_CYCLES_ANY", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. 
Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x20", "Unit": "cpu_core" }, { "BriefDescription": "Number of switches from DSB or MITE to the MS", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "CounterMask": "1", "EdgeDetect": "1", "EventCode": "0x79", "EventName": "IDQ.MS_SWITCHES", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x20", "Unit": "cpu_core" }, { "BriefDescription": "Uops delivered to IDQ while MS is busy", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x79", "EventName": "IDQ.MS_UOPS", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS).", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x20", "Unit": "cpu_core" }, { "BriefDescription": "Uops not delivered by IDQ when backend of the machine is not stalled", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0x9c", "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts the number of uops not delivered to by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. 
This event counts for one SMT thread in a given cycle.", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Cycles when no uops are not delivered by the IDQ when backend of the machine is not stalled", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "6", "EventCode": "0x9c", "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle.", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Cycles when optimal number of uops was delivered to the back-end when the back-end is not stalled", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "1", "EventCode": "0x9c", "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK", "Invert": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. 
This event counts for one SMT thread in a given cycle.", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" } diff --git a/tools/perf/pmu-events/arch/x86/alderlake/memory.json b/tools/perf/pmu-events/arch/x86/alderlake/memory.json index f894e4a0212b..7595eb4ab46f 100644 --- a/tools/perf/pmu-events/arch/x86/alderlake/memory.json +++ b/tools/perf/pmu-events/arch/x86/alderlake/memory.json @@ -1,79 +1,224 @@ [ + { + "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.", + "CounterMask": "6", + "EventCode": "0xa3", + "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS", + "SampleAfterValue": "1000003", + "UMask": "0x6", + "Unit": "cpu_core" + }, { "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to any number of reasons, including an L1 miss, WCB full, pagewalk, store address block or store data block, on a load that retires.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", "EventCode": "0x05", "EventName": "LD_HEAD.ANY_AT_RET", - "PEBScounters": "0,1,2,3,4,5", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0xff", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to a core bound stall including a store address match, a DTLB miss or a page walk that detains the load from retiring.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", "EventCode": "0x05", "EventName": "LD_HEAD.L1_BOUND_AT_RET", - "PEBScounters": "0,1,2,3,4,5", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0xf4", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to other block cases.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", "EventCode": "0x05", "EventName": "LD_HEAD.OTHER_AT_RET", - "PEBScounters": "0,1,2,3,4,5", + "PublicDescription": 
"Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to other block cases such as pipeline conflicts, fences, etc.", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0xc0", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a pagewalk.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", "EventCode": "0x05", "EventName": "LD_HEAD.PGWALK_AT_RET", - "PEBScounters": "0,1,2,3,4,5", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0xa0", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a store address match.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", "EventCode": "0x05", "EventName": "LD_HEAD.ST_ADDR_AT_RET", - "PEBScounters": "0,1,2,3,4,5", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x84", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of machine clears due to memory ordering caused by a snoop from an external agent. Does not count internally generated machine clears such as those due to memory disambiguation.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", "EventCode": "0xc3", "EventName": "MACHINE_CLEARS.MEMORY_ORDERING", - "PEBScounters": "0,1,2,3,4,5", "SampleAfterValue": "20003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_atom" }, + { + "BriefDescription": "Number of machine clears due to memory ordering conflicts.", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.MEMORY_ORDERING", + "PublicDescription": "Counts the number of Machine Clears detected dye to memory ordering. 
Memory Ordering Machine Clears may apply when a memory read may not conform to the memory ordering rules of the x86 architecture", + "SampleAfterValue": "100003", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", + "CounterMask": "2", + "EventCode": "0x47", + "EventName": "MEMORY_ACTIVITY.CYCLES_L1D_MISS", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", + "CounterMask": "3", + "EventCode": "0x47", + "EventName": "MEMORY_ACTIVITY.STALLS_L1D_MISS", + "SampleAfterValue": "1000003", + "UMask": "0x3", + "Unit": "cpu_core" + }, + { + "BriefDescription": "MEMORY_ACTIVITY.STALLS_L2_MISS", + "CounterMask": "5", + "EventCode": "0x47", + "EventName": "MEMORY_ACTIVITY.STALLS_L2_MISS", + "SampleAfterValue": "1000003", + "UMask": "0x5", + "Unit": "cpu_core" + }, + { + "BriefDescription": "MEMORY_ACTIVITY.STALLS_L3_MISS", + "CounterMask": "9", + "EventCode": "0x47", + "EventName": "MEMORY_ACTIVITY.STALLS_L3_MISS", + "SampleAfterValue": "1000003", + "UMask": "0x9", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128", + "MSRIndex": "0x3F6", + "MSRValue": "0x80", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles. 
Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "1009", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16", + "MSRIndex": "0x3F6", + "MSRValue": "0x10", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles. Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "20011", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256", + "MSRIndex": "0x3F6", + "MSRValue": "0x100", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles. Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "503", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32", + "MSRIndex": "0x3F6", + "MSRValue": "0x20", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles. 
Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "100007", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4", + "MSRIndex": "0x3F6", + "MSRValue": "0x4", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles. Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512", + "MSRIndex": "0x3F6", + "MSRValue": "0x200", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles. Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "101", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64", + "MSRIndex": "0x3F6", + "MSRValue": "0x40", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles. 
Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "2003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8", + "MSRIndex": "0x3F6", + "MSRValue": "0x8", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles. Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "50021", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired memory store access operations. A PDist event for PEBS Store Latency Facility.", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.STORE_SAMPLE", + "PEBS": "2", + "PublicDescription": "Counts Retired memory accesses with at least 1 store operation. This PEBS event is the precisely-distributed (PDist) trigger covering all stores uops for sampling by the PEBS Store Latency Facility. 
The facility is described in Intel SDM Volume 3 section 19.9.8", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_core" + }, { "BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.", - "Counter": "0,1,2,3,4,5", "EventCode": "0xB7", "EventName": "OCR.DEMAND_DATA_RD.L3_MISS", "MSRIndex": "0x1a6,0x1a7", @@ -84,7 +229,16 @@ }, { "BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.", - "Counter": "0,1,2,3,4,5", + "EventCode": "0x2A,0x2B", + "EventName": "OCR.DEMAND_DATA_RD.L3_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FBFC00001", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.", "EventCode": "0xB7", "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_LOCAL", "MSRIndex": "0x1a6,0x1a7", @@ -95,7 +249,6 @@ }, { "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.", - "Counter": "0,1,2,3,4,5", "EventCode": "0xB7", "EventName": "OCR.DEMAND_RFO.L3_MISS", "MSRIndex": "0x1a6,0x1a7", @@ -104,9 +257,18 @@ "UMask": "0x1", "Unit": "cpu_atom" }, + { + "BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.", + "EventCode": "0x2A,0x2B", + "EventName": "OCR.DEMAND_RFO.L3_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FBFC00002", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_core" + }, { "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.", - "Counter": "0,1,2,3,4,5", "EventCode": "0xB7", "EventName": "OCR.DEMAND_RFO.L3_MISS_LOCAL", "MSRIndex": "0x1a6,0x1a7", @@ -116,241 +278,21 @@ "Unit": "cpu_atom" }, { - "BriefDescription": "Execution 
stalls while L3 cache miss demand load is outstanding.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "CounterMask": "6", - "EventCode": "0xa3", - "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x6", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Number of machine clears due to memory ordering conflicts.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", - "EventCode": "0xc3", - "EventName": "MACHINE_CLEARS.MEMORY_ORDERING", - "PEBScounters": "0,1,2,3,4,5,6,7", + "BriefDescription": "Demand Data Read requests who miss L3 cache", + "EventCode": "0x21", + "EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD", + "PublicDescription": "Demand Data Read requests who miss L3 cache.", "SampleAfterValue": "100003", - "Speculative": "1", - "UMask": "0x2", + "UMask": "0x10", "Unit": "cpu_core" }, { - "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "CounterMask": "2", - "EventCode": "0x47", - "EventName": "MEMORY_ACTIVITY.CYCLES_L1D_MISS", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x2", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "CounterMask": "3", - "EventCode": "0x47", - "EventName": "MEMORY_ACTIVITY.STALLS_L1D_MISS", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x3", - "Unit": "cpu_core" - }, - { - "BriefDescription": "MEMORY_ACTIVITY.STALLS_L2_MISS", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "CounterMask": "5", - "EventCode": "0x47", - "EventName": "MEMORY_ACTIVITY.STALLS_L2_MISS", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x5", - "Unit": "cpu_core" - }, - { - 
"BriefDescription": "MEMORY_ACTIVITY.STALLS_L3_MISS", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "CounterMask": "9", - "EventCode": "0x47", - "EventName": "MEMORY_ACTIVITY.STALLS_L3_MISS", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x9", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.", - "CollectPEBSRecord": "2", - "Counter": "1,2,3,4,5,6,7", - "Data_LA": "1", - "EventCode": "0xcd", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128", - "MSRIndex": "0x3F6", - "MSRValue": "0x80", - "PEBS": "2", - "PEBScounters": "1,2,3,4,5,6,7", - "SampleAfterValue": "1009", - "TakenAlone": "1", - "UMask": "0x1", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.", - "CollectPEBSRecord": "2", - "Counter": "1,2,3,4,5,6,7", - "Data_LA": "1", - "EventCode": "0xcd", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16", - "MSRIndex": "0x3F6", - "MSRValue": "0x10", - "PEBS": "2", - "PEBScounters": "1,2,3,4,5,6,7", - "SampleAfterValue": "20011", - "TakenAlone": "1", - "UMask": "0x1", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.", - "CollectPEBSRecord": "2", - "Counter": "1,2,3,4,5,6,7", - "Data_LA": "1", - "EventCode": "0xcd", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256", - "MSRIndex": "0x3F6", - "MSRValue": "0x100", - "PEBS": "2", - "PEBScounters": "1,2,3,4,5,6,7", - "SampleAfterValue": "503", - "TakenAlone": "1", - "UMask": "0x1", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.", - "CollectPEBSRecord": "2", - "Counter": "1,2,3,4,5,6,7", - "Data_LA": "1", - 
"EventCode": "0xcd", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32", - "MSRIndex": "0x3F6", - "MSRValue": "0x20", - "PEBS": "2", - "PEBScounters": "1,2,3,4,5,6,7", - "SampleAfterValue": "100007", - "TakenAlone": "1", - "UMask": "0x1", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.", - "CollectPEBSRecord": "2", - "Counter": "1,2,3,4,5,6,7", - "Data_LA": "1", - "EventCode": "0xcd", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4", - "MSRIndex": "0x3F6", - "MSRValue": "0x4", - "PEBS": "2", - "PEBScounters": "1,2,3,4,5,6,7", - "SampleAfterValue": "100003", - "TakenAlone": "1", - "UMask": "0x1", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.", - "CollectPEBSRecord": "2", - "Counter": "1,2,3,4,5,6,7", - "Data_LA": "1", - "EventCode": "0xcd", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512", - "MSRIndex": "0x3F6", - "MSRValue": "0x200", - "PEBS": "2", - "PEBScounters": "1,2,3,4,5,6,7", - "SampleAfterValue": "101", - "TakenAlone": "1", - "UMask": "0x1", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.", - "CollectPEBSRecord": "2", - "Counter": "1,2,3,4,5,6,7", - "Data_LA": "1", - "EventCode": "0xcd", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64", - "MSRIndex": "0x3F6", - "MSRValue": "0x40", - "PEBS": "2", - "PEBScounters": "1,2,3,4,5,6,7", - "SampleAfterValue": "2003", - "TakenAlone": "1", - "UMask": "0x1", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.", - "CollectPEBSRecord": "2", - "Counter": "1,2,3,4,5,6,7", - "Data_LA": "1", - "EventCode": "0xcd", - "EventName": 
"MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8", - "MSRIndex": "0x3F6", - "MSRValue": "0x8", - "PEBS": "2", - "PEBScounters": "1,2,3,4,5,6,7", - "SampleAfterValue": "50021", - "TakenAlone": "1", - "UMask": "0x1", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Retired memory store access operations. A PDist event for PEBS Store Latency Facility.", - "CollectPEBSRecord": "2", - "Data_LA": "1", - "EventCode": "0xcd", - "EventName": "MEM_TRANS_RETIRED.STORE_SAMPLE", - "PEBS": "2", - "SampleAfterValue": "1000003", - "UMask": "0x2", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.", - "Counter": "0,1,2,3,4,5,6,7", - "EventCode": "0x2A,0x2B", - "EventName": "OCR.DEMAND_DATA_RD.L3_MISS", - "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x3FBFC00001", - "SampleAfterValue": "100003", - "UMask": "0x1", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.", - "Counter": "0,1,2,3,4,5,6,7", - "EventCode": "0x2A,0x2B", - "EventName": "OCR.DEMAND_RFO.L3_MISS", - "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x3FBFC00002", - "SampleAfterValue": "100003", - "UMask": "0x1", + "BriefDescription": "For every cycle, increments by the number of demand data read requests pending that are known to have missed the L3 cache.", + "EventCode": "0x20", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD", + "PublicDescription": "For every cycle, increments by the number of demand data read requests pending that are known to have missed the L3 cache. 
Note that this does not capture all elapsed cycles while requests are outstanding - only cycles from when the requests were known by the requesting core to have missed the L3 cache.", + "SampleAfterValue": "2000003", + "UMask": "0x10", "Unit": "cpu_core" } ] diff --git a/tools/perf/pmu-events/arch/x86/alderlake/other.json b/tools/perf/pmu-events/arch/x86/alderlake/other.json index c49d8ce27310..329c611d7cf7 100644 --- a/tools/perf/pmu-events/arch/x86/alderlake/other.json +++ b/tools/perf/pmu-events/arch/x86/alderlake/other.json @@ -1,7 +1,46 @@ [ + { + "BriefDescription": "ASSISTS.HARDWARE", + "EventCode": "0xc1", + "EventName": "ASSISTS.HARDWARE", + "SampleAfterValue": "100003", + "UMask": "0x4", + "Unit": "cpu_core" + }, + { + "BriefDescription": "ASSISTS.PAGE_FAULT", + "EventCode": "0xc1", + "EventName": "ASSISTS.PAGE_FAULT", + "SampleAfterValue": "1000003", + "UMask": "0x8", + "Unit": "cpu_core" + }, + { + "BriefDescription": "CORE_POWER.LICENSE_1", + "EventCode": "0x28", + "EventName": "CORE_POWER.LICENSE_1", + "SampleAfterValue": "200003", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "CORE_POWER.LICENSE_2", + "EventCode": "0x28", + "EventName": "CORE_POWER.LICENSE_2", + "SampleAfterValue": "200003", + "UMask": "0x4", + "Unit": "cpu_core" + }, + { + "BriefDescription": "CORE_POWER.LICENSE_3", + "EventCode": "0x28", + "EventName": "CORE_POWER.LICENSE_3", + "SampleAfterValue": "200003", + "UMask": "0x8", + "Unit": "cpu_core" + }, { "BriefDescription": "Counts modified writebacks from L1 cache and L2 cache that have any type of response.", - "Counter": "0,1,2,3,4,5", "EventCode": "0xB7", "EventName": "OCR.COREWB_M.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", @@ -12,7 +51,6 @@ }, { "BriefDescription": "Counts demand data reads that have any type of response.", - "Counter": "0,1,2,3,4,5", "EventCode": "0xB7", "EventName": "OCR.DEMAND_DATA_RD.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", @@ -21,91 +59,8 @@ "UMask": "0x1", "Unit": "cpu_atom" }, - { 
- "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that have any type of response.", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xB7", - "EventName": "OCR.DEMAND_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x10002", - "SampleAfterValue": "100003", - "UMask": "0x1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts streaming stores that have any type of response.", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xB7", - "EventName": "OCR.STREAMING_WR.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x10800", - "SampleAfterValue": "100003", - "UMask": "0x1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "ASSISTS.HARDWARE", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", - "EventCode": "0xc1", - "EventName": "ASSISTS.HARDWARE", - "PEBScounters": "0,1,2,3,4,5,6,7", - "SampleAfterValue": "100003", - "Speculative": "1", - "UMask": "0x4", - "Unit": "cpu_core" - }, - { - "BriefDescription": "ASSISTS.PAGE_FAULT", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", - "EventCode": "0xc1", - "EventName": "ASSISTS.PAGE_FAULT", - "PEBScounters": "0,1,2,3,4,5,6,7", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x8", - "Unit": "cpu_core" - }, - { - "BriefDescription": "CORE_POWER.LICENSE_1", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "EventCode": "0x28", - "EventName": "CORE_POWER.LICENSE_1", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0x2", - "Unit": "cpu_core" - }, - { - "BriefDescription": "CORE_POWER.LICENSE_2", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "EventCode": "0x28", - "EventName": "CORE_POWER.LICENSE_2", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0x4", - "Unit": "cpu_core" - }, - { - "BriefDescription": "CORE_POWER.LICENSE_3", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "EventCode": 
"0x28", - "EventName": "CORE_POWER.LICENSE_3", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0x8", - "Unit": "cpu_core" - }, { "BriefDescription": "Counts demand data reads that have any type of response.", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0x2A,0x2B", "EventName": "OCR.DEMAND_DATA_RD.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", @@ -116,7 +71,6 @@ }, { "BriefDescription": "Counts demand data reads that were supplied by DRAM.", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0x2A,0x2B", "EventName": "OCR.DEMAND_DATA_RD.DRAM", "MSRIndex": "0x1a6,0x1a7", @@ -125,9 +79,18 @@ "UMask": "0x1", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that have any type of response.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10002", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, { "BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that have any type of response.", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0x2A,0x2B", "EventName": "OCR.DEMAND_RFO.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", @@ -138,7 +101,16 @@ }, { "BriefDescription": "Counts streaming stores that have any type of response.", - "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xB7", + "EventName": "OCR.STREAMING_WR.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10800", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts streaming stores that have any type of response.", "EventCode": "0x2A,0x2B", "EventName": "OCR.STREAMING_WR.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", @@ -149,68 +121,52 @@ }, { "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread.", - "CollectPEBSRecord": "2", - 
"Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xa5", "EventName": "RS.EMPTY", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into starvation periods (e.g. branch mispredictions or i-cache misses)", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x7", "Unit": "cpu_core" }, { "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "1", "EdgeDetect": "1", "EventCode": "0xa5", "EventName": "RS.EMPTY_COUNT", "Invert": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to closely sample on front-end latency issues (see the FRONTEND_RETIRED event of designated precise events)", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x7", "Unit": "cpu_core" }, { "BriefDescription": "This event is deprecated. Refer to new event RS.EMPTY_COUNT", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "1", + "Deprecated": "1", "EdgeDetect": "1", "EventCode": "0xa5", "EventName": "RS_EMPTY.COUNT", "Invert": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x7", "Unit": "cpu_core" }, { "BriefDescription": "This event is deprecated. 
Refer to new event RS.EMPTY", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", + "Deprecated": "1", "EventCode": "0xa5", "EventName": "RS_EMPTY.CYCLES", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x7", "Unit": "cpu_core" }, { "BriefDescription": "XQ.FULL_CYCLES", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "CounterMask": "1", "EventCode": "0x2d", "EventName": "XQ.FULL_CYCLES", - "PEBScounters": "0,1,2,3", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" } diff --git a/tools/perf/pmu-events/arch/x86/alderlake/pipeline.json b/tools/perf/pmu-events/arch/x86/alderlake/pipeline.json index 1a137f7f8b7e..f46fa7ba168a 100644 --- a/tools/perf/pmu-events/arch/x86/alderlake/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/alderlake/pipeline.json @@ -1,2168 +1,1634 @@ [ + { + "BriefDescription": "This event is deprecated. Refer to new event ARITH.DIV_ACTIVE", + "CounterMask": "1", + "Deprecated": "1", + "EventCode": "0xb0", + "EventName": "ARITH.DIVIDER_ACTIVE", + "SampleAfterValue": "1000003", + "UMask": "0x9", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations.", + "CounterMask": "1", + "EventCode": "0xb0", + "EventName": "ARITH.DIV_ACTIVE", + "PublicDescription": "Counts cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.", + "SampleAfterValue": "1000003", + "UMask": "0x9", + "Unit": "cpu_core" + }, + { + "BriefDescription": "This event is deprecated. 
Refer to new event ARITH.FPDIV_ACTIVE", + "CounterMask": "1", + "Deprecated": "1", + "EventCode": "0xb0", + "EventName": "ARITH.FP_DIVIDER_ACTIVE", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "This event counts the cycles the integer divider is busy.", + "EventCode": "0xb0", + "EventName": "ARITH.IDIV_ACTIVE", + "SampleAfterValue": "1000003", + "UMask": "0x8", + "Unit": "cpu_core" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event ARITH.IDIV_ACTIVE", + "CounterMask": "1", + "Deprecated": "1", + "EventCode": "0xb0", + "EventName": "ARITH.INT_DIVIDER_ACTIVE", + "SampleAfterValue": "1000003", + "UMask": "0x8", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Number of occurrences where a microcode assist is invoked by hardware.", + "EventCode": "0xc1", + "EventName": "ASSISTS.ANY", + "PublicDescription": "Counts the number of occurrences where a microcode assist is invoked by hardware. Examples include AD (page Access Dirty), FP and AVX related assists.", + "SampleAfterValue": "100003", + "UMask": "0x1b", + "Unit": "cpu_core" + }, { "BriefDescription": "Counts the total number of branch instructions retired for all branch types.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.ALL_BRANCHES", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", + "PublicDescription": "Counts the total number of instructions in which the instruction pointer (IP) of the processor is resteered due to a branch instruction and the branch instruction successfully retires. 
All branch type instructions are accounted for.", "SampleAfterValue": "200003", "Unit": "cpu_atom" }, + { + "BriefDescription": "All branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES", + "PEBS": "1", + "PublicDescription": "Counts all branch instructions retired.", + "SampleAfterValue": "400009", + "Unit": "cpu_core" + }, { "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.NEAR_CALL", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", + "Deprecated": "1", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.CALL", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", "SampleAfterValue": "200003", "UMask": "0xf9", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of retired JCC (Jump on Conditional Code) branch instructions retired, includes both taken and not taken branches.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.COND", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", "SampleAfterValue": "200003", "UMask": "0x7e", "Unit": "cpu_atom" }, - { - "BriefDescription": "Counts the number of taken JCC (Jump on Conditional Code) branch instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc4", - "EventName": "BR_INST_RETIRED.COND_TAKEN", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0xfe", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of far branch instructions retired, includes far jump, far call and return, and interrupt call and return.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc4", - "EventName": "BR_INST_RETIRED.FAR_BRANCH", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0xbf", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of near indirect JMP and near indirect CALL branch instructions retired.", - 
"CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc4", - "EventName": "BR_INST_RETIRED.INDIRECT", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0xeb", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of near indirect CALL branch instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc4", - "EventName": "BR_INST_RETIRED.INDIRECT_CALL", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0xfb", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.INDIRECT_CALL", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc4", - "EventName": "BR_INST_RETIRED.IND_CALL", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0xfb", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.COND", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc4", - "EventName": "BR_INST_RETIRED.JCC", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0x7e", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of near CALL branch instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc4", - "EventName": "BR_INST_RETIRED.NEAR_CALL", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0xf9", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of near RET branch instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc4", - "EventName": "BR_INST_RETIRED.NEAR_RETURN", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0xf7", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "This event is 
deprecated. Refer to new event BR_INST_RETIRED.INDIRECT", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc4", - "EventName": "BR_INST_RETIRED.NON_RETURN_IND", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0xeb", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of near relative CALL branch instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc4", - "EventName": "BR_INST_RETIRED.REL_CALL", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0xfd", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.NEAR_RETURN", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc4", - "EventName": "BR_INST_RETIRED.RETURN", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0xf7", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "This event is deprecated. 
Refer to new event BR_INST_RETIRED.COND_TAKEN", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc4", - "EventName": "BR_INST_RETIRED.TAKEN_JCC", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0xfe", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the total number of mispredicted branch instructions retired for all branch types.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc5", - "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of mispredicted JCC (Jump on Conditional Code) branch instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc5", - "EventName": "BR_MISP_RETIRED.COND", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0x7e", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of mispredicted taken JCC (Jump on Conditional Code) branch instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc5", - "EventName": "BR_MISP_RETIRED.COND_TAKEN", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0xfe", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of mispredicted near indirect JMP and near indirect CALL branch instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc5", - "EventName": "BR_MISP_RETIRED.INDIRECT", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0xeb", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of mispredicted near indirect CALL branch instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc5", - "EventName": 
"BR_MISP_RETIRED.INDIRECT_CALL", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0xfb", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.INDIRECT_CALL", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc5", - "EventName": "BR_MISP_RETIRED.IND_CALL", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0xfb", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.COND", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc5", - "EventName": "BR_MISP_RETIRED.JCC", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0x7e", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.INDIRECT", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc5", - "EventName": "BR_MISP_RETIRED.NON_RETURN_IND", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0xeb", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of mispredicted near RET branch instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc5", - "EventName": "BR_MISP_RETIRED.RETURN", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0xf7", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.COND_TAKEN", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc5", - "EventName": "BR_MISP_RETIRED.TAKEN_JCC", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0xfe", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of unhalted core clock cycles. 
(Fixed event)", - "CollectPEBSRecord": "2", - "Counter": "Fixed counter 1", - "EventName": "CPU_CLK_UNHALTED.CORE", - "PEBScounters": "33", - "SampleAfterValue": "2000003", - "Speculative": "1", - "UMask": "0x2", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of unhalted core clock cycles.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x3c", - "EventName": "CPU_CLK_UNHALTED.CORE_P", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "2000003", - "Speculative": "1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of unhalted reference clock cycles at TSC frequency. (Fixed event)", - "CollectPEBSRecord": "2", - "Counter": "Fixed counter 2", - "EventName": "CPU_CLK_UNHALTED.REF_TSC", - "PEBScounters": "34", - "SampleAfterValue": "2000003", - "Speculative": "1", - "UMask": "0x3", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of unhalted reference clock cycles at TSC frequency.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x3c", - "EventName": "CPU_CLK_UNHALTED.REF_TSC_P", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "2000003", - "Speculative": "1", - "UMask": "0x1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of unhalted core clock cycles. (Fixed event)", - "CollectPEBSRecord": "2", - "Counter": "Fixed counter 1", - "EventName": "CPU_CLK_UNHALTED.THREAD", - "PEBScounters": "33", - "SampleAfterValue": "2000003", - "Speculative": "1", - "UMask": "0x2", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of unhalted core clock cycles.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x3c", - "EventName": "CPU_CLK_UNHALTED.THREAD_P", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "2000003", - "Speculative": "1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the total number of instructions retired. 
(Fixed event)", - "CollectPEBSRecord": "2", - "Counter": "Fixed counter 0", - "EventName": "INST_RETIRED.ANY", - "PEBS": "1", - "PEBScounters": "32", - "SampleAfterValue": "2000003", - "UMask": "0x1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the total number of instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc0", - "EventName": "INST_RETIRED.ANY_P", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "2000003", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "This event is deprecated. Refer to new event LD_BLOCKS.ADDRESS_ALIAS", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x03", - "EventName": "LD_BLOCKS.4K_ALIAS", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "UMask": "0x4", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of retired loads that are blocked because it initially appears to be store forward blocked, but subsequently is shown not to be blocked based on 4K alias check.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x03", - "EventName": "LD_BLOCKS.ADDRESS_ALIAS", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "UMask": "0x4", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of retired loads that are blocked because its address exactly matches an older store whose data is not ready.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x03", - "EventName": "LD_BLOCKS.DATA_UNKNOWN", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "UMask": "0x1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of machine clears due to memory ordering in which an internal load passes an older store within the same CPU.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc3", - "EventName": 
"MACHINE_CLEARS.DISAMBIGUATION", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "20003", - "Speculative": "1", - "UMask": "0x8", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of machines clears due to memory renaming.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc3", - "EventName": "MACHINE_CLEARS.MRN_NUKE", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x80", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of machine clears due to a page fault. Counts both I-Side and D-Side (Loads/Stores) page faults. A page fault occurs when either the page is not present, or an access violation occurs.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc3", - "EventName": "MACHINE_CLEARS.PAGE_FAULT", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "20003", - "Speculative": "1", - "UMask": "0x20", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of machine clears that flush the pipeline and restart the machine with the use of microcode due to SMC, MEMORY_ORDERING, FP_ASSISTS, PAGE_FAULT, DISAMBIGUATION, and FPC_VIRTUAL_TRAP.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc3", - "EventName": "MACHINE_CLEARS.SLOW", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "20003", - "Speculative": "1", - "UMask": "0x6f", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of machine clears due to program modifying data (self modifying code) within 1K of a recently fetched code page.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc3", - "EventName": "MACHINE_CLEARS.SMC", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "20003", - "Speculative": "1", - "UMask": "0x1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of issue slots not consumed by the backend due to a micro-sequencer (MS) 
scoreboard, which stalls the front-end from issuing from the UROM until a specified older uop retires.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x75", - "EventName": "SERIALIZATION.NON_C01_MS_SCB", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0x2", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x73", - "EventName": "TOPDOWN_BAD_SPECULATION.ALL", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to fast nukes such as memory ordering and memory disambiguation machine clears.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x73", - "EventName": "TOPDOWN_BAD_SPECULATION.FASTNUKE", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x2", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a machine clear (nuke) of any kind including memory ordering and memory disambiguation.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x73", - "EventName": "TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x3", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to branch mispredicts.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x73", - "EventName": 
"TOPDOWN_BAD_SPECULATION.MISPREDICT", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x4", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to a machine clear (nuke).", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x73", - "EventName": "TOPDOWN_BAD_SPECULATION.NUKE", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the total number of issue slots every cycle that were not consumed by the backend due to backend stalls.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x74", - "EventName": "TOPDOWN_BE_BOUND.ALL", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to certain allocation restrictions.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x74", - "EventName": "TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to memory reservation stalls in which a scheduler is not able to accept uops.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x74", - "EventName": "TOPDOWN_BE_BOUND.MEM_SCHEDULER", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x2", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to IEC or FPC RAT stalls, which can be due to FIQ or IEC reservation 
stalls in which the integer, floating point or SIMD scheduler is not able to accept uops.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x74", - "EventName": "TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x8", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to the physical register file unable to accept an entry (marble stalls).", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x74", - "EventName": "TOPDOWN_BE_BOUND.REGISTER", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x20", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to the reorder buffer being full (ROB stalls).", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x74", - "EventName": "TOPDOWN_BE_BOUND.REORDER_BUFFER", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x40", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to scoreboards from the instruction queue (IQ), jump execution unit (JEU), or microcode sequencer (MS).", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x74", - "EventName": "TOPDOWN_BE_BOUND.SERIALIZATION", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x10", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the total number of issue slots every cycle that were not consumed by the backend due to frontend stalls.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x71", - "EventName": "TOPDOWN_FE_BOUND.ALL", - "PEBScounters": "0,1,2,3,4,5", 
- "SampleAfterValue": "1000003", - "Speculative": "1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BACLEARS.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x71", - "EventName": "TOPDOWN_FE_BOUND.BRANCH_DETECT", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x2", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BTCLEARS.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x71", - "EventName": "TOPDOWN_FE_BOUND.BRANCH_RESTEER", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x40", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to the microcode sequencer (MS).", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x71", - "EventName": "TOPDOWN_FE_BOUND.CISC", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to decode stalls.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x71", - "EventName": "TOPDOWN_FE_BOUND.DECODE", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x8", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x71", - "EventName": "TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH", - 
"PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x8d", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to a latency related stalls including BACLEARs, BTCLEARs, ITLB misses, and ICache misses.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x71", - "EventName": "TOPDOWN_FE_BOUND.FRONTEND_LATENCY", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x72", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to ITLB misses.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x71", - "EventName": "TOPDOWN_FE_BOUND.ITLB", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x10", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to other common frontend stalls not categorized.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x71", - "EventName": "TOPDOWN_FE_BOUND.OTHER", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x80", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to wrong predecodes.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x71", - "EventName": "TOPDOWN_FE_BOUND.PREDECODE", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x4", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the total number of consumed retirement slots.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc2", - "EventName": 
"TOPDOWN_RETIRING.ALL", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the total number of uops retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc2", - "EventName": "UOPS_RETIRED.ALL", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "2000003", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of integer divide uops retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc2", - "EventName": "UOPS_RETIRED.IDIV", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "2000003", - "UMask": "0x10", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of uops that are from complex flows issued by the micro-sequencer (MS).", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc2", - "EventName": "UOPS_RETIRED.MS", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "2000003", - "UMask": "0x1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of x87 uops retired, includes those in MS flows.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc2", - "EventName": "UOPS_RETIRED.X87", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "2000003", - "UMask": "0x2", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "This event is deprecated. 
Refer to new event ARITH.DIV_ACTIVE", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", - "CounterMask": "1", - "EventCode": "0xb0", - "EventName": "ARITH.DIVIDER_ACTIVE", - "PEBScounters": "0,1,2,3,4,5,6,7", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x9", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", - "CounterMask": "1", - "EventCode": "0xb0", - "EventName": "ARITH.DIV_ACTIVE", - "PEBScounters": "0,1,2,3,4,5,6,7", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x9", - "Unit": "cpu_core" - }, - { - "BriefDescription": "This event is deprecated. Refer to new event ARITH.FPDIV_ACTIVE", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", - "CounterMask": "1", - "EventCode": "0xb0", - "EventName": "ARITH.FP_DIVIDER_ACTIVE", - "PEBScounters": "0,1,2,3,4,5,6,7", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x1", - "Unit": "cpu_core" - }, - { - "BriefDescription": "This event counts the cycles the integer divider is busy.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", - "EventCode": "0xb0", - "EventName": "ARITH.IDIV_ACTIVE", - "PEBScounters": "0,1,2,3,4,5,6,7", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x8", - "Unit": "cpu_core" - }, - { - "BriefDescription": "This event is deprecated. 
Refer to new event ARITH.IDIV_ACTIVE", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", - "CounterMask": "1", - "EventCode": "0xb0", - "EventName": "ARITH.INT_DIVIDER_ACTIVE", - "PEBScounters": "0,1,2,3,4,5,6,7", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x8", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Number of occurrences where a microcode assist is invoked by hardware.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", - "EventCode": "0xc1", - "EventName": "ASSISTS.ANY", - "PEBScounters": "0,1,2,3,4,5,6,7", - "SampleAfterValue": "100003", - "Speculative": "1", - "UMask": "0x1b", - "Unit": "cpu_core" - }, - { - "BriefDescription": "All branch instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", - "EventCode": "0xc4", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", - "SampleAfterValue": "400009", - "Unit": "cpu_core" - }, { "BriefDescription": "Conditional branch instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.COND", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts conditional branch instructions retired.", "SampleAfterValue": "400009", "UMask": "0x11", "Unit": "cpu_core" }, { "BriefDescription": "Not taken branch instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.COND_NTAKEN", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts not taken branch instructions retired.", "SampleAfterValue": "400009", "UMask": "0x10", "Unit": "cpu_core" }, { - "BriefDescription": "Taken conditional branch instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", + "BriefDescription": "Counts the number of taken JCC (Jump on Conditional Code) branch instructions retired.", "EventCode": "0xc4", 
"EventName": "BR_INST_RETIRED.COND_TAKEN", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "SampleAfterValue": "200003", + "UMask": "0xfe", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Taken conditional branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.COND_TAKEN", + "PEBS": "1", + "PublicDescription": "Counts taken conditional branch instructions retired.", "SampleAfterValue": "400009", "UMask": "0x1", "Unit": "cpu_core" }, { - "BriefDescription": "Far branch instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", + "BriefDescription": "Counts the number of far branch instructions retired, includes far jump, far call and return, and interrupt call and return.", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.FAR_BRANCH", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "SampleAfterValue": "200003", + "UMask": "0xbf", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Far branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.FAR_BRANCH", + "PEBS": "1", + "PublicDescription": "Counts far branch instructions retired.", "SampleAfterValue": "100007", "UMask": "0x40", "Unit": "cpu_core" }, { - "BriefDescription": "Indirect near branch instructions retired (excluding returns)", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", + "BriefDescription": "Counts the number of near indirect JMP and near indirect CALL branch instructions retired.", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.INDIRECT", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "SampleAfterValue": "200003", + "UMask": "0xeb", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Indirect near branch instructions retired (excluding returns)", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.INDIRECT", + "PEBS": "1", + "PublicDescription": "Counts near indirect branch instructions retired excluding returns. 
TSX abort is an indirect branch.", "SampleAfterValue": "100003", "UMask": "0x80", "Unit": "cpu_core" }, { - "BriefDescription": "Direct and indirect near call instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", + "BriefDescription": "Counts the number of near indirect CALL branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.INDIRECT_CALL", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfb", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.INDIRECT_CALL", + "Deprecated": "1", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.IND_CALL", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfb", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.COND", + "Deprecated": "1", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.JCC", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x7e", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of near CALL branch instructions retired.", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.NEAR_CALL", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "SampleAfterValue": "200003", + "UMask": "0xf9", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Direct and indirect near call instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.NEAR_CALL", + "PEBS": "1", + "PublicDescription": "Counts both direct and indirect near call instructions retired.", "SampleAfterValue": "100007", "UMask": "0x2", "Unit": "cpu_core" }, { - "BriefDescription": "Return instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", + "BriefDescription": "Counts the number of near RET branch instructions retired.", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.NEAR_RETURN", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "SampleAfterValue": "200003", 
+ "UMask": "0xf7", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Return instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.NEAR_RETURN", + "PEBS": "1", + "PublicDescription": "Counts return instructions retired.", "SampleAfterValue": "100007", "UMask": "0x8", "Unit": "cpu_core" }, { "BriefDescription": "Taken branch instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.NEAR_TAKEN", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts taken branch instructions retired.", "SampleAfterValue": "400009", "UMask": "0x20", "Unit": "cpu_core" }, { - "BriefDescription": "All mispredicted branch instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", + "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.INDIRECT", + "Deprecated": "1", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.NON_RETURN_IND", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xeb", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of near relative CALL branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.REL_CALL", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfd", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.NEAR_RETURN", + "Deprecated": "1", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.RETURN", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xf7", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "This event is deprecated. 
Refer to new event BR_INST_RETIRED.COND_TAKEN", + "Deprecated": "1", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.TAKEN_JCC", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfe", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the total number of mispredicted branch instructions retired for all branch types.", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts the total number of mispredicted branch instructions retired. All branch type instructions are accounted for. Prediction of the branch target address enables the processor to begin executing instructions before the non-speculative execution path is known. The branch prediction unit (BPU) predicts the target address based on the instruction pointer (IP) of the branch and on the execution path through which execution reached this IP. A branch misprediction occurs when the prediction is wrong, and results in discarding all instructions executed in the speculative path and re-fetching from the correct path.", + "SampleAfterValue": "200003", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "All mispredicted branch instructions retired.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", + "PEBS": "1", + "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch. 
When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.", "SampleAfterValue": "400009", "Unit": "cpu_core" }, { - "BriefDescription": "Mispredicted conditional branch instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", + "BriefDescription": "Counts the number of mispredicted JCC (Jump on Conditional Code) branch instructions retired.", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.COND", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "SampleAfterValue": "200003", + "UMask": "0x7e", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Mispredicted conditional branch instructions retired.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.COND", + "PEBS": "1", + "PublicDescription": "Counts mispredicted conditional branch instructions retired.", "SampleAfterValue": "400009", "UMask": "0x11", "Unit": "cpu_core" }, { "BriefDescription": "Mispredicted non-taken conditional branch instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.COND_NTAKEN", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts the number of conditional branch instructions retired that were mispredicted and the branch direction was not taken.", "SampleAfterValue": "400009", "UMask": "0x10", "Unit": "cpu_core" }, { - "BriefDescription": "number of branch instructions retired that were mispredicted and taken.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", + "BriefDescription": "Counts the number of mispredicted taken JCC (Jump on Conditional Code) branch instructions retired.", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.COND_TAKEN", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "SampleAfterValue": "200003", + "UMask": "0xfe", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "number 
of branch instructions retired that were mispredicted and taken.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.COND_TAKEN", + "PEBS": "1", + "PublicDescription": "Counts taken conditional mispredicted branch instructions retired.", "SampleAfterValue": "400009", "UMask": "0x1", "Unit": "cpu_core" }, { - "BriefDescription": "Mispredicted indirect CALL retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", + "BriefDescription": "Counts the number of mispredicted near indirect JMP and near indirect CALL branch instructions retired.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.INDIRECT", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xeb", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of mispredicted near indirect CALL branch instructions retired.", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.INDIRECT_CALL", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "SampleAfterValue": "200003", + "UMask": "0xfb", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Mispredicted indirect CALL retired.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.INDIRECT_CALL", + "PEBS": "1", + "PublicDescription": "Counts retired mispredicted indirect (near taken) CALL instructions, including both register and memory indirect.", "SampleAfterValue": "400009", "UMask": "0x2", "Unit": "cpu_core" }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.INDIRECT_CALL", + "Deprecated": "1", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.IND_CALL", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfb", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "This event is deprecated. 
Refer to new event BR_MISP_RETIRED.COND", + "Deprecated": "1", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.JCC", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x7e", + "Unit": "cpu_atom" + }, { "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts number of near branch instructions retired that were mispredicted and taken.", "SampleAfterValue": "400009", "UMask": "0x20", "Unit": "cpu_core" }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.INDIRECT", + "Deprecated": "1", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.NON_RETURN_IND", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xeb", + "Unit": "cpu_atom" + }, { "BriefDescription": "This event counts the number of mispredicted ret instructions retired. Non PEBS", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.RET", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts mispredicted return instructions retired.", "SampleAfterValue": "100007", "UMask": "0x8", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of mispredicted near RET branch instructions retired.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.RETURN", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xf7", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "This event is deprecated. 
Refer to new event BR_MISP_RETIRED.COND_TAKEN", + "Deprecated": "1", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.TAKEN_JCC", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfe", + "Unit": "cpu_atom" + }, { "BriefDescription": "Core clocks when the thread is in the C0.1 light-weight slower wakeup time but more power saving optimized state.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xec", "EventName": "CPU_CLK_UNHALTED.C01", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts core clocks when the thread is in the C0.1 light-weight slower wakeup time but more power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x10", "Unit": "cpu_core" }, { "BriefDescription": "Core clocks when the thread is in the C0.2 light-weight faster wakeup time but less power saving optimized state.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xec", "EventName": "CPU_CLK_UNHALTED.C02", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts core clocks when the thread is in the C0.2 light-weight faster wakeup time but less power saving optimized state. 
This state can be entered via the TPAUSE or UMWAIT instructions.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x20", "Unit": "cpu_core" }, { "BriefDescription": "Core clocks when the thread is in the C0.1 or C0.2 or running a PAUSE in C0 ACPI state.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xec", "EventName": "CPU_CLK_UNHALTED.C0_WAIT", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts core clocks when the thread is in the C0.1 or C0.2 power saving optimized states (TPAUSE or UMWAIT instructions) or running the PAUSE instruction.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x70", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of unhalted core clock cycles. (Fixed event)", + "EventName": "CPU_CLK_UNHALTED.CORE", + "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. The core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses fixed counter 1.", + "SampleAfterValue": "2000003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of unhalted core clock cycles.", + "EventCode": "0x3c", + "EventName": "CPU_CLK_UNHALTED.CORE_P", + "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. The core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. 
This event uses a programmable general purpose performance counter.", + "SampleAfterValue": "2000003", + "Unit": "cpu_atom" + }, { "BriefDescription": "Cycle counts are evenly distributed between active threads in the Core.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xec", "EventName": "CPU_CLK_UNHALTED.DISTRIBUTED", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "This event distributes cycle counts between active hyperthreads, i.e., those in C0. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If all other hyperthreads are inactive (or disabled or do not exist), all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0x3c", "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts Core crystal clock cycles when current thread is unhalted and the other thread is halted.", "SampleAfterValue": "25003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "CPU_CLK_UNHALTED.PAUSE", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xec", "EventName": "CPU_CLK_UNHALTED.PAUSE", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x40", "Unit": "cpu_core" }, { "BriefDescription": "CPU_CLK_UNHALTED.PAUSE_INST", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "1", "EdgeDetect": "1", "EventCode": "0xec", "EventName": "CPU_CLK_UNHALTED.PAUSE_INST", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x40", "Unit": "cpu_core" }, { 
"BriefDescription": "Core crystal clock cycles. Cycle counts are evenly distributed between active threads in the Core.", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0x3c", "EventName": "CPU_CLK_UNHALTED.REF_DISTRIBUTED", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "This event distributes Core crystal clock cycle counts between active hyperthreads, i.e., those in C0 sleep-state. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If one thread is active in a core, all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x8", "Unit": "cpu_core" }, { - "BriefDescription": "Reference cycles when the core is not in halt state.", - "CollectPEBSRecord": "2", - "Counter": "Fixed counter 2", + "BriefDescription": "Counts the number of unhalted reference clock cycles at TSC frequency. (Fixed event)", "EventName": "CPU_CLK_UNHALTED.REF_TSC", - "PEBScounters": "34", + "PublicDescription": "Counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. This event is not affected by core frequency changes and increments at a fixed frequency that is also used for the Time Stamp Counter (TSC). This event uses fixed counter 2.", + "SampleAfterValue": "2000003", + "UMask": "0x3", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Reference cycles when the core is not in halt state.", + "EventName": "CPU_CLK_UNHALTED.REF_TSC", + "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. 
This event can approximate elapsed time while the core was not in a halt state. It is counted on a dedicated fixed counter, leaving the eight programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x3", "Unit": "cpu_core" }, { - "BriefDescription": "Reference cycles when the core is not in halt state.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", + "BriefDescription": "Counts the number of unhalted reference clock cycles at TSC frequency.", "EventCode": "0x3c", "EventName": "CPU_CLK_UNHALTED.REF_TSC_P", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. This event is not affected by core frequency changes and increments at a fixed frequency that is also used for the Time Stamp Counter (TSC). 
This event uses a programmable general purpose performance counter.", + "SampleAfterValue": "2000003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Reference cycles when the core is not in halt state.", + "EventCode": "0x3c", + "EventName": "CPU_CLK_UNHALTED.REF_TSC_P", + "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { - "BriefDescription": "Core cycles when the thread is not in halt state", - "CollectPEBSRecord": "2", - "Counter": "Fixed counter 1", + "BriefDescription": "Counts the number of unhalted core clock cycles. 
(Fixed event)", "EventName": "CPU_CLK_UNHALTED.THREAD", - "PEBScounters": "33", + "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. The core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses fixed counter 1.", + "SampleAfterValue": "2000003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Core cycles when the thread is not in halt state", + "EventName": "CPU_CLK_UNHALTED.THREAD", + "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the eight programmable counters available for other events.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_core" }, { - "BriefDescription": "Thread cycles when thread is not in halt state", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", + "BriefDescription": "Counts the number of unhalted core clock cycles.", "EventCode": "0x3c", "EventName": "CPU_CLK_UNHALTED.THREAD_P", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. The core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. 
This event uses a programmable general purpose performance counter.", + "SampleAfterValue": "2000003", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Thread cycles when thread is not in halt state", + "EventCode": "0x3c", + "EventName": "CPU_CLK_UNHALTED.THREAD_P", + "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.", "SampleAfterValue": "2000003", - "Speculative": "1", "Unit": "cpu_core" }, { "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "CounterMask": "8", "EventCode": "0xa3", "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS", - "PEBScounters": "0,1,2,3", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x8", "Unit": "cpu_core" }, { "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "CounterMask": "1", "EventCode": "0xa3", "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS", - "PEBScounters": "0,1,2,3", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Cycles while memory subsystem has an outstanding load.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "16", "EventCode": "0xa3", "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x10", "Unit": "cpu_core" }, { "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "CounterMask": "12", "EventCode": "0xa3", "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS", - 
"PEBScounters": "0,1,2,3", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0xc", "Unit": "cpu_core" }, { "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "CounterMask": "5", "EventCode": "0xa3", "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS", - "PEBScounters": "0,1,2,3", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x5", "Unit": "cpu_core" }, { "BriefDescription": "Total execution stalls.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "4", "EventCode": "0xa3", "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x4", "Unit": "cpu_core" }, { "BriefDescription": "Cycles total of 1 uop is executed on all ports and Reservation Station was not empty.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xa6", "EventName": "EXE_ACTIVITY.1_PORTS_UTIL", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "Cycles total of 2 uops are executed on all ports and Reservation Station was not empty.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xa6", "EventName": "EXE_ACTIVITY.2_PORTS_UTIL", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x4", "Unit": "cpu_core" }, { "BriefDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station was not empty.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xa6", 
"EventName": "EXE_ACTIVITY.3_PORTS_UTIL", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x8", "Unit": "cpu_core" }, { "BriefDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station was not empty.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xa6", "EventName": "EXE_ACTIVITY.4_PORTS_UTIL", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x10", "Unit": "cpu_core" }, { "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "5", "EventCode": "0xa6", "EventName": "EXE_ACTIVITY.BOUND_ON_LOADS", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x21", "Unit": "cpu_core" }, { "BriefDescription": "Cycles where the Store Buffer was full and no loads caused an execution stall.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "2", "EventCode": "0xa6", "EventName": "EXE_ACTIVITY.BOUND_ON_STORES", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts cycles where the Store Buffer was full and no loads caused an execution stall.", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x40", "Unit": "cpu_core" }, { "BriefDescription": "Cycles no uop executed while RS was not empty, the SB was not full and there was no outstanding load.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xa6", "EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Number of cycles total of 0 uops executed on all ports, 
Reservation Station (RS) was not empty, the Store Buffer (SB) was not full and there was no outstanding load.", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x80", "Unit": "cpu_core" }, { "BriefDescription": "Instruction decoders utilized in a cycle", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x75", "EventName": "INST_DECODED.DECODERS", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Number of decoders utilized in a cycle when the MITE (legacy decode pipeline) fetches instructions.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the total number of instructions retired. (Fixed event)", + "EventName": "INST_RETIRED.ANY", + "PEBS": "1", + "PublicDescription": "Counts the total number of instructions that retired. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. This event continues counting during hardware interrupts, traps, and inside interrupt handlers. This event uses fixed counter 0.", + "SampleAfterValue": "2000003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, { "BriefDescription": "Number of instructions retired. Fixed Counter - architectural event", - "CollectPEBSRecord": "2", - "Counter": "Fixed counter 0", "EventName": "INST_RETIRED.ANY", "PEBS": "1", - "PEBScounters": "32", + "PublicDescription": "Counts the number of X86 instructions retired - an Architectural PerfMon event. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter freeing up programmable counters to count other events. INST_RETIRED.ANY_P is counted by a programmable counter.", "SampleAfterValue": "2000003", "UMask": "0x1", "Unit": "cpu_core" }, { - "BriefDescription": "Number of instructions retired. 
General Counter - architectural event", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", + "BriefDescription": "Counts the total number of instructions retired.", "EventCode": "0xc0", "EventName": "INST_RETIRED.ANY_P", "PEBS": "1", - "PEBScounters": "1,2,3,4,5,6,7", + "PublicDescription": "Counts the total number of instructions that retired. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. This event continues counting during hardware interrupts, traps, and inside interrupt handlers. This event uses a programmable general purpose performance counter.", + "SampleAfterValue": "2000003", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Number of instructions retired. General Counter - architectural event", + "EventCode": "0xc0", + "EventName": "INST_RETIRED.ANY_P", + "PEBS": "1", + "PublicDescription": "Counts the number of X86 instructions retired - an Architectural PerfMon event. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter freeing up programmable counters to count other events. 
INST_RETIRED.ANY_P is counted by a programmable counter.", "SampleAfterValue": "2000003", "Unit": "cpu_core" }, { "BriefDescription": "INST_RETIRED.MACRO_FUSED", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc0", "EventName": "INST_RETIRED.MACRO_FUSED", - "PEBScounters": "1,2,3,4,5,6,7", "SampleAfterValue": "2000003", "UMask": "0x10", "Unit": "cpu_core" }, { "BriefDescription": "Retired NOP instructions.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc0", "EventName": "INST_RETIRED.NOP", - "PEBScounters": "1,2,3,4,5,6,7", + "PublicDescription": "Counts all retired NOP or ENDBR32/64 instructions", "SampleAfterValue": "2000003", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "Precise instruction retired with PEBS precise-distribution", - "CollectPEBSRecord": "2", - "Counter": "Fixed counter 0", "EventName": "INST_RETIRED.PREC_DIST", "PEBS": "1", - "PEBScounters": "32", + "PublicDescription": "A version of INST_RETIRED that allows for a precise distribution of samples across instructions retired. It utilizes the Precise Distribution of Instructions Retired (PDIR++) feature to fix bias in how retired instructions get sampled. 
Use on Fixed Counter 0.", "SampleAfterValue": "2000003", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "INST_RETIRED.REP_ITERATION", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc0", "EventName": "INST_RETIRED.REP_ITERATION", - "PEBScounters": "1,2,3,4,5,6,7", "SampleAfterValue": "2000003", "UMask": "0x8", "Unit": "cpu_core" }, { "BriefDescription": "Counts cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xad", "EventName": "INT_MISC.CLEAR_RESTEER_CYCLES", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path.", "SampleAfterValue": "500009", - "Speculative": "1", "UMask": "0x80", "Unit": "cpu_core" }, { "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xad", "EventName": "INT_MISC.RECOVERY_CYCLES", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts core cycles when the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.", "SampleAfterValue": "500009", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "INT_MISC.UNKNOWN_BRANCH_CYCLES", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xad", "EventName": "INT_MISC.UNKNOWN_BRANCH_CYCLES", "MSRIndex": "0x3F7", "MSRValue": "0x7", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "1000003", - "Speculative": "1", - "TakenAlone": "1", "UMask": "0x40", "Unit": "cpu_core" }, { "BriefDescription": "TMA slots where uops got dropped", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xad", "EventName": 
"INT_MISC.UOP_DROPPING", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Estimated number of Top-down Microarchitecture Analysis slots that got dropped due to non front-end reasons", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x10", "Unit": "cpu_core" }, { "BriefDescription": "INT_VEC_RETIRED.128BIT", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xe7", "EventName": "INT_VEC_RETIRED.128BIT", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "1000003", "UMask": "0x13", "Unit": "cpu_core" }, { "BriefDescription": "INT_VEC_RETIRED.256BIT", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xe7", "EventName": "INT_VEC_RETIRED.256BIT", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "1000003", "UMask": "0xac", "Unit": "cpu_core" }, { "BriefDescription": "integer ADD, SUB, SAD 128-bit vector instructions.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xe7", "EventName": "INT_VEC_RETIRED.ADD_128", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 128-bit vector instructions.", "SampleAfterValue": "1000003", "UMask": "0x3", "Unit": "cpu_core" }, { "BriefDescription": "integer ADD, SUB, SAD 256-bit vector instructions.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xe7", "EventName": "INT_VEC_RETIRED.ADD_256", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 256-bit vector instructions.", "SampleAfterValue": "1000003", "UMask": "0xc", "Unit": "cpu_core" }, { "BriefDescription": "INT_VEC_RETIRED.MUL_256", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xe7", "EventName": "INT_VEC_RETIRED.MUL_256", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "1000003", "UMask": "0x80", "Unit": "cpu_core" }, { "BriefDescription": 
"INT_VEC_RETIRED.SHUFFLES", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xe7", "EventName": "INT_VEC_RETIRED.SHUFFLES", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "1000003", "UMask": "0x40", "Unit": "cpu_core" }, { "BriefDescription": "INT_VEC_RETIRED.VNNI_128", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xe7", "EventName": "INT_VEC_RETIRED.VNNI_128", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "1000003", "UMask": "0x10", "Unit": "cpu_core" }, { "BriefDescription": "INT_VEC_RETIRED.VNNI_256", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xe7", "EventName": "INT_VEC_RETIRED.VNNI_256", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "1000003", "UMask": "0x20", "Unit": "cpu_core" }, { - "BriefDescription": "False dependencies in MOB due to partial compare on address.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", + "BriefDescription": "This event is deprecated. 
Refer to new event LD_BLOCKS.ADDRESS_ALIAS", + "Deprecated": "1", + "EventCode": "0x03", + "EventName": "LD_BLOCKS.4K_ALIAS", + "PEBS": "1", + "SampleAfterValue": "1000003", + "UMask": "0x4", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of retired loads that are blocked because it initially appears to be store forward blocked, but subsequently is shown not to be blocked based on 4K alias check.", "EventCode": "0x03", "EventName": "LD_BLOCKS.ADDRESS_ALIAS", - "PEBScounters": "0,1,2,3", + "PEBS": "1", + "SampleAfterValue": "1000003", + "UMask": "0x4", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "False dependencies in MOB due to partial compare on address.", + "EventCode": "0x03", + "EventName": "LD_BLOCKS.ADDRESS_ALIAS", + "PublicDescription": "Counts the number of times a load got blocked due to false dependencies in MOB due to partial compare on address.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x4", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of retired loads that are blocked because its address exactly matches an older store whose data is not ready.", + "EventCode": "0x03", + "EventName": "LD_BLOCKS.DATA_UNKNOWN", + "PEBS": "1", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, { "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x03", "EventName": "LD_BLOCKS.NO_SR", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x88", "Unit": "cpu_core" }, { "BriefDescription": "Loads blocked due to overlapping with a preceding store that cannot be forwarded.", - "CollectPEBSRecord": "2", - 
"Counter": "0,1,2,3", "EventCode": "0x03", "EventName": "LD_BLOCKS.STORE_FORWARD", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts the number of times where store forwarding was prevented for a load operation. The most common case is a load blocked due to the address of memory access (partially) overlapping with a preceding uncompleted store. Note: See the table of not supported store forwards in the Optimization Guide.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x82", "Unit": "cpu_core" }, { "BriefDescription": "Counts the number of demand load dispatches that hit L1D fill buffer (FB) allocated for software prefetch.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x4c", "EventName": "LOAD_HIT_PREFETCH.SWPF", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "1", "EventCode": "0xa8", "EventName": "LSD.CYCLES_ACTIVE", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Cycles optimal number of Uops delivered by the LSD, but did not come from the decoder.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "6", "EventCode": "0xa8", "EventName": "LSD.CYCLES_OK", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": 
"Counts the cycles when optimal number of uops is delivered by the LSD (Loop-stream detector).", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Number of Uops delivered by the LSD.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xa8", "EventName": "LSD.UOPS", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts the number of uops delivered to the back-end by the LSD (Loop Stream Detector).", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Number of machine clears (nukes) of any type.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "1", "EdgeDetect": "1", "EventCode": "0xc3", "EventName": "MACHINE_CLEARS.COUNT", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts the number of machine clears (nukes) of any type.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { - "BriefDescription": "Self-modifying code (SMC) detected.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", + "BriefDescription": "Counts the number of machine clears due to memory ordering in which an internal load passes an older store within the same CPU.", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.DISAMBIGUATION", + "SampleAfterValue": "20003", + "UMask": "0x8", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of machine clears due to memory renaming.", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.MRN_NUKE", + "SampleAfterValue": "1000003", + "UMask": "0x80", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of machine clears due to a page fault. Counts both I-Side and D-Side (Loads/Stores) page faults. 
A page fault occurs when either the page is not present, or an access violation occurs.", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.PAGE_FAULT", + "SampleAfterValue": "20003", + "UMask": "0x20", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of machine clears that flush the pipeline and restart the machine with the use of microcode due to SMC, MEMORY_ORDERING, FP_ASSISTS, PAGE_FAULT, DISAMBIGUATION, and FPC_VIRTUAL_TRAP.", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.SLOW", + "SampleAfterValue": "20003", + "UMask": "0x6f", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of machine clears due to program modifying data (self modifying code) within 1K of a recently fetched code page.", "EventCode": "0xc3", "EventName": "MACHINE_CLEARS.SMC", - "PEBScounters": "0,1,2,3,4,5,6,7", + "SampleAfterValue": "20003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Self-modifying code (SMC) detected.", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.SMC", + "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x4", "Unit": "cpu_core" }, { "BriefDescription": "MISC2_RETIRED.LFENCE", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xe0", "EventName": "MISC2_RETIRED.LFENCE", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "400009", - "Speculative": "1", "UMask": "0x20", "Unit": "cpu_core" }, { "BriefDescription": "Increments whenever there is an update to the LBR array.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xcc", "EventName": "MISC_RETIRED.LBR_INSERTS", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). 
The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.", "SampleAfterValue": "100003", "UMask": "0x20", "Unit": "cpu_core" }, { "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xa2", "EventName": "RESOURCE_STALLS.SB", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x8", "Unit": "cpu_core" }, { "BriefDescription": "Counts cycles where the pipeline is stalled due to serializing operations.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xa2", "EventName": "RESOURCE_STALLS.SCOREBOARD", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of issue slots not consumed by the backend due to a micro-sequencer (MS) scoreboard, which stalls the front-end from issuing from the UROM until a specified older uop retires.", + "EventCode": "0x75", + "EventName": "SERIALIZATION.NON_C01_MS_SCB", + "PublicDescription": "Counts the number of issue slots not consumed by the backend due to a micro-sequencer (MS) scoreboard, which stalls the front-end from issuing from the UROM until a specified older uop retires. 
The most commonly executed instruction with an MS scoreboard is PAUSE.", + "SampleAfterValue": "200003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, { "BriefDescription": "TMA slots where no uops were being issued due to lack of back-end resources.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xa4", "EventName": "TOPDOWN.BACKEND_BOUND_SLOTS", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Number of slots in TMA method where no micro-operations were being issued from front-end to back-end of the machine due to lack of back-end resources.", "SampleAfterValue": "10000003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "TMA slots wasted due to incorrect speculations.", - "CollectPEBSRecord": "2", "EventCode": "0xa4", "EventName": "TOPDOWN.BAD_SPEC_SLOTS", + "PublicDescription": "Number of slots of TMA method that were wasted due to incorrect speculation. It covers all types of control-flow or data-related mis-speculations.", "SampleAfterValue": "10000003", - "Speculative": "1", "UMask": "0x4", "Unit": "cpu_core" }, { "BriefDescription": "TMA slots wasted due to incorrect speculation by branch mispredictions", - "CollectPEBSRecord": "2", "EventCode": "0xa4", "EventName": "TOPDOWN.BR_MISPREDICT_SLOTS", + "PublicDescription": "Number of TMA slots that were wasted due to incorrect speculation by (any type of) branch mispredictions. 
This event estimates number of speculative operations that were issued but not retired as well as the out-of-order engine recovery past a branch misprediction.", "SampleAfterValue": "10000003", - "Speculative": "1", "UMask": "0x8", "Unit": "cpu_core" }, { "BriefDescription": "TOPDOWN.MEMORY_BOUND_SLOTS", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xa4", "EventName": "TOPDOWN.MEMORY_BOUND_SLOTS", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "10000003", - "Speculative": "1", "UMask": "0x10", "Unit": "cpu_core" }, { "BriefDescription": "TMA slots available for an unhalted logical processor. Fixed counter - architectural event", - "CollectPEBSRecord": "2", - "Counter": "Fixed counter 3", "EventName": "TOPDOWN.SLOTS", - "PEBScounters": "35", + "PublicDescription": "Number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method (TMA). The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core. Software can use this event as the denominator for the top-level metrics of the TMA method. This architectural event is counted on a designated fixed counter (Fixed Counter 3).", "SampleAfterValue": "10000003", - "Speculative": "1", "UMask": "0x4", "Unit": "cpu_core" }, { "BriefDescription": "TMA slots available for an unhalted logical processor. General counter - architectural event", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xa4", "EventName": "TOPDOWN.SLOTS_P", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts the number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method. 
The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core.", "SampleAfterValue": "10000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear.", + "EventCode": "0x73", + "EventName": "TOPDOWN_BAD_SPECULATION.ALL", + "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. Only issue slots wasted due to fast nukes such as memory ordering nukes are counted. Other nukes are not accounted for. Counts all issue slots blocked during this recovery window including relevant microcode flows and while uops are not yet available in the instruction queue (IQ) even if an FE_bound event occurs during this period. Also includes the issue slots that were consumed by the backend but were thrown away because they were younger than the mispredict or machine clear.", + "SampleAfterValue": "1000003", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to fast nukes such as memory ordering and memory disambiguation machine clears.", + "EventCode": "0x73", + "EventName": "TOPDOWN_BAD_SPECULATION.FASTNUKE", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a machine clear (nuke) of any kind including memory ordering and memory disambiguation.", + "EventCode": "0x73", + "EventName": "TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS", + "SampleAfterValue": "1000003", + "UMask": "0x3", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not 
consumed by the backend due to branch mispredicts.", + "EventCode": "0x73", + "EventName": "TOPDOWN_BAD_SPECULATION.MISPREDICT", + "SampleAfterValue": "1000003", + "UMask": "0x4", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to a machine clear (nuke).", + "EventCode": "0x73", + "EventName": "TOPDOWN_BAD_SPECULATION.NUKE", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the total number of issue slots every cycle that were not consumed by the backend due to backend stalls.", + "EventCode": "0x74", + "EventName": "TOPDOWN_BE_BOUND.ALL", + "SampleAfterValue": "1000003", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to certain allocation restrictions.", + "EventCode": "0x74", + "EventName": "TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to memory reservation stalls in which a scheduler is not able to accept uops.", + "EventCode": "0x74", + "EventName": "TOPDOWN_BE_BOUND.MEM_SCHEDULER", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to IEC or FPC RAT stalls, which can be due to FIQ or IEC reservation stalls in which the integer, floating point or SIMD scheduler is not able to accept uops.", + "EventCode": "0x74", + "EventName": "TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER", + "SampleAfterValue": "1000003", + "UMask": "0x8", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to the physical register file unable to accept an 
entry (marble stalls).", + "EventCode": "0x74", + "EventName": "TOPDOWN_BE_BOUND.REGISTER", + "SampleAfterValue": "1000003", + "UMask": "0x20", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to the reorder buffer being full (ROB stalls).", + "EventCode": "0x74", + "EventName": "TOPDOWN_BE_BOUND.REORDER_BUFFER", + "SampleAfterValue": "1000003", + "UMask": "0x40", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to scoreboards from the instruction queue (IQ), jump execution unit (JEU), or microcode sequencer (MS).", + "EventCode": "0x74", + "EventName": "TOPDOWN_BE_BOUND.SERIALIZATION", + "SampleAfterValue": "1000003", + "UMask": "0x10", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the total number of issue slots every cycle that were not consumed by the backend due to frontend stalls.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.ALL", + "SampleAfterValue": "1000003", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BACLEARS.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.BRANCH_DETECT", + "PublicDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend. 
Includes BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BTCLEARS.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.BRANCH_RESTEER", + "PublicDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BTCLEARS, which occurs when the Branch Target Buffer (BTB) predicts a taken branch.", + "SampleAfterValue": "1000003", + "UMask": "0x40", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to the microcode sequencer (MS).", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.CISC", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to decode stalls.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.DECODE", + "SampleAfterValue": "1000003", + "UMask": "0x8", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH", + "SampleAfterValue": "1000003", + "UMask": "0x8d", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to latency related stalls including BACLEARs, BTCLEARs, ITLB misses, and ICache misses.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.FRONTEND_LATENCY", + "SampleAfterValue": "1000003", + "UMask": "0x72", + "Unit": "cpu_atom" + }, + { + 
"BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to ITLB misses.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.ITLB", + "PublicDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to Instruction Translation Lookaside Buffer (ITLB) misses.", + "SampleAfterValue": "1000003", + "UMask": "0x10", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to other common frontend stalls not categorized.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.OTHER", + "SampleAfterValue": "1000003", + "UMask": "0x80", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to wrong predecodes.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.PREDECODE", + "SampleAfterValue": "1000003", + "UMask": "0x4", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the total number of consumed retirement slots.", + "EventCode": "0xc2", + "EventName": "TOPDOWN_RETIRING.ALL", + "PEBS": "1", + "SampleAfterValue": "1000003", + "Unit": "cpu_atom" + }, { "BriefDescription": "UOPS_DECODED.DEC0_UOPS", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x76", "EventName": "UOPS_DECODED.DEC0_UOPS", - "PEBScounters": "0,1,2,3", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Uops executed on port 0", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xb2", "EventName": "UOPS_DISPATCHED.PORT_0", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Number of uops dispatch to execution port 0.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Uops executed on port 1", - "CollectPEBSRecord": "2", - "Counter": 
"0,1,2,3,4,5,6,7", "EventCode": "0xb2", "EventName": "UOPS_DISPATCHED.PORT_1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Number of uops dispatch to execution port 1.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "Uops executed on ports 2, 3 and 10", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xb2", "EventName": "UOPS_DISPATCHED.PORT_2_3_10", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Number of uops dispatch to execution ports 2, 3 and 10", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x4", "Unit": "cpu_core" }, { "BriefDescription": "Uops executed on ports 4 and 9", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xb2", "EventName": "UOPS_DISPATCHED.PORT_4_9", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Number of uops dispatch to execution ports 4 and 9", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x10", "Unit": "cpu_core" }, { "BriefDescription": "Uops executed on ports 5 and 11", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xb2", "EventName": "UOPS_DISPATCHED.PORT_5_11", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Number of uops dispatch to execution ports 5 and 11", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x20", "Unit": "cpu_core" }, { "BriefDescription": "Uops executed on port 6", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xb2", "EventName": "UOPS_DISPATCHED.PORT_6", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Number of uops dispatch to execution port 6.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x40", "Unit": "cpu_core" }, { "BriefDescription": "Uops executed on ports 7 and 8", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xb2", "EventName": "UOPS_DISPATCHED.PORT_7_8", - 
"PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Number of uops dispatch to execution ports 7 and 8.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x80", "Unit": "cpu_core" }, { "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "1", "EventCode": "0xb1", "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts cycles when at least 1 micro-op is executed from any thread on physical core.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "2", "EventCode": "0xb1", "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts cycles when at least 2 micro-ops are executed from any thread on physical core.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "3", "EventCode": "0xb1", "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts cycles when at least 3 micro-ops are executed from any thread on physical core.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "4", "EventCode": "0xb1", "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts cycles when at least 
4 micro-ops are executed from any thread on physical core.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "Cycles where at least 1 uop was executed per-thread", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "1", "EventCode": "0xb1", "EventName": "UOPS_EXECUTED.CYCLES_GE_1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Cycles where at least 1 uop was executed per-thread.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Cycles where at least 2 uops were executed per-thread", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "2", "EventCode": "0xb1", "EventName": "UOPS_EXECUTED.CYCLES_GE_2", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Cycles where at least 2 uops were executed per-thread.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Cycles where at least 3 uops were executed per-thread", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "3", "EventCode": "0xb1", "EventName": "UOPS_EXECUTED.CYCLES_GE_3", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Cycles where at least 3 uops were executed per-thread.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Cycles where at least 4 uops were executed per-thread", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "4", "EventCode": "0xb1", "EventName": "UOPS_EXECUTED.CYCLES_GE_4", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Cycles where at least 4 uops were executed per-thread.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.", - "CollectPEBSRecord": 
"2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "1", "EventCode": "0xb1", "EventName": "UOPS_EXECUTED.STALLS", "Invert": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "This event is deprecated. Refer to new event UOPS_EXECUTED.STALLS", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "1", + "Deprecated": "1", "EventCode": "0xb1", "EventName": "UOPS_EXECUTED.STALL_CYCLES", "Invert": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xb1", "EventName": "UOPS_EXECUTED.THREAD", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Counts the number of x87 uops dispatched.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xb1", "EventName": "UOPS_EXECUTED.X87", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts the number of x87 uops executed.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x10", "Unit": "cpu_core" }, { "BriefDescription": "Uops that RAT issues to RS", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xae", "EventName": "UOPS_ISSUED.ANY", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the total number of uops retired.", + 
"EventCode": "0xc2", + "EventName": "UOPS_RETIRED.ALL", + "PEBS": "1", + "SampleAfterValue": "2000003", + "Unit": "cpu_atom" + }, { "BriefDescription": "Cycles with retired uop(s).", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "1", "EventCode": "0xc2", "EventName": "UOPS_RETIRED.CYCLES", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts cycles where at least one uop has retired.", "SampleAfterValue": "1000003", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "Retired uops except the last uop of each instruction.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc2", "EventName": "UOPS_RETIRED.HEAVY", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts the number of retired micro-operations (uops) except the last uop of each instruction. An instruction that is decoded into less than two uops does not contribute to the count.", "SampleAfterValue": "2000003", "UMask": "0x1", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of integer divide uops retired.", + "EventCode": "0xc2", + "EventName": "UOPS_RETIRED.IDIV", + "PEBS": "1", + "SampleAfterValue": "2000003", + "UMask": "0x10", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of uops that are from complex flows issued by the micro-sequencer (MS).", + "EventCode": "0xc2", + "EventName": "UOPS_RETIRED.MS", + "PEBS": "1", + "PublicDescription": "Counts the number of uops that are from complex flows issued by the Microcode Sequencer (MS). 
This includes uops from flows due to complex instructions, faults, assists, and inserted flows.", + "SampleAfterValue": "2000003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, { "BriefDescription": "UOPS_RETIRED.MS", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc2", "EventName": "UOPS_RETIRED.MS", "MSRIndex": "0x3F7", "MSRValue": "0x8", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "2000003", - "TakenAlone": "1", "UMask": "0x4", "Unit": "cpu_core" }, { "BriefDescription": "Retirement slots used.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc2", "EventName": "UOPS_RETIRED.SLOTS", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts the retirement slots used each cycle.", "SampleAfterValue": "2000003", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "Cycles without actually retired uops.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "1", "EventCode": "0xc2", "EventName": "UOPS_RETIRED.STALLS", "Invert": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "This event counts cycles without actually retired uops.", "SampleAfterValue": "1000003", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "This event is deprecated. 
Refer to new event UOPS_RETIRED.STALLS", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "1", + "Deprecated": "1", "EventCode": "0xc2", "EventName": "UOPS_RETIRED.STALL_CYCLES", "Invert": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "1000003", "UMask": "0x2", "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of x87 uops retired, includes those in MS flows.", + "EventCode": "0xc2", + "EventName": "UOPS_RETIRED.X87", + "PEBS": "1", + "SampleAfterValue": "2000003", + "UMask": "0x2", + "Unit": "cpu_atom" } ] diff --git a/tools/perf/pmu-events/arch/x86/alderlake/uncore-memory.json b/tools/perf/pmu-events/arch/x86/alderlake/uncore-memory.json index d82d6f62a6fb..2ccd9cf96957 100644 --- a/tools/perf/pmu-events/arch/x86/alderlake/uncore-memory.json +++ b/tools/perf/pmu-events/arch/x86/alderlake/uncore-memory.json @@ -1,134 +1,53 @@ [ { - "BriefDescription": "Number of clocks", - "Counter": "0,1,2,3,4", - "CounterType": "PGMABLE", - "EventCode": "0x01", - "EventName": "UNC_M_CLOCKTICKS", + "BriefDescription": "Counts every 64B read request entering the Memory Controller 0 to DRAM (sum of all channels).", + "EventName": "UNC_MC0_RDCAS_COUNT_FREERUN", + "PerPkg": "1", + "PublicDescription": "Counts every 64B read request entering the Memory Controller 0 to DRAM (sum of all channels).", + "Unit": "iMC" + }, + { + "BriefDescription": "Counts every 64B write request entering the Memory Controller 0 to DRAM (sum of all channels). Each write request counts as a new request incrementing this counter. 
However, same cache line write requests (both full and partial) are combined to a single 64 byte data transfer to DRAM.", + "EventName": "UNC_MC0_WRCAS_COUNT_FREERUN", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Incoming VC0 read request", - "Counter": "0,1,2,3,4", - "CounterType": "PGMABLE", - "EventCode": "0x02", - "EventName": "UNC_M_VC0_REQUESTS_RD", + "BriefDescription": "Counts every 64B read request entering the Memory Controller 1 to DRAM (sum of all channels).", + "EventName": "UNC_MC1_RDCAS_COUNT_FREERUN", + "PerPkg": "1", + "PublicDescription": "Counts every 64B read entering the Memory Controller 1 to DRAM (sum of all channels).", + "Unit": "iMC" + }, + { + "BriefDescription": "Counts every 64B write request entering the Memory Controller 1 to DRAM (sum of all channels). Each write request counts as a new request incrementing this counter. However, same cache line write requests (both full and partial) are combined to a single 64 byte data transfer to DRAM.", + "EventName": "UNC_MC1_WRCAS_COUNT_FREERUN", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Incoming VC0 write request", - "Counter": "0,1,2,3,4", - "CounterType": "PGMABLE", - "EventCode": "0x03", - "EventName": "UNC_M_VC0_REQUESTS_WR", + "BriefDescription": "ACT command for a read request sent to DRAM", + "EventCode": "0x24", + "EventName": "UNC_M_ACT_COUNT_RD", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Incoming VC1 read request", - "Counter": "0,1,2,3,4", - "CounterType": "PGMABLE", - "EventCode": "0x04", - "EventName": "UNC_M_VC1_REQUESTS_RD", + "BriefDescription": "ACT command sent to DRAM", + "EventCode": "0x26", + "EventName": "UNC_M_ACT_COUNT_TOTAL", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Incoming VC1 write request", - "Counter": "0,1,2,3,4", - "CounterType": "PGMABLE", - "EventCode": "0x05", - "EventName": "UNC_M_VC1_REQUESTS_WR", - "PerPkg": "1", - "Unit": "iMC" - }, - { - "BriefDescription": "Incoming read prefetch request from 
IA", - "Counter": "0,1,2,3,4", - "CounterType": "PGMABLE", - "EventCode": "0x0A", - "EventName": "UNC_M_PREFETCH_RD", - "PerPkg": "1", - "Unit": "iMC" - }, - { - "BriefDescription": "Any Rank at Hot state", - "Counter": "0,1,2,3,4", - "CounterType": "PGMABLE", - "EventCode": "0x19", - "EventName": "UNC_M_DRAM_THERMAL_HOT", - "PerPkg": "1", - "Unit": "iMC" - }, - { - "BriefDescription": "Any Rank at Warm state", - "Counter": "0,1,2,3,4", - "CounterType": "PGMABLE", - "EventCode": "0x1A", - "EventName": "UNC_M_DRAM_THERMAL_WARM", - "PerPkg": "1", - "Unit": "iMC" - }, - { - "BriefDescription": "incoming read request page status is Page Hit", - "Counter": "0,1,2,3,4", - "CounterType": "PGMABLE", - "EventCode": "0x1C", - "EventName": "UNC_M_DRAM_PAGE_HIT_RD", - "PerPkg": "1", - "Unit": "iMC" - }, - { - "BriefDescription": "incoming read request page status is Page Empty", - "Counter": "0,1,2,3,4", - "CounterType": "PGMABLE", - "EventCode": "0x1D", - "EventName": "UNC_M_DRAM_PAGE_EMPTY_RD", - "PerPkg": "1", - "Unit": "iMC" - }, - { - "BriefDescription": "incoming read request page status is Page Miss", - "Counter": "0,1,2,3,4", - "CounterType": "PGMABLE", - "EventCode": "0x1E", - "EventName": "UNC_M_DRAM_PAGE_MISS_RD", - "PerPkg": "1", - "Unit": "iMC" - }, - { - "BriefDescription": "incoming write request page status is Page Hit", - "Counter": "0,1,2,3,4", - "CounterType": "PGMABLE", - "EventCode": "0x1F", - "EventName": "UNC_M_DRAM_PAGE_HIT_WR", - "PerPkg": "1", - "Unit": "iMC" - }, - { - "BriefDescription": "incoming write request page status is Page Empty", - "Counter": "0,1,2,3,4", - "CounterType": "PGMABLE", - "EventCode": "0x20", - "EventName": "UNC_M_DRAM_PAGE_EMPTY_WR", - "PerPkg": "1", - "Unit": "iMC" - }, - { - "BriefDescription": "incoming write request page status is Page Miss", - "Counter": "0,1,2,3,4", - "CounterType": "PGMABLE", - "EventCode": "0x21", - "EventName": "UNC_M_DRAM_PAGE_MISS_WR", + "BriefDescription": "ACT command for a write request sent to 
DRAM", + "EventCode": "0x25", + "EventName": "UNC_M_ACT_COUNT_WR", "PerPkg": "1", "Unit": "iMC" }, { "BriefDescription": "Read CAS command sent to DRAM", - "Counter": "0,1,2,3,4", - "CounterType": "PGMABLE", "EventCode": "0x22", "EventName": "UNC_M_CAS_COUNT_RD", "PerPkg": "1", @@ -136,86 +55,120 @@ }, { "BriefDescription": "Write CAS command sent to DRAM", - "Counter": "0,1,2,3,4", - "CounterType": "PGMABLE", "EventCode": "0x23", "EventName": "UNC_M_CAS_COUNT_WR", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "ACT command for a read request sent to DRAM", - "Counter": "0,1,2,3,4", - "CounterType": "PGMABLE", - "EventCode": "0x24", - "EventName": "UNC_M_ACT_COUNT_RD", + "BriefDescription": "Number of clocks", + "EventCode": "0x01", + "EventName": "UNC_M_CLOCKTICKS", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "ACT command for a write request sent to DRAM", - "Counter": "0,1,2,3,4", - "CounterType": "PGMABLE", - "EventCode": "0x25", - "EventName": "UNC_M_ACT_COUNT_WR", + "BriefDescription": "incoming read request page status is Page Empty", + "EventCode": "0x1D", + "EventName": "UNC_M_DRAM_PAGE_EMPTY_RD", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "ACT command sent to DRAM", - "Counter": "0,1,2,3,4", - "CounterType": "PGMABLE", - "EventCode": "0x26", - "EventName": "UNC_M_ACT_COUNT_TOTAL", + "BriefDescription": "incoming write request page status is Page Empty", + "EventCode": "0x20", + "EventName": "UNC_M_DRAM_PAGE_EMPTY_WR", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "PRE command sent to DRAM for a read/write request", - "Counter": "0,1,2,3,4", - "CounterType": "PGMABLE", - "EventCode": "0x27", - "EventName": "UNC_M_PRE_COUNT_PAGE_MISS", + "BriefDescription": "incoming read request page status is Page Hit", + "EventCode": "0x1C", + "EventName": "UNC_M_DRAM_PAGE_HIT_RD", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "incoming write request page status is Page Hit", + "EventCode": "0x1F", + 
"EventName": "UNC_M_DRAM_PAGE_HIT_WR", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "incoming read request page status is Page Miss", + "EventCode": "0x1E", + "EventName": "UNC_M_DRAM_PAGE_MISS_RD", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "incoming write request page status is Page Miss", + "EventCode": "0x21", + "EventName": "UNC_M_DRAM_PAGE_MISS_WR", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Any Rank at Hot state", + "EventCode": "0x19", + "EventName": "UNC_M_DRAM_THERMAL_HOT", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Any Rank at Warm state", + "EventCode": "0x1A", + "EventName": "UNC_M_DRAM_THERMAL_WARM", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Incoming read prefetch request from IA.", + "EventCode": "0x0A", + "EventName": "UNC_M_PREFETCH_RD", "PerPkg": "1", "Unit": "iMC" }, { "BriefDescription": "PRE command sent to DRAM due to page table idle timer expiration", - "Counter": "0,1,2,3,4", - "CounterType": "PGMABLE", "EventCode": "0x28", "EventName": "UNC_M_PRE_COUNT_IDLE", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Counts every 64B read request entering the Memory Controller 0 to DRAM (sum of all channels)", - "CounterType": "FREERUN", - "EventName": "UNC_MC0_RDCAS_COUNT_FREERUN", + "BriefDescription": "PRE command sent to DRAM for a read/write request", + "EventCode": "0x27", + "EventName": "UNC_M_PRE_COUNT_PAGE_MISS", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Counts every 64B read request entering the Memory Controller 1 to DRAM (sum of all channels)", - "Counter": "3", - "CounterType": "FREERUN", - "EventName": "UNC_MC1_RDCAS_COUNT_FREERUN", + "BriefDescription": "Incoming VC0 read request", + "EventCode": "0x02", + "EventName": "UNC_M_VC0_REQUESTS_RD", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Counts every 64B write request entering the Memory Controller 0 to DRAM (sum of all channels). 
Each write request counts as a new request incrementing this counter. However, same cache line write requests (both full and partial) are combined to a single 64 byte data transfer to DRAM", - "Counter": "1", - "CounterType": "FREERUN", - "EventName": "UNC_MC0_WRCAS_COUNT_FREERUN", + "BriefDescription": "Incoming VC0 write request", + "EventCode": "0x03", + "EventName": "UNC_M_VC0_REQUESTS_WR", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Counts every 64B write request entering the Memory Controller 1 to DRAM (sum of all channels). Each write request counts as a new request incrementing this counter. However, same cache line write requests (both full and partial) are combined to a single 64 byte data transfer to DRAM", - "Counter": "4", - "CounterType": "FREERUN", - "EventName": "UNC_MC1_WRCAS_COUNT_FREERUN", + "BriefDescription": "Incoming VC1 read request", + "EventCode": "0x04", + "EventName": "UNC_M_VC1_REQUESTS_RD", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Incoming VC1 write request", + "EventCode": "0x05", + "EventName": "UNC_M_VC1_REQUESTS_WR", "PerPkg": "1", "Unit": "iMC" } diff --git a/tools/perf/pmu-events/arch/x86/alderlake/uncore-other.json b/tools/perf/pmu-events/arch/x86/alderlake/uncore-other.json index b1ae349f5f21..bc5fb6b76065 100644 --- a/tools/perf/pmu-events/arch/x86/alderlake/uncore-other.json +++ b/tools/perf/pmu-events/arch/x86/alderlake/uncore-other.json @@ -1,40 +1,73 @@ [ { - "BriefDescription": "This 48-bit fixed counter counts the UCLK cycles", - "Counter": "Fixed", - "CounterType": "PGMABLE", + "BriefDescription": "Number of requests allocated in Coherency Tracker.", + "EventCode": "0x84", + "EventName": "UNC_ARB_COH_TRK_REQUESTS.ALL", + "PerPkg": "1", + "UMask": "0x1", + "Unit": "ARB" + }, + { + "BriefDescription": "Each cycle counts number of any coherent request at memory controller that were issued by any core.", + "EventCode": "0x85", + "EventName": "UNC_ARB_DAT_OCCUPANCY.ALL", + "PerPkg": "1", 
+ "UMask": "0x1", + "Unit": "ARB" + }, + { + "BriefDescription": "Each cycle counts number of coherent reads pending on data return from memory controller that were issued by any core.", + "EventCode": "0x85", + "EventName": "UNC_ARB_DAT_OCCUPANCY.RD", + "PerPkg": "1", + "UMask": "0x2", + "Unit": "ARB" + }, + { + "BriefDescription": "Number of coherent read requests sent to memory controller that were issued by any core.", + "EventCode": "0x81", + "EventName": "UNC_ARB_DAT_REQUESTS.RD", + "PerPkg": "1", + "UMask": "0x2", + "Unit": "ARB" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event UNC_ARB_DAT_OCCUPANCY.ALL", + "EventCode": "0x85", + "EventName": "UNC_ARB_IFA_OCCUPANCY.ALL", + "PerPkg": "1", + "UMask": "0x1", + "Unit": "ARB" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event UNC_ARB_DAT_REQUESTS.RD", + "EventCode": "0x81", + "EventName": "UNC_ARB_REQ_TRK_REQUEST.DRD", + "PerPkg": "1", + "UMask": "0x2", + "Unit": "ARB" + }, + { + "BriefDescription": "Each cycle counts number of all outgoing valid entries in ReqTrk. Such entry is defined as valid from its allocation in ReqTrk till deallocation. 
Accounts for Coherent and non-coherent traffic.", + "EventCode": "0x80", + "EventName": "UNC_ARB_TRK_OCCUPANCY.ALL", + "PerPkg": "1", + "UMask": "0x1", + "Unit": "ARB" + }, + { + "BriefDescription": "Counts the number of coherent and in-coherent requests initiated by IA cores, processor graphic units, or LLC.", + "EventCode": "0x81", + "EventName": "UNC_ARB_TRK_REQUESTS.ALL", + "PerPkg": "1", + "UMask": "0x1", + "Unit": "ARB" + }, + { + "BriefDescription": "This 48-bit fixed counter counts the UCLK cycles.", "EventCode": "0xff", "EventName": "UNC_CLOCK.SOCKET", "PerPkg": "1", "Unit": "CLOCK" - }, - { - "BriefDescription": "Counts the number of coherent and in-coherent requests initiated by IA cores, processor graphic units, or LLC", - "Counter": "0,1", - "CounterType": "PGMABLE", - "EventCode": "0x81", - "EventName": "UNC_ARB_TRK_REQUESTS.ALL", - "PerPkg": "1", - "UMask": "0x01", - "Unit": "ARB" - }, - { - "BriefDescription": "Number of requests allocated in Coherency Tracker", - "Counter": "0,1", - "CounterType": "PGMABLE", - "EventCode": "0x84", - "EventName": "UNC_ARB_COH_TRK_REQUESTS.ALL", - "PerPkg": "1", - "UMask": "0x01", - "Unit": "ARB" - }, - { - "BriefDescription": "Each cycle counts number of all outgoing valid entries in ReqTrk. Such entry is defined as valid from its allocation in ReqTrk till deallocation. 
Accounts for Coherent and non-coherent traffic", - "CounterType": "PGMABLE", - "EventCode": "0x80", - "EventName": "UNC_ARB_TRK_OCCUPANCY.ALL", - "PerPkg": "1", - "UMask": "0x01", - "Unit": "ARB" } ] diff --git a/tools/perf/pmu-events/arch/x86/alderlake/virtual-memory.json b/tools/perf/pmu-events/arch/x86/alderlake/virtual-memory.json index 12baf768ad8d..3827d292da80 100644 --- a/tools/perf/pmu-events/arch/x86/alderlake/virtual-memory.json +++ b/tools/perf/pmu-events/arch/x86/alderlake/virtual-memory.json @@ -1,317 +1,236 @@ [ - { - "BriefDescription": "Counts the number of page walks completed due to load DTLB misses to any page size.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x08", - "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0xe", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of page walks completed due to store DTLB misses to any page size.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x49", - "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "2000003", - "Speculative": "1", - "UMask": "0xe", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of page walks initiated by a instruction fetch that missed the first and second level TLBs.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x85", - "EventName": "ITLB_MISSES.MISS_CAUSED_WALK", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of page walks due to an instruction fetch that miss the PDE (Page Directory Entry) cache.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x85", - "EventName": "ITLB_MISSES.PDE_CACHE_MISS", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": 
"2000003", - "Speculative": "1", - "UMask": "0x80", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of page walks completed due to instruction fetch misses to any page size.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x85", - "EventName": "ITLB_MISSES.WALK_COMPLETED", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0xe", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a DTLB miss.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x05", - "EventName": "LD_HEAD.DTLB_MISS_AT_RET", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x90", - "Unit": "cpu_atom" - }, { "BriefDescription": "Loads that miss the DTLB and hit the STLB.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x12", "EventName": "DTLB_LOAD_MISSES.STLB_HIT", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x20", "Unit": "cpu_core" }, { "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a demand load.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "CounterMask": "1", "EventCode": "0x12", "EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a demand load.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x10", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of page walks completed due to load DTLB misses to any page size.", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts the number of page walks 
completed due to loads (including SW prefetches) whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size. Includes page walks that page fault.", + "SampleAfterValue": "200003", + "UMask": "0xe", + "Unit": "cpu_atom" + }, { "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x12", "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data loads. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0xe", "Unit": "cpu_core" }, { "BriefDescription": "Page walks completed due to a demand data load to a 1G page.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x12", "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x8", "Unit": "cpu_core" }, { "BriefDescription": "Page walks completed due to a demand data load to a 2M/4M page.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x12", "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. 
The page walk can end with or without a fault.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x4", "Unit": "cpu_core" }, { "BriefDescription": "Page walks completed due to a demand data load to a 4K page.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x12", "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "Number of page walks outstanding for a demand load in the PMH each cycle.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x12", "EventName": "DTLB_LOAD_MISSES.WALK_PENDING", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts the number of page walks outstanding for a demand load in the PMH (Page Miss Handler) each cycle.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x10", "Unit": "cpu_core" }, { "BriefDescription": "Stores that miss the DTLB and hit the STLB.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x13", "EventName": "DTLB_STORE_MISSES.STLB_HIT", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x20", "Unit": "cpu_core" }, { "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a store.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "CounterMask": "1", "EventCode": "0x13", "EventName": "DTLB_STORE_MISSES.WALK_ACTIVE", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.", "SampleAfterValue": "100003", - "Speculative": "1", 
"UMask": "0x10", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of page walks completed due to store DTLB misses to any page size.", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts the number of page walks completed due to stores whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size. Includes page walks that page fault.", + "SampleAfterValue": "2000003", + "UMask": "0xe", + "Unit": "cpu_atom" + }, { "BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x13", "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data stores. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0xe", "Unit": "cpu_core" }, { "BriefDescription": "Page walks completed due to a demand data store to a 1G page.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x13", "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x8", "Unit": "cpu_core" }, { "BriefDescription": "Page walks completed due to a demand data store to a 2M/4M page.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x13", "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data stores. 
This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x4", "Unit": "cpu_core" }, { "BriefDescription": "Page walks completed due to a demand data store to a 4K page.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x13", "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "Number of page walks outstanding for a store in the PMH each cycle.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x13", "EventName": "DTLB_STORE_MISSES.WALK_PENDING", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts the number of page walks outstanding for a store in the PMH (Page Miss Handler) each cycle.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x10", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of page walks initiated by a instruction fetch that missed the first and second level TLBs.", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.MISS_CAUSED_WALK", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of page walks due to an instruction fetch that miss the PDE (Page Directory Entry) cache.", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.PDE_CACHE_MISS", + "SampleAfterValue": "2000003", + "UMask": "0x80", + "Unit": "cpu_atom" + }, { "BriefDescription": "Instruction fetch requests that miss the ITLB and hit the STLB.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x11", "EventName": "ITLB_MISSES.STLB_HIT", - 
"PEBScounters": "0,1,2,3", + "PublicDescription": "Counts instruction fetch requests that miss the ITLB (Instruction TLB) and hit the STLB (Second-level TLB).", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x20", "Unit": "cpu_core" }, { "BriefDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "CounterMask": "1", "EventCode": "0x11", "EventName": "ITLB_MISSES.WALK_ACTIVE", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a code (instruction fetch) request.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x10", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of page walks completed due to instruction fetch misses to any page size.", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts the number of page walks completed due to instruction fetches whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size. Includes page walks that page fault.", + "SampleAfterValue": "200003", + "UMask": "0xe", + "Unit": "cpu_atom" + }, { "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (All page sizes)", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x11", "EventName": "ITLB_MISSES.WALK_COMPLETED", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts completed page walks (all page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0xe", "Unit": "cpu_core" }, { "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. 
(2M/4M)", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x11", "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts completed page walks (2M/4M page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x4", "Unit": "cpu_core" }, { "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x11", "EventName": "ITLB_MISSES.WALK_COMPLETED_4K", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts completed page walks (4K page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "Number of page walks outstanding for an outstanding code request in the PMH each cycle.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x11", "EventName": "ITLB_MISSES.WALK_PENDING", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts the number of page walks outstanding for an outstanding code (instruction fetch) request in the PMH (Page Miss Handler) each cycle.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x10", "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a DTLB miss.", + "EventCode": "0x05", + "EventName": "LD_HEAD.DTLB_MISS_AT_RET", + "SampleAfterValue": "1000003", + "UMask": "0x90", + "Unit": "cpu_atom" } ] diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index 78af105ca236..df47462a125f 100644 --- 
a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -1,5 +1,5 @@ Family-model,Version,Filename,EventType -GenuineIntel-6-(97|9A|B7|BA|BF),v1.15,alderlake,core +GenuineIntel-6-(97|9A|B7|BA|BF),v1.16,alderlake,core GenuineIntel-6-BE,v1.16,alderlaken,core GenuineIntel-6-(1C|26|27|35|36),v4,bonnell,core GenuineIntel-6-(3D|47),v26,broadwell,core From 5736b1b70170e15d66ec02e500db917ef42ade83 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 3 Sep 2022 00:37:06 -0700 Subject: [PATCH 2667/4122] x86/paravirt: Remove clobber bitmask from .parainstructions The u16 "clobber" value is not used in .parainstructions since commit 27876f3882fd ("x86/paravirt: Remove clobbers from struct paravirt_patch_site") Remove the u16 from the section macro, the argument from all macros, and all now-unused CLBR_* macros. Signed-off-by: Kees Cook Signed-off-by: Borislav Petkov Signed-off-by: Peter Zijlstra Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20220903073706.3193746-1-keescook@chromium.org --- arch/x86/include/asm/paravirt_types.h | 61 ++++++--------------------- 1 file changed, 12 insertions(+), 49 deletions(-) diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 27c692791b7e..8c1da419260f 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -20,37 +20,6 @@ enum paravirt_lazy_mode { #ifdef CONFIG_PARAVIRT -/* Bitmask of what can be clobbered: usually at least eax. */ -#define CLBR_EAX (1 << 0) -#define CLBR_ECX (1 << 1) -#define CLBR_EDX (1 << 2) -#define CLBR_EDI (1 << 3) - -#ifdef CONFIG_X86_32 -/* CLBR_ANY should match all regs platform has. 
For i386, that's just it */ -#define CLBR_ANY ((1 << 4) - 1) - -#define CLBR_ARG_REGS (CLBR_EAX | CLBR_EDX | CLBR_ECX) -#define CLBR_RET_REG (CLBR_EAX | CLBR_EDX) -#else -#define CLBR_RAX CLBR_EAX -#define CLBR_RCX CLBR_ECX -#define CLBR_RDX CLBR_EDX -#define CLBR_RDI CLBR_EDI -#define CLBR_RSI (1 << 4) -#define CLBR_R8 (1 << 5) -#define CLBR_R9 (1 << 6) -#define CLBR_R10 (1 << 7) -#define CLBR_R11 (1 << 8) - -#define CLBR_ANY ((1 << 9) - 1) - -#define CLBR_ARG_REGS (CLBR_RDI | CLBR_RSI | CLBR_RDX | \ - CLBR_RCX | CLBR_R8 | CLBR_R9) -#define CLBR_RET_REG (CLBR_RAX) - -#endif /* X86_64 */ - #ifndef __ASSEMBLY__ #include @@ -297,27 +266,23 @@ extern struct paravirt_patch_template pv_ops; #define paravirt_type(op) \ [paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \ [paravirt_opptr] "m" (pv_ops.op) -#define paravirt_clobber(clobber) \ - [paravirt_clobber] "i" (clobber) - /* * Generate some code, and mark it as patchable by the * apply_paravirt() alternate instruction patcher. */ -#define _paravirt_alt(insn_string, type, clobber) \ +#define _paravirt_alt(insn_string, type) \ "771:\n\t" insn_string "\n" "772:\n" \ ".pushsection .parainstructions,\"a\"\n" \ _ASM_ALIGN "\n" \ _ASM_PTR " 771b\n" \ " .byte " type "\n" \ " .byte 772b-771b\n" \ - " .short " clobber "\n" \ _ASM_ALIGN "\n" \ ".popsection\n" /* Generate patchable code, with the default asm parameters. */ #define paravirt_alt(insn_string) \ - _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]") + _paravirt_alt(insn_string, "%c[paravirt_typenum]") /* Simple instruction patching code. */ #define NATIVE_LABEL(a,x,b) "\n\t.globl " a #x "_" #b "\n" a #x "_" #b ":\n\t" @@ -469,20 +434,19 @@ int paravirt_disable_iospace(void); }) -#define ____PVOP_CALL(ret, op, clbr, call_clbr, extra_clbr, ...) \ +#define ____PVOP_CALL(ret, op, call_clbr, extra_clbr, ...) 
\ ({ \ PVOP_CALL_ARGS; \ PVOP_TEST_NULL(op); \ asm volatile(paravirt_alt(PARAVIRT_CALL) \ : call_clbr, ASM_CALL_CONSTRAINT \ : paravirt_type(op), \ - paravirt_clobber(clbr), \ ##__VA_ARGS__ \ : "memory", "cc" extra_clbr); \ ret; \ }) -#define ____PVOP_ALT_CALL(ret, op, alt, cond, clbr, call_clbr, \ +#define ____PVOP_ALT_CALL(ret, op, alt, cond, call_clbr, \ extra_clbr, ...) \ ({ \ PVOP_CALL_ARGS; \ @@ -491,45 +455,44 @@ int paravirt_disable_iospace(void); alt, cond) \ : call_clbr, ASM_CALL_CONSTRAINT \ : paravirt_type(op), \ - paravirt_clobber(clbr), \ ##__VA_ARGS__ \ : "memory", "cc" extra_clbr); \ ret; \ }) #define __PVOP_CALL(rettype, op, ...) \ - ____PVOP_CALL(PVOP_RETVAL(rettype), op, CLBR_ANY, \ + ____PVOP_CALL(PVOP_RETVAL(rettype), op, \ PVOP_CALL_CLOBBERS, EXTRA_CLOBBERS, ##__VA_ARGS__) #define __PVOP_ALT_CALL(rettype, op, alt, cond, ...) \ - ____PVOP_ALT_CALL(PVOP_RETVAL(rettype), op, alt, cond, CLBR_ANY,\ + ____PVOP_ALT_CALL(PVOP_RETVAL(rettype), op, alt, cond, \ PVOP_CALL_CLOBBERS, EXTRA_CLOBBERS, \ ##__VA_ARGS__) #define __PVOP_CALLEESAVE(rettype, op, ...) \ - ____PVOP_CALL(PVOP_RETVAL(rettype), op.func, CLBR_RET_REG, \ + ____PVOP_CALL(PVOP_RETVAL(rettype), op.func, \ PVOP_CALLEE_CLOBBERS, , ##__VA_ARGS__) #define __PVOP_ALT_CALLEESAVE(rettype, op, alt, cond, ...) \ ____PVOP_ALT_CALL(PVOP_RETVAL(rettype), op.func, alt, cond, \ - CLBR_RET_REG, PVOP_CALLEE_CLOBBERS, , ##__VA_ARGS__) + PVOP_CALLEE_CLOBBERS, , ##__VA_ARGS__) #define __PVOP_VCALL(op, ...) \ - (void)____PVOP_CALL(, op, CLBR_ANY, PVOP_VCALL_CLOBBERS, \ + (void)____PVOP_CALL(, op, PVOP_VCALL_CLOBBERS, \ VEXTRA_CLOBBERS, ##__VA_ARGS__) #define __PVOP_ALT_VCALL(op, alt, cond, ...) \ - (void)____PVOP_ALT_CALL(, op, alt, cond, CLBR_ANY, \ + (void)____PVOP_ALT_CALL(, op, alt, cond, \ PVOP_VCALL_CLOBBERS, VEXTRA_CLOBBERS, \ ##__VA_ARGS__) #define __PVOP_VCALLEESAVE(op, ...) 
\ - (void)____PVOP_CALL(, op.func, CLBR_RET_REG, \ + (void)____PVOP_CALL(, op.func, \ PVOP_VCALLEE_CLOBBERS, , ##__VA_ARGS__) #define __PVOP_ALT_VCALLEESAVE(op, alt, cond, ...) \ - (void)____PVOP_ALT_CALL(, op.func, alt, cond, CLBR_RET_REG, \ + (void)____PVOP_ALT_CALL(, op.func, alt, cond, \ PVOP_VCALLEE_CLOBBERS, , ##__VA_ARGS__) From 6f520ce17920b3cdfbd2479b3ccf27f9706219d0 Mon Sep 17 00:00:00 2001 From: Ajay Kaher Date: Wed, 23 Nov 2022 15:48:16 +0530 Subject: [PATCH 2668/4122] perf symbol: correction while adjusting symbol perf doesn't provide proper symbol information for specially crafted .debug files. Sometimes .debug file may not have similar program header as runtime ELF file. For example if we generate .debug file using objcopy --only-keep-debug resulting file will not contain .text, .data and other runtime sections. That means corresponding program headers will have zero FileSiz and modified Offset. Example: program header of text section of libxxx.so: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flags Align LOAD 0x00000000003d3000 0x00000000003d3000 0x00000000003d3000 0x000000000055ae80 0x000000000055ae80 R E 0x1000 Same program header after executing: objcopy --only-keep-debug libxxx.so libxxx.so.debug LOAD 0x0000000000001000 0x00000000003d3000 0x00000000003d3000 0x0000000000000000 0x000000000055ae80 R E 0x1000 Offset and FileSiz have been changed. Following formula will not provide correct value, if program header taken from .debug file (syms_ss): sym.st_value -= phdr.p_vaddr - phdr.p_offset; Correct program header information is located inside runtime ELF file (runtime_ss). Fixes: 2d86612aacb7805f ("perf symbol: Correct address for bss symbols") Signed-off-by: Ajay Kaher Cc: Alexander Shishkin Cc: Alexey Makhalov Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Srivatsa S. 
Bhat Cc: Steven Rostedt (VMware) Cc: Vasavi Sirnapalli Link: http://lore.kernel.org/lkml/1669198696-50547-1-git-send-email-akaher@vmware.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 647b7dff8ef3..80345695b136 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1303,7 +1303,7 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss, (!used_opd && syms_ss->adjust_symbols)) { GElf_Phdr phdr; - if (elf_read_program_header(syms_ss->elf, + if (elf_read_program_header(runtime_ss->elf, (u64)sym.st_value, &phdr)) { pr_debug4("%s: failed to find program header for " "symbol: %s st_value: %#" PRIx64 "\n", From f1a033cc6b9eb6d80322008422df3c87aa5d47a0 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 9 Nov 2022 14:44:18 +0100 Subject: [PATCH 2669/4122] x86/paravirt: Use common macro for creating simple asm paravirt functions There are some paravirt assembler functions which are sharing a common pattern. Introduce a macro DEFINE_PARAVIRT_ASM() for creating them. Note that this macro is including explicit alignment of the generated functions, leading to __raw_callee_save___kvm_vcpu_is_preempted(), _paravirt_nop() and paravirt_ret0() to be aligned at 4 byte boundaries now. The explicit _paravirt_nop() prototype in paravirt.c isn't needed, as it is included in paravirt_types.h already. Signed-off-by: Juergen Gross Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Srivatsa S. 
Bhat (VMware) Link: https://lkml.kernel.org/r/20221109134418.6516-1-jgross@suse.com --- arch/x86/include/asm/paravirt.h | 12 ++++++ arch/x86/include/asm/qspinlock_paravirt.h | 47 ++++++++++------------- arch/x86/kernel/kvm.c | 19 +++------ arch/x86/kernel/paravirt.c | 23 +---------- 4 files changed, 40 insertions(+), 61 deletions(-) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 2851bc2339d5..73e9522db7c1 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -731,6 +731,18 @@ static __always_inline unsigned long arch_local_irq_save(void) #undef PVOP_VCALL4 #undef PVOP_CALL4 +#define DEFINE_PARAVIRT_ASM(func, instr, sec) \ + asm (".pushsection " #sec ", \"ax\"\n" \ + ".global " #func "\n\t" \ + ".type " #func ", @function\n\t" \ + ASM_FUNC_ALIGN "\n" \ + #func ":\n\t" \ + ASM_ENDBR \ + instr "\n\t" \ + ASM_RET \ + ".size " #func ", . - " #func "\n\t" \ + ".popsection") + extern void default_banner(void); #else /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h index d861127731f4..42b17cf10b10 100644 --- a/arch/x86/include/asm/qspinlock_paravirt.h +++ b/arch/x86/include/asm/qspinlock_paravirt.h @@ -14,8 +14,6 @@ __PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath, ".spinlock.text"); #define __pv_queued_spin_unlock __pv_queued_spin_unlock -#define PV_UNLOCK "__raw_callee_save___pv_queued_spin_unlock" -#define PV_UNLOCK_SLOWPATH "__raw_callee_save___pv_queued_spin_unlock_slowpath" /* * Optimized assembly version of __raw_callee_save___pv_queued_spin_unlock @@ -37,32 +35,27 @@ __PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath, ".spinlock.text"); * rsi = lockval (second argument) * rdx = internal variable (set to 0) */ -asm (".pushsection .spinlock.text, \"ax\";" - ".globl " PV_UNLOCK ";" - ".type " PV_UNLOCK ", @function;" - ASM_FUNC_ALIGN - PV_UNLOCK ": " - ASM_ENDBR - FRAME_BEGIN - "push %rdx;" - "mov $0x1,%eax;" 
- "xor %edx,%edx;" - LOCK_PREFIX "cmpxchg %dl,(%rdi);" - "cmp $0x1,%al;" - "jne .slowpath;" - "pop %rdx;" +#define PV_UNLOCK_ASM \ + FRAME_BEGIN \ + "push %rdx\n\t" \ + "mov $0x1,%eax\n\t" \ + "xor %edx,%edx\n\t" \ + LOCK_PREFIX "cmpxchg %dl,(%rdi)\n\t" \ + "cmp $0x1,%al\n\t" \ + "jne .slowpath\n\t" \ + "pop %rdx\n\t" \ + FRAME_END \ + ASM_RET \ + ".slowpath:\n\t" \ + "push %rsi\n\t" \ + "movzbl %al,%esi\n\t" \ + "call __raw_callee_save___pv_queued_spin_unlock_slowpath\n\t" \ + "pop %rsi\n\t" \ + "pop %rdx\n\t" \ FRAME_END - ASM_RET - ".slowpath: " - "push %rsi;" - "movzbl %al,%esi;" - "call " PV_UNLOCK_SLOWPATH ";" - "pop %rsi;" - "pop %rdx;" - FRAME_END - ASM_RET - ".size " PV_UNLOCK ", .-" PV_UNLOCK ";" - ".popsection"); + +DEFINE_PARAVIRT_ASM(__raw_callee_save___pv_queued_spin_unlock, + PV_UNLOCK_ASM, .spinlock.text); #else /* CONFIG_64BIT */ diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 95fb85bea111..4d053cb2c48a 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -798,20 +798,13 @@ extern bool __raw_callee_save___kvm_vcpu_is_preempted(long); * Hand-optimize version for x86-64 to avoid 8 64-bit register saving and * restoring to/from the stack. 
*/ -asm( -".pushsection .text;" -".global __raw_callee_save___kvm_vcpu_is_preempted;" -".type __raw_callee_save___kvm_vcpu_is_preempted, @function;" -ASM_FUNC_ALIGN -"__raw_callee_save___kvm_vcpu_is_preempted:" -ASM_ENDBR -"movq __per_cpu_offset(,%rdi,8), %rax;" -"cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);" -"setne %al;" -ASM_RET -".size __raw_callee_save___kvm_vcpu_is_preempted, .-__raw_callee_save___kvm_vcpu_is_preempted;" -".popsection"); +#define PV_VCPU_PREEMPTED_ASM \ + "movq __per_cpu_offset(,%rdi,8), %rax\n\t" \ + "cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax)\n\t" \ + "setne %al\n\t" +DEFINE_PARAVIRT_ASM(__raw_callee_save___kvm_vcpu_is_preempted, + PV_VCPU_PREEMPTED_ASM, .text); #endif static void __init kvm_guest_init(void) diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index e244c49b52d7..327757afb027 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -37,29 +37,10 @@ * nop stub, which must not clobber anything *including the stack* to * avoid confusing the entry prologues. */ -extern void _paravirt_nop(void); -asm (".pushsection .entry.text, \"ax\"\n" - ".global _paravirt_nop\n" - ASM_FUNC_ALIGN - "_paravirt_nop:\n\t" - ASM_ENDBR - ASM_RET - ".size _paravirt_nop, . - _paravirt_nop\n\t" - ".type _paravirt_nop, @function\n\t" - ".popsection"); +DEFINE_PARAVIRT_ASM(_paravirt_nop, "", .entry.text); /* stub always returning 0. */ -asm (".pushsection .entry.text, \"ax\"\n" - ".global paravirt_ret0\n" - ASM_FUNC_ALIGN - "paravirt_ret0:\n\t" - ASM_ENDBR - "xor %" _ASM_AX ", %" _ASM_AX ";\n\t" - ASM_RET - ".size paravirt_ret0, . 
- paravirt_ret0\n\t" - ".type paravirt_ret0, @function\n\t" - ".popsection"); - +DEFINE_PARAVIRT_ASM(paravirt_ret0, "xor %eax,%eax", .entry.text); void __init default_banner(void) { From 1a9c20b45d193ead21dc63b07d1abb40b0a237c2 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 17 Nov 2022 18:46:05 -0800 Subject: [PATCH 2670/4122] perf list: Support newlines in wordwrap Rather than a newline starting from column 0, record a newline was seen and then add the newline and space before the next word. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Caleb Biggers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Stephane Eranian Cc: Weilin Wang Cc: Xin Gao Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221118024607.409083-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-list.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 84fa2d050eac..f3750331e8f6 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -74,17 +74,19 @@ static void wordwrap(const char *s, int start, int max, int corr) { int column = start; int n; + bool saw_newline = false; while (*s) { - int wlen = strcspn(s, " \t"); + int wlen = strcspn(s, " \t\n"); - if (column + wlen >= max && column > start) { + if ((column + wlen >= max && column > start) || saw_newline) { printf("\n%*s", start, ""); column = start + corr; } n = printf("%s%.*s", column > start ?
" " : "", wlen, s); if (n <= 0) break; + saw_newline = s[wlen] == '\n'; s += wlen; column += n; s = skip_spaces(s); @@ -146,7 +148,7 @@ static void default_print_event(void *ps, const char *pmu_name, const char *topi wordwrap(desc, 8, pager_get_columns(), 0); printf("]\n"); } - + long_desc = long_desc ?: desc; if (long_desc && print_state->long_desc) { printf("%*s", 8, "["); wordwrap(long_desc, 8, pager_get_columns(), 0); @@ -154,7 +156,8 @@ static void default_print_event(void *ps, const char *pmu_name, const char *topi } if (print_state->detailed && encoding_desc) { - printf("%*s%s", 8, "", encoding_desc); + printf("%*s", 8, ""); + wordwrap(encoding_desc, 8, pager_get_columns(), 0); if (metric_name) printf(" MetricName: %s", metric_name); if (metric_expr) From 1284ded7d05952f2657f5abeeda5a3f74ca9cffc Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 17 Nov 2022 18:46:06 -0800 Subject: [PATCH 2671/4122] perf list: JSON escape encoding improvements Use strbuf to make the string under construction's length unlimited. Use the format %s to mean a literal string copy and %S to signify a need to escape the string. Add support for escaping a newline character.
Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Caleb Biggers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Stephane Eranian Cc: Weilin Wang Cc: Xin Gao Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221118024607.409083-3-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-list.c | 109 +++++++++++++++++++++++--------------- 1 file changed, 67 insertions(+), 42 deletions(-) diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index f3750331e8f6..137d73edb541 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -17,6 +17,7 @@ #include "util/metricgroup.h" #include "util/string2.h" #include "util/strlist.h" +#include "util/strbuf.h" #include #include #include @@ -250,45 +251,56 @@ static void json_print_end(void *ps) printf("%s]\n", print_state->need_sep ? "\n" : ""); } -static void fix_escape_printf(const char *fmt, ...) +static void fix_escape_printf(struct strbuf *buf, const char *fmt, ...) 
{ va_list args; - char buf[2048]; - size_t buf_pos = 0; va_start(args, fmt); + strbuf_setlen(buf, 0); for (size_t fmt_pos = 0; fmt_pos < strlen(fmt); fmt_pos++) { switch (fmt[fmt_pos]) { - case '%': { - const char *s = va_arg(args, const char*); - + case '%': fmt_pos++; - assert(fmt[fmt_pos] == 's'); - for (size_t s_pos = 0; s_pos < strlen(s); s_pos++) { - switch (s[s_pos]) { - case '\\': - __fallthrough; - case '\"': - buf[buf_pos++] = '\\'; - assert(buf_pos < sizeof(buf)); - __fallthrough; - default: - buf[buf_pos++] = s[s_pos]; - assert(buf_pos < sizeof(buf)); - break; + switch (fmt[fmt_pos]) { + case 's': { + const char *s = va_arg(args, const char*); + + strbuf_addstr(buf, s); + break; + } + case 'S': { + const char *s = va_arg(args, const char*); + + for (size_t s_pos = 0; s_pos < strlen(s); s_pos++) { + switch (s[s_pos]) { + case '\n': + strbuf_addstr(buf, "\\n"); + break; + case '\\': + __fallthrough; + case '\"': + strbuf_addch(buf, '\\'); + __fallthrough; + default: + strbuf_addch(buf, s[s_pos]); + break; + } } + break; + } + default: + pr_err("Unexpected format character '%c'\n", fmt[fmt_pos]); + strbuf_addch(buf, '%'); + strbuf_addch(buf, fmt[fmt_pos]); } break; - } default: - buf[buf_pos++] = fmt[fmt_pos]; - assert(buf_pos < sizeof(buf)); + strbuf_addch(buf, fmt[fmt_pos]); break; } } va_end(args); - buf[buf_pos] = '\0'; - fputs(buf, stdout); + fputs(buf->buf, stdout); } static void json_print_event(void *ps, const char *pmu_name, const char *topic, @@ -301,62 +313,71 @@ static void json_print_event(void *ps, const char *pmu_name, const char *topic, { struct json_print_state *print_state = ps; bool need_sep = false; + struct strbuf buf; + strbuf_init(&buf, 0); printf("%s{\n", print_state->need_sep ? ",\n" : ""); print_state->need_sep = true; if (pmu_name) { - fix_escape_printf("\t\"Unit\": \"%s\"", pmu_name); + fix_escape_printf(&buf, "\t\"Unit\": \"%S\"", pmu_name); need_sep = true; } if (topic) { - fix_escape_printf("%s\t\"Topic\": \"%s\"", need_sep ? 
",\n" : "", topic); + fix_escape_printf(&buf, "%s\t\"Topic\": \"%S\"", need_sep ? ",\n" : "", topic); need_sep = true; } if (event_name) { - fix_escape_printf("%s\t\"EventName\": \"%s\"", need_sep ? ",\n" : "", event_name); + fix_escape_printf(&buf, "%s\t\"EventName\": \"%S\"", need_sep ? ",\n" : "", + event_name); need_sep = true; } if (event_alias && strlen(event_alias)) { - fix_escape_printf("%s\t\"EventAlias\": \"%s\"", need_sep ? ",\n" : "", event_alias); + fix_escape_printf(&buf, "%s\t\"EventAlias\": \"%S\"", need_sep ? ",\n" : "", + event_alias); need_sep = true; } if (scale_unit && strlen(scale_unit)) { - fix_escape_printf("%s\t\"ScaleUnit\": \"%s\"", need_sep ? ",\n" : "", + fix_escape_printf(&buf, "%s\t\"ScaleUnit\": \"%S\"", need_sep ? ",\n" : "", scale_unit); need_sep = true; } if (event_type_desc) { - fix_escape_printf("%s\t\"EventType\": \"%s\"", need_sep ? ",\n" : "", + fix_escape_printf(&buf, "%s\t\"EventType\": \"%S\"", need_sep ? ",\n" : "", event_type_desc); need_sep = true; } if (deprecated) { - fix_escape_printf("%s\t\"Deprecated\": \"%s\"", need_sep ? ",\n" : "", + fix_escape_printf(&buf, "%s\t\"Deprecated\": \"%S\"", need_sep ? ",\n" : "", deprecated ? "1" : "0"); need_sep = true; } if (desc) { - fix_escape_printf("%s\t\"BriefDescription\": \"%s\"", need_sep ? ",\n" : "", desc); + fix_escape_printf(&buf, "%s\t\"BriefDescription\": \"%S\"", need_sep ? ",\n" : "", + desc); need_sep = true; } if (long_desc) { - fix_escape_printf("%s\t\"PublicDescription\": \"%s\"", need_sep ? ",\n" : "", + fix_escape_printf(&buf, "%s\t\"PublicDescription\": \"%S\"", need_sep ? ",\n" : "", long_desc); need_sep = true; } if (encoding_desc) { - fix_escape_printf("%s\t\"Encoding\": \"%s\"", need_sep ? ",\n" : "", encoding_desc); + fix_escape_printf(&buf, "%s\t\"Encoding\": \"%S\"", need_sep ? ",\n" : "", + encoding_desc); need_sep = true; } if (metric_name) { - fix_escape_printf("%s\t\"MetricName\": \"%s\"", need_sep ? 
",\n" : "", metric_name); + fix_escape_printf(&buf, "%s\t\"MetricName\": \"%S\"", need_sep ? ",\n" : "", + metric_name); need_sep = true; } if (metric_expr) { - fix_escape_printf("%s\t\"MetricExpr\": \"%s\"", need_sep ? ",\n" : "", metric_expr); + fix_escape_printf(&buf, "%s\t\"MetricExpr\": \"%S\"", need_sep ? ",\n" : "", + metric_expr); need_sep = true; } printf("%s}", need_sep ? "\n" : ""); + strbuf_release(&buf); } static void json_print_metric(void *ps __maybe_unused, const char *group, @@ -366,35 +387,39 @@ static void json_print_metric(void *ps __maybe_unused, const char *group, { struct json_print_state *print_state = ps; bool need_sep = false; + struct strbuf buf; + strbuf_init(&buf, 0); printf("%s{\n", print_state->need_sep ? ",\n" : ""); print_state->need_sep = true; if (group) { - fix_escape_printf("\t\"MetricGroup\": \"%s\"", group); + fix_escape_printf(&buf, "\t\"MetricGroup\": \"%S\"", group); need_sep = true; } if (name) { - fix_escape_printf("%s\t\"MetricName\": \"%s\"", need_sep ? ",\n" : "", name); + fix_escape_printf(&buf, "%s\t\"MetricName\": \"%S\"", need_sep ? ",\n" : "", name); need_sep = true; } if (expr) { - fix_escape_printf("%s\t\"MetricExpr\": \"%s\"", need_sep ? ",\n" : "", expr); + fix_escape_printf(&buf, "%s\t\"MetricExpr\": \"%S\"", need_sep ? ",\n" : "", expr); need_sep = true; } if (unit) { - fix_escape_printf("%s\t\"ScaleUnit\": \"%s\"", need_sep ? ",\n" : "", unit); + fix_escape_printf(&buf, "%s\t\"ScaleUnit\": \"%S\"", need_sep ? ",\n" : "", unit); need_sep = true; } if (desc) { - fix_escape_printf("%s\t\"BriefDescription\": \"%s\"", need_sep ? ",\n" : "", desc); + fix_escape_printf(&buf, "%s\t\"BriefDescription\": \"%S\"", need_sep ? ",\n" : "", + desc); need_sep = true; } if (long_desc) { - fix_escape_printf("%s\t\"PublicDescription\": \"%s\"", need_sep ? ",\n" : "", + fix_escape_printf(&buf, "%s\t\"PublicDescription\": \"%S\"", need_sep ? ",\n" : "", long_desc); need_sep = true; } printf("%s}", need_sep ? 
"\n" : ""); + strbuf_release(&buf); } int cmd_list(int argc, const char **argv) From be3392b65f2b989badcadf5f6a353c7924d3ccf4 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 17 Nov 2022 18:46:07 -0800 Subject: [PATCH 2672/4122] perf list: List callback support for libpfm Missed previously, add libpfm support for 'perf list' callbacks and thereby JSON support. Committer notes: Add __maybe_unused to the args of the new print_libpfm_events() in the else HAVE_LIBPFM block. Fixes: e42b0ee61282a2f9 ("perf list: Add JSON output option") Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Caleb Biggers Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Stephane Eranian Cc: Weilin Wang Cc: Xin Gao Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221118024607.409083-4-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pfm.c | 170 +++++++++++++++++++----------------------- tools/perf/util/pfm.h | 7 +- 2 files changed, 79 insertions(+), 98 deletions(-) diff --git a/tools/perf/util/pfm.c b/tools/perf/util/pfm.c index f0bcfcab1a93..ac3227ba769c 100644 --- a/tools/perf/util/pfm.c +++ b/tools/perf/util/pfm.c @@ -12,6 +12,7 @@ #include "util/parse-events.h" #include "util/pmu.h" #include "util/pfm.h" +#include "util/strbuf.h" #include #include @@ -130,118 +131,111 @@ static const char *srcs[PFM_ATTR_CTRL_MAX] = { }; static void -print_attr_flags(pfm_event_attr_info_t *info) +print_attr_flags(struct strbuf *buf, const pfm_event_attr_info_t *info) { - int n = 0; + if (info->is_dfl) + strbuf_addf(buf, "[default] "); - if (info->is_dfl) { - printf("[default] "); - n++; - } - - if (info->is_precise) { - printf("[precise] "); - n++; - } - - if (!n) - printf("- "); + if (info->is_precise) + strbuf_addf(buf, "[precise] "); } static void 
-print_libpfm_events_detailed(pfm_event_info_t *info, bool long_desc) +print_libpfm_event(const struct print_callbacks *print_cb, void *print_state, + const pfm_pmu_info_t *pinfo, const pfm_event_info_t *info, + struct strbuf *buf) { - pfm_event_attr_info_t ainfo; - const char *src; int j, ret; + char topic[80], name[80]; - ainfo.size = sizeof(ainfo); + strbuf_setlen(buf, 0); + snprintf(topic, sizeof(topic), "pfm %s", pinfo->name); - printf(" %s\n", info->name); - printf(" [%s]\n", info->desc); - if (long_desc) { - if (info->equiv) - printf(" Equiv: %s\n", info->equiv); + snprintf(name, sizeof(name), "%s::%s", pinfo->name, info->name); + strbuf_addf(buf, "Code: 0x%"PRIx64"\n", info->code); - printf(" Code : 0x%"PRIx64"\n", info->code); - } pfm_for_each_event_attr(j, info) { - ret = pfm_get_event_attr_info(info->idx, j, - PFM_OS_PERF_EVENT_EXT, &ainfo); + pfm_event_attr_info_t ainfo; + const char *src; + + ainfo.size = sizeof(ainfo); + ret = pfm_get_event_attr_info(info->idx, j, PFM_OS_PERF_EVENT_EXT, &ainfo); if (ret != PFM_SUCCESS) continue; - if (ainfo.type == PFM_ATTR_UMASK) { - printf(" %s:%s\n", info->name, ainfo.name); - printf(" [%s]\n", ainfo.desc); - } - - if (!long_desc) - continue; - if (ainfo.ctrl >= PFM_ATTR_CTRL_MAX) ainfo.ctrl = PFM_ATTR_CTRL_UNKNOWN; src = srcs[ainfo.ctrl]; switch (ainfo.type) { - case PFM_ATTR_UMASK: - printf(" Umask : 0x%02"PRIx64" : %s: ", - ainfo.code, src); - print_attr_flags(&ainfo); - putchar('\n'); + case PFM_ATTR_UMASK: /* Ignore for now */ break; case PFM_ATTR_MOD_BOOL: - printf(" Modif : %s: [%s] : %s (boolean)\n", src, - ainfo.name, ainfo.desc); + strbuf_addf(buf, " Modif: %s: [%s] : %s (boolean)\n", src, + ainfo.name, ainfo.desc); break; case PFM_ATTR_MOD_INTEGER: - printf(" Modif : %s: [%s] : %s (integer)\n", src, - ainfo.name, ainfo.desc); + strbuf_addf(buf, " Modif: %s: [%s] : %s (integer)\n", src, + ainfo.name, ainfo.desc); break; case PFM_ATTR_NONE: case PFM_ATTR_RAW_UMASK: case PFM_ATTR_MAX: default: - printf(" 
Attr : %s: [%s] : %s\n", src, - ainfo.name, ainfo.desc); + strbuf_addf(buf, " Attr: %s: [%s] : %s\n", src, + ainfo.name, ainfo.desc); + } + } + print_cb->print_event(print_state, + pinfo->name, + topic, + name, info->equiv, + /*scale_unit=*/NULL, + /*deprecated=*/NULL, "PFM event", + info->desc, /*long_desc=*/NULL, + /*encoding_desc=*/buf->buf, + /*metric_name=*/NULL, /*metric_expr=*/NULL); + + pfm_for_each_event_attr(j, info) { + pfm_event_attr_info_t ainfo; + const char *src; + + strbuf_setlen(buf, 0); + + ainfo.size = sizeof(ainfo); + ret = pfm_get_event_attr_info(info->idx, j, PFM_OS_PERF_EVENT_EXT, &ainfo); + if (ret != PFM_SUCCESS) + continue; + + if (ainfo.ctrl >= PFM_ATTR_CTRL_MAX) + ainfo.ctrl = PFM_ATTR_CTRL_UNKNOWN; + + src = srcs[ainfo.ctrl]; + if (ainfo.type == PFM_ATTR_UMASK) { + strbuf_addf(buf, "Umask: 0x%02"PRIx64" : %s: ", + ainfo.code, src); + print_attr_flags(buf, &ainfo); + snprintf(name, sizeof(name), "%s::%s:%s", + pinfo->name, info->name, ainfo.name); + print_cb->print_event(print_state, + pinfo->name, + topic, + name, /*alias=*/NULL, + /*scale_unit=*/NULL, + /*deprecated=*/NULL, "PFM event", + ainfo.desc, /*long_desc=*/NULL, + /*encoding_desc=*/buf->buf, + /*metric_name=*/NULL, /*metric_expr=*/NULL); } } } -/* - * list all pmu::event:umask, pmu::event - * printed events may not be all valid combinations of umask for an event - */ -static void -print_libpfm_events_raw(pfm_pmu_info_t *pinfo, pfm_event_info_t *info) -{ - pfm_event_attr_info_t ainfo; - int j, ret; - bool has_umask = false; - - ainfo.size = sizeof(ainfo); - - pfm_for_each_event_attr(j, info) { - ret = pfm_get_event_attr_info(info->idx, j, - PFM_OS_PERF_EVENT_EXT, &ainfo); - if (ret != PFM_SUCCESS) - continue; - - if (ainfo.type != PFM_ATTR_UMASK) - continue; - - printf("%s::%s:%s\n", pinfo->name, info->name, ainfo.name); - has_umask = true; - } - if (!has_umask) - printf("%s::%s\n", pinfo->name, info->name); -} - -void print_libpfm_events(bool name_only, bool long_desc) +void 
print_libpfm_events(const struct print_callbacks *print_cb, void *print_state) { pfm_event_info_t info; pfm_pmu_info_t pinfo; - int i, p, ret; + int p, ret; + struct strbuf storage; libpfm_initialize(); @@ -249,12 +243,9 @@ void print_libpfm_events(bool name_only, bool long_desc) info.size = sizeof(info); pinfo.size = sizeof(pinfo); - if (!name_only) - puts("\nList of pre-defined events (to be used in --pfm-events):\n"); + strbuf_init(&storage, 2048); pfm_for_all_pmus(p) { - bool printed_pmu = false; - ret = pfm_get_pmu_info(p, &pinfo); if (ret != PFM_SUCCESS) continue; @@ -267,25 +258,14 @@ void print_libpfm_events(bool name_only, bool long_desc) if (pinfo.pmu == PFM_PMU_PERF_EVENT) continue; - for (i = pinfo.first_event; i != -1; - i = pfm_get_event_next(i)) { - + for (int i = pinfo.first_event; i != -1; i = pfm_get_event_next(i)) { ret = pfm_get_event_info(i, PFM_OS_PERF_EVENT_EXT, &info); if (ret != PFM_SUCCESS) continue; - if (!name_only && !printed_pmu) { - printf("%s:\n", pinfo.name); - printed_pmu = true; - } - - if (!name_only) - print_libpfm_events_detailed(&info, long_desc); - else - print_libpfm_events_raw(&pinfo, &info); + print_libpfm_event(print_cb, print_state, &pinfo, &info, &storage); } - if (!name_only && printed_pmu) - putchar('\n'); } + strbuf_release(&storage); } diff --git a/tools/perf/util/pfm.h b/tools/perf/util/pfm.h index 7d70dda87012..fb25c2749d26 100644 --- a/tools/perf/util/pfm.h +++ b/tools/perf/util/pfm.h @@ -7,13 +7,14 @@ #ifndef __PERF_PFM_H #define __PERF_PFM_H +#include "print-events.h" #include #ifdef HAVE_LIBPFM int parse_libpfm_events_option(const struct option *opt, const char *str, int unset); -void print_libpfm_events(bool name_only, bool long_desc); +void print_libpfm_events(const struct print_callbacks *print_cb, void *print_state); #else #include @@ -26,8 +27,8 @@ static inline int parse_libpfm_events_option( return 0; } -static inline void print_libpfm_events(bool name_only __maybe_unused, - bool long_desc 
__maybe_unused) +static inline void print_libpfm_events(const struct print_callbacks *print_cb __maybe_unused, + void *print_state __maybe_unused) { } From d8a5b59c5fc75c99ba17e3eb1a8f580d8d172b28 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 09:13:41 +0100 Subject: [PATCH 2673/4122] phy: qcom-qmp-combo: fix out-of-bounds clock access The SM8250 only uses three clocks but the DP configuration erroneously described four clocks. In case the DP part of the PHY is initialised before the USB part, this would lead to uninitialised memory beyond the bulk-clocks array to be treated as a clock pointer as the clocks are requested based on the USB configuration. Fixes: aff188feb5e1 ("phy: qcom-qmp: add support for sm8250-usb3-dp phy") Cc: stable@vger.kernel.org # 5.13 Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114081346.5116-2-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 5e11b6a1d189..bb38b18258ca 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -1270,8 +1270,8 @@ static const struct qmp_phy_cfg sm8250_dpphy_cfg = { .swing_hbr3_hbr2 = &qmp_dp_v3_voltage_swing_hbr3_hbr2, .pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2, - .clk_list = qmp_v4_phy_clk_l, - .num_clks = ARRAY_SIZE(qmp_v4_phy_clk_l), + .clk_list = qmp_v4_sm8250_usbphy_clk_l, + .num_clks = ARRAY_SIZE(qmp_v4_sm8250_usbphy_clk_l), .reset_list = msm8996_usb3phy_reset_l, .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), .vreg_list = qmp_phy_vreg_l, From e965ab8216a419fadb4520b65a95dc7017daa800 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 09:13:42 +0100 Subject: [PATCH 2674/4122] phy: qcom-qmp-combo: fix sdm845 reset The SDM845 has two resets but the 
DP configuration erroneously described only one. In case the DP part of the PHY is initialised before the USB part (e.g. depending on probe order), then only the first reset would be asserted. Add a dedicated configuration for SDM845 rather than reuse the incompatible SC7180 configuration. Fixes: d88497fb6bbd ("phy: qualcomm: phy-qcom-qmp: add support for combo USB3+DP phy on SDM845") Cc: stable@vger.kernel.org # 6.1 Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114081346.5116-3-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 39 ++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index bb38b18258ca..cc53e2f99121 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -1084,9 +1084,46 @@ static const struct qmp_phy_cfg sdm845_usb3phy_cfg = { .has_pwrdn_delay = true, }; +static const struct qmp_phy_cfg sdm845_dpphy_cfg = { + .type = PHY_TYPE_DP, + .lanes = 2, + + .serdes_tbl = qmp_v3_dp_serdes_tbl, + .serdes_tbl_num = ARRAY_SIZE(qmp_v3_dp_serdes_tbl), + .tx_tbl = qmp_v3_dp_tx_tbl, + .tx_tbl_num = ARRAY_SIZE(qmp_v3_dp_tx_tbl), + + .serdes_tbl_rbr = qmp_v3_dp_serdes_tbl_rbr, + .serdes_tbl_rbr_num = ARRAY_SIZE(qmp_v3_dp_serdes_tbl_rbr), + .serdes_tbl_hbr = qmp_v3_dp_serdes_tbl_hbr, + .serdes_tbl_hbr_num = ARRAY_SIZE(qmp_v3_dp_serdes_tbl_hbr), + .serdes_tbl_hbr2 = qmp_v3_dp_serdes_tbl_hbr2, + .serdes_tbl_hbr2_num = ARRAY_SIZE(qmp_v3_dp_serdes_tbl_hbr2), + .serdes_tbl_hbr3 = qmp_v3_dp_serdes_tbl_hbr3, + .serdes_tbl_hbr3_num = ARRAY_SIZE(qmp_v3_dp_serdes_tbl_hbr3), + + .swing_hbr_rbr = &qmp_dp_v3_voltage_swing_hbr_rbr, + .pre_emphasis_hbr_rbr = &qmp_dp_v3_pre_emphasis_hbr_rbr, + .swing_hbr3_hbr2 = &qmp_dp_v3_voltage_swing_hbr3_hbr2, + .pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2, + + .clk_list = 
qmp_v3_phy_clk_l, + .num_clks = ARRAY_SIZE(qmp_v3_phy_clk_l), + .reset_list = msm8996_usb3phy_reset_l, + .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), + .vreg_list = qmp_phy_vreg_l, + .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), + .regs = qmp_v3_usb3phy_regs_layout, + + .dp_aux_init = qcom_qmp_v3_phy_dp_aux_init, + .configure_dp_tx = qcom_qmp_v3_phy_configure_dp_tx, + .configure_dp_phy = qcom_qmp_v3_phy_configure_dp_phy, + .calibrate_dp_phy = qcom_qmp_v3_dp_phy_calibrate, +}; + static const struct qmp_phy_combo_cfg sdm845_usb3dpphy_cfg = { .usb_cfg = &sdm845_usb3phy_cfg, - .dp_cfg = &sc7180_dpphy_cfg, + .dp_cfg = &sdm845_dpphy_cfg, }; static const struct qmp_phy_cfg sm8150_usb3phy_cfg = { From 910dd4883d757af5faac92590f33f0f7da963032 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 09:13:43 +0100 Subject: [PATCH 2675/4122] phy: qcom-qmp-combo: fix sc8180x reset The SC8180X has two resets but the DP configuration erroneously described only one. In case the DP part of the PHY is initialised before the USB part (e.g. depending on probe order), then only the first reset would be asserted. 
Fixes: 1633802cd4ac ("phy: qcom: qmp: Add SC8180x USB/DP combo") Cc: stable@vger.kernel.org # 5.15 Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114081346.5116-4-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index cc53e2f99121..40c25a0ead23 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -1177,8 +1177,8 @@ static const struct qmp_phy_cfg sc8180x_dpphy_cfg = { .clk_list = qmp_v3_phy_clk_l, .num_clks = ARRAY_SIZE(qmp_v3_phy_clk_l), - .reset_list = sc7180_usb3phy_reset_l, - .num_resets = ARRAY_SIZE(sc7180_usb3phy_reset_l), + .reset_list = msm8996_usb3phy_reset_l, + .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), .vreg_list = qmp_phy_vreg_l, .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = qmp_v3_usb3phy_regs_layout, From 7a7d86d14d073dfa3429c550667a8e78b99edbd4 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 09:13:44 +0100 Subject: [PATCH 2676/4122] phy: qcom-qmp-combo: fix broken power on The PHY is powered on during phy-init by setting the SW_PWRDN bit in the COM_POWER_DOWN_CTRL register and then setting the same bit in the PCS_POWER_DOWN_CONTROL register that belongs to the USB part of the PHY. Currently, whether power on succeeds depends on probe order and having the USB part of the PHY be initialised first. In case the DP part of the PHY is instead initialised first, the intended power on of the USB block results in a corrupted DP_PHY register (e.g. DP_PHY_AUX_CFG8). Add a pointer to the USB part of the PHY to the driver data and use that to power on the PHY also if the DP part of the PHY is initialised first. 
Fixes: 52e013d0bffa ("phy: qcom-qmp: Add support for DP in USB3+DP combo phy") Cc: stable@vger.kernel.org # 5.10 Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114081346.5116-5-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 40c25a0ead23..17707f68d482 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -932,6 +932,7 @@ struct qcom_qmp { struct regulator_bulk_data *vregs; struct qmp_phy **phys; + struct qmp_phy *usb_phy; struct mutex phy_mutex; int init_count; @@ -1911,7 +1912,7 @@ static int qmp_combo_com_init(struct qmp_phy *qphy) { struct qcom_qmp *qmp = qphy->qmp; const struct qmp_phy_cfg *cfg = qphy->cfg; - void __iomem *pcs = qphy->pcs; + struct qmp_phy *usb_phy = qmp->usb_phy; void __iomem *dp_com = qmp->dp_com; int ret; @@ -1963,7 +1964,8 @@ static int qmp_combo_com_init(struct qmp_phy *qphy) qphy_clrbits(dp_com, QPHY_V3_DP_COM_SWI_CTRL, 0x03); qphy_clrbits(dp_com, QPHY_V3_DP_COM_SW_RESET, SW_RESET); - qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], SW_PWRDN); + qphy_setbits(usb_phy->pcs, usb_phy->cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], + SW_PWRDN); mutex_unlock(&qmp->phy_mutex); @@ -2831,6 +2833,8 @@ static int qmp_combo_probe(struct platform_device *pdev) goto err_node_put; } + qmp->usb_phy = qmp->phys[id]; + /* * Register the pipe clock provided by phy. * See function description to see details of this pipe clock. 
@@ -2846,6 +2850,9 @@ static int qmp_combo_probe(struct platform_device *pdev) id++; } + if (!qmp->usb_phy) + return -EINVAL; + phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); return PTR_ERR_OR_ZERO(phy_provider); From c7b98de745cffdceefc077ad5cf9cda032ef8959 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 09:13:45 +0100 Subject: [PATCH 2677/4122] phy: qcom-qmp-combo: fix runtime suspend Drop the confused runtime-suspend type check which effectively broke runtime PM if the DP child node happens to be parsed before the USB child node during probe (e.g. due to order of child nodes in the devicetree). Instead use the new driver data USB PHY pointer to access the USB configuration and resources. Fixes: 52e013d0bffa ("phy: qcom-qmp: Add support for DP in USB3+DP combo phy") Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114081346.5116-6-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 17707f68d482..fde30205f332 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -2207,15 +2207,11 @@ static void qmp_combo_disable_autonomous_mode(struct qmp_phy *qphy) static int __maybe_unused qmp_combo_runtime_suspend(struct device *dev) { struct qcom_qmp *qmp = dev_get_drvdata(dev); - struct qmp_phy *qphy = qmp->phys[0]; + struct qmp_phy *qphy = qmp->usb_phy; const struct qmp_phy_cfg *cfg = qphy->cfg; dev_vdbg(dev, "Suspending QMP phy, mode:%d\n", qphy->mode); - /* Supported only for USB3 PHY and luckily USB3 is the first phy */ - if (cfg->type != PHY_TYPE_USB3) - return 0; - if (!qmp->init_count) { dev_vdbg(dev, "PHY not initialized, bailing out\n"); return 0; @@ -2232,16 +2228,12 @@ static int __maybe_unused 
qmp_combo_runtime_suspend(struct device *dev) static int __maybe_unused qmp_combo_runtime_resume(struct device *dev) { struct qcom_qmp *qmp = dev_get_drvdata(dev); - struct qmp_phy *qphy = qmp->phys[0]; + struct qmp_phy *qphy = qmp->usb_phy; const struct qmp_phy_cfg *cfg = qphy->cfg; int ret = 0; dev_vdbg(dev, "Resuming QMP phy, mode:%d\n", qphy->mode); - /* Supported only for USB3 PHY and luckily USB3 is the first phy */ - if (cfg->type != PHY_TYPE_USB3) - return 0; - if (!qmp->init_count) { dev_vdbg(dev, "PHY not initialized, bailing out\n"); return 0; From c209b1b0e1e87e862099482e62a2f2d0bef8e989 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 09:13:46 +0100 Subject: [PATCH 2678/4122] phy: qcom-qmp-combo: clean up common initialisation Commit 52e013d0bffa ("phy: qcom-qmp: Add support for DP in USB3+DP combo phy") added support for the DisplayPort part of QMP PHYs but unfortunately did so by duplicating parts of the shared configuration, something which has led to subtle bugs depending on probe order. As the resources have always been requested based on the USB configuration, make sure to not rely on fields from the DP configuration when using them (e.g. in case they get out of sync) and remove the now unused fields from the DP configurations. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114081346.5116-7-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 47 ++--------------------- 1 file changed, 4 insertions(+), 43 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index fde30205f332..c7a926d548d8 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -1043,14 +1043,6 @@ static const struct qmp_phy_cfg sc7180_dpphy_cfg = { .swing_hbr3_hbr2 = &qmp_dp_v3_voltage_swing_hbr3_hbr2, .pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2, - .clk_list = qmp_v3_phy_clk_l, - .num_clks = ARRAY_SIZE(qmp_v3_phy_clk_l), - .reset_list = sc7180_usb3phy_reset_l, - .num_resets = ARRAY_SIZE(sc7180_usb3phy_reset_l), - .vreg_list = qmp_phy_vreg_l, - .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), - .regs = qmp_v3_usb3phy_regs_layout, - .dp_aux_init = qcom_qmp_v3_phy_dp_aux_init, .configure_dp_tx = qcom_qmp_v3_phy_configure_dp_tx, .configure_dp_phy = qcom_qmp_v3_phy_configure_dp_phy, @@ -1108,14 +1100,6 @@ static const struct qmp_phy_cfg sdm845_dpphy_cfg = { .swing_hbr3_hbr2 = &qmp_dp_v3_voltage_swing_hbr3_hbr2, .pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2, - .clk_list = qmp_v3_phy_clk_l, - .num_clks = ARRAY_SIZE(qmp_v3_phy_clk_l), - .reset_list = msm8996_usb3phy_reset_l, - .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), - .vreg_list = qmp_phy_vreg_l, - .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), - .regs = qmp_v3_usb3phy_regs_layout, - .dp_aux_init = qcom_qmp_v3_phy_dp_aux_init, .configure_dp_tx = qcom_qmp_v3_phy_configure_dp_tx, .configure_dp_phy = qcom_qmp_v3_phy_configure_dp_phy, @@ -1176,14 +1160,6 @@ static const struct qmp_phy_cfg sc8180x_dpphy_cfg = { .swing_hbr3_hbr2 = &qmp_dp_v3_voltage_swing_hbr3_hbr2, .pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2, - .clk_list = qmp_v3_phy_clk_l, - 
.num_clks = ARRAY_SIZE(qmp_v3_phy_clk_l), - .reset_list = msm8996_usb3phy_reset_l, - .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), - .vreg_list = qmp_phy_vreg_l, - .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), - .regs = qmp_v3_usb3phy_regs_layout, - .dp_aux_init = qcom_qmp_v4_phy_dp_aux_init, .configure_dp_tx = qcom_qmp_v4_phy_configure_dp_tx, .configure_dp_phy = qcom_qmp_v4_phy_configure_dp_phy, @@ -1240,14 +1216,6 @@ static const struct qmp_phy_cfg sc8280xp_usb43dp_dp_cfg = { .swing_hbr3_hbr2 = &qmp_dp_v5_voltage_swing_hbr3_hbr2, .pre_emphasis_hbr3_hbr2 = &qmp_dp_v5_pre_emphasis_hbr3_hbr2, - .clk_list = qmp_v4_phy_clk_l, - .num_clks = ARRAY_SIZE(qmp_v4_phy_clk_l), - .reset_list = msm8996_usb3phy_reset_l, - .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), - .vreg_list = qmp_phy_vreg_l, - .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), - .regs = qmp_v4_usb3phy_regs_layout, - .dp_aux_init = qcom_qmp_v4_phy_dp_aux_init, .configure_dp_tx = qcom_qmp_v4_phy_configure_dp_tx, .configure_dp_phy = qcom_qmp_v5_phy_configure_dp_phy, @@ -1308,14 +1276,6 @@ static const struct qmp_phy_cfg sm8250_dpphy_cfg = { .swing_hbr3_hbr2 = &qmp_dp_v3_voltage_swing_hbr3_hbr2, .pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2, - .clk_list = qmp_v4_sm8250_usbphy_clk_l, - .num_clks = ARRAY_SIZE(qmp_v4_sm8250_usbphy_clk_l), - .reset_list = msm8996_usb3phy_reset_l, - .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), - .vreg_list = qmp_phy_vreg_l, - .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), - .regs = qmp_v4_usb3phy_regs_layout, - .dp_aux_init = qcom_qmp_v4_phy_dp_aux_init, .configure_dp_tx = qcom_qmp_v4_phy_configure_dp_tx, .configure_dp_phy = qcom_qmp_v4_phy_configure_dp_phy, @@ -1911,8 +1871,8 @@ static int qcom_qmp_dp_phy_calibrate(struct phy *phy) static int qmp_combo_com_init(struct qmp_phy *qphy) { struct qcom_qmp *qmp = qphy->qmp; - const struct qmp_phy_cfg *cfg = qphy->cfg; struct qmp_phy *usb_phy = qmp->usb_phy; + const struct qmp_phy_cfg *cfg = usb_phy->cfg; void __iomem *dp_com = 
qmp->dp_com; int ret; @@ -1964,7 +1924,7 @@ static int qmp_combo_com_init(struct qmp_phy *qphy) qphy_clrbits(dp_com, QPHY_V3_DP_COM_SWI_CTRL, 0x03); qphy_clrbits(dp_com, QPHY_V3_DP_COM_SW_RESET, SW_RESET); - qphy_setbits(usb_phy->pcs, usb_phy->cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], + qphy_setbits(usb_phy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], SW_PWRDN); mutex_unlock(&qmp->phy_mutex); @@ -1984,7 +1944,8 @@ err_unlock: static int qmp_combo_com_exit(struct qmp_phy *qphy) { struct qcom_qmp *qmp = qphy->qmp; - const struct qmp_phy_cfg *cfg = qphy->cfg; + struct qmp_phy *usb_phy = qmp->usb_phy; + const struct qmp_phy_cfg *cfg = usb_phy->cfg; mutex_lock(&qmp->phy_mutex); if (--qmp->init_count) { From 23680f0b7d7f67a935adb38058110d2d81bbe6ea Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 23 Nov 2022 13:25:19 +0100 Subject: [PATCH 2679/4122] driver core: make struct class.dev_uevent() take a const * The dev_uevent() in struct class should not be modifying the device that is passed into it, so mark it as a const * and propagate the function signature changes out into all relevant subsystems that use this callback. Cc: Jens Axboe Cc: Luis Chamberlain Cc: Russ Weight Cc: Jean Delvare Cc: Johan Hovold Cc: Jason Gunthorpe Cc: Leon Romanovsky Cc: Karsten Keil Cc: Mauro Carvalho Chehab Cc: Keith Busch Cc: Christoph Hellwig Cc: Sagi Grimberg Cc: Dominik Brodowski Cc: "David S. 
Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Johannes Berg Cc: Wolfram Sang Cc: Raed Salem Cc: Chen Zhongjin Cc: Tetsuo Handa Cc: Avihai Horon Cc: "Matthew Wilcox (Oracle)" Cc: Alan Stern Cc: Colin Ian King Cc: Geert Uytterhoeven Cc: Jakob Koschel Cc: Antoine Tenart Cc: Frederic Weisbecker Cc: Wang Yufen Cc: linux-block@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-media@vger.kernel.org Cc: linux-nvme@lists.infradead.org Cc: linux-pm@vger.kernel.org Cc: linux-rdma@vger.kernel.org Cc: linux-usb@vger.kernel.org Cc: linux-wireless@vger.kernel.org Cc: netdev@vger.kernel.org Acked-by: Sebastian Reichel Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20221123122523.1332370-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- block/genhd.c | 4 ++-- drivers/base/firmware_loader/sysfs.c | 6 +++--- drivers/base/firmware_loader/sysfs.h | 2 +- drivers/firmware/dmi-id.c | 2 +- drivers/gnss/core.c | 6 +++--- drivers/infiniband/core/device.c | 2 +- drivers/isdn/mISDN/core.c | 4 ++-- drivers/media/dvb-core/dvbdev.c | 4 ++-- drivers/nvme/host/core.c | 4 ++-- drivers/pcmcia/cs.c | 4 ++-- drivers/power/supply/power_supply.h | 2 +- drivers/power/supply/power_supply_sysfs.c | 8 ++++---- drivers/usb/gadget/udc/core.c | 4 ++-- include/linux/device/class.h | 2 +- include/linux/mISDNif.h | 2 +- net/atm/atm_sysfs.c | 4 ++-- net/core/net-sysfs.c | 4 ++-- net/rfkill/core.c | 2 +- 18 files changed, 33 insertions(+), 33 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index 0f9769db2de8..3f1124713442 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1181,9 +1181,9 @@ static void disk_release(struct device *dev) iput(disk->part0->bd_inode); /* frees the disk */ } -static int block_uevent(struct device *dev, struct kobj_uevent_env *env) +static int block_uevent(const struct device *dev, struct kobj_uevent_env *env) { - struct gendisk *disk = dev_to_disk(dev); + const struct gendisk *disk = dev_to_disk(dev); return 
add_uevent_var(env, "DISKSEQ=%llu", disk->diskseq); } diff --git a/drivers/base/firmware_loader/sysfs.c b/drivers/base/firmware_loader/sysfs.c index 5b66b3d1fa16..56911d75b90a 100644 --- a/drivers/base/firmware_loader/sysfs.c +++ b/drivers/base/firmware_loader/sysfs.c @@ -64,7 +64,7 @@ static struct attribute *firmware_class_attrs[] = { }; ATTRIBUTE_GROUPS(firmware_class); -static int do_firmware_uevent(struct fw_sysfs *fw_sysfs, struct kobj_uevent_env *env) +static int do_firmware_uevent(const struct fw_sysfs *fw_sysfs, struct kobj_uevent_env *env) { if (add_uevent_var(env, "FIRMWARE=%s", fw_sysfs->fw_priv->fw_name)) return -ENOMEM; @@ -76,9 +76,9 @@ static int do_firmware_uevent(struct fw_sysfs *fw_sysfs, struct kobj_uevent_env return 0; } -static int firmware_uevent(struct device *dev, struct kobj_uevent_env *env) +static int firmware_uevent(const struct device *dev, struct kobj_uevent_env *env) { - struct fw_sysfs *fw_sysfs = to_fw_sysfs(dev); + const struct fw_sysfs *fw_sysfs = to_fw_sysfs(dev); int err = 0; mutex_lock(&fw_lock); diff --git a/drivers/base/firmware_loader/sysfs.h b/drivers/base/firmware_loader/sysfs.h index df1d5add698f..fd0b4ad9bdbb 100644 --- a/drivers/base/firmware_loader/sysfs.h +++ b/drivers/base/firmware_loader/sysfs.h @@ -81,7 +81,7 @@ struct fw_sysfs { void *fw_upload_priv; }; -static inline struct fw_sysfs *to_fw_sysfs(struct device *dev) +static inline struct fw_sysfs *to_fw_sysfs(const struct device *dev) { return container_of(dev, struct fw_sysfs, dev); } diff --git a/drivers/firmware/dmi-id.c b/drivers/firmware/dmi-id.c index 940ddf916202..5f3a3e913d28 100644 --- a/drivers/firmware/dmi-id.c +++ b/drivers/firmware/dmi-id.c @@ -155,7 +155,7 @@ static const struct attribute_group* sys_dmi_attribute_groups[] = { NULL }; -static int dmi_dev_uevent(struct device *dev, struct kobj_uevent_env *env) +static int dmi_dev_uevent(const struct device *dev, struct kobj_uevent_env *env) { ssize_t len; diff --git a/drivers/gnss/core.c 
b/drivers/gnss/core.c index 1e82b7967570..77a4b280c552 100644 --- a/drivers/gnss/core.c +++ b/drivers/gnss/core.c @@ -337,7 +337,7 @@ static const char * const gnss_type_names[GNSS_TYPE_COUNT] = { [GNSS_TYPE_MTK] = "MTK", }; -static const char *gnss_type_name(struct gnss_device *gdev) +static const char *gnss_type_name(const struct gnss_device *gdev) { const char *name = NULL; @@ -365,9 +365,9 @@ static struct attribute *gnss_attrs[] = { }; ATTRIBUTE_GROUPS(gnss); -static int gnss_uevent(struct device *dev, struct kobj_uevent_env *env) +static int gnss_uevent(const struct device *dev, struct kobj_uevent_env *env) { - struct gnss_device *gdev = to_gnss_device(dev); + const struct gnss_device *gdev = to_gnss_device(dev); int ret; ret = add_uevent_var(env, "GNSS_TYPE=%s", gnss_type_name(gdev)); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index fa65c5d3d395..4186dbf9377f 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -511,7 +511,7 @@ static void ib_device_release(struct device *device) kfree_rcu(dev, rcu_head); } -static int ib_device_uevent(struct device *device, +static int ib_device_uevent(const struct device *device, struct kobj_uevent_env *env) { if (add_uevent_var(env, "NAME=%s", dev_name(device))) diff --git a/drivers/isdn/mISDN/core.c b/drivers/isdn/mISDN/core.c index 90ee56d07a6e..9120be590325 100644 --- a/drivers/isdn/mISDN/core.c +++ b/drivers/isdn/mISDN/core.c @@ -139,9 +139,9 @@ static struct attribute *mISDN_attrs[] = { }; ATTRIBUTE_GROUPS(mISDN); -static int mISDN_uevent(struct device *dev, struct kobj_uevent_env *env) +static int mISDN_uevent(const struct device *dev, struct kobj_uevent_env *env) { - struct mISDNdevice *mdev = dev_to_mISDN(dev); + const struct mISDNdevice *mdev = dev_to_mISDN(dev); if (!mdev) return 0; diff --git a/drivers/media/dvb-core/dvbdev.c b/drivers/media/dvb-core/dvbdev.c index 675d877a67b2..6ef18bab9648 100644 --- a/drivers/media/dvb-core/dvbdev.c +++ 
b/drivers/media/dvb-core/dvbdev.c @@ -1008,9 +1008,9 @@ void dvb_module_release(struct i2c_client *client) EXPORT_SYMBOL_GPL(dvb_module_release); #endif -static int dvb_uevent(struct device *dev, struct kobj_uevent_env *env) +static int dvb_uevent(const struct device *dev, struct kobj_uevent_env *env) { - struct dvb_device *dvbdev = dev_get_drvdata(dev); + const struct dvb_device *dvbdev = dev_get_drvdata(dev); add_uevent_var(env, "DVB_ADAPTER_NUM=%d", dvbdev->adapter->num); add_uevent_var(env, "DVB_DEVICE_TYPE=%s", dnames[dvbdev->type]); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index da55ce45ac70..b4778b970dd4 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4580,9 +4580,9 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl) } EXPORT_SYMBOL_GPL(nvme_remove_namespaces); -static int nvme_class_uevent(struct device *dev, struct kobj_uevent_env *env) +static int nvme_class_uevent(const struct device *dev, struct kobj_uevent_env *env) { - struct nvme_ctrl *ctrl = + const struct nvme_ctrl *ctrl = container_of(dev, struct nvme_ctrl, ctrl_device); struct nvmf_ctrl_options *opts = ctrl->opts; int ret; diff --git a/drivers/pcmcia/cs.c b/drivers/pcmcia/cs.c index f70197154a36..e3224e49c43f 100644 --- a/drivers/pcmcia/cs.c +++ b/drivers/pcmcia/cs.c @@ -810,10 +810,10 @@ int pcmcia_reset_card(struct pcmcia_socket *skt) EXPORT_SYMBOL(pcmcia_reset_card); -static int pcmcia_socket_uevent(struct device *dev, +static int pcmcia_socket_uevent(const struct device *dev, struct kobj_uevent_env *env) { - struct pcmcia_socket *s = container_of(dev, struct pcmcia_socket, dev); + const struct pcmcia_socket *s = container_of(dev, struct pcmcia_socket, dev); if (add_uevent_var(env, "SOCKET_NO=%u", s->sock)) return -ENOMEM; diff --git a/drivers/power/supply/power_supply.h b/drivers/power/supply/power_supply.h index c310d4f36c10..645eee4d6b6a 100644 --- a/drivers/power/supply/power_supply.h +++ b/drivers/power/supply/power_supply.h @@ -16,7 
+16,7 @@ struct power_supply; #ifdef CONFIG_SYSFS extern void power_supply_init_attrs(struct device_type *dev_type); -extern int power_supply_uevent(struct device *dev, struct kobj_uevent_env *env); +extern int power_supply_uevent(const struct device *dev, struct kobj_uevent_env *env); #else diff --git a/drivers/power/supply/power_supply_sysfs.c b/drivers/power/supply/power_supply_sysfs.c index 5369abaceb5c..6ca7d3985a40 100644 --- a/drivers/power/supply/power_supply_sysfs.c +++ b/drivers/power/supply/power_supply_sysfs.c @@ -427,7 +427,7 @@ void power_supply_init_attrs(struct device_type *dev_type) } } -static int add_prop_uevent(struct device *dev, struct kobj_uevent_env *env, +static int add_prop_uevent(const struct device *dev, struct kobj_uevent_env *env, enum power_supply_property prop, char *prop_buf) { int ret = 0; @@ -438,7 +438,7 @@ static int add_prop_uevent(struct device *dev, struct kobj_uevent_env *env, pwr_attr = &power_supply_attrs[prop]; dev_attr = &pwr_attr->dev_attr; - ret = power_supply_show_property(dev, dev_attr, prop_buf); + ret = power_supply_show_property((struct device *)dev, dev_attr, prop_buf); if (ret == -ENODEV || ret == -ENODATA) { /* * When a battery is absent, we expect -ENODEV. 
Don't abort; @@ -458,9 +458,9 @@ static int add_prop_uevent(struct device *dev, struct kobj_uevent_env *env, pwr_attr->prop_name, prop_buf); } -int power_supply_uevent(struct device *dev, struct kobj_uevent_env *env) +int power_supply_uevent(const struct device *dev, struct kobj_uevent_env *env) { - struct power_supply *psy = dev_get_drvdata(dev); + const struct power_supply *psy = dev_get_drvdata(dev); int ret = 0, j; char *prop_buf; diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index c63c0c2cf649..b5994a0604f6 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -1723,9 +1723,9 @@ static const struct attribute_group *usb_udc_attr_groups[] = { NULL, }; -static int usb_udc_uevent(struct device *dev, struct kobj_uevent_env *env) +static int usb_udc_uevent(const struct device *dev, struct kobj_uevent_env *env) { - struct usb_udc *udc = container_of(dev, struct usb_udc, dev); + const struct usb_udc *udc = container_of(dev, struct usb_udc, dev); int ret; ret = add_uevent_var(env, "USB_UDC_NAME=%s", udc->gadget->name); diff --git a/include/linux/device/class.h b/include/linux/device/class.h index 20103e0b03c3..94b1107258e5 100644 --- a/include/linux/device/class.h +++ b/include/linux/device/class.h @@ -59,7 +59,7 @@ struct class { const struct attribute_group **dev_groups; struct kobject *dev_kobj; - int (*dev_uevent)(struct device *dev, struct kobj_uevent_env *env); + int (*dev_uevent)(const struct device *dev, struct kobj_uevent_env *env); char *(*devnode)(struct device *dev, umode_t *mode); void (*class_release)(struct class *class); diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h index 7dd1f01ec4f9..7aab4a769736 100644 --- a/include/linux/mISDNif.h +++ b/include/linux/mISDNif.h @@ -586,7 +586,7 @@ extern struct mISDNclock *mISDN_register_clock(char *, int, clockctl_func_t *, void *); extern void mISDN_unregister_clock(struct mISDNclock *); -static inline struct mISDNdevice *dev_to_mISDN(struct 
device *dev) +static inline struct mISDNdevice *dev_to_mISDN(const struct device *dev) { if (dev) return dev_get_drvdata(dev); diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c index 0fdbdfd19474..466353b3dde4 100644 --- a/net/atm/atm_sysfs.c +++ b/net/atm/atm_sysfs.c @@ -108,9 +108,9 @@ static struct device_attribute *atm_attrs[] = { }; -static int atm_uevent(struct device *cdev, struct kobj_uevent_env *env) +static int atm_uevent(const struct device *cdev, struct kobj_uevent_env *env) { - struct atm_dev *adev; + const struct atm_dev *adev; if (!cdev) return -ENODEV; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 9cfc80b8ed25..03a61d1dffbd 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1873,9 +1873,9 @@ const struct kobj_ns_type_operations net_ns_type_operations = { }; EXPORT_SYMBOL_GPL(net_ns_type_operations); -static int netdev_uevent(struct device *d, struct kobj_uevent_env *env) +static int netdev_uevent(const struct device *d, struct kobj_uevent_env *env) { - struct net_device *dev = to_net_dev(d); + const struct net_device *dev = to_net_dev(d); int retval; /* pass interface to uevent. 
*/ diff --git a/net/rfkill/core.c b/net/rfkill/core.c index dac4fdc7488a..b390ff245d5e 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -832,7 +832,7 @@ static void rfkill_release(struct device *dev) kfree(rfkill); } -static int rfkill_dev_uevent(struct device *dev, struct kobj_uevent_env *env) +static int rfkill_dev_uevent(const struct device *dev, struct kobj_uevent_env *env) { struct rfkill *rfkill = to_rfkill(dev); unsigned long flags; From ff62b8e6588fb07bedda7423622c140c4edd66a7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 23 Nov 2022 13:25:20 +0100 Subject: [PATCH 2680/4122] driver core: make struct class.devnode() take a const * MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The devnode() in struct class should not be modifying the device that is passed into it, so mark it as a const * and propagate the function signature changes out into all relevant subsystems that use this callback. Cc: Fenghua Yu Cc: Reinette Chatre Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: x86@kernel.org Cc: "H. Peter Anvin" Cc: FUJITA Tomonori Cc: Jens Axboe Cc: Justin Sanders Cc: Arnd Bergmann Cc: Sumit Semwal Cc: Benjamin Gaignard Cc: Liam Mark Cc: Laura Abbott Cc: Brian Starkey Cc: John Stultz Cc: "Christian König" Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Cc: David Airlie Cc: Daniel Vetter Cc: Jason Gunthorpe Cc: Leon Romanovsky Cc: Dennis Dalessandro Cc: Dmitry Torokhov Cc: Mauro Carvalho Chehab Cc: Sean Young Cc: Frank Haverkamp Cc: Jiri Slaby Cc: "Michael S. 
Tsirkin" Cc: Jason Wang Cc: Alex Williamson Cc: Cornelia Huck Cc: Kees Cook Cc: Anton Vorontsov Cc: Colin Cross Cc: Tony Luck Cc: Jaroslav Kysela Cc: Takashi Iwai Cc: Hans Verkuil Cc: Christophe JAILLET Cc: Xie Yongji Cc: Gautam Dawar Cc: Dan Carpenter Cc: Eli Cohen Cc: Parav Pandit Cc: Maxime Coquelin Cc: alsa-devel@alsa-project.org Cc: dri-devel@lists.freedesktop.org Cc: kvm@vger.kernel.org Cc: linaro-mm-sig@lists.linaro.org Cc: linux-block@vger.kernel.org Cc: linux-input@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-media@vger.kernel.org Cc: linux-rdma@vger.kernel.org Cc: linux-scsi@vger.kernel.org Cc: linux-usb@vger.kernel.org Cc: virtualization@lists.linux-foundation.org Link: https://lore.kernel.org/r/20221123122523.1332370-2-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/resctrl/pseudo_lock.c | 4 ++-- arch/x86/kernel/cpuid.c | 2 +- arch/x86/kernel/msr.c | 2 +- block/bsg.c | 2 +- drivers/block/aoe/aoechr.c | 2 +- drivers/char/mem.c | 2 +- drivers/char/misc.c | 4 ++-- drivers/dma-buf/dma-heap.c | 2 +- drivers/gpu/drm/drm_sysfs.c | 2 +- drivers/infiniband/core/user_mad.c | 2 +- drivers/infiniband/core/uverbs_main.c | 2 +- drivers/infiniband/hw/hfi1/device.c | 4 ++-- drivers/input/input.c | 2 +- drivers/media/dvb-core/dvbdev.c | 4 ++-- drivers/media/pci/ddbridge/ddbridge-core.c | 4 ++-- drivers/media/rc/rc-main.c | 2 +- drivers/misc/genwqe/card_base.c | 2 +- drivers/tty/tty_io.c | 2 +- drivers/usb/core/file.c | 2 +- drivers/vdpa/vdpa_user/vduse_dev.c | 2 +- drivers/vfio/vfio_main.c | 2 +- fs/pstore/pmsg.c | 2 +- include/linux/device/class.h | 2 +- sound/sound_core.c | 2 +- 24 files changed, 29 insertions(+), 29 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c index d961ae3ed96e..4e4231a58f38 100644 --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c @@ -1560,9 +1560,9 @@ static const struct file_operations 
pseudo_lock_dev_fops = { .mmap = pseudo_lock_dev_mmap, }; -static char *pseudo_lock_devnode(struct device *dev, umode_t *mode) +static char *pseudo_lock_devnode(const struct device *dev, umode_t *mode) { - struct rdtgroup *rdtgrp; + const struct rdtgroup *rdtgrp; rdtgrp = dev_get_drvdata(dev); if (mode) diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 6f7b8cc1bc9f..621ba9c0f17a 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -139,7 +139,7 @@ static int cpuid_device_destroy(unsigned int cpu) return 0; } -static char *cpuid_devnode(struct device *dev, umode_t *mode) +static char *cpuid_devnode(const struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "cpu/%u/cpuid", MINOR(dev->devt)); } diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index ed8ac6bcbafb..708751311786 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -250,7 +250,7 @@ static int msr_device_destroy(unsigned int cpu) return 0; } -static char *msr_devnode(struct device *dev, umode_t *mode) +static char *msr_devnode(const struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "cpu/%u/msr", MINOR(dev->devt)); } diff --git a/block/bsg.c b/block/bsg.c index 2ab1351eb082..08046bd9207d 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -232,7 +232,7 @@ out_put_device: } EXPORT_SYMBOL_GPL(bsg_register_queue); -static char *bsg_devnode(struct device *dev, umode_t *mode) +static char *bsg_devnode(const struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "bsg/%s", dev_name(dev)); } diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c index 8eea2529da20..7a368c90467d 100644 --- a/drivers/block/aoe/aoechr.c +++ b/drivers/block/aoe/aoechr.c @@ -273,7 +273,7 @@ static const struct file_operations aoe_fops = { .llseek = noop_llseek, }; -static char *aoe_devnode(struct device *dev, umode_t *mode) +static char *aoe_devnode(const struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "etherd/%s", 
dev_name(dev)); } diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 5611d127363e..83bf2a4dcb57 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -746,7 +746,7 @@ static const struct file_operations memory_fops = { .llseek = noop_llseek, }; -static char *mem_devnode(struct device *dev, umode_t *mode) +static char *mem_devnode(const struct device *dev, umode_t *mode) { if (mode && devlist[MINOR(dev->devt)].mode) *mode = devlist[MINOR(dev->devt)].mode; diff --git a/drivers/char/misc.c b/drivers/char/misc.c index cba19bfdc44d..88c6995b9a3d 100644 --- a/drivers/char/misc.c +++ b/drivers/char/misc.c @@ -254,9 +254,9 @@ void misc_deregister(struct miscdevice *misc) } EXPORT_SYMBOL(misc_deregister); -static char *misc_devnode(struct device *dev, umode_t *mode) +static char *misc_devnode(const struct device *dev, umode_t *mode) { - struct miscdevice *c = dev_get_drvdata(dev); + const struct miscdevice *c = dev_get_drvdata(dev); if (mode && c->mode) *mode = c->mode; diff --git a/drivers/dma-buf/dma-heap.c b/drivers/dma-buf/dma-heap.c index 8f5848aa144f..4d7150791315 100644 --- a/drivers/dma-buf/dma-heap.c +++ b/drivers/dma-buf/dma-heap.c @@ -299,7 +299,7 @@ err0: return err_ret; } -static char *dma_heap_devnode(struct device *dev, umode_t *mode) +static char *dma_heap_devnode(const struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "dma_heap/%s", dev_name(dev)); } diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c index 430e00b16eec..14bf156b3f1b 100644 --- a/drivers/gpu/drm/drm_sysfs.c +++ b/drivers/gpu/drm/drm_sysfs.c @@ -90,7 +90,7 @@ static void drm_sysfs_acpi_register(void) { } static void drm_sysfs_acpi_unregister(void) { } #endif -static char *drm_devnode(struct device *dev, umode_t *mode) +static char *drm_devnode(const struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "dri/%s", dev_name(dev)); } diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 
98cb594cd9a6..f83954180a33 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -1224,7 +1224,7 @@ static struct attribute *umad_class_dev_attrs[] = { }; ATTRIBUTE_GROUPS(umad_class_dev); -static char *umad_devnode(struct device *dev, umode_t *mode) +static char *umad_devnode(const struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev)); } diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index d54434088727..bdb179a09d77 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -1237,7 +1237,7 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data) put_device(&uverbs_dev->dev); } -static char *uverbs_devnode(struct device *dev, umode_t *mode) +static char *uverbs_devnode(const struct device *dev, umode_t *mode) { if (mode) *mode = 0666; diff --git a/drivers/infiniband/hw/hfi1/device.c b/drivers/infiniband/hw/hfi1/device.c index 8ceff7141baf..1f4496032170 100644 --- a/drivers/infiniband/hw/hfi1/device.c +++ b/drivers/infiniband/hw/hfi1/device.c @@ -72,7 +72,7 @@ const char *class_name(void) return hfi1_class_name; } -static char *hfi1_devnode(struct device *dev, umode_t *mode) +static char *hfi1_devnode(const struct device *dev, umode_t *mode) { if (mode) *mode = 0600; @@ -85,7 +85,7 @@ static const char *class_name_user(void) return hfi1_class_name_user; } -static char *hfi1_user_devnode(struct device *dev, umode_t *mode) +static char *hfi1_user_devnode(const struct device *dev, umode_t *mode) { if (mode) *mode = 0666; diff --git a/drivers/input/input.c b/drivers/input/input.c index ebb2b7f0f8ff..50597165dc54 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -1913,7 +1913,7 @@ static const struct device_type input_dev_type = { #endif }; -static char *input_devnode(struct device *dev, umode_t *mode) +static char *input_devnode(const struct device *dev, umode_t *mode) 
{ return kasprintf(GFP_KERNEL, "input/%s", dev_name(dev)); } diff --git a/drivers/media/dvb-core/dvbdev.c b/drivers/media/dvb-core/dvbdev.c index 6ef18bab9648..e73f5240cc2c 100644 --- a/drivers/media/dvb-core/dvbdev.c +++ b/drivers/media/dvb-core/dvbdev.c @@ -1018,9 +1018,9 @@ static int dvb_uevent(const struct device *dev, struct kobj_uevent_env *env) return 0; } -static char *dvb_devnode(struct device *dev, umode_t *mode) +static char *dvb_devnode(const struct device *dev, umode_t *mode) { - struct dvb_device *dvbdev = dev_get_drvdata(dev); + const struct dvb_device *dvbdev = dev_get_drvdata(dev); return kasprintf(GFP_KERNEL, "dvb/adapter%d/%s%d", dvbdev->adapter->num, dnames[dvbdev->type], dvbdev->id); diff --git a/drivers/media/pci/ddbridge/ddbridge-core.c b/drivers/media/pci/ddbridge/ddbridge-core.c index fe833f39698a..ee8087f29b2c 100644 --- a/drivers/media/pci/ddbridge/ddbridge-core.c +++ b/drivers/media/pci/ddbridge/ddbridge-core.c @@ -2716,9 +2716,9 @@ static const struct file_operations ddb_fops = { .release = ddb_release, }; -static char *ddb_devnode(struct device *device, umode_t *mode) +static char *ddb_devnode(const struct device *device, umode_t *mode) { - struct ddb *dev = dev_get_drvdata(device); + const struct ddb *dev = dev_get_drvdata(device); return kasprintf(GFP_KERNEL, "ddbridge/card%d", dev->nr); } diff --git a/drivers/media/rc/rc-main.c b/drivers/media/rc/rc-main.c index eba0cd30e314..527d9324742b 100644 --- a/drivers/media/rc/rc-main.c +++ b/drivers/media/rc/rc-main.c @@ -1017,7 +1017,7 @@ static void ir_close(struct input_dev *idev) } /* class for /sys/class/rc */ -static char *rc_devnode(struct device *dev, umode_t *mode) +static char *rc_devnode(const struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "rc/%s", dev_name(dev)); } diff --git a/drivers/misc/genwqe/card_base.c b/drivers/misc/genwqe/card_base.c index 693981891870..0f00687f72d4 100644 --- a/drivers/misc/genwqe/card_base.c +++ b/drivers/misc/genwqe/card_base.c @@ 
-1349,7 +1349,7 @@ static struct pci_driver genwqe_driver = { * Default mode should be rw for everybody. Do not change default * device name. */ -static char *genwqe_devnode(struct device *dev, umode_t *mode) +static char *genwqe_devnode(const struct device *dev, umode_t *mode) { if (mode) *mode = 0666; diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index de06c3c2ff70..aad8171f6c21 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -3494,7 +3494,7 @@ void tty_default_fops(struct file_operations *fops) *fops = tty_fops; } -static char *tty_devnode(struct device *dev, umode_t *mode) +static char *tty_devnode(const struct device *dev, umode_t *mode) { if (!mode) return NULL; diff --git a/drivers/usb/core/file.c b/drivers/usb/core/file.c index 558890ada0e5..da7d88e069e6 100644 --- a/drivers/usb/core/file.c +++ b/drivers/usb/core/file.c @@ -62,7 +62,7 @@ static struct usb_class { struct class *class; } *usb_class; -static char *usb_devnode(struct device *dev, umode_t *mode) +static char *usb_devnode(const struct device *dev, umode_t *mode) { struct usb_class_driver *drv; diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index 35dceee3ed56..0dd3c1f291da 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -1656,7 +1656,7 @@ static const struct file_operations vduse_ctrl_fops = { .llseek = noop_llseek, }; -static char *vduse_devnode(struct device *dev, umode_t *mode) +static char *vduse_devnode(const struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev)); } diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index 6e8804fe0095..5bf4b3454918 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -1812,7 +1812,7 @@ EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare); /* * Module/class support */ -static char *vfio_devnode(struct device *dev, umode_t *mode) +static char *vfio_devnode(const struct device *dev, 
umode_t *mode) { return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev)); } diff --git a/fs/pstore/pmsg.c b/fs/pstore/pmsg.c index d8542ec2f38c..b31c9c72d90b 100644 --- a/fs/pstore/pmsg.c +++ b/fs/pstore/pmsg.c @@ -46,7 +46,7 @@ static int pmsg_major; #undef pr_fmt #define pr_fmt(fmt) PMSG_NAME ": " fmt -static char *pmsg_devnode(struct device *dev, umode_t *mode) +static char *pmsg_devnode(const struct device *dev, umode_t *mode) { if (mode) *mode = 0220; diff --git a/include/linux/device/class.h b/include/linux/device/class.h index 94b1107258e5..42cc3fb44a84 100644 --- a/include/linux/device/class.h +++ b/include/linux/device/class.h @@ -60,7 +60,7 @@ struct class { struct kobject *dev_kobj; int (*dev_uevent)(const struct device *dev, struct kobj_uevent_env *env); - char *(*devnode)(struct device *dev, umode_t *mode); + char *(*devnode)(const struct device *dev, umode_t *mode); void (*class_release)(struct class *class); void (*dev_release)(struct device *dev); diff --git a/sound/sound_core.c b/sound/sound_core.c index 3332fe321737..3e7dd6fcb7cf 100644 --- a/sound/sound_core.c +++ b/sound/sound_core.c @@ -30,7 +30,7 @@ MODULE_DESCRIPTION("Core sound module"); MODULE_AUTHOR("Alan Cox"); MODULE_LICENSE("GPL"); -static char *sound_devnode(struct device *dev, umode_t *mode) +static char *sound_devnode(const struct device *dev, umode_t *mode) { if (MAJOR(dev->devt) == SOUND_MAJOR) return NULL; From a173ee25a758927adc12664d1ec162a18324a4bd Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 12:06:00 +0100 Subject: [PATCH 2681/4122] phy: qcom-qmp-combo: sort device-id table Sort the device-id table by compatible string to make it easier to find and add new entries. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114110621.4639-2-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index c7a926d548d8..d6a031bcfc30 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -2651,14 +2651,6 @@ static const struct of_device_id qmp_combo_of_match_table[] = { .compatible = "qcom,sc7180-qmp-usb3-dp-phy", .data = &sc7180_usb3dpphy_cfg, }, - { - .compatible = "qcom,sdm845-qmp-usb3-dp-phy", - .data = &sdm845_usb3dpphy_cfg, - }, - { - .compatible = "qcom,sm8250-qmp-usb3-dp-phy", - .data = &sm8250_usb3dpphy_cfg, - }, { .compatible = "qcom,sc8180x-qmp-usb3-dp-phy", .data = &sc8180x_usb3dpphy_cfg, @@ -2667,6 +2659,14 @@ static const struct of_device_id qmp_combo_of_match_table[] = { .compatible = "qcom,sc8280xp-qmp-usb43dp-phy", .data = &sc8280xp_usb43dpphy_combo_cfg, }, + { + .compatible = "qcom,sdm845-qmp-usb3-dp-phy", + .data = &sdm845_usb3dpphy_cfg, + }, + { + .compatible = "qcom,sm8250-qmp-usb3-dp-phy", + .data = &sm8250_usb3dpphy_cfg, + }, { } }; MODULE_DEVICE_TABLE(of, qmp_combo_of_match_table); From 5c5f9fbc15aa58c1bac22724f429e6e399a2f2b5 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 12:06:01 +0100 Subject: [PATCH 2682/4122] phy: qcom-qmp-combo: move device-id table Move the device-id table after probe() and next to the driver structure to keep the driver callback functions grouped together. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114110621.4639-3-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 50 +++++++++++------------ 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index d6a031bcfc30..e7c8c4417142 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -2646,31 +2646,6 @@ static int qmp_combo_create(struct device *dev, struct device_node *np, int id, return 0; } -static const struct of_device_id qmp_combo_of_match_table[] = { - { - .compatible = "qcom,sc7180-qmp-usb3-dp-phy", - .data = &sc7180_usb3dpphy_cfg, - }, - { - .compatible = "qcom,sc8180x-qmp-usb3-dp-phy", - .data = &sc8180x_usb3dpphy_cfg, - }, - { - .compatible = "qcom,sc8280xp-qmp-usb43dp-phy", - .data = &sc8280xp_usb43dpphy_combo_cfg, - }, - { - .compatible = "qcom,sdm845-qmp-usb3-dp-phy", - .data = &sdm845_usb3dpphy_cfg, - }, - { - .compatible = "qcom,sm8250-qmp-usb3-dp-phy", - .data = &sm8250_usb3dpphy_cfg, - }, - { } -}; -MODULE_DEVICE_TABLE(of, qmp_combo_of_match_table); - static const struct dev_pm_ops qmp_combo_pm_ops = { SET_RUNTIME_PM_OPS(qmp_combo_runtime_suspend, qmp_combo_runtime_resume, NULL) @@ -2815,6 +2790,31 @@ err_node_put: return ret; } +static const struct of_device_id qmp_combo_of_match_table[] = { + { + .compatible = "qcom,sc7180-qmp-usb3-dp-phy", + .data = &sc7180_usb3dpphy_cfg, + }, + { + .compatible = "qcom,sc8180x-qmp-usb3-dp-phy", + .data = &sc8180x_usb3dpphy_cfg, + }, + { + .compatible = "qcom,sc8280xp-qmp-usb43dp-phy", + .data = &sc8280xp_usb43dpphy_combo_cfg, + }, + { + .compatible = "qcom,sdm845-qmp-usb3-dp-phy", + .data = &sdm845_usb3dpphy_cfg, + }, + { + .compatible = "qcom,sm8250-qmp-usb3-dp-phy", + .data = &sm8250_usb3dpphy_cfg, + }, + { } +}; +MODULE_DEVICE_TABLE(of, qmp_combo_of_match_table); + static 
struct platform_driver qmp_combo_driver = { .probe = qmp_combo_probe, .driver = { From 987a505fa7d79691013ec4bd325ecc5664781c81 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 12:06:02 +0100 Subject: [PATCH 2683/4122] phy: qcom-qmp-combo: move pm ops Move the PM ops structure next to the implementation to keep the driver callbacks grouped. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114110621.4639-4-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index e7c8c4417142..2ac29b71d3b7 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -2216,6 +2216,11 @@ static int __maybe_unused qmp_combo_runtime_resume(struct device *dev) return 0; } +static const struct dev_pm_ops qmp_combo_pm_ops = { + SET_RUNTIME_PM_OPS(qmp_combo_runtime_suspend, + qmp_combo_runtime_resume, NULL) +}; + static int qmp_combo_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg) { struct qcom_qmp *qmp = dev_get_drvdata(dev); @@ -2646,11 +2651,6 @@ static int qmp_combo_create(struct device *dev, struct device_node *np, int id, return 0; } -static const struct dev_pm_ops qmp_combo_pm_ops = { - SET_RUNTIME_PM_OPS(qmp_combo_runtime_suspend, - qmp_combo_runtime_resume, NULL) -}; - static int qmp_combo_probe(struct platform_device *pdev) { struct qcom_qmp *qmp; From d6c81688f9cd1d198475383c963cd3d9576d29c2 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 12:06:03 +0100 Subject: [PATCH 2684/4122] phy: qcom-qmp-combo: rename PHY ops structures Rename the PHY operation structures so that they have a "phy_ops" suffix and move them next to the implementation. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114110621.4639-5-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 40 +++++++++++------------ 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 2ac29b71d3b7..13800c2243e1 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -2118,6 +2118,24 @@ static int qmp_combo_set_mode(struct phy *phy, enum phy_mode mode, int submode) return 0; } +static const struct phy_ops qmp_combo_usb_phy_ops = { + .init = qmp_combo_enable, + .exit = qmp_combo_disable, + .set_mode = qmp_combo_set_mode, + .owner = THIS_MODULE, +}; + +static const struct phy_ops qmp_combo_dp_phy_ops = { + .init = qmp_combo_init, + .configure = qcom_qmp_dp_phy_configure, + .power_on = qmp_combo_power_on, + .calibrate = qcom_qmp_dp_phy_calibrate, + .power_off = qmp_combo_power_off, + .exit = qmp_combo_exit, + .set_mode = qmp_combo_set_mode, + .owner = THIS_MODULE, +}; + static void qmp_combo_enable_autonomous_mode(struct qmp_phy *qphy) { const struct qmp_phy_cfg *cfg = qphy->cfg; @@ -2542,24 +2560,6 @@ static int phy_dp_clks_register(struct qcom_qmp *qmp, struct qmp_phy *qphy, return devm_add_action_or_reset(qmp->dev, phy_clk_release_provider, np); } -static const struct phy_ops qmp_combo_usb_ops = { - .init = qmp_combo_enable, - .exit = qmp_combo_disable, - .set_mode = qmp_combo_set_mode, - .owner = THIS_MODULE, -}; - -static const struct phy_ops qmp_combo_dp_ops = { - .init = qmp_combo_init, - .configure = qcom_qmp_dp_phy_configure, - .power_on = qmp_combo_power_on, - .calibrate = qcom_qmp_dp_phy_calibrate, - .power_off = qmp_combo_power_off, - .exit = qmp_combo_exit, - .set_mode = qmp_combo_set_mode, - .owner = THIS_MODULE, -}; - static int qmp_combo_create(struct device *dev, struct device_node *np, int id, void 
__iomem *serdes, const struct qmp_phy_cfg *cfg) { @@ -2632,9 +2632,9 @@ static int qmp_combo_create(struct device *dev, struct device_node *np, int id, } if (cfg->type == PHY_TYPE_DP) - ops = &qmp_combo_dp_ops; + ops = &qmp_combo_dp_phy_ops; else - ops = &qmp_combo_usb_ops; + ops = &qmp_combo_usb_phy_ops; generic_phy = devm_phy_create(dev, np, ops); if (IS_ERR(generic_phy)) { From 73d262f8e7ff095965bac0c4bf538f601257f53b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 12:06:04 +0100 Subject: [PATCH 2685/4122] phy: qcom-qmp-combo: drop unused DP PHY mode op The set-mode operation is currently only used by the USB part of the PHY so drop the corresponding callback from the DP PHY ops. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114110621.4639-6-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 13800c2243e1..cec487560fd7 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -2132,7 +2132,6 @@ static const struct phy_ops qmp_combo_dp_phy_ops = { .calibrate = qcom_qmp_dp_phy_calibrate, .power_off = qmp_combo_power_off, .exit = qmp_combo_exit, - .set_mode = qmp_combo_set_mode, .owner = THIS_MODULE, }; From ae1cdc709762129c33ab64f400c38c9a177189f1 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 12:06:05 +0100 Subject: [PATCH 2686/4122] phy: qcom-qmp-combo: rename USB PHY ops Add a "usb" infix to the USB PHY operation functions and name them after the corresponding operations (e.g. "init" rather than "enable"). 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114110621.4639-7-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index cec487560fd7..bf0874d22d91 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -2084,7 +2084,7 @@ static int qmp_combo_exit(struct phy *phy) return 0; } -static int qmp_combo_enable(struct phy *phy) +static int qmp_combo_usb_init(struct phy *phy) { int ret; @@ -2099,7 +2099,7 @@ static int qmp_combo_enable(struct phy *phy) return ret; } -static int qmp_combo_disable(struct phy *phy) +static int qmp_combo_usb_exit(struct phy *phy) { int ret; @@ -2109,7 +2109,7 @@ static int qmp_combo_disable(struct phy *phy) return qmp_combo_exit(phy); } -static int qmp_combo_set_mode(struct phy *phy, enum phy_mode mode, int submode) +static int qmp_combo_usb_set_mode(struct phy *phy, enum phy_mode mode, int submode) { struct qmp_phy *qphy = phy_get_drvdata(phy); @@ -2119,9 +2119,9 @@ static int qmp_combo_set_mode(struct phy *phy, enum phy_mode mode, int submode) } static const struct phy_ops qmp_combo_usb_phy_ops = { - .init = qmp_combo_enable, - .exit = qmp_combo_disable, - .set_mode = qmp_combo_set_mode, + .init = qmp_combo_usb_init, + .exit = qmp_combo_usb_exit, + .set_mode = qmp_combo_usb_set_mode, .owner = THIS_MODULE, }; From 0537692bbec18a173e1bda87f2bd024b3684b47d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 12:06:06 +0100 Subject: [PATCH 2687/4122] phy: qcom-qmp-combo: drop unnecessary debug message Drop the unnecessary (verbose) debug message from the init PHY op. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114110621.4639-8-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index bf0874d22d91..ae04f6219fd2 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -1967,10 +1967,8 @@ static int qmp_combo_com_exit(struct qmp_phy *qphy) static int qmp_combo_init(struct phy *phy) { struct qmp_phy *qphy = phy_get_drvdata(phy); - struct qcom_qmp *qmp = qphy->qmp; const struct qmp_phy_cfg *cfg = qphy->cfg; int ret; - dev_vdbg(qmp->dev, "Initializing QMP phy\n"); ret = qmp_combo_com_init(qphy); if (ret) From 8c75d9eab1dd402bde2fb1337db2130a409c2743 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 12:06:07 +0100 Subject: [PATCH 2688/4122] phy: qcom-qmp-combo: separate USB and DP init ops Separate the USB and DP init and exit operations by calling the common initialisation code directly from the USB operation and adding a "dp" infix to the DP callbacks. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114110621.4639-9-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 36 ++++++++++++----------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index ae04f6219fd2..5d985195df38 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -1964,7 +1964,7 @@ static int qmp_combo_com_exit(struct qmp_phy *qphy) return 0; } -static int qmp_combo_init(struct phy *phy) +static int qmp_combo_dp_init(struct phy *phy) { struct qmp_phy *qphy = phy_get_drvdata(phy); const struct qmp_phy_cfg *cfg = qphy->cfg; @@ -1974,8 +1974,16 @@ static int qmp_combo_init(struct phy *phy) if (ret) return ret; - if (cfg->type == PHY_TYPE_DP) - cfg->dp_aux_init(qphy); + cfg->dp_aux_init(qphy); + + return 0; +} + +static int qmp_combo_dp_exit(struct phy *phy) +{ + struct qmp_phy *qphy = phy_get_drvdata(phy); + + qmp_combo_com_exit(qphy); return 0; } @@ -2073,38 +2081,32 @@ static int qmp_combo_power_off(struct phy *phy) return 0; } -static int qmp_combo_exit(struct phy *phy) -{ - struct qmp_phy *qphy = phy_get_drvdata(phy); - - qmp_combo_com_exit(qphy); - - return 0; -} - static int qmp_combo_usb_init(struct phy *phy) { + struct qmp_phy *qphy = phy_get_drvdata(phy); int ret; - ret = qmp_combo_init(phy); + ret = qmp_combo_com_init(qphy); if (ret) return ret; ret = qmp_combo_power_on(phy); if (ret) - qmp_combo_exit(phy); + qmp_combo_com_exit(qphy); return ret; } static int qmp_combo_usb_exit(struct phy *phy) { + struct qmp_phy *qphy = phy_get_drvdata(phy); int ret; ret = qmp_combo_power_off(phy); if (ret) return ret; - return qmp_combo_exit(phy); + + return qmp_combo_com_exit(qphy); } static int qmp_combo_usb_set_mode(struct phy *phy, enum phy_mode mode, int submode) @@ -2124,12 +2126,12 @@ static const struct 
phy_ops qmp_combo_usb_phy_ops = { }; static const struct phy_ops qmp_combo_dp_phy_ops = { - .init = qmp_combo_init, + .init = qmp_combo_dp_init, .configure = qcom_qmp_dp_phy_configure, .power_on = qmp_combo_power_on, .calibrate = qcom_qmp_dp_phy_calibrate, .power_off = qmp_combo_power_off, - .exit = qmp_combo_exit, + .exit = qmp_combo_dp_exit, .owner = THIS_MODULE, }; From 186266f65e68c16ba6714c9b7f561ddcd4998cae Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 12:06:08 +0100 Subject: [PATCH 2689/4122] phy: qcom-qmp-combo: rename DP PHY ops Rename the configure and calibrate DP PHY ops using the common prefix for consistency. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114110621.4639-10-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 5d985195df38..7392ae460fd7 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -1842,7 +1842,7 @@ static int qcom_qmp_v4_dp_phy_calibrate(struct qmp_phy *qphy) return 0; } -static int qcom_qmp_dp_phy_configure(struct phy *phy, union phy_configure_opts *opts) +static int qmp_combo_dp_configure(struct phy *phy, union phy_configure_opts *opts) { const struct phy_configure_opts_dp *dp_opts = &opts->dp; struct qmp_phy *qphy = phy_get_drvdata(phy); @@ -1857,7 +1857,7 @@ static int qcom_qmp_dp_phy_configure(struct phy *phy, union phy_configure_opts * return 0; } -static int qcom_qmp_dp_phy_calibrate(struct phy *phy) +static int qmp_combo_dp_calibrate(struct phy *phy) { struct qmp_phy *qphy = phy_get_drvdata(phy); const struct qmp_phy_cfg *cfg = qphy->cfg; @@ -2127,9 +2127,9 @@ static const struct phy_ops qmp_combo_usb_phy_ops = { static const struct phy_ops qmp_combo_dp_phy_ops = { .init = qmp_combo_dp_init, - 
.configure = qcom_qmp_dp_phy_configure, + .configure = qmp_combo_dp_configure, .power_on = qmp_combo_power_on, - .calibrate = qcom_qmp_dp_phy_calibrate, + .calibrate = qmp_combo_dp_calibrate, .power_off = qmp_combo_power_off, .exit = qmp_combo_dp_exit, .owner = THIS_MODULE, From 3ade3ede57a0093da3b432ecceda36386d13a5e5 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 12:06:09 +0100 Subject: [PATCH 2690/4122] phy: qcom-qmp-combo: separate USB and DP power-on ops Separate the USB and DP power-on and power-off operations in two dedicated implementations. Note that the pipe clock is only used by the USB part of the PHY and that no DP configuration has a pcs (or rx) table or has has_pwrdn_delay set. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114110621.4639-11-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 100 +++++++++++++--------- 1 file changed, 59 insertions(+), 41 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 7392ae460fd7..748fd32a6f72 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -1988,7 +1988,39 @@ static int qmp_combo_dp_exit(struct phy *phy) return 0; } -static int qmp_combo_power_on(struct phy *phy) +static int qmp_combo_dp_power_on(struct phy *phy) +{ + struct qmp_phy *qphy = phy_get_drvdata(phy); + const struct qmp_phy_cfg *cfg = qphy->cfg; + void __iomem *tx = qphy->tx; + + qmp_combo_serdes_init(qphy); + + qmp_combo_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1); + + if (cfg->lanes >= 2) + qmp_combo_configure_lane(qphy->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2); + + /* Configure special DP tx tunings */ + cfg->configure_dp_tx(qphy); + + /* Configure link rate, swing, etc. 
*/ + cfg->configure_dp_phy(qphy); + + return 0; +} + +static int qmp_combo_dp_power_off(struct phy *phy) +{ + struct qmp_phy *qphy = phy_get_drvdata(phy); + + /* Assert DP PHY power down */ + writel(DP_PHY_PD_CTL_PSR_PWRDN, qphy->pcs + QSERDES_DP_PHY_PD_CTL); + + return 0; +} + +static int qmp_combo_usb_power_on(struct phy *phy) { struct qmp_phy *qphy = phy_get_drvdata(phy); struct qcom_qmp *qmp = qphy->qmp; @@ -2014,39 +2046,30 @@ static int qmp_combo_power_on(struct phy *phy) if (cfg->lanes >= 2) qmp_combo_configure_lane(qphy->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2); - /* Configure special DP tx tunings */ - if (cfg->type == PHY_TYPE_DP) - cfg->configure_dp_tx(qphy); - qmp_combo_configure_lane(rx, cfg->rx_tbl, cfg->rx_tbl_num, 1); if (cfg->lanes >= 2) qmp_combo_configure_lane(qphy->rx2, cfg->rx_tbl, cfg->rx_tbl_num, 2); - /* Configure link rate, swing, etc. */ - if (cfg->type == PHY_TYPE_DP) - cfg->configure_dp_phy(qphy); - else - qmp_combo_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num); + qmp_combo_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num); if (cfg->has_pwrdn_delay) usleep_range(10, 20); - if (cfg->type != PHY_TYPE_DP) { - /* Pull PHY out of reset state */ - qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); - /* start SerDes and Phy-Coding-Sublayer */ - qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], - SERDES_START | PCS_START); + /* Pull PHY out of reset state */ + qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); - status = pcs + cfg->regs[QPHY_PCS_STATUS]; - ret = readl_poll_timeout(status, val, !(val & PHYSTATUS), 200, - PHY_INIT_COMPLETE_TIMEOUT); - if (ret) { - dev_err(qmp->dev, "phy initialization timed-out\n"); - goto err_disable_pipe_clk; - } + /* start SerDes and Phy-Coding-Sublayer */ + qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], SERDES_START | PCS_START); + + status = pcs + cfg->regs[QPHY_PCS_STATUS]; + ret = readl_poll_timeout(status, val, !(val & PHYSTATUS), 200, + PHY_INIT_COMPLETE_TIMEOUT); + if (ret) { + dev_err(qmp->dev, "phy 
initialization timed-out\n"); + goto err_disable_pipe_clk; } + return 0; err_disable_pipe_clk: @@ -2055,28 +2078,23 @@ err_disable_pipe_clk: return ret; } -static int qmp_combo_power_off(struct phy *phy) +static int qmp_combo_usb_power_off(struct phy *phy) { struct qmp_phy *qphy = phy_get_drvdata(phy); const struct qmp_phy_cfg *cfg = qphy->cfg; clk_disable_unprepare(qphy->pipe_clk); - if (cfg->type == PHY_TYPE_DP) { - /* Assert DP PHY power down */ - writel(DP_PHY_PD_CTL_PSR_PWRDN, qphy->pcs + QSERDES_DP_PHY_PD_CTL); - } else { - /* PHY reset */ - qphy_setbits(qphy->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); + /* PHY reset */ + qphy_setbits(qphy->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); - /* stop SerDes and Phy-Coding-Sublayer */ - qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], - SERDES_START | PCS_START); + /* stop SerDes and Phy-Coding-Sublayer */ + qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], + SERDES_START | PCS_START); - /* Put PHY into POWER DOWN state: active low */ - qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], - SW_PWRDN); - } + /* Put PHY into POWER DOWN state: active low */ + qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], + SW_PWRDN); return 0; } @@ -2090,7 +2108,7 @@ static int qmp_combo_usb_init(struct phy *phy) if (ret) return ret; - ret = qmp_combo_power_on(phy); + ret = qmp_combo_usb_power_on(phy); if (ret) qmp_combo_com_exit(qphy); @@ -2102,7 +2120,7 @@ static int qmp_combo_usb_exit(struct phy *phy) struct qmp_phy *qphy = phy_get_drvdata(phy); int ret; - ret = qmp_combo_power_off(phy); + ret = qmp_combo_usb_power_off(phy); if (ret) return ret; @@ -2128,9 +2146,9 @@ static const struct phy_ops qmp_combo_usb_phy_ops = { static const struct phy_ops qmp_combo_dp_phy_ops = { .init = qmp_combo_dp_init, .configure = qmp_combo_dp_configure, - .power_on = qmp_combo_power_on, + .power_on = qmp_combo_dp_power_on, .calibrate = qmp_combo_dp_calibrate, - .power_off = qmp_combo_power_off, + .power_off = 
qmp_combo_dp_power_off, .exit = qmp_combo_dp_exit, .owner = THIS_MODULE, }; From dae95d7f667d20ab81976f846ed0bf2c71dbfe18 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 12:06:10 +0100 Subject: [PATCH 2691/4122] phy: qcom-qmp-combo: clean up serdes initialisation Clean up serdes initialisation somewhat by making the current helper a dedicated helper for the DP part of the PHY. Note that no error is currently returned for non-supported link rates. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114110621.4639-12-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 49 +++++++++++------------ 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 748fd32a6f72..c059e4aeecdb 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -1313,7 +1313,7 @@ static void qmp_combo_configure(void __iomem *base, qmp_combo_configure_lane(base, tbl, num, 0xff); } -static int qmp_combo_serdes_init(struct qmp_phy *qphy) +static int qmp_combo_dp_serdes_init(struct qmp_phy *qphy) { const struct qmp_phy_cfg *cfg = qphy->cfg; void __iomem *serdes = qphy->serdes; @@ -1323,28 +1323,26 @@ static int qmp_combo_serdes_init(struct qmp_phy *qphy) qmp_combo_configure(serdes, serdes_tbl, serdes_tbl_num); - if (cfg->type == PHY_TYPE_DP) { - switch (dp_opts->link_rate) { - case 1620: - qmp_combo_configure(serdes, cfg->serdes_tbl_rbr, - cfg->serdes_tbl_rbr_num); - break; - case 2700: - qmp_combo_configure(serdes, cfg->serdes_tbl_hbr, - cfg->serdes_tbl_hbr_num); - break; - case 5400: - qmp_combo_configure(serdes, cfg->serdes_tbl_hbr2, - cfg->serdes_tbl_hbr2_num); - break; - case 8100: - qmp_combo_configure(serdes, cfg->serdes_tbl_hbr3, - cfg->serdes_tbl_hbr3_num); - break; - default: - /* Other link rates aren't supported */ - return -EINVAL; - } 
+ switch (dp_opts->link_rate) { + case 1620: + qmp_combo_configure(serdes, cfg->serdes_tbl_rbr, + cfg->serdes_tbl_rbr_num); + break; + case 2700: + qmp_combo_configure(serdes, cfg->serdes_tbl_hbr, + cfg->serdes_tbl_hbr_num); + break; + case 5400: + qmp_combo_configure(serdes, cfg->serdes_tbl_hbr2, + cfg->serdes_tbl_hbr2_num); + break; + case 8100: + qmp_combo_configure(serdes, cfg->serdes_tbl_hbr3, + cfg->serdes_tbl_hbr3_num); + break; + default: + /* Other link rates aren't supported */ + return -EINVAL; } return 0; @@ -1994,7 +1992,7 @@ static int qmp_combo_dp_power_on(struct phy *phy) const struct qmp_phy_cfg *cfg = qphy->cfg; void __iomem *tx = qphy->tx; - qmp_combo_serdes_init(qphy); + qmp_combo_dp_serdes_init(qphy); qmp_combo_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1); @@ -2025,6 +2023,7 @@ static int qmp_combo_usb_power_on(struct phy *phy) struct qmp_phy *qphy = phy_get_drvdata(phy); struct qcom_qmp *qmp = qphy->qmp; const struct qmp_phy_cfg *cfg = qphy->cfg; + void __iomem *serdes = qphy->serdes; void __iomem *tx = qphy->tx; void __iomem *rx = qphy->rx; void __iomem *pcs = qphy->pcs; @@ -2032,7 +2031,7 @@ static int qmp_combo_usb_power_on(struct phy *phy) unsigned int val; int ret; - qmp_combo_serdes_init(qphy); + qmp_combo_configure(serdes, cfg->serdes_tbl, cfg->serdes_tbl_num); ret = clk_prepare_enable(qphy->pipe_clk); if (ret) { From c7fbe5bd14145425d38a3b1e4d59f1b3acff3eba Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 12:06:11 +0100 Subject: [PATCH 2692/4122] phy: qcom-qmp-combo: separate USB and DP devicetree parsing Separate the devicetree parsing of the USB and DP child nodes in two dedicated helpers in preparation for merging the driver data. Note that only the USB part of the PHY has a pipe clock and that the DP implementation only uses the tx/tx2 and pcs register regions. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114110621.4639-13-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 82 ++++++++++++++++------- 1 file changed, 58 insertions(+), 24 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index c059e4aeecdb..9c4528dff316 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -2576,13 +2576,12 @@ static int phy_dp_clks_register(struct qcom_qmp *qmp, struct qmp_phy *qphy, return devm_add_action_or_reset(qmp->dev, phy_clk_release_provider, np); } -static int qmp_combo_create(struct device *dev, struct device_node *np, int id, +static int qmp_combo_create_dp(struct device *dev, struct device_node *np, int id, void __iomem *serdes, const struct qmp_phy_cfg *cfg) { struct qcom_qmp *qmp = dev_get_drvdata(dev); struct phy *generic_phy; struct qmp_phy *qphy; - const struct phy_ops *ops; int ret; qphy = devm_kzalloc(dev, sizeof(*qphy), GFP_KERNEL); @@ -2592,7 +2591,57 @@ static int qmp_combo_create(struct device *dev, struct device_node *np, int id, qphy->cfg = cfg; qphy->serdes = serdes; /* - * Get memory resources for each PHY: + * Get memory resources from the DP child node: + * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2. + * For dual lane PHYs: tx2 -> 3, rx2 -> 4 + * + * Note that only tx/tx2 and pcs are used by the DP implementation. 
+ */ + qphy->tx = devm_of_iomap(dev, np, 0, NULL); + if (IS_ERR(qphy->tx)) + return PTR_ERR(qphy->tx); + + qphy->pcs = devm_of_iomap(dev, np, 2, NULL); + if (IS_ERR(qphy->pcs)) + return PTR_ERR(qphy->pcs); + + if (cfg->lanes >= 2) { + qphy->tx2 = devm_of_iomap(dev, np, 3, NULL); + if (IS_ERR(qphy->tx2)) + return PTR_ERR(qphy->tx2); + } + + generic_phy = devm_phy_create(dev, np, &qmp_combo_dp_phy_ops); + if (IS_ERR(generic_phy)) { + ret = PTR_ERR(generic_phy); + dev_err(dev, "failed to create DP PHY: %d\n", ret); + return ret; + } + + qphy->phy = generic_phy; + qphy->qmp = qmp; + qmp->phys[id] = qphy; + phy_set_drvdata(generic_phy, qphy); + + return 0; +} + +static int qmp_combo_create_usb(struct device *dev, struct device_node *np, int id, + void __iomem *serdes, const struct qmp_phy_cfg *cfg) +{ + struct qcom_qmp *qmp = dev_get_drvdata(dev); + struct phy *generic_phy; + struct qmp_phy *qphy; + int ret; + + qphy = devm_kzalloc(dev, sizeof(*qphy), GFP_KERNEL); + if (!qphy) + return -ENOMEM; + + qphy->cfg = cfg; + qphy->serdes = serdes; + /* + * Get memory resources from the USB child node: * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2. * For dual lane PHYs: tx2 -> 3, rx2 -> 4, pcs_misc (optional) -> 5 * For single lane PHYs: pcs_misc (optional) -> 3. @@ -2631,31 +2680,16 @@ static int qmp_combo_create(struct device *dev, struct device_node *np, int id, qphy->pcs_misc = NULL; } - /* - * Get PHY's Pipe clock, if any. USB3 and PCIe are PIPE3 - * based phys, so they essentially have pipe clock. So, - * we return error in case phy is USB3 or PIPE type. - * Otherwise, we initialize pipe clock to NULL for - * all phys that don't need this. 
- */ qphy->pipe_clk = devm_get_clk_from_child(dev, np, NULL); if (IS_ERR(qphy->pipe_clk)) { - if (cfg->type == PHY_TYPE_USB3) - return dev_err_probe(dev, PTR_ERR(qphy->pipe_clk), - "failed to get lane%d pipe_clk\n", - id); - qphy->pipe_clk = NULL; + return dev_err_probe(dev, PTR_ERR(qphy->pipe_clk), + "failed to get lane%d pipe_clk\n", id); } - if (cfg->type == PHY_TYPE_DP) - ops = &qmp_combo_dp_phy_ops; - else - ops = &qmp_combo_usb_phy_ops; - - generic_phy = devm_phy_create(dev, np, ops); + generic_phy = devm_phy_create(dev, np, &qmp_combo_usb_phy_ops); if (IS_ERR(generic_phy)) { ret = PTR_ERR(generic_phy); - dev_err(dev, "failed to create qphy %d\n", ret); + dev_err(dev, "failed to create USB PHY: %d\n", ret); return ret; } @@ -2752,7 +2786,7 @@ static int qmp_combo_probe(struct platform_device *pdev) serdes = dp_serdes; /* Create per-lane phy */ - ret = qmp_combo_create(dev, child, id, serdes, cfg); + ret = qmp_combo_create_dp(dev, child, id, serdes, cfg); if (ret) { dev_err(dev, "failed to create lane%d phy, %d\n", id, ret); @@ -2770,7 +2804,7 @@ static int qmp_combo_probe(struct platform_device *pdev) serdes = usb_serdes; /* Create per-lane phy */ - ret = qmp_combo_create(dev, child, id, serdes, cfg); + ret = qmp_combo_create_usb(dev, child, id, serdes, cfg); if (ret) { dev_err(dev, "failed to create lane%d phy, %d\n", id, ret); From 4197a2a22df7804b40335ab638eae211acd1a81b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 12:06:12 +0100 Subject: [PATCH 2693/4122] phy: qcom-qmp-combo: add dedicated DP iomem pointers In preparation for merging the driver data, add separate iomem pointers for the DP part of the PHY. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114110621.4639-14-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 242 +++++++++++----------- 1 file changed, 124 insertions(+), 118 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 9c4528dff316..43193bfe6e11 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -895,6 +895,12 @@ struct qmp_phy { void __iomem *rx2; void __iomem *pcs_misc; void __iomem *pcs_usb; + + void __iomem *dp_serdes; + void __iomem *dp_tx; + void __iomem *dp_tx2; + void __iomem *dp_pcs; + struct clk *pipe_clk; struct qcom_qmp *qmp; enum phy_mode mode; @@ -1316,7 +1322,7 @@ static void qmp_combo_configure(void __iomem *base, static int qmp_combo_dp_serdes_init(struct qmp_phy *qphy) { const struct qmp_phy_cfg *cfg = qphy->cfg; - void __iomem *serdes = qphy->serdes; + void __iomem *serdes = qphy->dp_serdes; const struct phy_configure_opts_dp *dp_opts = &qphy->dp_opts; const struct qmp_phy_init_tbl *serdes_tbl = cfg->serdes_tbl; int serdes_tbl_num = cfg->serdes_tbl_num; @@ -1352,43 +1358,43 @@ static void qcom_qmp_v3_phy_dp_aux_init(struct qmp_phy *qphy) { writel(DP_PHY_PD_CTL_PWRDN | DP_PHY_PD_CTL_AUX_PWRDN | DP_PHY_PD_CTL_PLL_PWRDN | DP_PHY_PD_CTL_DP_CLAMP_EN, - qphy->pcs + QSERDES_DP_PHY_PD_CTL); + qphy->dp_pcs + QSERDES_DP_PHY_PD_CTL); /* Turn on BIAS current for PHY/PLL */ writel(QSERDES_V3_COM_BIAS_EN | QSERDES_V3_COM_BIAS_EN_MUX | QSERDES_V3_COM_CLKBUF_L_EN | QSERDES_V3_COM_EN_SYSCLK_TX_SEL, - qphy->serdes + QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN); + qphy->dp_serdes + QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN); - writel(DP_PHY_PD_CTL_PSR_PWRDN, qphy->pcs + QSERDES_DP_PHY_PD_CTL); + writel(DP_PHY_PD_CTL_PSR_PWRDN, qphy->dp_pcs + QSERDES_DP_PHY_PD_CTL); writel(DP_PHY_PD_CTL_PWRDN | DP_PHY_PD_CTL_AUX_PWRDN | DP_PHY_PD_CTL_LANE_0_1_PWRDN | 
DP_PHY_PD_CTL_LANE_2_3_PWRDN | DP_PHY_PD_CTL_PLL_PWRDN | DP_PHY_PD_CTL_DP_CLAMP_EN, - qphy->pcs + QSERDES_DP_PHY_PD_CTL); + qphy->dp_pcs + QSERDES_DP_PHY_PD_CTL); writel(QSERDES_V3_COM_BIAS_EN | QSERDES_V3_COM_BIAS_EN_MUX | QSERDES_V3_COM_CLKBUF_R_EN | QSERDES_V3_COM_CLKBUF_L_EN | QSERDES_V3_COM_EN_SYSCLK_TX_SEL | QSERDES_V3_COM_CLKBUF_RX_DRIVE_L, - qphy->serdes + QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN); + qphy->dp_serdes + QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN); - writel(0x00, qphy->pcs + QSERDES_DP_PHY_AUX_CFG0); - writel(0x13, qphy->pcs + QSERDES_DP_PHY_AUX_CFG1); - writel(0x24, qphy->pcs + QSERDES_DP_PHY_AUX_CFG2); - writel(0x00, qphy->pcs + QSERDES_DP_PHY_AUX_CFG3); - writel(0x0a, qphy->pcs + QSERDES_DP_PHY_AUX_CFG4); - writel(0x26, qphy->pcs + QSERDES_DP_PHY_AUX_CFG5); - writel(0x0a, qphy->pcs + QSERDES_DP_PHY_AUX_CFG6); - writel(0x03, qphy->pcs + QSERDES_DP_PHY_AUX_CFG7); - writel(0xbb, qphy->pcs + QSERDES_DP_PHY_AUX_CFG8); - writel(0x03, qphy->pcs + QSERDES_DP_PHY_AUX_CFG9); + writel(0x00, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG0); + writel(0x13, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG1); + writel(0x24, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG2); + writel(0x00, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG3); + writel(0x0a, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG4); + writel(0x26, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG5); + writel(0x0a, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG6); + writel(0x03, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG7); + writel(0xbb, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG8); + writel(0x03, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG9); qphy->dp_aux_cfg = 0; writel(PHY_AUX_STOP_ERR_MASK | PHY_AUX_DEC_ERR_MASK | PHY_AUX_SYNC_ERR_MASK | PHY_AUX_ALIGN_ERR_MASK | PHY_AUX_REQ_ERR_MASK, - qphy->pcs + QSERDES_V3_DP_PHY_AUX_INTERRUPT_MASK); + qphy->dp_pcs + QSERDES_V3_DP_PHY_AUX_INTERRUPT_MASK); } static int qmp_combo_configure_dp_swing(struct qmp_phy *qphy, @@ -1421,10 +1427,10 @@ static int qmp_combo_configure_dp_swing(struct qmp_phy *qphy, voltage_swing_cfg |= 
DP_PHY_TXn_TX_DRV_LVL_MUX_EN; pre_emphasis_cfg |= DP_PHY_TXn_TX_EMP_POST1_LVL_MUX_EN; - writel(voltage_swing_cfg, qphy->tx + drv_lvl_reg); - writel(pre_emphasis_cfg, qphy->tx + emp_post_reg); - writel(voltage_swing_cfg, qphy->tx2 + drv_lvl_reg); - writel(pre_emphasis_cfg, qphy->tx2 + emp_post_reg); + writel(voltage_swing_cfg, qphy->dp_tx + drv_lvl_reg); + writel(pre_emphasis_cfg, qphy->dp_tx + emp_post_reg); + writel(voltage_swing_cfg, qphy->dp_tx2 + drv_lvl_reg); + writel(pre_emphasis_cfg, qphy->dp_tx2 + emp_post_reg); return 0; } @@ -1446,10 +1452,10 @@ static void qcom_qmp_v3_phy_configure_dp_tx(struct qmp_phy *qphy) drvr_en = 0x10; } - writel(drvr_en, qphy->tx + QSERDES_V3_TX_HIGHZ_DRVR_EN); - writel(bias_en, qphy->tx + QSERDES_V3_TX_TRANSCEIVER_BIAS_EN); - writel(drvr_en, qphy->tx2 + QSERDES_V3_TX_HIGHZ_DRVR_EN); - writel(bias_en, qphy->tx2 + QSERDES_V3_TX_TRANSCEIVER_BIAS_EN); + writel(drvr_en, qphy->dp_tx + QSERDES_V3_TX_HIGHZ_DRVR_EN); + writel(bias_en, qphy->dp_tx + QSERDES_V3_TX_TRANSCEIVER_BIAS_EN); + writel(drvr_en, qphy->dp_tx2 + QSERDES_V3_TX_HIGHZ_DRVR_EN); + writel(bias_en, qphy->dp_tx2 + QSERDES_V3_TX_TRANSCEIVER_BIAS_EN); } static bool qmp_combo_configure_dp_mode(struct qmp_phy *qphy) @@ -1472,12 +1478,12 @@ static bool qmp_combo_configure_dp_mode(struct qmp_phy *qphy) * if (lane_cnt == 4 || orientation == ORIENTATION_CC1) * val |= DP_PHY_PD_CTL_LANE_2_3_PWRDN; * if (orientation == ORIENTATION_CC2) - * writel(0x4c, qphy->pcs + QSERDES_V3_DP_PHY_MODE); + * writel(0x4c, qphy->dp_pcs + QSERDES_V3_DP_PHY_MODE); */ val |= DP_PHY_PD_CTL_LANE_2_3_PWRDN; - writel(val, qphy->pcs + QSERDES_DP_PHY_PD_CTL); + writel(val, qphy->dp_pcs + QSERDES_DP_PHY_PD_CTL); - writel(0x5c, qphy->pcs + QSERDES_DP_PHY_MODE); + writel(0x5c, qphy->dp_pcs + QSERDES_DP_PHY_MODE); return reverse; } @@ -1491,8 +1497,8 @@ static int qcom_qmp_v3_phy_configure_dp_phy(struct qmp_phy *qphy) qmp_combo_configure_dp_mode(qphy); - writel(0x05, qphy->pcs + QSERDES_V3_DP_PHY_TX0_TX1_LANE_CTL); 
- writel(0x05, qphy->pcs + QSERDES_V3_DP_PHY_TX2_TX3_LANE_CTL); + writel(0x05, qphy->dp_pcs + QSERDES_V3_DP_PHY_TX0_TX1_LANE_CTL); + writel(0x05, qphy->dp_pcs + QSERDES_V3_DP_PHY_TX2_TX3_LANE_CTL); switch (dp_opts->link_rate) { case 1620: @@ -1515,40 +1521,40 @@ static int qcom_qmp_v3_phy_configure_dp_phy(struct qmp_phy *qphy) /* Other link rates aren't supported */ return -EINVAL; } - writel(phy_vco_div, qphy->pcs + QSERDES_V3_DP_PHY_VCO_DIV); + writel(phy_vco_div, qphy->dp_pcs + QSERDES_V3_DP_PHY_VCO_DIV); clk_set_rate(dp_clks->dp_link_hw.clk, dp_opts->link_rate * 100000); clk_set_rate(dp_clks->dp_pixel_hw.clk, pixel_freq); - writel(0x04, qphy->pcs + QSERDES_DP_PHY_AUX_CFG2); - writel(0x01, qphy->pcs + QSERDES_DP_PHY_CFG); - writel(0x05, qphy->pcs + QSERDES_DP_PHY_CFG); - writel(0x01, qphy->pcs + QSERDES_DP_PHY_CFG); - writel(0x09, qphy->pcs + QSERDES_DP_PHY_CFG); + writel(0x04, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG2); + writel(0x01, qphy->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x05, qphy->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x01, qphy->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x09, qphy->dp_pcs + QSERDES_DP_PHY_CFG); - writel(0x20, qphy->serdes + QSERDES_V3_COM_RESETSM_CNTRL); + writel(0x20, qphy->dp_serdes + QSERDES_V3_COM_RESETSM_CNTRL); - if (readl_poll_timeout(qphy->serdes + QSERDES_V3_COM_C_READY_STATUS, + if (readl_poll_timeout(qphy->dp_serdes + QSERDES_V3_COM_C_READY_STATUS, status, ((status & BIT(0)) > 0), 500, 10000)) return -ETIMEDOUT; - writel(0x19, qphy->pcs + QSERDES_DP_PHY_CFG); + writel(0x19, qphy->dp_pcs + QSERDES_DP_PHY_CFG); - if (readl_poll_timeout(qphy->pcs + QSERDES_V3_DP_PHY_STATUS, + if (readl_poll_timeout(qphy->dp_pcs + QSERDES_V3_DP_PHY_STATUS, status, ((status & BIT(1)) > 0), 500, 10000)) return -ETIMEDOUT; - writel(0x18, qphy->pcs + QSERDES_DP_PHY_CFG); + writel(0x18, qphy->dp_pcs + QSERDES_DP_PHY_CFG); udelay(2000); - writel(0x19, qphy->pcs + QSERDES_DP_PHY_CFG); + writel(0x19, qphy->dp_pcs + QSERDES_DP_PHY_CFG); - return 
readl_poll_timeout(qphy->pcs + QSERDES_V3_DP_PHY_STATUS, + return readl_poll_timeout(qphy->dp_pcs + QSERDES_V3_DP_PHY_STATUS, status, ((status & BIT(1)) > 0), 500, @@ -1568,7 +1574,7 @@ static int qcom_qmp_v3_dp_phy_calibrate(struct qmp_phy *qphy) qphy->dp_aux_cfg %= ARRAY_SIZE(cfg1_settings); val = cfg1_settings[qphy->dp_aux_cfg]; - writel(val, qphy->pcs + QSERDES_DP_PHY_AUX_CFG1); + writel(val, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG1); return 0; } @@ -1577,37 +1583,37 @@ static void qcom_qmp_v4_phy_dp_aux_init(struct qmp_phy *qphy) { writel(DP_PHY_PD_CTL_PWRDN | DP_PHY_PD_CTL_PSR_PWRDN | DP_PHY_PD_CTL_AUX_PWRDN | DP_PHY_PD_CTL_PLL_PWRDN | DP_PHY_PD_CTL_DP_CLAMP_EN, - qphy->pcs + QSERDES_DP_PHY_PD_CTL); + qphy->dp_pcs + QSERDES_DP_PHY_PD_CTL); /* Turn on BIAS current for PHY/PLL */ - writel(0x17, qphy->serdes + QSERDES_V4_COM_BIAS_EN_CLKBUFLR_EN); + writel(0x17, qphy->dp_serdes + QSERDES_V4_COM_BIAS_EN_CLKBUFLR_EN); - writel(0x00, qphy->pcs + QSERDES_DP_PHY_AUX_CFG0); - writel(0x13, qphy->pcs + QSERDES_DP_PHY_AUX_CFG1); - writel(0xa4, qphy->pcs + QSERDES_DP_PHY_AUX_CFG2); - writel(0x00, qphy->pcs + QSERDES_DP_PHY_AUX_CFG3); - writel(0x0a, qphy->pcs + QSERDES_DP_PHY_AUX_CFG4); - writel(0x26, qphy->pcs + QSERDES_DP_PHY_AUX_CFG5); - writel(0x0a, qphy->pcs + QSERDES_DP_PHY_AUX_CFG6); - writel(0x03, qphy->pcs + QSERDES_DP_PHY_AUX_CFG7); - writel(0xb7, qphy->pcs + QSERDES_DP_PHY_AUX_CFG8); - writel(0x03, qphy->pcs + QSERDES_DP_PHY_AUX_CFG9); + writel(0x00, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG0); + writel(0x13, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG1); + writel(0xa4, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG2); + writel(0x00, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG3); + writel(0x0a, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG4); + writel(0x26, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG5); + writel(0x0a, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG6); + writel(0x03, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG7); + writel(0xb7, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG8); + writel(0x03, qphy->dp_pcs + 
QSERDES_DP_PHY_AUX_CFG9); qphy->dp_aux_cfg = 0; writel(PHY_AUX_STOP_ERR_MASK | PHY_AUX_DEC_ERR_MASK | PHY_AUX_SYNC_ERR_MASK | PHY_AUX_ALIGN_ERR_MASK | PHY_AUX_REQ_ERR_MASK, - qphy->pcs + QSERDES_V4_DP_PHY_AUX_INTERRUPT_MASK); + qphy->dp_pcs + QSERDES_V4_DP_PHY_AUX_INTERRUPT_MASK); } static void qcom_qmp_v4_phy_configure_dp_tx(struct qmp_phy *qphy) { /* Program default values before writing proper values */ - writel(0x27, qphy->tx + QSERDES_V4_TX_TX_DRV_LVL); - writel(0x27, qphy->tx2 + QSERDES_V4_TX_TX_DRV_LVL); + writel(0x27, qphy->dp_tx + QSERDES_V4_TX_TX_DRV_LVL); + writel(0x27, qphy->dp_tx2 + QSERDES_V4_TX_TX_DRV_LVL); - writel(0x20, qphy->tx + QSERDES_V4_TX_TX_EMP_POST1_LVL); - writel(0x20, qphy->tx2 + QSERDES_V4_TX_TX_EMP_POST1_LVL); + writel(0x20, qphy->dp_tx + QSERDES_V4_TX_TX_EMP_POST1_LVL); + writel(0x20, qphy->dp_tx2 + QSERDES_V4_TX_TX_EMP_POST1_LVL); qmp_combo_configure_dp_swing(qphy, QSERDES_V4_TX_TX_DRV_LVL, QSERDES_V4_TX_TX_EMP_POST1_LVL); @@ -1620,15 +1626,15 @@ static int qcom_qmp_v45_phy_configure_dp_phy(struct qmp_phy *qphy) u32 phy_vco_div, status; unsigned long pixel_freq; - writel(0x0f, qphy->pcs + QSERDES_V4_DP_PHY_CFG_1); + writel(0x0f, qphy->dp_pcs + QSERDES_V4_DP_PHY_CFG_1); qmp_combo_configure_dp_mode(qphy); - writel(0x13, qphy->pcs + QSERDES_DP_PHY_AUX_CFG1); - writel(0xa4, qphy->pcs + QSERDES_DP_PHY_AUX_CFG2); + writel(0x13, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG1); + writel(0xa4, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG2); - writel(0x05, qphy->pcs + QSERDES_V4_DP_PHY_TX0_TX1_LANE_CTL); - writel(0x05, qphy->pcs + QSERDES_V4_DP_PHY_TX2_TX3_LANE_CTL); + writel(0x05, qphy->dp_pcs + QSERDES_V4_DP_PHY_TX0_TX1_LANE_CTL); + writel(0x05, qphy->dp_pcs + QSERDES_V4_DP_PHY_TX2_TX3_LANE_CTL); switch (dp_opts->link_rate) { case 1620: @@ -1651,49 +1657,49 @@ static int qcom_qmp_v45_phy_configure_dp_phy(struct qmp_phy *qphy) /* Other link rates aren't supported */ return -EINVAL; } - writel(phy_vco_div, qphy->pcs + QSERDES_V4_DP_PHY_VCO_DIV); + 
writel(phy_vco_div, qphy->dp_pcs + QSERDES_V4_DP_PHY_VCO_DIV); clk_set_rate(dp_clks->dp_link_hw.clk, dp_opts->link_rate * 100000); clk_set_rate(dp_clks->dp_pixel_hw.clk, pixel_freq); - writel(0x01, qphy->pcs + QSERDES_DP_PHY_CFG); - writel(0x05, qphy->pcs + QSERDES_DP_PHY_CFG); - writel(0x01, qphy->pcs + QSERDES_DP_PHY_CFG); - writel(0x09, qphy->pcs + QSERDES_DP_PHY_CFG); + writel(0x01, qphy->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x05, qphy->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x01, qphy->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x09, qphy->dp_pcs + QSERDES_DP_PHY_CFG); - writel(0x20, qphy->serdes + QSERDES_V4_COM_RESETSM_CNTRL); + writel(0x20, qphy->dp_serdes + QSERDES_V4_COM_RESETSM_CNTRL); - if (readl_poll_timeout(qphy->serdes + QSERDES_V4_COM_C_READY_STATUS, + if (readl_poll_timeout(qphy->dp_serdes + QSERDES_V4_COM_C_READY_STATUS, status, ((status & BIT(0)) > 0), 500, 10000)) return -ETIMEDOUT; - if (readl_poll_timeout(qphy->serdes + QSERDES_V4_COM_CMN_STATUS, + if (readl_poll_timeout(qphy->dp_serdes + QSERDES_V4_COM_CMN_STATUS, status, ((status & BIT(0)) > 0), 500, 10000)) return -ETIMEDOUT; - if (readl_poll_timeout(qphy->serdes + QSERDES_V4_COM_CMN_STATUS, + if (readl_poll_timeout(qphy->dp_serdes + QSERDES_V4_COM_CMN_STATUS, status, ((status & BIT(1)) > 0), 500, 10000)) return -ETIMEDOUT; - writel(0x19, qphy->pcs + QSERDES_DP_PHY_CFG); + writel(0x19, qphy->dp_pcs + QSERDES_DP_PHY_CFG); - if (readl_poll_timeout(qphy->pcs + QSERDES_V4_DP_PHY_STATUS, + if (readl_poll_timeout(qphy->dp_pcs + QSERDES_V4_DP_PHY_STATUS, status, ((status & BIT(0)) > 0), 500, 10000)) return -ETIMEDOUT; - if (readl_poll_timeout(qphy->pcs + QSERDES_V4_DP_PHY_STATUS, + if (readl_poll_timeout(qphy->dp_pcs + QSERDES_V4_DP_PHY_STATUS, status, ((status & BIT(1)) > 0), 500, @@ -1737,30 +1743,30 @@ static int qcom_qmp_v4_phy_configure_dp_phy(struct qmp_phy *qphy) drvr1_en = 0x10; } - writel(drvr0_en, qphy->tx + QSERDES_V4_TX_HIGHZ_DRVR_EN); - writel(bias0_en, qphy->tx + 
QSERDES_V4_TX_TRANSCEIVER_BIAS_EN); - writel(drvr1_en, qphy->tx2 + QSERDES_V4_TX_HIGHZ_DRVR_EN); - writel(bias1_en, qphy->tx2 + QSERDES_V4_TX_TRANSCEIVER_BIAS_EN); + writel(drvr0_en, qphy->dp_tx + QSERDES_V4_TX_HIGHZ_DRVR_EN); + writel(bias0_en, qphy->dp_tx + QSERDES_V4_TX_TRANSCEIVER_BIAS_EN); + writel(drvr1_en, qphy->dp_tx2 + QSERDES_V4_TX_HIGHZ_DRVR_EN); + writel(bias1_en, qphy->dp_tx2 + QSERDES_V4_TX_TRANSCEIVER_BIAS_EN); - writel(0x18, qphy->pcs + QSERDES_DP_PHY_CFG); + writel(0x18, qphy->dp_pcs + QSERDES_DP_PHY_CFG); udelay(2000); - writel(0x19, qphy->pcs + QSERDES_DP_PHY_CFG); + writel(0x19, qphy->dp_pcs + QSERDES_DP_PHY_CFG); - if (readl_poll_timeout(qphy->pcs + QSERDES_V4_DP_PHY_STATUS, + if (readl_poll_timeout(qphy->dp_pcs + QSERDES_V4_DP_PHY_STATUS, status, ((status & BIT(1)) > 0), 500, 10000)) return -ETIMEDOUT; - writel(0x0a, qphy->tx + QSERDES_V4_TX_TX_POL_INV); - writel(0x0a, qphy->tx2 + QSERDES_V4_TX_TX_POL_INV); + writel(0x0a, qphy->dp_tx + QSERDES_V4_TX_TX_POL_INV); + writel(0x0a, qphy->dp_tx2 + QSERDES_V4_TX_TX_POL_INV); - writel(0x27, qphy->tx + QSERDES_V4_TX_TX_DRV_LVL); - writel(0x27, qphy->tx2 + QSERDES_V4_TX_TX_DRV_LVL); + writel(0x27, qphy->dp_tx + QSERDES_V4_TX_TX_DRV_LVL); + writel(0x27, qphy->dp_tx2 + QSERDES_V4_TX_TX_DRV_LVL); - writel(0x20, qphy->tx + QSERDES_V4_TX_TX_EMP_POST1_LVL); - writel(0x20, qphy->tx2 + QSERDES_V4_TX_TX_EMP_POST1_LVL); + writel(0x20, qphy->dp_tx + QSERDES_V4_TX_TX_EMP_POST1_LVL); + writel(0x20, qphy->dp_tx2 + QSERDES_V4_TX_TX_EMP_POST1_LVL); return 0; } @@ -1794,30 +1800,30 @@ static int qcom_qmp_v5_phy_configure_dp_phy(struct qmp_phy *qphy) drvr1_en = 0x10; } - writel(drvr0_en, qphy->tx + QSERDES_V5_5NM_TX_HIGHZ_DRVR_EN); - writel(bias0_en, qphy->tx + QSERDES_V5_5NM_TX_TRANSCEIVER_BIAS_EN); - writel(drvr1_en, qphy->tx2 + QSERDES_V5_5NM_TX_HIGHZ_DRVR_EN); - writel(bias1_en, qphy->tx2 + QSERDES_V5_5NM_TX_TRANSCEIVER_BIAS_EN); + writel(drvr0_en, qphy->dp_tx + QSERDES_V5_5NM_TX_HIGHZ_DRVR_EN); + writel(bias0_en, 
qphy->dp_tx + QSERDES_V5_5NM_TX_TRANSCEIVER_BIAS_EN); + writel(drvr1_en, qphy->dp_tx2 + QSERDES_V5_5NM_TX_HIGHZ_DRVR_EN); + writel(bias1_en, qphy->dp_tx2 + QSERDES_V5_5NM_TX_TRANSCEIVER_BIAS_EN); - writel(0x18, qphy->pcs + QSERDES_DP_PHY_CFG); + writel(0x18, qphy->dp_pcs + QSERDES_DP_PHY_CFG); udelay(2000); - writel(0x19, qphy->pcs + QSERDES_DP_PHY_CFG); + writel(0x19, qphy->dp_pcs + QSERDES_DP_PHY_CFG); - if (readl_poll_timeout(qphy->pcs + QSERDES_V4_DP_PHY_STATUS, + if (readl_poll_timeout(qphy->dp_pcs + QSERDES_V4_DP_PHY_STATUS, status, ((status & BIT(1)) > 0), 500, 10000)) return -ETIMEDOUT; - writel(0x0a, qphy->tx + QSERDES_V5_5NM_TX_TX_POL_INV); - writel(0x0a, qphy->tx2 + QSERDES_V5_5NM_TX_TX_POL_INV); + writel(0x0a, qphy->dp_tx + QSERDES_V5_5NM_TX_TX_POL_INV); + writel(0x0a, qphy->dp_tx2 + QSERDES_V5_5NM_TX_TX_POL_INV); - writel(0x27, qphy->tx + QSERDES_V5_5NM_TX_TX_DRV_LVL); - writel(0x27, qphy->tx2 + QSERDES_V5_5NM_TX_TX_DRV_LVL); + writel(0x27, qphy->dp_tx + QSERDES_V5_5NM_TX_TX_DRV_LVL); + writel(0x27, qphy->dp_tx2 + QSERDES_V5_5NM_TX_TX_DRV_LVL); - writel(0x20, qphy->tx + QSERDES_V5_5NM_TX_TX_EMP_POST1_LVL); - writel(0x20, qphy->tx2 + QSERDES_V5_5NM_TX_TX_EMP_POST1_LVL); + writel(0x20, qphy->dp_tx + QSERDES_V5_5NM_TX_TX_EMP_POST1_LVL); + writel(0x20, qphy->dp_tx2 + QSERDES_V5_5NM_TX_TX_EMP_POST1_LVL); return 0; } @@ -1835,7 +1841,7 @@ static int qcom_qmp_v4_dp_phy_calibrate(struct qmp_phy *qphy) qphy->dp_aux_cfg %= ARRAY_SIZE(cfg1_settings); val = cfg1_settings[qphy->dp_aux_cfg]; - writel(val, qphy->pcs + QSERDES_DP_PHY_AUX_CFG1); + writel(val, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG1); return 0; } @@ -1990,14 +1996,14 @@ static int qmp_combo_dp_power_on(struct phy *phy) { struct qmp_phy *qphy = phy_get_drvdata(phy); const struct qmp_phy_cfg *cfg = qphy->cfg; - void __iomem *tx = qphy->tx; + void __iomem *tx = qphy->dp_tx; qmp_combo_dp_serdes_init(qphy); qmp_combo_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1); if (cfg->lanes >= 2) - 
qmp_combo_configure_lane(qphy->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2); + qmp_combo_configure_lane(qphy->dp_tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2); /* Configure special DP tx tunings */ cfg->configure_dp_tx(qphy); @@ -2013,7 +2019,7 @@ static int qmp_combo_dp_power_off(struct phy *phy) struct qmp_phy *qphy = phy_get_drvdata(phy); /* Assert DP PHY power down */ - writel(DP_PHY_PD_CTL_PSR_PWRDN, qphy->pcs + QSERDES_DP_PHY_PD_CTL); + writel(DP_PHY_PD_CTL_PSR_PWRDN, qphy->dp_pcs + QSERDES_DP_PHY_PD_CTL); return 0; } @@ -2589,7 +2595,7 @@ static int qmp_combo_create_dp(struct device *dev, struct device_node *np, int i return -ENOMEM; qphy->cfg = cfg; - qphy->serdes = serdes; + qphy->dp_serdes = serdes; /* * Get memory resources from the DP child node: * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2. @@ -2597,18 +2603,18 @@ static int qmp_combo_create_dp(struct device *dev, struct device_node *np, int i * * Note that only tx/tx2 and pcs are used by the DP implementation. */ - qphy->tx = devm_of_iomap(dev, np, 0, NULL); - if (IS_ERR(qphy->tx)) - return PTR_ERR(qphy->tx); + qphy->dp_tx = devm_of_iomap(dev, np, 0, NULL); + if (IS_ERR(qphy->dp_tx)) + return PTR_ERR(qphy->dp_tx); - qphy->pcs = devm_of_iomap(dev, np, 2, NULL); - if (IS_ERR(qphy->pcs)) - return PTR_ERR(qphy->pcs); + qphy->dp_pcs = devm_of_iomap(dev, np, 2, NULL); + if (IS_ERR(qphy->dp_pcs)) + return PTR_ERR(qphy->dp_pcs); if (cfg->lanes >= 2) { - qphy->tx2 = devm_of_iomap(dev, np, 3, NULL); - if (IS_ERR(qphy->tx2)) - return PTR_ERR(qphy->tx2); + qphy->dp_tx2 = devm_of_iomap(dev, np, 3, NULL); + if (IS_ERR(qphy->dp_tx2)) + return PTR_ERR(qphy->dp_tx2); } generic_phy = devm_phy_create(dev, np, &qmp_combo_dp_phy_ops); From bc8615888f3e4dc8f3448b6b4f8dec04b8b5bce2 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 12:06:13 +0100 Subject: [PATCH 2694/4122] phy: qcom-qmp-combo: clean up DP configurations In preparation for merging the USB and DP configurations, align the initialisations of the DP 
function pointers. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114110621.4639-15-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 40 +++++++++++------------ 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 43193bfe6e11..50c011d23a9e 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -1049,10 +1049,10 @@ static const struct qmp_phy_cfg sc7180_dpphy_cfg = { .swing_hbr3_hbr2 = &qmp_dp_v3_voltage_swing_hbr3_hbr2, .pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2, - .dp_aux_init = qcom_qmp_v3_phy_dp_aux_init, - .configure_dp_tx = qcom_qmp_v3_phy_configure_dp_tx, - .configure_dp_phy = qcom_qmp_v3_phy_configure_dp_phy, - .calibrate_dp_phy = qcom_qmp_v3_dp_phy_calibrate, + .dp_aux_init = qcom_qmp_v3_phy_dp_aux_init, + .configure_dp_tx = qcom_qmp_v3_phy_configure_dp_tx, + .configure_dp_phy = qcom_qmp_v3_phy_configure_dp_phy, + .calibrate_dp_phy = qcom_qmp_v3_dp_phy_calibrate, }; static const struct qmp_phy_combo_cfg sc7180_usb3dpphy_cfg = { @@ -1106,10 +1106,10 @@ static const struct qmp_phy_cfg sdm845_dpphy_cfg = { .swing_hbr3_hbr2 = &qmp_dp_v3_voltage_swing_hbr3_hbr2, .pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2, - .dp_aux_init = qcom_qmp_v3_phy_dp_aux_init, - .configure_dp_tx = qcom_qmp_v3_phy_configure_dp_tx, - .configure_dp_phy = qcom_qmp_v3_phy_configure_dp_phy, - .calibrate_dp_phy = qcom_qmp_v3_dp_phy_calibrate, + .dp_aux_init = qcom_qmp_v3_phy_dp_aux_init, + .configure_dp_tx = qcom_qmp_v3_phy_configure_dp_tx, + .configure_dp_phy = qcom_qmp_v3_phy_configure_dp_phy, + .calibrate_dp_phy = qcom_qmp_v3_dp_phy_calibrate, }; static const struct qmp_phy_combo_cfg sdm845_usb3dpphy_cfg = { @@ -1166,10 +1166,10 @@ static const struct qmp_phy_cfg sc8180x_dpphy_cfg = { .swing_hbr3_hbr2 = 
&qmp_dp_v3_voltage_swing_hbr3_hbr2, .pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2, - .dp_aux_init = qcom_qmp_v4_phy_dp_aux_init, - .configure_dp_tx = qcom_qmp_v4_phy_configure_dp_tx, - .configure_dp_phy = qcom_qmp_v4_phy_configure_dp_phy, - .calibrate_dp_phy = qcom_qmp_v4_dp_phy_calibrate, + .dp_aux_init = qcom_qmp_v4_phy_dp_aux_init, + .configure_dp_tx = qcom_qmp_v4_phy_configure_dp_tx, + .configure_dp_phy = qcom_qmp_v4_phy_configure_dp_phy, + .calibrate_dp_phy = qcom_qmp_v4_dp_phy_calibrate, }; static const struct qmp_phy_combo_cfg sc8180x_usb3dpphy_cfg = { @@ -1222,10 +1222,10 @@ static const struct qmp_phy_cfg sc8280xp_usb43dp_dp_cfg = { .swing_hbr3_hbr2 = &qmp_dp_v5_voltage_swing_hbr3_hbr2, .pre_emphasis_hbr3_hbr2 = &qmp_dp_v5_pre_emphasis_hbr3_hbr2, - .dp_aux_init = qcom_qmp_v4_phy_dp_aux_init, - .configure_dp_tx = qcom_qmp_v4_phy_configure_dp_tx, - .configure_dp_phy = qcom_qmp_v5_phy_configure_dp_phy, - .calibrate_dp_phy = qcom_qmp_v4_dp_phy_calibrate, + .dp_aux_init = qcom_qmp_v4_phy_dp_aux_init, + .configure_dp_tx = qcom_qmp_v4_phy_configure_dp_tx, + .configure_dp_phy = qcom_qmp_v5_phy_configure_dp_phy, + .calibrate_dp_phy = qcom_qmp_v4_dp_phy_calibrate, }; static const struct qmp_phy_combo_cfg sc8280xp_usb43dpphy_combo_cfg = { @@ -1282,10 +1282,10 @@ static const struct qmp_phy_cfg sm8250_dpphy_cfg = { .swing_hbr3_hbr2 = &qmp_dp_v3_voltage_swing_hbr3_hbr2, .pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2, - .dp_aux_init = qcom_qmp_v4_phy_dp_aux_init, - .configure_dp_tx = qcom_qmp_v4_phy_configure_dp_tx, - .configure_dp_phy = qcom_qmp_v4_phy_configure_dp_phy, - .calibrate_dp_phy = qcom_qmp_v4_dp_phy_calibrate, + .dp_aux_init = qcom_qmp_v4_phy_dp_aux_init, + .configure_dp_tx = qcom_qmp_v4_phy_configure_dp_tx, + .configure_dp_phy = qcom_qmp_v4_phy_configure_dp_phy, + .calibrate_dp_phy = qcom_qmp_v4_dp_phy_calibrate, }; static const struct qmp_phy_combo_cfg sm8250_usb3dpphy_cfg = { From ad4db91d60636c2c28487c3f518eab5952511923 Mon Sep 
17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 12:06:14 +0100 Subject: [PATCH 2695/4122] phy: qcom-qmp-combo: rename sc8280xp config In preparation for merging the USB and DP configurations, drop the "combo" infix from the SC8280XP combined configuration for consistency with the other platforms. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114110621.4639-16-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 50c011d23a9e..d3fd6bde4af5 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -1228,7 +1228,7 @@ static const struct qmp_phy_cfg sc8280xp_usb43dp_dp_cfg = { .calibrate_dp_phy = qcom_qmp_v4_dp_phy_calibrate, }; -static const struct qmp_phy_combo_cfg sc8280xp_usb43dpphy_combo_cfg = { +static const struct qmp_phy_combo_cfg sc8280xp_usb43dpphy_cfg = { .usb_cfg = &sc8280xp_usb43dp_usb_cfg, .dp_cfg = &sc8280xp_usb43dp_dp_cfg, }; @@ -2857,7 +2857,7 @@ static const struct of_device_id qmp_combo_of_match_table[] = { }, { .compatible = "qcom,sc8280xp-qmp-usb43dp-phy", - .data = &sc8280xp_usb43dpphy_combo_cfg, + .data = &sc8280xp_usb43dpphy_cfg, }, { .compatible = "qcom,sdm845-qmp-usb3-dp-phy", From 488f116de075f2fd0cb90205a76e2ca0756efaff Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 12:06:15 +0100 Subject: [PATCH 2696/4122] phy: qcom-qmp-combo: add DP configuration tables In preparation for merging the USB and DP configurations, add dedicated pointers for the DP serdes and tx tables to the configurations. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114110621.4639-17-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 45 ++++++++++++----------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index d3fd6bde4af5..2588cfa5e81e 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -816,6 +816,11 @@ struct qmp_phy_cfg { const struct qmp_phy_init_tbl *pcs_usb_tbl; int pcs_usb_tbl_num; + const struct qmp_phy_init_tbl *dp_serdes_tbl; + int dp_serdes_tbl_num; + const struct qmp_phy_init_tbl *dp_tx_tbl; + int dp_tx_tbl_num; + /* Init sequence for DP PHY block link rates */ const struct qmp_phy_init_tbl *serdes_tbl_rbr; int serdes_tbl_rbr_num; @@ -1030,10 +1035,10 @@ static const struct qmp_phy_cfg sc7180_dpphy_cfg = { .type = PHY_TYPE_DP, .lanes = 2, - .serdes_tbl = qmp_v3_dp_serdes_tbl, - .serdes_tbl_num = ARRAY_SIZE(qmp_v3_dp_serdes_tbl), - .tx_tbl = qmp_v3_dp_tx_tbl, - .tx_tbl_num = ARRAY_SIZE(qmp_v3_dp_tx_tbl), + .dp_serdes_tbl = qmp_v3_dp_serdes_tbl, + .dp_serdes_tbl_num = ARRAY_SIZE(qmp_v3_dp_serdes_tbl), + .dp_tx_tbl = qmp_v3_dp_tx_tbl, + .dp_tx_tbl_num = ARRAY_SIZE(qmp_v3_dp_tx_tbl), .serdes_tbl_rbr = qmp_v3_dp_serdes_tbl_rbr, .serdes_tbl_rbr_num = ARRAY_SIZE(qmp_v3_dp_serdes_tbl_rbr), @@ -1147,10 +1152,10 @@ static const struct qmp_phy_cfg sc8180x_dpphy_cfg = { .type = PHY_TYPE_DP, .lanes = 2, - .serdes_tbl = qmp_v4_dp_serdes_tbl, - .serdes_tbl_num = ARRAY_SIZE(qmp_v4_dp_serdes_tbl), - .tx_tbl = qmp_v4_dp_tx_tbl, - .tx_tbl_num = ARRAY_SIZE(qmp_v4_dp_tx_tbl), + .dp_serdes_tbl = qmp_v4_dp_serdes_tbl, + .dp_serdes_tbl_num = ARRAY_SIZE(qmp_v4_dp_serdes_tbl), + .dp_tx_tbl = qmp_v4_dp_tx_tbl, + .dp_tx_tbl_num = ARRAY_SIZE(qmp_v4_dp_tx_tbl), .serdes_tbl_rbr = qmp_v4_dp_serdes_tbl_rbr, .serdes_tbl_rbr_num = 
ARRAY_SIZE(qmp_v4_dp_serdes_tbl_rbr), @@ -1203,10 +1208,10 @@ static const struct qmp_phy_cfg sc8280xp_usb43dp_dp_cfg = { .type = PHY_TYPE_DP, .lanes = 2, - .serdes_tbl = qmp_v5_dp_serdes_tbl, - .serdes_tbl_num = ARRAY_SIZE(qmp_v5_dp_serdes_tbl), - .tx_tbl = qmp_v5_5nm_dp_tx_tbl, - .tx_tbl_num = ARRAY_SIZE(qmp_v5_5nm_dp_tx_tbl), + .dp_serdes_tbl = qmp_v5_dp_serdes_tbl, + .dp_serdes_tbl_num = ARRAY_SIZE(qmp_v5_dp_serdes_tbl), + .dp_tx_tbl = qmp_v5_5nm_dp_tx_tbl, + .dp_tx_tbl_num = ARRAY_SIZE(qmp_v5_5nm_dp_tx_tbl), .serdes_tbl_rbr = qmp_v4_dp_serdes_tbl_rbr, .serdes_tbl_rbr_num = ARRAY_SIZE(qmp_v4_dp_serdes_tbl_rbr), @@ -1263,10 +1268,10 @@ static const struct qmp_phy_cfg sm8250_dpphy_cfg = { .type = PHY_TYPE_DP, .lanes = 2, - .serdes_tbl = qmp_v4_dp_serdes_tbl, - .serdes_tbl_num = ARRAY_SIZE(qmp_v4_dp_serdes_tbl), - .tx_tbl = qmp_v4_dp_tx_tbl, - .tx_tbl_num = ARRAY_SIZE(qmp_v4_dp_tx_tbl), + .dp_serdes_tbl = qmp_v4_dp_serdes_tbl, + .dp_serdes_tbl_num = ARRAY_SIZE(qmp_v4_dp_serdes_tbl), + .dp_tx_tbl = qmp_v4_dp_tx_tbl, + .dp_tx_tbl_num = ARRAY_SIZE(qmp_v4_dp_tx_tbl), .serdes_tbl_rbr = qmp_v4_dp_serdes_tbl_rbr, .serdes_tbl_rbr_num = ARRAY_SIZE(qmp_v4_dp_serdes_tbl_rbr), @@ -1324,10 +1329,8 @@ static int qmp_combo_dp_serdes_init(struct qmp_phy *qphy) const struct qmp_phy_cfg *cfg = qphy->cfg; void __iomem *serdes = qphy->dp_serdes; const struct phy_configure_opts_dp *dp_opts = &qphy->dp_opts; - const struct qmp_phy_init_tbl *serdes_tbl = cfg->serdes_tbl; - int serdes_tbl_num = cfg->serdes_tbl_num; - qmp_combo_configure(serdes, serdes_tbl, serdes_tbl_num); + qmp_combo_configure(serdes, cfg->dp_serdes_tbl, cfg->dp_serdes_tbl_num); switch (dp_opts->link_rate) { case 1620: @@ -2000,10 +2003,10 @@ static int qmp_combo_dp_power_on(struct phy *phy) qmp_combo_dp_serdes_init(qphy); - qmp_combo_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1); + qmp_combo_configure_lane(tx, cfg->dp_tx_tbl, cfg->dp_tx_tbl_num, 1); if (cfg->lanes >= 2) - qmp_combo_configure_lane(qphy->dp_tx2, 
cfg->tx_tbl, cfg->tx_tbl_num, 2); + qmp_combo_configure_lane(qphy->dp_tx2, cfg->dp_tx_tbl, cfg->dp_tx_tbl_num, 2); /* Configure special DP tx tunings */ cfg->configure_dp_tx(qphy); From ba0af7b346db8149e33a2f6e1a7b8265cabbfacb Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 12:06:16 +0100 Subject: [PATCH 2697/4122] phy: qcom-qmp-combo: drop lanes config parameter Since the QMP driver split there is really no need for the 'lanes' configuration parameter as all of these USB-C PHYs support dual-lane SuperSpeed USB and quad-lane (uni-directional) DP (even if the driver still only supports CC1 orientation using lanes 2 and 3). Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221114110621.4639-18-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 62 ++++++++--------------- 1 file changed, 20 insertions(+), 42 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 2588cfa5e81e..a0abeb7c3bca 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -802,7 +802,6 @@ struct qmp_phy; struct qmp_phy_cfg { /* phy-type - PCIE/UFS/USB */ unsigned int type; - int lanes; /* Init sequence for PHY blocks - serdes, tx, rx, pcs */ const struct qmp_phy_init_tbl *serdes_tbl; @@ -1010,7 +1009,6 @@ static const char * const sc7180_usb3phy_reset_l[] = { static const struct qmp_phy_cfg sc7180_usb3phy_cfg = { .type = PHY_TYPE_USB3, - .lanes = 2, .serdes_tbl = qmp_v3_usb3_serdes_tbl, .serdes_tbl_num = ARRAY_SIZE(qmp_v3_usb3_serdes_tbl), @@ -1033,7 +1031,6 @@ static const struct qmp_phy_cfg sc7180_usb3phy_cfg = { static const struct qmp_phy_cfg sc7180_dpphy_cfg = { .type = PHY_TYPE_DP, - .lanes = 2, .dp_serdes_tbl = qmp_v3_dp_serdes_tbl, .dp_serdes_tbl_num = ARRAY_SIZE(qmp_v3_dp_serdes_tbl), @@ -1067,7 +1064,6 @@ static const struct qmp_phy_combo_cfg sc7180_usb3dpphy_cfg = { 
static const struct qmp_phy_cfg sdm845_usb3phy_cfg = { .type = PHY_TYPE_USB3, - .lanes = 2, .serdes_tbl = qmp_v3_usb3_serdes_tbl, .serdes_tbl_num = ARRAY_SIZE(qmp_v3_usb3_serdes_tbl), @@ -1090,7 +1086,6 @@ static const struct qmp_phy_cfg sdm845_usb3phy_cfg = { static const struct qmp_phy_cfg sdm845_dpphy_cfg = { .type = PHY_TYPE_DP, - .lanes = 2, .serdes_tbl = qmp_v3_dp_serdes_tbl, .serdes_tbl_num = ARRAY_SIZE(qmp_v3_dp_serdes_tbl), @@ -1124,7 +1119,6 @@ static const struct qmp_phy_combo_cfg sdm845_usb3dpphy_cfg = { static const struct qmp_phy_cfg sm8150_usb3phy_cfg = { .type = PHY_TYPE_USB3, - .lanes = 2, .serdes_tbl = sm8150_usb3_serdes_tbl, .serdes_tbl_num = ARRAY_SIZE(sm8150_usb3_serdes_tbl), @@ -1150,7 +1144,6 @@ static const struct qmp_phy_cfg sm8150_usb3phy_cfg = { static const struct qmp_phy_cfg sc8180x_dpphy_cfg = { .type = PHY_TYPE_DP, - .lanes = 2, .dp_serdes_tbl = qmp_v4_dp_serdes_tbl, .dp_serdes_tbl_num = ARRAY_SIZE(qmp_v4_dp_serdes_tbl), @@ -1184,7 +1177,6 @@ static const struct qmp_phy_combo_cfg sc8180x_usb3dpphy_cfg = { static const struct qmp_phy_cfg sc8280xp_usb43dp_usb_cfg = { .type = PHY_TYPE_USB3, - .lanes = 2, .serdes_tbl = sc8280xp_usb43dp_serdes_tbl, .serdes_tbl_num = ARRAY_SIZE(sc8280xp_usb43dp_serdes_tbl), @@ -1206,7 +1198,6 @@ static const struct qmp_phy_cfg sc8280xp_usb43dp_usb_cfg = { static const struct qmp_phy_cfg sc8280xp_usb43dp_dp_cfg = { .type = PHY_TYPE_DP, - .lanes = 2, .dp_serdes_tbl = qmp_v5_dp_serdes_tbl, .dp_serdes_tbl_num = ARRAY_SIZE(qmp_v5_dp_serdes_tbl), @@ -1240,7 +1231,6 @@ static const struct qmp_phy_combo_cfg sc8280xp_usb43dpphy_cfg = { static const struct qmp_phy_cfg sm8250_usb3phy_cfg = { .type = PHY_TYPE_USB3, - .lanes = 2, .serdes_tbl = sm8150_usb3_serdes_tbl, .serdes_tbl_num = ARRAY_SIZE(sm8150_usb3_serdes_tbl), @@ -1266,7 +1256,6 @@ static const struct qmp_phy_cfg sm8250_usb3phy_cfg = { static const struct qmp_phy_cfg sm8250_dpphy_cfg = { .type = PHY_TYPE_DP, - .lanes = 2, .dp_serdes_tbl = qmp_v4_dp_serdes_tbl, 
.dp_serdes_tbl_num = ARRAY_SIZE(qmp_v4_dp_serdes_tbl), @@ -2000,13 +1989,12 @@ static int qmp_combo_dp_power_on(struct phy *phy) struct qmp_phy *qphy = phy_get_drvdata(phy); const struct qmp_phy_cfg *cfg = qphy->cfg; void __iomem *tx = qphy->dp_tx; + void __iomem *tx2 = qphy->dp_tx2; qmp_combo_dp_serdes_init(qphy); qmp_combo_configure_lane(tx, cfg->dp_tx_tbl, cfg->dp_tx_tbl_num, 1); - - if (cfg->lanes >= 2) - qmp_combo_configure_lane(qphy->dp_tx2, cfg->dp_tx_tbl, cfg->dp_tx_tbl_num, 2); + qmp_combo_configure_lane(tx2, cfg->dp_tx_tbl, cfg->dp_tx_tbl_num, 2); /* Configure special DP tx tunings */ cfg->configure_dp_tx(qphy); @@ -2035,6 +2023,8 @@ static int qmp_combo_usb_power_on(struct phy *phy) void __iomem *serdes = qphy->serdes; void __iomem *tx = qphy->tx; void __iomem *rx = qphy->rx; + void __iomem *tx2 = qphy->tx2; + void __iomem *rx2 = qphy->rx2; void __iomem *pcs = qphy->pcs; void __iomem *status; unsigned int val; @@ -2050,14 +2040,10 @@ static int qmp_combo_usb_power_on(struct phy *phy) /* Tx, Rx, and PCS configurations */ qmp_combo_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1); - - if (cfg->lanes >= 2) - qmp_combo_configure_lane(qphy->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2); + qmp_combo_configure_lane(tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2); qmp_combo_configure_lane(rx, cfg->rx_tbl, cfg->rx_tbl_num, 1); - - if (cfg->lanes >= 2) - qmp_combo_configure_lane(qphy->rx2, cfg->rx_tbl, cfg->rx_tbl_num, 2); + qmp_combo_configure_lane(rx2, cfg->rx_tbl, cfg->rx_tbl_num, 2); qmp_combo_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num); @@ -2601,8 +2587,8 @@ static int qmp_combo_create_dp(struct device *dev, struct device_node *np, int i qphy->dp_serdes = serdes; /* * Get memory resources from the DP child node: - * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2. - * For dual lane PHYs: tx2 -> 3, rx2 -> 4 + * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2; + * tx2 -> 3; rx2 -> 4 * * Note that only tx/tx2 and pcs are used by the DP implementation. 
*/ @@ -2614,11 +2600,9 @@ static int qmp_combo_create_dp(struct device *dev, struct device_node *np, int i if (IS_ERR(qphy->dp_pcs)) return PTR_ERR(qphy->dp_pcs); - if (cfg->lanes >= 2) { - qphy->dp_tx2 = devm_of_iomap(dev, np, 3, NULL); - if (IS_ERR(qphy->dp_tx2)) - return PTR_ERR(qphy->dp_tx2); - } + qphy->dp_tx2 = devm_of_iomap(dev, np, 3, NULL); + if (IS_ERR(qphy->dp_tx2)) + return PTR_ERR(qphy->dp_tx2); generic_phy = devm_phy_create(dev, np, &qmp_combo_dp_phy_ops); if (IS_ERR(generic_phy)) { @@ -2651,9 +2635,8 @@ static int qmp_combo_create_usb(struct device *dev, struct device_node *np, int qphy->serdes = serdes; /* * Get memory resources from the USB child node: - * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2. - * For dual lane PHYs: tx2 -> 3, rx2 -> 4, pcs_misc (optional) -> 5 - * For single lane PHYs: pcs_misc (optional) -> 3. + * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2; + * tx2 -> 3; rx2 -> 4; pcs_misc (optional) -> 5 */ qphy->tx = devm_of_iomap(dev, np, 0, NULL); if (IS_ERR(qphy->tx)) @@ -2670,20 +2653,15 @@ static int qmp_combo_create_usb(struct device *dev, struct device_node *np, int if (cfg->pcs_usb_offset) qphy->pcs_usb = qphy->pcs + cfg->pcs_usb_offset; - if (cfg->lanes >= 2) { - qphy->tx2 = devm_of_iomap(dev, np, 3, NULL); - if (IS_ERR(qphy->tx2)) - return PTR_ERR(qphy->tx2); + qphy->tx2 = devm_of_iomap(dev, np, 3, NULL); + if (IS_ERR(qphy->tx2)) + return PTR_ERR(qphy->tx2); - qphy->rx2 = devm_of_iomap(dev, np, 4, NULL); - if (IS_ERR(qphy->rx2)) - return PTR_ERR(qphy->rx2); - - qphy->pcs_misc = devm_of_iomap(dev, np, 5, NULL); - } else { - qphy->pcs_misc = devm_of_iomap(dev, np, 3, NULL); - } + qphy->rx2 = devm_of_iomap(dev, np, 4, NULL); + if (IS_ERR(qphy->rx2)) + return PTR_ERR(qphy->rx2); + qphy->pcs_misc = devm_of_iomap(dev, np, 5, NULL); if (IS_ERR(qphy->pcs_misc)) { dev_vdbg(dev, "PHY pcs_misc-reg not used\n"); qphy->pcs_misc = NULL; From 9e62877eefacecdcd0467cfeb6bcd20786465f9b Mon Sep 17 00:00:00 2001 From: Johan 
Hovold Date: Mon, 14 Nov 2022 12:06:17 +0100 Subject: [PATCH 2698/4122] phy: qcom-qmp-combo: merge USB and DP configurations It does not really make any sense to keep separate configuration structures for the USB and DP parts of the same PHY so merge them. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114110621.4639-19-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 188 +++++++--------------- 1 file changed, 60 insertions(+), 128 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index a0abeb7c3bca..298477259ee6 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -798,11 +798,7 @@ static const u8 qmp_dp_v5_voltage_swing_hbr_rbr[4][4] = { struct qmp_phy; -/* struct qmp_phy_cfg - per-PHY initialization config */ struct qmp_phy_cfg { - /* phy-type - PCIE/UFS/USB */ - unsigned int type; - /* Init sequence for PHY blocks - serdes, tx, rx, pcs */ const struct qmp_phy_init_tbl *serdes_tbl; int serdes_tbl_num; @@ -863,11 +859,6 @@ struct qmp_phy_cfg { }; -struct qmp_phy_combo_cfg { - const struct qmp_phy_cfg *usb_cfg; - const struct qmp_phy_cfg *dp_cfg; -}; - /** * struct qmp_phy - per-lane phy descriptor * @@ -1007,9 +998,7 @@ static const char * const sc7180_usb3phy_reset_l[] = { "phy", }; -static const struct qmp_phy_cfg sc7180_usb3phy_cfg = { - .type = PHY_TYPE_USB3, - +static const struct qmp_phy_cfg sc7180_usb3dpphy_cfg = { .serdes_tbl = qmp_v3_usb3_serdes_tbl, .serdes_tbl_num = ARRAY_SIZE(qmp_v3_usb3_serdes_tbl), .tx_tbl = qmp_v3_usb3_tx_tbl, @@ -1018,19 +1007,6 @@ static const struct qmp_phy_cfg sc7180_usb3phy_cfg = { .rx_tbl_num = ARRAY_SIZE(qmp_v3_usb3_rx_tbl), .pcs_tbl = qmp_v3_usb3_pcs_tbl, .pcs_tbl_num = ARRAY_SIZE(qmp_v3_usb3_pcs_tbl), - .clk_list = qmp_v3_phy_clk_l, - .num_clks = ARRAY_SIZE(qmp_v3_phy_clk_l), - .reset_list = sc7180_usb3phy_reset_l, 
- .num_resets = ARRAY_SIZE(sc7180_usb3phy_reset_l), - .vreg_list = qmp_phy_vreg_l, - .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), - .regs = qmp_v3_usb3phy_regs_layout, - - .has_pwrdn_delay = true, -}; - -static const struct qmp_phy_cfg sc7180_dpphy_cfg = { - .type = PHY_TYPE_DP, .dp_serdes_tbl = qmp_v3_dp_serdes_tbl, .dp_serdes_tbl_num = ARRAY_SIZE(qmp_v3_dp_serdes_tbl), @@ -1055,16 +1031,19 @@ static const struct qmp_phy_cfg sc7180_dpphy_cfg = { .configure_dp_tx = qcom_qmp_v3_phy_configure_dp_tx, .configure_dp_phy = qcom_qmp_v3_phy_configure_dp_phy, .calibrate_dp_phy = qcom_qmp_v3_dp_phy_calibrate, + + .clk_list = qmp_v3_phy_clk_l, + .num_clks = ARRAY_SIZE(qmp_v3_phy_clk_l), + .reset_list = sc7180_usb3phy_reset_l, + .num_resets = ARRAY_SIZE(sc7180_usb3phy_reset_l), + .vreg_list = qmp_phy_vreg_l, + .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), + .regs = qmp_v3_usb3phy_regs_layout, + + .has_pwrdn_delay = true, }; -static const struct qmp_phy_combo_cfg sc7180_usb3dpphy_cfg = { - .usb_cfg = &sc7180_usb3phy_cfg, - .dp_cfg = &sc7180_dpphy_cfg, -}; - -static const struct qmp_phy_cfg sdm845_usb3phy_cfg = { - .type = PHY_TYPE_USB3, - +static const struct qmp_phy_cfg sdm845_usb3dpphy_cfg = { .serdes_tbl = qmp_v3_usb3_serdes_tbl, .serdes_tbl_num = ARRAY_SIZE(qmp_v3_usb3_serdes_tbl), .tx_tbl = qmp_v3_usb3_tx_tbl, @@ -1073,24 +1052,11 @@ static const struct qmp_phy_cfg sdm845_usb3phy_cfg = { .rx_tbl_num = ARRAY_SIZE(qmp_v3_usb3_rx_tbl), .pcs_tbl = qmp_v3_usb3_pcs_tbl, .pcs_tbl_num = ARRAY_SIZE(qmp_v3_usb3_pcs_tbl), - .clk_list = qmp_v3_phy_clk_l, - .num_clks = ARRAY_SIZE(qmp_v3_phy_clk_l), - .reset_list = msm8996_usb3phy_reset_l, - .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), - .vreg_list = qmp_phy_vreg_l, - .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), - .regs = qmp_v3_usb3phy_regs_layout, - .has_pwrdn_delay = true, -}; - -static const struct qmp_phy_cfg sdm845_dpphy_cfg = { - .type = PHY_TYPE_DP, - - .serdes_tbl = qmp_v3_dp_serdes_tbl, - .serdes_tbl_num = 
ARRAY_SIZE(qmp_v3_dp_serdes_tbl), - .tx_tbl = qmp_v3_dp_tx_tbl, - .tx_tbl_num = ARRAY_SIZE(qmp_v3_dp_tx_tbl), + .dp_serdes_tbl = qmp_v3_dp_serdes_tbl, + .dp_serdes_tbl_num = ARRAY_SIZE(qmp_v3_dp_serdes_tbl), + .dp_tx_tbl = qmp_v3_dp_tx_tbl, + .dp_tx_tbl_num = ARRAY_SIZE(qmp_v3_dp_tx_tbl), .serdes_tbl_rbr = qmp_v3_dp_serdes_tbl_rbr, .serdes_tbl_rbr_num = ARRAY_SIZE(qmp_v3_dp_serdes_tbl_rbr), @@ -1110,16 +1076,19 @@ static const struct qmp_phy_cfg sdm845_dpphy_cfg = { .configure_dp_tx = qcom_qmp_v3_phy_configure_dp_tx, .configure_dp_phy = qcom_qmp_v3_phy_configure_dp_phy, .calibrate_dp_phy = qcom_qmp_v3_dp_phy_calibrate, + + .clk_list = qmp_v3_phy_clk_l, + .num_clks = ARRAY_SIZE(qmp_v3_phy_clk_l), + .reset_list = msm8996_usb3phy_reset_l, + .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), + .vreg_list = qmp_phy_vreg_l, + .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), + .regs = qmp_v3_usb3phy_regs_layout, + + .has_pwrdn_delay = true, }; -static const struct qmp_phy_combo_cfg sdm845_usb3dpphy_cfg = { - .usb_cfg = &sdm845_usb3phy_cfg, - .dp_cfg = &sdm845_dpphy_cfg, -}; - -static const struct qmp_phy_cfg sm8150_usb3phy_cfg = { - .type = PHY_TYPE_USB3, - +static const struct qmp_phy_cfg sc8180x_usb3dpphy_cfg = { .serdes_tbl = sm8150_usb3_serdes_tbl, .serdes_tbl_num = ARRAY_SIZE(sm8150_usb3_serdes_tbl), .tx_tbl = sm8150_usb3_tx_tbl, @@ -1130,20 +1099,6 @@ static const struct qmp_phy_cfg sm8150_usb3phy_cfg = { .pcs_tbl_num = ARRAY_SIZE(sm8150_usb3_pcs_tbl), .pcs_usb_tbl = sm8150_usb3_pcs_usb_tbl, .pcs_usb_tbl_num = ARRAY_SIZE(sm8150_usb3_pcs_usb_tbl), - .clk_list = qmp_v4_phy_clk_l, - .num_clks = ARRAY_SIZE(qmp_v4_phy_clk_l), - .reset_list = msm8996_usb3phy_reset_l, - .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), - .vreg_list = qmp_phy_vreg_l, - .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), - .regs = qmp_v4_usb3phy_regs_layout, - .pcs_usb_offset = 0x300, - - .has_pwrdn_delay = true, -}; - -static const struct qmp_phy_cfg sc8180x_dpphy_cfg = { - .type = PHY_TYPE_DP, 
.dp_serdes_tbl = qmp_v4_dp_serdes_tbl, .dp_serdes_tbl_num = ARRAY_SIZE(qmp_v4_dp_serdes_tbl), @@ -1168,24 +1123,7 @@ static const struct qmp_phy_cfg sc8180x_dpphy_cfg = { .configure_dp_tx = qcom_qmp_v4_phy_configure_dp_tx, .configure_dp_phy = qcom_qmp_v4_phy_configure_dp_phy, .calibrate_dp_phy = qcom_qmp_v4_dp_phy_calibrate, -}; -static const struct qmp_phy_combo_cfg sc8180x_usb3dpphy_cfg = { - .usb_cfg = &sm8150_usb3phy_cfg, - .dp_cfg = &sc8180x_dpphy_cfg, -}; - -static const struct qmp_phy_cfg sc8280xp_usb43dp_usb_cfg = { - .type = PHY_TYPE_USB3, - - .serdes_tbl = sc8280xp_usb43dp_serdes_tbl, - .serdes_tbl_num = ARRAY_SIZE(sc8280xp_usb43dp_serdes_tbl), - .tx_tbl = sc8280xp_usb43dp_tx_tbl, - .tx_tbl_num = ARRAY_SIZE(sc8280xp_usb43dp_tx_tbl), - .rx_tbl = sc8280xp_usb43dp_rx_tbl, - .rx_tbl_num = ARRAY_SIZE(sc8280xp_usb43dp_rx_tbl), - .pcs_tbl = sc8280xp_usb43dp_pcs_tbl, - .pcs_tbl_num = ARRAY_SIZE(sc8280xp_usb43dp_pcs_tbl), .clk_list = qmp_v4_phy_clk_l, .num_clks = ARRAY_SIZE(qmp_v4_phy_clk_l), .reset_list = msm8996_usb3phy_reset_l, @@ -1194,10 +1132,19 @@ static const struct qmp_phy_cfg sc8280xp_usb43dp_usb_cfg = { .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = qmp_v4_usb3phy_regs_layout, .pcs_usb_offset = 0x300, + + .has_pwrdn_delay = true, }; -static const struct qmp_phy_cfg sc8280xp_usb43dp_dp_cfg = { - .type = PHY_TYPE_DP, +static const struct qmp_phy_cfg sc8280xp_usb43dpphy_cfg = { + .serdes_tbl = sc8280xp_usb43dp_serdes_tbl, + .serdes_tbl_num = ARRAY_SIZE(sc8280xp_usb43dp_serdes_tbl), + .tx_tbl = sc8280xp_usb43dp_tx_tbl, + .tx_tbl_num = ARRAY_SIZE(sc8280xp_usb43dp_tx_tbl), + .rx_tbl = sc8280xp_usb43dp_rx_tbl, + .rx_tbl_num = ARRAY_SIZE(sc8280xp_usb43dp_rx_tbl), + .pcs_tbl = sc8280xp_usb43dp_pcs_tbl, + .pcs_tbl_num = ARRAY_SIZE(sc8280xp_usb43dp_pcs_tbl), .dp_serdes_tbl = qmp_v5_dp_serdes_tbl, .dp_serdes_tbl_num = ARRAY_SIZE(qmp_v5_dp_serdes_tbl), @@ -1222,16 +1169,18 @@ static const struct qmp_phy_cfg sc8280xp_usb43dp_dp_cfg = { .configure_dp_tx = 
qcom_qmp_v4_phy_configure_dp_tx, .configure_dp_phy = qcom_qmp_v5_phy_configure_dp_phy, .calibrate_dp_phy = qcom_qmp_v4_dp_phy_calibrate, + + .clk_list = qmp_v4_phy_clk_l, + .num_clks = ARRAY_SIZE(qmp_v4_phy_clk_l), + .reset_list = msm8996_usb3phy_reset_l, + .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), + .vreg_list = qmp_phy_vreg_l, + .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), + .regs = qmp_v4_usb3phy_regs_layout, + .pcs_usb_offset = 0x300, }; -static const struct qmp_phy_combo_cfg sc8280xp_usb43dpphy_cfg = { - .usb_cfg = &sc8280xp_usb43dp_usb_cfg, - .dp_cfg = &sc8280xp_usb43dp_dp_cfg, -}; - -static const struct qmp_phy_cfg sm8250_usb3phy_cfg = { - .type = PHY_TYPE_USB3, - +static const struct qmp_phy_cfg sm8250_usb3dpphy_cfg = { .serdes_tbl = sm8150_usb3_serdes_tbl, .serdes_tbl_num = ARRAY_SIZE(sm8150_usb3_serdes_tbl), .tx_tbl = sm8250_usb3_tx_tbl, @@ -1242,20 +1191,6 @@ static const struct qmp_phy_cfg sm8250_usb3phy_cfg = { .pcs_tbl_num = ARRAY_SIZE(sm8250_usb3_pcs_tbl), .pcs_usb_tbl = sm8250_usb3_pcs_usb_tbl, .pcs_usb_tbl_num = ARRAY_SIZE(sm8250_usb3_pcs_usb_tbl), - .clk_list = qmp_v4_sm8250_usbphy_clk_l, - .num_clks = ARRAY_SIZE(qmp_v4_sm8250_usbphy_clk_l), - .reset_list = msm8996_usb3phy_reset_l, - .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), - .vreg_list = qmp_phy_vreg_l, - .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), - .regs = qmp_v4_usb3phy_regs_layout, - .pcs_usb_offset = 0x300, - - .has_pwrdn_delay = true, -}; - -static const struct qmp_phy_cfg sm8250_dpphy_cfg = { - .type = PHY_TYPE_DP, .dp_serdes_tbl = qmp_v4_dp_serdes_tbl, .dp_serdes_tbl_num = ARRAY_SIZE(qmp_v4_dp_serdes_tbl), @@ -1280,11 +1215,17 @@ static const struct qmp_phy_cfg sm8250_dpphy_cfg = { .configure_dp_tx = qcom_qmp_v4_phy_configure_dp_tx, .configure_dp_phy = qcom_qmp_v4_phy_configure_dp_phy, .calibrate_dp_phy = qcom_qmp_v4_dp_phy_calibrate, -}; -static const struct qmp_phy_combo_cfg sm8250_usb3dpphy_cfg = { - .usb_cfg = &sm8250_usb3phy_cfg, - .dp_cfg = &sm8250_dpphy_cfg, + .clk_list 
= qmp_v4_sm8250_usbphy_clk_l, + .num_clks = ARRAY_SIZE(qmp_v4_sm8250_usbphy_clk_l), + .reset_list = msm8996_usb3phy_reset_l, + .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), + .vreg_list = qmp_phy_vreg_l, + .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), + .regs = qmp_v4_usb3phy_regs_layout, + .pcs_usb_offset = 0x300, + + .has_pwrdn_delay = true, }; static void qmp_combo_configure_lane(void __iomem *base, @@ -2697,10 +2638,7 @@ static int qmp_combo_probe(struct platform_device *pdev) void __iomem *serdes; void __iomem *usb_serdes; void __iomem *dp_serdes = NULL; - const struct qmp_phy_combo_cfg *combo_cfg = NULL; const struct qmp_phy_cfg *cfg = NULL; - const struct qmp_phy_cfg *usb_cfg = NULL; - const struct qmp_phy_cfg *dp_cfg = NULL; int num, id, expected_phys; int ret; @@ -2711,13 +2649,10 @@ static int qmp_combo_probe(struct platform_device *pdev) qmp->dev = dev; dev_set_drvdata(dev, qmp); - combo_cfg = of_device_get_match_data(dev); - if (!combo_cfg) + cfg = of_device_get_match_data(dev); + if (!cfg) return -EINVAL; - usb_cfg = combo_cfg->usb_cfg; - cfg = usb_cfg; /* Setup clks and regulators */ - usb_serdes = serdes = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(serdes)) return PTR_ERR(serdes); @@ -2730,7 +2665,6 @@ static int qmp_combo_probe(struct platform_device *pdev) if (IS_ERR(dp_serdes)) return PTR_ERR(dp_serdes); - dp_cfg = combo_cfg->dp_cfg; expected_phys = 2; mutex_init(&qmp->phy_mutex); @@ -2769,7 +2703,6 @@ static int qmp_combo_probe(struct platform_device *pdev) id = 0; for_each_available_child_of_node(dev->of_node, child) { if (of_node_name_eq(child, "dp-phy")) { - cfg = dp_cfg; serdes = dp_serdes; /* Create per-lane phy */ @@ -2787,7 +2720,6 @@ static int qmp_combo_probe(struct platform_device *pdev) goto err_node_put; } } else if (of_node_name_eq(child, "usb3-phy")) { - cfg = usb_cfg; serdes = usb_serdes; /* Create per-lane phy */ From dd1153651b0383ee9597609bc449d1751eefdcae Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 
2022 12:06:18 +0100 Subject: [PATCH 2699/4122] phy: qcom-qmp-combo: merge driver data The QMP combo driver manages a single PHY (even if it provides two interfaces for USB and DP, respectively) so merge the old qcom_qmp and qmp_phy structures and drop the PHY array. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114110621.4639-20-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 688 ++++++++++------------ 1 file changed, 312 insertions(+), 376 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 298477259ee6..707dd68ba993 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -796,7 +796,7 @@ static const u8 qmp_dp_v5_voltage_swing_hbr_rbr[4][4] = { { 0x3f, 0xff, 0xff, 0xff } }; -struct qmp_phy; +struct qmp_combo; struct qmp_phy_cfg { /* Init sequence for PHY blocks - serdes, tx, rx, pcs */ @@ -833,10 +833,10 @@ struct qmp_phy_cfg { const u8 (*pre_emphasis_hbr3_hbr2)[4][4]; /* DP PHY callbacks */ - int (*configure_dp_phy)(struct qmp_phy *qphy); - void (*configure_dp_tx)(struct qmp_phy *qphy); - int (*calibrate_dp_phy)(struct qmp_phy *qphy); - void (*dp_aux_init)(struct qmp_phy *qphy); + int (*configure_dp_phy)(struct qmp_combo *qmp); + void (*configure_dp_tx)(struct qmp_combo *qmp); + int (*calibrate_dp_phy)(struct qmp_combo *qmp); + void (*dp_aux_init)(struct qmp_combo *qmp); /* clock ids to be requested */ const char * const *clk_list; @@ -859,29 +859,19 @@ struct qmp_phy_cfg { }; -/** - * struct qmp_phy - per-lane phy descriptor - * - * @phy: generic phy - * @cfg: phy specific configuration - * @serdes: iomapped memory space for phy's serdes (i.e. 
PLL) - * @tx: iomapped memory space for lane's tx - * @rx: iomapped memory space for lane's rx - * @pcs: iomapped memory space for lane's pcs - * @tx2: iomapped memory space for second lane's tx (in dual lane PHYs) - * @rx2: iomapped memory space for second lane's rx (in dual lane PHYs) - * @pcs_misc: iomapped memory space for lane's pcs_misc - * @pcs_usb: iomapped memory space for lane's pcs_usb - * @pipe_clk: pipe clock - * @qmp: QMP phy to which this lane belongs - * @mode: current PHY mode - * @dp_aux_cfg: Display port aux config - * @dp_opts: Display port optional config - * @dp_clks: Display port clocks - */ -struct qmp_phy { - struct phy *phy; +struct qmp_phy_dp_clks { + struct qmp_combo *qmp; + struct clk_hw dp_link_hw; + struct clk_hw dp_pixel_hw; +}; + +struct qmp_combo { + struct device *dev; + const struct qmp_phy_cfg *cfg; + + void __iomem *dp_com; + void __iomem *serdes; void __iomem *tx; void __iomem *rx; @@ -897,59 +887,33 @@ struct qmp_phy { void __iomem *dp_pcs; struct clk *pipe_clk; - struct qcom_qmp *qmp; + struct clk_bulk_data *clks; + struct reset_control_bulk_data *resets; + struct regulator_bulk_data *vregs; + + struct mutex phy_mutex; + int init_count; + + struct phy *usb_phy; enum phy_mode mode; + + struct phy *dp_phy; unsigned int dp_aux_cfg; struct phy_configure_opts_dp dp_opts; struct qmp_phy_dp_clks *dp_clks; }; -struct qmp_phy_dp_clks { - struct qmp_phy *qphy; - struct clk_hw dp_link_hw; - struct clk_hw dp_pixel_hw; -}; +static void qcom_qmp_v3_phy_dp_aux_init(struct qmp_combo *qmp); +static void qcom_qmp_v3_phy_configure_dp_tx(struct qmp_combo *qmp); +static int qcom_qmp_v3_phy_configure_dp_phy(struct qmp_combo *qmp); +static int qcom_qmp_v3_dp_phy_calibrate(struct qmp_combo *qmp); -/** - * struct qcom_qmp - structure holding QMP phy block attributes - * - * @dev: device - * @dp_com: iomapped memory space for phy's dp_com control block - * - * @clks: array of clocks required by phy - * @resets: array of resets required by phy - * 
@vregs: regulator supplies bulk data - * - * @phys: array of per-lane phy descriptors - * @phy_mutex: mutex lock for PHY common block initialization - * @init_count: phy common block initialization count - */ -struct qcom_qmp { - struct device *dev; - void __iomem *dp_com; +static void qcom_qmp_v4_phy_dp_aux_init(struct qmp_combo *qmp); +static void qcom_qmp_v4_phy_configure_dp_tx(struct qmp_combo *qmp); +static int qcom_qmp_v4_phy_configure_dp_phy(struct qmp_combo *qmp); +static int qcom_qmp_v4_dp_phy_calibrate(struct qmp_combo *qmp); - struct clk_bulk_data *clks; - struct reset_control_bulk_data *resets; - struct regulator_bulk_data *vregs; - - struct qmp_phy **phys; - struct qmp_phy *usb_phy; - - struct mutex phy_mutex; - int init_count; -}; - -static void qcom_qmp_v3_phy_dp_aux_init(struct qmp_phy *qphy); -static void qcom_qmp_v3_phy_configure_dp_tx(struct qmp_phy *qphy); -static int qcom_qmp_v3_phy_configure_dp_phy(struct qmp_phy *qphy); -static int qcom_qmp_v3_dp_phy_calibrate(struct qmp_phy *qphy); - -static void qcom_qmp_v4_phy_dp_aux_init(struct qmp_phy *qphy); -static void qcom_qmp_v4_phy_configure_dp_tx(struct qmp_phy *qphy); -static int qcom_qmp_v4_phy_configure_dp_phy(struct qmp_phy *qphy); -static int qcom_qmp_v4_dp_phy_calibrate(struct qmp_phy *qphy); - -static int qcom_qmp_v5_phy_configure_dp_phy(struct qmp_phy *qphy); +static int qcom_qmp_v5_phy_configure_dp_phy(struct qmp_combo *qmp); static inline void qphy_setbits(void __iomem *base, u32 offset, u32 val) { @@ -1254,11 +1218,11 @@ static void qmp_combo_configure(void __iomem *base, qmp_combo_configure_lane(base, tbl, num, 0xff); } -static int qmp_combo_dp_serdes_init(struct qmp_phy *qphy) +static int qmp_combo_dp_serdes_init(struct qmp_combo *qmp) { - const struct qmp_phy_cfg *cfg = qphy->cfg; - void __iomem *serdes = qphy->dp_serdes; - const struct phy_configure_opts_dp *dp_opts = &qphy->dp_opts; + const struct qmp_phy_cfg *cfg = qmp->cfg; + void __iomem *serdes = qmp->dp_serdes; + const struct 
phy_configure_opts_dp *dp_opts = &qmp->dp_opts; qmp_combo_configure(serdes, cfg->dp_serdes_tbl, cfg->dp_serdes_tbl_num); @@ -1287,54 +1251,54 @@ static int qmp_combo_dp_serdes_init(struct qmp_phy *qphy) return 0; } -static void qcom_qmp_v3_phy_dp_aux_init(struct qmp_phy *qphy) +static void qcom_qmp_v3_phy_dp_aux_init(struct qmp_combo *qmp) { writel(DP_PHY_PD_CTL_PWRDN | DP_PHY_PD_CTL_AUX_PWRDN | DP_PHY_PD_CTL_PLL_PWRDN | DP_PHY_PD_CTL_DP_CLAMP_EN, - qphy->dp_pcs + QSERDES_DP_PHY_PD_CTL); + qmp->dp_pcs + QSERDES_DP_PHY_PD_CTL); /* Turn on BIAS current for PHY/PLL */ writel(QSERDES_V3_COM_BIAS_EN | QSERDES_V3_COM_BIAS_EN_MUX | QSERDES_V3_COM_CLKBUF_L_EN | QSERDES_V3_COM_EN_SYSCLK_TX_SEL, - qphy->dp_serdes + QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN); + qmp->dp_serdes + QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN); - writel(DP_PHY_PD_CTL_PSR_PWRDN, qphy->dp_pcs + QSERDES_DP_PHY_PD_CTL); + writel(DP_PHY_PD_CTL_PSR_PWRDN, qmp->dp_pcs + QSERDES_DP_PHY_PD_CTL); writel(DP_PHY_PD_CTL_PWRDN | DP_PHY_PD_CTL_AUX_PWRDN | DP_PHY_PD_CTL_LANE_0_1_PWRDN | DP_PHY_PD_CTL_LANE_2_3_PWRDN | DP_PHY_PD_CTL_PLL_PWRDN | DP_PHY_PD_CTL_DP_CLAMP_EN, - qphy->dp_pcs + QSERDES_DP_PHY_PD_CTL); + qmp->dp_pcs + QSERDES_DP_PHY_PD_CTL); writel(QSERDES_V3_COM_BIAS_EN | QSERDES_V3_COM_BIAS_EN_MUX | QSERDES_V3_COM_CLKBUF_R_EN | QSERDES_V3_COM_CLKBUF_L_EN | QSERDES_V3_COM_EN_SYSCLK_TX_SEL | QSERDES_V3_COM_CLKBUF_RX_DRIVE_L, - qphy->dp_serdes + QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN); + qmp->dp_serdes + QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN); - writel(0x00, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG0); - writel(0x13, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG1); - writel(0x24, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG2); - writel(0x00, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG3); - writel(0x0a, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG4); - writel(0x26, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG5); - writel(0x0a, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG6); - writel(0x03, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG7); - writel(0xbb, qphy->dp_pcs + 
QSERDES_DP_PHY_AUX_CFG8); - writel(0x03, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG9); - qphy->dp_aux_cfg = 0; + writel(0x00, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG0); + writel(0x13, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG1); + writel(0x24, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG2); + writel(0x00, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG3); + writel(0x0a, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG4); + writel(0x26, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG5); + writel(0x0a, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG6); + writel(0x03, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG7); + writel(0xbb, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG8); + writel(0x03, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG9); + qmp->dp_aux_cfg = 0; writel(PHY_AUX_STOP_ERR_MASK | PHY_AUX_DEC_ERR_MASK | PHY_AUX_SYNC_ERR_MASK | PHY_AUX_ALIGN_ERR_MASK | PHY_AUX_REQ_ERR_MASK, - qphy->dp_pcs + QSERDES_V3_DP_PHY_AUX_INTERRUPT_MASK); + qmp->dp_pcs + QSERDES_V3_DP_PHY_AUX_INTERRUPT_MASK); } -static int qmp_combo_configure_dp_swing(struct qmp_phy *qphy, +static int qmp_combo_configure_dp_swing(struct qmp_combo *qmp, unsigned int drv_lvl_reg, unsigned int emp_post_reg) { - const struct phy_configure_opts_dp *dp_opts = &qphy->dp_opts; - const struct qmp_phy_cfg *cfg = qphy->cfg; + const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts; + const struct qmp_phy_cfg *cfg = qmp->cfg; unsigned int v_level = 0, p_level = 0; u8 voltage_swing_cfg, pre_emphasis_cfg; int i; @@ -1360,20 +1324,20 @@ static int qmp_combo_configure_dp_swing(struct qmp_phy *qphy, voltage_swing_cfg |= DP_PHY_TXn_TX_DRV_LVL_MUX_EN; pre_emphasis_cfg |= DP_PHY_TXn_TX_EMP_POST1_LVL_MUX_EN; - writel(voltage_swing_cfg, qphy->dp_tx + drv_lvl_reg); - writel(pre_emphasis_cfg, qphy->dp_tx + emp_post_reg); - writel(voltage_swing_cfg, qphy->dp_tx2 + drv_lvl_reg); - writel(pre_emphasis_cfg, qphy->dp_tx2 + emp_post_reg); + writel(voltage_swing_cfg, qmp->dp_tx + drv_lvl_reg); + writel(pre_emphasis_cfg, qmp->dp_tx + emp_post_reg); + writel(voltage_swing_cfg, qmp->dp_tx2 + drv_lvl_reg); + 
writel(pre_emphasis_cfg, qmp->dp_tx2 + emp_post_reg); return 0; } -static void qcom_qmp_v3_phy_configure_dp_tx(struct qmp_phy *qphy) +static void qcom_qmp_v3_phy_configure_dp_tx(struct qmp_combo *qmp) { - const struct phy_configure_opts_dp *dp_opts = &qphy->dp_opts; + const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts; u32 bias_en, drvr_en; - if (qmp_combo_configure_dp_swing(qphy, QSERDES_V3_TX_TX_DRV_LVL, + if (qmp_combo_configure_dp_swing(qmp, QSERDES_V3_TX_TX_DRV_LVL, QSERDES_V3_TX_TX_EMP_POST1_LVL) < 0) return; @@ -1385,13 +1349,13 @@ static void qcom_qmp_v3_phy_configure_dp_tx(struct qmp_phy *qphy) drvr_en = 0x10; } - writel(drvr_en, qphy->dp_tx + QSERDES_V3_TX_HIGHZ_DRVR_EN); - writel(bias_en, qphy->dp_tx + QSERDES_V3_TX_TRANSCEIVER_BIAS_EN); - writel(drvr_en, qphy->dp_tx2 + QSERDES_V3_TX_HIGHZ_DRVR_EN); - writel(bias_en, qphy->dp_tx2 + QSERDES_V3_TX_TRANSCEIVER_BIAS_EN); + writel(drvr_en, qmp->dp_tx + QSERDES_V3_TX_HIGHZ_DRVR_EN); + writel(bias_en, qmp->dp_tx + QSERDES_V3_TX_TRANSCEIVER_BIAS_EN); + writel(drvr_en, qmp->dp_tx2 + QSERDES_V3_TX_HIGHZ_DRVR_EN); + writel(bias_en, qmp->dp_tx2 + QSERDES_V3_TX_TRANSCEIVER_BIAS_EN); } -static bool qmp_combo_configure_dp_mode(struct qmp_phy *qphy) +static bool qmp_combo_configure_dp_mode(struct qmp_combo *qmp) { u32 val; bool reverse = false; @@ -1411,27 +1375,27 @@ static bool qmp_combo_configure_dp_mode(struct qmp_phy *qphy) * if (lane_cnt == 4 || orientation == ORIENTATION_CC1) * val |= DP_PHY_PD_CTL_LANE_2_3_PWRDN; * if (orientation == ORIENTATION_CC2) - * writel(0x4c, qphy->dp_pcs + QSERDES_V3_DP_PHY_MODE); + * writel(0x4c, qmp->dp_pcs + QSERDES_V3_DP_PHY_MODE); */ val |= DP_PHY_PD_CTL_LANE_2_3_PWRDN; - writel(val, qphy->dp_pcs + QSERDES_DP_PHY_PD_CTL); + writel(val, qmp->dp_pcs + QSERDES_DP_PHY_PD_CTL); - writel(0x5c, qphy->dp_pcs + QSERDES_DP_PHY_MODE); + writel(0x5c, qmp->dp_pcs + QSERDES_DP_PHY_MODE); return reverse; } -static int qcom_qmp_v3_phy_configure_dp_phy(struct qmp_phy *qphy) +static int 
qcom_qmp_v3_phy_configure_dp_phy(struct qmp_combo *qmp) { - const struct qmp_phy_dp_clks *dp_clks = qphy->dp_clks; - const struct phy_configure_opts_dp *dp_opts = &qphy->dp_opts; + const struct qmp_phy_dp_clks *dp_clks = qmp->dp_clks; + const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts; u32 phy_vco_div, status; unsigned long pixel_freq; - qmp_combo_configure_dp_mode(qphy); + qmp_combo_configure_dp_mode(qmp); - writel(0x05, qphy->dp_pcs + QSERDES_V3_DP_PHY_TX0_TX1_LANE_CTL); - writel(0x05, qphy->dp_pcs + QSERDES_V3_DP_PHY_TX2_TX3_LANE_CTL); + writel(0x05, qmp->dp_pcs + QSERDES_V3_DP_PHY_TX0_TX1_LANE_CTL); + writel(0x05, qmp->dp_pcs + QSERDES_V3_DP_PHY_TX2_TX3_LANE_CTL); switch (dp_opts->link_rate) { case 1620: @@ -1454,40 +1418,40 @@ static int qcom_qmp_v3_phy_configure_dp_phy(struct qmp_phy *qphy) /* Other link rates aren't supported */ return -EINVAL; } - writel(phy_vco_div, qphy->dp_pcs + QSERDES_V3_DP_PHY_VCO_DIV); + writel(phy_vco_div, qmp->dp_pcs + QSERDES_V3_DP_PHY_VCO_DIV); clk_set_rate(dp_clks->dp_link_hw.clk, dp_opts->link_rate * 100000); clk_set_rate(dp_clks->dp_pixel_hw.clk, pixel_freq); - writel(0x04, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG2); - writel(0x01, qphy->dp_pcs + QSERDES_DP_PHY_CFG); - writel(0x05, qphy->dp_pcs + QSERDES_DP_PHY_CFG); - writel(0x01, qphy->dp_pcs + QSERDES_DP_PHY_CFG); - writel(0x09, qphy->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x04, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG2); + writel(0x01, qmp->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x05, qmp->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x01, qmp->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x09, qmp->dp_pcs + QSERDES_DP_PHY_CFG); - writel(0x20, qphy->dp_serdes + QSERDES_V3_COM_RESETSM_CNTRL); + writel(0x20, qmp->dp_serdes + QSERDES_V3_COM_RESETSM_CNTRL); - if (readl_poll_timeout(qphy->dp_serdes + QSERDES_V3_COM_C_READY_STATUS, + if (readl_poll_timeout(qmp->dp_serdes + QSERDES_V3_COM_C_READY_STATUS, status, ((status & BIT(0)) > 0), 500, 10000)) return -ETIMEDOUT; - writel(0x19, 
qphy->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x19, qmp->dp_pcs + QSERDES_DP_PHY_CFG); - if (readl_poll_timeout(qphy->dp_pcs + QSERDES_V3_DP_PHY_STATUS, + if (readl_poll_timeout(qmp->dp_pcs + QSERDES_V3_DP_PHY_STATUS, status, ((status & BIT(1)) > 0), 500, 10000)) return -ETIMEDOUT; - writel(0x18, qphy->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x18, qmp->dp_pcs + QSERDES_DP_PHY_CFG); udelay(2000); - writel(0x19, qphy->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x19, qmp->dp_pcs + QSERDES_DP_PHY_CFG); - return readl_poll_timeout(qphy->dp_pcs + QSERDES_V3_DP_PHY_STATUS, + return readl_poll_timeout(qmp->dp_pcs + QSERDES_V3_DP_PHY_STATUS, status, ((status & BIT(1)) > 0), 500, @@ -1498,76 +1462,76 @@ static int qcom_qmp_v3_phy_configure_dp_phy(struct qmp_phy *qphy) * We need to calibrate the aux setting here as many times * as the caller tries */ -static int qcom_qmp_v3_dp_phy_calibrate(struct qmp_phy *qphy) +static int qcom_qmp_v3_dp_phy_calibrate(struct qmp_combo *qmp) { static const u8 cfg1_settings[] = { 0x13, 0x23, 0x1d }; u8 val; - qphy->dp_aux_cfg++; - qphy->dp_aux_cfg %= ARRAY_SIZE(cfg1_settings); - val = cfg1_settings[qphy->dp_aux_cfg]; + qmp->dp_aux_cfg++; + qmp->dp_aux_cfg %= ARRAY_SIZE(cfg1_settings); + val = cfg1_settings[qmp->dp_aux_cfg]; - writel(val, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG1); + writel(val, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG1); return 0; } -static void qcom_qmp_v4_phy_dp_aux_init(struct qmp_phy *qphy) +static void qcom_qmp_v4_phy_dp_aux_init(struct qmp_combo *qmp) { writel(DP_PHY_PD_CTL_PWRDN | DP_PHY_PD_CTL_PSR_PWRDN | DP_PHY_PD_CTL_AUX_PWRDN | DP_PHY_PD_CTL_PLL_PWRDN | DP_PHY_PD_CTL_DP_CLAMP_EN, - qphy->dp_pcs + QSERDES_DP_PHY_PD_CTL); + qmp->dp_pcs + QSERDES_DP_PHY_PD_CTL); /* Turn on BIAS current for PHY/PLL */ - writel(0x17, qphy->dp_serdes + QSERDES_V4_COM_BIAS_EN_CLKBUFLR_EN); + writel(0x17, qmp->dp_serdes + QSERDES_V4_COM_BIAS_EN_CLKBUFLR_EN); - writel(0x00, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG0); - writel(0x13, qphy->dp_pcs + 
QSERDES_DP_PHY_AUX_CFG1); - writel(0xa4, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG2); - writel(0x00, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG3); - writel(0x0a, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG4); - writel(0x26, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG5); - writel(0x0a, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG6); - writel(0x03, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG7); - writel(0xb7, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG8); - writel(0x03, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG9); - qphy->dp_aux_cfg = 0; + writel(0x00, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG0); + writel(0x13, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG1); + writel(0xa4, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG2); + writel(0x00, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG3); + writel(0x0a, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG4); + writel(0x26, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG5); + writel(0x0a, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG6); + writel(0x03, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG7); + writel(0xb7, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG8); + writel(0x03, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG9); + qmp->dp_aux_cfg = 0; writel(PHY_AUX_STOP_ERR_MASK | PHY_AUX_DEC_ERR_MASK | PHY_AUX_SYNC_ERR_MASK | PHY_AUX_ALIGN_ERR_MASK | PHY_AUX_REQ_ERR_MASK, - qphy->dp_pcs + QSERDES_V4_DP_PHY_AUX_INTERRUPT_MASK); + qmp->dp_pcs + QSERDES_V4_DP_PHY_AUX_INTERRUPT_MASK); } -static void qcom_qmp_v4_phy_configure_dp_tx(struct qmp_phy *qphy) +static void qcom_qmp_v4_phy_configure_dp_tx(struct qmp_combo *qmp) { /* Program default values before writing proper values */ - writel(0x27, qphy->dp_tx + QSERDES_V4_TX_TX_DRV_LVL); - writel(0x27, qphy->dp_tx2 + QSERDES_V4_TX_TX_DRV_LVL); + writel(0x27, qmp->dp_tx + QSERDES_V4_TX_TX_DRV_LVL); + writel(0x27, qmp->dp_tx2 + QSERDES_V4_TX_TX_DRV_LVL); - writel(0x20, qphy->dp_tx + QSERDES_V4_TX_TX_EMP_POST1_LVL); - writel(0x20, qphy->dp_tx2 + QSERDES_V4_TX_TX_EMP_POST1_LVL); + writel(0x20, qmp->dp_tx + QSERDES_V4_TX_TX_EMP_POST1_LVL); + writel(0x20, qmp->dp_tx2 + QSERDES_V4_TX_TX_EMP_POST1_LVL); - 
qmp_combo_configure_dp_swing(qphy, QSERDES_V4_TX_TX_DRV_LVL, + qmp_combo_configure_dp_swing(qmp, QSERDES_V4_TX_TX_DRV_LVL, QSERDES_V4_TX_TX_EMP_POST1_LVL); } -static int qcom_qmp_v45_phy_configure_dp_phy(struct qmp_phy *qphy) +static int qcom_qmp_v45_phy_configure_dp_phy(struct qmp_combo *qmp) { - const struct qmp_phy_dp_clks *dp_clks = qphy->dp_clks; - const struct phy_configure_opts_dp *dp_opts = &qphy->dp_opts; + const struct qmp_phy_dp_clks *dp_clks = qmp->dp_clks; + const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts; u32 phy_vco_div, status; unsigned long pixel_freq; - writel(0x0f, qphy->dp_pcs + QSERDES_V4_DP_PHY_CFG_1); + writel(0x0f, qmp->dp_pcs + QSERDES_V4_DP_PHY_CFG_1); - qmp_combo_configure_dp_mode(qphy); + qmp_combo_configure_dp_mode(qmp); - writel(0x13, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG1); - writel(0xa4, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG2); + writel(0x13, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG1); + writel(0xa4, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG2); - writel(0x05, qphy->dp_pcs + QSERDES_V4_DP_PHY_TX0_TX1_LANE_CTL); - writel(0x05, qphy->dp_pcs + QSERDES_V4_DP_PHY_TX2_TX3_LANE_CTL); + writel(0x05, qmp->dp_pcs + QSERDES_V4_DP_PHY_TX0_TX1_LANE_CTL); + writel(0x05, qmp->dp_pcs + QSERDES_V4_DP_PHY_TX2_TX3_LANE_CTL); switch (dp_opts->link_rate) { case 1620: @@ -1590,49 +1554,49 @@ static int qcom_qmp_v45_phy_configure_dp_phy(struct qmp_phy *qphy) /* Other link rates aren't supported */ return -EINVAL; } - writel(phy_vco_div, qphy->dp_pcs + QSERDES_V4_DP_PHY_VCO_DIV); + writel(phy_vco_div, qmp->dp_pcs + QSERDES_V4_DP_PHY_VCO_DIV); clk_set_rate(dp_clks->dp_link_hw.clk, dp_opts->link_rate * 100000); clk_set_rate(dp_clks->dp_pixel_hw.clk, pixel_freq); - writel(0x01, qphy->dp_pcs + QSERDES_DP_PHY_CFG); - writel(0x05, qphy->dp_pcs + QSERDES_DP_PHY_CFG); - writel(0x01, qphy->dp_pcs + QSERDES_DP_PHY_CFG); - writel(0x09, qphy->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x01, qmp->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x05, qmp->dp_pcs + 
QSERDES_DP_PHY_CFG); + writel(0x01, qmp->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x09, qmp->dp_pcs + QSERDES_DP_PHY_CFG); - writel(0x20, qphy->dp_serdes + QSERDES_V4_COM_RESETSM_CNTRL); + writel(0x20, qmp->dp_serdes + QSERDES_V4_COM_RESETSM_CNTRL); - if (readl_poll_timeout(qphy->dp_serdes + QSERDES_V4_COM_C_READY_STATUS, + if (readl_poll_timeout(qmp->dp_serdes + QSERDES_V4_COM_C_READY_STATUS, status, ((status & BIT(0)) > 0), 500, 10000)) return -ETIMEDOUT; - if (readl_poll_timeout(qphy->dp_serdes + QSERDES_V4_COM_CMN_STATUS, + if (readl_poll_timeout(qmp->dp_serdes + QSERDES_V4_COM_CMN_STATUS, status, ((status & BIT(0)) > 0), 500, 10000)) return -ETIMEDOUT; - if (readl_poll_timeout(qphy->dp_serdes + QSERDES_V4_COM_CMN_STATUS, + if (readl_poll_timeout(qmp->dp_serdes + QSERDES_V4_COM_CMN_STATUS, status, ((status & BIT(1)) > 0), 500, 10000)) return -ETIMEDOUT; - writel(0x19, qphy->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x19, qmp->dp_pcs + QSERDES_DP_PHY_CFG); - if (readl_poll_timeout(qphy->dp_pcs + QSERDES_V4_DP_PHY_STATUS, + if (readl_poll_timeout(qmp->dp_pcs + QSERDES_V4_DP_PHY_STATUS, status, ((status & BIT(0)) > 0), 500, 10000)) return -ETIMEDOUT; - if (readl_poll_timeout(qphy->dp_pcs + QSERDES_V4_DP_PHY_STATUS, + if (readl_poll_timeout(qmp->dp_pcs + QSERDES_V4_DP_PHY_STATUS, status, ((status & BIT(1)) > 0), 500, @@ -1642,15 +1606,15 @@ static int qcom_qmp_v45_phy_configure_dp_phy(struct qmp_phy *qphy) return 0; } -static int qcom_qmp_v4_phy_configure_dp_phy(struct qmp_phy *qphy) +static int qcom_qmp_v4_phy_configure_dp_phy(struct qmp_combo *qmp) { - const struct phy_configure_opts_dp *dp_opts = &qphy->dp_opts; + const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts; u32 bias0_en, drvr0_en, bias1_en, drvr1_en; bool reverse = false; u32 status; int ret; - ret = qcom_qmp_v45_phy_configure_dp_phy(qphy); + ret = qcom_qmp_v45_phy_configure_dp_phy(qmp); if (ret < 0) return ret; @@ -1676,43 +1640,43 @@ static int qcom_qmp_v4_phy_configure_dp_phy(struct qmp_phy *qphy) 
drvr1_en = 0x10; } - writel(drvr0_en, qphy->dp_tx + QSERDES_V4_TX_HIGHZ_DRVR_EN); - writel(bias0_en, qphy->dp_tx + QSERDES_V4_TX_TRANSCEIVER_BIAS_EN); - writel(drvr1_en, qphy->dp_tx2 + QSERDES_V4_TX_HIGHZ_DRVR_EN); - writel(bias1_en, qphy->dp_tx2 + QSERDES_V4_TX_TRANSCEIVER_BIAS_EN); + writel(drvr0_en, qmp->dp_tx + QSERDES_V4_TX_HIGHZ_DRVR_EN); + writel(bias0_en, qmp->dp_tx + QSERDES_V4_TX_TRANSCEIVER_BIAS_EN); + writel(drvr1_en, qmp->dp_tx2 + QSERDES_V4_TX_HIGHZ_DRVR_EN); + writel(bias1_en, qmp->dp_tx2 + QSERDES_V4_TX_TRANSCEIVER_BIAS_EN); - writel(0x18, qphy->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x18, qmp->dp_pcs + QSERDES_DP_PHY_CFG); udelay(2000); - writel(0x19, qphy->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x19, qmp->dp_pcs + QSERDES_DP_PHY_CFG); - if (readl_poll_timeout(qphy->dp_pcs + QSERDES_V4_DP_PHY_STATUS, + if (readl_poll_timeout(qmp->dp_pcs + QSERDES_V4_DP_PHY_STATUS, status, ((status & BIT(1)) > 0), 500, 10000)) return -ETIMEDOUT; - writel(0x0a, qphy->dp_tx + QSERDES_V4_TX_TX_POL_INV); - writel(0x0a, qphy->dp_tx2 + QSERDES_V4_TX_TX_POL_INV); + writel(0x0a, qmp->dp_tx + QSERDES_V4_TX_TX_POL_INV); + writel(0x0a, qmp->dp_tx2 + QSERDES_V4_TX_TX_POL_INV); - writel(0x27, qphy->dp_tx + QSERDES_V4_TX_TX_DRV_LVL); - writel(0x27, qphy->dp_tx2 + QSERDES_V4_TX_TX_DRV_LVL); + writel(0x27, qmp->dp_tx + QSERDES_V4_TX_TX_DRV_LVL); + writel(0x27, qmp->dp_tx2 + QSERDES_V4_TX_TX_DRV_LVL); - writel(0x20, qphy->dp_tx + QSERDES_V4_TX_TX_EMP_POST1_LVL); - writel(0x20, qphy->dp_tx2 + QSERDES_V4_TX_TX_EMP_POST1_LVL); + writel(0x20, qmp->dp_tx + QSERDES_V4_TX_TX_EMP_POST1_LVL); + writel(0x20, qmp->dp_tx2 + QSERDES_V4_TX_TX_EMP_POST1_LVL); return 0; } -static int qcom_qmp_v5_phy_configure_dp_phy(struct qmp_phy *qphy) +static int qcom_qmp_v5_phy_configure_dp_phy(struct qmp_combo *qmp) { - const struct phy_configure_opts_dp *dp_opts = &qphy->dp_opts; + const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts; u32 bias0_en, drvr0_en, bias1_en, drvr1_en; bool reverse = false; 
u32 status; int ret; - ret = qcom_qmp_v45_phy_configure_dp_phy(qphy); + ret = qcom_qmp_v45_phy_configure_dp_phy(qmp); if (ret < 0) return ret; @@ -1733,30 +1697,30 @@ static int qcom_qmp_v5_phy_configure_dp_phy(struct qmp_phy *qphy) drvr1_en = 0x10; } - writel(drvr0_en, qphy->dp_tx + QSERDES_V5_5NM_TX_HIGHZ_DRVR_EN); - writel(bias0_en, qphy->dp_tx + QSERDES_V5_5NM_TX_TRANSCEIVER_BIAS_EN); - writel(drvr1_en, qphy->dp_tx2 + QSERDES_V5_5NM_TX_HIGHZ_DRVR_EN); - writel(bias1_en, qphy->dp_tx2 + QSERDES_V5_5NM_TX_TRANSCEIVER_BIAS_EN); + writel(drvr0_en, qmp->dp_tx + QSERDES_V5_5NM_TX_HIGHZ_DRVR_EN); + writel(bias0_en, qmp->dp_tx + QSERDES_V5_5NM_TX_TRANSCEIVER_BIAS_EN); + writel(drvr1_en, qmp->dp_tx2 + QSERDES_V5_5NM_TX_HIGHZ_DRVR_EN); + writel(bias1_en, qmp->dp_tx2 + QSERDES_V5_5NM_TX_TRANSCEIVER_BIAS_EN); - writel(0x18, qphy->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x18, qmp->dp_pcs + QSERDES_DP_PHY_CFG); udelay(2000); - writel(0x19, qphy->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x19, qmp->dp_pcs + QSERDES_DP_PHY_CFG); - if (readl_poll_timeout(qphy->dp_pcs + QSERDES_V4_DP_PHY_STATUS, + if (readl_poll_timeout(qmp->dp_pcs + QSERDES_V4_DP_PHY_STATUS, status, ((status & BIT(1)) > 0), 500, 10000)) return -ETIMEDOUT; - writel(0x0a, qphy->dp_tx + QSERDES_V5_5NM_TX_TX_POL_INV); - writel(0x0a, qphy->dp_tx2 + QSERDES_V5_5NM_TX_TX_POL_INV); + writel(0x0a, qmp->dp_tx + QSERDES_V5_5NM_TX_TX_POL_INV); + writel(0x0a, qmp->dp_tx2 + QSERDES_V5_5NM_TX_TX_POL_INV); - writel(0x27, qphy->dp_tx + QSERDES_V5_5NM_TX_TX_DRV_LVL); - writel(0x27, qphy->dp_tx2 + QSERDES_V5_5NM_TX_TX_DRV_LVL); + writel(0x27, qmp->dp_tx + QSERDES_V5_5NM_TX_TX_DRV_LVL); + writel(0x27, qmp->dp_tx2 + QSERDES_V5_5NM_TX_TX_DRV_LVL); - writel(0x20, qphy->dp_tx + QSERDES_V5_5NM_TX_TX_EMP_POST1_LVL); - writel(0x20, qphy->dp_tx2 + QSERDES_V5_5NM_TX_TX_EMP_POST1_LVL); + writel(0x20, qmp->dp_tx + QSERDES_V5_5NM_TX_TX_EMP_POST1_LVL); + writel(0x20, qmp->dp_tx2 + QSERDES_V5_5NM_TX_TX_EMP_POST1_LVL); return 0; } @@ -1765,16 
+1729,16 @@ static int qcom_qmp_v5_phy_configure_dp_phy(struct qmp_phy *qphy) * We need to calibrate the aux setting here as many times * as the caller tries */ -static int qcom_qmp_v4_dp_phy_calibrate(struct qmp_phy *qphy) +static int qcom_qmp_v4_dp_phy_calibrate(struct qmp_combo *qmp) { static const u8 cfg1_settings[] = { 0x20, 0x13, 0x23, 0x1d }; u8 val; - qphy->dp_aux_cfg++; - qphy->dp_aux_cfg %= ARRAY_SIZE(cfg1_settings); - val = cfg1_settings[qphy->dp_aux_cfg]; + qmp->dp_aux_cfg++; + qmp->dp_aux_cfg %= ARRAY_SIZE(cfg1_settings); + val = cfg1_settings[qmp->dp_aux_cfg]; - writel(val, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG1); + writel(val, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG1); return 0; } @@ -1782,13 +1746,13 @@ static int qcom_qmp_v4_dp_phy_calibrate(struct qmp_phy *qphy) static int qmp_combo_dp_configure(struct phy *phy, union phy_configure_opts *opts) { const struct phy_configure_opts_dp *dp_opts = &opts->dp; - struct qmp_phy *qphy = phy_get_drvdata(phy); - const struct qmp_phy_cfg *cfg = qphy->cfg; + struct qmp_combo *qmp = phy_get_drvdata(phy); + const struct qmp_phy_cfg *cfg = qmp->cfg; - memcpy(&qphy->dp_opts, dp_opts, sizeof(*dp_opts)); - if (qphy->dp_opts.set_voltages) { - cfg->configure_dp_tx(qphy); - qphy->dp_opts.set_voltages = 0; + memcpy(&qmp->dp_opts, dp_opts, sizeof(*dp_opts)); + if (qmp->dp_opts.set_voltages) { + cfg->configure_dp_tx(qmp); + qmp->dp_opts.set_voltages = 0; } return 0; @@ -1796,20 +1760,18 @@ static int qmp_combo_dp_configure(struct phy *phy, union phy_configure_opts *opt static int qmp_combo_dp_calibrate(struct phy *phy) { - struct qmp_phy *qphy = phy_get_drvdata(phy); - const struct qmp_phy_cfg *cfg = qphy->cfg; + struct qmp_combo *qmp = phy_get_drvdata(phy); + const struct qmp_phy_cfg *cfg = qmp->cfg; if (cfg->calibrate_dp_phy) - return cfg->calibrate_dp_phy(qphy); + return cfg->calibrate_dp_phy(qmp); return 0; } -static int qmp_combo_com_init(struct qmp_phy *qphy) +static int qmp_combo_com_init(struct qmp_combo *qmp) { - 
struct qcom_qmp *qmp = qphy->qmp; - struct qmp_phy *usb_phy = qmp->usb_phy; - const struct qmp_phy_cfg *cfg = usb_phy->cfg; + const struct qmp_phy_cfg *cfg = qmp->cfg; void __iomem *dp_com = qmp->dp_com; int ret; @@ -1861,7 +1823,7 @@ static int qmp_combo_com_init(struct qmp_phy *qphy) qphy_clrbits(dp_com, QPHY_V3_DP_COM_SWI_CTRL, 0x03); qphy_clrbits(dp_com, QPHY_V3_DP_COM_SW_RESET, SW_RESET); - qphy_setbits(usb_phy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], + qphy_setbits(qmp->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], SW_PWRDN); mutex_unlock(&qmp->phy_mutex); @@ -1878,11 +1840,9 @@ err_unlock: return ret; } -static int qmp_combo_com_exit(struct qmp_phy *qphy) +static int qmp_combo_com_exit(struct qmp_combo *qmp) { - struct qcom_qmp *qmp = qphy->qmp; - struct qmp_phy *usb_phy = qmp->usb_phy; - const struct qmp_phy_cfg *cfg = usb_phy->cfg; + const struct qmp_phy_cfg *cfg = qmp->cfg; mutex_lock(&qmp->phy_mutex); if (--qmp->init_count) { @@ -1903,77 +1863,76 @@ static int qmp_combo_com_exit(struct qmp_phy *qphy) static int qmp_combo_dp_init(struct phy *phy) { - struct qmp_phy *qphy = phy_get_drvdata(phy); - const struct qmp_phy_cfg *cfg = qphy->cfg; + struct qmp_combo *qmp = phy_get_drvdata(phy); + const struct qmp_phy_cfg *cfg = qmp->cfg; int ret; - ret = qmp_combo_com_init(qphy); + ret = qmp_combo_com_init(qmp); if (ret) return ret; - cfg->dp_aux_init(qphy); + cfg->dp_aux_init(qmp); return 0; } static int qmp_combo_dp_exit(struct phy *phy) { - struct qmp_phy *qphy = phy_get_drvdata(phy); + struct qmp_combo *qmp = phy_get_drvdata(phy); - qmp_combo_com_exit(qphy); + qmp_combo_com_exit(qmp); return 0; } static int qmp_combo_dp_power_on(struct phy *phy) { - struct qmp_phy *qphy = phy_get_drvdata(phy); - const struct qmp_phy_cfg *cfg = qphy->cfg; - void __iomem *tx = qphy->dp_tx; - void __iomem *tx2 = qphy->dp_tx2; + struct qmp_combo *qmp = phy_get_drvdata(phy); + const struct qmp_phy_cfg *cfg = qmp->cfg; + void __iomem *tx = qmp->dp_tx; + void __iomem *tx2 = 
qmp->dp_tx2; - qmp_combo_dp_serdes_init(qphy); + qmp_combo_dp_serdes_init(qmp); qmp_combo_configure_lane(tx, cfg->dp_tx_tbl, cfg->dp_tx_tbl_num, 1); qmp_combo_configure_lane(tx2, cfg->dp_tx_tbl, cfg->dp_tx_tbl_num, 2); /* Configure special DP tx tunings */ - cfg->configure_dp_tx(qphy); + cfg->configure_dp_tx(qmp); /* Configure link rate, swing, etc. */ - cfg->configure_dp_phy(qphy); + cfg->configure_dp_phy(qmp); return 0; } static int qmp_combo_dp_power_off(struct phy *phy) { - struct qmp_phy *qphy = phy_get_drvdata(phy); + struct qmp_combo *qmp = phy_get_drvdata(phy); /* Assert DP PHY power down */ - writel(DP_PHY_PD_CTL_PSR_PWRDN, qphy->dp_pcs + QSERDES_DP_PHY_PD_CTL); + writel(DP_PHY_PD_CTL_PSR_PWRDN, qmp->dp_pcs + QSERDES_DP_PHY_PD_CTL); return 0; } static int qmp_combo_usb_power_on(struct phy *phy) { - struct qmp_phy *qphy = phy_get_drvdata(phy); - struct qcom_qmp *qmp = qphy->qmp; - const struct qmp_phy_cfg *cfg = qphy->cfg; - void __iomem *serdes = qphy->serdes; - void __iomem *tx = qphy->tx; - void __iomem *rx = qphy->rx; - void __iomem *tx2 = qphy->tx2; - void __iomem *rx2 = qphy->rx2; - void __iomem *pcs = qphy->pcs; + struct qmp_combo *qmp = phy_get_drvdata(phy); + const struct qmp_phy_cfg *cfg = qmp->cfg; + void __iomem *serdes = qmp->serdes; + void __iomem *tx = qmp->tx; + void __iomem *rx = qmp->rx; + void __iomem *tx2 = qmp->tx2; + void __iomem *rx2 = qmp->rx2; + void __iomem *pcs = qmp->pcs; void __iomem *status; unsigned int val; int ret; qmp_combo_configure(serdes, cfg->serdes_tbl, cfg->serdes_tbl_num); - ret = clk_prepare_enable(qphy->pipe_clk); + ret = clk_prepare_enable(qmp->pipe_clk); if (ret) { dev_err(qmp->dev, "pipe_clk enable failed err=%d\n", ret); return ret; @@ -2008,27 +1967,27 @@ static int qmp_combo_usb_power_on(struct phy *phy) return 0; err_disable_pipe_clk: - clk_disable_unprepare(qphy->pipe_clk); + clk_disable_unprepare(qmp->pipe_clk); return ret; } static int qmp_combo_usb_power_off(struct phy *phy) { - struct qmp_phy *qphy = 
phy_get_drvdata(phy); - const struct qmp_phy_cfg *cfg = qphy->cfg; + struct qmp_combo *qmp = phy_get_drvdata(phy); + const struct qmp_phy_cfg *cfg = qmp->cfg; - clk_disable_unprepare(qphy->pipe_clk); + clk_disable_unprepare(qmp->pipe_clk); /* PHY reset */ - qphy_setbits(qphy->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); + qphy_setbits(qmp->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); /* stop SerDes and Phy-Coding-Sublayer */ - qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], + qphy_clrbits(qmp->pcs, cfg->regs[QPHY_START_CTRL], SERDES_START | PCS_START); /* Put PHY into POWER DOWN state: active low */ - qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], + qphy_clrbits(qmp->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], SW_PWRDN); return 0; @@ -2036,37 +1995,37 @@ static int qmp_combo_usb_power_off(struct phy *phy) static int qmp_combo_usb_init(struct phy *phy) { - struct qmp_phy *qphy = phy_get_drvdata(phy); + struct qmp_combo *qmp = phy_get_drvdata(phy); int ret; - ret = qmp_combo_com_init(qphy); + ret = qmp_combo_com_init(qmp); if (ret) return ret; ret = qmp_combo_usb_power_on(phy); if (ret) - qmp_combo_com_exit(qphy); + qmp_combo_com_exit(qmp); return ret; } static int qmp_combo_usb_exit(struct phy *phy) { - struct qmp_phy *qphy = phy_get_drvdata(phy); + struct qmp_combo *qmp = phy_get_drvdata(phy); int ret; ret = qmp_combo_usb_power_off(phy); if (ret) return ret; - return qmp_combo_com_exit(qphy); + return qmp_combo_com_exit(qmp); } static int qmp_combo_usb_set_mode(struct phy *phy, enum phy_mode mode, int submode) { - struct qmp_phy *qphy = phy_get_drvdata(phy); + struct qmp_combo *qmp = phy_get_drvdata(phy); - qphy->mode = mode; + qmp->mode = mode; return 0; } @@ -2088,15 +2047,15 @@ static const struct phy_ops qmp_combo_dp_phy_ops = { .owner = THIS_MODULE, }; -static void qmp_combo_enable_autonomous_mode(struct qmp_phy *qphy) +static void qmp_combo_enable_autonomous_mode(struct qmp_combo *qmp) { - const struct qmp_phy_cfg *cfg = qphy->cfg; - void 
__iomem *pcs_usb = qphy->pcs_usb ?: qphy->pcs; - void __iomem *pcs_misc = qphy->pcs_misc; + const struct qmp_phy_cfg *cfg = qmp->cfg; + void __iomem *pcs_usb = qmp->pcs_usb ?: qmp->pcs; + void __iomem *pcs_misc = qmp->pcs_misc; u32 intr_mask; - if (qphy->mode == PHY_MODE_USB_HOST_SS || - qphy->mode == PHY_MODE_USB_DEVICE_SS) + if (qmp->mode == PHY_MODE_USB_HOST_SS || + qmp->mode == PHY_MODE_USB_DEVICE_SS) intr_mask = ARCVR_DTCT_EN | ALFPS_DTCT_EN; else intr_mask = ARCVR_DTCT_EN | ARCVR_DTCT_EVENT_SEL; @@ -2117,11 +2076,11 @@ static void qmp_combo_enable_autonomous_mode(struct qmp_phy *qphy) qphy_clrbits(pcs_misc, QPHY_V3_PCS_MISC_CLAMP_ENABLE, CLAMP_EN); } -static void qmp_combo_disable_autonomous_mode(struct qmp_phy *qphy) +static void qmp_combo_disable_autonomous_mode(struct qmp_combo *qmp) { - const struct qmp_phy_cfg *cfg = qphy->cfg; - void __iomem *pcs_usb = qphy->pcs_usb ?: qphy->pcs; - void __iomem *pcs_misc = qphy->pcs_misc; + const struct qmp_phy_cfg *cfg = qmp->cfg; + void __iomem *pcs_usb = qmp->pcs_usb ?: qmp->pcs; + void __iomem *pcs_misc = qmp->pcs_misc; /* Disable i/o clamp_n on resume for normal mode */ if (pcs_misc) @@ -2137,20 +2096,19 @@ static void qmp_combo_disable_autonomous_mode(struct qmp_phy *qphy) static int __maybe_unused qmp_combo_runtime_suspend(struct device *dev) { - struct qcom_qmp *qmp = dev_get_drvdata(dev); - struct qmp_phy *qphy = qmp->usb_phy; - const struct qmp_phy_cfg *cfg = qphy->cfg; + struct qmp_combo *qmp = dev_get_drvdata(dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; - dev_vdbg(dev, "Suspending QMP phy, mode:%d\n", qphy->mode); + dev_vdbg(dev, "Suspending QMP phy, mode:%d\n", qmp->mode); if (!qmp->init_count) { dev_vdbg(dev, "PHY not initialized, bailing out\n"); return 0; } - qmp_combo_enable_autonomous_mode(qphy); + qmp_combo_enable_autonomous_mode(qmp); - clk_disable_unprepare(qphy->pipe_clk); + clk_disable_unprepare(qmp->pipe_clk); clk_bulk_disable_unprepare(cfg->num_clks, qmp->clks); return 0; @@ -2158,12 
+2116,11 @@ static int __maybe_unused qmp_combo_runtime_suspend(struct device *dev) static int __maybe_unused qmp_combo_runtime_resume(struct device *dev) { - struct qcom_qmp *qmp = dev_get_drvdata(dev); - struct qmp_phy *qphy = qmp->usb_phy; - const struct qmp_phy_cfg *cfg = qphy->cfg; + struct qmp_combo *qmp = dev_get_drvdata(dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; int ret = 0; - dev_vdbg(dev, "Resuming QMP phy, mode:%d\n", qphy->mode); + dev_vdbg(dev, "Resuming QMP phy, mode:%d\n", qmp->mode); if (!qmp->init_count) { dev_vdbg(dev, "PHY not initialized, bailing out\n"); @@ -2174,14 +2131,14 @@ static int __maybe_unused qmp_combo_runtime_resume(struct device *dev) if (ret) return ret; - ret = clk_prepare_enable(qphy->pipe_clk); + ret = clk_prepare_enable(qmp->pipe_clk); if (ret) { dev_err(dev, "pipe_clk enable failed, err=%d\n", ret); clk_bulk_disable_unprepare(cfg->num_clks, qmp->clks); return ret; } - qmp_combo_disable_autonomous_mode(qphy); + qmp_combo_disable_autonomous_mode(qmp); return 0; } @@ -2193,7 +2150,7 @@ static const struct dev_pm_ops qmp_combo_pm_ops = { static int qmp_combo_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg) { - struct qcom_qmp *qmp = dev_get_drvdata(dev); + struct qmp_combo *qmp = dev_get_drvdata(dev); int num = cfg->num_vregs; int ret, i; @@ -2225,7 +2182,7 @@ static int qmp_combo_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg static int qmp_combo_reset_init(struct device *dev, const struct qmp_phy_cfg *cfg) { - struct qcom_qmp *qmp = dev_get_drvdata(dev); + struct qmp_combo *qmp = dev_get_drvdata(dev); int i; int ret; @@ -2246,7 +2203,7 @@ static int qmp_combo_reset_init(struct device *dev, const struct qmp_phy_cfg *cf static int qmp_combo_clk_init(struct device *dev, const struct qmp_phy_cfg *cfg) { - struct qcom_qmp *qmp = dev_get_drvdata(dev); + struct qmp_combo *qmp = dev_get_drvdata(dev); int num = cfg->num_clks; int i; @@ -2283,7 +2240,7 @@ static void phy_clk_release_provider(void *res) * 
clk | +-------+ | +-----+ * +---------------+ */ -static int phy_pipe_clk_register(struct qcom_qmp *qmp, struct device_node *np) +static int phy_pipe_clk_register(struct qmp_combo *qmp, struct device_node *np) { struct clk_fixed_rate *fixed; struct clk_init_data init = { }; @@ -2386,12 +2343,12 @@ static unsigned long qcom_qmp_dp_pixel_clk_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) { const struct qmp_phy_dp_clks *dp_clks; - const struct qmp_phy *qphy; + const struct qmp_combo *qmp; const struct phy_configure_opts_dp *dp_opts; dp_clks = container_of(hw, struct qmp_phy_dp_clks, dp_pixel_hw); - qphy = dp_clks->qphy; - dp_opts = &qphy->dp_opts; + qmp = dp_clks->qmp; + dp_opts = &qmp->dp_opts; switch (dp_opts->link_rate) { case 1620: @@ -2430,12 +2387,12 @@ static unsigned long qcom_qmp_dp_link_clk_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) { const struct qmp_phy_dp_clks *dp_clks; - const struct qmp_phy *qphy; + const struct qmp_combo *qmp; const struct phy_configure_opts_dp *dp_opts; dp_clks = container_of(hw, struct qmp_phy_dp_clks, dp_link_hw); - qphy = dp_clks->qphy; - dp_opts = &qphy->dp_opts; + qmp = dp_clks->qmp; + dp_opts = &qmp->dp_opts; switch (dp_opts->link_rate) { case 1620: @@ -2470,8 +2427,7 @@ qcom_qmp_dp_clks_hw_get(struct of_phandle_args *clkspec, void *data) return &dp_clks->dp_pixel_hw; } -static int phy_dp_clks_register(struct qcom_qmp *qmp, struct qmp_phy *qphy, - struct device_node *np) +static int phy_dp_clks_register(struct qmp_combo *qmp, struct device_node *np) { struct clk_init_data init = { }; struct qmp_phy_dp_clks *dp_clks; @@ -2482,8 +2438,8 @@ static int phy_dp_clks_register(struct qcom_qmp *qmp, struct qmp_phy *qphy, if (!dp_clks) return -ENOMEM; - dp_clks->qphy = qphy; - qphy->dp_clks = dp_clks; + dp_clks->qmp = qmp; + qmp->dp_clks = dp_clks; snprintf(name, sizeof(name), "%s::link_clk", dev_name(qmp->dev)); init.ops = &qcom_qmp_dp_link_clk_ops; @@ -2512,20 +2468,15 @@ static int phy_dp_clks_register(struct 
qcom_qmp *qmp, struct qmp_phy *qphy, return devm_add_action_or_reset(qmp->dev, phy_clk_release_provider, np); } -static int qmp_combo_create_dp(struct device *dev, struct device_node *np, int id, +static int qmp_combo_create_dp(struct device *dev, struct device_node *np, void __iomem *serdes, const struct qmp_phy_cfg *cfg) { - struct qcom_qmp *qmp = dev_get_drvdata(dev); + struct qmp_combo *qmp = dev_get_drvdata(dev); struct phy *generic_phy; - struct qmp_phy *qphy; int ret; - qphy = devm_kzalloc(dev, sizeof(*qphy), GFP_KERNEL); - if (!qphy) - return -ENOMEM; - - qphy->cfg = cfg; - qphy->dp_serdes = serdes; + qmp->cfg = cfg; + qmp->dp_serdes = serdes; /* * Get memory resources from the DP child node: * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2; @@ -2533,17 +2484,17 @@ static int qmp_combo_create_dp(struct device *dev, struct device_node *np, int i * * Note that only tx/tx2 and pcs are used by the DP implementation. */ - qphy->dp_tx = devm_of_iomap(dev, np, 0, NULL); - if (IS_ERR(qphy->dp_tx)) - return PTR_ERR(qphy->dp_tx); + qmp->dp_tx = devm_of_iomap(dev, np, 0, NULL); + if (IS_ERR(qmp->dp_tx)) + return PTR_ERR(qmp->dp_tx); - qphy->dp_pcs = devm_of_iomap(dev, np, 2, NULL); - if (IS_ERR(qphy->dp_pcs)) - return PTR_ERR(qphy->dp_pcs); + qmp->dp_pcs = devm_of_iomap(dev, np, 2, NULL); + if (IS_ERR(qmp->dp_pcs)) + return PTR_ERR(qmp->dp_pcs); - qphy->dp_tx2 = devm_of_iomap(dev, np, 3, NULL); - if (IS_ERR(qphy->dp_tx2)) - return PTR_ERR(qphy->dp_tx2); + qmp->dp_tx2 = devm_of_iomap(dev, np, 3, NULL); + if (IS_ERR(qmp->dp_tx2)) + return PTR_ERR(qmp->dp_tx2); generic_phy = devm_phy_create(dev, np, &qmp_combo_dp_phy_ops); if (IS_ERR(generic_phy)) { @@ -2552,66 +2503,59 @@ static int qmp_combo_create_dp(struct device *dev, struct device_node *np, int i return ret; } - qphy->phy = generic_phy; - qphy->qmp = qmp; - qmp->phys[id] = qphy; - phy_set_drvdata(generic_phy, qphy); + qmp->dp_phy = generic_phy; + phy_set_drvdata(generic_phy, qmp); return 0; } -static int 
qmp_combo_create_usb(struct device *dev, struct device_node *np, int id, +static int qmp_combo_create_usb(struct device *dev, struct device_node *np, void __iomem *serdes, const struct qmp_phy_cfg *cfg) { - struct qcom_qmp *qmp = dev_get_drvdata(dev); + struct qmp_combo *qmp = dev_get_drvdata(dev); struct phy *generic_phy; - struct qmp_phy *qphy; int ret; - qphy = devm_kzalloc(dev, sizeof(*qphy), GFP_KERNEL); - if (!qphy) - return -ENOMEM; - - qphy->cfg = cfg; - qphy->serdes = serdes; + qmp->cfg = cfg; + qmp->serdes = serdes; /* * Get memory resources from the USB child node: * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2; * tx2 -> 3; rx2 -> 4; pcs_misc (optional) -> 5 */ - qphy->tx = devm_of_iomap(dev, np, 0, NULL); - if (IS_ERR(qphy->tx)) - return PTR_ERR(qphy->tx); + qmp->tx = devm_of_iomap(dev, np, 0, NULL); + if (IS_ERR(qmp->tx)) + return PTR_ERR(qmp->tx); - qphy->rx = devm_of_iomap(dev, np, 1, NULL); - if (IS_ERR(qphy->rx)) - return PTR_ERR(qphy->rx); + qmp->rx = devm_of_iomap(dev, np, 1, NULL); + if (IS_ERR(qmp->rx)) + return PTR_ERR(qmp->rx); - qphy->pcs = devm_of_iomap(dev, np, 2, NULL); - if (IS_ERR(qphy->pcs)) - return PTR_ERR(qphy->pcs); + qmp->pcs = devm_of_iomap(dev, np, 2, NULL); + if (IS_ERR(qmp->pcs)) + return PTR_ERR(qmp->pcs); if (cfg->pcs_usb_offset) - qphy->pcs_usb = qphy->pcs + cfg->pcs_usb_offset; + qmp->pcs_usb = qmp->pcs + cfg->pcs_usb_offset; - qphy->tx2 = devm_of_iomap(dev, np, 3, NULL); - if (IS_ERR(qphy->tx2)) - return PTR_ERR(qphy->tx2); + qmp->tx2 = devm_of_iomap(dev, np, 3, NULL); + if (IS_ERR(qmp->tx2)) + return PTR_ERR(qmp->tx2); - qphy->rx2 = devm_of_iomap(dev, np, 4, NULL); - if (IS_ERR(qphy->rx2)) - return PTR_ERR(qphy->rx2); + qmp->rx2 = devm_of_iomap(dev, np, 4, NULL); + if (IS_ERR(qmp->rx2)) + return PTR_ERR(qmp->rx2); - qphy->pcs_misc = devm_of_iomap(dev, np, 5, NULL); - if (IS_ERR(qphy->pcs_misc)) { + qmp->pcs_misc = devm_of_iomap(dev, np, 5, NULL); + if (IS_ERR(qmp->pcs_misc)) { dev_vdbg(dev, "PHY pcs_misc-reg not 
used\n"); - qphy->pcs_misc = NULL; + qmp->pcs_misc = NULL; } - qphy->pipe_clk = devm_get_clk_from_child(dev, np, NULL); - if (IS_ERR(qphy->pipe_clk)) { - return dev_err_probe(dev, PTR_ERR(qphy->pipe_clk), - "failed to get lane%d pipe_clk\n", id); + qmp->pipe_clk = devm_get_clk_from_child(dev, np, NULL); + if (IS_ERR(qmp->pipe_clk)) { + return dev_err_probe(dev, PTR_ERR(qmp->pipe_clk), + "failed to get pipe clock\n"); } generic_phy = devm_phy_create(dev, np, &qmp_combo_usb_phy_ops); @@ -2621,17 +2565,15 @@ static int qmp_combo_create_usb(struct device *dev, struct device_node *np, int return ret; } - qphy->phy = generic_phy; - qphy->qmp = qmp; - qmp->phys[id] = qphy; - phy_set_drvdata(generic_phy, qphy); + qmp->usb_phy = generic_phy; + phy_set_drvdata(generic_phy, qmp); return 0; } static int qmp_combo_probe(struct platform_device *pdev) { - struct qcom_qmp *qmp; + struct qmp_combo *qmp; struct device *dev = &pdev->dev; struct device_node *child; struct phy_provider *phy_provider; @@ -2686,10 +2628,6 @@ static int qmp_combo_probe(struct platform_device *pdev) if (num > expected_phys) return -EINVAL; - qmp->phys = devm_kcalloc(dev, num, sizeof(*qmp->phys), GFP_KERNEL); - if (!qmp->phys) - return -ENOMEM; - pm_runtime_set_active(dev); ret = devm_pm_runtime_enable(dev); if (ret) @@ -2706,14 +2644,14 @@ static int qmp_combo_probe(struct platform_device *pdev) serdes = dp_serdes; /* Create per-lane phy */ - ret = qmp_combo_create_dp(dev, child, id, serdes, cfg); + ret = qmp_combo_create_dp(dev, child, serdes, cfg); if (ret) { dev_err(dev, "failed to create lane%d phy, %d\n", id, ret); goto err_node_put; } - ret = phy_dp_clks_register(qmp, qmp->phys[id], child); + ret = phy_dp_clks_register(qmp, child); if (ret) { dev_err(qmp->dev, "failed to register DP clock source\n"); @@ -2723,15 +2661,13 @@ static int qmp_combo_probe(struct platform_device *pdev) serdes = usb_serdes; /* Create per-lane phy */ - ret = qmp_combo_create_usb(dev, child, id, serdes, cfg); + ret = 
qmp_combo_create_usb(dev, child, serdes, cfg); if (ret) { dev_err(dev, "failed to create lane%d phy, %d\n", id, ret); goto err_node_put; } - qmp->usb_phy = qmp->phys[id]; - /* * Register the pipe clock provided by phy. * See function description to see details of this pipe clock. From 6c7c449a008b7a279e15254a829d096a7ea72ee3 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 12:06:19 +0100 Subject: [PATCH 2700/4122] phy: qcom-qmp-combo: clean up device-tree parsing Since the QMP driver split there will be precisely two child nodes so drop the obsolete iteration construct. While at it, drop the verbose error logging that would have been printed also on probe deferrals. Note that there is no need to check if there are additional child nodes (the kernel is not a devicetree validator), but let's return an error if either child node is missing. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114110621.4639-21-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 80 ++++++++--------------- 1 file changed, 27 insertions(+), 53 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 707dd68ba993..9eacbd224012 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -2575,13 +2575,12 @@ static int qmp_combo_probe(struct platform_device *pdev) { struct qmp_combo *qmp; struct device *dev = &pdev->dev; - struct device_node *child; + struct device_node *dp_np, *usb_np; struct phy_provider *phy_provider; void __iomem *serdes; void __iomem *usb_serdes; void __iomem *dp_serdes = NULL; const struct qmp_phy_cfg *cfg = NULL; - int num, id, expected_phys; int ret; qmp = devm_kzalloc(dev, sizeof(*qmp), GFP_KERNEL); @@ -2607,8 +2606,6 @@ static int qmp_combo_probe(struct platform_device *pdev) if (IS_ERR(dp_serdes)) return PTR_ERR(dp_serdes); - expected_phys = 2; - 
mutex_init(&qmp->phy_mutex); ret = qmp_combo_clk_init(dev, cfg); @@ -2623,75 +2620,52 @@ static int qmp_combo_probe(struct platform_device *pdev) if (ret) return ret; - num = of_get_available_child_count(dev->of_node); - /* do we have a rogue child node ? */ - if (num > expected_phys) + usb_np = of_get_child_by_name(dev->of_node, "usb3-phy"); + if (!usb_np) return -EINVAL; + dp_np = of_get_child_by_name(dev->of_node, "dp-phy"); + if (!dp_np) { + of_node_put(usb_np); + return -EINVAL; + } + pm_runtime_set_active(dev); ret = devm_pm_runtime_enable(dev); if (ret) - return ret; + goto err_node_put; /* * Prevent runtime pm from being ON by default. Users can enable * it using power/control in sysfs. */ pm_runtime_forbid(dev); - id = 0; - for_each_available_child_of_node(dev->of_node, child) { - if (of_node_name_eq(child, "dp-phy")) { - serdes = dp_serdes; + ret = qmp_combo_create_usb(dev, usb_np, usb_serdes, cfg); + if (ret) + goto err_node_put; - /* Create per-lane phy */ - ret = qmp_combo_create_dp(dev, child, serdes, cfg); - if (ret) { - dev_err(dev, "failed to create lane%d phy, %d\n", - id, ret); - goto err_node_put; - } + ret = phy_pipe_clk_register(qmp, usb_np); + if (ret) + goto err_node_put; - ret = phy_dp_clks_register(qmp, child); - if (ret) { - dev_err(qmp->dev, - "failed to register DP clock source\n"); - goto err_node_put; - } - } else if (of_node_name_eq(child, "usb3-phy")) { - serdes = usb_serdes; + ret = qmp_combo_create_dp(dev, dp_np, dp_serdes, cfg); + if (ret) + goto err_node_put; - /* Create per-lane phy */ - ret = qmp_combo_create_usb(dev, child, serdes, cfg); - if (ret) { - dev_err(dev, "failed to create lane%d phy, %d\n", - id, ret); - goto err_node_put; - } - - /* - * Register the pipe clock provided by phy. - * See function description to see details of this pipe clock. 
- */ - ret = phy_pipe_clk_register(qmp, child); - if (ret) { - dev_err(qmp->dev, - "failed to register pipe clock source\n"); - goto err_node_put; - } - } - - id++; - } - - if (!qmp->usb_phy) - return -EINVAL; + ret = phy_dp_clks_register(qmp, dp_np); + if (ret) + goto err_node_put; phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); + of_node_put(usb_np); + of_node_put(dp_np); + return PTR_ERR_OR_ZERO(phy_provider); err_node_put: - of_node_put(child); + of_node_put(usb_np); + of_node_put(dp_np); return ret; } From 44aff8e31080e13a24313120aae259c659b04cd1 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 12:06:20 +0100 Subject: [PATCH 2701/4122] phy: qcom-qmp-combo: clean up probe initialisation Stop abusing the driver data pointer and instead pass the driver state structure directly to the initialisation helpers during probe. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114110621.4639-22-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 61 ++++++++++------------- 1 file changed, 27 insertions(+), 34 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 9eacbd224012..5d92cbfc458e 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -2148,9 +2148,10 @@ static const struct dev_pm_ops qmp_combo_pm_ops = { qmp_combo_runtime_resume, NULL) }; -static int qmp_combo_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg) +static int qmp_combo_vreg_init(struct qmp_combo *qmp) { - struct qmp_combo *qmp = dev_get_drvdata(dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; + struct device *dev = qmp->dev; int num = cfg->num_vregs; int ret, i; @@ -2180,9 +2181,10 @@ static int qmp_combo_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg return 0; } -static int qmp_combo_reset_init(struct device *dev, const struct 
qmp_phy_cfg *cfg) +static int qmp_combo_reset_init(struct qmp_combo *qmp) { - struct qmp_combo *qmp = dev_get_drvdata(dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; + struct device *dev = qmp->dev; int i; int ret; @@ -2201,9 +2203,10 @@ static int qmp_combo_reset_init(struct device *dev, const struct qmp_phy_cfg *cf return 0; } -static int qmp_combo_clk_init(struct device *dev, const struct qmp_phy_cfg *cfg) +static int qmp_combo_clk_init(struct qmp_combo *qmp) { - struct qmp_combo *qmp = dev_get_drvdata(dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; + struct device *dev = qmp->dev; int num = cfg->num_clks; int i; @@ -2468,15 +2471,12 @@ static int phy_dp_clks_register(struct qmp_combo *qmp, struct device_node *np) return devm_add_action_or_reset(qmp->dev, phy_clk_release_provider, np); } -static int qmp_combo_create_dp(struct device *dev, struct device_node *np, - void __iomem *serdes, const struct qmp_phy_cfg *cfg) +static int qmp_combo_create_dp(struct qmp_combo *qmp, struct device_node *np) { - struct qmp_combo *qmp = dev_get_drvdata(dev); + struct device *dev = qmp->dev; struct phy *generic_phy; int ret; - qmp->cfg = cfg; - qmp->dp_serdes = serdes; /* * Get memory resources from the DP child node: * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2; @@ -2509,15 +2509,13 @@ static int qmp_combo_create_dp(struct device *dev, struct device_node *np, return 0; } -static int qmp_combo_create_usb(struct device *dev, struct device_node *np, - void __iomem *serdes, const struct qmp_phy_cfg *cfg) +static int qmp_combo_create_usb(struct qmp_combo *qmp, struct device_node *np) { - struct qmp_combo *qmp = dev_get_drvdata(dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; + struct device *dev = qmp->dev; struct phy *generic_phy; int ret; - qmp->cfg = cfg; - qmp->serdes = serdes; /* * Get memory resources from the USB child node: * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2; @@ -2577,10 +2575,6 @@ static int qmp_combo_probe(struct platform_device *pdev) 
struct device *dev = &pdev->dev; struct device_node *dp_np, *usb_np; struct phy_provider *phy_provider; - void __iomem *serdes; - void __iomem *usb_serdes; - void __iomem *dp_serdes = NULL; - const struct qmp_phy_cfg *cfg = NULL; int ret; qmp = devm_kzalloc(dev, sizeof(*qmp), GFP_KERNEL); @@ -2588,35 +2582,34 @@ static int qmp_combo_probe(struct platform_device *pdev) return -ENOMEM; qmp->dev = dev; - dev_set_drvdata(dev, qmp); - cfg = of_device_get_match_data(dev); - if (!cfg) + qmp->cfg = of_device_get_match_data(dev); + if (!qmp->cfg) return -EINVAL; - usb_serdes = serdes = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(serdes)) - return PTR_ERR(serdes); + qmp->serdes = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(qmp->serdes)) + return PTR_ERR(qmp->serdes); qmp->dp_com = devm_platform_ioremap_resource(pdev, 1); if (IS_ERR(qmp->dp_com)) return PTR_ERR(qmp->dp_com); - dp_serdes = devm_platform_ioremap_resource(pdev, 2); - if (IS_ERR(dp_serdes)) - return PTR_ERR(dp_serdes); + qmp->dp_serdes = devm_platform_ioremap_resource(pdev, 2); + if (IS_ERR(qmp->dp_serdes)) + return PTR_ERR(qmp->dp_serdes); mutex_init(&qmp->phy_mutex); - ret = qmp_combo_clk_init(dev, cfg); + ret = qmp_combo_clk_init(qmp); if (ret) return ret; - ret = qmp_combo_reset_init(dev, cfg); + ret = qmp_combo_reset_init(qmp); if (ret) return ret; - ret = qmp_combo_vreg_init(dev, cfg); + ret = qmp_combo_vreg_init(qmp); if (ret) return ret; @@ -2640,7 +2633,7 @@ static int qmp_combo_probe(struct platform_device *pdev) */ pm_runtime_forbid(dev); - ret = qmp_combo_create_usb(dev, usb_np, usb_serdes, cfg); + ret = qmp_combo_create_usb(qmp, usb_np); if (ret) goto err_node_put; @@ -2648,7 +2641,7 @@ static int qmp_combo_probe(struct platform_device *pdev) if (ret) goto err_node_put; - ret = qmp_combo_create_dp(dev, dp_np, dp_serdes, cfg); + ret = qmp_combo_create_dp(qmp, dp_np); if (ret) goto err_node_put; From 526103b7a6759e4afd1bcdd4de619642689a78d2 Mon Sep 17 00:00:00 2001 From: Johan 
Hovold Date: Mon, 14 Nov 2022 12:06:21 +0100 Subject: [PATCH 2702/4122] phy: qcom-qmp-combo: clean up DP callback names Clean up and unify the DP callbacks by dropping the redundant "qcom" and "phy" prefix and infix and by using a common naming scheme ("qmp" + version + callback name). Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114110621.4639-23-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 82 +++++++++++------------ 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 5d92cbfc458e..c5d8f8bfaaaa 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -903,17 +903,17 @@ struct qmp_combo { struct qmp_phy_dp_clks *dp_clks; }; -static void qcom_qmp_v3_phy_dp_aux_init(struct qmp_combo *qmp); -static void qcom_qmp_v3_phy_configure_dp_tx(struct qmp_combo *qmp); -static int qcom_qmp_v3_phy_configure_dp_phy(struct qmp_combo *qmp); -static int qcom_qmp_v3_dp_phy_calibrate(struct qmp_combo *qmp); +static void qmp_v3_dp_aux_init(struct qmp_combo *qmp); +static void qmp_v3_configure_dp_tx(struct qmp_combo *qmp); +static int qmp_v3_configure_dp_phy(struct qmp_combo *qmp); +static int qmp_v3_calibrate_dp_phy(struct qmp_combo *qmp); -static void qcom_qmp_v4_phy_dp_aux_init(struct qmp_combo *qmp); -static void qcom_qmp_v4_phy_configure_dp_tx(struct qmp_combo *qmp); -static int qcom_qmp_v4_phy_configure_dp_phy(struct qmp_combo *qmp); -static int qcom_qmp_v4_dp_phy_calibrate(struct qmp_combo *qmp); +static void qmp_v4_dp_aux_init(struct qmp_combo *qmp); +static void qmp_v4_configure_dp_tx(struct qmp_combo *qmp); +static int qmp_v4_configure_dp_phy(struct qmp_combo *qmp); +static int qmp_v4_calibrate_dp_phy(struct qmp_combo *qmp); -static int qcom_qmp_v5_phy_configure_dp_phy(struct qmp_combo *qmp); +static int 
qmp_v5_configure_dp_phy(struct qmp_combo *qmp); static inline void qphy_setbits(void __iomem *base, u32 offset, u32 val) { @@ -991,10 +991,10 @@ static const struct qmp_phy_cfg sc7180_usb3dpphy_cfg = { .swing_hbr3_hbr2 = &qmp_dp_v3_voltage_swing_hbr3_hbr2, .pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2, - .dp_aux_init = qcom_qmp_v3_phy_dp_aux_init, - .configure_dp_tx = qcom_qmp_v3_phy_configure_dp_tx, - .configure_dp_phy = qcom_qmp_v3_phy_configure_dp_phy, - .calibrate_dp_phy = qcom_qmp_v3_dp_phy_calibrate, + .dp_aux_init = qmp_v3_dp_aux_init, + .configure_dp_tx = qmp_v3_configure_dp_tx, + .configure_dp_phy = qmp_v3_configure_dp_phy, + .calibrate_dp_phy = qmp_v3_calibrate_dp_phy, .clk_list = qmp_v3_phy_clk_l, .num_clks = ARRAY_SIZE(qmp_v3_phy_clk_l), @@ -1036,10 +1036,10 @@ static const struct qmp_phy_cfg sdm845_usb3dpphy_cfg = { .swing_hbr3_hbr2 = &qmp_dp_v3_voltage_swing_hbr3_hbr2, .pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2, - .dp_aux_init = qcom_qmp_v3_phy_dp_aux_init, - .configure_dp_tx = qcom_qmp_v3_phy_configure_dp_tx, - .configure_dp_phy = qcom_qmp_v3_phy_configure_dp_phy, - .calibrate_dp_phy = qcom_qmp_v3_dp_phy_calibrate, + .dp_aux_init = qmp_v3_dp_aux_init, + .configure_dp_tx = qmp_v3_configure_dp_tx, + .configure_dp_phy = qmp_v3_configure_dp_phy, + .calibrate_dp_phy = qmp_v3_calibrate_dp_phy, .clk_list = qmp_v3_phy_clk_l, .num_clks = ARRAY_SIZE(qmp_v3_phy_clk_l), @@ -1083,10 +1083,10 @@ static const struct qmp_phy_cfg sc8180x_usb3dpphy_cfg = { .swing_hbr3_hbr2 = &qmp_dp_v3_voltage_swing_hbr3_hbr2, .pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2, - .dp_aux_init = qcom_qmp_v4_phy_dp_aux_init, - .configure_dp_tx = qcom_qmp_v4_phy_configure_dp_tx, - .configure_dp_phy = qcom_qmp_v4_phy_configure_dp_phy, - .calibrate_dp_phy = qcom_qmp_v4_dp_phy_calibrate, + .dp_aux_init = qmp_v4_dp_aux_init, + .configure_dp_tx = qmp_v4_configure_dp_tx, + .configure_dp_phy = qmp_v4_configure_dp_phy, + .calibrate_dp_phy = 
qmp_v4_calibrate_dp_phy, .clk_list = qmp_v4_phy_clk_l, .num_clks = ARRAY_SIZE(qmp_v4_phy_clk_l), @@ -1129,10 +1129,10 @@ static const struct qmp_phy_cfg sc8280xp_usb43dpphy_cfg = { .swing_hbr3_hbr2 = &qmp_dp_v5_voltage_swing_hbr3_hbr2, .pre_emphasis_hbr3_hbr2 = &qmp_dp_v5_pre_emphasis_hbr3_hbr2, - .dp_aux_init = qcom_qmp_v4_phy_dp_aux_init, - .configure_dp_tx = qcom_qmp_v4_phy_configure_dp_tx, - .configure_dp_phy = qcom_qmp_v5_phy_configure_dp_phy, - .calibrate_dp_phy = qcom_qmp_v4_dp_phy_calibrate, + .dp_aux_init = qmp_v4_dp_aux_init, + .configure_dp_tx = qmp_v4_configure_dp_tx, + .configure_dp_phy = qmp_v5_configure_dp_phy, + .calibrate_dp_phy = qmp_v4_calibrate_dp_phy, .clk_list = qmp_v4_phy_clk_l, .num_clks = ARRAY_SIZE(qmp_v4_phy_clk_l), @@ -1175,10 +1175,10 @@ static const struct qmp_phy_cfg sm8250_usb3dpphy_cfg = { .swing_hbr3_hbr2 = &qmp_dp_v3_voltage_swing_hbr3_hbr2, .pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2, - .dp_aux_init = qcom_qmp_v4_phy_dp_aux_init, - .configure_dp_tx = qcom_qmp_v4_phy_configure_dp_tx, - .configure_dp_phy = qcom_qmp_v4_phy_configure_dp_phy, - .calibrate_dp_phy = qcom_qmp_v4_dp_phy_calibrate, + .dp_aux_init = qmp_v4_dp_aux_init, + .configure_dp_tx = qmp_v4_configure_dp_tx, + .configure_dp_phy = qmp_v4_configure_dp_phy, + .calibrate_dp_phy = qmp_v4_calibrate_dp_phy, .clk_list = qmp_v4_sm8250_usbphy_clk_l, .num_clks = ARRAY_SIZE(qmp_v4_sm8250_usbphy_clk_l), @@ -1251,7 +1251,7 @@ static int qmp_combo_dp_serdes_init(struct qmp_combo *qmp) return 0; } -static void qcom_qmp_v3_phy_dp_aux_init(struct qmp_combo *qmp) +static void qmp_v3_dp_aux_init(struct qmp_combo *qmp) { writel(DP_PHY_PD_CTL_PWRDN | DP_PHY_PD_CTL_AUX_PWRDN | DP_PHY_PD_CTL_PLL_PWRDN | DP_PHY_PD_CTL_DP_CLAMP_EN, @@ -1332,7 +1332,7 @@ static int qmp_combo_configure_dp_swing(struct qmp_combo *qmp, return 0; } -static void qcom_qmp_v3_phy_configure_dp_tx(struct qmp_combo *qmp) +static void qmp_v3_configure_dp_tx(struct qmp_combo *qmp) { const struct 
phy_configure_opts_dp *dp_opts = &qmp->dp_opts; u32 bias_en, drvr_en; @@ -1385,7 +1385,7 @@ static bool qmp_combo_configure_dp_mode(struct qmp_combo *qmp) return reverse; } -static int qcom_qmp_v3_phy_configure_dp_phy(struct qmp_combo *qmp) +static int qmp_v3_configure_dp_phy(struct qmp_combo *qmp) { const struct qmp_phy_dp_clks *dp_clks = qmp->dp_clks; const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts; @@ -1462,7 +1462,7 @@ static int qcom_qmp_v3_phy_configure_dp_phy(struct qmp_combo *qmp) * We need to calibrate the aux setting here as many times * as the caller tries */ -static int qcom_qmp_v3_dp_phy_calibrate(struct qmp_combo *qmp) +static int qmp_v3_calibrate_dp_phy(struct qmp_combo *qmp) { static const u8 cfg1_settings[] = { 0x13, 0x23, 0x1d }; u8 val; @@ -1476,7 +1476,7 @@ static int qcom_qmp_v3_dp_phy_calibrate(struct qmp_combo *qmp) return 0; } -static void qcom_qmp_v4_phy_dp_aux_init(struct qmp_combo *qmp) +static void qmp_v4_dp_aux_init(struct qmp_combo *qmp) { writel(DP_PHY_PD_CTL_PWRDN | DP_PHY_PD_CTL_PSR_PWRDN | DP_PHY_PD_CTL_AUX_PWRDN | DP_PHY_PD_CTL_PLL_PWRDN | DP_PHY_PD_CTL_DP_CLAMP_EN, @@ -1503,7 +1503,7 @@ static void qcom_qmp_v4_phy_dp_aux_init(struct qmp_combo *qmp) qmp->dp_pcs + QSERDES_V4_DP_PHY_AUX_INTERRUPT_MASK); } -static void qcom_qmp_v4_phy_configure_dp_tx(struct qmp_combo *qmp) +static void qmp_v4_configure_dp_tx(struct qmp_combo *qmp) { /* Program default values before writing proper values */ writel(0x27, qmp->dp_tx + QSERDES_V4_TX_TX_DRV_LVL); @@ -1516,7 +1516,7 @@ static void qcom_qmp_v4_phy_configure_dp_tx(struct qmp_combo *qmp) QSERDES_V4_TX_TX_EMP_POST1_LVL); } -static int qcom_qmp_v45_phy_configure_dp_phy(struct qmp_combo *qmp) +static int qmp_v45_configure_dp_phy(struct qmp_combo *qmp) { const struct qmp_phy_dp_clks *dp_clks = qmp->dp_clks; const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts; @@ -1606,7 +1606,7 @@ static int qcom_qmp_v45_phy_configure_dp_phy(struct qmp_combo *qmp) return 0; } -static int 
qcom_qmp_v4_phy_configure_dp_phy(struct qmp_combo *qmp) +static int qmp_v4_configure_dp_phy(struct qmp_combo *qmp) { const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts; u32 bias0_en, drvr0_en, bias1_en, drvr1_en; @@ -1614,7 +1614,7 @@ static int qcom_qmp_v4_phy_configure_dp_phy(struct qmp_combo *qmp) u32 status; int ret; - ret = qcom_qmp_v45_phy_configure_dp_phy(qmp); + ret = qmp_v45_configure_dp_phy(qmp); if (ret < 0) return ret; @@ -1668,7 +1668,7 @@ static int qcom_qmp_v4_phy_configure_dp_phy(struct qmp_combo *qmp) return 0; } -static int qcom_qmp_v5_phy_configure_dp_phy(struct qmp_combo *qmp) +static int qmp_v5_configure_dp_phy(struct qmp_combo *qmp) { const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts; u32 bias0_en, drvr0_en, bias1_en, drvr1_en; @@ -1676,7 +1676,7 @@ static int qcom_qmp_v5_phy_configure_dp_phy(struct qmp_combo *qmp) u32 status; int ret; - ret = qcom_qmp_v45_phy_configure_dp_phy(qmp); + ret = qmp_v45_configure_dp_phy(qmp); if (ret < 0) return ret; @@ -1729,7 +1729,7 @@ static int qcom_qmp_v5_phy_configure_dp_phy(struct qmp_combo *qmp) * We need to calibrate the aux setting here as many times * as the caller tries */ -static int qcom_qmp_v4_dp_phy_calibrate(struct qmp_combo *qmp) +static int qmp_v4_calibrate_dp_phy(struct qmp_combo *qmp) { static const u8 cfg1_settings[] = { 0x20, 0x13, 0x23, 0x1d }; u8 val; From 32efdb0bb6e19965337fb63991237ecd99e0f9a4 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 21 Nov 2022 09:50:44 +0100 Subject: [PATCH 2703/4122] dt-bindings: phy: qcom,qmp-usb3-dp: rename current bindings The current QMP USB3-DP PHY bindings are based on the original MSM8996 binding which provided multiple PHYs per IP block and these in turn were described by child nodes. The QMP USB3-DP PHY block provides a single multi-protocol PHY and even if some resources are only used by either the USB or DP part of the device there is no real benefit in describing these resources in child nodes. 
The original MSM8996 binding also ended up describing the individual register blocks as belonging to either the wrapper node or the PHY child nodes. This is an unnecessary level of detail which has led to problems when later IP blocks using different register layouts have been forced to fit the original mould rather than updating the binding. The bindings are arguably also incomplete as they only describe the register blocks used by the current Linux drivers (e.g. does not include the PCS_LANE registers). In preparation for adding new bindings for SC8280XP which further bindings can be based on, rename the current schema file after SC7180, which was the first supported platform, and add a reference to the SC8280XP bindings. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221121085058.31213-2-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- ...dp-phy.yaml => qcom,sc7180-qmp-usb3-dp-phy.yaml} | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) rename Documentation/devicetree/bindings/phy/{qcom,qmp-usb3-dp-phy.yaml => qcom,sc7180-qmp-usb3-dp-phy.yaml} (91%) diff --git a/Documentation/devicetree/bindings/phy/qcom,qmp-usb3-dp-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,sc7180-qmp-usb3-dp-phy.yaml similarity index 91% rename from Documentation/devicetree/bindings/phy/qcom,qmp-usb3-dp-phy.yaml rename to Documentation/devicetree/bindings/phy/qcom,sc7180-qmp-usb3-dp-phy.yaml index 97a7ecafbf85..8afc5e815ae8 100644 --- a/Documentation/devicetree/bindings/phy/qcom,qmp-usb3-dp-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,sc7180-qmp-usb3-dp-phy.yaml @@ -2,10 +2,17 @@ %YAML 1.2 --- -$id: "http://devicetree.org/schemas/phy/qcom,qmp-usb3-dp-phy.yaml#" -$schema: "http://devicetree.org/meta-schemas/core.yaml#" +$id: http://devicetree.org/schemas/phy/qcom,sc7180-qmp-usb3-dp-phy.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# -title: Qualcomm QMP USB3 DP PHY controller +title: 
Qualcomm QMP USB3 DP PHY controller (SC7180) + +description: + The QMP PHY controller supports physical layer functionality for a number of + controllers on Qualcomm chipsets, such as, PCIe, UFS and USB. + + Note that these bindings are for SoCs up to SC8180X. For newer SoCs, see + qcom,sc8280xp-qmp-usb43dp-phy.yaml. maintainers: - Wesley Cheng From e1c4c5436b4ad579762fbe78bfabc8aef59bd5b1 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 21 Nov 2022 09:50:45 +0100 Subject: [PATCH 2704/4122] dt-bindings: phy: qcom,qmp-usb3-dp: fix sc8280xp binding The current QMP USB3-DP PHY bindings are based on the original MSM8996 binding which provided multiple PHYs per IP block and these in turn were described by child nodes. The QMP USB3-DP PHY block provides a single multi-protocol PHY and even if some resources are only used by either the USB or DP part of the device there is no real benefit in describing these resources in child nodes. The original MSM8996 binding also ended up describing the individual register blocks as belonging to either the wrapper node or the PHY child nodes. This is an unnecessary level of detail which has led to problems when later IP blocks using different register layouts have been forced to fit the original mould rather than updating the binding. The bindings are arguably also incomplete as they only describe the register blocks used by the current Linux drivers (e.g. does not include the PCS LANE registers). This is specifically true for later USB4-USB3-DP QMP PHYs where the TX registers are used by both the USB3 and DP parts of the PHY (and where the USB4 part of the PHY was not covered by the binding at all). Notably there are also no DP "RX" (sic) registers as described by the current bindings and the DP "PCS" region is really a set of DP_PHY registers. Add a new binding for the USB4-USB3-DP QMP PHYs found on SC8280XP which further bindings can be based on. 
Note that the binding uses a PHY index to access either the USB3 or DP part of the PHY and that this can later be used also for the USB4 part if needed. Similarly, the clock inputs and outputs can later be extended to support USB4. Also note that the current binding is simply removed instead of being deprecated as it was only recently merged and would not allow for supporting DP mode. Reviewed-by: Rob Herring Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221121085058.31213-3-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- .../phy/qcom,sc7180-qmp-usb3-dp-phy.yaml | 12 --- .../phy/qcom,sc8280xp-qmp-usb43dp-phy.yaml | 99 +++++++++++++++++++ include/dt-bindings/phy/phy-qcom-qmp.h | 20 ++++ 3 files changed, 119 insertions(+), 12 deletions(-) create mode 100644 Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb43dp-phy.yaml create mode 100644 include/dt-bindings/phy/phy-qcom-qmp.h diff --git a/Documentation/devicetree/bindings/phy/qcom,sc7180-qmp-usb3-dp-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,sc7180-qmp-usb3-dp-phy.yaml index 8afc5e815ae8..d9d0ab90edb1 100644 --- a/Documentation/devicetree/bindings/phy/qcom,sc7180-qmp-usb3-dp-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,sc7180-qmp-usb3-dp-phy.yaml @@ -23,7 +23,6 @@ properties: - qcom,sc7180-qmp-usb3-dp-phy - qcom,sc7280-qmp-usb3-dp-phy - qcom,sc8180x-qmp-usb3-dp-phy - - qcom,sc8280xp-qmp-usb43dp-phy - qcom,sdm845-qmp-usb3-dp-phy - qcom,sm8250-qmp-usb3-dp-phy reg: @@ -169,17 +168,6 @@ required: additionalProperties: false -allOf: - - if: - properties: - compatible: - contains: - enum: - - qcom,sc8280xp-qmp-usb43dp-phy - then: - required: - - power-domains - examples: - | #include diff --git a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb43dp-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb43dp-phy.yaml new file mode 100644 index 000000000000..6f31693d9868 --- /dev/null +++ 
b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb43dp-phy.yaml @@ -0,0 +1,99 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/phy/qcom,sc8280xp-qmp-usb43dp-phy.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm QMP USB4-USB3-DP PHY controller (SC8280XP) + +maintainers: + - Vinod Koul + +description: + The QMP PHY controller supports physical layer functionality for a number of + controllers on Qualcomm chipsets, such as, PCIe, UFS and USB. + +properties: + compatible: + enum: + - qcom,sc8280xp-qmp-usb43dp-phy + + reg: + maxItems: 1 + + clocks: + maxItems: 4 + + clock-names: + items: + - const: aux + - const: ref + - const: com_aux + - const: usb3_pipe + + power-domains: + maxItems: 1 + + resets: + maxItems: 2 + + reset-names: + items: + - const: phy + - const: common + + vdda-phy-supply: true + + vdda-pll-supply: true + + "#clock-cells": + const: 1 + description: + See include/dt-bindings/dt-bindings/phy/phy-qcom-qmp.h + + "#phy-cells": + const: 1 + description: + See include/dt-bindings/dt-bindings/phy/phy-qcom-qmp.h + +required: + - compatible + - reg + - clocks + - clock-names + - power-domains + - resets + - reset-names + - vdda-phy-supply + - vdda-pll-supply + - "#clock-cells" + - "#phy-cells" + +additionalProperties: false + +examples: + - | + #include + + phy@88eb000 { + compatible = "qcom,sc8280xp-qmp-usb43dp-phy"; + reg = <0x088eb000 0x4000>; + + clocks = <&gcc GCC_USB3_PRIM_PHY_AUX_CLK>, + <&gcc GCC_USB4_EUD_CLKREF_CLK>, + <&gcc GCC_USB3_PRIM_PHY_COM_AUX_CLK>, + <&gcc GCC_USB3_PRIM_PHY_PIPE_CLK>; + clock-names = "aux", "ref", "com_aux", "usb3_pipe"; + + power-domains = <&gcc USB30_PRIM_GDSC>; + + resets = <&gcc GCC_USB3_PHY_PRIM_BCR>, + <&gcc GCC_USB4_DP_PHY_PRIM_BCR>; + reset-names = "phy", "common"; + + vdda-phy-supply = <&vreg_l9d>; + vdda-pll-supply = <&vreg_l4d>; + + #clock-cells = <1>; + #phy-cells = <1>; + }; diff --git 
a/include/dt-bindings/phy/phy-qcom-qmp.h b/include/dt-bindings/phy/phy-qcom-qmp.h new file mode 100644 index 000000000000..4edec4c5b224 --- /dev/null +++ b/include/dt-bindings/phy/phy-qcom-qmp.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */ +/* + * Qualcomm QMP PHY constants + * + * Copyright (C) 2022 Linaro Limited + */ + +#ifndef _DT_BINDINGS_PHY_QMP +#define _DT_BINDINGS_PHY_QMP + +/* QMP USB4-USB3-DP clocks */ +#define QMP_USB43DP_USB3_PIPE_CLK 0 +#define QMP_USB43DP_DP_LINK_CLK 1 +#define QMP_USB43DP_DP_VCO_DIV_CLK 2 + +/* QMP USB4-USB3-DP PHYs */ +#define QMP_USB43DP_USB3_PHY 0 +#define QMP_USB43DP_DP_PHY 1 + +#endif /* _DT_BINDINGS_PHY_QMP */ From 774903ca6c499887f554234bb019c91aa0a8f741 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 21 Nov 2022 09:50:46 +0100 Subject: [PATCH 2705/4122] phy: qcom-qmp-combo: drop v4 reference-clock source The source clock for the reference clock should not be described by the devicetree and instead this relationship should be modelled in the clock driver. Drop the management of the source clock from the driver for SC8180X and SC8280XP. Note that support for the former is not yet in mainline. Also note that the binding has never been updated to describe the v4 clocks for SC8180X. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221121085058.31213-4-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index c5d8f8bfaaaa..5da42a4e5bf6 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -945,7 +945,7 @@ static const char * const qmp_v3_phy_clk_l[] = { }; static const char * const qmp_v4_phy_clk_l[] = { - "aux", "ref_clk_src", "ref", "com_aux", + "aux", "ref", "com_aux", }; /* the primary usb3 phy on sm8250 doesn't have a ref clock */ From b3982f2144e10bd542189e38cd47709e55389606 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 21 Nov 2022 09:50:47 +0100 Subject: [PATCH 2706/4122] phy: qcom-qmp-combo: restructure PHY creation In preparation for supporting devicetree bindings which do not use child nodes, move the PHY creation to probe() proper and parse the serdes, dp_com and dp_serdes resources in a dedicated legacy devicetree helper. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221121085058.31213-5-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 95 ++++++++++++----------- 1 file changed, 51 insertions(+), 44 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 5da42a4e5bf6..85def6560e43 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -2471,11 +2471,9 @@ static int phy_dp_clks_register(struct qmp_combo *qmp, struct device_node *np) return devm_add_action_or_reset(qmp->dev, phy_clk_release_provider, np); } -static int qmp_combo_create_dp(struct qmp_combo *qmp, struct device_node *np) +static int qmp_combo_parse_dt_lecacy_dp(struct qmp_combo *qmp, struct device_node *np) { struct device *dev = qmp->dev; - struct phy *generic_phy; - int ret; /* * Get memory resources from the DP child node: @@ -2496,25 +2494,13 @@ static int qmp_combo_create_dp(struct qmp_combo *qmp, struct device_node *np) if (IS_ERR(qmp->dp_tx2)) return PTR_ERR(qmp->dp_tx2); - generic_phy = devm_phy_create(dev, np, &qmp_combo_dp_phy_ops); - if (IS_ERR(generic_phy)) { - ret = PTR_ERR(generic_phy); - dev_err(dev, "failed to create DP PHY: %d\n", ret); - return ret; - } - - qmp->dp_phy = generic_phy; - phy_set_drvdata(generic_phy, qmp); - return 0; } -static int qmp_combo_create_usb(struct qmp_combo *qmp, struct device_node *np) +static int qmp_combo_parse_dt_lecacy_usb(struct qmp_combo *qmp, struct device_node *np) { const struct qmp_phy_cfg *cfg = qmp->cfg; struct device *dev = qmp->dev; - struct phy *generic_phy; - int ret; /* * Get memory resources from the USB child node: @@ -2556,15 +2542,34 @@ static int qmp_combo_create_usb(struct qmp_combo *qmp, struct device_node *np) "failed to get pipe clock\n"); } - generic_phy = devm_phy_create(dev, np, &qmp_combo_usb_phy_ops); - if (IS_ERR(generic_phy)) { - ret = 
PTR_ERR(generic_phy); - dev_err(dev, "failed to create USB PHY: %d\n", ret); - return ret; - } + return 0; +} - qmp->usb_phy = generic_phy; - phy_set_drvdata(generic_phy, qmp); +static int qmp_combo_parse_dt_legacy(struct qmp_combo *qmp, struct device_node *usb_np, + struct device_node *dp_np) +{ + struct platform_device *pdev = to_platform_device(qmp->dev); + int ret; + + qmp->serdes = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(qmp->serdes)) + return PTR_ERR(qmp->serdes); + + qmp->dp_com = devm_platform_ioremap_resource(pdev, 1); + if (IS_ERR(qmp->dp_com)) + return PTR_ERR(qmp->dp_com); + + qmp->dp_serdes = devm_platform_ioremap_resource(pdev, 2); + if (IS_ERR(qmp->dp_serdes)) + return PTR_ERR(qmp->dp_serdes); + + ret = qmp_combo_parse_dt_lecacy_usb(qmp, usb_np); + if (ret) + return ret; + + ret = qmp_combo_parse_dt_lecacy_dp(qmp, dp_np); + if (ret) + return ret; return 0; } @@ -2587,18 +2592,6 @@ static int qmp_combo_probe(struct platform_device *pdev) if (!qmp->cfg) return -EINVAL; - qmp->serdes = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(qmp->serdes)) - return PTR_ERR(qmp->serdes); - - qmp->dp_com = devm_platform_ioremap_resource(pdev, 1); - if (IS_ERR(qmp->dp_com)) - return PTR_ERR(qmp->dp_com); - - qmp->dp_serdes = devm_platform_ioremap_resource(pdev, 2); - if (IS_ERR(qmp->dp_serdes)) - return PTR_ERR(qmp->dp_serdes); - mutex_init(&qmp->phy_mutex); ret = qmp_combo_clk_init(qmp); @@ -2623,6 +2616,10 @@ static int qmp_combo_probe(struct platform_device *pdev) return -EINVAL; } + ret = qmp_combo_parse_dt_legacy(qmp, usb_np, dp_np); + if (ret) + goto err_node_put; + pm_runtime_set_active(dev); ret = devm_pm_runtime_enable(dev); if (ret) @@ -2633,22 +2630,32 @@ static int qmp_combo_probe(struct platform_device *pdev) */ pm_runtime_forbid(dev); - ret = qmp_combo_create_usb(qmp, usb_np); - if (ret) - goto err_node_put; - ret = phy_pipe_clk_register(qmp, usb_np); if (ret) goto err_node_put; - ret = qmp_combo_create_dp(qmp, dp_np); - if (ret) - 
goto err_node_put; - ret = phy_dp_clks_register(qmp, dp_np); if (ret) goto err_node_put; + qmp->usb_phy = devm_phy_create(dev, usb_np, &qmp_combo_usb_phy_ops); + if (IS_ERR(qmp->usb_phy)) { + ret = PTR_ERR(qmp->usb_phy); + dev_err(dev, "failed to create USB PHY: %d\n", ret); + goto err_node_put; + } + + phy_set_drvdata(qmp->usb_phy, qmp); + + qmp->dp_phy = devm_phy_create(dev, dp_np, &qmp_combo_dp_phy_ops); + if (IS_ERR(qmp->dp_phy)) { + ret = PTR_ERR(qmp->dp_phy); + dev_err(dev, "failed to create DP PHY: %d\n", ret); + goto err_node_put; + } + + phy_set_drvdata(qmp->dp_phy, qmp); + phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); of_node_put(usb_np); From 0dd521d593ade3e8494d29abb653fda5bec5d508 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 21 Nov 2022 09:50:48 +0100 Subject: [PATCH 2707/4122] phy: qcom-qmp-combo: generate pipe clock name In preparation for supporting devicetree bindings which do not use child nodes, generate also the USB3 pipe clock name based on the platform device name as is done for the DP clocks. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221121085058.31213-6-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 85def6560e43..7434955c8898 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -2247,18 +2247,15 @@ static int phy_pipe_clk_register(struct qmp_combo *qmp, struct device_node *np) { struct clk_fixed_rate *fixed; struct clk_init_data init = { }; + char name[64]; int ret; - ret = of_property_read_string(np, "clock-output-names", &init.name); - if (ret) { - dev_err(qmp->dev, "%pOFn: No clock-output-names\n", np); - return ret; - } - fixed = devm_kzalloc(qmp->dev, sizeof(*fixed), GFP_KERNEL); if (!fixed) return -ENOMEM; + snprintf(name, sizeof(name), "%s::pipe_clk", dev_name(qmp->dev)); + init.name = name; init.ops = &clk_fixed_rate_ops; /* controllers using QMP phys use 125MHz pipe clock interface */ From ee81f2eb0ee0c99a109f91a9617a8d7698479181 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 21 Nov 2022 09:50:49 +0100 Subject: [PATCH 2708/4122] phy: qcom-qmp-combo: drop redundant clock structure Drop the unnecessary DP clock structure and instead store the clocks directly in the driver data. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221121085058.31213-7-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 52 ++++++++--------------- 1 file changed, 17 insertions(+), 35 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 7434955c8898..ebfefecffd86 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -859,12 +859,6 @@ struct qmp_phy_cfg { }; -struct qmp_phy_dp_clks { - struct qmp_combo *qmp; - struct clk_hw dp_link_hw; - struct clk_hw dp_pixel_hw; -}; - struct qmp_combo { struct device *dev; @@ -900,7 +894,9 @@ struct qmp_combo { struct phy *dp_phy; unsigned int dp_aux_cfg; struct phy_configure_opts_dp dp_opts; - struct qmp_phy_dp_clks *dp_clks; + + struct clk_hw dp_link_hw; + struct clk_hw dp_pixel_hw; }; static void qmp_v3_dp_aux_init(struct qmp_combo *qmp); @@ -1387,7 +1383,6 @@ static bool qmp_combo_configure_dp_mode(struct qmp_combo *qmp) static int qmp_v3_configure_dp_phy(struct qmp_combo *qmp) { - const struct qmp_phy_dp_clks *dp_clks = qmp->dp_clks; const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts; u32 phy_vco_div, status; unsigned long pixel_freq; @@ -1420,8 +1415,8 @@ static int qmp_v3_configure_dp_phy(struct qmp_combo *qmp) } writel(phy_vco_div, qmp->dp_pcs + QSERDES_V3_DP_PHY_VCO_DIV); - clk_set_rate(dp_clks->dp_link_hw.clk, dp_opts->link_rate * 100000); - clk_set_rate(dp_clks->dp_pixel_hw.clk, pixel_freq); + clk_set_rate(qmp->dp_link_hw.clk, dp_opts->link_rate * 100000); + clk_set_rate(qmp->dp_pixel_hw.clk, pixel_freq); writel(0x04, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG2); writel(0x01, qmp->dp_pcs + QSERDES_DP_PHY_CFG); @@ -1518,7 +1513,6 @@ static void qmp_v4_configure_dp_tx(struct qmp_combo *qmp) static int qmp_v45_configure_dp_phy(struct qmp_combo *qmp) { - const struct qmp_phy_dp_clks *dp_clks = qmp->dp_clks; const 
struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts; u32 phy_vco_div, status; unsigned long pixel_freq; @@ -1556,8 +1550,8 @@ static int qmp_v45_configure_dp_phy(struct qmp_combo *qmp) } writel(phy_vco_div, qmp->dp_pcs + QSERDES_V4_DP_PHY_VCO_DIV); - clk_set_rate(dp_clks->dp_link_hw.clk, dp_opts->link_rate * 100000); - clk_set_rate(dp_clks->dp_pixel_hw.clk, pixel_freq); + clk_set_rate(qmp->dp_link_hw.clk, dp_opts->link_rate * 100000); + clk_set_rate(qmp->dp_pixel_hw.clk, pixel_freq); writel(0x01, qmp->dp_pcs + QSERDES_DP_PHY_CFG); writel(0x05, qmp->dp_pcs + QSERDES_DP_PHY_CFG); @@ -2342,12 +2336,10 @@ static int qcom_qmp_dp_pixel_clk_determine_rate(struct clk_hw *hw, static unsigned long qcom_qmp_dp_pixel_clk_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) { - const struct qmp_phy_dp_clks *dp_clks; const struct qmp_combo *qmp; const struct phy_configure_opts_dp *dp_opts; - dp_clks = container_of(hw, struct qmp_phy_dp_clks, dp_pixel_hw); - qmp = dp_clks->qmp; + qmp = container_of(hw, struct qmp_combo, dp_pixel_hw); dp_opts = &qmp->dp_opts; switch (dp_opts->link_rate) { @@ -2386,12 +2378,10 @@ static int qcom_qmp_dp_link_clk_determine_rate(struct clk_hw *hw, static unsigned long qcom_qmp_dp_link_clk_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) { - const struct qmp_phy_dp_clks *dp_clks; const struct qmp_combo *qmp; const struct phy_configure_opts_dp *dp_opts; - dp_clks = container_of(hw, struct qmp_phy_dp_clks, dp_link_hw); - qmp = dp_clks->qmp; + qmp = container_of(hw, struct qmp_combo, dp_link_hw); dp_opts = &qmp->dp_opts; switch (dp_opts->link_rate) { @@ -2413,7 +2403,7 @@ static const struct clk_ops qcom_qmp_dp_link_clk_ops = { static struct clk_hw * qcom_qmp_dp_clks_hw_get(struct of_phandle_args *clkspec, void *data) { - struct qmp_phy_dp_clks *dp_clks = data; + struct qmp_combo *qmp = data; unsigned int idx = clkspec->args[0]; if (idx >= 2) { @@ -2422,42 +2412,34 @@ qcom_qmp_dp_clks_hw_get(struct of_phandle_args *clkspec, void *data) } 
if (idx == 0) - return &dp_clks->dp_link_hw; + return &qmp->dp_link_hw; - return &dp_clks->dp_pixel_hw; + return &qmp->dp_pixel_hw; } static int phy_dp_clks_register(struct qmp_combo *qmp, struct device_node *np) { struct clk_init_data init = { }; - struct qmp_phy_dp_clks *dp_clks; char name[64]; int ret; - dp_clks = devm_kzalloc(qmp->dev, sizeof(*dp_clks), GFP_KERNEL); - if (!dp_clks) - return -ENOMEM; - - dp_clks->qmp = qmp; - qmp->dp_clks = dp_clks; - snprintf(name, sizeof(name), "%s::link_clk", dev_name(qmp->dev)); init.ops = &qcom_qmp_dp_link_clk_ops; init.name = name; - dp_clks->dp_link_hw.init = &init; - ret = devm_clk_hw_register(qmp->dev, &dp_clks->dp_link_hw); + qmp->dp_link_hw.init = &init; + ret = devm_clk_hw_register(qmp->dev, &qmp->dp_link_hw); if (ret) return ret; snprintf(name, sizeof(name), "%s::vco_div_clk", dev_name(qmp->dev)); init.ops = &qcom_qmp_dp_pixel_clk_ops; init.name = name; - dp_clks->dp_pixel_hw.init = &init; - ret = devm_clk_hw_register(qmp->dev, &dp_clks->dp_pixel_hw); + qmp->dp_pixel_hw.init = &init; + ret = devm_clk_hw_register(qmp->dev, &qmp->dp_pixel_hw); if (ret) return ret; - ret = of_clk_add_hw_provider(np, qcom_qmp_dp_clks_hw_get, dp_clks); + ret = of_clk_add_hw_provider(np, qcom_qmp_dp_clks_hw_get, qmp); if (ret) return ret; From 55b1c39b4990ebdab2faa2e4c06d17476d6d2d3c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 21 Nov 2022 09:50:50 +0100 Subject: [PATCH 2709/4122] phy: qcom-qmp-combo: drop redundant clock allocation Since the QMP driver split, there is no reason to allocate the fixed-rate pipe clock structure separately from the driver data. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221121085058.31213-8-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index ebfefecffd86..9b945a72ae9b 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -895,6 +895,7 @@ struct qmp_combo { unsigned int dp_aux_cfg; struct phy_configure_opts_dp dp_opts; + struct clk_fixed_rate pipe_clk_fixed; struct clk_hw dp_link_hw; struct clk_hw dp_pixel_hw; }; @@ -2239,15 +2240,11 @@ static void phy_clk_release_provider(void *res) */ static int phy_pipe_clk_register(struct qmp_combo *qmp, struct device_node *np) { - struct clk_fixed_rate *fixed; + struct clk_fixed_rate *fixed = &qmp->pipe_clk_fixed; struct clk_init_data init = { }; char name[64]; int ret; - fixed = devm_kzalloc(qmp->dev, sizeof(*fixed), GFP_KERNEL); - if (!fixed) - return -ENOMEM; - snprintf(name, sizeof(name), "%s::pipe_clk", dev_name(qmp->dev)); init.name = name; init.ops = &clk_fixed_rate_ops; From 74401c85fb3b134d884d5de968c66784527d12d1 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 21 Nov 2022 09:50:51 +0100 Subject: [PATCH 2710/4122] phy: qcom-qmp-combo: add clock registration helper In preparation for supporting devicetree bindings which do not use child nodes, add a clock registration helper to handle the registration of both the USB and DP clocks. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221121085058.31213-9-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 9b945a72ae9b..1079a16b45f6 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -2447,6 +2447,22 @@ static int phy_dp_clks_register(struct qmp_combo *qmp, struct device_node *np) return devm_add_action_or_reset(qmp->dev, phy_clk_release_provider, np); } +static int qmp_combo_register_clocks(struct qmp_combo *qmp, struct device_node *usb_np, + struct device_node *dp_np) +{ + int ret; + + ret = phy_pipe_clk_register(qmp, usb_np); + if (ret) + return ret; + + ret = phy_dp_clks_register(qmp, dp_np); + if (ret) + return ret; + + return 0; +} + static int qmp_combo_parse_dt_lecacy_dp(struct qmp_combo *qmp, struct device_node *np) { struct device *dev = qmp->dev; @@ -2606,11 +2622,7 @@ static int qmp_combo_probe(struct platform_device *pdev) */ pm_runtime_forbid(dev); - ret = phy_pipe_clk_register(qmp, usb_np); - if (ret) - goto err_node_put; - - ret = phy_dp_clks_register(qmp, dp_np); + ret = qmp_combo_register_clocks(qmp, usb_np, dp_np); if (ret) goto err_node_put; From ce51f7a70a3bbc20c07079c06e7721cabfe34dd9 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 21 Nov 2022 09:50:52 +0100 Subject: [PATCH 2711/4122] phy: qcom-qmp-combo: separate clock and provider registration In preparation for supporting devicetree bindings which do not use child nodes, separate clock registration from clock-provider registration. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221121085058.31213-10-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 44 +++++++++++------------ 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 1079a16b45f6..89a5b51c770d 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -2243,7 +2243,6 @@ static int phy_pipe_clk_register(struct qmp_combo *qmp, struct device_node *np) struct clk_fixed_rate *fixed = &qmp->pipe_clk_fixed; struct clk_init_data init = { }; char name[64]; - int ret; snprintf(name, sizeof(name), "%s::pipe_clk", dev_name(qmp->dev)); init.name = name; @@ -2253,19 +2252,7 @@ static int phy_pipe_clk_register(struct qmp_combo *qmp, struct device_node *np) fixed->fixed_rate = 125000000; fixed->hw.init = &init; - ret = devm_clk_hw_register(qmp->dev, &fixed->hw); - if (ret) - return ret; - - ret = of_clk_add_hw_provider(np, of_clk_hw_simple_get, &fixed->hw); - if (ret) - return ret; - - /* - * Roll a devm action because the clock provider is the child node, but - * the child node is not actually a device. - */ - return devm_add_action_or_reset(qmp->dev, phy_clk_release_provider, np); + return devm_clk_hw_register(qmp->dev, &fixed->hw); } /* @@ -2436,15 +2423,7 @@ static int phy_dp_clks_register(struct qmp_combo *qmp, struct device_node *np) if (ret) return ret; - ret = of_clk_add_hw_provider(np, qcom_qmp_dp_clks_hw_get, qmp); - if (ret) - return ret; - - /* - * Roll a devm action because the clock provider is the child node, but - * the child node is not actually a device. 
- */ - return devm_add_action_or_reset(qmp->dev, phy_clk_release_provider, np); + return 0; } static int qmp_combo_register_clocks(struct qmp_combo *qmp, struct device_node *usb_np, @@ -2460,7 +2439,24 @@ static int qmp_combo_register_clocks(struct qmp_combo *qmp, struct device_node * if (ret) return ret; - return 0; + ret = of_clk_add_hw_provider(usb_np, of_clk_hw_simple_get, + &qmp->pipe_clk_fixed.hw); + if (ret) + return ret; + + /* + * Roll a devm action because the clock provider is the child node, but + * the child node is not actually a device. + */ + ret = devm_add_action_or_reset(qmp->dev, phy_clk_release_provider, usb_np); + if (ret) + return ret; + + ret = of_clk_add_hw_provider(dp_np, qcom_qmp_dp_clks_hw_get, qmp); + if (ret) + return ret; + + return devm_add_action_or_reset(qmp->dev, phy_clk_release_provider, dp_np); } static int qmp_combo_parse_dt_lecacy_dp(struct qmp_combo *qmp, struct device_node *np) From b71bf1ebe936cc63983e5339d218918ed56e9804 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 21 Nov 2022 09:50:53 +0100 Subject: [PATCH 2712/4122] phy: qcom-qmp-combo: clean up DP clock callbacks Clean up the DP clock callbacks somewhat by dropping the redundant "qcom" prefix and removing line breaks after type specifiers. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221121085058.31213-11-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 33 ++++++++++------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 89a5b51c770d..fb3705b00823 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -2304,8 +2304,7 @@ static int phy_pipe_clk_register(struct qmp_combo *qmp, struct device_node *np) * for DP pixel clock * */ -static int qcom_qmp_dp_pixel_clk_determine_rate(struct clk_hw *hw, - struct clk_rate_request *req) +static int qmp_dp_pixel_clk_determine_rate(struct clk_hw *hw, struct clk_rate_request *req) { switch (req->rate) { case 1620000000UL / 2: @@ -2317,8 +2316,7 @@ static int qcom_qmp_dp_pixel_clk_determine_rate(struct clk_hw *hw, } } -static unsigned long -qcom_qmp_dp_pixel_clk_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) +static unsigned long qmp_dp_pixel_clk_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) { const struct qmp_combo *qmp; const struct phy_configure_opts_dp *dp_opts; @@ -2340,13 +2338,12 @@ qcom_qmp_dp_pixel_clk_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) } } -static const struct clk_ops qcom_qmp_dp_pixel_clk_ops = { - .determine_rate = qcom_qmp_dp_pixel_clk_determine_rate, - .recalc_rate = qcom_qmp_dp_pixel_clk_recalc_rate, +static const struct clk_ops qmp_dp_pixel_clk_ops = { + .determine_rate = qmp_dp_pixel_clk_determine_rate, + .recalc_rate = qmp_dp_pixel_clk_recalc_rate, }; -static int qcom_qmp_dp_link_clk_determine_rate(struct clk_hw *hw, - struct clk_rate_request *req) +static int qmp_dp_link_clk_determine_rate(struct clk_hw *hw, struct clk_rate_request *req) { switch (req->rate) { case 162000000: @@ -2359,8 +2356,7 @@ static int qcom_qmp_dp_link_clk_determine_rate(struct 
clk_hw *hw, } } -static unsigned long -qcom_qmp_dp_link_clk_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) +static unsigned long qmp_dp_link_clk_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) { const struct qmp_combo *qmp; const struct phy_configure_opts_dp *dp_opts; @@ -2379,13 +2375,12 @@ qcom_qmp_dp_link_clk_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) } } -static const struct clk_ops qcom_qmp_dp_link_clk_ops = { - .determine_rate = qcom_qmp_dp_link_clk_determine_rate, - .recalc_rate = qcom_qmp_dp_link_clk_recalc_rate, +static const struct clk_ops qmp_dp_link_clk_ops = { + .determine_rate = qmp_dp_link_clk_determine_rate, + .recalc_rate = qmp_dp_link_clk_recalc_rate, }; -static struct clk_hw * -qcom_qmp_dp_clks_hw_get(struct of_phandle_args *clkspec, void *data) +static struct clk_hw *qmp_dp_clks_hw_get(struct of_phandle_args *clkspec, void *data) { struct qmp_combo *qmp = data; unsigned int idx = clkspec->args[0]; @@ -2408,7 +2403,7 @@ static int phy_dp_clks_register(struct qmp_combo *qmp, struct device_node *np) int ret; snprintf(name, sizeof(name), "%s::link_clk", dev_name(qmp->dev)); - init.ops = &qcom_qmp_dp_link_clk_ops; + init.ops = &qmp_dp_link_clk_ops; init.name = name; qmp->dp_link_hw.init = &init; ret = devm_clk_hw_register(qmp->dev, &qmp->dp_link_hw); @@ -2416,7 +2411,7 @@ static int phy_dp_clks_register(struct qmp_combo *qmp, struct device_node *np) return ret; snprintf(name, sizeof(name), "%s::vco_div_clk", dev_name(qmp->dev)); - init.ops = &qcom_qmp_dp_pixel_clk_ops; + init.ops = &qmp_dp_pixel_clk_ops; init.name = name; qmp->dp_pixel_hw.init = &init; ret = devm_clk_hw_register(qmp->dev, &qmp->dp_pixel_hw); @@ -2452,7 +2447,7 @@ static int qmp_combo_register_clocks(struct qmp_combo *qmp, struct device_node * if (ret) return ret; - ret = of_clk_add_hw_provider(dp_np, qcom_qmp_dp_clks_hw_get, qmp); + ret = of_clk_add_hw_provider(dp_np, qmp_dp_clks_hw_get, qmp); if (ret) return ret; From 
9e5b59ea6c216d9b36e3250c2efa081ab4ea2ff5 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 21 Nov 2022 09:50:54 +0100 Subject: [PATCH 2713/4122] phy: qcom-qmp-combo: rename common-register pointers The common registers are shared by the USB and DP parts of the PHY so drop the misleading "dp" prefix from the corresponding pointers. Note that the "DP" prefix could also be dropped from the corresponding defines, but leave that in place for now. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221121085058.31213-12-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 24 +++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index fb3705b00823..5777bd1f76b3 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -864,7 +864,7 @@ struct qmp_combo { const struct qmp_phy_cfg *cfg; - void __iomem *dp_com; + void __iomem *com; void __iomem *serdes; void __iomem *tx; @@ -1767,7 +1767,7 @@ static int qmp_combo_dp_calibrate(struct phy *phy) static int qmp_combo_com_init(struct qmp_combo *qmp) { const struct qmp_phy_cfg *cfg = qmp->cfg; - void __iomem *dp_com = qmp->dp_com; + void __iomem *com = qmp->com; int ret; mutex_lock(&qmp->phy_mutex); @@ -1798,25 +1798,25 @@ static int qmp_combo_com_init(struct qmp_combo *qmp) if (ret) goto err_assert_reset; - qphy_setbits(dp_com, QPHY_V3_DP_COM_POWER_DOWN_CTRL, SW_PWRDN); + qphy_setbits(com, QPHY_V3_DP_COM_POWER_DOWN_CTRL, SW_PWRDN); /* override hardware control for reset of qmp phy */ - qphy_setbits(dp_com, QPHY_V3_DP_COM_RESET_OVRD_CTRL, + qphy_setbits(com, QPHY_V3_DP_COM_RESET_OVRD_CTRL, SW_DPPHY_RESET_MUX | SW_DPPHY_RESET | SW_USB3PHY_RESET_MUX | SW_USB3PHY_RESET); /* Default type-c orientation, i.e CC1 */ - qphy_setbits(dp_com, QPHY_V3_DP_COM_TYPEC_CTRL, 0x02); + 
qphy_setbits(com, QPHY_V3_DP_COM_TYPEC_CTRL, 0x02); - qphy_setbits(dp_com, QPHY_V3_DP_COM_PHY_MODE_CTRL, USB3_MODE | DP_MODE); + qphy_setbits(com, QPHY_V3_DP_COM_PHY_MODE_CTRL, USB3_MODE | DP_MODE); /* bring both QMP USB and QMP DP PHYs PCS block out of reset */ - qphy_clrbits(dp_com, QPHY_V3_DP_COM_RESET_OVRD_CTRL, + qphy_clrbits(com, QPHY_V3_DP_COM_RESET_OVRD_CTRL, SW_DPPHY_RESET_MUX | SW_DPPHY_RESET | SW_USB3PHY_RESET_MUX | SW_USB3PHY_RESET); - qphy_clrbits(dp_com, QPHY_V3_DP_COM_SWI_CTRL, 0x03); - qphy_clrbits(dp_com, QPHY_V3_DP_COM_SW_RESET, SW_RESET); + qphy_clrbits(com, QPHY_V3_DP_COM_SWI_CTRL, 0x03); + qphy_clrbits(com, QPHY_V3_DP_COM_SW_RESET, SW_RESET); qphy_setbits(qmp->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], SW_PWRDN); @@ -2538,9 +2538,9 @@ static int qmp_combo_parse_dt_legacy(struct qmp_combo *qmp, struct device_node * if (IS_ERR(qmp->serdes)) return PTR_ERR(qmp->serdes); - qmp->dp_com = devm_platform_ioremap_resource(pdev, 1); - if (IS_ERR(qmp->dp_com)) - return PTR_ERR(qmp->dp_com); + qmp->com = devm_platform_ioremap_resource(pdev, 1); + if (IS_ERR(qmp->com)) + return PTR_ERR(qmp->com); qmp->dp_serdes = devm_platform_ioremap_resource(pdev, 2); if (IS_ERR(qmp->dp_serdes)) From 133836a7edf4e5783ab0caa669cdb94ab02b9b62 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 21 Nov 2022 09:50:55 +0100 Subject: [PATCH 2714/4122] phy: qcom-qmp-combo: rename DP_PHY register pointer The DP_PHY registers have erroneously been referred to as "PCS" registers since DisplayPort support was added to the QMP drivers (including in the devicetree binding). Rename the corresponding pointer to match the register names. Note that the repeated "dp" in the field name is intentional and this DP register block is called "DP_PHY" (not just "PHY"). 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221121085058.31213-13-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 139 +++++++++++----------- 1 file changed, 70 insertions(+), 69 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 5777bd1f76b3..b82bd0a221d6 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -878,7 +878,7 @@ struct qmp_combo { void __iomem *dp_serdes; void __iomem *dp_tx; void __iomem *dp_tx2; - void __iomem *dp_pcs; + void __iomem *dp_dp_phy; struct clk *pipe_clk; struct clk_bulk_data *clks; @@ -1252,20 +1252,20 @@ static void qmp_v3_dp_aux_init(struct qmp_combo *qmp) { writel(DP_PHY_PD_CTL_PWRDN | DP_PHY_PD_CTL_AUX_PWRDN | DP_PHY_PD_CTL_PLL_PWRDN | DP_PHY_PD_CTL_DP_CLAMP_EN, - qmp->dp_pcs + QSERDES_DP_PHY_PD_CTL); + qmp->dp_dp_phy + QSERDES_DP_PHY_PD_CTL); /* Turn on BIAS current for PHY/PLL */ writel(QSERDES_V3_COM_BIAS_EN | QSERDES_V3_COM_BIAS_EN_MUX | QSERDES_V3_COM_CLKBUF_L_EN | QSERDES_V3_COM_EN_SYSCLK_TX_SEL, qmp->dp_serdes + QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN); - writel(DP_PHY_PD_CTL_PSR_PWRDN, qmp->dp_pcs + QSERDES_DP_PHY_PD_CTL); + writel(DP_PHY_PD_CTL_PSR_PWRDN, qmp->dp_dp_phy + QSERDES_DP_PHY_PD_CTL); writel(DP_PHY_PD_CTL_PWRDN | DP_PHY_PD_CTL_AUX_PWRDN | DP_PHY_PD_CTL_LANE_0_1_PWRDN | DP_PHY_PD_CTL_LANE_2_3_PWRDN | DP_PHY_PD_CTL_PLL_PWRDN | DP_PHY_PD_CTL_DP_CLAMP_EN, - qmp->dp_pcs + QSERDES_DP_PHY_PD_CTL); + qmp->dp_dp_phy + QSERDES_DP_PHY_PD_CTL); writel(QSERDES_V3_COM_BIAS_EN | QSERDES_V3_COM_BIAS_EN_MUX | QSERDES_V3_COM_CLKBUF_R_EN | @@ -1273,22 +1273,22 @@ static void qmp_v3_dp_aux_init(struct qmp_combo *qmp) QSERDES_V3_COM_CLKBUF_RX_DRIVE_L, qmp->dp_serdes + QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN); - writel(0x00, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG0); - writel(0x13, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG1); - writel(0x24, 
qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG2); - writel(0x00, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG3); - writel(0x0a, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG4); - writel(0x26, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG5); - writel(0x0a, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG6); - writel(0x03, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG7); - writel(0xbb, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG8); - writel(0x03, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG9); + writel(0x00, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG0); + writel(0x13, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG1); + writel(0x24, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG2); + writel(0x00, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG3); + writel(0x0a, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG4); + writel(0x26, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG5); + writel(0x0a, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG6); + writel(0x03, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG7); + writel(0xbb, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG8); + writel(0x03, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG9); qmp->dp_aux_cfg = 0; writel(PHY_AUX_STOP_ERR_MASK | PHY_AUX_DEC_ERR_MASK | PHY_AUX_SYNC_ERR_MASK | PHY_AUX_ALIGN_ERR_MASK | PHY_AUX_REQ_ERR_MASK, - qmp->dp_pcs + QSERDES_V3_DP_PHY_AUX_INTERRUPT_MASK); + qmp->dp_dp_phy + QSERDES_V3_DP_PHY_AUX_INTERRUPT_MASK); } static int qmp_combo_configure_dp_swing(struct qmp_combo *qmp, @@ -1372,12 +1372,12 @@ static bool qmp_combo_configure_dp_mode(struct qmp_combo *qmp) * if (lane_cnt == 4 || orientation == ORIENTATION_CC1) * val |= DP_PHY_PD_CTL_LANE_2_3_PWRDN; * if (orientation == ORIENTATION_CC2) - * writel(0x4c, qmp->dp_pcs + QSERDES_V3_DP_PHY_MODE); + * writel(0x4c, qmp->dp_dp_phy + QSERDES_V3_DP_PHY_MODE); */ val |= DP_PHY_PD_CTL_LANE_2_3_PWRDN; - writel(val, qmp->dp_pcs + QSERDES_DP_PHY_PD_CTL); + writel(val, qmp->dp_dp_phy + QSERDES_DP_PHY_PD_CTL); - writel(0x5c, qmp->dp_pcs + QSERDES_DP_PHY_MODE); + writel(0x5c, qmp->dp_dp_phy + QSERDES_DP_PHY_MODE); return reverse; } @@ -1390,8 +1390,8 @@ static int qmp_v3_configure_dp_phy(struct 
qmp_combo *qmp) qmp_combo_configure_dp_mode(qmp); - writel(0x05, qmp->dp_pcs + QSERDES_V3_DP_PHY_TX0_TX1_LANE_CTL); - writel(0x05, qmp->dp_pcs + QSERDES_V3_DP_PHY_TX2_TX3_LANE_CTL); + writel(0x05, qmp->dp_dp_phy + QSERDES_V3_DP_PHY_TX0_TX1_LANE_CTL); + writel(0x05, qmp->dp_dp_phy + QSERDES_V3_DP_PHY_TX2_TX3_LANE_CTL); switch (dp_opts->link_rate) { case 1620: @@ -1414,16 +1414,16 @@ static int qmp_v3_configure_dp_phy(struct qmp_combo *qmp) /* Other link rates aren't supported */ return -EINVAL; } - writel(phy_vco_div, qmp->dp_pcs + QSERDES_V3_DP_PHY_VCO_DIV); + writel(phy_vco_div, qmp->dp_dp_phy + QSERDES_V3_DP_PHY_VCO_DIV); clk_set_rate(qmp->dp_link_hw.clk, dp_opts->link_rate * 100000); clk_set_rate(qmp->dp_pixel_hw.clk, pixel_freq); - writel(0x04, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG2); - writel(0x01, qmp->dp_pcs + QSERDES_DP_PHY_CFG); - writel(0x05, qmp->dp_pcs + QSERDES_DP_PHY_CFG); - writel(0x01, qmp->dp_pcs + QSERDES_DP_PHY_CFG); - writel(0x09, qmp->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x04, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG2); + writel(0x01, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG); + writel(0x05, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG); + writel(0x01, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG); + writel(0x09, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG); writel(0x20, qmp->dp_serdes + QSERDES_V3_COM_RESETSM_CNTRL); @@ -1434,20 +1434,20 @@ static int qmp_v3_configure_dp_phy(struct qmp_combo *qmp) 10000)) return -ETIMEDOUT; - writel(0x19, qmp->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x19, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG); - if (readl_poll_timeout(qmp->dp_pcs + QSERDES_V3_DP_PHY_STATUS, + if (readl_poll_timeout(qmp->dp_dp_phy + QSERDES_V3_DP_PHY_STATUS, status, ((status & BIT(1)) > 0), 500, 10000)) return -ETIMEDOUT; - writel(0x18, qmp->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x18, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG); udelay(2000); - writel(0x19, qmp->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x19, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG); - return readl_poll_timeout(qmp->dp_pcs 
+ QSERDES_V3_DP_PHY_STATUS, + return readl_poll_timeout(qmp->dp_dp_phy + QSERDES_V3_DP_PHY_STATUS, status, ((status & BIT(1)) > 0), 500, @@ -1467,7 +1467,7 @@ static int qmp_v3_calibrate_dp_phy(struct qmp_combo *qmp) qmp->dp_aux_cfg %= ARRAY_SIZE(cfg1_settings); val = cfg1_settings[qmp->dp_aux_cfg]; - writel(val, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG1); + writel(val, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG1); return 0; } @@ -1476,27 +1476,27 @@ static void qmp_v4_dp_aux_init(struct qmp_combo *qmp) { writel(DP_PHY_PD_CTL_PWRDN | DP_PHY_PD_CTL_PSR_PWRDN | DP_PHY_PD_CTL_AUX_PWRDN | DP_PHY_PD_CTL_PLL_PWRDN | DP_PHY_PD_CTL_DP_CLAMP_EN, - qmp->dp_pcs + QSERDES_DP_PHY_PD_CTL); + qmp->dp_dp_phy + QSERDES_DP_PHY_PD_CTL); /* Turn on BIAS current for PHY/PLL */ writel(0x17, qmp->dp_serdes + QSERDES_V4_COM_BIAS_EN_CLKBUFLR_EN); - writel(0x00, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG0); - writel(0x13, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG1); - writel(0xa4, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG2); - writel(0x00, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG3); - writel(0x0a, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG4); - writel(0x26, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG5); - writel(0x0a, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG6); - writel(0x03, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG7); - writel(0xb7, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG8); - writel(0x03, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG9); + writel(0x00, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG0); + writel(0x13, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG1); + writel(0xa4, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG2); + writel(0x00, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG3); + writel(0x0a, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG4); + writel(0x26, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG5); + writel(0x0a, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG6); + writel(0x03, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG7); + writel(0xb7, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG8); + writel(0x03, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG9); qmp->dp_aux_cfg = 0; 
writel(PHY_AUX_STOP_ERR_MASK | PHY_AUX_DEC_ERR_MASK | PHY_AUX_SYNC_ERR_MASK | PHY_AUX_ALIGN_ERR_MASK | PHY_AUX_REQ_ERR_MASK, - qmp->dp_pcs + QSERDES_V4_DP_PHY_AUX_INTERRUPT_MASK); + qmp->dp_dp_phy + QSERDES_V4_DP_PHY_AUX_INTERRUPT_MASK); } static void qmp_v4_configure_dp_tx(struct qmp_combo *qmp) @@ -1518,15 +1518,15 @@ static int qmp_v45_configure_dp_phy(struct qmp_combo *qmp) u32 phy_vco_div, status; unsigned long pixel_freq; - writel(0x0f, qmp->dp_pcs + QSERDES_V4_DP_PHY_CFG_1); + writel(0x0f, qmp->dp_dp_phy + QSERDES_V4_DP_PHY_CFG_1); qmp_combo_configure_dp_mode(qmp); - writel(0x13, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG1); - writel(0xa4, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG2); + writel(0x13, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG1); + writel(0xa4, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG2); - writel(0x05, qmp->dp_pcs + QSERDES_V4_DP_PHY_TX0_TX1_LANE_CTL); - writel(0x05, qmp->dp_pcs + QSERDES_V4_DP_PHY_TX2_TX3_LANE_CTL); + writel(0x05, qmp->dp_dp_phy + QSERDES_V4_DP_PHY_TX0_TX1_LANE_CTL); + writel(0x05, qmp->dp_dp_phy + QSERDES_V4_DP_PHY_TX2_TX3_LANE_CTL); switch (dp_opts->link_rate) { case 1620: @@ -1549,15 +1549,15 @@ static int qmp_v45_configure_dp_phy(struct qmp_combo *qmp) /* Other link rates aren't supported */ return -EINVAL; } - writel(phy_vco_div, qmp->dp_pcs + QSERDES_V4_DP_PHY_VCO_DIV); + writel(phy_vco_div, qmp->dp_dp_phy + QSERDES_V4_DP_PHY_VCO_DIV); clk_set_rate(qmp->dp_link_hw.clk, dp_opts->link_rate * 100000); clk_set_rate(qmp->dp_pixel_hw.clk, pixel_freq); - writel(0x01, qmp->dp_pcs + QSERDES_DP_PHY_CFG); - writel(0x05, qmp->dp_pcs + QSERDES_DP_PHY_CFG); - writel(0x01, qmp->dp_pcs + QSERDES_DP_PHY_CFG); - writel(0x09, qmp->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x01, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG); + writel(0x05, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG); + writel(0x01, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG); + writel(0x09, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG); writel(0x20, qmp->dp_serdes + QSERDES_V4_COM_RESETSM_CNTRL); @@ -1582,16 +1582,16 @@ 
static int qmp_v45_configure_dp_phy(struct qmp_combo *qmp) 10000)) return -ETIMEDOUT; - writel(0x19, qmp->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x19, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG); - if (readl_poll_timeout(qmp->dp_pcs + QSERDES_V4_DP_PHY_STATUS, + if (readl_poll_timeout(qmp->dp_dp_phy + QSERDES_V4_DP_PHY_STATUS, status, ((status & BIT(0)) > 0), 500, 10000)) return -ETIMEDOUT; - if (readl_poll_timeout(qmp->dp_pcs + QSERDES_V4_DP_PHY_STATUS, + if (readl_poll_timeout(qmp->dp_dp_phy + QSERDES_V4_DP_PHY_STATUS, status, ((status & BIT(1)) > 0), 500, @@ -1640,11 +1640,11 @@ static int qmp_v4_configure_dp_phy(struct qmp_combo *qmp) writel(drvr1_en, qmp->dp_tx2 + QSERDES_V4_TX_HIGHZ_DRVR_EN); writel(bias1_en, qmp->dp_tx2 + QSERDES_V4_TX_TRANSCEIVER_BIAS_EN); - writel(0x18, qmp->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x18, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG); udelay(2000); - writel(0x19, qmp->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x19, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG); - if (readl_poll_timeout(qmp->dp_pcs + QSERDES_V4_DP_PHY_STATUS, + if (readl_poll_timeout(qmp->dp_dp_phy + QSERDES_V4_DP_PHY_STATUS, status, ((status & BIT(1)) > 0), 500, @@ -1697,11 +1697,11 @@ static int qmp_v5_configure_dp_phy(struct qmp_combo *qmp) writel(drvr1_en, qmp->dp_tx2 + QSERDES_V5_5NM_TX_HIGHZ_DRVR_EN); writel(bias1_en, qmp->dp_tx2 + QSERDES_V5_5NM_TX_TRANSCEIVER_BIAS_EN); - writel(0x18, qmp->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x18, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG); udelay(2000); - writel(0x19, qmp->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x19, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG); - if (readl_poll_timeout(qmp->dp_pcs + QSERDES_V4_DP_PHY_STATUS, + if (readl_poll_timeout(qmp->dp_dp_phy + QSERDES_V4_DP_PHY_STATUS, status, ((status & BIT(1)) > 0), 500, @@ -1733,7 +1733,7 @@ static int qmp_v4_calibrate_dp_phy(struct qmp_combo *qmp) qmp->dp_aux_cfg %= ARRAY_SIZE(cfg1_settings); val = cfg1_settings[qmp->dp_aux_cfg]; - writel(val, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG1); + writel(val, 
qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG1); return 0; } @@ -1906,7 +1906,7 @@ static int qmp_combo_dp_power_off(struct phy *phy) struct qmp_combo *qmp = phy_get_drvdata(phy); /* Assert DP PHY power down */ - writel(DP_PHY_PD_CTL_PSR_PWRDN, qmp->dp_pcs + QSERDES_DP_PHY_PD_CTL); + writel(DP_PHY_PD_CTL_PSR_PWRDN, qmp->dp_dp_phy + QSERDES_DP_PHY_PD_CTL); return 0; } @@ -2463,15 +2463,16 @@ static int qmp_combo_parse_dt_lecacy_dp(struct qmp_combo *qmp, struct device_nod * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2; * tx2 -> 3; rx2 -> 4 * - * Note that only tx/tx2 and pcs are used by the DP implementation. + * Note that only tx/tx2 and pcs (dp_phy) are used by the DP + * implementation. */ qmp->dp_tx = devm_of_iomap(dev, np, 0, NULL); if (IS_ERR(qmp->dp_tx)) return PTR_ERR(qmp->dp_tx); - qmp->dp_pcs = devm_of_iomap(dev, np, 2, NULL); - if (IS_ERR(qmp->dp_pcs)) - return PTR_ERR(qmp->dp_pcs); + qmp->dp_dp_phy = devm_of_iomap(dev, np, 2, NULL); + if (IS_ERR(qmp->dp_dp_phy)) + return PTR_ERR(qmp->dp_dp_phy); qmp->dp_tx2 = devm_of_iomap(dev, np, 3, NULL); if (IS_ERR(qmp->dp_tx2)) From 83a0bbe39b1797cab47665efcf689f774b42af88 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 21 Nov 2022 09:50:56 +0100 Subject: [PATCH 2715/4122] phy: qcom-qmp-combo: add support for updated sc8280xp binding Add support for the new SC8280XP binding. Note that the binding does not try to describe every register subregion and instead the driver holds the corresponding offsets. Also note that (possibly) unlike on earlier platforms, the TX registers are used by both the USB and DP implementation. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221121085058.31213-14-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 145 ++++++++++++++++++++-- 1 file changed, 134 insertions(+), 11 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index b82bd0a221d6..77052c66cf70 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -20,7 +20,7 @@ #include #include -#include +#include #include "phy-qcom-qmp.h" @@ -798,7 +798,23 @@ static const u8 qmp_dp_v5_voltage_swing_hbr_rbr[4][4] = { struct qmp_combo; +struct qmp_combo_offsets { + u16 com; + u16 txa; + u16 rxa; + u16 txb; + u16 rxb; + u16 usb3_serdes; + u16 usb3_pcs_misc; + u16 usb3_pcs; + u16 usb3_pcs_usb; + u16 dp_serdes; + u16 dp_dp_phy; +}; + struct qmp_phy_cfg { + const struct qmp_combo_offsets *offsets; + /* Init sequence for PHY blocks - serdes, tx, rx, pcs */ const struct qmp_phy_init_tbl *serdes_tbl; int serdes_tbl_num; @@ -959,6 +975,20 @@ static const char * const sc7180_usb3phy_reset_l[] = { "phy", }; +static const struct qmp_combo_offsets qmp_combo_offsets_v5 = { + .com = 0x0000, + .txa = 0x0400, + .rxa = 0x0600, + .txb = 0x0a00, + .rxb = 0x0c00, + .usb3_serdes = 0x1000, + .usb3_pcs_misc = 0x1200, + .usb3_pcs = 0x1400, + .usb3_pcs_usb = 0x1700, + .dp_serdes = 0x2000, + .dp_dp_phy = 0x2200, +}; + static const struct qmp_phy_cfg sc7180_usb3dpphy_cfg = { .serdes_tbl = qmp_v3_usb3_serdes_tbl, .serdes_tbl_num = ARRAY_SIZE(qmp_v3_usb3_serdes_tbl), @@ -1098,6 +1128,8 @@ static const struct qmp_phy_cfg sc8180x_usb3dpphy_cfg = { }; static const struct qmp_phy_cfg sc8280xp_usb43dpphy_cfg = { + .offsets = &qmp_combo_offsets_v5, + .serdes_tbl = sc8280xp_usb43dp_serdes_tbl, .serdes_tbl_num = ARRAY_SIZE(sc8280xp_usb43dp_serdes_tbl), .tx_tbl = sc8280xp_usb43dp_tx_tbl, @@ -1138,7 +1170,6 @@ static const struct qmp_phy_cfg 
sc8280xp_usb43dpphy_cfg = { .vreg_list = qmp_phy_vreg_l, .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = qmp_v4_usb3phy_regs_layout, - .pcs_usb_offset = 0x300, }; static const struct qmp_phy_cfg sm8250_usb3dpphy_cfg = { @@ -2421,6 +2452,22 @@ static int phy_dp_clks_register(struct qmp_combo *qmp, struct device_node *np) return 0; } +static struct clk_hw *qmp_combo_clk_hw_get(struct of_phandle_args *clkspec, void *data) +{ + struct qmp_combo *qmp = data; + + switch (clkspec->args[0]) { + case QMP_USB43DP_USB3_PIPE_CLK: + return &qmp->pipe_clk_fixed.hw; + case QMP_USB43DP_DP_LINK_CLK: + return &qmp->dp_link_hw; + case QMP_USB43DP_DP_VCO_DIV_CLK: + return &qmp->dp_pixel_hw; + } + + return ERR_PTR(-EINVAL); +} + static int qmp_combo_register_clocks(struct qmp_combo *qmp, struct device_node *usb_np, struct device_node *dp_np) { @@ -2434,6 +2481,15 @@ static int qmp_combo_register_clocks(struct qmp_combo *qmp, struct device_node * if (ret) return ret; + /* + * Register a single provider for bindings without child nodes. + */ + if (usb_np == qmp->dev->of_node) + return devm_of_clk_add_hw_provider(qmp->dev, qmp_combo_clk_hw_get, qmp); + + /* + * Register multiple providers for legacy bindings with child nodes. 
+ */ ret = of_clk_add_hw_provider(usb_np, of_clk_hw_simple_get, &qmp->pipe_clk_fixed.hw); if (ret) @@ -2558,6 +2614,63 @@ static int qmp_combo_parse_dt_legacy(struct qmp_combo *qmp, struct device_node * return 0; } +static int qmp_combo_parse_dt(struct qmp_combo *qmp) +{ + struct platform_device *pdev = to_platform_device(qmp->dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; + const struct qmp_combo_offsets *offs = cfg->offsets; + struct device *dev = qmp->dev; + void __iomem *base; + + if (!offs) + return -EINVAL; + + base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(base)) + return PTR_ERR(base); + + qmp->com = base + offs->com; + qmp->tx = base + offs->txa; + qmp->rx = base + offs->rxa; + qmp->tx2 = base + offs->txb; + qmp->rx2 = base + offs->rxb; + + qmp->serdes = base + offs->usb3_serdes; + qmp->pcs_misc = base + offs->usb3_pcs_misc; + qmp->pcs = base + offs->usb3_pcs; + qmp->pcs_usb = base + offs->usb3_pcs_usb; + + qmp->dp_serdes = base + offs->dp_serdes; + qmp->dp_tx = base + offs->txa; + qmp->dp_tx2 = base + offs->txb; + qmp->dp_dp_phy = base + offs->dp_dp_phy; + + qmp->pipe_clk = devm_clk_get(dev, "usb3_pipe"); + if (IS_ERR(qmp->pipe_clk)) { + return dev_err_probe(dev, PTR_ERR(qmp->pipe_clk), + "failed to get usb3_pipe clock\n"); + } + + return 0; +} + +static struct phy *qmp_combo_phy_xlate(struct device *dev, struct of_phandle_args *args) +{ + struct qmp_combo *qmp = dev_get_drvdata(dev); + + if (args->args_count == 0) + return ERR_PTR(-EINVAL); + + switch (args->args[0]) { + case QMP_USB43DP_USB3_PHY: + return qmp->usb_phy; + case QMP_USB43DP_DP_PHY: + return qmp->dp_phy; + } + + return ERR_PTR(-EINVAL); +} + static int qmp_combo_probe(struct platform_device *pdev) { struct qmp_combo *qmp; @@ -2590,17 +2703,22 @@ static int qmp_combo_probe(struct platform_device *pdev) if (ret) return ret; + /* Check for legacy binding with child nodes. 
*/ usb_np = of_get_child_by_name(dev->of_node, "usb3-phy"); - if (!usb_np) - return -EINVAL; + if (usb_np) { + dp_np = of_get_child_by_name(dev->of_node, "dp-phy"); + if (!dp_np) { + of_node_put(usb_np); + return -EINVAL; + } - dp_np = of_get_child_by_name(dev->of_node, "dp-phy"); - if (!dp_np) { - of_node_put(usb_np); - return -EINVAL; + ret = qmp_combo_parse_dt_legacy(qmp, usb_np, dp_np); + } else { + usb_np = of_node_get(dev->of_node); + dp_np = of_node_get(dev->of_node); + + ret = qmp_combo_parse_dt(qmp); } - - ret = qmp_combo_parse_dt_legacy(qmp, usb_np, dp_np); if (ret) goto err_node_put; @@ -2636,7 +2754,12 @@ static int qmp_combo_probe(struct platform_device *pdev) phy_set_drvdata(qmp->dp_phy, qmp); - phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); + dev_set_drvdata(dev, qmp); + + if (usb_np == dev->of_node) + phy_provider = devm_of_phy_provider_register(dev, qmp_combo_phy_xlate); + else + phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); of_node_put(usb_np); of_node_put(dp_np); From 1446d03ec290760788b1868b5aa967383d86dd77 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 11 Nov 2022 10:38:55 +0100 Subject: [PATCH 2716/4122] dt-bindings: phy: qcom,sc8280xp-qmp-usb3-uni: drop reference-clock source The source clock for the reference clock is not used by the PHY directly and should not be included in the devicetree binding. 
Fixes: e8e58e29a0c9 ("dt-bindings: phy: qcom,qmp-usb: fix sc8280xp binding") Signed-off-by: Johan Hovold Reviewed-by: Krzysztof Kozlowski Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20221111093857.11360-2-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- .../bindings/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml index ef080509747a..16fce1038285 100644 --- a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml @@ -22,12 +22,11 @@ properties: maxItems: 1 clocks: - maxItems: 5 + maxItems: 4 clock-names: items: - const: aux - - const: ref_clk_src - const: ref - const: com_aux - const: pipe @@ -82,12 +81,10 @@ examples: reg = <0x088ef000 0x2000>; clocks = <&gcc GCC_USB3_MP_PHY_AUX_CLK>, - <&rpmhcc RPMH_CXO_CLK>, <&gcc GCC_USB3_MP0_CLKREF_CLK>, <&gcc GCC_USB3_MP_PHY_COM_AUX_CLK>, <&gcc GCC_USB3_MP_PHY_PIPE_0_CLK>; - clock-names = "aux", "ref_clk_src", "ref", "com_aux", - "pipe"; + clock-names = "aux", "ref", "com_aux", "pipe"; power-domains = <&gcc USB30_MP_GDSC>; From 3b41b61a2fe4174ba43fdb599c9d6accd35ac179 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 11 Nov 2022 10:38:56 +0100 Subject: [PATCH 2717/4122] phy: qcom-qmp-usb: drop sc8280xp reference-clock source The source clock for the reference clock is not used by the PHY directly and should not be described by the devicetree (instead this relationship should be modelled in the clock driver). Drop the driver management of the reference-clock source for SC8280XP. Once the other clock drivers have been updated, the corresponding change can be done also for the other QMP v4 platforms. 
Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20221111093857.11360-3-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 55029ea63f73..9fbad6b1d3ab 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -1523,6 +1523,10 @@ static const char * const qmp_v3_phy_clk_l[] = { }; static const char * const qmp_v4_phy_clk_l[] = { + "aux", "ref", "com_aux", +}; + +static const char * const qmp_v4_ref_phy_clk_l[] = { "aux", "ref_clk_src", "ref", "com_aux", }; @@ -1729,8 +1733,8 @@ static const struct qmp_phy_cfg sm8150_usb3phy_cfg = { .pcs_tbl_num = ARRAY_SIZE(sm8150_usb3_pcs_tbl), .pcs_usb_tbl = sm8150_usb3_pcs_usb_tbl, .pcs_usb_tbl_num = ARRAY_SIZE(sm8150_usb3_pcs_usb_tbl), - .clk_list = qmp_v4_phy_clk_l, - .num_clks = ARRAY_SIZE(qmp_v4_phy_clk_l), + .clk_list = qmp_v4_ref_phy_clk_l, + .num_clks = ARRAY_SIZE(qmp_v4_ref_phy_clk_l), .reset_list = msm8996_usb3phy_reset_l, .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), .vreg_list = qmp_phy_vreg_l, @@ -1755,8 +1759,8 @@ static const struct qmp_phy_cfg sm8150_usb3_uniphy_cfg = { .pcs_tbl_num = ARRAY_SIZE(sm8150_usb3_uniphy_pcs_tbl), .pcs_usb_tbl = sm8150_usb3_uniphy_pcs_usb_tbl, .pcs_usb_tbl_num = ARRAY_SIZE(sm8150_usb3_uniphy_pcs_usb_tbl), - .clk_list = qmp_v4_phy_clk_l, - .num_clks = ARRAY_SIZE(qmp_v4_phy_clk_l), + .clk_list = qmp_v4_ref_phy_clk_l, + .num_clks = ARRAY_SIZE(qmp_v4_ref_phy_clk_l), .reset_list = msm8996_usb3phy_reset_l, .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), .vreg_list = qmp_phy_vreg_l, @@ -1806,8 +1810,8 @@ static const struct qmp_phy_cfg sm8250_usb3_uniphy_cfg = { .pcs_tbl_num = ARRAY_SIZE(sm8250_usb3_uniphy_pcs_tbl), .pcs_usb_tbl = sm8250_usb3_uniphy_pcs_usb_tbl, 
.pcs_usb_tbl_num = ARRAY_SIZE(sm8250_usb3_uniphy_pcs_usb_tbl), - .clk_list = qmp_v4_phy_clk_l, - .num_clks = ARRAY_SIZE(qmp_v4_phy_clk_l), + .clk_list = qmp_v4_ref_phy_clk_l, + .num_clks = ARRAY_SIZE(qmp_v4_ref_phy_clk_l), .reset_list = msm8996_usb3phy_reset_l, .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), .vreg_list = qmp_phy_vreg_l, @@ -1907,8 +1911,8 @@ static const struct qmp_phy_cfg sm8350_usb3_uniphy_cfg = { .pcs_tbl_num = ARRAY_SIZE(sm8350_usb3_uniphy_pcs_tbl), .pcs_usb_tbl = sm8350_usb3_uniphy_pcs_usb_tbl, .pcs_usb_tbl_num = ARRAY_SIZE(sm8350_usb3_uniphy_pcs_usb_tbl), - .clk_list = qmp_v4_phy_clk_l, - .num_clks = ARRAY_SIZE(qmp_v4_phy_clk_l), + .clk_list = qmp_v4_ref_phy_clk_l, + .num_clks = ARRAY_SIZE(qmp_v4_ref_phy_clk_l), .reset_list = msm8996_usb3phy_reset_l, .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), .vreg_list = qmp_phy_vreg_l, From 905abf1229efd33aa57f3f65881c378770dfbb65 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 11 Nov 2022 10:42:37 +0100 Subject: [PATCH 2718/4122] phy: qcom-qmp: drop unused type header The PHY type defines are no longer used in the PCIe, UFS and USB QMP drivers so drop the corresponding include. 
Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221111094239.11547-2-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c | 2 -- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 2 -- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 2 -- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 2 -- 4 files changed, 8 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c index ff198d846fd2..a088477e274f 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c @@ -20,8 +20,6 @@ #include #include -#include - #include "phy-qcom-qmp.h" /* QPHY_SW_RESET bit */ diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index 47cccc4b35b2..53dde20a4ce0 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -23,8 +23,6 @@ #include #include -#include - #include "phy-qcom-qmp.h" /* QPHY_SW_RESET bit */ diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index 189103d1bd18..318eea35b972 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -20,8 +20,6 @@ #include #include -#include - #include "phy-qcom-qmp.h" /* QPHY_SW_RESET bit */ diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 9fbad6b1d3ab..64b9472a1a70 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -20,8 +20,6 @@ #include #include -#include - #include "phy-qcom-qmp.h" /* QPHY_SW_RESET bit */ From 64e1f12b2658c1abca55cffd9413f2d3c3bbfa8f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 11 Nov 2022 10:42:38 +0100 Subject: [PATCH 2719/4122] phy: qcom-qmp-usb: drop redundant clock allocation Since the QMP driver split, there is no reason to 
allocate the fixed-rate pipe clock structure separately from the driver data. Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221111094239.11547-3-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 64b9472a1a70..4aa338fc4643 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -1485,6 +1485,8 @@ struct qmp_usb { enum phy_mode mode; struct phy *phy; + + struct clk_fixed_rate pipe_clk_fixed; }; static inline void qphy_setbits(void __iomem *base, u32 offset, u32 val) @@ -2357,7 +2359,7 @@ static void phy_clk_release_provider(void *res) */ static int phy_pipe_clk_register(struct qmp_usb *qmp, struct device_node *np) { - struct clk_fixed_rate *fixed; + struct clk_fixed_rate *fixed = &qmp->pipe_clk_fixed; struct clk_init_data init = { }; int ret; @@ -2367,10 +2369,6 @@ static int phy_pipe_clk_register(struct qmp_usb *qmp, struct device_node *np) return ret; } - fixed = devm_kzalloc(qmp->dev, sizeof(*fixed), GFP_KERNEL); - if (!fixed) - return -ENOMEM; - init.ops = &clk_fixed_rate_ops; /* controllers using QMP phys use 125MHz pipe clock interface */ From e8511f407b078330dfcca0c7200e72b7638b6e17 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 11 Nov 2022 10:42:39 +0100 Subject: [PATCH 2720/4122] phy: qcom-qmp-pcie: drop redundant clock allocation Since the QMP driver split, there is no reason to allocate the fixed-rate pipe clock structure separately from the driver data. 
Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221111094239.11547-4-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index 53dde20a4ce0..1b136a87053f 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -1550,6 +1550,8 @@ struct qmp_pcie { struct phy *phy; int mode; + + struct clk_fixed_rate pipe_clk_fixed; }; static inline void qphy_setbits(void __iomem *base, u32 offset, u32 val) @@ -2416,7 +2418,7 @@ static void phy_clk_release_provider(void *res) */ static int phy_pipe_clk_register(struct qmp_pcie *qmp, struct device_node *np) { - struct clk_fixed_rate *fixed; + struct clk_fixed_rate *fixed = &qmp->pipe_clk_fixed; struct clk_init_data init = { }; int ret; @@ -2426,10 +2428,6 @@ static int phy_pipe_clk_register(struct qmp_pcie *qmp, struct device_node *np) return ret; } - fixed = devm_kzalloc(qmp->dev, sizeof(*fixed), GFP_KERNEL); - if (!fixed) - return -ENOMEM; - init.ops = &clk_fixed_rate_ops; /* From 5a0d2df462568486b85a88ed2c88ffbfa1645cd1 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sun, 13 Nov 2022 20:21:06 -0600 Subject: [PATCH 2721/4122] dt-bindings: sun6i-a31-mipi-dphy: Add the interrupts property The sun6i DPHY can generate several interrupts, mostly for reporting error conditions, but also for detecting BTA and UPLS sequences. Document this capability in order to accurately describe the hardware. The DPHY has no interrupt number provided in the vendor documentation because its interrupt line is shared with the DSI controller. A trivial interrupt handler was used to verify that interrupts were in fact generated by the DPHY and not the DSI controller. 
Signed-off-by: Samuel Holland Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221114022113.31694-2-samuel@sholland.org Signed-off-by: Vinod Koul --- .../bindings/phy/allwinner,sun6i-a31-mipi-dphy.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Documentation/devicetree/bindings/phy/allwinner,sun6i-a31-mipi-dphy.yaml b/Documentation/devicetree/bindings/phy/allwinner,sun6i-a31-mipi-dphy.yaml index dfb6a8993535..4c45d7fcc853 100644 --- a/Documentation/devicetree/bindings/phy/allwinner,sun6i-a31-mipi-dphy.yaml +++ b/Documentation/devicetree/bindings/phy/allwinner,sun6i-a31-mipi-dphy.yaml @@ -24,6 +24,9 @@ properties: reg: maxItems: 1 + interrupts: + maxItems: 1 + clocks: items: - description: Bus Clock @@ -53,6 +56,7 @@ required: - "#phy-cells" - compatible - reg + - interrupts - clocks - clock-names - resets @@ -61,9 +65,12 @@ additionalProperties: false examples: - | + #include + dphy0: d-phy@1ca1000 { compatible = "allwinner,sun6i-a31-mipi-dphy"; reg = <0x01ca1000 0x1000>; + interrupts = ; clocks = <&ccu 23>, <&ccu 97>; clock-names = "bus", "mod"; resets = <&ccu 4>; From e7a838694185c7d0965baa9ed2515f2e0ff8d502 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sun, 13 Nov 2022 20:21:09 -0600 Subject: [PATCH 2722/4122] dt-bindings: sun6i-a31-mipi-dphy: Add the A100 DPHY variant A100 features an updated DPHY, which moves PLL control inside the DPHY register space. (Previously PLL-MIPI was controlled from the CCU. This does not affect the "clocks" property because the link between PLL-MIPI and the DPHY was never represented in the devicetree.) It also requires a modified analog power-on sequence. Finally, the new DPHY adds support for operating as an LVDS PHY. D1 uses this same variant. 
Acked-by: Krzysztof Kozlowski Reviewed-by: Paul Kocialkowski Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20221114022113.31694-5-samuel@sholland.org Signed-off-by: Vinod Koul --- .../bindings/phy/allwinner,sun6i-a31-mipi-dphy.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/phy/allwinner,sun6i-a31-mipi-dphy.yaml b/Documentation/devicetree/bindings/phy/allwinner,sun6i-a31-mipi-dphy.yaml index 4c45d7fcc853..fe9702e7bdd8 100644 --- a/Documentation/devicetree/bindings/phy/allwinner,sun6i-a31-mipi-dphy.yaml +++ b/Documentation/devicetree/bindings/phy/allwinner,sun6i-a31-mipi-dphy.yaml @@ -17,9 +17,13 @@ properties: compatible: oneOf: - const: allwinner,sun6i-a31-mipi-dphy + - const: allwinner,sun50i-a100-mipi-dphy - items: - const: allwinner,sun50i-a64-mipi-dphy - const: allwinner,sun6i-a31-mipi-dphy + - items: + - const: allwinner,sun20i-d1-mipi-dphy + - const: allwinner,sun50i-a100-mipi-dphy reg: maxItems: 1 From a709ae51e22802822de85ec7b672cf1cc5412fc0 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sun, 13 Nov 2022 20:21:10 -0600 Subject: [PATCH 2723/4122] phy: allwinner: phy-sun6i-mipi-dphy: Make RX support optional While all variants of the DPHY likely support RX mode, the new variant in the A100 is not used in this direction by the BSP, and it has some analog register changes, so its RX power-on sequence is unknown. To be safe, limit RX support to variants where the power-on sequence is known. 
Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20221114022113.31694-6-samuel@sholland.org Signed-off-by: Vinod Koul --- drivers/phy/allwinner/phy-sun6i-mipi-dphy.c | 26 +++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c b/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c index 3900f1650851..7d7322670a83 100644 --- a/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c +++ b/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c @@ -114,6 +114,10 @@ enum sun6i_dphy_direction { SUN6I_DPHY_DIRECTION_RX, }; +struct sun6i_dphy_variant { + bool rx_supported; +}; + struct sun6i_dphy { struct clk *bus_clk; struct clk *mod_clk; @@ -123,6 +127,7 @@ struct sun6i_dphy { struct phy *phy; struct phy_configure_opts_mipi_dphy config; + const struct sun6i_dphy_variant *variant; enum sun6i_dphy_direction direction; }; @@ -409,6 +414,10 @@ static int sun6i_dphy_probe(struct platform_device *pdev) if (!dphy) return -ENOMEM; + dphy->variant = device_get_match_data(&pdev->dev); + if (!dphy->variant) + return -EINVAL; + regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(regs)) { dev_err(&pdev->dev, "Couldn't map the DPHY encoder registers\n"); @@ -445,8 +454,14 @@ static int sun6i_dphy_probe(struct platform_device *pdev) ret = of_property_read_string(pdev->dev.of_node, "allwinner,direction", &direction); - if (!ret && !strncmp(direction, "rx", 2)) + if (!ret && !strncmp(direction, "rx", 2)) { + if (!dphy->variant->rx_supported) { + dev_err(&pdev->dev, "RX not supported on this variant\n"); + return -EOPNOTSUPP; + } + dphy->direction = SUN6I_DPHY_DIRECTION_RX; + } phy_set_drvdata(dphy->phy, dphy); phy_provider = devm_of_phy_provider_register(&pdev->dev, of_phy_simple_xlate); @@ -454,8 +469,15 @@ static int sun6i_dphy_probe(struct platform_device *pdev) return PTR_ERR_OR_ZERO(phy_provider); } +static const struct sun6i_dphy_variant sun6i_a31_mipi_dphy_variant = { + .rx_supported = true, +}; + static const struct 
of_device_id sun6i_dphy_of_table[] = { - { .compatible = "allwinner,sun6i-a31-mipi-dphy" }, + { + .compatible = "allwinner,sun6i-a31-mipi-dphy", + .data = &sun6i_a31_mipi_dphy_variant, + }, { } }; MODULE_DEVICE_TABLE(of, sun6i_dphy_of_table); From cb7f49a31597066b25c9bc6a0bf0781454dd4d2b Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sun, 13 Nov 2022 20:21:11 -0600 Subject: [PATCH 2724/4122] phy: allwinner: phy-sun6i-mipi-dphy: Set the enable bit last The A100 variant of the DPHY requires configuring the analog registers before setting the global enable bit. Since this order also works on the other variants, always use it, to minimize the differences between them. Reviewed-by: Paul Kocialkowski Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20221114022113.31694-7-samuel@sholland.org Signed-off-by: Vinod Koul --- drivers/phy/allwinner/phy-sun6i-mipi-dphy.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c b/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c index 7d7322670a83..a2afedc399fd 100644 --- a/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c +++ b/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c @@ -183,10 +183,6 @@ static int sun6i_dphy_tx_power_on(struct sun6i_dphy *dphy) SUN6I_DPHY_TX_TIME4_HS_TX_ANA0(3) | SUN6I_DPHY_TX_TIME4_HS_TX_ANA1(3)); - regmap_write(dphy->regs, SUN6I_DPHY_GCTL_REG, - SUN6I_DPHY_GCTL_LANE_NUM(dphy->config.lanes) | - SUN6I_DPHY_GCTL_EN); - regmap_write(dphy->regs, SUN6I_DPHY_ANA0_REG, SUN6I_DPHY_ANA0_REG_PWS | SUN6I_DPHY_ANA0_REG_DMPC | @@ -244,6 +240,10 @@ static int sun6i_dphy_tx_power_on(struct sun6i_dphy *dphy) SUN6I_DPHY_ANA2_EN_P2S_CPU_MASK, SUN6I_DPHY_ANA2_EN_P2S_CPU(lanes_mask)); + regmap_write(dphy->regs, SUN6I_DPHY_GCTL_REG, + SUN6I_DPHY_GCTL_LANE_NUM(dphy->config.lanes) | + SUN6I_DPHY_GCTL_EN); + return 0; } From 3fd490a7197857dc3aa409e56e31deaeab097c5f Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sun, 13 Nov 2022 20:21:12 -0600 Subject: [PATCH 
2725/4122] phy: allwinner: phy-sun6i-mipi-dphy: Add a variant power-on hook The A100 variant uses the same values for the timing registers, and it uses the same final power-on sequence, but it needs a different analog register configuration in the middle. Support this by moving the variant-specific parts to a hook provided by the variant. Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20221114022113.31694-8-samuel@sholland.org Signed-off-by: Vinod Koul --- drivers/phy/allwinner/phy-sun6i-mipi-dphy.c | 59 ++++++++++++--------- 1 file changed, 35 insertions(+), 24 deletions(-) diff --git a/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c b/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c index a2afedc399fd..ac144ee0f0a6 100644 --- a/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c +++ b/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c @@ -114,7 +114,10 @@ enum sun6i_dphy_direction { SUN6I_DPHY_DIRECTION_RX, }; +struct sun6i_dphy; + struct sun6i_dphy_variant { + void (*tx_power_on)(struct sun6i_dphy *dphy); bool rx_supported; }; @@ -156,33 +159,10 @@ static int sun6i_dphy_configure(struct phy *phy, union phy_configure_opts *opts) return 0; } -static int sun6i_dphy_tx_power_on(struct sun6i_dphy *dphy) +static void sun6i_a31_mipi_dphy_tx_power_on(struct sun6i_dphy *dphy) { u8 lanes_mask = GENMASK(dphy->config.lanes - 1, 0); - regmap_write(dphy->regs, SUN6I_DPHY_TX_CTL_REG, - SUN6I_DPHY_TX_CTL_HS_TX_CLK_CONT); - - regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME0_REG, - SUN6I_DPHY_TX_TIME0_LP_CLK_DIV(14) | - SUN6I_DPHY_TX_TIME0_HS_PREPARE(6) | - SUN6I_DPHY_TX_TIME0_HS_TRAIL(10)); - - regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME1_REG, - SUN6I_DPHY_TX_TIME1_CLK_PREPARE(7) | - SUN6I_DPHY_TX_TIME1_CLK_ZERO(50) | - SUN6I_DPHY_TX_TIME1_CLK_PRE(3) | - SUN6I_DPHY_TX_TIME1_CLK_POST(10)); - - regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME2_REG, - SUN6I_DPHY_TX_TIME2_CLK_TRAIL(30)); - - regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME3_REG, 0); - - regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME4_REG, - 
SUN6I_DPHY_TX_TIME4_HS_TX_ANA0(3) | - SUN6I_DPHY_TX_TIME4_HS_TX_ANA1(3)); - regmap_write(dphy->regs, SUN6I_DPHY_ANA0_REG, SUN6I_DPHY_ANA0_REG_PWS | SUN6I_DPHY_ANA0_REG_DMPC | @@ -214,6 +194,36 @@ static int sun6i_dphy_tx_power_on(struct sun6i_dphy *dphy) SUN6I_DPHY_ANA3_EN_LDOC | SUN6I_DPHY_ANA3_EN_LDOD); udelay(1); +} + +static int sun6i_dphy_tx_power_on(struct sun6i_dphy *dphy) +{ + u8 lanes_mask = GENMASK(dphy->config.lanes - 1, 0); + + regmap_write(dphy->regs, SUN6I_DPHY_TX_CTL_REG, + SUN6I_DPHY_TX_CTL_HS_TX_CLK_CONT); + + regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME0_REG, + SUN6I_DPHY_TX_TIME0_LP_CLK_DIV(14) | + SUN6I_DPHY_TX_TIME0_HS_PREPARE(6) | + SUN6I_DPHY_TX_TIME0_HS_TRAIL(10)); + + regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME1_REG, + SUN6I_DPHY_TX_TIME1_CLK_PREPARE(7) | + SUN6I_DPHY_TX_TIME1_CLK_ZERO(50) | + SUN6I_DPHY_TX_TIME1_CLK_PRE(3) | + SUN6I_DPHY_TX_TIME1_CLK_POST(10)); + + regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME2_REG, + SUN6I_DPHY_TX_TIME2_CLK_TRAIL(30)); + + regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME3_REG, 0); + + regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME4_REG, + SUN6I_DPHY_TX_TIME4_HS_TX_ANA0(3) | + SUN6I_DPHY_TX_TIME4_HS_TX_ANA1(3)); + + dphy->variant->tx_power_on(dphy); regmap_update_bits(dphy->regs, SUN6I_DPHY_ANA3_REG, SUN6I_DPHY_ANA3_EN_VTTC | @@ -470,6 +480,7 @@ static int sun6i_dphy_probe(struct platform_device *pdev) } static const struct sun6i_dphy_variant sun6i_a31_mipi_dphy_variant = { + .tx_power_on = sun6i_a31_mipi_dphy_tx_power_on, .rx_supported = true, }; From 4d0c2165e64eec00c19b68b1abc83e57e9633db9 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sun, 13 Nov 2022 20:21:13 -0600 Subject: [PATCH 2726/4122] phy: allwinner: phy-sun6i-mipi-dphy: Add the A100 DPHY variant A100 features an updated DPHY, which moves PLL control inside the DPHY register space (previously the PLL was controlled from the CCU). It also requires a modified analog power-on sequence. 
This "combo PHY" can also be used as an LVDS PHY, but that is not yet supported by the driver. Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20221114022113.31694-9-samuel@sholland.org Signed-off-by: Vinod Koul --- drivers/phy/allwinner/phy-sun6i-mipi-dphy.c | 143 +++++++++++++++++++- 1 file changed, 142 insertions(+), 1 deletion(-) diff --git a/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c b/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c index ac144ee0f0a6..36eab95271b2 100644 --- a/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c +++ b/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c @@ -70,11 +70,19 @@ #define SUN6I_DPHY_ANA0_REG 0x4c #define SUN6I_DPHY_ANA0_REG_PWS BIT(31) +#define SUN6I_DPHY_ANA0_REG_PWEND BIT(30) +#define SUN6I_DPHY_ANA0_REG_PWENC BIT(29) #define SUN6I_DPHY_ANA0_REG_DMPC BIT(28) #define SUN6I_DPHY_ANA0_REG_DMPD(n) (((n) & 0xf) << 24) +#define SUN6I_DPHY_ANA0_REG_SRXDT(n) (((n) & 0xf) << 20) +#define SUN6I_DPHY_ANA0_REG_SRXCK(n) (((n) & 0xf) << 16) +#define SUN6I_DPHY_ANA0_REG_SDIV2 BIT(15) #define SUN6I_DPHY_ANA0_REG_SLV(n) (((n) & 7) << 12) #define SUN6I_DPHY_ANA0_REG_DEN(n) (((n) & 0xf) << 8) +#define SUN6I_DPHY_ANA0_REG_PLR(n) (((n) & 0xf) << 4) #define SUN6I_DPHY_ANA0_REG_SFB(n) (((n) & 3) << 2) +#define SUN6I_DPHY_ANA0_REG_RSD BIT(1) +#define SUN6I_DPHY_ANA0_REG_SELSCK BIT(0) #define SUN6I_DPHY_ANA1_REG 0x50 #define SUN6I_DPHY_ANA1_REG_VTTMODE BIT(31) @@ -97,8 +105,13 @@ #define SUN6I_DPHY_ANA3_EN_LDOR BIT(18) #define SUN6I_DPHY_ANA4_REG 0x5c +#define SUN6I_DPHY_ANA4_REG_EN_MIPI BIT(31) +#define SUN6I_DPHY_ANA4_REG_EN_COMTEST BIT(30) +#define SUN6I_DPHY_ANA4_REG_COMTEST(n) (((n) & 3) << 28) +#define SUN6I_DPHY_ANA4_REG_IB(n) (((n) & 3) << 25) #define SUN6I_DPHY_ANA4_REG_DMPLVC BIT(24) #define SUN6I_DPHY_ANA4_REG_DMPLVD(n) (((n) & 0xf) << 20) +#define SUN6I_DPHY_ANA4_REG_VTT_SET(n) (((n) & 0x7) << 17) #define SUN6I_DPHY_ANA4_REG_CKDV(n) (((n) & 0x1f) << 12) #define SUN6I_DPHY_ANA4_REG_TMSC(n) (((n) & 3) << 10) #define 
SUN6I_DPHY_ANA4_REG_TMSD(n) (((n) & 3) << 8) @@ -109,6 +122,56 @@ #define SUN6I_DPHY_DBG5_REG 0xf4 +#define SUN50I_DPHY_TX_SLEW_REG0 0xf8 +#define SUN50I_DPHY_TX_SLEW_REG1 0xfc +#define SUN50I_DPHY_TX_SLEW_REG2 0x100 + +#define SUN50I_DPHY_PLL_REG0 0x104 +#define SUN50I_DPHY_PLL_REG0_CP36_EN BIT(23) +#define SUN50I_DPHY_PLL_REG0_LDO_EN BIT(22) +#define SUN50I_DPHY_PLL_REG0_EN_LVS BIT(21) +#define SUN50I_DPHY_PLL_REG0_PLL_EN BIT(20) +#define SUN50I_DPHY_PLL_REG0_P(n) (((n) & 0xf) << 16) +#define SUN50I_DPHY_PLL_REG0_N(n) (((n) & 0xff) << 8) +#define SUN50I_DPHY_PLL_REG0_NDET BIT(7) +#define SUN50I_DPHY_PLL_REG0_TDIV BIT(6) +#define SUN50I_DPHY_PLL_REG0_M0(n) (((n) & 3) << 4) +#define SUN50I_DPHY_PLL_REG0_M1(n) ((n) & 0xf) + +#define SUN50I_DPHY_PLL_REG1 0x108 +#define SUN50I_DPHY_PLL_REG1_UNLOCK_MDSEL(n) (((n) & 3) << 14) +#define SUN50I_DPHY_PLL_REG1_LOCKMDSEL BIT(13) +#define SUN50I_DPHY_PLL_REG1_LOCKDET_EN BIT(12) +#define SUN50I_DPHY_PLL_REG1_VSETA(n) (((n) & 0x7) << 9) +#define SUN50I_DPHY_PLL_REG1_VSETD(n) (((n) & 0x7) << 6) +#define SUN50I_DPHY_PLL_REG1_LPF_SW BIT(5) +#define SUN50I_DPHY_PLL_REG1_ICP_SEL(n) (((n) & 3) << 3) +#define SUN50I_DPHY_PLL_REG1_ATEST_SEL(n) (((n) & 3) << 1) +#define SUN50I_DPHY_PLL_REG1_TEST_EN BIT(0) + +#define SUN50I_DPHY_PLL_REG2 0x10c +#define SUN50I_DPHY_PLL_REG2_SDM_EN BIT(31) +#define SUN50I_DPHY_PLL_REG2_FF_EN BIT(30) +#define SUN50I_DPHY_PLL_REG2_SS_EN BIT(29) +#define SUN50I_DPHY_PLL_REG2_SS_FRAC(n) (((n) & 0x1ff) << 20) +#define SUN50I_DPHY_PLL_REG2_SS_INT(n) (((n) & 0xff) << 12) +#define SUN50I_DPHY_PLL_REG2_FRAC(n) ((n) & 0xfff) + +#define SUN50I_COMBO_PHY_REG0 0x110 +#define SUN50I_COMBO_PHY_REG0_EN_TEST_COMBOLDO BIT(5) +#define SUN50I_COMBO_PHY_REG0_EN_TEST_0P8 BIT(4) +#define SUN50I_COMBO_PHY_REG0_EN_MIPI BIT(3) +#define SUN50I_COMBO_PHY_REG0_EN_LVDS BIT(2) +#define SUN50I_COMBO_PHY_REG0_EN_COMBOLDO BIT(1) +#define SUN50I_COMBO_PHY_REG0_EN_CP BIT(0) + +#define SUN50I_COMBO_PHY_REG1 0x114 +#define 
SUN50I_COMBO_PHY_REG2_REG_VREF1P6(n) (((n) & 0x7) << 4) +#define SUN50I_COMBO_PHY_REG2_REG_VREF0P8(n) ((n) & 0x7) + +#define SUN50I_COMBO_PHY_REG2 0x118 +#define SUN50I_COMBO_PHY_REG2_HS_STOP_DLY(n) ((n) & 0xff) + enum sun6i_dphy_direction { SUN6I_DPHY_DIRECTION_TX, SUN6I_DPHY_DIRECTION_RX, @@ -196,6 +259,76 @@ static void sun6i_a31_mipi_dphy_tx_power_on(struct sun6i_dphy *dphy) udelay(1); } +static void sun50i_a100_mipi_dphy_tx_power_on(struct sun6i_dphy *dphy) +{ + unsigned long mipi_symbol_rate = dphy->config.hs_clk_rate; + unsigned int div, n; + + regmap_write(dphy->regs, SUN6I_DPHY_ANA4_REG, + SUN6I_DPHY_ANA4_REG_IB(2) | + SUN6I_DPHY_ANA4_REG_DMPLVD(4) | + SUN6I_DPHY_ANA4_REG_VTT_SET(3) | + SUN6I_DPHY_ANA4_REG_CKDV(3) | + SUN6I_DPHY_ANA4_REG_TMSD(1) | + SUN6I_DPHY_ANA4_REG_TMSC(1) | + SUN6I_DPHY_ANA4_REG_TXPUSD(2) | + SUN6I_DPHY_ANA4_REG_TXPUSC(3) | + SUN6I_DPHY_ANA4_REG_TXDNSD(2) | + SUN6I_DPHY_ANA4_REG_TXDNSC(3)); + + regmap_update_bits(dphy->regs, SUN6I_DPHY_ANA2_REG, + SUN6I_DPHY_ANA2_EN_CK_CPU, + SUN6I_DPHY_ANA2_EN_CK_CPU); + + regmap_update_bits(dphy->regs, SUN6I_DPHY_ANA2_REG, + SUN6I_DPHY_ANA2_REG_ENIB, + SUN6I_DPHY_ANA2_REG_ENIB); + + regmap_write(dphy->regs, SUN6I_DPHY_ANA3_REG, + SUN6I_DPHY_ANA3_EN_LDOR | + SUN6I_DPHY_ANA3_EN_LDOC | + SUN6I_DPHY_ANA3_EN_LDOD); + + regmap_write(dphy->regs, SUN6I_DPHY_ANA0_REG, + SUN6I_DPHY_ANA0_REG_PLR(4) | + SUN6I_DPHY_ANA0_REG_SFB(1)); + + regmap_write(dphy->regs, SUN50I_COMBO_PHY_REG0, + SUN50I_COMBO_PHY_REG0_EN_CP); + + /* Choose a divider to limit the VCO frequency to around 2 GHz. 
*/ + div = 16 >> order_base_2(DIV_ROUND_UP(mipi_symbol_rate, 264000000)); + n = mipi_symbol_rate * div / 24000000; + + regmap_write(dphy->regs, SUN50I_DPHY_PLL_REG0, + SUN50I_DPHY_PLL_REG0_CP36_EN | + SUN50I_DPHY_PLL_REG0_LDO_EN | + SUN50I_DPHY_PLL_REG0_EN_LVS | + SUN50I_DPHY_PLL_REG0_PLL_EN | + SUN50I_DPHY_PLL_REG0_NDET | + SUN50I_DPHY_PLL_REG0_P((div - 1) % 8) | + SUN50I_DPHY_PLL_REG0_N(n) | + SUN50I_DPHY_PLL_REG0_M0((div - 1) / 8) | + SUN50I_DPHY_PLL_REG0_M1(2)); + + /* Disable sigma-delta modulation. */ + regmap_write(dphy->regs, SUN50I_DPHY_PLL_REG2, 0); + + regmap_update_bits(dphy->regs, SUN6I_DPHY_ANA4_REG, + SUN6I_DPHY_ANA4_REG_EN_MIPI, + SUN6I_DPHY_ANA4_REG_EN_MIPI); + + regmap_update_bits(dphy->regs, SUN50I_COMBO_PHY_REG0, + SUN50I_COMBO_PHY_REG0_EN_MIPI | + SUN50I_COMBO_PHY_REG0_EN_COMBOLDO, + SUN50I_COMBO_PHY_REG0_EN_MIPI | + SUN50I_COMBO_PHY_REG0_EN_COMBOLDO); + + regmap_write(dphy->regs, SUN50I_COMBO_PHY_REG2, + SUN50I_COMBO_PHY_REG2_HS_STOP_DLY(20)); + udelay(1); +} + static int sun6i_dphy_tx_power_on(struct sun6i_dphy *dphy) { u8 lanes_mask = GENMASK(dphy->config.lanes - 1, 0); @@ -408,7 +541,7 @@ static const struct regmap_config sun6i_dphy_regmap_config = { .reg_bits = 32, .val_bits = 32, .reg_stride = 4, - .max_register = SUN6I_DPHY_DBG5_REG, + .max_register = SUN50I_COMBO_PHY_REG2, .name = "mipi-dphy", }; @@ -484,11 +617,19 @@ static const struct sun6i_dphy_variant sun6i_a31_mipi_dphy_variant = { .rx_supported = true, }; +static const struct sun6i_dphy_variant sun50i_a100_mipi_dphy_variant = { + .tx_power_on = sun50i_a100_mipi_dphy_tx_power_on, +}; + static const struct of_device_id sun6i_dphy_of_table[] = { { .compatible = "allwinner,sun6i-a31-mipi-dphy", .data = &sun6i_a31_mipi_dphy_variant, }, + { + .compatible = "allwinner,sun50i-a100-mipi-dphy", + .data = &sun50i_a100_mipi_dphy_variant, + }, { } }; MODULE_DEVICE_TABLE(of, sun6i_dphy_of_table); From b53e19799d45edf7ca7cfd0cf5d6fb2d5179edec Mon Sep 17 00:00:00 2001 From: Minghao Chi Date: 
Thu, 17 Nov 2022 14:11:43 +0800 Subject: [PATCH 2727/4122] phy: use devm_platform_get_and_ioremap_resource() Convert platform_get_resource(), devm_ioremap_resource() to a single call to devm_platform_get_and_ioremap_resource(), as this is exactly what this function does. Signed-off-by: Minghao Chi Signed-off-by: ye xingchen Link: https://lore.kernel.org/r/202211171411439883050@zte.com.cn Signed-off-by: Vinod Koul --- drivers/phy/marvell/phy-mmp3-hsic.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/phy/marvell/phy-mmp3-hsic.c b/drivers/phy/marvell/phy-mmp3-hsic.c index 7cccf01848d8..f2537fdcc3ab 100644 --- a/drivers/phy/marvell/phy-mmp3-hsic.c +++ b/drivers/phy/marvell/phy-mmp3-hsic.c @@ -41,12 +41,10 @@ static int mmp3_hsic_phy_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct phy_provider *provider; - struct resource *resource; void __iomem *base; struct phy *phy; - resource = platform_get_resource(pdev, IORESOURCE_MEM, 0); - base = devm_ioremap_resource(dev, resource); + base = devm_platform_get_and_ioremap_resource(pdev, 0, NULL); if (IS_ERR(base)) return PTR_ERR(base); From f12faa3be8e84fa9232a4654bccb30f46bbfee5c Mon Sep 17 00:00:00 2001 From: Matt Ranostay Date: Tue, 22 Nov 2022 19:24:12 -0800 Subject: [PATCH 2728/4122] dt-bindings: phy-j721e-wiz: add j721s2 compatible string Add ti,j721s2-wiz-10g compatible string to binding documentation. 
Signed-off-by: Matt Ranostay Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221123032413.1193961-2-mranostay@ti.com Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml b/Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml index a9e38739c010..c54b36c104ab 100644 --- a/Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml +++ b/Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml @@ -15,6 +15,7 @@ properties: enum: - ti,j721e-wiz-16g - ti,j721e-wiz-10g + - ti,j721s2-wiz-10g - ti,am64-wiz-10g - ti,j7200-wiz-10g - ti,j784s4-wiz-10g From bea3ce759b4664f20f1f57c53fe018c3b67da147 Mon Sep 17 00:00:00 2001 From: Matt Ranostay Date: Tue, 22 Nov 2022 19:24:13 -0800 Subject: [PATCH 2729/4122] phy: ti: phy-j721e-wiz: add j721s2-wiz-10g module support Add support for j721s2-wiz-10g device which is similar to j721e-wiz-10g but uses clock-names interface versus explicitly defining clock nodes within device tree node. 
Signed-off-by: Matt Ranostay Reviewed-by: Roger Quadros Link: https://lore.kernel.org/r/20221123032413.1193961-3-mranostay@ti.com Signed-off-by: Vinod Koul --- drivers/phy/ti/phy-j721e-wiz.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/phy/ti/phy-j721e-wiz.c b/drivers/phy/ti/phy-j721e-wiz.c index 141b51af4427..ddce5ef7711c 100644 --- a/drivers/phy/ti/phy-j721e-wiz.c +++ b/drivers/phy/ti/phy-j721e-wiz.c @@ -322,6 +322,7 @@ enum wiz_type { AM64_WIZ_10G, J7200_WIZ_10G, /* J7200 SR2.0 */ J784S4_WIZ_10G, + J721S2_WIZ_10G, }; struct wiz_data { @@ -1000,6 +1001,7 @@ static void wiz_clock_cleanup(struct wiz *wiz, struct device_node *node) case AM64_WIZ_10G: case J7200_WIZ_10G: case J784S4_WIZ_10G: + case J721S2_WIZ_10G: of_clk_del_provider(dev->of_node); return; default: @@ -1132,6 +1134,7 @@ static int wiz_clock_init(struct wiz *wiz, struct device_node *node) case AM64_WIZ_10G: case J7200_WIZ_10G: case J784S4_WIZ_10G: + case J721S2_WIZ_10G: ret = wiz_clock_register(wiz); if (ret) dev_err(dev, "Failed to register wiz clocks\n"); @@ -1214,6 +1217,7 @@ static int wiz_phy_fullrt_div(struct wiz *wiz, int lane) break; case J721E_WIZ_10G: case J7200_WIZ_10G: + case J721S2_WIZ_10G: if (wiz->lane_phy_type[lane] == PHY_TYPE_SGMII) return regmap_field_write(wiz->p0_fullrt_div[lane], 0x2); break; @@ -1318,6 +1322,15 @@ static struct wiz_data j784s4_10g_data = { .clk_div_sel_num = WIZ_DIV_NUM_CLOCKS_10G, }; +static struct wiz_data j721s2_10g_data = { + .type = J721S2_WIZ_10G, + .pll0_refclk_mux_sel = &pll0_refclk_mux_sel, + .pll1_refclk_mux_sel = &pll1_refclk_mux_sel, + .refclk_dig_sel = &refclk_dig_sel_10g, + .clk_mux_sel = clk_mux_sel_10g, + .clk_div_sel_num = WIZ_DIV_NUM_CLOCKS_10G, +}; + static const struct of_device_id wiz_id_table[] = { { .compatible = "ti,j721e-wiz-16g", .data = &j721e_16g_data, @@ -1334,6 +1347,9 @@ static const struct of_device_id wiz_id_table[] = { { .compatible = "ti,j784s4-wiz-10g", .data = &j784s4_10g_data, }, + { + 
.compatible = "ti,j721s2-wiz-10g", .data = &j721s2_10g_data, + }, {} }; MODULE_DEVICE_TABLE(of, wiz_id_table); From b4d46c57d2fb0fa2611fa2ffbaf715925989f83f Mon Sep 17 00:00:00 2001 From: Cheng Xu Date: Thu, 24 Nov 2022 19:49:33 +0800 Subject: [PATCH 2730/4122] RDMA/erdma: Fix a typo in annotation A non-ASCII character was wrongly put in a comment, use the ASCII version. Fixes: bee85e0e31ec ("RDMA/erdma: Add main include file") Link: https://lore.kernel.org/r/20221124114933.77250-1-chengyou@linux.alibaba.com Signed-off-by: Cheng Xu Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/erdma/erdma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/erdma/erdma.h b/drivers/infiniband/hw/erdma/erdma.h index bb23d897c710..35726f25a989 100644 --- a/drivers/infiniband/hw/erdma/erdma.h +++ b/drivers/infiniband/hw/erdma/erdma.h @@ -219,7 +219,7 @@ struct erdma_dev { DECLARE_BITMAP(sdb_page, ERDMA_DWQE_TYPE0_CNT); /* * We provide max 496 uContexts that each has one SQ normal Db, - * and one directWQE db。 + * and one directWQE db. */ DECLARE_BITMAP(sdb_entry, ERDMA_DWQE_TYPE1_CNT); From 35765dccaf3485575a4420da529c72484c980345 Mon Sep 17 00:00:00 2001 From: Cheng Xu Date: Wed, 16 Nov 2022 10:31:05 +0800 Subject: [PATCH 2731/4122] RDMA/erdma: Add a workqueue for WRs reflushing The ERDMA driver uses a workqueue for asynchronous reflush command posting. Implement the lifecycle of this workqueue. 
Link: https://lore.kernel.org/r/20221116023107.82835-2-chengyou@linux.alibaba.com Signed-off-by: Cheng Xu Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/erdma/erdma.h | 1 + drivers/infiniband/hw/erdma/erdma_main.c | 14 ++++++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/erdma/erdma.h b/drivers/infiniband/hw/erdma/erdma.h index 35726f25a989..3d8c11aa23a2 100644 --- a/drivers/infiniband/hw/erdma/erdma.h +++ b/drivers/infiniband/hw/erdma/erdma.h @@ -190,6 +190,7 @@ struct erdma_dev { struct net_device *netdev; struct pci_dev *pdev; struct notifier_block netdev_nb; + struct workqueue_struct *reflush_wq; resource_size_t func_bar_addr; resource_size_t func_bar_len; diff --git a/drivers/infiniband/hw/erdma/erdma_main.c b/drivers/infiniband/hw/erdma/erdma_main.c index e44b06fea595..5dc31e5df5cb 100644 --- a/drivers/infiniband/hw/erdma/erdma_main.c +++ b/drivers/infiniband/hw/erdma/erdma_main.c @@ -521,13 +521,22 @@ static int erdma_ib_device_add(struct pci_dev *pdev) u64_to_ether_addr(mac, dev->attrs.peer_addr); + dev->reflush_wq = alloc_workqueue("erdma-reflush-wq", WQ_UNBOUND, + WQ_UNBOUND_MAX_ACTIVE); + if (!dev->reflush_wq) { + ret = -ENOMEM; + goto err_alloc_workqueue; + } + ret = erdma_device_register(dev); if (ret) - goto err_out; + goto err_register; return 0; -err_out: +err_register: + destroy_workqueue(dev->reflush_wq); +err_alloc_workqueue: xa_destroy(&dev->qp_xa); xa_destroy(&dev->cq_xa); @@ -543,6 +552,7 @@ static void erdma_ib_device_remove(struct pci_dev *pdev) unregister_netdevice_notifier(&dev->netdev_nb); ib_unregister_device(&dev->ibdev); + destroy_workqueue(dev->reflush_wq); erdma_res_cb_free(dev); xa_destroy(&dev->qp_xa); xa_destroy(&dev->cq_xa); From 54d8fffc2a500953ba90ff9462ae06bb05ca2354 Mon Sep 17 00:00:00 2001 From: Cheng Xu Date: Wed, 16 Nov 2022 10:31:06 +0800 Subject: [PATCH 2732/4122] RDMA/erdma: Implement the lifecycle of reflushing work for each QP Each QP has a work for reflushing 
purpose. In the work, driver will report the latest pi to hardware. Link: https://lore.kernel.org/r/20221116023107.82835-3-chengyou@linux.alibaba.com Signed-off-by: Cheng Xu Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/erdma/erdma_hw.h | 8 ++++++++ drivers/infiniband/hw/erdma/erdma_verbs.c | 18 ++++++++++++++++++ drivers/infiniband/hw/erdma/erdma_verbs.h | 2 ++ 3 files changed, 28 insertions(+) diff --git a/drivers/infiniband/hw/erdma/erdma_hw.h b/drivers/infiniband/hw/erdma/erdma_hw.h index 1b2e2b70678f..ab371fec610c 100644 --- a/drivers/infiniband/hw/erdma/erdma_hw.h +++ b/drivers/infiniband/hw/erdma/erdma_hw.h @@ -145,6 +145,7 @@ enum CMDQ_RDMA_OPCODE { CMDQ_OPCODE_MODIFY_QP = 3, CMDQ_OPCODE_CREATE_CQ = 4, CMDQ_OPCODE_DESTROY_CQ = 5, + CMDQ_OPCODE_REFLUSH = 6, CMDQ_OPCODE_REG_MR = 8, CMDQ_OPCODE_DEREG_MR = 9 }; @@ -301,6 +302,13 @@ struct erdma_cmdq_destroy_qp_req { u32 qpn; }; +struct erdma_cmdq_reflush_req { + u64 hdr; + u32 qpn; + u32 sq_pi; + u32 rq_pi; +}; + /* cap qword 0 definition */ #define ERDMA_CMD_DEV_CAP_MAX_CQE_MASK GENMASK_ULL(47, 40) #define ERDMA_CMD_DEV_CAP_FLAGS_MASK GENMASK_ULL(31, 24) diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c index d843ce1f35f3..5dab1e87975b 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.c +++ b/drivers/infiniband/hw/erdma/erdma_verbs.c @@ -379,6 +379,21 @@ int erdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) return 0; } +static void erdma_flush_worker(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct erdma_qp *qp = + container_of(dwork, struct erdma_qp, reflush_dwork); + struct erdma_cmdq_reflush_req req; + + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, + CMDQ_OPCODE_REFLUSH); + req.qpn = QP_ID(qp); + req.sq_pi = qp->kern_qp.sq_pi; + req.rq_pi = qp->kern_qp.rq_pi; + erdma_post_cmd_wait(&qp->dev->cmdq, &req, sizeof(req), NULL, NULL); +} + static int erdma_qp_validate_cap(struct erdma_dev 
*dev, struct ib_qp_init_attr *attrs) { @@ -735,6 +750,7 @@ int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, qp->attrs.max_send_sge = attrs->cap.max_send_sge; qp->attrs.max_recv_sge = attrs->cap.max_recv_sge; qp->attrs.state = ERDMA_QP_STATE_IDLE; + INIT_DELAYED_WORK(&qp->reflush_dwork, erdma_flush_worker); ret = create_qp_cmd(dev, qp); if (ret) @@ -1028,6 +1044,8 @@ int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) erdma_modify_qp_internal(qp, &qp_attrs, ERDMA_QP_ATTR_STATE); up_write(&qp->state_lock); + cancel_delayed_work_sync(&qp->reflush_dwork); + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, CMDQ_OPCODE_DESTROY_QP); req.qpn = QP_ID(qp); diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h index a5574f0252bb..9f341d032069 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.h +++ b/drivers/infiniband/hw/erdma/erdma_verbs.h @@ -197,6 +197,8 @@ struct erdma_qp { struct erdma_cep *cep; struct rw_semaphore state_lock; + struct delayed_work reflush_dwork; + union { struct erdma_kqp kern_qp; struct erdma_uqp user_qp; From 0edf42cbcc8690ef349d4432fea74d7791e3c645 Mon Sep 17 00:00:00 2001 From: Cheng Xu Date: Wed, 16 Nov 2022 10:31:07 +0800 Subject: [PATCH 2733/4122] RDMA/erdma: Notify the latest PI to FW for reflushing when necessary Firmware is responsible for flushing WRs in HW, and it's a little difficult for firmware to get the latest PI of QPs, especially for RQs after QP state being changed to ERROR. So we introduce a new CMDQ command, by which the driver can notify the latest PI to FW, and then FW can flush all posted WRs. 
Link: https://lore.kernel.org/r/20221116023107.82835-4-chengyou@linux.alibaba.com Signed-off-by: Cheng Xu Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/erdma/erdma_qp.c | 30 ++++++++++++++++------- drivers/infiniband/hw/erdma/erdma_verbs.h | 5 ++++ 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/erdma/erdma_qp.c b/drivers/infiniband/hw/erdma/erdma_qp.c index 521e97258de7..d088d6bef431 100644 --- a/drivers/infiniband/hw/erdma/erdma_qp.c +++ b/drivers/infiniband/hw/erdma/erdma_qp.c @@ -120,6 +120,7 @@ static int erdma_modify_qp_state_to_stop(struct erdma_qp *qp, int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs, enum erdma_qp_attr_mask mask) { + bool need_reflush = false; int drop_conn, ret = 0; if (!mask) @@ -135,6 +136,7 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs, ret = erdma_modify_qp_state_to_rts(qp, attrs, mask); } else if (attrs->state == ERDMA_QP_STATE_ERROR) { qp->attrs.state = ERDMA_QP_STATE_ERROR; + need_reflush = true; if (qp->cep) { erdma_cep_put(qp->cep); qp->cep = NULL; @@ -145,17 +147,12 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs, case ERDMA_QP_STATE_RTS: drop_conn = 0; - if (attrs->state == ERDMA_QP_STATE_CLOSING) { + if (attrs->state == ERDMA_QP_STATE_CLOSING || + attrs->state == ERDMA_QP_STATE_TERMINATE || + attrs->state == ERDMA_QP_STATE_ERROR) { ret = erdma_modify_qp_state_to_stop(qp, attrs, mask); drop_conn = 1; - } else if (attrs->state == ERDMA_QP_STATE_TERMINATE) { - qp->attrs.state = ERDMA_QP_STATE_TERMINATE; - ret = erdma_modify_qp_state_to_stop(qp, attrs, mask); - drop_conn = 1; - } else if (attrs->state == ERDMA_QP_STATE_ERROR) { - ret = erdma_modify_qp_state_to_stop(qp, attrs, mask); - qp->attrs.state = ERDMA_QP_STATE_ERROR; - drop_conn = 1; + need_reflush = true; } if (drop_conn) @@ -180,6 +177,12 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs, break; 
} + if (need_reflush && !ret && rdma_is_kernel_res(&qp->ibqp.res)) { + qp->flags |= ERDMA_QP_IN_FLUSHING; + mod_delayed_work(qp->dev->reflush_wq, &qp->reflush_dwork, + usecs_to_jiffies(100)); + } + return ret; } @@ -527,6 +530,10 @@ int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr, } spin_unlock_irqrestore(&qp->lock, flags); + if (unlikely(qp->flags & ERDMA_QP_IN_FLUSHING)) + mod_delayed_work(qp->dev->reflush_wq, &qp->reflush_dwork, + usecs_to_jiffies(100)); + return ret; } @@ -580,5 +587,10 @@ int erdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr, } spin_unlock_irqrestore(&qp->lock, flags); + + if (unlikely(qp->flags & ERDMA_QP_IN_FLUSHING)) + mod_delayed_work(qp->dev->reflush_wq, &qp->reflush_dwork, + usecs_to_jiffies(100)); + return ret; } diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h index 9f341d032069..e0a993bc032a 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.h +++ b/drivers/infiniband/hw/erdma/erdma_verbs.h @@ -173,6 +173,10 @@ enum erdma_qp_attr_mask { ERDMA_QP_ATTR_MPA = (1 << 7) }; +enum erdma_qp_flags { + ERDMA_QP_IN_FLUSHING = (1 << 0), +}; + struct erdma_qp_attrs { enum erdma_qp_state state; enum erdma_cc_alg cc; /* Congestion control algorithm */ @@ -197,6 +201,7 @@ struct erdma_qp { struct erdma_cep *cep; struct rw_semaphore state_lock; + unsigned long flags; struct delayed_work reflush_dwork; union { From 3a042fda8813dcb1012347665a69572adbaef1f1 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 24 Nov 2022 09:49:27 +0100 Subject: [PATCH 2734/4122] iio: imu: st_lsm6dsx: add support to ISM330IS Add support to STM ISM330IS (accelerometer and gyroscope) Mems sensor. The ISM330IS sensor can use LSM6DSO16IS as fallback device since it implements all the ISM330IS features currently implemented in st_lsm6dsx. 
Datasheet: https://www.st.com/resource/en/datasheet/ism330is.pdf Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/4cd1614060e06f49cd92f5930d8fd40117c07920.1669279604.git.lorenzo@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/imu/st_lsm6dsx/Kconfig | 4 ++-- drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h | 2 ++ drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c | 8 ++++++-- drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c | 5 +++++ drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_spi.c | 5 +++++ 5 files changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/iio/imu/st_lsm6dsx/Kconfig b/drivers/iio/imu/st_lsm6dsx/Kconfig index 0096035728cd..f6660847fb58 100644 --- a/drivers/iio/imu/st_lsm6dsx/Kconfig +++ b/drivers/iio/imu/st_lsm6dsx/Kconfig @@ -13,8 +13,8 @@ config IIO_ST_LSM6DSX sensor. Supported devices: lsm6ds3, lsm6ds3h, lsm6dsl, lsm6dsm, ism330dlc, lsm6dso, lsm6dsox, asm330lhh, asm330lhhx, lsm6dsr, lsm6ds3tr-c, ism330dhcx, lsm6dsrx, lsm6ds0, lsm6dsop, lsm6dstx, - lsm6dsv, lsm6dsv16x, lsm6dso16is, the accelerometer/gyroscope - of lsm9ds1 and lsm6dst. + lsm6dsv, lsm6dsv16x, lsm6dso16is, ism330is, lsm6dst and the + accelerometer/gyroscope of lsm9ds1. To compile this driver as a module, choose M here: the module will be called st_lsm6dsx. 
diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h index abf14a2ce0e9..5b6f195748fc 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h @@ -36,6 +36,7 @@ #define ST_LSM6DSV_DEV_NAME "lsm6dsv" #define ST_LSM6DSV16X_DEV_NAME "lsm6dsv16x" #define ST_LSM6DSO16IS_DEV_NAME "lsm6dso16is" +#define ST_ISM330IS_DEV_NAME "ism330is" enum st_lsm6dsx_hw_id { ST_LSM6DS3_ID, @@ -59,6 +60,7 @@ enum st_lsm6dsx_hw_id { ST_LSM6DSV_ID, ST_LSM6DSV16X_ID, ST_LSM6DSO16IS_ID, + ST_ISM330IS_ID, ST_LSM6DSX_MAX_ID, }; diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c index 57a79bf35bba..3f6060c64f32 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c @@ -27,7 +27,7 @@ * - FIFO size: 4KB * * - LSM6DSO/LSM6DSOX/ASM330LHH/ASM330LHHX/LSM6DSR/ISM330DHCX/LSM6DST/LSM6DSOP/ - * LSM6DSTX/LSM6DSO16IS: + * LSM6DSTX/LSM6DSO16IS/ISM330IS: * - Accelerometer/Gyroscope supported ODR [Hz]: 12.5, 26, 52, 104, 208, 416, * 833 * - Accelerometer supported full-scale [g]: +-2/+-4/+-8/+-16 @@ -1393,7 +1393,11 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = { .hw_id = ST_LSM6DSO16IS_ID, .name = ST_LSM6DSO16IS_DEV_NAME, .wai = 0x22, - }, + }, { + .hw_id = ST_ISM330IS_ID, + .name = ST_ISM330IS_DEV_NAME, + .wai = 0x22, + } }, .channels = { [ST_LSM6DSX_ID_ACC] = { diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c index c34ccc85e4c7..df5f60925260 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c @@ -121,6 +121,10 @@ static const struct of_device_id st_lsm6dsx_i2c_of_match[] = { .compatible = "st,lsm6dso16is", .data = (void *)ST_LSM6DSO16IS_ID, }, + { + .compatible = "st,ism330is", + .data = (void *)ST_ISM330IS_ID, + }, {}, }; MODULE_DEVICE_TABLE(of, st_lsm6dsx_i2c_of_match); @@ -147,6 
+151,7 @@ static const struct i2c_device_id st_lsm6dsx_i2c_id_table[] = { { ST_LSM6DSV_DEV_NAME, ST_LSM6DSV_ID }, { ST_LSM6DSV16X_DEV_NAME, ST_LSM6DSV16X_ID }, { ST_LSM6DSO16IS_DEV_NAME, ST_LSM6DSO16IS_ID }, + { ST_ISM330IS_DEV_NAME, ST_ISM330IS_ID }, {}, }; MODULE_DEVICE_TABLE(i2c, st_lsm6dsx_i2c_id_table); diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_spi.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_spi.c index 24d5e51a8662..974584bda875 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_spi.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_spi.c @@ -121,6 +121,10 @@ static const struct of_device_id st_lsm6dsx_spi_of_match[] = { .compatible = "st,lsm6dso16is", .data = (void *)ST_LSM6DSO16IS_ID, }, + { + .compatible = "st,ism330is", + .data = (void *)ST_ISM330IS_ID, + }, {}, }; MODULE_DEVICE_TABLE(of, st_lsm6dsx_spi_of_match); @@ -147,6 +151,7 @@ static const struct spi_device_id st_lsm6dsx_spi_id_table[] = { { ST_LSM6DSV_DEV_NAME, ST_LSM6DSV_ID }, { ST_LSM6DSV16X_DEV_NAME, ST_LSM6DSV16X_ID }, { ST_LSM6DSO16IS_DEV_NAME, ST_LSM6DSO16IS_ID }, + { ST_ISM330IS_DEV_NAME, ST_ISM330IS_ID }, {}, }; MODULE_DEVICE_TABLE(spi, st_lsm6dsx_spi_id_table); From 91f46207056d6af2052495f911901a5ff337e364 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 24 Nov 2022 09:49:28 +0100 Subject: [PATCH 2735/4122] dt-bindings: iio: imu: st_lsm6dsx: add ism330is Add device bindings for ism330is IMU sensor. Use lsm6dso16is as fallback device for ism330is since it implements all the features currently supported by ism330is. 
Signed-off-by: Lorenzo Bianconi Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/a7a8a00037952928364269615ee8b6da4547795b.1669279604.git.lorenzo@kernel.org Signed-off-by: Jonathan Cameron --- Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml b/Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml index 07d5aee7e442..68b481c63318 100644 --- a/Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml +++ b/Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml @@ -43,6 +43,9 @@ properties: - items: - const: st,lsm6dsv16x - const: st,lsm6dsv + - items: + - const: st,ism330is + - const: st,lsm6dso16is reg: maxItems: 1 From f1e24342401a69da0519b289f0942fcd69e7db55 Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Thu, 24 Nov 2022 17:25:59 +0200 Subject: [PATCH 2736/4122] iio: addac: ad74115: remove unused ad74115_dac_slew_rate_hz_tbl This table was supposed to be used for handling the DAC rate, but it ended up being unused. Remove it. 
Signed-off-by: Cosmin Tanislav Reported-by: kernel test robot Link: https://lore.kernel.org/r/20221124152559.7895-1-cosmin.tanislav@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/addac/ad74115.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/iio/addac/ad74115.c b/drivers/iio/addac/ad74115.c index 383b92e7b682..e6bc5eb3788d 100644 --- a/drivers/iio/addac/ad74115.c +++ b/drivers/iio/addac/ad74115.c @@ -317,10 +317,6 @@ static const unsigned int ad74115_dac_rate_step_tbl[][3] = { { AD74115_SLEW_MODE_LINEAR, AD74115_SLEW_STEP_22_2_PERCENT, AD74115_SLEW_RATE_240KHZ }, }; -static const unsigned int ad74115_dac_slew_rate_hz_tbl[] = { - 4000, 64000, 150000, 240000 -}; - static const unsigned int ad74115_rtd_excitation_current_ua_tbl[] = { 250, 500, 750, 1000 }; From f84eec02b7248b6ae3bd8ef18a66f6f64eeab971 Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Thu, 24 Nov 2022 17:30:49 +0200 Subject: [PATCH 2737/4122] iio: addac: ad74413r: fix blank line after declaration warning Checkpatch wants a blank line after all declarations. Add it now, even though the patch has already been submitted. 
Signed-off-by: Cosmin Tanislav Link: https://lore.kernel.org/r/20221124153049.8851-1-cosmin.tanislav@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/addac/ad74413r.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/addac/ad74413r.c b/drivers/iio/addac/ad74413r.c index 61030053cbea..f32c8c2fb26d 100644 --- a/drivers/iio/addac/ad74413r.c +++ b/drivers/iio/addac/ad74413r.c @@ -1315,6 +1315,7 @@ static int ad74413r_probe(struct spi_device *spi) st->chip_info = device_get_match_data(&spi->dev); if (!st->chip_info) { const struct spi_device_id *id = spi_get_device_id(spi); + if (id) st->chip_info = (struct ad74413r_chip_info *)id->driver_data; From 955bee204f3dd307642c101b75e370662987e735 Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Mon, 21 Nov 2022 09:18:31 +0800 Subject: [PATCH 2738/4122] power: supply: z2_battery: Fix possible memleak in z2_batt_probe() If devm_gpiod_get_optional() returns error, the charger should be freed before z2_batt_probe returns according to the context. We fix it by just gotoing to 'err' branch. 
Fixes: a3b4388ea19b ("power: supply: z2_battery: Convert to GPIO descriptors") Signed-off-by: Zhang Qilong Reviewed-by: Linus Walleij Signed-off-by: Sebastian Reichel --- drivers/power/supply/z2_battery.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/power/supply/z2_battery.c b/drivers/power/supply/z2_battery.c index eb01b01bf593..0ba4a590a0a5 100644 --- a/drivers/power/supply/z2_battery.c +++ b/drivers/power/supply/z2_battery.c @@ -205,10 +205,12 @@ static int z2_batt_probe(struct i2c_client *client) charger->charge_gpiod = devm_gpiod_get_optional(&client->dev, NULL, GPIOD_IN); - if (IS_ERR(charger->charge_gpiod)) - return dev_err_probe(&client->dev, + if (IS_ERR(charger->charge_gpiod)) { + ret = dev_err_probe(&client->dev, PTR_ERR(charger->charge_gpiod), "failed to get charge GPIO\n"); + goto err; + } if (charger->charge_gpiod) { gpiod_set_consumer_name(charger->charge_gpiod, "BATT CHRG"); From 97f2b4ddb0aa700d673691a7d5e44d226d22bab7 Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Thu, 17 Nov 2022 10:45:58 +0800 Subject: [PATCH 2739/4122] power: supply: cw2015: Fix potential null-ptr-deref in cw_bat_probe() cw_bat_probe() calls create_singlethread_workqueue() but does not check its return value, which may be NULL. A null-ptr-deref may then happen: cw_bat_probe() create_singlethread_workqueue() # failed, cw_bat->wq is NULL queue_delayed_work() queue_delayed_work_on() __queue_delayed_work() # warning here, but continue __queue_work() # access wq->flags, null-ptr-deref Check the ret value and return -ENOMEM if it is NULL. 
Fixes: b4c7715c10c1 ("power: supply: add CellWise cw2015 fuel gauge driver") Signed-off-by: Shang XiaoJing Signed-off-by: Sebastian Reichel --- drivers/power/supply/cw2015_battery.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/power/supply/cw2015_battery.c b/drivers/power/supply/cw2015_battery.c index 6d52641151d9..473522b4326a 100644 --- a/drivers/power/supply/cw2015_battery.c +++ b/drivers/power/supply/cw2015_battery.c @@ -699,6 +699,9 @@ static int cw_bat_probe(struct i2c_client *client) } cw_bat->battery_workqueue = create_singlethread_workqueue("rk_battery"); + if (!cw_bat->battery_workqueue) + return -ENOMEM; + devm_delayed_work_autocancel(&client->dev, &cw_bat->battery_delay_work, cw_bat_work); queue_delayed_work(cw_bat->battery_workqueue, From 3ffa9f713c39a213a08d9ff13ab983a8aa5d8b5d Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Thu, 24 Nov 2022 11:33:32 +0000 Subject: [PATCH 2740/4122] HSI: omap_ssi_core: Fix error handling in ssi_init() The ssi_init() returns the platform_driver_register() directly without checking its return value, if platform_driver_register() failed, the ssi_pdriver is not unregistered. Fix by unregister ssi_pdriver when the last platform_driver_register() failed. 
Fixes: 0fae198988b8 ("HSI: omap_ssi: built omap_ssi and omap_ssi_port into one module") Signed-off-by: Yuan Can Signed-off-by: Sebastian Reichel --- drivers/hsi/controllers/omap_ssi_core.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/hsi/controllers/omap_ssi_core.c b/drivers/hsi/controllers/omap_ssi_core.c index 052cf3e92dd6..26f2c3c01297 100644 --- a/drivers/hsi/controllers/omap_ssi_core.c +++ b/drivers/hsi/controllers/omap_ssi_core.c @@ -631,7 +631,13 @@ static int __init ssi_init(void) { if (ret) return ret; - return platform_driver_register(&ssi_port_pdriver); + ret = platform_driver_register(&ssi_port_pdriver); + if (ret) { + platform_driver_unregister(&ssi_pdriver); + return ret; + } + + return 0; } module_init(ssi_init); From f0052d7a1edb3d8921b4e154aa8c46c4845b3714 Mon Sep 17 00:00:00 2001 From: Duke Xin Date: Sat, 19 Nov 2022 17:44:47 +0800 Subject: [PATCH 2741/4122] USB: serial: option: add Quectel EM05-G modem The EM05-G modem has 2 USB configurations that are configurable via the AT command AT+QCFG="usbnet",[ 0 | 2 ] which make the modem enumerate with the following interfaces, respectively: "RMNET" : AT + DIAG + NMEA + Modem + QMI "MBIM" : MBIM + AT + DIAG + NMEA + Modem The detailed description of the USB configuration for each mode as follows: RMNET Mode -------------- T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 21 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=0311 Rev= 3.18 S: Manufacturer=Quectel S: Product=Quectel EM05-G C:* #Ifs= 5 Cfg#= 1 Atr=a0 MxPwr=500mA I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) 
Sub=00 Prot=00 Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 6 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=89(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms MBIM Mode -------------- T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 16 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=0311 Rev= 3.18 S: Manufacturer=Quectel S: Product=Quectel EM05-G C:* #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA A: FirstIf#= 0 IfCount= 2 Cls=02(comm.) Sub=0e Prot=00 I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=89(I) Atr=03(Int.) 
MxPS= 64 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I:* If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Duke Xin Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index c3b7f1d98e78..dee79c7d82d5 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -255,6 +255,7 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_EP06 0x0306 #define QUECTEL_PRODUCT_EM05G 0x030a #define QUECTEL_PRODUCT_EM060K 0x030b +#define QUECTEL_PRODUCT_EM05G_SG 0x0311 #define QUECTEL_PRODUCT_EM12 0x0512 #define QUECTEL_PRODUCT_RM500Q 0x0800 #define QUECTEL_PRODUCT_RM520N 0x0801 @@ -1160,6 +1161,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0, 0) }, { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G, 0xff), .driver_info = RSVD(6) | ZLP }, + { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G_SG, 0xff), + .driver_info = RSVD(6) | ZLP }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0x00, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0xff, 0x40) }, From a7008584ab19d2df05caa95634cd72bc41f4cad3 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 13 Nov 2022 16:12:33 -0800 Subject: [PATCH 2742/4122] crypto: api - optimize algorithm registration when self-tests disabled Currently, registering an algorithm with the crypto API always causes a notification to be posted to the "cryptomgr", which then creates a kthread to self-test the algorithm. 
However, if self-tests are disabled in the kconfig (as is the default option), then this kthread just notifies waiters that the algorithm has been tested, then exits. This causes a significant amount of overhead, especially in the kthread creation and destruction, which is not necessary at all. For example, in a quick test I found that booting a "minimum" x86_64 kernel with all the crypto options enabled (except for the self-tests) takes about 400ms until PID 1 can start. Of that, a full 13ms is spent just doing this pointless dance, involving a kthread being created, run, and destroyed over 200 times. That's over 3% of the entire kernel start time. Fix this by just skipping the creation of the test larval and the posting of the registration notification entirely, when self-tests are disabled. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/algapi.c | 156 +++++++++++++++++++++++++++--------------------- crypto/api.c | 3 - 2 files changed, 87 insertions(+), 72 deletions(-) diff --git a/crypto/algapi.c b/crypto/algapi.c index 5c69ff8e8fa5..950195e90bfc 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -222,12 +222,64 @@ void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list, } EXPORT_SYMBOL_GPL(crypto_remove_spawns); +static void crypto_alg_finish_registration(struct crypto_alg *alg, + bool fulfill_requests, + struct list_head *algs_to_put) +{ + struct crypto_alg *q; + + list_for_each_entry(q, &crypto_alg_list, cra_list) { + if (q == alg) + continue; + + if (crypto_is_moribund(q)) + continue; + + if (crypto_is_larval(q)) { + struct crypto_larval *larval = (void *)q; + + /* + * Check to see if either our generic name or + * specific name can satisfy the name requested + * by the larval entry q. 
+ */ + if (strcmp(alg->cra_name, q->cra_name) && + strcmp(alg->cra_driver_name, q->cra_name)) + continue; + + if (larval->adult) + continue; + if ((q->cra_flags ^ alg->cra_flags) & larval->mask) + continue; + + if (fulfill_requests && crypto_mod_get(alg)) + larval->adult = alg; + else + larval->adult = ERR_PTR(-EAGAIN); + + continue; + } + + if (strcmp(alg->cra_name, q->cra_name)) + continue; + + if (strcmp(alg->cra_driver_name, q->cra_driver_name) && + q->cra_priority > alg->cra_priority) + continue; + + crypto_remove_spawns(q, algs_to_put, alg); + } + + crypto_notify(CRYPTO_MSG_ALG_LOADED, alg); +} + static struct crypto_larval *crypto_alloc_test_larval(struct crypto_alg *alg) { struct crypto_larval *larval; - if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER)) - return NULL; + if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER) || + IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS)) + return NULL; /* No self-test needed */ larval = crypto_larval_alloc(alg->cra_name, alg->cra_flags | CRYPTO_ALG_TESTED, 0); @@ -248,7 +300,8 @@ static struct crypto_larval *crypto_alloc_test_larval(struct crypto_alg *alg) return larval; } -static struct crypto_larval *__crypto_register_alg(struct crypto_alg *alg) +static struct crypto_larval * +__crypto_register_alg(struct crypto_alg *alg, struct list_head *algs_to_put) { struct crypto_alg *q; struct crypto_larval *larval; @@ -259,9 +312,6 @@ static struct crypto_larval *__crypto_register_alg(struct crypto_alg *alg) INIT_LIST_HEAD(&alg->cra_users); - /* No cheating! */ - alg->cra_flags &= ~CRYPTO_ALG_TESTED; - ret = -EEXIST; list_for_each_entry(q, &crypto_alg_list, cra_list) { @@ -288,13 +338,18 @@ static struct crypto_larval *__crypto_register_alg(struct crypto_alg *alg) list_add(&alg->cra_list, &crypto_alg_list); - if (larval) - list_add(&larval->alg.cra_list, &crypto_alg_list); - else - alg->cra_flags |= CRYPTO_ALG_TESTED; - crypto_stats_init(alg); + if (larval) { + /* No cheating! 
*/ + alg->cra_flags &= ~CRYPTO_ALG_TESTED; + + list_add(&larval->alg.cra_list, &crypto_alg_list); + } else { + alg->cra_flags |= CRYPTO_ALG_TESTED; + crypto_alg_finish_registration(alg, true, algs_to_put); + } + out: return larval; @@ -341,7 +396,10 @@ found: alg->cra_flags |= CRYPTO_ALG_TESTED; - /* Only satisfy larval waiters if we are the best. */ + /* + * If a higher-priority implementation of the same algorithm is + * currently being tested, then don't fulfill request larvals. + */ best = true; list_for_each_entry(q, &crypto_alg_list, cra_list) { if (crypto_is_moribund(q) || !crypto_is_larval(q)) @@ -356,47 +414,7 @@ found: } } - list_for_each_entry(q, &crypto_alg_list, cra_list) { - if (q == alg) - continue; - - if (crypto_is_moribund(q)) - continue; - - if (crypto_is_larval(q)) { - struct crypto_larval *larval = (void *)q; - - /* - * Check to see if either our generic name or - * specific name can satisfy the name requested - * by the larval entry q. - */ - if (strcmp(alg->cra_name, q->cra_name) && - strcmp(alg->cra_driver_name, q->cra_name)) - continue; - - if (larval->adult) - continue; - if ((q->cra_flags ^ alg->cra_flags) & larval->mask) - continue; - - if (best && crypto_mod_get(alg)) - larval->adult = alg; - else - larval->adult = ERR_PTR(-EAGAIN); - - continue; - } - - if (strcmp(alg->cra_name, q->cra_name)) - continue; - - if (strcmp(alg->cra_driver_name, q->cra_driver_name) && - q->cra_priority > alg->cra_priority) - continue; - - crypto_remove_spawns(q, &list, alg); - } + crypto_alg_finish_registration(alg, best, &list); complete: complete_all(&test->completion); @@ -423,7 +441,8 @@ EXPORT_SYMBOL_GPL(crypto_remove_final); int crypto_register_alg(struct crypto_alg *alg) { struct crypto_larval *larval; - bool test_started; + LIST_HEAD(algs_to_put); + bool test_started = false; int err; alg->cra_flags &= ~CRYPTO_ALG_DEAD; @@ -432,17 +451,18 @@ int crypto_register_alg(struct crypto_alg *alg) return err; down_write(&crypto_alg_sem); - larval = 
__crypto_register_alg(alg); - test_started = static_key_enabled(&crypto_boot_test_finished); - if (!IS_ERR_OR_NULL(larval)) + larval = __crypto_register_alg(alg, &algs_to_put); + if (!IS_ERR_OR_NULL(larval)) { + test_started = static_key_enabled(&crypto_boot_test_finished); larval->test_started = test_started; + } up_write(&crypto_alg_sem); - if (IS_ERR_OR_NULL(larval)) + if (IS_ERR(larval)) return PTR_ERR(larval); - if (test_started) crypto_wait_for_test(larval); + crypto_remove_final(&algs_to_put); return 0; } EXPORT_SYMBOL_GPL(crypto_register_alg); @@ -619,6 +639,7 @@ int crypto_register_instance(struct crypto_template *tmpl, struct crypto_larval *larval; struct crypto_spawn *spawn; u32 fips_internal = 0; + LIST_HEAD(algs_to_put); int err; err = crypto_check_alg(&inst->alg); @@ -650,7 +671,7 @@ int crypto_register_instance(struct crypto_template *tmpl, inst->alg.cra_flags |= (fips_internal & CRYPTO_ALG_FIPS_INTERNAL); - larval = __crypto_register_alg(&inst->alg); + larval = __crypto_register_alg(&inst->alg, &algs_to_put); if (IS_ERR(larval)) goto unlock; else if (larval) @@ -662,15 +683,12 @@ int crypto_register_instance(struct crypto_template *tmpl, unlock: up_write(&crypto_alg_sem); - err = PTR_ERR(larval); - if (IS_ERR_OR_NULL(larval)) - goto err; - - crypto_wait_for_test(larval); - err = 0; - -err: - return err; + if (IS_ERR(larval)) + return PTR_ERR(larval); + if (larval) + crypto_wait_for_test(larval); + crypto_remove_final(&algs_to_put); + return 0; } EXPORT_SYMBOL_GPL(crypto_register_instance); diff --git a/crypto/api.c b/crypto/api.c index 64f2d365a8e9..52ce10a35366 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -172,9 +172,6 @@ void crypto_wait_for_test(struct crypto_larval *larval) err = wait_for_completion_killable(&larval->completion); WARN_ON(err); - if (!err) - crypto_notify(CRYPTO_MSG_ALG_LOADED, larval); - out: crypto_larval_kill(&larval->alg); } From 9cadd73adef1e1d53ea100f28e3e258698b92418 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: 
Sun, 13 Nov 2022 16:12:34 -0800 Subject: [PATCH 2743/4122] crypto: algboss - optimize registration of internal algorithms Since algboss always skips testing of algorithms with the CRYPTO_ALG_INTERNAL flag, there is no need to go through the dance of creating the test kthread, which creates a lot of overhead. Instead, we can just directly finish the algorithm registration, like is now done when self-tests are disabled entirely. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/algapi.c | 3 ++- crypto/algboss.c | 13 +------------ 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/crypto/algapi.c b/crypto/algapi.c index 950195e90bfc..851b247f043d 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -278,7 +278,8 @@ static struct crypto_larval *crypto_alloc_test_larval(struct crypto_alg *alg) struct crypto_larval *larval; if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER) || - IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS)) + IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) || + (alg->cra_flags & CRYPTO_ALG_INTERNAL)) return NULL; /* No self-test needed */ larval = crypto_larval_alloc(alg->cra_name, diff --git a/crypto/algboss.c b/crypto/algboss.c index eb5fe84efb83..13d37320a66e 100644 --- a/crypto/algboss.c +++ b/crypto/algboss.c @@ -181,12 +181,8 @@ static int cryptomgr_test(void *data) goto skiptest; #endif - if (type & CRYPTO_ALG_TESTED) - goto skiptest; - err = alg_test(param->driver, param->alg, type, CRYPTO_ALG_TESTED); -skiptest: crypto_alg_tested(param->driver, err); kfree(param); @@ -197,7 +193,6 @@ static int cryptomgr_schedule_test(struct crypto_alg *alg) { struct task_struct *thread; struct crypto_test_param *param; - u32 type; if (!try_module_get(THIS_MODULE)) goto err; @@ -208,13 +203,7 @@ static int cryptomgr_schedule_test(struct crypto_alg *alg) memcpy(param->driver, alg->cra_driver_name, sizeof(param->driver)); memcpy(param->alg, alg->cra_name, sizeof(param->alg)); - type = alg->cra_flags; - - /* Do not test internal algorithms. 
*/ - if (type & CRYPTO_ALG_INTERNAL) - type |= CRYPTO_ALG_TESTED; - - param->type = type; + param->type = alg->cra_flags; thread = kthread_run(cryptomgr_test, param, "cryptomgr_test"); if (IS_ERR(thread)) From 06bd9c967eaac5484c31c3dc6dfbef6183819508 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 13 Nov 2022 16:12:35 -0800 Subject: [PATCH 2744/4122] crypto: api - compile out crypto_boot_test_finished when tests disabled The crypto_boot_test_finished static key is unnecessary when self-tests are disabled in the kconfig, so optimize it out accordingly, along with the entirety of crypto_start_tests(). This mainly avoids the overhead of an unnecessary static_branch_enable() on every boot. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/algapi.c | 7 +++++-- crypto/api.c | 8 +++++--- crypto/internal.h | 20 +++++++++++++++++++- 3 files changed, 29 insertions(+), 6 deletions(-) diff --git a/crypto/algapi.c b/crypto/algapi.c index 851b247f043d..d08f864f08be 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -454,7 +454,7 @@ int crypto_register_alg(struct crypto_alg *alg) down_write(&crypto_alg_sem); larval = __crypto_register_alg(alg, &algs_to_put); if (!IS_ERR_OR_NULL(larval)) { - test_started = static_key_enabled(&crypto_boot_test_finished); + test_started = crypto_boot_test_finished(); larval->test_started = test_started; } up_write(&crypto_alg_sem); @@ -1253,6 +1253,9 @@ EXPORT_SYMBOL_GPL(crypto_stats_skcipher_decrypt); static void __init crypto_start_tests(void) { + if (IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS)) + return; + for (;;) { struct crypto_larval *larval = NULL; struct crypto_alg *q; @@ -1286,7 +1289,7 @@ static void __init crypto_start_tests(void) crypto_wait_for_test(larval); } - static_branch_enable(&crypto_boot_test_finished); + set_crypto_boot_test_finished(); } static int __init crypto_algapi_init(void) diff --git a/crypto/api.c b/crypto/api.c index 52ce10a35366..b022702f6436 100644 --- a/crypto/api.c +++ b/crypto/api.c 
@@ -31,8 +31,10 @@ EXPORT_SYMBOL_GPL(crypto_alg_sem); BLOCKING_NOTIFIER_HEAD(crypto_chain); EXPORT_SYMBOL_GPL(crypto_chain); -DEFINE_STATIC_KEY_FALSE(crypto_boot_test_finished); -EXPORT_SYMBOL_GPL(crypto_boot_test_finished); +#ifndef CONFIG_CRYPTO_MANAGER_DISABLE_TESTS +DEFINE_STATIC_KEY_FALSE(__crypto_boot_test_finished); +EXPORT_SYMBOL_GPL(__crypto_boot_test_finished); +#endif static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg); @@ -202,7 +204,7 @@ static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg) struct crypto_larval *larval = (void *)alg; long timeout; - if (!static_branch_likely(&crypto_boot_test_finished)) + if (!crypto_boot_test_finished()) crypto_start_test(larval); timeout = wait_for_completion_killable_timeout( diff --git a/crypto/internal.h b/crypto/internal.h index c08385571853..932f0aafddc3 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -47,7 +47,25 @@ extern struct list_head crypto_alg_list; extern struct rw_semaphore crypto_alg_sem; extern struct blocking_notifier_head crypto_chain; -DECLARE_STATIC_KEY_FALSE(crypto_boot_test_finished); +#ifdef CONFIG_CRYPTO_MANAGER_DISABLE_TESTS +static inline bool crypto_boot_test_finished(void) +{ + return true; +} +static inline void set_crypto_boot_test_finished(void) +{ +} +#else +DECLARE_STATIC_KEY_FALSE(__crypto_boot_test_finished); +static inline bool crypto_boot_test_finished(void) +{ + return static_branch_likely(&__crypto_boot_test_finished); +} +static inline void set_crypto_boot_test_finished(void) +{ + static_branch_enable(&__crypto_boot_test_finished); +} +#endif /* !CONFIG_CRYPTO_MANAGER_DISABLE_TESTS */ #ifdef CONFIG_PROC_FS void __init crypto_init_proc(void); From 0bf365c0efdd8fc03cb82e381ea4d76196c66bc2 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 13 Nov 2022 16:12:36 -0800 Subject: [PATCH 2745/4122] crypto: kdf - skip self-test when tests disabled Make kdf_sp800108 honor the CONFIG_CRYPTO_MANAGER_DISABLE_TESTS kconfig option, so that it 
doesn't always waste time running its self-test. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/kdf_sp800108.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/crypto/kdf_sp800108.c b/crypto/kdf_sp800108.c index 58edf7797abf..c6e3ad82d5f7 100644 --- a/crypto/kdf_sp800108.c +++ b/crypto/kdf_sp800108.c @@ -125,9 +125,13 @@ static const struct kdf_testvec kdf_ctr_hmac_sha256_tv_template[] = { static int __init crypto_kdf108_init(void) { - int ret = kdf_test(&kdf_ctr_hmac_sha256_tv_template[0], "hmac(sha256)", - crypto_kdf108_setkey, crypto_kdf108_ctr_generate); + int ret; + if (IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS)) + return 0; + + ret = kdf_test(&kdf_ctr_hmac_sha256_tv_template[0], "hmac(sha256)", + crypto_kdf108_setkey, crypto_kdf108_ctr_generate); if (ret) { if (fips_enabled) panic("alg: self-tests for CTR-KDF (hmac(sha256)) failed (rc=%d)\n", From 790c4c9f532318e3fe8c6f0b498072abc80e1195 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 13 Nov 2022 16:12:37 -0800 Subject: [PATCH 2746/4122] crypto: kdf - silence noisy self-test Make the kdf_sp800108 self-test only print a message on success when fips_enabled, so that it's consistent with testmgr.c and doesn't spam the kernel log with a message that isn't really important. 
Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/kdf_sp800108.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/kdf_sp800108.c b/crypto/kdf_sp800108.c index c6e3ad82d5f7..c3f9938e1ad2 100644 --- a/crypto/kdf_sp800108.c +++ b/crypto/kdf_sp800108.c @@ -140,7 +140,7 @@ static int __init crypto_kdf108_init(void) WARN(1, "alg: self-tests for CTR-KDF (hmac(sha256)) failed (rc=%d)\n", ret); - } else { + } else if (fips_enabled) { pr_info("alg: self-tests for CTR-KDF (hmac(sha256)) passed\n"); } From 441cb1b730006bd2d636f72dc7f6e11a8a0ecce5 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 13 Nov 2022 16:12:38 -0800 Subject: [PATCH 2747/4122] crypto: algboss - compile out test-related code when tests disabled When CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is set, the code in algboss.c that handles CRYPTO_MSG_ALG_REGISTER is unnecessary, so make it be compiled out. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/algboss.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/crypto/algboss.c b/crypto/algboss.c index 13d37320a66e..0de1e6697949 100644 --- a/crypto/algboss.c +++ b/crypto/algboss.c @@ -175,11 +175,7 @@ static int cryptomgr_test(void *data) { struct crypto_test_param *param = data; u32 type = param->type; - int err = 0; - -#ifdef CONFIG_CRYPTO_MANAGER_DISABLE_TESTS - goto skiptest; -#endif + int err; err = alg_test(param->driver, param->alg, type, CRYPTO_ALG_TESTED); @@ -194,6 +190,9 @@ static int cryptomgr_schedule_test(struct crypto_alg *alg) struct task_struct *thread; struct crypto_test_param *param; + if (IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS)) + return NOTIFY_DONE; + if (!try_module_get(THIS_MODULE)) goto err; From 1aa33fc8d4032227253ceb736f47c52b859d9683 Mon Sep 17 00:00:00 2001 From: Zhang Yiqun Date: Wed, 16 Nov 2022 17:24:11 +0800 Subject: [PATCH 2748/4122] crypto: tcrypt - Fix multibuffer skcipher speed test mem leak In the past, the data for mb-skcipher test has 
been allocated twice, so the first allocated memory area is never freed, which may cause a memory leak. This patch removes one allocation to fix this error. Fixes: e161c5930c15 ("crypto: tcrypt - add multibuf skcipher...") Signed-off-by: Zhang Yiqun Signed-off-by: Herbert Xu --- crypto/tcrypt.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 0f101897e90f..a0833654ce94 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -1090,15 +1090,6 @@ static void test_mb_skcipher_speed(const char *algo, int enc, int secs, goto out_free_tfm; } - - for (i = 0; i < num_mb; ++i) - if (testmgr_alloc_buf(data[i].xbuf)) { - while (i--) - testmgr_free_buf(data[i].xbuf); - goto out_free_tfm; - } - - for (i = 0; i < num_mb; ++i) { data[i].req = skcipher_request_alloc(tfm, GFP_KERNEL); if (!data[i].req) { From 34c3a47d20ae55b3600fed733bf96eafe9c500d5 Mon Sep 17 00:00:00 2001 From: Daniel Jordan Date: Wed, 16 Nov 2022 20:28:02 -0500 Subject: [PATCH 2749/4122] padata: Always leave BHs disabled when running ->parallel() A deadlock can happen when an overloaded system runs ->parallel() in the context of the current task: padata_do_parallel ->parallel() pcrypt_aead_enc/dec padata_do_serial spin_lock(&reorder->lock) // BHs still enabled ... __do_softirq ... padata_do_serial spin_lock(&reorder->lock) It's a bug for BHs to be on in _do_serial as Steffen points out, so ensure they're off in the "current task" case like they are in padata_parallel_worker to avoid this situation. 
Reported-by: syzbot+bc05445bc14148d51915@syzkaller.appspotmail.com Fixes: 4611ce224688 ("padata: allocate work structures for parallel jobs from a pool") Signed-off-by: Daniel Jordan Acked-by: Steffen Klassert Signed-off-by: Herbert Xu --- kernel/padata.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/kernel/padata.c b/kernel/padata.c index e5819bb8bd1d..97f51e0c1776 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -207,14 +207,16 @@ int padata_do_parallel(struct padata_shell *ps, pw = padata_work_alloc(); spin_unlock(&padata_works_lock); + if (!pw) { + /* Maximum works limit exceeded, run in the current task. */ + padata->parallel(padata); + } + rcu_read_unlock_bh(); if (pw) { padata_work_init(pw, padata_parallel_worker, padata, 0); queue_work(pinst->parallel_wq, &pw->pw_work); - } else { - /* Maximum works limit exceeded, run in the current task. */ - padata->parallel(padata); } return 0; From 57ddfecc72a6c9941d159543e1c0c0a74fe9afdd Mon Sep 17 00:00:00 2001 From: Daniel Jordan Date: Wed, 16 Nov 2022 20:28:04 -0500 Subject: [PATCH 2750/4122] padata: Fix list iterator in padata_do_serial() list_for_each_entry_reverse() assumes that the iterated list is nonempty and that every list_head is embedded in the same type, but its use in padata_do_serial() breaks both rules. This doesn't cause any issues now because padata_priv and padata_list happen to have their list fields at the same offset, but we really shouldn't be relying on that. 
Fixes: bfde23ce200e ("padata: unbind parallel jobs from specific CPUs") Signed-off-by: Daniel Jordan Signed-off-by: Herbert Xu --- kernel/padata.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/kernel/padata.c b/kernel/padata.c index 97f51e0c1776..de90af5fcbe6 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -390,13 +390,16 @@ void padata_do_serial(struct padata_priv *padata) int hashed_cpu = padata_cpu_hash(pd, padata->seq_nr); struct padata_list *reorder = per_cpu_ptr(pd->reorder_list, hashed_cpu); struct padata_priv *cur; + struct list_head *pos; spin_lock(&reorder->lock); /* Sort in ascending order of sequence number. */ - list_for_each_entry_reverse(cur, &reorder->list, list) + list_for_each_prev(pos, &reorder->list) { + cur = list_entry(pos, struct padata_priv, list); if (cur->seq_nr < padata->seq_nr) break; - list_add(&padata->list, &cur->list); + } + list_add(&padata->list, pos); spin_unlock(&reorder->lock); /* From 8bd9974b6bfcd1e14a001deeca051aed7295559a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 18 Nov 2022 11:44:10 -0800 Subject: [PATCH 2751/4122] crypto: x86/aegis128 - fix possible crash with CFI enabled crypto_aegis128_aesni_enc(), crypto_aegis128_aesni_enc_tail(), crypto_aegis128_aesni_dec(), and crypto_aegis128_aesni_dec_tail() are called via indirect function calls. Therefore they need to use SYM_TYPED_FUNC_START instead of SYM_FUNC_START to cause their type hashes to be emitted when the kernel is built with CONFIG_CFI_CLANG=y. Otherwise, the code crashes with a CFI failure (if the compiler didn't happen to optimize out the indirect calls). 
Fixes: ccace936eec7 ("x86: Add types to indirectly called assembly functions") Acked-by: Peter Zijlstra (Intel) Reviewed-by: Sami Tolvanen Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/aegis128-aesni-asm.S | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis128-aesni-asm.S index b48ddebb4748..cdf3215ec272 100644 --- a/arch/x86/crypto/aegis128-aesni-asm.S +++ b/arch/x86/crypto/aegis128-aesni-asm.S @@ -7,6 +7,7 @@ */ #include +#include #include #define STATE0 %xmm0 @@ -402,7 +403,7 @@ SYM_FUNC_END(crypto_aegis128_aesni_ad) * void crypto_aegis128_aesni_enc(void *state, unsigned int length, * const void *src, void *dst); */ -SYM_FUNC_START(crypto_aegis128_aesni_enc) +SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc) FRAME_BEGIN cmp $0x10, LEN @@ -499,7 +500,7 @@ SYM_FUNC_END(crypto_aegis128_aesni_enc) * void crypto_aegis128_aesni_enc_tail(void *state, unsigned int length, * const void *src, void *dst); */ -SYM_FUNC_START(crypto_aegis128_aesni_enc_tail) +SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc_tail) FRAME_BEGIN /* load the state: */ @@ -556,7 +557,7 @@ SYM_FUNC_END(crypto_aegis128_aesni_enc_tail) * void crypto_aegis128_aesni_dec(void *state, unsigned int length, * const void *src, void *dst); */ -SYM_FUNC_START(crypto_aegis128_aesni_dec) +SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec) FRAME_BEGIN cmp $0x10, LEN @@ -653,7 +654,7 @@ SYM_FUNC_END(crypto_aegis128_aesni_dec) * void crypto_aegis128_aesni_dec_tail(void *state, unsigned int length, * const void *src, void *dst); */ -SYM_FUNC_START(crypto_aegis128_aesni_dec_tail) +SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec_tail) FRAME_BEGIN /* load the state: */ From c67b553a4f4a8bd921e4c9ceae00e111be09c488 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 18 Nov 2022 11:44:11 -0800 Subject: [PATCH 2752/4122] crypto: x86/aria - fix crash with CFI enabled aria_aesni_avx_encrypt_16way(), 
aria_aesni_avx_decrypt_16way(), aria_aesni_avx_ctr_crypt_16way(), aria_aesni_avx_gfni_encrypt_16way(), aria_aesni_avx_gfni_decrypt_16way(), and aria_aesni_avx_gfni_ctr_crypt_16way() are called via indirect function calls. Therefore they need to use SYM_TYPED_FUNC_START instead of SYM_FUNC_START to cause their type hashes to be emitted when the kernel is built with CONFIG_CFI_CLANG=y. Otherwise, the code crashes with a CFI failure. Fixes: ccace936eec7 ("x86: Add types to indirectly called assembly functions") Acked-by: Peter Zijlstra (Intel) Reviewed-by: Sami Tolvanen Cc: Taehee Yoo Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/aria-aesni-avx-asm_64.S | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/x86/crypto/aria-aesni-avx-asm_64.S b/arch/x86/crypto/aria-aesni-avx-asm_64.S index c75fd7d015ed..03ae4cd1d976 100644 --- a/arch/x86/crypto/aria-aesni-avx-asm_64.S +++ b/arch/x86/crypto/aria-aesni-avx-asm_64.S @@ -7,6 +7,7 @@ */ #include +#include #include /* struct aria_ctx: */ @@ -913,7 +914,7 @@ SYM_FUNC_START_LOCAL(__aria_aesni_avx_crypt_16way) RET; SYM_FUNC_END(__aria_aesni_avx_crypt_16way) -SYM_FUNC_START(aria_aesni_avx_encrypt_16way) +SYM_TYPED_FUNC_START(aria_aesni_avx_encrypt_16way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -938,7 +939,7 @@ SYM_FUNC_START(aria_aesni_avx_encrypt_16way) RET; SYM_FUNC_END(aria_aesni_avx_encrypt_16way) -SYM_FUNC_START(aria_aesni_avx_decrypt_16way) +SYM_TYPED_FUNC_START(aria_aesni_avx_decrypt_16way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -1039,7 +1040,7 @@ SYM_FUNC_START_LOCAL(__aria_aesni_avx_ctr_gen_keystream_16way) RET; SYM_FUNC_END(__aria_aesni_avx_ctr_gen_keystream_16way) -SYM_FUNC_START(aria_aesni_avx_ctr_crypt_16way) +SYM_TYPED_FUNC_START(aria_aesni_avx_ctr_crypt_16way) /* input: * %rdi: ctx * %rsi: dst @@ -1208,7 +1209,7 @@ SYM_FUNC_START_LOCAL(__aria_aesni_avx_gfni_crypt_16way) RET; SYM_FUNC_END(__aria_aesni_avx_gfni_crypt_16way) 
-SYM_FUNC_START(aria_aesni_avx_gfni_encrypt_16way) +SYM_TYPED_FUNC_START(aria_aesni_avx_gfni_encrypt_16way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -1233,7 +1234,7 @@ SYM_FUNC_START(aria_aesni_avx_gfni_encrypt_16way) RET; SYM_FUNC_END(aria_aesni_avx_gfni_encrypt_16way) -SYM_FUNC_START(aria_aesni_avx_gfni_decrypt_16way) +SYM_TYPED_FUNC_START(aria_aesni_avx_gfni_decrypt_16way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -1258,7 +1259,7 @@ SYM_FUNC_START(aria_aesni_avx_gfni_decrypt_16way) RET; SYM_FUNC_END(aria_aesni_avx_gfni_decrypt_16way) -SYM_FUNC_START(aria_aesni_avx_gfni_ctr_crypt_16way) +SYM_TYPED_FUNC_START(aria_aesni_avx_gfni_ctr_crypt_16way) /* input: * %rdi: ctx * %rsi: dst From 0f8bc4bd48dd148046c19c38568cd9449c79b45f Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 18 Nov 2022 11:44:12 -0800 Subject: [PATCH 2753/4122] crypto: x86/nhpoly1305 - eliminate unnecessary CFI wrappers Since the CFI implementation now supports indirect calls to assembly functions, take advantage of that rather than use wrapper functions. Acked-by: Peter Zijlstra (Intel) Reviewed-by: Sami Tolvanen Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/nh-avx2-x86_64.S | 5 +++-- arch/x86/crypto/nh-sse2-x86_64.S | 5 +++-- arch/x86/crypto/nhpoly1305-avx2-glue.c | 11 ++--------- arch/x86/crypto/nhpoly1305-sse2-glue.c | 11 ++--------- 4 files changed, 10 insertions(+), 22 deletions(-) diff --git a/arch/x86/crypto/nh-avx2-x86_64.S b/arch/x86/crypto/nh-avx2-x86_64.S index 6a0b15e7196a..ef73a3ab8726 100644 --- a/arch/x86/crypto/nh-avx2-x86_64.S +++ b/arch/x86/crypto/nh-avx2-x86_64.S @@ -8,6 +8,7 @@ */ #include +#include #define PASS0_SUMS %ymm0 #define PASS1_SUMS %ymm1 @@ -65,11 +66,11 @@ /* * void nh_avx2(const u32 *key, const u8 *message, size_t message_len, - * u8 hash[NH_HASH_BYTES]) + * __le64 hash[NH_NUM_PASSES]) * * It's guaranteed that message_len % 16 == 0. 
*/ -SYM_FUNC_START(nh_avx2) +SYM_TYPED_FUNC_START(nh_avx2) vmovdqu 0x00(KEY), K0 vmovdqu 0x10(KEY), K1 diff --git a/arch/x86/crypto/nh-sse2-x86_64.S b/arch/x86/crypto/nh-sse2-x86_64.S index 34c567bbcb4f..75fb994b6d17 100644 --- a/arch/x86/crypto/nh-sse2-x86_64.S +++ b/arch/x86/crypto/nh-sse2-x86_64.S @@ -8,6 +8,7 @@ */ #include +#include #define PASS0_SUMS %xmm0 #define PASS1_SUMS %xmm1 @@ -67,11 +68,11 @@ /* * void nh_sse2(const u32 *key, const u8 *message, size_t message_len, - * u8 hash[NH_HASH_BYTES]) + * __le64 hash[NH_NUM_PASSES]) * * It's guaranteed that message_len % 16 == 0. */ -SYM_FUNC_START(nh_sse2) +SYM_TYPED_FUNC_START(nh_sse2) movdqu 0x00(KEY), K0 movdqu 0x10(KEY), K1 diff --git a/arch/x86/crypto/nhpoly1305-avx2-glue.c b/arch/x86/crypto/nhpoly1305-avx2-glue.c index 8ea5ab0f1ca7..46b036204ed9 100644 --- a/arch/x86/crypto/nhpoly1305-avx2-glue.c +++ b/arch/x86/crypto/nhpoly1305-avx2-glue.c @@ -14,14 +14,7 @@ #include asmlinkage void nh_avx2(const u32 *key, const u8 *message, size_t message_len, - u8 hash[NH_HASH_BYTES]); - -/* wrapper to avoid indirect call to assembly, which doesn't work with CFI */ -static void _nh_avx2(const u32 *key, const u8 *message, size_t message_len, - __le64 hash[NH_NUM_PASSES]) -{ - nh_avx2(key, message, message_len, (u8 *)hash); -} + __le64 hash[NH_NUM_PASSES]); static int nhpoly1305_avx2_update(struct shash_desc *desc, const u8 *src, unsigned int srclen) @@ -33,7 +26,7 @@ static int nhpoly1305_avx2_update(struct shash_desc *desc, unsigned int n = min_t(unsigned int, srclen, SZ_4K); kernel_fpu_begin(); - crypto_nhpoly1305_update_helper(desc, src, n, _nh_avx2); + crypto_nhpoly1305_update_helper(desc, src, n, nh_avx2); kernel_fpu_end(); src += n; srclen -= n; diff --git a/arch/x86/crypto/nhpoly1305-sse2-glue.c b/arch/x86/crypto/nhpoly1305-sse2-glue.c index 2b353d42ed13..4a4970d75107 100644 --- a/arch/x86/crypto/nhpoly1305-sse2-glue.c +++ b/arch/x86/crypto/nhpoly1305-sse2-glue.c @@ -14,14 +14,7 @@ #include asmlinkage void 
nh_sse2(const u32 *key, const u8 *message, size_t message_len, - u8 hash[NH_HASH_BYTES]); - -/* wrapper to avoid indirect call to assembly, which doesn't work with CFI */ -static void _nh_sse2(const u32 *key, const u8 *message, size_t message_len, - __le64 hash[NH_NUM_PASSES]) -{ - nh_sse2(key, message, message_len, (u8 *)hash); -} + __le64 hash[NH_NUM_PASSES]); static int nhpoly1305_sse2_update(struct shash_desc *desc, const u8 *src, unsigned int srclen) @@ -33,7 +26,7 @@ static int nhpoly1305_sse2_update(struct shash_desc *desc, unsigned int n = min_t(unsigned int, srclen, SZ_4K); kernel_fpu_begin(); - crypto_nhpoly1305_update_helper(desc, src, n, _nh_sse2); + crypto_nhpoly1305_update_helper(desc, src, n, nh_sse2); kernel_fpu_end(); src += n; srclen -= n; From 32f34bf7e44eeaa241fb845d6f52af5104bc30fd Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 18 Nov 2022 11:44:13 -0800 Subject: [PATCH 2754/4122] crypto: x86/sha1 - fix possible crash with CFI enabled sha1_transform_ssse3(), sha1_transform_avx(), and sha1_ni_transform() (but not sha1_transform_avx2()) are called via indirect function calls. Therefore they need to use SYM_TYPED_FUNC_START instead of SYM_FUNC_START to cause their type hashes to be emitted when the kernel is built with CONFIG_CFI_CLANG=y. Otherwise, the code crashes with a CFI failure (if the compiler didn't happen to optimize out the indirect calls). 
Fixes: ccace936eec7 ("x86: Add types to indirectly called assembly functions") Acked-by: Peter Zijlstra (Intel) Reviewed-by: Sami Tolvanen Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/sha1_ni_asm.S | 3 ++- arch/x86/crypto/sha1_ssse3_asm.S | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/crypto/sha1_ni_asm.S b/arch/x86/crypto/sha1_ni_asm.S index 2f94ec0e763b..3cae5a1bb3d6 100644 --- a/arch/x86/crypto/sha1_ni_asm.S +++ b/arch/x86/crypto/sha1_ni_asm.S @@ -54,6 +54,7 @@ */ #include +#include #define DIGEST_PTR %rdi /* 1st arg */ #define DATA_PTR %rsi /* 2nd arg */ @@ -93,7 +94,7 @@ */ .text .align 32 -SYM_FUNC_START(sha1_ni_transform) +SYM_TYPED_FUNC_START(sha1_ni_transform) push %rbp mov %rsp, %rbp sub $FRAME_SIZE, %rsp diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S index 263f916362e0..f54988c80eb4 100644 --- a/arch/x86/crypto/sha1_ssse3_asm.S +++ b/arch/x86/crypto/sha1_ssse3_asm.S @@ -25,6 +25,7 @@ */ #include +#include #define CTX %rdi // arg1 #define BUF %rsi // arg2 @@ -67,7 +68,7 @@ * param: function's name */ .macro SHA1_VECTOR_ASM name - SYM_FUNC_START(\name) + SYM_TYPED_FUNC_START(\name) push %rbx push %r12 From 19940ebbb59c12146d05c5f8acd873197b290648 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 18 Nov 2022 11:44:14 -0800 Subject: [PATCH 2755/4122] crypto: x86/sha256 - fix possible crash with CFI enabled sha256_transform_ssse3(), sha256_transform_avx(), sha256_transform_rorx(), and sha256_ni_transform() are called via indirect function calls. Therefore they need to use SYM_TYPED_FUNC_START instead of SYM_FUNC_START to cause their type hashes to be emitted when the kernel is built with CONFIG_CFI_CLANG=y. Otherwise, the code crashes with a CFI failure (if the compiler didn't happen to optimize out the indirect calls). 
Fixes: ccace936eec7 ("x86: Add types to indirectly called assembly functions") Acked-by: Peter Zijlstra (Intel) Reviewed-by: Sami Tolvanen Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/sha256-avx-asm.S | 3 ++- arch/x86/crypto/sha256-avx2-asm.S | 3 ++- arch/x86/crypto/sha256-ssse3-asm.S | 3 ++- arch/x86/crypto/sha256_ni_asm.S | 3 ++- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/crypto/sha256-avx-asm.S index 3baa1ec39097..06ea30c20828 100644 --- a/arch/x86/crypto/sha256-avx-asm.S +++ b/arch/x86/crypto/sha256-avx-asm.S @@ -48,6 +48,7 @@ ######################################################################## #include +#include ## assume buffers not aligned #define VMOVDQ vmovdqu @@ -346,7 +347,7 @@ a = TMP_ ## arg 3 : Num blocks ######################################################################## .text -SYM_FUNC_START(sha256_transform_avx) +SYM_TYPED_FUNC_START(sha256_transform_avx) .align 32 pushq %rbx pushq %r12 diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S index 9bcdbc47b8b4..2d2be531a11e 100644 --- a/arch/x86/crypto/sha256-avx2-asm.S +++ b/arch/x86/crypto/sha256-avx2-asm.S @@ -49,6 +49,7 @@ ######################################################################## #include +#include ## assume buffers not aligned #define VMOVDQ vmovdqu @@ -523,7 +524,7 @@ STACK_SIZE = _CTX + _CTX_SIZE ## arg 3 : Num blocks ######################################################################## .text -SYM_FUNC_START(sha256_transform_rorx) +SYM_TYPED_FUNC_START(sha256_transform_rorx) .align 32 pushq %rbx pushq %r12 diff --git a/arch/x86/crypto/sha256-ssse3-asm.S b/arch/x86/crypto/sha256-ssse3-asm.S index c4a5db612c32..7db28839108d 100644 --- a/arch/x86/crypto/sha256-ssse3-asm.S +++ b/arch/x86/crypto/sha256-ssse3-asm.S @@ -47,6 +47,7 @@ ######################################################################## #include +#include ## assume buffers not aligned 
#define MOVDQ movdqu @@ -355,7 +356,7 @@ a = TMP_ ## arg 3 : Num blocks ######################################################################## .text -SYM_FUNC_START(sha256_transform_ssse3) +SYM_TYPED_FUNC_START(sha256_transform_ssse3) .align 32 pushq %rbx pushq %r12 diff --git a/arch/x86/crypto/sha256_ni_asm.S b/arch/x86/crypto/sha256_ni_asm.S index 94d50dd27cb5..47f93937f798 100644 --- a/arch/x86/crypto/sha256_ni_asm.S +++ b/arch/x86/crypto/sha256_ni_asm.S @@ -54,6 +54,7 @@ */ #include +#include #define DIGEST_PTR %rdi /* 1st arg */ #define DATA_PTR %rsi /* 2nd arg */ @@ -97,7 +98,7 @@ .text .align 32 -SYM_FUNC_START(sha256_ni_transform) +SYM_TYPED_FUNC_START(sha256_ni_transform) shl $6, NUM_BLKS /* convert to bytes */ jz .Ldone_hash From a1d72fa33186ac69c7d8120c71f41ea4fc23dcc9 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 18 Nov 2022 11:44:15 -0800 Subject: [PATCH 2756/4122] crypto: x86/sha512 - fix possible crash with CFI enabled sha512_transform_ssse3(), sha512_transform_avx(), and sha512_transform_rorx() are called via indirect function calls. Therefore they need to use SYM_TYPED_FUNC_START instead of SYM_FUNC_START to cause their type hashes to be emitted when the kernel is built with CONFIG_CFI_CLANG=y. Otherwise, the code crashes with a CFI failure (if the compiler didn't happen to optimize out the indirect calls). 
Fixes: ccace936eec7 ("x86: Add types to indirectly called assembly functions") Acked-by: Peter Zijlstra (Intel) Reviewed-by: Sami Tolvanen Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/sha512-avx-asm.S | 3 ++- arch/x86/crypto/sha512-avx2-asm.S | 3 ++- arch/x86/crypto/sha512-ssse3-asm.S | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/x86/crypto/sha512-avx-asm.S b/arch/x86/crypto/sha512-avx-asm.S index 1fefe6dd3a9e..b0984f19fdb4 100644 --- a/arch/x86/crypto/sha512-avx-asm.S +++ b/arch/x86/crypto/sha512-avx-asm.S @@ -48,6 +48,7 @@ ######################################################################## #include +#include .text @@ -273,7 +274,7 @@ frame_size = frame_WK + WK_SIZE # of SHA512 message blocks. # "blocks" is the message length in SHA512 blocks ######################################################################## -SYM_FUNC_START(sha512_transform_avx) +SYM_TYPED_FUNC_START(sha512_transform_avx) test msglen, msglen je nowork diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S index 5cdaab7d6901..b1ca99055ef9 100644 --- a/arch/x86/crypto/sha512-avx2-asm.S +++ b/arch/x86/crypto/sha512-avx2-asm.S @@ -50,6 +50,7 @@ ######################################################################## #include +#include .text @@ -565,7 +566,7 @@ frame_size = frame_CTX + CTX_SIZE # of SHA512 message blocks. 
# "blocks" is the message length in SHA512 blocks ######################################################################## -SYM_FUNC_START(sha512_transform_rorx) +SYM_TYPED_FUNC_START(sha512_transform_rorx) # Save GPRs push %rbx push %r12 diff --git a/arch/x86/crypto/sha512-ssse3-asm.S b/arch/x86/crypto/sha512-ssse3-asm.S index b84c22e06c5f..c06afb5270e5 100644 --- a/arch/x86/crypto/sha512-ssse3-asm.S +++ b/arch/x86/crypto/sha512-ssse3-asm.S @@ -48,6 +48,7 @@ ######################################################################## #include +#include .text @@ -274,7 +275,7 @@ frame_size = frame_WK + WK_SIZE # of SHA512 message blocks. # "blocks" is the message length in SHA512 blocks. ######################################################################## -SYM_FUNC_START(sha512_transform_ssse3) +SYM_TYPED_FUNC_START(sha512_transform_ssse3) test msglen, msglen je nowork From 8ba490d9f5a56f52091644325a32d3f71a982776 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 18 Nov 2022 11:44:16 -0800 Subject: [PATCH 2757/4122] crypto: x86/sm3 - fix possible crash with CFI enabled sm3_transform_avx() is called via indirect function calls. Therefore it needs to use SYM_TYPED_FUNC_START instead of SYM_FUNC_START to cause its type hash to be emitted when the kernel is built with CONFIG_CFI_CLANG=y. Otherwise, the code crashes with a CFI failure (if the compiler didn't happen to optimize out the indirect call). 
Fixes: ccace936eec7 ("x86: Add types to indirectly called assembly functions") Acked-by: Peter Zijlstra (Intel) Reviewed-by: Sami Tolvanen Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/sm3-avx-asm_64.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/crypto/sm3-avx-asm_64.S b/arch/x86/crypto/sm3-avx-asm_64.S index b12b9efb5ec5..8fc5ac681fd6 100644 --- a/arch/x86/crypto/sm3-avx-asm_64.S +++ b/arch/x86/crypto/sm3-avx-asm_64.S @@ -12,6 +12,7 @@ */ #include +#include #include /* Context structure */ @@ -328,7 +329,7 @@ * const u8 *data, int nblocks); */ .align 16 -SYM_FUNC_START(sm3_transform_avx) +SYM_TYPED_FUNC_START(sm3_transform_avx) /* input: * %rdi: ctx, CTX * %rsi: data (64*nblks bytes) From 2d203c46a0fa5df0785383b13b722483e1fd27a8 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 18 Nov 2022 11:44:17 -0800 Subject: [PATCH 2758/4122] crypto: x86/sm4 - fix crash with CFI enabled sm4_aesni_avx_ctr_enc_blk8(), sm4_aesni_avx_cbc_dec_blk8(), sm4_aesni_avx_cfb_dec_blk8(), sm4_aesni_avx2_ctr_enc_blk16(), sm4_aesni_avx2_cbc_dec_blk16(), and sm4_aesni_avx2_cfb_dec_blk16() are called via indirect function calls. Therefore they need to use SYM_TYPED_FUNC_START instead of SYM_FUNC_START to cause their type hashes to be emitted when the kernel is built with CONFIG_CFI_CLANG=y. Otherwise, the code crashes with a CFI failure. (Or at least that should be the case. For some reason the CFI checks in sm4_avx_cbc_decrypt(), sm4_avx_cfb_decrypt(), and sm4_avx_ctr_crypt() are not always being generated, using current tip-of-tree clang. Either way, this patch is a good idea.) 
Fixes: ccace936eec7 ("x86: Add types to indirectly called assembly functions") Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/sm4-aesni-avx-asm_64.S | 7 ++++--- arch/x86/crypto/sm4-aesni-avx2-asm_64.S | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/x86/crypto/sm4-aesni-avx-asm_64.S b/arch/x86/crypto/sm4-aesni-avx-asm_64.S index 4767ab61ff48..22b6560eb9e1 100644 --- a/arch/x86/crypto/sm4-aesni-avx-asm_64.S +++ b/arch/x86/crypto/sm4-aesni-avx-asm_64.S @@ -14,6 +14,7 @@ */ #include +#include #include #define rRIP (%rip) @@ -420,7 +421,7 @@ SYM_FUNC_END(sm4_aesni_avx_crypt8) * const u8 *src, u8 *iv) */ .align 8 -SYM_FUNC_START(sm4_aesni_avx_ctr_enc_blk8) +SYM_TYPED_FUNC_START(sm4_aesni_avx_ctr_enc_blk8) /* input: * %rdi: round key array, CTX * %rsi: dst (8 blocks) @@ -495,7 +496,7 @@ SYM_FUNC_END(sm4_aesni_avx_ctr_enc_blk8) * const u8 *src, u8 *iv) */ .align 8 -SYM_FUNC_START(sm4_aesni_avx_cbc_dec_blk8) +SYM_TYPED_FUNC_START(sm4_aesni_avx_cbc_dec_blk8) /* input: * %rdi: round key array, CTX * %rsi: dst (8 blocks) @@ -545,7 +546,7 @@ SYM_FUNC_END(sm4_aesni_avx_cbc_dec_blk8) * const u8 *src, u8 *iv) */ .align 8 -SYM_FUNC_START(sm4_aesni_avx_cfb_dec_blk8) +SYM_TYPED_FUNC_START(sm4_aesni_avx_cfb_dec_blk8) /* input: * %rdi: round key array, CTX * %rsi: dst (8 blocks) diff --git a/arch/x86/crypto/sm4-aesni-avx2-asm_64.S b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S index 4732fe8bb65b..23ee39a8ada8 100644 --- a/arch/x86/crypto/sm4-aesni-avx2-asm_64.S +++ b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S @@ -14,6 +14,7 @@ */ #include +#include #include #define rRIP (%rip) @@ -282,7 +283,7 @@ SYM_FUNC_END(__sm4_crypt_blk16) * const u8 *src, u8 *iv) */ .align 8 -SYM_FUNC_START(sm4_aesni_avx2_ctr_enc_blk16) +SYM_TYPED_FUNC_START(sm4_aesni_avx2_ctr_enc_blk16) /* input: * %rdi: round key array, CTX * %rsi: dst (16 blocks) @@ -395,7 +396,7 @@ SYM_FUNC_END(sm4_aesni_avx2_ctr_enc_blk16) * const u8 *src, u8 *iv) */ .align 8 
-SYM_FUNC_START(sm4_aesni_avx2_cbc_dec_blk16) +SYM_TYPED_FUNC_START(sm4_aesni_avx2_cbc_dec_blk16) /* input: * %rdi: round key array, CTX * %rsi: dst (16 blocks) @@ -449,7 +450,7 @@ SYM_FUNC_END(sm4_aesni_avx2_cbc_dec_blk16) * const u8 *src, u8 *iv) */ .align 8 -SYM_FUNC_START(sm4_aesni_avx2_cfb_dec_blk16) +SYM_TYPED_FUNC_START(sm4_aesni_avx2_cfb_dec_blk16) /* input: * %rdi: round key array, CTX * %rsi: dst (16 blocks) From e5e1c67e2f01d924e9583b67a907934948d852aa Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 18 Nov 2022 11:44:18 -0800 Subject: [PATCH 2759/4122] crypto: arm64/nhpoly1305 - eliminate unnecessary CFI wrapper Since the CFI implementation now supports indirect calls to assembly functions, take advantage of that rather than use a wrapper function. Acked-by: Peter Zijlstra (Intel) Reviewed-by: Sami Tolvanen Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/arm64/crypto/nh-neon-core.S | 5 +++-- arch/arm64/crypto/nhpoly1305-neon-glue.c | 11 ++--------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/arch/arm64/crypto/nh-neon-core.S b/arch/arm64/crypto/nh-neon-core.S index 51c0a534ef87..13eda08fda1e 100644 --- a/arch/arm64/crypto/nh-neon-core.S +++ b/arch/arm64/crypto/nh-neon-core.S @@ -8,6 +8,7 @@ */ #include +#include KEY .req x0 MESSAGE .req x1 @@ -58,11 +59,11 @@ /* * void nh_neon(const u32 *key, const u8 *message, size_t message_len, - * u8 hash[NH_HASH_BYTES]) + * __le64 hash[NH_NUM_PASSES]) * * It's guaranteed that message_len % 16 == 0. 
*/ -SYM_FUNC_START(nh_neon) +SYM_TYPED_FUNC_START(nh_neon) ld1 {K0.4s,K1.4s}, [KEY], #32 movi PASS0_SUMS.2d, #0 diff --git a/arch/arm64/crypto/nhpoly1305-neon-glue.c b/arch/arm64/crypto/nhpoly1305-neon-glue.c index c5405e6a6db7..cd882c35d925 100644 --- a/arch/arm64/crypto/nhpoly1305-neon-glue.c +++ b/arch/arm64/crypto/nhpoly1305-neon-glue.c @@ -14,14 +14,7 @@ #include asmlinkage void nh_neon(const u32 *key, const u8 *message, size_t message_len, - u8 hash[NH_HASH_BYTES]); - -/* wrapper to avoid indirect call to assembly, which doesn't work with CFI */ -static void _nh_neon(const u32 *key, const u8 *message, size_t message_len, - __le64 hash[NH_NUM_PASSES]) -{ - nh_neon(key, message, message_len, (u8 *)hash); -} + __le64 hash[NH_NUM_PASSES]); static int nhpoly1305_neon_update(struct shash_desc *desc, const u8 *src, unsigned int srclen) @@ -33,7 +26,7 @@ static int nhpoly1305_neon_update(struct shash_desc *desc, unsigned int n = min_t(unsigned int, srclen, SZ_4K); kernel_neon_begin(); - crypto_nhpoly1305_update_helper(desc, src, n, _nh_neon); + crypto_nhpoly1305_update_helper(desc, src, n, nh_neon); kernel_neon_end(); src += n; srclen -= n; From be8f6b6496076588fd49cbe5bfaaf3ab883eb779 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 18 Nov 2022 11:44:19 -0800 Subject: [PATCH 2760/4122] crypto: arm64/sm3 - fix possible crash with CFI enabled sm3_neon_transform() is called via indirect function calls. Therefore it needs to use SYM_TYPED_FUNC_START instead of SYM_FUNC_START to cause its type hash to be emitted when the kernel is built with CONFIG_CFI_CLANG=y. Otherwise, the code crashes with a CFI failure (if the compiler didn't happen to optimize out the indirect call). 
Fixes: c50d32859e70 ("arm64: Add types to indirect called assembly functions") Acked-by: Peter Zijlstra (Intel) Reviewed-by: Sami Tolvanen Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/arm64/crypto/sm3-neon-core.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/crypto/sm3-neon-core.S b/arch/arm64/crypto/sm3-neon-core.S index 3e3b4e5c736f..4357e0e51be3 100644 --- a/arch/arm64/crypto/sm3-neon-core.S +++ b/arch/arm64/crypto/sm3-neon-core.S @@ -9,6 +9,7 @@ */ #include +#include #include /* Context structure */ @@ -351,7 +352,7 @@ */ .text .align 3 -SYM_FUNC_START(sm3_neon_transform) +SYM_TYPED_FUNC_START(sm3_neon_transform) ldp ra, rb, [RSTATE, #0] ldp rc, rd, [RSTATE, #8] ldp re, rf, [RSTATE, #16] From cc7acaadf6ab5d44f43170eb568e1cc9739c3df4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 18 Nov 2022 11:44:20 -0800 Subject: [PATCH 2761/4122] crypto: arm/nhpoly1305 - eliminate unnecessary CFI wrapper The arm architecture doesn't support CFI yet, and even if it did, the new CFI implementation supports indirect calls to assembly functions. Therefore, there's no need to use a wrapper function for nh_neon(). Acked-by: Peter Zijlstra (Intel) Reviewed-by: Sami Tolvanen Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/arm/crypto/nh-neon-core.S | 2 +- arch/arm/crypto/nhpoly1305-neon-glue.c | 11 ++--------- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/arch/arm/crypto/nh-neon-core.S b/arch/arm/crypto/nh-neon-core.S index 434d80ab531c..01620a0782ca 100644 --- a/arch/arm/crypto/nh-neon-core.S +++ b/arch/arm/crypto/nh-neon-core.S @@ -69,7 +69,7 @@ /* * void nh_neon(const u32 *key, const u8 *message, size_t message_len, - * u8 hash[NH_HASH_BYTES]) + * __le64 hash[NH_NUM_PASSES]) * * It's guaranteed that message_len % 16 == 0. 
*/ diff --git a/arch/arm/crypto/nhpoly1305-neon-glue.c b/arch/arm/crypto/nhpoly1305-neon-glue.c index ffa8d73fe722..e93e41ff2656 100644 --- a/arch/arm/crypto/nhpoly1305-neon-glue.c +++ b/arch/arm/crypto/nhpoly1305-neon-glue.c @@ -14,14 +14,7 @@ #include asmlinkage void nh_neon(const u32 *key, const u8 *message, size_t message_len, - u8 hash[NH_HASH_BYTES]); - -/* wrapper to avoid indirect call to assembly, which doesn't work with CFI */ -static void _nh_neon(const u32 *key, const u8 *message, size_t message_len, - __le64 hash[NH_NUM_PASSES]) -{ - nh_neon(key, message, message_len, (u8 *)hash); -} + __le64 hash[NH_NUM_PASSES]); static int nhpoly1305_neon_update(struct shash_desc *desc, const u8 *src, unsigned int srclen) @@ -33,7 +26,7 @@ static int nhpoly1305_neon_update(struct shash_desc *desc, unsigned int n = min_t(unsigned int, srclen, SZ_4K); kernel_neon_begin(); - crypto_nhpoly1305_update_helper(desc, src, n, _nh_neon); + crypto_nhpoly1305_update_helper(desc, src, n, nh_neon); kernel_neon_end(); src += n; srclen -= n; From c060e16ddb51a92b1f7fa84c628d287ea5799864 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 18 Nov 2022 11:44:21 -0800 Subject: [PATCH 2762/4122] Revert "crypto: shash - avoid comparing pointers to exported functions under CFI" This reverts commit 22ca9f4aaf431a9413dcc115dd590123307f274f because CFI no longer breaks cross-module function address equality, so crypto_shash_alg_has_setkey() can now be an inline function like before. This commit should not be backported to kernels that don't have the new CFI implementation. 
Acked-by: Peter Zijlstra (Intel) Reviewed-by: Sami Tolvanen Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/shash.c | 18 +++--------------- include/crypto/internal/hash.h | 8 +++++++- 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/crypto/shash.c b/crypto/shash.c index 4c88e63b3350..0f8543158826 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -20,24 +20,12 @@ static const struct crypto_type crypto_shash_type; -static int shash_no_setkey(struct crypto_shash *tfm, const u8 *key, - unsigned int keylen) +int shash_no_setkey(struct crypto_shash *tfm, const u8 *key, + unsigned int keylen) { return -ENOSYS; } - -/* - * Check whether an shash algorithm has a setkey function. - * - * For CFI compatibility, this must not be an inline function. This is because - * when CFI is enabled, modules won't get the same address for shash_no_setkey - * (if it were exported, which inlining would require) as the core kernel will. - */ -bool crypto_shash_alg_has_setkey(struct shash_alg *alg) -{ - return alg->setkey != shash_no_setkey; -} -EXPORT_SYMBOL_GPL(crypto_shash_alg_has_setkey); +EXPORT_SYMBOL_GPL(shash_no_setkey); static int shash_setkey_unaligned(struct crypto_shash *tfm, const u8 *key, unsigned int keylen) diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index 25806141db59..0a288dddcf5b 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -75,7 +75,13 @@ void crypto_unregister_ahashes(struct ahash_alg *algs, int count); int ahash_register_instance(struct crypto_template *tmpl, struct ahash_instance *inst); -bool crypto_shash_alg_has_setkey(struct shash_alg *alg); +int shash_no_setkey(struct crypto_shash *tfm, const u8 *key, + unsigned int keylen); + +static inline bool crypto_shash_alg_has_setkey(struct shash_alg *alg) +{ + return alg->setkey != shash_no_setkey; +} static inline bool crypto_shash_alg_needs_key(struct shash_alg *alg) { From b8ed0bff96393cbeef66f00f34fda2a4960b75e4 Mon 
Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:35:40 +0100 Subject: [PATCH 2763/4122] crypto: atmel-ecc - Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. Signed-off-by: Uwe Kleine-König Signed-off-by: Herbert Xu --- drivers/crypto/atmel-ecc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/atmel-ecc.c b/drivers/crypto/atmel-ecc.c index 82bf15d49561..53100fb9b07b 100644 --- a/drivers/crypto/atmel-ecc.c +++ b/drivers/crypto/atmel-ecc.c @@ -311,9 +311,9 @@ static struct kpp_alg atmel_ecdh_nist_p256 = { }, }; -static int atmel_ecc_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int atmel_ecc_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct atmel_i2c_client_priv *i2c_priv; int ret; @@ -390,7 +390,7 @@ static struct i2c_driver atmel_ecc_driver = { .name = "atmel-ecc", .of_match_table = of_match_ptr(atmel_ecc_dt_ids), }, - .probe = atmel_ecc_probe, + .probe_new = atmel_ecc_probe, .remove = atmel_ecc_remove, .id_table = atmel_ecc_id, }; From fa2ca3b275874d61f42e68f5eb13645bbeb5d72b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:35:41 +0100 Subject: [PATCH 2764/4122] crypto: atmel-sha204a - Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Herbert Xu --- drivers/crypto/atmel-sha204a.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/atmel-sha204a.c b/drivers/crypto/atmel-sha204a.c index c0103e7fc2e7..272a06f0b588 100644 --- a/drivers/crypto/atmel-sha204a.c +++ b/drivers/crypto/atmel-sha204a.c @@ -91,9 +91,9 @@ static int atmel_sha204a_rng_read(struct hwrng *rng, void *data, size_t max, return max; } -static int atmel_sha204a_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int atmel_sha204a_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct atmel_i2c_client_priv *i2c_priv; int ret; @@ -142,7 +142,7 @@ static const struct i2c_device_id atmel_sha204a_id[] = { MODULE_DEVICE_TABLE(i2c, atmel_sha204a_id); static struct i2c_driver atmel_sha204a_driver = { - .probe = atmel_sha204a_probe, + .probe_new = atmel_sha204a_probe, .remove = atmel_sha204a_remove, .id_table = atmel_sha204a_id, From 3901355624d14afe3230252cb36bc3da8ff6890e Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Sat, 19 Nov 2022 17:48:43 +0800 Subject: [PATCH 2765/4122] crypto: hisilicon/qm - fix 'QM_XEQ_DEPTH_CAP' mask value 'QM_XEQ_DEPTH_CAP' mask value is GENMASK(31, 0) instead of GENMASK(15, 0). If the mask value is incorrect, abnormal events cannot be handled. So fix it.
Fixes: 129a9f340172 ("crypto: hisilicon/qm - get qp num and depth from hardware registers") Signed-off-by: Weili Qian Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 36d70b9f6117..9072bee7336f 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -336,7 +336,7 @@ static const struct hisi_qm_cap_info qm_cap_info_vf[] = { static const struct hisi_qm_cap_info qm_basic_info[] = { {QM_TOTAL_QP_NUM_CAP, 0x100158, 0, GENMASK(10, 0), 0x1000, 0x400, 0x400}, {QM_FUNC_MAX_QP_CAP, 0x100158, 11, GENMASK(10, 0), 0x1000, 0x400, 0x400}, - {QM_XEQ_DEPTH_CAP, 0x3104, 0, GENMASK(15, 0), 0x800, 0x4000800, 0x4000800}, + {QM_XEQ_DEPTH_CAP, 0x3104, 0, GENMASK(31, 0), 0x800, 0x4000800, 0x4000800}, {QM_QP_DEPTH_CAP, 0x3108, 0, GENMASK(31, 0), 0x4000400, 0x4000400, 0x4000400}, {QM_EQ_IRQ_TYPE_CAP, 0x310c, 0, GENMASK(31, 0), 0x10000, 0x10000, 0x10000}, {QM_AEQ_IRQ_TYPE_CAP, 0x3110, 0, GENMASK(31, 0), 0x0, 0x10001, 0x10001}, From 5f9c97a0e6dc873f662528ae591f2bd500eb5940 Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Sat, 19 Nov 2022 17:50:03 +0800 Subject: [PATCH 2766/4122] crypto: hisilicon/qm - add device status check when start fails In function 'hisi_qm_resume', if the device fails to be started, directly returning error code will cause the device to be unavailable. However, the failure may be caused by device error, which will be reported to the driver, and driver can reset and restart device. Therefore, check device status instead of returning error code directly. Returns 0 if device error has occurred, otherwise returns error code. 
Signed-off-by: Weili Qian Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 9072bee7336f..007ac7a69ce7 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -5468,8 +5468,14 @@ int hisi_qm_resume(struct device *dev) } ret = hisi_qm_start(qm); - if (ret) - pci_err(pdev, "failed to start qm(%d)\n", ret); + if (ret) { + if (qm_check_dev_error(qm)) { + pci_info(pdev, "failed to start qm due to device error, device will be reset!\n"); + return 0; + } + + pci_err(pdev, "failed to start qm(%d)!\n", ret); + } return ret; } From 83478938f78fd640c72af83d739a90c840e1b876 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sat, 19 Nov 2022 14:42:59 +0100 Subject: [PATCH 2767/4122] hwrng: u2fzero - account for high quality RNG The U2F zero apparently has a real TRNG in it with maximum quality, not one with quality of "1", which was likely a misinterpretation of the field as a boolean. So remove the assignment entirely, so that we get the default quality setting. 
In the u2f-zero firmware, the 0x21 RNG command used by this driver is handled as such [1]: case U2F_CUSTOM_GET_RNG: if (atecc_send_recv(ATECC_CMD_RNG,ATECC_RNG_P1,ATECC_RNG_P2, NULL, 0, appdata.tmp, sizeof(appdata.tmp), &res) == 0 ) { memmove(msg->pkt.init.payload, res.buf, 32); U2FHID_SET_LEN(msg, 32); usb_write((uint8_t*)msg, 64); } else { U2FHID_SET_LEN(msg, 0); usb_write((uint8_t*)msg, 64); } This same call to `atecc_send_recv(ATECC_CMD_RNG,ATECC_RNG_P1, ATECC_RNG_P2,...)` is then also used in the token's cryptographically critical "u2f_new_keypair" function, as its rather straightforward source of random bytes [2]: int8_t u2f_new_keypair(uint8_t * handle, uint8_t * appid, uint8_t * pubkey) { struct atecc_response res; uint8_t private_key[36]; int i; watchdog(); if (atecc_send_recv(ATECC_CMD_RNG,ATECC_RNG_P1,ATECC_RNG_P2, NULL, 0, appdata.tmp, sizeof(appdata.tmp), &res) != 0 ) { return -1; } So it seems rather plain that the ATECC RNG is considered to provide good random numbers. [1] https://github.com/conorpp/u2f-zero/blob/master/firmware/src/custom.c [2] https://github.com/conorpp/u2f-zero/blob/master/firmware/src/u2f_atecc.c Cc: Andrej Shadura Cc: Jiri Kosina Cc: Herbert Xu Signed-off-by: Jason A. 
Donenfeld Acked-by: Andrej Shadura Signed-off-by: Herbert Xu --- drivers/hid/hid-u2fzero.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/hid/hid-u2fzero.c b/drivers/hid/hid-u2fzero.c index ad489caf53ad..744a91e6e78c 100644 --- a/drivers/hid/hid-u2fzero.c +++ b/drivers/hid/hid-u2fzero.c @@ -261,7 +261,6 @@ static int u2fzero_init_hwrng(struct u2fzero_device *dev, dev->hwrng.name = dev->rng_name; dev->hwrng.read = u2fzero_rng_read; - dev->hwrng.quality = 1; return devm_hwrng_register(&dev->hdev->dev, &dev->hwrng); } From 70aa0a5551f6a752f3a071529fffde903f1a2637 Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Fri, 25 Nov 2022 07:13:56 +0000 Subject: [PATCH 2768/4122] dt-bindings: misc: qcom,fastrpc: increase allowed iommus entries The fastrpc components on the SM8550 SoC can require up to 3 IOMMU entries, this bumps the maxItems to 3 for this purpose. Signed-off-by: Abel Vesa Signed-off-by: Neil Armstrong Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221125071405.148786-2-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/misc/qcom,fastrpc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/misc/qcom,fastrpc.yaml b/Documentation/devicetree/bindings/misc/qcom,fastrpc.yaml index d7576f8ac94b..1ab9588cdd89 100644 --- a/Documentation/devicetree/bindings/misc/qcom,fastrpc.yaml +++ b/Documentation/devicetree/bindings/misc/qcom,fastrpc.yaml @@ -79,7 +79,7 @@ patternProperties: iommus: minItems: 1 - maxItems: 2 + maxItems: 3 qcom,nsessions: $ref: /schemas/types.yaml#/definitions/uint32 From 1959ab9edccd3de4bc8a876f97ce269bb9beeb31 Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Fri, 25 Nov 2022 07:13:57 +0000 Subject: [PATCH 2769/4122] misc: fastrpc: Rename audio protection domain to root The AUDIO_PD will be done via static pd, so the proper name here is actually ROOT_PD. 
Co-developed-by: Srinivas Kandagatla Signed-off-by: Abel Vesa Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221125071405.148786-3-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index 7ff0b63c25e3..f2bda08adca7 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -83,7 +83,7 @@ #define FASTRPC_RMID_INIT_MEM_UNMAP 11 /* Protection Domain(PD) ids */ -#define AUDIO_PD (0) /* also GUEST_OS PD? */ +#define ROOT_PD (0) #define USER_PD (1) #define SENSORS_PD (2) @@ -1886,7 +1886,7 @@ static long fastrpc_device_ioctl(struct file *file, unsigned int cmd, err = fastrpc_invoke(fl, argp); break; case FASTRPC_IOCTL_INIT_ATTACH: - err = fastrpc_init_attach(fl, AUDIO_PD); + err = fastrpc_init_attach(fl, ROOT_PD); break; case FASTRPC_IOCTL_INIT_ATTACH_SNS: err = fastrpc_init_attach(fl, SENSORS_PD); From 1ce91d45ba77a4f6bf9209d142d5c89c42cf877a Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Fri, 25 Nov 2022 07:13:58 +0000 Subject: [PATCH 2770/4122] misc: fastrpc: Add reserved mem support The reserved mem support is needed for CMA heap support, which will be used by AUDIOPD. 
Co-developed-by: Srinivas Kandagatla Signed-off-by: Abel Vesa Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221125071405.148786-4-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index f2bda08adca7..3d5809622a6b 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -19,6 +19,7 @@ #include #include #include +#include #define ADSP_DOMAIN_ID (0) #define MDSP_DOMAIN_ID (1) @@ -2065,6 +2066,9 @@ static int fastrpc_rpmsg_probe(struct rpmsg_device *rpdev) return -EINVAL; } + if (of_reserved_mem_device_init_by_idx(rdev, rdev->of_node, 0)) + dev_info(rdev, "no reserved DMA memory for FASTRPC\n"); + vmcount = of_property_read_variable_u32_array(rdev->of_node, "qcom,vmids", &vmids[0], 0, FASTRPC_MAX_VMIDS); if (vmcount < 0) From 6f18c7e845346f365e08613fdc47a60fc201aedb Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Fri, 25 Nov 2022 07:13:59 +0000 Subject: [PATCH 2771/4122] misc: fastrpc: Add fastrpc_remote_heap_alloc Split fastrpc_buf_alloc in such a way it allows allocation of remote heap too and add fastrpc_remote_heap_alloc to do so. 
Co-developed-by: Srinivas Kandagatla Signed-off-by: Abel Vesa Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221125071405.148786-5-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index 3d5809622a6b..8b43fe5207fb 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -369,7 +369,7 @@ static void fastrpc_buf_free(struct fastrpc_buf *buf) kfree(buf); } -static int fastrpc_buf_alloc(struct fastrpc_user *fl, struct device *dev, +static int __fastrpc_buf_alloc(struct fastrpc_user *fl, struct device *dev, u64 size, struct fastrpc_buf **obuf) { struct fastrpc_buf *buf; @@ -397,14 +397,37 @@ static int fastrpc_buf_alloc(struct fastrpc_user *fl, struct device *dev, return -ENOMEM; } - if (fl->sctx && fl->sctx->sid) - buf->phys += ((u64)fl->sctx->sid << 32); - *obuf = buf; return 0; } +static int fastrpc_buf_alloc(struct fastrpc_user *fl, struct device *dev, + u64 size, struct fastrpc_buf **obuf) +{ + int ret; + struct fastrpc_buf *buf; + + ret = __fastrpc_buf_alloc(fl, dev, size, obuf); + if (ret) + return ret; + + buf = *obuf; + + if (fl->sctx && fl->sctx->sid) + buf->phys += ((u64)fl->sctx->sid << 32); + + return 0; +} + +static int fastrpc_remote_heap_alloc(struct fastrpc_user *fl, struct device *dev, + u64 size, struct fastrpc_buf **obuf) +{ + struct device *rdev = &fl->cctx->rpdev->dev; + + return __fastrpc_buf_alloc(fl, rdev, size, obuf); +} + static void fastrpc_channel_ctx_free(struct kref *ref) { struct fastrpc_channel_ctx *cctx; From 334f1a1cbe032d85fd58e771629e3a3b373b96d5 Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Fri, 25 Nov 2022 07:14:00 +0000 Subject: [PATCH 2772/4122] misc: fastrpc: Use fastrpc_map_put in fastrpc_map_create on fail Move the kref_init right after the allocation so that we can use fastrpc_map_put on any following error 
case. Signed-off-by: Abel Vesa Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221125071405.148786-6-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index 8b43fe5207fb..332626df5d39 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -735,6 +735,8 @@ static int fastrpc_map_create(struct fastrpc_user *fl, int fd, return -ENOMEM; INIT_LIST_HEAD(&map->node); + kref_init(&map->refcount); + map->fl = fl; map->fd = fd; map->buf = dma_buf_get(fd); @@ -761,7 +763,6 @@ static int fastrpc_map_create(struct fastrpc_user *fl, int fd, map->size = len; map->va = sg_virt(map->table->sgl); map->len = len; - kref_init(&map->refcount); if (attr & FASTRPC_ATTR_SECUREMAP) { /* @@ -791,7 +792,7 @@ map_err: attach_err: dma_buf_put(map->buf); get_err: - kfree(map); + fastrpc_map_put(map); return err; } From 72fa6f7820c4cf96c5f7aabc4e54bdf52d1e2ac2 Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Fri, 25 Nov 2022 07:14:01 +0000 Subject: [PATCH 2773/4122] misc: fastrpc: Rework fastrpc_req_munmap Move the lookup of the munmap request to the fastrpc_req_munmap and pass on only the buf to the lower level fastrpc_req_munmap_impl. That way we can use the lower level fastrpc_req_munmap_impl on error path in fastrpc_req_mmap to free the buf without searching for the munmap request it belongs to. 
Co-developed-by: Srinivas Kandagatla Signed-off-by: Abel Vesa Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221125071405.148786-7-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 47 +++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index 332626df5d39..4590a11f7316 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -1627,30 +1627,14 @@ static int fastrpc_get_dsp_info(struct fastrpc_user *fl, char __user *argp) return 0; } -static int fastrpc_req_munmap_impl(struct fastrpc_user *fl, - struct fastrpc_req_munmap *req) +static int fastrpc_req_munmap_impl(struct fastrpc_user *fl, struct fastrpc_buf *buf) { struct fastrpc_invoke_args args[1] = { [0] = { 0 } }; - struct fastrpc_buf *buf = NULL, *iter, *b; struct fastrpc_munmap_req_msg req_msg; struct device *dev = fl->sctx->dev; int err; u32 sc; - spin_lock(&fl->lock); - list_for_each_entry_safe(iter, b, &fl->mmaps, node) { - if ((iter->raddr == req->vaddrout) && (iter->size == req->size)) { - buf = iter; - break; - } - } - spin_unlock(&fl->lock); - - if (!buf) { - dev_err(dev, "mmap not in list\n"); - return -EINVAL; - } - req_msg.pgid = fl->tgid; req_msg.size = buf->size; req_msg.vaddr = buf->raddr; @@ -1676,12 +1660,29 @@ static int fastrpc_req_munmap_impl(struct fastrpc_user *fl, static int fastrpc_req_munmap(struct fastrpc_user *fl, char __user *argp) { + struct fastrpc_buf *buf = NULL, *iter, *b; struct fastrpc_req_munmap req; + struct device *dev = fl->sctx->dev; if (copy_from_user(&req, argp, sizeof(req))) return -EFAULT; - return fastrpc_req_munmap_impl(fl, &req); + spin_lock(&fl->lock); + list_for_each_entry_safe(iter, b, &fl->mmaps, node) { + if ((iter->raddr == req.vaddrout) && (iter->size == req.size)) { + buf = iter; + break; + } + } + spin_unlock(&fl->lock); + + if (!buf) { + dev_err(dev, "mmap\t\tpt 0x%09llx [len 
0x%08llx] not in list\n", + req.vaddrout, req.size); + return -EINVAL; + } + + return fastrpc_req_munmap_impl(fl, buf); } static int fastrpc_req_mmap(struct fastrpc_user *fl, char __user *argp) @@ -1690,7 +1691,6 @@ static int fastrpc_req_mmap(struct fastrpc_user *fl, char __user *argp) struct fastrpc_buf *buf = NULL; struct fastrpc_mmap_req_msg req_msg; struct fastrpc_mmap_rsp_msg rsp_msg; - struct fastrpc_req_munmap req_unmap; struct fastrpc_phy_page pages; struct fastrpc_req_mmap req; struct device *dev = fl->sctx->dev; @@ -1752,11 +1752,8 @@ static int fastrpc_req_mmap(struct fastrpc_user *fl, char __user *argp) spin_unlock(&fl->lock); if (copy_to_user((void __user *)argp, &req, sizeof(req))) { - /* unmap the memory and release the buffer */ - req_unmap.vaddrout = buf->raddr; - req_unmap.size = buf->size; - fastrpc_req_munmap_impl(fl, &req_unmap); - return -EFAULT; + err = -EFAULT; + goto err_assign; } dev_dbg(dev, "mmap\t\tpt 0x%09lx OK [len 0x%08llx]\n", @@ -1764,6 +1761,8 @@ static int fastrpc_req_mmap(struct fastrpc_user *fl, char __user *argp) return 0; +err_assign: + fastrpc_req_munmap_impl(fl, buf); err_invoke: fastrpc_buf_free(buf); From 0871561055e666da421d779397efcc1e5e964cab Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Fri, 25 Nov 2022 07:14:02 +0000 Subject: [PATCH 2774/4122] misc: fastrpc: Add support for audiopd In order to be able to start the adsp listener for audiopd using adsprpcd, we need to add the corresponding ioctl for creating a static process. On that ioctl call we need to allocate the heap. Allocating the heap needs to be happening only once and needs to be kept between different device open calls, so attach it to the channel context to make sure that remains until the RPMSG driver is removed. Then, if there are any VMIDs associated with the static ADSP process, do a call to SCM to assign it. And then, send all the necessary info related to heap to the DSP. 
Co-developed-by: Srinivas Kandagatla Signed-off-by: Abel Vesa Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221125071405.148786-8-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 135 ++++++++++++++++++++++++++++++++++++ include/uapi/misc/fastrpc.h | 7 ++ 2 files changed, 142 insertions(+) diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index 4590a11f7316..adc44ba0eff6 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -37,8 +37,20 @@ #define FASTRPC_DSP_UTILITIES_HANDLE 2 #define FASTRPC_CTXID_MASK (0xFF0) #define INIT_FILELEN_MAX (2 * 1024 * 1024) +#define INIT_FILE_NAMELEN_MAX (128) #define FASTRPC_DEVICE_NAME "fastrpc" + +/* Add memory to static PD pool, protection thru XPU */ +#define ADSP_MMAP_HEAP_ADDR 4 +/* MAP static DMA buffer on DSP User PD */ +#define ADSP_MMAP_DMA_BUFFER 6 +/* Add memory to static PD pool protection thru hypervisor */ +#define ADSP_MMAP_REMOTE_HEAP_ADDR 8 +/* Add memory to userPD pool, for user heap */ #define ADSP_MMAP_ADD_PAGES 0x1000 +/* Add memory to userPD pool, for LLC heap */ +#define ADSP_MMAP_ADD_PAGES_LLC 0x3000, + #define DSP_UNSUPPORTED_API (0x80000414) /* MAX NUMBER of DSP ATTRIBUTES SUPPORTED */ #define FASTRPC_MAX_DSP_ATTRIBUTES (256) @@ -72,6 +84,7 @@ FASTRPC_BUILD_SCALARS(0, method, in, out, 0, 0) #define FASTRPC_CREATE_PROCESS_NARGS 6 +#define FASTRPC_CREATE_STATIC_PROCESS_NARGS 3 /* Remote Method id table */ #define FASTRPC_RMID_INIT_ATTACH 0 #define FASTRPC_RMID_INIT_RELEASE 1 @@ -261,6 +274,7 @@ struct fastrpc_channel_ctx { u32 dsp_attributes[FASTRPC_MAX_DSP_ATTRIBUTES]; struct fastrpc_device *secure_fdevice; struct fastrpc_device *fdevice; + struct fastrpc_buf *remote_heap; bool secure; bool unsigned_support; }; @@ -1157,6 +1171,7 @@ bail: spin_unlock(&fl->lock); fastrpc_context_put(ctx); } + if (err) dev_dbg(fl->sctx->dev, "Error: Invoke Failed %d\n", err); @@ -1181,6 +1196,120 @@ static bool 
is_session_rejected(struct fastrpc_user *fl, bool unsigned_pd_reques return false; } +static int fastrpc_init_create_static_process(struct fastrpc_user *fl, + char __user *argp) +{ + struct fastrpc_init_create_static init; + struct fastrpc_invoke_args *args; + struct fastrpc_phy_page pages[1]; + char *name; + int err; + struct { + int pgid; + u32 namelen; + u32 pageslen; + } inbuf; + u32 sc; + + args = kcalloc(FASTRPC_CREATE_STATIC_PROCESS_NARGS, sizeof(*args), GFP_KERNEL); + if (!args) + return -ENOMEM; + + if (copy_from_user(&init, argp, sizeof(init))) { + err = -EFAULT; + goto err; + } + + if (init.namelen > INIT_FILE_NAMELEN_MAX) { + err = -EINVAL; + goto err; + } + + name = kzalloc(init.namelen, GFP_KERNEL); + if (!name) { + err = -ENOMEM; + goto err; + } + + if (copy_from_user(name, (void __user *)(uintptr_t)init.name, init.namelen)) { + err = -EFAULT; + goto err_name; + } + + if (!fl->cctx->remote_heap) { + err = fastrpc_remote_heap_alloc(fl, fl->sctx->dev, init.memlen, + &fl->cctx->remote_heap); + if (err) + goto err_name; + + /* Map if we have any heap VMIDs associated with this ADSP Static Process. 
*/ + if (fl->cctx->vmcount) { + unsigned int perms = BIT(QCOM_SCM_VMID_HLOS); + + err = qcom_scm_assign_mem(fl->cctx->remote_heap->phys, + (u64)fl->cctx->remote_heap->size, &perms, + fl->cctx->vmperms, fl->cctx->vmcount); + if (err) { + dev_err(fl->sctx->dev, "Failed to assign memory with phys 0x%llx size 0x%llx err %d", + fl->cctx->remote_heap->phys, fl->cctx->remote_heap->size, err); + goto err_map; + } + } + } + + inbuf.pgid = fl->tgid; + inbuf.namelen = init.namelen; + inbuf.pageslen = 0; + fl->pd = USER_PD; + + args[0].ptr = (u64)(uintptr_t)&inbuf; + args[0].length = sizeof(inbuf); + args[0].fd = -1; + + args[1].ptr = (u64)(uintptr_t)name; + args[1].length = inbuf.namelen; + args[1].fd = -1; + + pages[0].addr = fl->cctx->remote_heap->phys; + pages[0].size = fl->cctx->remote_heap->size; + + args[2].ptr = (u64)(uintptr_t) pages; + args[2].length = sizeof(*pages); + args[2].fd = -1; + + sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_CREATE_STATIC, 3, 0); + + err = fastrpc_internal_invoke(fl, true, FASTRPC_INIT_HANDLE, + sc, args); + if (err) + goto err_invoke; + + kfree(args); + + return 0; +err_invoke: + if (fl->cctx->vmcount) { + struct qcom_scm_vmperm perm; + + perm.vmid = QCOM_SCM_VMID_HLOS; + perm.perm = QCOM_SCM_PERM_RWX; + err = qcom_scm_assign_mem(fl->cctx->remote_heap->phys, + (u64)fl->cctx->remote_heap->size, + &(fl->cctx->vmperms[0].vmid), &perm, 1); + if (err) + dev_err(fl->sctx->dev, "Failed to assign memory phys 0x%llx size 0x%llx err %d", + fl->cctx->remote_heap->phys, fl->cctx->remote_heap->size, err); + } +err_map: + fastrpc_buf_free(fl->cctx->remote_heap); +err_name: + kfree(name); +err: + kfree(args); + + return err; +} + static int fastrpc_init_create_process(struct fastrpc_user *fl, char __user *argp) { @@ -1915,6 +2044,9 @@ static long fastrpc_device_ioctl(struct file *file, unsigned int cmd, case FASTRPC_IOCTL_INIT_ATTACH_SNS: err = fastrpc_init_attach(fl, SENSORS_PD); break; + case FASTRPC_IOCTL_INIT_CREATE_STATIC: + err = 
fastrpc_init_create_static_process(fl, argp); + break; case FASTRPC_IOCTL_INIT_CREATE: err = fastrpc_init_create_process(fl, argp); break; @@ -2184,6 +2316,9 @@ static void fastrpc_rpmsg_remove(struct rpmsg_device *rpdev) if (cctx->secure_fdevice) misc_deregister(&cctx->secure_fdevice->miscdev); + if (cctx->remote_heap) + fastrpc_buf_free(cctx->remote_heap); + of_platform_depopulate(&rpdev->dev); cctx->rpdev = NULL; diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h index 5e29f2cfa42d..f33d914d8f46 100644 --- a/include/uapi/misc/fastrpc.h +++ b/include/uapi/misc/fastrpc.h @@ -13,6 +13,7 @@ #define FASTRPC_IOCTL_MMAP _IOWR('R', 6, struct fastrpc_req_mmap) #define FASTRPC_IOCTL_MUNMAP _IOWR('R', 7, struct fastrpc_req_munmap) #define FASTRPC_IOCTL_INIT_ATTACH_SNS _IO('R', 8) +#define FASTRPC_IOCTL_INIT_CREATE_STATIC _IOWR('R', 9, struct fastrpc_init_create_static) #define FASTRPC_IOCTL_MEM_MAP _IOWR('R', 10, struct fastrpc_mem_map) #define FASTRPC_IOCTL_MEM_UNMAP _IOWR('R', 11, struct fastrpc_mem_unmap) #define FASTRPC_IOCTL_GET_DSP_INFO _IOWR('R', 13, struct fastrpc_ioctl_capability) @@ -87,6 +88,12 @@ struct fastrpc_init_create { __u64 file; /* pointer to elf file */ }; +struct fastrpc_init_create_static { + __u32 namelen; /* length of pd process name */ + __u32 memlen; + __u64 name; /* pd process name */ +}; + struct fastrpc_alloc_dma_buf { __s32 fd; /* fd */ __u32 flags; /* flags to map with */ From 76e8e4ace1ed2c97dba3b1370e0e105e07c572bc Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Fri, 25 Nov 2022 07:14:03 +0000 Subject: [PATCH 2775/4122] misc: fastrpc: Safekeep mmaps on interrupted invoke If the userspace daemon is killed in the middle of an invoke (e.g. audiopd listener invoke), we need to skip the unmapping on device release, otherwise the DSP will crash.
So let's safekeep all the maps only if an invoke was interrupted, by attaching them to the channel context (which is resident until RPMSG driver is removed), and free them on RPMSG driver remove. Signed-off-by: Abel Vesa Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221125071405.148786-9-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index adc44ba0eff6..d7a98396ee94 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -275,6 +275,7 @@ struct fastrpc_channel_ctx { struct fastrpc_device *secure_fdevice; struct fastrpc_device *fdevice; struct fastrpc_buf *remote_heap; + struct list_head invoke_interrupted_mmaps; bool secure; bool unsigned_support; }; @@ -1109,6 +1110,8 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel, struct fastrpc_invoke_args *args) { struct fastrpc_invoke_ctx *ctx = NULL; + struct fastrpc_buf *buf, *b; + int err = 0; if (!fl->sctx) @@ -1172,6 +1175,13 @@ bail: fastrpc_context_put(ctx); } + if (err == -ERESTARTSYS) { + list_for_each_entry_safe(buf, b, &fl->mmaps, node) { + list_del(&buf->node); + list_add_tail(&buf->node, &fl->cctx->invoke_interrupted_mmaps); + } + } + if (err) dev_dbg(fl->sctx->dev, "Error: Invoke Failed %d\n", err); @@ -2278,6 +2288,7 @@ static int fastrpc_rpmsg_probe(struct rpmsg_device *rpdev) dev_set_drvdata(&rpdev->dev, data); dma_set_mask_and_coherent(rdev, DMA_BIT_MASK(32)); INIT_LIST_HEAD(&data->users); + INIT_LIST_HEAD(&data->invoke_interrupted_mmaps); spin_lock_init(&data->lock); idr_init(&data->ctx_idr); data->domain_id = domain_id; @@ -2302,6 +2313,7 @@ static void fastrpc_notify_users(struct fastrpc_user *user) static void fastrpc_rpmsg_remove(struct rpmsg_device *rpdev) { struct fastrpc_channel_ctx *cctx = dev_get_drvdata(&rpdev->dev); + struct fastrpc_buf *buf, *b; struct fastrpc_user *user;
unsigned long flags; @@ -2316,6 +2328,9 @@ static void fastrpc_rpmsg_remove(struct rpmsg_device *rpdev) if (cctx->secure_fdevice) misc_deregister(&cctx->secure_fdevice->miscdev); + list_for_each_entry_safe(buf, b, &cctx->invoke_interrupted_mmaps, node) + list_del(&buf->node); + if (cctx->remote_heap) fastrpc_buf_free(cctx->remote_heap); From 532ad70c6d449029cfa3eac8408f427e31334f33 Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Fri, 25 Nov 2022 07:14:04 +0000 Subject: [PATCH 2776/4122] misc: fastrpc: Add mmap request assigning for static PD pool If the mmap request is to add pages and there are VMIDs associated with that context, do a call to SCM to reassign that memory. Do not do this for remote heap allocation, that is done on init create static process only. Co-developed-by: Srinivas Kandagatla Signed-off-by: Abel Vesa Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221125071405.148786-10-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index d7a98396ee94..2e49bf1c31ef 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -1839,8 +1839,9 @@ static int fastrpc_req_mmap(struct fastrpc_user *fl, char __user *argp) if (copy_from_user(&req, argp, sizeof(req))) return -EFAULT; - if (req.flags != ADSP_MMAP_ADD_PAGES) { + if (req.flags != ADSP_MMAP_ADD_PAGES && req.flags != ADSP_MMAP_REMOTE_HEAP_ADDR) { dev_err(dev, "flag not supported 0x%x\n", req.flags); + return -EINVAL; } @@ -1886,6 +1887,22 @@ static int fastrpc_req_mmap(struct fastrpc_user *fl, char __user *argp) /* let the client know the address to use */ req.vaddrout = rsp_msg.vaddr; + /* Add memory to static PD pool, protection thru hypervisor */ + if (req.flags != ADSP_MMAP_REMOTE_HEAP_ADDR && fl->cctx->vmcount) { + struct qcom_scm_vmperm perm; + int err = 0; + + perm.vmid = QCOM_SCM_VMID_HLOS; +
perm.perm = QCOM_SCM_PERM_RWX; + err = qcom_scm_assign_mem(buf->phys, buf->size, + &(fl->cctx->vmperms[0].vmid), &perm, 1); + if (err) { + dev_err(fl->sctx->dev, "Failed to assign memory phys 0x%llx size 0x%llx err %d", + buf->phys, buf->size, err); + goto err_assign; + } + } + spin_lock(&fl->lock); list_add_tail(&buf->node, &fl->mmaps); spin_unlock(&fl->lock); From 9bde43a0e2f469961e18d0a3496a9a74379c22bf Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Fri, 25 Nov 2022 07:14:05 +0000 Subject: [PATCH 2777/4122] misc: fastrpc: Add dma_mask to fastrpc_channel_ctx dma_set_mask_and_coherent only updates the mask to which the device dma_mask pointer points to. Add a dma_mask to the channel ctx and set the device dma_mask to point to that, otherwise the dma_set_mask will return an error and the dma_set_coherent_mask will be skipped too. Co-developed-by: Srinivas Kandagatla Signed-off-by: Abel Vesa Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221125071405.148786-11-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index 2e49bf1c31ef..278ab6ca1a5a 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -278,6 +278,7 @@ struct fastrpc_channel_ctx { struct list_head invoke_interrupted_mmaps; bool secure; bool unsigned_support; + u64 dma_mask; }; struct fastrpc_device { @@ -2303,6 +2304,7 @@ static int fastrpc_rpmsg_probe(struct rpmsg_device *rpdev) kref_init(&data->refcount); dev_set_drvdata(&rpdev->dev, data); + rdev->dma_mask = &data->dma_mask; dma_set_mask_and_coherent(rdev, DMA_BIT_MASK(32)); INIT_LIST_HEAD(&data->users); INIT_LIST_HEAD(&data->invoke_interrupted_mmaps); From 78316e9dfc24906dd474630928ed1d3c562b568e Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Wed, 9 Nov 2022 11:24:03 +0800 Subject: [PATCH 2778/4122] scsi: mpt3sas: Fix possible resource leaks in mpt3sas_transport_port_add() 
In mpt3sas_transport_port_add(), if sas_rphy_add() returns an error, sas_rphy_free() needs to be called to free the resource allocated in sas_end_device_alloc(). Otherwise a kernel crash will happen: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000108 CPU: 45 PID: 37020 Comm: bash Kdump: loaded Tainted: G W 6.1.0-rc1+ #189 pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : device_del+0x54/0x3d0 lr : device_del+0x37c/0x3d0 Call trace: device_del+0x54/0x3d0 attribute_container_class_device_del+0x28/0x38 transport_remove_classdev+0x6c/0x80 attribute_container_device_trigger+0x108/0x110 transport_remove_device+0x28/0x38 sas_rphy_remove+0x50/0x78 [scsi_transport_sas] sas_port_delete+0x30/0x148 [scsi_transport_sas] do_sas_phy_delete+0x78/0x80 [scsi_transport_sas] device_for_each_child+0x68/0xb0 sas_remove_children+0x30/0x50 [scsi_transport_sas] sas_rphy_remove+0x38/0x78 [scsi_transport_sas] sas_port_delete+0x30/0x148 [scsi_transport_sas] do_sas_phy_delete+0x78/0x80 [scsi_transport_sas] device_for_each_child+0x68/0xb0 sas_remove_children+0x30/0x50 [scsi_transport_sas] sas_remove_host+0x20/0x38 [scsi_transport_sas] scsih_remove+0xd8/0x420 [mpt3sas] Because transport_add_device() is not called when sas_rphy_add() fails, the device is not added. When sas_rphy_remove() is subsequently called to remove the device in the remove() path, a NULL pointer dereference happens. Fixes: f92363d12359 ("[SCSI] mpt3sas: add new driver supporting 12GB SAS") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221109032403.1636422-1-yangyingliang@huawei.com Signed-off-by: Martin K.
Petersen --- drivers/scsi/mpt3sas/mpt3sas_transport.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/mpt3sas/mpt3sas_transport.c b/drivers/scsi/mpt3sas/mpt3sas_transport.c index 0681daee6c14..e5ecd6ada6cd 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_transport.c +++ b/drivers/scsi/mpt3sas/mpt3sas_transport.c @@ -829,6 +829,8 @@ mpt3sas_transport_port_add(struct MPT3SAS_ADAPTER *ioc, u16 handle, if ((sas_rphy_add(rphy))) { ioc_err(ioc, "failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); + sas_rphy_free(rphy); + rphy = NULL; } if (mpt3sas_port->remote_identify.device_type == SAS_END_DEVICE) { From 4ef174a3ad9b5d73c1b6573e244ebba2b0d86eac Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 10 Nov 2022 23:11:29 +0800 Subject: [PATCH 2779/4122] scsi: hpsa: Fix error handling in hpsa_add_sas_host() hpsa_sas_port_add_phy() does: ... sas_phy_add() -> may return error here sas_port_add_phy() ... Whereas hpsa_free_sas_phy() does: ... sas_port_delete_phy() sas_phy_delete() ... If hpsa_sas_port_add_phy() returns an error, hpsa_free_sas_phy() can not be called to free the memory because the port and the phy have not been added yet. Replace hpsa_free_sas_phy() with sas_phy_free() and kfree() to avoid kernel crash in this case. Fixes: d04e62b9d63a ("hpsa: add in sas transport class") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221110151129.394389-1-yangyingliang@huawei.com Signed-off-by: Martin K. 
Petersen --- drivers/scsi/hpsa.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index e5cbc97a5ea4..6696967c5192 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -9786,7 +9786,8 @@ static int hpsa_add_sas_host(struct ctlr_info *h) return 0; free_sas_phy: - hpsa_free_sas_phy(hpsa_sas_phy); + sas_phy_free(hpsa_sas_phy->phy); + kfree(hpsa_sas_phy); free_sas_port: hpsa_free_sas_port(hpsa_sas_port); free_sas_node: From fda34a5d304d0b98cc967e8763b52221b66dc202 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 11 Nov 2022 12:30:12 +0800 Subject: [PATCH 2780/4122] scsi: hpsa: Fix possible memory leak in hpsa_add_sas_device() If hpsa_sas_port_add_rphy() returns an error, the 'rphy' allocated in sas_end_device_alloc() needs to be freed. Address this by calling sas_rphy_free() in the error path. Fixes: d04e62b9d63a ("hpsa: add in sas transport class") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221111043012.1074466-1-yangyingliang@huawei.com Signed-off-by: Martin K. Petersen --- drivers/scsi/hpsa.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 6696967c5192..4dbf51e2623a 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -9823,10 +9823,12 @@ static int hpsa_add_sas_device(struct hpsa_sas_node *hpsa_sas_node, rc = hpsa_sas_port_add_rphy(hpsa_sas_port, rphy); if (rc) - goto free_sas_port; + goto free_sas_rphy; return 0; +free_sas_rphy: + sas_rphy_free(rphy); free_sas_port: hpsa_free_sas_port(hpsa_sas_port); device->sas_port = NULL; From 859ed37c9c3f456510b97ecb0bf155cee2b9d3fc Mon Sep 17 00:00:00 2001 From: ChanWoo Lee Date: Fri, 11 Nov 2022 15:21:26 +0900 Subject: [PATCH 2781/4122] scsi: ufs: core: Separate function name and message Separate the function name and message to make it easier to check the log. Modify messages to fit the format of others. 
Signed-off-by: ChanWoo Lee Link: https://lore.kernel.org/r/20221111062126.7307-1-cw9316.lee@samsung.com Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 6 +++--- drivers/ufs/core/ufshpb.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 768cb49d269c..747183052114 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -4480,7 +4480,7 @@ static int ufshcd_complete_dev_init(struct ufs_hba *hba) QUERY_FLAG_IDN_FDEVICEINIT, 0, NULL); if (err) { dev_err(hba->dev, - "%s setting fDeviceInit flag failed with error %d\n", + "%s: setting fDeviceInit flag failed with error %d\n", __func__, err); goto out; } @@ -4497,11 +4497,11 @@ static int ufshcd_complete_dev_init(struct ufs_hba *hba) if (err) { dev_err(hba->dev, - "%s reading fDeviceInit flag failed with error %d\n", + "%s: reading fDeviceInit flag failed with error %d\n", __func__, err); } else if (flag_res) { dev_err(hba->dev, - "%s fDeviceInit was not cleared by the device\n", + "%s: fDeviceInit was not cleared by the device\n", __func__); err = -EBUSY; } diff --git a/drivers/ufs/core/ufshpb.c b/drivers/ufs/core/ufshpb.c index 3d69a81c5b17..fda3e7b494a6 100644 --- a/drivers/ufs/core/ufshpb.c +++ b/drivers/ufs/core/ufshpb.c @@ -2289,7 +2289,7 @@ static bool ufshpb_check_hpb_reset_query(struct ufs_hba *hba) /* wait for the device to complete HPB reset query */ for (try = 0; try < HPB_RESET_REQ_RETRIES; try++) { dev_dbg(hba->dev, - "%s start flag reset polling %d times\n", + "%s: start flag reset polling %d times\n", __func__, try); /* Poll fHpbReset flag to be cleared */ @@ -2298,7 +2298,7 @@ static bool ufshpb_check_hpb_reset_query(struct ufs_hba *hba) if (err) { dev_err(hba->dev, - "%s reading fHpbReset flag failed with error %d\n", + "%s: reading fHpbReset flag failed with error %d\n", __func__, err); return flag_res; } @@ -2310,7 +2310,7 @@ static bool ufshpb_check_hpb_reset_query(struct ufs_hba *hba) 
} if (flag_res) { dev_err(hba->dev, - "%s fHpbReset was not cleared by the device\n", + "%s: fHpbReset was not cleared by the device\n", __func__); } out: From 5277326d07fbf68aa7fc9e7bce6c381002e00fca Mon Sep 17 00:00:00 2001 From: ChanWoo Lee Date: Fri, 11 Nov 2022 15:22:09 +0900 Subject: [PATCH 2782/4122] scsi: ufs: core: Switch 'check_for_bkops' to bool Only checks true and false so it can be converted to bool. Signed-off-by: ChanWoo Lee Link: https://lore.kernel.org/r/20221111062209.7365-1-cw9316.lee@samsung.com Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 747183052114..0e8850869b21 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -8825,7 +8825,7 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba, static int ufshcd_link_state_transition(struct ufs_hba *hba, enum uic_link_state req_link_state, - int check_for_bkops) + bool check_for_bkops) { int ret = 0; @@ -8976,7 +8976,7 @@ static void ufshcd_hba_vreg_set_hpm(struct ufs_hba *hba) static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) { int ret = 0; - int check_for_bkops; + bool check_for_bkops; enum ufs_pm_level pm_lvl; enum ufs_dev_pwr_mode req_dev_pwr_mode; enum uic_link_state req_link_state; From 222d227f375b4cfa517a8f1f0f266ebe0263ad05 Mon Sep 17 00:00:00 2001 From: ChanWoo Lee Date: Fri, 11 Nov 2022 15:23:01 +0900 Subject: [PATCH 2783/4122] scsi: ufs: core: Fix unnecessary operation for early return Setting bitmap_len is not required when returning early. Defer until it is needed. Signed-off-by: ChanWoo Lee Link: https://lore.kernel.org/r/20221111062301.7423-1-cw9316.lee@samsung.com Signed-off-by: Martin K. 
Petersen --- drivers/ufs/core/ufshpb.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/ufs/core/ufshpb.c b/drivers/ufs/core/ufshpb.c index fda3e7b494a6..be3fb24b93d8 100644 --- a/drivers/ufs/core/ufshpb.c +++ b/drivers/ufs/core/ufshpb.c @@ -233,11 +233,6 @@ next_srgn: rgn = hpb->rgn_tbl + rgn_idx; srgn = rgn->srgn_tbl + srgn_idx; - if (likely(!srgn->is_last)) - bitmap_len = hpb->entries_per_srgn; - else - bitmap_len = hpb->last_srgn_entries; - if (!ufshpb_is_valid_srgn(rgn, srgn)) return true; @@ -253,6 +248,11 @@ next_srgn: return true; } + if (likely(!srgn->is_last)) + bitmap_len = hpb->entries_per_srgn; + else + bitmap_len = hpb->last_srgn_entries; + if ((srgn_offset + cnt) > bitmap_len) bit_len = bitmap_len - srgn_offset; else From bb0cd225dd37df1f4a22e36dad59ff33178ecdfc Mon Sep 17 00:00:00 2001 From: Chen Zhongjin Date: Fri, 11 Nov 2022 15:40:46 +0800 Subject: [PATCH 2784/4122] scsi: efct: Fix possible memleak in efct_device_init() In efct_device_init(), when efct_scsi_reg_fc_transport() fails, efct_scsi_tgt_driver_exit() is not called to release memory for efct_scsi_tgt_driver_init() and causes memleak: unreferenced object 0xffff8881020ce000 (size 2048): comm "modprobe", pid 465, jiffies 4294928222 (age 55.872s) backtrace: [<0000000021a1ef1b>] kmalloc_trace+0x27/0x110 [<000000004c3ed51c>] target_register_template+0x4fd/0x7b0 [target_core_mod] [<00000000f3393296>] efct_scsi_tgt_driver_init+0x18/0x50 [efct] [<00000000115de533>] 0xffffffffc0d90011 [<00000000d608f646>] do_one_initcall+0xd0/0x4e0 [<0000000067828cf1>] do_init_module+0x1cc/0x6a0 ... Fixes: 4df84e846624 ("scsi: elx: efct: Driver initialization routines") Signed-off-by: Chen Zhongjin Link: https://lore.kernel.org/r/20221111074046.57061-1-chenzhongjin@huawei.com Signed-off-by: Martin K. 
Petersen --- drivers/scsi/elx/efct/efct_driver.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/elx/efct/efct_driver.c b/drivers/scsi/elx/efct/efct_driver.c index b08fc8839808..49fd2cfed70c 100644 --- a/drivers/scsi/elx/efct/efct_driver.c +++ b/drivers/scsi/elx/efct/efct_driver.c @@ -42,6 +42,7 @@ efct_device_init(void) rc = efct_scsi_reg_fc_transport(); if (rc) { + efct_scsi_tgt_driver_exit(); pr_err("failed to register to FC host\n"); return rc; } From ed0f17b748b20271cb568c7ca0b23b120316a47d Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Fri, 11 Nov 2022 23:00:31 -0800 Subject: [PATCH 2785/4122] scsi: scsi_debug: Fix a warning in resp_verify() As 'vnum' is controlled by user, so if user tries to allocate memory larger than(>=) MAX_ORDER, then kcalloc() will fail, it creates a stack trace and messes up dmesg with a warning. Add __GFP_NOWARN in order to avoid too large allocation warning. This is detected by static analysis using smatch. Fixes: c3e2fe9222d4 ("scsi: scsi_debug: Implement VERIFY(10), add VERIFY(16)") Signed-off-by: Harshit Mogalapalli Link: https://lore.kernel.org/r/20221112070031.2121068-1-harshit.m.mogalapalli@oracle.com Signed-off-by: Martin K. 
Petersen --- drivers/scsi/scsi_debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 273224d29ce9..f556e36a2e0d 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -4429,7 +4429,7 @@ static int resp_verify(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) if (ret) return ret; - arr = kcalloc(lb_size, vnum, GFP_ATOMIC); + arr = kcalloc(lb_size, vnum, GFP_ATOMIC | __GFP_NOWARN); if (!arr) { mk_sense_buffer(scp, ILLEGAL_REQUEST, INSUFF_RES_ASC, INSUFF_RES_ASCQ); From 07f2ca139d9a7a1ba71c4c03997c8de161db2346 Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Fri, 11 Nov 2022 23:06:12 -0800 Subject: [PATCH 2786/4122] scsi: scsi_debug: Fix a warning in resp_report_zones() As 'alloc_len' is user controlled data, if user tries to allocate memory larger than(>=) MAX_ORDER, then kcalloc() will fail, it creates a stack trace and messes up dmesg with a warning. Add __GFP_NOWARN in order to avoid too large allocation warning. This is detected by static analysis using smatch. Fixes: 7db0e0c8190a ("scsi: scsi_debug: Fix buffer size of REPORT ZONES command") Signed-off-by: Harshit Mogalapalli Link: https://lore.kernel.org/r/20221112070612.2121535-1-harshit.m.mogalapalli@oracle.com Signed-off-by: Martin K. 
Petersen --- drivers/scsi/scsi_debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index f556e36a2e0d..19ad8e2aa07a 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -4497,7 +4497,7 @@ static int resp_report_zones(struct scsi_cmnd *scp, rep_max_zones = (alloc_len - 64) >> ilog2(RZONES_DESC_HD); - arr = kzalloc(alloc_len, GFP_ATOMIC); + arr = kzalloc(alloc_len, GFP_ATOMIC | __GFP_NOWARN); if (!arr) { mk_sense_buffer(scp, ILLEGAL_REQUEST, INSUFF_RES_ASC, INSUFF_RES_ASCQ); From 47b6a122c7b69a876c7ee2fc064a26b09627de9d Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Sat, 12 Nov 2022 17:43:10 +0800 Subject: [PATCH 2787/4122] scsi: fcoe: Fix possible name leak when device_register() fails If device_register() returns an error, the name allocated by dev_set_name() needs to be freed. As the comment of device_register() says, one should use put_device() to give up the reference in the error path. Fix this by calling put_device(), then the name can be freed in kobject_cleanup(). The 'fcf' is freed in fcoe_fcf_device_release(), so the kfree() in the error path can be removed. The 'ctlr' is freed in fcoe_ctlr_device_release(), so don't use the error label, just return NULL after calling put_device(). Fixes: 9a74e884ee71 ("[SCSI] libfcoe: Add fcoe_sysfs") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221112094310.3633291-1-yangyingliang@huawei.com Signed-off-by: Martin K. 
Petersen --- drivers/scsi/fcoe/fcoe_sysfs.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/fcoe/fcoe_sysfs.c b/drivers/scsi/fcoe/fcoe_sysfs.c index af658aa38fed..6260aa5ea6af 100644 --- a/drivers/scsi/fcoe/fcoe_sysfs.c +++ b/drivers/scsi/fcoe/fcoe_sysfs.c @@ -830,14 +830,15 @@ struct fcoe_ctlr_device *fcoe_ctlr_device_add(struct device *parent, dev_set_name(&ctlr->dev, "ctlr_%d", ctlr->id); error = device_register(&ctlr->dev); - if (error) - goto out_del_q2; + if (error) { + destroy_workqueue(ctlr->devloss_work_q); + destroy_workqueue(ctlr->work_q); + put_device(&ctlr->dev); + return NULL; + } return ctlr; -out_del_q2: - destroy_workqueue(ctlr->devloss_work_q); - ctlr->devloss_work_q = NULL; out_del_q: destroy_workqueue(ctlr->work_q); ctlr->work_q = NULL; @@ -1036,16 +1037,16 @@ struct fcoe_fcf_device *fcoe_fcf_device_add(struct fcoe_ctlr_device *ctlr, fcf->selected = new_fcf->selected; error = device_register(&fcf->dev); - if (error) - goto out_del; + if (error) { + put_device(&fcf->dev); + goto out; + } fcf->state = FCOE_FCF_STATE_CONNECTED; list_add_tail(&fcf->peers, &ctlr->fcfs); return fcf; -out_del: - kfree(fcf); out: return NULL; } From e6d773f93a49e0eda88a903a2a6542ca83380eb1 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Sat, 12 Nov 2022 21:10:10 +0800 Subject: [PATCH 2788/4122] scsi: scsi_debug: Fix possible name leak in sdebug_add_host_helper() After commit 1fa5ae857bb1 ("driver core: get rid of struct device's bus_id string array"), the name of the device is allocated dynamically and needs to be freed when device_register() returns an error. As the comment of device_register() says, one should use put_device() to give up the reference in the error path. Fix this by calling put_device(), then the name can be freed in kobject_cleanup(), and sdbg_host is freed in sdebug_release_adapter(). When the device release is not set, it means the device is not initialized. We can not call put_device() in this case. 
Use kfree() to free memory. Fixes: 1fa5ae857bb1 ("driver core: get rid of struct device's bus_id string array") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221112131010.3757845-1-yangyingliang@huawei.com Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 19ad8e2aa07a..de3d3b1edaf5 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -7329,7 +7329,10 @@ clean: kfree(sdbg_devinfo->zstate); kfree(sdbg_devinfo); } - kfree(sdbg_host); + if (sdbg_host->dev.release) + put_device(&sdbg_host->dev); + else + kfree(sdbg_host); pr_warn("%s: failed, errno=%d\n", __func__, -error); return error; } From e6f108bffc3708ddcff72324f7d40dfcd0204894 Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Sun, 13 Nov 2022 14:45:13 +0800 Subject: [PATCH 2789/4122] scsi: ipr: Fix WARNING in ipr_init() ipr_init() will not call unregister_reboot_notifier() when pci_register_driver() fails, which causes a WARNING. Call unregister_reboot_notifier() when pci_register_driver() fails. 
notifier callback ipr_halt [ipr] already registered WARNING: CPU: 3 PID: 299 at kernel/notifier.c:29 notifier_chain_register+0x16d/0x230 Modules linked in: ipr(+) xhci_pci_renesas xhci_hcd ehci_hcd usbcore led_class gpu_sched drm_buddy video wmi drm_ttm_helper ttm drm_display_helper drm_kms_helper drm drm_panel_orientation_quirks agpgart cfbft CPU: 3 PID: 299 Comm: modprobe Tainted: G W 6.1.0-rc1-00190-g39508d23b672-dirty #332 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.15.0-0-g2dd4b9b3f840-prebuilt.qemu.org 04/01/2014 RIP: 0010:notifier_chain_register+0x16d/0x230 Call Trace: __blocking_notifier_chain_register+0x73/0xb0 ipr_init+0x30/0x1000 [ipr] do_one_initcall+0xdb/0x480 do_init_module+0x1cf/0x680 load_module+0x6a50/0x70a0 __do_sys_finit_module+0x12f/0x1c0 do_syscall_64+0x3f/0x90 entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: f72919ec2bbb ("[SCSI] ipr: implement shutdown changes and remove obsolete write cache parameter") Signed-off-by: Shang XiaoJing Link: https://lore.kernel.org/r/20221113064513.14028-1-shangxiaojing@huawei.com Signed-off-by: Martin K. 
Petersen --- drivers/scsi/ipr.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 9d01a3e3c26a..2022ffb45041 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -10872,11 +10872,19 @@ static struct notifier_block ipr_notifier = { **/ static int __init ipr_init(void) { + int rc; + ipr_info("IBM Power RAID SCSI Device Driver version: %s %s\n", IPR_DRIVER_VERSION, IPR_DRIVER_DATE); register_reboot_notifier(&ipr_notifier); - return pci_register_driver(&ipr_driver); + rc = pci_register_driver(&ipr_driver); + if (rc) { + unregister_reboot_notifier(&ipr_notifier); + return rc; + } + + return 0; } /** From 42c5907728867df91045f532a38682e0ec7a955b Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Tue, 15 Nov 2022 09:29:05 +0900 Subject: [PATCH 2790/4122] scsi: sd: Use 16-byte SYNCHRONIZE CACHE on ZBC devices ZBC Zoned Block Commands specification mandates SYNCHRONIZE CACHE(16) for host-managed zoned block devices, but does not mandate SYNCHRONIZE CACHE(10). Call SYNCHRONIZE CACHE(16) in place of SYNCHRONIZE CACHE(10) to ensure that the command is always supported. For this purpose, add use_16_for_sync flag to struct scsi_device in same manner as use_16_for_rw flag. To be precise, ZBC does not mandate SYNCHRONIZE CACHE(16) for host-aware zoned block devices. However, modern devices should support 16-byte commands. Hence, call SYNCHRONIZE CACHE (16) on both types of ZBC devices, host-aware and host-managed. Of note is that READ(16) and WRITE(16) have same story and they are already called for both types of ZBC devices. Another note is that this patch depends on the fix commit ea045fd344cb ("ata: libata-scsi: fix SYNCHRONIZE CACHE (16) command failure"). Signed-off-by: Shin'ichiro Kawasaki Link: https://lore.kernel.org/r/20221115002905.1709006-1-shinichiro.kawasaki@wdc.com Reviewed-by: Damien Le Moal Signed-off-by: Martin K. 
Petersen --- drivers/scsi/sd.c | 16 ++++++++++++---- drivers/scsi/sd_zbc.c | 3 ++- include/scsi/scsi_device.h | 1 + 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index eb76ba055021..faa2b55d1a21 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1026,8 +1026,13 @@ static blk_status_t sd_setup_flush_cmnd(struct scsi_cmnd *cmd) /* flush requests don't perform I/O, zero the S/G table */ memset(&cmd->sdb, 0, sizeof(cmd->sdb)); - cmd->cmnd[0] = SYNCHRONIZE_CACHE; - cmd->cmd_len = 10; + if (cmd->device->use_16_for_sync) { + cmd->cmnd[0] = SYNCHRONIZE_CACHE_16; + cmd->cmd_len = 16; + } else { + cmd->cmnd[0] = SYNCHRONIZE_CACHE; + cmd->cmd_len = 10; + } cmd->transfersize = 0; cmd->allowed = sdkp->max_retries; @@ -1587,9 +1592,12 @@ static int sd_sync_cache(struct scsi_disk *sdkp, struct scsi_sense_hdr *sshdr) sshdr = &my_sshdr; for (retries = 3; retries > 0; --retries) { - unsigned char cmd[10] = { 0 }; + unsigned char cmd[16] = { 0 }; - cmd[0] = SYNCHRONIZE_CACHE; + if (sdp->use_16_for_sync) + cmd[0] = SYNCHRONIZE_CACHE_16; + else + cmd[0] = SYNCHRONIZE_CACHE; /* * Leave the rest of the command zero to indicate * flush everything. 
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index bd15624c6322..b163bf936acc 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -921,9 +921,10 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, u8 buf[SD_BUF_SIZE]) return 0; } - /* READ16/WRITE16 is mandatory for ZBC disks */ + /* READ16/WRITE16/SYNC16 is mandatory for ZBC devices */ sdkp->device->use_16_for_rw = 1; sdkp->device->use_10_for_rw = 0; + sdkp->device->use_16_for_sync = 1; if (!blk_queue_is_zoned(q)) { /* diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 24bdbf7999ab..3642b8e3928b 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -184,6 +184,7 @@ struct scsi_device { unsigned no_report_opcodes:1; /* no REPORT SUPPORTED OPERATION CODES */ unsigned no_write_same:1; /* no WRITE SAME command */ unsigned use_16_for_rw:1; /* Use read/write(16) over read/write(10) */ + unsigned use_16_for_sync:1; /* Use sync (16) over sync (10) */ unsigned skip_ms_page_8:1; /* do not use MODE SENSE page 0x08 */ unsigned skip_ms_page_3f:1; /* do not use MODE SENSE page 0x3f */ unsigned skip_vpd_pages:1; /* do not read VPD pages */ From 4155658cee394b22b24c6d64e49247bf26d95b92 Mon Sep 17 00:00:00 2001 From: Chen Zhongjin Date: Tue, 15 Nov 2022 17:24:42 +0800 Subject: [PATCH 2791/4122] scsi: fcoe: Fix transport not detached when fcoe_if_init() fails fcoe_init() calls fcoe_transport_attach(&fcoe_sw_transport), but when fcoe_if_init() fails, &fcoe_sw_transport is not detached, which leaves the freed &fcoe_sw_transport on the fcoe_transports list. This causes a panic when reinserting the module. BUG: unable to handle page fault for address: fffffbfff82e2213 RIP: 0010:fcoe_transport_attach+0xe1/0x230 [libfcoe] Call Trace: do_one_initcall+0xd0/0x4e0 load_module+0x5eee/0x7210 ... 
Fixes: 78a582463c1e ("[SCSI] fcoe: convert fcoe.ko to become an fcoe transport provider driver") Signed-off-by: Chen Zhongjin Link: https://lore.kernel.org/r/20221115092442.133088-1-chenzhongjin@huawei.com Signed-off-by: Martin K. Petersen --- drivers/scsi/fcoe/fcoe.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index 6ec296321ffc..38774a272e62 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -2491,6 +2491,7 @@ static int __init fcoe_init(void) out_free: mutex_unlock(&fcoe_config_mutex); + fcoe_transport_detach(&fcoe_sw_transport); out_destroy: destroy_workqueue(fcoe_wq); return rc; From 3620e174d260adf88fa6511e8a20831cbddc4b66 Mon Sep 17 00:00:00 2001 From: Gleb Chesnokov Date: Tue, 15 Nov 2022 12:38:05 +0300 Subject: [PATCH 2792/4122] scsi: qla2xxx: Remove duplicate of vha->iocb_work initialization Commit 9b3e0f4d4147 ("scsi: qla2xxx: Move work element processing out of DPC thread") introduced the initialization of vha->iocb_work in qla2x00_create_host() function. This initialization is also called from qla2x00_probe_one() function, just after qla2x00_create_host(). Hence remove this duplicate call since it has already been called before. Signed-off-by: Gleb Chesnokov Link: https://lore.kernel.org/r/822b3823-f344-67d6-30f1-16e31cf68eed@scst.dev Signed-off-by: Martin K. 
Petersen --- drivers/scsi/qla2xxx/qla_os.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 96ba1398f20c..7fb28c207ee5 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -3284,7 +3284,6 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) host->max_cmd_len, host->max_channel, host->max_lun, host->transportt, sht->vendor_id); - INIT_WORK(&base_vha->iocb_work, qla2x00_iocb_work_fn); INIT_WORK(&ha->heartbeat_work, qla_heartbeat_work_fn); /* Set up the irqs */ From 95da5e58172cd3c58b82cb01e6cd157b6c5eabe9 Mon Sep 17 00:00:00 2001 From: Gleb Chesnokov Date: Tue, 15 Nov 2022 12:38:08 +0300 Subject: [PATCH 2793/4122] scsi: qla2xxx: Initialize vha->unknown_atio_[list, work] for NPIV hosts Initialization of vha->unknown_atio_list and vha->unknown_atio_work only happens for base_vha in qlt_probe_one_stage1(). But there is no initialization for NPIV hosts that are created in qla24xx_vport_create(). This causes a crash when trying to access these NPIV host fields. Fix this by adding initialization to qla_vport_create(). Signed-off-by: Gleb Chesnokov Link: https://lore.kernel.org/r/376c89a2-a9ac-bcf9-bf0f-dfe89a02fd4b@scst.dev Signed-off-by: Martin K. 
Petersen --- drivers/scsi/qla2xxx/qla_target.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index bb754a950802..548f22705ddc 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -6741,6 +6741,9 @@ qlt_vport_create(struct scsi_qla_host *vha, struct qla_hw_data *ha) mutex_init(&vha->vha_tgt.tgt_mutex); mutex_init(&vha->vha_tgt.tgt_host_action_mutex); + INIT_LIST_HEAD(&vha->unknown_atio_list); + INIT_DELAYED_WORK(&vha->unknown_atio_work, qlt_unknown_atio_work_fn); + qlt_clear_mode(vha); /* From e118df492320176af94deec000ae034cc92be754 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Thu, 17 Nov 2022 11:51:00 +0800 Subject: [PATCH 2794/4122] scsi: snic: Fix possible UAF in snic_tgt_create() Smatch reports a warning as follows: drivers/scsi/snic/snic_disc.c:307 snic_tgt_create() warn: '&tgt->list' not removed from list If device_add() fails in snic_tgt_create(), tgt will be freed, but tgt->list will not be removed from snic->disc.tgt_list, then list traversal may cause UAF. Remove from snic->disc.tgt_list before free(). Fixes: c8806b6c9e82 ("snic: driver for Cisco SCSI HBA") Signed-off-by: Gaosheng Cui Link: https://lore.kernel.org/r/20221117035100.2944812-1-cuigaosheng1@huawei.com Acked-by: Narsimhulu Musini Signed-off-by: Martin K. 
Petersen --- drivers/scsi/snic/snic_disc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/snic/snic_disc.c b/drivers/scsi/snic/snic_disc.c index 9b2b5f8c23b9..8fbf3c1b1311 100644 --- a/drivers/scsi/snic/snic_disc.c +++ b/drivers/scsi/snic/snic_disc.c @@ -304,6 +304,9 @@ snic_tgt_create(struct snic *snic, struct snic_tgt_id *tgtid) ret); put_device(&snic->shost->shost_gendev); + spin_lock_irqsave(snic->shost->host_lock, flags); + list_del(&tgt->list); + spin_unlock_irqrestore(snic->shost->host_lock, flags); kfree(tgt); tgt = NULL; From a500c4cc06cd2830c692b571dd0a1c3585f23150 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 17 Nov 2022 10:36:25 -0800 Subject: [PATCH 2795/4122] scsi: device_handler: alua: Revert "Move a scsi_device_put() call out of alua_check_vpd()" There is a bug in commit 0b25e17e9018 ("scsi: alua: Move a scsi_device_put() call out of alua_check_vpd()"): that patch may cause alua_rtpg_queue() callers to call scsi_device_put() even if that function should not be called. Revert that commit to prepare for a different solution. Cc: Hannes Reinecke Cc: Martin Wilck Cc: Sachin Sant Cc: Benjamin Block Reported-by: Sachin Sant Reported-by: Benjamin Block Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20221117183626.2656196-2-bvanassche@acm.org Tested-by: Sachin Sant Signed-off-by: Martin K. 
Petersen --- drivers/scsi/device_handler/scsi_dh_alua.c | 23 ++++++++-------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index 693cd827e138..bd4ee294f5c7 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -324,7 +324,6 @@ static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h, struct alua_port_group *pg, *old_pg = NULL; bool pg_updated = false; unsigned long flags; - bool put_sdev; group_id = scsi_vpd_tpg_id(sdev, &rel_port); if (group_id < 0) { @@ -374,14 +373,11 @@ static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h, list_add_rcu(&h->node, &pg->dh_list); spin_unlock_irqrestore(&pg->lock, flags); - put_sdev = alua_rtpg_queue(rcu_dereference_protected(h->pg, + alua_rtpg_queue(rcu_dereference_protected(h->pg, lockdep_is_held(&h->pg_lock)), sdev, NULL, true); spin_unlock(&h->pg_lock); - if (put_sdev) - scsi_device_put(sdev); - if (old_pg) kref_put(&old_pg->kref, release_port_group); @@ -982,10 +978,9 @@ queue_rtpg: * RTPG already has been scheduled. * * Returns true if and only if alua_rtpg_work() will be called asynchronously. - * That function is responsible for calling @qdata->fn(). If this function - * returns true, the caller is responsible for invoking scsi_device_put(@sdev). + * That function is responsible for calling @qdata->fn(). 
*/ -static bool __must_check alua_rtpg_queue(struct alua_port_group *pg, +static bool alua_rtpg_queue(struct alua_port_group *pg, struct scsi_device *sdev, struct alua_queue_data *qdata, bool force) { @@ -1024,6 +1019,8 @@ static bool __must_check alua_rtpg_queue(struct alua_port_group *pg, else kref_put(&pg->kref, release_port_group); } + if (sdev) + scsi_device_put(sdev); return true; } @@ -1130,12 +1127,10 @@ static int alua_activate(struct scsi_device *sdev, rcu_read_unlock(); mutex_unlock(&h->init_mutex); - if (alua_rtpg_queue(pg, sdev, qdata, true)) { - scsi_device_put(sdev); + if (alua_rtpg_queue(pg, sdev, qdata, true)) fn = NULL; - } else { + else err = SCSI_DH_DEV_OFFLINED; - } kref_put(&pg->kref, release_port_group); out: if (fn) @@ -1161,9 +1156,7 @@ static void alua_check(struct scsi_device *sdev, bool force) return; } rcu_read_unlock(); - - if (alua_rtpg_queue(pg, sdev, NULL, force)) - scsi_device_put(sdev); + alua_rtpg_queue(pg, sdev, NULL, force); kref_put(&pg->kref, release_port_group); } From 50759b881e1d6879e7cef15c74bdea2e937338c9 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 17 Nov 2022 10:36:26 -0800 Subject: [PATCH 2796/4122] scsi: device_handler: alua: Call scsi_device_put() from non-atomic context Since commit f93ed747e2c7 ("scsi: core: Release SCSI devices synchronously"), scsi_device_put() might sleep. Avoid calling it from alua_rtpg_queue() with the pg_lock held. The lock only protects h->pg, anyway. To avoid the pg being freed under us, because of a race with another thread, take a temporary reference. In alua_rtpg_queue(), verify that the pg still belongs to the sdev being passed before actually queueing the RTPG. 
This patch fixes the following smatch warning: drivers/scsi/device_handler/scsi_dh_alua.c:1013 alua_rtpg_queue() warn: sleeping in atomic context alua_check_vpd() <- disables preempt -> alua_rtpg_queue() -> scsi_device_put() Cc: Martin Wilck Cc: Hannes Reinecke Cc: Sachin Sant Cc: Benjamin Block Suggested-by: Martin Wilck Reported-by: Dan Carpenter Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20221117183626.2656196-3-bvanassche@acm.org Tested-by: Sachin Sant Signed-off-by: Martin K. Petersen --- drivers/scsi/device_handler/scsi_dh_alua.c | 27 +++++++++++++++------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index bd4ee294f5c7..49cc18a87473 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -354,6 +354,8 @@ static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h, "%s: port group %x rel port %x\n", ALUA_DH_NAME, group_id, rel_port); + kref_get(&pg->kref); + /* Check for existing port group references */ spin_lock(&h->pg_lock); old_pg = rcu_dereference_protected(h->pg, lockdep_is_held(&h->pg_lock)); @@ -373,11 +375,11 @@ static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h, list_add_rcu(&h->node, &pg->dh_list); spin_unlock_irqrestore(&pg->lock, flags); - alua_rtpg_queue(rcu_dereference_protected(h->pg, - lockdep_is_held(&h->pg_lock)), - sdev, NULL, true); spin_unlock(&h->pg_lock); + alua_rtpg_queue(pg, sdev, NULL, true); + kref_put(&pg->kref, release_port_group); + if (old_pg) kref_put(&old_pg->kref, release_port_group); @@ -986,6 +988,9 @@ static bool alua_rtpg_queue(struct alua_port_group *pg, { int start_queue = 0; unsigned long flags; + + might_sleep(); + if (WARN_ON_ONCE(!pg) || scsi_device_get(sdev)) return false; @@ -996,11 +1001,17 @@ static bool alua_rtpg_queue(struct alua_port_group *pg, force = true; } if (pg->rtpg_sdev == NULL) { - 
pg->interval = 0; - pg->flags |= ALUA_PG_RUN_RTPG; - kref_get(&pg->kref); - pg->rtpg_sdev = sdev; - start_queue = 1; + struct alua_dh_data *h = sdev->handler_data; + + rcu_read_lock(); + if (h && rcu_dereference(h->pg) == pg) { + pg->interval = 0; + pg->flags |= ALUA_PG_RUN_RTPG; + kref_get(&pg->kref); + pg->rtpg_sdev = sdev; + start_queue = 1; + } + rcu_read_unlock(); } else if (!(pg->flags & ALUA_PG_RUN_RTPG) && force) { pg->flags |= ALUA_PG_RUN_RTPG; /* Do not queue if the worker is already running */ From 541555285339313e831f8e446c03a7994c604d65 Mon Sep 17 00:00:00 2001 From: ChanWoo Lee Date: Fri, 18 Nov 2022 13:41:36 +0900 Subject: [PATCH 2797/4122] scsi: ufs: ufs-mediatek: Remove unneeded code Remove unnecessary if/goto code. Signed-off-by: ChanWoo Lee Link: https://lore.kernel.org/r/20221118044136.921-1-cw9316.lee@samsung.com Reviewed-by: Stanley Chu Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufs-mediatek.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/ufs/host/ufs-mediatek.c b/drivers/ufs/host/ufs-mediatek.c index 7309f3f87eac..7d13878dff47 100644 --- a/drivers/ufs/host/ufs-mediatek.c +++ b/drivers/ufs/host/ufs-mediatek.c @@ -441,8 +441,6 @@ static int ufs_mtk_mphy_power_on(struct ufs_hba *hba, bool on) if (ufs_mtk_is_va09_supported(hba)) { ufs_mtk_va09_pwr_ctrl(res, 0); ret = regulator_disable(host->reg_va09); - if (ret < 0) - goto out; } } out: From 96a2dfa1df4b9df0cfa2e807153b4d254db2fa82 Mon Sep 17 00:00:00 2001 From: ChanWoo Lee Date: Fri, 18 Nov 2022 13:52:42 +0900 Subject: [PATCH 2798/4122] scsi: ufs: ufs-mediatek: Modify the return value Be consistent with the rest of driver wrt. functions returning bool. 
91: return !!(host->caps & UFS_MTK_CAP_BOOST_CRYPT_ENGINE); 98: return !!(host->caps & UFS_MTK_CAP_VA09_PWR_CTRL); 105: return !!(host->caps & UFS_MTK_CAP_BROKEN_VCC); Signed-off-by: ChanWoo Lee Link: https://lore.kernel.org/r/20221118045242.2770-1-cw9316.lee@samsung.com Reviewed-by: Stanley Chu Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufs-mediatek.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ufs/host/ufs-mediatek.c b/drivers/ufs/host/ufs-mediatek.c index 7d13878dff47..ef5816d82326 100644 --- a/drivers/ufs/host/ufs-mediatek.c +++ b/drivers/ufs/host/ufs-mediatek.c @@ -109,7 +109,7 @@ static bool ufs_mtk_is_pmc_via_fastauto(struct ufs_hba *hba) { struct ufs_mtk_host *host = ufshcd_get_variant(hba); - return (host->caps & UFS_MTK_CAP_PMC_VIA_FASTAUTO); + return !!(host->caps & UFS_MTK_CAP_PMC_VIA_FASTAUTO); } static void ufs_mtk_cfg_unipro_cg(struct ufs_hba *hba, bool enable) From 7e613be7c63d2b9041b38d51fc324b8ad67d31e3 Mon Sep 17 00:00:00 2001 From: Jie Zhan Date: Fri, 18 Nov 2022 16:37:10 +0800 Subject: [PATCH 2799/4122] scsi: Revert "scsi: hisi_sas: Drain bcast events in hisi_sas_rescan_topology()" This reverts commit 11ff0c98fca35df16c84d4eee52008faecaf10a6. Draining or flushing events in hisi_sas_rescan_topology() can hang the driver, typically with phy up or phy down events being processed, i.e. sas_porte_bytes_dmaed() or sas_phye_loss_of_signal(). Signed-off-by: Jie Zhan Link: https://lore.kernel.org/r/20221118083714.4034612-2-zhanjie9@hisilicon.com Signed-off-by: Martin K. 
Petersen --- drivers/scsi/hisi_sas/hisi_sas_main.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 54860d252466..4527ac266bb6 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -1323,7 +1323,6 @@ static void hisi_sas_refresh_port_id(struct hisi_hba *hisi_hba) static void hisi_sas_rescan_topology(struct hisi_hba *hisi_hba, u32 state) { - struct sas_ha_struct *sas_ha = &hisi_hba->sha; struct asd_sas_port *_sas_port = NULL; int phy_no; @@ -1352,12 +1351,6 @@ static void hisi_sas_rescan_topology(struct hisi_hba *hisi_hba, u32 state) hisi_sas_phy_down(hisi_hba, phy_no, 0, GFP_KERNEL); } } - /* - * Ensure any bcast events are processed prior to calling async nexus - * reset calls from hisi_sas_clear_nexus_ha() -> - * hisi_sas_async_I_T_nexus_reset() - */ - sas_drain_work(sas_ha); } static void hisi_sas_reset_init_all_devices(struct hisi_hba *hisi_hba) From 94a3555d1f0f51cf029a8668624e1cd40628880f Mon Sep 17 00:00:00 2001 From: Jie Zhan Date: Fri, 18 Nov 2022 16:37:11 +0800 Subject: [PATCH 2800/4122] scsi: Revert "scsi: hisi_sas: Don't send bcast events from HW during nexus HA reset" This reverts commit f5f2a2716055ad8c0c4ff83e51d667646c6c5d8a. This is now unnecessary to solve the SATA devices missing issue in hisi_sas_clear_nexus_ha(). Hence, we should not ignore bcast events during sas_eh_handle_sas_errors() in case of missing bcast events, unless a justified need is found and a mechanism to defer (but not ignore) bcast events in sas_eh_handle_sas_errors() is provided. Also, in hisi_sas_clear_nexus_ha(), there is nothing further to handle in "out: " other than return, so that part can be reverted. Signed-off-by: Jie Zhan Link: https://lore.kernel.org/r/20221118083714.4034612-3-zhanjie9@hisilicon.com Reviewed-by: John Garry Signed-off-by: Martin K. 
Petersen --- drivers/scsi/hisi_sas/hisi_sas_main.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 4527ac266bb6..62080d0fad6f 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -1812,14 +1812,12 @@ static int hisi_sas_clear_nexus_ha(struct sas_ha_struct *sas_ha) struct hisi_hba *hisi_hba = sas_ha->lldd_ha; HISI_SAS_DECLARE_RST_WORK_ON_STACK(r); ASYNC_DOMAIN_EXCLUSIVE(async); - int i, ret; + int i; queue_work(hisi_hba->wq, &r.work); wait_for_completion(r.completion); - if (!r.done) { - ret = TMF_RESP_FUNC_FAILED; - goto out; - } + if (!r.done) + return TMF_RESP_FUNC_FAILED; for (i = 0; i < HISI_SAS_MAX_DEVICES; i++) { struct hisi_sas_device *sas_dev = &hisi_hba->devices[i]; @@ -1836,9 +1834,7 @@ static int hisi_sas_clear_nexus_ha(struct sas_ha_struct *sas_ha) async_synchronize_full_domain(&async); hisi_sas_release_tasks(hisi_hba); - ret = TMF_RESP_FUNC_COMPLETE; -out: - return ret; + return TMF_RESP_FUNC_COMPLETE; } static int hisi_sas_query_task(struct sas_task *task) @@ -1986,14 +1982,10 @@ void hisi_sas_phy_bcast(struct hisi_sas_phy *phy) { struct asd_sas_phy *sas_phy = &phy->sas_phy; struct hisi_hba *hisi_hba = phy->hisi_hba; - struct sas_ha_struct *sha = &hisi_hba->sha; if (test_bit(HISI_SAS_RESETTING_BIT, &hisi_hba->flags)) return; - if (test_bit(SAS_HA_FROZEN, &sha->state)) - return; - sas_notify_port_event(sas_phy, PORTE_BROADCAST_RCVD, GFP_ATOMIC); } EXPORT_SYMBOL_GPL(hisi_sas_phy_bcast); From 9181ce3cb5d96f0ee28246a857ca651830fa3746 Mon Sep 17 00:00:00 2001 From: Jie Zhan Date: Fri, 18 Nov 2022 16:37:12 +0800 Subject: [PATCH 2801/4122] scsi: libsas: Add smp_ata_check_ready_type() Create function smp_ata_check_ready_type() for LLDDs to wait for SATA devices to come up after a link reset. 
Signed-off-by: Jie Zhan Link: https://lore.kernel.org/r/20221118083714.4034612-4-zhanjie9@hisilicon.com Signed-off-by: Martin K. Petersen --- drivers/scsi/libsas/sas_ata.c | 25 +++++++++++++++++++++++++ drivers/scsi/libsas/sas_expander.c | 4 ++-- drivers/scsi/libsas/sas_internal.h | 2 ++ include/scsi/sas_ata.h | 6 ++++++ 4 files changed, 35 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index 78e6046fb55a..4b65cd79150f 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -287,6 +287,31 @@ static int sas_ata_clear_pending(struct domain_device *dev, struct ex_phy *phy) return 1; } +int smp_ata_check_ready_type(struct ata_link *link) +{ + struct domain_device *dev = link->ap->private_data; + struct sas_phy *phy = sas_get_local_phy(dev); + struct domain_device *ex_dev = dev->parent; + enum sas_device_type type = SAS_PHY_UNUSED; + u8 sas_addr[SAS_ADDR_SIZE]; + int res; + + res = sas_get_phy_attached_dev(ex_dev, phy->number, sas_addr, &type); + sas_put_local_phy(phy); + if (res) + return res; + + switch (type) { + case SAS_SATA_PENDING: + return 0; + case SAS_END_DEVICE: + return 1; + default: + return -ENODEV; + } +} +EXPORT_SYMBOL_GPL(smp_ata_check_ready_type); + static int smp_ata_check_ready(struct ata_link *link) { int res; diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 2907ca5d0ed4..a04cad620e93 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -1688,8 +1688,8 @@ static int sas_get_phy_change_count(struct domain_device *dev, return res; } -static int sas_get_phy_attached_dev(struct domain_device *dev, int phy_id, - u8 *sas_addr, enum sas_device_type *type) +int sas_get_phy_attached_dev(struct domain_device *dev, int phy_id, + u8 *sas_addr, enum sas_device_type *type) { int res; struct smp_disc_resp *disc_resp; diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h 
index 6cf190ade35e..6f593fa69b58 100644 --- a/drivers/scsi/libsas/sas_internal.h +++ b/drivers/scsi/libsas/sas_internal.h @@ -88,6 +88,8 @@ struct domain_device *sas_ex_to_ata(struct domain_device *ex_dev, int phy_id); int sas_ex_phy_discover(struct domain_device *dev, int single); int sas_get_report_phy_sata(struct domain_device *dev, int phy_id, struct smp_rps_resp *rps_resp); +int sas_get_phy_attached_dev(struct domain_device *dev, int phy_id, + u8 *sas_addr, enum sas_device_type *type); int sas_try_ata_reset(struct asd_sas_phy *phy); void sas_hae_reset(struct work_struct *work); diff --git a/include/scsi/sas_ata.h b/include/scsi/sas_ata.h index e47f0aec0722..e7d466df8157 100644 --- a/include/scsi/sas_ata.h +++ b/include/scsi/sas_ata.h @@ -36,6 +36,7 @@ void sas_ata_device_link_abort(struct domain_device *dev, bool force_reset); int sas_execute_ata_cmd(struct domain_device *device, u8 *fis, int force_phy_id); int sas_ata_wait_after_reset(struct domain_device *dev, unsigned long deadline); +int smp_ata_check_ready_type(struct ata_link *link); #else @@ -104,6 +105,11 @@ static inline int sas_ata_wait_after_reset(struct domain_device *dev, { return -ETIMEDOUT; } + +static inline int smp_ata_check_ready_type(struct ata_link *link) +{ + return 0; +} #endif #endif /* _SAS_ATA_H_ */ From 3c2673a09cf1181318c07b7dbc1bc532ba3d33e3 Mon Sep 17 00:00:00 2001 From: Jie Zhan Date: Fri, 18 Nov 2022 16:37:13 +0800 Subject: [PATCH 2802/4122] scsi: hisi_sas: Fix SATA devices missing issue during I_T nexus reset SATA devices on an expander may be removed and not be found again when I_T nexus reset and revalidation are processed simultaneously. The issue comes from: - Revalidation can remove SATA devices in link reset, e.g. in hisi_sas_clear_nexus_ha(). 
- However, hisi_sas_debug_I_T_nexus_reset() polls the state of a SATA device on an expander after sending link_reset, where it calls: hisi_sas_debug_I_T_nexus_reset sas_ata_wait_after_reset ata_wait_after_reset ata_wait_ready smp_ata_check_ready sas_ex_phy_discover sas_ex_phy_discover_helper sas_set_ex_phy The ex_phy's change count is updated in sas_set_ex_phy(), so SATA devices after a link reset may not be found later through revalidation. A similar issue was reported in: commit 0f3fce5cc77e ("[SCSI] libsas: fix ata_eh clobbering ex_phys via smp_ata_check_ready") commit 87c8331fcf72 ("[SCSI] libsas: prevent domain rediscovery competing with ata error handling"). To address this issue, in hisi_sas_debug_I_T_nexus_reset(), we now call smp_ata_check_ready_type() that only polls the device type while not updating the ex_phy's data of libsas. Fixes: 71453bd9d1bf ("scsi: hisi_sas: Use sas_ata_wait_after_reset() in IT nexus reset") Signed-off-by: Jie Zhan Link: https://lore.kernel.org/r/20221118083714.4034612-5-zhanjie9@hisilicon.com Signed-off-by: Martin K. 
Petersen --- drivers/scsi/hisi_sas/hisi_sas_main.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 62080d0fad6f..41ba22f6c7f0 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -1694,13 +1694,15 @@ static int hisi_sas_debug_I_T_nexus_reset(struct domain_device *device) return rc; } + /* Remote phy */ if (rc) return rc; - /* Remote phy */ if (dev_is_sata(device)) { - rc = sas_ata_wait_after_reset(device, - HISI_SAS_WAIT_PHYUP_TIMEOUT); + struct ata_link *link = &device->sata_dev.ap->link; + + rc = ata_wait_after_reset(link, HISI_SAS_WAIT_PHYUP_TIMEOUT, + smp_ata_check_ready_type); } else { msleep(2000); } From 4d450cf2b00d34b53f52d93216dd23af57bdca73 Mon Sep 17 00:00:00 2001 From: Jie Zhan Date: Fri, 18 Nov 2022 16:37:14 +0800 Subject: [PATCH 2803/4122] scsi: libsas: Do not export sas_ata_wait_after_reset() sas_ata_wait_after_reset() does not need to be exported since it is no longer referenced outside libsas. Signed-off-by: Jie Zhan Link: https://lore.kernel.org/r/20221118083714.4034612-6-zhanjie9@hisilicon.com Reviewed-by: John Garry Signed-off-by: Martin K. 
Petersen --- drivers/scsi/libsas/sas_ata.c | 3 +-- include/scsi/sas_ata.h | 7 ------- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index 4b65cd79150f..f7439bf9cdc6 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -383,7 +383,7 @@ static int sas_ata_printk(const char *level, const struct domain_device *ddev, return r; } -int sas_ata_wait_after_reset(struct domain_device *dev, unsigned long deadline) +static int sas_ata_wait_after_reset(struct domain_device *dev, unsigned long deadline) { struct sata_device *sata_dev = &dev->sata_dev; int (*check_ready)(struct ata_link *link); @@ -405,7 +405,6 @@ int sas_ata_wait_after_reset(struct domain_device *dev, unsigned long deadline) return ret; } -EXPORT_SYMBOL_GPL(sas_ata_wait_after_reset); static int sas_ata_hard_reset(struct ata_link *link, unsigned int *class, unsigned long deadline) diff --git a/include/scsi/sas_ata.h b/include/scsi/sas_ata.h index e7d466df8157..9c927d46f136 100644 --- a/include/scsi/sas_ata.h +++ b/include/scsi/sas_ata.h @@ -35,7 +35,6 @@ void sas_ata_end_eh(struct ata_port *ap); void sas_ata_device_link_abort(struct domain_device *dev, bool force_reset); int sas_execute_ata_cmd(struct domain_device *device, u8 *fis, int force_phy_id); -int sas_ata_wait_after_reset(struct domain_device *dev, unsigned long deadline); int smp_ata_check_ready_type(struct ata_link *link); #else @@ -100,12 +99,6 @@ static inline int sas_execute_ata_cmd(struct domain_device *device, u8 *fis, return 0; } -static inline int sas_ata_wait_after_reset(struct domain_device *dev, - unsigned long deadline) -{ - return -ETIMEDOUT; -} - static inline int smp_ata_check_ready_type(struct ata_link *link) { return 0; From ee8c88cab4afbd5ee10a127d6cbecd6b200185a5 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 18 Nov 2022 15:37:03 -0800 Subject: [PATCH 2804/4122] scsi: ufs: core: Fix the polling implementation Fix the 
following issues in ufshcd_poll(): - If polling succeeds, return a positive value. - Do not complete polling requests from interrupt context because the block layer expects these requests to be completed from thread context. From block/bio.c: If REQ_ALLOC_CACHE is set, the final put of the bio MUST be done from process context, not hard/soft IRQ. Fixes: eaab9b573054 ("scsi: ufs: Implement polling support") Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20221118233717.441298-1-bvanassche@acm.org Reviewed-by: Adrian Hunter Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 0e8850869b21..2dbe24977822 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -5344,6 +5344,26 @@ static void __ufshcd_transfer_req_compl(struct ufs_hba *hba, } } +/* Any value that is not an existing queue number is fine for this constant. */ +enum { + UFSHCD_POLL_FROM_INTERRUPT_CONTEXT = -1 +}; + +static void ufshcd_clear_polled(struct ufs_hba *hba, + unsigned long *completed_reqs) +{ + int tag; + + for_each_set_bit(tag, completed_reqs, hba->nutrs) { + struct scsi_cmnd *cmd = hba->lrb[tag].cmd; + + if (!cmd) + continue; + if (scsi_cmd_to_rq(cmd)->cmd_flags & REQ_POLLED) + __clear_bit(tag, completed_reqs); + } +} + /* * Returns > 0 if one or more commands have been completed or 0 if no * requests have been completed. @@ -5360,13 +5380,17 @@ static int ufshcd_poll(struct Scsi_Host *shost, unsigned int queue_num) WARN_ONCE(completed_reqs & ~hba->outstanding_reqs, "completed: %#lx; outstanding: %#lx\n", completed_reqs, hba->outstanding_reqs); + if (queue_num == UFSHCD_POLL_FROM_INTERRUPT_CONTEXT) { + /* Do not complete polled requests from interrupt context. 
*/ + ufshcd_clear_polled(hba, &completed_reqs); + } hba->outstanding_reqs &= ~completed_reqs; spin_unlock_irqrestore(&hba->outstanding_lock, flags); if (completed_reqs) __ufshcd_transfer_req_compl(hba, completed_reqs); - return completed_reqs; + return completed_reqs != 0; } /** @@ -5397,7 +5421,7 @@ static irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba) * Ignore the ufshcd_poll() return value and return IRQ_HANDLED since we * do not want polling to trigger spurious interrupt complaints. */ - ufshcd_poll(hba->host, 0); + ufshcd_poll(hba->host, UFSHCD_POLL_FROM_INTERRUPT_CONTEXT); return IRQ_HANDLED; } From d29c32efebf3f10b25e9f88ac75c962e7259412d Mon Sep 17 00:00:00 2001 From: ChanWoo Lee Date: Mon, 21 Nov 2022 09:33:38 +0900 Subject: [PATCH 2805/4122] scsi: ufs: ufs-mediatek: Remove unnecessary return code Modify to remove unnecessary 'return 0' code. Signed-off-by: ChanWoo Lee Link: https://lore.kernel.org/r/20221121003338.11034-1-cw9316.lee@samsung.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. 
Petersen --- drivers/ufs/host/ufs-mediatek.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/ufs/host/ufs-mediatek.c b/drivers/ufs/host/ufs-mediatek.c index ef5816d82326..21d9b047539f 100644 --- a/drivers/ufs/host/ufs-mediatek.c +++ b/drivers/ufs/host/ufs-mediatek.c @@ -1095,7 +1095,7 @@ static void ufs_mtk_setup_clk_gating(struct ufs_hba *hba) } } -static int ufs_mtk_post_link(struct ufs_hba *hba) +static void ufs_mtk_post_link(struct ufs_hba *hba) { /* enable unipro clock gating feature */ ufs_mtk_cfg_unipro_cg(hba, true); @@ -1106,8 +1106,6 @@ static int ufs_mtk_post_link(struct ufs_hba *hba) FIELD_PREP(UFSHCI_AHIBERN8_SCALE_MASK, 3); ufs_mtk_setup_clk_gating(hba); - - return 0; } static int ufs_mtk_link_startup_notify(struct ufs_hba *hba, @@ -1120,7 +1118,7 @@ static int ufs_mtk_link_startup_notify(struct ufs_hba *hba, ret = ufs_mtk_pre_link(hba); break; case POST_CHANGE: - ret = ufs_mtk_post_link(hba); + ufs_mtk_post_link(hba); break; default: ret = -EINVAL; @@ -1272,9 +1270,8 @@ static int ufs_mtk_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op, struct arm_smccc_res res; if (status == PRE_CHANGE) { - if (!ufshcd_is_auto_hibern8_supported(hba)) - return 0; - ufs_mtk_auto_hibern8_disable(hba); + if (ufshcd_is_auto_hibern8_supported(hba)) + ufs_mtk_auto_hibern8_disable(hba); return 0; } From 35bd6f9fd33b8beb043aea0db51b726ca6edfd87 Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 21 Nov 2022 12:17:24 +0000 Subject: [PATCH 2806/4122] scsi: core: Use SCSI_SCAN_RESCAN in __scsi_add_device() Instead of using hardcoded '1' as the __scsi_add_device() -> scsi_probe_and_add_lun() rescan arg, use proper macro SCSI_SCAN_RESCAN. Signed-off-by: John Garry Link: https://lore.kernel.org/r/20221121121725.1910795-2-john.g.garry@oracle.com Reviewed-by: Damien Le Moal Reviewed-by: Jason Yan Signed-off-by: Martin K. 
Petersen --- drivers/scsi/scsi_scan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 5d27f5196de6..6cc974b382c1 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -1580,7 +1580,8 @@ struct scsi_device *__scsi_add_device(struct Scsi_Host *shost, uint channel, scsi_complete_async_scans(); if (scsi_host_scan_allowed(shost) && scsi_autopm_get_host(shost) == 0) { - scsi_probe_and_add_lun(starget, lun, NULL, &sdev, 1, hostdata); + scsi_probe_and_add_lun(starget, lun, NULL, &sdev, + SCSI_SCAN_RESCAN, hostdata); scsi_autopm_put_host(shost); } mutex_unlock(&shost->scan_mutex); From 425b27a04dd8b2f5abaf8023166071b8342dc079 Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 21 Nov 2022 12:17:25 +0000 Subject: [PATCH 2807/4122] scsi: core: Use SCSI_SCAN_INITIAL in do_scsi_scan_host() Instead of using hardcoded '0' as the do_scsi_scan_host() -> scsi_scan_host_selected() rescan arg, use proper macro SCSI_SCAN_INITIAL. Signed-off-by: John Garry Link: https://lore.kernel.org/r/20221121121725.1910795-3-john.g.garry@oracle.com Reviewed-by: Damien Le Moal Reviewed-by: Jason Yan Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 6cc974b382c1..920b145f80b7 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -1920,7 +1920,7 @@ static void do_scsi_scan_host(struct Scsi_Host *shost) msleep(10); } else { scsi_scan_host_selected(shost, SCAN_WILD_CARD, SCAN_WILD_CARD, - SCAN_WILD_CARD, 0); + SCAN_WILD_CARD, SCSI_SCAN_INITIAL); } } From 76dc609556c699676776d53222d342276afd0442 Mon Sep 17 00:00:00 2001 From: Yu Zhe Date: Fri, 25 Nov 2022 10:07:03 +0800 Subject: [PATCH 2808/4122] scsi: megaraid_sas: Fix some spelling mistakes in comment Fix typos in comment. 
Reviewed-by: Randy Dunlap Signed-off-by: Yu Zhe Link: https://lore.kernel.org/r/20221125020703.22216-1-yuzhe@nfschina.com Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_fusion.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index 6650f8c8e9b0..fe70f8f11435 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -80,7 +80,7 @@ static void megasas_fusion_crash_dump(struct megasas_instance *instance); * @ocr_context: If called from OCR context this will * be set to 1, else 0 * - * This function initates a chip reset followed by a wait for controller to + * This function initiates a chip reset followed by a wait for controller to * transition to ready state. * During this, driver will block all access to PCI config space from userspace */ @@ -334,7 +334,7 @@ megasas_fire_cmd_fusion(struct megasas_instance *instance, * * This function is only for fusion controllers. * Update host can queue, if firmware downgrade max supported firmware commands. - * Firmware upgrade case will be skiped because underlying firmware has + * Firmware upgrade case will be skipped because underlying firmware has * more resource than exposed to the OS. * */ @@ -2588,7 +2588,7 @@ static void megasas_stream_detect(struct megasas_instance *instance, if ((io_info->ldStartBlock != current_sd->next_seq_lba) && ((!io_info->isRead) || (!is_read_ahead))) /* - * Once the API availible we need to change this. + * Once the API is available we need to change this. 
* At this point we are not allowing any gap */ continue; @@ -4650,7 +4650,7 @@ megasas_issue_tm(struct megasas_instance *instance, u16 device_handle, } /* - * megasas_fusion_smid_lookup : Look for fusion command correpspodning to SCSI + * megasas_fusion_smid_lookup : Look for fusion command corresponding to SCSI * @instance: per adapter struct * * Return Non Zero index, if SMID found in outstanding commands From 4e80eef45ad775a54fb06a66bf8267a154781ce5 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 18 Nov 2022 02:23:04 +0300 Subject: [PATCH 2809/4122] scsi: sg: Fix get_user() in call sg_scsi_ioctl() get_user() expects the pointer to be pointer-to-simple-variable type, but sic->data is array of 'unsigned char'. It violates get_user() contracts. Explicitly take pointer to the first element of the array. It matches current behaviour. This is preparation for fixing sparse warnings caused by Linear Address Masking patchset. Signed-off-by: Kirill A. Shutemov Link: https://lore.kernel.org/r/20221117232304.1544-1-kirill.shutemov@linux.intel.com Cc: "James E.J. Bottomley" Cc: "Martin K. Petersen" Signed-off-by: Martin K. 
Petersen --- drivers/scsi/scsi_ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_ioctl.c b/drivers/scsi/scsi_ioctl.c index 2d20da55fb64..fdd47565a311 100644 --- a/drivers/scsi/scsi_ioctl.c +++ b/drivers/scsi/scsi_ioctl.c @@ -519,7 +519,7 @@ static int sg_scsi_ioctl(struct request_queue *q, fmode_t mode, return -EFAULT; if (in_len > PAGE_SIZE || out_len > PAGE_SIZE) return -EINVAL; - if (get_user(opcode, sic->data)) + if (get_user(opcode, &sic->data[0])) return -EFAULT; bytes = max(in_len, out_len); From fd5ac974fc25feed084c2d1599d0dddb4e0556bc Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Wed, 23 Nov 2022 14:36:09 +0100 Subject: [PATCH 2810/4122] counter: stm32-lptimer-cnt: fix the check on arr and cmp registers update The ARR (auto reload register) and CMP (compare) registers are successively written. The status bits to check the update of these registers are polled together with regmap_read_poll_timeout(). The condition to end the loop may become true, even if one of the registers isn't correctly updated. So ensure both status bits are set before clearing them. 
Fixes: d8958824cf07 ("iio: counter: Add support for STM32 LPTimer") Signed-off-by: Fabrice Gasnier Link: https://lore.kernel.org/r/20221123133609.465614-1-fabrice.gasnier@foss.st.com/ Signed-off-by: William Breathitt Gray --- drivers/counter/stm32-lptimer-cnt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/counter/stm32-lptimer-cnt.c b/drivers/counter/stm32-lptimer-cnt.c index d6b80b6dfc28..8439755559b2 100644 --- a/drivers/counter/stm32-lptimer-cnt.c +++ b/drivers/counter/stm32-lptimer-cnt.c @@ -69,7 +69,7 @@ static int stm32_lptim_set_enable_state(struct stm32_lptim_cnt *priv, /* ensure CMP & ARR registers are properly written */ ret = regmap_read_poll_timeout(priv->regmap, STM32_LPTIM_ISR, val, - (val & STM32_LPTIM_CMPOK_ARROK), + (val & STM32_LPTIM_CMPOK_ARROK) == STM32_LPTIM_CMPOK_ARROK, 100, 1000); if (ret) return ret; From c4d33381b134da188ccd1084aef21e2b8c3c422e Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Thu, 24 Nov 2022 03:19:32 +0000 Subject: [PATCH 2811/4122] power: supply: ab8500: Fix error handling in ab8500_charger_init() ab8500_charger_init() returns the result of platform_driver_register() directly without checking it; if platform_driver_register() fails, the ab8500_charger_component_drivers are not unregistered. Fix this by unregistering ab8500_charger_component_drivers when platform_driver_register() fails. 
Fixes: 1c1f13a006ed ("power: supply: ab8500: Move to componentized binding") Signed-off-by: Yuan Can Signed-off-by: Sebastian Reichel --- drivers/power/supply/ab8500_charger.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/power/supply/ab8500_charger.c b/drivers/power/supply/ab8500_charger.c index cf45a2c26b7f..308e68545d44 100644 --- a/drivers/power/supply/ab8500_charger.c +++ b/drivers/power/supply/ab8500_charger.c @@ -3719,7 +3719,14 @@ static int __init ab8500_charger_init(void) if (ret) return ret; - return platform_driver_register(&ab8500_charger_driver); + ret = platform_driver_register(&ab8500_charger_driver); + if (ret) { + platform_unregister_drivers(ab8500_charger_component_drivers, + ARRAY_SIZE(ab8500_charger_component_drivers)); + return ret; + } + + return 0; } static void __exit ab8500_charger_exit(void) From 248043299bf61134fb675d16963e11f49e79b05b Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 25 Nov 2022 12:12:49 +0200 Subject: [PATCH 2812/4122] modpost: Mark uuid_le type to be suitable only for MEI The uuid_le type is used only in the MEI ABI; do not advertise it for others. While at it, add a comment noting that the UUID types are not to be used in new code. 
Signed-off-by: Andy Shevchenko --- scripts/mod/file2alias.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index 7df23905fdf1..91c2e7ba5e52 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -34,19 +34,23 @@ typedef Elf64_Addr kernel_ulong_t; typedef uint32_t __u32; typedef uint16_t __u16; typedef unsigned char __u8; + +/* UUID types for backward compatibility, don't use in new code */ typedef struct { __u8 b[16]; } guid_t; -/* backwards compatibility, don't use in new code */ -typedef struct { - __u8 b[16]; -} uuid_le; typedef struct { __u8 b[16]; } uuid_t; + #define UUID_STRING_LEN 36 +/* MEI UUID type, don't use anywhere else */ +typedef struct { + __u8 b[16]; +} uuid_le; + /* Big exception to the "don't include kernel headers into userspace, which * even potentially has different endianness and word sizes, since * we handle those differences explicitly below */ From 571650b3a30f67d70df242508631ed5e7de2c2df Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 27 Nov 2022 19:02:24 +0100 Subject: [PATCH 2813/4122] power: supply: bq25890: Only use pdata->regulator_init_data for vbus bq25890_platform_data.regulator_init_data is intended to only provide regulator init_data for the vbus regulator. Remove this from the regulator_config before registering the vsys regulator. 
Otherwise the regulator_register() call for vsys will fail because it tries to register duplicate consumer_dev_name + supply names from init_data->consumer_supplies[], leading to the entire probe of the bq25890 driver failing: [ 32.017501] bq25890-charger i2c-bq25892_main: Failed to set supply vbus [ 32.017525] bq25890-charger i2c-bq25892_main: error -EBUSY: registering vsys regulator [ 32.124978] bq25890-charger: probe of i2c-bq25892_main failed with error -16 Fixes: 14a3d159abf8 ("power: supply: bq25890: Add Vsys regulator") Signed-off-by: Hans de Goede Reviewed-by: Marek Vasut Signed-off-by: Sebastian Reichel --- drivers/power/supply/bq25890_charger.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/power/supply/bq25890_charger.c b/drivers/power/supply/bq25890_charger.c index bfdd2213ba69..512c81662eea 100644 --- a/drivers/power/supply/bq25890_charger.c +++ b/drivers/power/supply/bq25890_charger.c @@ -1161,6 +1161,8 @@ static int bq25890_register_regulator(struct bq25890_device *bq) "registering vbus regulator"); } + /* pdata->regulator_init_data is for vbus only */ + cfg.init_data = NULL; reg = devm_regulator_register(bq->dev, &bq25890_vsys_desc, &cfg); if (IS_ERR(reg)) { return dev_err_probe(bq->dev, PTR_ERR(reg), From e88906b169ebcb8046e8f0ad76edd09ab41cfdfe Mon Sep 17 00:00:00 2001 From: Bruno Thomsen Date: Sun, 27 Nov 2022 18:08:11 +0100 Subject: [PATCH 2814/4122] USB: serial: cp210x: add Kamstrup RF sniffer PIDs The RF sniffers are based on cp210x where the RF frontends are based on a different USB stack. RF sniffers can analyze packets meta data including power level and perform packet injection. Can be used to perform RF frontend self-test when connected to a concentrator, ex. 
arch/arm/boot/dts/imx7d-flex-concentrator.dts Signed-off-by: Bruno Thomsen Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/cp210x.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 3bcec419f463..f6fb23620e87 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -195,6 +195,8 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x16DC, 0x0015) }, /* W-IE-NE-R Plein & Baus GmbH CML Control, Monitoring and Data Logger */ { USB_DEVICE(0x17A8, 0x0001) }, /* Kamstrup Optical Eye/3-wire */ { USB_DEVICE(0x17A8, 0x0005) }, /* Kamstrup M-Bus Master MultiPort 250D */ + { USB_DEVICE(0x17A8, 0x0011) }, /* Kamstrup 444 MHz RF sniffer */ + { USB_DEVICE(0x17A8, 0x0013) }, /* Kamstrup 870 MHz RF sniffer */ { USB_DEVICE(0x17A8, 0x0101) }, /* Kamstrup 868 MHz wM-Bus C-Mode Meter Reader (Int Ant) */ { USB_DEVICE(0x17A8, 0x0102) }, /* Kamstrup 868 MHz wM-Bus C-Mode Meter Reader (Ext Ant) */ { USB_DEVICE(0x17F4, 0xAAAA) }, /* Wavesense Jazz blood glucose meter */ From 09f530f0c6d6689eee5e690c6d98f495fcc3a0f9 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 23 Nov 2022 20:27:14 -0400 Subject: [PATCH 2815/4122] RDMA: Add netdevice_tracker to ib_device_set_netdev() This will cause an informative backtrace to print if the user of ib_device_set_netdev() isn't careful about tearing down the ibdevice before its netdevice parent is destroyed. For example: unregister_netdevice: waiting for vlan0 to become free. Usage count = 2 leaked reference. 
ib_device_set_netdev+0x266/0x730 siw_newlink+0x4e0/0xfd0 nldev_newlink+0x35c/0x5c0 rdma_nl_rcv_msg+0x36d/0x690 rdma_nl_rcv+0x2ee/0x430 netlink_unicast+0x543/0x7f0 netlink_sendmsg+0x918/0xe20 sock_sendmsg+0xcf/0x120 ____sys_sendmsg+0x70d/0x8b0 ___sys_sendmsg+0x11d/0x1b0 __sys_sendmsg+0xfa/0x1d0 do_syscall_64+0x35/0xb0 entry_SYSCALL_64_after_hwframe+0x63/0xcd This will help debug the issues syzkaller is seeing. Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/0-v1-a7c81b3842ce+e5-netdev_tracker_jgg@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/device.c | 6 ++++-- include/rdma/ib_verbs.h | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 3409c55ea88b..ff35cebb25e2 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2159,14 +2159,16 @@ int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev, return 0; } + if (old_ndev) + netdev_tracker_free(ndev, &pdata->netdev_tracker); if (ndev) - dev_hold(ndev); + netdev_hold(ndev, &pdata->netdev_tracker, GFP_ATOMIC); rcu_assign_pointer(pdata->netdev, ndev); spin_unlock_irqrestore(&pdata->netdev_lock, flags); add_ndev_hash(pdata); if (old_ndev) - dev_put(old_ndev); + __dev_put(old_ndev); return 0; } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index a1f4d53a4bb6..77dd9148815b 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2203,6 +2203,7 @@ struct ib_port_data { struct ib_port_cache cache; struct net_device __rcu *netdev; + netdevice_tracker netdev_tracker; struct hlist_node ndev_hash_link; struct rdma_port_counter port_counter; struct ib_port *sysfs; From ea5ef136e215fdef35f14010bc51fcd6686e6922 Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Sat, 26 Nov 2022 04:34:10 +0000 Subject: [PATCH 2816/4122] RDMA/nldev: Add checks for nla_nest_start() in fill_stat_counter_qps() As the nla_nest_start() may fail with NULL 
returned, the return value needs to be checked. Fixes: c4ffee7c9bdb ("RDMA/netlink: Implement counter dumpit calback") Signed-off-by: Yuan Can Link: https://lore.kernel.org/r/20221126043410.85632-1-yuancan@huawei.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/nldev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 2be76a3fdd87..ca0ed7d14326 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -894,6 +894,8 @@ static int fill_stat_counter_qps(struct sk_buff *msg, int ret = 0; table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP); + if (!table_attr) + return -EMSGSIZE; rt = &counter->device->res[RDMA_RESTRACK_QP]; xa_lock(&rt->xa); From 86815735aa571d493cf5768cad5fa8e6fd9c7ba8 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Mon, 28 Nov 2022 19:26:29 +0530 Subject: [PATCH 2817/4122] KVM: arm64: PMU: Replace version number '0' with ID_AA64DFR0_EL1_PMUVer_NI kvm_host_pmu_init() returns when detected PMU is either not implemented, or implementation defined. kvm_pmu_probe_armpmu() also has a similar situation. Extracted ID_AA64DFR0_EL1_PMUVer value, when PMU is not implemented is '0', which can be replaced with ID_AA64DFR0_EL1_PMUVer_NI defined as '0b0000'. 
Cc: Arnaldo Carvalho de Melo Cc: Marc Zyngier Cc: Mark Rutland Cc: Will Deacon Cc: Catalin Marinas Cc: linux-perf-users@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Anshuman Khandual Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221128135629.118346-1-anshuman.khandual@arm.com --- arch/arm64/kvm/pmu-emul.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 3295dea34f4c..bb7251e670a9 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -668,7 +668,8 @@ void kvm_host_pmu_init(struct arm_pmu *pmu) { struct arm_pmu_entry *entry; - if (pmu->pmuver == 0 || pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF) + if (pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_NI || + pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF) return; mutex_lock(&arm_pmus_lock); @@ -721,7 +722,7 @@ static struct arm_pmu *kvm_pmu_probe_armpmu(void) if (event->pmu) { pmu = to_arm_pmu(event->pmu); - if (pmu->pmuver == 0 || + if (pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_NI || pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF) pmu = NULL; } From 292e8f1494764ac46dd1b7dd46fa317db691436c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 24 Nov 2022 10:40:02 +0000 Subject: [PATCH 2818/4122] KVM: arm64: PMU: Simplify PMCR_EL0 reset handling Resetting PMCR_EL0 is a pretty involved process that includes poisoning some of the writable bits, just because we can. It makes it hard to reason about what gets configured, and just resetting things to 0 seems like a much saner option. Reduce reset_pmcr() to just preserving PMCR_EL0.N from the host, and setting PMCR_EL0.LC if we don't support AArch32. 
Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 67eac0f747be..eb56ad031116 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -639,24 +639,18 @@ static void reset_pmselr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { - u64 pmcr, val; + u64 pmcr; /* No PMU available, PMCR_EL0 may UNDEF... */ if (!kvm_arm_support_pmu_v3()) return; - pmcr = read_sysreg(pmcr_el0); - /* - * Writable bits of PMCR_EL0 (ARMV8_PMU_PMCR_MASK) are reset to UNKNOWN - * except PMCR.E resetting to zero. - */ - val = ((pmcr & ~ARMV8_PMU_PMCR_MASK) - | (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E); + /* Only preserve PMCR_EL0.N, and reset the rest to 0 */ + pmcr = read_sysreg(pmcr_el0) & ARMV8_PMU_PMCR_N_MASK; if (!kvm_supports_32bit_el0()) - val |= ARMV8_PMU_PMCR_LC; - if (!kvm_pmu_is_3p5(vcpu)) - val &= ~ARMV8_PMU_PMCR_LP; - __vcpu_sys_reg(vcpu, r->reg) = val; + pmcr |= ARMV8_PMU_PMCR_LC; + + __vcpu_sys_reg(vcpu, r->reg) = pmcr; } static bool check_pmu_access_disabled(struct kvm_vcpu *vcpu, u64 flags) From 64d6820d64c0a206e744bd8945374d563a76c16c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 24 Nov 2022 10:44:59 +0000 Subject: [PATCH 2819/4122] KVM: arm64: PMU: Sanitise PMCR_EL0.LP on first vcpu run Userspace can play some dirty tricks on us by selecting a given PMU version (such as PMUv3p5), restore a PMCR_EL0 value that has PMCR_EL0.LP set, and then switch the PMU version to PMUv3p1, for example. In this situation, we end-up with PMCR_EL0.LP being set and spreading havoc in the PMU emulation. This is specially hard as the first two step can be done on one vcpu and the third step on another, meaning that we need to sanitise *all* vcpus when the PMU version is changed. 
In order to avoid a pretty complicated locking situation, defer the sanitisation of PMCR_EL0 to the point where the vcpu is actually run for the first time, using the existing KVM_REQ_RELOAD_PMU request that calls into kvm_pmu_handle_pmcr(). There is still an obscure corner case where userspace could do the above trick, and then save the VM without running it. They would then observe an inconsistent state (PMUv3.1 + LP set), but that state will be fixed on the first run anyway whenever the guest gets restored on a host. Reported-by: Reiji Watanabe Signed-off-by: Marc Zyngier --- arch/arm64/kvm/pmu-emul.c | 6 ++++++ arch/arm64/kvm/sys_regs.c | 8 ++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index bb7251e670a9..d8ea39943086 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -538,6 +538,12 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) if (!kvm_vcpu_has_pmu(vcpu)) return; + /* Fixup PMCR_EL0 to reconcile the PMU version and the LP bit */ + if (!kvm_pmu_is_3p5(vcpu)) + val &= ~ARMV8_PMU_PMCR_LP; + + __vcpu_sys_reg(vcpu, PMCR_EL0) = val; + if (val & ARMV8_PMU_PMCR_E) { kvm_pmu_enable_counter_mask(vcpu, __vcpu_sys_reg(vcpu, PMCNTENSET_EL0)); diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index eb56ad031116..528d253c571a 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -693,15 +693,15 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, return false; if (p->is_write) { - /* Only update writeable bits of PMCR */ + /* + * Only update writeable bits of PMCR (continuing into + * kvm_pmu_handle_pmcr() as well) + */ val = __vcpu_sys_reg(vcpu, PMCR_EL0); val &= ~ARMV8_PMU_PMCR_MASK; val |= p->regval & ARMV8_PMU_PMCR_MASK; if (!kvm_supports_32bit_el0()) val |= ARMV8_PMU_PMCR_LC; - if (!kvm_pmu_is_3p5(vcpu)) - val &= ~ARMV8_PMU_PMCR_LP; - __vcpu_sys_reg(vcpu, PMCR_EL0) = val; kvm_pmu_handle_pmcr(vcpu, 
val); kvm_vcpu_pmu_restore_guest(vcpu); } else { From 923d011febb4e2fb338036bb0ee6a0a7f9b10da1 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 27 Nov 2022 22:52:10 +0100 Subject: [PATCH 2820/4122] gpio: Do not include <linux/kernel.h> when not really needed. <linux/kernel.h> is included only for using container_of(). Include <linux/container_of.h> instead, it is much lighter. Signed-off-by: Christophe JAILLET Reviewed-by: Linus Walleij Reviewed-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- include/linux/of_gpio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h index a5166eb93437..6db627257a7b 100644 --- a/include/linux/of_gpio.h +++ b/include/linux/of_gpio.h @@ -34,7 +34,7 @@ enum of_gpio_flags { #ifdef CONFIG_OF_GPIO -#include <linux/kernel.h> +#include <linux/container_of.h> /* * OF GPIO chip for memory mapped banks From 4ef339bc053a62dac9017f80f7bb8cff0412bd29 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 16 Nov 2022 16:17:28 +0200 Subject: [PATCH 2821/4122] gpiolib: Unify access to the device properties Some of the functions are using struct fwnode_handle, some struct device pointer. In the GPIO library the firmware node of the GPIO device is the same as GPIO node of the GPIO chip. Due to this fact we may use the former to access properties everywhere in the code. 
Signed-off-by: Andy Shevchenko Reviewed-by: Brian Masney Tested-by: Marijn Suijten [Bartosz: stick to the 80-char limit where it's not hurting readability] Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 51afdc6ac919..2729f7ebab9d 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -367,12 +367,12 @@ static int gpiochip_set_desc_names(struct gpio_chip *gc) static int devprop_gpiochip_set_names(struct gpio_chip *chip) { struct gpio_device *gdev = chip->gpiodev; - const struct fwnode_handle *fwnode = dev_fwnode(&gdev->dev); + struct device *dev = &gdev->dev; const char **names; int ret, i; int count; - count = fwnode_property_string_array_count(fwnode, "gpio-line-names"); + count = device_property_string_array_count(dev, "gpio-line-names"); if (count < 0) return 0; @@ -385,7 +385,7 @@ static int devprop_gpiochip_set_names(struct gpio_chip *chip) * gpiochips. */ if (count <= chip->offset) { - dev_warn(&gdev->dev, "gpio-line-names too short (length %d), cannot map names for the gpiochip at offset %u\n", + dev_warn(dev, "gpio-line-names too short (length %d), cannot map names for the gpiochip at offset %u\n", count, chip->offset); return 0; } @@ -394,10 +394,10 @@ static int devprop_gpiochip_set_names(struct gpio_chip *chip) if (!names) return -ENOMEM; - ret = fwnode_property_read_string_array(fwnode, "gpio-line-names", + ret = device_property_read_string_array(dev, "gpio-line-names", names, count); if (ret < 0) { - dev_warn(&gdev->dev, "failed to read GPIO line names\n"); + dev_warn(dev, "failed to read GPIO line names\n"); kfree(names); return ret; } @@ -448,10 +448,11 @@ static unsigned long *gpiochip_allocate_mask(struct gpio_chip *gc) static unsigned int gpiochip_count_reserved_ranges(struct gpio_chip *gc) { + struct device *dev = &gc->gpiodev->dev; int size; /* Format is "start, count, ..." 
*/ - size = fwnode_property_count_u32(gc->fwnode, "gpio-reserved-ranges"); + size = device_property_count_u32(dev, "gpio-reserved-ranges"); if (size > 0 && size % 2 == 0) return size; @@ -472,6 +473,7 @@ static int gpiochip_alloc_valid_mask(struct gpio_chip *gc) static int gpiochip_apply_reserved_ranges(struct gpio_chip *gc) { + struct device *dev = &gc->gpiodev->dev; unsigned int size; u32 *ranges; int ret; @@ -484,7 +486,8 @@ static int gpiochip_apply_reserved_ranges(struct gpio_chip *gc) if (!ranges) return -ENOMEM; - ret = fwnode_property_read_u32_array(gc->fwnode, "gpio-reserved-ranges", ranges, size); + ret = device_property_read_u32_array(dev, "gpio-reserved-ranges", + ranges, size); if (ret) { kfree(ranges); return ret; From 20ee8c223f792947378196307d8e707c9cdc2d61 Mon Sep 17 00:00:00 2001 From: Yang Shen Date: Tue, 22 Nov 2022 17:03:55 +0800 Subject: [PATCH 2822/4122] coresight: trbe: remove cpuhp instance node before remove cpuhp state cpuhp_state_add_instance() and cpuhp_state_remove_instance() should be used in pairs. Or there will lead to the warn on cpuhp_remove_multi_state() since the cpuhp_step list is not empty. The following is the error log with 'rmmod coresight-trbe': Error: Removing state 215 which has instances left. 
Call trace: __cpuhp_remove_state_cpuslocked+0x144/0x160 __cpuhp_remove_state+0xac/0x100 arm_trbe_device_remove+0x2c/0x60 [coresight_trbe] platform_remove+0x34/0x70 device_remove+0x54/0x90 device_release_driver_internal+0x1e4/0x250 driver_detach+0x5c/0xb0 bus_remove_driver+0x64/0xc0 driver_unregister+0x3c/0x70 platform_driver_unregister+0x20/0x30 arm_trbe_exit+0x1c/0x658 [coresight_trbe] __arm64_sys_delete_module+0x1ac/0x24c invoke_syscall+0x50/0x120 el0_svc_common.constprop.0+0x58/0x1a0 do_el0_svc+0x38/0xd0 el0_svc+0x2c/0xc0 el0t_64_sync_handler+0x1ac/0x1b0 el0t_64_sync+0x19c/0x1a0 ---[ end trace 0000000000000000 ]--- Fixes: 3fbf7f011f24 ("coresight: sink: Add TRBE driver") Reviewed-by: Anshuman Khandual Signed-off-by: Yang Shen Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20221122090355.23533-1-shenyang39@huawei.com --- drivers/hwtracing/coresight/coresight-trbe.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hwtracing/coresight/coresight-trbe.c b/drivers/hwtracing/coresight/coresight-trbe.c index 2b386bb848f8..1fc4fd79a1c6 100644 --- a/drivers/hwtracing/coresight/coresight-trbe.c +++ b/drivers/hwtracing/coresight/coresight-trbe.c @@ -1434,6 +1434,7 @@ static int arm_trbe_probe_cpuhp(struct trbe_drvdata *drvdata) static void arm_trbe_remove_cpuhp(struct trbe_drvdata *drvdata) { + cpuhp_state_remove_instance(drvdata->trbe_online, &drvdata->hotplug_node); cpuhp_remove_multi_state(drvdata->trbe_online); } From 3dc228b35387803d9c43ed1b098aabb1d3ae9c7d Mon Sep 17 00:00:00 2001 From: Mike Leach Date: Wed, 23 Nov 2022 19:38:18 +0000 Subject: [PATCH 2823/4122] coresight: cti: Fix null pointer error on CTI init before ETM When CTI is discovered first then the function coresight_set_assoc_ectdev_mutex() is called to set the association between CTI and ETM device. Recent lockdep fix passes a null pointer. This patch passes the correct pointer. 
Before patch: log of boot oops sequence with CTI discovered first: [ 12.424091] cs_system_cfg: CoreSight Configuration manager initialised [ 12.483474] coresight cti_sys0: CTI initialized [ 12.488109] coresight cti_sys1: CTI initialized [ 12.503594] coresight cti_cpu0: CTI initialized [ 12.517877] coresight-cpu-debug 850000.debug: Coresight debug-CPU0 initialized [ 12.523479] coresight-cpu-debug 852000.debug: Coresight debug-CPU1 initialized [ 12.529926] coresight-cpu-debug 854000.debug: Coresight debug-CPU2 initialized [ 12.541808] coresight stm0: STM32 initialized [ 12.544421] coresight-cpu-debug 856000.debug: Coresight debug-CPU3 initialized [ 12.585639] coresight cti_cpu1: CTI initialized [ 12.614028] coresight cti_cpu2: CTI initialized [ 12.631679] CSCFG registered etm0 [ 12.633920] coresight etm0: CPU0: etm v4.0 initialized [ 12.656392] coresight cti_cpu3: CTI initialized ... [ 12.708383] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000348 ... [ 12.755094] Internal error: Oops: 0000000096000044 [#1] SMP [ 12.761817] Modules linked in: coresight_etm4x(+) coresight_tmc coresight_cpu_debug coresight_replicator coresight_funnel coresight_cti coresight_tpiu coresight_stm coresight [ 12.767210] CPU: 3 PID: 1346 Comm: systemd-udevd Not tainted 6.1.0-rc3tid-v6tid-v6-235166-gf7f7d7a2204a-dirty #498 [ 12.782827] Hardware name: Qualcomm Technologies, Inc. APQ 8016 SBC (DT) [ 12.793154] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 12.800010] pc : coresight_set_assoc_ectdev_mutex+0x30/0x50 [coresight] [ 12.806694] lr : coresight_set_assoc_ectdev_mutex+0x30/0x50 [coresight] ... 
[ 12.885064] Call trace: [ 12.892352] coresight_set_assoc_ectdev_mutex+0x30/0x50 [coresight] [ 12.894693] cti_add_assoc_to_csdev+0x144/0x1b0 [coresight_cti] [ 12.900943] coresight_register+0x2c8/0x320 [coresight] [ 12.906844] etm4_add_coresight_dev.isra.27+0x148/0x280 [coresight_etm4x] [ 12.912056] etm4_probe+0x144/0x1c0 [coresight_etm4x] [ 12.918998] etm4_probe_amba+0x40/0x78 [coresight_etm4x] [ 12.924032] amba_probe+0x11c/0x1f0 After patch: similar log [ 12.444467] cs_system_cfg: CoreSight Configuration manager initialised [ 12.456329] coresight-cpu-debug 850000.debug: Coresight debug-CPU0 initialized [ 12.456754] coresight-cpu-debug 852000.debug: Coresight debug-CPU1 initialized [ 12.469672] coresight-cpu-debug 854000.debug: Coresight debug-CPU2 initialized [ 12.476098] coresight-cpu-debug 856000.debug: Coresight debug-CPU3 initialized [ 12.532409] coresight stm0: STM32 initialized [ 12.533708] coresight cti_sys0: CTI initialized [ 12.539478] coresight cti_sys1: CTI initialized [ 12.550106] coresight cti_cpu0: CTI initialized [ 12.633931] coresight cti_cpu1: CTI initialized [ 12.634664] coresight cti_cpu2: CTI initialized [ 12.638090] coresight cti_cpu3: CTI initialized [ 12.721136] CSCFG registered etm0 ... 
[ 12.762643] CSCFG registered etm1 [ 12.762666] coresight etm1: CPU1: etm v4.0 initialized [ 12.776258] CSCFG registered etm2 [ 12.776282] coresight etm2: CPU2: etm v4.0 initialized [ 12.784357] CSCFG registered etm3 [ 12.785455] coresight etm3: CPU3: etm v4.0 initialized Error can also be triggered by manually starting the modules using modprobe in the following order: root@linaro-developer:/home/linaro/cs-mods# modprobe coresight root@linaro-developer:/home/linaro/cs-mods# modprobe coresight-cti root@linaro-developer:/home/linaro/cs-mods# modprobe coresight-etm4x Tested on Dragonboard DB410c Applies to coresight/next Fixes: 23722fb46725 ("coresight: Fix possible deadlock with lock dependency") Signed-off-by: Mike Leach Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20221123193818.6253-1-mike.leach@linaro.org --- drivers/hwtracing/coresight/coresight-cti-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwtracing/coresight/coresight-cti-core.c b/drivers/hwtracing/coresight/coresight-cti-core.c index c6e8c6542f24..d2cf4f4848e1 100644 --- a/drivers/hwtracing/coresight/coresight-cti-core.c +++ b/drivers/hwtracing/coresight/coresight-cti-core.c @@ -564,7 +564,7 @@ static void cti_add_assoc_to_csdev(struct coresight_device *csdev) * if we found a matching csdev then update the ECT * association pointer for the device with this CTI. */ - coresight_set_assoc_ectdev_mutex(csdev->ect_dev, + coresight_set_assoc_ectdev_mutex(csdev, ect_item->csdev); break; } From c767c34740132ffc478226864a7461493cdc2413 Mon Sep 17 00:00:00 2001 From: Jilin Yuan Date: Wed, 19 Oct 2022 20:49:53 +0800 Subject: [PATCH 2824/4122] coresight: etm4x: fix repeated words in comments Delete the redundant word 'the'. 
Signed-off-by: Jilin Yuan Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20221019124953.45885-1-yuanjilin@cdjrlc.com --- drivers/hwtracing/coresight/coresight-etm4x-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c index 9dc0d7f57565..1cc052979e01 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c @@ -1481,7 +1481,7 @@ static int etm4_set_event_filters(struct etmv4_drvdata *drvdata, /* * If filters::ssstatus == 1, trace acquisition was * started but the process was yanked away before the - * the stop address was hit. As such the start/stop + * stop address was hit. As such the start/stop * logic needs to be re-started so that tracing can * resume where it left. * From 3ca9d84e722e8044c09e80992aa7b15bd904d3ce Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 23 Nov 2022 19:40:01 -0500 Subject: [PATCH 2825/4122] KVM: always declare prototype for kvm_arch_irqchip_in_kernel Architecture code might want to use it even if CONFIG_HAVE_KVM_IRQ_ROUTING is false; for example PPC XICS has KVM_IRQ_LINE and wants to use kvm_arch_irqchip_in_kernel from there, but it does not have KVM_SET_GSI_ROUTING so the prototype was not provided. 
Fixes: d663b8a28598 ("KVM: replace direct irq.h inclusion") Reported-by: kernel test robot Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 6f0f389f5f9c..b8d12356f015 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -663,9 +663,9 @@ struct kvm_irq_routing_table { */ struct hlist_head map[]; }; +#endif bool kvm_arch_irqchip_in_kernel(struct kvm *kvm); -#endif #ifndef KVM_INTERNAL_MEM_SLOTS #define KVM_INTERNAL_MEM_SLOTS 0 From c3f3719952b9ab64e001e36df3d7bf24d5a4752d Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 14 Nov 2022 12:48:57 -0800 Subject: [PATCH 2826/4122] KVM: x86/xen: Add CPL to Xen hypercall tracepoint Signed-off-by: David Woodhouse Signed-off-by: Paolo Bonzini --- arch/x86/kvm/trace.h | 15 +++++++++------ arch/x86/kvm/xen.c | 2 +- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 09f3392dd830..83843379813e 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -113,12 +113,13 @@ TRACE_EVENT(kvm_hv_hypercall_done, * Tracepoint for Xen hypercall. 
*/ TRACE_EVENT(kvm_xen_hypercall, - TP_PROTO(unsigned long nr, unsigned long a0, unsigned long a1, - unsigned long a2, unsigned long a3, unsigned long a4, - unsigned long a5), - TP_ARGS(nr, a0, a1, a2, a3, a4, a5), + TP_PROTO(u8 cpl, unsigned long nr, + unsigned long a0, unsigned long a1, unsigned long a2, + unsigned long a3, unsigned long a4, unsigned long a5), + TP_ARGS(cpl, nr, a0, a1, a2, a3, a4, a5), TP_STRUCT__entry( + __field(u8, cpl) __field(unsigned long, nr) __field(unsigned long, a0) __field(unsigned long, a1) @@ -129,6 +130,7 @@ TRACE_EVENT(kvm_xen_hypercall, ), TP_fast_assign( + __entry->cpl = cpl; __entry->nr = nr; __entry->a0 = a0; __entry->a1 = a1; @@ -138,8 +140,9 @@ TRACE_EVENT(kvm_xen_hypercall, __entry->a4 = a5; ), - TP_printk("nr 0x%lx a0 0x%lx a1 0x%lx a2 0x%lx a3 0x%lx a4 0x%lx a5 %lx", - __entry->nr, __entry->a0, __entry->a1, __entry->a2, + TP_printk("cpl %d nr 0x%lx a0 0x%lx a1 0x%lx a2 0x%lx a3 0x%lx a4 0x%lx a5 %lx", + __entry->cpl, __entry->nr, + __entry->a0, __entry->a1, __entry->a2, __entry->a3, __entry->a4, __entry->a5) ); diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index f3098c0e386a..4b8e9628fbf5 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -1256,7 +1256,7 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu) } #endif cpl = static_call(kvm_x86_get_cpl)(vcpu); - trace_kvm_xen_hypercall(input, params[0], params[1], params[2], + trace_kvm_xen_hypercall(cpl, input, params[0], params[1], params[2], params[3], params[4], params[5]); /* From 7927e27549d3f02354233a9ab3f28e0080ede29b Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 12 Nov 2022 14:28:20 +0000 Subject: [PATCH 2827/4122] MAINTAINERS: Add KVM x86/xen maintainer list Adding Paul as co-maintainer of Xen support to help ensure that things don't fall through the cracks when I spend three months at a time travelling... 
Signed-off-by: David Woodhouse Reviewed-by: Paul Durrant Signed-off-by: Paolo Bonzini --- MAINTAINERS | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 046ff06ff97f..89672a59c0c3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11324,6 +11324,16 @@ F: arch/x86/kvm/svm/hyperv.* F: arch/x86/kvm/svm/svm_onhyperv.* F: arch/x86/kvm/vmx/evmcs.* +KVM X86 Xen (KVM/Xen) +M: David Woodhouse +M: Paul Durrant +M: Sean Christopherson +M: Paolo Bonzini +L: kvm@vger.kernel.org +S: Supported +T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git +F: arch/x86/kvm/xen.* + KERNFS M: Greg Kroah-Hartman M: Tejun Heo From c4690d016182d271a862767145db8b2bc792f4a8 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 25 Nov 2022 20:58:38 +0800 Subject: [PATCH 2828/4122] KVM: x86: Add BUILD_BUG_ON() to detect bad usage of "scattered" flags Add a compile-time assert in the SF() macro to detect improper usage, i.e. to detect passing in an X86_FEATURE_* flag that isn't actually scattered by the kernel. Upcoming feature flags will be 100% KVM-only and will have X86_FEATURE_* macros that point at a kvm_only_cpuid_leafs word, not a kernel-defined word. Using SF() and thus boot_cpu_has() for such feature flags would access memory beyond x86_capability[NCAPINTS] and at best incorrectly hide a feature, and at worst leak kernel state to userspace. Signed-off-by: Sean Christopherson Message-Id: <20221125125845.1182922-2-jiaxi.chen@linux.intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 6b5912578edd..ff2e9734e5c1 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -65,7 +65,13 @@ u32 xstate_required_size(u64 xstate_bv, bool compacted) #define KVM_X86_FEATURE_AMD_PSFD (13*32+28) /* Predictive Store Forwarding Disable */ #define F feature_bit -#define SF(name) (boot_cpu_has(X86_FEATURE_##name) ? 
F(name) : 0) + +/* Scattered Flag - For features that are scattered by cpufeatures.h. */ +#define SF(name) \ +({ \ + BUILD_BUG_ON(X86_FEATURE_##name >= MAX_CPU_FEATURES); \ + (boot_cpu_has(X86_FEATURE_##name) ? F(name) : 0); \ +}) /* * Magic value used by KVM when querying userspace-provided CPUID entries and From 047c7229906152fb85c23dc18fd25a00cd7cb4de Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 25 Nov 2022 20:58:39 +0800 Subject: [PATCH 2829/4122] KVM: x86: Update KVM-only leaf handling to allow for 100% KVM-only leafs Rename kvm_cpu_cap_init_scattered() to kvm_cpu_cap_init_kvm_defined() in anticipation of adding KVM-only CPUID leafs that aren't recognized by the kernel and thus not scattered, i.e. for leafs that are 100% KVM-defined. Adjust/add comments to kvm_only_cpuid_leafs and KVM_X86_FEATURE to document how to create new kvm_only_cpuid_leafs entries for scattered features as well as features that are entirely unknown to the kernel. No functional change intended. Signed-off-by: Sean Christopherson Message-Id: <20221125125845.1182922-3-jiaxi.chen@linux.intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 8 ++++---- arch/x86/kvm/reverse_cpuid.h | 18 +++++++++++++++--- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index ff2e9734e5c1..73c3c6dc6e7b 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -549,9 +549,9 @@ static __always_inline void __kvm_cpu_cap_mask(unsigned int leaf) } static __always_inline -void kvm_cpu_cap_init_scattered(enum kvm_only_cpuid_leafs leaf, u32 mask) +void kvm_cpu_cap_init_kvm_defined(enum kvm_only_cpuid_leafs leaf, u32 mask) { - /* Use kvm_cpu_cap_mask for non-scattered leafs. */ + /* Use kvm_cpu_cap_mask for leafs that aren't KVM-only. 
*/ BUILD_BUG_ON(leaf < NCAPINTS); kvm_cpu_caps[leaf] = mask; @@ -561,7 +561,7 @@ void kvm_cpu_cap_init_scattered(enum kvm_only_cpuid_leafs leaf, u32 mask) static __always_inline void kvm_cpu_cap_mask(enum cpuid_leafs leaf, u32 mask) { - /* Use kvm_cpu_cap_init_scattered for scattered leafs. */ + /* Use kvm_cpu_cap_init_kvm_defined for KVM-only leafs. */ BUILD_BUG_ON(leaf >= NCAPINTS); kvm_cpu_caps[leaf] &= mask; @@ -670,7 +670,7 @@ void kvm_set_cpu_caps(void) F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | F(XSAVES) | f_xfd ); - kvm_cpu_cap_init_scattered(CPUID_12_EAX, + kvm_cpu_cap_init_kvm_defined(CPUID_12_EAX, SF(SGX1) | SF(SGX2) ); diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index a19d473d0184..443a6b3e66c0 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -7,9 +7,9 @@ #include /* - * Hardware-defined CPUID leafs that are scattered in the kernel, but need to - * be directly used by KVM. Note, these word values conflict with the kernel's - * "bug" caps, but KVM doesn't use those. + * Hardware-defined CPUID leafs that are either scattered by the kernel or are + * unknown to the kernel, but need to be directly used by KVM. Note, these + * word values conflict with the kernel's "bug" caps, but KVM doesn't use those. */ enum kvm_only_cpuid_leafs { CPUID_12_EAX = NCAPINTS, @@ -18,6 +18,18 @@ enum kvm_only_cpuid_leafs { NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS, }; +/* + * Define a KVM-only feature flag. + * + * For features that are scattered by cpufeatures.h, __feature_translate() also + * needs to be updated to translate the kernel-defined feature into the + * KVM-defined feature. + * + * For features that are 100% KVM-only, i.e. not defined by cpufeatures.h, + * forego the intermediate KVM_X86_FEATURE and directly define X86_FEATURE_* so + * that X86_FEATURE_* can be used in KVM. No __feature_translate() handling is + * needed in this case. 
+ */ #define KVM_X86_FEATURE(w, f) ((w)*32 + (f)) /* Intel-defined SGX sub-features, CPUID level 0x12 (EAX). */ From 6a19d7aa5821522eec528fd44f24fe774b875377 Mon Sep 17 00:00:00 2001 From: Jiaxi Chen Date: Fri, 25 Nov 2022 20:58:40 +0800 Subject: [PATCH 2830/4122] x86: KVM: Advertise CMPccXADD CPUID to user space CMPccXADD is a new set of instructions in the latest Intel platform Sierra Forest. This new instruction set includes a semaphore operation that can compare and add the operands if condition is met, which can improve database performance. The bit definition: CPUID.(EAX=7,ECX=1):EAX[bit 7] CMPccXADD is on an expected-dense CPUID leaf and some other bits on this leaf have kernel usages. Given that, define this feature bit like X86_FEATURE_ in kernel. Considering CMPccXADD itself has no truly kernel usages and /proc/cpuinfo has too much unreadable flags, hide this one in /proc/cpuinfo. Advertise CMPCCXADD to KVM userspace. This is safe because there are no new VMX controls or additional host enabling required for guests to use this feature. 
Signed-off-by: Jiaxi Chen Acked-by: Borislav Petkov Message-Id: <20221125125845.1182922-4-jiaxi.chen@linux.intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kvm/cpuid.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index b71f4f2ecdd5..5cdd57133d90 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -308,6 +308,7 @@ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ +#define X86_FEATURE_CMPCCXADD (12*32+ 7) /* "" CMPccXADD instructions */ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 73c3c6dc6e7b..2f263ff911d6 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -663,7 +663,7 @@ void kvm_set_cpu_caps(void) kvm_cpu_cap_set(X86_FEATURE_SPEC_CTRL_SSBD); kvm_cpu_cap_mask(CPUID_7_1_EAX, - F(AVX_VNNI) | F(AVX512_BF16) + F(AVX_VNNI) | F(AVX512_BF16) | F(CMPCCXADD) ); kvm_cpu_cap_mask(CPUID_D_1_EAX, From af2872f6225476566bcbbd523a74dcaba29e159e Mon Sep 17 00:00:00 2001 From: "Chang S. Bae" Date: Fri, 25 Nov 2022 20:58:41 +0800 Subject: [PATCH 2831/4122] x86: KVM: Advertise AMX-FP16 CPUID to user space Latest Intel platform Granite Rapids has introduced a new instruction - AMX-FP16, which performs dot-products of two FP16 tiles and accumulates the results into a packed single precision tile. AMX-FP16 adds FP16 capability and also allows a FP16 GPU trained model to run faster without loss of accuracy or added SW overhead. The bit definition: CPUID.(EAX=7,ECX=1):EAX[bit 21] AMX-FP16 is on an expected-dense CPUID leaf and some other bits on this leaf have kernel usages. 
Given that, define this feature bit like X86_FEATURE_ in kernel. Considering AMX-FP16 itself has no truly kernel usages and /proc/cpuinfo has too much unreadable flags, hide this one in /proc/cpuinfo. Advertise AMX-FP16 to KVM userspace. This is safe because there are no new VMX controls or additional host enabling required for guests to use this feature. Signed-off-by: Chang S. Bae Signed-off-by: Jiaxi Chen Acked-by: Borislav Petkov Message-Id: <20221125125845.1182922-5-jiaxi.chen@linux.intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kvm/cpuid.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 5cdd57133d90..20059dc33d24 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -309,6 +309,7 @@ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ #define X86_FEATURE_CMPCCXADD (12*32+ 7) /* "" CMPccXADD instructions */ +#define X86_FEATURE_AMX_FP16 (12*32+21) /* "" AMX fp16 Support */ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 2f263ff911d6..6360075ca70f 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -663,7 +663,7 @@ void kvm_set_cpu_caps(void) kvm_cpu_cap_set(X86_FEATURE_SPEC_CTRL_SSBD); kvm_cpu_cap_mask(CPUID_7_1_EAX, - F(AVX_VNNI) | F(AVX512_BF16) | F(CMPCCXADD) + F(AVX_VNNI) | F(AVX512_BF16) | F(CMPCCXADD) | F(AMX_FP16) ); kvm_cpu_cap_mask(CPUID_D_1_EAX, From 5e85c4ebf206e50c58e82ca44c15e2be2bac6923 Mon Sep 17 00:00:00 2001 From: Jiaxi Chen Date: Fri, 25 Nov 2022 20:58:42 +0800 Subject: [PATCH 2832/4122] x86: KVM: Advertise AVX-IFMA CPUID to user space AVX-IFMA is a new instruction in the latest Intel platform Sierra Forest. 
This instruction packed multiplies unsigned 52-bit integers and adds the low/high 52-bit products to Qword Accumulators. The bit definition: CPUID.(EAX=7,ECX=1):EAX[bit 23] AVX-IFMA is on an expected-dense CPUID leaf and some other bits on this leaf have kernel usages. Given that, define this feature bit like X86_FEATURE_ in kernel. Considering AVX-IFMA itself has no truly kernel usages and /proc/cpuinfo has too much unreadable flags, hide this one in /proc/cpuinfo. Advertise AVX-IFMA to KVM userspace. This is safe because there are no new VMX controls or additional host enabling required for guests to use this feature. Signed-off-by: Jiaxi Chen Acked-by: Borislav Petkov Message-Id: <20221125125845.1182922-6-jiaxi.chen@linux.intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kvm/cpuid.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 20059dc33d24..1419c4e04d45 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -310,6 +310,7 @@ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ #define X86_FEATURE_CMPCCXADD (12*32+ 7) /* "" CMPccXADD instructions */ #define X86_FEATURE_AMX_FP16 (12*32+21) /* "" AMX fp16 Support */ +#define X86_FEATURE_AVX_IFMA (12*32+23) /* "" Support for VPMADD52[H,L]UQ */ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 6360075ca70f..5dfc0d036df5 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -663,7 +663,8 @@ void kvm_set_cpu_caps(void) kvm_cpu_cap_set(X86_FEATURE_SPEC_CTRL_SSBD); kvm_cpu_cap_mask(CPUID_7_1_EAX, - F(AVX_VNNI) | F(AVX512_BF16) | F(CMPCCXADD) | F(AMX_FP16) + F(AVX_VNNI) | F(AVX512_BF16) | F(CMPCCXADD) | F(AMX_FP16) | + F(AVX_IFMA) ); kvm_cpu_cap_mask(CPUID_D_1_EAX, 
From 24d74b9f5f2a972ac9228372adeac62b2dc10ea2 Mon Sep 17 00:00:00 2001 From: Jiaxi Chen Date: Fri, 25 Nov 2022 20:58:43 +0800 Subject: [PATCH 2833/4122] KVM: x86: Advertise AVX-VNNI-INT8 CPUID to user space AVX-VNNI-INT8 is a new set of instructions in the latest Intel platform Sierra Forest, aiming to give the platform superior AI capabilities. This instruction multiplies the individual bytes of two signed or unsigned source operands, then adds and accumulates the results into the destination dword element size operand. The bit definition: CPUID.(EAX=7,ECX=1):EDX[bit 4] AVX-VNNI-INT8 is on a new and sparse CPUID leaf and none of the bits on this leaf have a real kernel use case for now. Given that and to save space for kernel feature bits, move this new leaf to a KVM-only subleaf and add an X86_FEATURE definition for AVX-VNNI-INT8 to direct it to the KVM entry. Advertise AVX-VNNI-INT8 to KVM userspace. This is safe because there are no new VMX controls or additional host enabling required for guests to use this feature.
Signed-off-by: Jiaxi Chen Message-Id: <20221125125845.1182922-7-jiaxi.chen@linux.intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 6 +++++- arch/x86/kvm/reverse_cpuid.h | 5 +++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 5dfc0d036df5..b6ce47c4e972 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -667,6 +667,10 @@ void kvm_set_cpu_caps(void) F(AVX_IFMA) ); + kvm_cpu_cap_init_kvm_defined(CPUID_7_1_EDX, + F(AVX_VNNI_INT8) + ); + kvm_cpu_cap_mask(CPUID_D_1_EAX, F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | F(XSAVES) | f_xfd ); @@ -920,9 +924,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) goto out; cpuid_entry_override(entry, CPUID_7_1_EAX); + cpuid_entry_override(entry, CPUID_7_1_EDX); entry->ebx = 0; entry->ecx = 0; - entry->edx = 0; } break; case 0xa: { /* Architectural Performance Monitoring */ diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index 443a6b3e66c0..84f56b662424 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -13,6 +13,7 @@ */ enum kvm_only_cpuid_leafs { CPUID_12_EAX = NCAPINTS, + CPUID_7_1_EDX, NR_KVM_CPU_CAPS, NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS, @@ -36,6 +37,9 @@ enum kvm_only_cpuid_leafs { #define KVM_X86_FEATURE_SGX1 KVM_X86_FEATURE(CPUID_12_EAX, 0) #define KVM_X86_FEATURE_SGX2 KVM_X86_FEATURE(CPUID_12_EAX, 1) +/* Intel-defined sub-features, CPUID level 0x00000007:1 (EDX) */ +#define X86_FEATURE_AVX_VNNI_INT8 KVM_X86_FEATURE(CPUID_7_1_EDX, 4) + struct cpuid_reg { u32 function; u32 index; @@ -60,6 +64,7 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_7_1_EAX] = { 7, 1, CPUID_EAX}, [CPUID_12_EAX] = {0x00000012, 0, CPUID_EAX}, [CPUID_8000_001F_EAX] = {0x8000001f, 0, CPUID_EAX}, + [CPUID_7_1_EDX] = { 7, 1, CPUID_EDX}, }; /* From 9977f0877de7f8fc51391e2d52bc993efbd58b90 Mon Sep 17 00:00:00 2001 From: Jiaxi Chen Date: Fri, 25 Nov 2022 20:58:44 +0800 
Subject: [PATCH 2834/4122] KVM: x86: Advertise AVX-NE-CONVERT CPUID to user space AVX-NE-CONVERT is a new set of instructions which can convert low precision floating point like BF16/FP16 to high precision floating point FP32, and can also convert FP32 elements to BF16. This instruction allows the platform to have improved AI capabilities and better compatibility. The bit definition: CPUID.(EAX=7,ECX=1):EDX[bit 5] AVX-NE-CONVERT is on a KVM-only subleaf. Plus an x86_FEATURE definition for this feature bit to direct it to the KVM entry. Advertise AVX-NE-CONVERT to KVM userspace. This is safe because there are no new VMX controls or additional host enabling required for guests to use this feature. Signed-off-by: Jiaxi Chen Message-Id: <20221125125845.1182922-8-jiaxi.chen@linux.intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 2 +- arch/x86/kvm/reverse_cpuid.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index b6ce47c4e972..5a95624a658a 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -668,7 +668,7 @@ void kvm_set_cpu_caps(void) ); kvm_cpu_cap_init_kvm_defined(CPUID_7_1_EDX, - F(AVX_VNNI_INT8) + F(AVX_VNNI_INT8) | F(AVX_NE_CONVERT) ); kvm_cpu_cap_mask(CPUID_D_1_EAX, diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index 84f56b662424..43eff7207e01 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -39,6 +39,7 @@ enum kvm_only_cpuid_leafs { /* Intel-defined sub-features, CPUID level 0x00000007:1 (EDX) */ #define X86_FEATURE_AVX_VNNI_INT8 KVM_X86_FEATURE(CPUID_7_1_EDX, 4) +#define X86_FEATURE_AVX_NE_CONVERT KVM_X86_FEATURE(CPUID_7_1_EDX, 5) struct cpuid_reg { u32 function; From 29c46979b25d5ca867e9859bfdd088d028739cdf Mon Sep 17 00:00:00 2001 From: Jiaxi Chen Date: Fri, 25 Nov 2022 20:58:45 +0800 Subject: [PATCH 2835/4122] KVM: x86: Advertise PREFETCHIT0/1 CPUID to user space Latest Intel platform Granite Rapids has 
introduced a new instruction - PREFETCHIT0/1, which moves code to memory (cache) closer to the processor depending on specific hints. The bit definition: CPUID.(EAX=7,ECX=1):EDX[bit 14] PREFETCHIT0/1 is on a KVM-only subleaf. Plus an x86_FEATURE definition for this feature bit to direct it to the KVM entry. Advertise PREFETCHIT0/1 to KVM userspace. This is safe because there are no new VMX controls or additional host enabling required for guests to use this feature. Signed-off-by: Jiaxi Chen Message-Id: <20221125125845.1182922-9-jiaxi.chen@linux.intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 2 +- arch/x86/kvm/reverse_cpuid.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 5a95624a658a..723502181a3a 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -668,7 +668,7 @@ void kvm_set_cpu_caps(void) ); kvm_cpu_cap_init_kvm_defined(CPUID_7_1_EDX, - F(AVX_VNNI_INT8) | F(AVX_NE_CONVERT) + F(AVX_VNNI_INT8) | F(AVX_NE_CONVERT) | F(PREFETCHITI) ); kvm_cpu_cap_mask(CPUID_D_1_EAX, diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index 43eff7207e01..203fdad07bae 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -40,6 +40,7 @@ enum kvm_only_cpuid_leafs { /* Intel-defined sub-features, CPUID level 0x00000007:1 (EDX) */ #define X86_FEATURE_AVX_VNNI_INT8 KVM_X86_FEATURE(CPUID_7_1_EDX, 4) #define X86_FEATURE_AVX_NE_CONVERT KVM_X86_FEATURE(CPUID_7_1_EDX, 5) +#define X86_FEATURE_PREFETCHITI KVM_X86_FEATURE(CPUID_7_1_EDX, 14) struct cpuid_reg { u32 function; From 41e8f85a75fc60e1543e4903428a1b481b672a17 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Fri, 11 Nov 2022 09:04:06 -0800 Subject: [PATCH 2836/4122] f2fs: introduce F2FS_IOC_START_ATOMIC_REPLACE introduce a new ioctl to replace the whole content of a file atomically, which means it induces truncate and content update at the same time. 
We can start it with F2FS_IOC_START_ATOMIC_REPLACE and complete it with F2FS_IOC_COMMIT_ATOMIC_WRITE. Or abort it with F2FS_IOC_ABORT_ATOMIC_WRITE. Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 3 +++ fs/f2fs/f2fs.h | 1 + fs/f2fs/file.c | 21 +++++++++++++++------ fs/f2fs/segment.c | 13 ++++++++++++- include/uapi/linux/f2fs.h | 1 + 5 files changed, 32 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 9b47ded653d1..560fa80590e9 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3466,6 +3466,9 @@ static int prepare_atomic_write_begin(struct f2fs_sb_info *sbi, else if (*blk_addr != NULL_ADDR) return 0; + if (is_inode_flag_set(inode, FI_ATOMIC_REPLACE)) + goto reserve_block; + /* Look for the block in the original inode */ err = __find_data_block(inode, index, &ori_blk_addr); if (err) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 6a8cbf5bb187..b89b5d755ce0 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -769,6 +769,7 @@ enum { FI_ALIGNED_WRITE, /* enable aligned write */ FI_COW_FILE, /* indicate COW file */ FI_ATOMIC_COMMITTED, /* indicate atomic commit completed except disk sync */ + FI_ATOMIC_REPLACE, /* indicate atomic replace */ FI_MAX, /* max flag, never be used */ }; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 28f586e77999..ab0a0d3730f6 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2034,7 +2034,7 @@ static int f2fs_ioc_getversion(struct file *filp, unsigned long arg) return put_user(inode->i_generation, (int __user *)arg); } -static int f2fs_ioc_start_atomic_write(struct file *filp) +static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) { struct inode *inode = file_inode(filp); struct user_namespace *mnt_userns = file_mnt_user_ns(filp); @@ -2103,15 +2103,22 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) f2fs_write_inode(inode, NULL); - isize = i_size_read(inode); - fi->original_i_size = isize; - f2fs_i_size_write(fi->cow_inode, 
isize); - stat_inc_atomic_inode(inode); set_inode_flag(inode, FI_ATOMIC_FILE); set_inode_flag(fi->cow_inode, FI_COW_FILE); clear_inode_flag(fi->cow_inode, FI_INLINE_DATA); + + isize = i_size_read(inode); + fi->original_i_size = isize; + if (truncate) { + set_inode_flag(inode, FI_ATOMIC_REPLACE); + truncate_inode_pages_final(inode->i_mapping); + f2fs_i_size_write(inode, 0); + isize = 0; + } + f2fs_i_size_write(fi->cow_inode, isize); + f2fs_up_write(&fi->i_gc_rwsem[WRITE]); f2fs_update_time(sbi, REQ_TIME); @@ -4139,7 +4146,9 @@ static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) case FS_IOC_GETVERSION: return f2fs_ioc_getversion(filp, arg); case F2FS_IOC_START_ATOMIC_WRITE: - return f2fs_ioc_start_atomic_write(filp); + return f2fs_ioc_start_atomic_write(filp, false); + case F2FS_IOC_START_ATOMIC_REPLACE: + return f2fs_ioc_start_atomic_write(filp, true); case F2FS_IOC_COMMIT_ATOMIC_WRITE: return f2fs_ioc_commit_atomic_write(filp); case F2FS_IOC_ABORT_ATOMIC_WRITE: diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 8aa81238c770..5ac026a57228 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -197,6 +197,7 @@ void f2fs_abort_atomic_write(struct inode *inode, bool clean) fi->cow_inode = NULL; release_atomic_write_cnt(inode); clear_inode_flag(inode, FI_ATOMIC_COMMITTED); + clear_inode_flag(inode, FI_ATOMIC_REPLACE); clear_inode_flag(inode, FI_ATOMIC_FILE); stat_dec_atomic_inode(inode); @@ -261,14 +262,24 @@ static void __complete_revoke_list(struct inode *inode, struct list_head *head, bool revoke) { struct revoke_entry *cur, *tmp; + pgoff_t start_index = 0; + bool truncate = is_inode_flag_set(inode, FI_ATOMIC_REPLACE); list_for_each_entry_safe(cur, tmp, head, list) { - if (revoke) + if (revoke) { __replace_atomic_write_block(inode, cur->index, cur->old_addr, NULL, true); + } else if (truncate) { + f2fs_truncate_hole(inode, start_index, cur->index); + start_index = cur->index + 1; + } + list_del(&cur->list); 
kmem_cache_free(revoke_entry_slab, cur); } + + if (!revoke && truncate) + f2fs_do_truncate_blocks(inode, start_index * PAGE_SIZE, false); } static int __f2fs_commit_atomic_write(struct inode *inode) diff --git a/include/uapi/linux/f2fs.h b/include/uapi/linux/f2fs.h index 3121d127d5aa..955d440be104 100644 --- a/include/uapi/linux/f2fs.h +++ b/include/uapi/linux/f2fs.h @@ -42,6 +42,7 @@ struct f2fs_comp_option) #define F2FS_IOC_DECOMPRESS_FILE _IO(F2FS_IOCTL_MAGIC, 23) #define F2FS_IOC_COMPRESS_FILE _IO(F2FS_IOCTL_MAGIC, 24) +#define F2FS_IOC_START_ATOMIC_REPLACE _IO(F2FS_IOCTL_MAGIC, 25) /* * should be same as XFS_IOC_GOINGDOWN. From d3b7b4afd6b2c344eabf9cc26b8bfa903c164c7c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 15 Nov 2022 00:08:47 +0800 Subject: [PATCH 2837/4122] f2fs: fix to do sanity check on i_extra_isize in is_alive() syzbot found a f2fs bug: BUG: KASAN: slab-out-of-bounds in data_blkaddr fs/f2fs/f2fs.h:2891 [inline] BUG: KASAN: slab-out-of-bounds in is_alive fs/f2fs/gc.c:1117 [inline] BUG: KASAN: slab-out-of-bounds in gc_data_segment fs/f2fs/gc.c:1520 [inline] BUG: KASAN: slab-out-of-bounds in do_garbage_collect+0x386a/0x3df0 fs/f2fs/gc.c:1734 Read of size 4 at addr ffff888076557568 by task kworker/u4:3/52 CPU: 1 PID: 52 Comm: kworker/u4:3 Not tainted 6.1.0-rc4-syzkaller-00362-gfef7fd48922d #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/26/2022 Workqueue: writeback wb_workfn (flush-7:0) Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:284 [inline] print_report+0x15e/0x45d mm/kasan/report.c:395 kasan_report+0xbb/0x1f0 mm/kasan/report.c:495 data_blkaddr fs/f2fs/f2fs.h:2891 [inline] is_alive fs/f2fs/gc.c:1117 [inline] gc_data_segment fs/f2fs/gc.c:1520 [inline] do_garbage_collect+0x386a/0x3df0 fs/f2fs/gc.c:1734 f2fs_gc+0x88c/0x20a0 fs/f2fs/gc.c:1831 f2fs_balance_fs+0x544/0x6b0 fs/f2fs/segment.c:410 
f2fs_write_inode+0x57e/0xe20 fs/f2fs/inode.c:753 write_inode fs/fs-writeback.c:1440 [inline] __writeback_single_inode+0xcfc/0x1440 fs/fs-writeback.c:1652 writeback_sb_inodes+0x54d/0xf90 fs/fs-writeback.c:1870 wb_writeback+0x2c5/0xd70 fs/fs-writeback.c:2044 wb_do_writeback fs/fs-writeback.c:2187 [inline] wb_workfn+0x2dc/0x12f0 fs/fs-writeback.c:2227 process_one_work+0x9bf/0x1710 kernel/workqueue.c:2289 worker_thread+0x665/0x1080 kernel/workqueue.c:2436 kthread+0x2e4/0x3a0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:306 The root cause is that we forgot to do sanity check on .i_extra_isize in below path, result in accessing invalid address later, fix it. - gc_data_segment - is_alive - data_blkaddr - offset_in_addr Reported-by: syzbot+f8f3dfa4abc489e768a1@syzkaller.appspotmail.com Link: https://lore.kernel.org/linux-f2fs-devel/0000000000003cb3c405ed5c17f9@google.com/T/#u Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index f1a46519a5fe..0f967b1e98f2 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1077,7 +1077,7 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, { struct page *node_page; nid_t nid; - unsigned int ofs_in_node, max_addrs; + unsigned int ofs_in_node, max_addrs, base; block_t source_blkaddr; nid = le32_to_cpu(sum->nid); @@ -1103,11 +1103,17 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, return false; } - max_addrs = IS_INODE(node_page) ? 
DEF_ADDRS_PER_INODE : - DEF_ADDRS_PER_BLOCK; - if (ofs_in_node >= max_addrs) { - f2fs_err(sbi, "Inconsistent ofs_in_node:%u in summary, ino:%u, nid:%u, max:%u", - ofs_in_node, dni->ino, dni->nid, max_addrs); + if (IS_INODE(node_page)) { + base = offset_in_addr(F2FS_INODE(node_page)); + max_addrs = DEF_ADDRS_PER_INODE; + } else { + base = 0; + max_addrs = DEF_ADDRS_PER_BLOCK; + } + + if (base + ofs_in_node >= max_addrs) { + f2fs_err(sbi, "Inconsistent blkaddr offset: base:%u, ofs_in_node:%u, max:%u, ino:%u, nid:%u", + base, ofs_in_node, max_addrs, dni->ino, dni->nid); f2fs_put_page(node_page, 1); return false; } From 5b7b74b71c7fefbaa3e0ccc120c3cbd50b3fad86 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Sat, 12 Nov 2022 00:13:49 +0800 Subject: [PATCH 2838/4122] f2fs: remove submit label in __submit_discard_cmd() Complaint from Matthew Wilcox in another similar place: "submit? You don't submit anything at the 'submit' label. it should be called 'skip' or something. But I think this is just badly written and you don't need a goto at all." Let's remove submit label for readability. 
Signed-off-by: Yangtao Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 5ac026a57228..8b0b76550578 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1143,13 +1143,12 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi, if (time_to_inject(sbi, FAULT_DISCARD)) { f2fs_show_injection_info(sbi, FAULT_DISCARD); err = -EIO; - goto submit; - } - err = __blkdev_issue_discard(bdev, + } else { + err = __blkdev_issue_discard(bdev, SECTOR_FROM_BLOCK(start), SECTOR_FROM_BLOCK(len), GFP_NOFS, &bio); -submit: + } if (err) { spin_lock_irqsave(&dc->lock, flags); if (dc->state == D_PARTIAL) From b7ad23cec26a91a4f7c45ff7ff8e915f21ac5127 Mon Sep 17 00:00:00 2001 From: Yuwei Guan Date: Tue, 15 Nov 2022 14:35:35 +0800 Subject: [PATCH 2839/4122] f2fs: fix to alloc_mode changed after remount on a small volume device The commit 84b89e5d943d8 ("f2fs: add auto tuning for small devices") added tuning for small volume devices, so alloc_mode is now tuned to 'reuse' if the volume is small. But the alloc_mode will change to 'default' when remounting such a small device. This patch fixes alloc_mode being changed when remounting a small volume device.
Signed-off-by: Yuwei Guan Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 75027ff85cd9..96cfe626a670 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2059,7 +2059,11 @@ static void default_options(struct f2fs_sb_info *sbi) F2FS_OPTION(sbi).active_logs = NR_CURSEG_PERSIST_TYPE; F2FS_OPTION(sbi).inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS; - F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT; + if (le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count_main) <= + SMALL_VOLUME_SEGMENTS) + F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE; + else + F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT; F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX; F2FS_OPTION(sbi).s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID); F2FS_OPTION(sbi).s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID); @@ -4077,7 +4081,6 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi) /* adjust parameters according to the volume size */ if (sm_i->main_segments <= SMALL_VOLUME_SEGMENTS) { - F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE; if (f2fs_block_unit_discard(sbi)) sm_i->dcc_info->discard_granularity = 1; sm_i->ipu_policy = 1 << F2FS_IPU_FORCE | From 777cd95b8066ced4e9a2534941b81f8ad98e74fb Mon Sep 17 00:00:00 2001 From: Yuwei Guan Date: Tue, 15 Nov 2022 14:35:36 +0800 Subject: [PATCH 2840/4122] f2fs: cleanup for 'f2fs_tuning_parameters' function A cleanup patch for 'f2fs_tuning_parameters' function. 
Signed-off-by: Yuwei Guan Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 96cfe626a670..05101cd4140b 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -4077,13 +4077,11 @@ static int f2fs_setup_casefold(struct f2fs_sb_info *sbi) static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi) { - struct f2fs_sm_info *sm_i = SM_I(sbi); - /* adjust parameters according to the volume size */ - if (sm_i->main_segments <= SMALL_VOLUME_SEGMENTS) { + if (MAIN_SEGS(sbi) <= SMALL_VOLUME_SEGMENTS) { if (f2fs_block_unit_discard(sbi)) - sm_i->dcc_info->discard_granularity = 1; - sm_i->ipu_policy = 1 << F2FS_IPU_FORCE | + SM_I(sbi)->dcc_info->discard_granularity = 1; + SM_I(sbi)->ipu_policy = 1 << F2FS_IPU_FORCE | 1 << F2FS_IPU_HONOR_OPU_WRITE; } From 66aee5aaa237e5a3475581b462b8b22e0944d264 Mon Sep 17 00:00:00 2001 From: Yuwei Guan Date: Tue, 15 Nov 2022 14:35:37 +0800 Subject: [PATCH 2841/4122] f2fs: change type for 'sbi->readdir_ra' Before this patch, the variable 'readdir_ra' only matters in terms of whether it is equal to '1' or not, so we can change its type from 'int' to 'bool'.
Signed-off-by: Yuwei Guan Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 2 +- fs/f2fs/f2fs.h | 2 +- fs/f2fs/super.c | 2 +- fs/f2fs/sysfs.c | 5 +++++ 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 030b7fd4142f..8e025157f35c 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -1010,7 +1010,7 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, struct fscrypt_str de_name = FSTR_INIT(NULL, 0); struct f2fs_sb_info *sbi = F2FS_I_SB(d->inode); struct blk_plug plug; - bool readdir_ra = sbi->readdir_ra == 1; + bool readdir_ra = sbi->readdir_ra; bool found_valid_dirent = false; int err = 0; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b89b5d755ce0..96bd3461c0bb 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1698,7 +1698,7 @@ struct f2fs_sb_info { unsigned int total_node_count; /* total node block count */ unsigned int total_valid_node_count; /* valid node block count */ int dir_level; /* directory level */ - int readdir_ra; /* readahead inode in readdir */ + bool readdir_ra; /* readahead inode in readdir */ u64 max_io_bytes; /* max io bytes to merge IOs */ block_t user_block_count; /* # of user blocks */ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 05101cd4140b..31435c8645c8 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -4085,7 +4085,7 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi) 1 << F2FS_IPU_HONOR_OPU_WRITE; } - sbi->readdir_ra = 1; + sbi->readdir_ra = true; } static int f2fs_fill_super(struct super_block *sb, void *data, int silent) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 97bf0dbb0974..33ec467b3772 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -656,6 +656,11 @@ out: return count; } + if (!strcmp(a->attr.name, "readdir_ra")) { + sbi->readdir_ra = !!t; + return count; + } + *ui = (unsigned int)t; return count; From 4ff23a6547b81ca22adb852dfe93ee5fc45328ac Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Thu, 17 Nov 
2022 23:10:54 +0800 Subject: [PATCH 2842/4122] f2fs: set zstd compress level correctly Fixes: cf30f6a5f0c6 ("lib: zstd: Add kernel-specific API") Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Reviewed-by: Nick Terrell Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index d315c2de136f..74d3f2d2271f 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -346,7 +346,7 @@ static int zstd_init_compress_ctx(struct compress_ctx *cc) if (!level) level = F2FS_ZSTD_DEFAULT_CLEVEL; - params = zstd_get_params(F2FS_ZSTD_DEFAULT_CLEVEL, cc->rlen); + params = zstd_get_params(level, cc->rlen); workspace_size = zstd_cstream_workspace_bound(&params.cParams); workspace = f2fs_kvmalloc(F2FS_I_SB(cc->inode), From 787caf1bdcd9f04058e4e8d8ed56db1dbafea0b7 Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Fri, 11 Nov 2022 18:08:29 +0800 Subject: [PATCH 2843/4122] f2fs: fix to enable compress for newly created file if extension matches If compress_extension is set, and a newly created file matches the extension, the file could be marked as a compressed file. However, if inline_data is also enabled, there is no chance to check its extension since f2fs_should_compress() always returns false. This patch moves set_compress_inode(), which does the extension check, in f2fs_should_compress() to check extensions before setting inline data flag.
Fixes: 7165841d578e ("f2fs: fix to check inline_data during compressed inode conversion") Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/namei.c | 329 ++++++++++++++++++++++++------------------------ 2 files changed, 164 insertions(+), 167 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 96bd3461c0bb..f0833638f59e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2980,7 +2980,7 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr) /* Flags that should be inherited by new inodes from their parent. */ #define F2FS_FL_INHERITED (F2FS_SYNC_FL | F2FS_NODUMP_FL | F2FS_NOATIME_FL | \ F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL | \ - F2FS_CASEFOLD_FL | F2FS_COMPR_FL | F2FS_NOCOMP_FL) + F2FS_CASEFOLD_FL) /* Flags that are appropriate for regular files (all but dir-specific ones). */ #define F2FS_REG_FLMASK (~(F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL | \ diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index e104409c3a0e..54448dccbb6a 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -22,8 +22,163 @@ #include "acl.h" #include +static inline int is_extension_exist(const unsigned char *s, const char *sub, + bool tmp_ext) +{ + size_t slen = strlen(s); + size_t sublen = strlen(sub); + int i; + + if (sublen == 1 && *sub == '*') + return 1; + + /* + * filename format of multimedia file should be defined as: + * "filename + '.' + extension + (optional: '.' + temp extension)". 
+ */ + if (slen < sublen + 2) + return 0; + + if (!tmp_ext) { + /* file has no temp extension */ + if (s[slen - sublen - 1] != '.') + return 0; + return !strncasecmp(s + slen - sublen, sub, sublen); + } + + for (i = 1; i < slen - sublen; i++) { + if (s[i] != '.') + continue; + if (!strncasecmp(s + i + 1, sub, sublen)) + return 1; + } + + return 0; +} + +int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name, + bool hot, bool set) +{ + __u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list; + int cold_count = le32_to_cpu(sbi->raw_super->extension_count); + int hot_count = sbi->raw_super->hot_ext_count; + int total_count = cold_count + hot_count; + int start, count; + int i; + + if (set) { + if (total_count == F2FS_MAX_EXTENSION) + return -EINVAL; + } else { + if (!hot && !cold_count) + return -EINVAL; + if (hot && !hot_count) + return -EINVAL; + } + + if (hot) { + start = cold_count; + count = total_count; + } else { + start = 0; + count = cold_count; + } + + for (i = start; i < count; i++) { + if (strcmp(name, extlist[i])) + continue; + + if (set) + return -EINVAL; + + memcpy(extlist[i], extlist[i + 1], + F2FS_EXTENSION_LEN * (total_count - i - 1)); + memset(extlist[total_count - 1], 0, F2FS_EXTENSION_LEN); + if (hot) + sbi->raw_super->hot_ext_count = hot_count - 1; + else + sbi->raw_super->extension_count = + cpu_to_le32(cold_count - 1); + return 0; + } + + if (!set) + return -EINVAL; + + if (hot) { + memcpy(extlist[count], name, strlen(name)); + sbi->raw_super->hot_ext_count = hot_count + 1; + } else { + char buf[F2FS_MAX_EXTENSION][F2FS_EXTENSION_LEN]; + + memcpy(buf, &extlist[cold_count], + F2FS_EXTENSION_LEN * hot_count); + memset(extlist[cold_count], 0, F2FS_EXTENSION_LEN); + memcpy(extlist[cold_count], name, strlen(name)); + memcpy(&extlist[cold_count + 1], buf, + F2FS_EXTENSION_LEN * hot_count); + sbi->raw_super->extension_count = cpu_to_le32(cold_count + 1); + } + return 0; +} + +static void set_compress_new_inode(struct 
f2fs_sb_info *sbi, struct inode *dir, + struct inode *inode, const unsigned char *name) +{ + __u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list; + unsigned char (*noext)[F2FS_EXTENSION_LEN] = + F2FS_OPTION(sbi).noextensions; + unsigned char (*ext)[F2FS_EXTENSION_LEN] = F2FS_OPTION(sbi).extensions; + unsigned char ext_cnt = F2FS_OPTION(sbi).compress_ext_cnt; + unsigned char noext_cnt = F2FS_OPTION(sbi).nocompress_ext_cnt; + int i, cold_count, hot_count; + + if (!f2fs_sb_has_compression(sbi)) + return; + + if (S_ISDIR(inode->i_mode)) + goto inherit_comp; + + /* This name comes only from normal files. */ + if (!name) + return; + + /* Don't compress hot files. */ + f2fs_down_read(&sbi->sb_lock); + cold_count = le32_to_cpu(sbi->raw_super->extension_count); + hot_count = sbi->raw_super->hot_ext_count; + for (i = cold_count; i < cold_count + hot_count; i++) + if (is_extension_exist(name, extlist[i], false)) + break; + f2fs_up_read(&sbi->sb_lock); + if (i < (cold_count + hot_count)) + return; + + /* Don't compress unallowed extension. */ + for (i = 0; i < noext_cnt; i++) + if (is_extension_exist(name, noext[i], false)) + return; + + /* Compress wanting extension. 
*/ + for (i = 0; i < ext_cnt; i++) { + if (is_extension_exist(name, ext[i], false)) { + set_compress_context(inode); + return; + } + } +inherit_comp: + /* Inherit the {no-}compression flag in directory */ + if (F2FS_I(dir)->i_flags & F2FS_NOCOMP_FL) { + F2FS_I(inode)->i_flags |= F2FS_NOCOMP_FL; + f2fs_mark_inode_dirty_sync(inode, true); + } else if (F2FS_I(dir)->i_flags & F2FS_COMPR_FL) { + set_compress_context(inode); + } +} + static struct inode *f2fs_new_inode(struct user_namespace *mnt_userns, - struct inode *dir, umode_t mode) + struct inode *dir, umode_t mode, + const char *name) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); nid_t ino; @@ -114,12 +269,8 @@ static struct inode *f2fs_new_inode(struct user_namespace *mnt_userns, if (F2FS_I(inode)->i_flags & F2FS_PROJINHERIT_FL) set_inode_flag(inode, FI_PROJ_INHERIT); - if (f2fs_sb_has_compression(sbi)) { - /* Inherit the compression flag in directory */ - if ((F2FS_I(dir)->i_flags & F2FS_COMPR_FL) && - f2fs_may_compress(inode)) - set_compress_context(inode); - } + /* Check compression first. */ + set_compress_new_inode(sbi, dir, inode, name); /* Should enable inline_data after compression set */ if (test_opt(sbi, INLINE_DATA) && f2fs_may_inline_data(inode)) @@ -153,40 +304,6 @@ fail_drop: return ERR_PTR(err); } -static inline int is_extension_exist(const unsigned char *s, const char *sub, - bool tmp_ext) -{ - size_t slen = strlen(s); - size_t sublen = strlen(sub); - int i; - - if (sublen == 1 && *sub == '*') - return 1; - - /* - * filename format of multimedia file should be defined as: - * "filename + '.' + extension + (optional: '.' + temp extension)". 
- */ - if (slen < sublen + 2) - return 0; - - if (!tmp_ext) { - /* file has no temp extension */ - if (s[slen - sublen - 1] != '.') - return 0; - return !strncasecmp(s + slen - sublen, sub, sublen); - } - - for (i = 1; i < slen - sublen; i++) { - if (s[i] != '.') - continue; - if (!strncasecmp(s + i + 1, sub, sublen)) - return 1; - } - - return 0; -} - /* * Set file's temperature for hot/cold data separation */ @@ -217,124 +334,6 @@ static inline void set_file_temperature(struct f2fs_sb_info *sbi, struct inode * file_set_hot(inode); } -int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name, - bool hot, bool set) -{ - __u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list; - int cold_count = le32_to_cpu(sbi->raw_super->extension_count); - int hot_count = sbi->raw_super->hot_ext_count; - int total_count = cold_count + hot_count; - int start, count; - int i; - - if (set) { - if (total_count == F2FS_MAX_EXTENSION) - return -EINVAL; - } else { - if (!hot && !cold_count) - return -EINVAL; - if (hot && !hot_count) - return -EINVAL; - } - - if (hot) { - start = cold_count; - count = total_count; - } else { - start = 0; - count = cold_count; - } - - for (i = start; i < count; i++) { - if (strcmp(name, extlist[i])) - continue; - - if (set) - return -EINVAL; - - memcpy(extlist[i], extlist[i + 1], - F2FS_EXTENSION_LEN * (total_count - i - 1)); - memset(extlist[total_count - 1], 0, F2FS_EXTENSION_LEN); - if (hot) - sbi->raw_super->hot_ext_count = hot_count - 1; - else - sbi->raw_super->extension_count = - cpu_to_le32(cold_count - 1); - return 0; - } - - if (!set) - return -EINVAL; - - if (hot) { - memcpy(extlist[count], name, strlen(name)); - sbi->raw_super->hot_ext_count = hot_count + 1; - } else { - char buf[F2FS_MAX_EXTENSION][F2FS_EXTENSION_LEN]; - - memcpy(buf, &extlist[cold_count], - F2FS_EXTENSION_LEN * hot_count); - memset(extlist[cold_count], 0, F2FS_EXTENSION_LEN); - memcpy(extlist[cold_count], name, strlen(name)); - 
memcpy(&extlist[cold_count + 1], buf, - F2FS_EXTENSION_LEN * hot_count); - sbi->raw_super->extension_count = cpu_to_le32(cold_count + 1); - } - return 0; -} - -static void set_compress_inode(struct f2fs_sb_info *sbi, struct inode *inode, - const unsigned char *name) -{ - __u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list; - unsigned char (*noext)[F2FS_EXTENSION_LEN] = F2FS_OPTION(sbi).noextensions; - unsigned char (*ext)[F2FS_EXTENSION_LEN] = F2FS_OPTION(sbi).extensions; - unsigned char ext_cnt = F2FS_OPTION(sbi).compress_ext_cnt; - unsigned char noext_cnt = F2FS_OPTION(sbi).nocompress_ext_cnt; - int i, cold_count, hot_count; - - if (!f2fs_sb_has_compression(sbi) || - F2FS_I(inode)->i_flags & F2FS_NOCOMP_FL || - !f2fs_may_compress(inode) || - (!ext_cnt && !noext_cnt)) - return; - - f2fs_down_read(&sbi->sb_lock); - - cold_count = le32_to_cpu(sbi->raw_super->extension_count); - hot_count = sbi->raw_super->hot_ext_count; - - for (i = cold_count; i < cold_count + hot_count; i++) { - if (is_extension_exist(name, extlist[i], false)) { - f2fs_up_read(&sbi->sb_lock); - return; - } - } - - f2fs_up_read(&sbi->sb_lock); - - for (i = 0; i < noext_cnt; i++) { - if (is_extension_exist(name, noext[i], false)) { - f2fs_disable_compressed_file(inode); - return; - } - } - - if (is_inode_flag_set(inode, FI_COMPRESSED_FILE)) - return; - - for (i = 0; i < ext_cnt; i++) { - if (!is_extension_exist(name, ext[i], false)) - continue; - - /* Do not use inline_data with compression */ - stat_dec_inline_inode(inode); - clear_inode_flag(inode, FI_INLINE_DATA); - set_compress_context(inode); - return; - } -} - static int f2fs_create(struct user_namespace *mnt_userns, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { @@ -352,15 +351,13 @@ static int f2fs_create(struct user_namespace *mnt_userns, struct inode *dir, if (err) return err; - inode = f2fs_new_inode(mnt_userns, dir, mode); + inode = f2fs_new_inode(mnt_userns, dir, mode, dentry->d_name.name); if 
(IS_ERR(inode)) return PTR_ERR(inode); if (!test_opt(sbi, DISABLE_EXT_IDENTIFY)) set_file_temperature(sbi, inode, dentry->d_name.name); - set_compress_inode(sbi, inode, dentry->d_name.name); - inode->i_op = &f2fs_file_inode_operations; inode->i_fop = &f2fs_file_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; @@ -689,7 +686,7 @@ static int f2fs_symlink(struct user_namespace *mnt_userns, struct inode *dir, if (err) return err; - inode = f2fs_new_inode(mnt_userns, dir, S_IFLNK | S_IRWXUGO); + inode = f2fs_new_inode(mnt_userns, dir, S_IFLNK | S_IRWXUGO, NULL); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -760,7 +757,7 @@ static int f2fs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, if (err) return err; - inode = f2fs_new_inode(mnt_userns, dir, S_IFDIR | mode); + inode = f2fs_new_inode(mnt_userns, dir, S_IFDIR | mode, NULL); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -817,7 +814,7 @@ static int f2fs_mknod(struct user_namespace *mnt_userns, struct inode *dir, if (err) return err; - inode = f2fs_new_inode(mnt_userns, dir, mode); + inode = f2fs_new_inode(mnt_userns, dir, mode, NULL); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -856,7 +853,7 @@ static int __f2fs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, if (err) return err; - inode = f2fs_new_inode(mnt_userns, dir, mode); + inode = f2fs_new_inode(mnt_userns, dir, mode, NULL); if (IS_ERR(inode)) return PTR_ERR(inode); From b16bcaaf7a325f90967259a0b7cfcce4ff8c56ba Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Fri, 11 Nov 2022 18:08:30 +0800 Subject: [PATCH 2844/4122] f2fs: move set_file_temperature into f2fs_new_inode Since the file name has already passed to f2fs_new_inode(), let's move set_file_temperature() into f2fs_new_inode(). 
Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 62 +++++++++++++++++++++++-------------------------- 1 file changed, 29 insertions(+), 33 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 54448dccbb6a..58a91ce8fe08 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -176,6 +176,32 @@ inherit_comp: } } +/* + * Set file's temperature for hot/cold data separation + */ +static void set_file_temperature(struct f2fs_sb_info *sbi, struct inode *inode, + const unsigned char *name) +{ + __u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list; + int i, cold_count, hot_count; + + f2fs_down_read(&sbi->sb_lock); + cold_count = le32_to_cpu(sbi->raw_super->extension_count); + hot_count = sbi->raw_super->hot_ext_count; + for (i = 0; i < cold_count + hot_count; i++) + if (is_extension_exist(name, extlist[i], true)) + break; + f2fs_up_read(&sbi->sb_lock); + + if (i == cold_count + hot_count) + return; + + if (i < cold_count) + file_set_cold(inode); + else + file_set_hot(inode); +} + static struct inode *f2fs_new_inode(struct user_namespace *mnt_userns, struct inode *dir, umode_t mode, const char *name) @@ -276,6 +302,9 @@ static struct inode *f2fs_new_inode(struct user_namespace *mnt_userns, if (test_opt(sbi, INLINE_DATA) && f2fs_may_inline_data(inode)) set_inode_flag(inode, FI_INLINE_DATA); + if (name && !test_opt(sbi, DISABLE_EXT_IDENTIFY)) + set_file_temperature(sbi, inode, name); + stat_inc_inline_xattr(inode); stat_inc_inline_inode(inode); stat_inc_inline_dir(inode); @@ -304,36 +333,6 @@ fail_drop: return ERR_PTR(err); } -/* - * Set file's temperature for hot/cold data separation - */ -static inline void set_file_temperature(struct f2fs_sb_info *sbi, struct inode *inode, - const unsigned char *name) -{ - __u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list; - int i, cold_count, hot_count; - - f2fs_down_read(&sbi->sb_lock); - - cold_count = le32_to_cpu(sbi->raw_super->extension_count); 
- hot_count = sbi->raw_super->hot_ext_count; - - for (i = 0; i < cold_count + hot_count; i++) { - if (is_extension_exist(name, extlist[i], true)) - break; - } - - f2fs_up_read(&sbi->sb_lock); - - if (i == cold_count + hot_count) - return; - - if (i < cold_count) - file_set_cold(inode); - else - file_set_hot(inode); -} - static int f2fs_create(struct user_namespace *mnt_userns, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { @@ -355,9 +354,6 @@ static int f2fs_create(struct user_namespace *mnt_userns, struct inode *dir, if (IS_ERR(inode)) return PTR_ERR(inode); - if (!test_opt(sbi, DISABLE_EXT_IDENTIFY)) - set_file_temperature(sbi, inode, dentry->d_name.name); - inode->i_op = &f2fs_file_inode_operations; inode->i_fop = &f2fs_file_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; From fc031877b8229ee2f56a2cc3868ca7f282b1231d Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Sat, 19 Nov 2022 01:40:28 +0800 Subject: [PATCH 2845/4122] f2fs: fix description about discard_granularity node Let's fix the inconsistency in the text description. Default discard granularity is 16. For small devices, default value is 1. Signed-off-by: Yangtao Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 24e7cb77f265..32404781e76f 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -138,7 +138,8 @@ Contact: "Chao Yu" Description: Controls discard granularity of inner discard thread. Inner thread will not issue discards with size that is smaller than granularity. The unit size is one block(4KB), now only support configuring - in range of [1, 512]. Default value is 4(=16KB). + in range of [1, 512]. Default value is 16. + For small devices, default value is 1. 
What: /sys/fs/f2fs//umount_discard_timeout Date: January 2019 From 620816393239890feff8608251e2746b1cc2cfa0 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Thu, 17 Nov 2022 01:10:45 +0800 Subject: [PATCH 2846/4122] f2fs: make __queue_discard_cmd() return void Since __queue_discard_cmd() never returns an error, let's make it return void. Signed-off-by: Yangtao Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 8b0b76550578..14ece4bf7c7e 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1358,13 +1358,13 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, } } -static int __queue_discard_cmd(struct f2fs_sb_info *sbi, +static void __queue_discard_cmd(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t blkstart, block_t blklen) { block_t lblkstart = blkstart; if (!f2fs_bdev_support_discard(bdev)) - return 0; + return; trace_f2fs_queue_discard(bdev, blkstart, blklen); @@ -1376,7 +1376,6 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi, mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock); __update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen); mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock); - return 0; } static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi, @@ -1776,7 +1775,8 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, } /* For conventional zones, use regular discard if supported */ - return __queue_discard_cmd(sbi, bdev, lblkstart, blklen); + __queue_discard_cmd(sbi, bdev, lblkstart, blklen); + return 0; } #endif @@ -1787,7 +1787,8 @@ static int __issue_discard_async(struct f2fs_sb_info *sbi, if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev)) return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen); #endif - return __queue_discard_cmd(sbi, bdev, blkstart, blklen); + __queue_discard_cmd(sbi, bdev, blkstart, blklen); + return 0; } static 
int f2fs_issue_discard(struct f2fs_sb_info *sbi, From 78a99fe6254cad4be310cd84af39f6c46b668c72 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 23 Nov 2022 06:42:52 +0800 Subject: [PATCH 2847/4122] f2fs: truncate blocks in batch in __complete_revoke_list() Use f2fs_do_truncate_blocks() to truncate all blocks in-batch in __complete_revoke_list(). Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 14ece4bf7c7e..37c721e1eb03 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -262,24 +262,19 @@ static void __complete_revoke_list(struct inode *inode, struct list_head *head, bool revoke) { struct revoke_entry *cur, *tmp; - pgoff_t start_index = 0; bool truncate = is_inode_flag_set(inode, FI_ATOMIC_REPLACE); list_for_each_entry_safe(cur, tmp, head, list) { - if (revoke) { + if (revoke) __replace_atomic_write_block(inode, cur->index, cur->old_addr, NULL, true); - } else if (truncate) { - f2fs_truncate_hole(inode, start_index, cur->index); - start_index = cur->index + 1; - } list_del(&cur->list); kmem_cache_free(revoke_entry_slab, cur); } if (!revoke && truncate) - f2fs_do_truncate_blocks(inode, start_index * PAGE_SIZE, false); + f2fs_do_truncate_blocks(inode, 0, false); } static int __f2fs_commit_atomic_write(struct inode *inode) From e219aecfd4b766c4e878a3769057e9809f7fcadc Mon Sep 17 00:00:00 2001 From: Yonggil Song Date: Tue, 22 Nov 2022 18:03:20 +0900 Subject: [PATCH 2848/4122] f2fs: avoid victim selection from previous victim section When f2fs chooses GC victim in large section & LFS mode, next_victim_seg[gc_type] is referenced first. After segment is freed, next_victim_seg[gc_type] has the next segment number. However, next_victim_seg[gc_type] still has the last segment number even after the last segment of section is freed. 
In this case, when f2fs chooses a victim for the next GC round, the last segment of previous victim section is chosen as a victim. Initialize next_victim_seg[gc_type] to NULL_SEGNO for the last segment in large section. Fixes: e3080b0120a1 ("f2fs: support subsectional garbage collection") Signed-off-by: Yonggil Song Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 0f967b1e98f2..f1b68eda2235 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1749,8 +1749,9 @@ freed: get_valid_blocks(sbi, segno, false) == 0) seg_freed++; - if (__is_large_section(sbi) && segno + 1 < end_segno) - sbi->next_victim_seg[gc_type] = segno + 1; + if (__is_large_section(sbi)) + sbi->next_victim_seg[gc_type] = + (segno + 1 < end_segno) ? segno + 1 : NULL_SEGNO; skip: f2fs_put_page(sum_page, 0); } From 48c08c51f938d955dd8f5b8972bc29faa4c9556f Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Fri, 18 Nov 2022 11:46:00 +0800 Subject: [PATCH 2849/4122] f2fs: init discard policy after thread wakeup Under the current logic, after the discard thread wakes up, it will not run according to the expected policy, but will use the expected policy before sleep. Move the strategy selection to after the thread wakes up, so that the running state of the thread meets expectations. 
Signed-off-by: Yangtao Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 37c721e1eb03..73ad8dc9a4d3 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1679,6 +1679,11 @@ static int issue_discard_thread(void *data) set_freezable(); do { + wait_event_interruptible_timeout(*q, + kthread_should_stop() || freezing(current) || + dcc->discard_wake, + msecs_to_jiffies(wait_ms)); + if (sbi->gc_mode == GC_URGENT_HIGH || !f2fs_available_free_memory(sbi, DISCARD_CACHE)) __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1); @@ -1686,14 +1691,6 @@ static int issue_discard_thread(void *data) __init_discard_policy(sbi, &dpolicy, DPOLICY_BG, dcc->discard_granularity); - if (!atomic_read(&dcc->discard_cmd_cnt)) - wait_ms = dpolicy.max_interval; - - wait_event_interruptible_timeout(*q, - kthread_should_stop() || freezing(current) || - dcc->discard_wake, - msecs_to_jiffies(wait_ms)); - if (dcc->discard_wake) dcc->discard_wake = 0; @@ -1707,12 +1704,11 @@ static int issue_discard_thread(void *data) continue; if (kthread_should_stop()) return 0; - if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) { + if (is_sbi_flag_set(sbi, SBI_NEED_FSCK) || + !atomic_read(&dcc->discard_cmd_cnt)) { wait_ms = dpolicy.max_interval; continue; } - if (!atomic_read(&dcc->discard_cmd_cnt)) - continue; sb_start_intwrite(sbi->sb); @@ -1727,6 +1723,8 @@ static int issue_discard_thread(void *data) } else { wait_ms = dpolicy.max_interval; } + if (!atomic_read(&dcc->discard_cmd_cnt)) + wait_ms = dpolicy.max_interval; sb_end_intwrite(sbi->sb); From 1cd2e6d544359ae13e6fd9029b6018b957cf08c3 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Thu, 24 Nov 2022 00:44:01 +0800 Subject: [PATCH 2850/4122] f2fs: define MIN_DISCARD_GRANULARITY macro Do cleanup in f2fs_tuning_parameters() and __init_discard_policy(), let's use macro instead of number. 
Suggested-by: Chao Yu Signed-off-by: Yangtao Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 ++ fs/f2fs/segment.c | 4 ++-- fs/f2fs/super.c | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f0833638f59e..4694b55b6df4 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -329,6 +329,8 @@ struct discard_entry { unsigned char discard_map[SIT_VBLOCK_MAP_SIZE]; /* segment discard bitmap */ }; +/* minimum discard granularity, unit: block count */ +#define MIN_DISCARD_GRANULARITY 1 /* default discard granularity of inner discard thread, unit: block count */ #define DEFAULT_DISCARD_GRANULARITY 16 /* default maximum discard granularity of ordered discard, unit: block count */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 73ad8dc9a4d3..c7afcc6cd75b 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1065,7 +1065,7 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi, dpolicy->sync = false; dpolicy->ordered = true; if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) { - dpolicy->granularity = 1; + dpolicy->granularity = MIN_DISCARD_GRANULARITY; if (atomic_read(&dcc->discard_cmd_cnt)) dpolicy->max_interval = dcc->min_discard_issue_time; @@ -1080,7 +1080,7 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi, } else if (discard_type == DPOLICY_UMOUNT) { dpolicy->io_aware = false; /* we need to issue all to keep CP_TRIMMED_FLAG */ - dpolicy->granularity = 1; + dpolicy->granularity = MIN_DISCARD_GRANULARITY; dpolicy->timeout = true; } } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 31435c8645c8..daf14b55a972 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -4080,7 +4080,8 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi) /* adjust parameters according to the volume size */ if (MAIN_SEGS(sbi) <= SMALL_VOLUME_SEGMENTS) { if (f2fs_block_unit_discard(sbi)) - SM_I(sbi)->dcc_info->discard_granularity = 1; + SM_I(sbi)->dcc_info->discard_granularity = 
+ MIN_DISCARD_GRANULARITY; SM_I(sbi)->ipu_policy = 1 << F2FS_IPU_FORCE | 1 << F2FS_IPU_HONOR_OPU_WRITE; } From 8a47d228de6a4fd4c751142fb27d56f385b3fe41 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Thu, 24 Nov 2022 00:44:02 +0800 Subject: [PATCH 2851/4122] f2fs: introduce discard_urgent_util sysfs node Through this node, you can control whether background discard runs more aggressively or not once the configured space utilization rate is reached. Signed-off-by: Yangtao Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 8 ++++++++ fs/f2fs/f2fs.h | 1 + fs/f2fs/segment.c | 3 ++- fs/f2fs/sysfs.c | 9 +++++++++ 4 files changed, 20 insertions(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 32404781e76f..84a009aab1a1 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -647,3 +647,11 @@ Date: October 2022 Contact: "Yangtao Li" Description: Show the current gc_mode as a string. This is a read-only entry. + +What: /sys/fs/f2fs//discard_urgent_util +Date: November 2022 +Contact: "Yangtao Li" +Description: When space utilization exceeds this, do background DISCARD aggressively. + Does DISCARD forcibly in a period of given min_discard_issue_time when the number + of discards is not 0 and set discard granularity to 1. + Default: 80 diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4694b55b6df4..296683648d4f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -413,6 +413,7 @@ struct discard_cmd_control { unsigned int min_discard_issue_time; /* min. interval between discard issue */ unsigned int mid_discard_issue_time; /* mid. interval between discard issue */ unsigned int max_discard_issue_time; /* max. 
interval between discard issue */ + unsigned int discard_urgent_util; /* utilization which issue discard proactively */ unsigned int discard_granularity; /* discard granularity */ unsigned int max_ordered_discard; /* maximum discard granularity issued by lba order */ unsigned int undiscard_blks; /* # of undiscard blocks */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c7afcc6cd75b..0ff451ea18f6 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1064,7 +1064,7 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi, dpolicy->io_aware = true; dpolicy->sync = false; dpolicy->ordered = true; - if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) { + if (utilization(sbi) > dcc->discard_urgent_util) { dpolicy->granularity = MIN_DISCARD_GRANULARITY; if (atomic_read(&dcc->discard_cmd_cnt)) dpolicy->max_interval = @@ -2079,6 +2079,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) dcc->min_discard_issue_time = DEF_MIN_DISCARD_ISSUE_TIME; dcc->mid_discard_issue_time = DEF_MID_DISCARD_ISSUE_TIME; dcc->max_discard_issue_time = DEF_MAX_DISCARD_ISSUE_TIME; + dcc->discard_urgent_util = DEF_DISCARD_URGENT_UTIL; dcc->undiscard_blks = 0; dcc->next_pos = 0; dcc->root = RB_ROOT_CACHED; diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 33ec467b3772..a4745d596310 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -493,6 +493,13 @@ out: return count; } + if (!strcmp(a->attr.name, "discard_urgent_util")) { + if (t > 100) + return -EINVAL; + *ui = t; + return count; + } + if (!strcmp(a->attr.name, "migration_granularity")) { if (t == 0 || t > sbi->segs_per_sec) return -EINVAL; @@ -800,6 +807,7 @@ F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_discard_request, max_discard_req F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, min_discard_issue_time, min_discard_issue_time); F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, mid_discard_issue_time, mid_discard_issue_time); F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_discard_issue_time, 
max_discard_issue_time); +F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_urgent_util, discard_urgent_util); F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_granularity, discard_granularity); F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_ordered_discard, max_ordered_discard); F2FS_RW_ATTR(RESERVED_BLOCKS, f2fs_sb_info, reserved_blocks, reserved_blocks); @@ -930,6 +938,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(min_discard_issue_time), ATTR_LIST(mid_discard_issue_time), ATTR_LIST(max_discard_issue_time), + ATTR_LIST(discard_urgent_util), ATTR_LIST(discard_granularity), ATTR_LIST(max_ordered_discard), ATTR_LIST(pending_discard), From 4bded7af8b9af6e97514b0521004f90267905aef Mon Sep 17 00:00:00 2001 From: Beau Belgrave Date: Wed, 23 Nov 2022 10:32:48 -0800 Subject: [PATCH 2852/4122] tracing/user_events: Fix call print_fmt leak If user_event_trace_register() fails within user_event_parse() the call's print_fmt member is not freed. Add kfree call to fix this. Link: https://lkml.kernel.org/r/20221123183248.554-1-beaub@linux.microsoft.com Fixes: aa3b2b4c6692 ("user_events: Add print_fmt generation support for basic types") Signed-off-by: Beau Belgrave Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_user.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c index 539b08ae7020..9cb53182bb31 100644 --- a/kernel/trace/trace_events_user.c +++ b/kernel/trace/trace_events_user.c @@ -1359,6 +1359,7 @@ put_user_lock: put_user: user_event_destroy_fields(user); user_event_destroy_validators(user); + kfree(user->call.print_fmt); kfree(user); return ret; } From f43dc4dc3eff028b5ddddd99f3a66c5a6bdd4e78 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 29 Nov 2022 09:09:11 +1100 Subject: [PATCH 2853/4122] iomap: buffered write failure should not truncate the page cache iomap_file_buffered_write_punch_delalloc() currently invalidates the page cache over the unused range of 
the delalloc extent that was allocated. While the write allocated the delalloc extent, it does not own it exclusively as the write does not hold any locks that prevent either writeback or mmap page faults from changing the state of either the page cache or the extent state backing this range. Whilst xfs_bmap_punch_delalloc_range() already handles races in extent conversion - it will only punch out delalloc extents and it ignores any other type of extent - the page cache truncate does not discriminate between data written by this write or some other task. As a result, truncating the page cache can result in data corruption if the write races with mmap modifications to the file over the same range. generic/346 exercises this workload, and if we randomly fail writes (as will happen when iomap gets stale iomap detection later in the patchset), it will randomly corrupt the file data because it removes data written by mmap() in the same page as the write() that failed. Hence we do not want to punch out the page cache over the range of the extent we failed to write to - what we actually need to do is detect the ranges that have dirty data in cache over them and *not punch them out*. To do this, we have to walk the page cache over the range of the delalloc extent we want to remove. This is made complex by the fact we have to handle partially up-to-date folios correctly and this can happen even when the FSB size == PAGE_SIZE because we now support multi-page folios in the page cache. Because we are only interested in discovering the edges of data ranges in the page cache (i.e. hole-data boundaries) we can make use of mapping_seek_hole_data() to find those transitions in the page cache. As we hold the invalidate_lock, we know that the boundaries are not going to change while we walk the range. This interface is also byte-based and is sub-page block aware, so we can find the data ranges in the cache based on byte offsets rather than page, folio or fs block sized chunks. 
This greatly simplifies the logic of finding dirty cached ranges in the page cache. Once we've identified a range that contains cached data, we can then iterate the range folio by folio. This allows us to determine if the data is dirty and hence perform the correct delalloc extent punching operations. The seek interface we use to iterate data ranges will give us sub-folio start/end granularity, so we may end up looking up the same folio multiple times as the seek interface iterates across each discontiguous data region in the folio. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong --- fs/iomap/buffered-io.c | 195 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 180 insertions(+), 15 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 734b761a1e4a..dca9ec9dc4a8 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -832,6 +832,165 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i, } EXPORT_SYMBOL_GPL(iomap_file_buffered_write); +/* + * Scan the data range passed to us for dirty page cache folios. If we find a + * dirty folio, punch out the preceding range and update the offset from which + * the next punch will start. + * + * We can punch out storage reservations under clean pages because they either + * contain data that has been written back - in which case the delalloc punch + * over that range is a no-op - or they have been read faults in which case they + * contain zeroes and we can remove the delalloc backing range and any new + * writes to those pages will do the normal hole filling operation... + * + * This makes the logic simple: we need to keep the delalloc extents only + * over the dirty ranges of the page cache. + * + * This function uses [start_byte, end_byte) intervals (i.e. open ended) to + * simplify range iterations. 
+ */ +static int iomap_write_delalloc_scan(struct inode *inode, + loff_t *punch_start_byte, loff_t start_byte, loff_t end_byte, + int (*punch)(struct inode *inode, loff_t offset, loff_t length)) +{ + while (start_byte < end_byte) { + struct folio *folio; + + /* grab locked page */ + folio = filemap_lock_folio(inode->i_mapping, + start_byte >> PAGE_SHIFT); + if (!folio) { + start_byte = ALIGN_DOWN(start_byte, PAGE_SIZE) + + PAGE_SIZE; + continue; + } + + /* if dirty, punch up to offset */ + if (folio_test_dirty(folio)) { + if (start_byte > *punch_start_byte) { + int error; + + error = punch(inode, *punch_start_byte, + start_byte - *punch_start_byte); + if (error) { + folio_unlock(folio); + folio_put(folio); + return error; + } + } + + /* + * Make sure the next punch start is correctly bound to + * the end of this data range, not the end of the folio. + */ + *punch_start_byte = min_t(loff_t, end_byte, + folio_next_index(folio) << PAGE_SHIFT); + } + + /* move offset to start of next folio in range */ + start_byte = folio_next_index(folio) << PAGE_SHIFT; + folio_unlock(folio); + folio_put(folio); + } + return 0; +} + +/* + * Punch out all the delalloc blocks in the range given except for those that + * have dirty data still pending in the page cache - those are going to be + * written and so must still retain the delalloc backing for writeback. + * + * As we are scanning the page cache for data, we don't need to reimplement the + * wheel - mapping_seek_hole_data() does exactly what we need to identify the + * start and end of data ranges correctly even for sub-folio block sizes. This + * byte range based iteration is especially convenient because it means we + * don't have to care about variable size folios, nor where the start or end of + * the data range lies within a folio, if they lie within the same folio or even + * if there are multiple discontiguous data ranges within the folio. 
+ * + * It should be noted that mapping_seek_hole_data() is not aware of EOF, and so + * can return data ranges that exist in the cache beyond EOF. e.g. a page fault + * spanning EOF will initialise the post-EOF data to zeroes and mark it up to + * date. A write page fault can then mark it dirty. If we then fail a write() + * beyond EOF into that up to date cached range, we allocate a delalloc block + * beyond EOF and then have to punch it out. Because the range is up to date, + * mapping_seek_hole_data() will return it, and we will skip the punch because + * the folio is dirty. This is incorrect - we always need to punch out delalloc + * beyond EOF in this case as writeback will never write back and convert that + * delalloc block beyond EOF. Hence we limit the cached data scan range to EOF, + * resulting in always punching out the range from the EOF to the end of the + * range the iomap spans. + * + * Intervals are of the form [start_byte, end_byte) (i.e. open ended) because it + * matches the intervals returned by mapping_seek_hole_data(). i.e. SEEK_DATA + * returns the start of a data range (start_byte), and SEEK_HOLE(start_byte) + * returns the end of the data range (data_end). Using closed intervals would + * require sprinkling this code with magic "+ 1" and "- 1" arithmetic and expose + * the code to subtle off-by-one bugs.... + */ +static int iomap_write_delalloc_release(struct inode *inode, + loff_t start_byte, loff_t end_byte, + int (*punch)(struct inode *inode, loff_t pos, loff_t length)) +{ + loff_t punch_start_byte = start_byte; + loff_t scan_end_byte = min(i_size_read(inode), end_byte); + int error = 0; + + /* + * Lock the mapping to avoid races with page faults re-instantiating + * folios and dirtying them via ->page_mkwrite whilst we walk the + * cache and perform delalloc extent removal. Failing to do this can + * leave dirty pages with no space reservation in the cache. 
+ */ + filemap_invalidate_lock(inode->i_mapping); + while (start_byte < scan_end_byte) { + loff_t data_end; + + start_byte = mapping_seek_hole_data(inode->i_mapping, + start_byte, scan_end_byte, SEEK_DATA); + /* + * If there is no more data to scan, all that is left is to + * punch out the remaining range. + */ + if (start_byte == -ENXIO || start_byte == scan_end_byte) + break; + if (start_byte < 0) { + error = start_byte; + goto out_unlock; + } + WARN_ON_ONCE(start_byte < punch_start_byte); + WARN_ON_ONCE(start_byte > scan_end_byte); + + /* + * We find the end of this contiguous cached data range by + * seeking from start_byte to the beginning of the next hole. + */ + data_end = mapping_seek_hole_data(inode->i_mapping, start_byte, + scan_end_byte, SEEK_HOLE); + if (data_end < 0) { + error = data_end; + goto out_unlock; + } + WARN_ON_ONCE(data_end <= start_byte); + WARN_ON_ONCE(data_end > scan_end_byte); + + error = iomap_write_delalloc_scan(inode, &punch_start_byte, + start_byte, data_end, punch); + if (error) + goto out_unlock; + + /* The next data search starts at the end of this one. */ + start_byte = data_end; + } + + if (punch_start_byte < end_byte) + error = punch(inode, punch_start_byte, + end_byte - punch_start_byte); +out_unlock: + filemap_invalidate_unlock(inode->i_mapping); + return error; +} + /* * When a short write occurs, the filesystem may need to remove reserved space * that was allocated in ->iomap_begin from it's ->iomap_end method. For @@ -842,8 +1001,25 @@ EXPORT_SYMBOL_GPL(iomap_file_buffered_write); * allocated for this iomap. * * This function uses [start_byte, end_byte) intervals (i.e. open ended) to - * simplify range iterations, but converts them back to {offset,len} tuples for - * the punch callback. + * simplify range iterations. + * + * The punch() callback *must* only punch delalloc extents in the range passed + * to it. It must skip over all other types of extents in the range and leave + * them completely unchanged. 
It must do this punch atomically with respect to + * other extent modifications. + * + * The punch() callback may be called with a folio locked to prevent writeback + * extent allocation racing at the edge of the range we are currently punching. + * The locked folio may or may not cover the range being punched, so it is not + * safe for the punch() callback to lock folios itself. + * + * Lock order is: + * + * inode->i_rwsem (shared or exclusive) + * inode->i_mapping->invalidate_lock (exclusive) + * folio_lock() + * ->punch + * internal filesystem allocation lock */ int iomap_file_buffered_write_punch_delalloc(struct inode *inode, struct iomap *iomap, loff_t pos, loff_t length, @@ -853,7 +1029,6 @@ int iomap_file_buffered_write_punch_delalloc(struct inode *inode, loff_t start_byte; loff_t end_byte; int blocksize = i_blocksize(inode); - int error = 0; if (iomap->type != IOMAP_DELALLOC) return 0; @@ -877,18 +1052,8 @@ int iomap_file_buffered_write_punch_delalloc(struct inode *inode, if (start_byte >= end_byte) return 0; - /* - * Lock the mapping to avoid races with page faults re-instantiating - * folios and dirtying them via ->page_mkwrite between the page cache - * truncation and the delalloc extent removal. Failing to do this can - * leave dirty pages with no space reservation in the cache. 
- */ - filemap_invalidate_lock(inode->i_mapping); - truncate_pagecache_range(inode, start_byte, end_byte - 1); - error = punch(inode, start_byte, end_byte - start_byte); - filemap_invalidate_unlock(inode->i_mapping); - - return error; + return iomap_write_delalloc_release(inode, start_byte, end_byte, + punch); } EXPORT_SYMBOL_GPL(iomap_file_buffered_write_punch_delalloc); From 7348b322332d8602a4133f0b861334ea021b134a Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 29 Nov 2022 09:09:17 +1100 Subject: [PATCH 2854/4122] xfs: xfs_bmap_punch_delalloc_range() should take a byte range All the callers of xfs_bmap_punch_delalloc_range() jump through hoops to convert a byte range to filesystem blocks before calling xfs_bmap_punch_delalloc_range(). Instead, pass the byte range to xfs_bmap_punch_delalloc_range() and have it do the conversion to filesystem blocks internally. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong --- fs/xfs/xfs_aops.c | 16 ++++++---------- fs/xfs/xfs_bmap_util.c | 10 ++++++---- fs/xfs/xfs_bmap_util.h | 2 +- fs/xfs/xfs_iomap.c | 8 ++------ 4 files changed, 15 insertions(+), 21 deletions(-) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 5d1a995b15f8..6aadc5815068 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -114,9 +114,8 @@ xfs_end_ioend( if (unlikely(error)) { if (ioend->io_flags & IOMAP_F_SHARED) { xfs_reflink_cancel_cow_range(ip, offset, size, true); - xfs_bmap_punch_delalloc_range(ip, - XFS_B_TO_FSBT(mp, offset), - XFS_B_TO_FSB(mp, size)); + xfs_bmap_punch_delalloc_range(ip, offset, + offset + size); } goto done; } @@ -455,12 +454,8 @@ xfs_discard_folio( struct folio *folio, loff_t pos) { - struct inode *inode = folio->mapping->host; - struct xfs_inode *ip = XFS_I(inode); + struct xfs_inode *ip = XFS_I(folio->mapping->host); struct xfs_mount *mp = ip->i_mount; - size_t offset = offset_in_folio(folio, pos); - xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, pos); - xfs_fileoff_t pageoff_fsb = XFS_B_TO_FSBT(mp, 
offset); int error; if (xfs_is_shutdown(mp)) @@ -470,8 +465,9 @@ xfs_discard_folio( "page discard on page "PTR_FMT", inode 0x%llx, pos %llu.", folio, ip->i_ino, pos); - error = xfs_bmap_punch_delalloc_range(ip, start_fsb, - i_blocks_per_folio(inode, folio) - pageoff_fsb); + error = xfs_bmap_punch_delalloc_range(ip, pos, + round_up(pos, folio_size(folio))); + if (error && !xfs_is_shutdown(mp)) xfs_alert(mp, "page discard unable to remove delalloc mapping."); } diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 04d0c2bff67c..867645b74d88 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -590,11 +590,13 @@ out_unlock_iolock: int xfs_bmap_punch_delalloc_range( struct xfs_inode *ip, - xfs_fileoff_t start_fsb, - xfs_fileoff_t length) + xfs_off_t start_byte, + xfs_off_t end_byte) { + struct xfs_mount *mp = ip->i_mount; struct xfs_ifork *ifp = &ip->i_df; - xfs_fileoff_t end_fsb = start_fsb + length; + xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, start_byte); + xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, end_byte); struct xfs_bmbt_irec got, del; struct xfs_iext_cursor icur; int error = 0; @@ -607,7 +609,7 @@ xfs_bmap_punch_delalloc_range( while (got.br_startoff + got.br_blockcount > start_fsb) { del = got; - xfs_trim_extent(&del, start_fsb, length); + xfs_trim_extent(&del, start_fsb, end_fsb - start_fsb); /* * A delete can push the cursor forward. 
Step back to the diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h index 24b37d211f1d..6888078f5c31 100644 --- a/fs/xfs/xfs_bmap_util.h +++ b/fs/xfs/xfs_bmap_util.h @@ -31,7 +31,7 @@ xfs_bmap_rtalloc(struct xfs_bmalloca *ap) #endif /* CONFIG_XFS_RT */ int xfs_bmap_punch_delalloc_range(struct xfs_inode *ip, - xfs_fileoff_t start_fsb, xfs_fileoff_t length); + xfs_off_t start_byte, xfs_off_t end_byte); struct kgetbmap { __s64 bmv_offset; /* file offset of segment in blocks */ diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index ea96e8a34868..09676ff6940e 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1126,12 +1126,8 @@ xfs_buffered_write_delalloc_punch( loff_t offset, loff_t length) { - struct xfs_mount *mp = XFS_M(inode->i_sb); - xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, offset); - xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + length); - - return xfs_bmap_punch_delalloc_range(XFS_I(inode), start_fsb, - end_fsb - start_fsb); + return xfs_bmap_punch_delalloc_range(XFS_I(inode), offset, + offset + length); } static int From d7b64041164ca177170191d2ad775da074ab2926 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 29 Nov 2022 09:09:17 +1100 Subject: [PATCH 2855/4122] iomap: write iomap validity checks A recent multithreaded write data corruption has been uncovered in the iomap write code. The core of the problem is partial folio writes can be flushed to disk while a new racing write can map it and fill the rest of the page: writeback new write allocate blocks blocks are unwritten submit IO ..... map blocks iomap indicates UNWRITTEN range loop { lock folio copyin data ..... IO completes runs unwritten extent conv blocks are marked written get next folio } Now add memory pressure such that memory reclaim evicts the partially written folio that has already been written to disk. 
When the new write finally gets to the last partial page of the new write, it does not find it in cache, so it instantiates a new page, sees the iomap is unwritten, and zeros the part of the page that it does not have data from. This overwrites the data on disk that was originally written. The full description of the corruption mechanism can be found here: https://lore.kernel.org/linux-xfs/20220817093627.GZ3600936@dread.disaster.area/ To solve this problem, we need to check whether the iomap is still valid after we lock each folio during the write. We have to do it after we lock the page so that we don't end up with state changes occurring while we wait for the folio to be locked. Hence we need a mechanism to be able to check that the cached iomap is still valid (similar to what we already do in buffered writeback), and we need a way for ->begin_write to back out and tell the high level iomap iterator that we need to remap the remaining write range. The iomap needs to grow some storage for the validity cookie that the filesystem provides to travel with the iomap. XFS, in particular, also needs to know some more information about what the iomap maps (attribute extents rather than file data extents) in order for the validity cookie to cover all the types of iomaps we might need to validate. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. 
Wong --- fs/iomap/buffered-io.c | 29 +++++++++++++++++++++++++++- fs/iomap/iter.c | 19 ++++++++++++++++++- include/linux/iomap.h | 43 ++++++++++++++++++++++++++++++++++-------- 3 files changed, 81 insertions(+), 10 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index dca9ec9dc4a8..356193e44cf0 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -584,7 +584,7 @@ static int iomap_write_begin_inline(const struct iomap_iter *iter, return iomap_read_inline_data(iter, folio); } -static int iomap_write_begin(const struct iomap_iter *iter, loff_t pos, +static int iomap_write_begin(struct iomap_iter *iter, loff_t pos, size_t len, struct folio **foliop) { const struct iomap_page_ops *page_ops = iter->iomap.page_ops; @@ -618,6 +618,27 @@ static int iomap_write_begin(const struct iomap_iter *iter, loff_t pos, status = (iter->flags & IOMAP_NOWAIT) ? -EAGAIN : -ENOMEM; goto out_no_page; } + + /* + * Now we have a locked folio, before we do anything with it we need to + * check that the iomap we have cached is not stale. The inode extent + * mapping can change due to concurrent IO in flight (e.g. + * IOMAP_UNWRITTEN state can change and memory reclaim could have + * reclaimed a previously partially written page at this index after IO + * completion before this write reaches this file offset) and hence we + * could do the wrong thing here (zero a page range incorrectly or fail + * to zero) and corrupt data. 
+ */ + if (page_ops && page_ops->iomap_valid) { + bool iomap_valid = page_ops->iomap_valid(iter->inode, + &iter->iomap); + if (!iomap_valid) { + iter->iomap.flags |= IOMAP_F_STALE; + status = 0; + goto out_unlock; + } + } + if (pos + len > folio_pos(folio) + folio_size(folio)) len = folio_pos(folio) + folio_size(folio) - pos; @@ -773,6 +794,8 @@ again: status = iomap_write_begin(iter, pos, bytes, &folio); if (unlikely(status)) break; + if (iter->iomap.flags & IOMAP_F_STALE) + break; page = folio_file_page(folio, pos >> PAGE_SHIFT); if (mapping_writably_mapped(mapping)) @@ -1081,6 +1104,8 @@ static loff_t iomap_unshare_iter(struct iomap_iter *iter) status = iomap_write_begin(iter, pos, bytes, &folio); if (unlikely(status)) return status; + if (iter->iomap.flags & IOMAP_F_STALE) + break; status = iomap_write_end(iter, pos, bytes, bytes, folio); if (WARN_ON_ONCE(status == 0)) @@ -1136,6 +1161,8 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) status = iomap_write_begin(iter, pos, bytes, &folio); if (status) return status; + if (iter->iomap.flags & IOMAP_F_STALE) + break; offset = offset_in_folio(folio, pos); if (bytes > folio_size(folio) - offset) diff --git a/fs/iomap/iter.c b/fs/iomap/iter.c index a1c7592d2ade..79a0614eaab7 100644 --- a/fs/iomap/iter.c +++ b/fs/iomap/iter.c @@ -7,12 +7,28 @@ #include #include "trace.h" +/* + * Advance to the next range we need to map. + * + * If the iomap is marked IOMAP_F_STALE, it means the existing map was not fully + * processed - it was aborted because the extent the iomap spanned may have been + * changed during the operation. In this case, the iteration behaviour is to + * remap the unprocessed range of the iter, and that means we may need to remap + * even when we've made no progress (i.e. iter->processed = 0). 
Hence the + * "finished iterating" case needs to distinguish between + * (processed = 0) meaning we are done and (processed = 0 && stale) meaning we + * need to remap the entire remaining range. + */ static inline int iomap_iter_advance(struct iomap_iter *iter) { + bool stale = iter->iomap.flags & IOMAP_F_STALE; + /* handle the previous iteration (if any) */ if (iter->iomap.length) { - if (iter->processed <= 0) + if (iter->processed < 0) return iter->processed; + if (!iter->processed && !stale) + return 0; if (WARN_ON_ONCE(iter->processed > iomap_length(iter))) return -EIO; iter->pos += iter->processed; @@ -33,6 +49,7 @@ static inline void iomap_iter_done(struct iomap_iter *iter) WARN_ON_ONCE(iter->iomap.offset > iter->pos); WARN_ON_ONCE(iter->iomap.length == 0); WARN_ON_ONCE(iter->iomap.offset + iter->iomap.length <= iter->pos); + WARN_ON_ONCE(iter->iomap.flags & IOMAP_F_STALE); trace_iomap_iter_dstmap(iter->inode, &iter->iomap); if (iter->srcmap.type != IOMAP_HOLE) diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 0698c4b8ce0e..0983dfc9a203 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -49,26 +49,35 @@ struct vm_fault; * * IOMAP_F_BUFFER_HEAD indicates that the file system requires the use of * buffer heads for this mapping. + * + * IOMAP_F_XATTR indicates that the iomap is for an extended attribute extent + * rather than a file data extent. 
*/ -#define IOMAP_F_NEW 0x01 -#define IOMAP_F_DIRTY 0x02 -#define IOMAP_F_SHARED 0x04 -#define IOMAP_F_MERGED 0x08 -#define IOMAP_F_BUFFER_HEAD 0x10 -#define IOMAP_F_ZONE_APPEND 0x20 +#define IOMAP_F_NEW (1U << 0) +#define IOMAP_F_DIRTY (1U << 1) +#define IOMAP_F_SHARED (1U << 2) +#define IOMAP_F_MERGED (1U << 3) +#define IOMAP_F_BUFFER_HEAD (1U << 4) +#define IOMAP_F_ZONE_APPEND (1U << 5) +#define IOMAP_F_XATTR (1U << 6) /* * Flags set by the core iomap code during operations: * * IOMAP_F_SIZE_CHANGED indicates to the iomap_end method that the file size * has changed as the result of this write operation. + * + * IOMAP_F_STALE indicates that the iomap is not valid any longer and the file + * range it covers needs to be remapped by the high level before the operation + * can proceed. */ -#define IOMAP_F_SIZE_CHANGED 0x100 +#define IOMAP_F_SIZE_CHANGED (1U << 8) +#define IOMAP_F_STALE (1U << 9) /* * Flags from 0x1000 up are for file system specific usage: */ -#define IOMAP_F_PRIVATE 0x1000 +#define IOMAP_F_PRIVATE (1U << 12) /* @@ -89,6 +98,7 @@ struct iomap { void *inline_data; void *private; /* filesystem private */ const struct iomap_page_ops *page_ops; + u64 validity_cookie; /* used with .iomap_valid() */ }; static inline sector_t iomap_sector(const struct iomap *iomap, loff_t pos) @@ -128,6 +138,23 @@ struct iomap_page_ops { int (*page_prepare)(struct inode *inode, loff_t pos, unsigned len); void (*page_done)(struct inode *inode, loff_t pos, unsigned copied, struct page *page); + + /* + * Check that the cached iomap still maps correctly to the filesystem's + * internal extent map. FS internal extent maps can change while iomap + * is iterating a cached iomap, so this hook allows iomap to detect that + * the iomap needs to be refreshed during a long running write + * operation. + * + * The filesystem can store internal state (e.g. 
a sequence number) in + * iomap->validity_cookie when the iomap is first mapped to be able to + * detect changes between mapping time and whenever .iomap_valid() is + * called. + * + * This is called with the folio over the specified file position held + * locked by the iomap code. + */ + bool (*iomap_valid)(struct inode *inode, const struct iomap *iomap); }; /* From 304a68b9c63bbfc1f6e159d68e8892fc54a06067 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 29 Nov 2022 09:09:17 +1100 Subject: [PATCH 2856/4122] xfs: use iomap_valid method to detect stale cached iomaps Now that iomap supports a mechanism to validate cached iomaps for buffered write operations, hook it up to the XFS buffered write ops so that we can avoid data corruptions that result from stale cached iomaps. See: https://lore.kernel.org/linux-xfs/20220817093627.GZ3600936@dread.disaster.area/ or the ->iomap_valid() introduction commit for exact details of the corruption vector. The validity cookie we store in the iomap is based on the type of iomap we return. It is expected that the iomap->flags we set in xfs_bmbt_to_iomap() are not perturbed by the iomap core and are returned to us in the iomap passed via the .iomap_valid() callback. This ensures that the validity cookie is always checking the correct inode fork sequence numbers to detect potential changes that affect the extent cached by the iomap. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_bmap.c | 6 ++- fs/xfs/xfs_aops.c | 2 +- fs/xfs/xfs_iomap.c | 95 +++++++++++++++++++++++++++++++--------- fs/xfs/xfs_iomap.h | 5 ++- fs/xfs/xfs_pnfs.c | 6 ++- 5 files changed, 87 insertions(+), 27 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 49d0d4ea63fc..56b9b7db38bb 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -4551,7 +4551,8 @@ xfs_bmapi_convert_delalloc( * the extent. Just return the real extent at this offset. 
*/ if (!isnullstartblock(bma.got.br_startblock)) { - xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags); + xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags, + xfs_iomap_inode_sequence(ip, flags)); *seq = READ_ONCE(ifp->if_seq); goto out_trans_cancel; } @@ -4599,7 +4600,8 @@ xfs_bmapi_convert_delalloc( XFS_STATS_INC(mp, xs_xstrat_quick); ASSERT(!isnullstartblock(bma.got.br_startblock)); - xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags); + xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags, + xfs_iomap_inode_sequence(ip, flags)); *seq = READ_ONCE(ifp->if_seq); if (whichfork == XFS_COW_FORK) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 6aadc5815068..a22d90af40c8 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -372,7 +372,7 @@ retry: isnullstartblock(imap.br_startblock)) goto allocate_blocks; - xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0, 0); + xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0, 0, XFS_WPC(wpc)->data_seq); trace_xfs_map_blocks_found(ip, offset, count, whichfork, &imap); return 0; allocate_blocks: diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 09676ff6940e..26ca3cc1a048 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -48,13 +48,45 @@ xfs_alert_fsblock_zero( return -EFSCORRUPTED; } +u64 +xfs_iomap_inode_sequence( + struct xfs_inode *ip, + u16 iomap_flags) +{ + u64 cookie = 0; + + if (iomap_flags & IOMAP_F_XATTR) + return READ_ONCE(ip->i_af.if_seq); + if ((iomap_flags & IOMAP_F_SHARED) && ip->i_cowfp) + cookie = (u64)READ_ONCE(ip->i_cowfp->if_seq) << 32; + return cookie | READ_ONCE(ip->i_df.if_seq); +} + +/* + * Check that the iomap passed to us is still valid for the given offset and + * length. 
+ */ +static bool +xfs_iomap_valid( + struct inode *inode, + const struct iomap *iomap) +{ + return iomap->validity_cookie == + xfs_iomap_inode_sequence(XFS_I(inode), iomap->flags); +} + +const struct iomap_page_ops xfs_iomap_page_ops = { + .iomap_valid = xfs_iomap_valid, +}; + int xfs_bmbt_to_iomap( struct xfs_inode *ip, struct iomap *iomap, struct xfs_bmbt_irec *imap, unsigned int mapping_flags, - u16 iomap_flags) + u16 iomap_flags, + u64 sequence_cookie) { struct xfs_mount *mp = ip->i_mount; struct xfs_buftarg *target = xfs_inode_buftarg(ip); @@ -91,6 +123,9 @@ xfs_bmbt_to_iomap( if (xfs_ipincount(ip) && (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP)) iomap->flags |= IOMAP_F_DIRTY; + + iomap->validity_cookie = sequence_cookie; + iomap->page_ops = &xfs_iomap_page_ops; return 0; } @@ -195,7 +230,8 @@ xfs_iomap_write_direct( xfs_fileoff_t offset_fsb, xfs_fileoff_t count_fsb, unsigned int flags, - struct xfs_bmbt_irec *imap) + struct xfs_bmbt_irec *imap, + u64 *seq) { struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; @@ -285,6 +321,7 @@ xfs_iomap_write_direct( error = xfs_alert_fsblock_zero(ip, imap); out_unlock: + *seq = xfs_iomap_inode_sequence(ip, 0); xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; @@ -743,6 +780,7 @@ xfs_direct_write_iomap_begin( bool shared = false; u16 iomap_flags = 0; unsigned int lockmode = XFS_ILOCK_SHARED; + u64 seq; ASSERT(flags & (IOMAP_WRITE | IOMAP_ZERO)); @@ -811,9 +849,10 @@ xfs_direct_write_iomap_begin( goto out_unlock; } + seq = xfs_iomap_inode_sequence(ip, iomap_flags); xfs_iunlock(ip, lockmode); trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap); - return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, iomap_flags); + return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, iomap_flags, seq); allocate_blocks: error = -EAGAIN; @@ -839,24 +878,26 @@ allocate_blocks: xfs_iunlock(ip, lockmode); error = xfs_iomap_write_direct(ip, offset_fsb, end_fsb - offset_fsb, - flags, &imap); + flags, &imap, &seq); if (error) return 
error; trace_xfs_iomap_alloc(ip, offset, length, XFS_DATA_FORK, &imap); return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, - iomap_flags | IOMAP_F_NEW); + iomap_flags | IOMAP_F_NEW, seq); out_found_cow: - xfs_iunlock(ip, lockmode); length = XFS_FSB_TO_B(mp, cmap.br_startoff + cmap.br_blockcount); trace_xfs_iomap_found(ip, offset, length - offset, XFS_COW_FORK, &cmap); if (imap.br_startblock != HOLESTARTBLOCK) { - error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0); + seq = xfs_iomap_inode_sequence(ip, 0); + error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0, seq); if (error) - return error; + goto out_unlock; } - return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED); + seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED); + xfs_iunlock(ip, lockmode); + return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED, seq); out_unlock: if (lockmode) @@ -915,6 +956,7 @@ xfs_buffered_write_iomap_begin( int allocfork = XFS_DATA_FORK; int error = 0; unsigned int lockmode = XFS_ILOCK_EXCL; + u64 seq; if (xfs_is_shutdown(mp)) return -EIO; @@ -1094,26 +1136,31 @@ retry: * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch * them out if the write happens to fail. 
*/ + seq = xfs_iomap_inode_sequence(ip, IOMAP_F_NEW); xfs_iunlock(ip, XFS_ILOCK_EXCL); trace_xfs_iomap_alloc(ip, offset, count, allocfork, &imap); - return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_NEW); + return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_NEW, seq); found_imap: + seq = xfs_iomap_inode_sequence(ip, 0); xfs_iunlock(ip, XFS_ILOCK_EXCL); - return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0); + return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0, seq); found_cow: - xfs_iunlock(ip, XFS_ILOCK_EXCL); + seq = xfs_iomap_inode_sequence(ip, 0); if (imap.br_startoff <= offset_fsb) { - error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0); + error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0, seq); if (error) - return error; + goto out_unlock; + seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED); + xfs_iunlock(ip, XFS_ILOCK_EXCL); return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, - IOMAP_F_SHARED); + IOMAP_F_SHARED, seq); } xfs_trim_extent(&cmap, offset_fsb, imap.br_startoff - offset_fsb); - return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, 0); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, 0, seq); out_unlock: xfs_iunlock(ip, XFS_ILOCK_EXCL); @@ -1193,6 +1240,7 @@ xfs_read_iomap_begin( int nimaps = 1, error = 0; bool shared = false; unsigned int lockmode = XFS_ILOCK_SHARED; + u64 seq; ASSERT(!(flags & (IOMAP_WRITE | IOMAP_ZERO))); @@ -1206,13 +1254,14 @@ xfs_read_iomap_begin( &nimaps, 0); if (!error && (flags & IOMAP_REPORT)) error = xfs_reflink_trim_around_shared(ip, &imap, &shared); + seq = xfs_iomap_inode_sequence(ip, shared ? IOMAP_F_SHARED : 0); xfs_iunlock(ip, lockmode); if (error) return error; trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap); return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, - shared ? IOMAP_F_SHARED : 0); + shared ? 
IOMAP_F_SHARED : 0, seq); } const struct iomap_ops xfs_read_iomap_ops = { @@ -1237,6 +1286,7 @@ xfs_seek_iomap_begin( struct xfs_bmbt_irec imap, cmap; int error = 0; unsigned lockmode; + u64 seq; if (xfs_is_shutdown(mp)) return -EIO; @@ -1271,8 +1321,9 @@ xfs_seek_iomap_begin( if (data_fsb < cow_fsb + cmap.br_blockcount) end_fsb = min(end_fsb, data_fsb); xfs_trim_extent(&cmap, offset_fsb, end_fsb); + seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED); error = xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, - IOMAP_F_SHARED); + IOMAP_F_SHARED, seq); /* * This is a COW extent, so we must probe the page cache * because there could be dirty page cache being backed @@ -1293,8 +1344,9 @@ xfs_seek_iomap_begin( imap.br_startblock = HOLESTARTBLOCK; imap.br_state = XFS_EXT_NORM; done: + seq = xfs_iomap_inode_sequence(ip, 0); xfs_trim_extent(&imap, offset_fsb, end_fsb); - error = xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0); + error = xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0, seq); out_unlock: xfs_iunlock(ip, lockmode); return error; @@ -1320,6 +1372,7 @@ xfs_xattr_iomap_begin( struct xfs_bmbt_irec imap; int nimaps = 1, error = 0; unsigned lockmode; + int seq; if (xfs_is_shutdown(mp)) return -EIO; @@ -1336,12 +1389,14 @@ xfs_xattr_iomap_begin( error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, &nimaps, XFS_BMAPI_ATTRFORK); out_unlock: + + seq = xfs_iomap_inode_sequence(ip, IOMAP_F_XATTR); xfs_iunlock(ip, lockmode); if (error) return error; ASSERT(nimaps); - return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0); + return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_XATTR, seq); } const struct iomap_ops xfs_xattr_iomap_ops = { diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index 0f62ab633040..4da13440bae9 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -13,14 +13,15 @@ struct xfs_bmbt_irec; int xfs_iomap_write_direct(struct xfs_inode *ip, xfs_fileoff_t offset_fsb, xfs_fileoff_t count_fsb, unsigned int flags, - struct xfs_bmbt_irec *imap); 
+ struct xfs_bmbt_irec *imap, u64 *sequence); int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool); xfs_fileoff_t xfs_iomap_eof_align_last_fsb(struct xfs_inode *ip, xfs_fileoff_t end_fsb); +u64 xfs_iomap_inode_sequence(struct xfs_inode *ip, u16 iomap_flags); int xfs_bmbt_to_iomap(struct xfs_inode *ip, struct iomap *iomap, struct xfs_bmbt_irec *imap, unsigned int mapping_flags, - u16 iomap_flags); + u16 iomap_flags, u64 sequence_cookie); int xfs_zero_range(struct xfs_inode *ip, loff_t pos, loff_t len, bool *did_zero); diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index 37a24f0f7cd4..38d23f0e703a 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c @@ -125,6 +125,7 @@ xfs_fs_map_blocks( int nimaps = 1; uint lock_flags; int error = 0; + u64 seq; if (xfs_is_shutdown(mp)) return -EIO; @@ -176,6 +177,7 @@ xfs_fs_map_blocks( lock_flags = xfs_ilock_data_map_shared(ip); error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, &nimaps, bmapi_flags); + seq = xfs_iomap_inode_sequence(ip, 0); ASSERT(!nimaps || imap.br_startblock != DELAYSTARTBLOCK); @@ -189,7 +191,7 @@ xfs_fs_map_blocks( xfs_iunlock(ip, lock_flags); error = xfs_iomap_write_direct(ip, offset_fsb, - end_fsb - offset_fsb, 0, &imap); + end_fsb - offset_fsb, 0, &imap, &seq); if (error) goto out_unlock; @@ -209,7 +211,7 @@ xfs_fs_map_blocks( } xfs_iunlock(ip, XFS_IOLOCK_EXCL); - error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0, 0); + error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0, 0, seq); *device_generation = mp->m_generation; return error; out_unlock: From 6e8af15ccdc4e138a5b529c1901a0013e1dcaa09 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 29 Nov 2022 09:09:17 +1100 Subject: [PATCH 2857/4122] xfs: drop write error injection is unfixable, remove it With the changes to scan the page cache for dirty data to avoid data corruptions from partial write cleanup racing with other page cache operations, the drop writes error injection no longer works the same way it used to 
and causes xfs/196 to fail. This is because xfs/196 writes to the file and populates the page cache before it turns on the error injection and starts failing -overwrites-. The result is that the original drop-writes code failed writes only -after- overwriting the data in the cache, followed by invalidating the cached data, then punching out the delalloc extent from under that data. On the surface, this looks fine. The problem is that page cache invalidation *doesn't guarantee that it removes anything from the page cache* and it doesn't change the dirty state of the folio. When block size == page size and we do page aligned IO (as xfs/196 does) everything happens to align perfectly and page cache invalidation removes the single page folios that span the written data. Hence the followup delalloc punch pass does not find cached data over that range and it can punch the extent out. IOWs, xfs/196 "works" for block size == page size with the new code. I say "works", because it actually only works for the case where IO is page aligned, and no data was read from disk before writes occur. Because the moment we actually read data first, the readahead code allocates multipage folios and suddenly the invalidate code goes back to zeroing subfolio ranges without changing dirty state. Hence, with multipage folios in play, block size == page size is functionally identical to block size < page size behaviour, and drop-writes is manifestly broken w.r.t. this case. Invalidation of a subfolio range doesn't result in the folio being removed from the cache, just the range gets zeroed. Hence after we've sequentially walked over a folio that we've dirtied (via write data) and then invalidated, we end up with a dirty folio full of zeroed data. And because the new code skips punching ranges that have dirty folios covering them, we end up leaving the delalloc range intact after failing all the writes. 
Hence failed writes now end up writing zeroes to disk in the cases where invalidation zeroes folios rather than removing them from cache. This is a fundamental change of behaviour that is needed to avoid the data corruption vectors that exist in the old write fail path, and it renders the drop-writes injection non-functional and unworkable as it stands. As it is, I think the error injection is also now unnecessary, as partial writes that need delalloc extent are going to be a lot more common with stale iomap detection in place. Hence this patch removes the drop-writes error injection completely. xfs/196 can remain for testing kernels that don't have this data corruption fix, but those that do will report: xfs/196 3s ... [not run] XFS error injection drop_writes unknown on this kernel. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_errortag.h | 12 +++++------- fs/xfs/xfs_error.c | 27 ++++++++++++++++++++------- fs/xfs/xfs_iomap.c | 9 --------- 3 files changed, 25 insertions(+), 23 deletions(-) diff --git a/fs/xfs/libxfs/xfs_errortag.h b/fs/xfs/libxfs/xfs_errortag.h index 5362908164b0..580ccbd5aadc 100644 --- a/fs/xfs/libxfs/xfs_errortag.h +++ b/fs/xfs/libxfs/xfs_errortag.h @@ -40,13 +40,12 @@ #define XFS_ERRTAG_REFCOUNT_FINISH_ONE 25 #define XFS_ERRTAG_BMAP_FINISH_ONE 26 #define XFS_ERRTAG_AG_RESV_CRITICAL 27 + /* - * DEBUG mode instrumentation to test and/or trigger delayed allocation - * block killing in the event of failed writes. When enabled, all - * buffered writes are silenty dropped and handled as if they failed. - * All delalloc blocks in the range of the write (including pre-existing - * delalloc blocks!) are tossed as part of the write failure error - * handling sequence. + * Drop-writes support removed because write error handling cannot trash + * pre-existing delalloc extents in any useful way anymore. We retain the + * definition so that we can reject it as an invalid value in + * xfs_errortag_valid(). 
*/ #define XFS_ERRTAG_DROP_WRITES 28 #define XFS_ERRTAG_LOG_BAD_CRC 29 @@ -95,7 +94,6 @@ #define XFS_RANDOM_REFCOUNT_FINISH_ONE 1 #define XFS_RANDOM_BMAP_FINISH_ONE 1 #define XFS_RANDOM_AG_RESV_CRITICAL 4 -#define XFS_RANDOM_DROP_WRITES 1 #define XFS_RANDOM_LOG_BAD_CRC 1 #define XFS_RANDOM_LOG_ITEM_PIN 1 #define XFS_RANDOM_BUF_LRU_REF 2 diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index c6b2aabd6f18..dea3c0649d2f 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -46,7 +46,7 @@ static unsigned int xfs_errortag_random_default[] = { XFS_RANDOM_REFCOUNT_FINISH_ONE, XFS_RANDOM_BMAP_FINISH_ONE, XFS_RANDOM_AG_RESV_CRITICAL, - XFS_RANDOM_DROP_WRITES, + 0, /* XFS_RANDOM_DROP_WRITES has been removed */ XFS_RANDOM_LOG_BAD_CRC, XFS_RANDOM_LOG_ITEM_PIN, XFS_RANDOM_BUF_LRU_REF, @@ -162,7 +162,6 @@ XFS_ERRORTAG_ATTR_RW(refcount_continue_update, XFS_ERRTAG_REFCOUNT_CONTINUE_UPDA XFS_ERRORTAG_ATTR_RW(refcount_finish_one, XFS_ERRTAG_REFCOUNT_FINISH_ONE); XFS_ERRORTAG_ATTR_RW(bmap_finish_one, XFS_ERRTAG_BMAP_FINISH_ONE); XFS_ERRORTAG_ATTR_RW(ag_resv_critical, XFS_ERRTAG_AG_RESV_CRITICAL); -XFS_ERRORTAG_ATTR_RW(drop_writes, XFS_ERRTAG_DROP_WRITES); XFS_ERRORTAG_ATTR_RW(log_bad_crc, XFS_ERRTAG_LOG_BAD_CRC); XFS_ERRORTAG_ATTR_RW(log_item_pin, XFS_ERRTAG_LOG_ITEM_PIN); XFS_ERRORTAG_ATTR_RW(buf_lru_ref, XFS_ERRTAG_BUF_LRU_REF); @@ -206,7 +205,6 @@ static struct attribute *xfs_errortag_attrs[] = { XFS_ERRORTAG_ATTR_LIST(refcount_finish_one), XFS_ERRORTAG_ATTR_LIST(bmap_finish_one), XFS_ERRORTAG_ATTR_LIST(ag_resv_critical), - XFS_ERRORTAG_ATTR_LIST(drop_writes), XFS_ERRORTAG_ATTR_LIST(log_bad_crc), XFS_ERRORTAG_ATTR_LIST(log_item_pin), XFS_ERRORTAG_ATTR_LIST(buf_lru_ref), @@ -256,6 +254,19 @@ xfs_errortag_del( kmem_free(mp->m_errortag); } +static bool +xfs_errortag_valid( + unsigned int error_tag) +{ + if (error_tag >= XFS_ERRTAG_MAX) + return false; + + /* Error out removed injection types */ + if (error_tag == XFS_ERRTAG_DROP_WRITES) + return false; + return true; +} + 
bool xfs_errortag_test( struct xfs_mount *mp, @@ -277,7 +288,9 @@ xfs_errortag_test( if (!mp->m_errortag) return false; - ASSERT(error_tag < XFS_ERRTAG_MAX); + if (!xfs_errortag_valid(error_tag)) + return false; + randfactor = mp->m_errortag[error_tag]; if (!randfactor || prandom_u32_max(randfactor)) return false; @@ -293,7 +306,7 @@ xfs_errortag_get( struct xfs_mount *mp, unsigned int error_tag) { - if (error_tag >= XFS_ERRTAG_MAX) + if (!xfs_errortag_valid(error_tag)) return -EINVAL; return mp->m_errortag[error_tag]; @@ -305,7 +318,7 @@ xfs_errortag_set( unsigned int error_tag, unsigned int tag_value) { - if (error_tag >= XFS_ERRTAG_MAX) + if (!xfs_errortag_valid(error_tag)) return -EINVAL; mp->m_errortag[error_tag] = tag_value; @@ -319,7 +332,7 @@ xfs_errortag_add( { BUILD_BUG_ON(ARRAY_SIZE(xfs_errortag_random_default) != XFS_ERRTAG_MAX); - if (error_tag >= XFS_ERRTAG_MAX) + if (!xfs_errortag_valid(error_tag)) return -EINVAL; return xfs_errortag_set(mp, error_tag, diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 26ca3cc1a048..1bdd7afc1010 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1190,15 +1190,6 @@ xfs_buffered_write_iomap_end( struct xfs_mount *mp = XFS_M(inode->i_sb); int error; - /* - * Behave as if the write failed if drop writes is enabled. Set the NEW - * flag to force delalloc cleanup. 
- */ - if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_DROP_WRITES)) { - iomap->flags |= IOMAP_F_NEW; - written = 0; - } - error = iomap_file_buffered_write_punch_delalloc(inode, iomap, offset, length, written, &xfs_buffered_write_delalloc_punch); if (error && !xfs_is_shutdown(mp)) { From 46a5cd8c8dcacc58912b4bf04ca4d84d2977bbbc Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Mon, 17 Oct 2022 14:05:13 -0400 Subject: [PATCH 2858/4122] tracing: Update MAINTAINERS file for new patchwork and mailing list The tracing subsystem now has its own mailing list (although patches should also be sent to LKML) as well as a new patchwork entry for kernel related tracing patches. Update the MAINTAINERS file to reflect the changes. Link: https://lore.kernel.org/linux-trace-kernel/20221017140513.14b9ce2e@gandalf.local.home Signed-off-by: Steven Rostedt (Google) --- MAINTAINERS | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 2585e7edc335..d12576150a70 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8461,6 +8461,9 @@ FUNCTION HOOKS (FTRACE) M: Steven Rostedt M: Masami Hiramatsu R: Mark Rutland +L: linux-kernel@vger.kernel.org +L: linux-trace-kernel@vger.kernel.org +Q: https://patchwork.kernel.org/project/linux-trace-kernel/list/ S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace.git F: Documentation/trace/ftrace* @@ -11483,6 +11486,9 @@ M: Naveen N. Rao M: Anil S Keshavamurthy M: "David S. 
Miller" M: Masami Hiramatsu +L: linux-kernel@vger.kernel.org +L: linux-trace-kernel@vger.kernel.org +Q: https://patchwork.kernel.org/project/linux-trace-kernel/list/ S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace.git F: Documentation/trace/kprobes.rst @@ -20862,6 +20868,9 @@ F: drivers/hwmon/pmbus/tps546d24.c TRACING M: Steven Rostedt M: Masami Hiramatsu +L: linux-kernel@vger.kernel.org +L: linux-trace-kernel@vger.kernel.org +Q: https://patchwork.kernel.org/project/linux-trace-kernel/list/ S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace.git F: Documentation/trace/* From c2beff99eb03866df6fdbd3a93b08fd27eb8bf5c Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 28 Nov 2022 17:24:35 -0800 Subject: [PATCH 2859/4122] xfs: add debug knob to slow down writeback for fun Add a new error injection knob so that we can arbitrarily slow down writeback to test for race conditions and aberrant reclaim behavior if the writeback mechanisms are slow to issue writeback. This will enable functional testing for the ifork sequence counters introduced in commit 745b3f76d1c8 ("xfs: maintain a sequence count for inode fork manipulations"). Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_errortag.h | 4 +++- fs/xfs/xfs_aops.c | 14 ++++++++++-- fs/xfs/xfs_error.c | 16 +++++++++++++ fs/xfs/xfs_error.h | 13 +++++++++++ fs/xfs/xfs_trace.c | 2 ++ fs/xfs/xfs_trace.h | 44 ++++++++++++++++++++++++++++++++++++ 6 files changed, 90 insertions(+), 3 deletions(-) diff --git a/fs/xfs/libxfs/xfs_errortag.h b/fs/xfs/libxfs/xfs_errortag.h index 580ccbd5aadc..f5f629174eca 100644 --- a/fs/xfs/libxfs/xfs_errortag.h +++ b/fs/xfs/libxfs/xfs_errortag.h @@ -61,7 +61,8 @@ #define XFS_ERRTAG_LARP 39 #define XFS_ERRTAG_DA_LEAF_SPLIT 40 #define XFS_ERRTAG_ATTR_LEAF_TO_NODE 41 -#define XFS_ERRTAG_MAX 42 +#define XFS_ERRTAG_WB_DELAY_MS 42 +#define XFS_ERRTAG_MAX 43 /* * Random factors for above tags, 1 means always, 2 means 1/2 time, etc. @@ -107,5 +108,6 @@ #define XFS_RANDOM_LARP 1 #define XFS_RANDOM_DA_LEAF_SPLIT 1 #define XFS_RANDOM_ATTR_LEAF_TO_NODE 1 +#define XFS_RANDOM_WB_DELAY_MS 3000 #endif /* __XFS_ERRORTAG_H_ */ diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index a22d90af40c8..41734202796f 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -17,6 +17,8 @@ #include "xfs_bmap.h" #include "xfs_bmap_util.h" #include "xfs_reflink.h" +#include "xfs_errortag.h" +#include "xfs_error.h" struct xfs_writepage_ctx { struct iomap_writepage_ctx ctx; @@ -217,11 +219,17 @@ xfs_imap_valid( * checked (and found nothing at this offset) could have added * overlapping blocks. 
*/ - if (XFS_WPC(wpc)->data_seq != READ_ONCE(ip->i_df.if_seq)) + if (XFS_WPC(wpc)->data_seq != READ_ONCE(ip->i_df.if_seq)) { + trace_xfs_wb_data_iomap_invalid(ip, &wpc->iomap, + XFS_WPC(wpc)->data_seq, XFS_DATA_FORK); return false; + } if (xfs_inode_has_cow_data(ip) && - XFS_WPC(wpc)->cow_seq != READ_ONCE(ip->i_cowfp->if_seq)) + XFS_WPC(wpc)->cow_seq != READ_ONCE(ip->i_cowfp->if_seq)) { + trace_xfs_wb_cow_iomap_invalid(ip, &wpc->iomap, + XFS_WPC(wpc)->cow_seq, XFS_COW_FORK); return false; + } return true; } @@ -285,6 +293,8 @@ xfs_map_blocks( if (xfs_is_shutdown(mp)) return -EIO; + XFS_ERRORTAG_DELAY(mp, XFS_ERRTAG_WB_DELAY_MS); + /* * COW fork blocks can overlap data fork blocks even if the blocks * aren't shared. COW I/O always takes precedent, so we must always diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index dea3c0649d2f..2d6e3c718e03 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -60,6 +60,7 @@ static unsigned int xfs_errortag_random_default[] = { XFS_RANDOM_LARP, XFS_RANDOM_DA_LEAF_SPLIT, XFS_RANDOM_ATTR_LEAF_TO_NODE, + XFS_RANDOM_WB_DELAY_MS, }; struct xfs_errortag_attr { @@ -175,6 +176,7 @@ XFS_ERRORTAG_ATTR_RW(ag_resv_fail, XFS_ERRTAG_AG_RESV_FAIL); XFS_ERRORTAG_ATTR_RW(larp, XFS_ERRTAG_LARP); XFS_ERRORTAG_ATTR_RW(da_leaf_split, XFS_ERRTAG_DA_LEAF_SPLIT); XFS_ERRORTAG_ATTR_RW(attr_leaf_to_node, XFS_ERRTAG_ATTR_LEAF_TO_NODE); +XFS_ERRORTAG_ATTR_RW(wb_delay_ms, XFS_ERRTAG_WB_DELAY_MS); static struct attribute *xfs_errortag_attrs[] = { XFS_ERRORTAG_ATTR_LIST(noerror), @@ -218,6 +220,7 @@ static struct attribute *xfs_errortag_attrs[] = { XFS_ERRORTAG_ATTR_LIST(larp), XFS_ERRORTAG_ATTR_LIST(da_leaf_split), XFS_ERRORTAG_ATTR_LIST(attr_leaf_to_node), + XFS_ERRORTAG_ATTR_LIST(wb_delay_ms), NULL, }; ATTRIBUTE_GROUPS(xfs_errortag); @@ -267,6 +270,19 @@ xfs_errortag_valid( return true; } +bool +xfs_errortag_enabled( + struct xfs_mount *mp, + unsigned int tag) +{ + if (!mp->m_errortag) + return false; + if (!xfs_errortag_valid(tag)) + return 
false; + + return mp->m_errortag[tag] != 0; +} + bool xfs_errortag_test( struct xfs_mount *mp, diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index 5191e9145e55..dbe6c37dc697 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h @@ -45,6 +45,18 @@ extern bool xfs_errortag_test(struct xfs_mount *mp, const char *expression, const char *file, int line, unsigned int error_tag); #define XFS_TEST_ERROR(expr, mp, tag) \ ((expr) || xfs_errortag_test((mp), #expr, __FILE__, __LINE__, (tag))) +bool xfs_errortag_enabled(struct xfs_mount *mp, unsigned int tag); +#define XFS_ERRORTAG_DELAY(mp, tag) \ + do { \ + might_sleep(); \ + if (!xfs_errortag_enabled((mp), (tag))) \ + break; \ + xfs_warn_ratelimited((mp), \ +"Injecting %ums delay at file %s, line %d, on filesystem \"%s\"", \ + (mp)->m_errortag[(tag)], __FILE__, __LINE__, \ + (mp)->m_super->s_id); \ + mdelay((mp)->m_errortag[(tag)]); \ + } while (0) extern int xfs_errortag_get(struct xfs_mount *mp, unsigned int error_tag); extern int xfs_errortag_set(struct xfs_mount *mp, unsigned int error_tag, @@ -55,6 +67,7 @@ extern int xfs_errortag_clearall(struct xfs_mount *mp); #define xfs_errortag_init(mp) (0) #define xfs_errortag_del(mp) #define XFS_TEST_ERROR(expr, mp, tag) (expr) +#define XFS_ERRORTAG_DELAY(mp, tag) ((void)0) #define xfs_errortag_set(mp, tag, val) (ENOSYS) #define xfs_errortag_add(mp, tag) (ENOSYS) #define xfs_errortag_clearall(mp) (ENOSYS) diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c index d269ef57ff01..8a5dc1538aa8 100644 --- a/fs/xfs/xfs_trace.c +++ b/fs/xfs/xfs_trace.c @@ -34,6 +34,8 @@ #include "xfs_ag.h" #include "xfs_ag_resv.h" #include "xfs_error.h" +#include +#include "xfs_iomap.h" /* * We include this last to have the helpers above available for the trace diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 372d871bccc5..c9ada9577a4a 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -3352,6 +3352,50 @@ DEFINE_EVENT(xfs_inode_irec_class, name, \ TP_PROTO(struct xfs_inode 
*ip, struct xfs_bmbt_irec *irec), \ TP_ARGS(ip, irec)) +/* inode iomap invalidation events */ +DECLARE_EVENT_CLASS(xfs_wb_invalid_class, + TP_PROTO(struct xfs_inode *ip, const struct iomap *iomap, unsigned int wpcseq, int whichfork), + TP_ARGS(ip, iomap, wpcseq, whichfork), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(u64, addr) + __field(loff_t, pos) + __field(u64, len) + __field(u16, type) + __field(u16, flags) + __field(u32, wpcseq) + __field(u32, forkseq) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->addr = iomap->addr; + __entry->pos = iomap->offset; + __entry->len = iomap->length; + __entry->type = iomap->type; + __entry->flags = iomap->flags; + __entry->wpcseq = wpcseq; + __entry->forkseq = READ_ONCE(xfs_ifork_ptr(ip, whichfork)->if_seq); + ), + TP_printk("dev %d:%d ino 0x%llx pos 0x%llx addr 0x%llx bytecount 0x%llx type 0x%x flags 0x%x wpcseq 0x%x forkseq 0x%x", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->pos, + __entry->addr, + __entry->len, + __entry->type, + __entry->flags, + __entry->wpcseq, + __entry->forkseq) +); +#define DEFINE_WB_INVALID_EVENT(name) \ +DEFINE_EVENT(xfs_wb_invalid_class, name, \ + TP_PROTO(struct xfs_inode *ip, const struct iomap *iomap, unsigned int wpcseq, int whichfork), \ + TP_ARGS(ip, iomap, wpcseq, whichfork)) +DEFINE_WB_INVALID_EVENT(xfs_wb_cow_iomap_invalid); +DEFINE_WB_INVALID_EVENT(xfs_wb_data_iomap_invalid); + /* refcount/reflink tracepoint definitions */ /* reflink tracepoints */ From 254e3459285cbf2174350bbc0051e475e1bc5196 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 28 Nov 2022 17:24:36 -0800 Subject: [PATCH 2860/4122] xfs: add debug knob to slow down write for fun Add a new error injection knob so that we can arbitrarily slow down pagecache writes to test for race conditions and aberrant reclaim behavior if the writeback mechanisms are slow to issue writeback. 
This will enable functional testing for the ifork sequence counters introduced in commit 304a68b9c63b ("xfs: use iomap_valid method to detect stale cached iomaps") that fixes write racing with reclaim writeback. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_errortag.h | 4 +++- fs/xfs/xfs_error.c | 3 +++ fs/xfs/xfs_iomap.c | 14 ++++++++++-- fs/xfs/xfs_trace.h | 42 ++++++++++++++++++++++++++++++++++++ 4 files changed, 60 insertions(+), 3 deletions(-) diff --git a/fs/xfs/libxfs/xfs_errortag.h b/fs/xfs/libxfs/xfs_errortag.h index f5f629174eca..01a9e86b3037 100644 --- a/fs/xfs/libxfs/xfs_errortag.h +++ b/fs/xfs/libxfs/xfs_errortag.h @@ -62,7 +62,8 @@ #define XFS_ERRTAG_DA_LEAF_SPLIT 40 #define XFS_ERRTAG_ATTR_LEAF_TO_NODE 41 #define XFS_ERRTAG_WB_DELAY_MS 42 -#define XFS_ERRTAG_MAX 43 +#define XFS_ERRTAG_WRITE_DELAY_MS 43 +#define XFS_ERRTAG_MAX 44 /* * Random factors for above tags, 1 means always, 2 means 1/2 time, etc. @@ -109,5 +110,6 @@ #define XFS_RANDOM_DA_LEAF_SPLIT 1 #define XFS_RANDOM_ATTR_LEAF_TO_NODE 1 #define XFS_RANDOM_WB_DELAY_MS 3000 +#define XFS_RANDOM_WRITE_DELAY_MS 3000 #endif /* __XFS_ERRORTAG_H_ */ diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index 2d6e3c718e03..713341d246d1 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -61,6 +61,7 @@ static unsigned int xfs_errortag_random_default[] = { XFS_RANDOM_DA_LEAF_SPLIT, XFS_RANDOM_ATTR_LEAF_TO_NODE, XFS_RANDOM_WB_DELAY_MS, + XFS_RANDOM_WRITE_DELAY_MS, }; struct xfs_errortag_attr { @@ -177,6 +178,7 @@ XFS_ERRORTAG_ATTR_RW(larp, XFS_ERRTAG_LARP); XFS_ERRORTAG_ATTR_RW(da_leaf_split, XFS_ERRTAG_DA_LEAF_SPLIT); XFS_ERRORTAG_ATTR_RW(attr_leaf_to_node, XFS_ERRTAG_ATTR_LEAF_TO_NODE); XFS_ERRORTAG_ATTR_RW(wb_delay_ms, XFS_ERRTAG_WB_DELAY_MS); +XFS_ERRORTAG_ATTR_RW(write_delay_ms, XFS_ERRTAG_WRITE_DELAY_MS); static struct attribute *xfs_errortag_attrs[] = { XFS_ERRORTAG_ATTR_LIST(noerror), @@ -221,6 +223,7 @@ static struct attribute *xfs_errortag_attrs[] = { 
XFS_ERRORTAG_ATTR_LIST(da_leaf_split), XFS_ERRORTAG_ATTR_LIST(attr_leaf_to_node), XFS_ERRORTAG_ATTR_LIST(wb_delay_ms), + XFS_ERRORTAG_ATTR_LIST(write_delay_ms), NULL, }; ATTRIBUTE_GROUPS(xfs_errortag); diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 1bdd7afc1010..1005f1e36545 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -27,6 +27,8 @@ #include "xfs_dquot_item.h" #include "xfs_dquot.h" #include "xfs_reflink.h" +#include "xfs_error.h" +#include "xfs_errortag.h" #define XFS_ALLOC_ALIGN(mp, off) \ (((off) >> mp->m_allocsize_log) << mp->m_allocsize_log) @@ -71,8 +73,16 @@ xfs_iomap_valid( struct inode *inode, const struct iomap *iomap) { - return iomap->validity_cookie == - xfs_iomap_inode_sequence(XFS_I(inode), iomap->flags); + struct xfs_inode *ip = XFS_I(inode); + + if (iomap->validity_cookie != + xfs_iomap_inode_sequence(ip, iomap->flags)) { + trace_xfs_iomap_invalid(ip, iomap); + return false; + } + + XFS_ERRORTAG_DELAY(ip->i_mount, XFS_ERRTAG_WRITE_DELAY_MS); + return true; } const struct iomap_page_ops xfs_iomap_page_ops = { diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index c9ada9577a4a..421d1e504ac4 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -3396,6 +3396,48 @@ DEFINE_EVENT(xfs_wb_invalid_class, name, \ DEFINE_WB_INVALID_EVENT(xfs_wb_cow_iomap_invalid); DEFINE_WB_INVALID_EVENT(xfs_wb_data_iomap_invalid); +DECLARE_EVENT_CLASS(xfs_iomap_invalid_class, + TP_PROTO(struct xfs_inode *ip, const struct iomap *iomap), + TP_ARGS(ip, iomap), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(u64, addr) + __field(loff_t, pos) + __field(u64, len) + __field(u64, validity_cookie) + __field(u64, inodeseq) + __field(u16, type) + __field(u16, flags) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->addr = iomap->addr; + __entry->pos = iomap->offset; + __entry->len = iomap->length; + __entry->validity_cookie = iomap->validity_cookie; + __entry->type = 
iomap->type; + __entry->flags = iomap->flags; + __entry->inodeseq = xfs_iomap_inode_sequence(ip, iomap->flags); + ), + TP_printk("dev %d:%d ino 0x%llx pos 0x%llx addr 0x%llx bytecount 0x%llx type 0x%x flags 0x%x validity_cookie 0x%llx inodeseq 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->pos, + __entry->addr, + __entry->len, + __entry->type, + __entry->flags, + __entry->validity_cookie, + __entry->inodeseq) +); +#define DEFINE_IOMAP_INVALID_EVENT(name) \ +DEFINE_EVENT(xfs_iomap_invalid_class, name, \ + TP_PROTO(struct xfs_inode *ip, const struct iomap *iomap), \ + TP_ARGS(ip, iomap)) +DEFINE_IOMAP_INVALID_EVENT(xfs_iomap_invalid); + /* refcount/reflink tracepoint definitions */ /* reflink tracepoints */ From 1ab30c610630da5391a373cddb8a065bf4c4bc01 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 22 Nov 2022 19:12:26 +0800 Subject: [PATCH 2861/4122] usb: roles: fix of node refcount leak in usb_role_switch_is_parent() I got the following report while doing device(mt6370-tcpc) load test with CONFIG_OF_UNITTEST and CONFIG_OF_DYNAMIC enabled: OF: ERROR: memory leak, expected refcount 1 instead of 2, of_node_get()/of_node_put() unbalanced - destroy cset entry: attach overlay node /i2c/pmic@34 The 'parent' returned by fwnode_get_parent() with refcount incremented. it needs be put after using. 
Fixes: 6fadd72943b8 ("usb: roles: get usb-role-switch from parent") Reviewed-by: Heikki Krogerus Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221122111226.251588-1-yangyingliang@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/roles/class.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/roles/class.c b/drivers/usb/roles/class.c index dfaed7eee94f..32e6d19f7011 100644 --- a/drivers/usb/roles/class.c +++ b/drivers/usb/roles/class.c @@ -106,10 +106,13 @@ usb_role_switch_is_parent(struct fwnode_handle *fwnode) struct fwnode_handle *parent = fwnode_get_parent(fwnode); struct device *dev; - if (!parent || !fwnode_property_present(parent, "usb-role-switch")) + if (!fwnode_property_present(parent, "usb-role-switch")) { + fwnode_handle_put(parent); return NULL; + } dev = class_find_device_by_fwnode(role_class, parent); + fwnode_handle_put(parent); return dev ? to_role_switch(dev) : ERR_PTR(-EPROBE_DEFER); } From e0dced9c7d4763fd97c86a13902d135f03cc42eb Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Wed, 23 Nov 2022 11:30:21 +0200 Subject: [PATCH 2862/4122] usb: typec: ucsi: Resume in separate work It can take more than one second to check each connector when the system is resumed. So if you have, say, eight connectors, it may take eight seconds for ucsi_resume() to finish. That's a bit too much. This will modify ucsi_resume() so that it schedules a work where the interface is actually resumed instead of checking the connectors directly. The connections will also be checked in separate tasks which are queued for each connector separately. 
Link: https://bugzilla.kernel.org/show_bug.cgi?id=216706 Fixes: 99f6d4361113 ("usb: typec: ucsi: Check the connection on resume") Cc: Reported-by: Todd Brandt Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20221123093021.25981-1-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 17 +++++++++++++---- drivers/usb/typec/ucsi/ucsi.h | 1 + 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index a7987fc764cc..eabe519013e7 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -1270,8 +1270,9 @@ err: return ret; } -int ucsi_resume(struct ucsi *ucsi) +static void ucsi_resume_work(struct work_struct *work) { + struct ucsi *ucsi = container_of(work, struct ucsi, resume_work); struct ucsi_connector *con; u64 command; int ret; @@ -1279,15 +1280,21 @@ int ucsi_resume(struct ucsi *ucsi) /* Restore UCSI notification enable mask after system resume */ command = UCSI_SET_NOTIFICATION_ENABLE | ucsi->ntfy; ret = ucsi_send_command(ucsi, command, NULL, 0); - if (ret < 0) - return ret; + if (ret < 0) { + dev_err(ucsi->dev, "failed to re-enable notifications (%d)\n", ret); + return; + } for (con = ucsi->connector; con->port; con++) { mutex_lock(&con->lock); - ucsi_check_connection(con); + ucsi_partner_task(con, ucsi_check_connection, 1, 0); mutex_unlock(&con->lock); } +} +int ucsi_resume(struct ucsi *ucsi) +{ + queue_work(system_long_wq, &ucsi->resume_work); return 0; } EXPORT_SYMBOL_GPL(ucsi_resume); @@ -1347,6 +1354,7 @@ struct ucsi *ucsi_create(struct device *dev, const struct ucsi_operations *ops) if (!ucsi) return ERR_PTR(-ENOMEM); + INIT_WORK(&ucsi->resume_work, ucsi_resume_work); INIT_DELAYED_WORK(&ucsi->work, ucsi_init_work); mutex_init(&ucsi->ppm_lock); ucsi->dev = dev; @@ -1401,6 +1409,7 @@ void ucsi_unregister(struct ucsi *ucsi) /* Make sure that we are not in the middle of driver initialization */ 
cancel_delayed_work_sync(&ucsi->work); + cancel_work_sync(&ucsi->resume_work); /* Disable notifications */ ucsi->ops->async_write(ucsi, UCSI_CONTROL, &cmd, sizeof(cmd)); diff --git a/drivers/usb/typec/ucsi/ucsi.h b/drivers/usb/typec/ucsi/ucsi.h index 8eb391e3e592..c968474ee547 100644 --- a/drivers/usb/typec/ucsi/ucsi.h +++ b/drivers/usb/typec/ucsi/ucsi.h @@ -287,6 +287,7 @@ struct ucsi { struct ucsi_capability cap; struct ucsi_connector *connector; + struct work_struct resume_work; struct delayed_work work; int work_count; #define UCSI_ROLE_SWITCH_RETRY_PER_HZ 10 From 57b7b733b1a7aeab25bc2670afff608214284863 Mon Sep 17 00:00:00 2001 From: Andrzej Pietrasiewicz Date: Wed, 23 Nov 2022 12:07:46 +0100 Subject: [PATCH 2863/4122] usb: gadget: function: Simplify diagnostic messaging in printer Don't issue messages which can be easily achieved with ftrace. In case of printer_open() the return code is propagated to other layers so the user will know about -EBUSY anyway. Signed-off-by: Andrzej Pietrasiewicz Link: https://lore.kernel.org/r/20221123110746.59611-1-andrzej.p@collabora.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_printer.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/usb/gadget/function/f_printer.c b/drivers/usb/gadget/function/f_printer.c index a881c69b1f2b..4903d761a872 100644 --- a/drivers/usb/gadget/function/f_printer.c +++ b/drivers/usb/gadget/function/f_printer.c @@ -364,7 +364,7 @@ printer_open(struct inode *inode, struct file *fd) spin_unlock_irqrestore(&dev->lock, flags); kref_get(&dev->kref); - DBG(dev, "printer_open returned %x\n", ret); + return ret; } @@ -382,7 +382,6 @@ printer_close(struct inode *inode, struct file *fd) spin_unlock_irqrestore(&dev->lock, flags); kref_put(&dev->kref, printer_dev_free); - DBG(dev, "printer_close\n"); return 0; } @@ -848,8 +847,6 @@ static void printer_reset_interface(struct printer_dev *dev) if (dev->interface < 0) return; - DBG(dev, "%s\n", __func__); - 
if (dev->in_ep->desc) usb_ep_disable(dev->in_ep); @@ -887,8 +884,6 @@ static void printer_soft_reset(struct printer_dev *dev) { struct usb_request *req; - INFO(dev, "Received Printer Reset Request\n"); - if (usb_ep_disable(dev->in_ep)) DBG(dev, "Failed to disable USB in_ep\n"); if (usb_ep_disable(dev->out_ep)) @@ -1185,8 +1180,6 @@ static void printer_func_disable(struct usb_function *f) { struct printer_dev *dev = func_to_printer(f); - DBG(dev, "%s\n", __func__); - printer_reset_interface(dev); } From 3c347cdafa3db43337870006e5c2d7b78a8dae20 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 25 Nov 2022 14:41:20 +0800 Subject: [PATCH 2864/4122] usb: core: hcd: Fix return value check in usb_hcd_setup_local_mem() If dmam_alloc_attrs() fails, it returns NULL pointer and never return ERR_PTR(), so repleace IS_ERR() with IS_ERR_OR_NULL() and if it's NULL, returns -ENOMEM. Fixes: 9ba26f5cecd8 ("ARM: sa1100/assabet: move dmabounce hack to ohci driver") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221125064120.2842452-1-yangyingliang@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index faeaace0d197..8300baedafd2 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -3133,8 +3133,12 @@ int usb_hcd_setup_local_mem(struct usb_hcd *hcd, phys_addr_t phys_addr, GFP_KERNEL, DMA_ATTR_WRITE_COMBINE); - if (IS_ERR(local_mem)) + if (IS_ERR_OR_NULL(local_mem)) { + if (!local_mem) + return -ENOMEM; + return PTR_ERR(local_mem); + } /* * Here we pass a dma_addr_t but the arg type is a phys_addr_t. 
From f05f80f217bf52443a2582bca19fd78188333f25 Mon Sep 17 00:00:00 2001 From: Shruthi Sanil Date: Fri, 25 Nov 2022 16:23:27 +0530 Subject: [PATCH 2865/4122] usb: dwc3: pci: Update PCIe device ID for USB3 controller on CPU sub-system for Raptor Lake The device ID 0xa70e is defined for the USB3 device controller in the CPU sub-system of Raptor Lake platform. Hence updating the ID accordingly. Fixes: bad0d1d726ac ("usb: dwc3: pci: Add support for Intel Raptor Lake") Cc: stable Reviewed-by: Heikki Krogerus Signed-off-by: Shruthi Sanil Link: https://lore.kernel.org/r/20221125105327.27945-1-shruthi.sanil@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index fb14511b1e10..89c9ab2b19f8 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -45,7 +45,7 @@ #define PCI_DEVICE_ID_INTEL_ADLN 0x465e #define PCI_DEVICE_ID_INTEL_ADLN_PCH 0x54ee #define PCI_DEVICE_ID_INTEL_ADLS 0x7ae1 -#define PCI_DEVICE_ID_INTEL_RPL 0x460e +#define PCI_DEVICE_ID_INTEL_RPL 0xa70e #define PCI_DEVICE_ID_INTEL_RPLS 0x7a61 #define PCI_DEVICE_ID_INTEL_MTLP 0x7ec1 #define PCI_DEVICE_ID_INTEL_MTL 0x7e7e From 01792c6036af577e4cb1aa7b9ffce7a4882c86b5 Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Mon, 28 Nov 2022 16:13:06 +0800 Subject: [PATCH 2866/4122] usb: host: fix a typo in ehci.h Change "ehci_hq" to "ehci_qh" in this comment. Signed-off-by: Xu Yang Link: https://lore.kernel.org/r/20221128081306.2772729-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/ehci.h b/drivers/usb/host/ehci.h index ad3f13a3eaf1..c5c7f8782549 100644 --- a/drivers/usb/host/ehci.h +++ b/drivers/usb/host/ehci.h @@ -471,7 +471,7 @@ struct ehci_iso_sched { * acts like a qh would, if EHCI had them for ISO. 
*/ struct ehci_iso_stream { - /* first field matches ehci_hq, but is NULL */ + /* first field matches ehci_qh, but is NULL */ struct ehci_qh_hw *hw; u8 bEndpointAddress; From 27ef17849779edd5600aa27d1a246ad424761971 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Mon, 28 Nov 2022 19:29:54 +0900 Subject: [PATCH 2867/4122] usb: add usb_set_intfdata() documentation USB drivers do not need to call usb_set_intfdata(intf, NULL) in their usb_driver::disconnect callback because the core already does it in [1]. However, this fact is widely unknown, c.f.: $ git grep "usb_set_intfdata(.*NULL)" | wc -l 215 Especially, setting the interface to NULL before all action completed can result in a NULL pointer dereference. Not calling usb_set_intfdata() at all in disconnect() is the safest method. Add documentation to usb_set_intfdata() to clarify this point. Also remove the call in usb-skeletion's disconnect() not to confuse the new comers. [1] function usb_unbind_interface() from drivers/usb/core/driver.c Link: https://elixir.bootlin.com/linux/v6.0/source/drivers/usb/core/driver.c#L497 Signed-off-by: Vincent Mailhol Link: https://lore.kernel.org/r/20221128102954.3615579-1-mailhol.vincent@wanadoo.fr Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usb-skeleton.c | 1 - include/linux/usb.h | 12 ++++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/usb/usb-skeleton.c b/drivers/usb/usb-skeleton.c index d87deee3e26e..900a64ad25e4 100644 --- a/drivers/usb/usb-skeleton.c +++ b/drivers/usb/usb-skeleton.c @@ -564,7 +564,6 @@ static void skel_disconnect(struct usb_interface *interface) int minor = interface->minor; dev = usb_get_intfdata(interface); - usb_set_intfdata(interface, NULL); /* give back our minor */ usb_deregister_dev(interface, &skel_class); diff --git a/include/linux/usb.h b/include/linux/usb.h index 9ff1ad4dfad1..d4afeeec1e1a 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -265,6 +265,18 @@ static inline void 
*usb_get_intfdata(struct usb_interface *intf) return dev_get_drvdata(&intf->dev); } +/** + * usb_set_intfdata() - associate driver-specific data with the interface + * @intf: the usb interface + * @data: pointer to the device priv structure or %NULL + * + * Drivers should use this function in their probe() to associate their + * driver-specific data with the usb interface. + * + * When disconnecting, the core will take care of setting @intf back to %NULL, + * so no actions are needed on the driver side. The interface should not be set + * to %NULL before all actions completed (e.g. no outsanding URB remaining). + */ static inline void usb_set_intfdata(struct usb_interface *intf, void *data) { dev_set_drvdata(&intf->dev, data); From 03a88b0bafbe3f548729d970d8366f48718c9b19 Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Mon, 28 Nov 2022 14:33:37 +0800 Subject: [PATCH 2868/4122] usb: xhci-mtk: fix leakage of shared hcd when fail to set wakeup irq Can not set the @shared_hcd to NULL before decrease the usage count by usb_put_hcd(), this will cause the shared hcd not released. 
Fixes: 04284eb74e0c ("usb: xhci-mtk: add support runtime PM") Cc: Signed-off-by: Chunfeng Yun Link: https://lore.kernel.org/r/20221128063337.18124-1-chunfeng.yun@mediatek.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mtk.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/usb/host/xhci-mtk.c b/drivers/usb/host/xhci-mtk.c index cff3c4aea036..f7cbb08fc506 100644 --- a/drivers/usb/host/xhci-mtk.c +++ b/drivers/usb/host/xhci-mtk.c @@ -646,7 +646,6 @@ static int xhci_mtk_probe(struct platform_device *pdev) dealloc_usb3_hcd: usb_remove_hcd(xhci->shared_hcd); - xhci->shared_hcd = NULL; put_usb3_hcd: usb_put_hcd(xhci->shared_hcd); From 032399819dd5f135e6ffe446c8e97ab54eec3464 Mon Sep 17 00:00:00 2001 From: Prashant Malani Date: Tue, 22 Nov 2022 22:05:36 +0000 Subject: [PATCH 2869/4122] usb: typec: Add partner PD object wrapper Some port drivers may want to set a Type-C partner as a parent for a USB Power Delivery object, but the Type-C partner struct isn't exposed outside of the Type-C class driver. Add a wrapper to usb_power_delivery_register() which sets the provided Type-C partner as a parent to the USB PD object. This helps to avoid exposing the Type-C partner's device struct unnecessarily. 
Cc: Benson Leung Suggested-by: Heikki Krogerus Reviewed-by: Heikki Krogerus Signed-off-by: Prashant Malani Link: https://lore.kernel.org/r/20221122220538.2991775-2-pmalani@chromium.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/class.c | 19 +++++++++++++++++++ include/linux/usb/typec.h | 4 ++++ 2 files changed, 23 insertions(+) diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c index bd5e5dd70431..5897905cb4f0 100644 --- a/drivers/usb/typec/class.c +++ b/drivers/usb/typec/class.c @@ -821,6 +821,25 @@ void typec_partner_set_svdm_version(struct typec_partner *partner, } EXPORT_SYMBOL_GPL(typec_partner_set_svdm_version); +/** + * typec_partner_usb_power_delivery_register - Register Type-C partner USB Power Delivery Support + * @partner: Type-C partner device. + * @desc: Description of the USB PD contract. + * + * This routine is a wrapper around usb_power_delivery_register(). It registers + * USB Power Delivery Capabilities for a Type-C partner device. Specifically, + * it sets the Type-C partner device as a parent for the resulting USB Power Delivery object. + * + * Returns handle to struct usb_power_delivery or ERR_PTR. 
+ */ +struct usb_power_delivery * +typec_partner_usb_power_delivery_register(struct typec_partner *partner, + struct usb_power_delivery_desc *desc) +{ + return usb_power_delivery_register(&partner->dev, desc); +} +EXPORT_SYMBOL_GPL(typec_partner_usb_power_delivery_register); + /** * typec_register_partner - Register a USB Type-C Partner * @port: The USB Type-C Port the partner is connected to diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h index 7751bedcae5d..8fa781207970 100644 --- a/include/linux/usb/typec.h +++ b/include/linux/usb/typec.h @@ -23,6 +23,7 @@ struct fwnode_handle; struct device; struct usb_power_delivery; +struct usb_power_delivery_desc; enum typec_port_type { TYPEC_PORT_SRC, @@ -327,6 +328,9 @@ void typec_partner_set_svdm_version(struct typec_partner *partner, enum usb_pd_svdm_ver svdm_version); int typec_get_negotiated_svdm_version(struct typec_port *port); +struct usb_power_delivery *typec_partner_usb_power_delivery_register(struct typec_partner *partner, + struct usb_power_delivery_desc *desc); + int typec_port_set_usb_power_delivery(struct typec_port *port, struct usb_power_delivery *pd); int typec_partner_set_usb_power_delivery(struct typec_partner *partner, struct usb_power_delivery *pd); From ab3593eeef606816bcc28b12690c51379c3d12eb Mon Sep 17 00:00:00 2001 From: Prashant Malani Date: Tue, 22 Nov 2022 22:05:37 +0000 Subject: [PATCH 2870/4122] platform/chrome: cros_ec_typec: Set parent of partner PD object In order to tell what Type-C device a PD object belongs to, its parent needs to be set. Use the Type-C partner USB PD registration wrapper to set the parent appropriately for PD objects which are created for connected Type-C partners. 
Cc: Benson Leung Cc: Heikki Krogerus Reviewed-by: Heikki Krogerus Signed-off-by: Prashant Malani Link: https://lore.kernel.org/r/20221122220538.2991775-3-pmalani@chromium.org Signed-off-by: Greg Kroah-Hartman --- drivers/platform/chrome/cros_ec_typec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/chrome/cros_ec_typec.c b/drivers/platform/chrome/cros_ec_typec.c index 2a7ff14dc37e..d5bc4021aca2 100644 --- a/drivers/platform/chrome/cros_ec_typec.c +++ b/drivers/platform/chrome/cros_ec_typec.c @@ -968,7 +968,7 @@ static void cros_typec_register_partner_pdos(struct cros_typec_data *typec, if (!resp->source_cap_count && !resp->sink_cap_count) return; - port->partner_pd = usb_power_delivery_register(NULL, &desc); + port->partner_pd = typec_partner_usb_power_delivery_register(port->partner, &desc); if (IS_ERR(port->partner_pd)) { dev_warn(typec->dev, "Failed to register partner PD device, port: %d\n", port_num); return; From 57f8e00d8a82073ab7893ab8ae4055580ef9552f Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 25 Nov 2022 10:55:06 +0200 Subject: [PATCH 2871/4122] usb: musb: Drop old unused am35x glue layer The am35x glue layer is no longer in use and can be dropped. There are no longer any SoCs passing platform data for it as they are booting using devicetree. In general, the am35x SoCs are similar to am335x and ti81xx and can use the musb_dsps glue layer as long as there is a proper phy driver available. 
Cc: Arnd Bergmann Signed-off-by: Tony Lindgren Link: https://lore.kernel.org/r/20221125085506.38127-1-tony@atomide.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/Kconfig | 5 - drivers/usb/musb/Makefile | 1 - drivers/usb/musb/am35x.c | 610 -------------------------------------- 3 files changed, 616 deletions(-) delete mode 100644 drivers/usb/musb/am35x.c diff --git a/drivers/usb/musb/Kconfig b/drivers/usb/musb/Kconfig index 290df4d5d5ce..3a1f4bcea80c 100644 --- a/drivers/usb/musb/Kconfig +++ b/drivers/usb/musb/Kconfig @@ -88,11 +88,6 @@ config USB_MUSB_OMAP2PLUS depends on OMAP_CONTROL_PHY || !OMAP_CONTROL_PHY select GENERIC_PHY -config USB_MUSB_AM35X - tristate "AM35x" - depends on ARCH_OMAP - depends on NOP_USB_XCEIV - config USB_MUSB_DSPS tristate "TI DSPS platforms" depends on ARCH_OMAP2PLUS || COMPILE_TEST diff --git a/drivers/usb/musb/Makefile b/drivers/usb/musb/Makefile index 44a9e27b2157..5dccf0e453e1 100644 --- a/drivers/usb/musb/Makefile +++ b/drivers/usb/musb/Makefile @@ -16,7 +16,6 @@ musb_hdrc-$(CONFIG_DEBUG_FS) += musb_debugfs.o # Hardware Glue Layer obj-$(CONFIG_USB_MUSB_OMAP2PLUS) += omap2430.o -obj-$(CONFIG_USB_MUSB_AM35X) += am35x.o obj-$(CONFIG_USB_MUSB_DSPS) += musb_dsps.o obj-$(CONFIG_USB_MUSB_TUSB6010) += tusb6010.o obj-$(CONFIG_USB_MUSB_DA8XX) += da8xx.o diff --git a/drivers/usb/musb/am35x.c b/drivers/usb/musb/am35x.c deleted file mode 100644 index bf2c0fa6cb32..000000000000 --- a/drivers/usb/musb/am35x.c +++ /dev/null @@ -1,610 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -/* - * Texas Instruments AM35x "glue layer" - * - * Copyright (c) 2010, by Texas Instruments - * - * Based on the DA8xx "glue layer" code. - * Copyright (c) 2008-2009, MontaVista Software, Inc. - * - * This file is part of the Inventra Controller Driver for Linux. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "musb_core.h" - -/* - * AM35x specific definitions - */ -/* USB 2.0 OTG module registers */ -#define USB_REVISION_REG 0x00 -#define USB_CTRL_REG 0x04 -#define USB_STAT_REG 0x08 -#define USB_EMULATION_REG 0x0c -/* 0x10 Reserved */ -#define USB_AUTOREQ_REG 0x14 -#define USB_SRP_FIX_TIME_REG 0x18 -#define USB_TEARDOWN_REG 0x1c -#define EP_INTR_SRC_REG 0x20 -#define EP_INTR_SRC_SET_REG 0x24 -#define EP_INTR_SRC_CLEAR_REG 0x28 -#define EP_INTR_MASK_REG 0x2c -#define EP_INTR_MASK_SET_REG 0x30 -#define EP_INTR_MASK_CLEAR_REG 0x34 -#define EP_INTR_SRC_MASKED_REG 0x38 -#define CORE_INTR_SRC_REG 0x40 -#define CORE_INTR_SRC_SET_REG 0x44 -#define CORE_INTR_SRC_CLEAR_REG 0x48 -#define CORE_INTR_MASK_REG 0x4c -#define CORE_INTR_MASK_SET_REG 0x50 -#define CORE_INTR_MASK_CLEAR_REG 0x54 -#define CORE_INTR_SRC_MASKED_REG 0x58 -/* 0x5c Reserved */ -#define USB_END_OF_INTR_REG 0x60 - -/* Control register bits */ -#define AM35X_SOFT_RESET_MASK 1 - -/* USB interrupt register bits */ -#define AM35X_INTR_USB_SHIFT 16 -#define AM35X_INTR_USB_MASK (0x1ff << AM35X_INTR_USB_SHIFT) -#define AM35X_INTR_DRVVBUS 0x100 -#define AM35X_INTR_RX_SHIFT 16 -#define AM35X_INTR_TX_SHIFT 0 -#define AM35X_TX_EP_MASK 0xffff /* EP0 + 15 Tx EPs */ -#define AM35X_RX_EP_MASK 0xfffe /* 15 Rx EPs */ -#define AM35X_TX_INTR_MASK (AM35X_TX_EP_MASK << AM35X_INTR_TX_SHIFT) -#define AM35X_RX_INTR_MASK (AM35X_RX_EP_MASK << AM35X_INTR_RX_SHIFT) - -#define USB_MENTOR_CORE_OFFSET 0x400 - -struct am35x_glue { - struct device *dev; - struct platform_device *musb; - struct platform_device *phy; - struct clk *phy_clk; - struct clk *clk; -}; - -/* - * am35x_musb_enable - enable interrupts - */ -static void am35x_musb_enable(struct musb *musb) -{ - void __iomem *reg_base = musb->ctrl_base; - u32 epmask; - - /* Workaround: setup IRQs through both register sets. 
*/ - epmask = ((musb->epmask & AM35X_TX_EP_MASK) << AM35X_INTR_TX_SHIFT) | - ((musb->epmask & AM35X_RX_EP_MASK) << AM35X_INTR_RX_SHIFT); - - musb_writel(reg_base, EP_INTR_MASK_SET_REG, epmask); - musb_writel(reg_base, CORE_INTR_MASK_SET_REG, AM35X_INTR_USB_MASK); - - /* Force the DRVVBUS IRQ so we can start polling for ID change. */ - musb_writel(reg_base, CORE_INTR_SRC_SET_REG, - AM35X_INTR_DRVVBUS << AM35X_INTR_USB_SHIFT); -} - -/* - * am35x_musb_disable - disable HDRC and flush interrupts - */ -static void am35x_musb_disable(struct musb *musb) -{ - void __iomem *reg_base = musb->ctrl_base; - - musb_writel(reg_base, CORE_INTR_MASK_CLEAR_REG, AM35X_INTR_USB_MASK); - musb_writel(reg_base, EP_INTR_MASK_CLEAR_REG, - AM35X_TX_INTR_MASK | AM35X_RX_INTR_MASK); - musb_writel(reg_base, USB_END_OF_INTR_REG, 0); -} - -#define portstate(stmt) stmt - -static void am35x_musb_set_vbus(struct musb *musb, int is_on) -{ - WARN_ON(is_on && is_peripheral_active(musb)); -} - -#define POLL_SECONDS 2 - -static void otg_timer(struct timer_list *t) -{ - struct musb *musb = from_timer(musb, t, dev_timer); - void __iomem *mregs = musb->mregs; - u8 devctl; - unsigned long flags; - - /* - * We poll because AM35x's won't expose several OTG-critical - * status change events (from the transceiver) otherwise. 
- */ - devctl = musb_readb(mregs, MUSB_DEVCTL); - dev_dbg(musb->controller, "Poll devctl %02x (%s)\n", devctl, - usb_otg_state_string(musb->xceiv->otg->state)); - - spin_lock_irqsave(&musb->lock, flags); - switch (musb->xceiv->otg->state) { - case OTG_STATE_A_WAIT_BCON: - devctl &= ~MUSB_DEVCTL_SESSION; - musb_writeb(musb->mregs, MUSB_DEVCTL, devctl); - - devctl = musb_readb(musb->mregs, MUSB_DEVCTL); - if (devctl & MUSB_DEVCTL_BDEVICE) { - musb->xceiv->otg->state = OTG_STATE_B_IDLE; - MUSB_DEV_MODE(musb); - } else { - musb->xceiv->otg->state = OTG_STATE_A_IDLE; - MUSB_HST_MODE(musb); - } - break; - case OTG_STATE_A_WAIT_VFALL: - musb->xceiv->otg->state = OTG_STATE_A_WAIT_VRISE; - musb_writel(musb->ctrl_base, CORE_INTR_SRC_SET_REG, - MUSB_INTR_VBUSERROR << AM35X_INTR_USB_SHIFT); - break; - case OTG_STATE_B_IDLE: - devctl = musb_readb(mregs, MUSB_DEVCTL); - if (devctl & MUSB_DEVCTL_BDEVICE) - mod_timer(&musb->dev_timer, jiffies + POLL_SECONDS * HZ); - else - musb->xceiv->otg->state = OTG_STATE_A_IDLE; - break; - default: - break; - } - spin_unlock_irqrestore(&musb->lock, flags); -} - -static void am35x_musb_try_idle(struct musb *musb, unsigned long timeout) -{ - static unsigned long last_timer; - - if (timeout == 0) - timeout = jiffies + msecs_to_jiffies(3); - - /* Never idle if active, or when VBUS timeout is not set as host */ - if (musb->is_active || (musb->a_wait_bcon == 0 && - musb->xceiv->otg->state == OTG_STATE_A_WAIT_BCON)) { - dev_dbg(musb->controller, "%s active, deleting timer\n", - usb_otg_state_string(musb->xceiv->otg->state)); - del_timer(&musb->dev_timer); - last_timer = jiffies; - return; - } - - if (time_after(last_timer, timeout) && timer_pending(&musb->dev_timer)) { - dev_dbg(musb->controller, "Longer idle timer already pending, ignoring...\n"); - return; - } - last_timer = timeout; - - dev_dbg(musb->controller, "%s inactive, starting idle timer for %u ms\n", - usb_otg_state_string(musb->xceiv->otg->state), - jiffies_to_msecs(timeout - jiffies)); 
- mod_timer(&musb->dev_timer, timeout); -} - -static irqreturn_t am35x_musb_interrupt(int irq, void *hci) -{ - struct musb *musb = hci; - void __iomem *reg_base = musb->ctrl_base; - struct device *dev = musb->controller; - struct musb_hdrc_platform_data *plat = dev_get_platdata(dev); - struct omap_musb_board_data *data = plat->board_data; - unsigned long flags; - irqreturn_t ret = IRQ_NONE; - u32 epintr, usbintr; - - spin_lock_irqsave(&musb->lock, flags); - - /* Get endpoint interrupts */ - epintr = musb_readl(reg_base, EP_INTR_SRC_MASKED_REG); - - if (epintr) { - musb_writel(reg_base, EP_INTR_SRC_CLEAR_REG, epintr); - - musb->int_rx = - (epintr & AM35X_RX_INTR_MASK) >> AM35X_INTR_RX_SHIFT; - musb->int_tx = - (epintr & AM35X_TX_INTR_MASK) >> AM35X_INTR_TX_SHIFT; - } - - /* Get usb core interrupts */ - usbintr = musb_readl(reg_base, CORE_INTR_SRC_MASKED_REG); - if (!usbintr && !epintr) - goto eoi; - - if (usbintr) { - musb_writel(reg_base, CORE_INTR_SRC_CLEAR_REG, usbintr); - - musb->int_usb = - (usbintr & AM35X_INTR_USB_MASK) >> AM35X_INTR_USB_SHIFT; - } - /* - * DRVVBUS IRQs are the only proxy we have (a very poor one!) for - * AM35x's missing ID change IRQ. We need an ID change IRQ to - * switch appropriately between halves of the OTG state machine. - * Managing DEVCTL.SESSION per Mentor docs requires that we know its - * value but DEVCTL.BDEVICE is invalid without DEVCTL.SESSION set. - * Also, DRVVBUS pulses for SRP (but not at 5V) ... - */ - if (usbintr & (AM35X_INTR_DRVVBUS << AM35X_INTR_USB_SHIFT)) { - int drvvbus = musb_readl(reg_base, USB_STAT_REG); - void __iomem *mregs = musb->mregs; - u8 devctl = musb_readb(mregs, MUSB_DEVCTL); - int err; - - err = musb->int_usb & MUSB_INTR_VBUSERROR; - if (err) { - /* - * The Mentor core doesn't debounce VBUS as needed - * to cope with device connect current spikes. This - * means it's not uncommon for bus-powered devices - * to get VBUS errors during enumeration. 
- * - * This is a workaround, but newer RTL from Mentor - * seems to allow a better one: "re"-starting sessions - * without waiting for VBUS to stop registering in - * devctl. - */ - musb->int_usb &= ~MUSB_INTR_VBUSERROR; - musb->xceiv->otg->state = OTG_STATE_A_WAIT_VFALL; - mod_timer(&musb->dev_timer, jiffies + POLL_SECONDS * HZ); - WARNING("VBUS error workaround (delay coming)\n"); - } else if (drvvbus) { - MUSB_HST_MODE(musb); - musb->xceiv->otg->state = OTG_STATE_A_WAIT_VRISE; - portstate(musb->port1_status |= USB_PORT_STAT_POWER); - del_timer(&musb->dev_timer); - } else { - musb->is_active = 0; - MUSB_DEV_MODE(musb); - musb->xceiv->otg->state = OTG_STATE_B_IDLE; - portstate(musb->port1_status &= ~USB_PORT_STAT_POWER); - } - - /* NOTE: this must complete power-on within 100 ms. */ - dev_dbg(musb->controller, "VBUS %s (%s)%s, devctl %02x\n", - drvvbus ? "on" : "off", - usb_otg_state_string(musb->xceiv->otg->state), - err ? " ERROR" : "", - devctl); - ret = IRQ_HANDLED; - } - - /* Drop spurious RX and TX if device is disconnected */ - if (musb->int_usb & MUSB_INTR_DISCONNECT) { - musb->int_tx = 0; - musb->int_rx = 0; - } - - if (musb->int_tx || musb->int_rx || musb->int_usb) - ret |= musb_interrupt(musb); - -eoi: - /* EOI needs to be written for the IRQ to be re-asserted. 
*/ - if (ret == IRQ_HANDLED || epintr || usbintr) { - /* clear level interrupt */ - if (data->clear_irq) - data->clear_irq(); - /* write EOI */ - musb_writel(reg_base, USB_END_OF_INTR_REG, 0); - } - - /* Poll for ID change */ - if (musb->xceiv->otg->state == OTG_STATE_B_IDLE) - mod_timer(&musb->dev_timer, jiffies + POLL_SECONDS * HZ); - - spin_unlock_irqrestore(&musb->lock, flags); - - return ret; -} - -static int am35x_musb_set_mode(struct musb *musb, u8 musb_mode) -{ - struct device *dev = musb->controller; - struct musb_hdrc_platform_data *plat = dev_get_platdata(dev); - struct omap_musb_board_data *data = plat->board_data; - int retval = 0; - - if (data->set_mode) - data->set_mode(musb_mode); - else - retval = -EIO; - - return retval; -} - -static int am35x_musb_init(struct musb *musb) -{ - struct device *dev = musb->controller; - struct musb_hdrc_platform_data *plat = dev_get_platdata(dev); - struct omap_musb_board_data *data = plat->board_data; - void __iomem *reg_base = musb->ctrl_base; - u32 rev; - - musb->mregs += USB_MENTOR_CORE_OFFSET; - - /* Returns zero if e.g. not clocked */ - rev = musb_readl(reg_base, USB_REVISION_REG); - if (!rev) - return -ENODEV; - - musb->xceiv = usb_get_phy(USB_PHY_TYPE_USB2); - if (IS_ERR_OR_NULL(musb->xceiv)) - return -EPROBE_DEFER; - - timer_setup(&musb->dev_timer, otg_timer, 0); - - /* Reset the musb */ - if (data->reset) - data->reset(); - - /* Reset the controller */ - musb_writel(reg_base, USB_CTRL_REG, AM35X_SOFT_RESET_MASK); - - /* Start the on-chip PHY and its PLL. 
*/ - if (data->set_phy_power) - data->set_phy_power(1); - - msleep(5); - - musb->isr = am35x_musb_interrupt; - - /* clear level interrupt */ - if (data->clear_irq) - data->clear_irq(); - - return 0; -} - -static int am35x_musb_exit(struct musb *musb) -{ - struct device *dev = musb->controller; - struct musb_hdrc_platform_data *plat = dev_get_platdata(dev); - struct omap_musb_board_data *data = plat->board_data; - - del_timer_sync(&musb->dev_timer); - - /* Shutdown the on-chip PHY and its PLL. */ - if (data->set_phy_power) - data->set_phy_power(0); - - usb_put_phy(musb->xceiv); - - return 0; -} - -/* AM35x supports only 32bit read operation */ -static void am35x_read_fifo(struct musb_hw_ep *hw_ep, u16 len, u8 *dst) -{ - void __iomem *fifo = hw_ep->fifo; - u32 val; - int i; - - /* Read for 32bit-aligned destination address */ - if (likely((0x03 & (unsigned long) dst) == 0) && len >= 4) { - readsl(fifo, dst, len >> 2); - dst += len & ~0x03; - len &= 0x03; - } - /* - * Now read the remaining 1 to 3 byte or complete length if - * unaligned address. 
- */ - if (len > 4) { - for (i = 0; i < (len >> 2); i++) { - *(u32 *) dst = musb_readl(fifo, 0); - dst += 4; - } - len &= 0x03; - } - if (len > 0) { - val = musb_readl(fifo, 0); - memcpy(dst, &val, len); - } -} - -static const struct musb_platform_ops am35x_ops = { - .quirks = MUSB_DMA_INVENTRA | MUSB_INDEXED_EP, - .init = am35x_musb_init, - .exit = am35x_musb_exit, - - .read_fifo = am35x_read_fifo, -#ifdef CONFIG_USB_INVENTRA_DMA - .dma_init = musbhs_dma_controller_create, - .dma_exit = musbhs_dma_controller_destroy, -#endif - .enable = am35x_musb_enable, - .disable = am35x_musb_disable, - - .set_mode = am35x_musb_set_mode, - .try_idle = am35x_musb_try_idle, - - .set_vbus = am35x_musb_set_vbus, -}; - -static const struct platform_device_info am35x_dev_info = { - .name = "musb-hdrc", - .id = PLATFORM_DEVID_AUTO, - .dma_mask = DMA_BIT_MASK(32), -}; - -static int am35x_probe(struct platform_device *pdev) -{ - struct musb_hdrc_platform_data *pdata = dev_get_platdata(&pdev->dev); - struct platform_device *musb; - struct am35x_glue *glue; - struct platform_device_info pinfo; - struct clk *phy_clk; - struct clk *clk; - - int ret = -ENOMEM; - - glue = kzalloc(sizeof(*glue), GFP_KERNEL); - if (!glue) - goto err0; - - phy_clk = clk_get(&pdev->dev, "fck"); - if (IS_ERR(phy_clk)) { - dev_err(&pdev->dev, "failed to get PHY clock\n"); - ret = PTR_ERR(phy_clk); - goto err3; - } - - clk = clk_get(&pdev->dev, "ick"); - if (IS_ERR(clk)) { - dev_err(&pdev->dev, "failed to get clock\n"); - ret = PTR_ERR(clk); - goto err4; - } - - ret = clk_enable(phy_clk); - if (ret) { - dev_err(&pdev->dev, "failed to enable PHY clock\n"); - goto err5; - } - - ret = clk_enable(clk); - if (ret) { - dev_err(&pdev->dev, "failed to enable clock\n"); - goto err6; - } - - glue->dev = &pdev->dev; - glue->phy_clk = phy_clk; - glue->clk = clk; - - pdata->platform_ops = &am35x_ops; - - glue->phy = usb_phy_generic_register(); - if (IS_ERR(glue->phy)) { - ret = PTR_ERR(glue->phy); - goto err7; - } - 
platform_set_drvdata(pdev, glue); - - pinfo = am35x_dev_info; - pinfo.parent = &pdev->dev; - pinfo.res = pdev->resource; - pinfo.num_res = pdev->num_resources; - pinfo.data = pdata; - pinfo.size_data = sizeof(*pdata); - pinfo.fwnode = of_fwnode_handle(pdev->dev.of_node); - pinfo.of_node_reused = true; - - glue->musb = musb = platform_device_register_full(&pinfo); - if (IS_ERR(musb)) { - ret = PTR_ERR(musb); - dev_err(&pdev->dev, "failed to register musb device: %d\n", ret); - goto err8; - } - - return 0; - -err8: - usb_phy_generic_unregister(glue->phy); - -err7: - clk_disable(clk); - -err6: - clk_disable(phy_clk); - -err5: - clk_put(clk); - -err4: - clk_put(phy_clk); - -err3: - kfree(glue); - -err0: - return ret; -} - -static int am35x_remove(struct platform_device *pdev) -{ - struct am35x_glue *glue = platform_get_drvdata(pdev); - - platform_device_unregister(glue->musb); - usb_phy_generic_unregister(glue->phy); - clk_disable(glue->clk); - clk_disable(glue->phy_clk); - clk_put(glue->clk); - clk_put(glue->phy_clk); - kfree(glue); - - return 0; -} - -#ifdef CONFIG_PM_SLEEP -static int am35x_suspend(struct device *dev) -{ - struct am35x_glue *glue = dev_get_drvdata(dev); - struct musb_hdrc_platform_data *plat = dev_get_platdata(dev); - struct omap_musb_board_data *data = plat->board_data; - - /* Shutdown the on-chip PHY and its PLL. */ - if (data->set_phy_power) - data->set_phy_power(0); - - clk_disable(glue->phy_clk); - clk_disable(glue->clk); - - return 0; -} - -static int am35x_resume(struct device *dev) -{ - struct am35x_glue *glue = dev_get_drvdata(dev); - struct musb_hdrc_platform_data *plat = dev_get_platdata(dev); - struct omap_musb_board_data *data = plat->board_data; - int ret; - - /* Start the on-chip PHY and its PLL. 
*/ - if (data->set_phy_power) - data->set_phy_power(1); - - ret = clk_enable(glue->phy_clk); - if (ret) { - dev_err(dev, "failed to enable PHY clock\n"); - return ret; - } - - ret = clk_enable(glue->clk); - if (ret) { - dev_err(dev, "failed to enable clock\n"); - return ret; - } - - return 0; -} -#endif - -static SIMPLE_DEV_PM_OPS(am35x_pm_ops, am35x_suspend, am35x_resume); - -static struct platform_driver am35x_driver = { - .probe = am35x_probe, - .remove = am35x_remove, - .driver = { - .name = "musb-am35x", - .pm = &am35x_pm_ops, - }, -}; - -MODULE_DESCRIPTION("AM35x MUSB Glue Layer"); -MODULE_AUTHOR("Ajay Kumar Gupta "); -MODULE_LICENSE("GPL v2"); -module_platform_driver(am35x_driver); From ae423ef5d095e09970f52c08020fdbf7f9d87c22 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Tue, 22 Nov 2022 03:51:38 -0500 Subject: [PATCH 2872/4122] usb: cdnsp: fix lack of ZLP for ep0 Patch implements the handling of ZLP for control transfer. To send the ZLP driver must prepare the extra TRB in TD with length set to zero and TRB type to TRB_NORMAL. The first TRB must have set TRB_CHAIN flag, TD_SIZE = 1 and TRB type to TRB_DATA. 
Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") cc: Reviewed-by: Peter Chen Signed-off-by: Pawel Laszczak Link: https://lore.kernel.org/r/20221122085138.332434-1-pawell@cadence.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdnsp-ring.c | 42 ++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/drivers/usb/cdns3/cdnsp-ring.c b/drivers/usb/cdns3/cdnsp-ring.c index 2f29431f612e..b23e543b3a3d 100644 --- a/drivers/usb/cdns3/cdnsp-ring.c +++ b/drivers/usb/cdns3/cdnsp-ring.c @@ -2006,10 +2006,11 @@ int cdnsp_queue_bulk_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq) int cdnsp_queue_ctrl_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq) { - u32 field, length_field, remainder; + u32 field, length_field, zlp = 0; struct cdnsp_ep *pep = preq->pep; struct cdnsp_ring *ep_ring; int num_trbs; + u32 maxp; int ret; ep_ring = cdnsp_request_to_transfer_ring(pdev, preq); @@ -2019,26 +2020,33 @@ int cdnsp_queue_ctrl_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq) /* 1 TRB for data, 1 for status */ num_trbs = (pdev->three_stage_setup) ? 2 : 1; + maxp = usb_endpoint_maxp(pep->endpoint.desc); + + if (preq->request.zero && preq->request.length && + (preq->request.length % maxp == 0)) { + num_trbs++; + zlp = 1; + } + ret = cdnsp_prepare_transfer(pdev, preq, num_trbs); if (ret) return ret; /* If there's data, queue data TRBs */ - if (pdev->ep0_expect_in) - field = TRB_TYPE(TRB_DATA) | TRB_IOC; - else - field = TRB_ISP | TRB_TYPE(TRB_DATA) | TRB_IOC; - if (preq->request.length > 0) { - remainder = cdnsp_td_remainder(pdev, 0, preq->request.length, - preq->request.length, preq, 1, 0); + field = TRB_TYPE(TRB_DATA); - length_field = TRB_LEN(preq->request.length) | - TRB_TD_SIZE(remainder) | TRB_INTR_TARGET(0); + if (zlp) + field |= TRB_CHAIN; + else + field |= TRB_IOC | (pdev->ep0_expect_in ? 
0 : TRB_ISP); if (pdev->ep0_expect_in) field |= TRB_DIR_IN; + length_field = TRB_LEN(preq->request.length) | + TRB_TD_SIZE(zlp) | TRB_INTR_TARGET(0); + cdnsp_queue_trb(pdev, ep_ring, true, lower_32_bits(preq->request.dma), upper_32_bits(preq->request.dma), length_field, @@ -2046,6 +2054,20 @@ int cdnsp_queue_ctrl_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq) TRB_SETUPID(pdev->setup_id) | pdev->setup_speed); + if (zlp) { + field = TRB_TYPE(TRB_NORMAL) | TRB_IOC; + + if (!pdev->ep0_expect_in) + field = TRB_ISP; + + cdnsp_queue_trb(pdev, ep_ring, true, + lower_32_bits(preq->request.dma), + upper_32_bits(preq->request.dma), 0, + field | ep_ring->cycle_state | + TRB_SETUPID(pdev->setup_id) | + pdev->setup_speed); + } + pdev->ep0_stage = CDNSP_DATA_STAGE; } From 22683e480b370ad1b3a34cfa461028d1f51da12d Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sun, 27 Nov 2022 15:52:30 +0100 Subject: [PATCH 2873/4122] usb: misc: onboard_usb_hub: Drop obsolete dependency on COMPILE_TEST Since commit 0166dc11be91 ("of: make CONFIG_OF user selectable"), it is possible to test-build any driver which depends on OF on any architecture by explicitly selecting OF. Therefore depending on COMPILE_TEST as an alternative is no longer needed. It is actually better to always build such drivers with OF enabled, so that the test builds are closer to how each driver will actually be built on its intended target. Building them without OF may not test much as the compiler will optimize out potentially large parts of the code. In the worst case, this could even pop false positive warnings. Dropping COMPILE_TEST here improves the quality of our testing and avoids wasting time on non-existent issues. 
Cc: Matthias Kaehlcke Cc: Greg Kroah-Hartman Signed-off-by: Jean Delvare Link: https://lore.kernel.org/r/20221127155230.144886b7@endymion.delvare Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/misc/Kconfig b/drivers/usb/misc/Kconfig index 9367c12c7e6f..a5f7652db7da 100644 --- a/drivers/usb/misc/Kconfig +++ b/drivers/usb/misc/Kconfig @@ -298,7 +298,7 @@ config BRCM_USB_PINMAP config USB_ONBOARD_HUB tristate "Onboard USB hub support" - depends on OF || COMPILE_TEST + depends on OF help Say Y here if you want to support discrete onboard USB hubs that don't require an additional control bus for initialization, but From 49b42475dd8a9fddbb2f3dc17ff6e4b115c80bfb Mon Sep 17 00:00:00 2001 From: Allen-KH Cheng Date: Wed, 23 Nov 2022 21:55:27 +0800 Subject: [PATCH 2874/4122] dt-bindings: usb: mtu3: add compatible for mt8186 Add a new compatible for mt8186 SoC. Signed-off-by: Allen-KH Cheng Link: https://lore.kernel.org/r/20221123135531.23221-2-allen-kh.cheng@mediatek.com Acked-by: Krzysztof Kozlowski Reviewed-by: Matthias Brugger Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/usb/mediatek,mtu3.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/usb/mediatek,mtu3.yaml b/Documentation/devicetree/bindings/usb/mediatek,mtu3.yaml index 80750b0f458a..7168110e2f9d 100644 --- a/Documentation/devicetree/bindings/usb/mediatek,mtu3.yaml +++ b/Documentation/devicetree/bindings/usb/mediatek,mtu3.yaml @@ -24,6 +24,7 @@ properties: - mediatek,mt2712-mtu3 - mediatek,mt8173-mtu3 - mediatek,mt8183-mtu3 + - mediatek,mt8186-mtu3 - mediatek,mt8188-mtu3 - mediatek,mt8192-mtu3 - mediatek,mt8195-mtu3 From 89ff3dfac604614287ad5aad9370c3f984ea3f4b Mon Sep 17 00:00:00 2001 From: John Keeping Date: Tue, 22 Nov 2022 12:35:21 +0000 Subject: [PATCH 2875/4122] usb: gadget: f_hid: fix f_hidg lifetime vs cdev The embedded struct cdev does not have its 
lifetime correctly tied to the enclosing struct f_hidg, so there is a use-after-free if /dev/hidgN is held open while the gadget is deleted. This can readily be replicated with libusbgx's example programs (for conciseness - operating directly via configfs is equivalent): gadget-hid exec 3<> /dev/hidg0 gadget-vid-pid-remove exec 3<&- Pull the existing device up into struct f_hidg and make use of the cdev_device_{add,del}() helpers. This changes the lifetime of the device object to match struct f_hidg, but note that it is still added and deleted at the same time. Fixes: 71adf1189469 ("USB: gadget: add HID gadget driver") Tested-by: Lee Jones Reviewed-by: Andrzej Pietrasiewicz Reviewed-by: Lee Jones Signed-off-by: John Keeping Link: https://lore.kernel.org/r/20221122123523.3068034-2-john@metanate.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_hid.c | 52 ++++++++++++++++------------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c index ca0a7d9eaa34..8b8bbeaa27cb 100644 --- a/drivers/usb/gadget/function/f_hid.c +++ b/drivers/usb/gadget/function/f_hid.c @@ -71,7 +71,7 @@ struct f_hidg { wait_queue_head_t write_queue; struct usb_request *req; - int minor; + struct device dev; struct cdev cdev; struct usb_function func; @@ -84,6 +84,14 @@ static inline struct f_hidg *func_to_hidg(struct usb_function *f) return container_of(f, struct f_hidg, func); } +static void hidg_release(struct device *dev) +{ + struct f_hidg *hidg = container_of(dev, struct f_hidg, dev); + + kfree(hidg->set_report_buf); + kfree(hidg); +} + /*-------------------------------------------------------------------------*/ /* Static descriptors */ @@ -904,9 +912,7 @@ static int hidg_bind(struct usb_configuration *c, struct usb_function *f) struct usb_ep *ep; struct f_hidg *hidg = func_to_hidg(f); struct usb_string *us; - struct device *device; int status; - dev_t dev; /* maybe allocate 
device-global string IDs, and patch descriptors */ us = usb_gstrings_attach(c->cdev, ct_func_strings, @@ -999,21 +1005,11 @@ static int hidg_bind(struct usb_configuration *c, struct usb_function *f) /* create char device */ cdev_init(&hidg->cdev, &f_hidg_fops); - dev = MKDEV(major, hidg->minor); - status = cdev_add(&hidg->cdev, dev, 1); + status = cdev_device_add(&hidg->cdev, &hidg->dev); if (status) goto fail_free_descs; - device = device_create(hidg_class, NULL, dev, NULL, - "%s%d", "hidg", hidg->minor); - if (IS_ERR(device)) { - status = PTR_ERR(device); - goto del; - } - return 0; -del: - cdev_del(&hidg->cdev); fail_free_descs: usb_free_all_descriptors(f); fail: @@ -1244,9 +1240,7 @@ static void hidg_free(struct usb_function *f) hidg = func_to_hidg(f); opts = container_of(f->fi, struct f_hid_opts, func_inst); - kfree(hidg->report_desc); - kfree(hidg->set_report_buf); - kfree(hidg); + put_device(&hidg->dev); mutex_lock(&opts->lock); --opts->refcnt; mutex_unlock(&opts->lock); @@ -1256,8 +1250,7 @@ static void hidg_unbind(struct usb_configuration *c, struct usb_function *f) { struct f_hidg *hidg = func_to_hidg(f); - device_destroy(hidg_class, MKDEV(major, hidg->minor)); - cdev_del(&hidg->cdev); + cdev_device_del(&hidg->cdev, &hidg->dev); usb_free_all_descriptors(f); } @@ -1266,6 +1259,7 @@ static struct usb_function *hidg_alloc(struct usb_function_instance *fi) { struct f_hidg *hidg; struct f_hid_opts *opts; + int ret; /* allocate and initialize one new instance */ hidg = kzalloc(sizeof(*hidg), GFP_KERNEL); @@ -1277,17 +1271,27 @@ static struct usb_function *hidg_alloc(struct usb_function_instance *fi) mutex_lock(&opts->lock); ++opts->refcnt; - hidg->minor = opts->minor; + device_initialize(&hidg->dev); + hidg->dev.release = hidg_release; + hidg->dev.class = hidg_class; + hidg->dev.devt = MKDEV(major, opts->minor); + ret = dev_set_name(&hidg->dev, "hidg%d", opts->minor); + if (ret) { + --opts->refcnt; + mutex_unlock(&opts->lock); + return ERR_PTR(ret); + } + 
hidg->bInterfaceSubClass = opts->subclass; hidg->bInterfaceProtocol = opts->protocol; hidg->report_length = opts->report_length; hidg->report_desc_length = opts->report_desc_length; if (opts->report_desc) { - hidg->report_desc = kmemdup(opts->report_desc, - opts->report_desc_length, - GFP_KERNEL); + hidg->report_desc = devm_kmemdup(&hidg->dev, opts->report_desc, + opts->report_desc_length, + GFP_KERNEL); if (!hidg->report_desc) { - kfree(hidg); + put_device(&hidg->dev); mutex_unlock(&opts->lock); return ERR_PTR(-ENOMEM); } From 70a3288a7586526315105c699b687d78cd32559a Mon Sep 17 00:00:00 2001 From: John Keeping Date: Tue, 22 Nov 2022 12:35:22 +0000 Subject: [PATCH 2876/4122] usb: gadget: f_hid: fix refcount leak on error path When failing to allocate report_desc, opts->refcnt has already been incremented so it needs to be decremented to avoid leaving the options structure permanently locked. Fixes: 21a9476a7ba8 ("usb: gadget: hid: add configfs support") Tested-by: Lee Jones Reviewed-by: Andrzej Pietrasiewicz Reviewed-by: Lee Jones Signed-off-by: John Keeping Link: https://lore.kernel.org/r/20221122123523.3068034-3-john@metanate.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_hid.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c index 8b8bbeaa27cb..6be6009f911e 100644 --- a/drivers/usb/gadget/function/f_hid.c +++ b/drivers/usb/gadget/function/f_hid.c @@ -1292,6 +1292,7 @@ static struct usb_function *hidg_alloc(struct usb_function_instance *fi) GFP_KERNEL); if (!hidg->report_desc) { put_device(&hidg->dev); + --opts->refcnt; mutex_unlock(&opts->lock); return ERR_PTR(-ENOMEM); } From 944fe915d00d3cb1bacb1e77cabfb6dc82e6f8b8 Mon Sep 17 00:00:00 2001 From: John Keeping Date: Tue, 22 Nov 2022 12:35:23 +0000 Subject: [PATCH 2877/4122] usb: gadget: f_hid: tidy error handling in hidg_alloc Unify error handling at the end of the function, reducing the risk of missing something 
on one of the error paths. Moving the increment of opts->refcnt later means there is no need to decrement it on the error path and is safe as this is guarded by opts->lock which is held for this entire section. Tested-by: Lee Jones Reviewed-by: Andrzej Pietrasiewicz Reviewed-by: Lee Jones Signed-off-by: John Keeping Link: https://lore.kernel.org/r/20221122123523.3068034-4-john@metanate.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_hid.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c index 6be6009f911e..a8da3b4a2855 100644 --- a/drivers/usb/gadget/function/f_hid.c +++ b/drivers/usb/gadget/function/f_hid.c @@ -1269,18 +1269,14 @@ static struct usb_function *hidg_alloc(struct usb_function_instance *fi) opts = container_of(fi, struct f_hid_opts, func_inst); mutex_lock(&opts->lock); - ++opts->refcnt; device_initialize(&hidg->dev); hidg->dev.release = hidg_release; hidg->dev.class = hidg_class; hidg->dev.devt = MKDEV(major, opts->minor); ret = dev_set_name(&hidg->dev, "hidg%d", opts->minor); - if (ret) { - --opts->refcnt; - mutex_unlock(&opts->lock); - return ERR_PTR(ret); - } + if (ret) + goto err_unlock; hidg->bInterfaceSubClass = opts->subclass; hidg->bInterfaceProtocol = opts->protocol; @@ -1291,14 +1287,13 @@ static struct usb_function *hidg_alloc(struct usb_function_instance *fi) opts->report_desc_length, GFP_KERNEL); if (!hidg->report_desc) { - put_device(&hidg->dev); - --opts->refcnt; - mutex_unlock(&opts->lock); - return ERR_PTR(-ENOMEM); + ret = -ENOMEM; + goto err_put_device; } } hidg->use_out_ep = !opts->no_out_endpoint; + ++opts->refcnt; mutex_unlock(&opts->lock); hidg->func.name = "hid"; @@ -1313,6 +1308,12 @@ static struct usb_function *hidg_alloc(struct usb_function_instance *fi) hidg->qlen = 4; return &hidg->func; + +err_put_device: + put_device(&hidg->dev); +err_unlock: + mutex_unlock(&opts->lock); + return 
ERR_PTR(ret); } DECLARE_USB_FUNCTION_INIT(hid, hidg_alloc_inst, hidg_alloc); From a9efc04cfd05690e91279f41c2325c46335c43ef Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 23 Nov 2022 16:48:58 +0200 Subject: [PATCH 2878/4122] i915: Move list_count() to list.h for broader use Some existing users, and certainly new ones to come, want to count the nodes in a list. Provide a generic API for that by moving code from i915 to list.h. Reviewed-by: Lucas De Marchi Acked-by: Jani Nikula Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221123144901.40493-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 13 +------------ include/linux/list.h | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 1f7188129cd1..47734c4ebfa0 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -2004,17 +2004,6 @@ static void print_request_ring(struct drm_printer *m, struct i915_request *rq) } } -static unsigned long list_count(struct list_head *list) -{ - struct list_head *pos; - unsigned long count = 0; - - list_for_each(pos, list) - count++; - - return count; -} - static unsigned long read_ul(void *p, size_t x) { return *(unsigned long *)(p + x); @@ -2189,7 +2178,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, spin_lock_irqsave(&engine->sched_engine->lock, flags); engine_dump_active_requests(engine, m); - drm_printf(m, "\tOn hold?: %lu\n", + drm_printf(m, "\tOn hold?: %zu\n", list_count(&engine->sched_engine->hold)); spin_unlock_irqrestore(&engine->sched_engine->lock, flags); diff --git a/include/linux/list.h b/include/linux/list.h index 61762054b4be..632a298c7018 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -655,6 +655,21 @@ static inline void list_splice_tail_init(struct list_head
*list, !list_is_head(pos, (head)); \ pos = n, n = pos->prev) +/** + * list_count - count nodes in the list + * @head: the head for your list. + */ +static inline size_t list_count(struct list_head *head) +{ + struct list_head *pos; + size_t count = 0; + + list_for_each(pos, head) + count++; + + return count; +} + /** * list_entry_is_head - test if the entry points to the head of the list * @pos: the type * to cursor From 33f00f41d963c86176dba2f9faff9b428a542e60 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 23 Nov 2022 16:48:59 +0200 Subject: [PATCH 2879/4122] usb: gadget: hid: Convert to use list_count() The list API now provides the list_count() to help with counting existing nodes in the list. Utilise it. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221123144901.40493-2-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/legacy/hid.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/usb/gadget/legacy/hid.c b/drivers/usb/gadget/legacy/hid.c index 1187ee4f316a..6196c3456e0b 100644 --- a/drivers/usb/gadget/legacy/hid.c +++ b/drivers/usb/gadget/legacy/hid.c @@ -133,14 +133,11 @@ static struct usb_configuration config_driver = { static int hid_bind(struct usb_composite_dev *cdev) { struct usb_gadget *gadget = cdev->gadget; - struct list_head *tmp; struct hidg_func_node *n = NULL, *m, *iter_n; struct f_hid_opts *hid_opts; - int status, funcs = 0; - - list_for_each(tmp, &hidg_func_list) - funcs++; + int status, funcs; + funcs = list_count(&hidg_func_list); if (!funcs) return -ENODEV; From c2d9d02f7bf3c641f9b8e6c9f5de1e564cdeca69 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 23 Nov 2022 16:49:00 +0200 Subject: [PATCH 2880/4122] usb: gadget: udc: bcm63xx: Convert to use list_count() The list API now provides the list_count() to help with counting existing nodes in the list. Utilise it. 
Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221123144901.40493-3-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/bcm63xx_udc.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/usb/gadget/udc/bcm63xx_udc.c b/drivers/usb/gadget/udc/bcm63xx_udc.c index 2cdb07905bde..0762e49e85f8 100644 --- a/drivers/usb/gadget/udc/bcm63xx_udc.c +++ b/drivers/usb/gadget/udc/bcm63xx_udc.c @@ -2172,7 +2172,6 @@ static int bcm63xx_iudma_dbg_show(struct seq_file *s, void *p) for (ch_idx = 0; ch_idx < BCM63XX_NUM_IUDMA; ch_idx++) { struct iudma_ch *iudma = &udc->iudma[ch_idx]; - struct list_head *pos; seq_printf(s, "IUDMA channel %d -- ", ch_idx); switch (iudma_defaults[ch_idx].ep_type) { @@ -2205,14 +2204,10 @@ static int bcm63xx_iudma_dbg_show(struct seq_file *s, void *p) seq_printf(s, " desc: %d/%d used", iudma->n_bds_used, iudma->n_bds); - if (iudma->bep) { - i = 0; - list_for_each(pos, &iudma->bep->queue) - i++; - seq_printf(s, "; %d queued\n", i); - } else { + if (iudma->bep) + seq_printf(s, "; %zu queued\n", list_count(&iudma->bep->queue)); + else seq_printf(s, "\n"); - } for (i = 0; i < iudma->n_bds; i++) { struct bcm_enet_desc *d = &iudma->bd_ring[i]; From b47ec9727f47d1dce4e8cbc9aef01c80b2332535 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 23 Nov 2022 16:49:01 +0200 Subject: [PATCH 2881/4122] xhci: Convert to use list_count() The list API now provides the list_count() to help with counting existing nodes in the list. Utilise it. 
Acked-by: Mathias Nyman Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221123144901.40493-4-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index ad81e9a508b1..817c31e3b0c8 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2532,7 +2532,6 @@ static int handle_tx_event(struct xhci_hcd *xhci, union xhci_trb *ep_trb; int status = -EINPROGRESS; struct xhci_ep_ctx *ep_ctx; - struct list_head *tmp; u32 trb_comp_code; int td_num = 0; bool handling_skipped_tds = false; @@ -2580,10 +2579,8 @@ static int handle_tx_event(struct xhci_hcd *xhci, } /* Count current td numbers if ep->skip is set */ - if (ep->skip) { - list_for_each(tmp, &ep_ring->td_list) - td_num++; - } + if (ep->skip) + td_num += list_count(&ep_ring->td_list); /* Look for common error cases */ switch (trb_comp_code) { From fb12940f51d96ead10f9c0fd578e69b8de10ca81 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 28 Nov 2022 18:35:39 +0100 Subject: [PATCH 2882/4122] driver core: fix up some missing class.devnode() conversions. In commit ff62b8e6588f ("driver core: make struct class.devnode() take a const *") the ->devnode callback changed the pointer to be const, but a few instances of PowerPC drivers were not caught for some reason. Fix this up by changing the pointers to be const. 
Fixes: ff62b8e6588f ("driver core: make struct class.devnode() take a const *") Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Christophe Leroy Cc: Frederic Barrat Cc: Andrew Donnellan Cc: Arnd Bergmann Cc: linuxppc-dev@lists.ozlabs.org Reported-by: Stephen Rothwell Link: https://lore.kernel.org/r/20221128173539.3112234-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/book3s/vas-api.c | 2 +- drivers/misc/cxl/file.c | 2 +- drivers/misc/ocxl/file.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c index 40f5ae5e1238..eb5bed333750 100644 --- a/arch/powerpc/platforms/book3s/vas-api.c +++ b/arch/powerpc/platforms/book3s/vas-api.c @@ -53,7 +53,7 @@ struct coproc_instance { struct vas_window *txwin; }; -static char *coproc_devnode(struct device *dev, umode_t *mode) +static char *coproc_devnode(const struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "crypto/%s", dev_name(dev)); } diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c index 3dbdce96fae0..5878329b011a 100644 --- a/drivers/misc/cxl/file.c +++ b/drivers/misc/cxl/file.c @@ -546,7 +546,7 @@ static const struct file_operations afu_master_fops = { }; -static char *cxl_devnode(struct device *dev, umode_t *mode) +static char *cxl_devnode(const struct device *dev, umode_t *mode) { if (cpu_has_feature(CPU_FTR_HVMODE) && CXL_DEVT_IS_CARD(dev->devt)) { diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c index d46dba2df5a1..d96be36405a0 100644 --- a/drivers/misc/ocxl/file.c +++ b/drivers/misc/ocxl/file.c @@ -581,7 +581,7 @@ void ocxl_file_unregister_afu(struct ocxl_afu *afu) device_unregister(&info->dev); } -static char *ocxl_devnode(struct device *dev, umode_t *mode) +static char *ocxl_devnode(const struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "ocxl/%s", dev_name(dev)); } From 2d6c66f5253e7d168a76048d18e1209c52f98a2b Mon Sep 
17 00:00:00 2001 From: zhang songyi Date: Tue, 29 Nov 2022 15:54:07 +0800 Subject: [PATCH 2883/4122] RDMA/mlx4: Remove NULL check before dev_{put, hold} dev_{put, hold} call netdev_{put, hold}, which already check for NULL, so there is no need to check before calling dev_{put, hold}. Fix the following coccicheck warnings: /drivers/infiniband/hw/mlx4/main.c:1311:2-10: WARNING: WARNING NULL check before dev_{put, hold} functions is not needed. /drivers/infiniband/hw/mlx4/main.c:148:2-10: WARNING: WARNING NULL check before dev_{put, hold} functions is not needed. /drivers/infiniband/hw/mlx4/main.c:1959:3-11: WARNING: WARNING NULL check before dev_{put, hold} functions is not needed. /drivers/infiniband/hw/mlx4/main.c:1962:3-10: WARNING: WARNING NULL check before dev_{put, hold} functions is not needed. Signed-off-by: zhang songyi Link: https://lore.kernel.org/r/202211291554079687539@zte.com.cn Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx4/main.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index ba47874f90d3..dceebcd885bb 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -144,8 +144,7 @@ static struct net_device *mlx4_ib_get_netdev(struct ib_device *device, } } } - if (dev) - dev_hold(dev); + dev_hold(dev); rcu_read_unlock(); return dev; @@ -1307,8 +1306,7 @@ int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, spin_lock_bh(&mdev->iboe.lock); ndev = mdev->iboe.netdevs[mqp->port - 1]; - if (ndev) - dev_hold(ndev); + dev_hold(ndev); spin_unlock_bh(&mdev->iboe.lock); if (ndev) { @@ -1955,11 +1953,9 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) if (ge) { spin_lock_bh(&mdev->iboe.lock); ndev = ge->added ?
mdev->iboe.netdevs[ge->port - 1] : NULL; - if (ndev) - dev_hold(ndev); + dev_hold(ndev); spin_unlock_bh(&mdev->iboe.lock); - if (ndev) - dev_put(ndev); + dev_put(ndev); list_del(&ge->list); kfree(ge); } else From b0284cd29a957e62d60c2886fd663be93c56f9c0 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 3 Nov 2022 18:10:34 -0700 Subject: [PATCH 2884/4122] mm: Do not enable PG_arch_2 for all 64-bit architectures Commit 4beba9486abd ("mm: Add PG_arch_2 page flag") introduced a new page flag for all 64-bit architectures. However, even if an architecture is 64-bit, it may still have limited spare bits in the 'flags' member of 'struct page'. This may happen if an architecture enables SPARSEMEM without SPARSEMEM_VMEMMAP as is the case with the newly added loongarch. This architecture port needs 19 more bits for the sparsemem section information and, while it is currently fine with PG_arch_2, adding any more PG_arch_* flags will trigger build-time warnings. Add a new CONFIG_ARCH_USES_PG_ARCH_X option which can be selected by architectures that need more PG_arch_* flags beyond PG_arch_1. Select it on arm64. 
Signed-off-by: Catalin Marinas [pcc@google.com: fix build with CONFIG_ARM64_MTE disabled] Signed-off-by: Peter Collingbourne Reported-by: kernel test robot Cc: Andrew Morton Cc: Steven Price Reviewed-by: Steven Price Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221104011041.290951-2-pcc@google.com --- arch/arm64/Kconfig | 1 + fs/proc/page.c | 2 +- include/linux/page-flags.h | 2 +- include/trace/events/mmflags.h | 8 ++++---- mm/Kconfig | 8 ++++++++ mm/huge_memory.c | 2 +- 6 files changed, 16 insertions(+), 7 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 505c8a1ccbe0..cd93d0738425 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1965,6 +1965,7 @@ config ARM64_MTE depends on ARM64_PAN select ARCH_HAS_SUBPAGE_FAULTS select ARCH_USES_HIGH_VMA_FLAGS + select ARCH_USES_PG_ARCH_X help Memory Tagging (part of the ARMv8.5 Extensions) provides architectural support for run-time, always-on detection of diff --git a/fs/proc/page.c b/fs/proc/page.c index f2273b164535..882525c8e94c 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -219,7 +219,7 @@ u64 stable_page_flags(struct page *page) u |= kpf_copy_bit(k, KPF_PRIVATE_2, PG_private_2); u |= kpf_copy_bit(k, KPF_OWNER_PRIVATE, PG_owner_priv_1); u |= kpf_copy_bit(k, KPF_ARCH, PG_arch_1); -#ifdef CONFIG_64BIT +#ifdef CONFIG_ARCH_USES_PG_ARCH_X u |= kpf_copy_bit(k, KPF_ARCH_2, PG_arch_2); #endif diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 0b0ae5084e60..5dc7977edf9d 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -132,7 +132,7 @@ enum pageflags { PG_young, PG_idle, #endif -#ifdef CONFIG_64BIT +#ifdef CONFIG_ARCH_USES_PG_ARCH_X PG_arch_2, #endif #ifdef CONFIG_KASAN_HW_TAGS diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index e87cb2b80ed3..d9f6d35fb150 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -91,10 +91,10 @@ #define IF_HAVE_PG_IDLE(flag,string) 
#endif -#ifdef CONFIG_64BIT -#define IF_HAVE_PG_ARCH_2(flag,string) ,{1UL << flag, string} +#ifdef CONFIG_ARCH_USES_PG_ARCH_X +#define IF_HAVE_PG_ARCH_X(flag,string) ,{1UL << flag, string} #else -#define IF_HAVE_PG_ARCH_2(flag,string) +#define IF_HAVE_PG_ARCH_X(flag,string) #endif #ifdef CONFIG_KASAN_HW_TAGS @@ -130,7 +130,7 @@ IF_HAVE_PG_UNCACHED(PG_uncached, "uncached" ) \ IF_HAVE_PG_HWPOISON(PG_hwpoison, "hwpoison" ) \ IF_HAVE_PG_IDLE(PG_young, "young" ) \ IF_HAVE_PG_IDLE(PG_idle, "idle" ) \ -IF_HAVE_PG_ARCH_2(PG_arch_2, "arch_2" ) \ +IF_HAVE_PG_ARCH_X(PG_arch_2, "arch_2" ) \ IF_HAVE_PG_SKIP_KASAN_POISON(PG_skip_kasan_poison, "skip_kasan_poison") #define show_page_flags(flags) \ diff --git a/mm/Kconfig b/mm/Kconfig index 57e1d8c5b505..807bd7192f51 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -1005,6 +1005,14 @@ config ARCH_USES_HIGH_VMA_FLAGS config ARCH_HAS_PKEYS bool +config ARCH_USES_PG_ARCH_X + bool + help + Enable the definition of PG_arch_x page flags with x > 1. Only + suitable for 64-bit architectures with CONFIG_FLATMEM or + CONFIG_SPARSEMEM_VMEMMAP enabled, otherwise there may not be + enough room for additional bits in page->flags. 
+ config VM_EVENT_COUNTERS default y bool "Enable VM event counters for /proc/vmstat" if EXPERT diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 561a42567477..76e7b973919c 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2444,7 +2444,7 @@ static void __split_huge_page_tail(struct page *head, int tail, (1L << PG_workingset) | (1L << PG_locked) | (1L << PG_unevictable) | -#ifdef CONFIG_64BIT +#ifdef CONFIG_ARCH_USES_PG_ARCH_X (1L << PG_arch_2) | #endif (1L << PG_dirty) | From e059853d14ca4ed0f6a190d7109487918a22a976 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 3 Nov 2022 18:10:35 -0700 Subject: [PATCH 2885/4122] arm64: mte: Fix/clarify the PG_mte_tagged semantics Currently the PG_mte_tagged page flag mostly means the page contains valid tags and it should be set after the tags have been cleared or restored. However, in mte_sync_tags() it is set before setting the tags to avoid, in theory, a race with concurrent mprotect(PROT_MTE) for shared pages. However, a concurrent mprotect(PROT_MTE) with a copy on write in another thread can cause the new page to have stale tags. Similarly, tag reading via ptrace() can read stale tags if the PG_mte_tagged flag is set before actually clearing/restoring the tags. Fix the PG_mte_tagged semantics so that it is only set after the tags have been cleared or restored. This is safe for swap restoring into a MAP_SHARED or CoW page since the core code takes the page lock. Add two functions to test and set the PG_mte_tagged flag with acquire and release semantics. The downside is that concurrent mprotect(PROT_MTE) on a MAP_SHARED page may cause tag loss. This is already the case for KVM guests if a VMM changes the page protection while the guest triggers a user_mem_abort(). 
Signed-off-by: Catalin Marinas [pcc@google.com: fix build with CONFIG_ARM64_MTE disabled] Signed-off-by: Peter Collingbourne Reviewed-by: Cornelia Huck Reviewed-by: Steven Price Cc: Will Deacon Cc: Marc Zyngier Cc: Peter Collingbourne Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221104011041.290951-3-pcc@google.com --- arch/arm64/include/asm/mte.h | 30 ++++++++++++++++++++++++++++++ arch/arm64/include/asm/pgtable.h | 2 +- arch/arm64/kernel/cpufeature.c | 4 +++- arch/arm64/kernel/elfcore.c | 2 +- arch/arm64/kernel/hibernate.c | 2 +- arch/arm64/kernel/mte.c | 17 +++++++++++------ arch/arm64/kvm/guest.c | 4 ++-- arch/arm64/kvm/mmu.c | 4 ++-- arch/arm64/mm/copypage.c | 5 +++-- arch/arm64/mm/fault.c | 2 +- arch/arm64/mm/mteswap.c | 2 +- 11 files changed, 56 insertions(+), 18 deletions(-) diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h index 760c62f8e22f..3f8199ba265a 100644 --- a/arch/arm64/include/asm/mte.h +++ b/arch/arm64/include/asm/mte.h @@ -37,6 +37,29 @@ void mte_free_tag_storage(char *storage); /* track which pages have valid allocation tags */ #define PG_mte_tagged PG_arch_2 +static inline void set_page_mte_tagged(struct page *page) +{ + /* + * Ensure that the tags written prior to this function are visible + * before the page flags update. + */ + smp_wmb(); + set_bit(PG_mte_tagged, &page->flags); +} + +static inline bool page_mte_tagged(struct page *page) +{ + bool ret = test_bit(PG_mte_tagged, &page->flags); + + /* + * If the page is tagged, ensure ordering with a likely subsequent + * read of the tags. 
+ */ + if (ret) + smp_rmb(); + return ret; +} + void mte_zero_clear_page_tags(void *addr); void mte_sync_tags(pte_t old_pte, pte_t pte); void mte_copy_page_tags(void *kto, const void *kfrom); @@ -56,6 +79,13 @@ size_t mte_probe_user_range(const char __user *uaddr, size_t size); /* unused if !CONFIG_ARM64_MTE, silence the compiler */ #define PG_mte_tagged 0 +static inline void set_page_mte_tagged(struct page *page) +{ +} +static inline bool page_mte_tagged(struct page *page) +{ + return false; +} static inline void mte_zero_clear_page_tags(void *addr) { } diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 71a1af42f0e8..98b638441521 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -1050,7 +1050,7 @@ static inline void arch_swap_invalidate_area(int type) static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio) { if (system_supports_mte() && mte_restore_tags(entry, &folio->page)) - set_bit(PG_mte_tagged, &folio->flags); + set_page_mte_tagged(&folio->page); } #endif /* CONFIG_ARM64_MTE */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 6062454a9067..df11cfe61fcb 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2050,8 +2050,10 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap) * Clear the tags in the zero page. This needs to be done via the * linear map which has the Tagged attribute. 
*/ - if (!test_and_set_bit(PG_mte_tagged, &ZERO_PAGE(0)->flags)) + if (!page_mte_tagged(ZERO_PAGE(0))) { mte_clear_page_tags(lm_alias(empty_zero_page)); + set_page_mte_tagged(ZERO_PAGE(0)); + } kasan_init_hw_tags_cpu(); } diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c index 27ef7ad3ffd2..353009d7f307 100644 --- a/arch/arm64/kernel/elfcore.c +++ b/arch/arm64/kernel/elfcore.c @@ -47,7 +47,7 @@ static int mte_dump_tag_range(struct coredump_params *cprm, * Pages mapped in user space as !pte_access_permitted() (e.g. * PROT_EXEC only) may not have the PG_mte_tagged flag set. */ - if (!test_bit(PG_mte_tagged, &page->flags)) { + if (!page_mte_tagged(page)) { put_page(page); dump_skip(cprm, MTE_PAGE_TAG_STORAGE); continue; diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index af5df48ba915..788597a6b6a2 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -271,7 +271,7 @@ static int swsusp_mte_save_tags(void) if (!page) continue; - if (!test_bit(PG_mte_tagged, &page->flags)) + if (!page_mte_tagged(page)) continue; ret = save_tags(page, pfn); diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index 7467217c1eaf..84a085d536f8 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -41,8 +41,10 @@ static void mte_sync_page_tags(struct page *page, pte_t old_pte, if (check_swap && is_swap_pte(old_pte)) { swp_entry_t entry = pte_to_swp_entry(old_pte); - if (!non_swap_entry(entry) && mte_restore_tags(entry, page)) + if (!non_swap_entry(entry) && mte_restore_tags(entry, page)) { + set_page_mte_tagged(page); return; + } } if (!pte_is_tagged) @@ -52,8 +54,10 @@ static void mte_sync_page_tags(struct page *page, pte_t old_pte, * Test PG_mte_tagged again in case it was racing with another * set_pte_at(). 
*/ - if (!test_and_set_bit(PG_mte_tagged, &page->flags)) + if (!page_mte_tagged(page)) { mte_clear_page_tags(page_address(page)); + set_page_mte_tagged(page); + } } void mte_sync_tags(pte_t old_pte, pte_t pte) @@ -69,9 +73,11 @@ void mte_sync_tags(pte_t old_pte, pte_t pte) /* if PG_mte_tagged is set, tags have already been initialised */ for (i = 0; i < nr_pages; i++, page++) { - if (!test_bit(PG_mte_tagged, &page->flags)) + if (!page_mte_tagged(page)) { mte_sync_page_tags(page, old_pte, check_swap, pte_is_tagged); + set_page_mte_tagged(page); + } } /* ensure the tags are visible before the PTE is set */ @@ -96,8 +102,7 @@ int memcmp_pages(struct page *page1, struct page *page2) * pages is tagged, set_pte_at() may zero or change the tags of the * other page via mte_sync_tags(). */ - if (test_bit(PG_mte_tagged, &page1->flags) || - test_bit(PG_mte_tagged, &page2->flags)) + if (page_mte_tagged(page1) || page_mte_tagged(page2)) return addr1 != addr2; return ret; @@ -454,7 +459,7 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr, put_page(page); break; } - WARN_ON_ONCE(!test_bit(PG_mte_tagged, &page->flags)); + WARN_ON_ONCE(!page_mte_tagged(page)); /* limit access to the end of the page */ offset = offset_in_page(addr); diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 2ff13a3f8479..817fdd1ab778 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -1059,7 +1059,7 @@ long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm, maddr = page_address(page); if (!write) { - if (test_bit(PG_mte_tagged, &page->flags)) + if (page_mte_tagged(page)) num_tags = mte_copy_tags_to_user(tags, maddr, MTE_GRANULES_PER_PAGE); else @@ -1076,7 +1076,7 @@ long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm, * completed fully */ if (num_tags == MTE_GRANULES_PER_PAGE) - set_bit(PG_mte_tagged, &page->flags); + set_page_mte_tagged(page); kvm_release_pfn_dirty(pfn); } diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 
60ee3d9f01f8..2c3759f1f2c5 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1110,9 +1110,9 @@ static int sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn, return -EFAULT; for (i = 0; i < nr_pages; i++, page++) { - if (!test_bit(PG_mte_tagged, &page->flags)) { + if (!page_mte_tagged(page)) { mte_clear_page_tags(page_address(page)); - set_bit(PG_mte_tagged, &page->flags); + set_page_mte_tagged(page); } } diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c index 24913271e898..731d8a35701e 100644 --- a/arch/arm64/mm/copypage.c +++ b/arch/arm64/mm/copypage.c @@ -21,9 +21,10 @@ void copy_highpage(struct page *to, struct page *from) copy_page(kto, kfrom); - if (system_supports_mte() && test_bit(PG_mte_tagged, &from->flags)) { - set_bit(PG_mte_tagged, &to->flags); + if (system_supports_mte() && page_mte_tagged(from)) { + page_kasan_tag_reset(to); mte_copy_page_tags(kto, kfrom); + set_page_mte_tagged(to); } } EXPORT_SYMBOL(copy_highpage); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 5b391490e045..629e886ceec4 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -934,5 +934,5 @@ struct page *alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma, void tag_clear_highpage(struct page *page) { mte_zero_clear_page_tags(page_address(page)); - set_bit(PG_mte_tagged, &page->flags); + set_page_mte_tagged(page); } diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c index bed803d8e158..70f913205db9 100644 --- a/arch/arm64/mm/mteswap.c +++ b/arch/arm64/mm/mteswap.c @@ -24,7 +24,7 @@ int mte_save_tags(struct page *page) { void *tag_storage, *ret; - if (!test_bit(PG_mte_tagged, &page->flags)) + if (!page_mte_tagged(page)) return 0; tag_storage = mte_allocate_tag_storage(); From 2dbf12ae132cc78048615cfa19c9be64baaf0ced Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 3 Nov 2022 18:10:36 -0700 Subject: [PATCH 2886/4122] KVM: arm64: Simplify the sanitise_mte_tags() logic Currently sanitise_mte_tags() 
checks if it's an online page before attempting to sanitise the tags. Such detection should be done in the caller via the VM_MTE_ALLOWED vma flag. Since kvm_set_spte_gfn() does not have the vma, leave the page unmapped if not already tagged. Tag initialisation will be done on a subsequent access fault in user_mem_abort(). Signed-off-by: Catalin Marinas [pcc@google.com: fix the page initializer] Signed-off-by: Peter Collingbourne Reviewed-by: Steven Price Cc: Will Deacon Cc: Marc Zyngier Cc: Peter Collingbourne Reviewed-by: Cornelia Huck Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221104011041.290951-4-pcc@google.com --- arch/arm64/kvm/mmu.c | 40 +++++++++++++++------------------------- 1 file changed, 15 insertions(+), 25 deletions(-) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 2c3759f1f2c5..e81bfb730629 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1091,23 +1091,14 @@ static int get_vma_page_shift(struct vm_area_struct *vma, unsigned long hva) * - mmap_lock protects between a VM faulting a page in and the VMM performing * an mprotect() to add VM_MTE */ -static int sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn, - unsigned long size) +static void sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn, + unsigned long size) { unsigned long i, nr_pages = size >> PAGE_SHIFT; - struct page *page; + struct page *page = pfn_to_page(pfn); if (!kvm_has_mte(kvm)) - return 0; - - /* - * pfn_to_online_page() is used to reject ZONE_DEVICE pages - * that may not support tags. 
- */ - page = pfn_to_online_page(pfn); - - if (!page) - return -EFAULT; + return; for (i = 0; i < nr_pages; i++, page++) { if (!page_mte_tagged(page)) { @@ -1115,8 +1106,6 @@ static int sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn, set_page_mte_tagged(page); } } - - return 0; } static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, @@ -1127,7 +1116,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, bool write_fault, writable, force_pte = false; bool exec_fault; bool device = false; - bool shared; unsigned long mmu_seq; struct kvm *kvm = vcpu->kvm; struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; @@ -1177,8 +1165,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, vma_shift = get_vma_page_shift(vma, hva); } - shared = (vma->vm_flags & VM_SHARED); - switch (vma_shift) { #ifndef __PAGETABLE_PMD_FOLDED case PUD_SHIFT: @@ -1299,12 +1285,13 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) { /* Check the VMM hasn't introduced a new VM_SHARED VMA */ - if (!shared) - ret = sanitise_mte_tags(kvm, pfn, vma_pagesize); - else + if ((vma->vm_flags & VM_MTE_ALLOWED) && + !(vma->vm_flags & VM_SHARED)) { + sanitise_mte_tags(kvm, pfn, vma_pagesize); + } else { ret = -EFAULT; - if (ret) goto out_unlock; + } } if (writable) @@ -1526,15 +1513,18 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { kvm_pfn_t pfn = pte_pfn(range->pte); - int ret; if (!kvm->arch.mmu.pgt) return false; WARN_ON(range->end - range->start != 1); - ret = sanitise_mte_tags(kvm, pfn, PAGE_SIZE); - if (ret) + /* + * If the page isn't tagged, defer to user_mem_abort() for sanitising + * the MTE tags. The S2 pte should have been unmapped by + * mmu_notifier_invalidate_range_end(). 
+ */ + if (kvm_has_mte(kvm) && !page_mte_tagged(pfn_to_page(pfn))) return false; /* From ef6458b1b6ca3fdb991ce4182e981a88d4c58c0f Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 3 Nov 2022 18:10:37 -0700 Subject: [PATCH 2887/4122] mm: Add PG_arch_3 page flag As with PG_arch_2, this flag is only allowed on 64-bit architectures due to the shortage of bits available. It will be used by the arm64 MTE code in subsequent patches. Signed-off-by: Peter Collingbourne Cc: Will Deacon Cc: Marc Zyngier Cc: Steven Price [catalin.marinas@arm.com: added flag preserving in __split_huge_page_tail()] Signed-off-by: Catalin Marinas Reviewed-by: Steven Price Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221104011041.290951-5-pcc@google.com --- fs/proc/page.c | 1 + include/linux/kernel-page-flags.h | 1 + include/linux/page-flags.h | 1 + include/trace/events/mmflags.h | 1 + mm/huge_memory.c | 1 + 5 files changed, 5 insertions(+) diff --git a/fs/proc/page.c b/fs/proc/page.c index 882525c8e94c..6249c347809a 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -221,6 +221,7 @@ u64 stable_page_flags(struct page *page) u |= kpf_copy_bit(k, KPF_ARCH, PG_arch_1); #ifdef CONFIG_ARCH_USES_PG_ARCH_X u |= kpf_copy_bit(k, KPF_ARCH_2, PG_arch_2); + u |= kpf_copy_bit(k, KPF_ARCH_3, PG_arch_3); #endif return u; diff --git a/include/linux/kernel-page-flags.h b/include/linux/kernel-page-flags.h index eee1877a354e..859f4b0c1b2b 100644 --- a/include/linux/kernel-page-flags.h +++ b/include/linux/kernel-page-flags.h @@ -18,5 +18,6 @@ #define KPF_UNCACHED 39 #define KPF_SOFTDIRTY 40 #define KPF_ARCH_2 41 +#define KPF_ARCH_3 42 #endif /* LINUX_KERNEL_PAGE_FLAGS_H */ diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 5dc7977edf9d..c50ce2812f17 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -134,6 +134,7 @@ enum pageflags { #endif #ifdef CONFIG_ARCH_USES_PG_ARCH_X PG_arch_2, + PG_arch_3, #endif #ifdef CONFIG_KASAN_HW_TAGS 
PG_skip_kasan_poison, diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index d9f6d35fb150..412b5a46374c 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -131,6 +131,7 @@ IF_HAVE_PG_HWPOISON(PG_hwpoison, "hwpoison" ) \ IF_HAVE_PG_IDLE(PG_young, "young" ) \ IF_HAVE_PG_IDLE(PG_idle, "idle" ) \ IF_HAVE_PG_ARCH_X(PG_arch_2, "arch_2" ) \ +IF_HAVE_PG_ARCH_X(PG_arch_3, "arch_3" ) \ IF_HAVE_PG_SKIP_KASAN_POISON(PG_skip_kasan_poison, "skip_kasan_poison") #define show_page_flags(flags) \ diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 76e7b973919c..dfe72ea23c5f 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2446,6 +2446,7 @@ static void __split_huge_page_tail(struct page *head, int tail, (1L << PG_unevictable) | #ifdef CONFIG_ARCH_USES_PG_ARCH_X (1L << PG_arch_2) | + (1L << PG_arch_3) | #endif (1L << PG_dirty) | LRU_GEN_MASK | LRU_REFS_MASK)); From d77e59a8fccde7fb5dd8c57594ed147b4291c970 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 3 Nov 2022 18:10:38 -0700 Subject: [PATCH 2888/4122] arm64: mte: Lock a page for MTE tag initialisation Initialising the tags and setting PG_mte_tagged flag for a page can race between multiple set_pte_at() on shared pages or setting the stage 2 pte via user_mem_abort(). Introduce a new PG_mte_lock flag as PG_arch_3 and set it before attempting page initialisation. Given that PG_mte_tagged is never cleared for a page, consider setting this flag to mean page unlocked and wait on this bit with acquire semantics if the page is locked: - try_page_mte_tagging() - lock the page for tagging, return true if it can be tagged, false if already tagged. No acquire semantics if it returns true (PG_mte_tagged not set) as there is no serialisation with a previous set_page_mte_tagged(). - set_page_mte_tagged() - set PG_mte_tagged with release semantics. The two-bit locking is based on Peter Collingbourne's idea. 
Signed-off-by: Catalin Marinas Signed-off-by: Peter Collingbourne Reviewed-by: Steven Price Cc: Will Deacon Cc: Marc Zyngier Cc: Peter Collingbourne Reviewed-by: Cornelia Huck Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221104011041.290951-6-pcc@google.com --- arch/arm64/include/asm/mte.h | 35 +++++++++++++++++++++++++++++++- arch/arm64/include/asm/pgtable.h | 4 ++-- arch/arm64/kernel/cpufeature.c | 2 +- arch/arm64/kernel/mte.c | 12 +++-------- arch/arm64/kvm/guest.c | 16 +++++++++------ arch/arm64/kvm/mmu.c | 2 +- arch/arm64/mm/copypage.c | 2 ++ arch/arm64/mm/fault.c | 2 ++ arch/arm64/mm/mteswap.c | 14 +++++-------- 9 files changed, 60 insertions(+), 29 deletions(-) diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h index 3f8199ba265a..20dd06d70af5 100644 --- a/arch/arm64/include/asm/mte.h +++ b/arch/arm64/include/asm/mte.h @@ -25,7 +25,7 @@ unsigned long mte_copy_tags_to_user(void __user *to, void *from, unsigned long n); int mte_save_tags(struct page *page); void mte_save_page_tags(const void *page_addr, void *tag_storage); -bool mte_restore_tags(swp_entry_t entry, struct page *page); +void mte_restore_tags(swp_entry_t entry, struct page *page); void mte_restore_page_tags(void *page_addr, const void *tag_storage); void mte_invalidate_tags(int type, pgoff_t offset); void mte_invalidate_tags_area(int type); @@ -36,6 +36,8 @@ void mte_free_tag_storage(char *storage); /* track which pages have valid allocation tags */ #define PG_mte_tagged PG_arch_2 +/* simple lock to avoid multiple threads tagging the same page */ +#define PG_mte_lock PG_arch_3 static inline void set_page_mte_tagged(struct page *page) { @@ -60,6 +62,33 @@ static inline bool page_mte_tagged(struct page *page) return ret; } +/* + * Lock the page for tagging and return 'true' if the page can be tagged, + * 'false' if already tagged. PG_mte_tagged is never cleared and therefore the + * locking only happens once for page initialisation. 
+ * + * The page MTE lock state: + * + * Locked: PG_mte_lock && !PG_mte_tagged + * Unlocked: !PG_mte_lock || PG_mte_tagged + * + * Acquire semantics only if the page is tagged (returning 'false'). + */ +static inline bool try_page_mte_tagging(struct page *page) +{ + if (!test_and_set_bit(PG_mte_lock, &page->flags)) + return true; + + /* + * The tags are either being initialised or may have been initialised + * already. Check if the PG_mte_tagged flag has been set or wait + * otherwise. + */ + smp_cond_load_acquire(&page->flags, VAL & (1UL << PG_mte_tagged)); + + return false; +} + void mte_zero_clear_page_tags(void *addr); void mte_sync_tags(pte_t old_pte, pte_t pte); void mte_copy_page_tags(void *kto, const void *kfrom); @@ -86,6 +115,10 @@ static inline bool page_mte_tagged(struct page *page) { return false; } +static inline bool try_page_mte_tagging(struct page *page) +{ + return false; +} static inline void mte_zero_clear_page_tags(void *addr) { } diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 98b638441521..8735ac1a1e32 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -1049,8 +1049,8 @@ static inline void arch_swap_invalidate_area(int type) #define __HAVE_ARCH_SWAP_RESTORE static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio) { - if (system_supports_mte() && mte_restore_tags(entry, &folio->page)) - set_page_mte_tagged(&folio->page); + if (system_supports_mte()) + mte_restore_tags(entry, &folio->page); } #endif /* CONFIG_ARM64_MTE */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index df11cfe61fcb..afb4ffd745c3 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2050,7 +2050,7 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap) * Clear the tags in the zero page. This needs to be done via the * linear map which has the Tagged attribute. 
*/ - if (!page_mte_tagged(ZERO_PAGE(0))) { + if (try_page_mte_tagging(ZERO_PAGE(0))) { mte_clear_page_tags(lm_alias(empty_zero_page)); set_page_mte_tagged(ZERO_PAGE(0)); } diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index 84a085d536f8..f5bcb0dc6267 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -41,20 +41,14 @@ static void mte_sync_page_tags(struct page *page, pte_t old_pte, if (check_swap && is_swap_pte(old_pte)) { swp_entry_t entry = pte_to_swp_entry(old_pte); - if (!non_swap_entry(entry) && mte_restore_tags(entry, page)) { - set_page_mte_tagged(page); - return; - } + if (!non_swap_entry(entry)) + mte_restore_tags(entry, page); } if (!pte_is_tagged) return; - /* - * Test PG_mte_tagged again in case it was racing with another - * set_pte_at(). - */ - if (!page_mte_tagged(page)) { + if (try_page_mte_tagging(page)) { mte_clear_page_tags(page_address(page)); set_page_mte_tagged(page); } diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 817fdd1ab778..5626ddb540ce 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -1068,15 +1068,19 @@ long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm, clear_user(tags, MTE_GRANULES_PER_PAGE); kvm_release_pfn_clean(pfn); } else { + /* + * Only locking to serialise with a concurrent + * set_pte_at() in the VMM but still overriding the + * tags, hence ignoring the return value. 
+ */ + try_page_mte_tagging(page); num_tags = mte_copy_tags_from_user(maddr, tags, MTE_GRANULES_PER_PAGE); - /* - * Set the flag after checking the write - * completed fully - */ - if (num_tags == MTE_GRANULES_PER_PAGE) - set_page_mte_tagged(page); + /* uaccess failed, don't leave stale tags */ + if (num_tags != MTE_GRANULES_PER_PAGE) + mte_clear_page_tags(page); + set_page_mte_tagged(page); kvm_release_pfn_dirty(pfn); } diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index e81bfb730629..fa2c85b93149 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1101,7 +1101,7 @@ static void sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn, return; for (i = 0; i < nr_pages; i++, page++) { - if (!page_mte_tagged(page)) { + if (try_page_mte_tagging(page)) { mte_clear_page_tags(page_address(page)); set_page_mte_tagged(page); } diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c index 731d8a35701e..8dd5a8fe64b4 100644 --- a/arch/arm64/mm/copypage.c +++ b/arch/arm64/mm/copypage.c @@ -23,6 +23,8 @@ void copy_highpage(struct page *to, struct page *from) if (system_supports_mte() && page_mte_tagged(from)) { page_kasan_tag_reset(to); + /* It's a new page, shouldn't have been tagged yet */ + WARN_ON_ONCE(!try_page_mte_tagging(to)); mte_copy_page_tags(kto, kfrom); set_page_mte_tagged(to); } diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 629e886ceec4..b8b299d1736a 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -933,6 +933,8 @@ struct page *alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma, void tag_clear_highpage(struct page *page) { + /* Newly allocated page, shouldn't have been tagged yet */ + WARN_ON_ONCE(!try_page_mte_tagging(page)); mte_zero_clear_page_tags(page_address(page)); set_page_mte_tagged(page); } diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c index 70f913205db9..cd508ba80ab1 100644 --- a/arch/arm64/mm/mteswap.c +++ b/arch/arm64/mm/mteswap.c @@ -46,21 +46,17 @@ int 
mte_save_tags(struct page *page) return 0; } -bool mte_restore_tags(swp_entry_t entry, struct page *page) +void mte_restore_tags(swp_entry_t entry, struct page *page) { void *tags = xa_load(&mte_pages, entry.val); if (!tags) - return false; + return; - /* - * Test PG_mte_tagged again in case it was racing with another - * set_pte_at(). - */ - if (!test_and_set_bit(PG_mte_tagged, &page->flags)) + if (try_page_mte_tagging(page)) { mte_restore_page_tags(page_address(page), tags); - - return true; + set_page_mte_tagged(page); + } } void mte_invalidate_tags(int type, pgoff_t offset) From d89585fbb30869011b326ef26c94c3137d228df9 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 3 Nov 2022 18:10:39 -0700 Subject: [PATCH 2889/4122] KVM: arm64: unify the tests for VMAs in memslots when MTE is enabled Previously we allowed creating a memslot containing a private mapping that was not VM_MTE_ALLOWED, but would later reject KVM_RUN with -EFAULT. Now we reject the memory region at memslot creation time. Since this is a minor tweak to the ABI (a VMM that created one of these memslots would fail later anyway), no VMM to my knowledge has MTE support yet, and the hardware with the necessary features is not generally available, we can probably make this ABI change at this point. 
Signed-off-by: Peter Collingbourne Reviewed-by: Catalin Marinas Reviewed-by: Steven Price Reviewed-by: Cornelia Huck Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221104011041.290951-7-pcc@google.com --- arch/arm64/kvm/mmu.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index fa2c85b93149..9ff9a271cf01 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1108,6 +1108,19 @@ static void sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn, } } +static bool kvm_vma_mte_allowed(struct vm_area_struct *vma) +{ + /* + * VM_SHARED mappings are not allowed with MTE to avoid races + * when updating the PG_mte_tagged page flag, see + * sanitise_mte_tags for more details. + */ + if (vma->vm_flags & VM_SHARED) + return false; + + return vma->vm_flags & VM_MTE_ALLOWED; +} + static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_memory_slot *memslot, unsigned long hva, unsigned long fault_status) @@ -1284,9 +1297,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, } if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) { - /* Check the VMM hasn't introduced a new VM_SHARED VMA */ - if ((vma->vm_flags & VM_MTE_ALLOWED) && - !(vma->vm_flags & VM_SHARED)) { + /* Check the VMM hasn't introduced a new disallowed VMA */ + if (kvm_vma_mte_allowed(vma)) { sanitise_mte_tags(kvm, pfn, vma_pagesize); } else { ret = -EFAULT; @@ -1730,12 +1742,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, if (!vma) break; - /* - * VM_SHARED mappings are not allowed with MTE to avoid races - * when updating the PG_mte_tagged page flag, see - * sanitise_mte_tags for more details. 
- */ - if (kvm_has_mte(kvm) && vma->vm_flags & VM_SHARED) { + if (kvm_has_mte(kvm) && !kvm_vma_mte_allowed(vma)) { ret = -EINVAL; break; } From c911f0d4687947915f04024aa01803247fcf7f1a Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 3 Nov 2022 18:10:40 -0700 Subject: [PATCH 2890/4122] KVM: arm64: permit all VM_MTE_ALLOWED mappings with MTE enabled Certain VMMs such as crosvm have features (e.g. sandboxing) that depend on being able to map guest memory as MAP_SHARED. The current restriction on sharing MAP_SHARED pages with the guest is preventing the use of those features with MTE. Now that the races between tasks concurrently clearing tags on the same page have been fixed, remove this restriction. Note that this is a relaxation of the ABI. Signed-off-by: Peter Collingbourne Reviewed-by: Catalin Marinas Reviewed-by: Steven Price Reviewed-by: Cornelia Huck Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221104011041.290951-8-pcc@google.com --- arch/arm64/kvm/mmu.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 9ff9a271cf01..b9402d8b5a90 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1110,14 +1110,6 @@ static void sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn, static bool kvm_vma_mte_allowed(struct vm_area_struct *vma) { - /* - * VM_SHARED mappings are not allowed with MTE to avoid races - * when updating the PG_mte_tagged page flag, see - * sanitise_mte_tags for more details. - */ - if (vma->vm_flags & VM_SHARED) - return false; - return vma->vm_flags & VM_MTE_ALLOWED; } From a4baf8d2639f24d4d31983ff67c01878e7a5393f Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 3 Nov 2022 18:10:41 -0700 Subject: [PATCH 2891/4122] Documentation: document the ABI changes for KVM_CAP_ARM_MTE Document both the restriction on VM_MTE_ALLOWED mappings and the relaxation for shared mappings. 
Signed-off-by: Peter Collingbourne Acked-by: Catalin Marinas Reviewed-by: Cornelia Huck Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221104011041.290951-9-pcc@google.com --- Documentation/virt/kvm/api.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index eee9f857a986..b55f80dadcfe 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -7385,8 +7385,9 @@ hibernation of the host; however the VMM needs to manually save/restore the tags as appropriate if the VM is migrated. When this capability is enabled all memory in memslots must be mapped as -not-shareable (no MAP_SHARED), attempts to create a memslot with a -MAP_SHARED mmap will result in an -EINVAL return. +``MAP_ANONYMOUS`` or with a RAM-based file mapping (``tmpfs``, ``memfd``), +attempts to create a memslot with an invalid mmap will result in an +-EINVAL return. When enabled the VMM may make use of the ``KVM_ARM_MTE_COPY_TAGS`` ioctl to perform a bulk copy of tags to/from the guest. From 62c73bfea048e66168df09da6d3e4510ecda40bb Mon Sep 17 00:00:00 2001 From: Sven Peter Date: Mon, 28 Nov 2022 17:15:26 +0100 Subject: [PATCH 2892/4122] usb: dwc3: Fix race between dwc3_set_mode and __dwc3_set_mode dwc->desired_dr_role is changed by dwc3_set_mode inside a spinlock but then read by __dwc3_set_mode outside of that lock. This can lead to a race condition when very quick successive role switch events happen: CPU A dwc3_set_mode(DWC3_GCTL_PRTCAP_HOST) // first role switch event spin_lock_irqsave(&dwc->lock, flags); dwc->desired_dr_role = mode; // DWC3_GCTL_PRTCAP_HOST spin_unlock_irqrestore(&dwc->lock, flags); queue_work(system_freezable_wq, &dwc->drd_work); CPU B __dwc3_set_mode // .... 
spin_lock_irqsave(&dwc->lock, flags); // desired_dr_role is DWC3_GCTL_PRTCAP_HOST dwc3_set_prtcap(dwc, dwc->desired_dr_role); spin_unlock_irqrestore(&dwc->lock, flags); CPU A dwc3_set_mode(DWC3_GCTL_PRTCAP_DEVICE) // second event spin_lock_irqsave(&dwc->lock, flags); dwc->desired_dr_role = mode; // DWC3_GCTL_PRTCAP_DEVICE spin_unlock_irqrestore(&dwc->lock, flags); CPU B (continues running __dwc3_set_mode) switch (dwc->desired_dr_role) { // DWC3_GCTL_PRTCAP_DEVICE // .... case DWC3_GCTL_PRTCAP_DEVICE: // .... ret = dwc3_gadget_init(dwc); We then have DWC3_GCTL.DWC3_GCTL_PRTCAPDIR = DWC3_GCTL_PRTCAP_HOST and dwc->current_dr_role = DWC3_GCTL_PRTCAP_HOST but initialized the controller in device mode. It's also possible to get into a state where both host and device are initialized at the same time. Fix this race by creating a local copy of desired_dr_role inside __dwc3_set_mode while holding dwc->lock. Fixes: 41ce1456e1db ("usb: dwc3: core: make dwc3_set_mode() work properly") Cc: stable Acked-by: Thinh Nguyen Signed-off-by: Sven Peter Link: https://lore.kernel.org/r/20221128161526.79730-1-sven@svenpeter.dev Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 1f348bc867c2..fc38a8b13efa 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -122,21 +122,25 @@ static void __dwc3_set_mode(struct work_struct *work) unsigned long flags; int ret; u32 reg; + u32 desired_dr_role; mutex_lock(&dwc->mutex); + spin_lock_irqsave(&dwc->lock, flags); + desired_dr_role = dwc->desired_dr_role; + spin_unlock_irqrestore(&dwc->lock, flags); pm_runtime_get_sync(dwc->dev); if (dwc->current_dr_role == DWC3_GCTL_PRTCAP_OTG) dwc3_otg_update(dwc, 0); - if (!dwc->desired_dr_role) + if (!desired_dr_role) goto out; - if (dwc->desired_dr_role == dwc->current_dr_role) + if (desired_dr_role == dwc->current_dr_role) goto out; - if
(dwc->desired_dr_role == DWC3_GCTL_PRTCAP_OTG && dwc->edev) + if (desired_dr_role == DWC3_GCTL_PRTCAP_OTG && dwc->edev) goto out; switch (dwc->current_dr_role) { @@ -164,7 +168,7 @@ static void __dwc3_set_mode(struct work_struct *work) */ if (dwc->current_dr_role && ((DWC3_IP_IS(DWC3) || DWC3_VER_IS_PRIOR(DWC31, 190A)) && - dwc->desired_dr_role != DWC3_GCTL_PRTCAP_OTG)) { + desired_dr_role != DWC3_GCTL_PRTCAP_OTG)) { reg = dwc3_readl(dwc->regs, DWC3_GCTL); reg |= DWC3_GCTL_CORESOFTRESET; dwc3_writel(dwc->regs, DWC3_GCTL, reg); @@ -184,11 +188,11 @@ static void __dwc3_set_mode(struct work_struct *work) spin_lock_irqsave(&dwc->lock, flags); - dwc3_set_prtcap(dwc, dwc->desired_dr_role); + dwc3_set_prtcap(dwc, desired_dr_role); spin_unlock_irqrestore(&dwc->lock, flags); - switch (dwc->desired_dr_role) { + switch (desired_dr_role) { case DWC3_GCTL_PRTCAP_HOST: ret = dwc3_host_init(dwc); if (ret) { From 3b7c7478eda00945987d45f902bc3942c89243d3 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 28 Nov 2022 21:00:54 +0200 Subject: [PATCH 2893/4122] gpiolib: Provide to_gpio_device() helper Provide to_gpio_device() helper which can be utilized in the existing and future code. While at it, make sure it becomes no-op at compilation time. 
Signed-off-by: Andy Shevchenko Reviewed-by: Linus Walleij Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 2 +- drivers/gpio/gpiolib.h | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 2729f7ebab9d..0058ee83989d 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -549,7 +549,7 @@ EXPORT_SYMBOL_GPL(gpiochip_line_is_valid); static void gpiodevice_release(struct device *dev) { - struct gpio_device *gdev = container_of(dev, struct gpio_device, dev); + struct gpio_device *gdev = to_gpio_device(dev); unsigned long flags; spin_lock_irqsave(&gpio_lock, flags); diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h index d900ecdbac46..e443c1023a37 100644 --- a/drivers/gpio/gpiolib.h +++ b/drivers/gpio/gpiolib.h @@ -20,9 +20,9 @@ /** * struct gpio_device - internal state container for GPIO devices - * @id: numerical ID number for the GPIO chip * @dev: the GPIO device struct * @chrdev: character device for the GPIO device + * @id: numerical ID number for the GPIO chip * @mockdev: class device used by the deprecated sysfs interface (may be * NULL) * @owner: helps prevent removal of modules exporting active GPIOs @@ -47,9 +47,9 @@ * userspace. 
*/ struct gpio_device { - int id; struct device dev; struct cdev chrdev; + int id; struct device *mockdev; struct module *owner; struct gpio_chip *chip; @@ -72,6 +72,11 @@ struct gpio_device { #endif }; +static inline struct gpio_device *to_gpio_device(struct device *dev) +{ + return container_of(dev, struct gpio_device, dev); +} + /* gpio suffixes used for ACPI and device tree lookup */ static __maybe_unused const char * const gpio_suffixes[] = { "gpios", "gpio" }; From 9ec1eb1bcceec735fb3c9255cdcdbcc2acf860a0 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Fri, 18 Nov 2022 21:15:02 +0000 Subject: [PATCH 2894/4122] KVM: selftests: Have perf_test_util signal when to stop vCPUs Signal that a test run is complete through perf_test_args instead of having tests open code a similar solution. Ensure that the field resets to false at the beginning of a test run as the structure is reused between test runs, eliminating a couple of bugs: access_tracking_perf_test hangs indefinitely on a subsequent test run, as 'done' remains true. The bug doesn't amount to much right now, as x86 supports a single guest mode. However, this is a precondition of enabling the test for other architectures with >1 guest mode, like arm64. memslot_modification_stress_test has the exact opposite problem, where subsequent test runs complete immediately as 'run_vcpus' remains false. 
Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson [oliver: added commit message, preserve spin_wait_for_next_iteration()] Signed-off-by: Oliver Upton Reviewed-by: Gavin Shan Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221118211503.4049023-2-oliver.upton@linux.dev --- tools/testing/selftests/kvm/access_tracking_perf_test.c | 8 +------- tools/testing/selftests/kvm/include/perf_test_util.h | 3 +++ tools/testing/selftests/kvm/lib/perf_test_util.c | 3 +++ .../selftests/kvm/memslot_modification_stress_test.c | 6 +----- 4 files changed, 8 insertions(+), 12 deletions(-) diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c index 76c583a07ea2..942370d57392 100644 --- a/tools/testing/selftests/kvm/access_tracking_perf_test.c +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -58,9 +58,6 @@ static enum { ITERATION_MARK_IDLE, } iteration_work; -/* Set to true when vCPU threads should exit. */ -static bool done; - /* The iteration that was last completed by each vCPU. 
*/ static int vcpu_last_completed_iteration[KVM_MAX_VCPUS]; @@ -211,7 +208,7 @@ static bool spin_wait_for_next_iteration(int *current_iteration) int last_iteration = *current_iteration; do { - if (READ_ONCE(done)) + if (READ_ONCE(perf_test_args.stop_vcpus)) return false; *current_iteration = READ_ONCE(iteration); @@ -321,9 +318,6 @@ static void run_test(enum vm_guest_mode mode, void *arg) mark_memory_idle(vm, nr_vcpus); access_memory(vm, nr_vcpus, ACCESS_READ, "Reading from idle memory"); - /* Set done to signal the vCPU threads to exit */ - done = true; - perf_test_join_vcpu_threads(nr_vcpus); perf_test_destroy_vm(vm); } diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h index eaa88df0555a..536d7c3c3f14 100644 --- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -40,6 +40,9 @@ struct perf_test_args { /* Run vCPUs in L2 instead of L1, if the architecture supports it. */ bool nested; + /* Test is done, stop running vCPUs. 
*/ + bool stop_vcpus; + struct perf_test_vcpu_args vcpu_args[KVM_MAX_VCPUS]; }; diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c index 9618b37c66f7..ee3f499ccbd2 100644 --- a/tools/testing/selftests/kvm/lib/perf_test_util.c +++ b/tools/testing/selftests/kvm/lib/perf_test_util.c @@ -267,6 +267,7 @@ void perf_test_start_vcpu_threads(int nr_vcpus, vcpu_thread_fn = vcpu_fn; WRITE_ONCE(all_vcpu_threads_running, false); + WRITE_ONCE(perf_test_args.stop_vcpus, false); for (i = 0; i < nr_vcpus; i++) { struct vcpu_thread *vcpu = &vcpu_threads[i]; @@ -289,6 +290,8 @@ void perf_test_join_vcpu_threads(int nr_vcpus) { int i; + WRITE_ONCE(perf_test_args.stop_vcpus, true); + for (i = 0; i < nr_vcpus; i++) pthread_join(vcpu_threads[i].thread, NULL); } diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c index bb1d17a1171b..3a5e4518307c 100644 --- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c +++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c @@ -34,8 +34,6 @@ static int nr_vcpus = 1; static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; -static bool run_vcpus = true; - static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args) { struct kvm_vcpu *vcpu = vcpu_args->vcpu; @@ -45,7 +43,7 @@ static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args) run = vcpu->run; /* Let the guest access its memory until a stop signal is received */ - while (READ_ONCE(run_vcpus)) { + while (!READ_ONCE(perf_test_args.stop_vcpus)) { ret = _vcpu_run(vcpu); TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); @@ -110,8 +108,6 @@ static void run_test(enum vm_guest_mode mode, void *arg) add_remove_memslot(vm, p->memslot_modification_delay, p->nr_memslot_modifications); - run_vcpus = false; - perf_test_join_vcpu_threads(nr_vcpus); pr_info("All vCPU threads joined\n"); From 
4568180411e0fb5613e217da1c693466e39b9c27 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Fri, 18 Nov 2022 21:15:03 +0000 Subject: [PATCH 2895/4122] KVM: selftests: Build access_tracking_perf_test for arm64 Does exactly what it says on the tin. Reviewed-by: Gavin Shan Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221118211503.4049023-3-oliver.upton@linux.dev --- tools/testing/selftests/kvm/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 0172eb6cb6ee..4c0ff91a8964 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -156,6 +156,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/psci_test TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config TEST_GEN_PROGS_aarch64 += aarch64/vgic_init TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq +TEST_GEN_PROGS_aarch64 += access_tracking_perf_test TEST_GEN_PROGS_aarch64 += demand_paging_test TEST_GEN_PROGS_aarch64 += dirty_log_test TEST_GEN_PROGS_aarch64 += dirty_log_perf_test From 41555cc9e2e9778ddc7c0293a4a2e4995e332643 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Fri, 21 Oct 2022 17:00:30 +0100 Subject: [PATCH 2896/4122] RISC-V: enable sparsemem by default for defconfig on an arch level, RISC-V defaults to FLATMEM. On PolarFire SoC, the memory layout is almost always sparse, with a maximum of 1 GiB at 0x8000_0000 & a possible 16 GiB range at 0x10_0000_0000. The Icicle kit, for example, has 2 GiB of DDR - so there's a big hole in the memory map between the two gigs. Prior to v6.1-rc1, boot times from defconfig builds were pretty bad on Icicle but enabling sparsemem would fix those issues. As of v6.1-rc1, the Icicle kit no longer boots from defconfig builds with the in-kernel devicetree. 
A change to the memory map resulted in a further "sparse-ification", producing a splat on boot: OF: fdt: Ignoring memory range 0x80000000 - 0x80200000 Machine model: Microchip PolarFire-SoC Icicle Kit earlycon: ns16550a0 at MMIO32 0x0000000020100000 (options '115200n8') printk: bootconsole [ns16550a0] enabled printk: debug: skip boot console de-registration. efi: UEFI not found. Zone ranges: DMA32 [mem 0x0000000080200000-0x00000000ffffffff] Normal [mem 0x0000000100000000-0x000000107fffffff] Movable zone start for each node Early memory node ranges node 0: [mem 0x0000000080200000-0x00000000bfbfffff] node 0: [mem 0x00000000bfc00000-0x00000000bfffffff] node 0: [mem 0x0000001040000000-0x000000107fffffff] Initmem setup node 0 [mem 0x0000000080200000-0x000000107fffffff] Kernel panic - not syncing: Failed to allocate 1073741824 bytes for node 0 memory map CPU: 0 PID: 0 Comm: swapper Not tainted 5.19.0-dirty #1 Hardware name: Microchip PolarFire-SoC Icicle Kit (DT) Call Trace: [] show_stack+0x30/0x3c [] dump_stack_lvl+0x4a/0x66 [] dump_stack+0x18/0x20 [] panic+0x124/0x2c6 [] free_area_init_core+0x0/0x11e [] free_area_init_node+0xc2/0xf6 [] free_area_init+0x222/0x260 [] misc_mem_init+0x62/0x9a [] setup_arch+0xb0/0xea [] start_kernel+0x88/0x4ee ---[ end Kernel panic - not syncing: Failed to allocate 1073741824 bytes for node 0 memory map ]--- With the aim of keeping defconfig builds booting on icicle, enable SPARSEMEM_MANUAL.
Signed-off-by: Conor Dooley Link: https://lore.kernel.org/r/20221021160028.4042304-1-conor@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/configs/defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 05fd5fcf24f9..daba5d743862 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -38,6 +38,7 @@ CONFIG_KVM=m CONFIG_JUMP_LABEL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y +CONFIG_SPARSEMEM_MANUAL=y CONFIG_BLK_DEV_THROTTLING=y CONFIG_NET=y CONFIG_PACKET=y From 4989764d8ed3d3d1024e4e831ff2affc40ee01d6 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 29 Nov 2022 16:29:24 -0400 Subject: [PATCH 2897/4122] iommu: Add IOMMU_CAP_ENFORCE_CACHE_COHERENCY This queries if a domain linked to a device should expect to support enforce_cache_coherency() so iommufd can negotiate the rules for when a domain should be shared or not. For iommufd a device that declares IOMMU_CAP_ENFORCE_CACHE_COHERENCY will not be attached to a domain that does not support it. 
Link: https://lore.kernel.org/r/1-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com Reviewed-by: Lu Baolu Reviewed-by: Kevin Tian Tested-by: Nicolin Chen Tested-by: Yi Liu Tested-by: Lixiao Yang Tested-by: Matthew Rosato Tested-by: Yu He Signed-off-by: Jason Gunthorpe --- drivers/iommu/amd/iommu.c | 2 ++ drivers/iommu/intel/iommu.c | 20 +++++++++++++------- include/linux/iommu.h | 5 +++++ 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 45299eb7e8e3..240c535e317c 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2278,6 +2278,8 @@ static bool amd_iommu_capable(struct device *dev, enum iommu_cap cap) return false; case IOMMU_CAP_PRE_BOOT_PROTECTION: return amdr_ivrs_remap_support; + case IOMMU_CAP_ENFORCE_CACHE_COHERENCY: + return true; default: break; } diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index f298e51d5aa6..157c97274110 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4450,14 +4450,20 @@ static bool intel_iommu_enforce_cache_coherency(struct iommu_domain *domain) static bool intel_iommu_capable(struct device *dev, enum iommu_cap cap) { - if (cap == IOMMU_CAP_CACHE_COHERENCY) - return true; - if (cap == IOMMU_CAP_INTR_REMAP) - return irq_remapping_enabled == 1; - if (cap == IOMMU_CAP_PRE_BOOT_PROTECTION) - return dmar_platform_optin(); + struct device_domain_info *info = dev_iommu_priv_get(dev); - return false; + switch (cap) { + case IOMMU_CAP_CACHE_COHERENCY: + return true; + case IOMMU_CAP_INTR_REMAP: + return irq_remapping_enabled == 1; + case IOMMU_CAP_PRE_BOOT_PROTECTION: + return dmar_platform_optin(); + case IOMMU_CAP_ENFORCE_CACHE_COHERENCY: + return ecap_sc_support(info->iommu->ecap); + default: + return false; + } } static struct iommu_device *intel_iommu_probe_device(struct device *dev) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 68d7d304cdb7..a09fd32d8cc2 100644 --- 
a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -124,6 +124,11 @@ enum iommu_cap { IOMMU_CAP_NOEXEC, /* IOMMU_NOEXEC flag */ IOMMU_CAP_PRE_BOOT_PROTECTION, /* Firmware says it used the IOMMU for DMA protection and we should too */ + /* + * Per-device flag indicating if enforce_cache_coherency() will work on + * this device. + */ + IOMMU_CAP_ENFORCE_CACHE_COHERENCY, }; /* These are the possible reserved region types */ From 89395ccedbc153fecbc29342fbb94a6dfadf24cd Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 29 Nov 2022 16:29:25 -0400 Subject: [PATCH 2898/4122] iommu: Add device-centric DMA ownership interfaces These complement the group interfaces used by VFIO and are for use by iommufd. The main difference is that multiple devices in the same group can all share the ownership by passing the same ownership pointer. Move the common code into shared functions. Link: https://lore.kernel.org/r/2-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com Tested-by: Nicolin Chen Tested-by: Yi Liu Tested-by: Lixiao Yang Tested-by: Matthew Rosato Reviewed-by: Kevin Tian Reviewed-by: Eric Auger Signed-off-by: Lu Baolu Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommu.c | 121 +++++++++++++++++++++++++++++++++--------- include/linux/iommu.h | 12 +++++ 2 files changed, 107 insertions(+), 26 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 6ca377f4fbf9..d69ebba81beb 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -3108,41 +3108,49 @@ static int __iommu_group_alloc_blocking_domain(struct iommu_group *group) return 0; } +static int __iommu_take_dma_ownership(struct iommu_group *group, void *owner) +{ + int ret; + + if ((group->domain && group->domain != group->default_domain) || + !xa_empty(&group->pasid_array)) + return -EBUSY; + + ret = __iommu_group_alloc_blocking_domain(group); + if (ret) + return ret; + ret = __iommu_group_set_domain(group, group->blocking_domain); + if (ret) + return ret; + + group->owner = owner; + 
group->owner_cnt++; + return 0; +} + /** * iommu_group_claim_dma_owner() - Set DMA ownership of a group * @group: The group. * @owner: Caller specified pointer. Used for exclusive ownership. * - * This is to support backward compatibility for vfio which manages - * the dma ownership in iommu_group level. New invocations on this - * interface should be prohibited. + * This is to support backward compatibility for vfio which manages the dma + * ownership in iommu_group level. New invocations on this interface should be + * prohibited. Only a single owner may exist for a group. */ int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner) { int ret = 0; + if (WARN_ON(!owner)) + return -EINVAL; + mutex_lock(&group->mutex); if (group->owner_cnt) { ret = -EPERM; goto unlock_out; - } else { - if ((group->domain && group->domain != group->default_domain) || - !xa_empty(&group->pasid_array)) { - ret = -EBUSY; - goto unlock_out; - } - - ret = __iommu_group_alloc_blocking_domain(group); - if (ret) - goto unlock_out; - - ret = __iommu_group_set_domain(group, group->blocking_domain); - if (ret) - goto unlock_out; - group->owner = owner; } - group->owner_cnt++; + ret = __iommu_take_dma_ownership(group, owner); unlock_out: mutex_unlock(&group->mutex); @@ -3151,30 +3159,91 @@ unlock_out: EXPORT_SYMBOL_GPL(iommu_group_claim_dma_owner); /** - * iommu_group_release_dma_owner() - Release DMA ownership of a group - * @group: The group. + * iommu_device_claim_dma_owner() - Set DMA ownership of a device + * @dev: The device. + * @owner: Caller specified pointer. Used for exclusive ownership. * - * Release the DMA ownership claimed by iommu_group_claim_dma_owner(). + * Claim the DMA ownership of a device. Multiple devices in the same group may + * concurrently claim ownership if they present the same owner value. 
Returns 0 + * on success and error code on failure */ -void iommu_group_release_dma_owner(struct iommu_group *group) +int iommu_device_claim_dma_owner(struct device *dev, void *owner) +{ + struct iommu_group *group = iommu_group_get(dev); + int ret = 0; + + if (!group) + return -ENODEV; + if (WARN_ON(!owner)) + return -EINVAL; + + mutex_lock(&group->mutex); + if (group->owner_cnt) { + if (group->owner != owner) { + ret = -EPERM; + goto unlock_out; + } + group->owner_cnt++; + goto unlock_out; + } + + ret = __iommu_take_dma_ownership(group, owner); +unlock_out: + mutex_unlock(&group->mutex); + iommu_group_put(group); + + return ret; +} +EXPORT_SYMBOL_GPL(iommu_device_claim_dma_owner); + +static void __iommu_release_dma_ownership(struct iommu_group *group) { int ret; - mutex_lock(&group->mutex); if (WARN_ON(!group->owner_cnt || !group->owner || !xa_empty(&group->pasid_array))) - goto unlock_out; + return; group->owner_cnt = 0; group->owner = NULL; ret = __iommu_group_set_domain(group, group->default_domain); WARN(ret, "iommu driver failed to attach the default domain"); +} -unlock_out: +/** + * iommu_group_release_dma_owner() - Release DMA ownership of a group + * @group: The group + * + * Release the DMA ownership claimed by iommu_group_claim_dma_owner(). + */ +void iommu_group_release_dma_owner(struct iommu_group *group) +{ + mutex_lock(&group->mutex); + __iommu_release_dma_ownership(group); mutex_unlock(&group->mutex); } EXPORT_SYMBOL_GPL(iommu_group_release_dma_owner); +/** + * iommu_device_release_dma_owner() - Release DMA ownership of a device + * @dev: The device. + * + * Release the DMA ownership claimed by iommu_device_claim_dma_owner(). 
+ */ +void iommu_device_release_dma_owner(struct device *dev) +{ + struct iommu_group *group = iommu_group_get(dev); + + mutex_lock(&group->mutex); + if (group->owner_cnt > 1) + group->owner_cnt--; + else + __iommu_release_dma_ownership(group); + mutex_unlock(&group->mutex); + iommu_group_put(group); +} +EXPORT_SYMBOL_GPL(iommu_device_release_dma_owner); + /** * iommu_group_dma_owner_claimed() - Query group dma ownership status * @group: The group. diff --git a/include/linux/iommu.h b/include/linux/iommu.h index a09fd32d8cc2..1690c334e516 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -707,6 +707,9 @@ int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner); void iommu_group_release_dma_owner(struct iommu_group *group); bool iommu_group_dma_owner_claimed(struct iommu_group *group); +int iommu_device_claim_dma_owner(struct device *dev, void *owner); +void iommu_device_release_dma_owner(struct device *dev); + struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, struct mm_struct *mm); int iommu_attach_device_pasid(struct iommu_domain *domain, @@ -1064,6 +1067,15 @@ static inline bool iommu_group_dma_owner_claimed(struct iommu_group *group) return false; } +static inline void iommu_device_release_dma_owner(struct device *dev) +{ +} + +static inline int iommu_device_claim_dma_owner(struct device *dev, void *owner) +{ + return -ENODEV; +} + static inline struct iommu_domain * iommu_sva_domain_alloc(struct device *dev, struct mm_struct *mm) { From 5fe937862c8426f24cd1dcbf7c22fb1a31069b4f Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 29 Nov 2022 16:29:26 -0400 Subject: [PATCH 2899/4122] interval-tree: Add a utility to iterate over spans in an interval tree The span iterator travels over the indexes of the interval_tree, not the nodes, and classifies spans of indexes as either 'used' or 'hole'. 'used' spans are fully covered by nodes in the tree and 'hole' spans have no node intersecting the span. 
This is done greedily such that spans are maximally sized and every iteration step switches between used/hole. As an example a trivial allocator can be written as: for (interval_tree_span_iter_first(&span, itree, 0, ULONG_MAX); !interval_tree_span_iter_done(&span); interval_tree_span_iter_next(&span)) if (span.is_hole && span.last_hole - span.start_hole >= allocation_size - 1) return span.start_hole; With all the tricky boundary conditions handled by the library code. The following iommufd patches have several algorithms for its overlapping node interval trees that are significantly simplified with this kind of iteration primitive. As it seems generally useful, put it into lib/. Link: https://lore.kernel.org/r/3-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Eric Auger Tested-by: Nicolin Chen Tested-by: Yi Liu Tested-by: Lixiao Yang Tested-by: Matthew Rosato Signed-off-by: Jason Gunthorpe --- .clang-format | 1 + include/linux/interval_tree.h | 58 +++++++++++++++ lib/Kconfig | 4 ++ lib/interval_tree.c | 132 ++++++++++++++++++++++++++++++++++ 4 files changed, 195 insertions(+) diff --git a/.clang-format b/.clang-format index 1247d54f9e49..96d07786dcfb 100644 --- a/.clang-format +++ b/.clang-format @@ -440,6 +440,7 @@ ForEachMacros: - 'inet_lhash2_for_each_icsk' - 'inet_lhash2_for_each_icsk_continue' - 'inet_lhash2_for_each_icsk_rcu' + - 'interval_tree_for_each_span' - 'intlist__for_each_entry' - 'intlist__for_each_entry_safe' - 'kcore_copy__for_each_phdr' diff --git a/include/linux/interval_tree.h b/include/linux/interval_tree.h index 288c26f50732..2b8026a39906 100644 --- a/include/linux/interval_tree.h +++ b/include/linux/interval_tree.h @@ -27,4 +27,62 @@ extern struct interval_tree_node * interval_tree_iter_next(struct interval_tree_node *node, unsigned long start, unsigned long last); +/** + * struct interval_tree_span_iter - Find used and unused spans. 
+ * @start_hole: Start of an interval for a hole when is_hole == 1 + * @last_hole: Inclusive end of an interval for a hole when is_hole == 1 + * @start_used: Start of a used interval when is_hole == 0 + * @last_used: Inclusive end of a used interval when is_hole == 0 + * @is_hole: 0 == used, 1 == is_hole, -1 == done iteration + * + * This iterator travels over spans in an interval tree. It does not return + * nodes but classifies each span as either a hole, where no nodes intersect, or + * a used, which is fully covered by nodes. Each iteration step toggles between + * hole and used until the entire range is covered. The returned spans always + * fully cover the requested range. + * + * The iterator is greedy, it always returns the largest hole or used possible, + * consolidating all consecutive nodes. + * + * Use interval_tree_span_iter_done() to detect end of iteration. + */ +struct interval_tree_span_iter { + /* private: not for use by the caller */ + struct interval_tree_node *nodes[2]; + unsigned long first_index; + unsigned long last_index; + + /* public: */ + union { + unsigned long start_hole; + unsigned long start_used; + }; + union { + unsigned long last_hole; + unsigned long last_used; + }; + int is_hole; +}; + +void interval_tree_span_iter_first(struct interval_tree_span_iter *state, + struct rb_root_cached *itree, + unsigned long first_index, + unsigned long last_index); +void interval_tree_span_iter_advance(struct interval_tree_span_iter *iter, + struct rb_root_cached *itree, + unsigned long new_index); +void interval_tree_span_iter_next(struct interval_tree_span_iter *state); + +static inline bool +interval_tree_span_iter_done(struct interval_tree_span_iter *state) +{ + return state->is_hole == -1; +} + +#define interval_tree_for_each_span(span, itree, first_index, last_index) \ + for (interval_tree_span_iter_first(span, itree, \ + first_index, last_index); \ + !interval_tree_span_iter_done(span); \ + interval_tree_span_iter_next(span)) + #endif /* 
_LINUX_INTERVAL_TREE_H */ diff --git a/lib/Kconfig b/lib/Kconfig index 9bbf8a4b2108..c6c323fd2517 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -479,6 +479,10 @@ config INTERVAL_TREE for more information. +config INTERVAL_TREE_SPAN_ITER + bool + depends on INTERVAL_TREE + config XARRAY_MULTI bool help diff --git a/lib/interval_tree.c b/lib/interval_tree.c index 593ce56ece50..3412737ff365 100644 --- a/lib/interval_tree.c +++ b/lib/interval_tree.c @@ -15,3 +15,135 @@ EXPORT_SYMBOL_GPL(interval_tree_insert); EXPORT_SYMBOL_GPL(interval_tree_remove); EXPORT_SYMBOL_GPL(interval_tree_iter_first); EXPORT_SYMBOL_GPL(interval_tree_iter_next); + +#ifdef CONFIG_INTERVAL_TREE_SPAN_ITER +/* + * Roll nodes[1] into nodes[0] by advancing nodes[1] to the end of a contiguous + * span of nodes. This makes nodes[0]->last the end of that contiguous used span + * indexes that started at the original nodes[1]->start. nodes[1] is now the + * first node starting the next used span. A hole span is between nodes[0]->last + * and nodes[1]->start. nodes[1] must be !NULL. 
+ */ +static void +interval_tree_span_iter_next_gap(struct interval_tree_span_iter *state) +{ + struct interval_tree_node *cur = state->nodes[1]; + + state->nodes[0] = cur; + do { + if (cur->last > state->nodes[0]->last) + state->nodes[0] = cur; + cur = interval_tree_iter_next(cur, state->first_index, + state->last_index); + } while (cur && (state->nodes[0]->last >= cur->start || + state->nodes[0]->last + 1 == cur->start)); + state->nodes[1] = cur; +} + +void interval_tree_span_iter_first(struct interval_tree_span_iter *iter, + struct rb_root_cached *itree, + unsigned long first_index, + unsigned long last_index) +{ + iter->first_index = first_index; + iter->last_index = last_index; + iter->nodes[0] = NULL; + iter->nodes[1] = + interval_tree_iter_first(itree, first_index, last_index); + if (!iter->nodes[1]) { + /* No nodes intersect the span, whole span is hole */ + iter->start_hole = first_index; + iter->last_hole = last_index; + iter->is_hole = 1; + return; + } + if (iter->nodes[1]->start > first_index) { + /* Leading hole on first iteration */ + iter->start_hole = first_index; + iter->last_hole = iter->nodes[1]->start - 1; + iter->is_hole = 1; + interval_tree_span_iter_next_gap(iter); + return; + } + + /* Starting inside a used */ + iter->start_used = first_index; + iter->is_hole = 0; + interval_tree_span_iter_next_gap(iter); + iter->last_used = iter->nodes[0]->last; + if (iter->last_used >= last_index) { + iter->last_used = last_index; + iter->nodes[0] = NULL; + iter->nodes[1] = NULL; + } +} +EXPORT_SYMBOL_GPL(interval_tree_span_iter_first); + +void interval_tree_span_iter_next(struct interval_tree_span_iter *iter) +{ + if (!iter->nodes[0] && !iter->nodes[1]) { + iter->is_hole = -1; + return; + } + + if (iter->is_hole) { + iter->start_used = iter->last_hole + 1; + iter->last_used = iter->nodes[0]->last; + if (iter->last_used >= iter->last_index) { + iter->last_used = iter->last_index; + iter->nodes[0] = NULL; + iter->nodes[1] = NULL; + } + iter->is_hole = 0; + 
return; + } + + if (!iter->nodes[1]) { + /* Trailing hole */ + iter->start_hole = iter->nodes[0]->last + 1; + iter->last_hole = iter->last_index; + iter->nodes[0] = NULL; + iter->is_hole = 1; + return; + } + + /* must have both nodes[0] and [1], interior hole */ + iter->start_hole = iter->nodes[0]->last + 1; + iter->last_hole = iter->nodes[1]->start - 1; + iter->is_hole = 1; + interval_tree_span_iter_next_gap(iter); +} +EXPORT_SYMBOL_GPL(interval_tree_span_iter_next); + +/* + * Advance the iterator index to a specific position. The returned used/hole is + * updated to start at new_index. This is faster than calling + * interval_tree_span_iter_first() as it can avoid full searches in several + * cases where the iterator is already set. + */ +void interval_tree_span_iter_advance(struct interval_tree_span_iter *iter, + struct rb_root_cached *itree, + unsigned long new_index) +{ + if (iter->is_hole == -1) + return; + + iter->first_index = new_index; + if (new_index > iter->last_index) { + iter->is_hole = -1; + return; + } + + /* Rely on the union aliasing hole/used */ + if (iter->start_hole <= new_index && new_index <= iter->last_hole) { + iter->start_hole = new_index; + return; + } + if (new_index == iter->last_hole + 1) + interval_tree_span_iter_next(iter); + else + interval_tree_span_iter_first(iter, itree, new_index, + iter->last_index); +} +EXPORT_SYMBOL_GPL(interval_tree_span_iter_advance); +#endif From 632ce1377dbbdabff575d33bec9c79d75ef0395a Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 29 Nov 2022 16:29:27 -0400 Subject: [PATCH 2900/4122] scripts/kernel-doc: support EXPORT_SYMBOL_NS_GPL() with -export Parse EXPORT_SYMBOL_NS_GPL() in addition to EXPORT_SYMBOL_GPL() for use with the -export flag. 
Link: https://lore.kernel.org/r/4-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com Acked-by: Jonathan Corbet Signed-off-by: Jason Gunthorpe --- scripts/kernel-doc | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/scripts/kernel-doc b/scripts/kernel-doc index aea04365bc69..48e3feca3170 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -256,6 +256,7 @@ my $doc_inline_sect = '\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)'; my $doc_inline_end = '^\s*\*/\s*$'; my $doc_inline_oneline = '^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$'; my $export_symbol = '^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*;'; +my $export_symbol_ns = '^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*\w+\)\s*;'; my $function_pointer = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)}; my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i; @@ -1948,6 +1949,10 @@ sub process_export_file($) { next if (defined($nosymbol_table{$2})); $function_table{$2} = 1; } + if (/$export_symbol_ns/) { + next if (defined($nosymbol_table{$2})); + $function_table{$2} = 1; + } } close(IN); @@ -2419,12 +2424,12 @@ found on PATH. =item -export Only output documentation for the symbols that have been exported using -EXPORT_SYMBOL() or EXPORT_SYMBOL_GPL() in any input FILE or -export-file FILE. +EXPORT_SYMBOL() and related macros in any input FILE or -export-file FILE. =item -internal Only output documentation for the symbols that have NOT been exported using -EXPORT_SYMBOL() or EXPORT_SYMBOL_GPL() in any input FILE or -export-file FILE. +EXPORT_SYMBOL() and related macros in any input FILE or -export-file FILE. =item -function NAME @@ -2451,8 +2456,7 @@ Do not output DOC: sections. =item -export-file FILE -Specify an additional FILE in which to look for EXPORT_SYMBOL() and -EXPORT_SYMBOL_GPL(). +Specify an additional FILE in which to look for EXPORT_SYMBOL information. To be used with -export or -internal. 
From d03a6d4e2beaa358b6c4e16fe106e813a57e927a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 29 Nov 2022 15:15:39 +0100 Subject: [PATCH 2901/4122] USB: serial: cp210x: add support for B0 hangup A request to set the line speed to B0 is used to hang up a modem connection by deasserting the modem control lines. Note that there is no need to reconfigure the line speed in hardware when B0 is requested (even if some drivers do set it to an arbitrary value for implementation or protocol reasons). Reviewed-by: Alex Henrie Signed-off-by: Johan Hovold --- drivers/usb/serial/cp210x.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index f6fb23620e87..67372acc2352 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -1049,11 +1049,12 @@ static void cp210x_change_speed(struct tty_struct *tty, struct cp210x_serial_private *priv = usb_get_serial_data(serial); u32 baud; + if (tty->termios.c_ospeed == 0) + return; + /* * This maps the requested rate to the actual rate, a valid rate on * cp2102 or cp2103, or to an arbitrary rate in [1M, max_speed]. - * - * NOTE: B0 is not implemented. 
*/ baud = clamp(tty->termios.c_ospeed, priv->min_speed, priv->max_speed); @@ -1146,7 +1147,8 @@ static void cp210x_set_flow_control(struct tty_struct *tty, tty->termios.c_iflag &= ~(IXON | IXOFF); } - if (old_termios && + if (tty->termios.c_ospeed != 0 && + old_termios && old_termios->c_ospeed != 0 && C_CRTSCTS(tty) == (old_termios->c_cflag & CRTSCTS) && I_IXON(tty) == (old_termios->c_iflag & IXON) && I_IXOFF(tty) == (old_termios->c_iflag & IXOFF) && @@ -1171,6 +1173,14 @@ static void cp210x_set_flow_control(struct tty_struct *tty, mutex_lock(&port_priv->mutex); + if (tty->termios.c_ospeed == 0) { + port_priv->dtr = false; + port_priv->rts = false; + } else if (old_termios && old_termios->c_ospeed == 0) { + port_priv->dtr = true; + port_priv->rts = true; + } + ret = cp210x_read_reg_block(port, CP210X_GET_FLOW, &flow_ctl, sizeof(flow_ctl)); if (ret) @@ -1243,7 +1253,8 @@ static void cp210x_set_termios(struct tty_struct *tty, u16 bits; int ret; - if (old_termios && !cp210x_termios_change(&tty->termios, old_termios)) + if (old_termios && !cp210x_termios_change(&tty->termios, old_termios) && + tty->termios.c_ospeed != 0) return; if (!old_termios || tty->termios.c_ospeed != old_termios->c_ospeed) From 67e6272d53386f9708f91c4d0015c4a1c470eef5 Mon Sep 17 00:00:00 2001 From: Or Har-Toov Date: Mon, 28 Nov 2022 13:52:45 +0200 Subject: [PATCH 2902/4122] RDMA/nldev: Add NULL check to silence false warnings Using nlmsg_put causes static analysis tools to report many false positives of not checking the return value of nlmsg_put. In all uses in nldev.c, payload parameter is 0 so NULL will never be returned. So let's add useless checks to silence the warnings. 
Signed-off-by: Or Har-Toov Reviewed-by: Michael Guralnik Link: https://lore.kernel.org/r/bd924da89d5b4f5291a4a01d9b5ae47c0a9b6a3f.1669636336.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/nldev.c | 44 +++++++++++++++++++++++++++------ 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index ca0ed7d14326..b4716cda65d8 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -1043,6 +1043,10 @@ static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET), 0, 0); + if (!nlh) { + err = -EMSGSIZE; + goto err_free; + } err = fill_dev_info(msg, device); if (err) @@ -1128,7 +1132,7 @@ static int _nldev_get_dumpit(struct ib_device *device, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET), 0, NLM_F_MULTI); - if (fill_dev_info(skb, device)) { + if (!nlh || fill_dev_info(skb, device)) { nlmsg_cancel(skb, nlh); goto out; } @@ -1187,6 +1191,10 @@ static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET), 0, 0); + if (!nlh) { + err = -EMSGSIZE; + goto err_free; + } err = fill_port_info(msg, device, port, sock_net(skb->sk)); if (err) @@ -1248,7 +1256,7 @@ static int nldev_port_get_dumpit(struct sk_buff *skb, RDMA_NLDEV_CMD_PORT_GET), 0, NLM_F_MULTI); - if (fill_port_info(skb, device, p, sock_net(skb->sk))) { + if (!nlh || fill_port_info(skb, device, p, sock_net(skb->sk))) { nlmsg_cancel(skb, nlh); goto out; } @@ -1290,6 +1298,10 @@ static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET), 0, 0); + if (!nlh) { + ret = -EMSGSIZE; + goto err_free; + } ret = 
fill_res_info(msg, device); if (ret) @@ -1321,7 +1333,7 @@ static int _nldev_res_get_dumpit(struct ib_device *device, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET), 0, NLM_F_MULTI); - if (fill_res_info(skb, device)) { + if (!nlh || fill_res_info(skb, device)) { nlmsg_cancel(skb, nlh); goto out; } @@ -1456,7 +1468,7 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh, RDMA_NL_GET_OP(nlh->nlmsg_type)), 0, 0); - if (fill_nldev_handle(msg, device)) { + if (!nlh || fill_nldev_handle(msg, device)) { ret = -EMSGSIZE; goto err_free; } @@ -1535,7 +1547,7 @@ static int res_get_common_dumpit(struct sk_buff *skb, RDMA_NL_GET_OP(cb->nlh->nlmsg_type)), 0, NLM_F_MULTI); - if (fill_nldev_handle(skb, device)) { + if (!nlh || fill_nldev_handle(skb, device)) { ret = -EMSGSIZE; goto err; } @@ -1797,6 +1809,10 @@ static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET_CHARDEV), 0, 0); + if (!nlh) { + err = -EMSGSIZE; + goto out_nlmsg; + } data.nl_msg = msg; err = ib_get_client_nl_info(ibdev, client_name, &data); @@ -1854,6 +1870,10 @@ static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_SYS_GET), 0, 0); + if (!nlh) { + nlmsg_free(msg); + return -EMSGSIZE; + } err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_NETNS_MODE, (u8)ib_devices_shared_netns); @@ -2034,7 +2054,7 @@ static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_SET), 0, 0); - if (fill_nldev_handle(msg, device) || + if (!nlh || fill_nldev_handle(msg, device) || nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) { ret = -EMSGSIZE; goto err_free_msg; @@ -2103,6 +2123,10 @@ static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_SET), 0, 0); + if (!nlh) { + ret = -EMSGSIZE; + goto err_fill; + } cntn = 
nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]); qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]); @@ -2173,7 +2197,7 @@ static int stat_get_doit_default_counter(struct sk_buff *skb, RDMA_NLDEV_CMD_STAT_GET), 0, 0); - if (fill_nldev_handle(msg, device) || + if (!nlh || fill_nldev_handle(msg, device) || nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) { ret = -EMSGSIZE; goto err_msg; @@ -2261,6 +2285,10 @@ static int stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_GET), 0, 0); + if (!nlh) { + ret = -EMSGSIZE; + goto err_msg; + } ret = rdma_counter_get_mode(device, port, &mode, &mask); if (ret) @@ -2393,7 +2421,7 @@ static int nldev_stat_get_counter_status_doit(struct sk_buff *skb, 0, 0); ret = -EMSGSIZE; - if (fill_nldev_handle(msg, device) || + if (!nlh || fill_nldev_handle(msg, device) || nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) goto err_msg; From fc8f93ad3e5485d45c992233c96acd902992dfc4 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Mon, 28 Nov 2022 13:52:46 +0200 Subject: [PATCH 2903/4122] RDMA/nldev: Fix failure to send large messages Return "-EMSGSIZE" instead of "-EINVAL" when filling a QP entry, so that new SKBs will be allocated if there's not enough room in current SKB. 
Fixes: 65959522f806 ("RDMA: Add support to dump resource tracker in RAW format") Signed-off-by: Mark Zhang Reviewed-by: Patrisious Haddad Link: https://lore.kernel.org/r/b5e9c62f6b8369acab5648b661bf539cbceeffdc.1669636336.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/nldev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index b4716cda65d8..a981ac2f0975 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -513,7 +513,7 @@ static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin, /* In create_qp() port is not set yet */ if (qp->port && nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp->port)) - return -EINVAL; + return -EMSGSIZE; ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num); if (ret) From 6c645b01e536757a9e1a9f72c13767f9b3f8559f Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 27 Nov 2022 22:49:26 +1000 Subject: [PATCH 2904/4122] KVM: PPC: Book3E: Fix CONFIG_TRACE_IRQFLAGS support 32-bit does not call trace_irqs_off() to match the trace_irqs_on() call in kvmppc_fix_ee_before_entry(). This can lead to irqs being enabled twice in the trace, and the irqs-off region between guest exit and the host enabling local irqs again is not properly traced. 64-bit code does call this, but from asm code where volatiles are live and so incorrectly get clobbered. Move the irq reconcile into C to fix both problems. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221127124942.1665522-2-npiggin@gmail.com --- arch/powerpc/include/asm/kvm_ppc.h | 12 ++++++++++++ arch/powerpc/kvm/booke.c | 3 +++ arch/powerpc/kvm/bookehv_interrupts.S | 9 --------- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index bfacf12784dd..eae9619b6190 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -1014,6 +1014,18 @@ static inline void kvmppc_fix_ee_before_entry(void) #endif } +static inline void kvmppc_fix_ee_after_exit(void) +{ +#ifdef CONFIG_PPC64 + /* Only need to enable IRQs by hard enabling them after this */ + local_paca->irq_happened = PACA_IRQ_HARD_DIS; + irq_soft_mask_set(IRQS_ALL_DISABLED); +#endif + + trace_hardirqs_off(); +} + + static inline ulong kvmppc_get_ea_indexed(struct kvm_vcpu *vcpu, int ra, int rb) { ulong ea; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 7b4920e9fd26..0dce93ccaadf 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1015,6 +1015,9 @@ int kvmppc_handle_exit(struct kvm_vcpu *vcpu, unsigned int exit_nr) u32 last_inst = KVM_INST_FETCH_FAILED; enum emulation_result emulated = EMULATE_DONE; + /* Fix irq state (pairs with kvmppc_fix_ee_before_entry()) */ + kvmppc_fix_ee_after_exit(); + /* update before a new last_exit_type is rewritten */ kvmppc_update_timing_stats(vcpu); diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index 8262c14fc9e6..b5fe6fb53c66 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -424,15 +424,6 @@ _GLOBAL(kvmppc_resume_host) mtspr SPRN_EPCR, r3 isync -#ifdef CONFIG_64BIT - /* - * We enter with interrupts disabled in hardware, but - * we need to call RECONCILE_IRQ_STATE to ensure - * that the software state is kept in sync. 
- */ - RECONCILE_IRQ_STATE(r3,r5) -#endif - /* Switch to kernel stack and jump to handler. */ mr r3, r4 mr r5, r14 /* intno */ From dea681c91d3cd5326f87d0a3c93079573e22ce9a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 22 Nov 2022 08:22:25 +0100 Subject: [PATCH 2905/4122] powerpc/ps3: mark ps3_system_bus_type static ps3_system_bus_type is only used inside of system-bus.c, so remove the external declaration and the very outdated comment next to it. Signed-off-by: Christoph Hellwig Acked-by: Geoff Levand Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221122072225.423432-1-hch@lst.de --- arch/powerpc/include/asm/ps3.h | 4 ---- arch/powerpc/platforms/ps3/system-bus.c | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/ps3.h b/arch/powerpc/include/asm/ps3.h index 8a0d8fb35328..d503dbd7856c 100644 --- a/arch/powerpc/include/asm/ps3.h +++ b/arch/powerpc/include/asm/ps3.h @@ -425,10 +425,6 @@ static inline void *ps3_system_bus_get_drvdata( return dev_get_drvdata(&dev->core); } -/* These two need global scope for get_arch_dma_ops(). */ - -extern struct bus_type ps3_system_bus_type; - /* system manager */ struct ps3_sys_manager_ops { diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c index 2502e9b17df4..38a7e02295c8 100644 --- a/arch/powerpc/platforms/ps3/system-bus.c +++ b/arch/powerpc/platforms/ps3/system-bus.c @@ -466,7 +466,7 @@ static struct attribute *ps3_system_bus_dev_attrs[] = { }; ATTRIBUTE_GROUPS(ps3_system_bus_dev); -struct bus_type ps3_system_bus_type = { +static struct bus_type ps3_system_bus_type = { .name = "ps3_system_bus", .match = ps3_system_bus_match, .uevent = ps3_system_bus_uevent, From 71ae6305ad41cfd1ac5aa91d356e71c7a537df2e Mon Sep 17 00:00:00 2001 From: "Naveen N. 
Rao" Date: Tue, 22 Nov 2022 12:10:52 +0530 Subject: [PATCH 2906/4122] selftests/powerpc: Move perror closer to its use Right now, if perf_event_open() fails for the systemwide tests, error report is printed too late, sometimes after subsequent system calls. Move use of perror() to the main function, just after the syscall. Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/372ac78c27899f1f612fbd6ac796604a4a9310aa.1669096083.git.naveen.n.rao@linux.vnet.ibm.com --- .../selftests/powerpc/ptrace/perf-hwbreak.c | 21 +++++-------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c index ecde2c199f3b..ea5e14ecbf30 100644 --- a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c +++ b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c @@ -146,6 +146,7 @@ static int perf_systemwide_event_open(int *fd, __u32 type, __u64 addr, __u64 len for (i = 0; i < nprocs; i++) { fd[i] = perf_cpu_event_open(i, type, addr, len); if (fd[i] < 0) { + perror("perf_systemwide_event_open"); close_fds(fd, i); return fd[i]; } @@ -543,15 +544,12 @@ static int test_syswide_multi_diff_addr(void) int ret; ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a)); - if (ret) { - perror("perf_systemwide_event_open"); + if (ret) exit(EXIT_FAILURE); - } ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_RW, (__u64)&b, (__u64)sizeof(b)); if (ret) { close_fds(fd1, nprocs); - perror("perf_systemwide_event_open"); exit(EXIT_FAILURE); } @@ -590,15 +588,12 @@ static int test_syswide_multi_same_addr(void) int ret; ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a)); - if (ret) { - perror("perf_systemwide_event_open"); + if (ret) exit(EXIT_FAILURE); - } ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a)); if (ret) { close_fds(fd1, nprocs); - 
perror("perf_systemwide_event_open"); exit(EXIT_FAILURE); } @@ -637,15 +632,12 @@ static int test_syswide_multi_diff_addr_ro_wo(void) int ret; ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_W, (__u64)&a, (__u64)sizeof(a)); - if (ret) { - perror("perf_systemwide_event_open"); + if (ret) exit(EXIT_FAILURE); - } ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_R, (__u64)&b, (__u64)sizeof(b)); if (ret) { close_fds(fd1, nprocs); - perror("perf_systemwide_event_open"); exit(EXIT_FAILURE); } @@ -684,15 +676,12 @@ static int test_syswide_multi_same_addr_ro_wo(void) int ret; ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_W, (__u64)&a, (__u64)sizeof(a)); - if (ret) { - perror("perf_systemwide_event_open"); + if (ret) exit(EXIT_FAILURE); - } ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_R, (__u64)&a, (__u64)sizeof(a)); if (ret) { close_fds(fd1, nprocs); - perror("perf_systemwide_event_open"); exit(EXIT_FAILURE); } From 616ad3f4aac287c48b66c92cb777395b4465ed4f Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Tue, 22 Nov 2022 12:10:53 +0530 Subject: [PATCH 2907/4122] selftests/powerpc: Bump up rlimit for perf-hwbreak test The systemwide perf hardware breakpoint test tries to open a perf event on each cpu. On large systems, we run out of file descriptors and fail the test. Instead, have the test set the file descriptor limit to an arbitrary high value. Reported-by: Rohan Deshpande Signed-off-by: Naveen N. 
Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/187fed5843cecc1e5066677b6296ee88337d7bef.1669096083.git.naveen.n.rao@linux.vnet.ibm.com --- .../testing/selftests/powerpc/ptrace/perf-hwbreak.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c index ea5e14ecbf30..866e5be48ee3 100644 --- a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c +++ b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -140,8 +141,19 @@ static void disable_fds(int *fd, int n) static int perf_systemwide_event_open(int *fd, __u32 type, __u64 addr, __u64 len) { + struct rlimit rlim; int i = 0; + if (getrlimit(RLIMIT_NOFILE, &rlim)) { + perror("getrlimit"); + return -1; + } + rlim.rlim_cur = 65536; + if (setrlimit(RLIMIT_NOFILE, &rlim)) { + perror("setrlimit"); + return -1; + } + /* Assume online processors are 0 to nprocs for simplisity */ for (i = 0; i < nprocs; i++) { fd[i] = perf_cpu_event_open(i, type, addr, len); From 260095926d3956071c6699a28824c3f0fa7cd97a Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Tue, 22 Nov 2022 12:10:54 +0530 Subject: [PATCH 2908/4122] selftests/powerpc: Account for offline cpus in perf-hwbreak test For systemwide tests, use online cpu mask to only open events on online cpus. This enables this test to work on systems in lower SMT modes. Signed-off-by: Naveen N. 
Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/15fd447dcefd19945a7d31f0a475349f548a3603.1669096083.git.naveen.n.rao@linux.vnet.ibm.com --- .../selftests/powerpc/ptrace/perf-hwbreak.c | 45 ++++++++++++++++--- 1 file changed, 39 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c index 866e5be48ee3..f75739bbad28 100644 --- a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c +++ b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c @@ -17,8 +17,11 @@ * Copyright (C) 2018 Michael Neuling, IBM Corporation. */ +#define _GNU_SOURCE + #include #include +#include #include #include #include @@ -141,8 +144,10 @@ static void disable_fds(int *fd, int n) static int perf_systemwide_event_open(int *fd, __u32 type, __u64 addr, __u64 len) { + int i, ncpus, cpu, ret = 0; struct rlimit rlim; - int i = 0; + cpu_set_t *mask; + size_t size; if (getrlimit(RLIMIT_NOFILE, &rlim)) { perror("getrlimit"); @@ -154,16 +159,44 @@ static int perf_systemwide_event_open(int *fd, __u32 type, __u64 addr, __u64 len return -1; } - /* Assume online processors are 0 to nprocs for simplisity */ - for (i = 0; i < nprocs; i++) { - fd[i] = perf_cpu_event_open(i, type, addr, len); + ncpus = get_nprocs_conf(); + size = CPU_ALLOC_SIZE(ncpus); + mask = CPU_ALLOC(ncpus); + if (!mask) { + perror("malloc"); + return -1; + } + + CPU_ZERO_S(size, mask); + + if (sched_getaffinity(0, size, mask)) { + perror("sched_getaffinity"); + ret = -1; + goto done; + } + + for (i = 0, cpu = 0; i < nprocs && cpu < ncpus; cpu++) { + if (!CPU_ISSET_S(cpu, size, mask)) + continue; + fd[i] = perf_cpu_event_open(cpu, type, addr, len); if (fd[i] < 0) { perror("perf_systemwide_event_open"); close_fds(fd, i); - return fd[i]; + ret = fd[i]; + goto done; } + i++; } - return 0; + + if (i < nprocs) { + printf("Error: Number of online cpus reduced since start of test: %d < %d\n", i, nprocs); + close_fds(fd, i); + ret = 
-1; + } + +done: + CPU_FREE(mask); + return ret; } static inline bool breakpoint_test(int len) From d5090716be6791ada9ee142163a4934c1c147aaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sat, 26 Nov 2022 06:10:00 +0100 Subject: [PATCH 2909/4122] powerpc/book3e: remove #include MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 7ad4bd887d27 ("powerpc/book3e: get rid of #include ") removed the usage of the define UTS_RELEASE but forgot to drop the include. utsrelease.h is potentially generated on each build. By removing the unused include we can get rid of some spurious recompilations. Fixes: 7ad4bd887d27 ("powerpc/book3e: get rid of #include ") Signed-off-by: Thomas Weißschuh Reviewed-by: Masahiro Yamada Reviewed-by: Christophe Leroy [mpe: Fix typo in change log and add more explanation] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221126051002.123199-2-linux@weissschuh.net --- arch/powerpc/mm/nohash/kaslr_booke.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c index 0d04f9d5da8d..2fb3edafe9ab 100644 --- a/arch/powerpc/mm/nohash/kaslr_booke.c +++ b/arch/powerpc/mm/nohash/kaslr_booke.c @@ -19,7 +19,6 @@ #include #include #include -#include struct regions { unsigned long pa_start; From 67bbb62f61e810734da0a1577a9802ddaed24140 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Fri, 30 Sep 2022 14:39:01 +0200 Subject: [PATCH 2910/4122] powerpc: dts: turris1x.dts: Add channel labels for temperature sensor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Channel 0 of SA56004ED chip refers to internal SA56004ED chip sensor (chip itself is located on the board) and channel 1 of SA56004ED chip refers to external sensor which is connected to temperature diode of the P2020 CPU. 
Fixes: 54c15ec3b738 ("powerpc: dts: Add DTS file for CZ.NIC Turris 1.x routers") Signed-off-by: Pali Rohár Reviewed-by: Marek Behún Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220930123901.10251-1-pali@kernel.org --- arch/powerpc/boot/dts/turris1x.dts | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/powerpc/boot/dts/turris1x.dts b/arch/powerpc/boot/dts/turris1x.dts index 045af668e928..e9cda34a140e 100644 --- a/arch/powerpc/boot/dts/turris1x.dts +++ b/arch/powerpc/boot/dts/turris1x.dts @@ -69,6 +69,20 @@ interrupt-parent = <&gpio>; interrupts = <12 IRQ_TYPE_LEVEL_LOW>, /* GPIO12 - ALERT pin */ <13 IRQ_TYPE_LEVEL_LOW>; /* GPIO13 - CRIT pin */ + #address-cells = <1>; + #size-cells = <0>; + + /* Local temperature sensor (SA56004ED internal) */ + channel@0 { + reg = <0>; + label = "board"; + }; + + /* Remote temperature sensor (D+/D- connected to P2020 CPU Temperature Diode) */ + channel@1 { + reg = <1>; + label = "cpu"; + }; }; /* DDR3 SPD/EEPROM */ From e082e99f6f87f5204b2531d5a3db7bbd929d23b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Sat, 20 Aug 2022 14:33:27 +0200 Subject: [PATCH 2911/4122] powerpc/fsl-pci: Choose PCI host bridge with alias pci0 as the primary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If there's no PCI host bridge with ISA then check for PCI host bridge with alias "pci0" (first PCI host bridge) and if it exists then choose it as the primary PCI host bridge. This makes choice of primary PCI host bridge more stable across boots and updates as the last fallback candidate for primary PCI host bridge (if there is no choice) is selected arbitrarily. 
Signed-off-by: Pali Rohár Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220820123327.20551-1-pali@kernel.org --- arch/powerpc/sysdev/fsl_pci.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 974d3db6faab..b7232c46b244 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -1138,6 +1138,19 @@ void __init fsl_pci_assign_primary(void) return; } + /* + * If there's no PCI host bridge with ISA then check for + * PCI host bridge with alias "pci0" (first PCI host bridge). + */ + np = of_find_node_by_path("pci0"); + if (np && of_match_node(pci_ids, np) && of_device_is_available(np)) { + fsl_pci_primary = np; + of_node_put(np); + return; + } + if (np) + of_node_put(np); + /* * If there's no PCI host bridge with ISA, arbitrarily * designate one as primary. This can go away once From 3671f4ebe3eb12e7222e4d7b0f94e85cfe34253a Mon Sep 17 00:00:00 2001 From: Jordan Niethe Date: Wed, 9 Nov 2022 15:51:04 +1100 Subject: [PATCH 2912/4122] powerpc: Allow clearing and restoring registers independent of saved breakpoint state For the coming temporary mm used for instruction patching, the breakpoint registers need to be cleared to prevent them from accidentally being triggered. As soon as the patching is done, the breakpoints will be restored. The breakpoint state is stored in the per-cpu variable current_brk[]. Add a suspend_breakpoints() function which will clear the breakpoint registers without touching the state in current_brk[]. Add a pair function restore_breakpoints() which will move the state in current_brk[] back to the registers. 
Signed-off-by: Jordan Niethe Signed-off-by: Benjamin Gray Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221109045112.187069-2-bgray@linux.ibm.com --- arch/powerpc/include/asm/debug.h | 2 ++ arch/powerpc/kernel/process.c | 38 +++++++++++++++++++++++++++++--- 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/debug.h b/arch/powerpc/include/asm/debug.h index 86a14736c76c..51c744608f37 100644 --- a/arch/powerpc/include/asm/debug.h +++ b/arch/powerpc/include/asm/debug.h @@ -46,6 +46,8 @@ static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; } #endif void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk); +void suspend_breakpoints(void); +void restore_breakpoints(void); bool ppc_breakpoint_available(void); #ifdef CONFIG_PPC_ADV_DEBUG_REGS extern void do_send_trap(struct pt_regs *regs, unsigned long address, diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index e3e1feaa536a..5265da2d8034 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -862,10 +862,8 @@ static inline int set_breakpoint_8xx(struct arch_hw_breakpoint *brk) return 0; } -void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk) +static void set_hw_breakpoint(int nr, struct arch_hw_breakpoint *brk) { - memcpy(this_cpu_ptr(&current_brk[nr]), brk, sizeof(*brk)); - if (dawr_enabled()) // Power8 or later set_dawr(nr, brk); @@ -879,6 +877,12 @@ void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk) WARN_ON_ONCE(1); } +void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk) +{ + memcpy(this_cpu_ptr(&current_brk[nr]), brk, sizeof(*brk)); + set_hw_breakpoint(nr, brk); +} + /* Check if we have DAWR or DABR hardware */ bool ppc_breakpoint_available(void) { @@ -891,6 +895,34 @@ bool ppc_breakpoint_available(void) } EXPORT_SYMBOL_GPL(ppc_breakpoint_available); +/* Disable the breakpoint in hardware without touching current_brk[] */ +void suspend_breakpoints(void) +{ + struct 
arch_hw_breakpoint brk = {0}; + int i; + + if (!ppc_breakpoint_available()) + return; + + for (i = 0; i < nr_wp_slots(); i++) + set_hw_breakpoint(i, &brk); +} + +/* + * Re-enable breakpoints suspended by suspend_breakpoints() in hardware + * from current_brk[] + */ +void restore_breakpoints(void) +{ + int i; + + if (!ppc_breakpoint_available()) + return; + + for (i = 0; i < nr_wp_slots(); i++) + set_hw_breakpoint(i, this_cpu_ptr(&current_brk[i])); +} + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM static inline bool tm_enabled(struct task_struct *tsk) From 071c95c1acbd96e76bab8b25b5cad0d71a011f37 Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Wed, 9 Nov 2022 15:51:05 +1100 Subject: [PATCH 2913/4122] powerpc/code-patching: Use WARN_ON and fix check in poking_init BUG_ON() when failing to initialise the code patching window is unnecessary, and use of BUG_ON is discouraged. We don't set poking_init_done in this case, so failure to init the boot CPU will result in a strict RWX error when a following patch_instruction uses raw_patch_instruction. If it only fails for later CPUs, they won't be onlined in the first place. The return value of cpuhp_setup_state() is also >= 0 on success, so check for < 0. 
Signed-off-by: Benjamin Gray Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221109045112.187069-3-bgray@linux.ibm.com --- arch/powerpc/lib/code-patching.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index ad0cf3108dd0..3055eef7dcdc 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -81,16 +81,17 @@ static int text_area_cpu_down(unsigned int cpu) static __ro_after_init DEFINE_STATIC_KEY_FALSE(poking_init_done); -/* - * Although BUG_ON() is rude, in this case it should only happen if ENOMEM, and - * we judge it as being preferable to a kernel that will crash later when - * someone tries to use patch_instruction(). - */ void __init poking_init(void) { - BUG_ON(!cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, - "powerpc/text_poke:online", text_area_cpu_up, - text_area_cpu_down)); + int ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, + "powerpc/text_poke:online", + text_area_cpu_up, + text_area_cpu_down); + + /* cpuhp_setup_state returns >= 0 on success */ + if (WARN_ON(ret < 0)) + return; + static_branch_enable(&poking_init_done); } From baf1ed24b27db475b38f534953885d0425e2232d Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Wed, 9 Nov 2022 15:51:07 +1100 Subject: [PATCH 2914/4122] powerpc/mm: Remove empty hash__ functions The empty hash__* functions are unnecessary. The empty definitions were introduced when 64-bit Hash support was added, as the functions were still used in generic code. These empty definitions were prefixed with hash__ when Radix support was added, and new wrappers with the original names were added that selected the Radix or Hash version based on radix_enabled(). But the hash__ prefixed functions were not part of a public interface, so there is no need to include them for compatibility with anything. 
Generic code will use the non-prefixed wrappers, and Hash specific code will know that there is no point in calling them (or even worse, call them and expect them to do something). Signed-off-by: Benjamin Gray Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221109045112.187069-5-bgray@linux.ibm.com --- .../include/asm/book3s/64/tlbflush-hash.h | 28 ------------------- arch/powerpc/include/asm/book3s/64/tlbflush.h | 27 ++++++------------ 2 files changed, 9 insertions(+), 46 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h index 751921f6db46..a9ef40dc263e 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h @@ -65,13 +65,6 @@ extern void flush_hash_range(unsigned long number, int local); extern void flush_hash_hugepage(unsigned long vsid, unsigned long addr, pmd_t *pmdp, unsigned int psize, int ssize, unsigned long flags); -static inline void hash__local_flush_tlb_mm(struct mm_struct *mm) -{ -} - -static inline void hash__flush_tlb_mm(struct mm_struct *mm) -{ -} static inline void hash__local_flush_all_mm(struct mm_struct *mm) { @@ -95,27 +88,6 @@ static inline void hash__flush_all_mm(struct mm_struct *mm) WARN_ON_ONCE(1); } -static inline void hash__local_flush_tlb_page(struct vm_area_struct *vma, - unsigned long vmaddr) -{ -} - -static inline void hash__flush_tlb_page(struct vm_area_struct *vma, - unsigned long vmaddr) -{ -} - -static inline void hash__flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ -} - -static inline void hash__flush_tlb_kernel_range(unsigned long start, - unsigned long end) -{ -} - - struct mmu_gather; extern void hash__tlb_flush(struct mmu_gather *tlb); diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h index 67655cd60545..2254a40f0564 100644 --- 
a/arch/powerpc/include/asm/book3s/64/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h @@ -47,8 +47,7 @@ static inline void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { if (radix_enabled()) - return radix__flush_pmd_tlb_range(vma, start, end); - return hash__flush_tlb_range(vma, start, end); + radix__flush_pmd_tlb_range(vma, start, end); } #define __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE @@ -57,39 +56,34 @@ static inline void flush_hugetlb_tlb_range(struct vm_area_struct *vma, unsigned long end) { if (radix_enabled()) - return radix__flush_hugetlb_tlb_range(vma, start, end); - return hash__flush_tlb_range(vma, start, end); + radix__flush_hugetlb_tlb_range(vma, start, end); } static inline void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { if (radix_enabled()) - return radix__flush_tlb_range(vma, start, end); - return hash__flush_tlb_range(vma, start, end); + radix__flush_tlb_range(vma, start, end); } static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) { if (radix_enabled()) - return radix__flush_tlb_kernel_range(start, end); - return hash__flush_tlb_kernel_range(start, end); + radix__flush_tlb_kernel_range(start, end); } static inline void local_flush_tlb_mm(struct mm_struct *mm) { if (radix_enabled()) - return radix__local_flush_tlb_mm(mm); - return hash__local_flush_tlb_mm(mm); + radix__local_flush_tlb_mm(mm); } static inline void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) { if (radix_enabled()) - return radix__local_flush_tlb_page(vma, vmaddr); - return hash__local_flush_tlb_page(vma, vmaddr); + radix__local_flush_tlb_page(vma, vmaddr); } static inline void local_flush_all_mm(struct mm_struct *mm) @@ -102,24 +96,21 @@ static inline void local_flush_all_mm(struct mm_struct *mm) static inline void tlb_flush(struct mmu_gather *tlb) { if (radix_enabled()) - return radix__tlb_flush(tlb); - return hash__tlb_flush(tlb); 
+ radix__tlb_flush(tlb); } #ifdef CONFIG_SMP static inline void flush_tlb_mm(struct mm_struct *mm) { if (radix_enabled()) - return radix__flush_tlb_mm(mm); - return hash__flush_tlb_mm(mm); + radix__flush_tlb_mm(mm); } static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) { if (radix_enabled()) - return radix__flush_tlb_page(vma, vmaddr); - return hash__flush_tlb_page(vma, vmaddr); + radix__flush_tlb_page(vma, vmaddr); } static inline void flush_all_mm(struct mm_struct *mm) From 0f0a0a6091e678b1a75078ecd6b02176f3228dbb Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Wed, 9 Nov 2022 15:51:08 +1100 Subject: [PATCH 2915/4122] cxl: Use radix__flush_all_mm instead of generic flush_all_mm The generic implementation of this function isn't really generic (Hash is not implemented). Unfortunately, the runtime warnings cannot be replaced with BUILD_BUG's, so it seems safer not to provide a stub in the first place. Signed-off-by: Benjamin Gray Reviewed-by: Andrew Donnellan Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221109045112.187069-6-bgray@linux.ibm.com --- arch/powerpc/include/asm/mmu_context.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index c1ea270bb848..57f5017111f4 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -151,8 +151,8 @@ static inline void mm_context_remove_copro(struct mm_struct *mm) * nMMU and/or PSL need to be cleaned up. * * Both the 'copros' and 'active_cpus' counts are looked at in - * flush_all_mm() to determine the scope (local/global) of the - * TLBIs, so we need to flush first before decrementing + * radix__flush_all_mm() to determine the scope (local/global) + * of the TLBIs, so we need to flush first before decrementing * 'copros'. 
If this API is used by several callers for the * same context, it can lead to over-flushing. It's hopefully * not common enough to be a problem. @@ -164,7 +164,7 @@ static inline void mm_context_remove_copro(struct mm_struct *mm) * in-between. */ if (radix_enabled()) { - flush_all_mm(mm); + radix__flush_all_mm(mm); c = atomic_dec_if_positive(&mm->context.copros); /* Detect imbalance between add and remove */ From d34471c9bd5d47ab148dd68817631a4238f755c4 Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Wed, 9 Nov 2022 15:51:09 +1100 Subject: [PATCH 2916/4122] powerpc/mm: Remove flush_all_mm, local_flush_all_mm These functions were introduced for "cxl: Enable global TLBIs for cxl contexts" [1], which ended up using them for Radix only. They were never implemented on Hash (and creating an implementation appears to be difficult), so nothing can actually rely on them. They behave differently to the existing surrounding functions too, in that they actually need to do something on Hash. The other functions are primarily for use in generic code that expects their definitions, but Hash updates the TLB during PTE updates. After replacing the only usage with the Radix specific version, there are no more users of these functions, and given they are not implemented anyway it is safe to delete them. 
[1]: https://patchwork.ozlabs.org/project/linuxppc-dev/patch/20170903181513.29635-1-fbarrat@linux.vnet.ibm.com/ Signed-off-by: Benjamin Gray Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221109045112.187069-7-bgray@linux.ibm.com --- .../include/asm/book3s/64/tlbflush-hash.h | 22 ------------------- arch/powerpc/include/asm/book3s/64/tlbflush.h | 15 ------------- 2 files changed, 37 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h index a9ef40dc263e..146287d9580f 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h @@ -66,28 +66,6 @@ extern void flush_hash_hugepage(unsigned long vsid, unsigned long addr, pmd_t *pmdp, unsigned int psize, int ssize, unsigned long flags); -static inline void hash__local_flush_all_mm(struct mm_struct *mm) -{ - /* - * There's no Page Walk Cache for hash, so what is needed is - * the same as flush_tlb_mm(), which doesn't really make sense - * with hash. So the only thing we could do is flush the - * entire LPID! Punt for now, as it's not being used. - */ - WARN_ON_ONCE(1); -} - -static inline void hash__flush_all_mm(struct mm_struct *mm) -{ - /* - * There's no Page Walk Cache for hash, so what is needed is - * the same as flush_tlb_mm(), which doesn't really make sense - * with hash. So the only thing we could do is flush the - * entire LPID! Punt for now, as it's not being used. 
- */ - WARN_ON_ONCE(1); -} - struct mmu_gather; extern void hash__tlb_flush(struct mmu_gather *tlb); diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h index 2254a40f0564..c56a0aee8124 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h @@ -86,13 +86,6 @@ static inline void local_flush_tlb_page(struct vm_area_struct *vma, radix__local_flush_tlb_page(vma, vmaddr); } -static inline void local_flush_all_mm(struct mm_struct *mm) -{ - if (radix_enabled()) - return radix__local_flush_all_mm(mm); - return hash__local_flush_all_mm(mm); -} - static inline void tlb_flush(struct mmu_gather *tlb) { if (radix_enabled()) @@ -112,17 +105,9 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, if (radix_enabled()) radix__flush_tlb_page(vma, vmaddr); } - -static inline void flush_all_mm(struct mm_struct *mm) -{ - if (radix_enabled()) - return radix__flush_all_mm(mm); - return hash__flush_all_mm(mm); -} #else #define flush_tlb_mm(mm) local_flush_tlb_mm(mm) #define flush_tlb_page(vma, addr) local_flush_tlb_page(vma, addr) -#define flush_all_mm(mm) local_flush_all_mm(mm) #endif /* CONFIG_SMP */ #define flush_tlb_fix_spurious_fault flush_tlb_fix_spurious_fault From 274d842fa1efd9449e62222c8896e0be11621f1f Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Wed, 9 Nov 2022 15:51:10 +1100 Subject: [PATCH 2917/4122] powerpc/tlb: Add local flush for page given mm_struct and psize Adds a local TLB flush operation that works given an mm_struct, VA to flush, and page size representation. Most implementations mirror the surrounding code. The book3s/32/tlbflush.h implementation is left as a BUILD_BUG because it is more complicated and not required for anything as yet. This removes the need to create a vm_area_struct, which the temporary patching mm work does not need. 
Signed-off-by: Benjamin Gray Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221109045112.187069-8-bgray@linux.ibm.com --- arch/powerpc/include/asm/book3s/32/tlbflush.h | 9 +++++++++ arch/powerpc/include/asm/book3s/64/tlbflush.h | 7 +++++++ arch/powerpc/include/asm/nohash/tlbflush.h | 7 +++++++ arch/powerpc/mm/nohash/tlb.c | 8 ++++++++ 4 files changed, 31 insertions(+) diff --git a/arch/powerpc/include/asm/book3s/32/tlbflush.h b/arch/powerpc/include/asm/book3s/32/tlbflush.h index ba1743c52b56..4be572908124 100644 --- a/arch/powerpc/include/asm/book3s/32/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/32/tlbflush.h @@ -2,6 +2,8 @@ #ifndef _ASM_POWERPC_BOOK3S_32_TLBFLUSH_H #define _ASM_POWERPC_BOOK3S_32_TLBFLUSH_H +#include + #define MMU_NO_CONTEXT (0) /* * TLB flushing for "classic" hash-MMU 32-bit CPUs, 6xx, 7xx, 7xxx @@ -74,6 +76,13 @@ static inline void local_flush_tlb_page(struct vm_area_struct *vma, { flush_tlb_page(vma, vmaddr); } + +static inline void local_flush_tlb_page_psize(struct mm_struct *mm, + unsigned long vmaddr, int psize) +{ + BUILD_BUG(); +} + static inline void local_flush_tlb_mm(struct mm_struct *mm) { flush_tlb_mm(mm); diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h index c56a0aee8124..dd39313242b4 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h @@ -86,6 +86,13 @@ static inline void local_flush_tlb_page(struct vm_area_struct *vma, radix__local_flush_tlb_page(vma, vmaddr); } +static inline void local_flush_tlb_page_psize(struct mm_struct *mm, + unsigned long vmaddr, int psize) +{ + if (radix_enabled()) + radix__local_flush_tlb_page_psize(mm, vmaddr, psize); +} + static inline void tlb_flush(struct mmu_gather *tlb) { if (radix_enabled()) diff --git a/arch/powerpc/include/asm/nohash/tlbflush.h b/arch/powerpc/include/asm/nohash/tlbflush.h index bdaf34ad41ea..9a2cf83ea4f1 100644 
--- a/arch/powerpc/include/asm/nohash/tlbflush.h +++ b/arch/powerpc/include/asm/nohash/tlbflush.h @@ -45,6 +45,12 @@ static inline void local_flush_tlb_page(struct vm_area_struct *vma, unsigned lon asm volatile ("tlbie %0; sync" : : "r" (vmaddr) : "memory"); } +static inline void local_flush_tlb_page_psize(struct mm_struct *mm, + unsigned long vmaddr, int psize) +{ + asm volatile ("tlbie %0; sync" : : "r" (vmaddr) : "memory"); +} + static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) { start &= PAGE_MASK; @@ -58,6 +64,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); extern void local_flush_tlb_mm(struct mm_struct *mm); extern void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); +void local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, int psize); extern void __local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, int tsize, int ind); diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c index 2c15c86c7015..a903b308acc5 100644 --- a/arch/powerpc/mm/nohash/tlb.c +++ b/arch/powerpc/mm/nohash/tlb.c @@ -184,6 +184,14 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) mmu_get_tsize(mmu_virtual_psize), 0); } EXPORT_SYMBOL(local_flush_tlb_page); + +void local_flush_tlb_page_psize(struct mm_struct *mm, + unsigned long vmaddr, int psize) +{ + __local_flush_tlb_page(mm, vmaddr, mmu_get_tsize(psize), 0); +} +EXPORT_SYMBOL(local_flush_tlb_page_psize); + #endif /* From 33379c054211a5144ffae84e9e3c80e2e62416a9 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 30 Nov 2022 12:02:11 +0100 Subject: [PATCH 2918/4122] Revert "xhci: Convert to use list_count()" This reverts commit b47ec9727f47d1dce4e8cbc9aef01c80b2332535 as it breaks the build. 
Link: https://lore.kernel.org/r/20221130131854.35b58b16@canb.auug.org.au Link: https://lore.kernel.org/r/202211301628.iwMjPVMp-lkp@intel.com Cc: Mathias Nyman Cc: Andy Shevchenko Reported-by: Stephen Rothwell Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 817c31e3b0c8..ad81e9a508b1 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2532,6 +2532,7 @@ static int handle_tx_event(struct xhci_hcd *xhci, union xhci_trb *ep_trb; int status = -EINPROGRESS; struct xhci_ep_ctx *ep_ctx; + struct list_head *tmp; u32 trb_comp_code; int td_num = 0; bool handling_skipped_tds = false; @@ -2579,8 +2580,10 @@ static int handle_tx_event(struct xhci_hcd *xhci, } /* Count current td numbers if ep->skip is set */ - if (ep->skip) - td_num += list_count(&ep_ring->td_list); + if (ep->skip) { + list_for_each(tmp, &ep_ring->td_list) + td_num++; + } /* Look for common error cases */ switch (trb_comp_code) { From acebf61919199771b5d7b92c68c5b515dfcbf800 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 30 Nov 2022 12:02:12 +0100 Subject: [PATCH 2919/4122] Revert "usb: gadget: udc: bcm63xx: Convert to use list_count()" This reverts commit c2d9d02f7bf3c641f9b8e6c9f5de1e564cdeca69 as it breaks the build. 
Link: https://lore.kernel.org/r/20221130131854.35b58b16@canb.auug.org.au Link: https://lore.kernel.org/r/202211301628.iwMjPVMp-lkp@intel.com Cc: Andy Shevchenko Reported-by: Stephen Rothwell Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/bcm63xx_udc.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/udc/bcm63xx_udc.c b/drivers/usb/gadget/udc/bcm63xx_udc.c index 0762e49e85f8..2cdb07905bde 100644 --- a/drivers/usb/gadget/udc/bcm63xx_udc.c +++ b/drivers/usb/gadget/udc/bcm63xx_udc.c @@ -2172,6 +2172,7 @@ static int bcm63xx_iudma_dbg_show(struct seq_file *s, void *p) for (ch_idx = 0; ch_idx < BCM63XX_NUM_IUDMA; ch_idx++) { struct iudma_ch *iudma = &udc->iudma[ch_idx]; + struct list_head *pos; seq_printf(s, "IUDMA channel %d -- ", ch_idx); switch (iudma_defaults[ch_idx].ep_type) { @@ -2204,10 +2205,14 @@ static int bcm63xx_iudma_dbg_show(struct seq_file *s, void *p) seq_printf(s, " desc: %d/%d used", iudma->n_bds_used, iudma->n_bds); - if (iudma->bep) - seq_printf(s, "; %zu queued\n", list_count(&iudma->bep->queue)); - else + if (iudma->bep) { + i = 0; + list_for_each(pos, &iudma->bep->queue) + i++; + seq_printf(s, "; %d queued\n", i); + } else { seq_printf(s, "\n"); + } for (i = 0; i < iudma->n_bds; i++) { struct bcm_enet_desc *d = &iudma->bd_ring[i]; From 54aa8af53905e39a825773883914810033f4d3d3 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 30 Nov 2022 12:02:13 +0100 Subject: [PATCH 2920/4122] Revert "usb: gadget: hid: Convert to use list_count()" This reverts commit 33f00f41d963c86176dba2f9faff9b428a542e60 as it breaks the build. 
Link: https://lore.kernel.org/r/20221130131854.35b58b16@canb.auug.org.au Link: https://lore.kernel.org/r/202211301628.iwMjPVMp-lkp@intel.com Cc: Andy Shevchenko Reported-by: Stephen Rothwell Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/legacy/hid.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/legacy/hid.c b/drivers/usb/gadget/legacy/hid.c index 6196c3456e0b..1187ee4f316a 100644 --- a/drivers/usb/gadget/legacy/hid.c +++ b/drivers/usb/gadget/legacy/hid.c @@ -133,11 +133,14 @@ static struct usb_configuration config_driver = { static int hid_bind(struct usb_composite_dev *cdev) { struct usb_gadget *gadget = cdev->gadget; + struct list_head *tmp; struct hidg_func_node *n = NULL, *m, *iter_n; struct f_hid_opts *hid_opts; - int status, funcs; + int status, funcs = 0; + + list_for_each(tmp, &hidg_func_list) + funcs++; - funcs = list_count(&hidg_func_list); if (!funcs) return -ENODEV; From 51daa42d6b86efa366320b99e7bbe29a490ed348 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 30 Nov 2022 12:02:13 +0100 Subject: [PATCH 2921/4122] Revert "i915: Move list_count() to list.h for broader use" This reverts commit a9efc04cfd05690e91279f41c2325c46335c43ef as it breaks the build. 
Link: https://lore.kernel.org/r/20221130131854.35b58b16@canb.auug.org.au Link: https://lore.kernel.org/r/202211301628.iwMjPVMp-lkp@intel.com Cc: Lucas De Marchi Cc: Jani Nikula Cc: Andy Shevchenko Reported-by: Stephen Rothwell Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 13 ++++++++++++- include/linux/list.h | 15 --------------- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 47734c4ebfa0..1f7188129cd1 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -2004,6 +2004,17 @@ static void print_request_ring(struct drm_printer *m, struct i915_request *rq) } } +static unsigned long list_count(struct list_head *list) +{ + struct list_head *pos; + unsigned long count = 0; + + list_for_each(pos, list) + count++; + + return count; +} + static unsigned long read_ul(void *p, size_t x) { return *(unsigned long *)(p + x); @@ -2178,7 +2189,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, spin_lock_irqsave(&engine->sched_engine->lock, flags); engine_dump_active_requests(engine, m); - drm_printf(m, "\tOn hold?: %zu\n", + drm_printf(m, "\tOn hold?: %lu\n", list_count(&engine->sched_engine->hold)); spin_unlock_irqrestore(&engine->sched_engine->lock, flags); diff --git a/include/linux/list.h b/include/linux/list.h index 632a298c7018..61762054b4be 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -655,21 +655,6 @@ static inline void list_splice_tail_init(struct list_head *list, !list_is_head(pos, (head)); \ pos = n, n = pos->prev) -/** - * list_count - count nodes in the list - * @head: the head for your list. 
- */ -static inline size_t list_count(struct list_head *head) -{ - struct list_head *pos; - size_t count = 0; - - list_for_each(pos, head) - count++; - - return count; -} - /** * list_entry_is_head - test if the entry points to the head of the list * @pos: the type * to cursor From 2a25e66d676dfb9b018abd503deed3d38a892dec Mon Sep 17 00:00:00 2001 From: Longfang Liu Date: Wed, 30 Nov 2022 11:19:39 +0200 Subject: [PATCH 2922/4122] xhci: print warning when HCE was set When HCE(Host Controller Error) is set, it means that the xhci hardware controller has an error at this time, but the current xhci driver software does not log this event. By adding an HCE event detection in the xhci interrupt processing interface, a warning log is output to the system, which is convenient for system device status tracking. Signed-off-by: Longfang Liu Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20221130091944.2171610-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index ad81e9a508b1..f6af479188e8 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -3031,6 +3031,11 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd) if (!(status & STS_EINT)) goto out; + if (status & STS_HCE) { + xhci_warn(xhci, "WARNING: Host Controller Error\n"); + goto out; + } + if (status & STS_FATAL) { xhci_warn(xhci, "WARNING: Host System Error\n"); xhci_halt(xhci); From fed70b61ef2c0aed54456db3d485b215f6cc3209 Mon Sep 17 00:00:00 2001 From: Reka Norman Date: Wed, 30 Nov 2022 11:19:40 +0200 Subject: [PATCH 2923/4122] xhci: Apply XHCI_RESET_TO_DEFAULT quirk to ADL-N ADL-N systems have the same issue as ADL-P, where a large boot firmware delay is seen if USB ports are left in U3 at shutdown. So apply the XHCI_RESET_TO_DEFAULT quirk to ADL-N as well. 
This patch depends on commit 34cd2db408d5 ("xhci: Add quirk to reset host back to default state at shutdown"). The issue it fixes is a ~20s boot time delay when booting from S5. It affects ADL-N devices, and ADL-N support was added starting from v5.16. Cc: stable@vger.kernel.org Signed-off-by: Reka Norman Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20221130091944.2171610-3-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index a29b681b562e..1fb773ed3727 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -59,6 +59,7 @@ #define PCI_DEVICE_ID_INTEL_TIGER_LAKE_XHCI 0x9a13 #define PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_XHCI 0x1138 #define PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI 0x51ed +#define PCI_DEVICE_ID_INTEL_ALDER_LAKE_N_PCH_XHCI 0x54ed #define PCI_DEVICE_ID_AMD_RENOIR_XHCI 0x1639 #define PCI_DEVICE_ID_AMD_PROMONTORYA_4 0x43b9 @@ -246,7 +247,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) xhci->quirks |= XHCI_MISSING_CAS; if (pdev->vendor == PCI_VENDOR_ID_INTEL && - pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI) + (pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_N_PCH_XHCI)) xhci->quirks |= XHCI_RESET_TO_DEFAULT; if (pdev->vendor == PCI_VENDOR_ID_INTEL && From 705c333a7ad2003ad99d96c19a31619b19ad14b9 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Wed, 30 Nov 2022 11:19:41 +0200 Subject: [PATCH 2924/4122] xhci: export two xhci_hub functions for xhci-pci module usage some Intel Alder Lake xHC hosts on ChromeOS platforms need special workarounds touching port registers at xHC pci host hibernate. Export xhci_port_state_to_neutral() and xhci_find_slot_id_by_port() so they can be called from xhci-pci.c and thus the xhci-pci module. 
Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20221130091944.2171610-4-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 4619d5e89d5b..94c94db3faf6 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -426,24 +426,37 @@ static unsigned int xhci_port_speed(unsigned int port_status) */ #define XHCI_PORT_RZ ((1<<2) | (1<<24) | (0xf<<28)) -/* +/** + * xhci_port_state_to_neutral() - Clean up read portsc value back into writeable + * @state: u32 port value read from portsc register to be cleanup up + * * Given a port state, this function returns a value that would result in the * port being in the same state, if the value was written to the port status * control register. * Save Read Only (RO) bits and save read/write bits where * writing a 0 clears the bit and writing a 1 sets the bit (RWS). * For all other types (RW1S, RW1CS, RW, and RZ), writing a '0' has no effect. + * + * Return: u32 value that can be written back to portsc register without + * changing port state. */ + u32 xhci_port_state_to_neutral(u32 state) { /* Save read-only status and port state */ return (state & XHCI_PORT_RO) | (state & XHCI_PORT_RWS); } +EXPORT_SYMBOL_GPL(xhci_port_state_to_neutral); -/* - * find slot id based on port number. - * @port: The one-based port number from one of the two split roothubs. +/** + * xhci_find_slot_id_by_port() - Find slot id of a usb device on a roothub port + * @hcd: pointer to hcd of the roothub + * @xhci: pointer to xhci structure + * @port: one-based port number of the port in this roothub. 
+ * + * Return: Slot id of the usb device connected to the root port, 0 if not found */ + int xhci_find_slot_id_by_port(struct usb_hcd *hcd, struct xhci_hcd *xhci, u16 port) { @@ -465,6 +478,7 @@ int xhci_find_slot_id_by_port(struct usb_hcd *hcd, struct xhci_hcd *xhci, return slot_id; } +EXPORT_SYMBOL_GPL(xhci_find_slot_id_by_port); /* * Stop device From c3bbacd61baace2f4fbab17012c3d149df2d50f1 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Wed, 30 Nov 2022 11:19:42 +0200 Subject: [PATCH 2925/4122] xhci: disable U3 suspended ports in S4 hibernate poweroff_late stage Disable U3 suspended ports in hibernate S4 poweroff_late for systems with XHCI_RESET_TO_DEFAULT quirk, if wakeup is not enabled. This reduces the number of self-powered usb devices that survive in U3 suspended state into the next reboot. Bootloader/firmware on these systems can't handle usb ports in U3, and will time out, causing extra delay during reboot/restore from S4. Add pci_poweroff_late() callback to struct hc_driver to get this done at the correct stage in hibernate.
Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20221130091944.2171610-5-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd-pci.c | 13 ++++++++++ drivers/usb/host/xhci-pci.c | 52 +++++++++++++++++++++++++++++++++++++ include/linux/usb/hcd.h | 3 +++ 3 files changed, 68 insertions(+) diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c index 9b77f49b3560..ab2f3737764e 100644 --- a/drivers/usb/core/hcd-pci.c +++ b/drivers/usb/core/hcd-pci.c @@ -558,6 +558,17 @@ static int hcd_pci_suspend_noirq(struct device *dev) return retval; } +static int hcd_pci_poweroff_late(struct device *dev) +{ + struct pci_dev *pci_dev = to_pci_dev(dev); + struct usb_hcd *hcd = pci_get_drvdata(pci_dev); + + if (hcd->driver->pci_poweroff_late && !HCD_DEAD(hcd)) + return hcd->driver->pci_poweroff_late(hcd, device_may_wakeup(dev)); + + return 0; +} + static int hcd_pci_resume_noirq(struct device *dev) { powermac_set_asic(to_pci_dev(dev), 1); @@ -578,6 +589,7 @@ static int hcd_pci_restore(struct device *dev) #define hcd_pci_suspend NULL #define hcd_pci_suspend_noirq NULL +#define hcd_pci_poweroff_late NULL #define hcd_pci_resume_noirq NULL #define hcd_pci_resume NULL #define hcd_pci_restore NULL @@ -615,6 +627,7 @@ const struct dev_pm_ops usb_hcd_pci_pm_ops = { .thaw_noirq = NULL, .thaw = hcd_pci_resume, .poweroff = hcd_pci_suspend, + .poweroff_late = hcd_pci_poweroff_late, .poweroff_noirq = hcd_pci_suspend_noirq, .restore_noirq = hcd_pci_resume_noirq, .restore = hcd_pci_restore, diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 1fb773ed3727..79d679b3e076 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -622,6 +622,57 @@ static int xhci_pci_resume(struct usb_hcd *hcd, bool hibernated) return retval; } +static int xhci_pci_poweroff_late(struct usb_hcd *hcd, bool do_wakeup) +{ + struct xhci_hcd *xhci = hcd_to_xhci(hcd); + struct xhci_port *port; + struct usb_device *udev; + 
unsigned int slot_id; + u32 portsc; + int i; + + /* + * Systems with XHCI_RESET_TO_DEFAULT quirk have boot firmware that + * cause significant boot delay if usb ports are in suspended U3 state + * during boot. Some USB devices survive in U3 state over S4 hibernate + * + * Disable ports that are in U3 if remote wake is not enabled for either + * host controller or connected device + */ + + if (!(xhci->quirks & XHCI_RESET_TO_DEFAULT)) + return 0; + + for (i = 0; i < HCS_MAX_PORTS(xhci->hcs_params1); i++) { + port = &xhci->hw_ports[i]; + portsc = readl(port->addr); + + if ((portsc & PORT_PLS_MASK) != XDEV_U3) + continue; + + slot_id = xhci_find_slot_id_by_port(port->rhub->hcd, xhci, + port->hcd_portnum + 1); + if (!slot_id || !xhci->devs[slot_id]) { + xhci_err(xhci, "No dev for slot_id %d for port %d-%d in U3\n", + slot_id, port->rhub->hcd->self.busnum, port->hcd_portnum + 1); + continue; + } + + udev = xhci->devs[slot_id]->udev; + + /* if wakeup is enabled then don't disable the port */ + if (udev->do_remote_wakeup && do_wakeup) + continue; + + xhci_dbg(xhci, "port %d-%d in U3 without wakeup, disable it\n", + port->rhub->hcd->self.busnum, port->hcd_portnum + 1); + portsc = xhci_port_state_to_neutral(portsc); + writel(portsc | PORT_PE, port->addr); + } + + return 0; +} + static void xhci_pci_shutdown(struct usb_hcd *hcd) { struct xhci_hcd *xhci = hcd_to_xhci(hcd); @@ -689,6 +740,7 @@ static int __init xhci_pci_init(void) #ifdef CONFIG_PM xhci_pci_hc_driver.pci_suspend = xhci_pci_suspend; xhci_pci_hc_driver.pci_resume = xhci_pci_resume; + xhci_pci_hc_driver.pci_poweroff_late = xhci_pci_poweroff_late; xhci_pci_hc_driver.shutdown = xhci_pci_shutdown; #endif return pci_register_driver(&xhci_pci_driver); diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 78cd566ee238..b51c07111729 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -269,6 +269,9 @@ struct hc_driver { /* called after entering D0 (etc), before resuming the hub */ int 
(*pci_resume)(struct usb_hcd *hcd, bool hibernated); + /* called just before hibernate final D3 state, allows host to poweroff parts */ + int (*pci_poweroff_late)(struct usb_hcd *hcd, bool do_wakeup); + /* cleanly make HCD stop writing memory and doing I/O */ void (*stop) (struct usb_hcd *hcd); From a1575120972ecd7baa6af6a69e4e7ea9213bde7c Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Wed, 30 Nov 2022 11:19:43 +0200 Subject: [PATCH 2926/4122] xhci: Prevent infinite loop in transaction errors recovery for streams Make sure to also limit the amount of soft reset retries for transaction errors on streams in cases where the transaction error event doesn't point to any specific TRB. In these cases we don't know the TRB or stream ring, but we do know which endpoint had the error. To keep error counting simple and functional, move the current err_count from ring structure to endpoint structure. Cc: stable@vger.kernel.org Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20221130091944.2171610-6-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 14 ++++++++++---- drivers/usb/host/xhci.h | 2 +- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index f6af479188e8..039ec9734fcd 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2458,7 +2458,7 @@ static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, switch (trb_comp_code) { case COMP_SUCCESS: - ep_ring->err_count = 0; + ep->err_count = 0; /* handle success with untransferred data as short packet */ if (ep_trb != td->last_trb || remaining) { xhci_warn(xhci, "WARN Successful completion on short TX\n"); @@ -2484,7 +2484,7 @@ static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, break; case COMP_USB_TRANSACTION_ERROR: if (xhci->quirks & XHCI_NO_SOFT_RETRY || - (ep_ring->err_count++ > MAX_SOFT_RETRY) || + 
(ep->err_count++ > MAX_SOFT_RETRY) || le32_to_cpu(slot_ctx->tt_info) & TT_SLOT) break; @@ -2565,8 +2565,14 @@ static int handle_tx_event(struct xhci_hcd *xhci, case COMP_USB_TRANSACTION_ERROR: case COMP_INVALID_STREAM_TYPE_ERROR: case COMP_INVALID_STREAM_ID_ERROR: - xhci_handle_halted_endpoint(xhci, ep, 0, NULL, - EP_SOFT_RESET); + xhci_dbg(xhci, "Stream transaction error ep %u no id\n", + ep_index); + if (ep->err_count++ > MAX_SOFT_RETRY) + xhci_handle_halted_endpoint(xhci, ep, 0, NULL, + EP_HARD_RESET); + else + xhci_handle_halted_endpoint(xhci, ep, 0, NULL, + EP_SOFT_RESET); goto cleanup; case COMP_RING_UNDERRUN: case COMP_RING_OVERRUN: diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index cc084d9505cd..c9f06c5e4e9d 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -933,6 +933,7 @@ struct xhci_virt_ep { * have to restore the device state to the previous state */ struct xhci_ring *new_ring; + unsigned int err_count; unsigned int ep_state; #define SET_DEQ_PENDING (1 << 0) #define EP_HALTED (1 << 1) /* For stall handling */ @@ -1627,7 +1628,6 @@ struct xhci_ring { * if we own the TRB (if we are the consumer). See section 4.9.1. */ u32 cycle_state; - unsigned int err_count; unsigned int stream_id; unsigned int num_segs; unsigned int num_trbs_free; From 7428a253315cefa34e6092a0119c56cb3a1c0c12 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Wed, 30 Nov 2022 11:19:44 +0200 Subject: [PATCH 2927/4122] xhci: remove unused stream_id parameter from xhci_handle_halted_endpoint() The stream_id parameter is no longer used when handling halted endpoints. 
Remove it Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20221130091944.2171610-7-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 039ec9734fcd..ddc30037f9ce 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -896,7 +896,7 @@ done: } static int xhci_handle_halted_endpoint(struct xhci_hcd *xhci, - struct xhci_virt_ep *ep, unsigned int stream_id, + struct xhci_virt_ep *ep, struct xhci_td *td, enum xhci_ep_reset_type reset_type) { @@ -1110,8 +1110,7 @@ static void xhci_handle_cmd_stop_ep(struct xhci_hcd *xhci, int slot_id, td->status = -EPROTO; } /* reset ep, reset handler cleans up cancelled tds */ - err = xhci_handle_halted_endpoint(xhci, ep, 0, td, - reset_type); + err = xhci_handle_halted_endpoint(xhci, ep, td, reset_type); if (err) break; ep->ep_state &= ~EP_STOP_CMD_PENDING; @@ -2183,8 +2182,7 @@ static int finish_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, } /* Almost same procedure as for STALL_ERROR below */ xhci_clear_hub_tt_buffer(xhci, td, ep); - xhci_handle_halted_endpoint(xhci, ep, ep_ring->stream_id, td, - EP_HARD_RESET); + xhci_handle_halted_endpoint(xhci, ep, td, EP_HARD_RESET); return 0; case COMP_STALL_ERROR: /* @@ -2200,8 +2198,7 @@ static int finish_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, if (ep->ep_index != 0) xhci_clear_hub_tt_buffer(xhci, td, ep); - xhci_handle_halted_endpoint(xhci, ep, ep_ring->stream_id, td, - EP_HARD_RESET); + xhci_handle_halted_endpoint(xhci, ep, td, EP_HARD_RESET); return 0; /* xhci_handle_halted_endpoint marked td cancelled */ default: @@ -2490,8 +2487,7 @@ static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, td->status = 0; - xhci_handle_halted_endpoint(xhci, ep, ep_ring->stream_id, td, - EP_SOFT_RESET); + 
xhci_handle_halted_endpoint(xhci, ep, td, EP_SOFT_RESET); return 0; default: /* do nothing */ @@ -2568,10 +2564,10 @@ static int handle_tx_event(struct xhci_hcd *xhci, xhci_dbg(xhci, "Stream transaction error ep %u no id\n", ep_index); if (ep->err_count++ > MAX_SOFT_RETRY) - xhci_handle_halted_endpoint(xhci, ep, 0, NULL, + xhci_handle_halted_endpoint(xhci, ep, NULL, EP_HARD_RESET); else - xhci_handle_halted_endpoint(xhci, ep, 0, NULL, + xhci_handle_halted_endpoint(xhci, ep, NULL, EP_SOFT_RESET); goto cleanup; case COMP_RING_UNDERRUN: @@ -2755,9 +2751,7 @@ static int handle_tx_event(struct xhci_hcd *xhci, if (trb_comp_code == COMP_STALL_ERROR || xhci_requires_manual_halt_cleanup(xhci, ep_ctx, trb_comp_code)) { - xhci_handle_halted_endpoint(xhci, ep, - ep_ring->stream_id, - NULL, + xhci_handle_halted_endpoint(xhci, ep, NULL, EP_HARD_RESET); } goto cleanup; @@ -2850,9 +2844,8 @@ static int handle_tx_event(struct xhci_hcd *xhci, if (trb_comp_code == COMP_STALL_ERROR || xhci_requires_manual_halt_cleanup(xhci, ep_ctx, trb_comp_code)) - xhci_handle_halted_endpoint(xhci, ep, - ep_ring->stream_id, - td, EP_HARD_RESET); + xhci_handle_halted_endpoint(xhci, ep, td, + EP_HARD_RESET); goto cleanup; } From a08ca6ebafe615c9028c53fc4c9e6c9b2b1f2888 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 29 Nov 2022 15:17:49 +0100 Subject: [PATCH 2928/4122] USB: serial: f81232: fix division by zero on line-speed change The driver leaves the line speed unchanged in case a requested speed is not supported. Make sure to handle the case where the current speed is B0 (hangup) without dividing by zero when determining the clock source. 
Fixes: 268ddb5e9b62 ("USB: serial: f81232: add high baud rate support") Cc: stable@vger.kernel.org # 5.2 Cc: Ji-Ze Hong (Peter Hong) Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold --- drivers/usb/serial/f81232.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/usb/serial/f81232.c b/drivers/usb/serial/f81232.c index 2dd58cd9f0cc..891fb1fe69df 100644 --- a/drivers/usb/serial/f81232.c +++ b/drivers/usb/serial/f81232.c @@ -130,9 +130,6 @@ static u8 const clock_table[] = { F81232_CLK_1_846_MHZ, F81232_CLK_14_77_MHZ, static int calc_baud_divisor(speed_t baudrate, speed_t clockrate) { - if (!baudrate) - return 0; - return DIV_ROUND_CLOSEST(clockrate, baudrate); } @@ -498,9 +495,14 @@ static void f81232_set_baudrate(struct tty_struct *tty, speed_t baud_list[] = { baudrate, old_baudrate, F81232_DEF_BAUDRATE }; for (i = 0; i < ARRAY_SIZE(baud_list); ++i) { - idx = f81232_find_clk(baud_list[i]); + baudrate = baud_list[i]; + if (baudrate == 0) { + tty_encode_baud_rate(tty, 0, 0); + return; + } + + idx = f81232_find_clk(baudrate); if (idx >= 0) { - baudrate = baud_list[i]; tty_encode_baud_rate(tty, baudrate, baudrate); break; } From 188c9c2e0c7f4ae864113f80c40bafb394062271 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 29 Nov 2022 15:18:19 +0100 Subject: [PATCH 2929/4122] USB: serial: f81534: fix division by zero on line-speed change The driver leaves the line speed unchanged in case a requested speed is not supported. Make sure to handle the case where the current speed is B0 (hangup) without dividing by zero when determining the clock source. 
Fixes: 3aacac02f385 ("USB: serial: f81534: add high baud rate support") Cc: stable@vger.kernel.org # 4.16 Cc: Ji-Ze Hong (Peter Hong) Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold --- drivers/usb/serial/f81534.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/usb/serial/f81534.c b/drivers/usb/serial/f81534.c index ddfcd72eb0ae..4083ae961be4 100644 --- a/drivers/usb/serial/f81534.c +++ b/drivers/usb/serial/f81534.c @@ -536,9 +536,6 @@ static int f81534_submit_writer(struct usb_serial_port *port, gfp_t mem_flags) static u32 f81534_calc_baud_divisor(u32 baudrate, u32 clockrate) { - if (!baudrate) - return 0; - /* Round to nearest divisor */ return DIV_ROUND_CLOSEST(clockrate, baudrate); } @@ -568,9 +565,14 @@ static int f81534_set_port_config(struct usb_serial_port *port, u32 baud_list[] = {baudrate, old_baudrate, F81534_DEFAULT_BAUD_RATE}; for (i = 0; i < ARRAY_SIZE(baud_list); ++i) { - idx = f81534_find_clk(baud_list[i]); + baudrate = baud_list[i]; + if (baudrate == 0) { + tty_encode_baud_rate(tty, 0, 0); + return 0; + } + + idx = f81534_find_clk(baudrate); if (idx >= 0) { - baudrate = baud_list[i]; tty_encode_baud_rate(tty, baudrate, baudrate); break; } From 63b8ed26cd093ecc1bcdd1fd841f238a52c11031 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 29 Nov 2022 15:18:57 +0100 Subject: [PATCH 2930/4122] USB: serial: xr: avoid requesting zero DTE rate When the requested line speed is B0 (hangup) there is no need to use the current speed in the line-coding request. This specifically avoids requesting a zero DTE rate when the current speed is B0, which could potentially confuse buggy firmware. 
Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold --- drivers/usb/serial/xr_serial.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/usb/serial/xr_serial.c b/drivers/usb/serial/xr_serial.c index f3811e060a44..fdb0aae546c3 100644 --- a/drivers/usb/serial/xr_serial.c +++ b/drivers/usb/serial/xr_serial.c @@ -749,8 +749,6 @@ static void xr_cdc_set_line_coding(struct tty_struct *tty, if (tty->termios.c_ospeed) lc->dwDTERate = cpu_to_le32(tty->termios.c_ospeed); - else if (old_termios) - lc->dwDTERate = cpu_to_le32(old_termios->c_ospeed); else lc->dwDTERate = cpu_to_le32(9600); From 69b14fde0b8dd159ba19d2b2083a0f2958a9130c Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 30 Nov 2022 12:13:00 +1100 Subject: [PATCH 2931/4122] driver core: fix up missed scsi/cxlflash class.devnode() conversion. Fixes: ff62b8e6588f ("driver core: make struct class.devnode() take a const *") Link: https://lore.kernel.org/r/20221130123851.6a9f2242@canb.auug.org.au Signed-off-by: Stephen Rothwell Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/cxlflash/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c index cd1324ec742d..395b00b942f7 100644 --- a/drivers/scsi/cxlflash/main.c +++ b/drivers/scsi/cxlflash/main.c @@ -3857,7 +3857,7 @@ static void cxlflash_pci_resume(struct pci_dev *pdev) * * Return: Allocated string describing the devtmpfs structure. */ -static char *cxlflash_devnode(struct device *dev, umode_t *mode) +static char *cxlflash_devnode(const struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "cxlflash/%s", dev_name(dev)); } From 50dc8d18f62d58a2330f08fddc069f263d191c90 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 30 Nov 2022 12:32:57 +1100 Subject: [PATCH 2932/4122] driver core: fix up missed drivers/s390/char/hmcdrv_dev.c class.devnode() conversion. 
Fixes: ff62b8e6588f ("driver core: make struct class.devnode() take a const *") Link: https://lore.kernel.org/r/20221130123851.6a9f2242@canb.auug.org.au Signed-off-by: Stephen Rothwell Signed-off-by: Greg Kroah-Hartman --- drivers/s390/char/hmcdrv_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/char/hmcdrv_dev.c b/drivers/s390/char/hmcdrv_dev.c index 20e9cd542e03..cb8fdf057eca 100644 --- a/drivers/s390/char/hmcdrv_dev.c +++ b/drivers/s390/char/hmcdrv_dev.c @@ -90,7 +90,7 @@ static dev_t hmcdrv_dev_no; /* device number (major/minor) */ * * Return: recommended device file name in /dev */ -static char *hmcdrv_dev_name(struct device *dev, umode_t *mode) +static char *hmcdrv_dev_name(const struct device *dev, umode_t *mode) { char *nodename = NULL; const char *devname = dev_name(dev); /* kernel device name */ From 5ec3289b31ab9bb209be59cee360aac4b03f320a Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 18 Nov 2022 14:32:38 +0000 Subject: [PATCH 2933/4122] KVM: x86/xen: Compatibility fixes for shared runstate area MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The guest runstate area can be arbitrarily byte-aligned. In fact, even when a sane 32-bit guest aligns the overall structure nicely, the 64-bit fields in the structure end up being unaligned due to the fact that the 32-bit ABI only aligns them to 32 bits. So setting the ->state_entry_time field to something|XEN_RUNSTATE_UPDATE is buggy, because if it's unaligned then we can't update the whole field atomically; the low bytes might be observable before the _UPDATE bit is. Xen actually updates the *byte* containing that top bit, on its own. KVM should do the same. In addition, we cannot assume that the runstate area fits within a single page. 
One option might be to make the gfn_to_pfn cache cope with regions that cross a page — but getting a contiguous virtual kernel mapping of a discontiguous set of IOMEM pages is a distinctly non-trivial exercise, and it seems this is the *only* current use case for the GPC which would benefit from it. An earlier version of the runstate code did use a gfn_to_hva cache for this purpose, but it still had the single-page restriction because it used the uhva directly — because it needs to be able to do so atomically when the vCPU is being scheduled out, so it used pagefault_disable() around the accesses and didn't just use kvm_write_guest_cached() which has a fallback path. So... use a pair of GPCs for the first and potential second page covering the runstate area. We can get away with locking both at once because nothing else takes more than one GPC lock at a time so we can invent a trivial ordering rule. The common case where it's all in the same page is kept as a fast path, but in both cases, the actual guest structure (compat or not) is built up from the fields in @vx, following preset pointers to the state and times fields. The only difference is whether those pointers point to the kernel stack (in the split case) or to guest memory directly via the GPC. The fast path is also fixed to use a byte access for the XEN_RUNSTATE_UPDATE bit, then the only real difference is the dual memcpy. Finally, Xen also does write the runstate area immediately when it's configured. Flip the kvm_xen_update_runstate() and …_guest() functions and call the latter directly when the runstate area is set. This means that other ioctls which modify the runstate also write it immediately to the guest when they do so, which is also intended. Update the xen_shinfo_test to exercise the pathological case where the XEN_RUNSTATE_UPDATE flag in the top byte of the state_entry_time is actually in a different page to the rest of the 64-bit word. 
Signed-off-by: David Woodhouse Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/xen.c | 406 ++++++++++++------ arch/x86/kvm/xen.h | 6 +- .../selftests/kvm/x86_64/xen_shinfo_test.c | 12 +- 4 files changed, 294 insertions(+), 131 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index d1013c4f673c..70af7240a1d5 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -686,6 +686,7 @@ struct kvm_vcpu_xen { struct gfn_to_pfn_cache vcpu_info_cache; struct gfn_to_pfn_cache vcpu_time_info_cache; struct gfn_to_pfn_cache runstate_cache; + struct gfn_to_pfn_cache runstate2_cache; u64 last_steal; u64 runstate_entry_time; u64 runstate_times[4]; diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 4b8e9628fbf5..cfc1c07bc78f 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -170,7 +170,246 @@ static void kvm_xen_init_timer(struct kvm_vcpu *vcpu) vcpu->arch.xen.timer.function = xen_timer_callback; } -static void kvm_xen_update_runstate(struct kvm_vcpu *v, int state) +static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic) +{ + struct kvm_vcpu_xen *vx = &v->arch.xen; + struct gfn_to_pfn_cache *gpc1 = &vx->runstate_cache; + struct gfn_to_pfn_cache *gpc2 = &vx->runstate2_cache; + size_t user_len, user_len1, user_len2; + struct vcpu_runstate_info rs; + unsigned long flags; + size_t times_ofs; + uint8_t *update_bit; + uint64_t *rs_times; + int *rs_state; + + /* + * The only difference between 32-bit and 64-bit versions of the + * runstate struct is the alignment of uint64_t in 32-bit, which + * means that the 64-bit version has an additional 4 bytes of + * padding after the first field 'state'. Let's be really really + * paranoid about that, and matching it with our internal data + * structures that we memcpy into it... 
+ */ + BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) != 0); + BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state) != 0); + BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c); +#ifdef CONFIG_X86_64 + /* + * The 64-bit structure has 4 bytes of padding before 'state_entry_time' + * so each subsequent field is shifted by 4, and it's 4 bytes longer. + */ + BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) != + offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4); + BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) != + offsetof(struct compat_vcpu_runstate_info, time) + 4); + BUILD_BUG_ON(sizeof(struct vcpu_runstate_info) != 0x2c + 4); +#endif + /* + * The state field is in the same place at the start of both structs, + * and is the same size (int) as vx->current_runstate. + */ + BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) != + offsetof(struct compat_vcpu_runstate_info, state)); + BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state) != + sizeof(vx->current_runstate)); + BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state) != + sizeof(vx->current_runstate)); + + /* + * The state_entry_time field is 64 bits in both versions, and the + * XEN_RUNSTATE_UPDATE flag is in the top bit, which given that x86 + * is little-endian means that it's in the last *byte* of the word. + * That detail is important later. + */ + BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state_entry_time) != + sizeof(uint64_t)); + BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state_entry_time) != + sizeof(uint64_t)); + BUILD_BUG_ON((XEN_RUNSTATE_UPDATE >> 56) != 0x80); + + /* + * The time array is four 64-bit quantities in both versions, matching + * the vx->runstate_times and immediately following state_entry_time. 
+ */ + BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) != + offsetof(struct vcpu_runstate_info, time) - sizeof(uint64_t)); + BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state_entry_time) != + offsetof(struct compat_vcpu_runstate_info, time) - sizeof(uint64_t)); + BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) != + sizeof_field(struct compat_vcpu_runstate_info, time)); + BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) != + sizeof(vx->runstate_times)); + + if (IS_ENABLED(CONFIG_64BIT) && v->kvm->arch.xen.long_mode) { + user_len = sizeof(struct vcpu_runstate_info); + times_ofs = offsetof(struct vcpu_runstate_info, + state_entry_time); + } else { + user_len = sizeof(struct compat_vcpu_runstate_info); + times_ofs = offsetof(struct compat_vcpu_runstate_info, + state_entry_time); + } + + /* + * There are basically no alignment constraints. The guest can set it + * up so it crosses from one page to the next, and at arbitrary byte + * alignment (and the 32-bit ABI doesn't align the 64-bit integers + * anyway, even if the overall struct had been 64-bit aligned). + */ + if ((gpc1->gpa & ~PAGE_MASK) + user_len >= PAGE_SIZE) { + user_len1 = PAGE_SIZE - (gpc1->gpa & ~PAGE_MASK); + user_len2 = user_len - user_len1; + } else { + user_len1 = user_len; + user_len2 = 0; + } + BUG_ON(user_len1 + user_len2 != user_len); + + retry: + /* + * Attempt to obtain the GPC lock on *both* (if there are two) + * gfn_to_pfn caches that cover the region. 
+ */ + read_lock_irqsave(&gpc1->lock, flags); + while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc1, gpc1->gpa, user_len1)) { + read_unlock_irqrestore(&gpc1->lock, flags); + + /* When invoked from kvm_sched_out() we cannot sleep */ + if (atomic) + return; + + if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc1, gpc1->gpa, user_len1)) + return; + + read_lock_irqsave(&gpc1->lock, flags); + } + + if (likely(!user_len2)) { + /* + * Set up three pointers directly to the runstate_info + * struct in the guest (via the GPC). + * + * • @rs_state → state field + * • @rs_times → state_entry_time field. + * • @update_bit → last byte of state_entry_time, which + * contains the XEN_RUNSTATE_UPDATE bit. + */ + rs_state = gpc1->khva; + rs_times = gpc1->khva + times_ofs; + update_bit = ((void *)(&rs_times[1])) - 1; + } else { + /* + * The guest's runstate_info is split across two pages and we + * need to hold and validate both GPCs simultaneously. We can + * declare a lock ordering GPC1 > GPC2 because nothing else + * takes them more than one at a time. + */ + read_lock(&gpc2->lock); + + if (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc2, gpc2->gpa, user_len2)) { + read_unlock(&gpc2->lock); + read_unlock_irqrestore(&gpc1->lock, flags); + + /* When invoked from kvm_sched_out() we cannot sleep */ + if (atomic) + return; + + /* + * Use kvm_gpc_activate() here because if the runstate + * area was configured in 32-bit mode and only extends + * to the second page now because the guest changed to + * 64-bit mode, the second GPC won't have been set up. + */ + if (kvm_gpc_activate(v->kvm, gpc2, NULL, KVM_HOST_USES_PFN, + gpc1->gpa + user_len1, user_len2)) + return; + + /* + * We dropped the lock on GPC1 so we have to go all the + * way back and revalidate that too. + */ + goto retry; + } + + /* + * In this case, the runstate_info struct will be assembled on + * the kernel stack (compat or not as appropriate) and will + * be copied to GPC1/GPC2 with a dual memcpy. 
Set up the three + * rs pointers accordingly. + */ + rs_times = &rs.state_entry_time; + + /* + * The rs_state pointer points to the start of what we'll + * copy to the guest, which in the case of a compat guest + * is the 32-bit field that the compiler thinks is padding. + */ + rs_state = ((void *)rs_times) - times_ofs; + + /* + * The update_bit is still directly in the guest memory, + * via one GPC or the other. + */ + if (user_len1 >= times_ofs + sizeof(uint64_t)) + update_bit = gpc1->khva + times_ofs + + sizeof(uint64_t) - 1; + else + update_bit = gpc2->khva + times_ofs + + sizeof(uint64_t) - 1 - user_len1; + +#ifdef CONFIG_X86_64 + /* + * Don't leak kernel memory through the padding in the 64-bit + * version of the struct. + */ + memset(&rs, 0, offsetof(struct vcpu_runstate_info, state_entry_time)); +#endif + } + + /* + * First, set the XEN_RUNSTATE_UPDATE bit in the top bit of the + * state_entry_time field, directly in the guest. We need to set + * that (and write-barrier) before writing to the rest of the + * structure, and clear it last. Just as Xen does, we address the + * single *byte* in which it resides because it might be in a + * different cache line to the rest of the 64-bit word, due to + * the (lack of) alignment constraints. + */ + *update_bit = (vx->runstate_entry_time | XEN_RUNSTATE_UPDATE) >> 56; + smp_wmb(); + + /* + * Now assemble the actual structure, either on our kernel stack + * or directly in the guest according to how the rs_state and + * rs_times pointers were set up above. + */ + *rs_state = vx->current_runstate; + rs_times[0] = vx->runstate_entry_time | XEN_RUNSTATE_UPDATE; + memcpy(rs_times + 1, vx->runstate_times, sizeof(vx->runstate_times)); + + /* For the split case, we have to then copy it to the guest. */ + if (user_len2) { + memcpy(gpc1->khva, rs_state, user_len1); + memcpy(gpc2->khva, ((void *)rs_state) + user_len1, user_len2); + } + smp_wmb(); + + /* Finally, clear the XEN_RUNSTATE_UPDATE bit. 
*/ + *update_bit = vx->runstate_entry_time >> 56; + smp_wmb(); + + if (user_len2) + read_unlock(&gpc2->lock); + + read_unlock_irqrestore(&gpc1->lock, flags); + + mark_page_dirty_in_slot(v->kvm, gpc1->memslot, gpc1->gpa >> PAGE_SHIFT); + if (user_len2) + mark_page_dirty_in_slot(v->kvm, gpc2->memslot, gpc2->gpa >> PAGE_SHIFT); +} + +void kvm_xen_update_runstate(struct kvm_vcpu *v, int state) { struct kvm_vcpu_xen *vx = &v->arch.xen; u64 now = get_kvmclock_ns(v->kvm); @@ -196,122 +435,9 @@ static void kvm_xen_update_runstate(struct kvm_vcpu *v, int state) vx->runstate_times[vx->current_runstate] += delta_ns; vx->current_runstate = state; vx->runstate_entry_time = now; -} -void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state) -{ - struct kvm_vcpu_xen *vx = &v->arch.xen; - struct gfn_to_pfn_cache *gpc = &vx->runstate_cache; - uint64_t *user_times; - unsigned long flags; - size_t user_len; - int *user_state; - - kvm_xen_update_runstate(v, state); - - if (!vx->runstate_cache.active) - return; - - if (IS_ENABLED(CONFIG_64BIT) && v->kvm->arch.xen.long_mode) - user_len = sizeof(struct vcpu_runstate_info); - else - user_len = sizeof(struct compat_vcpu_runstate_info); - - read_lock_irqsave(&gpc->lock, flags); - while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc, gpc->gpa, - user_len)) { - read_unlock_irqrestore(&gpc->lock, flags); - - /* When invoked from kvm_sched_out() we cannot sleep */ - if (state == RUNSTATE_runnable) - return; - - if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc, gpc->gpa, user_len)) - return; - - read_lock_irqsave(&gpc->lock, flags); - } - - /* - * The only difference between 32-bit and 64-bit versions of the - * runstate struct us the alignment of uint64_t in 32-bit, which - * means that the 64-bit version has an additional 4 bytes of - * padding after the first field 'state'. - * - * So we use 'int __user *user_state' to point to the state field, - * and 'uint64_t __user *user_times' for runstate_entry_time. 
So - * the actual array of time[] in each state starts at user_times[1]. - */ - BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) != 0); - BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state) != 0); - BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c); -#ifdef CONFIG_X86_64 - BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) != - offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4); - BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) != - offsetof(struct compat_vcpu_runstate_info, time) + 4); -#endif - - user_state = gpc->khva; - - if (IS_ENABLED(CONFIG_64BIT) && v->kvm->arch.xen.long_mode) - user_times = gpc->khva + offsetof(struct vcpu_runstate_info, - state_entry_time); - else - user_times = gpc->khva + offsetof(struct compat_vcpu_runstate_info, - state_entry_time); - - /* - * First write the updated state_entry_time at the appropriate - * location determined by 'offset'. - */ - BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state_entry_time) != - sizeof(user_times[0])); - BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state_entry_time) != - sizeof(user_times[0])); - - user_times[0] = vx->runstate_entry_time | XEN_RUNSTATE_UPDATE; - smp_wmb(); - - /* - * Next, write the new runstate. This is in the *same* place - * for 32-bit and 64-bit guests, asserted here for paranoia. - */ - BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) != - offsetof(struct compat_vcpu_runstate_info, state)); - BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state) != - sizeof(vx->current_runstate)); - BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state) != - sizeof(vx->current_runstate)); - - *user_state = vx->current_runstate; - - /* - * Write the actual runstate times immediately after the - * runstate_entry_time. 
- */ - BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) != - offsetof(struct vcpu_runstate_info, time) - sizeof(u64)); - BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state_entry_time) != - offsetof(struct compat_vcpu_runstate_info, time) - sizeof(u64)); - BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) != - sizeof_field(struct compat_vcpu_runstate_info, time)); - BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) != - sizeof(vx->runstate_times)); - - memcpy(user_times + 1, vx->runstate_times, sizeof(vx->runstate_times)); - smp_wmb(); - - /* - * Finally, clear the XEN_RUNSTATE_UPDATE bit in the guest's - * runstate_entry_time field. - */ - user_times[0] &= ~XEN_RUNSTATE_UPDATE; - smp_wmb(); - - read_unlock_irqrestore(&gpc->lock, flags); - - mark_page_dirty_in_slot(v->kvm, gpc->memslot, gpc->gpa >> PAGE_SHIFT); + if (vx->runstate_cache.active) + kvm_xen_update_runstate_guest(v, state == RUNSTATE_runnable); } static void kvm_xen_inject_vcpu_vector(struct kvm_vcpu *v) @@ -584,23 +710,57 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); break; - case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR: + case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR: { + size_t sz, sz1, sz2; + if (!sched_info_on()) { r = -EOPNOTSUPP; break; } if (data->u.gpa == GPA_INVALID) { + r = 0; + deactivate_out: kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.runstate_cache); - r = 0; + kvm_gpc_deactivate(vcpu->kvm, + &vcpu->arch.xen.runstate2_cache); break; } - r = kvm_gpc_activate(vcpu->kvm, &vcpu->arch.xen.runstate_cache, - NULL, KVM_HOST_USES_PFN, data->u.gpa, - sizeof(struct vcpu_runstate_info)); - break; + /* + * If the guest switches to 64-bit mode after setting the runstate + * address, that's actually OK. kvm_xen_update_runstate_guest() + * will cope. 
+ */ + if (IS_ENABLED(CONFIG_64BIT) && vcpu->kvm->arch.xen.long_mode) + sz = sizeof(struct vcpu_runstate_info); + else + sz = sizeof(struct compat_vcpu_runstate_info); + /* How much fits in the (first) page? */ + sz1 = PAGE_SIZE - (data->u.gpa & ~PAGE_MASK); + r = kvm_gpc_activate(vcpu->kvm, &vcpu->arch.xen.runstate_cache, + NULL, KVM_HOST_USES_PFN, data->u.gpa, sz1); + if (r) + goto deactivate_out; + + /* Either map the second page, or deactivate the second GPC */ + if (sz1 >= sz) { + kvm_gpc_deactivate(vcpu->kvm, + &vcpu->arch.xen.runstate2_cache); + } else { + sz2 = sz - sz1; + BUG_ON((data->u.gpa + sz1) & ~PAGE_MASK); + r = kvm_gpc_activate(vcpu->kvm, &vcpu->arch.xen.runstate2_cache, + NULL, KVM_HOST_USES_PFN, + data->u.gpa + sz1, sz2); + if (r) + goto deactivate_out; + } + + kvm_xen_update_runstate_guest(vcpu, false); + break; + } case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT: if (!sched_info_on()) { r = -EOPNOTSUPP; @@ -1834,6 +1994,7 @@ void kvm_xen_init_vcpu(struct kvm_vcpu *vcpu) timer_setup(&vcpu->arch.xen.poll_timer, cancel_evtchn_poll, 0); kvm_gpc_init(&vcpu->arch.xen.runstate_cache); + kvm_gpc_init(&vcpu->arch.xen.runstate2_cache); kvm_gpc_init(&vcpu->arch.xen.vcpu_info_cache); kvm_gpc_init(&vcpu->arch.xen.vcpu_time_info_cache); } @@ -1844,6 +2005,7 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu) kvm_xen_stop_timer(vcpu); kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.runstate_cache); + kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.runstate2_cache); kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache); kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_time_info_cache); diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h index 532a535a9e99..8503d2c6891e 100644 --- a/arch/x86/kvm/xen.h +++ b/arch/x86/kvm/xen.h @@ -143,11 +143,11 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu); #include #include -void kvm_xen_update_runstate_guest(struct kvm_vcpu *vcpu, int state); +void kvm_xen_update_runstate(struct kvm_vcpu *vcpu, int state); static 
inline void kvm_xen_runstate_set_running(struct kvm_vcpu *vcpu) { - kvm_xen_update_runstate_guest(vcpu, RUNSTATE_running); + kvm_xen_update_runstate(vcpu, RUNSTATE_running); } static inline void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu) @@ -162,7 +162,7 @@ static inline void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu) if (WARN_ON_ONCE(!vcpu->preempted)) return; - kvm_xen_update_runstate_guest(vcpu, RUNSTATE_runnable); + kvm_xen_update_runstate(vcpu, RUNSTATE_runnable); } /* 32-bit compatibility definitions, also used natively in 32-bit build */ diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c index 2a5727188c8d..7f39815f1772 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c @@ -26,17 +26,17 @@ #define SHINFO_REGION_GPA 0xc0000000ULL #define SHINFO_REGION_SLOT 10 -#define DUMMY_REGION_GPA (SHINFO_REGION_GPA + (2 * PAGE_SIZE)) +#define DUMMY_REGION_GPA (SHINFO_REGION_GPA + (3 * PAGE_SIZE)) #define DUMMY_REGION_SLOT 11 #define SHINFO_ADDR (SHINFO_REGION_GPA) -#define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE) -#define RUNSTATE_ADDR (SHINFO_REGION_GPA + PAGE_SIZE + 0x20) #define VCPU_INFO_ADDR (SHINFO_REGION_GPA + 0x40) +#define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE) +#define RUNSTATE_ADDR (SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - 15) #define SHINFO_VADDR (SHINFO_REGION_GVA) -#define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + 0x20) #define VCPU_INFO_VADDR (SHINFO_REGION_GVA + 0x40) +#define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + PAGE_SIZE - 15) #define EVTCHN_VECTOR 0x10 @@ -449,8 +449,8 @@ int main(int argc, char *argv[]) /* Map a region for the shared_info page */ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, - SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 2, 0); - virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2); + SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 3, 0); + 
virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 3); struct shared_info *shinfo = addr_gpa2hva(vm, SHINFO_VADDR); From d8ba8ba4c801b794f47852a6f1821ea48f83b5d1 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 27 Nov 2022 12:22:10 +0000 Subject: [PATCH 2934/4122] KVM: x86/xen: Allow XEN_RUNSTATE_UPDATE flag behaviour to be configured Closer inspection of the Xen code shows that we aren't supposed to be using the XEN_RUNSTATE_UPDATE flag unconditionally. It should be explicitly enabled by guests through the HYPERVISOR_vm_assist hypercall. If we randomly set the top bit of ->state_entry_time for a guest that hasn't asked for it and doesn't expect it, that could make the runtimes fail to add up and confuse the guest. Without the flag it's perfectly safe for a vCPU to read its own vcpu_runstate_info; just not for one vCPU to read *another's*. I briefly pondered adding a word for the whole set of VMASST_TYPE_* flags but the only one we care about for HVM guests is this, so it seemed a bit pointless. Signed-off-by: David Woodhouse Message-Id: <20221127122210.248427-3-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 34 +++++++++-- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/x86.c | 3 +- arch/x86/kvm/xen.c | 57 ++++++++++++++----- include/uapi/linux/kvm.h | 4 ++ .../selftests/kvm/x86_64/xen_shinfo_test.c | 14 +++++ 6 files changed, 93 insertions(+), 20 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 9175d41e8081..5617bc4f899f 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -5339,6 +5339,7 @@ KVM_PV_ASYNC_CLEANUP_PERFORM union { __u8 long_mode; __u8 vector; + __u8 runstate_update_flag; struct { __u64 gfn; } shared_info; @@ -5416,6 +5417,14 @@ KVM_XEN_ATTR_TYPE_XEN_VERSION event channel delivery, so responding within the kernel without exiting to userspace is beneficial. 
+KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG + This attribute is available when the KVM_CAP_XEN_HVM ioctl indicates + support for KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG. It enables the + XEN_RUNSTATE_UPDATE flag which allows guest vCPUs to safely read + other vCPUs' vcpu_runstate_info. Xen guests enable this feature via + the VM_ASST_TYPE_runstate_update_flag of the HYPERVISOR_vm_assist + hypercall. + 4.127 KVM_XEN_HVM_GET_ATTR -------------------------- @@ -8059,12 +8068,13 @@ to userspace. This capability indicates the features that Xen supports for hosting Xen PVHVM guests. Valid flags are:: - #define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0) - #define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) - #define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) - #define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) - #define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) - #define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5) + #define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0) + #define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) + #define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) + #define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) + #define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) + #define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5) + #define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6) The KVM_XEN_HVM_CONFIG_HYPERCALL_MSR flag indicates that the KVM_XEN_HVM_CONFIG ioctl is available, for the guest to set its hypercall page. @@ -8096,6 +8106,18 @@ KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID/TIMER/UPCALL_VECTOR vCPU attributes. related to event channel delivery, timers, and the XENVER_version interception. +The KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG flag indicates that KVM supports +the KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG attribute in the KVM_XEN_SET_ATTR +and KVM_XEN_GET_ATTR ioctls. This controls whether KVM will set the +XEN_RUNSTATE_UPDATE flag in guest memory mapped vcpu_runstate_info during +updates of the runstate information. 
Note that versions of KVM which support +the RUNSTATE feature above, but not thie RUNSTATE_UPDATE_FLAG feature, will +always set the XEN_RUNSTATE_UPDATE flag when updating the guest structure, +which is perhaps counterintuitive. When this flag is advertised, KVM will +behave more correctly, not using the XEN_RUNSTATE_UPDATE flag until/unless +specifically enabled (by the guest making the hypercall, causing the VMM +to enable the KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG attribute). + 8.31 KVM_CAP_PPC_MULTITCE ------------------------- diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 70af7240a1d5..283cbb83d6ae 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1113,6 +1113,7 @@ struct msr_bitmap_range { struct kvm_xen { u32 xen_version; bool long_mode; + bool runstate_update_flag; u8 upcall_vector; struct gfn_to_pfn_cache shinfo_cache; struct idr evtchn_ports; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 72ac6bf05c8b..59fd55badd73 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4431,7 +4431,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL | KVM_XEN_HVM_CONFIG_EVTCHN_SEND; if (sched_info_on()) - r |= KVM_XEN_HVM_CONFIG_RUNSTATE; + r |= KVM_XEN_HVM_CONFIG_RUNSTATE | + KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG; break; #endif case KVM_CAP_SYNC_REGS: diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index cfc1c07bc78f..7acac5dfe2f8 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -179,7 +179,8 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic) struct vcpu_runstate_info rs; unsigned long flags; size_t times_ofs; - uint8_t *update_bit; + uint8_t *update_bit = NULL; + uint64_t entry_time; uint64_t *rs_times; int *rs_state; @@ -297,7 +298,8 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic) */ rs_state = gpc1->khva; rs_times = gpc1->khva + times_ofs; - update_bit = 
((void *)(&rs_times[1])) - 1; + if (v->kvm->arch.xen.runstate_update_flag) + update_bit = ((void *)(&rs_times[1])) - 1; } else { /* * The guest's runstate_info is split across two pages and we @@ -351,12 +353,14 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic) * The update_bit is still directly in the guest memory, * via one GPC or the other. */ - if (user_len1 >= times_ofs + sizeof(uint64_t)) - update_bit = gpc1->khva + times_ofs + - sizeof(uint64_t) - 1; - else - update_bit = gpc2->khva + times_ofs + - sizeof(uint64_t) - 1 - user_len1; + if (v->kvm->arch.xen.runstate_update_flag) { + if (user_len1 >= times_ofs + sizeof(uint64_t)) + update_bit = gpc1->khva + times_ofs + + sizeof(uint64_t) - 1; + else + update_bit = gpc2->khva + times_ofs + + sizeof(uint64_t) - 1 - user_len1; + } #ifdef CONFIG_X86_64 /* @@ -376,8 +380,12 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic) * different cache line to the rest of the 64-bit word, due to * the (lack of) alignment constraints. */ - *update_bit = (vx->runstate_entry_time | XEN_RUNSTATE_UPDATE) >> 56; - smp_wmb(); + entry_time = vx->runstate_entry_time; + if (update_bit) { + entry_time |= XEN_RUNSTATE_UPDATE; + *update_bit = (vx->runstate_entry_time | XEN_RUNSTATE_UPDATE) >> 56; + smp_wmb(); + } /* * Now assemble the actual structure, either on our kernel stack @@ -385,7 +393,7 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic) * rs_times pointers were set up above. */ *rs_state = vx->current_runstate; - rs_times[0] = vx->runstate_entry_time | XEN_RUNSTATE_UPDATE; + rs_times[0] = entry_time; memcpy(rs_times + 1, vx->runstate_times, sizeof(vx->runstate_times)); /* For the split case, we have to then copy it to the guest. */ @@ -396,8 +404,11 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic) smp_wmb(); /* Finally, clear the XEN_RUNSTATE_UPDATE bit. 
*/ - *update_bit = vx->runstate_entry_time >> 56; - smp_wmb(); + if (update_bit) { + entry_time &= ~XEN_RUNSTATE_UPDATE; + *update_bit = entry_time >> 56; + smp_wmb(); + } if (user_len2) read_unlock(&gpc2->lock); @@ -619,6 +630,17 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data) r = 0; break; + case KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG: + if (!sched_info_on()) { + r = -EOPNOTSUPP; + break; + } + mutex_lock(&kvm->lock); + kvm->arch.xen.runstate_update_flag = !!data->u.runstate_update_flag; + mutex_unlock(&kvm->lock); + r = 0; + break; + default: break; } @@ -656,6 +678,15 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data) r = 0; break; + case KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG: + if (!sched_info_on()) { + r = -EOPNOTSUPP; + break; + } + data->u.runstate_update_flag = kvm->arch.xen.runstate_update_flag; + r = 0; + break; + default: break; } diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 88448397642c..64dfe9c07c87 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1271,6 +1271,7 @@ struct kvm_x86_mce { #define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) #define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) #define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5) +#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6) struct kvm_xen_hvm_config { __u32 flags; @@ -1776,6 +1777,7 @@ struct kvm_xen_hvm_attr { union { __u8 long_mode; __u8 vector; + __u8 runstate_update_flag; struct { __u64 gfn; } shared_info; @@ -1816,6 +1818,8 @@ struct kvm_xen_hvm_attr { /* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ #define KVM_XEN_ATTR_TYPE_EVTCHN 0x3 #define KVM_XEN_ATTR_TYPE_XEN_VERSION 0x4 +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG */ +#define KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG 0x5 /* Per-vCPU Xen attributes */ #define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr) diff --git 
a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c index 7f39815f1772..c9b0110d73f3 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c @@ -440,6 +440,7 @@ int main(int argc, char *argv[]) TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO); bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE); + bool do_runstate_flag = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG); bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL); bool do_evtchn_tests = do_eventfd_tests && !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND); @@ -475,6 +476,19 @@ int main(int argc, char *argv[]) }; vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm); + if (do_runstate_flag) { + struct kvm_xen_hvm_attr ruf = { + .type = KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG, + .u.runstate_update_flag = 1, + }; + vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ruf); + + ruf.u.runstate_update_flag = 0; + vm_ioctl(vm, KVM_XEN_HVM_GET_ATTR, &ruf); + TEST_ASSERT(ruf.u.runstate_update_flag == 1, + "Failed to read back RUNSTATE_UPDATE_FLAG attr"); + } + struct kvm_xen_hvm_attr ha = { .type = KVM_XEN_ATTR_TYPE_SHARED_INFO, .u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE, From 8acc35186ed63436bfaf60051c8bb53f344dcbfc Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 19 Nov 2022 09:27:46 +0000 Subject: [PATCH 2935/4122] KVM: x86/xen: Add runstate tests for 32-bit mode and crossing page boundary Torture test the cases where the runstate crosses a page boundary, and especially the case where it's configured in 32-bit mode and doesn't, but then switching to 64-bit mode makes it go onto the second page. To simplify this, make the KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST ioctl also update the guest runstate area. It already did so if the actual runstate changed, as a side-effect of kvm_xen_update_runstate().
So doing it in the plain adjustment case is making it more consistent, as well as giving us a nice way to trigger the update without actually running the vCPU again and changing the values. Signed-off-by: David Woodhouse Reviewed-by: Paul Durrant Signed-off-by: Paolo Bonzini --- arch/x86/kvm/xen.c | 2 + .../selftests/kvm/x86_64/xen_shinfo_test.c | 113 +++++++++++++++--- 2 files changed, 96 insertions(+), 19 deletions(-) diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 7acac5dfe2f8..60a9bdd4199f 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -884,6 +884,8 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) if (data->u.runstate.state <= RUNSTATE_offline) kvm_xen_update_runstate(vcpu, data->u.runstate.state); + else if (vcpu->arch.xen.runstate_cache.active) + kvm_xen_update_runstate_guest(vcpu, false); r = 0; break; diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c index c9b0110d73f3..721f6a693799 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c @@ -88,14 +88,20 @@ struct pvclock_wall_clock { } __attribute__((__packed__)); struct vcpu_runstate_info { - uint32_t state; - uint64_t state_entry_time; - uint64_t time[4]; + uint32_t state; + uint64_t state_entry_time; + uint64_t time[5]; /* Extra field for overrun check */ }; +struct compat_vcpu_runstate_info { + uint32_t state; + uint64_t state_entry_time; + uint64_t time[5]; +} __attribute__((__packed__));; + struct arch_vcpu_info { - unsigned long cr2; - unsigned long pad; /* sizeof(vcpu_info_t) == 64 */ + unsigned long cr2; + unsigned long pad; /* sizeof(vcpu_info_t) == 64 */ }; struct vcpu_info { @@ -1013,22 +1019,91 @@ int main(int argc, char *argv[]) runstate_names[i], rs->time[i]); } } - TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch"); - TEST_ASSERT(rs->state_entry_time == 
rst.u.runstate.state_entry_time, - "State entry time mismatch"); - TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running, - "Running time mismatch"); - TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable, - "Runnable time mismatch"); - TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked, - "Blocked time mismatch"); - TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline, - "Offline time mismatch"); - TEST_ASSERT(rs->state_entry_time == rs->time[0] + - rs->time[1] + rs->time[2] + rs->time[3], - "runstate times don't add up"); + /* + * Exercise runstate info at all points across the page boundary, in + * 32-bit and 64-bit mode. In particular, test the case where it is + * configured in 32-bit mode and then switched to 64-bit mode while + * active, which takes it onto the second page. + */ + unsigned long runstate_addr; + struct compat_vcpu_runstate_info *crs; + for (runstate_addr = SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - sizeof(*rs) - 4; + runstate_addr < SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE + 4; runstate_addr++) { + + rs = addr_gpa2hva(vm, runstate_addr); + crs = (void *)rs; + + memset(rs, 0xa5, sizeof(*rs)); + + /* Set to compatibility mode */ + lm.u.long_mode = 0; + vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm); + + /* Set runstate to new address (kernel will write it) */ + struct kvm_xen_vcpu_attr st = { + .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR, + .u.gpa = runstate_addr, + }; + vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st); + + if (verbose) + printf("Compatibility runstate at %08lx\n", runstate_addr); + + TEST_ASSERT(crs->state == rst.u.runstate.state, "Runstate mismatch"); + TEST_ASSERT(crs->state_entry_time == rst.u.runstate.state_entry_time, + "State entry time mismatch"); + TEST_ASSERT(crs->time[RUNSTATE_running] == rst.u.runstate.time_running, + "Running time mismatch"); + TEST_ASSERT(crs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable, + "Runnable time mismatch"); + 
TEST_ASSERT(crs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked, + "Blocked time mismatch"); + TEST_ASSERT(crs->time[RUNSTATE_offline] == rst.u.runstate.time_offline, + "Offline time mismatch"); + TEST_ASSERT(crs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL, + "Structure overrun"); + TEST_ASSERT(crs->state_entry_time == crs->time[0] + + crs->time[1] + crs->time[2] + crs->time[3], + "runstate times don't add up"); + + + /* Now switch to 64-bit mode */ + lm.u.long_mode = 1; + vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm); + + memset(rs, 0xa5, sizeof(*rs)); + + /* Don't change the address, just trigger a write */ + struct kvm_xen_vcpu_attr adj = { + .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST, + .u.runstate.state = (uint64_t)-1 + }; + vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &adj); + + if (verbose) + printf("64-bit runstate at %08lx\n", runstate_addr); + + TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch"); + TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time, + "State entry time mismatch"); + TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running, + "Running time mismatch"); + TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable, + "Runnable time mismatch"); + TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked, + "Blocked time mismatch"); + TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline, + "Offline time mismatch"); + TEST_ASSERT(rs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL, + "Structure overrun"); + + TEST_ASSERT(rs->state_entry_time == rs->time[0] + + rs->time[1] + rs->time[2] + rs->time[3], + "runstate times don't add up"); + } } + kvm_vm_free(vm); return 0; } From aba3caef58626f09b629085440eec5dd1368669a Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Thu, 13 Oct 2022 21:12:22 +0000 Subject: [PATCH 2936/4122] KVM: Shorten gfn_to_pfn_cache function names Formalize "gpc" as the acronym and use it in function names. 
No functional change intended. Suggested-by: Sean Christopherson Signed-off-by: Michal Luczaj Signed-off-by: Sean Christopherson Signed-off-by: David Woodhouse Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 8 ++++---- arch/x86/kvm/xen.c | 30 +++++++++++++++--------------- include/linux/kvm_host.h | 21 ++++++++++----------- virt/kvm/pfncache.c | 20 ++++++++++---------- 4 files changed, 39 insertions(+), 40 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 59fd55badd73..246bdc9a9154 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3037,12 +3037,12 @@ static void kvm_setup_guest_pvclock(struct kvm_vcpu *v, unsigned long flags; read_lock_irqsave(&gpc->lock, flags); - while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc, gpc->gpa, - offset + sizeof(*guest_hv_clock))) { + while (!kvm_gpc_check(v->kvm, gpc, gpc->gpa, + offset + sizeof(*guest_hv_clock))) { read_unlock_irqrestore(&gpc->lock, flags); - if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc, gpc->gpa, - offset + sizeof(*guest_hv_clock))) + if (kvm_gpc_refresh(v->kvm, gpc, gpc->gpa, + offset + sizeof(*guest_hv_clock))) return; read_lock_irqsave(&gpc->lock, flags); diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 60a9bdd4199f..9187d024d006 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -273,14 +273,14 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic) * gfn_to_pfn caches that cover the region. 
*/ read_lock_irqsave(&gpc1->lock, flags); - while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc1, gpc1->gpa, user_len1)) { + while (!kvm_gpc_check(v->kvm, gpc1, gpc1->gpa, user_len1)) { read_unlock_irqrestore(&gpc1->lock, flags); /* When invoked from kvm_sched_out() we cannot sleep */ if (atomic) return; - if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc1, gpc1->gpa, user_len1)) + if (kvm_gpc_refresh(v->kvm, gpc1, gpc1->gpa, user_len1)) return; read_lock_irqsave(&gpc1->lock, flags); @@ -309,7 +309,7 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic) */ read_lock(&gpc2->lock); - if (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc2, gpc2->gpa, user_len2)) { + if (!kvm_gpc_check(v->kvm, gpc2, gpc2->gpa, user_len2)) { read_unlock(&gpc2->lock); read_unlock_irqrestore(&gpc1->lock, flags); @@ -489,12 +489,12 @@ void kvm_xen_inject_pending_events(struct kvm_vcpu *v) * little more honest about it. */ read_lock_irqsave(&gpc->lock, flags); - while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc, gpc->gpa, - sizeof(struct vcpu_info))) { + while (!kvm_gpc_check(v->kvm, gpc, gpc->gpa, + sizeof(struct vcpu_info))) { read_unlock_irqrestore(&gpc->lock, flags); - if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc, gpc->gpa, - sizeof(struct vcpu_info))) + if (kvm_gpc_refresh(v->kvm, gpc, gpc->gpa, + sizeof(struct vcpu_info))) return; read_lock_irqsave(&gpc->lock, flags); @@ -554,8 +554,8 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v) sizeof_field(struct compat_vcpu_info, evtchn_upcall_pending)); read_lock_irqsave(&gpc->lock, flags); - while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc, gpc->gpa, - sizeof(struct vcpu_info))) { + while (!kvm_gpc_check(v->kvm, gpc, gpc->gpa, + sizeof(struct vcpu_info))) { read_unlock_irqrestore(&gpc->lock, flags); /* @@ -569,8 +569,8 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v) if (in_atomic() || !task_is_running(current)) return 1; - if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc, gpc->gpa, - sizeof(struct vcpu_info))) { + if (kvm_gpc_refresh(v->kvm, 
gpc, gpc->gpa, + sizeof(struct vcpu_info))) { /* * If this failed, userspace has screwed up the * vcpu_info mapping. No interrupts for you. @@ -1167,7 +1167,7 @@ static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports, read_lock_irqsave(&gpc->lock, flags); idx = srcu_read_lock(&kvm->srcu); - if (!kvm_gfn_to_pfn_cache_check(kvm, gpc, gpc->gpa, PAGE_SIZE)) + if (!kvm_gpc_check(kvm, gpc, gpc->gpa, PAGE_SIZE)) goto out_rcu; ret = false; @@ -1564,7 +1564,7 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm) idx = srcu_read_lock(&kvm->srcu); read_lock_irqsave(&gpc->lock, flags); - if (!kvm_gfn_to_pfn_cache_check(kvm, gpc, gpc->gpa, PAGE_SIZE)) + if (!kvm_gpc_check(kvm, gpc, gpc->gpa, PAGE_SIZE)) goto out_rcu; if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) { @@ -1598,7 +1598,7 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm) gpc = &vcpu->arch.xen.vcpu_info_cache; read_lock_irqsave(&gpc->lock, flags); - if (!kvm_gfn_to_pfn_cache_check(kvm, gpc, gpc->gpa, sizeof(struct vcpu_info))) { + if (!kvm_gpc_check(kvm, gpc, gpc->gpa, sizeof(struct vcpu_info))) { /* * Could not access the vcpu_info. Set the bit in-kernel * and prod the vCPU to deliver it for itself. @@ -1696,7 +1696,7 @@ static int kvm_xen_set_evtchn(struct kvm_xen_evtchn *xe, struct kvm *kvm) break; idx = srcu_read_lock(&kvm->srcu); - rc = kvm_gfn_to_pfn_cache_refresh(kvm, gpc, gpc->gpa, PAGE_SIZE); + rc = kvm_gpc_refresh(kvm, gpc, gpc->gpa, PAGE_SIZE); srcu_read_unlock(&kvm->srcu, idx); } while(!rc); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b8d12356f015..8f874a964313 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1288,16 +1288,15 @@ void kvm_gpc_init(struct gfn_to_pfn_cache *gpc); * -EFAULT for an untranslatable guest physical address. * * This primes a gfn_to_pfn_cache and links it into the @kvm's list for - * invalidations to be processed. 
Callers are required to use - * kvm_gfn_to_pfn_cache_check() to ensure that the cache is valid before - * accessing the target page. + * invalidations to be processed. Callers are required to use kvm_gpc_check() + * to ensure that the cache is valid before accessing the target page. */ int kvm_gpc_activate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, struct kvm_vcpu *vcpu, enum pfn_cache_usage usage, gpa_t gpa, unsigned long len); /** - * kvm_gfn_to_pfn_cache_check - check validity of a gfn_to_pfn_cache. + * kvm_gpc_check - check validity of a gfn_to_pfn_cache. * * @kvm: pointer to kvm instance. * @gpc: struct gfn_to_pfn_cache object. @@ -1314,11 +1313,11 @@ int kvm_gpc_activate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, * Callers in IN_GUEST_MODE may do so without locking, although they should * still hold a read lock on kvm->scru for the memslot checks. */ -bool kvm_gfn_to_pfn_cache_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, - gpa_t gpa, unsigned long len); +bool kvm_gpc_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, gpa_t gpa, + unsigned long len); /** - * kvm_gfn_to_pfn_cache_refresh - update a previously initialized cache. + * kvm_gpc_refresh - update a previously initialized cache. * * @kvm: pointer to kvm instance. * @gpc: struct gfn_to_pfn_cache object. @@ -1335,11 +1334,11 @@ bool kvm_gfn_to_pfn_cache_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, * still lock and check the cache status, as this function does not return * with the lock still held to permit access. */ -int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, - gpa_t gpa, unsigned long len); +int kvm_gpc_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, gpa_t gpa, + unsigned long len); /** - * kvm_gfn_to_pfn_cache_unmap - temporarily unmap a gfn_to_pfn_cache. + * kvm_gpc_unmap - temporarily unmap a gfn_to_pfn_cache. * * @kvm: pointer to kvm instance. * @gpc: struct gfn_to_pfn_cache object. 
@@ -1348,7 +1347,7 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, * but at least the mapping from GPA to userspace HVA will remain cached * and can be reused on a subsequent refresh. */ -void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc); +void kvm_gpc_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc); /** * kvm_gpc_deactivate - deactivate and unlink a gfn_to_pfn_cache. diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c index 5f83321bfd2a..8c4db3dcaf6d 100644 --- a/virt/kvm/pfncache.c +++ b/virt/kvm/pfncache.c @@ -76,8 +76,8 @@ void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start, } } -bool kvm_gfn_to_pfn_cache_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, - gpa_t gpa, unsigned long len) +bool kvm_gpc_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, gpa_t gpa, + unsigned long len) { struct kvm_memslots *slots = kvm_memslots(kvm); @@ -96,7 +96,7 @@ bool kvm_gfn_to_pfn_cache_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, return true; } -EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_check); +EXPORT_SYMBOL_GPL(kvm_gpc_check); static void gpc_unmap_khva(struct kvm *kvm, kvm_pfn_t pfn, void *khva) { @@ -238,8 +238,8 @@ out_error: return -EFAULT; } -int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, - gpa_t gpa, unsigned long len) +int kvm_gpc_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, gpa_t gpa, + unsigned long len) { struct kvm_memslots *slots = kvm_memslots(kvm); unsigned long page_offset = gpa & ~PAGE_MASK; @@ -333,9 +333,9 @@ out_unlock: return ret; } -EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_refresh); +EXPORT_SYMBOL_GPL(kvm_gpc_refresh); -void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc) +void kvm_gpc_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc) { void *old_khva; kvm_pfn_t old_pfn; @@ -360,7 +360,7 @@ void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc) 
gpc_unmap_khva(kvm, old_pfn, old_khva); } -EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_unmap); +EXPORT_SYMBOL_GPL(kvm_gpc_unmap); void kvm_gpc_init(struct gfn_to_pfn_cache *gpc) { @@ -396,7 +396,7 @@ int kvm_gpc_activate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, gpc->active = true; write_unlock_irq(&gpc->lock); } - return kvm_gfn_to_pfn_cache_refresh(kvm, gpc, gpa, len); + return kvm_gpc_refresh(kvm, gpc, gpa, len); } EXPORT_SYMBOL_GPL(kvm_gpc_activate); @@ -416,7 +416,7 @@ void kvm_gpc_deactivate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc) list_del(&gpc->list); spin_unlock(&kvm->gpc_lock); - kvm_gfn_to_pfn_cache_unmap(kvm, gpc); + kvm_gpc_unmap(kvm, gpc); } } EXPORT_SYMBOL_GPL(kvm_gpc_deactivate); From c1a81f3bd9b40edc1444dfaeac33f92cff0e770a Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Thu, 13 Oct 2022 21:12:23 +0000 Subject: [PATCH 2937/4122] KVM: x86: Remove unused argument in gpc_unmap_khva() Remove the unused @kvm argument from gpc_unmap_khva(). Signed-off-by: Michal Luczaj Signed-off-by: Sean Christopherson Signed-off-by: David Woodhouse Signed-off-by: Paolo Bonzini --- virt/kvm/pfncache.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c index 8c4db3dcaf6d..b4295474519f 100644 --- a/virt/kvm/pfncache.c +++ b/virt/kvm/pfncache.c @@ -98,7 +98,7 @@ bool kvm_gpc_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, gpa_t gpa, } EXPORT_SYMBOL_GPL(kvm_gpc_check); -static void gpc_unmap_khva(struct kvm *kvm, kvm_pfn_t pfn, void *khva) +static void gpc_unmap_khva(kvm_pfn_t pfn, void *khva) { /* Unmap the old pfn/page if it was mapped before. */ if (!is_error_noslot_pfn(pfn) && khva) { @@ -177,7 +177,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, struct gfn_to_pfn_cache *gpc) * the existing mapping and didn't create a new one. 
*/ if (new_khva != old_khva) - gpc_unmap_khva(kvm, new_pfn, new_khva); + gpc_unmap_khva(new_pfn, new_khva); kvm_release_pfn_clean(new_pfn); @@ -329,7 +329,7 @@ out_unlock: mutex_unlock(&gpc->refresh_lock); if (unmap_old) - gpc_unmap_khva(kvm, old_pfn, old_khva); + gpc_unmap_khva(old_pfn, old_khva); return ret; } @@ -358,7 +358,7 @@ void kvm_gpc_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc) write_unlock_irq(&gpc->lock); mutex_unlock(&gpc->refresh_lock); - gpc_unmap_khva(kvm, old_pfn, old_khva); + gpc_unmap_khva(old_pfn, old_khva); } EXPORT_SYMBOL_GPL(kvm_gpc_unmap); From df0bb47baa95aad133820b149851d5b94cbc6790 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 30 Nov 2022 11:14:35 -0500 Subject: [PATCH 2938/4122] KVM: x86: fix uninitialized variable use on KVM_REQ_TRIPLE_FAULT If a triple fault was fixed by kvm_x86_ops.nested_ops->triple_fault (by turning it into a vmexit), there is no need to leave vcpu_enter_guest(). Any vcpu->requests will be caught later before the actual vmentry, and in fact vcpu_enter_guest() was not initializing the "r" variable. Depending on the compiler's whims, this could cause the x86_64/triple_fault_event_test test to fail. Cc: Maxim Levitsky Fixes: 92e7d5c83aff ("KVM: x86: allow L1 to not intercept triple fault") Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 246bdc9a9154..7f850dfb4086 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10280,8 +10280,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; vcpu->mmio_needed = 0; r = 0; + goto out; } - goto out; } if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) { /* Page is swapped out. Do synthetic halt */ From 032e160305f6872e590c77f11896fb28365c6d6c Mon Sep 17 00:00:00 2001 From: "Darrick J. 
Wong" Date: Mon, 28 Nov 2022 17:24:42 -0800 Subject: [PATCH 2939/4122] xfs: invalidate block device page cache during unmount Every now and then I see fstests failures on aarch64 (64k pages) that trigger on the following sequence: mkfs.xfs $dev mount $dev $mnt touch $mnt/a umount $mnt xfs_db -c 'path /a' -c 'print' $dev 99% of the time this succeeds, but every now and then xfs_db cannot find /a and fails. This turns out to be a race involving udev/blkid, the page cache for the block device, and the xfs_db process. udev is triggered whenever anyone closes a block device or unmounts it. The default udev rules invoke blkid to read the fs super and create symlinks to the bdev under /dev/disk. For this, it uses buffered reads through the page cache. xfs_db also uses buffered reads to examine metadata. There is no coordination between xfs_db and udev, which means that they can run concurrently. Note there is no coordination between the kernel and blkid either. On a system with 64k pages, the page cache can cache the superblock and the root inode (and hence the root dir) with the same 64k page. If udev spawns blkid after the mkfs and the system is busy enough that it is still running when xfs_db starts up, they'll both read from the same page in the pagecache. The unmount writes updated inode metadata to disk directly. The XFS buffer cache does not use the bdev pagecache, nor does it invalidate the pagecache on umount. If the above scenario occurs, the pagecache no longer reflects what's on disk, xfs_db reads the stale metadata, and fails to find /a. Most of the time this succeeds because closing a bdev invalidates the page cache, but when processes race, everyone loses. Fix the problem by invalidating the bdev pagecache after flushing the bdev, so that xfs_db will see up to date metadata. Signed-off-by: Darrick J. 
Wong Reviewed-by: Gao Xiang Reviewed-by: Dave Chinner --- fs/xfs/xfs_buf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index dde346450952..54c774af6e1c 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1945,6 +1945,7 @@ xfs_free_buftarg( list_lru_destroy(&btp->bt_lru); blkdev_issue_flush(btp->bt_bdev); + invalidate_bdev(btp->bt_bdev); fs_put_dax(btp->bt_daxdev, btp->bt_mount); kmem_free(btp); From fd5beaff250d7e88912a937fad072d9d24f219da Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 28 Nov 2022 17:24:42 -0800 Subject: [PATCH 2940/4122] xfs: use memcpy, not strncpy, to format the attr prefix during listxattr When -Wstringop-truncation is enabled, the compiler complains about truncation of the null byte at the end of the xattr name prefix. This is intentional, since we're concatenating the two strings together and do _not_ want a null byte in the middle of the name. We've already ensured that the name buffer is long enough to handle prefix and name, and the prefix_len is supposed to be the length of the prefix string without the null byte, so use memcpy here instead. Signed-off-by: Darrick J. Wong Reviewed-by: Gao Xiang Reviewed-by: Dave Chinner --- fs/xfs/xfs_xattr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index c325a28b89a8..10aa1fd39d2b 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -210,7 +210,7 @@ __xfs_xattr_put_listent( return; } offset = context->buffer + context->count; - strncpy(offset, prefix, prefix_len); + memcpy(offset, prefix, prefix_len); offset += prefix_len; strncpy(offset, (char *)name, namelen); /* real name */ offset += namelen; From e5827a007aa4bb737c63121fd2c77e089b18a372 Mon Sep 17 00:00:00 2001 From: "Darrick J. 
Wong" Date: Mon, 28 Nov 2022 17:24:42 -0800 Subject: [PATCH 2941/4122] xfs: shut up -Wuninitialized in xfsaild_push -Wuninitialized complains about @target in xfsaild_push being uninitialized in the case where the waitqueue is active but there is no last item in the AIL to wait for. I /think/ it should never be the case that the subsequent xfs_trans_ail_cursor_first returns a log item and hence we'll never end up at XFS_LSN_CMP, but let's make this explicit. Signed-off-by: Darrick J. Wong Reviewed-by: Gao Xiang Reviewed-by: Dave Chinner --- fs/xfs/xfs_trans_ail.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index f51df7d94ef7..7d4109af193e 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -422,7 +422,7 @@ xfsaild_push( struct xfs_ail_cursor cur; struct xfs_log_item *lip; xfs_lsn_t lsn; - xfs_lsn_t target; + xfs_lsn_t target = NULLCOMMITLSN; long tout; int stuck = 0; int flushing = 0; @@ -472,6 +472,8 @@ xfsaild_push( XFS_STATS_INC(mp, xs_push_ail); + ASSERT(target != NULLCOMMITLSN); + lsn = lip->li_lsn; while ((XFS_LSN_CMP(lip->li_lsn, target) <= 0)) { int lock_result; From 4c6dbfd2756bd83a0085ed804e2bb7be9cc16bc5 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 28 Nov 2022 17:24:43 -0800 Subject: [PATCH 2942/4122] xfs: attach dquots to inode before reading data/cow fork mappings I've been running near-continuous integration testing of online fsck, and I've noticed that once a day, one of the ARM VMs will fail the test with out of order records in the data fork. xfs/804 races fsstress with online scrub (aka scan but do not change anything), so I think this might be a bug in the core xfs code. This also only seems to trigger if one runs the test for more than ~6 minutes via TIME_FACTOR=13 or something. 
https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfstests-dev.git/tree/tests/xfs/804?h=djwong-wtf I added a debugging patch to the kernel to check the data fork extents after taking the ILOCK, before dropping ILOCK, and before and after each bmapping operation. So far I've narrowed it down to the delalloc code inserting a record in the wrong place in the iext tree: xfs_bmap_add_extent_hole_delay, near line 2691: case 0: /* * New allocation is not contiguous with another * delayed allocation. * Insert a new entry. */ oldlen = newlen = 0; xfs_iunlock_check_datafork(ip); <-- ok here xfs_iext_insert(ip, icur, new, state); xfs_iunlock_check_datafork(ip); <-- bad here break; } I recorded the state of the data fork mappings and iext cursor state when a corrupt data fork is detected immediately after the xfs_bmap_add_extent_hole_delay call in xfs_bmapi_reserve_delalloc: ino 0x140bb3 func xfs_bmapi_reserve_delalloc line 4164 data fork: ino 0x140bb3 nr 0x0 nr_real 0x0 offset 0xb9 blockcount 0x1f startblock 0x935de2 state 1 ino 0x140bb3 nr 0x1 nr_real 0x1 offset 0xe6 blockcount 0xa startblock 0xffffffffe0007 state 0 ino 0x140bb3 nr 0x2 nr_real 0x1 offset 0xd8 blockcount 0xe startblock 0x935e01 state 0 Here we see that a delalloc extent was inserted into the wrong position in the iext leaf, same as all the other times. 
The extra trace data I collected are as follows: ino 0x140bb3 fork 0 oldoff 0xe6 oldlen 0x4 oldprealloc 0x6 isize 0xe6000 ino 0x140bb3 oldgotoff 0xea oldgotstart 0xfffffffffffffffe oldgotcount 0x0 oldgotstate 0 ino 0x140bb3 crapgotoff 0x0 crapgotstart 0x0 crapgotcount 0x0 crapgotstate 0 ino 0x140bb3 freshgotoff 0xd8 freshgotstart 0x935e01 freshgotcount 0xe freshgotstate 0 ino 0x140bb3 nowgotoff 0xe6 nowgotstart 0xffffffffe0007 nowgotcount 0xa nowgotstate 0 ino 0x140bb3 oldicurpos 1 oldleafnr 2 oldleaf 0xfffffc00f0609a00 ino 0x140bb3 crapicurpos 2 crapleafnr 2 crapleaf 0xfffffc00f0609a00 ino 0x140bb3 freshicurpos 1 freshleafnr 2 freshleaf 0xfffffc00f0609a00 ino 0x140bb3 newicurpos 1 newleafnr 3 newleaf 0xfffffc00f0609a00 The first line shows that xfs_bmapi_reserve_delalloc was called with whichfork=XFS_DATA_FORK, off=0xe6, len=0x4, prealloc=6. The second line ("oldgot") shows the contents of @got at the beginning of the call, which are the results of the first iext lookup in xfs_buffered_write_iomap_begin. Line 3 ("crapgot") is the result of duplicating the cursor at the start of the body of xfs_bmapi_reserve_delalloc and performing a fresh lookup at @off. Line 4 ("freshgot") is the result of a new xfs_iext_get_extent right before the call to xfs_bmap_add_extent_hole_delay. Totally garbage. Line 5 ("nowgot") is contents of @got after the xfs_bmap_add_extent_hole_delay call. Line 6 is the contents of @icur at the beginning of the call. Lines 7-9 are the contents of the iext cursors at the point where the block mappings were sampled. I think @oldgot is a HOLESTARTBLOCK extent because the first lookup didn't find anything, so we filled in imap with "fake hole until the end". At the time of the first lookup, I suspect that there's only one 32-block unwritten extent in the mapping (hence oldicurpos==1) but by the time we get to recording crapgot, crapicurpos==2. 
Dave then added: Ok, that's much simpler to reason about, and implies the smoke is coming from xfs_buffered_write_iomap_begin() or xfs_bmapi_reserve_delalloc(). I suspect the former - it does a lot of stuff with the ILOCK_EXCL held..... .... including calling xfs_qm_dqattach_locked(). xfs_buffered_write_iomap_begin ILOCK_EXCL look up icur xfs_qm_dqattach_locked xfs_qm_dqattach_one xfs_qm_dqget_inode dquot cache miss xfs_iunlock(ip, XFS_ILOCK_EXCL); error = xfs_qm_dqread(mp, id, type, can_alloc, &dqp); xfs_ilock(ip, XFS_ILOCK_EXCL); .... xfs_bmapi_reserve_delalloc(icur) Yup, that's what is letting the magic smoke out - xfs_qm_dqattach_locked() can cycle the ILOCK. If that happens, we can pass a stale icur to xfs_bmapi_reserve_delalloc() and it all goes downhill from there. Back to Darrick now: So. Fix this by moving the dqattach_locked call up before we take the ILOCK, like all the other callers in that file. Fixes: a526c85c2236 ("xfs: move xfs_file_iomap_begin_delay around") # goes further back than this Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/xfs_iomap.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 1005f1e36545..68436370927d 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -978,6 +978,10 @@ xfs_buffered_write_iomap_begin( ASSERT(!XFS_IS_REALTIME_INODE(ip)); + error = xfs_qm_dqattach(ip); + if (error) + return error; + error = xfs_ilock_for_iomap(ip, flags, &lockmode); if (error) return error; @@ -1081,10 +1085,6 @@ xfs_buffered_write_iomap_begin( allocfork = XFS_COW_FORK; } - error = xfs_qm_dqattach_locked(ip, false); - if (error) - goto out_unlock; - if (eof && offset + count > XFS_ISIZE(ip)) { /* * Determine the initial size of the preallocation. 
From 1eb52a6a71981b80f9acbd915acd6a05a5037196 Mon Sep 17 00:00:00 2001 From: Guo Xuenan Date: Wed, 30 Nov 2022 09:25:46 -0800 Subject: [PATCH 2943/4122] xfs: wait iclog complete before tearing down AIL Fix uaf in xfs_trans_ail_delete during xlog force shutdown. In commit cd6f79d1fb32 ("xfs: run callbacks before waking waiters in xlog_state_shutdown_callbacks") changed the order of running callbacks and wait for iclog completion to avoid unmount path untimely destroy AIL. But which seems not enough to ensure this, adding mdelay in `xfs_buf_item_unpin` can prove that. The reproduction is as follows. To ensure destroy AIL safely, we should wait all xlog ioend workers done and sync the AIL. ================================================================== BUG: KASAN: use-after-free in xfs_trans_ail_delete+0x240/0x2a0 Read of size 8 at addr ffff888023169400 by task kworker/1:1H/43 CPU: 1 PID: 43 Comm: kworker/1:1H Tainted: G W 6.1.0-rc1-00002-gc28266863c4a #137 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 Workqueue: xfs-log/sda xlog_ioend_work Call Trace: dump_stack_lvl+0x4d/0x66 print_report+0x171/0x4a6 kasan_report+0xb3/0x130 xfs_trans_ail_delete+0x240/0x2a0 xfs_buf_item_done+0x7b/0xa0 xfs_buf_ioend+0x1e9/0x11f0 xfs_buf_item_unpin+0x4c8/0x860 xfs_trans_committed_bulk+0x4c2/0x7c0 xlog_cil_committed+0xab6/0xfb0 xlog_cil_process_committed+0x117/0x1e0 xlog_state_shutdown_callbacks+0x208/0x440 xlog_force_shutdown+0x1b3/0x3a0 xlog_ioend_work+0xef/0x1d0 process_one_work+0x6f9/0xf70 worker_thread+0x578/0xf30 kthread+0x28c/0x330 ret_from_fork+0x1f/0x30 Allocated by task 9606: kasan_save_stack+0x1e/0x40 kasan_set_track+0x21/0x30 __kasan_kmalloc+0x7a/0x90 __kmalloc+0x59/0x140 kmem_alloc+0xb2/0x2f0 xfs_trans_ail_init+0x20/0x320 xfs_log_mount+0x37e/0x690 xfs_mountfs+0xe36/0x1b40 xfs_fs_fill_super+0xc5c/0x1a70 get_tree_bdev+0x3c5/0x6c0 vfs_get_tree+0x85/0x250 path_mount+0xec3/0x1830 do_mount+0xef/0x110 __x64_sys_mount+0x150/0x1f0 
do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Freed by task 9662: kasan_save_stack+0x1e/0x40 kasan_set_track+0x21/0x30 kasan_save_free_info+0x2a/0x40 __kasan_slab_free+0x105/0x1a0 __kmem_cache_free+0x99/0x2d0 kvfree+0x3a/0x40 xfs_log_unmount+0x60/0xf0 xfs_unmountfs+0xf3/0x1d0 xfs_fs_put_super+0x78/0x300 generic_shutdown_super+0x151/0x400 kill_block_super+0x9a/0xe0 deactivate_locked_super+0x82/0xe0 deactivate_super+0x91/0xb0 cleanup_mnt+0x32a/0x4a0 task_work_run+0x15f/0x240 exit_to_user_mode_prepare+0x188/0x190 syscall_exit_to_user_mode+0x12/0x30 do_syscall_64+0x42/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd The buggy address belongs to the object at ffff888023169400 which belongs to the cache kmalloc-128 of size 128 The buggy address is located 0 bytes inside of 128-byte region [ffff888023169400, ffff888023169480) The buggy address belongs to the physical page: page:ffffea00008c5a00 refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff888023168f80 pfn:0x23168 head:ffffea00008c5a00 order:1 compound_mapcount:0 compound_pincount:0 flags: 0x1fffff80010200(slab|head|node=0|zone=1|lastcpupid=0x1fffff) raw: 001fffff80010200 ffffea00006b3988 ffffea0000577a88 ffff88800f842ac0 raw: ffff888023168f80 0000000000150007 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff888023169300: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff888023169380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff888023169400: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff888023169480: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff888023169500: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ================================================================== Disabling lock debugging due to kernel taint Fixes: cd6f79d1fb32 ("xfs: run callbacks before waking waiters in xlog_state_shutdown_callbacks") Signed-off-by: Guo Xuenan Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. 
Wong --- fs/xfs/xfs_log.c | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 0141d9907d31..fc61cc024023 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -888,6 +888,23 @@ xlog_force_iclog( return xlog_state_release_iclog(iclog->ic_log, iclog, NULL); } +/* + * Cycle all the iclogbuf locks to make sure all log IO completion + * is done before we tear down these buffers. + */ +static void +xlog_wait_iclog_completion(struct xlog *log) +{ + int i; + struct xlog_in_core *iclog = log->l_iclog; + + for (i = 0; i < log->l_iclog_bufs; i++) { + down(&iclog->ic_sema); + up(&iclog->ic_sema); + iclog = iclog->ic_next; + } +} + /* * Wait for the iclog and all prior iclogs to be written disk as required by the * log force state machine. Waiting on ic_force_wait ensures iclog completions @@ -1113,6 +1130,14 @@ xfs_log_unmount( { xfs_log_clean(mp); + /* + * If shutdown has come from iclog IO context, the log + * cleaning will have been skipped and so we need to wait + * for the iclog to complete shutdown processing before we + * tear anything down. + */ + xlog_wait_iclog_completion(mp->m_log); + xfs_buftarg_drain(mp->m_ddev_targp); xfs_trans_ail_destroy(mp); @@ -2115,17 +2140,6 @@ xlog_dealloc_log( xlog_in_core_t *iclog, *next_iclog; int i; - /* - * Cycle all the iclogbuf locks to make sure all log IO completion - * is done before we tear down these buffers. 
- */ - iclog = log->l_iclog; - for (i = 0; i < log->l_iclog_bufs; i++) { - down(&iclog->ic_sema); - up(&iclog->ic_sema); - iclog = iclog->ic_next; - } - /* * Destroy the CIL after waiting for iclog IO completion because an * iclog EIO error will try to shut down the log, which accesses the From 575689fc0ffa6c4bb4e72fd18e31a6525a6124e0 Mon Sep 17 00:00:00 2001 From: Guo Xuenan Date: Wed, 30 Nov 2022 09:25:46 -0800 Subject: [PATCH 2944/4122] xfs: fix super block buf log item UAF during force shutdown xfs log io error will trigger xlog shut down, and end_io worker call xlog_state_shutdown_callbacks to unpin and release the buf log item. The race condition is that when there are some thread doing transaction commit and happened not to be intercepted by xlog_is_shutdown, then, these log item will be insert into CIL, when unpin and release these buf log item, UAF will occur. BTW, add delay before `xlog_cil_commit` can increase recurrence probability. The following call graph actually encountered this bad situation. fsstress io end worker kworker/0:1H-216 xlog_ioend_work ->xlog_force_shutdown ->xlog_state_shutdown_callbacks ->xlog_cil_process_committed ->xlog_cil_committed ->xfs_trans_committed_bulk ->xfs_trans_apply_sb_deltas ->li_ops->iop_unpin(lip, 1); ->xfs_trans_getsb ->_xfs_trans_bjoin ->xfs_buf_item_init ->if (bip) { return 0;} //relog ->xlog_cil_commit ->xlog_cil_insert_items //insert into CIL ->xfs_buf_ioend_fail(bp); ->xfs_buf_ioend ->xfs_buf_item_done ->xfs_buf_item_relse ->xfs_buf_item_free when cil push worker gather percpu cil and insert super block buf log item into ctx->log_items then uaf occurs. 
================================================================== BUG: KASAN: use-after-free in xlog_cil_push_work+0x1c8f/0x22f0 Write of size 8 at addr ffff88801800f3f0 by task kworker/u4:4/105 CPU: 0 PID: 105 Comm: kworker/u4:4 Tainted: G W 6.1.0-rc1-00001-g274115149b42 #136 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 Workqueue: xfs-cil/sda xlog_cil_push_work Call Trace: dump_stack_lvl+0x4d/0x66 print_report+0x171/0x4a6 kasan_report+0xb3/0x130 xlog_cil_push_work+0x1c8f/0x22f0 process_one_work+0x6f9/0xf70 worker_thread+0x578/0xf30 kthread+0x28c/0x330 ret_from_fork+0x1f/0x30 Allocated by task 2145: kasan_save_stack+0x1e/0x40 kasan_set_track+0x21/0x30 __kasan_slab_alloc+0x54/0x60 kmem_cache_alloc+0x14a/0x510 xfs_buf_item_init+0x160/0x6d0 _xfs_trans_bjoin+0x7f/0x2e0 xfs_trans_getsb+0xb6/0x3f0 xfs_trans_apply_sb_deltas+0x1f/0x8c0 __xfs_trans_commit+0xa25/0xe10 xfs_symlink+0xe23/0x1660 xfs_vn_symlink+0x157/0x280 vfs_symlink+0x491/0x790 do_symlinkat+0x128/0x220 __x64_sys_symlink+0x7a/0x90 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Freed by task 216: kasan_save_stack+0x1e/0x40 kasan_set_track+0x21/0x30 kasan_save_free_info+0x2a/0x40 __kasan_slab_free+0x105/0x1a0 kmem_cache_free+0xb6/0x460 xfs_buf_ioend+0x1e9/0x11f0 xfs_buf_item_unpin+0x3d6/0x840 xfs_trans_committed_bulk+0x4c2/0x7c0 xlog_cil_committed+0xab6/0xfb0 xlog_cil_process_committed+0x117/0x1e0 xlog_state_shutdown_callbacks+0x208/0x440 xlog_force_shutdown+0x1b3/0x3a0 xlog_ioend_work+0xef/0x1d0 process_one_work+0x6f9/0xf70 worker_thread+0x578/0xf30 kthread+0x28c/0x330 ret_from_fork+0x1f/0x30 The buggy address belongs to the object at ffff88801800f388 which belongs to the cache xfs_buf_item of size 272 The buggy address is located 104 bytes inside of 272-byte region [ffff88801800f388, ffff88801800f498) The buggy address belongs to the physical page: page:ffffea0000600380 refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff88801800f208 
pfn:0x1800e head:ffffea0000600380 order:1 compound_mapcount:0 compound_pincount:0 flags: 0x1fffff80010200(slab|head|node=0|zone=1|lastcpupid=0x1fffff) raw: 001fffff80010200 ffffea0000699788 ffff88801319db50 ffff88800fb50640 raw: ffff88801800f208 000000000015000a 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88801800f280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88801800f300: fb fb fb fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff88801800f380: fc fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88801800f400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88801800f480: fb fb fb fc fc fc fc fc fc fc fc fc fc fc fc fc ================================================================== Disabling lock debugging due to kernel taint Signed-off-by: Guo Xuenan Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_buf_item.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 522d450a94b1..df7322ed73fa 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -1018,6 +1018,8 @@ xfs_buf_item_relse( trace_xfs_buf_item_relse(bp, _RET_IP_); ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags)); + if (atomic_read(&bip->bli_refcount)) + return; bp->b_log_item = NULL; xfs_buf_rele(bp); xfs_buf_item_free(bip); From 214b0a88c46d5f32d80abe0d1bc2eea1cbd38f11 Mon Sep 17 00:00:00 2001 From: Metin Kaya Date: Mon, 21 Mar 2022 11:05:32 +0000 Subject: [PATCH 2945/4122] KVM: x86/xen: add support for 32-bit guests in SCHEDOP_poll This patch introduces compat version of struct sched_poll for SCHEDOP_poll sub-operation of sched_op hypercall, reads correct amount of data (16 bytes in 32-bit case, 24 bytes otherwise) by using new compat_sched_poll struct, copies it to sched_poll properly, and lets rest of the code run as is. 
Signed-off-by: Metin Kaya Signed-off-by: David Woodhouse Reviewed-by: Paul Durrant --- arch/x86/kvm/xen.c | 33 +++++++++++++++++++++++++++++---- arch/x86/kvm/xen.h | 7 +++++++ 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 9187d024d006..3e434dc339fb 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -1201,20 +1201,45 @@ static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode, evtchn_port_t port, *ports; gpa_t gpa; - if (!longmode || !lapic_in_kernel(vcpu) || + if (!lapic_in_kernel(vcpu) || !(vcpu->kvm->arch.xen_hvm_config.flags & KVM_XEN_HVM_CONFIG_EVTCHN_SEND)) return false; idx = srcu_read_lock(&vcpu->kvm->srcu); gpa = kvm_mmu_gva_to_gpa_system(vcpu, param, NULL); srcu_read_unlock(&vcpu->kvm->srcu, idx); - - if (!gpa || kvm_vcpu_read_guest(vcpu, gpa, &sched_poll, - sizeof(sched_poll))) { + if (!gpa) { *r = -EFAULT; return true; } + if (IS_ENABLED(CONFIG_64BIT) && !longmode) { + struct compat_sched_poll sp32; + + /* Sanity check that the compat struct definition is correct */ + BUILD_BUG_ON(sizeof(sp32) != 16); + + if (kvm_vcpu_read_guest(vcpu, gpa, &sp32, sizeof(sp32))) { + *r = -EFAULT; + return true; + } + + /* + * This is a 32-bit pointer to an array of evtchn_port_t which + * are uint32_t, so once it's converted no further compat + * handling is needed. 
+ */ + sched_poll.ports = (void *)(unsigned long)(sp32.ports); + sched_poll.nr_ports = sp32.nr_ports; + sched_poll.timeout = sp32.timeout; + } else { + if (kvm_vcpu_read_guest(vcpu, gpa, &sched_poll, + sizeof(sched_poll))) { + *r = -EFAULT; + return true; + } + } + if (unlikely(sched_poll.nr_ports > 1)) { /* Xen (unofficially) limits number of pollers to 128 */ if (sched_poll.nr_ports > 128) { diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h index 8503d2c6891e..ea33d80a0c51 100644 --- a/arch/x86/kvm/xen.h +++ b/arch/x86/kvm/xen.h @@ -207,4 +207,11 @@ struct compat_vcpu_runstate_info { uint64_t time[4]; } __attribute__((packed)); +struct compat_sched_poll { + /* This is actually a guest virtual address which points to ports. */ + uint32_t ports; + unsigned int nr_ports; + uint64_t timeout; +}; + #endif /* __ARCH_X86_KVM_XEN_H__ */ From 8c82a0b3ba1a411b84af5d43a4cc5994efa897ec Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Thu, 13 Oct 2022 21:12:24 +0000 Subject: [PATCH 2946/4122] KVM: Store immutable gfn_to_pfn_cache properties Move the assignment of immutable properties @kvm, @vcpu, and @usage to the initializer. Make _activate() and _deactivate() use stored values. Note, @len is also effectively immutable for most cases, but not in the case of the Xen runstate cache, which may be split across two pages and the length of the first segment will depend on its address. 
Suggested-by: Sean Christopherson Signed-off-by: Michal Luczaj [sean: handle @len in a separate patch] Signed-off-by: Sean Christopherson [dwmw2: acknowledge that @len can actually change for some use cases] Signed-off-by: David Woodhouse --- arch/x86/kvm/x86.c | 14 ++++----- arch/x86/kvm/xen.c | 65 ++++++++++++++++++--------------------- include/linux/kvm_host.h | 37 +++++++++++----------- include/linux/kvm_types.h | 1 + virt/kvm/pfncache.c | 22 ++++++++----- 5 files changed, 69 insertions(+), 70 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7f850dfb4086..b5e7aea22110 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2317,13 +2317,11 @@ static void kvm_write_system_time(struct kvm_vcpu *vcpu, gpa_t system_time, kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu); /* we verify if the enable bit is set... */ - if (system_time & 1) { - kvm_gpc_activate(vcpu->kvm, &vcpu->arch.pv_time, vcpu, - KVM_HOST_USES_PFN, system_time & ~1ULL, + if (system_time & 1) + kvm_gpc_activate(&vcpu->arch.pv_time, system_time & ~1ULL, sizeof(struct pvclock_vcpu_time_info)); - } else { - kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.pv_time); - } + else + kvm_gpc_deactivate(&vcpu->arch.pv_time); return; } @@ -3391,7 +3389,7 @@ static int kvm_pv_enable_async_pf_int(struct kvm_vcpu *vcpu, u64 data) static void kvmclock_reset(struct kvm_vcpu *vcpu) { - kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.pv_time); + kvm_gpc_deactivate(&vcpu->arch.pv_time); vcpu->arch.time = 0; } @@ -11542,7 +11540,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) vcpu->arch.regs_avail = ~0; vcpu->arch.regs_dirty = ~0; - kvm_gpc_init(&vcpu->arch.pv_time); + kvm_gpc_init(&vcpu->arch.pv_time, vcpu->kvm, vcpu, KVM_HOST_USES_PFN); if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu)) vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 3e434dc339fb..55257c2a1610 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -42,13 
+42,12 @@ static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn) int idx = srcu_read_lock(&kvm->srcu); if (gfn == GPA_INVALID) { - kvm_gpc_deactivate(kvm, gpc); + kvm_gpc_deactivate(gpc); goto out; } do { - ret = kvm_gpc_activate(kvm, gpc, NULL, KVM_HOST_USES_PFN, gpa, - PAGE_SIZE); + ret = kvm_gpc_activate(gpc, gpa, PAGE_SIZE); if (ret) goto out; @@ -323,8 +322,8 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic) * to the second page now because the guest changed to * 64-bit mode, the second GPC won't have been set up. */ - if (kvm_gpc_activate(v->kvm, gpc2, NULL, KVM_HOST_USES_PFN, - gpc1->gpa + user_len1, user_len2)) + if (kvm_gpc_activate(gpc2, gpc1->gpa + user_len1, + user_len2)) return; /* @@ -711,15 +710,13 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) offsetof(struct compat_vcpu_info, time)); if (data->u.gpa == GPA_INVALID) { - kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache); + kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_info_cache); r = 0; break; } - r = kvm_gpc_activate(vcpu->kvm, - &vcpu->arch.xen.vcpu_info_cache, NULL, - KVM_HOST_USES_PFN, data->u.gpa, - sizeof(struct vcpu_info)); + r = kvm_gpc_activate(&vcpu->arch.xen.vcpu_info_cache, + data->u.gpa, sizeof(struct vcpu_info)); if (!r) kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); @@ -727,15 +724,13 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO: if (data->u.gpa == GPA_INVALID) { - kvm_gpc_deactivate(vcpu->kvm, - &vcpu->arch.xen.vcpu_time_info_cache); + kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_time_info_cache); r = 0; break; } - r = kvm_gpc_activate(vcpu->kvm, - &vcpu->arch.xen.vcpu_time_info_cache, - NULL, KVM_HOST_USES_PFN, data->u.gpa, + r = kvm_gpc_activate(&vcpu->arch.xen.vcpu_time_info_cache, + data->u.gpa, sizeof(struct pvclock_vcpu_time_info)); if (!r) kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); @@ -751,10 +746,8 @@ int 
kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) if (data->u.gpa == GPA_INVALID) { r = 0; deactivate_out: - kvm_gpc_deactivate(vcpu->kvm, - &vcpu->arch.xen.runstate_cache); - kvm_gpc_deactivate(vcpu->kvm, - &vcpu->arch.xen.runstate2_cache); + kvm_gpc_deactivate(&vcpu->arch.xen.runstate_cache); + kvm_gpc_deactivate(&vcpu->arch.xen.runstate2_cache); break; } @@ -770,20 +763,18 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) /* How much fits in the (first) page? */ sz1 = PAGE_SIZE - (data->u.gpa & ~PAGE_MASK); - r = kvm_gpc_activate(vcpu->kvm, &vcpu->arch.xen.runstate_cache, - NULL, KVM_HOST_USES_PFN, data->u.gpa, sz1); + r = kvm_gpc_activate(&vcpu->arch.xen.runstate_cache, + data->u.gpa, sz1); if (r) goto deactivate_out; /* Either map the second page, or deactivate the second GPC */ if (sz1 >= sz) { - kvm_gpc_deactivate(vcpu->kvm, - &vcpu->arch.xen.runstate2_cache); + kvm_gpc_deactivate(&vcpu->arch.xen.runstate2_cache); } else { sz2 = sz - sz1; BUG_ON((data->u.gpa + sz1) & ~PAGE_MASK); - r = kvm_gpc_activate(vcpu->kvm, &vcpu->arch.xen.runstate2_cache, - NULL, KVM_HOST_USES_PFN, + r = kvm_gpc_activate(&vcpu->arch.xen.runstate2_cache, data->u.gpa + sz1, sz2); if (r) goto deactivate_out; @@ -2051,10 +2042,14 @@ void kvm_xen_init_vcpu(struct kvm_vcpu *vcpu) timer_setup(&vcpu->arch.xen.poll_timer, cancel_evtchn_poll, 0); - kvm_gpc_init(&vcpu->arch.xen.runstate_cache); - kvm_gpc_init(&vcpu->arch.xen.runstate2_cache); - kvm_gpc_init(&vcpu->arch.xen.vcpu_info_cache); - kvm_gpc_init(&vcpu->arch.xen.vcpu_time_info_cache); + kvm_gpc_init(&vcpu->arch.xen.runstate_cache, vcpu->kvm, NULL, + KVM_HOST_USES_PFN); + kvm_gpc_init(&vcpu->arch.xen.runstate2_cache, vcpu->kvm, NULL, + KVM_HOST_USES_PFN); + kvm_gpc_init(&vcpu->arch.xen.vcpu_info_cache, vcpu->kvm, NULL, + KVM_HOST_USES_PFN); + kvm_gpc_init(&vcpu->arch.xen.vcpu_time_info_cache, vcpu->kvm, NULL, + KVM_HOST_USES_PFN); } void kvm_xen_destroy_vcpu(struct kvm_vcpu 
*vcpu) @@ -2062,10 +2057,10 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu) if (kvm_xen_timer_enabled(vcpu)) kvm_xen_stop_timer(vcpu); - kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.runstate_cache); - kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.runstate2_cache); - kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache); - kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_time_info_cache); + kvm_gpc_deactivate(&vcpu->arch.xen.runstate_cache); + kvm_gpc_deactivate(&vcpu->arch.xen.runstate2_cache); + kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_info_cache); + kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_time_info_cache); del_timer_sync(&vcpu->arch.xen.poll_timer); } @@ -2073,7 +2068,7 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu) void kvm_xen_init_vm(struct kvm *kvm) { idr_init(&kvm->arch.xen.evtchn_ports); - kvm_gpc_init(&kvm->arch.xen.shinfo_cache); + kvm_gpc_init(&kvm->arch.xen.shinfo_cache, kvm, NULL, KVM_HOST_USES_PFN); } void kvm_xen_destroy_vm(struct kvm *kvm) @@ -2081,7 +2076,7 @@ void kvm_xen_destroy_vm(struct kvm *kvm) struct evtchnfd *evtchnfd; int i; - kvm_gpc_deactivate(kvm, &kvm->arch.xen.shinfo_cache); + kvm_gpc_deactivate(&kvm->arch.xen.shinfo_cache); idr_for_each_entry(&kvm->arch.xen.evtchn_ports, evtchnfd, i) { if (!evtchnfd->deliver.port.port) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8f874a964313..73ded328f9dc 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1260,18 +1260,7 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); * kvm_gpc_init - initialize gfn_to_pfn_cache. * * @gpc: struct gfn_to_pfn_cache object. - * - * This sets up a gfn_to_pfn_cache by initializing locks. Note, the cache must - * be zero-allocated (or zeroed by the caller before init). - */ -void kvm_gpc_init(struct gfn_to_pfn_cache *gpc); - -/** - * kvm_gpc_activate - prepare a cached kernel mapping and HPA for a given guest - * physical address. - * * @kvm: pointer to kvm instance. 
- * @gpc: struct gfn_to_pfn_cache object. * @vcpu: vCPU to be used for marking pages dirty and to be woken on * invalidation. * @usage: indicates if the resulting host physical PFN is used while @@ -1280,20 +1269,31 @@ void kvm_gpc_init(struct gfn_to_pfn_cache *gpc); * changes!---will also force @vcpu to exit the guest and * refresh the cache); and/or if the PFN used directly * by KVM (and thus needs a kernel virtual mapping). + * + * This sets up a gfn_to_pfn_cache by initializing locks and assigning the + * immutable attributes. Note, the cache must be zero-allocated (or zeroed by + * the caller before init). + */ +void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm, + struct kvm_vcpu *vcpu, enum pfn_cache_usage usage); + +/** + * kvm_gpc_activate - prepare a cached kernel mapping and HPA for a given guest + * physical address. + * + * @gpc: struct gfn_to_pfn_cache object. * @gpa: guest physical address to map. * @len: sanity check; the range being access must fit a single page. * * @return: 0 for success. * -EINVAL for a mapping which would cross a page boundary. - * -EFAULT for an untranslatable guest physical address. + * -EFAULT for an untranslatable guest physical address. * - * This primes a gfn_to_pfn_cache and links it into the @kvm's list for + * This primes a gfn_to_pfn_cache and links it into the @gpc->kvm's list for * invalidations to be processed. Callers are required to use kvm_gpc_check() * to ensure that the cache is valid before accessing the target page. */ -int kvm_gpc_activate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, - struct kvm_vcpu *vcpu, enum pfn_cache_usage usage, - gpa_t gpa, unsigned long len); +int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len); /** * kvm_gpc_check - check validity of a gfn_to_pfn_cache. @@ -1352,13 +1352,12 @@ void kvm_gpc_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc); /** * kvm_gpc_deactivate - deactivate and unlink a gfn_to_pfn_cache. 
* - * @kvm: pointer to kvm instance. * @gpc: struct gfn_to_pfn_cache object. * - * This removes a cache from the @kvm's list to be processed on MMU notifier + * This removes a cache from the VM's list to be processed on MMU notifier * invocation. */ -void kvm_gpc_deactivate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc); +void kvm_gpc_deactivate(struct gfn_to_pfn_cache *gpc); void kvm_sigset_activate(struct kvm_vcpu *vcpu); void kvm_sigset_deactivate(struct kvm_vcpu *vcpu); diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index 3ca3db020e0e..76de36e56cdf 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -67,6 +67,7 @@ struct gfn_to_pfn_cache { gpa_t gpa; unsigned long uhva; struct kvm_memory_slot *memslot; + struct kvm *kvm; struct kvm_vcpu *vcpu; struct list_head list; rwlock_t lock; diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c index b4295474519f..d8ce30b893d9 100644 --- a/virt/kvm/pfncache.c +++ b/virt/kvm/pfncache.c @@ -362,25 +362,29 @@ void kvm_gpc_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc) } EXPORT_SYMBOL_GPL(kvm_gpc_unmap); -void kvm_gpc_init(struct gfn_to_pfn_cache *gpc) +void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm, + struct kvm_vcpu *vcpu, enum pfn_cache_usage usage) { + WARN_ON_ONCE(!usage || (usage & KVM_GUEST_AND_HOST_USE_PFN) != usage); + WARN_ON_ONCE((usage & KVM_GUEST_USES_PFN) && !vcpu); + rwlock_init(&gpc->lock); mutex_init(&gpc->refresh_lock); + + gpc->kvm = kvm; + gpc->vcpu = vcpu; + gpc->usage = usage; } EXPORT_SYMBOL_GPL(kvm_gpc_init); -int kvm_gpc_activate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, - struct kvm_vcpu *vcpu, enum pfn_cache_usage usage, - gpa_t gpa, unsigned long len) +int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len) { - WARN_ON_ONCE(!usage || (usage & KVM_GUEST_AND_HOST_USE_PFN) != usage); + struct kvm *kvm = gpc->kvm; if (!gpc->active) { gpc->khva = NULL; gpc->pfn = KVM_PFN_ERR_FAULT; gpc->uhva = 
KVM_HVA_ERR_BAD; - gpc->vcpu = vcpu; - gpc->usage = usage; gpc->valid = false; spin_lock(&kvm->gpc_lock); @@ -400,8 +404,10 @@ int kvm_gpc_activate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, } EXPORT_SYMBOL_GPL(kvm_gpc_activate); -void kvm_gpc_deactivate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc) +void kvm_gpc_deactivate(struct gfn_to_pfn_cache *gpc) { + struct kvm *kvm = gpc->kvm; + if (gpc->active) { /* * Deactivate the cache before removing it from the list, KVM From e308c24a358d1e79951b16c387cbc6c6593639a5 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Thu, 13 Oct 2022 21:12:26 +0000 Subject: [PATCH 2947/4122] KVM: Use gfn_to_pfn_cache's immutable "kvm" in kvm_gpc_check() Make kvm_gpc_check() use kvm instance cached in gfn_to_pfn_cache. Suggested-by: Sean Christopherson Signed-off-by: Michal Luczaj Signed-off-by: Sean Christopherson Signed-off-by: David Woodhouse --- arch/x86/kvm/x86.c | 2 +- arch/x86/kvm/xen.c | 16 +++++++--------- include/linux/kvm_host.h | 4 +--- virt/kvm/pfncache.c | 5 ++--- 4 files changed, 11 insertions(+), 16 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b5e7aea22110..441f08c3af96 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3035,7 +3035,7 @@ static void kvm_setup_guest_pvclock(struct kvm_vcpu *v, unsigned long flags; read_lock_irqsave(&gpc->lock, flags); - while (!kvm_gpc_check(v->kvm, gpc, gpc->gpa, + while (!kvm_gpc_check(gpc, gpc->gpa, offset + sizeof(*guest_hv_clock))) { read_unlock_irqrestore(&gpc->lock, flags); diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 55257c2a1610..148319e980c4 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -272,7 +272,7 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic) * gfn_to_pfn caches that cover the region. 
*/ read_lock_irqsave(&gpc1->lock, flags); - while (!kvm_gpc_check(v->kvm, gpc1, gpc1->gpa, user_len1)) { + while (!kvm_gpc_check(gpc1, gpc1->gpa, user_len1)) { read_unlock_irqrestore(&gpc1->lock, flags); /* When invoked from kvm_sched_out() we cannot sleep */ @@ -308,7 +308,7 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic) */ read_lock(&gpc2->lock); - if (!kvm_gpc_check(v->kvm, gpc2, gpc2->gpa, user_len2)) { + if (!kvm_gpc_check(gpc2, gpc2->gpa, user_len2)) { read_unlock(&gpc2->lock); read_unlock_irqrestore(&gpc1->lock, flags); @@ -488,8 +488,7 @@ void kvm_xen_inject_pending_events(struct kvm_vcpu *v) * little more honest about it. */ read_lock_irqsave(&gpc->lock, flags); - while (!kvm_gpc_check(v->kvm, gpc, gpc->gpa, - sizeof(struct vcpu_info))) { + while (!kvm_gpc_check(gpc, gpc->gpa, sizeof(struct vcpu_info))) { read_unlock_irqrestore(&gpc->lock, flags); if (kvm_gpc_refresh(v->kvm, gpc, gpc->gpa, @@ -553,8 +552,7 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v) sizeof_field(struct compat_vcpu_info, evtchn_upcall_pending)); read_lock_irqsave(&gpc->lock, flags); - while (!kvm_gpc_check(v->kvm, gpc, gpc->gpa, - sizeof(struct vcpu_info))) { + while (!kvm_gpc_check(gpc, gpc->gpa, sizeof(struct vcpu_info))) { read_unlock_irqrestore(&gpc->lock, flags); /* @@ -1158,7 +1156,7 @@ static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports, read_lock_irqsave(&gpc->lock, flags); idx = srcu_read_lock(&kvm->srcu); - if (!kvm_gpc_check(kvm, gpc, gpc->gpa, PAGE_SIZE)) + if (!kvm_gpc_check(gpc, gpc->gpa, PAGE_SIZE)) goto out_rcu; ret = false; @@ -1580,7 +1578,7 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm) idx = srcu_read_lock(&kvm->srcu); read_lock_irqsave(&gpc->lock, flags); - if (!kvm_gpc_check(kvm, gpc, gpc->gpa, PAGE_SIZE)) + if (!kvm_gpc_check(gpc, gpc->gpa, PAGE_SIZE)) goto out_rcu; if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) { @@ -1614,7 +1612,7 @@ int kvm_xen_set_evtchn_fast(struct 
kvm_xen_evtchn *xe, struct kvm *kvm) gpc = &vcpu->arch.xen.vcpu_info_cache; read_lock_irqsave(&gpc->lock, flags); - if (!kvm_gpc_check(kvm, gpc, gpc->gpa, sizeof(struct vcpu_info))) { + if (!kvm_gpc_check(gpc, gpc->gpa, sizeof(struct vcpu_info))) { /* * Could not access the vcpu_info. Set the bit in-kernel * and prod the vCPU to deliver it for itself. diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 73ded328f9dc..befc8114ed0d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1298,7 +1298,6 @@ int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len) /** * kvm_gpc_check - check validity of a gfn_to_pfn_cache. * - * @kvm: pointer to kvm instance. * @gpc: struct gfn_to_pfn_cache object. * @gpa: current guest physical address to map. * @len: sanity check; the range being access must fit a single page. @@ -1313,8 +1312,7 @@ int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len) * Callers in IN_GUEST_MODE may do so without locking, although they should * still hold a read lock on kvm->scru for the memslot checks. */ -bool kvm_gpc_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, gpa_t gpa, - unsigned long len); +bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len); /** * kvm_gpc_refresh - update a previously initialized cache. 
diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c index d8ce30b893d9..decf4fdde668 100644 --- a/virt/kvm/pfncache.c +++ b/virt/kvm/pfncache.c @@ -76,10 +76,9 @@ void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start, } } -bool kvm_gpc_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, gpa_t gpa, - unsigned long len) +bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len) { - struct kvm_memslots *slots = kvm_memslots(kvm); + struct kvm_memslots *slots = kvm_memslots(gpc->kvm); if (!gpc->active) return false; From 2a0b128a906ab28b1ab41ceedcaf462b6f74f1aa Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Thu, 13 Oct 2022 21:12:27 +0000 Subject: [PATCH 2948/4122] KVM: Clean up hva_to_pfn_retry() Make hva_to_pfn_retry() use kvm instance cached in gfn_to_pfn_cache. Suggested-by: Sean Christopherson Signed-off-by: Michal Luczaj Signed-off-by: Sean Christopherson Signed-off-by: David Woodhouse --- virt/kvm/pfncache.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c index decf4fdde668..9d506de6c150 100644 --- a/virt/kvm/pfncache.c +++ b/virt/kvm/pfncache.c @@ -138,7 +138,7 @@ static inline bool mmu_notifier_retry_cache(struct kvm *kvm, unsigned long mmu_s return kvm->mmu_invalidate_seq != mmu_seq; } -static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, struct gfn_to_pfn_cache *gpc) +static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc) { /* Note, the new page offset may be different than the old! */ void *old_khva = gpc->khva - offset_in_page(gpc->khva); @@ -158,7 +158,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, struct gfn_to_pfn_cache *gpc) gpc->valid = false; do { - mmu_seq = kvm->mmu_invalidate_seq; + mmu_seq = gpc->kvm->mmu_invalidate_seq; smp_rmb(); write_unlock_irq(&gpc->lock); @@ -216,7 +216,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, struct gfn_to_pfn_cache *gpc) * attempting to refresh. 
*/ WARN_ON_ONCE(gpc->valid); - } while (mmu_notifier_retry_cache(kvm, mmu_seq)); + } while (mmu_notifier_retry_cache(gpc->kvm, mmu_seq)); gpc->valid = true; gpc->pfn = new_pfn; @@ -294,7 +294,7 @@ int kvm_gpc_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, gpa_t gpa, * drop the lock and do the HVA to PFN lookup again. */ if (!gpc->valid || old_uhva != gpc->uhva) { - ret = hva_to_pfn_retry(kvm, gpc); + ret = hva_to_pfn_retry(gpc); } else { /* * If the HVA→PFN mapping was already valid, don't unmap it. From 0318f207d1c2e297d1ec1c6e145bb8bd053236f9 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Thu, 13 Oct 2022 21:12:28 +0000 Subject: [PATCH 2949/4122] KVM: Use gfn_to_pfn_cache's immutable "kvm" in kvm_gpc_refresh() Make kvm_gpc_refresh() use kvm instance cached in gfn_to_pfn_cache. No functional change intended. Suggested-by: Sean Christopherson Signed-off-by: Michal Luczaj [sean: leave kvm_gpc_unmap() as-is] Signed-off-by: Sean Christopherson Signed-off-by: David Woodhouse --- arch/x86/kvm/x86.c | 2 +- arch/x86/kvm/xen.c | 10 ++++------ include/linux/kvm_host.h | 10 ++++------ virt/kvm/pfncache.c | 7 +++---- 4 files changed, 12 insertions(+), 17 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 441f08c3af96..490df3e997fa 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3039,7 +3039,7 @@ static void kvm_setup_guest_pvclock(struct kvm_vcpu *v, offset + sizeof(*guest_hv_clock))) { read_unlock_irqrestore(&gpc->lock, flags); - if (kvm_gpc_refresh(v->kvm, gpc, gpc->gpa, + if (kvm_gpc_refresh(gpc, gpc->gpa, offset + sizeof(*guest_hv_clock))) return; diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 148319e980c4..f50c88b1eaab 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -279,7 +279,7 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic) if (atomic) return; - if (kvm_gpc_refresh(v->kvm, gpc1, gpc1->gpa, user_len1)) + if (kvm_gpc_refresh(gpc1, gpc1->gpa, user_len1)) return; 
read_lock_irqsave(&gpc1->lock, flags); @@ -491,8 +491,7 @@ void kvm_xen_inject_pending_events(struct kvm_vcpu *v) while (!kvm_gpc_check(gpc, gpc->gpa, sizeof(struct vcpu_info))) { read_unlock_irqrestore(&gpc->lock, flags); - if (kvm_gpc_refresh(v->kvm, gpc, gpc->gpa, - sizeof(struct vcpu_info))) + if (kvm_gpc_refresh(gpc, gpc->gpa, sizeof(struct vcpu_info))) return; read_lock_irqsave(&gpc->lock, flags); @@ -566,8 +565,7 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v) if (in_atomic() || !task_is_running(current)) return 1; - if (kvm_gpc_refresh(v->kvm, gpc, gpc->gpa, - sizeof(struct vcpu_info))) { + if (kvm_gpc_refresh(gpc, gpc->gpa, sizeof(struct vcpu_info))) { /* * If this failed, userspace has screwed up the * vcpu_info mapping. No interrupts for you. @@ -1710,7 +1708,7 @@ static int kvm_xen_set_evtchn(struct kvm_xen_evtchn *xe, struct kvm *kvm) break; idx = srcu_read_lock(&kvm->srcu); - rc = kvm_gpc_refresh(kvm, gpc, gpc->gpa, PAGE_SIZE); + rc = kvm_gpc_refresh(gpc, gpc->gpa, PAGE_SIZE); srcu_read_unlock(&kvm->srcu, idx); } while(!rc); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index befc8114ed0d..3ce4650776b8 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1317,23 +1317,21 @@ bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len); /** * kvm_gpc_refresh - update a previously initialized cache. * - * @kvm: pointer to kvm instance. * @gpc: struct gfn_to_pfn_cache object. * @gpa: updated guest physical address to map. * @len: sanity check; the range being access must fit a single page. - * + * @return: 0 for success. * -EINVAL for a mapping which would cross a page boundary. - * -EFAULT for an untranslatable guest physical address. + * -EFAULT for an untranslatable guest physical address. * * This will attempt to refresh a gfn_to_pfn_cache. 
Note that a successful - * returm from this function does not mean the page can be immediately + * return from this function does not mean the page can be immediately * accessed because it may have raced with an invalidation. Callers must * still lock and check the cache status, as this function does not return * with the lock still held to permit access. */ -int kvm_gpc_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, gpa_t gpa, - unsigned long len); +int kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len); /** * kvm_gpc_unmap - temporarily unmap a gfn_to_pfn_cache. diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c index 9d506de6c150..015c5d16948a 100644 --- a/virt/kvm/pfncache.c +++ b/virt/kvm/pfncache.c @@ -237,10 +237,9 @@ out_error: return -EFAULT; } -int kvm_gpc_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, gpa_t gpa, - unsigned long len) +int kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len) { - struct kvm_memslots *slots = kvm_memslots(kvm); + struct kvm_memslots *slots = kvm_memslots(gpc->kvm); unsigned long page_offset = gpa & ~PAGE_MASK; bool unmap_old = false; unsigned long old_uhva; @@ -399,7 +398,7 @@ int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len) gpc->active = true; write_unlock_irq(&gpc->lock); } - return kvm_gpc_refresh(kvm, gpc, gpa, len); + return kvm_gpc_refresh(gpc, gpa, len); } EXPORT_SYMBOL_GPL(kvm_gpc_activate); From 9f87791d686d85614584438d4f249eb32ef7964c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 13 Oct 2022 21:12:29 +0000 Subject: [PATCH 2950/4122] KVM: Drop KVM's API to allow temporarily unmapping gfn=>pfn cache Drop kvm_gpc_unmap() as it has no users and unclear requirements. The API was added as part of the original gfn_to_pfn_cache support, but its sole usage[*] was never merged. Fold the guts of kvm_gpc_unmap() into the deactivate path and drop the API. 
Omit acquiring refresh_lock as concurrent calls to kvm_gpc_deactivate() are not allowed (this is not enforced, e.g. via lockdep, due to it being called during vCPU destruction). If/when temporary unmapping makes a comeback, the desirable behavior is likely to restrict temporary unmapping to vCPU-exclusive mappings and require the vcpu->mutex be held to serialize unmap. Use of the refresh_lock to protect unmapping was somewhat speculatively added by commit 93984f19e7bc ("KVM: Fully serialize gfn=>pfn cache refresh via mutex") to guard against concurrent unmaps, but the primary use case of the temporary unmap, nested virtualization[*], doesn't actually need or want concurrent unmaps. [*] https://lore.kernel.org/all/20211210163625.2886-7-dwmw2@infradead.org Signed-off-by: Sean Christopherson Signed-off-by: David Woodhouse --- include/linux/kvm_host.h | 12 ----------- virt/kvm/pfncache.c | 44 +++++++++++++++------------------------- 2 files changed, 16 insertions(+), 40 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3ce4650776b8..eac76965cf44 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1333,18 +1333,6 @@ bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len); */ int kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len); -/** - * kvm_gpc_unmap - temporarily unmap a gfn_to_pfn_cache. - * - * @kvm: pointer to kvm instance. - * @gpc: struct gfn_to_pfn_cache object. - * - * This unmaps the referenced page. The cache is left in the invalid state - * but at least the mapping from GPA to userspace HVA will remain cached - * and can be reused on a subsequent refresh. - */ -void kvm_gpc_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc); - /** * kvm_gpc_deactivate - deactivate and unlink a gfn_to_pfn_cache.
* diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c index 015c5d16948a..5b2512793691 100644 --- a/virt/kvm/pfncache.c +++ b/virt/kvm/pfncache.c @@ -333,33 +333,6 @@ out_unlock: } EXPORT_SYMBOL_GPL(kvm_gpc_refresh); -void kvm_gpc_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc) -{ - void *old_khva; - kvm_pfn_t old_pfn; - - mutex_lock(&gpc->refresh_lock); - write_lock_irq(&gpc->lock); - - gpc->valid = false; - - old_khva = gpc->khva - offset_in_page(gpc->khva); - old_pfn = gpc->pfn; - - /* - * We can leave the GPA → uHVA map cache intact but the PFN - * lookup will need to be redone even for the same page. - */ - gpc->khva = NULL; - gpc->pfn = KVM_PFN_ERR_FAULT; - - write_unlock_irq(&gpc->lock); - mutex_unlock(&gpc->refresh_lock); - - gpc_unmap_khva(old_pfn, old_khva); -} -EXPORT_SYMBOL_GPL(kvm_gpc_unmap); - void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm, struct kvm_vcpu *vcpu, enum pfn_cache_usage usage) { @@ -405,6 +378,8 @@ EXPORT_SYMBOL_GPL(kvm_gpc_activate); void kvm_gpc_deactivate(struct gfn_to_pfn_cache *gpc) { struct kvm *kvm = gpc->kvm; + kvm_pfn_t old_pfn; + void *old_khva; if (gpc->active) { /* @@ -414,13 +389,26 @@ void kvm_gpc_deactivate(struct gfn_to_pfn_cache *gpc) */ write_lock_irq(&gpc->lock); gpc->active = false; + gpc->valid = false; + + /* + * Leave the GPA => uHVA cache intact, it's protected by the + * memslot generation. The PFN lookup needs to be redone every + * time as mmu_notifier protection is lost when the cache is + * removed from the VM's gpc_list. 
+ */ + old_khva = gpc->khva - offset_in_page(gpc->khva); + gpc->khva = NULL; + + old_pfn = gpc->pfn; + gpc->pfn = KVM_PFN_ERR_FAULT; write_unlock_irq(&gpc->lock); spin_lock(&kvm->gpc_lock); list_del(&gpc->list); spin_unlock(&kvm->gpc_lock); - kvm_gpc_unmap(kvm, gpc); + gpc_unmap_khva(old_pfn, old_khva); } } EXPORT_SYMBOL_GPL(kvm_gpc_deactivate); From 5762cb10235776dd1ed5f5f9d6c1aff2b73bec5c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 13 Oct 2022 21:12:30 +0000 Subject: [PATCH 2951/4122] KVM: Do not partially reinitialize gfn=>pfn cache during activation Don't partially reinitialize a gfn=>pfn cache when activating the cache, and instead assert that the cache is not valid during activation. Bug the VM if the assertion fails, as use-after-free and/or data corruption is all but guaranteed if KVM ends up with a valid-but-inactive cache. Signed-off-by: Sean Christopherson Signed-off-by: David Woodhouse --- virt/kvm/pfncache.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c index 5b2512793691..c1a772cedc4b 100644 --- a/virt/kvm/pfncache.c +++ b/virt/kvm/pfncache.c @@ -345,6 +345,8 @@ void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm, gpc->kvm = kvm; gpc->vcpu = vcpu; gpc->usage = usage; + gpc->pfn = KVM_PFN_ERR_FAULT; + gpc->uhva = KVM_HVA_ERR_BAD; } EXPORT_SYMBOL_GPL(kvm_gpc_init); @@ -353,10 +355,8 @@ int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len) struct kvm *kvm = gpc->kvm; if (!gpc->active) { - gpc->khva = NULL; - gpc->pfn = KVM_PFN_ERR_FAULT; - gpc->uhva = KVM_HVA_ERR_BAD; - gpc->valid = false; + if (KVM_BUG_ON(gpc->valid, kvm)) + return -EIO; spin_lock(&kvm->gpc_lock); list_add(&gpc->list, &kvm->gpc_list); From 58f5ee5fedd981e05cb086cba4e8f923c3727a04 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 13 Oct 2022 21:12:31 +0000 Subject: [PATCH 2952/4122] KVM: Drop @gpa from exported gfn=>pfn cache check() and refresh() 
helpers Drop the @gpa param from the exported check()+refresh() helpers and limit changing the cache's GPA to the activate path. All external users just feed in gpc->gpa, i.e. this is a fancy nop. Allowing users to change the GPA at check()+refresh() is dangerous as those helpers explicitly allow concurrent calls, e.g. KVM could get into a livelock scenario. It's also unclear as to what the expected behavior should be if multiple tasks attempt to refresh with different GPAs. Signed-off-by: Sean Christopherson Signed-off-by: David Woodhouse --- arch/x86/kvm/x86.c | 6 ++---- arch/x86/kvm/xen.c | 22 +++++++++++----------- include/linux/kvm_host.h | 8 +++----- virt/kvm/pfncache.c | 17 +++++++++++------ 4 files changed, 27 insertions(+), 26 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 490df3e997fa..006b445996a9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3035,12 +3035,10 @@ static void kvm_setup_guest_pvclock(struct kvm_vcpu *v, unsigned long flags; read_lock_irqsave(&gpc->lock, flags); - while (!kvm_gpc_check(gpc, gpc->gpa, - offset + sizeof(*guest_hv_clock))) { + while (!kvm_gpc_check(gpc, offset + sizeof(*guest_hv_clock))) { read_unlock_irqrestore(&gpc->lock, flags); - if (kvm_gpc_refresh(gpc, gpc->gpa, - offset + sizeof(*guest_hv_clock))) + if (kvm_gpc_refresh(gpc, offset + sizeof(*guest_hv_clock))) return; read_lock_irqsave(&gpc->lock, flags); diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index f50c88b1eaab..5208e05ca9a6 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -272,14 +272,14 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic) * gfn_to_pfn caches that cover the region. 
*/ read_lock_irqsave(&gpc1->lock, flags); - while (!kvm_gpc_check(gpc1, gpc1->gpa, user_len1)) { + while (!kvm_gpc_check(gpc1, user_len1)) { read_unlock_irqrestore(&gpc1->lock, flags); /* When invoked from kvm_sched_out() we cannot sleep */ if (atomic) return; - if (kvm_gpc_refresh(gpc1, gpc1->gpa, user_len1)) + if (kvm_gpc_refresh(gpc1, user_len1)) return; read_lock_irqsave(&gpc1->lock, flags); @@ -308,7 +308,7 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic) */ read_lock(&gpc2->lock); - if (!kvm_gpc_check(gpc2, gpc2->gpa, user_len2)) { + if (!kvm_gpc_check(gpc2, user_len2)) { read_unlock(&gpc2->lock); read_unlock_irqrestore(&gpc1->lock, flags); @@ -488,10 +488,10 @@ void kvm_xen_inject_pending_events(struct kvm_vcpu *v) * little more honest about it. */ read_lock_irqsave(&gpc->lock, flags); - while (!kvm_gpc_check(gpc, gpc->gpa, sizeof(struct vcpu_info))) { + while (!kvm_gpc_check(gpc, sizeof(struct vcpu_info))) { read_unlock_irqrestore(&gpc->lock, flags); - if (kvm_gpc_refresh(gpc, gpc->gpa, sizeof(struct vcpu_info))) + if (kvm_gpc_refresh(gpc, sizeof(struct vcpu_info))) return; read_lock_irqsave(&gpc->lock, flags); @@ -551,7 +551,7 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v) sizeof_field(struct compat_vcpu_info, evtchn_upcall_pending)); read_lock_irqsave(&gpc->lock, flags); - while (!kvm_gpc_check(gpc, gpc->gpa, sizeof(struct vcpu_info))) { + while (!kvm_gpc_check(gpc, sizeof(struct vcpu_info))) { read_unlock_irqrestore(&gpc->lock, flags); /* @@ -565,7 +565,7 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v) if (in_atomic() || !task_is_running(current)) return 1; - if (kvm_gpc_refresh(gpc, gpc->gpa, sizeof(struct vcpu_info))) { + if (kvm_gpc_refresh(gpc, sizeof(struct vcpu_info))) { /* * If this failed, userspace has screwed up the * vcpu_info mapping. No interrupts for you. 
@@ -1154,7 +1154,7 @@ static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports, read_lock_irqsave(&gpc->lock, flags); idx = srcu_read_lock(&kvm->srcu); - if (!kvm_gpc_check(gpc, gpc->gpa, PAGE_SIZE)) + if (!kvm_gpc_check(gpc, PAGE_SIZE)) goto out_rcu; ret = false; @@ -1576,7 +1576,7 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm) idx = srcu_read_lock(&kvm->srcu); read_lock_irqsave(&gpc->lock, flags); - if (!kvm_gpc_check(gpc, gpc->gpa, PAGE_SIZE)) + if (!kvm_gpc_check(gpc, PAGE_SIZE)) goto out_rcu; if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) { @@ -1610,7 +1610,7 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm) gpc = &vcpu->arch.xen.vcpu_info_cache; read_lock_irqsave(&gpc->lock, flags); - if (!kvm_gpc_check(gpc, gpc->gpa, sizeof(struct vcpu_info))) { + if (!kvm_gpc_check(gpc, sizeof(struct vcpu_info))) { /* * Could not access the vcpu_info. Set the bit in-kernel * and prod the vCPU to deliver it for itself. @@ -1708,7 +1708,7 @@ static int kvm_xen_set_evtchn(struct kvm_xen_evtchn *xe, struct kvm *kvm) break; idx = srcu_read_lock(&kvm->srcu); - rc = kvm_gpc_refresh(gpc, gpc->gpa, PAGE_SIZE); + rc = kvm_gpc_refresh(gpc, PAGE_SIZE); srcu_read_unlock(&kvm->srcu, idx); } while(!rc); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index eac76965cf44..7008846fd3dd 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1299,7 +1299,6 @@ int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len) * kvm_gpc_check - check validity of a gfn_to_pfn_cache. * * @gpc: struct gfn_to_pfn_cache object. - * @gpa: current guest physical address to map. * @len: sanity check; the range being access must fit a single page. * * @return: %true if the cache is still valid and the address matches. 
@@ -1312,15 +1311,14 @@ int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len) * Callers in IN_GUEST_MODE may do so without locking, although they should * still hold a read lock on kvm->scru for the memslot checks. */ -bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len); +bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, unsigned long len); /** * kvm_gpc_refresh - update a previously initialized cache. * * @gpc: struct gfn_to_pfn_cache object. - * @gpa: updated guest physical address to map. * @len: sanity check; the range being access must fit a single page. - + * * @return: 0 for success. * -EINVAL for a mapping which would cross a page boundary. * -EFAULT for an untranslatable guest physical address. @@ -1331,7 +1329,7 @@ bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len); * still lock and check the cache status, as this function does not return * with the lock still held to permit access. */ -int kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len); +int kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, unsigned long len); /** * kvm_gpc_deactivate - deactivate and unlink a gfn_to_pfn_cache. 
diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c index c1a772cedc4b..a805cc1544bf 100644 --- a/virt/kvm/pfncache.c +++ b/virt/kvm/pfncache.c @@ -76,18 +76,17 @@ void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start, } } -bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len) +bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, unsigned long len) { struct kvm_memslots *slots = kvm_memslots(gpc->kvm); if (!gpc->active) return false; - if ((gpa & ~PAGE_MASK) + len > PAGE_SIZE) + if ((gpc->gpa & ~PAGE_MASK) + len > PAGE_SIZE) return false; - if (gpc->gpa != gpa || gpc->generation != slots->generation || - kvm_is_error_hva(gpc->uhva)) + if (gpc->generation != slots->generation || kvm_is_error_hva(gpc->uhva)) return false; if (!gpc->valid) @@ -237,7 +236,8 @@ out_error: return -EFAULT; } -int kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len) +static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, + unsigned long len) { struct kvm_memslots *slots = kvm_memslots(gpc->kvm); unsigned long page_offset = gpa & ~PAGE_MASK; @@ -331,6 +331,11 @@ out_unlock: return ret; } + +int kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, unsigned long len) +{ + return __kvm_gpc_refresh(gpc, gpc->gpa, len); +} EXPORT_SYMBOL_GPL(kvm_gpc_refresh); void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm, @@ -371,7 +376,7 @@ int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len) gpc->active = true; write_unlock_irq(&gpc->lock); } - return kvm_gpc_refresh(gpc, gpa, len); + return __kvm_gpc_refresh(gpc, gpa, len); } EXPORT_SYMBOL_GPL(kvm_gpc_activate); From 06e155c44aa0e7921aa44d3c67f8ea464b16cb75 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 13 Oct 2022 21:12:32 +0000 Subject: [PATCH 2953/4122] KVM: Skip unnecessary "unmap" if gpc is already valid during refresh When refreshing a gfn=>pfn cache, skip straight to unlocking if the cache already valid instead of 
stuffing the "old" variables to turn the unmapping outro into a nop. Signed-off-by: Sean Christopherson Signed-off-by: David Woodhouse --- virt/kvm/pfncache.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c index a805cc1544bf..2d6aba677830 100644 --- a/virt/kvm/pfncache.c +++ b/virt/kvm/pfncache.c @@ -301,9 +301,8 @@ static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, * may have changed. */ gpc->khva = old_khva + page_offset; - old_pfn = KVM_PFN_ERR_FAULT; - old_khva = NULL; ret = 0; + goto out_unlock; } out: From 2ea3498980f5e6f3001f2984b0b92736bf1b78cb Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Wed, 26 Oct 2022 22:59:32 +0000 Subject: [PATCH 2954/4122] mm/damon/core: split out DAMOS-charged region skip logic into a new function Patch series "mm/damon: cleanup and refactoring code", v2. This patchset cleans up and refactors a range of DAMON code including the core, DAMON sysfs interface, and DAMON modules, for better readability and convenient future feature implementations. In detail, this patchset splits unnecessarily long and complex functions in core into smaller functions (patches 1-4). Then, it cleans up the DAMON sysfs interface by using more type-safe code (patch 5) and removing unnecessary function parameters (patch 6). Further, it refactors the code by distributing the code into multiple files (patches 7-10). The last two patches (patches 11 and 12) deduplicate and remove unnecessary header inclusions in DAMON modules (reclaim and lru_sort). This patch (of 12): The DAMOS action applying function, 'damon_do_apply_schemes()', is quite long and not so simple. Split out the already quota-charged region skip code, which is not a small amount of simple code, into a new function with some comments for better readability. 
Link: https://lkml.kernel.org/r/20221026225943.100429-1-sj@kernel.org Link: https://lkml.kernel.org/r/20221026225943.100429-2-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- mm/damon/core.c | 96 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 65 insertions(+), 31 deletions(-) diff --git a/mm/damon/core.c b/mm/damon/core.c index 36d098d06c55..06b50ede9cc6 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -694,6 +694,67 @@ static bool damos_valid_target(struct damon_ctx *c, struct damon_target *t, return c->ops.get_scheme_score(c, t, r, s) >= s->quota.min_score; } +/* + * damos_skip_charged_region() - Check if the given region or starting part of + * it is already charged for the DAMOS quota. + * @t: The target of the region. + * @rp: The pointer to the region. + * @s: The scheme to be applied. + * + * If a quota of a scheme has exceeded in a quota charge window, the scheme's + * action would applied to only a part of the target access pattern fulfilling + * regions. To avoid applying the scheme action to only already applied + * regions, DAMON skips applying the scheme action to the regions that charged + * in the previous charge window. + * + * This function checks if a given region should be skipped or not for the + * reason. If only the starting part of the region has previously charged, + * this function splits the region into two so that the second one covers the + * area that not charged in the previous charge widnow and saves the second + * region in *rp and returns false, so that the caller can apply DAMON action + * to the second one. + * + * Return: true if the region should be entirely skipped, false otherwise. 
+ */ +static bool damos_skip_charged_region(struct damon_target *t, + struct damon_region **rp, struct damos *s) +{ + struct damon_region *r = *rp; + struct damos_quota *quota = &s->quota; + unsigned long sz_to_skip; + + /* Skip previously charged regions */ + if (quota->charge_target_from) { + if (t != quota->charge_target_from) + return true; + if (r == damon_last_region(t)) { + quota->charge_target_from = NULL; + quota->charge_addr_from = 0; + return true; + } + if (quota->charge_addr_from && + r->ar.end <= quota->charge_addr_from) + return true; + + if (quota->charge_addr_from && r->ar.start < + quota->charge_addr_from) { + sz_to_skip = ALIGN_DOWN(quota->charge_addr_from - + r->ar.start, DAMON_MIN_REGION); + if (!sz_to_skip) { + if (damon_sz_region(r) <= DAMON_MIN_REGION) + return true; + sz_to_skip = DAMON_MIN_REGION; + } + damon_split_region_at(t, r, sz_to_skip); + r = damon_next_region(r); + *rp = r; + } + quota->charge_target_from = NULL; + quota->charge_addr_from = 0; + } + return false; +} + static void damon_do_apply_schemes(struct damon_ctx *c, struct damon_target *t, struct damon_region *r) @@ -702,7 +763,7 @@ static void damon_do_apply_schemes(struct damon_ctx *c, damon_for_each_scheme(s, c) { struct damos_quota *quota = &s->quota; - unsigned long sz = damon_sz_region(r); + unsigned long sz; struct timespec64 begin, end; unsigned long sz_applied = 0; @@ -713,41 +774,14 @@ static void damon_do_apply_schemes(struct damon_ctx *c, if (quota->esz && quota->charged_sz >= quota->esz) continue; - /* Skip previously charged regions */ - if (quota->charge_target_from) { - if (t != quota->charge_target_from) - continue; - if (r == damon_last_region(t)) { - quota->charge_target_from = NULL; - quota->charge_addr_from = 0; - continue; - } - if (quota->charge_addr_from && - r->ar.end <= quota->charge_addr_from) - continue; - - if (quota->charge_addr_from && r->ar.start < - quota->charge_addr_from) { - sz = ALIGN_DOWN(quota->charge_addr_from - - r->ar.start, 
DAMON_MIN_REGION); - if (!sz) { - if (damon_sz_region(r) <= - DAMON_MIN_REGION) - continue; - sz = DAMON_MIN_REGION; - } - damon_split_region_at(t, r, sz); - r = damon_next_region(r); - sz = damon_sz_region(r); - } - quota->charge_target_from = NULL; - quota->charge_addr_from = 0; - } + if (damos_skip_charged_region(t, &r, s)) + continue; if (!damos_valid_target(c, t, r, s)) continue; /* Apply the scheme */ + sz = damon_sz_region(r); if (c->ops.apply_scheme) { if (quota->esz && quota->charged_sz + sz > quota->esz) { From e63a30c51f8400915db401c05d3c4db6743857e8 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Wed, 26 Oct 2022 22:59:33 +0000 Subject: [PATCH 2955/4122] mm/damon/core: split damos application logic into a new function The DAMOS action applying function, 'damon_do_apply_schemes()', is still long and not easy to read. Split out the code for applying a single action to a single region into a new function for better readability. Link: https://lkml.kernel.org/r/20221026225943.100429-3-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- mm/damon/core.c | 73 ++++++++++++++++++++++++++----------------------- 1 file changed, 39 insertions(+), 34 deletions(-) diff --git a/mm/damon/core.c b/mm/damon/core.c index 06b50ede9cc6..c1a912bc46ae 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -755,6 +755,44 @@ static bool damos_skip_charged_region(struct damon_target *t, return false; } +static void damos_apply_scheme(struct damon_ctx *c, struct damon_target *t, + struct damon_region *r, struct damos *s) +{ + struct damos_quota *quota = &s->quota; + unsigned long sz = damon_sz_region(r); + struct timespec64 begin, end; + unsigned long sz_applied = 0; + + if (c->ops.apply_scheme) { + if (quota->esz && quota->charged_sz + sz > quota->esz) { + sz = ALIGN_DOWN(quota->esz - quota->charged_sz, + DAMON_MIN_REGION); + if (!sz) + goto update_stat; + damon_split_region_at(t, r, sz); + } + ktime_get_coarse_ts64(&begin); + sz_applied = 
c->ops.apply_scheme(c, t, r, s); + ktime_get_coarse_ts64(&end); + quota->total_charged_ns += timespec64_to_ns(&end) - + timespec64_to_ns(&begin); + quota->charged_sz += sz; + if (quota->esz && quota->charged_sz >= quota->esz) { + quota->charge_target_from = t; + quota->charge_addr_from = r->ar.end + 1; + } + } + if (s->action != DAMOS_STAT) + r->age = 0; + +update_stat: + s->stat.nr_tried++; + s->stat.sz_tried += sz; + if (sz_applied) + s->stat.nr_applied++; + s->stat.sz_applied += sz_applied; +} + static void damon_do_apply_schemes(struct damon_ctx *c, struct damon_target *t, struct damon_region *r) @@ -763,9 +801,6 @@ static void damon_do_apply_schemes(struct damon_ctx *c, damon_for_each_scheme(s, c) { struct damos_quota *quota = &s->quota; - unsigned long sz; - struct timespec64 begin, end; - unsigned long sz_applied = 0; if (!s->wmarks.activated) continue; @@ -780,37 +815,7 @@ static void damon_do_apply_schemes(struct damon_ctx *c, if (!damos_valid_target(c, t, r, s)) continue; - /* Apply the scheme */ - sz = damon_sz_region(r); - if (c->ops.apply_scheme) { - if (quota->esz && - quota->charged_sz + sz > quota->esz) { - sz = ALIGN_DOWN(quota->esz - quota->charged_sz, - DAMON_MIN_REGION); - if (!sz) - goto update_stat; - damon_split_region_at(t, r, sz); - } - ktime_get_coarse_ts64(&begin); - sz_applied = c->ops.apply_scheme(c, t, r, s); - ktime_get_coarse_ts64(&end); - quota->total_charged_ns += timespec64_to_ns(&end) - - timespec64_to_ns(&begin); - quota->charged_sz += sz; - if (quota->esz && quota->charged_sz >= quota->esz) { - quota->charge_target_from = t; - quota->charge_addr_from = r->ar.end + 1; - } - } - if (s->action != DAMOS_STAT) - r->age = 0; - -update_stat: - s->stat.nr_tried++; - s->stat.sz_tried += sz; - if (sz_applied) - s->stat.nr_applied++; - s->stat.sz_applied += sz_applied; + damos_apply_scheme(c, t, r, s); } } From d1cbbf621fc25950938be74a228ef518d05d93a1 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Wed, 26 Oct 2022 22:59:34 +0000 
Subject: [PATCH 2956/4122] mm/damon/core: split out scheme stat update logic into a new function The function for applying a given DAMON scheme action to a given DAMON region, 'damos_apply_scheme()' is not quite short. Make it better to read by splitting out the stat update logic into a new function. Link: https://lkml.kernel.org/r/20221026225943.100429-4-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- mm/damon/core.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/mm/damon/core.c b/mm/damon/core.c index c1a912bc46ae..3a810c6e26bc 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -755,6 +755,16 @@ static bool damos_skip_charged_region(struct damon_target *t, return false; } +static void damos_update_stat(struct damos *s, + unsigned long sz_tried, unsigned long sz_applied) +{ + s->stat.nr_tried++; + s->stat.sz_tried += sz_tried; + if (sz_applied) + s->stat.nr_applied++; + s->stat.sz_applied += sz_applied; +} + static void damos_apply_scheme(struct damon_ctx *c, struct damon_target *t, struct damon_region *r, struct damos *s) { @@ -786,11 +796,7 @@ static void damos_apply_scheme(struct damon_ctx *c, struct damon_target *t, r->age = 0; update_stat: - s->stat.nr_tried++; - s->stat.sz_tried += sz; - if (sz_applied) - s->stat.nr_applied++; - s->stat.sz_applied += sz_applied; + damos_update_stat(s, sz, sz_applied); } static void damon_do_apply_schemes(struct damon_ctx *c, From 898810e5ca54691f4e173f5ffc92bbce0335bc69 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Wed, 26 Oct 2022 22:59:35 +0000 Subject: [PATCH 2957/4122] mm/damon/core: split out scheme quota adjustment logic into a new function DAMOS quota adjustment logic in 'kdamond_apply_schemes()', has some amount of code, and the logic is not so straightforward. Split it out to a new function for better readability. 
Link: https://lkml.kernel.org/r/20221026225943.100429-5-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- mm/damon/core.c | 91 ++++++++++++++++++++++++++----------------------- 1 file changed, 48 insertions(+), 43 deletions(-) diff --git a/mm/damon/core.c b/mm/damon/core.c index 3a810c6e26bc..80d5937fe337 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -848,6 +848,53 @@ static void damos_set_effective_quota(struct damos_quota *quota) quota->esz = esz; } +static void damos_adjust_quota(struct damon_ctx *c, struct damos *s) +{ + struct damos_quota *quota = &s->quota; + struct damon_target *t; + struct damon_region *r; + unsigned long cumulated_sz; + unsigned int score, max_score = 0; + + if (!quota->ms && !quota->sz) + return; + + /* New charge window starts */ + if (time_after_eq(jiffies, quota->charged_from + + msecs_to_jiffies(quota->reset_interval))) { + if (quota->esz && quota->charged_sz >= quota->esz) + s->stat.qt_exceeds++; + quota->total_charged_sz += quota->charged_sz; + quota->charged_from = jiffies; + quota->charged_sz = 0; + damos_set_effective_quota(quota); + } + + if (!c->ops.get_scheme_score) + return; + + /* Fill up the score histogram */ + memset(quota->histogram, 0, sizeof(quota->histogram)); + damon_for_each_target(t, c) { + damon_for_each_region(r, t) { + if (!__damos_valid_target(r, s)) + continue; + score = c->ops.get_scheme_score(c, t, r, s); + quota->histogram[score] += damon_sz_region(r); + if (score > max_score) + max_score = score; + } + } + + /* Set the min score limit */ + for (cumulated_sz = 0, score = max_score; ; score--) { + cumulated_sz += quota->histogram[score]; + if (cumulated_sz >= quota->esz || !score) + break; + } + quota->min_score = score; +} + static void kdamond_apply_schemes(struct damon_ctx *c) { struct damon_target *t; @@ -855,52 +902,10 @@ static void kdamond_apply_schemes(struct damon_ctx *c) struct damos *s; damon_for_each_scheme(s, c) { - struct damos_quota *quota = &s->quota; - 
unsigned long cumulated_sz; - unsigned int score, max_score = 0; - if (!s->wmarks.activated) continue; - if (!quota->ms && !quota->sz) - continue; - - /* New charge window starts */ - if (time_after_eq(jiffies, quota->charged_from + - msecs_to_jiffies( - quota->reset_interval))) { - if (quota->esz && quota->charged_sz >= quota->esz) - s->stat.qt_exceeds++; - quota->total_charged_sz += quota->charged_sz; - quota->charged_from = jiffies; - quota->charged_sz = 0; - damos_set_effective_quota(quota); - } - - if (!c->ops.get_scheme_score) - continue; - - /* Fill up the score histogram */ - memset(quota->histogram, 0, sizeof(quota->histogram)); - damon_for_each_target(t, c) { - damon_for_each_region(r, t) { - if (!__damos_valid_target(r, s)) - continue; - score = c->ops.get_scheme_score( - c, t, r, s); - quota->histogram[score] += damon_sz_region(r); - if (score > max_score) - max_score = score; - } - } - - /* Set the min score limit */ - for (cumulated_sz = 0, score = max_score; ; score--) { - cumulated_sz += quota->histogram[score]; - if (cumulated_sz >= quota->esz || !score) - break; - } - quota->min_score = score; + damos_adjust_quota(c, s); } damon_for_each_target(t, c) { From 789a230613c8dd14bdd41653de0c22783726276f Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Wed, 26 Oct 2022 22:59:36 +0000 Subject: [PATCH 2958/4122] mm/damon/sysfs: use damon_addr_range for region's start and end values DAMON has a struct for each address range but DAMON sysfs interface is using the low type (unsigned long) for storing the start and end addresses of regions. Use the dedicated struct for better type safety. 
Link: https://lkml.kernel.org/r/20221026225943.100429-6-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- mm/damon/sysfs.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index 07e5f1bdf025..a5ef503d8444 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -1062,13 +1062,11 @@ static struct kobj_type damon_sysfs_schemes_ktype = { struct damon_sysfs_region { struct kobject kobj; - unsigned long start; - unsigned long end; + struct damon_addr_range ar; }; static struct damon_sysfs_region *damon_sysfs_region_alloc( - unsigned long start, - unsigned long end) + struct damon_addr_range ar) { struct damon_sysfs_region *region = kmalloc(sizeof(*region), GFP_KERNEL); @@ -1076,8 +1074,7 @@ static struct damon_sysfs_region *damon_sysfs_region_alloc( if (!region) return NULL; region->kobj = (struct kobject){}; - region->start = start; - region->end = end; + region->ar = ar; return region; } @@ -1087,7 +1084,7 @@ static ssize_t start_show(struct kobject *kobj, struct kobj_attribute *attr, struct damon_sysfs_region *region = container_of(kobj, struct damon_sysfs_region, kobj); - return sysfs_emit(buf, "%lu\n", region->start); + return sysfs_emit(buf, "%lu\n", region->ar.start); } static ssize_t start_store(struct kobject *kobj, struct kobj_attribute *attr, @@ -1095,7 +1092,7 @@ static ssize_t start_store(struct kobject *kobj, struct kobj_attribute *attr, { struct damon_sysfs_region *region = container_of(kobj, struct damon_sysfs_region, kobj); - int err = kstrtoul(buf, 0, &region->start); + int err = kstrtoul(buf, 0, &region->ar.start); return err ? 
err : count; } @@ -1106,7 +1103,7 @@ static ssize_t end_show(struct kobject *kobj, struct kobj_attribute *attr, struct damon_sysfs_region *region = container_of(kobj, struct damon_sysfs_region, kobj); - return sysfs_emit(buf, "%lu\n", region->end); + return sysfs_emit(buf, "%lu\n", region->ar.end); } static ssize_t end_store(struct kobject *kobj, struct kobj_attribute *attr, @@ -1114,7 +1111,7 @@ static ssize_t end_store(struct kobject *kobj, struct kobj_attribute *attr, { struct damon_sysfs_region *region = container_of(kobj, struct damon_sysfs_region, kobj); - int err = kstrtoul(buf, 0, &region->end); + int err = kstrtoul(buf, 0, &region->ar.end); return err ? err : count; } @@ -1187,7 +1184,7 @@ static int damon_sysfs_regions_add_dirs(struct damon_sysfs_regions *regions, regions->regions_arr = regions_arr; for (i = 0; i < nr_regions; i++) { - region = damon_sysfs_region_alloc(0, 0); + region = damon_sysfs_region_alloc((struct damon_addr_range){}); if (!region) { damon_sysfs_regions_rm_dirs(regions); return -ENOMEM; @@ -2147,11 +2144,11 @@ static int damon_sysfs_set_regions(struct damon_target *t, struct damon_sysfs_region *sys_region = sysfs_regions->regions_arr[i]; - if (sys_region->start > sys_region->end) + if (sys_region->ar.start > sys_region->ar.end) goto out; - ranges[i].start = sys_region->start; - ranges[i].end = sys_region->end; + ranges[i].start = sys_region->ar.start; + ranges[i].end = sys_region->ar.end; if (i == 0) continue; if (ranges[i - 1].end > ranges[i].start) From 1f71981408ef5696ad8544f282d336d4fc60a807 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Wed, 26 Oct 2022 22:59:37 +0000 Subject: [PATCH 2959/4122] mm/damon/sysfs: remove parameters of damon_sysfs_region_alloc() 'damon_sysfs_region_alloc()' is always called with zero-filled 'struct damon_addr_range', because the start and end addresses should be set by users. Remove unnecessary parameters of the function and simplify the body by using 'kzalloc()'. 
Link: https://lkml.kernel.org/r/20221026225943.100429-7-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- mm/damon/sysfs.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index a5ef503d8444..f3d7b34ea0ab 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -1065,17 +1065,9 @@ struct damon_sysfs_region { struct damon_addr_range ar; }; -static struct damon_sysfs_region *damon_sysfs_region_alloc( - struct damon_addr_range ar) +static struct damon_sysfs_region *damon_sysfs_region_alloc(void) { - struct damon_sysfs_region *region = kmalloc(sizeof(*region), - GFP_KERNEL); - - if (!region) - return NULL; - region->kobj = (struct kobject){}; - region->ar = ar; - return region; + return kzalloc(sizeof(struct damon_sysfs_region), GFP_KERNEL); } static ssize_t start_show(struct kobject *kobj, struct kobj_attribute *attr, @@ -1184,7 +1176,7 @@ static int damon_sysfs_regions_add_dirs(struct damon_sysfs_regions *regions, regions->regions_arr = regions_arr; for (i = 0; i < nr_regions; i++) { - region = damon_sysfs_region_alloc((struct damon_addr_range){}); + region = damon_sysfs_region_alloc(); if (!region) { damon_sysfs_regions_rm_dirs(regions); return -ENOMEM; From 39240595917ec0c4f71d7b9dd7909790715968b5 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Wed, 26 Oct 2022 22:59:38 +0000 Subject: [PATCH 2960/4122] mm/damon/sysfs: move sysfs_lock to common module DAMON sysfs interface is implemented in a single file, sysfs.c, which has about 2,800 lines of code. As the interface is hierarchical and some of the code can be reused by different hierarchies, it would make more sense to split out the implementation into common parts and different parts in multiple files. As the beginning of the work, create files for common code and move the global mutex for directories modifications protection into the new file. 
Link: https://lkml.kernel.org/r/20221026225943.100429-8-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- mm/damon/Makefile | 2 +- mm/damon/sysfs-common.c | 11 +++++++++++ mm/damon/sysfs-common.h | 11 +++++++++++ mm/damon/sysfs.c | 4 +--- 4 files changed, 24 insertions(+), 4 deletions(-) create mode 100644 mm/damon/sysfs-common.c create mode 100644 mm/damon/sysfs-common.h diff --git a/mm/damon/Makefile b/mm/damon/Makefile index 3e6b8ad73858..f8d535a6253b 100644 --- a/mm/damon/Makefile +++ b/mm/damon/Makefile @@ -3,7 +3,7 @@ obj-y := core.o obj-$(CONFIG_DAMON_VADDR) += ops-common.o vaddr.o obj-$(CONFIG_DAMON_PADDR) += ops-common.o paddr.o -obj-$(CONFIG_DAMON_SYSFS) += sysfs.o +obj-$(CONFIG_DAMON_SYSFS) += sysfs-common.o sysfs.o obj-$(CONFIG_DAMON_DBGFS) += dbgfs.o obj-$(CONFIG_DAMON_RECLAIM) += reclaim.o obj-$(CONFIG_DAMON_LRU_SORT) += lru_sort.o diff --git a/mm/damon/sysfs-common.c b/mm/damon/sysfs-common.c new file mode 100644 index 000000000000..9dc743868d5b --- /dev/null +++ b/mm/damon/sysfs-common.c @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Common Primitives for DAMON Sysfs Interface + * + * Author: SeongJae Park + */ + +#include "sysfs-common.h" + +DEFINE_MUTEX(damon_sysfs_lock); + diff --git a/mm/damon/sysfs-common.h b/mm/damon/sysfs-common.h new file mode 100644 index 000000000000..745a918b94f5 --- /dev/null +++ b/mm/damon/sysfs-common.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Common Primitives for DAMON Sysfs Interface + * + * Author: SeongJae Park + */ + +#include +#include + +extern struct mutex damon_sysfs_lock; diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index f3d7b34ea0ab..a847b9159718 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -5,13 +5,11 @@ * Copyright (c) 2022 SeongJae Park */ -#include -#include #include #include #include -static DEFINE_MUTEX(damon_sysfs_lock); +#include "sysfs-common.h" /* * unsigned long range directory From 
d332fe11debe69fee3de4c2d84fa0b6649678ad2 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Wed, 26 Oct 2022 22:59:39 +0000 Subject: [PATCH 2961/4122] mm/damon/sysfs: move unsigned long range directory to common module The implementation of unsigned long type range directories can be reused by multiple DAMON sysfs directories including those for DAMON-based Operation Schemes and the range of number of monitoring regions. Move the code into the files for DAMON sysfs common logics. Link: https://lkml.kernel.org/r/20221026225943.100429-9-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- mm/damon/sysfs-common.c | 96 ++++++++++++++++++++++++++++++++++++++ mm/damon/sysfs-common.h | 13 ++++++ mm/damon/sysfs.c | 100 ---------------------------------------- 3 files changed, 109 insertions(+), 100 deletions(-) diff --git a/mm/damon/sysfs-common.c b/mm/damon/sysfs-common.c index 9dc743868d5b..52bebf242f74 100644 --- a/mm/damon/sysfs-common.c +++ b/mm/damon/sysfs-common.c @@ -5,7 +5,103 @@ * Author: SeongJae Park */ +#include + #include "sysfs-common.h" DEFINE_MUTEX(damon_sysfs_lock); +/* + * unsigned long range directory + */ + +struct damon_sysfs_ul_range *damon_sysfs_ul_range_alloc( + unsigned long min, + unsigned long max) +{ + struct damon_sysfs_ul_range *range = kmalloc(sizeof(*range), + GFP_KERNEL); + + if (!range) + return NULL; + range->kobj = (struct kobject){}; + range->min = min; + range->max = max; + + return range; +} + +static ssize_t min_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct damon_sysfs_ul_range *range = container_of(kobj, + struct damon_sysfs_ul_range, kobj); + + return sysfs_emit(buf, "%lu\n", range->min); +} + +static ssize_t min_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct damon_sysfs_ul_range *range = container_of(kobj, + struct damon_sysfs_ul_range, kobj); + unsigned long min; + int err; + + err = kstrtoul(buf, 0, &min); + if (err) 
+ return err; + + range->min = min; + return count; +} + +static ssize_t max_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct damon_sysfs_ul_range *range = container_of(kobj, + struct damon_sysfs_ul_range, kobj); + + return sysfs_emit(buf, "%lu\n", range->max); +} + +static ssize_t max_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct damon_sysfs_ul_range *range = container_of(kobj, + struct damon_sysfs_ul_range, kobj); + unsigned long max; + int err; + + err = kstrtoul(buf, 0, &max); + if (err) + return err; + + range->max = max; + return count; +} + +void damon_sysfs_ul_range_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damon_sysfs_ul_range, kobj)); +} + +static struct kobj_attribute damon_sysfs_ul_range_min_attr = + __ATTR_RW_MODE(min, 0600); + +static struct kobj_attribute damon_sysfs_ul_range_max_attr = + __ATTR_RW_MODE(max, 0600); + +static struct attribute *damon_sysfs_ul_range_attrs[] = { + &damon_sysfs_ul_range_min_attr.attr, + &damon_sysfs_ul_range_max_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_ul_range); + +struct kobj_type damon_sysfs_ul_range_ktype = { + .release = damon_sysfs_ul_range_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_ul_range_groups, +}; + diff --git a/mm/damon/sysfs-common.h b/mm/damon/sysfs-common.h index 745a918b94f5..56e6a99e353b 100644 --- a/mm/damon/sysfs-common.h +++ b/mm/damon/sysfs-common.h @@ -9,3 +9,16 @@ #include extern struct mutex damon_sysfs_lock; + +struct damon_sysfs_ul_range { + struct kobject kobj; + unsigned long min; + unsigned long max; +}; + +struct damon_sysfs_ul_range *damon_sysfs_ul_range_alloc( + unsigned long min, + unsigned long max); +void damon_sysfs_ul_range_release(struct kobject *kobj); + +extern struct kobj_type damon_sysfs_ul_range_ktype; diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index a847b9159718..6774a669962e 100644 --- a/mm/damon/sysfs.c +++ 
b/mm/damon/sysfs.c @@ -11,106 +11,6 @@ #include "sysfs-common.h" -/* - * unsigned long range directory - */ - -struct damon_sysfs_ul_range { - struct kobject kobj; - unsigned long min; - unsigned long max; -}; - -static struct damon_sysfs_ul_range *damon_sysfs_ul_range_alloc( - unsigned long min, - unsigned long max) -{ - struct damon_sysfs_ul_range *range = kmalloc(sizeof(*range), - GFP_KERNEL); - - if (!range) - return NULL; - range->kobj = (struct kobject){}; - range->min = min; - range->max = max; - - return range; -} - -static ssize_t min_show(struct kobject *kobj, struct kobj_attribute *attr, - char *buf) -{ - struct damon_sysfs_ul_range *range = container_of(kobj, - struct damon_sysfs_ul_range, kobj); - - return sysfs_emit(buf, "%lu\n", range->min); -} - -static ssize_t min_store(struct kobject *kobj, struct kobj_attribute *attr, - const char *buf, size_t count) -{ - struct damon_sysfs_ul_range *range = container_of(kobj, - struct damon_sysfs_ul_range, kobj); - unsigned long min; - int err; - - err = kstrtoul(buf, 0, &min); - if (err) - return err; - - range->min = min; - return count; -} - -static ssize_t max_show(struct kobject *kobj, struct kobj_attribute *attr, - char *buf) -{ - struct damon_sysfs_ul_range *range = container_of(kobj, - struct damon_sysfs_ul_range, kobj); - - return sysfs_emit(buf, "%lu\n", range->max); -} - -static ssize_t max_store(struct kobject *kobj, struct kobj_attribute *attr, - const char *buf, size_t count) -{ - struct damon_sysfs_ul_range *range = container_of(kobj, - struct damon_sysfs_ul_range, kobj); - unsigned long max; - int err; - - err = kstrtoul(buf, 0, &max); - if (err) - return err; - - range->max = max; - return count; -} - -static void damon_sysfs_ul_range_release(struct kobject *kobj) -{ - kfree(container_of(kobj, struct damon_sysfs_ul_range, kobj)); -} - -static struct kobj_attribute damon_sysfs_ul_range_min_attr = - __ATTR_RW_MODE(min, 0600); - -static struct kobj_attribute damon_sysfs_ul_range_max_attr = - 
__ATTR_RW_MODE(max, 0600); - -static struct attribute *damon_sysfs_ul_range_attrs[] = { - &damon_sysfs_ul_range_min_attr.attr, - &damon_sysfs_ul_range_max_attr.attr, - NULL, -}; -ATTRIBUTE_GROUPS(damon_sysfs_ul_range); - -static struct kobj_type damon_sysfs_ul_range_ktype = { - .release = damon_sysfs_ul_range_release, - .sysfs_ops = &kobj_sysfs_ops, - .default_groups = damon_sysfs_ul_range_groups, -}; - /* * schemes/stats directory */ From 4acd715ff57fd05a481c64d074db68f2cf5711aa Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Wed, 26 Oct 2022 22:59:40 +0000 Subject: [PATCH 2962/4122] mm/damon/sysfs: split out kdamond-independent schemes stats update logic into a new function 'damon_sysfs_schemes_update_stats()' is coupled with both damon_sysfs_kdamond and damon_sysfs_schemes. It's a wide range of types dependency. It makes splitting the logics a little bit distracting. Split the function so that each function is coupled with smaller range of types. Link: https://lkml.kernel.org/r/20221026225943.100429-10-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- mm/damon/sysfs.c | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index 6774a669962e..836df19a7d86 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -2246,25 +2246,13 @@ static void damon_sysfs_before_terminate(struct damon_ctx *ctx) mutex_unlock(&ctx->kdamond_lock); } -/* - * damon_sysfs_upd_schemes_stats() - Update schemes stats sysfs files. - * @kdamond: The kobject wrapper that associated to the kdamond thread. - * - * This function reads the schemes stats of specific kdamond and update the - * related values for sysfs files. This function should be called from DAMON - * callbacks while holding ``damon_syfs_lock``, to safely access the DAMON - * contexts-internal data and DAMON sysfs variables. 
- */ -static int damon_sysfs_upd_schemes_stats(struct damon_sysfs_kdamond *kdamond) +static void damon_sysfs_schemes_update_stats( + struct damon_sysfs_schemes *sysfs_schemes, + struct damon_ctx *ctx) { - struct damon_ctx *ctx = kdamond->damon_ctx; - struct damon_sysfs_schemes *sysfs_schemes; struct damos *scheme; int schemes_idx = 0; - if (!ctx) - return -EINVAL; - sysfs_schemes = kdamond->contexts->contexts_arr[0]->schemes; damon_for_each_scheme(scheme, ctx) { struct damon_sysfs_stats *sysfs_stats; @@ -2279,6 +2267,25 @@ static int damon_sysfs_upd_schemes_stats(struct damon_sysfs_kdamond *kdamond) sysfs_stats->sz_applied = scheme->stat.sz_applied; sysfs_stats->qt_exceeds = scheme->stat.qt_exceeds; } +} + +/* + * damon_sysfs_upd_schemes_stats() - Update schemes stats sysfs files. + * @kdamond: The kobject wrapper that associated to the kdamond thread. + * + * This function reads the schemes stats of specific kdamond and update the + * related values for sysfs files. This function should be called from DAMON + * callbacks while holding ``damon_syfs_lock``, to safely access the DAMON + * contexts-internal data and DAMON sysfs variables. + */ +static int damon_sysfs_upd_schemes_stats(struct damon_sysfs_kdamond *kdamond) +{ + struct damon_ctx *ctx = kdamond->damon_ctx; + + if (!ctx) + return -EINVAL; + damon_sysfs_schemes_update_stats( + kdamond->contexts->contexts_arr[0]->schemes, ctx); return 0; } From c8e7b4d0ba348a8ef14956a80c780f152f433764 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Wed, 26 Oct 2022 22:59:41 +0000 Subject: [PATCH 2963/4122] mm/damon/sysfs: split out schemes directory implementation to separate file DAMON sysfs interface for 'schemes' directory is implemented using about one thousand lines of code. It has no strong dependency with other parts of its file, so split it out to another file for better code management. 
Link: https://lkml.kernel.org/r/20221026225943.100429-11-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- mm/damon/Makefile | 2 +- mm/damon/sysfs-common.h | 22 + mm/damon/sysfs-schemes.c | 1068 ++++++++++++++++++++++++++++++++++++++ mm/damon/sysfs.c | 1064 ------------------------------------- 4 files changed, 1091 insertions(+), 1065 deletions(-) create mode 100644 mm/damon/sysfs-schemes.c diff --git a/mm/damon/Makefile b/mm/damon/Makefile index f8d535a6253b..1e86f5253d7f 100644 --- a/mm/damon/Makefile +++ b/mm/damon/Makefile @@ -3,7 +3,7 @@ obj-y := core.o obj-$(CONFIG_DAMON_VADDR) += ops-common.o vaddr.o obj-$(CONFIG_DAMON_PADDR) += ops-common.o paddr.o -obj-$(CONFIG_DAMON_SYSFS) += sysfs-common.o sysfs.o +obj-$(CONFIG_DAMON_SYSFS) += sysfs-common.o sysfs-schemes.o sysfs.o obj-$(CONFIG_DAMON_DBGFS) += dbgfs.o obj-$(CONFIG_DAMON_RECLAIM) += reclaim.o obj-$(CONFIG_DAMON_LRU_SORT) += lru_sort.o diff --git a/mm/damon/sysfs-common.h b/mm/damon/sysfs-common.h index 56e6a99e353b..4626b2784404 100644 --- a/mm/damon/sysfs-common.h +++ b/mm/damon/sysfs-common.h @@ -22,3 +22,25 @@ struct damon_sysfs_ul_range *damon_sysfs_ul_range_alloc( void damon_sysfs_ul_range_release(struct kobject *kobj); extern struct kobj_type damon_sysfs_ul_range_ktype; + +/* + * schemes directory + */ + +struct damon_sysfs_schemes { + struct kobject kobj; + struct damon_sysfs_scheme **schemes_arr; + int nr; +}; + +struct damon_sysfs_schemes *damon_sysfs_schemes_alloc(void); +void damon_sysfs_schemes_rm_dirs(struct damon_sysfs_schemes *schemes); + +extern struct kobj_type damon_sysfs_schemes_ktype; + +int damon_sysfs_set_schemes(struct damon_ctx *ctx, + struct damon_sysfs_schemes *sysfs_schemes); + +void damon_sysfs_schemes_update_stats( + struct damon_sysfs_schemes *sysfs_schemes, + struct damon_ctx *ctx); diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c new file mode 100644 index 000000000000..9509d5c1e7fc --- /dev/null +++ b/mm/damon/sysfs-schemes.c @@ 
-0,0 +1,1068 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DAMON sysfs Interface + * + * Copyright (c) 2022 SeongJae Park + */ + +#include + +#include "sysfs-common.h" + +/* + * schemes/stats directory + */ + +struct damon_sysfs_stats { + struct kobject kobj; + unsigned long nr_tried; + unsigned long sz_tried; + unsigned long nr_applied; + unsigned long sz_applied; + unsigned long qt_exceeds; +}; + +static struct damon_sysfs_stats *damon_sysfs_stats_alloc(void) +{ + return kzalloc(sizeof(struct damon_sysfs_stats), GFP_KERNEL); +} + +static ssize_t nr_tried_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct damon_sysfs_stats *stats = container_of(kobj, + struct damon_sysfs_stats, kobj); + + return sysfs_emit(buf, "%lu\n", stats->nr_tried); +} + +static ssize_t sz_tried_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct damon_sysfs_stats *stats = container_of(kobj, + struct damon_sysfs_stats, kobj); + + return sysfs_emit(buf, "%lu\n", stats->sz_tried); +} + +static ssize_t nr_applied_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_stats *stats = container_of(kobj, + struct damon_sysfs_stats, kobj); + + return sysfs_emit(buf, "%lu\n", stats->nr_applied); +} + +static ssize_t sz_applied_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_stats *stats = container_of(kobj, + struct damon_sysfs_stats, kobj); + + return sysfs_emit(buf, "%lu\n", stats->sz_applied); +} + +static ssize_t qt_exceeds_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_stats *stats = container_of(kobj, + struct damon_sysfs_stats, kobj); + + return sysfs_emit(buf, "%lu\n", stats->qt_exceeds); +} + +static void damon_sysfs_stats_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damon_sysfs_stats, kobj)); +} + +static struct kobj_attribute damon_sysfs_stats_nr_tried_attr = + 
__ATTR_RO_MODE(nr_tried, 0400); + +static struct kobj_attribute damon_sysfs_stats_sz_tried_attr = + __ATTR_RO_MODE(sz_tried, 0400); + +static struct kobj_attribute damon_sysfs_stats_nr_applied_attr = + __ATTR_RO_MODE(nr_applied, 0400); + +static struct kobj_attribute damon_sysfs_stats_sz_applied_attr = + __ATTR_RO_MODE(sz_applied, 0400); + +static struct kobj_attribute damon_sysfs_stats_qt_exceeds_attr = + __ATTR_RO_MODE(qt_exceeds, 0400); + +static struct attribute *damon_sysfs_stats_attrs[] = { + &damon_sysfs_stats_nr_tried_attr.attr, + &damon_sysfs_stats_sz_tried_attr.attr, + &damon_sysfs_stats_nr_applied_attr.attr, + &damon_sysfs_stats_sz_applied_attr.attr, + &damon_sysfs_stats_qt_exceeds_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_stats); + +static struct kobj_type damon_sysfs_stats_ktype = { + .release = damon_sysfs_stats_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_stats_groups, +}; + +/* + * watermarks directory + */ + +struct damon_sysfs_watermarks { + struct kobject kobj; + enum damos_wmark_metric metric; + unsigned long interval_us; + unsigned long high; + unsigned long mid; + unsigned long low; +}; + +static struct damon_sysfs_watermarks *damon_sysfs_watermarks_alloc( + enum damos_wmark_metric metric, unsigned long interval_us, + unsigned long high, unsigned long mid, unsigned long low) +{ + struct damon_sysfs_watermarks *watermarks = kmalloc( + sizeof(*watermarks), GFP_KERNEL); + + if (!watermarks) + return NULL; + watermarks->kobj = (struct kobject){}; + watermarks->metric = metric; + watermarks->interval_us = interval_us; + watermarks->high = high; + watermarks->mid = mid; + watermarks->low = low; + return watermarks; +} + +/* Should match with enum damos_wmark_metric */ +static const char * const damon_sysfs_wmark_metric_strs[] = { + "none", + "free_mem_rate", +}; + +static ssize_t metric_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct damon_sysfs_watermarks *watermarks = 
container_of(kobj, + struct damon_sysfs_watermarks, kobj); + + return sysfs_emit(buf, "%s\n", + damon_sysfs_wmark_metric_strs[watermarks->metric]); +} + +static ssize_t metric_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct damon_sysfs_watermarks *watermarks = container_of(kobj, + struct damon_sysfs_watermarks, kobj); + enum damos_wmark_metric metric; + + for (metric = 0; metric < NR_DAMOS_WMARK_METRICS; metric++) { + if (sysfs_streq(buf, damon_sysfs_wmark_metric_strs[metric])) { + watermarks->metric = metric; + return count; + } + } + return -EINVAL; +} + +static ssize_t interval_us_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_watermarks *watermarks = container_of(kobj, + struct damon_sysfs_watermarks, kobj); + + return sysfs_emit(buf, "%lu\n", watermarks->interval_us); +} + +static ssize_t interval_us_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_watermarks *watermarks = container_of(kobj, + struct damon_sysfs_watermarks, kobj); + int err = kstrtoul(buf, 0, &watermarks->interval_us); + + return err ? err : count; +} + +static ssize_t high_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_watermarks *watermarks = container_of(kobj, + struct damon_sysfs_watermarks, kobj); + + return sysfs_emit(buf, "%lu\n", watermarks->high); +} + +static ssize_t high_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_watermarks *watermarks = container_of(kobj, + struct damon_sysfs_watermarks, kobj); + int err = kstrtoul(buf, 0, &watermarks->high); + + return err ? 
err : count; +} + +static ssize_t mid_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_watermarks *watermarks = container_of(kobj, + struct damon_sysfs_watermarks, kobj); + + return sysfs_emit(buf, "%lu\n", watermarks->mid); +} + +static ssize_t mid_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_watermarks *watermarks = container_of(kobj, + struct damon_sysfs_watermarks, kobj); + int err = kstrtoul(buf, 0, &watermarks->mid); + + return err ? err : count; +} + +static ssize_t low_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_watermarks *watermarks = container_of(kobj, + struct damon_sysfs_watermarks, kobj); + + return sysfs_emit(buf, "%lu\n", watermarks->low); +} + +static ssize_t low_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_watermarks *watermarks = container_of(kobj, + struct damon_sysfs_watermarks, kobj); + int err = kstrtoul(buf, 0, &watermarks->low); + + return err ? 
err : count; +} + +static void damon_sysfs_watermarks_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damon_sysfs_watermarks, kobj)); +} + +static struct kobj_attribute damon_sysfs_watermarks_metric_attr = + __ATTR_RW_MODE(metric, 0600); + +static struct kobj_attribute damon_sysfs_watermarks_interval_us_attr = + __ATTR_RW_MODE(interval_us, 0600); + +static struct kobj_attribute damon_sysfs_watermarks_high_attr = + __ATTR_RW_MODE(high, 0600); + +static struct kobj_attribute damon_sysfs_watermarks_mid_attr = + __ATTR_RW_MODE(mid, 0600); + +static struct kobj_attribute damon_sysfs_watermarks_low_attr = + __ATTR_RW_MODE(low, 0600); + +static struct attribute *damon_sysfs_watermarks_attrs[] = { + &damon_sysfs_watermarks_metric_attr.attr, + &damon_sysfs_watermarks_interval_us_attr.attr, + &damon_sysfs_watermarks_high_attr.attr, + &damon_sysfs_watermarks_mid_attr.attr, + &damon_sysfs_watermarks_low_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_watermarks); + +static struct kobj_type damon_sysfs_watermarks_ktype = { + .release = damon_sysfs_watermarks_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_watermarks_groups, +}; + +/* + * scheme/weights directory + */ + +struct damon_sysfs_weights { + struct kobject kobj; + unsigned int sz; + unsigned int nr_accesses; + unsigned int age; +}; + +static struct damon_sysfs_weights *damon_sysfs_weights_alloc(unsigned int sz, + unsigned int nr_accesses, unsigned int age) +{ + struct damon_sysfs_weights *weights = kmalloc(sizeof(*weights), + GFP_KERNEL); + + if (!weights) + return NULL; + weights->kobj = (struct kobject){}; + weights->sz = sz; + weights->nr_accesses = nr_accesses; + weights->age = age; + return weights; +} + +static ssize_t sz_permil_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_weights *weights = container_of(kobj, + struct damon_sysfs_weights, kobj); + + return sysfs_emit(buf, "%u\n", weights->sz); +} + +static ssize_t 
sz_permil_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_weights *weights = container_of(kobj, + struct damon_sysfs_weights, kobj); + int err = kstrtouint(buf, 0, &weights->sz); + + return err ? err : count; +} + +static ssize_t nr_accesses_permil_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_weights *weights = container_of(kobj, + struct damon_sysfs_weights, kobj); + + return sysfs_emit(buf, "%u\n", weights->nr_accesses); +} + +static ssize_t nr_accesses_permil_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_weights *weights = container_of(kobj, + struct damon_sysfs_weights, kobj); + int err = kstrtouint(buf, 0, &weights->nr_accesses); + + return err ? err : count; +} + +static ssize_t age_permil_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_weights *weights = container_of(kobj, + struct damon_sysfs_weights, kobj); + + return sysfs_emit(buf, "%u\n", weights->age); +} + +static ssize_t age_permil_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_weights *weights = container_of(kobj, + struct damon_sysfs_weights, kobj); + int err = kstrtouint(buf, 0, &weights->age); + + return err ? 
err : count; +} + +static void damon_sysfs_weights_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damon_sysfs_weights, kobj)); +} + +static struct kobj_attribute damon_sysfs_weights_sz_attr = + __ATTR_RW_MODE(sz_permil, 0600); + +static struct kobj_attribute damon_sysfs_weights_nr_accesses_attr = + __ATTR_RW_MODE(nr_accesses_permil, 0600); + +static struct kobj_attribute damon_sysfs_weights_age_attr = + __ATTR_RW_MODE(age_permil, 0600); + +static struct attribute *damon_sysfs_weights_attrs[] = { + &damon_sysfs_weights_sz_attr.attr, + &damon_sysfs_weights_nr_accesses_attr.attr, + &damon_sysfs_weights_age_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_weights); + +static struct kobj_type damon_sysfs_weights_ktype = { + .release = damon_sysfs_weights_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_weights_groups, +}; + +/* + * quotas directory + */ + +struct damon_sysfs_quotas { + struct kobject kobj; + struct damon_sysfs_weights *weights; + unsigned long ms; + unsigned long sz; + unsigned long reset_interval_ms; +}; + +static struct damon_sysfs_quotas *damon_sysfs_quotas_alloc(void) +{ + return kzalloc(sizeof(struct damon_sysfs_quotas), GFP_KERNEL); +} + +static int damon_sysfs_quotas_add_dirs(struct damon_sysfs_quotas *quotas) +{ + struct damon_sysfs_weights *weights; + int err; + + weights = damon_sysfs_weights_alloc(0, 0, 0); + if (!weights) + return -ENOMEM; + + err = kobject_init_and_add(&weights->kobj, &damon_sysfs_weights_ktype, + &quotas->kobj, "weights"); + if (err) + kobject_put(&weights->kobj); + else + quotas->weights = weights; + return err; +} + +static void damon_sysfs_quotas_rm_dirs(struct damon_sysfs_quotas *quotas) +{ + kobject_put(&quotas->weights->kobj); +} + +static ssize_t ms_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct damon_sysfs_quotas *quotas = container_of(kobj, + struct damon_sysfs_quotas, kobj); + + return sysfs_emit(buf, "%lu\n", quotas->ms); +} + +static
ssize_t ms_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct damon_sysfs_quotas *quotas = container_of(kobj, + struct damon_sysfs_quotas, kobj); + int err = kstrtoul(buf, 0, &quotas->ms); + + if (err) + return -EINVAL; + return count; +} + +static ssize_t bytes_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct damon_sysfs_quotas *quotas = container_of(kobj, + struct damon_sysfs_quotas, kobj); + + return sysfs_emit(buf, "%lu\n", quotas->sz); +} + +static ssize_t bytes_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_quotas *quotas = container_of(kobj, + struct damon_sysfs_quotas, kobj); + int err = kstrtoul(buf, 0, &quotas->sz); + + if (err) + return -EINVAL; + return count; +} + +static ssize_t reset_interval_ms_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_quotas *quotas = container_of(kobj, + struct damon_sysfs_quotas, kobj); + + return sysfs_emit(buf, "%lu\n", quotas->reset_interval_ms); +} + +static ssize_t reset_interval_ms_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_quotas *quotas = container_of(kobj, + struct damon_sysfs_quotas, kobj); + int err = kstrtoul(buf, 0, &quotas->reset_interval_ms); + + if (err) + return -EINVAL; + return count; +} + +static void damon_sysfs_quotas_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damon_sysfs_quotas, kobj)); +} + +static struct kobj_attribute damon_sysfs_quotas_ms_attr = + __ATTR_RW_MODE(ms, 0600); + +static struct kobj_attribute damon_sysfs_quotas_sz_attr = + __ATTR_RW_MODE(bytes, 0600); + +static struct kobj_attribute damon_sysfs_quotas_reset_interval_ms_attr = + __ATTR_RW_MODE(reset_interval_ms, 0600); + +static struct attribute *damon_sysfs_quotas_attrs[] = { + &damon_sysfs_quotas_ms_attr.attr, + &damon_sysfs_quotas_sz_attr.attr, +
&damon_sysfs_quotas_reset_interval_ms_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_quotas); + +static struct kobj_type damon_sysfs_quotas_ktype = { + .release = damon_sysfs_quotas_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_quotas_groups, +}; + +/* + * access_pattern directory + */ + +struct damon_sysfs_access_pattern { + struct kobject kobj; + struct damon_sysfs_ul_range *sz; + struct damon_sysfs_ul_range *nr_accesses; + struct damon_sysfs_ul_range *age; +}; + +static +struct damon_sysfs_access_pattern *damon_sysfs_access_pattern_alloc(void) +{ + struct damon_sysfs_access_pattern *access_pattern = + kmalloc(sizeof(*access_pattern), GFP_KERNEL); + + if (!access_pattern) + return NULL; + access_pattern->kobj = (struct kobject){}; + return access_pattern; +} + +static int damon_sysfs_access_pattern_add_range_dir( + struct damon_sysfs_access_pattern *access_pattern, + struct damon_sysfs_ul_range **range_dir_ptr, + char *name) +{ + struct damon_sysfs_ul_range *range = damon_sysfs_ul_range_alloc(0, 0); + int err; + + if (!range) + return -ENOMEM; + err = kobject_init_and_add(&range->kobj, &damon_sysfs_ul_range_ktype, + &access_pattern->kobj, name); + if (err) + kobject_put(&range->kobj); + else + *range_dir_ptr = range; + return err; +} + +static int damon_sysfs_access_pattern_add_dirs( + struct damon_sysfs_access_pattern *access_pattern) +{ + int err; + + err = damon_sysfs_access_pattern_add_range_dir(access_pattern, + &access_pattern->sz, "sz"); + if (err) + goto put_sz_out; + + err = damon_sysfs_access_pattern_add_range_dir(access_pattern, + &access_pattern->nr_accesses, "nr_accesses"); + if (err) + goto put_nr_accesses_sz_out; + + err = damon_sysfs_access_pattern_add_range_dir(access_pattern, + &access_pattern->age, "age"); + if (err) + goto put_age_nr_accesses_sz_out; + return 0; + +put_age_nr_accesses_sz_out: + kobject_put(&access_pattern->age->kobj); + access_pattern->age = NULL; +put_nr_accesses_sz_out: + 
kobject_put(&access_pattern->nr_accesses->kobj); + access_pattern->nr_accesses = NULL; +put_sz_out: + kobject_put(&access_pattern->sz->kobj); + access_pattern->sz = NULL; + return err; +} + +static void damon_sysfs_access_pattern_rm_dirs( + struct damon_sysfs_access_pattern *access_pattern) +{ + kobject_put(&access_pattern->sz->kobj); + kobject_put(&access_pattern->nr_accesses->kobj); + kobject_put(&access_pattern->age->kobj); +} + +static void damon_sysfs_access_pattern_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damon_sysfs_access_pattern, kobj)); +} + +static struct attribute *damon_sysfs_access_pattern_attrs[] = { + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_access_pattern); + +static struct kobj_type damon_sysfs_access_pattern_ktype = { + .release = damon_sysfs_access_pattern_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_access_pattern_groups, +}; + +/* + * scheme directory + */ + +struct damon_sysfs_scheme { + struct kobject kobj; + enum damos_action action; + struct damon_sysfs_access_pattern *access_pattern; + struct damon_sysfs_quotas *quotas; + struct damon_sysfs_watermarks *watermarks; + struct damon_sysfs_stats *stats; +}; + +/* This should match with enum damos_action */ +static const char * const damon_sysfs_damos_action_strs[] = { + "willneed", + "cold", + "pageout", + "hugepage", + "nohugepage", + "lru_prio", + "lru_deprio", + "stat", +}; + +static struct damon_sysfs_scheme *damon_sysfs_scheme_alloc( + enum damos_action action) +{ + struct damon_sysfs_scheme *scheme = kmalloc(sizeof(*scheme), + GFP_KERNEL); + + if (!scheme) + return NULL; + scheme->kobj = (struct kobject){}; + scheme->action = action; + return scheme; +} + +static int damon_sysfs_scheme_set_access_pattern( + struct damon_sysfs_scheme *scheme) +{ + struct damon_sysfs_access_pattern *access_pattern; + int err; + + access_pattern = damon_sysfs_access_pattern_alloc(); + if (!access_pattern) + return -ENOMEM; + err = 
kobject_init_and_add(&access_pattern->kobj, + &damon_sysfs_access_pattern_ktype, &scheme->kobj, + "access_pattern"); + if (err) + goto out; + err = damon_sysfs_access_pattern_add_dirs(access_pattern); + if (err) + goto out; + scheme->access_pattern = access_pattern; + return 0; + +out: + kobject_put(&access_pattern->kobj); + return err; +} + +static int damon_sysfs_scheme_set_quotas(struct damon_sysfs_scheme *scheme) +{ + struct damon_sysfs_quotas *quotas = damon_sysfs_quotas_alloc(); + int err; + + if (!quotas) + return -ENOMEM; + err = kobject_init_and_add(&quotas->kobj, &damon_sysfs_quotas_ktype, + &scheme->kobj, "quotas"); + if (err) + goto out; + err = damon_sysfs_quotas_add_dirs(quotas); + if (err) + goto out; + scheme->quotas = quotas; + return 0; + +out: + kobject_put(&quotas->kobj); + return err; +} + +static int damon_sysfs_scheme_set_watermarks(struct damon_sysfs_scheme *scheme) +{ + struct damon_sysfs_watermarks *watermarks = + damon_sysfs_watermarks_alloc(DAMOS_WMARK_NONE, 0, 0, 0, 0); + int err; + + if (!watermarks) + return -ENOMEM; + err = kobject_init_and_add(&watermarks->kobj, + &damon_sysfs_watermarks_ktype, &scheme->kobj, + "watermarks"); + if (err) + kobject_put(&watermarks->kobj); + else + scheme->watermarks = watermarks; + return err; +} + +static int damon_sysfs_scheme_set_stats(struct damon_sysfs_scheme *scheme) +{ + struct damon_sysfs_stats *stats = damon_sysfs_stats_alloc(); + int err; + + if (!stats) + return -ENOMEM; + err = kobject_init_and_add(&stats->kobj, &damon_sysfs_stats_ktype, + &scheme->kobj, "stats"); + if (err) + kobject_put(&stats->kobj); + else + scheme->stats = stats; + return err; +} + +static int damon_sysfs_scheme_add_dirs(struct damon_sysfs_scheme *scheme) +{ + int err; + + err = damon_sysfs_scheme_set_access_pattern(scheme); + if (err) + return err; + err = damon_sysfs_scheme_set_quotas(scheme); + if (err) + goto put_access_pattern_out; + err = damon_sysfs_scheme_set_watermarks(scheme); + if (err) + goto
put_quotas_access_pattern_out; + err = damon_sysfs_scheme_set_stats(scheme); + if (err) + goto put_watermarks_quotas_access_pattern_out; + return 0; + +put_watermarks_quotas_access_pattern_out: + kobject_put(&scheme->watermarks->kobj); + scheme->watermarks = NULL; +put_quotas_access_pattern_out: + kobject_put(&scheme->quotas->kobj); + scheme->quotas = NULL; +put_access_pattern_out: + kobject_put(&scheme->access_pattern->kobj); + scheme->access_pattern = NULL; + return err; +} + +static void damon_sysfs_scheme_rm_dirs(struct damon_sysfs_scheme *scheme) +{ + damon_sysfs_access_pattern_rm_dirs(scheme->access_pattern); + kobject_put(&scheme->access_pattern->kobj); + damon_sysfs_quotas_rm_dirs(scheme->quotas); + kobject_put(&scheme->quotas->kobj); + kobject_put(&scheme->watermarks->kobj); + kobject_put(&scheme->stats->kobj); +} + +static ssize_t action_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct damon_sysfs_scheme *scheme = container_of(kobj, + struct damon_sysfs_scheme, kobj); + + return sysfs_emit(buf, "%s\n", + damon_sysfs_damos_action_strs[scheme->action]); +} + +static ssize_t action_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct damon_sysfs_scheme *scheme = container_of(kobj, + struct damon_sysfs_scheme, kobj); + enum damos_action action; + + for (action = 0; action < NR_DAMOS_ACTIONS; action++) { + if (sysfs_streq(buf, damon_sysfs_damos_action_strs[action])) { + scheme->action = action; + return count; + } + } + return -EINVAL; +} + +static void damon_sysfs_scheme_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damon_sysfs_scheme, kobj)); +} + +static struct kobj_attribute damon_sysfs_scheme_action_attr = + __ATTR_RW_MODE(action, 0600); + +static struct attribute *damon_sysfs_scheme_attrs[] = { + &damon_sysfs_scheme_action_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_scheme); + +static struct kobj_type damon_sysfs_scheme_ktype = { + .release = 
damon_sysfs_scheme_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_scheme_groups, +}; + +/* + * schemes directory + */ + +struct damon_sysfs_schemes *damon_sysfs_schemes_alloc(void) +{ + return kzalloc(sizeof(struct damon_sysfs_schemes), GFP_KERNEL); +} + +void damon_sysfs_schemes_rm_dirs(struct damon_sysfs_schemes *schemes) +{ + struct damon_sysfs_scheme **schemes_arr = schemes->schemes_arr; + int i; + + for (i = 0; i < schemes->nr; i++) { + damon_sysfs_scheme_rm_dirs(schemes_arr[i]); + kobject_put(&schemes_arr[i]->kobj); + } + schemes->nr = 0; + kfree(schemes_arr); + schemes->schemes_arr = NULL; +} + +static int damon_sysfs_schemes_add_dirs(struct damon_sysfs_schemes *schemes, + int nr_schemes) +{ + struct damon_sysfs_scheme **schemes_arr, *scheme; + int err, i; + + damon_sysfs_schemes_rm_dirs(schemes); + if (!nr_schemes) + return 0; + + schemes_arr = kmalloc_array(nr_schemes, sizeof(*schemes_arr), + GFP_KERNEL | __GFP_NOWARN); + if (!schemes_arr) + return -ENOMEM; + schemes->schemes_arr = schemes_arr; + + for (i = 0; i < nr_schemes; i++) { + scheme = damon_sysfs_scheme_alloc(DAMOS_STAT); + if (!scheme) { + damon_sysfs_schemes_rm_dirs(schemes); + return -ENOMEM; + } + + err = kobject_init_and_add(&scheme->kobj, + &damon_sysfs_scheme_ktype, &schemes->kobj, + "%d", i); + if (err) + goto out; + err = damon_sysfs_scheme_add_dirs(scheme); + if (err) + goto out; + + schemes_arr[i] = scheme; + schemes->nr++; + } + return 0; + +out: + damon_sysfs_schemes_rm_dirs(schemes); + kobject_put(&scheme->kobj); + return err; +} + +static ssize_t nr_schemes_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_schemes *schemes = container_of(kobj, + struct damon_sysfs_schemes, kobj); + + return sysfs_emit(buf, "%d\n", schemes->nr); +} + +static ssize_t nr_schemes_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_schemes *schemes; + int nr, err = 
kstrtoint(buf, 0, &nr); + + if (err) + return err; + if (nr < 0) + return -EINVAL; + + schemes = container_of(kobj, struct damon_sysfs_schemes, kobj); + + if (!mutex_trylock(&damon_sysfs_lock)) + return -EBUSY; + err = damon_sysfs_schemes_add_dirs(schemes, nr); + mutex_unlock(&damon_sysfs_lock); + if (err) + return err; + return count; +} + +static void damon_sysfs_schemes_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damon_sysfs_schemes, kobj)); +} + +static struct kobj_attribute damon_sysfs_schemes_nr_attr = + __ATTR_RW_MODE(nr_schemes, 0600); + +static struct attribute *damon_sysfs_schemes_attrs[] = { + &damon_sysfs_schemes_nr_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_schemes); + +struct kobj_type damon_sysfs_schemes_ktype = { + .release = damon_sysfs_schemes_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_schemes_groups, +}; + +static struct damos *damon_sysfs_mk_scheme( + struct damon_sysfs_scheme *sysfs_scheme) +{ + struct damon_sysfs_access_pattern *access_pattern = + sysfs_scheme->access_pattern; + struct damon_sysfs_quotas *sysfs_quotas = sysfs_scheme->quotas; + struct damon_sysfs_weights *sysfs_weights = sysfs_quotas->weights; + struct damon_sysfs_watermarks *sysfs_wmarks = sysfs_scheme->watermarks; + + struct damos_access_pattern pattern = { + .min_sz_region = access_pattern->sz->min, + .max_sz_region = access_pattern->sz->max, + .min_nr_accesses = access_pattern->nr_accesses->min, + .max_nr_accesses = access_pattern->nr_accesses->max, + .min_age_region = access_pattern->age->min, + .max_age_region = access_pattern->age->max, + }; + struct damos_quota quota = { + .ms = sysfs_quotas->ms, + .sz = sysfs_quotas->sz, + .reset_interval = sysfs_quotas->reset_interval_ms, + .weight_sz = sysfs_weights->sz, + .weight_nr_accesses = sysfs_weights->nr_accesses, + .weight_age = sysfs_weights->age, + }; + struct damos_watermarks wmarks = { + .metric = sysfs_wmarks->metric, + .interval = sysfs_wmarks->interval_us, 
+ .high = sysfs_wmarks->high, + .mid = sysfs_wmarks->mid, + .low = sysfs_wmarks->low, + }; + + return damon_new_scheme(&pattern, sysfs_scheme->action, &quota, + &wmarks); +} + +static void damon_sysfs_update_scheme(struct damos *scheme, + struct damon_sysfs_scheme *sysfs_scheme) +{ + struct damon_sysfs_access_pattern *access_pattern = + sysfs_scheme->access_pattern; + struct damon_sysfs_quotas *sysfs_quotas = sysfs_scheme->quotas; + struct damon_sysfs_weights *sysfs_weights = sysfs_quotas->weights; + struct damon_sysfs_watermarks *sysfs_wmarks = sysfs_scheme->watermarks; + + scheme->pattern.min_sz_region = access_pattern->sz->min; + scheme->pattern.max_sz_region = access_pattern->sz->max; + scheme->pattern.min_nr_accesses = access_pattern->nr_accesses->min; + scheme->pattern.max_nr_accesses = access_pattern->nr_accesses->max; + scheme->pattern.min_age_region = access_pattern->age->min; + scheme->pattern.max_age_region = access_pattern->age->max; + + scheme->action = sysfs_scheme->action; + + scheme->quota.ms = sysfs_quotas->ms; + scheme->quota.sz = sysfs_quotas->sz; + scheme->quota.reset_interval = sysfs_quotas->reset_interval_ms; + scheme->quota.weight_sz = sysfs_weights->sz; + scheme->quota.weight_nr_accesses = sysfs_weights->nr_accesses; + scheme->quota.weight_age = sysfs_weights->age; + + scheme->wmarks.metric = sysfs_wmarks->metric; + scheme->wmarks.interval = sysfs_wmarks->interval_us; + scheme->wmarks.high = sysfs_wmarks->high; + scheme->wmarks.mid = sysfs_wmarks->mid; + scheme->wmarks.low = sysfs_wmarks->low; +} + +int damon_sysfs_set_schemes(struct damon_ctx *ctx, + struct damon_sysfs_schemes *sysfs_schemes) +{ + struct damos *scheme, *next; + int i = 0; + + damon_for_each_scheme_safe(scheme, next, ctx) { + if (i < sysfs_schemes->nr) + damon_sysfs_update_scheme(scheme, + sysfs_schemes->schemes_arr[i]); + else + damon_destroy_scheme(scheme); + i++; + } + + for (; i < sysfs_schemes->nr; i++) { + struct damos *scheme, *next; + + scheme =
damon_sysfs_mk_scheme(sysfs_schemes->schemes_arr[i]); + if (!scheme) { + damon_for_each_scheme_safe(scheme, next, ctx) + damon_destroy_scheme(scheme); + return -ENOMEM; + } + damon_add_scheme(ctx, scheme); + } + return 0; +} + +void damon_sysfs_schemes_update_stats( + struct damon_sysfs_schemes *sysfs_schemes, + struct damon_ctx *ctx) +{ + struct damos *scheme; + int schemes_idx = 0; + + damon_for_each_scheme(scheme, ctx) { + struct damon_sysfs_stats *sysfs_stats; + + /* user could have removed the scheme sysfs dir */ + if (schemes_idx >= sysfs_schemes->nr) + break; + + sysfs_stats = sysfs_schemes->schemes_arr[schemes_idx++]->stats; + sysfs_stats->nr_tried = scheme->stat.nr_tried; + sysfs_stats->sz_tried = scheme->stat.sz_tried; + sysfs_stats->nr_applied = scheme->stat.nr_applied; + sysfs_stats->sz_applied = scheme->stat.sz_applied; + sysfs_stats->qt_exceeds = scheme->stat.qt_exceeds; + } +} diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index 836df19a7d86..284daf274b3e 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -11,949 +11,6 @@ #include "sysfs-common.h" -/* - * schemes/stats directory - */ - -struct damon_sysfs_stats { - struct kobject kobj; - unsigned long nr_tried; - unsigned long sz_tried; - unsigned long nr_applied; - unsigned long sz_applied; - unsigned long qt_exceeds; -}; - -static struct damon_sysfs_stats *damon_sysfs_stats_alloc(void) -{ - return kzalloc(sizeof(struct damon_sysfs_stats), GFP_KERNEL); -} - -static ssize_t nr_tried_show(struct kobject *kobj, struct kobj_attribute *attr, - char *buf) -{ - struct damon_sysfs_stats *stats = container_of(kobj, - struct damon_sysfs_stats, kobj); - - return sysfs_emit(buf, "%lu\n", stats->nr_tried); -} - -static ssize_t sz_tried_show(struct kobject *kobj, struct kobj_attribute *attr, - char *buf) -{ - struct damon_sysfs_stats *stats = container_of(kobj, - struct damon_sysfs_stats, kobj); - - return sysfs_emit(buf, "%lu\n", stats->sz_tried); -} - -static ssize_t nr_applied_show(struct kobject *kobj, 
- struct kobj_attribute *attr, char *buf) -{ - struct damon_sysfs_stats *stats = container_of(kobj, - struct damon_sysfs_stats, kobj); - - return sysfs_emit(buf, "%lu\n", stats->nr_applied); -} - -static ssize_t sz_applied_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - struct damon_sysfs_stats *stats = container_of(kobj, - struct damon_sysfs_stats, kobj); - - return sysfs_emit(buf, "%lu\n", stats->sz_applied); -} - -static ssize_t qt_exceeds_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - struct damon_sysfs_stats *stats = container_of(kobj, - struct damon_sysfs_stats, kobj); - - return sysfs_emit(buf, "%lu\n", stats->qt_exceeds); -} - -static void damon_sysfs_stats_release(struct kobject *kobj) -{ - kfree(container_of(kobj, struct damon_sysfs_stats, kobj)); -} - -static struct kobj_attribute damon_sysfs_stats_nr_tried_attr = - __ATTR_RO_MODE(nr_tried, 0400); - -static struct kobj_attribute damon_sysfs_stats_sz_tried_attr = - __ATTR_RO_MODE(sz_tried, 0400); - -static struct kobj_attribute damon_sysfs_stats_nr_applied_attr = - __ATTR_RO_MODE(nr_applied, 0400); - -static struct kobj_attribute damon_sysfs_stats_sz_applied_attr = - __ATTR_RO_MODE(sz_applied, 0400); - -static struct kobj_attribute damon_sysfs_stats_qt_exceeds_attr = - __ATTR_RO_MODE(qt_exceeds, 0400); - -static struct attribute *damon_sysfs_stats_attrs[] = { - &damon_sysfs_stats_nr_tried_attr.attr, - &damon_sysfs_stats_sz_tried_attr.attr, - &damon_sysfs_stats_nr_applied_attr.attr, - &damon_sysfs_stats_sz_applied_attr.attr, - &damon_sysfs_stats_qt_exceeds_attr.attr, - NULL, -}; -ATTRIBUTE_GROUPS(damon_sysfs_stats); - -static struct kobj_type damon_sysfs_stats_ktype = { - .release = damon_sysfs_stats_release, - .sysfs_ops = &kobj_sysfs_ops, - .default_groups = damon_sysfs_stats_groups, -}; - -/* - * watermarks directory - */ - -struct damon_sysfs_watermarks { - struct kobject kobj; - enum damos_wmark_metric metric; - unsigned long interval_us; - unsigned 
long high; - unsigned long mid; - unsigned long low; -}; - -static struct damon_sysfs_watermarks *damon_sysfs_watermarks_alloc( - enum damos_wmark_metric metric, unsigned long interval_us, - unsigned long high, unsigned long mid, unsigned long low) -{ - struct damon_sysfs_watermarks *watermarks = kmalloc( - sizeof(*watermarks), GFP_KERNEL); - - if (!watermarks) - return NULL; - watermarks->kobj = (struct kobject){}; - watermarks->metric = metric; - watermarks->interval_us = interval_us; - watermarks->high = high; - watermarks->mid = mid; - watermarks->low = low; - return watermarks; -} - -/* Should match with enum damos_wmark_metric */ -static const char * const damon_sysfs_wmark_metric_strs[] = { - "none", - "free_mem_rate", -}; - -static ssize_t metric_show(struct kobject *kobj, struct kobj_attribute *attr, - char *buf) -{ - struct damon_sysfs_watermarks *watermarks = container_of(kobj, - struct damon_sysfs_watermarks, kobj); - - return sysfs_emit(buf, "%s\n", - damon_sysfs_wmark_metric_strs[watermarks->metric]); -} - -static ssize_t metric_store(struct kobject *kobj, struct kobj_attribute *attr, - const char *buf, size_t count) -{ - struct damon_sysfs_watermarks *watermarks = container_of(kobj, - struct damon_sysfs_watermarks, kobj); - enum damos_wmark_metric metric; - - for (metric = 0; metric < NR_DAMOS_WMARK_METRICS; metric++) { - if (sysfs_streq(buf, damon_sysfs_wmark_metric_strs[metric])) { - watermarks->metric = metric; - return count; - } - } - return -EINVAL; -} - -static ssize_t interval_us_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - struct damon_sysfs_watermarks *watermarks = container_of(kobj, - struct damon_sysfs_watermarks, kobj); - - return sysfs_emit(buf, "%lu\n", watermarks->interval_us); -} - -static ssize_t interval_us_store(struct kobject *kobj, - struct kobj_attribute *attr, const char *buf, size_t count) -{ - struct damon_sysfs_watermarks *watermarks = container_of(kobj, - struct damon_sysfs_watermarks, kobj); - 
int err = kstrtoul(buf, 0, &watermarks->interval_us); - - return err ? err : count; -} - -static ssize_t high_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - struct damon_sysfs_watermarks *watermarks = container_of(kobj, - struct damon_sysfs_watermarks, kobj); - - return sysfs_emit(buf, "%lu\n", watermarks->high); -} - -static ssize_t high_store(struct kobject *kobj, - struct kobj_attribute *attr, const char *buf, size_t count) -{ - struct damon_sysfs_watermarks *watermarks = container_of(kobj, - struct damon_sysfs_watermarks, kobj); - int err = kstrtoul(buf, 0, &watermarks->high); - - return err ? err : count; -} - -static ssize_t mid_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - struct damon_sysfs_watermarks *watermarks = container_of(kobj, - struct damon_sysfs_watermarks, kobj); - - return sysfs_emit(buf, "%lu\n", watermarks->mid); -} - -static ssize_t mid_store(struct kobject *kobj, - struct kobj_attribute *attr, const char *buf, size_t count) -{ - struct damon_sysfs_watermarks *watermarks = container_of(kobj, - struct damon_sysfs_watermarks, kobj); - int err = kstrtoul(buf, 0, &watermarks->mid); - - return err ? err : count; -} - -static ssize_t low_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - struct damon_sysfs_watermarks *watermarks = container_of(kobj, - struct damon_sysfs_watermarks, kobj); - - return sysfs_emit(buf, "%lu\n", watermarks->low); -} - -static ssize_t low_store(struct kobject *kobj, - struct kobj_attribute *attr, const char *buf, size_t count) -{ - struct damon_sysfs_watermarks *watermarks = container_of(kobj, - struct damon_sysfs_watermarks, kobj); - int err = kstrtoul(buf, 0, &watermarks->low); - - return err ? 
err : count; -} - -static void damon_sysfs_watermarks_release(struct kobject *kobj) -{ - kfree(container_of(kobj, struct damon_sysfs_watermarks, kobj)); -} - -static struct kobj_attribute damon_sysfs_watermarks_metric_attr = - __ATTR_RW_MODE(metric, 0600); - -static struct kobj_attribute damon_sysfs_watermarks_interval_us_attr = - __ATTR_RW_MODE(interval_us, 0600); - -static struct kobj_attribute damon_sysfs_watermarks_high_attr = - __ATTR_RW_MODE(high, 0600); - -static struct kobj_attribute damon_sysfs_watermarks_mid_attr = - __ATTR_RW_MODE(mid, 0600); - -static struct kobj_attribute damon_sysfs_watermarks_low_attr = - __ATTR_RW_MODE(low, 0600); - -static struct attribute *damon_sysfs_watermarks_attrs[] = { - &damon_sysfs_watermarks_metric_attr.attr, - &damon_sysfs_watermarks_interval_us_attr.attr, - &damon_sysfs_watermarks_high_attr.attr, - &damon_sysfs_watermarks_mid_attr.attr, - &damon_sysfs_watermarks_low_attr.attr, - NULL, -}; -ATTRIBUTE_GROUPS(damon_sysfs_watermarks); - -static struct kobj_type damon_sysfs_watermarks_ktype = { - .release = damon_sysfs_watermarks_release, - .sysfs_ops = &kobj_sysfs_ops, - .default_groups = damon_sysfs_watermarks_groups, -}; - -/* - * scheme/weights directory - */ - -struct damon_sysfs_weights { - struct kobject kobj; - unsigned int sz; - unsigned int nr_accesses; - unsigned int age; -}; - -static struct damon_sysfs_weights *damon_sysfs_weights_alloc(unsigned int sz, - unsigned int nr_accesses, unsigned int age) -{ - struct damon_sysfs_weights *weights = kmalloc(sizeof(*weights), - GFP_KERNEL); - - if (!weights) - return NULL; - weights->kobj = (struct kobject){}; - weights->sz = sz; - weights->nr_accesses = nr_accesses; - weights->age = age; - return weights; -} - -static ssize_t sz_permil_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - struct damon_sysfs_weights *weights = container_of(kobj, - struct damon_sysfs_weights, kobj); - - return sysfs_emit(buf, "%u\n", weights->sz); -} - -static ssize_t 
sz_permil_store(struct kobject *kobj, - struct kobj_attribute *attr, const char *buf, size_t count) -{ - struct damon_sysfs_weights *weights = container_of(kobj, - struct damon_sysfs_weights, kobj); - int err = kstrtouint(buf, 0, &weights->sz); - - return err ? err : count; -} - -static ssize_t nr_accesses_permil_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - struct damon_sysfs_weights *weights = container_of(kobj, - struct damon_sysfs_weights, kobj); - - return sysfs_emit(buf, "%u\n", weights->nr_accesses); -} - -static ssize_t nr_accesses_permil_store(struct kobject *kobj, - struct kobj_attribute *attr, const char *buf, size_t count) -{ - struct damon_sysfs_weights *weights = container_of(kobj, - struct damon_sysfs_weights, kobj); - int err = kstrtouint(buf, 0, &weights->nr_accesses); - - return err ? err : count; -} - -static ssize_t age_permil_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - struct damon_sysfs_weights *weights = container_of(kobj, - struct damon_sysfs_weights, kobj); - - return sysfs_emit(buf, "%u\n", weights->age); -} - -static ssize_t age_permil_store(struct kobject *kobj, - struct kobj_attribute *attr, const char *buf, size_t count) -{ - struct damon_sysfs_weights *weights = container_of(kobj, - struct damon_sysfs_weights, kobj); - int err = kstrtouint(buf, 0, &weights->age); - - return err ? 
err : count; -} - -static void damon_sysfs_weights_release(struct kobject *kobj) -{ - kfree(container_of(kobj, struct damon_sysfs_weights, kobj)); -} - -static struct kobj_attribute damon_sysfs_weights_sz_attr = - __ATTR_RW_MODE(sz_permil, 0600); - -static struct kobj_attribute damon_sysfs_weights_nr_accesses_attr = - __ATTR_RW_MODE(nr_accesses_permil, 0600); - -static struct kobj_attribute damon_sysfs_weights_age_attr = - __ATTR_RW_MODE(age_permil, 0600); - -static struct attribute *damon_sysfs_weights_attrs[] = { - &damon_sysfs_weights_sz_attr.attr, - &damon_sysfs_weights_nr_accesses_attr.attr, - &damon_sysfs_weights_age_attr.attr, - NULL, -}; -ATTRIBUTE_GROUPS(damon_sysfs_weights); - -static struct kobj_type damon_sysfs_weights_ktype = { - .release = damon_sysfs_weights_release, - .sysfs_ops = &kobj_sysfs_ops, - .default_groups = damon_sysfs_weights_groups, -}; - -/* - * quotas directory - */ - -struct damon_sysfs_quotas { - struct kobject kobj; - struct damon_sysfs_weights *weights; - unsigned long ms; - unsigned long sz; - unsigned long reset_interval_ms; -}; - -static struct damon_sysfs_quotas *damon_sysfs_quotas_alloc(void) -{ - return kzalloc(sizeof(struct damon_sysfs_quotas), GFP_KERNEL); -} - -static int damon_sysfs_quotas_add_dirs(struct damon_sysfs_quotas *quotas) -{ - struct damon_sysfs_weights *weights; - int err; - - weights = damon_sysfs_weights_alloc(0, 0, 0); - if (!weights) - return -ENOMEM; - - err = kobject_init_and_add(&weights->kobj, &damon_sysfs_weights_ktype, - &quotas->kobj, "weights"); - if (err) - kobject_put(&weights->kobj); - else - quotas->weights = weights; - return err; -} - -static void damon_sysfs_quotas_rm_dirs(struct damon_sysfs_quotas *quotas) -{ - kobject_put(&quotas->weights->kobj); -} - -static ssize_t ms_show(struct kobject *kobj, struct kobj_attribute *attr, - char *buf) -{ - struct damon_sysfs_quotas *quotas = container_of(kobj, - struct damon_sysfs_quotas, kobj); - - return sysfs_emit(buf, "%lu\n", quotas->ms); -} - -static
ssize_t ms_store(struct kobject *kobj, struct kobj_attribute *attr, - const char *buf, size_t count) -{ - struct damon_sysfs_quotas *quotas = container_of(kobj, - struct damon_sysfs_quotas, kobj); - int err = kstrtoul(buf, 0, &quotas->ms); - - if (err) - return -EINVAL; - return count; -} - -static ssize_t bytes_show(struct kobject *kobj, struct kobj_attribute *attr, - char *buf) -{ - struct damon_sysfs_quotas *quotas = container_of(kobj, - struct damon_sysfs_quotas, kobj); - - return sysfs_emit(buf, "%lu\n", quotas->sz); -} - -static ssize_t bytes_store(struct kobject *kobj, - struct kobj_attribute *attr, const char *buf, size_t count) -{ - struct damon_sysfs_quotas *quotas = container_of(kobj, - struct damon_sysfs_quotas, kobj); - int err = kstrtoul(buf, 0, &quotas->sz); - - if (err) - return -EINVAL; - return count; -} - -static ssize_t reset_interval_ms_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - struct damon_sysfs_quotas *quotas = container_of(kobj, - struct damon_sysfs_quotas, kobj); - - return sysfs_emit(buf, "%lu\n", quotas->reset_interval_ms); -} - -static ssize_t reset_interval_ms_store(struct kobject *kobj, - struct kobj_attribute *attr, const char *buf, size_t count) -{ - struct damon_sysfs_quotas *quotas = container_of(kobj, - struct damon_sysfs_quotas, kobj); - int err = kstrtoul(buf, 0, &quotas->reset_interval_ms); - - if (err) - return -EINVAL; - return count; -} - -static void damon_sysfs_quotas_release(struct kobject *kobj) -{ - kfree(container_of(kobj, struct damon_sysfs_quotas, kobj)); -} - -static struct kobj_attribute damon_sysfs_quotas_ms_attr = - __ATTR_RW_MODE(ms, 0600); - -static struct kobj_attribute damon_sysfs_quotas_sz_attr = - __ATTR_RW_MODE(bytes, 0600); - -static struct kobj_attribute damon_sysfs_quotas_reset_interval_ms_attr = - __ATTR_RW_MODE(reset_interval_ms, 0600); - -static struct attribute *damon_sysfs_quotas_attrs[] = { - &damon_sysfs_quotas_ms_attr.attr, - &damon_sysfs_quotas_sz_attr.attr, -
&damon_sysfs_quotas_reset_interval_ms_attr.attr, - NULL, -}; -ATTRIBUTE_GROUPS(damon_sysfs_quotas); - -static struct kobj_type damon_sysfs_quotas_ktype = { - .release = damon_sysfs_quotas_release, - .sysfs_ops = &kobj_sysfs_ops, - .default_groups = damon_sysfs_quotas_groups, -}; - -/* - * access_pattern directory - */ - -struct damon_sysfs_access_pattern { - struct kobject kobj; - struct damon_sysfs_ul_range *sz; - struct damon_sysfs_ul_range *nr_accesses; - struct damon_sysfs_ul_range *age; -}; - -static -struct damon_sysfs_access_pattern *damon_sysfs_access_pattern_alloc(void) -{ - struct damon_sysfs_access_pattern *access_pattern = - kmalloc(sizeof(*access_pattern), GFP_KERNEL); - - if (!access_pattern) - return NULL; - access_pattern->kobj = (struct kobject){}; - return access_pattern; -} - -static int damon_sysfs_access_pattern_add_range_dir( - struct damon_sysfs_access_pattern *access_pattern, - struct damon_sysfs_ul_range **range_dir_ptr, - char *name) -{ - struct damon_sysfs_ul_range *range = damon_sysfs_ul_range_alloc(0, 0); - int err; - - if (!range) - return -ENOMEM; - err = kobject_init_and_add(&range->kobj, &damon_sysfs_ul_range_ktype, - &access_pattern->kobj, name); - if (err) - kobject_put(&range->kobj); - else - *range_dir_ptr = range; - return err; -} - -static int damon_sysfs_access_pattern_add_dirs( - struct damon_sysfs_access_pattern *access_pattern) -{ - int err; - - err = damon_sysfs_access_pattern_add_range_dir(access_pattern, - &access_pattern->sz, "sz"); - if (err) - goto put_sz_out; - - err = damon_sysfs_access_pattern_add_range_dir(access_pattern, - &access_pattern->nr_accesses, "nr_accesses"); - if (err) - goto put_nr_accesses_sz_out; - - err = damon_sysfs_access_pattern_add_range_dir(access_pattern, - &access_pattern->age, "age"); - if (err) - goto put_age_nr_accesses_sz_out; - return 0; - -put_age_nr_accesses_sz_out: - kobject_put(&access_pattern->age->kobj); - access_pattern->age = NULL; -put_nr_accesses_sz_out: - 
kobject_put(&access_pattern->nr_accesses->kobj); - access_pattern->nr_accesses = NULL; -put_sz_out: - kobject_put(&access_pattern->sz->kobj); - access_pattern->sz = NULL; - return err; -} - -static void damon_sysfs_access_pattern_rm_dirs( - struct damon_sysfs_access_pattern *access_pattern) -{ - kobject_put(&access_pattern->sz->kobj); - kobject_put(&access_pattern->nr_accesses->kobj); - kobject_put(&access_pattern->age->kobj); -} - -static void damon_sysfs_access_pattern_release(struct kobject *kobj) -{ - kfree(container_of(kobj, struct damon_sysfs_access_pattern, kobj)); -} - -static struct attribute *damon_sysfs_access_pattern_attrs[] = { - NULL, -}; -ATTRIBUTE_GROUPS(damon_sysfs_access_pattern); - -static struct kobj_type damon_sysfs_access_pattern_ktype = { - .release = damon_sysfs_access_pattern_release, - .sysfs_ops = &kobj_sysfs_ops, - .default_groups = damon_sysfs_access_pattern_groups, -}; - -/* - * scheme directory - */ - -struct damon_sysfs_scheme { - struct kobject kobj; - enum damos_action action; - struct damon_sysfs_access_pattern *access_pattern; - struct damon_sysfs_quotas *quotas; - struct damon_sysfs_watermarks *watermarks; - struct damon_sysfs_stats *stats; -}; - -/* This should match with enum damos_action */ -static const char * const damon_sysfs_damos_action_strs[] = { - "willneed", - "cold", - "pageout", - "hugepage", - "nohugepage", - "lru_prio", - "lru_deprio", - "stat", -}; - -static struct damon_sysfs_scheme *damon_sysfs_scheme_alloc( - enum damos_action action) -{ - struct damon_sysfs_scheme *scheme = kmalloc(sizeof(*scheme), - GFP_KERNEL); - - if (!scheme) - return NULL; - scheme->kobj = (struct kobject){}; - scheme->action = action; - return scheme; -} - -static int damon_sysfs_scheme_set_access_pattern( - struct damon_sysfs_scheme *scheme) -{ - struct damon_sysfs_access_pattern *access_pattern; - int err; - - access_pattern = damon_sysfs_access_pattern_alloc(); - if (!access_pattern) - return -ENOMEM; - err = 
kobject_init_and_add(&access_pattern->kobj, - &damon_sysfs_access_pattern_ktype, &scheme->kobj, - "access_pattern"); - if (err) - goto out; - err = damon_sysfs_access_pattern_add_dirs(access_pattern); - if (err) - goto out; - scheme->access_pattern = access_pattern; - return 0; - -out: - kobject_put(&access_pattern->kobj); - return err; -} - -static int damon_sysfs_scheme_set_quotas(struct damon_sysfs_scheme *scheme) -{ - struct damon_sysfs_quotas *quotas = damon_sysfs_quotas_alloc(); - int err; - - if (!quotas) - return -ENOMEM; - err = kobject_init_and_add(&quotas->kobj, &damon_sysfs_quotas_ktype, - &scheme->kobj, "quotas"); - if (err) - goto out; - err = damon_sysfs_quotas_add_dirs(quotas); - if (err) - goto out; - scheme->quotas = quotas; - return 0; - -out: - kobject_put(&quotas->kobj); - return err; -} - -static int damon_sysfs_scheme_set_watermarks(struct damon_sysfs_scheme *scheme) -{ - struct damon_sysfs_watermarks *watermarks = - damon_sysfs_watermarks_alloc(DAMOS_WMARK_NONE, 0, 0, 0, 0); - int err; - - if (!watermarks) - return -ENOMEM; - err = kobject_init_and_add(&watermarks->kobj, - &damon_sysfs_watermarks_ktype, &scheme->kobj, - "watermarks"); - if (err) - kobject_put(&watermarks->kobj); - else - scheme->watermarks = watermarks; - return err; -} - -static int damon_sysfs_scheme_set_stats(struct damon_sysfs_scheme *scheme) -{ - struct damon_sysfs_stats *stats = damon_sysfs_stats_alloc(); - int err; - - if (!stats) - return -ENOMEM; - err = kobject_init_and_add(&stats->kobj, &damon_sysfs_stats_ktype, - &scheme->kobj, "stats"); - if (err) - kobject_put(&stats->kobj); - else - scheme->stats = stats; - return err; -} - -static int damon_sysfs_scheme_add_dirs(struct damon_sysfs_scheme *scheme) -{ - int err; - - err = damon_sysfs_scheme_set_access_pattern(scheme); - if (err) - return err; - err = damon_sysfs_scheme_set_quotas(scheme); - if (err) - goto put_access_pattern_out; - err = damon_sysfs_scheme_set_watermarks(scheme); - if (err) - goto
put_quotas_access_pattern_out; - err = damon_sysfs_scheme_set_stats(scheme); - if (err) - goto put_watermarks_quotas_access_pattern_out; - return 0; - -put_watermarks_quotas_access_pattern_out: - kobject_put(&scheme->watermarks->kobj); - scheme->watermarks = NULL; -put_quotas_access_pattern_out: - kobject_put(&scheme->quotas->kobj); - scheme->quotas = NULL; -put_access_pattern_out: - kobject_put(&scheme->access_pattern->kobj); - scheme->access_pattern = NULL; - return err; -} - -static void damon_sysfs_scheme_rm_dirs(struct damon_sysfs_scheme *scheme) -{ - damon_sysfs_access_pattern_rm_dirs(scheme->access_pattern); - kobject_put(&scheme->access_pattern->kobj); - damon_sysfs_quotas_rm_dirs(scheme->quotas); - kobject_put(&scheme->quotas->kobj); - kobject_put(&scheme->watermarks->kobj); - kobject_put(&scheme->stats->kobj); -} - -static ssize_t action_show(struct kobject *kobj, struct kobj_attribute *attr, - char *buf) -{ - struct damon_sysfs_scheme *scheme = container_of(kobj, - struct damon_sysfs_scheme, kobj); - - return sysfs_emit(buf, "%s\n", - damon_sysfs_damos_action_strs[scheme->action]); -} - -static ssize_t action_store(struct kobject *kobj, struct kobj_attribute *attr, - const char *buf, size_t count) -{ - struct damon_sysfs_scheme *scheme = container_of(kobj, - struct damon_sysfs_scheme, kobj); - enum damos_action action; - - for (action = 0; action < NR_DAMOS_ACTIONS; action++) { - if (sysfs_streq(buf, damon_sysfs_damos_action_strs[action])) { - scheme->action = action; - return count; - } - } - return -EINVAL; -} - -static void damon_sysfs_scheme_release(struct kobject *kobj) -{ - kfree(container_of(kobj, struct damon_sysfs_scheme, kobj)); -} - -static struct kobj_attribute damon_sysfs_scheme_action_attr = - __ATTR_RW_MODE(action, 0600); - -static struct attribute *damon_sysfs_scheme_attrs[] = { - &damon_sysfs_scheme_action_attr.attr, - NULL, -}; -ATTRIBUTE_GROUPS(damon_sysfs_scheme); - -static struct kobj_type damon_sysfs_scheme_ktype = { - .release = 
damon_sysfs_scheme_release, - .sysfs_ops = &kobj_sysfs_ops, - .default_groups = damon_sysfs_scheme_groups, -}; - -/* - * schemes directory - */ - -struct damon_sysfs_schemes { - struct kobject kobj; - struct damon_sysfs_scheme **schemes_arr; - int nr; -}; - -static struct damon_sysfs_schemes *damon_sysfs_schemes_alloc(void) -{ - return kzalloc(sizeof(struct damon_sysfs_schemes), GFP_KERNEL); -} - -static void damon_sysfs_schemes_rm_dirs(struct damon_sysfs_schemes *schemes) -{ - struct damon_sysfs_scheme **schemes_arr = schemes->schemes_arr; - int i; - - for (i = 0; i < schemes->nr; i++) { - damon_sysfs_scheme_rm_dirs(schemes_arr[i]); - kobject_put(&schemes_arr[i]->kobj); - } - schemes->nr = 0; - kfree(schemes_arr); - schemes->schemes_arr = NULL; -} - -static int damon_sysfs_schemes_add_dirs(struct damon_sysfs_schemes *schemes, - int nr_schemes) -{ - struct damon_sysfs_scheme **schemes_arr, *scheme; - int err, i; - - damon_sysfs_schemes_rm_dirs(schemes); - if (!nr_schemes) - return 0; - - schemes_arr = kmalloc_array(nr_schemes, sizeof(*schemes_arr), - GFP_KERNEL | __GFP_NOWARN); - if (!schemes_arr) - return -ENOMEM; - schemes->schemes_arr = schemes_arr; - - for (i = 0; i < nr_schemes; i++) { - scheme = damon_sysfs_scheme_alloc(DAMOS_STAT); - if (!scheme) { - damon_sysfs_schemes_rm_dirs(schemes); - return -ENOMEM; - } - - err = kobject_init_and_add(&scheme->kobj, - &damon_sysfs_scheme_ktype, &schemes->kobj, - "%d", i); - if (err) - goto out; - err = damon_sysfs_scheme_add_dirs(scheme); - if (err) - goto out; - - schemes_arr[i] = scheme; - schemes->nr++; - } - return 0; - -out: - damon_sysfs_schemes_rm_dirs(schemes); - kobject_put(&scheme->kobj); - return err; -} - -static ssize_t nr_schemes_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - struct damon_sysfs_schemes *schemes = container_of(kobj, - struct damon_sysfs_schemes, kobj); - - return sysfs_emit(buf, "%d\n", schemes->nr); -} - -static ssize_t nr_schemes_store(struct kobject *kobj, - 
struct kobj_attribute *attr, const char *buf, size_t count) -{ - struct damon_sysfs_schemes *schemes; - int nr, err = kstrtoint(buf, 0, &nr); - - if (err) - return err; - if (nr < 0) - return -EINVAL; - - schemes = container_of(kobj, struct damon_sysfs_schemes, kobj); - - if (!mutex_trylock(&damon_sysfs_lock)) - return -EBUSY; - err = damon_sysfs_schemes_add_dirs(schemes, nr); - mutex_unlock(&damon_sysfs_lock); - if (err) - return err; - return count; -} - -static void damon_sysfs_schemes_release(struct kobject *kobj) -{ - kfree(container_of(kobj, struct damon_sysfs_schemes, kobj)); -} - -static struct kobj_attribute damon_sysfs_schemes_nr_attr = - __ATTR_RW_MODE(nr_schemes, 0600); - -static struct attribute *damon_sysfs_schemes_attrs[] = { - &damon_sysfs_schemes_nr_attr.attr, - NULL, -}; -ATTRIBUTE_GROUPS(damon_sysfs_schemes); - -static struct kobj_type damon_sysfs_schemes_ktype = { - .release = damon_sysfs_schemes_release, - .sysfs_ops = &kobj_sysfs_ops, - .default_groups = damon_sysfs_schemes_groups, -}; - /* * init region directory */ @@ -2133,104 +1190,6 @@ static int damon_sysfs_set_targets(struct damon_ctx *ctx, return 0; } -static struct damos *damon_sysfs_mk_scheme( - struct damon_sysfs_scheme *sysfs_scheme) -{ - struct damon_sysfs_access_pattern *access_pattern = - sysfs_scheme->access_pattern; - struct damon_sysfs_quotas *sysfs_quotas = sysfs_scheme->quotas; - struct damon_sysfs_weights *sysfs_weights = sysfs_quotas->weights; - struct damon_sysfs_watermarks *sysfs_wmarks = sysfs_scheme->watermarks; - - struct damos_access_pattern pattern = { - .min_sz_region = access_pattern->sz->min, - .max_sz_region = access_pattern->sz->max, - .min_nr_accesses = access_pattern->nr_accesses->min, - .max_nr_accesses = access_pattern->nr_accesses->max, - .min_age_region = access_pattern->age->min, - .max_age_region = access_pattern->age->max, - }; - struct damos_quota quota = { - .ms = sysfs_quotas->ms, - .sz = sysfs_quotas->sz, - .reset_interval = 
sysfs_quotas->reset_interval_ms, - .weight_sz = sysfs_weights->sz, - .weight_nr_accesses = sysfs_weights->nr_accesses, - .weight_age = sysfs_weights->age, - }; - struct damos_watermarks wmarks = { - .metric = sysfs_wmarks->metric, - .interval = sysfs_wmarks->interval_us, - .high = sysfs_wmarks->high, - .mid = sysfs_wmarks->mid, - .low = sysfs_wmarks->low, - }; - - return damon_new_scheme(&pattern, sysfs_scheme->action, &quota, - &wmarks); -} - -static void damon_sysfs_update_scheme(struct damos *scheme, - struct damon_sysfs_scheme *sysfs_scheme) -{ - struct damon_sysfs_access_pattern *access_pattern = - sysfs_scheme->access_pattern; - struct damon_sysfs_quotas *sysfs_quotas = sysfs_scheme->quotas; - struct damon_sysfs_weights *sysfs_weights = sysfs_quotas->weights; - struct damon_sysfs_watermarks *sysfs_wmarks = sysfs_scheme->watermarks; - - scheme->pattern.min_sz_region = access_pattern->sz->min; - scheme->pattern.max_sz_region = access_pattern->sz->max; - scheme->pattern.min_nr_accesses = access_pattern->nr_accesses->min; - scheme->pattern.max_nr_accesses = access_pattern->nr_accesses->max; - scheme->pattern.min_age_region = access_pattern->age->min; - scheme->pattern.max_age_region = access_pattern->age->max; - - scheme->action = sysfs_scheme->action; - - scheme->quota.ms = sysfs_quotas->ms; - scheme->quota.sz = sysfs_quotas->sz; - scheme->quota.reset_interval = sysfs_quotas->reset_interval_ms; - scheme->quota.weight_sz = sysfs_weights->sz; - scheme->quota.weight_nr_accesses = sysfs_weights->nr_accesses; - scheme->quota.weight_age = sysfs_weights->age; - - scheme->wmarks.metric = sysfs_wmarks->metric; - scheme->wmarks.interval = sysfs_wmarks->interval_us; - scheme->wmarks.high = sysfs_wmarks->high; - scheme->wmarks.mid = sysfs_wmarks->mid; - scheme->wmarks.low = sysfs_wmarks->low; -} - -static int damon_sysfs_set_schemes(struct damon_ctx *ctx, - struct damon_sysfs_schemes *sysfs_schemes) -{ - struct damos *scheme, *next; - int i = 0; - -
damon_for_each_scheme_safe(scheme, next, ctx) { - if (i < sysfs_schemes->nr) - damon_sysfs_update_scheme(scheme, - sysfs_schemes->schemes_arr[i]); - else - damon_destroy_scheme(scheme); - i++; - } - - for (; i < sysfs_schemes->nr; i++) { - struct damos *scheme, *next; - - scheme = damon_sysfs_mk_scheme(sysfs_schemes->schemes_arr[i]); - if (!scheme) { - damon_for_each_scheme_safe(scheme, next, ctx) - damon_destroy_scheme(scheme); - return -ENOMEM; - } - damon_add_scheme(ctx, scheme); - } - return 0; -} - static void damon_sysfs_before_terminate(struct damon_ctx *ctx) { struct damon_target *t, *next; @@ -2246,29 +1205,6 @@ static void damon_sysfs_before_terminate(struct damon_ctx *ctx) mutex_unlock(&ctx->kdamond_lock); } -static void damon_sysfs_schemes_update_stats( - struct damon_sysfs_schemes *sysfs_schemes, - struct damon_ctx *ctx) -{ - struct damos *scheme; - int schemes_idx = 0; - - damon_for_each_scheme(scheme, ctx) { - struct damon_sysfs_stats *sysfs_stats; - - /* user could have removed the scheme sysfs dir */ - if (schemes_idx >= sysfs_schemes->nr) - break; - - sysfs_stats = sysfs_schemes->schemes_arr[schemes_idx++]->stats; - sysfs_stats->nr_tried = scheme->stat.nr_tried; - sysfs_stats->sz_tried = scheme->stat.sz_tried; - sysfs_stats->nr_applied = scheme->stat.nr_applied; - sysfs_stats->sz_applied = scheme->stat.sz_applied; - sysfs_stats->qt_exceeds = scheme->stat.qt_exceeds; - } -} - /* * damon_sysfs_upd_schemes_stats() - Update schemes stats sysfs files. * @kdamond: The kobject wrapper that associated to the kdamond thread. From 7ae2c17f53d5054d1fe5c1a103ad46068034617d Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Wed, 26 Oct 2022 22:59:42 +0000 Subject: [PATCH 2964/4122] mm/damon/modules: deduplicate init steps for DAMON context setup DAMON_RECLAIM and DAMON_LRU_SORT has duplicated code for DAMON context and target initializations. Deduplicate the part by implementing a function for the initialization in 'modules-common.c' and using it. 
Link: https://lkml.kernel.org/r/20221026225943.100429-12-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- mm/damon/Makefile | 4 ++-- mm/damon/lru_sort.c | 17 +++------------- mm/damon/modules-common.c | 42 +++++++++++++++++++++++++++++++++++++++ mm/damon/modules-common.h | 3 +++ mm/damon/reclaim.c | 17 +++------------- 5 files changed, 53 insertions(+), 30 deletions(-) create mode 100644 mm/damon/modules-common.c diff --git a/mm/damon/Makefile b/mm/damon/Makefile index 1e86f5253d7f..f7add3f4aa79 100644 --- a/mm/damon/Makefile +++ b/mm/damon/Makefile @@ -5,5 +5,5 @@ obj-$(CONFIG_DAMON_VADDR) += ops-common.o vaddr.o obj-$(CONFIG_DAMON_PADDR) += ops-common.o paddr.o obj-$(CONFIG_DAMON_SYSFS) += sysfs-common.o sysfs-schemes.o sysfs.o obj-$(CONFIG_DAMON_DBGFS) += dbgfs.o -obj-$(CONFIG_DAMON_RECLAIM) += reclaim.o -obj-$(CONFIG_DAMON_LRU_SORT) += lru_sort.o +obj-$(CONFIG_DAMON_RECLAIM) += modules-common.o reclaim.o +obj-$(CONFIG_DAMON_LRU_SORT) += modules-common.o lru_sort.o diff --git a/mm/damon/lru_sort.c b/mm/damon/lru_sort.c index efbc2bda8b9c..a1896c5acfe9 100644 --- a/mm/damon/lru_sort.c +++ b/mm/damon/lru_sort.c @@ -314,25 +314,14 @@ static int damon_lru_sort_after_wmarks_check(struct damon_ctx *c) static int __init damon_lru_sort_init(void) { - ctx = damon_new_ctx(); - if (!ctx) - return -ENOMEM; + int err = damon_modules_new_paddr_ctx_target(&ctx, &target); - if (damon_select_ops(ctx, DAMON_OPS_PADDR)) { - damon_destroy_ctx(ctx); - return -EINVAL; - } + if (err) + return err; ctx->callback.after_wmarks_check = damon_lru_sort_after_wmarks_check; ctx->callback.after_aggregation = damon_lru_sort_after_aggregation; - target = damon_new_target(); - if (!target) { - damon_destroy_ctx(ctx); - return -ENOMEM; - } - damon_add_target(ctx, target); - schedule_delayed_work(&damon_lru_sort_timer, 0); damon_lru_sort_initialized = true; diff --git a/mm/damon/modules-common.c b/mm/damon/modules-common.c new file mode 100644 index 
000000000000..b2381a8466ec --- /dev/null +++ b/mm/damon/modules-common.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Common Primitives for DAMON Modules + * + * Author: SeongJae Park + */ + +#include + +#include "modules-common.h" + +/* + * Allocate, set, and return a DAMON context for the physical address space. + * @ctxp: Pointer to save the point to the newly created context + * @targetp: Pointer to save the point to the newly created target + */ +int damon_modules_new_paddr_ctx_target(struct damon_ctx **ctxp, + struct damon_target **targetp) +{ + struct damon_ctx *ctx; + struct damon_target *target; + + ctx = damon_new_ctx(); + if (!ctx) + return -ENOMEM; + + if (damon_select_ops(ctx, DAMON_OPS_PADDR)) { + damon_destroy_ctx(ctx); + return -EINVAL; + } + + target = damon_new_target(); + if (!target) { + damon_destroy_ctx(ctx); + return -ENOMEM; + } + damon_add_target(ctx, target); + + *ctxp = ctx; + *targetp = target; + return 0; +} diff --git a/mm/damon/modules-common.h b/mm/damon/modules-common.h index 5a4921851d32..f49cdb417005 100644 --- a/mm/damon/modules-common.h +++ b/mm/damon/modules-common.h @@ -44,3 +44,6 @@ 0400); \ module_param_named(nr_##qt_exceed_name, stat.qt_exceeds, ulong, \ 0400); + +int damon_modules_new_paddr_ctx_target(struct damon_ctx **ctxp, + struct damon_target **targetp); diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c index 162c9b1ca00f..3173f373435c 100644 --- a/mm/damon/reclaim.c +++ b/mm/damon/reclaim.c @@ -256,25 +256,14 @@ static int damon_reclaim_after_wmarks_check(struct damon_ctx *c) static int __init damon_reclaim_init(void) { - ctx = damon_new_ctx(); - if (!ctx) - return -ENOMEM; + int err = damon_modules_new_paddr_ctx_target(&ctx, &target); - if (damon_select_ops(ctx, DAMON_OPS_PADDR)) { - damon_destroy_ctx(ctx); - return -EINVAL; - } + if (err) + return err; ctx->callback.after_wmarks_check = damon_reclaim_after_wmarks_check; ctx->callback.after_aggregation = damon_reclaim_after_aggregation; - target 
= damon_new_target(); - if (!target) { - damon_destroy_ctx(ctx); - return -ENOMEM; - } - damon_add_target(ctx, target); - schedule_delayed_work(&damon_reclaim_timer, 0); damon_reclaim_initialized = true; From b0d3dbd1b98660ec2154fccbd21c13916c967c05 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Wed, 26 Oct 2022 22:59:43 +0000 Subject: [PATCH 2965/4122] mm/damon/{reclaim,lru_sort}: remove unnecessarily included headers Some headers that 'reclaim.c' and 'lru_sort.c' are including are unnecessary now owing to previous cleanups and refactorings. Remove those. Link: https://lkml.kernel.org/r/20221026225943.100429-13-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- mm/damon/lru_sort.c | 2 -- mm/damon/reclaim.c | 2 -- 2 files changed, 4 deletions(-) diff --git a/mm/damon/lru_sort.c b/mm/damon/lru_sort.c index a1896c5acfe9..5c60163e556c 100644 --- a/mm/damon/lru_sort.c +++ b/mm/damon/lru_sort.c @@ -8,9 +8,7 @@ #define pr_fmt(fmt) "damon-lru-sort: " fmt #include -#include #include -#include #include #include "modules-common.h" diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c index 3173f373435c..e14eb30c01f4 100644 --- a/mm/damon/reclaim.c +++ b/mm/damon/reclaim.c @@ -8,9 +8,7 @@ #define pr_fmt(fmt) "damon-reclaim: " fmt #include -#include #include -#include #include #include "modules-common.h" From 04e98764befa371836a78b2b489e8b931a3a9e9a Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 25 Oct 2022 17:36:47 +0000 Subject: [PATCH 2966/4122] mm/damon/reclaim: enable and disable synchronously Patch series "mm/damon/reclaim,lru_sort: enable/disable synchronously". Writing a value to DAMON_RECLAIM and DAMON_LRU_SORT's 'enabled' parameters turns on or off DAMON in an asynchronous way. This means the parameter cannot be used to read the current status of them. 'kdamond_pid' parameter should be used instead for the purpose. The documentation is easy to be read as it works in a synchronous way, so it is a little bit confusing.
It also makes the user space tooling dirty. There's no real reason to have the asynchronous behavior, though. Simply make the parameter work synchronously, rather than updating the document. The first and second patches change the behavior of the 'enabled' parameter for DAMON_RECLAIM and add a selftest for the changed behavior, respectively. Following two patches make the same changes for DAMON_LRU_SORT. This patch (of 4): Writing a value to DAMON_RECLAIM's 'enabled' parameter turns on or off DAMON in an asynchronous way. This means the parameter cannot be used to read the current status of DAMON_RECLAIM. 'kdamond_pid' parameter should be used instead for the purpose. The documentation is easy to be read as it works in a synchronous way, so it is a little bit confusing. It also makes the user space tooling dirty. There's no real reason to have the asynchronous behavior, though. Simply make the parameter work synchronously, rather than updating the document. Link: https://lkml.kernel.org/r/20221025173650.90624-1-sj@kernel.org Link: https://lkml.kernel.org/r/20221025173650.90624-2-sj@kernel.org Signed-off-by: SeongJae Park Cc: Shuah Khan Signed-off-by: Andrew Morton --- mm/damon/reclaim.c | 53 ++++++++++++++++++++-------------------------- 1 file changed, 23 insertions(+), 30 deletions(-) diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c index e14eb30c01f4..e57604bec06d 100644 --- a/mm/damon/reclaim.c +++ b/mm/damon/reclaim.c @@ -9,7 +9,6 @@ #include #include -#include #include "modules-common.h" @@ -181,38 +180,31 @@ static int damon_reclaim_turn(bool on) return 0; } -static struct delayed_work damon_reclaim_timer; -static void damon_reclaim_timer_fn(struct work_struct *work) -{ - static bool last_enabled; - bool now_enabled; - - now_enabled = enabled; - if (last_enabled != now_enabled) { - if (!damon_reclaim_turn(now_enabled)) - last_enabled = now_enabled; - else - enabled = last_enabled; - } -} -static DECLARE_DELAYED_WORK(damon_reclaim_timer,
damon_reclaim_timer_fn); - -static bool damon_reclaim_initialized; - static int damon_reclaim_enabled_store(const char *val, const struct kernel_param *kp) { - int rc = param_set_bool(val, kp); + bool is_enabled = enabled; + bool enable; + int err; - if (rc < 0) - return rc; + err = strtobool(val, &enable); + if (err) + return err; - /* system_wq might not initialized yet */ - if (!damon_reclaim_initialized) - return rc; + if (is_enabled == enable) + return 0; - schedule_delayed_work(&damon_reclaim_timer, 0); - return 0; + /* Called before init function. The function will handle this. */ + if (!ctx) + goto set_param_out; + + err = damon_reclaim_turn(enable); + if (err) + return err; + +set_param_out: + enabled = enable; + return err; } static const struct kernel_param_ops enabled_param_ops = { @@ -262,10 +254,11 @@ static int __init damon_reclaim_init(void) ctx->callback.after_wmarks_check = damon_reclaim_after_wmarks_check; ctx->callback.after_aggregation = damon_reclaim_after_aggregation; - schedule_delayed_work(&damon_reclaim_timer, 0); + /* 'enabled' has set before this function, probably via command line */ + if (enabled) + err = damon_reclaim_turn(true); - damon_reclaim_initialized = true; - return 0; + return err; } module_init(damon_reclaim_init); From 4cc0ee7787d7dc595752a8de2e073efa68f7c965 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 25 Oct 2022 17:36:48 +0000 Subject: [PATCH 2967/4122] selftests/damon: add tests for DAMON_RECLAIM's enabled parameter Add simple test cases for DAMON_RECLAIM's 'enabled' parameter. Those tests are focusing on the synchronous behavior of DAMON_RECLAIM enabling and disabling. 
Link: https://lkml.kernel.org/r/20221025173650.90624-3-sj@kernel.org Signed-off-by: SeongJae Park Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/Makefile | 1 + tools/testing/selftests/damon/reclaim.sh | 42 ++++++++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 tools/testing/selftests/damon/reclaim.sh diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile index a1fa2eff8192..dbbf18cb3e6b 100644 --- a/tools/testing/selftests/damon/Makefile +++ b/tools/testing/selftests/damon/Makefile @@ -8,5 +8,6 @@ TEST_PROGS = debugfs_attrs.sh debugfs_schemes.sh debugfs_target_ids.sh TEST_PROGS += debugfs_empty_targets.sh debugfs_huge_count_read_write.sh TEST_PROGS += debugfs_duplicate_context_creation.sh TEST_PROGS += sysfs.sh +TEST_PROGS += reclaim.sh include ../lib.mk diff --git a/tools/testing/selftests/damon/reclaim.sh b/tools/testing/selftests/damon/reclaim.sh new file mode 100644 index 000000000000..78dbc2334cbe --- /dev/null +++ b/tools/testing/selftests/damon/reclaim.sh @@ -0,0 +1,42 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +if [ $EUID -ne 0 ] +then + echo "Run as root" + exit $ksft_skip +fi + +damon_reclaim_enabled="/sys/module/damon_reclaim/parameters/enabled" +if [ ! -f "$damon_reclaim_enabled" ] +then + echo "No 'enabled' file. 
Maybe DAMON_RECLAIM not built" + exit $ksft_skip +fi + +nr_kdamonds=$(pgrep kdamond | wc -l) +if [ "$nr_kdamonds" -ne 0 ] +then + echo "Another kdamond is running" + exit $ksft_skip +fi + +echo Y > "$damon_reclaim_enabled" + +nr_kdamonds=$(pgrep kdamond | wc -l) +if [ "$nr_kdamonds" -ne 1 ] +then + echo "kdamond is not turned on" + exit 1 +fi + +echo N > "$damon_reclaim_enabled" +nr_kdamonds=$(pgrep kdamond | wc -l) +if [ "$nr_kdamonds" -ne 0 ] +then + echo "kdamond is not turned off" + exit 1 +fi From 7a034fbba3361e94956431d17660d7c5674d13c3 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 25 Oct 2022 17:36:49 +0000 Subject: [PATCH 2968/4122] mm/damon/lru_sort: enable and disable synchronously Writing a value to DAMON_LRU_SORT's 'enabled' parameter turns on or off DAMON in an asynchronous way. This means the parameter cannot be used to read the current status of DAMON_LRU_SORT. 'kdamond_pid' parameter should be used instead for the purpose. The documentation is easy to be read as it works in a synchronous way, so it is a little bit confusing. It also makes the user space tooling dirty. There's no real reason to have the asynchronous behavior, though. Simply make the parameter work synchronously, rather than updating the document.
Link: https://lkml.kernel.org/r/20221025173650.90624-4-sj@kernel.org Signed-off-by: SeongJae Park Cc: Shuah Khan Signed-off-by: Andrew Morton --- mm/damon/lru_sort.c | 51 +++++++++++++++++++-------------------------- 1 file changed, 22 insertions(+), 29 deletions(-) diff --git a/mm/damon/lru_sort.c b/mm/damon/lru_sort.c index 5c60163e556c..2a532e3983df 100644 --- a/mm/damon/lru_sort.c +++ b/mm/damon/lru_sort.c @@ -9,7 +9,6 @@ #include #include -#include #include "modules-common.h" @@ -235,38 +234,31 @@ static int damon_lru_sort_turn(bool on) return 0; } -static struct delayed_work damon_lru_sort_timer; -static void damon_lru_sort_timer_fn(struct work_struct *work) -{ - static bool last_enabled; - bool now_enabled; - - now_enabled = enabled; - if (last_enabled != now_enabled) { - if (!damon_lru_sort_turn(now_enabled)) - last_enabled = now_enabled; - else - enabled = last_enabled; - } -} -static DECLARE_DELAYED_WORK(damon_lru_sort_timer, damon_lru_sort_timer_fn); - -static bool damon_lru_sort_initialized; - static int damon_lru_sort_enabled_store(const char *val, const struct kernel_param *kp) { - int rc = param_set_bool(val, kp); + bool is_enabled = enabled; + bool enable; + int err; - if (rc < 0) - return rc; + err = strtobool(val, &enable); + if (err) + return err; - if (!damon_lru_sort_initialized) - return rc; + if (is_enabled == enable) + return 0; - schedule_delayed_work(&damon_lru_sort_timer, 0); + /* Called before init function. The function will handle this. 
*/ + if (!ctx) + goto set_param_out; - return 0; + err = damon_lru_sort_turn(enable); + if (err) + return err; + +set_param_out: + enabled = enable; + return err; } static const struct kernel_param_ops enabled_param_ops = { @@ -320,10 +312,11 @@ static int __init damon_lru_sort_init(void) ctx->callback.after_wmarks_check = damon_lru_sort_after_wmarks_check; ctx->callback.after_aggregation = damon_lru_sort_after_aggregation; - schedule_delayed_work(&damon_lru_sort_timer, 0); + /* 'enabled' has set before this function, probably via command line */ + if (enabled) + err = damon_lru_sort_turn(true); - damon_lru_sort_initialized = true; - return 0; + return err; } module_init(damon_lru_sort_init); From 9cd6ffa60256e931503d347006049b8bef508203 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 25 Oct 2022 17:36:50 +0000 Subject: [PATCH 2969/4122] selftests/damon: add tests for DAMON_LRU_SORT's enabled parameter Add simple test cases for DAMON_LRU_SORT's 'enabled' parameter. Those tests are focusing on the synchronous behavior of DAMON_RECLAIM enabling and disabling. 
Link: https://lkml.kernel.org/r/20221025173650.90624-5-sj@kernel.org Signed-off-by: SeongJae Park Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/Makefile | 2 +- tools/testing/selftests/damon/lru_sort.sh | 41 +++++++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/damon/lru_sort.sh diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile index dbbf18cb3e6b..af490acc5348 100644 --- a/tools/testing/selftests/damon/Makefile +++ b/tools/testing/selftests/damon/Makefile @@ -8,6 +8,6 @@ TEST_PROGS = debugfs_attrs.sh debugfs_schemes.sh debugfs_target_ids.sh TEST_PROGS += debugfs_empty_targets.sh debugfs_huge_count_read_write.sh TEST_PROGS += debugfs_duplicate_context_creation.sh TEST_PROGS += sysfs.sh -TEST_PROGS += reclaim.sh +TEST_PROGS += reclaim.sh lru_sort.sh include ../lib.mk diff --git a/tools/testing/selftests/damon/lru_sort.sh b/tools/testing/selftests/damon/lru_sort.sh new file mode 100644 index 000000000000..61b80197c896 --- /dev/null +++ b/tools/testing/selftests/damon/lru_sort.sh @@ -0,0 +1,41 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +if [ $EUID -ne 0 ] +then + echo "Run as root" + exit $ksft_skip +fi + +damon_lru_sort_enabled="/sys/module/damon_lru_sort/parameters/enabled" +if [ ! -f "$damon_lru_sort_enabled" ] +then + echo "No 'enabled' file. 
Maybe DAMON_LRU_SORT not built" + exit $ksft_skip +fi + +nr_kdamonds=$(pgrep kdamond | wc -l) +if [ "$nr_kdamonds" -ne 0 ] +then + echo "Another kdamond is running" + exit $ksft_skip +fi + +echo Y > "$damon_lru_sort_enabled" +nr_kdamonds=$(pgrep kdamond | wc -l) +if [ "$nr_kdamonds" -ne 1 ] +then + echo "kdamond is not turned on" + exit 1 +fi + +echo N > "$damon_lru_sort_enabled" +nr_kdamonds=$(pgrep kdamond | wc -l) +if [ "$nr_kdamonds" -ne 0 ] +then + echo "kdamond is not turned off" + exit 1 +fi From eb3992e833d3a17f9b0a3e0371d0b1d3d566f740 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 30 Sep 2022 23:31:32 +0000 Subject: [PATCH 2970/4122] KVM: VMX: Resume guest immediately when injecting #GP on ECREATE Resume the guest immediately when injecting a #GP on ECREATE due to an invalid enclave size, i.e. don't attempt ECREATE in the host. The #GP is a terminal fault, e.g. skipping the instruction if ECREATE is successful would result in KVM injecting #GP on the instruction following ECREATE. Fixes: 70210c044b4e ("KVM: VMX: Add SGX ENCLS[ECREATE] handler to enforce CPUID restrictions") Cc: stable@vger.kernel.org Cc: Kai Huang Signed-off-by: Sean Christopherson Reviewed-by: Kai Huang Link: https://lore.kernel.org/r/20220930233132.1723330-1-seanjc@google.com --- arch/x86/kvm/vmx/sgx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/sgx.c b/arch/x86/kvm/vmx/sgx.c index 8f95c7c01433..b12da2a6dec9 100644 --- a/arch/x86/kvm/vmx/sgx.c +++ b/arch/x86/kvm/vmx/sgx.c @@ -182,8 +182,10 @@ static int __handle_encls_ecreate(struct kvm_vcpu *vcpu, /* Enforce CPUID restriction on max enclave size. */ max_size_log2 = (attributes & SGX_ATTR_MODE64BIT) ? 
sgx_12_0->edx >> 8 : sgx_12_0->edx; - if (size >= BIT_ULL(max_size_log2)) + if (size >= BIT_ULL(max_size_log2)) { kvm_inject_gp(vcpu, 0); + return 1; + } /* * sgx_virt_ecreate() returns: From f1a7941243c102a44e8847e3b94ff4ff3ec56f25 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Mon, 24 Oct 2022 05:28:41 +0000 Subject: [PATCH 2971/4122] mm: convert mm's rss stats into percpu_counter Currently mm_struct maintains rss_stats which are updated on page fault and the unmapping codepaths. For page fault codepath the updates are cached per thread with the batch of TASK_RSS_EVENTS_THRESH which is 64. The reason for caching is performance for multithreaded applications otherwise the rss_stats updates may become hotspot for such applications. However this optimization comes with the cost of error margin in the rss stats. The rss_stats for applications with large number of threads can be very skewed. At worst the error margin is (nr_threads * 64) and we have a lot of applications with 100s of threads, so the error margin can be very high. Internally we had to reduce TASK_RSS_EVENTS_THRESH to 32. Recently we started seeing the unbounded errors for rss_stats for specific applications which use TCP rx0cp. It seems like vm_insert_pages() codepath does not sync rss_stats at all. This patch converts the rss_stats into percpu_counter to convert the error margin from (nr_threads * 64) to approximately (nr_cpus ^ 2). However this conversion enable us to get the accurate stats for situations where accuracy is more important than the cpu cost. This patch does not make such tradeoffs - we can just use percpu_counter_add_local() for the updates and percpu_counter_sum() (or percpu_counter_sync() + percpu_counter_read) for the readers. At the moment the readers are either procfs interface, oom_killer and memory reclaim which I think are not performance critical and should be ok with slow read. However I think we can make that change in a separate patch. 
Link: https://lkml.kernel.org/r/20221024052841.3291983-1-shakeelb@google.com Signed-off-by: Shakeel Butt Cc: Marek Szyprowski Signed-off-by: Andrew Morton --- include/linux/mm.h | 26 ++++-------- include/linux/mm_types.h | 7 +--- include/linux/mm_types_task.h | 13 ------ include/linux/percpu_counter.h | 1 - include/linux/sched.h | 3 -- include/trace/events/kmem.h | 8 ++-- kernel/fork.c | 16 +++++++- mm/memory.c | 73 +++++----------------------------- 8 files changed, 40 insertions(+), 107 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index f919befc8fac..0cb4e196d60b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2052,40 +2052,30 @@ static inline bool get_user_page_fast_only(unsigned long addr, */ static inline unsigned long get_mm_counter(struct mm_struct *mm, int member) { - long val = atomic_long_read(&mm->rss_stat.count[member]); - -#ifdef SPLIT_RSS_COUNTING - /* - * counter is updated in asynchronous manner and may go to minus. - * But it's never be expected number for users. 
- */ - if (val < 0) - val = 0; -#endif - return (unsigned long)val; + return percpu_counter_read_positive(&mm->rss_stat[member]); } -void mm_trace_rss_stat(struct mm_struct *mm, int member, long count); +void mm_trace_rss_stat(struct mm_struct *mm, int member); static inline void add_mm_counter(struct mm_struct *mm, int member, long value) { - long count = atomic_long_add_return(value, &mm->rss_stat.count[member]); + percpu_counter_add(&mm->rss_stat[member], value); - mm_trace_rss_stat(mm, member, count); + mm_trace_rss_stat(mm, member); } static inline void inc_mm_counter(struct mm_struct *mm, int member) { - long count = atomic_long_inc_return(&mm->rss_stat.count[member]); + percpu_counter_inc(&mm->rss_stat[member]); - mm_trace_rss_stat(mm, member, count); + mm_trace_rss_stat(mm, member); } static inline void dec_mm_counter(struct mm_struct *mm, int member) { - long count = atomic_long_dec_return(&mm->rss_stat.count[member]); + percpu_counter_dec(&mm->rss_stat[member]); - mm_trace_rss_stat(mm, member, count); + mm_trace_rss_stat(mm, member); } /* Optimized variant when page is already known not to be PageAnon */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 2d5b1575ffe0..e86861ff5bbd 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -18,6 +18,7 @@ #include #include #include +#include #include @@ -626,11 +627,7 @@ struct mm_struct { unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */ - /* - * Special counters, in some configurations protected by the - * page_table_lock, in other configurations by being atomic. 
- */ - struct mm_rss_stat rss_stat; + struct percpu_counter rss_stat[NR_MM_COUNTERS]; struct linux_binfmt *binfmt; diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h index 0bb4b6da9993..5414b5c6a103 100644 --- a/include/linux/mm_types_task.h +++ b/include/linux/mm_types_task.h @@ -36,19 +36,6 @@ enum { NR_MM_COUNTERS }; -#if USE_SPLIT_PTE_PTLOCKS && defined(CONFIG_MMU) -#define SPLIT_RSS_COUNTING -/* per-thread cached information, */ -struct task_rss_stat { - int events; /* for synchronization threshold */ - int count[NR_MM_COUNTERS]; -}; -#endif /* USE_SPLIT_PTE_PTLOCKS */ - -struct mm_rss_stat { - atomic_long_t count[NR_MM_COUNTERS]; -}; - struct page_frag { struct page *page; #if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536) diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 8ed5fba6d156..bde6c4c1f405 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -13,7 +13,6 @@ #include #include #include -#include /* percpu_counter batch for local add or sub */ #define PERCPU_COUNTER_LOCAL_BATCH INT_MAX diff --git a/include/linux/sched.h b/include/linux/sched.h index ffb6eb55cd13..079d299fa465 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -870,9 +870,6 @@ struct task_struct { struct mm_struct *mm; struct mm_struct *active_mm; -#ifdef SPLIT_RSS_COUNTING - struct task_rss_stat rss_stat; -#endif int exit_state; int exit_code; int exit_signal; diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index 243073cfc29d..58688768ef0f 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -346,10 +346,9 @@ TRACE_MM_PAGES TRACE_EVENT(rss_stat, TP_PROTO(struct mm_struct *mm, - int member, - long count), + int member), - TP_ARGS(mm, member, count), + TP_ARGS(mm, member), TP_STRUCT__entry( __field(unsigned int, mm_id) @@ -362,7 +361,8 @@ TRACE_EVENT(rss_stat, __entry->mm_id = mm_ptr_to_hash(mm); __entry->curr = !!(current->mm == mm); 
__entry->member = member; - __entry->size = (count << PAGE_SHIFT); + __entry->size = (percpu_counter_sum_positive(&mm->rss_stat[member]) + << PAGE_SHIFT); ), TP_printk("mm_id=%u curr=%d type=%s size=%ldB", diff --git a/kernel/fork.c b/kernel/fork.c index 08969f5aa38d..0fef202434c3 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -753,7 +753,7 @@ static void check_mm(struct mm_struct *mm) "Please make sure 'struct resident_page_types[]' is updated as well"); for (i = 0; i < NR_MM_COUNTERS; i++) { - long x = atomic_long_read(&mm->rss_stat.count[i]); + long x = percpu_counter_sum(&mm->rss_stat[i]); if (unlikely(x)) pr_alert("BUG: Bad rss-counter state mm:%p type:%s val:%ld\n", @@ -779,6 +779,8 @@ static void check_mm(struct mm_struct *mm) */ void __mmdrop(struct mm_struct *mm) { + int i; + BUG_ON(mm == &init_mm); WARN_ON_ONCE(mm == current->mm); WARN_ON_ONCE(mm == current->active_mm); @@ -788,6 +790,9 @@ void __mmdrop(struct mm_struct *mm) check_mm(mm); put_user_ns(mm->user_ns); mm_pasid_drop(mm); + + for (i = 0; i < NR_MM_COUNTERS; i++) + percpu_counter_destroy(&mm->rss_stat[i]); free_mm(mm); } EXPORT_SYMBOL_GPL(__mmdrop); @@ -1107,6 +1112,8 @@ static void mm_init_uprobes_state(struct mm_struct *mm) static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, struct user_namespace *user_ns) { + int i; + mt_init_flags(&mm->mm_mt, MM_MT_FLAGS); mt_set_external_lock(&mm->mm_mt, &mm->mmap_lock); atomic_set(&mm->mm_users, 1); @@ -1148,10 +1155,17 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, if (init_new_context(p, mm)) goto fail_nocontext; + for (i = 0; i < NR_MM_COUNTERS; i++) + if (percpu_counter_init(&mm->rss_stat[i], 0, GFP_KERNEL_ACCOUNT)) + goto fail_pcpu; + mm->user_ns = get_user_ns(user_ns); lru_gen_init_mm(mm); return mm; +fail_pcpu: + while (i > 0) + percpu_counter_destroy(&mm->rss_stat[--i]); fail_nocontext: mm_free_pgd(mm); fail_nopgd: diff --git a/mm/memory.c b/mm/memory.c index 7826143ec9cd..e0555ddd71b5 
100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -162,58 +162,11 @@ static int __init init_zero_pfn(void) } early_initcall(init_zero_pfn); -void mm_trace_rss_stat(struct mm_struct *mm, int member, long count) +void mm_trace_rss_stat(struct mm_struct *mm, int member) { - trace_rss_stat(mm, member, count); + trace_rss_stat(mm, member); } -#if defined(SPLIT_RSS_COUNTING) - -void sync_mm_rss(struct mm_struct *mm) -{ - int i; - - for (i = 0; i < NR_MM_COUNTERS; i++) { - if (current->rss_stat.count[i]) { - add_mm_counter(mm, i, current->rss_stat.count[i]); - current->rss_stat.count[i] = 0; - } - } - current->rss_stat.events = 0; -} - -static void add_mm_counter_fast(struct mm_struct *mm, int member, int val) -{ - struct task_struct *task = current; - - if (likely(task->mm == mm)) - task->rss_stat.count[member] += val; - else - add_mm_counter(mm, member, val); -} -#define inc_mm_counter_fast(mm, member) add_mm_counter_fast(mm, member, 1) -#define dec_mm_counter_fast(mm, member) add_mm_counter_fast(mm, member, -1) - -/* sync counter once per 64 page faults */ -#define TASK_RSS_EVENTS_THRESH (64) -static void check_sync_rss_stat(struct task_struct *task) -{ - if (unlikely(task != current)) - return; - if (unlikely(task->rss_stat.events++ > TASK_RSS_EVENTS_THRESH)) - sync_mm_rss(task->mm); -} -#else /* SPLIT_RSS_COUNTING */ - -#define inc_mm_counter_fast(mm, member) inc_mm_counter(mm, member) -#define dec_mm_counter_fast(mm, member) dec_mm_counter(mm, member) - -static void check_sync_rss_stat(struct task_struct *task) -{ -} - -#endif /* SPLIT_RSS_COUNTING */ - /* * Note: this doesn't free the actual pages themselves. That * has been handled earlier when unmapping all the memory regions. @@ -1857,7 +1810,7 @@ static int insert_page_into_pte_locked(struct vm_area_struct *vma, pte_t *pte, return -EBUSY; /* Ok, finally just insert the thing.. 
*/ get_page(page); - inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page)); + inc_mm_counter(vma->vm_mm, mm_counter_file(page)); page_add_file_rmap(page, vma, false); set_pte_at(vma->vm_mm, addr, pte, mk_pte(page, prot)); return 0; @@ -3153,12 +3106,11 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) if (likely(pte_same(*vmf->pte, vmf->orig_pte))) { if (old_page) { if (!PageAnon(old_page)) { - dec_mm_counter_fast(mm, - mm_counter_file(old_page)); - inc_mm_counter_fast(mm, MM_ANONPAGES); + dec_mm_counter(mm, mm_counter_file(old_page)); + inc_mm_counter(mm, MM_ANONPAGES); } } else { - inc_mm_counter_fast(mm, MM_ANONPAGES); + inc_mm_counter(mm, MM_ANONPAGES); } flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte)); entry = mk_pte(new_page, vma->vm_page_prot); @@ -3965,8 +3917,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) if (should_try_to_free_swap(folio, vma, vmf->flags)) folio_free_swap(folio); - inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); - dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS); + inc_mm_counter(vma->vm_mm, MM_ANONPAGES); + dec_mm_counter(vma->vm_mm, MM_SWAPENTS); pte = mk_pte(page, vma->vm_page_prot); /* @@ -4146,7 +4098,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) return handle_userfault(vmf, VM_UFFD_MISSING); } - inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); + inc_mm_counter(vma->vm_mm, MM_ANONPAGES); page_add_new_anon_rmap(page, vma, vmf->address); lru_cache_add_inactive_or_unevictable(page, vma); setpte: @@ -4336,11 +4288,11 @@ void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr) entry = pte_mkuffd_wp(pte_wrprotect(entry)); /* copy-on-write page */ if (write && !(vma->vm_flags & VM_SHARED)) { - inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); + inc_mm_counter(vma->vm_mm, MM_ANONPAGES); page_add_new_anon_rmap(page, vma, addr); lru_cache_add_inactive_or_unevictable(page, vma); } else { - inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page)); + inc_mm_counter(vma->vm_mm, 
mm_counter_file(page)); page_add_file_rmap(page, vma, false); } set_pte_at(vma->vm_mm, addr, vmf->pte, entry); @@ -5192,9 +5144,6 @@ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address, count_vm_event(PGFAULT); count_memcg_event_mm(vma->vm_mm, PGFAULT); - /* do counter updates before entering really critical section. */ - check_sync_rss_stat(current); - if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE, flags & FAULT_FLAG_INSTRUCTION, flags & FAULT_FLAG_REMOTE)) From f689054aace2ff13af2e9a44a74fbba650ca31ba Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Wed, 9 Nov 2022 01:20:11 +0000 Subject: [PATCH 2972/4122] percpu_counter: add percpu_counter_sum_all interface The percpu_counter is used for scenarios where performance is more important than the accuracy. For percpu_counter users, who want more accurate information in their slowpath, percpu_counter_sum is provided which traverses all the online CPUs to accumulate the data. The reason it only needs to traverse online CPUs is because percpu_counter does implement CPU offline callback which syncs the local data of the offlined CPU. However there is a small race window between the online CPUs traversal of percpu_counter_sum and the CPU offline callback. The offline callback has to traverse all the percpu_counters on the system to flush the CPU local data which can be a lot. During that time, the CPU which is going offline has already been published as offline to all the readers. So, as the offline callback is running, percpu_counter_sum can be called for one counter which has some state on the CPU going offline. Since percpu_counter_sum only traverses online CPUs, it will skip that specific CPU and the offline callback might not have flushed the state for that specific percpu_counter on that offlined CPU. Normally this is not an issue because percpu_counter users can deal with some inaccuracy for small time window. However a new user i.e. 
mm_struct on the cleanup path wants to check the exact state of the percpu_counter through check_mm(). For such users, this patch introduces percpu_counter_sum_all() which traverses all possible CPUs and it is used in fork.c:check_mm() to avoid the potential race. This issue is exposed by the later patch "mm: convert mm's rss stats into percpu_counter". Link: https://lkml.kernel.org/r/20221109012011.881058-1-shakeelb@google.com Signed-off-by: Shakeel Butt Reported-by: Marek Szyprowski Tested-by: Marek Szyprowski Signed-off-by: Andrew Morton --- include/linux/percpu_counter.h | 6 ++++++ kernel/fork.c | 5 +++++ lib/percpu_counter.c | 29 +++++++++++++++++++++++------ 3 files changed, 34 insertions(+), 6 deletions(-) diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index bde6c4c1f405..a3aae8d57a42 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -45,6 +45,7 @@ void percpu_counter_set(struct percpu_counter *fbc, s64 amount); void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch); s64 __percpu_counter_sum(struct percpu_counter *fbc); +s64 percpu_counter_sum_all(struct percpu_counter *fbc); int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch); void percpu_counter_sync(struct percpu_counter *fbc); @@ -193,6 +194,11 @@ static inline s64 percpu_counter_sum(struct percpu_counter *fbc) return percpu_counter_read(fbc); } +static inline s64 percpu_counter_sum_all(struct percpu_counter *fbc) +{ + return percpu_counter_read(fbc); +} + static inline bool percpu_counter_initialized(struct percpu_counter *fbc) { return true; diff --git a/kernel/fork.c b/kernel/fork.c index 0fef202434c3..1be4c4ab7f3e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -755,6 +755,11 @@ static void check_mm(struct mm_struct *mm) for (i = 0; i < NR_MM_COUNTERS; i++) { long x = percpu_counter_sum(&mm->rss_stat[i]); + if (likely(!x)) + continue; + + /* Making sure this is not due to race with CPU 
offlining. */ + x = percpu_counter_sum_all(&mm->rss_stat[i]); if (unlikely(x)) pr_alert("BUG: Bad rss-counter state mm:%p type:%s val:%ld\n", mm, resident_page_types[i], x); diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index ed610b75dc32..42f729c8e56c 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -117,11 +117,8 @@ void percpu_counter_sync(struct percpu_counter *fbc) } EXPORT_SYMBOL(percpu_counter_sync); -/* - * Add up all the per-cpu counts, return the result. This is a more accurate - * but much slower version of percpu_counter_read_positive() - */ -s64 __percpu_counter_sum(struct percpu_counter *fbc) +static s64 __percpu_counter_sum_mask(struct percpu_counter *fbc, + const struct cpumask *cpu_mask) { s64 ret; int cpu; @@ -129,15 +126,35 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc) raw_spin_lock_irqsave(&fbc->lock, flags); ret = fbc->count; - for_each_online_cpu(cpu) { + for_each_cpu(cpu, cpu_mask) { s32 *pcount = per_cpu_ptr(fbc->counters, cpu); ret += *pcount; } raw_spin_unlock_irqrestore(&fbc->lock, flags); return ret; } + +/* + * Add up all the per-cpu counts, return the result. This is a more accurate + * but much slower version of percpu_counter_read_positive() + */ +s64 __percpu_counter_sum(struct percpu_counter *fbc) +{ + return __percpu_counter_sum_mask(fbc, cpu_online_mask); +} EXPORT_SYMBOL(__percpu_counter_sum); +/* + * This is slower version of percpu_counter_sum as it traverses all possible + * cpus. Use this only in the cases where accurate data is needed in the + * presense of CPUs getting offlined. 
+ */ +s64 percpu_counter_sum_all(struct percpu_counter *fbc) +{ + return __percpu_counter_sum_mask(fbc, cpu_possible_mask); +} +EXPORT_SYMBOL(percpu_counter_sum_all); + int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp, struct lock_class_key *key) { From a873dfe1032a132bf89f9e19a6ac44f5a0b78754 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 21 Oct 2022 13:01:19 -0700 Subject: [PATCH 2973/4122] mm, hwpoison: try to recover from copy-on write faults Patch series "Copy-on-write poison recovery", v3. Part 1 deals with the process that triggered the copy on write fault with a store to a shared read-only page. That process is sent a SIGBUS with the usual machine check decoration to specify the virtual address of the lost page, together with the scope. Part 2 sets up to asynchronously take the page with the uncorrected error offline to prevent additional machine check faults. H/t to Miaohe Lin and Shuai Xue for pointing me to the existing function to queue a call to memory_failure(). On x86 there is some duplicate reporting (because the error is also signalled by the memory controller as well as by the core that triggered the machine check). Console logs look like this: This patch (of 2): If the kernel is copying a page as the result of a copy-on-write fault and runs into an uncorrectable error, Linux will crash because it does not have recovery code for this case where poison is consumed by the kernel. It is easy to set up a test case. Just inject an error into a private page, fork(2), and have the child process write to the page. I wrapped that neatly into a test at: git://git.kernel.org/pub/scm/linux/kernel/git/aegl/ras-tools.git just enable ACPI error injection and run: # ./einj_mem-uc -f copy-on-write Add a new copy_mc_user_highpage() function that uses copy_mc_to_kernel() on architectures where that is available (currently x86 and powerpc).
When an error is detected during the page copy, return VM_FAULT_HWPOISON to caller of wp_page_copy(). This propagates up the call stack. Both x86 and powerpc have code in their fault handler to deal with this code by sending a SIGBUS to the application. Note that this patch avoids a system crash and signals the process that triggered the copy-on-write action. It does not take any action for the memory error that is still in the shared page. To handle that a call to memory_failure() is needed. But this cannot be done from wp_page_copy() because it holds mmap_lock(). Perhaps the architecture fault handlers can deal with this loose end in a subsequent patch? On Intel/x86 this loose end will often be handled automatically because the memory controller provides an additional notification of the h/w poison in memory, the handler for this will call memory_failure(). This isn't a 100% solution. If there are multiple errors, not all may be logged in this way. [tony.luck@intel.com: add call to kmsan_unpoison_memory(), per Miaohe Lin] Link: https://lkml.kernel.org/r/20221031201029.102123-2-tony.luck@intel.com Link: https://lkml.kernel.org/r/20221021200120.175753-1-tony.luck@intel.com Link: https://lkml.kernel.org/r/20221021200120.175753-2-tony.luck@intel.com Signed-off-by: Tony Luck Reviewed-by: Dan Williams Reviewed-by: Naoya Horiguchi Reviewed-by: Miaohe Lin Reviewed-by: Alexander Potapenko Tested-by: Shuai Xue Cc: Christophe Leroy Cc: Matthew Wilcox (Oracle) Cc: Michael Ellerman Cc: Nicholas Piggin Signed-off-by: Andrew Morton --- include/linux/highmem.h | 26 ++++++++++++++++++++++++++ mm/memory.c | 30 ++++++++++++++++++++---------- 2 files changed, 46 insertions(+), 10 deletions(-) diff --git a/include/linux/highmem.h b/include/linux/highmem.h index e9912da5441b..44242268f53b 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -319,6 +319,32 @@ static inline void copy_user_highpage(struct page *to, struct page *from, #endif +#ifdef copy_mc_to_kernel 
+static inline int copy_mc_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma) +{ + unsigned long ret; + char *vfrom, *vto; + + vfrom = kmap_local_page(from); + vto = kmap_local_page(to); + ret = copy_mc_to_kernel(vto, vfrom, PAGE_SIZE); + if (!ret) + kmsan_unpoison_memory(page_address(to), PAGE_SIZE); + kunmap_local(vto); + kunmap_local(vfrom); + + return ret; +} +#else +static inline int copy_mc_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma) +{ + copy_user_highpage(to, from, vaddr, vma); + return 0; +} +#endif + #ifndef __HAVE_ARCH_COPY_HIGHPAGE static inline void copy_highpage(struct page *to, struct page *from) diff --git a/mm/memory.c b/mm/memory.c index e0555ddd71b5..13b1fe661d86 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2798,10 +2798,16 @@ static inline int pte_unmap_same(struct vm_fault *vmf) return same; } -static inline bool __wp_page_copy_user(struct page *dst, struct page *src, - struct vm_fault *vmf) +/* + * Return: + * 0: copied succeeded + * -EHWPOISON: copy failed due to hwpoison in source page + * -EAGAIN: copied failed (some other reason) + */ +static inline int __wp_page_copy_user(struct page *dst, struct page *src, + struct vm_fault *vmf) { - bool ret; + int ret; void *kaddr; void __user *uaddr; bool locked = false; @@ -2810,8 +2816,9 @@ static inline bool __wp_page_copy_user(struct page *dst, struct page *src, unsigned long addr = vmf->address; if (likely(src)) { - copy_user_highpage(dst, src, addr, vma); - return true; + if (copy_mc_user_highpage(dst, src, addr, vma)) + return -EHWPOISON; + return 0; } /* @@ -2838,7 +2845,7 @@ static inline bool __wp_page_copy_user(struct page *dst, struct page *src, * and update local tlb only */ update_mmu_tlb(vma, addr, vmf->pte); - ret = false; + ret = -EAGAIN; goto pte_unlock; } @@ -2863,7 +2870,7 @@ static inline bool __wp_page_copy_user(struct page *dst, struct page *src, if 
(!likely(pte_same(*vmf->pte, vmf->orig_pte))) { /* The PTE changed under us, update local tlb */ update_mmu_tlb(vma, addr, vmf->pte); - ret = false; + ret = -EAGAIN; goto pte_unlock; } @@ -2882,7 +2889,7 @@ warn: } } - ret = true; + ret = 0; pte_unlock: if (locked) @@ -3054,6 +3061,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) pte_t entry; int page_copied = 0; struct mmu_notifier_range range; + int ret; delayacct_wpcopy_start(); @@ -3071,19 +3079,21 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) if (!new_page) goto oom; - if (!__wp_page_copy_user(new_page, old_page, vmf)) { + ret = __wp_page_copy_user(new_page, old_page, vmf); + if (ret) { /* * COW failed, if the fault was solved by other, * it's fine. If not, userspace would re-fault on * the same address and we will handle the fault * from the second attempt. + * The -EHWPOISON case will not be retried. */ put_page(new_page); if (old_page) put_page(old_page); delayacct_wpcopy_end(); - return 0; + return ret == -EHWPOISON ? VM_FAULT_HWPOISON : 0; } kmsan_copy_page_meta(new_page, old_page); } From d302c2398ba269e788a4f37ae57c07a7fcabaa42 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 21 Oct 2022 13:01:20 -0700 Subject: [PATCH 2974/4122] mm, hwpoison: when copy-on-write hits poison, take page offline Cannot call memory_failure() directly from the fault handler because mmap_lock (and others) are held. It is important, but not urgent, to mark the source page as h/w poisoned and unmap it from other tasks. Use memory_failure_queue() to request a call to memory_failure() for the page with the error. 
Also provide a stub version for CONFIG_MEMORY_FAILURE=n Link: https://lkml.kernel.org/r/20221021200120.175753-3-tony.luck@intel.com Signed-off-by: Tony Luck Reviewed-by: Miaohe Lin Cc: Christophe Leroy Cc: Dan Williams Cc: Matthew Wilcox (Oracle) Cc: Michael Ellerman Cc: Naoya Horiguchi Cc: Nicholas Piggin Cc: Shuai Xue Signed-off-by: Andrew Morton --- include/linux/mm.h | 6 +++++- mm/memory.c | 4 +++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 0cb4e196d60b..3950ef45b9a9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3280,7 +3280,6 @@ enum mf_flags { int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index, unsigned long count, int mf_flags); extern int memory_failure(unsigned long pfn, int flags); -extern void memory_failure_queue(unsigned long pfn, int flags); extern void memory_failure_queue_kick(int cpu); extern int unpoison_memory(unsigned long pfn); extern int sysctl_memory_failure_early_kill; @@ -3289,11 +3288,16 @@ extern void shake_page(struct page *p); extern atomic_long_t num_poisoned_pages __read_mostly; extern int soft_offline_page(unsigned long pfn, int flags); #ifdef CONFIG_MEMORY_FAILURE +extern void memory_failure_queue(unsigned long pfn, int flags); extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags, bool *migratable_cleared); void num_poisoned_pages_inc(unsigned long pfn); void num_poisoned_pages_sub(unsigned long pfn, long i); #else +static inline void memory_failure_queue(unsigned long pfn, int flags) +{ +} + static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags, bool *migratable_cleared) { diff --git a/mm/memory.c b/mm/memory.c index 13b1fe661d86..659620b6770f 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2816,8 +2816,10 @@ static inline int __wp_page_copy_user(struct page *dst, struct page *src, unsigned long addr = vmf->address; if (likely(src)) { - if (copy_mc_user_highpage(dst, src, addr, vma)) + if 
(copy_mc_user_highpage(dst, src, addr, vma)) { + memory_failure_queue(page_to_pfn(src), 0); return -EHWPOISON; + } return 0; } From 1cc53a047b0b9389c2d8f4a69499c6135572f23e Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Thu, 27 Oct 2022 11:36:41 +0800 Subject: [PATCH 2975/4122] mm: hugetlb_vmemmap: remove redundant list_del() The ->lru field will be assigned to a new value in __free_page(). So it is unnecessary to delete it from the @list. Just remove it to simplify the code. Link: https://lkml.kernel.org/r/20221027033641.66709-1-songmuchun@bytedance.com Signed-off-by: Muchun Song Reviewed-by: Mike Kravetz Signed-off-by: Andrew Morton --- mm/hugetlb_vmemmap.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c index 4962dd1ba4a6..7898c2c75e35 100644 --- a/mm/hugetlb_vmemmap.c +++ b/mm/hugetlb_vmemmap.c @@ -232,10 +232,8 @@ static void free_vmemmap_page_list(struct list_head *list) { struct page *page, *next; - list_for_each_entry_safe(page, next, list, lru) { - list_del(&page->lru); + list_for_each_entry_safe(page, next, list, lru) free_vmemmap_page(page); - } } static void vmemmap_remap_pte(pte_t *pte, unsigned long addr, From bd4149290c3edc09454a8a7e7ef3a5544cb9eed6 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 24 Oct 2022 17:46:18 +0000 Subject: [PATCH 2976/4122] Docs/admin-guide/mm/damon/usage: describe the rules of sysfs region directories Patch series "Docs/admin-guide/mm/damon/usage: minor fixes". DAMON usage document contains an unclear description and a wrong usage example. This patchset fixes the two minor problems. This patch (of 2): Target region directories of the DAMON sysfs interface should not overlap and should be sorted by address, but this is not clearly documented. Actually, a user had an issue[1] due to the poor documentation. Add a clear description of it to the usage document.
[1] https://lore.kernel.org/damon/CAEZ6=UNUcH2BvJj++OrT=XQLdkidU79wmCO=tantSOB36pPNTg@mail.gmail.com/ Link: https://lkml.kernel.org/r/20221024174619.15600-1-sj@kernel.org Link: https://lkml.kernel.org/r/20221024174619.15600-2-sj@kernel.org Signed-off-by: SeongJae Park Reported-by: Vinicius Petrucci Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- Documentation/admin-guide/mm/damon/usage.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/admin-guide/mm/damon/usage.rst b/Documentation/admin-guide/mm/damon/usage.rst index b47b0cbbd491..89d9a4f75a29 100644 --- a/Documentation/admin-guide/mm/damon/usage.rst +++ b/Documentation/admin-guide/mm/damon/usage.rst @@ -235,6 +235,9 @@ In each region directory, you will find two files (``start`` and ``end``). You can set and get the start and end addresses of the initial monitoring target region by writing to and reading from the files, respectively. +Each region should not overlap with others. ``end`` of directory ``N`` should +be equal or smaller than ``start`` of directory ``N+1``. + contexts//schemes/ --------------------- From 1b0166387586cae69d7da783f0a4521864534aad Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 24 Oct 2022 17:46:19 +0000 Subject: [PATCH 2977/4122] Docs/admin-guide/mm/damon/usage: fix wrong usage example of init_regions file DAMON debugfs interface assumes the users will write all inputs at once. However, redirecting a string of multiple lines sometimes ends up writing line by line. Therefore, the example usage of 'init_regions' file, which writes input as a string of multiple lines, can fail. Fix it to use a single line string instead. Also update the description of the usage to not assume users will write inputs in multiple lines.
Link: https://lkml.kernel.org/r/20221024174619.15600-3-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Cc: Vinicius Petrucci Signed-off-by: Andrew Morton --- Documentation/admin-guide/mm/damon/usage.rst | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/Documentation/admin-guide/mm/damon/usage.rst b/Documentation/admin-guide/mm/damon/usage.rst index 89d9a4f75a29..c17e02e1e426 100644 --- a/Documentation/admin-guide/mm/damon/usage.rst +++ b/Documentation/admin-guide/mm/damon/usage.rst @@ -468,8 +468,9 @@ regions in case of physical memory monitoring. Therefore, users should set the monitoring target regions by themselves. In such cases, users can explicitly set the initial monitoring target regions -as they want, by writing proper values to the ``init_regions`` file. Each line -of the input should represent one region in below form.:: +as they want, by writing proper values to the ``init_regions`` file. The input +should be a sequence of three integers separated by white spaces that represent +one region in below form.:: @@ -484,9 +485,9 @@ ranges, ``20-40`` and ``50-100`` as that of pid 4242, which is the second one # cd /damon # cat target_ids 42 4242 - # echo "0 1 100 - 0 100 200 - 1 20 40 + # echo "0 1 100 \ + 0 100 200 \ + 1 20 40 \ 1 50 100" > init_regions Note that this sets the initial monitoring target regions only. In case of From 57e9cc50f4dd926d6c38751799d25cad89fb2bd9 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 26 Oct 2022 14:01:33 -0400 Subject: [PATCH 2978/4122] mm: vmscan: split khugepaged stats from direct reclaim stats Direct reclaim stats are useful for identifying a potential source for application latency, as well as spotting issues with kswapd. However, khugepaged currently distorts the picture: as a kernel thread it doesn't impose allocation latencies on userspace, and it explicitly opts out of kswapd reclaim. Its activity showing up in the direct reclaim stats is misleading. 
Counting it as kswapd reclaim could also cause confusion when trying to understand actual kswapd behavior. Break out khugepaged from the direct reclaim counters into new pgsteal_khugepaged, pgdemote_khugepaged, pgscan_khugepaged counters. Test with a huge executable (CONFIG_READ_ONLY_THP_FOR_FS): pgsteal_kswapd 1342185 pgsteal_direct 0 pgsteal_khugepaged 3623 pgscan_kswapd 1345025 pgscan_direct 0 pgscan_khugepaged 3623 Link: https://lkml.kernel.org/r/20221026180133.377671-1-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Reported-by: Eric Bergen Cc: Matthew Wilcox (Oracle) Cc: Yang Shi Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- Documentation/admin-guide/cgroup-v2.rst | 6 +++++ include/linux/khugepaged.h | 6 +++++ include/linux/vm_event_item.h | 3 +++ mm/khugepaged.c | 5 ++++ mm/memcontrol.c | 8 +++++-- mm/vmscan.c | 32 ++++++++++++++++++------- mm/vmstat.c | 3 +++ 7 files changed, 53 insertions(+), 10 deletions(-) diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index dc254a3cb956..74cec76be9f2 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1488,12 +1488,18 @@ PAGE_SIZE multiple when read back. 
pgscan_direct (npn) Amount of scanned pages directly (in an inactive LRU list) + pgscan_khugepaged (npn) + Amount of scanned pages by khugepaged (in an inactive LRU list) + pgsteal_kswapd (npn) Amount of reclaimed pages by kswapd pgsteal_direct (npn) Amount of reclaimed pages directly + pgsteal_khugepaged (npn) + Amount of reclaimed pages by khugepaged + pgfault (npn) Total number of page faults incurred diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h index 70162d707caf..f68865e19b0b 100644 --- a/include/linux/khugepaged.h +++ b/include/linux/khugepaged.h @@ -15,6 +15,7 @@ extern void __khugepaged_exit(struct mm_struct *mm); extern void khugepaged_enter_vma(struct vm_area_struct *vma, unsigned long vm_flags); extern void khugepaged_min_free_kbytes_update(void); +extern bool current_is_khugepaged(void); #ifdef CONFIG_SHMEM extern int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, bool install_pmd); @@ -57,6 +58,11 @@ static inline int collapse_pte_mapped_thp(struct mm_struct *mm, static inline void khugepaged_min_free_kbytes_update(void) { } + +static inline bool current_is_khugepaged(void) +{ + return false; +} #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif /* _LINUX_KHUGEPAGED_H */ diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 3518dba1e02f..7f5d1caf5890 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -40,10 +40,13 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, PGREUSE, PGSTEAL_KSWAPD, PGSTEAL_DIRECT, + PGSTEAL_KHUGEPAGED, PGDEMOTE_KSWAPD, PGDEMOTE_DIRECT, + PGDEMOTE_KHUGEPAGED, PGSCAN_KSWAPD, PGSCAN_DIRECT, + PGSCAN_KHUGEPAGED, PGSCAN_DIRECT_THROTTLE, PGSCAN_ANON, PGSCAN_FILE, diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 3703a56571c1..9c111273bbf9 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -2577,6 +2577,11 @@ void khugepaged_min_free_kbytes_update(void) mutex_unlock(&khugepaged_mutex); } +bool current_is_khugepaged(void) +{ + 
return kthread_func(current) == khugepaged; +} + static int madvise_collapse_errno(enum scan_result r) { /* diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c95e2ed6e7fd..23750cec0036 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -661,8 +661,10 @@ static const unsigned int memcg_vm_event_stat[] = { PGPGOUT, PGSCAN_KSWAPD, PGSCAN_DIRECT, + PGSCAN_KHUGEPAGED, PGSTEAL_KSWAPD, PGSTEAL_DIRECT, + PGSTEAL_KHUGEPAGED, PGFAULT, PGMAJFAULT, PGREFILL, @@ -1574,10 +1576,12 @@ static void memory_stat_format(struct mem_cgroup *memcg, char *buf, int bufsize) /* Accumulated memory events */ seq_buf_printf(&s, "pgscan %lu\n", memcg_events(memcg, PGSCAN_KSWAPD) + - memcg_events(memcg, PGSCAN_DIRECT)); + memcg_events(memcg, PGSCAN_DIRECT) + + memcg_events(memcg, PGSCAN_KHUGEPAGED)); seq_buf_printf(&s, "pgsteal %lu\n", memcg_events(memcg, PGSTEAL_KSWAPD) + - memcg_events(memcg, PGSTEAL_DIRECT)); + memcg_events(memcg, PGSTEAL_DIRECT) + + memcg_events(memcg, PGSTEAL_KHUGEPAGED)); for (i = 0; i < ARRAY_SIZE(memcg_vm_event_stat); i++) { if (memcg_vm_event_stat[i] == PGPGIN || diff --git a/mm/vmscan.c b/mm/vmscan.c index 55a5b5d66d68..d7c71be6417d 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -54,6 +54,7 @@ #include #include #include +#include #include #include @@ -1047,6 +1048,24 @@ void drop_slab(void) drop_slab_node(nid); } +static int reclaimer_offset(void) +{ + BUILD_BUG_ON(PGSTEAL_DIRECT - PGSTEAL_KSWAPD != + PGDEMOTE_DIRECT - PGDEMOTE_KSWAPD); + BUILD_BUG_ON(PGSTEAL_DIRECT - PGSTEAL_KSWAPD != + PGSCAN_DIRECT - PGSCAN_KSWAPD); + BUILD_BUG_ON(PGSTEAL_KHUGEPAGED - PGSTEAL_KSWAPD != + PGDEMOTE_KHUGEPAGED - PGDEMOTE_KSWAPD); + BUILD_BUG_ON(PGSTEAL_KHUGEPAGED - PGSTEAL_KSWAPD != + PGSCAN_KHUGEPAGED - PGSCAN_KSWAPD); + + if (current_is_kswapd()) + return 0; + if (current_is_khugepaged()) + return PGSTEAL_KHUGEPAGED - PGSTEAL_KSWAPD; + return PGSTEAL_DIRECT - PGSTEAL_KSWAPD; +} + static inline int is_page_cache_freeable(struct folio *folio) { /* @@ -1599,10 +1618,7 @@ static 
unsigned int demote_folio_list(struct list_head *demote_folios, (unsigned long)&mtc, MIGRATE_ASYNC, MR_DEMOTION, &nr_succeeded); - if (current_is_kswapd()) - __count_vm_events(PGDEMOTE_KSWAPD, nr_succeeded); - else - __count_vm_events(PGDEMOTE_DIRECT, nr_succeeded); + __count_vm_events(PGDEMOTE_KSWAPD + reclaimer_offset(), nr_succeeded); return nr_succeeded; } @@ -2475,7 +2491,7 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan, &nr_scanned, sc, lru); __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, nr_taken); - item = current_is_kswapd() ? PGSCAN_KSWAPD : PGSCAN_DIRECT; + item = PGSCAN_KSWAPD + reclaimer_offset(); if (!cgroup_reclaim(sc)) __count_vm_events(item, nr_scanned); __count_memcg_events(lruvec_memcg(lruvec), item, nr_scanned); @@ -2492,7 +2508,7 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan, move_folios_to_lru(lruvec, &folio_list); __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken); - item = current_is_kswapd() ? PGSTEAL_KSWAPD : PGSTEAL_DIRECT; + item = PGSTEAL_KSWAPD + reclaimer_offset(); if (!cgroup_reclaim(sc)) __count_vm_events(item, nr_reclaimed); __count_memcg_events(lruvec_memcg(lruvec), item, nr_reclaimed); @@ -4871,7 +4887,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc, break; } - item = current_is_kswapd() ? PGSCAN_KSWAPD : PGSCAN_DIRECT; + item = PGSCAN_KSWAPD + reclaimer_offset(); if (!cgroup_reclaim(sc)) { __count_vm_events(item, isolated); __count_vm_events(PGREFILL, sorted); @@ -5049,7 +5065,7 @@ retry: if (walk && walk->batched) reset_batch_size(lruvec, walk); - item = current_is_kswapd() ? 
PGSTEAL_KSWAPD : PGSTEAL_DIRECT; + item = PGSTEAL_KSWAPD + reclaimer_offset(); if (!cgroup_reclaim(sc)) __count_vm_events(item, reclaimed); __count_memcg_events(memcg, item, reclaimed); diff --git a/mm/vmstat.c b/mm/vmstat.c index b2371d745e00..1ea6a5ce1c41 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1271,10 +1271,13 @@ const char * const vmstat_text[] = { "pgreuse", "pgsteal_kswapd", "pgsteal_direct", + "pgsteal_khugepaged", "pgdemote_kswapd", "pgdemote_direct", + "pgdemote_khugepaged", "pgscan_kswapd", "pgscan_direct", + "pgscan_khugepaged", "pgscan_direct_throttle", "pgscan_anon", "pgscan_file", From 6e7ba8b5e2380f941dda8a1025d70c5ce5b38982 Mon Sep 17 00:00:00 2001 From: Liam Howlett Date: Fri, 28 Oct 2022 14:45:34 +0000 Subject: [PATCH 2979/4122] maple_tree: mte_set_full() and mte_clear_full() clang-analyzer clean up mte_set_full() and mte_clear_full() were incorrectly setting a pointer to a value without returning a result. Fix this by returning the modified pointer to be used as necessary. Also add a third function to return if the bit is set or not. Link: https://lore.kernel.org/lkml/20221026120029.12555-1-lukas.bulwahn@gmail.com/ Link: https://lkml.kernel.org/r/20221028144520.2776767-1-Liam.Howlett@oracle.com Signed-off-by: Liam R.
Howlett Suggested-by: Lukas Bulwahn Suggested-by: Dan Carpenter Signed-off-by: Andrew Morton --- lib/maple_tree.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/lib/maple_tree.c b/lib/maple_tree.c index df352f6ccc24..3fe1491d2bf9 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -323,14 +323,19 @@ static inline void *mte_safe_root(const struct maple_enode *node) return (void *)((unsigned long)node & ~MAPLE_ROOT_NODE); } -static inline void mte_set_full(const struct maple_enode *node) +static inline void *mte_set_full(const struct maple_enode *node) { - node = (void *)((unsigned long)node & ~MAPLE_ENODE_NULL); + return (void *)((unsigned long)node & ~MAPLE_ENODE_NULL); } -static inline void mte_clear_full(const struct maple_enode *node) +static inline void *mte_clear_full(const struct maple_enode *node) { - node = (void *)((unsigned long)node | MAPLE_ENODE_NULL); + return (void *)((unsigned long)node | MAPLE_ENODE_NULL); +} + +static inline bool mte_has_null(const struct maple_enode *node) +{ + return (unsigned long)node & MAPLE_ENODE_NULL; } static inline bool ma_is_root(struct maple_node *node) From b2b23ba03cb9059d11b270cc280dcdfa6dbbdf53 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 28 Oct 2022 08:53:01 -0700 Subject: [PATCH 2980/4122] mempool: do not use ksize() for poisoning Nothing appears to be using ksize() within the kmalloc-backed mempools except the mempool poisoning logic. Use the actual pool size instead of the ksize() to avoid needing any special handling of the memory as needed by KASAN, UBSAN_BOUNDS, nor FORTIFY_SOURCE. 
[vbabka@suse.cz: for slab mempools pool_data is not object size] Link: https://lkml.kernel.org/r/13c4bd6e-09d3-efce-43a5-5a99be8bc96b@suse.cz Link: https://lkml.kernel.org/r/20221028154823.you.615-kees@kernel.org Signed-off-by: Kees Cook Signed-off-by: Vlastimil Babka Suggested-by: Vlastimil Babka Link: https://lore.kernel.org/lkml/f4fc52c4-7c18-1d76-0c7a-4058ea2486b9@suse.cz/ Acked-by: Vlastimil Babka Reviewed-by: Andrey Konovalov Cc: David Rientjes Cc: Marco Elver Cc: Vincenzo Frascino Reported-by: Anders Roxell Link: https://lore.kernel.org/all/20221031105514.GB69385@mutt/ Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- mm/mempool.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/mm/mempool.c b/mm/mempool.c index 96488b13a1ef..734bcf5afbb7 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -57,8 +57,10 @@ static void __check_element(mempool_t *pool, void *element, size_t size) static void check_element(mempool_t *pool, void *element) { /* Mempools backed by slab allocator */ - if (pool->free == mempool_free_slab || pool->free == mempool_kfree) { - __check_element(pool, element, ksize(element)); + if (pool->free == mempool_kfree) { + __check_element(pool, element, (size_t)pool->pool_data); + } else if (pool->free == mempool_free_slab) { + __check_element(pool, element, kmem_cache_size(pool->pool_data)); } else if (pool->free == mempool_free_pages) { /* Mempools backed by page allocator */ int order = (int)(long)pool->pool_data; @@ -80,8 +82,10 @@ static void __poison_element(void *element, size_t size) static void poison_element(mempool_t *pool, void *element) { /* Mempools backed by slab allocator */ - if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc) { - __poison_element(element, ksize(element)); + if (pool->alloc == mempool_kmalloc) { + __poison_element(element, (size_t)pool->pool_data); + } else if (pool->alloc == mempool_alloc_slab) { + __poison_element(element, kmem_cache_size(pool->pool_data)); 
} else if (pool->alloc == mempool_alloc_pages) { /* Mempools backed by page allocator */ int order = (int)(long)pool->pool_data; @@ -111,8 +115,10 @@ static __always_inline void kasan_poison_element(mempool_t *pool, void *element) static void kasan_unpoison_element(mempool_t *pool, void *element) { - if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc) - kasan_unpoison_range(element, __ksize(element)); + if (pool->alloc == mempool_kmalloc) + kasan_unpoison_range(element, (size_t)pool->pool_data); + else if (pool->alloc == mempool_alloc_slab) + kasan_unpoison_range(element, kmem_cache_size(pool->pool_data)); else if (pool->alloc == mempool_alloc_pages) kasan_unpoison_pages(element, (unsigned long)pool->pool_data, false); From a098c977722ca27d3b4bfeb966767af3cce45f85 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Tue, 1 Nov 2022 15:30:51 -0700 Subject: [PATCH 2981/4122] mm/hugetlb_cgroup: convert __set_hugetlb_cgroup() to folios Patch series "convert hugetlb_cgroup helper functions to folios", v2. This patch series continues the conversion of hugetlb code from being managed in pages to folios by converting many of the hugetlb_cgroup helper functions to use folios. This allows the core hugetlb functions to pass in a folio to these helper functions. This patch (of 9): Change __set_hugetlb_cgroup() to use folios so it is explicit that the function operates on a head page.
Link: https://lkml.kernel.org/r/20221101223059.460937-1-sidhartha.kumar@oracle.com Link: https://lkml.kernel.org/r/20221101223059.460937-2-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Mike Kravetz Reviewed-by: Muchun Song Cc: Aneesh Kumar K.V Cc: Bui Quang Minh Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Mina Almasry Signed-off-by: Andrew Morton --- include/linux/hugetlb_cgroup.h | 14 +++++++------- mm/hugetlb_cgroup.c | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h index 630cd255d0cf..7576e9ed8afe 100644 --- a/include/linux/hugetlb_cgroup.h +++ b/include/linux/hugetlb_cgroup.h @@ -90,31 +90,31 @@ hugetlb_cgroup_from_page_rsvd(struct page *page) return __hugetlb_cgroup_from_page(page, true); } -static inline void __set_hugetlb_cgroup(struct page *page, +static inline void __set_hugetlb_cgroup(struct folio *folio, struct hugetlb_cgroup *h_cg, bool rsvd) { - VM_BUG_ON_PAGE(!PageHuge(page), page); + VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio); - if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER) + if (folio_order(folio) < HUGETLB_CGROUP_MIN_ORDER) return; if (rsvd) - set_page_private(page + SUBPAGE_INDEX_CGROUP_RSVD, + set_page_private(folio_page(folio, SUBPAGE_INDEX_CGROUP_RSVD), (unsigned long)h_cg); else - set_page_private(page + SUBPAGE_INDEX_CGROUP, + set_page_private(folio_page(folio, SUBPAGE_INDEX_CGROUP), (unsigned long)h_cg); } static inline void set_hugetlb_cgroup(struct page *page, struct hugetlb_cgroup *h_cg) { - __set_hugetlb_cgroup(page, h_cg, false); + __set_hugetlb_cgroup(page_folio(page), h_cg, false); } static inline void set_hugetlb_cgroup_rsvd(struct page *page, struct hugetlb_cgroup *h_cg) { - __set_hugetlb_cgroup(page, h_cg, true); + __set_hugetlb_cgroup(page_folio(page), h_cg, true); } static inline bool hugetlb_cgroup_disabled(void) diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index f61d132df52b..b2316bcbf634 
100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -314,7 +314,7 @@ static void __hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages, if (hugetlb_cgroup_disabled() || !h_cg) return; - __set_hugetlb_cgroup(page, h_cg, rsvd); + __set_hugetlb_cgroup(page_folio(page), h_cg, rsvd); if (!rsvd) { unsigned long usage = h_cg->nodeinfo[page_to_nid(page)]->usage[idx]; @@ -356,7 +356,7 @@ static void __hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, h_cg = __hugetlb_cgroup_from_page(page, rsvd); if (unlikely(!h_cg)) return; - __set_hugetlb_cgroup(page, NULL, rsvd); + __set_hugetlb_cgroup(page_folio(page), NULL, rsvd); page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd), From f074732d599e19a2a5b12e54743ad5eaccbe6550 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Tue, 1 Nov 2022 15:30:52 -0700 Subject: [PATCH 2982/4122] mm/hugetlb_cgroup: convert hugetlb_cgroup_from_page() to folios Introduce folios in __remove_hugetlb_page() by converting hugetlb_cgroup_from_page() to use folios. Also gets rid of unused hugetlb_cgroup_from_page_resv() function.
Link: https://lkml.kernel.org/r/20221101223059.460937-3-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Muchun Song Cc: Aneesh Kumar K.V Cc: Bui Quang Minh Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Mike Kravetz Cc: Mina Almasry Signed-off-by: Andrew Morton --- include/linux/hugetlb_cgroup.h | 39 +++++++++++++++++----------------- mm/hugetlb.c | 5 +++-- mm/hugetlb_cgroup.c | 13 +++++++----- 3 files changed, 31 insertions(+), 26 deletions(-) diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h index 7576e9ed8afe..feb2edafc8b6 100644 --- a/include/linux/hugetlb_cgroup.h +++ b/include/linux/hugetlb_cgroup.h @@ -67,27 +67,34 @@ struct hugetlb_cgroup { }; static inline struct hugetlb_cgroup * -__hugetlb_cgroup_from_page(struct page *page, bool rsvd) +__hugetlb_cgroup_from_folio(struct folio *folio, bool rsvd) { - VM_BUG_ON_PAGE(!PageHuge(page), page); + struct page *tail; - if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER) + VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio); + if (folio_order(folio) < HUGETLB_CGROUP_MIN_ORDER) return NULL; - if (rsvd) - return (void *)page_private(page + SUBPAGE_INDEX_CGROUP_RSVD); - else - return (void *)page_private(page + SUBPAGE_INDEX_CGROUP); + + if (rsvd) { + tail = folio_page(folio, SUBPAGE_INDEX_CGROUP_RSVD); + return (void *)page_private(tail); + } + + else { + tail = folio_page(folio, SUBPAGE_INDEX_CGROUP); + return (void *)page_private(tail); + } } -static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page) +static inline struct hugetlb_cgroup *hugetlb_cgroup_from_folio(struct folio *folio) { - return __hugetlb_cgroup_from_page(page, false); + return __hugetlb_cgroup_from_folio(folio, false); } static inline struct hugetlb_cgroup * -hugetlb_cgroup_from_page_rsvd(struct page *page) +hugetlb_cgroup_from_folio_rsvd(struct folio *folio) { - return __hugetlb_cgroup_from_page(page, true); + return __hugetlb_cgroup_from_folio(folio, true); } static inline 
void __set_hugetlb_cgroup(struct folio *folio, @@ -181,19 +188,13 @@ static inline void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv, { } -static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page) +static inline struct hugetlb_cgroup *hugetlb_cgroup_from_folio(struct folio *folio) { return NULL; } static inline struct hugetlb_cgroup * -hugetlb_cgroup_from_page_resv(struct page *page) -{ - return NULL; -} - -static inline struct hugetlb_cgroup * -hugetlb_cgroup_from_page_rsvd(struct page *page) +hugetlb_cgroup_from_folio_rsvd(struct folio *folio) { return NULL; } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index be09678d0582..f86a61a73112 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1446,9 +1446,10 @@ static void __remove_hugetlb_page(struct hstate *h, struct page *page, bool demote) { int nid = page_to_nid(page); + struct folio *folio = page_folio(page); - VM_BUG_ON_PAGE(hugetlb_cgroup_from_page(page), page); - VM_BUG_ON_PAGE(hugetlb_cgroup_from_page_rsvd(page), page); + VM_BUG_ON_FOLIO(hugetlb_cgroup_from_folio(folio), folio); + VM_BUG_ON_FOLIO(hugetlb_cgroup_from_folio_rsvd(folio), folio); lockdep_assert_held(&hugetlb_lock); if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported()) diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index b2316bcbf634..8b95c1560f9c 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -191,8 +191,9 @@ static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg, struct page_counter *counter; struct hugetlb_cgroup *page_hcg; struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg); + struct folio *folio = page_folio(page); - page_hcg = hugetlb_cgroup_from_page(page); + page_hcg = hugetlb_cgroup_from_folio(folio); /* * We can have pages in active list without any cgroup * ie, hugepage with less than 3 pages. 
We can safely @@ -349,14 +350,15 @@ static void __hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, struct page *page, bool rsvd) { struct hugetlb_cgroup *h_cg; + struct folio *folio = page_folio(page); if (hugetlb_cgroup_disabled()) return; lockdep_assert_held(&hugetlb_lock); - h_cg = __hugetlb_cgroup_from_page(page, rsvd); + h_cg = __hugetlb_cgroup_from_folio(folio, rsvd); if (unlikely(!h_cg)) return; - __set_hugetlb_cgroup(page_folio(page), NULL, rsvd); + __set_hugetlb_cgroup(folio, NULL, rsvd); page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd), @@ -888,13 +890,14 @@ void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage) struct hugetlb_cgroup *h_cg; struct hugetlb_cgroup *h_cg_rsvd; struct hstate *h = page_hstate(oldhpage); + struct folio *old_folio = page_folio(oldhpage); if (hugetlb_cgroup_disabled()) return; spin_lock_irq(&hugetlb_lock); - h_cg = hugetlb_cgroup_from_page(oldhpage); - h_cg_rsvd = hugetlb_cgroup_from_page_rsvd(oldhpage); + h_cg = hugetlb_cgroup_from_folio(old_folio); + h_cg_rsvd = hugetlb_cgroup_from_folio_rsvd(old_folio); set_hugetlb_cgroup(oldhpage, NULL); set_hugetlb_cgroup_rsvd(oldhpage, NULL); From de656ed376c4cb47c5713fba52f8bbfbea44f387 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Tue, 1 Nov 2022 15:30:53 -0700 Subject: [PATCH 2983/4122] mm/hugetlb_cgroup: convert set_hugetlb_cgroup*() to folios Allows __prep_new_huge_page() to operate on a folio by converting set_hugetlb_cgroup*() to take in a folio. 
Link: https://lkml.kernel.org/r/20221101223059.460937-4-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Mike Kravetz Cc: Aneesh Kumar K.V Cc: Bui Quang Minh Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Mina Almasry Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/hugetlb_cgroup.h | 12 ++++++------ mm/hugetlb.c | 33 +++++++++++++++++++-------------- mm/hugetlb_cgroup.c | 11 ++++++----- 3 files changed, 31 insertions(+), 25 deletions(-) diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h index feb2edafc8b6..a7e3540f7f38 100644 --- a/include/linux/hugetlb_cgroup.h +++ b/include/linux/hugetlb_cgroup.h @@ -112,16 +112,16 @@ static inline void __set_hugetlb_cgroup(struct folio *folio, (unsigned long)h_cg); } -static inline void set_hugetlb_cgroup(struct page *page, +static inline void set_hugetlb_cgroup(struct folio *folio, struct hugetlb_cgroup *h_cg) { - __set_hugetlb_cgroup(page_folio(page), h_cg, false); + __set_hugetlb_cgroup(folio, h_cg, false); } -static inline void set_hugetlb_cgroup_rsvd(struct page *page, +static inline void set_hugetlb_cgroup_rsvd(struct folio *folio, struct hugetlb_cgroup *h_cg) { - __set_hugetlb_cgroup(page_folio(page), h_cg, true); + __set_hugetlb_cgroup(folio, h_cg, true); } static inline bool hugetlb_cgroup_disabled(void) @@ -199,12 +199,12 @@ hugetlb_cgroup_from_folio_rsvd(struct folio *folio) return NULL; } -static inline void set_hugetlb_cgroup(struct page *page, +static inline void set_hugetlb_cgroup(struct folio *folio, struct hugetlb_cgroup *h_cg) { } -static inline void set_hugetlb_cgroup_rsvd(struct page *page, +static inline void set_hugetlb_cgroup_rsvd(struct folio *folio, struct hugetlb_cgroup *h_cg) { } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index f86a61a73112..01ea43b22724 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1774,19 +1774,21 @@ static void __prep_account_new_huge_page(struct hstate *h, int nid) h->nr_huge_pages_node[nid]++; } -static void 
__prep_new_huge_page(struct hstate *h, struct page *page) +static void __prep_new_hugetlb_folio(struct hstate *h, struct folio *folio) { - hugetlb_vmemmap_optimize(h, page); - INIT_LIST_HEAD(&page->lru); - set_compound_page_dtor(page, HUGETLB_PAGE_DTOR); - hugetlb_set_page_subpool(page, NULL); - set_hugetlb_cgroup(page, NULL); - set_hugetlb_cgroup_rsvd(page, NULL); + hugetlb_vmemmap_optimize(h, &folio->page); + INIT_LIST_HEAD(&folio->lru); + folio->_folio_dtor = HUGETLB_PAGE_DTOR; + hugetlb_set_folio_subpool(folio, NULL); + set_hugetlb_cgroup(folio, NULL); + set_hugetlb_cgroup_rsvd(folio, NULL); } static void prep_new_huge_page(struct hstate *h, struct page *page, int nid) { - __prep_new_huge_page(h, page); + struct folio *folio = page_folio(page); + + __prep_new_hugetlb_folio(h, folio); spin_lock_irq(&hugetlb_lock); __prep_account_new_huge_page(h, nid); spin_unlock_irq(&hugetlb_lock); @@ -2748,8 +2750,10 @@ static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page, struct list_head *list) { gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE; - int nid = page_to_nid(old_page); + struct folio *old_folio = page_folio(old_page); + int nid = folio_nid(old_folio); struct page *new_page; + struct folio *new_folio; int ret = 0; /* @@ -2762,16 +2766,17 @@ static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page, new_page = alloc_buddy_huge_page(h, gfp_mask, nid, NULL, NULL); if (!new_page) return -ENOMEM; - __prep_new_huge_page(h, new_page); + new_folio = page_folio(new_page); + __prep_new_hugetlb_folio(h, new_folio); retry: spin_lock_irq(&hugetlb_lock); - if (!PageHuge(old_page)) { + if (!folio_test_hugetlb(old_folio)) { /* * Freed from under us. Drop new_page too. */ goto free_new; - } else if (page_count(old_page)) { + } else if (folio_ref_count(old_folio)) { /* * Someone has grabbed the page, try to isolate it here. * Fail with -EBUSY if not possible. 
@@ -2780,7 +2785,7 @@ retry: ret = isolate_hugetlb(old_page, list); spin_lock_irq(&hugetlb_lock); goto free_new; - } else if (!HPageFreed(old_page)) { + } else if (!folio_test_hugetlb_freed(old_folio)) { /* * Page's refcount is 0 but it has not been enqueued in the * freelist yet. Race window is small, so we can succeed here if @@ -2818,7 +2823,7 @@ retry: free_new: spin_unlock_irq(&hugetlb_lock); /* Page has a zero ref count, but needs a ref to be freed */ - set_page_refcounted(new_page); + folio_ref_unfreeze(new_folio, 1); update_and_free_page(h, new_page, false); return ret; diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index 8b95c1560f9c..87a1125aa42d 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -212,7 +212,7 @@ static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg, /* Take the pages off the local counter */ page_counter_cancel(counter, nr_pages); - set_hugetlb_cgroup(page, parent); + set_hugetlb_cgroup(folio, parent); out: return; } @@ -891,6 +891,7 @@ void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage) struct hugetlb_cgroup *h_cg_rsvd; struct hstate *h = page_hstate(oldhpage); struct folio *old_folio = page_folio(oldhpage); + struct folio *new_folio = page_folio(newhpage); if (hugetlb_cgroup_disabled()) return; @@ -898,12 +899,12 @@ void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage) spin_lock_irq(&hugetlb_lock); h_cg = hugetlb_cgroup_from_folio(old_folio); h_cg_rsvd = hugetlb_cgroup_from_folio_rsvd(old_folio); - set_hugetlb_cgroup(oldhpage, NULL); - set_hugetlb_cgroup_rsvd(oldhpage, NULL); + set_hugetlb_cgroup(old_folio, NULL); + set_hugetlb_cgroup_rsvd(old_folio, NULL); /* move the h_cg details to new cgroup */ - set_hugetlb_cgroup(newhpage, h_cg); - set_hugetlb_cgroup_rsvd(newhpage, h_cg_rsvd); + set_hugetlb_cgroup(new_folio, h_cg); + set_hugetlb_cgroup_rsvd(new_folio, h_cg_rsvd); list_move(&newhpage->lru, &h->hugepage_activelist); spin_unlock_irq(&hugetlb_lock); 
return; From 29f394304f624b06fafb3cc9c3da8779f71f4bee Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Tue, 1 Nov 2022 15:30:54 -0700 Subject: [PATCH 2984/4122] mm/hugetlb_cgroup: convert hugetlb_cgroup_migrate to folios Cleans up intermediate page to folio conversion code in hugetlb_cgroup_migrate() by changing its arguments from pages to folios. Link: https://lkml.kernel.org/r/20221101223059.460937-5-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Mike Kravetz Reviewed-by: Muchun Song Cc: Aneesh Kumar K.V Cc: Bui Quang Minh Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Mina Almasry Signed-off-by: Andrew Morton --- include/linux/hugetlb_cgroup.h | 8 ++++---- mm/hugetlb.c | 2 +- mm/hugetlb_cgroup.c | 8 +++----- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h index a7e3540f7f38..789b6fef176d 100644 --- a/include/linux/hugetlb_cgroup.h +++ b/include/linux/hugetlb_cgroup.h @@ -177,8 +177,8 @@ extern void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv, bool region_del); extern void hugetlb_cgroup_file_init(void) __init; -extern void hugetlb_cgroup_migrate(struct page *oldhpage, - struct page *newhpage); +extern void hugetlb_cgroup_migrate(struct folio *old_folio, + struct folio *new_folio); #else static inline void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv, @@ -286,8 +286,8 @@ static inline void hugetlb_cgroup_file_init(void) { } -static inline void hugetlb_cgroup_migrate(struct page *oldhpage, - struct page *newhpage) +static inline void hugetlb_cgroup_migrate(struct folio *old_folio, + struct folio *new_folio) { } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 01ea43b22724..05a832886a09 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -7325,7 +7325,7 @@ void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason) { struct hstate *h = page_hstate(oldpage); - hugetlb_cgroup_migrate(oldpage, newpage); + 
hugetlb_cgroup_migrate(page_folio(oldpage), page_folio(newpage)); set_page_owner_migrate_reason(newpage, reason); /* diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index 87a1125aa42d..b1b18337a56a 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -885,13 +885,11 @@ void __init hugetlb_cgroup_file_init(void) * hugetlb_lock will make sure a parallel cgroup rmdir won't happen * when we migrate hugepages */ -void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage) +void hugetlb_cgroup_migrate(struct folio *old_folio, struct folio *new_folio) { struct hugetlb_cgroup *h_cg; struct hugetlb_cgroup *h_cg_rsvd; - struct hstate *h = page_hstate(oldhpage); - struct folio *old_folio = page_folio(oldhpage); - struct folio *new_folio = page_folio(newhpage); + struct hstate *h = folio_hstate(old_folio); if (hugetlb_cgroup_disabled()) return; @@ -905,7 +903,7 @@ void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage) /* move the h_cg details to new cgroup */ set_hugetlb_cgroup(new_folio, h_cg); set_hugetlb_cgroup_rsvd(new_folio, h_cg_rsvd); - list_move(&newhpage->lru, &h->hugepage_activelist); + list_move(&new_folio->lru, &h->hugepage_activelist); spin_unlock_irq(&hugetlb_lock); return; } From d5e33bd8c16b6f5f47665d378f078bee72b85225 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Tue, 1 Nov 2022 15:30:55 -0700 Subject: [PATCH 2985/4122] mm/hugetlb: convert isolate_or_dissolve_huge_page to folios Removes a call to compound_head() by using a folio when operating on the head page of a hugetlb compound page. 
Link: https://lkml.kernel.org/r/20221101223059.460937-6-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Mike Kravetz Reviewed-by: Muchun Song Cc: Aneesh Kumar K.V Cc: Bui Quang Minh Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Mina Almasry Signed-off-by: Andrew Morton --- mm/hugetlb.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 05a832886a09..666a771c9a3d 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2832,7 +2832,7 @@ free_new: int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list) { struct hstate *h; - struct page *head; + struct folio *folio = page_folio(page); int ret = -EBUSY; /* @@ -2841,9 +2841,8 @@ int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list) * Return success when racing as if we dissolved the page ourselves. */ spin_lock_irq(&hugetlb_lock); - if (PageHuge(page)) { - head = compound_head(page); - h = page_hstate(head); + if (folio_test_hugetlb(folio)) { + h = folio_hstate(folio); } else { spin_unlock_irq(&hugetlb_lock); return 0; @@ -2858,10 +2857,10 @@ int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list) if (hstate_is_gigantic(h)) return -ENOMEM; - if (page_count(head) && !isolate_hugetlb(head, list)) + if (folio_ref_count(folio) && !isolate_hugetlb(&folio->page, list)) ret = 0; - else if (!page_count(head)) - ret = alloc_and_dissolve_huge_page(h, head, list); + else if (!folio_ref_count(folio)) + ret = alloc_and_dissolve_huge_page(h, &folio->page, list); return ret; } From 0356c4b96f6890dd61af4c902f681764f4bdba09 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Tue, 1 Nov 2022 15:30:56 -0700 Subject: [PATCH 2986/4122] mm/hugetlb: convert free_huge_page to folios Use folios inside free_huge_page(), this is in preparation for converting hugetlb_cgroup_uncharge_page() to take in a folio. 
Link: https://lkml.kernel.org/r/20221101223059.460937-7-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Mike Kravetz Reviewed-by: Muchun Song Cc: Aneesh Kumar K.V Cc: Bui Quang Minh Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Mina Almasry Signed-off-by: Andrew Morton --- mm/hugetlb.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 666a771c9a3d..9841fb0fcaf9 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1704,21 +1704,22 @@ void free_huge_page(struct page *page) * Can't pass hstate in here because it is called from the * compound page destructor. */ - struct hstate *h = page_hstate(page); - int nid = page_to_nid(page); - struct hugepage_subpool *spool = hugetlb_page_subpool(page); + struct folio *folio = page_folio(page); + struct hstate *h = folio_hstate(folio); + int nid = folio_nid(folio); + struct hugepage_subpool *spool = hugetlb_folio_subpool(folio); bool restore_reserve; unsigned long flags; - VM_BUG_ON_PAGE(page_count(page), page); - VM_BUG_ON_PAGE(page_mapcount(page), page); + VM_BUG_ON_FOLIO(folio_ref_count(folio), folio); + VM_BUG_ON_FOLIO(folio_mapcount(folio), folio); - hugetlb_set_page_subpool(page, NULL); - if (PageAnon(page)) - __ClearPageAnonExclusive(page); - page->mapping = NULL; - restore_reserve = HPageRestoreReserve(page); - ClearHPageRestoreReserve(page); + hugetlb_set_folio_subpool(folio, NULL); + if (folio_test_anon(folio)) + __ClearPageAnonExclusive(&folio->page); + folio->mapping = NULL; + restore_reserve = folio_test_hugetlb_restore_reserve(folio); + folio_clear_hugetlb_restore_reserve(folio); /* * If HPageRestoreReserve was set on page, page allocation consumed a @@ -1740,7 +1741,7 @@ void free_huge_page(struct page *page) } spin_lock_irqsave(&hugetlb_lock, flags); - ClearHPageMigratable(page); + folio_clear_hugetlb_migratable(folio); hugetlb_cgroup_uncharge_page(hstate_index(h), pages_per_huge_page(h), page); 
hugetlb_cgroup_uncharge_page_rsvd(hstate_index(h), @@ -1748,7 +1749,7 @@ void free_huge_page(struct page *page) if (restore_reserve) h->resv_huge_pages++; - if (HPageTemporary(page)) { + if (folio_test_hugetlb_temporary(folio)) { remove_hugetlb_page(h, page, false); spin_unlock_irqrestore(&hugetlb_lock, flags); update_and_free_page(h, page, true); From d4ab0316cc33aeedf6dcb1c2c25e097a25766132 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Tue, 1 Nov 2022 15:30:57 -0700 Subject: [PATCH 2987/4122] mm/hugetlb_cgroup: convert hugetlb_cgroup_uncharge_page() to folios Continue to use a folio inside free_huge_page() by converting hugetlb_cgroup_uncharge_page*() to folios. Link: https://lkml.kernel.org/r/20221101223059.460937-8-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Mike Kravetz Reviewed-by: Muchun Song Cc: Aneesh Kumar K.V Cc: Bui Quang Minh Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Mina Almasry Signed-off-by: Andrew Morton --- include/linux/hugetlb_cgroup.h | 16 ++++++++-------- mm/hugetlb.c | 15 +++++++++------ mm/hugetlb_cgroup.c | 21 ++++++++++----------- 3 files changed, 27 insertions(+), 25 deletions(-) diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h index 789b6fef176d..c70f92fe493e 100644 --- a/include/linux/hugetlb_cgroup.h +++ b/include/linux/hugetlb_cgroup.h @@ -158,10 +158,10 @@ extern void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages, extern void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages, struct hugetlb_cgroup *h_cg, struct page *page); -extern void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, - struct page *page); -extern void hugetlb_cgroup_uncharge_page_rsvd(int idx, unsigned long nr_pages, - struct page *page); +extern void hugetlb_cgroup_uncharge_folio(int idx, unsigned long nr_pages, + struct folio *folio); +extern void hugetlb_cgroup_uncharge_folio_rsvd(int idx, unsigned long nr_pages, + struct folio *folio); extern void 
hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages, struct hugetlb_cgroup *h_cg); @@ -254,14 +254,14 @@ hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages, { } -static inline void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, - struct page *page) +static inline void hugetlb_cgroup_uncharge_folio(int idx, unsigned long nr_pages, + struct folio *folio) { } -static inline void hugetlb_cgroup_uncharge_page_rsvd(int idx, +static inline void hugetlb_cgroup_uncharge_folio_rsvd(int idx, unsigned long nr_pages, - struct page *page) + struct folio *folio) { } static inline void hugetlb_cgroup_uncharge_cgroup(int idx, diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 9841fb0fcaf9..e1950fff6aa9 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1742,10 +1742,10 @@ void free_huge_page(struct page *page) spin_lock_irqsave(&hugetlb_lock, flags); folio_clear_hugetlb_migratable(folio); - hugetlb_cgroup_uncharge_page(hstate_index(h), - pages_per_huge_page(h), page); - hugetlb_cgroup_uncharge_page_rsvd(hstate_index(h), - pages_per_huge_page(h), page); + hugetlb_cgroup_uncharge_folio(hstate_index(h), + pages_per_huge_page(h), folio); + hugetlb_cgroup_uncharge_folio_rsvd(hstate_index(h), + pages_per_huge_page(h), folio); if (restore_reserve) h->resv_huge_pages++; @@ -2872,6 +2872,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, struct hugepage_subpool *spool = subpool_vma(vma); struct hstate *h = hstate_vma(vma); struct page *page; + struct folio *folio; long map_chg, map_commit; long gbl_chg; int ret, idx; @@ -2935,6 +2936,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, * a reservation exists for the allocation. 
*/ page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve, gbl_chg); + if (!page) { spin_unlock_irq(&hugetlb_lock); page = alloc_buddy_huge_page_with_mpol(h, vma, addr); @@ -2949,6 +2951,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, set_page_refcounted(page); /* Fall through */ } + folio = page_folio(page); hugetlb_cgroup_commit_charge(idx, pages_per_huge_page(h), h_cg, page); /* If allocation is not consuming a reservation, also store the * hugetlb_cgroup pointer on the page. @@ -2978,8 +2981,8 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, rsv_adjust = hugepage_subpool_put_pages(spool, 1); hugetlb_acct_memory(h, -rsv_adjust); if (deferred_reserve) - hugetlb_cgroup_uncharge_page_rsvd(hstate_index(h), - pages_per_huge_page(h), page); + hugetlb_cgroup_uncharge_folio_rsvd(hstate_index(h), + pages_per_huge_page(h), folio); } return page; diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index b1b18337a56a..4cd57f979245 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -346,11 +346,10 @@ void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages, /* * Should be called with hugetlb_lock held */ -static void __hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, - struct page *page, bool rsvd) +static void __hugetlb_cgroup_uncharge_folio(int idx, unsigned long nr_pages, + struct folio *folio, bool rsvd) { struct hugetlb_cgroup *h_cg; - struct folio *folio = page_folio(page); if (hugetlb_cgroup_disabled()) return; @@ -368,27 +367,27 @@ static void __hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, css_put(&h_cg->css); else { unsigned long usage = - h_cg->nodeinfo[page_to_nid(page)]->usage[idx]; + h_cg->nodeinfo[folio_nid(folio)]->usage[idx]; /* * This write is not atomic due to fetching usage and writing * to it, but that's fine because we call this with * hugetlb_lock held anyway. 
*/ - WRITE_ONCE(h_cg->nodeinfo[page_to_nid(page)]->usage[idx], + WRITE_ONCE(h_cg->nodeinfo[folio_nid(folio)]->usage[idx], usage - nr_pages); } } -void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, - struct page *page) +void hugetlb_cgroup_uncharge_folio(int idx, unsigned long nr_pages, + struct folio *folio) { - __hugetlb_cgroup_uncharge_page(idx, nr_pages, page, false); + __hugetlb_cgroup_uncharge_folio(idx, nr_pages, folio, false); } -void hugetlb_cgroup_uncharge_page_rsvd(int idx, unsigned long nr_pages, - struct page *page) +void hugetlb_cgroup_uncharge_folio_rsvd(int idx, unsigned long nr_pages, + struct folio *folio) { - __hugetlb_cgroup_uncharge_page(idx, nr_pages, page, true); + __hugetlb_cgroup_uncharge_folio(idx, nr_pages, folio, true); } static void __hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages, From 541b7c7b3ec0555a09782b463bcbc2cb86d97085 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Tue, 1 Nov 2022 15:30:58 -0700 Subject: [PATCH 2988/4122] mm/hugeltb_cgroup: convert hugetlb_cgroup_commit_charge*() to folios Convert hugetlb_cgroup_commit_charge*() to internally use folios to clean up the code after __set_hugetlb_cgroup() was changed to take a folio. 
Link: https://lkml.kernel.org/r/20221101223059.460937-9-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Mike Kravetz Reviewed-by: Muchun Song Cc: Aneesh Kumar K.V Cc: Bui Quang Minh Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Mina Almasry Signed-off-by: Andrew Morton --- mm/hugetlb_cgroup.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index 4cd57f979245..d9e4425d81ac 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -310,21 +310,21 @@ int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages, /* Should be called with hugetlb_lock held */ static void __hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages, struct hugetlb_cgroup *h_cg, - struct page *page, bool rsvd) + struct folio *folio, bool rsvd) { if (hugetlb_cgroup_disabled() || !h_cg) return; - __set_hugetlb_cgroup(page_folio(page), h_cg, rsvd); + __set_hugetlb_cgroup(folio, h_cg, rsvd); if (!rsvd) { unsigned long usage = - h_cg->nodeinfo[page_to_nid(page)]->usage[idx]; + h_cg->nodeinfo[folio_nid(folio)]->usage[idx]; /* * This write is not atomic due to fetching usage and writing * to it, but that's fine because we call this with * hugetlb_lock held anyway. 
*/ - WRITE_ONCE(h_cg->nodeinfo[page_to_nid(page)]->usage[idx], + WRITE_ONCE(h_cg->nodeinfo[folio_nid(folio)]->usage[idx], usage + nr_pages); } } @@ -333,14 +333,18 @@ void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages, struct hugetlb_cgroup *h_cg, struct page *page) { - __hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, page, false); + struct folio *folio = page_folio(page); + + __hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, folio, false); } void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages, struct hugetlb_cgroup *h_cg, struct page *page) { - __hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, page, true); + struct folio *folio = page_folio(page); + + __hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, folio, true); } /* From 345c62d163496ae4b5c1ce530b1588067d8f5a8b Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Tue, 1 Nov 2022 15:30:59 -0700 Subject: [PATCH 2989/4122] mm/hugetlb: convert move_hugetlb_state() to folios Clean up unmap_and_move_huge_page() by converting move_hugetlb_state() to take in folios. 
[akpm@linux-foundation.org: fix CONFIG_HUGETLB_PAGE=n build] Link: https://lkml.kernel.org/r/20221101223059.460937-10-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Mike Kravetz Reviewed-by: Muchun Song Cc: Aneesh Kumar K.V Cc: Bui Quang Minh Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Mina Almasry Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 11 ++++++++--- mm/hugetlb.c | 22 ++++++++++++---------- mm/migrate.c | 4 ++-- 3 files changed, 22 insertions(+), 15 deletions(-) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 65ea34022aa2..58a30938a9b1 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -187,7 +187,7 @@ int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison); int get_huge_page_for_hwpoison(unsigned long pfn, int flags, bool *migratable_cleared); void putback_active_hugepage(struct page *page); -void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason); +void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason); void free_huge_page(struct page *page); void hugetlb_fix_reserve_counts(struct inode *inode); extern struct mutex *hugetlb_fault_mutex_table; @@ -407,8 +407,8 @@ static inline void putback_active_hugepage(struct page *page) { } -static inline void move_hugetlb_state(struct page *oldpage, - struct page *newpage, int reason) +static inline void move_hugetlb_state(struct folio *old_folio, + struct folio *new_folio, int reason) { } @@ -991,6 +991,11 @@ void hugetlb_unregister_node(struct node *node); #else /* CONFIG_HUGETLB_PAGE */ struct hstate {}; +static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio) +{ + return NULL; +} + static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage) { return NULL; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index e1950fff6aa9..76ebefe02827 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -7324,15 +7324,15 @@ void 
putback_active_hugepage(struct page *page) put_page(page); } -void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason) +void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason) { - struct hstate *h = page_hstate(oldpage); + struct hstate *h = folio_hstate(old_folio); - hugetlb_cgroup_migrate(page_folio(oldpage), page_folio(newpage)); - set_page_owner_migrate_reason(newpage, reason); + hugetlb_cgroup_migrate(old_folio, new_folio); + set_page_owner_migrate_reason(&new_folio->page, reason); /* - * transfer temporary state of the new huge page. This is + * transfer temporary state of the new hugetlb folio. This is * reverse to other transitions because the newpage is going to * be final while the old one will be freed so it takes over * the temporary status. @@ -7341,12 +7341,14 @@ void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason) * here as well otherwise the global surplus count will not match * the per-node's. */ - if (HPageTemporary(newpage)) { - int old_nid = page_to_nid(oldpage); - int new_nid = page_to_nid(newpage); + if (folio_test_hugetlb_temporary(new_folio)) { + int old_nid = folio_nid(old_folio); + int new_nid = folio_nid(new_folio); + + + folio_set_hugetlb_temporary(old_folio); + folio_clear_hugetlb_temporary(new_folio); - SetHPageTemporary(oldpage); - ClearHPageTemporary(newpage); /* * There is no need to transfer the per-node surplus state diff --git a/mm/migrate.c b/mm/migrate.c index f8c85b42e2bc..4aea647a0180 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1298,7 +1298,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page, * folio_mapping() set, hugetlbfs specific move page routine will not * be called and we could leak usage counts for subpools. 
*/ - if (hugetlb_page_subpool(hpage) && !folio_mapping(src)) { + if (hugetlb_folio_subpool(src) && !folio_mapping(src)) { rc = -EBUSY; goto out_unlock; } @@ -1348,7 +1348,7 @@ put_anon: put_anon_vma(anon_vma); if (rc == MIGRATEPAGE_SUCCESS) { - move_hugetlb_state(hpage, new_hpage, reason); + move_hugetlb_state(src, dst, reason); put_new_page = NULL; } From 44467bbb7e81ebcef2a5bfc9d6546bf7cd015374 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 1 Nov 2022 22:03:21 +0000 Subject: [PATCH 2990/4122] mm/damon/core: add a callback for scheme target regions check Patch series "efficiently expose damos action tried regions information". DAMON users can retrieve the monitoring results via 'after_aggregation' callbacks if the user is using the kernel API, or 'damon_aggregated' tracepoint if the user is in the user space. Those are useful if full monitoring results are necessary. However, if the user has interest in only a snapshot of the results for some regions having a specific access pattern, the interfaces could be inefficient. For example, some users only want to know which memory regions are not accessed for more than a specific time at the moment. Also, some DAMOS users would want to know exactly to what memory regions the schemes' actions tried to be applied, for debugging or tuning. As DAMOS has its internal mechanism for quota and regions prioritization, the users would need to simulate DAMOS' mechanism against the monitoring results. That's unnecessarily complex. This patchset implements DAMON kernel API callbacks and sysfs directory for efficient exposure of the information for the use cases. The new callback will be called for each region when a DAMOS action is about to be applied to it. The sysfs directory will be called 'tried_regions' and placed under each scheme sysfs directory. Users can write a special keyword, 'update_schemes_tried_regions', to the 'state' file of a kdamond sysfs directory.
Then, DAMON sysfs interface will fill the directory with the information of the regions that the corresponding scheme's action was tried to be applied to for the next one aggregation interval. Patches Sequence ---------------- The first one (patch 1) implements the callback for the kernel space users. The following two patches (patches 2 and 3) implement sysfs directories for the information and its sub directories. Two patches (patches 4 and 5) for implementing the special keywords for filling the data to and cleaning up the directories follow. Patch 6 adds a selftest for the new sysfs directory. Finally, two patches (patches 7 and 8) document the new feature in the administrator guide and the ABI document. This patch (of 8): Getting DAMON monitoring results of only a specific access pattern (e.g., getting address ranges of memory that was not accessed at all for two minutes) can be useful for efficient monitoring of the system. The information can also be helpful for deep level investigation of DAMON-based operation schemes. For that, users need to record (in case of the user space users) or iterate (in case of the kernel space users) full monitoring results and filter it out for the specific access pattern. In case of the DAMOS investigation, users will even need to simulate DAMOS' quota and prioritization mechanisms. It's inefficient and complex. Add a new DAMON callback that will be called before each scheme is applied to each region. DAMON kernel API users will be able to do the query-like monitoring results collection, or DAMOS investigation in an efficient and simple way using it. Commits for providing the capability to the user space users will follow.
Link: https://lkml.kernel.org/r/20221101220328.95765-1-sj@kernel.org Link: https://lkml.kernel.org/r/20221101220328.95765-2-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Cc: Shuah Khan Signed-off-by: Andrew Morton --- include/linux/damon.h | 5 +++++ mm/damon/core.c | 6 +++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/include/linux/damon.h b/include/linux/damon.h index 620ada094c3b..35630634d790 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -357,6 +357,7 @@ struct damon_operations { * @after_wmarks_check: Called after each schemes' watermarks check. * @after_sampling: Called after each sampling. * @after_aggregation: Called after each aggregation. + * @before_damos_apply: Called before applying DAMOS action. * @before_terminate: Called before terminating the monitoring. * @private: User private data. * @@ -385,6 +386,10 @@ struct damon_callback { int (*after_wmarks_check)(struct damon_ctx *context); int (*after_sampling)(struct damon_ctx *context); int (*after_aggregation)(struct damon_ctx *context); + int (*before_damos_apply)(struct damon_ctx *context, + struct damon_target *target, + struct damon_region *region, + struct damos *scheme); void (*before_terminate)(struct damon_ctx *context); }; diff --git a/mm/damon/core.c b/mm/damon/core.c index 80d5937fe337..ceec75b88ef9 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -772,6 +772,7 @@ static void damos_apply_scheme(struct damon_ctx *c, struct damon_target *t, unsigned long sz = damon_sz_region(r); struct timespec64 begin, end; unsigned long sz_applied = 0; + int err = 0; if (c->ops.apply_scheme) { if (quota->esz && quota->charged_sz + sz > quota->esz) { @@ -782,7 +783,10 @@ static void damos_apply_scheme(struct damon_ctx *c, struct damon_target *t, damon_split_region_at(t, r, sz); } ktime_get_coarse_ts64(&begin); - sz_applied = c->ops.apply_scheme(c, t, r, s); + if (c->callback.before_damos_apply) + err = c->callback.before_damos_apply(c, t, r, s); + 
if (!err) + sz_applied = c->ops.apply_scheme(c, t, r, s); ktime_get_coarse_ts64(&end); quota->total_charged_ns += timespec64_to_ns(&end) - timespec64_to_ns(&begin); From 5181b75f438d2e5b7f27bf48c6ea88a87c2882b7 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 1 Nov 2022 22:03:22 +0000 Subject: [PATCH 2991/4122] mm/damon/sysfs-schemes: implement schemes/tried_regions directory For efficient and simple query-like DAMON monitoring results readings and deep level investigations of DAMOS, DAMON kernel API (include/linux/damon.h) users can use 'before_damos_apply' DAMON callback. However, DAMON sysfs interface users don't have such option. Add a directory, namely 'tried_regions', under each scheme directory to use it as the interface for the purpose. Note that this commit is implementing only the directory but the data filling. After the data filling change is made, users will be able to signal DAMON to fill the directory with the regions that corresponding scheme has tried to be applied. By setting the access pattern of the scheme, users could do the efficient query-like monitoring. 
Link: https://lkml.kernel.org/r/20221101220328.95765-3-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Cc: Shuah Khan Signed-off-by: Andrew Morton --- mm/damon/sysfs-schemes.c | 57 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c index 9509d5c1e7fc..500759d8b20c 100644 --- a/mm/damon/sysfs-schemes.c +++ b/mm/damon/sysfs-schemes.c @@ -9,6 +9,36 @@ #include "sysfs-common.h" +/* + * scheme regions directory + */ + +struct damon_sysfs_scheme_regions { + struct kobject kobj; +}; + +static struct damon_sysfs_scheme_regions * +damon_sysfs_scheme_regions_alloc(void) +{ + return kzalloc(sizeof(struct damon_sysfs_scheme_regions), GFP_KERNEL); +} + +static void damon_sysfs_scheme_regions_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damon_sysfs_scheme_regions, kobj)); +} + +static struct attribute *damon_sysfs_scheme_regions_attrs[] = { + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_scheme_regions); + +static struct kobj_type damon_sysfs_scheme_regions_ktype = { + .release = damon_sysfs_scheme_regions_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_scheme_regions_groups, +}; + /* * schemes/stats directory */ @@ -635,6 +665,7 @@ struct damon_sysfs_scheme { struct damon_sysfs_quotas *quotas; struct damon_sysfs_watermarks *watermarks; struct damon_sysfs_stats *stats; + struct damon_sysfs_scheme_regions *tried_regions; }; /* This should match with enum damos_action */ @@ -743,6 +774,25 @@ static int damon_sysfs_scheme_set_stats(struct damon_sysfs_scheme *scheme) return err; } +static int damon_sysfs_scheme_set_tried_regions( + struct damon_sysfs_scheme *scheme) +{ + struct damon_sysfs_scheme_regions *tried_regions = + damon_sysfs_scheme_regions_alloc(); + int err; + + if (!tried_regions) + return -ENOMEM; + err = kobject_init_and_add(&tried_regions->kobj, + &damon_sysfs_scheme_regions_ktype, &scheme->kobj, + "tried_regions"); + if (err) 
+ kobject_put(&tried_regions->kobj); + else + scheme->tried_regions = tried_regions; + return err; +} + static int damon_sysfs_scheme_add_dirs(struct damon_sysfs_scheme *scheme) { int err; @@ -759,8 +809,14 @@ static int damon_sysfs_scheme_add_dirs(struct damon_sysfs_scheme *scheme) err = damon_sysfs_scheme_set_stats(scheme); if (err) goto put_watermarks_quotas_access_pattern_out; + err = damon_sysfs_scheme_set_tried_regions(scheme); + if (err) + goto put_tried_regions_out; return 0; +put_tried_regions_out: + kobject_put(&scheme->tried_regions->kobj); + scheme->tried_regions = NULL; put_watermarks_quotas_access_pattern_out: kobject_put(&scheme->watermarks->kobj); scheme->watermarks = NULL; @@ -781,6 +837,7 @@ static void damon_sysfs_scheme_rm_dirs(struct damon_sysfs_scheme *scheme) kobject_put(&scheme->quotas->kobj); kobject_put(&scheme->watermarks->kobj); kobject_put(&scheme->stats->kobj); + kobject_put(&scheme->tried_regions->kobj); } static ssize_t action_show(struct kobject *kobj, struct kobj_attribute *attr, From 9277d0367ba18ef4bb98bafb1209e715844cdf7e Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 1 Nov 2022 22:03:23 +0000 Subject: [PATCH 2992/4122] mm/damon/sysfs-schemes: implement scheme region directory Implement region directories under 'tried_regions' directory of each scheme DAMON sysfs directory. This directory will provide the address range, the monitored access frequency ('nr_accesses'), and the age of each DAMON region that corresponding DAMON-based operation scheme has tried to be applied. Note that this commit doesn't implement the code for filling the data but only the sysfs directory. 
Link: https://lkml.kernel.org/r/20221101220328.95765-4-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Cc: Shuah Khan Signed-off-by: Andrew Morton --- mm/damon/sysfs-schemes.c | 123 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 122 insertions(+), 1 deletion(-) diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c index 500759d8b20c..f0b5ad7e721d 100644 --- a/mm/damon/sysfs-schemes.c +++ b/mm/damon/sysfs-schemes.c @@ -9,18 +9,138 @@ #include "sysfs-common.h" +/* + * scheme region directory + */ + +struct damon_sysfs_scheme_region { + struct kobject kobj; + struct damon_addr_range ar; + unsigned int nr_accesses; + unsigned int age; + struct list_head list; +}; + +static struct damon_sysfs_scheme_region *damon_sysfs_scheme_region_alloc( + struct damon_region *region) +{ + struct damon_sysfs_scheme_region *sysfs_region = kmalloc( + sizeof(*sysfs_region), GFP_KERNEL); + + if (!sysfs_region) + return NULL; + sysfs_region->kobj = (struct kobject){}; + sysfs_region->ar = region->ar; + sysfs_region->nr_accesses = region->nr_accesses; + sysfs_region->age = region->age; + INIT_LIST_HEAD(&sysfs_region->list); + return sysfs_region; +} + +static ssize_t start_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct damon_sysfs_scheme_region *region = container_of(kobj, + struct damon_sysfs_scheme_region, kobj); + + return sysfs_emit(buf, "%lu\n", region->ar.start); +} + +static ssize_t end_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct damon_sysfs_scheme_region *region = container_of(kobj, + struct damon_sysfs_scheme_region, kobj); + + return sysfs_emit(buf, "%lu\n", region->ar.end); +} + +static ssize_t nr_accesses_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_scheme_region *region = container_of(kobj, + struct damon_sysfs_scheme_region, kobj); + + return sysfs_emit(buf, "%u\n", region->nr_accesses); +} + +static ssize_t age_show(struct 
kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct damon_sysfs_scheme_region *region = container_of(kobj, + struct damon_sysfs_scheme_region, kobj); + + return sysfs_emit(buf, "%u\n", region->age); +} + +static void damon_sysfs_scheme_region_release(struct kobject *kobj) +{ + struct damon_sysfs_scheme_region *region = container_of(kobj, + struct damon_sysfs_scheme_region, kobj); + + list_del(&region->list); + kfree(region); +} + +static struct kobj_attribute damon_sysfs_scheme_region_start_attr = + __ATTR_RO_MODE(start, 0400); + +static struct kobj_attribute damon_sysfs_scheme_region_end_attr = + __ATTR_RO_MODE(end, 0400); + +static struct kobj_attribute damon_sysfs_scheme_region_nr_accesses_attr = + __ATTR_RO_MODE(nr_accesses, 0400); + +static struct kobj_attribute damon_sysfs_scheme_region_age_attr = + __ATTR_RO_MODE(age, 0400); + +static struct attribute *damon_sysfs_scheme_region_attrs[] = { + &damon_sysfs_scheme_region_start_attr.attr, + &damon_sysfs_scheme_region_end_attr.attr, + &damon_sysfs_scheme_region_nr_accesses_attr.attr, + &damon_sysfs_scheme_region_age_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_scheme_region); + +static struct kobj_type damon_sysfs_scheme_region_ktype = { + .release = damon_sysfs_scheme_region_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_scheme_region_groups, +}; + /* * scheme regions directory */ struct damon_sysfs_scheme_regions { struct kobject kobj; + struct list_head regions_list; + int nr_regions; }; static struct damon_sysfs_scheme_regions * damon_sysfs_scheme_regions_alloc(void) { - return kzalloc(sizeof(struct damon_sysfs_scheme_regions), GFP_KERNEL); + struct damon_sysfs_scheme_regions *regions = kmalloc(sizeof(*regions), + GFP_KERNEL); + + regions->kobj = (struct kobject){}; + INIT_LIST_HEAD(&regions->regions_list); + regions->nr_regions = 0; + return regions; +} + +static void damon_sysfs_scheme_regions_rm_dirs( + struct damon_sysfs_scheme_regions *regions) +{ + struct
damon_sysfs_scheme_region *r, *next; + + list_for_each_entry_safe(r, next, &regions->regions_list, list) { + /* release function deletes it from the list */ + kobject_put(&r->kobj); + regions->nr_regions--; + } } static void damon_sysfs_scheme_regions_release(struct kobject *kobj) @@ -837,6 +957,7 @@ static void damon_sysfs_scheme_rm_dirs(struct damon_sysfs_scheme *scheme) kobject_put(&scheme->quotas->kobj); kobject_put(&scheme->watermarks->kobj); kobject_put(&scheme->stats->kobj); + damon_sysfs_scheme_regions_rm_dirs(scheme->tried_regions); kobject_put(&scheme->tried_regions->kobj); } From f1d13cacabe140305844879e495ca67837e059cc Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 1 Nov 2022 22:03:24 +0000 Subject: [PATCH 2993/4122] mm/damon/sysfs: implement DAMOS tried regions update command Implement the code for filling the data of 'tried_regions' DAMON sysfs directory. With this commit, DAMON sysfs interface users can write a special keyword, 'update_schemes_tried_regions' to the corresponding 'state' file of the kdamond. Then, DAMON sysfs interface will collect the tried regions information using the 'before_damos_apply()' callback for one aggregation interval and populate scheme region directories with the values.
[sj@kernel.org: skip tried regions update if the scheme directory was removed] Link: https://lkml.kernel.org/r/20221114182954.4745-2-sj@kernel.org Link: https://lkml.kernel.org/r/20221101220328.95765-5-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Cc: Shuah Khan Signed-off-by: Andrew Morton --- mm/damon/sysfs-common.h | 6 +++ mm/damon/sysfs-schemes.c | 80 ++++++++++++++++++++++++++++++++++++++++ mm/damon/sysfs.c | 57 +++++++++++++++++++++++++++- 3 files changed, 141 insertions(+), 2 deletions(-) diff --git a/mm/damon/sysfs-common.h b/mm/damon/sysfs-common.h index 4626b2784404..634a6e7fca78 100644 --- a/mm/damon/sysfs-common.h +++ b/mm/damon/sysfs-common.h @@ -44,3 +44,9 @@ int damon_sysfs_set_schemes(struct damon_ctx *ctx, void damon_sysfs_schemes_update_stats( struct damon_sysfs_schemes *sysfs_schemes, struct damon_ctx *ctx); + +int damon_sysfs_schemes_update_regions_start( + struct damon_sysfs_schemes *sysfs_schemes, + struct damon_ctx *ctx); + +int damon_sysfs_schemes_update_regions_stop(struct damon_ctx *ctx); diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c index f0b5ad7e721d..5f14f18bcc49 100644 --- a/mm/damon/sysfs-schemes.c +++ b/mm/damon/sysfs-schemes.c @@ -1244,3 +1244,83 @@ void damon_sysfs_schemes_update_stats( sysfs_stats->qt_exceeds = scheme->stat.qt_exceeds; } } + +/* + * damon_sysfs_schemes that need to update its schemes regions dir. Protected + * by damon_sysfs_lock + */ +static struct damon_sysfs_schemes *damon_sysfs_schemes_for_damos_callback; +static int damon_sysfs_schemes_region_idx; + +/* + * DAMON callback that called before damos apply. While this callback is + * registered, damon_sysfs_lock should be held to ensure the regions + * directories exist. 
+ */ +static int damon_sysfs_before_damos_apply(struct damon_ctx *ctx, + struct damon_target *t, struct damon_region *r, + struct damos *s) +{ + struct damos *scheme; + struct damon_sysfs_scheme_regions *sysfs_regions; + struct damon_sysfs_scheme_region *region; + struct damon_sysfs_schemes *sysfs_schemes = + damon_sysfs_schemes_for_damos_callback; + int schemes_idx = 0; + + damon_for_each_scheme(scheme, ctx) { + if (scheme == s) + break; + schemes_idx++; + } + + /* user could have removed the scheme sysfs dir */ + if (schemes_idx >= sysfs_schemes->nr) + return 0; + + sysfs_regions = sysfs_schemes->schemes_arr[schemes_idx]->tried_regions; + region = damon_sysfs_scheme_region_alloc(r); + list_add_tail(&region->list, &sysfs_regions->regions_list); + sysfs_regions->nr_regions++; + if (kobject_init_and_add(&region->kobj, + &damon_sysfs_scheme_region_ktype, + &sysfs_regions->kobj, "%d", + damon_sysfs_schemes_region_idx++)) { + kobject_put(&region->kobj); + } + return 0; +} + +/* Called from damon_sysfs_cmd_request_callback under damon_sysfs_lock */ +int damon_sysfs_schemes_update_regions_start( + struct damon_sysfs_schemes *sysfs_schemes, + struct damon_ctx *ctx) +{ + struct damos *scheme; + int schemes_idx = 0; + + damon_for_each_scheme(scheme, ctx) { + struct damon_sysfs_scheme *sysfs_scheme; + + sysfs_scheme = sysfs_schemes->schemes_arr[schemes_idx++]; + damon_sysfs_scheme_regions_rm_dirs( + sysfs_scheme->tried_regions); + } + + damon_sysfs_schemes_for_damos_callback = sysfs_schemes; + ctx->callback.before_damos_apply = damon_sysfs_before_damos_apply; + return 0; +} + +/* + * Called from damon_sysfs_cmd_request_callback under damon_sysfs_lock.
Caller + * should unlock damon_sysfs_lock which held before + * damon_sysfs_schemes_update_regions_start() + */ +int damon_sysfs_schemes_update_regions_stop(struct damon_ctx *ctx) +{ + damon_sysfs_schemes_for_damos_callback = NULL; + ctx->callback.before_damos_apply = NULL; + damon_sysfs_schemes_region_idx = 0; + return 0; +} diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index 284daf274b3e..ffb5a84059d7 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -999,6 +999,11 @@ enum damon_sysfs_cmd { * files. */ DAMON_SYSFS_CMD_UPDATE_SCHEMES_STATS, + /* + * @DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_REGIONS: Update schemes tried + * regions + */ + DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_REGIONS, /* * @NR_DAMON_SYSFS_CMDS: Total number of DAMON sysfs commands. */ @@ -1011,6 +1016,7 @@ static const char * const damon_sysfs_cmd_strs[] = { "off", "commit", "update_schemes_stats", + "update_schemes_tried_regions", }; /* @@ -1193,6 +1199,16 @@ static int damon_sysfs_set_targets(struct damon_ctx *ctx, static void damon_sysfs_before_terminate(struct damon_ctx *ctx) { struct damon_target *t, *next; + struct damon_sysfs_kdamond *kdamond; + + /* damon_sysfs_schemes_update_regions_stop() might not yet called */ + kdamond = damon_sysfs_cmd_request.kdamond; + if (kdamond && damon_sysfs_cmd_request.cmd == + DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_REGIONS && + ctx == kdamond->damon_ctx) { + damon_sysfs_schemes_update_regions_stop(ctx); + mutex_unlock(&damon_sysfs_lock); + } if (!damon_target_has_pid(ctx)) return; @@ -1225,6 +1241,27 @@ static int damon_sysfs_upd_schemes_stats(struct damon_sysfs_kdamond *kdamond) return 0; } +static int damon_sysfs_upd_schemes_regions_start( + struct damon_sysfs_kdamond *kdamond) +{ + struct damon_ctx *ctx = kdamond->damon_ctx; + + if (!ctx) + return -EINVAL; + return damon_sysfs_schemes_update_regions_start( + kdamond->contexts->contexts_arr[0]->schemes, ctx); +} + +static int damon_sysfs_upd_schemes_regions_stop( + struct damon_sysfs_kdamond *kdamond) +{ 
+ struct damon_ctx *ctx = kdamond->damon_ctx; + + if (!ctx) + return -EINVAL; + return damon_sysfs_schemes_update_regions_stop(ctx); +} + static inline bool damon_sysfs_kdamond_running( struct damon_sysfs_kdamond *kdamond) { @@ -1277,10 +1314,12 @@ static int damon_sysfs_commit_input(struct damon_sysfs_kdamond *kdamond) static int damon_sysfs_cmd_request_callback(struct damon_ctx *c) { struct damon_sysfs_kdamond *kdamond; + static bool damon_sysfs_schemes_regions_updating; int err = 0; /* avoid deadlock due to concurrent state_store('off') */ - if (!mutex_trylock(&damon_sysfs_lock)) + if (!damon_sysfs_schemes_regions_updating && + !mutex_trylock(&damon_sysfs_lock)) return 0; kdamond = damon_sysfs_cmd_request.kdamond; if (!kdamond || kdamond->damon_ctx != c) @@ -1292,13 +1331,27 @@ static int damon_sysfs_cmd_request_callback(struct damon_ctx *c) case DAMON_SYSFS_CMD_COMMIT: err = damon_sysfs_commit_input(kdamond); break; + case DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_REGIONS: + if (!damon_sysfs_schemes_regions_updating) { + err = damon_sysfs_upd_schemes_regions_start(kdamond); + if (!err) { + damon_sysfs_schemes_regions_updating = true; + goto keep_lock_out; + } + } else { + err = damon_sysfs_upd_schemes_regions_stop(kdamond); + damon_sysfs_schemes_regions_updating = false; + } + break; default: break; } /* Mark the request as invalid now. */ damon_sysfs_cmd_request.kdamond = NULL; out: - mutex_unlock(&damon_sysfs_lock); + if (!damon_sysfs_schemes_regions_updating) + mutex_unlock(&damon_sysfs_lock); +keep_lock_out: return err; } From 772c15e5adcb32a42dbbcdb905ec49f662312976 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 1 Nov 2022 22:03:25 +0000 Subject: [PATCH 2994/4122] mm/damon/sysfs-schemes: implement DAMOS-tried regions clear command When there are huge number of DAMON regions that specific scheme actions are tried to be applied, directories and files under 'tried_regions' scheme directory could waste some memory. 
Add another special input keyword ('clear_schemes_tried_regions') for 'state' file of each kdamond sysfs directory that can be used for cleanup of the 'tried_regions' sub-directories. [sj@kernel.org: skip regions clearing if the scheme directory was removed] Link: https://lkml.kernel.org/r/20221114182954.4745-3-sj@kernel.org Link: https://lkml.kernel.org/r/20221101220328.95765-6-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Cc: Shuah Khan Signed-off-by: Andrew Morton --- mm/damon/sysfs-common.h | 4 ++++ mm/damon/sysfs-schemes.c | 14 +++++++++++++- mm/damon/sysfs.c | 20 ++++++++++++++++++++ 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/mm/damon/sysfs-common.h b/mm/damon/sysfs-common.h index 634a6e7fca78..604a6cbc3ede 100644 --- a/mm/damon/sysfs-common.h +++ b/mm/damon/sysfs-common.h @@ -50,3 +50,7 @@ int damon_sysfs_schemes_update_regions_start( struct damon_ctx *ctx); int damon_sysfs_schemes_update_regions_stop(struct damon_ctx *ctx); + +int damon_sysfs_schemes_clear_regions( + struct damon_sysfs_schemes *sysfs_schemes, + struct damon_ctx *ctx); diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c index 5f14f18bcc49..81fc4d27f4e4 100644 --- a/mm/damon/sysfs-schemes.c +++ b/mm/damon/sysfs-schemes.c @@ -1292,7 +1292,7 @@ static int damon_sysfs_before_damos_apply(struct damon_ctx *ctx, } /* Called from damon_sysfs_cmd_request_callback under damon_sysfs_lock */ -int damon_sysfs_schemes_update_regions_start( +int damon_sysfs_schemes_clear_regions( struct damon_sysfs_schemes *sysfs_schemes, struct damon_ctx *ctx) { @@ -1302,11 +1302,23 @@ int damon_sysfs_schemes_update_regions_start( damon_for_each_scheme(scheme, ctx) { struct damon_sysfs_scheme *sysfs_scheme; + /* user could have removed the scheme sysfs dir */ + if (schemes_idx >= sysfs_schemes->nr) + break; + sysfs_scheme = sysfs_schemes->schemes_arr[schemes_idx++]; damon_sysfs_scheme_regions_rm_dirs( sysfs_scheme->tried_regions); } + return 0; +} +/* Called from 
damon_sysfs_cmd_request_callback under damon_sysfs_lock */ +int damon_sysfs_schemes_update_regions_start( + struct damon_sysfs_schemes *sysfs_schemes, + struct damon_ctx *ctx) +{ + damon_sysfs_schemes_clear_regions(sysfs_schemes, ctx); damon_sysfs_schemes_for_damos_callback = sysfs_schemes; ctx->callback.before_damos_apply = damon_sysfs_before_damos_apply; return 0; diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index ffb5a84059d7..aeb0beb1da91 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -1004,6 +1004,11 @@ enum damon_sysfs_cmd { * regions */ DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_REGIONS, + /* + * @DAMON_SYSFS_CMD_CLEAR_SCHEMES_TRIED_REGIONS: Clear schemes tried + * regions + */ + DAMON_SYSFS_CMD_CLEAR_SCHEMES_TRIED_REGIONS, /* * @NR_DAMON_SYSFS_CMDS: Total number of DAMON sysfs commands. */ @@ -1017,6 +1022,7 @@ static const char * const damon_sysfs_cmd_strs[] = { "commit", "update_schemes_stats", "update_schemes_tried_regions", + "clear_schemes_tried_regions", }; /* @@ -1262,6 +1268,17 @@ static int damon_sysfs_upd_schemes_regions_stop( return damon_sysfs_schemes_update_regions_stop(ctx); } +static int damon_sysfs_clear_schemes_regions( + struct damon_sysfs_kdamond *kdamond) +{ + struct damon_ctx *ctx = kdamond->damon_ctx; + + if (!ctx) + return -EINVAL; + return damon_sysfs_schemes_clear_regions( + kdamond->contexts->contexts_arr[0]->schemes, ctx); +} + static inline bool damon_sysfs_kdamond_running( struct damon_sysfs_kdamond *kdamond) { @@ -1343,6 +1360,9 @@ static int damon_sysfs_cmd_request_callback(struct damon_ctx *c) damon_sysfs_schemes_regions_updating = false; } break; + case DAMON_SYSFS_CMD_CLEAR_SCHEMES_TRIED_REGIONS: + err = damon_sysfs_clear_schemes_regions(kdamond); + break; default: break; } From 2b3ee3f66c673312ea377bcfb54cb2b9abc8473b Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 1 Nov 2022 22:03:26 +0000 Subject: [PATCH 2995/4122] tools/selftets/damon/sysfs: test tried_regions directory existence Add a simple test case 
for ensuring tried_regions directory existence. Link: https://lkml.kernel.org/r/20221101220328.95765-7-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/sysfs.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/testing/selftests/damon/sysfs.sh b/tools/testing/selftests/damon/sysfs.sh index 89592c64462f..db4942383a50 100644 --- a/tools/testing/selftests/damon/sysfs.sh +++ b/tools/testing/selftests/damon/sysfs.sh @@ -80,6 +80,12 @@ test_range() ensure_file "$range_dir/max" "exist" 600 } +test_tried_regions() +{ + tried_regions_dir=$1 + ensure_dir "$tried_regions_dir" "exist" +} + test_stats() { stats_dir=$1 @@ -138,6 +144,7 @@ test_scheme() test_quotas "$scheme_dir/quotas" test_watermarks "$scheme_dir/watermarks" test_stats "$scheme_dir/stats" + test_tried_regions "$scheme_dir/tried_regions" } test_schemes() From 7f0a86f3c99bc9736445ef64aa65c9bd6161a47b Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 1 Nov 2022 22:03:27 +0000 Subject: [PATCH 2996/4122] Docs/admin-guide/mm/damon/usage: document schemes//tried_regions sysfs directory Document 'tried_regions' directory in DAMON sysfs interface usage in the administrator guide. Link: https://lkml.kernel.org/r/20221101220328.95765-8-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Cc: Shuah Khan Signed-off-by: Andrew Morton --- Documentation/admin-guide/mm/damon/usage.rst | 45 ++++++++++++++++++-- 1 file changed, 42 insertions(+), 3 deletions(-) diff --git a/Documentation/admin-guide/mm/damon/usage.rst b/Documentation/admin-guide/mm/damon/usage.rst index c17e02e1e426..1a5b6b71efa1 100644 --- a/Documentation/admin-guide/mm/damon/usage.rst +++ b/Documentation/admin-guide/mm/damon/usage.rst @@ -88,6 +88,9 @@ comma (","). 
:: │ │ │ │ │ │ │ │ weights/sz_permil,nr_accesses_permil,age_permil │ │ │ │ │ │ │ watermarks/metric,interval_us,high,mid,low │ │ │ │ │ │ │ stats/nr_tried,sz_tried,nr_applied,sz_applied,qt_exceeds + │ │ │ │ │ │ │ tried_regions/ + │ │ │ │ │ │ │ │ 0/start,end,nr_accesses,age + │ │ │ │ │ │ │ │ ... │ │ │ │ │ │ ... │ │ │ │ ... │ │ ... @@ -125,7 +128,14 @@ in the state. Writing ``commit`` to the ``state`` file makes kdamond reads the user inputs in the sysfs files except ``state`` file again. Writing ``update_schemes_stats`` to ``state`` file updates the contents of stats files for each DAMON-based operation scheme of the kdamond. For details of the -stats, please refer to :ref:`stats section <sysfs_schemes_stats>`. +stats, please refer to :ref:`stats section <sysfs_schemes_stats>`. Writing +``update_schemes_tried_regions`` to ``state`` file updates the DAMON-based +operation scheme action tried regions directory for each DAMON-based operation +scheme of the kdamond. Writing ``clear_schemes_tried_regions`` to ``state`` +file clears the DAMON-based operation scheme action tried regions directory for +each DAMON-based operation scheme of the kdamond. For details of the +DAMON-based operation scheme action tried regions directory, please refer to +:ref:`tried_regions section <sysfs_schemes_tried_regions>`. If the state is ``on``, reading ``pid`` shows the pid of the kdamond thread. @@ -166,6 +176,8 @@ You can set and get what type of monitoring operations DAMON will use for the context by writing one of the keywords listed in ``avail_operations`` file and reading from the ``operations`` file. +.. _sysfs_monitoring_attrs: + contexts/<N>/monitoring_attrs/ ------------------------------ @@ -255,8 +267,9 @@ to ``N-1``. Each directory represents each DAMON-based operation scheme. schemes/<N>/ ------------ -In each scheme directory, four directories (``access_pattern``, ``quotas``, -``watermarks``, and ``stats``) and one file (``action``) exist. 
+In each scheme directory, five directories (``access_pattern``, ``quotas``, +``watermarks``, ``stats``, and ``tried_regions``) and one file (``action``) +exist. The ``action`` file is for setting and getting what action you want to apply to memory regions having specific access pattern of the interest. The keywords @@ -351,6 +364,32 @@ should ask DAMON sysfs interface to updte the content of the files for the stats by writing a special keyword, ``update_schemes_stats`` to the relevant ``kdamonds/<N>/state`` file. +.. _sysfs_schemes_tried_regions: + +schemes/<N>/tried_regions/ +-------------------------- + +When a special keyword, ``update_schemes_tried_regions``, is written to the +relevant ``kdamonds/<N>/state`` file, DAMON creates directories named integer +starting from ``0`` under this directory. Each directory contains files +exposing detailed information about each of the memory regions that the +corresponding scheme's ``action`` has tried to be applied under this directory, +during next :ref:`aggregation interval <sysfs_monitoring_attrs>`. The +information includes address range, ``nr_accesses``, and ``age`` of the +region. + +The directories will be removed when another special keyword, +``clear_schemes_tried_regions``, is written to the relevant +``kdamonds/<N>/state`` file. + +tried_regions/<N>/ +------------------ + +In each region directory, you will find four files (``start``, ``end``, +``nr_accesses``, and ``age``). Reading the files will show the start and end +addresses, ``nr_accesses``, and ``age`` of the region that corresponding +DAMON-based operation scheme ``action`` has tried to be applied. + Example ~~~~~~~ From 1b0006daa36f2ccb7f213007365d504bcd016312 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 1 Nov 2022 22:03:28 +0000 Subject: [PATCH 2997/4122] Docs/ABI/damon: document 'schemes/<N>/tried_regions' sysfs directory Update DAMON ABI document for the 'tried_regions' directory of DAMON sysfs interface. 
Link: https://lkml.kernel.org/r/20221101220328.95765-9-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Cc: Shuah Khan Signed-off-by: Andrew Morton --- .../ABI/testing/sysfs-kernel-mm-damon | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-damon b/Documentation/ABI/testing/sysfs-kernel-mm-damon index 08b9df323560..13397b853692 100644 --- a/Documentation/ABI/testing/sysfs-kernel-mm-damon +++ b/Documentation/ABI/testing/sysfs-kernel-mm-damon @@ -27,6 +27,10 @@ Description: Writing 'on' or 'off' to this file makes the kdamond starts or makes the kdamond reads the user inputs in the sysfs files except 'state' again. Writing 'update_schemes_stats' to the file updates contents of schemes stats files of the kdamond. + Writing 'update_schemes_tried_regions' to the file updates + contents of 'tried_regions' directory of every scheme directory + of this kdamond. Writing 'clear_schemes_tried_regions' to the + file removes contents of the 'tried_regions' directory. What: /sys/kernel/mm/damon/admin/kdamonds//pid Date: Mar 2022 @@ -283,3 +287,31 @@ Date: Mar 2022 Contact: SeongJae Park Description: Reading this file returns the number of the exceed events of the scheme's quotas. + +What: /sys/kernel/mm/damon/admin/kdamonds//contexts//schemes//tried_regions//start +Date: Oct 2022 +Contact: SeongJae Park +Description: Reading this file returns the start address of a memory region + that corresponding DAMON-based Operation Scheme's action has + tried to be applied. + +What: /sys/kernel/mm/damon/admin/kdamonds//contexts//schemes//tried_regions//end +Date: Oct 2022 +Contact: SeongJae Park +Description: Reading this file returns the end address of a memory region + that corresponding DAMON-based Operation Scheme's action has + tried to be applied. 
+ +What: /sys/kernel/mm/damon/admin/kdamonds//contexts//schemes//tried_regions//nr_accesses +Date: Oct 2022 +Contact: SeongJae Park +Description: Reading this file returns the 'nr_accesses' of a memory region + that corresponding DAMON-based Operation Scheme's action has + tried to be applied. + +What: /sys/kernel/mm/damon/admin/kdamonds//contexts//schemes//tried_regions//age +Date: Oct 2022 +Contact: SeongJae Park +Description: Reading this file returns the 'age' of a memory region that + corresponding DAMON-based Operation Scheme's action has tried + to be applied. From e6aff38b2e25e934e95471351c96d1410bb17561 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 1 Nov 2022 22:14:08 +0100 Subject: [PATCH 2998/4122] mm/damon: use kstrtobool() instead of strtobool() strtobool() is the same as kstrtobool(). However, the latter is more used within the kernel. In order to remove strtobool() and slightly simplify kstrtox.h, switch to the other function name. While at it, include the corresponding header file () Link: https://lkml.kernel.org/r/ed2b46489a513988688decb53850339cc228940c.1667336095.git.christophe.jaillet@wanadoo.fr Signed-off-by: Christophe JAILLET Reviewed-by: SeongJae Park Signed-off-by: Andrew Morton --- mm/damon/lru_sort.c | 3 ++- mm/damon/reclaim.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/damon/lru_sort.c b/mm/damon/lru_sort.c index 2a532e3983df..7b8fce2f67a8 100644 --- a/mm/damon/lru_sort.c +++ b/mm/damon/lru_sort.c @@ -8,6 +8,7 @@ #define pr_fmt(fmt) "damon-lru-sort: " fmt #include +#include #include #include "modules-common.h" @@ -241,7 +242,7 @@ static int damon_lru_sort_enabled_store(const char *val, bool enable; int err; - err = strtobool(val, &enable); + err = kstrtobool(val, &enable); if (err) return err; diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c index e57604bec06d..e82631f39481 100644 --- a/mm/damon/reclaim.c +++ b/mm/damon/reclaim.c @@ -8,6 +8,7 @@ #define pr_fmt(fmt) "damon-reclaim: " fmt 
#include +#include #include #include "modules-common.h" @@ -187,7 +188,7 @@ static int damon_reclaim_enabled_store(const char *val, bool enable; int err; - err = strtobool(val, &enable); + err = kstrtobool(val, &enable); if (err) return err; From f15be1b8d449a8eebe82d77164bf760804753651 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 1 Nov 2022 22:14:09 +0100 Subject: [PATCH 2999/4122] mm: use kstrtobool() instead of strtobool() strtobool() is the same as kstrtobool(). However, the latter is more used within the kernel. In order to remove strtobool() and slightly simplify kstrtox.h, switch to the other function name. While at it, include the corresponding header file () Link: https://lkml.kernel.org/r/03f9401a6c8b87a1c786a2138d16b048f8d0eb53.1667336095.git.christophe.jaillet@wanadoo.fr Signed-off-by: Christophe JAILLET Acked-by: Pasha Tatashin Signed-off-by: Andrew Morton --- mm/page_table_check.c | 3 ++- mm/usercopy.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/page_table_check.c b/mm/page_table_check.c index 433dbce13fe1..93e633c1d587 100644 --- a/mm/page_table_check.c +++ b/mm/page_table_check.c @@ -4,6 +4,7 @@ * Copyright (c) 2021, Google LLC. 
* Pasha Tatashin */ +#include #include #include @@ -23,7 +24,7 @@ EXPORT_SYMBOL(page_table_check_disabled); static int __init early_page_table_check_param(char *buf) { - return strtobool(buf, &__page_table_check_enabled); + return kstrtobool(buf, &__page_table_check_enabled); } early_param("page_table_check", early_page_table_check_param); diff --git a/mm/usercopy.c b/mm/usercopy.c index c1ee15a98633..4c3164beacec 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -258,7 +259,7 @@ static bool enable_checks __initdata = true; static int __init parse_hardened_usercopy(char *str) { - if (strtobool(str, &enable_checks)) + if (kstrtobool(str, &enable_checks)) pr_warn("Invalid option string for hardened_usercopy: '%s'\n", str); return 1; From ca92ea3dc5a2b01f98e9f02b7a6bc03be06fe124 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Sun, 30 Oct 2022 17:41:50 -0400 Subject: [PATCH 3000/4122] mm: always compile in pte markers Patch series "mm: Use pte marker for swapin errors". This series uses the pte marker to replace the swapin error swap entry, then we save one more swap entry slot for swap devices. A new pte marker bit is defined. This patch (of 2): The PTE markers code is tiny and now it's enabled for most of the distributions. It's fine to keep it as-is, but to make a broader use of it (e.g. replacing read error swap entry) it needs to be there always otherwise we need special code path to take care of !PTE_MARKER case. It'll be easier just make pte marker always exist. Use this chance to extend its usage to anonymous too by simply touching up some of the old comments, because it'll be used for anonymous pages in the follow up patches. 
Link: https://lkml.kernel.org/r/20221030214151.402274-1-peterx@redhat.com Link: https://lkml.kernel.org/r/20221030214151.402274-2-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Huang Ying Reviewed-by: Miaohe Lin Acked-by: David Hildenbrand Cc: Andrea Arcangeli Cc: Naoya Horiguchi Cc: Peter Xu Signed-off-by: Andrew Morton --- include/linux/swap.h | 10 +++------- include/linux/swapops.h | 31 ------------------------------- mm/Kconfig | 7 ------- mm/memory.c | 7 +++---- 4 files changed, 6 insertions(+), 49 deletions(-) diff --git a/include/linux/swap.h b/include/linux/swap.h index 369d7799205d..211aeca9bfa7 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -60,17 +60,13 @@ static inline int current_is_kswapd(void) SWP_MIGRATION_NUM + SWP_DEVICE_NUM + \ SWP_PTE_MARKER_NUM) /* - * PTE markers are used to persist information onto PTEs that are mapped with - * file-backed memories. As its name "PTE" hints, it should only be applied to - * the leaves of pgtables. + * PTE markers are used to persist information onto PTEs that otherwise + * should be a none pte. As its name "PTE" hints, it should only be + * applied to the leaves of pgtables. */ -#ifdef CONFIG_PTE_MARKER #define SWP_PTE_MARKER_NUM 1 #define SWP_PTE_MARKER (MAX_SWAPFILES + SWP_HWPOISON_NUM + \ SWP_MIGRATION_NUM + SWP_DEVICE_NUM) -#else -#define SWP_PTE_MARKER_NUM 0 -#endif /* * Unaddressable device memory support. 
See include/linux/hmm.h and diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 3ba9bf56899d..35c1fe62d2e1 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -412,8 +412,6 @@ typedef unsigned long pte_marker; #define PTE_MARKER_UFFD_WP BIT(0) #define PTE_MARKER_MASK (PTE_MARKER_UFFD_WP) -#ifdef CONFIG_PTE_MARKER - static inline swp_entry_t make_pte_marker_entry(pte_marker marker) { return swp_entry(SWP_PTE_MARKER, marker); @@ -434,32 +432,6 @@ static inline bool is_pte_marker(pte_t pte) return is_swap_pte(pte) && is_pte_marker_entry(pte_to_swp_entry(pte)); } -#else /* CONFIG_PTE_MARKER */ - -static inline swp_entry_t make_pte_marker_entry(pte_marker marker) -{ - /* This should never be called if !CONFIG_PTE_MARKER */ - WARN_ON_ONCE(1); - return swp_entry(0, 0); -} - -static inline bool is_pte_marker_entry(swp_entry_t entry) -{ - return false; -} - -static inline pte_marker pte_marker_get(swp_entry_t entry) -{ - return 0; -} - -static inline bool is_pte_marker(pte_t pte) -{ - return false; -} - -#endif /* CONFIG_PTE_MARKER */ - static inline pte_t make_pte_marker(pte_marker marker) { return swp_entry_to_pte(make_pte_marker_entry(marker)); @@ -477,9 +449,6 @@ static inline pte_t make_pte_marker(pte_marker marker) * memory, kernel-only memory (including when the system is during-boot), * non-ram based generic file-system. It's fine to be used even there, but the * extra pte marker check will be pure overhead. - * - * For systems configured with !CONFIG_PTE_MARKER this will be automatically - * optimized to pte_none(). */ static inline int pte_none_mostly(pte_t pte) { diff --git a/mm/Kconfig b/mm/Kconfig index 57e1d8c5b505..4b28800d9be1 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -1107,17 +1107,10 @@ config HAVE_ARCH_USERFAULTFD_MINOR help Arch has userfaultfd minor fault support -config PTE_MARKER - bool - - help - Allows to create marker PTEs for file-backed memory. 
- config PTE_MARKER_UFFD_WP bool "Userfaultfd write protection support for shmem/hugetlbfs" default y depends on HAVE_ARCH_USERFAULTFD_WP - select PTE_MARKER help Allows to create marker PTEs for userfaultfd write protection diff --git a/mm/memory.c b/mm/memory.c index 659620b6770f..b79d27533722 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3662,11 +3662,10 @@ static vm_fault_t handle_pte_marker(struct vm_fault *vmf) unsigned long marker = pte_marker_get(entry); /* - * PTE markers should always be with file-backed memories, and the - * marker should never be empty. If anything weird happened, the best - * thing to do is to kill the process along with its mm. + * PTE markers should never be empty. If anything weird happened, + * the best thing to do is to kill the process along with its mm. */ - if (WARN_ON_ONCE(vma_is_anonymous(vmf->vma) || !marker)) + if (WARN_ON_ONCE(!marker)) return VM_FAULT_SIGBUS; if (pte_marker_entry_uffd_wp(entry)) From 15520a3f046998e3f57e695743e99b0875e2dae7 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Sun, 30 Oct 2022 17:41:51 -0400 Subject: [PATCH 3001/4122] mm: use pte markers for swap errors PTE markers are ideal mechanism for things like SWP_SWAPIN_ERROR. Using a whole swap entry type for this purpose can be an overkill, especially if we already have PTE markers. Define a new bit for swapin error and replace it with pte markers. Then we can safely drop SWP_SWAPIN_ERROR and give one device slot back to swap. We used to have SWP_SWAPIN_ERROR taking the page pfn as part of the swap entry, but it's never used. Neither do I see how it can be useful because normally the swapin failure should not be caused by a bad page but bad swap device. Drop it alongside. 
Link: https://lkml.kernel.org/r/20221030214151.402274-3-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Huang Ying Reviewed-by: Miaohe Lin Acked-by: David Hildenbrand Cc: Andrea Arcangeli Cc: Naoya Horiguchi Signed-off-by: Andrew Morton --- include/linux/swap.h | 6 +----- include/linux/swapops.h | 26 ++++++++++++++------------ mm/memory.c | 6 ++++-- mm/shmem.c | 2 +- mm/swapfile.c | 2 +- 5 files changed, 21 insertions(+), 21 deletions(-) diff --git a/include/linux/swap.h b/include/linux/swap.h index 211aeca9bfa7..fec6647a289a 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -55,10 +55,6 @@ static inline int current_is_kswapd(void) * actions on faults. */ -#define SWP_SWAPIN_ERROR_NUM 1 -#define SWP_SWAPIN_ERROR (MAX_SWAPFILES + SWP_HWPOISON_NUM + \ - SWP_MIGRATION_NUM + SWP_DEVICE_NUM + \ - SWP_PTE_MARKER_NUM) /* * PTE markers are used to persist information onto PTEs that otherwise * should be a none pte. As its name "PTE" hints, it should only be @@ -121,7 +117,7 @@ static inline int current_is_kswapd(void) #define MAX_SWAPFILES \ ((1 << MAX_SWAPFILES_SHIFT) - SWP_DEVICE_NUM - \ SWP_MIGRATION_NUM - SWP_HWPOISON_NUM - \ - SWP_PTE_MARKER_NUM - SWP_SWAPIN_ERROR_NUM) + SWP_PTE_MARKER_NUM) /* * Magic header for a swap area. 
The first part of the union is diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 35c1fe62d2e1..27ade4f22abb 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -162,16 +162,6 @@ static inline void *swp_to_radix_entry(swp_entry_t entry) return xa_mk_value(entry.val); } -static inline swp_entry_t make_swapin_error_entry(struct page *page) -{ - return swp_entry(SWP_SWAPIN_ERROR, page_to_pfn(page)); -} - -static inline int is_swapin_error_entry(swp_entry_t entry) -{ - return swp_type(entry) == SWP_SWAPIN_ERROR; -} - #if IS_ENABLED(CONFIG_DEVICE_PRIVATE) static inline swp_entry_t make_readable_device_private_entry(pgoff_t offset) { @@ -409,8 +399,9 @@ static inline bool is_migration_entry_dirty(swp_entry_t entry) typedef unsigned long pte_marker; -#define PTE_MARKER_UFFD_WP BIT(0) -#define PTE_MARKER_MASK (PTE_MARKER_UFFD_WP) +#define PTE_MARKER_UFFD_WP BIT(0) +#define PTE_MARKER_SWAPIN_ERROR BIT(1) +#define PTE_MARKER_MASK (BIT(2) - 1) static inline swp_entry_t make_pte_marker_entry(pte_marker marker) { @@ -437,6 +428,17 @@ static inline pte_t make_pte_marker(pte_marker marker) return swp_entry_to_pte(make_pte_marker_entry(marker)); } +static inline swp_entry_t make_swapin_error_entry(void) +{ + return make_pte_marker_entry(PTE_MARKER_SWAPIN_ERROR); +} + +static inline int is_swapin_error_entry(swp_entry_t entry) +{ + return is_pte_marker_entry(entry) && + (pte_marker_get(entry) & PTE_MARKER_SWAPIN_ERROR); +} + /* * This is a special version to check pte_none() just to cover the case when * the pte is a pte marker. 
It existed because in many cases the pte marker diff --git a/mm/memory.c b/mm/memory.c index b79d27533722..142c4229549b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3668,6 +3668,10 @@ static vm_fault_t handle_pte_marker(struct vm_fault *vmf) if (WARN_ON_ONCE(!marker)) return VM_FAULT_SIGBUS; + /* Higher priority than uffd-wp when data corrupted */ + if (marker & PTE_MARKER_SWAPIN_ERROR) + return VM_FAULT_SIGBUS; + if (pte_marker_entry_uffd_wp(entry)) return pte_marker_handle_uffd_wp(vmf); @@ -3727,8 +3731,6 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) put_page(vmf->page); } else if (is_hwpoison_entry(entry)) { ret = VM_FAULT_HWPOISON; - } else if (is_swapin_error_entry(entry)) { - ret = VM_FAULT_SIGBUS; } else if (is_pte_marker_entry(entry)) { ret = handle_pte_marker(vmf); } else { diff --git a/mm/shmem.c b/mm/shmem.c index 0a7c4a748811..7428ae3fa4b9 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1682,7 +1682,7 @@ static void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index, swp_entry_t swapin_error; void *old; - swapin_error = make_swapin_error_entry(&folio->page); + swapin_error = make_swapin_error_entry(); old = xa_cmpxchg_irq(&mapping->i_pages, index, swp_to_radix_entry(swap), swp_to_radix_entry(swapin_error), 0); diff --git a/mm/swapfile.c b/mm/swapfile.c index 72e481aacd5d..03fe0949f6b2 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1781,7 +1781,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, pte_t pteval; dec_mm_counter(vma->vm_mm, MM_SWAPENTS); - pteval = swp_entry_to_pte(make_swapin_error_entry(page)); + pteval = swp_entry_to_pte(make_swapin_error_entry()); set_pte_at(vma->vm_mm, addr, pte, pteval); swap_free(entry); ret = 0; From 65917b538bcc4d8c0d8e199a6f7b7426acf13d58 Mon Sep 17 00:00:00 2001 From: Deming Wang Date: Thu, 3 Nov 2022 22:38:18 -0400 Subject: [PATCH 3002/4122] zsmalloc: replace IS_ERR() with IS_ERR_VALUE() Avoid typecasts that are needed for IS_ERR() and use IS_ERR_VALUE() instead. 
Link: https://lkml.kernel.org/r/20221104023818.1728-1-wangdeming@inspur.com Signed-off-by: Deming Wang Signed-off-by: Andrew Morton --- mm/zsmalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index d03941cace2c..b52b7bb88b52 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -387,7 +387,7 @@ static int zs_zpool_malloc(void *pool, size_t size, gfp_t gfp, { *handle = zs_malloc(pool, size, gfp); - if (IS_ERR((void *)(*handle))) + if (IS_ERR_VALUE(*handle)) return PTR_ERR((void *)*handle); return 0; } From 634ba645f9bc888227ca954ea643579268d1b6d8 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Thu, 3 Nov 2022 18:16:32 -0700 Subject: [PATCH 3003/4122] selftests/vm: update hugetlb madvise Commit 8ebe0a5eaaeb ("mm,madvise,hugetlb: fix unexpected data loss with MADV_DONTNEED on hugetlbfs") changed how the passed length was interpreted for hugetlb mappings. It was changed from align up to align down. The hugetlb-madvise test explicitly tests this behavior. Change test to expect new behavior. 
Link: https://lkml.kernel.org/r/20221104011632.357049-1-mike.kravetz@oracle.com Link: https://lore.kernel.org/oe-lkp/202211040619.2ec447d7-oliver.sang@intel.com Signed-off-by: Mike Kravetz Reported-by: kernel test robot Cc: David Hildenbrand Cc: Rik van Riel Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/hugetlb-madvise.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/vm/hugetlb-madvise.c b/tools/testing/selftests/vm/hugetlb-madvise.c index f96435b70986..a634f47d1e56 100644 --- a/tools/testing/selftests/vm/hugetlb-madvise.c +++ b/tools/testing/selftests/vm/hugetlb-madvise.c @@ -195,7 +195,7 @@ int main(int argc, char **argv) exit(1); } - /* addr + length should be aligned up to huge page size */ + /* addr + length should be aligned down to huge page size */ if (madvise(addr, ((NR_HUGE_PAGES - 1) * huge_page_size) + base_page_size, MADV_DONTNEED)) { @@ -203,10 +203,11 @@ int main(int argc, char **argv) exit(1); } - /* should free all pages in mapping */ - validate_free_pages(free_hugepages); + /* should free all but last page in mapping */ + validate_free_pages(free_hugepages - 1); (void)munmap(addr, NR_HUGE_PAGES * huge_page_size); + validate_free_pages(free_hugepages); /* * Test MADV_DONTNEED on anonymous private mapping From dad6a5eb55564845aa17b8b20fa834af21e46c48 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 2 Nov 2022 18:48:45 -0700 Subject: [PATCH 3004/4122] mm,hugetlb: use folio fields in second tail page Patch series "mm,huge,rmap: unify and speed up compound mapcounts". This patch (of 3): We want to declare one more int in the first tail of a compound page: that first tail page being valuable property, since every compound page has a first tail, but perhaps no more than that. No problem on 64-bit: there is already space for it. 
No problem with 32-bit THPs: 5.18 commit 5232c63f46fd ("mm: Make compound_pincount always available") kindly cleared the space for it, apparently not realizing that only 64-bit architectures enable CONFIG_THP_SWAP (whose use of tail page->private might conflict) - but make sure of that in its Kconfig. But hugetlb pages use tail page->private of the first tail page for a subpool pointer, which will conflict; and they also use page->private of the 2nd, 3rd and 4th tails. Undo "mm: add private field of first tail to struct page and struct folio"'s recent addition of private_1 to the folio tail: instead add hugetlb_subpool, hugetlb_cgroup, hugetlb_cgroup_rsvd, hugetlb_hwpoison to a second tail page of the folio: THP has long been using several fields of that tail, so make better use of it for hugetlb too. This is not how a generic folio should be declared in future, but it is an effective transitional way to make use of it. Delete the SUBPAGE_INDEX stuff, but keep __NR_USED_SUBPAGE: now 3. [hughd@google.com: prefix folio's page_1 and page_2 with double underscore, give folio's _flags_2 and _head_2 a line documentation each] Link: https://lkml.kernel.org/r/9e2cb6b-5b58-d3f2-b5ee-5f8a14e8f10@google.com Link: https://lkml.kernel.org/r/5f52de70-975-e94f-f141-543765736181@google.com Link: https://lkml.kernel.org/r/3818cc9a-9999-d064-d778-9c94c5911e6@google.com Signed-off-by: Hugh Dickins Acked-by: Kirill A. 
Shutemov Cc: David Hildenbrand Cc: James Houghton Cc: John Hubbard Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Mike Kravetz Cc: Mina Almasry Cc: Muchun Song Cc: Naoya Horiguchi Cc: Peter Xu Cc: Sidhartha Kumar Cc: Vlastimil Babka Cc: Yang Shi Cc: Zach O'Keefe Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 23 +++-------- include/linux/hugetlb_cgroup.h | 31 +++++--------- include/linux/mm_types.h | 74 +++++++++++++++++++++++----------- mm/Kconfig | 2 +- mm/memory-failure.c | 5 +-- 5 files changed, 67 insertions(+), 68 deletions(-) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 58a30938a9b1..551834cd5299 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -33,22 +33,9 @@ typedef struct { unsigned long pd; } hugepd_t; /* * For HugeTLB page, there are more metadata to save in the struct page. But * the head struct page cannot meet our needs, so we have to abuse other tail - * struct page to store the metadata. In order to avoid conflicts caused by - * subsequent use of more tail struct pages, we gather these discrete indexes - * of tail struct page here. + * struct page to store the metadata. 
*/ -enum { - SUBPAGE_INDEX_SUBPOOL = 1, /* reuse page->private */ -#ifdef CONFIG_CGROUP_HUGETLB - SUBPAGE_INDEX_CGROUP, /* reuse page->private */ - SUBPAGE_INDEX_CGROUP_RSVD, /* reuse page->private */ - __MAX_CGROUP_SUBPAGE_INDEX = SUBPAGE_INDEX_CGROUP_RSVD, -#endif -#ifdef CONFIG_MEMORY_FAILURE - SUBPAGE_INDEX_HWPOISON, -#endif - __NR_USED_SUBPAGE, -}; +#define __NR_USED_SUBPAGE 3 struct hugepage_subpool { spinlock_t lock; @@ -722,11 +709,11 @@ extern unsigned int default_hstate_idx; static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio) { - return (void *)folio_get_private_1(folio); + return folio->_hugetlb_subpool; } /* - * hugetlb page subpool pointer located in hpage[1].private + * hugetlb page subpool pointer located in hpage[2].hugetlb_subpool */ static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage) { @@ -736,7 +723,7 @@ static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage) static inline void hugetlb_set_folio_subpool(struct folio *folio, struct hugepage_subpool *subpool) { - folio_set_private_1(folio, (unsigned long)subpool); + folio->_hugetlb_subpool = subpool; } static inline void hugetlb_set_page_subpool(struct page *hpage, diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h index c70f92fe493e..f706626a8063 100644 --- a/include/linux/hugetlb_cgroup.h +++ b/include/linux/hugetlb_cgroup.h @@ -24,12 +24,10 @@ struct file_region; #ifdef CONFIG_CGROUP_HUGETLB /* * Minimum page order trackable by hugetlb cgroup. - * At least 4 pages are necessary for all the tracking information. - * The second tail page (hpage[SUBPAGE_INDEX_CGROUP]) is the fault - * usage cgroup. The third tail page (hpage[SUBPAGE_INDEX_CGROUP_RSVD]) - * is the reservation usage cgroup. + * At least 3 pages are necessary for all the tracking information. + * The second tail page contains all of the hugetlb-specific fields. 
*/ -#define HUGETLB_CGROUP_MIN_ORDER order_base_2(__MAX_CGROUP_SUBPAGE_INDEX + 1) +#define HUGETLB_CGROUP_MIN_ORDER order_base_2(__NR_USED_SUBPAGE) enum hugetlb_memory_event { HUGETLB_MAX, @@ -69,21 +67,13 @@ struct hugetlb_cgroup { static inline struct hugetlb_cgroup * __hugetlb_cgroup_from_folio(struct folio *folio, bool rsvd) { - struct page *tail; - VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio); if (folio_order(folio) < HUGETLB_CGROUP_MIN_ORDER) return NULL; - - if (rsvd) { - tail = folio_page(folio, SUBPAGE_INDEX_CGROUP_RSVD); - return (void *)page_private(tail); - } - - else { - tail = folio_page(folio, SUBPAGE_INDEX_CGROUP); - return (void *)page_private(tail); - } + if (rsvd) + return folio->_hugetlb_cgroup_rsvd; + else + return folio->_hugetlb_cgroup; } static inline struct hugetlb_cgroup *hugetlb_cgroup_from_folio(struct folio *folio) @@ -101,15 +91,12 @@ static inline void __set_hugetlb_cgroup(struct folio *folio, struct hugetlb_cgroup *h_cg, bool rsvd) { VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio); - if (folio_order(folio) < HUGETLB_CGROUP_MIN_ORDER) return; if (rsvd) - set_page_private(folio_page(folio, SUBPAGE_INDEX_CGROUP_RSVD), - (unsigned long)h_cg); + folio->_hugetlb_cgroup_rsvd = h_cg; else - set_page_private(folio_page(folio, SUBPAGE_INDEX_CGROUP), - (unsigned long)h_cg); + folio->_hugetlb_cgroup = h_cg; } static inline void set_hugetlb_cgroup(struct folio *folio, diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index e86861ff5bbd..44f1f8b6be02 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -145,15 +145,22 @@ struct page { atomic_t compound_pincount; #ifdef CONFIG_64BIT unsigned int compound_nr; /* 1 << compound_order */ - unsigned long _private_1; #endif }; - struct { /* Second tail page of compound page */ + struct { /* Second tail page of transparent huge page */ unsigned long _compound_pad_1; /* compound_head */ unsigned long _compound_pad_2; /* For both global and memcg */ struct 
list_head deferred_list; }; + struct { /* Second tail page of hugetlb page */ + unsigned long _hugetlb_pad_1; /* compound_head */ + void *hugetlb_subpool; + void *hugetlb_cgroup; + void *hugetlb_cgroup_rsvd; + void *hugetlb_hwpoison; + /* No more space on 32-bit: use third tail if more */ + }; struct { /* Page table pages */ unsigned long _pt_pad_1; /* compound_head */ pgtable_t pmd_huge_pte; /* protected by page->ptl */ @@ -260,13 +267,18 @@ struct page { * to find how many references there are to this folio. * @memcg_data: Memory Control Group data. * @_flags_1: For large folios, additional page flags. - * @__head: Points to the folio. Do not use. + * @_head_1: Points to the folio. Do not use. * @_folio_dtor: Which destructor to use for this folio. * @_folio_order: Do not use directly, call folio_order(). * @_total_mapcount: Do not use directly, call folio_entire_mapcount(). * @_pincount: Do not use directly, call folio_maybe_dma_pinned(). * @_folio_nr_pages: Do not use directly, call folio_nr_pages(). - * @_private_1: Do not use directly, call folio_get_private_1(). + * @_flags_2: For alignment. Do not use. + * @_head_2: Points to the folio. Do not use. + * @_hugetlb_subpool: Do not use directly, use accessor in hugetlb.h. + * @_hugetlb_cgroup: Do not use directly, use accessor in hugetlb_cgroup.h. + * @_hugetlb_cgroup_rsvd: Do not use directly, use accessor in hugetlb_cgroup.h. + * @_hugetlb_hwpoison: Do not use directly, call raw_hwp_list_head(). * * A folio is a physically, virtually and logically contiguous set * of bytes. 
It is a power-of-two in size, and it is aligned to that @@ -305,16 +317,31 @@ struct folio { }; struct page page; }; - unsigned long _flags_1; - unsigned long __head; - unsigned char _folio_dtor; - unsigned char _folio_order; - atomic_t _total_mapcount; - atomic_t _pincount; + union { + struct { + unsigned long _flags_1; + unsigned long _head_1; + unsigned char _folio_dtor; + unsigned char _folio_order; + atomic_t _total_mapcount; + atomic_t _pincount; #ifdef CONFIG_64BIT - unsigned int _folio_nr_pages; + unsigned int _folio_nr_pages; #endif - unsigned long _private_1; + }; + struct page __page_1; + }; + union { + struct { + unsigned long _flags_2; + unsigned long _head_2; + void *_hugetlb_subpool; + void *_hugetlb_cgroup; + void *_hugetlb_cgroup_rsvd; + void *_hugetlb_hwpoison; + }; + struct page __page_2; + }; }; #define FOLIO_MATCH(pg, fl) \ @@ -335,16 +362,25 @@ FOLIO_MATCH(memcg_data, memcg_data); static_assert(offsetof(struct folio, fl) == \ offsetof(struct page, pg) + sizeof(struct page)) FOLIO_MATCH(flags, _flags_1); -FOLIO_MATCH(compound_head, __head); +FOLIO_MATCH(compound_head, _head_1); FOLIO_MATCH(compound_dtor, _folio_dtor); FOLIO_MATCH(compound_order, _folio_order); FOLIO_MATCH(compound_mapcount, _total_mapcount); FOLIO_MATCH(compound_pincount, _pincount); #ifdef CONFIG_64BIT FOLIO_MATCH(compound_nr, _folio_nr_pages); -FOLIO_MATCH(_private_1, _private_1); #endif #undef FOLIO_MATCH +#define FOLIO_MATCH(pg, fl) \ + static_assert(offsetof(struct folio, fl) == \ + offsetof(struct page, pg) + 2 * sizeof(struct page)) +FOLIO_MATCH(flags, _flags_2); +FOLIO_MATCH(compound_head, _head_2); +FOLIO_MATCH(hugetlb_subpool, _hugetlb_subpool); +FOLIO_MATCH(hugetlb_cgroup, _hugetlb_cgroup); +FOLIO_MATCH(hugetlb_cgroup_rsvd, _hugetlb_cgroup_rsvd); +FOLIO_MATCH(hugetlb_hwpoison, _hugetlb_hwpoison); +#undef FOLIO_MATCH static inline atomic_t *folio_mapcount_ptr(struct folio *folio) { @@ -388,16 +424,6 @@ static inline void *folio_get_private(struct folio *folio) return 
folio->private; } -static inline void folio_set_private_1(struct folio *folio, unsigned long private) -{ - folio->_private_1 = private; -} - -static inline unsigned long folio_get_private_1(struct folio *folio) -{ - return folio->_private_1; -} - struct page_frag_cache { void * va; #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) diff --git a/mm/Kconfig b/mm/Kconfig index 4b28800d9be1..c86b69aff7d4 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -775,7 +775,7 @@ endchoice config THP_SWAP def_bool y - depends on TRANSPARENT_HUGEPAGE && ARCH_WANTS_THP_SWAP && SWAP + depends on TRANSPARENT_HUGEPAGE && ARCH_WANTS_THP_SWAP && SWAP && 64BIT help Swap transparent huge pages in one piece, without splitting. XXX: For now, swap cluster backing transparent huge page diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 779a426d2cab..63d8501001c6 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1687,8 +1687,7 @@ EXPORT_SYMBOL_GPL(mf_dax_kill_procs); #ifdef CONFIG_HUGETLB_PAGE /* * Struct raw_hwp_page represents information about "raw error page", - * constructing singly linked list originated from ->private field of - * SUBPAGE_INDEX_HWPOISON-th tail page. + * constructing singly linked list from ->_hugetlb_hwpoison field of folio. */ struct raw_hwp_page { struct llist_node node; @@ -1697,7 +1696,7 @@ struct raw_hwp_page { static inline struct llist_head *raw_hwp_list_head(struct page *hpage) { - return (struct llist_head *)&page_private(hpage + SUBPAGE_INDEX_HWPOISON); + return (struct llist_head *)&page_folio(hpage)->_hugetlb_hwpoison; } static unsigned long __free_raw_hwp_pages(struct page *hpage, bool move_flag) From cb67f4282bf9693658dbda934a441ddbbb1446df Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 2 Nov 2022 18:51:38 -0700 Subject: [PATCH 3005/4122] mm,thp,rmap: simplify compound page mapcount handling Compound page (folio) mapcount calculations have been different for anon and file (or shmem) THPs, and involved the obscure PageDoubleMap flag. 
And each huge mapping and unmapping of a file (or shmem) THP involved atomically incrementing and decrementing the mapcount of every subpage of that huge page, dirtying many struct page cachelines. Add subpages_mapcount field to the struct folio and first tail page, so that the total of subpage mapcounts is available in one place near the head: then page_mapcount() and total_mapcount() and page_mapped(), and their folio equivalents, are so quick that anon and file and hugetlb don't need to be optimized differently. Delete the unloved PageDoubleMap. page_add and page_remove rmap functions must now maintain the subpages_mapcount as well as the subpage _mapcount, when dealing with pte mappings of huge pages; and correct maintenance of NR_ANON_MAPPED and NR_FILE_MAPPED statistics still needs reading through the subpages, using nr_subpages_unmapped() - but only when first or last pmd mapping finds subpages_mapcount raised (double-map case, not the common case). But are those counts (used to decide when to split an anon THP, and in vmscan's pagecache_reclaimable heuristic) correctly maintained? Not quite: since page_remove_rmap() (and also split_huge_pmd()) is often called without page lock, there can be races when a subpage pte mapcount goes 0<->1 while a compound pmd mapcount 0<->1 transition is scanning the subpages - races which the previous implementation had prevented. The statistics might become inaccurate, and even drift down until they underflow through 0. That is not good enough, but is better dealt with in a followup patch. Update a few comments on first and second tail page overlaid fields. hugepage_add_new_anon_rmap() has to "increment" compound_mapcount, but subpages_mapcount and compound_pincount are already correctly at 0, so delete its reinitialization of compound_pincount. A simple 100 X munmap(mmap(2GB, MAP_SHARED|MAP_POPULATE, tmpfs), 2GB) took 18 seconds on small pages, and used to take 1 second on huge pages, but now takes 119 milliseconds on huge pages. 
Mapping by pmds a second time used to take 860ms and now takes 92ms; mapping by pmds after mapping by ptes (when the scan is needed) used to take 870ms and now takes 495ms. But there might be some benchmarks which would show a slowdown, because tail struct pages now fall out of cache until final freeing checks them. Link: https://lkml.kernel.org/r/47ad693-717-79c8-e1ba-46c3a6602e48@google.com Signed-off-by: Hugh Dickins Acked-by: Kirill A. Shutemov Cc: David Hildenbrand Cc: James Houghton Cc: John Hubbard Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Mike Kravetz Cc: Mina Almasry Cc: Muchun Song Cc: Naoya Horiguchi Cc: Peter Xu Cc: Sidhartha Kumar Cc: Vlastimil Babka Cc: Yang Shi Cc: Zach O'Keefe Signed-off-by: Andrew Morton --- Documentation/mm/transhuge.rst | 18 ----- include/linux/mm.h | 85 ++++++++++++++------ include/linux/mm_types.h | 21 ++++- include/linux/page-flags.h | 21 ----- include/linux/rmap.h | 2 + mm/debug.c | 5 +- mm/folio-compat.c | 6 -- mm/huge_memory.c | 36 ++------- mm/hugetlb.c | 2 + mm/khugepaged.c | 11 +-- mm/page_alloc.c | 27 ++++--- mm/rmap.c | 142 +++++++++++++++++++-------------- mm/util.c | 79 ------------------ 13 files changed, 194 insertions(+), 261 deletions(-) diff --git a/Documentation/mm/transhuge.rst b/Documentation/mm/transhuge.rst index 216db1d67d04..a560e0c01b16 100644 --- a/Documentation/mm/transhuge.rst +++ b/Documentation/mm/transhuge.rst @@ -125,24 +125,6 @@ pages: ->_mapcount of all sub-pages in order to have race-free detection of last unmap of subpages. -PageDoubleMap() indicates that the page is *possibly* mapped with PTEs. - -For anonymous pages, PageDoubleMap() also indicates ->_mapcount in all -subpages is offset up by one. This additional reference is required to -get race-free detection of unmap of subpages when we have them mapped with -both PMDs and PTEs. - -This optimization is required to lower the overhead of per-subpage mapcount -tracking. 
The alternative is to alter ->_mapcount in all subpages on each -map/unmap of the whole compound page. - -For anonymous pages, we set PG_double_map when a PMD of the page is split -for the first time, but still have a PMD mapping. The additional references -go away with the last compound_mapcount. - -File pages get PG_double_map set on the first map of the page with PTE and -goes away when the page gets evicted from the page cache. - split_huge_page internally has to distribute the refcounts in the head page to the tail pages before clearing all PG_head/tail bits from the page structures. It can be done easily for refcounts taken by page table diff --git a/include/linux/mm.h b/include/linux/mm.h index 3950ef45b9a9..a904c2d60f12 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -818,8 +818,8 @@ static inline int is_vmalloc_or_module_addr(const void *x) /* * How many times the entire folio is mapped as a single unit (eg by a * PMD or PUD entry). This is probably not what you want, except for - * debugging purposes; look at folio_mapcount() or page_mapcount() - * instead. + * debugging purposes - it does not include PTE-mapped sub-pages; look + * at folio_mapcount() or page_mapcount() or total_mapcount() instead. */ static inline int folio_entire_mapcount(struct folio *folio) { @@ -829,12 +829,20 @@ static inline int folio_entire_mapcount(struct folio *folio) /* * Mapcount of compound page as a whole, does not include mapped sub-pages. - * - * Must be called only for compound pages. + * Must be called only on head of compound page. */ -static inline int compound_mapcount(struct page *page) +static inline int head_compound_mapcount(struct page *head) { - return folio_entire_mapcount(page_folio(page)); + return atomic_read(compound_mapcount_ptr(head)) + 1; +} + +/* + * Sum of mapcounts of sub-pages, does not include compound mapcount. + * Must be called only on head of compound page. 
+ */ +static inline int head_subpages_mapcount(struct page *head) +{ + return atomic_read(subpages_mapcount_ptr(head)); } /* @@ -847,11 +855,9 @@ static inline void page_mapcount_reset(struct page *page) atomic_set(&(page)->_mapcount, -1); } -int __page_mapcount(struct page *page); - /* * Mapcount of 0-order page; when compound sub-page, includes - * compound_mapcount(). + * compound_mapcount of compound_head of page. * * Result is undefined for pages which cannot be mapped into userspace. * For example SLAB or special types of pages. See function page_has_type(). @@ -859,25 +865,61 @@ int __page_mapcount(struct page *page); */ static inline int page_mapcount(struct page *page) { - if (unlikely(PageCompound(page))) - return __page_mapcount(page); - return atomic_read(&page->_mapcount) + 1; + int mapcount = atomic_read(&page->_mapcount) + 1; + + if (likely(!PageCompound(page))) + return mapcount; + page = compound_head(page); + return head_compound_mapcount(page) + mapcount; } -int folio_mapcount(struct folio *folio); - -#ifdef CONFIG_TRANSPARENT_HUGEPAGE static inline int total_mapcount(struct page *page) { - return folio_mapcount(page_folio(page)); + if (likely(!PageCompound(page))) + return atomic_read(&page->_mapcount) + 1; + page = compound_head(page); + return head_compound_mapcount(page) + head_subpages_mapcount(page); } -#else -static inline int total_mapcount(struct page *page) +/* + * Return true if this page is mapped into pagetables. + * For compound page it returns true if any subpage of compound page is mapped, + * even if this particular subpage is not itself mapped by any PTE or PMD. + */ +static inline bool page_mapped(struct page *page) { - return page_mapcount(page); + return total_mapcount(page) > 0; +} + +/** + * folio_mapcount() - Calculate the number of mappings of this folio. + * @folio: The folio. + * + * A large folio tracks both how many times the entire folio is mapped, + * and how many times each individual page in the folio is mapped. 
+ * This function calculates the total number of times the folio is + * mapped. + * + * Return: The number of times this folio is mapped. + */ +static inline int folio_mapcount(struct folio *folio) +{ + if (likely(!folio_test_large(folio))) + return atomic_read(&folio->_mapcount) + 1; + return atomic_read(folio_mapcount_ptr(folio)) + 1 + + atomic_read(folio_subpages_mapcount_ptr(folio)); +} + +/** + * folio_mapped - Is this folio mapped into userspace? + * @folio: The folio. + * + * Return: True if any page in this folio is referenced by user page tables. + */ +static inline bool folio_mapped(struct folio *folio) +{ + return folio_mapcount(folio) > 0; } -#endif static inline struct page *virt_to_head_page(const void *x) { @@ -1800,9 +1842,6 @@ static inline pgoff_t page_index(struct page *page) return page->index; } -bool page_mapped(struct page *page); -bool folio_mapped(struct folio *folio); - /* * Return true only if the page has been allocated with * ALLOC_NO_WATERMARKS and the low watermark was not diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 44f1f8b6be02..44a1a699b5ad 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -142,6 +142,7 @@ struct page { unsigned char compound_dtor; unsigned char compound_order; atomic_t compound_mapcount; + atomic_t subpages_mapcount; atomic_t compound_pincount; #ifdef CONFIG_64BIT unsigned int compound_nr; /* 1 << compound_order */ @@ -270,7 +271,8 @@ struct page { * @_head_1: Points to the folio. Do not use. * @_folio_dtor: Which destructor to use for this folio. * @_folio_order: Do not use directly, call folio_order(). - * @_total_mapcount: Do not use directly, call folio_entire_mapcount(). + * @_compound_mapcount: Do not use directly, call folio_entire_mapcount(). + * @_subpages_mapcount: Do not use directly, call folio_mapcount(). * @_pincount: Do not use directly, call folio_maybe_dma_pinned(). * @_folio_nr_pages: Do not use directly, call folio_nr_pages(). 
* @_flags_2: For alignment. Do not use. @@ -323,7 +325,8 @@ struct folio { unsigned long _head_1; unsigned char _folio_dtor; unsigned char _folio_order; - atomic_t _total_mapcount; + atomic_t _compound_mapcount; + atomic_t _subpages_mapcount; atomic_t _pincount; #ifdef CONFIG_64BIT unsigned int _folio_nr_pages; @@ -365,7 +368,8 @@ FOLIO_MATCH(flags, _flags_1); FOLIO_MATCH(compound_head, _head_1); FOLIO_MATCH(compound_dtor, _folio_dtor); FOLIO_MATCH(compound_order, _folio_order); -FOLIO_MATCH(compound_mapcount, _total_mapcount); +FOLIO_MATCH(compound_mapcount, _compound_mapcount); +FOLIO_MATCH(subpages_mapcount, _subpages_mapcount); FOLIO_MATCH(compound_pincount, _pincount); #ifdef CONFIG_64BIT FOLIO_MATCH(compound_nr, _folio_nr_pages); @@ -388,11 +392,22 @@ static inline atomic_t *folio_mapcount_ptr(struct folio *folio) return &tail->compound_mapcount; } +static inline atomic_t *folio_subpages_mapcount_ptr(struct folio *folio) +{ + struct page *tail = &folio->page + 1; + return &tail->subpages_mapcount; +} + static inline atomic_t *compound_mapcount_ptr(struct page *page) { return &page[1].compound_mapcount; } +static inline atomic_t *subpages_mapcount_ptr(struct page *page) +{ + return &page[1].subpages_mapcount; +} + static inline atomic_t *compound_pincount_ptr(struct page *page) { return &page[1].compound_pincount; diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 0b0ae5084e60..e42c55a7e012 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -176,9 +176,6 @@ enum pageflags { /* SLOB */ PG_slob_free = PG_private, - /* Compound pages. Stored in first tail page's flags */ - PG_double_map = PG_workingset, - #ifdef CONFIG_MEMORY_FAILURE /* * Compound pages. Stored in first tail page's flags. @@ -874,29 +871,11 @@ static inline int PageTransTail(struct page *page) { return PageTail(page); } - -/* - * PageDoubleMap indicates that the compound page is mapped with PTEs as well - * as PMDs. 
- * - * This is required for optimization of rmap operations for THP: we can postpone - * per small page mapcount accounting (and its overhead from atomic operations) - * until the first PMD split. - * - * For the page PageDoubleMap means ->_mapcount in all sub-pages is offset up - * by one. This reference will go away with last compound_mapcount. - * - * See also __split_huge_pmd_locked() and page_remove_anon_compound_rmap(). - */ -PAGEFLAG(DoubleMap, double_map, PF_SECOND) - TESTSCFLAG(DoubleMap, double_map, PF_SECOND) #else TESTPAGEFLAG_FALSE(TransHuge, transhuge) TESTPAGEFLAG_FALSE(TransCompound, transcompound) TESTPAGEFLAG_FALSE(TransCompoundMap, transcompoundmap) TESTPAGEFLAG_FALSE(TransTail, transtail) -PAGEFLAG_FALSE(DoubleMap, double_map) - TESTSCFLAG_FALSE(DoubleMap, double_map) #endif #if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_TRANSPARENT_HUGEPAGE) diff --git a/include/linux/rmap.h b/include/linux/rmap.h index bd3504d11b15..1973649e8f93 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -206,6 +206,8 @@ void hugepage_add_new_anon_rmap(struct page *, struct vm_area_struct *, static inline void __page_dup_rmap(struct page *page, bool compound) { + if (!compound && PageCompound(page)) + atomic_inc(subpages_mapcount_ptr(compound_head(page))); atomic_inc(compound ? 
compound_mapcount_ptr(page) : &page->_mapcount); } diff --git a/mm/debug.c b/mm/debug.c index 0fd15ba70d16..7f8e5f744e42 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -94,9 +94,10 @@ static void __dump_page(struct page *page) page, page_ref_count(head), mapcount, mapping, page_to_pgoff(page), page_to_pfn(page)); if (compound) { - pr_warn("head:%p order:%u compound_mapcount:%d compound_pincount:%d\n", + pr_warn("head:%p order:%u compound_mapcount:%d subpages_mapcount:%d compound_pincount:%d\n", head, compound_order(head), - folio_entire_mapcount(folio), + head_compound_mapcount(head), + head_subpages_mapcount(head), head_compound_pincount(head)); } diff --git a/mm/folio-compat.c b/mm/folio-compat.c index bac2a366aada..cbfe51091c39 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -39,12 +39,6 @@ void wait_for_stable_page(struct page *page) } EXPORT_SYMBOL_GPL(wait_for_stable_page); -bool page_mapped(struct page *page) -{ - return folio_mapped(page_folio(page)); -} -EXPORT_SYMBOL(page_mapped); - void mark_page_accessed(struct page *page) { folio_mark_accessed(page_folio(page)); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index b26998d1845f..7703169107c6 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2142,6 +2142,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, VM_BUG_ON_PAGE(!page_count(page), page); page_ref_add(page, HPAGE_PMD_NR - 1); + atomic_add(HPAGE_PMD_NR, subpages_mapcount_ptr(page)); /* * Without "freeze", we'll simply split the PMD, propagating the @@ -2225,33 +2226,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, pte_unmap(pte); } - if (!pmd_migration) { - /* - * Set PG_double_map before dropping compound_mapcount to avoid - * false-negative page_mapped(). 
- */ - if (compound_mapcount(page) > 1 && - !TestSetPageDoubleMap(page)) { - for (i = 0; i < HPAGE_PMD_NR; i++) - atomic_inc(&page[i]._mapcount); - } - - lock_page_memcg(page); - if (atomic_add_negative(-1, compound_mapcount_ptr(page))) { - /* Last compound_mapcount is gone. */ - __mod_lruvec_page_state(page, NR_ANON_THPS, - -HPAGE_PMD_NR); - if (TestClearPageDoubleMap(page)) { - /* No need in mapcount reference anymore */ - for (i = 0; i < HPAGE_PMD_NR; i++) - atomic_dec(&page[i]._mapcount); - } - } - unlock_page_memcg(page); - - /* Above is effectively page_remove_rmap(page, vma, true) */ - munlock_vma_page(page, vma, true); - } + if (!pmd_migration) + page_remove_rmap(page, vma, true); smp_wmb(); /* make pte visible before pmd */ pmd_populate(mm, pmd, pgtable); @@ -2453,7 +2429,7 @@ static void __split_huge_page_tail(struct page *head, int tail, (1L << PG_dirty) | LRU_GEN_MASK | LRU_REFS_MASK)); - /* ->mapping in first tail page is compound_mapcount */ + /* ->mapping in first and second tail page is replaced by other uses */ VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING, page_tail); page_tail->mapping = head->mapping; @@ -2463,6 +2439,10 @@ static void __split_huge_page_tail(struct page *head, int tail, * page->private should not be set in tail pages with the exception * of swap cache pages that store the swp_entry_t in tail pages. * Fix up and warn once if private is unexpectedly set. + * + * What of 32-bit systems, on which head[1].compound_pincount overlays + * head[1].private? No problem: THP_SWAP is not enabled on 32-bit, and + * compound_pincount must be 0 for folio_ref_freeze() to have succeeded. 
*/ if (!folio_test_swapcache(page_folio(head))) { VM_WARN_ON_ONCE_PAGE(page_tail->private != 0, page_tail); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 76ebefe02827..4f1338d82aab 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1333,6 +1333,7 @@ static void __destroy_compound_gigantic_page(struct page *page, struct page *p; atomic_set(compound_mapcount_ptr(page), 0); + atomic_set(subpages_mapcount_ptr(page), 0); atomic_set(compound_pincount_ptr(page), 0); for (i = 1; i < nr_pages; i++) { @@ -1852,6 +1853,7 @@ static bool __prep_compound_gigantic_page(struct page *page, unsigned int order, set_compound_head(p, page); } atomic_set(compound_mapcount_ptr(page), -1); + atomic_set(subpages_mapcount_ptr(page), 0); atomic_set(compound_pincount_ptr(page), 0); return true; diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 9c111273bbf9..0d8f548d9d7e 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1238,15 +1238,8 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm, /* * Check if the page has any GUP (or other external) pins. * - * Here the check is racy it may see total_mapcount > refcount - * in some cases. - * For example, one process with one forked child process. - * The parent has the PMD split due to MADV_DONTNEED, then - * the child is trying unmap the whole PMD, but khugepaged - * may be scanning the parent between the child has - * PageDoubleMap flag cleared and dec the mapcount. So - * khugepaged may see total_mapcount > refcount. - * + * Here the check may be racy: + * it may see total_mapcount > refcount in some cases? * But such case is ephemeral we could always retry collapse * later. However it may report false positive if the page * has excessive GUP pins (i.e. 512). 
Anyway the same check diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6e60657875d3..0705917ddf54 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -798,6 +798,7 @@ static void prep_compound_head(struct page *page, unsigned int order) set_compound_page_dtor(page, COMPOUND_PAGE_DTOR); set_compound_order(page, order); atomic_set(compound_mapcount_ptr(page), -1); + atomic_set(subpages_mapcount_ptr(page), 0); atomic_set(compound_pincount_ptr(page), 0); } @@ -1324,11 +1325,19 @@ static int free_tail_pages_check(struct page *head_page, struct page *page) } switch (page - head_page) { case 1: - /* the first tail page: ->mapping may be compound_mapcount() */ - if (unlikely(compound_mapcount(page))) { + /* the first tail page: these may be in place of ->mapping */ + if (unlikely(head_compound_mapcount(head_page))) { bad_page(page, "nonzero compound_mapcount"); goto out; } + if (unlikely(head_subpages_mapcount(head_page))) { + bad_page(page, "nonzero subpages_mapcount"); + goto out; + } + if (unlikely(head_compound_pincount(head_page))) { + bad_page(page, "nonzero compound_pincount"); + goto out; + } break; case 2: /* @@ -1431,10 +1440,8 @@ static __always_inline bool free_pages_prepare(struct page *page, VM_BUG_ON_PAGE(compound && compound_order(page) != order, page); - if (compound) { - ClearPageDoubleMap(page); + if (compound) ClearPageHasHWPoisoned(page); - } for (i = 1; i < (1 << order); i++) { if (compound) bad += free_tail_pages_check(page, page + i); @@ -6874,13 +6881,11 @@ static void __ref memmap_init_compound(struct page *head, set_page_count(page, 0); /* - * The first tail page stores compound_mapcount_ptr() and - * compound_order() and the second tail page stores - * compound_pincount_ptr(). Call prep_compound_head() after - * the first and second tail pages have been initialized to - * not have the data overwritten. + * The first tail page stores important compound page info. 
+ * Call prep_compound_head() after the first tail page has + * been initialized, to not have the data overwritten. */ - if (pfn == head_pfn + 2) + if (pfn == head_pfn + 1) prep_compound_head(head, order); } } diff --git a/mm/rmap.c b/mm/rmap.c index 3b2d18bbdc44..f43339ea4970 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1085,6 +1085,24 @@ int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff, return page_vma_mkclean_one(&pvmw); } +/* + * When mapping a THP's first pmd, or unmapping its last pmd, if that THP + * also has pte mappings, then those must be discounted: in order to maintain + * NR_ANON_MAPPED and NR_FILE_MAPPED statistics exactly, without any drift, + * and to decide when an anon THP should be put on the deferred split queue. + */ +static int nr_subpages_unmapped(struct page *head, int nr_subpages) +{ + int nr = nr_subpages; + int i; + + /* Discount those subpages mapped by pte */ + for (i = 0; i < nr_subpages; i++) + if (atomic_read(&head[i]._mapcount) >= 0) + nr--; + return nr; +} + /** * page_move_anon_rmap - move a page to our anon_vma * @page: the page to move to our anon_vma @@ -1194,6 +1212,7 @@ static void __page_check_anon_rmap(struct page *page, void page_add_anon_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address, rmap_t flags) { + int nr, nr_pages; bool compound = flags & RMAP_COMPOUND; bool first; @@ -1202,28 +1221,32 @@ void page_add_anon_rmap(struct page *page, else VM_BUG_ON_PAGE(!PageLocked(page), page); - if (compound) { + if (compound && PageTransHuge(page)) { atomic_t *mapcount; VM_BUG_ON_PAGE(!PageLocked(page), page); - VM_BUG_ON_PAGE(!PageTransHuge(page), page); mapcount = compound_mapcount_ptr(page); first = atomic_inc_and_test(mapcount); + + nr = nr_pages = thp_nr_pages(page); + if (first && head_subpages_mapcount(page)) + nr = nr_subpages_unmapped(page, nr_pages); } else { + nr = 1; + if (PageTransCompound(page)) { + struct page *head = compound_head(page); + + 
atomic_inc(subpages_mapcount_ptr(head)); + nr = !head_compound_mapcount(head); + } first = atomic_inc_and_test(&page->_mapcount); } + VM_BUG_ON_PAGE(!first && (flags & RMAP_EXCLUSIVE), page); VM_BUG_ON_PAGE(!first && PageAnonExclusive(page), page); if (first) { - int nr = compound ? thp_nr_pages(page) : 1; - /* - * We use the irq-unsafe __{inc|mod}_zone_page_stat because - * these counters are not modified in interrupt context, and - * pte lock(a spinlock) is held, which implies preemption - * disabled. - */ if (compound) - __mod_lruvec_page_state(page, NR_ANON_THPS, nr); + __mod_lruvec_page_state(page, NR_ANON_THPS, nr_pages); __mod_lruvec_page_state(page, NR_ANON_MAPPED, nr); } @@ -1265,8 +1288,6 @@ void page_add_new_anon_rmap(struct page *page, VM_BUG_ON_PAGE(!PageTransHuge(page), page); /* increment count (starts at -1) */ atomic_set(compound_mapcount_ptr(page), 0); - atomic_set(compound_pincount_ptr(page), 0); - __mod_lruvec_page_state(page, NR_ANON_THPS, nr); } else { /* increment count (starts at -1) */ @@ -1287,29 +1308,19 @@ void page_add_new_anon_rmap(struct page *page, void page_add_file_rmap(struct page *page, struct vm_area_struct *vma, bool compound) { - int i, nr = 0; + int nr = 0; VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page); lock_page_memcg(page); if (compound && PageTransHuge(page)) { - int nr_pages = thp_nr_pages(page); + int nr_pages; - for (i = 0; i < nr_pages; i++) { - if (atomic_inc_and_test(&page[i]._mapcount)) - nr++; - } if (!atomic_inc_and_test(compound_mapcount_ptr(page))) goto out; - /* - * It is racy to ClearPageDoubleMap in page_remove_file_rmap(); - * but page lock is held by all page_add_file_rmap() compound - * callers, and SetPageDoubleMap below warns if !PageLocked: - * so here is a place that DoubleMap can be safely cleared. 
- */ - VM_WARN_ON_ONCE(!PageLocked(page)); - if (nr == nr_pages && PageDoubleMap(page)) - ClearPageDoubleMap(page); + nr = nr_pages = thp_nr_pages(page); + if (head_subpages_mapcount(page)) + nr = nr_subpages_unmapped(page, nr_pages); if (PageSwapBacked(page)) __mod_lruvec_page_state(page, NR_SHMEM_PMDMAPPED, @@ -1318,11 +1329,15 @@ void page_add_file_rmap(struct page *page, __mod_lruvec_page_state(page, NR_FILE_PMDMAPPED, nr_pages); } else { - if (PageTransCompound(page) && page_mapping(page)) { - VM_WARN_ON_ONCE(!PageLocked(page)); - SetPageDoubleMap(compound_head(page)); + bool pmd_mapped = false; + + if (PageTransCompound(page)) { + struct page *head = compound_head(page); + + atomic_inc(subpages_mapcount_ptr(head)); + pmd_mapped = head_compound_mapcount(head); } - if (atomic_inc_and_test(&page->_mapcount)) + if (atomic_inc_and_test(&page->_mapcount) && !pmd_mapped) nr++; } out: @@ -1335,7 +1350,7 @@ out: static void page_remove_file_rmap(struct page *page, bool compound) { - int i, nr = 0; + int nr = 0; VM_BUG_ON_PAGE(compound && !PageHead(page), page); @@ -1348,14 +1363,15 @@ static void page_remove_file_rmap(struct page *page, bool compound) /* page still mapped by someone else? 
*/ if (compound && PageTransHuge(page)) { - int nr_pages = thp_nr_pages(page); + int nr_pages; - for (i = 0; i < nr_pages; i++) { - if (atomic_add_negative(-1, &page[i]._mapcount)) - nr++; - } if (!atomic_add_negative(-1, compound_mapcount_ptr(page))) - goto out; + return; + + nr = nr_pages = thp_nr_pages(page); + if (head_subpages_mapcount(page)) + nr = nr_subpages_unmapped(page, nr_pages); + if (PageSwapBacked(page)) __mod_lruvec_page_state(page, NR_SHMEM_PMDMAPPED, -nr_pages); @@ -1363,17 +1379,25 @@ static void page_remove_file_rmap(struct page *page, bool compound) __mod_lruvec_page_state(page, NR_FILE_PMDMAPPED, -nr_pages); } else { - if (atomic_add_negative(-1, &page->_mapcount)) + bool pmd_mapped = false; + + if (PageTransCompound(page)) { + struct page *head = compound_head(page); + + atomic_dec(subpages_mapcount_ptr(head)); + pmd_mapped = head_compound_mapcount(head); + } + if (atomic_add_negative(-1, &page->_mapcount) && !pmd_mapped) nr++; } -out: + if (nr) __mod_lruvec_page_state(page, NR_FILE_MAPPED, -nr); } static void page_remove_anon_compound_rmap(struct page *page) { - int i, nr; + int nr, nr_pages; if (!atomic_add_negative(-1, compound_mapcount_ptr(page))) return; @@ -1385,27 +1409,19 @@ static void page_remove_anon_compound_rmap(struct page *page) if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) return; - __mod_lruvec_page_state(page, NR_ANON_THPS, -thp_nr_pages(page)); + nr = nr_pages = thp_nr_pages(page); + __mod_lruvec_page_state(page, NR_ANON_THPS, -nr); - if (TestClearPageDoubleMap(page)) { - /* - * Subpages can be mapped with PTEs too. Check how many of - * them are still mapped. - */ - for (i = 0, nr = 0; i < thp_nr_pages(page); i++) { - if (atomic_add_negative(-1, &page[i]._mapcount)) - nr++; - } + if (head_subpages_mapcount(page)) { + nr = nr_subpages_unmapped(page, nr_pages); /* * Queue the page for deferred split if at least one small * page of the compound page is unmapped, but at least one * small page is still mapped. 
*/ - if (nr && nr < thp_nr_pages(page)) + if (nr && nr < nr_pages) deferred_split_huge_page(page); - } else { - nr = thp_nr_pages(page); } if (nr) @@ -1423,6 +1439,8 @@ static void page_remove_anon_compound_rmap(struct page *page) void page_remove_rmap(struct page *page, struct vm_area_struct *vma, bool compound) { + bool pmd_mapped = false; + lock_page_memcg(page); if (!PageAnon(page)) { @@ -1435,15 +1453,17 @@ void page_remove_rmap(struct page *page, goto out; } + if (PageTransCompound(page)) { + struct page *head = compound_head(page); + + atomic_dec(subpages_mapcount_ptr(head)); + pmd_mapped = head_compound_mapcount(head); + } + /* page still mapped by someone else? */ - if (!atomic_add_negative(-1, &page->_mapcount)) + if (!atomic_add_negative(-1, &page->_mapcount) || pmd_mapped) goto out; - /* - * We use the irq-unsafe __{inc|mod}_zone_page_stat because - * these counters are not modified in interrupt context, and - * pte lock(a spinlock) is held, which implies preemption disabled. - */ __dec_lruvec_page_state(page, NR_ANON_MAPPED); if (PageTransCompound(page)) @@ -2569,8 +2589,8 @@ void hugepage_add_new_anon_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address) { BUG_ON(address < vma->vm_start || address >= vma->vm_end); + /* increment count (starts at -1) */ atomic_set(compound_mapcount_ptr(page), 0); - atomic_set(compound_pincount_ptr(page), 0); ClearHPageRestoreReserve(page); __page_set_anon_rmap(page, vma, address, 1); } diff --git a/mm/util.c b/mm/util.c index 12984e76767e..b56c92fb910f 100644 --- a/mm/util.c +++ b/mm/util.c @@ -717,32 +717,6 @@ void *page_rmapping(struct page *page) return folio_raw_mapping(page_folio(page)); } -/** - * folio_mapped - Is this folio mapped into userspace? - * @folio: The folio. - * - * Return: True if any page in this folio is referenced by user page tables. 
- */ -bool folio_mapped(struct folio *folio) -{ - long i, nr; - - if (!folio_test_large(folio)) - return atomic_read(&folio->_mapcount) >= 0; - if (atomic_read(folio_mapcount_ptr(folio)) >= 0) - return true; - if (folio_test_hugetlb(folio)) - return false; - - nr = folio_nr_pages(folio); - for (i = 0; i < nr; i++) { - if (atomic_read(&folio_page(folio, i)->_mapcount) >= 0) - return true; - } - return false; -} -EXPORT_SYMBOL(folio_mapped); - struct anon_vma *folio_anon_vma(struct folio *folio) { unsigned long mapping = (unsigned long)folio->mapping; @@ -783,59 +757,6 @@ struct address_space *folio_mapping(struct folio *folio) } EXPORT_SYMBOL(folio_mapping); -/* Slow path of page_mapcount() for compound pages */ -int __page_mapcount(struct page *page) -{ - int ret; - - ret = atomic_read(&page->_mapcount) + 1; - /* - * For file THP page->_mapcount contains total number of mapping - * of the page: no need to look into compound_mapcount. - */ - if (!PageAnon(page) && !PageHuge(page)) - return ret; - page = compound_head(page); - ret += atomic_read(compound_mapcount_ptr(page)) + 1; - if (PageDoubleMap(page)) - ret--; - return ret; -} -EXPORT_SYMBOL_GPL(__page_mapcount); - -/** - * folio_mapcount() - Calculate the number of mappings of this folio. - * @folio: The folio. - * - * A large folio tracks both how many times the entire folio is mapped, - * and how many times each individual page in the folio is mapped. - * This function calculates the total number of times the folio is - * mapped. - * - * Return: The number of times this folio is mapped. 
- */ -int folio_mapcount(struct folio *folio) -{ - int i, compound, nr, ret; - - if (likely(!folio_test_large(folio))) - return atomic_read(&folio->_mapcount) + 1; - - compound = folio_entire_mapcount(folio); - if (folio_test_hugetlb(folio)) - return compound; - ret = compound; - nr = folio_nr_pages(folio); - for (i = 0; i < nr; i++) - ret += atomic_read(&folio_page(folio, i)->_mapcount) + 1; - /* File pages has compound_mapcount included in _mapcount */ - if (!folio_test_anon(folio)) - return ret - compound * nr; - if (folio_test_double_map(folio)) - ret -= nr; - return ret; -} - /** * folio_copy - Copy the contents of one folio to another. * @dst: Folio to copy to. From 9bd3155ed83b723be719e522760f107229e2a61b Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 2 Nov 2022 18:53:45 -0700 Subject: [PATCH 3006/4122] mm,thp,rmap: lock_compound_mapcounts() on THP mapcounts Fix the races in maintaining compound_mapcount, subpages_mapcount and subpage _mapcount by using PG_locked in the first tail of any compound page for a bit_spin_lock() on such modifications; skipping the usual atomic operations on those fields in this case. Bring page_remove_file_rmap() and page_remove_anon_compound_rmap() back into page_remove_rmap() itself. Rearrange page_add_anon_rmap() and page_add_file_rmap() and page_remove_rmap() to follow the same "if (compound) {lock} else if (PageCompound) {lock} else {atomic}" pattern (with a PageTransHuge in the compound test, like before, to avoid BUG_ONs and optimize away that block when THP is not configured). Move all the stats updates outside, after the bit_spin_locked section, so that it is sure to be a leaf lock. Add page_dup_compound_rmap() to manage compound locking versus atomics in sync with the rest. In particular, hugetlb pages are still using the atomics: to avoid unnecessary interference there, and because they never have subpage mappings; but this exception can easily be changed. 
Conveniently, page_dup_compound_rmap() turns out to suit an anon THP's __split_huge_pmd_locked() too. bit_spin_lock() is not popular with PREEMPT_RT folks: but PREEMPT_RT sensibly excludes TRANSPARENT_HUGEPAGE already, so its only exposure is to the non-hugetlb non-THP pte-mapped compound pages (with large folios being currently dependent on TRANSPARENT_HUGEPAGE). There is never any scan of subpages in this case; but we have chosen to use PageCompound tests rather than PageTransCompound tests to gate the use of lock_compound_mapcounts(), so that page_mapped() is correct on all compound pages, whether or not TRANSPARENT_HUGEPAGE is enabled: could that be a problem for PREEMPT_RT, when there is contention on the lock - under heavy concurrent forking for example? If so, then it can be turned into a sleeping lock (like folio_lock()) when PREEMPT_RT. A simple 100 X munmap(mmap(2GB, MAP_SHARED|MAP_POPULATE, tmpfs), 2GB) took 18 seconds on small pages, and used to take 1 second on huge pages, but now takes 115 milliseconds on huge pages. Mapping by pmds a second time used to take 860ms and now takes 86ms; mapping by pmds after mapping by ptes (when the scan is needed) used to take 870ms and now takes 495ms. Mapping huge pages by ptes is largely unaffected but variable: between 5% faster and 5% slower in what I've recorded. Contention on the lock is likely to behave worse than contention on the atomics behaved. Link: https://lkml.kernel.org/r/1b42bd1a-8223-e827-602f-d466c2db7d3c@google.com Signed-off-by: Hugh Dickins Acked-by: Kirill A. 
Shutemov Cc: David Hildenbrand Cc: James Houghton Cc: John Hubbard Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Mike Kravetz Cc: Mina Almasry Cc: Muchun Song Cc: Naoya Horiguchi Cc: Peter Xu Cc: Sidhartha Kumar Cc: Vlastimil Babka Cc: Yang Shi Cc: Zach O'Keefe Signed-off-by: Andrew Morton --- Documentation/mm/transhuge.rst | 14 +- include/linux/rmap.h | 14 +- mm/huge_memory.c | 3 +- mm/rmap.c | 349 ++++++++++++++++++--------------- 4 files changed, 211 insertions(+), 169 deletions(-) diff --git a/Documentation/mm/transhuge.rst b/Documentation/mm/transhuge.rst index a560e0c01b16..1e2a637cc607 100644 --- a/Documentation/mm/transhuge.rst +++ b/Documentation/mm/transhuge.rst @@ -117,13 +117,15 @@ pages: - ->_refcount in tail pages is always zero: get_page_unless_zero() never succeeds on tail pages. - - map/unmap of the pages with PTE entry increment/decrement ->_mapcount - on relevant sub-page of the compound page. + - map/unmap of PMD entry for the whole compound page increment/decrement + ->compound_mapcount, stored in the first tail page of the compound page. - - map/unmap of the whole compound page is accounted for in compound_mapcount - (stored in first tail page). For file huge pages, we also increment - ->_mapcount of all sub-pages in order to have race-free detection of - last unmap of subpages. + - map/unmap of sub-pages with PTE entry increment/decrement ->_mapcount + on relevant sub-page of the compound page, and also increment/decrement + ->subpages_mapcount, stored in first tail page of the compound page. + In order to have race-free accounting of sub-pages mapped, changes to + sub-page ->_mapcount, ->subpages_mapcount and ->compound_mapcount are + all locked by bit_spin_lock of PG_locked in the first tail ->flags.
split_huge_page internally has to distribute the refcounts in the head page to the tail pages before clearing all PG_head/tail bits from the page diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 1973649e8f93..011a7530dc76 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -204,16 +204,14 @@ void hugepage_add_anon_rmap(struct page *, struct vm_area_struct *, void hugepage_add_new_anon_rmap(struct page *, struct vm_area_struct *, unsigned long address); -static inline void __page_dup_rmap(struct page *page, bool compound) -{ - if (!compound && PageCompound(page)) - atomic_inc(subpages_mapcount_ptr(compound_head(page))); - atomic_inc(compound ? compound_mapcount_ptr(page) : &page->_mapcount); -} +void page_dup_compound_rmap(struct page *page, bool compound); static inline void page_dup_file_rmap(struct page *page, bool compound) { - __page_dup_rmap(page, compound); + if (PageCompound(page)) + page_dup_compound_rmap(page, compound); + else + atomic_inc(&page->_mapcount); } /** @@ -262,7 +260,7 @@ static inline int page_try_dup_anon_rmap(struct page *page, bool compound, * the page R/O into both processes. 
*/ dup: - __page_dup_rmap(page, compound); + page_dup_file_rmap(page, compound); return 0; } diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 7703169107c6..114517e8cbfc 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2142,7 +2142,6 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, VM_BUG_ON_PAGE(!page_count(page), page); page_ref_add(page, HPAGE_PMD_NR - 1); - atomic_add(HPAGE_PMD_NR, subpages_mapcount_ptr(page)); /* * Without "freeze", we'll simply split the PMD, propagating the @@ -2222,7 +2221,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, BUG_ON(!pte_none(*pte)); set_pte_at(mm, addr, pte, entry); if (!pmd_migration) - atomic_inc(&page[i]._mapcount); + page_dup_compound_rmap(page + i, false); pte_unmap(pte); } diff --git a/mm/rmap.c b/mm/rmap.c index f43339ea4970..512e53cae2ca 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1085,11 +1085,66 @@ int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff, return page_vma_mkclean_one(&pvmw); } +struct compound_mapcounts { + unsigned int compound_mapcount; + unsigned int subpages_mapcount; +}; + +/* + * lock_compound_mapcounts() first locks, then copies subpages_mapcount and + * compound_mapcount from head[1].compound_mapcount and subpages_mapcount, + * converting from struct page's internal representation to logical count + * (that is, adding 1 to compound_mapcount to hide its offset by -1). + */ +static void lock_compound_mapcounts(struct page *head, + struct compound_mapcounts *local) +{ + bit_spin_lock(PG_locked, &head[1].flags); + local->compound_mapcount = atomic_read(compound_mapcount_ptr(head)) + 1; + local->subpages_mapcount = atomic_read(subpages_mapcount_ptr(head)); +} + +/* + * After caller has updated subpage._mapcount, local subpages_mapcount and + * local compound_mapcount, as necessary, unlock_compound_mapcounts() converts + * and copies them back to the compound head[1] fields, and then unlocks. 
+ */ +static void unlock_compound_mapcounts(struct page *head, + struct compound_mapcounts *local) +{ + atomic_set(compound_mapcount_ptr(head), local->compound_mapcount - 1); + atomic_set(subpages_mapcount_ptr(head), local->subpages_mapcount); + bit_spin_unlock(PG_locked, &head[1].flags); +} + +/* + * When acting on a compound page under lock_compound_mapcounts(), avoid the + * unnecessary overhead of an actual atomic operation on its subpage mapcount. + * Return true if this is the first increment or the last decrement + * (remembering that page->_mapcount -1 represents logical mapcount 0). + */ +static bool subpage_mapcount_inc(struct page *page) +{ + int orig_mapcount = atomic_read(&page->_mapcount); + + atomic_set(&page->_mapcount, orig_mapcount + 1); + return orig_mapcount < 0; +} + +static bool subpage_mapcount_dec(struct page *page) +{ + int orig_mapcount = atomic_read(&page->_mapcount); + + atomic_set(&page->_mapcount, orig_mapcount - 1); + return orig_mapcount == 0; +} + /* * When mapping a THP's first pmd, or unmapping its last pmd, if that THP * also has pte mappings, then those must be discounted: in order to maintain * NR_ANON_MAPPED and NR_FILE_MAPPED statistics exactly, without any drift, * and to decide when an anon THP should be put on the deferred split queue. + * This function must be called between lock_ and unlock_compound_mapcounts(). */ static int nr_subpages_unmapped(struct page *head, int nr_subpages) { @@ -1103,6 +1158,40 @@ static int nr_subpages_unmapped(struct page *head, int nr_subpages) return nr; } +/* + * page_dup_compound_rmap(), used when copying mm, or when splitting pmd, + * provides a simple example of using lock_ and unlock_compound_mapcounts(). 
+ */ +void page_dup_compound_rmap(struct page *page, bool compound) +{ + struct compound_mapcounts mapcounts; + struct page *head; + + /* + * Hugetlb pages could use lock_compound_mapcounts(), like THPs do; + * but at present they are still being managed by atomic operations: + * which are likely to be somewhat faster, so don't rush to convert + * them over without evaluating the effect. + * + * Note that hugetlb does not call page_add_file_rmap(): + * here is where hugetlb shared page mapcount is raised. + */ + if (PageHuge(page)) { + atomic_inc(compound_mapcount_ptr(page)); + return; + } + + head = compound_head(page); + lock_compound_mapcounts(head, &mapcounts); + if (compound) { + mapcounts.compound_mapcount++; + } else { + mapcounts.subpages_mapcount++; + subpage_mapcount_inc(page); + } + unlock_compound_mapcounts(head, &mapcounts); +} + /** * page_move_anon_rmap - move a page to our anon_vma * @page: the page to move to our anon_vma @@ -1212,7 +1301,8 @@ static void __page_check_anon_rmap(struct page *page, void page_add_anon_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address, rmap_t flags) { - int nr, nr_pages; + struct compound_mapcounts mapcounts; + int nr = 0, nr_pmdmapped = 0; bool compound = flags & RMAP_COMPOUND; bool first; @@ -1222,33 +1312,37 @@ void page_add_anon_rmap(struct page *page, VM_BUG_ON_PAGE(!PageLocked(page), page); if (compound && PageTransHuge(page)) { - atomic_t *mapcount; - VM_BUG_ON_PAGE(!PageLocked(page), page); - mapcount = compound_mapcount_ptr(page); - first = atomic_inc_and_test(mapcount); - - nr = nr_pages = thp_nr_pages(page); - if (first && head_subpages_mapcount(page)) - nr = nr_subpages_unmapped(page, nr_pages); - } else { - nr = 1; - if (PageTransCompound(page)) { - struct page *head = compound_head(page); - - atomic_inc(subpages_mapcount_ptr(head)); - nr = !head_compound_mapcount(head); + lock_compound_mapcounts(page, &mapcounts); + first = !mapcounts.compound_mapcount; + 
mapcounts.compound_mapcount++; + if (first) { + nr = nr_pmdmapped = thp_nr_pages(page); + if (mapcounts.subpages_mapcount) + nr = nr_subpages_unmapped(page, nr_pmdmapped); } + unlock_compound_mapcounts(page, &mapcounts); + + } else if (PageCompound(page)) { + struct page *head = compound_head(page); + + lock_compound_mapcounts(head, &mapcounts); + mapcounts.subpages_mapcount++; + first = subpage_mapcount_inc(page); + nr = first && !mapcounts.compound_mapcount; + unlock_compound_mapcounts(head, &mapcounts); + + } else { first = atomic_inc_and_test(&page->_mapcount); + nr = first; } VM_BUG_ON_PAGE(!first && (flags & RMAP_EXCLUSIVE), page); VM_BUG_ON_PAGE(!first && PageAnonExclusive(page), page); - if (first) { - if (compound) - __mod_lruvec_page_state(page, NR_ANON_THPS, nr_pages); + if (nr_pmdmapped) + __mod_lruvec_page_state(page, NR_ANON_THPS, nr_pmdmapped); + if (nr) __mod_lruvec_page_state(page, NR_ANON_MAPPED, nr); - } if (unlikely(PageKsm(page))) unlock_page_memcg(page); @@ -1308,39 +1402,41 @@ void page_add_new_anon_rmap(struct page *page, void page_add_file_rmap(struct page *page, struct vm_area_struct *vma, bool compound) { - int nr = 0; + struct compound_mapcounts mapcounts; + int nr = 0, nr_pmdmapped = 0; + bool first; VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page); lock_page_memcg(page); + if (compound && PageTransHuge(page)) { - int nr_pages; - - if (!atomic_inc_and_test(compound_mapcount_ptr(page))) - goto out; - - nr = nr_pages = thp_nr_pages(page); - if (head_subpages_mapcount(page)) - nr = nr_subpages_unmapped(page, nr_pages); - - if (PageSwapBacked(page)) - __mod_lruvec_page_state(page, NR_SHMEM_PMDMAPPED, - nr_pages); - else - __mod_lruvec_page_state(page, NR_FILE_PMDMAPPED, - nr_pages); - } else { - bool pmd_mapped = false; - - if (PageTransCompound(page)) { - struct page *head = compound_head(page); - - atomic_inc(subpages_mapcount_ptr(head)); - pmd_mapped = head_compound_mapcount(head); + lock_compound_mapcounts(page, &mapcounts); + 
first = !mapcounts.compound_mapcount; + mapcounts.compound_mapcount++; + if (first) { + nr = nr_pmdmapped = thp_nr_pages(page); + if (mapcounts.subpages_mapcount) + nr = nr_subpages_unmapped(page, nr_pmdmapped); } - if (atomic_inc_and_test(&page->_mapcount) && !pmd_mapped) - nr++; + unlock_compound_mapcounts(page, &mapcounts); + + } else if (PageCompound(page)) { + struct page *head = compound_head(page); + + lock_compound_mapcounts(head, &mapcounts); + mapcounts.subpages_mapcount++; + first = subpage_mapcount_inc(page); + nr = first && !mapcounts.compound_mapcount; + unlock_compound_mapcounts(head, &mapcounts); + + } else { + first = atomic_inc_and_test(&page->_mapcount); + nr = first; } -out: + + if (nr_pmdmapped) + __mod_lruvec_page_state(page, PageSwapBacked(page) ? + NR_SHMEM_PMDMAPPED : NR_FILE_PMDMAPPED, nr_pmdmapped); if (nr) __mod_lruvec_page_state(page, NR_FILE_MAPPED, nr); unlock_page_memcg(page); @@ -1348,86 +1444,6 @@ out: mlock_vma_page(page, vma, compound); } -static void page_remove_file_rmap(struct page *page, bool compound) -{ - int nr = 0; - - VM_BUG_ON_PAGE(compound && !PageHead(page), page); - - /* Hugepages are not counted in NR_FILE_MAPPED for now. */ - if (unlikely(PageHuge(page))) { - /* hugetlb pages are always mapped with pmds */ - atomic_dec(compound_mapcount_ptr(page)); - return; - } - - /* page still mapped by someone else? 
*/ - if (compound && PageTransHuge(page)) { - int nr_pages; - - if (!atomic_add_negative(-1, compound_mapcount_ptr(page))) - return; - - nr = nr_pages = thp_nr_pages(page); - if (head_subpages_mapcount(page)) - nr = nr_subpages_unmapped(page, nr_pages); - - if (PageSwapBacked(page)) - __mod_lruvec_page_state(page, NR_SHMEM_PMDMAPPED, - -nr_pages); - else - __mod_lruvec_page_state(page, NR_FILE_PMDMAPPED, - -nr_pages); - } else { - bool pmd_mapped = false; - - if (PageTransCompound(page)) { - struct page *head = compound_head(page); - - atomic_dec(subpages_mapcount_ptr(head)); - pmd_mapped = head_compound_mapcount(head); - } - if (atomic_add_negative(-1, &page->_mapcount) && !pmd_mapped) - nr++; - } - - if (nr) - __mod_lruvec_page_state(page, NR_FILE_MAPPED, -nr); -} - -static void page_remove_anon_compound_rmap(struct page *page) -{ - int nr, nr_pages; - - if (!atomic_add_negative(-1, compound_mapcount_ptr(page))) - return; - - /* Hugepages are not counted in NR_ANON_PAGES for now. */ - if (unlikely(PageHuge(page))) - return; - - if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) - return; - - nr = nr_pages = thp_nr_pages(page); - __mod_lruvec_page_state(page, NR_ANON_THPS, -nr); - - if (head_subpages_mapcount(page)) { - nr = nr_subpages_unmapped(page, nr_pages); - - /* - * Queue the page for deferred split if at least one small - * page of the compound page is unmapped, but at least one - * small page is still mapped. 
- */ - if (nr && nr < nr_pages) - deferred_split_huge_page(page); - } - - if (nr) - __mod_lruvec_page_state(page, NR_ANON_MAPPED, -nr); -} - /** * page_remove_rmap - take down pte mapping from a page * @page: page to remove mapping from @@ -1439,46 +1455,73 @@ static void page_remove_anon_compound_rmap(struct page *page) void page_remove_rmap(struct page *page, struct vm_area_struct *vma, bool compound) { - bool pmd_mapped = false; + struct compound_mapcounts mapcounts; + int nr = 0, nr_pmdmapped = 0; + bool last; + + VM_BUG_ON_PAGE(compound && !PageHead(page), page); + + /* Hugetlb pages are not counted in NR_*MAPPED */ + if (unlikely(PageHuge(page))) { + /* hugetlb pages are always mapped with pmds */ + atomic_dec(compound_mapcount_ptr(page)); + return; + } lock_page_memcg(page); - if (!PageAnon(page)) { - page_remove_file_rmap(page, compound); - goto out; - } + /* page still mapped by someone else? */ + if (compound && PageTransHuge(page)) { + lock_compound_mapcounts(page, &mapcounts); + mapcounts.compound_mapcount--; + last = !mapcounts.compound_mapcount; + if (last) { + nr = nr_pmdmapped = thp_nr_pages(page); + if (mapcounts.subpages_mapcount) + nr = nr_subpages_unmapped(page, nr_pmdmapped); + } + unlock_compound_mapcounts(page, &mapcounts); - if (compound) { - page_remove_anon_compound_rmap(page); - goto out; - } - - if (PageTransCompound(page)) { + } else if (PageCompound(page)) { struct page *head = compound_head(page); - atomic_dec(subpages_mapcount_ptr(head)); - pmd_mapped = head_compound_mapcount(head); + lock_compound_mapcounts(head, &mapcounts); + mapcounts.subpages_mapcount--; + last = subpage_mapcount_dec(page); + nr = last && !mapcounts.compound_mapcount; + unlock_compound_mapcounts(head, &mapcounts); + + } else { + last = atomic_add_negative(-1, &page->_mapcount); + nr = last; } - /* page still mapped by someone else? 
*/ - if (!atomic_add_negative(-1, &page->_mapcount) || pmd_mapped) - goto out; - - __dec_lruvec_page_state(page, NR_ANON_MAPPED); - - if (PageTransCompound(page)) - deferred_split_huge_page(compound_head(page)); + if (nr_pmdmapped) { + __mod_lruvec_page_state(page, PageAnon(page) ? NR_ANON_THPS : + (PageSwapBacked(page) ? NR_SHMEM_PMDMAPPED : + NR_FILE_PMDMAPPED), -nr_pmdmapped); + } + if (nr) { + __mod_lruvec_page_state(page, PageAnon(page) ? NR_ANON_MAPPED : + NR_FILE_MAPPED, -nr); + /* + * Queue anon THP for deferred split if at least one small + * page of the compound page is unmapped, but at least one + * small page is still mapped. + */ + if (PageTransCompound(page) && PageAnon(page)) + if (!compound || nr < nr_pmdmapped) + deferred_split_huge_page(compound_head(page)); + } /* - * It would be tidy to reset the PageAnon mapping here, + * It would be tidy to reset PageAnon mapping when fully unmapped, * but that might overwrite a racing page_add_anon_rmap * which increments mapcount after us but sets mapping - * before us: so leave the reset to free_unref_page, + * before us: so leave the reset to free_pages_prepare, * and remember that it's only reliable while mapped. - * Leaving it set also helps swapoff to reinstate ptes - * faster for those pages still in swapcache. */ -out: + unlock_page_memcg(page); munlock_vma_page(page, vma, compound); From d8dd5e979d09c7463618853fb4aedd88e3efc8ae Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 9 Nov 2022 18:18:49 -0800 Subject: [PATCH 3007/4122] mm,thp,rmap: handle the normal !PageCompound case first Commit ("mm,thp,rmap: lock_compound_mapcounts() on THP mapcounts") propagated the "if (compound) {lock} else if (PageCompound) {lock} else {atomic}" pattern throughout; but Linus hated the way that gives primacy to the uncommon case: switch to "if (!PageCompound) {atomic} else if (compound) {lock} else {lock}" throughout. Linus has a bigger idea for how to improve it all, but here just make that rearrangement. 
Link: https://lkml.kernel.org/r/fca2f694-2098-b0ef-d4e-f1d8b94d318c@google.com Signed-off-by: Hugh Dickins Cc: David Hildenbrand Cc: James Houghton Cc: John Hubbard Cc: Kirill A. Shutemov Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Mike Kravetz Cc: Mina Almasry Cc: Muchun Song Cc: Naoya Horiguchi Cc: Peter Xu Cc: Sidhartha Kumar Cc: Vlastimil Babka Cc: Yang Shi Cc: Zach O'Keefe Signed-off-by: Andrew Morton --- mm/rmap.c | 54 +++++++++++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/mm/rmap.c b/mm/rmap.c index 512e53cae2ca..4833d28c5e1a 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1311,7 +1311,11 @@ void page_add_anon_rmap(struct page *page, else VM_BUG_ON_PAGE(!PageLocked(page), page); - if (compound && PageTransHuge(page)) { + if (likely(!PageCompound(page))) { + first = atomic_inc_and_test(&page->_mapcount); + nr = first; + + } else if (compound && PageTransHuge(page)) { lock_compound_mapcounts(page, &mapcounts); first = !mapcounts.compound_mapcount; mapcounts.compound_mapcount++; @@ -1321,8 +1325,7 @@ void page_add_anon_rmap(struct page *page, nr = nr_subpages_unmapped(page, nr_pmdmapped); } unlock_compound_mapcounts(page, &mapcounts); - - } else if (PageCompound(page)) { + } else { struct page *head = compound_head(page); lock_compound_mapcounts(head, &mapcounts); @@ -1330,10 +1333,6 @@ void page_add_anon_rmap(struct page *page, first = subpage_mapcount_inc(page); nr = first && !mapcounts.compound_mapcount; unlock_compound_mapcounts(head, &mapcounts); - - } else { - first = atomic_inc_and_test(&page->_mapcount); - nr = first; } VM_BUG_ON_PAGE(!first && (flags & RMAP_EXCLUSIVE), page); @@ -1373,20 +1372,23 @@ void page_add_anon_rmap(struct page *page, void page_add_new_anon_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address) { - const bool compound = PageCompound(page); - int nr = compound ? 
thp_nr_pages(page) : 1; + int nr; VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma); __SetPageSwapBacked(page); - if (compound) { + + if (likely(!PageCompound(page))) { + /* increment count (starts at -1) */ + atomic_set(&page->_mapcount, 0); + nr = 1; + } else { VM_BUG_ON_PAGE(!PageTransHuge(page), page); /* increment count (starts at -1) */ atomic_set(compound_mapcount_ptr(page), 0); + nr = thp_nr_pages(page); __mod_lruvec_page_state(page, NR_ANON_THPS, nr); - } else { - /* increment count (starts at -1) */ - atomic_set(&page->_mapcount, 0); } + __mod_lruvec_page_state(page, NR_ANON_MAPPED, nr); __page_set_anon_rmap(page, vma, address, 1); } @@ -1409,7 +1411,11 @@ void page_add_file_rmap(struct page *page, VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page); lock_page_memcg(page); - if (compound && PageTransHuge(page)) { + if (likely(!PageCompound(page))) { + first = atomic_inc_and_test(&page->_mapcount); + nr = first; + + } else if (compound && PageTransHuge(page)) { lock_compound_mapcounts(page, &mapcounts); first = !mapcounts.compound_mapcount; mapcounts.compound_mapcount++; @@ -1419,8 +1425,7 @@ void page_add_file_rmap(struct page *page, nr = nr_subpages_unmapped(page, nr_pmdmapped); } unlock_compound_mapcounts(page, &mapcounts); - - } else if (PageCompound(page)) { + } else { struct page *head = compound_head(page); lock_compound_mapcounts(head, &mapcounts); @@ -1428,10 +1433,6 @@ void page_add_file_rmap(struct page *page, first = subpage_mapcount_inc(page); nr = first && !mapcounts.compound_mapcount; unlock_compound_mapcounts(head, &mapcounts); - - } else { - first = atomic_inc_and_test(&page->_mapcount); - nr = first; } if (nr_pmdmapped) @@ -1471,7 +1472,11 @@ void page_remove_rmap(struct page *page, lock_page_memcg(page); /* page still mapped by someone else? 
*/ - if (compound && PageTransHuge(page)) { + if (likely(!PageCompound(page))) { + last = atomic_add_negative(-1, &page->_mapcount); + nr = last; + + } else if (compound && PageTransHuge(page)) { lock_compound_mapcounts(page, &mapcounts); mapcounts.compound_mapcount--; last = !mapcounts.compound_mapcount; @@ -1481,8 +1486,7 @@ void page_remove_rmap(struct page *page, nr = nr_subpages_unmapped(page, nr_pmdmapped); } unlock_compound_mapcounts(page, &mapcounts); - - } else if (PageCompound(page)) { + } else { struct page *head = compound_head(page); lock_compound_mapcounts(head, &mapcounts); @@ -1490,10 +1494,6 @@ void page_remove_rmap(struct page *page, last = subpage_mapcount_dec(page); nr = last && !mapcounts.compound_mapcount; unlock_compound_mapcounts(head, &mapcounts); - - } else { - last = atomic_add_negative(-1, &page->_mapcount); - nr = last; } if (nr_pmdmapped) { From d7ec8f421ade2817983963a106b0085cc478c17b Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 7 Nov 2022 16:50:01 +0000 Subject: [PATCH 3008/4122] selftests/damon: test non-context inputs to rm_contexts file There was a bug[1] that was triggered by writing non-context DAMON debugfs file names to the 'rm_contexts' DAMON debugfs file. Add a selftest for the bug to prevent it from happening again.
[1] https://lore.kernel.org/damon/000000000000ede3ac05ec4abf8e@google.com/ Link: https://lkml.kernel.org/r/20221107165001.5717-3-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/Makefile | 1 + .../damon/debugfs_rm_non_contexts.sh | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+) create mode 100644 tools/testing/selftests/damon/debugfs_rm_non_contexts.sh diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile index af490acc5348..838a8e49f77b 100644 --- a/tools/testing/selftests/damon/Makefile +++ b/tools/testing/selftests/damon/Makefile @@ -7,6 +7,7 @@ TEST_FILES = _chk_dependency.sh _debugfs_common.sh TEST_PROGS = debugfs_attrs.sh debugfs_schemes.sh debugfs_target_ids.sh TEST_PROGS += debugfs_empty_targets.sh debugfs_huge_count_read_write.sh TEST_PROGS += debugfs_duplicate_context_creation.sh +TEST_PROGS += debugfs_rm_non_contexts.sh TEST_PROGS += sysfs.sh TEST_PROGS += reclaim.sh lru_sort.sh diff --git a/tools/testing/selftests/damon/debugfs_rm_non_contexts.sh b/tools/testing/selftests/damon/debugfs_rm_non_contexts.sh new file mode 100644 index 000000000000..48b7af6b022c --- /dev/null +++ b/tools/testing/selftests/damon/debugfs_rm_non_contexts.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source _debugfs_common.sh + +# Test putting non-ctx files/dirs to rm_contexts file +# =================================================== + +dmesg -C + +for file in "$DBGFS/"* +do + echo "$(basename "$file")" > "$DBGFS/rm_contexts" + if dmesg | grep -q BUG + then + dmesg + exit 1 + fi +done From 11aad2631bf74b3c811dee76154702aab855a323 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Mon, 7 Nov 2022 15:39:22 +0000 Subject: [PATCH 3009/4122] mm/hugetlb_vmemmap: remap head page to newly allocated page Today with `hugetlb_free_vmemmap=on` the struct page memory that is freed back to page allocator is as following: for a 2M hugetlb page it will reuse the
first 4K vmemmap page to remap the remaining 7 vmemmap pages, and for a 1G hugetlb it will remap the remaining 4095 vmemmap pages. Essentially, that means that it breaks the first 4K of a potentially contiguous chunk of memory of 32K (for 2M hugetlb pages) or 16M (for 1G hugetlb pages). For this reason the memory that is freed back to the page allocator cannot be used for hugetlb to allocate huge pages of the same size, but rather only of a smaller huge page size: Trying to assign a 64G node to hugetlb (on a 128G 2node guest, each node having 64G): * Before allocation: Free pages count per migrate type at order 0 1 2 3 4 5 6 7 8 9 10 ... Node 0, zone Normal, type Movable 340 100 32 15 1 2 0 0 0 1 15558 $ echo 32768 > /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages $ cat /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages 31987 * After: Node 0, zone Normal, type Movable 30893 32006 31515 7 0 0 0 0 0 0 0 Notice how the memory freed back is put back into 4K / 8K / 16K page pools. And it allocates a total of 31987 pages (63974M). To fix this behaviour, rather than remapping the second vmemmap page (thus breaking the contiguous block of memory backing the struct pages), repopulate the first vmemmap page with a new one. We allocate and copy from the currently mapped vmemmap page, and then remap it later on. The same algorithm works if there's a pre-initialized walk::reuse_page and the head page doesn't need to be skipped and instead we remap it when the @addr being changed is the @reuse_addr. The new head page is allocated in vmemmap_remap_free() given that on restore there's no need for functional change. Note that, because right now one hugepage is remapped at a time, only one free 4K page at a time is needed to remap the head page. Should it fail to allocate said new page, it reuses the one that's already mapped just like before.
As a result, for every 64G of contiguous hugepages it can give back 1G more of contiguous memory per 64G, while needing in total 128M new 4K pages (for 2M hugetlb) or 256k (for 1G hugetlb). After the changes, try to assign a 64G node to hugetlb (on a 128G 2node guest, each node with 64G): * Before allocation Free pages count per migrate type at order 0 1 2 3 4 5 6 7 8 9 10 ... Node 0, zone Normal, type Movable 1 1 1 0 0 1 0 0 1 1 15564 $ echo 32768 > /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages $ cat /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages 32394 * After: Node 0, zone Normal, type Movable 0 50 97 108 96 81 70 46 18 0 0 In the example above, 407 more hugetlb 2M pages are allocated i.e. 814M out of the 32394 (64788M) allocated. So the memory freed back is indeed being used back in hugetlb and there's no massive order-0..order-2 pages accumulated unused. [joao.m.martins@oracle.com: v3] Link: https://lkml.kernel.org/r/20221109200623.96867-1-joao.m.martins@oracle.com [joao.m.martins@oracle.com: add smp_wmb() to ensure page contents are visible prior to PTE write] Link: https://lkml.kernel.org/r/20221110121214.6297-1-joao.m.martins@oracle.com Link: https://lkml.kernel.org/r/20221107153922.77094-1-joao.m.martins@oracle.com Signed-off-by: Joao Martins Reviewed-by: Muchun Song Cc: Mike Kravetz Signed-off-by: Andrew Morton --- mm/hugetlb_vmemmap.c | 41 ++++++++++++++++++++++++++++++++++------- 1 file changed, 34 insertions(+), 7 deletions(-) diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c index 7898c2c75e35..45e93a545dd7 100644 --- a/mm/hugetlb_vmemmap.c +++ b/mm/hugetlb_vmemmap.c @@ -203,12 +203,7 @@ static int vmemmap_remap_range(unsigned long start, unsigned long end, return ret; } while (pgd++, addr = next, addr != end); - /* - * We only change the mapping of the vmemmap virtual address range - * [@start + PAGE_SIZE, end), so we only need to flush the TLB which - * belongs to the range.
- */ - flush_tlb_kernel_range(start + PAGE_SIZE, end); + flush_tlb_kernel_range(start, end); return 0; } @@ -244,9 +239,23 @@ static void vmemmap_remap_pte(pte_t *pte, unsigned long addr, * to the tail pages. */ pgprot_t pgprot = PAGE_KERNEL_RO; - pte_t entry = mk_pte(walk->reuse_page, pgprot); struct page *page = pte_page(*pte); + pte_t entry; + /* Remapping the head page requires r/w */ + if (unlikely(addr == walk->reuse_addr)) { + pgprot = PAGE_KERNEL; + list_del(&walk->reuse_page->lru); + + /* + * Makes sure that preceding stores to the page contents from + * vmemmap_remap_free() become visible before the set_pte_at() + * write. + */ + smp_wmb(); + } + + entry = mk_pte(walk->reuse_page, pgprot); list_add_tail(&page->lru, walk->vmemmap_pages); set_pte_at(&init_mm, addr, pte, entry); } @@ -315,6 +324,24 @@ static int vmemmap_remap_free(unsigned long start, unsigned long end, .reuse_addr = reuse, .vmemmap_pages = &vmemmap_pages, }; + int nid = page_to_nid((struct page *)start); + gfp_t gfp_mask = GFP_KERNEL | __GFP_THISNODE | __GFP_NORETRY | + __GFP_NOWARN; + + /* + * Allocate a new head vmemmap page to avoid breaking a contiguous + * block of struct page memory when freeing it back to page allocator + * in free_vmemmap_page_list(). This will allow the likely contiguous + * struct page backing memory to be kept contiguous and allowing for + * more allocations of hugepages. Fallback to the currently + * mapped head page in case should it fail to allocate. 
+ */ + walk.reuse_page = alloc_pages_node(nid, gfp_mask, 0); + if (walk.reuse_page) { + copy_page(page_to_virt(walk.reuse_page), + (void *)walk.reuse_addr); + list_add(&walk.reuse_page->lru, &vmemmap_pages); + } /* * In order to make remapping routine most efficient for the huge pages, From be5ef2d9b006bbd93b1a03e1da2dbd19fb0b9f14 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 22 Nov 2022 01:42:04 -0800 Subject: [PATCH 3010/4122] mm,thp,rmap: subpages_mapcount of PTE-mapped subpages Patch series "mm,thp,rmap: rework the use of subpages_mapcount", v2. This patch (of 3): Following suggestion from Linus, instead of counting every PTE map of a compound page in subpages_mapcount, just count how many of its subpages are PTE-mapped: this yields the exact number needed for NR_ANON_MAPPED and NR_FILE_MAPPED stats, without any need for a locked scan of subpages; and requires updating the count less often. This does then revert total_mapcount() and folio_mapcount() to needing a scan of subpages; but they are inherently racy, and need no locking, so Linus is right that the scans are much better done there. Plus (unlike in 6.1 and previous) subpages_mapcount lets us avoid the scan in the common case of no PTE maps. And page_mapped() and folio_mapped() remain scanless and just as efficient with the new meaning of subpages_mapcount: those are the functions which I most wanted to remove the scan from. The updated page_dup_compound_rmap() is no longer suitable for use by anon THP's __split_huge_pmd_locked(); but page_add_anon_rmap() can be used for that, so long as its VM_BUG_ON_PAGE(!PageLocked) is deleted. Evidence is that this way goes slightly faster than the previous implementation for most cases; but significantly faster in the (now scanless) pmds after ptes case, which started out at 870ms and was brought down to 495ms by the previous series, now takes around 105ms. 
Link: https://lkml.kernel.org/r/a5849eca-22f1-3517-bf29-95d982242742@google.com Link: https://lkml.kernel.org/r/eec17e16-4e1-7c59-f1bc-5bca90dac919@google.com Signed-off-by: Hugh Dickins Suggested-by: Linus Torvalds Acked-by: Kirill A. Shutemov Cc: Dan Carpenter Cc: David Hildenbrand Cc: James Houghton Cc: Johannes Weiner Cc: John Hubbard Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Mike Kravetz Cc: Mina Almasry Cc: Muchun Song Cc: Naoya Horiguchi Cc: Peter Xu Cc: Sidhartha Kumar Cc: Vlastimil Babka Cc: Yang Shi Cc: Yu Zhao Cc: Zach O'Keefe Signed-off-by: Andrew Morton --- Documentation/mm/transhuge.rst | 3 +- include/linux/mm.h | 52 ++++++----- include/linux/rmap.h | 9 +- mm/huge_memory.c | 2 +- mm/rmap.c | 160 ++++++++++++++------------------- 5 files changed, 107 insertions(+), 119 deletions(-) diff --git a/Documentation/mm/transhuge.rst b/Documentation/mm/transhuge.rst index 1e2a637cc607..af4c9d70321d 100644 --- a/Documentation/mm/transhuge.rst +++ b/Documentation/mm/transhuge.rst @@ -122,7 +122,8 @@ pages: - map/unmap of sub-pages with PTE entry increment/decrement ->_mapcount on relevant sub-page of the compound page, and also increment/decrement - ->subpages_mapcount, stored in first tail page of the compound page. + ->subpages_mapcount, stored in first tail page of the compound page, when + _mapcount goes from -1 to 0 or 0 to -1: counting sub-pages mapped by PTE. In order to have race-free accounting of sub-pages mapped, changes to sub-page ->_mapcount, ->subpages_mapcount and ->compound_mapcount are are all locked by bit_spin_lock of PG_locked in the first tail ->flags. diff --git a/include/linux/mm.h b/include/linux/mm.h index a904c2d60f12..84fb91f6f56e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -837,7 +837,7 @@ static inline int head_compound_mapcount(struct page *head) } /* - * Sum of mapcounts of sub-pages, does not include compound mapcount. + * Number of sub-pages mapped by PTE, does not include compound mapcount. 
* Must be called only on head of compound page. */ static inline int head_subpages_mapcount(struct page *head) @@ -873,23 +873,7 @@ static inline int page_mapcount(struct page *page) return head_compound_mapcount(page) + mapcount; } -static inline int total_mapcount(struct page *page) -{ - if (likely(!PageCompound(page))) - return atomic_read(&page->_mapcount) + 1; - page = compound_head(page); - return head_compound_mapcount(page) + head_subpages_mapcount(page); -} - -/* - * Return true if this page is mapped into pagetables. - * For compound page it returns true if any subpage of compound page is mapped, - * even if this particular subpage is not itself mapped by any PTE or PMD. - */ -static inline bool page_mapped(struct page *page) -{ - return total_mapcount(page) > 0; -} +int total_compound_mapcount(struct page *head); /** * folio_mapcount() - Calculate the number of mappings of this folio. @@ -906,8 +890,20 @@ static inline int folio_mapcount(struct folio *folio) { if (likely(!folio_test_large(folio))) return atomic_read(&folio->_mapcount) + 1; - return atomic_read(folio_mapcount_ptr(folio)) + 1 + - atomic_read(folio_subpages_mapcount_ptr(folio)); + return total_compound_mapcount(&folio->page); +} + +static inline int total_mapcount(struct page *page) +{ + if (likely(!PageCompound(page))) + return atomic_read(&page->_mapcount) + 1; + return total_compound_mapcount(compound_head(page)); +} + +static inline bool folio_large_is_mapped(struct folio *folio) +{ + return atomic_read(folio_mapcount_ptr(folio)) + + atomic_read(folio_subpages_mapcount_ptr(folio)) >= 0; } /** @@ -918,7 +914,21 @@ static inline int folio_mapcount(struct folio *folio) */ static inline bool folio_mapped(struct folio *folio) { - return folio_mapcount(folio) > 0; + if (likely(!folio_test_large(folio))) + return atomic_read(&folio->_mapcount) >= 0; + return folio_large_is_mapped(folio); +} + +/* + * Return true if this page is mapped into pagetables. 
+ * For compound page it returns true if any sub-page of compound page is mapped, + * even if this particular sub-page is not itself mapped by any PTE or PMD. + */ +static inline bool page_mapped(struct page *page) +{ + if (likely(!PageCompound(page))) + return atomic_read(&page->_mapcount) >= 0; + return folio_large_is_mapped(page_folio(page)); } static inline struct page *virt_to_head_page(const void *x) diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 011a7530dc76..5dadb9a3e010 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -204,14 +204,15 @@ void hugepage_add_anon_rmap(struct page *, struct vm_area_struct *, void hugepage_add_new_anon_rmap(struct page *, struct vm_area_struct *, unsigned long address); -void page_dup_compound_rmap(struct page *page, bool compound); +void page_dup_compound_rmap(struct page *page); static inline void page_dup_file_rmap(struct page *page, bool compound) { - if (PageCompound(page)) - page_dup_compound_rmap(page, compound); - else + /* Is page being mapped by PTE? */ + if (likely(!compound)) atomic_inc(&page->_mapcount); + else + page_dup_compound_rmap(page); } /** diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 114517e8cbfc..681253adf529 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2221,7 +2221,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, BUG_ON(!pte_none(*pte)); set_pte_at(mm, addr, pte, entry); if (!pmd_migration) - page_dup_compound_rmap(page + i, false); + page_add_anon_rmap(page + i, vma, addr, false); pte_unmap(pte); } diff --git a/mm/rmap.c b/mm/rmap.c index 4833d28c5e1a..e813785da613 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1117,55 +1117,36 @@ static void unlock_compound_mapcounts(struct page *head, bit_spin_unlock(PG_locked, &head[1].flags); } -/* - * When acting on a compound page under lock_compound_mapcounts(), avoid the - * unnecessary overhead of an actual atomic operation on its subpage mapcount. 
- * Return true if this is the first increment or the last decrement - * (remembering that page->_mapcount -1 represents logical mapcount 0). - */ -static bool subpage_mapcount_inc(struct page *page) +int total_compound_mapcount(struct page *head) { - int orig_mapcount = atomic_read(&page->_mapcount); - - atomic_set(&page->_mapcount, orig_mapcount + 1); - return orig_mapcount < 0; -} - -static bool subpage_mapcount_dec(struct page *page) -{ - int orig_mapcount = atomic_read(&page->_mapcount); - - atomic_set(&page->_mapcount, orig_mapcount - 1); - return orig_mapcount == 0; -} - -/* - * When mapping a THP's first pmd, or unmapping its last pmd, if that THP - * also has pte mappings, then those must be discounted: in order to maintain - * NR_ANON_MAPPED and NR_FILE_MAPPED statistics exactly, without any drift, - * and to decide when an anon THP should be put on the deferred split queue. - * This function must be called between lock_ and unlock_compound_mapcounts(). - */ -static int nr_subpages_unmapped(struct page *head, int nr_subpages) -{ - int nr = nr_subpages; + int mapcount = head_compound_mapcount(head); + int nr_subpages; int i; - /* Discount those subpages mapped by pte */ + /* In the common case, avoid the loop when no subpages mapped by PTE */ + if (head_subpages_mapcount(head) == 0) + return mapcount; + /* + * Add all the PTE mappings of those subpages mapped by PTE. + * Limit the loop, knowing that only subpages_mapcount are mapped? + * Perhaps: given all the raciness, that may be a good or a bad idea. 
+ */ + nr_subpages = thp_nr_pages(head); for (i = 0; i < nr_subpages; i++) - if (atomic_read(&head[i]._mapcount) >= 0) - nr--; - return nr; + mapcount += atomic_read(&head[i]._mapcount); + + /* But each of those _mapcounts was based on -1 */ + mapcount += nr_subpages; + return mapcount; } /* - * page_dup_compound_rmap(), used when copying mm, or when splitting pmd, + * page_dup_compound_rmap(), used when copying mm, * provides a simple example of using lock_ and unlock_compound_mapcounts(). */ -void page_dup_compound_rmap(struct page *page, bool compound) +void page_dup_compound_rmap(struct page *head) { struct compound_mapcounts mapcounts; - struct page *head; /* * Hugetlb pages could use lock_compound_mapcounts(), like THPs do; @@ -1176,20 +1157,15 @@ void page_dup_compound_rmap(struct page *page, bool compound) * Note that hugetlb does not call page_add_file_rmap(): * here is where hugetlb shared page mapcount is raised. */ - if (PageHuge(page)) { - atomic_inc(compound_mapcount_ptr(page)); - return; - } + if (PageHuge(head)) { + atomic_inc(compound_mapcount_ptr(head)); + } else if (PageTransHuge(head)) { + /* That test is redundant: it's for safety or to optimize out */ - head = compound_head(page); - lock_compound_mapcounts(head, &mapcounts); - if (compound) { + lock_compound_mapcounts(head, &mapcounts); mapcounts.compound_mapcount++; - } else { - mapcounts.subpages_mapcount++; - subpage_mapcount_inc(page); + unlock_compound_mapcounts(head, &mapcounts); } - unlock_compound_mapcounts(head, &mapcounts); } /** @@ -1304,35 +1280,34 @@ void page_add_anon_rmap(struct page *page, struct compound_mapcounts mapcounts; int nr = 0, nr_pmdmapped = 0; bool compound = flags & RMAP_COMPOUND; - bool first; + bool first = true; if (unlikely(PageKsm(page))) lock_page_memcg(page); - else - VM_BUG_ON_PAGE(!PageLocked(page), page); - if (likely(!PageCompound(page))) { + /* Is page being mapped by PTE? Is this its first map to be added? 
*/ + if (likely(!compound)) { first = atomic_inc_and_test(&page->_mapcount); nr = first; + if (first && PageCompound(page)) { + struct page *head = compound_head(page); + + lock_compound_mapcounts(head, &mapcounts); + mapcounts.subpages_mapcount++; + nr = !mapcounts.compound_mapcount; + unlock_compound_mapcounts(head, &mapcounts); + } + } else if (PageTransHuge(page)) { + /* That test is redundant: it's for safety or to optimize out */ - } else if (compound && PageTransHuge(page)) { lock_compound_mapcounts(page, &mapcounts); first = !mapcounts.compound_mapcount; mapcounts.compound_mapcount++; if (first) { - nr = nr_pmdmapped = thp_nr_pages(page); - if (mapcounts.subpages_mapcount) - nr = nr_subpages_unmapped(page, nr_pmdmapped); + nr_pmdmapped = thp_nr_pages(page); + nr = nr_pmdmapped - mapcounts.subpages_mapcount; } unlock_compound_mapcounts(page, &mapcounts); - } else { - struct page *head = compound_head(page); - - lock_compound_mapcounts(head, &mapcounts); - mapcounts.subpages_mapcount++; - first = subpage_mapcount_inc(page); - nr = first && !mapcounts.compound_mapcount; - unlock_compound_mapcounts(head, &mapcounts); } VM_BUG_ON_PAGE(!first && (flags & RMAP_EXCLUSIVE), page); @@ -1411,28 +1386,29 @@ void page_add_file_rmap(struct page *page, VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page); lock_page_memcg(page); - if (likely(!PageCompound(page))) { + /* Is page being mapped by PTE? Is this its first map to be added? 
*/ + if (likely(!compound)) { first = atomic_inc_and_test(&page->_mapcount); nr = first; + if (first && PageCompound(page)) { + struct page *head = compound_head(page); + + lock_compound_mapcounts(head, &mapcounts); + mapcounts.subpages_mapcount++; + nr = !mapcounts.compound_mapcount; + unlock_compound_mapcounts(head, &mapcounts); + } + } else if (PageTransHuge(page)) { + /* That test is redundant: it's for safety or to optimize out */ - } else if (compound && PageTransHuge(page)) { lock_compound_mapcounts(page, &mapcounts); first = !mapcounts.compound_mapcount; mapcounts.compound_mapcount++; if (first) { - nr = nr_pmdmapped = thp_nr_pages(page); - if (mapcounts.subpages_mapcount) - nr = nr_subpages_unmapped(page, nr_pmdmapped); + nr_pmdmapped = thp_nr_pages(page); + nr = nr_pmdmapped - mapcounts.subpages_mapcount; } unlock_compound_mapcounts(page, &mapcounts); - } else { - struct page *head = compound_head(page); - - lock_compound_mapcounts(head, &mapcounts); - mapcounts.subpages_mapcount++; - first = subpage_mapcount_inc(page); - nr = first && !mapcounts.compound_mapcount; - unlock_compound_mapcounts(head, &mapcounts); } if (nr_pmdmapped) @@ -1471,29 +1447,29 @@ void page_remove_rmap(struct page *page, lock_page_memcg(page); - /* page still mapped by someone else? */ - if (likely(!PageCompound(page))) { + /* Is page being unmapped by PTE? Is this its last map to be removed? 
*/ + if (likely(!compound)) { last = atomic_add_negative(-1, &page->_mapcount); nr = last; + if (last && PageCompound(page)) { + struct page *head = compound_head(page); + + lock_compound_mapcounts(head, &mapcounts); + mapcounts.subpages_mapcount--; + nr = !mapcounts.compound_mapcount; + unlock_compound_mapcounts(head, &mapcounts); + } + } else if (PageTransHuge(page)) { + /* That test is redundant: it's for safety or to optimize out */ - } else if (compound && PageTransHuge(page)) { lock_compound_mapcounts(page, &mapcounts); mapcounts.compound_mapcount--; last = !mapcounts.compound_mapcount; if (last) { - nr = nr_pmdmapped = thp_nr_pages(page); - if (mapcounts.subpages_mapcount) - nr = nr_subpages_unmapped(page, nr_pmdmapped); + nr_pmdmapped = thp_nr_pages(page); + nr = nr_pmdmapped - mapcounts.subpages_mapcount; } unlock_compound_mapcounts(page, &mapcounts); - } else { - struct page *head = compound_head(page); - - lock_compound_mapcounts(head, &mapcounts); - mapcounts.subpages_mapcount--; - last = subpage_mapcount_dec(page); - nr = last && !mapcounts.compound_mapcount; - unlock_compound_mapcounts(head, &mapcounts); } if (nr_pmdmapped) { From 4b51634cd16a01b2be0f6b69cc0dae63de4751f2 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 22 Nov 2022 01:49:36 -0800 Subject: [PATCH 3011/4122] mm,thp,rmap: subpages_mapcount COMPOUND_MAPPED if PMD-mapped Can the lock_compound_mapcount() bit_spin_lock apparatus be removed now? Yes. Not by atomic64_t or cmpxchg games, those get difficult on 32-bit; but if we slightly abuse subpages_mapcount by additionally demanding that one bit be set there when the compound page is PMD-mapped, then a cascade of two atomic ops is able to maintain the stats without bit_spin_lock. This is harder to reason about than when bit_spin_locked, but I believe safe; and no drift in stats detected when testing. 
When there are racing removes and adds, of course the sequence of operations is less well-defined; but each operation on subpages_mapcount is atomically good. What might be disastrous, is if subpages_mapcount could ever fleetingly appear negative: but the pte lock (or pmd lock) these rmap functions are called under, ensures that a last remove cannot race ahead of a first add. Continue to make an exception for hugetlb (PageHuge) pages, though that exception can be easily removed by a further commit if necessary: leave subpages_mapcount 0, don't bother with COMPOUND_MAPPED in its case, just carry on checking compound_mapcount too in folio_mapped(), page_mapped(). Evidence is that this way goes slightly faster than the previous implementation in all cases (pmds after ptes now taking around 103ms); and relieves us of worrying about contention on the bit_spin_lock. Link: https://lkml.kernel.org/r/3978f3ca-5473-55a7-4e14-efea5968d892@google.com Signed-off-by: Hugh Dickins Acked-by: Kirill A. Shutemov Cc: Dan Carpenter Cc: David Hildenbrand Cc: James Houghton Cc: Johannes Weiner Cc: John Hubbard Cc: Linus Torvalds Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Mike Kravetz Cc: Mina Almasry Cc: Muchun Song Cc: Naoya Horiguchi Cc: Peter Xu Cc: Sidhartha Kumar Cc: Vlastimil Babka Cc: Yang Shi Cc: Yu Zhao Cc: Zach O'Keefe Signed-off-by: Andrew Morton --- Documentation/mm/transhuge.rst | 7 +- include/linux/mm.h | 19 +++++- include/linux/rmap.h | 13 ++-- mm/page_alloc.c | 2 +- mm/rmap.c | 121 +++++++-------------------------- 5 files changed, 51 insertions(+), 111 deletions(-) diff --git a/Documentation/mm/transhuge.rst b/Documentation/mm/transhuge.rst index af4c9d70321d..ec3dc5b04226 100644 --- a/Documentation/mm/transhuge.rst +++ b/Documentation/mm/transhuge.rst @@ -118,15 +118,14 @@ pages: succeeds on tail pages. - map/unmap of PMD entry for the whole compound page increment/decrement - ->compound_mapcount, stored in the first tail page of the compound page.
+ ->compound_mapcount, stored in the first tail page of the compound page; + and also increment/decrement ->subpages_mapcount (also in the first tail) + by COMPOUND_MAPPED when compound_mapcount goes from -1 to 0 or 0 to -1. - map/unmap of sub-pages with PTE entry increment/decrement ->_mapcount on relevant sub-page of the compound page, and also increment/decrement ->subpages_mapcount, stored in first tail page of the compound page, when _mapcount goes from -1 to 0 or 0 to -1: counting sub-pages mapped by PTE. - In order to have race-free accounting of sub-pages mapped, changes to - sub-page ->_mapcount, ->subpages_mapcount and ->compound_mapcount are - are all locked by bit_spin_lock of PG_locked in the first tail ->flags. split_huge_page internally has to distribute the refcounts in the head page to the tail pages before clearing all PG_head/tail bits from the page diff --git a/include/linux/mm.h b/include/linux/mm.h index 84fb91f6f56e..d33639be3db3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -836,13 +836,22 @@ static inline int head_compound_mapcount(struct page *head) return atomic_read(compound_mapcount_ptr(head)) + 1; } +/* + * If a 16GB hugetlb page were mapped by PTEs of all of its 4kB sub-pages, + * its subpages_mapcount would be 0x400000: choose the COMPOUND_MAPPED bit + * above that range, instead of 2*(PMD_SIZE/PAGE_SIZE). Hugetlb currently + * leaves subpages_mapcount at 0, but avoid surprise if it participates later. + */ +#define COMPOUND_MAPPED 0x800000 +#define SUBPAGES_MAPPED (COMPOUND_MAPPED - 1) + /* * Number of sub-pages mapped by PTE, does not include compound mapcount. * Must be called only on head of compound page. 
*/ static inline int head_subpages_mapcount(struct page *head) { - return atomic_read(subpages_mapcount_ptr(head)); + return atomic_read(subpages_mapcount_ptr(head)) & SUBPAGES_MAPPED; } /* @@ -902,8 +911,12 @@ static inline int total_mapcount(struct page *page) static inline bool folio_large_is_mapped(struct folio *folio) { - return atomic_read(folio_mapcount_ptr(folio)) + - atomic_read(folio_subpages_mapcount_ptr(folio)) >= 0; + /* + * Reading folio_mapcount_ptr() below could be omitted if hugetlb + * participated in incrementing subpages_mapcount when compound mapped. + */ + return atomic_read(folio_subpages_mapcount_ptr(folio)) > 0 || + atomic_read(folio_mapcount_ptr(folio)) >= 0; } /** diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 5dadb9a3e010..bd3504d11b15 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -204,15 +204,14 @@ void hugepage_add_anon_rmap(struct page *, struct vm_area_struct *, void hugepage_add_new_anon_rmap(struct page *, struct vm_area_struct *, unsigned long address); -void page_dup_compound_rmap(struct page *page); +static inline void __page_dup_rmap(struct page *page, bool compound) +{ + atomic_inc(compound ? compound_mapcount_ptr(page) : &page->_mapcount); +} static inline void page_dup_file_rmap(struct page *page, bool compound) { - /* Is page being mapped by PTE? */ - if (likely(!compound)) - atomic_inc(&page->_mapcount); - else - page_dup_compound_rmap(page); + __page_dup_rmap(page, compound); } /** @@ -261,7 +260,7 @@ static inline int page_try_dup_anon_rmap(struct page *page, bool compound, * the page R/O into both processes. 
*/ dup: - page_dup_file_rmap(page, compound); + __page_dup_rmap(page, compound); return 0; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 0705917ddf54..c33b6963c2d7 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1330,7 +1330,7 @@ static int free_tail_pages_check(struct page *head_page, struct page *page) bad_page(page, "nonzero compound_mapcount"); goto out; } - if (unlikely(head_subpages_mapcount(head_page))) { + if (unlikely(atomic_read(subpages_mapcount_ptr(head_page)))) { bad_page(page, "nonzero subpages_mapcount"); goto out; } diff --git a/mm/rmap.c b/mm/rmap.c index e813785da613..459dc1c44d8a 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1085,38 +1085,6 @@ int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff, return page_vma_mkclean_one(&pvmw); } -struct compound_mapcounts { - unsigned int compound_mapcount; - unsigned int subpages_mapcount; -}; - -/* - * lock_compound_mapcounts() first locks, then copies subpages_mapcount and - * compound_mapcount from head[1].compound_mapcount and subpages_mapcount, - * converting from struct page's internal representation to logical count - * (that is, adding 1 to compound_mapcount to hide its offset by -1). - */ -static void lock_compound_mapcounts(struct page *head, - struct compound_mapcounts *local) -{ - bit_spin_lock(PG_locked, &head[1].flags); - local->compound_mapcount = atomic_read(compound_mapcount_ptr(head)) + 1; - local->subpages_mapcount = atomic_read(subpages_mapcount_ptr(head)); -} - -/* - * After caller has updated subpage._mapcount, local subpages_mapcount and - * local compound_mapcount, as necessary, unlock_compound_mapcounts() converts - * and copies them back to the compound head[1] fields, and then unlocks. 
- */ -static void unlock_compound_mapcounts(struct page *head, - struct compound_mapcounts *local) -{ - atomic_set(compound_mapcount_ptr(head), local->compound_mapcount - 1); - atomic_set(subpages_mapcount_ptr(head), local->subpages_mapcount); - bit_spin_unlock(PG_locked, &head[1].flags); -} - int total_compound_mapcount(struct page *head) { int mapcount = head_compound_mapcount(head); @@ -1140,34 +1108,6 @@ int total_compound_mapcount(struct page *head) return mapcount; } -/* - * page_dup_compound_rmap(), used when copying mm, - * provides a simple example of using lock_ and unlock_compound_mapcounts(). - */ -void page_dup_compound_rmap(struct page *head) -{ - struct compound_mapcounts mapcounts; - - /* - * Hugetlb pages could use lock_compound_mapcounts(), like THPs do; - * but at present they are still being managed by atomic operations: - * which are likely to be somewhat faster, so don't rush to convert - * them over without evaluating the effect. - * - * Note that hugetlb does not call page_add_file_rmap(): - * here is where hugetlb shared page mapcount is raised. 
- */ - if (PageHuge(head)) { - atomic_inc(compound_mapcount_ptr(head)); - } else if (PageTransHuge(head)) { - /* That test is redundant: it's for safety or to optimize out */ - - lock_compound_mapcounts(head, &mapcounts); - mapcounts.compound_mapcount++; - unlock_compound_mapcounts(head, &mapcounts); - } -} - /** * page_move_anon_rmap - move a page to our anon_vma * @page: the page to move to our anon_vma @@ -1277,7 +1217,7 @@ static void __page_check_anon_rmap(struct page *page, void page_add_anon_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address, rmap_t flags) { - struct compound_mapcounts mapcounts; + atomic_t *mapped; int nr = 0, nr_pmdmapped = 0; bool compound = flags & RMAP_COMPOUND; bool first = true; @@ -1290,24 +1230,20 @@ void page_add_anon_rmap(struct page *page, first = atomic_inc_and_test(&page->_mapcount); nr = first; if (first && PageCompound(page)) { - struct page *head = compound_head(page); - - lock_compound_mapcounts(head, &mapcounts); - mapcounts.subpages_mapcount++; - nr = !mapcounts.compound_mapcount; - unlock_compound_mapcounts(head, &mapcounts); + mapped = subpages_mapcount_ptr(compound_head(page)); + nr = atomic_inc_return_relaxed(mapped); + nr = !(nr & COMPOUND_MAPPED); } } else if (PageTransHuge(page)) { /* That test is redundant: it's for safety or to optimize out */ - lock_compound_mapcounts(page, &mapcounts); - first = !mapcounts.compound_mapcount; - mapcounts.compound_mapcount++; + first = atomic_inc_and_test(compound_mapcount_ptr(page)); if (first) { + mapped = subpages_mapcount_ptr(page); + nr = atomic_add_return_relaxed(COMPOUND_MAPPED, mapped); nr_pmdmapped = thp_nr_pages(page); - nr = nr_pmdmapped - mapcounts.subpages_mapcount; + nr = nr_pmdmapped - (nr & SUBPAGES_MAPPED); } - unlock_compound_mapcounts(page, &mapcounts); } VM_BUG_ON_PAGE(!first && (flags & RMAP_EXCLUSIVE), page); @@ -1360,6 +1296,7 @@ void page_add_new_anon_rmap(struct page *page, VM_BUG_ON_PAGE(!PageTransHuge(page), page); /* increment 
count (starts at -1) */ atomic_set(compound_mapcount_ptr(page), 0); + atomic_set(subpages_mapcount_ptr(page), COMPOUND_MAPPED); nr = thp_nr_pages(page); __mod_lruvec_page_state(page, NR_ANON_THPS, nr); } @@ -1379,7 +1316,7 @@ void page_add_new_anon_rmap(struct page *page, void page_add_file_rmap(struct page *page, struct vm_area_struct *vma, bool compound) { - struct compound_mapcounts mapcounts; + atomic_t *mapped; int nr = 0, nr_pmdmapped = 0; bool first; @@ -1391,24 +1328,20 @@ void page_add_file_rmap(struct page *page, first = atomic_inc_and_test(&page->_mapcount); nr = first; if (first && PageCompound(page)) { - struct page *head = compound_head(page); - - lock_compound_mapcounts(head, &mapcounts); - mapcounts.subpages_mapcount++; - nr = !mapcounts.compound_mapcount; - unlock_compound_mapcounts(head, &mapcounts); + mapped = subpages_mapcount_ptr(compound_head(page)); + nr = atomic_inc_return_relaxed(mapped); + nr = !(nr & COMPOUND_MAPPED); } } else if (PageTransHuge(page)) { /* That test is redundant: it's for safety or to optimize out */ - lock_compound_mapcounts(page, &mapcounts); - first = !mapcounts.compound_mapcount; - mapcounts.compound_mapcount++; + first = atomic_inc_and_test(compound_mapcount_ptr(page)); if (first) { + mapped = subpages_mapcount_ptr(page); + nr = atomic_add_return_relaxed(COMPOUND_MAPPED, mapped); nr_pmdmapped = thp_nr_pages(page); - nr = nr_pmdmapped - mapcounts.subpages_mapcount; + nr = nr_pmdmapped - (nr & SUBPAGES_MAPPED); } - unlock_compound_mapcounts(page, &mapcounts); } if (nr_pmdmapped) @@ -1432,7 +1365,7 @@ void page_add_file_rmap(struct page *page, void page_remove_rmap(struct page *page, struct vm_area_struct *vma, bool compound) { - struct compound_mapcounts mapcounts; + atomic_t *mapped; int nr = 0, nr_pmdmapped = 0; bool last; @@ -1452,24 +1385,20 @@ void page_remove_rmap(struct page *page, last = atomic_add_negative(-1, &page->_mapcount); nr = last; if (last && PageCompound(page)) { - struct page *head = 
compound_head(page); - - lock_compound_mapcounts(head, &mapcounts); - mapcounts.subpages_mapcount--; - nr = !mapcounts.compound_mapcount; - unlock_compound_mapcounts(head, &mapcounts); + mapped = subpages_mapcount_ptr(compound_head(page)); + nr = atomic_dec_return_relaxed(mapped); + nr = !(nr & COMPOUND_MAPPED); } } else if (PageTransHuge(page)) { /* That test is redundant: it's for safety or to optimize out */ - lock_compound_mapcounts(page, &mapcounts); - mapcounts.compound_mapcount--; - last = !mapcounts.compound_mapcount; + last = atomic_add_negative(-1, compound_mapcount_ptr(page)); if (last) { + mapped = subpages_mapcount_ptr(page); + nr = atomic_sub_return_relaxed(COMPOUND_MAPPED, mapped); nr_pmdmapped = thp_nr_pages(page); - nr = nr_pmdmapped - mapcounts.subpages_mapcount; + nr = nr_pmdmapped - (nr & SUBPAGES_MAPPED); } - unlock_compound_mapcounts(page, &mapcounts); } if (nr_pmdmapped) { From 96d82deb743ab42c8f0b911eb49db83f0e6db311 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 22 Nov 2022 01:51:50 -0800 Subject: [PATCH 3012/4122] mm,thp,rmap: clean up the end of __split_huge_pmd_locked() It's hard to add a page_add_anon_rmap() into __split_huge_pmd_locked()'s HPAGE_PMD_NR set_pte_at() loop, without wincing at the "freeze" case's HPAGE_PMD_NR page_remove_rmap() loop below it. It's just a mistake to add rmaps in the "freeze" (insert migration entries prior to splitting huge page) case: the pmd_migration case already avoids doing that, so just follow its lead. page_ref_add() versus put_page() likewise. But why is one more put_page() needed in the "freeze" case? Because it's removing the pmd rmap, already removed when pmd_migration (and freeze and pmd_migration are mutually exclusive cases). Link: https://lkml.kernel.org/r/d43748aa-fece-e0b9-c4ab-f23c9ebc9011@google.com Signed-off-by: Hugh Dickins Acked-by: Kirill A.
Shutemov Cc: Dan Carpenter Cc: David Hildenbrand Cc: James Houghton Cc: Johannes Weiner Cc: John Hubbard Cc: Linus Torvalds Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Mike Kravetz Cc: Mina Almasry Cc: Muchun Song Cc: Naoya Horiguchi Cc: Peter Xu Cc: Sidhartha Kumar Cc: Vlastimil Babka Cc: Yang Shi Cc: Yu Zhao Cc: Zach O'Keefe Signed-off-by: Andrew Morton --- mm/huge_memory.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 681253adf529..aba340684285 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2141,7 +2141,6 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, uffd_wp = pmd_uffd_wp(old_pmd); VM_BUG_ON_PAGE(!page_count(page), page); - page_ref_add(page, HPAGE_PMD_NR - 1); /* * Without "freeze", we'll simply split the PMD, propagating the @@ -2161,6 +2160,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, anon_exclusive = PageAnon(page) && PageAnonExclusive(page); if (freeze && anon_exclusive && page_try_share_anon_rmap(page)) freeze = false; + if (!freeze) + page_ref_add(page, HPAGE_PMD_NR - 1); } /* @@ -2216,27 +2217,21 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, entry = pte_mksoft_dirty(entry); if (uffd_wp) entry = pte_mkuffd_wp(entry); + page_add_anon_rmap(page + i, vma, addr, false); } pte = pte_offset_map(&_pmd, addr); BUG_ON(!pte_none(*pte)); set_pte_at(mm, addr, pte, entry); - if (!pmd_migration) - page_add_anon_rmap(page + i, vma, addr, false); pte_unmap(pte); } if (!pmd_migration) page_remove_rmap(page, vma, true); + if (freeze) + put_page(page); smp_wmb(); /* make pte visible before pmd */ pmd_populate(mm, pmd, pgtable); - - if (freeze) { - for (i = 0; i < HPAGE_PMD_NR; i++) { - page_remove_rmap(page + i, vma, false); - put_page(page + i); - } - } } void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, From 1a1af17ea81115914c8efc1177fd94719c84fc11 Mon Sep 17 00:00:00 2001 From: 
Rong Tao Date: Tue, 8 Nov 2022 19:53:48 +0800 Subject: [PATCH 3013/4122] tools/vm/page_owner: ignore page_owner_sort binary page_owner_sort was introduced since commit 48c96a368579 ("mm/page_owner: keep track of page owners"), and we should ignore it. Link: https://lkml.kernel.org/r/tencent_F6CAC0ABE16839E2B2419BD07316DA65BB06@qq.com Signed-off-by: Rong Tao Signed-off-by: Andrew Morton --- tools/vm/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/vm/.gitignore b/tools/vm/.gitignore index 79bb92ae1bb3..922879f93fc8 100644 --- a/tools/vm/.gitignore +++ b/tools/vm/.gitignore @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only slabinfo page-types +page_owner_sort From d84887739d5c982afa50b155aad628bb8ff206c5 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Tue, 8 Nov 2022 18:46:46 +0100 Subject: [PATCH 3014/4122] mm/mprotect: allow clean exclusive anon pages to be writable Patch series "mm/autonuma: replace savedwrite infrastructure", v2. As discussed in my talk at LPC, we can reuse the same mechanism for deciding whether to map a pte writable when upgrading permissions via mprotect() -- e.g., PROT_READ -> PROT_READ|PROT_WRITE -- to replace the savedwrite infrastructure used for NUMA hinting faults (e.g., PROT_NONE -> PROT_READ|PROT_WRITE). Instead of maintaining previous write permissions for a pte/pmd, we re-determine if the pte/pmd can be writable. The big benefit is that we have a common logic for deciding whether we can map a pte/pmd writable on protection changes. For private mappings, there should be no difference -- from what I understand, that is what autonuma benchmarks care about. I ran autonumabench for v1 on a system with 2 NUMA nodes, 96 GiB each via: perf stat --null --repeat 10 The numa01 benchmark is quite noisy in my environment and I failed to reduce the noise so far. 
numa01: mm-unstable: 146.88 +- 6.54 seconds time elapsed ( +- 4.45% ) mm-unstable++: 147.45 +- 13.39 seconds time elapsed ( +- 9.08% ) numa02: mm-unstable: 16.0300 +- 0.0624 seconds time elapsed ( +- 0.39% ) mm-unstable++: 16.1281 +- 0.0945 seconds time elapsed ( +- 0.59% ) It is worth noting that for shared writable mappings that require writenotify, we will only avoid write faults if the pte/pmd is dirty (inherited from the older mprotect logic). If we ever care about optimizing that further, we'd need a different mechanism to identify whether the FS still needs to get notified on the next write access. In any case, such an optimization will then not be autonuma-specific, but mprotect() permission upgrades would similarly benefit from it. This patch (of 7): Anonymous pages might have the dirty bit clear, but this should not prevent mprotect from making them writable if they are exclusive. Therefore, skip the test whether the page is dirty in this case. Note that there are already other ways to get a writable PTE mapping an anonymous page that is clean: for example, via MADV_FREE. In an ideal world, we'd have a different indication from the FS whether writenotify is still required. 
[david@redhat.com: return directly; update description] Link: https://lkml.kernel.org/r/20221108174652.198904-1-david@redhat.com Link: https://lkml.kernel.org/r/20221108174652.198904-2-david@redhat.com Signed-off-by: Nadav Amit Signed-off-by: David Hildenbrand Cc: Linus Torvalds Cc: Mel Gorman Cc: Dave Chinner Cc: Peter Xu Cc: Andrea Arcangeli Cc: Hugh Dickins Cc: Vlastimil Babka Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Mike Rapoport Cc: Anshuman Khandual Signed-off-by: Andrew Morton --- mm/mprotect.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/mm/mprotect.c b/mm/mprotect.c index 8d770855b591..86a28c0e190f 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -46,7 +46,7 @@ static inline bool can_change_pte_writable(struct vm_area_struct *vma, VM_BUG_ON(!(vma->vm_flags & VM_WRITE) || pte_write(pte)); - if (pte_protnone(pte) || !pte_dirty(pte)) + if (pte_protnone(pte)) return false; /* Do we need write faults for softdirty tracking? */ @@ -65,11 +65,10 @@ static inline bool can_change_pte_writable(struct vm_area_struct *vma, * the PT lock. */ page = vm_normal_page(vma, addr, pte); - if (!page || !PageAnon(page) || !PageAnonExclusive(page)) - return false; + return page && PageAnon(page) && PageAnonExclusive(page); } - return true; + return pte_dirty(pte); } static unsigned long change_pte_range(struct mmu_gather *tlb, From 7ea7e333842ed50fe0a0b256c270b54f8ec2353e Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 8 Nov 2022 18:46:47 +0100 Subject: [PATCH 3015/4122] mm/mprotect: minor can_change_pte_writable() cleanups We want to replicate this code for handling PMDs soon. (1) No need to crash the kernel, warning and rejecting is good enough. As this will no longer get optimized out, drop the pte_write() check: no harm would be done. (2) Add a comment why PROT_NONE mapped pages are excluded. (3) Add a comment regarding MAP_SHARED handling and why we rely on the dirty bit in the PTE. 
Link: https://lkml.kernel.org/r/20221108174652.198904-3-david@redhat.com Signed-off-by: David Hildenbrand Cc: Andrea Arcangeli Cc: Anshuman Khandual Cc: Dave Chinner Cc: Hugh Dickins Cc: Linus Torvalds Cc: Mel Gorman Cc: Michael Ellerman Cc: Mike Rapoport Cc: Nadav Amit Cc: Nicholas Piggin Cc: Peter Xu Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- mm/mprotect.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/mm/mprotect.c b/mm/mprotect.c index 86a28c0e190f..72aabffb7871 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -44,8 +44,10 @@ static inline bool can_change_pte_writable(struct vm_area_struct *vma, { struct page *page; - VM_BUG_ON(!(vma->vm_flags & VM_WRITE) || pte_write(pte)); + if (WARN_ON_ONCE(!(vma->vm_flags & VM_WRITE))) + return false; + /* Don't touch entries that are not even readable. */ if (pte_protnone(pte)) return false; @@ -59,15 +61,22 @@ static inline bool can_change_pte_writable(struct vm_area_struct *vma, if (!(vma->vm_flags & VM_SHARED)) { /* - * We can only special-case on exclusive anonymous pages, - * because we know that our write-fault handler similarly would - * map them writable without any additional checks while holding - * the PT lock. + * Writable MAP_PRIVATE mapping: We can only special-case on + * exclusive anonymous pages, because we know that our + * write-fault handler similarly would map them writable without + * any additional checks while holding the PT lock. */ page = vm_normal_page(vma, addr, pte); return page && PageAnon(page) && PageAnonExclusive(page); } + /* + * Writable MAP_SHARED mapping: "clean" might indicate that the FS still + * needs a real write-fault for writenotify + * (see vma_wants_writenotify()). If "dirty", the assumption is that the + * FS was already notified and we can simply mark the PTE writable + * just like the write-fault handler would do. 
+ */ return pte_dirty(pte); } From c27f479ef5428f691787fb6fe3703a70e931ae8c Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 8 Nov 2022 18:46:48 +0100 Subject: [PATCH 3016/4122] mm/huge_memory: try avoiding write faults when changing PMD protection Let's replicate what we have for PTEs in can_change_pte_writable() also for PMDs. While this might look like a pure performance improvement, we'll use this to get rid of savedwrite handling in do_huge_pmd_numa_page() next. Place do_huge_pmd_numa_page() strategically good for that purpose. Note that MM_CP_TRY_CHANGE_WRITABLE is currently only set when we come via mprotect_fixup(). Link: https://lkml.kernel.org/r/20221108174652.198904-4-david@redhat.com Signed-off-by: David Hildenbrand Cc: Andrea Arcangeli Cc: Anshuman Khandual Cc: Dave Chinner Cc: Hugh Dickins Cc: Linus Torvalds Cc: Mel Gorman Cc: Michael Ellerman Cc: Mike Rapoport Cc: Nadav Amit Cc: Nicholas Piggin Cc: Peter Xu Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- mm/huge_memory.c | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index aba340684285..fac917b78102 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1390,6 +1390,36 @@ fallback: return VM_FAULT_FALLBACK; } +static inline bool can_change_pmd_writable(struct vm_area_struct *vma, + unsigned long addr, pmd_t pmd) +{ + struct page *page; + + if (WARN_ON_ONCE(!(vma->vm_flags & VM_WRITE))) + return false; + + /* Don't touch entries that are not even readable (NUMA hinting). */ + if (pmd_protnone(pmd)) + return false; + + /* Do we need write faults for softdirty tracking? */ + if (vma_soft_dirty_enabled(vma) && !pmd_soft_dirty(pmd)) + return false; + + /* Do we need write faults for uffd-wp tracking? */ + if (userfaultfd_huge_pmd_wp(vma, pmd)) + return false; + + if (!(vma->vm_flags & VM_SHARED)) { + /* See can_change_pte_writable().
*/ + page = vm_normal_page_pmd(vma, addr, pmd); + return page && PageAnon(page) && PageAnonExclusive(page); + } + + /* See can_change_pte_writable(). */ + return pmd_dirty(pmd); +} + /* FOLL_FORCE can write to even unwritable PMDs in COW mappings. */ static inline bool can_follow_write_pmd(pmd_t pmd, struct page *page, struct vm_area_struct *vma, @@ -1893,13 +1923,17 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, */ entry = pmd_clear_uffd_wp(entry); } + + /* See change_pte_range(). */ + if ((cp_flags & MM_CP_TRY_CHANGE_WRITABLE) && !pmd_write(entry) && + can_change_pmd_writable(vma, addr, entry)) + entry = pmd_mkwrite(entry); + ret = HPAGE_PMD_NR; set_pmd_at(mm, addr, pmd, entry); if (huge_pmd_needs_flush(oldpmd, entry)) tlb_flush_pmd_range(tlb, addr, HPAGE_PMD_SIZE); - - BUG_ON(vma_is_anonymous(vma) && !preserve_write && pmd_write(entry)); unlock: spin_unlock(ptl); return ret; From eb309ec89953d6a3e8e35a3a577bab13893858d8 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 8 Nov 2022 18:46:49 +0100 Subject: [PATCH 3017/4122] mm/mprotect: factor out check whether manual PTE write upgrades are required Let's factor the check out into vma_wants_manual_pte_write_upgrade(), to be reused in NUMA hinting fault context soon. 
Link: https://lkml.kernel.org/r/20221108174652.198904-5-david@redhat.com Signed-off-by: David Hildenbrand Cc: Andrea Arcangeli Cc: Anshuman Khandual Cc: Dave Chinner Cc: Hugh Dickins Cc: Linus Torvalds Cc: Mel Gorman Cc: Michael Ellerman Cc: Mike Rapoport Cc: Nadav Amit Cc: Nicholas Piggin Cc: Peter Xu Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/mm.h | 16 ++++++++++++++-- mm/mprotect.c | 17 ++++------------- 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index d33639be3db3..e203e8a83e2d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2088,6 +2088,20 @@ extern unsigned long move_page_tables(struct vm_area_struct *vma, #define MM_CP_UFFD_WP_ALL (MM_CP_UFFD_WP | \ MM_CP_UFFD_WP_RESOLVE) +int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot); +static inline bool vma_wants_manual_pte_write_upgrade(struct vm_area_struct *vma) +{ + /* + * We want to check manually if we can change individual PTEs writable + * if we can't do that automatically for all PTEs in a mapping. For + * private mappings, that's always the case when we have write + * permissions as we properly have to handle COW. 
+ */ + if (vma->vm_flags & VM_SHARED) + return vma_wants_writenotify(vma, vma->vm_page_prot); + return !!(vma->vm_flags & VM_WRITE); + +} extern unsigned long change_protection(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, unsigned long end, pgprot_t newprot, @@ -2227,8 +2241,6 @@ static inline int pte_devmap(pte_t pte) } #endif -int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot); - extern pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl); static inline pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr, diff --git a/mm/mprotect.c b/mm/mprotect.c index 72aabffb7871..fe22db2c9cdd 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -558,8 +558,8 @@ mprotect_fixup(struct mmu_gather *tlb, struct vm_area_struct *vma, struct mm_struct *mm = vma->vm_mm; unsigned long oldflags = vma->vm_flags; long nrpages = (end - start) >> PAGE_SHIFT; + unsigned int mm_cp_flags = 0; unsigned long charged = 0; - bool try_change_writable; pgoff_t pgoff; int error; @@ -637,20 +637,11 @@ success: * held in write mode. */ vma->vm_flags = newflags; - /* - * We want to check manually if we can change individual PTEs writable - * if we can't do that automatically for all PTEs in a mapping. For - * private mappings, that's always the case when we have write - * permissions as we properly have to handle COW. - */ - if (vma->vm_flags & VM_SHARED) - try_change_writable = vma_wants_writenotify(vma, vma->vm_page_prot); - else - try_change_writable = !!(vma->vm_flags & VM_WRITE); + if (vma_wants_manual_pte_write_upgrade(vma)) + mm_cp_flags |= MM_CP_TRY_CHANGE_WRITABLE; vma_set_page_prot(vma); - change_protection(tlb, vma, start, end, vma->vm_page_prot, - try_change_writable ? 
MM_CP_TRY_CHANGE_WRITABLE : 0); + change_protection(tlb, vma, start, end, vma->vm_page_prot, mm_cp_flags); /* * Private VM_LOCKED VMA becoming writable: trigger COW to avoid major From 6a56ccbcf6c69538b152644107a1d7383c876ca7 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 8 Nov 2022 18:46:50 +0100 Subject: [PATCH 3018/4122] mm/autonuma: use can_change_(pte|pmd)_writable() to replace savedwrite commit b191f9b106ea ("mm: numa: preserve PTE write permissions across a NUMA hinting fault") added remembering write permissions using ordinary pte_write() for PROT_NONE mapped pages to avoid write faults when remapping the page !PROT_NONE on NUMA hinting faults. That commit noted: The patch looks hacky but the alternatives looked worse. The tidest was to rewalk the page tables after a hinting fault but it was more complex than this approach and the performance was worse. It's not generally safe to just mark the page writable during the fault if it's a write fault as it may have been read-only for COW so that approach was discarded. Later, commit 288bc54949fc ("mm/autonuma: let architecture override how the write bit should be stashed in a protnone pte.") introduced a family of savedwrite PTE functions that didn't necessarily improve the whole situation. One confusing thing is that nowadays, if a page is pte_protnone() and pte_savedwrite() then also pte_write() is true. Another source of confusion is that there is only a single pte_mk_savedwrite() call in the kernel. All other write-protection code seems to silently rely on pte_wrprotect(). Ever since PageAnonExclusive was introduced and we started using it in mprotect context via commit 64fe24a3e05e ("mm/mprotect: try avoiding write faults for exclusive anonymous pages when changing protection"), we do have machinery in place to avoid write faults when changing protection, which is exactly what we want to do here. 
Let's similarly do what ordinary mprotect() does nowadays when upgrading write permissions and reuse can_change_pte_writable() and can_change_pmd_writable() to detect if we can upgrade PTE permissions to be writable. For anonymous pages there should be absolutely no change: if an anonymous page is not exclusive, it could not have been mapped writable -- because only exclusive anonymous pages can be mapped writable. However, there *might* be a change for writable shared mappings that require writenotify: if they are not dirty, we cannot map them writable. While it might not matter in practice, we'd need a different way to identify whether writenotify is actually required -- and ordinary mprotect would benefit from that as well. Note that we don't optimize for the actual migration case: (1) When migration succeeds the new PTE will not be writable because the source PTE was not writable (protnone); in the future we might just optimize that case similarly by reusing can_change_pte_writable()/can_change_pmd_writable() when removing migration PTEs. (2) When migration fails, we'd have to recalculate the "writable" flag because we temporarily dropped the PT lock; for now keep it simple and set "writable=false". We'll remove all savedwrite leftovers next. 
Link: https://lkml.kernel.org/r/20221108174652.198904-6-david@redhat.com Signed-off-by: David Hildenbrand Cc: Andrea Arcangeli Cc: Anshuman Khandual Cc: Dave Chinner Cc: Hugh Dickins Cc: Linus Torvalds Cc: Mel Gorman Cc: Michael Ellerman Cc: Mike Rapoport Cc: Nadav Amit Cc: Nicholas Piggin Cc: Peter Xu Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 ++ mm/huge_memory.c | 26 +++++++++++++++----------- mm/ksm.c | 9 ++++----- mm/memory.c | 16 +++++++++++++--- mm/mprotect.c | 7 ++----- 5 files changed, 36 insertions(+), 24 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index e203e8a83e2d..8597ef676fc3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2102,6 +2102,8 @@ static inline bool vma_wants_manual_pte_write_upgrade(struct vm_area_struct *vma return !!(vma->vm_flags & VM_WRITE); } +bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr, + pte_t pte); extern unsigned long change_protection(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, unsigned long end, pgprot_t newprot, diff --git a/mm/huge_memory.c b/mm/huge_memory.c index fac917b78102..29102e3ddf84 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1511,8 +1511,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf) unsigned long haddr = vmf->address & HPAGE_PMD_MASK; int page_nid = NUMA_NO_NODE; int target_nid, last_cpupid = (-1 & LAST_CPUPID_MASK); - bool migrated = false; - bool was_writable = pmd_savedwrite(oldpmd); + bool migrated = false, writable = false; int flags = 0; vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); @@ -1522,12 +1521,22 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf) } pmd = pmd_modify(oldpmd, vma->vm_page_prot); + + /* + * Detect now whether the PMD could be writable; this information + * is only valid while holding the PT lock. 
+ */ + writable = pmd_write(pmd); + if (!writable && vma_wants_manual_pte_write_upgrade(vma) && + can_change_pmd_writable(vma, vmf->address, pmd)) + writable = true; + page = vm_normal_page_pmd(vma, haddr, pmd); if (!page) goto out_map; /* See similar comment in do_numa_page for explanation */ - if (!was_writable) + if (!writable) flags |= TNF_NO_GROUP; page_nid = page_to_nid(page); @@ -1546,6 +1555,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf) } spin_unlock(vmf->ptl); + writable = false; migrated = migrate_misplaced_page(page, vma, target_nid); if (migrated) { @@ -1572,7 +1582,7 @@ out_map: /* Restore the PMD */ pmd = pmd_modify(oldpmd, vma->vm_page_prot); pmd = pmd_mkyoung(pmd); - if (was_writable) + if (writable) pmd = pmd_mkwrite(pmd); set_pmd_at(vma->vm_mm, haddr, vmf->pmd, pmd); update_mmu_cache_pmd(vma, vmf->address, vmf->pmd); @@ -1813,11 +1823,10 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, struct mm_struct *mm = vma->vm_mm; spinlock_t *ptl; pmd_t oldpmd, entry; - bool preserve_write; - int ret; bool prot_numa = cp_flags & MM_CP_PROT_NUMA; bool uffd_wp = cp_flags & MM_CP_UFFD_WP; bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE; + int ret = 1; tlb_change_page_size(tlb, HPAGE_PMD_SIZE); @@ -1828,9 +1837,6 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, if (!ptl) return 0; - preserve_write = prot_numa && pmd_write(*pmd); - ret = 1; - #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION if (is_swap_pmd(*pmd)) { swp_entry_t entry = pmd_to_swp_entry(*pmd); @@ -1910,8 +1916,6 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, oldpmd = pmdp_invalidate_ad(vma, addr, pmd); entry = pmd_modify(oldpmd, newprot); - if (preserve_write) - entry = pmd_mk_savedwrite(entry); if (uffd_wp) { entry = pmd_wrprotect(entry); entry = pmd_mkuffd_wp(entry); diff --git a/mm/ksm.c b/mm/ksm.c index 7ba97f86d831..a71245241d22 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -1041,7 +1041,6 @@ static int 
write_protect_page(struct vm_area_struct *vma, struct page *page, anon_exclusive = PageAnonExclusive(page); if (pte_write(*pvmw.pte) || pte_dirty(*pvmw.pte) || - (pte_protnone(*pvmw.pte) && pte_savedwrite(*pvmw.pte)) || anon_exclusive || mm_tlb_flush_pending(mm)) { pte_t entry; @@ -1079,11 +1078,11 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page, if (pte_dirty(entry)) set_page_dirty(page); + entry = pte_mkclean(entry); + + if (pte_write(entry)) + entry = pte_wrprotect(entry); - if (pte_protnone(entry)) - entry = pte_mkclean(pte_clear_savedwrite(entry)); - else - entry = pte_mkclean(pte_wrprotect(entry)); set_pte_at_notify(mm, pvmw.address, pvmw.pte, entry); } *orig_pte = *pvmw.pte; diff --git a/mm/memory.c b/mm/memory.c index 142c4229549b..1749c638734f 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4675,10 +4675,10 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) struct vm_area_struct *vma = vmf->vma; struct page *page = NULL; int page_nid = NUMA_NO_NODE; + bool writable = false; int last_cpupid; int target_nid; pte_t pte, old_pte; - bool was_writable = pte_savedwrite(vmf->orig_pte); int flags = 0; /* @@ -4697,6 +4697,15 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) old_pte = ptep_get(vmf->pte); pte = pte_modify(old_pte, vma->vm_page_prot); + /* + * Detect now whether the PTE could be writable; this information + * is only valid while holding the PT lock. + */ + writable = pte_write(pte); + if (!writable && vma_wants_manual_pte_write_upgrade(vma) && + can_change_pte_writable(vma, vmf->address, pte)) + writable = true; + page = vm_normal_page(vma, vmf->address, pte); if (!page || is_zone_device_page(page)) goto out_map; @@ -4713,7 +4722,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) * pte_dirty has unpredictable behaviour between PTE scan updates, * background writeback, dirty balancing and application behaviour. 
*/ - if (!was_writable) + if (!writable) flags |= TNF_NO_GROUP; /* @@ -4740,6 +4749,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) goto out_map; } pte_unmap_unlock(vmf->pte, vmf->ptl); + writable = false; /* Migrate to the requested node */ if (migrate_misplaced_page(page, vma, target_nid)) { @@ -4768,7 +4778,7 @@ out_map: old_pte = ptep_modify_prot_start(vma, vmf->address, vmf->pte); pte = pte_modify(old_pte, vma->vm_page_prot); pte = pte_mkyoung(pte); - if (was_writable) + if (writable) pte = pte_mkwrite(pte); ptep_modify_prot_commit(vma, vmf->address, vmf->pte, old_pte, pte); update_mmu_cache(vma, vmf->address, vmf->pte); diff --git a/mm/mprotect.c b/mm/mprotect.c index fe22db2c9cdd..093cb50f2fc4 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -39,8 +39,8 @@ #include "internal.h" -static inline bool can_change_pte_writable(struct vm_area_struct *vma, - unsigned long addr, pte_t pte) +bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr, + pte_t pte) { struct page *page; @@ -121,7 +121,6 @@ static unsigned long change_pte_range(struct mmu_gather *tlb, oldpte = *pte; if (pte_present(oldpte)) { pte_t ptent; - bool preserve_write = prot_numa && pte_write(oldpte); /* * Avoid trapping faults against the zero or KSM @@ -177,8 +176,6 @@ static unsigned long change_pte_range(struct mmu_gather *tlb, oldpte = ptep_modify_prot_start(vma, addr, pte); ptent = pte_modify(oldpte, newprot); - if (preserve_write) - ptent = pte_mk_savedwrite(ptent); if (uffd_wp) { ptent = pte_wrprotect(ptent); From d6379159f47630813f06f97535cc82ce7b9eed49 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 8 Nov 2022 18:46:51 +0100 Subject: [PATCH 3019/4122] mm: remove unused savedwrite infrastructure NUMA hinting no longer uses savedwrite, let's rip it out. ... and while at it, drop __pte_write() and __pmd_write() on ppc64. 
Link: https://lkml.kernel.org/r/20221108174652.198904-7-david@redhat.com Signed-off-by: David Hildenbrand Cc: Andrea Arcangeli Cc: Anshuman Khandual Cc: Dave Chinner Cc: Hugh Dickins Cc: Linus Torvalds Cc: Mel Gorman Cc: Michael Ellerman Cc: Mike Rapoport Cc: Nadav Amit Cc: Nicholas Piggin Cc: Peter Xu Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- arch/powerpc/include/asm/book3s/64/pgtable.h | 80 +------------------- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 2 +- include/linux/pgtable.h | 24 ------ mm/debug_vm_pgtable.c | 32 -------- 4 files changed, 5 insertions(+), 133 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index c436d8422654..cb4c67bf45d7 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -401,35 +401,9 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm, #define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH #define pmdp_clear_flush_young pmdp_test_and_clear_young -static inline int __pte_write(pte_t pte) -{ - return !!(pte_raw(pte) & cpu_to_be64(_PAGE_WRITE)); -} - -#ifdef CONFIG_NUMA_BALANCING -#define pte_savedwrite pte_savedwrite -static inline bool pte_savedwrite(pte_t pte) -{ - /* - * Saved write ptes are prot none ptes that doesn't have - * privileged bit sit. We mark prot none as one which has - * present and pviliged bit set and RWX cleared. To mark - * protnone which used to have _PAGE_WRITE set we clear - * the privileged bit. 
- */ - return !(pte_raw(pte) & cpu_to_be64(_PAGE_RWX | _PAGE_PRIVILEGED)); -} -#else -#define pte_savedwrite pte_savedwrite -static inline bool pte_savedwrite(pte_t pte) -{ - return false; -} -#endif - static inline int pte_write(pte_t pte) { - return __pte_write(pte) || pte_savedwrite(pte); + return !!(pte_raw(pte) & cpu_to_be64(_PAGE_WRITE)); } static inline int pte_read(pte_t pte) @@ -441,24 +415,16 @@ static inline int pte_read(pte_t pte) static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - if (__pte_write(*ptep)) + if (pte_write(*ptep)) pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0); - else if (unlikely(pte_savedwrite(*ptep))) - pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 0); } #define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - /* - * We should not find protnone for hugetlb, but this complete the - * interface. - */ - if (__pte_write(*ptep)) + if (pte_write(*ptep)) pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1); - else if (unlikely(pte_savedwrite(*ptep))) - pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 1); } #define __HAVE_ARCH_PTEP_GET_AND_CLEAR @@ -535,36 +501,6 @@ static inline int pte_protnone(pte_t pte) return (pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT | _PAGE_PTE | _PAGE_RWX)) == cpu_to_be64(_PAGE_PRESENT | _PAGE_PTE); } - -#define pte_mk_savedwrite pte_mk_savedwrite -static inline pte_t pte_mk_savedwrite(pte_t pte) -{ - /* - * Used by Autonuma subsystem to preserve the write bit - * while marking the pte PROT_NONE. 
Only allow this - * on PROT_NONE pte - */ - VM_BUG_ON((pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT | _PAGE_RWX | _PAGE_PRIVILEGED)) != - cpu_to_be64(_PAGE_PRESENT | _PAGE_PRIVILEGED)); - return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_PRIVILEGED)); -} - -#define pte_clear_savedwrite pte_clear_savedwrite -static inline pte_t pte_clear_savedwrite(pte_t pte) -{ - /* - * Used by KSM subsystem to make a protnone pte readonly. - */ - VM_BUG_ON(!pte_protnone(pte)); - return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_PRIVILEGED)); -} -#else -#define pte_clear_savedwrite pte_clear_savedwrite -static inline pte_t pte_clear_savedwrite(pte_t pte) -{ - VM_WARN_ON(1); - return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_WRITE)); -} #endif /* CONFIG_NUMA_BALANCING */ static inline bool pte_hw_valid(pte_t pte) @@ -641,8 +577,6 @@ static inline unsigned long pte_pfn(pte_t pte) /* Generic modifiers for PTE bits */ static inline pte_t pte_wrprotect(pte_t pte) { - if (unlikely(pte_savedwrite(pte))) - return pte_clear_savedwrite(pte); return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_WRITE)); } @@ -1139,8 +1073,6 @@ static inline pte_t *pmdp_ptep(pmd_t *pmd) #define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd))) #define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) #define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd))) -#define pmd_mk_savedwrite(pmd) pte_pmd(pte_mk_savedwrite(pmd_pte(pmd))) -#define pmd_clear_savedwrite(pmd) pte_pmd(pte_clear_savedwrite(pmd_pte(pmd))) #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY #define pmd_soft_dirty(pmd) pte_soft_dirty(pmd_pte(pmd)) @@ -1162,8 +1094,6 @@ static inline int pmd_protnone(pmd_t pmd) #endif /* CONFIG_NUMA_BALANCING */ #define pmd_write(pmd) pte_write(pmd_pte(pmd)) -#define __pmd_write(pmd) __pte_write(pmd_pte(pmd)) -#define pmd_savedwrite(pmd) pte_savedwrite(pmd_pte(pmd)) #define pmd_access_permitted pmd_access_permitted static inline bool pmd_access_permitted(pmd_t pmd, bool write) @@ -1241,10 +1171,8 @@ static inline int 
__pmdp_test_and_clear_young(struct mm_struct *mm, static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - if (__pmd_write((*pmdp))) + if (pmd_write(*pmdp)) pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0); - else if (unlikely(pmd_savedwrite(*pmdp))) - pmd_hugepage_update(mm, addr, pmdp, 0, _PAGE_PRIVILEGED); } /* diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 5a05953ae13f..9182324dbef9 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -265,7 +265,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, } pte = kvmppc_read_update_linux_pte(ptep, writing); if (pte_present(pte) && !pte_protnone(pte)) { - if (writing && !__pte_write(pte)) + if (writing && !pte_write(pte)) /* make the actual HPTE be read-only */ ptel = hpte_make_readonly(ptel); is_ci = pte_ci(pte); diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 5f0d7d0b9471..c74cce67eec8 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -503,30 +503,6 @@ static inline pte_t pte_sw_mkyoung(pte_t pte) #define pte_sw_mkyoung pte_sw_mkyoung #endif -#ifndef pte_savedwrite -#define pte_savedwrite pte_write -#endif - -#ifndef pte_mk_savedwrite -#define pte_mk_savedwrite pte_mkwrite -#endif - -#ifndef pte_clear_savedwrite -#define pte_clear_savedwrite pte_wrprotect -#endif - -#ifndef pmd_savedwrite -#define pmd_savedwrite pmd_write -#endif - -#ifndef pmd_mk_savedwrite -#define pmd_mk_savedwrite pmd_mkwrite -#endif - -#ifndef pmd_clear_savedwrite -#define pmd_clear_savedwrite pmd_wrprotect -#endif - #ifndef __HAVE_ARCH_PMDP_SET_WRPROTECT #ifdef CONFIG_TRANSPARENT_HUGEPAGE static inline void pmdp_set_wrprotect(struct mm_struct *mm, diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index 2b61fde8c38c..c631ade3f1d2 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -171,18 +171,6 @@ static void __init pte_advanced_tests(struct 
pgtable_debug_args *args) ptep_get_and_clear_full(args->mm, args->vaddr, args->ptep, 1); } -static void __init pte_savedwrite_tests(struct pgtable_debug_args *args) -{ - pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot_none); - - if (!IS_ENABLED(CONFIG_NUMA_BALANCING)) - return; - - pr_debug("Validating PTE saved write\n"); - WARN_ON(!pte_savedwrite(pte_mk_savedwrite(pte_clear_savedwrite(pte)))); - WARN_ON(pte_savedwrite(pte_clear_savedwrite(pte_mk_savedwrite(pte)))); -} - #ifdef CONFIG_TRANSPARENT_HUGEPAGE static void __init pmd_basic_tests(struct pgtable_debug_args *args, int idx) { @@ -302,22 +290,6 @@ static void __init pmd_leaf_tests(struct pgtable_debug_args *args) WARN_ON(!pmd_leaf(pmd)); } -static void __init pmd_savedwrite_tests(struct pgtable_debug_args *args) -{ - pmd_t pmd; - - if (!IS_ENABLED(CONFIG_NUMA_BALANCING)) - return; - - if (!has_transparent_hugepage()) - return; - - pr_debug("Validating PMD saved write\n"); - pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot_none); - WARN_ON(!pmd_savedwrite(pmd_mk_savedwrite(pmd_clear_savedwrite(pmd)))); - WARN_ON(pmd_savedwrite(pmd_clear_savedwrite(pmd_mk_savedwrite(pmd)))); -} - #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD static void __init pud_basic_tests(struct pgtable_debug_args *args, int idx) { @@ -451,7 +423,6 @@ static void __init pmd_advanced_tests(struct pgtable_debug_args *args) { } static void __init pud_advanced_tests(struct pgtable_debug_args *args) { } static void __init pmd_leaf_tests(struct pgtable_debug_args *args) { } static void __init pud_leaf_tests(struct pgtable_debug_args *args) { } -static void __init pmd_savedwrite_tests(struct pgtable_debug_args *args) { } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP @@ -1288,9 +1259,6 @@ static int __init debug_vm_pgtable(void) pmd_leaf_tests(&args); pud_leaf_tests(&args); - pte_savedwrite_tests(&args); - pmd_savedwrite_tests(&args); - pte_special_tests(&args); pte_protnone_tests(&args); 
pmd_protnone_tests(&args); From 07f8bac4982f98fc4b5ae05679d76fccc15079ea Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 8 Nov 2022 18:46:52 +0100 Subject: [PATCH 3020/4122] selftests/vm: anon_cow: add mprotect() optimization tests Let's extend the test to cover the possible mprotect() optimization when removing write-protection. mprotect() must not allow write-access to a COW-shared page by accident. Link: https://lkml.kernel.org/r/20221108174652.198904-8-david@redhat.com Signed-off-by: David Hildenbrand Cc: Andrea Arcangeli Cc: Anshuman Khandual Cc: Dave Chinner Cc: Hugh Dickins Cc: Linus Torvalds Cc: Mel Gorman Cc: Michael Ellerman Cc: Mike Rapoport Cc: Nadav Amit Cc: Nicholas Piggin Cc: Peter Xu Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/anon_cow.c | 49 +++++++++++++++++++++++++-- 1 file changed, 46 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/vm/anon_cow.c b/tools/testing/selftests/vm/anon_cow.c index 705bd0b3db11..bbb251eb5025 100644 --- a/tools/testing/selftests/vm/anon_cow.c +++ b/tools/testing/selftests/vm/anon_cow.c @@ -190,7 +190,8 @@ static int child_vmsplice_memcmp_fn(char *mem, size_t size, typedef int (*child_fn)(char *mem, size_t size, struct comm_pipes *comm_pipes); -static void do_test_cow_in_parent(char *mem, size_t size, child_fn fn) +static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect, + child_fn fn) { struct comm_pipes comm_pipes; char buf; @@ -212,6 +213,22 @@ static void do_test_cow_in_parent(char *mem, size_t size, child_fn fn) while (read(comm_pipes.child_ready[0], &buf, 1) != 1) ; + + if (do_mprotect) { + /* + * mprotect() optimizations might try avoiding + * write-faults by directly mapping pages writable. 
+ */ + ret = mprotect(mem, size, PROT_READ); + ret |= mprotect(mem, size, PROT_READ|PROT_WRITE); + if (ret) { + ksft_test_result_fail("mprotect() failed\n"); + write(comm_pipes.parent_ready[1], "0", 1); + wait(&ret); + goto close_comm_pipes; + } + } + /* Modify the page. */ memset(mem, 0xff, size); write(comm_pipes.parent_ready[1], "0", 1); @@ -229,12 +246,22 @@ close_comm_pipes: static void test_cow_in_parent(char *mem, size_t size) { - do_test_cow_in_parent(mem, size, child_memcmp_fn); + do_test_cow_in_parent(mem, size, false, child_memcmp_fn); +} + +static void test_cow_in_parent_mprotect(char *mem, size_t size) +{ + do_test_cow_in_parent(mem, size, true, child_memcmp_fn); } static void test_vmsplice_in_child(char *mem, size_t size) { - do_test_cow_in_parent(mem, size, child_vmsplice_memcmp_fn); + do_test_cow_in_parent(mem, size, false, child_vmsplice_memcmp_fn); +} + +static void test_vmsplice_in_child_mprotect(char *mem, size_t size) +{ + do_test_cow_in_parent(mem, size, true, child_vmsplice_memcmp_fn); } static void do_test_vmsplice_in_parent(char *mem, size_t size, @@ -969,6 +996,14 @@ static const struct test_case test_cases[] = { "Basic COW after fork()", test_cow_in_parent, }, + /* + * Basic test, but do an additional mprotect(PROT_READ)+ + * mprotect(PROT_READ|PROT_WRITE) in the parent before write access. + */ + { + "Basic COW after fork() with mprotect() optimization", + test_cow_in_parent_mprotect, + }, /* * vmsplice() [R/O GUP] + unmap in the child; modify in the parent. If * we miss to break COW, the child observes modifications by the parent. @@ -978,6 +1013,14 @@ static const struct test_case test_cases[] = { "vmsplice() + unmap in child", test_vmsplice_in_child }, + /* + * vmsplice() test, but do an additional mprotect(PROT_READ)+ + * mprotect(PROT_READ|PROT_WRITE) in the parent before write access. 
+ */ + { + "vmsplice() + unmap in child with mprotect() optimization", + test_vmsplice_in_child_mprotect + }, /* * vmsplice() [R/O GUP] in parent before fork(), unmap in parent after * fork(); modify in the child. If we miss to break COW, the parent From 70fb4fdff5826a48886152fd5c5db04eb6c59a40 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 9 Nov 2022 12:30:48 -0800 Subject: [PATCH 3021/4122] mm: introduce 'encoded' page pointers with embedded extra bits We already have this notion in parts of the MM code (see the mlock code with the LRU_PAGE and NEW_PAGE bits), but I'm going to introduce a new case, and I refuse to do the same thing we've done before where we just put bits in the raw pointer and say it's still a normal pointer. So this introduces a 'struct encoded_page' pointer that cannot be used for anything else than to encode a real page pointer and a couple of extra bits in the low bits. That way the compiler can trivially track the state of the pointer and you just explicitly encode and decode the extra bits. Note that this makes the alignment of 'struct page' explicit even for the case where CONFIG_HAVE_ALIGNED_STRUCT_PAGE is not set. That is entirely redundant in almost all cases, since the page structure already contains several word-sized entries. However, on m68k, the alignment of even 32-bit data is just 16 bits, and as such in theory the alignment of 'struct page' could be too. So let's just make it very very explicit that the alignment needs to be at least 32 bits, giving us a guarantee of two unused low bits in the pointer. Now, in practice, our page struct array is aligned much more than that anyway, even on m68k, and our existing code in mm/mlock.c obviously already depended on that. But since the whole point of this change is to be careful about the type system when hiding extra bits in the pointer, let's also be explicit about the assumptions we make. NOTE! 
This is being very careful in another way too: it has a build-time assertion that the 'flags' added to the page pointer actually fit in the two bits. That means that this helper must be inlined, and can only be used in contexts where the compiler can statically determine that the value fits in the available bits. [akpm@linux-foundation.org: kerneldoc on a forward-declared struct confuses htmldocs] Link: https://lore.kernel.org/all/Y2tKixpO4RO6DgW5@tuxmaker.boeblingen.de.ibm.com/ Link: https://lkml.kernel.org/r/20221109203051.1835763-1-torvalds@linux-foundation.org Signed-off-by: Linus Torvalds Acked-by: Johannes Weiner Acked-by: Hugh Dickins Reviewed-by: David Hildenbrand Cc: Alexander Gordeev Cc: Aneesh Kumar K.V Cc: Christian Borntraeger Cc: Gerald Schaefer Cc: Heiko Carstens [s390] Cc: Nadav Amit Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sven Schnelle Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 44a1a699b5ad..6b0009e7d4ae 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -68,7 +68,7 @@ struct mem_cgroup; #ifdef CONFIG_HAVE_ALIGNED_STRUCT_PAGE #define _struct_page_alignment __aligned(2 * sizeof(unsigned long)) #else -#define _struct_page_alignment +#define _struct_page_alignment __aligned(sizeof(unsigned long)) #endif struct page { @@ -251,6 +251,38 @@ struct page { #endif } _struct_page_alignment; +/* + * struct encoded_page - a nonexistent type marking this pointer + * + * An 'encoded_page' pointer is a pointer to a regular 'struct page', but + * with the low bits of the pointer indicating extra context-dependent + * information. Not super-common, but happens in mmu_gather and mlock + * handling, and this acts as a type system check on that use. 
+ * + * We only really have two guaranteed bits in general, although you could + * play with 'struct page' alignment (see CONFIG_HAVE_ALIGNED_STRUCT_PAGE) + * for more. + * + * Use the supplied helper functions to encode/decode the pointer and bits. + */ +struct encoded_page; +#define ENCODE_PAGE_BITS 3ul +static __always_inline struct encoded_page *encode_page(struct page *page, unsigned long flags) +{ + BUILD_BUG_ON(flags > ENCODE_PAGE_BITS); + return (struct encoded_page *)(flags | (unsigned long)page); +} + +static inline unsigned long encoded_page_flags(struct encoded_page *page) +{ + return ENCODE_PAGE_BITS & (unsigned long)page; +} + +static inline struct page *encoded_page_ptr(struct encoded_page *page) +{ + return (struct page *)(~ENCODE_PAGE_BITS & (unsigned long)page); +} + /** * struct folio - Represents a contiguous set of bytes. * @flags: Identical to the page flags. From 449c796768c9a1c738d1fa8671fb01663380b8a7 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 9 Nov 2022 12:30:49 -0800 Subject: [PATCH 3022/4122] mm: teach release_pages() to take an array of encoded page pointers too release_pages() already could take either an array of page pointers, or an array of folio pointers. Expand it to also accept an array of encoded page pointers, which is what both the existing mlock() use and the upcoming mmu_gather use of encoded page pointers wants. Note that release_pages() won't actually use, or react to, any extra encoded bits. Instead, this is very much a case of "I have walked the array of encoded pages and done everything the extra bits tell me to do, now release it all".
Also, while the "either page or folio pointers" dual use was handled with a cast of the pointer in "release_folios()", this takes a slightly different approach and uses the "transparent union" attribute to describe the set of arguments to the function: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html which has been supported by gcc forever, but the kernel hasn't used before. That allows us to avoid using various wrappers with casts, and just use the same function regardless of use. Link: https://lkml.kernel.org/r/20221109203051.1835763-2-torvalds@linux-foundation.org Signed-off-by: Linus Torvalds Acked-by: Johannes Weiner Acked-by: Hugh Dickins Signed-off-by: Andrew Morton --- include/linux/mm.h | 21 +++++++++++++++++++-- mm/swap.c | 16 ++++++++++++---- 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 8597ef676fc3..f873441303b7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1245,7 +1245,24 @@ static inline void folio_put_refs(struct folio *folio, int refs) __folio_put(folio); } -void release_pages(struct page **pages, int nr); +/** + * release_pages - release an array of pages or folios + * + * This just releases a simple array of multiple pages, and + * accepts various different forms of said page array: either + * a regular old boring array of pages, an array of folios, or + * an array of encoded page pointers. + * + * The transparent union syntax for this kind of "any of these + * argument types" is all kinds of ugly, so look away. + */ +typedef union { + struct page **pages; + struct folio **folios; + struct encoded_page **encoded_pages; +} release_pages_arg __attribute__ ((__transparent_union__)); + +void release_pages(release_pages_arg, int nr); /** * folios_put - Decrement the reference count on an array of folios. 
@@ -1261,7 +1278,7 @@ void release_pages(struct page **pages, int nr); */ static inline void folios_put(struct folio **folios, unsigned int nr) { - release_pages((struct page **)folios, nr); + release_pages(folios, nr); } static inline void put_page(struct page *page) diff --git a/mm/swap.c b/mm/swap.c index b9a6817e07ff..70e2063ef43a 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -981,22 +981,30 @@ void lru_cache_disable(void) /** * release_pages - batched put_page() - * @pages: array of pages to release + * @arg: array of pages to release * @nr: number of pages * - * Decrement the reference count on all the pages in @pages. If it + * Decrement the reference count on all the pages in @arg. If it * fell to zero, remove the page from the LRU and free it. + * + * Note that the argument can be an array of pages, encoded pages, + * or folio pointers. We ignore any encoded bits, and turn any of + * them into just a folio that gets free'd. */ -void release_pages(struct page **pages, int nr) +void release_pages(release_pages_arg arg, int nr) { int i; + struct encoded_page **encoded = arg.encoded_pages; LIST_HEAD(pages_to_free); struct lruvec *lruvec = NULL; unsigned long flags = 0; unsigned int lock_batch; for (i = 0; i < nr; i++) { - struct folio *folio = page_folio(pages[i]); + struct folio *folio; + + /* Turn any of the argument types into a folio */ + folio = page_folio(encoded_page_ptr(encoded[i])); /* * Make sure the IRQ-safe lock-holding time does not get From 7cc8f9c7146a5c2dad6e71653c4f69972e73df6b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 9 Nov 2022 12:30:50 -0800 Subject: [PATCH 3023/4122] mm: mmu_gather: prepare to gather encoded page pointers with flags This is purely a preparatory patch that makes all the data structures ready for encoding flags with the mmu_gather page pointers. The code currently always sets the flag to zero and doesn't use it yet, but now it's tracking the type state along. The next step will be to actually start using it. 
Link: https://lkml.kernel.org/r/20221109203051.1835763-3-torvalds@linux-foundation.org Signed-off-by: Linus Torvalds Acked-by: Johannes Weiner Acked-by: Hugh Dickins Signed-off-by: Andrew Morton --- arch/s390/include/asm/tlb.h | 8 +++++--- include/asm-generic/tlb.h | 9 +++++---- include/linux/swap.h | 2 +- mm/mmu_gather.c | 8 ++++---- mm/swap_state.c | 11 ++++------- 5 files changed, 19 insertions(+), 19 deletions(-) diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index 3a5c8fb590e5..05142226d65d 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -25,7 +25,8 @@ void __tlb_remove_table(void *_table); static inline void tlb_flush(struct mmu_gather *tlb); static inline bool __tlb_remove_page_size(struct mmu_gather *tlb, - struct page *page, int page_size); + struct encoded_page *page, + int page_size); #define tlb_flush tlb_flush #define pte_free_tlb pte_free_tlb @@ -42,9 +43,10 @@ static inline bool __tlb_remove_page_size(struct mmu_gather *tlb, * has already been freed, so just do free_page_and_swap_cache. 
*/ static inline bool __tlb_remove_page_size(struct mmu_gather *tlb, - struct page *page, int page_size) + struct encoded_page *page, + int page_size) { - free_page_and_swap_cache(page); + free_page_and_swap_cache(encoded_page_ptr(page)); return false; } diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index cab7cfebf40b..54d03d1e712e 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -246,7 +246,7 @@ struct mmu_gather_batch { struct mmu_gather_batch *next; unsigned int nr; unsigned int max; - struct page *pages[]; + struct encoded_page *encoded_pages[]; }; #define MAX_GATHER_BATCH \ @@ -260,7 +260,8 @@ struct mmu_gather_batch { */ #define MAX_GATHER_BATCH_COUNT (10000UL/MAX_GATHER_BATCH) -extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, +extern bool __tlb_remove_page_size(struct mmu_gather *tlb, + struct encoded_page *page, int page_size); #endif @@ -435,13 +436,13 @@ static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb) static inline void tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size) { - if (__tlb_remove_page_size(tlb, page, page_size)) + if (__tlb_remove_page_size(tlb, encode_page(page, 0), page_size)) tlb_flush_mmu(tlb); } static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page) { - return __tlb_remove_page_size(tlb, page, PAGE_SIZE); + return __tlb_remove_page_size(tlb, encode_page(page, 0), PAGE_SIZE); } /* tlb_remove_page diff --git a/include/linux/swap.h b/include/linux/swap.h index fec6647a289a..b61e2007d156 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -463,7 +463,7 @@ static inline unsigned long total_swapcache_pages(void) extern void free_swap_cache(struct page *page); extern void free_page_and_swap_cache(struct page *); -extern void free_pages_and_swap_cache(struct page **, int); +extern void free_pages_and_swap_cache(struct encoded_page **, int); /* linux/mm/swapfile.c */ extern atomic_long_t 
nr_swap_pages; extern long total_swap_pages; diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c index 3a2c3f8cad2f..382581c4a9f6 100644 --- a/mm/mmu_gather.c +++ b/mm/mmu_gather.c @@ -48,7 +48,7 @@ static void tlb_batch_pages_flush(struct mmu_gather *tlb) struct mmu_gather_batch *batch; for (batch = &tlb->local; batch && batch->nr; batch = batch->next) { - struct page **pages = batch->pages; + struct encoded_page **pages = batch->encoded_pages; do { /* @@ -77,7 +77,7 @@ static void tlb_batch_list_free(struct mmu_gather *tlb) tlb->local.next = NULL; } -bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size) +bool __tlb_remove_page_size(struct mmu_gather *tlb, struct encoded_page *page, int page_size) { struct mmu_gather_batch *batch; @@ -92,13 +92,13 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_ * Add the page and check if we are full. If so * force a flush. */ - batch->pages[batch->nr++] = page; + batch->encoded_pages[batch->nr++] = page; if (batch->nr == batch->max) { if (!tlb_next_batch(tlb)) return true; batch = tlb->active; } - VM_BUG_ON_PAGE(batch->nr > batch->max, page); + VM_BUG_ON_PAGE(batch->nr > batch->max, encoded_page_ptr(page)); return false; } diff --git a/mm/swap_state.c b/mm/swap_state.c index 40fe6f23e105..2927507b43d8 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -303,15 +303,12 @@ void free_page_and_swap_cache(struct page *page) * Passed an array of pages, drop them all from swapcache and then release * them. They are removed from the LRU and freed if this is their last use. 
*/ -void free_pages_and_swap_cache(struct page **pages, int nr) +void free_pages_and_swap_cache(struct encoded_page **pages, int nr) { - struct page **pagep = pages; - int i; - lru_add_drain(); - for (i = 0; i < nr; i++) - free_swap_cache(pagep[i]); - release_pages(pagep, nr); + for (int i = 0; i < nr; i++) + free_swap_cache(encoded_page_ptr(pages[i])); + release_pages(pages, nr); } static inline bool swap_use_vma_readahead(void) From 5df397dec7c4c08c23bd14f162f1228836faa4ce Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 9 Nov 2022 12:30:51 -0800 Subject: [PATCH 3024/4122] mm: delay page_remove_rmap() until after the TLB has been flushed When we remove a page table entry, we are very careful to only free the page after we have flushed the TLB, because other CPUs could still be using the page through stale TLB entries until after the flush. However, we have removed the rmap entry for that page early, which means that functions like folio_mkclean() would end up not serializing with the page table lock because the page had already been made invisible to rmap. And that is a problem, because while the TLB entry exists, we could end up with the following situation: (a) one CPU could come in and clean it, never seeing our mapping of the page (b) another CPU could continue to use the stale and dirty TLB entry and continue to write to said page resulting in a page that has been dirtied, but then marked clean again, all while another CPU might have dirtied it some more. End result: possibly lost dirty data. This extends our current TLB gather infrastructure to optionally track a "should I do a delayed page_remove_rmap() for this page after flushing the TLB". It uses the newly introduced 'encoded page pointer' to do that without having to keep separate data around. 
Note, this is complicated by a couple of issues: - we want to delay the rmap removal, but not past the page table lock, because that simplifies the memcg accounting - only SMP configurations want to delay TLB flushing, since on UP there are obviously no remote TLBs to worry about, and the page table lock means there are no preemption issues either - s390 has its own mmu_gather model that doesn't delay TLB flushing, and as a result also does not want the delayed rmap. As such, we can treat S390 like the UP case and use a common fallback for the "no delays" case. - we can track an enormous number of pages in our mmu_gather structure, with MAX_GATHER_BATCH_COUNT batches of MAX_GATHER_BATCH pages each, all set up to be approximately 10k pending pages. We do not want to have a huge number of batched pages that we then need to check for delayed rmap handling inside the page table lock. Particularly that last point results in a noteworthy detail, where the normal page batch gathering is limited once we have delayed rmaps pending, in such a way that only the last batch (the so-called "active batch") in the mmu_gather structure can have any delayed entries. NOTE! While the "possibly lost dirty data" sounds catastrophic, for this all to happen you need to have a user thread doing either madvise() with MADV_DONTNEED or a full re-mmap() of the area concurrently with another thread continuing to use said mapping. So arguably this is about user space doing crazy things, but from a VM consistency standpoint it's better if we track the dirty bit properly even when user space goes off the rails.
[akpm@linux-foundation.org: fix UP build, per Linus] Link: https://lore.kernel.org/all/B88D3073-440A-41C7-95F4-895D3F657EF2@gmail.com/ Link: https://lkml.kernel.org/r/20221109203051.1835763-4-torvalds@linux-foundation.org Signed-off-by: Linus Torvalds Acked-by: Johannes Weiner Acked-by: Hugh Dickins Reported-by: Nadav Amit Tested-by: Nadav Amit Signed-off-by: Andrew Morton --- arch/s390/include/asm/tlb.h | 3 +++ include/asm-generic/tlb.h | 31 +++++++++++++++++++++++++++++-- mm/memory.c | 23 +++++++++++++++++------ mm/mmu_gather.c | 33 +++++++++++++++++++++++++++++++++ 4 files changed, 82 insertions(+), 8 deletions(-) diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index 05142226d65d..b91f4a9b044c 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -41,6 +41,9 @@ static inline bool __tlb_remove_page_size(struct mmu_gather *tlb, * Release the page cache reference for a pte removed by * tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page * has already been freed, so just do free_page_and_swap_cache. + * + * s390 doesn't delay rmap removal, so there is nothing encoded in + * the page pointer. */ static inline bool __tlb_remove_page_size(struct mmu_gather *tlb, struct encoded_page *page, diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 54d03d1e712e..b46617207c93 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -263,6 +263,28 @@ struct mmu_gather_batch { extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct encoded_page *page, int page_size); + +#ifdef CONFIG_SMP +/* + * This both sets 'delayed_rmap', and returns true. It would be an inline + * function, except we define it before the 'struct mmu_gather'. 
+ */ +#define tlb_delay_rmap(tlb) (((tlb)->delayed_rmap = 1), true) +extern void tlb_flush_rmaps(struct mmu_gather *tlb, struct vm_area_struct *vma); +#endif + +#endif + +/* + * We have a no-op version of the rmap removal that doesn't + * delay anything. That is used on S390, which flushes remote + * TLBs synchronously, and on UP, which doesn't have any + * remote TLBs to flush and is not preemptible due to this + * all happening under the page table lock. + */ +#ifndef tlb_delay_rmap +#define tlb_delay_rmap(tlb) (false) +static inline void tlb_flush_rmaps(struct mmu_gather *tlb, struct vm_area_struct *vma) { } #endif /* @@ -295,6 +317,11 @@ struct mmu_gather { */ unsigned int freed_tables : 1; + /* + * Do we have pending delayed rmap removals? + */ + unsigned int delayed_rmap : 1; + /* * at which levels have we cleared entries? */ @@ -440,9 +467,9 @@ static inline void tlb_remove_page_size(struct mmu_gather *tlb, tlb_flush_mmu(tlb); } -static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page) +static __always_inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page, unsigned int flags) { - return __tlb_remove_page_size(tlb, encode_page(page, 0), PAGE_SIZE); + return __tlb_remove_page_size(tlb, encode_page(page, flags), PAGE_SIZE); } /* tlb_remove_page diff --git a/mm/memory.c b/mm/memory.c index 1749c638734f..6c85cba02113 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1374,6 +1374,8 @@ again: break; if (pte_present(ptent)) { + unsigned int delay_rmap; + page = vm_normal_page(vma, addr, ptent); if (unlikely(!should_zap_page(details, page))) continue; @@ -1385,20 +1387,26 @@ again: if (unlikely(!page)) continue; + delay_rmap = 0; if (!PageAnon(page)) { if (pte_dirty(ptent)) { - force_flush = 1; set_page_dirty(page); + if (tlb_delay_rmap(tlb)) { + delay_rmap = 1; + force_flush = 1; + } } if (pte_young(ptent) && likely(!(vma->vm_flags & VM_SEQ_READ))) mark_page_accessed(page); } rss[mm_counter(page)]--; - page_remove_rmap(page, 
vma, false); - if (unlikely(page_mapcount(page) < 0)) - print_bad_pte(vma, addr, ptent, page); - if (unlikely(__tlb_remove_page(tlb, page))) { + if (!delay_rmap) { + page_remove_rmap(page, vma, false); + if (unlikely(page_mapcount(page) < 0)) + print_bad_pte(vma, addr, ptent, page); + } + if (unlikely(__tlb_remove_page(tlb, page, delay_rmap))) { force_flush = 1; addr += PAGE_SIZE; break; @@ -1455,8 +1463,11 @@ again: arch_leave_lazy_mmu_mode(); /* Do the actual TLB flush before dropping ptl */ - if (force_flush) + if (force_flush) { tlb_flush_mmu_tlbonly(tlb); + if (tlb->delayed_rmap) + tlb_flush_rmaps(tlb, vma); + } pte_unmap_unlock(start_pte, ptl); /* diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c index 382581c4a9f6..1de1cf9ba581 100644 --- a/mm/mmu_gather.c +++ b/mm/mmu_gather.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -19,6 +20,10 @@ static bool tlb_next_batch(struct mmu_gather *tlb) { struct mmu_gather_batch *batch; + /* No more batching if we have delayed rmaps pending */ + if (tlb->delayed_rmap) + return false; + batch = tlb->active; if (batch->next) { tlb->active = batch->next; @@ -43,6 +48,33 @@ static bool tlb_next_batch(struct mmu_gather *tlb) return true; } +#ifdef CONFIG_SMP +/** + * tlb_flush_rmaps - do pending rmap removals after we have flushed the TLB + * @tlb: the current mmu_gather + * + * Note that because of how tlb_next_batch() above works, we will + * never start new batches with pending delayed rmaps, so we only + * need to walk through the current active batch. 
+ */ +void tlb_flush_rmaps(struct mmu_gather *tlb, struct vm_area_struct *vma) +{ + struct mmu_gather_batch *batch; + + batch = tlb->active; + for (int i = 0; i < batch->nr; i++) { + struct encoded_page *enc = batch->encoded_pages[i]; + + if (encoded_page_flags(enc)) { + struct page *page = encoded_page_ptr(enc); + page_remove_rmap(page, vma, false); + } + } + + tlb->delayed_rmap = 0; +} +#endif + static void tlb_batch_pages_flush(struct mmu_gather *tlb) { struct mmu_gather_batch *batch; @@ -284,6 +316,7 @@ static void __tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, tlb->active = &tlb->local; tlb->batch_count = 0; #endif + tlb->delayed_rmap = 0; tlb_table_init(tlb); #ifdef CONFIG_MMU_GATHER_PAGE_SIZE From f036c8184f8b6750fa642485fb01eb6ff036a86b Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Wed, 16 Nov 2022 08:49:30 +0100 Subject: [PATCH 3025/4122] mm: mmu_gather: do not expose delayed_rmap flag Flag delayed_rmap of 'struct mmu_gather' is rather a private member, but it is still accessed directly. Instead, let the TLB gather code access the flag. 
Link: https://lkml.kernel.org/r/Y3SWCu6NRaMQ5dbD@li-4a3a4a4c-28e5-11b2-a85c-a8d192c6f089.ibm.com Signed-off-by: Alexander Gordeev Acked-by: Linus Torvalds Signed-off-by: Andrew Morton --- mm/memory.c | 3 +-- mm/mmu_gather.c | 3 +++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 6c85cba02113..086cb3dd8608 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1465,8 +1465,7 @@ again: /* Do the actual TLB flush before dropping ptl */ if (force_flush) { tlb_flush_mmu_tlbonly(tlb); - if (tlb->delayed_rmap) - tlb_flush_rmaps(tlb, vma); + tlb_flush_rmaps(tlb, vma); } pte_unmap_unlock(start_pte, ptl); diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c index 1de1cf9ba581..dd1f8ca40cb5 100644 --- a/mm/mmu_gather.c +++ b/mm/mmu_gather.c @@ -61,6 +61,9 @@ void tlb_flush_rmaps(struct mmu_gather *tlb, struct vm_area_struct *vma) { struct mmu_gather_batch *batch; + if (!tlb->delayed_rmap) + return; + batch = tlb->active; for (int i = 0; i < batch->nr; i++) { struct encoded_page *enc = batch->encoded_pages[i]; From 7ac07a26dea79c3892436bce41cce03dcbd3c4c7 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 9 Nov 2022 20:50:35 +0900 Subject: [PATCH 3026/4122] zram: preparation for multi-zcomp support Patch series "zram: Support multiple compression streams", v5. This series adds support for multiple compression streams. The main idea is that different compression algorithms have different characteristics and zram may benefit when it uses a combination of algorithms: a default algorithm that is faster but has a lower compression rate and a secondary algorithm that can use higher compression rate at a price of slower compression/decompression. There are several use cases for this functionality: - huge pages re-compression: zstd or deflate can successfully compress huge pages (~50% of huge pages on my synthetic ChromeOS tests), IOW pages that lzo was not able to compress.
- idle pages re-compression: idle/cold pages sit in the memory and we may reduce zsmalloc memory usage if we recompress those idle pages. Userspace has a number of ways to control the behavior and impact of zram recompression: what type of pages should be recompressed, size watermarks, etc. Please refer to documentation patch. This patch (of 13): The patch turns compression streams and compressor algorithm name struct zram members into arrays, so that we can have multiple compression streams support (in the next patches). The patch uses a rather explicit API for compressor selection: - Get primary (default) compression stream zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]) - Get secondary compression stream zcomp_stream_get(zram->comps[ZRAM_SECONDARY_COMP]) We use similar API for compression streams put(). At this point we always have just one compression stream, since CONFIG_ZRAM_MULTI_COMP is not yet defined. Link: https://lkml.kernel.org/r/20221109115047.2921851-1-senozhatsky@chromium.org Link: https://lkml.kernel.org/r/20221109115047.2921851-2-senozhatsky@chromium.org Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Minchan Kim Cc: Nitin Gupta Cc: Suleiman Souhlal Cc: Nhat Pham Cc: Alexey Romanov Signed-off-by: Andrew Morton --- drivers/block/zram/zcomp.c | 6 +-- drivers/block/zram/zcomp.h | 2 +- drivers/block/zram/zram_drv.c | 90 +++++++++++++++++++++++++---------- drivers/block/zram/zram_drv.h | 14 +++++- 4 files changed, 80 insertions(+), 32 deletions(-) diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index 0916de952e09..55af4efd7983 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -206,7 +206,7 @@ void zcomp_destroy(struct zcomp *comp) * case of allocation error, or any other error potentially * returned by zcomp_init(). 
*/ -struct zcomp *zcomp_create(const char *compress) +struct zcomp *zcomp_create(const char *alg) { struct zcomp *comp; int error; @@ -216,14 +216,14 @@ struct zcomp *zcomp_create(const char *compress) * is not loaded yet. We must do it here, otherwise we are about to * call /sbin/modprobe under CPU hot-plug lock. */ - if (!zcomp_available_algorithm(compress)) + if (!zcomp_available_algorithm(alg)) return ERR_PTR(-EINVAL); comp = kzalloc(sizeof(struct zcomp), GFP_KERNEL); if (!comp) return ERR_PTR(-ENOMEM); - comp->name = compress; + comp->name = alg; error = zcomp_init(comp); if (error) { kfree(comp); diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h index 40f6420f4b2e..cdefdef93da8 100644 --- a/drivers/block/zram/zcomp.h +++ b/drivers/block/zram/zcomp.h @@ -27,7 +27,7 @@ int zcomp_cpu_dead(unsigned int cpu, struct hlist_node *node); ssize_t zcomp_available_show(const char *comp, char *buf); bool zcomp_available_algorithm(const char *comp); -struct zcomp *zcomp_create(const char *comp); +struct zcomp *zcomp_create(const char *alg); void zcomp_destroy(struct zcomp *comp); struct zcomp_strm *zcomp_stream_get(struct zcomp *comp); diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 87711ddf4b54..fbe46c6177fd 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1004,36 +1004,53 @@ static ssize_t comp_algorithm_show(struct device *dev, struct zram *zram = dev_to_zram(dev); down_read(&zram->init_lock); - sz = zcomp_available_show(zram->compressor, buf); + sz = zcomp_available_show(zram->comp_algs[ZRAM_PRIMARY_COMP], buf); up_read(&zram->init_lock); return sz; } +static void comp_algorithm_set(struct zram *zram, u32 prio, const char *alg) +{ + /* Do not kfree() algs that we didn't allocate, IOW the default ones */ + if (zram->comp_algs[prio] != default_compressor) + kfree(zram->comp_algs[prio]); + zram->comp_algs[prio] = alg; +} + static ssize_t comp_algorithm_store(struct device *dev, struct 
device_attribute *attr, const char *buf, size_t len) { struct zram *zram = dev_to_zram(dev); - char compressor[ARRAY_SIZE(zram->compressor)]; + char *compressor; size_t sz; - strscpy(compressor, buf, sizeof(compressor)); + sz = strlen(buf); + if (sz >= CRYPTO_MAX_ALG_NAME) + return -E2BIG; + + compressor = kstrdup(buf, GFP_KERNEL); + if (!compressor) + return -ENOMEM; + /* ignore trailing newline */ - sz = strlen(compressor); if (sz > 0 && compressor[sz - 1] == '\n') compressor[sz - 1] = 0x00; - if (!zcomp_available_algorithm(compressor)) + if (!zcomp_available_algorithm(compressor)) { + kfree(compressor); return -EINVAL; + } down_write(&zram->init_lock); if (init_done(zram)) { up_write(&zram->init_lock); + kfree(compressor); pr_info("Can't change algorithm for initialized device\n"); return -EBUSY; } - strcpy(zram->compressor, compressor); + comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, compressor); up_write(&zram->init_lock); return len; } @@ -1281,7 +1298,7 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, size = zram_get_obj_size(zram, index); if (size != PAGE_SIZE) - zstrm = zcomp_stream_get(zram->comp); + zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]); src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO); if (size == PAGE_SIZE) { @@ -1293,7 +1310,7 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, dst = kmap_atomic(page); ret = zcomp_decompress(zstrm, src, size, dst); kunmap_atomic(dst); - zcomp_stream_put(zram->comp); + zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]); } zs_unmap_object(zram->mem_pool, handle); zram_slot_unlock(zram, index); @@ -1360,13 +1377,13 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, kunmap_atomic(mem); compress_again: - zstrm = zcomp_stream_get(zram->comp); + zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]); src = kmap_atomic(page); ret = zcomp_compress(zstrm, src, &comp_len); kunmap_atomic(src); if (unlikely(ret)) { - 
zcomp_stream_put(zram->comp); + zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]); pr_err("Compression failed! err=%d\n", ret); zs_free(zram->mem_pool, handle); return ret; @@ -1394,7 +1411,7 @@ compress_again: __GFP_HIGHMEM | __GFP_MOVABLE); if (IS_ERR((void *)handle)) { - zcomp_stream_put(zram->comp); + zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]); atomic64_inc(&zram->stats.writestall); handle = zs_malloc(zram->mem_pool, comp_len, GFP_NOIO | __GFP_HIGHMEM | @@ -1411,14 +1428,14 @@ compress_again: * zstrm buffer back. It is necessary that the dereferencing * of the zstrm variable below occurs correctly. */ - zstrm = zcomp_stream_get(zram->comp); + zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]); } alloced_pages = zs_get_total_pages(zram->mem_pool); update_used_max(zram, alloced_pages); if (zram->limit_pages && alloced_pages > zram->limit_pages) { - zcomp_stream_put(zram->comp); + zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]); zs_free(zram->mem_pool, handle); return -ENOMEM; } @@ -1432,7 +1449,7 @@ compress_again: if (comp_len == PAGE_SIZE) kunmap_atomic(src); - zcomp_stream_put(zram->comp); + zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]); zs_unmap_object(zram->mem_pool, handle); atomic64_add(comp_len, &zram->stats.compr_data_size); out: @@ -1707,6 +1724,20 @@ out: return ret; } +static void zram_destroy_comps(struct zram *zram) +{ + u32 prio; + + for (prio = 0; prio < ZRAM_MAX_COMPS; prio++) { + struct zcomp *comp = zram->comps[prio]; + + zram->comps[prio] = NULL; + if (!comp) + continue; + zcomp_destroy(comp); + } +} + static void zram_reset_device(struct zram *zram) { down_write(&zram->init_lock); @@ -1724,11 +1755,11 @@ static void zram_reset_device(struct zram *zram) /* I/O operation under all of CPU are done so let's free */ zram_meta_free(zram, zram->disksize); zram->disksize = 0; + zram_destroy_comps(zram); memset(&zram->stats, 0, sizeof(zram->stats)); - zcomp_destroy(zram->comp); - zram->comp = NULL; reset_bdev(zram); + 
comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor); up_write(&zram->init_lock); } @@ -1739,6 +1770,7 @@ static ssize_t disksize_store(struct device *dev, struct zcomp *comp; struct zram *zram = dev_to_zram(dev); int err; + u32 prio; disksize = memparse(buf, NULL); if (!disksize) @@ -1757,22 +1789,28 @@ static ssize_t disksize_store(struct device *dev, goto out_unlock; } - comp = zcomp_create(zram->compressor); - if (IS_ERR(comp)) { - pr_err("Cannot initialise %s compressing backend\n", - zram->compressor); - err = PTR_ERR(comp); - goto out_free_meta; - } + for (prio = 0; prio < ZRAM_MAX_COMPS; prio++) { + if (!zram->comp_algs[prio]) + continue; - zram->comp = comp; + comp = zcomp_create(zram->comp_algs[prio]); + if (IS_ERR(comp)) { + pr_err("Cannot initialise %s compressing backend\n", + zram->comp_algs[prio]); + err = PTR_ERR(comp); + goto out_free_comps; + } + + zram->comps[prio] = comp; + } zram->disksize = disksize; set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT); up_write(&zram->init_lock); return len; -out_free_meta: +out_free_comps: + zram_destroy_comps(zram); zram_meta_free(zram, disksize); out_unlock: up_write(&zram->init_lock); @@ -1959,7 +1997,7 @@ static int zram_add(void) if (ret) goto out_cleanup_disk; - strscpy(zram->compressor, default_compressor, sizeof(zram->compressor)); + zram->comp_algs[ZRAM_PRIMARY_COMP] = default_compressor; zram_debugfs_register(zram); pr_info("Added device: %s\n", zram->disk->disk_name); diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index a2bda53020fd..7a643c8c38ec 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -89,10 +89,20 @@ struct zram_stats { #endif }; +#ifdef CONFIG_ZRAM_MULTI_COMP +#define ZRAM_PRIMARY_COMP 0U +#define ZRAM_SECONDARY_COMP 1U +#define ZRAM_MAX_COMPS 4U +#else +#define ZRAM_PRIMARY_COMP 0U +#define ZRAM_SECONDARY_COMP 0U +#define ZRAM_MAX_COMPS 1U +#endif + struct zram { struct zram_table_entry *table; 
struct zs_pool *mem_pool; - struct zcomp *comp; + struct zcomp *comps[ZRAM_MAX_COMPS]; struct gendisk *disk; /* Prevent concurrent execution of device init */ struct rw_semaphore init_lock; @@ -107,7 +117,7 @@ struct zram { * we can store in a disk. */ u64 disksize; /* bytes */ - char compressor[CRYPTO_MAX_ALG_NAME]; + const char *comp_algs[ZRAM_MAX_COMPS]; /* * zram is claimed so open request will be failed */ From 001d9273570115b2eb360d5452bbc46f6cc063a1 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 9 Nov 2022 20:50:36 +0900 Subject: [PATCH 3027/4122] zram: add recompression algorithm sysfs knob Introduce recomp_algorithm sysfs knob that controls secondary algorithm selection used for recompression. We will support up to 3 secondary compression algorithms which are sorted in order of their priority. To select an algorithm user has to provide its name and priority: echo "algo=zstd priority=1" > /sys/block/zramX/recomp_algorithm echo "algo=deflate priority=2" > /sys/block/zramX/recomp_algorithm During recompression zram iterates through the list of registered secondary algorithms in order of their priorities. We also have a short version for cases when there is only one secondary compression algorithm: echo "algo=zstd" > /sys/block/zramX/recomp_algorithm This will register zstd as the secondary algorithm with priority 1. 
Link: https://lkml.kernel.org/r/20221109115047.2921851-3-senozhatsky@chromium.org Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Alexey Romanov Cc: Nhat Pham Cc: Nitin Gupta Cc: Suleiman Souhlal Signed-off-by: Andrew Morton --- drivers/block/zram/zram_drv.c | 124 ++++++++++++++++++++++++++++------ 1 file changed, 105 insertions(+), 19 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index fbe46c6177fd..8ea4908f5961 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -997,31 +997,28 @@ static ssize_t max_comp_streams_store(struct device *dev, return len; } -static ssize_t comp_algorithm_show(struct device *dev, - struct device_attribute *attr, char *buf) +static void comp_algorithm_set(struct zram *zram, u32 prio, const char *alg) { - size_t sz; - struct zram *zram = dev_to_zram(dev); + /* Do not free statically defined compression algorithms */ + if (zram->comp_algs[prio] != default_compressor) + kfree(zram->comp_algs[prio]); + + zram->comp_algs[prio] = alg; +} + +static ssize_t __comp_algorithm_show(struct zram *zram, u32 prio, char *buf) +{ + ssize_t sz; down_read(&zram->init_lock); - sz = zcomp_available_show(zram->comp_algs[ZRAM_PRIMARY_COMP], buf); + sz = zcomp_available_show(zram->comp_algs[prio], buf); up_read(&zram->init_lock); return sz; } -static void comp_algorithm_set(struct zram *zram, u32 prio, const char *alg) +static int __comp_algorithm_store(struct zram *zram, u32 prio, const char *buf) { - /* Do not kfree() algs that we didn't allocate, IOW the default ones */ - if (zram->comp_algs[prio] != default_compressor) - kfree(zram->comp_algs[prio]); - zram->comp_algs[prio] = alg; -} - -static ssize_t comp_algorithm_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t len) -{ - struct zram *zram = dev_to_zram(dev); char *compressor; size_t sz; @@ -1050,11 +1047,94 @@ static ssize_t comp_algorithm_store(struct device *dev, return -EBUSY; } - 
comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, compressor); + comp_algorithm_set(zram, prio, compressor); up_write(&zram->init_lock); - return len; + return 0; } +static ssize_t comp_algorithm_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return __comp_algorithm_show(zram, ZRAM_PRIMARY_COMP, buf); +} + +static ssize_t comp_algorithm_store(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t len) +{ + struct zram *zram = dev_to_zram(dev); + int ret; + + ret = __comp_algorithm_store(zram, ZRAM_PRIMARY_COMP, buf); + return ret ? ret : len; +} + +#ifdef CONFIG_ZRAM_MULTI_COMP +static ssize_t recomp_algorithm_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct zram *zram = dev_to_zram(dev); + ssize_t sz = 0; + u32 prio; + + for (prio = ZRAM_SECONDARY_COMP; prio < ZRAM_MAX_COMPS; prio++) { + if (!zram->comp_algs[prio]) + continue; + + sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2, "#%d: ", prio); + sz += __comp_algorithm_show(zram, prio, buf + sz); + } + + return sz; +} + +static ssize_t recomp_algorithm_store(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t len) +{ + struct zram *zram = dev_to_zram(dev); + int prio = ZRAM_SECONDARY_COMP; + char *args, *param, *val; + char *alg = NULL; + int ret; + + args = skip_spaces(buf); + while (*args) { + args = next_arg(args, &param, &val); + + if (!*val) + return -EINVAL; + + if (!strcmp(param, "algo")) { + alg = val; + continue; + } + + if (!strcmp(param, "priority")) { + ret = kstrtoint(val, 10, &prio); + if (ret) + return ret; + continue; + } + } + + if (!alg) + return -EINVAL; + + if (prio < ZRAM_SECONDARY_COMP || prio >= ZRAM_MAX_COMPS) + return -EINVAL; + + ret = __comp_algorithm_store(zram, prio, alg); + return ret ?
ret : len; +} +#endif + static ssize_t compact_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { @@ -1895,6 +1975,9 @@ static DEVICE_ATTR_WO(writeback); static DEVICE_ATTR_RW(writeback_limit); static DEVICE_ATTR_RW(writeback_limit_enable); #endif +#ifdef CONFIG_ZRAM_MULTI_COMP +static DEVICE_ATTR_RW(recomp_algorithm); +#endif static struct attribute *zram_disk_attrs[] = { &dev_attr_disksize.attr, @@ -1918,6 +2001,9 @@ static struct attribute *zram_disk_attrs[] = { &dev_attr_bd_stat.attr, #endif &dev_attr_debug_stat.attr, +#ifdef CONFIG_ZRAM_MULTI_COMP + &dev_attr_recomp_algorithm.attr, +#endif NULL, }; @@ -1997,7 +2083,7 @@ static int zram_add(void) if (ret) goto out_cleanup_disk; - zram->comp_algs[ZRAM_PRIMARY_COMP] = default_compressor; + comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor); zram_debugfs_register(zram); pr_info("Added device: %s\n", zram->disk->disk_name); From 5561347aa598b6b12fb6069788ccec9b5e5ebec1 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 9 Nov 2022 20:50:37 +0900 Subject: [PATCH 3028/4122] zram: factor out WB and non-WB zram read functions We will use non-WB variant in ZRAM page recompression path. Link: https://lkml.kernel.org/r/20221109115047.2921851-4-senozhatsky@chromium.org Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Alexey Romanov Cc: Nhat Pham Cc: Nitin Gupta Cc: Suleiman Souhlal Signed-off-by: Andrew Morton --- drivers/block/zram/zram_drv.c | 72 ++++++++++++++++++++++++----------- 1 file changed, 49 insertions(+), 23 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 8ea4908f5961..135fb946f83e 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1336,8 +1336,29 @@ out: ~(1UL << ZRAM_LOCK | 1UL << ZRAM_UNDER_WB)); } -static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, - struct bio *bio, bool partial_io) +/* + * Reads a page from the writeback devices. 
Corresponding ZRAM slot + * should be unlocked. + */ +static int zram_bvec_read_from_bdev(struct zram *zram, struct page *page, + u32 index, struct bio *bio, bool partial_io) +{ + struct bio_vec bvec = { + .bv_page = page, + .bv_len = PAGE_SIZE, + .bv_offset = 0, + }; + + return read_from_bdev(zram, &bvec, zram_get_element(zram, index), bio, + partial_io); +} + +/* + * Reads (decompresses if needed) a page from zspool (zsmalloc). + * Corresponding ZRAM slot should be locked. + */ +static int zram_read_from_zspool(struct zram *zram, struct page *page, + u32 index) { struct zcomp_strm *zstrm; unsigned long handle; @@ -1345,23 +1366,6 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, void *src, *dst; int ret; - zram_slot_lock(zram, index); - if (zram_test_flag(zram, index, ZRAM_WB)) { - struct bio_vec bvec; - - zram_slot_unlock(zram, index); - /* A null bio means rw_page was used, we must fallback to bio */ - if (!bio) - return -EOPNOTSUPP; - - bvec.bv_page = page; - bvec.bv_len = PAGE_SIZE; - bvec.bv_offset = 0; - return read_from_bdev(zram, &bvec, - zram_get_element(zram, index), - bio, partial_io); - } - handle = zram_get_handle(zram, index); if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) { unsigned long value; @@ -1371,7 +1375,6 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, mem = kmap_atomic(page); zram_fill_page(mem, PAGE_SIZE, value); kunmap_atomic(mem); - zram_slot_unlock(zram, index); return 0; } @@ -1393,17 +1396,40 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]); } zs_unmap_object(zram->mem_pool, handle); - zram_slot_unlock(zram, index); + return ret; +} + +static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, + struct bio *bio, bool partial_io) +{ + int ret; + + zram_slot_lock(zram, index); + if (!zram_test_flag(zram, index, ZRAM_WB)) { + /* Slot should be locked through out the 
function call */ + ret = zram_read_from_zspool(zram, page, index); + zram_slot_unlock(zram, index); + } else { + /* Slot should be unlocked before the function call */ + zram_slot_unlock(zram, index); + + /* A null bio means rw_page was used, we must fallback to bio */ + if (!bio) + return -EOPNOTSUPP; + + ret = zram_bvec_read_from_bdev(zram, page, index, bio, + partial_io); + } /* Should NEVER happen. Return bio error if it does. */ - if (WARN_ON(ret)) + if (WARN_ON(ret < 0)) pr_err("Decompression failed! err=%d, page=%u\n", ret, index); return ret; } static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, - u32 index, int offset, struct bio *bio) + u32 index, int offset, struct bio *bio) { int ret; struct page *page; From 84b33bf7888975d28c0e57011b75c445279c60ec Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 9 Nov 2022 20:50:38 +0900 Subject: [PATCH 3029/4122] zram: introduce recompress sysfs knob Allow zram to recompress (using secondary compression streams) pages. Re-compression algorithms (we support up to 3 at this stage) are selected via recomp_algorithm: echo "algo=zstd priority=1" > /sys/block/zramX/recomp_algorithm Please read documentation for more details. 
We support several recompression modes: 1) IDLE pages recompression is activated by `idle` mode echo "type=idle" > /sys/block/zram0/recompress 2) Since there may be many idle pages user-space may pass a size threshold value (in bytes) and we will recompress pages only of equal or greater size: echo "threshold=888" > /sys/block/zram0/recompress 3) HUGE pages recompression is activated by `huge` mode echo "type=huge" > /sys/block/zram0/recompress 4) HUGE_IDLE pages recompression is activated by `huge_idle` mode echo "type=huge_idle" > /sys/block/zram0/recompress [senozhatsky@chromium.org: we should always zero out err variable in recompress loop] Link: https://lkml.kernel.org/r/20221110143423.3250790-1-senozhatsky@chromium.org Link: https://lkml.kernel.org/r/20221109115047.2921851-5-senozhatsky@chromium.org Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Nathan Chancellor Cc: Alexey Romanov Cc: Nhat Pham Cc: Nitin Gupta Cc: Suleiman Souhlal Signed-off-by: Andrew Morton --- drivers/block/zram/Kconfig | 9 ++ drivers/block/zram/zram_drv.c | 264 +++++++++++++++++++++++++++++++++- drivers/block/zram/zram_drv.h | 7 + 3 files changed, 277 insertions(+), 3 deletions(-) diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig index d4100b0c083e..0386b7da02aa 100644 --- a/drivers/block/zram/Kconfig +++ b/drivers/block/zram/Kconfig @@ -78,3 +78,12 @@ config ZRAM_MEMORY_TRACKING /sys/kernel/debug/zram/zramX/block_state. See Documentation/admin-guide/blockdev/zram.rst for more information. + +config ZRAM_MULTI_COMP + bool "Enable multiple compression streams" + depends on ZRAM + help + This will enable multi-compression streams, so that ZRAM can + re-compress pages using a potentially slower but more effective + compression algorithm. Note, that IDLE page recompression + requires ZRAM_MEMORY_TRACKING.
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 135fb946f83e..97300b3a83c3 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -155,6 +155,25 @@ static inline bool is_partial_io(struct bio_vec *bvec) } #endif +static inline void zram_set_priority(struct zram *zram, u32 index, u32 prio) +{ + prio &= ZRAM_COMP_PRIORITY_MASK; + /* + * Clear previous priority value first, in case if we recompress + * further an already recompressed page + */ + zram->table[index].flags &= ~(ZRAM_COMP_PRIORITY_MASK << + ZRAM_COMP_PRIORITY_BIT1); + zram->table[index].flags |= (prio << ZRAM_COMP_PRIORITY_BIT1); +} + +static inline u32 zram_get_priority(struct zram *zram, u32 index) +{ + u32 prio = zram->table[index].flags >> ZRAM_COMP_PRIORITY_BIT1; + + return prio & ZRAM_COMP_PRIORITY_MASK; +} + /* * Check if request is within bounds and aligned on zram logical blocks. */ @@ -1304,6 +1323,11 @@ static void zram_free_page(struct zram *zram, size_t index) atomic64_dec(&zram->stats.huge_pages); } + if (zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE)) + zram_clear_flag(zram, index, ZRAM_INCOMPRESSIBLE); + + zram_set_priority(zram, index, 0); + if (zram_test_flag(zram, index, ZRAM_WB)) { zram_clear_flag(zram, index, ZRAM_WB); free_block_bdev(zram, zram_get_element(zram, index)); @@ -1364,6 +1388,7 @@ static int zram_read_from_zspool(struct zram *zram, struct page *page, unsigned long handle; unsigned int size; void *src, *dst; + u32 prio; int ret; handle = zram_get_handle(zram, index); @@ -1380,8 +1405,10 @@ static int zram_read_from_zspool(struct zram *zram, struct page *page, size = zram_get_obj_size(zram, index); - if (size != PAGE_SIZE) - zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]); + if (size != PAGE_SIZE) { + prio = zram_get_priority(zram, index); + zstrm = zcomp_stream_get(zram->comps[prio]); + } src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO); if (size == PAGE_SIZE) { @@ -1393,7 +1420,7 @@ static int 
zram_read_from_zspool(struct zram *zram, struct page *page, dst = kmap_atomic(page); ret = zcomp_decompress(zstrm, src, size, dst); kunmap_atomic(dst); - zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]); + zcomp_stream_put(zram->comps[prio]); } zs_unmap_object(zram->mem_pool, handle); return ret; @@ -1624,6 +1651,235 @@ out: return ret; } +#ifdef CONFIG_ZRAM_MULTI_COMP +/* + * This function will decompress (unless it's ZRAM_HUGE) the page and then + * attempt to compress it using provided compression algorithm priority + * (which is potentially more effective). + * + * Corresponding ZRAM slot should be locked. + */ +static int zram_recompress(struct zram *zram, u32 index, struct page *page, + u32 threshold, u32 prio, u32 prio_max) +{ + struct zcomp_strm *zstrm = NULL; + unsigned long handle_old; + unsigned long handle_new; + unsigned int comp_len_old; + unsigned int comp_len_new; + void *src, *dst; + int ret; + + handle_old = zram_get_handle(zram, index); + if (!handle_old) + return -EINVAL; + + comp_len_old = zram_get_obj_size(zram, index); + /* + * Do not recompress objects that are already "small enough". + */ + if (comp_len_old < threshold) + return 0; + + ret = zram_read_from_zspool(zram, page, index); + if (ret) + return ret; + + /* + * Iterate the secondary comp algorithms list (in order of priority) + * and try to recompress the page. + */ + for (; prio < prio_max; prio++) { + if (!zram->comps[prio]) + continue; + + /* + * Skip if the object is already re-compressed with a higher + * priority algorithm (or same algorithm). 
+ */ + if (prio <= zram_get_priority(zram, index)) + continue; + + zstrm = zcomp_stream_get(zram->comps[prio]); + src = kmap_atomic(page); + ret = zcomp_compress(zstrm, src, &comp_len_new); + kunmap_atomic(src); + + if (ret) { + zcomp_stream_put(zram->comps[prio]); + return ret; + } + + /* Continue until we make progress */ + if (comp_len_new >= huge_class_size || + comp_len_new >= comp_len_old || + (threshold && comp_len_new >= threshold)) { + zcomp_stream_put(zram->comps[prio]); + continue; + } + + /* Recompression was successful so break out */ + break; + } + + /* + * We did not try to recompress, e.g. when we have only one + * secondary algorithm and the page is already recompressed + * using that algorithm + */ + if (!zstrm) + return 0; + + /* + * All secondary algorithms failed to re-compress the page in a way + * that would save memory, mark the object as incompressible so that + * we will not try to compress it again. + */ + if (comp_len_new >= huge_class_size || comp_len_new >= comp_len_old) { + zram_set_flag(zram, index, ZRAM_INCOMPRESSIBLE); + return 0; + } + + /* Successful recompression but above threshold */ + if (threshold && comp_len_new >= threshold) + return 0; + + /* + * No direct reclaim (slow path) for handle allocation and no + * re-compression attempt (unlike in __zram_bvec_write()) since + * we already have stored that object in zsmalloc. If we cannot + * alloc memory for recompressed object then we bail out and + * simply keep the old (existing) object in zsmalloc. 
+ */ + handle_new = zs_malloc(zram->mem_pool, comp_len_new, + __GFP_KSWAPD_RECLAIM | + __GFP_NOWARN | + __GFP_HIGHMEM | + __GFP_MOVABLE); + if (IS_ERR_VALUE(handle_new)) { + zcomp_stream_put(zram->comps[prio]); + return PTR_ERR((void *)handle_new); + } + + dst = zs_map_object(zram->mem_pool, handle_new, ZS_MM_WO); + memcpy(dst, zstrm->buffer, comp_len_new); + zcomp_stream_put(zram->comps[prio]); + + zs_unmap_object(zram->mem_pool, handle_new); + + zram_free_page(zram, index); + zram_set_handle(zram, index, handle_new); + zram_set_obj_size(zram, index, comp_len_new); + zram_set_priority(zram, index, prio); + + atomic64_add(comp_len_new, &zram->stats.compr_data_size); + atomic64_inc(&zram->stats.pages_stored); + + return 0; +} + +#define RECOMPRESS_IDLE (1 << 0) +#define RECOMPRESS_HUGE (1 << 1) + +static ssize_t recompress_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct zram *zram = dev_to_zram(dev); + u32 mode = 0, threshold = 0, prio = ZRAM_SECONDARY_COMP; + unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; + char *args, *param, *val; + unsigned long index; + struct page *page; + ssize_t ret; + + args = skip_spaces(buf); + while (*args) { + args = next_arg(args, &param, &val); + + if (!*val) + return -EINVAL; + + if (!strcmp(param, "type")) { + if (!strcmp(val, "idle")) + mode = RECOMPRESS_IDLE; + if (!strcmp(val, "huge")) + mode = RECOMPRESS_HUGE; + if (!strcmp(val, "huge_idle")) + mode = RECOMPRESS_IDLE | RECOMPRESS_HUGE; + continue; + } + + if (!strcmp(param, "threshold")) { + /* + * We will re-compress only idle objects equal or + * greater in size than watermark.
+ */ + ret = kstrtouint(val, 10, &threshold); + if (ret) + return ret; + continue; + } + } + + if (threshold >= PAGE_SIZE) + return -EINVAL; + + down_read(&zram->init_lock); + if (!init_done(zram)) { + ret = -EINVAL; + goto release_init_lock; + } + + page = alloc_page(GFP_KERNEL); + if (!page) { + ret = -ENOMEM; + goto release_init_lock; + } + + ret = len; + for (index = 0; index < nr_pages; index++) { + int err = 0; + + zram_slot_lock(zram, index); + + if (!zram_allocated(zram, index)) + goto next; + + if (mode & RECOMPRESS_IDLE && + !zram_test_flag(zram, index, ZRAM_IDLE)) + goto next; + + if (mode & RECOMPRESS_HUGE && + !zram_test_flag(zram, index, ZRAM_HUGE)) + goto next; + + if (zram_test_flag(zram, index, ZRAM_WB) || + zram_test_flag(zram, index, ZRAM_UNDER_WB) || + zram_test_flag(zram, index, ZRAM_SAME) || + zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE)) + goto next; + + err = zram_recompress(zram, index, page, threshold, + prio, ZRAM_MAX_COMPS); +next: + zram_slot_unlock(zram, index); + if (err) { + ret = err; + break; + } + + cond_resched(); + } + + __free_page(page); + +release_init_lock: + up_read(&zram->init_lock); + return ret; +} +#endif + /* * zram_bio_discard - handler on discard request * @index: physical block index in PAGE_SIZE units @@ -2003,6 +2259,7 @@ static DEVICE_ATTR_RW(writeback_limit_enable); #endif #ifdef CONFIG_ZRAM_MULTI_COMP static DEVICE_ATTR_RW(recomp_algorithm); +static DEVICE_ATTR_WO(recompress); #endif static struct attribute *zram_disk_attrs[] = { @@ -2029,6 +2286,7 @@ static struct attribute *zram_disk_attrs[] = { &dev_attr_debug_stat.attr, #ifdef CONFIG_ZRAM_MULTI_COMP &dev_attr_recomp_algorithm.attr, + &dev_attr_recompress.attr, #endif NULL, }; diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 7a643c8c38ec..b80faae76835 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -40,6 +40,9 @@ */ #define ZRAM_FLAG_SHIFT (PAGE_SHIFT + 1) +/* Only 2 bits are allowed for 
comp priority index */ +#define ZRAM_COMP_PRIORITY_MASK 0x3 + /* Flags for zram pages (table[page_no].flags) */ enum zram_pageflags { /* zram slot is locked */ @@ -49,6 +52,10 @@ enum zram_pageflags { ZRAM_UNDER_WB, /* page is under writeback */ ZRAM_HUGE, /* Incompressible page */ ZRAM_IDLE, /* not accessed page since last idle marking */ + ZRAM_INCOMPRESSIBLE, /* none of the algorithms could compress it */ + + ZRAM_COMP_PRIORITY_BIT1, /* First bit of comp priority index */ + ZRAM_COMP_PRIORITY_BIT2, /* Second bit of comp priority index */ __NR_ZRAM_PAGEFLAGS, }; From 60e9b39ebec56467c36c3da76eee28083196cdf1 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 9 Nov 2022 20:50:39 +0900 Subject: [PATCH 3030/4122] zram: add recompress flag to read_block_state() Add a new flag to zram block state that shows if the page was recompressed (using alternative compression algorithm). Link: https://lkml.kernel.org/r/20221109115047.2921851-6-senozhatsky@chromium.org Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Alexey Romanov Cc: Nhat Pham Cc: Nitin Gupta Cc: Suleiman Souhlal Signed-off-by: Andrew Morton --- Documentation/admin-guide/blockdev/zram.rst | 9 ++++++--- drivers/block/zram/zram_drv.c | 5 +++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst index c73b16930449..177a142c3146 100644 --- a/Documentation/admin-guide/blockdev/zram.rst +++ b/Documentation/admin-guide/blockdev/zram.rst @@ -411,9 +411,10 @@ pages of the process with*pagemap. If you enable the feature, you could see block state via /sys/kernel/debug/zram/zram0/block_state". The output is as follows:: - 300 75.033841 .wh. - 301 63.806904 s... - 302 63.806919 ..hi + 300 75.033841 .wh.. + 301 63.806904 s.... + 302 63.806919 ..hi. + 303 62.801919 ....r First column zram's block index. 
@@ -430,6 +431,8 @@ Third column huge page i: idle page + r: + recompressed page (secondary compression algorithm) First line of above example says 300th block is accessed at 75.033841sec and the block's state is huge so it is written back to the backing diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 97300b3a83c3..ddbfa70ef9a3 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -936,13 +936,14 @@ static ssize_t read_block_state(struct file *file, char __user *buf, ts = ktime_to_timespec64(zram->table[index].ac_time); copied = snprintf(kbuf + written, count, - "%12zd %12lld.%06lu %c%c%c%c\n", + "%12zd %12lld.%06lu %c%c%c%c%c\n", index, (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC, zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.', zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.', zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.', - zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.'); + zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.', + zram_get_priority(zram, index) ? 'r' : '.'); if (count <= copied) { zram_slot_unlock(zram, index); From 9fda785dbd14cfc7d874d00d2b007cb143aa48d0 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 9 Nov 2022 20:50:40 +0900 Subject: [PATCH 3031/4122] zram: clarify writeback_store() comment Re-phrase writeback BIO error comment. 
Link: https://lkml.kernel.org/r/20221109115047.2921851-7-senozhatsky@chromium.org Reported-by: Andrew Morton Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Alexey Romanov Cc: Nhat Pham Cc: Nitin Gupta Cc: Suleiman Souhlal Signed-off-by: Andrew Morton --- drivers/block/zram/zram_drv.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index ddbfa70ef9a3..0ca0bf330d8f 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -769,8 +769,12 @@ static ssize_t writeback_store(struct device *dev, zram_clear_flag(zram, index, ZRAM_IDLE); zram_slot_unlock(zram, index); /* - * Return last IO error unless every IO were - * not suceeded. + * BIO errors are not fatal, we continue and simply + * attempt to writeback the remaining objects (pages). + * At the same time we need to signal user-space that + * some writes (at least one, but also could be all of + * them) were not successful and we do so by returning + * the most recent BIO error. */ ret = err; continue; From f24ee92cbe13242758635e654b2422dbf4912e4b Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 9 Nov 2022 20:50:41 +0900 Subject: [PATCH 3032/4122] zram: use IS_ERR_VALUE() to check for zs_malloc() errors Avoid typecasts that are needed for IS_ERR() and use IS_ERR_VALUE() instead. 
Link: https://lkml.kernel.org/r/20221109115047.2921851-8-senozhatsky@chromium.org Suggested-by: Andrew Morton Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Alexey Romanov Cc: Nhat Pham Cc: Nitin Gupta Cc: Suleiman Souhlal Signed-off-by: Andrew Morton --- drivers/block/zram/zram_drv.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 0ca0bf330d8f..473fc5eb71a1 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1542,19 +1542,19 @@ compress_again: * if we have a 'non-null' handle here then we are coming * from the slow path and handle has already been allocated. */ - if (IS_ERR((void *)handle)) + if (IS_ERR_VALUE(handle)) handle = zs_malloc(zram->mem_pool, comp_len, __GFP_KSWAPD_RECLAIM | __GFP_NOWARN | __GFP_HIGHMEM | __GFP_MOVABLE); - if (IS_ERR((void *)handle)) { + if (IS_ERR_VALUE(handle)) { zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]); atomic64_inc(&zram->stats.writestall); handle = zs_malloc(zram->mem_pool, comp_len, GFP_NOIO | __GFP_HIGHMEM | __GFP_MOVABLE); - if (IS_ERR((void *)handle)) + if (IS_ERR_VALUE(handle)) return PTR_ERR((void *)handle); if (comp_len != PAGE_SIZE) From 7c2af309abd24ff4e313246bf9b68f398d95c871 Mon Sep 17 00:00:00 2001 From: Alexey Romanov Date: Wed, 9 Nov 2022 20:50:42 +0900 Subject: [PATCH 3033/4122] zram: add size class equals check into recompression It makes no sense for us to recompress the object if it will be in the same size class. We anyway don't get any memory gain. But, at the same time, we get a CPU time overhead when inserting this object into zspage and decompressing it afterwards. 
[senozhatsky: rebased and fixed conflicts] Link: https://lkml.kernel.org/r/20221109115047.2921851-9-senozhatsky@chromium.org Signed-off-by: Alexey Romanov Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Nhat Pham Cc: Nitin Gupta Cc: Suleiman Souhlal Signed-off-by: Andrew Morton --- drivers/block/zram/zram_drv.c | 11 ++++++++++- include/linux/zsmalloc.h | 2 ++ mm/zsmalloc.c | 21 +++++++++++++++++++++ 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 473fc5eb71a1..66659f16f6c8 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1672,6 +1672,8 @@ static int zram_recompress(struct zram *zram, u32 index, struct page *page, unsigned long handle_new; unsigned int comp_len_old; unsigned int comp_len_new; + unsigned int class_index_old; + unsigned int class_index_new; void *src, *dst; int ret; @@ -1690,6 +1692,7 @@ static int zram_recompress(struct zram *zram, u32 index, struct page *page, if (ret) return ret; + class_index_old = zs_lookup_class_index(zram->mem_pool, comp_len_old); /* * Iterate the secondary comp algorithms list (in order of priority) * and try to recompress the page. @@ -1715,9 +1718,13 @@ static int zram_recompress(struct zram *zram, u32 index, struct page *page, return ret; } + class_index_new = zs_lookup_class_index(zram->mem_pool, + comp_len_new); + /* Continue until we make progress */ if (comp_len_new >= huge_class_size || comp_len_new >= comp_len_old || + class_index_new >= class_index_old || (threshold && comp_len_new >= threshold)) { zcomp_stream_put(zram->comps[prio]); continue; @@ -1740,7 +1747,9 @@ static int zram_recompress(struct zram *zram, u32 index, struct page *page, * that would save memory, mark the object as incompressible so that * we will not try to compress it again. 
*/ - if (comp_len_new >= huge_class_size || comp_len_new >= comp_len_old) { + if (comp_len_new >= huge_class_size || + comp_len_new >= comp_len_old || + class_index_new >= class_index_old) { zram_set_flag(zram, index, ZRAM_INCOMPRESSIBLE); return 0; } diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h index 2a430e713ce5..a48cd0ffe57d 100644 --- a/include/linux/zsmalloc.h +++ b/include/linux/zsmalloc.h @@ -55,5 +55,7 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle); unsigned long zs_get_total_pages(struct zs_pool *pool); unsigned long zs_compact(struct zs_pool *pool); +unsigned int zs_lookup_class_index(struct zs_pool *pool, unsigned int size); + void zs_pool_stats(struct zs_pool *pool, struct zs_pool_stats *stats); #endif diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index b52b7bb88b52..78feda34ad9a 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -1205,6 +1205,27 @@ static bool zspage_full(struct size_class *class, struct zspage *zspage) return get_zspage_inuse(zspage) == class->objs_per_zspage; } +/** + * zs_lookup_class_index() - Returns index of the zsmalloc &size_class + * that hold objects of the provided size. + * @pool: zsmalloc pool to use + * @size: object size + * + * Context: Any context. + * + * Return: the index of the zsmalloc &size_class that hold objects of the + * provided size. 
+ */ +unsigned int zs_lookup_class_index(struct zs_pool *pool, unsigned int size) +{ + struct size_class *class; + + class = pool->size_class[get_size_class_index(size)]; + + return class->index; +} +EXPORT_SYMBOL_GPL(zs_lookup_class_index); + unsigned long zs_get_total_pages(struct zs_pool *pool) { return atomic_long_read(&pool->pages_allocated); From 4942cf6ad07c487d24112ffbb27362f4e6b409b8 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 9 Nov 2022 20:50:43 +0900 Subject: [PATCH 3034/4122] zram: remove redundant checks from zram_recompress() Size class index comparison is powerful enough so we can remove object size comparisons. Link: https://lkml.kernel.org/r/20221109115047.2921851-10-senozhatsky@chromium.org Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Alexey Romanov Cc: Nhat Pham Cc: Nitin Gupta Cc: Suleiman Souhlal Signed-off-by: Andrew Morton --- drivers/block/zram/zram_drv.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 66659f16f6c8..72beb33366fb 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1722,9 +1722,7 @@ static int zram_recompress(struct zram *zram, u32 index, struct page *page, comp_len_new); /* Continue until we make progress */ - if (comp_len_new >= huge_class_size || - comp_len_new >= comp_len_old || - class_index_new >= class_index_old || + if (class_index_new >= class_index_old || (threshold && comp_len_new >= threshold)) { zcomp_stream_put(zram->comps[prio]); continue; @@ -1747,9 +1745,7 @@ static int zram_recompress(struct zram *zram, u32 index, struct page *page, * that would save memory, mark the object as incompressible so that * we will not try to compress it again. 
*/ - if (comp_len_new >= huge_class_size || - comp_len_new >= comp_len_old || - class_index_new >= class_index_old) { + if (class_index_new >= class_index_old) { zram_set_flag(zram, index, ZRAM_INCOMPRESSIBLE); return 0; } From a55cf9648d3de486ccf0eca980a02f0faff8ec45 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 9 Nov 2022 20:50:44 +0900 Subject: [PATCH 3035/4122] zram: add algo parameter support to zram_recompress() Recompression iterates through all the registered secondary compression algorithms in order of their priorities so that we have higher chances of finding the algorithm that compresses a particular page. This, however, may not always be the best approach and sometimes we may want to limit recompression to only one particular algorithm. For instance, when a higher priority algorithm uses too much power and the device has a relatively low battery level we may want to limit recompression to use only a lower priority algorithm, which uses less power. Introduce algo= parameter support to recompression sysfs knob so that user-space can request recompression with a particular algorithm only: echo "type=idle algo=zstd" > /sys/block/zramX/recompress Link: https://lkml.kernel.org/r/20221109115047.2921851-11-senozhatsky@chromium.org Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Alexey Romanov Cc: Nhat Pham Cc: Nitin Gupta Cc: Suleiman Souhlal Signed-off-by: Andrew Morton --- drivers/block/zram/zram_drv.c | 54 +++++++++++++++++++++++++++++------ drivers/block/zram/zram_drv.h | 1 + 2 files changed, 46 insertions(+), 9 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 72beb33366fb..798c421fdd36 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1674,6 +1674,7 @@ static int zram_recompress(struct zram *zram, u32 index, struct page *page, unsigned int comp_len_new; unsigned int class_index_old; unsigned int class_index_new; + u32 num_recomps = 0; void *src, *dst; int ret; @@ 
-1708,6 +1709,7 @@ static int zram_recompress(struct zram *zram, u32 index, struct page *page, if (prio <= zram_get_priority(zram, index)) continue; + num_recomps++; zstrm = zcomp_stream_get(zram->comps[prio]); src = kmap_atomic(page); ret = zcomp_compress(zstrm, src, &comp_len_new); @@ -1740,13 +1742,19 @@ static int zram_recompress(struct zram *zram, u32 index, struct page *page, if (!zstrm) return 0; - /* - * All secondary algorithms failed to re-compress the page in a way - * that would save memory, mark the object as incompressible so that - * we will not try to compress it again. - */ if (class_index_new >= class_index_old) { - zram_set_flag(zram, index, ZRAM_INCOMPRESSIBLE); + /* + * Secondary algorithms failed to re-compress the page + * in a way that would save memory, mark the object as + * incompressible so that we will not try to compress + * it again. + * + * We need to make sure that all secondary algorithms have + * failed, so we test if the number of recompressions matches + * the number of active secondary algorithms. 
+ */ + if (num_recomps == zram->num_active_comps - 1) + zram_set_flag(zram, index, ZRAM_INCOMPRESSIBLE); return 0; } @@ -1795,10 +1803,11 @@ static ssize_t recompress_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { + u32 prio = ZRAM_SECONDARY_COMP, prio_max = ZRAM_MAX_COMPS; struct zram *zram = dev_to_zram(dev); - u32 mode = 0, threshold = 0, prio = ZRAM_SECONDARY_COMP; unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; - char *args, *param, *val; + char *args, *param, *val, *algo = NULL; + u32 mode = 0, threshold = 0; unsigned long index; struct page *page; ssize_t ret; @@ -1830,6 +1839,11 @@ static ssize_t recompress_store(struct device *dev, return ret; continue; } + + if (!strcmp(param, "algo")) { + algo = val; + continue; + } } if (threshold >= PAGE_SIZE) @@ -1841,6 +1855,26 @@ static ssize_t recompress_store(struct device *dev, goto release_init_lock; } + if (algo) { + bool found = false; + + for (; prio < ZRAM_MAX_COMPS; prio++) { + if (!zram->comp_algs[prio]) + continue; + + if (!strcmp(zram->comp_algs[prio], algo)) { + prio_max = min(prio + 1, ZRAM_MAX_COMPS); + found = true; + break; + } + } + + if (!found) { + ret = -EINVAL; + goto release_init_lock; + } + } + page = alloc_page(GFP_KERNEL); if (!page) { ret = -ENOMEM; @@ -1871,7 +1905,7 @@ static ssize_t recompress_store(struct device *dev, goto next; err = zram_recompress(zram, index, page, threshold, - prio, ZRAM_MAX_COMPS); + prio, prio_max); next: zram_slot_unlock(zram, index); if (err) { @@ -2107,6 +2141,7 @@ static void zram_destroy_comps(struct zram *zram) if (!comp) continue; zcomp_destroy(comp); + zram->num_active_comps--; } } @@ -2174,6 +2209,7 @@ static ssize_t disksize_store(struct device *dev, } zram->comps[prio] = comp; + zram->num_active_comps++; } zram->disksize = disksize; set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT); diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 
b80faae76835..473325415a74 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -125,6 +125,7 @@ struct zram { */ u64 disksize; /* bytes */ const char *comp_algs[ZRAM_MAX_COMPS]; + s8 num_active_comps; /* * zram is claimed so open request will be failed */ From 443dd798062c1549e790539e572cbda4b7a8df30 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 9 Nov 2022 20:50:45 +0900 Subject: [PATCH 3036/4122] documentation: add zram recompression documentation Document user-space visible device attributes that are enabled by ZRAM_MULTI_COMP. Link: https://lkml.kernel.org/r/20221109115047.2921851-12-senozhatsky@chromium.org Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Alexey Romanov Cc: Nhat Pham Cc: Nitin Gupta Cc: Suleiman Souhlal Signed-off-by: Andrew Morton --- Documentation/admin-guide/blockdev/zram.rst | 81 +++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst index 177a142c3146..d898b7ace33d 100644 --- a/Documentation/admin-guide/blockdev/zram.rst +++ b/Documentation/admin-guide/blockdev/zram.rst @@ -401,6 +401,87 @@ budget in next setting is user's job. If admin wants to measure writeback count in a certain period, they could know it via /sys/block/zram0/bd_stat's 3rd column. +recompression +------------- + +With CONFIG_ZRAM_MULTI_COMP, zram can recompress pages using alternative +(secondary) compression algorithms. The basic idea is that alternative +compression algorithm can provide better compression ratio at a price of +(potentially) slower compression/decompression speeds. Alternative compression +algorithm can, for example, be more successful compressing huge pages (those +that default algorithm failed to compress). Another application is idle pages +recompression - pages that are cold and sit in the memory can be recompressed +using more effective algorithm and, hence, reduce zsmalloc memory usage. 
+ +With CONFIG_ZRAM_MULTI_COMP, zram supports up to 4 compression algorithms: +one primary and up to 3 secondary ones. Primary zram compressor is explained +in "3) Select compression algorithm", secondary algorithms are configured +using recomp_algorithm device attribute. + +Example::: + + #show supported recompression algorithms + cat /sys/block/zramX/recomp_algorithm + #1: lzo lzo-rle lz4 lz4hc [zstd] + #2: lzo lzo-rle lz4 [lz4hc] zstd + +Alternative compression algorithms are sorted by priority. In the example +above, zstd is used as the first alternative algorithm, which has priority +of 1, while lz4hc is configured as a compression algorithm with priority 2. +Alternative compression algorithm's priority is provided during algorithms +configuration::: + + #select zstd recompression algorithm, priority 1 + echo "algo=zstd priority=1" > /sys/block/zramX/recomp_algorithm + + #select deflate recompression algorithm, priority 2 + echo "algo=deflate priority=2" > /sys/block/zramX/recomp_algorithm + +Another device attribute that CONFIG_ZRAM_MULTI_COMP enables is recompress, +which controls recompression. + +Examples::: + + #IDLE pages recompression is activated by `idle` mode + echo "type=idle" > /sys/block/zramX/recompress + + #HUGE pages recompression is activated by `huge` mode + echo "type=huge" > /sys/block/zram0/recompress + + #HUGE_IDLE pages recompression is activated by `huge_idle` mode + echo "type=huge_idle" > /sys/block/zramX/recompress + +The number of idle pages can be significant, so user-space can pass a size +threshold (in bytes) to the recompress knob: zram will recompress only pages +of equal or greater size::: + + #recompress all pages larger than 3000 bytes + echo "threshold=3000" > /sys/block/zramX/recompress + + #recompress idle pages larger than 2000 bytes + echo "type=idle threshold=2000" > /sys/block/zramX/recompress + +Recompression of idle pages requires memory tracking. 
+ +During re-compression, for every page that matches re-compression criteria, +ZRAM iterates the list of registered alternative compression algorithms in +order of their priorities. ZRAM stops either when re-compression was +successful (re-compressed object is smaller in size than the original one) +and matches re-compression criteria (e.g. size threshold) or when there are +no secondary algorithms left to try. If none of the secondary algorithms can +successfully re-compress the page, such a page is marked as incompressible, +so ZRAM will not attempt to re-compress it in the future. + +This re-compression behaviour, when it iterates through the list of +registered compression algorithms, increases our chances of finding the +algorithm that successfully compresses a particular page. Sometimes, however, +it is convenient (and sometimes even necessary) to limit recompression to +only one particular algorithm so that it will not try any other algorithms. +This can be achieved by providing an algo=NAME parameter::: + + #use zstd algorithm only (if registered) + echo "type=huge algo=zstd" > /sys/block/zramX/recompress + memory tracking =============== From b46f9ea3cb351587b2cfc68f7211f7a7cc5b6673 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 9 Nov 2022 20:50:46 +0900 Subject: [PATCH 3037/4122] zram: add incompressible writeback Add support for incompressible pages writeback: echo incompressible > /sys/block/zramX/writeback Link: https://lkml.kernel.org/r/20221109115047.2921851-13-senozhatsky@chromium.org Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Alexey Romanov Cc: Nhat Pham Cc: Nitin Gupta Cc: Suleiman Souhlal Signed-off-by: Andrew Morton --- Documentation/admin-guide/blockdev/zram.rst | 7 ++++++- drivers/block/zram/zram_drv.c | 18 ++++++++++++------ 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst index d898b7ace33d..f14c8c2e42f3 
100644 --- a/Documentation/admin-guide/blockdev/zram.rst +++ b/Documentation/admin-guide/blockdev/zram.rst @@ -348,8 +348,13 @@ this can be accomplished with:: echo huge_idle > /sys/block/zramX/writeback +If a user chooses to writeback only incompressible pages (pages that none of +algorithms can compress) this can be accomplished with:: + + echo incompressible > /sys/block/zramX/writeback + If an admin wants to write a specific page in zram device to the backing device, -they could write a page index into the interface. +they could write a page index into the interface:: echo "page_index=1251" > /sys/block/zramX/writeback diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 798c421fdd36..25b7ff2b56bf 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -645,10 +645,10 @@ static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec, #define PAGE_WB_SIG "page_index=" -#define PAGE_WRITEBACK 0 -#define HUGE_WRITEBACK (1<<0) -#define IDLE_WRITEBACK (1<<1) - +#define PAGE_WRITEBACK 0 +#define HUGE_WRITEBACK (1<<0) +#define IDLE_WRITEBACK (1<<1) +#define INCOMPRESSIBLE_WRITEBACK (1<<2) static ssize_t writeback_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) @@ -669,6 +669,8 @@ static ssize_t writeback_store(struct device *dev, mode = HUGE_WRITEBACK; else if (sysfs_streq(buf, "huge_idle")) mode = IDLE_WRITEBACK | HUGE_WRITEBACK; + else if (sysfs_streq(buf, "incompressible")) + mode = INCOMPRESSIBLE_WRITEBACK; else { if (strncmp(buf, PAGE_WB_SIG, sizeof(PAGE_WB_SIG) - 1)) return -EINVAL; @@ -731,11 +733,15 @@ static ssize_t writeback_store(struct device *dev, goto next; if (mode & IDLE_WRITEBACK && - !zram_test_flag(zram, index, ZRAM_IDLE)) + !zram_test_flag(zram, index, ZRAM_IDLE)) goto next; if (mode & HUGE_WRITEBACK && - !zram_test_flag(zram, index, ZRAM_HUGE)) + !zram_test_flag(zram, index, ZRAM_HUGE)) goto next; + if (mode & INCOMPRESSIBLE_WRITEBACK && + 
!zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE)) + goto next; + /* * Clearing ZRAM_UNDER_WB is duty of caller. * IOW, zram_free_page never clear it. From 77db7bb56bd711586243924a5582727f7a93fb7f Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 9 Nov 2022 20:50:47 +0900 Subject: [PATCH 3038/4122] zram: add incompressible flag to read_block_state() Add a new flag to zram block state that shows if the page is incompressible: that none of the algorithms (including secondary ones) could compress it. Link: https://lkml.kernel.org/r/20221109115047.2921851-14-senozhatsky@chromium.org Suggested-by: Minchan Kim Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Alexey Romanov Cc: Nhat Pham Cc: Nitin Gupta Cc: Suleiman Souhlal Signed-off-by: Andrew Morton --- Documentation/admin-guide/blockdev/zram.rst | 11 +++++++---- drivers/block/zram/zram_drv.c | 6 ++++-- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst index f14c8c2e42f3..e4551579cb12 100644 --- a/Documentation/admin-guide/blockdev/zram.rst +++ b/Documentation/admin-guide/blockdev/zram.rst @@ -497,10 +497,11 @@ pages of the process with*pagemap. If you enable the feature, you could see block state via /sys/kernel/debug/zram/zram0/block_state". The output is as follows:: - 300 75.033841 .wh.. - 301 63.806904 s.... - 302 63.806919 ..hi. - 303 62.801919 ....r + 300 75.033841 .wh... + 301 63.806904 s..... + 302 63.806919 ..hi.. + 303 62.801919 ....r. + 304 146.781902 ..hi.n First column zram's block index. 
@@ -519,6 +520,8 @@ Third column idle page r: recompressed page (secondary compression algorithm) + n: + none (including secondary) of algorithms could compress it First line of above example says 300th block is accessed at 75.033841sec and the block's state is huge so it is written back to the backing diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 25b7ff2b56bf..9d33801e8ba8 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -946,14 +946,16 @@ static ssize_t read_block_state(struct file *file, char __user *buf, ts = ktime_to_timespec64(zram->table[index].ac_time); copied = snprintf(kbuf + written, count, - "%12zd %12lld.%06lu %c%c%c%c%c\n", + "%12zd %12lld.%06lu %c%c%c%c%c%c\n", index, (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC, zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.', zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.', zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.', zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.', - zram_get_priority(zram, index) ? 'r' : '.'); + zram_get_priority(zram, index) ? 'r' : '.', + zram_test_flag(zram, index, + ZRAM_INCOMPRESSIBLE) ? 'n' : '.'); if (count <= copied) { zram_slot_unlock(zram, index); From c959a0e8de2c4db6ca6cc8f490223e2e1e58934b Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 15 Nov 2022 11:03:14 +0900 Subject: [PATCH 3039/4122] Docs/ABI/zram: document zram recompress sysfs knobs Document zram re-compression sysfs knobs. 
Link: https://lkml.kernel.org/r/20221115020314.386235-1-senozhatsky@chromium.org Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Nitin Gupta Signed-off-by: Andrew Morton --- Documentation/ABI/testing/sysfs-block-zram | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-block-zram b/Documentation/ABI/testing/sysfs-block-zram index 14b2bf2e5105..628a00fb20a9 100644 --- a/Documentation/ABI/testing/sysfs-block-zram +++ b/Documentation/ABI/testing/sysfs-block-zram @@ -137,3 +137,17 @@ Description: The writeback_limit file is read-write and specifies the maximum amount of writeback ZRAM can do. The limit could be changed in run time. + +What: /sys/block/zram/recomp_algorithm +Date: November 2022 +Contact: Sergey Senozhatsky +Description: + The recomp_algorithm file is read-write and allows to set + or show secondary compression algorithms. + +What: /sys/block/zram/recompress +Date: November 2022 +Contact: Sergey Senozhatsky +Description: + The recompress file is write-only and triggers re-compression + with secondary compression algorithms. From c66b6ead74ffdad8659eb829468343a88afc2f2c Mon Sep 17 00:00:00 2001 From: Pavankumar Kondeti Date: Wed, 9 Nov 2022 14:56:46 +0530 Subject: [PATCH 3040/4122] mm/kfence: remove hung_task cruft commit fdf756f71271 ("sched: Fix more TASK_state comparisons") makes hung_task not to monitor TASK_IDLE tasks. The special handling to workaround hung_task warnings is not required anymore. 
Link: https://lkml.kernel.org/r/1667986006-25420-1-git-send-email-quic_pkondeti@quicinc.com Signed-off-by: Pavankumar Kondeti Reviewed-by: Marco Elver Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Peter Zijlstra Signed-off-by: Andrew Morton --- mm/kfence/core.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 141788858b70..08f5bd6fc36d 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -799,16 +798,7 @@ static void toggle_allocation_gate(struct work_struct *work) /* Enable static key, and await allocation to happen. */ static_branch_enable(&kfence_allocation_key); - if (sysctl_hung_task_timeout_secs) { - /* - * During low activity with no allocations we might wait a - * while; let's avoid the hung task warning. - */ - wait_event_idle_timeout(allocation_wait, atomic_read(&kfence_allocation_gate), - sysctl_hung_task_timeout_secs * HZ / 2); - } else { - wait_event_idle(allocation_wait, atomic_read(&kfence_allocation_gate)); - } + wait_event_idle(allocation_wait, atomic_read(&kfence_allocation_gate)); /* Disable static key and reset timer. */ static_branch_disable(&kfence_allocation_key); From 16fd6b31dd9b24acf83d439a73a41c4138199424 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Wed, 9 Nov 2022 16:40:27 +0800 Subject: [PATCH 3041/4122] Revert "mm: migration: fix the FOLL_GET failure on following huge page" Revert commit 831568214883 ("mm: migration: fix the FOLL_GET failure on following huge page"), since after commit 1a6baaa0db73 ("s390/hugetlb: switch to generic version of follow_huge_pud()") and commit 57a196a58421 ("hugetlb: simplify hugetlb handling in follow_page_mask") were merged, now all the following huge page routines can support FOLL_GET operation. 
Link: https://lkml.kernel.org/r/496786039852aba90ffa68f10d0df3f4236a990b.1667983080.git.baolin.wang@linux.alibaba.com Signed-off-by: Baolin Wang Acked-by: Haiyue Wang Cc: Baolin Wang Cc: "Huang, Ying" Cc: Mike Kravetz Cc: Muchun Song Signed-off-by: Andrew Morton --- mm/migrate.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index 4aea647a0180..4eccf4e1da2c 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1899,7 +1899,6 @@ static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages, for (i = 0; i < nr_pages; i++) { unsigned long addr = (unsigned long)(*pages); - unsigned int foll_flags = FOLL_DUMP; struct vm_area_struct *vma; struct page *page; int err = -EFAULT; @@ -1908,12 +1907,8 @@ static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages, if (!vma) goto set_status; - /* Not all huge page follow APIs support 'FOLL_GET' */ - if (!is_vm_hugetlb_page(vma)) - foll_flags |= FOLL_GET; - /* FOLL_DUMP to ignore special (like zero) pages */ - page = follow_page(vma, addr, foll_flags); + page = follow_page(vma, addr, FOLL_GET | FOLL_DUMP); err = PTR_ERR(page); if (IS_ERR(page)) @@ -1926,8 +1921,7 @@ static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages, if (!is_zone_device_page(page)) err = page_to_nid(page); - if (foll_flags & FOLL_GET) - put_page(page); + put_page(page); set_status: *status = err; From 49f51859221a3dfee27488eaeaff800459cac6a9 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 9 Nov 2022 09:23:47 +0800 Subject: [PATCH 3042/4122] migrate: convert unmap_and_move() to use folios Patch series "migrate: convert migrate_pages()/unmap_and_move() to use folios", v2. The conversion is quite straightforward, just replace the page API to the corresponding folio API. migrate_pages() and unmap_and_move() mostly work with folios (head pages) only. 
This patch (of 2): Quite straightforward, the page functions are converted to corresponding folio functions. Same for comments. Link: https://lkml.kernel.org/r/20221109012348.93849-1-ying.huang@intel.com Link: https://lkml.kernel.org/r/20221109012348.93849-2-ying.huang@intel.com Signed-off-by: "Huang, Ying" Reviewed-by: Yang Shi Reviewed-by: Zi Yan Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: Baolin Wang Cc: Oscar Salvador Signed-off-by: Andrew Morton --- mm/migrate.c | 54 ++++++++++++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index 4eccf4e1da2c..e41a3cd24a59 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1150,79 +1150,79 @@ out: } /* - * Obtain the lock on page, remove all ptes and migrate the page - * to the newly allocated page in newpage. + * Obtain the lock on folio, remove all ptes and migrate the folio + * to the newly allocated folio in dst. */ static int unmap_and_move(new_page_t get_new_page, free_page_t put_new_page, - unsigned long private, struct page *page, + unsigned long private, struct folio *src, int force, enum migrate_mode mode, enum migrate_reason reason, struct list_head *ret) { - struct folio *dst, *src = page_folio(page); + struct folio *dst; int rc = MIGRATEPAGE_SUCCESS; struct page *newpage = NULL; - if (!thp_migration_supported() && PageTransHuge(page)) + if (!thp_migration_supported() && folio_test_transhuge(src)) return -ENOSYS; - if (page_count(page) == 1) { - /* Page was freed from under us. So we are done. */ - ClearPageActive(page); - ClearPageUnevictable(page); + if (folio_ref_count(src) == 1) { + /* Folio was freed from under us. So we are done. */ + folio_clear_active(src); + folio_clear_unevictable(src); /* free_pages_prepare() will clear PG_isolated. 
*/ goto out; } - newpage = get_new_page(page, private); + newpage = get_new_page(&src->page, private); if (!newpage) return -ENOMEM; dst = page_folio(newpage); - newpage->private = 0; + dst->private = 0; rc = __unmap_and_move(src, dst, force, mode); if (rc == MIGRATEPAGE_SUCCESS) - set_page_owner_migrate_reason(newpage, reason); + set_page_owner_migrate_reason(&dst->page, reason); out: if (rc != -EAGAIN) { /* - * A page that has been migrated has all references - * removed and will be freed. A page that has not been + * A folio that has been migrated has all references + * removed and will be freed. A folio that has not been * migrated will have kept its references and be restored. */ - list_del(&page->lru); + list_del(&src->lru); } /* * If migration is successful, releases reference grabbed during - * isolation. Otherwise, restore the page to right list unless + * isolation. Otherwise, restore the folio to right list unless * we want to retry. */ if (rc == MIGRATEPAGE_SUCCESS) { /* - * Compaction can migrate also non-LRU pages which are + * Compaction can migrate also non-LRU folios which are * not accounted to NR_ISOLATED_*. They can be recognized - * as __PageMovable + * as __folio_test_movable */ - if (likely(!__PageMovable(page))) - mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + - page_is_file_lru(page), -thp_nr_pages(page)); + if (likely(!__folio_test_movable(src))) + mod_node_page_state(folio_pgdat(src), NR_ISOLATED_ANON + + folio_is_file_lru(src), -folio_nr_pages(src)); if (reason != MR_MEMORY_FAILURE) /* - * We release the page in page_handle_poison. + * We release the folio in page_handle_poison. 
*/ - put_page(page); + folio_put(src); } else { if (rc != -EAGAIN) - list_add_tail(&page->lru, ret); + list_add_tail(&src->lru, ret); if (put_new_page) - put_new_page(newpage, private); + put_new_page(&dst->page, private); else - put_page(newpage); + folio_put(dst); } return rc; @@ -1459,7 +1459,7 @@ thp_subpage_migration: &ret_pages); else rc = unmap_and_move(get_new_page, put_new_page, - private, page, pass > 2, mode, + private, page_folio(page), pass > 2, mode, reason, &ret_pages); /* * The rules are: From eaec4e639f11413ce75fbf38affd1aa5c40979e9 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 9 Nov 2022 09:23:48 +0800 Subject: [PATCH 3043/4122] migrate: convert migrate_pages() to use folios Quite straightforward, the page functions are converted to corresponding folio functions. Same for comments. THP specific code are converted to be large folio. Link: https://lkml.kernel.org/r/20221109012348.93849-3-ying.huang@intel.com Signed-off-by: "Huang, Ying" Reviewed-by: Baolin Wang Tested-by: Baolin Wang Cc: Zi Yan Cc: Yang Shi Cc: Oscar Salvador Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- mm/migrate.c | 210 +++++++++++++++++++++++++++------------------------ 1 file changed, 112 insertions(+), 98 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index e41a3cd24a59..4aa3b6d4f67c 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1373,231 +1373,245 @@ out: return rc; } -static inline int try_split_thp(struct page *page, struct list_head *split_pages) +static inline int try_split_folio(struct folio *folio, struct list_head *split_folios) { int rc; - lock_page(page); - rc = split_huge_page_to_list(page, split_pages); - unlock_page(page); + folio_lock(folio); + rc = split_folio_to_list(folio, split_folios); + folio_unlock(folio); if (!rc) - list_move_tail(&page->lru, split_pages); + list_move_tail(&folio->lru, split_folios); return rc; } /* - * migrate_pages - migrate the pages specified in a list, to the free pages + * migrate_pages - migrate the folios 
specified in a list, to the free folios * supplied as the target for the page migration * - * @from: The list of pages to be migrated. - * @get_new_page: The function used to allocate free pages to be used - * as the target of the page migration. - * @put_new_page: The function used to free target pages if migration + * @from: The list of folios to be migrated. + * @get_new_page: The function used to allocate free folios to be used + * as the target of the folio migration. + * @put_new_page: The function used to free target folios if migration * fails, or NULL if no special handling is necessary. * @private: Private data to be passed on to get_new_page() * @mode: The migration mode that specifies the constraints for - * page migration, if any. - * @reason: The reason for page migration. - * @ret_succeeded: Set to the number of normal pages migrated successfully if + * folio migration, if any. + * @reason: The reason for folio migration. + * @ret_succeeded: Set to the number of folios migrated successfully if * the caller passes a non-NULL pointer. * - * The function returns after 10 attempts or if no pages are movable any more - * because the list has become empty or no retryable pages exist any more. - * It is caller's responsibility to call putback_movable_pages() to return pages + * The function returns after 10 attempts or if no folios are movable any more + * because the list has become empty or no retryable folios exist any more. + * It is caller's responsibility to call putback_movable_pages() to return folios * to the LRU or free list only if ret != 0. * - * Returns the number of {normal page, THP, hugetlb} that were not migrated, or - * an error code. The number of THP splits will be considered as the number of - * non-migrated THP, no matter how many subpages of the THP are migrated successfully. + * Returns the number of {normal folio, large folio, hugetlb} that were not + * migrated, or an error code. 
The number of large folio splits will be + * considered as the number of non-migrated large folio, no matter how many + * split folios of the large folio are migrated successfully. */ int migrate_pages(struct list_head *from, new_page_t get_new_page, free_page_t put_new_page, unsigned long private, enum migrate_mode mode, int reason, unsigned int *ret_succeeded) { int retry = 1; + int large_retry = 1; int thp_retry = 1; int nr_failed = 0; int nr_failed_pages = 0; int nr_retry_pages = 0; int nr_succeeded = 0; int nr_thp_succeeded = 0; + int nr_large_failed = 0; int nr_thp_failed = 0; int nr_thp_split = 0; int pass = 0; + bool is_large = false; bool is_thp = false; - struct page *page; - struct page *page2; - int rc, nr_subpages; - LIST_HEAD(ret_pages); - LIST_HEAD(thp_split_pages); + struct folio *folio, *folio2; + int rc, nr_pages; + LIST_HEAD(ret_folios); + LIST_HEAD(split_folios); bool nosplit = (reason == MR_NUMA_MISPLACED); - bool no_subpage_counting = false; + bool no_split_folio_counting = false; trace_mm_migrate_pages_start(mode, reason); -thp_subpage_migration: - for (pass = 0; pass < 10 && (retry || thp_retry); pass++) { +split_folio_migration: + for (pass = 0; pass < 10 && (retry || large_retry); pass++) { retry = 0; + large_retry = 0; thp_retry = 0; nr_retry_pages = 0; - list_for_each_entry_safe(page, page2, from, lru) { + list_for_each_entry_safe(folio, folio2, from, lru) { /* - * THP statistics is based on the source huge page. - * Capture required information that might get lost - * during migration. + * Large folio statistics is based on the source large + * folio. Capture required information that might get + * lost during migration. 
*/ - is_thp = PageTransHuge(page) && !PageHuge(page); - nr_subpages = compound_nr(page); + is_large = folio_test_large(folio) && !folio_test_hugetlb(folio); + is_thp = is_large && folio_test_pmd_mappable(folio); + nr_pages = folio_nr_pages(folio); cond_resched(); - if (PageHuge(page)) + if (folio_test_hugetlb(folio)) rc = unmap_and_move_huge_page(get_new_page, - put_new_page, private, page, - pass > 2, mode, reason, - &ret_pages); + put_new_page, private, + &folio->page, pass > 2, mode, + reason, + &ret_folios); else rc = unmap_and_move(get_new_page, put_new_page, - private, page_folio(page), pass > 2, mode, - reason, &ret_pages); + private, folio, pass > 2, mode, + reason, &ret_folios); /* * The rules are: - * Success: non hugetlb page will be freed, hugetlb - * page will be put back + * Success: non hugetlb folio will be freed, hugetlb + * folio will be put back * -EAGAIN: stay on the from list * -ENOMEM: stay on the from list * -ENOSYS: stay on the from list - * Other errno: put on ret_pages list then splice to + * Other errno: put on ret_folios list then splice to * from list */ switch(rc) { /* - * THP migration might be unsupported or the - * allocation could've failed so we should - * retry on the same page with the THP split - * to base pages. + * Large folio migration might be unsupported or + * the allocation could've failed so we should retry + * on the same folio with the large folio split + * to normal folios. * - * Sub-pages are put in thp_split_pages, and + * Split folios are put in split_folios, and * we will migrate them after the rest of the * list is processed. 
*/ case -ENOSYS: - /* THP migration is unsupported */ - if (is_thp) { - nr_thp_failed++; - if (!try_split_thp(page, &thp_split_pages)) { - nr_thp_split++; + /* Large folio migration is unsupported */ + if (is_large) { + nr_large_failed++; + nr_thp_failed += is_thp; + if (!try_split_folio(folio, &split_folios)) { + nr_thp_split += is_thp; break; } /* Hugetlb migration is unsupported */ - } else if (!no_subpage_counting) { + } else if (!no_split_folio_counting) { nr_failed++; } - nr_failed_pages += nr_subpages; - list_move_tail(&page->lru, &ret_pages); + nr_failed_pages += nr_pages; + list_move_tail(&folio->lru, &ret_folios); break; case -ENOMEM: /* * When memory is low, don't bother to try to migrate - * other pages, just exit. + * other folios, just exit. */ - if (is_thp) { - nr_thp_failed++; - /* THP NUMA faulting doesn't split THP to retry. */ + if (is_large) { + nr_large_failed++; + nr_thp_failed += is_thp; + /* Large folio NUMA faulting doesn't split to retry. */ if (!nosplit) { - int ret = try_split_thp(page, &thp_split_pages); + int ret = try_split_folio(folio, &split_folios); if (!ret) { - nr_thp_split++; + nr_thp_split += is_thp; break; } else if (reason == MR_LONGTERM_PIN && ret == -EAGAIN) { /* - * Try again to split THP to mitigate - * the failure of longterm pinning. + * Try again to split large folio to + * mitigate the failure of longterm pinning. */ - thp_retry++; - nr_retry_pages += nr_subpages; + large_retry++; + thp_retry += is_thp; + nr_retry_pages += nr_pages; break; } } - } else if (!no_subpage_counting) { + } else if (!no_split_folio_counting) { nr_failed++; } - nr_failed_pages += nr_subpages + nr_retry_pages; + nr_failed_pages += nr_pages + nr_retry_pages; /* - * There might be some subpages of fail-to-migrate THPs - * left in thp_split_pages list. Move them back to migration + * There might be some split folios of fail-to-migrate large + * folios left in split_folios list. 
Move them back to migration * list so that they could be put back to the right list by - * the caller otherwise the page refcnt will be leaked. + * the caller otherwise the folio refcnt will be leaked. */ - list_splice_init(&thp_split_pages, from); + list_splice_init(&split_folios, from); /* nr_failed isn't updated for not used */ + nr_large_failed += large_retry; nr_thp_failed += thp_retry; goto out; case -EAGAIN: - if (is_thp) - thp_retry++; - else if (!no_subpage_counting) + if (is_large) { + large_retry++; + thp_retry += is_thp; + } else if (!no_split_folio_counting) { retry++; - nr_retry_pages += nr_subpages; + } + nr_retry_pages += nr_pages; break; case MIGRATEPAGE_SUCCESS: - nr_succeeded += nr_subpages; - if (is_thp) - nr_thp_succeeded++; + nr_succeeded += nr_pages; + nr_thp_succeeded += is_thp; break; default: /* * Permanent failure (-EBUSY, etc.): - * unlike -EAGAIN case, the failed page is - * removed from migration page list and not + * unlike -EAGAIN case, the failed folio is + * removed from migration folio list and not * retried in the next outer loop. */ - if (is_thp) - nr_thp_failed++; - else if (!no_subpage_counting) + if (is_large) { + nr_large_failed++; + nr_thp_failed += is_thp; + } else if (!no_split_folio_counting) { nr_failed++; + } - nr_failed_pages += nr_subpages; + nr_failed_pages += nr_pages; break; } } } nr_failed += retry; + nr_large_failed += large_retry; nr_thp_failed += thp_retry; nr_failed_pages += nr_retry_pages; /* - * Try to migrate subpages of fail-to-migrate THPs, no nr_failed - * counting in this round, since all subpages of a THP is counted - * as 1 failure in the first round. + * Try to migrate split folios of fail-to-migrate large folios, no + * nr_failed counting in this round, since all split folios of a + * large folio is counted as 1 failure in the first round. 
*/ - if (!list_empty(&thp_split_pages)) { + if (!list_empty(&split_folios)) { /* - * Move non-migrated pages (after 10 retries) to ret_pages + * Move non-migrated folios (after 10 retries) to ret_folios * to avoid migrating them again. */ - list_splice_init(from, &ret_pages); - list_splice_init(&thp_split_pages, from); - no_subpage_counting = true; + list_splice_init(from, &ret_folios); + list_splice_init(&split_folios, from); + no_split_folio_counting = true; retry = 1; - goto thp_subpage_migration; + goto split_folio_migration; } - rc = nr_failed + nr_thp_failed; + rc = nr_failed + nr_large_failed; out: /* - * Put the permanent failure page back to migration list, they + * Put the permanent failure folio back to migration list, they * will be put back to the right list by the caller. */ - list_splice(&ret_pages, from); + list_splice(&ret_folios, from); /* - * Return 0 in case all subpages of fail-to-migrate THPs are - * migrated successfully. + * Return 0 in case all split folios of fail-to-migrate large folios + * are migrated successfully. */ if (list_empty(from)) rc = 0; From 4a625ceee8a0ab0273534cb6b432ce6b331db5ee Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Thu, 10 Nov 2022 07:07:51 +0400 Subject: [PATCH 3044/4122] mm/demotion: fix NULL vs IS_ERR checking in memory_tier_init alloc_memory_type() returns error pointers on error instead of NULL. Use IS_ERR() to check the return value to fix this. 
Link: https://lkml.kernel.org/r/20221110030751.1627266-1-linmq006@gmail.com Fixes: 7b88bda3761b ("mm/demotion/dax/kmem: set node's abstract distance to MEMTIER_DEFAULT_DAX_ADISTANCE") Signed-off-by: Miaoqian Lin Reviewed-by: "Huang, Ying" Cc: Aneesh Kumar K.V Cc: Wei Xu Signed-off-by: Andrew Morton --- mm/memory-tiers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c index 939e200c283b..c734658c6242 100644 --- a/mm/memory-tiers.c +++ b/mm/memory-tiers.c @@ -645,7 +645,7 @@ static int __init memory_tier_init(void) * than default DRAM tier. */ default_dram_type = alloc_memory_type(MEMTIER_ADISTANCE_DRAM); - if (!default_dram_type) + if (IS_ERR(default_dram_type)) panic("%s() failed to allocate default DRAM tier\n", __func__); /* From 9b34a307f39497198645de5e43f3f00b5e873249 Mon Sep 17 00:00:00 2001 From: Jian Wen Date: Fri, 11 Nov 2022 11:46:39 +0800 Subject: [PATCH 3045/4122] docs: admin-guide: cgroup-v1: update description of inactive_file MADV_FREE pages have been moved into the LRU_INACTIVE_FILE list by commit f7ad2a6cb9f7 ("mm: move MADV_FREE pages into LRU_INACTIVE_FILE list"). Link: https://lkml.kernel.org/r/20221111034639.3593380-1-wenjian1@xiaomi.com Signed-off-by: Jian Wen Signed-off-by: Andrew Morton --- Documentation/admin-guide/cgroup-v1/memory.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/cgroup-v1/memory.rst b/Documentation/admin-guide/cgroup-v1/memory.rst index 5b86245450bd..60370f2c67b9 100644 --- a/Documentation/admin-guide/cgroup-v1/memory.rst +++ b/Documentation/admin-guide/cgroup-v1/memory.rst @@ -543,7 +543,8 @@ inactive_anon # of bytes of anonymous and swap cache memory on inactive LRU list. active_anon # of bytes of anonymous and swap cache memory on active LRU list. -inactive_file # of bytes of file-backed memory on inactive LRU list. 
+inactive_file # of bytes of file-backed memory and MADV_FREE anonymous memory( + LazyFree pages) on inactive LRU list. active_file # of bytes of file-backed memory on active LRU list. unevictable # of bytes of memory that cannot be reclaimed (mlocked etc). =============== =============================================================== From 25e9fa22fbfec1e58c955d2670bf9a18f4ebe9ef Mon Sep 17 00:00:00 2001 From: Yixuan Cao Date: Tue, 15 Nov 2022 01:14:26 +0800 Subject: [PATCH 3046/4122] mm/kmemleak.c: fix a comment I noticed a typo in a code comment and I fixed it. Link: https://lkml.kernel.org/r/20221114171426.91745-1-caoyixuan2019@email.szu.edu.cn Signed-off-by: Yixuan Cao Signed-off-by: Andrew Morton --- mm/kmemleak.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 646e2979641f..267332904354 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1461,7 +1461,7 @@ static void scan_gray_list(void) } /* - * Conditionally call resched() in a object iteration loop while making sure + * Conditionally call resched() in an object iteration loop while making sure * that the given object won't go away without RCU read lock by performing a * get_object() if !pinned. * From c2da319c2e2789dccb20fdafe520ac61c9df84f7 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Sun, 13 Nov 2022 19:04:47 -0500 Subject: [PATCH 3047/4122] mm/uffd: sanity check write bit for uffd-wp protected ptes Let's add one sanity check for CONFIG_DEBUG_VM on the write bit in whatever chance we have when walking through the pgtables. It can bring the error earlier even before the app notices the data was corrupted on the snapshot. Also it helps us to identify this is a wrong pgtable setup, so hopefully a great information to have for debugging too. 
Link: https://lkml.kernel.org/r/20221114000447.1681003-3-peterx@redhat.com Signed-off-by: Peter Xu Cc: Andrea Arcangeli Cc: Alistair Popple Cc: Axel Rasmussen Cc: Mike Rapoport Cc: Nadav Amit Signed-off-by: Andrew Morton --- arch/x86/include/asm/pgtable.h | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 286a71810f9e..0564edd24ffb 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -292,7 +292,23 @@ static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear) #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP static inline int pte_uffd_wp(pte_t pte) { - return pte_flags(pte) & _PAGE_UFFD_WP; + bool wp = pte_flags(pte) & _PAGE_UFFD_WP; + +#ifdef CONFIG_DEBUG_VM + /* + * Having write bit for wr-protect-marked present ptes is fatal, + * because it means the uffd-wp bit will be ignored and write will + * just go through. + * + * Use any chance of pgtable walking to verify this (e.g., when + * page swapped out or being migrated for all purposes). It means + * something is already wrong. Tell the admin even before the + * process crashes. We also nail it with wrong pgtable setup. + */ + WARN_ON_ONCE(wp && pte_write(pte)); +#endif + + return wp; } static inline pte_t pte_mkuffd_wp(pte_t pte) From 369258ce41c6d7663a7b6d509356fecad577378d Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Mon, 14 Nov 2022 15:55:07 -0800 Subject: [PATCH 3048/4122] hugetlb: remove duplicate mmu notifications The common hugetlb unmap routine __unmap_hugepage_range performs mmu notification calls. However, in the case where __unmap_hugepage_range is called via __unmap_hugepage_range_final, mmu notification calls are performed earlier in other calling routines. Remove mmu notification calls from __unmap_hugepage_range. Add notification calls to the only other caller: unmap_hugepage_range. 
unmap_hugepage_range is called for truncation and hole punch, so change notification type from UNMAP to CLEAR as this is more appropriate. Link: https://lkml.kernel.org/r/20221114235507.294320-4-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Suggested-by: Peter Xu Cc: Wei Chen Cc: Axel Rasmussen Cc: David Hildenbrand Cc: Matthew Wilcox Cc: Mina Almasry Cc: Nadav Amit Cc: Naoya Horiguchi Cc: Rik van Riel Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- mm/hugetlb.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 4f1338d82aab..3fd4570fb8b0 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -5076,7 +5076,6 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct struct page *page; struct hstate *h = hstate_vma(vma); unsigned long sz = huge_page_size(h); - struct mmu_notifier_range range; unsigned long last_addr_mask; bool force_flush = false; @@ -5091,13 +5090,6 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct tlb_change_page_size(tlb, sz); tlb_start_vma(tlb, vma); - /* - * If sharing possible, alert mmu notifiers of worst case. 
- */ - mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, mm, start, - end); - adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end); - mmu_notifier_invalidate_range_start(&range); last_addr_mask = hugetlb_mask_last_page(h); address = start; for (; address < end; address += sz) { @@ -5182,7 +5174,6 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct if (ref_page) break; } - mmu_notifier_invalidate_range_end(&range); tlb_end_vma(tlb, vma); /* @@ -5210,6 +5201,7 @@ void __unmap_hugepage_range_final(struct mmu_gather *tlb, hugetlb_vma_lock_write(vma); i_mmap_lock_write(vma->vm_file->f_mapping); + /* mmu notification performed in caller */ __unmap_hugepage_range(tlb, vma, start, end, ref_page, zap_flags); if (zap_flags & ZAP_FLAG_UNMAP) { /* final unmap */ @@ -5234,10 +5226,18 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, struct page *ref_page, zap_flags_t zap_flags) { + struct mmu_notifier_range range; struct mmu_gather tlb; + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm, + start, end); + adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end); + mmu_notifier_invalidate_range_start(&range); tlb_gather_mmu(&tlb, vma->vm_mm); + __unmap_hugepage_range(&tlb, vma, start, end, ref_page, zap_flags); + + mmu_notifier_invalidate_range_end(&range); tlb_finish_mmu(&tlb); } From b7217a0bbe00a98a8f4b15ebc2a8355a31a59e1e Mon Sep 17 00:00:00 2001 From: "T.J. Mercier" Date: Mon, 14 Nov 2022 23:59:49 +0000 Subject: [PATCH 3049/4122] mm: shrinkers: add missing includes for undeclared types The shrinker.h header depends on a user including other headers before it for types used by shrinker.h. Fix this by including the appropriate headers in shrinker.h. 
./include/linux/shrinker.h:13:9: error: unknown type name `gfp_t' 13 | gfp_t gfp_mask; | ^~~~~ ./include/linux/shrinker.h:71:26: error: field `list' has incomplete type 71 | struct list_head list; | ^~~~ ./include/linux/shrinker.h:82:9: error: unknown type name `atomic_long_t' 82 | atomic_long_t *nr_deferred; | Link: https://lkml.kernel.org/r/20221114235949.201749-1-tjmercier@google.com Fixes: 83aeeada7c69 ("vmscan: use atomic-long for shrinker batching") Fixes: b0d40c92adaf ("superblock: introduce per-sb cache shrinker infrastructure") Signed-off-by: T.J. Mercier Cc: Al Viro Cc: Dave Chinner Cc: Konstantin Khlebnikov Signed-off-by: Andrew Morton --- include/linux/shrinker.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index 08e6054e061f..71310efe2fab 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -2,6 +2,9 @@ #ifndef _LINUX_SHRINKER_H #define _LINUX_SHRINKER_H +#include <linux/atomic.h> +#include <linux/types.h> + /* * This struct is used to pass information from page reclaim to the shrinkers. * We consolidate the values for easier extension later. From d09e8ca6cb93bb4b97517a18fbbf7eccb0e9ff43 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Tue, 15 Nov 2022 02:06:01 +0000 Subject: [PATCH 3050/4122] mm: anonymous shared memory naming Since commit 9a10064f5625 ("mm: add a field to store names for private anonymous memory"), a name for private anonymous memory, but not shared anonymous, can be set. However, naming shared anonymous memory is just as useful for tracking purposes. Extend the functionality to be able to set names for shared anon. There are two ways to create anonymous shared memory, using memfd or directly via mmap(): 1. fd = memfd_create(...) mem = mmap(..., MAP_SHARED, fd, ...) 2. mem = mmap(..., MAP_SHARED | MAP_ANONYMOUS, -1, ...) In both cases the anonymous shared memory is created the same way by mapping an unlinked file on tmpfs.
The memfd way allows giving a name to anonymous shared memory, but it is not useful when parts of the shared memory need to have distinct names. Example use case: The VMM maps VM memory as anonymous shared memory (not private because VMM is sandboxed and drivers are running in their own processes). However, the VM reports back to the VMM how parts of the memory are actually used by the guest, how each of the segments should be backed (e.g. 4K pages, 2M pages), and some other information about the segments. The naming allows us to monitor the effective memory footprint for each of these segments from the host without looking inside the guest. Sample output: /* Create shared anonymous segment */ anon_shmem = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); /* Name the segment: "MY-NAME" */ rv = prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, anon_shmem, SIZE, "MY-NAME"); cat /proc/<pid>/maps (and smaps): 7fc8e2b4c000-7fc8f2b4c000 rw-s 00000000 00:01 1024 [anon_shmem:MY-NAME] If the segment is not named, the output is: 7fc8e2b4c000-7fc8f2b4c000 rw-s 00000000 00:01 1024 /dev/zero (deleted) Link: https://lkml.kernel.org/r/20221115020602.804224-1-pasha.tatashin@soleen.com Signed-off-by: Pasha Tatashin Acked-by: David Hildenbrand Cc: Arnd Bergmann Cc: Bagas Sanjaya Cc: Colin Cross Cc: Hugh Dickins Cc: Johannes Weiner Cc: Jonathan Corbet Cc: "Kirill A .
Shutemov" Cc: Liam Howlett Cc: Matthew Wilcox Cc: Mike Rapoport Cc: Paul Gortmaker Cc: Peter Xu Cc: Sean Christopherson Cc: Vincent Whitchurch Cc: Vlastimil Babka Cc: xu xin Cc: Yang Shi Cc: Yu Zhao Signed-off-by: Andrew Morton --- Documentation/filesystems/proc.rst | 8 +++++--- fs/proc/task_mmu.c | 15 +++++++++++---- include/linux/mm.h | 2 ++ include/linux/mm_types.h | 26 ++++++++++++-------------- mm/madvise.c | 7 ++----- mm/shmem.c | 29 +++++++++++++++++++++++++---- 6 files changed, 57 insertions(+), 30 deletions(-) diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst index 898c99eae8e4..b8f175ae4853 100644 --- a/Documentation/filesystems/proc.rst +++ b/Documentation/filesystems/proc.rst @@ -426,14 +426,16 @@ with the memory region, as the case would be with BSS (uninitialized data). The "pathname" shows the name associated file for this mapping. If the mapping is not associated with a file: - ============= ==================================== + =================== =========================================== [heap] the heap of the program [stack] the stack of the main process [vdso] the "virtual dynamic shared object", the kernel system call handler - [anon:] an anonymous mapping that has been + [anon:] a private anonymous mapping that has been named by userspace - ============= ==================================== + [anon_shmem:] an anonymous shared memory mapping that has + been named by userspace + =================== =========================================== or if empty, the mapping is anonymous. 
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 8a74cdcc9af0..89338950afd3 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -274,6 +274,7 @@ static void show_vma_header_prefix(struct seq_file *m, static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) { + struct anon_vma_name *anon_name = NULL; struct mm_struct *mm = vma->vm_mm; struct file *file = vma->vm_file; vm_flags_t flags = vma->vm_flags; @@ -293,6 +294,8 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma) start = vma->vm_start; end = vma->vm_end; show_vma_header_prefix(m, start, end, flags, pgoff, dev, ino); + if (mm) + anon_name = anon_vma_name(vma); /* * Print the dentry name for named mappings, and a @@ -300,7 +303,14 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma) */ if (file) { seq_pad(m, ' '); - seq_file_path(m, file, "\n"); + /* + * If user named this anon shared memory via + * prctl(PR_SET_VMA ..., use the provided name. + */ + if (anon_name) + seq_printf(m, "[anon_shmem:%s]", anon_name->name); + else + seq_file_path(m, file, "\n"); goto done; } @@ -312,8 +322,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma) name = arch_vma_name(vma); if (!name) { - struct anon_vma_name *anon_name; - if (!mm) { name = "[vdso]"; goto done; @@ -330,7 +338,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma) goto done; } - anon_name = anon_vma_name(vma); if (anon_name) { seq_pad(m, ' '); seq_printf(m, "[anon:%s]", anon_name->name); diff --git a/include/linux/mm.h b/include/linux/mm.h index f873441303b7..686879dbb0bd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -700,8 +700,10 @@ static inline unsigned long vma_iter_addr(struct vma_iterator *vmi) * paths in userfault. 
*/ bool vma_is_shmem(struct vm_area_struct *vma); +bool vma_is_anon_shmem(struct vm_area_struct *vma); #else static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; } +static inline bool vma_is_anon_shmem(struct vm_area_struct *vma) { return false; } #endif int vma_is_stack_for_current(struct vm_area_struct *vma); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 6b0009e7d4ae..157c2e22cc7f 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -549,21 +549,11 @@ struct vm_area_struct { * For areas with an address space and backing store, * linkage into the address_space->i_mmap interval tree. * - * For private anonymous mappings, a pointer to a null terminated string - * containing the name given to the vma, or NULL if unnamed. */ - - union { - struct { - struct rb_node rb; - unsigned long rb_subtree_last; - } shared; - /* - * Serialized by mmap_sem. Never use directly because it is - * valid only when vm_file is NULL. Use anon_vma_name instead. - */ - struct anon_vma_name *anon_name; - }; + struct { + struct rb_node rb; + unsigned long rb_subtree_last; + } shared; /* * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma @@ -584,6 +574,14 @@ struct vm_area_struct { struct file * vm_file; /* File we map to (can be NULL). */ void * vm_private_data; /* was vm_pte (shared mem) */ +#ifdef CONFIG_ANON_VMA_NAME + /* + * For private and shared anonymous mappings, a pointer to a null + * terminated string containing the name given to the vma, or NULL if + * unnamed. Serialized by mmap_sem. Use anon_vma_name to access. 
+ */ + struct anon_vma_name *anon_name; +#endif #ifdef CONFIG_SWAP atomic_long_t swap_readahead_info; #endif diff --git a/mm/madvise.c b/mm/madvise.c index b913ba6efc10..83b0c91a126b 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -95,9 +95,6 @@ struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma) { mmap_assert_locked(vma->vm_mm); - if (vma->vm_file) - return NULL; - return vma->anon_name; } @@ -183,7 +180,7 @@ success: * vm_flags is protected by the mmap_lock held in write mode. */ vma->vm_flags = new_flags; - if (!vma->vm_file) { + if (!vma->vm_file || vma_is_anon_shmem(vma)) { error = replace_anon_vma_name(vma, anon_name); if (error) return error; @@ -1273,7 +1270,7 @@ static int madvise_vma_anon_name(struct vm_area_struct *vma, int error; /* Only anonymous mappings can be named */ - if (vma->vm_file) + if (vma->vm_file && !vma_is_anon_shmem(vma)) return -EBADF; error = madvise_update_vma(vma, prev, start, end, vma->vm_flags, diff --git a/mm/shmem.c b/mm/shmem.c index 7428ae3fa4b9..f418d21205be 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -237,11 +237,17 @@ static const struct inode_operations shmem_inode_operations; static const struct inode_operations shmem_dir_inode_operations; static const struct inode_operations shmem_special_inode_operations; static const struct vm_operations_struct shmem_vm_ops; +static const struct vm_operations_struct shmem_anon_vm_ops; static struct file_system_type shmem_fs_type; +bool vma_is_anon_shmem(struct vm_area_struct *vma) +{ + return vma->vm_ops == &shmem_anon_vm_ops; +} + bool vma_is_shmem(struct vm_area_struct *vma) { - return vma->vm_ops == &shmem_vm_ops; + return vma_is_anon_shmem(vma) || vma->vm_ops == &shmem_vm_ops; } static LIST_HEAD(shmem_swaplist); @@ -2263,7 +2269,8 @@ out_nomem: static int shmem_mmap(struct file *file, struct vm_area_struct *vma) { - struct shmem_inode_info *info = SHMEM_I(file_inode(file)); + struct inode *inode = file_inode(file); + struct shmem_inode_info *info = SHMEM_I(inode); int 
ret; ret = seal_check_future_write(info->seals, vma); @@ -2274,7 +2281,11 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma) vma->vm_flags |= VM_MTE_ALLOWED; file_accessed(file); - vma->vm_ops = &shmem_vm_ops; + /* This is anonymous shared memory if it is unlinked at the time of mmap */ + if (inode->i_nlink) + vma->vm_ops = &shmem_vm_ops; + else + vma->vm_ops = &shmem_anon_vm_ops; return 0; } @@ -3988,6 +3999,15 @@ static const struct vm_operations_struct shmem_vm_ops = { #endif }; +static const struct vm_operations_struct shmem_anon_vm_ops = { + .fault = shmem_fault, + .map_pages = filemap_map_pages, +#ifdef CONFIG_NUMA + .set_policy = shmem_set_policy, + .get_policy = shmem_get_policy, +#endif +}; + int shmem_init_fs_context(struct fs_context *fc) { struct shmem_options *ctx; @@ -4163,6 +4183,7 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) EXPORT_SYMBOL_GPL(shmem_truncate_range); #define shmem_vm_ops generic_file_vm_ops +#define shmem_anon_vm_ops generic_file_vm_ops #define shmem_file_operations ramfs_file_operations #define shmem_get_inode(sb, dir, mode, dev, flags) ramfs_get_inode(sb, dir, mode, dev) #define shmem_acct_size(flags, size) 0 @@ -4268,7 +4289,7 @@ int shmem_zero_setup(struct vm_area_struct *vma) if (vma->vm_file) fput(vma->vm_file); vma->vm_file = file; - vma->vm_ops = &shmem_vm_ops; + vma->vm_ops = &shmem_anon_vm_ops; return 0; } From e83b39d6bbdb6d25bd6f5c258832774635d29b47 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 15 Nov 2022 13:32:55 +0100 Subject: [PATCH 3051/4122] mm: make drop_caches keep reclaiming on all nodes Currently, drop_caches are reclaiming node-by-node, looping on each node until reclaim could not make progress. This can however leave quite some slab entries (such as filesystem inodes) unreclaimed if objects say on node 1 keep objects on node 0 pinned. 
So move the "loop until no progress" loop to the node-by-node iteration to retry reclaim also on other nodes if reclaim on some nodes made progress. This fixes problem when drop_caches was not reclaiming lots of otherwise perfectly fine to reclaim inodes. Link: https://lkml.kernel.org/r/20221115123255.12559-1-jack@suse.cz Signed-off-by: Jan Kara Reported-by: You Zhou Reported-by: Pengfei Xu Tested-by: Pengfei Xu Reviewed-by: Shakeel Butt Cc: Vladimir Davydov Signed-off-by: Andrew Morton --- mm/vmscan.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index d7c71be6417d..82f32c929b11 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1021,31 +1021,34 @@ out: return freed; } -static void drop_slab_node(int nid) +static unsigned long drop_slab_node(int nid) { - unsigned long freed; - int shift = 0; + unsigned long freed = 0; + struct mem_cgroup *memcg = NULL; + memcg = mem_cgroup_iter(NULL, NULL, NULL); do { - struct mem_cgroup *memcg = NULL; + freed += shrink_slab(GFP_KERNEL, nid, memcg, 0); + } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL); - if (fatal_signal_pending(current)) - return; - - freed = 0; - memcg = mem_cgroup_iter(NULL, NULL, NULL); - do { - freed += shrink_slab(GFP_KERNEL, nid, memcg, 0); - } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL); - } while ((freed >> shift++) > 1); + return freed; } void drop_slab(void) { int nid; + int shift = 0; + unsigned long freed; - for_each_online_node(nid) - drop_slab_node(nid); + do { + freed = 0; + for_each_online_node(nid) { + if (fatal_signal_pending(current)) + return; + + freed += drop_slab_node(nid); + } + } while ((freed >> shift++) > 1); } static int reclaimer_offset(void) From dbaf7dc97ab8d526a20d3477419bc14b4890a82c Mon Sep 17 00:00:00 2001 From: Li zeming Date: Mon, 7 Nov 2022 09:56:59 +0800 Subject: [PATCH 3052/4122] hugetlbfs: inode: remove unnecessary (void*) conversions The ei pointer does not need 
to cast the type. Link: https://lkml.kernel.org/r/20221107015659.3221-1-zeming@nfschina.com Signed-off-by: Li zeming Reviewed-by: Muchun Song Cc: Mike Kravetz Signed-off-by: Andrew Morton --- fs/hugetlbfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 3ee84604e36d..790d2727141a 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -1279,7 +1279,7 @@ static const struct address_space_operations hugetlbfs_aops = { static void init_once(void *foo) { - struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo; + struct hugetlbfs_inode_info *ei = foo; inode_init_once(&ei->vfs_inode); } From eff6aa17aa7c06c25c0df80060cd0fe621dac276 Mon Sep 17 00:00:00 2001 From: Rong Tao Date: Sun, 13 Nov 2022 08:38:45 +0800 Subject: [PATCH 3053/4122] selftests/damon: fix unnecessary compilation warnings When testing overflow and overread, there is no need to keep unnecessary compilation warnings, we should simply ignore them. The motivation for this patch is to eliminate the compilation warning, maybe one day we will compile the kernel with "-Werror -Wall", at which point this compilation warning will turn into a compilation error, we should fix this error in advance. How to reproduce the problem (with gcc-11.3.1): $ make -C tools/testing/selftests/ ... warning: `write' reading 4294967295 bytes from a region of size 1 [-Wstringop-overread] warning: `read' writing 4294967295 bytes into a region of size 25 overflows the destination [-Wstringop-overflow=] "-Wno-stringop-overread" is supported at least in gcc-11.1.0. 
Link: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=d14c547abd484d3540b692bb8048c4a6efe92c8b Link: https://lkml.kernel.org/r/tencent_51C4ACA8CB3895C2D7F35178440283602107@qq.com Signed-off-by: Rong Tao Reviewed-by: SeongJae Park Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/huge_count_read_write.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/testing/selftests/damon/huge_count_read_write.c b/tools/testing/selftests/damon/huge_count_read_write.c index ad7a6b4cf338..a6fe0689f88d 100644 --- a/tools/testing/selftests/damon/huge_count_read_write.c +++ b/tools/testing/selftests/damon/huge_count_read_write.c @@ -8,6 +8,13 @@ #include #include +#pragma GCC diagnostic push +#if __GNUC__ >= 11 && __GNUC_MINOR__ >= 1 +/* Ignore read(2) overflow and write(2) overread compile warnings */ +#pragma GCC diagnostic ignored "-Wstringop-overread" +#pragma GCC diagnostic ignored "-Wstringop-overflow" +#endif + void write_read_with_huge_count(char *file) { int filedesc = open(file, O_RDWR); @@ -27,6 +34,8 @@ void write_read_with_huge_count(char *file) close(filedesc); } +#pragma GCC diagnostic pop + int main(int argc, char *argv[]) { if (argc != 2) { From 53b2d09bdd12092a7341c08b6b863560db62fa57 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 16 Nov 2022 16:07:09 -0400 Subject: [PATCH 3054/4122] mm/gup: remove the restriction on locked with FOLL_LONGTERM This restriction was created because FOLL_LONGTERM used to scan the vma list, so it could not tolerate becoming unlocked. That was fixed in commit 52650c8b466b ("mm/gup: remove the vma allocation from gup_longterm_locked()") and the restriction on !vma was removed. However, the locked restriction remained, even though it isn't necessary anymore. Adjust __gup_longterm_locked() so it can handle the mmap_read_lock() becoming unlocked while it is looping for migration. Migration does not require the mmap_read_sem because it is only handling struct pages. 
If we had to unlock then ensure the whole thing returns unlocked. Remove __get_user_pages_remote() and __gup_longterm_unlocked(). These cases can now just directly call other functions. Link: https://lkml.kernel.org/r/0-v1-b9ae39aa8884+14dbb-gup_longterm_locked_jgg@nvidia.com Signed-off-by: Jason Gunthorpe Reviewed-by: John Hubbard Cc: Alistair Popple Cc: John Hubbard Signed-off-by: Andrew Morton --- mm/gup.c | 109 ++++++++++++++----------------------------------------- 1 file changed, 27 insertions(+), 82 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index 6b16aecf5d2c..2500d00db51b 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -2049,14 +2049,19 @@ static long __gup_longterm_locked(struct mm_struct *mm, unsigned long nr_pages, struct page **pages, struct vm_area_struct **vmas, + int *locked, unsigned int gup_flags) { + bool must_unlock = false; unsigned int flags; long rc, nr_pinned_pages; + if (locked && WARN_ON_ONCE(!*locked)) + return -EINVAL; + if (!(gup_flags & FOLL_LONGTERM)) return __get_user_pages_locked(mm, start, nr_pages, pages, vmas, - NULL, gup_flags); + locked, gup_flags); /* * If we get to this point then FOLL_LONGTERM is set, and FOLL_LONGTERM @@ -2070,8 +2075,13 @@ static long __gup_longterm_locked(struct mm_struct *mm, return -EINVAL; flags = memalloc_pin_save(); do { + if (locked && !*locked) { + mmap_read_lock(mm); + must_unlock = true; + *locked = 1; + } nr_pinned_pages = __get_user_pages_locked(mm, start, nr_pages, - pages, vmas, NULL, + pages, vmas, locked, gup_flags); if (nr_pinned_pages <= 0) { rc = nr_pinned_pages; @@ -2081,6 +2091,10 @@ static long __gup_longterm_locked(struct mm_struct *mm, } while (rc == -EAGAIN); memalloc_pin_restore(flags); + if (locked && *locked && must_unlock) { + mmap_read_unlock(mm); + *locked = 0; + } return rc ? 
rc : nr_pinned_pages; } @@ -2104,35 +2118,6 @@ static bool is_valid_gup_flags(unsigned int gup_flags) } #ifdef CONFIG_MMU -static long __get_user_pages_remote(struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, - unsigned int gup_flags, struct page **pages, - struct vm_area_struct **vmas, int *locked) -{ - /* - * Parts of FOLL_LONGTERM behavior are incompatible with - * FAULT_FLAG_ALLOW_RETRY because of the FS DAX check requirement on - * vmas. However, this only comes up if locked is set, and there are - * callers that do request FOLL_LONGTERM, but do not set locked. So, - * allow what we can. - */ - if (gup_flags & FOLL_LONGTERM) { - if (WARN_ON_ONCE(locked)) - return -EINVAL; - /* - * This will check the vmas (even if our vmas arg is NULL) - * and return -ENOTSUPP if DAX isn't allowed in this case: - */ - return __gup_longterm_locked(mm, start, nr_pages, pages, - vmas, gup_flags | FOLL_TOUCH | - FOLL_REMOTE); - } - - return __get_user_pages_locked(mm, start, nr_pages, pages, vmas, - locked, - gup_flags | FOLL_TOUCH | FOLL_REMOTE); -} - /** * get_user_pages_remote() - pin user pages in memory * @mm: mm_struct of target mm @@ -2201,8 +2186,8 @@ long get_user_pages_remote(struct mm_struct *mm, if (!is_valid_gup_flags(gup_flags)) return -EINVAL; - return __get_user_pages_remote(mm, start, nr_pages, gup_flags, - pages, vmas, locked); + return __gup_longterm_locked(mm, start, nr_pages, pages, vmas, locked, + gup_flags | FOLL_TOUCH | FOLL_REMOTE); } EXPORT_SYMBOL(get_user_pages_remote); @@ -2214,14 +2199,6 @@ long get_user_pages_remote(struct mm_struct *mm, { return 0; } - -static long __get_user_pages_remote(struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, - unsigned int gup_flags, struct page **pages, - struct vm_area_struct **vmas, int *locked) -{ - return 0; -} #endif /* !CONFIG_MMU */ /** @@ -2248,7 +2225,7 @@ long get_user_pages(unsigned long start, unsigned long nr_pages, return -EINVAL; return 
__gup_longterm_locked(current->mm, start, nr_pages, - pages, vmas, gup_flags | FOLL_TOUCH); + pages, vmas, NULL, gup_flags | FOLL_TOUCH); } EXPORT_SYMBOL(get_user_pages); @@ -2274,18 +2251,9 @@ long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, int locked = 1; long ret; - /* - * FIXME: Current FOLL_LONGTERM behavior is incompatible with - * FAULT_FLAG_ALLOW_RETRY because of the FS DAX check requirement on - * vmas. As there are no users of this flag in this call we simply - * disallow this option for now. - */ - if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM)) - return -EINVAL; - mmap_read_lock(mm); - ret = __get_user_pages_locked(mm, start, nr_pages, pages, NULL, - &locked, gup_flags | FOLL_TOUCH); + ret = __gup_longterm_locked(mm, start, nr_pages, pages, NULL, &locked, + gup_flags | FOLL_TOUCH); if (locked) mmap_read_unlock(mm); return ret; @@ -2879,29 +2847,6 @@ static bool gup_fast_permitted(unsigned long start, unsigned long end) } #endif -static int __gup_longterm_unlocked(unsigned long start, int nr_pages, - unsigned int gup_flags, struct page **pages) -{ - int ret; - - /* - * FIXME: FOLL_LONGTERM does not work with - * get_user_pages_unlocked() (see comments in that function) - */ - if (gup_flags & FOLL_LONGTERM) { - mmap_read_lock(current->mm); - ret = __gup_longterm_locked(current->mm, - start, nr_pages, - pages, NULL, gup_flags); - mmap_read_unlock(current->mm); - } else { - ret = get_user_pages_unlocked(start, nr_pages, - pages, gup_flags); - } - - return ret; -} - static unsigned long lockless_pages_from_mm(unsigned long start, unsigned long end, unsigned int gup_flags, @@ -2985,8 +2930,8 @@ static int internal_get_user_pages_fast(unsigned long start, /* Slow path: try to get the remaining pages with get_user_pages */ start += nr_pinned << PAGE_SHIFT; pages += nr_pinned; - ret = __gup_longterm_unlocked(start, nr_pages - nr_pinned, gup_flags, - pages); + ret = get_user_pages_unlocked(start, nr_pages - nr_pinned, pages, + gup_flags); 
if (ret < 0) { /* * The caller has to unpin the pages we already pinned so @@ -3185,9 +3130,9 @@ long pin_user_pages_remote(struct mm_struct *mm, if (WARN_ON_ONCE(!pages)) return -EINVAL; - gup_flags |= FOLL_PIN; - return __get_user_pages_remote(mm, start, nr_pages, gup_flags, - pages, vmas, locked); + return __gup_longterm_locked(mm, start, nr_pages, pages, vmas, locked, + gup_flags | FOLL_PIN | FOLL_TOUCH | + FOLL_REMOTE); } EXPORT_SYMBOL(pin_user_pages_remote); @@ -3221,7 +3166,7 @@ long pin_user_pages(unsigned long start, unsigned long nr_pages, gup_flags |= FOLL_PIN; return __gup_longterm_locked(current->mm, start, nr_pages, - pages, vmas, gup_flags); + pages, vmas, NULL, gup_flags); } EXPORT_SYMBOL(pin_user_pages); From 749477244b05be0d9b6dcc10c161bfa4c4749d78 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Wed, 16 Nov 2022 14:19:22 +0100 Subject: [PATCH 3055/4122] mm: Kconfig: make config SECRETMEM visible with EXPERT Commit 6a108a14fa35 ("kconfig: rename CONFIG_EMBEDDED to CONFIG_EXPERT") introduces CONFIG_EXPERT to carry the previous intent of CONFIG_EMBEDDED and just gives that intent a much better name. That has been clearly a good and long overdue renaming, and it is clearly an improvement to the kernel build configuration that has shown to help managing the kernel build configuration in the last decade. However, rather than bravely and radically just deleting CONFIG_EMBEDDED, this commit gives CONFIG_EMBEDDED a new intended semantics, but keeps it open for future contributors to implement that intended semantics: A new CONFIG_EMBEDDED option is added that automatically selects CONFIG_EXPERT when enabled and can be used in the future to isolate options that should only be considered for embedded systems (RISC architectures, SLOB, etc). Since then, this CONFIG_EMBEDDED implicitly had two purposes: - It can make even more options visible beyond what CONFIG_EXPERT makes visible. 
In other words, it may introduce another level of enabling the visibility of configuration options: always visible, visible with CONFIG_EXPERT and visible with CONFIG_EMBEDDED. - Set certain default values of some configurations differently, following the assumption that configuring a kernel build for an embedded system generally starts with a different set of default values compared to kernel builds for all other kinds of systems. Considering the second purpose, note that even arguing that a kernel build for an embedded system would choose some values differently is already tricky: the set of embedded systems with Linux kernels is already quite diverse. Many embedded systems have powerful CPUs and it would not be clear that all embedded systems just optimize towards one specific aspect, e.g., a smaller kernel image size. So, it is unclear if starting with "one set of default configuration" that is induced by CONFIG_EMBEDDED is a good offer for developers configuring their kernels. Also, the differences of needed user-space features in an embedded system compared to a non-embedded system are probably difficult or even impossible to name in some generic way. So it is not surprising that in the last decade hardly anyone has contributed changes to make something default differently in case of CONFIG_EMBEDDED=y. Currently, in v6.0-rc4, SECRETMEM is the only config switched off if CONFIG_EMBEDDED=y. As long as that is actually the only option that currently is selected or deselected, it is better to just make SECRETMEM configurable at build time by experts using menuconfig instead. Make SECRETMEM configurable when EXPERT is set and otherwise default to yes. Further, SECRETMEM needs ARCH_HAS_SET_DIRECT_MAP. This allows us to remove CONFIG_EMBEDDED in the near future.
Link: https://lkml.kernel.org/r/20221116131922.25533-1-lukas.bulwahn@gmail.com Signed-off-by: Lukas Bulwahn Acked-by: Mike Rapoport Acked-by: Arnd Bergmann Reviewed-by: Masahiro Yamada Signed-off-by: Andrew Morton --- mm/Kconfig | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/mm/Kconfig b/mm/Kconfig index c86b69aff7d4..4e8a2697f28d 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -1074,7 +1074,13 @@ config IO_MAPPING bool config SECRETMEM - def_bool ARCH_HAS_SET_DIRECT_MAP && !EMBEDDED + default y + bool "Enable memfd_secret() system call" if EXPERT + depends on ARCH_HAS_SET_DIRECT_MAP + help + Enable the memfd_secret() system call with the ability to create + memory areas visible only in the context of the owning process and + not mapped to other processes and other kernel page tables. config ANON_VMA_NAME bool "Anonymous VMA name support" From 7aca5ca154930a06612f4d7b81f710f3e1027e04 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 16 Nov 2022 11:26:40 +0100 Subject: [PATCH 3056/4122] selftests/vm: anon_cow: prepare for non-anonymous COW tests Patch series "mm/gup: remove FOLL_FORCE usage from drivers (reliable R/O long-term pinning)". For now, we did not support reliable R/O long-term pinning in COW mappings. That means, if we would trigger R/O long-term pinning in MAP_PRIVATE mapping, we could end up pinning the (R/O-mapped) shared zeropage or a pagecache page. The next write access would trigger a write fault and replace the pinned page by an exclusive anonymous page in the process page table; whatever the process would write to that private page copy would not be visible by the owner of the previous page pin: for example, RDMA could read stale data. The end result is essentially an unexpected and hard-to-debug memory corruption. Some drivers tried working around that limitation by using "FOLL_FORCE|FOLL_WRITE|FOLL_LONGTERM" for R/O long-term pinning for now. 
FOLL_WRITE would trigger a write fault, if required, and break COW before pinning the page. FOLL_FORCE is required because the VMA might lack write permissions, and drivers wanted to make that work as well, just like one would expect (no write access, but still triggering a write access to break COW). However, that is not a practical solution, because (1) Drivers that don't stick to that undocumented and debatable pattern would still run into that issue. For example, VFIO only uses FOLL_LONGTERM for R/O long-term pinning. (2) Using FOLL_WRITE just to work around a COW mapping + page pinning limitation is unintuitive. FOLL_WRITE would, for example, mark the page softdirty or trigger uffd-wp, even though there actually isn't going to be any write access. (3) The purpose of FOLL_FORCE is debug access, not access despite a lack of VMA permissions by arbitrary drivers. So instead, make R/O long-term pinning work as expected, by breaking COW in a COW mapping early, such that we can remove any FOLL_FORCE usage from drivers and make FOLL_FORCE ptrace-specific (renaming it to FOLL_PTRACE). More details in patch #8. This patch (of 19): Originally, the plan was to have separate tests for testing COW of non-anonymous (e.g., shared zeropage) pages. Turns out, that we'd need a lot of similar functionality and that there isn't a really good reason to separate it. So let's prepare for non-anon tests by renaming to "cow".
Link: https://lkml.kernel.org/r/20221116102659.70287-1-david@redhat.com Link: https://lkml.kernel.org/r/20221116102659.70287-2-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Vlastimil Babka Cc: Alexander Shishkin Cc: Alexander Viro Cc: Alex Williamson Cc: Andrea Arcangeli Cc: Andy Walls Cc: Anton Ivanov Cc: Arnaldo Carvalho de Melo Cc: Arnd Bergmann Cc: Bernard Metzler Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christian Benvenuti Cc: Christian Gmeiner Cc: Christophe Leroy Cc: Christoph Hellwig Cc: Daniel Vetter Cc: Daniel Vetter Cc: Dave Hansen Cc: David Airlie Cc: David S. Miller Cc: Dennis Dalessandro Cc: "Eric W . Biederman" Cc: Greg Kroah-Hartman Cc: Hans Verkuil Cc: "H. Peter Anvin" Cc: Hugh Dickins Cc: Ingo Molnar Cc: Inki Dae Cc: Ivan Kokshaysky Cc: James Morris Cc: Jason Gunthorpe Cc: Jiri Olsa Cc: Johannes Berg Cc: John Hubbard Cc: Kees Cook Cc: Kentaro Takeda Cc: Krzysztof Kozlowski Cc: Kyungmin Park Cc: Leon Romanovsky Cc: Leon Romanovsky Cc: Linus Torvalds Cc: Lucas Stach Cc: Marek Szyprowski Cc: Mark Rutland Cc: Matthew Wilcox Cc: Matt Turner Cc: Mauro Carvalho Chehab Cc: Michael Ellerman Cc: Mike Kravetz Cc: Muchun Song Cc: Nadav Amit Cc: Namhyung Kim Cc: Nelson Escobar Cc: Nicholas Piggin Cc: Oded Gabbay Cc: Oleg Nesterov Cc: Paul Moore Cc: Peter Xu Cc: Peter Zijlstra Cc: Richard Henderson Cc: Richard Weinberger Cc: Russell King Cc: Serge Hallyn Cc: Seung-Woo Kim Cc: Shuah Khan Cc: Tetsuo Handa Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Tomasz Figa Cc: Will Deacon Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/.gitignore | 2 +- tools/testing/selftests/vm/Makefile | 10 ++++---- tools/testing/selftests/vm/check_config.sh | 4 +-- .../selftests/vm/{anon_cow.c => cow.c} | 25 +++++++++++-------- tools/testing/selftests/vm/run_vmtests.sh | 2 +- 5 files changed, 24 insertions(+), 19 deletions(-) rename tools/testing/selftests/vm/{anon_cow.c => cow.c} (97%) diff --git a/tools/testing/selftests/vm/.gitignore 
b/tools/testing/selftests/vm/.gitignore index 8a536c731e3c..ee8c41c998e6 100644 --- a/tools/testing/selftests/vm/.gitignore +++ b/tools/testing/selftests/vm/.gitignore @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -anon_cow +cow hugepage-mmap hugepage-mremap hugepage-shm diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 00920cb8b499..a4d764efd6e3 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -27,7 +27,7 @@ MAKEFLAGS += --no-builtin-rules CFLAGS = -Wall -I $(top_srcdir) -I $(top_srcdir)/usr/include $(EXTRA_CFLAGS) $(KHDR_INCLUDES) LDLIBS = -lrt -lpthread -TEST_GEN_FILES = anon_cow +TEST_GEN_FILES = cow TEST_GEN_FILES += compaction_test TEST_GEN_FILES += gup_test TEST_GEN_FILES += hmm-tests @@ -98,7 +98,7 @@ TEST_FILES += va_128TBswitch.sh include ../lib.mk -$(OUTPUT)/anon_cow: vm_util.c +$(OUTPUT)/cow: vm_util.c $(OUTPUT)/khugepaged: vm_util.c $(OUTPUT)/madv_populate: vm_util.c $(OUTPUT)/soft-dirty: vm_util.c @@ -154,8 +154,8 @@ warn_32bit_failure: endif endif -# ANON_COW_EXTRA_LIBS may get set in local_config.mk, or it may be left empty. -$(OUTPUT)/anon_cow: LDLIBS += $(ANON_COW_EXTRA_LIBS) +# cow_EXTRA_LIBS may get set in local_config.mk, or it may be left empty. 
+$(OUTPUT)/cow: LDLIBS += $(COW_EXTRA_LIBS) $(OUTPUT)/mlock-random-test $(OUTPUT)/memfd_secret: LDLIBS += -lcap @@ -168,7 +168,7 @@ local_config.mk local_config.h: check_config.sh EXTRA_CLEAN += local_config.mk local_config.h -ifeq ($(ANON_COW_EXTRA_LIBS),) +ifeq ($(COW_EXTRA_LIBS),) all: warn_missing_liburing warn_missing_liburing: diff --git a/tools/testing/selftests/vm/check_config.sh b/tools/testing/selftests/vm/check_config.sh index 9a44c6520925..bcba3af0acea 100644 --- a/tools/testing/selftests/vm/check_config.sh +++ b/tools/testing/selftests/vm/check_config.sh @@ -21,11 +21,11 @@ $CC -c $tmpfile_c -o $tmpfile_o >/dev/null 2>&1 if [ -f $tmpfile_o ]; then echo "#define LOCAL_CONFIG_HAVE_LIBURING 1" > $OUTPUT_H_FILE - echo "ANON_COW_EXTRA_LIBS = -luring" > $OUTPUT_MKFILE + echo "COW_EXTRA_LIBS = -luring" > $OUTPUT_MKFILE else echo "// No liburing support found" > $OUTPUT_H_FILE echo "# No liburing support found, so:" > $OUTPUT_MKFILE - echo "ANON_COW_EXTRA_LIBS = " >> $OUTPUT_MKFILE + echo "COW_EXTRA_LIBS = " >> $OUTPUT_MKFILE fi rm ${tmpname}.* diff --git a/tools/testing/selftests/vm/anon_cow.c b/tools/testing/selftests/vm/cow.c similarity index 97% rename from tools/testing/selftests/vm/anon_cow.c rename to tools/testing/selftests/vm/cow.c index bbb251eb5025..d202bfd63585 100644 --- a/tools/testing/selftests/vm/anon_cow.c +++ b/tools/testing/selftests/vm/cow.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * COW (Copy On Write) tests for anonymous memory. + * COW (Copy On Write) tests. * * Copyright 2022, Red Hat, Inc. * @@ -986,7 +986,11 @@ struct test_case { test_fn fn; }; -static const struct test_case test_cases[] = { +/* + * Test cases that are specific to anonymous pages: pages in private mappings + * that may get shared via COW during fork(). + */ +static const struct test_case anon_test_cases[] = { /* * Basic COW tests for fork() without any GUP. 
If we miss to break COW, * either the child can observe modifications by the parent or the @@ -1104,7 +1108,7 @@ static const struct test_case test_cases[] = { }, }; -static void run_test_case(struct test_case const *test_case) +static void run_anon_test_case(struct test_case const *test_case) { int i; @@ -1125,15 +1129,17 @@ static void run_test_case(struct test_case const *test_case) hugetlbsizes[i]); } -static void run_test_cases(void) +static void run_anon_test_cases(void) { int i; - for (i = 0; i < ARRAY_SIZE(test_cases); i++) - run_test_case(&test_cases[i]); + ksft_print_msg("[INFO] Anonymous memory tests in private mappings\n"); + + for (i = 0; i < ARRAY_SIZE(anon_test_cases); i++) + run_anon_test_case(&anon_test_cases[i]); } -static int tests_per_test_case(void) +static int tests_per_anon_test_case(void) { int tests = 2 + nr_hugetlbsizes; @@ -1144,7 +1150,6 @@ static int tests_per_test_case(void) int main(int argc, char **argv) { - int nr_test_cases = ARRAY_SIZE(test_cases); int err; pagesize = getpagesize(); @@ -1152,14 +1157,14 @@ int main(int argc, char **argv) detect_hugetlbsizes(); ksft_print_header(); - ksft_set_plan(nr_test_cases * tests_per_test_case()); + ksft_set_plan(ARRAY_SIZE(anon_test_cases) * tests_per_anon_test_case()); gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR); pagemap_fd = open("/proc/self/pagemap", O_RDONLY); if (pagemap_fd < 0) ksft_exit_fail_msg("opening pagemap failed\n"); - run_test_cases(); + run_anon_test_cases(); err = ksft_get_fail_cnt(); if (err) diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh index 1fa783732296..54d7a822c2ce 100755 --- a/tools/testing/selftests/vm/run_vmtests.sh +++ b/tools/testing/selftests/vm/run_vmtests.sh @@ -186,6 +186,6 @@ fi run_test ./soft-dirty # COW tests for anonymous memory -run_test ./anon_cow +run_test ./cow exit $exitcode From f8664f3c4a08f799122e8f0a8093056a7b3fbc8d Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 16 Nov 2022 
11:26:41 +0100 Subject: [PATCH 3057/4122] selftests/vm: cow: basic COW tests for non-anonymous pages Let's add basic tests for COW with non-anonymous pages in private mappings: write access should properly trigger COW and result in the private changes not being visible through other page mappings. Especially, add tests for: * Zeropage * Huge zeropage * Ordinary pagecache pages via memfd and tmpfile() * Hugetlb pages via memfd Fortunately, all tests pass. Link: https://lkml.kernel.org/r/20221116102659.70287-3-david@redhat.com Signed-off-by: David Hildenbrand Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/cow.c | 338 ++++++++++++++++++++++++++++++- 1 file changed, 337 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/vm/cow.c b/tools/testing/selftests/vm/cow.c index d202bfd63585..fb07bd44529c 100644 --- a/tools/testing/selftests/vm/cow.c +++ b/tools/testing/selftests/vm/cow.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "local_config.h" #ifdef LOCAL_CONFIG_HAVE_LIBURING @@ -35,6 +36,7 @@ static size_t thpsize; static int nr_hugetlbsizes; static size_t hugetlbsizes[10]; static int gup_fd; +static bool has_huge_zeropage; static void detect_thpsize(void) { @@ -64,6 +66,31 @@ static void detect_thpsize(void) close(fd); } +static void detect_huge_zeropage(void) +{ + int fd = open("/sys/kernel/mm/transparent_hugepage/use_zero_page", + O_RDONLY); + size_t enabled = 0; + char buf[15]; + int ret; + + if (fd < 0) + return; + + ret = pread(fd, buf, sizeof(buf), 0); + if (ret > 0 && ret < sizeof(buf)) { + buf[ret] = 0; + + enabled = strtoul(buf, NULL, 10); + if (enabled == 1) { + has_huge_zeropage = true; + ksft_print_msg("[INFO] huge zeropage is enabled\n"); + } + } + + close(fd); +} + static void detect_hugetlbsizes(void) { DIR *dir = opendir("/sys/kernel/mm/hugepages/"); @@ -1148,6 +1175,312 @@ static int tests_per_anon_test_case(void) return tests; } +typedef void (*non_anon_test_fn)(char *mem, const char *smem, size_t size); + 
+static void test_cow(char *mem, const char *smem, size_t size) +{ + char *old = malloc(size); + + /* Backup the original content. */ + memcpy(old, smem, size); + + /* Modify the page. */ + memset(mem, 0xff, size); + + /* See if we still read the old values via the other mapping. */ + ksft_test_result(!memcmp(smem, old, size), + "Other mapping not modified\n"); + free(old); +} + +static void run_with_zeropage(non_anon_test_fn fn, const char *desc) +{ + char *mem, *smem, tmp; + + ksft_print_msg("[RUN] %s ... with shared zeropage\n", desc); + + mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); + if (mem == MAP_FAILED) { + ksft_test_result_fail("mmap() failed\n"); + return; + } + + smem = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0); + if (smem == MAP_FAILED) { + ksft_test_result_fail("mmap() failed\n"); + goto munmap; + } + + /* Read from the page to populate the shared zeropage. */ + tmp = *mem + *smem; + asm volatile("" : "+r" (tmp)); + + fn(mem, smem, pagesize); +munmap: + munmap(mem, pagesize); + if (smem != MAP_FAILED) + munmap(smem, pagesize); +} + +static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) +{ + char *mem, *smem, *mmap_mem, *mmap_smem, tmp; + size_t mmap_size; + int ret; + + ksft_print_msg("[RUN] %s ... with huge zeropage\n", desc); + + if (!has_huge_zeropage) { + ksft_test_result_skip("Huge zeropage not enabled\n"); + return; + } + + /* For alignment purposes, we need twice the thp size. */ + mmap_size = 2 * thpsize; + mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (mmap_mem == MAP_FAILED) { + ksft_test_result_fail("mmap() failed\n"); + return; + } + mmap_smem = mmap(NULL, mmap_size, PROT_READ, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (mmap_smem == MAP_FAILED) { + ksft_test_result_fail("mmap() failed\n"); + goto munmap; + } + + /* We need a THP-aligned memory area. 
*/ + mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1)); + smem = (char *)(((uintptr_t)mmap_smem + thpsize) & ~(thpsize - 1)); + + ret = madvise(mem, thpsize, MADV_HUGEPAGE); + ret |= madvise(smem, thpsize, MADV_HUGEPAGE); + if (ret) { + ksft_test_result_fail("MADV_HUGEPAGE failed\n"); + goto munmap; + } + + /* + * Read from the memory to populate the huge shared zeropage. Read from + * the first sub-page and test if we get another sub-page populated + * automatically. + */ + tmp = *mem + *smem; + asm volatile("" : "+r" (tmp)); + if (!pagemap_is_populated(pagemap_fd, mem + pagesize) || + !pagemap_is_populated(pagemap_fd, smem + pagesize)) { + ksft_test_result_skip("Did not get THPs populated\n"); + goto munmap; + } + + fn(mem, smem, thpsize); +munmap: + munmap(mmap_mem, mmap_size); + if (mmap_smem != MAP_FAILED) + munmap(mmap_smem, mmap_size); +} + +static void run_with_memfd(non_anon_test_fn fn, const char *desc) +{ + char *mem, *smem, tmp; + int fd; + + ksft_print_msg("[RUN] %s ... with memfd\n", desc); + + fd = memfd_create("test", 0); + if (fd < 0) { + ksft_test_result_fail("memfd_create() failed\n"); + return; + } + + /* File consists of a single page filled with zeroes. */ + if (fallocate(fd, 0, 0, pagesize)) { + ksft_test_result_fail("fallocate() failed\n"); + goto close; + } + + /* Create a private mapping of the memfd. */ + mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + if (mem == MAP_FAILED) { + ksft_test_result_fail("mmap() failed\n"); + goto close; + } + smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0); + if (smem == MAP_FAILED) { + ksft_test_result_fail("mmap() failed\n"); + goto munmap; + } + + /* Fault the page in. 
*/ + tmp = *mem + *smem; + asm volatile("" : "+r" (tmp)); + + fn(mem, smem, pagesize); +munmap: + munmap(mem, pagesize); + if (smem != MAP_FAILED) + munmap(smem, pagesize); +close: + close(fd); +} + +static void run_with_tmpfile(non_anon_test_fn fn, const char *desc) +{ + char *mem, *smem, tmp; + FILE *file; + int fd; + + ksft_print_msg("[RUN] %s ... with tmpfile\n", desc); + + file = tmpfile(); + if (!file) { + ksft_test_result_fail("tmpfile() failed\n"); + return; + } + + fd = fileno(file); + if (fd < 0) { + ksft_test_result_skip("fileno() failed\n"); + goto close; + } + + /* File consists of a single page filled with zeroes. */ + if (fallocate(fd, 0, 0, pagesize)) { + ksft_test_result_fail("fallocate() failed\n"); + goto close; + } + + /* Create a private mapping of the tmpfile. */ + mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + if (mem == MAP_FAILED) { + ksft_test_result_fail("mmap() failed\n"); + goto close; + } + smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0); + if (smem == MAP_FAILED) { + ksft_test_result_fail("mmap() failed\n"); + goto munmap; + } + + /* Fault the page in. */ + tmp = *mem + *smem; + asm volatile("" : "+r" (tmp)); + + fn(mem, smem, pagesize); +munmap: + munmap(mem, pagesize); + if (smem != MAP_FAILED) + munmap(smem, pagesize); +close: + fclose(file); +} + +static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc, + size_t hugetlbsize) +{ + int flags = MFD_HUGETLB; + char *mem, *smem, tmp; + int fd; + + ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc, + hugetlbsize / 1024); + + flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT; + + fd = memfd_create("test", flags); + if (fd < 0) { + ksft_test_result_skip("memfd_create() failed\n"); + return; + } + + /* File consists of a single page filled with zeroes. */ + if (fallocate(fd, 0, 0, hugetlbsize)) { + ksft_test_result_skip("need more free huge pages\n"); + goto close; + } + + /* Create a private mapping of the memfd. 
*/ + mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, + 0); + if (mem == MAP_FAILED) { + ksft_test_result_skip("need more free huge pages\n"); + goto close; + } + smem = mmap(NULL, hugetlbsize, PROT_READ, MAP_SHARED, fd, 0); + if (smem == MAP_FAILED) { + ksft_test_result_fail("mmap() failed\n"); + goto munmap; + } + + /* Fault the page in. */ + tmp = *mem + *smem; + asm volatile("" : "+r" (tmp)); + + fn(mem, smem, hugetlbsize); +munmap: + munmap(mem, hugetlbsize); + if (smem != MAP_FAILED) + munmap(smem, hugetlbsize); +close: + close(fd); +} + +struct non_anon_test_case { + const char *desc; + non_anon_test_fn fn; +}; + +/* + * Test cases that target any pages in private mappings that are non anonymous: + * pages that may get shared via COW ndependent of fork(). This includes + * the shared zeropage(s), pagecache pages, ... + */ +static const struct non_anon_test_case non_anon_test_cases[] = { + /* + * Basic COW test without any GUP. If we miss to break COW, changes are + * visible via other private/shared mappings. 
+ */ + { + "Basic COW", + test_cow, + }, +}; + +static void run_non_anon_test_case(struct non_anon_test_case const *test_case) +{ + int i; + + run_with_zeropage(test_case->fn, test_case->desc); + run_with_memfd(test_case->fn, test_case->desc); + run_with_tmpfile(test_case->fn, test_case->desc); + if (thpsize) + run_with_huge_zeropage(test_case->fn, test_case->desc); + for (i = 0; i < nr_hugetlbsizes; i++) + run_with_memfd_hugetlb(test_case->fn, test_case->desc, + hugetlbsizes[i]); +} + +static void run_non_anon_test_cases(void) +{ + int i; + + ksft_print_msg("[RUN] Non-anonymous memory tests in private mappings\n"); + + for (i = 0; i < ARRAY_SIZE(non_anon_test_cases); i++) + run_non_anon_test_case(&non_anon_test_cases[i]); +} + +static int tests_per_non_anon_test_case(void) +{ + int tests = 3 + nr_hugetlbsizes; + + if (thpsize) + tests += 1; + return tests; +} + int main(int argc, char **argv) { int err; @@ -1155,9 +1488,11 @@ int main(int argc, char **argv) pagesize = getpagesize(); detect_thpsize(); detect_hugetlbsizes(); + detect_huge_zeropage(); ksft_print_header(); - ksft_set_plan(ARRAY_SIZE(anon_test_cases) * tests_per_anon_test_case()); + ksft_set_plan(ARRAY_SIZE(anon_test_cases) * tests_per_anon_test_case() + + ARRAY_SIZE(non_anon_test_cases) * tests_per_non_anon_test_case()); gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR); pagemap_fd = open("/proc/self/pagemap", O_RDONLY); @@ -1165,6 +1500,7 @@ int main(int argc, char **argv) ksft_exit_fail_msg("opening pagemap failed\n"); run_anon_test_cases(); + run_non_anon_test_cases(); err = ksft_get_fail_cnt(); if (err) From 97713a3abe338bb6c968e77264edbb68eb8d932a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 16 Nov 2022 11:26:42 +0100 Subject: [PATCH 3058/4122] selftests/vm: cow: R/O long-term pinning reliability tests for non-anon pages Let's test whether R/O long-term pinning is reliable for non-anonymous memory: when R/O long-term pinning a page, the expectation is that we break COW early 
before pinning, such that actual write access via the page tables won't break COW later and end up replacing the R/O-pinned page in the page table. Consequently, R/O long-term pinning in private mappings would only target exclusive anonymous pages. For now, all tests fail: # [RUN] R/O longterm GUP pin ... with shared zeropage not ok 151 Longterm R/O pin is reliable # [RUN] R/O longterm GUP pin ... with memfd not ok 152 Longterm R/O pin is reliable # [RUN] R/O longterm GUP pin ... with tmpfile not ok 153 Longterm R/O pin is reliable # [RUN] R/O longterm GUP pin ... with huge zeropage not ok 154 Longterm R/O pin is reliable # [RUN] R/O longterm GUP pin ... with memfd hugetlb (2048 kB) not ok 155 Longterm R/O pin is reliable # [RUN] R/O longterm GUP pin ... with memfd hugetlb (1048576 kB) not ok 156 Longterm R/O pin is reliable # [RUN] R/O longterm GUP-fast pin ... with shared zeropage not ok 157 Longterm R/O pin is reliable # [RUN] R/O longterm GUP-fast pin ... with memfd not ok 158 Longterm R/O pin is reliable # [RUN] R/O longterm GUP-fast pin ... with tmpfile not ok 159 Longterm R/O pin is reliable # [RUN] R/O longterm GUP-fast pin ... with huge zeropage not ok 160 Longterm R/O pin is reliable # [RUN] R/O longterm GUP-fast pin ... with memfd hugetlb (2048 kB) not ok 161 Longterm R/O pin is reliable # [RUN] R/O longterm GUP-fast pin ... 
with memfd hugetlb (1048576 kB) not ok 162 Longterm R/O pin is reliable Link: https://lkml.kernel.org/r/20221116102659.70287-4-david@redhat.com Signed-off-by: David Hildenbrand Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/cow.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/vm/cow.c b/tools/testing/selftests/vm/cow.c index fb07bd44529c..73e05b52c49e 100644 --- a/tools/testing/selftests/vm/cow.c +++ b/tools/testing/selftests/vm/cow.c @@ -561,6 +561,7 @@ static void test_iouring_fork(char *mem, size_t size) #endif /* LOCAL_CONFIG_HAVE_LIBURING */ enum ro_pin_test { + RO_PIN_TEST, RO_PIN_TEST_SHARED, RO_PIN_TEST_PREVIOUSLY_SHARED, RO_PIN_TEST_RO_EXCLUSIVE, @@ -593,6 +594,8 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test, } switch (test) { + case RO_PIN_TEST: + break; case RO_PIN_TEST_SHARED: case RO_PIN_TEST_PREVIOUSLY_SHARED: /* @@ -1193,6 +1196,16 @@ static void test_cow(char *mem, const char *smem, size_t size) free(old); } +static void test_ro_pin(char *mem, const char *smem, size_t size) +{ + do_test_ro_pin(mem, size, RO_PIN_TEST, false); +} + +static void test_ro_fast_pin(char *mem, const char *smem, size_t size) +{ + do_test_ro_pin(mem, size, RO_PIN_TEST, true); +} + static void run_with_zeropage(non_anon_test_fn fn, const char *desc) { char *mem, *smem, tmp; @@ -1433,7 +1446,7 @@ struct non_anon_test_case { }; /* - * Test cases that target any pages in private mappings that are non anonymous: + * Test cases that target any pages in private mappings that are not anonymous: * pages that may get shared via COW ndependent of fork(). This includes * the shared zeropage(s), pagecache pages, ... */ @@ -1446,6 +1459,19 @@ static const struct non_anon_test_case non_anon_test_cases[] = { "Basic COW", test_cow, }, + /* + * Take a R/O longterm pin. When modifying the page via the page table, + * the page content change must be visible via the pin. 
+ */ + { + "R/O longterm GUP pin", + test_ro_pin, + }, + /* Same as above, but using GUP-fast. */ + { + "R/O longterm GUP-fast pin", + test_ro_fast_pin, + }, }; static void run_non_anon_test_case(struct non_anon_test_case const *test_case) From cdc5021cda194112bc0962d6a0e90b379968c504 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 16 Nov 2022 11:26:43 +0100 Subject: [PATCH 3059/4122] mm: add early FAULT_FLAG_UNSHARE consistency checks For now, FAULT_FLAG_UNSHARE only applies to anonymous pages, which implies a COW mapping. Let's hide FAULT_FLAG_UNSHARE early if we're not dealing with a COW mapping, such that we treat it like a read fault as documented and don't have to worry about the flag throughout all fault handlers. While at it, centralize the check for mutual exclusion of FAULT_FLAG_UNSHARE and FAULT_FLAG_WRITE and just drop the check that either flag is set in the WP handler. Link: https://lkml.kernel.org/r/20221116102659.70287-5-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Vlastimil Babka Signed-off-by: Andrew Morton --- mm/huge_memory.c | 3 --- mm/hugetlb.c | 5 ----- mm/memory.c | 23 ++++++++++++++++++++--- 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 29102e3ddf84..5eb702726a0e 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1313,9 +1313,6 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf) vmf->ptl = pmd_lockptr(vma->vm_mm, vmf->pmd); VM_BUG_ON_VMA(!vma->anon_vma, vma); - VM_BUG_ON(unshare && (vmf->flags & FAULT_FLAG_WRITE)); - VM_BUG_ON(!unshare && !(vmf->flags & FAULT_FLAG_WRITE)); - if (is_huge_zero_pmd(orig_pmd)) goto fallback; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 3fd4570fb8b0..3d381b26d553 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -5316,9 +5316,6 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long haddr = address & huge_page_mask(h); struct mmu_notifier_range range; - VM_BUG_ON(unshare && 
(flags & FOLL_WRITE)); - VM_BUG_ON(!unshare && !(flags & FOLL_WRITE)); - /* * hugetlb does not support FOLL_FORCE-style write faults that keep the * PTE mapped R/O such as maybe_mkwrite() would do. @@ -5328,8 +5325,6 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma, /* Let's take out MAP_SHARED mappings first. */ if (vma->vm_flags & VM_MAYSHARE) { - if (unlikely(unshare)) - return 0; set_huge_ptep_writable(vma, haddr, ptep); return 0; } diff --git a/mm/memory.c b/mm/memory.c index 086cb3dd8608..07380ef935ac 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3343,9 +3343,6 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf) struct vm_area_struct *vma = vmf->vma; struct folio *folio; - VM_BUG_ON(unshare && (vmf->flags & FAULT_FLAG_WRITE)); - VM_BUG_ON(!unshare && !(vmf->flags & FAULT_FLAG_WRITE)); - if (likely(!unshare)) { if (userfaultfd_pte_wp(vma, *vmf->pte)) { pte_unmap_unlock(vmf->pte, vmf->ptl); @@ -5161,6 +5158,22 @@ static void lru_gen_exit_fault(void) } #endif /* CONFIG_LRU_GEN */ +static vm_fault_t sanitize_fault_flags(struct vm_area_struct *vma, + unsigned int *flags) +{ + if (unlikely(*flags & FAULT_FLAG_UNSHARE)) { + if (WARN_ON_ONCE(*flags & FAULT_FLAG_WRITE)) + return VM_FAULT_SIGSEGV; + /* + * FAULT_FLAG_UNSHARE only applies to COW mappings. Let's + * just treat it like an ordinary read-fault otherwise. 
+ */ + if (!is_cow_mapping(vma->vm_flags)) + *flags &= ~FAULT_FLAG_UNSHARE; + } + return 0; +} + /* * By the time we get here, we already hold the mm semaphore * @@ -5177,6 +5190,10 @@ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address, count_vm_event(PGFAULT); count_memcg_event_mm(vma->vm_mm, PGFAULT); + ret = sanitize_fault_flags(vma, &flags); + if (ret) + return ret; + if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE, flags & FAULT_FLAG_INSTRUCTION, flags & FAULT_FLAG_REMOTE)) From 79881fed6052a9ce00cfb63297832b9faacf8cf3 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 16 Nov 2022 11:26:44 +0100 Subject: [PATCH 3060/4122] mm: add early FAULT_FLAG_WRITE consistency checks Let's catch abuse of FAULT_FLAG_WRITE early, such that we don't have to care in all other handlers and might get "surprises" if we forget to do so. Write faults without VM_MAYWRITE don't make any sense, and our maybe_mkwrite() logic could have hidden such abuse for now. Write faults without VM_WRITE on something that is not a COW mapping is similarly broken, and e.g., do_wp_page() could end up placing an anonymous page into a shared mapping, which would be bad. This is a preparation for reliable R/O long-term pinning of pages in private mappings, whereby we want to make sure that we will never break COW in a read-only private mapping. Link: https://lkml.kernel.org/r/20221116102659.70287-6-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Vlastimil Babka Signed-off-by: Andrew Morton --- mm/memory.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mm/memory.c b/mm/memory.c index 07380ef935ac..5e4df6b87016 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -5170,6 +5170,14 @@ static vm_fault_t sanitize_fault_flags(struct vm_area_struct *vma, */ if (!is_cow_mapping(vma->vm_flags)) *flags &= ~FAULT_FLAG_UNSHARE; + } else if (*flags & FAULT_FLAG_WRITE) { + /* Write faults on read-only mappings are impossible ... 
*/ + if (WARN_ON_ONCE(!(vma->vm_flags & VM_MAYWRITE))) + return VM_FAULT_SIGSEGV; + /* ... and FOLL_FORCE only applies to COW mappings. */ + if (WARN_ON_ONCE(!(vma->vm_flags & VM_WRITE) && + !is_cow_mapping(vma->vm_flags))) + return VM_FAULT_SIGSEGV; } return 0; } From b9086fde6d44e8a95dc95b822bd87386129b832d Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 16 Nov 2022 11:26:45 +0100 Subject: [PATCH 3061/4122] mm: rework handling in do_wp_page() based on private vs. shared mappings We want to extend FAULT_FLAG_UNSHARE support to anything mapped into a COW mapping (pagecache page, zeropage, PFN, ...), not just anonymous pages. Let's prepare for that by handling shared mappings first such that we can handle private mappings last. While at it, use folio-based functions instead of page-based functions where we touch the code either way. Link: https://lkml.kernel.org/r/20221116102659.70287-7-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Vlastimil Babka Signed-off-by: Andrew Morton --- mm/memory.c | 38 +++++++++++++++++--------------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 5e4df6b87016..5d4b42f1a8d6 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3341,7 +3341,7 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf) { const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE; struct vm_area_struct *vma = vmf->vma; - struct folio *folio; + struct folio *folio = NULL; if (likely(!unshare)) { if (userfaultfd_pte_wp(vma, *vmf->pte)) { @@ -3359,13 +3359,12 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf) } vmf->page = vm_normal_page(vma, vmf->address, vmf->orig_pte); - if (!vmf->page) { - if (unlikely(unshare)) { - /* No anonymous page -> nothing to do. */ - pte_unmap_unlock(vmf->pte, vmf->ptl); - return 0; - } + /* + * Shared mapping: we are guaranteed to have VM_WRITE and + * FAULT_FLAG_WRITE set at this point. 
+ */ + if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) { /* * VM_MIXEDMAP !pfn_valid() case, or VM_SOFTDIRTY clear on a * VM_PFNMAP VMA. @@ -3373,20 +3372,19 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf) * We should not cow pages in a shared writeable mapping. * Just mark the pages writable and/or call ops->pfn_mkwrite. */ - if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) == - (VM_WRITE|VM_SHARED)) + if (!vmf->page) return wp_pfn_shared(vmf); - - pte_unmap_unlock(vmf->pte, vmf->ptl); - return wp_page_copy(vmf); + return wp_page_shared(vmf); } + if (vmf->page) + folio = page_folio(vmf->page); + /* - * Take out anonymous pages first, anonymous shared vmas are - * not dirty accountable. + * Private mapping: create an exclusive anonymous page copy if reuse + * is impossible. We might miss VM_WRITE for FOLL_FORCE handling. */ - folio = page_folio(vmf->page); - if (folio_test_anon(folio)) { + if (folio && folio_test_anon(folio)) { /* * If the page is exclusive to this process we must reuse the * page without further checks. @@ -3437,19 +3435,17 @@ reuse: /* No anonymous page -> nothing to do. */ pte_unmap_unlock(vmf->pte, vmf->ptl); return 0; - } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) == - (VM_WRITE|VM_SHARED))) { - return wp_page_shared(vmf); } copy: /* * Ok, we need to copy. Oh, well.. 
*/ - get_page(vmf->page); + if (folio) + folio_get(folio); pte_unmap_unlock(vmf->pte, vmf->ptl); #ifdef CONFIG_KSM - if (PageKsm(vmf->page)) + if (folio && folio_test_ksm(folio)) count_vm_event(COW_KSM); #endif return wp_page_copy(vmf); From aea06577a9005ca81c35196d6171cac346d3b251 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 16 Nov 2022 11:26:46 +0100 Subject: [PATCH 3062/4122] mm: don't call vm_ops->huge_fault() in wp_huge_pmd()/wp_huge_pud() for private mappings If we already have a PMD/PUD mapped write-protected in a private mapping and we want to break COW either due to FAULT_FLAG_WRITE or FAULT_FLAG_UNSHARE, there is no need to inform the file system just like on the PTE path. Let's just split (->zap) + fallback in that case. This is a preparation for more generic FAULT_FLAG_UNSHARE support in COW mappings. Link: https://lkml.kernel.org/r/20221116102659.70287-8-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Vlastimil Babka Signed-off-by: Andrew Morton --- mm/memory.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 5d4b42f1a8d6..6cec0adab37f 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4802,6 +4802,7 @@ static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf) static inline vm_fault_t wp_huge_pmd(struct vm_fault *vmf) { const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE; + vm_fault_t ret; if (vma_is_anonymous(vmf->vma)) { if (likely(!unshare) && @@ -4809,11 +4810,13 @@ static inline vm_fault_t wp_huge_pmd(struct vm_fault *vmf) return handle_userfault(vmf, VM_UFFD_WP); return do_huge_pmd_wp_page(vmf); } - if (vmf->vma->vm_ops->huge_fault) { - vm_fault_t ret = vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD); - if (!(ret & VM_FAULT_FALLBACK)) - return ret; + if (vmf->vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) { + if (vmf->vma->vm_ops->huge_fault) { + ret = vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD); + if (!(ret & VM_FAULT_FALLBACK)) + return ret; 
+ } } /* COW or write-notify handled on pte level: split pmd. */ @@ -4839,14 +4842,17 @@ static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud) { #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \ defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) + vm_fault_t ret; + /* No support for anonymous transparent PUD pages yet */ if (vma_is_anonymous(vmf->vma)) goto split; - if (vmf->vma->vm_ops->huge_fault) { - vm_fault_t ret = vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD); - - if (!(ret & VM_FAULT_FALLBACK)) - return ret; + if (vmf->vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) { + if (vmf->vma->vm_ops->huge_fault) { + ret = vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD); + if (!(ret & VM_FAULT_FALLBACK)) + return ret; + } } split: /* COW or write-notify not handled on PUD level: split pud.*/ From 8d6a0ac09a16c026e1e2a03a61e12e95c48a25a6 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 16 Nov 2022 11:26:47 +0100 Subject: [PATCH 3063/4122] mm: extend FAULT_FLAG_UNSHARE support to anything in a COW mapping Extend FAULT_FLAG_UNSHARE to break COW on anything mapped into a COW (i.e., private writable) mapping and adjust the documentation accordingly. FAULT_FLAG_UNSHARE will now also break COW when encountering the shared zeropage, a pagecache page, a PFNMAP, ... inside a COW mapping, by properly replacing the mapped page/pfn by a private copy (an exclusive anonymous page). Note that only do_wp_page() needs care: hugetlb_wp() already handles FAULT_FLAG_UNSHARE correctly. wp_huge_pmd()/wp_huge_pud() also handles it correctly, for example, splitting the huge zeropage on FAULT_FLAG_UNSHARE such that we can handle FAULT_FLAG_UNSHARE on the PTE level. This change is a requirement for reliable long-term R/O pinning in COW mappings. 
Link: https://lkml.kernel.org/r/20221116102659.70287-9-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 8 ++++---- mm/memory.c | 4 ---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 157c2e22cc7f..018b1c098173 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1039,9 +1039,9 @@ typedef struct { * @FAULT_FLAG_REMOTE: The fault is not for current task/mm. * @FAULT_FLAG_INSTRUCTION: The fault was during an instruction fetch. * @FAULT_FLAG_INTERRUPTIBLE: The fault can be interrupted by non-fatal signals. - * @FAULT_FLAG_UNSHARE: The fault is an unsharing request to unshare (and mark - * exclusive) a possibly shared anonymous page that is - * mapped R/O. + * @FAULT_FLAG_UNSHARE: The fault is an unsharing request to break COW in a + * COW mapping, making sure that an exclusive anon page is + * mapped after the fault. * @FAULT_FLAG_ORIG_PTE_VALID: whether the fault has vmf->orig_pte cached. * We should only access orig_pte if this flag set. * @@ -1066,7 +1066,7 @@ typedef struct { * * The combination FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE is illegal. * FAULT_FLAG_UNSHARE is ignored and treated like an ordinary read fault when - * no existing R/O-mapped anonymous page is encountered. + * applied to mappings that are not COW mappings. */ enum fault_flag { FAULT_FLAG_WRITE = 1 << 0, diff --git a/mm/memory.c b/mm/memory.c index 6cec0adab37f..815d2ff05c62 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3431,10 +3431,6 @@ reuse: } wp_page_reuse(vmf); return VM_FAULT_WRITE; - } else if (unshare) { - /* No anonymous page -> nothing to do. 
*/ - pte_unmap_unlock(vmf->pte, vmf->ptl); - return 0; } copy: /* From 84209e87c6963f928194a890399e24e8ad299db1 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 16 Nov 2022 11:26:48 +0100 Subject: [PATCH 3064/4122] mm/gup: reliable R/O long-term pinning in COW mappings We already support reliable R/O pinning of anonymous memory. However, assume we end up pinning (R/O long-term) a pagecache page or the shared zeropage inside a writable private ("COW") mapping. The next write access will trigger a write-fault and replace the pinned page by an exclusive anonymous page in the process page tables to break COW: the pinned page no longer corresponds to the page mapped into the process' page table. Now that FAULT_FLAG_UNSHARE can break COW on anything mapped into a COW mapping, let's properly break COW first before R/O long-term pinning something that's not an exclusive anon page inside a COW mapping. FAULT_FLAG_UNSHARE will break COW and map an exclusive anon page instead that can get pinned safely. With this change, we can stop using FOLL_FORCE|FOLL_WRITE for reliable R/O long-term pinning in COW mappings. With this change, the new R/O long-term pinning tests for non-anonymous memory succeed: # [RUN] R/O longterm GUP pin ... with shared zeropage ok 151 Longterm R/O pin is reliable # [RUN] R/O longterm GUP pin ... with memfd ok 152 Longterm R/O pin is reliable # [RUN] R/O longterm GUP pin ... with tmpfile ok 153 Longterm R/O pin is reliable # [RUN] R/O longterm GUP pin ... with huge zeropage ok 154 Longterm R/O pin is reliable # [RUN] R/O longterm GUP pin ... with memfd hugetlb (2048 kB) ok 155 Longterm R/O pin is reliable # [RUN] R/O longterm GUP pin ... with memfd hugetlb (1048576 kB) ok 156 Longterm R/O pin is reliable # [RUN] R/O longterm GUP-fast pin ... with shared zeropage ok 157 Longterm R/O pin is reliable # [RUN] R/O longterm GUP-fast pin ... with memfd ok 158 Longterm R/O pin is reliable # [RUN] R/O longterm GUP-fast pin ... 
with tmpfile ok 159 Longterm R/O pin is reliable # [RUN] R/O longterm GUP-fast pin ... with huge zeropage ok 160 Longterm R/O pin is reliable # [RUN] R/O longterm GUP-fast pin ... with memfd hugetlb (2048 kB) ok 161 Longterm R/O pin is reliable # [RUN] R/O longterm GUP-fast pin ... with memfd hugetlb (1048576 kB) ok 162 Longterm R/O pin is reliable Note 1: We don't care about short-term R/O-pinning, because they have snapshot semantics: they are not supposed to observe modifications that happen after pinning. As one example, assume we start direct I/O to read from a page and store page content into a file: modifications to page content after starting direct I/O are not guaranteed to end up in the file. So even if we'd pin the shared zeropage, the end result would be as expected -- getting zeroes stored to the file. Note 2: For shared mappings we'll now always fall back to the slow path to look up the VMA when R/O long-term pinning. While that's the necessary price we have to pay right now, it's actually not that bad in practice: most FOLL_LONGTERM users already specify FOLL_WRITE, for example, along with FOLL_FORCE because they tried dealing with COW mappings correctly ... Note 3: For users that use FOLL_LONGTERM right now without FOLL_WRITE, such as VFIO, we'd now no longer pin the shared zeropage. Instead, we'd populate exclusive anon pages that we can pin. There was a concern that this could affect the memlock limit of existing setups. For example, a VM running with VFIO could run into the memlock limit and fail to run. However, we essentially had the same behavior already in commit 17839856fd58 ("gup: document and work around "COW can break either way" issue") which got merged into some enterprise distros, and there were not any such complaints. So most probably, we're fine. 
Link: https://lkml.kernel.org/r/20221116102659.70287-10-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Daniel Vetter Reviewed-by: Vlastimil Babka Reviewed-by: John Hubbard Signed-off-by: Andrew Morton --- include/linux/mm.h | 27 ++++++++++++++++++++++++--- mm/gup.c | 10 +++++----- mm/huge_memory.c | 2 +- mm/hugetlb.c | 7 ++++--- 4 files changed, 34 insertions(+), 12 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 686879dbb0bd..d8363ac34a7c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3149,8 +3149,12 @@ static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags) * Must be called with the (sub)page that's actually referenced via the * page table entry, which might not necessarily be the head page for a * PTE-mapped THP. + * + * If the vma is NULL, we're coming from the GUP-fast path and might have + * to fallback to the slow path just to lookup the vma. */ -static inline bool gup_must_unshare(unsigned int flags, struct page *page) +static inline bool gup_must_unshare(struct vm_area_struct *vma, + unsigned int flags, struct page *page) { /* * FOLL_WRITE is implicitly handled correctly as the page table entry @@ -3163,8 +3167,25 @@ static inline bool gup_must_unshare(unsigned int flags, struct page *page) * Note: PageAnon(page) is stable until the page is actually getting * freed. */ - if (!PageAnon(page)) - return false; + if (!PageAnon(page)) { + /* + * We only care about R/O long-term pining: R/O short-term + * pinning does not have the semantics to observe successive + * changes through the process page tables. + */ + if (!(flags & FOLL_LONGTERM)) + return false; + + /* We really need the vma ... */ + if (!vma) + return true; + + /* + * ... because we only care about writable private ("COW") + * mappings where we have to break COW early. + */ + return is_cow_mapping(vma->vm_flags); + } /* Paired with a memory barrier in page_try_share_anon_rmap(). 
*/ if (IS_ENABLED(CONFIG_HAVE_FAST_GUP)) diff --git a/mm/gup.c b/mm/gup.c index 2500d00db51b..39c84a200f06 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -603,7 +603,7 @@ retry: } } - if (!pte_write(pte) && gup_must_unshare(flags, page)) { + if (!pte_write(pte) && gup_must_unshare(vma, flags, page)) { page = ERR_PTR(-EMLINK); goto out; } @@ -2380,7 +2380,7 @@ static int gup_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr, goto pte_unmap; } - if (!pte_write(pte) && gup_must_unshare(flags, page)) { + if (!pte_write(pte) && gup_must_unshare(NULL, flags, page)) { gup_put_folio(folio, 1, flags); goto pte_unmap; } @@ -2566,7 +2566,7 @@ static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, return 0; } - if (!pte_write(pte) && gup_must_unshare(flags, &folio->page)) { + if (!pte_write(pte) && gup_must_unshare(NULL, flags, &folio->page)) { gup_put_folio(folio, refs, flags); return 0; } @@ -2632,7 +2632,7 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr, return 0; } - if (!pmd_write(orig) && gup_must_unshare(flags, &folio->page)) { + if (!pmd_write(orig) && gup_must_unshare(NULL, flags, &folio->page)) { gup_put_folio(folio, refs, flags); return 0; } @@ -2672,7 +2672,7 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr, return 0; } - if (!pud_write(orig) && gup_must_unshare(flags, &folio->page)) { + if (!pud_write(orig) && gup_must_unshare(NULL, flags, &folio->page)) { gup_put_folio(folio, refs, flags); return 0; } diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 5eb702726a0e..86a30041a2e1 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1480,7 +1480,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, if (pmd_protnone(*pmd) && !gup_can_follow_protnone(flags)) return NULL; - if (!pmd_write(*pmd) && gup_must_unshare(flags, page)) + if (!pmd_write(*pmd) && gup_must_unshare(vma, flags, page)) return ERR_PTR(-EMLINK); VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) && diff --git 
a/mm/hugetlb.c b/mm/hugetlb.c index 3d381b26d553..9d97c9a2a15d 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -6197,7 +6197,8 @@ static void record_subpages_vmas(struct page *page, struct vm_area_struct *vma, } } -static inline bool __follow_hugetlb_must_fault(unsigned int flags, pte_t *pte, +static inline bool __follow_hugetlb_must_fault(struct vm_area_struct *vma, + unsigned int flags, pte_t *pte, bool *unshare) { pte_t pteval = huge_ptep_get(pte); @@ -6209,7 +6210,7 @@ static inline bool __follow_hugetlb_must_fault(unsigned int flags, pte_t *pte, return false; if (flags & FOLL_WRITE) return true; - if (gup_must_unshare(flags, pte_page(pteval))) { + if (gup_must_unshare(vma, flags, pte_page(pteval))) { *unshare = true; return true; } @@ -6338,7 +6339,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, * directly from any kind of swap entries. */ if (absent || - __follow_hugetlb_must_fault(flags, pte, &unshare)) { + __follow_hugetlb_must_fault(vma, flags, pte, &unshare)) { vm_fault_t ret; unsigned int fault_flags = 0; From b40656aa7d559adc1fe689396dc58b92a9a27286 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 16 Nov 2022 11:26:49 +0100 Subject: [PATCH 3065/4122] RDMA/umem: remove FOLL_FORCE usage GUP now supports reliable R/O long-term pinning in COW mappings, such that we break COW early. MAP_SHARED VMAs only use the shared zeropage so far in one corner case (DAXFS file with holes), which can be ignored because GUP does not support long-term pinning in fsdax (see check_vma_flags()). Consequently, FOLL_FORCE | FOLL_WRITE | FOLL_LONGTERM is no longer required for reliable R/O long-term pinning: FOLL_LONGTERM is sufficient. So stop using FOLL_FORCE, which is really only for ptrace access. 
Link: https://lkml.kernel.org/r/20221116102659.70287-11-david@redhat.com Tested-by: Leon Romanovsky [over mlx4 and mlx5] Signed-off-by: David Hildenbrand Reviewed-by: Jason Gunthorpe Cc: Leon Romanovsky Signed-off-by: Andrew Morton --- drivers/infiniband/core/umem.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 86d479772fbc..755a9c57db6f 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -156,7 +156,7 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr, struct mm_struct *mm; unsigned long npages; int pinned, ret; - unsigned int gup_flags = FOLL_WRITE; + unsigned int gup_flags = FOLL_LONGTERM; /* * If the combination of the addr and size requested for this memory @@ -210,8 +210,8 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr, cur_base = addr & PAGE_MASK; - if (!umem->writable) - gup_flags |= FOLL_FORCE; + if (umem->writable) + gup_flags |= FOLL_WRITE; while (npages) { cond_resched(); @@ -219,7 +219,7 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr, min_t(unsigned long, npages, PAGE_SIZE / sizeof(struct page *)), - gup_flags | FOLL_LONGTERM, page_list); + gup_flags, page_list); if (pinned < 0) { ret = pinned; goto umem_release; From a9d0284033e974a355b806fdb5fbabf8301bcd16 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 16 Nov 2022 11:26:50 +0100 Subject: [PATCH 3066/4122] RDMA/usnic: remove FOLL_FORCE usage GUP now supports reliable R/O long-term pinning in COW mappings, such that we break COW early. MAP_SHARED VMAs only use the shared zeropage so far in one corner case (DAXFS file with holes), which can be ignored because GUP does not support long-term pinning in fsdax (see check_vma_flags()). Consequently, FOLL_FORCE | FOLL_WRITE | FOLL_LONGTERM is no longer required for reliable R/O long-term pinning: FOLL_LONGTERM is sufficient. 
So stop using FOLL_FORCE, which is really only for ptrace access. Link: https://lkml.kernel.org/r/20221116102659.70287-12-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Jason Gunthorpe Cc: Christian Benvenuti Cc: Nelson Escobar Cc: Leon Romanovsky Signed-off-by: Andrew Morton --- drivers/infiniband/hw/usnic/usnic_uiom.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index 67923ced6e2d..c301b3be9f30 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -85,6 +85,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, int dmasync, struct usnic_uiom_reg *uiomr) { struct list_head *chunk_list = &uiomr->chunk_list; + unsigned int gup_flags = FOLL_LONGTERM; struct page **page_list; struct scatterlist *sg; struct usnic_uiom_chunk *chunk; @@ -96,7 +97,6 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, int off; int i; dma_addr_t pa; - unsigned int gup_flags; struct mm_struct *mm; /* @@ -131,8 +131,8 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, goto out; } - gup_flags = FOLL_WRITE; - gup_flags |= (writable) ? 0 : FOLL_FORCE; + if (writable) + gup_flags |= FOLL_WRITE; cur_base = addr & PAGE_MASK; ret = 0; @@ -140,8 +140,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, ret = pin_user_pages(cur_base, min_t(unsigned long, npages, PAGE_SIZE / sizeof(struct page *)), - gup_flags | FOLL_LONGTERM, - page_list, NULL); + gup_flags, page_list, NULL); if (ret < 0) goto out; From 129e636fe9837fcfea68bfd368a07548d9880726 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 16 Nov 2022 11:26:51 +0100 Subject: [PATCH 3067/4122] RDMA/siw: remove FOLL_FORCE usage GUP now supports reliable R/O long-term pinning in COW mappings, such that we break COW early. 
MAP_SHARED VMAs only use the shared zeropage so far in one corner case (DAXFS file with holes), which can be ignored because GUP does not support long-term pinning in fsdax (see check_vma_flags()). Consequently, FOLL_FORCE | FOLL_WRITE | FOLL_LONGTERM is no longer required for reliable R/O long-term pinning: FOLL_LONGTERM is sufficient. So stop using FOLL_FORCE, which is really only for ptrace access. Link: https://lkml.kernel.org/r/20221116102659.70287-13-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Jason Gunthorpe Cc: Bernard Metzler Cc: Leon Romanovsky Signed-off-by: Andrew Morton --- drivers/infiniband/sw/siw/siw_mem.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw_mem.c b/drivers/infiniband/sw/siw/siw_mem.c index 61c17db70d65..b2b33dd3b4fa 100644 --- a/drivers/infiniband/sw/siw/siw_mem.c +++ b/drivers/infiniband/sw/siw/siw_mem.c @@ -368,7 +368,7 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable) struct mm_struct *mm_s; u64 first_page_va; unsigned long mlock_limit; - unsigned int foll_flags = FOLL_WRITE; + unsigned int foll_flags = FOLL_LONGTERM; int num_pages, num_chunks, i, rv = 0; if (!can_do_mlock()) @@ -391,8 +391,8 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable) mmgrab(mm_s); - if (!writable) - foll_flags |= FOLL_FORCE; + if (writable) + foll_flags |= FOLL_WRITE; mmap_read_lock(mm_s); @@ -423,8 +423,7 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable) while (nents) { struct page **plist = &umem->page_chunk[i].plist[got]; - rv = pin_user_pages(first_page_va, nents, - foll_flags | FOLL_LONGTERM, + rv = pin_user_pages(first_page_va, nents, foll_flags, plist, NULL); if (rv < 0) goto out_sem_up; From 3298de2c66e0276abe6b95041fd3605a377523fc Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 16 Nov 2022 11:26:52 +0100 Subject: [PATCH 3068/4122] media: videobuf-dma-sg: remove FOLL_FORCE usage GUP now supports reliable R/O 
long-term pinning in COW mappings, such that we break COW early. MAP_SHARED VMAs only use the shared zeropage so far in one corner case (DAXFS file with holes), which can be ignored because GUP does not support long-term pinning in fsdax (see check_vma_flags()). Consequently, FOLL_FORCE | FOLL_WRITE | FOLL_LONGTERM is no longer required for reliable R/O long-term pinning: FOLL_LONGTERM is sufficient. So stop using FOLL_FORCE, which is really only for ptrace access. Link: https://lkml.kernel.org/r/20221116102659.70287-14-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Daniel Vetter Acked-by: Hans Verkuil Cc: Mauro Carvalho Chehab Signed-off-by: Andrew Morton --- drivers/media/v4l2-core/videobuf-dma-sg.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/media/v4l2-core/videobuf-dma-sg.c b/drivers/media/v4l2-core/videobuf-dma-sg.c index f75e5eedeee0..234e9f647c96 100644 --- a/drivers/media/v4l2-core/videobuf-dma-sg.c +++ b/drivers/media/v4l2-core/videobuf-dma-sg.c @@ -151,17 +151,16 @@ static void videobuf_dma_init(struct videobuf_dmabuf *dma) static int videobuf_dma_init_user_locked(struct videobuf_dmabuf *dma, int direction, unsigned long data, unsigned long size) { + unsigned int gup_flags = FOLL_LONGTERM; unsigned long first, last; - int err, rw = 0; - unsigned int flags = FOLL_FORCE; + int err; dma->direction = direction; switch (dma->direction) { case DMA_FROM_DEVICE: - rw = READ; + gup_flags |= FOLL_WRITE; break; case DMA_TO_DEVICE: - rw = WRITE; break; default: BUG(); @@ -177,14 +176,11 @@ static int videobuf_dma_init_user_locked(struct videobuf_dmabuf *dma, if (NULL == dma->pages) return -ENOMEM; - if (rw == READ) - flags |= FOLL_WRITE; - dprintk(1, "init user [0x%lx+0x%lx => %lu pages]\n", data, size, dma->nr_pages); - err = pin_user_pages(data & PAGE_MASK, dma->nr_pages, - flags | FOLL_LONGTERM, dma->pages, NULL); + err = pin_user_pages(data & PAGE_MASK, dma->nr_pages, gup_flags, + dma->pages, NULL); if 
(err != dma->nr_pages) { dma->nr_pages = (err >= 0) ? err : 0; From 7d96eb6a9164607df09c9589ed3ba9ef4e9cc2a6 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 16 Nov 2022 11:26:53 +0100 Subject: [PATCH 3069/4122] drm/etnaviv: remove FOLL_FORCE usage GUP now supports reliable R/O long-term pinning in COW mappings, such that we break COW early. MAP_SHARED VMAs only use the shared zeropage so far in one corner case (DAXFS file with holes), which can be ignored because GUP does not support long-term pinning in fsdax (see check_vma_flags()). commit cd5297b0855f ("drm/etnaviv: Use FOLL_FORCE for userptr") documents that FOLL_FORCE | FOLL_WRITE was really only used for reliable R/O pinning. Consequently, FOLL_FORCE | FOLL_WRITE | FOLL_LONGTERM is no longer required for reliable R/O long-term pinning: FOLL_LONGTERM is sufficient. So stop using FOLL_FORCE, which is really only for ptrace access. Link: https://lkml.kernel.org/r/20221116102659.70287-15-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Daniel Vetter Cc: Lucas Stach Cc: Russell King Cc: Christian Gmeiner Cc: David Airlie Signed-off-by: Andrew Morton --- drivers/gpu/drm/etnaviv/etnaviv_gem.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c index cc386f8a7116..efe2240945d0 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c @@ -638,6 +638,7 @@ static int etnaviv_gem_userptr_get_pages(struct etnaviv_gem_object *etnaviv_obj) struct page **pvec = NULL; struct etnaviv_gem_userptr *userptr = &etnaviv_obj->userptr; int ret, pinned = 0, npages = etnaviv_obj->base.size >> PAGE_SHIFT; + unsigned int gup_flags = FOLL_LONGTERM; might_lock_read(&current->mm->mmap_lock); @@ -648,14 +649,15 @@ static int etnaviv_gem_userptr_get_pages(struct etnaviv_gem_object *etnaviv_obj) if (!pvec) return -ENOMEM; + if (!userptr->ro) + gup_flags |= FOLL_WRITE; + do { unsigned
num_pages = npages - pinned; uint64_t ptr = userptr->ptr + pinned * PAGE_SIZE; struct page **pages = pvec + pinned; - ret = pin_user_pages_fast(ptr, num_pages, - FOLL_WRITE | FOLL_FORCE | FOLL_LONGTERM, - pages); + ret = pin_user_pages_fast(ptr, num_pages, gup_flags, pages); if (ret < 0) { unpin_user_pages(pvec, pinned); kvfree(pvec); From 70b96f24a441e5a7e0853e3893edd9dc58b67996 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 16 Nov 2022 11:26:54 +0100 Subject: [PATCH 3070/4122] media: pci/ivtv: remove FOLL_FORCE usage FOLL_FORCE is really only for ptrace access. R/O pinning a page is supposed to fail if the VMA misses proper access permissions (no VM_READ). Let's just remove FOLL_FORCE usage here; there would have to be a pretty good reason to allow arbitrary drivers to R/O pin pages in a PROT_NONE VMA. Most probably, FOLL_FORCE usage is just some legacy leftover. Link: https://lkml.kernel.org/r/20221116102659.70287-16-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Hans Verkuil Cc: Andy Walls Cc: Mauro Carvalho Chehab Signed-off-by: Andrew Morton --- drivers/media/pci/ivtv/ivtv-udma.c | 2 +- drivers/media/pci/ivtv/ivtv-yuv.c | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/media/pci/ivtv/ivtv-udma.c b/drivers/media/pci/ivtv/ivtv-udma.c index 210be8290f24..99b9f55ca829 100644 --- a/drivers/media/pci/ivtv/ivtv-udma.c +++ b/drivers/media/pci/ivtv/ivtv-udma.c @@ -115,7 +115,7 @@ int ivtv_udma_setup(struct ivtv *itv, unsigned long ivtv_dest_addr, /* Pin user pages for DMA Xfer */ err = pin_user_pages_unlocked(user_dma.uaddr, user_dma.page_count, - dma->map, FOLL_FORCE); + dma->map, 0); if (user_dma.page_count != err) { IVTV_DEBUG_WARN("failed to map user pages, returned %d instead of %d\n", diff --git a/drivers/media/pci/ivtv/ivtv-yuv.c b/drivers/media/pci/ivtv/ivtv-yuv.c index 4ba10c34a16a..582146f8d70d 100644 --- a/drivers/media/pci/ivtv/ivtv-yuv.c +++ b/drivers/media/pci/ivtv/ivtv-yuv.c @@ -63,12 +63,11 @@ 
static int ivtv_yuv_prep_user_dma(struct ivtv *itv, struct ivtv_user_dma *dma, /* Pin user pages for DMA Xfer */ y_pages = pin_user_pages_unlocked(y_dma.uaddr, - y_dma.page_count, &dma->map[0], FOLL_FORCE); + y_dma.page_count, &dma->map[0], 0); uv_pages = 0; /* silence gcc. value is set and consumed only if: */ if (y_pages == y_dma.page_count) { uv_pages = pin_user_pages_unlocked(uv_dma.uaddr, - uv_dma.page_count, &dma->map[y_pages], - FOLL_FORCE); + uv_dma.page_count, &dma->map[y_pages], 0); } if (y_pages != y_dma.page_count || uv_pages != uv_dma.page_count) { From cb78a634f3f7ff743e19fbffcb72d794e4bd7f73 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 16 Nov 2022 11:26:55 +0100 Subject: [PATCH 3071/4122] mm/frame-vector: remove FOLL_FORCE usage FOLL_FORCE is really only for ptrace access. According to commit 707947247e95 ("media: videobuf2-vmalloc: get_userptr: buffers are always writable"), get_vaddr_frames() currently pins all pages writable as a workaround for issues with read-only buffers. FOLL_FORCE, however, seems to be a legacy leftover as it predates commit 707947247e95 ("media: videobuf2-vmalloc: get_userptr: buffers are always writable"). Let's just remove it. Once the read-only buffer issue has been resolved, FOLL_WRITE could again be set depending on the DMA direction. 
Link: https://lkml.kernel.org/r/20221116102659.70287-17-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Daniel Vetter Acked-by: Hans Verkuil Acked-by: Tomasz Figa Cc: Marek Szyprowski Cc: Marek Szyprowski Cc: Mauro Carvalho Chehab Signed-off-by: Andrew Morton --- drivers/media/common/videobuf2/frame_vector.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/common/videobuf2/frame_vector.c b/drivers/media/common/videobuf2/frame_vector.c index 542dde9d2609..062e98148c53 100644 --- a/drivers/media/common/videobuf2/frame_vector.c +++ b/drivers/media/common/videobuf2/frame_vector.c @@ -50,7 +50,7 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames, start = untagged_addr(start); ret = pin_user_pages_fast(start, nr_frames, - FOLL_FORCE | FOLL_WRITE | FOLL_LONGTERM, + FOLL_WRITE | FOLL_LONGTERM, (struct page **)(vec->ptrs)); if (ret > 0) { vec->got_ref = true; From c098ce73c247a0e36d8a6b8cfdc7d05e4bc81bd0 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 16 Nov 2022 11:26:56 +0100 Subject: [PATCH 3072/4122] drm/exynos: remove FOLL_FORCE usage FOLL_FORCE is really only for ptrace access. As we unpin the pinned pages using unpin_user_pages_dirty_lock(true), the assumption is that all these pages are writable. FOLL_FORCE in this case seems to be a legacy leftover. Let's just remove it. 
Link: https://lkml.kernel.org/r/20221116102659.70287-18-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Daniel Vetter Cc: Inki Dae Cc: Seung-Woo Kim Cc: Kyungmin Park Cc: David Airlie Cc: Krzysztof Kozlowski Signed-off-by: Andrew Morton --- drivers/gpu/drm/exynos/exynos_drm_g2d.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c index 471fd6c8135f..e19c2ceb3759 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c +++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c @@ -477,7 +477,7 @@ static dma_addr_t *g2d_userptr_get_dma_addr(struct g2d_data *g2d, } ret = pin_user_pages_fast(start, npages, - FOLL_FORCE | FOLL_WRITE | FOLL_LONGTERM, + FOLL_WRITE | FOLL_LONGTERM, g2d_userptr->pages); if (ret != npages) { DRM_DEV_ERROR(g2d->dev, From 20ea7783236c374ddb7f201132a5fb9624563a77 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 16 Nov 2022 11:26:57 +0100 Subject: [PATCH 3073/4122] RDMA/hw/qib/qib_user_pages: remove FOLL_FORCE usage FOLL_FORCE is really only for ptrace access. As we unpin the pinned pages using unpin_user_pages_dirty_lock(true), the assumption is that all these pages are writable. FOLL_FORCE in this case seems to be a legacy leftover. Let's just remove it. 
Link: https://lkml.kernel.org/r/20221116102659.70287-19-david@redhat.com Signed-off-by: David Hildenbrand Cc: Dennis Dalessandro Cc: Jason Gunthorpe Cc: Leon Romanovsky Signed-off-by: Andrew Morton --- drivers/infiniband/hw/qib/qib_user_pages.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c index f4b5f05058e4..f693bc753b6b 100644 --- a/drivers/infiniband/hw/qib/qib_user_pages.c +++ b/drivers/infiniband/hw/qib/qib_user_pages.c @@ -110,7 +110,7 @@ int qib_get_user_pages(unsigned long start_page, size_t num_pages, for (got = 0; got < num_pages; got += ret) { ret = pin_user_pages(start_page + got * PAGE_SIZE, num_pages - got, - FOLL_LONGTERM | FOLL_WRITE | FOLL_FORCE, + FOLL_LONGTERM | FOLL_WRITE, p + got, NULL); if (ret < 0) { mmap_read_unlock(current->mm); From 052d9b0f7ae1200b4a0783cf934ee4a987d37fd7 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 16 Nov 2022 11:26:58 +0100 Subject: [PATCH 3074/4122] habanalabs: remove FOLL_FORCE usage FOLL_FORCE is really only for ptrace access. As we unpin the pinned pages using unpin_user_pages_dirty_lock(true), the assumption is that all these pages are writable. FOLL_FORCE in this case seems to be due to copy-and-paste from other drivers. Let's just remove it.
Link: https://lkml.kernel.org/r/20221116102659.70287-20-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Oded Gabbay Cc: Oded Gabbay Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton --- drivers/misc/habanalabs/common/memory.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c index ef28f3b37b93..e35cca96bbef 100644 --- a/drivers/misc/habanalabs/common/memory.c +++ b/drivers/misc/habanalabs/common/memory.c @@ -2312,8 +2312,7 @@ static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size, if (!userptr->pages) return -ENOMEM; - rc = pin_user_pages_fast(start, npages, - FOLL_FORCE | FOLL_WRITE | FOLL_LONGTERM, + rc = pin_user_pages_fast(start, npages, FOLL_WRITE | FOLL_LONGTERM, userptr->pages); if (rc != npages) { From f347454d034184b4f0a2caf6e14daf7848cea01c Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 31 Oct 2022 16:25:24 +0100 Subject: [PATCH 3075/4122] mm/gup: disallow FOLL_FORCE|FOLL_WRITE on hugetlb mappings hugetlb does not support fake write-faults (write faults without write permissions). However, we are currently able to trigger a FAULT_FLAG_WRITE fault on a VMA without VM_WRITE. If we'd ever want to support FOLL_FORCE|FOLL_WRITE, we'd have to teach hugetlb to: (1) Leave the page mapped R/O after the fake write-fault, like maybe_mkwrite() does. (2) Allow writing to an exclusive anon page that's mapped R/O when FOLL_FORCE is set, like can_follow_write_pte(). E.g., __follow_hugetlb_must_fault() needs adjustment. For now, it's not clear if that added complexity is really required. History told us that FOLL_FORCE is dangerous and that we better limit its use to a bare minimum.
-------------------------------------------------------------------------- #include #include #include #include #include #include #include #include int main(int argc, char **argv) { char *map; int mem_fd; map = mmap(NULL, 2 * 1024 * 1024u, PROT_READ, MAP_PRIVATE|MAP_ANON|MAP_HUGETLB|MAP_HUGE_2MB, -1, 0); if (map == MAP_FAILED) { fprintf(stderr, "mmap() failed: %d\n", errno); return 1; } mem_fd = open("/proc/self/mem", O_RDWR); if (mem_fd < 0) { fprintf(stderr, "open(/proc/self/mem) failed: %d\n", errno); return 1; } if (pwrite(mem_fd, "0", 1, (uintptr_t) map) == 1) { fprintf(stderr, "write() succeeded, which is unexpected\n"); return 1; } printf("write() failed as expected: %d\n", errno); return 0; } -------------------------------------------------------------------------- Fortunately, we have a sanity check in hugetlb_wp() in place ever since commit 1d8d14641fd9 ("mm/hugetlb: support write-faults in shared mappings"), that bails out instead of silently mapping a page writable in a !PROT_WRITE VMA. 
Consequently, above reproducer triggers a warning, similar to the one reported by szsbot: ------------[ cut here ]------------ WARNING: CPU: 1 PID: 3612 at mm/hugetlb.c:5313 hugetlb_wp+0x20a/0x1af0 mm/hugetlb.c:5313 Modules linked in: CPU: 1 PID: 3612 Comm: syz-executor250 Not tainted 6.1.0-rc2-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/11/2022 RIP: 0010:hugetlb_wp+0x20a/0x1af0 mm/hugetlb.c:5313 Code: ea 03 80 3c 02 00 0f 85 31 14 00 00 49 8b 5f 20 31 ff 48 89 dd 83 e5 02 48 89 ee e8 70 ab b7 ff 48 85 ed 75 5b e8 76 ae b7 ff <0f> 0b 41 bd 40 00 00 00 e8 69 ae b7 ff 48 b8 00 00 00 00 00 fc ff RSP: 0018:ffffc90003caf620 EFLAGS: 00010293 RAX: 0000000000000000 RBX: 0000000008640070 RCX: 0000000000000000 RDX: ffff88807b963a80 RSI: ffffffff81c4ed2a RDI: 0000000000000007 RBP: 0000000000000000 R08: 0000000000000007 R09: 0000000000000000 R10: 0000000000000000 R11: 000000000008c07e R12: ffff888023805800 R13: 0000000000000000 R14: ffffffff91217f38 R15: ffff88801d4b0360 FS: 0000555555bba300(0000) GS:ffff8880b9b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fff7a47a1b8 CR3: 000000002378d000 CR4: 00000000003506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: hugetlb_no_page mm/hugetlb.c:5755 [inline] hugetlb_fault+0x19cc/0x2060 mm/hugetlb.c:5874 follow_hugetlb_page+0x3f3/0x1850 mm/hugetlb.c:6301 __get_user_pages+0x2cb/0xf10 mm/gup.c:1202 __get_user_pages_locked mm/gup.c:1434 [inline] __get_user_pages_remote+0x18f/0x830 mm/gup.c:2187 get_user_pages_remote+0x84/0xc0 mm/gup.c:2260 __access_remote_vm+0x287/0x6b0 mm/memory.c:5517 ptrace_access_vm+0x181/0x1d0 kernel/ptrace.c:61 generic_ptrace_pokedata kernel/ptrace.c:1323 [inline] ptrace_request+0xb46/0x10c0 kernel/ptrace.c:1046 arch_ptrace+0x36/0x510 arch/x86/kernel/ptrace.c:828 __do_sys_ptrace kernel/ptrace.c:1296 [inline] __se_sys_ptrace 
kernel/ptrace.c:1269 [inline] __x64_sys_ptrace+0x178/0x2a0 kernel/ptrace.c:1269 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd [...] So let's silence that warning by teaching GUP code that FOLL_FORCE -- so far -- does not apply to hugetlb. Note that FOLL_FORCE for read-access seems to be working as expected. The assumption is that this has been broken forever, only ever since above commit, we actually detect the wrong handling and WARN_ON_ONCE(). I assume this has been broken at least since 2014, when mm/gup.c came to life. I failed to come up with a suitable Fixes tag quickly. Link: https://lkml.kernel.org/r/20221031152524.173644-1-david@redhat.com Fixes: 1d8d14641fd9 ("mm/hugetlb: support write-faults in shared mappings") Signed-off-by: David Hildenbrand Reported-by: Cc: Mike Kravetz Cc: Peter Xu Cc: John Hubbard Cc: Jason Gunthorpe Cc: Signed-off-by: Andrew Morton --- mm/gup.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/gup.c b/mm/gup.c index 39c84a200f06..90ae44f24870 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1009,6 +1009,9 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) if (!(vm_flags & VM_WRITE)) { if (!(gup_flags & FOLL_FORCE)) return -EFAULT; + /* hugetlb does not support FOLL_FORCE|FOLL_WRITE. */ + if (is_vm_hugetlb_page(vma)) + return -EFAULT; /* * We used to let the write,force case do COW in a * VM_MAYWRITE VM_SHARED !VM_WRITE vma, so ptrace could From 931b6a8b36a2de3985eca27e758900e70cd99779 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Tue, 15 Nov 2022 18:38:08 -0700 Subject: [PATCH 3076/4122] mm: multi-gen LRU: remove NULL checks on NODE_DATA() NODE_DATA() is preallocated for all possible nodes after commit 09f49dca570a ("mm: handle uninitialized numa nodes gracefully"). Checking its return value against NULL is now unnecessary. 
Link: https://lkml.kernel.org/r/20221116013808.3995280-2-yuzhao@google.com Signed-off-by: Yu Zhao Signed-off-by: Andrew Morton --- mm/vmscan.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 82f32c929b11..805fa51b175c 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -3166,7 +3166,7 @@ static struct lruvec *get_lruvec(struct mem_cgroup *memcg, int nid) if (memcg) { struct lruvec *lruvec = &memcg->nodeinfo[nid]->lruvec; - /* for hotadd_new_pgdat() */ + /* see the comment in mem_cgroup_lruvec() */ if (!lruvec->pgdat) lruvec->pgdat = pgdat; @@ -3175,7 +3175,7 @@ static struct lruvec *get_lruvec(struct mem_cgroup *memcg, int nid) #endif VM_WARN_ON_ONCE(!mem_cgroup_disabled()); - return pgdat ? &pgdat->__lruvec : NULL; + return &pgdat->__lruvec; } static int get_swappiness(struct lruvec *lruvec, struct scan_control *sc) @@ -3239,9 +3239,6 @@ void lru_gen_add_mm(struct mm_struct *mm) for_each_node_state(nid, N_MEMORY) { struct lruvec *lruvec = get_lruvec(memcg, nid); - if (!lruvec) - continue; - /* the first addition since the last iteration */ if (lruvec->mm_state.tail == &mm_list->fifo) lruvec->mm_state.tail = &mm->lru_gen.list; @@ -3271,9 +3268,6 @@ void lru_gen_del_mm(struct mm_struct *mm) for_each_node(nid) { struct lruvec *lruvec = get_lruvec(memcg, nid); - if (!lruvec) - continue; - /* where the last iteration ended (exclusive) */ if (lruvec->mm_state.tail == &mm->lru_gen.list) lruvec->mm_state.tail = lruvec->mm_state.tail->next; @@ -5348,9 +5342,6 @@ static void lru_gen_change_state(bool enabled) for_each_node(nid) { struct lruvec *lruvec = get_lruvec(memcg, nid); - if (!lruvec) - continue; - spin_lock_irq(&lruvec->lru_lock); VM_WARN_ON_ONCE(!seq_is_valid(lruvec)); From 4c74b65f478dc9353780a6be17fc82f1b06cea80 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Wed, 16 Nov 2022 09:23:45 +0800 Subject: [PATCH 3077/4122] mm/migrate.c: stop using 0 as NULL pointer mm/migrate.c:1198:24: warning: Using plain 
integer as NULL pointer Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=3080 Link: https://lkml.kernel.org/r/20221116012345.84870-1-yang.lee@linux.alibaba.com Signed-off-by: Yang Li Reported-by: Abaci Robot Reviewed-by: David Hildenbrand Signed-off-by: Andrew Morton --- mm/migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/migrate.c b/mm/migrate.c index 4aa3b6d4f67c..3be90351ad1d 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1180,7 +1180,7 @@ static int unmap_and_move(new_page_t get_new_page, return -ENOMEM; dst = page_folio(newpage); - dst->private = 0; + dst->private = NULL; rc = __unmap_and_move(src, dst, force, mode); if (rc == MIGRATEPAGE_SUCCESS) set_page_owner_migrate_reason(&dst->page, reason); From 47939359add5242d27ee6a30e8bcb0cef15ba45c Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Thu, 17 Nov 2022 23:13:26 +0900 Subject: [PATCH 3078/4122] zram: remove unused stats fields We don't show num_reads and num_writes since we removed corresponding sysfs nodes in 2017. Block layer stats are exposed via /sys/block/zramX/stat file. However, we still increment those atomic vars and store them in zram stats. Remove leftovers. 
Link: https://lkml.kernel.org/r/20221117141326.1105181-1-senozhatsky@chromium.org Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Nitin Gupta Signed-off-by: Andrew Morton --- drivers/block/zram/zram_drv.c | 2 -- drivers/block/zram/zram_drv.h | 2 -- 2 files changed, 4 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 9d33801e8ba8..e290d6d97047 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1981,11 +1981,9 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, int ret; if (!op_is_write(op)) { - atomic64_inc(&zram->stats.num_reads); ret = zram_bvec_read(zram, bvec, index, offset, bio); flush_dcache_page(bvec->bv_page); } else { - atomic64_inc(&zram->stats.num_writes); ret = zram_bvec_write(zram, bvec, index, offset, bio); } diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 473325415a74..c5254626f051 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -76,8 +76,6 @@ struct zram_table_entry { struct zram_stats { atomic64_t compr_data_size; /* compressed size of pages stored */ - atomic64_t num_reads; /* failed + successful */ - atomic64_t num_writes; /* --do-- */ atomic64_t failed_reads; /* can happen when memory is too low */ atomic64_t failed_writes; /* can happen when memory is too low */ atomic64_t invalid_io; /* non-page-aligned I/O requests */ From 91a99f1d1248bdde674b66e20ac472ec76f6a202 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 17 Nov 2022 16:29:15 -0500 Subject: [PATCH 3079/4122] selftests/vm: use memfd for hugepage-mmap test This test was overlooked with a hard-coded mntpoint path in test when we're removing the hugetlb mntpoint in commit 0796c7b8be84. Fix it up so the test can keep running. 
Link: https://lkml.kernel.org/r/Y3aojfUC2nSwbCzB@x1n Fixes: 0796c7b8be84 ("selftests/vm: drop mnt point for hugetlb in run_vmtests.sh") Signed-off-by: Peter Xu Reported-by: Joel Savitz Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/hugepage-mmap.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/vm/hugepage-mmap.c b/tools/testing/selftests/vm/hugepage-mmap.c index 93f9e7b81331..955ef87f382c 100644 --- a/tools/testing/selftests/vm/hugepage-mmap.c +++ b/tools/testing/selftests/vm/hugepage-mmap.c @@ -16,14 +16,13 @@ * range. * Other architectures, such as ppc64, i386 or x86_64 are not so constrained. */ - +#define _GNU_SOURCE #include #include #include #include #include -#define FILE_NAME "huge/hugepagefile" #define LENGTH (256UL*1024*1024) #define PROTECTION (PROT_READ | PROT_WRITE) @@ -67,16 +66,16 @@ int main(void) void *addr; int fd, ret; - fd = open(FILE_NAME, O_CREAT | O_RDWR, 0755); + fd = memfd_create("hugepage-mmap", MFD_HUGETLB); if (fd < 0) { - perror("Open failed"); + perror("memfd_create() failed"); exit(1); } addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, fd, 0); if (addr == MAP_FAILED) { perror("mmap"); - unlink(FILE_NAME); + close(fd); exit(1); } @@ -87,7 +86,6 @@ int main(void) munmap(addr, LENGTH); close(fd); - unlink(FILE_NAME); return ret; } From c3e58a70425ac6ddaae1529c8146e88b4f7252bb Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 18 Nov 2022 10:17:13 +0000 Subject: [PATCH 3080/4122] mm/page_alloc: always remove pages from temporary list Patch series "Leave IRQs enabled for per-cpu page allocations", v3. This patch (of 2): free_unref_page_list() has neglected to remove pages properly from the list of pages to free since forever. It works by coincidence because list_add happened to do the right thing adding the pages to just the PCP lists. 
However, a later patch added pages to either the PCP list or the zone list but only properly deleted the page from the list in one path leading to list corruption and a subsequent failure. As a preparation patch, always delete the pages from one list properly before adding to another. On its own, this fixes nothing although it adds a fractional amount of overhead but is critical to the next patch. Link: https://lkml.kernel.org/r/20221118101714.19590-1-mgorman@techsingularity.net Link: https://lkml.kernel.org/r/20221118101714.19590-2-mgorman@techsingularity.net Signed-off-by: Mel Gorman Reported-by: Hugh Dickins Reviewed-by: Vlastimil Babka Cc: Marcelo Tosatti Cc: Marek Szyprowski Cc: Michal Hocko Cc: Yu Zhao Signed-off-by: Andrew Morton --- mm/page_alloc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c33b6963c2d7..ca889fb53cbb 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3553,6 +3553,8 @@ void free_unref_page_list(struct list_head *list) list_for_each_entry_safe(page, next, list, lru) { struct zone *zone = page_zone(page); + list_del(&page->lru); + /* Different zone, different pcp lock. */ if (zone != locked_zone) { if (pcp) From 5749077415994eb02d660b2559b9d8278521e73d Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 18 Nov 2022 10:17:14 +0000 Subject: [PATCH 3081/4122] mm/page_alloc: leave IRQs enabled for per-cpu page allocations The pcp_spin_lock_irqsave protecting the PCP lists is IRQ-safe as a task allocating from the PCP must not re-enter the allocator from IRQ context. In each instance where IRQ-reentrancy is possible, the lock is acquired using pcp_spin_trylock_irqsave() even though IRQs are disabled and re-entrancy is impossible. Demoting the lock to pcp_spin_lock avoids an IRQ disable/enable in the common case at the cost of some IRQ allocations taking a slower path. If the PCP lists need to be refilled, the zone lock still needs to disable IRQs but that will only happen on PCP refill and drain.
If an IRQ is raised when a PCP allocation is in progress, the trylock will fail and fallback to using the buddy lists directly. Note that this may not be a universal win if an interrupt-intensive workload also allocates heavily from interrupt context and contends heavily on the zone->lock as a result. [mgorman@techsingularity.net: migratetype might be wrong if a PCP was locked] Link: https://lkml.kernel.org/r/20221122131229.5263-2-mgorman@techsingularity.net [yuzhao@google.com: reported lockdep issue on IO completion from softirq] [hughd@google.com: fix list corruption, lock improvements, micro-optimsations] Link: https://lkml.kernel.org/r/20221118101714.19590-3-mgorman@techsingularity.net Signed-off-by: Mel Gorman Reviewed-by: Vlastimil Babka Cc: Marcelo Tosatti Cc: Marek Szyprowski Cc: Michal Hocko Signed-off-by: Andrew Morton --- mm/page_alloc.c | 124 +++++++++++++++++++++--------------------------- 1 file changed, 54 insertions(+), 70 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ca889fb53cbb..d9d83254c485 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -170,21 +170,12 @@ static DEFINE_MUTEX(pcp_batch_high_lock); _ret; \ }) -#define pcpu_spin_lock_irqsave(type, member, ptr, flags) \ +#define pcpu_spin_trylock(type, member, ptr) \ ({ \ type *_ret; \ pcpu_task_pin(); \ _ret = this_cpu_ptr(ptr); \ - spin_lock_irqsave(&_ret->member, flags); \ - _ret; \ -}) - -#define pcpu_spin_trylock_irqsave(type, member, ptr, flags) \ -({ \ - type *_ret; \ - pcpu_task_pin(); \ - _ret = this_cpu_ptr(ptr); \ - if (!spin_trylock_irqsave(&_ret->member, flags)) { \ + if (!spin_trylock(&_ret->member)) { \ pcpu_task_unpin(); \ _ret = NULL; \ } \ @@ -197,27 +188,16 @@ static DEFINE_MUTEX(pcp_batch_high_lock); pcpu_task_unpin(); \ }) -#define pcpu_spin_unlock_irqrestore(member, ptr, flags) \ -({ \ - spin_unlock_irqrestore(&ptr->member, flags); \ - pcpu_task_unpin(); \ -}) - /* struct per_cpu_pages specific helpers. 
*/ #define pcp_spin_lock(ptr) \ pcpu_spin_lock(struct per_cpu_pages, lock, ptr) -#define pcp_spin_lock_irqsave(ptr, flags) \ - pcpu_spin_lock_irqsave(struct per_cpu_pages, lock, ptr, flags) - -#define pcp_spin_trylock_irqsave(ptr, flags) \ - pcpu_spin_trylock_irqsave(struct per_cpu_pages, lock, ptr, flags) +#define pcp_spin_trylock(ptr) \ + pcpu_spin_trylock(struct per_cpu_pages, lock, ptr) #define pcp_spin_unlock(ptr) \ pcpu_spin_unlock(lock, ptr) -#define pcp_spin_unlock_irqrestore(ptr, flags) \ - pcpu_spin_unlock_irqrestore(lock, ptr, flags) #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID DEFINE_PER_CPU(int, numa_node); EXPORT_PER_CPU_SYMBOL(numa_node); @@ -1554,6 +1534,7 @@ static void free_pcppages_bulk(struct zone *zone, int count, struct per_cpu_pages *pcp, int pindex) { + unsigned long flags; int min_pindex = 0; int max_pindex = NR_PCP_LISTS - 1; unsigned int order; @@ -1569,8 +1550,7 @@ static void free_pcppages_bulk(struct zone *zone, int count, /* Ensure requested pindex is drained first. */ pindex = pindex - 1; - /* Caller must hold IRQ-safe pcp->lock so IRQs are disabled. */ - spin_lock(&zone->lock); + spin_lock_irqsave(&zone->lock, flags); isolated_pageblocks = has_isolate_pageblock(zone); while (count > 0) { @@ -1618,7 +1598,7 @@ static void free_pcppages_bulk(struct zone *zone, int count, } while (count > 0 && !list_empty(list)); } - spin_unlock(&zone->lock); + spin_unlock_irqrestore(&zone->lock, flags); } static void free_one_page(struct zone *zone, @@ -3132,10 +3112,10 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, unsigned long count, struct list_head *list, int migratetype, unsigned int alloc_flags) { + unsigned long flags; int i, allocated = 0; - /* Caller must hold IRQ-safe pcp->lock so IRQs are disabled. 
*/ - spin_lock(&zone->lock); + spin_lock_irqsave(&zone->lock, flags); for (i = 0; i < count; ++i) { struct page *page = __rmqueue(zone, order, migratetype, alloc_flags); @@ -3169,7 +3149,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, * pages added to the pcp list. */ __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order)); - spin_unlock(&zone->lock); + spin_unlock_irqrestore(&zone->lock, flags); return allocated; } @@ -3186,16 +3166,9 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) batch = READ_ONCE(pcp->batch); to_drain = min(pcp->count, batch); if (to_drain > 0) { - unsigned long flags; - - /* - * free_pcppages_bulk expects IRQs disabled for zone->lock - * so even though pcp->lock is not intended to be IRQ-safe, - * it's needed in this context. - */ - spin_lock_irqsave(&pcp->lock, flags); + spin_lock(&pcp->lock); free_pcppages_bulk(zone, to_drain, pcp, 0); - spin_unlock_irqrestore(&pcp->lock, flags); + spin_unlock(&pcp->lock); } } #endif @@ -3209,12 +3182,9 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone) pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu); if (pcp->count) { - unsigned long flags; - - /* See drain_zone_pages on why this is disabling IRQs */ - spin_lock_irqsave(&pcp->lock, flags); + spin_lock(&pcp->lock); free_pcppages_bulk(zone, pcp->count, pcp, 0); - spin_unlock_irqrestore(&pcp->lock, flags); + spin_unlock(&pcp->lock); } } @@ -3480,7 +3450,6 @@ static void free_unref_page_commit(struct zone *zone, struct per_cpu_pages *pcp, */ void free_unref_page(struct page *page, unsigned int order) { - unsigned long flags; unsigned long __maybe_unused UP_flags; struct per_cpu_pages *pcp; struct zone *zone; @@ -3508,10 +3477,10 @@ void free_unref_page(struct page *page, unsigned int order) zone = page_zone(page); pcp_trylock_prepare(UP_flags); - pcp = pcp_spin_trylock_irqsave(zone->per_cpu_pageset, flags); + pcp = pcp_spin_trylock(zone->per_cpu_pageset); if (pcp) { free_unref_page_commit(zone, pcp, 
page, migratetype, order); - pcp_spin_unlock_irqrestore(pcp, flags); + pcp_spin_unlock(pcp); } else { free_one_page(zone, page, pfn, order, migratetype, FPI_NONE); } @@ -3523,10 +3492,10 @@ void free_unref_page(struct page *page, unsigned int order) */ void free_unref_page_list(struct list_head *list) { + unsigned long __maybe_unused UP_flags; struct page *page, *next; struct per_cpu_pages *pcp = NULL; struct zone *locked_zone = NULL; - unsigned long flags; int batch_count = 0; int migratetype; @@ -3554,21 +3523,36 @@ void free_unref_page_list(struct list_head *list) struct zone *zone = page_zone(page); list_del(&page->lru); + migratetype = get_pcppage_migratetype(page); /* Different zone, different pcp lock. */ if (zone != locked_zone) { - if (pcp) - pcp_spin_unlock_irqrestore(pcp, flags); + if (pcp) { + pcp_spin_unlock(pcp); + pcp_trylock_finish(UP_flags); + } + /* + * trylock is necessary as pages may be getting freed + * from IRQ or SoftIRQ context after an IO completion. + */ + pcp_trylock_prepare(UP_flags); + pcp = pcp_spin_trylock(zone->per_cpu_pageset); + if (unlikely(!pcp)) { + pcp_trylock_finish(UP_flags); + free_one_page(zone, page, page_to_pfn(page), + 0, migratetype, FPI_NONE); + locked_zone = NULL; + continue; + } locked_zone = zone; - pcp = pcp_spin_lock_irqsave(locked_zone->per_cpu_pageset, flags); + batch_count = 0; } /* * Non-isolated types over MIGRATE_PCPTYPES get added * to the MIGRATE_MOVABLE pcp list. */ - migratetype = get_pcppage_migratetype(page); if (unlikely(migratetype >= MIGRATE_PCPTYPES)) migratetype = MIGRATE_MOVABLE; @@ -3576,18 +3560,23 @@ void free_unref_page_list(struct list_head *list) free_unref_page_commit(zone, pcp, page, migratetype, 0); /* - * Guard against excessive IRQ disabled times when we get - * a large list of pages to free. + * Guard against excessive lock hold times when freeing + * a large list of pages. Lock will be reacquired if + * necessary on the next iteration. 
*/ if (++batch_count == SWAP_CLUSTER_MAX) { - pcp_spin_unlock_irqrestore(pcp, flags); + pcp_spin_unlock(pcp); + pcp_trylock_finish(UP_flags); batch_count = 0; - pcp = pcp_spin_lock_irqsave(locked_zone->per_cpu_pageset, flags); + pcp = NULL; + locked_zone = NULL; } } - if (pcp) - pcp_spin_unlock_irqrestore(pcp, flags); + if (pcp) { + pcp_spin_unlock(pcp); + pcp_trylock_finish(UP_flags); + } } /* @@ -3788,15 +3777,11 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone, struct per_cpu_pages *pcp; struct list_head *list; struct page *page; - unsigned long flags; unsigned long __maybe_unused UP_flags; - /* - * spin_trylock may fail due to a parallel drain. In the future, the - * trylock will also protect against IRQ reentrancy. - */ + /* spin_trylock may fail due to a parallel drain or IRQ reentrancy. */ pcp_trylock_prepare(UP_flags); - pcp = pcp_spin_trylock_irqsave(zone->per_cpu_pageset, flags); + pcp = pcp_spin_trylock(zone->per_cpu_pageset); if (!pcp) { pcp_trylock_finish(UP_flags); return NULL; @@ -3810,7 +3795,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone, pcp->free_factor >>= 1; list = &pcp->lists[order_to_pindex(migratetype, order)]; page = __rmqueue_pcplist(zone, order, migratetype, alloc_flags, pcp, list); - pcp_spin_unlock_irqrestore(pcp, flags); + pcp_spin_unlock(pcp); pcp_trylock_finish(UP_flags); if (page) { __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); @@ -5381,7 +5366,6 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, struct page **page_array) { struct page *page; - unsigned long flags; unsigned long __maybe_unused UP_flags; struct zone *zone; struct zoneref *z; @@ -5463,9 +5447,9 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, if (unlikely(!zone)) goto failed; - /* Is a parallel drain in progress? */ + /* spin_trylock may fail due to a parallel drain or IRQ reentrancy. 
*/ pcp_trylock_prepare(UP_flags); - pcp = pcp_spin_trylock_irqsave(zone->per_cpu_pageset, flags); + pcp = pcp_spin_trylock(zone->per_cpu_pageset); if (!pcp) goto failed_irq; @@ -5484,7 +5468,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, if (unlikely(!page)) { /* Try and allocate at least one page */ if (!nr_account) { - pcp_spin_unlock_irqrestore(pcp, flags); + pcp_spin_unlock(pcp); goto failed_irq; } break; @@ -5499,7 +5483,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, nr_populated++; } - pcp_spin_unlock_irqrestore(pcp, flags); + pcp_spin_unlock(pcp); pcp_trylock_finish(UP_flags); __count_zid_vm_events(PGALLOC, zone_idx(zone), nr_account); From a4bafffb5dc5be6c7a3b77b2de0cbaf6776a3c8b Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 22 Nov 2022 13:12:29 +0000 Subject: [PATCH 3082/4122] mm/page_alloc: simplify locking during free_unref_page_list While freeing a large list, the zone lock will be released and reacquired to avoid long hold times since commit c24ad77d962c ("mm/page_alloc.c: avoid excessive IRQ disabled times in free_unref_page_list()"). As suggested by Vlastimil Babka, the lockrelease/reacquire logic can be simplified by reusing the logic that acquires a different lock when changing zones. Link: https://lkml.kernel.org/r/20221122131229.5263-3-mgorman@techsingularity.net Signed-off-by: Mel Gorman Reviewed-by: Vlastimil Babka Signed-off-by: Andrew Morton --- mm/page_alloc.c | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d9d83254c485..5ab9dd29ef7e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3525,13 +3525,19 @@ void free_unref_page_list(struct list_head *list) list_del(&page->lru); migratetype = get_pcppage_migratetype(page); - /* Different zone, different pcp lock. 
*/ - if (zone != locked_zone) { + /* + * Either different zone requiring a different pcp lock or + * excessive lock hold times when freeing a large list of + * pages. + */ + if (zone != locked_zone || batch_count == SWAP_CLUSTER_MAX) { if (pcp) { pcp_spin_unlock(pcp); pcp_trylock_finish(UP_flags); } + batch_count = 0; + /* * trylock is necessary as pages may be getting freed * from IRQ or SoftIRQ context after an IO completion. @@ -3546,7 +3552,6 @@ void free_unref_page_list(struct list_head *list) continue; } locked_zone = zone; - batch_count = 0; } /* @@ -3558,19 +3563,7 @@ void free_unref_page_list(struct list_head *list) trace_mm_page_free_batched(page); free_unref_page_commit(zone, pcp, page, migratetype, 0); - - /* - * Guard against excessive lock hold times when freeing - * a large list of pages. Lock will be reacquired if - * necessary on the next iteration. - */ - if (++batch_count == SWAP_CLUSTER_MAX) { - pcp_spin_unlock(pcp); - pcp_trylock_finish(UP_flags); - batch_count = 0; - pcp = NULL; - locked_zone = NULL; - } + batch_count++; } if (pcp) { From 6dd8fe86fa84729538d8bed3149faf9c5886bb5b Mon Sep 17 00:00:00 2001 From: "Vishal Moola (Oracle)" Date: Thu, 17 Nov 2022 23:30:52 -0800 Subject: [PATCH 3083/4122] ext4: convert move_extent_per_page() to use folios Patch series "Removing the try_to_release_page() wrapper", v3. This patchset replaces the remaining calls of try_to_release_page() with the folio equivalent: filemap_release_folio(). This allows us to remove the wrapper. This patch (of 4): Convert move_extent_per_page() to use folios. This change removes 5 calls to compound_head() and is in preparation for the removal of the try_to_release_page() wrapper. 
Link: https://lkml.kernel.org/r/20221118073055.55694-1-vishal.moola@gmail.com Link: https://lkml.kernel.org/r/20221118073055.55694-2-vishal.moola@gmail.com Signed-off-by: Vishal Moola (Oracle) Cc: Matthew Wilcox Cc: Naoya Horiguchi Cc: Theodore Ts'o Signed-off-by: Andrew Morton --- fs/ext4/move_extent.c | 52 ++++++++++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 21 deletions(-) diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 044e34cd835c..8dbb87edf24c 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -253,6 +253,7 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, { struct inode *orig_inode = file_inode(o_filp); struct page *pagep[2] = {NULL, NULL}; + struct folio *folio[2] = {NULL, NULL}; handle_t *handle; ext4_lblk_t orig_blk_offset, donor_blk_offset; unsigned long blocksize = orig_inode->i_sb->s_blocksize; @@ -313,6 +314,13 @@ again: * hold page's lock, if it is still the case data copy is not * necessary, just swap data blocks between orig and donor. 
*/ + folio[0] = page_folio(pagep[0]); + folio[1] = page_folio(pagep[1]); + + VM_BUG_ON_FOLIO(folio_test_large(folio[0]), folio[0]); + VM_BUG_ON_FOLIO(folio_test_large(folio[1]), folio[1]); + VM_BUG_ON_FOLIO(folio_nr_pages(folio[0]) != folio_nr_pages(folio[1]), folio[1]); + if (unwritten) { ext4_double_down_write_data_sem(orig_inode, donor_inode); /* If any of extents in range became initialized we have to @@ -331,10 +339,10 @@ again: ext4_double_up_write_data_sem(orig_inode, donor_inode); goto data_copy; } - if ((page_has_private(pagep[0]) && - !try_to_release_page(pagep[0], 0)) || - (page_has_private(pagep[1]) && - !try_to_release_page(pagep[1], 0))) { + if ((folio_has_private(folio[0]) && + !filemap_release_folio(folio[0], 0)) || + (folio_has_private(folio[1]) && + !filemap_release_folio(folio[1], 0))) { *err = -EBUSY; goto drop_data_sem; } @@ -344,19 +352,21 @@ again: block_len_in_page, 1, err); drop_data_sem: ext4_double_up_write_data_sem(orig_inode, donor_inode); - goto unlock_pages; + goto unlock_folios; } data_copy: - *err = mext_page_mkuptodate(pagep[0], from, from + replaced_size); + *err = mext_page_mkuptodate(&folio[0]->page, from, from + replaced_size); if (*err) - goto unlock_pages; + goto unlock_folios; /* At this point all buffers in range are uptodate, old mapping layout * is no longer required, try to drop it now. 
*/ - if ((page_has_private(pagep[0]) && !try_to_release_page(pagep[0], 0)) || - (page_has_private(pagep[1]) && !try_to_release_page(pagep[1], 0))) { + if ((folio_has_private(folio[0]) && + !filemap_release_folio(folio[0], 0)) || + (folio_has_private(folio[1]) && + !filemap_release_folio(folio[1], 0))) { *err = -EBUSY; - goto unlock_pages; + goto unlock_folios; } ext4_double_down_write_data_sem(orig_inode, donor_inode); replaced_count = ext4_swap_extents(handle, orig_inode, donor_inode, @@ -369,13 +379,13 @@ data_copy: replaced_size = block_len_in_page << orig_inode->i_blkbits; } else - goto unlock_pages; + goto unlock_folios; } /* Perform all necessary steps similar write_begin()/write_end() * but keeping in mind that i_size will not change */ - if (!page_has_buffers(pagep[0])) - create_empty_buffers(pagep[0], 1 << orig_inode->i_blkbits, 0); - bh = page_buffers(pagep[0]); + if (!folio_buffers(folio[0])) + create_empty_buffers(&folio[0]->page, 1 << orig_inode->i_blkbits, 0); + bh = folio_buffers(folio[0]); for (i = 0; i < data_offset_in_page; i++) bh = bh->b_this_page; for (i = 0; i < block_len_in_page; i++) { @@ -385,7 +395,7 @@ data_copy: bh = bh->b_this_page; } if (!*err) - *err = block_commit_write(pagep[0], from, from + replaced_size); + *err = block_commit_write(&folio[0]->page, from, from + replaced_size); if (unlikely(*err < 0)) goto repair_branches; @@ -395,11 +405,11 @@ data_copy: *err = ext4_jbd2_inode_add_write(handle, orig_inode, (loff_t)orig_page_offset << PAGE_SHIFT, replaced_size); -unlock_pages: - unlock_page(pagep[0]); - put_page(pagep[0]); - unlock_page(pagep[1]); - put_page(pagep[1]); +unlock_folios: + folio_unlock(folio[0]); + folio_put(folio[0]); + folio_unlock(folio[1]); + folio_put(folio[1]); stop_journal: ext4_journal_stop(handle); if (*err == -ENOSPC && @@ -430,7 +440,7 @@ repair_branches: *err = -EIO; } replaced_count = 0; - goto unlock_pages; + goto unlock_folios; } /** From 64ab3195ea077eaeedc8b382939c3dc5ca56f369 Mon Sep 17 00:00:00 
2001 From: "Vishal Moola (Oracle)" Date: Thu, 17 Nov 2022 23:30:53 -0800 Subject: [PATCH 3084/4122] khugepage: replace try_to_release_page() with filemap_release_folio() Replace some calls with their folio equivalents. This change removes 4 calls to compound_head() and is in preparation for the removal of the try_to_release_page() wrapper. Link: https://lkml.kernel.org/r/20221118073055.55694-3-vishal.moola@gmail.com Signed-off-by: Vishal Moola (Oracle) Cc: Matthew Wilcox Cc: Naoya Horiguchi Cc: Theodore Ts'o Signed-off-by: Andrew Morton --- mm/khugepaged.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 0d8f548d9d7e..913b0f489352 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1789,6 +1789,7 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, xas_set(&xas, start); for (index = start; index < end; index++) { struct page *page = xas_next(&xas); + struct folio *folio; VM_BUG_ON(index != xas.xa_index); if (is_shmem) { @@ -1815,8 +1816,6 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, } if (xa_is_value(page) || !PageUptodate(page)) { - struct folio *folio; - xas_unlock_irq(&xas); /* swap in or instantiate fallocated page */ if (shmem_get_folio(mapping->host, index, @@ -1904,13 +1903,15 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, goto out_unlock; } - if (page_mapping(page) != mapping) { + folio = page_folio(page); + + if (folio_mapping(folio) != mapping) { result = SCAN_TRUNCATED; goto out_unlock; } - if (!is_shmem && (PageDirty(page) || - PageWriteback(page))) { + if (!is_shmem && (folio_test_dirty(folio) || + folio_test_writeback(folio))) { /* * khugepaged only works on read-only fd, so this * page is dirty because it hasn't been flushed @@ -1920,20 +1921,20 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, goto out_unlock; } - if (isolate_lru_page(page)) { + if (folio_isolate_lru(folio)) { 
result = SCAN_DEL_PAGE_LRU; goto out_unlock; } - if (page_has_private(page) && - !try_to_release_page(page, GFP_KERNEL)) { + if (folio_has_private(folio) && + !filemap_release_folio(folio, GFP_KERNEL)) { result = SCAN_PAGE_HAS_PRIVATE; - putback_lru_page(page); + folio_putback_lru(folio); goto out_unlock; } - if (page_mapped(page)) - try_to_unmap(page_folio(page), + if (folio_mapped(folio)) + try_to_unmap(folio, TTU_IGNORE_MLOCK | TTU_BATCH_FLUSH); xas_lock_irq(&xas); From ac5efa782041670b63a05c36d92d02a80e50bb63 Mon Sep 17 00:00:00 2001 From: "Vishal Moola (Oracle)" Date: Thu, 17 Nov 2022 23:30:54 -0800 Subject: [PATCH 3085/4122] memory-failure: convert truncate_error_page() to use folio Replace try_to_release_page() with filemap_release_folio(). This change is in preparation for the removal of the try_to_release_page() wrapper. Link: https://lkml.kernel.org/r/20221118073055.55694-4-vishal.moola@gmail.com Signed-off-by: Vishal Moola (Oracle) Acked-by: Naoya Horiguchi Cc: Matthew Wilcox Cc: Theodore Ts'o Signed-off-by: Andrew Morton --- mm/memory-failure.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 63d8501001c6..2e62940c7bae 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -840,12 +840,13 @@ static int truncate_error_page(struct page *p, unsigned long pfn, int ret = MF_FAILED; if (mapping->a_ops->error_remove_page) { + struct folio *folio = page_folio(p); int err = mapping->a_ops->error_remove_page(mapping, p); if (err != 0) { pr_info("%#lx: Failed to punch page: %d\n", pfn, err); - } else if (page_has_private(p) && - !try_to_release_page(p, GFP_NOIO)) { + } else if (folio_has_private(folio) && + !filemap_release_folio(folio, GFP_NOIO)) { pr_info("%#lx: failed to release buffers\n", pfn); } else { ret = MF_RECOVERED; From 7438899b0b8df16a73d9a5d49b2a345d165adfe8 Mon Sep 17 00:00:00 2001 From: "Vishal Moola (Oracle)" Date: Thu, 17 Nov 2022 23:30:55 -0800 Subject: [PATCH 
3086/4122] folio-compat: remove try_to_release_page() There are no more callers of try_to_release_page(), so remove it. This saves 85 bytes of kernel text. Link: https://lkml.kernel.org/r/20221118073055.55694-5-vishal.moola@gmail.com Signed-off-by: Vishal Moola (Oracle) Cc: Matthew Wilcox Cc: Naoya Horiguchi Cc: Theodore Ts'o Signed-off-by: Andrew Morton --- include/linux/pagemap.h | 1 - mm/folio-compat.c | 6 ------ 2 files changed, 7 deletions(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index b33ab86d5dca..2ec0ca1f3d38 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -1105,7 +1105,6 @@ void __filemap_remove_folio(struct folio *folio, void *shadow); void replace_page_cache_page(struct page *old, struct page *new); void delete_from_page_cache_batch(struct address_space *mapping, struct folio_batch *fbatch); -int try_to_release_page(struct page *page, gfp_t gfp); bool filemap_release_folio(struct folio *folio, gfp_t gfp); loff_t mapping_seek_hole_data(struct address_space *, loff_t start, loff_t end, int whence); diff --git a/mm/folio-compat.c b/mm/folio-compat.c index cbfe51091c39..86933fa8f3e1 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -118,12 +118,6 @@ struct page *grab_cache_page_write_begin(struct address_space *mapping, } EXPORT_SYMBOL(grab_cache_page_write_begin); -int try_to_release_page(struct page *page, gfp_t gfp) -{ - return filemap_release_folio(page_folio(page), gfp); -} -EXPORT_SYMBOL(try_to_release_page); - int isolate_lru_page(struct page *page) { if (WARN_RATELIMIT(PageTail(page), "trying to isolate tail page")) From f6fbb8b23b8155a6f7af1349b4595d0373167636 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Thu, 10 Nov 2022 12:35:41 +0100 Subject: [PATCH 3087/4122] Revert "kmsan: unpoison @tlb in arch_tlb_gather_mmu()" This reverts commit ac801e7e252c5588325e3c983c7d4167fc68c024. 
The patch in question was picked to -mm from the KMSAN v6 patch series (https://lore.kernel.org/linux-mm/20220905122452.2258262-1-glider@google.com/) and sneaked into mainline despite its removal from the v7 series (https://lore.kernel.org/linux-mm/20220915150417.722975-1-glider@google.com/) Currently KMSAN does not warn about origin chains hitting the maximum depth, so keeping @tlb poisoned won't result in any inconveniences. Link: https://lkml.kernel.org/r/20221110113541.1844156-1-glider@google.com Signed-off-by: Alexander Potapenko Cc: Dmitry Vyukov Cc: Eric Biggers Cc: Marco Elver Cc: Peter Zijlstra (Intel) Cc: Will Deacon Signed-off-by: Andrew Morton --- mm/mmu_gather.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c index dd1f8ca40cb5..8247553a69c2 100644 --- a/mm/mmu_gather.c +++ b/mm/mmu_gather.c @@ -1,7 +1,6 @@ #include #include #include -#include #include #include #include @@ -299,15 +298,6 @@ void tlb_flush_mmu(struct mmu_gather *tlb) static void __tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm) { - /* - * struct mmu_gather contains 7 1-bit fields packed into a 32-bit - * unsigned int value. The remaining 25 bits remain uninitialized - * and are never used, but KMSAN updates the origin for them in - * zap_pXX_range() in mm/memory.c, thus creating very long origin - * chains. This is technically correct, but consumes too much memory. - * Unpoisoning the whole structure will prevent creating such chains. - */ - kmsan_unpoison_memory(tlb, sizeof(*tlb)); tlb->mm = mm; tlb->fullmm = fullmm; From 845aad0aa038507c166cdc48fbf2e5d863fe73dc Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 18 Nov 2022 21:51:17 -0800 Subject: [PATCH 3088/4122] maple_tree: allow TEST_MAPLE_TREE only when DEBUG_KERNEL is set Prevent a kconfig warning that is caused by TEST_MAPLE_TREE by adding a "depends on" clause for TEST_MAPLE_TREE since 'select' does not follow any kconfig dependencies. 
WARNING: unmet direct dependencies detected for DEBUG_MAPLE_TREE Depends on [n]: DEBUG_KERNEL [=n] Selected by [y]: - TEST_MAPLE_TREE [=y] && RUNTIME_TESTING_MENU [=y] Link: https://lkml.kernel.org/r/20221119055117.14094-1-rdunlap@infradead.org Fixes: 120b116208a0 ("maple_tree: reorganize testing to restore module testing") Signed-off-by: Randy Dunlap Reported-by: Geert Uytterhoeven Reported-by: kernel test robot Reviewed-by: Liam R. Howlett Signed-off-by: Andrew Morton --- lib/Kconfig.debug | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 580e453e284e..a0dc28fdc567 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2244,6 +2244,7 @@ config TEST_XARRAY tristate "Test the XArray code at runtime" config TEST_MAPLE_TREE + depends on DEBUG_KERNEL select DEBUG_MAPLE_TREE tristate "Test the Maple Tree code at runtime" From 8e9d5ead865a1a7af74a444d2f00f1ef4539bfba Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Fri, 18 Nov 2022 16:51:56 -0800 Subject: [PATCH 3089/4122] mm: add bdi_set_strict_limit() function Patch series "mm/block: add bdi sysfs knobs", v4. At meta network block devices (nbd) are used to implement remote block storage. In testing and during production it has been observed that these network block devices can consume a huge portion of the dirty writeback cache and writeback can take a considerable time. To be able to give stricter limits, I'm proposing the following changes: 1) introduce strictlimit knob Currently the max_ratio knob exists to limit the dirty_memory. However this knob only applies once (dirty_ratio + dirty_background_ratio) / 2 has been reached. With the BDI_CAP_STRICTLIMIT flag, the max_ratio can be applied without reaching that limit. This change exposes that knob. This knob can also be useful for NFS, fuse filesystems and USB devices. 2) Use part of 1000000 internal calculation The max_ratio is based on percentage. 
With the current machine sizes percentage values can be very high (1% of a 256GB main memory is already 2.5GB). This change uses part of 1000000 instead of percentages for the internal calculations. 3) Introduce two new sysfs knobs: min_bytes and max_bytes. Currently all calculations are based on ratio, but for a user it is often more convenient to specify a limit in bytes. The new knobs will not store bytes values, instead they will translate the byte value to a corresponding ratio. As the internal values are now part of 1000000, the ratio is closer to the specified value. However the value should be seen more as an approximation as it can fluctuate over time. 4) Introduce two new sysfs knobs: min_ratio_fine and max_ratio_fine. The granularity for the existing sysfs bdi knobs min_ratio and max_ratio is based on percentage values. The new sysfs bdi knobs min_ratio_fine and max_ratio_fine allow specifying the ratio as part of 1 million. This patch (of 20): This adds the bdi_set_strict_limit function to be able to set/unset the BDI_CAP_STRICTLIMIT flag. 
Link: https://lkml.kernel.org/r/20221119005215.3052436-1-shr@devkernel.io Link: https://lkml.kernel.org/r/20221119005215.3052436-2-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Jens Axboe Cc: Chris Mason Signed-off-by: Andrew Morton --- include/linux/backing-dev.h | 1 + mm/page-writeback.c | 15 +++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 439815cc1ab9..9c984ffc8a0a 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -104,6 +104,7 @@ static inline unsigned long wb_stat_error(void) int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); +int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit); /* * Flags in backing_dev_info::capability diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 7e9d8d857ecc..3745b886722f 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -698,6 +698,21 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio) } EXPORT_SYMBOL(bdi_set_max_ratio); +int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit) +{ + if (strict_limit > 1) + return -EINVAL; + + spin_lock_bh(&bdi_lock); + if (strict_limit) + bdi->capabilities |= BDI_CAP_STRICTLIMIT; + else + bdi->capabilities &= ~BDI_CAP_STRICTLIMIT; + spin_unlock_bh(&bdi_lock); + + return 0; +} + static unsigned long dirty_freerun_ceiling(unsigned long thresh, unsigned long bg_thresh) { From 27bbe9d48d4e298864e18b39f091342c68b81637 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Fri, 18 Nov 2022 16:51:57 -0800 Subject: [PATCH 3090/4122] mm: add knob /sys/class/bdi//strict_limit Add a new knob to /sys/class/bdi//strict_limit. This new knob allows to set/unset the flag BDI_CAP_STRICTLIMIT in the bdi capabilities. 
Link: https://lkml.kernel.org/r/20221119005215.3052436-3-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton --- mm/backing-dev.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index c30419a5e119..a0899cce72ef 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -209,11 +209,40 @@ static ssize_t stable_pages_required_show(struct device *dev, } static DEVICE_ATTR_RO(stable_pages_required); +static ssize_t strict_limit_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + unsigned int strict_limit; + ssize_t ret; + + ret = kstrtouint(buf, 10, &strict_limit); + if (ret < 0) + return ret; + + ret = bdi_set_strict_limit(bdi, strict_limit); + if (!ret) + ret = count; + + return ret; +} + +static ssize_t strict_limit_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%d\n", + !!(bdi->capabilities & BDI_CAP_STRICTLIMIT)); +} +static DEVICE_ATTR_RW(strict_limit); + static struct attribute *bdi_dev_attrs[] = { &dev_attr_read_ahead_kb.attr, &dev_attr_min_ratio.attr, &dev_attr_max_ratio.attr, &dev_attr_stable_pages_required.attr, + &dev_attr_strict_limit.attr, NULL, }; ATTRIBUTE_GROUPS(bdi_dev); From 16b837eb84e6948f92411eb32e97a05f89733ddc Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Fri, 18 Nov 2022 16:51:58 -0800 Subject: [PATCH 3091/4122] mm: document /sys/class/bdi//strict_limit knob This documents the new /sys/class/bdi//strict_limit knob. 
Link: https://lkml.kernel.org/r/20221119005215.3052436-4-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton --- Documentation/ABI/testing/sysfs-class-bdi | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi index 6d2a2fc189dd..68b5d4018c2f 100644 --- a/Documentation/ABI/testing/sysfs-class-bdi +++ b/Documentation/ABI/testing/sysfs-class-bdi @@ -55,6 +55,17 @@ Description: mount that is prone to get stuck, or a FUSE mount which cannot be trusted to play fair. + (read-write) +What: /sys/class/bdi//strict_limit +Date: October 2022 +Contact: Stefan Roesch +Description: + Forces per-BDI checks for the share of given device in the write-back + cache even before the global background dirty limit is reached. This + is useful in situations where the global limit is much higher than + affordable for given relatively slow (or untrusted) device. Turning + strictlimit on has no visible effect if max_ratio is equal to 100%. + (read-write) What: /sys/class/bdi//stable_pages_required Date: January 2008 From ae82291e9ca47c3d6da6b77a00f427754aca413e Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Fri, 18 Nov 2022 16:51:59 -0800 Subject: [PATCH 3092/4122] mm: use part per 1000000 for bdi ratios To get finer granularity for ratio calculations use part per million instead of percentiles. This is especially important if we want to automatically convert byte values to ratios. Otherwise the values that are actually used can be quite different. This is also important for machines with more main memory (1% of 256GB is already 2.5GB). 
Link: https://lkml.kernel.org/r/20221119005215.3052436-5-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton --- include/linux/backing-dev.h | 3 +++ mm/backing-dev.c | 6 +++--- mm/page-writeback.c | 15 +++++++++------ 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 9c984ffc8a0a..1b50c028e5ad 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -102,6 +102,9 @@ static inline unsigned long wb_stat_error(void) #endif } +/* BDI ratio is expressed as part per 1000000 for finer granularity. */ +#define BDI_RATIO_SCALE 10000 + int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit); diff --git a/mm/backing-dev.c b/mm/backing-dev.c index a0899cce72ef..90fa517123dc 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -178,7 +178,7 @@ static ssize_t min_ratio_store(struct device *dev, return ret; } -BDI_SHOW(min_ratio, bdi->min_ratio) +BDI_SHOW(min_ratio, bdi->min_ratio / BDI_RATIO_SCALE) static ssize_t max_ratio_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -197,7 +197,7 @@ static ssize_t max_ratio_store(struct device *dev, return ret; } -BDI_SHOW(max_ratio, bdi->max_ratio) +BDI_SHOW(max_ratio, bdi->max_ratio / BDI_RATIO_SCALE) static ssize_t stable_pages_required_show(struct device *dev, struct device_attribute *attr, @@ -809,7 +809,7 @@ int bdi_init(struct backing_dev_info *bdi) kref_init(&bdi->refcnt); bdi->min_ratio = 0; - bdi->max_ratio = 100; + bdi->max_ratio = 100 * BDI_RATIO_SCALE; bdi->max_prop_frac = FPROP_FRAC_BASE; INIT_LIST_HEAD(&bdi->bdi_list); INIT_LIST_HEAD(&bdi->wb_list); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 3745b886722f..dd98b2654302 100644 --- 
a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -197,7 +197,7 @@ static void wb_min_max_ratio(struct bdi_writeback *wb, min *= this_bw; min = div64_ul(min, tot_bw); } - if (max < 100) { + if (max < 100 * BDI_RATIO_SCALE) { max *= this_bw; max = div64_ul(max, tot_bw); } @@ -655,6 +655,8 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) unsigned int delta; int ret = 0; + min_ratio *= BDI_RATIO_SCALE; + spin_lock_bh(&bdi_lock); if (min_ratio > bdi->max_ratio) { ret = -EINVAL; @@ -665,7 +667,7 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) bdi->min_ratio = min_ratio; } else { delta = min_ratio - bdi->min_ratio; - if (bdi_min_ratio + delta < 100) { + if (bdi_min_ratio + delta < 100 * BDI_RATIO_SCALE) { bdi_min_ratio += delta; bdi->min_ratio = min_ratio; } else { @@ -684,6 +686,7 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio) if (max_ratio > 100) return -EINVAL; + max_ratio *= BDI_RATIO_SCALE; spin_lock_bh(&bdi_lock); if (bdi->min_ratio > max_ratio) { @@ -775,15 +778,15 @@ static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc) fprop_fraction_percpu(&dom->completions, dtc->wb_completions, &numerator, &denominator); - wb_thresh = (thresh * (100 - bdi_min_ratio)) / 100; + wb_thresh = (thresh * (100 * BDI_RATIO_SCALE - bdi_min_ratio)) / (100 * BDI_RATIO_SCALE); wb_thresh *= numerator; wb_thresh = div64_ul(wb_thresh, denominator); wb_min_max_ratio(dtc->wb, &wb_min_ratio, &wb_max_ratio); - wb_thresh += (thresh * wb_min_ratio) / 100; - if (wb_thresh > (thresh * wb_max_ratio) / 100) - wb_thresh = thresh * wb_max_ratio / 100; + wb_thresh += (thresh * wb_min_ratio) / (100 * BDI_RATIO_SCALE); + if (wb_thresh > (thresh * wb_max_ratio) / (100 * BDI_RATIO_SCALE)) + wb_thresh = thresh * wb_max_ratio / (100 * BDI_RATIO_SCALE); return wb_thresh; } From 00df7d51263b46ed93f7572e2d09579746f7b1eb Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Fri, 18 Nov 2022 16:52:00 -0800 
Subject: [PATCH 3093/4122] mm: add bdi_get_max_bytes() function This adds a function to return the specified value for max_bytes. It converts the stored max_ratio of the bdi to the corresponding bytes value. It introduces the bdi_get_bytes helper function to do the conversion. This is an approximation as it is based on the value that is returned by global_dirty_limits(), which can change. The helper function will also be used by the min_bytes bdi knob. Link: https://lkml.kernel.org/r/20221119005215.3052436-6-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton --- include/linux/backing-dev.h | 1 + mm/page-writeback.c | 17 +++++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 1b50c028e5ad..473686c32775 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -105,6 +105,7 @@ static inline unsigned long wb_stat_error(void) /* BDI ratio is expressed as part per 1000000 for finer granularity. 
*/ #define BDI_RATIO_SCALE 10000 +u64 bdi_get_max_bytes(struct backing_dev_info *bdi); int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index dd98b2654302..719404e0d03d 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -650,6 +650,18 @@ void wb_domain_exit(struct wb_domain *dom) */ static unsigned int bdi_min_ratio; +static u64 bdi_get_bytes(unsigned int ratio) +{ + unsigned long background_thresh; + unsigned long dirty_thresh; + u64 bytes; + + global_dirty_limits(&background_thresh, &dirty_thresh); + bytes = (dirty_thresh * PAGE_SIZE * ratio) / BDI_RATIO_SCALE / 100; + + return bytes; +} + int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) { unsigned int delta; @@ -701,6 +713,11 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio) } EXPORT_SYMBOL(bdi_set_max_ratio); +u64 bdi_get_max_bytes(struct backing_dev_info *bdi) +{ + return bdi_get_bytes(bdi->max_ratio); +} + int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit) { if (strict_limit > 1) From efc3e6ad53ea14225b434fddca261c9a1c56c707 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Fri, 18 Nov 2022 16:52:01 -0800 Subject: [PATCH 3094/4122] mm: split off __bdi_set_max_ratio() function This splits off __bdi_set_max_ratio() from bdi_set_max_ratio(). __bdi_set_max_ratio() will also be called from bdi_set_max_bytes(), which will be introduced in the next patch. 
Link: https://lkml.kernel.org/r/20221119005215.3052436-7-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton --- mm/page-writeback.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 719404e0d03d..e74ef596dc27 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -692,14 +692,10 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) return ret; } -int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio) +static int __bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio) { int ret = 0; - if (max_ratio > 100) - return -EINVAL; - max_ratio *= BDI_RATIO_SCALE; - spin_lock_bh(&bdi_lock); if (bdi->min_ratio > max_ratio) { ret = -EINVAL; @@ -711,6 +707,14 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio) return ret; } + +int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio) +{ + if (max_ratio > 100) + return -EINVAL; + + return __bdi_set_max_ratio(bdi, max_ratio * BDI_RATIO_SCALE); +} EXPORT_SYMBOL(bdi_set_max_ratio); u64 bdi_get_max_bytes(struct backing_dev_info *bdi) From 1bf27e98d26d1e62166a456ef17460be085cbe0b Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Fri, 18 Nov 2022 16:52:02 -0800 Subject: [PATCH 3095/4122] mm: add bdi_set_max_bytes() function This introduces the bdi_set_max_bytes() function. The max_bytes function does not store the max_bytes value. Instead it converts the max_bytes value into the corresponding ratio value. 
Link: https://lkml.kernel.org/r/20221119005215.3052436-8-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton --- include/linux/backing-dev.h | 1 + mm/page-writeback.c | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 473686c32775..ea6c993433d5 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -108,6 +108,7 @@ static inline unsigned long wb_stat_error(void) u64 bdi_get_max_bytes(struct backing_dev_info *bdi); int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); +int bdi_set_max_bytes(struct backing_dev_info *bdi, u64 max_bytes); int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit); /* diff --git a/mm/page-writeback.c b/mm/page-writeback.c index e74ef596dc27..20ae9adeb22f 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -13,6 +13,7 @@ */ #include +#include #include #include #include @@ -650,6 +651,28 @@ void wb_domain_exit(struct wb_domain *dom) */ static unsigned int bdi_min_ratio; +static int bdi_check_pages_limit(unsigned long pages) +{ + unsigned long max_dirty_pages = global_dirtyable_memory(); + + if (pages > max_dirty_pages) + return -EINVAL; + + return 0; +} + +static unsigned long bdi_ratio_from_pages(unsigned long pages) +{ + unsigned long background_thresh; + unsigned long dirty_thresh; + unsigned long ratio; + + global_dirty_limits(&background_thresh, &dirty_thresh); + ratio = div64_u64(pages * 100ULL * BDI_RATIO_SCALE, dirty_thresh); + + return ratio; +} + static u64 bdi_get_bytes(unsigned int ratio) { unsigned long background_thresh; @@ -722,6 +745,20 @@ u64 bdi_get_max_bytes(struct backing_dev_info *bdi) return bdi_get_bytes(bdi->max_ratio); } +int bdi_set_max_bytes(struct backing_dev_info *bdi, u64 max_bytes) +{ + int ret; + 
unsigned long pages = max_bytes >> PAGE_SHIFT; + unsigned long max_ratio; + + ret = bdi_check_pages_limit(pages); + if (ret) + return ret; + + max_ratio = bdi_ratio_from_pages(pages); + return __bdi_set_max_ratio(bdi, max_ratio); +} + int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit) { if (strict_limit > 1) From c56e049a5e401a177c7c9b39a3bcc973ff5cec0b Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Fri, 18 Nov 2022 16:52:03 -0800 Subject: [PATCH 3096/4122] mm: add knob /sys/class/bdi//max_bytes This adds the new knob max_bytes to specify a dirty memory limit for the corresponding bdi. The specified bytes value is converted to a ratio. Link: https://lkml.kernel.org/r/20221119005215.3052436-9-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton --- mm/backing-dev.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 90fa517123dc..95d3229fc81f 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -199,6 +199,34 @@ static ssize_t max_ratio_store(struct device *dev, } BDI_SHOW(max_ratio, bdi->max_ratio / BDI_RATIO_SCALE) +static ssize_t max_bytes_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%llu\n", bdi_get_max_bytes(bdi)); +} + +static ssize_t max_bytes_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + u64 bytes; + ssize_t ret; + + ret = kstrtoull(buf, 10, &bytes); + if (ret < 0) + return ret; + + ret = bdi_set_max_bytes(bdi, bytes); + if (!ret) + ret = count; + + return ret; +} +DEVICE_ATTR_RW(max_bytes); + static ssize_t stable_pages_required_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -241,6 +269,7 @@ static struct attribute *bdi_dev_attrs[] = { 
&dev_attr_read_ahead_kb.attr, &dev_attr_min_ratio.attr, &dev_attr_max_ratio.attr, + &dev_attr_max_bytes.attr, &dev_attr_stable_pages_required.attr, &dev_attr_strict_limit.attr, NULL, From c354d9268d7825eb8643f658c5091079d4f11a4a Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Fri, 18 Nov 2022 16:52:04 -0800 Subject: [PATCH 3097/4122] mm: document /sys/class/bdi//max_bytes knob This documents the new /sys/class/bdi//max_bytes knob. Link: https://lkml.kernel.org/r/20221119005215.3052436-10-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton --- Documentation/ABI/testing/sysfs-class-bdi | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi index 68b5d4018c2f..580f723de049 100644 --- a/Documentation/ABI/testing/sysfs-class-bdi +++ b/Documentation/ABI/testing/sysfs-class-bdi @@ -56,6 +56,20 @@ Description: be trusted to play fair. (read-write) + +What: /sys/class/bdi//max_bytes +Date: October 2022 +Contact: Stefan Roesch +Description: + Allows limiting a particular device to use not more than the + given 'max_bytes' of the write-back cache. This is useful in + situations where we want to avoid one device taking all or + most of the write-back cache. For example in case of an NFS + mount that is prone to get stuck, a FUSE mount which cannot be + trusted to play fair, or a nbd device. + + (read-write) + What: /sys/class/bdi//strict_limit Date: October 2022 Contact: Stefan Roesch From 712c00d66a342a3ed375df41c3df7d3d2abad2c0 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Fri, 18 Nov 2022 16:52:05 -0800 Subject: [PATCH 3098/4122] mm: add bdi_get_min_bytes() function This adds a function to return the specified value for min_bytes. It converts the stored min_ratio of the bdi to the corresponding bytes value. 
This is an approximation as it is based on the value that is returned by global_dirty_limits(), which can change. The returned value can be different than the value when the min_bytes value was set. Link: https://lkml.kernel.org/r/20221119005215.3052436-11-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton --- include/linux/backing-dev.h | 1 + mm/page-writeback.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index ea6c993433d5..8e04567727e6 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -105,6 +105,7 @@ static inline unsigned long wb_stat_error(void) /* BDI ratio is expressed as part per 1000000 for finer granularity. */ #define BDI_RATIO_SCALE 10000 +u64 bdi_get_min_bytes(struct backing_dev_info *bdi); u64 bdi_get_max_bytes(struct backing_dev_info *bdi); int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 20ae9adeb22f..c47824464f4c 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -740,6 +740,11 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio) } EXPORT_SYMBOL(bdi_set_max_ratio); +u64 bdi_get_min_bytes(struct backing_dev_info *bdi) +{ + return bdi_get_bytes(bdi->min_ratio); +} + u64 bdi_get_max_bytes(struct backing_dev_info *bdi) { return bdi_get_bytes(bdi->max_ratio); From 8021fb3232f265b81c7e4e7aba15bc3a04ff1fd3 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Fri, 18 Nov 2022 16:52:06 -0800 Subject: [PATCH 3099/4122] mm: split off __bdi_set_min_ratio() function This splits off the __bdi_set_min_ratio() function from the bdi_set_min_ratio() function. The __bdi_set_min_ratio() function will also be called from the bdi_set_min_bytes() function, which will be introduced in the next patch. 
Link: https://lkml.kernel.org/r/20221119005215.3052436-12-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton --- mm/page-writeback.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index c47824464f4c..cefee7210d83 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -685,7 +685,7 @@ static u64 bdi_get_bytes(unsigned int ratio) return bytes; } -int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) +static int __bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) { unsigned int delta; int ret = 0; @@ -731,6 +731,11 @@ static int __bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ra return ret; } +int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) +{ + return __bdi_set_min_ratio(bdi, min_ratio * BDI_RATIO_SCALE); +} + int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio) { if (max_ratio > 100) From 803c98050569850be5fd51a2025c67622de887d9 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Fri, 18 Nov 2022 16:52:07 -0800 Subject: [PATCH 3100/4122] mm: add bdi_set_min_bytes() function This introduces the bdi_set_min_bytes() function. The min_bytes function does not store the min_bytes value. Instead it converts the min_bytes value into the corresponding ratio value. 
Link: https://lkml.kernel.org/r/20221119005215.3052436-13-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton --- include/linux/backing-dev.h | 1 + mm/page-writeback.c | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 8e04567727e6..572669758c7f 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -109,6 +109,7 @@ u64 bdi_get_min_bytes(struct backing_dev_info *bdi); u64 bdi_get_max_bytes(struct backing_dev_info *bdi); int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); +int bdi_set_min_bytes(struct backing_dev_info *bdi, u64 min_bytes); int bdi_set_max_bytes(struct backing_dev_info *bdi, u64 max_bytes); int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index cefee7210d83..3d151e7a9b6c 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -750,6 +750,20 @@ u64 bdi_get_min_bytes(struct backing_dev_info *bdi) return bdi_get_bytes(bdi->min_ratio); } +int bdi_set_min_bytes(struct backing_dev_info *bdi, u64 min_bytes) +{ + int ret; + unsigned long pages = min_bytes >> PAGE_SHIFT; + unsigned long min_ratio; + + ret = bdi_check_pages_limit(pages); + if (ret) + return ret; + + min_ratio = bdi_ratio_from_pages(pages); + return __bdi_set_min_ratio(bdi, min_ratio); +} + u64 bdi_get_max_bytes(struct backing_dev_info *bdi) { return bdi_get_bytes(bdi->max_ratio); From 9c84819bd64ec15cb15d041c45ebe4725e9d4f3b Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Fri, 18 Nov 2022 16:52:08 -0800 Subject: [PATCH 3101/4122] mm: add /sys/class/bdi//min_bytes knob bdi has two existing knobs to limit the amount of dirty memory: min_ratio and max_ratio. 
However the granularity of the knobs is limited and often it is more convenient to specify limits in terms of bytes. This change adds the min_bytes knob. It does not store the min_bytes value, instead it converts the min_bytes value to a ratio. The value is therefore more an approximation than an absolute value. It also maintains the sum over all the bdi min_ratio values stored in the variable bdi_min_ratio. Link: https://lkml.kernel.org/r/20221119005215.3052436-14-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton --- mm/backing-dev.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 95d3229fc81f..3fab79061ade 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -199,6 +199,34 @@ static ssize_t max_ratio_store(struct device *dev, } BDI_SHOW(max_ratio, bdi->max_ratio / BDI_RATIO_SCALE) +static ssize_t min_bytes_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%llu\n", bdi_get_min_bytes(bdi)); +} + +static ssize_t min_bytes_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + u64 bytes; + ssize_t ret; + + ret = kstrtoull(buf, 10, &bytes); + if (ret < 0) + return ret; + + ret = bdi_set_min_bytes(bdi, bytes); + if (!ret) + ret = count; + + return ret; +} +DEVICE_ATTR_RW(min_bytes); + static ssize_t max_bytes_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -269,6 +297,7 @@ static struct attribute *bdi_dev_attrs[] = { &dev_attr_read_ahead_kb.attr, &dev_attr_min_ratio.attr, &dev_attr_max_ratio.attr, + &dev_attr_min_bytes.attr, &dev_attr_max_bytes.attr, &dev_attr_stable_pages_required.attr, &dev_attr_strict_limit.attr, From 9c832a8d571784c998d0f9f5df480c62f7f3064c Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Fri, 
18 Nov 2022 16:52:09 -0800 Subject: [PATCH 3102/4122] mm: document /sys/class/bdi//min_bytes knob This documents the new /sys/class/bdi//min_bytes knob. [akpm@linux-foundation.org: fix htmldocs warnings] Link: https://lkml.kernel.org/r/20221119005215.3052436-15-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton --- Documentation/ABI/testing/sysfs-class-bdi | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi index 580f723de049..a9a2f588a2b7 100644 --- a/Documentation/ABI/testing/sysfs-class-bdi +++ b/Documentation/ABI/testing/sysfs-class-bdi @@ -57,6 +57,21 @@ Description: (read-write) +What: /sys/class/bdi//min_bytes +Date: October 2022 +Contact: Stefan Roesch +Description: + Under normal circumstances each device is given a part of the + total write-back cache that relates to its current average + writeout speed in relation to the other devices. + + The 'min_bytes' parameter allows assigning a minimum + percentage of the write-back cache to a particular device + expressed in bytes. + For example, this is useful for providing a minimum QoS. + + (read-write) + What: /sys/class/bdi//max_bytes Date: October 2022 Contact: Stefan Roesch From 4e230b406eda9bdf7f8a71e2cc3df18a824abcb0 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Fri, 18 Nov 2022 16:52:10 -0800 Subject: [PATCH 3103/4122] mm: add bdi_set_max_ratio_no_scale() function This introduces bdi_set_max_ratio_no_scale(). It uses the max granularity for the ratio. This function is used by the new sysfs knob max_ratio_fine. 
Link: https://lkml.kernel.org/r/20221119005215.3052436-16-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton --- include/linux/backing-dev.h | 1 + mm/page-writeback.c | 11 ++++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 572669758c7f..d9acbb22ff25 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -109,6 +109,7 @@ u64 bdi_get_min_bytes(struct backing_dev_info *bdi); u64 bdi_get_max_bytes(struct backing_dev_info *bdi); int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); +int bdi_set_max_ratio_no_scale(struct backing_dev_info *bdi, unsigned int max_ratio); int bdi_set_min_bytes(struct backing_dev_info *bdi, u64 min_bytes); int bdi_set_max_bytes(struct backing_dev_info *bdi, u64 max_bytes); int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 3d151e7a9b6c..f44ade72966c 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -719,6 +719,9 @@ static int __bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ra { int ret = 0; + if (max_ratio > 100 * BDI_RATIO_SCALE) + return -EINVAL; + spin_lock_bh(&bdi_lock); if (bdi->min_ratio > max_ratio) { ret = -EINVAL; @@ -731,6 +734,11 @@ static int __bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ra return ret; } +int bdi_set_max_ratio_no_scale(struct backing_dev_info *bdi, unsigned int max_ratio) +{ + return __bdi_set_max_ratio(bdi, max_ratio); +} + int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) { return __bdi_set_min_ratio(bdi, min_ratio * BDI_RATIO_SCALE); @@ -738,9 +746,6 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) int bdi_set_max_ratio(struct backing_dev_info *bdi, 
unsigned int max_ratio) { - if (max_ratio > 100) - return -EINVAL; - return __bdi_set_max_ratio(bdi, max_ratio * BDI_RATIO_SCALE); } EXPORT_SYMBOL(bdi_set_max_ratio); From bca52dcbadc583f4db6435599c44a79f97293f06 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Fri, 18 Nov 2022 16:52:11 -0800 Subject: [PATCH 3104/4122] mm: add /sys/class/bdi//max_ratio_fine knob This adds the max_ratio_fine knob. The knob specifies the values not based on 1 of 100, but instead 1 per million. Link: https://lkml.kernel.org/r/20221119005215.3052436-17-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton --- mm/backing-dev.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 3fab79061ade..94c2382367cf 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -199,6 +199,25 @@ static ssize_t max_ratio_store(struct device *dev, } BDI_SHOW(max_ratio, bdi->max_ratio / BDI_RATIO_SCALE) +static ssize_t max_ratio_fine_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + unsigned int ratio; + ssize_t ret; + + ret = kstrtouint(buf, 10, &ratio); + if (ret < 0) + return ret; + + ret = bdi_set_max_ratio_no_scale(bdi, ratio); + if (!ret) + ret = count; + + return ret; +} +BDI_SHOW(max_ratio_fine, bdi->max_ratio) + static ssize_t min_bytes_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -297,6 +316,7 @@ static struct attribute *bdi_dev_attrs[] = { &dev_attr_read_ahead_kb.attr, &dev_attr_min_ratio.attr, &dev_attr_max_ratio.attr, + &dev_attr_max_ratio_fine.attr, &dev_attr_min_bytes.attr, &dev_attr_max_bytes.attr, &dev_attr_stable_pages_required.attr, From 54790f30fea74247e2f38b4a632ee3dc2fe42d86 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Fri, 18 Nov 2022 16:52:12 -0800 Subject: [PATCH 3105/4122] mm: document /sys/class/bdi//max_ratio_fine knob This documents the 
new /sys/class/bdi//max_ratio_fine knob. [akpm@linux-foundation.org: fix htmldocs warnings] Link: https://lkml.kernel.org/r/20221119005215.3052436-18-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton --- Documentation/ABI/testing/sysfs-class-bdi | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi index a9a2f588a2b7..e9c584db316f 100644 --- a/Documentation/ABI/testing/sysfs-class-bdi +++ b/Documentation/ABI/testing/sysfs-class-bdi @@ -57,6 +57,19 @@ Description: (read-write) +What: /sys/class/bdi//max_ratio_fine +Date: November 2022 +Contact: Stefan Roesch +Description: + Allows limiting a particular device to use not more than the + given value of the write-back cache. The value is given as part + of 1 million. This is useful in situations where we want to avoid + one device taking all or most of the write-back cache. For example + in case of an NFS mount that is prone to get stuck, or a FUSE mount + which cannot be trusted to play fair. + + (read-write) + What: /sys/class/bdi//min_bytes Date: October 2022 Contact: Stefan Roesch From 2c44af4f2aaa260199f218f11920c406e688693c Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Fri, 18 Nov 2022 16:52:13 -0800 Subject: [PATCH 3106/4122] mm: add bdi_set_min_ratio_no_scale() function This introduces bdi_set_min_ratio_no_scale(). It uses the max granularity for the ratio. This function is used by the new sysfs knob min_ratio_fine. 
Link: https://lkml.kernel.org/r/20221119005215.3052436-19-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton --- include/linux/backing-dev.h | 1 + mm/page-writeback.c | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index d9acbb22ff25..fbad4fcd408e 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -109,6 +109,7 @@ u64 bdi_get_min_bytes(struct backing_dev_info *bdi); u64 bdi_get_max_bytes(struct backing_dev_info *bdi); int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); +int bdi_set_min_ratio_no_scale(struct backing_dev_info *bdi, unsigned int min_ratio); int bdi_set_max_ratio_no_scale(struct backing_dev_info *bdi, unsigned int max_ratio); int bdi_set_min_bytes(struct backing_dev_info *bdi, u64 min_bytes); int bdi_set_max_bytes(struct backing_dev_info *bdi, u64 max_bytes); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index f44ade72966c..ad608ef2a243 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -690,6 +690,8 @@ static int __bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ra unsigned int delta; int ret = 0; + if (min_ratio > 100 * BDI_RATIO_SCALE) + return -EINVAL; min_ratio *= BDI_RATIO_SCALE; spin_lock_bh(&bdi_lock); @@ -734,6 +736,11 @@ static int __bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ra return ret; } +int bdi_set_min_ratio_no_scale(struct backing_dev_info *bdi, unsigned int min_ratio) +{ + return __bdi_set_min_ratio(bdi, min_ratio); +} + int bdi_set_max_ratio_no_scale(struct backing_dev_info *bdi, unsigned int max_ratio) { return __bdi_set_max_ratio(bdi, max_ratio); From ad3e6dabf6f7d9ffd68eb711191ef16cdbdd25f0 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Fri, 18 Nov 2022 16:52:14 -0800 Subject: [PATCH 3107/4122] mm: add 
/sys/class/bdi//min_ratio_fine knob This adds the min_ratio_fine knob. The knob specifies the values not based on 1 of 100, but instead 1 per million. Link: https://lkml.kernel.org/r/20221119005215.3052436-20-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton --- mm/backing-dev.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 94c2382367cf..a53b9360b72e 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -180,6 +180,25 @@ static ssize_t min_ratio_store(struct device *dev, } BDI_SHOW(min_ratio, bdi->min_ratio / BDI_RATIO_SCALE) +static ssize_t min_ratio_fine_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + unsigned int ratio; + ssize_t ret; + + ret = kstrtouint(buf, 10, &ratio); + if (ret < 0) + return ret; + + ret = bdi_set_min_ratio_no_scale(bdi, ratio); + if (!ret) + ret = count; + + return ret; +} +BDI_SHOW(min_ratio_fine, bdi->min_ratio) + static ssize_t max_ratio_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { @@ -315,6 +334,7 @@ static DEVICE_ATTR_RW(strict_limit); static struct attribute *bdi_dev_attrs[] = { &dev_attr_read_ahead_kb.attr, &dev_attr_min_ratio.attr, + &dev_attr_min_ratio_fine.attr, &dev_attr_max_ratio.attr, &dev_attr_max_ratio_fine.attr, &dev_attr_min_bytes.attr, From eba39236f18da7a50b6c51df5d902ee72c43e760 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Fri, 18 Nov 2022 16:52:15 -0800 Subject: [PATCH 3108/4122] mm: document /sys/class/bdi//min_ratio_fine knob This documents the new /sys/class/bdi//min_ratio_fine knob. 
[akpm@linux-foundation.org: fix htmldocs warnings] Link: https://lkml.kernel.org/r/20221119005215.3052436-21-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton --- Documentation/ABI/testing/sysfs-class-bdi | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi index e9c584db316f..0d2abd88a18c 100644 --- a/Documentation/ABI/testing/sysfs-class-bdi +++ b/Documentation/ABI/testing/sysfs-class-bdi @@ -44,6 +44,21 @@ Description: (read-write) +What: /sys/class/bdi//min_ratio_fine +Date: November 2022 +Contact: Stefan Roesch +Description: + Under normal circumstances each device is given a part of the + total write-back cache that relates to its current average + writeout speed in relation to the other devices. + + The 'min_ratio_fine' parameter allows assigning a minimum reserve + of the write-back cache to a particular device. The value is + expressed as part of 1 million. For example, this is useful for + providing a minimum QoS. + + (read-write) + What: /sys/class/bdi//max_ratio Date: January 2008 Contact: Peter Zijlstra From be21b32afe470c5ae98e27e49201158a47032942 Mon Sep 17 00:00:00 2001 From: NARIBAYASHI Akira Date: Wed, 26 Oct 2022 20:24:38 +0900 Subject: [PATCH 3109/4122] mm, compaction: fix fast_isolate_around() to stay within boundaries Depending on the memory configuration, isolate_freepages_block() may scan pages out of the target range and causes panic. Panic can occur on systems with multiple zones in a single pageblock. The reason it is rare is that it only happens in special configurations. Depending on how many similar systems there are, it may be a good idea to fix this problem for older kernels as well. The problem is that pfn as argument of fast_isolate_around() could be out of the target range. Therefore we should consider the case where pfn < start_pfn, and also the case where end_pfn < pfn. 
This problem should have been addressed by the commit 6e2b7044c199 ("mm, compaction: make fast_isolate_freepages() stay within zone") but there was an oversight. Case1: pfn < start_pfn | node X's zone | node Y's zone +-----------------+------------------------------... pageblock ^ ^ ^ +-----------+-----------+-----------+-----------+... ^ ^ ^ ^ ^ end_pfn ^ start_pfn = cc->zone->zone_start_pfn pfn <---------> scanned range by "Scan After" Case2: end_pfn < pfn | node X's zone | node Y's zone +-----------------+------------------------------... pageblock ^ ^ ^ +-----------+-----------+-----------+-----------+... ^ ^ ^ ^ ^ pfn ^ end_pfn start_pfn <---------> scanned range by "Scan Before" It seems that there is no good reason to skip nr_isolated pages just after the given pfn. So let's perform a simple scan from start to end instead of dividing the scan into "Before" and "After". Link: https://lkml.kernel.org/r/20221026112438.236336-1-a.naribayashi@fujitsu.com Fixes: 6e2b7044c199 ("mm, compaction: make fast_isolate_freepages() stay within zone").
Signed-off-by: NARIBAYASHI Akira Cc: David Rientjes Cc: Mel Gorman Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- mm/compaction.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/mm/compaction.c b/mm/compaction.c index 1f6da31dd9a5..ca1603524bbe 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1344,7 +1344,7 @@ move_freelist_tail(struct list_head *freelist, struct page *freepage) } static void -fast_isolate_around(struct compact_control *cc, unsigned long pfn, unsigned long nr_isolated) +fast_isolate_around(struct compact_control *cc, unsigned long pfn) { unsigned long start_pfn, end_pfn; struct page *page; @@ -1365,21 +1365,13 @@ fast_isolate_around(struct compact_control *cc, unsigned long pfn, unsigned long if (!page) return; - /* Scan before */ - if (start_pfn != pfn) { - isolate_freepages_block(cc, &start_pfn, pfn, &cc->freepages, 1, false); - if (cc->nr_freepages >= cc->nr_migratepages) - return; - } - - /* Scan after */ - start_pfn = pfn + nr_isolated; - if (start_pfn < end_pfn) - isolate_freepages_block(cc, &start_pfn, end_pfn, &cc->freepages, 1, false); + isolate_freepages_block(cc, &start_pfn, end_pfn, &cc->freepages, 1, false); /* Skip this pageblock in the future as it's full or nearly full */ if (cc->nr_freepages < cc->nr_migratepages) set_pageblock_skip(page); + + return; } /* Search orders in round-robin fashion */ @@ -1556,7 +1548,7 @@ fast_isolate_freepages(struct compact_control *cc) return cc->free_pfn; low_pfn = page_to_pfn(page); - fast_isolate_around(cc, low_pfn, nr_isolated); + fast_isolate_around(cc, low_pfn); return low_pfn; } From 7ce5f7e16afa82d33dc47d633404b8b1142a5e44 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 23 Nov 2022 10:43:19 +0530 Subject: [PATCH 3110/4122] documentation/mm: update pmd_present() in arch_pgtable_helpers.rst Although pmd_present() might seem to indicate a valid and mapped pmd entry, in reality it returns true when pmd_page() points to a valid page 
in memory, regardless of whether the pmd entry is mapped or not. Andrea Arcangeli had earlier explained [1] the required semantics for pmd_present(). This just updates the documentation for pmd_present() as required. [1] https://lore.kernel.org/lkml/20181017020930.GN30832@redhat.com/ Link: https://lkml.kernel.org/r/20221123051319.1312582-1-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Acked-by: David Hildenbrand Cc: Jonathan Corbet Cc: Andrea Arcangeli Cc: Mike Rapoport Signed-off-by: Andrew Morton --- Documentation/mm/arch_pgtable_helpers.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/mm/arch_pgtable_helpers.rst b/Documentation/mm/arch_pgtable_helpers.rst index cbaee9e59241..fd2a19df884e 100644 --- a/Documentation/mm/arch_pgtable_helpers.rst +++ b/Documentation/mm/arch_pgtable_helpers.rst @@ -94,7 +94,7 @@ PMD Page Table Helpers +---------------------------+--------------------------------------------------+ | pmd_trans_huge | Tests a Transparent Huge Page (THP) at PMD | +---------------------------+--------------------------------------------------+ -| pmd_present | Tests a valid mapped PMD | +| pmd_present | Tests whether pmd_page() points to valid memory | +---------------------------+--------------------------------------------------+ | pmd_young | Tests a young PMD | +---------------------------+--------------------------------------------------+ From 8d9b63708ddd1ac51e0260c7b8f641daf01f4caf Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 22 Nov 2022 12:30:22 +0900 Subject: [PATCH 3111/4122] zswap: do not allocate from atomic pool zswap_frontswap_load() should be called from preemptible context (we even call mutex_lock() there) and it does not look like we need to do GFP_ATOMIC allocation for temp buffer. The same applies to zswap_writeback_entry(). Use GFP_KERNEL for temporary buffer allocation in both cases.
Link: https://lkml.kernel.org/r/Y3xCTr6ikbtcUr/y@google.com Signed-off-by: Johannes Weiner Signed-off-by: Nhat Pham Signed-off-by: Sergey Senozhatsky Cc: Dan Streetman Cc: Minchan Kim Cc: Sergey Senozhatsky Cc: Vitaly Wool Signed-off-by: Andrew Morton --- mm/zpool.c | 7 +++++++ mm/zswap.c | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/mm/zpool.c b/mm/zpool.c index 68facc193496..f46c0d5e766c 100644 --- a/mm/zpool.c +++ b/mm/zpool.c @@ -387,6 +387,13 @@ bool zpool_evictable(struct zpool *zpool) * zpool_can_sleep_mapped - Test if zpool can sleep when do mapped. * @zpool: The zpool to test * + * Some allocators enter non-preemptible context in ->map() callback (e.g. + * disable pagefaults) and exit that context in ->unmap(), which limits what + * we can do with the mapped object. For instance, we cannot wait for + * asynchronous crypto API to decompress such an object or take mutexes + * since those will call into the scheduler. This function tells us whether + * we use such an allocator. + * * Returns: true if zpool can sleep; false otherwise. 
*/ bool zpool_can_sleep_mapped(struct zpool *zpool) diff --git a/mm/zswap.c b/mm/zswap.c index 2d48fd59cc7a..3019f0bde194 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -958,7 +958,7 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle) }; if (!zpool_can_sleep_mapped(pool)) { - tmp = kmalloc(PAGE_SIZE, GFP_ATOMIC); + tmp = kmalloc(PAGE_SIZE, GFP_KERNEL); if (!tmp) return -ENOMEM; } @@ -1311,7 +1311,7 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset, } if (!zpool_can_sleep_mapped(entry->pool->zpool)) { - tmp = kmalloc(entry->length, GFP_ATOMIC); + tmp = kmalloc(entry->length, GFP_KERNEL); if (!tmp) { ret = -ENOMEM; goto freeentry; From 373dfda2bac1173971099dc76254a016646f62ab Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 24 Nov 2022 18:46:41 +0530 Subject: [PATCH 3112/4122] mm/thp: rename pmd_to_page() as pmd_pgtable_page() Current pmd_to_page(), which derives the page table page containing the pmd address has a very misleading name. The problem being, it sounds similar to pmd_page() which derives page embedded in a given pmd entry either for next level page or a mapped huge page. Rename it as pmd_pgtable_page() instead. Link: https://lkml.kernel.org/r/20221124131641.1523772-1-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Cc: Mike Kravetz Cc: "Kirill A. 
Shutemov" Signed-off-by: Andrew Morton --- include/linux/mm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index d8363ac34a7c..2c73dc112ffc 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2510,7 +2510,7 @@ static inline void pgtable_pte_page_dtor(struct page *page) #if USE_SPLIT_PMD_PTLOCKS -static struct page *pmd_to_page(pmd_t *pmd) +static struct page *pmd_pgtable_page(pmd_t *pmd) { unsigned long mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1); return virt_to_page((void *)((unsigned long) pmd & mask)); @@ -2518,7 +2518,7 @@ static struct page *pmd_to_page(pmd_t *pmd) static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd) { - return ptlock_ptr(pmd_to_page(pmd)); + return ptlock_ptr(pmd_pgtable_page(pmd)); } static inline bool pmd_ptlock_init(struct page *page) @@ -2537,7 +2537,7 @@ static inline void pmd_ptlock_free(struct page *page) ptlock_free(page); } -#define pmd_huge_pte(mm, pmd) (pmd_to_page(pmd)->pmd_huge_pte) +#define pmd_huge_pte(mm, pmd) (pmd_pgtable_page(pmd)->pmd_huge_pte) #else From 7e25de77bc5ea56cc3ff618fc8f4ea1896a4dbb3 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 25 Nov 2022 09:15:02 +0530 Subject: [PATCH 3113/4122] s390/mm: use pmd_pgtable_page() helper in __gmap_segment_gaddr() In __gmap_segment_gaddr() pmd level page table page is being extracted from the pmd pointer, similar to pmd_pgtable_page() implementation. This reduces some redundancy by directly using pmd_pgtable_page() instead, though first making it available. 
Link: https://lkml.kernel.org/r/20221125034502.1559986-1-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Acked-by: Alexander Gordeev Cc: Christian Borntraeger Cc: David Hildenbrand Cc: Heiko Carstens Signed-off-by: Andrew Morton --- arch/s390/mm/gmap.c | 5 ++--- include/linux/mm.h | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 02d15c8dc92e..8947451ae021 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -336,12 +336,11 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, static unsigned long __gmap_segment_gaddr(unsigned long *entry) { struct page *page; - unsigned long offset, mask; + unsigned long offset; offset = (unsigned long) entry / sizeof(unsigned long); offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE; - mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1); - page = virt_to_page((void *)((unsigned long) entry & mask)); + page = pmd_pgtable_page((pmd_t *) entry); return page->index + offset; } diff --git a/include/linux/mm.h b/include/linux/mm.h index 2c73dc112ffc..8df5cae69c80 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2510,7 +2510,7 @@ static inline void pgtable_pte_page_dtor(struct page *page) #if USE_SPLIT_PMD_PTLOCKS -static struct page *pmd_pgtable_page(pmd_t *pmd) +static inline struct page *pmd_pgtable_page(pmd_t *pmd) { unsigned long mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1); return virt_to_page((void *)((unsigned long) pmd & mask)); From 8ef9c32a12a8a0012a4988050947c45521260c5d Mon Sep 17 00:00:00 2001 From: Xu Panda Date: Thu, 24 Nov 2022 19:29:01 +0800 Subject: [PATCH 3114/4122] mm: vmscan: use sysfs_emit() to instead of scnprintf() Replace open-coded snprintf() with sysfs_emit() to simplify the code. 
Link: https://lkml.kernel.org/r/202211241929015476424@zte.com.cn Signed-off-by: Xu Panda Signed-off-by: Yang Yang Signed-off-by: Andrew Morton --- mm/vmscan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 805fa51b175c..9356a3ee639c 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -5407,7 +5407,7 @@ static ssize_t show_enabled(struct kobject *kobj, struct kobj_attribute *attr, c if (arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG)) caps |= BIT(LRU_GEN_NONLEAF_YOUNG); - return snprintf(buf, PAGE_SIZE, "0x%04x\n", caps); + return sysfs_emit(buf, "0x%04x\n", caps); } /* see Documentation/admin-guide/mm/multigen_lru.rst for details */ From e833bc50340502a2a75b41bbd1a179aa769e2014 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 25 Nov 2022 13:58:57 -0500 Subject: [PATCH 3115/4122] mm/thp: re-apply mkdirty for small pages after split We used to have 624a2c94f5b7 (Partly revert "mm/thp: carry over dirty bit when thp splits on pmd") fixing the regression reported here by Anatoly Pugachev on sparc64: https://lore.kernel.org/r/20221021160603.GA23307@u164.east.ru Where we temporarily ignored the dirty bit for small pages. Then, Hev also reported a similar issue on loongarch: (the original mail was private, but Anatoly copied the list here) https://lore.kernel.org/r/CADxRZqxqb7f_WhMh=jweZP+ynf_JwGd-0VwbYgp4P+T0-AXosw@mail.gmail.com Hev pointed out that the issue is having HW write bit set within the pte_mkdirty() so the split pte can be written after split even if e.g. they were shared by more than one process, causing data corruption.
Hev also tried to explain why loongarch set HW write bit in mkdirty: https://lore.kernel.org/r/CAHirt9itKO_K_HPboXh5AyJtt16Zf0cD73PtHvM=na39u_ztxA@mail.gmail.com One way to fix it is as what Huacai proposed here for loongarch (then we can re-apply the dirty bit in thp split): https://lore.kernel.org/r/20221117042532.4064448-1-chenhuacai@loongson.cnn We may need similar thing for sparc64, though. For now since we've found the root cause of the dirty bit issue the simpler solution (which won't lose the dirty bit for small) that will work for both is we wr-protect after pte_mkdirty(), so the HW write bit can be persistent after thp split. Add a comment for wrprotect, so we will not mess up the ordering later. With 624a2c94f5b7 (Partly revert "mm/thp: carry over dirty bit when thp splits on pmd") this is not a fix anymore, but just brings back the dirty bit for thp split safely, so we re-apply the optimization but in safe way. Provide a Tested-by credit to Hev too (not the exact same patch but the same outcome) for loongarch. Link: https://lkml.kernel.org/r/20221125185857.3110155-1-peterx@redhat.com Signed-off-by: Peter Xu Tested-by: Hev # loongarch Cc: Anatoly Pugachev Cc: Raghavendra K T Cc: Thorsten Leemhuis Cc: Mike Kravetz Cc: "Kirill A. 
Shutemov" Signed-off-by: Andrew Morton --- mm/huge_memory.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 86a30041a2e1..8f10afba17a6 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2238,16 +2238,18 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, entry = maybe_mkwrite(entry, vma); if (anon_exclusive) SetPageAnonExclusive(page + i); - if (!write) - entry = pte_wrprotect(entry); if (!young) entry = pte_mkold(entry); + /* NOTE: this may set soft-dirty too on some archs */ + if (dirty) + entry = pte_mkdirty(entry); /* - * NOTE: we don't do pte_mkdirty when dirty==true - * because it breaks sparc64 which can sigsegv - * random process. Need to revisit when we figure - * out what is special with sparc64. + * NOTE: this needs to happen after pte_mkdirty, + * because some archs (sparc64, loongarch) could + * set hw write bit when mkdirty. */ + if (!write) + entry = pte_wrprotect(entry); if (soft_dirty) entry = pte_mksoft_dirty(entry); if (uffd_wp) From e0ff428042335c7b62785b3cf911c427a618bc86 Mon Sep 17 00:00:00 2001 From: Ma Wupeng Date: Fri, 25 Nov 2022 14:54:44 +0800 Subject: [PATCH 3116/4122] mm/memory-failure.c: cleanup in unpoison_memory If freeit is true, the value of ret must be zero, there is no need to check the value of freeit after label unlock_mutex. We can drop variable freeit to do this cleanup. 
Link: https://lkml.kernel.org/r/20221125065444.3462681-1-mawupeng1@huawei.com Signed-off-by: Ma Wupeng Acked-by: Naoya Horiguchi Reviewed-by: Miaohe Lin Cc: zhenwei pi Signed-off-by: Andrew Morton --- mm/memory-failure.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 2e62940c7bae..c77a9e37e27e 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -2338,7 +2338,6 @@ int unpoison_memory(unsigned long pfn) struct page *page; struct page *p; int ret = -EBUSY; - int freeit = 0; unsigned long count = 1; bool huge = false; static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL, @@ -2413,10 +2412,9 @@ int unpoison_memory(unsigned long pfn) goto unlock_mutex; } } - freeit = !!TestClearPageHWPoison(p); put_page(page); - if (freeit) { + if (TestClearPageHWPoison(p)) { put_page(page); ret = 0; } @@ -2424,7 +2422,7 @@ int unpoison_memory(unsigned long pfn) unlock_mutex: mutex_unlock(&mf_mutex); - if (!ret || freeit) { + if (!ret) { if (!huge) num_poisoned_pages_sub(pfn, 1); unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n", From 4265df667bbdc71c640e43c905bd9aeeead92365 Mon Sep 17 00:00:00 2001 From: Peng Hao Date: Tue, 8 Nov 2022 11:50:54 +0800 Subject: [PATCH 3117/4122] KVM: x86: Keep the lock order consistent between SRCU and gpc spinlock Acquire SRCU before taking the gpc spinlock in wait_pending_event() so as to be consistent with all other functions that acquire both locks. It's not illegal to acquire SRCU inside a spinlock, nor is there deadlock potential, but in general it's preferable to order locks from least restrictive to most restrictive, e.g. if wait_pending_event() needed to sleep for whatever reason, it could do so while holding SRCU, but would need to drop the spinlock. 
Signed-off-by: Peng Hao Reviewed-by: Sean Christopherson Link: https://lore.kernel.org/r/CAPm50a++Cb=QfnjMZ2EnCj-Sb9Y4UM-=uOEtHAcjnNLCAAf-dQ@mail.gmail.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/xen.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 9187d024d006..2f21fa5ee7de 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -1165,8 +1165,8 @@ static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports, bool ret = true; int idx, i; - read_lock_irqsave(&gpc->lock, flags); idx = srcu_read_lock(&kvm->srcu); + read_lock_irqsave(&gpc->lock, flags); if (!kvm_gpc_check(kvm, gpc, gpc->gpa, PAGE_SIZE)) goto out_rcu; @@ -1187,8 +1187,8 @@ static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports, } out_rcu: - srcu_read_unlock(&kvm->srcu, idx); read_unlock_irqrestore(&gpc->lock, flags); + srcu_read_unlock(&kvm->srcu, idx); return ret; } From 17122c06b86c9f77f45b86b8e62c3ed440847a59 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 30 Sep 2022 23:36:32 +0000 Subject: [PATCH 3118/4122] KVM: x86: Fail emulation during EMULTYPE_SKIP on any exception Treat any exception during instruction decode for EMULTYPE_SKIP as a "full" emulation failure, i.e. signal failure instead of queuing the exception. When decoding purely to skip an instruction, KVM and/or the CPU has already done some amount of emulation that cannot be unwound, e.g. on an EPT misconfig VM-Exit KVM has already processed the emulated MMIO. KVM already does this if a #UD is encountered, but not for other exceptions, e.g. if a #PF is encountered during fetch. In SVM's soft-injection use case, queueing the exception is particularly problematic as queueing exceptions while injecting events can put KVM into an infinite loop due to bailing from VM-Enter to service the newly pending exception. E.g.
multiple warnings to detect such behavior fire: ------------[ cut here ]------------ WARNING: CPU: 3 PID: 1017 at arch/x86/kvm/x86.c:9873 kvm_arch_vcpu_ioctl_run+0x1de5/0x20a0 [kvm] Modules linked in: kvm_amd ccp kvm irqbypass CPU: 3 PID: 1017 Comm: svm_nested_soft Not tainted 6.0.0-rc1+ #220 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 RIP: 0010:kvm_arch_vcpu_ioctl_run+0x1de5/0x20a0 [kvm] Call Trace: kvm_vcpu_ioctl+0x223/0x6d0 [kvm] __x64_sys_ioctl+0x85/0xc0 do_syscall_64+0x2b/0x50 entry_SYSCALL_64_after_hwframe+0x46/0xb0 ---[ end trace 0000000000000000 ]--- ------------[ cut here ]------------ WARNING: CPU: 3 PID: 1017 at arch/x86/kvm/x86.c:9987 kvm_arch_vcpu_ioctl_run+0x12a3/0x20a0 [kvm] Modules linked in: kvm_amd ccp kvm irqbypass CPU: 3 PID: 1017 Comm: svm_nested_soft Tainted: G W 6.0.0-rc1+ #220 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 RIP: 0010:kvm_arch_vcpu_ioctl_run+0x12a3/0x20a0 [kvm] Call Trace: kvm_vcpu_ioctl+0x223/0x6d0 [kvm] __x64_sys_ioctl+0x85/0xc0 do_syscall_64+0x2b/0x50 entry_SYSCALL_64_after_hwframe+0x46/0xb0 ---[ end trace 0000000000000000 ]--- Fixes: 6ea6e84309ca ("KVM: x86: inject exceptions produced by x86_decode_insn") Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20220930233632.1725475-1-seanjc@google.com --- arch/x86/kvm/x86.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7f850dfb4086..ef12747ecb63 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8772,7 +8772,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, write_fault_to_spt, emulation_type)) return 1; - if (ctxt->have_exception) { + + if (ctxt->have_exception && + !(emulation_type & EMULTYPE_SKIP)) { /* * #UD should result in just EMULATION_FAILED, and trap-like * exception should not be encountered during decode. 
From 5c30e8101e8d5d020b1d7119117889756a6ed713 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 30 Sep 2022 23:40:31 +0000 Subject: [PATCH 3119/4122] KVM: SVM: Skip WRMSR fastpath on VM-Exit if next RIP isn't valid Skip the WRMSR fastpath in SVM's VM-Exit handler if the next RIP isn't valid, e.g. because KVM is running with nrips=false. SVM must decode and emulate to skip the WRMSR if the CPU doesn't provide the next RIP. Getting the instruction bytes to decode the WRMSR requires reading guest memory, which in turn means dereferencing memslots, and that isn't safe because KVM doesn't hold SRCU when the fastpath runs. Don't bother trying to enable the fastpath for this case, e.g. by doing only the WRMSR and leaving the "skip" until later. NRIPS is supported on all modern CPUs (KVM has considered making it mandatory), and the next RIP will be valid the vast, vast majority of the time. ============================= WARNING: suspicious RCU usage 6.0.0-smp--4e557fcd3d80-skip #13 Tainted: G O ----------------------------- include/linux/kvm_host.h:954 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 1 lock held by stable/206475: #0: ffff9d9dfebcc0f0 (&vcpu->mutex){+.+.}-{3:3}, at: kvm_vcpu_ioctl+0x8b/0x620 [kvm] stack backtrace: CPU: 152 PID: 206475 Comm: stable Tainted: G O 6.0.0-smp--4e557fcd3d80-skip #13 Hardware name: Google, Inc. 
Arcadia_IT_80/Arcadia_IT_80, BIOS 10.48.0 01/27/2022 Call Trace: dump_stack_lvl+0x69/0xaa dump_stack+0x10/0x12 lockdep_rcu_suspicious+0x11e/0x130 kvm_vcpu_gfn_to_memslot+0x155/0x190 [kvm] kvm_vcpu_gfn_to_hva_prot+0x18/0x80 [kvm] paging64_walk_addr_generic+0x183/0x450 [kvm] paging64_gva_to_gpa+0x63/0xd0 [kvm] kvm_fetch_guest_virt+0x53/0xc0 [kvm] __do_insn_fetch_bytes+0x18b/0x1c0 [kvm] x86_decode_insn+0xf0/0xef0 [kvm] x86_emulate_instruction+0xba/0x790 [kvm] kvm_emulate_instruction+0x17/0x20 [kvm] __svm_skip_emulated_instruction+0x85/0x100 [kvm_amd] svm_skip_emulated_instruction+0x13/0x20 [kvm_amd] handle_fastpath_set_msr_irqoff+0xae/0x180 [kvm] svm_vcpu_run+0x4b8/0x5a0 [kvm_amd] vcpu_enter_guest+0x16ca/0x22f0 [kvm] kvm_arch_vcpu_ioctl_run+0x39d/0x900 [kvm] kvm_vcpu_ioctl+0x538/0x620 [kvm] __se_sys_ioctl+0x77/0xc0 __x64_sys_ioctl+0x1d/0x20 do_syscall_64+0x3d/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: 404d5d7bff0d ("KVM: X86: Introduce more exit_fastpath_completion enum values") Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20220930234031.1732249-1-seanjc@google.com --- arch/x86/kvm/svm/svm.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 91352d692845..6ffadbd57744 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3895,8 +3895,14 @@ static int svm_vcpu_pre_run(struct kvm_vcpu *vcpu) static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu) { - if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR && - to_svm(vcpu)->vmcb->control.exit_info_1) + struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control; + + /* + * Note, the next RIP must be provided as SRCU isn't held, i.e. KVM + * can't read guest memory (dereference memslots) to decode the WRMSR. 
+ */ + if (control->exit_code == SVM_EXIT_MSR && control->exit_info_1 && + nrips && control->next_rip) return handle_fastpath_set_msr_irqoff(vcpu); return EXIT_FASTPATH_NONE; From a8a12c0069b9e3c909b3c22bf8711d2f18a0af97 Mon Sep 17 00:00:00 2001 From: Zhao Liu Date: Wed, 28 Sep 2022 17:27:48 +0800 Subject: [PATCH 3120/4122] KVM: SVM: Replace kmap_atomic() with kmap_local_page() The use of kmap_atomic() is being deprecated in favor of kmap_local_page()[1]. The main difference between atomic and local mappings is that local mappings don't disable page faults or preemption. There're 2 reasons we can use kmap_local_page() here: 1. SEV is 64-bit only and kmap_local_page() doesn't disable migration in this case, but here the function clflush_cache_range() uses CLFLUSHOPT instruction to flush, and on x86 CLFLUSHOPT is not CPU-local and flushes the page out of the entire cache hierarchy on all CPUs (APM volume 3, chapter 3, CLFLUSHOPT). So there's no need to disable preemption to ensure CPU-local. 2. clflush_cache_range() doesn't need to disable pagefault and the mapping is still valid even if sleeps. This is also true for sched out/in when preempted. In addition, though kmap_local_page() is a thin wrapper around page_address() on 64-bit, kmap_local_page() should still be used here in preference to page_address() since page_address() isn't suitable to be used in a generic function (like sev_clflush_pages()) where the page passed in is not easy to determine the source of allocation. Keeping the kmap* API in place means it can be used for things other than highmem mappings[2]. Therefore, sev_clflush_pages() is a function that should use kmap_local_page() in place of kmap_atomic(). Convert the calls of kmap_atomic() / kunmap_atomic() to kmap_local_page() / kunmap_local(). 
[1]: https://lore.kernel.org/all/20220813220034.806698-1-ira.weiny@intel.com [2]: https://lore.kernel.org/lkml/5d667258-b58b-3d28-3609-e7914c99b31b@intel.com/ Suggested-by: Dave Hansen Suggested-by: Ira Weiny Suggested-by: Fabio M. De Francesco Signed-off-by: Zhao Liu Reviewed-by: Sean Christopherson Link: https://lore.kernel.org/r/20220928092748.463631-1-zhao1.liu@linux.intel.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/sev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 69dbf17f0d6a..86d6897f4806 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -465,9 +465,9 @@ static void sev_clflush_pages(struct page *pages[], unsigned long npages) return; for (i = 0; i < npages; i++) { - page_virtual = kmap_atomic(pages[i]); + page_virtual = kmap_local_page(pages[i]); clflush_cache_range(page_virtual, PAGE_SIZE); - kunmap_atomic(page_virtual); + kunmap_local(page_virtual); cond_resched(); } } From 9cc409325ddd776f6fd6293d5ce93ce1248af6e4 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 6 Oct 2022 00:19:56 +0000 Subject: [PATCH 3121/4122] KVM: nVMX: Inject #GP, not #UD, if "generic" VMXON CR0/CR4 check fails Inject #GP if VMXON is attempted with a CR0/CR4 that fails the generic "is CRx valid" check, but passes the CR4.VMXE check, and do the generic checks _after_ handling the post-VMXON VM-Fail. The CR4.VMXE check, and all other #UD cases, are special pre-conditions that are enforced prior to pivoting on the current VMX mode, i.e. occur before interception if VMXON is attempted in VMX non-root mode. All other CR0/CR4 checks generate #GP and effectively have lower priority than the post-VMXON check. Per the SDM: IF (register operand) or (CR0.PE = 0) or (CR4.VMXE = 0) or ...
THEN #UD; ELSIF not in VMX operation THEN IF (CPL > 0) or (in A20M mode) or (the values of CR0 and CR4 are not supported in VMX operation) THEN #GP(0); ELSIF in VMX non-root operation THEN VMexit; ELSIF CPL > 0 THEN #GP(0); ELSE VMfail("VMXON executed in VMX root operation"); FI; which, if re-written without ELSIF, yields: IF (register operand) or (CR0.PE = 0) or (CR4.VMXE = 0) or ... THEN #UD IF in VMX non-root operation THEN VMexit; IF CPL > 0 THEN #GP(0) IF in VMX operation THEN VMfail("VMXON executed in VMX root operation"); IF (in A20M mode) or (the values of CR0 and CR4 are not supported in VMX operation) THEN #GP(0); Note, KVM unconditionally forwards VMXON VM-Exits that occur in L2 to L1, i.e. there is no need to check the vCPU is not in VMX non-root mode. Add a comment to explain why unconditionally forwarding such exits is functionally correct. Reported-by: Eric Li Fixes: c7d855c2aff2 ("KVM: nVMX: Inject #UD if VMXON is attempted with incompatible CR0/CR4") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20221006001956.329314-1-seanjc@google.com --- arch/x86/kvm/vmx/nested.c | 44 +++++++++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index b28be793de29..892791019968 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -5131,24 +5131,35 @@ static int handle_vmxon(struct kvm_vcpu *vcpu) | FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX; /* - * Note, KVM cannot rely on hardware to perform the CR0/CR4 #UD checks - * that have higher priority than VM-Exit (see Intel SDM's pseudocode - * for VMXON), as KVM must load valid CR0/CR4 values into hardware while - * running the guest, i.e. KVM needs to check the _guest_ values. 
+ * Manually check CR4.VMXE checks, KVM must force CR4.VMXE=1 to enter + * the guest and so cannot rely on hardware to perform the check, + * which has higher priority than VM-Exit (see Intel SDM's pseudocode + * for VMXON). * - * Rely on hardware for the other two pre-VM-Exit checks, !VM86 and - * !COMPATIBILITY modes. KVM may run the guest in VM86 to emulate Real - * Mode, but KVM will never take the guest out of those modes. + * Rely on hardware for the other pre-VM-Exit checks, CR0.PE=1, !VM86 + * and !COMPATIBILITY modes. For an unrestricted guest, KVM doesn't + * force any of the relevant guest state. For a restricted guest, KVM + * does force CR0.PE=1, but only to also force VM86 in order to emulate + * Real Mode, and so there's no need to check CR0.PE manually. */ - if (!nested_host_cr0_valid(vcpu, kvm_read_cr0(vcpu)) || - !nested_host_cr4_valid(vcpu, kvm_read_cr4(vcpu))) { + if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE)) { kvm_queue_exception(vcpu, UD_VECTOR); return 1; } /* - * CPL=0 and all other checks that are lower priority than VM-Exit must - * be checked manually. + * The CPL is checked for "not in VMX operation" and for "in VMX root", + * and has higher priority than the VM-Fail due to being post-VMXON, + * i.e. VMXON #GPs outside of VMX non-root if CPL!=0. In VMX non-root, + * VMXON causes VM-Exit and KVM unconditionally forwards VMXON VM-Exits + * from L2 to L1, i.e. there's no need to check for the vCPU being in + * VMX non-root. + * + * Forwarding the VM-Exit unconditionally, i.e. without performing the + * #UD checks (see above), is functionally ok because KVM doesn't allow + * L1 to run L2 without CR4.VMXE=0, and because KVM never modifies L2's + * CR0 or CR4, i.e. it's L2's responsibility to emulate #UDs that are + * missed by hardware due to shadowing CR0 and/or CR4. 
*/ if (vmx_get_cpl(vcpu)) { kvm_inject_gp(vcpu, 0); @@ -5158,6 +5169,17 @@ static int handle_vmxon(struct kvm_vcpu *vcpu) if (vmx->nested.vmxon) return nested_vmx_fail(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION); + /* + * Invalid CR0/CR4 generates #GP. These checks are performed if and + * only if the vCPU isn't already in VMX operation, i.e. effectively + * have lower priority than the VM-Fail above. + */ + if (!nested_host_cr0_valid(vcpu, kvm_read_cr0(vcpu)) || + !nested_host_cr4_valid(vcpu, kvm_read_cr4(vcpu))) { + kvm_inject_gp(vcpu, 0); + return 1; + } + if ((vmx->msr_ia32_feature_control & VMXON_NEEDED_FEATURES) != VMXON_NEEDED_FEATURES) { kvm_inject_gp(vcpu, 0); From 4f209989586c79e9bf59ba9381101f5fb449dfbb Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Wed, 19 Oct 2022 14:36:19 -0700 Subject: [PATCH 3122/4122] KVM: VMX: Guest usage of IA32_SPEC_CTRL is likely At this point in time, most guests (in the default, out-of-the-box configuration) are likely to use IA32_SPEC_CTRL. Therefore, drop the compiler hint that it is unlikely for KVM to be intercepting WRMSR of IA32_SPEC_CTRL. Signed-off-by: Jim Mattson Reviewed-by: Sean Christopherson Link: https://lore.kernel.org/r/20221019213620.1953281-2-jmattson@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/vmx/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index cea8c07f5229..cb40f724d8cc 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -858,7 +858,7 @@ unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx) * to change it directly without causing a vmexit. In that case read * it after vmexit and store it in vmx->spec_ctrl. 
*/ - if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))) + if (!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL)) flags |= VMX_RUN_SAVE_SPEC_CTRL; return flags; From 2e7eab81425ad6c875f2ed47c0ce01e78afc38a5 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Wed, 19 Oct 2022 14:36:20 -0700 Subject: [PATCH 3123/4122] KVM: VMX: Execute IBPB on emulated VM-exit when guest has IBRS According to Intel's document on Indirect Branch Restricted Speculation, "Enabling IBRS does not prevent software from controlling the predicted targets of indirect branches of unrelated software executed later at the same predictor mode (for example, between two different user applications, or two different virtual machines). Such isolation can be ensured through use of the Indirect Branch Predictor Barrier (IBPB) command." This applies to both basic and enhanced IBRS. Since L1 and L2 VMs share hardware predictor modes (guest-user and guest-kernel), hardware IBRS is not sufficient to virtualize IBRS. (The way that basic IBRS is implemented on pre-eIBRS parts, hardware IBRS is actually sufficient in practice, even though it isn't sufficient architecturally.) For virtual CPUs that support IBRS, add an indirect branch prediction barrier on emulated VM-exit, to ensure that the predicted targets of indirect branches executed in L1 cannot be controlled by software that was executed in L2. Since we typically don't intercept guest writes to IA32_SPEC_CTRL, perform the IBPB at emulated VM-exit regardless of the current IA32_SPEC_CTRL.IBRS value, even though the IBPB could technically be deferred until L1 sets IA32_SPEC_CTRL.IBRS, if IA32_SPEC_CTRL.IBRS is clear at emulated VM-exit. This is CVE-2022-2196. 
Fixes: 5c911beff20a ("KVM: nVMX: Skip IBPB when switching between vmcs01 and vmcs02") Cc: Sean Christopherson Signed-off-by: Jim Mattson Reviewed-by: Sean Christopherson Link: https://lore.kernel.org/r/20221019213620.1953281-3-jmattson@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/vmx/nested.c | 11 +++++++++++ arch/x86/kvm/vmx/vmx.c | 6 ++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 892791019968..61c83424285c 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4798,6 +4798,17 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, vmx_switch_vmcs(vcpu, &vmx->vmcs01); + /* + * If IBRS is advertised to the vCPU, KVM must flush the indirect + * branch predictors when transitioning from L2 to L1, as L1 expects + * hardware (KVM in this case) to provide separate predictor modes. + * Bare metal isolates VMX root (host) from VMX non-root (guest), but + * doesn't isolate different VMCSs, i.e. in this case, doesn't provide + * separate modes for L2 vs L1. + */ + if (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) + indirect_branch_prediction_barrier(); + /* Update any VMCS fields that might have changed while L2 ran */ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index cb40f724d8cc..3f31c46c306e 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1348,8 +1348,10 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu, /* * No indirect branch prediction barrier needed when switching - * the active VMCS within a guest, e.g. on nested VM-Enter. - * The L1 VMM can protect itself with retpolines, IBPB or IBRS. + * the active VMCS within a vCPU, unless IBRS is advertised to + * the vCPU. 
To minimize the number of IBPBs executed, KVM + * performs IBPB on nested VM-Exit (a single nested transition + * may switch the active VMCS multiple times). */ if (!buddy || WARN_ON_ONCE(buddy->vmcs != prev)) indirect_branch_prediction_barrier(); From 658234de0d2ed3a1b86d793f4772e38a2e039b35 Mon Sep 17 00:00:00 2001 From: Kevin Tian Date: Tue, 29 Nov 2022 16:29:28 -0400 Subject: [PATCH 3124/4122] iommufd: Document overview of iommufd Add iommufd into the documentation tree, and supply initial documentation. Much of this is linked from code comments by kdoc. Link: https://lore.kernel.org/r/5-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com Reviewed-by: Bagas Sanjaya Reviewed-by: Eric Auger Signed-off-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- Documentation/userspace-api/index.rst | 1 + Documentation/userspace-api/iommufd.rst | 223 ++++++++++++++++++++++++ 2 files changed, 224 insertions(+) create mode 100644 Documentation/userspace-api/iommufd.rst diff --git a/Documentation/userspace-api/index.rst b/Documentation/userspace-api/index.rst index c78da9ce0ec4..f16337bdb852 100644 --- a/Documentation/userspace-api/index.rst +++ b/Documentation/userspace-api/index.rst @@ -25,6 +25,7 @@ place where this information is gathered. ebpf/index ioctl/index iommu + iommufd media/index netlink/index sysfs-platform_profile diff --git a/Documentation/userspace-api/iommufd.rst b/Documentation/userspace-api/iommufd.rst new file mode 100644 index 000000000000..79dd9eb51587 --- /dev/null +++ b/Documentation/userspace-api/iommufd.rst @@ -0,0 +1,223 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +======= +IOMMUFD +======= + +:Author: Jason Gunthorpe +:Author: Kevin Tian + +Overview +======== + +IOMMUFD is the user API to control the IOMMU subsystem as it relates to managing +IO page tables from userspace using file descriptors. It intends to be general +and consumable by any driver that wants to expose DMA to userspace. 
These +drivers are eventually expected to deprecate any internal IOMMU logic +they may already/historically implement (e.g. vfio_iommu_type1.c). + +At minimum iommufd provides universal support for managing I/O address spaces and +I/O page tables for all IOMMUs, with room in the design to add non-generic +features to cater to specific hardware functionality. + +In this context the upper-case name (IOMMUFD) refers to the subsystem while the +lower-case name (iommufd) refers to the file descriptors created via /dev/iommu for +use by userspace. + +Key Concepts +============ + +User Visible Objects +-------------------- + +The following IOMMUFD objects are exposed to userspace: + +- IOMMUFD_OBJ_IOAS, representing an I/O address space (IOAS), allowing map/unmap + of user space memory into ranges of I/O Virtual Address (IOVA). + + The IOAS is a functional replacement for the VFIO container, and like the VFIO + container it copies an IOVA map to a list of iommu_domains held within it. + +- IOMMUFD_OBJ_DEVICE, representing a device that is bound to iommufd by an + external driver. + +- IOMMUFD_OBJ_HW_PAGETABLE, representing an actual hardware I/O page table + (i.e. a single struct iommu_domain) managed by the iommu driver. + + The IOAS has a list of HW_PAGETABLES that share the same IOVA mapping and + it will synchronize its mapping with each member HW_PAGETABLE. + +All user-visible objects are destroyed via the IOMMU_DESTROY uAPI. 
+ +The diagram below shows relationship between user-visible objects and kernel +datastructures (external to iommufd), with numbers referred to operations +creating the objects and links:: + + _________________________________________________________ + | iommufd | + | [1] | + | _________________ | + | | | | + | | | | + | | | | + | | | | + | | | | + | | | | + | | | [3] [2] | + | | | ____________ __________ | + | | IOAS |<--| |<------| | | + | | | |HW_PAGETABLE| | DEVICE | | + | | | |____________| |__________| | + | | | | | | + | | | | | | + | | | | | | + | | | | | | + | | | | | | + | |_________________| | | | + | | | | | + |_________|___________________|___________________|_______| + | | | + | _____v______ _______v_____ + | PFN storage | | | | + |------------>|iommu_domain| |struct device| + |____________| |_____________| + +1. IOMMUFD_OBJ_IOAS is created via the IOMMU_IOAS_ALLOC uAPI. An iommufd can + hold multiple IOAS objects. IOAS is the most generic object and does not + expose interfaces that are specific to single IOMMU drivers. All operations + on the IOAS must operate equally on each of the iommu_domains inside of it. + +2. IOMMUFD_OBJ_DEVICE is created when an external driver calls the IOMMUFD kAPI + to bind a device to an iommufd. The driver is expected to implement a set of + ioctls to allow userspace to initiate the binding operation. Successful + completion of this operation establishes the desired DMA ownership over the + device. The driver must also set the driver_managed_dma flag and must not + touch the device until this operation succeeds. + +3. IOMMUFD_OBJ_HW_PAGETABLE is created when an external driver calls the IOMMUFD + kAPI to attach a bound device to an IOAS. Similarly the external driver uAPI + allows userspace to initiate the attaching operation. If a compatible + pagetable already exists then it is reused for the attachment. Otherwise a + new pagetable object and iommu_domain is created. 
Successful completion of + this operation sets up the linkages among IOAS, device and iommu_domain. Once + this completes the device can do DMA. + + Every iommu_domain inside the IOAS is also represented to userspace as a + HW_PAGETABLE object. + + .. note:: + + Future IOMMUFD updates will provide an API to create and manipulate the + HW_PAGETABLE directly. + +Due to the DMA ownership claim, a device can bind to only one iommufd and attach +to at most one IOAS object (PASID is not supported yet). + +Kernel Datastructure +-------------------- + +User visible objects are backed by the following datastructures: + +- iommufd_ioas for IOMMUFD_OBJ_IOAS. +- iommufd_device for IOMMUFD_OBJ_DEVICE. +- iommufd_hw_pagetable for IOMMUFD_OBJ_HW_PAGETABLE. + +Some terminology used when looking at these datastructures: + +- Automatic domain - refers to an iommu domain created automatically when + attaching a device to an IOAS object. This is compatible with the semantics of + VFIO type1. + +- Manual domain - refers to an iommu domain designated by the user as the + target pagetable to be attached to by a device. Though currently there are + no uAPIs to directly create such a domain, the datastructure and algorithms + are ready for handling that use case. + +- In-kernel user - refers to something like a VFIO mdev that is using the + IOMMUFD access interface to access the IOAS. This starts by creating an + iommufd_access object that is similar to the domain binding a physical device + would do. The access object will then allow converting IOVA ranges into struct + page * lists, or doing direct read/write to an IOVA. 
+ +iommufd_ioas serves as the metadata datastructure to manage how IOVA ranges are +mapped to memory pages, composed of: + +- struct io_pagetable holding the IOVA map +- struct iopt_area's representing populated portions of IOVA +- struct iopt_pages representing the storage of PFNs +- struct iommu_domain representing the IO page table in the IOMMU +- struct iopt_pages_access representing in-kernel users of PFNs +- struct xarray pinned_pfns holding a list of pages pinned by in-kernel users + +Each iopt_pages represents a logical linear array of full PFNs. The PFNs are +ultimately derived from userspace VAs via an mm_struct. Once they have been +pinned the PFNs are stored in IOPTEs of an iommu_domain or inside the pinned_pfns +xarray if they have been pinned through an iommufd_access. + +PFNs have to be copied between all combinations of storage locations, depending +on what domains are present and what kinds of in-kernel "software access" users +exist. The mechanism ensures that a page is pinned only once. + +An io_pagetable is composed of iopt_areas pointing at iopt_pages, along with a +list of iommu_domains that mirror the IOVA to PFN map. + +Multiple io_pagetable-s, through their iopt_area-s, can share a single +iopt_pages which avoids multi-pinning and double accounting of page +consumption. + +iommufd_ioas is sharable between subsystems, e.g. VFIO and VDPA, as long as +devices managed by different subsystems are bound to the same iommufd. + +IOMMUFD User API +================ + +.. kernel-doc:: include/uapi/linux/iommufd.h + +IOMMUFD Kernel API +================== + +The IOMMUFD kAPI is device-centric with group-related tricks managed behind the +scenes. This allows the external drivers calling such kAPI to implement a simple +device-centric uAPI for connecting its device to an iommufd, instead of +explicitly imposing the group semantics in its uAPI as VFIO does. + +.. kernel-doc:: drivers/iommu/iommufd/device.c + :export: + +.. 
kernel-doc:: drivers/iommu/iommufd/main.c + :export: + +VFIO and IOMMUFD +---------------- + +Connecting a VFIO device to iommufd can be done in two ways. + +The first is the VFIO-compatible way: directly implementing the /dev/vfio/vfio +container IOCTLs by mapping them into io_pagetable operations. Doing so allows +the use of iommufd in legacy VFIO applications by symlinking /dev/vfio/vfio to +/dev/iommu or extending VFIO to SET_CONTAINER using an iommufd instead of a +container fd. + +The second approach directly extends VFIO to support a new set of device-centric +user APIs based on the aforementioned IOMMUFD kernel API. It requires userspace +changes but better matches the IOMMUFD API semantics and makes it easier to +support new iommufd features compared to the first approach. + +Currently both approaches are still work-in-progress. + +There are still a few gaps to be resolved to catch up with VFIO type1, as +documented in iommufd_vfio_check_extension(). + +Future TODOs +============ + +Currently IOMMUFD supports only kernel-managed I/O page tables, similar to VFIO +type1. New features on the radar include: + + - Binding iommu_domain's to PASID/SSID + - Userspace page tables, for ARM, x86 and S390 + - Kernel bypass'd invalidation of user page tables + - Re-use of the KVM page table in the IOMMU + - Dirty page tracking in the IOMMU + - Runtime Increase/Decrease of IOPTE size + - PRI support with faults resolved in userspace From 2ff4bed7fee72ba1abfcff5f11ae8f8e570353f2 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 29 Nov 2022 16:29:29 -0400 Subject: [PATCH 3125/4122] iommufd: File descriptor, context, kconfig and makefiles This is the basic infrastructure of a new miscdevice to hold the iommufd IOCTL API. It provides: - A miscdevice to create file descriptors to run the IOCTL interface over - A table based ioctl dispatch and centralized extendable pre-validation step - An xarray mapping userspace ID's to kernel objects. 
The design has multiple inter-related objects held within a single IOMMUFD fd - A simple usage count to build a graph of object relations and protect against hostile userspace racing ioctls The only IOCTL provided in this patch is the generic 'destroy any object by handle' operation. Link: https://lore.kernel.org/r/6-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com Reviewed-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Eric Auger Tested-by: Nicolin Chen Tested-by: Yi Liu Tested-by: Lixiao Yang Tested-by: Matthew Rosato Signed-off-by: Yi Liu Signed-off-by: Jason Gunthorpe --- .../userspace-api/ioctl/ioctl-number.rst | 1 + MAINTAINERS | 12 + drivers/iommu/Kconfig | 1 + drivers/iommu/Makefile | 2 +- drivers/iommu/iommufd/Kconfig | 12 + drivers/iommu/iommufd/Makefile | 5 + drivers/iommu/iommufd/iommufd_private.h | 109 ++++++ drivers/iommu/iommufd/main.c | 344 ++++++++++++++++++ include/linux/iommufd.h | 31 ++ include/uapi/linux/iommufd.h | 55 +++ 10 files changed, 571 insertions(+), 1 deletion(-) create mode 100644 drivers/iommu/iommufd/Kconfig create mode 100644 drivers/iommu/iommufd/Makefile create mode 100644 drivers/iommu/iommufd/iommufd_private.h create mode 100644 drivers/iommu/iommufd/main.c create mode 100644 include/linux/iommufd.h create mode 100644 include/uapi/linux/iommufd.h diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst index 5f81e2a24a5c..eb045fc495a4 100644 --- a/Documentation/userspace-api/ioctl/ioctl-number.rst +++ b/Documentation/userspace-api/ioctl/ioctl-number.rst @@ -105,6 +105,7 @@ Code Seq# Include File Comments '8' all SNP8023 advanced NIC card ';' 64-7F linux/vfio.h +';' 80-FF linux/iommufd.h '=' 00-3f uapi/linux/ptp_clock.h '@' 00-0F linux/radeonfb.h conflict! '@' 00-0F drivers/video/aty/aty128fb.c conflict! 
diff --git a/MAINTAINERS b/MAINTAINERS index 379945f82a64..c0a93779731d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10717,6 +10717,18 @@ F: drivers/iommu/dma-iommu.h F: drivers/iommu/iova.c F: include/linux/iova.h +IOMMUFD +M: Jason Gunthorpe +M: Kevin Tian +L: iommu@lists.linux.dev +S: Maintained +T: git git://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd.git +F: Documentation/userspace-api/iommufd.rst +F: drivers/iommu/iommufd/ +F: include/linux/iommufd.h +F: include/uapi/linux/iommufd.h +F: tools/testing/selftests/iommu/ + IOMMU SUBSYSTEM M: Joerg Roedel M: Will Deacon diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index dc5f7a156ff5..319966cde5cf 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -188,6 +188,7 @@ config MSM_IOMMU source "drivers/iommu/amd/Kconfig" source "drivers/iommu/intel/Kconfig" +source "drivers/iommu/iommufd/Kconfig" config IRQ_REMAP bool "Support for Interrupt Remapping" diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 7fbf6a337662..f461d0651385 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -obj-y += amd/ intel/ arm/ +obj-y += amd/ intel/ arm/ iommufd/ obj-$(CONFIG_IOMMU_API) += iommu.o obj-$(CONFIG_IOMMU_API) += iommu-traces.o obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o diff --git a/drivers/iommu/iommufd/Kconfig b/drivers/iommu/iommufd/Kconfig new file mode 100644 index 000000000000..164812084a67 --- /dev/null +++ b/drivers/iommu/iommufd/Kconfig @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0-only +config IOMMUFD + tristate "IOMMU Userspace API" + select INTERVAL_TREE + select INTERVAL_TREE_SPAN_ITER + select IOMMU_API + default n + help + Provides /dev/iommu, the user API to control the IOMMU subsystem as + it relates to managing IO page tables that point at user space memory. + + If you don't know what to do here, say N. 
diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile new file mode 100644 index 000000000000..a07a8cffe937 --- /dev/null +++ b/drivers/iommu/iommufd/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0-only +iommufd-y := \ + main.o + +obj-$(CONFIG_IOMMUFD) += iommufd.o diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h new file mode 100644 index 000000000000..bb720bc11317 --- /dev/null +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES + */ +#ifndef __IOMMUFD_PRIVATE_H +#define __IOMMUFD_PRIVATE_H + +#include +#include +#include +#include + +struct iommufd_ctx { + struct file *file; + struct xarray objects; +}; + +struct iommufd_ucmd { + struct iommufd_ctx *ictx; + void __user *ubuffer; + u32 user_size; + void *cmd; +}; + +/* Copy the response in ucmd->cmd back to userspace. */ +static inline int iommufd_ucmd_respond(struct iommufd_ucmd *ucmd, + size_t cmd_len) +{ + if (copy_to_user(ucmd->ubuffer, ucmd->cmd, + min_t(size_t, ucmd->user_size, cmd_len))) + return -EFAULT; + return 0; +} + +enum iommufd_object_type { + IOMMUFD_OBJ_NONE, + IOMMUFD_OBJ_ANY = IOMMUFD_OBJ_NONE, +}; + +/* Base struct for all objects with a userspace ID handle. 
*/ +struct iommufd_object { + struct rw_semaphore destroy_rwsem; + refcount_t users; + enum iommufd_object_type type; + unsigned int id; +}; + +static inline bool iommufd_lock_obj(struct iommufd_object *obj) +{ + if (!down_read_trylock(&obj->destroy_rwsem)) + return false; + if (!refcount_inc_not_zero(&obj->users)) { + up_read(&obj->destroy_rwsem); + return false; + } + return true; +} + +struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id, + enum iommufd_object_type type); +static inline void iommufd_put_object(struct iommufd_object *obj) +{ + refcount_dec(&obj->users); + up_read(&obj->destroy_rwsem); +} + +/** + * iommufd_ref_to_users() - Switch from destroy_rwsem to users refcount + * protection + * @obj: Object to release + * + * Objects have two refcount protections (destroy_rwsem and the refcount_t + * users). Holding either of these will prevent the object from being destroyed. + * + * Depending on the use case, one protection or the other is appropriate. In + * most cases references are being protected by the destroy_rwsem. This allows + * orderly destruction of the object because iommufd_object_destroy_user() will + * wait for it to become unlocked. However, as a rwsem, it cannot be held across + * a system call return. So cases that have longer term needs must switch + * to the weaker users refcount_t. + * + * With users protection iommufd_object_destroy_user() will return false, + * refusing to destroy the object, causing -EBUSY to userspace. 
+ */ +static inline void iommufd_ref_to_users(struct iommufd_object *obj) +{ + up_read(&obj->destroy_rwsem); + /* iommufd_lock_obj() obtains users as well */ +} +void iommufd_object_abort(struct iommufd_ctx *ictx, struct iommufd_object *obj); +void iommufd_object_abort_and_destroy(struct iommufd_ctx *ictx, + struct iommufd_object *obj); +void iommufd_object_finalize(struct iommufd_ctx *ictx, + struct iommufd_object *obj); +bool iommufd_object_destroy_user(struct iommufd_ctx *ictx, + struct iommufd_object *obj); +struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, + size_t size, + enum iommufd_object_type type); + +#define iommufd_object_alloc(ictx, ptr, type) \ + container_of(_iommufd_object_alloc( \ + ictx, \ + sizeof(*(ptr)) + BUILD_BUG_ON_ZERO( \ + offsetof(typeof(*(ptr)), \ + obj) != 0), \ + type), \ + typeof(*(ptr)), obj) + +#endif diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c new file mode 100644 index 000000000000..dfbc68b97506 --- /dev/null +++ b/drivers/iommu/iommufd/main.c @@ -0,0 +1,344 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2021 Intel Corporation + * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES + * + * iommufd provides control over the IOMMU HW objects created by IOMMU kernel + * drivers. IOMMU HW objects revolve around IO page tables that map incoming DMA + * addresses (IOVA) to CPU addresses. 
+ */ +#define pr_fmt(fmt) "iommufd: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "iommufd_private.h" + +struct iommufd_object_ops { + void (*destroy)(struct iommufd_object *obj); +}; +static const struct iommufd_object_ops iommufd_object_ops[]; + +struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, + size_t size, + enum iommufd_object_type type) +{ + struct iommufd_object *obj; + int rc; + + obj = kzalloc(size, GFP_KERNEL_ACCOUNT); + if (!obj) + return ERR_PTR(-ENOMEM); + obj->type = type; + init_rwsem(&obj->destroy_rwsem); + refcount_set(&obj->users, 1); + + /* + * Reserve an ID in the xarray but do not publish the pointer yet since + * the caller hasn't initialized it yet. Once the pointer is published + * in the xarray and visible to other threads we can't reliably destroy + * it anymore, so the caller must complete all errorable operations + * before calling iommufd_object_finalize(). + */ + rc = xa_alloc(&ictx->objects, &obj->id, XA_ZERO_ENTRY, + xa_limit_32b, GFP_KERNEL_ACCOUNT); + if (rc) + goto out_free; + return obj; +out_free: + kfree(obj); + return ERR_PTR(rc); +} + +/* + * Allow concurrent access to the object. + * + * Once another thread can see the object pointer it can prevent object + * destruction. Except for special kernel-only objects there is no in-kernel way + * to reliably destroy a single object. Thus all APIs that are creating objects + * must use iommufd_object_abort() to handle their errors and only call + * iommufd_object_finalize() once object creation cannot fail. 
+ */ +void iommufd_object_finalize(struct iommufd_ctx *ictx, + struct iommufd_object *obj) +{ + void *old; + + old = xa_store(&ictx->objects, obj->id, obj, GFP_KERNEL); + /* obj->id was returned from xa_alloc() so the xa_store() cannot fail */ + WARN_ON(old); +} + +/* Undo _iommufd_object_alloc() if iommufd_object_finalize() was not called */ +void iommufd_object_abort(struct iommufd_ctx *ictx, struct iommufd_object *obj) +{ + void *old; + + old = xa_erase(&ictx->objects, obj->id); + WARN_ON(old); + kfree(obj); +} + +/* + * Abort an object that has been fully initialized and needs destroy, but has + * not been finalized. + */ +void iommufd_object_abort_and_destroy(struct iommufd_ctx *ictx, + struct iommufd_object *obj) +{ + iommufd_object_ops[obj->type].destroy(obj); + iommufd_object_abort(ictx, obj); +} + +struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id, + enum iommufd_object_type type) +{ + struct iommufd_object *obj; + + xa_lock(&ictx->objects); + obj = xa_load(&ictx->objects, id); + if (!obj || (type != IOMMUFD_OBJ_ANY && obj->type != type) || + !iommufd_lock_obj(obj)) + obj = ERR_PTR(-ENOENT); + xa_unlock(&ictx->objects); + return obj; +} + +/* + * The caller holds a users refcount and wants to destroy the object. Returns + * true if the object was destroyed. In all cases the caller no longer has a + * reference on obj. + */ +bool iommufd_object_destroy_user(struct iommufd_ctx *ictx, + struct iommufd_object *obj) +{ + /* + * The purpose of the destroy_rwsem is to ensure deterministic + * destruction of objects used by external drivers and destroyed by this + * function. Any temporary increment of the refcount must hold the read + * side of this, such as during ioctl execution. 
+ */ + down_write(&obj->destroy_rwsem); + xa_lock(&ictx->objects); + refcount_dec(&obj->users); + if (!refcount_dec_if_one(&obj->users)) { + xa_unlock(&ictx->objects); + up_write(&obj->destroy_rwsem); + return false; + } + __xa_erase(&ictx->objects, obj->id); + xa_unlock(&ictx->objects); + up_write(&obj->destroy_rwsem); + + iommufd_object_ops[obj->type].destroy(obj); + kfree(obj); + return true; +} + +static int iommufd_destroy(struct iommufd_ucmd *ucmd) +{ + struct iommu_destroy *cmd = ucmd->cmd; + struct iommufd_object *obj; + + obj = iommufd_get_object(ucmd->ictx, cmd->id, IOMMUFD_OBJ_ANY); + if (IS_ERR(obj)) + return PTR_ERR(obj); + iommufd_ref_to_users(obj); + /* See iommufd_ref_to_users() */ + if (!iommufd_object_destroy_user(ucmd->ictx, obj)) + return -EBUSY; + return 0; +} + +static int iommufd_fops_open(struct inode *inode, struct file *filp) +{ + struct iommufd_ctx *ictx; + + ictx = kzalloc(sizeof(*ictx), GFP_KERNEL_ACCOUNT); + if (!ictx) + return -ENOMEM; + + xa_init_flags(&ictx->objects, XA_FLAGS_ALLOC1 | XA_FLAGS_ACCOUNT); + ictx->file = filp; + filp->private_data = ictx; + return 0; +} + +static int iommufd_fops_release(struct inode *inode, struct file *filp) +{ + struct iommufd_ctx *ictx = filp->private_data; + struct iommufd_object *obj; + + /* + * The objects in the xarray form a graph of "users" counts, and we have + * to destroy them in a depth first manner. Leaf objects will reduce the + * users count of interior objects when they are destroyed. + * + * Repeatedly destroying all the "1 users" leaf objects will progress + * until the entire list is destroyed. If this can't progress then there + * is some bug related to object refcounting. 
+ */ + while (!xa_empty(&ictx->objects)) { + unsigned int destroyed = 0; + unsigned long index; + + xa_for_each(&ictx->objects, index, obj) { + if (!refcount_dec_if_one(&obj->users)) + continue; + destroyed++; + xa_erase(&ictx->objects, index); + iommufd_object_ops[obj->type].destroy(obj); + kfree(obj); + } + /* Bug related to users refcount */ + if (WARN_ON(!destroyed)) + break; + } + kfree(ictx); + return 0; +} + +union ucmd_buffer { + struct iommu_destroy destroy; +}; + +struct iommufd_ioctl_op { + unsigned int size; + unsigned int min_size; + unsigned int ioctl_num; + int (*execute)(struct iommufd_ucmd *ucmd); +}; + +#define IOCTL_OP(_ioctl, _fn, _struct, _last) \ + [_IOC_NR(_ioctl) - IOMMUFD_CMD_BASE] = { \ + .size = sizeof(_struct) + \ + BUILD_BUG_ON_ZERO(sizeof(union ucmd_buffer) < \ + sizeof(_struct)), \ + .min_size = offsetofend(_struct, _last), \ + .ioctl_num = _ioctl, \ + .execute = _fn, \ + } +static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = { + IOCTL_OP(IOMMU_DESTROY, iommufd_destroy, struct iommu_destroy, id), +}; + +static long iommufd_fops_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + const struct iommufd_ioctl_op *op; + struct iommufd_ucmd ucmd = {}; + union ucmd_buffer buf; + unsigned int nr; + int ret; + + ucmd.ictx = filp->private_data; + ucmd.ubuffer = (void __user *)arg; + ret = get_user(ucmd.user_size, (u32 __user *)ucmd.ubuffer); + if (ret) + return ret; + + nr = _IOC_NR(cmd); + if (nr < IOMMUFD_CMD_BASE || + (nr - IOMMUFD_CMD_BASE) >= ARRAY_SIZE(iommufd_ioctl_ops)) + return -ENOIOCTLCMD; + op = &iommufd_ioctl_ops[nr - IOMMUFD_CMD_BASE]; + if (op->ioctl_num != cmd) + return -ENOIOCTLCMD; + if (ucmd.user_size < op->min_size) + return -EINVAL; + + ucmd.cmd = &buf; + ret = copy_struct_from_user(ucmd.cmd, op->size, ucmd.ubuffer, + ucmd.user_size); + if (ret) + return ret; + ret = op->execute(&ucmd); + return ret; +} + +static const struct file_operations iommufd_fops = { + .owner = THIS_MODULE, + .open = 
iommufd_fops_open, + .release = iommufd_fops_release, + .unlocked_ioctl = iommufd_fops_ioctl, +}; + +/** + * iommufd_ctx_get - Get a context reference + * @ictx: Context to get + * + * The caller must already hold a valid reference to ictx. + */ +void iommufd_ctx_get(struct iommufd_ctx *ictx) +{ + get_file(ictx->file); +} +EXPORT_SYMBOL_NS_GPL(iommufd_ctx_get, IOMMUFD); + +/** + * iommufd_ctx_from_file - Acquires a reference to the iommufd context + * @file: File to obtain the reference from + * + * Returns a pointer to the iommufd_ctx, otherwise ERR_PTR. The struct file + * remains owned by the caller and the caller must still do fput. On success + * the caller is responsible to call iommufd_ctx_put(). + */ +struct iommufd_ctx *iommufd_ctx_from_file(struct file *file) +{ + struct iommufd_ctx *ictx; + + if (file->f_op != &iommufd_fops) + return ERR_PTR(-EBADFD); + ictx = file->private_data; + iommufd_ctx_get(ictx); + return ictx; +} +EXPORT_SYMBOL_NS_GPL(iommufd_ctx_from_file, IOMMUFD); + +/** + * iommufd_ctx_put - Put back a reference + * @ictx: Context to put back + */ +void iommufd_ctx_put(struct iommufd_ctx *ictx) +{ + fput(ictx->file); +} +EXPORT_SYMBOL_NS_GPL(iommufd_ctx_put, IOMMUFD); + +static const struct iommufd_object_ops iommufd_object_ops[] = { +}; + +static struct miscdevice iommu_misc_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "iommu", + .fops = &iommufd_fops, + .nodename = "iommu", + .mode = 0660, +}; + +static int __init iommufd_init(void) +{ + int ret; + + ret = misc_register(&iommu_misc_dev); + if (ret) + return ret; + return 0; +} + +static void __exit iommufd_exit(void) +{ + misc_deregister(&iommu_misc_dev); +} + +module_init(iommufd_init); +module_exit(iommufd_exit); + +MODULE_DESCRIPTION("I/O Address Space Management for passthrough devices"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h new file mode 100644 index 000000000000..d1817472c273 --- /dev/null +++ b/include/linux/iommufd.h @@ -0,0 
+1,31 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2021 Intel Corporation + * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES + */ +#ifndef __LINUX_IOMMUFD_H +#define __LINUX_IOMMUFD_H + +#include +#include +#include + +struct iommufd_ctx; +struct file; + +void iommufd_ctx_get(struct iommufd_ctx *ictx); + +#if IS_ENABLED(CONFIG_IOMMUFD) +struct iommufd_ctx *iommufd_ctx_from_file(struct file *file); +void iommufd_ctx_put(struct iommufd_ctx *ictx); +#else /* !CONFIG_IOMMUFD */ +static inline struct iommufd_ctx *iommufd_ctx_from_file(struct file *file) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void iommufd_ctx_put(struct iommufd_ctx *ictx) +{ +} +#endif /* CONFIG_IOMMUFD */ +#endif diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h new file mode 100644 index 000000000000..37de92f0534b --- /dev/null +++ b/include/uapi/linux/iommufd.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. + */ +#ifndef _UAPI_IOMMUFD_H +#define _UAPI_IOMMUFD_H + +#include +#include + +#define IOMMUFD_TYPE (';') + +/** + * DOC: General ioctl format + * + * The ioctl interface follows a general format to allow for extensibility. Each + * ioctl is passed in a structure pointer as the argument providing the size of + * the structure in the first u32. The kernel checks that any structure space + * beyond what it understands is 0. This allows userspace to use the backward + * compatible portion while consistently using the newer, larger, structures. + * + * ioctls use a standard meaning for common errnos: + * + * - ENOTTY: The IOCTL number itself is not supported at all + * - E2BIG: The IOCTL number is supported, but the provided structure has + * non-zero in a part the kernel does not understand. 
+ * - EOPNOTSUPP: The IOCTL number is supported, and the structure is + * understood, however a known field has a value the kernel does not + * understand or support. + * - EINVAL: Everything about the IOCTL was understood, but a field is not + * correct. + * - ENOENT: An ID or IOVA provided does not exist. + * - ENOMEM: Out of memory. + * - EOVERFLOW: Mathematics overflowed. + * + * As well as additional errnos, within specific ioctls. + */ +enum { + IOMMUFD_CMD_BASE = 0x80, + IOMMUFD_CMD_DESTROY = IOMMUFD_CMD_BASE, +}; + +/** + * struct iommu_destroy - ioctl(IOMMU_DESTROY) + * @size: sizeof(struct iommu_destroy) + * @id: iommufd object ID to destroy. Can be any destroyable object type. + * + * Destroy any object held within iommufd. + */ +struct iommu_destroy { + __u32 size; + __u32 id; +}; +#define IOMMU_DESTROY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_DESTROY) + +#endif From ce5a23c835aa0f0a931b5bcde1e7811f951b0146 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 29 Nov 2022 16:29:30 -0400 Subject: [PATCH 3126/4122] kernel/user: Allow user_struct::locked_vm to be usable for iommufd Following the pattern of io_uring, perf, skb, and bpf, iommufd will use user->locked_vm for accounting pinned pages. Ensure the value is included in the struct and export free_uid() as iommufd is modular. user->locked_vm is the good accounting to use for ulimit because it is per-user, and the security sandboxing of locked pages is not supposed to be per-process. Other places (vfio, vdpa and infiniband) have used mm->pinned_vm and/or mm->locked_vm for accounting pinned pages, but this is only per-process and inconsistent with the new FOLL_LONGTERM users in the kernel. Concurrent work is underway to try to put this in a cgroup, so everything can be consistent and the kernel can provide a FOLL_LONGTERM limit that actually provides security. 
Link: https://lore.kernel.org/r/7-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Eric Auger Tested-by: Nicolin Chen Tested-by: Yi Liu Tested-by: Lixiao Yang Tested-by: Matthew Rosato Signed-off-by: Jason Gunthorpe --- include/linux/sched/user.h | 2 +- kernel/user.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h index f054d0360a75..4cc52698e214 100644 --- a/include/linux/sched/user.h +++ b/include/linux/sched/user.h @@ -25,7 +25,7 @@ struct user_struct { #if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL) || \ defined(CONFIG_NET) || defined(CONFIG_IO_URING) || \ - defined(CONFIG_VFIO_PCI_ZDEV_KVM) + defined(CONFIG_VFIO_PCI_ZDEV_KVM) || IS_ENABLED(CONFIG_IOMMUFD) atomic_long_t locked_vm; #endif #ifdef CONFIG_WATCH_QUEUE diff --git a/kernel/user.c b/kernel/user.c index e2cf8c22b539..d667debeafd6 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -185,6 +185,7 @@ void free_uid(struct user_struct *up) if (refcount_dec_and_lock_irqsave(&up->__count, &uidhash_lock, &flags)) free_user(up, flags); } +EXPORT_SYMBOL_GPL(free_uid); struct user_struct *alloc_uid(kuid_t uid) { From f394576eb11dbcd3a740fa41e577b97f0720d26e Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 29 Nov 2022 16:29:31 -0400 Subject: [PATCH 3127/4122] iommufd: PFN handling for iopt_pages The top of the data structure provides an IO Address Space (IOAS) that is similar to a VFIO container. The IOAS allows map/unmap of memory into ranges of IOVA called iopt_areas. Multiple IOMMU domains (IO page tables) and in-kernel accesses (like VFIO mdevs) can be attached to the IOAS to access the PFNs that those IOVA areas cover. 
The IO Address Space (IOAS) datastructure is composed of: - struct io_pagetable holding the IOVA map - struct iopt_areas representing populated portions of IOVA - struct iopt_pages representing the storage of PFNs - struct iommu_domain representing each IO page table in the system IOMMU - struct iopt_pages_access representing in-kernel accesses of PFNs (ie VFIO mdevs) - struct xarray pinned_pfns holding a list of pages pinned by in-kernel accesses This patch introduces the lowest part of the datastructure - the movement of PFNs in a tiered storage scheme: 1) iopt_pages::pinned_pfns xarray 2) Multiple iommu_domains 3) The origin of the PFNs, i.e. the userspace pointer PFN have to be copied between all combinations of tiers, depending on the configuration. The interface is an iterator called a 'pfn_reader' which determines which tier each PFN is stored and loads it into a list of PFNs held in a struct pfn_batch. Each step of the iterator will fill up the pfn_batch, then the caller can use the pfn_batch to send the PFNs to the required destination. Repeating this loop will read all the PFNs in an IOVA range. The pfn_reader and pfn_batch also keep track of the pinned page accounting. While PFNs are always stored and accessed as full PAGE_SIZE units the iommu_domain tier can store with a sub-page offset/length to support IOMMUs with a smaller IOPTE size than PAGE_SIZE. 
Link: https://lore.kernel.org/r/8-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com Reviewed-by: Kevin Tian Tested-by: Nicolin Chen Tested-by: Yi Liu Tested-by: Lixiao Yang Tested-by: Matthew Rosato Signed-off-by: Jason Gunthorpe --- .clang-format | 1 + drivers/iommu/iommufd/Makefile | 3 +- drivers/iommu/iommufd/double_span.h | 53 ++ drivers/iommu/iommufd/io_pagetable.h | 109 +++ drivers/iommu/iommufd/iommufd_private.h | 24 + drivers/iommu/iommufd/pages.c | 1066 +++++++++++++++++++++++ include/linux/iommufd.h | 7 + 7 files changed, 1262 insertions(+), 1 deletion(-) create mode 100644 drivers/iommu/iommufd/double_span.h create mode 100644 drivers/iommu/iommufd/io_pagetable.h create mode 100644 drivers/iommu/iommufd/pages.c diff --git a/.clang-format b/.clang-format index 96d07786dcfb..501241f89776 100644 --- a/.clang-format +++ b/.clang-format @@ -440,6 +440,7 @@ ForEachMacros: - 'inet_lhash2_for_each_icsk' - 'inet_lhash2_for_each_icsk_continue' - 'inet_lhash2_for_each_icsk_rcu' + - 'interval_tree_for_each_double_span' - 'interval_tree_for_each_span' - 'intlist__for_each_entry' - 'intlist__for_each_entry_safe' diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile index a07a8cffe937..05a0e91e30af 100644 --- a/drivers/iommu/iommufd/Makefile +++ b/drivers/iommu/iommufd/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only iommufd-y := \ - main.o + main.o \ + pages.o obj-$(CONFIG_IOMMUFD) += iommufd.o diff --git a/drivers/iommu/iommufd/double_span.h b/drivers/iommu/iommufd/double_span.h new file mode 100644 index 000000000000..b37aab7488c0 --- /dev/null +++ b/drivers/iommu/iommufd/double_span.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. + */ +#ifndef __IOMMUFD_DOUBLE_SPAN_H +#define __IOMMUFD_DOUBLE_SPAN_H + +#include + +/* + * This is a variation of the general interval_tree_span_iter that computes the + * spans over the union of two different interval trees. 
Used ranges are broken + * up and reported based on the tree that provides the interval. The first span + * always takes priority. Like interval_tree_span_iter it is greedy and the same + * value of is_used will not repeat on two iteration cycles. + */ +struct interval_tree_double_span_iter { + struct rb_root_cached *itrees[2]; + struct interval_tree_span_iter spans[2]; + union { + unsigned long start_hole; + unsigned long start_used; + }; + union { + unsigned long last_hole; + unsigned long last_used; + }; + /* 0 = hole, 1 = used span[0], 2 = used span[1], -1 done iteration */ + int is_used; +}; + +void interval_tree_double_span_iter_update( + struct interval_tree_double_span_iter *iter); +void interval_tree_double_span_iter_first( + struct interval_tree_double_span_iter *iter, + struct rb_root_cached *itree1, struct rb_root_cached *itree2, + unsigned long first_index, unsigned long last_index); +void interval_tree_double_span_iter_next( + struct interval_tree_double_span_iter *iter); + +static inline bool +interval_tree_double_span_iter_done(struct interval_tree_double_span_iter *state) +{ + return state->is_used == -1; +} + +#define interval_tree_for_each_double_span(span, itree1, itree2, first_index, \ + last_index) \ + for (interval_tree_double_span_iter_first(span, itree1, itree2, \ + first_index, last_index); \ + !interval_tree_double_span_iter_done(span); \ + interval_tree_double_span_iter_next(span)) + +#endif diff --git a/drivers/iommu/iommufd/io_pagetable.h b/drivers/iommu/iommufd/io_pagetable.h new file mode 100644 index 000000000000..b74bf01ffc52 --- /dev/null +++ b/drivers/iommu/iommufd/io_pagetable.h @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. 
+ * + */ +#ifndef __IO_PAGETABLE_H +#define __IO_PAGETABLE_H + +#include +#include +#include +#include + +#include "iommufd_private.h" + +struct iommu_domain; + +/* + * Each io_pagetable is composed of intervals of areas which cover regions of + * the iova that are backed by something. iova not covered by areas is not + * populated in the page table. Each area is fully populated with pages. + * + * iovas are in byte units, but must be iopt->iova_alignment aligned. + * + * pages can be NULL, this means some other thread is still working on setting + * up or tearing down the area. When observed under the write side of the + * domain_rwsem a NULL pages must mean the area is still being setup and no + * domains are filled. + * + * storage_domain points at an arbitrary iommu_domain that is holding the PFNs + * for this area. It is locked by the pages->mutex. This simplifies the locking + * as the pages code can rely on the storage_domain without having to get the + * iopt->domains_rwsem. + * + * The io_pagetable::iova_rwsem protects node + * The iopt_pages::mutex protects pages_node + * iopt and iommu_prot are immutable + * The pages::mutex protects num_accesses + */ +struct iopt_area { + struct interval_tree_node node; + struct interval_tree_node pages_node; + struct io_pagetable *iopt; + struct iopt_pages *pages; + struct iommu_domain *storage_domain; + /* How many bytes into the first page the area starts */ + unsigned int page_offset; + /* IOMMU_READ, IOMMU_WRITE, etc */ + int iommu_prot; + unsigned int num_accesses; +}; + +static inline unsigned long iopt_area_index(struct iopt_area *area) +{ + return area->pages_node.start; +} + +static inline unsigned long iopt_area_last_index(struct iopt_area *area) +{ + return area->pages_node.last; +} + +static inline unsigned long iopt_area_iova(struct iopt_area *area) +{ + return area->node.start; +} + +static inline unsigned long iopt_area_last_iova(struct iopt_area *area) +{ + return area->node.last; +} + +enum { + 
IOPT_PAGES_ACCOUNT_NONE = 0, + IOPT_PAGES_ACCOUNT_USER = 1, + IOPT_PAGES_ACCOUNT_MM = 2, +}; + +/* + * This holds a pinned page list for multiple areas of IO address space. The + * pages always originate from a linear chunk of userspace VA. Multiple + * io_pagetable's, through their iopt_area's, can share a single iopt_pages + * which avoids multi-pinning and double accounting of page consumption. + * + * indexes in this structure are measured in PAGE_SIZE units, are 0 based from + * the start of the uptr and extend to npages. pages are pinned dynamically + * according to the intervals in the access_itree and domains_itree, npinned + * records the current number of pages pinned. + */ +struct iopt_pages { + struct kref kref; + struct mutex mutex; + size_t npages; + size_t npinned; + size_t last_npinned; + struct task_struct *source_task; + struct mm_struct *source_mm; + struct user_struct *source_user; + void __user *uptr; + bool writable:1; + u8 account_mode; + + struct xarray pinned_pfns; + /* Of iopt_pages_access::node */ + struct rb_root_cached access_itree; + /* Of iopt_area::pages_node */ + struct rb_root_cached domains_itree; +}; + +#endif diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index bb720bc11317..169a30ff3bf0 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -14,6 +14,30 @@ struct iommufd_ctx { struct xarray objects; }; +/* + * The IOVA to PFN map. The map automatically copies the PFNs into multiple + * domains and permits sharing of PFNs between io_pagetable instances. This + * supports both a design where IOAS's are 1:1 with a domain (eg because the + * domain is HW customized), or where the IOAS is 1:N with multiple generic + * domains. The io_pagetable holds an interval tree of iopt_areas which point + * to shared iopt_pages which hold the pfns mapped to the page table. 
+ * + * The locking order is domains_rwsem -> iova_rwsem -> pages::mutex + */ +struct io_pagetable { + struct rw_semaphore domains_rwsem; + struct xarray domains; + unsigned int next_domain_id; + + struct rw_semaphore iova_rwsem; + struct rb_root_cached area_itree; + /* IOVA that cannot become reserved, struct iopt_allowed */ + struct rb_root_cached allowed_itree; + /* IOVA that cannot be allocated, struct iopt_reserved */ + struct rb_root_cached reserved_itree; + u8 disable_large_pages; +}; + struct iommufd_ucmd { struct iommufd_ctx *ictx; void __user *ubuffer; diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c new file mode 100644 index 000000000000..ebca78e743c6 --- /dev/null +++ b/drivers/iommu/iommufd/pages.c @@ -0,0 +1,1066 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. + * + * The iopt_pages is the center of the storage and motion of PFNs. Each + * iopt_pages represents a logical linear array of full PFNs. The array is 0 + * based and has npages in it. Accessors use 'index' to refer to the entry in + * this logical array, regardless of its storage location. + * + * PFNs are stored in a tiered scheme: + * 1) iopt_pages::pinned_pfns xarray + * 2) An iommu_domain + * 3) The origin of the PFNs, i.e. the userspace pointer + * + * PFN have to be copied between all combinations of tiers, depending on the + * configuration. + * + * When a PFN is taken out of the userspace pointer it is pinned exactly once. + * The storage locations of the PFN's index are tracked in the two interval + * trees. If no interval includes the index then it is not pinned. + * + * If access_itree includes the PFN's index then an in-kernel access has + * requested the page. The PFN is stored in the xarray so other requestors can + * continue to find it. + * + * If the domains_itree includes the PFN's index then an iommu_domain is storing + * the PFN and it can be read back using iommu_iova_to_phys(). 
To avoid + * duplicating storage the xarray is not used if only iommu_domains are using + * the PFN's index. + * + * As a general principle this is designed so that destroy never fails. This + * means removing an iommu_domain or releasing an in-kernel access will not fail + * due to insufficient memory. In practice this means some cases have to hold + * PFNs in the xarray even though they are also being stored in an iommu_domain. + * + * While the iopt_pages can use an iommu_domain as storage, it does not have an + * IOVA itself. Instead the iopt_area represents a range of IOVA and uses the + * iopt_pages as the PFN provider. Multiple iopt_areas can share the iopt_pages + * and reference their own slice of the PFN array, with sub page granularity. + * + * In this file the term 'last' indicates an inclusive and closed interval, eg + * [0,0] refers to a single PFN. 'end' means an open range, eg [0,0) refers to + * no PFNs. + * + * Be cautious of overflow. An IOVA can go all the way up to U64_MAX, so + * last_iova + 1 can overflow. An iopt_pages index will always be much less than + * ULONG_MAX so last_index + 1 cannot overflow. + */ +#include +#include +#include +#include +#include +#include +#include + +#include "io_pagetable.h" +#include "double_span.h" + +#define TEMP_MEMORY_LIMIT 65536 +#define BATCH_BACKUP_SIZE 32 + +/* + * More memory makes pin_user_pages() and the batching more efficient, but as + * this is only a performance optimization don't try too hard to get it. A 64k + * allocation can hold about 26M of 4k pages and 13G of 2M pages in a + * pfn_batch. Various destroy paths cannot fail and provide a small amount of + * stack memory as a backup contingency. If backup_len is given this cannot + * fail. 
+ */ +static void *temp_kmalloc(size_t *size, void *backup, size_t backup_len) +{ + void *res; + + if (WARN_ON(*size == 0)) + return NULL; + + if (*size < backup_len) + return backup; + *size = min_t(size_t, *size, TEMP_MEMORY_LIMIT); + res = kmalloc(*size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); + if (res) + return res; + *size = PAGE_SIZE; + if (backup_len) { + res = kmalloc(*size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); + if (res) + return res; + *size = backup_len; + return backup; + } + return kmalloc(*size, GFP_KERNEL); +} + +void interval_tree_double_span_iter_update( + struct interval_tree_double_span_iter *iter) +{ + unsigned long last_hole = ULONG_MAX; + unsigned int i; + + for (i = 0; i != ARRAY_SIZE(iter->spans); i++) { + if (interval_tree_span_iter_done(&iter->spans[i])) { + iter->is_used = -1; + return; + } + + if (iter->spans[i].is_hole) { + last_hole = min(last_hole, iter->spans[i].last_hole); + continue; + } + + iter->is_used = i + 1; + iter->start_used = iter->spans[i].start_used; + iter->last_used = min(iter->spans[i].last_used, last_hole); + return; + } + + iter->is_used = 0; + iter->start_hole = iter->spans[0].start_hole; + iter->last_hole = + min(iter->spans[0].last_hole, iter->spans[1].last_hole); +} + +void interval_tree_double_span_iter_first( + struct interval_tree_double_span_iter *iter, + struct rb_root_cached *itree1, struct rb_root_cached *itree2, + unsigned long first_index, unsigned long last_index) +{ + unsigned int i; + + iter->itrees[0] = itree1; + iter->itrees[1] = itree2; + for (i = 0; i != ARRAY_SIZE(iter->spans); i++) + interval_tree_span_iter_first(&iter->spans[i], iter->itrees[i], + first_index, last_index); + interval_tree_double_span_iter_update(iter); +} + +void interval_tree_double_span_iter_next( + struct interval_tree_double_span_iter *iter) +{ + unsigned int i; + + if (iter->is_used == -1 || + iter->last_hole == iter->spans[0].last_index) { + iter->is_used = -1; + return; + } + + for (i = 0; i != 
ARRAY_SIZE(iter->spans); i++) + interval_tree_span_iter_advance( + &iter->spans[i], iter->itrees[i], iter->last_hole + 1); + interval_tree_double_span_iter_update(iter); +} + +static void iopt_pages_add_npinned(struct iopt_pages *pages, size_t npages) +{ + pages->npinned += npages; +} + +static void iopt_pages_sub_npinned(struct iopt_pages *pages, size_t npages) +{ + pages->npinned -= npages; +} + +static void iopt_pages_err_unpin(struct iopt_pages *pages, + unsigned long start_index, + unsigned long last_index, + struct page **page_list) +{ + unsigned long npages = last_index - start_index + 1; + + unpin_user_pages(page_list, npages); + iopt_pages_sub_npinned(pages, npages); +} + +/* + * index is the number of PAGE_SIZE units from the start of the area's + * iopt_pages. If the iova is sub page-size then the area has an iova that + * covers a portion of the first and last pages in the range. + */ +static unsigned long iopt_area_index_to_iova(struct iopt_area *area, + unsigned long index) +{ + index -= iopt_area_index(area); + if (index == 0) + return iopt_area_iova(area); + return iopt_area_iova(area) - area->page_offset + index * PAGE_SIZE; +} + +static unsigned long iopt_area_index_to_iova_last(struct iopt_area *area, + unsigned long index) +{ + if (index == iopt_area_last_index(area)) + return iopt_area_last_iova(area); + return iopt_area_iova(area) - area->page_offset + + (index - iopt_area_index(area) + 1) * PAGE_SIZE - 1; +} + +static void iommu_unmap_nofail(struct iommu_domain *domain, unsigned long iova, + size_t size) +{ + size_t ret; + + ret = iommu_unmap(domain, iova, size); + /* + * It is a logic error in this code or a driver bug if the IOMMU unmaps + * something other than exactly as requested. This implies that the + * iommu driver may not fail unmap for reasons beyond bad arguments. + * Particularly, the iommu driver may not do a memory allocation on the + * unmap path. 
+ */ + WARN_ON(ret != size); +} + +static struct iopt_area *iopt_pages_find_domain_area(struct iopt_pages *pages, + unsigned long index) +{ + struct interval_tree_node *node; + + node = interval_tree_iter_first(&pages->domains_itree, index, index); + if (!node) + return NULL; + return container_of(node, struct iopt_area, pages_node); +} + +/* + * A simple datastructure to hold a vector of PFNs, optimized for contiguous + * PFNs. This is used as a temporary holding memory for shuttling pfns from one + * place to another. Generally everything is made more efficient if operations + * work on the largest possible grouping of pfns. eg fewer lock/unlock cycles, + * better cache locality, etc + */ +struct pfn_batch { + unsigned long *pfns; + u32 *npfns; + unsigned int array_size; + unsigned int end; + unsigned int total_pfns; +}; + +static void batch_clear(struct pfn_batch *batch) +{ + batch->total_pfns = 0; + batch->end = 0; + batch->pfns[0] = 0; + batch->npfns[0] = 0; +} + +/* + * Carry means we carry a portion of the final hugepage over to the front of the + * batch + */ +static void batch_clear_carry(struct pfn_batch *batch, unsigned int keep_pfns) +{ + if (!keep_pfns) + return batch_clear(batch); + + batch->total_pfns = keep_pfns; + batch->npfns[0] = keep_pfns; + batch->pfns[0] = batch->pfns[batch->end - 1] + + (batch->npfns[batch->end - 1] - keep_pfns); + batch->end = 0; +} + +static void batch_skip_carry(struct pfn_batch *batch, unsigned int skip_pfns) +{ + if (!batch->total_pfns) + return; + skip_pfns = min(batch->total_pfns, skip_pfns); + batch->pfns[0] += skip_pfns; + batch->npfns[0] -= skip_pfns; + batch->total_pfns -= skip_pfns; +} + +static int __batch_init(struct pfn_batch *batch, size_t max_pages, void *backup, + size_t backup_len) +{ + const size_t elmsz = sizeof(*batch->pfns) + sizeof(*batch->npfns); + size_t size = max_pages * elmsz; + + batch->pfns = temp_kmalloc(&size, backup, backup_len); + if (!batch->pfns) + return -ENOMEM; + batch->array_size = 
size / elmsz; + batch->npfns = (u32 *)(batch->pfns + batch->array_size); + batch_clear(batch); + return 0; +} + +static int batch_init(struct pfn_batch *batch, size_t max_pages) +{ + return __batch_init(batch, max_pages, NULL, 0); +} + +static void batch_init_backup(struct pfn_batch *batch, size_t max_pages, + void *backup, size_t backup_len) +{ + __batch_init(batch, max_pages, backup, backup_len); +} + +static void batch_destroy(struct pfn_batch *batch, void *backup) +{ + if (batch->pfns != backup) + kfree(batch->pfns); +} + +/* true if the pfn could be added, false otherwise */ +static bool batch_add_pfn(struct pfn_batch *batch, unsigned long pfn) +{ + const unsigned int MAX_NPFNS = type_max(typeof(*batch->npfns)); + + if (batch->end && + pfn == batch->pfns[batch->end - 1] + batch->npfns[batch->end - 1] && + batch->npfns[batch->end - 1] != MAX_NPFNS) { + batch->npfns[batch->end - 1]++; + batch->total_pfns++; + return true; + } + if (batch->end == batch->array_size) + return false; + batch->total_pfns++; + batch->pfns[batch->end] = pfn; + batch->npfns[batch->end] = 1; + batch->end++; + return true; +} + +/* + * Fill the batch with pfns from the domain. When the batch is full, or it + * reaches last_index, the function will return. The caller should use + * batch->total_pfns to determine the starting point for the next iteration. + */ +static void batch_from_domain(struct pfn_batch *batch, + struct iommu_domain *domain, + struct iopt_area *area, unsigned long start_index, + unsigned long last_index) +{ + unsigned int page_offset = 0; + unsigned long iova; + phys_addr_t phys; + + iova = iopt_area_index_to_iova(area, start_index); + if (start_index == iopt_area_index(area)) + page_offset = area->page_offset; + while (start_index <= last_index) { + /* + * This is pretty slow, it would be nice to get the page size + * back from the driver, or have the driver directly fill the + * batch. 
+ */ + phys = iommu_iova_to_phys(domain, iova) - page_offset; + if (!batch_add_pfn(batch, PHYS_PFN(phys))) + return; + iova += PAGE_SIZE - page_offset; + page_offset = 0; + start_index++; + } +} + +static struct page **raw_pages_from_domain(struct iommu_domain *domain, + struct iopt_area *area, + unsigned long start_index, + unsigned long last_index, + struct page **out_pages) +{ + unsigned int page_offset = 0; + unsigned long iova; + phys_addr_t phys; + + iova = iopt_area_index_to_iova(area, start_index); + if (start_index == iopt_area_index(area)) + page_offset = area->page_offset; + while (start_index <= last_index) { + phys = iommu_iova_to_phys(domain, iova) - page_offset; + *(out_pages++) = pfn_to_page(PHYS_PFN(phys)); + iova += PAGE_SIZE - page_offset; + page_offset = 0; + start_index++; + } + return out_pages; +} + +/* Continues reading a domain until we reach a discontiguity in the pfns. */ +static void batch_from_domain_continue(struct pfn_batch *batch, + struct iommu_domain *domain, + struct iopt_area *area, + unsigned long start_index, + unsigned long last_index) +{ + unsigned int array_size = batch->array_size; + + batch->array_size = batch->end; + batch_from_domain(batch, domain, area, start_index, last_index); + batch->array_size = array_size; +} + +/* + * This is part of the VFIO compatibility support for VFIO_TYPE1_IOMMU. That + * mode permits splitting a mapped area up, and then one of the splits is + * unmapped. Doing this normally would cause us to violate our invariant of + * pairing map/unmap. Thus, to support old VFIO compatibility disable support + * for batching consecutive PFNs. All PFNs mapped into the iommu are done in + * PAGE_SIZE units, not larger or smaller. 
+ */ +static int batch_iommu_map_small(struct iommu_domain *domain, + unsigned long iova, phys_addr_t paddr, + size_t size, int prot) +{ + unsigned long start_iova = iova; + int rc; + + while (size) { + rc = iommu_map(domain, iova, paddr, PAGE_SIZE, prot); + if (rc) + goto err_unmap; + iova += PAGE_SIZE; + paddr += PAGE_SIZE; + size -= PAGE_SIZE; + } + return 0; + +err_unmap: + if (start_iova != iova) + iommu_unmap_nofail(domain, start_iova, iova - start_iova); + return rc; +} + +static int batch_to_domain(struct pfn_batch *batch, struct iommu_domain *domain, + struct iopt_area *area, unsigned long start_index) +{ + bool disable_large_pages = area->iopt->disable_large_pages; + unsigned long last_iova = iopt_area_last_iova(area); + unsigned int page_offset = 0; + unsigned long start_iova; + unsigned long next_iova; + unsigned int cur = 0; + unsigned long iova; + int rc; + + /* The first index might be a partial page */ + if (start_index == iopt_area_index(area)) + page_offset = area->page_offset; + next_iova = iova = start_iova = + iopt_area_index_to_iova(area, start_index); + while (cur < batch->end) { + next_iova = min(last_iova + 1, + next_iova + batch->npfns[cur] * PAGE_SIZE - + page_offset); + if (disable_large_pages) + rc = batch_iommu_map_small( + domain, iova, + PFN_PHYS(batch->pfns[cur]) + page_offset, + next_iova - iova, area->iommu_prot); + else + rc = iommu_map(domain, iova, + PFN_PHYS(batch->pfns[cur]) + page_offset, + next_iova - iova, area->iommu_prot); + if (rc) + goto err_unmap; + iova = next_iova; + page_offset = 0; + cur++; + } + return 0; +err_unmap: + if (start_iova != iova) + iommu_unmap_nofail(domain, start_iova, iova - start_iova); + return rc; +} + +static void batch_from_xarray(struct pfn_batch *batch, struct xarray *xa, + unsigned long start_index, + unsigned long last_index) +{ + XA_STATE(xas, xa, start_index); + void *entry; + + rcu_read_lock(); + while (true) { + entry = xas_next(&xas); + if (xas_retry(&xas, entry)) + continue; + 
WARN_ON(!xa_is_value(entry)); + if (!batch_add_pfn(batch, xa_to_value(entry)) || + start_index == last_index) + break; + start_index++; + } + rcu_read_unlock(); +} + +static void batch_from_xarray_clear(struct pfn_batch *batch, struct xarray *xa, + unsigned long start_index, + unsigned long last_index) +{ + XA_STATE(xas, xa, start_index); + void *entry; + + xas_lock(&xas); + while (true) { + entry = xas_next(&xas); + if (xas_retry(&xas, entry)) + continue; + WARN_ON(!xa_is_value(entry)); + if (!batch_add_pfn(batch, xa_to_value(entry))) + break; + xas_store(&xas, NULL); + if (start_index == last_index) + break; + start_index++; + } + xas_unlock(&xas); +} + +static void clear_xarray(struct xarray *xa, unsigned long start_index, + unsigned long last_index) +{ + XA_STATE(xas, xa, start_index); + void *entry; + + xas_lock(&xas); + xas_for_each(&xas, entry, last_index) + xas_store(&xas, NULL); + xas_unlock(&xas); +} + +static int pages_to_xarray(struct xarray *xa, unsigned long start_index, + unsigned long last_index, struct page **pages) +{ + struct page **end_pages = pages + (last_index - start_index) + 1; + XA_STATE(xas, xa, start_index); + + do { + void *old; + + xas_lock(&xas); + while (pages != end_pages) { + old = xas_store(&xas, xa_mk_value(page_to_pfn(*pages))); + if (xas_error(&xas)) + break; + WARN_ON(old); + pages++; + xas_next(&xas); + } + xas_unlock(&xas); + } while (xas_nomem(&xas, GFP_KERNEL)); + + if (xas_error(&xas)) { + if (xas.xa_index != start_index) + clear_xarray(xa, start_index, xas.xa_index - 1); + return xas_error(&xas); + } + return 0; +} + +static void batch_from_pages(struct pfn_batch *batch, struct page **pages, + size_t npages) +{ + struct page **end = pages + npages; + + for (; pages != end; pages++) + if (!batch_add_pfn(batch, page_to_pfn(*pages))) + break; +} + +static void batch_unpin(struct pfn_batch *batch, struct iopt_pages *pages, + unsigned int first_page_off, size_t npages) +{ + unsigned int cur = 0; + + while (first_page_off) { + 
if (batch->npfns[cur] > first_page_off) + break; + first_page_off -= batch->npfns[cur]; + cur++; + } + + while (npages) { + size_t to_unpin = min_t(size_t, npages, + batch->npfns[cur] - first_page_off); + + unpin_user_page_range_dirty_lock( + pfn_to_page(batch->pfns[cur] + first_page_off), + to_unpin, pages->writable); + iopt_pages_sub_npinned(pages, to_unpin); + cur++; + first_page_off = 0; + npages -= to_unpin; + } +} + +static void copy_data_page(struct page *page, void *data, unsigned long offset, + size_t length, unsigned int flags) +{ + void *mem; + + mem = kmap_local_page(page); + if (flags & IOMMUFD_ACCESS_RW_WRITE) { + memcpy(mem + offset, data, length); + set_page_dirty_lock(page); + } else { + memcpy(data, mem + offset, length); + } + kunmap_local(mem); +} + +static unsigned long batch_rw(struct pfn_batch *batch, void *data, + unsigned long offset, unsigned long length, + unsigned int flags) +{ + unsigned long copied = 0; + unsigned int npage = 0; + unsigned int cur = 0; + + while (cur < batch->end) { + unsigned long bytes = min(length, PAGE_SIZE - offset); + + copy_data_page(pfn_to_page(batch->pfns[cur] + npage), data, + offset, bytes, flags); + offset = 0; + length -= bytes; + data += bytes; + copied += bytes; + npage++; + if (npage == batch->npfns[cur]) { + npage = 0; + cur++; + } + if (!length) + break; + } + return copied; +} + +/* pfn_reader_user is just the pin_user_pages() path */ +struct pfn_reader_user { + struct page **upages; + size_t upages_len; + unsigned long upages_start; + unsigned long upages_end; + unsigned int gup_flags; + /* + * 1 means mmget() and mmap_read_lock(), 0 means only mmget(), -1 is + * neither + */ + int locked; +}; + +static void pfn_reader_user_init(struct pfn_reader_user *user, + struct iopt_pages *pages) +{ + user->upages = NULL; + user->upages_start = 0; + user->upages_end = 0; + user->locked = -1; + + if (pages->writable) { + user->gup_flags = FOLL_LONGTERM | FOLL_WRITE; + } else { + /* Still need to break COWs on 
read */ + user->gup_flags = FOLL_LONGTERM | FOLL_FORCE | FOLL_WRITE; + } +} + +static void pfn_reader_user_destroy(struct pfn_reader_user *user, + struct iopt_pages *pages) +{ + if (user->locked != -1) { + if (user->locked) + mmap_read_unlock(pages->source_mm); + if (pages->source_mm != current->mm) + mmput(pages->source_mm); + user->locked = 0; + } + + kfree(user->upages); + user->upages = NULL; +} + +static int pfn_reader_user_pin(struct pfn_reader_user *user, + struct iopt_pages *pages, + unsigned long start_index, + unsigned long last_index) +{ + bool remote_mm = pages->source_mm != current->mm; + unsigned long npages; + uintptr_t uptr; + long rc; + + if (!user->upages) { + /* All undone in pfn_reader_destroy() */ + user->upages_len = + (last_index - start_index + 1) * sizeof(*user->upages); + user->upages = temp_kmalloc(&user->upages_len, NULL, 0); + if (!user->upages) + return -ENOMEM; + } + + if (user->locked == -1) { + /* + * The majority of usages will run the map task within the mm + * providing the pages, so we can optimize into + * get_user_pages_fast() + */ + if (remote_mm) { + if (!mmget_not_zero(pages->source_mm)) + return -EFAULT; + } + user->locked = 0; + } + + npages = min_t(unsigned long, last_index - start_index + 1, + user->upages_len / sizeof(*user->upages)); + + uptr = (uintptr_t)(pages->uptr + start_index * PAGE_SIZE); + if (!remote_mm) + rc = pin_user_pages_fast(uptr, npages, user->gup_flags, + user->upages); + else { + if (!user->locked) { + mmap_read_lock(pages->source_mm); + user->locked = 1; + } + /* + * FIXME: last NULL can be &pfns->locked once the GUP patch + * is merged. 
+ */ + rc = pin_user_pages_remote(pages->source_mm, uptr, npages, + user->gup_flags, user->upages, NULL, + NULL); + } + if (rc <= 0) { + if (WARN_ON(!rc)) + return -EFAULT; + return rc; + } + iopt_pages_add_npinned(pages, rc); + user->upages_start = start_index; + user->upages_end = start_index + rc; + return 0; +} + +/* This is the "modern" and faster accounting method used by io_uring */ +static int incr_user_locked_vm(struct iopt_pages *pages, unsigned long npages) +{ + unsigned long lock_limit; + unsigned long cur_pages; + unsigned long new_pages; + + lock_limit = task_rlimit(pages->source_task, RLIMIT_MEMLOCK) >> + PAGE_SHIFT; + npages = pages->npinned - pages->last_npinned; + do { + cur_pages = atomic_long_read(&pages->source_user->locked_vm); + new_pages = cur_pages + npages; + if (new_pages > lock_limit) + return -ENOMEM; + } while (atomic_long_cmpxchg(&pages->source_user->locked_vm, cur_pages, + new_pages) != cur_pages); + return 0; +} + +static void decr_user_locked_vm(struct iopt_pages *pages, unsigned long npages) +{ + if (WARN_ON(atomic_long_read(&pages->source_user->locked_vm) < npages)) + return; + atomic_long_sub(npages, &pages->source_user->locked_vm); +} + +/* This is the accounting method used for compatibility with VFIO */ +static int update_mm_locked_vm(struct iopt_pages *pages, unsigned long npages, + bool inc, struct pfn_reader_user *user) +{ + bool do_put = false; + int rc; + + if (user && user->locked) { + mmap_read_unlock(pages->source_mm); + user->locked = 0; + /* If we had the lock then we also have a get */ + } else if ((!user || !user->upages) && + pages->source_mm != current->mm) { + if (!mmget_not_zero(pages->source_mm)) + return -EINVAL; + do_put = true; + } + + mmap_write_lock(pages->source_mm); + rc = __account_locked_vm(pages->source_mm, npages, inc, + pages->source_task, false); + mmap_write_unlock(pages->source_mm); + + if (do_put) + mmput(pages->source_mm); + return rc; +} + +static int do_update_pinned(struct iopt_pages 
*pages, unsigned long npages, + bool inc, struct pfn_reader_user *user) +{ + int rc = 0; + + switch (pages->account_mode) { + case IOPT_PAGES_ACCOUNT_NONE: + break; + case IOPT_PAGES_ACCOUNT_USER: + if (inc) + rc = incr_user_locked_vm(pages, npages); + else + decr_user_locked_vm(pages, npages); + break; + case IOPT_PAGES_ACCOUNT_MM: + rc = update_mm_locked_vm(pages, npages, inc, user); + break; + } + if (rc) + return rc; + + pages->last_npinned = pages->npinned; + if (inc) + atomic64_add(npages, &pages->source_mm->pinned_vm); + else + atomic64_sub(npages, &pages->source_mm->pinned_vm); + return 0; +} + +static void update_unpinned(struct iopt_pages *pages) +{ + if (WARN_ON(pages->npinned > pages->last_npinned)) + return; + if (pages->npinned == pages->last_npinned) + return; + do_update_pinned(pages, pages->last_npinned - pages->npinned, false, + NULL); +} + +/* + * Changes in the number of pages pinned is done after the pages have been read + * and processed. If the user lacked the limit then the error unwind will unpin + * everything that was just pinned. This is because it is expensive to calculate + * how many pages we have already pinned within a range to generate an accurate + * prediction in advance of doing the work to actually pin them. + */ +static int pfn_reader_user_update_pinned(struct pfn_reader_user *user, + struct iopt_pages *pages) +{ + unsigned long npages; + bool inc; + + lockdep_assert_held(&pages->mutex); + + if (pages->npinned == pages->last_npinned) + return 0; + + if (pages->npinned < pages->last_npinned) { + npages = pages->last_npinned - pages->npinned; + inc = false; + } else { + npages = pages->npinned - pages->last_npinned; + inc = true; + } + return do_update_pinned(pages, npages, inc, user); +} + +/* + * PFNs are stored in three places, in order of preference: + * - The iopt_pages xarray. 
This is only populated if there is a + * iopt_pages_access + * - The iommu_domain under an area + * - The original PFN source, ie pages->source_mm + * + * This iterator reads the pfns optimizing to load according to the + * above order. + */ +struct pfn_reader { + struct iopt_pages *pages; + struct interval_tree_double_span_iter span; + struct pfn_batch batch; + unsigned long batch_start_index; + unsigned long batch_end_index; + unsigned long last_index; + + struct pfn_reader_user user; +}; + +static int pfn_reader_update_pinned(struct pfn_reader *pfns) +{ + return pfn_reader_user_update_pinned(&pfns->user, pfns->pages); +} + +/* + * The batch can contain a mixture of pages that are still in use and pages that + * need to be unpinned. Unpin only pages that are not held anywhere else. + */ +static void pfn_reader_unpin(struct pfn_reader *pfns) +{ + unsigned long last = pfns->batch_end_index - 1; + unsigned long start = pfns->batch_start_index; + struct interval_tree_double_span_iter span; + struct iopt_pages *pages = pfns->pages; + + lockdep_assert_held(&pages->mutex); + + interval_tree_for_each_double_span(&span, &pages->access_itree, + &pages->domains_itree, start, last) { + if (span.is_used) + continue; + + batch_unpin(&pfns->batch, pages, span.start_hole - start, + span.last_hole - span.start_hole + 1); + } +} + +/* Process a single span to load it from the proper storage */ +static int pfn_reader_fill_span(struct pfn_reader *pfns) +{ + struct interval_tree_double_span_iter *span = &pfns->span; + unsigned long start_index = pfns->batch_end_index; + struct iopt_area *area; + int rc; + + if (span->is_used == 1) { + batch_from_xarray(&pfns->batch, &pfns->pages->pinned_pfns, + start_index, span->last_used); + return 0; + } + + if (span->is_used == 2) { + /* + * Pull as many pages from the first domain we find in the + * target span. If it is too small then we will be called again + * and we'll find another area. 
+ */ + area = iopt_pages_find_domain_area(pfns->pages, start_index); + if (WARN_ON(!area)) + return -EINVAL; + + /* The storage_domain cannot change without the pages mutex */ + batch_from_domain( + &pfns->batch, area->storage_domain, area, start_index, + min(iopt_area_last_index(area), span->last_used)); + return 0; + } + + if (start_index >= pfns->user.upages_end) { + rc = pfn_reader_user_pin(&pfns->user, pfns->pages, start_index, + span->last_hole); + if (rc) + return rc; + } + + batch_from_pages(&pfns->batch, + pfns->user.upages + + (start_index - pfns->user.upages_start), + pfns->user.upages_end - start_index); + return 0; +} + +static bool pfn_reader_done(struct pfn_reader *pfns) +{ + return pfns->batch_start_index == pfns->last_index + 1; +} + +static int pfn_reader_next(struct pfn_reader *pfns) +{ + int rc; + + batch_clear(&pfns->batch); + pfns->batch_start_index = pfns->batch_end_index; + + while (pfns->batch_end_index != pfns->last_index + 1) { + unsigned int npfns = pfns->batch.total_pfns; + + rc = pfn_reader_fill_span(pfns); + if (rc) + return rc; + + if (WARN_ON(!pfns->batch.total_pfns)) + return -EINVAL; + + pfns->batch_end_index = + pfns->batch_start_index + pfns->batch.total_pfns; + if (pfns->batch_end_index == pfns->span.last_used + 1) + interval_tree_double_span_iter_next(&pfns->span); + + /* Batch is full */ + if (npfns == pfns->batch.total_pfns) + return 0; + } + return 0; +} + +static int pfn_reader_init(struct pfn_reader *pfns, struct iopt_pages *pages, + unsigned long start_index, unsigned long last_index) +{ + int rc; + + lockdep_assert_held(&pages->mutex); + + pfns->pages = pages; + pfns->batch_start_index = start_index; + pfns->batch_end_index = start_index; + pfns->last_index = last_index; + pfn_reader_user_init(&pfns->user, pages); + rc = batch_init(&pfns->batch, last_index - start_index + 1); + if (rc) + return rc; + interval_tree_double_span_iter_first(&pfns->span, &pages->access_itree, + &pages->domains_itree, start_index, + 
last_index); + return 0; +} + +/* + * There are many assertions regarding the state of pages->npinned vs + * pages->last_pinned, for instance something like unmapping a domain must only + * decrement the npinned, and pfn_reader_destroy() must be called only after all + * the pins are updated. This is fine for success flows, but error flows + * sometimes need to release the pins held inside the pfn_reader before going on + * to complete unmapping and releasing pins held in domains. + */ +static void pfn_reader_release_pins(struct pfn_reader *pfns) +{ + struct iopt_pages *pages = pfns->pages; + + if (pfns->user.upages_end > pfns->batch_end_index) { + size_t npages = pfns->user.upages_end - pfns->batch_end_index; + + /* Any pages not transferred to the batch are just unpinned */ + unpin_user_pages(pfns->user.upages + (pfns->batch_end_index - + pfns->user.upages_start), + npages); + iopt_pages_sub_npinned(pages, npages); + pfns->user.upages_end = pfns->batch_end_index; + } + if (pfns->batch_start_index != pfns->batch_end_index) { + pfn_reader_unpin(pfns); + pfns->batch_start_index = pfns->batch_end_index; + } +} + +static void pfn_reader_destroy(struct pfn_reader *pfns) +{ + struct iopt_pages *pages = pfns->pages; + + pfn_reader_release_pins(pfns); + pfn_reader_user_destroy(&pfns->user, pfns->pages); + batch_destroy(&pfns->batch, NULL); + WARN_ON(pages->last_npinned != pages->npinned); +} + +static int pfn_reader_first(struct pfn_reader *pfns, struct iopt_pages *pages, + unsigned long start_index, unsigned long last_index) +{ + int rc; + + rc = pfn_reader_init(pfns, pages, start_index, last_index); + if (rc) + return rc; + rc = pfn_reader_next(pfns); + if (rc) { + pfn_reader_destroy(pfns); + return rc; + } + return 0; +} diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index d1817472c273..26e09d539737 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -13,6 +13,13 @@ struct iommufd_ctx; struct file; +enum { + IOMMUFD_ACCESS_RW_READ = 0, 
+ IOMMUFD_ACCESS_RW_WRITE = 1 << 0, + /* Set if the caller is in a kthread then rw will use kthread_use_mm() */ + IOMMUFD_ACCESS_RW_KTHREAD = 1 << 1, +}; + void iommufd_ctx_get(struct iommufd_ctx *ictx); #if IS_ENABLED(CONFIG_IOMMUFD) From 8d160cd4d5066f864ec0f2c981470e55ac03ac27 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 29 Nov 2022 16:29:32 -0400 Subject: [PATCH 3128/4122] iommufd: Algorithms for PFN storage The iopt_pages represents a logical linear list of full PFNs held in different storage tiers. Each area points to a slice of exactly one iopt_pages, and each iopt_pages can have multiple areas and accesses. The three storage tiers are managed to meet these objectives: - If no iommu_domain or in-kernel access exists then minimal memory should be consumed by iommufd - If a page has been pinned then an iopt_pages will not pin it again - If an in-kernel access exists then the xarray must provide the backing storage to avoid allocations on domain removals - Otherwise any iommu_domain will be used for storage In a common configuration with only an iommu_domain the iopt_pages does not allocate significant memory itself. The external interface for pages has several logical operations: iopt_area_fill_domain() will load the PFNs from storage into a single domain. This is used when attaching a new domain to an existing IOAS. iopt_area_fill_domains() will load the PFNs from storage into multiple domains. This is used when creating a new IOVA map in an existing IOAS. iopt_pages_add_access() creates an iopt_pages_access that tracks an in-kernel access of PFNs. This is some external driver that might be accessing the IOVA using the CPU, or programming PFNs with the DMA API. ie a VFIO mdev. iopt_pages_rw_access() directly performs a memcpy on the PFNs, without the overhead of iopt_pages_add_access() iopt_pages_fill_xarray() will load PFNs into the xarray and return a 'struct page *' array.
It is used by iopt_pages_access's to extract PFNs for in-kernel use. iopt_pages_fill_from_xarray() is a fast path when it is known the xarray is already filled. As an iopt_pages can be referred to in slices by many areas and accesses it uses interval trees to keep track of which storage tiers currently hold the PFNs. On a page-by-page basis any request for a PFN will be satisfied from one of the storage tiers and the PFN copied to target domain/array. Unfill actions are similar, on a page by page basis domains are unmapped, xarray entries freed or struct pages fully put back. Significant complexity is required to fully optimize all of these data motions. The implementation calculates the largest consecutive range of same-storage indexes and operates in blocks. The accumulation of PFNs always generates the largest contiguous PFN range possible to optimize and this gathering can cross storage tier boundaries. For cases like 'fill domains' care is taken to avoid duplicated work and PFNs are read once and pushed into all domains. The map/unmap interaction with the iommu_domain always works in contiguous PFN blocks. The implementation does not require or benefit from any split/merge optimization in the iommu_domain driver. This design suggests several possible improvements in the IOMMU API that would greatly help performance, particularly a way for the driver to map and read the pfns lists instead of working with one driver call per page to read, and one driver call per contiguous range to store. 
Link: https://lore.kernel.org/r/9-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com Reviewed-by: Kevin Tian Tested-by: Nicolin Chen Tested-by: Yi Liu Tested-by: Lixiao Yang Tested-by: Matthew Rosato Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/io_pagetable.h | 74 +++ drivers/iommu/iommufd/pages.c | 843 +++++++++++++++++++++++++++ 2 files changed, 917 insertions(+) diff --git a/drivers/iommu/iommufd/io_pagetable.h b/drivers/iommu/iommufd/io_pagetable.h index b74bf01ffc52..a2b724175057 100644 --- a/drivers/iommu/iommufd/io_pagetable.h +++ b/drivers/iommu/iommufd/io_pagetable.h @@ -49,6 +49,15 @@ struct iopt_area { unsigned int num_accesses; }; +int iopt_area_fill_domains(struct iopt_area *area, struct iopt_pages *pages); +void iopt_area_unfill_domains(struct iopt_area *area, struct iopt_pages *pages); + +int iopt_area_fill_domain(struct iopt_area *area, struct iommu_domain *domain); +void iopt_area_unfill_domain(struct iopt_area *area, struct iopt_pages *pages, + struct iommu_domain *domain); +void iopt_area_unmap_domain(struct iopt_area *area, + struct iommu_domain *domain); + static inline unsigned long iopt_area_index(struct iopt_area *area) { return area->pages_node.start; @@ -69,6 +78,39 @@ static inline unsigned long iopt_area_last_iova(struct iopt_area *area) return area->node.last; } +static inline size_t iopt_area_length(struct iopt_area *area) +{ + return (area->node.last - area->node.start) + 1; +} + +#define __make_iopt_iter(name) \ + static inline struct iopt_##name *iopt_##name##_iter_first( \ + struct io_pagetable *iopt, unsigned long start, \ + unsigned long last) \ + { \ + struct interval_tree_node *node; \ + \ + lockdep_assert_held(&iopt->iova_rwsem); \ + node = interval_tree_iter_first(&iopt->name##_itree, start, \ + last); \ + if (!node) \ + return NULL; \ + return container_of(node, struct iopt_##name, node); \ + } \ + static inline struct iopt_##name *iopt_##name##_iter_next( \ + struct iopt_##name *last_node, unsigned long start, \ + 
unsigned long last) \ + { \ + struct interval_tree_node *node; \ + \ + node = interval_tree_iter_next(&last_node->node, start, last); \ + if (!node) \ + return NULL; \ + return container_of(node, struct iopt_##name, node); \ + } + +__make_iopt_iter(area) + enum { IOPT_PAGES_ACCOUNT_NONE = 0, IOPT_PAGES_ACCOUNT_USER = 1, @@ -106,4 +148,36 @@ struct iopt_pages { struct rb_root_cached domains_itree; }; +struct iopt_pages *iopt_alloc_pages(void __user *uptr, unsigned long length, + bool writable); +void iopt_release_pages(struct kref *kref); +static inline void iopt_put_pages(struct iopt_pages *pages) +{ + kref_put(&pages->kref, iopt_release_pages); +} + +void iopt_pages_fill_from_xarray(struct iopt_pages *pages, unsigned long start, + unsigned long last, struct page **out_pages); +int iopt_pages_fill_xarray(struct iopt_pages *pages, unsigned long start, + unsigned long last, struct page **out_pages); +void iopt_pages_unfill_xarray(struct iopt_pages *pages, unsigned long start, + unsigned long last); + +int iopt_area_add_access(struct iopt_area *area, unsigned long start, + unsigned long last, struct page **out_pages, + unsigned int flags); +void iopt_area_remove_access(struct iopt_area *area, unsigned long start, + unsigned long last); +int iopt_pages_rw_access(struct iopt_pages *pages, unsigned long start_byte, + void *data, unsigned long length, unsigned int flags); + +/* + * Each interval represents an active iopt_access_pages(), it acts as an + * interval lock that keeps the PFNs pinned and stored in the xarray. 
+ */ +struct iopt_pages_access { + struct interval_tree_node node; + unsigned int users; +}; + #endif diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c index ebca78e743c6..bafeee9d73e8 100644 --- a/drivers/iommu/iommufd/pages.c +++ b/drivers/iommu/iommufd/pages.c @@ -212,6 +212,18 @@ static void iommu_unmap_nofail(struct iommu_domain *domain, unsigned long iova, WARN_ON(ret != size); } +static void iopt_area_unmap_domain_range(struct iopt_area *area, + struct iommu_domain *domain, + unsigned long start_index, + unsigned long last_index) +{ + unsigned long start_iova = iopt_area_index_to_iova(area, start_index); + + iommu_unmap_nofail(domain, start_iova, + iopt_area_index_to_iova_last(area, last_index) - + start_iova + 1); +} + static struct iopt_area *iopt_pages_find_domain_area(struct iopt_pages *pages, unsigned long index) { @@ -1064,3 +1076,834 @@ static int pfn_reader_first(struct pfn_reader *pfns, struct iopt_pages *pages, } return 0; } + +struct iopt_pages *iopt_alloc_pages(void __user *uptr, unsigned long length, + bool writable) +{ + struct iopt_pages *pages; + + /* + * The iommu API uses size_t as the length, and protect the DIV_ROUND_UP + * below from overflow + */ + if (length > SIZE_MAX - PAGE_SIZE || length == 0) + return ERR_PTR(-EINVAL); + + pages = kzalloc(sizeof(*pages), GFP_KERNEL_ACCOUNT); + if (!pages) + return ERR_PTR(-ENOMEM); + + kref_init(&pages->kref); + xa_init_flags(&pages->pinned_pfns, XA_FLAGS_ACCOUNT); + mutex_init(&pages->mutex); + pages->source_mm = current->mm; + mmgrab(pages->source_mm); + pages->uptr = (void __user *)ALIGN_DOWN((uintptr_t)uptr, PAGE_SIZE); + pages->npages = DIV_ROUND_UP(length + (uptr - pages->uptr), PAGE_SIZE); + pages->access_itree = RB_ROOT_CACHED; + pages->domains_itree = RB_ROOT_CACHED; + pages->writable = writable; + if (capable(CAP_IPC_LOCK)) + pages->account_mode = IOPT_PAGES_ACCOUNT_NONE; + else + pages->account_mode = IOPT_PAGES_ACCOUNT_USER; + pages->source_task = 
current->group_leader; + get_task_struct(current->group_leader); + pages->source_user = get_uid(current_user()); + return pages; +} + +void iopt_release_pages(struct kref *kref) +{ + struct iopt_pages *pages = container_of(kref, struct iopt_pages, kref); + + WARN_ON(!RB_EMPTY_ROOT(&pages->access_itree.rb_root)); + WARN_ON(!RB_EMPTY_ROOT(&pages->domains_itree.rb_root)); + WARN_ON(pages->npinned); + WARN_ON(!xa_empty(&pages->pinned_pfns)); + mmdrop(pages->source_mm); + mutex_destroy(&pages->mutex); + put_task_struct(pages->source_task); + free_uid(pages->source_user); + kfree(pages); +} + +static void +iopt_area_unpin_domain(struct pfn_batch *batch, struct iopt_area *area, + struct iopt_pages *pages, struct iommu_domain *domain, + unsigned long start_index, unsigned long last_index, + unsigned long *unmapped_end_index, + unsigned long real_last_index) +{ + while (start_index <= last_index) { + unsigned long batch_last_index; + + if (*unmapped_end_index <= last_index) { + unsigned long start = + max(start_index, *unmapped_end_index); + + batch_from_domain(batch, domain, area, start, + last_index); + batch_last_index = start + batch->total_pfns - 1; + } else { + batch_last_index = last_index; + } + + /* + * unmaps must always 'cut' at a place where the pfns are not + * contiguous to pair with the maps that always install + * contiguous pages. Thus, if we have to stop unpinning in the + * middle of the domains we need to keep reading pfns until we + * find a cut point to do the unmap. The pfns we read are + * carried over and either skipped or integrated into the next + * batch. 
+ */ + if (batch_last_index == last_index && + last_index != real_last_index) + batch_from_domain_continue(batch, domain, area, + last_index + 1, + real_last_index); + + if (*unmapped_end_index <= batch_last_index) { + iopt_area_unmap_domain_range( + area, domain, *unmapped_end_index, + start_index + batch->total_pfns - 1); + *unmapped_end_index = start_index + batch->total_pfns; + } + + /* unpin must follow unmap */ + batch_unpin(batch, pages, 0, + batch_last_index - start_index + 1); + start_index = batch_last_index + 1; + + batch_clear_carry(batch, + *unmapped_end_index - batch_last_index - 1); + } +} + +static void __iopt_area_unfill_domain(struct iopt_area *area, + struct iopt_pages *pages, + struct iommu_domain *domain, + unsigned long last_index) +{ + struct interval_tree_double_span_iter span; + unsigned long start_index = iopt_area_index(area); + unsigned long unmapped_end_index = start_index; + u64 backup[BATCH_BACKUP_SIZE]; + struct pfn_batch batch; + + lockdep_assert_held(&pages->mutex); + + /* + * For security we must not unpin something that is still DMA mapped, + * so this must unmap any IOVA before we go ahead and unpin the pages. + * This creates a complexity where we need to skip over unpinning pages + * held in the xarray, but continue to unmap from the domain. + * + * The domain unmap cannot stop in the middle of a contiguous range of + * PFNs. To solve this problem the unpinning step will read ahead to the + * end of any contiguous span, unmap that whole span, and then only + * unpin the leading part that does not have any accesses. The residual + * PFNs that were unmapped but not unpinned are called a "carry" in the + * batch as they are moved to the front of the PFN list and continue on + * to the next iteration(s). 
+ */ + batch_init_backup(&batch, last_index + 1, backup, sizeof(backup)); + interval_tree_for_each_double_span(&span, &pages->domains_itree, + &pages->access_itree, start_index, + last_index) { + if (span.is_used) { + batch_skip_carry(&batch, + span.last_used - span.start_used + 1); + continue; + } + iopt_area_unpin_domain(&batch, area, pages, domain, + span.start_hole, span.last_hole, + &unmapped_end_index, last_index); + } + /* + * If the range ends in a access then we do the residual unmap without + * any unpins. + */ + if (unmapped_end_index != last_index + 1) + iopt_area_unmap_domain_range(area, domain, unmapped_end_index, + last_index); + WARN_ON(batch.total_pfns); + batch_destroy(&batch, backup); + update_unpinned(pages); +} + +static void iopt_area_unfill_partial_domain(struct iopt_area *area, + struct iopt_pages *pages, + struct iommu_domain *domain, + unsigned long end_index) +{ + if (end_index != iopt_area_index(area)) + __iopt_area_unfill_domain(area, pages, domain, end_index - 1); +} + +/** + * iopt_area_unmap_domain() - Unmap without unpinning PFNs in a domain + * @area: The IOVA range to unmap + * @domain: The domain to unmap + * + * The caller must know that unpinning is not required, usually because there + * are other domains in the iopt. + */ +void iopt_area_unmap_domain(struct iopt_area *area, struct iommu_domain *domain) +{ + iommu_unmap_nofail(domain, iopt_area_iova(area), + iopt_area_length(area)); +} + +/** + * iopt_area_unfill_domain() - Unmap and unpin PFNs in a domain + * @area: IOVA area to use + * @pages: page supplier for the area (area->pages is NULL) + * @domain: Domain to unmap from + * + * The domain should be removed from the domains_itree before calling. The + * domain will always be unmapped, but the PFNs may not be unpinned if there are + * still accesses. 
+ */ +void iopt_area_unfill_domain(struct iopt_area *area, struct iopt_pages *pages, + struct iommu_domain *domain) +{ + __iopt_area_unfill_domain(area, pages, domain, + iopt_area_last_index(area)); +} + +/** + * iopt_area_fill_domain() - Map PFNs from the area into a domain + * @area: IOVA area to use + * @domain: Domain to load PFNs into + * + * Read the pfns from the area's underlying iopt_pages and map them into the + * given domain. Called when attaching a new domain to an io_pagetable. + */ +int iopt_area_fill_domain(struct iopt_area *area, struct iommu_domain *domain) +{ + unsigned long done_end_index; + struct pfn_reader pfns; + int rc; + + lockdep_assert_held(&area->pages->mutex); + + rc = pfn_reader_first(&pfns, area->pages, iopt_area_index(area), + iopt_area_last_index(area)); + if (rc) + return rc; + + while (!pfn_reader_done(&pfns)) { + done_end_index = pfns.batch_start_index; + rc = batch_to_domain(&pfns.batch, domain, area, + pfns.batch_start_index); + if (rc) + goto out_unmap; + done_end_index = pfns.batch_end_index; + + rc = pfn_reader_next(&pfns); + if (rc) + goto out_unmap; + } + + rc = pfn_reader_update_pinned(&pfns); + if (rc) + goto out_unmap; + goto out_destroy; + +out_unmap: + pfn_reader_release_pins(&pfns); + iopt_area_unfill_partial_domain(area, area->pages, domain, + done_end_index); +out_destroy: + pfn_reader_destroy(&pfns); + return rc; +} + +/** + * iopt_area_fill_domains() - Install PFNs into the area's domains + * @area: The area to act on + * @pages: The pages associated with the area (area->pages is NULL) + * + * Called during area creation. The area is freshly created and not inserted in + * the domains_itree yet. PFNs are read and loaded into every domain held in the + * area's io_pagetable and the area is installed in the domains_itree. + * + * On failure all domains are left unchanged. 
+ */ +int iopt_area_fill_domains(struct iopt_area *area, struct iopt_pages *pages) +{ + unsigned long done_first_end_index; + unsigned long done_all_end_index; + struct iommu_domain *domain; + unsigned long unmap_index; + struct pfn_reader pfns; + unsigned long index; + int rc; + + lockdep_assert_held(&area->iopt->domains_rwsem); + + if (xa_empty(&area->iopt->domains)) + return 0; + + mutex_lock(&pages->mutex); + rc = pfn_reader_first(&pfns, pages, iopt_area_index(area), + iopt_area_last_index(area)); + if (rc) + goto out_unlock; + + while (!pfn_reader_done(&pfns)) { + done_first_end_index = pfns.batch_end_index; + done_all_end_index = pfns.batch_start_index; + xa_for_each(&area->iopt->domains, index, domain) { + rc = batch_to_domain(&pfns.batch, domain, area, + pfns.batch_start_index); + if (rc) + goto out_unmap; + } + done_all_end_index = done_first_end_index; + + rc = pfn_reader_next(&pfns); + if (rc) + goto out_unmap; + } + rc = pfn_reader_update_pinned(&pfns); + if (rc) + goto out_unmap; + + area->storage_domain = xa_load(&area->iopt->domains, 0); + interval_tree_insert(&area->pages_node, &pages->domains_itree); + goto out_destroy; + +out_unmap: + pfn_reader_release_pins(&pfns); + xa_for_each(&area->iopt->domains, unmap_index, domain) { + unsigned long end_index; + + if (unmap_index < index) + end_index = done_first_end_index; + else + end_index = done_all_end_index; + + /* + * The area is not yet part of the domains_itree so we have to + * manage the unpinning specially. The last domain does the + * unpin, every other domain is just unmapped. 
+ */ + if (unmap_index != area->iopt->next_domain_id - 1) { + if (end_index != iopt_area_index(area)) + iopt_area_unmap_domain_range( + area, domain, iopt_area_index(area), + end_index - 1); + } else { + iopt_area_unfill_partial_domain(area, pages, domain, + end_index); + } + } +out_destroy: + pfn_reader_destroy(&pfns); +out_unlock: + mutex_unlock(&pages->mutex); + return rc; +} + +/** + * iopt_area_unfill_domains() - unmap PFNs from the area's domains + * @area: The area to act on + * @pages: The pages associated with the area (area->pages is NULL) + * + * Called during area destruction. This unmaps the iova's covered by all the + * area's domains and releases the PFNs. + */ +void iopt_area_unfill_domains(struct iopt_area *area, struct iopt_pages *pages) +{ + struct io_pagetable *iopt = area->iopt; + struct iommu_domain *domain; + unsigned long index; + + lockdep_assert_held(&iopt->domains_rwsem); + + mutex_lock(&pages->mutex); + if (!area->storage_domain) + goto out_unlock; + + xa_for_each(&iopt->domains, index, domain) + if (domain != area->storage_domain) + iopt_area_unmap_domain_range( + area, domain, iopt_area_index(area), + iopt_area_last_index(area)); + + interval_tree_remove(&area->pages_node, &pages->domains_itree); + iopt_area_unfill_domain(area, pages, area->storage_domain); + area->storage_domain = NULL; +out_unlock: + mutex_unlock(&pages->mutex); +} + +static void iopt_pages_unpin_xarray(struct pfn_batch *batch, + struct iopt_pages *pages, + unsigned long start_index, + unsigned long end_index) +{ + while (start_index <= end_index) { + batch_from_xarray_clear(batch, &pages->pinned_pfns, start_index, + end_index); + batch_unpin(batch, pages, 0, batch->total_pfns); + start_index += batch->total_pfns; + batch_clear(batch); + } +} + +/** + * iopt_pages_unfill_xarray() - Update the xarry after removing an access + * @pages: The pages to act on + * @start_index: Starting PFN index + * @last_index: Last PFN index + * + * Called when an iopt_pages_access is 
removed, removes pages from the itree. + * The access should already be removed from the access_itree. + */ +void iopt_pages_unfill_xarray(struct iopt_pages *pages, + unsigned long start_index, + unsigned long last_index) +{ + struct interval_tree_double_span_iter span; + u64 backup[BATCH_BACKUP_SIZE]; + struct pfn_batch batch; + bool batch_inited = false; + + lockdep_assert_held(&pages->mutex); + + interval_tree_for_each_double_span(&span, &pages->access_itree, + &pages->domains_itree, start_index, + last_index) { + if (!span.is_used) { + if (!batch_inited) { + batch_init_backup(&batch, + last_index - start_index + 1, + backup, sizeof(backup)); + batch_inited = true; + } + iopt_pages_unpin_xarray(&batch, pages, span.start_hole, + span.last_hole); + } else if (span.is_used == 2) { + /* Covered by a domain */ + clear_xarray(&pages->pinned_pfns, span.start_used, + span.last_used); + } + /* Otherwise covered by an existing access */ + } + if (batch_inited) + batch_destroy(&batch, backup); + update_unpinned(pages); +} + +/** + * iopt_pages_fill_from_xarray() - Fast path for reading PFNs + * @pages: The pages to act on + * @start_index: The first page index in the range + * @last_index: The last page index in the range + * @out_pages: The output array to return the pages + * + * This can be called if the caller is holding a refcount on an + * iopt_pages_access that is known to have already been filled. It quickly reads + * the pages directly from the xarray. + * + * This is part of the SW iommu interface to read pages for in-kernel use. 
+ */ +void iopt_pages_fill_from_xarray(struct iopt_pages *pages, + unsigned long start_index, + unsigned long last_index, + struct page **out_pages) +{ + XA_STATE(xas, &pages->pinned_pfns, start_index); + void *entry; + + rcu_read_lock(); + while (start_index <= last_index) { + entry = xas_next(&xas); + if (xas_retry(&xas, entry)) + continue; + WARN_ON(!xa_is_value(entry)); + *(out_pages++) = pfn_to_page(xa_to_value(entry)); + start_index++; + } + rcu_read_unlock(); +} + +static int iopt_pages_fill_from_domain(struct iopt_pages *pages, + unsigned long start_index, + unsigned long last_index, + struct page **out_pages) +{ + while (start_index != last_index + 1) { + unsigned long domain_last; + struct iopt_area *area; + + area = iopt_pages_find_domain_area(pages, start_index); + if (WARN_ON(!area)) + return -EINVAL; + + domain_last = min(iopt_area_last_index(area), last_index); + out_pages = raw_pages_from_domain(area->storage_domain, area, + start_index, domain_last, + out_pages); + start_index = domain_last + 1; + } + return 0; +} + +static int iopt_pages_fill_from_mm(struct iopt_pages *pages, + struct pfn_reader_user *user, + unsigned long start_index, + unsigned long last_index, + struct page **out_pages) +{ + unsigned long cur_index = start_index; + int rc; + + while (cur_index != last_index + 1) { + user->upages = out_pages + (cur_index - start_index); + rc = pfn_reader_user_pin(user, pages, cur_index, last_index); + if (rc) + goto out_unpin; + cur_index = user->upages_end; + } + return 0; + +out_unpin: + if (start_index != cur_index) + iopt_pages_err_unpin(pages, start_index, cur_index - 1, + out_pages); + return rc; +} + +/** + * iopt_pages_fill_xarray() - Read PFNs + * @pages: The pages to act on + * @start_index: The first page index in the range + * @last_index: The last page index in the range + * @out_pages: The output array to return the pages, may be NULL + * + * This populates the xarray and returns the pages in out_pages. 
As the slow + * path this is able to copy pages from other storage tiers into the xarray. + * + * On failure the xarray is left unchanged. + * + * This is part of the SW iommu interface to read pages for in-kernel use. + */ +int iopt_pages_fill_xarray(struct iopt_pages *pages, unsigned long start_index, + unsigned long last_index, struct page **out_pages) +{ + struct interval_tree_double_span_iter span; + unsigned long xa_end = start_index; + struct pfn_reader_user user; + int rc; + + lockdep_assert_held(&pages->mutex); + + pfn_reader_user_init(&user, pages); + user.upages_len = (last_index - start_index + 1) * sizeof(*out_pages); + interval_tree_for_each_double_span(&span, &pages->access_itree, + &pages->domains_itree, start_index, + last_index) { + struct page **cur_pages; + + if (span.is_used == 1) { + cur_pages = out_pages + (span.start_used - start_index); + iopt_pages_fill_from_xarray(pages, span.start_used, + span.last_used, cur_pages); + continue; + } + + if (span.is_used == 2) { + cur_pages = out_pages + (span.start_used - start_index); + iopt_pages_fill_from_domain(pages, span.start_used, + span.last_used, cur_pages); + rc = pages_to_xarray(&pages->pinned_pfns, + span.start_used, span.last_used, + cur_pages); + if (rc) + goto out_clean_xa; + xa_end = span.last_used + 1; + continue; + } + + /* hole */ + cur_pages = out_pages + (span.start_hole - start_index); + rc = iopt_pages_fill_from_mm(pages, &user, span.start_hole, + span.last_hole, cur_pages); + if (rc) + goto out_clean_xa; + rc = pages_to_xarray(&pages->pinned_pfns, span.start_hole, + span.last_hole, cur_pages); + if (rc) { + iopt_pages_err_unpin(pages, span.start_hole, + span.last_hole, cur_pages); + goto out_clean_xa; + } + xa_end = span.last_hole + 1; + } + rc = pfn_reader_user_update_pinned(&user, pages); + if (rc) + goto out_clean_xa; + user.upages = NULL; + pfn_reader_user_destroy(&user, pages); + return 0; + +out_clean_xa: + if (start_index != xa_end) + iopt_pages_unfill_xarray(pages, 
start_index, xa_end - 1); + user.upages = NULL; + pfn_reader_user_destroy(&user, pages); + return rc; +} + +/* + * This uses the pfn_reader instead of taking a shortcut by using the mm. It can + * do every scenario and is fully consistent with what an iommu_domain would + * see. + */ +static int iopt_pages_rw_slow(struct iopt_pages *pages, + unsigned long start_index, + unsigned long last_index, unsigned long offset, + void *data, unsigned long length, + unsigned int flags) +{ + struct pfn_reader pfns; + int rc; + + mutex_lock(&pages->mutex); + + rc = pfn_reader_first(&pfns, pages, start_index, last_index); + if (rc) + goto out_unlock; + + while (!pfn_reader_done(&pfns)) { + unsigned long done; + + done = batch_rw(&pfns.batch, data, offset, length, flags); + data += done; + length -= done; + offset = 0; + pfn_reader_unpin(&pfns); + + rc = pfn_reader_next(&pfns); + if (rc) + goto out_destroy; + } + if (WARN_ON(length != 0)) + rc = -EINVAL; +out_destroy: + pfn_reader_destroy(&pfns); +out_unlock: + mutex_unlock(&pages->mutex); + return rc; +} + +/* + * A medium speed path that still allows DMA inconsistencies, but doesn't do any + * memory allocations or interval tree searches. + */ +static int iopt_pages_rw_page(struct iopt_pages *pages, unsigned long index, + unsigned long offset, void *data, + unsigned long length, unsigned int flags) +{ + struct page *page = NULL; + int rc; + + if (!mmget_not_zero(pages->source_mm)) + return iopt_pages_rw_slow(pages, index, index, offset, data, + length, flags); + + mmap_read_lock(pages->source_mm); + rc = pin_user_pages_remote( + pages->source_mm, (uintptr_t)(pages->uptr + index * PAGE_SIZE), + 1, (flags & IOMMUFD_ACCESS_RW_WRITE) ? 
FOLL_WRITE : 0, &page, + NULL, NULL); + mmap_read_unlock(pages->source_mm); + if (rc != 1) { + if (WARN_ON(rc >= 0)) + rc = -EINVAL; + goto out_mmput; + } + copy_data_page(page, data, offset, length, flags); + unpin_user_page(page); + rc = 0; + +out_mmput: + mmput(pages->source_mm); + return rc; +} + +/** + * iopt_pages_rw_access - Copy to/from a linear slice of the pages + * @pages: pages to act on + * @start_byte: First byte of pages to copy to/from + * @data: Kernel buffer to get/put the data + * @length: Number of bytes to copy + * @flags: IOMMUFD_ACCESS_RW_* flags + * + * This will find each page in the range, kmap it and then memcpy to/from + * the given kernel buffer. + */ +int iopt_pages_rw_access(struct iopt_pages *pages, unsigned long start_byte, + void *data, unsigned long length, unsigned int flags) +{ + unsigned long start_index = start_byte / PAGE_SIZE; + unsigned long last_index = (start_byte + length - 1) / PAGE_SIZE; + bool change_mm = current->mm != pages->source_mm; + int rc = 0; + + if ((flags & IOMMUFD_ACCESS_RW_WRITE) && !pages->writable) + return -EPERM; + + if (!(flags & IOMMUFD_ACCESS_RW_KTHREAD) && change_mm) { + if (start_index == last_index) + return iopt_pages_rw_page(pages, start_index, + start_byte % PAGE_SIZE, data, + length, flags); + return iopt_pages_rw_slow(pages, start_index, last_index, + start_byte % PAGE_SIZE, data, length, + flags); + } + + /* + * Try to copy using copy_to_user(). We do this as a fast path and + * ignore any pinning inconsistencies, unlike a real DMA path. 
+ */ + if (change_mm) { + if (!mmget_not_zero(pages->source_mm)) + return iopt_pages_rw_slow(pages, start_index, + last_index, + start_byte % PAGE_SIZE, data, + length, flags); + kthread_use_mm(pages->source_mm); + } + + if (flags & IOMMUFD_ACCESS_RW_WRITE) { + if (copy_to_user(pages->uptr + start_byte, data, length)) + rc = -EFAULT; + } else { + if (copy_from_user(data, pages->uptr + start_byte, length)) + rc = -EFAULT; + } + + if (change_mm) { + kthread_unuse_mm(pages->source_mm); + mmput(pages->source_mm); + } + + return rc; +} + +static struct iopt_pages_access * +iopt_pages_get_exact_access(struct iopt_pages *pages, unsigned long index, + unsigned long last) +{ + struct interval_tree_node *node; + + lockdep_assert_held(&pages->mutex); + + /* There can be overlapping ranges in this interval tree */ + for (node = interval_tree_iter_first(&pages->access_itree, index, last); + node; node = interval_tree_iter_next(node, index, last)) + if (node->start == index && node->last == last) + return container_of(node, struct iopt_pages_access, + node); + return NULL; +} + +/** + * iopt_area_add_access() - Record an in-kernel access for PFNs + * @area: The source of PFNs + * @start_index: First page index + * @last_index: Inclusive last page index + * @out_pages: Output list of struct page's representing the PFNs + * @flags: IOMMUFD_ACCESS_RW_* flags + * + * Record that an in-kernel access will be accessing the pages, ensure they are + * pinned, and return the PFNs as a simple list of 'struct page *'.
+ * + * This should be undone through a matching call to iopt_area_remove_access() + */ +int iopt_area_add_access(struct iopt_area *area, unsigned long start_index, + unsigned long last_index, struct page **out_pages, + unsigned int flags) +{ + struct iopt_pages *pages = area->pages; + struct iopt_pages_access *access; + int rc; + + if ((flags & IOMMUFD_ACCESS_RW_WRITE) && !pages->writable) + return -EPERM; + + mutex_lock(&pages->mutex); + access = iopt_pages_get_exact_access(pages, start_index, last_index); + if (access) { + area->num_accesses++; + access->users++; + iopt_pages_fill_from_xarray(pages, start_index, last_index, + out_pages); + mutex_unlock(&pages->mutex); + return 0; + } + + access = kzalloc(sizeof(*access), GFP_KERNEL_ACCOUNT); + if (!access) { + rc = -ENOMEM; + goto err_unlock; + } + + rc = iopt_pages_fill_xarray(pages, start_index, last_index, out_pages); + if (rc) + goto err_free; + + access->node.start = start_index; + access->node.last = last_index; + access->users = 1; + area->num_accesses++; + interval_tree_insert(&access->node, &pages->access_itree); + mutex_unlock(&pages->mutex); + return 0; + +err_free: + kfree(access); +err_unlock: + mutex_unlock(&pages->mutex); + return rc; +} + +/** + * iopt_area_remove_access() - Release an in-kernel access for PFNs + * @area: The source of PFNs + * @start_index: First page index + * @last_index: Inclusive last page index + * + * Undo iopt_area_add_access() and unpin the pages if necessary. The caller + * must stop using the PFNs before calling this. 
+ */ +void iopt_area_remove_access(struct iopt_area *area, unsigned long start_index, + unsigned long last_index) +{ + struct iopt_pages *pages = area->pages; + struct iopt_pages_access *access; + + mutex_lock(&pages->mutex); + access = iopt_pages_get_exact_access(pages, start_index, last_index); + if (WARN_ON(!access)) + goto out_unlock; + + WARN_ON(area->num_accesses == 0 || access->users == 0); + area->num_accesses--; + access->users--; + if (access->users) + goto out_unlock; + + interval_tree_remove(&access->node, &pages->access_itree); + iopt_pages_unfill_xarray(pages, start_index, last_index); + kfree(access); +out_unlock: + mutex_unlock(&pages->mutex); +} From 51fe6141f0f64ae0bbc096a41a07572273e8c0ef Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 29 Nov 2022 16:29:33 -0400 Subject: [PATCH 3129/4122] iommufd: Data structure to provide IOVA to PFN mapping This is the remainder of the IOAS data structure. Provide an object called an io_pagetable that is composed of iopt_areas pointing at iopt_pages, along with a list of iommu_domains that mirror the IOVA to PFN map. At the top this is a simple interval tree of iopt_areas indicating the map of IOVA to iopt_pages. An xarray keeps track of a list of domains. Based on the attached domains there is a minimum alignment for areas (which may be smaller than PAGE_SIZE), an interval tree of reserved IOVA that can't be mapped and an IOVA of allowed IOVA that can always be mappable. The concept of an 'access' refers to something like a VFIO mdev that is accessing the IOVA and using a 'struct page *' for CPU based access. Externally an API is provided that matches the requirements of the IOCTL interface for map/unmap and domain attachment. The API provides a 'copy' primitive to establish a new IOVA map in a different IOAS from an existing mapping by re-using the iopt_pages. This is the basic mechanism to provide single pinning. 
This is designed to support a pre-registration flow where userspace would setup an dummy IOAS with no domains, map in memory and then establish an access to pin all PFNs into the xarray. Copy can then be used to create new IOVA mappings in a different IOAS, with iommu_domains attached. Upon copy the PFNs will be read out of the xarray and mapped into the iommu_domains, avoiding any pin_user_pages() overheads. Link: https://lore.kernel.org/r/10-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com Tested-by: Nicolin Chen Tested-by: Yi Liu Tested-by: Lixiao Yang Tested-by: Matthew Rosato Reviewed-by: Kevin Tian Signed-off-by: Yi Liu Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- .clang-format | 1 + drivers/iommu/iommufd/Makefile | 1 + drivers/iommu/iommufd/io_pagetable.c | 1186 +++++++++++++++++++++++ drivers/iommu/iommufd/io_pagetable.h | 55 ++ drivers/iommu/iommufd/iommufd_private.h | 52 + 5 files changed, 1295 insertions(+) create mode 100644 drivers/iommu/iommufd/io_pagetable.c diff --git a/.clang-format b/.clang-format index 501241f89776..78aba4a10b1b 100644 --- a/.clang-format +++ b/.clang-format @@ -444,6 +444,7 @@ ForEachMacros: - 'interval_tree_for_each_span' - 'intlist__for_each_entry' - 'intlist__for_each_entry_safe' + - 'iopt_for_each_contig_area' - 'kcore_copy__for_each_phdr' - 'key_for_each' - 'key_for_each_safe' diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile index 05a0e91e30af..b66a8c47ff55 100644 --- a/drivers/iommu/iommufd/Makefile +++ b/drivers/iommu/iommufd/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only iommufd-y := \ + io_pagetable.o \ main.o \ pages.o diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c new file mode 100644 index 000000000000..756d347948f0 --- /dev/null +++ b/drivers/iommu/iommufd/io_pagetable.c @@ -0,0 +1,1186 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. 
+ * + * The io_pagetable is the top of datastructure that maps IOVA's to PFNs. The + * PFNs can be placed into an iommu_domain, or returned to the caller as a page + * list for access by an in-kernel user. + * + * The datastructure uses the iopt_pages to optimize the storage of the PFNs + * between the domains and xarray. + */ +#include +#include +#include +#include +#include +#include +#include + +#include "io_pagetable.h" +#include "double_span.h" + +struct iopt_pages_list { + struct iopt_pages *pages; + struct iopt_area *area; + struct list_head next; + unsigned long start_byte; + unsigned long length; +}; + +struct iopt_area *iopt_area_contig_init(struct iopt_area_contig_iter *iter, + struct io_pagetable *iopt, + unsigned long iova, + unsigned long last_iova) +{ + lockdep_assert_held(&iopt->iova_rwsem); + + iter->cur_iova = iova; + iter->last_iova = last_iova; + iter->area = iopt_area_iter_first(iopt, iova, iova); + if (!iter->area) + return NULL; + if (!iter->area->pages) { + iter->area = NULL; + return NULL; + } + return iter->area; +} + +struct iopt_area *iopt_area_contig_next(struct iopt_area_contig_iter *iter) +{ + unsigned long last_iova; + + if (!iter->area) + return NULL; + last_iova = iopt_area_last_iova(iter->area); + if (iter->last_iova <= last_iova) + return NULL; + + iter->cur_iova = last_iova + 1; + iter->area = iopt_area_iter_next(iter->area, iter->cur_iova, + iter->last_iova); + if (!iter->area) + return NULL; + if (iter->cur_iova != iopt_area_iova(iter->area) || + !iter->area->pages) { + iter->area = NULL; + return NULL; + } + return iter->area; +} + +static bool __alloc_iova_check_hole(struct interval_tree_double_span_iter *span, + unsigned long length, + unsigned long iova_alignment, + unsigned long page_offset) +{ + if (span->is_used || span->last_hole - span->start_hole < length - 1) + return false; + + span->start_hole = ALIGN(span->start_hole, iova_alignment) | + page_offset; + if (span->start_hole > span->last_hole || + span->last_hole - 
span->start_hole < length - 1) + return false; + return true; +} + +static bool __alloc_iova_check_used(struct interval_tree_span_iter *span, + unsigned long length, + unsigned long iova_alignment, + unsigned long page_offset) +{ + if (span->is_hole || span->last_used - span->start_used < length - 1) + return false; + + span->start_used = ALIGN(span->start_used, iova_alignment) | + page_offset; + if (span->start_used > span->last_used || + span->last_used - span->start_used < length - 1) + return false; + return true; +} + +/* + * Automatically find a block of IOVA that is not being used and not reserved. + * Does not return a 0 IOVA even if it is valid. + */ +static int iopt_alloc_iova(struct io_pagetable *iopt, unsigned long *iova, + unsigned long uptr, unsigned long length) +{ + unsigned long page_offset = uptr % PAGE_SIZE; + struct interval_tree_double_span_iter used_span; + struct interval_tree_span_iter allowed_span; + unsigned long iova_alignment; + + lockdep_assert_held(&iopt->iova_rwsem); + + /* Protect roundup_pow_of_two() from overflow */ + if (length == 0 || length >= ULONG_MAX / 2) + return -EOVERFLOW; + + /* + * Keep alignment present in the uptr when building the IOVA, this + * increases the chance we can map a THP.
+ */ + if (!uptr) + iova_alignment = roundup_pow_of_two(length); + else + iova_alignment = min_t(unsigned long, + roundup_pow_of_two(length), + 1UL << __ffs64(uptr)); + + if (iova_alignment < iopt->iova_alignment) + return -EINVAL; + + interval_tree_for_each_span(&allowed_span, &iopt->allowed_itree, + PAGE_SIZE, ULONG_MAX - PAGE_SIZE) { + if (RB_EMPTY_ROOT(&iopt->allowed_itree.rb_root)) { + allowed_span.start_used = PAGE_SIZE; + allowed_span.last_used = ULONG_MAX - PAGE_SIZE; + allowed_span.is_hole = false; + } + + if (!__alloc_iova_check_used(&allowed_span, length, + iova_alignment, page_offset)) + continue; + + interval_tree_for_each_double_span( + &used_span, &iopt->reserved_itree, &iopt->area_itree, + allowed_span.start_used, allowed_span.last_used) { + if (!__alloc_iova_check_hole(&used_span, length, + iova_alignment, + page_offset)) + continue; + + *iova = used_span.start_hole; + return 0; + } + } + return -ENOSPC; +} + +static int iopt_check_iova(struct io_pagetable *iopt, unsigned long iova, + unsigned long length) +{ + unsigned long last; + + lockdep_assert_held(&iopt->iova_rwsem); + + if ((iova & (iopt->iova_alignment - 1))) + return -EINVAL; + + if (check_add_overflow(iova, length - 1, &last)) + return -EOVERFLOW; + + /* No reserved IOVA intersects the range */ + if (iopt_reserved_iter_first(iopt, iova, last)) + return -EINVAL; + + /* Check that there is not already a mapping in the range */ + if (iopt_area_iter_first(iopt, iova, last)) + return -EEXIST; + return 0; +} + +/* + * The area takes a slice of the pages from start_byte to start_byte + length + */ +static int iopt_insert_area(struct io_pagetable *iopt, struct iopt_area *area, + struct iopt_pages *pages, unsigned long iova, + unsigned long start_byte, unsigned long length, + int iommu_prot) +{ + lockdep_assert_held_write(&iopt->iova_rwsem); + + if ((iommu_prot & IOMMU_WRITE) && !pages->writable) + return -EPERM; + + area->iommu_prot = iommu_prot; + area->page_offset = start_byte % PAGE_SIZE; +
if (area->page_offset & (iopt->iova_alignment - 1)) + return -EINVAL; + + area->node.start = iova; + if (check_add_overflow(iova, length - 1, &area->node.last)) + return -EOVERFLOW; + + area->pages_node.start = start_byte / PAGE_SIZE; + if (check_add_overflow(start_byte, length - 1, &area->pages_node.last)) + return -EOVERFLOW; + area->pages_node.last = area->pages_node.last / PAGE_SIZE; + if (WARN_ON(area->pages_node.last >= pages->npages)) + return -EOVERFLOW; + + /* + * The area is inserted with a NULL pages indicating it is not fully + * initialized yet. + */ + area->iopt = iopt; + interval_tree_insert(&area->node, &iopt->area_itree); + return 0; +} + +static int iopt_alloc_area_pages(struct io_pagetable *iopt, + struct list_head *pages_list, + unsigned long length, unsigned long *dst_iova, + int iommu_prot, unsigned int flags) +{ + struct iopt_pages_list *elm; + unsigned long iova; + int rc = 0; + + list_for_each_entry(elm, pages_list, next) { + elm->area = kzalloc(sizeof(*elm->area), GFP_KERNEL_ACCOUNT); + if (!elm->area) + return -ENOMEM; + } + + down_write(&iopt->iova_rwsem); + if ((length & (iopt->iova_alignment - 1)) || !length) { + rc = -EINVAL; + goto out_unlock; + } + + if (flags & IOPT_ALLOC_IOVA) { + /* Use the first entry to guess the ideal IOVA alignment */ + elm = list_first_entry(pages_list, struct iopt_pages_list, + next); + rc = iopt_alloc_iova( + iopt, dst_iova, + (uintptr_t)elm->pages->uptr + elm->start_byte, length); + if (rc) + goto out_unlock; + } else { + rc = iopt_check_iova(iopt, *dst_iova, length); + if (rc) + goto out_unlock; + } + + /* + * Areas are created with a NULL pages so that the IOVA space is + * reserved and we can unlock the iova_rwsem. 
+ */ + iova = *dst_iova; + list_for_each_entry(elm, pages_list, next) { + rc = iopt_insert_area(iopt, elm->area, elm->pages, iova, + elm->start_byte, elm->length, iommu_prot); + if (rc) + goto out_unlock; + iova += elm->length; + } + +out_unlock: + up_write(&iopt->iova_rwsem); + return rc; +} + +static void iopt_abort_area(struct iopt_area *area) +{ + if (area->iopt) { + down_write(&area->iopt->iova_rwsem); + interval_tree_remove(&area->node, &area->iopt->area_itree); + up_write(&area->iopt->iova_rwsem); + } + kfree(area); +} + +void iopt_free_pages_list(struct list_head *pages_list) +{ + struct iopt_pages_list *elm; + + while ((elm = list_first_entry_or_null(pages_list, + struct iopt_pages_list, next))) { + if (elm->area) + iopt_abort_area(elm->area); + if (elm->pages) + iopt_put_pages(elm->pages); + list_del(&elm->next); + kfree(elm); + } +} + +static int iopt_fill_domains_pages(struct list_head *pages_list) +{ + struct iopt_pages_list *undo_elm; + struct iopt_pages_list *elm; + int rc; + + list_for_each_entry(elm, pages_list, next) { + rc = iopt_area_fill_domains(elm->area, elm->pages); + if (rc) + goto err_undo; + } + return 0; + +err_undo: + list_for_each_entry(undo_elm, pages_list, next) { + if (undo_elm == elm) + break; + iopt_area_unfill_domains(undo_elm->area, undo_elm->pages); + } + return rc; +} + +int iopt_map_pages(struct io_pagetable *iopt, struct list_head *pages_list, + unsigned long length, unsigned long *dst_iova, + int iommu_prot, unsigned int flags) +{ + struct iopt_pages_list *elm; + int rc; + + rc = iopt_alloc_area_pages(iopt, pages_list, length, dst_iova, + iommu_prot, flags); + if (rc) + return rc; + + down_read(&iopt->domains_rwsem); + rc = iopt_fill_domains_pages(pages_list); + if (rc) + goto out_unlock_domains; + + down_write(&iopt->iova_rwsem); + list_for_each_entry(elm, pages_list, next) { + /* + * area->pages must be set inside the domains_rwsem to ensure + * any newly added domains will get filled. 
Moves the reference + * in from the list. + */ + elm->area->pages = elm->pages; + elm->pages = NULL; + elm->area = NULL; + } + up_write(&iopt->iova_rwsem); +out_unlock_domains: + up_read(&iopt->domains_rwsem); + return rc; +} + +/** + * iopt_map_user_pages() - Map a user VA to an iova in the io page table + * @ictx: iommufd_ctx the iopt is part of + * @iopt: io_pagetable to act on + * @iova: If IOPT_ALLOC_IOVA is set this is unused on input and contains + * the chosen iova on output. Otherwise is the iova to map to on input + * @uptr: User VA to map + * @length: Number of bytes to map + * @iommu_prot: Combination of IOMMU_READ/WRITE/etc bits for the mapping + * @flags: IOPT_ALLOC_IOVA or zero + * + * iova, uptr, and length must be aligned to iova_alignment. For domain backed + * page tables this will pin the pages and load them into the domain at iova. + * For non-domain page tables this will only setup a lazy reference and the + * caller must use iopt_access_pages() to touch them. + * + * iopt_unmap_iova() must be called to undo this before the io_pagetable can be + * destroyed. 
+ */ +int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt, + unsigned long *iova, void __user *uptr, + unsigned long length, int iommu_prot, + unsigned int flags) +{ + struct iopt_pages_list elm = {}; + LIST_HEAD(pages_list); + int rc; + + elm.pages = iopt_alloc_pages(uptr, length, iommu_prot & IOMMU_WRITE); + if (IS_ERR(elm.pages)) + return PTR_ERR(elm.pages); + if (ictx->account_mode == IOPT_PAGES_ACCOUNT_MM && + elm.pages->account_mode == IOPT_PAGES_ACCOUNT_USER) + elm.pages->account_mode = IOPT_PAGES_ACCOUNT_MM; + elm.start_byte = uptr - elm.pages->uptr; + elm.length = length; + list_add(&elm.next, &pages_list); + + rc = iopt_map_pages(iopt, &pages_list, length, iova, iommu_prot, flags); + if (rc) { + if (elm.area) + iopt_abort_area(elm.area); + if (elm.pages) + iopt_put_pages(elm.pages); + return rc; + } + return 0; +} + +int iopt_get_pages(struct io_pagetable *iopt, unsigned long iova, + unsigned long length, struct list_head *pages_list) +{ + struct iopt_area_contig_iter iter; + unsigned long last_iova; + struct iopt_area *area; + int rc; + + if (!length) + return -EINVAL; + if (check_add_overflow(iova, length - 1, &last_iova)) + return -EOVERFLOW; + + down_read(&iopt->iova_rwsem); + iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) { + struct iopt_pages_list *elm; + unsigned long last = min(last_iova, iopt_area_last_iova(area)); + + elm = kzalloc(sizeof(*elm), GFP_KERNEL_ACCOUNT); + if (!elm) { + rc = -ENOMEM; + goto err_free; + } + elm->start_byte = iopt_area_start_byte(area, iter.cur_iova); + elm->pages = area->pages; + elm->length = (last - iter.cur_iova) + 1; + kref_get(&elm->pages->kref); + list_add_tail(&elm->next, pages_list); + } + if (!iopt_area_contig_done(&iter)) { + rc = -ENOENT; + goto err_free; + } + up_read(&iopt->iova_rwsem); + return 0; +err_free: + up_read(&iopt->iova_rwsem); + iopt_free_pages_list(pages_list); + return rc; +} + +static int iopt_unmap_iova_range(struct io_pagetable *iopt, unsigned long 
start, + unsigned long last, unsigned long *unmapped) +{ + struct iopt_area *area; + unsigned long unmapped_bytes = 0; + int rc = -ENOENT; + + /* + * The domains_rwsem must be held in read mode any time any area->pages + * is NULL. This prevents domain attach/detach from running + * concurrently with cleaning up the area. + */ + down_read(&iopt->domains_rwsem); + down_write(&iopt->iova_rwsem); + while ((area = iopt_area_iter_first(iopt, start, last))) { + unsigned long area_last = iopt_area_last_iova(area); + unsigned long area_first = iopt_area_iova(area); + struct iopt_pages *pages; + + /* Userspace should not race map/unmap's of the same area */ + if (!area->pages) { + rc = -EBUSY; + goto out_unlock_iova; + } + + if (area_first < start || area_last > last) { + rc = -ENOENT; + goto out_unlock_iova; + } + + /* + * num_accesses writers must hold the iova_rwsem too, so we can + * safely read it under the write side of the iova_rwsem + * without the pages->mutex. + */ + if (area->num_accesses) { + start = area_first; + area->prevent_access = true; + up_write(&iopt->iova_rwsem); + up_read(&iopt->domains_rwsem); + /* Later patch calls back to drivers to unmap */ + return -EBUSY; + } + + pages = area->pages; + area->pages = NULL; + up_write(&iopt->iova_rwsem); + + iopt_area_unfill_domains(area, pages); + iopt_abort_area(area); + iopt_put_pages(pages); + + unmapped_bytes += area_last - area_first + 1; + + down_write(&iopt->iova_rwsem); + } + if (unmapped_bytes) + rc = 0; + +out_unlock_iova: + up_write(&iopt->iova_rwsem); + up_read(&iopt->domains_rwsem); + if (unmapped) + *unmapped = unmapped_bytes; + return rc; +} + +/** + * iopt_unmap_iova() - Remove a range of iova + * @iopt: io_pagetable to act on + * @iova: Starting iova to unmap + * @length: Number of bytes to unmap + * @unmapped: Return number of bytes unmapped + * + * The requested range must be a superset of existing ranges. + * Splitting/truncating IOVA mappings is not allowed.
+ */ +int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova, + unsigned long length, unsigned long *unmapped) +{ + unsigned long iova_last; + + if (!length) + return -EINVAL; + + if (check_add_overflow(iova, length - 1, &iova_last)) + return -EOVERFLOW; + + return iopt_unmap_iova_range(iopt, iova, iova_last, unmapped); +} + +int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped) +{ + int rc; + + rc = iopt_unmap_iova_range(iopt, 0, ULONG_MAX, unmapped); + /* If the IOVAs are empty then unmap all succeeds */ + if (rc == -ENOENT) + return 0; + return rc; +} + +/* The caller must always free all the nodes in the allowed_iova rb_root. */ +int iopt_set_allow_iova(struct io_pagetable *iopt, + struct rb_root_cached *allowed_iova) +{ + struct iopt_allowed *allowed; + + down_write(&iopt->iova_rwsem); + swap(*allowed_iova, iopt->allowed_itree); + + for (allowed = iopt_allowed_iter_first(iopt, 0, ULONG_MAX); allowed; + allowed = iopt_allowed_iter_next(allowed, 0, ULONG_MAX)) { + if (iopt_reserved_iter_first(iopt, allowed->node.start, + allowed->node.last)) { + swap(*allowed_iova, iopt->allowed_itree); + up_write(&iopt->iova_rwsem); + return -EADDRINUSE; + } + } + up_write(&iopt->iova_rwsem); + return 0; +} + +int iopt_reserve_iova(struct io_pagetable *iopt, unsigned long start, + unsigned long last, void *owner) +{ + struct iopt_reserved *reserved; + + lockdep_assert_held_write(&iopt->iova_rwsem); + + if (iopt_area_iter_first(iopt, start, last) || + iopt_allowed_iter_first(iopt, start, last)) + return -EADDRINUSE; + + reserved = kzalloc(sizeof(*reserved), GFP_KERNEL_ACCOUNT); + if (!reserved) + return -ENOMEM; + reserved->node.start = start; + reserved->node.last = last; + reserved->owner = owner; + interval_tree_insert(&reserved->node, &iopt->reserved_itree); + return 0; +} + +static void __iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner) +{ + struct iopt_reserved *reserved, *next; + + lockdep_assert_held_write(&iopt->iova_rwsem); 
+ + for (reserved = iopt_reserved_iter_first(iopt, 0, ULONG_MAX); reserved; + reserved = next) { + next = iopt_reserved_iter_next(reserved, 0, ULONG_MAX); + + if (reserved->owner == owner) { + interval_tree_remove(&reserved->node, + &iopt->reserved_itree); + kfree(reserved); + } + } +} + +void iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner) +{ + down_write(&iopt->iova_rwsem); + __iopt_remove_reserved_iova(iopt, owner); + up_write(&iopt->iova_rwsem); +} + +void iopt_init_table(struct io_pagetable *iopt) +{ + init_rwsem(&iopt->iova_rwsem); + init_rwsem(&iopt->domains_rwsem); + iopt->area_itree = RB_ROOT_CACHED; + iopt->allowed_itree = RB_ROOT_CACHED; + iopt->reserved_itree = RB_ROOT_CACHED; + xa_init_flags(&iopt->domains, XA_FLAGS_ACCOUNT); + xa_init_flags(&iopt->access_list, XA_FLAGS_ALLOC); + + /* + * iopt's start as SW tables that can use the entire size_t IOVA space + * due to the use of size_t in the APIs. They have no alignment + * restriction. + */ + iopt->iova_alignment = 1; +} + +void iopt_destroy_table(struct io_pagetable *iopt) +{ + struct interval_tree_node *node; + + while ((node = interval_tree_iter_first(&iopt->allowed_itree, 0, + ULONG_MAX))) { + interval_tree_remove(node, &iopt->allowed_itree); + kfree(container_of(node, struct iopt_allowed, node)); + } + + WARN_ON(!RB_EMPTY_ROOT(&iopt->reserved_itree.rb_root)); + WARN_ON(!xa_empty(&iopt->domains)); + WARN_ON(!xa_empty(&iopt->access_list)); + WARN_ON(!RB_EMPTY_ROOT(&iopt->area_itree.rb_root)); +} + +/** + * iopt_unfill_domain() - Unfill a domain with PFNs + * @iopt: io_pagetable to act on + * @domain: domain to unfill + * + * This is used when removing a domain from the iopt. Every area in the iopt + * will be unmapped from the domain. The domain must already be removed from the + * domains xarray. 
+ */ +static void iopt_unfill_domain(struct io_pagetable *iopt, + struct iommu_domain *domain) +{ + struct iopt_area *area; + + lockdep_assert_held(&iopt->iova_rwsem); + lockdep_assert_held_write(&iopt->domains_rwsem); + + /* + * Some other domain is holding all the pfns still, rapidly unmap this + * domain. + */ + if (iopt->next_domain_id != 0) { + /* Pick an arbitrary remaining domain to act as storage */ + struct iommu_domain *storage_domain = + xa_load(&iopt->domains, 0); + + for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area; + area = iopt_area_iter_next(area, 0, ULONG_MAX)) { + struct iopt_pages *pages = area->pages; + + if (!pages) + continue; + + mutex_lock(&pages->mutex); + if (area->storage_domain == domain) + area->storage_domain = storage_domain; + mutex_unlock(&pages->mutex); + + iopt_area_unmap_domain(area, domain); + } + return; + } + + for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area; + area = iopt_area_iter_next(area, 0, ULONG_MAX)) { + struct iopt_pages *pages = area->pages; + + if (!pages) + continue; + + mutex_lock(&pages->mutex); + interval_tree_remove(&area->pages_node, &pages->domains_itree); + WARN_ON(area->storage_domain != domain); + area->storage_domain = NULL; + iopt_area_unfill_domain(area, pages, domain); + mutex_unlock(&pages->mutex); + } +} + +/** + * iopt_fill_domain() - Fill a domain with PFNs + * @iopt: io_pagetable to act on + * @domain: domain to fill + * + * Fill the domain with PFNs from every area in the iopt. On failure the domain + * is left unchanged. 
+ */ +static int iopt_fill_domain(struct io_pagetable *iopt, + struct iommu_domain *domain) +{ + struct iopt_area *end_area; + struct iopt_area *area; + int rc; + + lockdep_assert_held(&iopt->iova_rwsem); + lockdep_assert_held_write(&iopt->domains_rwsem); + + for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area; + area = iopt_area_iter_next(area, 0, ULONG_MAX)) { + struct iopt_pages *pages = area->pages; + + if (!pages) + continue; + + mutex_lock(&pages->mutex); + rc = iopt_area_fill_domain(area, domain); + if (rc) { + mutex_unlock(&pages->mutex); + goto out_unfill; + } + if (!area->storage_domain) { + WARN_ON(iopt->next_domain_id != 0); + area->storage_domain = domain; + interval_tree_insert(&area->pages_node, + &pages->domains_itree); + } + mutex_unlock(&pages->mutex); + } + return 0; + +out_unfill: + end_area = area; + for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area; + area = iopt_area_iter_next(area, 0, ULONG_MAX)) { + struct iopt_pages *pages = area->pages; + + if (area == end_area) + break; + if (!pages) + continue; + mutex_lock(&pages->mutex); + if (iopt->next_domain_id == 0) { + interval_tree_remove(&area->pages_node, + &pages->domains_itree); + area->storage_domain = NULL; + } + iopt_area_unfill_domain(area, pages, domain); + mutex_unlock(&pages->mutex); + } + return rc; +} + +/* All existing area's conform to an increased page size */ +static int iopt_check_iova_alignment(struct io_pagetable *iopt, + unsigned long new_iova_alignment) +{ + unsigned long align_mask = new_iova_alignment - 1; + struct iopt_area *area; + + lockdep_assert_held(&iopt->iova_rwsem); + lockdep_assert_held(&iopt->domains_rwsem); + + for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area; + area = iopt_area_iter_next(area, 0, ULONG_MAX)) + if ((iopt_area_iova(area) & align_mask) || + (iopt_area_length(area) & align_mask) || + (area->page_offset & align_mask)) + return -EADDRINUSE; + return 0; +} + +int iopt_table_add_domain(struct io_pagetable *iopt, + struct 
iommu_domain *domain) +{ + const struct iommu_domain_geometry *geometry = &domain->geometry; + struct iommu_domain *iter_domain; + unsigned int new_iova_alignment; + unsigned long index; + int rc; + + down_write(&iopt->domains_rwsem); + down_write(&iopt->iova_rwsem); + + xa_for_each(&iopt->domains, index, iter_domain) { + if (WARN_ON(iter_domain == domain)) { + rc = -EEXIST; + goto out_unlock; + } + } + + /* + * The io page size drives the iova_alignment. Internally the iopt_pages + * works in PAGE_SIZE units and we adjust when mapping sub-PAGE_SIZE + * objects into the iommu_domain. + * + * An iommu_domain must always be able to accept PAGE_SIZE to be + * compatible as we can't guarantee higher contiguity. + */ + new_iova_alignment = max_t(unsigned long, + 1UL << __ffs(domain->pgsize_bitmap), + iopt->iova_alignment); + if (new_iova_alignment > PAGE_SIZE) { + rc = -EINVAL; + goto out_unlock; + } + if (new_iova_alignment != iopt->iova_alignment) { + rc = iopt_check_iova_alignment(iopt, new_iova_alignment); + if (rc) + goto out_unlock; + } + + /* No area exists that is outside the allowed domain aperture */ + if (geometry->aperture_start != 0) { + rc = iopt_reserve_iova(iopt, 0, geometry->aperture_start - 1, + domain); + if (rc) + goto out_reserved; + } + if (geometry->aperture_end != ULONG_MAX) { + rc = iopt_reserve_iova(iopt, geometry->aperture_end + 1, + ULONG_MAX, domain); + if (rc) + goto out_reserved; + } + + rc = xa_reserve(&iopt->domains, iopt->next_domain_id, GFP_KERNEL); /* reserve the slot before filling; it is given back at out_release if the fill fails */ + if (rc) + goto out_reserved; + + rc = iopt_fill_domain(iopt, domain); + if (rc) + goto out_release; + + iopt->iova_alignment = new_iova_alignment; + xa_store(&iopt->domains, iopt->next_domain_id, domain, GFP_KERNEL); + iopt->next_domain_id++; + up_write(&iopt->iova_rwsem); + up_write(&iopt->domains_rwsem); + return 0; +out_release: + xa_release(&iopt->domains, iopt->next_domain_id); +out_reserved: + __iopt_remove_reserved_iova(iopt, domain); +out_unlock: + up_write(&iopt->iova_rwsem); +
up_write(&iopt->domains_rwsem); + return rc; +} + +static int iopt_calculate_iova_alignment(struct io_pagetable *iopt) +{ + unsigned long new_iova_alignment; + struct iommufd_access *access; + struct iommu_domain *domain; + unsigned long index; + + lockdep_assert_held_write(&iopt->iova_rwsem); + lockdep_assert_held(&iopt->domains_rwsem); + + /* See batch_iommu_map_small() */ + if (iopt->disable_large_pages) + new_iova_alignment = PAGE_SIZE; + else + new_iova_alignment = 1; + + xa_for_each(&iopt->domains, index, domain) + new_iova_alignment = max_t(unsigned long, + 1UL << __ffs(domain->pgsize_bitmap), + new_iova_alignment); + xa_for_each(&iopt->access_list, index, access) + new_iova_alignment = max_t(unsigned long, + access->iova_alignment, + new_iova_alignment); + + if (new_iova_alignment > iopt->iova_alignment) { + int rc; + + rc = iopt_check_iova_alignment(iopt, new_iova_alignment); + if (rc) + return rc; + } + iopt->iova_alignment = new_iova_alignment; + return 0; +} + +void iopt_table_remove_domain(struct io_pagetable *iopt, + struct iommu_domain *domain) +{ + struct iommu_domain *iter_domain = NULL; + unsigned long index; + + down_write(&iopt->domains_rwsem); + down_write(&iopt->iova_rwsem); + + xa_for_each(&iopt->domains, index, iter_domain) + if (iter_domain == domain) + break; + if (WARN_ON(iter_domain != domain) || index >= iopt->next_domain_id) + goto out_unlock; + + /* + * Compress the xarray to keep it linear by swapping the entry to erase + * with the tail entry and shrinking the tail. 
+ */ + iopt->next_domain_id--; + iter_domain = xa_erase(&iopt->domains, iopt->next_domain_id); + if (index != iopt->next_domain_id) + xa_store(&iopt->domains, index, iter_domain, GFP_KERNEL); + + iopt_unfill_domain(iopt, domain); + __iopt_remove_reserved_iova(iopt, domain); + + WARN_ON(iopt_calculate_iova_alignment(iopt)); +out_unlock: + up_write(&iopt->iova_rwsem); + up_write(&iopt->domains_rwsem); +} + +/** + * iopt_area_split - Split an area into two parts at iova + * @area: The area to split + * @iova: Becomes the last of a new area + * + * This splits an area into two. It is part of the VFIO compatibility to allow + * poking a hole in the mapping. The two areas continue to point at the same + * iopt_pages, just with different starting bytes. If the area has filled a + * domain, the domains_itree of the iopt_pages is updated to track the two new + * areas instead of the original one. + * + * Return: 0 on success or when @iova equals the first or last iova of the area + * (no split is done); -EBUSY if the area has no pages or has prevent_access + * set; -EINVAL if the split point is misaligned, the area has accesses, or a + * domain is filled while large pages are enabled; -ENOMEM on allocation + * failure. + */ +static int iopt_area_split(struct iopt_area *area, unsigned long iova) +{ + unsigned long alignment = area->iopt->iova_alignment; + unsigned long last_iova = iopt_area_last_iova(area); + unsigned long start_iova = iopt_area_iova(area); + unsigned long new_start = iova + 1; + struct io_pagetable *iopt = area->iopt; + struct iopt_pages *pages = area->pages; + struct iopt_area *lhs; + struct iopt_area *rhs; + int rc; + + lockdep_assert_held_write(&iopt->iova_rwsem); + + if (iova == start_iova || iova == last_iova) + return 0; + + if (!pages || area->prevent_access) + return -EBUSY; + + if (new_start & (alignment - 1) || + iopt_area_start_byte(area, new_start) & (alignment - 1)) + return -EINVAL; + + lhs = kzalloc(sizeof(*area), GFP_KERNEL_ACCOUNT); + if (!lhs) + return -ENOMEM; + + rhs = kzalloc(sizeof(*area), GFP_KERNEL_ACCOUNT); + if (!rhs) { + rc = -ENOMEM; + goto err_free_lhs; + } + + mutex_lock(&pages->mutex); + /* + * Splitting is not permitted if an access exists, we don't track enough + * information to split existing accesses. + */ + if (area->num_accesses) { + rc = -EINVAL; + goto err_unlock; + } + + /* + * Splitting is not permitted if a domain could have been mapped with + * huge pages.
+ */ + if (area->storage_domain && !iopt->disable_large_pages) { + rc = -EINVAL; + goto err_unlock; + } + + interval_tree_remove(&area->node, &iopt->area_itree); + rc = iopt_insert_area(iopt, lhs, area->pages, start_iova, + iopt_area_start_byte(area, start_iova), + (new_start - 1) - start_iova + 1, + area->iommu_prot); + if (WARN_ON(rc)) + goto err_insert; + + rc = iopt_insert_area(iopt, rhs, area->pages, new_start, + iopt_area_start_byte(area, new_start), + last_iova - new_start + 1, area->iommu_prot); + if (WARN_ON(rc)) + goto err_remove_lhs; + + /* + * If the original area has filled a domain, domains_itree has to be + * updated: area is freed below, so its pages_node must leave the tree, + * and lhs/rhs inherit the storage_domain and must be tracked instead. + * NOTE(review): relies on iopt_insert_area() having set up pages_node + * for lhs and rhs. + */ + if (area->storage_domain) { + interval_tree_remove(&area->pages_node, &pages->domains_itree); + interval_tree_insert(&lhs->pages_node, &pages->domains_itree); + interval_tree_insert(&rhs->pages_node, &pages->domains_itree); + } + + lhs->storage_domain = area->storage_domain; + lhs->pages = area->pages; + rhs->storage_domain = area->storage_domain; + rhs->pages = area->pages; + kref_get(&rhs->pages->kref); + kfree(area); + mutex_unlock(&pages->mutex); + + /* + * No change to domains or accesses because the pages hasn't been + * changed + */ + return 0; + +err_remove_lhs: + interval_tree_remove(&lhs->node, &iopt->area_itree); +err_insert: + interval_tree_insert(&area->node, &iopt->area_itree); +err_unlock: + mutex_unlock(&pages->mutex); + kfree(rhs); +err_free_lhs: + kfree(lhs); + return rc; +} + +int iopt_cut_iova(struct io_pagetable *iopt, unsigned long *iovas, + size_t num_iovas) +{ + int rc = 0; + int i; + + down_write(&iopt->iova_rwsem); + for (i = 0; i < num_iovas; i++) { + struct iopt_area *area; + + area = iopt_area_iter_first(iopt, iovas[i], iovas[i]); + if (!area) + continue; + rc = iopt_area_split(area, iovas[i]); + if (rc) + break; + } + up_write(&iopt->iova_rwsem); + return rc; +} + +void iopt_enable_large_pages(struct io_pagetable *iopt) +{ + int rc; + + down_write(&iopt->domains_rwsem); + down_write(&iopt->iova_rwsem); + WRITE_ONCE(iopt->disable_large_pages, false); + rc = iopt_calculate_iova_alignment(iopt); + WARN_ON(rc); + up_write(&iopt->iova_rwsem); + up_write(&iopt->domains_rwsem); +} + +int iopt_disable_large_pages(struct io_pagetable *iopt) +{ + int rc = 0; + + down_write(&iopt->domains_rwsem); + down_write(&iopt->iova_rwsem);
+ if (iopt->disable_large_pages) + goto out_unlock; + + /* Won't do it if domains already have pages mapped in them */ + if (!xa_empty(&iopt->domains) && + !RB_EMPTY_ROOT(&iopt->area_itree.rb_root)) { + rc = -EINVAL; + goto out_unlock; + } + + WRITE_ONCE(iopt->disable_large_pages, true); + rc = iopt_calculate_iova_alignment(iopt); + if (rc) + WRITE_ONCE(iopt->disable_large_pages, false); +out_unlock: + up_write(&iopt->iova_rwsem); + up_write(&iopt->domains_rwsem); + return rc; +} + +int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access) +{ + int rc; + + down_write(&iopt->domains_rwsem); + down_write(&iopt->iova_rwsem); + rc = xa_alloc(&iopt->access_list, &access->iopt_access_list_id, access, + xa_limit_16b, GFP_KERNEL_ACCOUNT); + if (rc) + goto out_unlock; + + rc = iopt_calculate_iova_alignment(iopt); + if (rc) { + xa_erase(&iopt->access_list, access->iopt_access_list_id); + goto out_unlock; + } + +out_unlock: + up_write(&iopt->iova_rwsem); + up_write(&iopt->domains_rwsem); + return rc; +} + +void iopt_remove_access(struct io_pagetable *iopt, + struct iommufd_access *access) +{ + down_write(&iopt->domains_rwsem); + down_write(&iopt->iova_rwsem); + WARN_ON(xa_erase(&iopt->access_list, access->iopt_access_list_id) != + access); + WARN_ON(iopt_calculate_iova_alignment(iopt)); + up_write(&iopt->iova_rwsem); + up_write(&iopt->domains_rwsem); +} + +/* Narrow the valid_iova_itree to include reserved ranges from a group. 
*/ +int iopt_table_enforce_group_resv_regions(struct io_pagetable *iopt, + struct device *device, + struct iommu_group *group, + phys_addr_t *sw_msi_start) +{ + struct iommu_resv_region *resv; + struct iommu_resv_region *tmp; + LIST_HEAD(group_resv_regions); + int rc; + + down_write(&iopt->iova_rwsem); + rc = iommu_get_group_resv_regions(group, &group_resv_regions); + if (rc) + goto out_unlock; + + list_for_each_entry(resv, &group_resv_regions, list) { + if (resv->type == IOMMU_RESV_DIRECT_RELAXABLE) + continue; + + /* + * The presence of any 'real' MSI regions should take precedence + * over the software-managed one if the IOMMU driver happens to + * advertise both types. + */ + if (sw_msi_start && resv->type == IOMMU_RESV_MSI) { + *sw_msi_start = 0; + sw_msi_start = NULL; + } + if (sw_msi_start && resv->type == IOMMU_RESV_SW_MSI) + *sw_msi_start = resv->start; + + rc = iopt_reserve_iova(iopt, resv->start, + resv->length - 1 + resv->start, device); + if (rc) + goto out_reserved; + } + rc = 0; + goto out_free_resv; + +out_reserved: + __iopt_remove_reserved_iova(iopt, device); +out_free_resv: + list_for_each_entry_safe(resv, tmp, &group_resv_regions, list) + kfree(resv); +out_unlock: + up_write(&iopt->iova_rwsem); + return rc; +} diff --git a/drivers/iommu/iommufd/io_pagetable.h b/drivers/iommu/iommufd/io_pagetable.h index a2b724175057..2ee6942c3ef4 100644 --- a/drivers/iommu/iommufd/io_pagetable.h +++ b/drivers/iommu/iommufd/io_pagetable.h @@ -46,9 +46,19 @@ struct iopt_area { unsigned int page_offset; /* IOMMU_READ, IOMMU_WRITE, etc */ int iommu_prot; + bool prevent_access : 1; unsigned int num_accesses; }; +struct iopt_allowed { + struct interval_tree_node node; +}; + +struct iopt_reserved { + struct interval_tree_node node; + void *owner; +}; + int iopt_area_fill_domains(struct iopt_area *area, struct iopt_pages *pages); void iopt_area_unfill_domains(struct iopt_area *area, struct iopt_pages *pages); @@ -83,6 +93,24 @@ static inline size_t 
iopt_area_length(struct iopt_area *area) return (area->node.last - area->node.start) + 1; } +/* + * Number of bytes from the start of the iopt_pages that the iova begins. + * iopt_area_start_byte() / PAGE_SIZE encodes the starting page index + * iopt_area_start_byte() % PAGE_SIZE encodes the offset within that page + */ +static inline unsigned long iopt_area_start_byte(struct iopt_area *area, + unsigned long iova) +{ + return (iova - iopt_area_iova(area)) + area->page_offset + + iopt_area_index(area) * PAGE_SIZE; +} + +static inline unsigned long iopt_area_iova_to_index(struct iopt_area *area, + unsigned long iova) +{ + return iopt_area_start_byte(area, iova) / PAGE_SIZE; +} + #define __make_iopt_iter(name) \ static inline struct iopt_##name *iopt_##name##_iter_first( \ struct io_pagetable *iopt, unsigned long start, \ @@ -110,6 +138,33 @@ static inline size_t iopt_area_length(struct iopt_area *area) } __make_iopt_iter(area) +__make_iopt_iter(allowed) +__make_iopt_iter(reserved) + +struct iopt_area_contig_iter { + unsigned long cur_iova; + unsigned long last_iova; + struct iopt_area *area; +}; +struct iopt_area *iopt_area_contig_init(struct iopt_area_contig_iter *iter, + struct io_pagetable *iopt, + unsigned long iova, + unsigned long last_iova); +struct iopt_area *iopt_area_contig_next(struct iopt_area_contig_iter *iter); + +static inline bool iopt_area_contig_done(struct iopt_area_contig_iter *iter) +{ + return iter->area && iter->last_iova <= iopt_area_last_iova(iter->area); +} + +/* + * Iterate over a contiguous list of areas that span the iova,last_iova range. + * The caller must check iopt_area_contig_done() after the loop to see if + * contiguous areas existed. 
+ */ +#define iopt_for_each_contig_area(iter, area, iopt, iova, last_iova) \ + for (area = iopt_area_contig_init(iter, iopt, iova, last_iova); area; \ + area = iopt_area_contig_next(iter)) enum { IOPT_PAGES_ACCOUNT_NONE = 0, diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index 169a30ff3bf0..f7ab6c6edafd 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -9,9 +9,14 @@ #include #include +struct iommu_domain; +struct iommu_group; + struct iommufd_ctx { struct file *file; struct xarray objects; + + u8 account_mode; }; /* @@ -27,6 +32,7 @@ struct iommufd_ctx { struct io_pagetable { struct rw_semaphore domains_rwsem; struct xarray domains; + struct xarray access_list; unsigned int next_domain_id; struct rw_semaphore iova_rwsem; @@ -36,8 +42,46 @@ struct io_pagetable { /* IOVA that cannot be allocated, struct iopt_reserved */ struct rb_root_cached reserved_itree; u8 disable_large_pages; + unsigned long iova_alignment; }; +void iopt_init_table(struct io_pagetable *iopt); +void iopt_destroy_table(struct io_pagetable *iopt); +int iopt_get_pages(struct io_pagetable *iopt, unsigned long iova, + unsigned long length, struct list_head *pages_list); +void iopt_free_pages_list(struct list_head *pages_list); +enum { + IOPT_ALLOC_IOVA = 1 << 0, +}; +int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt, + unsigned long *iova, void __user *uptr, + unsigned long length, int iommu_prot, + unsigned int flags); +int iopt_map_pages(struct io_pagetable *iopt, struct list_head *pages_list, + unsigned long length, unsigned long *dst_iova, + int iommu_prot, unsigned int flags); +int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova, + unsigned long length, unsigned long *unmapped); +int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped); + +int iopt_table_add_domain(struct io_pagetable *iopt, + struct iommu_domain *domain); +void 
iopt_table_remove_domain(struct io_pagetable *iopt, + struct iommu_domain *domain); +int iopt_table_enforce_group_resv_regions(struct io_pagetable *iopt, + struct device *device, + struct iommu_group *group, + phys_addr_t *sw_msi_start); +int iopt_set_allow_iova(struct io_pagetable *iopt, + struct rb_root_cached *allowed_iova); +int iopt_reserve_iova(struct io_pagetable *iopt, unsigned long start, + unsigned long last, void *owner); +void iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner); +int iopt_cut_iova(struct io_pagetable *iopt, unsigned long *iovas, + size_t num_iovas); +void iopt_enable_large_pages(struct io_pagetable *iopt); +int iopt_disable_large_pages(struct io_pagetable *iopt); + struct iommufd_ucmd { struct iommufd_ctx *ictx; void __user *ubuffer; @@ -130,4 +174,12 @@ struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, type), \ typeof(*(ptr)), obj) +struct iommufd_access { + unsigned long iova_alignment; + u32 iopt_access_list_id; +}; + +int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access); +void iopt_remove_access(struct io_pagetable *iopt, + struct iommufd_access *access); #endif From aad37e71d5c4dc1d3c25734f0bcd51c324f94b5e Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 29 Nov 2022 16:29:34 -0400 Subject: [PATCH 3130/4122] iommufd: IOCTLs for the io_pagetable Connect the IOAS to its IOCTL interface. This exposes most of the functionality in the io_pagetable to userspace. This is intended to be the core of the generic interface that IOMMUFD will provide. Every IOMMU driver should be able to implement an iommu_domain that is compatible with this generic mechanism. 
It is also designed to be easy to use for simple non-virtual-machine-monitor users, like DPDK: - Universal simple support for all IOMMUs (no PPC special path) - An IOVA allocator that considers the aperture and the allowed/reserved ranges - io_pagetable allows any number of iommu_domains to be connected to the IOAS - Automatic allocation and re-use of iommu_domains The design also leaves room to add non-generic features that cater to specific HW functionality. Link: https://lore.kernel.org/r/11-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com Tested-by: Nicolin Chen Tested-by: Yi Liu Tested-by: Lixiao Yang Tested-by: Matthew Rosato Reviewed-by: Kevin Tian Reviewed-by: Eric Auger Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/Makefile | 1 + drivers/iommu/iommufd/ioas.c | 392 ++++++++++++++++++++++++ drivers/iommu/iommufd/iommufd_private.h | 33 ++ drivers/iommu/iommufd/main.c | 48 +++ include/uapi/linux/iommufd.h | 258 +++++++++++++++- 5 files changed, 731 insertions(+), 1 deletion(-) create mode 100644 drivers/iommu/iommufd/ioas.c diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile index b66a8c47ff55..2b4f36f1b72f 100644 --- a/drivers/iommu/iommufd/Makefile +++ b/drivers/iommu/iommufd/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only iommufd-y := \ io_pagetable.o \ + ioas.o \ main.o \ pages.o diff --git a/drivers/iommu/iommufd/ioas.c b/drivers/iommu/iommufd/ioas.c new file mode 100644 index 000000000000..6ff97dafc891 --- /dev/null +++ b/drivers/iommu/iommufd/ioas.c @@ -0,0 +1,392 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES + */ +#include +#include +#include +#include + +#include "io_pagetable.h" + +void iommufd_ioas_destroy(struct iommufd_object *obj) +{ + struct iommufd_ioas *ioas = container_of(obj, struct iommufd_ioas, obj); + int rc; + + rc = iopt_unmap_all(&ioas->iopt, NULL); + WARN_ON(rc && rc != -ENOENT); +
iopt_destroy_table(&ioas->iopt); +} + +struct iommufd_ioas *iommufd_ioas_alloc(struct iommufd_ctx *ictx) +{ + struct iommufd_ioas *ioas; + + ioas = iommufd_object_alloc(ictx, ioas, IOMMUFD_OBJ_IOAS); + if (IS_ERR(ioas)) + return ioas; + + iopt_init_table(&ioas->iopt); + return ioas; +} + +int iommufd_ioas_alloc_ioctl(struct iommufd_ucmd *ucmd) +{ + struct iommu_ioas_alloc *cmd = ucmd->cmd; + struct iommufd_ioas *ioas; + int rc; + + if (cmd->flags) + return -EOPNOTSUPP; + + ioas = iommufd_ioas_alloc(ucmd->ictx); + if (IS_ERR(ioas)) + return PTR_ERR(ioas); + + cmd->out_ioas_id = ioas->obj.id; + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); + if (rc) + goto out_table; + iommufd_object_finalize(ucmd->ictx, &ioas->obj); + return 0; + +out_table: + iommufd_object_abort_and_destroy(ucmd->ictx, &ioas->obj); + return rc; +} + +int iommufd_ioas_iova_ranges(struct iommufd_ucmd *ucmd) +{ + struct iommu_iova_range __user *ranges; + struct iommu_ioas_iova_ranges *cmd = ucmd->cmd; + struct iommufd_ioas *ioas; + struct interval_tree_span_iter span; + u32 max_iovas; + int rc; + + if (cmd->__reserved) + return -EOPNOTSUPP; + + ioas = iommufd_get_ioas(ucmd, cmd->ioas_id); + if (IS_ERR(ioas)) + return PTR_ERR(ioas); + + down_read(&ioas->iopt.iova_rwsem); + max_iovas = cmd->num_iovas; + ranges = u64_to_user_ptr(cmd->allowed_iovas); + cmd->num_iovas = 0; + cmd->out_iova_alignment = ioas->iopt.iova_alignment; + interval_tree_for_each_span(&span, &ioas->iopt.reserved_itree, 0, + ULONG_MAX) { + if (!span.is_hole) + continue; + if (cmd->num_iovas < max_iovas) { + struct iommu_iova_range elm = { + .start = span.start_hole, + .last = span.last_hole, + }; + + if (copy_to_user(&ranges[cmd->num_iovas], &elm, + sizeof(elm))) { + rc = -EFAULT; + goto out_put; + } + } + cmd->num_iovas++; + } + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); + if (rc) + goto out_put; + if (cmd->num_iovas > max_iovas) + rc = -EMSGSIZE; +out_put: + up_read(&ioas->iopt.iova_rwsem); + iommufd_put_object(&ioas->obj); + 
return rc; +} + +static int iommufd_ioas_load_iovas(struct rb_root_cached *itree, + struct iommu_iova_range __user *ranges, + u32 num) +{ + u32 i; + + for (i = 0; i != num; i++) { + struct iommu_iova_range range; + struct iopt_allowed *allowed; + + if (copy_from_user(&range, ranges + i, sizeof(range))) + return -EFAULT; + + if (range.start >= range.last) + return -EINVAL; + + if (interval_tree_iter_first(itree, range.start, range.last)) + return -EINVAL; + + allowed = kzalloc(sizeof(*allowed), GFP_KERNEL_ACCOUNT); + if (!allowed) + return -ENOMEM; + allowed->node.start = range.start; + allowed->node.last = range.last; + + interval_tree_insert(&allowed->node, itree); + } + return 0; +} + +int iommufd_ioas_allow_iovas(struct iommufd_ucmd *ucmd) +{ + struct iommu_ioas_allow_iovas *cmd = ucmd->cmd; + struct rb_root_cached allowed_iova = RB_ROOT_CACHED; + struct interval_tree_node *node; + struct iommufd_ioas *ioas; + struct io_pagetable *iopt; + int rc = 0; + + if (cmd->__reserved) + return -EOPNOTSUPP; + + ioas = iommufd_get_ioas(ucmd, cmd->ioas_id); + if (IS_ERR(ioas)) + return PTR_ERR(ioas); + iopt = &ioas->iopt; + + rc = iommufd_ioas_load_iovas(&allowed_iova, + u64_to_user_ptr(cmd->allowed_iovas), + cmd->num_iovas); + if (rc) + goto out_free; + + /* + * We want the allowed tree update to be atomic, so we have to keep the + * original nodes around, and keep track of the new nodes as we allocate + * memory for them. The simplest solution is to have a new/old tree and + * then swap new for old. On success we free the old tree, on failure we + * free the new tree. 
+ */ + rc = iopt_set_allow_iova(iopt, &allowed_iova); +out_free: + while ((node = interval_tree_iter_first(&allowed_iova, 0, ULONG_MAX))) { + interval_tree_remove(node, &allowed_iova); + kfree(container_of(node, struct iopt_allowed, node)); + } + iommufd_put_object(&ioas->obj); + return rc; +} + +static int conv_iommu_prot(u32 map_flags) +{ + /* + * We provide no manual cache coherency ioctls to userspace and most + * architectures make the CPU ops for cache flushing privileged. + * Therefore we require the underlying IOMMU to support CPU coherent + * operation. Support for IOMMU_CACHE is enforced by the + * IOMMU_CAP_CACHE_COHERENCY test during bind. + */ + int iommu_prot = IOMMU_CACHE; + + if (map_flags & IOMMU_IOAS_MAP_WRITEABLE) + iommu_prot |= IOMMU_WRITE; + if (map_flags & IOMMU_IOAS_MAP_READABLE) + iommu_prot |= IOMMU_READ; + return iommu_prot; +} + +int iommufd_ioas_map(struct iommufd_ucmd *ucmd) +{ + struct iommu_ioas_map *cmd = ucmd->cmd; + unsigned long iova = cmd->iova; + struct iommufd_ioas *ioas; + unsigned int flags = 0; + int rc; + + if ((cmd->flags & + ~(IOMMU_IOAS_MAP_FIXED_IOVA | IOMMU_IOAS_MAP_WRITEABLE | + IOMMU_IOAS_MAP_READABLE)) || + cmd->__reserved) + return -EOPNOTSUPP; + if (cmd->iova >= ULONG_MAX || cmd->length >= ULONG_MAX) + return -EOVERFLOW; + + ioas = iommufd_get_ioas(ucmd, cmd->ioas_id); + if (IS_ERR(ioas)) + return PTR_ERR(ioas); + + if (!(cmd->flags & IOMMU_IOAS_MAP_FIXED_IOVA)) + flags = IOPT_ALLOC_IOVA; + rc = iopt_map_user_pages(ucmd->ictx, &ioas->iopt, &iova, + u64_to_user_ptr(cmd->user_va), cmd->length, + conv_iommu_prot(cmd->flags), flags); + if (rc) + goto out_put; + + cmd->iova = iova; + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); +out_put: + iommufd_put_object(&ioas->obj); + return rc; +} + +int iommufd_ioas_copy(struct iommufd_ucmd *ucmd) +{ + struct iommu_ioas_copy *cmd = ucmd->cmd; + struct iommufd_ioas *src_ioas; + struct iommufd_ioas *dst_ioas; + unsigned int flags = 0; + LIST_HEAD(pages_list); + unsigned long 
iova; + int rc; + + if ((cmd->flags & + ~(IOMMU_IOAS_MAP_FIXED_IOVA | IOMMU_IOAS_MAP_WRITEABLE | + IOMMU_IOAS_MAP_READABLE))) + return -EOPNOTSUPP; + if (cmd->length >= ULONG_MAX || cmd->src_iova >= ULONG_MAX || + cmd->dst_iova >= ULONG_MAX) + return -EOVERFLOW; + + src_ioas = iommufd_get_ioas(ucmd, cmd->src_ioas_id); + if (IS_ERR(src_ioas)) + return PTR_ERR(src_ioas); + rc = iopt_get_pages(&src_ioas->iopt, cmd->src_iova, cmd->length, + &pages_list); + iommufd_put_object(&src_ioas->obj); + if (rc) + return rc; + + dst_ioas = iommufd_get_ioas(ucmd, cmd->dst_ioas_id); + if (IS_ERR(dst_ioas)) { + rc = PTR_ERR(dst_ioas); + goto out_pages; + } + + if (!(cmd->flags & IOMMU_IOAS_MAP_FIXED_IOVA)) + flags = IOPT_ALLOC_IOVA; + iova = cmd->dst_iova; + rc = iopt_map_pages(&dst_ioas->iopt, &pages_list, cmd->length, &iova, + conv_iommu_prot(cmd->flags), flags); + if (rc) + goto out_put_dst; + + cmd->dst_iova = iova; + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); +out_put_dst: + iommufd_put_object(&dst_ioas->obj); +out_pages: + iopt_free_pages_list(&pages_list); + return rc; +} + +int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd) +{ + struct iommu_ioas_unmap *cmd = ucmd->cmd; + struct iommufd_ioas *ioas; + unsigned long unmapped = 0; + int rc; + + ioas = iommufd_get_ioas(ucmd, cmd->ioas_id); + if (IS_ERR(ioas)) + return PTR_ERR(ioas); + + if (cmd->iova == 0 && cmd->length == U64_MAX) { + rc = iopt_unmap_all(&ioas->iopt, &unmapped); + if (rc) + goto out_put; + } else { + if (cmd->iova >= ULONG_MAX || cmd->length >= ULONG_MAX) { + rc = -EOVERFLOW; + goto out_put; + } + rc = iopt_unmap_iova(&ioas->iopt, cmd->iova, cmd->length, + &unmapped); + if (rc) + goto out_put; + } + + cmd->length = unmapped; + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); + +out_put: + iommufd_put_object(&ioas->obj); + return rc; +} + +int iommufd_option_rlimit_mode(struct iommu_option *cmd, + struct iommufd_ctx *ictx) +{ + if (cmd->object_id) + return -EOPNOTSUPP; + + if (cmd->op == IOMMU_OPTION_OP_GET) { 
+ cmd->val64 = ictx->account_mode == IOPT_PAGES_ACCOUNT_MM; + return 0; + } + if (cmd->op == IOMMU_OPTION_OP_SET) { + int rc = 0; + + if (!capable(CAP_SYS_RESOURCE)) + return -EPERM; + + xa_lock(&ictx->objects); + if (!xa_empty(&ictx->objects)) { + rc = -EBUSY; + } else { + if (cmd->val64 == 0) + ictx->account_mode = IOPT_PAGES_ACCOUNT_USER; + else if (cmd->val64 == 1) + ictx->account_mode = IOPT_PAGES_ACCOUNT_MM; + else + rc = -EINVAL; + } + xa_unlock(&ictx->objects); + + return rc; + } + return -EOPNOTSUPP; +} + +static int iommufd_ioas_option_huge_pages(struct iommu_option *cmd, + struct iommufd_ioas *ioas) +{ + if (cmd->op == IOMMU_OPTION_OP_GET) { + cmd->val64 = !ioas->iopt.disable_large_pages; + return 0; + } + if (cmd->op == IOMMU_OPTION_OP_SET) { + if (cmd->val64 == 0) + return iopt_disable_large_pages(&ioas->iopt); + if (cmd->val64 == 1) { + iopt_enable_large_pages(&ioas->iopt); + return 0; + } + return -EINVAL; + } + return -EOPNOTSUPP; +} + +int iommufd_ioas_option(struct iommufd_ucmd *ucmd) +{ + struct iommu_option *cmd = ucmd->cmd; + struct iommufd_ioas *ioas; + int rc = 0; + + if (cmd->__reserved) + return -EOPNOTSUPP; + + ioas = iommufd_get_ioas(ucmd, cmd->object_id); + if (IS_ERR(ioas)) + return PTR_ERR(ioas); + + switch (cmd->option_id) { + case IOMMU_OPTION_HUGE_PAGES: + rc = iommufd_ioas_option_huge_pages(cmd, ioas); + break; + default: + rc = -EOPNOTSUPP; + } + + iommufd_put_object(&ioas->obj); + return rc; +} diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index f7ab6c6edafd..1a13c54a8def 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -11,6 +11,7 @@ struct iommu_domain; struct iommu_group; +struct iommu_option; struct iommufd_ctx { struct file *file; @@ -102,6 +103,7 @@ static inline int iommufd_ucmd_respond(struct iommufd_ucmd *ucmd, enum iommufd_object_type { IOMMUFD_OBJ_NONE, IOMMUFD_OBJ_ANY = IOMMUFD_OBJ_NONE, + IOMMUFD_OBJ_IOAS, }; /* Base 
struct for all objects with a userspace ID handle. */ @@ -174,6 +176,37 @@ struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, type), \ typeof(*(ptr)), obj) +/* + * The IO Address Space (IOAS) pagetable is a virtual page table backed by the + * io_pagetable object. It is a user controlled mapping of IOVA -> PFNs. The + * mapping is copied into all of the associated domains and made available to + * in-kernel users. + */ +struct iommufd_ioas { + struct iommufd_object obj; + struct io_pagetable iopt; +}; + +static inline struct iommufd_ioas *iommufd_get_ioas(struct iommufd_ucmd *ucmd, + u32 id) +{ + return container_of(iommufd_get_object(ucmd->ictx, id, + IOMMUFD_OBJ_IOAS), + struct iommufd_ioas, obj); +} + +struct iommufd_ioas *iommufd_ioas_alloc(struct iommufd_ctx *ictx); +int iommufd_ioas_alloc_ioctl(struct iommufd_ucmd *ucmd); +void iommufd_ioas_destroy(struct iommufd_object *obj); +int iommufd_ioas_iova_ranges(struct iommufd_ucmd *ucmd); +int iommufd_ioas_allow_iovas(struct iommufd_ucmd *ucmd); +int iommufd_ioas_map(struct iommufd_ucmd *ucmd); +int iommufd_ioas_copy(struct iommufd_ucmd *ucmd); +int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd); +int iommufd_ioas_option(struct iommufd_ucmd *ucmd); +int iommufd_option_rlimit_mode(struct iommu_option *cmd, + struct iommufd_ctx *ictx); + struct iommufd_access { unsigned long iova_alignment; u32 iopt_access_list_id; diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index dfbc68b97506..1c0a1f499378 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -204,8 +204,39 @@ static int iommufd_fops_release(struct inode *inode, struct file *filp) return 0; } +static int iommufd_option(struct iommufd_ucmd *ucmd) +{ + struct iommu_option *cmd = ucmd->cmd; + int rc; + + if (cmd->__reserved) + return -EOPNOTSUPP; + + switch (cmd->option_id) { + case IOMMU_OPTION_RLIMIT_MODE: + rc = iommufd_option_rlimit_mode(cmd, ucmd->ictx); + break; + case 
IOMMU_OPTION_HUGE_PAGES: + rc = iommufd_ioas_option(ucmd); + break; + default: + return -EOPNOTSUPP; + } + if (rc) + return rc; + if (copy_to_user(&((struct iommu_option __user *)ucmd->ubuffer)->val64, + &cmd->val64, sizeof(cmd->val64))) + return -EFAULT; + return 0; +} + union ucmd_buffer { struct iommu_destroy destroy; + struct iommu_ioas_alloc alloc; + struct iommu_ioas_allow_iovas allow_iovas; + struct iommu_ioas_iova_ranges iova_ranges; + struct iommu_ioas_map map; + struct iommu_ioas_unmap unmap; }; struct iommufd_ioctl_op { @@ -226,6 +257,20 @@ struct iommufd_ioctl_op { } static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = { IOCTL_OP(IOMMU_DESTROY, iommufd_destroy, struct iommu_destroy, id), + IOCTL_OP(IOMMU_IOAS_ALLOC, iommufd_ioas_alloc_ioctl, + struct iommu_ioas_alloc, out_ioas_id), + IOCTL_OP(IOMMU_IOAS_ALLOW_IOVAS, iommufd_ioas_allow_iovas, + struct iommu_ioas_allow_iovas, allowed_iovas), + IOCTL_OP(IOMMU_IOAS_COPY, iommufd_ioas_copy, struct iommu_ioas_copy, + src_iova), + IOCTL_OP(IOMMU_IOAS_IOVA_RANGES, iommufd_ioas_iova_ranges, + struct iommu_ioas_iova_ranges, out_iova_alignment), + IOCTL_OP(IOMMU_IOAS_MAP, iommufd_ioas_map, struct iommu_ioas_map, + iova), + IOCTL_OP(IOMMU_IOAS_UNMAP, iommufd_ioas_unmap, struct iommu_ioas_unmap, + length), + IOCTL_OP(IOMMU_OPTION, iommufd_option, struct iommu_option, + val64), }; static long iommufd_fops_ioctl(struct file *filp, unsigned int cmd, @@ -312,6 +357,9 @@ void iommufd_ctx_put(struct iommufd_ctx *ictx) EXPORT_SYMBOL_NS_GPL(iommufd_ctx_put, IOMMUFD); static const struct iommufd_object_ops iommufd_object_ops[] = { + [IOMMUFD_OBJ_IOAS] = { + .destroy = iommufd_ioas_destroy, + }, }; static struct miscdevice iommu_misc_dev = { diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 37de92f0534b..30cc5c5e2b34 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -37,12 +37,19 @@ enum { IOMMUFD_CMD_BASE = 0x80, IOMMUFD_CMD_DESTROY = IOMMUFD_CMD_BASE, + 
IOMMUFD_CMD_IOAS_ALLOC, + IOMMUFD_CMD_IOAS_ALLOW_IOVAS, + IOMMUFD_CMD_IOAS_COPY, + IOMMUFD_CMD_IOAS_IOVA_RANGES, + IOMMUFD_CMD_IOAS_MAP, + IOMMUFD_CMD_IOAS_UNMAP, + IOMMUFD_CMD_OPTION, }; /** * struct iommu_destroy - ioctl(IOMMU_DESTROY) * @size: sizeof(struct iommu_destroy) - * @id: iommufd object ID to destroy. Can by any destroyable object type. + * @id: iommufd object ID to destroy. Can be any destroyable object type. * * Destroy any object held within iommufd. */ @@ -52,4 +59,253 @@ struct iommu_destroy { }; #define IOMMU_DESTROY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_DESTROY) +/** + * struct iommu_ioas_alloc - ioctl(IOMMU_IOAS_ALLOC) + * @size: sizeof(struct iommu_ioas_alloc) + * @flags: Must be 0 + * @out_ioas_id: Output IOAS ID for the allocated object + * + * Allocate an IO Address Space (IOAS) which holds an IO Virtual Address (IOVA) + * to memory mapping. + */ +struct iommu_ioas_alloc { + __u32 size; + __u32 flags; + __u32 out_ioas_id; +}; +#define IOMMU_IOAS_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_ALLOC) + +/** + * struct iommu_iova_range - ioctl(IOMMU_IOVA_RANGE) + * @start: First IOVA + * @last: Inclusive last IOVA + * + * An interval in IOVA space. + */ +struct iommu_iova_range { + __aligned_u64 start; + __aligned_u64 last; +}; + +/** + * struct iommu_ioas_iova_ranges - ioctl(IOMMU_IOAS_IOVA_RANGES) + * @size: sizeof(struct iommu_ioas_iova_ranges) + * @ioas_id: IOAS ID to read ranges from + * @num_iovas: Input/Output total number of ranges in the IOAS + * @__reserved: Must be 0 + * @allowed_iovas: Pointer to the output array of struct iommu_iova_range + * @out_iova_alignment: Minimum alignment required for mapping IOVA + * + * Query an IOAS for ranges of allowed IOVAs. Mapping IOVA outside these ranges + * is not allowed. num_iovas will be set to the total number of iovas and + * the allowed_iovas[] will be filled in as space permits. 
+ * + * The allowed ranges are dependent on the HW path the DMA operation takes, and + * can change during the lifetime of the IOAS. A fresh empty IOAS will have a + * full range, and each attached device will narrow the ranges based on that + * device's HW restrictions. Detaching a device can widen the ranges. Userspace + * should query ranges after every attach/detach to know what IOVAs are valid + * for mapping. + * + * On input num_iovas is the length of the allowed_iovas array. On output it is + * the total number of iovas filled in. The ioctl will return -EMSGSIZE and set + * num_iovas to the required value if num_iovas is too small. In this case the + * caller should allocate a larger output array and re-issue the ioctl. + * + * out_iova_alignment returns the minimum IOVA alignment that can be given + * to IOMMU_IOAS_MAP/COPY. IOVA's must satisfy:: + * + * starting_iova % out_iova_alignment == 0 + * (starting_iova + length) % out_iova_alignment == 0 + * + * out_iova_alignment can be 1 indicating any IOVA is allowed. It cannot + * be higher than the system PAGE_SIZE. + */ +struct iommu_ioas_iova_ranges { + __u32 size; + __u32 ioas_id; + __u32 num_iovas; + __u32 __reserved; + __aligned_u64 allowed_iovas; + __aligned_u64 out_iova_alignment; +}; +#define IOMMU_IOAS_IOVA_RANGES _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_IOVA_RANGES) + +/** + * struct iommu_ioas_allow_iovas - ioctl(IOMMU_IOAS_ALLOW_IOVAS) + * @size: sizeof(struct iommu_ioas_allow_iovas) + * @ioas_id: IOAS ID to allow IOVAs from + * @num_iovas: Input/Output total number of ranges in the IOAS + * @__reserved: Must be 0 + * @allowed_iovas: Pointer to array of struct iommu_iova_range + * + * Ensure a range of IOVAs are always available for allocation. If this call + * succeeds then IOMMU_IOAS_IOVA_RANGES will never return a list of IOVA ranges + * that are narrower than the ranges provided here. This call will fail if + * IOMMU_IOAS_IOVA_RANGES is currently narrower than the given ranges. 
+ * + * When an IOAS is first created the IOVA_RANGES will be maximally sized, and as + * devices are attached the IOVA will narrow based on the device restrictions. + * When an allowed range is specified any narrowing will be refused, ie device + * attachment can fail if the device requires limiting within the allowed range. + * + * Automatic IOVA allocation is also impacted by this call. MAP will only + * allocate within the allowed IOVAs if they are present. + * + * This call replaces the entire allowed list with the given list. + */ +struct iommu_ioas_allow_iovas { + __u32 size; + __u32 ioas_id; + __u32 num_iovas; + __u32 __reserved; + __aligned_u64 allowed_iovas; +}; +#define IOMMU_IOAS_ALLOW_IOVAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_ALLOW_IOVAS) + +/** + * enum iommufd_ioas_map_flags - Flags for map and copy + * @IOMMU_IOAS_MAP_FIXED_IOVA: If clear the kernel will compute an appropriate + * IOVA to place the mapping at + * @IOMMU_IOAS_MAP_WRITEABLE: DMA is allowed to write to this mapping + * @IOMMU_IOAS_MAP_READABLE: DMA is allowed to read from this mapping + */ +enum iommufd_ioas_map_flags { + IOMMU_IOAS_MAP_FIXED_IOVA = 1 << 0, + IOMMU_IOAS_MAP_WRITEABLE = 1 << 1, + IOMMU_IOAS_MAP_READABLE = 1 << 2, +}; + +/** + * struct iommu_ioas_map - ioctl(IOMMU_IOAS_MAP) + * @size: sizeof(struct iommu_ioas_map) + * @flags: Combination of enum iommufd_ioas_map_flags + * @ioas_id: IOAS ID to change the mapping of + * @__reserved: Must be 0 + * @user_va: Userspace pointer to start mapping from + * @length: Number of bytes to map + * @iova: IOVA the mapping was placed at. If IOMMU_IOAS_MAP_FIXED_IOVA is set + * then this must be provided as input. + * + * Set an IOVA mapping from a user pointer. If FIXED_IOVA is specified then the + * mapping will be established at iova, otherwise a suitable location based on + * the reserved and allowed lists will be automatically selected and returned in + * iova. 
+ * + * If IOMMU_IOAS_MAP_FIXED_IOVA is specified then the iova range must currently + * be unused, existing IOVA cannot be replaced. + */ +struct iommu_ioas_map { + __u32 size; + __u32 flags; + __u32 ioas_id; + __u32 __reserved; + __aligned_u64 user_va; + __aligned_u64 length; + __aligned_u64 iova; +}; +#define IOMMU_IOAS_MAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP) + +/** + * struct iommu_ioas_copy - ioctl(IOMMU_IOAS_COPY) + * @size: sizeof(struct iommu_ioas_copy) + * @flags: Combination of enum iommufd_ioas_map_flags + * @dst_ioas_id: IOAS ID to change the mapping of + * @src_ioas_id: IOAS ID to copy from + * @length: Number of bytes to copy and map + * @dst_iova: IOVA the mapping was placed at. If IOMMU_IOAS_MAP_FIXED_IOVA is + * set then this must be provided as input. + * @src_iova: IOVA to start the copy + * + * Copy an already existing mapping from src_ioas_id and establish it in + * dst_ioas_id. The src iova/length must exactly match a range used with + * IOMMU_IOAS_MAP. + * + * This may be used to efficiently clone a subset of an IOAS to another, or as a + * kind of 'cache' to speed up mapping. Copy has an efficiency advantage over + * establishing equivalent new mappings, as internal resources are shared, and + * the kernel will pin the user memory only once. + */ +struct iommu_ioas_copy { + __u32 size; + __u32 flags; + __u32 dst_ioas_id; + __u32 src_ioas_id; + __aligned_u64 length; + __aligned_u64 dst_iova; + __aligned_u64 src_iova; +}; +#define IOMMU_IOAS_COPY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_COPY) + +/** + * struct iommu_ioas_unmap - ioctl(IOMMU_IOAS_UNMAP) + * @size: sizeof(struct iommu_ioas_unmap) + * @ioas_id: IOAS ID to change the mapping of + * @iova: IOVA to start the unmapping at + * @length: Number of bytes to unmap, and return back the bytes unmapped + * + * Unmap an IOVA range. The iova/length must be a superset of a previously + * mapped range used with IOMMU_IOAS_MAP or IOMMU_IOAS_COPY. Splitting or + * truncating ranges is not allowed. 
The values 0 to U64_MAX will unmap + * everything. + */ +struct iommu_ioas_unmap { + __u32 size; + __u32 ioas_id; + __aligned_u64 iova; + __aligned_u64 length; +}; +#define IOMMU_IOAS_UNMAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_UNMAP) + +/** + * enum iommufd_option - ioctl(IOMMU_OPTION_RLIMIT_MODE) and + * ioctl(IOMMU_OPTION_HUGE_PAGES) + * @IOMMU_OPTION_RLIMIT_MODE: + * Change how RLIMIT_MEMLOCK accounting works. The caller must have privilege + * to invoke this. Value 0 (default) is user based accounting, 1 uses process + * based accounting. Global option, object_id must be 0 + * @IOMMU_OPTION_HUGE_PAGES: + * Value 1 (default) allows contiguous pages to be combined when generating + * iommu mappings. Value 0 disables combining, everything is mapped to + * PAGE_SIZE. This can be useful for benchmarking. This is a per-IOAS + * option, the object_id must be the IOAS ID. + */ +enum iommufd_option { + IOMMU_OPTION_RLIMIT_MODE = 0, + IOMMU_OPTION_HUGE_PAGES = 1, +}; + +/** + * enum iommufd_option_ops - ioctl(IOMMU_OPTION_OP_SET) and + * ioctl(IOMMU_OPTION_OP_GET) + * @IOMMU_OPTION_OP_SET: Set the option's value + * @IOMMU_OPTION_OP_GET: Get the option's value + */ +enum iommufd_option_ops { + IOMMU_OPTION_OP_SET = 0, + IOMMU_OPTION_OP_GET = 1, +}; + +/** + * struct iommu_option - iommu option multiplexer + * @size: sizeof(struct iommu_option) + * @option_id: One of enum iommufd_option + * @op: One of enum iommufd_option_ops + * @__reserved: Must be 0 + * @object_id: ID of the object if required + * @val64: Option value to set or value returned on get + * + * Change a simple option value. This multiplexer allows controlling options + * on objects. IOMMU_OPTION_OP_SET will load an option and IOMMU_OPTION_OP_GET + * will return the current value.
+ */ +struct iommu_option { + __u32 size; + __u32 option_id; + __u16 op; + __u16 __reserved; + __u32 object_id; + __aligned_u64 val64; +}; +#define IOMMU_OPTION _IO(IOMMUFD_TYPE, IOMMUFD_CMD_OPTION) #endif From ea4acfac57b9dee57a7d5840359a41cc3251de92 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 29 Nov 2022 16:29:35 -0400 Subject: [PATCH 3131/4122] iommufd: Add a HW pagetable object The hw_pagetable object exposes the internal struct iommu_domain's to userspace. An iommu_domain is required when any DMA device attaches to an IOAS to control the io page table through the iommu driver. For compatibility with VFIO the hw_pagetable is automatically created when a DMA device is attached to the IOAS. If a compatible iommu_domain already exists then the hw_pagetable associated with it is used for the attachment. In the initial series there is no iommufd uAPI for the hw_pagetable object. The next patch provides driver facing APIs for IO page table attachment that allows drivers to accept either an IOAS or a hw_pagetable ID and for the driver to return the hw_pagetable ID that was auto-selected from an IOAS. The expectation is the driver will provide uAPI through its own FD for attaching its device to iommufd. This allows userspace to learn the mapping of devices to iommu_domains and to override the automatic attachment. The future HW specific interface will allow userspace to create hw_pagetable objects using iommu_domains with IOMMU driver specific parameters. This infrastructure will allow linking those domains to IOAS's and devices. 
Link: https://lore.kernel.org/r/12-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Eric Auger Tested-by: Nicolin Chen Tested-by: Yi Liu Tested-by: Lixiao Yang Tested-by: Matthew Rosato Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/Makefile | 1 + drivers/iommu/iommufd/hw_pagetable.c | 57 +++++++++++++++++++++++++ drivers/iommu/iommufd/ioas.c | 3 ++ drivers/iommu/iommufd/iommufd_private.h | 33 ++++++++++++++ drivers/iommu/iommufd/main.c | 3 ++ 5 files changed, 97 insertions(+) create mode 100644 drivers/iommu/iommufd/hw_pagetable.c diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile index 2b4f36f1b72f..e13e971aa28c 100644 --- a/drivers/iommu/iommufd/Makefile +++ b/drivers/iommu/iommufd/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only iommufd-y := \ + hw_pagetable.o \ io_pagetable.o \ ioas.o \ main.o \ diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c new file mode 100644 index 000000000000..43d473989a06 --- /dev/null +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES + */ +#include + +#include "iommufd_private.h" + +void iommufd_hw_pagetable_destroy(struct iommufd_object *obj) +{ + struct iommufd_hw_pagetable *hwpt = + container_of(obj, struct iommufd_hw_pagetable, obj); + + WARN_ON(!list_empty(&hwpt->devices)); + + iommu_domain_free(hwpt->domain); + refcount_dec(&hwpt->ioas->obj.users); + mutex_destroy(&hwpt->devices_lock); +} + +/** + * iommufd_hw_pagetable_alloc() - Get an iommu_domain for a device + * @ictx: iommufd context + * @ioas: IOAS to associate the domain with + * @dev: Device to get an iommu_domain for + * + * Allocate a new iommu_domain and return it as a hw_pagetable. 
+ */ +struct iommufd_hw_pagetable * +iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, + struct device *dev) +{ + struct iommufd_hw_pagetable *hwpt; + int rc; + + hwpt = iommufd_object_alloc(ictx, hwpt, IOMMUFD_OBJ_HW_PAGETABLE); + if (IS_ERR(hwpt)) + return hwpt; + + hwpt->domain = iommu_domain_alloc(dev->bus); + if (!hwpt->domain) { + rc = -ENOMEM; + goto out_abort; + } + + INIT_LIST_HEAD(&hwpt->devices); + INIT_LIST_HEAD(&hwpt->hwpt_item); + mutex_init(&hwpt->devices_lock); + /* Pairs with iommufd_hw_pagetable_destroy() */ + refcount_inc(&ioas->obj.users); + hwpt->ioas = ioas; + return hwpt; + +out_abort: + iommufd_object_abort(ictx, &hwpt->obj); + return ERR_PTR(rc); +} diff --git a/drivers/iommu/iommufd/ioas.c b/drivers/iommu/iommufd/ioas.c index 6ff97dafc891..302779b33bd4 100644 --- a/drivers/iommu/iommufd/ioas.c +++ b/drivers/iommu/iommufd/ioas.c @@ -17,6 +17,7 @@ void iommufd_ioas_destroy(struct iommufd_object *obj) rc = iopt_unmap_all(&ioas->iopt, NULL); WARN_ON(rc && rc != -ENOENT); iopt_destroy_table(&ioas->iopt); + mutex_destroy(&ioas->mutex); } struct iommufd_ioas *iommufd_ioas_alloc(struct iommufd_ctx *ictx) @@ -28,6 +29,8 @@ struct iommufd_ioas *iommufd_ioas_alloc(struct iommufd_ctx *ictx) return ioas; iopt_init_table(&ioas->iopt); + INIT_LIST_HEAD(&ioas->hwpt_list); + mutex_init(&ioas->mutex); return ioas; } diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index 1a13c54a8def..6b0448702a95 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -103,6 +103,7 @@ static inline int iommufd_ucmd_respond(struct iommufd_ucmd *ucmd, enum iommufd_object_type { IOMMUFD_OBJ_NONE, IOMMUFD_OBJ_ANY = IOMMUFD_OBJ_NONE, + IOMMUFD_OBJ_HW_PAGETABLE, IOMMUFD_OBJ_IOAS, }; @@ -181,10 +182,20 @@ struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, * io_pagetable object. It is a user controlled mapping of IOVA -> PFNs. 
The * mapping is copied into all of the associated domains and made available to * in-kernel users. + * + * Every iommu_domain that is created is wrapped in an iommufd_hw_pagetable + * object. When we go to attach a device to an IOAS we need to get an + * iommu_domain and wrapping iommufd_hw_pagetable for it. + * + * An iommu_domain & iommufd_hw_pagetable will be automatically selected + * for a device based on the hwpt_list. If no suitable iommu_domain + * is found a new iommu_domain will be created. */ struct iommufd_ioas { struct iommufd_object obj; struct io_pagetable iopt; + struct mutex mutex; + struct list_head hwpt_list; }; static inline struct iommufd_ioas *iommufd_get_ioas(struct iommufd_ucmd *ucmd, @@ -207,6 +218,28 @@ int iommufd_ioas_option(struct iommufd_ucmd *ucmd); int iommufd_option_rlimit_mode(struct iommu_option *cmd, struct iommufd_ctx *ictx); +/* + * A HW pagetable is called an iommu_domain inside the kernel. This user object + * allows directly creating and inspecting the domains. Domains that have kernel + * owned page tables will be associated with an iommufd_ioas that provides the + * IOVA to PFN map.
+ */ +struct iommufd_hw_pagetable { + struct iommufd_object obj; + struct iommufd_ioas *ioas; + struct iommu_domain *domain; + bool auto_domain : 1; + /* Head at iommufd_ioas::hwpt_list */ + struct list_head hwpt_item; + struct mutex devices_lock; + struct list_head devices; +}; + +struct iommufd_hw_pagetable * +iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, + struct device *dev); +void iommufd_hw_pagetable_destroy(struct iommufd_object *obj); + struct iommufd_access { unsigned long iova_alignment; u32 iopt_access_list_id; diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index 1c0a1f499378..ac6580a7b706 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -360,6 +360,9 @@ static const struct iommufd_object_ops iommufd_object_ops[] = { [IOMMUFD_OBJ_IOAS] = { .destroy = iommufd_ioas_destroy, }, + [IOMMUFD_OBJ_HW_PAGETABLE] = { + .destroy = iommufd_hw_pagetable_destroy, + }, }; static struct miscdevice iommu_misc_dev = { From e8d57210035b6377d424ba964961892d01127cf6 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 29 Nov 2022 16:29:36 -0400 Subject: [PATCH 3132/4122] iommufd: Add kAPI toward external drivers for physical devices Add the four functions external drivers need to connect physical DMA to the IOMMUFD: iommufd_device_bind() / iommufd_device_unbind() Register the device with iommufd and establish security isolation. iommufd_device_attach() / iommufd_device_detach() Connect a bound device to a page table Binding a device creates a device object ID in the uAPI, however the generic API does not yet provide any IOCTLs to manipulate them. 
Link: https://lore.kernel.org/r/13-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com Reviewed-by: Kevin Tian Tested-by: Nicolin Chen Tested-by: Yi Liu Tested-by: Lixiao Yang Tested-by: Matthew Rosato Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/Makefile | 1 + drivers/iommu/iommufd/device.c | 419 ++++++++++++++++++++++++ drivers/iommu/iommufd/iommufd_private.h | 5 + drivers/iommu/iommufd/main.c | 3 + include/linux/iommufd.h | 9 + 5 files changed, 437 insertions(+) create mode 100644 drivers/iommu/iommufd/device.c diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile index e13e971aa28c..ca28a135b967 100644 --- a/drivers/iommu/iommufd/Makefile +++ b/drivers/iommu/iommufd/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only iommufd-y := \ + device.o \ hw_pagetable.o \ io_pagetable.o \ ioas.o \ diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c new file mode 100644 index 000000000000..67cd00b4d926 --- /dev/null +++ b/drivers/iommu/iommufd/device.c @@ -0,0 +1,419 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES + */ +#include +#include +#include +#include + +#include "iommufd_private.h" + +static bool allow_unsafe_interrupts; +module_param(allow_unsafe_interrupts, bool, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC( + allow_unsafe_interrupts, + "Allow IOMMUFD to bind to devices even if the platform cannot isolate " + "the MSI interrupt window. Enabling this is a security weakness."); + +/* + * A iommufd_device object represents the binding relationship between a + * consuming driver and the iommufd. These objects are created/destroyed by + * external drivers, not by userspace. 
+ */ +struct iommufd_device { + struct iommufd_object obj; + struct iommufd_ctx *ictx; + struct iommufd_hw_pagetable *hwpt; + /* Head at iommufd_hw_pagetable::devices */ + struct list_head devices_item; + /* always the physical device */ + struct device *dev; + struct iommu_group *group; + bool enforce_cache_coherency; +}; + +void iommufd_device_destroy(struct iommufd_object *obj) +{ + struct iommufd_device *idev = + container_of(obj, struct iommufd_device, obj); + + iommu_device_release_dma_owner(idev->dev); + iommu_group_put(idev->group); + iommufd_ctx_put(idev->ictx); +} + +/** + * iommufd_device_bind - Bind a physical device to an iommu fd + * @ictx: iommufd file descriptor + * @dev: Pointer to a physical device struct + * @id: Output ID number to return to userspace for this device + * + * A successful bind establishes an ownership over the device and returns + * struct iommufd_device pointer, otherwise returns error pointer. + * + * A driver using this API must set driver_managed_dma and must not touch + * the device until this routine succeeds and establishes ownership. + * + * Binding a PCI device places the entire RID under iommufd control. + * + * The caller must undo this with iommufd_device_unbind() + */ +struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, + struct device *dev, u32 *id) +{ + struct iommufd_device *idev; + struct iommu_group *group; + int rc; + + /* + * iommufd always sets IOMMU_CACHE because we offer no way for userspace + * to restore cache coherency. 
+ */ + if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) + return ERR_PTR(-EINVAL); + + group = iommu_group_get(dev); + if (!group) + return ERR_PTR(-ENODEV); + + rc = iommu_device_claim_dma_owner(dev, ictx); + if (rc) + goto out_group_put; + + idev = iommufd_object_alloc(ictx, idev, IOMMUFD_OBJ_DEVICE); + if (IS_ERR(idev)) { + rc = PTR_ERR(idev); + goto out_release_owner; + } + idev->ictx = ictx; + iommufd_ctx_get(ictx); + idev->dev = dev; + idev->enforce_cache_coherency = + device_iommu_capable(dev, IOMMU_CAP_ENFORCE_CACHE_COHERENCY); + /* The calling driver is a user until iommufd_device_unbind() */ + refcount_inc(&idev->obj.users); + /* group refcount moves into iommufd_device */ + idev->group = group; + + /* + * If the caller fails after this success it must call + * iommufd_device_unbind() which is safe since we hold this refcount. + * This also means the device is a leaf in the graph and no other object + * can take a reference on it. + */ + iommufd_object_finalize(ictx, &idev->obj); + *id = idev->obj.id; + return idev; + +out_release_owner: + iommu_device_release_dma_owner(dev); +out_group_put: + iommu_group_put(group); + return ERR_PTR(rc); +} +EXPORT_SYMBOL_NS_GPL(iommufd_device_bind, IOMMUFD); + +/** + * iommufd_device_unbind - Undo iommufd_device_bind() + * @idev: Device returned by iommufd_device_bind() + * + * Release the device from iommufd control. The DMA ownership will return back + * to unowned with DMA controlled by the DMA API. This invalidates the + * iommufd_device pointer, other APIs that consume it must not be called + * concurrently.
+ */ +void iommufd_device_unbind(struct iommufd_device *idev) +{ + bool was_destroyed; + + was_destroyed = iommufd_object_destroy_user(idev->ictx, &idev->obj); + WARN_ON(!was_destroyed); +} +EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, IOMMUFD); + +static int iommufd_device_setup_msi(struct iommufd_device *idev, + struct iommufd_hw_pagetable *hwpt, + phys_addr_t sw_msi_start) +{ + int rc; + + /* + * IOMMU_CAP_INTR_REMAP means that the platform is isolating MSI, and it + * creates the MSI window by default in the iommu domain. Nothing + * further to do. + */ + if (device_iommu_capable(idev->dev, IOMMU_CAP_INTR_REMAP)) + return 0; + + /* + * On ARM systems that set the global IRQ_DOMAIN_FLAG_MSI_REMAP every + * allocated iommu_domain will block interrupts by default and this + * special flow is needed to turn them back on. iommu_dma_prepare_msi() + * will install pages into our domain after request_irq() to make this + * work. + * + * FIXME: This is conceptually broken for iommufd since we want to allow + * userspace to change the domains, eg switch from an identity IOAS to a + * DMA IOAS. There is currently no way to create a MSI window that + * matches what the IRQ layer actually expects in a newly created + * domain. + */ + if (irq_domain_check_msi_remap()) { + if (WARN_ON(!sw_msi_start)) + return -EPERM; + /* + * iommu_get_msi_cookie() can only be called once per domain, + * it returns -EBUSY on later calls. + */ + if (hwpt->msi_cookie) + return 0; + rc = iommu_get_msi_cookie(hwpt->domain, sw_msi_start); + if (rc) + return rc; + hwpt->msi_cookie = true; + return 0; + } + + /* + * Otherwise the platform has a MSI window that is not isolated. For + * historical compat with VFIO allow a module parameter to ignore the + * insecurity. + */ + if (!allow_unsafe_interrupts) + return -EPERM; + + dev_warn( + idev->dev, + "MSI interrupt window cannot be isolated by the IOMMU, this platform is insecure. 
Use the \"allow_unsafe_interrupts\" module parameter to override\n"); + return 0; +} + +static bool iommufd_hw_pagetable_has_group(struct iommufd_hw_pagetable *hwpt, + struct iommu_group *group) +{ + struct iommufd_device *cur_dev; + + list_for_each_entry(cur_dev, &hwpt->devices, devices_item) + if (cur_dev->group == group) + return true; + return false; +} + +static int iommufd_device_do_attach(struct iommufd_device *idev, + struct iommufd_hw_pagetable *hwpt) +{ + phys_addr_t sw_msi_start = 0; + int rc; + + mutex_lock(&hwpt->devices_lock); + + /* + * Try to upgrade the domain we have, it is an iommu driver bug to + * report IOMMU_CAP_ENFORCE_CACHE_COHERENCY but fail + * enforce_cache_coherency when there are no devices attached to the + * domain. + */ + if (idev->enforce_cache_coherency && !hwpt->enforce_cache_coherency) { + if (hwpt->domain->ops->enforce_cache_coherency) + hwpt->enforce_cache_coherency = + hwpt->domain->ops->enforce_cache_coherency( + hwpt->domain); + if (!hwpt->enforce_cache_coherency) { + WARN_ON(list_empty(&hwpt->devices)); + rc = -EINVAL; + goto out_unlock; + } + } + + rc = iopt_table_enforce_group_resv_regions(&hwpt->ioas->iopt, idev->dev, + idev->group, &sw_msi_start); + if (rc) + goto out_unlock; + + rc = iommufd_device_setup_msi(idev, hwpt, sw_msi_start); + if (rc) + goto out_iova; + + /* + * FIXME: Hack around missing a device-centric iommu api, only attach to + * the group once for the first device that is in the group. 
+ */ + if (!iommufd_hw_pagetable_has_group(hwpt, idev->group)) { + rc = iommu_attach_group(hwpt->domain, idev->group); + if (rc) + goto out_iova; + + if (list_empty(&hwpt->devices)) { + rc = iopt_table_add_domain(&hwpt->ioas->iopt, + hwpt->domain); + if (rc) + goto out_detach; + } + } + + idev->hwpt = hwpt; + refcount_inc(&hwpt->obj.users); + list_add(&idev->devices_item, &hwpt->devices); + mutex_unlock(&hwpt->devices_lock); + return 0; + +out_detach: + iommu_detach_group(hwpt->domain, idev->group); +out_iova: + iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev); +out_unlock: + mutex_unlock(&hwpt->devices_lock); + return rc; +} + +/* + * When automatically managing the domains we search for a compatible domain in + * the iopt and if one is found use it, otherwise create a new domain. + * Automatic domain selection will never pick a manually created domain. + */ +static int iommufd_device_auto_get_domain(struct iommufd_device *idev, + struct iommufd_ioas *ioas) +{ + struct iommufd_hw_pagetable *hwpt; + int rc; + + /* + * There is no differentiation when domains are allocated, so any domain + * that is willing to attach to the device is interchangeable with any + * other. + */ + mutex_lock(&ioas->mutex); + list_for_each_entry(hwpt, &ioas->hwpt_list, hwpt_item) { + if (!hwpt->auto_domain) + continue; + + rc = iommufd_device_do_attach(idev, hwpt); + + /* + * -EINVAL means the domain is incompatible with the device. + * Other error codes should propagate to userspace as failure. + * Success means the domain is attached. 
+ */ + if (rc == -EINVAL) + continue; + goto out_unlock; + } + + hwpt = iommufd_hw_pagetable_alloc(idev->ictx, ioas, idev->dev); + if (IS_ERR(hwpt)) { + rc = PTR_ERR(hwpt); + goto out_unlock; + } + hwpt->auto_domain = true; + + rc = iommufd_device_do_attach(idev, hwpt); + if (rc) + goto out_abort; + list_add_tail(&hwpt->hwpt_item, &ioas->hwpt_list); + + mutex_unlock(&ioas->mutex); + iommufd_object_finalize(idev->ictx, &hwpt->obj); + return 0; + +out_abort: + iommufd_object_abort_and_destroy(idev->ictx, &hwpt->obj); +out_unlock: + mutex_unlock(&ioas->mutex); + return rc; +} + +/** + * iommufd_device_attach - Connect a device to an iommu_domain + * @idev: device to attach + * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HW_PAGETABLE + * Output the IOMMUFD_OBJ_HW_PAGETABLE ID + * + * This connects the device to an iommu_domain, either automatically or manually + * selected. Once this completes the device could do DMA. + * + * The caller should return the resulting pt_id back to userspace. + * This function is undone by calling iommufd_device_detach().
+ */
+int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id)
+{
+	struct iommufd_object *pt_obj;
+	int rc;
+
+	/* Paired with iommufd_put_object() at out_put_pt_obj */
+	pt_obj = iommufd_get_object(idev->ictx, *pt_id, IOMMUFD_OBJ_ANY);
+	if (IS_ERR(pt_obj))
+		return PTR_ERR(pt_obj);
+
+	switch (pt_obj->type) {
+	case IOMMUFD_OBJ_HW_PAGETABLE: {
+		struct iommufd_hw_pagetable *hwpt =
+			container_of(pt_obj, struct iommufd_hw_pagetable, obj);
+
+		/*
+		 * Do not touch ioas->hwpt_list here. A hw_pagetable is linked
+		 * into that list once, by iommufd_device_auto_get_domain()
+		 * when it is created, and iommufd_device_detach() unlinks it
+		 * only when its last device goes away. Calling list_add_tail()
+		 * again on the already linked hwpt_item for each additional
+		 * device would corrupt the list.
+		 */
+		rc = iommufd_device_do_attach(idev, hwpt);
+		if (rc)
+			goto out_put_pt_obj;
+		break;
+	}
+	case IOMMUFD_OBJ_IOAS: {
+		struct iommufd_ioas *ioas =
+			container_of(pt_obj, struct iommufd_ioas, obj);
+
+		/* Reuse a compatible automatic domain or create a new one */
+		rc = iommufd_device_auto_get_domain(idev, ioas);
+		if (rc)
+			goto out_put_pt_obj;
+		break;
+	}
+	default:
+		/* Only a HW pagetable or an IOAS can be attached to */
+		rc = -EINVAL;
+		goto out_put_pt_obj;
+	}
+
+	/* The attachment counts as a user of idev until iommufd_device_detach() */
+	refcount_inc(&idev->obj.users);
+	/* Report the ID of the HW pagetable that was actually attached */
+	*pt_id = idev->hwpt->obj.id;
+	rc = 0;
+
+out_put_pt_obj:
+	iommufd_put_object(pt_obj);
+	return rc;
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, IOMMUFD);
+
+/**
+ * iommufd_device_detach - Disconnect a device from an iommu_domain
+ * @idev: device to detach
+ *
+ * Undo iommufd_device_attach(). This disconnects the idev from the previously
+ * attached pt_id. The device returns back to a blocked DMA translation.
+ */ +void iommufd_device_detach(struct iommufd_device *idev) +{ + struct iommufd_hw_pagetable *hwpt = idev->hwpt; + + mutex_lock(&hwpt->ioas->mutex); + mutex_lock(&hwpt->devices_lock); + list_del(&idev->devices_item); + if (!iommufd_hw_pagetable_has_group(hwpt, idev->group)) { + if (list_empty(&hwpt->devices)) { + iopt_table_remove_domain(&hwpt->ioas->iopt, + hwpt->domain); + list_del(&hwpt->hwpt_item); + } + iommu_detach_group(hwpt->domain, idev->group); + } + iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev); + mutex_unlock(&hwpt->devices_lock); + mutex_unlock(&hwpt->ioas->mutex); + + if (hwpt->auto_domain) + iommufd_object_destroy_user(idev->ictx, &hwpt->obj); + else + refcount_dec(&hwpt->obj.users); + + idev->hwpt = NULL; + + refcount_dec(&idev->obj.users); +} +EXPORT_SYMBOL_NS_GPL(iommufd_device_detach, IOMMUFD); diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index 6b0448702a95..72a0c805be23 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -103,6 +103,7 @@ static inline int iommufd_ucmd_respond(struct iommufd_ucmd *ucmd, enum iommufd_object_type { IOMMUFD_OBJ_NONE, IOMMUFD_OBJ_ANY = IOMMUFD_OBJ_NONE, + IOMMUFD_OBJ_DEVICE, IOMMUFD_OBJ_HW_PAGETABLE, IOMMUFD_OBJ_IOAS, }; @@ -229,6 +230,8 @@ struct iommufd_hw_pagetable { struct iommufd_ioas *ioas; struct iommu_domain *domain; bool auto_domain : 1; + bool enforce_cache_coherency : 1; + bool msi_cookie : 1; /* Head at iommufd_ioas::hwpt_list */ struct list_head hwpt_item; struct mutex devices_lock; @@ -240,6 +243,8 @@ iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, struct device *dev); void iommufd_hw_pagetable_destroy(struct iommufd_object *obj); +void iommufd_device_destroy(struct iommufd_object *obj); + struct iommufd_access { unsigned long iova_alignment; u32 iopt_access_list_id; diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index 
ac6580a7b706..fe98912bab0e 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -357,6 +357,9 @@ void iommufd_ctx_put(struct iommufd_ctx *ictx) EXPORT_SYMBOL_NS_GPL(iommufd_ctx_put, IOMMUFD); static const struct iommufd_object_ops iommufd_object_ops[] = { + [IOMMUFD_OBJ_DEVICE] = { + .destroy = iommufd_device_destroy, + }, [IOMMUFD_OBJ_IOAS] = { .destroy = iommufd_ioas_destroy, }, diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 26e09d539737..185dff3eb32f 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -9,10 +9,19 @@ #include #include #include +#include +struct iommufd_device; struct iommufd_ctx; struct file; +struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, + struct device *dev, u32 *id); +void iommufd_device_unbind(struct iommufd_device *idev); + +int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id); +void iommufd_device_detach(struct iommufd_device *idev); + enum { IOMMUFD_ACCESS_RW_READ = 0, IOMMUFD_ACCESS_RW_WRITE = 1 << 0, From 8d40205f6093f18e07fe3dc5920fc85e9f82b8b3 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 29 Nov 2022 16:29:37 -0400 Subject: [PATCH 3133/4122] iommufd: Add kAPI toward external drivers for kernel access Kernel access is the mode that VFIO "mdevs" use. In this case there is no struct device and no IOMMU connection. iommufd acts as a record keeper for accesses and returns the actual struct pages back to the caller to use however they need. eg with kmap or the DMA API. Each caller must create a struct iommufd_access with iommufd_access_create(), similar to how iommufd_device_bind() works. Using this struct the caller can access blocks of IOVA using iommufd_access_pin_pages() or iommufd_access_rw(). Callers must provide a callback that immediately unpins any IOVA being used within a range. This happens if userspace unmaps the IOVA under the pin. 
The implementation forwards the access requests directly to the iopt infrastructure that manages the iopt_pages_access. Link: https://lore.kernel.org/r/14-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com Reviewed-by: Kevin Tian Tested-by: Nicolin Chen Tested-by: Yi Liu Tested-by: Lixiao Yang Tested-by: Matthew Rosato Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/device.c | 316 ++++++++++++++++++++++++ drivers/iommu/iommufd/io_pagetable.c | 8 +- drivers/iommu/iommufd/iommufd_private.h | 10 + drivers/iommu/iommufd/main.c | 3 + include/linux/iommufd.h | 43 +++- 5 files changed, 377 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index 67cd00b4d926..06b6894b7706 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -6,6 +6,7 @@ #include #include +#include "io_pagetable.h" #include "iommufd_private.h" static bool allow_unsafe_interrupts; @@ -417,3 +418,318 @@ void iommufd_device_detach(struct iommufd_device *idev) refcount_dec(&idev->obj.users); } EXPORT_SYMBOL_NS_GPL(iommufd_device_detach, IOMMUFD); + +void iommufd_access_destroy_object(struct iommufd_object *obj) +{ + struct iommufd_access *access = + container_of(obj, struct iommufd_access, obj); + + iopt_remove_access(&access->ioas->iopt, access); + iommufd_ctx_put(access->ictx); + refcount_dec(&access->ioas->obj.users); +} + +/** + * iommufd_access_create - Create an iommufd_access + * @ictx: iommufd file descriptor + * @ioas_id: ID for a IOMMUFD_OBJ_IOAS + * @ops: Driver's ops to associate with the access + * @data: Opaque data to pass into ops functions + * + * An iommufd_access allows a driver to read/write to the IOAS without using + * DMA. The underlying CPU memory can be accessed using the + * iommufd_access_pin_pages() or iommufd_access_rw() functions. + * + * The provided ops are required to use iommufd_access_pin_pages(). 
+ */
+struct iommufd_access *
+iommufd_access_create(struct iommufd_ctx *ictx, u32 ioas_id,
+		      const struct iommufd_access_ops *ops, void *data)
+{
+	struct iommufd_object *ioas_obj;
+	struct iommufd_access *access;
+	int rc;
+
+	/*
+	 * The access object is not reachable through any uAPI, yet it is
+	 * still allocated through the object infrastructure for symmetry.
+	 */
+	access = iommufd_object_alloc(ictx, access, IOMMUFD_OBJ_ACCESS);
+	if (IS_ERR(access))
+		return access;
+
+	access->ops = ops;
+	access->data = data;
+
+	ioas_obj = iommufd_get_object(ictx, ioas_id, IOMMUFD_OBJ_IOAS);
+	if (IS_ERR(ioas_obj)) {
+		rc = PTR_ERR(ioas_obj);
+		goto out_abort;
+	}
+	access->ioas = container_of(ioas_obj, struct iommufd_ioas, obj);
+	iommufd_ref_to_users(ioas_obj);
+
+	/* PAGE_SIZE alignment when the driver pins pages, byte alignment otherwise */
+	access->iova_alignment = ops->needs_pin_pages ? PAGE_SIZE : 1;
+
+	rc = iopt_add_access(&access->ioas->iopt, access);
+	if (rc)
+		goto out_put_ioas;
+
+	/* The calling driver stays a user until iommufd_access_destroy() */
+	refcount_inc(&access->obj.users);
+	access->ictx = ictx;
+	iommufd_ctx_get(ictx);
+	iommufd_object_finalize(ictx, &access->obj);
+	return access;
+
+out_put_ioas:
+	refcount_dec(&access->ioas->obj.users);
+out_abort:
+	iommufd_object_abort(ictx, &access->obj);
+	return ERR_PTR(rc);
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_access_create, IOMMUFD);
+
+/**
+ * iommufd_access_destroy - Destroy an iommufd_access
+ * @access: The access to destroy
+ *
+ * The caller must stop using the access before destroying it.
+ */
+void iommufd_access_destroy(struct iommufd_access *access)
+{
+	bool was_destroyed;
+
+	was_destroyed = iommufd_object_destroy_user(access->ictx, &access->obj);
+	WARN_ON(!was_destroyed); /* warn if destroy_user() reported failure */
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_access_destroy, IOMMUFD);
+
+/**
+ * iommufd_access_notify_unmap - Notify users of an iopt to stop using it
+ * @iopt: iopt to work on
+ * @iova: Starting iova in the iopt
+ * @length: Number of bytes
+ *
+ * After this function returns there should be no users attached to the pages
+ * linked to this iopt that intersect with iova,length. Anyone that has attached
+ * a user through iopt_access_pages() needs to detach it through
+ * iommufd_access_unpin_pages() before this function returns.
+ *
+ * iommufd_access_destroy() will wait for any outstanding unmap callback to
+ * complete. Once iommufd_access_destroy() returns, no unmap ops are running or
+ * will run in the future. Due to this a driver must not create locking that
+ * prevents unmap from completing while iommufd_access_destroy() is running.
+ *
+ * Every access found in the iopt's access_list has its ops->unmap() called
+ * with @iova and @length. The xarray lock is dropped around each callback and
+ * retaken before the walk continues. An access for which iommufd_lock_obj()
+ * fails is skipped.
+ */
+void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
+				 unsigned long length)
+{
+	struct iommufd_ioas *ioas =
+		container_of(iopt, struct iommufd_ioas, iopt);
+	struct iommufd_access *access;
+	unsigned long index;
+
+	xa_lock(&ioas->iopt.access_list);
+	xa_for_each(&ioas->iopt.access_list, index, access) {
+		if (!iommufd_lock_obj(&access->obj)) /* cannot be locked: skip */
+			continue;
+		xa_unlock(&ioas->iopt.access_list); /* callback runs unlocked */
+
+		access->ops->unmap(access->data, iova, length);
+
+		iommufd_put_object(&access->obj); /* pairs with iommufd_lock_obj() */
+		xa_lock(&ioas->iopt.access_list); /* retake before iterating on */
+	}
+	xa_unlock(&ioas->iopt.access_list);
+}
+
+/**
+ * iommufd_access_unpin_pages() - Undo iommufd_access_pin_pages
+ * @access: IOAS access to act on
+ * @iova: Starting IOVA
+ * @length: Number of bytes to access
+ *
+ * Return the struct page's. The caller must stop accessing them before calling
+ * this. The iova/length must exactly match the one provided to access_pages.
+ */ +void iommufd_access_unpin_pages(struct iommufd_access *access, + unsigned long iova, unsigned long length) +{ + struct io_pagetable *iopt = &access->ioas->iopt; + struct iopt_area_contig_iter iter; + unsigned long last_iova; + struct iopt_area *area; + + if (WARN_ON(!length) || + WARN_ON(check_add_overflow(iova, length - 1, &last_iova))) + return; + + down_read(&iopt->iova_rwsem); + iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) + iopt_area_remove_access( + area, iopt_area_iova_to_index(area, iter.cur_iova), + iopt_area_iova_to_index( + area, + min(last_iova, iopt_area_last_iova(area)))); + up_read(&iopt->iova_rwsem); + WARN_ON(!iopt_area_contig_done(&iter)); +} +EXPORT_SYMBOL_NS_GPL(iommufd_access_unpin_pages, IOMMUFD); + +static bool iopt_area_contig_is_aligned(struct iopt_area_contig_iter *iter) +{ + if (iopt_area_start_byte(iter->area, iter->cur_iova) % PAGE_SIZE) + return false; + + if (!iopt_area_contig_done(iter) && + (iopt_area_start_byte(iter->area, iopt_area_last_iova(iter->area)) % + PAGE_SIZE) != (PAGE_SIZE - 1)) + return false; + return true; +} + +static bool check_area_prot(struct iopt_area *area, unsigned int flags) +{ + if (flags & IOMMUFD_ACCESS_RW_WRITE) + return area->iommu_prot & IOMMU_WRITE; + return area->iommu_prot & IOMMU_READ; +} + +/** + * iommufd_access_pin_pages() - Return a list of pages under the iova + * @access: IOAS access to act on + * @iova: Starting IOVA + * @length: Number of bytes to access + * @out_pages: Output page list + * @flags: IOPMMUFD_ACCESS_RW_* flags + * + * Reads @length bytes starting at iova and returns the struct page * pointers. + * These can be kmap'd by the caller for CPU access. + * + * The caller must perform iommufd_access_unpin_pages() when done to balance + * this. + * + * This API always requires a page aligned iova. This happens naturally if the + * ioas alignment is >= PAGE_SIZE and the iova is PAGE_SIZE aligned. 
However
+ * smaller alignments have corner cases where this API can fail on otherwise
+ * aligned iova.
+ *
+ * Return: 0 on success. -EINVAL if @length is zero, if an area in the range
+ * has prevent_access set, or if the range fails the page alignment check.
+ * -EOVERFLOW if @iova + @length wraps. -EPERM if an area's protection does not
+ * allow the access requested by @flags. -ENOENT if the range is not fully
+ * covered by contiguous areas. Otherwise the error returned by
+ * iopt_area_add_access(). On failure the accesses added by this call for the
+ * part of the range already walked are removed again.
+ */
+int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova,
+			     unsigned long length, struct page **out_pages,
+			     unsigned int flags)
+{
+	struct io_pagetable *iopt = &access->ioas->iopt;
+	struct iopt_area_contig_iter iter;
+	unsigned long last_iova;
+	struct iopt_area *area;
+	int rc;
+
+	if (!length)
+		return -EINVAL;
+	if (check_add_overflow(iova, length - 1, &last_iova))
+		return -EOVERFLOW;
+
+	down_read(&iopt->iova_rwsem);
+	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
+		unsigned long last = min(last_iova, iopt_area_last_iova(area));
+		unsigned long last_index = iopt_area_iova_to_index(area, last);
+		unsigned long index =
+			iopt_area_iova_to_index(area, iter.cur_iova);
+
+		if (area->prevent_access ||
+		    !iopt_area_contig_is_aligned(&iter)) {
+			rc = -EINVAL;
+			goto err_remove;
+		}
+
+		if (!check_area_prot(area, flags)) {
+			rc = -EPERM;
+			goto err_remove;
+		}
+
+		rc = iopt_area_add_access(area, index, last_index, out_pages,
+					  flags);
+		if (rc)
+			goto err_remove;
+		out_pages += last_index - index + 1; /* skip the slots just filled */
+	}
+	if (!iopt_area_contig_done(&iter)) { /* range not fully covered */
+		rc = -ENOENT;
+		goto err_remove;
+	}
+
+	up_read(&iopt->iova_rwsem);
+	return 0;
+
+err_remove:
+	if (iova < iter.cur_iova) { /* something before cur_iova was processed */
+		last_iova = iter.cur_iova - 1; /* unwind only [iova, cur_iova - 1] */
+		iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova)
+			iopt_area_remove_access(
+				area,
+				iopt_area_iova_to_index(area, iter.cur_iova),
+				iopt_area_iova_to_index(
+					area, min(last_iova,
+						  iopt_area_last_iova(area))));
+	}
+	up_read(&iopt->iova_rwsem);
+	return rc;
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_access_pin_pages, IOMMUFD);
+
+/**
+ * iommufd_access_rw - Read or write data under the iova
+ * @access: IOAS access to act on
+ * @iova: Starting IOVA
+ * @data: Kernel buffer to copy to/from
+ * @length: Number of bytes to access
+ * @flags: IOMMUFD_ACCESS_RW_* flags
+ *
+ * Copy kernel to/from data into the range
given by IOVA/length. If flags + * indicates IOMMUFD_ACCESS_RW_KTHREAD then a large copy can be optimized + * by changing it into copy_to/from_user(). + */ +int iommufd_access_rw(struct iommufd_access *access, unsigned long iova, + void *data, size_t length, unsigned int flags) +{ + struct io_pagetable *iopt = &access->ioas->iopt; + struct iopt_area_contig_iter iter; + struct iopt_area *area; + unsigned long last_iova; + int rc; + + if (!length) + return -EINVAL; + if (check_add_overflow(iova, length - 1, &last_iova)) + return -EOVERFLOW; + + down_read(&iopt->iova_rwsem); + iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) { + unsigned long last = min(last_iova, iopt_area_last_iova(area)); + unsigned long bytes = (last - iter.cur_iova) + 1; + + if (area->prevent_access) { + rc = -EINVAL; + goto err_out; + } + + if (!check_area_prot(area, flags)) { + rc = -EPERM; + goto err_out; + } + + rc = iopt_pages_rw_access( + area->pages, iopt_area_start_byte(area, iter.cur_iova), + data, bytes, flags); + if (rc) + goto err_out; + data += bytes; + } + if (!iopt_area_contig_done(&iter)) + rc = -ENOENT; +err_out: + up_read(&iopt->iova_rwsem); + return rc; +} +EXPORT_SYMBOL_NS_GPL(iommufd_access_rw, IOMMUFD); diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c index 756d347948f0..4f4a9d9aac57 100644 --- a/drivers/iommu/iommufd/io_pagetable.c +++ b/drivers/iommu/iommufd/io_pagetable.c @@ -458,6 +458,7 @@ static int iopt_unmap_iova_range(struct io_pagetable *iopt, unsigned long start, * is NULL. This prevents domain attach/detatch from running * concurrently with cleaning up the area. 
*/ +again: down_read(&iopt->domains_rwsem); down_write(&iopt->iova_rwsem); while ((area = iopt_area_iter_first(iopt, start, last))) { @@ -486,8 +487,11 @@ static int iopt_unmap_iova_range(struct io_pagetable *iopt, unsigned long start, area->prevent_access = true; up_write(&iopt->iova_rwsem); up_read(&iopt->domains_rwsem); - /* Later patch calls back to drivers to unmap */ - return -EBUSY; + iommufd_access_notify_unmap(iopt, area_first, + iopt_area_length(area)); + if (WARN_ON(READ_ONCE(area->num_accesses))) + return -EDEADLOCK; + goto again; } pages = area->pages; diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index 72a0c805be23..40302cc0da36 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -65,6 +65,8 @@ int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova, unsigned long length, unsigned long *unmapped); int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped); +void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova, + unsigned long length); int iopt_table_add_domain(struct io_pagetable *iopt, struct iommu_domain *domain); void iopt_table_remove_domain(struct io_pagetable *iopt, @@ -106,6 +108,7 @@ enum iommufd_object_type { IOMMUFD_OBJ_DEVICE, IOMMUFD_OBJ_HW_PAGETABLE, IOMMUFD_OBJ_IOAS, + IOMMUFD_OBJ_ACCESS, }; /* Base struct for all objects with a userspace ID handle. 
*/ @@ -246,6 +249,11 @@ void iommufd_hw_pagetable_destroy(struct iommufd_object *obj); void iommufd_device_destroy(struct iommufd_object *obj); struct iommufd_access { + struct iommufd_object obj; + struct iommufd_ctx *ictx; + struct iommufd_ioas *ioas; + const struct iommufd_access_ops *ops; + void *data; unsigned long iova_alignment; u32 iopt_access_list_id; }; @@ -253,4 +261,6 @@ struct iommufd_access { int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access); void iopt_remove_access(struct io_pagetable *iopt, struct iommufd_access *access); +void iommufd_access_destroy_object(struct iommufd_object *obj); + #endif diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index fe98912bab0e..4153f6a20255 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -357,6 +357,9 @@ void iommufd_ctx_put(struct iommufd_ctx *ictx) EXPORT_SYMBOL_NS_GPL(iommufd_ctx_put, IOMMUFD); static const struct iommufd_object_ops iommufd_object_ops[] = { + [IOMMUFD_OBJ_ACCESS] = { + .destroy = iommufd_access_destroy_object, + }, [IOMMUFD_OBJ_DEVICE] = { .destroy = iommufd_device_destroy, }, diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 185dff3eb32f..46c481a26d79 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -9,10 +9,12 @@ #include #include #include -#include +struct device; struct iommufd_device; +struct page; struct iommufd_ctx; +struct iommufd_access; struct file; struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, @@ -22,6 +24,11 @@ void iommufd_device_unbind(struct iommufd_device *idev); int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id); void iommufd_device_detach(struct iommufd_device *idev); +struct iommufd_access_ops { + u8 needs_pin_pages : 1; + void (*unmap)(void *data, unsigned long iova, unsigned long length); +}; + enum { IOMMUFD_ACCESS_RW_READ = 0, IOMMUFD_ACCESS_RW_WRITE = 1 << 0, @@ -29,11 +36,24 @@ enum { IOMMUFD_ACCESS_RW_KTHREAD 
= 1 << 1, }; +struct iommufd_access * +iommufd_access_create(struct iommufd_ctx *ictx, u32 ioas_id, + const struct iommufd_access_ops *ops, void *data); +void iommufd_access_destroy(struct iommufd_access *access); + void iommufd_ctx_get(struct iommufd_ctx *ictx); #if IS_ENABLED(CONFIG_IOMMUFD) struct iommufd_ctx *iommufd_ctx_from_file(struct file *file); void iommufd_ctx_put(struct iommufd_ctx *ictx); + +int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova, + unsigned long length, struct page **out_pages, + unsigned int flags); +void iommufd_access_unpin_pages(struct iommufd_access *access, + unsigned long iova, unsigned long length); +int iommufd_access_rw(struct iommufd_access *access, unsigned long iova, + void *data, size_t len, unsigned int flags); #else /* !CONFIG_IOMMUFD */ static inline struct iommufd_ctx *iommufd_ctx_from_file(struct file *file) { @@ -43,5 +63,26 @@ static inline struct iommufd_ctx *iommufd_ctx_from_file(struct file *file) static inline void iommufd_ctx_put(struct iommufd_ctx *ictx) { } + +static inline int iommufd_access_pin_pages(struct iommufd_access *access, + unsigned long iova, + unsigned long length, + struct page **out_pages, + unsigned int flags) +{ + return -EOPNOTSUPP; +} + +static inline void iommufd_access_unpin_pages(struct iommufd_access *access, + unsigned long iova, + unsigned long length) +{ +} + +static inline int iommufd_access_rw(struct iommufd_access *access, unsigned long iova, + void *data, size_t len, unsigned int flags) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_IOMMUFD */ #endif From d624d6652a65ad4f47a58b8651a1ec1163bb81d3 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 29 Nov 2022 16:29:38 -0400 Subject: [PATCH 3134/4122] iommufd: vfio container FD ioctl compatibility iommufd can directly implement the /dev/vfio/vfio container IOCTLs by mapping them into io_pagetable operations. 
A userspace application can test against iommufd and confirm compatibility then simply make a small change to open /dev/iommu instead of /dev/vfio/vfio. For testing purposes /dev/vfio/vfio can be symlinked to /dev/iommu and then all applications will use the compatibility path with no code changes. A later series allows /dev/vfio/vfio to be directly provided by iommufd, which allows the rlimit mode to work the same as well. This series just provides the iommufd side of compatibility. Actually linking this to VFIO_SET_CONTAINER is a followup series, with a link in the cover letter. Internally the compatibility API uses a normal IOAS object that, like vfio, is automatically allocated when the first device is attached. Userspace can also query or set this IOAS object directly using the IOMMU_VFIO_IOAS ioctl. This allows mixing and matching new iommufd only features while still using the VFIO style map/unmap ioctls. While this is enough to operate qemu, it has a few differences: - Resource limits rely on memory cgroups to bound what userspace can do instead of the module parameter dma_entry_limit. - VFIO P2P is not implemented. The DMABUF patches for vfio are a start at a solution where iommufd would import a special DMABUF. This is to avoid further propagating the follow_pfn() security problem. - A full audit for pedantic compatibility details (eg errnos, etc) has not yet been done - powerpc SPAPR is left out, as it is not connected to the iommu_domain framework. It seems interest in SPAPR is minimal as it is currently non-working in v6.1-rc1. They will have to convert to the iommu subsystem framework to enjoy iommufd. The following are not going to be implemented and we expect to remove them from VFIO type1: - SW access 'dirty tracking'. As discussed in the cover letter this will be done in VFIO.
- VFIO_TYPE1_NESTING_IOMMU https://lore.kernel.org/all/0-v1-0093c9b0e345+19-vfio_no_nesting_jgg@nvidia.com/ - VFIO_DMA_MAP_FLAG_VADDR https://lore.kernel.org/all/Yz777bJZjTyLrHEQ@nvidia.com/ Link: https://lore.kernel.org/r/15-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com Tested-by: Nicolin Chen Tested-by: Yi Liu Tested-by: Lixiao Yang Tested-by: Matthew Rosato Reviewed-by: Kevin Tian Reviewed-by: Eric Auger Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/Makefile | 3 +- drivers/iommu/iommufd/iommufd_private.h | 6 + drivers/iommu/iommufd/main.c | 16 +- drivers/iommu/iommufd/vfio_compat.c | 472 ++++++++++++++++++++++++ include/linux/iommufd.h | 7 + include/uapi/linux/iommufd.h | 36 ++ 6 files changed, 534 insertions(+), 6 deletions(-) create mode 100644 drivers/iommu/iommufd/vfio_compat.c diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile index ca28a135b967..2fdff04000b3 100644 --- a/drivers/iommu/iommufd/Makefile +++ b/drivers/iommu/iommufd/Makefile @@ -5,6 +5,7 @@ iommufd-y := \ io_pagetable.o \ ioas.o \ main.o \ - pages.o + pages.o \ + vfio_compat.o obj-$(CONFIG_IOMMUFD) += iommufd.o diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index 40302cc0da36..8fe5f162ccbc 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -18,6 +18,7 @@ struct iommufd_ctx { struct xarray objects; u8 account_mode; + struct iommufd_ioas *vfio_ioas; }; /* @@ -92,6 +93,9 @@ struct iommufd_ucmd { void *cmd; }; +int iommufd_vfio_ioctl(struct iommufd_ctx *ictx, unsigned int cmd, + unsigned long arg); + /* Copy the response in ucmd->cmd back to userspace. 
*/ static inline int iommufd_ucmd_respond(struct iommufd_ucmd *ucmd, size_t cmd_len) @@ -222,6 +226,8 @@ int iommufd_ioas_option(struct iommufd_ucmd *ucmd); int iommufd_option_rlimit_mode(struct iommu_option *cmd, struct iommufd_ctx *ictx); +int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd); + /* * A HW pagetable is called an iommu_domain inside the kernel. This user object * allows directly creating and inspecting the domains. Domains that have kernel diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index 4153f6a20255..5cf69c4d591d 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -133,6 +133,8 @@ bool iommufd_object_destroy_user(struct iommufd_ctx *ictx, return false; } __xa_erase(&ictx->objects, obj->id); + if (ictx->vfio_ioas && &ictx->vfio_ioas->obj == obj) + ictx->vfio_ioas = NULL; xa_unlock(&ictx->objects); up_write(&obj->destroy_rwsem); @@ -271,27 +273,31 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = { length), IOCTL_OP(IOMMU_OPTION, iommufd_option, struct iommu_option, val64), + IOCTL_OP(IOMMU_VFIO_IOAS, iommufd_vfio_ioas, struct iommu_vfio_ioas, + __reserved), }; static long iommufd_fops_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { + struct iommufd_ctx *ictx = filp->private_data; const struct iommufd_ioctl_op *op; struct iommufd_ucmd ucmd = {}; union ucmd_buffer buf; unsigned int nr; int ret; - ucmd.ictx = filp->private_data; + nr = _IOC_NR(cmd); + if (nr < IOMMUFD_CMD_BASE || + (nr - IOMMUFD_CMD_BASE) >= ARRAY_SIZE(iommufd_ioctl_ops)) + return iommufd_vfio_ioctl(ictx, cmd, arg); + + ucmd.ictx = ictx; ucmd.ubuffer = (void __user *)arg; ret = get_user(ucmd.user_size, (u32 __user *)ucmd.ubuffer); if (ret) return ret; - nr = _IOC_NR(cmd); - if (nr < IOMMUFD_CMD_BASE || - (nr - IOMMUFD_CMD_BASE) >= ARRAY_SIZE(iommufd_ioctl_ops)) - return -ENOIOCTLCMD; op = &iommufd_ioctl_ops[nr - IOMMUFD_CMD_BASE]; if (op->ioctl_num != cmd) return -ENOIOCTLCMD; diff --git 
a/drivers/iommu/iommufd/vfio_compat.c b/drivers/iommu/iommufd/vfio_compat.c new file mode 100644 index 000000000000..3ceca0e8311c --- /dev/null +++ b/drivers/iommu/iommufd/vfio_compat.c @@ -0,0 +1,472 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "iommufd_private.h" + +static struct iommufd_ioas *get_compat_ioas(struct iommufd_ctx *ictx) +{ + struct iommufd_ioas *ioas = ERR_PTR(-ENODEV); + + xa_lock(&ictx->objects); + if (!ictx->vfio_ioas || !iommufd_lock_obj(&ictx->vfio_ioas->obj)) + goto out_unlock; + ioas = ictx->vfio_ioas; +out_unlock: + xa_unlock(&ictx->objects); + return ioas; +} + +/** + * iommufd_vfio_compat_ioas_id - Return the IOAS ID that vfio should use + * @ictx: Context to operate on + * @out_ioas_id: The ioas_id the caller should use + * + * The compatibility IOAS is the IOAS that the vfio compatibility ioctls operate + * on since they do not have an IOAS ID input in their ABI. Only attaching a + * group should cause a default creation of the internal ioas, this returns the + * existing ioas if it has already been assigned somehow. + */ +int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id) +{ + struct iommufd_ioas *ioas = NULL; + struct iommufd_ioas *out_ioas; + + ioas = iommufd_ioas_alloc(ictx); + if (IS_ERR(ioas)) + return PTR_ERR(ioas); + + xa_lock(&ictx->objects); + if (ictx->vfio_ioas && iommufd_lock_obj(&ictx->vfio_ioas->obj)) + out_ioas = ictx->vfio_ioas; + else { + out_ioas = ioas; + ictx->vfio_ioas = ioas; + } + xa_unlock(&ictx->objects); + + *out_ioas_id = out_ioas->obj.id; + if (out_ioas != ioas) { + iommufd_put_object(&out_ioas->obj); + iommufd_object_abort(ictx, &ioas->obj); + return 0; + } + /* + * An automatically created compat IOAS is treated as a userspace + * created object. 
Userspace can learn the ID via IOMMU_VFIO_IOAS_GET, + * and if not manually destroyed it will be destroyed automatically + * at iommufd release. + */ + iommufd_object_finalize(ictx, &ioas->obj); + return 0; +} +EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_id, IOMMUFD_VFIO); + +int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd) +{ + struct iommu_vfio_ioas *cmd = ucmd->cmd; + struct iommufd_ioas *ioas; + + if (cmd->__reserved) + return -EOPNOTSUPP; + switch (cmd->op) { + case IOMMU_VFIO_IOAS_GET: + ioas = get_compat_ioas(ucmd->ictx); + if (IS_ERR(ioas)) + return PTR_ERR(ioas); + cmd->ioas_id = ioas->obj.id; + iommufd_put_object(&ioas->obj); + return iommufd_ucmd_respond(ucmd, sizeof(*cmd)); + + case IOMMU_VFIO_IOAS_SET: + ioas = iommufd_get_ioas(ucmd, cmd->ioas_id); + if (IS_ERR(ioas)) + return PTR_ERR(ioas); + xa_lock(&ucmd->ictx->objects); + ucmd->ictx->vfio_ioas = ioas; + xa_unlock(&ucmd->ictx->objects); + iommufd_put_object(&ioas->obj); + return 0; + + case IOMMU_VFIO_IOAS_CLEAR: + xa_lock(&ucmd->ictx->objects); + ucmd->ictx->vfio_ioas = NULL; + xa_unlock(&ucmd->ictx->objects); + return 0; + default: + return -EOPNOTSUPP; + } +} + +static int iommufd_vfio_map_dma(struct iommufd_ctx *ictx, unsigned int cmd, + void __user *arg) +{ + u32 supported_flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; + size_t minsz = offsetofend(struct vfio_iommu_type1_dma_map, size); + struct vfio_iommu_type1_dma_map map; + int iommu_prot = IOMMU_CACHE; + struct iommufd_ioas *ioas; + unsigned long iova; + int rc; + + if (copy_from_user(&map, arg, minsz)) + return -EFAULT; + + if (map.argsz < minsz || map.flags & ~supported_flags) + return -EINVAL; + + if (map.flags & VFIO_DMA_MAP_FLAG_READ) + iommu_prot |= IOMMU_READ; + if (map.flags & VFIO_DMA_MAP_FLAG_WRITE) + iommu_prot |= IOMMU_WRITE; + + ioas = get_compat_ioas(ictx); + if (IS_ERR(ioas)) + return PTR_ERR(ioas); + + /* + * Maps created through the legacy interface always use VFIO compatible + * rlimit accounting. 
If the user wishes to use the faster user based + * rlimit accounting then they must use the new interface. + */ + iova = map.iova; + rc = iopt_map_user_pages(ictx, &ioas->iopt, &iova, u64_to_user_ptr(map.vaddr), + map.size, iommu_prot, 0); + iommufd_put_object(&ioas->obj); + return rc; +} + +static int iommufd_vfio_unmap_dma(struct iommufd_ctx *ictx, unsigned int cmd, + void __user *arg) +{ + size_t minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, size); + /* + * VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP is obsoleted by the new + * dirty tracking direction: + * https://lore.kernel.org/kvm/20220731125503.142683-1-yishaih@nvidia.com/ + * https://lore.kernel.org/kvm/20220428210933.3583-1-joao.m.martins@oracle.com/ + */ + u32 supported_flags = VFIO_DMA_UNMAP_FLAG_ALL; + struct vfio_iommu_type1_dma_unmap unmap; + unsigned long unmapped = 0; + struct iommufd_ioas *ioas; + int rc; + + if (copy_from_user(&unmap, arg, minsz)) + return -EFAULT; + + if (unmap.argsz < minsz || unmap.flags & ~supported_flags) + return -EINVAL; + + ioas = get_compat_ioas(ictx); + if (IS_ERR(ioas)) + return PTR_ERR(ioas); + + if (unmap.flags & VFIO_DMA_UNMAP_FLAG_ALL) { + if (unmap.iova != 0 || unmap.size != 0) { + rc = -EINVAL; + goto err_put; + } + rc = iopt_unmap_all(&ioas->iopt, &unmapped); + } else { + if (READ_ONCE(ioas->iopt.disable_large_pages)) { + /* + * Create cuts at the start and last of the requested + * range. If the start IOVA is 0 then it doesn't need to + * be cut. + */ + unsigned long iovas[] = { unmap.iova + unmap.size - 1, + unmap.iova - 1 }; + + rc = iopt_cut_iova(&ioas->iopt, iovas, + unmap.iova ? 
2 : 1); + if (rc) + goto err_put; + } + rc = iopt_unmap_iova(&ioas->iopt, unmap.iova, unmap.size, + &unmapped); + } + unmap.size = unmapped; + if (copy_to_user(arg, &unmap, minsz)) + rc = -EFAULT; + +err_put: + iommufd_put_object(&ioas->obj); + return rc; +} + +static int iommufd_vfio_cc_iommu(struct iommufd_ctx *ictx) +{ + struct iommufd_hw_pagetable *hwpt; + struct iommufd_ioas *ioas; + int rc = 1; + + ioas = get_compat_ioas(ictx); + if (IS_ERR(ioas)) + return PTR_ERR(ioas); + + mutex_lock(&ioas->mutex); + list_for_each_entry(hwpt, &ioas->hwpt_list, hwpt_item) { + if (!hwpt->enforce_cache_coherency) { + rc = 0; + break; + } + } + mutex_unlock(&ioas->mutex); + + iommufd_put_object(&ioas->obj); + return rc; +} + +static int iommufd_vfio_check_extension(struct iommufd_ctx *ictx, + unsigned long type) +{ + switch (type) { + case VFIO_TYPE1_IOMMU: + case VFIO_TYPE1v2_IOMMU: + case VFIO_UNMAP_ALL: + return 1; + + case VFIO_DMA_CC_IOMMU: + return iommufd_vfio_cc_iommu(ictx); + + /* + * This is obsolete, and to be removed from VFIO. It was an incomplete + * idea that got merged. + * https://lore.kernel.org/kvm/0-v1-0093c9b0e345+19-vfio_no_nesting_jgg@nvidia.com/ + */ + case VFIO_TYPE1_NESTING_IOMMU: + return 0; + + /* + * VFIO_DMA_MAP_FLAG_VADDR + * https://lore.kernel.org/kvm/1611939252-7240-1-git-send-email-steven.sistare@oracle.com/ + * https://lore.kernel.org/all/Yz777bJZjTyLrHEQ@nvidia.com/ + * + * It is hard to see how this could be implemented safely. + */ + case VFIO_UPDATE_VADDR: + default: + return 0; + } +} + +static int iommufd_vfio_set_iommu(struct iommufd_ctx *ictx, unsigned long type) +{ + struct iommufd_ioas *ioas = NULL; + int rc = 0; + + if (type != VFIO_TYPE1_IOMMU && type != VFIO_TYPE1v2_IOMMU) + return -EINVAL; + + /* VFIO fails the set_iommu if there is no group */ + ioas = get_compat_ioas(ictx); + if (IS_ERR(ioas)) + return PTR_ERR(ioas); + + /* + * The difference between TYPE1 and TYPE1v2 is the ability to unmap in + * the middle of mapped ranges. 
This is complicated by huge page support + * which creates single large IOPTEs that cannot be split by the iommu + * driver. TYPE1 is very old at this point and likely nothing uses it, + * however it is simple enough to emulate by simply disabling the + * problematic large IOPTEs. Then we can safely unmap within any range. + */ + if (type == VFIO_TYPE1_IOMMU) + rc = iopt_disable_large_pages(&ioas->iopt); + iommufd_put_object(&ioas->obj); + return rc; +} + +static unsigned long iommufd_get_pagesizes(struct iommufd_ioas *ioas) +{ + struct io_pagetable *iopt = &ioas->iopt; + unsigned long pgsize_bitmap = ULONG_MAX; + struct iommu_domain *domain; + unsigned long index; + + down_read(&iopt->domains_rwsem); + xa_for_each(&iopt->domains, index, domain) + pgsize_bitmap &= domain->pgsize_bitmap; + + /* See vfio_update_pgsize_bitmap() */ + if (pgsize_bitmap & ~PAGE_MASK) { + pgsize_bitmap &= PAGE_MASK; + pgsize_bitmap |= PAGE_SIZE; + } + pgsize_bitmap = max(pgsize_bitmap, ioas->iopt.iova_alignment); + up_read(&iopt->domains_rwsem); + return pgsize_bitmap; +} + +static int iommufd_fill_cap_iova(struct iommufd_ioas *ioas, + struct vfio_info_cap_header __user *cur, + size_t avail) +{ + struct vfio_iommu_type1_info_cap_iova_range __user *ucap_iovas = + container_of(cur, + struct vfio_iommu_type1_info_cap_iova_range __user, + header); + struct vfio_iommu_type1_info_cap_iova_range cap_iovas = { + .header = { + .id = VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE, + .version = 1, + }, + }; + struct interval_tree_span_iter span; + + interval_tree_for_each_span(&span, &ioas->iopt.reserved_itree, 0, + ULONG_MAX) { + struct vfio_iova_range range; + + if (!span.is_hole) + continue; + range.start = span.start_hole; + range.end = span.last_hole; + if (avail >= struct_size(&cap_iovas, iova_ranges, + cap_iovas.nr_iovas + 1) && + copy_to_user(&ucap_iovas->iova_ranges[cap_iovas.nr_iovas], + &range, sizeof(range))) + return -EFAULT; + cap_iovas.nr_iovas++; + } + if (avail >= struct_size(&cap_iovas, 
iova_ranges, cap_iovas.nr_iovas) && + copy_to_user(ucap_iovas, &cap_iovas, sizeof(cap_iovas))) + return -EFAULT; + return struct_size(&cap_iovas, iova_ranges, cap_iovas.nr_iovas); +} + +static int iommufd_fill_cap_dma_avail(struct iommufd_ioas *ioas, + struct vfio_info_cap_header __user *cur, + size_t avail) +{ + struct vfio_iommu_type1_info_dma_avail cap_dma = { + .header = { + .id = VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL, + .version = 1, + }, + /* + * iommufd's limit is based on the cgroup's memory limit. + * Normally vfio would return U16_MAX here, and provide a module + * parameter to adjust it. Since S390 qemu userspace actually + * pays attention and needs a value bigger than U16_MAX return + * U32_MAX. + */ + .avail = U32_MAX, + }; + + if (avail >= sizeof(cap_dma) && + copy_to_user(cur, &cap_dma, sizeof(cap_dma))) + return -EFAULT; + return sizeof(cap_dma); +} + +static int iommufd_vfio_iommu_get_info(struct iommufd_ctx *ictx, + void __user *arg) +{ + typedef int (*fill_cap_fn)(struct iommufd_ioas *ioas, + struct vfio_info_cap_header __user *cur, + size_t avail); + static const fill_cap_fn fill_fns[] = { + iommufd_fill_cap_dma_avail, + iommufd_fill_cap_iova, + }; + size_t minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes); + struct vfio_info_cap_header __user *last_cap = NULL; + struct vfio_iommu_type1_info info; + struct iommufd_ioas *ioas; + size_t total_cap_size; + int rc; + int i; + + if (copy_from_user(&info, arg, minsz)) + return -EFAULT; + + if (info.argsz < minsz) + return -EINVAL; + minsz = min_t(size_t, info.argsz, sizeof(info)); + + ioas = get_compat_ioas(ictx); + if (IS_ERR(ioas)) + return PTR_ERR(ioas); + + info.flags = VFIO_IOMMU_INFO_PGSIZES; + info.iova_pgsizes = iommufd_get_pagesizes(ioas); + info.cap_offset = 0; + + down_read(&ioas->iopt.iova_rwsem); + total_cap_size = sizeof(info); + for (i = 0; i != ARRAY_SIZE(fill_fns); i++) { + int cap_size; + + if (info.argsz > total_cap_size) + cap_size = fill_fns[i](ioas, arg + total_cap_size, 
+ info.argsz - total_cap_size); + else + cap_size = fill_fns[i](ioas, NULL, 0); + if (cap_size < 0) { + rc = cap_size; + goto out_put; + } + if (last_cap && info.argsz >= total_cap_size && + put_user(total_cap_size, &last_cap->next)) { + rc = -EFAULT; + goto out_put; + } + last_cap = arg + total_cap_size; + total_cap_size += cap_size; + } + + /* + * If the user did not provide enough space then only some caps are + * returned and the argsz will be updated to the correct amount to get + * all caps. + */ + if (info.argsz >= total_cap_size) + info.cap_offset = sizeof(info); + info.argsz = total_cap_size; + info.flags |= VFIO_IOMMU_INFO_CAPS; + if (copy_to_user(arg, &info, minsz)) { + rc = -EFAULT; + goto out_put; + } + rc = 0; + +out_put: + up_read(&ioas->iopt.iova_rwsem); + iommufd_put_object(&ioas->obj); + return rc; +} + +int iommufd_vfio_ioctl(struct iommufd_ctx *ictx, unsigned int cmd, + unsigned long arg) +{ + void __user *uarg = (void __user *)arg; + + switch (cmd) { + case VFIO_GET_API_VERSION: + return VFIO_API_VERSION; + case VFIO_SET_IOMMU: + return iommufd_vfio_set_iommu(ictx, arg); + case VFIO_CHECK_EXTENSION: + return iommufd_vfio_check_extension(ictx, arg); + case VFIO_IOMMU_GET_INFO: + return iommufd_vfio_iommu_get_info(ictx, uarg); + case VFIO_IOMMU_MAP_DMA: + return iommufd_vfio_map_dma(ictx, cmd, uarg); + case VFIO_IOMMU_UNMAP_DMA: + return iommufd_vfio_unmap_dma(ictx, cmd, uarg); + case VFIO_IOMMU_DIRTY_PAGES: + default: + return -ENOIOCTLCMD; + } + return -ENOIOCTLCMD; +} diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 46c481a26d79..84af9a239769 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -54,6 +54,7 @@ void iommufd_access_unpin_pages(struct iommufd_access *access, unsigned long iova, unsigned long length); int iommufd_access_rw(struct iommufd_access *access, unsigned long iova, void *data, size_t len, unsigned int flags); +int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id); 
#else /* !CONFIG_IOMMUFD */ static inline struct iommufd_ctx *iommufd_ctx_from_file(struct file *file) { @@ -84,5 +85,11 @@ static inline int iommufd_access_rw(struct iommufd_access *access, unsigned long { return -EOPNOTSUPP; } + +static inline int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, + u32 *out_ioas_id) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_IOMMUFD */ #endif diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 30cc5c5e2b34..98ebba80cfa1 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -44,6 +44,7 @@ enum { IOMMUFD_CMD_IOAS_MAP, IOMMUFD_CMD_IOAS_UNMAP, IOMMUFD_CMD_OPTION, + IOMMUFD_CMD_VFIO_IOAS, }; /** @@ -308,4 +309,39 @@ struct iommu_option { __aligned_u64 val64; }; #define IOMMU_OPTION _IO(IOMMUFD_TYPE, IOMMUFD_CMD_OPTION) + +/** + * enum iommufd_vfio_ioas_op - IOMMU_VFIO_IOAS_* ioctls + * @IOMMU_VFIO_IOAS_GET: Get the current compatibility IOAS + * @IOMMU_VFIO_IOAS_SET: Change the current compatibility IOAS + * @IOMMU_VFIO_IOAS_CLEAR: Disable VFIO compatibility + */ +enum iommufd_vfio_ioas_op { + IOMMU_VFIO_IOAS_GET = 0, + IOMMU_VFIO_IOAS_SET = 1, + IOMMU_VFIO_IOAS_CLEAR = 2, +}; + +/** + * struct iommu_vfio_ioas - ioctl(IOMMU_VFIO_IOAS) + * @size: sizeof(struct iommu_vfio_ioas) + * @ioas_id: For IOMMU_VFIO_IOAS_SET the input IOAS ID to set + * For IOMMU_VFIO_IOAS_GET will output the IOAS ID + * @op: One of enum iommufd_vfio_ioas_op + * @__reserved: Must be 0 + * + * The VFIO compatibility support uses a single ioas because VFIO APIs do not + * support the ID field. Set or Get the IOAS that VFIO compatibility will use. + * When VFIO_GROUP_SET_CONTAINER is used on an iommufd it will get the + * compatibility ioas, either by taking what is already set, or auto creating + * one. From then on VFIO will continue to use that ioas and is not effected by + * this ioctl. SET or CLEAR does not destroy any auto-created IOAS. 
+ */ +struct iommu_vfio_ioas { + __u32 size; + __u32 ioas_id; + __u16 op; + __u16 __reserved; +}; +#define IOMMU_VFIO_IOAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VFIO_IOAS) #endif From f4b20bb34c83dceade5470288f48f94ce3598ada Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 29 Nov 2022 16:29:39 -0400 Subject: [PATCH 3135/4122] iommufd: Add kernel support for testing iommufd Provide a mock kernel module for the iommu_domain that allows it to run without any HW and the mocking provides a way to directly validate that the PFNs loaded into the iommu_domain are correct. This exposes the access kAPI toward userspace to allow userspace to explore the functionality of pages.c and io_pagetable.c The mock also simulates the rare case of PAGE_SIZE > iommu page size as the mock will operate at a 2K iommu page size. This allows exercising all of the calculations to support this mismatch. This is also intended to support syzkaller exploring the same space. However, it is an unusually invasive config option to enable all of this. The config option should not be enabled in a production kernel. 
Link: https://lore.kernel.org/r/16-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com Tested-by: Matthew Rosato # s390 Tested-by: Eric Auger # aarch64 Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/Kconfig | 12 + drivers/iommu/iommufd/Makefile | 2 + drivers/iommu/iommufd/device.c | 38 ++ drivers/iommu/iommufd/ioas.c | 3 + drivers/iommu/iommufd/iommufd_private.h | 35 + drivers/iommu/iommufd/iommufd_test.h | 93 +++ drivers/iommu/iommufd/main.c | 14 + drivers/iommu/iommufd/pages.c | 8 + drivers/iommu/iommufd/selftest.c | 853 ++++++++++++++++++++++++ include/linux/iommufd.h | 3 + 10 files changed, 1061 insertions(+) create mode 100644 drivers/iommu/iommufd/iommufd_test.h create mode 100644 drivers/iommu/iommufd/selftest.c diff --git a/drivers/iommu/iommufd/Kconfig b/drivers/iommu/iommufd/Kconfig index 164812084a67..871244f2443f 100644 --- a/drivers/iommu/iommufd/Kconfig +++ b/drivers/iommu/iommufd/Kconfig @@ -10,3 +10,15 @@ config IOMMUFD it relates to managing IO page tables that point at user space memory. If you don't know what to do here, say N. 
+ +if IOMMUFD +config IOMMUFD_TEST + bool "IOMMU Userspace API Test support" + depends on DEBUG_KERNEL + depends on FAULT_INJECTION + depends on RUNTIME_TESTING_MENU + default n + help + This is dangerous, do not enable unless running + tools/testing/selftests/iommu +endif diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile index 2fdff04000b3..8aeba81800c5 100644 --- a/drivers/iommu/iommufd/Makefile +++ b/drivers/iommu/iommufd/Makefile @@ -8,4 +8,6 @@ iommufd-y := \ pages.o \ vfio_compat.o +iommufd-$(CONFIG_IOMMUFD_TEST) += selftest.o + obj-$(CONFIG_IOMMUFD) += iommufd.o diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index 06b6894b7706..67ce36152e8a 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -733,3 +733,41 @@ err_out: return rc; } EXPORT_SYMBOL_NS_GPL(iommufd_access_rw, IOMMUFD); + +#ifdef CONFIG_IOMMUFD_TEST +/* + * Creating a real iommufd_device is too hard, bypass creating a iommufd_device + * and go directly to attaching a domain. 
+ */ +struct iommufd_hw_pagetable * +iommufd_device_selftest_attach(struct iommufd_ctx *ictx, + struct iommufd_ioas *ioas, + struct device *mock_dev) +{ + struct iommufd_hw_pagetable *hwpt; + int rc; + + hwpt = iommufd_hw_pagetable_alloc(ictx, ioas, mock_dev); + if (IS_ERR(hwpt)) + return hwpt; + + rc = iopt_table_add_domain(&hwpt->ioas->iopt, hwpt->domain); + if (rc) + goto out_hwpt; + + refcount_inc(&hwpt->obj.users); + iommufd_object_finalize(ictx, &hwpt->obj); + return hwpt; + +out_hwpt: + iommufd_object_abort_and_destroy(ictx, &hwpt->obj); + return ERR_PTR(rc); +} + +void iommufd_device_selftest_detach(struct iommufd_ctx *ictx, + struct iommufd_hw_pagetable *hwpt) +{ + iopt_table_remove_domain(&hwpt->ioas->iopt, hwpt->domain); + refcount_dec(&hwpt->obj.users); +} +#endif diff --git a/drivers/iommu/iommufd/ioas.c b/drivers/iommu/iommufd/ioas.c index 302779b33bd4..31577e9d434f 100644 --- a/drivers/iommu/iommufd/ioas.c +++ b/drivers/iommu/iommufd/ioas.c @@ -242,6 +242,9 @@ int iommufd_ioas_copy(struct iommufd_ucmd *ucmd) unsigned long iova; int rc; + iommufd_test_syz_conv_iova_id(ucmd, cmd->src_ioas_id, &cmd->src_iova, + &cmd->flags); + if ((cmd->flags & ~(IOMMU_IOAS_MAP_FIXED_IOVA | IOMMU_IOAS_MAP_WRITEABLE | IOMMU_IOAS_MAP_READABLE))) diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index 8fe5f162ccbc..222e86591f8a 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -113,6 +113,9 @@ enum iommufd_object_type { IOMMUFD_OBJ_HW_PAGETABLE, IOMMUFD_OBJ_IOAS, IOMMUFD_OBJ_ACCESS, +#ifdef CONFIG_IOMMUFD_TEST + IOMMUFD_OBJ_SELFTEST, +#endif }; /* Base struct for all objects with a userspace ID handle. 
*/ @@ -269,4 +272,36 @@ void iopt_remove_access(struct io_pagetable *iopt, struct iommufd_access *access); void iommufd_access_destroy_object(struct iommufd_object *obj); +#ifdef CONFIG_IOMMUFD_TEST +struct iommufd_hw_pagetable * +iommufd_device_selftest_attach(struct iommufd_ctx *ictx, + struct iommufd_ioas *ioas, + struct device *mock_dev); +void iommufd_device_selftest_detach(struct iommufd_ctx *ictx, + struct iommufd_hw_pagetable *hwpt); +int iommufd_test(struct iommufd_ucmd *ucmd); +void iommufd_selftest_destroy(struct iommufd_object *obj); +extern size_t iommufd_test_memory_limit; +void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd, + unsigned int ioas_id, u64 *iova, u32 *flags); +bool iommufd_should_fail(void); +void __init iommufd_test_init(void); +void iommufd_test_exit(void); +#else +static inline void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd, + unsigned int ioas_id, + u64 *iova, u32 *flags) +{ +} +static inline bool iommufd_should_fail(void) +{ + return false; +} +static inline void __init iommufd_test_init(void) +{ +} +static inline void iommufd_test_exit(void) +{ +} +#endif #endif diff --git a/drivers/iommu/iommufd/iommufd_test.h b/drivers/iommu/iommufd/iommufd_test.h new file mode 100644 index 000000000000..1d96a8f466fd --- /dev/null +++ b/drivers/iommu/iommufd/iommufd_test.h @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. 
+ */ +#ifndef _UAPI_IOMMUFD_TEST_H +#define _UAPI_IOMMUFD_TEST_H + +#include +#include + +enum { + IOMMU_TEST_OP_ADD_RESERVED = 1, + IOMMU_TEST_OP_MOCK_DOMAIN, + IOMMU_TEST_OP_MD_CHECK_MAP, + IOMMU_TEST_OP_MD_CHECK_REFS, + IOMMU_TEST_OP_CREATE_ACCESS, + IOMMU_TEST_OP_DESTROY_ACCESS_PAGES, + IOMMU_TEST_OP_ACCESS_PAGES, + IOMMU_TEST_OP_ACCESS_RW, + IOMMU_TEST_OP_SET_TEMP_MEMORY_LIMIT, +}; + +enum { + MOCK_APERTURE_START = 1UL << 24, + MOCK_APERTURE_LAST = (1UL << 31) - 1, +}; + +enum { + MOCK_FLAGS_ACCESS_WRITE = 1 << 0, + MOCK_FLAGS_ACCESS_SYZ = 1 << 16, +}; + +enum { + MOCK_ACCESS_RW_WRITE = 1 << 0, + MOCK_ACCESS_RW_SLOW_PATH = 1 << 2, +}; + +enum { + MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES = 1 << 0, +}; + +struct iommu_test_cmd { + __u32 size; + __u32 op; + __u32 id; + __u32 __reserved; + union { + struct { + __aligned_u64 start; + __aligned_u64 length; + } add_reserved; + struct { + __u32 out_device_id; + __u32 out_hwpt_id; + } mock_domain; + struct { + __aligned_u64 iova; + __aligned_u64 length; + __aligned_u64 uptr; + } check_map; + struct { + __aligned_u64 length; + __aligned_u64 uptr; + __u32 refs; + } check_refs; + struct { + __u32 out_access_fd; + __u32 flags; + } create_access; + struct { + __u32 access_pages_id; + } destroy_access_pages; + struct { + __u32 flags; + __u32 out_access_pages_id; + __aligned_u64 iova; + __aligned_u64 length; + __aligned_u64 uptr; + } access_pages; + struct { + __aligned_u64 iova; + __aligned_u64 length; + __aligned_u64 uptr; + __u32 flags; + } access_rw; + struct { + __u32 limit; + } memory_limit; + }; + __u32 last; +}; +#define IOMMU_TEST_CMD _IO(IOMMUFD_TYPE, IOMMUFD_CMD_BASE + 32) + +#endif diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index 5cf69c4d591d..7c8f40bc8d98 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -19,6 +19,7 @@ #include #include "iommufd_private.h" +#include "iommufd_test.h" struct iommufd_object_ops { void (*destroy)(struct iommufd_object *obj); 
@@ -239,6 +240,9 @@ union ucmd_buffer { struct iommu_ioas_iova_ranges iova_ranges; struct iommu_ioas_map map; struct iommu_ioas_unmap unmap; +#ifdef CONFIG_IOMMUFD_TEST + struct iommu_test_cmd test; +#endif }; struct iommufd_ioctl_op { @@ -275,6 +279,9 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = { val64), IOCTL_OP(IOMMU_VFIO_IOAS, iommufd_vfio_ioas, struct iommu_vfio_ioas, __reserved), +#ifdef CONFIG_IOMMUFD_TEST + IOCTL_OP(IOMMU_TEST_CMD, iommufd_test, struct iommu_test_cmd, last), +#endif }; static long iommufd_fops_ioctl(struct file *filp, unsigned int cmd, @@ -375,6 +382,11 @@ static const struct iommufd_object_ops iommufd_object_ops[] = { [IOMMUFD_OBJ_HW_PAGETABLE] = { .destroy = iommufd_hw_pagetable_destroy, }, +#ifdef CONFIG_IOMMUFD_TEST + [IOMMUFD_OBJ_SELFTEST] = { + .destroy = iommufd_selftest_destroy, + }, +#endif }; static struct miscdevice iommu_misc_dev = { @@ -392,11 +404,13 @@ static int __init iommufd_init(void) ret = misc_register(&iommu_misc_dev); if (ret) return ret; + iommufd_test_init(); return 0; } static void __exit iommufd_exit(void) { + iommufd_test_exit(); misc_deregister(&iommu_misc_dev); } diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c index bafeee9d73e8..640331b8a079 100644 --- a/drivers/iommu/iommufd/pages.c +++ b/drivers/iommu/iommufd/pages.c @@ -56,7 +56,11 @@ #include "io_pagetable.h" #include "double_span.h" +#ifndef CONFIG_IOMMUFD_TEST #define TEMP_MEMORY_LIMIT 65536 +#else +#define TEMP_MEMORY_LIMIT iommufd_test_memory_limit +#endif #define BATCH_BACKUP_SIZE 32 /* @@ -1756,6 +1760,10 @@ int iopt_pages_rw_access(struct iopt_pages *pages, unsigned long start_byte, bool change_mm = current->mm != pages->source_mm; int rc = 0; + if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && + (flags & __IOMMUFD_ACCESS_RW_SLOW_PATH)) + change_mm = true; + if ((flags & IOMMUFD_ACCESS_RW_WRITE) && !pages->writable) return -EPERM; diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c new 
file mode 100644 index 000000000000..cfb5fe9a5e0e --- /dev/null +++ b/drivers/iommu/iommufd/selftest.c @@ -0,0 +1,853 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. + * + * Kernel side components to support tools/testing/selftests/iommu + */ +#include +#include +#include +#include +#include +#include +#include + +#include "io_pagetable.h" +#include "iommufd_private.h" +#include "iommufd_test.h" + +static DECLARE_FAULT_ATTR(fail_iommufd); +static struct dentry *dbgfs_root; + +size_t iommufd_test_memory_limit = 65536; + +enum { + MOCK_IO_PAGE_SIZE = PAGE_SIZE / 2, + + /* + * Like a real page table alignment requires the low bits of the address + * to be zero. xarray also requires the high bit to be zero, so we store + * the pfns shifted. The upper bits are used for metadata. + */ + MOCK_PFN_MASK = ULONG_MAX / MOCK_IO_PAGE_SIZE, + + _MOCK_PFN_START = MOCK_PFN_MASK + 1, + MOCK_PFN_START_IOVA = _MOCK_PFN_START, + MOCK_PFN_LAST_IOVA = _MOCK_PFN_START, +}; + +/* + * Syzkaller has trouble randomizing the correct iova to use since it is linked + * to the map ioctl's output, and it has no ide about that. So, simplify things. + * In syzkaller mode the 64 bit IOVA is converted into an nth area and offset + * value. This has a much smaller randomization space and syzkaller can hit it. 
+ */ +static unsigned long iommufd_test_syz_conv_iova(struct io_pagetable *iopt, + u64 *iova) +{ + struct syz_layout { + __u32 nth_area; + __u32 offset; + }; + struct syz_layout *syz = (void *)iova; + unsigned int nth = syz->nth_area; + struct iopt_area *area; + + down_read(&iopt->iova_rwsem); + for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area; + area = iopt_area_iter_next(area, 0, ULONG_MAX)) { + if (nth == 0) { + up_read(&iopt->iova_rwsem); + return iopt_area_iova(area) + syz->offset; + } + nth--; + } + up_read(&iopt->iova_rwsem); + + return 0; +} + +void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd, + unsigned int ioas_id, u64 *iova, u32 *flags) +{ + struct iommufd_ioas *ioas; + + if (!(*flags & MOCK_FLAGS_ACCESS_SYZ)) + return; + *flags &= ~(u32)MOCK_FLAGS_ACCESS_SYZ; + + ioas = iommufd_get_ioas(ucmd, ioas_id); + if (IS_ERR(ioas)) + return; + *iova = iommufd_test_syz_conv_iova(&ioas->iopt, iova); + iommufd_put_object(&ioas->obj); +} + +struct mock_iommu_domain { + struct iommu_domain domain; + struct xarray pfns; +}; + +enum selftest_obj_type { + TYPE_IDEV, +}; + +struct selftest_obj { + struct iommufd_object obj; + enum selftest_obj_type type; + + union { + struct { + struct iommufd_hw_pagetable *hwpt; + struct iommufd_ctx *ictx; + struct device mock_dev; + } idev; + }; +}; + +static struct iommu_domain *mock_domain_alloc(unsigned int iommu_domain_type) +{ + struct mock_iommu_domain *mock; + + if (WARN_ON(iommu_domain_type != IOMMU_DOMAIN_UNMANAGED)) + return NULL; + + mock = kzalloc(sizeof(*mock), GFP_KERNEL); + if (!mock) + return NULL; + mock->domain.geometry.aperture_start = MOCK_APERTURE_START; + mock->domain.geometry.aperture_end = MOCK_APERTURE_LAST; + mock->domain.pgsize_bitmap = MOCK_IO_PAGE_SIZE; + xa_init(&mock->pfns); + return &mock->domain; +} + +static void mock_domain_free(struct iommu_domain *domain) +{ + struct mock_iommu_domain *mock = + container_of(domain, struct mock_iommu_domain, domain); + + 
WARN_ON(!xa_empty(&mock->pfns)); + kfree(mock); +} + +static int mock_domain_map_pages(struct iommu_domain *domain, + unsigned long iova, phys_addr_t paddr, + size_t pgsize, size_t pgcount, int prot, + gfp_t gfp, size_t *mapped) +{ + struct mock_iommu_domain *mock = + container_of(domain, struct mock_iommu_domain, domain); + unsigned long flags = MOCK_PFN_START_IOVA; + unsigned long start_iova = iova; + + /* + * xarray does not reliably work with fault injection because it does a + * retry allocation, so put our own failure point. + */ + if (iommufd_should_fail()) + return -ENOENT; + + WARN_ON(iova % MOCK_IO_PAGE_SIZE); + WARN_ON(pgsize % MOCK_IO_PAGE_SIZE); + for (; pgcount; pgcount--) { + size_t cur; + + for (cur = 0; cur != pgsize; cur += MOCK_IO_PAGE_SIZE) { + void *old; + + if (pgcount == 1 && cur + MOCK_IO_PAGE_SIZE == pgsize) + flags = MOCK_PFN_LAST_IOVA; + old = xa_store(&mock->pfns, iova / MOCK_IO_PAGE_SIZE, + xa_mk_value((paddr / MOCK_IO_PAGE_SIZE) | + flags), + gfp); + if (xa_is_err(old)) { + for (; start_iova != iova; + start_iova += MOCK_IO_PAGE_SIZE) + xa_erase(&mock->pfns, + start_iova / + MOCK_IO_PAGE_SIZE); + return xa_err(old); + } + WARN_ON(old); + iova += MOCK_IO_PAGE_SIZE; + paddr += MOCK_IO_PAGE_SIZE; + *mapped += MOCK_IO_PAGE_SIZE; + flags = 0; + } + } + return 0; +} + +static size_t mock_domain_unmap_pages(struct iommu_domain *domain, + unsigned long iova, size_t pgsize, + size_t pgcount, + struct iommu_iotlb_gather *iotlb_gather) +{ + struct mock_iommu_domain *mock = + container_of(domain, struct mock_iommu_domain, domain); + bool first = true; + size_t ret = 0; + void *ent; + + WARN_ON(iova % MOCK_IO_PAGE_SIZE); + WARN_ON(pgsize % MOCK_IO_PAGE_SIZE); + + for (; pgcount; pgcount--) { + size_t cur; + + for (cur = 0; cur != pgsize; cur += MOCK_IO_PAGE_SIZE) { + ent = xa_erase(&mock->pfns, iova / MOCK_IO_PAGE_SIZE); + WARN_ON(!ent); + /* + * iommufd generates unmaps that must be a strict + * superset of the map's performend So every starting + 
* IOVA should have been an iova passed to map, and the + * + * First IOVA must be present and have been a first IOVA + * passed to map_pages + */ + if (first) { + WARN_ON(!(xa_to_value(ent) & + MOCK_PFN_START_IOVA)); + first = false; + } + if (pgcount == 1 && cur + MOCK_IO_PAGE_SIZE == pgsize) + WARN_ON(!(xa_to_value(ent) & + MOCK_PFN_LAST_IOVA)); + + iova += MOCK_IO_PAGE_SIZE; + ret += MOCK_IO_PAGE_SIZE; + } + } + return ret; +} + +static phys_addr_t mock_domain_iova_to_phys(struct iommu_domain *domain, + dma_addr_t iova) +{ + struct mock_iommu_domain *mock = + container_of(domain, struct mock_iommu_domain, domain); + void *ent; + + WARN_ON(iova % MOCK_IO_PAGE_SIZE); + ent = xa_load(&mock->pfns, iova / MOCK_IO_PAGE_SIZE); + WARN_ON(!ent); + return (xa_to_value(ent) & MOCK_PFN_MASK) * MOCK_IO_PAGE_SIZE; +} + +static const struct iommu_ops mock_ops = { + .owner = THIS_MODULE, + .pgsize_bitmap = MOCK_IO_PAGE_SIZE, + .domain_alloc = mock_domain_alloc, + .default_domain_ops = + &(struct iommu_domain_ops){ + .free = mock_domain_free, + .map_pages = mock_domain_map_pages, + .unmap_pages = mock_domain_unmap_pages, + .iova_to_phys = mock_domain_iova_to_phys, + }, +}; + +static inline struct iommufd_hw_pagetable * +get_md_pagetable(struct iommufd_ucmd *ucmd, u32 mockpt_id, + struct mock_iommu_domain **mock) +{ + struct iommufd_hw_pagetable *hwpt; + struct iommufd_object *obj; + + obj = iommufd_get_object(ucmd->ictx, mockpt_id, + IOMMUFD_OBJ_HW_PAGETABLE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + hwpt = container_of(obj, struct iommufd_hw_pagetable, obj); + if (hwpt->domain->ops != mock_ops.default_domain_ops) { + iommufd_put_object(&hwpt->obj); + return ERR_PTR(-EINVAL); + } + *mock = container_of(hwpt->domain, struct mock_iommu_domain, domain); + return hwpt; +} + +/* Create an hw_pagetable with the mock domain so we can test the domain ops */ +static int iommufd_test_mock_domain(struct iommufd_ucmd *ucmd, + struct iommu_test_cmd *cmd) +{ + static struct bus_type 
mock_bus = { .iommu_ops = &mock_ops }; + struct iommufd_hw_pagetable *hwpt; + struct selftest_obj *sobj; + struct iommufd_ioas *ioas; + int rc; + + ioas = iommufd_get_ioas(ucmd, cmd->id); + if (IS_ERR(ioas)) + return PTR_ERR(ioas); + + sobj = iommufd_object_alloc(ucmd->ictx, sobj, IOMMUFD_OBJ_SELFTEST); + if (IS_ERR(sobj)) { + rc = PTR_ERR(sobj); + goto out_ioas; + } + sobj->idev.ictx = ucmd->ictx; + sobj->type = TYPE_IDEV; + sobj->idev.mock_dev.bus = &mock_bus; + + hwpt = iommufd_device_selftest_attach(ucmd->ictx, ioas, + &sobj->idev.mock_dev); + if (IS_ERR(hwpt)) { + rc = PTR_ERR(hwpt); + goto out_sobj; + } + sobj->idev.hwpt = hwpt; + + /* Userspace must destroy both of these IDs to destroy the object */ + cmd->mock_domain.out_hwpt_id = hwpt->obj.id; + cmd->mock_domain.out_device_id = sobj->obj.id; + iommufd_object_finalize(ucmd->ictx, &sobj->obj); + iommufd_put_object(&ioas->obj); + return iommufd_ucmd_respond(ucmd, sizeof(*cmd)); + +out_sobj: + iommufd_object_abort(ucmd->ictx, &sobj->obj); +out_ioas: + iommufd_put_object(&ioas->obj); + return rc; +} + +/* Add an additional reserved IOVA to the IOAS */ +static int iommufd_test_add_reserved(struct iommufd_ucmd *ucmd, + unsigned int mockpt_id, + unsigned long start, size_t length) +{ + struct iommufd_ioas *ioas; + int rc; + + ioas = iommufd_get_ioas(ucmd, mockpt_id); + if (IS_ERR(ioas)) + return PTR_ERR(ioas); + down_write(&ioas->iopt.iova_rwsem); + rc = iopt_reserve_iova(&ioas->iopt, start, start + length - 1, NULL); + up_write(&ioas->iopt.iova_rwsem); + iommufd_put_object(&ioas->obj); + return rc; +} + +/* Check that every pfn under each iova matches the pfn under a user VA */ +static int iommufd_test_md_check_pa(struct iommufd_ucmd *ucmd, + unsigned int mockpt_id, unsigned long iova, + size_t length, void __user *uptr) +{ + struct iommufd_hw_pagetable *hwpt; + struct mock_iommu_domain *mock; + int rc; + + if (iova % MOCK_IO_PAGE_SIZE || length % MOCK_IO_PAGE_SIZE || + (uintptr_t)uptr % MOCK_IO_PAGE_SIZE) + 
return -EINVAL; + + hwpt = get_md_pagetable(ucmd, mockpt_id, &mock); + if (IS_ERR(hwpt)) + return PTR_ERR(hwpt); + + for (; length; length -= MOCK_IO_PAGE_SIZE) { + struct page *pages[1]; + unsigned long pfn; + long npages; + void *ent; + + npages = get_user_pages_fast((uintptr_t)uptr & PAGE_MASK, 1, 0, + pages); + if (npages < 0) { + rc = npages; + goto out_put; + } + if (WARN_ON(npages != 1)) { + rc = -EFAULT; + goto out_put; + } + pfn = page_to_pfn(pages[0]); + put_page(pages[0]); + + ent = xa_load(&mock->pfns, iova / MOCK_IO_PAGE_SIZE); + if (!ent || + (xa_to_value(ent) & MOCK_PFN_MASK) * MOCK_IO_PAGE_SIZE != + pfn * PAGE_SIZE + ((uintptr_t)uptr % PAGE_SIZE)) { + rc = -EINVAL; + goto out_put; + } + iova += MOCK_IO_PAGE_SIZE; + uptr += MOCK_IO_PAGE_SIZE; + } + rc = 0; + +out_put: + iommufd_put_object(&hwpt->obj); + return rc; +} + +/* Check that the page ref count matches, to look for missing pin/unpins */ +static int iommufd_test_md_check_refs(struct iommufd_ucmd *ucmd, + void __user *uptr, size_t length, + unsigned int refs) +{ + if (length % PAGE_SIZE || (uintptr_t)uptr % PAGE_SIZE) + return -EINVAL; + + for (; length; length -= PAGE_SIZE) { + struct page *pages[1]; + long npages; + + npages = get_user_pages_fast((uintptr_t)uptr, 1, 0, pages); + if (npages < 0) + return npages; + if (WARN_ON(npages != 1)) + return -EFAULT; + if (!PageCompound(pages[0])) { + unsigned int count; + + count = page_ref_count(pages[0]); + if (count / GUP_PIN_COUNTING_BIAS != refs) { + put_page(pages[0]); + return -EIO; + } + } + put_page(pages[0]); + uptr += PAGE_SIZE; + } + return 0; +} + +struct selftest_access { + struct iommufd_access *access; + struct file *file; + struct mutex lock; + struct list_head items; + unsigned int next_id; + bool destroying; +}; + +struct selftest_access_item { + struct list_head items_elm; + unsigned long iova; + size_t length; + unsigned int id; +}; + +static const struct file_operations iommfd_test_staccess_fops; + +static struct selftest_access 
*iommufd_access_get(int fd) +{ + struct file *file; + + file = fget(fd); + if (!file) + return ERR_PTR(-EBADFD); + + if (file->f_op != &iommfd_test_staccess_fops) { + fput(file); + return ERR_PTR(-EBADFD); + } + return file->private_data; +} + +static void iommufd_test_access_unmap(void *data, unsigned long iova, + unsigned long length) +{ + unsigned long iova_last = iova + length - 1; + struct selftest_access *staccess = data; + struct selftest_access_item *item; + struct selftest_access_item *tmp; + + mutex_lock(&staccess->lock); + list_for_each_entry_safe(item, tmp, &staccess->items, items_elm) { + if (iova > item->iova + item->length - 1 || + iova_last < item->iova) + continue; + list_del(&item->items_elm); + iommufd_access_unpin_pages(staccess->access, item->iova, + item->length); + kfree(item); + } + mutex_unlock(&staccess->lock); +} + +static int iommufd_test_access_item_destroy(struct iommufd_ucmd *ucmd, + unsigned int access_id, + unsigned int item_id) +{ + struct selftest_access_item *item; + struct selftest_access *staccess; + + staccess = iommufd_access_get(access_id); + if (IS_ERR(staccess)) + return PTR_ERR(staccess); + + mutex_lock(&staccess->lock); + list_for_each_entry(item, &staccess->items, items_elm) { + if (item->id == item_id) { + list_del(&item->items_elm); + iommufd_access_unpin_pages(staccess->access, item->iova, + item->length); + mutex_unlock(&staccess->lock); + kfree(item); + fput(staccess->file); + return 0; + } + } + mutex_unlock(&staccess->lock); + fput(staccess->file); + return -ENOENT; +} + +static int iommufd_test_staccess_release(struct inode *inode, + struct file *filep) +{ + struct selftest_access *staccess = filep->private_data; + + if (staccess->access) { + iommufd_test_access_unmap(staccess, 0, ULONG_MAX); + iommufd_access_destroy(staccess->access); + } + mutex_destroy(&staccess->lock); + kfree(staccess); + return 0; +} + +static const struct iommufd_access_ops selftest_access_ops_pin = { + .needs_pin_pages = 1, + .unmap = 
iommufd_test_access_unmap, +}; + +static const struct iommufd_access_ops selftest_access_ops = { + .unmap = iommufd_test_access_unmap, +}; + +static const struct file_operations iommfd_test_staccess_fops = { + .release = iommufd_test_staccess_release, +}; + +static struct selftest_access *iommufd_test_alloc_access(void) +{ + struct selftest_access *staccess; + struct file *filep; + + staccess = kzalloc(sizeof(*staccess), GFP_KERNEL_ACCOUNT); + if (!staccess) + return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&staccess->items); + mutex_init(&staccess->lock); + + filep = anon_inode_getfile("[iommufd_test_staccess]", + &iommfd_test_staccess_fops, staccess, + O_RDWR); + if (IS_ERR(filep)) { + kfree(staccess); + return ERR_CAST(filep); + } + staccess->file = filep; + return staccess; +} +/* Allocate a selftest access on @ioas_id and hand it to userspace as an fd */ +static int iommufd_test_create_access(struct iommufd_ucmd *ucmd, + unsigned int ioas_id, unsigned int flags) +{ + struct iommu_test_cmd *cmd = ucmd->cmd; + struct selftest_access *staccess; + struct iommufd_access *access; + int fdno; + int rc; + + if (flags & ~MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES) + return -EOPNOTSUPP; + + staccess = iommufd_test_alloc_access(); + if (IS_ERR(staccess)) + return PTR_ERR(staccess); + + fdno = get_unused_fd_flags(O_CLOEXEC); + if (fdno < 0) { + rc = fdno; /* keep the errno get_unused_fd_flags() returned */ + goto out_free_staccess; + } + + access = iommufd_access_create( + ucmd->ictx, ioas_id, + (flags & MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES) ? 
+ &selftest_access_ops_pin : + &selftest_access_ops, + staccess); + if (IS_ERR(access)) { + rc = PTR_ERR(access); + goto out_put_fdno; + } + cmd->create_access.out_access_fd = fdno; + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); + if (rc) + goto out_destroy; + + staccess->access = access; + fd_install(fdno, staccess->file); + return 0; + +out_destroy: + iommufd_access_destroy(access); +out_put_fdno: + put_unused_fd(fdno); +out_free_staccess: + fput(staccess->file); + return rc; +} + +/* Check that the pages in a page array match the pages in the user VA */ +static int iommufd_test_check_pages(void __user *uptr, struct page **pages, + size_t npages) +{ + for (; npages; npages--) { + struct page *tmp_pages[1]; + long rc; + + rc = get_user_pages_fast((uintptr_t)uptr, 1, 0, tmp_pages); + if (rc < 0) + return rc; + if (WARN_ON(rc != 1)) + return -EFAULT; + put_page(tmp_pages[0]); + if (tmp_pages[0] != *pages) + return -EBADE; + pages++; + uptr += PAGE_SIZE; + } + return 0; +} + +static int iommufd_test_access_pages(struct iommufd_ucmd *ucmd, + unsigned int access_id, unsigned long iova, + size_t length, void __user *uptr, + u32 flags) +{ + struct iommu_test_cmd *cmd = ucmd->cmd; + struct selftest_access_item *item; + struct selftest_access *staccess; + struct page **pages; + size_t npages; + int rc; + + /* Prevent syzkaller from triggering a WARN_ON in kvzalloc() */ + if (length > 16*1024*1024) + return -ENOMEM; + + if (flags & ~(MOCK_FLAGS_ACCESS_WRITE | MOCK_FLAGS_ACCESS_SYZ)) + return -EOPNOTSUPP; + + staccess = iommufd_access_get(access_id); + if (IS_ERR(staccess)) + return PTR_ERR(staccess); + + if (staccess->access->ops != &selftest_access_ops_pin) { + rc = -EOPNOTSUPP; + goto out_put; + } + + if (flags & MOCK_FLAGS_ACCESS_SYZ) + iova = iommufd_test_syz_conv_iova(&staccess->access->ioas->iopt, + &cmd->access_pages.iova); + + npages = (ALIGN(iova + length, PAGE_SIZE) - + ALIGN_DOWN(iova, PAGE_SIZE)) / + PAGE_SIZE; + pages = kvcalloc(npages, sizeof(*pages), 
GFP_KERNEL_ACCOUNT); + if (!pages) { + rc = -ENOMEM; + goto out_put; + } + + /* + * Drivers will need to think very carefully about this locking. The + * core code can do multiple unmaps instantaneously after + * iommufd_access_pin_pages() and *all* the unmaps must not return until + * the range is unpinned. This simple implementation puts a global lock + * around the pin, which may not suit drivers that want this to be a + * performance path. drivers that get this wrong will trigger WARN_ON + * races and cause EDEADLOCK failures to userspace. + */ + mutex_lock(&staccess->lock); + rc = iommufd_access_pin_pages(staccess->access, iova, length, pages, + flags & MOCK_FLAGS_ACCESS_WRITE); + if (rc) + goto out_unlock; + + /* For syzkaller allow uptr to be NULL to skip this check */ + if (uptr) { + rc = iommufd_test_check_pages( + uptr - (iova - ALIGN_DOWN(iova, PAGE_SIZE)), pages, + npages); + if (rc) + goto out_unaccess; + } + + item = kzalloc(sizeof(*item), GFP_KERNEL_ACCOUNT); + if (!item) { + rc = -ENOMEM; + goto out_unaccess; + } + + item->iova = iova; + item->length = length; + item->id = staccess->next_id++; + list_add_tail(&item->items_elm, &staccess->items); + + cmd->access_pages.out_access_pages_id = item->id; + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); + if (rc) + goto out_free_item; + goto out_unlock; + +out_free_item: + list_del(&item->items_elm); + kfree(item); +out_unaccess: + iommufd_access_unpin_pages(staccess->access, iova, length); +out_unlock: + mutex_unlock(&staccess->lock); + kvfree(pages); +out_put: + fput(staccess->file); + return rc; +} + +static int iommufd_test_access_rw(struct iommufd_ucmd *ucmd, + unsigned int access_id, unsigned long iova, + size_t length, void __user *ubuf, + unsigned int flags) +{ + struct iommu_test_cmd *cmd = ucmd->cmd; + struct selftest_access *staccess; + void *tmp; + int rc; + + /* Prevent syzkaller from triggering a WARN_ON in kvzalloc() */ + if (length > 16*1024*1024) + return -ENOMEM; + + if (flags & 
~(MOCK_ACCESS_RW_WRITE | MOCK_ACCESS_RW_SLOW_PATH | + MOCK_FLAGS_ACCESS_SYZ)) + return -EOPNOTSUPP; + + staccess = iommufd_access_get(access_id); + if (IS_ERR(staccess)) + return PTR_ERR(staccess); + + tmp = kvzalloc(length, GFP_KERNEL_ACCOUNT); + if (!tmp) { + rc = -ENOMEM; + goto out_put; + } + + if (flags & MOCK_ACCESS_RW_WRITE) { + if (copy_from_user(tmp, ubuf, length)) { + rc = -EFAULT; + goto out_free; + } + } + + if (flags & MOCK_FLAGS_ACCESS_SYZ) + iova = iommufd_test_syz_conv_iova(&staccess->access->ioas->iopt, + &cmd->access_rw.iova); + + rc = iommufd_access_rw(staccess->access, iova, tmp, length, flags); + if (rc) + goto out_free; + if (!(flags & MOCK_ACCESS_RW_WRITE)) { + if (copy_to_user(ubuf, tmp, length)) { + rc = -EFAULT; + goto out_free; + } + } + +out_free: + kvfree(tmp); +out_put: + fput(staccess->file); + return rc; +} +static_assert((unsigned int)MOCK_ACCESS_RW_WRITE == IOMMUFD_ACCESS_RW_WRITE); +static_assert((unsigned int)MOCK_ACCESS_RW_SLOW_PATH == + __IOMMUFD_ACCESS_RW_SLOW_PATH); + +void iommufd_selftest_destroy(struct iommufd_object *obj) +{ + struct selftest_obj *sobj = container_of(obj, struct selftest_obj, obj); + + switch (sobj->type) { + case TYPE_IDEV: + iommufd_device_selftest_detach(sobj->idev.ictx, + sobj->idev.hwpt); + break; + } +} + +int iommufd_test(struct iommufd_ucmd *ucmd) +{ + struct iommu_test_cmd *cmd = ucmd->cmd; + + switch (cmd->op) { + case IOMMU_TEST_OP_ADD_RESERVED: + return iommufd_test_add_reserved(ucmd, cmd->id, + cmd->add_reserved.start, + cmd->add_reserved.length); + case IOMMU_TEST_OP_MOCK_DOMAIN: + return iommufd_test_mock_domain(ucmd, cmd); + case IOMMU_TEST_OP_MD_CHECK_MAP: + return iommufd_test_md_check_pa( + ucmd, cmd->id, cmd->check_map.iova, + cmd->check_map.length, + u64_to_user_ptr(cmd->check_map.uptr)); + case IOMMU_TEST_OP_MD_CHECK_REFS: + return iommufd_test_md_check_refs( + ucmd, u64_to_user_ptr(cmd->check_refs.uptr), + cmd->check_refs.length, cmd->check_refs.refs); + case 
IOMMU_TEST_OP_CREATE_ACCESS: + return iommufd_test_create_access(ucmd, cmd->id, + cmd->create_access.flags); + case IOMMU_TEST_OP_ACCESS_PAGES: + return iommufd_test_access_pages( + ucmd, cmd->id, cmd->access_pages.iova, + cmd->access_pages.length, + u64_to_user_ptr(cmd->access_pages.uptr), + cmd->access_pages.flags); + case IOMMU_TEST_OP_ACCESS_RW: + return iommufd_test_access_rw( + ucmd, cmd->id, cmd->access_rw.iova, + cmd->access_rw.length, + u64_to_user_ptr(cmd->access_rw.uptr), + cmd->access_rw.flags); + case IOMMU_TEST_OP_DESTROY_ACCESS_PAGES: + return iommufd_test_access_item_destroy( + ucmd, cmd->id, cmd->destroy_access_pages.access_pages_id); + case IOMMU_TEST_OP_SET_TEMP_MEMORY_LIMIT: + /* Protect _batch_init(), can not be less than elmsz */ + if (cmd->memory_limit.limit < + sizeof(unsigned long) + sizeof(u32)) + return -EINVAL; + iommufd_test_memory_limit = cmd->memory_limit.limit; + return 0; + default: + return -EOPNOTSUPP; + } +} + +bool iommufd_should_fail(void) +{ + return should_fail(&fail_iommufd, 1); +} + +void __init iommufd_test_init(void) +{ + dbgfs_root = + fault_create_debugfs_attr("fail_iommufd", NULL, &fail_iommufd); +} + +void iommufd_test_exit(void) +{ + debugfs_remove_recursive(dbgfs_root); +} diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 84af9a239769..650d45629647 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -34,6 +34,9 @@ enum { IOMMUFD_ACCESS_RW_WRITE = 1 << 0, /* Set if the caller is in a kthread then rw will use kthread_use_mm() */ IOMMUFD_ACCESS_RW_KTHREAD = 1 << 1, + + /* Only for use by selftest */ + __IOMMUFD_ACCESS_RW_SLOW_PATH = 1 << 2, }; struct iommufd_access * From e26eed4f623da70913b535631a29764d108efe98 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 29 Nov 2022 16:29:40 -0400 Subject: [PATCH 3136/4122] iommufd: Add some fault injection points This increases the coverage the fail_nth test gets, as well as via syzkaller. 
Link: https://lore.kernel.org/r/17-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com Tested-by: Matthew Rosato # s390 Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/main.c | 3 +++ drivers/iommu/iommufd/pages.c | 26 ++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index 7c8f40bc8d98..bcb463e58100 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -102,6 +102,9 @@ struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id, { struct iommufd_object *obj; + if (iommufd_should_fail()) + return ERR_PTR(-ENOENT); + xa_lock(&ictx->objects); obj = xa_load(&ictx->objects, id); if (!obj || (type != IOMMUFD_OBJ_ANY && obj->type != type) || diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c index 640331b8a079..c5d2d9a8c562 100644 --- a/drivers/iommu/iommufd/pages.c +++ b/drivers/iommu/iommufd/pages.c @@ -80,6 +80,10 @@ static void *temp_kmalloc(size_t *size, void *backup, size_t backup_len) if (*size < backup_len) return backup; + + if (!backup && iommufd_should_fail()) + return NULL; + *size = min_t(size_t, *size, TEMP_MEMORY_LIMIT); res = kmalloc(*size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); if (res) @@ -544,6 +548,7 @@ static int pages_to_xarray(struct xarray *xa, unsigned long start_index, unsigned long last_index, struct page **pages) { struct page **end_pages = pages + (last_index - start_index) + 1; + struct page **half_pages = pages + (end_pages - pages) / 2; XA_STATE(xas, xa, start_index); do { @@ -551,6 +556,15 @@ static int pages_to_xarray(struct xarray *xa, unsigned long start_index, xas_lock(&xas); while (pages != end_pages) { + /* xarray does not participate in fault injection */ + if (pages == half_pages && iommufd_should_fail()) { + xas_set_err(&xas, -EINVAL); + xas_unlock(&xas); + /* aka xas_destroy() */ + xas_nomem(&xas, GFP_KERNEL); + goto err_clear; + } + old = xas_store(&xas, 
xa_mk_value(page_to_pfn(*pages))); if (xas_error(&xas)) break; @@ -561,6 +575,7 @@ static int pages_to_xarray(struct xarray *xa, unsigned long start_index, xas_unlock(&xas); } while (xas_nomem(&xas, GFP_KERNEL)); +err_clear: if (xas_error(&xas)) { if (xas.xa_index != start_index) clear_xarray(xa, start_index, xas.xa_index - 1); @@ -728,6 +743,10 @@ static int pfn_reader_user_pin(struct pfn_reader_user *user, npages = min_t(unsigned long, last_index - start_index + 1, user->upages_len / sizeof(*user->upages)); + + if (iommufd_should_fail()) + return -EFAULT; + uptr = (uintptr_t)(pages->uptr + start_index * PAGE_SIZE); if (!remote_mm) rc = pin_user_pages_fast(uptr, npages, user->gup_flags, @@ -872,6 +891,8 @@ static int pfn_reader_user_update_pinned(struct pfn_reader_user *user, npages = pages->last_npinned - pages->npinned; inc = false; } else { + if (iommufd_should_fail()) + return -ENOMEM; npages = pages->npinned - pages->last_npinned; inc = true; } @@ -1721,6 +1742,11 @@ static int iopt_pages_rw_page(struct iopt_pages *pages, unsigned long index, return iopt_pages_rw_slow(pages, index, index, offset, data, length, flags); + if (iommufd_should_fail()) { + rc = -EINVAL; + goto out_mmput; + } + mmap_read_lock(pages->source_mm); rc = pin_user_pages_remote( pages->source_mm, (uintptr_t)(pages->uptr + index * PAGE_SIZE), From 52f528583bb395495f7dd35e6e4d548bccbf8a73 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 29 Nov 2022 16:29:41 -0400 Subject: [PATCH 3137/4122] iommufd: Add additional invariant assertions These are on performance paths so we protect them using the CONFIG_IOMMUFD_TEST to not take a hit during normal operation. These are useful when running the test suite and syzkaller to find data structure inconsistencies early. 
Link: https://lore.kernel.org/r/18-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com Tested-by: Yi Liu Tested-by: Matthew Rosato # s390 Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/device.c | 5 ++++ drivers/iommu/iommufd/io_pagetable.c | 22 +++++++++++++++ drivers/iommu/iommufd/io_pagetable.h | 3 ++ drivers/iommu/iommufd/pages.c | 42 ++++++++++++++++++++++++++-- 4 files changed, 70 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index 67ce36152e8a..dd2a415b603e 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -625,6 +625,11 @@ int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova, struct iopt_area *area; int rc; + /* Driver's ops don't support pin_pages */ + if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && + WARN_ON(access->iova_alignment != PAGE_SIZE || !access->ops->unmap)) + return -EINVAL; + if (!length) return -EINVAL; if (check_add_overflow(iova, length - 1, &last_iova)) diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c index 4f4a9d9aac57..3467cea79568 100644 --- a/drivers/iommu/iommufd/io_pagetable.c +++ b/drivers/iommu/iommufd/io_pagetable.c @@ -251,6 +251,11 @@ static int iopt_alloc_area_pages(struct io_pagetable *iopt, (uintptr_t)elm->pages->uptr + elm->start_byte, length); if (rc) goto out_unlock; + if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && + WARN_ON(iopt_check_iova(iopt, *dst_iova, length))) { + rc = -EINVAL; + goto out_unlock; + } } else { rc = iopt_check_iova(iopt, *dst_iova, length); if (rc) @@ -277,6 +282,8 @@ out_unlock: static void iopt_abort_area(struct iopt_area *area) { + if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) + WARN_ON(area->pages); if (area->iopt) { down_write(&area->iopt->iova_rwsem); interval_tree_remove(&area->node, &area->iopt->area_itree); @@ -642,6 +649,9 @@ void iopt_destroy_table(struct io_pagetable *iopt) { struct interval_tree_node *node; + if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) + 
iopt_remove_reserved_iova(iopt, NULL); + while ((node = interval_tree_iter_first(&iopt->allowed_itree, 0, ULONG_MAX))) { interval_tree_remove(node, &iopt->allowed_itree); @@ -688,6 +698,8 @@ static void iopt_unfill_domain(struct io_pagetable *iopt, continue; mutex_lock(&pages->mutex); + if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) + WARN_ON(!area->storage_domain); if (area->storage_domain == domain) area->storage_domain = storage_domain; mutex_unlock(&pages->mutex); @@ -792,6 +804,16 @@ static int iopt_check_iova_alignment(struct io_pagetable *iopt, (iopt_area_length(area) & align_mask) || (area->page_offset & align_mask)) return -EADDRINUSE; + + if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) { + struct iommufd_access *access; + unsigned long index; + + xa_for_each(&iopt->access_list, index, access) + if (WARN_ON(access->iova_alignment > + new_iova_alignment)) + return -EADDRINUSE; + } return 0; } diff --git a/drivers/iommu/iommufd/io_pagetable.h b/drivers/iommu/iommufd/io_pagetable.h index 2ee6942c3ef4..83e7c175f2a2 100644 --- a/drivers/iommu/iommufd/io_pagetable.h +++ b/drivers/iommu/iommufd/io_pagetable.h @@ -101,6 +101,9 @@ static inline size_t iopt_area_length(struct iopt_area *area) static inline unsigned long iopt_area_start_byte(struct iopt_area *area, unsigned long iova) { + if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) + WARN_ON(iova < iopt_area_iova(area) || + iova > iopt_area_last_iova(area)); return (iova - iopt_area_iova(area)) + area->page_offset + iopt_area_index(area) * PAGE_SIZE; } diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c index c5d2d9a8c562..429fa3b0a239 100644 --- a/drivers/iommu/iommufd/pages.c +++ b/drivers/iommu/iommufd/pages.c @@ -162,12 +162,20 @@ void interval_tree_double_span_iter_next( static void iopt_pages_add_npinned(struct iopt_pages *pages, size_t npages) { - pages->npinned += npages; + int rc; + + rc = check_add_overflow(pages->npinned, npages, &pages->npinned); + if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) + WARN_ON(rc || 
pages->npinned > pages->npages); } static void iopt_pages_sub_npinned(struct iopt_pages *pages, size_t npages) { - pages->npinned -= npages; + int rc; + + rc = check_sub_overflow(pages->npinned, npages, &pages->npinned); + if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) + WARN_ON(rc || pages->npinned > pages->npages); } static void iopt_pages_err_unpin(struct iopt_pages *pages, @@ -189,6 +197,9 @@ static void iopt_pages_err_unpin(struct iopt_pages *pages, static unsigned long iopt_area_index_to_iova(struct iopt_area *area, unsigned long index) { + if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) + WARN_ON(index < iopt_area_index(area) || + index > iopt_area_last_index(area)); index -= iopt_area_index(area); if (index == 0) return iopt_area_iova(area); @@ -198,6 +209,9 @@ static unsigned long iopt_area_index_to_iova(struct iopt_area *area, static unsigned long iopt_area_index_to_iova_last(struct iopt_area *area, unsigned long index) { + if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) + WARN_ON(index < iopt_area_index(area) || + index > iopt_area_last_index(area)); if (index == iopt_area_last_index(area)) return iopt_area_last_iova(area); return iopt_area_iova(area) - area->page_offset + @@ -286,6 +300,8 @@ static void batch_skip_carry(struct pfn_batch *batch, unsigned int skip_pfns) { if (!batch->total_pfns) return; + if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) + WARN_ON(batch->total_pfns != batch->npfns[0]); skip_pfns = min(batch->total_pfns, skip_pfns); batch->pfns[0] += skip_pfns; batch->npfns[0] -= skip_pfns; @@ -301,6 +317,8 @@ static int __batch_init(struct pfn_batch *batch, size_t max_pages, void *backup, batch->pfns = temp_kmalloc(&size, backup, backup_len); if (!batch->pfns) return -ENOMEM; + if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && WARN_ON(size < elmsz)) + return -EINVAL; batch->array_size = size / elmsz; batch->npfns = (u32 *)(batch->pfns + batch->array_size); batch_clear(batch); @@ -429,6 +447,10 @@ static int batch_iommu_map_small(struct iommu_domain *domain, unsigned long start_iova = iova; int 
rc; + if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) + WARN_ON(paddr % PAGE_SIZE || iova % PAGE_SIZE || + size % PAGE_SIZE); + while (size) { rc = iommu_map(domain, iova, paddr, PAGE_SIZE, prot); if (rc) @@ -718,6 +740,10 @@ static int pfn_reader_user_pin(struct pfn_reader_user *user, uintptr_t uptr; long rc; + if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && + WARN_ON(last_index < start_index)) + return -EINVAL; + if (!user->upages) { /* All undone in pfn_reader_destroy() */ user->upages_len = @@ -956,6 +982,10 @@ static int pfn_reader_fill_span(struct pfn_reader *pfns) struct iopt_area *area; int rc; + if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && + WARN_ON(span->last_used < start_index)) + return -EINVAL; + if (span->is_used == 1) { batch_from_xarray(&pfns->batch, &pfns->pages->pinned_pfns, start_index, span->last_used); @@ -1008,6 +1038,10 @@ static int pfn_reader_next(struct pfn_reader *pfns) while (pfns->batch_end_index != pfns->last_index + 1) { unsigned int npfns = pfns->batch.total_pfns; + if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && + WARN_ON(interval_tree_double_span_iter_done(&pfns->span))) + return -EINVAL; + rc = pfn_reader_fill_span(pfns); if (rc) return rc; @@ -1091,6 +1125,10 @@ static int pfn_reader_first(struct pfn_reader *pfns, struct iopt_pages *pages, { int rc; + if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && + WARN_ON(last_index < start_index)) + return -EINVAL; + rc = pfn_reader_init(pfns, pages, start_index, last_index); if (rc) return rc; From 57f0988706fec1b8dbc3fe00965828a47e2235a1 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 29 Nov 2022 16:29:42 -0400 Subject: [PATCH 3138/4122] iommufd: Add a selftest Cover the essential functionality of the iommufd with a directed test from userspace. This aims to achieve reasonable functional coverage using the in-kernel self test framework. A second test does a failure injection sweep of the success paths to study error unwind behaviors. This allows achieving high coverage of the corner cases in pages.c. 
The selftest requires CONFIG_IOMMUFD_TEST to be enabled, and several huge pages which may require: echo 4 > /proc/sys/vm/nr_hugepages Link: https://lore.kernel.org/r/19-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com Tested-by: Nicolin Chen Tested-by: Matthew Rosato # s390 Tested-by: Yi Liu Tested-by: Eric Auger # aarch64 Signed-off-by: Nicolin Chen Signed-off-by: Yi Liu Signed-off-by: Jason Gunthorpe --- tools/testing/selftests/Makefile | 1 + tools/testing/selftests/iommu/.gitignore | 3 + tools/testing/selftests/iommu/Makefile | 12 + tools/testing/selftests/iommu/config | 2 + tools/testing/selftests/iommu/iommufd.c | 1654 +++++++++++++++++ .../selftests/iommu/iommufd_fail_nth.c | 580 ++++++ tools/testing/selftests/iommu/iommufd_utils.h | 278 +++ 7 files changed, 2530 insertions(+) create mode 100644 tools/testing/selftests/iommu/.gitignore create mode 100644 tools/testing/selftests/iommu/Makefile create mode 100644 tools/testing/selftests/iommu/config create mode 100644 tools/testing/selftests/iommu/iommufd.c create mode 100644 tools/testing/selftests/iommu/iommufd_fail_nth.c create mode 100644 tools/testing/selftests/iommu/iommufd_utils.h diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index f07aef7c592c..d6680af7b295 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -27,6 +27,7 @@ TARGETS += ftrace TARGETS += futex TARGETS += gpio TARGETS += intel_pstate +TARGETS += iommu TARGETS += ipc TARGETS += ir TARGETS += kcmp diff --git a/tools/testing/selftests/iommu/.gitignore b/tools/testing/selftests/iommu/.gitignore new file mode 100644 index 000000000000..7d0703049eba --- /dev/null +++ b/tools/testing/selftests/iommu/.gitignore @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +/iommufd +/iommufd_fail_nth diff --git a/tools/testing/selftests/iommu/Makefile b/tools/testing/selftests/iommu/Makefile new file mode 100644 index 000000000000..7cb74d26f141 --- /dev/null +++ 
b/tools/testing/selftests/iommu/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0-only +CFLAGS += -Wall -O2 -Wno-unused-function +CFLAGS += -I../../../../include/uapi/ +CFLAGS += -I../../../../include/ + +CFLAGS += -D_GNU_SOURCE + +TEST_GEN_PROGS := +TEST_GEN_PROGS += iommufd +TEST_GEN_PROGS += iommufd_fail_nth + +include ../lib.mk diff --git a/tools/testing/selftests/iommu/config b/tools/testing/selftests/iommu/config new file mode 100644 index 000000000000..6c4f901d6fed --- /dev/null +++ b/tools/testing/selftests/iommu/config @@ -0,0 +1,2 @@ +CONFIG_IOMMUFD +CONFIG_IOMMUFD_TEST diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c new file mode 100644 index 000000000000..8aa8a346cf22 --- /dev/null +++ b/tools/testing/selftests/iommu/iommufd.c @@ -0,0 +1,1654 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES */ +#include +#include +#include + +#define __EXPORTED_HEADERS__ +#include + +#include "iommufd_utils.h" + +static void *buffer; + +static unsigned long PAGE_SIZE; +static unsigned long HUGEPAGE_SIZE; + +#define MOCK_PAGE_SIZE (PAGE_SIZE / 2) +/* Read the THP PMD size from sysfs; fall back to 2MiB when it is unusable */ +static unsigned long get_huge_page_size(void) +{ + char buf[80]; + int ret; + int fd; + + fd = open("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", + O_RDONLY); + if (fd < 0) + return 2 * 1024 * 1024; + + ret = read(fd, buf, sizeof(buf)); + close(fd); + if (ret <= 0 || ret == sizeof(buf)) + return 2 * 1024 * 1024; + buf[ret] = 0; + return strtoul(buf, NULL, 10) ?: 2 * 1024 * 1024; /* 0: unparsable */ +} + +static __attribute__((constructor)) void setup_sizes(void) +{ + void *vrc; + int rc; + + PAGE_SIZE = sysconf(_SC_PAGE_SIZE); + HUGEPAGE_SIZE = get_huge_page_size(); + + BUFFER_SIZE = PAGE_SIZE * 16; + rc = posix_memalign(&buffer, HUGEPAGE_SIZE, BUFFER_SIZE); + assert(!rc); + assert(buffer); + assert((uintptr_t)buffer % HUGEPAGE_SIZE == 0); + vrc = mmap(buffer, BUFFER_SIZE, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 
0); + assert(vrc == buffer); +} + +FIXTURE(iommufd) +{ + int fd; +}; + +FIXTURE_SETUP(iommufd) +{ + self->fd = open("/dev/iommu", O_RDWR); + ASSERT_NE(-1, self->fd); +} + +FIXTURE_TEARDOWN(iommufd) +{ + teardown_iommufd(self->fd, _metadata); +} + +TEST_F(iommufd, simple_close) +{ +} + +TEST_F(iommufd, cmd_fail) +{ + struct iommu_destroy cmd = { .size = sizeof(cmd), .id = 0 }; + + /* object id is invalid */ + EXPECT_ERRNO(ENOENT, _test_ioctl_destroy(self->fd, 0)); + /* Bad pointer */ + EXPECT_ERRNO(EFAULT, ioctl(self->fd, IOMMU_DESTROY, NULL)); + /* Unknown ioctl */ + EXPECT_ERRNO(ENOTTY, + ioctl(self->fd, _IO(IOMMUFD_TYPE, IOMMUFD_CMD_BASE - 1), + &cmd)); +} + +TEST_F(iommufd, cmd_length) +{ +#define TEST_LENGTH(_struct, _ioctl) \ + { \ + struct { \ + struct _struct cmd; \ + uint8_t extra; \ + } cmd = { .cmd = { .size = sizeof(struct _struct) - 1 }, \ + .extra = UINT8_MAX }; \ + int old_errno; \ + int rc; \ + \ + EXPECT_ERRNO(EINVAL, ioctl(self->fd, _ioctl, &cmd)); \ + cmd.cmd.size = sizeof(struct _struct) + 1; \ + EXPECT_ERRNO(E2BIG, ioctl(self->fd, _ioctl, &cmd)); \ + cmd.cmd.size = sizeof(struct _struct); \ + rc = ioctl(self->fd, _ioctl, &cmd); \ + old_errno = errno; \ + cmd.cmd.size = sizeof(struct _struct) + 1; \ + cmd.extra = 0; \ + if (rc) { \ + EXPECT_ERRNO(old_errno, \ + ioctl(self->fd, _ioctl, &cmd)); \ + } else { \ + ASSERT_EQ(0, ioctl(self->fd, _ioctl, &cmd)); \ + } \ + } + + TEST_LENGTH(iommu_destroy, IOMMU_DESTROY); + TEST_LENGTH(iommu_ioas_alloc, IOMMU_IOAS_ALLOC); + TEST_LENGTH(iommu_ioas_iova_ranges, IOMMU_IOAS_IOVA_RANGES); + TEST_LENGTH(iommu_ioas_allow_iovas, IOMMU_IOAS_ALLOW_IOVAS); + TEST_LENGTH(iommu_ioas_map, IOMMU_IOAS_MAP); + TEST_LENGTH(iommu_ioas_copy, IOMMU_IOAS_COPY); + TEST_LENGTH(iommu_ioas_unmap, IOMMU_IOAS_UNMAP); + TEST_LENGTH(iommu_option, IOMMU_OPTION); + TEST_LENGTH(iommu_vfio_ioas, IOMMU_VFIO_IOAS); +#undef TEST_LENGTH +} + +TEST_F(iommufd, cmd_ex_fail) +{ + struct { + struct iommu_destroy cmd; + __u64 future; + } cmd = { .cmd 
= { .size = sizeof(cmd), .id = 0 } }; + + /* object id is invalid and command is longer */ + EXPECT_ERRNO(ENOENT, ioctl(self->fd, IOMMU_DESTROY, &cmd)); + /* future area is non-zero */ + cmd.future = 1; + EXPECT_ERRNO(E2BIG, ioctl(self->fd, IOMMU_DESTROY, &cmd)); + /* Original command "works" */ + cmd.cmd.size = sizeof(cmd.cmd); + EXPECT_ERRNO(ENOENT, ioctl(self->fd, IOMMU_DESTROY, &cmd)); + /* Short command fails */ + cmd.cmd.size = sizeof(cmd.cmd) - 1; + EXPECT_ERRNO(EINVAL, ioctl(self->fd, IOMMU_DESTROY, &cmd)); +} + +TEST_F(iommufd, global_options) +{ + struct iommu_option cmd = { + .size = sizeof(cmd), + .option_id = IOMMU_OPTION_RLIMIT_MODE, + .op = IOMMU_OPTION_OP_GET, + .val64 = 1, + }; + + cmd.option_id = IOMMU_OPTION_RLIMIT_MODE; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd)); + ASSERT_EQ(0, cmd.val64); + + /* This requires root */ + cmd.op = IOMMU_OPTION_OP_SET; + cmd.val64 = 1; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd)); + cmd.val64 = 2; + EXPECT_ERRNO(EINVAL, ioctl(self->fd, IOMMU_OPTION, &cmd)); + + cmd.op = IOMMU_OPTION_OP_GET; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd)); + ASSERT_EQ(1, cmd.val64); + + cmd.op = IOMMU_OPTION_OP_SET; + cmd.val64 = 0; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd)); + + cmd.op = IOMMU_OPTION_OP_GET; + cmd.option_id = IOMMU_OPTION_HUGE_PAGES; + EXPECT_ERRNO(ENOENT, ioctl(self->fd, IOMMU_OPTION, &cmd)); + cmd.op = IOMMU_OPTION_OP_SET; + EXPECT_ERRNO(ENOENT, ioctl(self->fd, IOMMU_OPTION, &cmd)); +} + +FIXTURE(iommufd_ioas) +{ + int fd; + uint32_t ioas_id; + uint32_t domain_id; + uint64_t base_iova; +}; + +FIXTURE_VARIANT(iommufd_ioas) +{ + unsigned int mock_domains; + unsigned int memory_limit; +}; + +FIXTURE_SETUP(iommufd_ioas) +{ + unsigned int i; + + + self->fd = open("/dev/iommu", O_RDWR); + ASSERT_NE(-1, self->fd); + test_ioctl_ioas_alloc(&self->ioas_id); + + if (!variant->memory_limit) { + test_ioctl_set_default_memory_limit(); + } else { + 
test_ioctl_set_temp_memory_limit(variant->memory_limit); + } + + for (i = 0; i != variant->mock_domains; i++) { + test_cmd_mock_domain(self->ioas_id, NULL, &self->domain_id); + self->base_iova = MOCK_APERTURE_START; + } +} + +FIXTURE_TEARDOWN(iommufd_ioas) +{ + test_ioctl_set_default_memory_limit(); + teardown_iommufd(self->fd, _metadata); +} + +FIXTURE_VARIANT_ADD(iommufd_ioas, no_domain) +{ +}; + +FIXTURE_VARIANT_ADD(iommufd_ioas, mock_domain) +{ + .mock_domains = 1, +}; + +FIXTURE_VARIANT_ADD(iommufd_ioas, two_mock_domain) +{ + .mock_domains = 2, +}; + +FIXTURE_VARIANT_ADD(iommufd_ioas, mock_domain_limit) +{ + .mock_domains = 1, + .memory_limit = 16, +}; + +TEST_F(iommufd_ioas, ioas_auto_destroy) +{ +} + +TEST_F(iommufd_ioas, ioas_destroy) +{ + if (self->domain_id) { + /* IOAS cannot be freed while a domain is on it */ + EXPECT_ERRNO(EBUSY, + _test_ioctl_destroy(self->fd, self->ioas_id)); + } else { + /* Can allocate and manually free an IOAS table */ + test_ioctl_destroy(self->ioas_id); + } +} + +TEST_F(iommufd_ioas, ioas_area_destroy) +{ + /* Adding an area does not change ability to destroy */ + test_ioctl_ioas_map_fixed(buffer, PAGE_SIZE, self->base_iova); + if (self->domain_id) + EXPECT_ERRNO(EBUSY, + _test_ioctl_destroy(self->fd, self->ioas_id)); + else + test_ioctl_destroy(self->ioas_id); +} + +TEST_F(iommufd_ioas, ioas_area_auto_destroy) +{ + int i; + + /* Can allocate and automatically free an IOAS table with many areas */ + for (i = 0; i != 10; i++) { + test_ioctl_ioas_map_fixed(buffer, PAGE_SIZE, + self->base_iova + i * PAGE_SIZE); + } +} + +TEST_F(iommufd_ioas, area) +{ + int i; + + /* Unmap fails if nothing is mapped */ + for (i = 0; i != 10; i++) + test_err_ioctl_ioas_unmap(ENOENT, i * PAGE_SIZE, PAGE_SIZE); + + /* Unmap works */ + for (i = 0; i != 10; i++) + test_ioctl_ioas_map_fixed(buffer, PAGE_SIZE, + self->base_iova + i * PAGE_SIZE); + for (i = 0; i != 10; i++) + test_ioctl_ioas_unmap(self->base_iova + i * PAGE_SIZE, + PAGE_SIZE); + + /* Split 
fails */ + test_ioctl_ioas_map_fixed(buffer, PAGE_SIZE * 2, + self->base_iova + 16 * PAGE_SIZE); + test_err_ioctl_ioas_unmap(ENOENT, self->base_iova + 16 * PAGE_SIZE, + PAGE_SIZE); + test_err_ioctl_ioas_unmap(ENOENT, self->base_iova + 17 * PAGE_SIZE, + PAGE_SIZE); + + /* Over map fails */ + test_err_ioctl_ioas_map_fixed(EEXIST, buffer, PAGE_SIZE * 2, + self->base_iova + 16 * PAGE_SIZE); + test_err_ioctl_ioas_map_fixed(EEXIST, buffer, PAGE_SIZE, + self->base_iova + 16 * PAGE_SIZE); + test_err_ioctl_ioas_map_fixed(EEXIST, buffer, PAGE_SIZE, + self->base_iova + 17 * PAGE_SIZE); + test_err_ioctl_ioas_map_fixed(EEXIST, buffer, PAGE_SIZE * 2, + self->base_iova + 15 * PAGE_SIZE); + test_err_ioctl_ioas_map_fixed(EEXIST, buffer, PAGE_SIZE * 3, + self->base_iova + 15 * PAGE_SIZE); + + /* unmap all works */ + test_ioctl_ioas_unmap(0, UINT64_MAX); + + /* Unmap all succeeds on an empty IOAS */ + test_ioctl_ioas_unmap(0, UINT64_MAX); +} + +TEST_F(iommufd_ioas, unmap_fully_contained_areas) +{ + uint64_t unmap_len; + int i; + + /* Give no_domain some space to rewind base_iova */ + self->base_iova += 4 * PAGE_SIZE; + + for (i = 0; i != 4; i++) + test_ioctl_ioas_map_fixed(buffer, 8 * PAGE_SIZE, + self->base_iova + i * 16 * PAGE_SIZE); + + /* Unmap not fully contained area doesn't work */ + test_err_ioctl_ioas_unmap(ENOENT, self->base_iova - 4 * PAGE_SIZE, + 8 * PAGE_SIZE); + test_err_ioctl_ioas_unmap(ENOENT, + self->base_iova + 3 * 16 * PAGE_SIZE + + 8 * PAGE_SIZE - 4 * PAGE_SIZE, + 8 * PAGE_SIZE); + + /* Unmap fully contained areas works */ + ASSERT_EQ(0, _test_ioctl_ioas_unmap(self->fd, self->ioas_id, + self->base_iova - 4 * PAGE_SIZE, + 3 * 16 * PAGE_SIZE + 8 * PAGE_SIZE + + 4 * PAGE_SIZE, + &unmap_len)); + ASSERT_EQ(32 * PAGE_SIZE, unmap_len); +} + +TEST_F(iommufd_ioas, area_auto_iova) +{ + struct iommu_test_cmd test_cmd = { + .size = sizeof(test_cmd), + .op = IOMMU_TEST_OP_ADD_RESERVED, + .id = self->ioas_id, + .add_reserved = { .start = PAGE_SIZE * 4, + .length = PAGE_SIZE * 
100 }, + }; + struct iommu_iova_range ranges[1] = {}; + struct iommu_ioas_allow_iovas allow_cmd = { + .size = sizeof(allow_cmd), + .ioas_id = self->ioas_id, + .num_iovas = 1, + .allowed_iovas = (uintptr_t)ranges, + }; + __u64 iovas[10]; + int i; + + /* Simple 4k pages */ + for (i = 0; i != 10; i++) + test_ioctl_ioas_map(buffer, PAGE_SIZE, &iovas[i]); + for (i = 0; i != 10; i++) + test_ioctl_ioas_unmap(iovas[i], PAGE_SIZE); + + /* Kernel automatically aligns IOVAs properly */ + for (i = 0; i != 10; i++) { + size_t length = PAGE_SIZE * (i + 1); + + if (self->domain_id) { + test_ioctl_ioas_map(buffer, length, &iovas[i]); + } else { + test_ioctl_ioas_map((void *)(1UL << 31), length, + &iovas[i]); + } + EXPECT_EQ(0, iovas[i] % (1UL << (ffs(length) - 1))); + } + for (i = 0; i != 10; i++) + test_ioctl_ioas_unmap(iovas[i], PAGE_SIZE * (i + 1)); + + /* Avoids a reserved region */ + ASSERT_EQ(0, + ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ADD_RESERVED), + &test_cmd)); + for (i = 0; i != 10; i++) { + size_t length = PAGE_SIZE * (i + 1); + + test_ioctl_ioas_map(buffer, length, &iovas[i]); + EXPECT_EQ(0, iovas[i] % (1UL << (ffs(length) - 1))); + EXPECT_EQ(false, + iovas[i] > test_cmd.add_reserved.start && + iovas[i] < + test_cmd.add_reserved.start + + test_cmd.add_reserved.length); + } + for (i = 0; i != 10; i++) + test_ioctl_ioas_unmap(iovas[i], PAGE_SIZE * (i + 1)); + + /* Allowed region intersects with a reserved region */ + ranges[0].start = PAGE_SIZE; + ranges[0].last = PAGE_SIZE * 600; + EXPECT_ERRNO(EADDRINUSE, + ioctl(self->fd, IOMMU_IOAS_ALLOW_IOVAS, &allow_cmd)); + + /* Allocate from an allowed region */ + if (self->domain_id) { + ranges[0].start = MOCK_APERTURE_START + PAGE_SIZE; + ranges[0].last = MOCK_APERTURE_START + PAGE_SIZE * 600 - 1; + } else { + ranges[0].start = PAGE_SIZE * 200; + ranges[0].last = PAGE_SIZE * 600 - 1; + } + ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_ALLOW_IOVAS, &allow_cmd)); + for (i = 0; i != 10; i++) { + size_t length = PAGE_SIZE * (i + 
1); + + test_ioctl_ioas_map(buffer, length, &iovas[i]); + EXPECT_EQ(0, iovas[i] % (1UL << (ffs(length) - 1))); + EXPECT_EQ(true, iovas[i] >= ranges[0].start); + EXPECT_EQ(true, iovas[i] <= ranges[0].last); + EXPECT_EQ(true, iovas[i] + length > ranges[0].start); + EXPECT_EQ(true, iovas[i] + length <= ranges[0].last + 1); + } + for (i = 0; i != 10; i++) + test_ioctl_ioas_unmap(iovas[i], PAGE_SIZE * (i + 1)); +} + +TEST_F(iommufd_ioas, area_allowed) +{ + struct iommu_test_cmd test_cmd = { + .size = sizeof(test_cmd), + .op = IOMMU_TEST_OP_ADD_RESERVED, + .id = self->ioas_id, + .add_reserved = { .start = PAGE_SIZE * 4, + .length = PAGE_SIZE * 100 }, + }; + struct iommu_iova_range ranges[1] = {}; + struct iommu_ioas_allow_iovas allow_cmd = { + .size = sizeof(allow_cmd), + .ioas_id = self->ioas_id, + .num_iovas = 1, + .allowed_iovas = (uintptr_t)ranges, + }; + + /* Reserved intersects an allowed */ + allow_cmd.num_iovas = 1; + ranges[0].start = self->base_iova; + ranges[0].last = ranges[0].start + PAGE_SIZE * 600; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_ALLOW_IOVAS, &allow_cmd)); + test_cmd.add_reserved.start = ranges[0].start + PAGE_SIZE; + test_cmd.add_reserved.length = PAGE_SIZE; + EXPECT_ERRNO(EADDRINUSE, + ioctl(self->fd, + _IOMMU_TEST_CMD(IOMMU_TEST_OP_ADD_RESERVED), + &test_cmd)); + allow_cmd.num_iovas = 0; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_ALLOW_IOVAS, &allow_cmd)); + + /* Allowed intersects a reserved */ + ASSERT_EQ(0, + ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ADD_RESERVED), + &test_cmd)); + allow_cmd.num_iovas = 1; + ranges[0].start = self->base_iova; + ranges[0].last = ranges[0].start + PAGE_SIZE * 600; + EXPECT_ERRNO(EADDRINUSE, + ioctl(self->fd, IOMMU_IOAS_ALLOW_IOVAS, &allow_cmd)); +} + +TEST_F(iommufd_ioas, copy_area) +{ + struct iommu_ioas_copy copy_cmd = { + .size = sizeof(copy_cmd), + .flags = IOMMU_IOAS_MAP_FIXED_IOVA, + .dst_ioas_id = self->ioas_id, + .src_ioas_id = self->ioas_id, + .length = PAGE_SIZE, + }; + + 
test_ioctl_ioas_map_fixed(buffer, PAGE_SIZE, self->base_iova); + + /* Copy inside a single IOAS */ + copy_cmd.src_iova = self->base_iova; + copy_cmd.dst_iova = self->base_iova + PAGE_SIZE; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_COPY, &copy_cmd)); + + /* Copy between IOAS's */ + copy_cmd.src_iova = self->base_iova; + copy_cmd.dst_iova = 0; + test_ioctl_ioas_alloc(&copy_cmd.dst_ioas_id); + ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_COPY, &copy_cmd)); +} + +TEST_F(iommufd_ioas, iova_ranges) +{ + struct iommu_test_cmd test_cmd = { + .size = sizeof(test_cmd), + .op = IOMMU_TEST_OP_ADD_RESERVED, + .id = self->ioas_id, + .add_reserved = { .start = PAGE_SIZE, .length = PAGE_SIZE }, + }; + struct iommu_iova_range *ranges = buffer; + struct iommu_ioas_iova_ranges ranges_cmd = { + .size = sizeof(ranges_cmd), + .ioas_id = self->ioas_id, + .num_iovas = BUFFER_SIZE / sizeof(*ranges), + .allowed_iovas = (uintptr_t)ranges, + }; + + /* Range can be read */ + ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_IOVA_RANGES, &ranges_cmd)); + EXPECT_EQ(1, ranges_cmd.num_iovas); + if (!self->domain_id) { + EXPECT_EQ(0, ranges[0].start); + EXPECT_EQ(SIZE_MAX, ranges[0].last); + EXPECT_EQ(1, ranges_cmd.out_iova_alignment); + } else { + EXPECT_EQ(MOCK_APERTURE_START, ranges[0].start); + EXPECT_EQ(MOCK_APERTURE_LAST, ranges[0].last); + EXPECT_EQ(MOCK_PAGE_SIZE, ranges_cmd.out_iova_alignment); + } + + /* Buffer too small */ + memset(ranges, 0, BUFFER_SIZE); + ranges_cmd.num_iovas = 0; + EXPECT_ERRNO(EMSGSIZE, + ioctl(self->fd, IOMMU_IOAS_IOVA_RANGES, &ranges_cmd)); + EXPECT_EQ(1, ranges_cmd.num_iovas); + EXPECT_EQ(0, ranges[0].start); + EXPECT_EQ(0, ranges[0].last); + + /* 2 ranges */ + ASSERT_EQ(0, + ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ADD_RESERVED), + &test_cmd)); + ranges_cmd.num_iovas = BUFFER_SIZE / sizeof(*ranges); + ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_IOVA_RANGES, &ranges_cmd)); + if (!self->domain_id) { + EXPECT_EQ(2, ranges_cmd.num_iovas); + EXPECT_EQ(0, ranges[0].start); +
EXPECT_EQ(PAGE_SIZE - 1, ranges[0].last); + EXPECT_EQ(PAGE_SIZE * 2, ranges[1].start); + EXPECT_EQ(SIZE_MAX, ranges[1].last); + } else { + EXPECT_EQ(1, ranges_cmd.num_iovas); + EXPECT_EQ(MOCK_APERTURE_START, ranges[0].start); + EXPECT_EQ(MOCK_APERTURE_LAST, ranges[0].last); + } + + /* Buffer too small */ + memset(ranges, 0, BUFFER_SIZE); + ranges_cmd.num_iovas = 1; + if (!self->domain_id) { + EXPECT_ERRNO(EMSGSIZE, ioctl(self->fd, IOMMU_IOAS_IOVA_RANGES, + &ranges_cmd)); + EXPECT_EQ(2, ranges_cmd.num_iovas); + EXPECT_EQ(0, ranges[0].start); + EXPECT_EQ(PAGE_SIZE - 1, ranges[0].last); + } else { + ASSERT_EQ(0, + ioctl(self->fd, IOMMU_IOAS_IOVA_RANGES, &ranges_cmd)); + EXPECT_EQ(1, ranges_cmd.num_iovas); + EXPECT_EQ(MOCK_APERTURE_START, ranges[0].start); + EXPECT_EQ(MOCK_APERTURE_LAST, ranges[0].last); + } + EXPECT_EQ(0, ranges[1].start); + EXPECT_EQ(0, ranges[1].last); +} + +TEST_F(iommufd_ioas, access_pin) +{ + struct iommu_test_cmd access_cmd = { + .size = sizeof(access_cmd), + .op = IOMMU_TEST_OP_ACCESS_PAGES, + .access_pages = { .iova = MOCK_APERTURE_START, + .length = BUFFER_SIZE, + .uptr = (uintptr_t)buffer }, + }; + struct iommu_test_cmd check_map_cmd = { + .size = sizeof(check_map_cmd), + .op = IOMMU_TEST_OP_MD_CHECK_MAP, + .check_map = { .iova = MOCK_APERTURE_START, + .length = BUFFER_SIZE, + .uptr = (uintptr_t)buffer }, + }; + uint32_t access_pages_id; + unsigned int npages; + + test_cmd_create_access(self->ioas_id, &access_cmd.id, + MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES); + + for (npages = 1; npages < BUFFER_SIZE / PAGE_SIZE; npages++) { + uint32_t mock_device_id; + uint32_t mock_hwpt_id; + + access_cmd.access_pages.length = npages * PAGE_SIZE; + + /* Single map/unmap */ + test_ioctl_ioas_map_fixed(buffer, BUFFER_SIZE, + MOCK_APERTURE_START); + ASSERT_EQ(0, ioctl(self->fd, + _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES), + &access_cmd)); + test_cmd_destroy_access_pages( + access_cmd.id, + access_cmd.access_pages.out_access_pages_id); + + /* Double user 
*/ + ASSERT_EQ(0, ioctl(self->fd, + _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES), + &access_cmd)); + access_pages_id = access_cmd.access_pages.out_access_pages_id; + ASSERT_EQ(0, ioctl(self->fd, + _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES), + &access_cmd)); + test_cmd_destroy_access_pages( + access_cmd.id, + access_cmd.access_pages.out_access_pages_id); + test_cmd_destroy_access_pages(access_cmd.id, access_pages_id); + + /* Add/remove a domain with a user */ + ASSERT_EQ(0, ioctl(self->fd, + _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES), + &access_cmd)); + test_cmd_mock_domain(self->ioas_id, &mock_device_id, + &mock_hwpt_id); + check_map_cmd.id = mock_hwpt_id; + ASSERT_EQ(0, ioctl(self->fd, + _IOMMU_TEST_CMD(IOMMU_TEST_OP_MD_CHECK_MAP), + &check_map_cmd)); + + test_ioctl_destroy(mock_device_id); + test_ioctl_destroy(mock_hwpt_id); + test_cmd_destroy_access_pages( + access_cmd.id, + access_cmd.access_pages.out_access_pages_id); + + test_ioctl_ioas_unmap(MOCK_APERTURE_START, BUFFER_SIZE); + } + test_cmd_destroy_access(access_cmd.id); +} + +TEST_F(iommufd_ioas, access_pin_unmap) +{ + struct iommu_test_cmd access_pages_cmd = { + .size = sizeof(access_pages_cmd), + .op = IOMMU_TEST_OP_ACCESS_PAGES, + .access_pages = { .iova = MOCK_APERTURE_START, + .length = BUFFER_SIZE, + .uptr = (uintptr_t)buffer }, + }; + + test_cmd_create_access(self->ioas_id, &access_pages_cmd.id, + MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES); + test_ioctl_ioas_map_fixed(buffer, BUFFER_SIZE, MOCK_APERTURE_START); + ASSERT_EQ(0, + ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES), + &access_pages_cmd)); + + /* Trigger the unmap op */ + test_ioctl_ioas_unmap(MOCK_APERTURE_START, BUFFER_SIZE); + + /* kernel removed the item for us */ + test_err_destroy_access_pages( + ENOENT, access_pages_cmd.id, + access_pages_cmd.access_pages.out_access_pages_id); +} + +static void check_access_rw(struct __test_metadata *_metadata, int fd, + unsigned int access_id, uint64_t iova, + unsigned int def_flags) +{ + 
uint16_t tmp[32]; + struct iommu_test_cmd access_cmd = { + .size = sizeof(access_cmd), + .op = IOMMU_TEST_OP_ACCESS_RW, + .id = access_id, + .access_rw = { .uptr = (uintptr_t)tmp }, + }; + uint16_t *buffer16 = buffer; + unsigned int i; + void *tmp2; + + for (i = 0; i != BUFFER_SIZE / sizeof(*buffer16); i++) + buffer16[i] = rand(); + + for (access_cmd.access_rw.iova = iova + PAGE_SIZE - 50; + access_cmd.access_rw.iova < iova + PAGE_SIZE + 50; + access_cmd.access_rw.iova++) { + for (access_cmd.access_rw.length = 1; + access_cmd.access_rw.length < sizeof(tmp); + access_cmd.access_rw.length++) { + access_cmd.access_rw.flags = def_flags; + ASSERT_EQ(0, ioctl(fd, + _IOMMU_TEST_CMD( + IOMMU_TEST_OP_ACCESS_RW), + &access_cmd)); + ASSERT_EQ(0, + memcmp(buffer + (access_cmd.access_rw.iova - + iova), + tmp, access_cmd.access_rw.length)); + + for (i = 0; i != ARRAY_SIZE(tmp); i++) + tmp[i] = rand(); + access_cmd.access_rw.flags = def_flags | + MOCK_ACCESS_RW_WRITE; + ASSERT_EQ(0, ioctl(fd, + _IOMMU_TEST_CMD( + IOMMU_TEST_OP_ACCESS_RW), + &access_cmd)); + ASSERT_EQ(0, + memcmp(buffer + (access_cmd.access_rw.iova - + iova), + tmp, access_cmd.access_rw.length)); + } + } + + /* Multi-page test */ + tmp2 = malloc(BUFFER_SIZE); + ASSERT_NE(NULL, tmp2); + access_cmd.access_rw.iova = iova; + access_cmd.access_rw.length = BUFFER_SIZE; + access_cmd.access_rw.flags = def_flags; + access_cmd.access_rw.uptr = (uintptr_t)tmp2; + ASSERT_EQ(0, ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW), + &access_cmd)); + ASSERT_EQ(0, memcmp(buffer, tmp2, access_cmd.access_rw.length)); + free(tmp2); +} + +TEST_F(iommufd_ioas, access_rw) +{ + __u32 access_id; + __u64 iova; + + test_cmd_create_access(self->ioas_id, &access_id, 0); + test_ioctl_ioas_map(buffer, BUFFER_SIZE, &iova); + check_access_rw(_metadata, self->fd, access_id, iova, 0); + check_access_rw(_metadata, self->fd, access_id, iova, + MOCK_ACCESS_RW_SLOW_PATH); + test_ioctl_ioas_unmap(iova, BUFFER_SIZE); + test_cmd_destroy_access(access_id); 
+} + +TEST_F(iommufd_ioas, access_rw_unaligned) +{ + __u32 access_id; + __u64 iova; + + test_cmd_create_access(self->ioas_id, &access_id, 0); + + /* Unaligned pages */ + iova = self->base_iova + MOCK_PAGE_SIZE; + test_ioctl_ioas_map_fixed(buffer, BUFFER_SIZE, iova); + check_access_rw(_metadata, self->fd, access_id, iova, 0); + test_ioctl_ioas_unmap(iova, BUFFER_SIZE); + test_cmd_destroy_access(access_id); +} + +TEST_F(iommufd_ioas, fork_gone) +{ + __u32 access_id; + pid_t child; + + test_cmd_create_access(self->ioas_id, &access_id, 0); + + /* Create a mapping with a different mm */ + child = fork(); + if (!child) { + test_ioctl_ioas_map_fixed(buffer, BUFFER_SIZE, + MOCK_APERTURE_START); + exit(0); + } + ASSERT_NE(-1, child); + ASSERT_EQ(child, waitpid(child, NULL, 0)); + + if (self->domain_id) { + /* + * If a domain already existed then everything was pinned within + * the fork, so this copies from one domain to another. + */ + test_cmd_mock_domain(self->ioas_id, NULL, NULL); + check_access_rw(_metadata, self->fd, access_id, + MOCK_APERTURE_START, 0); + + } else { + /* + * Otherwise we need to actually pin pages which can't happen + * since the fork is gone. 
+ */ + test_err_mock_domain(EFAULT, self->ioas_id, NULL, NULL); + } + + test_cmd_destroy_access(access_id); +} + +TEST_F(iommufd_ioas, fork_present) +{ + __u32 access_id; + int pipefds[2]; + uint64_t tmp; + pid_t child; + int efd; + + test_cmd_create_access(self->ioas_id, &access_id, 0); + + ASSERT_EQ(0, pipe2(pipefds, O_CLOEXEC)); + efd = eventfd(0, EFD_CLOEXEC); + ASSERT_NE(-1, efd); + + /* Create a mapping with a different mm */ + child = fork(); + if (!child) { + __u64 iova; + uint64_t one = 1; + + close(pipefds[1]); + test_ioctl_ioas_map_fixed(buffer, BUFFER_SIZE, + MOCK_APERTURE_START); + if (write(efd, &one, sizeof(one)) != sizeof(one)) + exit(100); + if (read(pipefds[0], &iova, 1) != 1) + exit(100); + exit(0); + } + close(pipefds[0]); + ASSERT_NE(-1, child); + ASSERT_EQ(8, read(efd, &tmp, sizeof(tmp))); + + /* Read pages from the remote process */ + test_cmd_mock_domain(self->ioas_id, NULL, NULL); + check_access_rw(_metadata, self->fd, access_id, MOCK_APERTURE_START, 0); + + ASSERT_EQ(0, close(pipefds[1])); + ASSERT_EQ(child, waitpid(child, NULL, 0)); + + test_cmd_destroy_access(access_id); +} + +TEST_F(iommufd_ioas, ioas_option_huge_pages) +{ + struct iommu_option cmd = { + .size = sizeof(cmd), + .option_id = IOMMU_OPTION_HUGE_PAGES, + .op = IOMMU_OPTION_OP_GET, + .val64 = 3, + .object_id = self->ioas_id, + }; + + ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd)); + ASSERT_EQ(1, cmd.val64); + + cmd.op = IOMMU_OPTION_OP_SET; + cmd.val64 = 0; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd)); + + cmd.op = IOMMU_OPTION_OP_GET; + cmd.val64 = 3; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd)); + ASSERT_EQ(0, cmd.val64); + + cmd.op = IOMMU_OPTION_OP_SET; + cmd.val64 = 2; + EXPECT_ERRNO(EINVAL, ioctl(self->fd, IOMMU_OPTION, &cmd)); + + cmd.op = IOMMU_OPTION_OP_SET; + cmd.val64 = 1; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd)); +} + +TEST_F(iommufd_ioas, ioas_iova_alloc) +{ + unsigned int length; + __u64 iova; + + for (length = 1; length != PAGE_SIZE 
* 2; length++) { + if (variant->mock_domains && (length % MOCK_PAGE_SIZE)) { + test_err_ioctl_ioas_map(EINVAL, buffer, length, &iova); + } else { + test_ioctl_ioas_map(buffer, length, &iova); + test_ioctl_ioas_unmap(iova, length); + } + } +} + +TEST_F(iommufd_ioas, ioas_align_change) +{ + struct iommu_option cmd = { + .size = sizeof(cmd), + .option_id = IOMMU_OPTION_HUGE_PAGES, + .op = IOMMU_OPTION_OP_SET, + .object_id = self->ioas_id, + /* 0 means everything must be aligned to PAGE_SIZE */ + .val64 = 0, + }; + + /* + * We cannot upgrade the alignment using OPTION_HUGE_PAGES when a domain + * and map are present. + */ + if (variant->mock_domains) + return; + + /* + * We can upgrade to PAGE_SIZE alignment when things are aligned right + */ + test_ioctl_ioas_map_fixed(buffer, PAGE_SIZE, MOCK_APERTURE_START); + ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd)); + + /* Misalignment is rejected at map time */ + test_err_ioctl_ioas_map_fixed(EINVAL, buffer + MOCK_PAGE_SIZE, + PAGE_SIZE, + MOCK_APERTURE_START + PAGE_SIZE); + ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd)); + + /* Reduce alignment */ + cmd.val64 = 1; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd)); + + /* Confirm misalignment is rejected during alignment upgrade */ + test_ioctl_ioas_map_fixed(buffer + MOCK_PAGE_SIZE, PAGE_SIZE, + MOCK_APERTURE_START + PAGE_SIZE); + cmd.val64 = 0; + EXPECT_ERRNO(EADDRINUSE, ioctl(self->fd, IOMMU_OPTION, &cmd)); + + test_ioctl_ioas_unmap(MOCK_APERTURE_START + PAGE_SIZE, PAGE_SIZE); + test_ioctl_ioas_unmap(MOCK_APERTURE_START, PAGE_SIZE); +} + +TEST_F(iommufd_ioas, copy_sweep) +{ + struct iommu_ioas_copy copy_cmd = { + .size = sizeof(copy_cmd), + .flags = IOMMU_IOAS_MAP_FIXED_IOVA, + .src_ioas_id = self->ioas_id, + .dst_iova = MOCK_APERTURE_START, + .length = MOCK_PAGE_SIZE, + }; + unsigned int dst_ioas_id; + uint64_t last_iova; + uint64_t iova; + + test_ioctl_ioas_alloc(&dst_ioas_id); + copy_cmd.dst_ioas_id = dst_ioas_id; + + if (variant->mock_domains) + last_iova = 
MOCK_APERTURE_START + BUFFER_SIZE - 1; + else + last_iova = MOCK_APERTURE_START + BUFFER_SIZE - 2; + + test_ioctl_ioas_map_fixed(buffer, last_iova - MOCK_APERTURE_START + 1, + MOCK_APERTURE_START); + + for (iova = MOCK_APERTURE_START - PAGE_SIZE; iova <= last_iova; + iova += 511) { + copy_cmd.src_iova = iova; + if (iova < MOCK_APERTURE_START || + iova + copy_cmd.length - 1 > last_iova) { + EXPECT_ERRNO(ENOENT, ioctl(self->fd, IOMMU_IOAS_COPY, + &copy_cmd)); + } else { + ASSERT_EQ(0, + ioctl(self->fd, IOMMU_IOAS_COPY, &copy_cmd)); + test_ioctl_ioas_unmap_id(dst_ioas_id, copy_cmd.dst_iova, + copy_cmd.length); + } + } + + test_ioctl_destroy(dst_ioas_id); +} + +FIXTURE(iommufd_mock_domain) +{ + int fd; + uint32_t ioas_id; + uint32_t domain_id; + uint32_t domain_ids[2]; + int mmap_flags; + size_t mmap_buf_size; +}; + +FIXTURE_VARIANT(iommufd_mock_domain) +{ + unsigned int mock_domains; + bool hugepages; +}; + +FIXTURE_SETUP(iommufd_mock_domain) +{ + unsigned int i; + + self->fd = open("/dev/iommu", O_RDWR); + ASSERT_NE(-1, self->fd); + test_ioctl_ioas_alloc(&self->ioas_id); + + ASSERT_GE(ARRAY_SIZE(self->domain_ids), variant->mock_domains); + + for (i = 0; i != variant->mock_domains; i++) + test_cmd_mock_domain(self->ioas_id, NULL, &self->domain_ids[i]); + self->domain_id = self->domain_ids[0]; + + self->mmap_flags = MAP_SHARED | MAP_ANONYMOUS; + self->mmap_buf_size = PAGE_SIZE * 8; + if (variant->hugepages) { + /* + * MAP_POPULATE will cause the kernel to fail mmap if THPs are + * not available.
+ */ + self->mmap_flags |= MAP_HUGETLB | MAP_POPULATE; + self->mmap_buf_size = HUGEPAGE_SIZE * 2; + } +} + +FIXTURE_TEARDOWN(iommufd_mock_domain) +{ + teardown_iommufd(self->fd, _metadata); +} + +FIXTURE_VARIANT_ADD(iommufd_mock_domain, one_domain) +{ + .mock_domains = 1, + .hugepages = false, +}; + +FIXTURE_VARIANT_ADD(iommufd_mock_domain, two_domains) +{ + .mock_domains = 2, + .hugepages = false, +}; + +FIXTURE_VARIANT_ADD(iommufd_mock_domain, one_domain_hugepage) +{ + .mock_domains = 1, + .hugepages = true, +}; + +FIXTURE_VARIANT_ADD(iommufd_mock_domain, two_domains_hugepage) +{ + .mock_domains = 2, + .hugepages = true, +}; + +/* Have the kernel check that the user pages made it to the iommu_domain */ +#define check_mock_iova(_ptr, _iova, _length) \ + ({ \ + struct iommu_test_cmd check_map_cmd = { \ + .size = sizeof(check_map_cmd), \ + .op = IOMMU_TEST_OP_MD_CHECK_MAP, \ + .id = self->domain_id, \ + .check_map = { .iova = _iova, \ + .length = _length, \ + .uptr = (uintptr_t)(_ptr) }, \ + }; \ + ASSERT_EQ(0, \ + ioctl(self->fd, \ + _IOMMU_TEST_CMD(IOMMU_TEST_OP_MD_CHECK_MAP), \ + &check_map_cmd)); \ + if (self->domain_ids[1]) { \ + check_map_cmd.id = self->domain_ids[1]; \ + ASSERT_EQ(0, \ + ioctl(self->fd, \ + _IOMMU_TEST_CMD( \ + IOMMU_TEST_OP_MD_CHECK_MAP), \ + &check_map_cmd)); \ + } \ + }) + +TEST_F(iommufd_mock_domain, basic) +{ + size_t buf_size = self->mmap_buf_size; + uint8_t *buf; + __u64 iova; + + /* Simple one page map */ + test_ioctl_ioas_map(buffer, PAGE_SIZE, &iova); + check_mock_iova(buffer, iova, PAGE_SIZE); + + buf = mmap(0, buf_size, PROT_READ | PROT_WRITE, self->mmap_flags, -1, + 0); + ASSERT_NE(MAP_FAILED, buf); + + /* EFAULT half way through mapping */ + ASSERT_EQ(0, munmap(buf + buf_size / 2, buf_size / 2)); + test_err_ioctl_ioas_map(EFAULT, buf, buf_size, &iova); + + /* EFAULT on first page */ + ASSERT_EQ(0, munmap(buf, buf_size / 2)); + test_err_ioctl_ioas_map(EFAULT, buf, buf_size, &iova); +} + +TEST_F(iommufd_mock_domain, ro_unshare) +{ 
+ uint8_t *buf; + __u64 iova; + int fd; + + fd = open("/proc/self/exe", O_RDONLY); + ASSERT_NE(-1, fd); + + buf = mmap(0, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + ASSERT_NE(MAP_FAILED, buf); + close(fd); + + /* + * There have been lots of changes to the "unshare" mechanism in + * get_user_pages(), make sure it works right. The write to the page + * after we map it for reading should not change the assigned PFN. + */ + ASSERT_EQ(0, + _test_ioctl_ioas_map(self->fd, self->ioas_id, buf, PAGE_SIZE, + &iova, IOMMU_IOAS_MAP_READABLE)); + check_mock_iova(buf, iova, PAGE_SIZE); + memset(buf, 1, PAGE_SIZE); + check_mock_iova(buf, iova, PAGE_SIZE); + ASSERT_EQ(0, munmap(buf, PAGE_SIZE)); +} + +TEST_F(iommufd_mock_domain, all_aligns) +{ + size_t test_step = variant->hugepages ? (self->mmap_buf_size / 16) : + MOCK_PAGE_SIZE; + size_t buf_size = self->mmap_buf_size; + unsigned int start; + unsigned int end; + uint8_t *buf; + + buf = mmap(0, buf_size, PROT_READ | PROT_WRITE, self->mmap_flags, -1, + 0); + ASSERT_NE(MAP_FAILED, buf); + check_refs(buf, buf_size, 0); + + /* + * Map every combination of page size and alignment within a big region, + * less for hugepage case as it takes so long to finish. + */ + for (start = 0; start < buf_size; start += test_step) { + if (variant->hugepages) + end = buf_size; + else + end = start + MOCK_PAGE_SIZE; + for (; end < buf_size; end += MOCK_PAGE_SIZE) { + size_t length = end - start; + __u64 iova; + + test_ioctl_ioas_map(buf + start, length, &iova); + check_mock_iova(buf + start, iova, length); + check_refs(buf + start / PAGE_SIZE * PAGE_SIZE, + end / PAGE_SIZE * PAGE_SIZE - + start / PAGE_SIZE * PAGE_SIZE, + 1); + + test_ioctl_ioas_unmap(iova, length); + } + } + check_refs(buf, buf_size, 0); + ASSERT_EQ(0, munmap(buf, buf_size)); +} + +TEST_F(iommufd_mock_domain, all_aligns_copy) +{ + size_t test_step = variant->hugepages ? 
self->mmap_buf_size / 16 : + MOCK_PAGE_SIZE; + size_t buf_size = self->mmap_buf_size; + unsigned int start; + unsigned int end; + uint8_t *buf; + + buf = mmap(0, buf_size, PROT_READ | PROT_WRITE, self->mmap_flags, -1, + 0); + ASSERT_NE(MAP_FAILED, buf); + check_refs(buf, buf_size, 0); + + /* + * Map every combination of page size and alignment within a big region, + * less for hugepage case as it takes so long to finish. + */ + for (start = 0; start < buf_size; start += test_step) { + if (variant->hugepages) + end = buf_size; + else + end = start + MOCK_PAGE_SIZE; + for (; end < buf_size; end += MOCK_PAGE_SIZE) { + size_t length = end - start; + unsigned int old_id; + uint32_t mock_device_id; + __u64 iova; + + test_ioctl_ioas_map(buf + start, length, &iova); + + /* Add and destroy a domain while the area exists */ + old_id = self->domain_ids[1]; + test_cmd_mock_domain(self->ioas_id, &mock_device_id, + &self->domain_ids[1]); + + check_mock_iova(buf + start, iova, length); + check_refs(buf + start / PAGE_SIZE * PAGE_SIZE, + end / PAGE_SIZE * PAGE_SIZE - + start / PAGE_SIZE * PAGE_SIZE, + 1); + + test_ioctl_destroy(mock_device_id); + test_ioctl_destroy(self->domain_ids[1]); + self->domain_ids[1] = old_id; + + test_ioctl_ioas_unmap(iova, length); + } + } + check_refs(buf, buf_size, 0); + ASSERT_EQ(0, munmap(buf, buf_size)); +} + +TEST_F(iommufd_mock_domain, user_copy) +{ + struct iommu_test_cmd access_cmd = { + .size = sizeof(access_cmd), + .op = IOMMU_TEST_OP_ACCESS_PAGES, + .access_pages = { .length = BUFFER_SIZE, + .uptr = (uintptr_t)buffer }, + }; + struct iommu_ioas_copy copy_cmd = { + .size = sizeof(copy_cmd), + .flags = IOMMU_IOAS_MAP_FIXED_IOVA, + .dst_ioas_id = self->ioas_id, + .dst_iova = MOCK_APERTURE_START, + .length = BUFFER_SIZE, + }; + unsigned int ioas_id; + + /* Pin the pages in an IOAS with no domains then copy to an IOAS with domains */ + test_ioctl_ioas_alloc(&ioas_id); + test_ioctl_ioas_map_id(ioas_id, buffer, BUFFER_SIZE, + &copy_cmd.src_iova); + +
test_cmd_create_access(ioas_id, &access_cmd.id, + MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES); + + access_cmd.access_pages.iova = copy_cmd.src_iova; + ASSERT_EQ(0, + ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES), + &access_cmd)); + copy_cmd.src_ioas_id = ioas_id; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_COPY, &copy_cmd)); + check_mock_iova(buffer, MOCK_APERTURE_START, BUFFER_SIZE); + + test_cmd_destroy_access_pages( + access_cmd.id, access_cmd.access_pages.out_access_pages_id); + test_cmd_destroy_access(access_cmd.id); + + test_ioctl_destroy(ioas_id); +} + +/* VFIO compatibility IOCTLs */ + +TEST_F(iommufd, simple_ioctls) +{ + ASSERT_EQ(VFIO_API_VERSION, ioctl(self->fd, VFIO_GET_API_VERSION)); + ASSERT_EQ(1, ioctl(self->fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1v2_IOMMU)); +} + +TEST_F(iommufd, unmap_cmd) +{ + struct vfio_iommu_type1_dma_unmap unmap_cmd = { + .iova = MOCK_APERTURE_START, + .size = PAGE_SIZE, + }; + + unmap_cmd.argsz = 1; + EXPECT_ERRNO(EINVAL, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd)); + + unmap_cmd.argsz = sizeof(unmap_cmd); + unmap_cmd.flags = 1 << 31; + EXPECT_ERRNO(EINVAL, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd)); + + unmap_cmd.flags = 0; + EXPECT_ERRNO(ENODEV, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd)); +} + +TEST_F(iommufd, map_cmd) +{ + struct vfio_iommu_type1_dma_map map_cmd = { + .iova = MOCK_APERTURE_START, + .size = PAGE_SIZE, + .vaddr = (__u64)buffer, + }; + + map_cmd.argsz = 1; + EXPECT_ERRNO(EINVAL, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd)); + + map_cmd.argsz = sizeof(map_cmd); + map_cmd.flags = 1 << 31; + EXPECT_ERRNO(EINVAL, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd)); + + /* Requires a domain to be attached */ + map_cmd.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; + EXPECT_ERRNO(ENODEV, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd)); +} + +TEST_F(iommufd, info_cmd) +{ + struct vfio_iommu_type1_info info_cmd = {}; + + /* Invalid argsz */ + info_cmd.argsz
= 1; + EXPECT_ERRNO(EINVAL, ioctl(self->fd, VFIO_IOMMU_GET_INFO, &info_cmd)); + + info_cmd.argsz = sizeof(info_cmd); + EXPECT_ERRNO(ENODEV, ioctl(self->fd, VFIO_IOMMU_GET_INFO, &info_cmd)); +} + +TEST_F(iommufd, set_iommu_cmd) +{ + /* Requires a domain to be attached */ + EXPECT_ERRNO(ENODEV, + ioctl(self->fd, VFIO_SET_IOMMU, VFIO_TYPE1v2_IOMMU)); + EXPECT_ERRNO(ENODEV, ioctl(self->fd, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU)); +} + +TEST_F(iommufd, vfio_ioas) +{ + struct iommu_vfio_ioas vfio_ioas_cmd = { + .size = sizeof(vfio_ioas_cmd), + .op = IOMMU_VFIO_IOAS_GET, + }; + __u32 ioas_id; + + /* ENODEV if there is no compat ioas */ + EXPECT_ERRNO(ENODEV, ioctl(self->fd, IOMMU_VFIO_IOAS, &vfio_ioas_cmd)); + + /* Invalid id for set */ + vfio_ioas_cmd.op = IOMMU_VFIO_IOAS_SET; + EXPECT_ERRNO(ENOENT, ioctl(self->fd, IOMMU_VFIO_IOAS, &vfio_ioas_cmd)); + + /* Valid id for set*/ + test_ioctl_ioas_alloc(&ioas_id); + vfio_ioas_cmd.ioas_id = ioas_id; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_VFIO_IOAS, &vfio_ioas_cmd)); + + /* Same id comes back from get */ + vfio_ioas_cmd.op = IOMMU_VFIO_IOAS_GET; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_VFIO_IOAS, &vfio_ioas_cmd)); + ASSERT_EQ(ioas_id, vfio_ioas_cmd.ioas_id); + + /* Clear works */ + vfio_ioas_cmd.op = IOMMU_VFIO_IOAS_CLEAR; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_VFIO_IOAS, &vfio_ioas_cmd)); + vfio_ioas_cmd.op = IOMMU_VFIO_IOAS_GET; + EXPECT_ERRNO(ENODEV, ioctl(self->fd, IOMMU_VFIO_IOAS, &vfio_ioas_cmd)); +} + +FIXTURE(vfio_compat_mock_domain) +{ + int fd; + uint32_t ioas_id; +}; + +FIXTURE_VARIANT(vfio_compat_mock_domain) +{ + unsigned int version; +}; + +FIXTURE_SETUP(vfio_compat_mock_domain) +{ + struct iommu_vfio_ioas vfio_ioas_cmd = { + .size = sizeof(vfio_ioas_cmd), + .op = IOMMU_VFIO_IOAS_SET, + }; + + self->fd = open("/dev/iommu", O_RDWR); + ASSERT_NE(-1, self->fd); + + /* Create what VFIO would consider a group */ + test_ioctl_ioas_alloc(&self->ioas_id); + test_cmd_mock_domain(self->ioas_id, NULL, NULL); + + /* Attach it to the vfio 
compat */ + vfio_ioas_cmd.ioas_id = self->ioas_id; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_VFIO_IOAS, &vfio_ioas_cmd)); + ASSERT_EQ(0, ioctl(self->fd, VFIO_SET_IOMMU, variant->version)); +} + +FIXTURE_TEARDOWN(vfio_compat_mock_domain) +{ + teardown_iommufd(self->fd, _metadata); +} + +FIXTURE_VARIANT_ADD(vfio_compat_mock_domain, Ver1v2) +{ + .version = VFIO_TYPE1v2_IOMMU, +}; + +FIXTURE_VARIANT_ADD(vfio_compat_mock_domain, Ver1v0) +{ + .version = VFIO_TYPE1_IOMMU, +}; + +TEST_F(vfio_compat_mock_domain, simple_close) +{ +} + +TEST_F(vfio_compat_mock_domain, option_huge_pages) +{ + struct iommu_option cmd = { + .size = sizeof(cmd), + .option_id = IOMMU_OPTION_HUGE_PAGES, + .op = IOMMU_OPTION_OP_GET, + .val64 = 3, + .object_id = self->ioas_id, + }; + + ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd)); + if (variant->version == VFIO_TYPE1_IOMMU) { + ASSERT_EQ(0, cmd.val64); + } else { + ASSERT_EQ(1, cmd.val64); + } +} + +/* + * Execute an ioctl command stored in buffer and check that the result does not + * overflow memory. 
+ */ +static bool is_filled(const void *buf, uint8_t c, size_t len) +{ + const uint8_t *cbuf = buf; + + for (; len; cbuf++, len--) + if (*cbuf != c) + return false; + return true; +} + +#define ioctl_check_buf(fd, cmd) \ + ({ \ + size_t _cmd_len = *(__u32 *)buffer; \ + \ + memset(buffer + _cmd_len, 0xAA, BUFFER_SIZE - _cmd_len); \ + ASSERT_EQ(0, ioctl(fd, cmd, buffer)); \ + ASSERT_EQ(true, is_filled(buffer + _cmd_len, 0xAA, \ + BUFFER_SIZE - _cmd_len)); \ + }) + +static void check_vfio_info_cap_chain(struct __test_metadata *_metadata, + struct vfio_iommu_type1_info *info_cmd) +{ + const struct vfio_info_cap_header *cap; + + ASSERT_GE(info_cmd->argsz, info_cmd->cap_offset + sizeof(*cap)); + cap = buffer + info_cmd->cap_offset; + while (true) { + size_t cap_size; + + if (cap->next) + cap_size = (buffer + cap->next) - (void *)cap; + else + cap_size = (buffer + info_cmd->argsz) - (void *)cap; + + switch (cap->id) { + case VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE: { + struct vfio_iommu_type1_info_cap_iova_range *data = + (void *)cap; + + ASSERT_EQ(1, data->header.version); + ASSERT_EQ(1, data->nr_iovas); + EXPECT_EQ(MOCK_APERTURE_START, + data->iova_ranges[0].start); + EXPECT_EQ(MOCK_APERTURE_LAST, data->iova_ranges[0].end); + break; + } + case VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL: { + struct vfio_iommu_type1_info_dma_avail *data = + (void *)cap; + + ASSERT_EQ(1, data->header.version); + ASSERT_EQ(sizeof(*data), cap_size); + break; + } + default: + ASSERT_EQ(false, true); + break; + } + if (!cap->next) + break; + + ASSERT_GE(info_cmd->argsz, cap->next + sizeof(*cap)); + ASSERT_GE(buffer + cap->next, (void *)cap); + cap = buffer + cap->next; + } +} + +TEST_F(vfio_compat_mock_domain, get_info) +{ + struct vfio_iommu_type1_info *info_cmd = buffer; + unsigned int i; + size_t caplen; + + /* Pre-cap ABI */ + *info_cmd = (struct vfio_iommu_type1_info){ + .argsz = offsetof(struct vfio_iommu_type1_info, cap_offset), + }; + ioctl_check_buf(self->fd, VFIO_IOMMU_GET_INFO); + ASSERT_NE(0, 
info_cmd->iova_pgsizes); + ASSERT_EQ(VFIO_IOMMU_INFO_PGSIZES | VFIO_IOMMU_INFO_CAPS, + info_cmd->flags); + + /* Read the cap chain size */ + *info_cmd = (struct vfio_iommu_type1_info){ + .argsz = sizeof(*info_cmd), + }; + ioctl_check_buf(self->fd, VFIO_IOMMU_GET_INFO); + ASSERT_NE(0, info_cmd->iova_pgsizes); + ASSERT_EQ(VFIO_IOMMU_INFO_PGSIZES | VFIO_IOMMU_INFO_CAPS, + info_cmd->flags); + ASSERT_EQ(0, info_cmd->cap_offset); + ASSERT_LT(sizeof(*info_cmd), info_cmd->argsz); + + /* Read the caps, kernel should never create a corrupted caps */ + caplen = info_cmd->argsz; + for (i = sizeof(*info_cmd); i < caplen; i++) { + *info_cmd = (struct vfio_iommu_type1_info){ + .argsz = i, + }; + ioctl_check_buf(self->fd, VFIO_IOMMU_GET_INFO); + ASSERT_EQ(VFIO_IOMMU_INFO_PGSIZES | VFIO_IOMMU_INFO_CAPS, + info_cmd->flags); + if (!info_cmd->cap_offset) + continue; + check_vfio_info_cap_chain(_metadata, info_cmd); + } +} + +static void shuffle_array(unsigned long *array, size_t nelms) +{ + unsigned int i; + + /* Shuffle */ + for (i = 0; i != nelms; i++) { + unsigned long tmp = array[i]; + unsigned int other = rand() % (nelms - i); + + array[i] = array[other]; + array[other] = tmp; + } +} + +TEST_F(vfio_compat_mock_domain, map) +{ + struct vfio_iommu_type1_dma_map map_cmd = { + .argsz = sizeof(map_cmd), + .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE, + .vaddr = (uintptr_t)buffer, + .size = BUFFER_SIZE, + .iova = MOCK_APERTURE_START, + }; + struct vfio_iommu_type1_dma_unmap unmap_cmd = { + .argsz = sizeof(unmap_cmd), + .size = BUFFER_SIZE, + .iova = MOCK_APERTURE_START, + }; + unsigned long pages_iova[BUFFER_SIZE / PAGE_SIZE]; + unsigned int i; + + /* Simple map/unmap */ + ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd)); + ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd)); + ASSERT_EQ(BUFFER_SIZE, unmap_cmd.size); + + /* UNMAP_FLAG_ALL requres 0 iova/size */ + ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd)); + unmap_cmd.flags = 
VFIO_DMA_UNMAP_FLAG_ALL; + EXPECT_ERRNO(EINVAL, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd)); + + unmap_cmd.iova = 0; + unmap_cmd.size = 0; + ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd)); + ASSERT_EQ(BUFFER_SIZE, unmap_cmd.size); + + /* Small pages */ + for (i = 0; i != ARRAY_SIZE(pages_iova); i++) { + map_cmd.iova = pages_iova[i] = + MOCK_APERTURE_START + i * PAGE_SIZE; + map_cmd.vaddr = (uintptr_t)buffer + i * PAGE_SIZE; + map_cmd.size = PAGE_SIZE; + ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd)); + } + shuffle_array(pages_iova, ARRAY_SIZE(pages_iova)); + + unmap_cmd.flags = 0; + unmap_cmd.size = PAGE_SIZE; + for (i = 0; i != ARRAY_SIZE(pages_iova); i++) { + unmap_cmd.iova = pages_iova[i]; + ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd)); + } +} + +TEST_F(vfio_compat_mock_domain, huge_map) +{ + size_t buf_size = HUGEPAGE_SIZE * 2; + struct vfio_iommu_type1_dma_map map_cmd = { + .argsz = sizeof(map_cmd), + .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE, + .size = buf_size, + .iova = MOCK_APERTURE_START, + }; + struct vfio_iommu_type1_dma_unmap unmap_cmd = { + .argsz = sizeof(unmap_cmd), + }; + unsigned long pages_iova[16]; + unsigned int i; + void *buf; + + /* Test huge pages and splitting */ + buf = mmap(0, buf_size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, -1, + 0); + ASSERT_NE(MAP_FAILED, buf); + map_cmd.vaddr = (uintptr_t)buf; + ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd)); + + unmap_cmd.size = buf_size / ARRAY_SIZE(pages_iova); + for (i = 0; i != ARRAY_SIZE(pages_iova); i++) + pages_iova[i] = MOCK_APERTURE_START + (i * unmap_cmd.size); + shuffle_array(pages_iova, ARRAY_SIZE(pages_iova)); + + /* type1 mode can cut up larger mappings, type1v2 always fails */ + for (i = 0; i != ARRAY_SIZE(pages_iova); i++) { + unmap_cmd.iova = pages_iova[i]; + unmap_cmd.size = buf_size / ARRAY_SIZE(pages_iova); + if (variant->version == VFIO_TYPE1_IOMMU) { + 
ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, + &unmap_cmd)); + } else { + EXPECT_ERRNO(ENOENT, + ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, + &unmap_cmd)); + } + } +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/iommu/iommufd_fail_nth.c b/tools/testing/selftests/iommu/iommufd_fail_nth.c new file mode 100644 index 000000000000..9713111b820d --- /dev/null +++ b/tools/testing/selftests/iommu/iommufd_fail_nth.c @@ -0,0 +1,580 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES + * + * These tests are "kernel integrity" tests. They are looking for kernel + * WARN/OOPS/KASAN/etc splats triggered by kernel sanitizers & debugging + * features. It does not attempt to verify that the system calls are doing what + * they are supposed to do. + * + * The basic philosophy is to run a sequence of calls that will succeed and then + * sweep every failure injection point on that call chain to look for + * interesting things in error handling. + * + * This test is best run with: + * echo 1 > /proc/sys/kernel/panic_on_warn + * If something is actually going wrong. + */ +#include +#include + +#define __EXPORTED_HEADERS__ +#include + +#include "iommufd_utils.h" + +static bool have_fault_injection; + +static int writeat(int dfd, const char *fn, const char *val) +{ + size_t val_len = strlen(val); + ssize_t res; + int fd; + + fd = openat(dfd, fn, O_WRONLY); + if (fd == -1) + return -1; + res = write(fd, val, val_len); + assert(res == val_len); + close(fd); + return 0; +} + +static __attribute__((constructor)) void setup_buffer(void) +{ + BUFFER_SIZE = 2*1024*1024; + + buffer = mmap(0, BUFFER_SIZE, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); +} + +/* + * This sets up fail_injection in a way that is useful for this test. + * It does not attempt to restore things back to how they were. 
+ */ +static __attribute__((constructor)) void setup_fault_injection(void) +{ + DIR *debugfs = opendir("/sys/kernel/debug/"); + struct dirent *dent; + + if (!debugfs) + return; + + /* Allow any allocation call to be fault injected */ + if (writeat(dirfd(debugfs), "failslab/ignore-gfp-wait", "N")) + return; + writeat(dirfd(debugfs), "fail_page_alloc/ignore-gfp-wait", "N"); + writeat(dirfd(debugfs), "fail_page_alloc/ignore-gfp-highmem", "N"); + + while ((dent = readdir(debugfs))) { + char fn[300]; + + if (strncmp(dent->d_name, "fail", 4) != 0) + continue; + + /* We are looking for kernel splats, quiet down the log */ + snprintf(fn, sizeof(fn), "%s/verbose", dent->d_name); + writeat(dirfd(debugfs), fn, "0"); + } + closedir(debugfs); + have_fault_injection = true; +} + +struct fail_nth_state { + int proc_fd; + unsigned int iteration; +}; + +static void fail_nth_first(struct __test_metadata *_metadata, + struct fail_nth_state *nth_state) +{ + char buf[300]; + + snprintf(buf, sizeof(buf), "/proc/self/task/%u/fail-nth", getpid()); + nth_state->proc_fd = open(buf, O_RDWR); + ASSERT_NE(-1, nth_state->proc_fd); +} + +static bool fail_nth_next(struct __test_metadata *_metadata, + struct fail_nth_state *nth_state, + int test_result) +{ + static const char disable_nth[] = "0"; + char buf[300]; + + /* + * This is just an arbitrary limit based on the current kernel + * situation. Changes in the kernel can dramatically change the number of + * required fault injection sites, so if this hits it doesn't + * necessarily mean a test failure, just that the limit has to be made + * bigger. + */ + ASSERT_GT(400, nth_state->iteration); + if (nth_state->iteration != 0) { + ssize_t res; + ssize_t res2; + + buf[0] = 0; + /* + * Annoyingly disabling the nth can also fail. 
This means + * the test passed without triggering failure + */ + res = pread(nth_state->proc_fd, buf, sizeof(buf), 0); + if (res == -1 && errno == EFAULT) { + buf[0] = '1'; + buf[1] = '\n'; + res = 2; + } + + res2 = pwrite(nth_state->proc_fd, disable_nth, + ARRAY_SIZE(disable_nth) - 1, 0); + if (res2 == -1 && errno == EFAULT) { + res2 = pwrite(nth_state->proc_fd, disable_nth, + ARRAY_SIZE(disable_nth) - 1, 0); + buf[0] = '1'; + buf[1] = '\n'; + } + ASSERT_EQ(ARRAY_SIZE(disable_nth) - 1, res2); + + /* printf(" nth %u result=%d nth=%u\n", nth_state->iteration, + test_result, atoi(buf)); */ + fflush(stdout); + ASSERT_LT(1, res); + if (res != 2 || buf[0] != '0' || buf[1] != '\n') + return false; + } else { + /* printf(" nth %u result=%d\n", nth_state->iteration, + test_result); */ + } + nth_state->iteration++; + return true; +} + +/* + * This is called during the test to start failure injection. It allows the test + * to do some setup that has already been swept and thus reduce the required + * iterations. 
+ */ +void __fail_nth_enable(struct __test_metadata *_metadata, + struct fail_nth_state *nth_state) +{ + char buf[300]; + size_t len; + + if (!nth_state->iteration) + return; + + len = snprintf(buf, sizeof(buf), "%u", nth_state->iteration); + ASSERT_EQ(len, pwrite(nth_state->proc_fd, buf, len, 0)); +} +#define fail_nth_enable() __fail_nth_enable(_metadata, _nth_state) + +#define TEST_FAIL_NTH(fixture_name, name) \ + static int test_nth_##name(struct __test_metadata *_metadata, \ + FIXTURE_DATA(fixture_name) *self, \ + const FIXTURE_VARIANT(fixture_name) \ + *variant, \ + struct fail_nth_state *_nth_state); \ + TEST_F(fixture_name, name) \ + { \ + struct fail_nth_state nth_state = {}; \ + int test_result = 0; \ + \ + if (!have_fault_injection) \ + SKIP(return, \ + "fault injection is not enabled in the kernel"); \ + fail_nth_first(_metadata, &nth_state); \ + ASSERT_EQ(0, test_nth_##name(_metadata, self, variant, \ + &nth_state)); \ + while (fail_nth_next(_metadata, &nth_state, test_result)) { \ + fixture_name##_teardown(_metadata, self, variant); \ + fixture_name##_setup(_metadata, self, variant); \ + test_result = test_nth_##name(_metadata, self, \ + variant, &nth_state); \ + }; \ + ASSERT_EQ(0, test_result); \ + } \ + static int test_nth_##name( \ + struct __test_metadata __attribute__((unused)) *_metadata, \ + FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \ + const FIXTURE_VARIANT(fixture_name) __attribute__((unused)) \ + *variant, \ + struct fail_nth_state *_nth_state) + +FIXTURE(basic_fail_nth) +{ + int fd; + uint32_t access_id; +}; + +FIXTURE_SETUP(basic_fail_nth) +{ + self->fd = -1; + self->access_id = 0; +} + +FIXTURE_TEARDOWN(basic_fail_nth) +{ + int rc; + + if (self->access_id) { + /* The access FD holds the iommufd open until it closes */ + rc = _test_cmd_destroy_access(self->access_id); + assert(rc == 0); + } + teardown_iommufd(self->fd, _metadata); +} + +/* Cover ioas.c */ +TEST_FAIL_NTH(basic_fail_nth, basic) +{ + struct iommu_iova_range 
ranges[10]; + uint32_t ioas_id; + __u64 iova; + + fail_nth_enable(); + + self->fd = open("/dev/iommu", O_RDWR); + if (self->fd == -1) + return -1; + + if (_test_ioctl_ioas_alloc(self->fd, &ioas_id)) + return -1; + + { + struct iommu_ioas_iova_ranges ranges_cmd = { + .size = sizeof(ranges_cmd), + .num_iovas = ARRAY_SIZE(ranges), + .ioas_id = ioas_id, + .allowed_iovas = (uintptr_t)ranges, + }; + if (ioctl(self->fd, IOMMU_IOAS_IOVA_RANGES, &ranges_cmd)) + return -1; + } + + { + struct iommu_ioas_allow_iovas allow_cmd = { + .size = sizeof(allow_cmd), + .ioas_id = ioas_id, + .num_iovas = 1, + .allowed_iovas = (uintptr_t)ranges, + }; + + ranges[0].start = 16*1024; + ranges[0].last = BUFFER_SIZE + 16 * 1024 * 600 - 1; + if (ioctl(self->fd, IOMMU_IOAS_ALLOW_IOVAS, &allow_cmd)) + return -1; + } + + if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, BUFFER_SIZE, &iova, + IOMMU_IOAS_MAP_WRITEABLE | + IOMMU_IOAS_MAP_READABLE)) + return -1; + + { + struct iommu_ioas_copy copy_cmd = { + .size = sizeof(copy_cmd), + .flags = IOMMU_IOAS_MAP_WRITEABLE | + IOMMU_IOAS_MAP_READABLE, + .dst_ioas_id = ioas_id, + .src_ioas_id = ioas_id, + .src_iova = iova, + .length = sizeof(ranges), + }; + + if (ioctl(self->fd, IOMMU_IOAS_COPY, &copy_cmd)) + return -1; + } + + if (_test_ioctl_ioas_unmap(self->fd, ioas_id, iova, BUFFER_SIZE, + NULL)) + return -1; + /* Failure path of no IOVA to unmap */ + _test_ioctl_ioas_unmap(self->fd, ioas_id, iova, BUFFER_SIZE, NULL); + return 0; +} + +/* iopt_area_fill_domains() and iopt_area_fill_domain() */ +TEST_FAIL_NTH(basic_fail_nth, map_domain) +{ + uint32_t ioas_id; + __u32 device_id; + __u32 hwpt_id; + __u64 iova; + + self->fd = open("/dev/iommu", O_RDWR); + if (self->fd == -1) + return -1; + + if (_test_ioctl_ioas_alloc(self->fd, &ioas_id)) + return -1; + + if (_test_ioctl_set_temp_memory_limit(self->fd, 32)) + return -1; + + fail_nth_enable(); + + if (_test_cmd_mock_domain(self->fd, ioas_id, &device_id, &hwpt_id)) + return -1; + + if 
(_test_ioctl_ioas_map(self->fd, ioas_id, buffer, 262144, &iova, + IOMMU_IOAS_MAP_WRITEABLE | + IOMMU_IOAS_MAP_READABLE)) + return -1; + + if (_test_ioctl_destroy(self->fd, device_id)) + return -1; + if (_test_ioctl_destroy(self->fd, hwpt_id)) + return -1; + + if (_test_cmd_mock_domain(self->fd, ioas_id, &device_id, &hwpt_id)) + return -1; + return 0; +} + +TEST_FAIL_NTH(basic_fail_nth, map_two_domains) +{ + uint32_t ioas_id; + __u32 device_id2; + __u32 device_id; + __u32 hwpt_id2; + __u32 hwpt_id; + __u64 iova; + + self->fd = open("/dev/iommu", O_RDWR); + if (self->fd == -1) + return -1; + + if (_test_ioctl_ioas_alloc(self->fd, &ioas_id)) + return -1; + + if (_test_ioctl_set_temp_memory_limit(self->fd, 32)) + return -1; + + if (_test_cmd_mock_domain(self->fd, ioas_id, &device_id, &hwpt_id)) + return -1; + + fail_nth_enable(); + + if (_test_cmd_mock_domain(self->fd, ioas_id, &device_id2, &hwpt_id2)) + return -1; + + if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, 262144, &iova, + IOMMU_IOAS_MAP_WRITEABLE | + IOMMU_IOAS_MAP_READABLE)) + return -1; + + if (_test_ioctl_destroy(self->fd, device_id)) + return -1; + if (_test_ioctl_destroy(self->fd, hwpt_id)) + return -1; + + if (_test_ioctl_destroy(self->fd, device_id2)) + return -1; + if (_test_ioctl_destroy(self->fd, hwpt_id2)) + return -1; + + if (_test_cmd_mock_domain(self->fd, ioas_id, &device_id, &hwpt_id)) + return -1; + if (_test_cmd_mock_domain(self->fd, ioas_id, &device_id2, &hwpt_id2)) + return -1; + return 0; +} + +TEST_FAIL_NTH(basic_fail_nth, access_rw) +{ + uint64_t tmp_big[4096]; + uint32_t ioas_id; + uint16_t tmp[32]; + __u64 iova; + + self->fd = open("/dev/iommu", O_RDWR); + if (self->fd == -1) + return -1; + + if (_test_ioctl_ioas_alloc(self->fd, &ioas_id)) + return -1; + + if (_test_ioctl_set_temp_memory_limit(self->fd, 32)) + return -1; + + if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, 262144, &iova, + IOMMU_IOAS_MAP_WRITEABLE | + IOMMU_IOAS_MAP_READABLE)) + return -1; + + 
fail_nth_enable(); + + if (_test_cmd_create_access(self->fd, ioas_id, &self->access_id, 0)) + return -1; + + { + struct iommu_test_cmd access_cmd = { + .size = sizeof(access_cmd), + .op = IOMMU_TEST_OP_ACCESS_RW, + .id = self->access_id, + .access_rw = { .iova = iova, + .length = sizeof(tmp), + .uptr = (uintptr_t)tmp }, + }; + + // READ + if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW), + &access_cmd)) + return -1; + + access_cmd.access_rw.flags = MOCK_ACCESS_RW_WRITE; + if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW), + &access_cmd)) + return -1; + + access_cmd.access_rw.flags = MOCK_ACCESS_RW_SLOW_PATH; + if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW), + &access_cmd)) + return -1; + access_cmd.access_rw.flags = MOCK_ACCESS_RW_SLOW_PATH | + MOCK_ACCESS_RW_WRITE; + if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW), + &access_cmd)) + return -1; + } + + { + struct iommu_test_cmd access_cmd = { + .size = sizeof(access_cmd), + .op = IOMMU_TEST_OP_ACCESS_RW, + .id = self->access_id, + .access_rw = { .iova = iova, + .flags = MOCK_ACCESS_RW_SLOW_PATH, + .length = sizeof(tmp_big), + .uptr = (uintptr_t)tmp_big }, + }; + + if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW), + &access_cmd)) + return -1; + } + if (_test_cmd_destroy_access(self->access_id)) + return -1; + self->access_id = 0; + return 0; +} + +/* pages.c access functions */ +TEST_FAIL_NTH(basic_fail_nth, access_pin) +{ + uint32_t access_pages_id; + uint32_t ioas_id; + __u64 iova; + + self->fd = open("/dev/iommu", O_RDWR); + if (self->fd == -1) + return -1; + + if (_test_ioctl_ioas_alloc(self->fd, &ioas_id)) + return -1; + + if (_test_ioctl_set_temp_memory_limit(self->fd, 32)) + return -1; + + if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, BUFFER_SIZE, &iova, + IOMMU_IOAS_MAP_WRITEABLE | + IOMMU_IOAS_MAP_READABLE)) + return -1; + + if (_test_cmd_create_access(self->fd, ioas_id, &self->access_id, + MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES)) + 
return -1; + + fail_nth_enable(); + + { + struct iommu_test_cmd access_cmd = { + .size = sizeof(access_cmd), + .op = IOMMU_TEST_OP_ACCESS_PAGES, + .id = self->access_id, + .access_pages = { .iova = iova, + .length = BUFFER_SIZE, + .uptr = (uintptr_t)buffer }, + }; + + if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW), + &access_cmd)) + return -1; + access_pages_id = access_cmd.access_pages.out_access_pages_id; + } + + if (_test_cmd_destroy_access_pages(self->fd, self->access_id, + access_pages_id)) + return -1; + + if (_test_cmd_destroy_access(self->access_id)) + return -1; + self->access_id = 0; + return 0; +} + +/* iopt_pages_fill_xarray() */ +TEST_FAIL_NTH(basic_fail_nth, access_pin_domain) +{ + uint32_t access_pages_id; + uint32_t ioas_id; + __u32 device_id; + __u32 hwpt_id; + __u64 iova; + + self->fd = open("/dev/iommu", O_RDWR); + if (self->fd == -1) + return -1; + + if (_test_ioctl_ioas_alloc(self->fd, &ioas_id)) + return -1; + + if (_test_ioctl_set_temp_memory_limit(self->fd, 32)) + return -1; + + if (_test_cmd_mock_domain(self->fd, ioas_id, &device_id, &hwpt_id)) + return -1; + + if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, BUFFER_SIZE, &iova, + IOMMU_IOAS_MAP_WRITEABLE | + IOMMU_IOAS_MAP_READABLE)) + return -1; + + if (_test_cmd_create_access(self->fd, ioas_id, &self->access_id, + MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES)) + return -1; + + fail_nth_enable(); + + { + struct iommu_test_cmd access_cmd = { + .size = sizeof(access_cmd), + .op = IOMMU_TEST_OP_ACCESS_PAGES, + .id = self->access_id, + .access_pages = { .iova = iova, + .length = BUFFER_SIZE, + .uptr = (uintptr_t)buffer }, + }; + + if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW), + &access_cmd)) + return -1; + access_pages_id = access_cmd.access_pages.out_access_pages_id; + } + + if (_test_cmd_destroy_access_pages(self->fd, self->access_id, + access_pages_id)) + return -1; + + if (_test_cmd_destroy_access(self->access_id)) + return -1; + self->access_id = 0; + + if 
(_test_ioctl_destroy(self->fd, device_id)) + return -1; + if (_test_ioctl_destroy(self->fd, hwpt_id)) + return -1; + return 0; +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h new file mode 100644 index 000000000000..0d1f46369c2a --- /dev/null +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -0,0 +1,278 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES */ +#ifndef __SELFTEST_IOMMUFD_UTILS +#define __SELFTEST_IOMMUFD_UTILS + +#include +#include +#include +#include +#include +#include + +#include "../kselftest_harness.h" +#include "../../../../drivers/iommu/iommufd/iommufd_test.h" + +/* Hack to make assertions more readable */ +#define _IOMMU_TEST_CMD(x) IOMMU_TEST_CMD + +static void *buffer; +static unsigned long BUFFER_SIZE; + +/* + * Have the kernel check the refcount on pages. I don't know why a freshly + * mmap'd anon non-compound page starts out with a ref of 3 + */ +#define check_refs(_ptr, _length, _refs) \ + ({ \ + struct iommu_test_cmd test_cmd = { \ + .size = sizeof(test_cmd), \ + .op = IOMMU_TEST_OP_MD_CHECK_REFS, \ + .check_refs = { .length = _length, \ + .uptr = (uintptr_t)(_ptr), \ + .refs = _refs }, \ + }; \ + ASSERT_EQ(0, \ + ioctl(self->fd, \ + _IOMMU_TEST_CMD(IOMMU_TEST_OP_MD_CHECK_REFS), \ + &test_cmd)); \ + }) + +static int _test_cmd_mock_domain(int fd, unsigned int ioas_id, __u32 *device_id, + __u32 *hwpt_id) +{ + struct iommu_test_cmd cmd = { + .size = sizeof(cmd), + .op = IOMMU_TEST_OP_MOCK_DOMAIN, + .id = ioas_id, + .mock_domain = {}, + }; + int ret; + + ret = ioctl(fd, IOMMU_TEST_CMD, &cmd); + if (ret) + return ret; + if (device_id) + *device_id = cmd.mock_domain.out_device_id; + assert(cmd.id != 0); + if (hwpt_id) + *hwpt_id = cmd.mock_domain.out_hwpt_id; + return 0; +} +#define test_cmd_mock_domain(ioas_id, device_id, hwpt_id) \ + ASSERT_EQ(0, _test_cmd_mock_domain(self->fd, ioas_id, device_id, 
\ + hwpt_id)) +#define test_err_mock_domain(_errno, ioas_id, device_id, hwpt_id) \ + EXPECT_ERRNO(_errno, _test_cmd_mock_domain(self->fd, ioas_id, \ + device_id, hwpt_id)) + +static int _test_cmd_create_access(int fd, unsigned int ioas_id, + __u32 *access_id, unsigned int flags) +{ + struct iommu_test_cmd cmd = { + .size = sizeof(cmd), + .op = IOMMU_TEST_OP_CREATE_ACCESS, + .id = ioas_id, + .create_access = { .flags = flags }, + }; + int ret; + + ret = ioctl(fd, IOMMU_TEST_CMD, &cmd); + if (ret) + return ret; + *access_id = cmd.create_access.out_access_fd; + return 0; +} +#define test_cmd_create_access(ioas_id, access_id, flags) \ + ASSERT_EQ(0, _test_cmd_create_access(self->fd, ioas_id, access_id, \ + flags)) + +static int _test_cmd_destroy_access(unsigned int access_id) +{ + return close(access_id); +} +#define test_cmd_destroy_access(access_id) \ + ASSERT_EQ(0, _test_cmd_destroy_access(access_id)) + +static int _test_cmd_destroy_access_pages(int fd, unsigned int access_id, + unsigned int access_pages_id) +{ + struct iommu_test_cmd cmd = { + .size = sizeof(cmd), + .op = IOMMU_TEST_OP_DESTROY_ACCESS_PAGES, + .id = access_id, + .destroy_access_pages = { .access_pages_id = access_pages_id }, + }; + return ioctl(fd, IOMMU_TEST_CMD, &cmd); +} +#define test_cmd_destroy_access_pages(access_id, access_pages_id) \ + ASSERT_EQ(0, _test_cmd_destroy_access_pages(self->fd, access_id, \ + access_pages_id)) +#define test_err_destroy_access_pages(_errno, access_id, access_pages_id) \ + EXPECT_ERRNO(_errno, _test_cmd_destroy_access_pages( \ + self->fd, access_id, access_pages_id)) + +static int _test_ioctl_destroy(int fd, unsigned int id) +{ + struct iommu_destroy cmd = { + .size = sizeof(cmd), + .id = id, + }; + return ioctl(fd, IOMMU_DESTROY, &cmd); +} +#define test_ioctl_destroy(id) ASSERT_EQ(0, _test_ioctl_destroy(self->fd, id)) + +static int _test_ioctl_ioas_alloc(int fd, __u32 *id) +{ + struct iommu_ioas_alloc cmd = { + .size = sizeof(cmd), + }; + int ret; + + ret = 
ioctl(fd, IOMMU_IOAS_ALLOC, &cmd); + if (ret) + return ret; + *id = cmd.out_ioas_id; + return 0; +} +#define test_ioctl_ioas_alloc(id) \ + ({ \ + ASSERT_EQ(0, _test_ioctl_ioas_alloc(self->fd, id)); \ + ASSERT_NE(0, *(id)); \ + }) + +static int _test_ioctl_ioas_map(int fd, unsigned int ioas_id, void *buffer, + size_t length, __u64 *iova, unsigned int flags) +{ + struct iommu_ioas_map cmd = { + .size = sizeof(cmd), + .flags = flags, + .ioas_id = ioas_id, + .user_va = (uintptr_t)buffer, + .length = length, + }; + int ret; + + if (flags & IOMMU_IOAS_MAP_FIXED_IOVA) + cmd.iova = *iova; + + ret = ioctl(fd, IOMMU_IOAS_MAP, &cmd); + *iova = cmd.iova; + return ret; +} +#define test_ioctl_ioas_map(buffer, length, iova_p) \ + ASSERT_EQ(0, _test_ioctl_ioas_map(self->fd, self->ioas_id, buffer, \ + length, iova_p, \ + IOMMU_IOAS_MAP_WRITEABLE | \ + IOMMU_IOAS_MAP_READABLE)) + +#define test_err_ioctl_ioas_map(_errno, buffer, length, iova_p) \ + EXPECT_ERRNO(_errno, \ + _test_ioctl_ioas_map(self->fd, self->ioas_id, buffer, \ + length, iova_p, \ + IOMMU_IOAS_MAP_WRITEABLE | \ + IOMMU_IOAS_MAP_READABLE)) + +#define test_ioctl_ioas_map_id(ioas_id, buffer, length, iova_p) \ + ASSERT_EQ(0, _test_ioctl_ioas_map(self->fd, ioas_id, buffer, length, \ + iova_p, \ + IOMMU_IOAS_MAP_WRITEABLE | \ + IOMMU_IOAS_MAP_READABLE)) + +#define test_ioctl_ioas_map_fixed(buffer, length, iova) \ + ({ \ + __u64 __iova = iova; \ + ASSERT_EQ(0, _test_ioctl_ioas_map( \ + self->fd, self->ioas_id, buffer, length, \ + &__iova, \ + IOMMU_IOAS_MAP_FIXED_IOVA | \ + IOMMU_IOAS_MAP_WRITEABLE | \ + IOMMU_IOAS_MAP_READABLE)); \ + }) + +#define test_err_ioctl_ioas_map_fixed(_errno, buffer, length, iova) \ + ({ \ + __u64 __iova = iova; \ + EXPECT_ERRNO(_errno, \ + _test_ioctl_ioas_map( \ + self->fd, self->ioas_id, buffer, length, \ + &__iova, \ + IOMMU_IOAS_MAP_FIXED_IOVA | \ + IOMMU_IOAS_MAP_WRITEABLE | \ + IOMMU_IOAS_MAP_READABLE)); \ + }) + +static int _test_ioctl_ioas_unmap(int fd, unsigned int ioas_id, uint64_t 
iova, + size_t length, uint64_t *out_len) +{ + struct iommu_ioas_unmap cmd = { + .size = sizeof(cmd), + .ioas_id = ioas_id, + .iova = iova, + .length = length, + }; + int ret; + + ret = ioctl(fd, IOMMU_IOAS_UNMAP, &cmd); + if (out_len) + *out_len = cmd.length; + return ret; +} +#define test_ioctl_ioas_unmap(iova, length) \ + ASSERT_EQ(0, _test_ioctl_ioas_unmap(self->fd, self->ioas_id, iova, \ + length, NULL)) + +#define test_ioctl_ioas_unmap_id(ioas_id, iova, length) \ + ASSERT_EQ(0, _test_ioctl_ioas_unmap(self->fd, ioas_id, iova, length, \ + NULL)) + +#define test_err_ioctl_ioas_unmap(_errno, iova, length) \ + EXPECT_ERRNO(_errno, _test_ioctl_ioas_unmap(self->fd, self->ioas_id, \ + iova, length, NULL)) + +static int _test_ioctl_set_temp_memory_limit(int fd, unsigned int limit) +{ + struct iommu_test_cmd memlimit_cmd = { + .size = sizeof(memlimit_cmd), + .op = IOMMU_TEST_OP_SET_TEMP_MEMORY_LIMIT, + .memory_limit = { .limit = limit }, + }; + + return ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_SET_TEMP_MEMORY_LIMIT), + &memlimit_cmd); +} + +#define test_ioctl_set_temp_memory_limit(limit) \ + ASSERT_EQ(0, _test_ioctl_set_temp_memory_limit(self->fd, limit)) + +#define test_ioctl_set_default_memory_limit() \ + test_ioctl_set_temp_memory_limit(65536) + +static void teardown_iommufd(int fd, struct __test_metadata *_metadata) +{ + struct iommu_test_cmd test_cmd = { + .size = sizeof(test_cmd), + .op = IOMMU_TEST_OP_MD_CHECK_REFS, + .check_refs = { .length = BUFFER_SIZE, + .uptr = (uintptr_t)buffer }, + }; + + if (fd == -1) + return; + + EXPECT_EQ(0, close(fd)); + + fd = open("/dev/iommu", O_RDWR); + EXPECT_NE(-1, fd); + EXPECT_EQ(0, ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_MD_CHECK_REFS), + &test_cmd)); + EXPECT_EQ(0, close(fd)); +} + +#define EXPECT_ERRNO(expected_errno, cmd) \ + ({ \ + ASSERT_EQ(-1, cmd); \ + EXPECT_EQ(expected_errno, errno); \ + }) + +#endif From 5c8c0b3273822cf982c250a9a19e003e4b315edb Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 31 Aug 
2022 00:17:04 +0000 Subject: [PATCH 3139/4122] KVM: x86: Delete documentation for READ|WRITE in KVM_X86_SET_MSR_FILTER Delete the paragraph that describes the behavior when both KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE are set for a range. There is nothing special about KVM's handling of this combination, whereas explicitly documenting the combination suggests that there is some magic behavior the user needs to be aware of. Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20220831001706.4075399-2-seanjc@google.com --- Documentation/virt/kvm/api.rst | 7 ------- 1 file changed, 7 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 5617bc4f899f..373cf425e85c 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -4115,13 +4115,6 @@ flags values for ``struct kvm_msr_filter_range``: a write for a particular MSR should be handled regardless of the default filter action. -``KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE`` - - Filter both read and write accesses to MSRs using the given bitmap. A 0 - in the bitmap indicates that both reads and writes should immediately fail, - while a 1 indicates that reads and writes for a particular MSR are not - filtered by this range. - flags values for ``struct kvm_msr_filter``: ``KVM_MSR_FILTER_DEFAULT_ALLOW`` From b93d2ec34ef368bb854289db99d8d6ca7f523e25 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 31 Aug 2022 00:17:05 +0000 Subject: [PATCH 3140/4122] KVM: x86: Reword MSR filtering docs to more precisely define behavior Reword the MSR filtering documentation to more precisely define the behavior of filtering using common virtualization terminology. 
- Explicitly document KVM's behavior when an MSR is denied - s/handled/allowed as there is no guarantee KVM will "handle" the MSR access - Drop the "fall back" terminology, which incorrectly suggests that there is existing KVM behavior to fall back to - Fix an off-by-one error in the range (the end is exclusive) - Call out the interaction between MSR filtering and KVM_CAP_X86_USER_SPACE_MSR's KVM_MSR_EXIT_REASON_FILTER - Delete the redundant paragraph on what '0' and '1' in the bitmap means, it's covered by the sections on KVM_MSR_FILTER_{READ,WRITE} - Delete the clause on x2APIC MSR behavior depending on APIC base, this is covered by stating that KVM follows architectural behavior when emulating/virtualizing MSR accesses Reported-by: Aaron Lewis Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20220831001706.4075399-3-seanjc@google.com --- Documentation/virt/kvm/api.rst | 70 +++++++++++++++++----------------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 373cf425e85c..a4d07b866dea 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -4104,15 +4104,15 @@ flags values for ``struct kvm_msr_filter_range``: ``KVM_MSR_FILTER_READ`` Filter read accesses to MSRs using the given bitmap. A 0 in the bitmap - indicates that a read should immediately fail, while a 1 indicates that - a read for a particular MSR should be handled regardless of the default + indicates that read accesses should be denied, while a 1 indicates that + a read for a particular MSR should be allowed regardless of the default filter action. ``KVM_MSR_FILTER_WRITE`` Filter write accesses to MSRs using the given bitmap. 
A 0 in the bitmap - indicates that a write should immediately fail, while a 1 indicates that - a write for a particular MSR should be handled regardless of the default + indicates that write accesses should be denied, while a 1 indicates that + a write for a particular MSR should be allowed regardless of the default filter action. flags values for ``struct kvm_msr_filter``: @@ -4120,57 +4120,55 @@ flags values for ``struct kvm_msr_filter``: ``KVM_MSR_FILTER_DEFAULT_ALLOW`` If no filter range matches an MSR index that is getting accessed, KVM will - fall back to allowing access to the MSR. + allow accesses to all MSRs by default. ``KVM_MSR_FILTER_DEFAULT_DENY`` If no filter range matches an MSR index that is getting accessed, KVM will - fall back to rejecting access to the MSR. In this mode, all MSRs that should - be processed by KVM need to explicitly be marked as allowed in the bitmaps. + deny accesses to all MSRs by default. -This ioctl allows user space to define up to 16 bitmaps of MSR ranges to -specify whether a certain MSR access should be explicitly filtered for or not. +This ioctl allows userspace to define up to 16 bitmaps of MSR ranges to deny +guest MSR accesses that would normally be allowed by KVM. If an MSR is not +covered by a specific range, the "default" filtering behavior applies. Each +bitmap range covers MSRs from [base .. base+nmsrs). -If this ioctl has never been invoked, MSR accesses are not guarded and the -default KVM in-kernel emulation behavior is fully preserved. +If an MSR access is denied by userspace, the resulting KVM behavior depends on +whether or not KVM_CAP_X86_USER_SPACE_MSR's KVM_MSR_EXIT_REASON_FILTER is +enabled. If KVM_MSR_EXIT_REASON_FILTER is enabled, KVM will exit to userspace +on denied accesses, i.e. userspace effectively intercepts the MSR access. If +KVM_MSR_EXIT_REASON_FILTER is not enabled, KVM will inject a #GP into the guest +on denied accesses. 
+ +If an MSR access is allowed by userspace, KVM will emulate and/or virtualize +the access in accordance with the vCPU model. Note, KVM may still ultimately +inject a #GP if an access is allowed by userspace, e.g. if KVM doesn't support +the MSR, or to follow architectural behavior for the MSR. + +By default, KVM operates in KVM_MSR_FILTER_DEFAULT_ALLOW mode with no MSR range +filters. Calling this ioctl with an empty set of ranges (all nmsrs == 0) disables MSR filtering. In that mode, ``KVM_MSR_FILTER_DEFAULT_DENY`` is invalid and causes an error. -As soon as the filtering is in place, every MSR access is processed through -the filtering except for accesses to the x2APIC MSRs (from 0x800 to 0x8ff); -x2APIC MSRs are always allowed, independent of the ``default_allow`` setting, -and their behavior depends on the ``X2APIC_ENABLE`` bit of the APIC base -register. - .. warning:: - MSR accesses coming from nested vmentry/vmexit are not filtered. + MSR accesses as part of nested VM-Enter/VM-Exit are not filtered. This includes both writes to individual VMCS fields and reads/writes through the MSR lists pointed to by the VMCS. -If a bit is within one of the defined ranges, read and write accesses are -guarded by the bitmap's value for the MSR index if the kind of access -is included in the ``struct kvm_msr_filter_range`` flags. If no range -cover this particular access, the behavior is determined by the flags -field in the kvm_msr_filter struct: ``KVM_MSR_FILTER_DEFAULT_ALLOW`` -and ``KVM_MSR_FILTER_DEFAULT_DENY``. - -Each bitmap range specifies a range of MSRs to potentially allow access on. -The range goes from MSR index [base .. base+nmsrs]. The flags field -indicates whether reads, writes or both reads and writes are filtered -by setting a 1 bit in the bitmap for the corresponding MSR index. - -If an MSR access is not permitted through the filtering, it generates a -#GP inside the guest. 
When combined with KVM_CAP_X86_USER_SPACE_MSR, that -allows user space to deflect and potentially handle various MSR accesses -into user space. + x2APIC MSR accesses cannot be filtered (KVM silently ignores filters that + cover any x2APIC MSRs). Note, invoking this ioctl while a vCPU is running is inherently racy. However, KVM does guarantee that vCPUs will see either the previous filter or the new filter, e.g. MSRs with identical settings in both the old and new filter will have deterministic behavior. +Similarly, if userspace wishes to intercept on denied accesses, +KVM_MSR_EXIT_REASON_FILTER must be enabled before activating any filters, and +left enabled until after all filters are deactivated. Failure to do so may +result in KVM injecting a #GP instead of exiting to userspace. + 4.98 KVM_CREATE_SPAPR_TCE_64 ---------------------------- @@ -6500,6 +6498,8 @@ wants to write. Once finished processing the event, user space must continue vCPU execution. If the MSR write was unsuccessful, user space also sets the "error" field to "1". +See KVM_X86_SET_MSR_FILTER for details on the interaction with MSR filtering. + :: @@ -7937,7 +7937,7 @@ KVM_EXIT_X86_WRMSR exit notifications. This capability indicates that KVM supports that accesses to user defined MSRs may be rejected. With this capability exposed, KVM exports new VM ioctl KVM_X86_SET_MSR_FILTER which user space can call to specify bitmaps of MSR -ranges that KVM should reject access to. +ranges that KVM should deny access to. 
In combination with KVM_CAP_X86_USER_SPACE_MSR, this allows user space to trap and emulate MSRs that are outside of the scope of KVM as well as From 1f158147181b83c5ae02273d0b3b9eddaebcc854 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 31 Aug 2022 00:17:06 +0000 Subject: [PATCH 3141/4122] KVM: x86: Clean up KVM_CAP_X86_USER_SPACE_MSR documentation Clean up the KVM_CAP_X86_USER_SPACE_MSR documentation to eliminate misleading and/or inconsistent verbiage, and to actually document what accesses are intercepted by which flags. - s/will/may since not all #GPs are guaranteed to be intercepted - s/deflect/intercept to align with common KVM terminology - s/user space/userspace to align with the majority of KVM docs - Avoid using "trap" terminology, as KVM exits to userspace _before_ stepping, i.e. doesn't exhibit trap-like behavior - Actually document the flags Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20220831001706.4075399-4-seanjc@google.com --- Documentation/virt/kvm/api.rst | 40 ++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index a4d07b866dea..c6857f6b25ab 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6473,29 +6473,29 @@ if it decides to decode and emulate the instruction. Used on x86 systems. When the VM capability KVM_CAP_X86_USER_SPACE_MSR is enabled, MSR accesses to registers that would invoke a #GP by KVM kernel code -will instead trigger a KVM_EXIT_X86_RDMSR exit for reads and KVM_EXIT_X86_WRMSR +may instead trigger a KVM_EXIT_X86_RDMSR exit for reads and KVM_EXIT_X86_WRMSR exit for writes. -The "reason" field specifies why the MSR trap occurred. User space will only -receive MSR exit traps when a particular reason was requested during through +The "reason" field specifies why the MSR interception occurred. 
Userspace will +only receive MSR exits when a particular reason was requested during through ENABLE_CAP. Currently valid exit reasons are: KVM_MSR_EXIT_REASON_UNKNOWN - access to MSR that is unknown to KVM KVM_MSR_EXIT_REASON_INVAL - access to invalid MSRs or reserved bits KVM_MSR_EXIT_REASON_FILTER - access blocked by KVM_X86_SET_MSR_FILTER -For KVM_EXIT_X86_RDMSR, the "index" field tells user space which MSR the guest -wants to read. To respond to this request with a successful read, user space +For KVM_EXIT_X86_RDMSR, the "index" field tells userspace which MSR the guest +wants to read. To respond to this request with a successful read, userspace writes the respective data into the "data" field and must continue guest execution to ensure the read data is transferred into guest register state. -If the RDMSR request was unsuccessful, user space indicates that with a "1" in +If the RDMSR request was unsuccessful, userspace indicates that with a "1" in the "error" field. This will inject a #GP into the guest when the VCPU is executed again. -For KVM_EXIT_X86_WRMSR, the "index" field tells user space which MSR the guest -wants to write. Once finished processing the event, user space must continue -vCPU execution. If the MSR write was unsuccessful, user space also sets the +For KVM_EXIT_X86_WRMSR, the "index" field tells userspace which MSR the guest +wants to write. Once finished processing the event, userspace must continue +vCPU execution. If the MSR write was unsuccessful, userspace also sets the "error" field to "1". See KVM_X86_SET_MSR_FILTER for details on the interaction with MSR filtering. @@ -7265,19 +7265,27 @@ the module parameter for the target VM. :Parameters: args[0] contains the mask of KVM_MSR_EXIT_REASON_* events to report :Returns: 0 on success; -1 on error -This capability enables trapping of #GP invoking RDMSR and WRMSR instructions -into user space. 
+This capability allows userspace to intercept RDMSR and WRMSR instructions if +access to an MSR is denied. By default, KVM injects #GP on denied accesses. When a guest requests to read or write an MSR, KVM may not implement all MSRs that are relevant to a respective system. It also does not differentiate by CPU type. -To allow more fine grained control over MSR handling, user space may enable +To allow more fine grained control over MSR handling, userspace may enable this capability. With it enabled, MSR accesses that match the mask specified in -args[0] and trigger a #GP event inside the guest by KVM will instead trigger -KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR exit notifications which user space -can then handle to implement model specific MSR handling and/or user notifications -to inform a user that an MSR was not handled. +args[0] and would trigger a #GP inside the guest will instead trigger +KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR exit notifications. Userspace +can then implement model specific MSR handling and/or user notifications +to inform a user that an MSR was not emulated/virtualized by KVM. + +The valid mask flags are: + + KVM_MSR_EXIT_REASON_UNKNOWN - intercept accesses to unknown (to KVM) MSRs + KVM_MSR_EXIT_REASON_INVAL - intercept accesses that are architecturally + invalid according to the vCPU model and/or mode + KVM_MSR_EXIT_REASON_FILTER - intercept accesses that are denied by userspace + via KVM_X86_SET_MSR_FILTER 7.22 KVM_CAP_X86_BUS_LOCK_EXIT ------------------------------- From 4a8fd4a720f8a8dbc370076d26388176c311218a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 31 Aug 2022 00:07:21 +0000 Subject: [PATCH 3142/4122] KVM: nVMX: Reword comments about generating nested CR0/4 read shadows Reword the comments that (attempt to) document nVMX's overrides of the CR0/4 read shadows for L2 after calling vmx_set_cr0/4(). 
The important behavior that needs to be documented is that KVM needs to override the shadows to account for L1's masks even though the shadows are set by the common helpers (and that setting the shadows first would result in the correct shadows being clobbered). Signed-off-by: Sean Christopherson Reviewed-by: Jim Mattson Link: https://lore.kernel.org/r/20220831000721.4066617-1-seanjc@google.com --- arch/x86/kvm/vmx/nested.c | 9 +++------ arch/x86/kvm/vmx/nested.h | 7 ++++--- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 61c83424285c..b6f4411b613e 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2588,12 +2588,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, nested_ept_init_mmu_context(vcpu); /* - * This sets GUEST_CR0 to vmcs12->guest_cr0, possibly modifying those - * bits which we consider mandatory enabled. - * The CR0_READ_SHADOW is what L2 should have expected to read given - * the specifications by L1; It's not enough to take - * vmcs12->cr0_read_shadow because on our cr0_guest_host_mask we - * have more bits than L1 expected. + * Override the CR0/CR4 read shadows after setting the effective guest + * CR0/CR4. The common helpers also set the shadows, but they don't + * account for vmcs12's cr0/4_guest_host_mask. */ vmx_set_cr0(vcpu, vmcs12->guest_cr0); vmcs_writel(CR0_READ_SHADOW, nested_read_cr0(vmcs12)); diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h index 6312c9541c3c..96952263b029 100644 --- a/arch/x86/kvm/vmx/nested.h +++ b/arch/x86/kvm/vmx/nested.h @@ -79,9 +79,10 @@ static inline bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu) } /* - * Return the cr0 value that a nested guest would read. This is a combination - * of the real cr0 used to run the guest (guest_cr0), and the bits shadowed by - * its hypervisor (cr0_read_shadow). + * Return the cr0/4 value that a nested guest would read. 
This is a combination + * of L1's "real" cr0 used to run the guest (guest_cr0), and the bits shadowed + * by the L1 hypervisor (cr0_read_shadow). KVM must emulate CPU behavior as + * the value+mask loaded into vmcs02 may not match the vmcs12 fields. */ static inline unsigned long nested_read_cr0(struct vmcs12 *fields) { From 0b5e7a16a0a79a3742f0df9e45bca46f01b40e6a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 28 Sep 2022 23:20:15 +0000 Subject: [PATCH 3143/4122] KVM: VMX: Make vmread_error_trampoline() uncallable from C code Declare vmread_error_trampoline() as an opaque symbol so that it cannot be called from C code, at least not without some serious fudging. The trampoline always passes parameters on the stack so that the inline VMREAD sequence doesn't need to clobber registers. regparm(0) was originally added to document the stack behavior, but it ended up being confusing because regparm(0) is a nop for 64-bit targets. Opportunistically wrap the trampoline and its declaration in #ifdeffery to make it even harder to invoke incorrectly, to document why it exists, and so that it's not left behind if/when CONFIG_CC_HAS_ASM_GOTO_OUTPUT is true for all supported toolchains. No functional change intended.
Cc: Uros Bizjak Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20220928232015.745948-1-seanjc@google.com --- arch/x86/kvm/vmx/vmenter.S | 2 ++ arch/x86/kvm/vmx/vmx_ops.h | 18 ++++++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 0b5db4de4d09..766c6b3ef5ed 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -269,6 +269,7 @@ SYM_FUNC_END(__vmx_vcpu_run) .section .text, "ax" +#ifndef CONFIG_CC_HAS_ASM_GOTO_OUTPUT /** * vmread_error_trampoline - Trampoline from inline asm to vmread_error() * @field: VMCS field encoding that failed @@ -317,6 +318,7 @@ SYM_FUNC_START(vmread_error_trampoline) RET SYM_FUNC_END(vmread_error_trampoline) +#endif SYM_FUNC_START(vmx_do_interrupt_nmi_irqoff) /* diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h index f6f23c7397dc..842dc898c972 100644 --- a/arch/x86/kvm/vmx/vmx_ops.h +++ b/arch/x86/kvm/vmx/vmx_ops.h @@ -11,14 +11,28 @@ #include "../x86.h" void vmread_error(unsigned long field, bool fault); -__attribute__((regparm(0))) void vmread_error_trampoline(unsigned long field, - bool fault); void vmwrite_error(unsigned long field, unsigned long value); void vmclear_error(struct vmcs *vmcs, u64 phys_addr); void vmptrld_error(struct vmcs *vmcs, u64 phys_addr); void invvpid_error(unsigned long ext, u16 vpid, gva_t gva); void invept_error(unsigned long ext, u64 eptp, gpa_t gpa); +#ifndef CONFIG_CC_HAS_ASM_GOTO_OUTPUT +/* + * The VMREAD error trampoline _always_ uses the stack to pass parameters, even + * for 64-bit targets. Preserving all registers allows the VMREAD inline asm + * blob to avoid clobbering GPRs, which in turn allows the compiler to better + * optimize sequences of VMREADs. + * + * Declare the trampoline as an opaque label as it's not safe to call from C + * code; there is no way to tell the compiler to pass params on the stack for + * 64-bit targets. 
+ * + * void vmread_error_trampoline(unsigned long field, bool fault); + */ +extern unsigned long vmread_error_trampoline; +#endif + static __always_inline void vmcs_check16(unsigned long field) { BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2000, From d2a00af2061db863890e32a4a99a6f82c330df1f Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 7 Jun 2022 23:23:51 +0000 Subject: [PATCH 3144/4122] KVM: VMX: Allow userspace to set all supported FEATURE_CONTROL bits Allow userspace to set all supported bits in MSR IA32_FEATURE_CONTROL irrespective of the guest CPUID model, e.g. via KVM_SET_MSRS. KVM's ABI is that userspace is allowed to set MSRs before CPUID, i.e. can set MSRs to values that would fault according to the guest CPUID model. Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20220607232353.3375324-2-seanjc@google.com --- arch/x86/kvm/vmx/vmx.c | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 3f31c46c306e..7be1fb50a753 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1836,12 +1836,38 @@ bool nested_vmx_allowed(struct kvm_vcpu *vcpu) return nested && guest_cpuid_has(vcpu, X86_FEATURE_VMX); } -static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu, - uint64_t val) -{ - uint64_t valid_bits = to_vmx(vcpu)->msr_ia32_feature_control_valid_bits; +/* + * Userspace is allowed to set any supported IA32_FEATURE_CONTROL regardless of + * guest CPUID. Note, KVM allows userspace to set "VMX in SMX" to maintain + * backwards compatibility even though KVM doesn't support emulating SMX. And + * because userspace set "VMX in SMX", the guest must also be allowed to set it, + * e.g. if the MSR is left unlocked and the guest does a RMW operation. 
+ */ +#define KVM_SUPPORTED_FEATURE_CONTROL (FEAT_CTL_LOCKED | \ + FEAT_CTL_VMX_ENABLED_INSIDE_SMX | \ + FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX | \ + FEAT_CTL_SGX_LC_ENABLED | \ + FEAT_CTL_SGX_ENABLED | \ + FEAT_CTL_LMCE_ENABLED) - return !(val & ~valid_bits); +static inline bool vmx_feature_control_msr_valid(struct vcpu_vmx *vmx, + struct msr_data *msr) +{ + uint64_t valid_bits; + + /* + * Ensure KVM_SUPPORTED_FEATURE_CONTROL is updated when new bits are + * exposed to the guest. + */ + WARN_ON_ONCE(vmx->msr_ia32_feature_control_valid_bits & + ~KVM_SUPPORTED_FEATURE_CONTROL); + + if (msr->host_initiated) + valid_bits = KVM_SUPPORTED_FEATURE_CONTROL; + else + valid_bits = vmx->msr_ia32_feature_control_valid_bits; + + return !(msr->data & ~valid_bits); } static int vmx_get_msr_feature(struct kvm_msr_entry *msr) @@ -2240,7 +2266,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) vcpu->arch.mcg_ext_ctl = data; break; case MSR_IA32_FEAT_CTL: - if (!vmx_feature_control_msr_valid(vcpu, data) || + if (!vmx_feature_control_msr_valid(vmx, msr_info) || (to_vmx(vcpu)->msr_ia32_feature_control & FEAT_CTL_LOCKED && !msr_info->host_initiated)) return 1; From 2d6cd68636d60822219074b7c1d0bfe41321f106 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 7 Jun 2022 23:23:52 +0000 Subject: [PATCH 3145/4122] KVM: VMX: Move MSR_IA32_FEAT_CTL.LOCKED check into "is valid" helper Move the check on IA32_FEATURE_CONTROL being locked, i.e. read-only from the guest, into the helper to check the overall validity of the incoming value. Opportunistically rename the helper to make it clear that it returns a bool. No functional change intended. 
Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20220607232353.3375324-3-seanjc@google.com --- arch/x86/kvm/vmx/vmx.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 7be1fb50a753..fe5615fd8295 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1850,8 +1850,8 @@ bool nested_vmx_allowed(struct kvm_vcpu *vcpu) FEAT_CTL_SGX_ENABLED | \ FEAT_CTL_LMCE_ENABLED) -static inline bool vmx_feature_control_msr_valid(struct vcpu_vmx *vmx, - struct msr_data *msr) +static inline bool is_vmx_feature_control_msr_valid(struct vcpu_vmx *vmx, + struct msr_data *msr) { uint64_t valid_bits; @@ -1862,6 +1862,10 @@ static inline bool vmx_feature_control_msr_valid(struct vcpu_vmx *vmx, WARN_ON_ONCE(vmx->msr_ia32_feature_control_valid_bits & ~KVM_SUPPORTED_FEATURE_CONTROL); + if (!msr->host_initiated && + (vmx->msr_ia32_feature_control & FEAT_CTL_LOCKED)) + return false; + if (msr->host_initiated) valid_bits = KVM_SUPPORTED_FEATURE_CONTROL; else @@ -2266,10 +2270,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) vcpu->arch.mcg_ext_ctl = data; break; case MSR_IA32_FEAT_CTL: - if (!vmx_feature_control_msr_valid(vmx, msr_info) || - (to_vmx(vcpu)->msr_ia32_feature_control & - FEAT_CTL_LOCKED && !msr_info->host_initiated)) + if (!is_vmx_feature_control_msr_valid(vmx, msr_info)) return 1; + vmx->msr_ia32_feature_control = data; if (msr_info->host_initiated && data == 0) vmx_leave_nested(vcpu); From b80732fdc9b235046687a2999ed198fa55fde901 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 7 Jun 2022 23:23:53 +0000 Subject: [PATCH 3146/4122] KVM: selftests: Verify userspace can stuff IA32_FEATURE_CONTROL at will Verify the KVM allows userspace to set all supported bits in the IA32_FEATURE_CONTROL MSR irrespective of the current guest CPUID, and that all unsupported bits are rejected. 
Throw the testcase into vmx_msrs_test even though it's not technically a VMX MSR; it's close enough, and the feature most frequently controlled by the MSR is VMX. Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20220607232353.3375324-4-seanjc@google.com --- .../selftests/kvm/include/x86_64/processor.h | 2 + .../selftests/kvm/x86_64/vmx_msrs_test.c | 47 +++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 5d310abe6c3f..ac4590abb44d 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -102,6 +102,7 @@ struct kvm_x86_cpu_feature { #define X86_FEATURE_XMM2 KVM_X86_CPU_FEATURE(0x1, 0, EDX, 26) #define X86_FEATURE_FSGSBASE KVM_X86_CPU_FEATURE(0x7, 0, EBX, 0) #define X86_FEATURE_TSC_ADJUST KVM_X86_CPU_FEATURE(0x7, 0, EBX, 1) +#define X86_FEATURE_SGX KVM_X86_CPU_FEATURE(0x7, 0, EBX, 2) #define X86_FEATURE_HLE KVM_X86_CPU_FEATURE(0x7, 0, EBX, 4) #define X86_FEATURE_SMEP KVM_X86_CPU_FEATURE(0x7, 0, EBX, 7) #define X86_FEATURE_INVPCID KVM_X86_CPU_FEATURE(0x7, 0, EBX, 10) @@ -115,6 +116,7 @@ struct kvm_x86_cpu_feature { #define X86_FEATURE_PKU KVM_X86_CPU_FEATURE(0x7, 0, ECX, 3) #define X86_FEATURE_LA57 KVM_X86_CPU_FEATURE(0x7, 0, ECX, 16) #define X86_FEATURE_RDPID KVM_X86_CPU_FEATURE(0x7, 0, ECX, 22) +#define X86_FEATURE_SGX_LC KVM_X86_CPU_FEATURE(0x7, 0, ECX, 30) #define X86_FEATURE_SHSTK KVM_X86_CPU_FEATURE(0x7, 0, ECX, 7) #define X86_FEATURE_IBT KVM_X86_CPU_FEATURE(0x7, 0, EDX, 20) #define X86_FEATURE_AMX_TILE KVM_X86_CPU_FEATURE(0x7, 0, EDX, 24) diff --git a/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c b/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c index 322d561b4260..90720b6205f4 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c @@ -67,6 +67,52 @@ static void
vmx_save_restore_msrs_test(struct kvm_vcpu *vcpu) vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_VMFUNC, -1ull); } +static void __ia32_feature_control_msr_test(struct kvm_vcpu *vcpu, + uint64_t msr_bit, + struct kvm_x86_cpu_feature feature) +{ + uint64_t val; + + vcpu_clear_cpuid_feature(vcpu, feature); + + val = vcpu_get_msr(vcpu, MSR_IA32_FEAT_CTL); + vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val | msr_bit | FEAT_CTL_LOCKED); + vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, (val & ~msr_bit) | FEAT_CTL_LOCKED); + vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val | msr_bit | FEAT_CTL_LOCKED); + vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, (val & ~msr_bit) | FEAT_CTL_LOCKED); + vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val); + + if (!kvm_cpu_has(feature)) + return; + + vcpu_set_cpuid_feature(vcpu, feature); +} + +static void ia32_feature_control_msr_test(struct kvm_vcpu *vcpu) +{ + uint64_t supported_bits = FEAT_CTL_LOCKED | + FEAT_CTL_VMX_ENABLED_INSIDE_SMX | + FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX | + FEAT_CTL_SGX_LC_ENABLED | + FEAT_CTL_SGX_ENABLED | + FEAT_CTL_LMCE_ENABLED; + int bit, r; + + __ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_INSIDE_SMX, X86_FEATURE_SMX); + __ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_INSIDE_SMX, X86_FEATURE_VMX); + __ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX, X86_FEATURE_VMX); + __ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_LC_ENABLED, X86_FEATURE_SGX_LC); + __ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_LC_ENABLED, X86_FEATURE_SGX); + __ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_ENABLED, X86_FEATURE_SGX); + __ia32_feature_control_msr_test(vcpu, FEAT_CTL_LMCE_ENABLED, X86_FEATURE_MCE); + + for_each_clear_bit(bit, &supported_bits, 64) { + r = _vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, BIT(bit)); + TEST_ASSERT(r == 0, + "Setting reserved bit %d in IA32_FEATURE_CONTROL should fail", bit); + } +} + int main(void) { struct kvm_vcpu *vcpu; @@ -79,6 +125,7 @@ int main(void) vm = vm_create_with_one_vcpu(&vcpu, NULL); 
vmx_save_restore_msrs_test(vcpu); + ia32_feature_control_msr_test(vcpu); kvm_vm_free(vm); } From 3ebcbd2244f5a69e06e5f655bfbd8127c08201c7 Mon Sep 17 00:00:00 2001 From: Anton Romanov Date: Wed, 8 Jun 2022 18:35:26 +0000 Subject: [PATCH 3147/4122] KVM: x86: Use current rather than snapshotted TSC frequency if it is constant Don't snapshot tsc_khz into per-cpu cpu_tsc_khz if the host TSC is constant, in which case the actual TSC frequency will never change and thus capturing TSC during initialization is unnecessary, KVM can simply use tsc_khz. This value is snapshotted from kvm_timer_init->kvmclock_cpu_online->tsc_khz_changed(NULL) On CPUs with constant TSC, but not a hardware-specified TSC frequency, snapshotting cpu_tsc_khz and using that to set a VM's target TSC frequency can lead the VM to think its TSC frequency is not what it actually is if refining the TSC completes after KVM snapshots tsc_khz. The actual frequency never changes, only the kernel's calculation of what that frequency is changes. Ideally, KVM would not be able to race with TSC refinement, or would have a hook into tsc_refine_calibration_work() to get an alert when refinement is complete. Avoiding the race altogether isn't practical as refinement takes a relative eternity; it's deliberately put on a work queue outside of the normal boot sequence to avoid unnecessarily delaying boot. Adding a hook is doable, but somewhat gross due to KVM's ability to be built as a module. And if the TSC is constant, which is likely the case for every VMX/SVM-capable CPU produced in the last decade, the race can be hit if and only if userspace is able to create a VM before TSC refinement completes; refinement is slow, but not that slow. For now, punt on a proper fix, as not taking a snapshot can help some use cases and not taking a snapshot is arguably correct irrespective of the race with refinement.
Signed-off-by: Anton Romanov Reviewed-by: Sean Christopherson Link: https://lore.kernel.org/r/20220608183525.1143682-1-romanton@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 44 +++++++++++++++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ef12747ecb63..152ea4993b76 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2974,6 +2974,22 @@ static void kvm_update_masterclock(struct kvm *kvm) kvm_end_pvclock_update(kvm); } +/* + * Use the kernel's tsc_khz directly if the TSC is constant, otherwise use KVM's + * per-CPU value (which may be zero if a CPU is going offline). Note, tsc_khz + * can change during boot even if the TSC is constant, as it's possible for KVM + * to be loaded before TSC calibration completes. Ideally, KVM would get a + * notification when calibration completes, but practically speaking calibration + * will complete before userspace is alive enough to create VMs. + */ +static unsigned long get_cpu_tsc_khz(void) +{ + if (static_cpu_has(X86_FEATURE_CONSTANT_TSC)) + return tsc_khz; + else + return __this_cpu_read(cpu_tsc_khz); +} + /* Called within read_seqcount_begin/retry for kvm->pvclock_sc. 
*/ static void __get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data) { @@ -2984,7 +3000,8 @@ static void __get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data) get_cpu(); data->flags = 0; - if (ka->use_master_clock && __this_cpu_read(cpu_tsc_khz)) { + if (ka->use_master_clock && + (static_cpu_has(X86_FEATURE_CONSTANT_TSC) || __this_cpu_read(cpu_tsc_khz))) { #ifdef CONFIG_X86_64 struct timespec64 ts; @@ -2998,7 +3015,7 @@ static void __get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data) data->flags |= KVM_CLOCK_TSC_STABLE; hv_clock.tsc_timestamp = ka->master_cycle_now; hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset; - kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL, + kvm_get_time_scale(NSEC_PER_SEC, get_cpu_tsc_khz() * 1000LL, &hv_clock.tsc_shift, &hv_clock.tsc_to_system_mul); data->clock = __pvclock_read_cycles(&hv_clock, data->host_tsc); @@ -3108,7 +3125,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) /* Keep irq disabled to prevent changes to the clock */ local_irq_save(flags); - tgt_tsc_khz = __this_cpu_read(cpu_tsc_khz); + tgt_tsc_khz = get_cpu_tsc_khz(); if (unlikely(tgt_tsc_khz == 0)) { local_irq_restore(flags); kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); @@ -9038,9 +9055,11 @@ static void tsc_khz_changed(void *data) struct cpufreq_freqs *freq = data; unsigned long khz = 0; + WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_CONSTANT_TSC)); + if (data) khz = freq->new; - else if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) + else khz = cpufreq_quick_get(raw_smp_processor_id()); if (!khz) khz = tsc_khz; @@ -9061,8 +9080,10 @@ static void kvm_hyperv_tsc_notifier(void) hyperv_stop_tsc_emulation(); /* TSC frequency always matches when on Hyper-V */ - for_each_present_cpu(cpu) - per_cpu(cpu_tsc_khz, cpu) = tsc_khz; + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { + for_each_present_cpu(cpu) + per_cpu(cpu_tsc_khz, cpu) = tsc_khz; + } kvm_caps.max_guest_tsc_khz = tsc_khz; list_for_each_entry(kvm, &vm_list, 
vm_list) { @@ -9199,10 +9220,10 @@ static void kvm_timer_init(void) } cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); - } - cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "x86/kvm/clk:online", - kvmclock_cpu_online, kvmclock_cpu_down_prep); + cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "x86/kvm/clk:online", + kvmclock_cpu_online, kvmclock_cpu_down_prep); + } } #ifdef CONFIG_X86_64 @@ -9362,10 +9383,11 @@ void kvm_arch_exit(void) #endif kvm_lapic_exit(); - if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); - cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE); + cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE); + } #ifdef CONFIG_X86_64 pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier); irq_work_sync(&pvclock_irq_work); From 10aa7cd398a9ead7464a7f8b49d4e4c843806813 Mon Sep 17 00:00:00 2001 From: Yang Yang Date: Wed, 30 Nov 2022 17:44:37 +0800 Subject: [PATCH 3148/4122] IB/hfi1: Switch to netif_napi_add() There is no need to use netif_napi_add_weight() when the weight argument is 64. See "net: drop the weight argument from netif_napi_add". Signed-off-by: Yang Yang Link: https://lore.kernel.org/r/202211301744378304494@zte.com.cn Reviewed-by: xu xin Reviewed-by: Zhang Yunkai Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hfi1/netdev_rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/netdev_rx.c b/drivers/infiniband/hw/hfi1/netdev_rx.c index 3dfa5aff2512..720d4c85c9c9 100644 --- a/drivers/infiniband/hw/hfi1/netdev_rx.c +++ b/drivers/infiniband/hw/hfi1/netdev_rx.c @@ -216,7 +216,7 @@ static int hfi1_netdev_rxq_init(struct hfi1_netdev_rx *rx) * right now. 
*/ set_bit(NAPI_STATE_NO_BUSY_POLL, &rxq->napi.state); - netif_napi_add_weight(dev, &rxq->napi, hfi1_netdev_rx_napi, 64); + netif_napi_add(dev, &rxq->napi, hfi1_netdev_rx_napi); rc = msix_netdev_request_rcd_irq(rxq->rcd); if (rc) goto bail_context_irq_failure; From 32975c491ee410598b33201344c123fcc81a7c33 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 23 Nov 2022 17:39:18 +0100 Subject: [PATCH 3149/4122] uapi: Add missing _UAPI prefix to include guard Signed-off-by: Geert Uytterhoeven Signed-off-by: Arnd Bergmann --- include/uapi/asm-generic/types.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/uapi/asm-generic/types.h b/include/uapi/asm-generic/types.h index dfaa50d99d8f..7ad4dd01b8bf 100644 --- a/include/uapi/asm-generic/types.h +++ b/include/uapi/asm-generic/types.h @@ -1,9 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _ASM_GENERIC_TYPES_H -#define _ASM_GENERIC_TYPES_H +#ifndef _UAPI_ASM_GENERIC_TYPES_H +#define _UAPI_ASM_GENERIC_TYPES_H /* * int-ll64 is used everywhere now. */ #include -#endif /* _ASM_GENERIC_TYPES_H */ +#endif /* _UAPI_ASM_GENERIC_TYPES_H */ From 38931d8989b5760b0bd17c9ec99e81986258e4cb Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 22 Sep 2022 13:08:16 -0700 Subject: [PATCH 3150/4122] mm: Make ksize() a reporting-only function With all "silently resizing" callers of ksize() refactored, remove the logic in ksize() that would allow it to be used to effectively change the size of an allocation (bypassing __alloc_size hints, etc). Users wanting this feature need to either use kmalloc_size_roundup() before an allocation, or use krealloc() directly. For kfree_sensitive(), move the unpoisoning logic inline. Replace some of the partially open-coded ksize() in __do_krealloc with ksize() now that it doesn't perform unpoisoning. Adjust the KUnit tests to match the new ksize() behavior.
Execution tested with: $ ./tools/testing/kunit/kunit.py run \ --kconfig_add CONFIG_KASAN=y \ --kconfig_add CONFIG_KASAN_GENERIC=y \ --arch x86_64 kasan Cc: Christoph Lameter Cc: Pekka Enberg Cc: Joonsoo Kim Cc: Andrew Morton Cc: Roman Gushchin Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Vincenzo Frascino Cc: linux-mm@kvack.org Cc: kasan-dev@googlegroups.com Acked-by: Vlastimil Babka Acked-by: David Rientjes Enhanced-by: Andrey Konovalov Signed-off-by: Kees Cook --- mm/kasan/kasan_test.c | 19 +++++++++++++------ mm/slab_common.c | 26 ++++++++++---------------- 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/mm/kasan/kasan_test.c b/mm/kasan/kasan_test.c index 0d59098f0876..73684642c42d 100644 --- a/mm/kasan/kasan_test.c +++ b/mm/kasan/kasan_test.c @@ -783,23 +783,30 @@ static void kasan_global_oob_left(struct kunit *test) KUNIT_EXPECT_KASAN_FAIL(test, *(volatile char *)p); } -/* Check that ksize() makes the whole object accessible. */ +/* Check that ksize() does NOT unpoison whole object. */ static void ksize_unpoisons_memory(struct kunit *test) { char *ptr; - size_t size = 123, real_size; + size_t size = 128 - KASAN_GRANULE_SIZE - 5; + size_t real_size; ptr = kmalloc(size, GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); + real_size = ksize(ptr); + KUNIT_EXPECT_GT(test, real_size, size); OPTIMIZER_HIDE_VAR(ptr); - /* This access shouldn't trigger a KASAN report. */ - ptr[size] = 'x'; + /* These accesses shouldn't trigger a KASAN report. */ + ptr[0] = 'x'; + ptr[size - 1] = 'x'; - /* This one must. */ - KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[real_size]); + /* These must trigger a KASAN report. 
*/ + if (IS_ENABLED(CONFIG_KASAN_GENERIC)) + KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[size]); + KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[size + 5]); + KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[real_size - 1]); kfree(ptr); } diff --git a/mm/slab_common.c b/mm/slab_common.c index 33b1886b06eb..7e96abf1bd7d 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -1333,11 +1333,11 @@ __do_krealloc(const void *p, size_t new_size, gfp_t flags) void *ret; size_t ks; - /* Don't use instrumented ksize to allow precise KASAN poisoning. */ + /* Check for double-free before calling ksize. */ if (likely(!ZERO_OR_NULL_PTR(p))) { if (!kasan_check_byte(p)) return NULL; - ks = kfence_ksize(p) ?: __ksize(p); + ks = ksize(p); } else ks = 0; @@ -1405,8 +1405,10 @@ void kfree_sensitive(const void *p) void *mem = (void *)p; ks = ksize(mem); - if (ks) + if (ks) { + kasan_unpoison_range(mem, ks); memzero_explicit(mem, ks); + } kfree(mem); } EXPORT_SYMBOL(kfree_sensitive); @@ -1427,13 +1429,11 @@ EXPORT_SYMBOL(kfree_sensitive); */ size_t ksize(const void *objp) { - size_t size; - /* - * We need to first check that the pointer to the object is valid, and - * only then unpoison the memory. The report printed from ksize() is - * more useful, then when it's printed later when the behaviour could - * be undefined due to a potential use-after-free or double-free. + * We need to first check that the pointer to the object is valid. + * The KASAN report printed from ksize() is more useful, then when + * it's printed later when the behaviour could be undefined due to + * a potential use-after-free or double-free. * * We use kasan_check_byte(), which is supported for the hardware * tag-based KASAN mode, unlike kasan_check_read/write(). 
@@ -1447,13 +1447,7 @@ size_t ksize(const void *objp) if (unlikely(ZERO_OR_NULL_PTR(objp)) || !kasan_check_byte(objp)) return 0; - size = kfence_ksize(objp) ?: __ksize(objp); - /* - * We assume that ksize callers could use whole allocated area, - * so we need to unpoison this area. - */ - kasan_unpoison_range(objp, size); - return size; + return kfence_ksize(objp) ?: __ksize(objp); } EXPORT_SYMBOL(ksize); From 25226df4b9be7f6d5d722af5b75e86e76e5c3a80 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 21 Sep 2022 13:46:03 -0500 Subject: [PATCH 3151/4122] mm/pgtable: Fix multiple -Wstringop-overflow warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The actual size of the following arrays at run-time depends on CONFIG_X86_PAE. 427 pmd_t *u_pmds[MAX_PREALLOCATED_USER_PMDS]; 428 pmd_t *pmds[MAX_PREALLOCATED_PMDS]; If CONFIG_X86_PAE is not enabled, their final size will be zero (which is technically not a legal storage size in C, but remains "valid" via the GNU extension). In that case, the compiler complains about trying to access objects of size zero when calling functions where these objects are passed as arguments. Fix this by sanity-checking the size of those arrays just before the function calls. 
Also, the following warnings are fixed by these changes when building with GCC 11+ and -Wstringop-overflow enabled: arch/x86/mm/pgtable.c:437:13: warning: ‘preallocate_pmds.constprop’ accessing 8 bytes in a region of size 0 [-Wstringop-overflow=] arch/x86/mm/pgtable.c:440:13: warning: ‘preallocate_pmds.constprop’ accessing 8 bytes in a region of size 0 [-Wstringop-overflow=] arch/x86/mm/pgtable.c:462:9: warning: ‘free_pmds.constprop’ accessing 8 bytes in a region of size 0 [-Wstringop-overflow=] arch/x86/mm/pgtable.c:455:9: warning: ‘pgd_prepopulate_user_pmd’ accessing 8 bytes in a region of size 0 [-Wstringop-overflow=] arch/x86/mm/pgtable.c:464:9: warning: ‘free_pmds.constprop’ accessing 8 bytes in a region of size 0 [-Wstringop-overflow=] This is one of the last cases in the ongoing effort to globally enable -Wstringop-overflow. The alternative to this is to make the originally suggested change: make the pmds argument from an array pointer to a pointer pointer. That situation is considered "legal" for C in the sense that it does not have a way to reason about the storage. i.e.: -static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[]) +static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t **pmds) With the above change, there's no difference in binary output, and the compiler warning is silenced. However, with this patch, the compiler can actually figure out that it isn't using the code at all, and it gets dropped: text data bss dec hex filename 8218 718 32 8968 2308 arch/x86/mm/pgtable.o.before 7765 694 32 8491 212b arch/x86/mm/pgtable.o.after So this case (fixing a warning and reducing image size) is a clear win. Additionally drops an old work-around for GCC in the same code. Link: https://github.com/KSPP/linux/issues/203 Link: https://github.com/KSPP/linux/issues/181 Signed-off-by: Gustavo A. R. 
Silva Reviewed-by: Kees Cook Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/Yytb67xvrnctxnEe@work --- arch/x86/mm/pgtable.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 8525f2876fb4..e4f499eb0f29 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -299,9 +299,6 @@ static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[]) pud_t *pud; int i; - if (PREALLOCATED_PMDS == 0) /* Work around gcc-3.4.x bug */ - return; - p4d = p4d_offset(pgd, 0); pud = pud_offset(p4d, 0); @@ -434,10 +431,12 @@ pgd_t *pgd_alloc(struct mm_struct *mm) mm->pgd = pgd; - if (preallocate_pmds(mm, pmds, PREALLOCATED_PMDS) != 0) + if (sizeof(pmds) != 0 && + preallocate_pmds(mm, pmds, PREALLOCATED_PMDS) != 0) goto out_free_pgd; - if (preallocate_pmds(mm, u_pmds, PREALLOCATED_USER_PMDS) != 0) + if (sizeof(u_pmds) != 0 && + preallocate_pmds(mm, u_pmds, PREALLOCATED_USER_PMDS) != 0) goto out_free_pmds; if (paravirt_pgd_alloc(mm) != 0) @@ -451,17 +450,22 @@ pgd_t *pgd_alloc(struct mm_struct *mm) spin_lock(&pgd_lock); pgd_ctor(mm, pgd); - pgd_prepopulate_pmd(mm, pgd, pmds); - pgd_prepopulate_user_pmd(mm, pgd, u_pmds); + if (sizeof(pmds) != 0) + pgd_prepopulate_pmd(mm, pgd, pmds); + + if (sizeof(u_pmds) != 0) + pgd_prepopulate_user_pmd(mm, pgd, u_pmds); spin_unlock(&pgd_lock); return pgd; out_free_user_pmds: - free_pmds(mm, u_pmds, PREALLOCATED_USER_PMDS); + if (sizeof(u_pmds) != 0) + free_pmds(mm, u_pmds, PREALLOCATED_USER_PMDS); out_free_pmds: - free_pmds(mm, pmds, PREALLOCATED_PMDS); + if (sizeof(pmds) != 0) + free_pmds(mm, pmds, PREALLOCATED_PMDS); out_free_pgd: _pgd_free(pgd); out: From 9360d035a579d95d1e76c471061b9065b18a0eb1 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 17 Nov 2022 15:43:21 -0800 Subject: [PATCH 3152/4122] panic: Separate sysctl logic from CONFIG_SMP In preparation for adding more sysctls directly in kernel/panic.c, split CONFIG_SMP 
from the logic that adds sysctls. Cc: Petr Mladek Cc: Andrew Morton Cc: tangmeng Cc: "Guilherme G. Piccoli" Cc: Tiezhu Yang Cc: Sebastian Andrzej Siewior Reviewed-by: Luis Chamberlain Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221117234328.594699-1-keescook@chromium.org --- kernel/panic.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/panic.c b/kernel/panic.c index da323209f583..d843d036651e 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -75,8 +75,9 @@ ATOMIC_NOTIFIER_HEAD(panic_notifier_list); EXPORT_SYMBOL(panic_notifier_list); -#if defined(CONFIG_SMP) && defined(CONFIG_SYSCTL) +#ifdef CONFIG_SYSCTL static struct ctl_table kern_panic_table[] = { +#ifdef CONFIG_SMP { .procname = "oops_all_cpu_backtrace", .data = &sysctl_oops_all_cpu_backtrace, @@ -86,6 +87,7 @@ static struct ctl_table kern_panic_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, +#endif { } }; From d4ccd54d28d3c8598e2354acc13e28c060961dbb Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Thu, 17 Nov 2022 15:43:22 -0800 Subject: [PATCH 3153/4122] exit: Put an upper limit on how often we can oops Many Linux systems are configured to not panic on oops; but allowing an attacker to oops the system **really** often can make even bugs that look completely unexploitable exploitable (like NULL dereferences and such) if each crash elevates a refcount by one or a lock is taken in read mode, and this causes a counter to eventually overflow. The most interesting counters for this are 32 bits wide (like open-coded refcounts that don't use refcount_t). (The ldsem reader count on 32-bit platforms is just 16 bits, but probably nobody cares about 32-bit platforms that much nowadays.) So let's panic the system if the kernel is constantly oopsing. 
The speed of oopsing 2^32 times probably depends on several factors, like how long the stack trace is and which unwinder you're using; an empirically important one is whether your console is showing a graphical environment or a text console that oopses will be printed to. In a quick single-threaded benchmark, it looks like oopsing in a vfork() child with a very short stack trace only takes ~510 microseconds per run when a graphical console is active; but switching to a text console that oopses are printed to slows it down around 87x, to ~45 milliseconds per run. (Adding more threads makes this faster, but the actual oops printing happens under &die_lock on x86, so you can maybe speed this up by a factor of around 2 and then any further improvement gets eaten up by lock contention.) It looks like it would take around 8-12 days to overflow a 32-bit counter with repeated oopsing on a multi-core X86 system running a graphical environment; both me (in an X86 VM) and Seth (with a distro kernel on normal hardware in a standard configuration) got numbers in that ballpark. 12 days aren't *that* short on a desktop system, and you'd likely need much longer on a typical server system (assuming that people don't run graphical desktop environments on their servers), and this is a *very* noisy and violent approach to exploiting the kernel; and it also seems to take orders of magnitude longer on some machines, probably because stuff like EFI pstore will slow it down a ton if that's active. 
Signed-off-by: Jann Horn Link: https://lore.kernel.org/r/20221107201317.324457-1-jannh@google.com Reviewed-by: Luis Chamberlain Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221117234328.594699-2-keescook@chromium.org --- Documentation/admin-guide/sysctl/kernel.rst | 8 ++++ kernel/exit.c | 42 +++++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 98d1b198b2b4..09f3fb2f8585 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -667,6 +667,14 @@ This is the default behavior. an oops event is detected. +oops_limit +========== + +Number of kernel oopses after which the kernel should panic when +``panic_on_oops`` is not set. Setting this to 0 or 1 has the same effect +as setting ``panic_on_oops=1``. + + osrelease, ostype & version =========================== diff --git a/kernel/exit.c b/kernel/exit.c index 35e0a31a0315..2ab3ead62118 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -72,6 +72,33 @@ #include #include +/* + * The default value should be high enough to not crash a system that randomly + * crashes its kernel from time to time, but low enough to at least not permit + * overflowing 32-bit refcounts or the ldsem writer count. 
+ */ +static unsigned int oops_limit = 10000; + +#ifdef CONFIG_SYSCTL +static struct ctl_table kern_exit_table[] = { + { + .procname = "oops_limit", + .data = &oops_limit, + .maxlen = sizeof(oops_limit), + .mode = 0644, + .proc_handler = proc_douintvec, + }, + { } +}; + +static __init int kernel_exit_sysctls_init(void) +{ + register_sysctl_init("kernel", kern_exit_table); + return 0; +} +late_initcall(kernel_exit_sysctls_init); +#endif + static void __unhash_process(struct task_struct *p, bool group_dead) { nr_threads--; @@ -874,6 +901,8 @@ void __noreturn do_exit(long code) void __noreturn make_task_dead(int signr) { + static atomic_t oops_count = ATOMIC_INIT(0); + /* * Take the task off the cpu after something catastrophic has * happened. @@ -897,6 +926,19 @@ void __noreturn make_task_dead(int signr) preempt_count_set(PREEMPT_ENABLED); } + /* + * Every time the system oopses, if the oops happens while a reference + * to an object was held, the reference leaks. + * If the oops doesn't also leak memory, repeated oopsing can cause + * reference counters to wrap around (if they're not using refcount_t). + * This means that repeated oopsing can make unexploitable-looking bugs + * exploitable through repeated oopsing. + * To make sure this can't happen, place an upper bound on how often the + * kernel may oops without panic(). + */ + if (atomic_inc_return(&oops_count) >= READ_ONCE(oops_limit)) + panic("Oopsed too often (kernel.oops_limit is %d)", oops_limit); + /* * We're taking recursive faults here in make_task_dead. Safest is to just * leave this task alone and wait for reboot. From 9db89b41117024f80b38b15954017fb293133364 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 17 Nov 2022 15:43:23 -0800 Subject: [PATCH 3154/4122] exit: Expose "oops_count" to sysfs Since Oops count is now tracked and is a fairly interesting signal, add the entry /sys/kernel/oops_count to expose it to userspace. Cc: "Eric W. 
Biederman" Cc: Jann Horn Cc: Arnd Bergmann Reviewed-by: Luis Chamberlain Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221117234328.594699-3-keescook@chromium.org --- .../ABI/testing/sysfs-kernel-oops_count | 6 +++++ MAINTAINERS | 1 + kernel/exit.c | 22 +++++++++++++++++-- 3 files changed, 27 insertions(+), 2 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-kernel-oops_count diff --git a/Documentation/ABI/testing/sysfs-kernel-oops_count b/Documentation/ABI/testing/sysfs-kernel-oops_count new file mode 100644 index 000000000000..156cca9dbc96 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-oops_count @@ -0,0 +1,6 @@ +What: /sys/kernel/oops_count +Date: November 2022 +KernelVersion: 6.2.0 +Contact: Linux Kernel Hardening List +Description: + Shows how many times the system has Oopsed since last boot. diff --git a/MAINTAINERS b/MAINTAINERS index 1cd80c113721..0a1e95a58e54 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11106,6 +11106,7 @@ M: Kees Cook L: linux-hardening@vger.kernel.org S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/hardening +F: Documentation/ABI/testing/sysfs-kernel-oops_count F: include/linux/overflow.h F: include/linux/randomize_kstack.h F: mm/usercopy.c diff --git a/kernel/exit.c b/kernel/exit.c index 2ab3ead62118..dc1a32149f94 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -67,6 +67,7 @@ #include #include #include +#include #include #include @@ -99,6 +100,25 @@ static __init int kernel_exit_sysctls_init(void) late_initcall(kernel_exit_sysctls_init); #endif +static atomic_t oops_count = ATOMIC_INIT(0); + +#ifdef CONFIG_SYSFS +static ssize_t oops_count_show(struct kobject *kobj, struct kobj_attribute *attr, + char *page) +{ + return sysfs_emit(page, "%d\n", atomic_read(&oops_count)); +} + +static struct kobj_attribute oops_count_attr = __ATTR_RO(oops_count); + +static __init int kernel_exit_sysfs_init(void) +{ + sysfs_add_file_to_group(kernel_kobj, 
&oops_count_attr.attr, NULL); + return 0; +} +late_initcall(kernel_exit_sysfs_init); +#endif + static void __unhash_process(struct task_struct *p, bool group_dead) { nr_threads--; @@ -901,8 +921,6 @@ void __noreturn do_exit(long code) void __noreturn make_task_dead(int signr) { - static atomic_t oops_count = ATOMIC_INIT(0); - /* * Take the task off the cpu after something catastrophic has * happened. From 9d720a5a658f5135861773f26e927449bef93d61 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 30 Nov 2022 09:25:51 -0800 Subject: [PATCH 3155/4122] xfs: hoist refcount record merge predicates Hoist these multiline conditionals into separate static inline helpers to improve readability and set the stage for corruption fixes that will be introduced in the next patch. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Reviewed-by: Xiao Yang --- fs/xfs/libxfs/xfs_refcount.c | 129 ++++++++++++++++++++++++++++++----- 1 file changed, 113 insertions(+), 16 deletions(-) diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 3f34bafe18dd..4408893333a6 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -815,11 +815,119 @@ out_error: /* Is this extent valid? */ static inline bool xfs_refc_valid( - struct xfs_refcount_irec *rc) + const struct xfs_refcount_irec *rc) { return rc->rc_startblock != NULLAGBLOCK; } +static inline bool +xfs_refc_want_merge_center( + const struct xfs_refcount_irec *left, + const struct xfs_refcount_irec *cleft, + const struct xfs_refcount_irec *cright, + const struct xfs_refcount_irec *right, + bool cleft_is_cright, + enum xfs_refc_adjust_op adjust, + unsigned long long *ulenp) +{ + unsigned long long ulen = left->rc_blockcount; + + /* + * To merge with a center record, both shoulder records must be + * adjacent to the record we want to adjust. This is only true if + * find_left and find_right made all four records valid. 
+ */ + if (!xfs_refc_valid(left) || !xfs_refc_valid(right) || + !xfs_refc_valid(cleft) || !xfs_refc_valid(cright)) + return false; + + /* There must only be one record for the entire range. */ + if (!cleft_is_cright) + return false; + + /* The shoulder record refcounts must match the new refcount. */ + if (left->rc_refcount != cleft->rc_refcount + adjust) + return false; + if (right->rc_refcount != cleft->rc_refcount + adjust) + return false; + + /* + * The new record cannot exceed the max length. ulen is a ULL as the + * individual record block counts can be up to (u32 - 1) in length + * hence we need to catch u32 addition overflows here. + */ + ulen += cleft->rc_blockcount + right->rc_blockcount; + if (ulen >= MAXREFCEXTLEN) + return false; + + *ulenp = ulen; + return true; +} + +static inline bool +xfs_refc_want_merge_left( + const struct xfs_refcount_irec *left, + const struct xfs_refcount_irec *cleft, + enum xfs_refc_adjust_op adjust) +{ + unsigned long long ulen = left->rc_blockcount; + + /* + * For a left merge, the left shoulder record must be adjacent to the + * start of the range. If this is true, find_left made left and cleft + * contain valid contents. + */ + if (!xfs_refc_valid(left) || !xfs_refc_valid(cleft)) + return false; + + /* Left shoulder record refcount must match the new refcount. */ + if (left->rc_refcount != cleft->rc_refcount + adjust) + return false; + + /* + * The new record cannot exceed the max length. ulen is a ULL as the + * individual record block counts can be up to (u32 - 1) in length + * hence we need to catch u32 addition overflows here. 
+ */ + ulen += cleft->rc_blockcount; + if (ulen >= MAXREFCEXTLEN) + return false; + + return true; +} + +static inline bool +xfs_refc_want_merge_right( + const struct xfs_refcount_irec *cright, + const struct xfs_refcount_irec *right, + enum xfs_refc_adjust_op adjust) +{ + unsigned long long ulen = right->rc_blockcount; + + /* + * For a right merge, the right shoulder record must be adjacent to the + * end of the range. If this is true, find_right made cright and right + * contain valid contents. + */ + if (!xfs_refc_valid(right) || !xfs_refc_valid(cright)) + return false; + + /* Right shoulder record refcount must match the new refcount. */ + if (right->rc_refcount != cright->rc_refcount + adjust) + return false; + + /* + * The new record cannot exceed the max length. ulen is a ULL as the + * individual record block counts can be up to (u32 - 1) in length + * hence we need to catch u32 addition overflows here. + */ + ulen += cright->rc_blockcount; + if (ulen >= MAXREFCEXTLEN) + return false; + + return true; +} + /* * Try to merge with any extents on the boundaries of the adjustment range. */ @@ -861,23 +969,15 @@ xfs_refcount_merge_extents( (cleft.rc_blockcount == cright.rc_blockcount); /* Try to merge left, cleft, and right. cleft must == cright. */ - ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount + - right.rc_blockcount; - if (xfs_refc_valid(&left) && xfs_refc_valid(&right) && - xfs_refc_valid(&cleft) && xfs_refc_valid(&cright) && cequal && - left.rc_refcount == cleft.rc_refcount + adjust && - right.rc_refcount == cleft.rc_refcount + adjust && - ulen < MAXREFCEXTLEN) { + if (xfs_refc_want_merge_center(&left, &cleft, &cright, &right, cequal, + adjust, &ulen)) { *shape_changed = true; return xfs_refcount_merge_center_extents(cur, &left, &cleft, &right, ulen, aglen); } /* Try to merge left and cleft. 
*/ - ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount; - if (xfs_refc_valid(&left) && xfs_refc_valid(&cleft) && - left.rc_refcount == cleft.rc_refcount + adjust && - ulen < MAXREFCEXTLEN) { + if (xfs_refc_want_merge_left(&left, &cleft, adjust)) { *shape_changed = true; error = xfs_refcount_merge_left_extent(cur, &left, &cleft, agbno, aglen); @@ -893,10 +993,7 @@ xfs_refcount_merge_extents( } /* Try to merge cright and right. */ - ulen = (unsigned long long)right.rc_blockcount + cright.rc_blockcount; - if (xfs_refc_valid(&right) && xfs_refc_valid(&cright) && - right.rc_refcount == cright.rc_refcount + adjust && - ulen < MAXREFCEXTLEN) { + if (xfs_refc_want_merge_right(&cright, &right, adjust)) { *shape_changed = true; return xfs_refcount_merge_right_extent(cur, &right, &cright, aglen); From b25d1984aa884fc91a73a5a407b9ac976d441e9b Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 30 Nov 2022 09:25:51 -0800 Subject: [PATCH 3156/4122] xfs: estimate post-merge refcounts correctly Upon enabling fsdax + reflink for XFS, xfs/179 began to report refcount metadata corruptions after being run. Specifically, xfs_repair noticed single-block refcount records that could be combined but had not been. The root cause of this is improper MAXREFCOUNT edge case handling in xfs_refcount_merge_extents. When we're trying to find candidates for a refcount btree record merge, we compute the refcount attribute of the merged record, but we fail to account for the fact that once a record hits rc_refcount == MAXREFCOUNT, it is pinned that way forever. Hence the computed refcount is wrong, and we fail to merge the extents. Fix this by adjusting the merge predicates to compute the adjusted refcount correctly. Fixes: 3172725814f9 ("xfs: adjust refcount of an extent of blocks in refcount btree") Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner Reviewed-by: Xiao Yang --- fs/xfs/libxfs/xfs_refcount.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 4408893333a6..6f7ed9288fe4 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -820,6 +820,17 @@ xfs_refc_valid( return rc->rc_startblock != NULLAGBLOCK; } +static inline xfs_nlink_t +xfs_refc_merge_refcount( + const struct xfs_refcount_irec *irec, + enum xfs_refc_adjust_op adjust) +{ + /* Once a record hits MAXREFCOUNT, it is pinned there forever */ + if (irec->rc_refcount == MAXREFCOUNT) + return MAXREFCOUNT; + return irec->rc_refcount + adjust; +} + static inline bool xfs_refc_want_merge_center( const struct xfs_refcount_irec *left, @@ -831,6 +842,7 @@ xfs_refc_want_merge_center( unsigned long long *ulenp) { unsigned long long ulen = left->rc_blockcount; + xfs_nlink_t new_refcount; /* * To merge with a center record, both shoulder records must be @@ -846,9 +858,10 @@ xfs_refc_want_merge_center( return false; /* The shoulder record refcounts must match the new refcount. */ - if (left->rc_refcount != cleft->rc_refcount + adjust) + new_refcount = xfs_refc_merge_refcount(cleft, adjust); + if (left->rc_refcount != new_refcount) return false; - if (right->rc_refcount != cleft->rc_refcount + adjust) + if (right->rc_refcount != new_refcount) return false; /* @@ -871,6 +884,7 @@ xfs_refc_want_merge_left( enum xfs_refc_adjust_op adjust) { unsigned long long ulen = left->rc_blockcount; + xfs_nlink_t new_refcount; /* * For a left merge, the left shoulder record must be adjacent to the @@ -881,7 +895,8 @@ xfs_refc_want_merge_left( return false; /* Left shoulder record refcount must match the new refcount. 
*/ - if (left->rc_refcount != cleft->rc_refcount + adjust) + new_refcount = xfs_refc_merge_refcount(cleft, adjust); + if (left->rc_refcount != new_refcount) return false; /* @@ -903,6 +918,7 @@ xfs_refc_want_merge_right( enum xfs_refc_adjust_op adjust) { unsigned long long ulen = right->rc_blockcount; + xfs_nlink_t new_refcount; /* * For a right merge, the right shoulder record must be adjacent to the @@ -913,7 +929,8 @@ xfs_refc_want_merge_right( return false; /* Right shoulder record refcount must match the new refcount. */ - if (right->rc_refcount != cright->rc_refcount + adjust) + new_refcount = xfs_refc_merge_refcount(cright, adjust); + if (right->rc_refcount != new_refcount) return false; /* From 8c25febf23963431686f04874b96321288504127 Mon Sep 17 00:00:00 2001 From: Guo Xuenan Date: Thu, 1 Dec 2022 09:36:16 -0800 Subject: [PATCH 3157/4122] xfs: get rid of assert from xfs_btree_islastblock xfs_btree_check_block contains debugging knobs. With XFS_DEBUG enabled, turning on the debugging knob can trigger the assert in xfs_btree_islastblock. The test script is as follows: while true do mount $disk $mountpoint fsstress -d $testdir -l 0 -n 10000 -p 4 >/dev/null echo 1 > /sys/fs/xfs/sda/errortag/btree_chk_sblk sleep 10 umount $mountpoint done Kick off fsstress and only *then* turn on the debugging knob. If it happens that the knob gets turned on after the cntbt lookup succeeds but before the call to xfs_btree_islastblock, then we *can* end up in the situation where a previously checked btree block suddenly starts returning EFSCORRUPTED from xfs_btree_check_block. Kaboom. Darrick gave a very detailed explanation, as follows: Looking back at commit 27d9ee577dcce, I think the point of all this was to make sure that the cursor has actually performed a lookup, and that the btree block at whatever level we're asking about is ok. If the caller hasn't ever done a lookup, the bc_levels array will be empty, so cur->bc_levels[level].bp pointer will be NULL. 
The call to xfs_btree_get_block will crash anyway, so the "ASSERT(block);" part is pointless. If the caller did a lookup but the lookup failed due to block corruption, the corresponding cur->bc_levels[level].bp pointer will also be NULL, and we'll still crash. The "ASSERT(xfs_btree_check_block);" logic is also unnecessary. If the cursor level points to an inode root, the block buffer will be incore, so it had better always be consistent. If the caller ignores a failed lookup after a successful one and calls this function, the cursor state is garbage and the assert wouldn't have tripped anyway. So get rid of the assert. Fixes: 27d9ee577dcc ("xfs: actually check xfs_btree_check_block return in xfs_btree_islastblock") Signed-off-by: Guo Xuenan Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_btree.h | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index eef27858a013..29c4b4ccb909 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -556,7 +556,6 @@ xfs_btree_islastblock( struct xfs_buf *bp; block = xfs_btree_get_block(cur, level, &bp); - ASSERT(block && xfs_btree_check_block(cur, block, level, bp) == 0); if (cur->bc_flags & XFS_BTREE_LONG_PTRS) return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK); From ddfdd530e43fcb3f7a0a69966e5f6c33497b4ae3 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 1 Dec 2022 09:36:16 -0800 Subject: [PATCH 3158/4122] xfs: invalidate xfs_bufs when allocating cow extents While investigating test failures in xfs/17[1-3] in alwayscow mode, I noticed through code inspection that xfs_bmap_alloc_userdata isn't setting XFS_ALLOC_USERDATA when allocating extents for a file's CoW fork. COW staging extents should be flagged as USERDATA, since user data are persisted to these blocks before being remapped into a file. This mis-classification has a few impacts on the behavior of the system. 
First, the filestreams allocator is supposed to keep allocating from a chosen AG until it runs out of space in that AG. However, it only does that for USERDATA allocations, which means that COW allocations aren't tied to the filestreams AG. Fortunately, few people use filestreams, so nobody's noticed. A more serious problem is that xfs_alloc_ag_vextent_small looks for a buffer to invalidate *if* the USERDATA flag is set and the AG is so full that the allocation had to come from the AGFL because the cntbt is empty. The consequences of not invalidating the buffer are severe -- if the AIL incorrectly checkpoints a buffer that is now being used to store user data, that action will clobber the user's written data. Fix filestreams and yet another data corruption vector by flagging COW allocations as USERDATA. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_bmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 56b9b7db38bb..0d56a8d862e8 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -4058,7 +4058,7 @@ xfs_bmap_alloc_userdata( * the busy list. */ bma->datatype = XFS_ALLOC_NOBUSY; - if (whichfork == XFS_DATA_FORK) { + if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) { bma->datatype |= XFS_ALLOC_USERDATA; if (bma->offset == 0) bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA; From 3d50b95b50db36de945bfc34d4d94b7e11ee8fd9 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 14 Nov 2022 11:56:06 +0000 Subject: [PATCH 3159/4122] i2c: smbus: add DDR support for SPD On my x05 laptop I got: Memory type 0x12 not supported yet, not instantiating SPD Adding the 0x12 case led to a successfully instantiated SPD AT24 EEPROM. 
i801_smbus 0000:00:1f.3: SMBus using polling i2c i2c-6: 2/2 memory slots populated (from DMI) at24 6-0050: 256 byte spd EEPROM, read-only i2c i2c-6: Successfully instantiated SPD at 0x50 at24 6-0051: 256 byte spd EEPROM, read-only And then, I decoded it successfully via decode-dimms. Signed-off-by: Corentin Labbe Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-smbus.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/i2c/i2c-smbus.c b/drivers/i2c/i2c-smbus.c index 07c92c8495a3..c85710ed9548 100644 --- a/drivers/i2c/i2c-smbus.c +++ b/drivers/i2c/i2c-smbus.c @@ -361,9 +361,15 @@ void i2c_register_spd(struct i2c_adapter *adap) return; } + /* + * Memory types could be found at section 7.18.2 (Memory Device — Type), table 78 + * https://www.dmtf.org/sites/default/files/standards/documents/DSP0134_3.6.0.pdf + */ switch (common_mem_type) { + case 0x12: /* DDR */ case 0x13: /* DDR2 */ case 0x18: /* DDR3 */ + case 0x1B: /* LPDDR */ case 0x1C: /* LPDDR2 */ case 0x1D: /* LPDDR3 */ name = "spd"; From 5bf71889ad9a4d39b7665c105a005c5a33d730ba Mon Sep 17 00:00:00 2001 From: Akhil R Date: Thu, 17 Nov 2022 15:34:15 +0530 Subject: [PATCH 3160/4122] i2c: tegra: Set ACPI node as primary fwnode Set ACPI node as the primary fwnode of I2C adapter to allow enumeration of child devices from the ACPI table Signed-off-by: Zubair Waheed Signed-off-by: Akhil R Reviewed-by: Thierry Reding Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 954022c04cc4..69c9ae161bbe 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -1826,6 +1826,7 @@ static int tegra_i2c_probe(struct platform_device *pdev) i2c_dev->adapter.class = I2C_CLASS_DEPRECATED; i2c_dev->adapter.algo = &tegra_i2c_algo; i2c_dev->adapter.nr = pdev->id; + ACPI_COMPANION_SET(&i2c_dev->adapter.dev, ACPI_COMPANION(&pdev->dev)); if 
(i2c_dev->hw->supports_bus_clear) i2c_dev->adapter.bus_recovery_info = &tegra_i2c_recovery_info; From de917701da5d5ccb31825dca850ac49399e0f289 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Tue, 15 Nov 2022 12:30:18 +0000 Subject: [PATCH 3161/4122] dt-bindings: i2c: renesas,riic: Document RZ/Five SoC The RIIC block on the RZ/Five SoC is identical to one found on the RZ/G2UL SoC. "renesas,riic-r9a07g043" compatible string will be used on the RZ/Five SoC so to make this clear, update the comment to include RZ/Five SoC. No driver changes are required as generic compatible string "renesas,riic-rz" will be used as a fallback on RZ/Five SoC. Signed-off-by: Lad Prabhakar Reviewed-by: Geert Uytterhoeven Acked-by: Rob Herring Signed-off-by: Wolfram Sang --- Documentation/devicetree/bindings/i2c/renesas,riic.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/i2c/renesas,riic.yaml b/Documentation/devicetree/bindings/i2c/renesas,riic.yaml index d3c0d5c427ac..2291a7cd619b 100644 --- a/Documentation/devicetree/bindings/i2c/renesas,riic.yaml +++ b/Documentation/devicetree/bindings/i2c/renesas,riic.yaml @@ -19,7 +19,7 @@ properties: - enum: - renesas,riic-r7s72100 # RZ/A1H - renesas,riic-r7s9210 # RZ/A2M - - renesas,riic-r9a07g043 # RZ/G2UL + - renesas,riic-r9a07g043 # RZ/G2UL and RZ/Five - renesas,riic-r9a07g044 # RZ/G2{L,LC} - renesas,riic-r9a07g054 # RZ/V2L - const: renesas,riic-rz # RZ/A or RZ/G2L From a33004e844e4c60da86ecf8c249aab7179817fce Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 29 Nov 2022 17:52:59 +0000 Subject: [PATCH 3162/4122] KVM: selftests: Fix inverted "warning" in access tracking perf test Warn if the number of idle pages is greater than or equal to 10% of the total number of pages, not if the percentage of idle pages is less than 10%. The original code asserted that less than 10% of pages were still idle, but the check got inverted when the assert was converted to a warning. 
Opportunistically clean up the warning; selftests are 64-bit only, there is no need to use "%PRIu64" instead of "%lu". Fixes: 6336a810db5c ("KVM: selftests: replace assertion with warning in access_tracking_perf_test") Reviewed-by: Emanuele Giuseppe Esposito Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20221129175300.4052283-2-seanjc@google.com --- tools/testing/selftests/kvm/access_tracking_perf_test.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c index 02d3587cab0a..d45ef319a68f 100644 --- a/tools/testing/selftests/kvm/access_tracking_perf_test.c +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -185,10 +185,9 @@ static void mark_vcpu_memory_idle(struct kvm_vm *vm, * happens, much more pages are cached there and guest won't see the * "idle" bit cleared. */ - if (still_idle < pages / 10) - printf("WARNING: vCPU%d: Too many pages still idle (%" PRIu64 - "out of %" PRIu64 "), this will affect performance results" - ".\n", + if (still_idle >= pages / 10) + printf("WARNING: vCPU%d: Too many pages still idle (%lu out of %lu), " + "this will affect performance results.\n", vcpu_idx, still_idle, pages); close(page_idle_fd); From 8fcee0421386344b58fdc1cd5940219617037968 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 29 Nov 2022 17:53:00 +0000 Subject: [PATCH 3163/4122] KVM: selftests: Restore assert for non-nested VMs in access tracking test Restore the assert (on x86-64) that <10% of pages are still idle when NOT running as a nested VM in the access tracking test. The original assert was converted to a "warning" to avoid false failures when running the test in a VM, but the non-nested case does not suffer from the same "infinite TLB size" issue. 
Using the HYPERVISOR flag isn't infallible as VMMs aren't strictly required to enumerate the "feature" in CPUID, but practically speaking anyone that is running KVM selftests in VMs is going to be using a VMM and hypervisor that sets the HYPERVISOR flag. Cc: David Matlack Reviewed-by: Emanuele Giuseppe Esposito Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20221129175300.4052283-3-seanjc@google.com --- .../selftests/kvm/access_tracking_perf_test.c | 17 ++++++++++++----- .../selftests/kvm/include/x86_64/processor.h | 1 + 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c index d45ef319a68f..9f9503e40ca5 100644 --- a/tools/testing/selftests/kvm/access_tracking_perf_test.c +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -46,6 +46,7 @@ #include "test_util.h" #include "memstress.h" #include "guest_modes.h" +#include "processor.h" /* Global variable used to synchronize all of the vCPU threads. */ static int iteration; @@ -180,15 +181,21 @@ static void mark_vcpu_memory_idle(struct kvm_vm *vm, * access tracking but low enough as to not make the test too brittle * over time and across architectures. * - * Note that when run in nested virtualization, this check will trigger - * much more frequently because TLB size is unlimited and since no flush - * happens, much more pages are cached there and guest won't see the - * "idle" bit cleared. + * When running the guest as a nested VM, "warn" instead of asserting + * as the TLB size is effectively unlimited and the KVM doesn't + * explicitly flush the TLB when aging SPTEs. As a result, more pages + * are cached and the guest won't see the "idle" bit cleared. 
*/ - if (still_idle >= pages / 10) + if (still_idle >= pages / 10) { +#ifdef __x86_64__ + TEST_ASSERT(this_cpu_has(X86_FEATURE_HYPERVISOR), + "vCPU%d: Too many pages still idle (%lu out of %lu)", + vcpu_idx, still_idle, pages); +#endif printf("WARNING: vCPU%d: Too many pages still idle (%lu out of %lu), " "this will affect performance results.\n", vcpu_idx, still_idle, pages); + } close(page_idle_fd); close(pagemap_fd); diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 5d310abe6c3f..22852bd32d7b 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -94,6 +94,7 @@ struct kvm_x86_cpu_feature { #define X86_FEATURE_XSAVE KVM_X86_CPU_FEATURE(0x1, 0, ECX, 26) #define X86_FEATURE_OSXSAVE KVM_X86_CPU_FEATURE(0x1, 0, ECX, 27) #define X86_FEATURE_RDRAND KVM_X86_CPU_FEATURE(0x1, 0, ECX, 30) +#define X86_FEATURE_HYPERVISOR KVM_X86_CPU_FEATURE(0x1, 0, ECX, 31) #define X86_FEATURE_PAE KVM_X86_CPU_FEATURE(0x1, 0, EDX, 6) #define X86_FEATURE_MCE KVM_X86_CPU_FEATURE(0x1, 0, EDX, 7) #define X86_FEATURE_APIC KVM_X86_CPU_FEATURE(0x1, 0, EDX, 9) From 18eee7bfd18d6c9586dd224cf5b74258700fe815 Mon Sep 17 00:00:00 2001 From: Lei Wang Date: Mon, 28 Nov 2022 22:57:32 +0000 Subject: [PATCH 3164/4122] KVM: selftests: Move XFD CPUID checking out of __vm_xsave_require_permission() Move the kvm_cpu_has() check on X86_FEATURE_XFD out of the helper to enable off-by-default XSAVE-managed features and into the one test that currently requires XFD (XFeature Disable) support. kvm_cpu_has() uses kvm_get_supported_cpuid() and thus caches KVM_GET_SUPPORTED_CPUID, and so using kvm_cpu_has() before ARCH_REQ_XCOMP_GUEST_PERM effectively results in the test caching stale values, e.g. subsequent checks on AMX_TILE will get false negatives.
Although off-by-default features are nonsensical without XFD, checking for XFD virtualization prior to enabling such features isn't strictly required. Signed-off-by: Lei Wang Fixes: 7fbb653e01fd ("KVM: selftests: Check KVM's supported CPUID, not host CPUID, for XFD") Link: https://lore.kernel.org/r/20221125023839.315207-1-lei4.wang@intel.com [sean: add Fixes, reword changelog] Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20221128225735.3291648-2-seanjc@google.com --- tools/testing/selftests/kvm/lib/x86_64/processor.c | 2 -- tools/testing/selftests/kvm/x86_64/amx_test.c | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index d532c20c74fd..aac7b32a794b 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -563,8 +563,6 @@ void __vm_xsave_require_permission(int bit, const char *name) .addr = (unsigned long) &bitmask }; - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XFD)); - kvm_fd = open_kvm_dev_path_or_exit(); rc = __kvm_ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr); close(kvm_fd); diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c index 21de6ae42086..1256c7faadd3 100644 --- a/tools/testing/selftests/kvm/x86_64/amx_test.c +++ b/tools/testing/selftests/kvm/x86_64/amx_test.c @@ -254,6 +254,7 @@ int main(int argc, char *argv[]) /* Create VM */ vm = vm_create_with_one_vcpu(&vcpu, guest_code); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XFD)); TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE)); TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_AMX_TILE)); TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILECFG)); From 2ceade1d363c934633a1788d0f98fc2332062b92 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 28 Nov 2022 22:57:33 +0000 Subject: [PATCH 3165/4122] KVM: selftests: Move __vm_xsave_require_permission() below CPUID helpers Move 
__vm_xsave_require_permission() below the CPUID helpers so that a future change can reference the cached result of KVM_GET_SUPPORTED_CPUID while keeping the definition of the variable close to its intended user, kvm_get_supported_cpuid(). No functional change intended. Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20221128225735.3291648-3-seanjc@google.com --- .../selftests/kvm/lib/x86_64/processor.c | 64 +++++++++---------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index aac7b32a794b..23067465c035 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -552,38 +552,6 @@ static void vcpu_setup(struct kvm_vm *vm, struct kvm_vcpu *vcpu) vcpu_sregs_set(vcpu, &sregs); } -void __vm_xsave_require_permission(int bit, const char *name) -{ - int kvm_fd; - u64 bitmask; - long rc; - struct kvm_device_attr attr = { - .group = 0, - .attr = KVM_X86_XCOMP_GUEST_SUPP, - .addr = (unsigned long) &bitmask - }; - - kvm_fd = open_kvm_dev_path_or_exit(); - rc = __kvm_ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr); - close(kvm_fd); - - if (rc == -1 && (errno == ENXIO || errno == EINVAL)) - __TEST_REQUIRE(0, "KVM_X86_XCOMP_GUEST_SUPP not supported"); - - TEST_ASSERT(rc == 0, "KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) error: %ld", rc); - - __TEST_REQUIRE(bitmask & (1ULL << bit), - "Required XSAVE feature '%s' not supported", name); - - TEST_REQUIRE(!syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit)); - - rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask); - TEST_ASSERT(rc == 0, "prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc); - TEST_ASSERT(bitmask & (1ULL << bit), - "prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure bitmask=0x%lx", - bitmask); -} - void kvm_arch_vm_post_create(struct kvm_vm *vm) { vm_create_irqchip(vm); @@ -705,6 +673,38 @@ uint64_t 
kvm_get_feature_msr(uint64_t msr_index) return buffer.entry.data; } +void __vm_xsave_require_permission(int bit, const char *name) +{ + int kvm_fd; + u64 bitmask; + long rc; + struct kvm_device_attr attr = { + .group = 0, + .attr = KVM_X86_XCOMP_GUEST_SUPP, + .addr = (unsigned long) &bitmask + }; + + kvm_fd = open_kvm_dev_path_or_exit(); + rc = __kvm_ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr); + close(kvm_fd); + + if (rc == -1 && (errno == ENXIO || errno == EINVAL)) + __TEST_REQUIRE(0, "KVM_X86_XCOMP_GUEST_SUPP not supported"); + + TEST_ASSERT(rc == 0, "KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) error: %ld", rc); + + __TEST_REQUIRE(bitmask & (1ULL << bit), + "Required XSAVE feature '%s' not supported", name); + + TEST_REQUIRE(!syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit)); + + rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask); + TEST_ASSERT(rc == 0, "prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc); + TEST_ASSERT(bitmask & (1ULL << bit), + "prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure bitmask=0x%lx", + bitmask); +} + void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid) { TEST_ASSERT(cpuid != vcpu->cpuid, "@cpuid can't be the vCPU's CPUID"); From cd5f3d210095347e9d40a9a1d464f5ee0bb5d7f2 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 28 Nov 2022 22:57:34 +0000 Subject: [PATCH 3166/4122] KVM: selftests: Disallow "get supported CPUID" before REQ_XCOMP_GUEST_PERM Disallow using kvm_get_supported_cpuid() and thus caching KVM's supported CPUID info before enabling XSAVE-managed features that are off-by-default and must be enabled by ARCH_REQ_XCOMP_GUEST_PERM. Caching the supported CPUID before all XSAVE features are enabled can result in false negatives due to testing features that were cached before they were enabled. 
Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20221128225735.3291648-4-seanjc@google.com --- .../selftests/kvm/lib/x86_64/processor.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 23067465c035..1d3829e652e6 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -601,21 +601,24 @@ void vcpu_arch_free(struct kvm_vcpu *vcpu) free(vcpu->cpuid); } +/* Do not use kvm_supported_cpuid directly except for validity checks. */ +static void *kvm_supported_cpuid; + const struct kvm_cpuid2 *kvm_get_supported_cpuid(void) { - static struct kvm_cpuid2 *cpuid; int kvm_fd; - if (cpuid) - return cpuid; + if (kvm_supported_cpuid) + return kvm_supported_cpuid; - cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES); + kvm_supported_cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES); kvm_fd = open_kvm_dev_path_or_exit(); - kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid); + kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, + (struct kvm_cpuid2 *)kvm_supported_cpuid); close(kvm_fd); - return cpuid; + return kvm_supported_cpuid; } static uint32_t __kvm_cpu_has(const struct kvm_cpuid2 *cpuid, @@ -684,6 +687,9 @@ void __vm_xsave_require_permission(int bit, const char *name) .addr = (unsigned long) &bitmask }; + TEST_ASSERT(!kvm_supported_cpuid, + "kvm_get_supported_cpuid() cannot be used before ARCH_REQ_XCOMP_GUEST_PERM"); + kvm_fd = open_kvm_dev_path_or_exit(); rc = __kvm_ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr); close(kvm_fd); From 553d1652b8615b5ae3080bb1a561207aee87fa85 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 28 Nov 2022 22:57:35 +0000 Subject: [PATCH 3167/4122] KVM: selftests: Do kvm_cpu_has() checks before creating VM+vCPU Move the AMX test's kvm_cpu_has() checks before creating the VM+vCPU, there are no dependencies between the two 
operations. Opportunistically add a comment to call out that enabling off-by-default XSAVE-managed features must be done before KVM_GET_SUPPORTED_CPUID is cached. Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20221128225735.3291648-5-seanjc@google.com --- tools/testing/selftests/kvm/x86_64/amx_test.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c index 1256c7faadd3..bd72c6eb3b67 100644 --- a/tools/testing/selftests/kvm/x86_64/amx_test.c +++ b/tools/testing/selftests/kvm/x86_64/amx_test.c @@ -249,17 +249,21 @@ int main(int argc, char *argv[]) u32 amx_offset; int stage, ret; + /* + * Note, all off-by-default features must be enabled before anything + * caches KVM_GET_SUPPORTED_CPUID, e.g. before using kvm_cpu_has(). + */ vm_xsave_require_permission(XSTATE_XTILE_DATA_BIT); - /* Create VM */ - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XFD)); TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE)); TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_AMX_TILE)); TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILECFG)); TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILEDATA)); + /* Create VM */ + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + TEST_ASSERT(kvm_cpu_has_p(X86_PROPERTY_XSTATE_MAX_SIZE), "KVM should enumerate max XSAVE size when XSAVE is supported"); xsave_restore_size = kvm_cpu_property(X86_PROPERTY_XSTATE_MAX_SIZE); From 0c3265235fc17e78773025ed0ddc7c0324b6ed89 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 22 Nov 2022 01:33:09 +0000 Subject: [PATCH 3168/4122] KVM: selftests: Define and use a custom static assert in lib headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Define and use kvm_static_assert() in the common KVM selftests headers to provide deterministic behavior, and to allow creating static asserts without dummy messages. 
The kernel's static_assert() makes the message param optional, and on the surface, tools/include/linux/build_bug.h appears to follow suit. However, glibc may override static_assert() and redefine it as a direct alias of _Static_assert(), which makes the message parameter mandatory. This leads to non-deterministic behavior as KVM selftests code that utilizes static_assert() without a custom message may or may not compile depending on the order of includes. E.g. recently added asserts in x86_64/processor.h fail on some systems with errors like In file included from lib/memstress.c:11:0: include/x86_64/processor.h: In function ‘this_cpu_has_p’: include/x86_64/processor.h:193:34: error: expected ‘,’ before ‘)’ token static_assert(low_bit < high_bit); \ ^ due to _Static_assert() expecting a comma before a message. The "message optional" version of static_assert() uses macro magic to strip away the comma when presented with an empty __VA_ARGS__ #ifndef static_assert #define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr) #define __static_assert(expr, msg, ...) _Static_assert(expr, msg) #endif // static_assert and effectively generates "_Static_assert(expr, #expr)". The incompatible version of static_assert() gets defined by this snippet in /usr/include/assert.h: #if defined __USE_ISOC11 && !defined __cplusplus # undef static_assert # define static_assert _Static_assert #endif which yields "_Static_assert(expr)" and thus fails as above. KVM selftests don't actually care about using C11, but __USE_ISOC11 gets defined because of _GNU_SOURCE, which many tests do #define. _GNU_SOURCE triggers a massive pile of defines in /usr/include/features.h, including _ISOC11_SOURCE: /* If _GNU_SOURCE was defined by the user, turn on all the other features.
*/ #ifdef _GNU_SOURCE # undef _ISOC95_SOURCE # define _ISOC95_SOURCE 1 # undef _ISOC99_SOURCE # define _ISOC99_SOURCE 1 # undef _ISOC11_SOURCE # define _ISOC11_SOURCE 1 # undef _POSIX_SOURCE # define _POSIX_SOURCE 1 # undef _POSIX_C_SOURCE # define _POSIX_C_SOURCE 200809L # undef _XOPEN_SOURCE # define _XOPEN_SOURCE 700 # undef _XOPEN_SOURCE_EXTENDED # define _XOPEN_SOURCE_EXTENDED 1 # undef _LARGEFILE64_SOURCE # define _LARGEFILE64_SOURCE 1 # undef _DEFAULT_SOURCE # define _DEFAULT_SOURCE 1 # undef _ATFILE_SOURCE # define _ATFILE_SOURCE 1 #endif which further down in /usr/include/features.h leads to: /* This is to enable the ISO C11 extension. */ #if (defined _ISOC11_SOURCE \ || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 201112L)) # define __USE_ISOC11 1 #endif To make matters worse, /usr/include/assert.h doesn't guard against multiple inclusion by turning itself into a nop, but instead #undefs a few macros and continues on. As a result, it's all but impossible to ensure the "message optional" version of static_assert() will actually be used, e.g. explicitly including assert.h and #undef'ing static_assert() doesn't work as a later inclusion of assert.h will again redefine its version. 
#ifdef _ASSERT_H # undef _ASSERT_H # undef assert # undef __ASSERT_VOID_CAST # ifdef __USE_GNU # undef assert_perror # endif #endif /* assert.h */ #define _ASSERT_H 1 #include <features.h> Fixes: fcba483e8246 ("KVM: selftests: Sanity check input to ioctls() at build time") Fixes: ee3795536664 ("KVM: selftests: Refactor X86_FEATURE_* framework to prep for X86_PROPERTY_*") Fixes: 53a7dc0f215e ("KVM: selftests: Add X86_PROPERTY_* framework to retrieve CPUID values") Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20221122013309.1872347-1-seanjc@google.com --- .../selftests/kvm/include/kvm_util_base.h | 14 +++++++++++- .../selftests/kvm/include/x86_64/processor.h | 22 +++++++++---------- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h index c7685c7038ff..9fa0d340f291 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -22,6 +22,18 @@ #include "sparsebit.h" +/* + * Provide a version of static_assert() that is guaranteed to have an optional + * message param. If _ISOC11_SOURCE is defined, glibc (/usr/include/assert.h) + * #undefs and #defines static_assert() as a direct alias to _Static_assert(), + * i.e. effectively makes the message mandatory. Many KVM selftests #define + * _GNU_SOURCE for various reasons, and _GNU_SOURCE implies _ISOC11_SOURCE. As + * a result, static_assert() behavior is non-deterministic and may or may not + * require a message depending on #include order. + */ +#define __kvm_static_assert(expr, msg, ...) _Static_assert(expr, msg) +#define kvm_static_assert(expr, ...)
__kvm_static_assert(expr, ##__VA_ARGS__, #expr) + #define KVM_DEV_PATH "/dev/kvm" #define KVM_MAX_VCPUS 512 @@ -196,7 +208,7 @@ static inline bool kvm_has_cap(long cap) #define kvm_do_ioctl(fd, cmd, arg) \ ({ \ - static_assert(!_IOC_SIZE(cmd) || sizeof(*arg) == _IOC_SIZE(cmd), ""); \ + kvm_static_assert(!_IOC_SIZE(cmd) || sizeof(*arg) == _IOC_SIZE(cmd)); \ ioctl(fd, cmd, arg); \ }) diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 22852bd32d7b..411549ef4947 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -72,11 +72,11 @@ struct kvm_x86_cpu_feature { .bit = __bit, \ }; \ \ - static_assert((fn & 0xc0000000) == 0 || \ - (fn & 0xc0000000) == 0x40000000 || \ - (fn & 0xc0000000) == 0x80000000 || \ - (fn & 0xc0000000) == 0xc0000000); \ - static_assert(idx < BIT(sizeof(feature.index) * BITS_PER_BYTE)); \ + kvm_static_assert((fn & 0xc0000000) == 0 || \ + (fn & 0xc0000000) == 0x40000000 || \ + (fn & 0xc0000000) == 0x80000000 || \ + (fn & 0xc0000000) == 0xc0000000); \ + kvm_static_assert(idx < BIT(sizeof(feature.index) * BITS_PER_BYTE)); \ feature; \ }) @@ -191,12 +191,12 @@ struct kvm_x86_cpu_property { .hi_bit = high_bit, \ }; \ \ - static_assert(low_bit < high_bit); \ - static_assert((fn & 0xc0000000) == 0 || \ - (fn & 0xc0000000) == 0x40000000 || \ - (fn & 0xc0000000) == 0x80000000 || \ - (fn & 0xc0000000) == 0xc0000000); \ - static_assert(idx < BIT(sizeof(property.index) * BITS_PER_BYTE)); \ + kvm_static_assert(low_bit < high_bit); \ + kvm_static_assert((fn & 0xc0000000) == 0 || \ + (fn & 0xc0000000) == 0x40000000 || \ + (fn & 0xc0000000) == 0x80000000 || \ + (fn & 0xc0000000) == 0xc0000000); \ + kvm_static_assert(idx < BIT(sizeof(property.index) * BITS_PER_BYTE)); \ property; \ }) From efa2afc3969e166702fd2ae3cfb1a7a195ef3533 Mon Sep 17 00:00:00 2001 From: Xiao Yang Date: Thu, 1 Dec 2022 14:37:05 +0000 
Subject: [PATCH 3169/4122] RDMA: Extend RDMA user ABI to support atomic write 1) Define new atomic write request/completion in userspace. 2) Define new atomic write capability in userspace. Link: https://lore.kernel.org/r/1669905432-14-2-git-send-email-yangx.jy@fujitsu.com Signed-off-by: Xiao Yang Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/ib_user_verbs.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 43672cb1fd57..237814815544 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -466,6 +466,7 @@ enum ib_uverbs_wc_opcode { IB_UVERBS_WC_BIND_MW = 5, IB_UVERBS_WC_LOCAL_INV = 6, IB_UVERBS_WC_TSO = 7, + IB_UVERBS_WC_ATOMIC_WRITE = 9, }; struct ib_uverbs_wc { @@ -784,6 +785,7 @@ enum ib_uverbs_wr_opcode { IB_UVERBS_WR_RDMA_READ_WITH_INV = 11, IB_UVERBS_WR_MASKED_ATOMIC_CMP_AND_SWP = 12, IB_UVERBS_WR_MASKED_ATOMIC_FETCH_AND_ADD = 13, + IB_UVERBS_WR_ATOMIC_WRITE = 15, /* Review enum ib_wr_opcode before modifying this */ }; @@ -1331,6 +1333,8 @@ enum ib_uverbs_device_cap_flags { /* Deprecated. Please use IB_UVERBS_RAW_PACKET_CAP_SCATTER_FCS. */ IB_UVERBS_DEVICE_RAW_SCATTER_FCS = 1ULL << 34, IB_UVERBS_DEVICE_PCI_WRITE_END_PADDING = 1ULL << 36, + /* Atomic write attributes */ + IB_UVERBS_DEVICE_ATOMIC_WRITE = 1ULL << 40, }; enum ib_uverbs_raw_packet_caps { From 3ff81e827b8d5cea36ff374a11c200b4306f45d2 Mon Sep 17 00:00:00 2001 From: Xiao Yang Date: Thu, 1 Dec 2022 14:37:06 +0000 Subject: [PATCH 3170/4122] RDMA: Extend RDMA kernel ABI to support atomic write 1) Define new atomic write request/completion in kernel. 2) Define new atomic write capability in kernel. 3) Define new atomic write opcode for RC service in packet. 
Link: https://lore.kernel.org/r/1669905432-14-3-git-send-email-yangx.jy@fujitsu.com Signed-off-by: Xiao Yang Signed-off-by: Jason Gunthorpe --- include/rdma/ib_pack.h | 2 ++ include/rdma/ib_verbs.h | 3 +++ 2 files changed, 5 insertions(+) diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h index a9162f25beaf..f932d164af63 100644 --- a/include/rdma/ib_pack.h +++ b/include/rdma/ib_pack.h @@ -84,6 +84,7 @@ enum { /* opcode 0x15 is reserved */ IB_OPCODE_SEND_LAST_WITH_INVALIDATE = 0x16, IB_OPCODE_SEND_ONLY_WITH_INVALIDATE = 0x17, + IB_OPCODE_ATOMIC_WRITE = 0x1D, /* real constants follow -- see comment about above IB_OPCODE() macro for more details */ @@ -112,6 +113,7 @@ enum { IB_OPCODE(RC, FETCH_ADD), IB_OPCODE(RC, SEND_LAST_WITH_INVALIDATE), IB_OPCODE(RC, SEND_ONLY_WITH_INVALIDATE), + IB_OPCODE(RC, ATOMIC_WRITE), /* UC */ IB_OPCODE(UC, SEND_FIRST), diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 77dd9148815b..df6bb26ba0be 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -270,6 +270,7 @@ enum ib_device_cap_flags { /* The device supports padding incoming writes to cacheline. 
*/ IB_DEVICE_PCI_WRITE_END_PADDING = IB_UVERBS_DEVICE_PCI_WRITE_END_PADDING, + IB_DEVICE_ATOMIC_WRITE = IB_UVERBS_DEVICE_ATOMIC_WRITE, }; enum ib_kernel_cap_flags { @@ -982,6 +983,7 @@ enum ib_wc_opcode { IB_WC_BIND_MW = IB_UVERBS_WC_BIND_MW, IB_WC_LOCAL_INV = IB_UVERBS_WC_LOCAL_INV, IB_WC_LSO = IB_UVERBS_WC_TSO, + IB_WC_ATOMIC_WRITE = IB_UVERBS_WC_ATOMIC_WRITE, IB_WC_REG_MR, IB_WC_MASKED_COMP_SWAP, IB_WC_MASKED_FETCH_ADD, @@ -1325,6 +1327,7 @@ enum ib_wr_opcode { IB_UVERBS_WR_MASKED_ATOMIC_CMP_AND_SWP, IB_WR_MASKED_ATOMIC_FETCH_AND_ADD = IB_UVERBS_WR_MASKED_ATOMIC_FETCH_AND_ADD, + IB_WR_ATOMIC_WRITE = IB_UVERBS_WR_ATOMIC_WRITE, /* These are kernel only and can not be issued by userspace */ IB_WR_REG_MR = 0x20, From c2d939002934fa9d7b802f196b069963b46da194 Mon Sep 17 00:00:00 2001 From: Xiao Yang Date: Thu, 1 Dec 2022 14:37:07 +0000 Subject: [PATCH 3171/4122] RDMA/rxe: Extend rxe user ABI to support atomic write Define an atomic_wr array to store 8-byte value. Link: https://lore.kernel.org/r/1669905432-14-4-git-send-email-yangx.jy@fujitsu.com Signed-off-by: Xiao Yang Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/rdma_user_rxe.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/rdma/rdma_user_rxe.h b/include/uapi/rdma/rdma_user_rxe.h index 73f679dfd2df..d20d1ecf046f 100644 --- a/include/uapi/rdma/rdma_user_rxe.h +++ b/include/uapi/rdma/rdma_user_rxe.h @@ -146,6 +146,7 @@ struct rxe_dma_info { __u32 reserved; union { __DECLARE_FLEX_ARRAY(__u8, inline_data); + __DECLARE_FLEX_ARRAY(__u8, atomic_wr); __DECLARE_FLEX_ARRAY(struct rxe_sge, sge); }; }; From 5c7af6c7938466aa2f3c52057f4dd28b4a1e9e42 Mon Sep 17 00:00:00 2001 From: Xiao Yang Date: Thu, 1 Dec 2022 14:37:08 +0000 Subject: [PATCH 3172/4122] RDMA/rxe: Extend rxe packet format to support atomic write Extend rxe_wr_opcode_info[] and rxe_opcode[] for new atomic write opcode. 
Link: https://lore.kernel.org/r/1669905432-14-5-git-send-email-yangx.jy@fujitsu.com Signed-off-by: Xiao Yang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_opcode.c | 18 ++++++++++++++++++ drivers/infiniband/sw/rxe/rxe_opcode.h | 3 +++ 2 files changed, 21 insertions(+) diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.c b/drivers/infiniband/sw/rxe/rxe_opcode.c index d4ba4d506f17..fb196029048e 100644 --- a/drivers/infiniband/sw/rxe/rxe_opcode.c +++ b/drivers/infiniband/sw/rxe/rxe_opcode.c @@ -101,6 +101,12 @@ struct rxe_wr_opcode_info rxe_wr_opcode_info[] = { [IB_QPT_UC] = WR_LOCAL_OP_MASK, }, }, + [IB_WR_ATOMIC_WRITE] = { + .name = "IB_WR_ATOMIC_WRITE", + .mask = { + [IB_QPT_RC] = WR_ATOMIC_WRITE_MASK, + }, + }, }; struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = { @@ -378,6 +384,18 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = { RXE_IETH_BYTES, } }, + [IB_OPCODE_RC_ATOMIC_WRITE] = { + .name = "IB_OPCODE_RC_ATOMIC_WRITE", + .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK | + RXE_ATOMIC_WRITE_MASK | RXE_START_MASK | + RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_RETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RETH] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + RXE_RETH_BYTES, + } + }, /* UC */ [IB_OPCODE_UC_SEND_FIRST] = { diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.h b/drivers/infiniband/sw/rxe/rxe_opcode.h index 8f9aaaf260f2..a470e9b0b884 100644 --- a/drivers/infiniband/sw/rxe/rxe_opcode.h +++ b/drivers/infiniband/sw/rxe/rxe_opcode.h @@ -20,6 +20,7 @@ enum rxe_wr_mask { WR_READ_MASK = BIT(3), WR_WRITE_MASK = BIT(4), WR_LOCAL_OP_MASK = BIT(5), + WR_ATOMIC_WRITE_MASK = BIT(7), WR_READ_OR_WRITE_MASK = WR_READ_MASK | WR_WRITE_MASK, WR_WRITE_OR_SEND_MASK = WR_WRITE_MASK | WR_SEND_MASK, @@ -81,6 +82,8 @@ enum rxe_hdr_mask { RXE_LOOPBACK_MASK = BIT(NUM_HDR_TYPES + 12), + RXE_ATOMIC_WRITE_MASK = BIT(NUM_HDR_TYPES + 14), + RXE_READ_OR_ATOMIC_MASK = (RXE_READ_MASK | RXE_ATOMIC_MASK), RXE_WRITE_OR_SEND_MASK = 
(RXE_WRITE_MASK | RXE_SEND_MASK), RXE_READ_OR_WRITE_MASK = (RXE_READ_MASK | RXE_WRITE_MASK), From abb633cf28049e6a7c37c44f83a8584f7dbded7d Mon Sep 17 00:00:00 2001 From: Xiao Yang Date: Thu, 1 Dec 2022 14:39:25 +0000 Subject: [PATCH 3173/4122] RDMA/rxe: Make requester support atomic write on RC service Make requester process and send an atomic write request on RC service. Link: https://lore.kernel.org/r/1669905568-62-1-git-send-email-yangx.jy@fujitsu.com Signed-off-by: Xiao Yang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_req.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 4d45f508392f..2713e9058922 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -258,6 +258,10 @@ static int next_opcode_rc(struct rxe_qp *qp, u32 opcode, int fits) else return fits ? IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE : IB_OPCODE_RC_SEND_FIRST; + + case IB_WR_ATOMIC_WRITE: + return IB_OPCODE_RC_ATOMIC_WRITE; + case IB_WR_REG_MR: case IB_WR_LOCAL_INV: return opcode; @@ -486,6 +490,11 @@ static int finish_packet(struct rxe_qp *qp, struct rxe_av *av, } } + if (pkt->mask & RXE_ATOMIC_WRITE_MASK) { + memcpy(payload_addr(pkt), wqe->dma.atomic_wr, payload); + wqe->dma.resid -= payload; + } + return 0; } @@ -709,13 +718,15 @@ int rxe_requester(void *arg) } mask = rxe_opcode[opcode].mask; - if (unlikely(mask & RXE_READ_OR_ATOMIC_MASK)) { + if (unlikely(mask & (RXE_READ_OR_ATOMIC_MASK | + RXE_ATOMIC_WRITE_MASK))) { if (check_init_depth(qp, wqe)) goto exit; } mtu = get_mtu(qp); - payload = (mask & RXE_WRITE_OR_SEND_MASK) ? wqe->dma.resid : 0; + payload = (mask & (RXE_WRITE_OR_SEND_MASK | RXE_ATOMIC_WRITE_MASK)) ? 
+ wqe->dma.resid : 0; if (payload > mtu) { if (qp_type(qp) == IB_QPT_UD) { /* C10-93.1.1: If the total sum of all the buffer lengths specified for a From 034e285f8b99062a0cf29112e1232154a6a44aa5 Mon Sep 17 00:00:00 2001 From: Xiao Yang Date: Thu, 1 Dec 2022 14:39:26 +0000 Subject: [PATCH 3174/4122] RDMA/rxe: Make responder support atomic write on RC service Make responder process an atomic write request and send a read response on RC service. Link: https://lore.kernel.org/r/1669905568-62-2-git-send-email-yangx.jy@fujitsu.com Signed-off-by: Xiao Yang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_resp.c | 84 ++++++++++++++++++++++++++-- 1 file changed, 79 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 6761bcd1d4d8..6ac544477f3f 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -22,6 +22,7 @@ enum resp_states { RESPST_EXECUTE, RESPST_READ_REPLY, RESPST_ATOMIC_REPLY, + RESPST_ATOMIC_WRITE_REPLY, RESPST_COMPLETE, RESPST_ACKNOWLEDGE, RESPST_CLEANUP, @@ -57,6 +58,7 @@ static char *resp_state_name[] = { [RESPST_EXECUTE] = "EXECUTE", [RESPST_READ_REPLY] = "READ_REPLY", [RESPST_ATOMIC_REPLY] = "ATOMIC_REPLY", + [RESPST_ATOMIC_WRITE_REPLY] = "ATOMIC_WRITE_REPLY", [RESPST_COMPLETE] = "COMPLETE", [RESPST_ACKNOWLEDGE] = "ACKNOWLEDGE", [RESPST_CLEANUP] = "CLEANUP", @@ -263,7 +265,7 @@ static enum resp_states check_op_valid(struct rxe_qp *qp, case IB_QPT_RC: if (((pkt->mask & RXE_READ_MASK) && !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) || - ((pkt->mask & RXE_WRITE_MASK) && + ((pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) && !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) || ((pkt->mask & RXE_ATOMIC_MASK) && !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) { @@ -367,7 +369,7 @@ static enum resp_states check_resource(struct rxe_qp *qp, } } - if (pkt->mask & RXE_READ_OR_ATOMIC_MASK) { + if (pkt->mask & 
(RXE_READ_OR_ATOMIC_MASK | RXE_ATOMIC_WRITE_MASK)) { /* it is the requesters job to not send * too many read/atomic ops, we just * recycle the responder resource queue @@ -438,7 +440,7 @@ static enum resp_states check_rkey(struct rxe_qp *qp, enum resp_states state; int access; - if (pkt->mask & RXE_READ_OR_WRITE_MASK) { + if (pkt->mask & (RXE_READ_OR_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) { if (pkt->mask & RXE_RETH_MASK) { qp->resp.va = reth_va(pkt); qp->resp.offset = 0; @@ -504,7 +506,7 @@ static enum resp_states check_rkey(struct rxe_qp *qp, goto err; } - if (pkt->mask & RXE_WRITE_MASK) { + if (pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) { if (resid > mtu) { if (pktlen != mtu || bth_pad(pkt)) { state = RESPST_ERR_LENGTH; @@ -604,6 +606,7 @@ static struct resp_res *rxe_prepare_res(struct rxe_qp *qp, res->state = rdatm_res_state_new; break; case RXE_ATOMIC_MASK: + case RXE_ATOMIC_WRITE_MASK: res->first_psn = pkt->psn; res->last_psn = pkt->psn; res->cur_psn = pkt->psn; @@ -673,6 +676,55 @@ out: return ret; } +static enum resp_states atomic_write_reply(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) +{ + u64 src, *dst; + struct resp_res *res = qp->resp.res; + struct rxe_mr *mr = qp->resp.mr; + int payload = payload_size(pkt); + + if (!res) { + res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_WRITE_MASK); + qp->resp.res = res; + } + + if (!res->replay) { +#ifdef CONFIG_64BIT + if (mr->state != RXE_MR_STATE_VALID) + return RESPST_ERR_RKEY_VIOLATION; + + memcpy(&src, payload_addr(pkt), payload); + + dst = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset, payload); + /* check vaddr is 8 bytes aligned. 
*/ + if (!dst || (uintptr_t)dst & 7) + return RESPST_ERR_MISALIGNED_ATOMIC; + + /* Do atomic write after all prior operations have completed */ + smp_store_release(dst, src); + + /* decrease resp.resid to zero */ + qp->resp.resid -= sizeof(payload); + + qp->resp.msn++; + + /* next expected psn, read handles this separately */ + qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK; + qp->resp.ack_psn = qp->resp.psn; + + qp->resp.opcode = pkt->opcode; + qp->resp.status = IB_WC_SUCCESS; + + return RESPST_ACKNOWLEDGE; +#else + return RESPST_ERR_UNSUPPORTED_OPCODE; +#endif /* CONFIG_64BIT */ + } + + return RESPST_ACKNOWLEDGE; +} + static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp, struct rxe_pkt_info *ack, int opcode, @@ -912,6 +964,8 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt) return RESPST_READ_REPLY; } else if (pkt->mask & RXE_ATOMIC_MASK) { return RESPST_ATOMIC_REPLY; + } else if (pkt->mask & RXE_ATOMIC_WRITE_MASK) { + return RESPST_ATOMIC_WRITE_REPLY; } else { /* Unreachable */ WARN_ON_ONCE(1); @@ -1085,6 +1139,19 @@ static int send_atomic_ack(struct rxe_qp *qp, u8 syndrome, u32 psn) return ret; } +static int send_read_response_ack(struct rxe_qp *qp, u8 syndrome, u32 psn) +{ + int ret = send_common_ack(qp, syndrome, psn, + IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY, + "RDMA READ response of length zero ACK"); + + /* have to clear this since it is used to trigger + * long read replies + */ + qp->resp.res = NULL; + return ret; +} + static enum resp_states acknowledge(struct rxe_qp *qp, struct rxe_pkt_info *pkt) { @@ -1095,6 +1162,8 @@ static enum resp_states acknowledge(struct rxe_qp *qp, send_ack(qp, qp->resp.aeth_syndrome, pkt->psn); else if (pkt->mask & RXE_ATOMIC_MASK) send_atomic_ack(qp, AETH_ACK_UNLIMITED, pkt->psn); + else if (pkt->mask & RXE_ATOMIC_WRITE_MASK) + send_read_response_ack(qp, AETH_ACK_UNLIMITED, pkt->psn); else if (bth_ack(pkt)) send_ack(qp, AETH_ACK_UNLIMITED, pkt->psn); @@ -1206,7 +1275,9 @@ static enum 
resp_states duplicate_request(struct rxe_qp *qp, res->replay = 1; res->cur_psn = pkt->psn; qp->resp.res = res; - rc = RESPST_ATOMIC_REPLY; + rc = pkt->mask & RXE_ATOMIC_MASK ? + RESPST_ATOMIC_REPLY : + RESPST_ATOMIC_WRITE_REPLY; goto out; } @@ -1343,6 +1414,9 @@ int rxe_responder(void *arg) case RESPST_ATOMIC_REPLY: state = atomic_reply(qp, pkt); break; + case RESPST_ATOMIC_WRITE_REPLY: + state = atomic_write_reply(qp, pkt); + break; case RESPST_ACKNOWLEDGE: state = acknowledge(qp, pkt); break; From 3aec427bb1499bd3325d2e251edb729a5a8643df Mon Sep 17 00:00:00 2001 From: Xiao Yang Date: Thu, 1 Dec 2022 14:39:27 +0000 Subject: [PATCH 3175/4122] RDMA/rxe: Implement atomic write completion Generate an atomic write completion when the atomic write request has been finished. Link: https://lore.kernel.org/r/1669905568-62-3-git-send-email-yangx.jy@fujitsu.com Signed-off-by: Xiao Yang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_comp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index 4dca4f8bbb5a..1c525325e271 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -104,6 +104,7 @@ static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode) case IB_WR_LOCAL_INV: return IB_WC_LOCAL_INV; case IB_WR_REG_MR: return IB_WC_REG_MR; case IB_WR_BIND_MW: return IB_WC_BIND_MW; + case IB_WR_ATOMIC_WRITE: return IB_WC_ATOMIC_WRITE; default: return 0xff; @@ -269,6 +270,9 @@ static inline enum comp_state check_ack(struct rxe_qp *qp, if ((syn & AETH_TYPE_MASK) != AETH_ACK) return COMPST_ERROR; + if (wqe->wr.opcode == IB_WR_ATOMIC_WRITE) + return COMPST_WRITE_SEND; + fallthrough; /* (IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE doesn't have an AETH) */ From 4cd9f1d320f905e7bc60f030566d15003745ba91 Mon Sep 17 00:00:00 2001 From: Xiao Yang Date: Thu, 1 Dec 2022 14:39:28 +0000 Subject: [PATCH 3176/4122] RDMA/rxe: Enable atomic write capability for 
rxe device The capability shows that rxe device supports atomic write operation. Link: https://lore.kernel.org/r/1669905568-62-4-git-send-email-yangx.jy@fujitsu.com Signed-off-by: Xiao Yang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_param.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h index 86c7a8bf3cbb..bbc88cd71d95 100644 --- a/drivers/infiniband/sw/rxe/rxe_param.h +++ b/drivers/infiniband/sw/rxe/rxe_param.h @@ -51,7 +51,12 @@ enum rxe_device_param { | IB_DEVICE_SRQ_RESIZE | IB_DEVICE_MEM_MGT_EXTENSIONS | IB_DEVICE_MEM_WINDOW +#ifdef CONFIG_64BIT + | IB_DEVICE_MEM_WINDOW_TYPE_2B + | IB_DEVICE_ATOMIC_WRITE, +#else | IB_DEVICE_MEM_WINDOW_TYPE_2B, +#endif /* CONFIG_64BIT */ RXE_MAX_SGE = 32, RXE_MAX_WQE_SIZE = sizeof(struct rxe_send_wqe) + sizeof(struct ib_sge) * RXE_MAX_SGE, From 9f61521c7a284e799050cd2adacc9a611bd2b491 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 28 Nov 2022 13:11:13 +1000 Subject: [PATCH 3177/4122] powerpc/qspinlock: powerpc qspinlock implementation Add a powerpc specific implementation of queued spinlocks. This is the build framework with a very simple (non-queued) spinlock implementation to begin with. Later changes add queueing, and other features and optimisations one-at-a-time. It is done this way to more easily see how the queued spinlocks are built, and to make performance and correctness bisects more useful. 
Signed-off-by: Nicholas Piggin [mpe: Drop paravirt.h & processor.h changes to fix 32-bit build] [mpe: Fix 32-bit build of qspinlock.o & disallow GENERIC_LOCKBREAK per Nick] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/CONLLQB6DCJU.2ZPOS7T6S5GRR@bobo --- arch/powerpc/Kconfig | 3 +- arch/powerpc/include/asm/qspinlock.h | 98 +++++++------------ arch/powerpc/include/asm/qspinlock_paravirt.h | 7 -- arch/powerpc/include/asm/qspinlock_types.h | 13 +++ arch/powerpc/include/asm/spinlock.h | 2 +- arch/powerpc/include/asm/spinlock_types.h | 2 +- arch/powerpc/lib/Makefile | 4 +- arch/powerpc/lib/qspinlock.c | 17 ++++ 8 files changed, 71 insertions(+), 75 deletions(-) delete mode 100644 arch/powerpc/include/asm/qspinlock_paravirt.h create mode 100644 arch/powerpc/include/asm/qspinlock_types.h create mode 100644 arch/powerpc/lib/qspinlock.c diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 699df27b0e2f..7fbdf22ce9a9 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -96,7 +96,7 @@ config LOCKDEP_SUPPORT config GENERIC_LOCKBREAK bool default y - depends on SMP && PREEMPTION + depends on SMP && PREEMPTION && !PPC_QUEUED_SPINLOCKS config GENERIC_HWEIGHT bool @@ -154,7 +154,6 @@ config PPC select ARCH_USE_CMPXCHG_LOCKREF if PPC64 select ARCH_USE_MEMTEST select ARCH_USE_QUEUED_RWLOCKS if PPC_QUEUED_SPINLOCKS - select ARCH_USE_QUEUED_SPINLOCKS if PPC_QUEUED_SPINLOCKS select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT select ARCH_WANT_IPC_PARSE_VERSION select ARCH_WANT_IRQS_OFF_ACTIVATE_MM diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h index b676c4fb90fd..5e6257313557 100644 --- a/arch/powerpc/include/asm/qspinlock.h +++ b/arch/powerpc/include/asm/qspinlock.h @@ -2,83 +2,55 @@ #ifndef _ASM_POWERPC_QSPINLOCK_H #define _ASM_POWERPC_QSPINLOCK_H -#include +#include +#include +#include #include -#define _Q_PENDING_LOOPS (1 << 9) /* not tuned */ - -#ifdef CONFIG_PARAVIRT_SPINLOCKS -extern void 
native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); -extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); -extern void __pv_queued_spin_unlock(struct qspinlock *lock); - -static __always_inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) +static __always_inline int queued_spin_is_locked(struct qspinlock *lock) { - if (!is_shared_processor()) - native_queued_spin_lock_slowpath(lock, val); - else - __pv_queued_spin_lock_slowpath(lock, val); + return atomic_read(&lock->val); } -#define queued_spin_unlock queued_spin_unlock -static inline void queued_spin_unlock(struct qspinlock *lock) +static __always_inline int queued_spin_value_unlocked(struct qspinlock lock) { - if (!is_shared_processor()) - smp_store_release(&lock->locked, 0); - else - __pv_queued_spin_unlock(lock); + return !atomic_read(&lock.val); } -#else -extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); -#endif +static __always_inline int queued_spin_is_contended(struct qspinlock *lock) +{ + return 0; +} + +static __always_inline int queued_spin_trylock(struct qspinlock *lock) +{ + return atomic_cmpxchg_acquire(&lock->val, 0, 1) == 0; +} + +void queued_spin_lock_slowpath(struct qspinlock *lock); static __always_inline void queued_spin_lock(struct qspinlock *lock) { - u32 val = 0; - - if (likely(arch_atomic_try_cmpxchg_lock(&lock->val, &val, _Q_LOCKED_VAL))) - return; - - queued_spin_lock_slowpath(lock, val); + if (!queued_spin_trylock(lock)) + queued_spin_lock_slowpath(lock); } -#define queued_spin_lock queued_spin_lock + +static inline void queued_spin_unlock(struct qspinlock *lock) +{ + atomic_set_release(&lock->val, 0); +} + +#define arch_spin_is_locked(l) queued_spin_is_locked(l) +#define arch_spin_is_contended(l) queued_spin_is_contended(l) +#define arch_spin_value_unlocked(l) queued_spin_value_unlocked(l) +#define arch_spin_lock(l) queued_spin_lock(l) +#define arch_spin_trylock(l) queued_spin_trylock(l) +#define 
arch_spin_unlock(l) queued_spin_unlock(l) #ifdef CONFIG_PARAVIRT_SPINLOCKS -#define SPIN_THRESHOLD (1<<15) /* not tuned */ - -static __always_inline void pv_wait(u8 *ptr, u8 val) -{ - if (*ptr != val) - return; - yield_to_any(); - /* - * We could pass in a CPU here if waiting in the queue and yield to - * the previous CPU in the queue. - */ -} - -static __always_inline void pv_kick(int cpu) -{ - prod_cpu(cpu); -} - -extern void __pv_init_lock_hash(void); - -static inline void pv_spinlocks_init(void) -{ - __pv_init_lock_hash(); -} - +void pv_spinlocks_init(void); +#else +static inline void pv_spinlocks_init(void) { } #endif -/* - * Queued spinlocks rely heavily on smp_cond_load_relaxed() to busy-wait, - * which was found to have performance problems if implemented with - * the preferred spin_begin()/spin_end() SMT priority pattern. Use the - * generic version instead. - */ - -#include - #endif /* _ASM_POWERPC_QSPINLOCK_H */ diff --git a/arch/powerpc/include/asm/qspinlock_paravirt.h b/arch/powerpc/include/asm/qspinlock_paravirt.h deleted file mode 100644 index 6b60e7736a47..000000000000 --- a/arch/powerpc/include/asm/qspinlock_paravirt.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _ASM_POWERPC_QSPINLOCK_PARAVIRT_H -#define _ASM_POWERPC_QSPINLOCK_PARAVIRT_H - -EXPORT_SYMBOL(__pv_queued_spin_unlock); - -#endif /* _ASM_POWERPC_QSPINLOCK_PARAVIRT_H */ diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h new file mode 100644 index 000000000000..59606bc0c774 --- /dev/null +++ b/arch/powerpc/include/asm/qspinlock_types.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ASM_POWERPC_QSPINLOCK_TYPES_H +#define _ASM_POWERPC_QSPINLOCK_TYPES_H + +#include + +typedef struct qspinlock { + atomic_t val; +} arch_spinlock_t; + +#define __ARCH_SPIN_LOCK_UNLOCKED { .val = ATOMIC_INIT(0) } + +#endif /* _ASM_POWERPC_QSPINLOCK_TYPES_H */ diff --git 
a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index bd75872a6334..7dafca8e3f02 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -13,7 +13,7 @@ /* See include/linux/spinlock.h */ #define smp_mb__after_spinlock() smp_mb() -#ifndef CONFIG_PARAVIRT_SPINLOCKS +#ifndef CONFIG_PPC_QUEUED_SPINLOCKS static inline void pv_spinlocks_init(void) { } #endif diff --git a/arch/powerpc/include/asm/spinlock_types.h b/arch/powerpc/include/asm/spinlock_types.h index d5f8a74ed2e8..40b01446cf75 100644 --- a/arch/powerpc/include/asm/spinlock_types.h +++ b/arch/powerpc/include/asm/spinlock_types.h @@ -7,7 +7,7 @@ #endif #ifdef CONFIG_PPC_QUEUED_SPINLOCKS -#include +#include #include #else #include diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 8560c912186d..4de71cbf6e8e 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -52,7 +52,9 @@ obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \ obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \ memcpy_64.o copy_mc_64.o -ifndef CONFIG_PPC_QUEUED_SPINLOCKS +ifdef CONFIG_PPC_QUEUED_SPINLOCKS +obj-$(CONFIG_SMP) += qspinlock.o +else obj64-$(CONFIG_SMP) += locks.o endif diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c new file mode 100644 index 000000000000..1c669b5b4607 --- /dev/null +++ b/arch/powerpc/lib/qspinlock.c @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include +#include +#include + +void queued_spin_lock_slowpath(struct qspinlock *lock) +{ + while (!queued_spin_trylock(lock)) + cpu_relax(); +} +EXPORT_SYMBOL(queued_spin_lock_slowpath); + +#ifdef CONFIG_PARAVIRT_SPINLOCKS +void pv_spinlocks_init(void) +{ +} +#endif From 84990b169557428c318df87b7836cd15f65b62dc Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 26 Nov 2022 19:59:16 +1000 Subject: [PATCH 3178/4122] powerpc/qspinlock: add mcs queueing for contended waiters This forms the basis of 
the qspinlock slow path. Like generic qspinlocks and unlike the vanilla MCS algorithm, the lock owner does not participate in the queue, only waiters. The first waiter spins on the lock word, then when the lock is released it takes ownership and unqueues the next waiter. This is how qspinlocks can be implemented with the spinlock API -- lock owners don't need a node, only waiters do. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221126095932.1234527-2-npiggin@gmail.com --- arch/powerpc/include/asm/qspinlock.h | 10 +- arch/powerpc/include/asm/qspinlock_types.h | 23 +++ arch/powerpc/lib/qspinlock.c | 187 ++++++++++++++++++++- 3 files changed, 214 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h index 5e6257313557..6946dba5d087 100644 --- a/arch/powerpc/include/asm/qspinlock.h +++ b/arch/powerpc/include/asm/qspinlock.h @@ -19,12 +19,12 @@ static __always_inline int queued_spin_value_unlocked(struct qspinlock lock) static __always_inline int queued_spin_is_contended(struct qspinlock *lock) { - return 0; + return !!(atomic_read(&lock->val) & _Q_TAIL_CPU_MASK); } static __always_inline int queued_spin_trylock(struct qspinlock *lock) { - return atomic_cmpxchg_acquire(&lock->val, 0, 1) == 0; + return atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL) == 0; } void queued_spin_lock_slowpath(struct qspinlock *lock); @@ -37,7 +37,11 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock) static inline void queued_spin_unlock(struct qspinlock *lock) { - atomic_set_release(&lock->val, 0); + for (;;) { + int val = atomic_read(&lock->val); + if (atomic_cmpxchg_release(&lock->val, val, val & ~_Q_LOCKED_VAL) == val) + return; + } } #define arch_spin_is_locked(l) queued_spin_is_locked(l) diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h index 59606bc0c774..20a36dfb14e2 100644 --- 
a/arch/powerpc/include/asm/qspinlock_types.h +++ b/arch/powerpc/include/asm/qspinlock_types.h @@ -10,4 +10,27 @@ typedef struct qspinlock { #define __ARCH_SPIN_LOCK_UNLOCKED { .val = ATOMIC_INIT(0) } +/* + * Bitfields in the lock word: + * + * 0: locked bit + * 1-16: unused bits + * 17-31: tail cpu (+1) + */ +#define _Q_SET_MASK(type) (((1U << _Q_ ## type ## _BITS) - 1)\ + << _Q_ ## type ## _OFFSET) +/* 0x00000001 */ +#define _Q_LOCKED_OFFSET 0 +#define _Q_LOCKED_BITS 1 +#define _Q_LOCKED_VAL (1U << _Q_LOCKED_OFFSET) + +/* 0xfffe0000 */ +#define _Q_TAIL_CPU_OFFSET 17 +#define _Q_TAIL_CPU_BITS 15 +#define _Q_TAIL_CPU_MASK _Q_SET_MASK(TAIL_CPU) + +#if CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS) +#error "qspinlock does not support such large CONFIG_NR_CPUS" +#endif + #endif /* _ASM_POWERPC_QSPINLOCK_TYPES_H */ diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c index 1c669b5b4607..86504628501e 100644 --- a/arch/powerpc/lib/qspinlock.c +++ b/arch/powerpc/lib/qspinlock.c @@ -1,12 +1,193 @@ // SPDX-License-Identifier: GPL-2.0-or-later +#include +#include +#include #include -#include +#include +#include #include +#define MAX_NODES 4 + +struct qnode { + struct qnode *next; + struct qspinlock *lock; + u8 locked; /* 1 if lock acquired */ +}; + +struct qnodes { + int count; + struct qnode nodes[MAX_NODES]; +}; + +static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes); + +static inline int encode_tail_cpu(int cpu) +{ + return (cpu + 1) << _Q_TAIL_CPU_OFFSET; +} + +static inline int decode_tail_cpu(int val) +{ + return (val >> _Q_TAIL_CPU_OFFSET) - 1; +} + +/* + * Try to acquire the lock if it was not already locked. If the tail matches + * mytail then clear it, otherwise leave it unchnaged. Return previous value. + * + * This is used by the head of the queue to acquire the lock and clean up + * its tail if it was the last one queued. 
+ */ +static __always_inline int set_locked_clean_tail(struct qspinlock *lock, int tail) +{ + int val = atomic_read(&lock->val); + + BUG_ON(val & _Q_LOCKED_VAL); + + /* If we're the last queued, must clean up the tail. */ + if ((val & _Q_TAIL_CPU_MASK) == tail) { + if (atomic_cmpxchg_acquire(&lock->val, val, _Q_LOCKED_VAL) == val) + return val; + /* Another waiter must have enqueued */ + val = atomic_read(&lock->val); + BUG_ON(val & _Q_LOCKED_VAL); + } + + /* We must be the owner, just set the lock bit and acquire */ + atomic_or(_Q_LOCKED_VAL, &lock->val); + __atomic_acquire_fence(); + + return val; +} + +/* + * Publish our tail, replacing previous tail. Return previous value. + * + * This provides a release barrier for publishing node, this pairs with the + * acquire barrier in get_tail_qnode() when the next CPU finds this tail + * value. + */ +static __always_inline int publish_tail_cpu(struct qspinlock *lock, int tail) +{ + for (;;) { + int val = atomic_read(&lock->val); + int newval = (val & ~_Q_TAIL_CPU_MASK) | tail; + int old; + + old = atomic_cmpxchg_release(&lock->val, val, newval); + if (old == val) + return old; + } +} + +static struct qnode *get_tail_qnode(struct qspinlock *lock, int val) +{ + int cpu = decode_tail_cpu(val); + struct qnodes *qnodesp = per_cpu_ptr(&qnodes, cpu); + int idx; + + /* + * After publishing the new tail and finding a previous tail in the + * previous val (which is the control dependency), this barrier + * orders the release barrier in publish_tail_cpu performed by the + * last CPU, with subsequently looking at its qnode structures + * after the barrier. 
+ */ + smp_acquire__after_ctrl_dep(); + + for (idx = 0; idx < MAX_NODES; idx++) { + struct qnode *qnode = &qnodesp->nodes[idx]; + if (qnode->lock == lock) + return qnode; + } + + BUG(); +} + +static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock) +{ + struct qnodes *qnodesp; + struct qnode *next, *node; + int val, old, tail; + int idx; + + BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS)); + + qnodesp = this_cpu_ptr(&qnodes); + if (unlikely(qnodesp->count >= MAX_NODES)) { + while (!queued_spin_trylock(lock)) + cpu_relax(); + return; + } + + idx = qnodesp->count++; + /* + * Ensure that we increment the head node->count before initialising + * the actual node. If the compiler is kind enough to reorder these + * stores, then an IRQ could overwrite our assignments. + */ + barrier(); + node = &qnodesp->nodes[idx]; + node->next = NULL; + node->lock = lock; + node->locked = 0; + + tail = encode_tail_cpu(smp_processor_id()); + + old = publish_tail_cpu(lock, tail); + + /* + * If there was a previous node; link it and wait until reaching the + * head of the waitqueue. + */ + if (old & _Q_TAIL_CPU_MASK) { + struct qnode *prev = get_tail_qnode(lock, old); + + /* Link @node into the waitqueue. */ + WRITE_ONCE(prev->next, node); + + /* Wait for mcs node lock to be released */ + while (!node->locked) + cpu_relax(); + + smp_rmb(); /* acquire barrier for the mcs lock */ + } + + /* We're at the head of the waitqueue, wait for the lock. */ + for (;;) { + val = atomic_read(&lock->val); + if (!(val & _Q_LOCKED_VAL)) + break; + + cpu_relax(); + } + + /* If we're the last queued, must clean up the tail. */ + old = set_locked_clean_tail(lock, tail); + if ((old & _Q_TAIL_CPU_MASK) == tail) + goto release; /* Another waiter must have enqueued */ + + /* There is a next, must wait for node->next != NULL (MCS protocol) */ + while (!(next = READ_ONCE(node->next))) + cpu_relax(); + + /* + * Unlock the next mcs waiter node. 
Release barrier is not required + * here because the acquirer is only accessing the lock word, and + * the acquire barrier we took the lock with orders that update vs + * this store to locked. The corresponding barrier is the smp_rmb() + * acquire barrier for mcs lock, above. + */ + WRITE_ONCE(next->locked, 1); + +release: + qnodesp->count--; /* release the node */ +} + void queued_spin_lock_slowpath(struct qspinlock *lock) { - while (!queued_spin_trylock(lock)) - cpu_relax(); + queued_spin_lock_mcs_queue(lock); } EXPORT_SYMBOL(queued_spin_lock_slowpath); From 4c93c2e4b9e8988511c06b9c042f23d4b8f593ad Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 26 Nov 2022 19:59:17 +1000 Subject: [PATCH 3179/4122] powerpc/qspinlock: use a half-word store to unlock to avoid larx/stcx. The first 16 bits of the lock are only modified by the owner, and other modifications always use atomic operations on the entire 32 bits, so unlocks can use plain stores on the 16 bits. This is the same kind of optimisation done by core qspinlock code. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221126095932.1234527-3-npiggin@gmail.com --- arch/powerpc/include/asm/qspinlock.h | 6 +----- arch/powerpc/include/asm/qspinlock_types.h | 19 +++++++++++++++++-- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h index 6946dba5d087..713f6629f6fb 100644 --- a/arch/powerpc/include/asm/qspinlock.h +++ b/arch/powerpc/include/asm/qspinlock.h @@ -37,11 +37,7 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock) static inline void queued_spin_unlock(struct qspinlock *lock) { - for (;;) { - int val = atomic_read(&lock->val); - if (atomic_cmpxchg_release(&lock->val, val, val & ~_Q_LOCKED_VAL) == val) - return; - } + smp_store_release(&lock->locked, 0); } #define arch_spin_is_locked(l) queued_spin_is_locked(l) diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h index 20a36dfb14e2..fe87181c59e5 100644 --- a/arch/powerpc/include/asm/qspinlock_types.h +++ b/arch/powerpc/include/asm/qspinlock_types.h @@ -3,12 +3,27 @@ #define _ASM_POWERPC_QSPINLOCK_TYPES_H #include +#include typedef struct qspinlock { - atomic_t val; + union { + atomic_t val; + +#ifdef __LITTLE_ENDIAN + struct { + u16 locked; + u8 reserved[2]; + }; +#else + struct { + u8 reserved[2]; + u16 locked; + }; +#endif + }; } arch_spinlock_t; -#define __ARCH_SPIN_LOCK_UNLOCKED { .val = ATOMIC_INIT(0) } +#define __ARCH_SPIN_LOCK_UNLOCKED { { .val = ATOMIC_INIT(0) } } /* * Bitfields in the lock word: From b3a73b7db2b6cb3b2e5bfda5518a0e92230ef673 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 26 Nov 2022 19:59:18 +1000 Subject: [PATCH 3180/4122] powerpc/qspinlock: convert atomic operations to assembly This uses more optimal ll/sc style access patterns (rather than cmpxchg), and also sets the EH=1 lock hint on those operations which acquire ownership of the lock. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221126095932.1234527-4-npiggin@gmail.com --- arch/powerpc/include/asm/qspinlock.h | 24 +++++-- arch/powerpc/include/asm/qspinlock_types.h | 4 +- arch/powerpc/lib/qspinlock.c | 74 +++++++++++++--------- 3 files changed, 64 insertions(+), 38 deletions(-) diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h index 713f6629f6fb..c16e1f0674b5 100644 --- a/arch/powerpc/include/asm/qspinlock.h +++ b/arch/powerpc/include/asm/qspinlock.h @@ -2,29 +2,43 @@ #ifndef _ASM_POWERPC_QSPINLOCK_H #define _ASM_POWERPC_QSPINLOCK_H -#include #include #include #include static __always_inline int queued_spin_is_locked(struct qspinlock *lock) { - return atomic_read(&lock->val); + return READ_ONCE(lock->val); } static __always_inline int queued_spin_value_unlocked(struct qspinlock lock) { - return !atomic_read(&lock.val); + return !lock.val; } static __always_inline int queued_spin_is_contended(struct qspinlock *lock) { - return !!(atomic_read(&lock->val) & _Q_TAIL_CPU_MASK); + return !!(READ_ONCE(lock->val) & _Q_TAIL_CPU_MASK); } static __always_inline int queued_spin_trylock(struct qspinlock *lock) { - return atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL) == 0; + u32 prev; + + asm volatile( +"1: lwarx %0,0,%1,%3 # queued_spin_trylock \n" +" cmpwi 0,%0,0 \n" +" bne- 2f \n" +" stwcx. 
%2,0,%1 \n" +" bne- 1b \n" +"\t" PPC_ACQUIRE_BARRIER " \n" +"2: \n" + : "=&r" (prev) + : "r" (&lock->val), "r" (_Q_LOCKED_VAL), + "i" (IS_ENABLED(CONFIG_PPC64)) + : "cr0", "memory"); + + return likely(prev == 0); } void queued_spin_lock_slowpath(struct qspinlock *lock); diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h index fe87181c59e5..b9a5a52fa670 100644 --- a/arch/powerpc/include/asm/qspinlock_types.h +++ b/arch/powerpc/include/asm/qspinlock_types.h @@ -7,7 +7,7 @@ typedef struct qspinlock { union { - atomic_t val; + u32 val; #ifdef __LITTLE_ENDIAN struct { @@ -23,7 +23,7 @@ typedef struct qspinlock { }; } arch_spinlock_t; -#define __ARCH_SPIN_LOCK_UNLOCKED { { .val = ATOMIC_INIT(0) } } +#define __ARCH_SPIN_LOCK_UNLOCKED { { .val = 0 } } /* * Bitfields in the lock word: diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c index 86504628501e..645d9affacfd 100644 --- a/arch/powerpc/lib/qspinlock.c +++ b/arch/powerpc/lib/qspinlock.c @@ -1,5 +1,4 @@ // SPDX-License-Identifier: GPL-2.0-or-later -#include #include #include #include @@ -22,12 +21,12 @@ struct qnodes { static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes); -static inline int encode_tail_cpu(int cpu) +static inline u32 encode_tail_cpu(int cpu) { return (cpu + 1) << _Q_TAIL_CPU_OFFSET; } -static inline int decode_tail_cpu(int val) +static inline int decode_tail_cpu(u32 val) { return (val >> _Q_TAIL_CPU_OFFSET) - 1; } @@ -39,26 +38,34 @@ static inline int decode_tail_cpu(int val) * This is used by the head of the queue to acquire the lock and clean up * its tail if it was the last one queued. 
*/ -static __always_inline int set_locked_clean_tail(struct qspinlock *lock, int tail) +static __always_inline u32 set_locked_clean_tail(struct qspinlock *lock, u32 tail) { - int val = atomic_read(&lock->val); + u32 newval = _Q_LOCKED_VAL; + u32 prev, tmp; - BUG_ON(val & _Q_LOCKED_VAL); + asm volatile( +"1: lwarx %0,0,%2,%6 # set_locked_clean_tail \n" + /* Test whether the lock tail == tail */ +" and %1,%0,%5 \n" +" cmpw 0,%1,%3 \n" + /* Merge the new locked value */ +" or %1,%1,%4 \n" +" bne 2f \n" + /* If the lock tail matched, then clear it, otherwise leave it. */ +" andc %1,%1,%5 \n" +"2: stwcx. %1,0,%2 \n" +" bne- 1b \n" +"\t" PPC_ACQUIRE_BARRIER " \n" +"3: \n" + : "=&r" (prev), "=&r" (tmp) + : "r" (&lock->val), "r"(tail), "r" (newval), + "r" (_Q_TAIL_CPU_MASK), + "i" (IS_ENABLED(CONFIG_PPC64)) + : "cr0", "memory"); - /* If we're the last queued, must clean up the tail. */ - if ((val & _Q_TAIL_CPU_MASK) == tail) { - if (atomic_cmpxchg_acquire(&lock->val, val, _Q_LOCKED_VAL) == val) - return val; - /* Another waiter must have enqueued */ - val = atomic_read(&lock->val); - BUG_ON(val & _Q_LOCKED_VAL); - } + BUG_ON(prev & _Q_LOCKED_VAL); - /* We must be the owner, just set the lock bit and acquire */ - atomic_or(_Q_LOCKED_VAL, &lock->val); - __atomic_acquire_fence(); - - return val; + return prev; } /* @@ -68,20 +75,25 @@ static __always_inline int set_locked_clean_tail(struct qspinlock *lock, int tai * acquire barrier in get_tail_qnode() when the next CPU finds this tail * value. 
*/ -static __always_inline int publish_tail_cpu(struct qspinlock *lock, int tail) +static __always_inline u32 publish_tail_cpu(struct qspinlock *lock, u32 tail) { - for (;;) { - int val = atomic_read(&lock->val); - int newval = (val & ~_Q_TAIL_CPU_MASK) | tail; - int old; + u32 prev, tmp; - old = atomic_cmpxchg_release(&lock->val, val, newval); - if (old == val) - return old; - } + asm volatile( +"\t" PPC_RELEASE_BARRIER " \n" +"1: lwarx %0,0,%2 # publish_tail_cpu \n" +" andc %1,%0,%4 \n" +" or %1,%1,%3 \n" +" stwcx. %1,0,%2 \n" +" bne- 1b \n" + : "=&r" (prev), "=&r"(tmp) + : "r" (&lock->val), "r" (tail), "r"(_Q_TAIL_CPU_MASK) + : "cr0", "memory"); + + return prev; } -static struct qnode *get_tail_qnode(struct qspinlock *lock, int val) +static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val) { int cpu = decode_tail_cpu(val); struct qnodes *qnodesp = per_cpu_ptr(&qnodes, cpu); @@ -109,7 +121,7 @@ static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock) { struct qnodes *qnodesp; struct qnode *next, *node; - int val, old, tail; + u32 val, old, tail; int idx; BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS)); @@ -156,7 +168,7 @@ static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock) /* We're at the head of the waitqueue, wait for the lock. */ for (;;) { - val = atomic_read(&lock->val); + val = READ_ONCE(lock->val); if (!(val & _Q_LOCKED_VAL)) break; From 6aa42f883c438ea132a28801bef3f86f3883d14c Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 26 Nov 2022 19:59:19 +1000 Subject: [PATCH 3181/4122] powerpc/qspinlock: allow new waiters to steal the lock before queueing Allow new waiters to "steal" the lock before queueing. That is, to acquire it while other CPUs have queued. This particularly helps paravirt performance when physical CPUs are oversubscribed, by keeping the lock from becoming a strict FIFO and vCPU preemption causing queue train wrecks. 
The new __queued_spin_trylock_steal() function is put in qspinlock.h to save having to move it, because it will be used there by a later change. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221126095932.1234527-5-npiggin@gmail.com --- arch/powerpc/include/asm/qspinlock.h | 23 ++++++ arch/powerpc/lib/qspinlock.c | 110 ++++++++++++++++++++++++--- 2 files changed, 124 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h index c16e1f0674b5..cebd2c89c08d 100644 --- a/arch/powerpc/include/asm/qspinlock.h +++ b/arch/powerpc/include/asm/qspinlock.h @@ -41,6 +41,29 @@ static __always_inline int queued_spin_trylock(struct qspinlock *lock) return likely(prev == 0); } +static __always_inline int __queued_spin_trylock_steal(struct qspinlock *lock) +{ + u32 prev, tmp; + + /* Trylock may get ahead of queued nodes if it finds unlocked */ + asm volatile( +"1: lwarx %0,0,%2,%5 # __queued_spin_trylock_steal \n" +" andc. %1,%0,%4 \n" +" bne- 2f \n" +" and %1,%0,%4 \n" +" or %1,%1,%3 \n" +" stwcx. 
%1,0,%2 \n" +" bne- 1b \n" +"\t" PPC_ACQUIRE_BARRIER " \n" +"2: \n" + : "=&r" (prev), "=&r" (tmp) + : "r" (&lock->val), "r" (_Q_LOCKED_VAL), "r" (_Q_TAIL_CPU_MASK), + "i" (IS_ENABLED(CONFIG_PPC64)) + : "cr0", "memory"); + + return likely(!(prev & ~_Q_TAIL_CPU_MASK)); +} + void queued_spin_lock_slowpath(struct qspinlock *lock); static __always_inline void queued_spin_lock(struct qspinlock *lock) diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c index 645d9affacfd..6ffd3261064c 100644 --- a/arch/powerpc/lib/qspinlock.c +++ b/arch/powerpc/lib/qspinlock.c @@ -19,8 +19,17 @@ struct qnodes { struct qnode nodes[MAX_NODES]; }; +/* Tuning parameters */ +static int steal_spins __read_mostly = (1 << 5); +static bool maybe_stealers __read_mostly = true; + static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes); +static __always_inline int get_steal_spins(void) +{ + return steal_spins; +} + static inline u32 encode_tail_cpu(int cpu) { return (cpu + 1) << _Q_TAIL_CPU_OFFSET; @@ -38,33 +47,35 @@ static inline int decode_tail_cpu(u32 val) * This is used by the head of the queue to acquire the lock and clean up * its tail if it was the last one queued. */ -static __always_inline u32 set_locked_clean_tail(struct qspinlock *lock, u32 tail) +static __always_inline u32 trylock_clean_tail(struct qspinlock *lock, u32 tail) { u32 newval = _Q_LOCKED_VAL; u32 prev, tmp; asm volatile( -"1: lwarx %0,0,%2,%6 # set_locked_clean_tail \n" - /* Test whether the lock tail == tail */ -" and %1,%0,%5 \n" +"1: lwarx %0,0,%2,%7 # trylock_clean_tail \n" + /* This test is necessary if there could be stealers */ +" andi. %1,%0,%5 \n" +" bne 3f \n" + /* Test whether the lock tail == mytail */ +" and %1,%0,%6 \n" " cmpw 0,%1,%3 \n" /* Merge the new locked value */ " or %1,%1,%4 \n" " bne 2f \n" /* If the lock tail matched, then clear it, otherwise leave it. */ -" andc %1,%1,%5 \n" +" andc %1,%1,%6 \n" "2: stwcx. 
%1,0,%2 \n" " bne- 1b \n" "\t" PPC_ACQUIRE_BARRIER " \n" "3: \n" : "=&r" (prev), "=&r" (tmp) : "r" (&lock->val), "r"(tail), "r" (newval), + "i" (_Q_LOCKED_VAL), "r" (_Q_TAIL_CPU_MASK), "i" (IS_ENABLED(CONFIG_PPC64)) : "cr0", "memory"); - BUG_ON(prev & _Q_LOCKED_VAL); - return prev; } @@ -117,6 +128,30 @@ static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val) BUG(); } +static inline bool try_to_steal_lock(struct qspinlock *lock) +{ + int iters = 0; + + if (!steal_spins) + return false; + + /* Attempt to steal the lock */ + do { + u32 val = READ_ONCE(lock->val); + + if (unlikely(!(val & _Q_LOCKED_VAL))) { + if (__queued_spin_trylock_steal(lock)) + return true; + } else { + cpu_relax(); + } + + iters++; + } while (iters < get_steal_spins()); + + return false; +} + static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock) { struct qnodes *qnodesp; @@ -166,6 +201,7 @@ static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock) smp_rmb(); /* acquire barrier for the mcs lock */ } +again: /* We're at the head of the waitqueue, wait for the lock. */ for (;;) { val = READ_ONCE(lock->val); @@ -176,9 +212,14 @@ static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock) } /* If we're the last queued, must clean up the tail. */ - old = set_locked_clean_tail(lock, tail); + old = trylock_clean_tail(lock, tail); + if (unlikely(old & _Q_LOCKED_VAL)) { + BUG_ON(!maybe_stealers); + goto again; /* Can only be true if maybe_stealers. */ + } + if ((old & _Q_TAIL_CPU_MASK) == tail) - goto release; /* Another waiter must have enqueued */ + goto release; /* We were the tail, no next. 
*/ /* There is a next, must wait for node->next != NULL (MCS protocol) */ while (!(next = READ_ONCE(node->next))) @@ -199,6 +240,9 @@ release: void queued_spin_lock_slowpath(struct qspinlock *lock) { + if (try_to_steal_lock(lock)) + return; + queued_spin_lock_mcs_queue(lock); } EXPORT_SYMBOL(queued_spin_lock_slowpath); @@ -208,3 +252,51 @@ void pv_spinlocks_init(void) { } #endif + +#include +static int steal_spins_set(void *data, u64 val) +{ + static DEFINE_MUTEX(lock); + + /* + * The lock slow path has a !maybe_stealers case that can assume + * the head of queue will not see concurrent waiters. That waiter + * is unsafe in the presence of stealers, so must keep them away + * from one another. + */ + + mutex_lock(&lock); + if (val && !steal_spins) { + maybe_stealers = true; + /* wait for queue head waiter to go away */ + synchronize_rcu(); + steal_spins = val; + } else if (!val && steal_spins) { + steal_spins = val; + /* wait for all possible stealers to go away */ + synchronize_rcu(); + maybe_stealers = false; + } else { + steal_spins = val; + } + mutex_unlock(&lock); + + return 0; +} + +static int steal_spins_get(void *data, u64 *val) +{ + *val = steal_spins; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_steal_spins, steal_spins_get, steal_spins_set, "%llu\n"); + +static __init int spinlock_debugfs_init(void) +{ + debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins); + + return 0; +} +device_initcall(spinlock_debugfs_init); From 0944534ef4d5cf39c8133575524be0be3337dd62 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 26 Nov 2022 19:59:20 +1000 Subject: [PATCH 3182/4122] powerpc/qspinlock: theft prevention to control latency Give the queue head the ability to stop stealers. After a number of spins without successfully acquiring the lock, the queue head sets this, which halts stealing and will assure it is the next owner. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221126095932.1234527-6-npiggin@gmail.com --- arch/powerpc/include/asm/qspinlock_types.h | 8 +++- arch/powerpc/lib/qspinlock.c | 53 ++++++++++++++++++++++ 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h index b9a5a52fa670..1911a8a16237 100644 --- a/arch/powerpc/include/asm/qspinlock_types.h +++ b/arch/powerpc/include/asm/qspinlock_types.h @@ -29,7 +29,8 @@ typedef struct qspinlock { * Bitfields in the lock word: * * 0: locked bit - * 1-16: unused bits + * 1-15: unused bits + * 16: must queue bit * 17-31: tail cpu (+1) */ #define _Q_SET_MASK(type) (((1U << _Q_ ## type ## _BITS) - 1)\ @@ -39,6 +40,11 @@ typedef struct qspinlock { #define _Q_LOCKED_BITS 1 #define _Q_LOCKED_VAL (1U << _Q_LOCKED_OFFSET) +/* 0x00010000 */ +#define _Q_MUST_Q_OFFSET 16 +#define _Q_MUST_Q_BITS 1 +#define _Q_MUST_Q_VAL (1U << _Q_MUST_Q_OFFSET) + /* 0xfffe0000 */ #define _Q_TAIL_CPU_OFFSET 17 #define _Q_TAIL_CPU_BITS 15 diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c index 6ffd3261064c..9cd442d46b9f 100644 --- a/arch/powerpc/lib/qspinlock.c +++ b/arch/powerpc/lib/qspinlock.c @@ -22,6 +22,7 @@ struct qnodes { /* Tuning parameters */ static int steal_spins __read_mostly = (1 << 5); static bool maybe_stealers __read_mostly = true; +static int head_spins __read_mostly = (1 << 8); static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes); @@ -30,6 +31,11 @@ static __always_inline int get_steal_spins(void) return steal_spins; } +static __always_inline int get_head_spins(void) +{ + return head_spins; +} + static inline u32 encode_tail_cpu(int cpu) { return (cpu + 1) << _Q_TAIL_CPU_OFFSET; @@ -104,6 +110,22 @@ static __always_inline u32 publish_tail_cpu(struct qspinlock *lock, u32 tail) return prev; } +static __always_inline u32 set_mustq(struct qspinlock *lock) +{ + u32 prev; + + asm 
volatile( +"1: lwarx %0,0,%1 # set_mustq \n" +" or %0,%0,%2 \n" +" stwcx. %0,0,%1 \n" +" bne- 1b \n" + : "=&r" (prev) + : "r" (&lock->val), "r" (_Q_MUST_Q_VAL) + : "cr0", "memory"); + + return prev; +} + static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val) { int cpu = decode_tail_cpu(val); @@ -139,6 +161,9 @@ static inline bool try_to_steal_lock(struct qspinlock *lock) do { u32 val = READ_ONCE(lock->val); + if (val & _Q_MUST_Q_VAL) + break; + if (unlikely(!(val & _Q_LOCKED_VAL))) { if (__queued_spin_trylock_steal(lock)) return true; @@ -157,7 +182,9 @@ static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock) struct qnodes *qnodesp; struct qnode *next, *node; u32 val, old, tail; + bool mustq = false; int idx; + int iters = 0; BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS)); @@ -209,6 +236,15 @@ again: break; cpu_relax(); + if (!maybe_stealers) + continue; + iters++; + + if (!mustq && iters >= get_head_spins()) { + mustq = true; + set_mustq(lock); + val |= _Q_MUST_Q_VAL; + } } /* If we're the last queued, must clean up the tail. 
*/ @@ -293,9 +329,26 @@ static int steal_spins_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(fops_steal_spins, steal_spins_get, steal_spins_set, "%llu\n"); +static int head_spins_set(void *data, u64 val) +{ + head_spins = val; + + return 0; +} + +static int head_spins_get(void *data, u64 *val) +{ + *val = head_spins; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_head_spins, head_spins_get, head_spins_set, "%llu\n"); + static __init int spinlock_debugfs_init(void) { debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins); + debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins); return 0; } From e1a31e7fd7130628cfd229253da2b4630e7a809c Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 26 Nov 2022 19:59:21 +1000 Subject: [PATCH 3183/4122] powerpc/qspinlock: store owner CPU in lock word Store the owner CPU number in the lock word so it may be yielded to, as powerpc's paravirtualised simple spinlocks do. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221126095932.1234527-7-npiggin@gmail.com --- arch/powerpc/include/asm/qspinlock.h | 12 ++++++++++-- arch/powerpc/include/asm/qspinlock_types.h | 12 +++++++++++- arch/powerpc/lib/qspinlock.c | 2 +- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h index cebd2c89c08d..9572a2ef974d 100644 --- a/arch/powerpc/include/asm/qspinlock.h +++ b/arch/powerpc/include/asm/qspinlock.h @@ -21,8 +21,15 @@ static __always_inline int queued_spin_is_contended(struct qspinlock *lock) return !!(READ_ONCE(lock->val) & _Q_TAIL_CPU_MASK); } +static __always_inline u32 queued_spin_encode_locked_val(void) +{ + /* XXX: make this use lock value in paca like simple spinlocks? 
*/ + return _Q_LOCKED_VAL | (smp_processor_id() << _Q_OWNER_CPU_OFFSET); +} + static __always_inline int queued_spin_trylock(struct qspinlock *lock) { + u32 new = queued_spin_encode_locked_val(); u32 prev; asm volatile( @@ -34,7 +41,7 @@ static __always_inline int queued_spin_trylock(struct qspinlock *lock) "\t" PPC_ACQUIRE_BARRIER " \n" "2: \n" : "=&r" (prev) - : "r" (&lock->val), "r" (_Q_LOCKED_VAL), + : "r" (&lock->val), "r" (new), "i" (IS_ENABLED(CONFIG_PPC64)) : "cr0", "memory"); @@ -43,6 +50,7 @@ static __always_inline int queued_spin_trylock(struct qspinlock *lock) static __always_inline int __queued_spin_trylock_steal(struct qspinlock *lock) { + u32 new = queued_spin_encode_locked_val(); u32 prev, tmp; /* Trylock may get ahead of queued nodes if it finds unlocked */ @@ -57,7 +65,7 @@ static __always_inline int __queued_spin_trylock_steal(struct qspinlock *lock) "\t" PPC_ACQUIRE_BARRIER " \n" "2: \n" : "=&r" (prev), "=&r" (tmp) - : "r" (&lock->val), "r" (_Q_LOCKED_VAL), "r" (_Q_TAIL_CPU_MASK), + : "r" (&lock->val), "r" (new), "r" (_Q_TAIL_CPU_MASK), "i" (IS_ENABLED(CONFIG_PPC64)) : "cr0", "memory"); diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h index 1911a8a16237..adfeed4aa495 100644 --- a/arch/powerpc/include/asm/qspinlock_types.h +++ b/arch/powerpc/include/asm/qspinlock_types.h @@ -29,7 +29,8 @@ typedef struct qspinlock { * Bitfields in the lock word: * * 0: locked bit - * 1-15: unused bits + * 1-14: lock holder cpu + * 15: unused bit * 16: must queue bit * 17-31: tail cpu (+1) */ @@ -40,6 +41,15 @@ typedef struct qspinlock { #define _Q_LOCKED_BITS 1 #define _Q_LOCKED_VAL (1U << _Q_LOCKED_OFFSET) +/* 0x00007ffe */ +#define _Q_OWNER_CPU_OFFSET 1 +#define _Q_OWNER_CPU_BITS 14 +#define _Q_OWNER_CPU_MASK _Q_SET_MASK(OWNER_CPU) + +#if CONFIG_NR_CPUS > (1U << _Q_OWNER_CPU_BITS) +#error "qspinlock does not support such large CONFIG_NR_CPUS" +#endif + /* 0x00010000 */ #define _Q_MUST_Q_OFFSET 16 #define 
_Q_MUST_Q_BITS 1 diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c index 9cd442d46b9f..4d74db0e565f 100644 --- a/arch/powerpc/lib/qspinlock.c +++ b/arch/powerpc/lib/qspinlock.c @@ -55,7 +55,7 @@ static inline int decode_tail_cpu(u32 val) */ static __always_inline u32 trylock_clean_tail(struct qspinlock *lock, u32 tail) { - u32 newval = _Q_LOCKED_VAL; + u32 newval = queued_spin_encode_locked_val(); u32 prev, tmp; asm volatile( From 085f03311bcede99550e08a1f7cad41bf758b460 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 26 Nov 2022 19:59:22 +1000 Subject: [PATCH 3184/4122] powerpc/qspinlock: paravirt yield to lock owner Waiters spinning on the lock word should yield to the lock owner if the vCPU is preempted. This improves performance when the hypervisor has oversubscribed physical CPUs. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221126095932.1234527-8-npiggin@gmail.com --- arch/powerpc/lib/qspinlock.c | 99 +++++++++++++++++++++++++++++++----- 1 file changed, 87 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c index 4d74db0e565f..18e21574e6c5 100644 --- a/arch/powerpc/lib/qspinlock.c +++ b/arch/powerpc/lib/qspinlock.c @@ -5,6 +5,7 @@ #include #include #include +#include #define MAX_NODES 4 @@ -24,14 +25,16 @@ static int steal_spins __read_mostly = (1 << 5); static bool maybe_stealers __read_mostly = true; static int head_spins __read_mostly = (1 << 8); +static bool pv_yield_owner __read_mostly = true; + static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes); -static __always_inline int get_steal_spins(void) +static __always_inline int get_steal_spins(bool paravirt) { return steal_spins; } -static __always_inline int get_head_spins(void) +static __always_inline int get_head_spins(bool paravirt) { return head_spins; } @@ -46,6 +49,11 @@ static inline int decode_tail_cpu(u32 val) return (val >> _Q_TAIL_CPU_OFFSET) - 1; } +static 
inline int get_owner_cpu(u32 val) +{ + return (val & _Q_OWNER_CPU_MASK) >> _Q_OWNER_CPU_OFFSET; +} + /* * Try to acquire the lock if it was not already locked. If the tail matches * mytail then clear it, otherwise leave it unchnaged. Return previous value. @@ -150,7 +158,45 @@ static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val) BUG(); } -static inline bool try_to_steal_lock(struct qspinlock *lock) +static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt) +{ + int owner; + u32 yield_count; + + BUG_ON(!(val & _Q_LOCKED_VAL)); + + if (!paravirt) + goto relax; + + if (!pv_yield_owner) + goto relax; + + owner = get_owner_cpu(val); + yield_count = yield_count_of(owner); + + if ((yield_count & 1) == 0) + goto relax; /* owner vcpu is running */ + + /* + * Read the lock word after sampling the yield count. On the other side + * there may a wmb because the yield count update is done by the + * hypervisor preemption and the value update by the OS, however this + * ordering might reduce the chance of out of order accesses and + * improve the heuristic. + */ + smp_rmb(); + + if (READ_ONCE(lock->val) == val) { + yield_to_preempted(owner, yield_count); + /* Don't relax if we yielded. Maybe we should? 
*/ + return; + } +relax: + cpu_relax(); +} + + +static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt) { int iters = 0; @@ -168,16 +214,16 @@ static inline bool try_to_steal_lock(struct qspinlock *lock) if (__queued_spin_trylock_steal(lock)) return true; } else { - cpu_relax(); + yield_to_locked_owner(lock, val, paravirt); } iters++; - } while (iters < get_steal_spins()); + } while (iters < get_steal_spins(paravirt)); return false; } -static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock) +static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, bool paravirt) { struct qnodes *qnodesp; struct qnode *next, *node; @@ -235,12 +281,12 @@ again: if (!(val & _Q_LOCKED_VAL)) break; - cpu_relax(); + yield_to_locked_owner(lock, val, paravirt); if (!maybe_stealers) continue; iters++; - if (!mustq && iters >= get_head_spins()) { + if (!mustq && iters >= get_head_spins(paravirt)) { mustq = true; set_mustq(lock); val |= _Q_MUST_Q_VAL; @@ -276,10 +322,20 @@ release: void queued_spin_lock_slowpath(struct qspinlock *lock) { - if (try_to_steal_lock(lock)) - return; - - queued_spin_lock_mcs_queue(lock); + /* + * This looks funny, but it induces the compiler to inline both + * sides of the branch rather than share code as when the condition + * is passed as the paravirt argument to the functions. 
+ */ + if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && is_shared_processor()) { + if (try_to_steal_lock(lock, true)) + return; + queued_spin_lock_mcs_queue(lock, true); + } else { + if (try_to_steal_lock(lock, false)) + return; + queued_spin_lock_mcs_queue(lock, false); + } } EXPORT_SYMBOL(queued_spin_lock_slowpath); @@ -345,10 +401,29 @@ static int head_spins_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(fops_head_spins, head_spins_get, head_spins_set, "%llu\n"); +static int pv_yield_owner_set(void *data, u64 val) +{ + pv_yield_owner = !!val; + + return 0; +} + +static int pv_yield_owner_get(void *data, u64 *val) +{ + *val = pv_yield_owner; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_owner, pv_yield_owner_get, pv_yield_owner_set, "%llu\n"); + static __init int spinlock_debugfs_init(void) { debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins); debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins); + if (is_shared_processor()) { + debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner); + } return 0; } From bd48287b2cf4cd6e95576db3a94fd2a7cdf9832d Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 26 Nov 2022 19:59:23 +1000 Subject: [PATCH 3185/4122] powerpc/qspinlock: implement option to yield to previous node Queued waiters which are not at the head of the queue don't spin on the lock word but their qnode lock word, waiting for the previous queued CPU to release them. Add an option which allows these waiters to yield to the previous CPU if its vCPU is preempted. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221126095932.1234527-9-npiggin@gmail.com --- arch/powerpc/lib/qspinlock.c | 46 +++++++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c index 18e21574e6c5..41afd8e68918 100644 --- a/arch/powerpc/lib/qspinlock.c +++ b/arch/powerpc/lib/qspinlock.c @@ -26,6 +26,7 @@ static bool maybe_stealers __read_mostly = true; static int head_spins __read_mostly = (1 << 8); static bool pv_yield_owner __read_mostly = true; +static bool pv_yield_prev __read_mostly = true; static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes); @@ -195,6 +196,32 @@ relax: cpu_relax(); } +static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *node, u32 val, bool paravirt) +{ + int prev_cpu = decode_tail_cpu(val); + u32 yield_count; + + if (!paravirt) + goto relax; + + if (!pv_yield_prev) + goto relax; + + yield_count = yield_count_of(prev_cpu); + if ((yield_count & 1) == 0) + goto relax; /* owner vcpu is running */ + + smp_rmb(); /* See yield_to_locked_owner comment */ + + if (!node->locked) { + yield_to_preempted(prev_cpu, yield_count); + return; + } + +relax: + cpu_relax(); +} + static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt) { @@ -269,7 +296,7 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b /* Wait for mcs node lock to be released */ while (!node->locked) - cpu_relax(); + yield_to_prev(lock, node, old, paravirt); smp_rmb(); /* acquire barrier for the mcs lock */ } @@ -417,12 +444,29 @@ static int pv_yield_owner_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_owner, pv_yield_owner_get, pv_yield_owner_set, "%llu\n"); +static int pv_yield_prev_set(void *data, u64 val) +{ + pv_yield_prev = !!val; + + return 0; +} + +static int pv_yield_prev_get(void *data, u64 *val) +{ + *val = pv_yield_prev; + + 
return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n"); + static __init int spinlock_debugfs_init(void) { debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins); debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins); if (is_shared_processor()) { debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner); + debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev); } return 0; From b4c3cdc1a698a2f6168768d0bed4bf062723722e Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 26 Nov 2022 19:59:24 +1000 Subject: [PATCH 3186/4122] powerpc/qspinlock: allow stealing when head of queue yields If the head of queue is preventing stealing but it finds the owner vCPU is preempted, it will yield its cycles to the owner which could cause it to become preempted. Add an option to re-allow stealers before yielding, and disallow them again after returning from the yield. Disable this option by default for now, i.e., no logical change. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221126095932.1234527-10-npiggin@gmail.com --- arch/powerpc/lib/qspinlock.c | 59 ++++++++++++++++++++++++++++++++++-- 1 file changed, 56 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c index 41afd8e68918..c1f3b699b63f 100644 --- a/arch/powerpc/lib/qspinlock.c +++ b/arch/powerpc/lib/qspinlock.c @@ -26,6 +26,7 @@ static bool maybe_stealers __read_mostly = true; static int head_spins __read_mostly = (1 << 8); static bool pv_yield_owner __read_mostly = true; +static bool pv_yield_allow_steal __read_mostly = false; static bool pv_yield_prev __read_mostly = true; static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes); @@ -135,6 +136,22 @@ static __always_inline u32 set_mustq(struct qspinlock *lock) return prev; } +static __always_inline u32 clear_mustq(struct qspinlock *lock) +{ + u32 prev; + + asm volatile( +"1: lwarx %0,0,%1 # clear_mustq \n" +" andc %0,%0,%2 \n" +" stwcx. %0,0,%1 \n" +" bne- 1b \n" + : "=&r" (prev) + : "r" (&lock->val), "r" (_Q_MUST_Q_VAL) + : "cr0", "memory"); + + return prev; +} + static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val) { int cpu = decode_tail_cpu(val); @@ -159,7 +176,7 @@ static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val) BUG(); } -static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt) +static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool mustq) { int owner; u32 yield_count; @@ -188,7 +205,11 @@ static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 va smp_rmb(); if (READ_ONCE(lock->val) == val) { + if (mustq) + clear_mustq(lock); yield_to_preempted(owner, yield_count); + if (mustq) + set_mustq(lock); /* Don't relax if we yielded. Maybe we should? 
*/ return; } @@ -196,6 +217,21 @@ relax: cpu_relax(); } +static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt) +{ + __yield_to_locked_owner(lock, val, paravirt, false); +} + +static __always_inline void yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt) +{ + bool mustq = false; + + if ((val & _Q_MUST_Q_VAL) && pv_yield_allow_steal) + mustq = true; + + __yield_to_locked_owner(lock, val, paravirt, mustq); +} + static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *node, u32 val, bool paravirt) { int prev_cpu = decode_tail_cpu(val); @@ -211,7 +247,7 @@ static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode * if ((yield_count & 1) == 0) goto relax; /* owner vcpu is running */ - smp_rmb(); /* See yield_to_locked_owner comment */ + smp_rmb(); /* See __yield_to_locked_owner comment */ if (!node->locked) { yield_to_preempted(prev_cpu, yield_count); @@ -308,7 +344,7 @@ again: if (!(val & _Q_LOCKED_VAL)) break; - yield_to_locked_owner(lock, val, paravirt); + yield_head_to_locked_owner(lock, val, paravirt); if (!maybe_stealers) continue; iters++; @@ -444,6 +480,22 @@ static int pv_yield_owner_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_owner, pv_yield_owner_get, pv_yield_owner_set, "%llu\n"); +static int pv_yield_allow_steal_set(void *data, u64 val) +{ + pv_yield_allow_steal = !!val; + + return 0; +} + +static int pv_yield_allow_steal_get(void *data, u64 *val) +{ + *val = pv_yield_allow_steal; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_allow_steal, pv_yield_allow_steal_get, pv_yield_allow_steal_set, "%llu\n"); + static int pv_yield_prev_set(void *data, u64 val) { pv_yield_prev = !!val; @@ -466,6 +518,7 @@ static __init int spinlock_debugfs_init(void) debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins); if (is_shared_processor()) { debugfs_create_file("qspl_pv_yield_owner", 0600, 
arch_debugfs_dir, NULL, &fops_pv_yield_owner); + debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal); debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev); } From 28db61e207ea3890d286cff3141c1ce67346074d Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 26 Nov 2022 19:59:25 +1000 Subject: [PATCH 3187/4122] powerpc/qspinlock: allow propagation of yield CPU down the queue Having all CPUs poll the lock word for the owner CPU that should be yielded to defeats most of the purpose of using MCS queueing for scalability. Yet it may be desirable for queued waiters to yield to a preempted owner. With this change, queue waiters never sample the owner CPU directly from the lock word. The queue head (which is spinning on the lock) propagates the owner CPU back to the next waiter if it finds the owner has been preempted. That waiter then propagates the owner CPU back to the next waiter, and so on. s390 addresses this problem differently, by having queued waiters sample the lock word to find the owner at a low frequency. That has the advantage of being simpler; the advantage of propagation is that the lock word never has to be accessed by queued waiters, and the transfer of cache lines to transmit the owner data is only required when lock holder vCPU preemption occurs.
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221126095932.1234527-11-npiggin@gmail.com --- arch/powerpc/lib/qspinlock.c | 79 ++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c index c1f3b699b63f..c45f30c9a19e 100644 --- a/arch/powerpc/lib/qspinlock.c +++ b/arch/powerpc/lib/qspinlock.c @@ -12,6 +12,7 @@ struct qnode { struct qnode *next; struct qspinlock *lock; + int yield_cpu; u8 locked; /* 1 if lock acquired */ }; @@ -28,6 +29,7 @@ static int head_spins __read_mostly = (1 << 8); static bool pv_yield_owner __read_mostly = true; static bool pv_yield_allow_steal __read_mostly = false; static bool pv_yield_prev __read_mostly = true; +static bool pv_yield_propagate_owner __read_mostly = true; static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes); @@ -232,14 +234,67 @@ static __always_inline void yield_head_to_locked_owner(struct qspinlock *lock, u __yield_to_locked_owner(lock, val, paravirt, mustq); } +static __always_inline void propagate_yield_cpu(struct qnode *node, u32 val, int *set_yield_cpu, bool paravirt) +{ + struct qnode *next; + int owner; + + if (!paravirt) + return; + if (!pv_yield_propagate_owner) + return; + + owner = get_owner_cpu(val); + if (*set_yield_cpu == owner) + return; + + next = READ_ONCE(node->next); + if (!next) + return; + + if (vcpu_is_preempted(owner)) { + next->yield_cpu = owner; + *set_yield_cpu = owner; + } else if (*set_yield_cpu != -1) { + next->yield_cpu = owner; + *set_yield_cpu = owner; + } +} + static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *node, u32 val, bool paravirt) { int prev_cpu = decode_tail_cpu(val); u32 yield_count; + int yield_cpu; if (!paravirt) goto relax; + if (!pv_yield_propagate_owner) + goto yield_prev; + + yield_cpu = READ_ONCE(node->yield_cpu); + if (yield_cpu == -1) { + /* Propagate back the -1 CPU */ + if (node->next && 
node->next->yield_cpu != -1) + node->next->yield_cpu = yield_cpu; + goto yield_prev; + } + + yield_count = yield_count_of(yield_cpu); + if ((yield_count & 1) == 0) + goto yield_prev; /* owner vcpu is running */ + + smp_rmb(); + + if (yield_cpu == node->yield_cpu) { + if (node->next && node->next->yield_cpu != yield_cpu) + node->next->yield_cpu = yield_cpu; + yield_to_preempted(yield_cpu, yield_count); + return; + } + +yield_prev: if (!pv_yield_prev) goto relax; @@ -293,6 +348,7 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b u32 val, old, tail; bool mustq = false; int idx; + int set_yield_cpu = -1; int iters = 0; BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS)); @@ -314,6 +370,7 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b node = &qnodesp->nodes[idx]; node->next = NULL; node->lock = lock; + node->yield_cpu = -1; node->locked = 0; tail = encode_tail_cpu(smp_processor_id()); @@ -334,6 +391,10 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b while (!node->locked) yield_to_prev(lock, node, old, paravirt); + /* Clear out stale propagated yield_cpu */ + if (paravirt && pv_yield_propagate_owner && node->yield_cpu != -1) + node->yield_cpu = -1; + smp_rmb(); /* acquire barrier for the mcs lock */ } @@ -344,6 +405,7 @@ again: if (!(val & _Q_LOCKED_VAL)) break; + propagate_yield_cpu(node, val, &set_yield_cpu, paravirt); yield_head_to_locked_owner(lock, val, paravirt); if (!maybe_stealers) continue; @@ -512,6 +574,22 @@ static int pv_yield_prev_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n"); +static int pv_yield_propagate_owner_set(void *data, u64 val) +{ + pv_yield_propagate_owner = !!val; + + return 0; +} + +static int pv_yield_propagate_owner_get(void *data, u64 *val) +{ + *val = pv_yield_propagate_owner; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_propagate_owner, 
pv_yield_propagate_owner_get, pv_yield_propagate_owner_set, "%llu\n"); + static __init int spinlock_debugfs_init(void) { debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins); @@ -520,6 +598,7 @@ static __init int spinlock_debugfs_init(void) debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner); debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal); debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev); + debugfs_create_file("qspl_pv_yield_propagate_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_propagate_owner); } return 0; From be742c573fdafcfa1752642ca1c7aaf08c258128 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 26 Nov 2022 19:59:26 +1000 Subject: [PATCH 3188/4122] powerpc/qspinlock: add ability to prod new queue head CPU After the head of the queue acquires the lock, it releases the next waiter in the queue to become the new head. Add an option to prod the new head if its vCPU was preempted. This may only have an effect if queue waiters are yielding. Disable this option by default for now, i.e., no logical change. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221126095932.1234527-12-npiggin@gmail.com --- arch/powerpc/lib/qspinlock.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c index c45f30c9a19e..2f6c0bed25ea 100644 --- a/arch/powerpc/lib/qspinlock.c +++ b/arch/powerpc/lib/qspinlock.c @@ -12,6 +12,7 @@ struct qnode { struct qnode *next; struct qspinlock *lock; + int cpu; int yield_cpu; u8 locked; /* 1 if lock acquired */ }; @@ -30,6 +31,7 @@ static bool pv_yield_owner __read_mostly = true; static bool pv_yield_allow_steal __read_mostly = false; static bool pv_yield_prev __read_mostly = true; static bool pv_yield_propagate_owner __read_mostly = true; +static bool pv_prod_head __read_mostly = false; static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes); @@ -370,10 +372,11 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b node = &qnodesp->nodes[idx]; node->next = NULL; node->lock = lock; + node->cpu = smp_processor_id(); node->yield_cpu = -1; node->locked = 0; - tail = encode_tail_cpu(smp_processor_id()); + tail = encode_tail_cpu(node->cpu); old = publish_tail_cpu(lock, tail); @@ -439,7 +442,14 @@ again: * this store to locked. The corresponding barrier is the smp_rmb() * acquire barrier for mcs lock, above. 
*/ - WRITE_ONCE(next->locked, 1); + if (paravirt && pv_prod_head) { + int next_cpu = next->cpu; + WRITE_ONCE(next->locked, 1); + if (vcpu_is_preempted(next_cpu)) + prod_cpu(next_cpu); + } else { + WRITE_ONCE(next->locked, 1); + } release: qnodesp->count--; /* release the node */ @@ -590,6 +600,22 @@ static int pv_yield_propagate_owner_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_propagate_owner, pv_yield_propagate_owner_get, pv_yield_propagate_owner_set, "%llu\n"); +static int pv_prod_head_set(void *data, u64 val) +{ + pv_prod_head = !!val; + + return 0; +} + +static int pv_prod_head_get(void *data, u64 *val) +{ + *val = pv_prod_head; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_prod_head, pv_prod_head_get, pv_prod_head_set, "%llu\n"); + static __init int spinlock_debugfs_init(void) { debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins); @@ -599,6 +625,7 @@ static __init int spinlock_debugfs_init(void) debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal); debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev); debugfs_create_file("qspl_pv_yield_propagate_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_propagate_owner); + debugfs_create_file("qspl_pv_prod_head", 0600, arch_debugfs_dir, NULL, &fops_pv_prod_head); } return 0; From f61ab43cc1a6146d6eef7e0713a452c3677ad13e Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 26 Nov 2022 19:59:27 +1000 Subject: [PATCH 3189/4122] powerpc/qspinlock: allow lock stealing in trylock and lock fastpath This change allows trylock to steal the lock. It also allows the initial lock attempt to steal the lock rather than bailing out and going to the slow path. This gives trylock more strength: without this a continually-contended lock will never permit a trylock to succeed. With this change, the trylock has a small but non-zero chance. 
It also gives the lock fastpath most of the benefit of passing the reservation back through to the steal loop in the slow path without the complexity. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221126095932.1234527-13-npiggin@gmail.com --- arch/powerpc/include/asm/qspinlock.h | 22 ++++++++++++++++++++-- arch/powerpc/lib/qspinlock.c | 9 +++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h index 9572a2ef974d..93b1c976db8a 100644 --- a/arch/powerpc/include/asm/qspinlock.h +++ b/arch/powerpc/include/asm/qspinlock.h @@ -6,6 +6,15 @@ #include #include +/* + * The trylock itself may steal. This makes trylocks slightly stronger, and + * might make spin locks slightly more efficient when stealing. + * + * This is compile-time, so if true then there may always be stealers, so the + * nosteal paths become unused. + */ +#define _Q_SPIN_TRY_LOCK_STEAL 1 + static __always_inline int queued_spin_is_locked(struct qspinlock *lock) { return READ_ONCE(lock->val); @@ -27,13 +36,14 @@ static __always_inline u32 queued_spin_encode_locked_val(void) return _Q_LOCKED_VAL | (smp_processor_id() << _Q_OWNER_CPU_OFFSET); } -static __always_inline int queued_spin_trylock(struct qspinlock *lock) +static __always_inline int __queued_spin_trylock_nosteal(struct qspinlock *lock) { u32 new = queued_spin_encode_locked_val(); u32 prev; + /* Trylock succeeds only when unlocked and no queued nodes */ asm volatile( -"1: lwarx %0,0,%1,%3 # queued_spin_trylock \n" +"1: lwarx %0,0,%1,%3 # __queued_spin_trylock_nosteal \n" " cmpwi 0,%0,0 \n" " bne- 2f \n" " stwcx. 
%2,0,%1 \n" @@ -72,6 +82,14 @@ static __always_inline int __queued_spin_trylock_steal(struct qspinlock *lock) return likely(!(prev & ~_Q_TAIL_CPU_MASK)); } +static __always_inline int queued_spin_trylock(struct qspinlock *lock) +{ + if (!_Q_SPIN_TRY_LOCK_STEAL) + return __queued_spin_trylock_nosteal(lock); + else + return __queued_spin_trylock_steal(lock); +} + void queued_spin_lock_slowpath(struct qspinlock *lock); static __always_inline void queued_spin_lock(struct qspinlock *lock) diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c index 2f6c0bed25ea..8e5b8bc3f094 100644 --- a/arch/powerpc/lib/qspinlock.c +++ b/arch/powerpc/lib/qspinlock.c @@ -24,7 +24,11 @@ struct qnodes { /* Tuning parameters */ static int steal_spins __read_mostly = (1 << 5); +#if _Q_SPIN_TRY_LOCK_STEAL == 1 +static const bool maybe_stealers = true; +#else static bool maybe_stealers __read_mostly = true; +#endif static int head_spins __read_mostly = (1 << 8); static bool pv_yield_owner __read_mostly = true; @@ -483,6 +487,10 @@ void pv_spinlocks_init(void) #include static int steal_spins_set(void *data, u64 val) { +#if _Q_SPIN_TRY_LOCK_STEAL == 1 + /* maybe_stealers remains true */ + steal_spins = val; +#else static DEFINE_MUTEX(lock); /* @@ -507,6 +515,7 @@ static int steal_spins_set(void *data, u64 val) steal_spins = val; } mutex_unlock(&lock); +#endif return 0; } From 71c235027ce7940434acd3f553602ad8b5d36469 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 26 Nov 2022 19:59:28 +1000 Subject: [PATCH 3190/4122] powerpc/qspinlock: use spin_begin/end API Use the spin_begin/spin_cpu_relax/spin_end APIs in qspinlock, which helps to prevent threads issuing a lot of expensive priority nops which may not have much effect due to immediately executing low then medium priority. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221126095932.1234527-14-npiggin@gmail.com --- arch/powerpc/lib/qspinlock.c | 39 ++++++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c index 8e5b8bc3f094..36aff7defda8 100644 --- a/arch/powerpc/lib/qspinlock.c +++ b/arch/powerpc/lib/qspinlock.c @@ -184,6 +184,7 @@ static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val) BUG(); } +/* Called inside spin_begin() */ static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool mustq) { int owner; @@ -203,6 +204,8 @@ static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32 if ((yield_count & 1) == 0) goto relax; /* owner vcpu is running */ + spin_end(); + /* * Read the lock word after sampling the yield count. On the other side * there may a wmb because the yield count update is done by the @@ -218,18 +221,22 @@ static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32 yield_to_preempted(owner, yield_count); if (mustq) set_mustq(lock); + spin_begin(); /* Don't relax if we yielded. Maybe we should? 
*/ return; } + spin_begin(); relax: - cpu_relax(); + spin_cpu_relax(); } +/* Called inside spin_begin() */ static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt) { __yield_to_locked_owner(lock, val, paravirt, false); } +/* Called inside spin_begin() */ static __always_inline void yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt) { bool mustq = false; @@ -267,6 +274,7 @@ static __always_inline void propagate_yield_cpu(struct qnode *node, u32 val, int } } +/* Called inside spin_begin() */ static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *node, u32 val, bool paravirt) { int prev_cpu = decode_tail_cpu(val); @@ -291,14 +299,18 @@ static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode * if ((yield_count & 1) == 0) goto yield_prev; /* owner vcpu is running */ + spin_end(); + smp_rmb(); if (yield_cpu == node->yield_cpu) { if (node->next && node->next->yield_cpu != yield_cpu) node->next->yield_cpu = yield_cpu; yield_to_preempted(yield_cpu, yield_count); + spin_begin(); return; } + spin_begin(); yield_prev: if (!pv_yield_prev) @@ -308,15 +320,19 @@ yield_prev: if ((yield_count & 1) == 0) goto relax; /* owner vcpu is running */ + spin_end(); + smp_rmb(); /* See __yield_to_locked_owner comment */ if (!node->locked) { yield_to_preempted(prev_cpu, yield_count); + spin_begin(); return; } + spin_begin(); relax: - cpu_relax(); + spin_cpu_relax(); } @@ -328,6 +344,8 @@ static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool parav return false; /* Attempt to steal the lock */ + spin_begin(); + do { u32 val = READ_ONCE(lock->val); @@ -335,8 +353,10 @@ static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool parav break; if (unlikely(!(val & _Q_LOCKED_VAL))) { + spin_end(); if (__queued_spin_trylock_steal(lock)) return true; + spin_begin(); } else { yield_to_locked_owner(lock, val, paravirt); } @@ -344,6 +364,8 @@ static 
__always_inline bool try_to_steal_lock(struct qspinlock *lock, bool parav iters++; } while (iters < get_steal_spins(paravirt)); + spin_end(); + return false; } @@ -395,8 +417,10 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b WRITE_ONCE(prev->next, node); /* Wait for mcs node lock to be released */ + spin_begin(); while (!node->locked) yield_to_prev(lock, node, old, paravirt); + spin_end(); /* Clear out stale propagated yield_cpu */ if (paravirt && pv_yield_propagate_owner && node->yield_cpu != -1) @@ -407,6 +431,7 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b again: /* We're at the head of the waitqueue, wait for the lock. */ + spin_begin(); for (;;) { val = READ_ONCE(lock->val); if (!(val & _Q_LOCKED_VAL)) @@ -424,6 +449,7 @@ again: val |= _Q_MUST_Q_VAL; } } + spin_end(); /* If we're the last queued, must clean up the tail. */ old = trylock_clean_tail(lock, tail); @@ -436,8 +462,13 @@ again: goto release; /* We were the tail, no next. */ /* There is a next, must wait for node->next != NULL (MCS protocol) */ - while (!(next = READ_ONCE(node->next))) - cpu_relax(); + next = READ_ONCE(node->next); + if (!next) { + spin_begin(); + while (!(next = READ_ONCE(node->next))) + cpu_relax(); + spin_end(); + } /* * Unlock the next mcs waiter node. Release barrier is not required From cc79701114154efe79663ba47d9e51aad2ed3c78 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 26 Nov 2022 19:59:29 +1000 Subject: [PATCH 3191/4122] powerpc/qspinlock: reduce remote node steal spins Allow for a reduction in the number of times a CPU from a different node than the owner can attempt to steal the lock before queueing. This could bias the transfer behaviour of the lock across the machine and reduce NUMA crossings. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221126095932.1234527-15-npiggin@gmail.com --- arch/powerpc/lib/qspinlock.c | 43 +++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c index 36aff7defda8..8c6b5ef87118 100644 --- a/arch/powerpc/lib/qspinlock.c +++ b/arch/powerpc/lib/qspinlock.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -24,6 +25,7 @@ struct qnodes { /* Tuning parameters */ static int steal_spins __read_mostly = (1 << 5); +static int remote_steal_spins __read_mostly = (1 << 2); #if _Q_SPIN_TRY_LOCK_STEAL == 1 static const bool maybe_stealers = true; #else @@ -44,6 +46,11 @@ static __always_inline int get_steal_spins(bool paravirt) return steal_spins; } +static __always_inline int get_remote_steal_spins(bool paravirt) +{ + return remote_steal_spins; +} + static __always_inline int get_head_spins(bool paravirt) { return head_spins; @@ -335,10 +342,24 @@ relax: spin_cpu_relax(); } +static __always_inline bool steal_break(u32 val, int iters, bool paravirt) +{ + if (iters >= get_steal_spins(paravirt)) + return true; + + if (IS_ENABLED(CONFIG_NUMA) && + (iters >= get_remote_steal_spins(paravirt))) { + int cpu = get_owner_cpu(val); + if (numa_node_id() != cpu_to_node(cpu)) + return true; + } + return false; +} static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt) { int iters = 0; + u32 val; if (!steal_spins) return false; @@ -347,8 +368,7 @@ static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool parav spin_begin(); do { - u32 val = READ_ONCE(lock->val); - + val = READ_ONCE(lock->val); if (val & _Q_MUST_Q_VAL) break; @@ -362,7 +382,7 @@ static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool parav } iters++; - } while (iters < get_steal_spins(paravirt)); + } while (!steal_break(val, iters, paravirt)); 
spin_end(); @@ -560,6 +580,22 @@ static int steal_spins_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(fops_steal_spins, steal_spins_get, steal_spins_set, "%llu\n"); +static int remote_steal_spins_set(void *data, u64 val) +{ + remote_steal_spins = val; + + return 0; +} + +static int remote_steal_spins_get(void *data, u64 *val) +{ + *val = remote_steal_spins; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_remote_steal_spins, remote_steal_spins_get, remote_steal_spins_set, "%llu\n"); + static int head_spins_set(void *data, u64 val) { head_spins = val; @@ -659,6 +695,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_pv_prod_head, pv_prod_head_get, pv_prod_head_set, " static __init int spinlock_debugfs_init(void) { debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins); + debugfs_create_file("qspl_remote_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_remote_steal_spins); debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins); if (is_shared_processor()) { debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner); From 39dfc73596b48bb50cf7e4f3f54e38427dda5b4e Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 26 Nov 2022 19:59:30 +1000 Subject: [PATCH 3192/4122] powerpc/qspinlock: allow indefinite spinning on a preempted owner Provide an option that holds off queueing indefinitely while the lock owner is preempted. This could reduce queueing latencies for very overcommitted vcpu situations. This is disabled by default. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221126095932.1234527-16-npiggin@gmail.com --- arch/powerpc/lib/qspinlock.c | 77 +++++++++++++++++++++++++++++------- 1 file changed, 62 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c index 8c6b5ef87118..eeaaecfd5b77 100644 --- a/arch/powerpc/lib/qspinlock.c +++ b/arch/powerpc/lib/qspinlock.c @@ -35,6 +35,7 @@ static int head_spins __read_mostly = (1 << 8); static bool pv_yield_owner __read_mostly = true; static bool pv_yield_allow_steal __read_mostly = false; +static bool pv_spin_on_preempted_owner __read_mostly = false; static bool pv_yield_prev __read_mostly = true; static bool pv_yield_propagate_owner __read_mostly = true; static bool pv_prod_head __read_mostly = false; @@ -191,11 +192,12 @@ static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val) BUG(); } -/* Called inside spin_begin() */ -static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool mustq) +/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */ +static __always_inline bool __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool mustq) { int owner; u32 yield_count; + bool preempted = false; BUG_ON(!(val & _Q_LOCKED_VAL)); @@ -213,6 +215,8 @@ static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32 spin_end(); + preempted = true; + /* * Read the lock word after sampling the yield count. On the other side * there may a wmb because the yield count update is done by the @@ -229,29 +233,32 @@ static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32 if (mustq) set_mustq(lock); spin_begin(); + /* Don't relax if we yielded. Maybe we should? 
*/ - return; + return preempted; } spin_begin(); relax: spin_cpu_relax(); + + return preempted; } -/* Called inside spin_begin() */ -static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt) +/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */ +static __always_inline bool yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt) { - __yield_to_locked_owner(lock, val, paravirt, false); + return __yield_to_locked_owner(lock, val, paravirt, false); } -/* Called inside spin_begin() */ -static __always_inline void yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt) +/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */ +static __always_inline bool yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt) { bool mustq = false; if ((val & _Q_MUST_Q_VAL) && pv_yield_allow_steal) mustq = true; - __yield_to_locked_owner(lock, val, paravirt, mustq); + return __yield_to_locked_owner(lock, val, paravirt, mustq); } static __always_inline void propagate_yield_cpu(struct qnode *node, u32 val, int *set_yield_cpu, bool paravirt) @@ -361,13 +368,16 @@ static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool parav int iters = 0; u32 val; - if (!steal_spins) + if (!steal_spins) { + /* XXX: should spin_on_preempted_owner do anything here? 
*/ return false; + } /* Attempt to steal the lock */ spin_begin(); - do { + bool preempted = false; + val = READ_ONCE(lock->val); if (val & _Q_MUST_Q_VAL) break; @@ -378,10 +388,23 @@ static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool parav return true; spin_begin(); } else { - yield_to_locked_owner(lock, val, paravirt); + preempted = yield_to_locked_owner(lock, val, paravirt); } - iters++; + if (preempted) { + if (!pv_spin_on_preempted_owner) + iters++; + /* + * pv_spin_on_preempted_owner: don't increase iters + * while the owner is preempted -- we won't interfere + * with it by definition. This could introduce some + * latency issue if we continually observe preempted + * owners, but hopefully that's a rare corner case of + * a badly oversubscribed system. + */ + } else { + iters++; + } } while (!steal_break(val, iters, paravirt)); spin_end(); @@ -453,15 +476,22 @@ again: /* We're at the head of the waitqueue, wait for the lock. */ spin_begin(); for (;;) { + bool preempted; + val = READ_ONCE(lock->val); if (!(val & _Q_LOCKED_VAL)) break; propagate_yield_cpu(node, val, &set_yield_cpu, paravirt); - yield_head_to_locked_owner(lock, val, paravirt); + preempted = yield_head_to_locked_owner(lock, val, paravirt); if (!maybe_stealers) continue; - iters++; + if (preempted) { + if (!pv_spin_on_preempted_owner) + iters++; + } else { + iters++; + } if (!mustq && iters >= get_head_spins(paravirt)) { mustq = true; @@ -644,6 +674,22 @@ static int pv_yield_allow_steal_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_allow_steal, pv_yield_allow_steal_get, pv_yield_allow_steal_set, "%llu\n"); +static int pv_spin_on_preempted_owner_set(void *data, u64 val) +{ + pv_spin_on_preempted_owner = !!val; + + return 0; +} + +static int pv_spin_on_preempted_owner_get(void *data, u64 *val) +{ + *val = pv_spin_on_preempted_owner; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_spin_on_preempted_owner, pv_spin_on_preempted_owner_get, 
pv_spin_on_preempted_owner_set, "%llu\n"); + static int pv_yield_prev_set(void *data, u64 val) { pv_yield_prev = !!val; @@ -700,6 +746,7 @@ static __init int spinlock_debugfs_init(void) if (is_shared_processor()) { debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner); debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal); + debugfs_create_file("qspl_pv_spin_on_preempted_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_spin_on_preempted_owner); debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev); debugfs_create_file("qspl_pv_yield_propagate_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_propagate_owner); debugfs_create_file("qspl_pv_prod_head", 0600, arch_debugfs_dir, NULL, &fops_pv_prod_head); From 12b459a5ebf3308e718bc1dd48acb7c4cf7f1a75 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 26 Nov 2022 19:59:31 +1000 Subject: [PATCH 3193/4122] powerpc/qspinlock: provide accounting and options for sleepy locks Finding the owner or a queued waiter on a lock with a preempted vcpu is indicative of an oversubscribed guest causing the lock to get into trouble. Provide some options to detect this situation and have new CPUs avoid queueing for a longer time (more steal iterations) to minimise the problems caused by vcpu preemption on the queue. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221126095932.1234527-17-npiggin@gmail.com --- arch/powerpc/include/asm/qspinlock_types.h | 7 +- arch/powerpc/lib/qspinlock.c | 242 +++++++++++++++++++-- 2 files changed, 230 insertions(+), 19 deletions(-) diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h index adfeed4aa495..4766a7aa03cb 100644 --- a/arch/powerpc/include/asm/qspinlock_types.h +++ b/arch/powerpc/include/asm/qspinlock_types.h @@ -30,7 +30,7 @@ typedef struct qspinlock { * * 0: locked bit * 1-14: lock holder cpu - * 15: unused bit + * 15: lock owner or queuer vcpus observed to be preempted bit * 16: must queue bit * 17-31: tail cpu (+1) */ @@ -50,6 +50,11 @@ typedef struct qspinlock { #error "qspinlock does not support such large CONFIG_NR_CPUS" #endif +/* 0x00008000 */ +#define _Q_SLEEPY_OFFSET 15 +#define _Q_SLEEPY_BITS 1 +#define _Q_SLEEPY_VAL (1U << _Q_SLEEPY_OFFSET) + /* 0x00010000 */ #define _Q_MUST_Q_OFFSET 16 #define _Q_MUST_Q_BITS 1 diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c index eeaaecfd5b77..0f33a07c1d19 100644 --- a/arch/powerpc/lib/qspinlock.c +++ b/arch/powerpc/lib/qspinlock.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -36,25 +37,56 @@ static int head_spins __read_mostly = (1 << 8); static bool pv_yield_owner __read_mostly = true; static bool pv_yield_allow_steal __read_mostly = false; static bool pv_spin_on_preempted_owner __read_mostly = false; +static bool pv_sleepy_lock __read_mostly = true; +static bool pv_sleepy_lock_sticky __read_mostly = false; +static u64 pv_sleepy_lock_interval_ns __read_mostly = 0; +static int pv_sleepy_lock_factor __read_mostly = 256; static bool pv_yield_prev __read_mostly = true; static bool pv_yield_propagate_owner __read_mostly = true; static bool pv_prod_head __read_mostly = false; static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes); +static 
DEFINE_PER_CPU_ALIGNED(u64, sleepy_lock_seen_clock); -static __always_inline int get_steal_spins(bool paravirt) +static __always_inline bool recently_sleepy(void) { - return steal_spins; + /* pv_sleepy_lock is true when this is called */ + if (pv_sleepy_lock_interval_ns) { + u64 seen = this_cpu_read(sleepy_lock_seen_clock); + + if (seen) { + u64 delta = sched_clock() - seen; + if (delta < pv_sleepy_lock_interval_ns) + return true; + this_cpu_write(sleepy_lock_seen_clock, 0); + } + } + + return false; } -static __always_inline int get_remote_steal_spins(bool paravirt) +static __always_inline int get_steal_spins(bool paravirt, bool sleepy) { - return remote_steal_spins; + if (paravirt && sleepy) + return steal_spins * pv_sleepy_lock_factor; + else + return steal_spins; } -static __always_inline int get_head_spins(bool paravirt) +static __always_inline int get_remote_steal_spins(bool paravirt, bool sleepy) { - return head_spins; + if (paravirt && sleepy) + return remote_steal_spins * pv_sleepy_lock_factor; + else + return remote_steal_spins; +} + +static __always_inline int get_head_spins(bool paravirt, bool sleepy) +{ + if (paravirt && sleepy) + return head_spins * pv_sleepy_lock_factor; + else + return head_spins; } static inline u32 encode_tail_cpu(int cpu) @@ -168,6 +200,56 @@ static __always_inline u32 clear_mustq(struct qspinlock *lock) return prev; } +static __always_inline bool try_set_sleepy(struct qspinlock *lock, u32 old) +{ + u32 prev; + u32 new = old | _Q_SLEEPY_VAL; + + BUG_ON(!(old & _Q_LOCKED_VAL)); + BUG_ON(old & _Q_SLEEPY_VAL); + + asm volatile( +"1: lwarx %0,0,%1 # try_set_sleepy \n" +" cmpw 0,%0,%2 \n" +" bne- 2f \n" +" stwcx. 
%3,0,%1 \n" +" bne- 1b \n" +"2: \n" + : "=&r" (prev) + : "r" (&lock->val), "r"(old), "r" (new) + : "cr0", "memory"); + + return likely(prev == old); +} + +static __always_inline void seen_sleepy_owner(struct qspinlock *lock, u32 val) +{ + if (pv_sleepy_lock) { + if (pv_sleepy_lock_interval_ns) + this_cpu_write(sleepy_lock_seen_clock, sched_clock()); + if (!(val & _Q_SLEEPY_VAL)) + try_set_sleepy(lock, val); + } +} + +static __always_inline void seen_sleepy_lock(void) +{ + if (pv_sleepy_lock && pv_sleepy_lock_interval_ns) + this_cpu_write(sleepy_lock_seen_clock, sched_clock()); +} + +static __always_inline void seen_sleepy_node(struct qspinlock *lock, u32 val) +{ + if (pv_sleepy_lock) { + if (pv_sleepy_lock_interval_ns) + this_cpu_write(sleepy_lock_seen_clock, sched_clock()); + if (val & _Q_LOCKED_VAL) { + if (!(val & _Q_SLEEPY_VAL)) + try_set_sleepy(lock, val); + } + } +} + static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val) { int cpu = decode_tail_cpu(val); @@ -215,6 +297,7 @@ static __always_inline bool __yield_to_locked_owner(struct qspinlock *lock, u32 spin_end(); + seen_sleepy_owner(lock, val); preempted = true; /* @@ -289,11 +372,12 @@ static __always_inline void propagate_yield_cpu(struct qnode *node, u32 val, int } /* Called inside spin_begin() */ -static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *node, u32 val, bool paravirt) +static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *node, u32 val, bool paravirt) { int prev_cpu = decode_tail_cpu(val); u32 yield_count; int yield_cpu; + bool preempted = false; if (!paravirt) goto relax; @@ -315,6 +399,9 @@ static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode * spin_end(); + preempted = true; + seen_sleepy_node(lock, val); + smp_rmb(); if (yield_cpu == node->yield_cpu) { @@ -322,7 +409,7 @@ static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode * node->next->yield_cpu = yield_cpu; 
yield_to_preempted(yield_cpu, yield_count); spin_begin(); - return; + return preempted; } spin_begin(); @@ -336,26 +423,31 @@ yield_prev: spin_end(); + preempted = true; + seen_sleepy_node(lock, val); + smp_rmb(); /* See __yield_to_locked_owner comment */ if (!node->locked) { yield_to_preempted(prev_cpu, yield_count); spin_begin(); - return; + return preempted; } spin_begin(); relax: spin_cpu_relax(); + + return preempted; } -static __always_inline bool steal_break(u32 val, int iters, bool paravirt) +static __always_inline bool steal_break(u32 val, int iters, bool paravirt, bool sleepy) { - if (iters >= get_steal_spins(paravirt)) + if (iters >= get_steal_spins(paravirt, sleepy)) return true; if (IS_ENABLED(CONFIG_NUMA) && - (iters >= get_remote_steal_spins(paravirt))) { + (iters >= get_remote_steal_spins(paravirt, sleepy))) { int cpu = get_owner_cpu(val); if (numa_node_id() != cpu_to_node(cpu)) return true; @@ -365,6 +457,8 @@ static __always_inline bool steal_break(u32 val, int iters, bool paravirt) static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt) { + bool seen_preempted = false; + bool sleepy = false; int iters = 0; u32 val; @@ -391,7 +485,25 @@ static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool parav preempted = yield_to_locked_owner(lock, val, paravirt); } + if (paravirt && pv_sleepy_lock) { + if (!sleepy) { + if (val & _Q_SLEEPY_VAL) { + seen_sleepy_lock(); + sleepy = true; + } else if (recently_sleepy()) { + sleepy = true; + } + } + if (pv_sleepy_lock_sticky && seen_preempted && + !(val & _Q_SLEEPY_VAL)) { + if (try_set_sleepy(lock, val)) + val |= _Q_SLEEPY_VAL; + } + } + if (preempted) { + seen_preempted = true; + sleepy = true; if (!pv_spin_on_preempted_owner) iters++; /* @@ -405,7 +517,7 @@ static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool parav } else { iters++; } - } while (!steal_break(val, iters, paravirt)); + } while (!steal_break(val, iters, paravirt, sleepy)); 
spin_end(); @@ -417,6 +529,8 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b struct qnodes *qnodesp; struct qnode *next, *node; u32 val, old, tail; + bool seen_preempted = false; + bool sleepy = false; bool mustq = false; int idx; int set_yield_cpu = -1; @@ -461,8 +575,10 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b /* Wait for mcs node lock to be released */ spin_begin(); - while (!node->locked) - yield_to_prev(lock, node, old, paravirt); + while (!node->locked) { + if (yield_to_prev(lock, node, old, paravirt)) + seen_preempted = true; + } spin_end(); /* Clear out stale propagated yield_cpu */ @@ -472,8 +588,8 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b smp_rmb(); /* acquire barrier for the mcs lock */ } -again: /* We're at the head of the waitqueue, wait for the lock. */ +again: spin_begin(); for (;;) { bool preempted; @@ -482,18 +598,40 @@ again: if (!(val & _Q_LOCKED_VAL)) break; + if (paravirt && pv_sleepy_lock && maybe_stealers) { + if (!sleepy) { + if (val & _Q_SLEEPY_VAL) { + seen_sleepy_lock(); + sleepy = true; + } else if (recently_sleepy()) { + sleepy = true; + } + } + if (pv_sleepy_lock_sticky && seen_preempted && + !(val & _Q_SLEEPY_VAL)) { + if (try_set_sleepy(lock, val)) + val |= _Q_SLEEPY_VAL; + } + } + propagate_yield_cpu(node, val, &set_yield_cpu, paravirt); preempted = yield_head_to_locked_owner(lock, val, paravirt); if (!maybe_stealers) continue; - if (preempted) { + + if (preempted) + seen_preempted = true; + + if (paravirt && preempted) { + sleepy = true; + if (!pv_spin_on_preempted_owner) iters++; } else { iters++; } - if (!mustq && iters >= get_head_spins(paravirt)) { + if (!mustq && iters >= get_head_spins(paravirt, sleepy)) { mustq = true; set_mustq(lock); val |= _Q_MUST_Q_VAL; @@ -690,6 +828,70 @@ static int pv_spin_on_preempted_owner_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(fops_pv_spin_on_preempted_owner, 
pv_spin_on_preempted_owner_get, pv_spin_on_preempted_owner_set, "%llu\n"); +static int pv_sleepy_lock_set(void *data, u64 val) +{ + pv_sleepy_lock = !!val; + + return 0; +} + +static int pv_sleepy_lock_get(void *data, u64 *val) +{ + *val = pv_sleepy_lock; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock, pv_sleepy_lock_get, pv_sleepy_lock_set, "%llu\n"); + +static int pv_sleepy_lock_sticky_set(void *data, u64 val) +{ + pv_sleepy_lock_sticky = !!val; + + return 0; +} + +static int pv_sleepy_lock_sticky_get(void *data, u64 *val) +{ + *val = pv_sleepy_lock_sticky; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_sticky, pv_sleepy_lock_sticky_get, pv_sleepy_lock_sticky_set, "%llu\n"); + +static int pv_sleepy_lock_interval_ns_set(void *data, u64 val) +{ + pv_sleepy_lock_interval_ns = val; + + return 0; +} + +static int pv_sleepy_lock_interval_ns_get(void *data, u64 *val) +{ + *val = pv_sleepy_lock_interval_ns; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_interval_ns, pv_sleepy_lock_interval_ns_get, pv_sleepy_lock_interval_ns_set, "%llu\n"); + +static int pv_sleepy_lock_factor_set(void *data, u64 val) +{ + pv_sleepy_lock_factor = val; + + return 0; +} + +static int pv_sleepy_lock_factor_get(void *data, u64 *val) +{ + *val = pv_sleepy_lock_factor; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_factor, pv_sleepy_lock_factor_get, pv_sleepy_lock_factor_set, "%llu\n"); + static int pv_yield_prev_set(void *data, u64 val) { pv_yield_prev = !!val; @@ -747,6 +949,10 @@ static __init int spinlock_debugfs_init(void) debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner); debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal); debugfs_create_file("qspl_pv_spin_on_preempted_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_spin_on_preempted_owner); + debugfs_create_file("qspl_pv_sleepy_lock", 0600, arch_debugfs_dir, NULL, 
&fops_pv_sleepy_lock); + debugfs_create_file("qspl_pv_sleepy_lock_sticky", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_sticky); + debugfs_create_file("qspl_pv_sleepy_lock_interval_ns", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_interval_ns); + debugfs_create_file("qspl_pv_sleepy_lock_factor", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_factor); debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev); debugfs_create_file("qspl_pv_yield_propagate_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_propagate_owner); debugfs_create_file("qspl_pv_prod_head", 0600, arch_debugfs_dir, NULL, &fops_pv_prod_head); From 0b2199841a7952d01a717b465df028b40b2cf3e9 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 26 Nov 2022 19:59:32 +1000 Subject: [PATCH 3194/4122] powerpc/qspinlock: add compile-time tuning adjustments This adds compile-time options that allow the EH lock hint bit to be enabled or disabled, and adds some new options that may or may not help matters. To help with experimentation and tuning. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221126095932.1234527-18-npiggin@gmail.com --- arch/powerpc/include/asm/qspinlock.h | 61 ++++++++++++++++++++++++++-- arch/powerpc/lib/qspinlock.c | 39 ++++++++++++++++-- 2 files changed, 94 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h index 93b1c976db8a..28a53fb69b38 100644 --- a/arch/powerpc/include/asm/qspinlock.h +++ b/arch/powerpc/include/asm/qspinlock.h @@ -6,15 +6,68 @@ #include #include +#ifdef CONFIG_PPC64 +/* + * Use the EH=1 hint for accesses that result in the lock being acquired. + * The hardware is supposed to optimise this pattern by holding the lock + * cacheline longer, and releasing when a store to the same memory (the + * unlock) is performed. 
+ */ +#define _Q_SPIN_EH_HINT 1 +#else +#define _Q_SPIN_EH_HINT 0 +#endif + /* * The trylock itself may steal. This makes trylocks slightly stronger, and - * might make spin locks slightly more efficient when stealing. + * makes locks slightly more efficient when stealing. * * This is compile-time, so if true then there may always be stealers, so the * nosteal paths become unused. */ #define _Q_SPIN_TRY_LOCK_STEAL 1 +/* + * Put a speculation barrier after testing the lock/node and finding it + * busy. Try to prevent pointless speculation in slow paths. + * + * Slows down the lockstorm microbenchmark with no stealing, where locking + * is purely FIFO through the queue. May have more benefit in real workload + * where speculating into the wrong place could have a greater cost. + */ +#define _Q_SPIN_SPEC_BARRIER 0 + +#ifdef CONFIG_PPC64 +/* + * Execute a miso instruction after passing the MCS lock ownership to the + * queue head. Miso is intended to make stores visible to other CPUs sooner. + * + * This seems to make the lockstorm microbenchmark nospin test go slightly + * faster on POWER10, but disable for now. + */ +#define _Q_SPIN_MISO 0 +#else +#define _Q_SPIN_MISO 0 +#endif + +#ifdef CONFIG_PPC64 +/* + * This executes miso after an unlock of the lock word, having ownership + * pass to the next CPU sooner. This will slow the uncontended path to some + * degree. No evidence it helps yet. + */ +#define _Q_SPIN_MISO_UNLOCK 0 +#else +#define _Q_SPIN_MISO_UNLOCK 0 +#endif + +/* + * Seems to slow down lockstorm microbenchmark, suspect queue node just + * has to become shared again right afterwards when its waiter spins on + * the lock field.
+ */ +#define _Q_SPIN_PREFETCH_NEXT 0 + static __always_inline int queued_spin_is_locked(struct qspinlock *lock) { return READ_ONCE(lock->val); @@ -52,7 +105,7 @@ static __always_inline int __queued_spin_trylock_nosteal(struct qspinlock *lock) "2: \n" : "=&r" (prev) : "r" (&lock->val), "r" (new), - "i" (IS_ENABLED(CONFIG_PPC64)) + "i" (_Q_SPIN_EH_HINT) : "cr0", "memory"); return likely(prev == 0); @@ -76,7 +129,7 @@ static __always_inline int __queued_spin_trylock_steal(struct qspinlock *lock) "2: \n" : "=&r" (prev), "=&r" (tmp) : "r" (&lock->val), "r" (new), "r" (_Q_TAIL_CPU_MASK), - "i" (IS_ENABLED(CONFIG_PPC64)) + "i" (_Q_SPIN_EH_HINT) : "cr0", "memory"); return likely(!(prev & ~_Q_TAIL_CPU_MASK)); @@ -101,6 +154,8 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock) static inline void queued_spin_unlock(struct qspinlock *lock) { smp_store_release(&lock->locked, 0); + if (_Q_SPIN_MISO_UNLOCK) + asm volatile("miso" ::: "memory"); } #define arch_spin_is_locked(l) queued_spin_is_locked(l) diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c index 0f33a07c1d19..1cf5d3e75250 100644 --- a/arch/powerpc/lib/qspinlock.c +++ b/arch/powerpc/lib/qspinlock.c @@ -48,6 +48,12 @@ static bool pv_prod_head __read_mostly = false; static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes); static DEFINE_PER_CPU_ALIGNED(u64, sleepy_lock_seen_clock); +#if _Q_SPIN_SPEC_BARRIER == 1 +#define spec_barrier() do { asm volatile("ori 31,31,0" ::: "memory"); } while (0) +#else +#define spec_barrier() do { } while (0) +#endif + static __always_inline bool recently_sleepy(void) { /* pv_sleepy_lock is true when this is called */ @@ -137,7 +143,7 @@ static __always_inline u32 trylock_clean_tail(struct qspinlock *lock, u32 tail) : "r" (&lock->val), "r"(tail), "r" (newval), "i" (_Q_LOCKED_VAL), "r" (_Q_TAIL_CPU_MASK), - "i" (IS_ENABLED(CONFIG_PPC64)) + "i" (_Q_SPIN_EH_HINT) : "cr0", "memory"); return prev; @@ -475,6 +481,7 @@ static __always_inline bool 
try_to_steal_lock(struct qspinlock *lock, bool parav val = READ_ONCE(lock->val); if (val & _Q_MUST_Q_VAL) break; + spec_barrier(); if (unlikely(!(val & _Q_LOCKED_VAL))) { spin_end(); @@ -540,6 +547,7 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b qnodesp = this_cpu_ptr(&qnodes); if (unlikely(qnodesp->count >= MAX_NODES)) { + spec_barrier(); while (!queued_spin_trylock(lock)) cpu_relax(); return; @@ -576,9 +584,12 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b /* Wait for mcs node lock to be released */ spin_begin(); while (!node->locked) { + spec_barrier(); + if (yield_to_prev(lock, node, old, paravirt)) seen_preempted = true; } + spec_barrier(); spin_end(); /* Clear out stale propagated yield_cpu */ @@ -586,6 +597,17 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b node->yield_cpu = -1; smp_rmb(); /* acquire barrier for the mcs lock */ + + /* + * Generic qspinlocks have this prefetch here, but it seems + * like it could cause additional line transitions because + * the waiter will keep loading from it. + */ + if (_Q_SPIN_PREFETCH_NEXT) { + next = READ_ONCE(node->next); + if (next) + prefetchw(next); + } } /* We're at the head of the waitqueue, wait for the lock. */ @@ -597,6 +619,7 @@ again: val = READ_ONCE(lock->val); if (!(val & _Q_LOCKED_VAL)) break; + spec_barrier(); if (paravirt && pv_sleepy_lock && maybe_stealers) { if (!sleepy) { @@ -637,6 +660,7 @@ again: val |= _Q_MUST_Q_VAL; } } + spec_barrier(); spin_end(); /* If we're the last queued, must clean up the tail. */ @@ -657,6 +681,7 @@ again: cpu_relax(); spin_end(); } + spec_barrier(); /* * Unlock the next mcs waiter node. 
Release barrier is not required @@ -668,10 +693,14 @@ again: if (paravirt && pv_prod_head) { int next_cpu = next->cpu; WRITE_ONCE(next->locked, 1); + if (_Q_SPIN_MISO) + asm volatile("miso" ::: "memory"); if (vcpu_is_preempted(next_cpu)) prod_cpu(next_cpu); } else { WRITE_ONCE(next->locked, 1); + if (_Q_SPIN_MISO) + asm volatile("miso" ::: "memory"); } release: @@ -686,12 +715,16 @@ void queued_spin_lock_slowpath(struct qspinlock *lock) * is passed as the paravirt argument to the functions. */ if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && is_shared_processor()) { - if (try_to_steal_lock(lock, true)) + if (try_to_steal_lock(lock, true)) { + spec_barrier(); return; + } queued_spin_lock_mcs_queue(lock, true); } else { - if (try_to_steal_lock(lock, false)) + if (try_to_steal_lock(lock, false)) { + spec_barrier(); return; + } queued_spin_lock_mcs_queue(lock, false); } } From c28c15b6d28a776538482101522cbcd9f906b15c Mon Sep 17 00:00:00 2001 From: "Christopher M. Riedl" Date: Wed, 9 Nov 2022 15:51:11 +1100 Subject: [PATCH 3195/4122] powerpc/code-patching: Use temporary mm for Radix MMU x86 supports the notion of a temporary mm which restricts access to temporary PTEs to a single CPU. A temporary mm is useful for situations where a CPU needs to perform sensitive operations (such as patching a STRICT_KERNEL_RWX kernel) requiring temporary mappings without exposing said mappings to other CPUs. Another benefit is that other CPU TLBs do not need to be flushed when the temporary mm is torn down. Mappings in the temporary mm can be set in the userspace portion of the address-space. Interrupts must be disabled while the temporary mm is in use. HW breakpoints, which may have been set by userspace as watchpoints on addresses now within the temporary mm, are saved and disabled when loading the temporary mm. The HW breakpoints are restored when unloading the temporary mm. 
All HW breakpoints are indiscriminately disabled while the temporary mm is in use - this may include breakpoints set by perf. Use the `poking_init` init hook to prepare a temporary mm and patching address. Initialize the temporary mm using mm_alloc(). Choose a randomized patching address inside the temporary mm userspace address space. The patching address is randomized between PAGE_SIZE and DEFAULT_MAP_WINDOW-PAGE_SIZE. Bits of entropy with 64K page size on BOOK3S_64: bits of entropy = log2(DEFAULT_MAP_WINDOW_USER64 / PAGE_SIZE) PAGE_SIZE=64K, DEFAULT_MAP_WINDOW_USER64=128TB bits of entropy = log2(128TB / 64K) bits of entropy = 31 The upper limit is DEFAULT_MAP_WINDOW due to how the Book3s64 Hash MMU operates - by default the space above DEFAULT_MAP_WINDOW is not available. Currently the Hash MMU does not use a temporary mm so technically this upper limit isn't necessary; however, a larger randomization range does not further "harden" this overall approach and future work may introduce patching with a temporary mm on Hash as well. Randomization occurs only once during initialization for each CPU as it comes online. The patching page is mapped with PAGE_KERNEL to set EAA[0] for the PTE which ignores the AMR (so no need to unlock/lock KUAP) according to PowerISA v3.0b Figure 35 on Radix. Based on x86 implementation: commit 4fc19708b165 ("x86/alternatives: Initialize temporary mm for patching") and: commit b3fd8e83ada0 ("x86/alternatives: Use temporary mm for text poking") From: Benjamin Gray Synchronisation is done according to ISA 3.1B Book 3 Chapter 13 "Synchronization Requirements for Context Alterations". Switching the mm is a change to the PID, which requires a CSI before and after the change, and a hwsync between the last instruction that performs address translation for an associated storage access and the change itself. Instruction fetch is an associated storage access, but the instruction address mappings are not being changed, so it should not matter which context they use.
We must still perform a hwsync to guard arbitrary prior code that may have accessed a userspace address. TLB invalidation is local and VA specific. Local because only this core used the patching mm, and VA specific because we only care that the writable mapping is purged. Leaving the other mappings intact is more efficient, especially when performing many code patches in a row (e.g., as ftrace would). Signed-off-by: Christopher M. Riedl Signed-off-by: Benjamin Gray [mpe: Use mm_alloc() per 107b6828a7cd ("x86/mm: Use mm_alloc() in poking_init()")] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221109045112.187069-9-bgray@linux.ibm.com --- arch/powerpc/lib/code-patching.c | 177 ++++++++++++++++++++++++++++++- 1 file changed, 172 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 3055eef7dcdc..a1902241ff5d 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -4,12 +4,17 @@ */ #include +#include +#include #include #include #include #include #include +#include +#include +#include #include #include #include @@ -42,11 +47,54 @@ int raw_patch_instruction(u32 *addr, ppc_inst_t instr) } #ifdef CONFIG_STRICT_KERNEL_RWX + static DEFINE_PER_CPU(struct vm_struct *, text_poke_area); +static DEFINE_PER_CPU(struct mm_struct *, cpu_patching_mm); +static DEFINE_PER_CPU(unsigned long, cpu_patching_addr); +static DEFINE_PER_CPU(pte_t *, cpu_patching_pte); static int map_patch_area(void *addr, unsigned long text_poke_addr); static void unmap_patch_area(unsigned long addr); +static bool mm_patch_enabled(void) +{ + return IS_ENABLED(CONFIG_SMP) && radix_enabled(); +} + +/* + * The following applies for Radix MMU. Hash MMU has different requirements, + * and so is not supported. 
+ * + * Changing mm requires context synchronising instructions on both sides of + * the context switch, as well as a hwsync between the last instruction for + * which the address of an associated storage access was translated using + * the current context. + * + * switch_mm_irqs_off() performs an isync after the context switch. It is + * the responsibility of the caller to perform the CSI and hwsync before + * starting/stopping the temp mm. + */ +static struct mm_struct *start_using_temp_mm(struct mm_struct *temp_mm) +{ + struct mm_struct *orig_mm = current->active_mm; + + lockdep_assert_irqs_disabled(); + switch_mm_irqs_off(orig_mm, temp_mm, current); + + WARN_ON(!mm_is_thread_local(temp_mm)); + + suspend_breakpoints(); + return orig_mm; +} + +static void stop_using_temp_mm(struct mm_struct *temp_mm, + struct mm_struct *orig_mm) +{ + lockdep_assert_irqs_disabled(); + switch_mm_irqs_off(temp_mm, orig_mm, current); + restore_breakpoints(); +} + static int text_area_cpu_up(unsigned int cpu) { struct vm_struct *area; @@ -79,14 +127,86 @@ static int text_area_cpu_down(unsigned int cpu) return 0; } +static void put_patching_mm(struct mm_struct *mm, unsigned long patching_addr) +{ + struct mmu_gather tlb; + + tlb_gather_mmu(&tlb, mm); + free_pgd_range(&tlb, patching_addr, patching_addr + PAGE_SIZE, 0, 0); + mmput(mm); +} + +static int text_area_cpu_up_mm(unsigned int cpu) +{ + struct mm_struct *mm; + unsigned long addr; + pte_t *pte; + spinlock_t *ptl; + + mm = mm_alloc(); + if (WARN_ON(!mm)) + goto fail_no_mm; + + /* + * Choose a random page-aligned address from the interval + * [PAGE_SIZE .. DEFAULT_MAP_WINDOW - PAGE_SIZE]. + * The lower address bound is PAGE_SIZE to avoid the zero-page. + */ + addr = (1 + (get_random_long() % (DEFAULT_MAP_WINDOW / PAGE_SIZE - 2))) << PAGE_SHIFT; + + /* + * PTE allocation uses GFP_KERNEL which means we need to + * pre-allocate the PTE here because we cannot do the + * allocation during patching when IRQs are disabled. 
+ * + * Using get_locked_pte() to avoid open coding, the lock + * is unnecessary. + */ + pte = get_locked_pte(mm, addr, &ptl); + if (!pte) + goto fail_no_pte; + pte_unmap_unlock(pte, ptl); + + this_cpu_write(cpu_patching_mm, mm); + this_cpu_write(cpu_patching_addr, addr); + this_cpu_write(cpu_patching_pte, pte); + + return 0; + +fail_no_pte: + put_patching_mm(mm, addr); +fail_no_mm: + return -ENOMEM; +} + +static int text_area_cpu_down_mm(unsigned int cpu) +{ + put_patching_mm(this_cpu_read(cpu_patching_mm), + this_cpu_read(cpu_patching_addr)); + + this_cpu_write(cpu_patching_mm, NULL); + this_cpu_write(cpu_patching_addr, 0); + this_cpu_write(cpu_patching_pte, NULL); + + return 0; +} + static __ro_after_init DEFINE_STATIC_KEY_FALSE(poking_init_done); void __init poking_init(void) { - int ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, - "powerpc/text_poke:online", - text_area_cpu_up, - text_area_cpu_down); + int ret; + + if (mm_patch_enabled()) + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, + "powerpc/text_poke_mm:online", + text_area_cpu_up_mm, + text_area_cpu_down_mm); + else + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, + "powerpc/text_poke:online", + text_area_cpu_up, + text_area_cpu_down); /* cpuhp_setup_state returns >= 0 on success */ if (WARN_ON(ret < 0)) @@ -148,6 +268,50 @@ static void unmap_patch_area(unsigned long addr) flush_tlb_kernel_range(addr, addr + PAGE_SIZE); } +static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr) +{ + int err; + u32 *patch_addr; + unsigned long text_poke_addr; + pte_t *pte; + unsigned long pfn = get_patch_pfn(addr); + struct mm_struct *patching_mm; + struct mm_struct *orig_mm; + + patching_mm = __this_cpu_read(cpu_patching_mm); + pte = __this_cpu_read(cpu_patching_pte); + text_poke_addr = __this_cpu_read(cpu_patching_addr); + patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr)); + + __set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0); + + /* order PTE update before use, also serves as 
the hwsync */ + asm volatile("ptesync": : :"memory"); + + /* order context switch after arbitrary prior code */ + isync(); + + orig_mm = start_using_temp_mm(patching_mm); + + err = __patch_instruction(addr, instr, patch_addr); + + /* hwsync performed by __patch_instruction (sync) if successful */ + if (err) + mb(); /* sync */ + + /* context synchronisation performed by __patch_instruction (isync or exception) */ + stop_using_temp_mm(patching_mm, orig_mm); + + pte_clear(patching_mm, text_poke_addr, pte); + /* + * ptesync to order PTE update before TLB invalidation done + * by radix__local_flush_tlb_page_psize (in _tlbiel_va) + */ + local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize); + + return err; +} + static int __do_patch_instruction(u32 *addr, ppc_inst_t instr) { int err; @@ -187,7 +351,10 @@ static int do_patch_instruction(u32 *addr, ppc_inst_t instr) return raw_patch_instruction(addr, instr); local_irq_save(flags); - err = __do_patch_instruction(addr, instr); + if (mm_patch_enabled()) + err = __do_patch_instruction_mm(addr, instr); + else + err = __do_patch_instruction(addr, instr); local_irq_restore(flags); return err; From 2f228ee1ade5d8d1f26cf94863a36c5693023c58 Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Wed, 9 Nov 2022 15:51:12 +1100 Subject: [PATCH 3196/4122] powerpc/code-patching: Consolidate and cache per-cpu patching context With the temp mm context support, there are CPU local variables to hold the patch address and pte. Use these in the non-temp mm path as well instead of adding a level of indirection through the text_poke_area vm_struct and pointer chasing the pte. As both paths use these fields now, there is no need to let unreferenced variables be dropped by the compiler, so it is cleaner to merge them into a single context struct. This has the additional benefit of removing a redundant CPU local pointer, as only one of cpu_patching_mm / text_poke_area is ever used, while remaining well-typed. 
It also groups each CPU's data into a single cacheline. Signed-off-by: Benjamin Gray [mpe: Shorten name to 'area' as suggested by Christophe] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221109045112.187069-10-bgray@linux.ibm.com --- arch/powerpc/lib/code-patching.c | 49 +++++++++++++++++++------------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index a1902241ff5d..5b8f87db1217 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -48,10 +48,16 @@ int raw_patch_instruction(u32 *addr, ppc_inst_t instr) #ifdef CONFIG_STRICT_KERNEL_RWX -static DEFINE_PER_CPU(struct vm_struct *, text_poke_area); -static DEFINE_PER_CPU(struct mm_struct *, cpu_patching_mm); -static DEFINE_PER_CPU(unsigned long, cpu_patching_addr); -static DEFINE_PER_CPU(pte_t *, cpu_patching_pte); +struct patch_context { + union { + struct vm_struct *area; + struct mm_struct *mm; + }; + unsigned long addr; + pte_t *pte; +}; + +static DEFINE_PER_CPU(struct patch_context, cpu_patching_context); static int map_patch_area(void *addr, unsigned long text_poke_addr); static void unmap_patch_area(unsigned long addr); @@ -116,14 +122,19 @@ static int text_area_cpu_up(unsigned int cpu) unmap_patch_area(addr); - this_cpu_write(text_poke_area, area); + this_cpu_write(cpu_patching_context.area, area); + this_cpu_write(cpu_patching_context.addr, addr); + this_cpu_write(cpu_patching_context.pte, virt_to_kpte(addr)); return 0; } static int text_area_cpu_down(unsigned int cpu) { - free_vm_area(this_cpu_read(text_poke_area)); + free_vm_area(this_cpu_read(cpu_patching_context.area)); + this_cpu_write(cpu_patching_context.area, NULL); + this_cpu_write(cpu_patching_context.addr, 0); + this_cpu_write(cpu_patching_context.pte, NULL); return 0; } @@ -167,9 +178,9 @@ static int text_area_cpu_up_mm(unsigned int cpu) goto fail_no_pte; pte_unmap_unlock(pte, ptl); - 
this_cpu_write(cpu_patching_mm, mm); - this_cpu_write(cpu_patching_addr, addr); - this_cpu_write(cpu_patching_pte, pte); + this_cpu_write(cpu_patching_context.mm, mm); + this_cpu_write(cpu_patching_context.addr, addr); + this_cpu_write(cpu_patching_context.pte, pte); return 0; @@ -181,12 +192,12 @@ fail_no_mm: static int text_area_cpu_down_mm(unsigned int cpu) { - put_patching_mm(this_cpu_read(cpu_patching_mm), - this_cpu_read(cpu_patching_addr)); + put_patching_mm(this_cpu_read(cpu_patching_context.mm), + this_cpu_read(cpu_patching_context.addr)); - this_cpu_write(cpu_patching_mm, NULL); - this_cpu_write(cpu_patching_addr, 0); - this_cpu_write(cpu_patching_pte, NULL); + this_cpu_write(cpu_patching_context.mm, NULL); + this_cpu_write(cpu_patching_context.addr, 0); + this_cpu_write(cpu_patching_context.pte, NULL); return 0; } @@ -278,9 +289,9 @@ static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr) struct mm_struct *patching_mm; struct mm_struct *orig_mm; - patching_mm = __this_cpu_read(cpu_patching_mm); - pte = __this_cpu_read(cpu_patching_pte); - text_poke_addr = __this_cpu_read(cpu_patching_addr); + patching_mm = __this_cpu_read(cpu_patching_context.mm); + pte = __this_cpu_read(cpu_patching_context.pte); + text_poke_addr = __this_cpu_read(cpu_patching_context.addr); patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr)); __set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0); @@ -320,10 +331,10 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t instr) pte_t *pte; unsigned long pfn = get_patch_pfn(addr); - text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr & PAGE_MASK; + text_poke_addr = (unsigned long)__this_cpu_read(cpu_patching_context.addr) & PAGE_MASK; patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr)); - pte = virt_to_kpte(text_poke_addr); + pte = __this_cpu_read(cpu_patching_context.pte); __set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0); /* See ptesync 
comment in radix__set_pte_at() */ if (radix_enabled()) From f9231a996e229c13d23f907352c2cea84bd1c30a Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 28 Nov 2022 14:15:36 +1000 Subject: [PATCH 3197/4122] module: add module_elf_check_arch for module-specific checks The elf_check_arch() function is also used to test compatibility of usermode binaries. Kernel modules may have more specific requirements, for example powerpc would like to test for ABI version compatibility. Add a weak module_elf_check_arch() that defaults to true, and call it from elf_validity_check(). Signed-off-by: Jessica Yu [np: added changelog, adjust name, rebase] Acked-by: Luis Chamberlain Signed-off-by: Nicholas Piggin Reviewed-by: Joel Stanley Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221128041539.1742489-2-npiggin@gmail.com --- include/linux/moduleloader.h | 3 +++ kernel/module/main.c | 10 ++++++++++ 2 files changed, 13 insertions(+) diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index 9e09d11ffe5b..7b4587a19189 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -13,6 +13,9 @@ * must be implemented by each architecture. */ +/* arch may override to do additional checking of ELF header architecture */ +bool module_elf_check_arch(Elf_Ehdr *hdr); + /* Adjust arch-specific sections. Return 0 on success. 
*/ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, diff --git a/kernel/module/main.c b/kernel/module/main.c index d02d39c7174e..7b3f6fb0d428 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -1674,6 +1674,11 @@ static int elf_validity_check(struct load_info *info) info->hdr->e_machine); goto no_exec; } + if (!module_elf_check_arch(info->hdr)) { + pr_err("Invalid module architecture in ELF header: %u\n", + info->hdr->e_machine); + goto no_exec; + } if (info->hdr->e_shentsize != sizeof(Elf_Shdr)) { pr_err("Invalid ELF section header size\n"); goto no_exec; @@ -2247,6 +2252,11 @@ static void flush_module_icache(const struct module *mod) (unsigned long)mod->core_layout.base + mod->core_layout.size); } +bool __weak module_elf_check_arch(Elf_Ehdr *hdr) +{ + return true; +} + int __weak module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, char *secstrings, From de3d098dd1fc635535e3689c5d4aa0684242adde Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 28 Nov 2022 14:15:37 +1000 Subject: [PATCH 3198/4122] powerpc/64: Add module check for ELF ABI version Override the generic module ELF check to provide a check for the ELF ABI version. This becomes important if we allow big-endian ELF ABI V2 builds but it doesn't hurt to check now. Signed-off-by: Nicholas Piggin Reviewed-by: Joel Stanley Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221128041539.1742489-3-npiggin@gmail.com --- arch/powerpc/kernel/module_64.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 7e45dc98df8a..ff045644f13f 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -31,6 +31,16 @@ this, and makes other things simpler. Anton? --RR. 
*/ +bool module_elf_check_arch(Elf_Ehdr *hdr) +{ + unsigned long abi_level = hdr->e_flags & 0x3; + + if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2)) + return abi_level == 2; + else + return abi_level < 2; +} + #ifdef CONFIG_PPC64_ELF_ABI_V2 static func_desc_t func_desc(unsigned long addr) From 505ea33089dcfc3ee3201b0fcb94751165805413 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 28 Nov 2022 14:15:38 +1000 Subject: [PATCH 3199/4122] powerpc/64: Add big-endian ELFv2 flavour to crypto VMX asm generation This allows asm generation for big-endian ELFv2 builds. Signed-off-by: Nicholas Piggin Reviewed-by: Joel Stanley Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221128041539.1742489-4-npiggin@gmail.com --- drivers/crypto/vmx/Makefile | 12 +++++++++++- drivers/crypto/vmx/ppc-xlate.pl | 10 ++++++---- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/crypto/vmx/Makefile b/drivers/crypto/vmx/Makefile index 2560cfea1dec..e33c7238e7f8 100644 --- a/drivers/crypto/vmx/Makefile +++ b/drivers/crypto/vmx/Makefile @@ -2,8 +2,18 @@ obj-$(CONFIG_CRYPTO_DEV_VMX_ENCRYPT) += vmx-crypto.o vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o aes_xts.o ghash.o +ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) +override flavour := linux-ppc64le +else +ifdef CONFIG_PPC64_ELF_ABI_V2 +override flavour := linux-ppc64-elfv2 +else +override flavour := linux-ppc64 +endif +endif + quiet_cmd_perl = PERL $@ - cmd_perl = $(PERL) $< $(if $(CONFIG_CPU_LITTLE_ENDIAN), linux-ppc64le, linux-ppc64) > $@ + cmd_perl = $(PERL) $< $(flavour) > $@ targets += aesp8-ppc.S ghashp8-ppc.S diff --git a/drivers/crypto/vmx/ppc-xlate.pl b/drivers/crypto/vmx/ppc-xlate.pl index 36db2ef09e5b..b583898c11ae 100644 --- a/drivers/crypto/vmx/ppc-xlate.pl +++ b/drivers/crypto/vmx/ppc-xlate.pl @@ -9,6 +9,8 @@ open STDOUT,">$output" || die "can't open $output: $!"; my %GLOBALS; my $dotinlocallabels=($flavour=~/linux/)?1:0; +my $elfv2abi=(($flavour =~ /linux-ppc64le/) or 
($flavour =~ /linux-ppc64-elfv2/))?1:0; +my $dotfunctions=($elfv2abi=~1)?0:1; ################################################################ # directives which need special treatment on different platforms @@ -40,7 +42,7 @@ my $globl = sub { }; my $text = sub { my $ret = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text"; - $ret = ".abiversion 2\n".$ret if ($flavour =~ /linux.*64le/); + $ret = ".abiversion 2\n".$ret if ($elfv2abi); $ret; }; my $machine = sub { @@ -56,8 +58,8 @@ my $size = sub { if ($flavour =~ /linux/) { shift; my $name = shift; $name =~ s|^[\.\_]||; - my $ret = ".size $name,.-".($flavour=~/64$/?".":"").$name; - $ret .= "\n.size .$name,.-.$name" if ($flavour=~/64$/); + my $ret = ".size $name,.-".($dotfunctions?".":"").$name; + $ret .= "\n.size .$name,.-.$name" if ($dotfunctions); $ret; } else @@ -142,7 +144,7 @@ my $vmr = sub { # Some ABIs specify vrsave, special-purpose register #256, as reserved # for system use. -my $no_vrsave = ($flavour =~ /linux-ppc64le/); +my $no_vrsave = ($elfv2abi); my $mtspr = sub { my ($f,$idx,$ra) = @_; if ($idx == 256 && $no_vrsave) { From 5017b45946722bdd20ac255c9ae7273b78d1f12e Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 28 Nov 2022 14:15:39 +1000 Subject: [PATCH 3200/4122] powerpc/64: Option to build big-endian with ELFv2 ABI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provide an option to build big-endian kernels using the ELFv2 ABI. This works on GCC only for now. Clang is rumored to support this, but core build files need updating first, at least. This gives big-endian kernels useful advantages of the ELFv2 ABI, e.g., less stack usage, -mprofile-kernel support, better compatibility with eBPF tools. BE+ELFv2 is not officially supported by the GNU toolchain, but it works fine in testing and has been used by some userspace for some time (e.g., Void Linux). 
Tested-by: Michal Suchánek Reviewed-by: Segher Boessenkool Signed-off-by: Nicholas Piggin Reviewed-by: Joel Stanley Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221128041539.1742489-5-npiggin@gmail.com --- arch/powerpc/Kconfig | 21 +++++++++++++++++++++ arch/powerpc/platforms/Kconfig.cputype | 4 ++-- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index abaf1ef1795c..1a134c9769f8 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -1,6 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 source "arch/powerpc/platforms/Kconfig.cputype" +config CC_HAS_ELFV2 + def_bool PPC64 && $(cc-option, -mabi=elfv2) + config 32BIT bool default y if PPC32 @@ -586,6 +589,24 @@ config KEXEC_FILE config ARCH_HAS_KEXEC_PURGATORY def_bool KEXEC_FILE +config PPC64_BIG_ENDIAN_ELF_ABI_V2 + bool "Build big-endian kernel using ELF ABI V2 (EXPERIMENTAL)" + depends on PPC64 && CPU_BIG_ENDIAN + depends on CC_HAS_ELFV2 + depends on LD_IS_BFD && LD_VERSION >= 22400 + default n + help + This builds the kernel image using the "Power Architecture 64-Bit ELF + V2 ABI Specification", which has a reduced stack overhead and faster + function calls. This internal kernel ABI option does not affect + userspace compatibility. + + The V2 ABI is standard for 64-bit little-endian, but for big-endian + it is less well tested by kernel and toolchain. However some distros + build userspace this way, and it can produce a functioning kernel. + + This requires GCC and binutils 2.24 or newer. 
+ config RELOCATABLE bool "Build a relocatable kernel" depends on PPC64 || (FLATMEM && (44x || PPC_85xx)) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 7bac213b4125..9563336e3348 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -580,10 +580,10 @@ config CPU_LITTLE_ENDIAN endchoice config PPC64_ELF_ABI_V1 - def_bool PPC64 && CPU_BIG_ENDIAN + def_bool PPC64 && (CPU_BIG_ENDIAN && !PPC64_BIG_ENDIAN_ELF_ABI_V2) config PPC64_ELF_ABI_V2 - def_bool PPC64 && CPU_LITTLE_ENDIAN + def_bool PPC64 && !PPC64_ELF_ABI_V1 config PPC64_BOOT_WRAPPER def_bool n From d6aee468e4ecbfec46a3eafae4d31d6efc0d4da4 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 27 Nov 2022 22:49:27 +1000 Subject: [PATCH 3201/4122] powerpc/64: Remove asm interrupt tracing call helpers These are now unused. Remove. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221127124942.1665522-3-npiggin@gmail.com --- arch/powerpc/include/asm/irqflags.h | 58 ----------------------------- 1 file changed, 58 deletions(-) diff --git a/arch/powerpc/include/asm/irqflags.h b/arch/powerpc/include/asm/irqflags.h index 1a6c1ce17735..47d46712928a 100644 --- a/arch/powerpc/include/asm/irqflags.h +++ b/arch/powerpc/include/asm/irqflags.h @@ -11,64 +11,6 @@ */ #include -#else -#ifdef CONFIG_TRACE_IRQFLAGS -#ifdef CONFIG_IRQSOFF_TRACER -/* - * Since the ftrace irqsoff latency trace checks CALLER_ADDR1, - * which is the stack frame here, we need to force a stack frame - * in case we came from user space. - */ -#define TRACE_WITH_FRAME_BUFFER(func) \ - mflr r0; \ - stdu r1, -STACK_FRAME_OVERHEAD(r1); \ - std r0, 16(r1); \ - stdu r1, -STACK_FRAME_OVERHEAD(r1); \ - bl func; \ - ld r1, 0(r1); \ - ld r1, 0(r1); -#else -#define TRACE_WITH_FRAME_BUFFER(func) \ - bl func; -#endif - -/* - * These are calls to C code, so the caller must be prepared for volatiles to - * be clobbered. 
- */ -#define TRACE_ENABLE_INTS TRACE_WITH_FRAME_BUFFER(trace_hardirqs_on) -#define TRACE_DISABLE_INTS TRACE_WITH_FRAME_BUFFER(trace_hardirqs_off) - -/* - * This is used by assembly code to soft-disable interrupts first and - * reconcile irq state. - * - * NB: This may call C code, so the caller must be prepared for volatiles to - * be clobbered. - */ -#define RECONCILE_IRQ_STATE(__rA, __rB) \ - lbz __rA,PACAIRQSOFTMASK(r13); \ - lbz __rB,PACAIRQHAPPENED(r13); \ - andi. __rA,__rA,IRQS_DISABLED; \ - li __rA,IRQS_DISABLED; \ - ori __rB,__rB,PACA_IRQ_HARD_DIS; \ - stb __rB,PACAIRQHAPPENED(r13); \ - bne 44f; \ - stb __rA,PACAIRQSOFTMASK(r13); \ - TRACE_DISABLE_INTS; \ -44: - -#else -#define TRACE_ENABLE_INTS -#define TRACE_DISABLE_INTS - -#define RECONCILE_IRQ_STATE(__rA, __rB) \ - lbz __rA,PACAIRQHAPPENED(r13); \ - li __rB,IRQS_DISABLED; \ - ori __rA,__rA,PACA_IRQ_HARD_DIS; \ - stb __rB,PACAIRQSOFTMASK(r13); \ - stb __rA,PACAIRQHAPPENED(r13) -#endif #endif #endif From 32c5209214bd8d4f8c4e9d9b630ef4c671f58e79 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 27 Nov 2022 22:49:28 +1000 Subject: [PATCH 3202/4122] powerpc/perf: callchain validate kernel stack pointer bounds The interrupt frame detection and loads from the hypothetical pt_regs are not bounds-checked. The next-frame validation only bounds-checks STACK_FRAME_OVERHEAD, which does not include the pt_regs. Add another test for this. The user could set r1 to be equal to the address matching the first interrupt frame - STACK_INT_FRAME_SIZE, which is in the previous page due to the kernel redzone, and induce the kernel to load the marker from there. Possibly this could cause a crash at least. If the user could induce the previous page to contain a valid marker, then it might be able to direct perf to read specific memory addresses in a way that could be transmitted back to the user in the perf data. 
Fixes: 20002ded4d93 ("perf_counter: powerpc: Add callchain support") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221127124942.1665522-4-npiggin@gmail.com --- arch/powerpc/perf/callchain.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c index 082f6d0308a4..8718289c051d 100644 --- a/arch/powerpc/perf/callchain.c +++ b/arch/powerpc/perf/callchain.c @@ -61,6 +61,7 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re next_sp = fp[0]; if (next_sp == sp + STACK_INT_FRAME_SIZE && + validate_sp(sp, current, STACK_INT_FRAME_SIZE) && fp[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { /* * This looks like an interrupt frame for an From bc0677363d0ffaec0c56685291e97b080116976c Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 27 Nov 2022 22:49:29 +1000 Subject: [PATCH 3203/4122] powerpc: Rearrange copy_thread child stack creation This makes it a bit clearer where the stack frame is created, and will allow easier use of some of the stack offset constants in a later change. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221127124942.1665522-5-npiggin@gmail.com --- arch/powerpc/kernel/process.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 5265da2d8034..f93703ea4a12 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1755,13 +1755,16 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) klp_init_thread_info(p); + /* Create initial stack frame. 
*/ + sp -= (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD); + ((unsigned long *)sp)[0] = 0; + /* Copy registers */ - sp -= sizeof(struct pt_regs); - childregs = (struct pt_regs *) sp; + childregs = (struct pt_regs *)(sp + STACK_FRAME_OVERHEAD); if (unlikely(args->fn)) { /* kernel thread */ memset(childregs, 0, sizeof(struct pt_regs)); - childregs->gpr[1] = sp + sizeof(struct pt_regs); + childregs->gpr[1] = sp + (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD); /* function */ if (args->fn) childregs->gpr[14] = ppc_function_entry((void *)args->fn); @@ -1796,7 +1799,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) f = ret_from_fork; } childregs->msr &= ~(MSR_FP|MSR_VEC|MSR_VSX); - sp -= STACK_FRAME_OVERHEAD; /* * The way this works is that at some point in the future @@ -1806,7 +1808,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) * do some house keeping and then return from the fork or clone * system call, using the stack frame created above. */ - ((unsigned long *)sp)[0] = 0; sp -= sizeof(struct pt_regs); kregs = (struct pt_regs *) sp; sp -= STACK_FRAME_OVERHEAD; From baa49d81a94bb4170e7f2f4d97016772117d0f60 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 27 Nov 2022 22:49:30 +1000 Subject: [PATCH 3204/4122] powerpc/pseries: hvcall stack frame overhead This call may use the min size stack frame. The scratch space used is in the caller's parameter area frame, not this function's frame. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221127124942.1665522-6-npiggin@gmail.com --- arch/powerpc/platforms/pseries/hvCall.S | 38 +++++++++++++------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index 762eb15d3bd4..783c16ad648b 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -27,7 +27,9 @@ hcall_tracepoint_refcount: /* * precall must preserve all registers. use unused STK_PARAM() - * areas to save snapshots and opcode. + * areas to save snapshots and opcode. STK_PARAM() in the caller's + * frame will be available even on ELFv2 because these are all + * variadic functions. */ #define HCALL_INST_PRECALL(FIRST_REG) \ mflr r0; \ @@ -41,29 +43,29 @@ hcall_tracepoint_refcount: std r10,STK_PARAM(R10)(r1); \ std r0,16(r1); \ addi r4,r1,STK_PARAM(FIRST_REG); \ - stdu r1,-STACK_FRAME_OVERHEAD(r1); \ + stdu r1,-STACK_FRAME_MIN_SIZE(r1); \ bl __trace_hcall_entry; \ - ld r3,STACK_FRAME_OVERHEAD+STK_PARAM(R3)(r1); \ - ld r4,STACK_FRAME_OVERHEAD+STK_PARAM(R4)(r1); \ - ld r5,STACK_FRAME_OVERHEAD+STK_PARAM(R5)(r1); \ - ld r6,STACK_FRAME_OVERHEAD+STK_PARAM(R6)(r1); \ - ld r7,STACK_FRAME_OVERHEAD+STK_PARAM(R7)(r1); \ - ld r8,STACK_FRAME_OVERHEAD+STK_PARAM(R8)(r1); \ - ld r9,STACK_FRAME_OVERHEAD+STK_PARAM(R9)(r1); \ - ld r10,STACK_FRAME_OVERHEAD+STK_PARAM(R10)(r1) + ld r3,STACK_FRAME_MIN_SIZE+STK_PARAM(R3)(r1); \ + ld r4,STACK_FRAME_MIN_SIZE+STK_PARAM(R4)(r1); \ + ld r5,STACK_FRAME_MIN_SIZE+STK_PARAM(R5)(r1); \ + ld r6,STACK_FRAME_MIN_SIZE+STK_PARAM(R6)(r1); \ + ld r7,STACK_FRAME_MIN_SIZE+STK_PARAM(R7)(r1); \ + ld r8,STACK_FRAME_MIN_SIZE+STK_PARAM(R8)(r1); \ + ld r9,STACK_FRAME_MIN_SIZE+STK_PARAM(R9)(r1); \ + ld r10,STACK_FRAME_MIN_SIZE+STK_PARAM(R10)(r1) /* * postcall is performed immediately before function return which * allows liberal use of volatile registers. 
*/ #define __HCALL_INST_POSTCALL \ - ld r0,STACK_FRAME_OVERHEAD+STK_PARAM(R3)(r1); \ - std r3,STACK_FRAME_OVERHEAD+STK_PARAM(R3)(r1); \ + ld r0,STACK_FRAME_MIN_SIZE+STK_PARAM(R3)(r1); \ + std r3,STACK_FRAME_MIN_SIZE+STK_PARAM(R3)(r1); \ mr r4,r3; \ mr r3,r0; \ bl __trace_hcall_exit; \ - ld r0,STACK_FRAME_OVERHEAD+16(r1); \ - addi r1,r1,STACK_FRAME_OVERHEAD; \ + ld r0,STACK_FRAME_MIN_SIZE+16(r1); \ + addi r1,r1,STACK_FRAME_MIN_SIZE; \ ld r3,STK_PARAM(R3)(r1); \ mtlr r0 @@ -303,14 +305,14 @@ plpar_hcall9_trace: mr r7,r8 mr r8,r9 mr r9,r10 - ld r10,STACK_FRAME_OVERHEAD+STK_PARAM(R11)(r1) - ld r11,STACK_FRAME_OVERHEAD+STK_PARAM(R12)(r1) - ld r12,STACK_FRAME_OVERHEAD+STK_PARAM(R13)(r1) + ld r10,STACK_FRAME_MIN_SIZE+STK_PARAM(R11)(r1) + ld r11,STACK_FRAME_MIN_SIZE+STK_PARAM(R12)(r1) + ld r12,STACK_FRAME_MIN_SIZE+STK_PARAM(R13)(r1) HVSC mr r0,r12 - ld r12,STACK_FRAME_OVERHEAD+STK_PARAM(R4)(r1) + ld r12,STACK_FRAME_MIN_SIZE+STK_PARAM(R4)(r1) std r4,0(r12) std r5,8(r12) std r6,16(r12) From 37195b820d32c23bdefce3f460ed7de48a57e5e4 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 27 Nov 2022 22:49:31 +1000 Subject: [PATCH 3205/4122] powerpc: simplify ppc_save_regs Adjust the pt_regs pointer so the interrupt frame offsets can be used to save registers. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221127124942.1665522-7-npiggin@gmail.com --- arch/powerpc/kernel/ppc_save_regs.S | 57 ++++++++--------------------- 1 file changed, 15 insertions(+), 42 deletions(-) diff --git a/arch/powerpc/kernel/ppc_save_regs.S b/arch/powerpc/kernel/ppc_save_regs.S index 2d4d21bb46a9..6e86f3bf4673 100644 --- a/arch/powerpc/kernel/ppc_save_regs.S +++ b/arch/powerpc/kernel/ppc_save_regs.S @@ -21,60 +21,33 @@ * different ABIs, though). 
*/ _GLOBAL(ppc_save_regs) - PPC_STL r0,0*SZL(r3) + /* This allows stack frame accessor macros and offsets to be used */ + subi r3,r3,STACK_FRAME_OVERHEAD + PPC_STL r0,GPR0(r3) #ifdef CONFIG_PPC32 - stmw r2, 2*SZL(r3) + stmw r2,GPR2(r3) #else - PPC_STL r2,2*SZL(r3) - PPC_STL r3,3*SZL(r3) - PPC_STL r4,4*SZL(r3) - PPC_STL r5,5*SZL(r3) - PPC_STL r6,6*SZL(r3) - PPC_STL r7,7*SZL(r3) - PPC_STL r8,8*SZL(r3) - PPC_STL r9,9*SZL(r3) - PPC_STL r10,10*SZL(r3) - PPC_STL r11,11*SZL(r3) - PPC_STL r12,12*SZL(r3) - PPC_STL r13,13*SZL(r3) - PPC_STL r14,14*SZL(r3) - PPC_STL r15,15*SZL(r3) - PPC_STL r16,16*SZL(r3) - PPC_STL r17,17*SZL(r3) - PPC_STL r18,18*SZL(r3) - PPC_STL r19,19*SZL(r3) - PPC_STL r20,20*SZL(r3) - PPC_STL r21,21*SZL(r3) - PPC_STL r22,22*SZL(r3) - PPC_STL r23,23*SZL(r3) - PPC_STL r24,24*SZL(r3) - PPC_STL r25,25*SZL(r3) - PPC_STL r26,26*SZL(r3) - PPC_STL r27,27*SZL(r3) - PPC_STL r28,28*SZL(r3) - PPC_STL r29,29*SZL(r3) - PPC_STL r30,30*SZL(r3) - PPC_STL r31,31*SZL(r3) + SAVE_GPRS(2, 31, r3) lbz r0,PACAIRQSOFTMASK(r13) - PPC_STL r0,SOFTE-STACK_FRAME_OVERHEAD(r3) + PPC_STL r0,SOFTE(r3) #endif /* go up one stack frame for SP */ PPC_LL r4,0(r1) - PPC_STL r4,1*SZL(r3) + PPC_STL r4,GPR1(r3) /* get caller's LR */ PPC_LL r0,LRSAVE(r4) - PPC_STL r0,_LINK-STACK_FRAME_OVERHEAD(r3) + PPC_STL r0,_LINK(r3) mflr r0 - PPC_STL r0,_NIP-STACK_FRAME_OVERHEAD(r3) + PPC_STL r0,_NIP(r3) mfmsr r0 - PPC_STL r0,_MSR-STACK_FRAME_OVERHEAD(r3) + PPC_STL r0,_MSR(r3) mfctr r0 - PPC_STL r0,_CTR-STACK_FRAME_OVERHEAD(r3) + PPC_STL r0,_CTR(r3) mfxer r0 - PPC_STL r0,_XER-STACK_FRAME_OVERHEAD(r3) + PPC_STL r0,_XER(r3) mfcr r0 - PPC_STL r0,_CCR-STACK_FRAME_OVERHEAD(r3) + PPC_STL r0,_CCR(r3) li r0,0 - PPC_STL r0,_TRAP-STACK_FRAME_OVERHEAD(r3) - PPC_STL r0,ORIG_GPR3-STACK_FRAME_OVERHEAD(r3) + PPC_STL r0,_TRAP(r3) + PPC_STL r0,ORIG_GPR3(r3) blr From c03be0a3f3cc656eab5c427b78959b8f1b169a11 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 27 Nov 2022 22:49:32 +1000 Subject: [PATCH 3206/4122] powerpc: add 
definition for pt_regs offset within an interrupt frame This is a common offset that currently uses the overloaded STACK_FRAME_OVERHEAD constant. It's easier to read and more flexible to use a specific regs offset for this. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221127124942.1665522-8-npiggin@gmail.com --- arch/powerpc/include/asm/ptrace.h | 2 + arch/powerpc/kernel/asm-offsets.c | 7 +- arch/powerpc/kernel/entry_32.S | 6 +- arch/powerpc/kernel/exceptions-64e.S | 42 +++++----- arch/powerpc/kernel/exceptions-64s.S | 80 +++++++++---------- arch/powerpc/kernel/head_32.h | 2 +- arch/powerpc/kernel/head_85xx.S | 4 +- arch/powerpc/kernel/head_booke.h | 2 +- arch/powerpc/kernel/interrupt_64.S | 22 ++--- arch/powerpc/kernel/kgdb.c | 2 +- arch/powerpc/kernel/optprobes_head.S | 4 +- arch/powerpc/kernel/ppc_save_regs.S | 2 +- arch/powerpc/kernel/process.c | 4 +- arch/powerpc/kernel/tm.S | 8 +- arch/powerpc/kernel/trace/ftrace_mprofile.S | 2 +- .../lib/test_emulate_step_exec_instr.S | 2 +- arch/powerpc/perf/callchain.c | 2 +- arch/powerpc/xmon/xmon.c | 7 +- 18 files changed, 100 insertions(+), 100 deletions(-) diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 2efec6d87049..a4ae67aa9b76 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -124,6 +124,7 @@ struct pt_regs #define STACK_FRAME_LR_SAVE 2 /* Location of LR in stack frame */ #define STACK_INT_FRAME_SIZE (sizeof(struct pt_regs) + \ STACK_FRAME_OVERHEAD + KERNEL_REDZONE_SIZE) +#define STACK_INT_FRAME_REGS STACK_FRAME_OVERHEAD #define STACK_FRAME_MARKER 12 #ifdef CONFIG_PPC64_ELF_ABI_V2 @@ -143,6 +144,7 @@ struct pt_regs #define STACK_FRAME_OVERHEAD 16 /* size of minimum stack frame */ #define STACK_FRAME_LR_SAVE 1 /* Location of LR in stack frame */ #define STACK_INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD) +#define STACK_INT_FRAME_REGS STACK_FRAME_OVERHEAD #define 
STACK_FRAME_MARKER 2 #define STACK_FRAME_MIN_SIZE STACK_FRAME_OVERHEAD diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index b4b661f631f5..68905c9f7c21 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -72,7 +72,7 @@ #endif #define STACK_PT_REGS_OFFSET(sym, val) \ - DEFINE(sym, STACK_FRAME_OVERHEAD + offsetof(struct pt_regs, val)) + DEFINE(sym, STACK_INT_FRAME_REGS + offsetof(struct pt_regs, val)) int main(void) { @@ -167,9 +167,8 @@ int main(void) OFFSET(THREAD_CKVRSTATE, thread_struct, ckvr_state.vr); OFFSET(THREAD_CKVRSAVE, thread_struct, ckvrsave); OFFSET(THREAD_CKFPSTATE, thread_struct, ckfp_state.fpr); - /* Local pt_regs on stack for Transactional Memory funcs. */ - DEFINE(TM_FRAME_SIZE, STACK_FRAME_OVERHEAD + - sizeof(struct pt_regs) + 16); + /* Local pt_regs on stack in int frame form, plus 16 bytes for TM */ + DEFINE(TM_FRAME_SIZE, STACK_INT_FRAME_SIZE + 16); #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ OFFSET(TI_LOCAL_FLAGS, thread_info, local_flags); diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 3fc7c9886bb7..24c8d84a56c9 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -123,12 +123,12 @@ transfer_to_syscall: kuep_lock /* Calling convention has r3 = regs, r4 = orig r0 */ - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS mr r4,r0 bl system_call_exception ret_from_syscall: - addi r4,r1,STACK_FRAME_OVERHEAD + addi r4,r1,STACK_INT_FRAME_REGS li r5,0 bl syscall_exit_prepare #ifdef CONFIG_PPC_47x @@ -293,7 +293,7 @@ _ASM_NOKPROBE_SYMBOL(fast_exception_return) .globl interrupt_return interrupt_return: lwz r4,_MSR(r1) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS andi. 
r0,r4,MSR_PR beq .Lkernel_interrupt_return bl interrupt_exit_user_prepare diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 2f68fb2ee4fc..62033d022e0a 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -455,7 +455,7 @@ exc_##n##_bad_stack: \ EXCEPTION_COMMON(trapnum) \ ack(r8); \ CHECK_NAPPING(); \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ + addi r3,r1,STACK_INT_FRAME_REGS; \ bl hdlr; \ b interrupt_return @@ -504,7 +504,7 @@ __end_interrupts: EXCEPTION_COMMON_CRIT(0x100) bl special_reg_save CHECK_NAPPING(); - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unknown_nmi_exception b ret_from_crit_except @@ -515,7 +515,7 @@ __end_interrupts: EXCEPTION_COMMON_MC(0x000) bl special_reg_save CHECK_NAPPING(); - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl machine_check_exception b ret_from_mc_except @@ -570,7 +570,7 @@ __end_interrupts: std r14,_ESR(r1) ld r14,PACA_EXGEN+EX_R14(r13) EXCEPTION_COMMON(0x700) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl program_check_exception REST_NVGPRS(r1) b interrupt_return @@ -586,7 +586,7 @@ __end_interrupts: beq- 1f bl load_up_fpu b fast_interrupt_return -1: addi r3,r1,STACK_FRAME_OVERHEAD +1: addi r3,r1,STACK_INT_FRAME_REGS bl kernel_fp_unavailable_exception b interrupt_return @@ -606,7 +606,7 @@ BEGIN_FTR_SECTION 1: END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl altivec_unavailable_exception b interrupt_return @@ -616,7 +616,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) BOOKE_INTERRUPT_ALTIVEC_ASSIST, PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x220) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS #ifdef CONFIG_ALTIVEC BEGIN_FTR_SECTION bl altivec_assist_exception @@ -643,7 +643,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) EXCEPTION_COMMON_CRIT(0x9f0) bl special_reg_save CHECK_NAPPING(); - addi 
r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS #ifdef CONFIG_BOOKE_WDT bl WatchdogException #else @@ -664,7 +664,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) NORMAL_EXCEPTION_PROLOG(0xf20, BOOKE_INTERRUPT_AP_UNAVAIL, PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0xf20) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unknown_exception b interrupt_return @@ -731,7 +731,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) ld r14,PACA_EXCRIT+EX_R14(r13) ld r15,PACA_EXCRIT+EX_R15(r13) EXCEPTION_COMMON_CRIT(0xd00) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl DebugException REST_NVGPRS(r1) b interrupt_return @@ -802,7 +802,7 @@ kernel_dbg_exc: ld r14,PACA_EXDBG+EX_R14(r13) ld r15,PACA_EXDBG+EX_R15(r13) EXCEPTION_COMMON_DBG(0xd08) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl DebugException REST_NVGPRS(r1) b interrupt_return @@ -812,7 +812,7 @@ kernel_dbg_exc: PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x260) CHECK_NAPPING() - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS /* * XXX: Returning from performance_monitor_exception taken as a * soft-NMI (Linux irqs disabled) may be risky to use interrupt_return @@ -834,7 +834,7 @@ kernel_dbg_exc: EXCEPTION_COMMON_CRIT(0x2a0) bl special_reg_save CHECK_NAPPING(); - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unknown_nmi_exception b ret_from_crit_except @@ -846,7 +846,7 @@ kernel_dbg_exc: GDBELL_EXCEPTION_PROLOG(0x2c0, BOOKE_INTERRUPT_GUEST_DBELL, PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x2c0) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unknown_exception b interrupt_return @@ -857,7 +857,7 @@ kernel_dbg_exc: EXCEPTION_COMMON_CRIT(0x2e0) bl special_reg_save CHECK_NAPPING(); - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unknown_nmi_exception b ret_from_crit_except @@ -866,7 +866,7 @@ kernel_dbg_exc: NORMAL_EXCEPTION_PROLOG(0x310, BOOKE_INTERRUPT_HV_SYSCALL, 
PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x310) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unknown_exception b interrupt_return @@ -875,7 +875,7 @@ kernel_dbg_exc: NORMAL_EXCEPTION_PROLOG(0x320, BOOKE_INTERRUPT_HV_PRIV, PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x320) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unknown_exception b interrupt_return @@ -884,7 +884,7 @@ kernel_dbg_exc: NORMAL_EXCEPTION_PROLOG(0x340, BOOKE_INTERRUPT_LRAT_ERROR, PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x340) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unknown_exception b interrupt_return @@ -979,7 +979,7 @@ masked_interrupt_book3e_0x2c0: * original values stashed away in the PACA */ storage_fault_common: - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl do_page_fault b interrupt_return @@ -988,7 +988,7 @@ storage_fault_common: * continues here. */ alignment_more: - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl alignment_exception REST_NVGPRS(r1) b interrupt_return @@ -1069,7 +1069,7 @@ bad_stack_book3e: ZEROIZE_GPR(12) std r12,0(r11) LOAD_PACA_TOC() -1: addi r3,r1,STACK_FRAME_OVERHEAD +1: addi r3,r1,STACK_INT_FRAME_REGS bl kernel_bad_stack b 1b diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 651c36b056bd..29b78536ca59 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1061,7 +1061,7 @@ EXC_COMMON_BEGIN(system_reset_common) subi r1,r1,INT_FRAME_SIZE __GEN_COMMON_BODY system_reset - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl system_reset_exception /* Clear MSR_RI before setting SRR0 and SRR1. 
*/ @@ -1208,7 +1208,7 @@ EXC_COMMON_BEGIN(machine_check_early_common) BEGIN_FTR_SECTION bl enable_machine_check END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS BEGIN_FTR_SECTION bl machine_check_early_boot END_FTR_SECTION(0, 1) // nop out after boot @@ -1298,7 +1298,7 @@ EXC_COMMON_BEGIN(machine_check_common) * save area: PACA_EXMC instead of PACA_EXGEN. */ GEN_COMMON machine_check - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl machine_check_exception_async b interrupt_return_srr @@ -1364,14 +1364,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) * This is the NMI version of the handler because we are called from * the early handler which is a true NMI. */ - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl machine_check_exception /* * We will not reach here. Even if we did, there is no way out. * Call unrecoverable_exception and die. */ - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unrecoverable_exception b . @@ -1422,7 +1422,7 @@ EXC_VIRT_END(data_access, 0x4300, 0x80) EXC_COMMON_BEGIN(data_access_common) GEN_COMMON data_access ld r4,_DSISR(r1) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS andis. 
r0,r4,DSISR_DABRMATCH@h bne- 1f #ifdef CONFIG_PPC_64S_HASH_MMU @@ -1479,7 +1479,7 @@ EXC_COMMON_BEGIN(data_access_slb_common) #ifdef CONFIG_PPC_64S_HASH_MMU BEGIN_MMU_FTR_SECTION /* HPT case, do SLB fault */ - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl do_slb_fault cmpdi r3,0 bne- 1f @@ -1493,7 +1493,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) li r3,-EFAULT #endif std r3,RESULT(r1) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl do_bad_segment_interrupt b interrupt_return_srr @@ -1525,7 +1525,7 @@ EXC_VIRT_BEGIN(instruction_access, 0x4400, 0x80) EXC_VIRT_END(instruction_access, 0x4400, 0x80) EXC_COMMON_BEGIN(instruction_access_common) GEN_COMMON instruction_access - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS #ifdef CONFIG_PPC_64S_HASH_MMU BEGIN_MMU_FTR_SECTION bl do_hash_fault @@ -1567,7 +1567,7 @@ EXC_COMMON_BEGIN(instruction_access_slb_common) #ifdef CONFIG_PPC_64S_HASH_MMU BEGIN_MMU_FTR_SECTION /* HPT case, do SLB fault */ - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl do_slb_fault cmpdi r3,0 bne- 1f @@ -1581,7 +1581,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) li r3,-EFAULT #endif std r3,RESULT(r1) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl do_bad_segment_interrupt b interrupt_return_srr @@ -1635,7 +1635,7 @@ EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100) EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100) EXC_COMMON_BEGIN(hardware_interrupt_common) GEN_COMMON hardware_interrupt - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl do_IRQ BEGIN_FTR_SECTION b interrupt_return_hsrr @@ -1665,7 +1665,7 @@ EXC_VIRT_BEGIN(alignment, 0x4600, 0x100) EXC_VIRT_END(alignment, 0x4600, 0x100) EXC_COMMON_BEGIN(alignment_common) GEN_COMMON alignment - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl alignment_exception REST_NVGPRS(r1) /* instruction emulation may change GPRs */ b 
interrupt_return_srr @@ -1731,7 +1731,7 @@ EXC_COMMON_BEGIN(program_check_common) __GEN_COMMON_BODY program_check .Ldo_program_check: - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl program_check_exception REST_NVGPRS(r1) /* instruction emulation may change GPRs */ b interrupt_return_srr @@ -1762,7 +1762,7 @@ EXC_VIRT_END(fp_unavailable, 0x4800, 0x100) EXC_COMMON_BEGIN(fp_unavailable_common) GEN_COMMON fp_unavailable bne 1f /* if from user, just load it up */ - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl kernel_fp_unavailable_exception 0: trap EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0 @@ -1780,7 +1780,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) b fast_interrupt_return_srr #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl fp_unavailable_tm b interrupt_return_srr #endif @@ -1824,7 +1824,7 @@ EXC_VIRT_BEGIN(decrementer, 0x4900, 0x80) EXC_VIRT_END(decrementer, 0x4900, 0x80) EXC_COMMON_BEGIN(decrementer_common) GEN_COMMON decrementer - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl timer_interrupt b interrupt_return_srr @@ -1909,7 +1909,7 @@ EXC_VIRT_BEGIN(doorbell_super, 0x4a00, 0x100) EXC_VIRT_END(doorbell_super, 0x4a00, 0x100) EXC_COMMON_BEGIN(doorbell_super_common) GEN_COMMON doorbell_super - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS #ifdef CONFIG_PPC_DOORBELL bl doorbell_exception #else @@ -2076,7 +2076,7 @@ EXC_VIRT_BEGIN(single_step, 0x4d00, 0x100) EXC_VIRT_END(single_step, 0x4d00, 0x100) EXC_COMMON_BEGIN(single_step_common) GEN_COMMON single_step - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl single_step_exception b interrupt_return_srr @@ -2110,7 +2110,7 @@ EXC_VIRT_BEGIN(h_data_storage, 0x4e00, 0x20) EXC_VIRT_END(h_data_storage, 0x4e00, 0x20) EXC_COMMON_BEGIN(h_data_storage_common) GEN_COMMON h_data_storage - addi r3,r1,STACK_FRAME_OVERHEAD + addi 
r3,r1,STACK_INT_FRAME_REGS BEGIN_MMU_FTR_SECTION bl do_bad_page_fault_segv MMU_FTR_SECTION_ELSE @@ -2139,7 +2139,7 @@ EXC_VIRT_BEGIN(h_instr_storage, 0x4e20, 0x20) EXC_VIRT_END(h_instr_storage, 0x4e20, 0x20) EXC_COMMON_BEGIN(h_instr_storage_common) GEN_COMMON h_instr_storage - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unknown_exception b interrupt_return_hsrr @@ -2162,7 +2162,7 @@ EXC_VIRT_BEGIN(emulation_assist, 0x4e40, 0x20) EXC_VIRT_END(emulation_assist, 0x4e40, 0x20) EXC_COMMON_BEGIN(emulation_assist_common) GEN_COMMON emulation_assist - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl emulation_assist_interrupt REST_NVGPRS(r1) /* instruction emulation may change GPRs */ b interrupt_return_hsrr @@ -2222,7 +2222,7 @@ EXC_COMMON_BEGIN(hmi_exception_early_common) __GEN_COMMON_BODY hmi_exception_early - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl hmi_exception_realmode cmpdi cr0,r3,0 bne 1f @@ -2240,7 +2240,7 @@ EXC_COMMON_BEGIN(hmi_exception_early_common) EXC_COMMON_BEGIN(hmi_exception_common) GEN_COMMON hmi_exception - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl handle_hmi_exception b interrupt_return_hsrr @@ -2274,7 +2274,7 @@ EXC_VIRT_BEGIN(h_doorbell, 0x4e80, 0x20) EXC_VIRT_END(h_doorbell, 0x4e80, 0x20) EXC_COMMON_BEGIN(h_doorbell_common) GEN_COMMON h_doorbell - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS #ifdef CONFIG_PPC_DOORBELL bl doorbell_exception #else @@ -2310,7 +2310,7 @@ EXC_VIRT_BEGIN(h_virt_irq, 0x4ea0, 0x20) EXC_VIRT_END(h_virt_irq, 0x4ea0, 0x20) EXC_COMMON_BEGIN(h_virt_irq_common) GEN_COMMON h_virt_irq - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl do_IRQ b interrupt_return_hsrr @@ -2356,7 +2356,7 @@ EXC_VIRT_BEGIN(performance_monitor, 0x4f00, 0x20) EXC_VIRT_END(performance_monitor, 0x4f00, 0x20) EXC_COMMON_BEGIN(performance_monitor_common) GEN_COMMON performance_monitor - addi r3,r1,STACK_FRAME_OVERHEAD + 
addi r3,r1,STACK_INT_FRAME_REGS lbz r4,PACAIRQSOFTMASK(r13) cmpdi r4,IRQS_ENABLED bne 1f @@ -2410,14 +2410,14 @@ BEGIN_FTR_SECTION b fast_interrupt_return_srr #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl altivec_unavailable_tm b interrupt_return_srr #endif 1: END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl altivec_unavailable_exception b interrupt_return_srr @@ -2458,14 +2458,14 @@ BEGIN_FTR_SECTION b load_up_vsx #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl vsx_unavailable_tm b interrupt_return_srr #endif 1: END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl vsx_unavailable_exception b interrupt_return_srr @@ -2492,7 +2492,7 @@ EXC_VIRT_BEGIN(facility_unavailable, 0x4f60, 0x20) EXC_VIRT_END(facility_unavailable, 0x4f60, 0x20) EXC_COMMON_BEGIN(facility_unavailable_common) GEN_COMMON facility_unavailable - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl facility_unavailable_exception REST_NVGPRS(r1) /* instruction emulation may change GPRs */ b interrupt_return_srr @@ -2520,7 +2520,7 @@ EXC_VIRT_BEGIN(h_facility_unavailable, 0x4f80, 0x20) EXC_VIRT_END(h_facility_unavailable, 0x4f80, 0x20) EXC_COMMON_BEGIN(h_facility_unavailable_common) GEN_COMMON h_facility_unavailable - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl facility_unavailable_exception REST_NVGPRS(r1) /* XXX Shouldn't be necessary in practice */ b interrupt_return_hsrr @@ -2550,7 +2550,7 @@ EXC_REAL_END(cbe_system_error, 0x1200, 0x100) EXC_VIRT_NONE(0x5200, 0x100) EXC_COMMON_BEGIN(cbe_system_error_common) GEN_COMMON cbe_system_error - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl cbe_system_error_exception 
b interrupt_return_hsrr @@ -2581,7 +2581,7 @@ EXC_VIRT_BEGIN(instruction_breakpoint, 0x5300, 0x100) EXC_VIRT_END(instruction_breakpoint, 0x5300, 0x100) EXC_COMMON_BEGIN(instruction_breakpoint_common) GEN_COMMON instruction_breakpoint - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl instruction_breakpoint_exception b interrupt_return_srr @@ -2703,7 +2703,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) EXC_COMMON_BEGIN(denorm_exception_common) GEN_COMMON denorm_exception - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unknown_exception b interrupt_return_hsrr @@ -2720,7 +2720,7 @@ EXC_REAL_END(cbe_maintenance, 0x1600, 0x100) EXC_VIRT_NONE(0x5600, 0x100) EXC_COMMON_BEGIN(cbe_maintenance_common) GEN_COMMON cbe_maintenance - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl cbe_maintenance_exception b interrupt_return_hsrr @@ -2745,7 +2745,7 @@ EXC_VIRT_BEGIN(altivec_assist, 0x5700, 0x100) EXC_VIRT_END(altivec_assist, 0x5700, 0x100) EXC_COMMON_BEGIN(altivec_assist_common) GEN_COMMON altivec_assist - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS #ifdef CONFIG_ALTIVEC bl altivec_assist_exception REST_NVGPRS(r1) /* instruction emulation may change GPRs */ @@ -2767,7 +2767,7 @@ EXC_REAL_END(cbe_thermal, 0x1800, 0x100) EXC_VIRT_NONE(0x5800, 0x100) EXC_COMMON_BEGIN(cbe_thermal_common) GEN_COMMON cbe_thermal - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl cbe_thermal_exception b interrupt_return_hsrr @@ -2800,7 +2800,7 @@ EXC_COMMON_BEGIN(soft_nmi_common) subi r1,r1,INT_FRAME_SIZE __GEN_COMMON_BODY soft_nmi - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl soft_nmi_interrupt /* Clear MSR_RI before setting SRR0 and SRR1. 
*/ diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index c3286260a7d1..117d25330e13 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -127,7 +127,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) mfspr r10,SPRN_XER addi r2, r2, -THREAD stw r10,_XER(r1) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS .endm .macro prepare_transfer_to_handler diff --git a/arch/powerpc/kernel/head_85xx.S b/arch/powerpc/kernel/head_85xx.S index 52c0ab416326..24f39abf81df 100644 --- a/arch/powerpc/kernel/head_85xx.S +++ b/arch/powerpc/kernel/head_85xx.S @@ -972,10 +972,10 @@ _GLOBAL(__giveup_spe) li r4,THREAD_ACC evstddx evr6, r4, r3 /* save off accumulator */ beq 1f - lwz r4,_MSR-STACK_FRAME_OVERHEAD(r5) + lwz r4,_MSR-STACK_INT_FRAME_REGS(r5) lis r3,MSR_SPE@h andc r4,r4,r3 /* disable SPE for previous task */ - stw r4,_MSR-STACK_FRAME_OVERHEAD(r5) + stw r4,_MSR-STACK_INT_FRAME_REGS(r5) 1: blr #endif /* CONFIG_SPE */ diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 1cb9d0f7cbf2..3149ac20b18e 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -99,7 +99,7 @@ END_BTB_FLUSH_SECTION mfspr r10,SPRN_XER addi r2, r2, -THREAD stw r10,_XER(r1) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS .endm .macro prepare_transfer_to_handler diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S index a019ed6fc839..49d585eae7c8 100644 --- a/arch/powerpc/kernel/interrupt_64.S +++ b/arch/powerpc/kernel/interrupt_64.S @@ -78,7 +78,7 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name) std r12,_CCR(r1) std r3,ORIG_GPR3(r1) /* Calling convention has r3 = regs, r4 = orig r0 */ - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS mr r4,r0 LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER) std r11,-16(r3) /* "regshere" marker */ @@ -99,7 +99,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) bl system_call_exception 
.Lsyscall_vectored_\name\()_exit: - addi r4,r1,STACK_FRAME_OVERHEAD + addi r4,r1,STACK_INT_FRAME_REGS li r5,1 /* scv */ bl syscall_exit_prepare std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ @@ -176,7 +176,7 @@ _ASM_NOKPROBE_SYMBOL(syscall_vectored_\name\()_restart) ld r1,PACA_EXIT_SAVE_R1(r13) LOAD_PACA_TOC() ld r3,RESULT(r1) - addi r4,r1,STACK_FRAME_OVERHEAD + addi r4,r1,STACK_INT_FRAME_REGS li r11,IRQS_ALL_DISABLED stb r11,PACAIRQSOFTMASK(r13) bl syscall_exit_restart @@ -251,7 +251,7 @@ END_BTB_FLUSH_SECTION std r12,_CCR(r1) std r3,ORIG_GPR3(r1) /* Calling convention has r3 = regs, r4 = orig r0 */ - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS mr r4,r0 LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER) std r11,-16(r3) /* "regshere" marker */ @@ -278,7 +278,7 @@ END_BTB_FLUSH_SECTION bl system_call_exception .Lsyscall_exit: - addi r4,r1,STACK_FRAME_OVERHEAD + addi r4,r1,STACK_INT_FRAME_REGS li r5,0 /* !scv */ bl syscall_exit_prepare std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ @@ -357,7 +357,7 @@ _ASM_NOKPROBE_SYMBOL(syscall_restart) ld r1,PACA_EXIT_SAVE_R1(r13) LOAD_PACA_TOC() ld r3,RESULT(r1) - addi r4,r1,STACK_FRAME_OVERHEAD + addi r4,r1,STACK_INT_FRAME_REGS li r11,IRQS_ALL_DISABLED stb r11,PACAIRQSOFTMASK(r13) bl syscall_exit_restart @@ -388,7 +388,7 @@ _ASM_NOKPROBE_SYMBOL(fast_interrupt_return_srr) andi. r0,r5,MSR_RI li r3,0 /* 0 return value, no EMULATE_STACK_STORE */ bne+ .Lfast_kernel_interrupt_return_srr - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unrecoverable_exception b . 
/* should not get here */ #else @@ -406,7 +406,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()) beq interrupt_return_\srr\()_kernel interrupt_return_\srr\()_user: /* make backtraces match the _kernel variant */ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl interrupt_exit_user_prepare cmpdi r3,0 bne- .Lrestore_nvgprs_\srr @@ -503,7 +503,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user_restart) GET_PACA(r13) ld r1,PACA_EXIT_SAVE_R1(r13) LOAD_PACA_TOC() - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS li r11,IRQS_ALL_DISABLED stb r11,PACAIRQSOFTMASK(r13) bl interrupt_exit_user_restart @@ -518,7 +518,7 @@ RESTART_TABLE(.Linterrupt_return_\srr\()_user_rst_start, .Linterrupt_return_\srr .balign IFETCH_ALIGN_BYTES interrupt_return_\srr\()_kernel: _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl interrupt_exit_kernel_prepare std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ @@ -684,7 +684,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel_restart) GET_PACA(r13) ld r1,PACA_EXIT_SAVE_R1(r13) LOAD_PACA_TOC() - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS li r11,IRQS_ALL_DISABLED stb r11,PACAIRQSOFTMASK(r13) bl interrupt_exit_kernel_restart diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 1a1e9995dae3..ebe4d1645ca1 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -191,7 +191,7 @@ static int kgdb_break_match(struct pt_regs *regs) void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) { struct pt_regs *regs = (struct pt_regs *)(p->thread.ksp + - STACK_FRAME_OVERHEAD); + STACK_INT_FRAME_REGS); unsigned long *ptr = gdb_regs; int reg; diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S index cd4e7bc32609..35932f45fb4e 100644 --- 
a/arch/powerpc/kernel/optprobes_head.S +++ b/arch/powerpc/kernel/optprobes_head.S @@ -85,7 +85,7 @@ optprobe_template_op_address: TEMPLATE_FOR_IMM_LOAD_INSNS /* 2. pt_regs pointer in r4 */ - addi r4,r1,STACK_FRAME_OVERHEAD + addi r4,r1,STACK_INT_FRAME_REGS .global optprobe_template_call_handler optprobe_template_call_handler: @@ -96,7 +96,7 @@ optprobe_template_call_handler: * Parameters for instruction emulation: * 1. Pass SP in register r3. */ - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS .global optprobe_template_insn optprobe_template_insn: diff --git a/arch/powerpc/kernel/ppc_save_regs.S b/arch/powerpc/kernel/ppc_save_regs.S index 6e86f3bf4673..49813f982468 100644 --- a/arch/powerpc/kernel/ppc_save_regs.S +++ b/arch/powerpc/kernel/ppc_save_regs.S @@ -22,7 +22,7 @@ */ _GLOBAL(ppc_save_regs) /* This allows stack frame accessor macros and offsets to be used */ - subi r3,r3,STACK_FRAME_OVERHEAD + subi r3,r3,STACK_INT_FRAME_REGS PPC_STL r0,GPR0(r3) #ifdef CONFIG_PPC32 stmw r2,GPR2(r3) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index f93703ea4a12..d7a581997d92 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -2260,12 +2260,12 @@ void __no_sanitize_address show_stack(struct task_struct *tsk, /* * See if this is an exception frame. - * We look for the "regshere" marker in the current frame. + * We look for the "regs" marker in the current frame. 
*/ if (validate_sp(sp, tsk, STACK_FRAME_WITH_PT_REGS) && stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { struct pt_regs *regs = (struct pt_regs *) - (sp + STACK_FRAME_OVERHEAD); + (sp + STACK_INT_FRAME_REGS); lr = regs->link; printk("%s--- interrupt: %lx at %pS\n", diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 5a0f023a26e9..9feab5e0485b 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -117,7 +117,7 @@ _GLOBAL(tm_reclaim) std r2, STK_GOT(r1) stdu r1, -TM_FRAME_SIZE(r1) - /* We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD]. */ + /* We've a struct pt_regs at [r1+STACK_INT_FRAME_REGS]. */ std r3, STK_PARAM(R3)(r1) SAVE_NVGPRS(r1) @@ -222,7 +222,7 @@ _GLOBAL(tm_reclaim) * Make r7 look like an exception frame so that we can use the neat * GPRx(n) macros. r7 is NOT a pt_regs ptr! */ - subi r7, r7, STACK_FRAME_OVERHEAD + subi r7, r7, STACK_INT_FRAME_REGS /* Sync the userland GPRs 2-12, 14-31 to thread->regs: */ SAVE_GPR(0, r7) /* user r0 */ @@ -359,7 +359,7 @@ _GLOBAL(__tm_recheckpoint) stdu r1, -TM_FRAME_SIZE(r1) /* - * We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD]. + * We've a struct pt_regs at [r1+STACK_INT_FRAME_REGS]. * This is used for backing up the NVGPRs: */ SAVE_NVGPRS(r1) @@ -379,7 +379,7 @@ _GLOBAL(__tm_recheckpoint) * Make r7 look like an exception frame so that we can use the neat * GPRx(n) macros. r7 is now NOT a pt_regs ptr! */ - subi r7, r7, STACK_FRAME_OVERHEAD + subi r7, r7, STACK_INT_FRAME_REGS /* We need to setup MSR for FP/VMX/VSX register save instructions. 
*/ mfmsr r6 diff --git a/arch/powerpc/kernel/trace/ftrace_mprofile.S b/arch/powerpc/kernel/trace/ftrace_mprofile.S index d031093bc436..ffb1db386849 100644 --- a/arch/powerpc/kernel/trace/ftrace_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_mprofile.S @@ -110,7 +110,7 @@ .endif /* Load &pt_regs in r6 for call below */ - addi r6, r1, STACK_FRAME_OVERHEAD + addi r6, r1, STACK_INT_FRAME_REGS .endm .macro ftrace_regs_exit allregs diff --git a/arch/powerpc/lib/test_emulate_step_exec_instr.S b/arch/powerpc/lib/test_emulate_step_exec_instr.S index 5473f9d03df3..e2b646a4f7fa 100644 --- a/arch/powerpc/lib/test_emulate_step_exec_instr.S +++ b/arch/powerpc/lib/test_emulate_step_exec_instr.S @@ -16,7 +16,7 @@ _GLOBAL(exec_instr) /* * Stack frame layout (INT_FRAME_SIZE bytes) - * In-memory pt_regs (SP + STACK_FRAME_OVERHEAD) + * In-memory pt_regs (SP + STACK_INT_FRAME_REGS) * Scratch space (SP + 8) * Back chain (SP + 0) */ diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c index 8718289c051d..9e254aed1f61 100644 --- a/arch/powerpc/perf/callchain.c +++ b/arch/powerpc/perf/callchain.c @@ -67,7 +67,7 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re * This looks like an interrupt frame for an * interrupt that occurred in the kernel */ - regs = (struct pt_regs *)(sp + STACK_FRAME_OVERHEAD); + regs = (struct pt_regs *)(sp + STACK_INT_FRAME_REGS); next_ip = regs->nip; lr = regs->link; level = 0; diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index e34d7809f6c9..a14eb4d815c2 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1782,14 +1782,13 @@ static void xmon_show_stack(unsigned long sp, unsigned long lr, xmon_print_symbol(ip, " ", "\n"); } - /* Look for "regshere" marker to see if this is + /* Look for "regs" marker to see if this is an exception frame. 
*/ if (mread(sp + MARKER_OFFSET, &marker, sizeof(unsigned long)) && marker == STACK_FRAME_REGS_MARKER) { - if (mread(sp + STACK_FRAME_OVERHEAD, &regs, sizeof(regs)) - != sizeof(regs)) { + if (mread(sp + STACK_INT_FRAME_REGS, &regs, sizeof(regs)) != sizeof(regs)) { printf("Couldn't read registers at %lx\n", - sp + STACK_FRAME_OVERHEAD); + sp + STACK_INT_FRAME_REGS); break; } printf("--- Exception: %lx %s at ", regs.trap, From d2e8ff9f1492f44c5a6d93f759eea27574d753de Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 27 Nov 2022 22:49:33 +1000 Subject: [PATCH 3207/4122] powerpc: add a definition for the marker offset within the interrupt frame Define a constant rather than open-code the offset for the "regs" marker. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221127124942.1665522-9-npiggin@gmail.com --- arch/powerpc/include/asm/ptrace.h | 2 ++ arch/powerpc/kernel/entry_32.S | 2 +- arch/powerpc/kernel/exceptions-64e.S | 2 +- arch/powerpc/kernel/exceptions-64s.S | 2 +- arch/powerpc/kernel/head_32.h | 2 +- arch/powerpc/kernel/head_booke.h | 2 +- arch/powerpc/kernel/interrupt_64.S | 10 +++++----- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 2 +- 8 files changed, 13 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index a4ae67aa9b76..8a9f4cf8c4c5 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -125,6 +125,7 @@ struct pt_regs #define STACK_INT_FRAME_SIZE (sizeof(struct pt_regs) + \ STACK_FRAME_OVERHEAD + KERNEL_REDZONE_SIZE) #define STACK_INT_FRAME_REGS STACK_FRAME_OVERHEAD +#define STACK_INT_FRAME_MARKER (STACK_FRAME_OVERHEAD - 16) #define STACK_FRAME_MARKER 12 #ifdef CONFIG_PPC64_ELF_ABI_V2 @@ -145,6 +146,7 @@ struct pt_regs #define STACK_FRAME_LR_SAVE 1 /* Location of LR in stack frame */ #define STACK_INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD) #define STACK_INT_FRAME_REGS STACK_FRAME_OVERHEAD
+#define STACK_INT_FRAME_MARKER (STACK_FRAME_OVERHEAD - 8) #define STACK_FRAME_MARKER 2 #define STACK_FRAME_MIN_SIZE STACK_FRAME_OVERHEAD diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 24c8d84a56c9..2f61b7d3677c 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -114,7 +114,7 @@ transfer_to_syscall: addi r12,r12,STACK_FRAME_REGS_MARKER@l stw r9,_MSR(r1) li r2, INTERRUPT_SYSCALL - stw r12,8(r1) + stw r12,STACK_INT_FRAME_MARKER(r1) stw r2,_TRAP(r1) SAVE_GPR(0, r1) SAVE_GPRS(3, 8, r1) diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 62033d022e0a..b9cec22df9f9 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -391,7 +391,7 @@ exc_##n##_common: \ std r10,_CCR(r1); /* store orig CR in stackframe */ \ std r9,GPR1(r1); /* store stack frame back link */ \ std r11,SOFTE(r1); /* and save it to stackframe */ \ - std r12,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */ \ + std r12,STACK_INT_FRAME_MARKER(r1); /* mark the frame */ \ std r3,_TRAP(r1); /* set trap number */ \ std r0,RESULT(r1); /* clear regs->result */ \ SAVE_NVGPRS(r1); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 29b78536ca59..ac3b0580224e 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -591,7 +591,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) li r10,0 LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER) std r10,RESULT(r1) /* clear regs->result */ - std r11,STACK_FRAME_OVERHEAD-16(r1) /* mark the frame */ + std r11,STACK_INT_FRAME_MARKER(r1) /* mark the frame */ .endm /* diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 117d25330e13..f8e2911478a7 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -112,7 +112,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) stw r0,GPR0(r1) lis r10,STACK_FRAME_REGS_MARKER@ha /* 
exception frame marker */ addi r10,r10,STACK_FRAME_REGS_MARKER@l - stw r10,8(r1) + stw r10,STACK_INT_FRAME_MARKER(r1) li r10, \trapno stw r10,_TRAP(r1) SAVE_GPRS(3, 8, r1) diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 3149ac20b18e..37d43c172676 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -84,7 +84,7 @@ END_BTB_FLUSH_SECTION stw r0,GPR0(r1) lis r10, STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ addi r10, r10, STACK_FRAME_REGS_MARKER@l - stw r10, 8(r1) + stw r10, STACK_INT_FRAME_MARKER(r1) li r10, \trapno stw r10,_TRAP(r1) SAVE_GPRS(3, 8, r1) diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S index 49d585eae7c8..321992c1c9f9 100644 --- a/arch/powerpc/kernel/interrupt_64.S +++ b/arch/powerpc/kernel/interrupt_64.S @@ -77,11 +77,11 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name) std r11,_TRAP(r1) std r12,_CCR(r1) std r3,ORIG_GPR3(r1) + LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER) + std r11,STACK_INT_FRAME_MARKER(r1) /* "regs" marker */ /* Calling convention has r3 = regs, r4 = orig r0 */ addi r3,r1,STACK_INT_FRAME_REGS mr r4,r0 - LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER) - std r11,-16(r3) /* "regshere" marker */ BEGIN_FTR_SECTION HMT_MEDIUM @@ -250,11 +250,11 @@ END_BTB_FLUSH_SECTION std r11,_TRAP(r1) std r12,_CCR(r1) std r3,ORIG_GPR3(r1) + LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER) + std r11,STACK_INT_FRAME_MARKER(r1) /* "regs" marker */ /* Calling convention has r3 = regs, r4 = orig r0 */ addi r3,r1,STACK_INT_FRAME_REGS mr r4,r0 - LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER) - std r11,-16(r3) /* "regshere" marker */ #ifdef CONFIG_PPC_BOOK3S li r11,1 @@ -637,7 +637,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) * Leaving a stale STACK_FRAME_REGS_MARKER on the stack can confuse * the reliable stack unwinder later on. Clear it. 
*/ - std r0,STACK_FRAME_OVERHEAD-16(r1) + std r0,STACK_INT_FRAME_MARKER(r1) REST_GPRS(2, 5, r1) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 37f50861dd98..a9e162a1deec 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -2729,7 +2729,7 @@ kvmppc_bad_host_intr: std r6, SOFTE(r1) LOAD_PACA_TOC() LOAD_REG_IMMEDIATE(3, STACK_FRAME_REGS_MARKER) - std r3, STACK_FRAME_OVERHEAD-16(r1) + std r3, STACK_INT_FRAME_MARKER(r1) /* * XXX On POWER7 and POWER8, we just spin here since we don't From e856e336924b0ecd0b7058e65e6b3e7266ee0b95 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 27 Nov 2022 22:49:34 +1000 Subject: [PATCH 3208/4122] powerpc: Rename STACK_FRAME_MARKER and derive it from frame offset This is a count of longs from the stack pointer to the regs marker. Rename it to make it more distinct from the other byte offsets. It can be derived from the byte offset definitions just added. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221127124942.1665522-10-npiggin@gmail.com --- arch/powerpc/include/asm/ptrace.h | 4 ++-- arch/powerpc/kernel/process.c | 2 +- arch/powerpc/kernel/stacktrace.c | 2 +- arch/powerpc/perf/callchain.c | 2 +- arch/powerpc/xmon/xmon.c | 3 +-- 5 files changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 8a9f4cf8c4c5..fdd50648df56 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -126,7 +126,6 @@ struct pt_regs STACK_FRAME_OVERHEAD + KERNEL_REDZONE_SIZE) #define STACK_INT_FRAME_REGS STACK_FRAME_OVERHEAD #define STACK_INT_FRAME_MARKER (STACK_FRAME_OVERHEAD - 16) -#define STACK_FRAME_MARKER 12 #ifdef CONFIG_PPC64_ELF_ABI_V2 #define STACK_FRAME_MIN_SIZE 32 @@ -147,7 +146,6 @@ struct pt_regs #define STACK_INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD) #define STACK_INT_FRAME_REGS STACK_FRAME_OVERHEAD #define STACK_INT_FRAME_MARKER (STACK_FRAME_OVERHEAD - 8) -#define STACK_FRAME_MARKER 2 #define STACK_FRAME_MIN_SIZE STACK_FRAME_OVERHEAD /* Size of stack frame allocated when calling signal handler. */ @@ -155,6 +153,8 @@ struct pt_regs #endif /* __powerpc64__ */ +#define STACK_INT_FRAME_MARKER_LONGS (STACK_INT_FRAME_MARKER/sizeof(long)) + #ifndef __ASSEMBLY__ #include diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index d7a581997d92..6c0a3c664266 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -2263,7 +2263,7 @@ void __no_sanitize_address show_stack(struct task_struct *tsk, * We look for the "regs" marker in the current frame. 
*/ if (validate_sp(sp, tsk, STACK_FRAME_WITH_PT_REGS) - && stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { + && stack[STACK_INT_FRAME_MARKER_LONGS] == STACK_FRAME_REGS_MARKER) { struct pt_regs *regs = (struct pt_regs *) (sp + STACK_INT_FRAME_REGS); diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index a2443d61728e..7efa0ec9dd77 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -136,7 +136,7 @@ int __no_sanitize_address arch_stack_walk_reliable(stack_trace_consume_fn consum /* Mark stacktraces with exception frames as unreliable. */ if (sp <= stack_end - STACK_INT_FRAME_SIZE && - stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { + stack[STACK_INT_FRAME_MARKER_LONGS] == STACK_FRAME_REGS_MARKER) { return -EINVAL; } diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c index 9e254aed1f61..b01497ed5173 100644 --- a/arch/powerpc/perf/callchain.c +++ b/arch/powerpc/perf/callchain.c @@ -62,7 +62,7 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re if (next_sp == sp + STACK_INT_FRAME_SIZE && validate_sp(sp, current, STACK_INT_FRAME_SIZE) && - fp[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { + fp[STACK_INT_FRAME_MARKER_LONGS] == STACK_FRAME_REGS_MARKER) { /* * This looks like an interrupt frame for an * interrupt that occurred in the kernel diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index a14eb4d815c2..0da66bc4823d 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1721,7 +1721,6 @@ static void get_function_bounds(unsigned long pc, unsigned long *startp, } #define LRSAVE_OFFSET (STACK_FRAME_LR_SAVE * sizeof(unsigned long)) -#define MARKER_OFFSET (STACK_FRAME_MARKER * sizeof(unsigned long)) static void xmon_show_stack(unsigned long sp, unsigned long lr, unsigned long pc) @@ -1784,7 +1783,7 @@ static void xmon_show_stack(unsigned long sp, unsigned long lr, /* Look for "regs" marker to see if 
this is an exception frame. */ - if (mread(sp + MARKER_OFFSET, &marker, sizeof(unsigned long)) + if (mread(sp + STACK_INT_FRAME_MARKER, &marker, sizeof(unsigned long)) && marker == STACK_FRAME_REGS_MARKER) { if (mread(sp + STACK_INT_FRAME_REGS, &regs, sizeof(regs)) != sizeof(regs)) { printf("Couldn't read registers at %lx\n", From 1223e5a20f7fb3c31c91a328d1a04ed26d5e889b Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 27 Nov 2022 22:49:35 +1000 Subject: [PATCH 3209/4122] powerpc: add a define for the user interrupt frame size The user interrupt frame is a different size from the kernel frame, so give it its own name. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221127124942.1665522-11-npiggin@gmail.com --- arch/powerpc/include/asm/ptrace.h | 6 +++--- arch/powerpc/kernel/process.c | 6 +++--- arch/powerpc/kernel/stacktrace.c | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index fdd50648df56..705ce26ae887 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -122,8 +122,7 @@ struct pt_regs #define STACK_FRAME_OVERHEAD 112 /* size of minimum stack frame */ #define STACK_FRAME_LR_SAVE 2 /* Location of LR in stack frame */ -#define STACK_INT_FRAME_SIZE (sizeof(struct pt_regs) + \ - STACK_FRAME_OVERHEAD + KERNEL_REDZONE_SIZE) +#define STACK_USER_INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD) #define STACK_INT_FRAME_REGS STACK_FRAME_OVERHEAD #define STACK_INT_FRAME_MARKER (STACK_FRAME_OVERHEAD - 16) @@ -143,7 +142,7 @@ struct pt_regs #define KERNEL_REDZONE_SIZE 0 #define STACK_FRAME_OVERHEAD 16 /* size of minimum stack frame */ #define STACK_FRAME_LR_SAVE 1 /* Location of LR in stack frame */ -#define STACK_INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD) +#define STACK_USER_INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD) #define
STACK_INT_FRAME_REGS STACK_FRAME_OVERHEAD #define STACK_INT_FRAME_MARKER (STACK_FRAME_OVERHEAD - 8) #define STACK_FRAME_MIN_SIZE STACK_FRAME_OVERHEAD @@ -153,6 +152,7 @@ struct pt_regs #endif /* __powerpc64__ */ +#define STACK_INT_FRAME_SIZE (KERNEL_REDZONE_SIZE + STACK_USER_INT_FRAME_SIZE) #define STACK_INT_FRAME_MARKER_LONGS (STACK_INT_FRAME_MARKER/sizeof(long)) #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 6c0a3c664266..010a5ee746ae 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1756,15 +1756,15 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) klp_init_thread_info(p); /* Create initial stack frame. */ - sp -= (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD); + sp -= STACK_USER_INT_FRAME_SIZE; ((unsigned long *)sp)[0] = 0; /* Copy registers */ - childregs = (struct pt_regs *)(sp + STACK_FRAME_OVERHEAD); + childregs = (struct pt_regs *)(sp + STACK_INT_FRAME_REGS); if (unlikely(args->fn)) { /* kernel thread */ memset(childregs, 0, sizeof(struct pt_regs)); - childregs->gpr[1] = sp + (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD); + childregs->gpr[1] = sp + STACK_USER_INT_FRAME_SIZE; /* function */ if (args->fn) childregs->gpr[14] = ppc_function_entry((void *)args->fn); diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index 7efa0ec9dd77..453ac317a6cf 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -77,7 +77,7 @@ int __no_sanitize_address arch_stack_walk_reliable(stack_trace_consume_fn consum /* * For user tasks, this is the SP value loaded on * kernel entry, see "PACAKSAVE(r13)" in _switch() and - * system_call_common()/EXCEPTION_PROLOG_COMMON(). + * system_call_common(). 
* * Likewise for non-swapper kernel threads, * this also happens to be the top of the stack @@ -88,7 +88,7 @@ int __no_sanitize_address arch_stack_walk_reliable(stack_trace_consume_fn consum * an unreliable stack trace until it's been * _switch()'ed to for the first time. */ - stack_end -= STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); + stack_end -= STACK_USER_INT_FRAME_SIZE; } else { /* * idle tasks have a custom stack layout, From 6f291a03819e4051ebc870471d26915ef2e6ba31 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 27 Nov 2022 22:49:36 +1000 Subject: [PATCH 3210/4122] powerpc: add a define for the switch frame size and regs offset This is open-coded in process.c, ppc32 uses a different define with the same value, and the C definition is named differently which makes it an extra indirection to grep for. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221127124942.1665522-12-npiggin@gmail.com --- arch/powerpc/include/asm/ptrace.h | 6 ++++-- arch/powerpc/kernel/asm-offsets.c | 2 +- arch/powerpc/kernel/entry_32.S | 6 +++--- arch/powerpc/kernel/process.c | 12 ++++++++---- 4 files changed, 16 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 705ce26ae887..412ef0749775 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -97,8 +97,6 @@ struct pt_regs #endif -#define STACK_FRAME_WITH_PT_REGS (STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)) - // Always displays as "REGS" in memory dumps #ifdef CONFIG_CPU_BIG_ENDIAN #define STACK_FRAME_REGS_MARKER ASM_CONST(0x52454753) @@ -125,6 +123,8 @@ struct pt_regs #define STACK_USER_INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD) #define STACK_INT_FRAME_REGS STACK_FRAME_OVERHEAD #define STACK_INT_FRAME_MARKER (STACK_FRAME_OVERHEAD - 16) +#define STACK_SWITCH_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD) +#define STACK_SWITCH_FRAME_REGS
STACK_FRAME_OVERHEAD #ifdef CONFIG_PPC64_ELF_ABI_V2 #define STACK_FRAME_MIN_SIZE 32 @@ -146,6 +146,8 @@ struct pt_regs #define STACK_INT_FRAME_REGS STACK_FRAME_OVERHEAD #define STACK_INT_FRAME_MARKER (STACK_FRAME_OVERHEAD - 8) #define STACK_FRAME_MIN_SIZE STACK_FRAME_OVERHEAD +#define STACK_SWITCH_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD) +#define STACK_SWITCH_FRAME_REGS STACK_FRAME_OVERHEAD /* Size of stack frame allocated when calling signal handler. */ #define __SIGNAL_FRAMESIZE 64 diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 68905c9f7c21..d24a59a98c0c 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -260,7 +260,7 @@ int main(void) /* Interrupt register frame */ DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE); - DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_WITH_PT_REGS); + DEFINE(SWITCH_FRAME_SIZE, STACK_SWITCH_FRAME_SIZE); STACK_PT_REGS_OFFSET(GPR0, gpr[0]); STACK_PT_REGS_OFFSET(GPR1, gpr[1]); STACK_PT_REGS_OFFSET(GPR2, gpr[2]); diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 2f61b7d3677c..6e99ec10be89 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -215,9 +215,9 @@ ret_from_kernel_thread: * in arch/ppc/kernel/process.c */ _GLOBAL(_switch) - stwu r1,-INT_FRAME_SIZE(r1) + stwu r1,-SWITCH_FRAME_SIZE(r1) mflr r0 - stw r0,INT_FRAME_SIZE+4(r1) + stw r0,SWITCH_FRAME_SIZE+4(r1) /* r3-r12 are caller saved -- Cort */ SAVE_NVGPRS(r1) stw r0,_NIP(r1) /* Return to switch caller */ @@ -248,7 +248,7 @@ _GLOBAL(_switch) lwz r4,_NIP(r1) /* Return to _switch caller in new task */ mtlr r4 - addi r1,r1,INT_FRAME_SIZE + addi r1,r1,SWITCH_FRAME_SIZE blr .globl fast_exception_return diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 010a5ee746ae..0cb5296c6c41 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1808,10 +1808,10 @@ int copy_thread(struct 
task_struct *p, const struct kernel_clone_args *args) * do some house keeping and then return from the fork or clone * system call, using the stack frame created above. */ - sp -= sizeof(struct pt_regs); - kregs = (struct pt_regs *) sp; - sp -= STACK_FRAME_OVERHEAD; + sp -= STACK_SWITCH_FRAME_SIZE; + kregs = (struct pt_regs *)(sp + STACK_SWITCH_FRAME_REGS); p->thread.ksp = sp; + #ifdef CONFIG_HAVE_HW_BREAKPOINT for (i = 0; i < nr_wp_slots(); i++) p->thread.ptrace_bps[i] = NULL; @@ -2261,8 +2261,12 @@ void __no_sanitize_address show_stack(struct task_struct *tsk, /* * See if this is an exception frame. * We look for the "regs" marker in the current frame. + * + * STACK_SWITCH_FRAME_SIZE being the smallest frame that + * could hold a pt_regs, if that does not fit then it can't + * have regs. */ - if (validate_sp(sp, tsk, STACK_FRAME_WITH_PT_REGS) + if (validate_sp(sp, tsk, STACK_SWITCH_FRAME_SIZE) && stack[STACK_INT_FRAME_MARKER_LONGS] == STACK_FRAME_REGS_MARKER) { struct pt_regs *regs = (struct pt_regs *) (sp + STACK_INT_FRAME_REGS); From 6895dfc0474170c492191c126fcfc420f7771a09 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 27 Nov 2022 22:49:37 +1000 Subject: [PATCH 3211/4122] powerpc: copy_thread fill in interrupt frame marker and back chain Backtraces will not recognise the fork system call interrupt without the regs marker. And regular interrupt entry from userspace creates the back chain to the user stack, so do this for the initial fork frame too, to be consistent. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221127124942.1665522-13-npiggin@gmail.com --- arch/powerpc/kernel/process.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 0cb5296c6c41..6b1d80bd370e 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1757,12 +1757,13 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) /* Create initial stack frame. */ sp -= STACK_USER_INT_FRAME_SIZE; - ((unsigned long *)sp)[0] = 0; + *(unsigned long *)(sp + STACK_INT_FRAME_MARKER) = STACK_FRAME_REGS_MARKER; /* Copy registers */ childregs = (struct pt_regs *)(sp + STACK_INT_FRAME_REGS); if (unlikely(args->fn)) { /* kernel thread */ + ((unsigned long *)sp)[0] = 0; memset(childregs, 0, sizeof(struct pt_regs)); childregs->gpr[1] = sp + STACK_USER_INT_FRAME_SIZE; /* function */ @@ -1782,6 +1783,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) *childregs = *regs; if (usp) childregs->gpr[1] = usp; + ((unsigned long *)sp)[0] = childregs->gpr[1]; p->thread.regs = childregs; /* 64s sets this in ret_from_fork */ if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64)) From edbd0387f3249cc7e102f86d4852a9a9f3bb1305 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 27 Nov 2022 22:49:38 +1000 Subject: [PATCH 3212/4122] powerpc: copy_thread add a back chain to the switch stack frame Stack unwinders need LR and the back chain as a minimum. The switch stack uses regs->nip for its return pointer rather than lrsave, so that was not set in the fork frame, and neither was the back chain. This change sets those fields in the stack. 
With this and the previous change, a stack trace in the switch or interrupt stack goes from looking like this: Oops: Exception in kernel mode, sig: 5 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries Modules linked in: CPU: 3 PID: 90 Comm: systemd Not tainted NIP: c000000000011060 LR: c000000000010f68 CTR: 0000000000007fff [ ... regs ... ] NIP [c000000000011060] _switch+0x160/0x17c LR [c000000000010f68] _switch+0x68/0x17c Call Trace: To this: Oops: Exception in kernel mode, sig: 5 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries CPU: 0 PID: 93 Comm: systemd Not tainted NIP: c000000000011060 LR: c000000000010f68 CTR: 0000000000007fff [ ... regs ... ] NIP [c000000000011060] _switch+0x160/0x17c LR [c000000000010f68] _switch+0x68/0x17c Call Trace: [c000000005a93e10] [c00000000000cdbc] ret_from_fork_scv+0x0/0x54 --- interrupt: 3000 at 0x7fffa72f56d8 NIP: 00007fffa72f56d8 LR: 0000000000000000 CTR: 0000000000000000 [ ... regs ... ] NIP [00007fffa72f56d8] 0x7fffa72f56d8 LR [0000000000000000] 0x0 --- interrupt: 3000 Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221127124942.1665522-14-npiggin@gmail.com --- arch/powerpc/kernel/process.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 6b1d80bd370e..096b6ea52378 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1810,7 +1810,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) * do some house keeping and then return from the fork or clone * system call, using the stack frame created above. 
*/ + ((unsigned long *)sp)[STACK_FRAME_LR_SAVE] = (unsigned long)f; sp -= STACK_SWITCH_FRAME_SIZE; + ((unsigned long *)sp)[0] = sp + STACK_SWITCH_FRAME_SIZE; kregs = (struct pt_regs *)(sp + STACK_SWITCH_FRAME_REGS); p->thread.ksp = sp; From 4cefb0f6c555971b3e6544a9b15470f9d1f12089 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 27 Nov 2022 22:49:39 +1000 Subject: [PATCH 3213/4122] powerpc: split validate_sp into two functions Most callers just want to validate an arbitrary kernel stack pointer, some need a particular size. Make the size case the exceptional one with an extra function. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221127124942.1665522-15-npiggin@gmail.com --- arch/powerpc/include/asm/processor.h | 15 ++++++++++++--- arch/powerpc/kernel/process.c | 23 ++++++++++++++--------- arch/powerpc/kernel/stacktrace.c | 2 +- arch/powerpc/perf/callchain.c | 6 +++--- 4 files changed, 30 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 631802999d59..e96c9b8c2a60 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -374,9 +374,18 @@ static inline unsigned long __pack_fe01(unsigned int fpmode) #endif -/* Check that a certain kernel stack pointer is valid in task_struct p */ -int validate_sp(unsigned long sp, struct task_struct *p, - unsigned long nbytes); +/* + * Check that a certain kernel stack pointer is a valid (minimum sized) + * stack frame in task_struct p. + */ +int validate_sp(unsigned long sp, struct task_struct *p); + +/* + * validate the stack frame of a particular minimum size, used for when we are + * looking at a certain object in the stack beyond the minimum. + */ +int validate_sp_size(unsigned long sp, struct task_struct *p, + unsigned long nbytes); /* * Prefetch macros. 
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 096b6ea52378..9446bee8ca32 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -2157,9 +2157,12 @@ static inline int valid_emergency_stack(unsigned long sp, struct task_struct *p, return 0; } - -int validate_sp(unsigned long sp, struct task_struct *p, - unsigned long nbytes) +/* + * validate the stack frame of a particular minimum size, used for when we are + * looking at a certain object in the stack beyond the minimum. + */ +int validate_sp_size(unsigned long sp, struct task_struct *p, + unsigned long nbytes) { unsigned long stack_page = (unsigned long)task_stack_page(p); @@ -2175,7 +2178,10 @@ int validate_sp(unsigned long sp, struct task_struct *p, return valid_emergency_stack(sp, p, nbytes); } -EXPORT_SYMBOL(validate_sp); +int validate_sp(unsigned long sp, struct task_struct *p) +{ + return validate_sp_size(sp, p, STACK_FRAME_OVERHEAD); +} static unsigned long ___get_wchan(struct task_struct *p) { @@ -2183,13 +2189,12 @@ static unsigned long ___get_wchan(struct task_struct *p) int count = 0; sp = p->thread.ksp; - if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD)) + if (!validate_sp(sp, p)) return 0; do { sp = READ_ONCE_NOCHECK(*(unsigned long *)sp); - if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD) || - task_is_running(p)) + if (!validate_sp(sp, p) || task_is_running(p)) return 0; if (count > 0) { ip = READ_ONCE_NOCHECK(((unsigned long *)sp)[STACK_FRAME_LR_SAVE]); @@ -2243,7 +2248,7 @@ void __no_sanitize_address show_stack(struct task_struct *tsk, lr = 0; printk("%sCall Trace:\n", loglvl); do { - if (!validate_sp(sp, tsk, STACK_FRAME_OVERHEAD)) + if (!validate_sp(sp, tsk)) break; stack = (unsigned long *) sp; @@ -2270,7 +2275,7 @@ void __no_sanitize_address show_stack(struct task_struct *tsk, * could hold a pt_regs, if that does not fit then it can't * have regs. 
*/ - if (validate_sp(sp, tsk, STACK_SWITCH_FRAME_SIZE) + if (validate_sp_size(sp, tsk, STACK_SWITCH_FRAME_SIZE) && stack[STACK_INT_FRAME_MARKER_LONGS] == STACK_FRAME_REGS_MARKER) { struct pt_regs *regs = (struct pt_regs *) (sp + STACK_INT_FRAME_REGS); diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index 453ac317a6cf..1dbbf30f265e 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -43,7 +43,7 @@ void __no_sanitize_address arch_stack_walk(stack_trace_consume_fn consume_entry, unsigned long *stack = (unsigned long *) sp; unsigned long newsp, ip; - if (!validate_sp(sp, task, STACK_FRAME_OVERHEAD)) + if (!validate_sp(sp, task)) return; newsp = stack[0]; diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c index b01497ed5173..6b4434dd0ff3 100644 --- a/arch/powerpc/perf/callchain.c +++ b/arch/powerpc/perf/callchain.c @@ -27,7 +27,7 @@ static int valid_next_sp(unsigned long sp, unsigned long prev_sp) { if (sp & 0xf) return 0; /* must be 16-byte aligned */ - if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) + if (!validate_sp(sp, current)) return 0; if (sp >= prev_sp + STACK_FRAME_MIN_SIZE) return 1; @@ -53,7 +53,7 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re sp = regs->gpr[1]; perf_callchain_store(entry, perf_instruction_pointer(regs)); - if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) + if (!validate_sp(sp, current)) return; for (;;) { @@ -61,7 +61,7 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re next_sp = fp[0]; if (next_sp == sp + STACK_INT_FRAME_SIZE && - validate_sp(sp, current, STACK_INT_FRAME_SIZE) && + validate_sp_size(sp, current, STACK_INT_FRAME_SIZE) && fp[STACK_INT_FRAME_MARKER_LONGS] == STACK_FRAME_REGS_MARKER) { /* * This looks like an interrupt frame for an From 90f1b43196c5e79f6c986a359011a19857984c27 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 27 Nov 2022 22:49:40 
+1000 Subject: [PATCH 3214/4122] powerpc: allow minimum sized kernel stack frames This affects only 64-bit ELFv2 kernels, and reduces the minimum asm-created stack frame size from 112 to 32 bytes on those kernels. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221127124942.1665522-16-npiggin@gmail.com --- arch/powerpc/kernel/head_40x.S | 2 +- arch/powerpc/kernel/head_44x.S | 6 +++--- arch/powerpc/kernel/head_64.S | 6 +++--- arch/powerpc/kernel/head_85xx.S | 4 ++-- arch/powerpc/kernel/head_8xx.S | 2 +- arch/powerpc/kernel/head_book3s_32.S | 4 ++-- arch/powerpc/kernel/irq.c | 4 ++-- arch/powerpc/kernel/misc_32.S | 2 +- arch/powerpc/kernel/misc_64.S | 4 ++-- arch/powerpc/kernel/process.c | 2 +- arch/powerpc/kernel/smp.c | 2 +- arch/powerpc/kernel/stacktrace.c | 2 +- 12 files changed, 20 insertions(+), 20 deletions(-) diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 088f500896c7..918547b93b5e 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -602,7 +602,7 @@ start_here: lis r1,init_thread_union@ha addi r1,r1,init_thread_union@l li r0,0 - stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) + stwu r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1) bl early_init /* We have to do this with MMU on */ diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index f15cb9fdb692..63a85c16fef4 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -109,7 +109,7 @@ _GLOBAL(_start); lis r1,init_thread_union@h ori r1,r1,init_thread_union@l li r0,0 - stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) + stwu r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1) bl early_init @@ -1012,7 +1012,7 @@ _GLOBAL(start_secondary_47x) */ lis r1,temp_boot_stack@h ori r1,r1,temp_boot_stack@l - addi r1,r1,1024-STACK_FRAME_OVERHEAD + addi r1,r1,1024-STACK_FRAME_MIN_SIZE li r0,0 stw r0,0(r1) bl mmu_init_secondary @@ -1025,7 +1025,7 @@ _GLOBAL(start_secondary_47x) lwz
r1,TASK_STACK(r2) /* Current stack pointer */ - addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD + addi r1,r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE li r0,0 stw r0,0(r1) diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index dedcc6fe2263..b513d13bf79e 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -424,7 +424,7 @@ generic_secondary_common_init: /* Create a temp kernel stack for use before relocation is on. */ ld r1,PACAEMERGSP(r13) - subi r1,r1,STACK_FRAME_OVERHEAD + subi r1,r1,STACK_FRAME_MIN_SIZE /* See if we need to call a cpu state restore handler */ LOAD_REG_ADDR(r23, cur_cpu_spec) @@ -780,7 +780,7 @@ _GLOBAL(pmac_secondary_start) /* Create a temp kernel stack for use before relocation is on. */ ld r1,PACAEMERGSP(r13) - subi r1,r1,STACK_FRAME_OVERHEAD + subi r1,r1,STACK_FRAME_MIN_SIZE b __secondary_start @@ -958,7 +958,7 @@ start_here_multiplatform: LOAD_REG_IMMEDIATE(r1,THREAD_SIZE) add r1,r3,r1 li r0,0 - stdu r0,-STACK_FRAME_OVERHEAD(r1) + stdu r0,-STACK_FRAME_MIN_SIZE(r1) /* * Do very early kernel initializations, including initial hash table diff --git a/arch/powerpc/kernel/head_85xx.S b/arch/powerpc/kernel/head_85xx.S index 24f39abf81df..d9bd377dec91 100644 --- a/arch/powerpc/kernel/head_85xx.S +++ b/arch/powerpc/kernel/head_85xx.S @@ -229,7 +229,7 @@ set_ivor: lis r1,init_thread_union@h ori r1,r1,init_thread_union@l li r0,0 - stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) + stwu r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1) #ifdef CONFIG_SMP stw r24, TASK_CPU(r2) @@ -1044,7 +1044,7 @@ __secondary_start: lwz r1,TASK_STACK(r2) /* stack */ - addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD + addi r1,r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE li r0,0 stw r0,0(r1) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 0b05f2be66b9..cf546d0e5c40 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -537,7 +537,7 @@ start_here: ori r0, r0, STACK_END_MAGIC@l stw r0, 0(r1) li 
r0,0 - stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) + stwu r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1) lis r6, swapper_pg_dir@ha tophys(r6,r6) diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 519b60695167..40854d092dd3 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -840,7 +840,7 @@ __secondary_start: lwz r1,TASK_STACK(r1) /* stack */ - addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD + addi r1,r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE li r0,0 tophys(r3,r1) stw r0,0(r3) @@ -966,7 +966,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE) lis r1,init_thread_union@ha addi r1,r1,init_thread_union@l li r0,0 - stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) + stwu r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1) /* * Do early platform-specific initialization, * and set up the MMU. diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 9ede61a5a469..c5b9ce887483 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -210,7 +210,7 @@ static __always_inline void call_do_softirq(const void *sp) PPC_LL " %%r1, 0(%%r1) ;" : // Outputs : // Inputs - [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_OVERHEAD), + [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_MIN_SIZE), [callee] "i" (__do_softirq) : // Clobbers "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", @@ -264,7 +264,7 @@ static __always_inline void call_do_irq(struct pt_regs *regs, void *sp) : // Outputs "+r" (r3) : // Inputs - [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_OVERHEAD), + [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_MIN_SIZE), [callee] "i" (__do_irq) : // Clobbers "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index e5127b19fec2..daf8f87d2372 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -382,7 +382,7 @@ EXPORT_SYMBOL(__bswapdi2) 
_GLOBAL(start_secondary_resume) /* Reset stack */ rlwinm r1, r1, 0, 0, 31 - THREAD_SHIFT - addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD + addi r1,r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE li r3,0 stw r3,0(r1) /* Zero the stack frame pointer */ bl start_secondary diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 36184cada00b..4bb6dd30c556 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -384,7 +384,7 @@ _GLOBAL(kexec_sequence) std r0,16(r1) /* switch stacks to newstack -- &kexec_stack.stack */ - stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3) + stdu r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r3) mr r1,r3 li r0,0 @@ -401,7 +401,7 @@ _GLOBAL(kexec_sequence) std r26,-48(r1) std r25,-56(r1) - stdu r1,-STACK_FRAME_OVERHEAD-64(r1) + stdu r1,-STACK_FRAME_MIN_SIZE-64(r1) /* save args into preserved regs */ mr r31,r3 /* newstack (both) */ diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 9446bee8ca32..edb46d0806ef 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -2180,7 +2180,7 @@ int validate_sp_size(unsigned long sp, struct task_struct *p, int validate_sp(unsigned long sp, struct task_struct *p) { - return validate_sp_size(sp, p, STACK_FRAME_OVERHEAD); + return validate_sp_size(sp, p, STACK_FRAME_MIN_SIZE); } static unsigned long ___get_wchan(struct task_struct *p) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 0da6e59161cd..6b90f10a6c81 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1249,7 +1249,7 @@ static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle) #ifdef CONFIG_PPC64 paca_ptrs[cpu]->__current = idle; paca_ptrs[cpu]->kstack = (unsigned long)task_stack_page(idle) + - THREAD_SIZE - STACK_FRAME_OVERHEAD; + THREAD_SIZE - STACK_FRAME_MIN_SIZE; #endif task_thread_info(idle)->cpu = cpu; secondary_current = current_set[cpu] = idle; diff --git a/arch/powerpc/kernel/stacktrace.c 
b/arch/powerpc/kernel/stacktrace.c index 1dbbf30f265e..5de8597eaab8 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -94,7 +94,7 @@ int __no_sanitize_address arch_stack_walk_reliable(stack_trace_consume_fn consum * idle tasks have a custom stack layout, * c.f. cpu_idle_thread_init(). */ - stack_end -= STACK_FRAME_OVERHEAD; + stack_end -= STACK_FRAME_MIN_SIZE; } if (task == current) From cd52414d5a6ccea6ce956ef05161fe824522a107 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 27 Nov 2022 22:49:41 +1000 Subject: [PATCH 3215/4122] powerpc/64: ELFv2 use minimal stack frames in int and switch frame sizes Adjust the ELFv2 interrupt and switch frames to the minimum C ABI size, plus pt_regs, plus 16 bytes for the aligned regs marker for the int frame (and the switch frame needs to match that because it uses the same regs offset as the int frame). This saves 80 bytes of kernel stack per interrupt. It's the principle of getting our accounting right that's more important than the practical saving. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221127124942.1665522-17-npiggin@gmail.com --- arch/powerpc/include/asm/ptrace.h | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 412ef0749775..4ab606f390bc 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -120,16 +120,26 @@ struct pt_regs #define STACK_FRAME_OVERHEAD 112 /* size of minimum stack frame */ #define STACK_FRAME_LR_SAVE 2 /* Location of LR in stack frame */ + +#ifdef CONFIG_PPC64_ELF_ABI_V2 +#define STACK_FRAME_MIN_SIZE 32 +#define STACK_USER_INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_MIN_SIZE + 16) +#define STACK_INT_FRAME_REGS (STACK_FRAME_MIN_SIZE + 16) +#define STACK_INT_FRAME_MARKER STACK_FRAME_MIN_SIZE +#define STACK_SWITCH_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_MIN_SIZE + 16) +#define STACK_SWITCH_FRAME_REGS (STACK_FRAME_MIN_SIZE + 16) +#else +/* + * The ELFv1 ABI specifies 48 bytes plus a minimum 64 byte parameter save + * area. This parameter area is not used by calls to C from interrupt entry, + * so the second from last one of those is used for the frame marker. + */ +#define STACK_FRAME_MIN_SIZE 112 #define STACK_USER_INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD) #define STACK_INT_FRAME_REGS STACK_FRAME_OVERHEAD #define STACK_INT_FRAME_MARKER (STACK_FRAME_OVERHEAD - 16) #define STACK_SWITCH_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD) #define STACK_SWITCH_FRAME_REGS STACK_FRAME_OVERHEAD - -#ifdef CONFIG_PPC64_ELF_ABI_V2 -#define STACK_FRAME_MIN_SIZE 32 -#else -#define STACK_FRAME_MIN_SIZE STACK_FRAME_OVERHEAD #endif /* Size of dummy stack frame allocated when calling signal handler. 
*/ From dfecd06bc5524517ed7737c30eaaf747338b280a Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 27 Nov 2022 22:49:42 +1000 Subject: [PATCH 3216/4122] powerpc: remove STACK_FRAME_OVERHEAD This is equal to STACK_FRAME_MIN_SIZE on 32-bit and 64-bit ELFv1, and no longer used in 64-bit ELFv2, so replace STACK_FRAME_OVERHEAD occurrences with STACK_FRAME_MIN_SIZE. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221127124942.1665522-18-npiggin@gmail.com --- arch/powerpc/include/asm/ptrace.h | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 4ab606f390bc..0eb90a013346 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -118,7 +118,6 @@ struct pt_regs #define USER_REDZONE_SIZE 512 #define KERNEL_REDZONE_SIZE 288 -#define STACK_FRAME_OVERHEAD 112 /* size of minimum stack frame */ #define STACK_FRAME_LR_SAVE 2 /* Location of LR in stack frame */ #ifdef CONFIG_PPC64_ELF_ABI_V2 @@ -135,11 +134,11 @@ struct pt_regs * so the second from last one of those is used for the frame marker. */ #define STACK_FRAME_MIN_SIZE 112 -#define STACK_USER_INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD) -#define STACK_INT_FRAME_REGS STACK_FRAME_OVERHEAD -#define STACK_INT_FRAME_MARKER (STACK_FRAME_OVERHEAD - 16) -#define STACK_SWITCH_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD) -#define STACK_SWITCH_FRAME_REGS STACK_FRAME_OVERHEAD +#define STACK_USER_INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_MIN_SIZE) +#define STACK_INT_FRAME_REGS STACK_FRAME_MIN_SIZE +#define STACK_INT_FRAME_MARKER (STACK_FRAME_MIN_SIZE - 16) +#define STACK_SWITCH_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_MIN_SIZE) +#define STACK_SWITCH_FRAME_REGS STACK_FRAME_MIN_SIZE #endif /* Size of dummy stack frame allocated when calling signal handler. 
*/ @@ -150,14 +149,13 @@ struct pt_regs #define USER_REDZONE_SIZE 0 #define KERNEL_REDZONE_SIZE 0 -#define STACK_FRAME_OVERHEAD 16 /* size of minimum stack frame */ +#define STACK_FRAME_MIN_SIZE 16 #define STACK_FRAME_LR_SAVE 1 /* Location of LR in stack frame */ -#define STACK_USER_INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD) -#define STACK_INT_FRAME_REGS STACK_FRAME_OVERHEAD -#define STACK_INT_FRAME_MARKER (STACK_FRAME_OVERHEAD - 8) -#define STACK_FRAME_MIN_SIZE STACK_FRAME_OVERHEAD -#define STACK_SWITCH_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD) -#define STACK_SWITCH_FRAME_REGS STACK_FRAME_OVERHEAD +#define STACK_USER_INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_MIN_SIZE) +#define STACK_INT_FRAME_REGS STACK_FRAME_MIN_SIZE +#define STACK_INT_FRAME_MARKER (STACK_FRAME_MIN_SIZE - 8) +#define STACK_SWITCH_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_MIN_SIZE) +#define STACK_SWITCH_FRAME_REGS STACK_FRAME_MIN_SIZE /* Size of stack frame allocated when calling signal handler. */ #define __SIGNAL_FRAMESIZE 64 From 6b34a099faa123488b13caf704562f4dbe483fc4 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 24 Oct 2022 13:01:50 +1000 Subject: [PATCH 3217/4122] powerpc/64s/hash: add stress_hpt kernel boot option to increase hash faults This option increases the number of hash misses by limiting the number of kernel HPT entries, by keeping a per-CPU record of the last kernel HPTEs installed, and removing that from the hash table on the next hash insertion. A timer round-robins CPUs removing remaining kernel HPTEs and clearing the TLB (in the case of bare metal) to increase and slightly randomise kernel fault activity. 
Signed-off-by: Nicholas Piggin [mpe: Add comment about NR_CPUS usage, fixup whitespace] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221024030150.852517-1-npiggin@gmail.com --- .../admin-guide/kernel-parameters.txt | 5 + arch/powerpc/mm/book3s64/hash_4k.c | 5 + arch/powerpc/mm/book3s64/hash_64k.c | 10 ++ arch/powerpc/mm/book3s64/hash_utils.c | 130 +++++++++++++++++- arch/powerpc/mm/book3s64/internal.h | 11 ++ 5 files changed, 160 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index a465d5242774..9f3d256529d0 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1042,6 +1042,11 @@ them frequently to increase the rate of SLB faults on kernel addresses. + stress_hpt [PPC] + Limits the number of kernel HPT entries in the hash + page table to increase the rate of hash page table + faults on kernel addresses. + disable= [IPV6] See Documentation/networking/ipv6.rst. 
diff --git a/arch/powerpc/mm/book3s64/hash_4k.c b/arch/powerpc/mm/book3s64/hash_4k.c index 7de1a8a0c62a..02acbfd05b46 100644 --- a/arch/powerpc/mm/book3s64/hash_4k.c +++ b/arch/powerpc/mm/book3s64/hash_4k.c @@ -16,6 +16,8 @@ #include #include +#include "internal.h" + int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, unsigned long flags, int ssize, int subpg_prot) @@ -118,6 +120,9 @@ repeat: } new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE; new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE); + + if (stress_hpt()) + hpt_do_stress(ea, hpte_group); } *ptep = __pte(new_pte & ~H_PAGE_BUSY); return 0; diff --git a/arch/powerpc/mm/book3s64/hash_64k.c b/arch/powerpc/mm/book3s64/hash_64k.c index 998c6817ed47..954af420f358 100644 --- a/arch/powerpc/mm/book3s64/hash_64k.c +++ b/arch/powerpc/mm/book3s64/hash_64k.c @@ -16,6 +16,8 @@ #include #include +#include "internal.h" + /* * Return true, if the entry has a slot value which * the software considers as invalid. 
@@ -216,6 +218,9 @@ repeat: new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot, PTRS_PER_PTE); new_pte |= H_PAGE_HASHPTE; + if (stress_hpt()) + hpt_do_stress(ea, hpte_group); + *ptep = __pte(new_pte & ~H_PAGE_BUSY); return 0; } @@ -327,7 +332,12 @@ repeat: new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE; new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE); + + if (stress_hpt()) + hpt_do_stress(ea, hpte_group); } + *ptep = __pte(new_pte & ~H_PAGE_BUSY); + return 0; } diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 6df4c6d38b66..80a148c57de8 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -471,7 +471,7 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend, return ret; } -static bool disable_1tb_segments = false; +static bool disable_1tb_segments __ro_after_init; static int __init parse_disable_1tb_segments(char *p) { @@ -480,6 +480,40 @@ static int __init parse_disable_1tb_segments(char *p) } early_param("disable_1tb_segments", parse_disable_1tb_segments); +bool stress_hpt_enabled __initdata; + +static int __init parse_stress_hpt(char *p) +{ + stress_hpt_enabled = true; + return 0; +} +early_param("stress_hpt", parse_stress_hpt); + +__ro_after_init DEFINE_STATIC_KEY_FALSE(stress_hpt_key); + +/* + * per-CPU array allocated if we enable stress_hpt. + */ +#define STRESS_MAX_GROUPS 16 +struct stress_hpt_struct { + unsigned long last_group[STRESS_MAX_GROUPS]; +}; + +static inline int stress_nr_groups(void) +{ + /* + * LPAR H_REMOVE flushes TLB, so need some number > 1 of entries + * to allow practical forward progress. Bare metal returns 1, which + * seems to help uncover more bugs. 
+ */ + if (firmware_has_feature(FW_FEATURE_LPAR)) + return STRESS_MAX_GROUPS; + else + return 1; +} + +static struct stress_hpt_struct *stress_hpt_struct; + static int __init htab_dt_scan_seg_sizes(unsigned long node, const char *uname, int depth, void *data) @@ -976,6 +1010,23 @@ static void __init hash_init_partition_table(phys_addr_t hash_table, pr_info("Partition table %p\n", partition_tb); } +void hpt_clear_stress(void); +static struct timer_list stress_hpt_timer; +void stress_hpt_timer_fn(struct timer_list *timer) +{ + int next_cpu; + + hpt_clear_stress(); + if (!firmware_has_feature(FW_FEATURE_LPAR)) + tlbiel_all(); + + next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask); + if (next_cpu >= nr_cpu_ids) + next_cpu = cpumask_first(cpu_online_mask); + stress_hpt_timer.expires = jiffies + msecs_to_jiffies(10); + add_timer_on(&stress_hpt_timer, next_cpu); +} + static void __init htab_initialize(void) { unsigned long table; @@ -995,6 +1046,20 @@ static void __init htab_initialize(void) if (stress_slb_enabled) static_branch_enable(&stress_slb_key); + if (stress_hpt_enabled) { + unsigned long tmp; + static_branch_enable(&stress_hpt_key); + // Too early to use nr_cpu_ids, so use NR_CPUS + tmp = memblock_phys_alloc_range(sizeof(struct stress_hpt_struct) * NR_CPUS, + 0, 0, MEMBLOCK_ALLOC_ANYWHERE); + memset((void *)tmp, 0xff, sizeof(struct stress_hpt_struct) * NR_CPUS); + stress_hpt_struct = __va(tmp); + + timer_setup(&stress_hpt_timer, stress_hpt_timer_fn, 0); + stress_hpt_timer.expires = jiffies + msecs_to_jiffies(10); + add_timer(&stress_hpt_timer); + } + /* * Calculate the required size of the htab. We want the number of * PTEGs to equal one half the number of real pages. 
@@ -1980,6 +2045,69 @@ repeat: return slot; } +void hpt_clear_stress(void) +{ + int cpu = raw_smp_processor_id(); + int g; + + for (g = 0; g < stress_nr_groups(); g++) { + unsigned long last_group; + last_group = stress_hpt_struct[cpu].last_group[g]; + + if (last_group != -1UL) { + int i; + for (i = 0; i < HPTES_PER_GROUP; i++) { + if (mmu_hash_ops.hpte_remove(last_group) == -1) + break; + } + stress_hpt_struct[cpu].last_group[g] = -1; + } + } +} + +void hpt_do_stress(unsigned long ea, unsigned long hpte_group) +{ + unsigned long last_group; + int cpu = raw_smp_processor_id(); + + last_group = stress_hpt_struct[cpu].last_group[stress_nr_groups() - 1]; + if (hpte_group == last_group) + return; + + if (last_group != -1UL) { + int i; + /* + * Concurrent CPUs might be inserting into this group, so + * give up after a number of iterations, to prevent a live + * lock. + */ + for (i = 0; i < HPTES_PER_GROUP; i++) { + if (mmu_hash_ops.hpte_remove(last_group) == -1) + break; + } + stress_hpt_struct[cpu].last_group[stress_nr_groups() - 1] = -1; + } + + if (ea >= PAGE_OFFSET) { + /* + * We would really like to prefetch to get the TLB loaded, then + * remove the PTE before returning from fault interrupt, to + * increase the hash fault rate. + * + * Unfortunately QEMU TCG does not model the TLB in a way that + * makes this possible, and systemsim (mambo) emulator does not + * bring in TLBs with prefetches (although loads/stores do + * work for non-CI PTEs). + * + * So remember this PTE and clear it on the next hash fault. 
+ */ + memmove(&stress_hpt_struct[cpu].last_group[1], + &stress_hpt_struct[cpu].last_group[0], + (stress_nr_groups() - 1) * sizeof(unsigned long)); + stress_hpt_struct[cpu].last_group[0] = hpte_group; + } +} + #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE) static DEFINE_RAW_SPINLOCK(linear_map_hash_lock); diff --git a/arch/powerpc/mm/book3s64/internal.h b/arch/powerpc/mm/book3s64/internal.h index 5045048ce244..a57a25f06a21 100644 --- a/arch/powerpc/mm/book3s64/internal.h +++ b/arch/powerpc/mm/book3s64/internal.h @@ -13,6 +13,17 @@ static inline bool stress_slb(void) return static_branch_unlikely(&stress_slb_key); } +extern bool stress_hpt_enabled; + +DECLARE_STATIC_KEY_FALSE(stress_hpt_key); + +static inline bool stress_hpt(void) +{ + return static_branch_unlikely(&stress_hpt_key); +} + +void hpt_do_stress(unsigned long ea, unsigned long hpte_group); + void slb_setup_new_exec(void); void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush); From 5921eb36d2a1b276b16a24e529788550e6a65449 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Thu, 1 Dec 2022 10:49:57 +0800 Subject: [PATCH 3218/4122] selftests: powerpc: Use "grep -E" instead of "egrep" The latest version of grep claims the egrep is now obsolete so the build now contains warnings that look like: egrep: warning: egrep is obsolescent; using grep -E fix this using "grep -E" instead. 
sed -i "s/egrep/grep -E/g" `grep egrep -rwl tools/testing/selftests/powerpc` Here are the steps to install the latest grep: wget http://ftp.gnu.org/gnu/grep/grep-3.8.tar.gz tar xf grep-3.8.tar.gz cd grep-3.8 && ./configure && make sudo make install export PATH=/usr/local/bin:$PATH Signed-off-by: Tiezhu Yang Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1669862997-31335-1-git-send-email-yangtiezhu@loongson.cn --- tools/testing/selftests/powerpc/scripts/hmi.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/powerpc/scripts/hmi.sh b/tools/testing/selftests/powerpc/scripts/hmi.sh index dcdb392e8427..bcc7b6b65009 100755 --- a/tools/testing/selftests/powerpc/scripts/hmi.sh +++ b/tools/testing/selftests/powerpc/scripts/hmi.sh @@ -36,7 +36,7 @@ trap "ppc64_cpu --smt-snooze-delay=100" 0 1 # for each chip+core combination # todo - less fragile parsing -egrep -o 'OCC: Chip [0-9a-f]+ Core [0-9a-f]' < /sys/firmware/opal/msglog | +grep -E -o 'OCC: Chip [0-9a-f]+ Core [0-9a-f]' < /sys/firmware/opal/msglog | while read chipcore; do chip=$(echo "$chipcore"|awk '{print $3}') core=$(echo "$chipcore"|awk '{print $5}') From aecfd680099ba518c34dff2941017c5aa97def52 Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Mon, 28 Nov 2022 15:19:42 +1100 Subject: [PATCH 3219/4122] selftests/powerpc: Use mfspr/mtspr macros No need to write inline asm for mtspr/mfspr, we have macros for this in reg.h Signed-off-by: Benjamin Gray Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221128041948.58339-2-bgray@linux.ibm.com --- tools/testing/selftests/powerpc/dscr/dscr.h | 17 +++++------------ .../selftests/powerpc/ptrace/ptrace-hwbreak.c | 6 ++---- tools/testing/selftests/powerpc/ptrace/ptrace.h | 5 +---- .../selftests/powerpc/security/flush_utils.c | 3 ++- 4 files changed, 10 insertions(+), 21 deletions(-) diff --git a/tools/testing/selftests/powerpc/dscr/dscr.h 
b/tools/testing/selftests/powerpc/dscr/dscr.h index 13e9b9e28e2c..b703714e7d98 100644 --- a/tools/testing/selftests/powerpc/dscr/dscr.h +++ b/tools/testing/selftests/powerpc/dscr/dscr.h @@ -23,6 +23,7 @@ #include #include +#include "reg.h" #include "utils.h" #define THREADS 100 /* Max threads */ @@ -41,31 +42,23 @@ /* Prilvilege state DSCR access */ inline unsigned long get_dscr(void) { - unsigned long ret; - - asm volatile("mfspr %0,%1" : "=r" (ret) : "i" (SPRN_DSCR_PRIV)); - - return ret; + return mfspr(SPRN_DSCR_PRIV); } inline void set_dscr(unsigned long val) { - asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR_PRIV)); + mtspr(SPRN_DSCR_PRIV, val); } /* Problem state DSCR access */ inline unsigned long get_dscr_usr(void) { - unsigned long ret; - - asm volatile("mfspr %0,%1" : "=r" (ret) : "i" (SPRN_DSCR)); - - return ret; + return mfspr(SPRN_DSCR); } inline void set_dscr_usr(unsigned long val) { - asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR)); + mtspr(SPRN_DSCR, val); } /* Default DSCR access */ diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c index a0635a3819aa..1345e9b9af0f 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c @@ -23,6 +23,7 @@ #include #include #include "ptrace.h" +#include "reg.h" #define SPRN_PVR 0x11F #define PVR_8xx 0x00500000 @@ -620,10 +621,7 @@ static int ptrace_hwbreak(void) int main(int argc, char **argv, char **envp) { - int pvr = 0; - asm __volatile__ ("mfspr %0,%1" : "=r"(pvr) : "i"(SPRN_PVR)); - if (pvr == PVR_8xx) - is_8xx = true; + is_8xx = mfspr(SPRN_PVR) == PVR_8xx; return test_harness(ptrace_hwbreak, "ptrace-hwbreak"); } diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace.h b/tools/testing/selftests/powerpc/ptrace/ptrace.h index 4e0233c0f2b3..04788e5fc504 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace.h +++ 
b/tools/testing/selftests/powerpc/ptrace/ptrace.h @@ -745,10 +745,7 @@ int show_tm_spr(pid_t child, struct tm_spr_regs *out) /* Analyse TEXASR after TM failure */ inline unsigned long get_tfiar(void) { - unsigned long ret; - - asm volatile("mfspr %0,%1" : "=r" (ret) : "i" (SPRN_TFIAR)); - return ret; + return mfspr(SPRN_TFIAR); } void analyse_texasr(unsigned long texasr) diff --git a/tools/testing/selftests/powerpc/security/flush_utils.c b/tools/testing/selftests/powerpc/security/flush_utils.c index 4d95965cb751..9c5c00e04f63 100644 --- a/tools/testing/selftests/powerpc/security/flush_utils.c +++ b/tools/testing/selftests/powerpc/security/flush_utils.c @@ -14,6 +14,7 @@ #include #include #include +#include "reg.h" #include "utils.h" #include "flush_utils.h" @@ -79,5 +80,5 @@ void set_dscr(unsigned long val) init = 1; } - asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR)); + mtspr(SPRN_DSCR, val); } From 94ba4f2c33f42dae7813dc169a177e922a39560c Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Mon, 28 Nov 2022 15:19:43 +1100 Subject: [PATCH 3220/4122] selftests/powerpc: Add ptrace setup_core_pattern() null-terminator - malloc() does not zero the buffer, - fread() does not null-terminate its output, - `cat /proc/sys/kernel/core_pattern | hexdump -C` shows the file is not inherently null-terminated So using string operations on the buffer is risky. Explicitly add a null character to the end to make it safer.
Signed-off-by: Benjamin Gray Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221128041948.58339-3-bgray@linux.ibm.com --- tools/testing/selftests/powerpc/ptrace/core-pkey.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c index 1a70a96f0bfe..4e8d0ce1ff58 100644 --- a/tools/testing/selftests/powerpc/ptrace/core-pkey.c +++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c @@ -383,7 +383,7 @@ static int setup_core_pattern(char **core_pattern_, bool *changed_) goto out; } - ret = fread(core_pattern, 1, PATH_MAX, f); + ret = fread(core_pattern, 1, PATH_MAX - 1, f); fclose(f); if (!ret) { perror("Error reading core_pattern file"); @@ -391,6 +391,8 @@ static int setup_core_pattern(char **core_pattern_, bool *changed_) goto out; } + core_pattern[ret] = '\0'; + /* Check whether we can predict the name of the core file. */ if (!strcmp(core_pattern, "core") || !strcmp(core_pattern, "core.%p")) *changed_ = false; From 4d0eea415216fe3791da2f65eb41399e70c7bedf Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Sat, 29 Oct 2022 19:16:26 +0800 Subject: [PATCH 3221/4122] powerpc/83xx/mpc832x_rdb: call platform_device_put() in error case in of_fsl_spi_probe() If platform_device_add() is not called or failed, it can not call platform_device_del() to clean up memory, it should call platform_device_put() in error case. 
Fixes: 26f6cb999366 ("[POWERPC] fsl_soc: add support for fsl_spi") Signed-off-by: Yang Yingliang Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221029111626.429971-1-yangyingliang@huawei.com --- arch/powerpc/platforms/83xx/mpc832x_rdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/83xx/mpc832x_rdb.c b/arch/powerpc/platforms/83xx/mpc832x_rdb.c index e12cb44e717f..caa96edf0e72 100644 --- a/arch/powerpc/platforms/83xx/mpc832x_rdb.c +++ b/arch/powerpc/platforms/83xx/mpc832x_rdb.c @@ -107,7 +107,7 @@ static int __init of_fsl_spi_probe(char *type, char *compatible, u32 sysclk, goto next; unreg: - platform_device_del(pdev); + platform_device_put(pdev); err: pr_err("%pOF: registration failed\n", np); next: From 03f7c1d2a49acd30e38789cd809d3300721e9b0e Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Wed, 30 Nov 2022 23:15:13 +0530 Subject: [PATCH 3222/4122] powerpc/hv-gpci: Fix hv_gpci event list Based on getPerfCountInfo v1.018 documentation, some of the hv_gpci events were deprecated for platform firmware that supports counter_info_version 0x8 or above. Fix the hv_gpci event list by adding a new attribute group called "hv_gpci_event_attrs_v6" and a "ENABLE_EVENTS_COUNTERINFO_V6" macro to enable these events for platform firmware that supports counter_info_version 0x6 or below. Also assign the hv_gpci event list based on the output counter info version of the underlying platform.
Fixes: 97bf2640184f ("powerpc/perf/hv-gpci: add the remaining gpci requests") Signed-off-by: Kajol Jain Reviewed-by: Madhavan Srinivasan Reviewed-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221130174513.87501-1-kjain@linux.ibm.com --- arch/powerpc/perf/hv-gpci-requests.h | 4 ++++ arch/powerpc/perf/hv-gpci.c | 35 ++++++++++++++++++++++++++-- arch/powerpc/perf/hv-gpci.h | 1 + arch/powerpc/perf/req-gen/perf.h | 20 ++++++++++++++++ 4 files changed, 58 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/perf/hv-gpci-requests.h b/arch/powerpc/perf/hv-gpci-requests.h index 8965b4463d43..5e86371a20c7 100644 --- a/arch/powerpc/perf/hv-gpci-requests.h +++ b/arch/powerpc/perf/hv-gpci-requests.h @@ -79,6 +79,7 @@ REQUEST(__field(0, 8, partition_id) ) #include I(REQUEST_END) +#ifdef ENABLE_EVENTS_COUNTERINFO_V6 /* * Not available for counter_info_version >= 0x8, use * run_instruction_cycles_by_partition(0x100) instead. @@ -92,6 +93,7 @@ REQUEST(__field(0, 8, partition_id) __count(0x10, 8, cycles) ) #include I(REQUEST_END) +#endif #define REQUEST_NAME system_performance_capabilities #define REQUEST_NUM 0x40 @@ -103,6 +105,7 @@ REQUEST(__field(0, 1, perf_collect_privileged) ) #include I(REQUEST_END) +#ifdef ENABLE_EVENTS_COUNTERINFO_V6 #define REQUEST_NAME processor_bus_utilization_abc_links #define REQUEST_NUM 0x50 #define REQUEST_IDX_KIND "hw_chip_id=?" 
@@ -194,6 +197,7 @@ REQUEST(__field(0, 4, phys_processor_idx) __count(0x28, 8, instructions_completed) ) #include I(REQUEST_END) +#endif /* Processor_core_power_mode (0x95) skipped, no counters */ /* Affinity_domain_information_by_virtual_processor (0xA0) skipped, diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c index 5eb60ed5b5e8..7ff8ff3509f5 100644 --- a/arch/powerpc/perf/hv-gpci.c +++ b/arch/powerpc/perf/hv-gpci.c @@ -70,9 +70,9 @@ static const struct attribute_group format_group = { .attrs = format_attrs, }; -static const struct attribute_group event_group = { +static struct attribute_group event_group = { .name = "events", - .attrs = hv_gpci_event_attrs, + /* .attrs is set in init */ }; #define HV_CAPS_ATTR(_name, _format) \ @@ -330,6 +330,7 @@ static int hv_gpci_init(void) int r; unsigned long hret; struct hv_perf_caps caps; + struct hv_gpci_request_buffer *arg; hv_gpci_assert_offsets_correct(); @@ -353,6 +354,36 @@ static int hv_gpci_init(void) /* sampling not supported */ h_gpci_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + arg = (void *)get_cpu_var(hv_gpci_reqb); + memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); + + /* + * hcall H_GET_PERF_COUNTER_INFO populates the output + * counter_info_version value based on the system hypervisor. + * Pass the counter request 0x10 corresponds to request type + * 'Dispatch_timebase_by_processor', to get the supported + * counter_info_version. + */ + arg->params.counter_request = cpu_to_be32(0x10); + + r = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, + virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); + if (r) { + pr_devel("hcall failed, can't get supported counter_info_version: 0x%x\n", r); + arg->params.counter_info_version_out = 0x8; + } + + /* + * Use counter_info_version_out value to assign + * required hv-gpci event list. 
+ */ + if (arg->params.counter_info_version_out >= 0x8) + event_group.attrs = hv_gpci_event_attrs; + else + event_group.attrs = hv_gpci_event_attrs_v6; + + put_cpu_var(hv_gpci_reqb); + r = perf_pmu_register(&h_gpci_pmu, h_gpci_pmu.name, -1); if (r) return r; diff --git a/arch/powerpc/perf/hv-gpci.h b/arch/powerpc/perf/hv-gpci.h index 4d108262bed7..c72020912dea 100644 --- a/arch/powerpc/perf/hv-gpci.h +++ b/arch/powerpc/perf/hv-gpci.h @@ -26,6 +26,7 @@ enum { #define REQUEST_FILE "../hv-gpci-requests.h" #define NAME_LOWER hv_gpci #define NAME_UPPER HV_GPCI +#define ENABLE_EVENTS_COUNTERINFO_V6 #include "req-gen/perf.h" #undef REQUEST_FILE #undef NAME_LOWER diff --git a/arch/powerpc/perf/req-gen/perf.h b/arch/powerpc/perf/req-gen/perf.h index fa9bc804e67a..6b2a59fefffa 100644 --- a/arch/powerpc/perf/req-gen/perf.h +++ b/arch/powerpc/perf/req-gen/perf.h @@ -139,6 +139,26 @@ PMU_EVENT_ATTR_STRING( \ #define REQUEST_(r_name, r_value, r_idx_1, r_fields) \ r_fields +/* Generate event list for platforms with counter_info_version 0x6 or below */ +static __maybe_unused struct attribute *hv_gpci_event_attrs_v6[] = { +#include REQUEST_FILE + NULL +}; + +/* + * Based on getPerfCountInfo v1.018 documentation, some of the hv-gpci + * events were deprecated for platform firmware that supports + * counter_info_version 0x8 or above. + * Those deprecated events are still part of platform firmware that + * support counter_info_version 0x6 and below. As per the getPerfCountInfo + * v1.018 documentation there is no counter_info_version 0x7. + * Undefining macro ENABLE_EVENTS_COUNTERINFO_V6, to disable the addition of + * deprecated events in "hv_gpci_event_attrs" attribute group, for platforms + * that supports counter_info_version 0x8 or above. 
+ */ +#undef ENABLE_EVENTS_COUNTERINFO_V6 + +/* Generate event list for platforms with counter_info_version 0x8 or above*/ static __maybe_unused struct attribute *hv_gpci_event_attrs[] = { #include REQUEST_FILE NULL From 0e23347f1e0f2b1c98f87a4088231d0d6f59b962 Mon Sep 17 00:00:00 2001 From: Rohan McLure Date: Thu, 1 Dec 2022 18:10:13 +1100 Subject: [PATCH 3223/4122] powerpc/64: Add INTERRUPT_SANITIZE_REGISTERS Kconfig Add Kconfig option for enabling clearing of registers on arrival in an interrupt handler. This reduces the speculation influence of registers on kernel internals. The option will be consumed by 64-bit systems that feature speculation and wish to implement this mitigation. This patch only introduces the Kconfig option, no actual mitigations. The primary overhead of this mitigation lies in an increased number of registers that must be saved and restored by interrupt handlers on Book3S systems. Enable by default on Book3E systems, which prior to this patch eagerly save and restore register state, meaning that the mitigation when implemented will have minimal overhead. Acked-by: Nicholas Piggin Signed-off-by: Rohan McLure Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221201071019.1953023-1-rmclure@linux.ibm.com --- arch/powerpc/Kconfig | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index fe2aa445b654..aec1431be06e 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -534,6 +534,15 @@ config HOTPLUG_CPU Say N if you are unsure. +config INTERRUPT_SANITIZE_REGISTERS + bool "Clear gprs on interrupt arrival" + depends on PPC64 && ARCH_HAS_SYSCALL_WRAPPER + default PPC_BOOK3E_64 + help + Reduce the influence of user register state on interrupt handlers and + syscalls through clearing user state from registers before handling + the exception. 
+ config PPC_QUEUED_SPINLOCKS bool "Queued spinlocks" if EXPERT depends on SMP From cbf892ba56677b942020d2bc7ca9b79281fa0bcc Mon Sep 17 00:00:00 2001 From: Rohan McLure Date: Thu, 1 Dec 2022 18:10:14 +1100 Subject: [PATCH 3224/4122] powerpc/64: Add interrupt register sanitisation macros Include in asm/ppc_asm.h macros to be used in multiple successive patches to implement zeroising architected registers in interrupt handlers. Registers will be sanitised in this fashion in future patches to reduce the speculation influence of user-controlled register values. These mitigations will be configurable through the CONFIG_INTERRUPT_SANITIZE_REGISTERS Kconfig option. Included are macros for conditionally zeroising registers and restoring as required with the mitigation enabled. With the mitigation disabled, non-volatiles must be restored on demand at separate locations to those required by the mitigation. Reviewed-by: Nicholas Piggin Signed-off-by: Rohan McLure Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221201071019.1953023-2-rmclure@linux.ibm.com --- arch/powerpc/include/asm/ppc_asm.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 753a2757bcd4..d2f44612f4b0 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -74,6 +74,25 @@ #define SAVE_GPR(n, base) SAVE_GPRS(n, n, base) #define REST_GPR(n, base) REST_GPRS(n, n, base) +/* macros for handling user register sanitisation */ +#ifdef CONFIG_INTERRUPT_SANITIZE_REGISTERS +#define SANITIZE_SYSCALL_GPRS() ZEROIZE_GPR(0); \ + ZEROIZE_GPRS(5, 12); \ + ZEROIZE_NVGPRS() +#define SANITIZE_GPR(n) ZEROIZE_GPR(n) +#define SANITIZE_GPRS(start, end) ZEROIZE_GPRS(start, end) +#define SANITIZE_NVGPRS() ZEROIZE_NVGPRS() +#define SANITIZE_RESTORE_NVGPRS() REST_NVGPRS(r1) +#define HANDLER_RESTORE_NVGPRS() +#else +#define SANITIZE_SYSCALL_GPRS() +#define SANITIZE_GPR(n) +#define 
SANITIZE_GPRS(start, end) +#define SANITIZE_NVGPRS() +#define SANITIZE_RESTORE_NVGPRS() +#define HANDLER_RESTORE_NVGPRS() REST_NVGPRS(r1) +#endif /* CONFIG_INTERRUPT_SANITIZE_REGISTERS */ + #define SAVE_FPR(n, base) stfd n,8*TS_FPRWIDTH*(n)(base) #define SAVE_2FPRS(n, base) SAVE_FPR(n, base); SAVE_FPR(n+1, base) #define SAVE_4FPRS(n, base) SAVE_2FPRS(n, base); SAVE_2FPRS(n+2, base) From 75c5d6b1e194c341371639469fcb8691afa0e254 Mon Sep 17 00:00:00 2001 From: Rohan McLure Date: Thu, 1 Dec 2022 18:10:15 +1100 Subject: [PATCH 3225/4122] powerpc/64: Sanitise common exit code for interrupts Interrupt code is shared between Book3E/S 64-bit systems for interrupt handlers. Ensure that exit code correctly restores non-volatile gprs on each system when CONFIG_INTERRUPT_SANITIZE_REGISTERS is enabled. Also introduce macros for clearing/restoring registers on interrupt entry for when this configuration option is either disabled or enabled. Reviewed-by: Nicholas Piggin Signed-off-by: Rohan McLure Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221201071019.1953023-3-rmclure@linux.ibm.com --- arch/powerpc/kernel/interrupt_64.S | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S index 321992c1c9f9..dd04b0ba3959 100644 --- a/arch/powerpc/kernel/interrupt_64.S +++ b/arch/powerpc/kernel/interrupt_64.S @@ -408,9 +408,11 @@ interrupt_return_\srr\()_user: /* make backtraces match the _kernel variant */ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user) addi r3,r1,STACK_INT_FRAME_REGS bl interrupt_exit_user_prepare +#ifndef CONFIG_INTERRUPT_SANITIZE_REGISTERS cmpdi r3,0 bne- .Lrestore_nvgprs_\srr .Lrestore_nvgprs_\srr\()_cont: +#endif std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ #ifdef CONFIG_PPC_BOOK3S .Linterrupt_return_\srr\()_user_rst_start: @@ -424,6 +426,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user) stb r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS 
.Lfast_user_interrupt_return_\srr\(): + SANITIZE_RESTORE_NVGPRS() #ifdef CONFIG_PPC_BOOK3S .ifc \srr,srr lbz r4,PACASRR_VALID(r13) @@ -493,9 +496,11 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) b . /* prevent speculative execution */ .Linterrupt_return_\srr\()_user_rst_end: +#ifndef CONFIG_INTERRUPT_SANITIZE_REGISTERS .Lrestore_nvgprs_\srr\(): REST_NVGPRS(r1) b .Lrestore_nvgprs_\srr\()_cont +#endif #ifdef CONFIG_PPC_BOOK3S interrupt_return_\srr\()_user_restart: @@ -585,6 +590,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel) stb r11,PACAIRQHAPPENED(r13) // clear the possible HARD_DIS .Lfast_kernel_interrupt_return_\srr\(): + SANITIZE_RESTORE_NVGPRS() cmpdi cr1,r3,0 #ifdef CONFIG_PPC_BOOK3S .ifc \srr,srr From 2487fd2e6d61b5293eed8ecd25add3cc78593d38 Mon Sep 17 00:00:00 2001 From: Rohan McLure Date: Thu, 1 Dec 2022 18:10:16 +1100 Subject: [PATCH 3226/4122] powerpc/64s: IOption for MSR stored in r12 Interrupt handlers in asm/exceptions-64s.S contain a great deal of common code produced by the GEN_COMMON macros. Currently, at the exit point of the macro, r12 will contain the contents of the MSR. A future patch will cause these macros to zeroise architected registers to avoid potential speculation influence of user data. Provide an IOption that signals that r12 must be retained, as the interrupt handler assumes it to hold the contents of the MSR. 
Reviewed-by: Nicholas Piggin Signed-off-by: Rohan McLure Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221201071019.1953023-4-rmclure@linux.ibm.com --- arch/powerpc/kernel/exceptions-64s.S | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index ac3b0580224e..42b7c3212f29 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -111,6 +111,7 @@ name: #define ISTACK .L_ISTACK_\name\() /* Set regular kernel stack */ #define __ISTACK(name) .L_ISTACK_ ## name #define IKUAP .L_IKUAP_\name\() /* Do KUAP lock */ +#define IMSR_R12 .L_IMSR_R12_\name\() /* Assumes MSR saved to r12 */ #define INT_DEFINE_BEGIN(n) \ .macro int_define_ ## n name @@ -176,6 +177,9 @@ do_define_int n .ifndef IKUAP IKUAP=1 .endif + .ifndef IMSR_R12 + IMSR_R12=0 + .endif .endm /* @@ -1751,6 +1755,7 @@ INT_DEFINE_BEGIN(fp_unavailable) #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_REAL=1 #endif + IMSR_R12=1 INT_DEFINE_END(fp_unavailable) EXC_REAL_BEGIN(fp_unavailable, 0x800, 0x100) @@ -2384,6 +2389,7 @@ INT_DEFINE_BEGIN(altivec_unavailable) #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_REAL=1 #endif + IMSR_R12=1 INT_DEFINE_END(altivec_unavailable) EXC_REAL_BEGIN(altivec_unavailable, 0xf20, 0x20) @@ -2433,6 +2439,7 @@ INT_DEFINE_BEGIN(vsx_unavailable) #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_REAL=1 #endif + IMSR_R12=1 INT_DEFINE_END(vsx_unavailable) EXC_REAL_BEGIN(vsx_unavailable, 0xf40, 0x20) From 1df45d78b8a89da6544fab5267e8f5da15073d28 Mon Sep 17 00:00:00 2001 From: Rohan McLure Date: Thu, 1 Dec 2022 18:10:17 +1100 Subject: [PATCH 3227/4122] powerpc/64s: Zeroise gprs on interrupt routine entry on Book3S Zeroise user state in gprs (assign to zero) to reduce the influence of user registers on speculation within kernel syscall handlers. 
Clears occur at the very beginning of the sc and scv 0 interrupt handlers, with restores occurring following the execution of the syscall handler. Zeroise GPRS r0, r2-r11, r14-r31, on entry into the kernel for all other interrupt sources. The remaining gprs are overwritten by entry macros to interrupt handlers, irrespective of whether or not a given handler consumes these register values. If an interrupt does not select the IMSR_R12 IOption, zeroise r12. Prior to this commit, r14-r31 are restored on a per-interrupt basis at exit, but now they are always restored on 64bit Book3S. Remove explicit REST_NVGPRS invocations on 64-bit Book3S. 32-bit systems do not clear user registers on interrupt, and continue to depend on the return value of interrupt_exit_user_prepare to determine whether or not to restore non-volatiles. The mmap_bench benchmark in selftests should rapidly invoke pagefaults. See ~0.8% performance regression with this mitigation, but this indicates the worst-case performance due to heavier-weight interrupt handlers. This mitigation is able to be enabled/disabled through CONFIG_INTERRUPT_SANITIZE_REGISTERS. 
Reviewed-by: Nicholas Piggin Signed-off-by: Rohan McLure Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221201071019.1953023-5-rmclure@linux.ibm.com --- arch/powerpc/kernel/exceptions-64s.S | 27 ++++++++++++++++++--------- arch/powerpc/kernel/interrupt_64.S | 16 ++++++++++++++-- 2 files changed, 32 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 42b7c3212f29..429096b037d7 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -506,6 +506,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real, text) std r10,0(r1) /* make stack chain pointer */ std r0,GPR0(r1) /* save r0 in stackframe */ std r10,GPR1(r1) /* save r1 in stackframe */ + SANITIZE_GPR(0) /* Mark our [H]SRRs valid for return */ li r10,1 @@ -548,8 +549,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) std r9,GPR11(r1) std r10,GPR12(r1) std r11,GPR13(r1) + .if !IMSR_R12 + SANITIZE_GPRS(9, 12) + .else + SANITIZE_GPRS(9, 11) + .endif SAVE_NVGPRS(r1) + SANITIZE_NVGPRS() .if IDAR .if IISIDE @@ -581,8 +588,8 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_CFAR) ld r10,IAREA+EX_CTR(r13) std r10,_CTR(r1) - std r2,GPR2(r1) /* save r2 in stackframe */ - SAVE_GPRS(3, 8, r1) /* save r3 - r8 in stackframe */ + SAVE_GPRS(2, 8, r1) /* save r2 - r8 in stackframe */ + SANITIZE_GPRS(2, 8) mflr r9 /* Get LR, later save to stack */ LOAD_PACA_TOC() /* get kernel TOC into r2 */ std r9,_LINK(r1) @@ -700,6 +707,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) mtlr r9 ld r9,_CCR(r1) mtcr r9 + SANITIZE_RESTORE_NVGPRS() REST_GPRS(2, 13, r1) REST_GPR(0, r1) /* restore original r1. */ @@ -1445,7 +1453,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) * do_break() may have changed the NV GPRS while handling a breakpoint. * If so, we need to restore them with their updated values. 
*/ - REST_NVGPRS(r1) + HANDLER_RESTORE_NVGPRS() b interrupt_return_srr @@ -1671,7 +1679,7 @@ EXC_COMMON_BEGIN(alignment_common) GEN_COMMON alignment addi r3,r1,STACK_INT_FRAME_REGS bl alignment_exception - REST_NVGPRS(r1) /* instruction emulation may change GPRs */ + HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */ b interrupt_return_srr @@ -1737,7 +1745,7 @@ EXC_COMMON_BEGIN(program_check_common) .Ldo_program_check: addi r3,r1,STACK_INT_FRAME_REGS bl program_check_exception - REST_NVGPRS(r1) /* instruction emulation may change GPRs */ + HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */ b interrupt_return_srr @@ -2169,7 +2177,7 @@ EXC_COMMON_BEGIN(emulation_assist_common) GEN_COMMON emulation_assist addi r3,r1,STACK_INT_FRAME_REGS bl emulation_assist_interrupt - REST_NVGPRS(r1) /* instruction emulation may change GPRs */ + HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */ b interrupt_return_hsrr @@ -2501,7 +2509,7 @@ EXC_COMMON_BEGIN(facility_unavailable_common) GEN_COMMON facility_unavailable addi r3,r1,STACK_INT_FRAME_REGS bl facility_unavailable_exception - REST_NVGPRS(r1) /* instruction emulation may change GPRs */ + HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */ b interrupt_return_srr @@ -2529,7 +2537,8 @@ EXC_COMMON_BEGIN(h_facility_unavailable_common) GEN_COMMON h_facility_unavailable addi r3,r1,STACK_INT_FRAME_REGS bl facility_unavailable_exception - REST_NVGPRS(r1) /* XXX Shouldn't be necessary in practice */ + /* XXX Shouldn't be necessary in practice */ + HANDLER_RESTORE_NVGPRS() b interrupt_return_hsrr @@ -2755,7 +2764,7 @@ EXC_COMMON_BEGIN(altivec_assist_common) addi r3,r1,STACK_INT_FRAME_REGS #ifdef CONFIG_ALTIVEC bl altivec_assist_exception - REST_NVGPRS(r1) /* instruction emulation may change GPRs */ + HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */ #else bl unknown_exception #endif diff --git a/arch/powerpc/kernel/interrupt_64.S 
b/arch/powerpc/kernel/interrupt_64.S index dd04b0ba3959..fccc34489add 100644 --- a/arch/powerpc/kernel/interrupt_64.S +++ b/arch/powerpc/kernel/interrupt_64.S @@ -96,6 +96,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) * but this is the best we can do. */ + /* + * Zero user registers to prevent influencing speculative execution + * state of kernel code. + */ + SANITIZE_SYSCALL_GPRS() bl system_call_exception .Lsyscall_vectored_\name\()_exit: @@ -124,6 +129,7 @@ BEGIN_FTR_SECTION HMT_MEDIUM_LOW END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) + SANITIZE_RESTORE_NVGPRS() cmpdi r3,0 bne .Lsyscall_vectored_\name\()_restore_regs @@ -159,7 +165,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ld r4,_LINK(r1) ld r5,_XER(r1) - REST_NVGPRS(r1) + HANDLER_RESTORE_NVGPRS() REST_GPR(0, r1) mtcr r2 mtctr r3 @@ -275,6 +281,11 @@ END_BTB_FLUSH_SECTION wrteei 1 #endif + /* + * Zero user registers to prevent influencing speculative execution + * state of kernel code. + */ + SANITIZE_SYSCALL_GPRS() bl system_call_exception .Lsyscall_exit: @@ -315,6 +326,7 @@ BEGIN_FTR_SECTION stdcx. r0,0,r1 /* to clear the reservation */ END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) + SANITIZE_RESTORE_NVGPRS() cmpdi r3,0 bne .Lsyscall_restore_regs /* Zero volatile regs that may contain sensitive kernel data */ @@ -342,7 +354,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) .Lsyscall_restore_regs: ld r3,_CTR(r1) ld r4,_XER(r1) - REST_NVGPRS(r1) + HANDLER_RESTORE_NVGPRS() mtctr r3 mtspr SPRN_XER,r4 REST_GPR(0, r1) From efe1691ac814e4cf3653538b701662cbd905bddc Mon Sep 17 00:00:00 2001 From: Rohan McLure Date: Thu, 1 Dec 2022 18:10:18 +1100 Subject: [PATCH 3228/4122] powerpc/64e: Clear gprs on interrupt routine entry on Book3E Zero GPRS r14-r31 on entry into the kernel for interrupt sources to limit influence of user-space values in potential speculation gadgets. Prior to this commit, all other GPRS are reassigned during the common prologue to interrupt handlers and so need not be zeroised explicitly. 
This may be done safely, without loss of register state prior to the interrupt, as the common prologue saves the initial values of non-volatiles, which are unconditionally restored in interrupt_64.S. Mitigation defaults to enabled by INTERRUPT_SANITIZE_REGISTERS. Reviewed-by: Nicholas Piggin Signed-off-by: Rohan McLure Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221201071019.1953023-6-rmclure@linux.ibm.com --- arch/powerpc/kernel/exceptions-64e.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index b9cec22df9f9..3f86091e68b3 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -358,7 +358,6 @@ ret_from_mc_except: std r14,PACA_EXMC+EX_R14(r13); \ std r15,PACA_EXMC+EX_R15(r13) - /* Core exception code for all exceptions except TLB misses. */ #define EXCEPTION_COMMON_LVL(n, scratch, excf) \ exc_##n##_common: \ @@ -394,7 +393,8 @@ exc_##n##_common: \ std r12,STACK_INT_FRAME_MARKER(r1); /* mark the frame */ \ std r3,_TRAP(r1); /* set trap number */ \ std r0,RESULT(r1); /* clear regs->result */ \ - SAVE_NVGPRS(r1); + SAVE_NVGPRS(r1); \ + SANITIZE_NVGPRS(); /* minimise speculation influence */ #define EXCEPTION_COMMON(n) \ EXCEPTION_COMMON_LVL(n, SPRN_SPRG_GEN_SCRATCH, PACA_EXGEN) From 7cd882df9485988f7d9b3fae04fde4e95a4c7a74 Mon Sep 17 00:00:00 2001 From: Rohan McLure Date: Thu, 1 Dec 2022 18:10:19 +1100 Subject: [PATCH 3229/4122] powerpc/64: Sanitise user registers on interrupt in pseries, POWERNV Cause pseries and POWERNV platforms to default to zeroising all potentially user-defined registers when entering the kernel by means of any interrupt source, reducing user-influence of the kernel and the likelihood of producing speculation gadgets.
Acked-by: Nicholas Piggin Signed-off-by: Rohan McLure Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221201071019.1953023-7-rmclure@linux.ibm.com --- arch/powerpc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index aec1431be06e..e21d6de797d6 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -537,7 +537,7 @@ config HOTPLUG_CPU config INTERRUPT_SANITIZE_REGISTERS bool "Clear gprs on interrupt arrival" depends on PPC64 && ARCH_HAS_SYSCALL_WRAPPER - default PPC_BOOK3E_64 + default PPC_BOOK3E_64 || PPC_PSERIES || PPC_POWERNV help Reduce the influence of user register state on interrupt handlers and syscalls through clearing user state from registers before handling From ad050d2390fccb22aa3e6f65e11757ce7a5a7ca5 Mon Sep 17 00:00:00 2001 From: Michael Jeanson Date: Thu, 1 Dec 2022 11:14:42 -0500 Subject: [PATCH 3230/4122] powerpc/ftrace: fix syscall tracing on PPC64_ELF_ABI_V1 In v5.7 the powerpc syscall entry/exit logic was rewritten in C, on PPC64_ELF_ABI_V1 this resulted in the symbols in the syscall table changing from their dot prefixed variant to the non-prefixed ones. Since ftrace prefixes a dot to the syscall names when matching them to build its syscall event list, this resulted in no syscall events being available. Remove the PPC64_ELF_ABI_V1 specific version of arch_syscall_match_sym_name to have the same behavior across all powerpc variants. 
Fixes: 68b34588e202 ("powerpc/64/sycall: Implement syscall entry/exit logic in C") Cc: stable@vger.kernel.org # v5.7+ Signed-off-by: Michael Jeanson Reviewed-by: Mathieu Desnoyers Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221201161442.2127231-1-mjeanson@efficios.com --- arch/powerpc/include/asm/ftrace.h | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index ade406dc6504..441c5f08258b 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -71,17 +71,6 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, * those. */ #define ARCH_HAS_SYSCALL_MATCH_SYM_NAME -#ifdef CONFIG_PPC64_ELF_ABI_V1 -static inline bool arch_syscall_match_sym_name(const char *sym, const char *name) -{ - /* We need to skip past the initial dot, and the __se_sys alias */ - return !strcmp(sym + 1, name) || - (!strncmp(sym, ".__se_sys", 9) && !strcmp(sym + 6, name)) || - (!strncmp(sym, ".ppc_", 5) && !strcmp(sym + 5, name + 4)) || - (!strncmp(sym, ".ppc32_", 7) && !strcmp(sym + 7, name + 4)) || - (!strncmp(sym, ".ppc64_", 7) && !strcmp(sym + 7, name + 4)); -} -#else static inline bool arch_syscall_match_sym_name(const char *sym, const char *name) { return !strcmp(sym, name) || @@ -90,7 +79,6 @@ static inline bool arch_syscall_match_sym_name(const char *sym, const char *name (!strncmp(sym, "ppc32_", 6) && !strcmp(sym + 6, name + 4)) || (!strncmp(sym, "ppc64_", 6) && !strcmp(sym + 6, name + 4)); } -#endif /* CONFIG_PPC64_ELF_ABI_V1 */ #endif /* CONFIG_FTRACE_SYSCALLS */ #if defined(CONFIG_PPC64) && defined(CONFIG_FUNCTION_TRACER) From 31f81401e23fb88cc030cd586abd28740e6c8136 Mon Sep 17 00:00:00 2001 From: Wang Yufen Date: Mon, 21 Nov 2022 19:27:34 +0800 Subject: [PATCH 3231/4122] crypto: qat - fix error return code in adf_probe Fix to return a negative error code -EINVAL instead of 0. 
Fixes: 0cec19c761e5 ("crypto: qat - add support for compression for 4xxx") Signed-off-by: Wang Yufen Acked-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_4xxx/adf_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/qat/qat_4xxx/adf_drv.c b/drivers/crypto/qat/qat_4xxx/adf_drv.c index 2f212561acc4..670a58b25cb1 100644 --- a/drivers/crypto/qat/qat_4xxx/adf_drv.c +++ b/drivers/crypto/qat/qat_4xxx/adf_drv.c @@ -261,6 +261,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) hw_data->accel_capabilities_mask = hw_data->get_accel_cap(accel_dev); if (!hw_data->accel_capabilities_mask) { dev_err(&pdev->dev, "Failed to get capabilities mask.\n"); + ret = -EINVAL; goto out_err; } From 6a83830f649a614aca445bbcadbd582c7929e63d Mon Sep 17 00:00:00 2001 From: Nikolaus Voss Date: Mon, 21 Nov 2022 15:12:41 +0100 Subject: [PATCH 3232/4122] crypto: caam - warn if blob_gen key is insecure If CAAM is not in "trusted" or "secure" state, a fixed non-volatile key is used instead of the unique device key. This is the default mode of operation without secure boot (HAB). In this scenario, CAAM encrypted blobs should be used only for testing but not in a production environment, so issue a warning. 
Signed-off-by: Nikolaus Voss Reviewed-by: Ahmad Fatoum Signed-off-by: Herbert Xu --- drivers/crypto/caam/blob_gen.c | 9 +++++++++ drivers/crypto/caam/regs.h | 3 +++ 2 files changed, 12 insertions(+) diff --git a/drivers/crypto/caam/blob_gen.c b/drivers/crypto/caam/blob_gen.c index 6345c7269eb0..1f65df489847 100644 --- a/drivers/crypto/caam/blob_gen.c +++ b/drivers/crypto/caam/blob_gen.c @@ -6,6 +6,7 @@ #define pr_fmt(fmt) "caam blob_gen: " fmt +#include #include #include @@ -61,12 +62,14 @@ static void caam_blob_job_done(struct device *dev, u32 *desc, u32 err, void *con int caam_process_blob(struct caam_blob_priv *priv, struct caam_blob_info *info, bool encap) { + const struct caam_drv_private *ctrlpriv; struct caam_blob_job_result testres; struct device *jrdev = &priv->jrdev; dma_addr_t dma_in, dma_out; int op = OP_PCLID_BLOB; size_t output_len; u32 *desc; + u32 moo; int ret; if (info->key_mod_len > CAAM_BLOB_KEYMOD_LENGTH) @@ -100,6 +103,12 @@ int caam_process_blob(struct caam_blob_priv *priv, goto out_unmap_in; } + ctrlpriv = dev_get_drvdata(jrdev->parent); + moo = FIELD_GET(CSTA_MOO, ioread32(&ctrlpriv->ctrl->perfmon.status)); + if (moo != CSTA_MOO_SECURE && moo != CSTA_MOO_TRUSTED) + dev_warn(jrdev, + "using insecure test key, enable HAB to use unique device key!\n"); + /* * A data blob is encrypted using a blob key (BK); a random number. * The BK is used as an AES-CCM key. 
The initial block (B0) and the diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h index 66d6dad841bb..66928f8a0c4b 100644 --- a/drivers/crypto/caam/regs.h +++ b/drivers/crypto/caam/regs.h @@ -426,6 +426,9 @@ struct caam_perfmon { u32 rsvd2; #define CSTA_PLEND BIT(10) #define CSTA_ALT_PLEND BIT(18) +#define CSTA_MOO GENMASK(9, 8) +#define CSTA_MOO_SECURE 1 +#define CSTA_MOO_TRUSTED 2 u32 status; /* CSTA - CAAM Status */ u64 rsvd3; From 5b11d1a360ea23c80c6d4ec3f5986a788d0a0995 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 22 Nov 2022 13:53:38 +0800 Subject: [PATCH 3233/4122] crypto: rsa-pkcs1pad - Use helper to set reqsize The value of reqsize must only be changed through the helper. Signed-off-by: Herbert Xu --- crypto/rsa-pkcs1pad.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c index 3285e3af43e1..3bc76edb3f8a 100644 --- a/crypto/rsa-pkcs1pad.c +++ b/crypto/rsa-pkcs1pad.c @@ -579,6 +579,10 @@ static int pkcs1pad_init_tfm(struct crypto_akcipher *tfm) return PTR_ERR(child_tfm); ctx->child = child_tfm; + + akcipher_set_reqsize(tfm, sizeof(struct pkcs1pad_request) + + crypto_akcipher_reqsize(child_tfm)); + return 0; } @@ -674,7 +678,6 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb) inst->alg.set_pub_key = pkcs1pad_set_pub_key; inst->alg.set_priv_key = pkcs1pad_set_priv_key; inst->alg.max_size = pkcs1pad_get_max_size; - inst->alg.reqsize = sizeof(struct pkcs1pad_request) + rsa_alg->reqsize; inst->free = pkcs1pad_free; From bd71e0dced921e00599052b445f9a9f7916a5452 Mon Sep 17 00:00:00 2001 From: Yushan Zhou Date: Tue, 22 Nov 2022 15:49:00 +0800 Subject: [PATCH 3234/4122] crypto: marvell/octeontx - remove redundant NULL check release_firmware() checks whether firmware pointer is NULL. Remove the redundant NULL check in release_tar_archive(). 
Signed-off-by: Yushan Zhou Signed-off-by: Herbert Xu --- drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c b/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c index df9c2b8747e6..c4250e5fcf8f 100644 --- a/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c +++ b/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c @@ -345,8 +345,7 @@ static void release_tar_archive(struct tar_arch_info_t *tar_arch) kfree(curr); } - if (tar_arch->fw) - release_firmware(tar_arch->fw); + release_firmware(tar_arch->fw); kfree(tar_arch); } From 56861cbde1b9f3b34d300e6ba87f2c3de1a9c309 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 22 Nov 2022 17:24:01 +0800 Subject: [PATCH 3235/4122] crypto: kpp - Add helper to set reqsize The value of reqsize should only be changed through a helper. To do so we need to first add a helper for this. Signed-off-by: Herbert Xu --- include/crypto/internal/kpp.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/crypto/internal/kpp.h b/include/crypto/internal/kpp.h index 9cb0662ebe87..31ff3c1986ef 100644 --- a/include/crypto/internal/kpp.h +++ b/include/crypto/internal/kpp.h @@ -50,6 +50,12 @@ static inline void *kpp_request_ctx(struct kpp_request *req) return req->__ctx; } +static inline void kpp_set_reqsize(struct crypto_kpp *kpp, + unsigned int reqsize) +{ + crypto_kpp_alg(kpp)->reqsize = reqsize; +} + static inline void *kpp_tfm_ctx(struct crypto_kpp *tfm) { return tfm->base.__crt_ctx; From 5ba78373561f12d23c975b0a154104a07866f94b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 22 Nov 2022 17:28:38 +0800 Subject: [PATCH 3236/4122] crypto: hisilicon/hpre - Use helper to set reqsize The value of reqsize must only be changed through the helper. 
Signed-off-by: Herbert Xu Reviewed-by: Longfang Liu Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_crypto.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c index ef02dadd6217..5f6d363c9435 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_crypto.c +++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c @@ -740,6 +740,8 @@ static int hpre_dh_init_tfm(struct crypto_kpp *tfm) { struct hpre_ctx *ctx = kpp_tfm_ctx(tfm); + kpp_set_reqsize(tfm, sizeof(struct hpre_asym_request) + HPRE_ALIGN_SZ); + return hpre_ctx_init(ctx, HPRE_V2_ALG_TYPE); } @@ -1165,6 +1167,9 @@ static int hpre_rsa_init_tfm(struct crypto_akcipher *tfm) return PTR_ERR(ctx->rsa.soft_tfm); } + akcipher_set_reqsize(tfm, sizeof(struct hpre_asym_request) + + HPRE_ALIGN_SZ); + ret = hpre_ctx_init(ctx, HPRE_V2_ALG_TYPE); if (ret) crypto_free_akcipher(ctx->rsa.soft_tfm); @@ -1617,6 +1622,8 @@ static int hpre_ecdh_nist_p192_init_tfm(struct crypto_kpp *tfm) ctx->curve_id = ECC_CURVE_NIST_P192; + kpp_set_reqsize(tfm, sizeof(struct hpre_asym_request) + HPRE_ALIGN_SZ); + return hpre_ctx_init(ctx, HPRE_V3_ECC_ALG_TYPE); } @@ -1626,6 +1633,8 @@ static int hpre_ecdh_nist_p256_init_tfm(struct crypto_kpp *tfm) ctx->curve_id = ECC_CURVE_NIST_P256; + kpp_set_reqsize(tfm, sizeof(struct hpre_asym_request) + HPRE_ALIGN_SZ); + return hpre_ctx_init(ctx, HPRE_V3_ECC_ALG_TYPE); } @@ -1635,6 +1644,8 @@ static int hpre_ecdh_nist_p384_init_tfm(struct crypto_kpp *tfm) ctx->curve_id = ECC_CURVE_NIST_P384; + kpp_set_reqsize(tfm, sizeof(struct hpre_asym_request) + HPRE_ALIGN_SZ); + return hpre_ctx_init(ctx, HPRE_V3_ECC_ALG_TYPE); } @@ -1961,6 +1972,8 @@ static int hpre_curve25519_init_tfm(struct crypto_kpp *tfm) { struct hpre_ctx *ctx = kpp_tfm_ctx(tfm); + kpp_set_reqsize(tfm, sizeof(struct hpre_asym_request) + HPRE_ALIGN_SZ); + return hpre_ctx_init(ctx, HPRE_V3_ECC_ALG_TYPE); } @@ -1981,7 +1994,6 @@ 
static struct akcipher_alg rsa = { .max_size = hpre_rsa_max_size, .init = hpre_rsa_init_tfm, .exit = hpre_rsa_exit_tfm, - .reqsize = sizeof(struct hpre_asym_request) + HPRE_ALIGN_SZ, .base = { .cra_ctxsize = sizeof(struct hpre_ctx), .cra_priority = HPRE_CRYPTO_ALG_PRI, @@ -1998,7 +2010,6 @@ static struct kpp_alg dh = { .max_size = hpre_dh_max_size, .init = hpre_dh_init_tfm, .exit = hpre_dh_exit_tfm, - .reqsize = sizeof(struct hpre_asym_request) + HPRE_ALIGN_SZ, .base = { .cra_ctxsize = sizeof(struct hpre_ctx), .cra_priority = HPRE_CRYPTO_ALG_PRI, @@ -2016,7 +2027,6 @@ static struct kpp_alg ecdh_curves[] = { .max_size = hpre_ecdh_max_size, .init = hpre_ecdh_nist_p192_init_tfm, .exit = hpre_ecdh_exit_tfm, - .reqsize = sizeof(struct hpre_asym_request) + HPRE_ALIGN_SZ, .base = { .cra_ctxsize = sizeof(struct hpre_ctx), .cra_priority = HPRE_CRYPTO_ALG_PRI, @@ -2031,7 +2041,6 @@ static struct kpp_alg ecdh_curves[] = { .max_size = hpre_ecdh_max_size, .init = hpre_ecdh_nist_p256_init_tfm, .exit = hpre_ecdh_exit_tfm, - .reqsize = sizeof(struct hpre_asym_request) + HPRE_ALIGN_SZ, .base = { .cra_ctxsize = sizeof(struct hpre_ctx), .cra_priority = HPRE_CRYPTO_ALG_PRI, @@ -2046,7 +2055,6 @@ static struct kpp_alg ecdh_curves[] = { .max_size = hpre_ecdh_max_size, .init = hpre_ecdh_nist_p384_init_tfm, .exit = hpre_ecdh_exit_tfm, - .reqsize = sizeof(struct hpre_asym_request) + HPRE_ALIGN_SZ, .base = { .cra_ctxsize = sizeof(struct hpre_ctx), .cra_priority = HPRE_CRYPTO_ALG_PRI, @@ -2064,7 +2072,6 @@ static struct kpp_alg curve25519_alg = { .max_size = hpre_curve25519_max_size, .init = hpre_curve25519_init_tfm, .exit = hpre_curve25519_exit_tfm, - .reqsize = sizeof(struct hpre_asym_request) + HPRE_ALIGN_SZ, .base = { .cra_ctxsize = sizeof(struct hpre_ctx), .cra_priority = HPRE_CRYPTO_ALG_PRI, From 80e62ad58db084920d8cf23323b713391e09f374 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 22 Nov 2022 17:30:58 +0800 Subject: [PATCH 3237/4122] crypto: qat - Use helper to set reqsize The 
value of reqsize must only be changed through the helper. Signed-off-by: Herbert Xu Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/qat_asym_algs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c index 94a26702aeae..935a7e012946 100644 --- a/drivers/crypto/qat/qat_common/qat_asym_algs.c +++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c @@ -494,6 +494,8 @@ static int qat_dh_init_tfm(struct crypto_kpp *tfm) if (!inst) return -EINVAL; + kpp_set_reqsize(tfm, sizeof(struct qat_asym_request) + 64); + ctx->p_size = 0; ctx->g2 = false; ctx->inst = inst; @@ -1230,6 +1232,8 @@ static int qat_rsa_init_tfm(struct crypto_akcipher *tfm) if (!inst) return -EINVAL; + akcipher_set_reqsize(tfm, sizeof(struct qat_asym_request) + 64); + ctx->key_sz = 0; ctx->inst = inst; return 0; @@ -1252,7 +1256,6 @@ static struct akcipher_alg rsa = { .max_size = qat_rsa_max_size, .init = qat_rsa_init_tfm, .exit = qat_rsa_exit_tfm, - .reqsize = sizeof(struct qat_asym_request) + 64, .base = { .cra_name = "rsa", .cra_driver_name = "qat-rsa", @@ -1269,7 +1272,6 @@ static struct kpp_alg dh = { .max_size = qat_dh_max_size, .init = qat_dh_init_tfm, .exit = qat_dh_exit_tfm, - .reqsize = sizeof(struct qat_asym_request) + 64, .base = { .cra_name = "dh", .cra_driver_name = "qat-dh", From 908d383b6c94be0f89c5e2a5a346d99495efd4d4 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 22 Nov 2022 17:40:51 +0800 Subject: [PATCH 3238/4122] crypto: caam - Use helper to set reqsize The value of reqsize must only be changed through the helper. 
Signed-off-by: Herbert Xu Reviewed-by: Gaurav Jain Signed-off-by: Herbert Xu --- drivers/crypto/caam/caampkc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c index 886727576710..642846693d7c 100644 --- a/drivers/crypto/caam/caampkc.c +++ b/drivers/crypto/caam/caampkc.c @@ -1099,6 +1099,8 @@ static int caam_rsa_init_tfm(struct crypto_akcipher *tfm) { struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + akcipher_set_reqsize(tfm, sizeof(struct caam_rsa_req_ctx)); + ctx->dev = caam_jr_alloc(); if (IS_ERR(ctx->dev)) { @@ -1141,7 +1143,6 @@ static struct caam_akcipher_alg caam_rsa = { .max_size = caam_rsa_max_size, .init = caam_rsa_init_tfm, .exit = caam_rsa_exit_tfm, - .reqsize = sizeof(struct caam_rsa_req_ctx), .base = { .cra_name = "rsa", .cra_driver_name = "rsa-caam", From 93c446cd36a410b31519af7a2dd32e899cc03d06 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 22 Nov 2022 17:42:26 +0800 Subject: [PATCH 3239/4122] crypto: virtio - Use helper to set reqsize The value of reqsize must only be changed through the helper. 
Signed-off-by: Herbert Xu Acked-by: Gonglei Signed-off-by: Herbert Xu --- drivers/crypto/virtio/virtio_crypto_akcipher_algs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c index 168195672e2e..b2979be613b8 100644 --- a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c +++ b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c @@ -479,6 +479,9 @@ static int virtio_crypto_rsa_init_tfm(struct crypto_akcipher *tfm) ctx->enginectx.op.prepare_request = NULL; ctx->enginectx.op.unprepare_request = NULL; + akcipher_set_reqsize(tfm, + sizeof(struct virtio_crypto_akcipher_request)); + return 0; } @@ -505,7 +508,6 @@ static struct virtio_crypto_akcipher_algo virtio_crypto_akcipher_algs[] = { .max_size = virtio_crypto_rsa_max_size, .init = virtio_crypto_rsa_init_tfm, .exit = virtio_crypto_rsa_exit_tfm, - .reqsize = sizeof(struct virtio_crypto_akcipher_request), .base = { .cra_name = "rsa", .cra_driver_name = "virtio-crypto-rsa", @@ -528,7 +530,6 @@ static struct virtio_crypto_akcipher_algo virtio_crypto_akcipher_algs[] = { .max_size = virtio_crypto_rsa_max_size, .init = virtio_crypto_rsa_init_tfm, .exit = virtio_crypto_rsa_exit_tfm, - .reqsize = sizeof(struct virtio_crypto_akcipher_request), .base = { .cra_name = "pkcs1pad(rsa,sha1)", .cra_driver_name = "virtio-pkcs1-rsa-with-sha1", From 3e71e5b0efcc730216f4450b796df4fdd627ecd0 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 22 Nov 2022 18:03:35 +0800 Subject: [PATCH 3240/4122] crypto: akcipher - Move reqsize into tfm The value of reqsize cannot be determined in case of fallbacks. Therefore it must be stored in the tfm and not the alg object. 
Signed-off-by: Herbert Xu --- include/crypto/akcipher.h | 7 ++++--- include/crypto/internal/akcipher.h | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/include/crypto/akcipher.h b/include/crypto/akcipher.h index 5764b46bd1ec..734c213918bd 100644 --- a/include/crypto/akcipher.h +++ b/include/crypto/akcipher.h @@ -43,9 +43,12 @@ struct akcipher_request { * struct crypto_akcipher - user-instantiated objects which encapsulate * algorithms and core processing logic * + * @reqsize: Request context size required by algorithm implementation * @base: Common crypto API algorithm data structure */ struct crypto_akcipher { + unsigned int reqsize; + struct crypto_tfm base; }; @@ -86,7 +89,6 @@ struct crypto_akcipher { * counterpart to @init, used to remove various changes set in * @init. * - * @reqsize: Request context size required by algorithm implementation * @base: Common crypto API algorithm data structure */ struct akcipher_alg { @@ -102,7 +104,6 @@ struct akcipher_alg { int (*init)(struct crypto_akcipher *tfm); void (*exit)(struct crypto_akcipher *tfm); - unsigned int reqsize; struct crypto_alg base; }; @@ -155,7 +156,7 @@ static inline struct akcipher_alg *crypto_akcipher_alg( static inline unsigned int crypto_akcipher_reqsize(struct crypto_akcipher *tfm) { - return crypto_akcipher_alg(tfm)->reqsize; + return tfm->reqsize; } static inline void akcipher_request_set_tfm(struct akcipher_request *req, diff --git a/include/crypto/internal/akcipher.h b/include/crypto/internal/akcipher.h index 8d3220c9ab77..1474a2d890fc 100644 --- a/include/crypto/internal/akcipher.h +++ b/include/crypto/internal/akcipher.h @@ -36,7 +36,7 @@ static inline void *akcipher_request_ctx(struct akcipher_request *req) static inline void akcipher_set_reqsize(struct crypto_akcipher *akcipher, unsigned int reqsize) { - crypto_akcipher_alg(akcipher)->reqsize = reqsize; + akcipher->reqsize = reqsize; } static inline void *akcipher_tfm_ctx(struct crypto_akcipher *tfm) From 
cb99fc0dd1f6985e8b6ade93e3d69f5e33930539 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 22 Nov 2022 18:06:56 +0800 Subject: [PATCH 3241/4122] crypto: dh - Use helper to set reqsize The value of reqsize must only be changed through the helper. Signed-off-by: Herbert Xu --- crypto/dh.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crypto/dh.c b/crypto/dh.c index 99c3b2ef7adc..e39c1bde1ac0 100644 --- a/crypto/dh.c +++ b/crypto/dh.c @@ -318,6 +318,9 @@ static int dh_safe_prime_init_tfm(struct crypto_kpp *tfm) if (IS_ERR(tfm_ctx->dh_tfm)) return PTR_ERR(tfm_ctx->dh_tfm); + kpp_set_reqsize(tfm, sizeof(struct kpp_request) + + crypto_kpp_reqsize(tfm_ctx->dh_tfm)); + return 0; } @@ -593,7 +596,6 @@ static int __maybe_unused __dh_safe_prime_create( inst->alg.max_size = dh_safe_prime_max_size; inst->alg.init = dh_safe_prime_init_tfm; inst->alg.exit = dh_safe_prime_exit_tfm; - inst->alg.reqsize = sizeof(struct kpp_request) + dh_alg->reqsize; inst->alg.base.cra_priority = dh_alg->base.cra_priority; inst->alg.base.cra_module = THIS_MODULE; inst->alg.base.cra_ctxsize = sizeof(struct dh_safe_prime_tfm_ctx); From 4d2b225a67e6df962bbeaad473bfd8f97cfbf478 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 22 Nov 2022 18:09:16 +0800 Subject: [PATCH 3242/4122] crypto: kpp - Move reqsize into tfm The value of reqsize cannot be determined in case of fallbacks. Therefore it must be stored in the tfm and not the alg object. 
Signed-off-by: Herbert Xu --- include/crypto/internal/kpp.h | 2 +- include/crypto/kpp.h | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/crypto/internal/kpp.h b/include/crypto/internal/kpp.h index 31ff3c1986ef..167662407e36 100644 --- a/include/crypto/internal/kpp.h +++ b/include/crypto/internal/kpp.h @@ -53,7 +53,7 @@ static inline void *kpp_request_ctx(struct kpp_request *req) static inline void kpp_set_reqsize(struct crypto_kpp *kpp, unsigned int reqsize) { - crypto_kpp_alg(kpp)->reqsize = reqsize; + kpp->reqsize = reqsize; } static inline void *kpp_tfm_ctx(struct crypto_kpp *tfm) diff --git a/include/crypto/kpp.h b/include/crypto/kpp.h index 24d01e9877c1..33ff32878802 100644 --- a/include/crypto/kpp.h +++ b/include/crypto/kpp.h @@ -37,9 +37,13 @@ struct kpp_request { * struct crypto_kpp - user-instantiated object which encapsulate * algorithms and core processing logic * + * @reqsize: Request context size required by algorithm + * implementation * @base: Common crypto API algorithm data structure */ struct crypto_kpp { + unsigned int reqsize; + struct crypto_tfm base; }; @@ -64,8 +68,6 @@ struct crypto_kpp { * put in place here. * @exit: Undo everything @init did. 
* - * @reqsize: Request context size required by algorithm - * implementation * @base: Common crypto API algorithm data structure */ struct kpp_alg { @@ -79,7 +81,6 @@ struct kpp_alg { int (*init)(struct crypto_kpp *tfm); void (*exit)(struct crypto_kpp *tfm); - unsigned int reqsize; struct crypto_alg base; }; @@ -128,7 +129,7 @@ static inline struct kpp_alg *crypto_kpp_alg(struct crypto_kpp *tfm) static inline unsigned int crypto_kpp_reqsize(struct crypto_kpp *tfm) { - return crypto_kpp_alg(tfm)->reqsize; + return tfm->reqsize; } static inline void kpp_request_set_tfm(struct kpp_request *req, From 3d780c8a9850ad60dee47a8d971ba7888f3d1bd3 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 22 Nov 2022 22:56:19 +0100 Subject: [PATCH 3243/4122] crypto: amlogic - Remove kcalloc without check There is no real point in allocating dedicated memory for the irqs array. MAXFLOW is only 2, so it is easier to allocate the needed space directly within the 'meson_dev' structure. This saves some memory allocation and avoids an indirection when using the irqs array.
Fixes: 48fe583fe541 ("crypto: amlogic - Add crypto accelerator...") Signed-off-by: Christophe JAILLET Signed-off-by: Herbert Xu --- drivers/crypto/amlogic/amlogic-gxl-core.c | 1 - drivers/crypto/amlogic/amlogic-gxl.h | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/crypto/amlogic/amlogic-gxl-core.c b/drivers/crypto/amlogic/amlogic-gxl-core.c index 6e7ae896717c..937187027ad5 100644 --- a/drivers/crypto/amlogic/amlogic-gxl-core.c +++ b/drivers/crypto/amlogic/amlogic-gxl-core.c @@ -237,7 +237,6 @@ static int meson_crypto_probe(struct platform_device *pdev) return err; } - mc->irqs = devm_kcalloc(mc->dev, MAXFLOW, sizeof(int), GFP_KERNEL); for (i = 0; i < MAXFLOW; i++) { mc->irqs[i] = platform_get_irq(pdev, i); if (mc->irqs[i] < 0) diff --git a/drivers/crypto/amlogic/amlogic-gxl.h b/drivers/crypto/amlogic/amlogic-gxl.h index dc0f142324a3..8c0746a1d6d4 100644 --- a/drivers/crypto/amlogic/amlogic-gxl.h +++ b/drivers/crypto/amlogic/amlogic-gxl.h @@ -95,7 +95,7 @@ struct meson_dev { struct device *dev; struct meson_flow *chanlist; atomic_t flow; - int *irqs; + int irqs[MAXFLOW]; #ifdef CONFIG_CRYPTO_DEV_AMLOGIC_GXL_DEBUG struct dentry *dbgfs_dir; #endif From c390c452ebeb44cb979b7374d3acc3859415e86c Mon Sep 17 00:00:00 2001 From: Joe Fradley Date: Tue, 22 Nov 2022 14:54:49 -0800 Subject: [PATCH 3244/4122] crypto: x86/curve25519 - disable gcov curve25519-x86_64.c fails to build when CONFIG_GCOV_KERNEL is enabled. The error is "inline assembly requires more registers than available" thrown from the `fsqr()` function. Therefore, excluding this file from GCOV profiling until this issue is resolved. Thereby allowing CONFIG_GCOV_PROFILE_ALL to be enabled for x86. 
Signed-off-by: Joe Fradley Signed-off-by: Herbert Xu --- arch/x86/crypto/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 3b1d701a4f6c..3e7a329235bd 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -107,3 +107,6 @@ quiet_cmd_perlasm = PERLASM $@ cmd_perlasm = $(PERL) $< > $@ $(obj)/%.S: $(src)/%.pl FORCE $(call if_changed,perlasm) + +# Disable GCOV in odd or sensitive code +GCOV_PROFILE_curve25519-x86_64.o := n From 7bcceb4c9896b1b672b636ae70fe75110d6bf1ad Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Thu, 24 Nov 2022 14:49:40 +0800 Subject: [PATCH 3245/4122] crypto: omap-sham - Use pm_runtime_resume_and_get() in omap_sham_probe() omap_sham_probe() calls pm_runtime_get_sync() and calls pm_runtime_put_sync() later to put usage_counter. However, pm_runtime_get_sync() will increment usage_counter even if it failed. Fix it by replacing it with pm_runtime_resume_and_get() to keep usage counter balanced. Fixes: b359f034c8bf ("crypto: omap-sham - Convert to use pm_runtime API") Signed-off-by: Shang XiaoJing Acked-by: Mark Greer Signed-off-by: Herbert Xu --- drivers/crypto/omap-sham.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index 655a7f5a406a..cbeda59c6b19 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -2114,7 +2114,7 @@ static int omap_sham_probe(struct platform_device *pdev) pm_runtime_enable(dev); - err = pm_runtime_get_sync(dev); + err = pm_runtime_resume_and_get(dev); if (err < 0) { dev_err(dev, "failed to get sync: %d\n", err); goto err_pm; From 14386d471322a204344ae81a28738b71e261d3a0 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 25 Nov 2022 12:36:28 +0800 Subject: [PATCH 3246/4122] crypto: Prepare to move crypto_tfm_ctx The helper crypto_tfm_ctx is only used by the Crypto API algorithm code and should really be in algapi.h.
However, for historical reasons many files relied on it to be in crypto.h. This patch changes those files to use algapi.h instead in preparation for a move. Signed-off-by: Herbert Xu --- arch/arm/crypto/aes-cipher-glue.c | 2 +- arch/arm64/crypto/aes-ce-glue.c | 2 +- arch/arm64/crypto/aes-cipher-glue.c | 2 +- arch/arm64/crypto/sm4-ce-cipher-glue.c | 2 +- arch/x86/crypto/twofish_glue.c | 2 +- crypto/aes_generic.c | 2 +- crypto/aes_ti.c | 2 +- crypto/anubis.c | 2 +- crypto/blowfish_common.c | 3 ++- crypto/blowfish_generic.c | 3 ++- crypto/camellia_generic.c | 2 +- crypto/cast5_generic.c | 2 +- crypto/cast6_generic.c | 2 +- crypto/des_generic.c | 2 +- crypto/fcrypt.c | 2 +- crypto/khazad.c | 2 +- crypto/seed.c | 2 +- crypto/serpent_generic.c | 2 +- crypto/sm4_generic.c | 2 +- crypto/tea.c | 2 +- crypto/twofish_common.c | 2 +- crypto/twofish_generic.c | 2 +- drivers/crypto/nx/nx-842.h | 2 +- include/crypto/aria.h | 2 +- include/crypto/internal/acompress.h | 2 ++ include/crypto/internal/scompress.h | 3 ++- 26 files changed, 30 insertions(+), 25 deletions(-) diff --git a/arch/arm/crypto/aes-cipher-glue.c b/arch/arm/crypto/aes-cipher-glue.c index 8cd00f56800e..6dfaef2d8f91 100644 --- a/arch/arm/crypto/aes-cipher-glue.c +++ b/arch/arm/crypto/aes-cipher-glue.c @@ -7,7 +7,7 @@ */ #include -#include +#include #include asmlinkage void __aes_arm_encrypt(u32 *rk, int rounds, const u8 *in, u8 *out); diff --git a/arch/arm64/crypto/aes-ce-glue.c b/arch/arm64/crypto/aes-ce-glue.c index 56a5f6f0b0c1..e921823ca103 100644 --- a/arch/arm64/crypto/aes-ce-glue.c +++ b/arch/arm64/crypto/aes-ce-glue.c @@ -9,9 +9,9 @@ #include #include #include +#include #include #include -#include #include #include "aes-ce-setkey.h" diff --git a/arch/arm64/crypto/aes-cipher-glue.c b/arch/arm64/crypto/aes-cipher-glue.c index 8caf6dfefce8..4ec55e568941 100644 --- a/arch/arm64/crypto/aes-cipher-glue.c +++ b/arch/arm64/crypto/aes-cipher-glue.c @@ -6,7 +6,7 @@ */ #include -#include +#include #include asmlinkage
void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds); diff --git a/arch/arm64/crypto/sm4-ce-cipher-glue.c b/arch/arm64/crypto/sm4-ce-cipher-glue.c index 76a34ef4abbb..c31d76fb5a17 100644 --- a/arch/arm64/crypto/sm4-ce-cipher-glue.c +++ b/arch/arm64/crypto/sm4-ce-cipher-glue.c @@ -2,11 +2,11 @@ #include #include +#include #include #include #include #include -#include #include MODULE_ALIAS_CRYPTO("sm4"); diff --git a/arch/x86/crypto/twofish_glue.c b/arch/x86/crypto/twofish_glue.c index f9c4adc27404..0614beece279 100644 --- a/arch/x86/crypto/twofish_glue.c +++ b/arch/x86/crypto/twofish_glue.c @@ -38,8 +38,8 @@ * Third Edition. */ +#include #include -#include #include #include #include diff --git a/crypto/aes_generic.c b/crypto/aes_generic.c index 27ab27931813..666474b81c6a 100644 --- a/crypto/aes_generic.c +++ b/crypto/aes_generic.c @@ -48,11 +48,11 @@ */ #include +#include #include #include #include #include -#include #include #include diff --git a/crypto/aes_ti.c b/crypto/aes_ti.c index 205c2c257d49..a3b342f92fab 100644 --- a/crypto/aes_ti.c +++ b/crypto/aes_ti.c @@ -6,7 +6,7 @@ */ #include -#include +#include #include static int aesti_set_key(struct crypto_tfm *tfm, const u8 *in_key, diff --git a/crypto/anubis.c b/crypto/anubis.c index 5da0241ef453..9f0cf61bbc6e 100644 --- a/crypto/anubis.c +++ b/crypto/anubis.c @@ -29,11 +29,11 @@ * */ +#include #include #include #include #include -#include #include #define ANUBIS_MIN_KEY_SIZE 16 diff --git a/crypto/blowfish_common.c b/crypto/blowfish_common.c index 1c072012baff..c0208ce269a3 100644 --- a/crypto/blowfish_common.c +++ b/crypto/blowfish_common.c @@ -14,11 +14,12 @@ * Copyright (c) Kyle McMartin * Copyright (c) 2002 James Morris */ + +#include #include #include #include #include -#include #include #include diff --git a/crypto/blowfish_generic.c b/crypto/blowfish_generic.c index 003b52c6880e..0e74c7242e77 100644 --- a/crypto/blowfish_generic.c +++ b/crypto/blowfish_generic.c @@ -11,11 +11,12 @@ * 
Copyright (c) Kyle McMartin * Copyright (c) 2002 James Morris */ + +#include #include #include #include #include -#include #include #include diff --git a/crypto/camellia_generic.c b/crypto/camellia_generic.c index fd1a88af9e77..c04670cf51ac 100644 --- a/crypto/camellia_generic.c +++ b/crypto/camellia_generic.c @@ -9,7 +9,7 @@ * https://info.isl.ntt.co.jp/crypt/eng/camellia/specifications.html */ -#include +#include #include #include #include diff --git a/crypto/cast5_generic.c b/crypto/cast5_generic.c index 0257c14cefc2..085a1eedae03 100644 --- a/crypto/cast5_generic.c +++ b/crypto/cast5_generic.c @@ -14,8 +14,8 @@ #include +#include #include -#include #include #include #include diff --git a/crypto/cast6_generic.c b/crypto/cast6_generic.c index 75346380aa0b..34f1ab53e3a7 100644 --- a/crypto/cast6_generic.c +++ b/crypto/cast6_generic.c @@ -11,8 +11,8 @@ #include +#include #include -#include #include #include #include diff --git a/crypto/des_generic.c b/crypto/des_generic.c index c85354a5e94c..1274e18d3eb9 100644 --- a/crypto/des_generic.c +++ b/crypto/des_generic.c @@ -8,11 +8,11 @@ */ #include +#include #include #include #include #include -#include #include diff --git a/crypto/fcrypt.c b/crypto/fcrypt.c index 76a04d000c0d..95a16e88899b 100644 --- a/crypto/fcrypt.c +++ b/crypto/fcrypt.c @@ -43,10 +43,10 @@ */ #include +#include #include #include #include -#include #define ROUNDS 16 diff --git a/crypto/khazad.c b/crypto/khazad.c index f19339954c89..70cafe73f974 100644 --- a/crypto/khazad.c +++ b/crypto/khazad.c @@ -19,11 +19,11 @@ * */ +#include #include #include #include #include -#include #include #define KHAZAD_KEY_SIZE 16 diff --git a/crypto/seed.c b/crypto/seed.c index 27720140820e..d0506ade2a5f 100644 --- a/crypto/seed.c +++ b/crypto/seed.c @@ -8,11 +8,11 @@ * Copyright (C) 2007 Korea Information Security Agency (KISA). 
*/ +#include #include #include #include #include -#include #include #define SEED_NUM_KCONSTANTS 16 diff --git a/crypto/serpent_generic.c b/crypto/serpent_generic.c index 45f98b750053..c6bca47931e2 100644 --- a/crypto/serpent_generic.c +++ b/crypto/serpent_generic.c @@ -7,11 +7,11 @@ * Copyright (C) 2002 Dag Arne Osvik */ +#include #include #include #include #include -#include #include #include diff --git a/crypto/sm4_generic.c b/crypto/sm4_generic.c index 4a6480a27fee..560eba37dc55 100644 --- a/crypto/sm4_generic.c +++ b/crypto/sm4_generic.c @@ -7,12 +7,12 @@ * All rights reserved. */ +#include #include #include #include #include #include -#include #include #include diff --git a/crypto/tea.c b/crypto/tea.c index 02efc5d81690..896f863f3067 100644 --- a/crypto/tea.c +++ b/crypto/tea.c @@ -14,11 +14,11 @@ * Copyright (c) 2004 Aaron Grothe ajgrothe@yahoo.com */ +#include #include #include #include #include -#include #include #define TEA_KEY_SIZE 16 diff --git a/crypto/twofish_common.c b/crypto/twofish_common.c index f921f30334f4..bf4f28742f77 100644 --- a/crypto/twofish_common.c +++ b/crypto/twofish_common.c @@ -25,9 +25,9 @@ * Third Edition. 
*/ +#include #include #include -#include #include #include #include diff --git a/crypto/twofish_generic.c b/crypto/twofish_generic.c index 86b2f067a416..557915e4062d 100644 --- a/crypto/twofish_generic.c +++ b/crypto/twofish_generic.c @@ -25,12 +25,12 @@ */ #include +#include #include #include #include #include #include -#include #include /* Macros to compute the g() function in the encryption and decryption diff --git a/drivers/crypto/nx/nx-842.h b/drivers/crypto/nx/nx-842.h index b66f19ac600f..7590bfb24d79 100644 --- a/drivers/crypto/nx/nx-842.h +++ b/drivers/crypto/nx/nx-842.h @@ -3,10 +3,10 @@ #ifndef __NX_842_H__ #define __NX_842_H__ +#include #include #include #include -#include #include #include #include diff --git a/include/crypto/aria.h b/include/crypto/aria.h index 254da46cc385..73295146be11 100644 --- a/include/crypto/aria.h +++ b/include/crypto/aria.h @@ -18,11 +18,11 @@ #ifndef _CRYPTO_ARIA_H #define _CRYPTO_ARIA_H +#include #include #include #include #include -#include #include #define ARIA_MIN_KEY_SIZE 16 diff --git a/include/crypto/internal/acompress.h b/include/crypto/internal/acompress.h index cfc47e18820f..49339003bd2c 100644 --- a/include/crypto/internal/acompress.h +++ b/include/crypto/internal/acompress.h @@ -8,7 +8,9 @@ */ #ifndef _CRYPTO_ACOMP_INT_H #define _CRYPTO_ACOMP_INT_H + #include +#include /* * Transform internal helpers. diff --git a/include/crypto/internal/scompress.h b/include/crypto/internal/scompress.h index f834274c2493..252cc949d4ee 100644 --- a/include/crypto/internal/scompress.h +++ b/include/crypto/internal/scompress.h @@ -8,7 +8,8 @@ */ #ifndef _CRYPTO_SCOMP_INT_H #define _CRYPTO_SCOMP_INT_H -#include + +#include #define SCOMP_SCRATCH_SIZE 131072 From e634ac4a8aaab37bdc69177df9b40acf92eccc6d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 25 Nov 2022 12:36:31 +0800 Subject: [PATCH 3247/4122] crypto: api - Add crypto_tfm_ctx_dma This patch adds the helpers crypto_tfm_ctx_aligned and crypto_tfm_ctx_dma. 
The first aligns the tfm context to the value cra_alignmask. The second sets the alignment according to dma_cache_get_alignment(); This patch also moves crypto_tfm_ctx into algapi.h. Signed-off-by: Herbert Xu --- include/crypto/algapi.h | 41 +++++++++++++++++++++++++++++++++++++++-- include/linux/crypto.h | 5 ----- 2 files changed, 39 insertions(+), 7 deletions(-) diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index f50c5d1725da..4c99eb66e654 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -7,6 +7,7 @@ #ifndef _CRYPTO_ALGAPI_H #define _CRYPTO_ALGAPI_H +#include #include #include #include @@ -25,6 +26,14 @@ #define MAX_CIPHER_BLOCKSIZE 16 #define MAX_CIPHER_ALIGNMASK 15 +#ifdef ARCH_DMA_MINALIGN +#define CRYPTO_DMA_ALIGN ARCH_DMA_MINALIGN +#else +#define CRYPTO_DMA_ALIGN CRYPTO_MINALIGN +#endif + +#define CRYPTO_DMA_PADDING ((CRYPTO_DMA_ALIGN - 1) & ~(CRYPTO_MINALIGN - 1)) + struct crypto_aead; struct crypto_instance; struct module; @@ -189,10 +198,38 @@ static inline void crypto_xor_cpy(u8 *dst, const u8 *src1, const u8 *src2, } } +static inline void *crypto_tfm_ctx(struct crypto_tfm *tfm) +{ + return tfm->__crt_ctx; +} + +static inline void *crypto_tfm_ctx_align(struct crypto_tfm *tfm, + unsigned int align) +{ + if (align <= crypto_tfm_ctx_alignment()) + align = 1; + + return PTR_ALIGN(crypto_tfm_ctx(tfm), align); +} + static inline void *crypto_tfm_ctx_aligned(struct crypto_tfm *tfm) { - return PTR_ALIGN(crypto_tfm_ctx(tfm), - crypto_tfm_alg_alignmask(tfm) + 1); + return crypto_tfm_ctx_align(tfm, crypto_tfm_alg_alignmask(tfm) + 1); +} + +static inline unsigned int crypto_dma_align(void) +{ + return CRYPTO_DMA_ALIGN; +} + +static inline unsigned int crypto_dma_padding(void) +{ + return (crypto_dma_align() - 1) & ~(crypto_tfm_ctx_alignment() - 1); +} + +static inline void *crypto_tfm_ctx_dma(struct crypto_tfm *tfm) +{ + return crypto_tfm_ctx_align(tfm, crypto_dma_align()); } static inline struct crypto_instance 
*crypto_tfm_alg_instance( diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 2324ab6f1846..5d1e961f810e 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -714,11 +714,6 @@ static inline void crypto_tfm_clear_flags(struct crypto_tfm *tfm, u32 flags) tfm->crt_flags &= ~flags; } -static inline void *crypto_tfm_ctx(struct crypto_tfm *tfm) -{ - return tfm->__crt_ctx; -} - static inline unsigned int crypto_tfm_ctx_alignment(void) { struct crypto_tfm *tfm; From f8e4d1d0ac832de8efc98f302acf9476bbfffb55 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 25 Nov 2022 12:36:33 +0800 Subject: [PATCH 3248/4122] crypto: aead - Add ctx helpers with DMA alignment This patch adds helpers to access the aead context structure and request context structure with an added alignment for DMA access. Signed-off-by: Herbert Xu --- include/crypto/internal/aead.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/include/crypto/internal/aead.h b/include/crypto/internal/aead.h index d482017f3e20..cd8cb1e921b7 100644 --- a/include/crypto/internal/aead.h +++ b/include/crypto/internal/aead.h @@ -39,6 +39,11 @@ static inline void *crypto_aead_ctx(struct crypto_aead *tfm) return crypto_tfm_ctx(&tfm->base); } +static inline void *crypto_aead_ctx_dma(struct crypto_aead *tfm) +{ + return crypto_tfm_ctx_dma(&tfm->base); +} + static inline struct crypto_instance *aead_crypto_instance( struct aead_instance *inst) { @@ -65,6 +70,16 @@ static inline void *aead_request_ctx(struct aead_request *req) return req->__ctx; } +static inline void *aead_request_ctx_dma(struct aead_request *req) +{ + unsigned int align = crypto_dma_align(); + + if (align <= crypto_tfm_ctx_alignment()) + align = 1; + + return PTR_ALIGN(aead_request_ctx(req), align); +} + static inline void aead_request_complete(struct aead_request *req, int err) { req->base.complete(&req->base, err); @@ -108,6 +123,13 @@ static inline void crypto_aead_set_reqsize(struct crypto_aead *aead, 
aead->reqsize = reqsize; } +static inline void crypto_aead_set_reqsize_dma(struct crypto_aead *aead, + unsigned int reqsize) +{ + reqsize += crypto_dma_align() & ~(crypto_tfm_ctx_alignment() - 1); + aead->reqsize = reqsize; +} + static inline void aead_init_queue(struct aead_queue *queue, unsigned int max_qlen) { From b5f755fbd5d1102104c502ae213e9b42478c098f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 25 Nov 2022 12:36:35 +0800 Subject: [PATCH 3249/4122] crypto: hash - Add ctx helpers with DMA alignment This patch adds helpers to access the ahash context structure and request context structure with an added alignment for DMA access. Signed-off-by: Herbert Xu --- include/crypto/internal/hash.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index 0a288dddcf5b..1a2a41b79253 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -140,6 +140,11 @@ static inline void *crypto_ahash_ctx(struct crypto_ahash *tfm) return crypto_tfm_ctx(crypto_ahash_tfm(tfm)); } +static inline void *crypto_ahash_ctx_dma(struct crypto_ahash *tfm) +{ + return crypto_tfm_ctx_dma(crypto_ahash_tfm(tfm)); +} + static inline struct ahash_alg *__crypto_ahash_alg(struct crypto_alg *alg) { return container_of(__crypto_hash_alg_common(alg), struct ahash_alg, @@ -152,6 +157,13 @@ static inline void crypto_ahash_set_reqsize(struct crypto_ahash *tfm, tfm->reqsize = reqsize; } +static inline void crypto_ahash_set_reqsize_dma(struct crypto_ahash *ahash, + unsigned int reqsize) +{ + reqsize += crypto_dma_align() & ~(crypto_tfm_ctx_alignment() - 1); + ahash->reqsize = reqsize; +} + static inline struct crypto_instance *ahash_crypto_instance( struct ahash_instance *inst) { @@ -175,6 +187,16 @@ static inline void *ahash_instance_ctx(struct ahash_instance *inst) return crypto_instance_ctx(ahash_crypto_instance(inst)); } +static inline void *ahash_request_ctx_dma(struct ahash_request 
*req) +{ + unsigned int align = crypto_dma_align(); + + if (align <= crypto_tfm_ctx_alignment()) + align = 1; + + return PTR_ALIGN(ahash_request_ctx(req), align); +} + static inline void ahash_request_complete(struct ahash_request *req, int err) { req->base.complete(&req->base, err); From 12658ac5e612214023c26f0689e6bbe8bbea0871 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 25 Nov 2022 12:36:37 +0800 Subject: [PATCH 3250/4122] crypto: skcipher - Add ctx helpers with DMA alignment This patch adds helpers to access the skcipher context structure and request context structure with an added alignment for DMA access. Signed-off-by: Herbert Xu --- include/crypto/internal/skcipher.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/include/crypto/internal/skcipher.h b/include/crypto/internal/skcipher.h index 2a97540156bb..06d0a5491cf3 100644 --- a/include/crypto/internal/skcipher.h +++ b/include/crypto/internal/skcipher.h @@ -130,6 +130,13 @@ static inline void crypto_skcipher_set_reqsize( skcipher->reqsize = reqsize; } +static inline void crypto_skcipher_set_reqsize_dma( + struct crypto_skcipher *skcipher, unsigned int reqsize) +{ + reqsize += crypto_dma_align() & ~(crypto_tfm_ctx_alignment() - 1); + skcipher->reqsize = reqsize; +} + int crypto_register_skcipher(struct skcipher_alg *alg); void crypto_unregister_skcipher(struct skcipher_alg *alg); int crypto_register_skciphers(struct skcipher_alg *algs, int count); @@ -159,11 +166,26 @@ static inline void *crypto_skcipher_ctx(struct crypto_skcipher *tfm) return crypto_tfm_ctx(&tfm->base); } +static inline void *crypto_skcipher_ctx_dma(struct crypto_skcipher *tfm) +{ + return crypto_tfm_ctx_dma(&tfm->base); +} + static inline void *skcipher_request_ctx(struct skcipher_request *req) { return req->__ctx; } +static inline void *skcipher_request_ctx_dma(struct skcipher_request *req) +{ + unsigned int align = crypto_dma_align(); + + if (align <= crypto_tfm_ctx_alignment()) + align = 1; + + return 
PTR_ALIGN(skcipher_request_ctx(req), align); +} + static inline u32 skcipher_request_flags(struct skcipher_request *req) { return req->base.flags; From 1c799571976da15e055f32a0e244697500e97f64 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 25 Nov 2022 12:36:39 +0800 Subject: [PATCH 3251/4122] crypto: api - Increase MAX_ALGAPI_ALIGNMASK to 127 Previously we limited the maximum alignment mask to 63. This is mostly due to stack usage for shash. This patch introduces a separate limit for shash algorithms and increases the general limit to 127 which is the value that we need for DMA allocations on arm64. Signed-off-by: Herbert Xu --- crypto/shash.c | 9 +++++++-- include/crypto/algapi.h | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/crypto/shash.c b/crypto/shash.c index 0f8543158826..868b6ba2b3b7 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -18,6 +18,8 @@ #include "internal.h" +#define MAX_SHASH_ALIGNMASK 63 + static const struct crypto_type crypto_shash_type; int shash_no_setkey(struct crypto_shash *tfm, const u8 *key, @@ -88,7 +90,7 @@ static int shash_update_unaligned(struct shash_desc *desc, const u8 *data, * We cannot count on __aligned() working for large values: * https://patchwork.kernel.org/patch/9507697/ */ - u8 ubuf[MAX_ALGAPI_ALIGNMASK * 2]; + u8 ubuf[MAX_SHASH_ALIGNMASK * 2]; u8 *buf = PTR_ALIGN(&ubuf[0], alignmask + 1); int err; @@ -130,7 +132,7 @@ static int shash_final_unaligned(struct shash_desc *desc, u8 *out) * We cannot count on __aligned() working for large values: * https://patchwork.kernel.org/patch/9507697/ */ - u8 ubuf[MAX_ALGAPI_ALIGNMASK + HASH_MAX_DIGESTSIZE]; + u8 ubuf[MAX_SHASH_ALIGNMASK + HASH_MAX_DIGESTSIZE]; u8 *buf = PTR_ALIGN(&ubuf[0], alignmask + 1); int err; @@ -524,6 +526,9 @@ static int shash_prepare_alg(struct shash_alg *alg) alg->statesize > HASH_MAX_STATESIZE) return -EINVAL; + if (base->cra_alignmask > MAX_SHASH_ALIGNMASK) + return -EINVAL; + if ((alg->export && !alg->import) || (alg->import && 
!alg->export)) return -EINVAL; diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 4c99eb66e654..8722fd67f40a 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -22,7 +22,7 @@ * algs and architectures. Ciphers have a lower maximum size. */ #define MAX_ALGAPI_BLOCKSIZE 160 -#define MAX_ALGAPI_ALIGNMASK 63 +#define MAX_ALGAPI_ALIGNMASK 127 #define MAX_CIPHER_BLOCKSIZE 16 #define MAX_CIPHER_ALIGNMASK 15 From 4ac3377645e98d319cb5404e72d40a4aa69d252c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 25 Nov 2022 12:36:41 +0800 Subject: [PATCH 3252/4122] crypto: akcipher - Add ctx helpers with DMA alignment This patch adds helpers to access the akcipher context structure and request context structure with an added alignment for DMA access. Signed-off-by: Herbert Xu --- include/crypto/internal/akcipher.h | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/include/crypto/internal/akcipher.h b/include/crypto/internal/akcipher.h index 1474a2d890fc..aaf1092b93b8 100644 --- a/include/crypto/internal/akcipher.h +++ b/include/crypto/internal/akcipher.h @@ -33,15 +33,37 @@ static inline void *akcipher_request_ctx(struct akcipher_request *req) return req->__ctx; } +static inline void *akcipher_request_ctx_dma(struct akcipher_request *req) +{ + unsigned int align = crypto_dma_align(); + + if (align <= crypto_tfm_ctx_alignment()) + align = 1; + + return PTR_ALIGN(akcipher_request_ctx(req), align); +} + static inline void akcipher_set_reqsize(struct crypto_akcipher *akcipher, unsigned int reqsize) { akcipher->reqsize = reqsize; } +static inline void akcipher_set_reqsize_dma(struct crypto_akcipher *akcipher, + unsigned int reqsize) +{ + reqsize += crypto_dma_align() & ~(crypto_tfm_ctx_alignment() - 1); + akcipher->reqsize = reqsize; +} + static inline void *akcipher_tfm_ctx(struct crypto_akcipher *tfm) { - return tfm->base.__crt_ctx; + return crypto_tfm_ctx(&tfm->base); +} + +static inline void 
*akcipher_tfm_ctx_dma(struct crypto_akcipher *tfm) +{ + return crypto_tfm_ctx_dma(&tfm->base); } static inline void akcipher_request_complete(struct akcipher_request *req, From a5a49249effb6f03086214b25719d415cc867b3d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 25 Nov 2022 12:36:43 +0800 Subject: [PATCH 3253/4122] crypto: kpp - Add ctx helpers with DMA alignment This patch adds helpers to access the kpp context structure and request context structure with an added alignment for DMA access. Signed-off-by: Herbert Xu --- include/crypto/internal/kpp.h | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/include/crypto/internal/kpp.h b/include/crypto/internal/kpp.h index 167662407e36..3c9726e89f53 100644 --- a/include/crypto/internal/kpp.h +++ b/include/crypto/internal/kpp.h @@ -50,15 +50,37 @@ static inline void *kpp_request_ctx(struct kpp_request *req) return req->__ctx; } +static inline void *kpp_request_ctx_dma(struct kpp_request *req) +{ + unsigned int align = crypto_dma_align(); + + if (align <= crypto_tfm_ctx_alignment()) + align = 1; + + return PTR_ALIGN(kpp_request_ctx(req), align); +} + static inline void kpp_set_reqsize(struct crypto_kpp *kpp, unsigned int reqsize) { kpp->reqsize = reqsize; } +static inline void kpp_set_reqsize_dma(struct crypto_kpp *kpp, + unsigned int reqsize) +{ + reqsize += crypto_dma_align() & ~(crypto_tfm_ctx_alignment() - 1); + kpp->reqsize = reqsize; +} + static inline void *kpp_tfm_ctx(struct crypto_kpp *tfm) { - return tfm->base.__crt_ctx; + return crypto_tfm_ctx(&tfm->base); +} + +static inline void *kpp_tfm_ctx_dma(struct crypto_kpp *tfm) +{ + return crypto_tfm_ctx_dma(&tfm->base); } static inline void kpp_request_complete(struct kpp_request *req, int err) From 4cb4f7c11deef5222ac15631b16ab54625b926b3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 25 Nov 2022 12:36:45 +0800 Subject: [PATCH 3254/4122] crypto: caam - Set DMA alignment explicitly This driver has been implicitly 
relying on kmalloc alignment to be sufficient for DMA. This may no longer be the case with upcoming arm64 changes. This patch changes it to explicitly request DMA alignment from the Crypto API. Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamalg.c | 72 ++++++------- drivers/crypto/caam/caamalg_qi.c | 52 ++++----- drivers/crypto/caam/caamalg_qi2.c | 173 +++++++++++++++--------------- drivers/crypto/caam/caamhash.c | 87 ++++++++------- drivers/crypto/caam/caampkc.c | 47 ++++---- 5 files changed, 216 insertions(+), 215 deletions(-) diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index d3d8bb0a6990..ecc15bc521db 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -131,7 +131,7 @@ struct caam_aead_req_ctx { static int aead_null_set_sh_desc(struct crypto_aead *aead) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *jrdev = ctx->jrdev; struct caam_drv_private *ctrlpriv = dev_get_drvdata(jrdev->parent); u32 *desc; @@ -184,7 +184,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead) struct caam_aead_alg *alg = container_of(crypto_aead_alg(aead), struct caam_aead_alg, aead); unsigned int ivsize = crypto_aead_ivsize(aead); - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *jrdev = ctx->jrdev; struct caam_drv_private *ctrlpriv = dev_get_drvdata(jrdev->parent); u32 ctx1_iv_off = 0; @@ -312,7 +312,7 @@ skip_givenc: static int aead_setauthsize(struct crypto_aead *authenc, unsigned int authsize) { - struct caam_ctx *ctx = crypto_aead_ctx(authenc); + struct caam_ctx *ctx = crypto_aead_ctx_dma(authenc); ctx->authsize = authsize; aead_set_sh_desc(authenc); @@ -322,7 +322,7 @@ static int aead_setauthsize(struct crypto_aead *authenc, static int gcm_set_sh_desc(struct crypto_aead *aead) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); 
struct device *jrdev = ctx->jrdev; unsigned int ivsize = crypto_aead_ivsize(aead); u32 *desc; @@ -372,7 +372,7 @@ static int gcm_set_sh_desc(struct crypto_aead *aead) static int gcm_setauthsize(struct crypto_aead *authenc, unsigned int authsize) { - struct caam_ctx *ctx = crypto_aead_ctx(authenc); + struct caam_ctx *ctx = crypto_aead_ctx_dma(authenc); int err; err = crypto_gcm_check_authsize(authsize); @@ -387,7 +387,7 @@ static int gcm_setauthsize(struct crypto_aead *authenc, unsigned int authsize) static int rfc4106_set_sh_desc(struct crypto_aead *aead) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *jrdev = ctx->jrdev; unsigned int ivsize = crypto_aead_ivsize(aead); u32 *desc; @@ -440,7 +440,7 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead) static int rfc4106_setauthsize(struct crypto_aead *authenc, unsigned int authsize) { - struct caam_ctx *ctx = crypto_aead_ctx(authenc); + struct caam_ctx *ctx = crypto_aead_ctx_dma(authenc); int err; err = crypto_rfc4106_check_authsize(authsize); @@ -455,7 +455,7 @@ static int rfc4106_setauthsize(struct crypto_aead *authenc, static int rfc4543_set_sh_desc(struct crypto_aead *aead) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *jrdev = ctx->jrdev; unsigned int ivsize = crypto_aead_ivsize(aead); u32 *desc; @@ -508,7 +508,7 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead) static int rfc4543_setauthsize(struct crypto_aead *authenc, unsigned int authsize) { - struct caam_ctx *ctx = crypto_aead_ctx(authenc); + struct caam_ctx *ctx = crypto_aead_ctx_dma(authenc); if (authsize != 16) return -EINVAL; @@ -521,7 +521,7 @@ static int rfc4543_setauthsize(struct crypto_aead *authenc, static int chachapoly_set_sh_desc(struct crypto_aead *aead) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *jrdev = ctx->jrdev; 
unsigned int ivsize = crypto_aead_ivsize(aead); u32 *desc; @@ -547,7 +547,7 @@ static int chachapoly_set_sh_desc(struct crypto_aead *aead) static int chachapoly_setauthsize(struct crypto_aead *aead, unsigned int authsize) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); if (authsize != POLY1305_DIGEST_SIZE) return -EINVAL; @@ -559,7 +559,7 @@ static int chachapoly_setauthsize(struct crypto_aead *aead, static int chachapoly_setkey(struct crypto_aead *aead, const u8 *key, unsigned int keylen) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); unsigned int ivsize = crypto_aead_ivsize(aead); unsigned int saltlen = CHACHAPOLY_IV_SIZE - ivsize; @@ -575,7 +575,7 @@ static int chachapoly_setkey(struct crypto_aead *aead, const u8 *key, static int aead_setkey(struct crypto_aead *aead, const u8 *key, unsigned int keylen) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *jrdev = ctx->jrdev; struct caam_drv_private *ctrlpriv = dev_get_drvdata(jrdev->parent); struct crypto_authenc_keys keys; @@ -656,7 +656,7 @@ static int des3_aead_setkey(struct crypto_aead *aead, const u8 *key, static int gcm_setkey(struct crypto_aead *aead, const u8 *key, unsigned int keylen) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *jrdev = ctx->jrdev; int err; @@ -677,7 +677,7 @@ static int gcm_setkey(struct crypto_aead *aead, static int rfc4106_setkey(struct crypto_aead *aead, const u8 *key, unsigned int keylen) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *jrdev = ctx->jrdev; int err; @@ -703,7 +703,7 @@ static int rfc4106_setkey(struct crypto_aead *aead, static int rfc4543_setkey(struct crypto_aead *aead, const u8 *key, unsigned int keylen) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + 
struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *jrdev = ctx->jrdev; int err; @@ -729,7 +729,7 @@ static int rfc4543_setkey(struct crypto_aead *aead, static int skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key, unsigned int keylen, const u32 ctx1_iv_off) { - struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher); struct caam_skcipher_alg *alg = container_of(crypto_skcipher_alg(skcipher), typeof(*alg), skcipher); @@ -832,7 +832,7 @@ static int des3_skcipher_setkey(struct crypto_skcipher *skcipher, static int xts_skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key, unsigned int keylen) { - struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher); struct device *jrdev = ctx->jrdev; struct caam_drv_private *ctrlpriv = dev_get_drvdata(jrdev->parent); u32 *desc; @@ -1057,7 +1057,7 @@ static void init_aead_job(struct aead_request *req, bool all_contig, bool encrypt) { struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); int authsize = ctx->authsize; u32 *desc = edesc->hw_desc; u32 out_options, in_options; @@ -1118,7 +1118,7 @@ static void init_gcm_job(struct aead_request *req, bool all_contig, bool encrypt) { struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); unsigned int ivsize = crypto_aead_ivsize(aead); u32 *desc = edesc->hw_desc; bool generic_gcm = (ivsize == GCM_AES_IV_SIZE); @@ -1185,7 +1185,7 @@ static void init_authenc_job(struct aead_request *req, struct caam_aead_alg *alg = container_of(crypto_aead_alg(aead), struct caam_aead_alg, aead); unsigned int ivsize = crypto_aead_ivsize(aead); - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct caam_drv_private 
*ctrlpriv = dev_get_drvdata(ctx->jrdev->parent); const bool ctr_mode = ((ctx->cdata.algtype & OP_ALG_AAI_MASK) == OP_ALG_AAI_CTR_MOD128); @@ -1234,7 +1234,7 @@ static void init_skcipher_job(struct skcipher_request *req, const bool encrypt) { struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); - struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher); struct device *jrdev = ctx->jrdev; int ivsize = crypto_skcipher_ivsize(skcipher); u32 *desc = edesc->hw_desc; @@ -1290,7 +1290,7 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, bool encrypt) { struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *jrdev = ctx->jrdev; struct caam_aead_req_ctx *rctx = aead_request_ctx(req); gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? @@ -1457,7 +1457,7 @@ static inline int chachapoly_crypt(struct aead_request *req, bool encrypt) { struct aead_edesc *edesc; struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *jrdev = ctx->jrdev; bool all_contig; u32 *desc; @@ -1491,7 +1491,7 @@ static inline int aead_crypt(struct aead_request *req, bool encrypt) { struct aead_edesc *edesc; struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *jrdev = ctx->jrdev; bool all_contig; @@ -1524,7 +1524,7 @@ static int aead_decrypt(struct aead_request *req) static int aead_do_one_req(struct crypto_engine *engine, void *areq) { struct aead_request *req = aead_request_cast(areq); - struct caam_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req)); + struct caam_ctx *ctx = crypto_aead_ctx_dma(crypto_aead_reqtfm(req)); struct caam_aead_req_ctx *rctx = aead_request_ctx(req); u32 
*desc = rctx->edesc->hw_desc; int ret; @@ -1550,7 +1550,7 @@ static inline int gcm_crypt(struct aead_request *req, bool encrypt) { struct aead_edesc *edesc; struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *jrdev = ctx->jrdev; bool all_contig; @@ -1597,7 +1597,7 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req, int desc_bytes) { struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); - struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher); struct caam_skcipher_req_ctx *rctx = skcipher_request_ctx(req); struct device *jrdev = ctx->jrdev; gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? @@ -1756,7 +1756,7 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req, static int skcipher_do_one_req(struct crypto_engine *engine, void *areq) { struct skcipher_request *req = skcipher_request_cast(areq); - struct caam_ctx *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(crypto_skcipher_reqtfm(req)); struct caam_skcipher_req_ctx *rctx = skcipher_request_ctx(req); u32 *desc = rctx->edesc->hw_desc; int ret; @@ -1790,7 +1790,7 @@ static inline int skcipher_crypt(struct skcipher_request *req, bool encrypt) { struct skcipher_edesc *edesc; struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); - struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher); struct device *jrdev = ctx->jrdev; struct caam_drv_private_jr *jrpriv = dev_get_drvdata(jrdev); struct caam_drv_private *ctrlpriv = dev_get_drvdata(jrdev->parent); @@ -3397,7 +3397,7 @@ static int caam_cra_init(struct crypto_skcipher *tfm) struct skcipher_alg *alg = crypto_skcipher_alg(tfm); struct caam_skcipher_alg *caam_alg = container_of(alg, typeof(*caam_alg), 
skcipher); - struct caam_ctx *ctx = crypto_skcipher_ctx(tfm); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(tfm); u32 alg_aai = caam_alg->caam.class1_alg_type & OP_ALG_AAI_MASK; int ret = 0; @@ -3434,7 +3434,7 @@ static int caam_aead_init(struct crypto_aead *tfm) struct aead_alg *alg = crypto_aead_alg(tfm); struct caam_aead_alg *caam_alg = container_of(alg, struct caam_aead_alg, aead); - struct caam_ctx *ctx = crypto_aead_ctx(tfm); + struct caam_ctx *ctx = crypto_aead_ctx_dma(tfm); crypto_aead_set_reqsize(tfm, sizeof(struct caam_aead_req_ctx)); @@ -3454,7 +3454,7 @@ static void caam_exit_common(struct caam_ctx *ctx) static void caam_cra_exit(struct crypto_skcipher *tfm) { - struct caam_ctx *ctx = crypto_skcipher_ctx(tfm); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(tfm); if (ctx->fallback) crypto_free_skcipher(ctx->fallback); @@ -3463,7 +3463,7 @@ static void caam_cra_exit(struct crypto_skcipher *tfm) static void caam_aead_exit(struct crypto_aead *tfm) { - caam_exit_common(crypto_aead_ctx(tfm)); + caam_exit_common(crypto_aead_ctx_dma(tfm)); } void caam_algapi_exit(void) @@ -3491,7 +3491,7 @@ static void caam_skcipher_alg_init(struct caam_skcipher_alg *t_alg) alg->base.cra_module = THIS_MODULE; alg->base.cra_priority = CAAM_CRA_PRIORITY; - alg->base.cra_ctxsize = sizeof(struct caam_ctx); + alg->base.cra_ctxsize = sizeof(struct caam_ctx) + crypto_dma_padding(); alg->base.cra_flags |= (CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY | CRYPTO_ALG_KERN_DRIVER_ONLY); @@ -3505,7 +3505,7 @@ static void caam_aead_alg_init(struct caam_aead_alg *t_alg) alg->base.cra_module = THIS_MODULE; alg->base.cra_priority = CAAM_CRA_PRIORITY; - alg->base.cra_ctxsize = sizeof(struct caam_ctx); + alg->base.cra_ctxsize = sizeof(struct caam_ctx) + crypto_dma_padding(); alg->base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY | CRYPTO_ALG_KERN_DRIVER_ONLY; diff --git a/drivers/crypto/caam/caamalg_qi.c b/drivers/crypto/caam/caamalg_qi.c index 189a7438b29c..c37b67be0492 
100644 --- a/drivers/crypto/caam/caamalg_qi.c +++ b/drivers/crypto/caam/caamalg_qi.c @@ -81,7 +81,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead) { struct caam_aead_alg *alg = container_of(crypto_aead_alg(aead), typeof(*alg), aead); - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); unsigned int ivsize = crypto_aead_ivsize(aead); u32 ctx1_iv_off = 0; u32 *nonce = NULL; @@ -184,7 +184,7 @@ skip_givenc: static int aead_setauthsize(struct crypto_aead *authenc, unsigned int authsize) { - struct caam_ctx *ctx = crypto_aead_ctx(authenc); + struct caam_ctx *ctx = crypto_aead_ctx_dma(authenc); ctx->authsize = authsize; aead_set_sh_desc(authenc); @@ -195,7 +195,7 @@ static int aead_setauthsize(struct crypto_aead *authenc, unsigned int authsize) static int aead_setkey(struct crypto_aead *aead, const u8 *key, unsigned int keylen) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *jrdev = ctx->jrdev; struct caam_drv_private *ctrlpriv = dev_get_drvdata(jrdev->parent); struct crypto_authenc_keys keys; @@ -299,7 +299,7 @@ static int des3_aead_setkey(struct crypto_aead *aead, const u8 *key, static int gcm_set_sh_desc(struct crypto_aead *aead) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); unsigned int ivsize = crypto_aead_ivsize(aead); int rem_bytes = CAAM_DESC_BYTES_MAX - DESC_JOB_IO_LEN - ctx->cdata.keylen; @@ -342,7 +342,7 @@ static int gcm_set_sh_desc(struct crypto_aead *aead) static int gcm_setauthsize(struct crypto_aead *authenc, unsigned int authsize) { - struct caam_ctx *ctx = crypto_aead_ctx(authenc); + struct caam_ctx *ctx = crypto_aead_ctx_dma(authenc); int err; err = crypto_gcm_check_authsize(authsize); @@ -358,7 +358,7 @@ static int gcm_setauthsize(struct crypto_aead *authenc, unsigned int authsize) static int gcm_setkey(struct crypto_aead *aead, const u8 *key, unsigned int keylen) { - 
struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *jrdev = ctx->jrdev; int ret; @@ -402,7 +402,7 @@ static int gcm_setkey(struct crypto_aead *aead, static int rfc4106_set_sh_desc(struct crypto_aead *aead) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); unsigned int ivsize = crypto_aead_ivsize(aead); int rem_bytes = CAAM_DESC_BYTES_MAX - DESC_JOB_IO_LEN - ctx->cdata.keylen; @@ -446,7 +446,7 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead) static int rfc4106_setauthsize(struct crypto_aead *authenc, unsigned int authsize) { - struct caam_ctx *ctx = crypto_aead_ctx(authenc); + struct caam_ctx *ctx = crypto_aead_ctx_dma(authenc); int err; err = crypto_rfc4106_check_authsize(authsize); @@ -462,7 +462,7 @@ static int rfc4106_setauthsize(struct crypto_aead *authenc, static int rfc4106_setkey(struct crypto_aead *aead, const u8 *key, unsigned int keylen) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *jrdev = ctx->jrdev; int ret; @@ -510,7 +510,7 @@ static int rfc4106_setkey(struct crypto_aead *aead, static int rfc4543_set_sh_desc(struct crypto_aead *aead) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); unsigned int ivsize = crypto_aead_ivsize(aead); int rem_bytes = CAAM_DESC_BYTES_MAX - DESC_JOB_IO_LEN - ctx->cdata.keylen; @@ -554,7 +554,7 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead) static int rfc4543_setauthsize(struct crypto_aead *authenc, unsigned int authsize) { - struct caam_ctx *ctx = crypto_aead_ctx(authenc); + struct caam_ctx *ctx = crypto_aead_ctx_dma(authenc); if (authsize != 16) return -EINVAL; @@ -568,7 +568,7 @@ static int rfc4543_setauthsize(struct crypto_aead *authenc, static int rfc4543_setkey(struct crypto_aead *aead, const u8 *key, unsigned int keylen) { - struct caam_ctx *ctx = 
crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *jrdev = ctx->jrdev; int ret; @@ -617,7 +617,7 @@ static int rfc4543_setkey(struct crypto_aead *aead, static int skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key, unsigned int keylen, const u32 ctx1_iv_off) { - struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher); struct caam_skcipher_alg *alg = container_of(crypto_skcipher_alg(skcipher), typeof(*alg), skcipher); @@ -731,7 +731,7 @@ static int des_skcipher_setkey(struct crypto_skcipher *skcipher, static int xts_skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key, unsigned int keylen) { - struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher); struct device *jrdev = ctx->jrdev; struct caam_drv_private *ctrlpriv = dev_get_drvdata(jrdev->parent); int ret = 0; @@ -915,7 +915,7 @@ static void aead_done(struct caam_drv_req *drv_req, u32 status) struct aead_edesc *edesc; struct aead_request *aead_req = drv_req->app_ctx; struct crypto_aead *aead = crypto_aead_reqtfm(aead_req); - struct caam_ctx *caam_ctx = crypto_aead_ctx(aead); + struct caam_ctx *caam_ctx = crypto_aead_ctx_dma(aead); int ecode = 0; qidev = caam_ctx->qidev; @@ -937,7 +937,7 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, bool encrypt) { struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct caam_aead_alg *alg = container_of(crypto_aead_alg(aead), typeof(*alg), aead); struct device *qidev = ctx->qidev; @@ -1157,7 +1157,7 @@ static inline int aead_crypt(struct aead_request *req, bool encrypt) { struct aead_edesc *edesc; struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); int ret; if 
(unlikely(caam_congested)) @@ -1207,7 +1207,7 @@ static void skcipher_done(struct caam_drv_req *drv_req, u32 status) struct skcipher_edesc *edesc; struct skcipher_request *req = drv_req->app_ctx; struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); - struct caam_ctx *caam_ctx = crypto_skcipher_ctx(skcipher); + struct caam_ctx *caam_ctx = crypto_skcipher_ctx_dma(skcipher); struct device *qidev = caam_ctx->qidev; int ivsize = crypto_skcipher_ivsize(skcipher); int ecode = 0; @@ -1245,7 +1245,7 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req, bool encrypt) { struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); - struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher); struct device *qidev = ctx->qidev; gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL : GFP_ATOMIC; @@ -1405,7 +1405,7 @@ static inline int skcipher_crypt(struct skcipher_request *req, bool encrypt) { struct skcipher_edesc *edesc; struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); - struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher); struct caam_drv_private *ctrlpriv = dev_get_drvdata(ctx->jrdev->parent); int ret; @@ -2491,7 +2491,7 @@ static int caam_cra_init(struct crypto_skcipher *tfm) struct skcipher_alg *alg = crypto_skcipher_alg(tfm); struct caam_skcipher_alg *caam_alg = container_of(alg, typeof(*caam_alg), skcipher); - struct caam_ctx *ctx = crypto_skcipher_ctx(tfm); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(tfm); u32 alg_aai = caam_alg->caam.class1_alg_type & OP_ALG_AAI_MASK; int ret = 0; @@ -2524,7 +2524,7 @@ static int caam_aead_init(struct crypto_aead *tfm) struct aead_alg *alg = crypto_aead_alg(tfm); struct caam_aead_alg *caam_alg = container_of(alg, typeof(*caam_alg), aead); - struct caam_ctx *ctx = crypto_aead_ctx(tfm); + struct caam_ctx *ctx = crypto_aead_ctx_dma(tfm); return 
caam_init_common(ctx, &caam_alg->caam, !caam_alg->caam.nodkp); } @@ -2542,7 +2542,7 @@ static void caam_exit_common(struct caam_ctx *ctx) static void caam_cra_exit(struct crypto_skcipher *tfm) { - struct caam_ctx *ctx = crypto_skcipher_ctx(tfm); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(tfm); if (ctx->fallback) crypto_free_skcipher(ctx->fallback); @@ -2551,7 +2551,7 @@ static void caam_cra_exit(struct crypto_skcipher *tfm) static void caam_aead_exit(struct crypto_aead *tfm) { - caam_exit_common(crypto_aead_ctx(tfm)); + caam_exit_common(crypto_aead_ctx_dma(tfm)); } void caam_qi_algapi_exit(void) @@ -2579,7 +2579,7 @@ static void caam_skcipher_alg_init(struct caam_skcipher_alg *t_alg) alg->base.cra_module = THIS_MODULE; alg->base.cra_priority = CAAM_CRA_PRIORITY; - alg->base.cra_ctxsize = sizeof(struct caam_ctx); + alg->base.cra_ctxsize = sizeof(struct caam_ctx) + crypto_dma_padding(); alg->base.cra_flags |= (CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY | CRYPTO_ALG_KERN_DRIVER_ONLY); @@ -2593,7 +2593,7 @@ static void caam_aead_alg_init(struct caam_aead_alg *t_alg) alg->base.cra_module = THIS_MODULE; alg->base.cra_priority = CAAM_CRA_PRIORITY; - alg->base.cra_ctxsize = sizeof(struct caam_ctx); + alg->base.cra_ctxsize = sizeof(struct caam_ctx) + crypto_dma_padding(); alg->base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY | CRYPTO_ALG_KERN_DRIVER_ONLY; diff --git a/drivers/crypto/caam/caamalg_qi2.c b/drivers/crypto/caam/caamalg_qi2.c index 4482cb145d05..1b0dd742c53f 100644 --- a/drivers/crypto/caam/caamalg_qi2.c +++ b/drivers/crypto/caam/caamalg_qi2.c @@ -134,12 +134,12 @@ static struct caam_request *to_caam_req(struct crypto_async_request *areq) { switch (crypto_tfm_alg_type(areq->tfm)) { case CRYPTO_ALG_TYPE_SKCIPHER: - return skcipher_request_ctx(skcipher_request_cast(areq)); + return skcipher_request_ctx_dma(skcipher_request_cast(areq)); case CRYPTO_ALG_TYPE_AEAD: - return aead_request_ctx(container_of(areq, struct aead_request, - base)); + 
return aead_request_ctx_dma( + container_of(areq, struct aead_request, base)); case CRYPTO_ALG_TYPE_AHASH: - return ahash_request_ctx(ahash_request_cast(areq)); + return ahash_request_ctx_dma(ahash_request_cast(areq)); default: return ERR_PTR(-EINVAL); } @@ -171,7 +171,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead) { struct caam_aead_alg *alg = container_of(crypto_aead_alg(aead), typeof(*alg), aead); - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); unsigned int ivsize = crypto_aead_ivsize(aead); struct device *dev = ctx->dev; struct dpaa2_caam_priv *priv = dev_get_drvdata(dev); @@ -276,7 +276,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead) static int aead_setauthsize(struct crypto_aead *authenc, unsigned int authsize) { - struct caam_ctx *ctx = crypto_aead_ctx(authenc); + struct caam_ctx *ctx = crypto_aead_ctx_dma(authenc); ctx->authsize = authsize; aead_set_sh_desc(authenc); @@ -287,7 +287,7 @@ static int aead_setauthsize(struct crypto_aead *authenc, unsigned int authsize) static int aead_setkey(struct crypto_aead *aead, const u8 *key, unsigned int keylen) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *dev = ctx->dev; struct crypto_authenc_keys keys; @@ -350,10 +350,10 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, bool encrypt) { struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct caam_request *req_ctx = aead_request_ctx(req); + struct caam_request *req_ctx = aead_request_ctx_dma(req); struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1]; struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0]; - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct caam_aead_alg *alg = container_of(crypto_aead_alg(aead), typeof(*alg), aead); struct device *dev = ctx->dev; @@ -587,7 +587,7 @@ skip_out_fle: static int chachapoly_set_sh_desc(struct crypto_aead 
*aead) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); unsigned int ivsize = crypto_aead_ivsize(aead); struct device *dev = ctx->dev; struct caam_flc *flc; @@ -620,7 +620,7 @@ static int chachapoly_set_sh_desc(struct crypto_aead *aead) static int chachapoly_setauthsize(struct crypto_aead *aead, unsigned int authsize) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); if (authsize != POLY1305_DIGEST_SIZE) return -EINVAL; @@ -632,7 +632,7 @@ static int chachapoly_setauthsize(struct crypto_aead *aead, static int chachapoly_setkey(struct crypto_aead *aead, const u8 *key, unsigned int keylen) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); unsigned int ivsize = crypto_aead_ivsize(aead); unsigned int saltlen = CHACHAPOLY_IV_SIZE - ivsize; @@ -647,7 +647,7 @@ static int chachapoly_setkey(struct crypto_aead *aead, const u8 *key, static int gcm_set_sh_desc(struct crypto_aead *aead) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *dev = ctx->dev; unsigned int ivsize = crypto_aead_ivsize(aead); struct caam_flc *flc; @@ -704,7 +704,7 @@ static int gcm_set_sh_desc(struct crypto_aead *aead) static int gcm_setauthsize(struct crypto_aead *authenc, unsigned int authsize) { - struct caam_ctx *ctx = crypto_aead_ctx(authenc); + struct caam_ctx *ctx = crypto_aead_ctx_dma(authenc); int err; err = crypto_gcm_check_authsize(authsize); @@ -720,7 +720,7 @@ static int gcm_setauthsize(struct crypto_aead *authenc, unsigned int authsize) static int gcm_setkey(struct crypto_aead *aead, const u8 *key, unsigned int keylen) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *dev = ctx->dev; int ret; @@ -739,7 +739,7 @@ static int gcm_setkey(struct crypto_aead *aead, static int rfc4106_set_sh_desc(struct 
crypto_aead *aead) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *dev = ctx->dev; unsigned int ivsize = crypto_aead_ivsize(aead); struct caam_flc *flc; @@ -799,7 +799,7 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead) static int rfc4106_setauthsize(struct crypto_aead *authenc, unsigned int authsize) { - struct caam_ctx *ctx = crypto_aead_ctx(authenc); + struct caam_ctx *ctx = crypto_aead_ctx_dma(authenc); int err; err = crypto_rfc4106_check_authsize(authsize); @@ -815,7 +815,7 @@ static int rfc4106_setauthsize(struct crypto_aead *authenc, static int rfc4106_setkey(struct crypto_aead *aead, const u8 *key, unsigned int keylen) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *dev = ctx->dev; int ret; @@ -840,7 +840,7 @@ static int rfc4106_setkey(struct crypto_aead *aead, static int rfc4543_set_sh_desc(struct crypto_aead *aead) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *dev = ctx->dev; unsigned int ivsize = crypto_aead_ivsize(aead); struct caam_flc *flc; @@ -900,7 +900,7 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead) static int rfc4543_setauthsize(struct crypto_aead *authenc, unsigned int authsize) { - struct caam_ctx *ctx = crypto_aead_ctx(authenc); + struct caam_ctx *ctx = crypto_aead_ctx_dma(authenc); if (authsize != 16) return -EINVAL; @@ -914,7 +914,7 @@ static int rfc4543_setauthsize(struct crypto_aead *authenc, static int rfc4543_setkey(struct crypto_aead *aead, const u8 *key, unsigned int keylen) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *dev = ctx->dev; int ret; @@ -940,7 +940,7 @@ static int rfc4543_setkey(struct crypto_aead *aead, static int skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key, unsigned int keylen, const u32 ctx1_iv_off) { - 
struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher); struct caam_skcipher_alg *alg = container_of(crypto_skcipher_alg(skcipher), struct caam_skcipher_alg, skcipher); @@ -1059,7 +1059,7 @@ static int des3_skcipher_setkey(struct crypto_skcipher *skcipher, static int xts_skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key, unsigned int keylen) { - struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher); struct device *dev = ctx->dev; struct dpaa2_caam_priv *priv = dev_get_drvdata(dev); struct caam_flc *flc; @@ -1109,10 +1109,10 @@ static int xts_skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key, static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req) { struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); - struct caam_request *req_ctx = skcipher_request_ctx(req); + struct caam_request *req_ctx = skcipher_request_ctx_dma(req); struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1]; struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0]; - struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher); struct device *dev = ctx->dev; gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? 
GFP_KERNEL : GFP_ATOMIC; @@ -1286,7 +1286,7 @@ static void aead_encrypt_done(void *cbk_ctx, u32 status) struct caam_request *req_ctx = to_caam_req(areq); struct aead_edesc *edesc = req_ctx->edesc; struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); int ecode = 0; dev_dbg(ctx->dev, "%s %d: err 0x%x\n", __func__, __LINE__, status); @@ -1307,7 +1307,7 @@ static void aead_decrypt_done(void *cbk_ctx, u32 status) struct caam_request *req_ctx = to_caam_req(areq); struct aead_edesc *edesc = req_ctx->edesc; struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); int ecode = 0; dev_dbg(ctx->dev, "%s %d: err 0x%x\n", __func__, __LINE__, status); @@ -1324,8 +1324,8 @@ static int aead_encrypt(struct aead_request *req) { struct aead_edesc *edesc; struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct caam_ctx *ctx = crypto_aead_ctx(aead); - struct caam_request *caam_req = aead_request_ctx(req); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); + struct caam_request *caam_req = aead_request_ctx_dma(req); int ret; /* allocate extended descriptor */ @@ -1352,8 +1352,8 @@ static int aead_decrypt(struct aead_request *req) { struct aead_edesc *edesc; struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct caam_ctx *ctx = crypto_aead_ctx(aead); - struct caam_request *caam_req = aead_request_ctx(req); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); + struct caam_request *caam_req = aead_request_ctx_dma(req); int ret; /* allocate extended descriptor */ @@ -1392,7 +1392,7 @@ static void skcipher_encrypt_done(void *cbk_ctx, u32 status) struct skcipher_request *req = skcipher_request_cast(areq); struct caam_request *req_ctx = to_caam_req(areq); struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); - struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); + struct caam_ctx 
*ctx = crypto_skcipher_ctx_dma(skcipher); struct skcipher_edesc *edesc = req_ctx->edesc; int ecode = 0; int ivsize = crypto_skcipher_ivsize(skcipher); @@ -1430,7 +1430,7 @@ static void skcipher_decrypt_done(void *cbk_ctx, u32 status) struct skcipher_request *req = skcipher_request_cast(areq); struct caam_request *req_ctx = to_caam_req(areq); struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); - struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher); struct skcipher_edesc *edesc = req_ctx->edesc; int ecode = 0; int ivsize = crypto_skcipher_ivsize(skcipher); @@ -1474,8 +1474,8 @@ static int skcipher_encrypt(struct skcipher_request *req) { struct skcipher_edesc *edesc; struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); - struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); - struct caam_request *caam_req = skcipher_request_ctx(req); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher); + struct caam_request *caam_req = skcipher_request_ctx_dma(req); struct dpaa2_caam_priv *priv = dev_get_drvdata(ctx->dev); int ret; @@ -1524,8 +1524,8 @@ static int skcipher_decrypt(struct skcipher_request *req) { struct skcipher_edesc *edesc; struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); - struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); - struct caam_request *caam_req = skcipher_request_ctx(req); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher); + struct caam_request *caam_req = skcipher_request_ctx_dma(req); struct dpaa2_caam_priv *priv = dev_get_drvdata(ctx->dev); int ret; @@ -1603,7 +1603,7 @@ static int caam_cra_init_skcipher(struct crypto_skcipher *tfm) struct skcipher_alg *alg = crypto_skcipher_alg(tfm); struct caam_skcipher_alg *caam_alg = container_of(alg, typeof(*caam_alg), skcipher); - struct caam_ctx *ctx = crypto_skcipher_ctx(tfm); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(tfm); u32 alg_aai = caam_alg->caam.class1_alg_type & OP_ALG_AAI_MASK; int 
ret = 0; @@ -1621,10 +1621,12 @@ static int caam_cra_init_skcipher(struct crypto_skcipher *tfm) } ctx->fallback = fallback; - crypto_skcipher_set_reqsize(tfm, sizeof(struct caam_request) + - crypto_skcipher_reqsize(fallback)); + crypto_skcipher_set_reqsize_dma( + tfm, sizeof(struct caam_request) + + crypto_skcipher_reqsize(fallback)); } else { - crypto_skcipher_set_reqsize(tfm, sizeof(struct caam_request)); + crypto_skcipher_set_reqsize_dma(tfm, + sizeof(struct caam_request)); } ret = caam_cra_init(ctx, &caam_alg->caam, false); @@ -1640,8 +1642,8 @@ static int caam_cra_init_aead(struct crypto_aead *tfm) struct caam_aead_alg *caam_alg = container_of(alg, typeof(*caam_alg), aead); - crypto_aead_set_reqsize(tfm, sizeof(struct caam_request)); - return caam_cra_init(crypto_aead_ctx(tfm), &caam_alg->caam, + crypto_aead_set_reqsize_dma(tfm, sizeof(struct caam_request)); + return caam_cra_init(crypto_aead_ctx_dma(tfm), &caam_alg->caam, !caam_alg->caam.nodkp); } @@ -1654,7 +1656,7 @@ static void caam_exit_common(struct caam_ctx *ctx) static void caam_cra_exit(struct crypto_skcipher *tfm) { - struct caam_ctx *ctx = crypto_skcipher_ctx(tfm); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(tfm); if (ctx->fallback) crypto_free_skcipher(ctx->fallback); @@ -1663,7 +1665,7 @@ static void caam_cra_exit(struct crypto_skcipher *tfm) static void caam_cra_exit_aead(struct crypto_aead *tfm) { - caam_exit_common(crypto_aead_ctx(tfm)); + caam_exit_common(crypto_aead_ctx_dma(tfm)); } static struct caam_skcipher_alg driver_algs[] = { @@ -3008,7 +3010,7 @@ static void caam_skcipher_alg_init(struct caam_skcipher_alg *t_alg) alg->base.cra_module = THIS_MODULE; alg->base.cra_priority = CAAM_CRA_PRIORITY; - alg->base.cra_ctxsize = sizeof(struct caam_ctx); + alg->base.cra_ctxsize = sizeof(struct caam_ctx) + crypto_dma_padding(); alg->base.cra_flags |= (CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY | CRYPTO_ALG_KERN_DRIVER_ONLY); @@ -3022,7 +3024,7 @@ static void caam_aead_alg_init(struct 
caam_aead_alg *t_alg) alg->base.cra_module = THIS_MODULE; alg->base.cra_priority = CAAM_CRA_PRIORITY; - alg->base.cra_ctxsize = sizeof(struct caam_ctx); + alg->base.cra_ctxsize = sizeof(struct caam_ctx) + crypto_dma_padding(); alg->base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY | CRYPTO_ALG_KERN_DRIVER_ONLY; @@ -3132,7 +3134,7 @@ static inline int ctx_map_to_qm_sg(struct device *dev, static int ahash_set_sh_desc(struct crypto_ahash *ahash) { - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); int digestsize = crypto_ahash_digestsize(ahash); struct dpaa2_caam_priv *priv = dev_get_drvdata(ctx->dev); struct caam_flc *flc; @@ -3305,7 +3307,7 @@ err_flc: static int ahash_setkey(struct crypto_ahash *ahash, const u8 *key, unsigned int keylen) { - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); unsigned int blocksize = crypto_tfm_alg_blocksize(&ahash->base); unsigned int digestsize = crypto_ahash_digestsize(ahash); int ret; @@ -3356,7 +3358,7 @@ bad_free_key: static inline void ahash_unmap(struct device *dev, struct ahash_edesc *edesc, struct ahash_request *req) { - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_state *state = ahash_request_ctx_dma(req); if (edesc->src_nents) dma_unmap_sg(dev, req->src, edesc->src_nents, DMA_TO_DEVICE); @@ -3376,7 +3378,7 @@ static inline void ahash_unmap_ctx(struct device *dev, struct ahash_edesc *edesc, struct ahash_request *req, u32 flag) { - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_state *state = ahash_request_ctx_dma(req); if (state->ctx_dma) { dma_unmap_single(dev, state->ctx_dma, state->ctx_dma_len, flag); @@ -3390,9 +3392,9 @@ static void ahash_done(void *cbk_ctx, u32 status) struct crypto_async_request *areq = cbk_ctx; struct ahash_request *req = ahash_request_cast(areq); struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - 
struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct ahash_edesc *edesc = state->caam_req.edesc; - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); int digestsize = crypto_ahash_digestsize(ahash); int ecode = 0; @@ -3417,9 +3419,9 @@ static void ahash_done_bi(void *cbk_ctx, u32 status) struct crypto_async_request *areq = cbk_ctx; struct ahash_request *req = ahash_request_cast(areq); struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct ahash_edesc *edesc = state->caam_req.edesc; - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); int ecode = 0; dev_dbg(ctx->dev, "%s %d: err 0x%x\n", __func__, __LINE__, status); @@ -3455,9 +3457,9 @@ static void ahash_done_ctx_src(void *cbk_ctx, u32 status) struct crypto_async_request *areq = cbk_ctx; struct ahash_request *req = ahash_request_cast(areq); struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct ahash_edesc *edesc = state->caam_req.edesc; - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); int digestsize = crypto_ahash_digestsize(ahash); int ecode = 0; @@ -3482,9 +3484,9 @@ static void ahash_done_ctx_dst(void *cbk_ctx, u32 status) struct crypto_async_request *areq = cbk_ctx; struct ahash_request *req = ahash_request_cast(areq); struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct ahash_edesc *edesc = state->caam_req.edesc; - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct 
caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); int ecode = 0; dev_dbg(ctx->dev, "%s %d: err 0x%x\n", __func__, __LINE__, status); @@ -3518,8 +3520,8 @@ static void ahash_done_ctx_dst(void *cbk_ctx, u32 status) static int ahash_update_ctx(struct ahash_request *req) { struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct caam_request *req_ctx = &state->caam_req; struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1]; struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0]; @@ -3637,8 +3639,8 @@ unmap_ctx: static int ahash_final_ctx(struct ahash_request *req) { struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct caam_request *req_ctx = &state->caam_req; struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1]; struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0]; @@ -3708,8 +3710,8 @@ unmap_ctx: static int ahash_finup_ctx(struct ahash_request *req) { struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct caam_request *req_ctx = &state->caam_req; struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1]; struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0]; @@ -3802,8 +3804,8 @@ unmap_ctx: static int ahash_digest(struct ahash_request *req) { struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - struct 
caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct caam_request *req_ctx = &state->caam_req; struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1]; struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0]; @@ -3897,8 +3899,8 @@ unmap: static int ahash_final_no_ctx(struct ahash_request *req) { struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct caam_request *req_ctx = &state->caam_req; struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1]; struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0]; @@ -3970,8 +3972,8 @@ unmap: static int ahash_update_no_ctx(struct ahash_request *req) { struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct caam_request *req_ctx = &state->caam_req; struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1]; struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0]; @@ -4091,8 +4093,8 @@ unmap_ctx: static int ahash_finup_no_ctx(struct ahash_request *req) { struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct caam_request *req_ctx = &state->caam_req; struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1]; struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0]; @@ -4187,8 +4189,8 @@ unmap: static int ahash_update_first(struct 
ahash_request *req) { struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct caam_request *req_ctx = &state->caam_req; struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1]; struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0]; @@ -4320,7 +4322,7 @@ static int ahash_finup_first(struct ahash_request *req) static int ahash_init(struct ahash_request *req) { - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_state *state = ahash_request_ctx_dma(req); state->update = ahash_update_first; state->finup = ahash_finup_first; @@ -4337,28 +4339,28 @@ static int ahash_init(struct ahash_request *req) static int ahash_update(struct ahash_request *req) { - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_state *state = ahash_request_ctx_dma(req); return state->update(req); } static int ahash_finup(struct ahash_request *req) { - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_state *state = ahash_request_ctx_dma(req); return state->finup(req); } static int ahash_final(struct ahash_request *req) { - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_state *state = ahash_request_ctx_dma(req); return state->final(req); } static int ahash_export(struct ahash_request *req, void *out) { - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct caam_export_state *export = out; u8 *buf = state->buf; int len = state->buflen; @@ -4375,7 +4377,7 @@ static int ahash_export(struct ahash_request *req, void *out) static int ahash_import(struct ahash_request *req, const void *in) { - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_state *state = 
ahash_request_ctx_dma(req); const struct caam_export_state *export = in; memset(state, 0, sizeof(*state)); @@ -4547,7 +4549,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) container_of(halg, struct ahash_alg, halg); struct caam_hash_alg *caam_hash = container_of(alg, struct caam_hash_alg, ahash_alg); - struct caam_hash_ctx *ctx = crypto_tfm_ctx(tfm); + struct caam_hash_ctx *ctx = crypto_tfm_ctx_dma(tfm); /* Sizes for MDHA running digests: MD5, SHA1, 224, 256, 384, 512 */ static const u8 runninglen[] = { HASH_MSG_LEN + MD5_DIGEST_SIZE, HASH_MSG_LEN + SHA1_DIGEST_SIZE, @@ -4594,8 +4596,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) OP_ALG_ALGSEL_SUBMASK) >> OP_ALG_ALGSEL_SHIFT]; - crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), - sizeof(struct caam_hash_state)); + crypto_ahash_set_reqsize_dma(ahash, sizeof(struct caam_hash_state)); /* * For keyed hash algorithms shared descriptors @@ -4606,7 +4607,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) static void caam_hash_cra_exit(struct crypto_tfm *tfm) { - struct caam_hash_ctx *ctx = crypto_tfm_ctx(tfm); + struct caam_hash_ctx *ctx = crypto_tfm_ctx_dma(tfm); dma_unmap_single_attrs(ctx->dev, ctx->flc_dma[0], sizeof(ctx->flc), DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); @@ -4646,7 +4647,7 @@ static struct caam_hash_alg *caam_hash_alloc(struct device *dev, alg->cra_module = THIS_MODULE; alg->cra_init = caam_hash_cra_init; alg->cra_exit = caam_hash_cra_exit; - alg->cra_ctxsize = sizeof(struct caam_hash_ctx); + alg->cra_ctxsize = sizeof(struct caam_hash_ctx) + crypto_dma_padding(); alg->cra_priority = CAAM_CRA_PRIORITY; alg->cra_blocksize = template->blocksize; alg->cra_alignmask = 0; diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index 36ef738e4a18..1050e965a438 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -199,7 +199,7 @@ static inline int ctx_map_to_sec4_sg(struct device *jrdev, static int ahash_set_sh_desc(struct 
crypto_ahash *ahash) { - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); int digestsize = crypto_ahash_digestsize(ahash); struct device *jrdev = ctx->jrdev; struct caam_drv_private *ctrlpriv = dev_get_drvdata(jrdev->parent); @@ -255,7 +255,7 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash) static int axcbc_set_sh_desc(struct crypto_ahash *ahash) { - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); int digestsize = crypto_ahash_digestsize(ahash); struct device *jrdev = ctx->jrdev; u32 *desc; @@ -307,7 +307,7 @@ static int axcbc_set_sh_desc(struct crypto_ahash *ahash) static int acmac_set_sh_desc(struct crypto_ahash *ahash) { - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); int digestsize = crypto_ahash_digestsize(ahash); struct device *jrdev = ctx->jrdev; u32 *desc; @@ -421,7 +421,7 @@ static int hash_digest_key(struct caam_hash_ctx *ctx, u32 *keylen, u8 *key, static int ahash_setkey(struct crypto_ahash *ahash, const u8 *key, unsigned int keylen) { - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); struct device *jrdev = ctx->jrdev; int blocksize = crypto_tfm_alg_blocksize(&ahash->base); int digestsize = crypto_ahash_digestsize(ahash); @@ -484,7 +484,7 @@ static int ahash_setkey(struct crypto_ahash *ahash, static int axcbc_setkey(struct crypto_ahash *ahash, const u8 *key, unsigned int keylen) { - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); struct device *jrdev = ctx->jrdev; if (keylen != AES_KEYSIZE_128) @@ -504,7 +504,7 @@ static int axcbc_setkey(struct crypto_ahash *ahash, const u8 *key, static int acmac_setkey(struct crypto_ahash *ahash, const u8 *key, unsigned int keylen) { - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + 
struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); int err; err = aes_check_keylen(keylen); @@ -543,7 +543,7 @@ static inline void ahash_unmap(struct device *dev, struct ahash_edesc *edesc, struct ahash_request *req, int dst_len) { - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_state *state = ahash_request_ctx_dma(req); if (edesc->src_nents) dma_unmap_sg(dev, req->src, edesc->src_nents, DMA_TO_DEVICE); @@ -563,7 +563,7 @@ static inline void ahash_unmap_ctx(struct device *dev, struct ahash_edesc *edesc, struct ahash_request *req, int dst_len, u32 flag) { - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_state *state = ahash_request_ctx_dma(req); if (state->ctx_dma) { dma_unmap_single(dev, state->ctx_dma, state->ctx_dma_len, flag); @@ -580,8 +580,8 @@ static inline void ahash_done_cpy(struct device *jrdev, u32 *desc, u32 err, struct ahash_edesc *edesc; struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); int digestsize = crypto_ahash_digestsize(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct caam_hash_state *state = ahash_request_ctx_dma(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); int ecode = 0; bool has_bklog; @@ -630,8 +630,8 @@ static inline void ahash_done_switch(struct device *jrdev, u32 *desc, u32 err, struct caam_drv_private_jr *jrp = dev_get_drvdata(jrdev); struct ahash_edesc *edesc; struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); + struct caam_hash_state *state = ahash_request_ctx_dma(req); int digestsize = crypto_ahash_digestsize(ahash); int ecode = 0; bool has_bklog; @@ -695,8 +695,8 @@ static struct ahash_edesc *ahash_edesc_alloc(struct ahash_request *req, dma_addr_t sh_desc_dma) { struct crypto_ahash *ahash = 
crypto_ahash_reqtfm(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); + struct caam_hash_state *state = ahash_request_ctx_dma(req); gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL : GFP_ATOMIC; struct ahash_edesc *edesc; @@ -755,8 +755,8 @@ static int ahash_edesc_add_src(struct caam_hash_ctx *ctx, static int ahash_do_one_req(struct crypto_engine *engine, void *areq) { struct ahash_request *req = ahash_request_cast(areq); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req)); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(crypto_ahash_reqtfm(req)); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct device *jrdev = ctx->jrdev; u32 *desc = state->edesc->hw_desc; int ret; @@ -785,7 +785,7 @@ static int ahash_enqueue_req(struct device *jrdev, int dst_len, enum dma_data_direction dir) { struct caam_drv_private_jr *jrpriv = dev_get_drvdata(jrdev); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct ahash_edesc *edesc = state->edesc; u32 *desc = edesc->hw_desc; int ret; @@ -815,8 +815,8 @@ static int ahash_enqueue_req(struct device *jrdev, static int ahash_update_ctx(struct ahash_request *req) { struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct device *jrdev = ctx->jrdev; u8 *buf = state->buf; int *buflen = &state->buflen; @@ -940,8 +940,8 @@ unmap_ctx: static int ahash_final_ctx(struct ahash_request *req) { struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_ctx *ctx = 
crypto_ahash_ctx(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct device *jrdev = ctx->jrdev; int buflen = state->buflen; u32 *desc; @@ -1001,8 +1001,8 @@ static int ahash_final_ctx(struct ahash_request *req) static int ahash_finup_ctx(struct ahash_request *req) { struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct device *jrdev = ctx->jrdev; int buflen = state->buflen; u32 *desc; @@ -1075,8 +1075,8 @@ static int ahash_finup_ctx(struct ahash_request *req) static int ahash_digest(struct ahash_request *req) { struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct device *jrdev = ctx->jrdev; u32 *desc; int digestsize = crypto_ahash_digestsize(ahash); @@ -1142,8 +1142,8 @@ static int ahash_digest(struct ahash_request *req) static int ahash_final_no_ctx(struct ahash_request *req) { struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct device *jrdev = ctx->jrdev; u8 *buf = state->buf; int buflen = state->buflen; @@ -1191,8 +1191,8 @@ static int ahash_final_no_ctx(struct ahash_request *req) static int ahash_update_no_ctx(struct ahash_request *req) { struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct 
caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct device *jrdev = ctx->jrdev; u8 *buf = state->buf; int *buflen = &state->buflen; @@ -1312,8 +1312,8 @@ static int ahash_update_no_ctx(struct ahash_request *req) static int ahash_finup_no_ctx(struct ahash_request *req) { struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct device *jrdev = ctx->jrdev; int buflen = state->buflen; u32 *desc; @@ -1388,8 +1388,8 @@ static int ahash_finup_no_ctx(struct ahash_request *req) static int ahash_update_first(struct ahash_request *req) { struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct device *jrdev = ctx->jrdev; u8 *buf = state->buf; int *buflen = &state->buflen; @@ -1498,7 +1498,7 @@ static int ahash_finup_first(struct ahash_request *req) static int ahash_init(struct ahash_request *req) { - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_state *state = ahash_request_ctx_dma(req); state->update = ahash_update_first; state->finup = ahash_finup_first; @@ -1515,28 +1515,28 @@ static int ahash_init(struct ahash_request *req) static int ahash_update(struct ahash_request *req) { - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_state *state = ahash_request_ctx_dma(req); return state->update(req); } static int ahash_finup(struct ahash_request *req) { - struct caam_hash_state 
*state = ahash_request_ctx(req); + struct caam_hash_state *state = ahash_request_ctx_dma(req); return state->finup(req); } static int ahash_final(struct ahash_request *req) { - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_state *state = ahash_request_ctx_dma(req); return state->final(req); } static int ahash_export(struct ahash_request *req, void *out) { - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct caam_export_state *export = out; u8 *buf = state->buf; int len = state->buflen; @@ -1553,7 +1553,7 @@ static int ahash_export(struct ahash_request *req, void *out) static int ahash_import(struct ahash_request *req, const void *in) { - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_state *state = ahash_request_ctx_dma(req); const struct caam_export_state *export = in; memset(state, 0, sizeof(*state)); @@ -1762,7 +1762,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) container_of(halg, struct ahash_alg, halg); struct caam_hash_alg *caam_hash = container_of(alg, struct caam_hash_alg, ahash_alg); - struct caam_hash_ctx *ctx = crypto_tfm_ctx(tfm); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); /* Sizes for MDHA running digests: MD5, SHA1, 224, 256, 384, 512 */ static const u8 runninglen[] = { HASH_MSG_LEN + MD5_DIGEST_SIZE, HASH_MSG_LEN + SHA1_DIGEST_SIZE, @@ -1854,8 +1854,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) ctx->enginectx.op.do_one_request = ahash_do_one_req; - crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), - sizeof(struct caam_hash_state)); + crypto_ahash_set_reqsize_dma(ahash, sizeof(struct caam_hash_state)); /* * For keyed hash algorithms shared descriptors @@ -1866,7 +1865,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) static void caam_hash_cra_exit(struct crypto_tfm *tfm) { - struct caam_hash_ctx *ctx = crypto_tfm_ctx(tfm); + struct caam_hash_ctx *ctx = 
crypto_tfm_ctx_dma(tfm); dma_unmap_single_attrs(ctx->jrdev, ctx->sh_desc_update_dma, offsetof(struct caam_hash_ctx, key) - @@ -1926,7 +1925,7 @@ caam_hash_alloc(struct caam_hash_template *template, alg->cra_module = THIS_MODULE; alg->cra_init = caam_hash_cra_init; alg->cra_exit = caam_hash_cra_exit; - alg->cra_ctxsize = sizeof(struct caam_hash_ctx); + alg->cra_ctxsize = sizeof(struct caam_hash_ctx) + crypto_dma_padding(); alg->cra_priority = CAAM_CRA_PRIORITY; alg->cra_blocksize = template->blocksize; alg->cra_alignmask = 0; diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c index 642846693d7c..aef031946f33 100644 --- a/drivers/crypto/caam/caampkc.c +++ b/drivers/crypto/caam/caampkc.c @@ -57,7 +57,7 @@ static void rsa_pub_unmap(struct device *dev, struct rsa_edesc *edesc, struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); struct caam_rsa_key *key = &ctx->key; struct rsa_pub_pdb *pdb = &edesc->pdb.pub; @@ -69,7 +69,7 @@ static void rsa_priv_f1_unmap(struct device *dev, struct rsa_edesc *edesc, struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); struct caam_rsa_key *key = &ctx->key; struct rsa_priv_f1_pdb *pdb = &edesc->pdb.priv_f1; @@ -81,7 +81,7 @@ static void rsa_priv_f2_unmap(struct device *dev, struct rsa_edesc *edesc, struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); struct caam_rsa_key *key = &ctx->key; struct rsa_priv_f2_pdb *pdb = &edesc->pdb.priv_f2; size_t p_sz = key->p_sz; @@ -98,7 +98,7 @@ static void rsa_priv_f3_unmap(struct device *dev, struct rsa_edesc *edesc, struct akcipher_request *req) { 
struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); struct caam_rsa_key *key = &ctx->key; struct rsa_priv_f3_pdb *pdb = &edesc->pdb.priv_f3; size_t p_sz = key->p_sz; @@ -149,7 +149,7 @@ static void rsa_priv_f_done(struct device *dev, u32 *desc, u32 err, struct akcipher_request *req = context; struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); struct caam_drv_private_jr *jrp = dev_get_drvdata(dev); - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); struct caam_rsa_key *key = &ctx->key; struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req); struct rsa_edesc *edesc; @@ -242,7 +242,7 @@ static struct rsa_edesc *rsa_edesc_alloc(struct akcipher_request *req, size_t desclen) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); struct device *dev = ctx->dev; struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req); struct caam_rsa_key *key = &ctx->key; @@ -371,7 +371,7 @@ static int akcipher_do_one_req(struct crypto_engine *engine, void *areq) base); struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req); - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); struct device *jrdev = ctx->dev; u32 *desc = req_ctx->edesc->hw_desc; int ret; @@ -399,7 +399,7 @@ static int set_rsa_pub_pdb(struct akcipher_request *req, { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req); - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); struct caam_rsa_key *key = &ctx->key; struct device *dev = ctx->dev; struct rsa_pub_pdb *pdb = &edesc->pdb.pub; @@ 
-444,7 +444,7 @@ static int set_rsa_priv_f1_pdb(struct akcipher_request *req, struct rsa_edesc *edesc) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); struct caam_rsa_key *key = &ctx->key; struct device *dev = ctx->dev; struct rsa_priv_f1_pdb *pdb = &edesc->pdb.priv_f1; @@ -491,7 +491,7 @@ static int set_rsa_priv_f2_pdb(struct akcipher_request *req, struct rsa_edesc *edesc) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); struct caam_rsa_key *key = &ctx->key; struct device *dev = ctx->dev; struct rsa_priv_f2_pdb *pdb = &edesc->pdb.priv_f2; @@ -568,7 +568,7 @@ static int set_rsa_priv_f3_pdb(struct akcipher_request *req, struct rsa_edesc *edesc) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); struct caam_rsa_key *key = &ctx->key; struct device *dev = ctx->dev; struct rsa_priv_f3_pdb *pdb = &edesc->pdb.priv_f3; @@ -664,7 +664,7 @@ static int akcipher_enqueue_req(struct device *jrdev, { struct caam_drv_private_jr *jrpriv = dev_get_drvdata(jrdev); struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); struct caam_rsa_key *key = &ctx->key; struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req); struct rsa_edesc *edesc = req_ctx->edesc; @@ -707,7 +707,7 @@ static int akcipher_enqueue_req(struct device *jrdev, static int caam_rsa_enc(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); struct caam_rsa_key *key = &ctx->key; struct device *jrdev = ctx->dev; 
struct rsa_edesc *edesc; @@ -746,7 +746,7 @@ init_fail: static int caam_rsa_dec_priv_f1(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); struct device *jrdev = ctx->dev; struct rsa_edesc *edesc; int ret; @@ -775,7 +775,7 @@ init_fail: static int caam_rsa_dec_priv_f2(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); struct device *jrdev = ctx->dev; struct rsa_edesc *edesc; int ret; @@ -804,7 +804,7 @@ init_fail: static int caam_rsa_dec_priv_f3(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); struct device *jrdev = ctx->dev; struct rsa_edesc *edesc; int ret; @@ -833,7 +833,7 @@ init_fail: static int caam_rsa_dec(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); struct caam_rsa_key *key = &ctx->key; int ret; @@ -936,7 +936,7 @@ static int caam_rsa_check_key_length(unsigned int len) static int caam_rsa_set_pub_key(struct crypto_akcipher *tfm, const void *key, unsigned int keylen) { - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); struct rsa_key raw_key = {NULL}; struct caam_rsa_key *rsa_key = &ctx->key; int ret; @@ -1038,7 +1038,7 @@ free_p: static int caam_rsa_set_priv_key(struct crypto_akcipher *tfm, const void *key, unsigned int keylen) { - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); struct rsa_key raw_key = {NULL}; struct caam_rsa_key *rsa_key = &ctx->key; int 
ret; @@ -1089,7 +1089,7 @@ err: static unsigned int caam_rsa_max_size(struct crypto_akcipher *tfm) { - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); return ctx->key.n_sz; } @@ -1097,7 +1097,7 @@ static unsigned int caam_rsa_max_size(struct crypto_akcipher *tfm) /* Per session pkc's driver context creation function */ static int caam_rsa_init_tfm(struct crypto_akcipher *tfm) { - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); akcipher_set_reqsize(tfm, sizeof(struct caam_rsa_req_ctx)); @@ -1125,7 +1125,7 @@ static int caam_rsa_init_tfm(struct crypto_akcipher *tfm) /* Per session pkc's driver context cleanup function */ static void caam_rsa_exit_tfm(struct crypto_akcipher *tfm) { - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); struct caam_rsa_key *key = &ctx->key; dma_unmap_single(ctx->dev, ctx->padding_dma, CAAM_RSA_MAX_INPUT_SIZE - @@ -1148,7 +1148,8 @@ static struct caam_akcipher_alg caam_rsa = { .cra_driver_name = "rsa-caam", .cra_priority = 3000, .cra_module = THIS_MODULE, - .cra_ctxsize = sizeof(struct caam_rsa_ctx), + .cra_ctxsize = sizeof(struct caam_rsa_ctx) + + CRYPTO_DMA_PADDING, }, } }; From 2ae6feb1a1f6678fe11864f1b6920ed10b09ad6a Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Fri, 25 Nov 2022 20:18:11 +0800 Subject: [PATCH 3255/4122] crypto: ccree,hisilicon - Fix dependencies to correct algorithm Commit d2825fa9365d ("crypto: sm3,sm4 - move into crypto directory") moves the SM3 and SM4 stand-alone library and the algorithm implementation for the Crypto API into the same directory, and the corresponding relationship of Kconfig is modified, CONFIG_CRYPTO_SM3/4 corresponds to the stand-alone library of SM3/4, and CONFIG_CRYPTO_SM3/4_GENERIC corresponds to the algorithm implementation for the Crypto API. 
Therefore, it is necessary for this module to depend on the correct algorithm. Fixes: d2825fa9365d ("crypto: sm3,sm4 - move into crypto directory") Cc: Jason A. Donenfeld Cc: stable@vger.kernel.org # v5.19+ Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu --- drivers/crypto/Kconfig | 4 ++-- drivers/crypto/hisilicon/Kconfig | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 2947888d3b82..dfb103f81a64 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -800,8 +800,8 @@ config CRYPTO_DEV_CCREE select CRYPTO_ECB select CRYPTO_CTR select CRYPTO_XTS - select CRYPTO_SM4 - select CRYPTO_SM3 + select CRYPTO_SM4_GENERIC + select CRYPTO_SM3_GENERIC help Say 'Y' to enable a driver for the REE interface of the Arm TrustZone CryptoCell family of processors. Currently the diff --git a/drivers/crypto/hisilicon/Kconfig b/drivers/crypto/hisilicon/Kconfig index 27e1fa912063..743ce4fc3158 100644 --- a/drivers/crypto/hisilicon/Kconfig +++ b/drivers/crypto/hisilicon/Kconfig @@ -26,7 +26,7 @@ config CRYPTO_DEV_HISI_SEC2 select CRYPTO_SHA1 select CRYPTO_SHA256 select CRYPTO_SHA512 - select CRYPTO_SM4 + select CRYPTO_SM4_GENERIC depends on PCI && PCI_MSI depends on UACCE || UACCE=n depends on ARM64 || (COMPILE_TEST && 64BIT) From 84ecfe6f38ae4ee779ebd97ee173937fff565bf9 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 2 Dec 2022 09:31:39 +0100 Subject: [PATCH 3256/4122] powerpc/code-patching: Remove #ifdef CONFIG_STRICT_KERNEL_RWX No need to have one implementation of patch_instruction() for CONFIG_STRICT_KERNEL_RWX and one for !CONFIG_STRICT_KERNEL_RWX. In patch_instruction(), call raw_patch_instruction() when !CONFIG_STRICT_KERNEL_RWX. In poking_init(), bail out immediately, it will be equivalent to the weak default implementation. Everything else is declared static and will be discarded by GCC when !CONFIG_STRICT_KERNEL_RWX. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/f67d2a109404d03e8fdf1ea15388c8778337a76b.1669969781.git.christophe.leroy@csgroup.eu --- arch/powerpc/lib/code-patching.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 5b8f87db1217..a6a5047f8ba2 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -46,8 +46,6 @@ int raw_patch_instruction(u32 *addr, ppc_inst_t instr) return __patch_instruction(addr, instr, addr); } -#ifdef CONFIG_STRICT_KERNEL_RWX - struct patch_context { union { struct vm_struct *area; @@ -208,6 +206,9 @@ void __init poking_init(void) { int ret; + if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) + return; + if (mm_patch_enabled()) ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powerpc/text_poke_mm:online", @@ -358,7 +359,8 @@ static int do_patch_instruction(u32 *addr, ppc_inst_t instr) * when text_poke_area is not ready, but we still need * to allow patching. We just do the plain old patching */ - if (!static_branch_likely(&poking_init_done)) + if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) || + !static_branch_likely(&poking_init_done)) return raw_patch_instruction(addr, instr); local_irq_save(flags); @@ -370,14 +372,6 @@ static int do_patch_instruction(u32 *addr, ppc_inst_t instr) return err; } -#else /* !CONFIG_STRICT_KERNEL_RWX */ - -static int do_patch_instruction(u32 *addr, ppc_inst_t instr) -{ - return raw_patch_instruction(addr, instr); -} - -#endif /* CONFIG_STRICT_KERNEL_RWX */ __ro_after_init DEFINE_STATIC_KEY_FALSE(init_mem_is_free); From 6076dc349b1c587c74c37027efff76f0fa4646f4 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 2 Dec 2022 09:31:40 +0100 Subject: [PATCH 3257/4122] powerpc/feature-fixups: Refactor entry fixups patching Several functions have the same loop for patching instructions.
Introduce function do_patch_entry_fixups() to refactor those loops. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/79eeff7b20a98f7136da5f79b1f7c436928f27f3.1669969781.git.christophe.leroy@csgroup.eu --- arch/powerpc/lib/feature-fixups.c | 84 ++++++++++++------------------- 1 file changed, 32 insertions(+), 52 deletions(-) diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 31f40f544de5..93b3f8ea38aa 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -118,9 +118,33 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end) } #ifdef CONFIG_PPC_BOOK3S_64 +static int do_patch_entry_fixups(long *start, long *end, unsigned int *instrs, + bool do_fallback, void *fallback) +{ + int i; + + for (i = 0; start < end; start++, i++) { + unsigned int *dest = (void *)start + *start; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + // See comment in do_entry_flush_fixups() RE order of patching + if (do_fallback) { + patch_instruction(dest, ppc_inst(instrs[0])); + patch_instruction(dest + 2, ppc_inst(instrs[2])); + patch_branch(dest + 1, (unsigned long)fallback, BRANCH_SET_LINK); + } else { + patch_instruction(dest + 1, ppc_inst(instrs[1])); + patch_instruction(dest + 2, ppc_inst(instrs[2])); + patch_instruction(dest, ppc_inst(instrs[0])); + } + } + return i; +} + static void do_stf_entry_barrier_fixups(enum stf_barrier_type types) { - unsigned int instrs[3], *dest; + unsigned int instrs[3]; long *start, *end; int i; @@ -144,23 +168,8 @@ static void do_stf_entry_barrier_fixups(enum stf_barrier_type types) instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */ } - for (i = 0; start < end; start++, i++) { - dest = (void *)start + *start; - - pr_devel("patching dest %lx\n", (unsigned long)dest); - - // See comment in do_entry_flush_fixups() RE order of patching - if (types & STF_BARRIER_FALLBACK) { - 
patch_instruction(dest, ppc_inst(instrs[0])); - patch_instruction(dest + 2, ppc_inst(instrs[2])); - patch_branch(dest + 1, - (unsigned long)&stf_barrier_fallback, BRANCH_SET_LINK); - } else { - patch_instruction(dest + 1, ppc_inst(instrs[1])); - patch_instruction(dest + 2, ppc_inst(instrs[2])); - patch_instruction(dest, ppc_inst(instrs[0])); - } - } + i = do_patch_entry_fixups(start, end, instrs, types & STF_BARRIER_FALLBACK, + &stf_barrier_fallback); printk(KERN_DEBUG "stf-barrier: patched %d entry locations (%s barrier)\n", i, (types == STF_BARRIER_NONE) ? "no" : @@ -325,7 +334,7 @@ void do_uaccess_flush_fixups(enum l1d_flush_type types) static int __do_entry_flush_fixups(void *data) { enum l1d_flush_type types = *(enum l1d_flush_type *)data; - unsigned int instrs[3], *dest; + unsigned int instrs[3]; long *start, *end; int i; @@ -375,42 +384,13 @@ static int __do_entry_flush_fixups(void *data) start = PTRRELOC(&__start___entry_flush_fixup); end = PTRRELOC(&__stop___entry_flush_fixup); - for (i = 0; start < end; start++, i++) { - dest = (void *)start + *start; - - pr_devel("patching dest %lx\n", (unsigned long)dest); - - if (types == L1D_FLUSH_FALLBACK) { - patch_instruction(dest, ppc_inst(instrs[0])); - patch_instruction(dest + 2, ppc_inst(instrs[2])); - patch_branch(dest + 1, - (unsigned long)&entry_flush_fallback, BRANCH_SET_LINK); - } else { - patch_instruction(dest + 1, ppc_inst(instrs[1])); - patch_instruction(dest + 2, ppc_inst(instrs[2])); - patch_instruction(dest, ppc_inst(instrs[0])); - } - } + i = do_patch_entry_fixups(start, end, instrs, types == L1D_FLUSH_FALLBACK, + &entry_flush_fallback); start = PTRRELOC(&__start___scv_entry_flush_fixup); end = PTRRELOC(&__stop___scv_entry_flush_fixup); - for (; start < end; start++, i++) { - dest = (void *)start + *start; - - pr_devel("patching dest %lx\n", (unsigned long)dest); - - if (types == L1D_FLUSH_FALLBACK) { - patch_instruction(dest, ppc_inst(instrs[0])); - patch_instruction(dest + 2, 
ppc_inst(instrs[2])); - patch_branch(dest + 1, - (unsigned long)&scv_entry_flush_fallback, BRANCH_SET_LINK); - } else { - patch_instruction(dest + 1, ppc_inst(instrs[1])); - patch_instruction(dest + 2, ppc_inst(instrs[2])); - patch_instruction(dest, ppc_inst(instrs[0])); - } - } - + i += do_patch_entry_fixups(start, end, instrs, types == L1D_FLUSH_FALLBACK, + &scv_entry_flush_fallback); printk(KERN_DEBUG "entry-flush: patched %d locations (%s flush)\n", i, (types == L1D_FLUSH_NONE) ? "no" : From 3d1dbbca33a9c6dd3aafd4d14aaea9cc310723e1 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 2 Dec 2022 09:31:41 +0100 Subject: [PATCH 3258/4122] powerpc/feature-fixups: Refactor other fixups patching Several functions have the same loop for patching instructions. Introduce function do_patch_fixups() to refactor those loops. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/58ab36949c18f94d466fc98d6c085783b0cd474f.1669969781.git.christophe.leroy@csgroup.eu --- arch/powerpc/lib/feature-fixups.c | 77 +++++++++++-------------------- 1 file changed, 28 insertions(+), 49 deletions(-) diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 93b3f8ea38aa..25168a59d1ce 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -117,6 +117,24 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end) } } +#ifdef CONFIG_PPC_BARRIER_NOSPEC +static int do_patch_fixups(long *start, long *end, unsigned int *instrs, int num) +{ + int i; + + for (i = 0; start < end; start++, i++) { + int j; + unsigned int *dest = (void *)start + *start; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + for (j = 0; j < num; j++) + patch_instruction(dest + j, ppc_inst(instrs[j])); + } + return i; +} +#endif + #ifdef CONFIG_PPC_BOOK3S_64 static int do_patch_entry_fixups(long *start, long *end, unsigned int *instrs, bool do_fallback, void *fallback) @@ -181,7
+199,7 @@ static void do_stf_entry_barrier_fixups(enum stf_barrier_type types) static void do_stf_exit_barrier_fixups(enum stf_barrier_type types) { - unsigned int instrs[6], *dest; + unsigned int instrs[6]; long *start, *end; int i; @@ -215,18 +233,8 @@ static void do_stf_exit_barrier_fixups(enum stf_barrier_type types) instrs[i++] = PPC_RAW_EIEIO() | 0x02000000; /* eieio + bit 6 hint */ } - for (i = 0; start < end; start++, i++) { - dest = (void *)start + *start; + i = do_patch_fixups(start, end, instrs, ARRAY_SIZE(instrs)); - pr_devel("patching dest %lx\n", (unsigned long)dest); - - patch_instruction(dest, ppc_inst(instrs[0])); - patch_instruction(dest + 1, ppc_inst(instrs[1])); - patch_instruction(dest + 2, ppc_inst(instrs[2])); - patch_instruction(dest + 3, ppc_inst(instrs[3])); - patch_instruction(dest + 4, ppc_inst(instrs[4])); - patch_instruction(dest + 5, ppc_inst(instrs[5])); - } printk(KERN_DEBUG "stf-barrier: patched %d exit locations (%s barrier)\n", i, (types == STF_BARRIER_NONE) ? "no" : (types == STF_BARRIER_FALLBACK) ? "fallback" : @@ -283,7 +291,7 @@ void do_stf_barrier_fixups(enum stf_barrier_type types) void do_uaccess_flush_fixups(enum l1d_flush_type types) { - unsigned int instrs[4], *dest; + unsigned int instrs[4]; long *start, *end; int i; @@ -309,17 +317,7 @@ void do_uaccess_flush_fixups(enum l1d_flush_type types) if (types & L1D_FLUSH_MTTRIG) instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0); - for (i = 0; start < end; start++, i++) { - dest = (void *)start + *start; - - pr_devel("patching dest %lx\n", (unsigned long)dest); - - patch_instruction(dest, ppc_inst(instrs[0])); - - patch_instruction(dest + 1, ppc_inst(instrs[1])); - patch_instruction(dest + 2, ppc_inst(instrs[2])); - patch_instruction(dest + 3, ppc_inst(instrs[3])); - } + i = do_patch_fixups(start, end, instrs, ARRAY_SIZE(instrs)); printk(KERN_DEBUG "uaccess-flush: patched %d locations (%s flush)\n", i, (types == L1D_FLUSH_NONE) ? 
"no" : @@ -418,7 +416,7 @@ void do_entry_flush_fixups(enum l1d_flush_type types) static int __do_rfi_flush_fixups(void *data) { enum l1d_flush_type types = *(enum l1d_flush_type *)data; - unsigned int instrs[3], *dest; + unsigned int instrs[3]; long *start, *end; int i; @@ -442,15 +440,7 @@ static int __do_rfi_flush_fixups(void *data) if (types & L1D_FLUSH_MTTRIG) instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0); - for (i = 0; start < end; start++, i++) { - dest = (void *)start + *start; - - pr_devel("patching dest %lx\n", (unsigned long)dest); - - patch_instruction(dest, ppc_inst(instrs[0])); - patch_instruction(dest + 1, ppc_inst(instrs[1])); - patch_instruction(dest + 2, ppc_inst(instrs[2])); - } + i = do_patch_fixups(start, end, instrs, ARRAY_SIZE(instrs)); printk(KERN_DEBUG "rfi-flush: patched %d locations (%s flush)\n", i, (types == L1D_FLUSH_NONE) ? "no" : @@ -492,7 +482,7 @@ void do_rfi_flush_fixups(enum l1d_flush_type types) void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end) { - unsigned int instr, *dest; + unsigned int instr; long *start, *end; int i; @@ -506,12 +496,7 @@ void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_ instr = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */ } - for (i = 0; start < end; start++, i++) { - dest = (void *)start + *start; - - pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction(dest, ppc_inst(instr)); - } + i = do_patch_fixups(start, end, &instr, 1); printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i); } @@ -533,7 +518,7 @@ void do_barrier_nospec_fixups(bool enable) #ifdef CONFIG_PPC_E500 void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end) { - unsigned int instr[2], *dest; + unsigned int instr[2]; long *start, *end; int i; @@ -549,13 +534,7 @@ void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_ instr[1] = PPC_RAW_SYNC(); } - for (i = 0; start < end; start++, i++) { - 
dest = (void *)start + *start; - - pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction(dest, ppc_inst(instr[0])); - patch_instruction(dest + 1, ppc_inst(instr[1])); - } + i = do_patch_fixups(start, end, instr, ARRAY_SIZE(instr)); printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i); } From b988e7797d09379057cf991ae082f9ad7a309a63 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 2 Dec 2022 09:31:42 +0100 Subject: [PATCH 3259/4122] powerpc/feature-fixups: Do not patch init section after init Once init section is freed, attempting to patch init code ends up in the weed. Commit 51c3c62b58b3 ("powerpc: Avoid code patching freed init sections") protected patch_instruction() against that, but it is the responsibility of the caller to ensure that the patched memory is valid. In the same spirit as jump_label with its jump_label_can_update() function, add is_fixup_addr_valid() function to skip patching on freed init section. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/8e9311fc1b057e4e6a2a3a0701ebcc74b787affe.1669969781.git.christophe.leroy@csgroup.eu --- arch/powerpc/lib/feature-fixups.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 25168a59d1ce..80def1c2afcb 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -118,6 +118,12 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end) } #ifdef CONFIG_PPC_BARRIER_NOSPEC +static bool is_fixup_addr_valid(void *dest, size_t size) +{ + return system_state < SYSTEM_FREEING_INITMEM || + !init_section_contains(dest, size); +} + static int do_patch_fixups(long *start, long *end, unsigned int *instrs, int num) { int i; @@ -126,6 +132,9 @@ static int do_patch_fixups(long *start, long *end, unsigned int *instrs, int num int j; unsigned int *dest = (void *)start + *start; + if 
(!is_fixup_addr_valid(dest, sizeof(*instrs) * num)) + continue; + pr_devel("patching dest %lx\n", (unsigned long)dest); for (j = 0; j < num; j++) @@ -144,6 +153,9 @@ static int do_patch_entry_fixups(long *start, long *end, unsigned int *instrs, for (i = 0; start < end; start++, i++) { unsigned int *dest = (void *)start + *start; + if (!is_fixup_addr_valid(dest, sizeof(*instrs) * 3)) + continue; + pr_devel("patching dest %lx\n", (unsigned long)dest); // See comment in do_entry_flush_fixups() RE order of patching From 6f3a81b60091031c2c14eb2373d1937b027deb46 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 2 Dec 2022 09:31:43 +0100 Subject: [PATCH 3260/4122] powerpc/code-patching: Remove protection against patching init addresses after init Once init section is freed, attempting to patch init code ends up in the weed. Commit 51c3c62b58b3 ("powerpc: Avoid code patching freed init sections") protected patch_instruction() against that, but it is the responsibility of the caller to ensure that the patched memory is valid. All callers have now been verified and fixed so the check can be removed. This improves ftrace activation by about 2% on 8xx. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/504310828f473d424e2ed229eff57bf075f52796.1669969781.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/code-patching.h | 2 -- arch/powerpc/lib/code-patching.c | 13 +------------ arch/powerpc/mm/mem.c | 1 - 3 files changed, 1 insertion(+), 15 deletions(-) diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index 1c6316ec4b74..3f881548fb61 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -22,8 +22,6 @@ #define BRANCH_SET_LINK 0x1 #define BRANCH_ABSOLUTE 0x2 -DECLARE_STATIC_KEY_FALSE(init_mem_is_free); - /* * Powerpc branch instruction is : * diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index a6a5047f8ba2..73ce4b90bb1b 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -349,7 +349,7 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t instr) return err; } -static int do_patch_instruction(u32 *addr, ppc_inst_t instr) +int patch_instruction(u32 *addr, ppc_inst_t instr) { int err; unsigned long flags; @@ -372,17 +372,6 @@ static int do_patch_instruction(u32 *addr, ppc_inst_t instr) return err; } - -__ro_after_init DEFINE_STATIC_KEY_FALSE(init_mem_is_free); - -int patch_instruction(u32 *addr, ppc_inst_t instr) -{ - /* Make sure we aren't patching a freed init section */ - if (static_branch_likely(&init_mem_is_free) && init_section_contains(addr, 4)) - return 0; - - return do_patch_instruction(addr, instr); -} NOKPROBE_SYMBOL(patch_instruction); int patch_branch(u32 *addr, unsigned long target, int flags) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 84d171953ba4..8b121df7b08f 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -344,7 +344,6 @@ void free_initmem(void) { ppc_md.progress = ppc_printk_progress; mark_initmem_nx(); - 
static_branch_enable(&init_mem_is_free); free_initmem_default(POISON_FREE_INITMEM); ftrace_free_init_tramp(); } From 25483dedd2f5d9bc6928cd790ee59772fb880a79 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Fri, 2 Dec 2022 17:11:44 +0530 Subject: [PATCH 3261/4122] dmaengine: Revert "dmaengine: remove s3c24xx driver" This reverts cccc46ae3623 ("dmaengine: remove s3c24xx driver") as it causes regression due to missing header Signed-off-by: Vinod Koul --- drivers/dma/Kconfig | 12 + drivers/dma/Makefile | 1 + drivers/dma/s3c24xx-dma.c | 1428 +++++++++++++++++++++ include/linux/platform_data/dma-s3c24xx.h | 48 + 4 files changed, 1489 insertions(+) create mode 100644 drivers/dma/s3c24xx-dma.c create mode 100644 include/linux/platform_data/dma-s3c24xx.h diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index b9d54f20812f..80848c6b5cd5 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -610,6 +610,18 @@ config SPRD_DMA help Enable support for the on-chip DMA controller on Spreadtrum platform. +config S3C24XX_DMAC + bool "Samsung S3C24XX DMA support" + depends on ARCH_S3C24XX || COMPILE_TEST + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Support for the Samsung S3C24XX DMA controller driver. The + DMA controller is having multiple DMA channels which can be + configured for different peripherals like audio, UART, SPI. + The DMA controller can transfer data from memory to peripheral, + periphal to memory, periphal to periphal and memory to memory. 
+ config TXX9_DMAC tristate "Toshiba TXx9 SoC DMA support" depends on MACH_TX49XX diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index a4fd1ce29510..5b55ada052a7 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -70,6 +70,7 @@ obj-$(CONFIG_STM32_DMA) += stm32-dma.o obj-$(CONFIG_STM32_DMAMUX) += stm32-dmamux.o obj-$(CONFIG_STM32_MDMA) += stm32-mdma.o obj-$(CONFIG_SPRD_DMA) += sprd-dma.o +obj-$(CONFIG_S3C24XX_DMAC) += s3c24xx-dma.o obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o obj-$(CONFIG_TEGRA186_GPC_DMA) += tegra186-gpc-dma.o obj-$(CONFIG_TEGRA20_APB_DMA) += tegra20-apb-dma.o diff --git a/drivers/dma/s3c24xx-dma.c b/drivers/dma/s3c24xx-dma.c new file mode 100644 index 000000000000..a09eeb545f7d --- /dev/null +++ b/drivers/dma/s3c24xx-dma.c @@ -0,0 +1,1428 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * S3C24XX DMA handling + * + * Copyright (c) 2013 Heiko Stuebner + * + * based on amba-pl08x.c + * + * Copyright (c) 2006 ARM Ltd. + * Copyright (c) 2010 ST-Ericsson SA + * + * Author: Peter Pearse + * Author: Linus Walleij + * + * The DMA controllers in S3C24XX SoCs have a varying number of DMA signals + * that can be routed to any of the 4 to 8 hardware-channels. + * + * Therefore on these DMA controllers the number of channels + * and the number of incoming DMA signals are two totally different things. + * It is usually not possible to theoretically handle all physical signals, + * so a multiplexing scheme with possible denial of use is necessary. 
+ * + * Open items: + * - bursts + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "dmaengine.h" +#include "virt-dma.h" + +#define MAX_DMA_CHANNELS 8 + +#define S3C24XX_DISRC 0x00 +#define S3C24XX_DISRCC 0x04 +#define S3C24XX_DISRCC_INC_INCREMENT 0 +#define S3C24XX_DISRCC_INC_FIXED BIT(0) +#define S3C24XX_DISRCC_LOC_AHB 0 +#define S3C24XX_DISRCC_LOC_APB BIT(1) + +#define S3C24XX_DIDST 0x08 +#define S3C24XX_DIDSTC 0x0c +#define S3C24XX_DIDSTC_INC_INCREMENT 0 +#define S3C24XX_DIDSTC_INC_FIXED BIT(0) +#define S3C24XX_DIDSTC_LOC_AHB 0 +#define S3C24XX_DIDSTC_LOC_APB BIT(1) +#define S3C24XX_DIDSTC_INT_TC0 0 +#define S3C24XX_DIDSTC_INT_RELOAD BIT(2) + +#define S3C24XX_DCON 0x10 + +#define S3C24XX_DCON_TC_MASK 0xfffff +#define S3C24XX_DCON_DSZ_BYTE (0 << 20) +#define S3C24XX_DCON_DSZ_HALFWORD (1 << 20) +#define S3C24XX_DCON_DSZ_WORD (2 << 20) +#define S3C24XX_DCON_DSZ_MASK (3 << 20) +#define S3C24XX_DCON_DSZ_SHIFT 20 +#define S3C24XX_DCON_AUTORELOAD 0 +#define S3C24XX_DCON_NORELOAD BIT(22) +#define S3C24XX_DCON_HWTRIG BIT(23) +#define S3C24XX_DCON_HWSRC_SHIFT 24 +#define S3C24XX_DCON_SERV_SINGLE 0 +#define S3C24XX_DCON_SERV_WHOLE BIT(27) +#define S3C24XX_DCON_TSZ_UNIT 0 +#define S3C24XX_DCON_TSZ_BURST4 BIT(28) +#define S3C24XX_DCON_INT BIT(29) +#define S3C24XX_DCON_SYNC_PCLK 0 +#define S3C24XX_DCON_SYNC_HCLK BIT(30) +#define S3C24XX_DCON_DEMAND 0 +#define S3C24XX_DCON_HANDSHAKE BIT(31) + +#define S3C24XX_DSTAT 0x14 +#define S3C24XX_DSTAT_STAT_BUSY BIT(20) +#define S3C24XX_DSTAT_CURRTC_MASK 0xfffff + +#define S3C24XX_DMASKTRIG 0x20 +#define S3C24XX_DMASKTRIG_SWTRIG BIT(0) +#define S3C24XX_DMASKTRIG_ON BIT(1) +#define S3C24XX_DMASKTRIG_STOP BIT(2) + +#define S3C24XX_DMAREQSEL 0x24 +#define S3C24XX_DMAREQSEL_HW BIT(0) + +/* + * S3C2410, S3C2440 and S3C2442 SoCs cannot select any physical channel + * for a DMA source. Instead only specific channels are valid. 
+ * All of these SoCs have 4 physical channels and the number of request + * source bits is 3. Additionally we also need 1 bit to mark the channel + * as valid. + * Therefore we separate the chansel element of the channel data into 4 + * parts of 4 bits each, to hold the information if the channel is valid + * and the hw request source to use. + * + * Example: + * SDI is valid on channels 0, 2 and 3 - with varying hw request sources. + * For it the chansel field would look like + * + * ((BIT(3) | 1) << 3 * 4) | // channel 3, with request source 1 + * ((BIT(3) | 2) << 2 * 4) | // channel 2, with request source 2 + * ((BIT(3) | 2) << 0 * 4) // channel 0, with request source 2 + */ +#define S3C24XX_CHANSEL_WIDTH 4 +#define S3C24XX_CHANSEL_VALID BIT(3) +#define S3C24XX_CHANSEL_REQ_MASK 7 + +/* + * struct soc_data - vendor-specific config parameters for individual SoCs + * @stride: spacing between the registers of each channel + * @has_reqsel: does the controller use the newer requestselection mechanism + * @has_clocks: are controllable dma-clocks present + */ +struct soc_data { + int stride; + bool has_reqsel; + bool has_clocks; +}; + +/* + * enum s3c24xx_dma_chan_state - holds the virtual channel states + * @S3C24XX_DMA_CHAN_IDLE: the channel is idle + * @S3C24XX_DMA_CHAN_RUNNING: the channel has allocated a physical transport + * channel and is running a transfer on it + * @S3C24XX_DMA_CHAN_WAITING: the channel is waiting for a physical transport + * channel to become available (only pertains to memcpy channels) + */ +enum s3c24xx_dma_chan_state { + S3C24XX_DMA_CHAN_IDLE, + S3C24XX_DMA_CHAN_RUNNING, + S3C24XX_DMA_CHAN_WAITING, +}; + +/* + * struct s3c24xx_sg - structure containing data per sg + * @src_addr: src address of sg + * @dst_addr: dst address of sg + * @len: transfer len in bytes + * @node: node for txd's dsg_list + */ +struct s3c24xx_sg { + dma_addr_t src_addr; + dma_addr_t dst_addr; + size_t len; + struct list_head node; +}; + +/* + * struct s3c24xx_txd - 
wrapper for struct dma_async_tx_descriptor + * @vd: virtual DMA descriptor + * @dsg_list: list of children sg's + * @at: sg currently being transfered + * @width: transfer width + * @disrcc: value for source control register + * @didstc: value for destination control register + * @dcon: base value for dcon register + * @cyclic: indicate cyclic transfer + */ +struct s3c24xx_txd { + struct virt_dma_desc vd; + struct list_head dsg_list; + struct list_head *at; + u8 width; + u32 disrcc; + u32 didstc; + u32 dcon; + bool cyclic; +}; + +struct s3c24xx_dma_chan; + +/* + * struct s3c24xx_dma_phy - holder for the physical channels + * @id: physical index to this channel + * @valid: does the channel have all required elements + * @base: virtual memory base (remapped) for the this channel + * @irq: interrupt for this channel + * @clk: clock for this channel + * @lock: a lock to use when altering an instance of this struct + * @serving: virtual channel currently being served by this physicalchannel + * @host: a pointer to the host (internal use) + */ +struct s3c24xx_dma_phy { + unsigned int id; + bool valid; + void __iomem *base; + int irq; + struct clk *clk; + spinlock_t lock; + struct s3c24xx_dma_chan *serving; + struct s3c24xx_dma_engine *host; +}; + +/* + * struct s3c24xx_dma_chan - this structure wraps a DMA ENGINE channel + * @id: the id of the channel + * @name: name of the channel + * @vc: wrapped virtual channel + * @phy: the physical channel utilized by this channel, if there is one + * @runtime_addr: address for RX/TX according to the runtime config + * @at: active transaction on this channel + * @lock: a lock for this channel data + * @host: a pointer to the host (internal use) + * @state: whether the channel is idle, running etc + * @slave: whether this channel is a device (slave) or for memcpy + */ +struct s3c24xx_dma_chan { + int id; + const char *name; + struct virt_dma_chan vc; + struct s3c24xx_dma_phy *phy; + struct dma_slave_config cfg; + struct s3c24xx_txd 
*at; + struct s3c24xx_dma_engine *host; + enum s3c24xx_dma_chan_state state; + bool slave; +}; + +/* + * struct s3c24xx_dma_engine - the local state holder for the S3C24XX + * @pdev: the corresponding platform device + * @pdata: platform data passed in from the platform/machine + * @base: virtual memory base (remapped) + * @slave: slave engine for this instance + * @memcpy: memcpy engine for this instance + * @phy_chans: array of data for the physical channels + */ +struct s3c24xx_dma_engine { + struct platform_device *pdev; + const struct s3c24xx_dma_platdata *pdata; + struct soc_data *sdata; + void __iomem *base; + struct dma_device slave; + struct dma_device memcpy; + struct s3c24xx_dma_phy *phy_chans; +}; + +/* + * Physical channel handling + */ + +/* + * Check whether a certain channel is busy or not. + */ +static int s3c24xx_dma_phy_busy(struct s3c24xx_dma_phy *phy) +{ + unsigned int val = readl(phy->base + S3C24XX_DSTAT); + return val & S3C24XX_DSTAT_STAT_BUSY; +} + +static bool s3c24xx_dma_phy_valid(struct s3c24xx_dma_chan *s3cchan, + struct s3c24xx_dma_phy *phy) +{ + struct s3c24xx_dma_engine *s3cdma = s3cchan->host; + const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata; + struct s3c24xx_dma_channel *cdata = &pdata->channels[s3cchan->id]; + int phyvalid; + + /* every phy is valid for memcopy channels */ + if (!s3cchan->slave) + return true; + + /* On newer variants all phys can be used for all virtual channels */ + if (s3cdma->sdata->has_reqsel) + return true; + + phyvalid = (cdata->chansel >> (phy->id * S3C24XX_CHANSEL_WIDTH)); + return (phyvalid & S3C24XX_CHANSEL_VALID) ? true : false; +} + +/* + * Allocate a physical channel for a virtual channel + * + * Try to locate a physical channel to be used for this transfer. If all + * are taken return NULL and the requester will have to cope by using + * some fallback PIO mode or retrying later. 
+ */ +static +struct s3c24xx_dma_phy *s3c24xx_dma_get_phy(struct s3c24xx_dma_chan *s3cchan) +{ + struct s3c24xx_dma_engine *s3cdma = s3cchan->host; + struct s3c24xx_dma_phy *phy = NULL; + unsigned long flags; + int i; + int ret; + + for (i = 0; i < s3cdma->pdata->num_phy_channels; i++) { + phy = &s3cdma->phy_chans[i]; + + if (!phy->valid) + continue; + + if (!s3c24xx_dma_phy_valid(s3cchan, phy)) + continue; + + spin_lock_irqsave(&phy->lock, flags); + + if (!phy->serving) { + phy->serving = s3cchan; + spin_unlock_irqrestore(&phy->lock, flags); + break; + } + + spin_unlock_irqrestore(&phy->lock, flags); + } + + /* No physical channel available, cope with it */ + if (i == s3cdma->pdata->num_phy_channels) { + dev_warn(&s3cdma->pdev->dev, "no phy channel available\n"); + return NULL; + } + + /* start the phy clock (only on SoCs flagged has_clocks) */ + if (s3cdma->sdata->has_clocks) { + ret = clk_enable(phy->clk); + if (ret) { + dev_err(&s3cdma->pdev->dev, "could not enable clock for channel %d, err %d\n", + phy->id, ret); + phy->serving = NULL; /* NOTE(review): cleared without phy->lock, unlike the claim in the loop above - confirm this is safe */ + return NULL; + } + } + + return phy; +} + +/* + * Mark the physical channel as free. + * + * This drops the link between the physical and virtual channel and, on SoCs with has_clocks set, disables the phy clock. + */ +static inline void s3c24xx_dma_put_phy(struct s3c24xx_dma_phy *phy) +{ + struct s3c24xx_dma_engine *s3cdma = phy->host; + + if (s3cdma->sdata->has_clocks) + clk_disable(phy->clk); + + phy->serving = NULL; +} + +/* + * Stops the channel by writing the stop bit. + * This should not be used for an on-going transfer, but as a method of + * shutting down a channel (eg, when it's no longer used) or terminating a + * transfer. 
+ */ +static void s3c24xx_dma_terminate_phy(struct s3c24xx_dma_phy *phy) +{ + writel(S3C24XX_DMASKTRIG_STOP, phy->base + S3C24XX_DMASKTRIG); +} + +/* + * Virtual channel handling + */ + +static inline +struct s3c24xx_dma_chan *to_s3c24xx_dma_chan(struct dma_chan *chan) +{ + return container_of(chan, struct s3c24xx_dma_chan, vc.chan); +} + +static u32 s3c24xx_dma_getbytes_chan(struct s3c24xx_dma_chan *s3cchan) +{ + struct s3c24xx_dma_phy *phy = s3cchan->phy; + struct s3c24xx_txd *txd = s3cchan->at; + u32 tc = readl(phy->base + S3C24XX_DSTAT) & S3C24XX_DSTAT_CURRTC_MASK; + + return tc * txd->width; /* DSTAT count field (presumably the remaining transfer count) scaled by the transfer width */ +} + +static int s3c24xx_dma_set_runtime_config(struct dma_chan *chan, + struct dma_slave_config *config) +{ + struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan); + unsigned long flags; + int ret = 0; + + /* Reject definitely invalid configurations */ + if (config->src_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES || + config->dst_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES) + return -EINVAL; + + spin_lock_irqsave(&s3cchan->vc.lock, flags); + + if (!s3cchan->slave) { /* only slave channels accept a runtime config */ + ret = -EINVAL; + goto out; + } + + s3cchan->cfg = *config; + +out: + spin_unlock_irqrestore(&s3cchan->vc.lock, flags); + return ret; +} + +/* + * Transfer handling + */ + +static inline +struct s3c24xx_txd *to_s3c24xx_txd(struct dma_async_tx_descriptor *tx) +{ + return container_of(tx, struct s3c24xx_txd, vd.tx); +} + +static struct s3c24xx_txd *s3c24xx_dma_get_txd(void) +{ + struct s3c24xx_txd *txd = kzalloc(sizeof(*txd), GFP_NOWAIT); + + if (txd) { + INIT_LIST_HEAD(&txd->dsg_list); + txd->dcon = S3C24XX_DCON_INT | S3C24XX_DCON_NORELOAD; /* base flags; the prep functions OR further bits into dcon */ + } + + return txd; +} + +static void s3c24xx_dma_free_txd(struct s3c24xx_txd *txd) +{ + struct s3c24xx_sg *dsg, *_dsg; + + list_for_each_entry_safe(dsg, _dsg, &txd->dsg_list, node) { + list_del(&dsg->node); + kfree(dsg); + } + + kfree(txd); +} + +static void s3c24xx_dma_start_next_sg(struct s3c24xx_dma_chan *s3cchan, + struct s3c24xx_txd *txd) +{ + struct 
s3c24xx_dma_engine *s3cdma = s3cchan->host; + struct s3c24xx_dma_phy *phy = s3cchan->phy; + const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata; + struct s3c24xx_sg *dsg = list_entry(txd->at, struct s3c24xx_sg, node); + u32 dcon = txd->dcon; + u32 val; + + /* transfer-size and -count from len and width */ + switch (txd->width) { + case 1: + dcon |= S3C24XX_DCON_DSZ_BYTE | dsg->len; + break; + case 2: + dcon |= S3C24XX_DCON_DSZ_HALFWORD | (dsg->len / 2); + break; + case 4: + dcon |= S3C24XX_DCON_DSZ_WORD | (dsg->len / 4); + break; + } /* NOTE(review): no default case; any other txd->width leaves the size and count bits of dcon unset */ + + if (s3cchan->slave) { + struct s3c24xx_dma_channel *cdata = + &pdata->channels[s3cchan->id]; + + if (s3cdma->sdata->has_reqsel) { + writel_relaxed((cdata->chansel << 1) | + S3C24XX_DMAREQSEL_HW, + phy->base + S3C24XX_DMAREQSEL); + } else { + int csel = cdata->chansel >> (phy->id * + S3C24XX_CHANSEL_WIDTH); + + csel &= S3C24XX_CHANSEL_REQ_MASK; + dcon |= csel << S3C24XX_DCON_HWSRC_SHIFT; + dcon |= S3C24XX_DCON_HWTRIG; + } + } else { + if (s3cdma->sdata->has_reqsel) + writel_relaxed(0, phy->base + S3C24XX_DMAREQSEL); + } + + writel_relaxed(dsg->src_addr, phy->base + S3C24XX_DISRC); + writel_relaxed(txd->disrcc, phy->base + S3C24XX_DISRCC); + writel_relaxed(dsg->dst_addr, phy->base + S3C24XX_DIDST); + writel_relaxed(txd->didstc, phy->base + S3C24XX_DIDSTC); + writel_relaxed(dcon, phy->base + S3C24XX_DCON); + + val = readl_relaxed(phy->base + S3C24XX_DMASKTRIG); + val &= ~S3C24XX_DMASKTRIG_STOP; + val |= S3C24XX_DMASKTRIG_ON; + + /* trigger the dma operation for memcpy transfers */ + if (!s3cchan->slave) + val |= S3C24XX_DMASKTRIG_SWTRIG; + + writel(val, phy->base + S3C24XX_DMASKTRIG); +} + +/* + * Set the initial DMA register values and start first sg. 
+ */ +static void s3c24xx_dma_start_next_txd(struct s3c24xx_dma_chan *s3cchan) +{ + struct s3c24xx_dma_phy *phy = s3cchan->phy; + struct virt_dma_desc *vd = vchan_next_desc(&s3cchan->vc); + struct s3c24xx_txd *txd = to_s3c24xx_txd(&vd->tx); + + list_del(&txd->vd.node); + + s3cchan->at = txd; + + /* Wait for channel inactive */ + while (s3c24xx_dma_phy_busy(phy)) + cpu_relax(); /* NOTE(review): unbounded spin; the irq handler in this file calls this function with vc.lock held */ + + /* point to the first element of the sg list */ + txd->at = txd->dsg_list.next; + s3c24xx_dma_start_next_sg(s3cchan, txd); +} + +/* + * Try to allocate a physical channel. When successful, assign it to + * this virtual channel, and initiate the next descriptor. The + * virtual channel lock must be held at this point. + */ +static void s3c24xx_dma_phy_alloc_and_start(struct s3c24xx_dma_chan *s3cchan) +{ + struct s3c24xx_dma_engine *s3cdma = s3cchan->host; + struct s3c24xx_dma_phy *phy; + + phy = s3c24xx_dma_get_phy(s3cchan); + if (!phy) { + dev_dbg(&s3cdma->pdev->dev, "no physical channel available for xfer on %s\n", + s3cchan->name); + s3cchan->state = S3C24XX_DMA_CHAN_WAITING; /* picked up later by s3c24xx_dma_phy_free() when a phy is released */ + return; + } + + dev_dbg(&s3cdma->pdev->dev, "allocated physical channel %d for xfer on %s\n", + phy->id, s3cchan->name); + + s3cchan->phy = phy; + s3cchan->state = S3C24XX_DMA_CHAN_RUNNING; + + s3c24xx_dma_start_next_txd(s3cchan); +} + +static void s3c24xx_dma_phy_reassign_start(struct s3c24xx_dma_phy *phy, + struct s3c24xx_dma_chan *s3cchan) +{ + struct s3c24xx_dma_engine *s3cdma = s3cchan->host; + + dev_dbg(&s3cdma->pdev->dev, "reassigned physical channel %d for xfer on %s\n", + phy->id, s3cchan->name); + + /* + * We do this without taking the lock; we're really only concerned + * about whether this pointer is NULL or not, and we're guaranteed + * that this will only be called when it _already_ is non-NULL. 
+ */ + phy->serving = s3cchan; + s3cchan->phy = phy; + s3cchan->state = S3C24XX_DMA_CHAN_RUNNING; + s3c24xx_dma_start_next_txd(s3cchan); +} + +/* + * Free a physical DMA channel, potentially reallocating it to another + * virtual channel if we have any pending. + */ +static void s3c24xx_dma_phy_free(struct s3c24xx_dma_chan *s3cchan) +{ + struct s3c24xx_dma_engine *s3cdma = s3cchan->host; + struct s3c24xx_dma_chan *p, *next; + +retry: + next = NULL; + + /* Find a waiting virtual channel for the next transfer: memcpy channels are searched first, then slave channels that may use this phy. */ + list_for_each_entry(p, &s3cdma->memcpy.channels, vc.chan.device_node) + if (p->state == S3C24XX_DMA_CHAN_WAITING) { + next = p; + break; + } + + if (!next) { + list_for_each_entry(p, &s3cdma->slave.channels, + vc.chan.device_node) + if (p->state == S3C24XX_DMA_CHAN_WAITING && + s3c24xx_dma_phy_valid(p, s3cchan->phy)) { + next = p; + break; + } + } + + /* Ensure that the physical channel is stopped */ + s3c24xx_dma_terminate_phy(s3cchan->phy); + + if (next) { + bool success; + + /* + * Eww. We know this isn't going to deadlock + * but lockdep probably doesn't. 
+ */ + spin_lock(&next->vc.lock); + /* Re-check the state now that we have the lock */ + success = next->state == S3C24XX_DMA_CHAN_WAITING; + if (success) + s3c24xx_dma_phy_reassign_start(s3cchan->phy, next); + spin_unlock(&next->vc.lock); + + /* If the state changed, try to find another channel */ + if (!success) + goto retry; + } else { + /* No more jobs, so free up the physical channel */ + s3c24xx_dma_put_phy(s3cchan->phy); + } + + s3cchan->phy = NULL; + s3cchan->state = S3C24XX_DMA_CHAN_IDLE; +} + +static void s3c24xx_dma_desc_free(struct virt_dma_desc *vd) +{ + struct s3c24xx_txd *txd = to_s3c24xx_txd(&vd->tx); + struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(vd->tx.chan); + + if (!s3cchan->slave) /* memcpy channels only */ + dma_descriptor_unmap(&vd->tx); + + s3c24xx_dma_free_txd(txd); /* frees every sg entry and the txd itself */ +} + +static irqreturn_t s3c24xx_dma_irq(int irq, void *data) +{ + struct s3c24xx_dma_phy *phy = data; + struct s3c24xx_dma_chan *s3cchan = phy->serving; + struct s3c24xx_txd *txd; + + dev_dbg(&phy->host->pdev->dev, "interrupt on channel %d\n", phy->id); + + /* + * Interrupts happen to notify the completion of a transfer and the + * channel should have moved into its stop state already on its own. + * Therefore interrupts on channels not bound to a virtual channel + * should never happen. Nevertheless send a terminate command to the + * channel if the unlikely case happens. 
+ */ + if (unlikely(!s3cchan)) { + dev_err(&phy->host->pdev->dev, "interrupt on unused channel %d\n", + phy->id); + + s3c24xx_dma_terminate_phy(phy); + + return IRQ_HANDLED; + } + + spin_lock(&s3cchan->vc.lock); + txd = s3cchan->at; + if (txd) { + /* when more sg's are in this txd, start the next one */ + if (!list_is_last(txd->at, &txd->dsg_list)) { + txd->at = txd->at->next; + if (txd->cyclic) + vchan_cyclic_callback(&txd->vd); + s3c24xx_dma_start_next_sg(s3cchan, txd); + } else if (!txd->cyclic) { + s3cchan->at = NULL; + vchan_cookie_complete(&txd->vd); + + /* + * And start the next descriptor (if any), + * otherwise free this channel. + */ + if (vchan_next_desc(&s3cchan->vc)) + s3c24xx_dma_start_next_txd(s3cchan); + else + s3c24xx_dma_phy_free(s3cchan); + } else { + vchan_cyclic_callback(&txd->vd); + + /* Cyclic: reset at beginning */ + txd->at = txd->dsg_list.next; + s3c24xx_dma_start_next_sg(s3cchan, txd); + } + } + spin_unlock(&s3cchan->vc.lock); + + return IRQ_HANDLED; +} + +/* + * The DMA ENGINE API + */ + +static int s3c24xx_dma_terminate_all(struct dma_chan *chan) +{ + struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan); + struct s3c24xx_dma_engine *s3cdma = s3cchan->host; + LIST_HEAD(head); + unsigned long flags; + int ret; + + spin_lock_irqsave(&s3cchan->vc.lock, flags); + + if (!s3cchan->phy && !s3cchan->at) { + dev_err(&s3cdma->pdev->dev, "trying to terminate already stopped channel %d\n", + s3cchan->id); + ret = -EINVAL; + goto unlock; + } + + s3cchan->state = S3C24XX_DMA_CHAN_IDLE; + + /* Mark physical channel as free (s3c24xx_dma_phy_free() may hand it to a waiting channel) */ + if (s3cchan->phy) + s3c24xx_dma_phy_free(s3cchan); + + /* Dequeue current job */ + if (s3cchan->at) { + vchan_terminate_vdesc(&s3cchan->at->vd); + s3cchan->at = NULL; + } + + /* Dequeue jobs not yet fired as well */ + + vchan_get_all_descriptors(&s3cchan->vc, &head); + + spin_unlock_irqrestore(&s3cchan->vc.lock, flags); + + vchan_dma_desc_free_list(&s3cchan->vc, &head); + + return 0; + +unlock: + 
spin_unlock_irqrestore(&s3cchan->vc.lock, flags); + + return ret; +} + +static void s3c24xx_dma_synchronize(struct dma_chan *chan) +{ + struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan); + + vchan_synchronize(&s3cchan->vc); +} + +static void s3c24xx_dma_free_chan_resources(struct dma_chan *chan) +{ + /* Ensure all queued descriptors are freed */ + vchan_free_chan_resources(to_virt_chan(chan)); +} + +static enum dma_status s3c24xx_dma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, struct dma_tx_state *txstate) +{ + struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan); + struct s3c24xx_txd *txd; + struct s3c24xx_sg *dsg; + struct virt_dma_desc *vd; + unsigned long flags; + enum dma_status ret; + size_t bytes = 0; + + spin_lock_irqsave(&s3cchan->vc.lock, flags); + ret = dma_cookie_status(chan, cookie, txstate); + + /* + * There's no point calculating the residue if there's + * no txstate to store the value. + */ + if (ret == DMA_COMPLETE || !txstate) { + spin_unlock_irqrestore(&s3cchan->vc.lock, flags); + return ret; + } + + vd = vchan_find_desc(&s3cchan->vc, cookie); + if (vd) { + /* On the issued list, so hasn't been processed yet; the residue is the sum of all its sg lengths */ + txd = to_s3c24xx_txd(&vd->tx); + + list_for_each_entry(dsg, &txd->dsg_list, node) + bytes += dsg->len; + } else { + /* + * Currently running, so sum over the pending sg's and + * the currently active one. 
+ */ + txd = s3cchan->at; /* NOTE(review): no NULL check; s3cchan->at is reset to NULL by the irq handler and terminate_all - confirm it cannot be NULL on this path */ + + dsg = list_entry(txd->at, struct s3c24xx_sg, node); + list_for_each_entry_from(dsg, &txd->dsg_list, node) + bytes += dsg->len; + + bytes += s3c24xx_dma_getbytes_chan(s3cchan); + } + spin_unlock_irqrestore(&s3cchan->vc.lock, flags); + + /* + * This cookie not complete yet + * Get number of bytes left in the active transactions and queue + */ + dma_set_residue(txstate, bytes); + + /* Whether waiting or running, we're in progress */ + return ret; +} + +/* + * Initialize a descriptor to be used by memcpy submit + */ +static struct dma_async_tx_descriptor *s3c24xx_dma_prep_memcpy( + struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan); + struct s3c24xx_dma_engine *s3cdma = s3cchan->host; + struct s3c24xx_txd *txd; + struct s3c24xx_sg *dsg; + int src_mod, dest_mod; + + dev_dbg(&s3cdma->pdev->dev, "prepare memcpy of %zu bytes from %s\n", + len, s3cchan->name); + + if ((len & S3C24XX_DCON_TC_MASK) != len) { /* reject lengths with bits outside the TC mask */ + dev_err(&s3cdma->pdev->dev, "memcpy size %zu to large\n", len); + return NULL; + } + + txd = s3c24xx_dma_get_txd(); + if (!txd) + return NULL; + + dsg = kzalloc(sizeof(*dsg), GFP_NOWAIT); + if (!dsg) { + s3c24xx_dma_free_txd(txd); + return NULL; + } + list_add_tail(&dsg->node, &txd->dsg_list); + + dsg->src_addr = src; + dsg->dst_addr = dest; + dsg->len = len; + + /* + * Determine a suitable transfer width. + * The DMA controller cannot fetch/store information which is not + * naturally aligned on the bus, i.e., a 4 byte fetch must start at + * an address divisible by 4 - more generally addr % width must be 0. + */ + src_mod = src % 4; + dest_mod = dest % 4; + switch (len % 4) { + case 0: + txd->width = (src_mod == 0 && dest_mod == 0) ? 4 : 1; + break; + case 2: + txd->width = ((src_mod == 2 || src_mod == 0) && + (dest_mod == 2 || dest_mod == 0)) ? 
2 : 1; + break; + default: + txd->width = 1; + break; + } + + txd->disrcc = S3C24XX_DISRCC_LOC_AHB | S3C24XX_DISRCC_INC_INCREMENT; + txd->didstc = S3C24XX_DIDSTC_LOC_AHB | S3C24XX_DIDSTC_INC_INCREMENT; + txd->dcon |= S3C24XX_DCON_DEMAND | S3C24XX_DCON_SYNC_HCLK | + S3C24XX_DCON_SERV_WHOLE; + + return vchan_tx_prep(&s3cchan->vc, &txd->vd, flags); +} + +static struct dma_async_tx_descriptor *s3c24xx_dma_prep_dma_cyclic( + struct dma_chan *chan, dma_addr_t addr, size_t size, size_t period, + enum dma_transfer_direction direction, unsigned long flags) +{ + struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan); + struct s3c24xx_dma_engine *s3cdma = s3cchan->host; + const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata; + struct s3c24xx_dma_channel *cdata = &pdata->channels[s3cchan->id]; + struct s3c24xx_txd *txd; + struct s3c24xx_sg *dsg; + unsigned sg_len; + dma_addr_t slave_addr; + u32 hwcfg = 0; + int i; + + dev_dbg(&s3cdma->pdev->dev, + "prepare cyclic transaction of %zu bytes with period %zu from %s\n", + size, period, s3cchan->name); + + if (!is_slave_direction(direction)) { + dev_err(&s3cdma->pdev->dev, + "direction %d unsupported\n", direction); + return NULL; + } + + txd = s3c24xx_dma_get_txd(); + if (!txd) + return NULL; + + txd->cyclic = 1; + + if (cdata->handshake) + txd->dcon |= S3C24XX_DCON_HANDSHAKE; + + switch (cdata->bus) { + case S3C24XX_DMA_APB: + txd->dcon |= S3C24XX_DCON_SYNC_PCLK; + hwcfg |= S3C24XX_DISRCC_LOC_APB; + break; + case S3C24XX_DMA_AHB: + txd->dcon |= S3C24XX_DCON_SYNC_HCLK; + hwcfg |= S3C24XX_DISRCC_LOC_AHB; + break; + } + + /* + * Always assume our peripheral destination is a fixed + * address in memory. + */ + hwcfg |= S3C24XX_DISRCC_INC_FIXED; + + /* + * Individual dma operations are requested by the slave, + * so serve only single atomic operations (S3C24XX_DCON_SERV_SINGLE). 
+ */ + txd->dcon |= S3C24XX_DCON_SERV_SINGLE; + + if (direction == DMA_MEM_TO_DEV) { + txd->disrcc = S3C24XX_DISRCC_LOC_AHB | + S3C24XX_DISRCC_INC_INCREMENT; + txd->didstc = hwcfg; + slave_addr = s3cchan->cfg.dst_addr; + txd->width = s3cchan->cfg.dst_addr_width; + } else { + txd->disrcc = hwcfg; + txd->didstc = S3C24XX_DIDSTC_LOC_AHB | + S3C24XX_DIDSTC_INC_INCREMENT; + slave_addr = s3cchan->cfg.src_addr; + txd->width = s3cchan->cfg.src_addr_width; + } + + sg_len = size / period; /* NOTE(review): period is not checked for 0 before this division */ + + for (i = 0; i < sg_len; i++) { + dsg = kzalloc(sizeof(*dsg), GFP_NOWAIT); + if (!dsg) { + s3c24xx_dma_free_txd(txd); + return NULL; + } + list_add_tail(&dsg->node, &txd->dsg_list); + + dsg->len = period; + /* Check last period length: the final entry takes whatever remains of size */ + if (i == sg_len - 1) + dsg->len = size - period * i; + if (direction == DMA_MEM_TO_DEV) { + dsg->src_addr = addr + period * i; + dsg->dst_addr = slave_addr; + } else { /* DMA_DEV_TO_MEM */ + dsg->src_addr = slave_addr; + dsg->dst_addr = addr + period * i; + } + } + + return vchan_tx_prep(&s3cchan->vc, &txd->vd, flags); +} + +static struct dma_async_tx_descriptor *s3c24xx_dma_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan); + struct s3c24xx_dma_engine *s3cdma = s3cchan->host; + const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata; + struct s3c24xx_dma_channel *cdata = &pdata->channels[s3cchan->id]; + struct s3c24xx_txd *txd; + struct s3c24xx_sg *dsg; + struct scatterlist *sg; + dma_addr_t slave_addr; + u32 hwcfg = 0; + int tmp; + + dev_dbg(&s3cdma->pdev->dev, "prepare transaction of %d bytes from %s\n", + sg_dma_len(sgl), s3cchan->name); + + txd = s3c24xx_dma_get_txd(); + if (!txd) + return NULL; + + if (cdata->handshake) + txd->dcon |= S3C24XX_DCON_HANDSHAKE; + + switch (cdata->bus) { + case S3C24XX_DMA_APB: + txd->dcon |= S3C24XX_DCON_SYNC_PCLK; + hwcfg |= 
S3C24XX_DISRCC_LOC_APB; + break; + case S3C24XX_DMA_AHB: + txd->dcon |= S3C24XX_DCON_SYNC_HCLK; + hwcfg |= S3C24XX_DISRCC_LOC_AHB; + break; + } + + /* + * Always assume our peripheral destination is a fixed + * address in memory. + */ + hwcfg |= S3C24XX_DISRCC_INC_FIXED; + + /* + * Individual dma operations are requested by the slave, + * so serve only single atomic operations (S3C24XX_DCON_SERV_SINGLE). + */ + txd->dcon |= S3C24XX_DCON_SERV_SINGLE; + + if (direction == DMA_MEM_TO_DEV) { + txd->disrcc = S3C24XX_DISRCC_LOC_AHB | + S3C24XX_DISRCC_INC_INCREMENT; + txd->didstc = hwcfg; + slave_addr = s3cchan->cfg.dst_addr; + txd->width = s3cchan->cfg.dst_addr_width; + } else if (direction == DMA_DEV_TO_MEM) { + txd->disrcc = hwcfg; + txd->didstc = S3C24XX_DIDSTC_LOC_AHB | + S3C24XX_DIDSTC_INC_INCREMENT; + slave_addr = s3cchan->cfg.src_addr; + txd->width = s3cchan->cfg.src_addr_width; + } else { + s3c24xx_dma_free_txd(txd); + dev_err(&s3cdma->pdev->dev, + "direction %d unsupported\n", direction); + return NULL; + } + + for_each_sg(sgl, sg, sg_len, tmp) { + dsg = kzalloc(sizeof(*dsg), GFP_NOWAIT); + if (!dsg) { + s3c24xx_dma_free_txd(txd); + return NULL; + } + list_add_tail(&dsg->node, &txd->dsg_list); + + dsg->len = sg_dma_len(sg); + if (direction == DMA_MEM_TO_DEV) { + dsg->src_addr = sg_dma_address(sg); + dsg->dst_addr = slave_addr; + } else { /* DMA_DEV_TO_MEM */ + dsg->src_addr = slave_addr; + dsg->dst_addr = sg_dma_address(sg); + } + } + + return vchan_tx_prep(&s3cchan->vc, &txd->vd, flags); +} + +/* + * Slave transactions callback to the slave device to allow + * synchronization of slave DMA signals with the DMAC enable + */ +static void s3c24xx_dma_issue_pending(struct dma_chan *chan) +{ + struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&s3cchan->vc.lock, flags); + if (vchan_issue_pending(&s3cchan->vc)) { + if (!s3cchan->phy && s3cchan->state != S3C24XX_DMA_CHAN_WAITING) /* no phy yet and not already queued for one */ + 
s3c24xx_dma_phy_alloc_and_start(s3cchan); + } + spin_unlock_irqrestore(&s3cchan->vc.lock, flags); +} + +/* + * Bringup and teardown + */ + +/* + * Initialise the DMAC memcpy/slave channels. + * Make a local wrapper to hold required data + */ +static int s3c24xx_dma_init_virtual_channels(struct s3c24xx_dma_engine *s3cdma, + struct dma_device *dmadev, unsigned int channels, bool slave) +{ + struct s3c24xx_dma_chan *chan; + int i; + + INIT_LIST_HEAD(&dmadev->channels); + + /* + * Register as many memcpy as we have physical channels, + * we won't always be able to use all but the code will have + * to cope with that situation. + */ + for (i = 0; i < channels; i++) { + chan = devm_kzalloc(dmadev->dev, sizeof(*chan), GFP_KERNEL); + if (!chan) + return -ENOMEM; + + chan->id = i; + chan->host = s3cdma; + chan->state = S3C24XX_DMA_CHAN_IDLE; + + if (slave) { + chan->slave = true; + chan->name = kasprintf(GFP_KERNEL, "slave%d", i); /* NOTE(review): not devm-managed, unlike chan; no kfree() of chan->name is visible in this file chunk - possible leak, confirm */ + if (!chan->name) + return -ENOMEM; + } else { + chan->name = kasprintf(GFP_KERNEL, "memcpy%d", i); + if (!chan->name) + return -ENOMEM; + } + dev_dbg(dmadev->dev, + "initialize virtual channel \"%s\"\n", + chan->name); + + chan->vc.desc_free = s3c24xx_dma_desc_free; + vchan_init(&chan->vc, dmadev); + } + dev_info(dmadev->dev, "initialized %d virtual %s channels\n", + i, slave ? 
"slave" : "memcpy"); + return i; /* number of channels set up; negative errno is returned on allocation failure above */ +} + +static void s3c24xx_dma_free_virtual_channels(struct dma_device *dmadev) +{ + struct s3c24xx_dma_chan *chan = NULL; + struct s3c24xx_dma_chan *next; + + list_for_each_entry_safe(chan, + next, &dmadev->channels, vc.chan.device_node) { + list_del(&chan->vc.chan.device_node); + tasklet_kill(&chan->vc.task); + } +} + +/* s3c2410, s3c2440 and s3c2442 have a 0x40 stride without separate clocks */ +static struct soc_data soc_s3c2410 = { + .stride = 0x40, + .has_reqsel = false, + .has_clocks = false, +}; + +/* s3c2412 and s3c2413 have a 0x40 stride, dmareqsel mechanism and separate clocks */ +static struct soc_data soc_s3c2412 = { + .stride = 0x40, + .has_reqsel = true, + .has_clocks = true, +}; + +/* s3c2443 and following have a 0x100 stride, dmareqsel mechanism and separate clocks */ +static struct soc_data soc_s3c2443 = { + .stride = 0x100, + .has_reqsel = true, + .has_clocks = true, +}; + +static const struct platform_device_id s3c24xx_dma_driver_ids[] = { + { + .name = "s3c2410-dma", + .driver_data = (kernel_ulong_t)&soc_s3c2410, + }, { + .name = "s3c2412-dma", + .driver_data = (kernel_ulong_t)&soc_s3c2412, + }, { + .name = "s3c2443-dma", + .driver_data = (kernel_ulong_t)&soc_s3c2443, + }, + { }, +}; + +static struct soc_data *s3c24xx_dma_get_soc_data(struct platform_device *pdev) +{ + return (struct soc_data *) + platform_get_device_id(pdev)->driver_data; +} + +static int s3c24xx_dma_probe(struct platform_device *pdev) +{ + const struct s3c24xx_dma_platdata *pdata = dev_get_platdata(&pdev->dev); + struct s3c24xx_dma_engine *s3cdma; + struct soc_data *sdata; + struct resource *res; + int ret; + int i; + + if (!pdata) { + dev_err(&pdev->dev, "platform data missing\n"); + return -ENODEV; + } + + /* Basic sanity check */ + if (pdata->num_phy_channels > MAX_DMA_CHANNELS) { + dev_err(&pdev->dev, "too many dma channels %d, max %d\n", + pdata->num_phy_channels, MAX_DMA_CHANNELS); + return -EINVAL; + } + + sdata = s3c24xx_dma_get_soc_data(pdev); + if (!sdata) + return 
-EINVAL; + + s3cdma = devm_kzalloc(&pdev->dev, sizeof(*s3cdma), GFP_KERNEL); + if (!s3cdma) + return -ENOMEM; + + s3cdma->pdev = pdev; + s3cdma->pdata = pdata; + s3cdma->sdata = sdata; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + s3cdma->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(s3cdma->base)) + return PTR_ERR(s3cdma->base); + + s3cdma->phy_chans = devm_kcalloc(&pdev->dev, + pdata->num_phy_channels, + sizeof(struct s3c24xx_dma_phy), + GFP_KERNEL); + if (!s3cdma->phy_chans) + return -ENOMEM; + + /* acquire irqs and clocks for all physical channels; a phy that fails any step is skipped and stays !valid */ + for (i = 0; i < pdata->num_phy_channels; i++) { + struct s3c24xx_dma_phy *phy = &s3cdma->phy_chans[i]; + char clk_name[6]; /* NOTE(review): "dma.%d" plus NUL only fits while i < 10; MAX_DMA_CHANNELS is not visible here - confirm */ + + phy->id = i; + phy->base = s3cdma->base + (i * sdata->stride); + phy->host = s3cdma; + + phy->irq = platform_get_irq(pdev, i); + if (phy->irq < 0) + continue; + + ret = devm_request_irq(&pdev->dev, phy->irq, s3c24xx_dma_irq, + 0, pdev->name, phy); + if (ret) { + dev_err(&pdev->dev, "Unable to request irq for channel %d, error %d\n", + i, ret); + continue; + } + + if (sdata->has_clocks) { + sprintf(clk_name, "dma.%d", i); + phy->clk = devm_clk_get(&pdev->dev, clk_name); + if (IS_ERR(phy->clk) && sdata->has_clocks) { + dev_err(&pdev->dev, "unable to acquire clock for channel %d, error %lu\n", + i, PTR_ERR(phy->clk)); /* NOTE(review): error value printed with %lu; a signed format looks intended - confirm */ + continue; + } + + ret = clk_prepare(phy->clk); + if (ret) { + dev_err(&pdev->dev, "clock for phy %d failed, error %d\n", + i, ret); + continue; + } + } + + spin_lock_init(&phy->lock); + phy->valid = true; + + dev_dbg(&pdev->dev, "physical channel %d is %s\n", + i, s3c24xx_dma_phy_busy(phy) ? 
"BUSY" : "FREE"); + } + + /* Initialize memcpy engine */ + dma_cap_set(DMA_MEMCPY, s3cdma->memcpy.cap_mask); + dma_cap_set(DMA_PRIVATE, s3cdma->memcpy.cap_mask); + s3cdma->memcpy.dev = &pdev->dev; + s3cdma->memcpy.device_free_chan_resources = + s3c24xx_dma_free_chan_resources; + s3cdma->memcpy.device_prep_dma_memcpy = s3c24xx_dma_prep_memcpy; + s3cdma->memcpy.device_tx_status = s3c24xx_dma_tx_status; + s3cdma->memcpy.device_issue_pending = s3c24xx_dma_issue_pending; + s3cdma->memcpy.device_config = s3c24xx_dma_set_runtime_config; /* returns -EINVAL for these channels: they are created with slave == false */ + s3cdma->memcpy.device_terminate_all = s3c24xx_dma_terminate_all; + s3cdma->memcpy.device_synchronize = s3c24xx_dma_synchronize; + + /* Initialize slave engine for SoC internal dedicated peripherals */ + dma_cap_set(DMA_SLAVE, s3cdma->slave.cap_mask); + dma_cap_set(DMA_CYCLIC, s3cdma->slave.cap_mask); + dma_cap_set(DMA_PRIVATE, s3cdma->slave.cap_mask); + s3cdma->slave.dev = &pdev->dev; + s3cdma->slave.device_free_chan_resources = + s3c24xx_dma_free_chan_resources; + s3cdma->slave.device_tx_status = s3c24xx_dma_tx_status; + s3cdma->slave.device_issue_pending = s3c24xx_dma_issue_pending; + s3cdma->slave.device_prep_slave_sg = s3c24xx_dma_prep_slave_sg; + s3cdma->slave.device_prep_dma_cyclic = s3c24xx_dma_prep_dma_cyclic; + s3cdma->slave.device_config = s3c24xx_dma_set_runtime_config; + s3cdma->slave.device_terminate_all = s3c24xx_dma_terminate_all; + s3cdma->slave.device_synchronize = s3c24xx_dma_synchronize; + s3cdma->slave.filter.map = pdata->slave_map; + s3cdma->slave.filter.mapcnt = pdata->slavecnt; + s3cdma->slave.filter.fn = s3c24xx_dma_filter; + + /* Register as many memcpy channels as there are physical channels */ + ret = s3c24xx_dma_init_virtual_channels(s3cdma, &s3cdma->memcpy, + pdata->num_phy_channels, false); + if (ret <= 0) { + dev_warn(&pdev->dev, + "%s failed to enumerate memcpy channels - %d\n", + __func__, ret); + goto err_memcpy; + } + + /* Register slave channels */ + ret = s3c24xx_dma_init_virtual_channels(s3cdma, 
&s3cdma->slave, + pdata->num_channels, true); + if (ret <= 0) { + dev_warn(&pdev->dev, + "%s failed to enumerate slave channels - %d\n", + __func__, ret); + goto err_slave; + } + + ret = dma_async_device_register(&s3cdma->memcpy); + if (ret) { + dev_warn(&pdev->dev, + "%s failed to register memcpy as an async device - %d\n", + __func__, ret); + goto err_memcpy_reg; + } + + ret = dma_async_device_register(&s3cdma->slave); + if (ret) { + dev_warn(&pdev->dev, + "%s failed to register slave as an async device - %d\n", + __func__, ret); + goto err_slave_reg; + } + + platform_set_drvdata(pdev, s3cdma); + dev_info(&pdev->dev, "Loaded dma driver with %d physical channels\n", + pdata->num_phy_channels); + + return 0; + +err_slave_reg: + dma_async_device_unregister(&s3cdma->memcpy); +err_memcpy_reg: + s3c24xx_dma_free_virtual_channels(&s3cdma->slave); +err_slave: + s3c24xx_dma_free_virtual_channels(&s3cdma->memcpy); +err_memcpy: + if (sdata->has_clocks) + for (i = 0; i < pdata->num_phy_channels; i++) { + struct s3c24xx_dma_phy *phy = &s3cdma->phy_chans[i]; + if (phy->valid) + clk_unprepare(phy->clk); + } + + return ret; /* NOTE(review): ret is 0 here if an init_virtual_channels() call returned 0 channels, so probe would report success - confirm */ +} + +static void s3c24xx_dma_free_irq(struct platform_device *pdev, + struct s3c24xx_dma_engine *s3cdma) +{ + int i; + + for (i = 0; i < s3cdma->pdata->num_phy_channels; i++) { + struct s3c24xx_dma_phy *phy = &s3cdma->phy_chans[i]; + + devm_free_irq(&pdev->dev, phy->irq, phy); /* NOTE(review): called for every phy, including ones where probe skipped or failed the irq request - confirm */ + } +} + +static int s3c24xx_dma_remove(struct platform_device *pdev) +{ + const struct s3c24xx_dma_platdata *pdata = dev_get_platdata(&pdev->dev); + struct s3c24xx_dma_engine *s3cdma = platform_get_drvdata(pdev); + struct soc_data *sdata = s3c24xx_dma_get_soc_data(pdev); + int i; + + dma_async_device_unregister(&s3cdma->slave); + dma_async_device_unregister(&s3cdma->memcpy); + + s3c24xx_dma_free_irq(pdev, s3cdma); + + s3c24xx_dma_free_virtual_channels(&s3cdma->slave); + s3c24xx_dma_free_virtual_channels(&s3cdma->memcpy); + + if (sdata->has_clocks) + for (i = 0; i < 
pdata->num_phy_channels; i++) { + struct s3c24xx_dma_phy *phy = &s3cdma->phy_chans[i]; + if (phy->valid) + clk_unprepare(phy->clk); + } + + return 0; +} + +static struct platform_driver s3c24xx_dma_driver = { + .driver = { + .name = "s3c24xx-dma", + }, + .id_table = s3c24xx_dma_driver_ids, + .probe = s3c24xx_dma_probe, + .remove = s3c24xx_dma_remove, +}; + +module_platform_driver(s3c24xx_dma_driver); + +bool s3c24xx_dma_filter(struct dma_chan *chan, void *param) +{ + struct s3c24xx_dma_chan *s3cchan; + + if (chan->device->dev->driver != &s3c24xx_dma_driver.driver) /* not a channel of this driver */ + return false; + + s3cchan = to_s3c24xx_dma_chan(chan); + + return s3cchan->id == (uintptr_t)param; /* param carries the wanted channel id as an integer, not a pointer */ +} +EXPORT_SYMBOL(s3c24xx_dma_filter); + +MODULE_DESCRIPTION("S3C24XX DMA Driver"); +MODULE_AUTHOR("Heiko Stuebner"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/platform_data/dma-s3c24xx.h b/include/linux/platform_data/dma-s3c24xx.h new file mode 100644 index 000000000000..96d02dbeea67 --- /dev/null +++ b/include/linux/platform_data/dma-s3c24xx.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * S3C24XX DMA handling + * + * Copyright (c) 2013 Heiko Stuebner + */ + +/* Helper to encode the source selection constraints for early s3c socs. NOTE(review): src and chan are expanded without parentheses, so compound arguments may bind unexpectedly. */ +#define S3C24XX_DMA_CHANREQ(src, chan) ((BIT(3) | src) << chan * 4) + +enum s3c24xx_dma_bus { + S3C24XX_DMA_APB, + S3C24XX_DMA_AHB, +}; + +/** + * @bus: on which bus does the peripheral reside - AHB or APB. 
+ * @handshake: is a handshake with the peripheral necessary + * @chansel: channel selection information, depending on variant; reqsel for + * s3c2443 and later and channel-selection map for earlier SoCs + * see CHANSEL doc in s3c2443-dma.c + */ +struct s3c24xx_dma_channel { + enum s3c24xx_dma_bus bus; + bool handshake; + u16 chansel; +}; + +struct dma_slave_map; + +/** + * struct s3c24xx_dma_platdata - platform specific settings + * @num_phy_channels: number of physical channels + * @channels: array of virtual channel descriptions + * @num_channels: number of virtual channels + * @slave_map: dma slave map matching table + * @slavecnt: number of elements in slave_map + */ +struct s3c24xx_dma_platdata { + int num_phy_channels; + struct s3c24xx_dma_channel *channels; + int num_channels; + const struct dma_slave_map *slave_map; + int slavecnt; +}; + +struct dma_chan; +bool s3c24xx_dma_filter(struct dma_chan *chan, void *param); From 35c23fba4eb4b3043b42acbdd3fbabdd8824f56f Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 2 Nov 2022 23:57:50 +0100 Subject: [PATCH 3262/4122] gfs2: Add extra error check in alloc_dinode We have reserved the number of blocks we want to allocate, so the actual allocation isn't expected to fail. Nevertheless, make the code behave correctly even when things go wrong. 
Signed-off-by: Andreas Gruenbacher --- fs/gfs2/inode.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 04a201584fa7..41fa69c1be1b 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -403,12 +403,15 @@ static int alloc_dinode(struct gfs2_inode *ip, u32 flags, unsigned *dblocks) goto out_ipreserv; error = gfs2_alloc_blocks(ip, &ip->i_no_addr, dblocks, 1, &ip->i_generation); + if (error) + goto out_trans_end; + ip->i_no_formal_ino = ip->i_generation; ip->i_inode.i_ino = ip->i_no_addr; ip->i_goal = ip->i_no_addr; +out_trans_end: gfs2_trans_end(sdp); - out_ipreserv: gfs2_inplace_release(ip); out_quota: From 761fdbbce96fb3d0569f50a77b1214dbc4b17c44 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 4 Nov 2022 13:26:46 +0100 Subject: [PATCH 3263/4122] gfs2: Get rid of ghs[] in gfs2_create_inode In gfs2_create_inode, get rid of the ghs array in favor of two separate variables. This makes the code much less irritating. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/inode.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 41fa69c1be1b..465f1673101f 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -599,7 +599,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, { const struct qstr *name = &dentry->d_name; struct posix_acl *default_acl, *acl; - struct gfs2_holder ghs[2]; + struct gfs2_holder d_gh, gh; struct inode *inode = NULL; struct gfs2_inode *dip = GFS2_I(dir), *ip; struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); @@ -620,10 +620,10 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (error) goto fail; - error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); + error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, &d_gh); if (error) goto fail; - gfs2_holder_mark_uninitialized(ghs + 1); + gfs2_holder_mark_uninitialized(&gh); error = create_ok(dip, name, 
mode); if (error) @@ -645,7 +645,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, else error = finish_no_open(file, NULL); } - gfs2_glock_dq_uninit(ghs); + gfs2_glock_dq_uninit(&d_gh); goto fail; } else if (error != -ENOENT) { goto fail_gunlock; @@ -734,7 +734,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (error) goto fail_gunlock2; - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, &gh); if (error) goto fail_gunlock3; @@ -788,9 +788,9 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, file->f_mode |= FMODE_CREATED; error = finish_open(file, dentry, gfs2_open_common); } - gfs2_glock_dq_uninit(ghs); + gfs2_glock_dq_uninit(&d_gh); gfs2_qa_put(ip); - gfs2_glock_dq_uninit(ghs + 1); + gfs2_glock_dq_uninit(&gh); gfs2_glock_put(io_gl); gfs2_qa_put(dip); unlock_new_inode(inode); @@ -815,7 +815,7 @@ fail_free_acls: posix_acl_release(acl); fail_gunlock: gfs2_dir_no_add(&da); - gfs2_glock_dq_uninit(ghs); + gfs2_glock_dq_uninit(&d_gh); if (!IS_ERR_OR_NULL(inode)) { clear_nlink(inode); if (!free_vfs_inode) @@ -827,8 +827,8 @@ fail_gunlock: else iput(inode); } - if (gfs2_holder_initialized(ghs + 1)) - gfs2_glock_dq_uninit(ghs + 1); + if (gfs2_holder_initialized(&gh)) + gfs2_glock_dq_uninit(&gh); fail: gfs2_qa_put(dip); return error; From 3d0258bc11185ccb21f922332eca731e1928c5a4 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 2 Nov 2022 18:34:42 +0100 Subject: [PATCH 3264/4122] gfs2: Clean up initialization of "ip" in gfs2_create_inode Initialize variable "ip" earlier so that it can be used interchangeably with "inode" everywhere. 
Signed-off-by: Andreas Gruenbacher --- fs/gfs2/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 465f1673101f..b91f15abe24e 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -659,12 +659,12 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, error = -ENOMEM; if (!inode) goto fail_gunlock; + ip = GFS2_I(inode); error = posix_acl_create(dir, &mode, &default_acl, &acl); if (error) goto fail_gunlock; - ip = GFS2_I(inode); error = gfs2_qa_get(ip); if (error) goto fail_free_acls; @@ -821,7 +821,7 @@ fail_gunlock: if (!free_vfs_inode) mark_inode_dirty(inode); set_bit(free_vfs_inode ? GIF_FREE_VFS_INODE : GIF_ALLOC_FAILED, - &GFS2_I(inode)->i_flags); + &ip->i_flags); if (inode->i_state & I_NEW) iget_failed(inode); else From 38552ff676f072e7d15c5e0a877fda613e57ed2d Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 2 Nov 2022 17:06:58 +0100 Subject: [PATCH 3265/4122] gfs2: Fix and clean up create / evict interaction When gfs2_create_inode() fails after creating a new inode, it uses the GIF_FREE_VFS_INODE and GIF_ALLOC_FAILED inode flags to communicate to gfs2_evict_inode() which parts of the inode need to be deallocated and destroyed. In some error cases, the inode ends up being allocated on disk and then accidentally left behind. In others, the inode is partially constructed and then not properly destroyed. Clean this up by completely handling the inode deallocation and destruction in gfs2_evict_inode(). This means that gfs2_evict_inode() may now be faced with partially constructed inodes, so add the necessary checks to cope with that. In particular, make sure that for incompletely constructed inodes, we're not accessing the buffers backing the on-disk blocks; the contents may be undefined. 
Signed-off-by: Andreas Gruenbacher --- fs/gfs2/inode.c | 26 ++++++++++++-------------- fs/gfs2/meta_io.c | 6 ++++++ fs/gfs2/super.c | 35 +++++++++++++++++++++-------------- fs/gfs2/xattr.c | 26 +++++++++++++++----------- 4 files changed, 54 insertions(+), 39 deletions(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index b91f15abe24e..c057f3bd475f 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -409,6 +409,8 @@ static int alloc_dinode(struct gfs2_inode *ip, u32 flags, unsigned *dblocks) ip->i_no_formal_ino = ip->i_generation; ip->i_inode.i_ino = ip->i_no_addr; ip->i_goal = ip->i_no_addr; + if (*dblocks > 1) + ip->i_eattr = ip->i_no_addr + 1; out_trans_end: gfs2_trans_end(sdp); @@ -589,6 +591,12 @@ static int gfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array, * @size: The initial size of the inode (ignored for directories) * @excl: Force fail if inode exists * + * FIXME: Change to allocate the disk blocks and write them out in the same + * transaction. That way, we can no longer end up in a situation in which an + * inode is allocated, the node crashes, and the block looks like a valid + * inode. (With atomic creates in place, we will also no longer need to zero + * the link count and dirty the inode here on failure.) 
+ * * Returns: 0 on success, or error code */ @@ -604,7 +612,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, struct gfs2_inode *dip = GFS2_I(dir), *ip; struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); struct gfs2_glock *io_gl; - int error, free_vfs_inode = 1; + int error; u32 aflags = 0; unsigned blocks = 1; struct gfs2_diradd da = { .bh = NULL, .save_loc = 1, }; @@ -742,10 +750,8 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (error) goto fail_gunlock3; - if (blocks > 1) { - ip->i_eattr = ip->i_no_addr + 1; + if (blocks > 1) gfs2_init_xattr(ip); - } init_dinode(dip, ip, symname); gfs2_trans_end(sdp); @@ -753,9 +759,6 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, glock_set_object(io_gl, ip); gfs2_set_iop(inode); - free_vfs_inode = 0; /* After this point, the inode is no longer - considered free. Any failures need to undo - the gfs2 structures. */ if (default_acl) { error = __gfs2_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); if (error) @@ -804,10 +807,6 @@ fail_gunlock3: fail_gunlock2: gfs2_glock_put(io_gl); fail_free_inode: - if (ip->i_gl) { - if (free_vfs_inode) /* else evict will do the put for us */ - gfs2_glock_put(ip->i_gl); - } gfs2_rs_deltree(&ip->i_res); gfs2_qa_put(ip); fail_free_acls: @@ -817,11 +816,10 @@ fail_gunlock: gfs2_dir_no_add(&da); gfs2_glock_dq_uninit(&d_gh); if (!IS_ERR_OR_NULL(inode)) { + set_bit(GIF_ALLOC_FAILED, &ip->i_flags); clear_nlink(inode); - if (!free_vfs_inode) + if (ip->i_no_addr) mark_inode_dirty(inode); - set_bit(free_vfs_inode ? 
GIF_FREE_VFS_INODE : GIF_ALLOC_FAILED, - &ip->i_flags); if (inode->i_state & I_NEW) iget_failed(inode); else diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 6ed728aae9a5..3c41b864ee5b 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -442,6 +442,12 @@ void gfs2_journal_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen) struct buffer_head *bh; int ty; + if (!ip->i_gl) { + /* This can only happen during incomplete inode creation. */ + BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags)); + return; + } + gfs2_ail1_wipe(sdp, bstart, blen); while (blen) { ty = REMOVE_META; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index b018957a1bb2..eac9b0c34aac 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -475,6 +475,12 @@ static void gfs2_dirty_inode(struct inode *inode, int flags) int need_endtrans = 0; int ret; + if (unlikely(!ip->i_gl)) { + /* This can only happen during incomplete inode creation. */ + BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags)); + return; + } + if (unlikely(gfs2_withdrawn(sdp))) return; if (!gfs2_glock_is_locked_by_me(ip->i_gl)) { @@ -927,8 +933,7 @@ static int gfs2_drop_inode(struct inode *inode) { struct gfs2_inode *ip = GFS2_I(inode); - if (!test_bit(GIF_FREE_VFS_INODE, &ip->i_flags) && - inode->i_nlink && + if (inode->i_nlink && gfs2_holder_initialized(&ip->i_iopen_gh)) { struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; if (test_bit(GLF_DEMOTE, &gl->gl_flags)) @@ -1076,7 +1081,13 @@ static void gfs2_final_release_pages(struct gfs2_inode *ip) struct inode *inode = &ip->i_inode; struct gfs2_glock *gl = ip->i_gl; - truncate_inode_pages(gfs2_glock2aspace(ip->i_gl), 0); + if (unlikely(!gl)) { + /* This can only happen during incomplete inode creation. 
*/ + BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags)); + return; + } + + truncate_inode_pages(gfs2_glock2aspace(gl), 0); truncate_inode_pages(&inode->i_data, 0); if (atomic_read(&gl->gl_revokes) == 0) { @@ -1218,10 +1229,8 @@ static enum dinode_demise evict_should_delete(struct inode *inode, struct gfs2_sbd *sdp = sb->s_fs_info; int ret; - if (test_bit(GIF_ALLOC_FAILED, &ip->i_flags)) { - BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl)); + if (unlikely(test_bit(GIF_ALLOC_FAILED, &ip->i_flags))) goto should_delete; - } if (test_bit(GIF_DEFERRED_DELETE, &ip->i_flags)) return SHOULD_DEFER_EVICTION; @@ -1298,9 +1307,11 @@ static int evict_unlinked_inode(struct inode *inode) do, gfs2_create_inode can create another inode at the same block location and try to set gl_object again. We clear gl_object here so that subsequent inode creates don't see an old gl_object. */ - glock_clear_object(ip->i_gl, ip); + if (ip->i_gl) { + glock_clear_object(ip->i_gl, ip); + gfs2_inode_remember_delete(ip->i_gl, ip->i_no_formal_ino); + } ret = gfs2_dinode_dealloc(ip); - gfs2_inode_remember_delete(ip->i_gl, ip->i_no_formal_ino); out: return ret; } @@ -1367,12 +1378,7 @@ static void gfs2_evict_inode(struct inode *inode) struct gfs2_holder gh; int ret; - if (test_bit(GIF_FREE_VFS_INODE, &ip->i_flags)) { - clear_inode(inode); - return; - } - - if (inode->i_nlink || sb_rdonly(sb)) + if (inode->i_nlink || sb_rdonly(sb) || !ip->i_no_addr) goto out; gfs2_holder_mark_uninitialized(&gh); @@ -1429,6 +1435,7 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb) ip = alloc_inode_sb(sb, gfs2_inode_cachep, GFP_KERNEL); if (!ip) return NULL; + ip->i_no_addr = 0; ip->i_flags = 0; ip->i_gl = NULL; gfs2_holder_mark_uninitialized(&ip->i_iopen_gh); diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index f6a66050380e..518c0677e12a 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -1412,11 +1412,13 @@ static int ea_dealloc_block(struct gfs2_inode *ip) ip->i_eattr = 0; gfs2_add_inode_blocks(&ip->i_inode, 
-1); - error = gfs2_meta_inode_buffer(ip, &dibh); - if (!error) { - gfs2_trans_add_meta(ip->i_gl, dibh); - gfs2_dinode_out(ip, dibh->b_data); - brelse(dibh); + if (likely(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags))) { + error = gfs2_meta_inode_buffer(ip, &dibh); + if (!error) { + gfs2_trans_add_meta(ip->i_gl, dibh); + gfs2_dinode_out(ip, dibh->b_data); + brelse(dibh); + } } gfs2_trans_end(sdp); @@ -1445,14 +1447,16 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip) if (error) return error; - error = ea_foreach(ip, ea_dealloc_unstuffed, NULL); - if (error) - goto out_quota; - - if (ip->i_diskflags & GFS2_DIF_EA_INDIRECT) { - error = ea_dealloc_indirect(ip); + if (likely(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags))) { + error = ea_foreach(ip, ea_dealloc_unstuffed, NULL); if (error) goto out_quota; + + if (ip->i_diskflags & GFS2_DIF_EA_INDIRECT) { + error = ea_dealloc_indirect(ip); + if (error) + goto out_quota; + } } error = ea_dealloc_block(ip); From 4dc334cab1c34efb17fa6cd10b12fbc9458e5760 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 2 Dec 2022 05:54:01 -0800 Subject: [PATCH 3266/4122] i915/gvt: Move gvt mapping cache initialization to intel_vgpu_init_dev() vfio container registers .dma_unmap() callback after the device is opened. So it's fine for mdev drivers to initialize internal mapping cache in .open_device(). See vfio_device_container_register(). Now with iommufd an access ops with an unmap callback is registered when the device is bound to iommufd which is before .open_device() is called. This implies gvt's .dma_unmap() could be called before its internal mapping cache is initialized. The fix is moving gvt mapping cache initialization to vGPU init. While at it also move ptable initialization together. 
Link: https://lore.kernel.org/r/20221202135402.756470-2-yi.l.liu@intel.com Reviewed-by: Zhi Wang Reviewed-by: Zhenyu Wang Signed-off-by: Yi Liu Signed-off-by: Jason Gunthorpe --- drivers/gpu/drm/i915/gvt/kvmgt.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 7a45e5360caf..aaf0d9e8da95 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -671,9 +671,6 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev) vgpu->attached = true; - kvmgt_protect_table_init(vgpu); - gvt_cache_init(vgpu); - vgpu->track_node.track_write = kvmgt_page_track_write; vgpu->track_node.track_flush_slot = kvmgt_page_track_flush_slot; kvm_page_track_register_notifier(vgpu->vfio_device.kvm, @@ -718,6 +715,11 @@ static void intel_vgpu_close_device(struct vfio_device *vfio_dev) kvmgt_protect_table_destroy(vgpu); gvt_cache_destroy(vgpu); + WARN_ON(vgpu->nr_cache_entries); + + vgpu->gfn_cache = RB_ROOT; + vgpu->dma_addr_cache = RB_ROOT; + intel_vgpu_release_msi_eventfd_ctx(vgpu); vgpu->attached = false; @@ -1451,9 +1453,17 @@ static int intel_vgpu_init_dev(struct vfio_device *vfio_dev) struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); struct intel_vgpu_type *type = container_of(mdev->type, struct intel_vgpu_type, type); + int ret; vgpu->gvt = kdev_to_i915(mdev->type->parent->dev)->gvt; - return intel_gvt_create_vgpu(vgpu, type->conf); + ret = intel_gvt_create_vgpu(vgpu, type->conf); + if (ret) + return ret; + + kvmgt_protect_table_init(vgpu); + gvt_cache_init(vgpu); + + return 0; } static void intel_vgpu_release_dev(struct vfio_device *vfio_dev) From 2a54e347d990574ceb047b71ea0b03979232b85e Mon Sep 17 00:00:00 2001 From: Matthew Rosato Date: Fri, 2 Dec 2022 05:54:02 -0800 Subject: [PATCH 3267/4122] vfio/ap: Validate iova during dma_unmap and trigger irq disable Currently, each mapped iova is stashed in its associated vfio_ap_queue; when we 
get an unmap request, validate that it matches with one or more of these stashed values before attempting unpins. Each stashed iova represents an IRQ that was enabled for a queue. Therefore, if a match is found, trigger IRQ disable for this queue to ensure that underlying firmware will no longer try to use the associated pfn after the page is unpinned. IRQ disable will also handle the associated unpin. Link: https://lore.kernel.org/r/20221202135402.756470-3-yi.l.liu@intel.com Reviewed-by: Tony Krowiak Signed-off-by: Matthew Rosato Signed-off-by: Yi Liu Signed-off-by: Jason Gunthorpe --- drivers/s390/crypto/vfio_ap_ops.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index 0b4cc8c597ae..8bf353d46820 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -1535,13 +1535,29 @@ static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev, return 0; } +static void unmap_iova(struct ap_matrix_mdev *matrix_mdev, u64 iova, u64 length) +{ + struct ap_queue_table *qtable = &matrix_mdev->qtable; + struct vfio_ap_queue *q; + int loop_cursor; + + hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) { + if (q->saved_iova >= iova && q->saved_iova < iova + length) + vfio_ap_irq_disable(q); + } +} + static void vfio_ap_mdev_dma_unmap(struct vfio_device *vdev, u64 iova, u64 length) { struct ap_matrix_mdev *matrix_mdev = container_of(vdev, struct ap_matrix_mdev, vdev); - vfio_unpin_pages(&matrix_mdev->vdev, iova, 1); + mutex_lock(&matrix_dev->mdevs_lock); + + unmap_iova(matrix_mdev, iova, length); + + mutex_unlock(&matrix_dev->mdevs_lock); } /** From 294aaccb50130f596943be892c5d3a3568b76c57 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 29 Nov 2022 16:31:46 -0400 Subject: [PATCH 3268/4122] vfio: Move vfio_device driver open/close code to a function This error unwind is getting complicated. 
Move all the code into two paired functions. The functions should be called when the open_count == 1 after incrementing/before decrementing. Link: https://lore.kernel.org/r/1-v4-42cd2eb0e3eb+335a-vfio_iommufd_jgg@nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Yi Liu Reviewed-by: Alex Williamson Tested-by: Alex Williamson Tested-by: Yi Liu Tested-by: Nicolin Chen Tested-by: Lixiao Yang Tested-by: Matthew Rosato Tested-by: Yu He Signed-off-by: Jason Gunthorpe --- drivers/vfio/vfio_main.c | 97 ++++++++++++++++++++++------------------ 1 file changed, 54 insertions(+), 43 deletions(-) diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index 2d168793d4e1..2e8346d13c16 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -734,6 +734,51 @@ bool vfio_assert_device_open(struct vfio_device *device) return !WARN_ON_ONCE(!READ_ONCE(device->open_count)); } +static int vfio_device_first_open(struct vfio_device *device) +{ + int ret; + + lockdep_assert_held(&device->dev_set->lock); + + if (!try_module_get(device->dev->driver->owner)) + return -ENODEV; + + /* + * Here we pass the KVM pointer with the group under the lock. If the + * device driver will use it, it must obtain a reference and release it + * during close_device. 
+ */ + mutex_lock(&device->group->group_lock); + device->kvm = device->group->kvm; + if (device->ops->open_device) { + ret = device->ops->open_device(device); + if (ret) + goto err_module_put; + } + vfio_device_container_register(device); + mutex_unlock(&device->group->group_lock); + return 0; + +err_module_put: + device->kvm = NULL; + mutex_unlock(&device->group->group_lock); + module_put(device->dev->driver->owner); + return ret; +} + +static void vfio_device_last_close(struct vfio_device *device) +{ + lockdep_assert_held(&device->dev_set->lock); + + mutex_lock(&device->group->group_lock); + vfio_device_container_unregister(device); + if (device->ops->close_device) + device->ops->close_device(device); + device->kvm = NULL; + mutex_unlock(&device->group->group_lock); + module_put(device->dev->driver->owner); +} + static struct file *vfio_device_open(struct vfio_device *device) { struct file *filep; @@ -745,29 +790,12 @@ static struct file *vfio_device_open(struct vfio_device *device) if (ret) return ERR_PTR(ret); - if (!try_module_get(device->dev->driver->owner)) { - ret = -ENODEV; - goto err_unassign_container; - } - mutex_lock(&device->dev_set->lock); device->open_count++; if (device->open_count == 1) { - /* - * Here we pass the KVM pointer with the group under the read - * lock. If the device driver will use it, it must obtain a - * reference and release it during close_device. 
- */ - mutex_lock(&device->group->group_lock); - device->kvm = device->group->kvm; - - if (device->ops->open_device) { - ret = device->ops->open_device(device); - if (ret) - goto err_undo_count; - } - vfio_device_container_register(device); - mutex_unlock(&device->group->group_lock); + ret = vfio_device_first_open(device); + if (ret) + goto err_unassign_container; } mutex_unlock(&device->dev_set->lock); @@ -800,20 +828,11 @@ static struct file *vfio_device_open(struct vfio_device *device) err_close_device: mutex_lock(&device->dev_set->lock); - mutex_lock(&device->group->group_lock); - if (device->open_count == 1 && device->ops->close_device) { - device->ops->close_device(device); - - vfio_device_container_unregister(device); - } -err_undo_count: - mutex_unlock(&device->group->group_lock); - device->open_count--; - if (device->open_count == 0 && device->kvm) - device->kvm = NULL; - mutex_unlock(&device->dev_set->lock); - module_put(device->dev->driver->owner); + if (device->open_count == 1) + vfio_device_last_close(device); err_unassign_container: + device->open_count--; + mutex_unlock(&device->dev_set->lock); vfio_device_unassign_container(device); return ERR_PTR(ret); } @@ -1016,19 +1035,11 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep) mutex_lock(&device->dev_set->lock); vfio_assert_device_open(device); - mutex_lock(&device->group->group_lock); - if (device->open_count == 1 && device->ops->close_device) - device->ops->close_device(device); - - vfio_device_container_unregister(device); - mutex_unlock(&device->group->group_lock); + if (device->open_count == 1) + vfio_device_last_close(device); device->open_count--; - if (device->open_count == 0) - device->kvm = NULL; mutex_unlock(&device->dev_set->lock); - module_put(device->dev->driver->owner); - vfio_device_unassign_container(device); vfio_device_put_registration(device); From bab6fabc01d99c7e0293807e835231740379b692 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 29 Nov 
2022 16:31:47 -0400 Subject: [PATCH 3269/4122] vfio: Move vfio_device_assign_container() into vfio_device_first_open() The only thing this function does is assert the group has an assigned container and increment refcounts. The overall model we have is that once a container_users refcount is incremented it cannot be de-assigned from the group - vfio_group_ioctl_unset_container() will fail and the group FD cannot be closed. Thus we do not need to check this on every device FD open, just the first. Reorganize the code so that only the first open and last close manage the container. Link: https://lore.kernel.org/r/2-v4-42cd2eb0e3eb+335a-vfio_iommufd_jgg@nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Yi Liu Reviewed-by: Alex Williamson Tested-by: Alex Williamson Tested-by: Nicolin Chen Tested-by: Yi Liu Tested-by: Lixiao Yang Tested-by: Matthew Rosato Tested-by: Yu He Signed-off-by: Jason Gunthorpe --- drivers/vfio/container.c | 4 ++-- drivers/vfio/vfio_main.c | 24 +++++++++++------------- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/drivers/vfio/container.c b/drivers/vfio/container.c index d74164abbf40..dd79a66ec62c 100644 --- a/drivers/vfio/container.c +++ b/drivers/vfio/container.c @@ -531,11 +531,11 @@ int vfio_device_assign_container(struct vfio_device *device) void vfio_device_unassign_container(struct vfio_device *device) { - mutex_lock(&device->group->group_lock); + lockdep_assert_held_write(&device->group->group_lock); + WARN_ON(device->group->container_users <= 1); device->group->container_users--; fput(device->group->opened_file); - mutex_unlock(&device->group->group_lock); } /* diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index 2e8346d13c16..717c7f404fee 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -749,18 +749,24 @@ static int vfio_device_first_open(struct vfio_device *device) * during close_device. 
*/ mutex_lock(&device->group->group_lock); + ret = vfio_device_assign_container(device); + if (ret) + goto err_module_put; + device->kvm = device->group->kvm; if (device->ops->open_device) { ret = device->ops->open_device(device); if (ret) - goto err_module_put; + goto err_container; } vfio_device_container_register(device); mutex_unlock(&device->group->group_lock); return 0; -err_module_put: +err_container: device->kvm = NULL; + vfio_device_unassign_container(device); +err_module_put: mutex_unlock(&device->group->group_lock); module_put(device->dev->driver->owner); return ret; @@ -775,6 +781,7 @@ static void vfio_device_last_close(struct vfio_device *device) if (device->ops->close_device) device->ops->close_device(device); device->kvm = NULL; + vfio_device_unassign_container(device); mutex_unlock(&device->group->group_lock); module_put(device->dev->driver->owner); } @@ -784,18 +791,12 @@ static struct file *vfio_device_open(struct vfio_device *device) struct file *filep; int ret; - mutex_lock(&device->group->group_lock); - ret = vfio_device_assign_container(device); - mutex_unlock(&device->group->group_lock); - if (ret) - return ERR_PTR(ret); - mutex_lock(&device->dev_set->lock); device->open_count++; if (device->open_count == 1) { ret = vfio_device_first_open(device); if (ret) - goto err_unassign_container; + goto err_unlock; } mutex_unlock(&device->dev_set->lock); @@ -830,10 +831,9 @@ err_close_device: mutex_lock(&device->dev_set->lock); if (device->open_count == 1) vfio_device_last_close(device); -err_unassign_container: +err_unlock: device->open_count--; mutex_unlock(&device->dev_set->lock); - vfio_device_unassign_container(device); return ERR_PTR(ret); } @@ -1040,8 +1040,6 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep) device->open_count--; mutex_unlock(&device->dev_set->lock); - vfio_device_unassign_container(device); - vfio_device_put_registration(device); return 0; From 04f930c3e44bb9010bba8521f970d00d95a94eb0 Mon Sep 17 
00:00:00 2001 From: Jason Gunthorpe Date: Tue, 29 Nov 2022 16:31:48 -0400 Subject: [PATCH 3270/4122] vfio: Rename vfio_device_assign/unassign_container() These functions don't really assign anything anymore, they just increment some refcounts and do a sanity check. Call them vfio_group_[un]use_container() Link: https://lore.kernel.org/r/3-v4-42cd2eb0e3eb+335a-vfio_iommufd_jgg@nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Yi Liu Reviewed-by: Alex Williamson Tested-by: Alex Williamson Tested-by: Nicolin Chen Tested-by: Yi Liu Tested-by: Lixiao Yang Tested-by: Matthew Rosato Tested-by: Yu He Signed-off-by: Jason Gunthorpe --- drivers/vfio/container.c | 14 ++++++-------- drivers/vfio/vfio.h | 4 ++-- drivers/vfio/vfio_main.c | 6 +++--- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/vfio/container.c b/drivers/vfio/container.c index dd79a66ec62c..499777930b08 100644 --- a/drivers/vfio/container.c +++ b/drivers/vfio/container.c @@ -511,10 +511,8 @@ void vfio_group_detach_container(struct vfio_group *group) vfio_container_put(container); } -int vfio_device_assign_container(struct vfio_device *device) +int vfio_group_use_container(struct vfio_group *group) { - struct vfio_group *group = device->group; - lockdep_assert_held(&group->group_lock); if (!group->container || !group->container->iommu_driver || @@ -529,13 +527,13 @@ int vfio_device_assign_container(struct vfio_device *device) return 0; } -void vfio_device_unassign_container(struct vfio_device *device) +void vfio_group_unuse_container(struct vfio_group *group) { - lockdep_assert_held_write(&device->group->group_lock); + lockdep_assert_held(&group->group_lock); - WARN_ON(device->group->container_users <= 1); - device->group->container_users--; - fput(device->group->opened_file); + WARN_ON(group->container_users <= 1); + group->container_users--; + fput(group->opened_file); } /* diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h index bcad54bbab08..f95f4925b83b 100644 --- 
a/drivers/vfio/vfio.h +++ b/drivers/vfio/vfio.h @@ -112,8 +112,8 @@ void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops); bool vfio_assert_device_open(struct vfio_device *device); struct vfio_container *vfio_container_from_file(struct file *filep); -int vfio_device_assign_container(struct vfio_device *device); -void vfio_device_unassign_container(struct vfio_device *device); +int vfio_group_use_container(struct vfio_group *group); +void vfio_group_unuse_container(struct vfio_group *group); int vfio_container_attach_group(struct vfio_container *container, struct vfio_group *group); void vfio_group_detach_container(struct vfio_group *group); diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index 717c7f404fee..8c2dcb481ae1 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -749,7 +749,7 @@ static int vfio_device_first_open(struct vfio_device *device) * during close_device. */ mutex_lock(&device->group->group_lock); - ret = vfio_device_assign_container(device); + ret = vfio_group_use_container(device->group); if (ret) goto err_module_put; @@ -765,7 +765,7 @@ static int vfio_device_first_open(struct vfio_device *device) err_container: device->kvm = NULL; - vfio_device_unassign_container(device); + vfio_group_unuse_container(device->group); err_module_put: mutex_unlock(&device->group->group_lock); module_put(device->dev->driver->owner); @@ -781,7 +781,7 @@ static void vfio_device_last_close(struct vfio_device *device) if (device->ops->close_device) device->ops->close_device(device); device->kvm = NULL; - vfio_device_unassign_container(device); + vfio_group_unuse_container(device->group); mutex_unlock(&device->group->group_lock); module_put(device->dev->driver->owner); } From 0d8227b622f3529661ad6a9702a52932e149a30d Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 29 Nov 2022 16:31:49 -0400 Subject: [PATCH 3271/4122] vfio: Use IOMMU_CAP_ENFORCE_CACHE_COHERENCY for vfio_file_enforced_coherent() iommufd 
doesn't establish the iommu_domains until after the device FD is opened, even if the container has been set. This design is part of moving away from the group centric iommu APIs. This is fine, except that the normal sequence of establishing the kvm wbinvd won't work: group = open("/dev/vfio/XX") ioctl(group, VFIO_GROUP_SET_CONTAINER) ioctl(kvm, KVM_DEV_VFIO_GROUP_ADD) ioctl(group, VFIO_GROUP_GET_DEVICE_FD) As the domains don't start existing until GET_DEVICE_FD. Further, GET_DEVICE_FD requires that KVM_DEV_VFIO_GROUP_ADD already be done as that is what sets the group->kvm and thus device->kvm for the driver to use during open. Now that we have device centric cap ops and the new IOMMU_CAP_ENFORCE_CACHE_COHERENCY we know what the iommu_domain will be capable of without having to create it. Use this to compute vfio_file_enforced_coherent() and resolve the ordering problems. VFIO always tries to upgrade domains to enforce cache coherency, it never attaches a device that supports enforce cache coherency to a less capable domain, so the cap test is a sufficient proxy for the ultimate outcome. iommufd also ensures that devices that set the cap will be connected to enforcing domains. 
Link: https://lore.kernel.org/r/4-v4-42cd2eb0e3eb+335a-vfio_iommufd_jgg@nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Alex Williamson Tested-by: Alex Williamson Tested-by: Nicolin Chen Tested-by: Yi Liu Tested-by: Lixiao Yang Tested-by: Matthew Rosato Tested-by: Yu He Signed-off-by: Jason Gunthorpe --- drivers/vfio/container.c | 5 +++-- drivers/vfio/vfio.h | 2 -- drivers/vfio/vfio_main.c | 29 ++++++++++++++++------------- 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/drivers/vfio/container.c b/drivers/vfio/container.c index 499777930b08..d97747dfb05d 100644 --- a/drivers/vfio/container.c +++ b/drivers/vfio/container.c @@ -188,8 +188,9 @@ void vfio_device_container_unregister(struct vfio_device *device) device->group->container->iommu_data, device); } -long vfio_container_ioctl_check_extension(struct vfio_container *container, - unsigned long arg) +static long +vfio_container_ioctl_check_extension(struct vfio_container *container, + unsigned long arg) { struct vfio_iommu_driver *driver; long ret = 0; diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h index f95f4925b83b..731561258704 100644 --- a/drivers/vfio/vfio.h +++ b/drivers/vfio/vfio.h @@ -119,8 +119,6 @@ int vfio_container_attach_group(struct vfio_container *container, void vfio_group_detach_container(struct vfio_group *group); void vfio_device_container_register(struct vfio_device *device); void vfio_device_container_unregister(struct vfio_device *device); -long vfio_container_ioctl_check_extension(struct vfio_container *container, - unsigned long arg); int __init vfio_container_init(void); void vfio_container_cleanup(void); diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index 8c2dcb481ae1..77d6c0ba6a83 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -1622,24 +1622,27 @@ EXPORT_SYMBOL_GPL(vfio_file_is_group); bool vfio_file_enforced_coherent(struct file *file) { struct vfio_group *group = file->private_data; - bool ret; + struct vfio_device 
*device; + bool ret = true; if (!vfio_file_is_group(file)) return true; - mutex_lock(&group->group_lock); - if (group->container) { - ret = vfio_container_ioctl_check_extension(group->container, - VFIO_DMA_CC_IOMMU); - } else { - /* - * Since the coherency state is determined only once a container - * is attached the user must do so before they can prove they - * have permission. - */ - ret = true; + /* + * If the device does not have IOMMU_CAP_ENFORCE_CACHE_COHERENCY then + * any domain later attached to it will also not support it. If the cap + * is set then the iommu_domain eventually attached to the device/group + * must use a domain with enforce_cache_coherency(). + */ + mutex_lock(&group->device_lock); + list_for_each_entry(device, &group->device_list, group_next) { + if (!device_iommu_capable(device->dev, + IOMMU_CAP_ENFORCE_CACHE_COHERENCY)) { + ret = false; + break; + } } - mutex_unlock(&group->group_lock); + mutex_unlock(&group->device_lock); return ret; } EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent); From 2a3dab19a0a6c1823645764188776f271de1b3cf Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 29 Nov 2022 16:31:50 -0400 Subject: [PATCH 3272/4122] vfio-iommufd: Allow iommufd to be used in place of a container fd This makes VFIO_GROUP_SET_CONTAINER accept both a vfio container FD and an iommufd. In iommufd mode an IOAS will exist after the SET_CONTAINER, but it will not be attached to any groups. For VFIO this means that the VFIO_GROUP_GET_STATUS and VFIO_GROUP_FLAGS_VIABLE work subtly differently. With the container FD the iommu_group_claim_dma_owner() is done during SET_CONTAINER but for IOMMUFD this is done during VFIO_GROUP_GET_DEVICE_FD, meaning that VFIO_GROUP_FLAGS_VIABLE could be set but GET_DEVICE_FD will fail due to viability. As GET_DEVICE_FD can fail for many reasons already this is not expected to be a meaningful difference.
Reorganize the tests for if the group has an assigned container or iommu into a vfio_group_has_iommu() function and consolidate all the duplicated WARN_ON's etc related to this. Call container functions only if a container is actually present on the group. Link: https://lore.kernel.org/r/5-v4-42cd2eb0e3eb+335a-vfio_iommufd_jgg@nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Alex Williamson Tested-by: Alex Williamson Tested-by: Nicolin Chen Tested-by: Yi Liu Tested-by: Lixiao Yang Tested-by: Matthew Rosato Tested-by: Yu He Signed-off-by: Jason Gunthorpe --- drivers/vfio/Kconfig | 1 + drivers/vfio/container.c | 7 +++- drivers/vfio/vfio.h | 2 + drivers/vfio/vfio_main.c | 88 +++++++++++++++++++++++++++++++++------- 4 files changed, 82 insertions(+), 16 deletions(-) diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index 86c381ceb9a1..1118d322eec9 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -2,6 +2,7 @@ menuconfig VFIO tristate "VFIO Non-Privileged userspace driver framework" select IOMMU_API + depends on IOMMUFD || !IOMMUFD select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64) select INTERVAL_TREE help diff --git a/drivers/vfio/container.c b/drivers/vfio/container.c index d97747dfb05d..8772dad68085 100644 --- a/drivers/vfio/container.c +++ b/drivers/vfio/container.c @@ -516,8 +516,11 @@ int vfio_group_use_container(struct vfio_group *group) { lockdep_assert_held(&group->group_lock); - if (!group->container || !group->container->iommu_driver || - WARN_ON(!group->container_users)) + /* + * The container fd has been assigned with VFIO_GROUP_SET_CONTAINER but + * VFIO_SET_IOMMU hasn't been done yet. 
+ */ + if (!group->container->iommu_driver) return -EINVAL; if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h index 731561258704..a9dd0615266c 100644 --- a/drivers/vfio/vfio.h +++ b/drivers/vfio/vfio.h @@ -10,6 +10,7 @@ #include #include +struct iommufd_ctx; struct iommu_group; struct vfio_device; struct vfio_container; @@ -60,6 +61,7 @@ struct vfio_group { struct kvm *kvm; struct file *opened_file; struct blocking_notifier_head notifier; + struct iommufd_ctx *iommufd; }; /* events for the backend driver notify callback */ diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index 77d6c0ba6a83..f11157d056e6 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "vfio.h" #define DRIVER_VERSION "0.3" @@ -662,6 +663,18 @@ EXPORT_SYMBOL_GPL(vfio_unregister_group_dev); /* * VFIO Group fd, /dev/vfio/$GROUP */ +static bool vfio_group_has_iommu(struct vfio_group *group) +{ + lockdep_assert_held(&group->group_lock); + /* + * There can only be users if there is a container, and if there is a + * container there must be users. + */ + WARN_ON(!group->container != !group->container_users); + + return group->container || group->iommufd; +} + /* * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or * if there was no container to unset. 
Since the ioctl is called on @@ -673,15 +686,21 @@ static int vfio_group_ioctl_unset_container(struct vfio_group *group) int ret = 0; mutex_lock(&group->group_lock); - if (!group->container) { + if (!vfio_group_has_iommu(group)) { ret = -EINVAL; goto out_unlock; } - if (group->container_users != 1) { - ret = -EBUSY; - goto out_unlock; + if (group->container) { + if (group->container_users != 1) { + ret = -EBUSY; + goto out_unlock; + } + vfio_group_detach_container(group); + } + if (group->iommufd) { + iommufd_ctx_put(group->iommufd); + group->iommufd = NULL; } - vfio_group_detach_container(group); out_unlock: mutex_unlock(&group->group_lock); @@ -692,6 +711,7 @@ static int vfio_group_ioctl_set_container(struct vfio_group *group, int __user *arg) { struct vfio_container *container; + struct iommufd_ctx *iommufd; struct fd f; int ret; int fd; @@ -704,7 +724,7 @@ static int vfio_group_ioctl_set_container(struct vfio_group *group, return -EBADF; mutex_lock(&group->group_lock); - if (group->container || WARN_ON(group->container_users)) { + if (vfio_group_has_iommu(group)) { ret = -EINVAL; goto out_unlock; } @@ -714,12 +734,28 @@ static int vfio_group_ioctl_set_container(struct vfio_group *group, } container = vfio_container_from_file(f.file); - ret = -EINVAL; if (container) { ret = vfio_container_attach_group(container, group); goto out_unlock; } + iommufd = iommufd_ctx_from_file(f.file); + if (!IS_ERR(iommufd)) { + u32 ioas_id; + + ret = iommufd_vfio_compat_ioas_id(iommufd, &ioas_id); + if (ret) { + iommufd_ctx_put(group->iommufd); + goto out_unlock; + } + + group->iommufd = iommufd; + goto out_unlock; + } + + /* The FD passed is not recognized. */ + ret = -EBADFD; + out_unlock: mutex_unlock(&group->group_lock); fdput(f); @@ -749,9 +785,16 @@ static int vfio_device_first_open(struct vfio_device *device) * during close_device. 
*/ mutex_lock(&device->group->group_lock); - ret = vfio_group_use_container(device->group); - if (ret) + if (!vfio_group_has_iommu(device->group)) { + ret = -EINVAL; goto err_module_put; + } + + if (device->group->container) { + ret = vfio_group_use_container(device->group); + if (ret) + goto err_module_put; + } device->kvm = device->group->kvm; if (device->ops->open_device) { @@ -759,13 +802,15 @@ static int vfio_device_first_open(struct vfio_device *device) if (ret) goto err_container; } - vfio_device_container_register(device); + if (device->group->container) + vfio_device_container_register(device); mutex_unlock(&device->group->group_lock); return 0; err_container: device->kvm = NULL; - vfio_group_unuse_container(device->group); + if (device->group->container) + vfio_group_unuse_container(device->group); err_module_put: mutex_unlock(&device->group->group_lock); module_put(device->dev->driver->owner); @@ -777,11 +822,13 @@ static void vfio_device_last_close(struct vfio_device *device) lockdep_assert_held(&device->dev_set->lock); mutex_lock(&device->group->group_lock); - vfio_device_container_unregister(device); + if (device->group->container) + vfio_device_container_unregister(device); if (device->ops->close_device) device->ops->close_device(device); device->kvm = NULL; - vfio_group_unuse_container(device->group); + if (device->group->container) + vfio_group_unuse_container(device->group); mutex_unlock(&device->group->group_lock); module_put(device->dev->driver->owner); } @@ -897,7 +944,14 @@ static int vfio_group_ioctl_get_status(struct vfio_group *group, return -ENODEV; } - if (group->container) + /* + * With the container FD the iommu_group_claim_dma_owner() is done + * during SET_CONTAINER but for IOMMFD this is done during + * VFIO_GROUP_GET_DEVICE_FD. Meaning that with iommufd + * VFIO_GROUP_FLAGS_VIABLE could be set but GET_DEVICE_FD will fail due + * to viability. 
+ */ + if (vfio_group_has_iommu(group)) status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET | VFIO_GROUP_FLAGS_VIABLE; else if (!iommu_group_dma_owner_claimed(group->iommu_group)) @@ -980,6 +1034,10 @@ static int vfio_group_fops_release(struct inode *inode, struct file *filep) WARN_ON(group->notifier.head); if (group->container) vfio_group_detach_container(group); + if (group->iommufd) { + iommufd_ctx_put(group->iommufd); + group->iommufd = NULL; + } group->opened_file = NULL; mutex_unlock(&group->group_lock); return 0; @@ -1878,6 +1936,8 @@ static void __exit vfio_cleanup(void) module_init(vfio_init); module_exit(vfio_cleanup); +MODULE_IMPORT_NS(IOMMUFD); +MODULE_IMPORT_NS(IOMMUFD_VFIO); MODULE_VERSION(DRIVER_VERSION); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR(DRIVER_AUTHOR); From a4d1f91db5021c57e14721ac090616c90386ac70 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 29 Nov 2022 16:31:51 -0400 Subject: [PATCH 3273/4122] vfio-iommufd: Support iommufd for physical VFIO devices This creates the iommufd_device for the physical VFIO drivers. These are all the drivers that are calling vfio_register_group_dev() and expect the type1 code to setup a real iommu_domain against their parent struct device. The design gives the driver a choice in how it gets connected to iommufd by providing bind_iommufd/unbind_iommufd/attach_ioas callbacks to implement as required. The core code provides three default callbacks for physical mode using a real iommu_domain. 
This is suitable for drivers using vfio_register_group_dev() Link: https://lore.kernel.org/r/6-v4-42cd2eb0e3eb+335a-vfio_iommufd_jgg@nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Alex Williamson Tested-by: Alex Williamson Tested-by: Nicolin Chen Tested-by: Yi Liu Tested-by: Lixiao Yang Tested-by: Matthew Rosato Tested-by: Yu He Signed-off-by: Jason Gunthorpe --- drivers/vfio/Makefile | 1 + drivers/vfio/fsl-mc/vfio_fsl_mc.c | 3 + drivers/vfio/iommufd.c | 100 ++++++++++++++++++ .../vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 6 ++ drivers/vfio/pci/mlx5/main.c | 3 + drivers/vfio/pci/vfio_pci.c | 3 + drivers/vfio/platform/vfio_amba.c | 3 + drivers/vfio/platform/vfio_platform.c | 3 + drivers/vfio/vfio.h | 15 +++ drivers/vfio/vfio_main.c | 15 ++- include/linux/vfio.h | 25 +++++ 11 files changed, 175 insertions(+), 2 deletions(-) create mode 100644 drivers/vfio/iommufd.c diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index b693a1169286..3863922529ef 100644 --- a/drivers/vfio/Makefile +++ b/drivers/vfio/Makefile @@ -6,6 +6,7 @@ obj-$(CONFIG_VFIO) += vfio.o vfio-y += vfio_main.o \ iova_bitmap.o \ container.o +vfio-$(CONFIG_IOMMUFD) += iommufd.o obj-$(CONFIG_VFIO_VIRQFD) += vfio_virqfd.o obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c index b16874e913e4..5cd4bb476440 100644 --- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c @@ -592,6 +592,9 @@ static const struct vfio_device_ops vfio_fsl_mc_ops = { .read = vfio_fsl_mc_read, .write = vfio_fsl_mc_write, .mmap = vfio_fsl_mc_mmap, + .bind_iommufd = vfio_iommufd_physical_bind, + .unbind_iommufd = vfio_iommufd_physical_unbind, + .attach_ioas = vfio_iommufd_physical_attach_ioas, }; static struct fsl_mc_driver vfio_fsl_mc_driver = { diff --git a/drivers/vfio/iommufd.c b/drivers/vfio/iommufd.c new file mode 100644 index 000000000000..6e47a3df1a71 --- /dev/null +++ b/drivers/vfio/iommufd.c @@ -0,0 +1,100 @@ 
+// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES + */ +#include +#include + +#include "vfio.h" + +MODULE_IMPORT_NS(IOMMUFD); +MODULE_IMPORT_NS(IOMMUFD_VFIO); + +int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx) +{ + u32 ioas_id; + u32 device_id; + int ret; + + lockdep_assert_held(&vdev->dev_set->lock); + + /* + * If the driver doesn't provide this op then it means the device does + * not do DMA at all. So nothing to do. + */ + if (!vdev->ops->bind_iommufd) + return 0; + + ret = vdev->ops->bind_iommufd(vdev, ictx, &device_id); + if (ret) + return ret; + + ret = iommufd_vfio_compat_ioas_id(ictx, &ioas_id); + if (ret) + goto err_unbind; + ret = vdev->ops->attach_ioas(vdev, &ioas_id); + if (ret) + goto err_unbind; + + /* + * The legacy path has no way to return the device id or the selected + * pt_id + */ + return 0; + +err_unbind: + if (vdev->ops->unbind_iommufd) + vdev->ops->unbind_iommufd(vdev); + return ret; +} + +void vfio_iommufd_unbind(struct vfio_device *vdev) +{ + lockdep_assert_held(&vdev->dev_set->lock); + + if (vdev->ops->unbind_iommufd) + vdev->ops->unbind_iommufd(vdev); +} + +/* + * The physical standard ops mean that the iommufd_device is bound to the + * physical device vdev->dev that was provided to vfio_init_group_dev(). 
Drivers + * using this ops set should call vfio_register_group_dev() + */ +int vfio_iommufd_physical_bind(struct vfio_device *vdev, + struct iommufd_ctx *ictx, u32 *out_device_id) +{ + struct iommufd_device *idev; + + idev = iommufd_device_bind(ictx, vdev->dev, out_device_id); + if (IS_ERR(idev)) + return PTR_ERR(idev); + vdev->iommufd_device = idev; + return 0; +} +EXPORT_SYMBOL_GPL(vfio_iommufd_physical_bind); + +void vfio_iommufd_physical_unbind(struct vfio_device *vdev) +{ + lockdep_assert_held(&vdev->dev_set->lock); + + if (vdev->iommufd_attached) { + iommufd_device_detach(vdev->iommufd_device); + vdev->iommufd_attached = false; + } + iommufd_device_unbind(vdev->iommufd_device); + vdev->iommufd_device = NULL; +} +EXPORT_SYMBOL_GPL(vfio_iommufd_physical_unbind); + +int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id) +{ + int rc; + + rc = iommufd_device_attach(vdev->iommufd_device, pt_id); + if (rc) + return rc; + vdev->iommufd_attached = true; + return 0; +} +EXPORT_SYMBOL_GPL(vfio_iommufd_physical_attach_ioas); diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c index 39eeca18a0f7..40019b11c5a9 100644 --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c @@ -1246,6 +1246,9 @@ static const struct vfio_device_ops hisi_acc_vfio_pci_migrn_ops = { .mmap = hisi_acc_vfio_pci_mmap, .request = vfio_pci_core_request, .match = vfio_pci_core_match, + .bind_iommufd = vfio_iommufd_physical_bind, + .unbind_iommufd = vfio_iommufd_physical_unbind, + .attach_ioas = vfio_iommufd_physical_attach_ioas, }; static const struct vfio_device_ops hisi_acc_vfio_pci_ops = { @@ -1261,6 +1264,9 @@ static const struct vfio_device_ops hisi_acc_vfio_pci_ops = { .mmap = vfio_pci_core_mmap, .request = vfio_pci_core_request, .match = vfio_pci_core_match, + .bind_iommufd = vfio_iommufd_physical_bind, + .unbind_iommufd = vfio_iommufd_physical_unbind, + .attach_ioas = 
vfio_iommufd_physical_attach_ioas, }; static int hisi_acc_vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c index fd6ccb8454a2..32d1f38d351e 100644 --- a/drivers/vfio/pci/mlx5/main.c +++ b/drivers/vfio/pci/mlx5/main.c @@ -623,6 +623,9 @@ static const struct vfio_device_ops mlx5vf_pci_ops = { .mmap = vfio_pci_core_mmap, .request = vfio_pci_core_request, .match = vfio_pci_core_match, + .bind_iommufd = vfio_iommufd_physical_bind, + .unbind_iommufd = vfio_iommufd_physical_unbind, + .attach_ioas = vfio_iommufd_physical_attach_ioas, }; static int mlx5vf_pci_probe(struct pci_dev *pdev, diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 1d4919edfbde..29091ee2e984 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -138,6 +138,9 @@ static const struct vfio_device_ops vfio_pci_ops = { .mmap = vfio_pci_core_mmap, .request = vfio_pci_core_request, .match = vfio_pci_core_match, + .bind_iommufd = vfio_iommufd_physical_bind, + .unbind_iommufd = vfio_iommufd_physical_unbind, + .attach_ioas = vfio_iommufd_physical_attach_ioas, }; static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) diff --git a/drivers/vfio/platform/vfio_amba.c b/drivers/vfio/platform/vfio_amba.c index eaea63e5294c..5a046098d0bd 100644 --- a/drivers/vfio/platform/vfio_amba.c +++ b/drivers/vfio/platform/vfio_amba.c @@ -117,6 +117,9 @@ static const struct vfio_device_ops vfio_amba_ops = { .read = vfio_platform_read, .write = vfio_platform_write, .mmap = vfio_platform_mmap, + .bind_iommufd = vfio_iommufd_physical_bind, + .unbind_iommufd = vfio_iommufd_physical_unbind, + .attach_ioas = vfio_iommufd_physical_attach_ioas, }; static const struct amba_id pl330_ids[] = { diff --git a/drivers/vfio/platform/vfio_platform.c b/drivers/vfio/platform/vfio_platform.c index 82cedcebfd90..b87c3b708783 100644 --- a/drivers/vfio/platform/vfio_platform.c +++ 
b/drivers/vfio/platform/vfio_platform.c @@ -106,6 +106,9 @@ static const struct vfio_device_ops vfio_platform_ops = { .read = vfio_platform_read, .write = vfio_platform_write, .mmap = vfio_platform_mmap, + .bind_iommufd = vfio_iommufd_physical_bind, + .unbind_iommufd = vfio_iommufd_physical_unbind, + .attach_ioas = vfio_iommufd_physical_attach_ioas, }; static struct platform_driver vfio_platform_driver = { diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h index a9dd0615266c..9766f70a12c5 100644 --- a/drivers/vfio/vfio.h +++ b/drivers/vfio/vfio.h @@ -124,6 +124,21 @@ void vfio_device_container_unregister(struct vfio_device *device); int __init vfio_container_init(void); void vfio_container_cleanup(void); +#if IS_ENABLED(CONFIG_IOMMUFD) +int vfio_iommufd_bind(struct vfio_device *device, struct iommufd_ctx *ictx); +void vfio_iommufd_unbind(struct vfio_device *device); +#else +static inline int vfio_iommufd_bind(struct vfio_device *device, + struct iommufd_ctx *ictx) +{ + return -EOPNOTSUPP; +} + +static inline void vfio_iommufd_unbind(struct vfio_device *device) +{ +} +#endif + #ifdef CONFIG_VFIO_NOIOMMU extern bool vfio_noiommu __read_mostly; #else diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index f11157d056e6..a74c34232c03 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -525,6 +525,11 @@ static int __vfio_register_dev(struct vfio_device *device, if (IS_ERR(group)) return PTR_ERR(group); + if (WARN_ON(device->ops->bind_iommufd && + (!device->ops->unbind_iommufd || + !device->ops->attach_ioas))) + return -EINVAL; + /* * If the driver doesn't specify a set then the device is added to a * singleton set just for itself. 
@@ -794,6 +799,10 @@ static int vfio_device_first_open(struct vfio_device *device) ret = vfio_group_use_container(device->group); if (ret) goto err_module_put; + } else if (device->group->iommufd) { + ret = vfio_iommufd_bind(device, device->group->iommufd); + if (ret) + goto err_module_put; } device->kvm = device->group->kvm; @@ -811,6 +820,8 @@ err_container: device->kvm = NULL; if (device->group->container) vfio_group_unuse_container(device->group); + else if (device->group->iommufd) + vfio_iommufd_unbind(device); err_module_put: mutex_unlock(&device->group->group_lock); module_put(device->dev->driver->owner); @@ -829,6 +840,8 @@ static void vfio_device_last_close(struct vfio_device *device) device->kvm = NULL; if (device->group->container) vfio_group_unuse_container(device->group); + else if (device->group->iommufd) + vfio_iommufd_unbind(device); mutex_unlock(&device->group->group_lock); module_put(device->dev->driver->owner); } @@ -1936,8 +1949,6 @@ static void __exit vfio_cleanup(void) module_init(vfio_init); module_exit(vfio_cleanup); -MODULE_IMPORT_NS(IOMMUFD); -MODULE_IMPORT_NS(IOMMUFD_VFIO); MODULE_VERSION(DRIVER_VERSION); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR(DRIVER_AUTHOR); diff --git a/include/linux/vfio.h b/include/linux/vfio.h index e7cebeb875dd..a7fc4d747dc2 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -17,6 +17,8 @@ #include struct kvm; +struct iommufd_ctx; +struct iommufd_device; /* * VFIO devices can be placed in a set, this allows all devices to share this @@ -54,6 +56,10 @@ struct vfio_device { struct completion comp; struct list_head group_next; struct list_head iommu_entry; +#if IS_ENABLED(CONFIG_IOMMUFD) + struct iommufd_device *iommufd_device; + bool iommufd_attached; +#endif }; /** @@ -80,6 +86,10 @@ struct vfio_device_ops { char *name; int (*init)(struct vfio_device *vdev); void (*release)(struct vfio_device *vdev); + int (*bind_iommufd)(struct vfio_device *vdev, + struct iommufd_ctx *ictx, u32 *out_device_id); + void 
(*unbind_iommufd)(struct vfio_device *vdev); + int (*attach_ioas)(struct vfio_device *vdev, u32 *pt_id); int (*open_device)(struct vfio_device *vdev); void (*close_device)(struct vfio_device *vdev); ssize_t (*read)(struct vfio_device *vdev, char __user *buf, @@ -96,6 +106,21 @@ struct vfio_device_ops { void __user *arg, size_t argsz); }; +#if IS_ENABLED(CONFIG_IOMMUFD) +int vfio_iommufd_physical_bind(struct vfio_device *vdev, + struct iommufd_ctx *ictx, u32 *out_device_id); +void vfio_iommufd_physical_unbind(struct vfio_device *vdev); +int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id); +#else +#define vfio_iommufd_physical_bind \ + ((int (*)(struct vfio_device *vdev, struct iommufd_ctx *ictx, \ + u32 *out_device_id)) NULL) +#define vfio_iommufd_physical_unbind \ + ((void (*)(struct vfio_device *vdev)) NULL) +#define vfio_iommufd_physical_attach_ioas \ + ((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL) +#endif + /** * @migration_set_state: Optional callback to change the migration state for * devices that support migration. It's mandatory for From 4741f2e941298ad7553b65e66624435e14793391 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 29 Nov 2022 16:31:52 -0400 Subject: [PATCH 3274/4122] vfio-iommufd: Support iommufd for emulated VFIO devices Emulated VFIO devices are calling vfio_register_emulated_iommu_dev() and consist of all the mdev drivers. Like the physical drivers, support for iommufd is provided by the driver supplying the correct standard ops. Provide ops from the core that duplicate what vfio_register_emulated_iommu_dev() does. Emulated drivers are where it is more likely to see variation in the iommufd support ops. For instance IDXD will probably need to set up both an iommufd_device context linked to a PASID and an iommufd_access context to support all their mdev operations.
Link: https://lore.kernel.org/r/7-v4-42cd2eb0e3eb+335a-vfio_iommufd_jgg@nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Alex Williamson Tested-by: Alex Williamson Tested-by: Nicolin Chen Tested-by: Yi Liu Tested-by: Lixiao Yang Tested-by: Matthew Rosato Tested-by: Yu He Signed-off-by: Jason Gunthorpe --- drivers/gpu/drm/i915/gvt/kvmgt.c | 3 + drivers/s390/cio/vfio_ccw_ops.c | 3 + drivers/s390/crypto/vfio_ap_ops.c | 3 + drivers/vfio/container.c | 110 +++++---------------------- drivers/vfio/iommufd.c | 58 ++++++++++++++ drivers/vfio/vfio.h | 10 ++- drivers/vfio/vfio_main.c | 122 +++++++++++++++++++++++++++++- include/linux/vfio.h | 14 ++++ 8 files changed, 229 insertions(+), 94 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index aaf0d9e8da95..f5164099c264 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -1484,6 +1484,9 @@ static const struct vfio_device_ops intel_vgpu_dev_ops = { .mmap = intel_vgpu_mmap, .ioctl = intel_vgpu_ioctl, .dma_unmap = intel_vgpu_dma_unmap, + .bind_iommufd = vfio_iommufd_emulated_bind, + .unbind_iommufd = vfio_iommufd_emulated_unbind, + .attach_ioas = vfio_iommufd_emulated_attach_ioas, }; static int intel_vgpu_probe(struct mdev_device *mdev) diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c index 6ae4d012d800..560453d99c24 100644 --- a/drivers/s390/cio/vfio_ccw_ops.c +++ b/drivers/s390/cio/vfio_ccw_ops.c @@ -588,6 +588,9 @@ static const struct vfio_device_ops vfio_ccw_dev_ops = { .ioctl = vfio_ccw_mdev_ioctl, .request = vfio_ccw_mdev_request, .dma_unmap = vfio_ccw_dma_unmap, + .bind_iommufd = vfio_iommufd_emulated_bind, + .unbind_iommufd = vfio_iommufd_emulated_unbind, + .attach_ioas = vfio_iommufd_emulated_attach_ioas, }; struct mdev_driver vfio_ccw_mdev_driver = { diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index 8bf353d46820..68eeb25fb661 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c 
+++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -1805,6 +1805,9 @@ static const struct vfio_device_ops vfio_ap_matrix_dev_ops = { .close_device = vfio_ap_mdev_close_device, .ioctl = vfio_ap_mdev_ioctl, .dma_unmap = vfio_ap_mdev_dma_unmap, + .bind_iommufd = vfio_iommufd_emulated_bind, + .unbind_iommufd = vfio_iommufd_emulated_unbind, + .attach_ioas = vfio_iommufd_emulated_attach_ioas, }; static struct mdev_driver vfio_ap_matrix_driver = { diff --git a/drivers/vfio/container.c b/drivers/vfio/container.c index 8772dad68085..7f3961fd4b5a 100644 --- a/drivers/vfio/container.c +++ b/drivers/vfio/container.c @@ -540,113 +540,41 @@ void vfio_group_unuse_container(struct vfio_group *group) fput(group->opened_file); } -/* - * Pin contiguous user pages and return their associated host pages for local - * domain only. - * @device [in] : device - * @iova [in] : starting IOVA of user pages to be pinned. - * @npage [in] : count of pages to be pinned. This count should not - * be greater than VFIO_PIN_PAGES_MAX_ENTRIES. - * @prot [in] : protection flags - * @pages[out] : array of host pages - * Return error or number of pages pinned. - * - * A driver may only call this function if the vfio_device was created - * by vfio_register_emulated_iommu_dev(). 
- */ -int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova, - int npage, int prot, struct page **pages) +int vfio_container_pin_pages(struct vfio_container *container, + struct iommu_group *iommu_group, dma_addr_t iova, + int npage, int prot, struct page **pages) { - struct vfio_container *container; - struct vfio_group *group = device->group; - struct vfio_iommu_driver *driver; - int ret; - - if (!pages || !npage || !vfio_assert_device_open(device)) - return -EINVAL; + struct vfio_iommu_driver *driver = container->iommu_driver; if (npage > VFIO_PIN_PAGES_MAX_ENTRIES) return -E2BIG; - /* group->container cannot change while a vfio device is open */ - container = group->container; - driver = container->iommu_driver; - if (likely(driver && driver->ops->pin_pages)) - ret = driver->ops->pin_pages(container->iommu_data, - group->iommu_group, iova, - npage, prot, pages); - else - ret = -ENOTTY; - - return ret; + if (unlikely(!driver || !driver->ops->pin_pages)) + return -ENOTTY; + return driver->ops->pin_pages(container->iommu_data, iommu_group, iova, + npage, prot, pages); } -EXPORT_SYMBOL(vfio_pin_pages); -/* - * Unpin contiguous host pages for local domain only. - * @device [in] : device - * @iova [in] : starting address of user pages to be unpinned. - * @npage [in] : count of pages to be unpinned. This count should not - * be greater than VFIO_PIN_PAGES_MAX_ENTRIES. 
- */ -void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage) +void vfio_container_unpin_pages(struct vfio_container *container, + dma_addr_t iova, int npage) { - struct vfio_container *container; - struct vfio_iommu_driver *driver; - if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES)) return; - if (WARN_ON(!vfio_assert_device_open(device))) - return; - - /* group->container cannot change while a vfio device is open */ - container = device->group->container; - driver = container->iommu_driver; - - driver->ops->unpin_pages(container->iommu_data, iova, npage); + container->iommu_driver->ops->unpin_pages(container->iommu_data, iova, + npage); } -EXPORT_SYMBOL(vfio_unpin_pages); -/* - * This interface allows the CPUs to perform some sort of virtual DMA on - * behalf of the device. - * - * CPUs read/write from/into a range of IOVAs pointing to user space memory - * into/from a kernel buffer. - * - * As the read/write of user space memory is conducted via the CPUs and is - * not a real device DMA, it is not necessary to pin the user space memory. - * - * @device [in] : VFIO device - * @iova [in] : base IOVA of a user space buffer - * @data [in] : pointer to kernel buffer - * @len [in] : kernel buffer length - * @write : indicate read or write - * Return error code on failure or 0 on success. 
- */ -int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data, - size_t len, bool write) +int vfio_container_dma_rw(struct vfio_container *container, dma_addr_t iova, + void *data, size_t len, bool write) { - struct vfio_container *container; - struct vfio_iommu_driver *driver; - int ret = 0; + struct vfio_iommu_driver *driver = container->iommu_driver; - if (!data || len <= 0 || !vfio_assert_device_open(device)) - return -EINVAL; - - /* group->container cannot change while a vfio device is open */ - container = device->group->container; - driver = container->iommu_driver; - - if (likely(driver && driver->ops->dma_rw)) - ret = driver->ops->dma_rw(container->iommu_data, - iova, data, len, write); - else - ret = -ENOTTY; - return ret; + if (unlikely(!driver || !driver->ops->dma_rw)) + return -ENOTTY; + return driver->ops->dma_rw(container->iommu_data, iova, data, len, + write); } -EXPORT_SYMBOL(vfio_dma_rw); int __init vfio_container_init(void) { diff --git a/drivers/vfio/iommufd.c b/drivers/vfio/iommufd.c index 6e47a3df1a71..4f82a6fa7c6c 100644 --- a/drivers/vfio/iommufd.c +++ b/drivers/vfio/iommufd.c @@ -98,3 +98,61 @@ int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id) return 0; } EXPORT_SYMBOL_GPL(vfio_iommufd_physical_attach_ioas); + +/* + * The emulated standard ops mean that vfio_device is going to use the + * "mdev path" and will call vfio_pin_pages()/vfio_dma_rw(). Drivers using this + * ops set should call vfio_register_emulated_iommu_dev(). 
+ */ + +static void vfio_emulated_unmap(void *data, unsigned long iova, + unsigned long length) +{ + struct vfio_device *vdev = data; + + vdev->ops->dma_unmap(vdev, iova, length); +} + +static const struct iommufd_access_ops vfio_user_ops = { + .needs_pin_pages = 1, + .unmap = vfio_emulated_unmap, +}; + +int vfio_iommufd_emulated_bind(struct vfio_device *vdev, + struct iommufd_ctx *ictx, u32 *out_device_id) +{ + lockdep_assert_held(&vdev->dev_set->lock); + + vdev->iommufd_ictx = ictx; + iommufd_ctx_get(ictx); + return 0; +} +EXPORT_SYMBOL_GPL(vfio_iommufd_emulated_bind); + +void vfio_iommufd_emulated_unbind(struct vfio_device *vdev) +{ + lockdep_assert_held(&vdev->dev_set->lock); + + if (vdev->iommufd_access) { + iommufd_access_destroy(vdev->iommufd_access); + vdev->iommufd_access = NULL; + } + iommufd_ctx_put(vdev->iommufd_ictx); + vdev->iommufd_ictx = NULL; +} +EXPORT_SYMBOL_GPL(vfio_iommufd_emulated_unbind); + +int vfio_iommufd_emulated_attach_ioas(struct vfio_device *vdev, u32 *pt_id) +{ + struct iommufd_access *user; + + lockdep_assert_held(&vdev->dev_set->lock); + + user = iommufd_access_create(vdev->iommufd_ictx, *pt_id, &vfio_user_ops, + vdev); + if (IS_ERR(user)) + return PTR_ERR(user); + vdev->iommufd_access = user; + return 0; +} +EXPORT_SYMBOL_GPL(vfio_iommufd_emulated_attach_ioas); diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h index 9766f70a12c5..b1ef84249637 100644 --- a/drivers/vfio/vfio.h +++ b/drivers/vfio/vfio.h @@ -111,8 +111,6 @@ struct vfio_iommu_driver { int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops); void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops); -bool vfio_assert_device_open(struct vfio_device *device); - struct vfio_container *vfio_container_from_file(struct file *filep); int vfio_group_use_container(struct vfio_group *group); void vfio_group_unuse_container(struct vfio_group *group); @@ -121,6 +119,14 @@ int vfio_container_attach_group(struct vfio_container *container, void 
vfio_group_detach_container(struct vfio_group *group); void vfio_device_container_register(struct vfio_device *device); void vfio_device_container_unregister(struct vfio_device *device); +int vfio_container_pin_pages(struct vfio_container *container, + struct iommu_group *iommu_group, dma_addr_t iova, + int npage, int prot, struct page **pages); +void vfio_container_unpin_pages(struct vfio_container *container, + dma_addr_t iova, int npage); +int vfio_container_dma_rw(struct vfio_container *container, dma_addr_t iova, + void *data, size_t len, bool write); + int __init vfio_container_init(void); void vfio_container_cleanup(void); diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index a74c34232c03..fd5e969ab653 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -770,7 +770,7 @@ out_unlock: static const struct file_operations vfio_device_fops; /* true if the vfio_device has open_device() called but not close_device() */ -bool vfio_assert_device_open(struct vfio_device *device) +static bool vfio_assert_device_open(struct vfio_device *device) { return !WARN_ON_ONCE(!READ_ONCE(device->open_count)); } @@ -1876,6 +1876,126 @@ int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs, } EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare); +/* + * Pin contiguous user pages and return their associated host pages for local + * domain only. + * @device [in] : device + * @iova [in] : starting IOVA of user pages to be pinned. + * @npage [in] : count of pages to be pinned. This count should not + * be greater than VFIO_PIN_PAGES_MAX_ENTRIES. + * @prot [in] : protection flags + * @pages[out] : array of host pages + * Return error or number of pages pinned. + * + * A driver may only call this function if the vfio_device was created + * by vfio_register_emulated_iommu_dev() due to vfio_container_pin_pages(). 
+ */ +int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova, + int npage, int prot, struct page **pages) +{ + /* group->container cannot change while a vfio device is open */ + if (!pages || !npage || WARN_ON(!vfio_assert_device_open(device))) + return -EINVAL; + if (device->group->container) + return vfio_container_pin_pages(device->group->container, + device->group->iommu_group, + iova, npage, prot, pages); + if (device->iommufd_access) { + int ret; + + if (iova > ULONG_MAX) + return -EINVAL; + /* + * VFIO ignores the sub page offset, npages is from the start of + * a PAGE_SIZE chunk of IOVA. The caller is expected to recover + * the sub page offset by doing: + * pages[0] + (iova % PAGE_SIZE) + */ + ret = iommufd_access_pin_pages( + device->iommufd_access, ALIGN_DOWN(iova, PAGE_SIZE), + npage * PAGE_SIZE, pages, + (prot & IOMMU_WRITE) ? IOMMUFD_ACCESS_RW_WRITE : 0); + if (ret) + return ret; + return npage; + } + return -EINVAL; +} +EXPORT_SYMBOL(vfio_pin_pages); + +/* + * Unpin contiguous host pages for local domain only. + * @device [in] : device + * @iova [in] : starting address of user pages to be unpinned. + * @npage [in] : count of pages to be unpinned. This count should not + * be greater than VFIO_PIN_PAGES_MAX_ENTRIES. + */ +void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage) +{ + if (WARN_ON(!vfio_assert_device_open(device))) + return; + + if (device->group->container) { + vfio_container_unpin_pages(device->group->container, iova, + npage); + return; + } + if (device->iommufd_access) { + if (WARN_ON(iova > ULONG_MAX)) + return; + iommufd_access_unpin_pages(device->iommufd_access, + ALIGN_DOWN(iova, PAGE_SIZE), + npage * PAGE_SIZE); + return; + } +} +EXPORT_SYMBOL(vfio_unpin_pages); + +/* + * This interface allows the CPUs to perform some sort of virtual DMA on + * behalf of the device. + * + * CPUs read/write from/into a range of IOVAs pointing to user space memory + * into/from a kernel buffer. 
+ * + * As the read/write of user space memory is conducted via the CPUs and is + * not a real device DMA, it is not necessary to pin the user space memory. + * + * @device [in] : VFIO device + * @iova [in] : base IOVA of a user space buffer + * @data [in] : pointer to kernel buffer + * @len [in] : kernel buffer length + * @write : indicate read or write + * Return error code on failure or 0 on success. + */ +int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data, + size_t len, bool write) +{ + if (!data || len <= 0 || !vfio_assert_device_open(device)) + return -EINVAL; + + if (device->group->container) + return vfio_container_dma_rw(device->group->container, iova, + data, len, write); + + if (device->iommufd_access) { + unsigned int flags = 0; + + if (iova > ULONG_MAX) + return -EINVAL; + + /* VFIO historically tries to auto-detect a kthread */ + if (!current->mm) + flags |= IOMMUFD_ACCESS_RW_KTHREAD; + if (write) + flags |= IOMMUFD_ACCESS_RW_WRITE; + return iommufd_access_rw(device->iommufd_access, iova, data, + len, flags); + } + return -EINVAL; +} +EXPORT_SYMBOL(vfio_dma_rw); + /* * Module/class support */ diff --git a/include/linux/vfio.h b/include/linux/vfio.h index a7fc4d747dc2..d5f84f98c0fa 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -19,6 +19,7 @@ struct kvm; struct iommufd_ctx; struct iommufd_device; +struct iommufd_access; /* * VFIO devices can be placed in a set, this allows all devices to share this @@ -56,8 +57,10 @@ struct vfio_device { struct completion comp; struct list_head group_next; struct list_head iommu_entry; + struct iommufd_access *iommufd_access; #if IS_ENABLED(CONFIG_IOMMUFD) struct iommufd_device *iommufd_device; + struct iommufd_ctx *iommufd_ictx; bool iommufd_attached; #endif }; @@ -111,6 +114,10 @@ int vfio_iommufd_physical_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx, u32 *out_device_id); void vfio_iommufd_physical_unbind(struct vfio_device *vdev); int 
vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id); +int vfio_iommufd_emulated_bind(struct vfio_device *vdev, + struct iommufd_ctx *ictx, u32 *out_device_id); +void vfio_iommufd_emulated_unbind(struct vfio_device *vdev); +int vfio_iommufd_emulated_attach_ioas(struct vfio_device *vdev, u32 *pt_id); #else #define vfio_iommufd_physical_bind \ ((int (*)(struct vfio_device *vdev, struct iommufd_ctx *ictx, \ @@ -119,6 +126,13 @@ int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id); ((void (*)(struct vfio_device *vdev)) NULL) #define vfio_iommufd_physical_attach_ioas \ ((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL) +#define vfio_iommufd_emulated_bind \ + ((int (*)(struct vfio_device *vdev, struct iommufd_ctx *ictx, \ + u32 *out_device_id)) NULL) +#define vfio_iommufd_emulated_unbind \ + ((void (*)(struct vfio_device *vdev)) NULL) +#define vfio_iommufd_emulated_attach_ioas \ + ((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL) #endif /** From 81ab9890da97e07862476bf635c80adee9b1c515 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 29 Nov 2022 16:31:53 -0400 Subject: [PATCH 3275/4122] vfio: Move container related MODULE_ALIAS statements into container.c The miscdev is in container.c, so should these related MODULE_ALIAS statements. This is necessary for the next patch to be able to fully disable /dev/vfio/vfio. 
Fixes: cdc71fe4ecbf ("vfio: Move container code into drivers/vfio/container.c") Link: https://lore.kernel.org/r/8-v4-42cd2eb0e3eb+335a-vfio_iommufd_jgg@nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Yi Liu Reviewed-by: Alex Williamson Tested-by: Alex Williamson Tested-by: Yi Liu Tested-by: Lixiao Yang Tested-by: Matthew Rosato Tested-by: Yu He Reported-by: Yi Liu Signed-off-by: Jason Gunthorpe --- drivers/vfio/container.c | 3 +++ drivers/vfio/vfio_main.c | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/vfio/container.c b/drivers/vfio/container.c index 7f3961fd4b5a..6b362d97d682 100644 --- a/drivers/vfio/container.c +++ b/drivers/vfio/container.c @@ -608,3 +608,6 @@ void vfio_container_cleanup(void) misc_deregister(&vfio_dev); mutex_destroy(&vfio.iommu_drivers_lock); } + +MODULE_ALIAS_MISCDEV(VFIO_MINOR); +MODULE_ALIAS("devname:vfio/vfio"); diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index fd5e969ab653..ce6e6a560c70 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -2073,6 +2073,4 @@ MODULE_VERSION(DRIVER_VERSION); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); -MODULE_ALIAS_MISCDEV(VFIO_MINOR); -MODULE_ALIAS("devname:vfio/vfio"); MODULE_SOFTDEP("post: vfio_iommu_type1 vfio_iommu_spapr_tce"); From e5a9ec7e096ab7a3b34022409a6ddc63e4e83674 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 29 Nov 2022 16:31:54 -0400 Subject: [PATCH 3276/4122] vfio: Make vfio_container optionally compiled Add a kconfig CONFIG_VFIO_CONTAINER that controls compiling the container code. If 'n' then only iommufd will provide the container service. All the support for vfio iommu drivers, including type1, will not be built. This allows a compilation check that no inappropriate dependencies between the device/group and container have been created. 
Link: https://lore.kernel.org/r/9-v4-42cd2eb0e3eb+335a-vfio_iommufd_jgg@nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Alex Williamson Tested-by: Alex Williamson Tested-by: Nicolin Chen Tested-by: Yi Liu Tested-by: Lixiao Yang Tested-by: Matthew Rosato Tested-by: Yu He Signed-off-by: Jason Gunthorpe --- drivers/vfio/Kconfig | 35 +++++++++++++++-------- drivers/vfio/Makefile | 4 +-- drivers/vfio/vfio.h | 65 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 91 insertions(+), 13 deletions(-) diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index 1118d322eec9..286c1663bd75 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -3,8 +3,8 @@ menuconfig VFIO tristate "VFIO Non-Privileged userspace driver framework" select IOMMU_API depends on IOMMUFD || !IOMMUFD - select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64) select INTERVAL_TREE + select VFIO_CONTAINER if IOMMUFD=n help VFIO provides a framework for secure userspace device drivers. See Documentation/driver-api/vfio.rst for more details. @@ -12,6 +12,18 @@ menuconfig VFIO If you don't know what to do here, say N. if VFIO +config VFIO_CONTAINER + bool "Support for the VFIO container /dev/vfio/vfio" + select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64) + default y + help + The VFIO container is the classic interface to VFIO for establishing + IOMMU mappings. If N is selected here then IOMMUFD must be used to + manage the mappings. + + Unless testing IOMMUFD say Y here. + +if VFIO_CONTAINER config VFIO_IOMMU_TYPE1 tristate default n @@ -21,16 +33,6 @@ config VFIO_IOMMU_SPAPR_TCE depends on SPAPR_TCE_IOMMU default VFIO -config VFIO_SPAPR_EEH - tristate - depends on EEH && VFIO_IOMMU_SPAPR_TCE - default VFIO - -config VFIO_VIRQFD - tristate - select EVENTFD - default n - config VFIO_NOIOMMU bool "VFIO No-IOMMU support" help @@ -44,6 +46,17 @@ config VFIO_NOIOMMU this mode since there is no IOMMU to provide DMA translation. If you don't know what to do here, say N. 
+endif + +config VFIO_SPAPR_EEH + tristate + depends on EEH && VFIO_IOMMU_SPAPR_TCE + default VFIO + +config VFIO_VIRQFD + tristate + select EVENTFD + default n source "drivers/vfio/pci/Kconfig" source "drivers/vfio/platform/Kconfig" diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index 3863922529ef..b953517dc70f 100644 --- a/drivers/vfio/Makefile +++ b/drivers/vfio/Makefile @@ -4,9 +4,9 @@ vfio_virqfd-y := virqfd.o obj-$(CONFIG_VFIO) += vfio.o vfio-y += vfio_main.o \ - iova_bitmap.o \ - container.o + iova_bitmap.o vfio-$(CONFIG_IOMMUFD) += iommufd.o +vfio-$(CONFIG_VFIO_CONTAINER) += container.o obj-$(CONFIG_VFIO_VIRQFD) += vfio_virqfd.o obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h index b1ef84249637..ce5fe3fc493b 100644 --- a/drivers/vfio/vfio.h +++ b/drivers/vfio/vfio.h @@ -55,7 +55,9 @@ struct vfio_group { struct list_head device_list; struct mutex device_lock; struct list_head vfio_next; +#if IS_ENABLED(CONFIG_VFIO_CONTAINER) struct list_head container_next; +#endif enum vfio_group_type type; struct mutex group_lock; struct kvm *kvm; @@ -64,6 +66,7 @@ struct vfio_group { struct iommufd_ctx *iommufd; }; +#if IS_ENABLED(CONFIG_VFIO_CONTAINER) /* events for the backend driver notify callback */ enum vfio_iommu_notify_type { VFIO_IOMMU_CONTAINER_CLOSE = 0, @@ -129,6 +132,68 @@ int vfio_container_dma_rw(struct vfio_container *container, dma_addr_t iova, int __init vfio_container_init(void); void vfio_container_cleanup(void); +#else +static inline struct vfio_container * +vfio_container_from_file(struct file *filep) +{ + return NULL; +} + +static inline int vfio_group_use_container(struct vfio_group *group) +{ + return -EOPNOTSUPP; +} + +static inline void vfio_group_unuse_container(struct vfio_group *group) +{ +} + +static inline int vfio_container_attach_group(struct vfio_container *container, + struct vfio_group *group) +{ + return -EOPNOTSUPP; +} + +static inline void 
vfio_group_detach_container(struct vfio_group *group) +{ +} + +static inline void vfio_device_container_register(struct vfio_device *device) +{ +} + +static inline void vfio_device_container_unregister(struct vfio_device *device) +{ +} + +static inline int vfio_container_pin_pages(struct vfio_container *container, + struct iommu_group *iommu_group, + dma_addr_t iova, int npage, int prot, + struct page **pages) +{ + return -EOPNOTSUPP; +} + +static inline void vfio_container_unpin_pages(struct vfio_container *container, + dma_addr_t iova, int npage) +{ +} + +static inline int vfio_container_dma_rw(struct vfio_container *container, + dma_addr_t iova, void *data, size_t len, + bool write) +{ + return -EOPNOTSUPP; +} + +static inline int vfio_container_init(void) +{ + return 0; +} +static inline void vfio_container_cleanup(void) +{ +} +#endif #if IS_ENABLED(CONFIG_IOMMUFD) int vfio_iommufd_bind(struct vfio_device *device, struct iommufd_ctx *ictx); From 01f70cbb26eadb5959344598977cb7159948263a Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 29 Nov 2022 16:31:55 -0400 Subject: [PATCH 3277/4122] iommufd: Allow iommufd to supply /dev/vfio/vfio If the VFIO container is compiled out, give a kconfig option for iommufd to provide the miscdev node with the same name and permissions as vfio uses. The compatibility node supports the same ioctls as VFIO and automatically enables the VFIO compatible pinned page accounting mode. 
Link: https://lore.kernel.org/r/10-v4-42cd2eb0e3eb+335a-vfio_iommufd_jgg@nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Yi Liu Reviewed-by: Alex Williamson Tested-by: Alex Williamson Tested-by: Nicolin Chen Tested-by: Yi Liu Tested-by: Lixiao Yang Tested-by: Matthew Rosato Tested-by: Yu He Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/Kconfig | 20 +++++++++++++++++++ drivers/iommu/iommufd/main.c | 36 +++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/drivers/iommu/iommufd/Kconfig b/drivers/iommu/iommufd/Kconfig index 871244f2443f..8306616b6d81 100644 --- a/drivers/iommu/iommufd/Kconfig +++ b/drivers/iommu/iommufd/Kconfig @@ -12,6 +12,26 @@ config IOMMUFD If you don't know what to do here, say N. if IOMMUFD +config IOMMUFD_VFIO_CONTAINER + bool "IOMMUFD provides the VFIO container /dev/vfio/vfio" + depends on VFIO && !VFIO_CONTAINER + default VFIO && !VFIO_CONTAINER + help + IOMMUFD will provide /dev/vfio/vfio instead of VFIO. This relies on + IOMMUFD providing compatibility emulation to give the same ioctls. + It provides an option to build a kernel with legacy VFIO components + removed. + + IOMMUFD VFIO container emulation is known to lack certain features + of the native VFIO container, such as no-IOMMU support, peer-to-peer + DMA mapping, PPC IOMMU support, as well as other potentially + undiscovered gaps. This option is currently intended for the + purpose of testing IOMMUFD with unmodified userspace supporting VFIO + and making use of the Type1 VFIO IOMMU backend. General purpose + enabling of this option is currently discouraged. + + Unless testing IOMMUFD, say N here. 
+ config IOMMUFD_TEST bool "IOMMU Userspace API Test support" depends on DEBUG_KERNEL diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index bcb463e58100..083e6fcbe10a 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -18,6 +18,7 @@ #include #include +#include "io_pagetable.h" #include "iommufd_private.h" #include "iommufd_test.h" @@ -25,6 +26,7 @@ struct iommufd_object_ops { void (*destroy)(struct iommufd_object *obj); }; static const struct iommufd_object_ops iommufd_object_ops[]; +static struct miscdevice vfio_misc_dev; struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, size_t size, @@ -170,6 +172,16 @@ static int iommufd_fops_open(struct inode *inode, struct file *filp) if (!ictx) return -ENOMEM; + /* + * For compatibility with VFIO when /dev/vfio/vfio is opened we default + * to the same rlimit accounting as vfio uses. + */ + if (IS_ENABLED(CONFIG_IOMMUFD_VFIO_CONTAINER) && + filp->private_data == &vfio_misc_dev) { + ictx->account_mode = IOPT_PAGES_ACCOUNT_MM; + pr_info_once("IOMMUFD is providing /dev/vfio/vfio, not VFIO.\n"); + } + xa_init_flags(&ictx->objects, XA_FLAGS_ALLOC1 | XA_FLAGS_ACCOUNT); ictx->file = filp; filp->private_data = ictx; @@ -400,6 +412,15 @@ static struct miscdevice iommu_misc_dev = { .mode = 0660, }; + +static struct miscdevice vfio_misc_dev = { + .minor = VFIO_MINOR, + .name = "vfio", + .fops = &iommufd_fops, + .nodename = "vfio/vfio", + .mode = 0666, +}; + static int __init iommufd_init(void) { int ret; @@ -407,18 +428,33 @@ static int __init iommufd_init(void) ret = misc_register(&iommu_misc_dev); if (ret) return ret; + + if (IS_ENABLED(CONFIG_IOMMUFD_VFIO_CONTAINER)) { + ret = misc_register(&vfio_misc_dev); + if (ret) + goto err_misc; + } iommufd_test_init(); return 0; +err_misc: + misc_deregister(&iommu_misc_dev); + return ret; } static void __exit iommufd_exit(void) { iommufd_test_exit(); + if (IS_ENABLED(CONFIG_IOMMUFD_VFIO_CONTAINER)) + 
misc_deregister(&vfio_misc_dev); misc_deregister(&iommu_misc_dev); } module_init(iommufd_init); module_exit(iommufd_exit); +#if IS_ENABLED(CONFIG_IOMMUFD_VFIO_CONTAINER) +MODULE_ALIAS_MISCDEV(VFIO_MINOR); +MODULE_ALIAS("devname:vfio/vfio"); +#endif MODULE_DESCRIPTION("I/O Address Space Management for passthrough devices"); MODULE_LICENSE("GPL"); From d7237462561fcd224fa687c56ccb68629f50fc0d Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Fri, 2 Dec 2022 01:38:49 -0800 Subject: [PATCH 3278/4122] drivers: mcb: fix resource leak in mcb_probe() When probe hook function failed in mcb_probe(), it doesn't put the device. Compiled test only. Fixes: 7bc364097a89 ("mcb: Acquire reference to device in probe") Signed-off-by: Zhengchao Shao Signed-off-by: Johannes Thumshirn Link: https://lore.kernel.org/r/9f87de36bfb85158b506cb78c6fc9db3f6a3bad1.1669624063.git.johannes.thumshirn@wdc.com Signed-off-by: Greg Kroah-Hartman --- drivers/mcb/mcb-core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mcb/mcb-core.c b/drivers/mcb/mcb-core.c index 338fc889b357..b8ad4f16b4ac 100644 --- a/drivers/mcb/mcb-core.c +++ b/drivers/mcb/mcb-core.c @@ -71,8 +71,10 @@ static int mcb_probe(struct device *dev) get_device(dev); ret = mdrv->probe(mdev, found_id); - if (ret) + if (ret) { module_put(carrier_mod); + put_device(dev); + } return ret; } From 728ac3389296caf68638628c987aeae6c8851e2d Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 2 Dec 2022 01:38:50 -0800 Subject: [PATCH 3279/4122] mcb: mcb-parse: fix error handing in chameleon_parse_gdd() If mcb_device_register() returns error in chameleon_parse_gdd(), the refcount of bus and device name are leaked. Fix this by calling put_device() to give up the reference, so they can be released in mcb_release_dev() and kobject_cleanup(). 
Fixes: 3764e82e5150 ("drivers: Introduce MEN Chameleon Bus") Reviewed-by: Johannes Thumshirn Signed-off-by: Yang Yingliang Signed-off-by: Johannes Thumshirn Link: https://lore.kernel.org/r/ebfb06e39b19272f0197fa9136b5e4b6f34ad732.1669624063.git.johannes.thumshirn@wdc.com Signed-off-by: Greg Kroah-Hartman --- drivers/mcb/mcb-parse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mcb/mcb-parse.c b/drivers/mcb/mcb-parse.c index 0266bfddfbe2..aa6938da0db8 100644 --- a/drivers/mcb/mcb-parse.c +++ b/drivers/mcb/mcb-parse.c @@ -108,7 +108,7 @@ static int chameleon_parse_gdd(struct mcb_bus *bus, return 0; err: - mcb_free_dev(mdev); + put_device(&mdev->dev); return ret; } From 11fa7fefe3d8fac7da56bc9aa3dd5fb3081ca797 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 2 Dec 2022 11:02:37 +0800 Subject: [PATCH 3280/4122] chardev: fix error handling in cdev_device_add() While doing fault injection test, I got the following report: ------------[ cut here ]------------ kobject: '(null)' (0000000039956980): is not initialized, yet kobject_put() is being called. WARNING: CPU: 3 PID: 6306 at kobject_put+0x23d/0x4e0 CPU: 3 PID: 6306 Comm: 283 Tainted: G W 6.1.0-rc2-00005-g307c1086d7c9 #1253 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 RIP: 0010:kobject_put+0x23d/0x4e0 Call Trace: cdev_device_add+0x15e/0x1b0 __iio_device_register+0x13b4/0x1af0 [industrialio] __devm_iio_device_register+0x22/0x90 [industrialio] max517_probe+0x3d8/0x6b4 [max517] i2c_device_probe+0xa81/0xc00 When device_add() is injected fault and returns error, if dev->devt is not set, cdev_add() is not called, cdev_del() is not needed. Fix this by checking dev->devt in error path. 
Fixes: 233ed09d7fda ("chardev: add helper function to register char devs with a struct device") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221202030237.520280-1-yangyingliang@huawei.com Signed-off-by: Greg Kroah-Hartman --- fs/char_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/char_dev.c b/fs/char_dev.c index 340e4543b24a..13deb45f1ec6 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -554,7 +554,7 @@ int cdev_device_add(struct cdev *cdev, struct device *dev) } rc = device_add(dev); - if (rc) + if (rc && dev->devt) cdev_del(cdev); return rc; From b058ea3ab5afea873ab8d976277539ca9e43869a Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Tue, 29 Nov 2022 13:12:35 +0000 Subject: [PATCH 3281/4122] vfio/iova_bitmap: refactor iova_bitmap_set() to better handle page boundaries Commit f38044e5ef58 ("vfio/iova_bitmap: Fix PAGE_SIZE unaligned bitmaps") had fixed the unaligned bitmaps by capping the remaining iterable set at the start of the bitmap. Although, that mistakenly worked around iova_bitmap_set() incorrectly setting bits across page boundary. Fix this by reworking the loop inside iova_bitmap_set() to iterate over a range of bits to set (cur_bit .. last_bit) which may span different pinned pages, thus updating @page_idx and @offset as it sets the bits. The previous cap to the first page is now adjusted to be always accounted rather than when there's only a non-zero pgoff. While at it, make @page_idx , @offset and @nbits to be unsigned int given that it won't be more than 512 and 4096 respectively (even a bigger PAGE_SIZE or a smaller struct page size won't make this bigger than the above 32-bit max). Also, delete the stale kdoc on Return type. 
Cc: Avihai Horon Fixes: f38044e5ef58 ("vfio/iova_bitmap: Fix PAGE_SIZE unaligned bitmaps") Co-developed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe Signed-off-by: Joao Martins Reviewed-by: Jason Gunthorpe Tested-by: Avihai Horon Link: https://lore.kernel.org/r/20221129131235.38880-1-joao.m.martins@oracle.com Signed-off-by: Alex Williamson --- drivers/vfio/iova_bitmap.c | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/drivers/vfio/iova_bitmap.c b/drivers/vfio/iova_bitmap.c index de6d6ea5c496..0848f920efb7 100644 --- a/drivers/vfio/iova_bitmap.c +++ b/drivers/vfio/iova_bitmap.c @@ -298,9 +298,7 @@ static unsigned long iova_bitmap_mapped_remaining(struct iova_bitmap *bitmap) { unsigned long remaining, bytes; - /* Cap to one page in the first iteration, if PAGE_SIZE unaligned. */ - bytes = !bitmap->mapped.pgoff ? bitmap->mapped.npages << PAGE_SHIFT : - PAGE_SIZE - bitmap->mapped.pgoff; + bytes = (bitmap->mapped.npages << PAGE_SHIFT) - bitmap->mapped.pgoff; remaining = bitmap->mapped_total_index - bitmap->mapped_base_index; remaining = min_t(unsigned long, remaining, @@ -399,29 +397,27 @@ int iova_bitmap_for_each(struct iova_bitmap *bitmap, void *opaque, * Set the bits corresponding to the range [iova .. iova+length-1] in * the user bitmap. * - * Return: The number of bits set. 
*/ void iova_bitmap_set(struct iova_bitmap *bitmap, unsigned long iova, size_t length) { struct iova_bitmap_map *mapped = &bitmap->mapped; - unsigned long offset = (iova - mapped->iova) >> mapped->pgshift; - unsigned long nbits = max_t(unsigned long, 1, length >> mapped->pgshift); - unsigned long page_idx = offset / BITS_PER_PAGE; - unsigned long page_offset = mapped->pgoff; - void *kaddr; - - offset = offset % BITS_PER_PAGE; + unsigned long cur_bit = ((iova - mapped->iova) >> + mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE; + unsigned long last_bit = (((iova + length - 1) - mapped->iova) >> + mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE; do { - unsigned long size = min(BITS_PER_PAGE - offset, nbits); + unsigned int page_idx = cur_bit / BITS_PER_PAGE; + unsigned int offset = cur_bit % BITS_PER_PAGE; + unsigned int nbits = min(BITS_PER_PAGE - offset, + last_bit - cur_bit + 1); + void *kaddr; kaddr = kmap_local_page(mapped->pages[page_idx]); - bitmap_set(kaddr + page_offset, offset, size); + bitmap_set(kaddr, offset, nbits); kunmap_local(kaddr); - page_offset = offset = 0; - nbits -= size; - page_idx++; - } while (nbits > 0); + cur_bit += nbits; + } while (cur_bit <= last_bit); } EXPORT_SYMBOL_GPL(iova_bitmap_set); From 61e15f871241ee86f217320909005cd022dd844f Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 2 Dec 2022 11:50:08 +0100 Subject: [PATCH 3282/4122] KVM: Delete all references to removed KVM_SET_MEMORY_REGION ioctl The documentation says that the ioctl has been deprecated, but it has been actually removed and the remaining references are just left overs. 
Suggested-by: Sean Christopherson Signed-off-by: Javier Martinez Canillas Message-Id: <20221202105011.185147-2-javierm@redhat.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 16 ---------------- include/uapi/linux/kvm.h | 12 ------------ tools/include/uapi/linux/kvm.h | 12 ------------ 3 files changed, 40 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 5617bc4f899f..850e187c0a38 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -272,18 +272,6 @@ the VCPU file descriptor can be mmap-ed, including: KVM_CAP_DIRTY_LOG_RING, see section 8.3. -4.6 KVM_SET_MEMORY_REGION -------------------------- - -:Capability: basic -:Architectures: all -:Type: vm ioctl -:Parameters: struct kvm_memory_region (in) -:Returns: 0 on success, -1 on error - -This ioctl is obsolete and has been removed. - - 4.7 KVM_CREATE_VCPU ------------------- @@ -1377,10 +1365,6 @@ the memory region are automatically reflected into the guest. For example, an mmap() that affects the region will be made visible immediately. Another example is madvise(MADV_DROP). -It is recommended to use this API instead of the KVM_SET_MEMORY_REGION ioctl. -The KVM_SET_MEMORY_REGION does not allow fine grained control over memory -allocation and is deprecated. 
- 4.36 KVM_SET_TSS_ADDR --------------------- diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 64dfe9c07c87..c338ca2c972d 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -86,14 +86,6 @@ struct kvm_debug_guest { /* *** End of deprecated interfaces *** */ -/* for KVM_CREATE_MEMORY_REGION */ -struct kvm_memory_region { - __u32 slot; - __u32 flags; - __u64 guest_phys_addr; - __u64 memory_size; /* bytes */ -}; - /* for KVM_SET_USER_MEMORY_REGION */ struct kvm_userspace_memory_region { __u32 slot; @@ -1442,10 +1434,6 @@ struct kvm_vfio_spapr_tce { __s32 tablefd; }; -/* - * ioctls for VM fds - */ -#define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region) /* * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns * a vcpu fd. diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 0d5d4419139a..8899201d5964 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -86,14 +86,6 @@ struct kvm_debug_guest { /* *** End of deprecated interfaces *** */ -/* for KVM_CREATE_MEMORY_REGION */ -struct kvm_memory_region { - __u32 slot; - __u32 flags; - __u64 guest_phys_addr; - __u64 memory_size; /* bytes */ -}; - /* for KVM_SET_USER_MEMORY_REGION */ struct kvm_userspace_memory_region { __u32 slot; @@ -1437,10 +1429,6 @@ struct kvm_vfio_spapr_tce { __s32 tablefd; }; -/* - * ioctls for VM fds - */ -#define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region) /* * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns * a vcpu fd. From 66a9221d73e71199a120ca12ea5bfaac2aa670a3 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 2 Dec 2022 11:50:09 +0100 Subject: [PATCH 3283/4122] KVM: Delete all references to removed KVM_SET_MEMORY_ALIAS ioctl The documentation says that the ioctl has been deprecated, but it has been actually removed and the remaining references are just left overs. 
Suggested-by: Sean Christopherson Signed-off-by: Javier Martinez Canillas Message-Id: <20221202105011.185147-3-javierm@redhat.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 11 ----------- arch/x86/include/uapi/asm/kvm.h | 8 -------- include/uapi/linux/kvm.h | 2 -- tools/arch/x86/include/uapi/asm/kvm.h | 8 -------- tools/include/uapi/linux/kvm.h | 2 -- 5 files changed, 31 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 850e187c0a38..07e8a42d839a 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -356,17 +356,6 @@ see the description of the capability. Note that the Xen shared info page, if configured, shall always be assumed to be dirty. KVM will not explicitly mark it such. -4.9 KVM_SET_MEMORY_ALIAS ------------------------- - -:Capability: basic -:Architectures: x86 -:Type: vm ioctl -:Parameters: struct kvm_memory_alias (in) -:Returns: 0 (success), -1 (error) - -This ioctl is obsolete and has been removed. - 4.10 KVM_RUN ------------ diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index c6df6b16a088..e48deab8901d 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -53,14 +53,6 @@ /* Architectural interrupt line count. 
*/ #define KVM_NR_INTERRUPTS 256 -struct kvm_memory_alias { - __u32 slot; /* this has a different namespace than memory slots */ - __u32 flags; - __u64 guest_phys_addr; - __u64 memory_size; - __u64 target_phys_addr; -}; - /* for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP */ struct kvm_pic_state { __u8 last_irr; /* edge detection */ diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index c338ca2c972d..ce9183765616 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1440,8 +1440,6 @@ struct kvm_vfio_spapr_tce { */ #define KVM_CREATE_VCPU _IO(KVMIO, 0x41) #define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log) -/* KVM_SET_MEMORY_ALIAS is obsolete: */ -#define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias) #define KVM_SET_NR_MMU_PAGES _IO(KVMIO, 0x44) #define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45) #define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46, \ diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 46de10a809ec..649e50a8f9dd 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -53,14 +53,6 @@ /* Architectural interrupt line count. 
*/ #define KVM_NR_INTERRUPTS 256 -struct kvm_memory_alias { - __u32 slot; /* this has a different namespace than memory slots */ - __u32 flags; - __u64 guest_phys_addr; - __u64 memory_size; - __u64 target_phys_addr; -}; - /* for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP */ struct kvm_pic_state { __u8 last_irr; /* edge detection */ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 8899201d5964..6ba2928f8f18 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -1435,8 +1435,6 @@ struct kvm_vfio_spapr_tce { */ #define KVM_CREATE_VCPU _IO(KVMIO, 0x41) #define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log) -/* KVM_SET_MEMORY_ALIAS is obsolete: */ -#define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias) #define KVM_SET_NR_MMU_PAGES _IO(KVMIO, 0x44) #define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45) #define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46, \ From 30ee198ce42d60101620d33f8bc70c3234798365 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 2 Dec 2022 11:50:10 +0100 Subject: [PATCH 3284/4122] KVM: Reference to kvm_userspace_memory_region in doc and comments There are still references to the removed kvm_memory_region data structure but the doc and comments should mention struct kvm_userspace_memory_region instead, since that is what's used by the ioctl that replaced the old one and this data structure support the same set of flags. 
Signed-off-by: Javier Martinez Canillas Message-Id: <20221202105011.185147-4-javierm@redhat.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 2 +- include/linux/kvm_host.h | 4 ++-- include/uapi/linux/kvm.h | 6 +++--- tools/include/uapi/linux/kvm.h | 6 +++--- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 07e8a42d839a..92e14823619a 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -1309,7 +1309,7 @@ yet and must be cleared on entry. __u64 userspace_addr; /* start of the userspace allocated memory */ }; - /* for kvm_memory_region::flags */ + /* for kvm_userspace_memory_region::flags */ #define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0) #define KVM_MEM_READONLY (1UL << 1) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8f874a964313..a5a82b536774 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -50,8 +50,8 @@ #endif /* - * The bit 16 ~ bit 31 of kvm_memory_region::flags are internally used - * in kvm, other bits are visible for userspace which are defined in + * The bit 16 ~ bit 31 of kvm_userspace_memory_region::flags are internally + * used in kvm, other bits are visible for userspace which are defined in * include/linux/kvm_h. */ #define KVM_MEMSLOT_INVALID (1UL << 16) diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index ce9183765616..03708ce10bda 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -96,9 +96,9 @@ struct kvm_userspace_memory_region { }; /* - * The bit 0 ~ bit 15 of kvm_memory_region::flags are visible for userspace, - * other bits are reserved for kvm internal use which are defined in - * include/linux/kvm_host.h. + * The bit 0 ~ bit 15 of kvm_userspace_memory_region::flags are visible for + * userspace, other bits are reserved for kvm internal use which are defined + * in include/linux/kvm_host.h. 
*/ #define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0) #define KVM_MEM_READONLY (1UL << 1) diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 6ba2928f8f18..21d6d29502e4 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -96,9 +96,9 @@ struct kvm_userspace_memory_region { }; /* - * The bit 0 ~ bit 15 of kvm_memory_region::flags are visible for userspace, - * other bits are reserved for kvm internal use which are defined in - * include/linux/kvm_host.h. + * The bit 0 ~ bit 15 of kvm_userspace_memory_region::flags are visible for + * userspace, other bits are reserved for kvm internal use which are defined + *in include/linux/kvm_host.h. */ #define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0) #define KVM_MEM_READONLY (1UL << 1) From 10c5e80b2c4d67fa9a931ac57beab782cc3db2ef Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 2 Dec 2022 11:50:11 +0100 Subject: [PATCH 3285/4122] KVM: Add missing arch for KVM_CREATE_DEVICE and KVM_{SET,GET}_DEVICE_ATTR The ioctls are missing an architecture property that is present in others. Suggested-by: Sergio Lopez Pascual Signed-off-by: Javier Martinez Canillas Message-Id: <20221202105011.185147-5-javierm@redhat.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 92e14823619a..a63d86be45d9 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -3266,6 +3266,7 @@ valid entries found. ---------------------- :Capability: KVM_CAP_DEVICE_CTRL +:Architectures: all :Type: vm ioctl :Parameters: struct kvm_create_device (in/out) :Returns: 0 on success, -1 on error @@ -3306,6 +3307,7 @@ number. 
:Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device, KVM_CAP_VCPU_ATTRIBUTES for vcpu device KVM_CAP_SYS_ATTRIBUTES for system (/dev/kvm) device (no set) +:Architectures: x86, arm64, s390 :Type: device ioctl, vm ioctl, vcpu ioctl :Parameters: struct kvm_device_attr :Returns: 0 on success, -1 on error From dd03cc90e09daeb8a9509e65a39eb576256790b2 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 1 Dec 2022 22:04:33 +0000 Subject: [PATCH 3286/4122] KVM: Remove stale comment about KVM_REQ_UNHALT Remove a comment about KVM_REQ_UNHALT being set by kvm_vcpu_check_block() that was missed when KVM_REQ_UNHALT was dropped. Fixes: c59fb1275838 ("KVM: remove KVM_REQ_UNHALT") Signed-off-by: Sean Christopherson Message-Id: <20221201220433.31366-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 1782c4555d94..1401dcba2f82 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3518,10 +3518,6 @@ void kvm_vcpu_halt(struct kvm_vcpu *vcpu) ktime_t stop = ktime_add_ns(start, vcpu->halt_poll_ns); do { - /* - * This sets KVM_REQ_UNHALT if an interrupt - * arrives. - */ if (kvm_vcpu_check_block(vcpu) < 0) goto out; cpu_relax(); From 8a6841c439dfbba2067a533b0e8264ea438689f6 Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Tue, 15 Nov 2022 20:08:29 +0000 Subject: [PATCH 3287/4122] RISC-V: use REG_S/REG_L for mcount In preparation for rv32i ftrace support, convert mcount routines to use native sized loads/stores. 
Reviewed-by: Andrew Jones Signed-off-by: Jamie Iles Link: https://lore.kernel.org/r/20221115200832.706370-2-jamie@jamieiles.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/mcount.S | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/arch/riscv/kernel/mcount.S b/arch/riscv/kernel/mcount.S index 6d462681c9c0..9cf0904afd6d 100644 --- a/arch/riscv/kernel/mcount.S +++ b/arch/riscv/kernel/mcount.S @@ -15,8 +15,8 @@ .macro SAVE_ABI_STATE addi sp, sp, -16 - sd s0, 0(sp) - sd ra, 8(sp) + REG_S s0, 0(sp) + REG_S ra, 8(sp) addi s0, sp, 16 .endm @@ -26,22 +26,22 @@ */ .macro SAVE_RET_ABI_STATE addi sp, sp, -32 - sd s0, 16(sp) - sd ra, 24(sp) - sd a0, 8(sp) + REG_S s0, 16(sp) + REG_S ra, 24(sp) + REG_S a0, 8(sp) addi s0, sp, 32 .endm .macro RESTORE_ABI_STATE - ld ra, 8(sp) - ld s0, 0(sp) + REG_L ra, 8(sp) + REG_L s0, 0(sp) addi sp, sp, 16 .endm .macro RESTORE_RET_ABI_STATE - ld ra, 24(sp) - ld s0, 16(sp) - ld a0, 8(sp) + REG_L ra, 24(sp) + REG_L s0, 16(sp) + REG_L a0, 8(sp) addi sp, sp, 32 .endm @@ -82,16 +82,16 @@ ENTRY(MCOUNT_NAME) la t4, ftrace_stub #ifdef CONFIG_FUNCTION_GRAPH_TRACER la t0, ftrace_graph_return - ld t1, 0(t0) + REG_L t1, 0(t0) bne t1, t4, do_ftrace_graph_caller la t3, ftrace_graph_entry - ld t2, 0(t3) + REG_L t2, 0(t3) la t6, ftrace_graph_entry_stub bne t2, t6, do_ftrace_graph_caller #endif la t3, ftrace_trace_function - ld t5, 0(t3) + REG_L t5, 0(t3) bne t5, t4, do_trace ret @@ -104,7 +104,7 @@ do_ftrace_graph_caller: addi a0, s0, -8 mv a1, ra #ifdef HAVE_FUNCTION_GRAPH_FP_TEST - ld a2, -16(s0) + REG_L a2, -16(s0) #endif SAVE_ABI_STATE call prepare_ftrace_return @@ -117,7 +117,7 @@ do_ftrace_graph_caller: * (*ftrace_trace_function)(ra_to_caller, ra_to_caller_of_caller) */ do_trace: - ld a1, -8(s0) + REG_L a1, -8(s0) mv a0, ra SAVE_ABI_STATE From 3bd7743f8d6d7171db9897a746038eefd52a1fbd Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Tue, 15 Nov 2022 20:08:30 +0000 Subject: [PATCH 3288/4122] RISC-V: 
reduce mcount save space on RV32 For RV32 we can reduce the size of the ABI save+restore state by using SZREG so that register stores are packed rather than on an 8 byte boundary. Signed-off-by: Jamie Iles Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20221115200832.706370-3-jamie@jamieiles.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/mcount.S | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/arch/riscv/kernel/mcount.S b/arch/riscv/kernel/mcount.S index 9cf0904afd6d..613bd07c6268 100644 --- a/arch/riscv/kernel/mcount.S +++ b/arch/riscv/kernel/mcount.S @@ -15,8 +15,8 @@ .macro SAVE_ABI_STATE addi sp, sp, -16 - REG_S s0, 0(sp) - REG_S ra, 8(sp) + REG_S s0, 0*SZREG(sp) + REG_S ra, 1*SZREG(sp) addi s0, sp, 16 .endm @@ -25,24 +25,24 @@ * register if a0 was not saved. */ .macro SAVE_RET_ABI_STATE - addi sp, sp, -32 - REG_S s0, 16(sp) - REG_S ra, 24(sp) - REG_S a0, 8(sp) - addi s0, sp, 32 + addi sp, sp, -4*SZREG + REG_S s0, 2*SZREG(sp) + REG_S ra, 3*SZREG(sp) + REG_S a0, 1*SZREG(sp) + addi s0, sp, 4*SZREG .endm .macro RESTORE_ABI_STATE - REG_L ra, 8(sp) - REG_L s0, 0(sp) + REG_L ra, 1*SZREG(sp) + REG_L s0, 0*SZREG(sp) addi sp, sp, 16 .endm .macro RESTORE_RET_ABI_STATE - REG_L ra, 24(sp) - REG_L s0, 16(sp) - REG_L a0, 8(sp) - addi sp, sp, 32 + REG_L ra, 3*SZREG(sp) + REG_L s0, 2*SZREG(sp) + REG_L a0, 1*SZREG(sp) + addi sp, sp, 4*SZREG .endm ENTRY(ftrace_stub) @@ -101,10 +101,10 @@ ENTRY(MCOUNT_NAME) * prepare_to_return(&ra_to_caller_of_caller, ra_to_caller) */ do_ftrace_graph_caller: - addi a0, s0, -8 + addi a0, s0, -SZREG mv a1, ra #ifdef HAVE_FUNCTION_GRAPH_FP_TEST - REG_L a2, -16(s0) + REG_L a2, -2*SZREG(s0) #endif SAVE_ABI_STATE call prepare_ftrace_return @@ -117,7 +117,7 @@ do_ftrace_graph_caller: * (*ftrace_trace_function)(ra_to_caller, ra_to_caller_of_caller) */ do_trace: - REG_L a1, -8(s0) + REG_L a1, -SZREG(s0) mv a0, ra SAVE_ABI_STATE From dc58a24db8c12ea361e94eaf53adc5d471534694 Mon Sep 17 
00:00:00 2001 From: Jamie Iles Date: Tue, 15 Nov 2022 20:08:31 +0000 Subject: [PATCH 3289/4122] RISC-V: preserve a1 in mcount The RISC-V ELF psABI states that both a0 and a1 are used for return values so we should preserve them both in return_to_handler. This is especially important for RV32 for functions returning a 64-bit quantity otherwise the return value can be corrupted and undefined behaviour results. Reviewed-by: Andrew Jones Signed-off-by: Jamie Iles Link: https://lore.kernel.org/r/20221115200832.706370-4-jamie@jamieiles.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/mcount.S | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/mcount.S b/arch/riscv/kernel/mcount.S index 613bd07c6268..30102aadc4d7 100644 --- a/arch/riscv/kernel/mcount.S +++ b/arch/riscv/kernel/mcount.S @@ -29,6 +29,7 @@ REG_S s0, 2*SZREG(sp) REG_S ra, 3*SZREG(sp) REG_S a0, 1*SZREG(sp) + REG_S a1, 0*SZREG(sp) addi s0, sp, 4*SZREG .endm @@ -42,6 +43,7 @@ REG_L ra, 3*SZREG(sp) REG_L s0, 2*SZREG(sp) REG_L a0, 1*SZREG(sp) + REG_L a1, 0*SZREG(sp) addi sp, sp, 4*SZREG .endm @@ -71,9 +73,9 @@ ENTRY(return_to_handler) mv a0, t6 #endif call ftrace_return_to_handler - mv a1, a0 + mv a2, a0 RESTORE_RET_ABI_STATE - jalr a1 + jalr a2 ENDPROC(return_to_handler) #endif From f32b4b467ebd8a035f8342b7ea27efc84b10d96b Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Tue, 15 Nov 2022 20:08:32 +0000 Subject: [PATCH 3290/4122] RISC-V: enable dynamic ftrace for RV32I The RISC-V mcount function is now capable of supporting RV32I so make it available in the kernel config. 
Signed-off-by: Jamie Iles Link: https://lore.kernel.org/r/20221115200832.706370-5-jamie@jamieiles.com Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 6b48a3ae9843..5ae4f7ce2a05 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -129,6 +129,11 @@ config RISCV select TRACE_IRQFLAGS_SUPPORT select UACCESS_MEMCPY if !MMU select ZONE_DMA32 if 64BIT + select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && $(cc-option,-fpatchable-function-entry=8) + select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE + select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL + select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FUNCTION_TRACER if !XIP_KERNEL config ARCH_MMAP_RND_BITS_MIN default 18 if 64BIT @@ -274,11 +279,6 @@ config ARCH_RV64I bool "RV64I" select 64BIT select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 - select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && $(cc-option,-fpatchable-function-entry=8) - select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE - select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL - select HAVE_FUNCTION_GRAPH_TRACER - select HAVE_FUNCTION_TRACER if !XIP_KERNEL select SWIOTLB if MMU endchoice From ef40757743b47cc95de9b4ed41525c94f8dc73d9 Mon Sep 17 00:00:00 2001 From: Yuan ZhaoXiong Date: Fri, 2 Dec 2022 20:36:14 +0800 Subject: [PATCH 3291/4122] KVM: x86: fix APICv/x2AVIC disabled when vm reboot by itself When a VM reboots itself, the reset process will result in an ioctl(KVM_SET_LAPIC, ...) to disable x2APIC mode and set the xAPIC id of the vCPU to its default value, which is the vCPU id. 
That will be handled in KVM as follows: kvm_vcpu_ioctl_set_lapic kvm_apic_set_state kvm_lapic_set_base => disable X2APIC mode kvm_apic_state_fixup kvm_lapic_xapic_id_updated kvm_xapic_id(apic) != apic->vcpu->vcpu_id kvm_set_apicv_inhibit(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s)) => update APIC_ID When kvm_apic_set_state invokes kvm_lapic_set_base to disable x2APIC mode, the old 32-bit x2APIC id is still present rather than the 8-bit xAPIC id. kvm_lapic_xapic_id_updated will set the APICV_INHIBIT_REASON_APIC_ID_MODIFIED bit and disable APICv/x2AVIC. Instead, kvm_lapic_xapic_id_updated must be called after APIC_ID is changed. In fact, this fixes another small issue in the code in that potential changes to a vCPU's xAPIC ID need not be tracked for KVM_GET_LAPIC. Fixes: 3743c2f02517 ("KVM: x86: inhibit APICv/AVIC on changes to APIC ID or APIC base") Signed-off-by: Yuan ZhaoXiong Message-Id: <1669984574-32692-1-git-send-email-yuanzhaoxiong@baidu.com> Cc: stable@vger.kernel.org Reported-by: Alejandro Jimenez Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 1bb63746e991..8224ac8b617a 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2724,8 +2724,6 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR); __kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32); } - } else { - kvm_lapic_xapic_id_updated(vcpu->arch.apic); } return 0; @@ -2761,6 +2759,9 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) } memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s)); + if (!apic_x2apic_mode(apic)) + kvm_lapic_xapic_id_updated(apic); + atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); kvm_recalculate_apic_map(vcpu->kvm); kvm_apic_set_version(vcpu); From ef16b2dff4d1c71eb32b306d400d4c0f3a383ba7 Mon Sep 17 00:00:00 2001 
From: Sean Christopherson Date: Sat, 19 Nov 2022 01:34:44 +0000 Subject: [PATCH 3292/4122] KVM: arm64: selftests: Enable single-step without a "full" ucall() Add a new ucall hook, GUEST_UCALL_NONE(), to allow tests to make ucalls without allocating a ucall struct, and use it to enable single-step in ARM's debug-exceptions test. Like the disable single-step path, the enabling path also needs to ensure that no exclusive access sequences are attempted after enabling single-step, as the exclusive monitor is cleared on ERET from the debug exception taken to EL2. The test currently "works" because clear_bit() isn't actually an atomic operation... yet. Signed-off-by: Sean Christopherson Message-Id: <20221119013450.2643007-4-seanjc@google.com> Signed-off-by: Paolo Bonzini --- .../selftests/kvm/aarch64/debug-exceptions.c | 21 ++++++++++--------- .../selftests/kvm/include/ucall_common.h | 8 +++++++ 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c index d86c4e4d1c82..c62ec4d7f6a3 100644 --- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c @@ -239,10 +239,6 @@ static void guest_svc_handler(struct ex_regs *regs) svc_addr = regs->pc; } -enum single_step_op { - SINGLE_STEP_ENABLE = 0, -}; - static void guest_code_ss(int test_cnt) { uint64_t i; @@ -253,8 +249,16 @@ static void guest_code_ss(int test_cnt) w_bvr = i << 2; w_wvr = i << 2; - /* Enable Single Step execution */ - GUEST_SYNC(SINGLE_STEP_ENABLE); + /* + * Enable Single Step execution. Note! This _must_ be a bare + * ucall as the ucall() path uses atomic operations to manage + * the ucall structures, and the built-in "atomics" are usually + * implemented via exclusive access instructions. 
The exlusive + * monitor is cleared on ERET, and so taking debug exceptions + * during a LDREX=>STREX sequence will prevent forward progress + * and hang the guest/test. + */ + GUEST_UCALL_NONE(); /* * The userspace will verify that the pc is as expected during @@ -356,12 +360,9 @@ void test_single_step_from_userspace(int test_cnt) break; } - TEST_ASSERT(cmd == UCALL_SYNC, + TEST_ASSERT(cmd == UCALL_NONE, "Unexpected ucall cmd 0x%lx", cmd); - TEST_ASSERT(uc.args[1] == SINGLE_STEP_ENABLE, - "Unexpected ucall action 0x%lx", uc.args[1]); - debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP; ss_enable = true; diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h index bdd373189a77..1a6aaef5ccae 100644 --- a/tools/testing/selftests/kvm/include/ucall_common.h +++ b/tools/testing/selftests/kvm/include/ucall_common.h @@ -35,6 +35,14 @@ void ucall(uint64_t cmd, int nargs, ...); uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc); void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa); +/* + * Perform userspace call without any associated data. This bare call avoids + * allocating a ucall struct, which can be useful if the atomic operations in + * the full ucall() are problematic and/or unwanted. Note, this will come out + * as UCALL_NONE on the backend. + */ +#define GUEST_UCALL_NONE() ucall_arch_do_ucall((vm_vaddr_t)NULL) + #define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4) \ ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4) #define GUEST_SYNC(stage) ucall(UCALL_SYNC, 2, "hello", stage) From 7f2b47f22b825c16d9843e6e78bbb2370d2c31a0 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Sat, 19 Nov 2022 01:34:45 +0000 Subject: [PATCH 3293/4122] tools: Take @bit as an "unsigned long" in {clear,set}_bit() helpers Take @bit as an unsigned long instead of a signed int in clear_bit() and set_bit() so that they match the double-underscore versions, __clear_bit() and __set_bit(). 
This will allow converting users that really don't want atomic operations to the double-underscores without introducing a functional change, which will in turn allow making {clear,set}_bit() atomic (as advertised). Practically speaking, this _should_ have no functional impact. KVM's selftests usage is either hardcoded (Hyper-V tests) or is artificially limited (arch_timer test and dirty_log test). In KVM, dirty_log test is the only mildly interesting case as its use is indirectly restricted to unsigned 32-bit values, but in theory it could generate a negative value when cast to a signed int. But in that case, taking an "unsigned long" is actually a bug fix. Perf's usage is more difficult to audit, but any code that is affected by the switch is likely already broken. perf_header__{set,clear}_feat() and perf_file_header__read() effectively use only hardcoded enums with small, positive values, atom_new() passes an unsigned long, but its value is capped at 128 via NR_ATOM_PER_PAGE, etc... The only real potential for breakage is in the perf flows that take a "cpu", but it's unlikely perf is subtly relying on a negative index into bitmaps, e.g. "cpu" can be "-1", but only as a "not valid" placeholder. Note, tools/testing/nvdimm/ makes heavy use of set_bit(), but that code builds into a kernel module of sorts, i.e. pulls in all of the kernel's headers and so is getting the kernel's atomic set_bit(). The NVDIMM test usage of atomics is likely unnecessary, e.g. ndtest_dimm_register() sets bits in a local variable, but that's neither here nor there as far as this change is concerned.
Signed-off-by: Sean Christopherson Message-Id: <20221119013450.2643007-5-seanjc@google.com> Signed-off-by: Paolo Bonzini --- tools/include/asm-generic/bitops/atomic.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/include/asm-generic/bitops/atomic.h b/tools/include/asm-generic/bitops/atomic.h index 2f6ea28764a7..f64b049d236c 100644 --- a/tools/include/asm-generic/bitops/atomic.h +++ b/tools/include/asm-generic/bitops/atomic.h @@ -5,12 +5,12 @@ #include #include -static inline void set_bit(int nr, unsigned long *addr) +static inline void set_bit(unsigned long nr, unsigned long *addr) { addr[nr / __BITS_PER_LONG] |= 1UL << (nr % __BITS_PER_LONG); } -static inline void clear_bit(int nr, unsigned long *addr) +static inline void clear_bit(unsigned long nr, unsigned long *addr) { addr[nr / __BITS_PER_LONG] &= ~(1UL << (nr % __BITS_PER_LONG)); } From 75d7ba32f9829e778484cf6e96e6e8f80914b0b3 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Sat, 19 Nov 2022 01:34:46 +0000 Subject: [PATCH 3294/4122] perf tools: Use dedicated non-atomic clear/set bit helpers Use the dedicated non-atomic helpers for {clear,set}_bit() and their test variants, i.e. the double-underscore versions. Despite being defined in atomic.h, and despite the kernel versions being atomic in the kernel, tools' {clear,set}_bit() helpers aren't actually atomic. Move to the double-underscore versions so that the versions that are expected to be atomic (for kernel developers) can be made atomic without affecting users that don't want atomic operations. No functional change intended.
Signed-off-by: Sean Christopherson Acked-by: Namhyung Kim Message-Id: <20221119013450.2643007-6-seanjc@google.com> Signed-off-by: Paolo Bonzini --- tools/perf/bench/find-bit-bench.c | 2 +- tools/perf/builtin-c2c.c | 6 +++--- tools/perf/builtin-kwork.c | 6 +++--- tools/perf/builtin-record.c | 6 +++--- tools/perf/builtin-sched.c | 2 +- tools/perf/tests/bitmap.c | 2 +- tools/perf/tests/mem2node.c | 2 +- tools/perf/util/affinity.c | 4 ++-- tools/perf/util/header.c | 8 ++++---- tools/perf/util/mmap.c | 6 +++--- tools/perf/util/pmu.c | 2 +- tools/perf/util/scripting-engines/trace-event-perl.c | 2 +- tools/perf/util/scripting-engines/trace-event-python.c | 2 +- tools/perf/util/session.c | 2 +- tools/perf/util/svghelper.c | 2 +- 15 files changed, 27 insertions(+), 27 deletions(-) diff --git a/tools/perf/bench/find-bit-bench.c b/tools/perf/bench/find-bit-bench.c index 22b5cfe97023..d103c3136983 100644 --- a/tools/perf/bench/find-bit-bench.c +++ b/tools/perf/bench/find-bit-bench.c @@ -70,7 +70,7 @@ static int do_for_each_set_bit(unsigned int num_bits) bitmap_zero(to_test, num_bits); skip = num_bits / set_bits; for (i = 0; i < num_bits; i += skip) - set_bit(i, to_test); + __set_bit(i, to_test); for (i = 0; i < outer_iterations; i++) { old = accumulator; diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index a9190458d2d5..52d94c7dd836 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -230,7 +230,7 @@ static void c2c_he__set_cpu(struct c2c_hist_entry *c2c_he, "WARNING: no sample cpu value")) return; - set_bit(sample->cpu, c2c_he->cpuset); + __set_bit(sample->cpu, c2c_he->cpuset); } static void c2c_he__set_node(struct c2c_hist_entry *c2c_he, @@ -247,7 +247,7 @@ static void c2c_he__set_node(struct c2c_hist_entry *c2c_he, if (WARN_ONCE(node < 0, "WARNING: failed to find node\n")) return; - set_bit(node, c2c_he->nodeset); + __set_bit(node, c2c_he->nodeset); if (c2c_he->paddr != sample->phys_addr) { c2c_he->paddr_cnt++; @@ -2318,7 +2318,7 @@ 
static int setup_nodes(struct perf_session *session) continue; perf_cpu_map__for_each_cpu(cpu, idx, map) { - set_bit(cpu.cpu, set); + __set_bit(cpu.cpu, set); if (WARN_ONCE(cpu2node[cpu.cpu] != -1, "node/cpu topology bug")) return -EINVAL; diff --git a/tools/perf/builtin-kwork.c b/tools/perf/builtin-kwork.c index fb8c63656ad8..1f63e24f704e 100644 --- a/tools/perf/builtin-kwork.c +++ b/tools/perf/builtin-kwork.c @@ -216,7 +216,7 @@ static struct kwork_atom *atom_new(struct perf_kwork *kwork, list_add_tail(&page->list, &kwork->atom_page_list); found_atom: - set_bit(i, page->bitmap); + __set_bit(i, page->bitmap); atom->time = sample->time; atom->prev = NULL; atom->page_addr = page; @@ -229,8 +229,8 @@ static void atom_free(struct kwork_atom *atom) if (atom->prev != NULL) atom_free(atom->prev); - clear_bit(atom->bit_inpage, - ((struct kwork_atom_page *)atom->page_addr)->bitmap); + __clear_bit(atom->bit_inpage, + ((struct kwork_atom_page *)atom->page_addr)->bitmap); } static void atom_del(struct kwork_atom *atom) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index e128b855ddde..2711c141c5bf 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -3555,7 +3555,7 @@ static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cp /* Return ENODEV is input cpu is greater than max cpu */ if ((unsigned long)cpu.cpu > mask->nbits) return -ENODEV; - set_bit(cpu.cpu, mask->bits); + __set_bit(cpu.cpu, mask->bits); } return 0; @@ -3627,8 +3627,8 @@ static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map pr_debug("nr_threads: %d\n", rec->nr_threads); for (t = 0; t < rec->nr_threads; t++) { - set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits); - set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits); + __set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits); + __set_bit(perf_cpu_map__cpu(cpus, t).cpu, 
rec->thread_masks[t].affinity.bits); if (verbose) { pr_debug("thread_masks[%d]: ", t); mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps"); diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index f93737eef07b..86e18575c9be 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1573,7 +1573,7 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel, if (sched->map.comp) { cpus_nr = bitmap_weight(sched->map.comp_cpus_mask, MAX_CPUS); - if (!test_and_set_bit(this_cpu.cpu, sched->map.comp_cpus_mask)) { + if (!__test_and_set_bit(this_cpu.cpu, sched->map.comp_cpus_mask)) { sched->map.comp_cpus[cpus_nr++] = this_cpu; new_cpu = true; } diff --git a/tools/perf/tests/bitmap.c b/tools/perf/tests/bitmap.c index 4965dd666956..0173f5402a35 100644 --- a/tools/perf/tests/bitmap.c +++ b/tools/perf/tests/bitmap.c @@ -18,7 +18,7 @@ static unsigned long *get_bitmap(const char *str, int nbits) if (map && bm) { for (i = 0; i < perf_cpu_map__nr(map); i++) - set_bit(perf_cpu_map__cpu(map, i).cpu, bm); + __set_bit(perf_cpu_map__cpu(map, i).cpu, bm); } if (map) diff --git a/tools/perf/tests/mem2node.c b/tools/perf/tests/mem2node.c index 4c96829510c9..a0e88c496107 100644 --- a/tools/perf/tests/mem2node.c +++ b/tools/perf/tests/mem2node.c @@ -33,7 +33,7 @@ static unsigned long *get_bitmap(const char *str, int nbits) int i; perf_cpu_map__for_each_cpu(cpu, i, map) - set_bit(cpu.cpu, bm); + __set_bit(cpu.cpu, bm); } if (map) diff --git a/tools/perf/util/affinity.c b/tools/perf/util/affinity.c index 4ee96b3c755b..38dc4524b7e8 100644 --- a/tools/perf/util/affinity.c +++ b/tools/perf/util/affinity.c @@ -58,14 +58,14 @@ void affinity__set(struct affinity *a, int cpu) return; a->changed = true; - set_bit(cpu, a->sched_cpus); + __set_bit(cpu, a->sched_cpus); /* * We ignore errors because affinity is just an optimization. * This could happen for example with isolated CPUs or cpusets. 
* In this case the IPIs inside the kernel's perf API still work. */ sched_setaffinity(0, cpu_set_size, (cpu_set_t *)a->sched_cpus); - clear_bit(cpu, a->sched_cpus); + __clear_bit(cpu, a->sched_cpus); } static void __affinity__cleanup(struct affinity *a) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 98dfaf84bd13..dc2ae397d400 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -79,12 +79,12 @@ struct perf_file_attr { void perf_header__set_feat(struct perf_header *header, int feat) { - set_bit(feat, header->adds_features); + __set_bit(feat, header->adds_features); } void perf_header__clear_feat(struct perf_header *header, int feat) { - clear_bit(feat, header->adds_features); + __clear_bit(feat, header->adds_features); } bool perf_header__has_feat(const struct perf_header *header, int feat) @@ -1358,7 +1358,7 @@ static int memory_node__read(struct memory_node *n, unsigned long idx) rewinddir(dir); for_each_memory(phys, dir) { - set_bit(phys, n->set); + __set_bit(phys, n->set); } closedir(dir); @@ -3952,7 +3952,7 @@ int perf_file_header__read(struct perf_file_header *header, if (!test_bit(HEADER_HOSTNAME, header->adds_features)) { bitmap_zero(header->adds_features, HEADER_FEAT_BITS); - set_bit(HEADER_BUILD_ID, header->adds_features); + __set_bit(HEADER_BUILD_ID, header->adds_features); } } diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index a4dff881be39..49093b21ee2d 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -111,7 +111,7 @@ static int perf_mmap__aio_bind(struct mmap *map, int idx, struct perf_cpu cpu, i pr_err("Failed to allocate node mask for mbind: error %m\n"); return -1; } - set_bit(node_index, node_mask); + __set_bit(node_index, node_mask); if (mbind(data, mmap_len, MPOL_BIND, node_mask, node_index + 1 + 1, 0)) { pr_err("Failed to bind [%p-%p] AIO buffer to node %lu: error %m\n", data, data + mmap_len, node_index); @@ -256,7 +256,7 @@ static void build_node_mask(int node, struct 
mmap_cpu_mask *mask) for (idx = 0; idx < nr_cpus; idx++) { cpu = perf_cpu_map__cpu(cpu_map, idx); /* map c index to online cpu index */ if (cpu__get_node(cpu) == node) - set_bit(cpu.cpu, mask->bits); + __set_bit(cpu.cpu, mask->bits); } } @@ -270,7 +270,7 @@ static int perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params * if (mp->affinity == PERF_AFFINITY_NODE && cpu__max_node() > 1) build_node_mask(cpu__get_node(map->core.cpu), &map->affinity_mask); else if (mp->affinity == PERF_AFFINITY_CPU) - set_bit(map->core.cpu.cpu, map->affinity_mask.bits); + __set_bit(map->core.cpu.cpu, map->affinity_mask.bits); return 0; } diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 03284059175f..371d8f7a3de3 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1513,7 +1513,7 @@ void perf_pmu__set_format(unsigned long *bits, long from, long to) memset(bits, 0, BITS_TO_BYTES(PERF_PMU_FORMAT_BITS)); for (b = from; b <= to; b++) - set_bit(b, bits); + __set_bit(b, bits); } void perf_pmu__del_formats(struct list_head *formats) diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index a5d945415bbc..5b602b6d4685 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -365,7 +365,7 @@ static void perl_process_tracepoint(struct perf_sample *sample, sprintf(handler, "%s::%s", event->system, event->name); - if (!test_and_set_bit(event->id, events_defined)) + if (!__test_and_set_bit(event->id, events_defined)) define_event_symbols(event, handler, event->print_fmt.args); s = nsecs / NSEC_PER_SEC; diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 1f2040f36d4e..0f229fa29163 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -933,7 +933,7 @@ static void 
python_process_tracepoint(struct perf_sample *sample, sprintf(handler_name, "%s__%s", event->system, event->name); - if (!test_and_set_bit(event->id, events_defined)) + if (!__test_and_set_bit(event->id, events_defined)) define_event_symbols(event, handler_name, event->print_fmt.args); handler = get_handler(handler_name); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 1a4f10de29ff..873fd51ec1b2 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -2748,7 +2748,7 @@ int perf_session__cpu_bitmap(struct perf_session *session, goto out_delete_map; } - set_bit(cpu.cpu, cpu_bitmap); + __set_bit(cpu.cpu, cpu_bitmap); } err = 0; diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c index 1e0c731fc539..5c62d3118c41 100644 --- a/tools/perf/util/svghelper.c +++ b/tools/perf/util/svghelper.c @@ -741,7 +741,7 @@ static int str_to_bitmap(char *s, cpumask_t *b, int nr_cpus) break; } - set_bit(c.cpu, cpumask_bits(b)); + __set_bit(c.cpu, cpumask_bits(b)); } perf_cpu_map__put(m); From 03a0c819e71755398d59993b9adee203544617d5 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Sat, 19 Nov 2022 01:34:47 +0000 Subject: [PATCH 3295/4122] KVM: selftests: Use non-atomic clear/set bit helpers in KVM tests Use the dedicated non-atomic helpers for {clear,set}_bit() and their test variants, i.e. the double-underscore versions. Despite being defined in atomic.h, and despite the kernel versions being atomic in the kernel, tools' {clear,set}_bit() helpers aren't actually atomic. Move to the double-underscore versions so that the versions that are expected to be atomic (for kernel developers) can be made atomic without affecting users that don't want atomic operations. Leave the usage in ucall_free() as-is, it's the one place in tools/ that actually wants/needs atomic behavior.
Signed-off-by: Sean Christopherson Message-Id: <20221119013450.2643007-7-seanjc@google.com> Signed-off-by: Paolo Bonzini --- .../selftests/kvm/aarch64/arch_timer.c | 2 +- tools/testing/selftests/kvm/dirty_log_test.c | 34 +++++++++---------- .../selftests/kvm/x86_64/hyperv_evmcs.c | 4 +-- .../selftests/kvm/x86_64/hyperv_svm_test.c | 4 +-- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c index f2a96779716a..26556a266021 100644 --- a/tools/testing/selftests/kvm/aarch64/arch_timer.c +++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c @@ -222,7 +222,7 @@ static void *test_vcpu_run(void *arg) /* Currently, any exit from guest is an indication of completion */ pthread_mutex_lock(&vcpu_done_map_lock); - set_bit(vcpu_idx, vcpu_done_map); + __set_bit(vcpu_idx, vcpu_done_map); pthread_mutex_unlock(&vcpu_done_map_lock); switch (get_ucall(vcpu, &uc)) { diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index a38c4369fb8e..a75548865f6b 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -44,20 +44,20 @@ # define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7) # define test_bit_le(nr, addr) \ test_bit((nr) ^ BITOP_LE_SWIZZLE, addr) -# define set_bit_le(nr, addr) \ - set_bit((nr) ^ BITOP_LE_SWIZZLE, addr) -# define clear_bit_le(nr, addr) \ - clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr) -# define test_and_set_bit_le(nr, addr) \ - test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, addr) -# define test_and_clear_bit_le(nr, addr) \ - test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr) +# define __set_bit_le(nr, addr) \ + __set_bit((nr) ^ BITOP_LE_SWIZZLE, addr) +# define __clear_bit_le(nr, addr) \ + __clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr) +# define __test_and_set_bit_le(nr, addr) \ + __test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, addr) +# define __test_and_clear_bit_le(nr, 
addr) \ + __test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr) #else -# define test_bit_le test_bit -# define set_bit_le set_bit -# define clear_bit_le clear_bit -# define test_and_set_bit_le test_and_set_bit -# define test_and_clear_bit_le test_and_clear_bit +# define test_bit_le test_bit +# define __set_bit_le __set_bit +# define __clear_bit_le __clear_bit +# define __test_and_set_bit_le __test_and_set_bit +# define __test_and_clear_bit_le __test_and_clear_bit #endif #define TEST_DIRTY_RING_COUNT 65536 @@ -305,7 +305,7 @@ static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns, TEST_ASSERT(cur->offset < num_pages, "Offset overflow: " "0x%llx >= 0x%x", cur->offset, num_pages); //pr_info("fetch 0x%x page %llu\n", *fetch_index, cur->offset); - set_bit_le(cur->offset, bitmap); + __set_bit_le(cur->offset, bitmap); dirty_ring_last_page = cur->offset; dirty_gfn_set_collected(cur); (*fetch_index)++; @@ -560,7 +560,7 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap) value_ptr = host_test_mem + page * host_page_size; /* If this is a special page that we were tracking... 
*/ - if (test_and_clear_bit_le(page, host_bmap_track)) { + if (__test_and_clear_bit_le(page, host_bmap_track)) { host_track_next_count++; TEST_ASSERT(test_bit_le(page, bmap), "Page %"PRIu64" should have its dirty bit " @@ -568,7 +568,7 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap) page); } - if (test_and_clear_bit_le(page, bmap)) { + if (__test_and_clear_bit_le(page, bmap)) { bool matched; host_dirty_count++; @@ -661,7 +661,7 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap) * should report its dirtyness in the * next run */ - set_bit_le(page, host_bmap_track); + __set_bit_le(page, host_bmap_track); } } } diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c b/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c index ba09d300c953..af29e5776d40 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c @@ -142,7 +142,7 @@ void guest_code(struct vmx_pages *vmx_pages, struct hyperv_test_pages *hv_pages, /* Intercept RDMSR 0xc0000100 */ vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmreadz(CPU_BASED_VM_EXEC_CONTROL) | CPU_BASED_USE_MSR_BITMAPS); - set_bit(MSR_FS_BASE & 0x1fff, vmx_pages->msr + 0x400); + __set_bit(MSR_FS_BASE & 0x1fff, vmx_pages->msr + 0x400); GUEST_ASSERT(!vmresume()); GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ); current_evmcs->guest_rip += 2; /* rdmsr */ @@ -154,7 +154,7 @@ void guest_code(struct vmx_pages *vmx_pages, struct hyperv_test_pages *hv_pages, current_evmcs->guest_rip += 2; /* rdmsr */ /* Intercept RDMSR 0xc0000101 without telling KVM about it */ - set_bit(MSR_GS_BASE & 0x1fff, vmx_pages->msr + 0x400); + __set_bit(MSR_GS_BASE & 0x1fff, vmx_pages->msr + 0x400); /* Make sure HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP is set */ current_evmcs->hv_clean_fields |= HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP; GUEST_ASSERT(!vmresume()); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c 
b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c index 3b3cc94ba8e4..68a7d354ea07 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c @@ -103,7 +103,7 @@ static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm, /* Intercept RDMSR 0xc0000100 */ vmcb->control.intercept |= 1ULL << INTERCEPT_MSR_PROT; - set_bit(2 * (MSR_FS_BASE & 0x1fff), svm->msr + 0x800); + __set_bit(2 * (MSR_FS_BASE & 0x1fff), svm->msr + 0x800); run_guest(vmcb, svm->vmcb_gpa); GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR); vmcb->save.rip += 2; /* rdmsr */ @@ -115,7 +115,7 @@ static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm, vmcb->save.rip += 2; /* rdmsr */ /* Intercept RDMSR 0xc0000101 without telling KVM about it */ - set_bit(2 * (MSR_GS_BASE & 0x1fff), svm->msr + 0x800); + __set_bit(2 * (MSR_GS_BASE & 0x1fff), svm->msr + 0x800); /* Make sure HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP is set */ vmcb->control.clean |= HV_VMCB_NESTED_ENLIGHTENMENTS; run_guest(vmcb, svm->vmcb_gpa); From 7f32a6cf8b5a8067537f25a1f12744292431aae1 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Sat, 19 Nov 2022 01:34:48 +0000 Subject: [PATCH 3296/4122] tools: Drop conflicting non-atomic test_and_{clear,set}_bit() helpers Drop tools' non-atomic test_and_set_bit() and test_and_clear_bit() helpers now that all users are gone. The names will be claimed in the future for atomic versions. 
Signed-off-by: Sean Christopherson Message-Id: <20221119013450.2643007-8-seanjc@google.com> Signed-off-by: Paolo Bonzini --- tools/include/linux/bitmap.h | 34 ---------------------------------- 1 file changed, 34 deletions(-) diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h index 65d0747c5205..f3566ea0f932 100644 --- a/tools/include/linux/bitmap.h +++ b/tools/include/linux/bitmap.h @@ -77,40 +77,6 @@ static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, __bitmap_or(dst, src1, src2, nbits); } -/** - * test_and_set_bit - Set a bit and return its old value - * @nr: Bit to set - * @addr: Address to count from - */ -static inline int test_and_set_bit(int nr, unsigned long *addr) -{ - unsigned long mask = BIT_MASK(nr); - unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); - unsigned long old; - - old = *p; - *p = old | mask; - - return (old & mask) != 0; -} - -/** - * test_and_clear_bit - Clear a bit and return its old value - * @nr: Bit to clear - * @addr: Address to count from - */ -static inline int test_and_clear_bit(int nr, unsigned long *addr) -{ - unsigned long mask = BIT_MASK(nr); - unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); - unsigned long old; - - old = *p; - *p = old & ~mask; - - return (old & mask) != 0; -} - /** * bitmap_zalloc - Allocate bitmap * @nbits: Number of bits From 36293352ff433061d45d52784983e44950c09ae3 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Sat, 19 Nov 2022 01:34:49 +0000 Subject: [PATCH 3297/4122] tools: Drop "atomic_" prefix from atomic test_and_set_bit() Drop the "atomic_" prefix from tools' atomic_test_and_set_bit() to match the kernel nomenclature where test_and_set_bit() is atomic, and __test_and_set_bit() provides the non-atomic variant. 
Signed-off-by: Sean Christopherson Message-Id: <20221119013450.2643007-9-seanjc@google.com> Signed-off-by: Paolo Bonzini --- tools/arch/x86/include/asm/atomic.h | 3 +-- tools/include/asm-generic/atomic-gcc.h | 2 +- tools/testing/selftests/kvm/lib/ucall_common.c | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/arch/x86/include/asm/atomic.h b/tools/arch/x86/include/asm/atomic.h index 01cc27ec4520..a42733af7d51 100644 --- a/tools/arch/x86/include/asm/atomic.h +++ b/tools/arch/x86/include/asm/atomic.h @@ -71,10 +71,9 @@ static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new) return cmpxchg(&v->counter, old, new); } -static inline int atomic_test_and_set_bit(long nr, unsigned long *addr) +static inline int test_and_set_bit(long nr, unsigned long *addr) { GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts), *addr, "Ir", nr, "%0", "c"); - } #endif /* _TOOLS_LINUX_ASM_X86_ATOMIC_H */ diff --git a/tools/include/asm-generic/atomic-gcc.h b/tools/include/asm-generic/atomic-gcc.h index 6daa68bf5b9e..37ef522aaac4 100644 --- a/tools/include/asm-generic/atomic-gcc.h +++ b/tools/include/asm-generic/atomic-gcc.h @@ -70,7 +70,7 @@ static inline int atomic_cmpxchg(atomic_t *v, int oldval, int newval) return cmpxchg(&(v)->counter, oldval, newval); } -static inline int atomic_test_and_set_bit(long nr, unsigned long *addr) +static inline int test_and_set_bit(long nr, unsigned long *addr) { unsigned long mask = BIT_MASK(nr); long old; diff --git a/tools/testing/selftests/kvm/lib/ucall_common.c b/tools/testing/selftests/kvm/lib/ucall_common.c index fcae96461e46..820ce6c82829 100644 --- a/tools/testing/selftests/kvm/lib/ucall_common.c +++ b/tools/testing/selftests/kvm/lib/ucall_common.c @@ -44,7 +44,7 @@ static struct ucall *ucall_alloc(void) GUEST_ASSERT(ucall_pool); for (i = 0; i < KVM_MAX_VCPUS; ++i) { - if (!atomic_test_and_set_bit(i, ucall_pool->in_use)) { + if (!test_and_set_bit(i, ucall_pool->in_use)) { uc = &ucall_pool->ucalls[i]; memset(uc->args, 
0, sizeof(uc->args)); return uc; From bb056c0f080a3d15c2a9ad9057a8b542d45e4ba0 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Sat, 19 Nov 2022 01:34:50 +0000 Subject: [PATCH 3298/4122] tools: KVM: selftests: Convert clear/set_bit() to actual atomics Convert {clear,set}_bit() to atomics as KVM's ucall implementation relies on clear_bit() being atomic, they are defined in atomic.h, and the same helpers in the kernel proper are atomic. KVM's ucall infrastructure is the only user of clear_bit() in tools/, and there are no true set_bit() users. tools/testing/nvdimm/ does make heavy use of set_bit(), but that code builds into a kernel module of sorts, i.e. pulls in all of the kernel's header and so is already getting the kernel's atomic set_bit(). Signed-off-by: Sean Christopherson Message-Id: <20221119013450.2643007-10-seanjc@google.com> Signed-off-by: Paolo Bonzini --- tools/arch/x86/include/asm/atomic.h | 5 +++++ tools/include/asm-generic/atomic-gcc.h | 11 +++++++++++ tools/include/asm-generic/bitops/atomic.h | 15 ++++++--------- 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/tools/arch/x86/include/asm/atomic.h b/tools/arch/x86/include/asm/atomic.h index a42733af7d51..365cf182df12 100644 --- a/tools/arch/x86/include/asm/atomic.h +++ b/tools/arch/x86/include/asm/atomic.h @@ -76,4 +76,9 @@ static inline int test_and_set_bit(long nr, unsigned long *addr) GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts), *addr, "Ir", nr, "%0", "c"); } +static inline int test_and_clear_bit(long nr, unsigned long *addr) +{ + GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc), *addr, "Ir", nr, "%0", "c"); +} + #endif /* _TOOLS_LINUX_ASM_X86_ATOMIC_H */ diff --git a/tools/include/asm-generic/atomic-gcc.h b/tools/include/asm-generic/atomic-gcc.h index 37ef522aaac4..9b3c528bab92 100644 --- a/tools/include/asm-generic/atomic-gcc.h +++ b/tools/include/asm-generic/atomic-gcc.h @@ -81,4 +81,15 @@ static inline int test_and_set_bit(long nr, unsigned long *addr) return !!(old & mask); 
} +static inline int test_and_clear_bit(long nr, unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + long old; + + addr += BIT_WORD(nr); + + old = __sync_fetch_and_and(addr, ~mask); + return !!(old & mask); +} + #endif /* __TOOLS_ASM_GENERIC_ATOMIC_H */ diff --git a/tools/include/asm-generic/bitops/atomic.h b/tools/include/asm-generic/bitops/atomic.h index f64b049d236c..ab37a221b41a 100644 --- a/tools/include/asm-generic/bitops/atomic.h +++ b/tools/include/asm-generic/bitops/atomic.h @@ -5,14 +5,11 @@ #include #include -static inline void set_bit(unsigned long nr, unsigned long *addr) -{ - addr[nr / __BITS_PER_LONG] |= 1UL << (nr % __BITS_PER_LONG); -} - -static inline void clear_bit(unsigned long nr, unsigned long *addr) -{ - addr[nr / __BITS_PER_LONG] &= ~(1UL << (nr % __BITS_PER_LONG)); -} +/* + * Just alias the test versions, all of the compiler built-in atomics "fetch", + * and optimizing compile-time constants on x86 isn't worth the complexity. + */ +#define set_bit test_and_set_bit +#define clear_bit test_and_clear_bit #endif /* _TOOLS_LINUX_ASM_GENERIC_BITOPS_ATOMIC_H_ */ From 4bf46e35826d8dc4fc0a103dd0ccd94c072a4c6a Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 1 Dec 2022 09:13:54 +0000 Subject: [PATCH 3299/4122] KVM: selftests: Fix spelling mistake "probabalistic" -> "probabilistic" There is a spelling mistake in some help text. Fix it. 
Signed-off-by: Colin Ian King Message-Id: <20221201091354.1613652-1-colin.i.king@gmail.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/dirty_log_perf_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index c33e89012ae6..e9d6d1aecf89 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -398,7 +398,7 @@ static void help(char *name) printf(" -x: Split the memory region into this number of memslots.\n" " (default: 1)\n"); printf(" -w: specify the percentage of pages which should be written to\n" - " as an integer from 0-100 inclusive. This is probabalistic,\n" + " as an integer from 0-100 inclusive. This is probabilistic,\n" " so -w X means each page has an X%% chance of writing\n" " and a (100-X)%% chance of reading.\n" " (default: 100 i.e. all pages are written to.)\n"); From 6925ba3d9b8ccf1989b4cf13d6f0d7e341899481 Mon Sep 17 00:00:00 2001 From: Hal Feng Date: Fri, 18 Nov 2022 09:17:14 +0800 Subject: [PATCH 3300/4122] RISC-V: defconfig: Enable CONFIG_SERIAL_8250_DW Add CONFIG_SERIAL_8250_DW=y, which is a necessary option for StarFive JH7110 and JH7100 SoCs to boot with serial ports. 
Reviewed-by: Conor Dooley Signed-off-by: Hal Feng Acked-by: Palmer Dabbelt Link: https://lore.kernel.org/r/20221118011714.70877-9-hal.feng@starfivetech.com Signed-off-by: Palmer Dabbelt --- arch/riscv/configs/defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index daba5d743862..74ed7037314f 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -123,6 +123,7 @@ CONFIG_MICROSEMI_PHY=y CONFIG_INPUT_MOUSEDEV=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_DW=y CONFIG_SERIAL_OF_PLATFORM=y CONFIG_VIRTIO_CONSOLE=y CONFIG_HW_RANDOM=y From 0c2a04128f500ea4dfc6bc449507005b998b76ab Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 2 Dec 2022 13:34:45 -0500 Subject: [PATCH 3301/4122] KVM: x86: remove unnecessary exports Several symbols are not used by vendor modules but still exported. Removing them ensures that new coupling between kvm.ko and kvm-*.ko is noticed and reviewed. 
Co-developed-by: Sean Christopherson Co-developed-by: Like Xu Signed-off-by: Sean Christopherson Signed-off-by: Like Xu Signed-off-by: Paolo Bonzini --- arch/x86/kvm/hyperv.c | 1 - arch/x86/kvm/irq.c | 2 -- arch/x86/kvm/lapic.c | 3 --- arch/x86/kvm/x86.c | 8 -------- 4 files changed, 14 deletions(-) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 2c7f2a26421e..cc3e8c7d0850 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -898,7 +898,6 @@ bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu) return false; return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED; } -EXPORT_SYMBOL_GPL(kvm_hv_assist_page_enabled); int kvm_hv_get_assist_page(struct kvm_vcpu *vcpu) { diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index d8d50558f165..a70952eca905 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -31,7 +31,6 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) return r; } -EXPORT_SYMBOL(kvm_cpu_has_pending_timer); /* * check if there is a pending userspace external interrupt @@ -150,7 +149,6 @@ void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) if (kvm_xen_timer_enabled(vcpu)) kvm_xen_inject_timer_irqs(vcpu); } -EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs); void __kvm_migrate_timers(struct kvm_vcpu *vcpu) { diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 8224ac8b617a..4efdb4a4d72c 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -160,7 +160,6 @@ bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu) && !(kvm_mwait_in_guest(vcpu->kvm) || kvm_can_post_timer_interrupt(vcpu)); } -EXPORT_SYMBOL_GPL(kvm_can_use_hv_timer); static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu) { @@ -1914,7 +1913,6 @@ bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu) return vcpu->arch.apic->lapic_timer.hv_timer_in_use; } -EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use); static void cancel_hv_timer(struct kvm_lapic *apic) { @@ -2432,7 +2430,6 @@ void kvm_apic_update_apicv(struct kvm_vcpu *vcpu) 
apic->isr_count = count_vectors(apic->regs + APIC_ISR); } } -EXPORT_SYMBOL_GPL(kvm_apic_update_apicv); void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 152ea4993b76..4825773886f9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -463,7 +463,6 @@ u64 kvm_get_apic_base(struct kvm_vcpu *vcpu) { return vcpu->arch.apic_base; } -EXPORT_SYMBOL_GPL(kvm_get_apic_base); enum lapic_mode kvm_get_apic_mode(struct kvm_vcpu *vcpu) { @@ -491,7 +490,6 @@ int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info) kvm_recalculate_apic_map(vcpu->kvm); return 0; } -EXPORT_SYMBOL_GPL(kvm_set_apic_base); /* * Handle a fault on a hardware virtualization (VMX or SVM) instruction. @@ -782,7 +780,6 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) kvm_queue_exception_e_p(vcpu, PF_VECTOR, fault->error_code, fault->address); } -EXPORT_SYMBOL_GPL(kvm_inject_page_fault); void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) @@ -811,7 +808,6 @@ void kvm_inject_nmi(struct kvm_vcpu *vcpu) atomic_inc(&vcpu->arch.nmi_queued); kvm_make_request(KVM_REQ_NMI, vcpu); } -EXPORT_SYMBOL_GPL(kvm_inject_nmi); void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) { @@ -836,7 +832,6 @@ bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl) kvm_queue_exception_e(vcpu, GP_VECTOR, 0); return false; } -EXPORT_SYMBOL_GPL(kvm_require_cpl); bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr) { @@ -2069,7 +2064,6 @@ int kvm_emulate_as_nop(struct kvm_vcpu *vcpu) { return kvm_skip_emulated_instruction(vcpu); } -EXPORT_SYMBOL_GPL(kvm_emulate_as_nop); int kvm_emulate_invd(struct kvm_vcpu *vcpu) { @@ -2515,7 +2509,6 @@ u64 kvm_scale_tsc(u64 tsc, u64 ratio) return _tsc; } -EXPORT_SYMBOL_GPL(kvm_scale_tsc); static u64 kvm_compute_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) { @@ -12068,7 +12061,6 @@ bool 
kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu) { return vcpu->kvm->arch.bsp_vcpu_id == vcpu->vcpu_id; } -EXPORT_SYMBOL_GPL(kvm_vcpu_is_reset_bsp); bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu) { From 74bee0cad8dcd8ddec5e763c369239fc5990676a Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Fri, 7 Oct 2022 15:16:44 -0700 Subject: [PATCH 3302/4122] KVM: x86: Advertise that the SMM_CTL MSR is not supported CPUID.80000021H:EAX[bit 9] indicates that the SMM_CTL MSR (0xc0010116) is not supported. This defeature can be advertised by KVM_GET_SUPPORTED_CPUID regardless of whether or not the host enumerates it; currently it will be included only if the host enumerates at least leaf 8000001DH, due to a preexisting bug in QEMU that KVM has to work around (commit f751d8eac176, "KVM: x86: work around QEMU issue with synthetic CPUID leaves", 2022-04-29). Signed-off-by: Jim Mattson Message-Id: <20221007221644.138355-1-jmattson@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 723502181a3a..0b5bf013fcb8 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -1233,8 +1233,12 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) * Other defined bits are for MSRs that KVM does not expose: * EAX 3 SPCL, SMM page configuration lock * EAX 13 PCMSR, Prefetch control MSR + * + * KVM doesn't support SMM_CTL. + * EAX 9 SMM_CTL MSR is not supported */ entry->eax &= BIT(0) | BIT(2) | BIT(6); + entry->eax |= BIT(9); if (static_cpu_has(X86_FEATURE_LFENCE_RDTSC)) entry->eax |= BIT(2); if (!static_cpu_has_bug(X86_BUG_NULL_SEG)) From d33deda095d3637d218e7eed441633b2a01e1413 Mon Sep 17 00:00:00 2001 From: Tong Tiangen Date: Mon, 24 Oct 2022 09:47:24 +0000 Subject: [PATCH 3303/4122] riscv/mm: hugepage's PG_dcache_clean flag is only set in head page HugeTLB pages are always fully mapped, so only setting head page's PG_dcache_clean flag is enough. 
Signed-off-by: Tong Tiangen Link: https://lore.kernel.org/lkml/20220331065640.5777-2-songmuchun@bytedance.com/ Link: https://lore.kernel.org/r/20221024094725.3054311-2-tongtiangen@huawei.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/cacheflush.h | 7 +++++++ arch/riscv/mm/cacheflush.c | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h index 8a5c246b0a21..c172d05de474 100644 --- a/arch/riscv/include/asm/cacheflush.h +++ b/arch/riscv/include/asm/cacheflush.h @@ -17,6 +17,13 @@ static inline void local_flush_icache_all(void) static inline void flush_dcache_page(struct page *page) { + /* + * HugeTLB pages are always fully mapped and only head page will be + * set PG_dcache_clean (see comments in flush_icache_pte()). + */ + if (PageHuge(page)) + page = compound_head(page); + if (test_bit(PG_dcache_clean, &page->flags)) clear_bit(PG_dcache_clean, &page->flags); } diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c index 6cb7d96ad9c7..062559c04fc3 100644 --- a/arch/riscv/mm/cacheflush.c +++ b/arch/riscv/mm/cacheflush.c @@ -82,6 +82,13 @@ void flush_icache_pte(pte_t pte) { struct page *page = pte_page(pte); + /* + * HugeTLB pages are always fully mapped, so only setting head page's + * PG_dcache_clean flag is enough. + */ + if (PageHuge(page)) + page = compound_head(page); + if (!test_and_set_bit(PG_dcache_clean, &page->flags)) flush_icache_all(); } From d8bf77a1dc3079692f54be3087a5fd16d90027b0 Mon Sep 17 00:00:00 2001 From: Tong Tiangen Date: Mon, 24 Oct 2022 09:47:25 +0000 Subject: [PATCH 3304/4122] riscv/mm: add arch hook arch_clear_hugepage_flags With the PG_arch_1 we keep track if the page's data cache is clean, architecture rely on this property to treat new pages as dirty with respect to the data cache and perform the flushing before mapping the pages into userspace. 
This patch adds a new architecture hook, arch_clear_hugepage_flags, so that architectures which rely on the page flags being in a particular state for fresh allocations can adjust the flags accordingly when a page is freed into the pool. Fixes: 9e953cda5cdf ("riscv: Introduce huge page support for 32/64bit kernel") Signed-off-by: Tong Tiangen Link: https://lore.kernel.org/r/20221024094725.3054311-3-tongtiangen@huawei.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/hugetlb.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/riscv/include/asm/hugetlb.h b/arch/riscv/include/asm/hugetlb.h index a5c2ca1d1cd8..ec19d6afc896 100644 --- a/arch/riscv/include/asm/hugetlb.h +++ b/arch/riscv/include/asm/hugetlb.h @@ -5,4 +5,10 @@ #include #include +static inline void arch_clear_hugepage_flags(struct page *page) +{ + clear_bit(PG_dcache_clean, &page->flags); +} +#define arch_clear_hugepage_flags arch_clear_hugepage_flags + #endif /* _ASM_RISCV_HUGETLB_H */ From b57c2f124098459a4acc15d5044f87cba31c87f0 Mon Sep 17 00:00:00 2001 From: Binglei Wang Date: Tue, 25 Oct 2022 16:18:32 +0100 Subject: [PATCH 3305/4122] riscv: add riscv rethook implementation Implement the kretprobes on riscv arch by using rethook mechanism which abstracts general kretprobe info into a struct rethook_node to be embedded in the struct kretprobe_instance.
Acked-by: Masami Hiramatsu (Google) Signed-off-by: Binglei Wang Signed-off-by: Conor Dooley Link: https://lore.kernel.org/r/20221025151831.1097417-1-conor@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + arch/riscv/include/asm/kprobes.h | 2 -- arch/riscv/kernel/probes/Makefile | 2 +- arch/riscv/kernel/probes/kprobes.c | 13 --------- arch/riscv/kernel/probes/rethook.c | 27 +++++++++++++++++++ arch/riscv/kernel/probes/rethook.h | 8 ++++++ ...obes_trampoline.S => rethook_trampoline.S} | 6 ++--- 7 files changed, 40 insertions(+), 19 deletions(-) create mode 100644 arch/riscv/kernel/probes/rethook.c create mode 100644 arch/riscv/kernel/probes/rethook.h rename arch/riscv/kernel/probes/{kprobes_trampoline.S => rethook_trampoline.S} (94%) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 8d47562be90c..ef8d66de5f38 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -101,6 +101,7 @@ config RISCV select HAVE_KPROBES if !XIP_KERNEL select HAVE_KPROBES_ON_FTRACE if !XIP_KERNEL select HAVE_KRETPROBES if !XIP_KERNEL + select HAVE_RETHOOK if !XIP_KERNEL select HAVE_MOVE_PMD select HAVE_MOVE_PUD select HAVE_PCI diff --git a/arch/riscv/include/asm/kprobes.h b/arch/riscv/include/asm/kprobes.h index 217ef89f22b9..e7882ccb0fd4 100644 --- a/arch/riscv/include/asm/kprobes.h +++ b/arch/riscv/include/asm/kprobes.h @@ -40,8 +40,6 @@ void arch_remove_kprobe(struct kprobe *p); int kprobe_fault_handler(struct pt_regs *regs, unsigned int trapnr); bool kprobe_breakpoint_handler(struct pt_regs *regs); bool kprobe_single_step_handler(struct pt_regs *regs); -void __kretprobe_trampoline(void); -void __kprobes *trampoline_probe_handler(struct pt_regs *regs); #endif /* CONFIG_KPROBES */ #endif /* _ASM_RISCV_KPROBES_H */ diff --git a/arch/riscv/kernel/probes/Makefile b/arch/riscv/kernel/probes/Makefile index 7f0840dcc31b..c40139e9ca47 100644 --- a/arch/riscv/kernel/probes/Makefile +++ b/arch/riscv/kernel/probes/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: 
GPL-2.0 obj-$(CONFIG_KPROBES) += kprobes.o decode-insn.o simulate-insn.o -obj-$(CONFIG_KPROBES) += kprobes_trampoline.o +obj-$(CONFIG_RETHOOK) += rethook.o rethook_trampoline.o obj-$(CONFIG_KPROBES_ON_FTRACE) += ftrace.o obj-$(CONFIG_UPROBES) += uprobes.o decode-insn.o simulate-insn.o CFLAGS_REMOVE_simulate-insn.o = $(CC_FLAGS_FTRACE) diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c index e6e950b7cf32..f21592d20306 100644 --- a/arch/riscv/kernel/probes/kprobes.c +++ b/arch/riscv/kernel/probes/kprobes.c @@ -345,19 +345,6 @@ int __init arch_populate_kprobe_blacklist(void) return ret; } -void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs) -{ - return (void *)kretprobe_trampoline_handler(regs, NULL); -} - -void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, - struct pt_regs *regs) -{ - ri->ret_addr = (kprobe_opcode_t *)regs->ra; - ri->fp = NULL; - regs->ra = (unsigned long) &__kretprobe_trampoline; -} - int __kprobes arch_trampoline_kprobe(struct kprobe *p) { return 0; diff --git a/arch/riscv/kernel/probes/rethook.c b/arch/riscv/kernel/probes/rethook.c new file mode 100644 index 000000000000..5c27c1f50989 --- /dev/null +++ b/arch/riscv/kernel/probes/rethook.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Generic return hook for riscv. 
+ */ + +#include +#include +#include "rethook.h" + +/* This is called from arch_rethook_trampoline() */ +unsigned long __used arch_rethook_trampoline_callback(struct pt_regs *regs) +{ + return rethook_trampoline_handler(regs, regs->s0); +} + +NOKPROBE_SYMBOL(arch_rethook_trampoline_callback); + +void arch_rethook_prepare(struct rethook_node *rhn, struct pt_regs *regs, bool mcount) +{ + rhn->ret_addr = regs->ra; + rhn->frame = regs->s0; + + /* replace return addr with trampoline */ + regs->ra = (unsigned long)arch_rethook_trampoline; +} + +NOKPROBE_SYMBOL(arch_rethook_prepare); diff --git a/arch/riscv/kernel/probes/rethook.h b/arch/riscv/kernel/probes/rethook.h new file mode 100644 index 000000000000..4758f7e3ce88 --- /dev/null +++ b/arch/riscv/kernel/probes/rethook.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __RISCV_RETHOOK_H +#define __RISCV_RETHOOK_H + +unsigned long arch_rethook_trampoline_callback(struct pt_regs *regs); +void arch_rethook_prepare(struct rethook_node *rhn, struct pt_regs *regs, bool mcount); + +#endif diff --git a/arch/riscv/kernel/probes/kprobes_trampoline.S b/arch/riscv/kernel/probes/rethook_trampoline.S similarity index 94% rename from arch/riscv/kernel/probes/kprobes_trampoline.S rename to arch/riscv/kernel/probes/rethook_trampoline.S index 7bdb09ded39b..21bac92a170a 100644 --- a/arch/riscv/kernel/probes/kprobes_trampoline.S +++ b/arch/riscv/kernel/probes/rethook_trampoline.S @@ -75,13 +75,13 @@ REG_L x31, PT_T6(sp) .endm -ENTRY(__kretprobe_trampoline) +ENTRY(arch_rethook_trampoline) addi sp, sp, -(PT_SIZE_ON_STACK) save_all_base_regs move a0, sp /* pt_regs */ - call trampoline_probe_handler + call arch_rethook_trampoline_callback /* use the result as the return-address */ move ra, a0 @@ -90,4 +90,4 @@ ENTRY(__kretprobe_trampoline) addi sp, sp, PT_SIZE_ON_STACK ret -ENDPROC(__kretprobe_trampoline) +ENDPROC(arch_rethook_trampoline) From de92f65719cd672f4b48397540b9f9eff67eca40 Mon Sep 17 00:00:00 2001 From: Kees 
Cook Date: Fri, 2 Dec 2022 12:59:11 -0800 Subject: [PATCH 3306/4122] exit: Allow oops_limit to be disabled In preparation for keeping oops_limit logic in sync with warn_limit, have oops_limit == 0 disable checking the Oops counter. Cc: Jann Horn Cc: Jonathan Corbet Cc: Andrew Morton Cc: Baolin Wang Cc: "Jason A. Donenfeld" Cc: Eric Biggers Cc: Huang Ying Cc: "Eric W. Biederman" Cc: Arnd Bergmann Cc: linux-doc@vger.kernel.org Signed-off-by: Kees Cook --- Documentation/admin-guide/sysctl/kernel.rst | 5 +++-- kernel/exit.c | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 09f3fb2f8585..a31d8d81ea07 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -671,8 +671,9 @@ oops_limit ========== Number of kernel oopses after which the kernel should panic when -``panic_on_oops`` is not set. Setting this to 0 or 1 has the same effect -as setting ``panic_on_oops=1``. +``panic_on_oops`` is not set. Setting this to 0 disables checking +the count. Setting this to 1 has the same effect as setting +``panic_on_oops=1``. The default value is 10000. osrelease, ostype & version diff --git a/kernel/exit.c b/kernel/exit.c index dc1a32149f94..deffb8e4b1b2 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -954,7 +954,7 @@ void __noreturn make_task_dead(int signr) * To make sure this can't happen, place an upper bound on how often the * kernel may oops without panic(). 
*/ - if (atomic_inc_return(&oops_count) >= READ_ONCE(oops_limit)) + if (atomic_inc_return(&oops_count) >= READ_ONCE(oops_limit) && oops_limit) panic("Oopsed too often (kernel.oops_limit is %d)", oops_limit); /* From 79cc1ba7badf9e7a12af99695a557e9ce27ee967 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 17 Nov 2022 15:43:24 -0800 Subject: [PATCH 3307/4122] panic: Consolidate open-coded panic_on_warn checks Several run-time checkers (KASAN, UBSAN, KFENCE, KCSAN, sched) roll their own warnings, and each check "panic_on_warn". Consolidate this into a single function so that future instrumentation can be added in a single location. Cc: Marco Elver Cc: Dmitry Vyukov Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Juri Lelli Cc: Vincent Guittot Cc: Dietmar Eggemann Cc: Steven Rostedt Cc: Ben Segall Cc: Mel Gorman Cc: Daniel Bristot de Oliveira Cc: Valentin Schneider Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Vincenzo Frascino Cc: Andrew Morton Cc: David Gow Cc: tangmeng Cc: Jann Horn Cc: Shuah Khan Cc: Petr Mladek Cc: "Paul E. McKenney" Cc: Sebastian Andrzej Siewior Cc: "Guilherme G. Piccoli" Cc: Tiezhu Yang Cc: kasan-dev@googlegroups.com Cc: linux-mm@kvack.org Reviewed-by: Luis Chamberlain Signed-off-by: Kees Cook Reviewed-by: Marco Elver Reviewed-by: Andrey Konovalov Link: https://lore.kernel.org/r/20221117234328.594699-4-keescook@chromium.org --- include/linux/panic.h | 1 + kernel/kcsan/report.c | 3 +-- kernel/panic.c | 9 +++++++-- kernel/sched/core.c | 3 +-- lib/ubsan.c | 3 +-- mm/kasan/report.c | 4 ++-- mm/kfence/report.c | 3 +-- 7 files changed, 14 insertions(+), 12 deletions(-) diff --git a/include/linux/panic.h b/include/linux/panic.h index c7759b3f2045..979b776e3bcb 100644 --- a/include/linux/panic.h +++ b/include/linux/panic.h @@ -11,6 +11,7 @@ extern long (*panic_blink)(int state); __printf(1, 2) void panic(const char *fmt, ...) 
__noreturn __cold; void nmi_panic(struct pt_regs *regs, const char *msg); +void check_panic_on_warn(const char *origin); extern void oops_enter(void); extern void oops_exit(void); extern bool oops_may_print(void); diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c index 67794404042a..e95ce7d7a76e 100644 --- a/kernel/kcsan/report.c +++ b/kernel/kcsan/report.c @@ -492,8 +492,7 @@ static void print_report(enum kcsan_value_change value_change, dump_stack_print_info(KERN_DEFAULT); pr_err("==================================================================\n"); - if (panic_on_warn) - panic("panic_on_warn set ...\n"); + check_panic_on_warn("KCSAN"); } static void release_report(unsigned long *flags, struct other_info *other_info) diff --git a/kernel/panic.c b/kernel/panic.c index d843d036651e..cfa354322d5f 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -201,6 +201,12 @@ static void panic_print_sys_info(bool console_flush) ftrace_dump(DUMP_ALL); } +void check_panic_on_warn(const char *origin) +{ + if (panic_on_warn) + panic("%s: panic_on_warn set ...\n", origin); +} + /** * panic - halt the system * @fmt: The text string to print @@ -619,8 +625,7 @@ void __warn(const char *file, int line, void *caller, unsigned taint, if (regs) show_regs(regs); - if (panic_on_warn) - panic("panic_on_warn set ...\n"); + check_panic_on_warn("kernel"); if (!regs) dump_stack(); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 5800b0623ff3..285ef8821b4f 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5729,8 +5729,7 @@ static noinline void __schedule_bug(struct task_struct *prev) pr_err("Preemption disabled at:"); print_ip_sym(KERN_ERR, preempt_disable_ip); } - if (panic_on_warn) - panic("scheduling while atomic\n"); + check_panic_on_warn("scheduling while atomic"); dump_stack(); add_taint(TAINT_WARN, LOCKDEP_STILL_OK); diff --git a/lib/ubsan.c b/lib/ubsan.c index 36bd75e33426..60c7099857a0 100644 --- a/lib/ubsan.c +++ b/lib/ubsan.c @@ -154,8 +154,7 @@ 
static void ubsan_epilogue(void) current->in_ubsan--; - if (panic_on_warn) - panic("panic_on_warn set ...\n"); + check_panic_on_warn("UBSAN"); } void __ubsan_handle_divrem_overflow(void *_data, void *lhs, void *rhs) diff --git a/mm/kasan/report.c b/mm/kasan/report.c index df3602062bfd..cc98dfdd3ed2 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -164,8 +164,8 @@ static void end_report(unsigned long *flags, void *addr) (unsigned long)addr); pr_err("==================================================================\n"); spin_unlock_irqrestore(&report_lock, *flags); - if (panic_on_warn && !test_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags)) - panic("panic_on_warn set ...\n"); + if (!test_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags)) + check_panic_on_warn("KASAN"); if (kasan_arg_fault == KASAN_ARG_FAULT_PANIC) panic("kasan.fault=panic set ...\n"); add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); diff --git a/mm/kfence/report.c b/mm/kfence/report.c index 7e496856c2eb..110c27ca597d 100644 --- a/mm/kfence/report.c +++ b/mm/kfence/report.c @@ -268,8 +268,7 @@ void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *r lockdep_on(); - if (panic_on_warn) - panic("panic_on_warn set ...\n"); + check_panic_on_warn("KFENCE"); /* We encountered a memory safety error, taint the kernel! */ add_taint(TAINT_BAD_PAGE, LOCKDEP_STILL_OK); From 9fc9e278a5c0b708eeffaf47d6eb0c82aa74ed78 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 17 Nov 2022 15:43:25 -0800 Subject: [PATCH 3308/4122] panic: Introduce warn_limit Like oops_limit, add warn_limit for limiting the number of warnings when panic_on_warn is not set. Cc: Jonathan Corbet Cc: Andrew Morton Cc: Baolin Wang Cc: "Jason A. Donenfeld" Cc: Eric Biggers Cc: Huang Ying Cc: Petr Mladek Cc: tangmeng Cc: "Guilherme G. 
Piccoli" Cc: Tiezhu Yang Cc: Sebastian Andrzej Siewior Cc: linux-doc@vger.kernel.org Reviewed-by: Luis Chamberlain Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221117234328.594699-5-keescook@chromium.org --- Documentation/admin-guide/sysctl/kernel.rst | 10 ++++++++++ kernel/panic.c | 14 ++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index a31d8d81ea07..179bd303b585 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -1509,6 +1509,16 @@ entry will default to 2 instead of 0. 2 Unprivileged calls to ``bpf()`` are disabled = ============================================================= + +warn_limit +========== + +Number of kernel warnings after which the kernel should panic when +``panic_on_warn`` is not set. Setting this to 0 disables checking +the warning count. Setting this to 1 has the same effect as setting +``panic_on_warn=1``. The default value is 0. 
+ + watchdog ======== diff --git a/kernel/panic.c b/kernel/panic.c index cfa354322d5f..f4403fc14f67 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -58,6 +58,7 @@ bool crash_kexec_post_notifiers; int panic_on_warn __read_mostly; unsigned long panic_on_taint; bool panic_on_taint_nousertaint = false; +static unsigned int warn_limit __read_mostly; int panic_timeout = CONFIG_PANIC_TIMEOUT; EXPORT_SYMBOL_GPL(panic_timeout); @@ -88,6 +89,13 @@ static struct ctl_table kern_panic_table[] = { .extra2 = SYSCTL_ONE, }, #endif + { + .procname = "warn_limit", + .data = &warn_limit, + .maxlen = sizeof(warn_limit), + .mode = 0644, + .proc_handler = proc_douintvec, + }, { } }; @@ -203,8 +211,14 @@ static void panic_print_sys_info(bool console_flush) void check_panic_on_warn(const char *origin) { + static atomic_t warn_count = ATOMIC_INIT(0); + if (panic_on_warn) panic("%s: panic_on_warn set ...\n", origin); + + if (atomic_inc_return(&warn_count) >= READ_ONCE(warn_limit) && warn_limit) + panic("%s: system warned too often (kernel.warn_limit is %d)", + origin, warn_limit); } /** From 8b05aa26336113c4cea25f1c333ee8cd4fc212a6 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 17 Nov 2022 15:43:26 -0800 Subject: [PATCH 3309/4122] panic: Expose "warn_count" to sysfs Since Warn count is now tracked and is a fairly interesting signal, add the entry /sys/kernel/warn_count to expose it to userspace. Cc: Petr Mladek Cc: Andrew Morton Cc: tangmeng Cc: "Guilherme G. 
Piccoli" Cc: Sebastian Andrzej Siewior Cc: Tiezhu Yang Reviewed-by: Luis Chamberlain Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221117234328.594699-6-keescook@chromium.org --- .../ABI/testing/sysfs-kernel-warn_count | 6 +++++ MAINTAINERS | 1 + kernel/panic.c | 22 +++++++++++++++++-- 3 files changed, 27 insertions(+), 2 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-kernel-warn_count diff --git a/Documentation/ABI/testing/sysfs-kernel-warn_count b/Documentation/ABI/testing/sysfs-kernel-warn_count new file mode 100644 index 000000000000..08f083d2fd51 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-warn_count @@ -0,0 +1,6 @@ +What: /sys/kernel/warn_count +Date: November 2022 +KernelVersion: 6.2.0 +Contact: Linux Kernel Hardening List +Description: + Shows how many times the system has Warned since last boot. diff --git a/MAINTAINERS b/MAINTAINERS index 0a1e95a58e54..282cd8a513fd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11107,6 +11107,7 @@ L: linux-hardening@vger.kernel.org S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/hardening F: Documentation/ABI/testing/sysfs-kernel-oops_count +F: Documentation/ABI/testing/sysfs-kernel-warn_count F: include/linux/overflow.h F: include/linux/randomize_kstack.h F: mm/usercopy.c diff --git a/kernel/panic.c b/kernel/panic.c index f4403fc14f67..54deb743b2d5 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -107,6 +108,25 @@ static __init int kernel_panic_sysctls_init(void) late_initcall(kernel_panic_sysctls_init); #endif +static atomic_t warn_count = ATOMIC_INIT(0); + +#ifdef CONFIG_SYSFS +static ssize_t warn_count_show(struct kobject *kobj, struct kobj_attribute *attr, + char *page) +{ + return sysfs_emit(page, "%d\n", atomic_read(&warn_count)); +} + +static struct kobj_attribute warn_count_attr = __ATTR_RO(warn_count); + +static __init int kernel_panic_sysfs_init(void)
+{ + sysfs_add_file_to_group(kernel_kobj, &warn_count_attr.attr, NULL); + return 0; +} +late_initcall(kernel_panic_sysfs_init); +#endif + static long no_blink(int state) { return 0; @@ -211,8 +231,6 @@ static void panic_print_sys_info(bool console_flush) void check_panic_on_warn(const char *origin) { - static atomic_t warn_count = ATOMIC_INIT(0); - if (panic_on_warn) panic("%s: panic_on_warn set ...\n", origin); From 5abf698754b8e5e4f1ca1058ee2b9785fbce6d23 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Mon, 28 Nov 2022 02:44:03 -0800 Subject: [PATCH 3310/4122] lib: fortify_kunit: build without structleak plugin Building allmodconfig with aarch64-linux-gnu-gcc (Debian 11.3.0-6), fortify_kunit with structleak plugin enabled makes the stack frame size to grow too large: lib/fortify_kunit.c:140:1: error: the frame size of 2368 bytes is larger than 2048 bytes [-Werror=frame-larger-than=] Turn off the structleak plugin checks for fortify_kunit. Suggested-by: Arnd Bergmann Signed-off-by: Anders Roxell Signed-off-by: Kees Cook --- lib/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/Makefile b/lib/Makefile index 2f0454b931dc..83c650bb4459 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -379,6 +379,7 @@ obj-$(CONFIG_OVERFLOW_KUNIT_TEST) += overflow_kunit.o CFLAGS_stackinit_kunit.o += $(call cc-disable-warning, switch-unreachable) obj-$(CONFIG_STACKINIT_KUNIT_TEST) += stackinit_kunit.o CFLAGS_fortify_kunit.o += $(call cc-disable-warning, unsequenced) +CFLAGS_fortify_kunit.o += $(DISABLE_STRUCTLEAK_PLUGIN) obj-$(CONFIG_FORTIFY_KUNIT_TEST) += fortify_kunit.o obj-$(CONFIG_STRSCPY_KUNIT_TEST) += strscpy_kunit.o obj-$(CONFIG_SIPHASH_KUNIT_TEST) += siphash_kunit.o From 3a017d6355f24de42f2ad688df9fa19e0cb128f2 Mon Sep 17 00:00:00 2001 From: "haifeng.xu" Date: Mon, 28 Nov 2022 06:56:06 +0000 Subject: [PATCH 3311/4122] signal: Initialize the info in ksignal When handling the SIGNAL_GROUP_EXIT flag, the info in ksignal isn't cleared.
However, the info acquired by dequeue_synchronous_signal/dequeue_signal is initialized and can be safely used. Fortunately, the fatal signal process just uses the si_signo and doesn't use any other member. Even so, the initialization before use is safer. Signed-off-by: haifeng.xu Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221128065606.19570-1-haifeng.xu@shopee.com --- kernel/signal.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/signal.c b/kernel/signal.c index d140672185a4..b9b0c8c620e7 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2693,6 +2693,7 @@ relock: /* Has this task already been marked for death? */ if ((signal->flags & SIGNAL_GROUP_EXIT) || signal->group_exec_task) { + clear_siginfo(&ksig->info); ksig->info.si_signo = signr = SIGKILL; sigdelset(&current->pending.signal, SIGKILL); trace_signal_deliver(SIGKILL, SEND_SIG_NOINFO, From bdc77507fecd00ddad2f502f86a48a9ec38f0f84 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 1 Dec 2022 16:23:25 -0800 Subject: [PATCH 3312/4122] um: virt-pci: Avoid GCC non-NULL warning GCC gets confused about the return value of get_cpu_var() possibly being NULL, so explicitly test for it before calls to memcpy() and memset(). Avoids warnings like this: arch/um/drivers/virt-pci.c: In function 'um_pci_send_cmd': include/linux/fortify-string.h:48:33: warning: argument 1 null where non-null expected [-Wnonnull] 48 | #define __underlying_memcpy __builtin_memcpy | ^ include/linux/fortify-string.h:438:9: note: in expansion of macro '__underlying_memcpy' 438 | __underlying_##op(p, q, __fortify_size); \ | ^~~~~~~~~~~~~ include/linux/fortify-string.h:483:26: note: in expansion of macro '__fortify_memcpy_chk' 483 | #define memcpy(p, q, s) __fortify_memcpy_chk(p, q, s, \ | ^~~~~~~~~~~~~~~~~~~~ arch/um/drivers/virt-pci.c:100:9: note: in expansion of macro 'memcpy' 100 | memcpy(buf, cmd, cmd_size); | ^~~~~~ While at it, avoid literal "8" and use stored sizeof(buf->data) in memset() and um_pci_send_cmd().
Reported-by: kernel test robot Link: https://lore.kernel.org/lkml/202211271212.SUZSC9f9-lkp@intel.com Fixes: ba38961a069b ("um: Enable FORTIFY_SOURCE") Cc: Richard Weinberger Cc: Anton Ivanov Cc: Johannes Berg Cc: "Michael S. Tsirkin" Cc: Al Viro Cc: Xiu Jianfeng Cc: Vincent Whitchurch Cc: linux-um@lists.infradead.org Cc: stable@vger.kernel.org Signed-off-by: Kees Cook --- arch/um/drivers/virt-pci.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/um/drivers/virt-pci.c b/arch/um/drivers/virt-pci.c index acb55b302b14..3ac220dafec4 100644 --- a/arch/um/drivers/virt-pci.c +++ b/arch/um/drivers/virt-pci.c @@ -97,7 +97,8 @@ static int um_pci_send_cmd(struct um_pci_device *dev, } buf = get_cpu_var(um_pci_msg_bufs); - memcpy(buf, cmd, cmd_size); + if (buf) + memcpy(buf, cmd, cmd_size); if (posted) { u8 *ncmd = kmalloc(cmd_size + extra_size, GFP_ATOMIC); @@ -182,6 +183,7 @@ static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset, struct um_pci_message_buffer *buf; u8 *data; unsigned long ret = ULONG_MAX; + size_t bytes = sizeof(buf->data); if (!dev) return ULONG_MAX; @@ -189,7 +191,8 @@ static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset, buf = get_cpu_var(um_pci_msg_bufs); data = buf->data; - memset(buf->data, 0xff, sizeof(buf->data)); + if (buf) + memset(data, 0xff, bytes); switch (size) { case 1: @@ -204,7 +207,7 @@ static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset, goto out; } - if (um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, data, 8)) + if (um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, data, bytes)) goto out; switch (size) { From d662198e03bc7fb4635156ee7e8b8d325e2d8512 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 17 Nov 2022 19:42:55 -0800 Subject: [PATCH 3313/4122] hpet: Replace one-element array with flexible-array member One-element arrays are deprecated[1] and are being replaced with flexible array members in support of the ongoing efforts to tighten the 
FORTIFY_SOURCE routines on memcpy(), correctly instrument array indexing with UBSAN_BOUNDS, and to globally enable -fstrict-flex-arrays=3. Replace one-element array with flexible-array member in struct hpet. This results in no differences in binary output. The use of struct hpet is never used with sizeof() and accesses via hpet_timers array are already done after explicit bounds checking. [1] https://github.com/KSPP/linux/issues/79 Cc: Clemens Ladisch Cc: "Gustavo A. R. Silva" Signed-off-by: Kees Cook Reviewed-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/20221118034250.never.999-kees@kernel.org --- include/linux/hpet.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/hpet.h b/include/linux/hpet.h index 8604564b985d..21e69eaf7a36 100644 --- a/include/linux/hpet.h +++ b/include/linux/hpet.h @@ -30,7 +30,7 @@ struct hpet { unsigned long _hpet_compare; } _u1; u64 hpet_fsb[2]; /* FSB route */ - } hpet_timers[1]; + } hpet_timers[]; }; #define hpet_mc _u0._hpet_mc From d272e01fa0a2f15c5c331a37cd99c6875c7b7186 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 15 Nov 2022 09:35:10 -0600 Subject: [PATCH 3314/4122] ksmbd: replace one-element arrays with flexible-array members One-element arrays are deprecated, and we are replacing them with flexible array members instead. So, replace one-element arrays with flexible-array members in multiple structs in fs/ksmbd/smb_common.h and one in fs/ksmbd/smb2pdu.h. Important to mention is that doing a build before/after this patch results in no binary output differences. This helps with the ongoing efforts to tighten the FORTIFY_SOURCE routines on memcpy() and help us make progress towards globally enabling -fstrict-flex-arrays=3 [1]. Link: https://github.com/KSPP/linux/issues/242 Link: https://github.com/KSPP/linux/issues/79 Link: https://gcc.gnu.org/pipermail/gcc-patches/2022-October/602902.html [1] Signed-off-by: Gustavo A. R. 
Silva Reviewed-by: Sergey Senozhatsky Acked-by: Namjae Jeon Reviewed-by: Kees Cook Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/Y3OxronfaPYv9qGP@work --- fs/ksmbd/smb2pdu.c | 4 ++-- fs/ksmbd/smb2pdu.h | 2 +- fs/ksmbd/smb_common.h | 12 ++++++------ 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index b2fc85d440d0..31f00cec5255 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -3438,7 +3438,7 @@ static int smb2_populate_readdir_entry(struct ksmbd_conn *conn, int info_level, goto free_conv_name; } - struct_sz = readdir_info_level_struct_sz(info_level) - 1 + conv_len; + struct_sz = readdir_info_level_struct_sz(info_level) + conv_len; next_entry_offset = ALIGN(struct_sz, KSMBD_DIR_INFO_ALIGNMENT); d_info->last_entry_off_align = next_entry_offset - struct_sz; @@ -3690,7 +3690,7 @@ static int reserve_populate_dentry(struct ksmbd_dir_info *d_info, return -EOPNOTSUPP; conv_len = (d_info->name_len + 1) * 2; - next_entry_offset = ALIGN(struct_sz - 1 + conv_len, + next_entry_offset = ALIGN(struct_sz + conv_len, KSMBD_DIR_INFO_ALIGNMENT); if (next_entry_offset > d_info->out_buf_len) { diff --git a/fs/ksmbd/smb2pdu.h b/fs/ksmbd/smb2pdu.h index 092fdd3f8750..aa5dbe54f5a1 100644 --- a/fs/ksmbd/smb2pdu.h +++ b/fs/ksmbd/smb2pdu.h @@ -443,7 +443,7 @@ struct smb2_posix_info { /* SidBuffer contain two sids (UNIX user sid(16), UNIX group sid(16)) */ u8 SidBuffer[32]; __le32 name_len; - u8 name[1]; + u8 name[]; /* * var sized owner SID * var sized group SID diff --git a/fs/ksmbd/smb_common.h b/fs/ksmbd/smb_common.h index 318c16fa81da..e663ab9ea759 100644 --- a/fs/ksmbd/smb_common.h +++ b/fs/ksmbd/smb_common.h @@ -277,14 +277,14 @@ struct file_directory_info { __le64 AllocationSize; __le32 ExtFileAttributes; __le32 FileNameLength; - char FileName[1]; + char FileName[]; } __packed; /* level 0x101 FF resp data */ struct file_names_info { __le32 NextEntryOffset; __u32 FileIndex; __le32 FileNameLength; - char 
FileName[1]; + char FileName[]; } __packed; /* level 0xc FF resp data */ struct file_full_directory_info { @@ -299,7 +299,7 @@ struct file_full_directory_info { __le32 ExtFileAttributes; __le32 FileNameLength; __le32 EaSize; - char FileName[1]; + char FileName[]; } __packed; /* level 0x102 FF resp */ struct file_both_directory_info { @@ -317,7 +317,7 @@ struct file_both_directory_info { __u8 ShortNameLength; __u8 Reserved; __u8 ShortName[24]; - char FileName[1]; + char FileName[]; } __packed; /* level 0x104 FFrsp data */ struct file_id_both_directory_info { @@ -337,7 +337,7 @@ struct file_id_both_directory_info { __u8 ShortName[24]; __le16 Reserved2; __le64 UniqueId; - char FileName[1]; + char FileName[]; } __packed; struct file_id_full_dir_info { @@ -354,7 +354,7 @@ struct file_id_full_dir_info { __le32 EaSize; /* EA size */ __le32 Reserved; __le64 UniqueId; /* inode num - le since Samba puts ino in low 32 bit*/ - char FileName[1]; + char FileName[]; } __packed; /* level 0x105 FF rsp data */ struct smb_version_values { From 649d6b1019a2f243bc3a98cb85902a8ebf74289a Mon Sep 17 00:00:00 2001 From: Xianting Tian Date: Wed, 26 Oct 2022 22:42:07 +0800 Subject: [PATCH 3315/4122] RISC-V: Add arch_crash_save_vmcoreinfo support Add arch_crash_save_vmcoreinfo(), which exports VM layout(MODULES, VMALLOC, VMEMMAP ranges and KERNEL_LINK_ADDR), va bits and ram base for vmcore. Default pagetable levels and PAGE_OFFSET aren't same for different kernel version as below. For pagetable levels, it sets sv57 by default and falls back to setting sv48 at boot time if sv57 is not supported by the hardware. For ram base, the default value is 0x80200000 for qemu riscv64 env and, for example, is 0x200000 on the XuanTie 910 CPU. 
* Linux Kernel 5.18 ~ * PGTABLE_LEVELS = 5 * PAGE_OFFSET = 0xff60000000000000 * Linux Kernel 5.17 ~ * PGTABLE_LEVELS = 4 * PAGE_OFFSET = 0xffffaf8000000000 * Linux Kernel 4.19 ~ * PGTABLE_LEVELS = 3 * PAGE_OFFSET = 0xffffffe000000000 Since these configurations change from time to time and version to version, it is preferable to export them via vmcoreinfo than to change the crash's code frequently, it can simplify the development of crash tool. Signed-off-by: Xianting Tian Tested-by: Deepak Gupta Tested-by: Guo Ren Acked-by: Baoquan He Link: https://lore.kernel.org/r/20221026144208.373504-2-xianting.tian@linux.alibaba.com [Palmer: wrap commit text] Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/Makefile | 1 + arch/riscv/kernel/crash_core.c | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 arch/riscv/kernel/crash_core.c diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index db6e4b1294ba..4cf303a779ab 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -81,6 +81,7 @@ obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_KEXEC_CORE) += kexec_relocate.o crash_save_regs.o machine_kexec.o obj-$(CONFIG_KEXEC_FILE) += elf_kexec.o machine_kexec_file.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o +obj-$(CONFIG_CRASH_CORE) += crash_core.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o diff --git a/arch/riscv/kernel/crash_core.c b/arch/riscv/kernel/crash_core.c new file mode 100644 index 000000000000..b351a3c01355 --- /dev/null +++ b/arch/riscv/kernel/crash_core.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include + +void arch_crash_save_vmcoreinfo(void) +{ + VMCOREINFO_NUMBER(VA_BITS); + VMCOREINFO_NUMBER(phys_ram_base); + + vmcoreinfo_append_str("NUMBER(PAGE_OFFSET)=0x%lx\n", PAGE_OFFSET); + vmcoreinfo_append_str("NUMBER(VMALLOC_START)=0x%lx\n", VMALLOC_START); + vmcoreinfo_append_str("NUMBER(VMALLOC_END)=0x%lx\n", VMALLOC_END); + vmcoreinfo_append_str("NUMBER(VMEMMAP_START)=0x%lx\n", 
VMEMMAP_START); + vmcoreinfo_append_str("NUMBER(VMEMMAP_END)=0x%lx\n", VMEMMAP_END); +#ifdef CONFIG_64BIT + vmcoreinfo_append_str("NUMBER(MODULES_VADDR)=0x%lx\n", MODULES_VADDR); + vmcoreinfo_append_str("NUMBER(MODULES_END)=0x%lx\n", MODULES_END); +#endif + vmcoreinfo_append_str("NUMBER(KERNEL_LINK_ADDR)=0x%lx\n", KERNEL_LINK_ADDR); +} From c5b4216929ebc8ac9107a373db65babc14ba4e80 Mon Sep 17 00:00:00 2001 From: Xianting Tian Date: Wed, 26 Oct 2022 22:42:08 +0800 Subject: [PATCH 3316/4122] Documentation: kdump: describe VMCOREINFO export for RISCV64 The following interrelated definitions and ranges are needed by the kdump crash tool, which are exported by "arch/riscv/kernel/crash_core.c": VA_BITS, PAGE_OFFSET, phys_ram_base, KERNEL_LINK_ADDR, MODULES_VADDR ~ MODULES_END, VMALLOC_START ~ VMALLOC_END, VMEMMAP_START ~ VMEMMAP_END, Document these RISCV64 exports above. Reviewed-by: Bagas Sanjaya Signed-off-by: Xianting Tian Acked-by: Baoquan He Link: https://lore.kernel.org/r/20221026144208.373504-3-xianting.tian@linux.alibaba.com [Palmer: wrap commit text] Signed-off-by: Palmer Dabbelt --- .../admin-guide/kdump/vmcoreinfo.rst | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/Documentation/admin-guide/kdump/vmcoreinfo.rst b/Documentation/admin-guide/kdump/vmcoreinfo.rst index 6726f439958c..86fd88492870 100644 --- a/Documentation/admin-guide/kdump/vmcoreinfo.rst +++ b/Documentation/admin-guide/kdump/vmcoreinfo.rst @@ -595,3 +595,32 @@ X2TLB ----- Indicates whether the crashed kernel enabled SH extended mode. + +RISCV64 +======= + +VA_BITS +------- + +The maximum number of bits for virtual addresses. Used to compute the +virtual memory ranges. + +PAGE_OFFSET +----------- + +Indicates the virtual kernel start address of the direct-mapped RAM region. + +phys_ram_base +------------- + +Indicates the start physical RAM address. 
+ +MODULES_VADDR|MODULES_END|VMALLOC_START|VMALLOC_END|VMEMMAP_START|VMEMMAP_END|KERNEL_LINK_ADDR +---------------------------------------------------------------------------------------------- + +Used to get the correct ranges: + + * MODULES_VADDR ~ MODULES_END : Kernel module space. + * VMALLOC_START ~ VMALLOC_END : vmalloc() / ioremap() space. + * VMEMMAP_START ~ VMEMMAP_END : vmemmap space, used for struct page array. + * KERNEL_LINK_ADDR : start address of Kernel link and BPF From 54c03bfd094fb74f9533a9c28250219afe182382 Mon Sep 17 00:00:00 2001 From: Qiheng Lin Date: Mon, 28 Nov 2022 22:27:40 +0800 Subject: [PATCH 3317/4122] power: supply: Fix refcount leak in rk817_charger_probe of_get_child_by_name() returns a node pointer with refcount incremented, we should use of_node_put() on it when not need anymore. Add missing of_node_put() to avoid refcount leak. Fixes: 11cb8da0189b ("power: supply: Add charger driver for Rockchip RK817") Signed-off-by: Qiheng Lin Reviewed-by: Chris Morgan Signed-off-by: Sebastian Reichel --- drivers/power/supply/rk817_charger.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/power/supply/rk817_charger.c b/drivers/power/supply/rk817_charger.c index 635f051b0821..d25a81d79fac 100644 --- a/drivers/power/supply/rk817_charger.c +++ b/drivers/power/supply/rk817_charger.c @@ -1060,8 +1060,10 @@ static int rk817_charger_probe(struct platform_device *pdev) return -ENODEV; charger = devm_kzalloc(&pdev->dev, sizeof(*charger), GFP_KERNEL); - if (!charger) + if (!charger) { + of_node_put(node); return -ENOMEM; + } charger->rk808 = rk808; From a7aaa80098d5b7608b2dc1e883e3c3f929415243 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 28 Nov 2022 10:28:48 +0100 Subject: [PATCH 3318/4122] power: supply: bq25890: Ensure pump_express_work is cancelled on remove The pump_express_work which gets queued from an external_power_changed callback might be pending / running on remove() (or on probe failure). 
Add a devm action cancelling the work, to ensure that it is cancelled. Note the devm action is added before devm_power_supply_register(), making it run after devm unregisters the power_supply, so that the work cannot be queued anymore (this is also why a devm action is used for this). Fixes: 48f45b094dbb ("power: supply: bq25890: Support higher charging voltages through Pump Express+ protocol") Reviewed-by: Marek Vasut Signed-off-by: Hans de Goede Signed-off-by: Sebastian Reichel --- drivers/power/supply/bq25890_charger.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/power/supply/bq25890_charger.c b/drivers/power/supply/bq25890_charger.c index 512c81662eea..866c475bb735 100644 --- a/drivers/power/supply/bq25890_charger.c +++ b/drivers/power/supply/bq25890_charger.c @@ -1317,6 +1317,13 @@ static int bq25890_fw_probe(struct bq25890_device *bq) return 0; } +static void bq25890_non_devm_cleanup(void *data) +{ + struct bq25890_device *bq = data; + + cancel_delayed_work_sync(&bq->pump_express_work); +} + static int bq25890_probe(struct i2c_client *client) { struct device *dev = &client->dev; @@ -1372,6 +1379,14 @@ static int bq25890_probe(struct i2c_client *client) /* OTG reporting */ bq->usb_phy = devm_usb_get_phy(dev, USB_PHY_TYPE_USB2); + /* + * This must be before bq25890_power_supply_init(), so that it runs + * after devm unregisters the power_supply. + */ + ret = devm_add_action_or_reset(dev, bq25890_non_devm_cleanup, bq); + if (ret) + return ret; + ret = bq25890_register_regulator(bq); if (ret) return ret; From 7e6fb67808ab5ceba73a6f45d0942e1e25ac56a7 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 28 Nov 2022 10:28:49 +0100 Subject: [PATCH 3319/4122] power: supply: bq25890: Fix usb-notifier probe and remove races There are 2 races surrounding the usb-notifier: 1. The notifier, and thus usb_work, may run before the bq->charger power_supply class device is registered. 
But usb_work may call power_supply_changed() which relies on the psy device being registered. 2. usb_work may be pending/running at remove() time, so it needs to be cancelled on remove after unregistering the usb-notifier. Fix 1. by moving usb-notifier registration to after the power_supply registration. Fix 2. by adding a cancel_work_sync() call directly after the usb-notifier unregistration. Reviewed-by: Marek Vasut Signed-off-by: Hans de Goede Signed-off-by: Sebastian Reichel --- drivers/power/supply/bq25890_charger.c | 30 +++++++++++--------------- 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/drivers/power/supply/bq25890_charger.c b/drivers/power/supply/bq25890_charger.c index 866c475bb735..2d731ea58323 100644 --- a/drivers/power/supply/bq25890_charger.c +++ b/drivers/power/supply/bq25890_charger.c @@ -1391,40 +1391,34 @@ static int bq25890_probe(struct i2c_client *client) if (ret) return ret; - if (!IS_ERR_OR_NULL(bq->usb_phy)) { - INIT_WORK(&bq->usb_work, bq25890_usb_work); - bq->usb_nb.notifier_call = bq25890_usb_notifier; - usb_register_notifier(bq->usb_phy, &bq->usb_nb); - } - ret = bq25890_power_supply_init(bq); - if (ret < 0) { - dev_err(dev, "Failed to register power supply\n"); - goto err_unregister_usb_notifier; - } + if (ret < 0) + return dev_err_probe(dev, ret, "registering power supply\n"); ret = devm_request_threaded_irq(dev, client->irq, NULL, bq25890_irq_handler_thread, IRQF_TRIGGER_FALLING | IRQF_ONESHOT, BQ25890_IRQ_PIN, bq); if (ret) - goto err_unregister_usb_notifier; + return ret; + + if (!IS_ERR_OR_NULL(bq->usb_phy)) { + INIT_WORK(&bq->usb_work, bq25890_usb_work); + bq->usb_nb.notifier_call = bq25890_usb_notifier; + usb_register_notifier(bq->usb_phy, &bq->usb_nb); + } return 0; - -err_unregister_usb_notifier: - if (!IS_ERR_OR_NULL(bq->usb_phy)) - usb_unregister_notifier(bq->usb_phy, &bq->usb_nb); - - return ret; } static void bq25890_remove(struct i2c_client *client) { struct bq25890_device *bq = 
i2c_get_clientdata(client); - if (!IS_ERR_OR_NULL(bq->usb_phy)) + if (!IS_ERR_OR_NULL(bq->usb_phy)) { usb_unregister_notifier(bq->usb_phy, &bq->usb_nb); + cancel_work_sync(&bq->usb_work); + } if (!bq->skip_reset) { /* reset all registers to default values */ From 5de7cdd7fa0f62b3e8d2facc8f604e49d887677e Mon Sep 17 00:00:00 2001 From: Aidan MacDonald Date: Sat, 12 Nov 2022 15:24:46 +0000 Subject: [PATCH 3320/4122] extcon: max77843: Replace irqchip mask_invert with unmask_base Remove use of the deprecated mask_invert flag. Inverted mask registers (where a '1' bit enables an IRQ) can be described more directly as an unmask register. Signed-off-by: Aidan MacDonald Reviewed-by: Krzysztof Kozlowski Signed-off-by: Chanwoo Choi --- drivers/extcon/extcon-max77843.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/extcon/extcon-max77843.c b/drivers/extcon/extcon-max77843.c index 8e6e97ec65a8..1bc0426ce3f1 100644 --- a/drivers/extcon/extcon-max77843.c +++ b/drivers/extcon/extcon-max77843.c @@ -189,8 +189,7 @@ static const struct regmap_irq max77843_muic_irq[] = { static const struct regmap_irq_chip max77843_muic_irq_chip = { .name = "max77843-muic", .status_base = MAX77843_MUIC_REG_INT1, - .mask_base = MAX77843_MUIC_REG_INTMASK1, - .mask_invert = true, + .unmask_base = MAX77843_MUIC_REG_INTMASK1, .num_regs = 3, .irqs = max77843_muic_irq, .num_irqs = ARRAY_SIZE(max77843_muic_irq), From df9c4faa81c9659eefc9e149ae9b2124de17dfa7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:35:42 +0100 Subject: [PATCH 3321/4122] extcon: fsa9480: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Chanwoo Choi --- drivers/extcon/extcon-fsa9480.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/extcon/extcon-fsa9480.c b/drivers/extcon/extcon-fsa9480.c index 7cff66c29907..e8b2671eb29b 100644 --- a/drivers/extcon/extcon-fsa9480.c +++ b/drivers/extcon/extcon-fsa9480.c @@ -257,8 +257,7 @@ static irqreturn_t fsa9480_irq_handler(int irq, void *data) return IRQ_HANDLED; } -static int fsa9480_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int fsa9480_probe(struct i2c_client *client) { struct fsa9480_usbsw *info; int ret; @@ -370,7 +369,7 @@ static struct i2c_driver fsa9480_i2c_driver = { .pm = &fsa9480_pm_ops, .of_match_table = fsa9480_of_match, }, - .probe = fsa9480_probe, + .probe_new = fsa9480_probe, .id_table = fsa9480_id, }; From 881de30c28ac0725ab8fb9af905b568a849f8d0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:35:43 +0100 Subject: [PATCH 3322/4122] extcon: rt8973: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Chanwoo Choi --- drivers/extcon/extcon-rt8973a.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/extcon/extcon-rt8973a.c b/drivers/extcon/extcon-rt8973a.c index e6e448f6ea2f..afc9b405d103 100644 --- a/drivers/extcon/extcon-rt8973a.c +++ b/drivers/extcon/extcon-rt8973a.c @@ -548,8 +548,7 @@ static void rt8973a_init_dev_type(struct rt8973a_muic_info *info) } } -static int rt8973a_muic_i2c_probe(struct i2c_client *i2c, - const struct i2c_device_id *id) +static int rt8973a_muic_i2c_probe(struct i2c_client *i2c) { struct device_node *np = i2c->dev.of_node; struct rt8973a_muic_info *info; @@ -696,7 +695,7 @@ static struct i2c_driver rt8973a_muic_i2c_driver = { .pm = &rt8973a_muic_pm_ops, .of_match_table = rt8973a_dt_match, }, - .probe = rt8973a_muic_i2c_probe, + .probe_new = rt8973a_muic_i2c_probe, .remove = rt8973a_muic_i2c_remove, .id_table = rt8973a_i2c_id, }; From 5313121b22fd11db0d14f305c110168b8176efdc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:35:44 +0100 Subject: [PATCH 3323/4122] extcon: usbc-tusb320: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Chanwoo Choi --- drivers/extcon/extcon-usbc-tusb320.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/extcon/extcon-usbc-tusb320.c b/drivers/extcon/extcon-usbc-tusb320.c index 2a120d8d3c27..396a026e0c90 100644 --- a/drivers/extcon/extcon-usbc-tusb320.c +++ b/drivers/extcon/extcon-usbc-tusb320.c @@ -421,8 +421,7 @@ static int tusb320_typec_probe(struct i2c_client *client, return 0; } -static int tusb320_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int tusb320_probe(struct i2c_client *client) { struct tusb320_priv *priv; const void *match_data; @@ -495,7 +494,7 @@ static const struct of_device_id tusb320_extcon_dt_match[] = { MODULE_DEVICE_TABLE(of, tusb320_extcon_dt_match); static struct i2c_driver tusb320_extcon_driver = { - .probe = tusb320_probe, + .probe_new = tusb320_probe, .driver = { .name = "extcon-tusb320", .of_match_table = tusb320_extcon_dt_match, From 323a74fc20f53c0d0e13a16aee703a30d9751235 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 2 Dec 2022 13:19:40 -0800 Subject: [PATCH 3324/4122] RDMA: Disable IB HW for UML Disable all of drivers/infiniband/hw/ and rdmavt for UML builds until someone needs it and provides patches to support it. This prevents build errors in hw/qib/qib_wc_x86_64.c. 
Fixes: 68f5d3f3b654 ("um: add PCI over virtio emulation driver") Signed-off-by: Randy Dunlap Cc: Jason Gunthorpe Cc: Dennis Dalessandro Cc: Christoph Hellwig Cc: linux-rdma@vger.kernel.org Cc: Jeff Dike Cc: Richard Weinberger Cc: Anton Ivanov Cc: Johannes Berg Cc: linux-um@lists.infradead.org Link: https://lore.kernel.org/r/20221202211940.29111-1-rdunlap@infradead.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index ccc874478f0b..a5827d11e934 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -78,6 +78,7 @@ config INFINIBAND_VIRT_DMA def_bool !HIGHMEM if INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS +if !UML source "drivers/infiniband/hw/bnxt_re/Kconfig" source "drivers/infiniband/hw/cxgb4/Kconfig" source "drivers/infiniband/hw/efa/Kconfig" @@ -95,6 +96,7 @@ source "drivers/infiniband/hw/qib/Kconfig" source "drivers/infiniband/hw/usnic/Kconfig" source "drivers/infiniband/hw/vmw_pvrdma/Kconfig" source "drivers/infiniband/sw/rdmavt/Kconfig" +endif # !UML source "drivers/infiniband/sw/rxe/Kconfig" source "drivers/infiniband/sw/siw/Kconfig" endif From 725349f8ba1e78a146c6ff8f3ee5e2712e517106 Mon Sep 17 00:00:00 2001 From: Wang Yufen Date: Fri, 2 Dec 2022 12:00:37 +0800 Subject: [PATCH 3325/4122] RDMA/hfi1: Fix error return code in parse_platform_config() In the previous iteration of the while loop, the "ret" may have been assigned a value of 0, so the error return code -EINVAL may have been incorrectly set to 0. To fix set valid return code before calling to goto. 
Fixes: 97167e813415 ("staging/rdma/hfi1: Tune for unknown channel if configuration file is absent") Signed-off-by: Wang Yufen Link: https://lore.kernel.org/r/1669953638-11747-1-git-send-email-wangyufen@huawei.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hfi1/firmware.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c index 1d77514ebbee..0c0cef5b1e0e 100644 --- a/drivers/infiniband/hw/hfi1/firmware.c +++ b/drivers/infiniband/hw/hfi1/firmware.c @@ -1743,6 +1743,7 @@ int parse_platform_config(struct hfi1_devdata *dd) if (!dd->platform_config.data) { dd_dev_err(dd, "%s: Missing config file\n", __func__); + ret = -EINVAL; goto bail; } ptr = (u32 *)dd->platform_config.data; @@ -1751,6 +1752,7 @@ int parse_platform_config(struct hfi1_devdata *dd) ptr++; if (magic_num != PLATFORM_CONFIG_MAGIC_NUM) { dd_dev_err(dd, "%s: Bad config file\n", __func__); + ret = -EINVAL; goto bail; } @@ -1774,6 +1776,7 @@ int parse_platform_config(struct hfi1_devdata *dd) if (file_length > dd->platform_config.size) { dd_dev_info(dd, "%s:File claims to be larger than read size\n", __func__); + ret = -EINVAL; goto bail; } else if (file_length < dd->platform_config.size) { dd_dev_info(dd, @@ -1794,6 +1797,7 @@ int parse_platform_config(struct hfi1_devdata *dd) dd_dev_err(dd, "%s: Failed validation at offset %ld\n", __func__, (ptr - (u32 *) dd->platform_config.data)); + ret = -EINVAL; goto bail; } @@ -1837,6 +1841,7 @@ int parse_platform_config(struct hfi1_devdata *dd) __func__, table_type, (ptr - (u32 *) dd->platform_config.data)); + ret = -EINVAL; goto bail; /* We don't trust this file now */ } pcfgcache->config_tables[table_type].table = ptr; @@ -1856,6 +1861,7 @@ int parse_platform_config(struct hfi1_devdata *dd) __func__, table_type, (ptr - (u32 *)dd->platform_config.data)); + ret = -EINVAL; goto bail; /* We don't trust this file now */ } pcfgcache->config_tables[table_type].table_metadata = 
From ed461b30b22c8fa85c25189c14cb89f29595cd14 Mon Sep 17 00:00:00 2001 From: Wang Yufen Date: Fri, 2 Dec 2022 12:00:38 +0800 Subject: [PATCH 3326/4122] RDMA/srp: Fix error return code in srp_parse_options() In the previous iteration of the while loop, the "ret" may have been assigned a value of 0, so the error return code -EINVAL may have been incorrectly set to 0. To fix set valid return code before calling to goto. Also investigate each case separately as Andy suggested. Fixes: e711f968c49c ("IB/srp: replace custom implementation of hex2bin()") Fixes: 2a174df0c602 ("IB/srp: Use kstrtoull() instead of simple_strtoull()") Fixes: 19f313438c77 ("IB/srp: Add RDMA/CM support") Signed-off-by: Wang Yufen Link: https://lore.kernel.org/r/1669953638-11747-2-git-send-email-wangyufen@huawei.com Reviewed-by: Bart Van Assche Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/srp/ib_srp.c | 96 ++++++++++++++++++++++++----- 1 file changed, 82 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 1075c2ac8fe2..b4d6a4a5ae81 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -3410,7 +3410,8 @@ static int srp_parse_options(struct net *net, const char *buf, break; case SRP_OPT_PKEY: - if (match_hex(args, &token)) { + ret = match_hex(args, &token); + if (ret) { pr_warn("bad P_Key parameter '%s'\n", p); goto out; } @@ -3470,7 +3471,8 @@ static int srp_parse_options(struct net *net, const char *buf, break; case SRP_OPT_MAX_SECT: - if (match_int(args, &token)) { + ret = match_int(args, &token); + if (ret) { pr_warn("bad max sect parameter '%s'\n", p); goto out; } @@ -3478,8 +3480,15 @@ static int srp_parse_options(struct net *net, const char *buf, break; case SRP_OPT_QUEUE_SIZE: - if (match_int(args, &token) || token < 1) { + ret = match_int(args, &token); + if (ret) { + pr_warn("match_int() failed for queue_size parameter '%s', Error %d\n", + p, ret); + goto out; + } +
if (token < 1) { pr_warn("bad queue_size parameter '%s'\n", p); + ret = -EINVAL; goto out; } target->scsi_host->can_queue = token; @@ -3490,25 +3499,40 @@ static int srp_parse_options(struct net *net, const char *buf, break; case SRP_OPT_MAX_CMD_PER_LUN: - if (match_int(args, &token) || token < 1) { + ret = match_int(args, &token); + if (ret) { + pr_warn("match_int() failed for max cmd_per_lun parameter '%s', Error %d\n", + p, ret); + goto out; + } + if (token < 1) { pr_warn("bad max cmd_per_lun parameter '%s'\n", p); + ret = -EINVAL; goto out; } target->scsi_host->cmd_per_lun = token; break; case SRP_OPT_TARGET_CAN_QUEUE: - if (match_int(args, &token) || token < 1) { + ret = match_int(args, &token); + if (ret) { + pr_warn("match_int() failed for max target_can_queue parameter '%s', Error %d\n", + p, ret); + goto out; + } + if (token < 1) { pr_warn("bad max target_can_queue parameter '%s'\n", p); + ret = -EINVAL; goto out; } target->target_can_queue = token; break; case SRP_OPT_IO_CLASS: - if (match_hex(args, &token)) { + ret = match_hex(args, &token); + if (ret) { pr_warn("bad IO class parameter '%s'\n", p); goto out; } @@ -3517,6 +3541,7 @@ static int srp_parse_options(struct net *net, const char *buf, pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n", token, SRP_REV10_IB_IO_CLASS, SRP_REV16A_IB_IO_CLASS); + ret = -EINVAL; goto out; } target->io_class = token; @@ -3539,16 +3564,24 @@ static int srp_parse_options(struct net *net, const char *buf, break; case SRP_OPT_CMD_SG_ENTRIES: - if (match_int(args, &token) || token < 1 || token > 255) { + ret = match_int(args, &token); + if (ret) { + pr_warn("match_int() failed for max cmd_sg_entries parameter '%s', Error %d\n", + p, ret); + goto out; + } + if (token < 1 || token > 255) { pr_warn("bad max cmd_sg_entries parameter '%s'\n", p); + ret = -EINVAL; goto out; } target->cmd_sg_cnt = token; break; case SRP_OPT_ALLOW_EXT_SG: - if (match_int(args, &token)) { + ret = match_int(args, &token); + if 
(ret) { pr_warn("bad allow_ext_sg parameter '%s'\n", p); goto out; } @@ -3556,43 +3589,77 @@ static int srp_parse_options(struct net *net, const char *buf, break; case SRP_OPT_SG_TABLESIZE: - if (match_int(args, &token) || token < 1 || - token > SG_MAX_SEGMENTS) { + ret = match_int(args, &token); + if (ret) { + pr_warn("match_int() failed for max sg_tablesize parameter '%s', Error %d\n", + p, ret); + goto out; + } + if (token < 1 || token > SG_MAX_SEGMENTS) { pr_warn("bad max sg_tablesize parameter '%s'\n", p); + ret = -EINVAL; goto out; } target->sg_tablesize = token; break; case SRP_OPT_COMP_VECTOR: - if (match_int(args, &token) || token < 0) { + ret = match_int(args, &token); + if (ret) { + pr_warn("match_int() failed for comp_vector parameter '%s', Error %d\n", + p, ret); + goto out; + } + if (token < 0) { pr_warn("bad comp_vector parameter '%s'\n", p); + ret = -EINVAL; goto out; } target->comp_vector = token; break; case SRP_OPT_TL_RETRY_COUNT: - if (match_int(args, &token) || token < 2 || token > 7) { + ret = match_int(args, &token); + if (ret) { + pr_warn("match_int() failed for tl_retry_count parameter '%s', Error %d\n", + p, ret); + goto out; + } + if (token < 2 || token > 7) { pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n", p); + ret = -EINVAL; goto out; } target->tl_retry_count = token; break; case SRP_OPT_MAX_IT_IU_SIZE: - if (match_int(args, &token) || token < 0) { + ret = match_int(args, &token); + if (ret) { + pr_warn("match_int() failed for max it_iu_size parameter '%s', Error %d\n", + p, ret); + goto out; + } + if (token < 0) { pr_warn("bad maximum initiator to target IU size '%s'\n", p); + ret = -EINVAL; goto out; } target->max_it_iu_size = token; break; case SRP_OPT_CH_COUNT: - if (match_int(args, &token) || token < 1) { + ret = match_int(args, &token); + if (ret) { + pr_warn("match_int() failed for channel count parameter '%s', Error %d\n", + p, ret); + goto out; + } + if (token < 1) { pr_warn("bad channel count 
%s\n", p); + ret = -EINVAL; goto out; } target->ch_count = token; @@ -3601,6 +3668,7 @@ static int srp_parse_options(struct net *net, const char *buf, default: pr_warn("unknown parameter or missing value '%s' in target creation request\n", p); + ret = -EINVAL; goto out; } } From 6978837ce42f8bea85041fc08c854f4e28852b3e Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Sat, 3 Dec 2022 11:37:14 +0800 Subject: [PATCH 3327/4122] RDMA/mlx5: no need to kfree NULL pointer Goto label 'free' where it will kfree the 'in' is not needed though it's safe to kfree NULL. Return err code directly to simplify the code. 1973 free: 1974 kfree(in); 1975 return err; Signed-off-by: Li Zhijian Link: https://lore.kernel.org/r/20221203033714.25870-1-lizhijian@fujitsu.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mr.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 410cc5fd2523..053fe946e45a 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1929,10 +1929,8 @@ int mlx5_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata) ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4); in = kzalloc(inlen, GFP_KERNEL); - if (!in) { - err = -ENOMEM; - goto free; - } + if (!in) + return -ENOMEM; mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); From 1f5619ed881081be300db61da552ffae7163bb72 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Fri, 2 Dec 2022 13:30:51 -0800 Subject: [PATCH 3328/4122] xfs: Remove duplicated include in xfs_iomap.c ./fs/xfs/xfs_iomap.c: xfs_error.h is included more than once. ./fs/xfs/xfs_iomap.c: xfs_errortag.h is included more than once. Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=3337 Reported-by: Abaci Robot Signed-off-by: Yang Li Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. 
Wong --- fs/xfs/xfs_iomap.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 68436370927d..43f447199c08 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -27,8 +27,6 @@ #include "xfs_dquot_item.h" #include "xfs_dquot.h" #include "xfs_reflink.h" -#include "xfs_error.h" -#include "xfs_errortag.h" #define XFS_ALLOC_ALIGN(mp, off) \ (((off) >> mp->m_allocsize_log) << mp->m_allocsize_log) From af5d74e32eb8e1b833f687047f0ffe3801d7229d Mon Sep 17 00:00:00 2001 From: Xu Panda Date: Sat, 3 Dec 2022 14:10:56 +0800 Subject: [PATCH 3329/4122] m68k: use strscpy() to instead of strncpy() The implementation of strscpy() is more robust and safer. Signed-off-by: Xu Panda Signed-off-by: Yang Yang Signed-off-by: Greg Ungerer --- arch/m68k/kernel/setup_no.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/m68k/kernel/setup_no.c b/arch/m68k/kernel/setup_no.c index cb6def585851..37fb663559b4 100644 --- a/arch/m68k/kernel/setup_no.c +++ b/arch/m68k/kernel/setup_no.c @@ -90,8 +90,7 @@ void __init setup_arch(char **cmdline_p) config_BSP(&command_line[0], sizeof(command_line)); #if defined(CONFIG_BOOTPARAM) - strncpy(&command_line[0], CONFIG_BOOTPARAM_STRING, sizeof(command_line)); - command_line[sizeof(command_line) - 1] = 0; + strscpy(&command_line[0], CONFIG_BOOTPARAM_STRING, sizeof(command_line)); #endif /* CONFIG_BOOTPARAM */ process_uboot_commandline(&command_line[0], sizeof(command_line)); From af45de8368883c9620a7735a8c2532e52101c1a2 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 29 Nov 2022 15:47:01 +0100 Subject: [PATCH 3330/4122] dt-bindings: qcom: geni-se: document I2C Master Hub wrapper variant The I2C Master Hub is a stripped down version of the GENI Serial Engine QUP Wrapper Controller but only supporting I2C serial engines without DMA support. Document the variant compatible, forbid UART and SPI sub-nodes, and remove requirement for the Master AHB clock and iommu property. 
Signed-off-by: Neil Armstrong Reviewed-by: Krzysztof Kozlowski Signed-off-by: Wolfram Sang --- .../bindings/soc/qcom/qcom,geni-se.yaml | 44 ++++++++++++++++--- 1 file changed, 38 insertions(+), 6 deletions(-) diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom,geni-se.yaml b/Documentation/devicetree/bindings/soc/qcom/qcom,geni-se.yaml index 2bf5293fc995..ab4df0205285 100644 --- a/Documentation/devicetree/bindings/soc/qcom/qcom,geni-se.yaml +++ b/Documentation/devicetree/bindings/soc/qcom/qcom,geni-se.yaml @@ -21,20 +21,19 @@ properties: compatible: enum: - qcom,geni-se-qup + - qcom,geni-se-i2c-master-hub reg: description: QUP wrapper common register address and length. maxItems: 1 clock-names: - items: - - const: m-ahb - - const: s-ahb + minItems: 1 + maxItems: 2 clocks: - items: - - description: Master AHB Clock - - description: Slave AHB Clock + minItems: 1 + maxItems: 2 "#address-cells": const: 2 @@ -81,6 +80,39 @@ patternProperties: description: GENI Serial Engine based UART Controller. $ref: /schemas/serial/qcom,serial-geni-qcom.yaml# +allOf: + - if: + properties: + compatible: + contains: + const: qcom,geni-se-i2c-master-hub + then: + properties: + clock-names: + items: + - const: s-ahb + + clocks: + items: + - description: Slave AHB Clock + + iommus: false + + patternProperties: + "spi@[0-9a-f]+$": false + "serial@[0-9a-f]+$": false + else: + properties: + clock-names: + items: + - const: m-ahb + - const: s-ahb + + clocks: + items: + - description: Master AHB Clock + - description: Slave AHB Clock + additionalProperties: false examples: From cb29d4e6a9ef004eed24774bc26839aa9e373ba4 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 29 Nov 2022 15:47:02 +0100 Subject: [PATCH 3331/4122] dt-bindings: i2c: qcom-geni: document I2C Master Hub serial I2C engine The I2C Master Hub is a stripped down version of the GENI Serial Engine QUP Wrapper Controller but only supporting I2C serial engines without DMA support. 
Document the I2C Serial Engine variant used within the I2C Master Hub Wrapper. This serial engine variant lacks DMA support, requires a core clock, and since DMA support is lacking the memory interconnect path isn't needed. Signed-off-by: Neil Armstrong Reviewed-by: Krzysztof Kozlowski Signed-off-by: Wolfram Sang --- .../bindings/i2c/qcom,i2c-geni-qcom.yaml | 64 ++++++++++++++++--- 1 file changed, 54 insertions(+), 10 deletions(-) diff --git a/Documentation/devicetree/bindings/i2c/qcom,i2c-geni-qcom.yaml b/Documentation/devicetree/bindings/i2c/qcom,i2c-geni-qcom.yaml index 0e7ed00562e2..f5f7dc8f325c 100644 --- a/Documentation/devicetree/bindings/i2c/qcom,i2c-geni-qcom.yaml +++ b/Documentation/devicetree/bindings/i2c/qcom,i2c-geni-qcom.yaml @@ -10,18 +10,19 @@ maintainers: - Andy Gross - Bjorn Andersson -allOf: - - $ref: /schemas/i2c/i2c-controller.yaml# - properties: compatible: - const: qcom,geni-i2c + enum: + - qcom,geni-i2c + - qcom,geni-i2c-master-hub clocks: - maxItems: 1 + minItems: 1 + maxItems: 2 clock-names: - const: se + minItems: 1 + maxItems: 2 clock-frequency: default: 100000 @@ -35,13 +36,12 @@ properties: - const: rx interconnects: + minItems: 2 maxItems: 3 interconnect-names: - items: - - const: qup-core - - const: qup-config - - const: qup-memory + minItems: 2 + maxItems: 3 interrupts: maxItems: 1 @@ -71,6 +71,50 @@ required: - clock-names - reg +allOf: + - $ref: /schemas/i2c/i2c-controller.yaml# + - if: + properties: + compatible: + contains: + const: qcom,geni-i2c-master-hub + then: + properties: + clocks: + minItems: 2 + + clock-names: + items: + - const: se + - const: core + + dmas: false + dma-names: false + + interconnects: + maxItems: 2 + + interconnect-names: + items: + - const: qup-core + - const: qup-config + else: + properties: + clocks: + maxItems: 1 + + clock-names: + const: se + + interconnects: + minItems: 3 + + interconnect-names: + items: + - const: qup-core + - const: qup-config + - const: qup-memory + unevaluatedProperties: false 
examples: From 63fc9af83c11e02ea9c981d3bd0382e36f49916f Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 29 Nov 2022 15:47:03 +0100 Subject: [PATCH 3332/4122] soc: qcom: geni-se: add desc struct to specify clocks from device match data The I2C Master Hub is a stripped down version of the GENI Serial Engine QUP Wrapper Controller but only supporting I2C serial engines without DMA support. Prepare support for the I2C Master Hub variant by moving the required clocks list to a new desc struct then passing it through the compatible match data. Signed-off-by: Neil Armstrong Reviewed-by: Konrad Dybcio Signed-off-by: Wolfram Sang --- drivers/soc/qcom/qcom-geni-se.c | 69 ++++++++++++++++++++++++++------- 1 file changed, 55 insertions(+), 14 deletions(-) diff --git a/drivers/soc/qcom/qcom-geni-se.c b/drivers/soc/qcom/qcom-geni-se.c index a0ceeede450f..9ddee9fd11ba 100644 --- a/drivers/soc/qcom/qcom-geni-se.c +++ b/drivers/soc/qcom/qcom-geni-se.c @@ -81,19 +81,31 @@ */ #define MAX_CLK_PERF_LEVEL 32 -#define NUM_AHB_CLKS 2 +#define MAX_CLKS 2 /** * struct geni_wrapper - Data structure to represent the QUP Wrapper Core * @dev: Device pointer of the QUP wrapper core * @base: Base address of this instance of QUP wrapper core - * @ahb_clks: Handle to the primary & secondary AHB clocks + * @clks: Handle to the primary & optional secondary AHB clocks + * @num_clks: Count of clocks * @to_core: Core ICC path */ struct geni_wrapper { struct device *dev; void __iomem *base; - struct clk_bulk_data ahb_clks[NUM_AHB_CLKS]; + struct clk_bulk_data clks[MAX_CLKS]; + unsigned int num_clks; +}; + +/** + * struct geni_se_desc - Data structure to represent the QUP Wrapper resources + * @clks: Name of the primary & optional secondary AHB clocks + * @num_clks: Count of clock names + */ +struct geni_se_desc { + unsigned int num_clks; + const char * const *clks; }; static const char * const icc_path_names[] = {"qup-core", "qup-config", @@ -496,8 +508,7 @@ static void geni_se_clks_off(struct 
geni_se *se) struct geni_wrapper *wrapper = se->wrapper; clk_disable_unprepare(se->clk); - clk_bulk_disable_unprepare(ARRAY_SIZE(wrapper->ahb_clks), - wrapper->ahb_clks); + clk_bulk_disable_unprepare(wrapper->num_clks, wrapper->clks); } /** @@ -528,15 +539,13 @@ static int geni_se_clks_on(struct geni_se *se) int ret; struct geni_wrapper *wrapper = se->wrapper; - ret = clk_bulk_prepare_enable(ARRAY_SIZE(wrapper->ahb_clks), - wrapper->ahb_clks); + ret = clk_bulk_prepare_enable(wrapper->num_clks, wrapper->clks); if (ret) return ret; ret = clk_prepare_enable(se->clk); if (ret) - clk_bulk_disable_unprepare(ARRAY_SIZE(wrapper->ahb_clks), - wrapper->ahb_clks); + clk_bulk_disable_unprepare(wrapper->num_clks, wrapper->clks); return ret; } @@ -887,11 +896,33 @@ static int geni_se_probe(struct platform_device *pdev) return PTR_ERR(wrapper->base); if (!has_acpi_companion(&pdev->dev)) { - wrapper->ahb_clks[0].id = "m-ahb"; - wrapper->ahb_clks[1].id = "s-ahb"; - ret = devm_clk_bulk_get(dev, NUM_AHB_CLKS, wrapper->ahb_clks); + const struct geni_se_desc *desc; + int i; + + desc = device_get_match_data(&pdev->dev); + if (!desc) + return -EINVAL; + + wrapper->num_clks = min_t(unsigned int, desc->num_clks, MAX_CLKS); + + for (i = 0; i < wrapper->num_clks; ++i) + wrapper->clks[i].id = desc->clks[i]; + + ret = of_count_phandle_with_args(dev->of_node, "clocks", "#clock-cells"); + if (ret < 0) { + dev_err(dev, "invalid clocks property at %pOF\n", dev->of_node); + return ret; + } + + if (ret < wrapper->num_clks) { + dev_err(dev, "invalid clocks count at %pOF, expected %d entries\n", + dev->of_node, wrapper->num_clks); + return -EINVAL; + } + + ret = devm_clk_bulk_get(dev, wrapper->num_clks, wrapper->clks); if (ret) { - dev_err(dev, "Err getting AHB clks %d\n", ret); + dev_err(dev, "Err getting clks %d\n", ret); return ret; } } @@ -901,8 +932,18 @@ static int geni_se_probe(struct platform_device *pdev) return devm_of_platform_populate(dev); } +static const char * const qup_clks[] = { + 
"m-ahb", + "s-ahb", +}; + +static const struct geni_se_desc qup_desc = { + .clks = qup_clks, + .num_clks = ARRAY_SIZE(qup_clks), +}; + static const struct of_device_id geni_se_dt_match[] = { - { .compatible = "qcom,geni-se-qup", }, + { .compatible = "qcom,geni-se-qup", .data = &qup_desc }, {} }; MODULE_DEVICE_TABLE(of, geni_se_dt_match); From f4aba01db4801cc02d1fae4c8099984a23740996 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 29 Nov 2022 15:47:04 +0100 Subject: [PATCH 3333/4122] soc: qcom: geni-se: add support for I2C Master Hub wrapper variant The I2C Master Hub is a stripped down version of the GENI Serial Engine QUP Wrapper Controller but only supporting I2C serial engines without DMA support. Add the clock list for the I2C Master Hub variant to a new desc struct then pass it through the I2C Master Hub compatible match data. Signed-off-by: Neil Armstrong Reviewed-by: Konrad Dybcio Signed-off-by: Wolfram Sang --- drivers/soc/qcom/qcom-geni-se.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/soc/qcom/qcom-geni-se.c b/drivers/soc/qcom/qcom-geni-se.c index 9ddee9fd11ba..f0475b93ca73 100644 --- a/drivers/soc/qcom/qcom-geni-se.c +++ b/drivers/soc/qcom/qcom-geni-se.c @@ -942,8 +942,18 @@ static const struct geni_se_desc qup_desc = { .num_clks = ARRAY_SIZE(qup_clks), }; +static const char * const i2c_master_hub_clks[] = { + "s-ahb", +}; + +static const struct geni_se_desc i2c_master_hub_desc = { + .clks = i2c_master_hub_clks, + .num_clks = ARRAY_SIZE(i2c_master_hub_clks), +}; + static const struct of_device_id geni_se_dt_match[] = { { .compatible = "qcom,geni-se-qup", .data = &qup_desc }, + { .compatible = "qcom,geni-se-i2c-master-hub", .data = &i2c_master_hub_desc }, {} }; MODULE_DEVICE_TABLE(of, geni_se_dt_match); From 14d02fbadb5dc1cdf66078ef8430dd1cd22bfd53 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 29 Nov 2022 15:47:05 +0100 Subject: [PATCH 3334/4122] i2c: qcom-geni: add desc struct to prepare support for I2C Master 
Hub variant The I2C Master Hub is a stripped down version of the GENI Serial Engine QUP Wrapper Controller but only supporting I2C serial engines without DMA support. Those I2C serial engines variants have some requirements: - a separate "core" clock - doesn't support DMA, thus no memory interconnect path - fixed FIFO size not discoverable in the HW_PARAM_0 register Add a desc struct specifying all those requirements which will be used in a next change when adding the I2C Master Hub serial engine compatible. Signed-off-by: Neil Armstrong Reviewed-by: Konrad Dybcio Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-qcom-geni.c | 50 ++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index 84a77512614d..75dd0718c5a1 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -88,6 +88,7 @@ struct geni_i2c_dev { int cur_wr; int cur_rd; spinlock_t lock; + struct clk *core_clk; u32 clk_freq_out; const struct geni_i2c_clk_fld *clk_fld; int suspended; @@ -100,6 +101,13 @@ struct geni_i2c_dev { bool abort_done; }; +struct geni_i2c_desc { + bool has_core_clk; + char *icc_ddr; + bool no_dma_support; + unsigned int tx_fifo_depth; +}; + struct geni_i2c_err_log { int err; const char *msg; @@ -764,6 +772,7 @@ static int geni_i2c_probe(struct platform_device *pdev) u32 proto, tx_depth, fifo_disable; int ret; struct device *dev = &pdev->dev; + const struct geni_i2c_desc *desc = NULL; gi2c = devm_kzalloc(dev, sizeof(*gi2c), GFP_KERNEL); if (!gi2c) @@ -776,6 +785,14 @@ static int geni_i2c_probe(struct platform_device *pdev) if (IS_ERR(gi2c->se.base)) return PTR_ERR(gi2c->se.base); + desc = device_get_match_data(&pdev->dev); + + if (desc && desc->has_core_clk) { + gi2c->core_clk = devm_clk_get(dev, "core"); + if (IS_ERR(gi2c->core_clk)) + return PTR_ERR(gi2c->core_clk); + } + gi2c->se.clk = devm_clk_get(dev, "se"); if 
(IS_ERR(gi2c->se.clk) && !has_acpi_companion(dev)) return PTR_ERR(gi2c->se.clk); @@ -819,7 +836,7 @@ static int geni_i2c_probe(struct platform_device *pdev) gi2c->adap.dev.of_node = dev->of_node; strscpy(gi2c->adap.name, "Geni-I2C", sizeof(gi2c->adap.name)); - ret = geni_icc_get(&gi2c->se, "qup-memory"); + ret = geni_icc_get(&gi2c->se, desc ? desc->icc_ddr : "qup-memory"); if (ret) return ret; /* @@ -829,12 +846,17 @@ static int geni_i2c_probe(struct platform_device *pdev) */ gi2c->se.icc_paths[GENI_TO_CORE].avg_bw = GENI_DEFAULT_BW; gi2c->se.icc_paths[CPU_TO_GENI].avg_bw = GENI_DEFAULT_BW; - gi2c->se.icc_paths[GENI_TO_DDR].avg_bw = Bps_to_icc(gi2c->clk_freq_out); + if (!desc || desc->icc_ddr) + gi2c->se.icc_paths[GENI_TO_DDR].avg_bw = Bps_to_icc(gi2c->clk_freq_out); ret = geni_icc_set_bw(&gi2c->se); if (ret) return ret; + ret = clk_prepare_enable(gi2c->core_clk); + if (ret) + return ret; + ret = geni_se_resources_on(&gi2c->se); if (ret) { dev_err(dev, "Error turning on resources %d\n", ret); @@ -844,10 +866,15 @@ static int geni_i2c_probe(struct platform_device *pdev) if (proto != GENI_SE_I2C) { dev_err(dev, "Invalid proto %d\n", proto); geni_se_resources_off(&gi2c->se); + clk_disable_unprepare(gi2c->core_clk); return -ENXIO; } - fifo_disable = readl_relaxed(gi2c->se.base + GENI_IF_DISABLE_RO) & FIFO_IF_DISABLE; + if (desc && desc->no_dma_support) + fifo_disable = false; + else + fifo_disable = readl_relaxed(gi2c->se.base + GENI_IF_DISABLE_RO) & FIFO_IF_DISABLE; + if (fifo_disable) { /* FIFO is disabled, so we can only use GPI DMA */ gi2c->gpi_mode = true; @@ -859,6 +886,16 @@ static int geni_i2c_probe(struct platform_device *pdev) } else { gi2c->gpi_mode = false; tx_depth = geni_se_get_tx_fifo_depth(&gi2c->se); + + /* I2C Master Hub Serial Elements doesn't have the HW_PARAM_0 register */ + if (!tx_depth && desc) + tx_depth = desc->tx_fifo_depth; + + if (!tx_depth) { + dev_err(dev, "Invalid TX FIFO depth\n"); + return -EINVAL; + } + gi2c->tx_wm = tx_depth - 1; 
geni_se_init(&gi2c->se, gi2c->tx_wm, tx_depth); geni_se_config_packing(&gi2c->se, BITS_PER_BYTE, @@ -867,6 +904,7 @@ static int geni_i2c_probe(struct platform_device *pdev) dev_dbg(dev, "i2c fifo/se-dma mode. fifo depth:%d\n", tx_depth); } + clk_disable_unprepare(gi2c->core_clk); ret = geni_se_resources_off(&gi2c->se); if (ret) { dev_err(dev, "Error turning off resources %d\n", ret); @@ -932,6 +970,8 @@ static int __maybe_unused geni_i2c_runtime_suspend(struct device *dev) gi2c->suspended = 1; } + clk_disable_unprepare(gi2c->core_clk); + return geni_icc_disable(&gi2c->se); } @@ -944,6 +984,10 @@ static int __maybe_unused geni_i2c_runtime_resume(struct device *dev) if (ret) return ret; + ret = clk_prepare_enable(gi2c->core_clk); + if (ret) + return ret; + ret = geni_se_resources_on(&gi2c->se); if (ret) return ret; From cacd9643eca7a1f4635479aff4ec33aaade45e64 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 29 Nov 2022 15:47:06 +0100 Subject: [PATCH 3335/4122] i2c: qcom-geni: add support for I2C Master Hub variant The I2C Master Hub is a stripped down version of the GENI Serial Engine QUP Wrapper Controller but only supporting I2C serial engines without DMA support. Add the I2C Master Hub serial engine compatible along the specific requirements in a new desc struct passed through the device match data. 
Signed-off-by: Neil Armstrong Reviewed-by: Konrad Dybcio Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-qcom-geni.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index 75dd0718c5a1..bfe75038bc14 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -1026,8 +1026,16 @@ static const struct dev_pm_ops geni_i2c_pm_ops = { NULL) }; +const struct geni_i2c_desc i2c_master_hub = { + .has_core_clk = true, + .icc_ddr = NULL, + .no_dma_support = true, + .tx_fifo_depth = 16, +}; + static const struct of_device_id geni_i2c_dt_match[] = { { .compatible = "qcom,geni-i2c" }, + { .compatible = "qcom,geni-i2c-master-hub", .data = &i2c_master_hub }, {} }; MODULE_DEVICE_TABLE(of, geni_i2c_dt_match); From eaade84a6302f139aede74fe5a568a70adb9baa2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 5 Dec 2022 12:31:44 +0800 Subject: [PATCH 3336/4122] crypto: api - Use linux/cache.h instead of asm/cache.h Directly including asm/cache.h leads to build failures on powerpc so replace it with linux/cache.h instead. 
Fixes: e634ac4a8aaa ("crypto: api - Add crypto_tfm_ctx_dma") Reported-by: Stephen Rothwell Signed-off-by: Herbert Xu --- include/crypto/algapi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 8722fd67f40a..61b327206b55 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -7,8 +7,8 @@ #ifndef _CRYPTO_ALGAPI_H #define _CRYPTO_ALGAPI_H -#include #include +#include #include #include #include From 73e9841ba7e3e394c276d5f24ef4e639bd5f24e1 Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Wed, 30 Nov 2022 13:55:51 +0800 Subject: [PATCH 3337/4122] i2c: gpio: Fix potential unused warning for 'i2c_gpio_dt_ids' Dropping a matching #ifdef check along with dropping of_match_ptr() is just a cleanup, while dropping of_match_ptr() that has no corresponding #ifdef fixes an actual warning. Suggested-by: Andy Shevchenko Acked-by: Arnd Bergmann Signed-off-by: Binbin Zhou Reviewed-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-gpio.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-gpio.c b/drivers/i2c/busses/i2c-gpio.c index b1985c1667e1..0e4385a9bcf7 100644 --- a/drivers/i2c/busses/i2c-gpio.c +++ b/drivers/i2c/busses/i2c-gpio.c @@ -482,19 +482,17 @@ static int i2c_gpio_remove(struct platform_device *pdev) return 0; } -#if defined(CONFIG_OF) static const struct of_device_id i2c_gpio_dt_ids[] = { { .compatible = "i2c-gpio", }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, i2c_gpio_dt_ids); -#endif static struct platform_driver i2c_gpio_driver = { .driver = { .name = "i2c-gpio", - .of_match_table = of_match_ptr(i2c_gpio_dt_ids), + .of_match_table = i2c_gpio_dt_ids, }, .probe = i2c_gpio_probe, .remove = i2c_gpio_remove, From 99c4ec2397bb88c3325676a308b93aa8ba362de8 Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Tue, 17 Apr 2018 16:32:32 +0200 Subject: [PATCH 3338/4122] i2c: mux: pca9541: switch to using .probe_new Use the new 
probe style for i2c drivers. Signed-off-by: Peter Rosin Signed-off-by: Wolfram Sang --- drivers/i2c/muxes/i2c-mux-pca9541.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/muxes/i2c-mux-pca9541.c b/drivers/i2c/muxes/i2c-mux-pca9541.c index ea83de78f52d..09d1d9e67e31 100644 --- a/drivers/i2c/muxes/i2c-mux-pca9541.c +++ b/drivers/i2c/muxes/i2c-mux-pca9541.c @@ -283,8 +283,7 @@ static int pca9541_release_chan(struct i2c_mux_core *muxc, u32 chan) /* * I2C init/probing/exit functions */ -static int pca9541_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int pca9541_probe(struct i2c_client *client) { struct i2c_adapter *adap = client->adapter; struct i2c_mux_core *muxc; @@ -337,7 +336,7 @@ static struct i2c_driver pca9541_driver = { .name = "pca9541", .of_match_table = of_match_ptr(pca9541_of_match), }, - .probe = pca9541_probe, + .probe_new = pca9541_probe, .remove = pca9541_remove, .id_table = pca9541_id, }; From a00f6d3723f5617222ab8df228228c3c2c84e3ec Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Wed, 12 Oct 2022 18:36:47 +0200 Subject: [PATCH 3339/4122] drivers/i2c: use simple i2c probe All these drivers have an i2c probe function which doesn't use the "struct i2c_device_id *id" parameter, so they can trivially be converted to the "probe_new" style of probe with a single argument. This is part of an ongoing transition to single-argument i2c probe functions. Old-style probe functions involve a call to i2c_match_id: in drivers/i2c/i2c-core-base.c, /* * When there are no more users of probe(), * rename probe_new to probe. */ if (driver->probe_new) status = driver->probe_new(client); else if (driver->probe) status = driver->probe(client, i2c_match_id(driver->id_table, client)); else status = -EINVAL; Drivers which don't need the second parameter can be declared using probe_new instead, avoiding the call to i2c_match_id. 
Drivers which do can still be converted to probe_new-style, calling i2c_match_id themselves (as is done currently for of_match_id). This change was done using the following Coccinelle script, and fixed up for whitespace changes: @ rule1 @ identifier fn; identifier client, id; @@ - static int fn(struct i2c_client *client, const struct i2c_device_id *id) + static int fn(struct i2c_client *client) { ...when != id } @ rule2 depends on rule1 @ identifier rule1.fn; identifier driver; @@ struct i2c_driver driver = { - .probe + .probe_new = ( fn | - &fn + fn ) , }; Signed-off-by: Stephen Kitt Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-base.c | 5 ++--- drivers/i2c/i2c-smbus.c | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 9aa7b9d9a485..82478eab71af 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -1017,15 +1017,14 @@ static const struct i2c_device_id dummy_id[] = { { }, }; -static int dummy_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int dummy_probe(struct i2c_client *client) { return 0; } static struct i2c_driver dummy_driver = { .driver.name = "dummy", - .probe = dummy_probe, + .probe_new = dummy_probe, .id_table = dummy_id, }; diff --git a/drivers/i2c/i2c-smbus.c b/drivers/i2c/i2c-smbus.c index c85710ed9548..cd19546d31fc 100644 --- a/drivers/i2c/i2c-smbus.c +++ b/drivers/i2c/i2c-smbus.c @@ -112,8 +112,7 @@ static void smbalert_work(struct work_struct *work) } /* Setup SMBALERT# infrastructure */ -static int smbalert_probe(struct i2c_client *ara, - const struct i2c_device_id *id) +static int smbalert_probe(struct i2c_client *ara) { struct i2c_smbus_alert_setup *setup = dev_get_platdata(&ara->dev); struct i2c_smbus_alert *alert; @@ -170,7 +169,7 @@ static struct i2c_driver smbalert_driver = { .driver = { .name = "smbus_alert", }, - .probe = smbalert_probe, + .probe_new = smbalert_probe, .remove = smbalert_remove, 
.id_table = smbalert_ids, }; From a5eacd2e3790ecf1cbc0be2e53a6f3d1ce3bb719 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:36:22 +0100 Subject: [PATCH 3340/4122] i2c: mux: pca954x: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. Signed-off-by: Uwe Kleine-König Acked-by: Peter Rosin Signed-off-by: Wolfram Sang --- drivers/i2c/muxes/i2c-mux-pca954x.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/muxes/i2c-mux-pca954x.c b/drivers/i2c/muxes/i2c-mux-pca954x.c index a5f458b635df..3639e6d7304c 100644 --- a/drivers/i2c/muxes/i2c-mux-pca954x.c +++ b/drivers/i2c/muxes/i2c-mux-pca954x.c @@ -411,9 +411,9 @@ static int pca954x_init(struct i2c_client *client, struct pca954x *data) /* * I2C init/probing/exit functions */ -static int pca954x_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int pca954x_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct i2c_adapter *adap = client->adapter; struct device *dev = &client->dev; struct gpio_desc *gpio; @@ -554,7 +554,7 @@ static struct i2c_driver pca954x_driver = { .pm = &pca954x_pm, .of_match_table = pca954x_of_match, }, - .probe = pca954x_probe, + .probe_new = pca954x_probe, .remove = pca954x_remove, .id_table = pca954x_id, }; From 87ab726952267caabb9c1404e007e16e42f03b52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:36:19 +0100 Subject: [PATCH 3341/4122] i2c: slave-eeprom: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-slave-eeprom.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/i2c-slave-eeprom.c b/drivers/i2c/i2c-slave-eeprom.c index 4abc2d919881..5f25f23c4ff8 100644 --- a/drivers/i2c/i2c-slave-eeprom.c +++ b/drivers/i2c/i2c-slave-eeprom.c @@ -140,8 +140,9 @@ static int i2c_slave_init_eeprom_data(struct eeprom_data *eeprom, struct i2c_cli return 0; } -static int i2c_slave_eeprom_probe(struct i2c_client *client, const struct i2c_device_id *id) +static int i2c_slave_eeprom_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct eeprom_data *eeprom; int ret; unsigned int size = FIELD_GET(I2C_SLAVE_BYTELEN, id->driver_data) + 1; @@ -206,7 +207,7 @@ static struct i2c_driver i2c_slave_eeprom_driver = { .driver = { .name = "i2c-slave-eeprom", }, - .probe = i2c_slave_eeprom_probe, + .probe_new = i2c_slave_eeprom_probe, .remove = i2c_slave_eeprom_remove, .id_table = i2c_slave_eeprom_id, }; From d78a167332e1ca8113268ed922c1212fd71b73ad Mon Sep 17 00:00:00 2001 From: Hui Tang Date: Mon, 14 Nov 2022 17:25:40 +0800 Subject: [PATCH 3342/4122] i2c: pxa-pci: fix missing pci_disable_device() on error in ce4100_i2c_probe Using pcim_enable_device() to avoid missing pci_disable_device(). 
Fixes: 7e94dd154e93 ("i2c-pxa2xx: Add PCI support for PXA I2C controller") Signed-off-by: Hui Tang Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-pxa-pci.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/i2c/busses/i2c-pxa-pci.c b/drivers/i2c/busses/i2c-pxa-pci.c index f614cade432b..30e38bc8b6db 100644 --- a/drivers/i2c/busses/i2c-pxa-pci.c +++ b/drivers/i2c/busses/i2c-pxa-pci.c @@ -105,7 +105,7 @@ static int ce4100_i2c_probe(struct pci_dev *dev, int i; struct ce4100_devices *sds; - ret = pci_enable_device_mem(dev); + ret = pcim_enable_device(dev); if (ret) return ret; @@ -114,10 +114,8 @@ static int ce4100_i2c_probe(struct pci_dev *dev, return -EINVAL; } sds = kzalloc(sizeof(*sds), GFP_KERNEL); - if (!sds) { - ret = -ENOMEM; - goto err_mem; - } + if (!sds) + return -ENOMEM; for (i = 0; i < ARRAY_SIZE(sds->pdev); i++) { sds->pdev[i] = add_i2c_device(dev, i); @@ -133,8 +131,6 @@ static int ce4100_i2c_probe(struct pci_dev *dev, err_dev_add: kfree(sds); -err_mem: - pci_disable_device(dev); return ret; } From 8f4ab7da904ab7027ccd43ddb4f0094e932a5877 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Mon, 5 Dec 2022 12:44:27 +0400 Subject: [PATCH 3343/4122] selftests/powerpc: Fix resource leaks In check_all_cpu_dscr_defaults, opendir() opens the directory stream. Add missing closedir() in the error path to release it. In check_cpu_dscr_default, open() creates an open file descriptor. Add missing close() in the error path to release it. 
Fixes: ebd5858c904b ("selftests/powerpc: Add test for all DSCR sysfs interfaces") Signed-off-by: Miaoqian Lin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221205084429.570654-1-linmq006@gmail.com --- tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c index fbbdffdb2e5d..f20d1c166d1e 100644 --- a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c +++ b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c @@ -24,6 +24,7 @@ static int check_cpu_dscr_default(char *file, unsigned long val) rc = read(fd, buf, sizeof(buf)); if (rc == -1) { perror("read() failed"); + close(fd); return 1; } close(fd); @@ -65,8 +66,10 @@ static int check_all_cpu_dscr_defaults(unsigned long val) if (access(file, F_OK)) continue; - if (check_cpu_dscr_default(file, val)) + if (check_cpu_dscr_default(file, val)) { + closedir(sysfs); return 1; + } } closedir(sysfs); return 0; From dcb40e9fcce9bd251eaff19f3724131db522846c Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 18 Oct 2022 10:42:53 +0800 Subject: [PATCH 3344/4122] iommu/mediatek: Add platform_device_put for recovering the device refcnt Add platform_device_put to match with of_find_device_by_node. Meanwhile, I add a new variable "pcommdev" which is for smi common device. Otherwise, "platform_device_put(plarbdev)" for smi-common dev may be not readable. And add a checking for whether pcommdev is NULL. 
Fixes: d2e9a1102cfc ("iommu/mediatek: Contain MM IOMMU flow with the MM TYPE") Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20221018024258.19073-2-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index c80f33dd2d43..cce948eead6a 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -1055,7 +1055,7 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m struct mtk_iommu_data *data) { struct device_node *larbnode, *smicomm_node, *smi_subcomm_node; - struct platform_device *plarbdev; + struct platform_device *plarbdev, *pcommdev; struct device_link *link; int i, larb_nr, ret; @@ -1086,12 +1086,14 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m } if (!plarbdev->dev.driver) { of_node_put(larbnode); + platform_device_put(plarbdev); return -EPROBE_DEFER; } data->larb_imu[id].dev = &plarbdev->dev; component_match_add_release(dev, match, component_release_of, component_compare_of, larbnode); + platform_device_put(plarbdev); } /* Get smi-(sub)-common dev from the last larb. 
*/ @@ -1109,12 +1111,15 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m else smicomm_node = smi_subcomm_node; - plarbdev = of_find_device_by_node(smicomm_node); + pcommdev = of_find_device_by_node(smicomm_node); of_node_put(smicomm_node); - data->smicomm_dev = &plarbdev->dev; + if (!pcommdev) + return -ENODEV; + data->smicomm_dev = &pcommdev->dev; link = device_link_add(data->smicomm_dev, dev, DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME); + platform_device_put(pcommdev); if (!link) { dev_err(dev, "Unable to link %s.\n", dev_name(data->smicomm_dev)); return -EINVAL; From b5765a1b44bea9dfcae69c53ffeb4c689d0922a7 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 18 Oct 2022 10:42:54 +0800 Subject: [PATCH 3345/4122] iommu/mediatek: Use component_match_add In order to simplify the error path (avoid calling of_node_put), use component_match_add instead of component_match_add_release since we are only interested in the "device" here. Then we can always call of_node_put in the normal path. Strictly speaking this is not a fix, but it prepares for adding the error path, thus I add a Fixes tag too. 
Fixes: d2e9a1102cfc ("iommu/mediatek: Contain MM IOMMU flow with the MM TYPE") Suggested-by: Robin Murphy Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20221018024258.19073-3-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index cce948eead6a..df490236e1fb 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -1080,19 +1080,17 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m id = i; plarbdev = of_find_device_by_node(larbnode); - if (!plarbdev) { - of_node_put(larbnode); + of_node_put(larbnode); + if (!plarbdev) return -ENODEV; - } + if (!plarbdev->dev.driver) { - of_node_put(larbnode); platform_device_put(plarbdev); return -EPROBE_DEFER; } data->larb_imu[id].dev = &plarbdev->dev; - component_match_add_release(dev, match, component_release_of, - component_compare_of, larbnode); + component_match_add(dev, match, component_compare_dev, &plarbdev->dev); platform_device_put(plarbdev); } From 26593928564cf5b576ff05d3cbd958f57c9534bb Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 18 Oct 2022 10:42:55 +0800 Subject: [PATCH 3346/4122] iommu/mediatek: Add error path for loop of mm_dts_parse mtk_iommu_mm_dts_parse() parses the smi larb nodes. If parsing larb i+1 fails, we should call put_device for larbs i..0. Two points deserve a comment: 1) The larb id mapping may not be linear, so we loop over the whole array in the error path. 2) I move the line "data->larb_imu[id].dev = &plarbdev->dev;" before "if (!plarbdev->dev.driver)". That means data->larb_imu[id].dev is set before the error path, so we don't need "platform_device_put(plarbdev)" again in the probe_defer case. All error cases rely on the "put_device" in the error path. 
Fixes: d2e9a1102cfc ("iommu/mediatek: Contain MM IOMMU flow with the MM TYPE") Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20221018024258.19073-4-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index df490236e1fb..e0c669b75271 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -1067,8 +1067,10 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m u32 id; larbnode = of_parse_phandle(dev->of_node, "mediatek,larbs", i); - if (!larbnode) - return -EINVAL; + if (!larbnode) { + ret = -EINVAL; + goto err_larbdev_put; + } if (!of_device_is_available(larbnode)) { of_node_put(larbnode); @@ -1081,15 +1083,17 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m plarbdev = of_find_device_by_node(larbnode); of_node_put(larbnode); - if (!plarbdev) - return -ENODEV; - - if (!plarbdev->dev.driver) { - platform_device_put(plarbdev); - return -EPROBE_DEFER; + if (!plarbdev) { + ret = -ENODEV; + goto err_larbdev_put; } data->larb_imu[id].dev = &plarbdev->dev; + if (!plarbdev->dev.driver) { + ret = -EPROBE_DEFER; + goto err_larbdev_put; + } + component_match_add(dev, match, component_compare_dev, &plarbdev->dev); platform_device_put(plarbdev); } @@ -1123,6 +1127,15 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m return -EINVAL; } return 0; + +err_larbdev_put: + /* larb ids may be sparse: walk the whole array from the top down to 0 and drop every device reference recorded so far */ + for (i = MTK_LARB_NR_MAX - 1; i >= 0; i--) { + if (!data->larb_imu[i].dev) + continue; + put_device(data->larb_imu[i].dev); + } + return ret; } static int mtk_iommu_probe(struct platform_device *pdev) From ef693a8440926884bfd9cc3d6d36f65719513350 Mon Sep 17 00:00:00 2001 From: Guenter Roeck 
Date: Tue, 18 Oct 2022 10:42:56 +0800 Subject: [PATCH 3347/4122] iommu/mediatek: Validate number of phandles associated with "mediatek,larbs" Fix the smatch warnings: drivers/iommu/mtk_iommu.c:878 mtk_iommu_mm_dts_parse() error: uninitialized symbol 'larbnode'. If someone abuse the dtsi node(Don't follow the definition of dt-binding), for example "mediatek,larbs" is provided as boolean property, "larb_nr" will be zero and cause abnormal. To fix this problem and improve the code safety, add some checking for the invalid input from dtsi, e.g. checking the larb_nr/larbid valid range, and avoid "mediatek,larb-id" property conflicts in the smi-larb nodes. Fixes: d2e9a1102cfc ("iommu/mediatek: Contain MM IOMMU flow with the MM TYPE") Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Guenter Roeck Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20221018024258.19073-5-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index e0c669b75271..41e96da6160f 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -1062,6 +1062,8 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m larb_nr = of_count_phandle_with_args(dev->of_node, "mediatek,larbs", NULL); if (larb_nr < 0) return larb_nr; + if (larb_nr == 0 || larb_nr > MTK_LARB_NR_MAX) + return -EINVAL; for (i = 0; i < larb_nr; i++) { u32 id; @@ -1080,6 +1082,11 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m ret = of_property_read_u32(larbnode, "mediatek,larb-id", &id); if (ret)/* The id is consecutive if there is no this property */ id = i; + if (id >= MTK_LARB_NR_MAX) { + of_node_put(larbnode); + ret = -EINVAL; + goto err_larbdev_put; + } plarbdev = of_find_device_by_node(larbnode); 
of_node_put(larbnode); @@ -1087,6 +1094,11 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m ret = -ENODEV; goto err_larbdev_put; } + if (data->larb_imu[id].dev) { + platform_device_put(plarbdev); + ret = -EEXIST; + goto err_larbdev_put; + } data->larb_imu[id].dev = &plarbdev->dev; if (!plarbdev->dev.driver) { From 6cde583d5352818a51985b32a960cdde85ab3821 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 18 Oct 2022 10:42:57 +0800 Subject: [PATCH 3348/4122] iommu/mediatek: Improve safety for mediatek,smi property in larb nodes No functional change. Just improve safety from dts. All the larbs that connect to one IOMMU must connect with the same smi-common. This patch checks all the mediatek,smi property for each larb, If their mediatek,smi are different, it will return fails. Also avoid there is no available smi-larb nodes. Suggested-by: Guenter Roeck Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20221018024258.19073-6-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 53 +++++++++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 41e96da6160f..4a6ee25a6f99 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -1054,7 +1054,7 @@ static const struct component_master_ops mtk_iommu_com_ops = { static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **match, struct mtk_iommu_data *data) { - struct device_node *larbnode, *smicomm_node, *smi_subcomm_node; + struct device_node *larbnode, *frst_avail_smicomm_node = NULL; struct platform_device *plarbdev, *pcommdev; struct device_link *link; int i, larb_nr, ret; @@ -1066,6 +1066,7 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m return -EINVAL; for (i = 0; i < larb_nr; i++) { + struct device_node 
*smicomm_node, *smi_subcomm_node; u32 id; larbnode = of_parse_phandle(dev->of_node, "mediatek,larbs", i); @@ -1106,27 +1107,47 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m goto err_larbdev_put; } + /* Get smi-(sub)-common dev from the last larb. */ + smi_subcomm_node = of_parse_phandle(larbnode, "mediatek,smi", 0); + if (!smi_subcomm_node) { + ret = -EINVAL; + goto err_larbdev_put; + } + + /* + * It may have two level smi-common. the node is smi-sub-common if it + * has a new mediatek,smi property. otherwise it is smi-commmon. + */ + smicomm_node = of_parse_phandle(smi_subcomm_node, "mediatek,smi", 0); + if (smicomm_node) + of_node_put(smi_subcomm_node); + else + smicomm_node = smi_subcomm_node; + + /* + * All the larbs that connect to one IOMMU must connect with the same + * smi-common. + */ + if (!frst_avail_smicomm_node) { + frst_avail_smicomm_node = smicomm_node; + } else if (frst_avail_smicomm_node != smicomm_node) { + dev_err(dev, "mediatek,smi property is not right @larb%d.", id); + of_node_put(smicomm_node); + ret = -EINVAL; + goto err_larbdev_put; + } else { + of_node_put(smicomm_node); + } + component_match_add(dev, match, component_compare_dev, &plarbdev->dev); platform_device_put(plarbdev); } - /* Get smi-(sub)-common dev from the last larb. */ - smi_subcomm_node = of_parse_phandle(larbnode, "mediatek,smi", 0); - if (!smi_subcomm_node) + if (!frst_avail_smicomm_node) return -EINVAL; - /* - * It may have two level smi-common. the node is smi-sub-common if it - * has a new mediatek,smi property. otherwise it is smi-commmon. 
- */ - smicomm_node = of_parse_phandle(smi_subcomm_node, "mediatek,smi", 0); - if (smicomm_node) - of_node_put(smi_subcomm_node); - else - smicomm_node = smi_subcomm_node; - - pcommdev = of_find_device_by_node(smicomm_node); - of_node_put(smicomm_node); + pcommdev = of_find_device_by_node(frst_avail_smicomm_node); + of_node_put(frst_avail_smicomm_node); if (!pcommdev) return -ENODEV; data->smicomm_dev = &pcommdev->dev; From 9ff894edd542618dad2fef538f8272c620a501db Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 18 Oct 2022 10:42:58 +0800 Subject: [PATCH 3349/4122] iommu/mediatek: Remove unused "mapping" member from mtk_iommu_data Just remove a unused variable that only is for mtk_iommu_v1. Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20221018024258.19073-7-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 4a6ee25a6f99..9c1ce8b46a17 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -229,10 +229,7 @@ struct mtk_iommu_data { struct device *smicomm_dev; struct mtk_iommu_bank_data *bank; - - struct dma_iommu_mapping *mapping; /* For mtk_iommu_v1.c */ struct regmap *pericfg; - struct mutex mutex; /* Protect m4u_group/m4u_dom above */ /* From 88699c024f9227b79af90adc929625e4b7867932 Mon Sep 17 00:00:00 2001 From: Michael Forney Date: Tue, 22 Nov 2022 14:18:25 -0800 Subject: [PATCH 3350/4122] iommu/amd: Fix typo in macro parameter name IVRS_GET_SBDF_ID is only called with fn as the fourth parameter, so this had no effect, but fixing the name will avoid bugs if that ever changes. 
Signed-off-by: Michael Forney Reviewed-by: Vasant Hegde Link: https://lore.kernel.org/r/381fbc430c0ccdd78b3b696cfc0c32b233526ca5.1669159392.git.mforney@mforney.org Signed-off-by: Joerg Roedel --- drivers/iommu/amd/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 34029d116107..467b194975b3 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -85,7 +85,7 @@ #define LOOP_TIMEOUT 2000000 -#define IVRS_GET_SBDF_ID(seg, bus, dev, fd) (((seg & 0xffff) << 16) | ((bus & 0xff) << 8) \ +#define IVRS_GET_SBDF_ID(seg, bus, dev, fn) (((seg & 0xffff) << 16) | ((bus & 0xff) << 8) \ | ((dev & 0x1f) << 3) | (fn & 0x7)) /* From ef5bb8e7a7127218f826b9ccdf7508e7a339f4c2 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 28 Nov 2022 10:06:28 -0400 Subject: [PATCH 3351/4122] iommu/sun50i: Remove IOMMU_DOMAIN_IDENTITY This driver treats IOMMU_DOMAIN_IDENTITY the same as UNMANAGED, which cannot possibly be correct. UNMANAGED domains are required to start out blocking all DMAs. This seems to be what this driver does as it allocates a first level 'dt' for the IO page table that is 0 filled. Thus UNMANAGED looks like a working IO page table, and so IDENTITY must be a mistake. Remove it. 
Fixes: 4100b8c229b3 ("iommu: Add Allwinner H6 IOMMU driver") Signed-off-by: Jason Gunthorpe Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/0-v1-97f0adf27b5e+1f0-s50_identity_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/sun50i-iommu.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c index 5cb2d44dfb92..5b585eace3d4 100644 --- a/drivers/iommu/sun50i-iommu.c +++ b/drivers/iommu/sun50i-iommu.c @@ -672,7 +672,6 @@ static struct iommu_domain *sun50i_iommu_domain_alloc(unsigned type) struct sun50i_iommu_domain *sun50i_domain; if (type != IOMMU_DOMAIN_DMA && - type != IOMMU_DOMAIN_IDENTITY && type != IOMMU_DOMAIN_UNMANAGED) return NULL; From 00ef8885a945c37551547d8ac8361cacd20c4e42 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Mon, 28 Nov 2022 23:16:48 +0100 Subject: [PATCH 3352/4122] iommu/mediatek: Fix crash on isr after kexec() If the system is rebooted via kexec(), the IRQ handler might be triggered before the domain is initialized, resulting in an invalid memory access error. 
Fix: [ 0.500930] Unable to handle kernel read from unreadable memory at virtual address 0000000000000070 [ 0.501166] Call trace: [ 0.501174] report_iommu_fault+0x28/0xfc [ 0.501180] mtk_iommu_isr+0x10c/0x1c0 Signed-off-by: Ricardo Ribalda Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/20221125-mtk-iommu-v2-0-e168dff7d43e@chromium.org [ joro: Fixed spelling in commit message ] Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 9c1ce8b46a17..392b8c167c44 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -462,7 +462,7 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id) fault_larb = data->plat_data->larbid_remap[fault_larb][sub_comm]; } - if (report_iommu_fault(&dom->domain, bank->parent_dev, fault_iova, + if (!dom || report_iommu_fault(&dom->domain, bank->parent_dev, fault_iova, write ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ)) { dev_err_ratelimited( bank->parent_dev, From 58ff6569bc6ec369482eb2d132868870380be64c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 5 Dec 2022 12:05:51 +0000 Subject: [PATCH 3353/4122] KVM: arm64: PMU: Fix period computation for 64bit counters with 32bit overflow Fix the bogus masking when computing the period of a 64bit counter with 32bit overflow. It really should be treated like a 32bit counter for the purpose of the period. 
Reported-by: Ricardo Koller Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/Y4jbosgHbUDI0WF4@google.com --- arch/arm64/kvm/pmu-emul.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index d8ea39943086..24908400e190 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -461,14 +461,10 @@ static u64 compute_period(struct kvm_pmc *pmc, u64 counter) { u64 val; - if (kvm_pmc_is_64bit(pmc)) { - if (!kvm_pmc_has_64bit_overflow(pmc)) - val = -(counter & GENMASK(31, 0)); - else - val = (-counter) & GENMASK(63, 0); - } else { + if (kvm_pmc_is_64bit(pmc) && kvm_pmc_has_64bit_overflow(pmc)) + val = (-counter) & GENMASK(63, 0); + else val = (-counter) & GENMASK(31, 0); - } return val; } From bcc5e2dcf09089b337b76fc1a589f6ff95ca19ac Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 23 Nov 2022 09:43:14 +0300 Subject: [PATCH 3354/4122] staging: rtl8192u: Fix use after free in ieee80211_rx() We cannot dereference the "skb" pointer after calling ieee80211_monitor_rx(), because it is a use after free. 
Fixes: 8fc8598e61f6 ("Staging: Added Realtek rtl8192u driver to staging") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/Y33BArx3k/aw6yv/@kili Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192u/ieee80211/ieee80211_rx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211_rx.c b/drivers/staging/rtl8192u/ieee80211/ieee80211_rx.c index f142d0986990..5c73e3f8541a 100644 --- a/drivers/staging/rtl8192u/ieee80211/ieee80211_rx.c +++ b/drivers/staging/rtl8192u/ieee80211/ieee80211_rx.c @@ -951,9 +951,11 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, #endif if (ieee->iw_mode == IW_MODE_MONITOR) { + unsigned int len = skb->len; + ieee80211_monitor_rx(ieee, skb, rx_stats); stats->rx_packets++; - stats->rx_bytes += skb->len; + stats->rx_bytes += len; return 1; } From 24a525a671ef1e410926da010ef2a3a1b1b96481 Mon Sep 17 00:00:00 2001 From: Philipp Hortmann Date: Tue, 22 Nov 2022 23:45:57 +0100 Subject: [PATCH 3355/4122] staging: rtl8192e: Remove unchanged variable AcmMethod AcmMethod is initialized and never changed. The evaluation will always have the same result. Remove resulting dead code. 
Signed-off-by: Philipp Hortmann Link: https://lore.kernel.org/r/7d73a66184e13d5f8d4af7d21564032247a7e923.1669156825.git.philipp.g.hortmann@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c | 2 -- drivers/staging/rtl8192e/rtl8192e/rtl_core.c | 1 - drivers/staging/rtl8192e/rtl8192e/rtl_core.h | 2 -- drivers/staging/rtl8192e/rtl819x_Qos.h | 7 ------- 4 files changed, 12 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c index b9c846015d28..80eba5d5ab37 100644 --- a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c +++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c @@ -224,8 +224,6 @@ void rtl92e_set_reg(struct net_device *dev, u8 variable, u8 *val) u8 acm = pAciAifsn->f.acm; u8 AcmCtrl = rtl92e_readb(dev, AcmHwCtrl); - AcmCtrl = AcmCtrl | ((priv->AcmMethod == 2) ? 0x0 : 0x1); - if (acm) { switch (eACI) { case AC0_BE: diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c index 399ee9783f99..a8cbeb9545da 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c @@ -830,7 +830,6 @@ static void _rtl92e_init_priv_variable(struct net_device *dev) struct r8192_priv *priv = rtllib_priv(dev); u8 i; - priv->AcmMethod = eAcmWay2_SW; priv->dot11_current_preamble_mode = PREAMBLE_AUTO; priv->rtllib->status = 0; priv->polling_timer_on = 0; diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h index b1656d4ecbad..54c7a2128053 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h @@ -340,8 +340,6 @@ struct r8192_priv { struct bb_reg_definition PHYRegDef[4]; struct rate_adaptive rate_adaptive; - enum acm_method AcmMethod; - struct rt_firmware *pFirmware; enum rtl819x_loopback LoopbackMode; diff --git a/drivers/staging/rtl8192e/rtl819x_Qos.h 
b/drivers/staging/rtl8192e/rtl819x_Qos.h index 5073f9f40fdc..c010eb0d6036 100644 --- a/drivers/staging/rtl8192e/rtl819x_Qos.h +++ b/drivers/staging/rtl8192e/rtl819x_Qos.h @@ -97,13 +97,6 @@ enum direction_value { DIR_BI_DIR = 3, }; -enum acm_method { - eAcmWay0_SwAndHw = 0, - eAcmWay1_HW = 1, - eAcmWay2_SW = 2, -}; - - struct acm { u64 UsedTime; u64 MediumTime; From d694a05fda8797521d51ceba2e7942751391626a Mon Sep 17 00:00:00 2001 From: Philipp Hortmann Date: Tue, 22 Nov 2022 23:46:08 +0100 Subject: [PATCH 3356/4122] staging: rtl8192e: Remove unused variable skb_aggQ skb_aggQ is initialized, never used and purged. Remove resulting dead code. Signed-off-by: Philipp Hortmann Link: https://lore.kernel.org/r/3197be3cb412eea1c662a5bec1b1afda2cee675d.1669156825.git.philipp.g.hortmann@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c | 2 -- drivers/staging/rtl8192e/rtl8192e/rtl_core.c | 2 -- drivers/staging/rtl8192e/rtllib.h | 1 - 3 files changed, 5 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c index 80eba5d5ab37..f02e67f68e23 100644 --- a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c +++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c @@ -1946,8 +1946,6 @@ void rtl92e_stop_adapter(struct net_device *dev, bool reset) for (i = 0; i < MAX_QUEUE_SIZE; i++) skb_queue_purge(&priv->rtllib->skb_waitQ[i]); - for (i = 0; i < MAX_QUEUE_SIZE; i++) - skb_queue_purge(&priv->rtllib->skb_aggQ[i]); skb_queue_purge(&priv->skb_queue); } diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c index a8cbeb9545da..369a59a753fe 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c @@ -903,8 +903,6 @@ static void _rtl92e_init_priv_variable(struct net_device *dev) for (i = 0; i < MAX_QUEUE_SIZE; i++) skb_queue_head_init(&priv->rtllib->skb_waitQ[i]); - for (i = 0; i < 
MAX_QUEUE_SIZE; i++) - skb_queue_head_init(&priv->rtllib->skb_aggQ[i]); } static void _rtl92e_init_priv_lock(struct r8192_priv *priv) diff --git a/drivers/staging/rtl8192e/rtllib.h b/drivers/staging/rtl8192e/rtllib.h index 7119c9c5e1fe..1152fbf43383 100644 --- a/drivers/staging/rtl8192e/rtllib.h +++ b/drivers/staging/rtl8192e/rtllib.h @@ -1624,7 +1624,6 @@ struct rtllib_device { int mgmt_queue_tail; u8 AsocRetryCount; struct sk_buff_head skb_waitQ[MAX_QUEUE_SIZE]; - struct sk_buff_head skb_aggQ[MAX_QUEUE_SIZE]; bool bdynamic_txpower_enable; From efc6f7ce3adf3331298a4bfe2fa9d3912e660261 Mon Sep 17 00:00:00 2001 From: Philipp Hortmann Date: Tue, 22 Nov 2022 23:46:15 +0100 Subject: [PATCH 3357/4122] staging: rtl8192e: Remove unused variable initialized_at_probe initialized_at_probe is initialized and never used. Remove resulting dead code. Signed-off-by: Philipp Hortmann Link: https://lore.kernel.org/r/4855b2dd5b7296b0eb10e697f605fb820e1dfc7b.1669156825.git.philipp.g.hortmann@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl8192e/rtl_core.c | 1 - drivers/staging/rtl8192e/rtl8192e/rtl_core.h | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c index 369a59a753fe..7b78bdfadc8d 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c @@ -836,7 +836,6 @@ static void _rtl92e_init_priv_variable(struct net_device *dev) priv->up_first_time = 1; priv->blinked_ingpio = false; priv->being_init_adapter = false; - priv->initialized_at_probe = false; priv->bdisable_nic = false; priv->bfirst_init = false; priv->txringcount = 64; diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h index 54c7a2128053..1c00ad709a3c 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h @@ -309,7 +309,6 @@ struct r8192_priv { bool 
bfirst_init; bool bfirst_after_down; - bool initialized_at_probe; bool being_init_adapter; int irq; From aae3567db0e9e4be26c8c51c441d8bc43d613116 Mon Sep 17 00:00:00 2001 From: Philipp Hortmann Date: Tue, 22 Nov 2022 23:46:21 +0100 Subject: [PATCH 3358/4122] staging: rtl8192e: Remove unused variable ChannelAccessSetting ChannelAccessSetting is never used. Remove resulting dead code. Signed-off-by: Philipp Hortmann Link: https://lore.kernel.org/r/b88a31954532f47a4caf9abfcad8e20b32a618cc.1669156825.git.philipp.g.hortmann@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl8192e/rtl_core.h | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h index 1c00ad709a3c..ca2e685148d4 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h @@ -234,15 +234,6 @@ struct rt_stats { u32 CurrentShowTxate; }; -struct channel_access_setting { - u16 SIFS_Timer; - u16 DIFS_Timer; - u16 SlotTimeTimer; - u16 EIFS_Timer; - u16 CWminIndex; - u16 CWmaxIndex; -}; - struct init_gain { u8 xaagccore1; u8 xbagccore1; @@ -321,9 +312,6 @@ struct r8192_priv { struct delayed_work txpower_tracking_wq; struct delayed_work rfpath_check_wq; struct delayed_work gpio_change_rf_wq; - - struct channel_access_setting ChannelAccessSetting; - struct rtl819x_ops *ops; struct rtllib_device *rtllib; From e0005909e5e98f230397064de391371ba6a66fba Mon Sep 17 00:00:00 2001 From: Philipp Hortmann Date: Tue, 22 Nov 2022 23:46:29 +0100 Subject: [PATCH 3359/4122] staging: rtl8192e: Remove unused variable int_log int_log is initialized and incremented but never evaluated. Remove resulting dead code. 
Signed-off-by: Philipp Hortmann Link: https://lore.kernel.org/r/e2edcde052cc3c47e6e6b94d09e460b8cf6a49a8.1669156825.git.philipp.g.hortmann@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl8192e/r8190P_def.h | 15 --------------- drivers/staging/rtl8192e/rtl8192e/rtl_core.c | 3 --- drivers/staging/rtl8192e/rtl8192e/rtl_core.h | 2 -- 3 files changed, 20 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl8192e/r8190P_def.h b/drivers/staging/rtl8192e/rtl8192e/r8190P_def.h index 53fd79a28189..ac192254a4bb 100644 --- a/drivers/staging/rtl8192e/rtl8192e/r8190P_def.h +++ b/drivers/staging/rtl8192e/rtl8192e/r8190P_def.h @@ -154,21 +154,6 @@ struct tx_fwinfo_8190pci { }; -struct log_int_8190 { - u32 nIMR_COMDOK; - u32 nIMR_MGNTDOK; - u32 nIMR_HIGH; - u32 nIMR_VODOK; - u32 nIMR_VIDOK; - u32 nIMR_BEDOK; - u32 nIMR_BKDOK; - u32 nIMR_ROK; - u32 nIMR_RCOK; - u32 nIMR_TBDOK; - u32 nIMR_BDOK; - u32 nIMR_RXFOVW; -}; - struct phy_ofdm_rx_status_rxsc_sgien_exintfflag { u8 reserved:4; u8 rxsc:2; diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c index 7b78bdfadc8d..f8fbe78ccad9 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c @@ -866,8 +866,6 @@ static void _rtl92e_init_priv_variable(struct net_device *dev) priv->rst_progress = RESET_TYPE_NORESET; priv->force_reset = false; memset(priv->rtllib->swcamtable, 0, sizeof(struct sw_cam_table) * 32); - - memset(&priv->int_log, 0, sizeof(struct log_int_8190)); priv->rx_ctr = 0; priv->rtllib->wx_set_enc = 0; priv->hw_radio_off = false; @@ -2185,7 +2183,6 @@ static irqreturn_t _rtl92e_irq(int irq, void *netdev) if (inta & IMR_ROK) { priv->stats.rxint++; - priv->int_log.nIMR_ROK++; tasklet_schedule(&priv->irq_rx_tasklet); } diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h index ca2e685148d4..cceb77492363 100644 --- 
a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h @@ -317,8 +317,6 @@ struct r8192_priv { struct work_struct reset_wq; - struct log_int_8190 int_log; - enum rt_customer_id CustomerID; From d30f4436f364b4ad915ca2c09be07cd0f93ceb44 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 23 Nov 2022 16:12:53 +0800 Subject: [PATCH 3360/4122] staging: rtl8192e: Fix potential use-after-free in rtllib_rx_Monitor() The skb is delivered to netif_rx() in rtllib_monitor_rx(), which may free it. After that call, dereferencing skb may trigger a use-after-free. Found by Smatch. Fixes: 94a799425eee ("From: wlanfae [PATCH 1/8] rtl8192e: Import new version of driver from realtek") Signed-off-by: YueHaibing Link: https://lore.kernel.org/r/20221123081253.22296-1-yuehaibing@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtllib_rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rtl8192e/rtllib_rx.c b/drivers/staging/rtl8192e/rtllib_rx.c index 6a0f5bbb99ef..f8965afab767 100644 --- a/drivers/staging/rtl8192e/rtllib_rx.c +++ b/drivers/staging/rtl8192e/rtllib_rx.c @@ -1489,9 +1489,9 @@ static int rtllib_rx_Monitor(struct rtllib_device *ieee, struct sk_buff *skb, hdrlen += 4; } - rtllib_monitor_rx(ieee, skb, rx_stats, hdrlen); ieee->stats.rx_packets++; ieee->stats.rx_bytes += skb->len; + rtllib_monitor_rx(ieee, skb, rx_stats, hdrlen); return 1; } From d43ea3d4d2efd015ae7999168a58796671df4138 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sat, 26 Nov 2022 16:42:49 +0100 Subject: [PATCH 3361/4122] staging: r8188eu: drop return value from issue_probereq_ex The only caller of issue_probereq_ex does not check the return value. We can remove it and make issue_probereq_ex a void function. 
Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221126154253.178275-2-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 11 ++--------- drivers/staging/r8188eu/include/rtw_mlme_ext.h | 4 ++-- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index 161cb67f7882..2f3f7da08d0c 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -4482,8 +4482,8 @@ inline void issue_probereq(struct adapter *padapter, struct ndis_802_11_ssid *ps _issue_probereq(padapter, pssid, da, false); } -int issue_probereq_ex(struct adapter *padapter, struct ndis_802_11_ssid *pssid, u8 *da, - int try_cnt, int wait_ms) +void issue_probereq_ex(struct adapter *padapter, struct ndis_802_11_ssid *pssid, u8 *da, + int try_cnt, int wait_ms) { int ret; int i = 0; @@ -4497,13 +4497,6 @@ int issue_probereq_ex(struct adapter *padapter, struct ndis_802_11_ssid *pssid, msleep(wait_ms); } while ((i < try_cnt) && ((ret == _FAIL) || (wait_ms == 0))); - - if (ret != _FAIL) { - ret = _SUCCESS; - goto exit; - } -exit: - return ret; } /* if psta == NULL, indicate we are station (client) now... 
*/ diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h index c46fc1a53085..720610bc8fef 100644 --- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h +++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h @@ -512,8 +512,8 @@ void issue_auth(struct adapter *padapter, struct sta_info *psta, unsigned short status); void issue_probereq(struct adapter *padapter, struct ndis_802_11_ssid *pssid, u8 *da); -s32 issue_probereq_ex(struct adapter *adapter, struct ndis_802_11_ssid *pssid, - u8 *da, int try_cnt, int wait_ms); +void issue_probereq_ex(struct adapter *adapter, struct ndis_802_11_ssid *pssid, + u8 *da, int try_cnt, int wait_ms); int issue_nulldata(struct adapter *padapter, unsigned char *da, unsigned int power_mode, int try_cnt, int wait_ms); int issue_qos_nulldata(struct adapter *padapter, unsigned char *da, From e0e14bbee1811209435d3ba29008769bd7a45057 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sat, 26 Nov 2022 16:42:50 +0100 Subject: [PATCH 3362/4122] staging: r8188eu: remove wait_ms parameter Remove the wait_ms parameter from function issue_probereq_ex. There's only one caller, who requests a waiting time of 1 ms. 
Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221126154253.178275-3-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 12 ++++++------ drivers/staging/r8188eu/include/rtw_mlme_ext.h | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index 2f3f7da08d0c..990336244030 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -4483,20 +4483,20 @@ inline void issue_probereq(struct adapter *padapter, struct ndis_802_11_ssid *ps } void issue_probereq_ex(struct adapter *padapter, struct ndis_802_11_ssid *pssid, u8 *da, - int try_cnt, int wait_ms) + int try_cnt) { int ret; int i = 0; do { - ret = _issue_probereq(padapter, pssid, da, wait_ms > 0); + ret = _issue_probereq(padapter, pssid, da, true); i++; - if (i < try_cnt && wait_ms > 0 && ret == _FAIL) - msleep(wait_ms); + if (i < try_cnt && ret == _FAIL) + msleep(1); - } while ((i < try_cnt) && ((ret == _FAIL) || (wait_ms == 0))); + } while ((i < try_cnt) && (ret == _FAIL)); } /* if psta == NULL, indicate we are station (client) now... 
*/ @@ -7046,7 +7046,7 @@ void linked_status_chk(struct adapter *padapter) } if (rx_chk != _SUCCESS) - issue_probereq_ex(padapter, &pmlmeinfo->network.Ssid, psta->hwaddr, 3, 1); + issue_probereq_ex(padapter, &pmlmeinfo->network.Ssid, psta->hwaddr, 3); if ((tx_chk != _SUCCESS && pmlmeinfo->link_count++ == 0xf) || rx_chk != _SUCCESS) { tx_chk = issue_nulldata(padapter, psta->hwaddr, 0, 3, 1); diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h index 720610bc8fef..04fd673d3a04 100644 --- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h +++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h @@ -513,7 +513,7 @@ void issue_auth(struct adapter *padapter, struct sta_info *psta, void issue_probereq(struct adapter *padapter, struct ndis_802_11_ssid *pssid, u8 *da); void issue_probereq_ex(struct adapter *adapter, struct ndis_802_11_ssid *pssid, - u8 *da, int try_cnt, int wait_ms); + u8 *da, int try_cnt); int issue_nulldata(struct adapter *padapter, unsigned char *da, unsigned int power_mode, int try_cnt, int wait_ms); int issue_qos_nulldata(struct adapter *padapter, unsigned char *da, From 77833c305dc897c094e5c5419c27568509346609 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sat, 26 Nov 2022 16:42:51 +0100 Subject: [PATCH 3363/4122] staging: r8188eu: fix the number of probereq retries issue_probereq_ex sends a probe request and retries if this fails. There's no point in making the number of retries configurable. Hard-code the value that's used by issue_probereq_ex's only caller. Simplify the code to check the loop condition only once. 
Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221126154253.178275-4-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 21 +++++++------------ .../staging/r8188eu/include/rtw_mlme_ext.h | 3 +-- 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index 990336244030..667f54e313ef 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -4482,21 +4482,16 @@ inline void issue_probereq(struct adapter *padapter, struct ndis_802_11_ssid *ps _issue_probereq(padapter, pssid, da, false); } -void issue_probereq_ex(struct adapter *padapter, struct ndis_802_11_ssid *pssid, u8 *da, - int try_cnt) +void issue_probereq_ex(struct adapter *padapter, struct ndis_802_11_ssid *pssid, u8 *da) { - int ret; - int i = 0; + int i; - do { - ret = _issue_probereq(padapter, pssid, da, true); - - i++; - - if (i < try_cnt && ret == _FAIL) + for (i = 0; i < 3; i++) { + if (_issue_probereq(padapter, pssid, da, true) == _FAIL) msleep(1); - - } while ((i < try_cnt) && (ret == _FAIL)); + else + break; + } } /* if psta == NULL, indicate we are station (client) now... 
*/ @@ -7046,7 +7041,7 @@ void linked_status_chk(struct adapter *padapter) } if (rx_chk != _SUCCESS) - issue_probereq_ex(padapter, &pmlmeinfo->network.Ssid, psta->hwaddr, 3); + issue_probereq_ex(padapter, &pmlmeinfo->network.Ssid, psta->hwaddr); if ((tx_chk != _SUCCESS && pmlmeinfo->link_count++ == 0xf) || rx_chk != _SUCCESS) { tx_chk = issue_nulldata(padapter, psta->hwaddr, 0, 3, 1); diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h index 04fd673d3a04..089bd5446773 100644 --- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h +++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h @@ -512,8 +512,7 @@ void issue_auth(struct adapter *padapter, struct sta_info *psta, unsigned short status); void issue_probereq(struct adapter *padapter, struct ndis_802_11_ssid *pssid, u8 *da); -void issue_probereq_ex(struct adapter *adapter, struct ndis_802_11_ssid *pssid, - u8 *da, int try_cnt); +void issue_probereq_ex(struct adapter *padapter, struct ndis_802_11_ssid *pssid, u8 *da); int issue_nulldata(struct adapter *padapter, unsigned char *da, unsigned int power_mode, int try_cnt, int wait_ms); int issue_qos_nulldata(struct adapter *padapter, unsigned char *da, From 92f1bb5ff104d7f36754f27c17483d4e8ffe85ae Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sat, 26 Nov 2022 16:42:52 +0100 Subject: [PATCH 3364/4122] staging: r8188eu: simplify the checks for zero address In function ap2sta_data_frame, we can use is_zero_ether_addr to check for all-zero ethernet addresses. Both pattrib->bssid and mybssid are 16-bit aligned. 
Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221126154253.178275-5-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_recv.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_recv.c b/drivers/staging/r8188eu/core/rtw_recv.c index 5b0a66aebff1..631c500dda42 100644 --- a/drivers/staging/r8188eu/core/rtw_recv.c +++ b/drivers/staging/r8188eu/core/rtw_recv.c @@ -779,9 +779,8 @@ static int ap2sta_data_frame( } /* check BSSID */ - if (!memcmp(pattrib->bssid, "\x0\x0\x0\x0\x0\x0", ETH_ALEN) || - !memcmp(mybssid, "\x0\x0\x0\x0\x0\x0", ETH_ALEN) || - (memcmp(pattrib->bssid, mybssid, ETH_ALEN))) { + if (is_zero_ether_addr(pattrib->bssid) || is_zero_ether_addr(mybssid) || + (memcmp(pattrib->bssid, mybssid, ETH_ALEN))) { if (!bmcast) issue_deauth(adapter, pattrib->bssid, WLAN_REASON_CLASS3_FRAME_FROM_NONASSOC_STA); From 57a8f00f03f1fa493fadd20992e700d24f6860cb Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sat, 26 Nov 2022 16:42:53 +0100 Subject: [PATCH 3365/4122] staging: r8188eu: use ieee80211 helper for protected bit Use ieee80211_has_protected to check if the "protected" bit is set. Remove the r8188eu driver's internal macro for this check. 
Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221126154253.178275-6-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 2 +- drivers/staging/r8188eu/include/wifi.h | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index 667f54e313ef..93696892ec7d 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -809,7 +809,7 @@ static void OnAuthClient(struct adapter *padapter, struct recv_frame *precv_fram if (!(pmlmeinfo->state & WIFI_FW_AUTH_STATE)) return; - offset = (GetPrivacy(pframe)) ? 4 : 0; + offset = ieee80211_has_protected(hdr->frame_control) ? 4 : 0; seq = le16_to_cpu(*(__le16 *)((size_t)pframe + WLAN_HDR_A3_LEN + offset + 2)); status = le16_to_cpu(*(__le16 *)((size_t)pframe + WLAN_HDR_A3_LEN + offset + 4)); diff --git a/drivers/staging/r8188eu/include/wifi.h b/drivers/staging/r8188eu/include/wifi.h index 2381c519ceaf..254a4bc1a141 100644 --- a/drivers/staging/r8188eu/include/wifi.h +++ b/drivers/staging/r8188eu/include/wifi.h @@ -170,9 +170,6 @@ enum WIFI_REG_DOMAIN { #define SetPrivacy(pbuf) \ *(__le16 *)(pbuf) |= cpu_to_le16(_PRIVACY_) -#define GetPrivacy(pbuf) \ - (((*(__le16 *)(pbuf)) & cpu_to_le16(_PRIVACY_)) != 0) - #define GetFrameType(pbuf) \ (le16_to_cpu(*(__le16 *)(pbuf)) & (BIT(3) | BIT(2))) From d664761116ea12a2417b268af2fd936fe571f612 Mon Sep 17 00:00:00 2001 From: Jack Schofield Date: Wed, 30 Nov 2022 21:38:50 -0800 Subject: [PATCH 3366/4122] staging: greybus: loopback_test: Add blank line after declaration. Declarations are to be followed by a blank line. Add to follow Linux Kernel coding style. Issue reported by checkpatch. 
Reviewed-by: Johan Hovold Signed-off-by: Jack Schofield Link: https://lore.kernel.org/r/048967b5c3b157e19ccf4608f71da8418fc31877.1669872193.git.schofija@oregonstate.edu Signed-off-by: Greg Kroah-Hartman --- drivers/staging/greybus/tools/loopback_test.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/greybus/tools/loopback_test.c b/drivers/staging/greybus/tools/loopback_test.c index 4c42e393cd3d..242138c4f6cf 100644 --- a/drivers/staging/greybus/tools/loopback_test.c +++ b/drivers/staging/greybus/tools/loopback_test.c @@ -675,6 +675,7 @@ err: static int close_poll_files(struct loopback_test *t) { int i; + for (i = 0; i < t->poll_count; i++) close(t->fds[i].fd); From 2206c10685ab92a74ead37c30a7f817125ccd723 Mon Sep 17 00:00:00 2001 From: Jack Schofield Date: Wed, 30 Nov 2022 21:38:51 -0800 Subject: [PATCH 3367/4122] staging: greybus: loopback_test: Remove void function return statement Void function return statement is not useful in this case. Issue reported by checkpatch. Reviewed-by: Johan Hovold Signed-off-by: Jack Schofield Link: https://lore.kernel.org/r/5d465d56a6e3de2cb9d2f34377cbb82678cc67b1.1669872193.git.schofija@oregonstate.edu Signed-off-by: Greg Kroah-Hartman --- drivers/staging/greybus/tools/loopback_test.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/staging/greybus/tools/loopback_test.c b/drivers/staging/greybus/tools/loopback_test.c index 242138c4f6cf..7c1697304ab4 100644 --- a/drivers/staging/greybus/tools/loopback_test.c +++ b/drivers/staging/greybus/tools/loopback_test.c @@ -862,7 +862,6 @@ void loopback_run(struct loopback_test *t) err: printf("Error running test\n"); - return; } static int sanity_check(struct loopback_test *t) From 2cb3ecf1c5fa4850073d48fb4a759fc650f08736 Mon Sep 17 00:00:00 2001 From: Jack Schofield Date: Wed, 30 Nov 2022 21:38:52 -0800 Subject: [PATCH 3368/4122] staging: greybus: loopback_test: Remove extra blank lines Clean up extra uses of blank lines that do not follow the kernel coding style. 
Issue reported by checkpatch. Reviewed-by: Johan Hovold Signed-off-by: Jack Schofield Link: https://lore.kernel.org/r/51edbcc7583d64d3fe5a203d3a8649a695ef4a75.1669872193.git.schofija@oregonstate.edu Signed-off-by: Greg Kroah-Hartman --- drivers/staging/greybus/tools/loopback_test.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/drivers/staging/greybus/tools/loopback_test.c b/drivers/staging/greybus/tools/loopback_test.c index 7c1697304ab4..d7ad51ff60c5 100644 --- a/drivers/staging/greybus/tools/loopback_test.c +++ b/drivers/staging/greybus/tools/loopback_test.c @@ -239,7 +239,6 @@ static void show_loopback_devices(struct loopback_test *t) for (i = 0; i < t->device_count; i++) printf("device[%d] = %s\n", i, t->devices[i].name); - } int open_sysfs(const char *sys_pfx, const char *node, int flags) @@ -274,7 +273,6 @@ float read_sysfs_float_fd(int fd, const char *sys_pfx, const char *node) char buf[SYSFS_MAX_INT]; if (read(fd, buf, sizeof(buf)) < 0) { - fprintf(stderr, "unable to read from %s%s %s\n", sys_pfx, node, strerror(errno)); close(fd); @@ -367,7 +365,6 @@ static int get_results(struct loopback_test *t) r->apbridge_unipro_latency_max - r->apbridge_unipro_latency_min; r->gbphy_firmware_latency_jitter = r->gbphy_firmware_latency_max - r->gbphy_firmware_latency_min; - } /*calculate the aggregate results of all enabled devices */ @@ -407,7 +404,6 @@ static int get_results(struct loopback_test *t) r->apbridge_unipro_latency_max - r->apbridge_unipro_latency_min; r->gbphy_firmware_latency_jitter = r->gbphy_firmware_latency_max - r->gbphy_firmware_latency_min; - } return 0; @@ -536,7 +532,6 @@ static int log_results(struct loopback_test *t) fprintf(stderr, "unable to open %s for appending\n", file_name); abort(); } - } for (i = 0; i < t->device_count; i++) { if (!device_enabled(t, i)) @@ -550,10 +545,8 @@ static int log_results(struct loopback_test *t) if (ret == -1) fprintf(stderr, "unable to write %d bytes to csv.\n", len); } - } - if 
(t->aggregate_output) { len = format_output(t, &t->aggregate_results, "aggregate", data, sizeof(data), &tm); @@ -741,7 +734,6 @@ static int wait_for_complete(struct loopback_test *t) ts = &t->poll_timeout; while (1) { - ret = ppoll(t->fds, t->poll_count, ts, &mask_old); if (ret <= 0) { stop_tests(t); @@ -781,7 +773,6 @@ static void prepare_devices(struct loopback_test *t) if (t->stop_all || device_enabled(t, i)) write_sysfs_val(t->devices[i].sysfs_entry, "type", 0); - for (i = 0; i < t->device_count; i++) { if (!device_enabled(t, i)) continue; @@ -824,7 +815,6 @@ static int start(struct loopback_test *t) return 0; } - void loopback_run(struct loopback_test *t) { int i; @@ -853,7 +843,6 @@ void loopback_run(struct loopback_test *t) if (ret) goto err; - get_results(t); log_results(t); @@ -881,10 +870,8 @@ static int sanity_check(struct loopback_test *t) fprintf(stderr, "Bad device mask %x\n", (1 << i)); return -1; } - } - return 0; } From 64a4ade699e70b2b700cf6088916827b73988d52 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sat, 26 Nov 2022 17:01:20 +0100 Subject: [PATCH 3369/4122] staging: r8188eu: replace one GetAddr3Ptr call Define a struct ieee80211_mgmt in the OnBeacon function. Use it to replace one GetAddr3Ptr call. This is a tiny step towards making GetAddr3Ptr obsolete. 
Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221126160129.178697-2-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index 93696892ec7d..80342b53a129 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -560,6 +560,7 @@ static void OnProbeRsp(struct adapter *padapter, struct recv_frame *precv_frame) static void OnBeacon(struct adapter *padapter, struct recv_frame *precv_frame) { + struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)precv_frame->rx_data; int cam_idx; struct sta_info *psta; struct mlme_ext_priv *pmlmeext = &padapter->mlmeextpriv; @@ -576,7 +577,7 @@ static void OnBeacon(struct adapter *padapter, struct recv_frame *precv_frame) return; } - if (!memcmp(GetAddr3Ptr(pframe), get_my_bssid(&pmlmeinfo->network), ETH_ALEN)) { + if (!memcmp(mgmt->bssid, get_my_bssid(&pmlmeinfo->network), ETH_ALEN)) { if (pmlmeinfo->state & WIFI_FW_AUTH_NULL) { /* we should update current network before auth, or some IE is wrong */ pbss = kmalloc(sizeof(struct wlan_bssid_ex), GFP_ATOMIC); From 84d55656bf4486eee77b783b88adebad0cf5d649 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sat, 26 Nov 2022 17:01:21 +0100 Subject: [PATCH 3370/4122] staging: r8188eu: read timestamp from ieee80211_mgmt Read the 64-bit timestamp from struct ieee80211_mgmt instead of parsing the beacon message ourselves. 
Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221126160129.178697-3-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index 80342b53a129..66e3a90da3a3 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -389,21 +389,6 @@ static u32 p2p_listen_state_process(struct adapter *padapter, unsigned char *da) return _SUCCESS; } -static void update_TSF(struct mlme_ext_priv *pmlmeext, u8 *pframe) -{ - u8 *pIE; - __le32 *pbuf; - - pIE = pframe + sizeof(struct ieee80211_hdr_3addr); - pbuf = (__le32 *)pIE; - - pmlmeext->TSFValue = le32_to_cpu(*(pbuf + 1)); - - pmlmeext->TSFValue = pmlmeext->TSFValue << 32; - - pmlmeext->TSFValue |= le32_to_cpu(*pbuf); -} - static void correct_TSF(struct adapter *padapter) { u8 reg; @@ -592,8 +577,7 @@ static void OnBeacon(struct adapter *padapter, struct recv_frame *precv_frame) /* check the vendor of the assoc AP */ pmlmeinfo->assoc_AP_vendor = check_assoc_AP(pframe + sizeof(struct ieee80211_hdr_3addr), len - sizeof(struct ieee80211_hdr_3addr)); - /* update TSF Value */ - update_TSF(pmlmeext, pframe); + pmlmeext->TSFValue = le64_to_cpu(mgmt->u.beacon.timestamp); /* start auth */ start_clnt_auth(padapter); @@ -635,8 +619,7 @@ static void OnBeacon(struct adapter *padapter, struct recv_frame *precv_frame) return; } - /* update TSF Value */ - update_TSF(pmlmeext, pframe); + pmlmeext->TSFValue = le64_to_cpu(mgmt->u.beacon.timestamp); /* report sta add event */ report_add_sta_event(padapter, GetAddr2Ptr(pframe), cam_idx); From 0e73b1276a51eeb649c9676e41c9e4208c8b40af Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sat, 26 Nov 2022 17:01:22 +0100 Subject: [PATCH 3371/4122] staging: r8188eu: replace GetAddr2Ptr 
calls Get the source address of a beacon frame from our struct ieee80211_mgmt instead of using the driver's internal GetAddr2Ptr helper. Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221126160129.178697-4-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index 66e3a90da3a3..5a31b20dc46d 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -586,7 +586,7 @@ static void OnBeacon(struct adapter *padapter, struct recv_frame *precv_frame) } if (((pmlmeinfo->state & 0x03) == WIFI_FW_STATION_STATE) && (pmlmeinfo->state & WIFI_FW_ASSOC_SUCCESS)) { - psta = rtw_get_stainfo(pstapriv, GetAddr2Ptr(pframe)); + psta = rtw_get_stainfo(pstapriv, mgmt->sa); if (psta) { ret = rtw_check_bcn_info(padapter, pframe, len); if (!ret) { @@ -601,7 +601,7 @@ static void OnBeacon(struct adapter *padapter, struct recv_frame *precv_frame) process_p2p_ps_ie(padapter, (pframe + WLAN_HDR_A3_LEN), (len - WLAN_HDR_A3_LEN)); } } else if ((pmlmeinfo->state & 0x03) == WIFI_FW_ADHOC_STATE) { - psta = rtw_get_stainfo(pstapriv, GetAddr2Ptr(pframe)); + psta = rtw_get_stainfo(pstapriv, mgmt->sa); if (psta) { /* update WMM, ERP in the beacon */ /* todo: the timer is used instead of the number of the beacon received */ @@ -621,8 +621,7 @@ static void OnBeacon(struct adapter *padapter, struct recv_frame *precv_frame) pmlmeext->TSFValue = le64_to_cpu(mgmt->u.beacon.timestamp); - /* report sta add event */ - report_add_sta_event(padapter, GetAddr2Ptr(pframe), cam_idx); + report_add_sta_event(padapter, mgmt->sa, cam_idx); } } } From 1ed513f3e0ff13f2a84e3479b7774f213c7d7066 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sat, 26 Nov 2022 17:01:23 +0100 Subject: [PATCH 3372/4122] staging: 
r8188eu: pass only ies to process_p2p_ps_ie The process_p2p_ps_ie function parses the information elements of a beacon message and extracts p2p-related info. process_p2p_ps_ie does not receive a pointer to the information elements as one would expect. Instead it receives a pointer to the timestamp field in the beacon message. process_p2p_ps_ie increments this pointer by _BEACON_IE_OFFSET_ to jump to the start of the information elements (and decreases the buffer length accordingly). This is clumsy and hard to understand. Rewrite this such that process_p2p_ps_ie takes a pointer to the information elements and the total length of all elements. Check up-front that the total length is not negative. Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221126160129.178697-5-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 9 ++++++++- drivers/staging/r8188eu/core/rtw_p2p.c | 11 ++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index 5a31b20dc46d..07c57a2b61b9 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -556,6 +556,13 @@ static void OnBeacon(struct adapter *padapter, struct recv_frame *precv_frame) uint len = precv_frame->len; struct wlan_bssid_ex *pbss; int ret = _SUCCESS; + u8 *ie_ptr; + u32 ie_len; + + ie_ptr = (u8 *)&mgmt->u.beacon.variable; + if (precv_frame->len < offsetof(struct ieee80211_mgmt, u.beacon.variable)) + return; + ie_len = precv_frame->len - offsetof(struct ieee80211_mgmt, u.beacon.variable); if (pmlmeext->sitesurvey_res.state == SCAN_PROCESS) { report_survey_event(padapter, precv_frame); @@ -598,7 +605,7 @@ static void OnBeacon(struct adapter *padapter, struct recv_frame *precv_frame) /* todo: the timer is used instead of the number of the beacon received */ if 
((sta_rx_pkts(psta) & 0xf) == 0) update_beacon_info(padapter, pframe, len, psta); - process_p2p_ps_ie(padapter, (pframe + WLAN_HDR_A3_LEN), (len - WLAN_HDR_A3_LEN)); + process_p2p_ps_ie(padapter, ie_ptr, ie_len); } } else if ((pmlmeinfo->state & 0x03) == WIFI_FW_ADHOC_STATE) { psta = rtw_get_stainfo(pstapriv, mgmt->sa); diff --git a/drivers/staging/r8188eu/core/rtw_p2p.c b/drivers/staging/r8188eu/core/rtw_p2p.c index dc159e58f428..ce05458bd1ad 100644 --- a/drivers/staging/r8188eu/core/rtw_p2p.c +++ b/drivers/staging/r8188eu/core/rtw_p2p.c @@ -1505,8 +1505,6 @@ void p2p_protocol_wk_hdl(struct adapter *padapter, int intCmdType) void process_p2p_ps_ie(struct adapter *padapter, u8 *IEs, u32 IELength) { - u8 *ies; - u32 ies_len; u8 *p2p_ie; u32 p2p_ielen = 0; u8 noa_attr[MAX_P2P_IE_LEN] = { 0x00 };/* NoA length should be n*(13) + 2 */ @@ -1518,13 +1516,8 @@ void process_p2p_ps_ie(struct adapter *padapter, u8 *IEs, u32 IELength) if (rtw_p2p_chk_state(pwdinfo, P2P_STATE_NONE)) return; - if (IELength <= _BEACON_IE_OFFSET_) - return; - ies = IEs + _BEACON_IE_OFFSET_; - ies_len = IELength - _BEACON_IE_OFFSET_; - - p2p_ie = rtw_get_p2p_ie(ies, ies_len, NULL, &p2p_ielen); + p2p_ie = rtw_get_p2p_ie(IEs, IELength, NULL, &p2p_ielen); while (p2p_ie) { find_p2p = true; @@ -1579,7 +1572,7 @@ void process_p2p_ps_ie(struct adapter *padapter, u8 *IEs, u32 IELength) } /* Get the next P2P IE */ - p2p_ie = rtw_get_p2p_ie(p2p_ie + p2p_ielen, ies_len - (p2p_ie - ies + p2p_ielen), NULL, &p2p_ielen); + p2p_ie = rtw_get_p2p_ie(p2p_ie + p2p_ielen, IELength - (p2p_ie - IEs + p2p_ielen), NULL, &p2p_ielen); } if (find_p2p) { From 018da8b60688caec763d75cc6fb022a165028cea Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sat, 26 Nov 2022 17:01:24 +0100 Subject: [PATCH 3373/4122] staging: r8188eu: use ie buffer in update_beacon_info The update_beacon_info function parses information elements of a beacon message. 
It should take the pointer to the information elements and their total length, not the entire beacon message. Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221126160129.178697-6-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 4 ++-- drivers/staging/r8188eu/core/rtw_wlan_util.c | 9 +++------ drivers/staging/r8188eu/include/rtw_mlme_ext.h | 3 +-- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index 07c57a2b61b9..38dc98cffbc4 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -604,7 +604,7 @@ static void OnBeacon(struct adapter *padapter, struct recv_frame *precv_frame) /* update WMM, ERP in the beacon */ /* todo: the timer is used instead of the number of the beacon received */ if ((sta_rx_pkts(psta) & 0xf) == 0) - update_beacon_info(padapter, pframe, len, psta); + update_beacon_info(padapter, ie_ptr, ie_len, psta); process_p2p_ps_ie(padapter, ie_ptr, ie_len); } } else if ((pmlmeinfo->state & 0x03) == WIFI_FW_ADHOC_STATE) { @@ -613,7 +613,7 @@ static void OnBeacon(struct adapter *padapter, struct recv_frame *precv_frame) /* update WMM, ERP in the beacon */ /* todo: the timer is used instead of the number of the beacon received */ if ((sta_rx_pkts(psta) & 0xf) == 0) - update_beacon_info(padapter, pframe, len, psta); + update_beacon_info(padapter, ie_ptr, ie_len, psta); } else { /* allocate a new CAM entry for IBSS station */ cam_idx = allocate_fw_sta_entry(padapter); diff --git a/drivers/staging/r8188eu/core/rtw_wlan_util.c b/drivers/staging/r8188eu/core/rtw_wlan_util.c index 965bb7da4cce..da3465d6bb0f 100644 --- a/drivers/staging/r8188eu/core/rtw_wlan_util.c +++ b/drivers/staging/r8188eu/core/rtw_wlan_util.c @@ -1035,16 +1035,13 @@ _mismatch: return _FAIL; } -void update_beacon_info(struct 
adapter *padapter, u8 *pframe, uint pkt_len, struct sta_info *psta) +void update_beacon_info(struct adapter *padapter, u8 *ie_ptr, uint ie_len, struct sta_info *psta) { unsigned int i; - unsigned int len; struct ndis_802_11_var_ie *pIE; - len = pkt_len - (_BEACON_IE_OFFSET_ + WLAN_HDR_A3_LEN); - - for (i = 0; i < len;) { - pIE = (struct ndis_802_11_var_ie *)(pframe + (_BEACON_IE_OFFSET_ + WLAN_HDR_A3_LEN) + i); + for (i = 0; i < ie_len;) { + pIE = (struct ndis_802_11_var_ie *)(ie_ptr + i); switch (pIE->ElementID) { case _HT_EXTRA_INFO_IE_: /* HT info */ diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h index 089bd5446773..6724424a334e 100644 --- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h +++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h @@ -447,8 +447,7 @@ void HTOnAssocRsp(struct adapter *padapter); void ERP_IE_handler(struct adapter *padapter, struct ndis_802_11_var_ie *pIE); void VCS_update(struct adapter *padapter, struct sta_info *psta); -void update_beacon_info(struct adapter *padapter, u8 *pframe, uint len, - struct sta_info *psta); +void update_beacon_info(struct adapter *padapter, u8 *ie_ptr, uint ie_len, struct sta_info *psta); int rtw_check_bcn_info(struct adapter *Adapter, u8 *pframe, u32 packet_len); void update_IOT_info(struct adapter *padapter); void update_capinfo(struct adapter *adapter, u16 updatecap); From 02b51d08ec3a0f476c8b1537faa46e35e87e5ce0 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sat, 26 Nov 2022 17:01:25 +0100 Subject: [PATCH 3374/4122] staging: r8188eu: simplify update_sta_support_rate params The update_sta_support_rate function takes a pointer to information elements and the total length of all these elements. Use our variables for pointer and length instead of calculating them manually. 
Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221126160129.178697-7-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index 38dc98cffbc4..f7d3ecf551bf 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -621,7 +621,7 @@ static void OnBeacon(struct adapter *padapter, struct recv_frame *precv_frame) return; /* get supported rate */ - if (update_sta_support_rate(padapter, (pframe + WLAN_HDR_A3_LEN + _BEACON_IE_OFFSET_), (len - WLAN_HDR_A3_LEN - _BEACON_IE_OFFSET_), cam_idx) == _FAIL) { + if (update_sta_support_rate(padapter, ie_ptr, ie_len, cam_idx) == _FAIL) { pmlmeinfo->FW_sta_info[cam_idx].status = 0; return; } From 32634359a9c3d70cbf01af0ae87b27d8c0e1ebe3 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sat, 26 Nov 2022 17:01:26 +0100 Subject: [PATCH 3375/4122] staging: r8188eu: exit if beacon is not from our bss Do not process an incoming beacon message in the OnBeacon function if the beacon was sent by a base station other than the one to which we're connected. This patch does not modify the behaviour of the code. It inverts the if condition and returns if the beacon should not be processed. This is simpler than wrapping the entire processing into a large if clause.
Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221126160129.178697-8-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 115 ++++++++++---------- 1 file changed, 58 insertions(+), 57 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index f7d3ecf551bf..a15998d912a7 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -569,67 +569,68 @@ static void OnBeacon(struct adapter *padapter, struct recv_frame *precv_frame) return; } - if (!memcmp(mgmt->bssid, get_my_bssid(&pmlmeinfo->network), ETH_ALEN)) { - if (pmlmeinfo->state & WIFI_FW_AUTH_NULL) { - /* we should update current network before auth, or some IE is wrong */ - pbss = kmalloc(sizeof(struct wlan_bssid_ex), GFP_ATOMIC); - if (pbss) { - if (collect_bss_info(padapter, precv_frame, pbss) == _SUCCESS) { - update_network(&pmlmepriv->cur_network.network, pbss, padapter, true); - rtw_get_bcn_info(&pmlmepriv->cur_network); - } - kfree(pbss); - } + if (memcmp(mgmt->bssid, get_my_bssid(&pmlmeinfo->network), ETH_ALEN)) + return; - /* check the vendor of the assoc AP */ - pmlmeinfo->assoc_AP_vendor = check_assoc_AP(pframe + sizeof(struct ieee80211_hdr_3addr), len - sizeof(struct ieee80211_hdr_3addr)); + if (pmlmeinfo->state & WIFI_FW_AUTH_NULL) { + /* we should update current network before auth, or some IE is wrong */ + pbss = kmalloc(sizeof(struct wlan_bssid_ex), GFP_ATOMIC); + if (pbss) { + if (collect_bss_info(padapter, precv_frame, pbss) == _SUCCESS) { + update_network(&pmlmepriv->cur_network.network, pbss, padapter, true); + rtw_get_bcn_info(&pmlmepriv->cur_network); + } + kfree(pbss); + } + + /* check the vendor of the assoc AP */ + pmlmeinfo->assoc_AP_vendor = check_assoc_AP(pframe + sizeof(struct ieee80211_hdr_3addr), len - sizeof(struct ieee80211_hdr_3addr)); + + pmlmeext->TSFValue = 
le64_to_cpu(mgmt->u.beacon.timestamp); + + /* start auth */ + start_clnt_auth(padapter); + + return; + } + + if (((pmlmeinfo->state & 0x03) == WIFI_FW_STATION_STATE) && (pmlmeinfo->state & WIFI_FW_ASSOC_SUCCESS)) { + psta = rtw_get_stainfo(pstapriv, mgmt->sa); + if (psta) { + ret = rtw_check_bcn_info(padapter, pframe, len); + if (!ret) { + receive_disconnect(padapter, + pmlmeinfo->network.MacAddress, 0); + return; + } + /* update WMM, ERP in the beacon */ + /* todo: the timer is used instead of the number of the beacon received */ + if ((sta_rx_pkts(psta) & 0xf) == 0) + update_beacon_info(padapter, ie_ptr, ie_len, psta); + process_p2p_ps_ie(padapter, ie_ptr, ie_len); + } + } else if ((pmlmeinfo->state & 0x03) == WIFI_FW_ADHOC_STATE) { + psta = rtw_get_stainfo(pstapriv, mgmt->sa); + if (psta) { + /* update WMM, ERP in the beacon */ + /* todo: the timer is used instead of the number of the beacon received */ + if ((sta_rx_pkts(psta) & 0xf) == 0) + update_beacon_info(padapter, ie_ptr, ie_len, psta); + } else { + /* allocate a new CAM entry for IBSS station */ + cam_idx = allocate_fw_sta_entry(padapter); + if (cam_idx == NUM_STA) + return; + + /* get supported rate */ + if (update_sta_support_rate(padapter, ie_ptr, ie_len, cam_idx) == _FAIL) { + pmlmeinfo->FW_sta_info[cam_idx].status = 0; + return; + } pmlmeext->TSFValue = le64_to_cpu(mgmt->u.beacon.timestamp); - /* start auth */ - start_clnt_auth(padapter); - - return; - } - - if (((pmlmeinfo->state & 0x03) == WIFI_FW_STATION_STATE) && (pmlmeinfo->state & WIFI_FW_ASSOC_SUCCESS)) { - psta = rtw_get_stainfo(pstapriv, mgmt->sa); - if (psta) { - ret = rtw_check_bcn_info(padapter, pframe, len); - if (!ret) { - receive_disconnect(padapter, - pmlmeinfo->network.MacAddress, 0); - return; - } - /* update WMM, ERP in the beacon */ - /* todo: the timer is used instead of the number of the beacon received */ - if ((sta_rx_pkts(psta) & 0xf) == 0) - update_beacon_info(padapter, ie_ptr, ie_len, psta); - process_p2p_ps_ie(padapter, 
ie_ptr, ie_len); - } - } else if ((pmlmeinfo->state & 0x03) == WIFI_FW_ADHOC_STATE) { - psta = rtw_get_stainfo(pstapriv, mgmt->sa); - if (psta) { - /* update WMM, ERP in the beacon */ - /* todo: the timer is used instead of the number of the beacon received */ - if ((sta_rx_pkts(psta) & 0xf) == 0) - update_beacon_info(padapter, ie_ptr, ie_len, psta); - } else { - /* allocate a new CAM entry for IBSS station */ - cam_idx = allocate_fw_sta_entry(padapter); - if (cam_idx == NUM_STA) - return; - - /* get supported rate */ - if (update_sta_support_rate(padapter, ie_ptr, ie_len, cam_idx) == _FAIL) { - pmlmeinfo->FW_sta_info[cam_idx].status = 0; - return; - } - - pmlmeext->TSFValue = le64_to_cpu(mgmt->u.beacon.timestamp); - - report_add_sta_event(padapter, mgmt->sa, cam_idx); - } + report_add_sta_event(padapter, mgmt->sa, cam_idx); } } } From fa205589d5e9fc2d1b2f8d31f665152da04160bc Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sat, 26 Nov 2022 17:01:27 +0100 Subject: [PATCH 3376/4122] staging: r8188eu: stop beacon processing if kmalloc fails If we cannot allocate a struct wlan_bssid_ex in the OnBeacon function, we should stop processing the incoming beacon message and return. For kmalloc failures, the current code just skips the update of network and beacon info and tries to continue with the authentication. The update would set the encryption algorithm that should be used for the authentication. 
Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221126160129.178697-9-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index a15998d912a7..76424bcba416 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -575,13 +575,14 @@ static void OnBeacon(struct adapter *padapter, struct recv_frame *precv_frame) if (pmlmeinfo->state & WIFI_FW_AUTH_NULL) { /* we should update current network before auth, or some IE is wrong */ pbss = kmalloc(sizeof(struct wlan_bssid_ex), GFP_ATOMIC); - if (pbss) { - if (collect_bss_info(padapter, precv_frame, pbss) == _SUCCESS) { - update_network(&pmlmepriv->cur_network.network, pbss, padapter, true); - rtw_get_bcn_info(&pmlmepriv->cur_network); - } - kfree(pbss); + if (!pbss) + return; + + if (collect_bss_info(padapter, precv_frame, pbss) == _SUCCESS) { + update_network(&pmlmepriv->cur_network.network, pbss, padapter, true); + rtw_get_bcn_info(&pmlmepriv->cur_network); } + kfree(pbss); /* check the vendor of the assoc AP */ pmlmeinfo->assoc_AP_vendor = check_assoc_AP(pframe + sizeof(struct ieee80211_hdr_3addr), len - sizeof(struct ieee80211_hdr_3addr)); From dd99fe1f5a8319f0e236c0548cdba849451779d5 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sat, 26 Nov 2022 17:01:28 +0100 Subject: [PATCH 3377/4122] staging: r8188eu: simplify error handling for missing station Simplify the code to handle the case where we're associated to a station that is not in our list of known stations. We can simply exit in this case. This patch inverts the if-condition and saves one level of indentation.
Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221126160129.178697-10-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 24 ++++++++++----------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index 76424bcba416..362313c49c52 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -597,19 +597,19 @@ static void OnBeacon(struct adapter *padapter, struct recv_frame *precv_frame) if (((pmlmeinfo->state & 0x03) == WIFI_FW_STATION_STATE) && (pmlmeinfo->state & WIFI_FW_ASSOC_SUCCESS)) { psta = rtw_get_stainfo(pstapriv, mgmt->sa); - if (psta) { - ret = rtw_check_bcn_info(padapter, pframe, len); - if (!ret) { - receive_disconnect(padapter, - pmlmeinfo->network.MacAddress, 0); - return; - } - /* update WMM, ERP in the beacon */ - /* todo: the timer is used instead of the number of the beacon received */ - if ((sta_rx_pkts(psta) & 0xf) == 0) - update_beacon_info(padapter, ie_ptr, ie_len, psta); - process_p2p_ps_ie(padapter, ie_ptr, ie_len); + if (!psta) + return; + + ret = rtw_check_bcn_info(padapter, pframe, len); + if (!ret) { + receive_disconnect(padapter, pmlmeinfo->network.MacAddress, 0); + return; } + /* update WMM, ERP in the beacon */ + /* todo: the timer is used instead of the number of the beacon received */ + if ((sta_rx_pkts(psta) & 0xf) == 0) + update_beacon_info(padapter, ie_ptr, ie_len, psta); + process_p2p_ps_ie(padapter, ie_ptr, ie_len); } else if ((pmlmeinfo->state & 0x03) == WIFI_FW_ADHOC_STATE) { psta = rtw_get_stainfo(pstapriv, mgmt->sa); if (psta) { From 17c1202a9e689dc875c864c1f1e8dded70c0a8a0 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sat, 26 Nov 2022 17:01:29 +0100 Subject: [PATCH 3378/4122] staging: r8188eu: remove a variable Check the result of rtw_check_bcn_info directly and 
remove the ret variable. Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221126160129.178697-11-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index 362313c49c52..d32b2d569e23 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -555,7 +555,6 @@ static void OnBeacon(struct adapter *padapter, struct recv_frame *precv_frame) u8 *pframe = precv_frame->rx_data; uint len = precv_frame->len; struct wlan_bssid_ex *pbss; - int ret = _SUCCESS; u8 *ie_ptr; u32 ie_len; @@ -600,8 +599,7 @@ static void OnBeacon(struct adapter *padapter, struct recv_frame *precv_frame) if (!psta) return; - ret = rtw_check_bcn_info(padapter, pframe, len); - if (!ret) { + if (rtw_check_bcn_info(padapter, pframe, len) != _SUCCESS) { receive_disconnect(padapter, pmlmeinfo->network.MacAddress, 0); return; } From 49bd97c28b7e7f014a72821fd95fcf11e11599a4 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Sat, 19 Nov 2022 01:34:46 +0000 Subject: [PATCH 3379/4122] perf tools: Use dedicated non-atomic clear/set bit helpers Use the dedicated non-atomic helpers for {clear,set}_bit() and their test variants, i.e. the double-underscore versions. Despite being defined in atomic.h, and despite the kernel versions being atomic in the kernel, tools' {clear,set}_bit() helpers aren't actually atomic. Move to the double-underscore versions so that the versions that are expected to be atomic (for kernel developers) can be made atomic without affecting users that don't want atomic operations. No functional change intended.
Signed-off-by: Sean Christopherson Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andy Shevchenko Cc: James Morse Cc: Jiri Olsa Cc: Marc Zyngier Cc: Mark Rutland Cc: Oliver Upton Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Rasmus Villemoes Cc: Sean Christopherson Cc: Suzuki Poulouse Cc: Yury Norov Cc: alexandru elisei Cc: kvm@vger.kernel.org Cc: kvmarm@lists.cs.columbia.edu Cc: kvmarm@lists.linux.dev Cc: linux-arm-kernel@lists.infradead.org Link: http://lore.kernel.org/lkml/20221119013450.2643007-6-seanjc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/find-bit-bench.c | 2 +- tools/perf/builtin-c2c.c | 6 +++--- tools/perf/builtin-kwork.c | 6 +++--- tools/perf/builtin-record.c | 6 +++--- tools/perf/builtin-sched.c | 2 +- tools/perf/tests/bitmap.c | 2 +- tools/perf/tests/mem2node.c | 2 +- tools/perf/util/affinity.c | 4 ++-- tools/perf/util/header.c | 8 ++++---- tools/perf/util/mmap.c | 6 +++--- tools/perf/util/pmu.c | 2 +- tools/perf/util/scripting-engines/trace-event-perl.c | 2 +- tools/perf/util/scripting-engines/trace-event-python.c | 2 +- tools/perf/util/session.c | 2 +- tools/perf/util/svghelper.c | 2 +- 15 files changed, 27 insertions(+), 27 deletions(-) diff --git a/tools/perf/bench/find-bit-bench.c b/tools/perf/bench/find-bit-bench.c index 22b5cfe97023..d103c3136983 100644 --- a/tools/perf/bench/find-bit-bench.c +++ b/tools/perf/bench/find-bit-bench.c @@ -70,7 +70,7 @@ static int do_for_each_set_bit(unsigned int num_bits) bitmap_zero(to_test, num_bits); skip = num_bits / set_bits; for (i = 0; i < num_bits; i += skip) - set_bit(i, to_test); + __set_bit(i, to_test); for (i = 0; i < outer_iterations; i++) { old = accumulator; diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index a9190458d2d5..52d94c7dd836 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -230,7 +230,7 @@ static void c2c_he__set_cpu(struct c2c_hist_entry *c2c_he, "WARNING: no sample cpu value")) return; - set_bit(sample->cpu, 
c2c_he->cpuset); + __set_bit(sample->cpu, c2c_he->cpuset); } static void c2c_he__set_node(struct c2c_hist_entry *c2c_he, @@ -247,7 +247,7 @@ static void c2c_he__set_node(struct c2c_hist_entry *c2c_he, if (WARN_ONCE(node < 0, "WARNING: failed to find node\n")) return; - set_bit(node, c2c_he->nodeset); + __set_bit(node, c2c_he->nodeset); if (c2c_he->paddr != sample->phys_addr) { c2c_he->paddr_cnt++; @@ -2318,7 +2318,7 @@ static int setup_nodes(struct perf_session *session) continue; perf_cpu_map__for_each_cpu(cpu, idx, map) { - set_bit(cpu.cpu, set); + __set_bit(cpu.cpu, set); if (WARN_ONCE(cpu2node[cpu.cpu] != -1, "node/cpu topology bug")) return -EINVAL; diff --git a/tools/perf/builtin-kwork.c b/tools/perf/builtin-kwork.c index 4ffbf5908070..0e02b8098644 100644 --- a/tools/perf/builtin-kwork.c +++ b/tools/perf/builtin-kwork.c @@ -222,7 +222,7 @@ static struct kwork_atom *atom_new(struct perf_kwork *kwork, list_add_tail(&page->list, &kwork->atom_page_list); found_atom: - set_bit(i, page->bitmap); + __set_bit(i, page->bitmap); atom->time = sample->time; atom->prev = NULL; atom->page_addr = page; @@ -235,8 +235,8 @@ static void atom_free(struct kwork_atom *atom) if (atom->prev != NULL) atom_free(atom->prev); - clear_bit(atom->bit_inpage, - ((struct kwork_atom_page *)atom->page_addr)->bitmap); + __clear_bit(atom->bit_inpage, + ((struct kwork_atom_page *)atom->page_addr)->bitmap); } static void atom_del(struct kwork_atom *atom) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index bd462a3f2bbd..b7fd7ec586fb 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -3555,7 +3555,7 @@ static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cp /* Return ENODEV is input cpu is greater than max cpu */ if ((unsigned long)cpu.cpu > mask->nbits) return -ENODEV; - set_bit(cpu.cpu, mask->bits); + __set_bit(cpu.cpu, mask->bits); } return 0; @@ -3627,8 +3627,8 @@ static int record__init_thread_cpu_masks(struct record 
*rec, struct perf_cpu_map pr_debug("nr_threads: %d\n", rec->nr_threads); for (t = 0; t < rec->nr_threads; t++) { - set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits); - set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits); + __set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits); + __set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits); if (verbose) { pr_debug("thread_masks[%d]: ", t); mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps"); diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index f93737eef07b..86e18575c9be 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1573,7 +1573,7 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel, if (sched->map.comp) { cpus_nr = bitmap_weight(sched->map.comp_cpus_mask, MAX_CPUS); - if (!test_and_set_bit(this_cpu.cpu, sched->map.comp_cpus_mask)) { + if (!__test_and_set_bit(this_cpu.cpu, sched->map.comp_cpus_mask)) { sched->map.comp_cpus[cpus_nr++] = this_cpu; new_cpu = true; } diff --git a/tools/perf/tests/bitmap.c b/tools/perf/tests/bitmap.c index 4965dd666956..0173f5402a35 100644 --- a/tools/perf/tests/bitmap.c +++ b/tools/perf/tests/bitmap.c @@ -18,7 +18,7 @@ static unsigned long *get_bitmap(const char *str, int nbits) if (map && bm) { for (i = 0; i < perf_cpu_map__nr(map); i++) - set_bit(perf_cpu_map__cpu(map, i).cpu, bm); + __set_bit(perf_cpu_map__cpu(map, i).cpu, bm); } if (map) diff --git a/tools/perf/tests/mem2node.c b/tools/perf/tests/mem2node.c index 4c96829510c9..a0e88c496107 100644 --- a/tools/perf/tests/mem2node.c +++ b/tools/perf/tests/mem2node.c @@ -33,7 +33,7 @@ static unsigned long *get_bitmap(const char *str, int nbits) int i; perf_cpu_map__for_each_cpu(cpu, i, map) - set_bit(cpu.cpu, bm); + __set_bit(cpu.cpu, bm); } if (map) diff --git a/tools/perf/util/affinity.c b/tools/perf/util/affinity.c index 4ee96b3c755b..38dc4524b7e8 100644 --- 
a/tools/perf/util/affinity.c +++ b/tools/perf/util/affinity.c @@ -58,14 +58,14 @@ void affinity__set(struct affinity *a, int cpu) return; a->changed = true; - set_bit(cpu, a->sched_cpus); + __set_bit(cpu, a->sched_cpus); /* * We ignore errors because affinity is just an optimization. * This could happen for example with isolated CPUs or cpusets. * In this case the IPIs inside the kernel's perf API still work. */ sched_setaffinity(0, cpu_set_size, (cpu_set_t *)a->sched_cpus); - clear_bit(cpu, a->sched_cpus); + __clear_bit(cpu, a->sched_cpus); } static void __affinity__cleanup(struct affinity *a) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 98dfaf84bd13..dc2ae397d400 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -79,12 +79,12 @@ struct perf_file_attr { void perf_header__set_feat(struct perf_header *header, int feat) { - set_bit(feat, header->adds_features); + __set_bit(feat, header->adds_features); } void perf_header__clear_feat(struct perf_header *header, int feat) { - clear_bit(feat, header->adds_features); + __clear_bit(feat, header->adds_features); } bool perf_header__has_feat(const struct perf_header *header, int feat) @@ -1358,7 +1358,7 @@ static int memory_node__read(struct memory_node *n, unsigned long idx) rewinddir(dir); for_each_memory(phys, dir) { - set_bit(phys, n->set); + __set_bit(phys, n->set); } closedir(dir); @@ -3952,7 +3952,7 @@ int perf_file_header__read(struct perf_file_header *header, if (!test_bit(HEADER_HOSTNAME, header->adds_features)) { bitmap_zero(header->adds_features, HEADER_FEAT_BITS); - set_bit(HEADER_BUILD_ID, header->adds_features); + __set_bit(HEADER_BUILD_ID, header->adds_features); } } diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index a4dff881be39..49093b21ee2d 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -111,7 +111,7 @@ static int perf_mmap__aio_bind(struct mmap *map, int idx, struct perf_cpu cpu, i pr_err("Failed to allocate node mask 
for mbind: error %m\n"); return -1; } - set_bit(node_index, node_mask); + __set_bit(node_index, node_mask); if (mbind(data, mmap_len, MPOL_BIND, node_mask, node_index + 1 + 1, 0)) { pr_err("Failed to bind [%p-%p] AIO buffer to node %lu: error %m\n", data, data + mmap_len, node_index); @@ -256,7 +256,7 @@ static void build_node_mask(int node, struct mmap_cpu_mask *mask) for (idx = 0; idx < nr_cpus; idx++) { cpu = perf_cpu_map__cpu(cpu_map, idx); /* map c index to online cpu index */ if (cpu__get_node(cpu) == node) - set_bit(cpu.cpu, mask->bits); + __set_bit(cpu.cpu, mask->bits); } } @@ -270,7 +270,7 @@ static int perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params * if (mp->affinity == PERF_AFFINITY_NODE && cpu__max_node() > 1) build_node_mask(cpu__get_node(map->core.cpu), &map->affinity_mask); else if (mp->affinity == PERF_AFFINITY_CPU) - set_bit(map->core.cpu.cpu, map->affinity_mask.bits); + __set_bit(map->core.cpu.cpu, map->affinity_mask.bits); return 0; } diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index e9a4f31926bf..8ff6462f051e 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1533,7 +1533,7 @@ void perf_pmu__set_format(unsigned long *bits, long from, long to) memset(bits, 0, BITS_TO_BYTES(PERF_PMU_FORMAT_BITS)); for (b = from; b <= to; b++) - set_bit(b, bits); + __set_bit(b, bits); } void perf_pmu__del_formats(struct list_head *formats) diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index a5d945415bbc..5b602b6d4685 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -365,7 +365,7 @@ static void perl_process_tracepoint(struct perf_sample *sample, sprintf(handler, "%s::%s", event->system, event->name); - if (!test_and_set_bit(event->id, events_defined)) + if (!__test_and_set_bit(event->id, events_defined)) define_event_symbols(event, handler, event->print_fmt.args); 
s = nsecs / NSEC_PER_SEC; diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 1cf65db8f861..d685a7399ee2 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -934,7 +934,7 @@ static void python_process_tracepoint(struct perf_sample *sample, sprintf(handler_name, "%s__%s", event->system, event->name); - if (!test_and_set_bit(event->id, events_defined)) + if (!__test_and_set_bit(event->id, events_defined)) define_event_symbols(event, handler_name, event->print_fmt.args); handler = get_handler(handler_name); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 0e1a3d6bacb9..1facd4616317 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -2749,7 +2749,7 @@ int perf_session__cpu_bitmap(struct perf_session *session, goto out_delete_map; } - set_bit(cpu.cpu, cpu_bitmap); + __set_bit(cpu.cpu, cpu_bitmap); } err = 0; diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c index 1e0c731fc539..5c62d3118c41 100644 --- a/tools/perf/util/svghelper.c +++ b/tools/perf/util/svghelper.c @@ -741,7 +741,7 @@ static int str_to_bitmap(char *s, cpumask_t *b, int nr_cpus) break; } - set_bit(c.cpu, cpumask_bits(b)); + __set_bit(c.cpu, cpumask_bits(b)); } perf_cpu_map__put(m); From 4de531ce41e5aab9d3c5884e8360faaec82fb62e Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Tue, 29 Nov 2022 21:51:48 +0100 Subject: [PATCH 3380/4122] staging: r8188eu: use ieee80211_mgmt to parse addresses Define a struct ieee80211_mgmt in the OnDisassoc function and use it to parse the addresses in the incoming disassoc message. This replaces some driver-specific functions for address parsing. 
Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221129205152.128172-2-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index d32b2d569e23..b2b2cb57ed04 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -1427,6 +1427,7 @@ static void OnDeAuth(struct adapter *padapter, struct recv_frame *precv_frame) static void OnDisassoc(struct adapter *padapter, struct recv_frame *precv_frame) { + struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)precv_frame->rx_data; u16 reason; struct mlme_priv *pmlmepriv = &padapter->mlmepriv; struct mlme_ext_priv *pmlmeext = &padapter->mlmeextpriv; @@ -1434,8 +1435,7 @@ static void OnDisassoc(struct adapter *padapter, struct recv_frame *precv_frame) u8 *pframe = precv_frame->rx_data; struct wifidirect_info *pwdinfo = &padapter->wdinfo; - /* check A3 */ - if (!(!memcmp(GetAddr3Ptr(pframe), get_my_bssid(&pmlmeinfo->network), ETH_ALEN))) + if (memcmp(mgmt->bssid, get_my_bssid(&pmlmeinfo->network), ETH_ALEN)) return; if (pwdinfo->rx_invitereq_info.scan_op_ch_only) { @@ -1449,7 +1449,7 @@ static void OnDisassoc(struct adapter *padapter, struct recv_frame *precv_frame) struct sta_info *psta; struct sta_priv *pstapriv = &padapter->stapriv; - psta = rtw_get_stainfo(pstapriv, GetAddr2Ptr(pframe)); + psta = rtw_get_stainfo(pstapriv, mgmt->sa); if (psta) { u8 updated = 0; @@ -1466,7 +1466,7 @@ static void OnDisassoc(struct adapter *padapter, struct recv_frame *precv_frame) return; } else { - receive_disconnect(padapter, GetAddr3Ptr(pframe), reason); + receive_disconnect(padapter, mgmt->bssid, reason); } pmlmepriv->LinkDetectInfo.bBusyTraffic = false; } From e70cac829d63f24aac0ea858a5407d25cf55b85a Mon Sep 17 00:00:00 2001 
From: Martin Kaiser Date: Tue, 29 Nov 2022 21:51:49 +0100 Subject: [PATCH 3381/4122] staging: r8188eu: read reason code from ieee80211_mgmt Use the struct ieee80211_mgmt to read the reason code instead of parsing the message manually. Remove the pframe pointer, it's no longer used. Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221129205152.128172-3-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index b2b2cb57ed04..fe58d4e3e260 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -1432,7 +1432,6 @@ static void OnDisassoc(struct adapter *padapter, struct recv_frame *precv_frame) struct mlme_priv *pmlmepriv = &padapter->mlmepriv; struct mlme_ext_priv *pmlmeext = &padapter->mlmeextpriv; struct mlme_ext_info *pmlmeinfo = &pmlmeext->mlmext_info; - u8 *pframe = precv_frame->rx_data; struct wifidirect_info *pwdinfo = &padapter->wdinfo; if (memcmp(mgmt->bssid, get_my_bssid(&pmlmeinfo->network), ETH_ALEN)) @@ -1443,7 +1442,7 @@ static void OnDisassoc(struct adapter *padapter, struct recv_frame *precv_frame) _set_timer(&pwdinfo->reset_ch_sitesurvey, 10); } - reason = le16_to_cpu(*(__le16 *)(pframe + WLAN_HDR_A3_LEN)); + reason = le16_to_cpu(mgmt->u.disassoc.reason_code); if (check_fwstate(pmlmepriv, WIFI_AP_STATE)) { struct sta_info *psta; From 8f7f05934ea24676c5c259a66bd71fa35ee22ef3 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Tue, 29 Nov 2022 21:51:50 +0100 Subject: [PATCH 3382/4122] staging: r8188eu: move bBusyTraffic update Move the update of bBusyTraffic into the else branch. The if branch ends with a return statement, so bBusyTraffic will not be updated in this case. 
With this change in place, we can reorder the code and save some levels of indentation. Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221129205152.128172-4-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index fe58d4e3e260..dd1e0b4fc5a0 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -1466,8 +1466,8 @@ static void OnDisassoc(struct adapter *padapter, struct recv_frame *precv_frame) return; } else { receive_disconnect(padapter, mgmt->bssid, reason); + pmlmepriv->LinkDetectInfo.bBusyTraffic = false; } - pmlmepriv->LinkDetectInfo.bBusyTraffic = false; } static void OnAction_back(struct adapter *padapter, struct recv_frame *precv_frame) From 17580de3e2b876247f2201b6486559ef1e6b7f58 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Tue, 29 Nov 2022 21:51:51 +0100 Subject: [PATCH 3383/4122] staging: r8188eu: handle the non-ap case first The OnDisassoc function hasn't got much to do if we're not working as an access point. Move this case out of the large if statement to simplify the code. 
Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221129205152.128172-5-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 40 ++++++++++----------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index dd1e0b4fc5a0..d5c9eb52fb0e 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -1433,6 +1433,8 @@ static void OnDisassoc(struct adapter *padapter, struct recv_frame *precv_frame) struct mlme_ext_priv *pmlmeext = &padapter->mlmeextpriv; struct mlme_ext_info *pmlmeinfo = &pmlmeext->mlmext_info; struct wifidirect_info *pwdinfo = &padapter->wdinfo; + struct sta_info *psta; + struct sta_priv *pstapriv = &padapter->stapriv; if (memcmp(mgmt->bssid, get_my_bssid(&pmlmeinfo->network), ETH_ALEN)) return; @@ -1444,29 +1446,25 @@ static void OnDisassoc(struct adapter *padapter, struct recv_frame *precv_frame) reason = le16_to_cpu(mgmt->u.disassoc.reason_code); - if (check_fwstate(pmlmepriv, WIFI_AP_STATE)) { - struct sta_info *psta; - struct sta_priv *pstapriv = &padapter->stapriv; - - psta = rtw_get_stainfo(pstapriv, mgmt->sa); - if (psta) { - u8 updated = 0; - - spin_lock_bh(&pstapriv->asoc_list_lock); - if (!list_empty(&psta->asoc_list)) { - list_del_init(&psta->asoc_list); - pstapriv->asoc_list_cnt--; - updated = ap_free_sta(padapter, psta, false, reason); - } - spin_unlock_bh(&pstapriv->asoc_list_lock); - - associated_clients_update(padapter, updated); - } - - return; - } else { + if (!check_fwstate(pmlmepriv, WIFI_AP_STATE)) { receive_disconnect(padapter, mgmt->bssid, reason); pmlmepriv->LinkDetectInfo.bBusyTraffic = false; + return; + } + + psta = rtw_get_stainfo(pstapriv, mgmt->sa); + if (psta) { + u8 updated = 0; + + spin_lock_bh(&pstapriv->asoc_list_lock); + if (!list_empty(&psta->asoc_list)) { + 
list_del_init(&psta->asoc_list); + pstapriv->asoc_list_cnt--; + updated = ap_free_sta(padapter, psta, false, reason); + } + spin_unlock_bh(&pstapriv->asoc_list_lock); + + associated_clients_update(padapter, updated); } } From 30b28fd639abf51464f015f6bebd7506bc895be2 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Tue, 29 Nov 2022 21:51:52 +0100 Subject: [PATCH 3384/4122] staging: r8188eu: simplify err handling for unknown station If we receive a disassoc message from an unknown station, we can drop this message immediately. Reorder the code to make this clearer. Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221129205152.128172-6-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 22 ++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index d5c9eb52fb0e..49e0b50b1243 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -1435,6 +1435,7 @@ static void OnDisassoc(struct adapter *padapter, struct recv_frame *precv_frame) struct wifidirect_info *pwdinfo = &padapter->wdinfo; struct sta_info *psta; struct sta_priv *pstapriv = &padapter->stapriv; + u8 updated = 0; if (memcmp(mgmt->bssid, get_my_bssid(&pmlmeinfo->network), ETH_ALEN)) return; @@ -1453,19 +1454,18 @@ static void OnDisassoc(struct adapter *padapter, struct recv_frame *precv_frame) } psta = rtw_get_stainfo(pstapriv, mgmt->sa); - if (psta) { - u8 updated = 0; + if (!psta) + return; - spin_lock_bh(&pstapriv->asoc_list_lock); - if (!list_empty(&psta->asoc_list)) { - list_del_init(&psta->asoc_list); - pstapriv->asoc_list_cnt--; - updated = ap_free_sta(padapter, psta, false, reason); - } - spin_unlock_bh(&pstapriv->asoc_list_lock); - - associated_clients_update(padapter, updated); + spin_lock_bh(&pstapriv->asoc_list_lock); + if 
(!list_empty(&psta->asoc_list)) { + list_del_init(&psta->asoc_list); + pstapriv->asoc_list_cnt--; + updated = ap_free_sta(padapter, psta, false, reason); } + spin_unlock_bh(&pstapriv->asoc_list_lock); + + associated_clients_update(padapter, updated); } static void OnAction_back(struct adapter *padapter, struct recv_frame *precv_frame) From 20ed9fa4965875fdde5bfd65d838465e38d46b22 Mon Sep 17 00:00:00 2001 From: James Clark Date: Wed, 30 Nov 2022 16:51:58 +0000 Subject: [PATCH 3385/4122] perf branch: Fix interpretation of branch records Commit 93315e46b000fc80 ("perf/core: Add speculation info to branch entries") added a new field in between type and new_type. Perf has its own copy of this struct so update it to match the kernel side. This doesn't currently cause any issues because new_type is only used by the Arm BRBE driver which isn't merged yet. Committer notes: Is this really an ABI? How are we supposed to deal with old perf.data files with new tools and vice versa? :-\ Fixes: 93315e46b000fc80 ("perf/core: Add speculation info to branch entries") Reviewed-by: Anshuman Khandual Signed-off-by: James Clark Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Cc: Sandipan Das Link: https://lore.kernel.org/r/20221130165158.517385-1-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/branch.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h index d6017c9b1872..3ed792db1125 100644 --- a/tools/perf/util/branch.h +++ b/tools/perf/util/branch.h @@ -22,9 +22,10 @@ struct branch_flags { u64 abort:1; u64 cycles:16; u64 type:4; + u64 spec:2; u64 new_type:4; u64 priv:3; - u64 reserved:33; + u64 reserved:31; }; }; }; From 7e8e5e879729b238cd17dcc292d86c1e6cc83ecc Mon Sep 17 00:00:00 2001 From: Hans-Peter Nilsson Date: Mon, 28 Dec 2020 03:39:41 +0100 Subject: [PATCH 3386/4122] perf arm64: Fix mksyscalltbl, don't lose 
syscalls due to sort -nu When using "sort -nu", arm64 syscalls were lost. That is, the io_setup syscall (number 0) and all but one (typically ftruncate; 64) of the syscalls that are defined symbolically (like "#define __NR_ftruncate __NR3264_ftruncate") at the point where "sort" is applied. This creation-of-syscalls.c-scheme is, judging from comments, copy-pasted from powerpc, and worked there because at the time, its tools/arch/powerpc/include/uapi/asm/unistd.h had *literals*, like "#define __NR_ftruncate 93". With sort being numeric and the non-numeric key effectively evaluating to 0, the sort option "-u" means these "duplicates" are removed. There's no need to remove syscall lines with duplicate numbers for arm64 because there are none, so let's fix that by just losing the "-u". Having the table numerically sorted on syscall-number for the rest of the syscalls looks nice, so keep the "-n". Reviewed-by: Leo Yan Signed-off-by: Hans-Peter Nilsson Tested-by: Leo Yan Acked-by: Arnd Bergmann Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20201228023941.E0DE2203B5@pchp3.se.axis.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/entry/syscalls/mksyscalltbl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl b/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl index 459469b7222c..a7ca48d1e37b 100755 --- a/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl +++ b/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl @@ -58,5 +58,5 @@ create_table() $gcc -E -dM -x c -I $incpath/include/uapi $input \ |sed -ne 's/^#define __NR_//p' \ - |sort -t' ' -k2 -nu \ + |sort -t' ' -k2 -n \ |create_table From 3f48ad1f243d07d6018a4af2762b6538febde689 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: 
Fri, 2 Dec 2022 10:25:24 +0100 Subject: [PATCH 3387/4122] staging: r8188eu: merge two probereq_p2p functions _issue_probereq_p2p is called only by issue_probereq_p2p. Merge the two functions and remove the unused return value. Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221202092525.403887-2-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index 49e0b50b1243..cf7b39cfb8f4 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -3183,9 +3183,8 @@ void issue_probersp_p2p(struct adapter *padapter, unsigned char *da) dump_mgntframe(padapter, pmgntframe); } -static int _issue_probereq_p2p(struct adapter *padapter, u8 *da) +inline void issue_probereq_p2p(struct adapter *padapter, u8 *da) { - int ret = _FAIL; struct xmit_frame *pmgntframe; struct pkt_attrib *pattrib; unsigned char *pframe; @@ -3201,7 +3200,7 @@ static int _issue_probereq_p2p(struct adapter *padapter, u8 *da) pmgntframe = alloc_mgtxmitframe(pxmitpriv); if (!pmgntframe) - goto exit; + return; /* update attribute */ pattrib = &pmgntframe->attrib; @@ -3457,15 +3456,6 @@ static int _issue_probereq_p2p(struct adapter *padapter, u8 *da) pattrib->last_txcmdsz = pattrib->pktlen; dump_mgntframe(padapter, pmgntframe); - ret = _SUCCESS; - -exit: - return ret; -} - -inline void issue_probereq_p2p(struct adapter *adapter, u8 *da) -{ - _issue_probereq_p2p(adapter, da); } static s32 rtw_action_public_decache(struct recv_frame *recv_frame, u8 token) From 3d0862367ab8af1144c0e5495b8bf0e2ce892afe Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Fri, 2 Dec 2022 10:25:25 +0100 Subject: [PATCH 3388/4122] staging: r8188eu: remove unused da parameter All callers of issue_probereq_p2p set the da 
parameter to NULL. Remove this parameter and the code that runs only for da != NULL. Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221202092525.403887-3-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 28 ++++++++----------- drivers/staging/r8188eu/core/rtw_p2p.c | 8 +++--- .../staging/r8188eu/include/rtw_mlme_ext.h | 2 +- 3 files changed, 17 insertions(+), 21 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index cf7b39cfb8f4..95a9470f4c99 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -3183,7 +3183,7 @@ void issue_probersp_p2p(struct adapter *padapter, unsigned char *da) dump_mgntframe(padapter, pmgntframe); } -inline void issue_probereq_p2p(struct adapter *padapter, u8 *da) +inline void issue_probereq_p2p(struct adapter *padapter) { struct xmit_frame *pmgntframe; struct pkt_attrib *pattrib; @@ -3216,20 +3216,16 @@ inline void issue_probereq_p2p(struct adapter *padapter, u8 *da) fctrl = &pwlanhdr->frame_control; *(fctrl) = 0; - if (da) { - memcpy(pwlanhdr->addr1, da, ETH_ALEN); - memcpy(pwlanhdr->addr3, da, ETH_ALEN); + if ((pwdinfo->p2p_info.scan_op_ch_only) || (pwdinfo->rx_invitereq_info.scan_op_ch_only)) { + /* This two flags will be set when this is only the P2P client mode. */ + memcpy(pwlanhdr->addr1, pwdinfo->p2p_peer_interface_addr, ETH_ALEN); + memcpy(pwlanhdr->addr3, pwdinfo->p2p_peer_interface_addr, ETH_ALEN); } else { - if ((pwdinfo->p2p_info.scan_op_ch_only) || (pwdinfo->rx_invitereq_info.scan_op_ch_only)) { - /* This two flags will be set when this is only the P2P client mode. 
*/ - memcpy(pwlanhdr->addr1, pwdinfo->p2p_peer_interface_addr, ETH_ALEN); - memcpy(pwlanhdr->addr3, pwdinfo->p2p_peer_interface_addr, ETH_ALEN); - } else { - /* broadcast probe request frame */ - eth_broadcast_addr(pwlanhdr->addr1); - eth_broadcast_addr(pwlanhdr->addr3); - } + /* broadcast probe request frame */ + eth_broadcast_addr(pwlanhdr->addr1); + eth_broadcast_addr(pwlanhdr->addr3); } + memcpy(pwlanhdr->addr2, mac, ETH_ALEN); SetSeqNum(pwlanhdr, pmlmeext->mgnt_seq); @@ -5867,9 +5863,9 @@ void site_survey(struct adapter *padapter) if (ScanType == SCAN_ACTIVE) { /* obey the channel plan setting... */ if (rtw_p2p_chk_state(pwdinfo, P2P_STATE_SCAN) || rtw_p2p_chk_state(pwdinfo, P2P_STATE_FIND_PHASE_SEARCH)) { - issue_probereq_p2p(padapter, NULL); - issue_probereq_p2p(padapter, NULL); - issue_probereq_p2p(padapter, NULL); + issue_probereq_p2p(padapter); + issue_probereq_p2p(padapter); + issue_probereq_p2p(padapter); } else { int i; for (i = 0; i < RTW_SSID_SCAN_AMOUNT; i++) { diff --git a/drivers/staging/r8188eu/core/rtw_p2p.c b/drivers/staging/r8188eu/core/rtw_p2p.c index ce05458bd1ad..93d3c9c4399c 100644 --- a/drivers/staging/r8188eu/core/rtw_p2p.c +++ b/drivers/staging/r8188eu/core/rtw_p2p.c @@ -1453,7 +1453,7 @@ static void pre_tx_invitereq_handler(struct adapter *padapter) set_channel_bwmode(padapter, pwdinfo->invitereq_info.peer_ch, HAL_PRIME_CHNL_OFFSET_DONT_CARE, HT_CHANNEL_WIDTH_20); rtw_mlme_under_site_survey(padapter); - issue_probereq_p2p(padapter, NULL); + issue_probereq_p2p(padapter); _set_timer(&pwdinfo->pre_tx_scan_timer, P2P_TX_PRESCAN_TIMEOUT); } @@ -1464,7 +1464,7 @@ static void pre_tx_provdisc_handler(struct adapter *padapter) set_channel_bwmode(padapter, pwdinfo->tx_prov_disc_info.peer_channel_num[0], HAL_PRIME_CHNL_OFFSET_DONT_CARE, HT_CHANNEL_WIDTH_20); rtw_mlme_under_site_survey(padapter); - issue_probereq_p2p(padapter, NULL); + issue_probereq_p2p(padapter); _set_timer(&pwdinfo->pre_tx_scan_timer, P2P_TX_PRESCAN_TIMEOUT); } @@ -1475,7 
+1475,7 @@ static void pre_tx_negoreq_handler(struct adapter *padapter) set_channel_bwmode(padapter, pwdinfo->nego_req_info.peer_channel_num[0], HAL_PRIME_CHNL_OFFSET_DONT_CARE, HT_CHANNEL_WIDTH_20); rtw_mlme_under_site_survey(padapter); - issue_probereq_p2p(padapter, NULL); + issue_probereq_p2p(padapter); _set_timer(&pwdinfo->pre_tx_scan_timer, P2P_TX_PRESCAN_TIMEOUT); } @@ -1725,7 +1725,7 @@ static void pre_tx_scan_timer_process(struct timer_list *t) if (rtw_p2p_chk_state(pwdinfo, P2P_STATE_TX_PROVISION_DIS_REQ)) { if (pwdinfo->tx_prov_disc_info.benable) { /* the provision discovery request frame is trigger to send or not */ p2p_protocol_wk_cmd(adapter, P2P_PRE_TX_PROVDISC_PROCESS_WK); - /* issue_probereq_p2p(adapter, NULL); */ + /* issue_probereq_p2p(adapter); */ /* _set_timer(&pwdinfo->pre_tx_scan_timer, P2P_TX_PRESCAN_TIMEOUT); */ } } else if (rtw_p2p_chk_state(pwdinfo, P2P_STATE_GONEGO_ING)) { diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h index 6724424a334e..589de7c54d93 100644 --- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h +++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h @@ -497,7 +497,7 @@ void issue_probersp_p2p(struct adapter *padapter, unsigned char *da); void issue_p2p_provision_request(struct adapter *padapter, u8 *pssid, u8 ussidlen, u8 *pdev_raddr); void issue_p2p_GO_request(struct adapter *padapter, u8 *raddr); -void issue_probereq_p2p(struct adapter *padapter, u8 *da); +void issue_probereq_p2p(struct adapter *padapter); void issue_p2p_invitation_response(struct adapter *padapter, u8 *raddr, u8 dialogToken, u8 success); void issue_p2p_invitation_request(struct adapter *padapter, u8 *raddr); From eb0b3f501e4dd5e6668bddbbb3c20ecd1545b60d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 30 Nov 2022 11:36:13 -0800 Subject: [PATCH 3389/4122] Revert "perf stat: Rename "aggregate-number" to "cpu-count" in JSON" This reverts commit c4b41b83c25073c09bfcc4e5ec496c9dd316656b. 
As Ian said, the "cpu-count" is not appropriate for uncore events; it also caused a perf test failure. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221130193613.1046804-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 847acdb5dc40..f1ee4b052198 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -281,19 +281,19 @@ static void print_aggr_id_json(struct perf_stat_config *config, switch (config->aggr_mode) { case AGGR_CORE: - fprintf(output, "\"core\" : \"S%d-D%d-C%d\", \"cpu-count\" : %d, ", + fprintf(output, "\"core\" : \"S%d-D%d-C%d\", \"aggregate-number\" : %d, ", id.socket, id.die, id.core, nr); break; case AGGR_DIE: - fprintf(output, "\"die\" : \"S%d-D%d\", \"cpu-count\" : %d, ", + fprintf(output, "\"die\" : \"S%d-D%d\", \"aggregate-number\" : %d, ", id.socket, id.die, nr); break; case AGGR_SOCKET: - fprintf(output, "\"socket\" : \"S%d\", \"cpu-count\" : %d, ", + fprintf(output, "\"socket\" : \"S%d\", \"aggregate-number\" : %d, ", id.socket, nr); break; case AGGR_NODE: - fprintf(output, "\"node\" : \"N%d\", \"cpu-count\" : %d, ", + fprintf(output, "\"node\" : \"N%d\", \"aggregate-number\" : %d, ", id.node, nr); break; case AGGR_NONE: From 506783ffa96f953eea94d6ff99c4e908965fc383 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Fri, 2 Dec 2022 10:31:58 +0100 Subject: [PATCH 3390/4122] staging: r8188eu: use subtype helpers in collect_bss_info Use the ieee80211 helper functions to check the frame subtype in collect_bss_info. Replace the call to the driver-specific GetFrameSubType function. 
Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221202093159.404111-2-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_mlme_ext.c | 26 +++++++++------------ 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c index 95a9470f4c99..1b9cf7596a76 100644 --- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c +++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c @@ -5963,10 +5963,11 @@ void site_survey(struct adapter *padapter) /* collect bss info from Beacon and Probe request/response frames. */ u8 collect_bss_info(struct adapter *padapter, struct recv_frame *precv_frame, struct wlan_bssid_ex *bssid) { + struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)precv_frame->rx_data; int i; u32 len; u8 *p; - u16 val16, subtype; + u16 val16; u8 *pframe = precv_frame->rx_data; u32 packet_len = precv_frame->len; u8 ie_offset; @@ -5982,23 +5983,18 @@ u8 collect_bss_info(struct adapter *padapter, struct recv_frame *precv_frame, st memset(bssid, 0, sizeof(struct wlan_bssid_ex)); - subtype = GetFrameSubType(pframe); - - if (subtype == WIFI_BEACON) { + if (ieee80211_is_beacon(mgmt->frame_control)) { bssid->Reserved[0] = 1; ie_offset = _BEACON_IE_OFFSET_; + } else if (ieee80211_is_probe_req(mgmt->frame_control)) { + ie_offset = _PROBEREQ_IE_OFFSET_; + bssid->Reserved[0] = 2; + } else if (ieee80211_is_probe_resp(mgmt->frame_control)) { + ie_offset = _PROBERSP_IE_OFFSET_; + bssid->Reserved[0] = 3; } else { - /* FIXME : more type */ - if (subtype == WIFI_PROBEREQ) { - ie_offset = _PROBEREQ_IE_OFFSET_; - bssid->Reserved[0] = 2; - } else if (subtype == WIFI_PROBERSP) { - ie_offset = _PROBERSP_IE_OFFSET_; - bssid->Reserved[0] = 3; - } else { - bssid->Reserved[0] = 0; - ie_offset = _FIXED_IE_LENGTH_; - } + bssid->Reserved[0] = 0; + ie_offset = _FIXED_IE_LENGTH_; } bssid->Length = sizeof(struct 
wlan_bssid_ex) - MAX_IE_SZ + len; From 7868f8f858968066b23287c9733e777a8407384c Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Fri, 2 Dec 2022 10:31:59 +0100 Subject: [PATCH 3391/4122] staging: r8188eu: use subtype helper in rtw_check_bcn_info Use ieee80211_is_beacon to check the frame subtype in rtw_check_bcn_info. Replace the call to the driver-specific GetFrameSubType function. Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Martin Kaiser Link: https://lore.kernel.org/r/20221202093159.404111-3-martin@kaiser.cx Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_wlan_util.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_wlan_util.c b/drivers/staging/r8188eu/core/rtw_wlan_util.c index da3465d6bb0f..f1ebb5358cb9 100644 --- a/drivers/staging/r8188eu/core/rtw_wlan_util.c +++ b/drivers/staging/r8188eu/core/rtw_wlan_util.c @@ -874,9 +874,10 @@ void VCS_update(struct adapter *padapter, struct sta_info *psta) int rtw_check_bcn_info(struct adapter *Adapter, u8 *pframe, u32 packet_len) { + struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)pframe; unsigned int len; unsigned char *p; - unsigned short val16, subtype; + unsigned short val16; struct wlan_network *cur_network = &Adapter->mlmepriv.cur_network; /* u8 wpa_ie[255], rsn_ie[255]; */ u16 wpa_len = 0, rsn_len = 0; @@ -908,9 +909,7 @@ int rtw_check_bcn_info(struct adapter *Adapter, u8 *pframe, u32 packet_len) if (!bssid) return _FAIL; - subtype = GetFrameSubType(pframe) >> 4; - - if (subtype == WIFI_BEACON) + if (ieee80211_is_beacon(mgmt->frame_control)) bssid->Reserved[0] = 1; bssid->Length = sizeof(struct wlan_bssid_ex) - MAX_IE_SZ + len; From bf44039f1ee828c47e1eb83eb315f0fed5a66126 Mon Sep 17 00:00:00 2001 From: Matt Jan Date: Sun, 4 Dec 2022 18:19:16 +0800 Subject: [PATCH 3392/4122] staging: vme_user: remove multiple blank lines Remove multiple unnecessary blank lines in accordance with the Linux kernel coding-style 
regulations. The issues were reported by the checkpatch script. Signed-off-by: Matt Jan Link: https://lore.kernel.org/r/20221204101916.GA51992@zoo868e Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vme_user/vme.h | 2 -- drivers/staging/vme_user/vme_fake.c | 5 ----- drivers/staging/vme_user/vme_tsi148.c | 6 ------ drivers/staging/vme_user/vme_tsi148.h | 7 ------- 4 files changed, 20 deletions(-) diff --git a/drivers/staging/vme_user/vme.h b/drivers/staging/vme_user/vme.h index b204a9b4be1b..98da8d039d60 100644 --- a/drivers/staging/vme_user/vme.h +++ b/drivers/staging/vme_user/vme.h @@ -27,7 +27,6 @@ enum vme_resource_type { #define VME_A64_MAX 0x10000000000000000ULL #define VME_CRCSR_MAX 0x1000000ULL - /* VME Cycle Types */ #define VME_SCT 0x1 #define VME_BLT 0x2 @@ -185,6 +184,5 @@ int vme_bus_num(struct vme_dev *); int vme_register_driver(struct vme_driver *, unsigned int); void vme_unregister_driver(struct vme_driver *); - #endif /* _VME_H_ */ diff --git a/drivers/staging/vme_user/vme_fake.c b/drivers/staging/vme_user/vme_fake.c index dd646b0c531d..a9d3a7f5c440 100644 --- a/drivers/staging/vme_user/vme_fake.c +++ b/drivers/staging/vme_user/vme_fake.c @@ -356,7 +356,6 @@ static int __fake_master_get(struct vme_master_resource *image, int *enabled, return 0; } - static int fake_master_get(struct vme_master_resource *image, int *enabled, unsigned long long *vme_base, unsigned long long *size, u32 *aspace, u32 *cycle, u32 *dwidth) @@ -373,7 +372,6 @@ static int fake_master_get(struct vme_master_resource *image, int *enabled, return retval; } - static void fake_lm_check(struct fake_driver *bridge, unsigned long long addr, u32 aspace, u32 cycle) { @@ -1060,7 +1058,6 @@ static void fake_crcsr_exit(struct vme_bridge *fake_bridge) kfree(bridge->crcsr_kernel); } - static int __init fake_init(void) { int retval, i; @@ -1238,7 +1235,6 @@ err_struct: } - static void __exit fake_exit(void) { struct list_head *pos = NULL; @@ -1294,7 +1290,6 @@ static void __exit 
fake_exit(void) root_device_unregister(vme_root); } - MODULE_PARM_DESC(geoid, "Set geographical addressing"); module_param(geoid, int, 0); diff --git a/drivers/staging/vme_user/vme_tsi148.c b/drivers/staging/vme_user/vme_tsi148.c index 0171f46d1848..482049cfc664 100644 --- a/drivers/staging/vme_user/vme_tsi148.c +++ b/drivers/staging/vme_user/vme_tsi148.c @@ -34,7 +34,6 @@ static int tsi148_probe(struct pci_dev *, const struct pci_device_id *); static void tsi148_remove(struct pci_dev *); - /* Module parameter */ static bool err_chk; static int geoid; @@ -673,7 +672,6 @@ static int tsi148_slave_get(struct vme_slave_resource *image, int *enabled, /* Need granularity before we set the size */ *size = (unsigned long long)((vme_bound - *vme_base) + granularity); - if ((ctl & TSI148_LCSR_ITAT_2eSSTM_M) == TSI148_LCSR_ITAT_2eSSTM_160) *cycle |= VME_2eSST160; if ((ctl & TSI148_LCSR_ITAT_2eSSTM_M) == TSI148_LCSR_ITAT_2eSSTM_267) @@ -1142,7 +1140,6 @@ static int __tsi148_master_get(struct vme_master_resource *image, int *enabled, return 0; } - static int tsi148_master_get(struct vme_master_resource *image, int *enabled, unsigned long long *vme_base, unsigned long long *size, u32 *aspace, u32 *cycle, u32 *dwidth) @@ -1244,7 +1241,6 @@ out: return retval; } - static ssize_t tsi148_master_write(struct vme_master_resource *image, void *buf, size_t count, loff_t offset) { @@ -2000,7 +1996,6 @@ static int tsi148_lm_get(struct vme_lm_resource *lm, if ((lm_ctl & TSI148_LCSR_LMAT_AS_M) == TSI148_LCSR_LMAT_AS_A64) *aspace |= VME_A64; - if (lm_ctl & TSI148_LCSR_LMAT_SUPR) *cycle |= VME_SUPER; if (lm_ctl & TSI148_LCSR_LMAT_NPRIV) @@ -2551,7 +2546,6 @@ static void tsi148_remove(struct pci_dev *pdev) bridge = tsi148_bridge->driver_priv; - dev_dbg(&pdev->dev, "Driver is being unloaded.\n"); /* diff --git a/drivers/staging/vme_user/vme_tsi148.h b/drivers/staging/vme_user/vme_tsi148.h index 226fedc6f167..b3cb4a089cc8 100644 --- a/drivers/staging/vme_user/vme_tsi148.h +++ 
b/drivers/staging/vme_user/vme_tsi148.h @@ -87,7 +87,6 @@ struct tsi148_dma_entry { * Control and Status Registers */ - /* * Command/Status Registers (CRG + $004) */ @@ -342,7 +341,6 @@ static const int TSI148_LCSR_VIACK[8] = { 0, TSI148_LCSR_VIACK1, #define TSI148_LCSR_IT7_ITOFL 0x3F4 #define TSI148_LCSR_IT7_ITAT 0x3F8 - #define TSI148_LCSR_IT0 0x300 #define TSI148_LCSR_IT1 0x320 #define TSI148_LCSR_IT2 0x340 @@ -464,7 +462,6 @@ static const int TSI148_LCSR_IT[8] = { TSI148_LCSR_IT0, TSI148_LCSR_IT1, #define TSI148_LCSR_DMA0 0x500 #define TSI148_LCSR_DMA1 0x580 - static const int TSI148_LCSR_DMA[TSI148_MAX_DMA] = { TSI148_LCSR_DMA0, TSI148_LCSR_DMA1 }; @@ -532,9 +529,6 @@ static const int TSI148_GCSR_MBOX[4] = { TSI148_GCSR_MBOX0, #define TSI148_CSRBSR 0xFF8 #define TSI148_CBAR 0xFFC - - - /* * TSI148 Register Bit Definitions */ @@ -828,7 +822,6 @@ static const int TSI148_GCSR_MBOX[4] = { TSI148_GCSR_MBOX0, #define TSI148_LCSR_VEAT_AM_M (0x3F<<8) /* Address Mode Mask */ #define TSI148_LCSR_VEAT_XAM_M (0xFF<<0) /* Master AMode Mask */ - /* * VMEbus PCI Error Diagnostics PCI/X Attributes Register CRG + $280 */ From 7bef797d707f1744f71156b21d41e3b8c946631f Mon Sep 17 00:00:00 2001 From: Chen Zhongjin Date: Mon, 5 Dec 2022 16:48:05 +0800 Subject: [PATCH 3393/4122] vme: Fix error not caught in fake_init() In fake_init(), __root_device_register() can fail, but its error return is ignored, which can cause unregistering vme_root to fail on module exit. general protection fault, probably for non-canonical address 0xdffffc000000008c KASAN: null-ptr-deref in range [0x0000000000000460-0x0000000000000467] RIP: 0010:root_device_unregister+0x26/0x60 Call Trace: __x64_sys_delete_module+0x34f/0x540 do_syscall_64+0x38/0x90 entry_SYSCALL_64_after_hwframe+0x63/0xcd Return error when __root_device_register() fails.
Fixes: 658bcdae9c67 ("vme: Adding Fake VME driver") Signed-off-by: Chen Zhongjin Link: https://lore.kernel.org/r/20221205084805.147436-1-chenzhongjin@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vme_user/vme_fake.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/staging/vme_user/vme_fake.c b/drivers/staging/vme_user/vme_fake.c index a9d3a7f5c440..6454ccf6e5b4 100644 --- a/drivers/staging/vme_user/vme_fake.c +++ b/drivers/staging/vme_user/vme_fake.c @@ -1070,6 +1070,8 @@ static int __init fake_init(void) /* We need a fake parent device */ vme_root = __root_device_register("vme", THIS_MODULE); + if (IS_ERR(vme_root)) + return PTR_ERR(vme_root); /* If we want to support more than one bridge at some point, we need to * dynamically allocate this so we get one per device. From f794eec86c7cd9a340a66109e6f32f8659ff30fa Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 8 Sep 2022 15:44:58 -0300 Subject: [PATCH 3394/4122] vfio: Simplify vfio_create_group() The vfio.group_lock is now only used to serialize vfio_group creation and destruction, we don't need a micro-optimization of searching, unlocking, then allocating and searching again. Just hold the lock the whole time. 
Grabbed from: https://lore.kernel.org/kvm/20220922152338.2a2238fe.alex.williamson@redhat.com/ Link: https://lore.kernel.org/r/20221201145535.589687-2-yi.l.liu@intel.com Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Tested-by: Lixiao Yang Tested-by: Yu He Signed-off-by: Jason Gunthorpe Signed-off-by: Alex Williamson Signed-off-by: Yi Liu --- drivers/vfio/vfio_main.c | 33 ++++++++++----------------------- 1 file changed, 10 insertions(+), 23 deletions(-) diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index 6d51b700764e..f913d862a386 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -143,10 +143,12 @@ EXPORT_SYMBOL_GPL(vfio_device_set_open_count); * Group objects - create, release, get, put, search */ static struct vfio_group * -__vfio_group_get_from_iommu(struct iommu_group *iommu_group) +vfio_group_get_from_iommu(struct iommu_group *iommu_group) { struct vfio_group *group; + lockdep_assert_held(&vfio.group_lock); + /* * group->iommu_group from the vfio.group_list cannot be NULL * under the vfio.group_lock. @@ -160,17 +162,6 @@ __vfio_group_get_from_iommu(struct iommu_group *iommu_group) return NULL; } -static struct vfio_group * -vfio_group_get_from_iommu(struct iommu_group *iommu_group) -{ - struct vfio_group *group; - - mutex_lock(&vfio.group_lock); - group = __vfio_group_get_from_iommu(iommu_group); - mutex_unlock(&vfio.group_lock); - return group; -} - static void vfio_group_release(struct device *dev) { struct vfio_group *group = container_of(dev, struct vfio_group, dev); @@ -225,6 +216,8 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group, struct vfio_group *ret; int err; + lockdep_assert_held(&vfio.group_lock); + group = vfio_group_alloc(iommu_group, type); if (IS_ERR(group)) return group; @@ -237,26 +230,16 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group, goto err_put; } - mutex_lock(&vfio.group_lock); - - /* Did we race creating this group? 
*/ - ret = __vfio_group_get_from_iommu(iommu_group); - if (ret) - goto err_unlock; - err = cdev_device_add(&group->cdev, &group->dev); if (err) { ret = ERR_PTR(err); - goto err_unlock; + goto err_put; } list_add(&group->vfio_next, &vfio.group_list); - mutex_unlock(&vfio.group_lock); return group; -err_unlock: - mutex_unlock(&vfio.group_lock); err_put: put_device(&group->dev); return ret; @@ -467,7 +450,9 @@ static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev, if (ret) goto out_put_group; + mutex_lock(&vfio.group_lock); group = vfio_create_group(iommu_group, type); + mutex_unlock(&vfio.group_lock); if (IS_ERR(group)) { ret = PTR_ERR(group); goto out_remove_device; @@ -516,9 +501,11 @@ static struct vfio_group *vfio_group_find_or_alloc(struct device *dev) return ERR_PTR(-EINVAL); } + mutex_lock(&vfio.group_lock); group = vfio_group_get_from_iommu(iommu_group); if (!group) group = vfio_create_group(iommu_group, VFIO_IOMMU); + mutex_unlock(&vfio.group_lock); /* The vfio_group holds a reference to the iommu_group */ iommu_group_put(iommu_group); From dcb93d0364a238315dd71f834b199d4b95ae09eb Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 8 Sep 2022 15:44:59 -0300 Subject: [PATCH 3395/4122] vfio: Move the sanity check of the group to vfio_create_group() This avoids opening group specific code in __vfio_register_dev() for the sanity check if an (existing) group is not corrupted by having two copies of the same struct device in it. It also simplifies the error unwind for this sanity check since the failure can be detected in the group allocation. This also prepares for moving the group specific code into separate group.c. 
Grabbed from: https://lore.kernel.org/kvm/20220922152338.2a2238fe.alex.williamson@redhat.com/ Link: https://lore.kernel.org/r/20221201145535.589687-3-yi.l.liu@intel.com Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Tested-by: Lixiao Yang Tested-by: Yu He Signed-off-by: Jason Gunthorpe Signed-off-by: Alex Williamson Signed-off-by: Yi Liu --- drivers/vfio/vfio_main.c | 62 ++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 37 deletions(-) diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index f913d862a386..87d9a1670a2f 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -143,7 +143,7 @@ EXPORT_SYMBOL_GPL(vfio_device_set_open_count); * Group objects - create, release, get, put, search */ static struct vfio_group * -vfio_group_get_from_iommu(struct iommu_group *iommu_group) +vfio_group_find_from_iommu(struct iommu_group *iommu_group) { struct vfio_group *group; @@ -154,10 +154,8 @@ vfio_group_get_from_iommu(struct iommu_group *iommu_group) * under the vfio.group_lock. 
*/ list_for_each_entry(group, &vfio.group_list, vfio_next) { - if (group->iommu_group == iommu_group) { - refcount_inc(&group->drivers); + if (group->iommu_group == iommu_group) return group; - } } return NULL; } @@ -307,23 +305,6 @@ static bool vfio_device_try_get_registration(struct vfio_device *device) return refcount_inc_not_zero(&device->refcount); } -static struct vfio_device *vfio_group_get_device(struct vfio_group *group, - struct device *dev) -{ - struct vfio_device *device; - - mutex_lock(&group->device_lock); - list_for_each_entry(device, &group->device_list, group_next) { - if (device->dev == dev && - vfio_device_try_get_registration(device)) { - mutex_unlock(&group->device_lock); - return device; - } - } - mutex_unlock(&group->device_lock); - return NULL; -} - /* * VFIO driver API */ @@ -467,6 +448,21 @@ out_put_group: return ERR_PTR(ret); } +static bool vfio_group_has_device(struct vfio_group *group, struct device *dev) +{ + struct vfio_device *device; + + mutex_lock(&group->device_lock); + list_for_each_entry(device, &group->device_list, group_next) { + if (device->dev == dev) { + mutex_unlock(&group->device_lock); + return true; + } + } + mutex_unlock(&group->device_lock); + return false; +} + static struct vfio_group *vfio_group_find_or_alloc(struct device *dev) { struct iommu_group *iommu_group; @@ -502,9 +498,15 @@ static struct vfio_group *vfio_group_find_or_alloc(struct device *dev) } mutex_lock(&vfio.group_lock); - group = vfio_group_get_from_iommu(iommu_group); - if (!group) + group = vfio_group_find_from_iommu(iommu_group); + if (group) { + if (WARN_ON(vfio_group_has_device(group, dev))) + group = ERR_PTR(-EINVAL); + else + refcount_inc(&group->drivers); + } else { group = vfio_create_group(iommu_group, VFIO_IOMMU); + } mutex_unlock(&vfio.group_lock); /* The vfio_group holds a reference to the iommu_group */ @@ -515,7 +517,6 @@ static struct vfio_group *vfio_group_find_or_alloc(struct device *dev) static int __vfio_register_dev(struct 
vfio_device *device, struct vfio_group *group) { - struct vfio_device *existing_device; int ret; /* @@ -537,19 +538,6 @@ static int __vfio_register_dev(struct vfio_device *device, if (!device->dev_set) vfio_assign_device_set(device, device); - existing_device = vfio_group_get_device(group, device->dev); - if (existing_device) { - /* - * group->iommu_group is non-NULL because we hold the drivers - * refcount. - */ - dev_WARN(device->dev, "Device already exists on group %d\n", - iommu_group_id(group->iommu_group)); - vfio_device_put_registration(existing_device); - ret = -EBUSY; - goto err_out; - } - /* Our reference on group is moved to the device */ device->group = group; From 32e0922821f2115eb8940b6a6b942ba61eff15c2 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 23 Sep 2022 02:19:34 -0700 Subject: [PATCH 3396/4122] vfio: Create wrappers for group register/unregister This avoids decoding group fields in the common functions used by vfio_device registration, and prepares for further moving the vfio group specific code into separate file. 
Link: https://lore.kernel.org/r/20221201145535.589687-4-yi.l.liu@intel.com Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Reviewed-by: Alex Williamson Tested-by: Lixiao Yang Tested-by: Yu He Signed-off-by: Yi Liu Signed-off-by: Jason Gunthorpe --- drivers/vfio/vfio_main.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index 87d9a1670a2f..a5122fa4bf4d 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -514,6 +514,20 @@ static struct vfio_group *vfio_group_find_or_alloc(struct device *dev) return group; } +static void vfio_device_group_register(struct vfio_device *device) +{ + mutex_lock(&device->group->device_lock); + list_add(&device->group_next, &device->group->device_list); + mutex_unlock(&device->group->device_lock); +} + +static void vfio_device_group_unregister(struct vfio_device *device) +{ + mutex_lock(&device->group->device_lock); + list_del(&device->group_next); + mutex_unlock(&device->group->device_lock); +} + static int __vfio_register_dev(struct vfio_device *device, struct vfio_group *group) { @@ -552,9 +566,7 @@ static int __vfio_register_dev(struct vfio_device *device, /* Refcounting can't start until the driver calls register */ refcount_set(&device->refcount, 1); - mutex_lock(&group->device_lock); - list_add(&device->group_next, &group->device_list); - mutex_unlock(&group->device_lock); + vfio_device_group_register(device); return 0; err_out: @@ -614,7 +626,6 @@ static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group, * removed. Open file descriptors for the device... 
*/ void vfio_unregister_group_dev(struct vfio_device *device) { - struct vfio_group *group = device->group; unsigned int i = 0; bool interrupted = false; long rc; @@ -642,9 +653,7 @@ void vfio_unregister_group_dev(struct vfio_device *device) } } - mutex_lock(&group->device_lock); - list_del(&device->group_next); - mutex_unlock(&group->device_lock); + vfio_device_group_unregister(device); /* Balances device_add in register path */ device_del(&device->device); From 49ea02d390a34a538a3f54b9ce5665e474690bc7 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Thu, 24 Nov 2022 21:22:27 -0800 Subject: [PATCH 3397/4122] vfio: Set device->group in helper function This avoids referencing device->group in __vfio_register_dev(). Link: https://lore.kernel.org/r/20221201145535.589687-5-yi.l.liu@intel.com Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Alex Williamson Tested-by: Lixiao Yang Tested-by: Yu He Signed-off-by: Yi Liu Signed-off-by: Jason Gunthorpe --- drivers/vfio/vfio_main.c | 41 +++++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index a5122fa4bf4d..7e42ee0ee1bc 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -528,18 +528,29 @@ static void vfio_device_group_unregister(struct vfio_device *device) mutex_unlock(&device->group->device_lock); } -static int __vfio_register_dev(struct vfio_device *device, - struct vfio_group *group) +static int vfio_device_set_group(struct vfio_device *device, + enum vfio_group_type type) { - int ret; + struct vfio_group *group; + + if (type == VFIO_IOMMU) + group = vfio_group_find_or_alloc(device->dev); + else + group = vfio_noiommu_group_alloc(device->dev, type); - /* - * In all cases group is the output of one of the group allocation - * functions and we have group->drivers incremented for us. 
- */ if (IS_ERR(group)) return PTR_ERR(group); + /* Our reference on group is moved to the device */ + device->group = group; + return 0; +} + +static int __vfio_register_dev(struct vfio_device *device, + enum vfio_group_type type) +{ + int ret; + if (WARN_ON(device->ops->bind_iommufd && (!device->ops->unbind_iommufd || !device->ops->attach_ioas))) @@ -552,12 +563,13 @@ static int __vfio_register_dev(struct vfio_device *device, if (!device->dev_set) vfio_assign_device_set(device, device); - /* Our reference on group is moved to the device */ - device->group = group; - ret = dev_set_name(&device->device, "vfio%d", device->index); if (ret) - goto err_out; + return ret; + + ret = vfio_device_set_group(device, type); + if (ret) + return ret; ret = device_add(&device->device); if (ret) @@ -576,8 +588,7 @@ err_out: int vfio_register_group_dev(struct vfio_device *device) { - return __vfio_register_dev(device, - vfio_group_find_or_alloc(device->dev)); + return __vfio_register_dev(device, VFIO_IOMMU); } EXPORT_SYMBOL_GPL(vfio_register_group_dev); @@ -587,8 +598,7 @@ EXPORT_SYMBOL_GPL(vfio_register_group_dev); */ int vfio_register_emulated_iommu_dev(struct vfio_device *device) { - return __vfio_register_dev(device, - vfio_noiommu_group_alloc(device->dev, VFIO_EMULATED_IOMMU)); + return __vfio_register_dev(device, VFIO_EMULATED_IOMMU); } EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev); @@ -658,6 +668,7 @@ void vfio_unregister_group_dev(struct vfio_device *device) /* Balances device_add in register path */ device_del(&device->device); + /* Balances vfio_device_set_group in register path */ vfio_device_remove_group(device); } EXPORT_SYMBOL_GPL(vfio_unregister_group_dev); From 07b465863325faceb865871ff5f22c1ebba6df54 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Thu, 10 Nov 2022 07:26:09 -0800 Subject: [PATCH 3398/4122] vfio: Swap order of vfio_device_container_register() and open_device() This makes the DMA unmap callback registration to container be consistent across the 
vfio iommufd compat mode and the legacy container mode. In the vfio iommufd compat mode, this registration is done in the vfio_iommufd_bind() when creating access which has an unmap callback. This is prior to calling the open_device() op. The existing mdev drivers have been converted to be OK with this order. So it is ok to swap the order of vfio_device_container_register() and open_device() for legacy mode. This also prepares for further moving group specific code into separate source file. Link: https://lore.kernel.org/r/20221201145535.589687-6-yi.l.liu@intel.com Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Reviewed-by: Alex Williamson Tested-by: Lixiao Yang Tested-by: Yu He Signed-off-by: Yi Liu Signed-off-by: Jason Gunthorpe --- drivers/vfio/vfio_main.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index 7e42ee0ee1bc..5dddf962f650 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -807,6 +807,7 @@ static int vfio_device_first_open(struct vfio_device *device) ret = vfio_group_use_container(device->group); if (ret) goto err_module_put; + vfio_device_container_register(device); } else if (device->group->iommufd) { ret = vfio_iommufd_bind(device, device->group->iommufd); if (ret) @@ -819,17 +820,17 @@ static int vfio_device_first_open(struct vfio_device *device) if (ret) goto err_container; } - if (device->group->container) - vfio_device_container_register(device); mutex_unlock(&device->group->group_lock); return 0; err_container: device->kvm = NULL; - if (device->group->container) + if (device->group->container) { + vfio_device_container_unregister(device); vfio_group_unuse_container(device->group); - else if (device->group->iommufd) + } else if (device->group->iommufd) { vfio_iommufd_unbind(device); + } err_module_put: mutex_unlock(&device->group->group_lock); module_put(device->dev->driver->owner); @@ -841,15 +842,15 @@ static void 
vfio_device_last_close(struct vfio_device *device) lockdep_assert_held(&device->dev_set->lock); mutex_lock(&device->group->group_lock); - if (device->group->container) - vfio_device_container_unregister(device); if (device->ops->close_device) device->ops->close_device(device); device->kvm = NULL; - if (device->group->container) + if (device->group->container) { + vfio_device_container_unregister(device); vfio_group_unuse_container(device->group); - else if (device->group->iommufd) + } else if (device->group->iommufd) { vfio_iommufd_unbind(device); + } mutex_unlock(&device->group->group_lock); module_put(device->dev->driver->owner); } From 5cfff0774353ee35601e3d3fe2f0bd95c33aa5db Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 30 Sep 2022 03:22:55 -0700 Subject: [PATCH 3399/4122] vfio: Make vfio_device_open() truly device specific Then move group related logic into vfio_device_open_file(). Accordingly introduce a vfio_device_close() to pair up. Link: https://lore.kernel.org/r/20221201145535.589687-7-yi.l.liu@intel.com Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Reviewed-by: Alex Williamson Tested-by: Lixiao Yang Tested-by: Yu He Signed-off-by: Yi Liu Signed-off-by: Jason Gunthorpe --- drivers/vfio/vfio_main.c | 46 +++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index 5dddf962f650..37413ac254c0 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -855,20 +855,41 @@ static void vfio_device_last_close(struct vfio_device *device) module_put(device->dev->driver->owner); } -static struct file *vfio_device_open(struct vfio_device *device) +static int vfio_device_open(struct vfio_device *device) { - struct file *filep; - int ret; + int ret = 0; mutex_lock(&device->dev_set->lock); device->open_count++; if (device->open_count == 1) { ret = vfio_device_first_open(device); if (ret) - goto err_unlock; + device->open_count--; } 
mutex_unlock(&device->dev_set->lock); + return ret; +} + +static void vfio_device_close(struct vfio_device *device) +{ + mutex_lock(&device->dev_set->lock); + vfio_assert_device_open(device); + if (device->open_count == 1) + vfio_device_last_close(device); + device->open_count--; + mutex_unlock(&device->dev_set->lock); +} + +static struct file *vfio_device_open_file(struct vfio_device *device) +{ + struct file *filep; + int ret; + + ret = vfio_device_open(device); + if (ret) + goto err_out; + /* * We can't use anon_inode_getfd() because we need to modify * the f_mode flags directly to allow more than just ioctls @@ -897,12 +918,8 @@ static struct file *vfio_device_open(struct vfio_device *device) return filep; err_close_device: - mutex_lock(&device->dev_set->lock); - if (device->open_count == 1) - vfio_device_last_close(device); -err_unlock: - device->open_count--; - mutex_unlock(&device->dev_set->lock); + vfio_device_close(device); +err_out: return ERR_PTR(ret); } @@ -930,7 +947,7 @@ static int vfio_group_ioctl_get_device_fd(struct vfio_group *group, goto err_put_device; } - filep = vfio_device_open(device); + filep = vfio_device_open_file(device); if (IS_ERR(filep)) { ret = PTR_ERR(filep); goto err_put_fdno; @@ -1113,12 +1130,7 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep) { struct vfio_device *device = filep->private_data; - mutex_lock(&device->dev_set->lock); - vfio_assert_device_open(device); - if (device->open_count == 1) - vfio_device_last_close(device); - device->open_count--; - mutex_unlock(&device->dev_set->lock); + vfio_device_close(device); vfio_device_put_registration(device); From 5c8d3d93f6a7c9371212690b0195160e5f88bdff Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Wed, 2 Nov 2022 04:42:25 -0700 Subject: [PATCH 3400/4122] vfio: Refactor vfio_device open and close This refactor makes the vfio_device_open() to accept device, iommufd_ctx pointer and kvm pointer. 
These parameters are generic items in today's group path and future device cdev path. Callers of vfio_device_open() should take care of the necessary protections, e.g. the current group path needs to hold the group_lock to ensure the iommufd_ctx and kvm pointer are valid. This refactor also wraps the group specific code in the device open and close paths into paired helpers like: - vfio_device_group_open/close(): call vfio_device_open/close() - vfio_device_group_use/unuse_iommu(): this pair is container specific. iommufd vs. container is selected in vfio_device_first_open(). Such helpers are supposed to be moved to group.c, while the iommufd related code will be kept in the generic helpers since the future device cdev path also needs to handle iommufd. Link: https://lore.kernel.org/r/20221201145535.589687-8-yi.l.liu@intel.com Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Reviewed-by: Alex Williamson Tested-by: Lixiao Yang Tested-by: Yu He Signed-off-by: Yi Liu Signed-off-by: Jason Gunthorpe --- drivers/vfio/vfio_main.c | 133 +++++++++++++++++++++++++-------------- 1 file changed, 87 insertions(+), 46 deletions(-) diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index 37413ac254c0..a4583f4827e5 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -783,7 +783,38 @@ static bool vfio_assert_device_open(struct vfio_device *device) return !WARN_ON_ONCE(!READ_ONCE(device->open_count)); } -static int vfio_device_first_open(struct vfio_device *device) +static int vfio_device_group_use_iommu(struct vfio_device *device) +{ + struct vfio_group *group = device->group; + int ret = 0; + + lockdep_assert_held(&group->group_lock); + + if (WARN_ON(!group->container)) + return -EINVAL; + + ret = vfio_group_use_container(group); + if (ret) + return ret; + vfio_device_container_register(device); + return 0; +} + +static void vfio_device_group_unuse_iommu(struct vfio_device *device) +{ + struct vfio_group *group = device->group; + +
lockdep_assert_held(&group->group_lock); + + if (WARN_ON(!group->container)) + return; + + vfio_device_container_unregister(device); + vfio_group_unuse_container(group); +} + +static int vfio_device_first_open(struct vfio_device *device, + struct iommufd_ctx *iommufd, struct kvm *kvm) { int ret; @@ -792,77 +823,56 @@ static int vfio_device_first_open(struct vfio_device *device) if (!try_module_get(device->dev->driver->owner)) return -ENODEV; - /* - * Here we pass the KVM pointer with the group under the lock. If the - * device driver will use it, it must obtain a reference and release it - * during close_device. - */ - mutex_lock(&device->group->group_lock); - if (!vfio_group_has_iommu(device->group)) { - ret = -EINVAL; + if (iommufd) + ret = vfio_iommufd_bind(device, iommufd); + else + ret = vfio_device_group_use_iommu(device); + if (ret) goto err_module_put; - } - if (device->group->container) { - ret = vfio_group_use_container(device->group); - if (ret) - goto err_module_put; - vfio_device_container_register(device); - } else if (device->group->iommufd) { - ret = vfio_iommufd_bind(device, device->group->iommufd); - if (ret) - goto err_module_put; - } - - device->kvm = device->group->kvm; + device->kvm = kvm; if (device->ops->open_device) { ret = device->ops->open_device(device); if (ret) - goto err_container; + goto err_unuse_iommu; } - mutex_unlock(&device->group->group_lock); return 0; -err_container: +err_unuse_iommu: device->kvm = NULL; - if (device->group->container) { - vfio_device_container_unregister(device); - vfio_group_unuse_container(device->group); - } else if (device->group->iommufd) { + if (iommufd) vfio_iommufd_unbind(device); - } + else + vfio_device_group_unuse_iommu(device); err_module_put: - mutex_unlock(&device->group->group_lock); module_put(device->dev->driver->owner); return ret; } -static void vfio_device_last_close(struct vfio_device *device) +static void vfio_device_last_close(struct vfio_device *device, + struct iommufd_ctx *iommufd) 
{ lockdep_assert_held(&device->dev_set->lock); - mutex_lock(&device->group->group_lock); if (device->ops->close_device) device->ops->close_device(device); device->kvm = NULL; - if (device->group->container) { - vfio_device_container_unregister(device); - vfio_group_unuse_container(device->group); - } else if (device->group->iommufd) { + if (iommufd) vfio_iommufd_unbind(device); - } - mutex_unlock(&device->group->group_lock); + else + vfio_device_group_unuse_iommu(device); module_put(device->dev->driver->owner); } -static int vfio_device_open(struct vfio_device *device) +static int vfio_device_open(struct vfio_device *device, + struct iommufd_ctx *iommufd, struct kvm *kvm) { int ret = 0; mutex_lock(&device->dev_set->lock); device->open_count++; if (device->open_count == 1) { - ret = vfio_device_first_open(device); + ret = vfio_device_first_open(device, iommufd, kvm); if (ret) device->open_count--; } @@ -871,22 +881,53 @@ static int vfio_device_open(struct vfio_device *device) return ret; } -static void vfio_device_close(struct vfio_device *device) +static void vfio_device_close(struct vfio_device *device, + struct iommufd_ctx *iommufd) { mutex_lock(&device->dev_set->lock); vfio_assert_device_open(device); if (device->open_count == 1) - vfio_device_last_close(device); + vfio_device_last_close(device, iommufd); device->open_count--; mutex_unlock(&device->dev_set->lock); } +static int vfio_device_group_open(struct vfio_device *device) +{ + int ret; + + mutex_lock(&device->group->group_lock); + if (!vfio_group_has_iommu(device->group)) { + ret = -EINVAL; + goto out_unlock; + } + + /* + * Here we pass the KVM pointer with the group under the lock. If the + * device driver will use it, it must obtain a reference and release it + * during close_device. 
+ */ + ret = vfio_device_open(device, device->group->iommufd, + device->group->kvm); + +out_unlock: + mutex_unlock(&device->group->group_lock); + return ret; +} + +static void vfio_device_group_close(struct vfio_device *device) +{ + mutex_lock(&device->group->group_lock); + vfio_device_close(device, device->group->iommufd); + mutex_unlock(&device->group->group_lock); +} + static struct file *vfio_device_open_file(struct vfio_device *device) { struct file *filep; int ret; - ret = vfio_device_open(device); + ret = vfio_device_group_open(device); if (ret) goto err_out; @@ -918,7 +959,7 @@ static struct file *vfio_device_open_file(struct vfio_device *device) return filep; err_close_device: - vfio_device_close(device); + vfio_device_group_close(device); err_out: return ERR_PTR(ret); } @@ -1130,7 +1171,7 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep) { struct vfio_device *device = filep->private_data; - vfio_device_close(device); + vfio_device_group_close(device); vfio_device_put_registration(device); From 1334e47ee798ac4715330a6ade0afc929cd54aff Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 23 Sep 2022 07:08:36 -0700 Subject: [PATCH 3401/4122] vfio: Wrap vfio group module init/clean code into helpers This wraps the init/clean code of vfio group global variable to be helpers, and prepares for further moving vfio group specific code into separate file. As container is used by group, so vfio_container_init/cleanup() is moved into vfio_group_init/cleanup(). 
Link: https://lore.kernel.org/r/20221201145535.589687-9-yi.l.liu@intel.com Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Reviewed-by: Alex Williamson Tested-by: Lixiao Yang Tested-by: Yu He Signed-off-by: Yi Liu Signed-off-by: Jason Gunthorpe --- drivers/vfio/vfio_main.c | 56 ++++++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 20 deletions(-) diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index a4583f4827e5..e053998baffd 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -2066,12 +2066,11 @@ static char *vfio_devnode(struct device *dev, umode_t *mode) return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev)); } -static int __init vfio_init(void) +static int __init vfio_group_init(void) { int ret; ida_init(&vfio.group_ida); - ida_init(&vfio.device_ida); mutex_init(&vfio.group_lock); INIT_LIST_HEAD(&vfio.group_list); @@ -2088,24 +2087,12 @@ static int __init vfio_init(void) vfio.class->devnode = vfio_devnode; - /* /sys/class/vfio-dev/vfioX */ - vfio.device_class = class_create(THIS_MODULE, "vfio-dev"); - if (IS_ERR(vfio.device_class)) { - ret = PTR_ERR(vfio.device_class); - goto err_dev_class; - } - ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio"); if (ret) goto err_alloc_chrdev; - - pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); return 0; err_alloc_chrdev: - class_destroy(vfio.device_class); - vfio.device_class = NULL; -err_dev_class: class_destroy(vfio.class); vfio.class = NULL; err_group_class: @@ -2113,18 +2100,47 @@ err_group_class: return ret; } -static void __exit vfio_cleanup(void) +static void vfio_group_cleanup(void) { WARN_ON(!list_empty(&vfio.group_list)); - - ida_destroy(&vfio.device_ida); ida_destroy(&vfio.group_ida); unregister_chrdev_region(vfio.group_devt, MINORMASK + 1); + class_destroy(vfio.class); + vfio.class = NULL; + vfio_container_cleanup(); +} + +static int __init vfio_init(void) +{ + int ret; + + ida_init(&vfio.device_ida); + + ret = 
vfio_group_init(); + if (ret) + return ret; + + /* /sys/class/vfio-dev/vfioX */ + vfio.device_class = class_create(THIS_MODULE, "vfio-dev"); + if (IS_ERR(vfio.device_class)) { + ret = PTR_ERR(vfio.device_class); + goto err_dev_class; + } + + pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); + return 0; + +err_dev_class: + vfio_group_cleanup(); + return ret; +} + +static void __exit vfio_cleanup(void) +{ + ida_destroy(&vfio.device_ida); class_destroy(vfio.device_class); vfio.device_class = NULL; - class_destroy(vfio.class); - vfio_container_cleanup(); - vfio.class = NULL; + vfio_group_cleanup(); xa_destroy(&vfio_device_set_xa); } From 8da7a0e79f9b15330ae68d8532425399f4c27045 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Thu, 10 Nov 2022 18:57:01 -0800 Subject: [PATCH 3402/4122] vfio: Refactor dma APIs for emulated devices To use group helpers instead of opening group related code in the API. This prepares moving group specific code out of vfio_main.c. Link: https://lore.kernel.org/r/20221201145535.589687-10-yi.l.liu@intel.com Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Reviewed-by: Alex Williamson Tested-by: Lixiao Yang Tested-by: Yu He Signed-off-by: Yi Liu Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/vfio/container.c | 20 +++++++++++++------- drivers/vfio/vfio.h | 32 ++++++++++++++++---------------- drivers/vfio/vfio_main.c | 25 ++++++++++++++----------- 3 files changed, 43 insertions(+), 34 deletions(-) diff --git a/drivers/vfio/container.c b/drivers/vfio/container.c index 6b362d97d682..b7a9560ab25e 100644 --- a/drivers/vfio/container.c +++ b/drivers/vfio/container.c @@ -540,10 +540,12 @@ void vfio_group_unuse_container(struct vfio_group *group) fput(group->opened_file); } -int vfio_container_pin_pages(struct vfio_container *container, - struct iommu_group *iommu_group, dma_addr_t iova, - int npage, int prot, struct page **pages) +int vfio_device_container_pin_pages(struct vfio_device *device, + dma_addr_t iova, int npage, + 
int prot, struct page **pages) { + struct vfio_container *container = device->group->container; + struct iommu_group *iommu_group = device->group->iommu_group; struct vfio_iommu_driver *driver = container->iommu_driver; if (npage > VFIO_PIN_PAGES_MAX_ENTRIES) @@ -555,9 +557,11 @@ int vfio_container_pin_pages(struct vfio_container *container, npage, prot, pages); } -void vfio_container_unpin_pages(struct vfio_container *container, - dma_addr_t iova, int npage) +void vfio_device_container_unpin_pages(struct vfio_device *device, + dma_addr_t iova, int npage) { + struct vfio_container *container = device->group->container; + if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES)) return; @@ -565,9 +569,11 @@ void vfio_container_unpin_pages(struct vfio_container *container, npage); } -int vfio_container_dma_rw(struct vfio_container *container, dma_addr_t iova, - void *data, size_t len, bool write) +int vfio_device_container_dma_rw(struct vfio_device *device, + dma_addr_t iova, void *data, + size_t len, bool write) { + struct vfio_container *container = device->group->container; struct vfio_iommu_driver *driver = container->iommu_driver; if (unlikely(!driver || !driver->ops->dma_rw)) diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h index ce5fe3fc493b..a112e8f2b291 100644 --- a/drivers/vfio/vfio.h +++ b/drivers/vfio/vfio.h @@ -122,13 +122,14 @@ int vfio_container_attach_group(struct vfio_container *container, void vfio_group_detach_container(struct vfio_group *group); void vfio_device_container_register(struct vfio_device *device); void vfio_device_container_unregister(struct vfio_device *device); -int vfio_container_pin_pages(struct vfio_container *container, - struct iommu_group *iommu_group, dma_addr_t iova, - int npage, int prot, struct page **pages); -void vfio_container_unpin_pages(struct vfio_container *container, - dma_addr_t iova, int npage); -int vfio_container_dma_rw(struct vfio_container *container, dma_addr_t iova, - void *data, size_t len, bool 
write); +int vfio_device_container_pin_pages(struct vfio_device *device, + dma_addr_t iova, int npage, + int prot, struct page **pages); +void vfio_device_container_unpin_pages(struct vfio_device *device, + dma_addr_t iova, int npage); +int vfio_device_container_dma_rw(struct vfio_device *device, + dma_addr_t iova, void *data, + size_t len, bool write); int __init vfio_container_init(void); void vfio_container_cleanup(void); @@ -166,22 +167,21 @@ static inline void vfio_device_container_unregister(struct vfio_device *device) { } -static inline int vfio_container_pin_pages(struct vfio_container *container, - struct iommu_group *iommu_group, - dma_addr_t iova, int npage, int prot, - struct page **pages) +static inline int vfio_device_container_pin_pages(struct vfio_device *device, + dma_addr_t iova, int npage, + int prot, struct page **pages) { return -EOPNOTSUPP; } -static inline void vfio_container_unpin_pages(struct vfio_container *container, - dma_addr_t iova, int npage) +static inline void vfio_device_container_unpin_pages(struct vfio_device *device, + dma_addr_t iova, int npage) { } -static inline int vfio_container_dma_rw(struct vfio_container *container, - dma_addr_t iova, void *data, size_t len, - bool write) +static inline int vfio_device_container_dma_rw(struct vfio_device *device, + dma_addr_t iova, void *data, + size_t len, bool write) { return -EOPNOTSUPP; } diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index e053998baffd..b7f94ace7b10 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -1938,6 +1938,11 @@ int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs, } EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare); +static bool vfio_device_has_container(struct vfio_device *device) +{ + return device->group->container; +} + /* * Pin contiguous user pages and return their associated host pages for local * domain only. 
@@ -1950,7 +1955,7 @@ EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare); * Return error or number of pages pinned. * * A driver may only call this function if the vfio_device was created - * by vfio_register_emulated_iommu_dev() due to vfio_container_pin_pages(). + * by vfio_register_emulated_iommu_dev() due to vfio_device_container_pin_pages(). */ int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova, int npage, int prot, struct page **pages) @@ -1958,10 +1963,9 @@ int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova, /* group->container cannot change while a vfio device is open */ if (!pages || !npage || WARN_ON(!vfio_assert_device_open(device))) return -EINVAL; - if (device->group->container) - return vfio_container_pin_pages(device->group->container, - device->group->iommu_group, - iova, npage, prot, pages); + if (vfio_device_has_container(device)) + return vfio_device_container_pin_pages(device, iova, + npage, prot, pages); if (device->iommufd_access) { int ret; @@ -1997,9 +2001,8 @@ void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage) if (WARN_ON(!vfio_assert_device_open(device))) return; - if (device->group->container) { - vfio_container_unpin_pages(device->group->container, iova, - npage); + if (vfio_device_has_container(device)) { + vfio_device_container_unpin_pages(device, iova, npage); return; } if (device->iommufd_access) { @@ -2036,9 +2039,9 @@ int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data, if (!data || len <= 0 || !vfio_assert_device_open(device)) return -EINVAL; - if (device->group->container) - return vfio_container_dma_rw(device->group->container, iova, - data, len, write); + if (vfio_device_has_container(device)) + return vfio_device_container_dma_rw(device, iova, + data, len, write); if (device->iommufd_access) { unsigned int flags = 0; From 9eefba8002c27d65ab52a533fd0611b099b73591 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 25 Nov 2022 03:26:42 -0800 Subject: [PATCH 
3403/4122] vfio: Move vfio group specific code into group.c This prepares for compiling out vfio group after vfio device cdev is added. No vfio_group decode code should be in vfio_main.c, and neither device->group reference should be in vfio_main.c. No functional change is intended. Link: https://lore.kernel.org/r/20221201145535.589687-11-yi.l.liu@intel.com Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Reviewed-by: Alex Williamson Tested-by: Yu He Tested-by: Lixiao Yang Signed-off-by: Yi Liu Signed-off-by: Jason Gunthorpe --- drivers/vfio/Makefile | 1 + drivers/vfio/group.c | 877 +++++++++++++++++++++++++++++++++++++++ drivers/vfio/vfio.h | 22 + drivers/vfio/vfio_main.c | 877 +-------------------------------------- 4 files changed, 907 insertions(+), 870 deletions(-) create mode 100644 drivers/vfio/group.c diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index b953517dc70f..3783db7e8082 100644 --- a/drivers/vfio/Makefile +++ b/drivers/vfio/Makefile @@ -4,6 +4,7 @@ vfio_virqfd-y := virqfd.o obj-$(CONFIG_VFIO) += vfio.o vfio-y += vfio_main.o \ + group.o \ iova_bitmap.o vfio-$(CONFIG_IOMMUFD) += iommufd.o vfio-$(CONFIG_VFIO_CONTAINER) += container.o diff --git a/drivers/vfio/group.c b/drivers/vfio/group.c new file mode 100644 index 000000000000..c5d8bf10495e --- /dev/null +++ b/drivers/vfio/group.c @@ -0,0 +1,877 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * VFIO core + * + * Copyright (C) 2012 Red Hat, Inc. All rights reserved. + * Author: Alex Williamson + * + * Derived from original vfio: + * Copyright 2010 Cisco Systems, Inc. All rights reserved. 
+ * Author: Tom Lyon, pugs@cisco.com + */ + +#include +#include +#include +#include "vfio.h" + +static struct vfio { + struct class *class; + struct list_head group_list; + struct mutex group_lock; /* locks group_list */ + struct ida group_ida; + dev_t group_devt; +} vfio; + +static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group, + char *buf) +{ + struct vfio_device *it, *device = ERR_PTR(-ENODEV); + + mutex_lock(&group->device_lock); + list_for_each_entry(it, &group->device_list, group_next) { + int ret; + + if (it->ops->match) { + ret = it->ops->match(it, buf); + if (ret < 0) { + device = ERR_PTR(ret); + break; + } + } else { + ret = !strcmp(dev_name(it->dev), buf); + } + + if (ret && vfio_device_try_get_registration(it)) { + device = it; + break; + } + } + mutex_unlock(&group->device_lock); + + return device; +} + +/* + * VFIO Group fd, /dev/vfio/$GROUP + */ +static bool vfio_group_has_iommu(struct vfio_group *group) +{ + lockdep_assert_held(&group->group_lock); + /* + * There can only be users if there is a container, and if there is a + * container there must be users. + */ + WARN_ON(!group->container != !group->container_users); + + return group->container || group->iommufd; +} + +/* + * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or + * if there was no container to unset. Since the ioctl is called on + * the group, we know that still exists, therefore the only valid + * transition here is 1->0. 
+ */ +static int vfio_group_ioctl_unset_container(struct vfio_group *group) +{ + int ret = 0; + + mutex_lock(&group->group_lock); + if (!vfio_group_has_iommu(group)) { + ret = -EINVAL; + goto out_unlock; + } + if (group->container) { + if (group->container_users != 1) { + ret = -EBUSY; + goto out_unlock; + } + vfio_group_detach_container(group); + } + if (group->iommufd) { + iommufd_ctx_put(group->iommufd); + group->iommufd = NULL; + } + +out_unlock: + mutex_unlock(&group->group_lock); + return ret; +} + +static int vfio_group_ioctl_set_container(struct vfio_group *group, + int __user *arg) +{ + struct vfio_container *container; + struct iommufd_ctx *iommufd; + struct fd f; + int ret; + int fd; + + if (get_user(fd, arg)) + return -EFAULT; + + f = fdget(fd); + if (!f.file) + return -EBADF; + + mutex_lock(&group->group_lock); + if (vfio_group_has_iommu(group)) { + ret = -EINVAL; + goto out_unlock; + } + if (!group->iommu_group) { + ret = -ENODEV; + goto out_unlock; + } + + container = vfio_container_from_file(f.file); + if (container) { + ret = vfio_container_attach_group(container, group); + goto out_unlock; + } + + iommufd = iommufd_ctx_from_file(f.file); + if (!IS_ERR(iommufd)) { + u32 ioas_id; + + ret = iommufd_vfio_compat_ioas_id(iommufd, &ioas_id); + if (ret) { + iommufd_ctx_put(group->iommufd); + goto out_unlock; + } + + group->iommufd = iommufd; + goto out_unlock; + } + + /* The FD passed is not recognized. */ + ret = -EBADFD; + +out_unlock: + mutex_unlock(&group->group_lock); + fdput(f); + return ret; +} + +static int vfio_device_group_open(struct vfio_device *device) +{ + int ret; + + mutex_lock(&device->group->group_lock); + if (!vfio_group_has_iommu(device->group)) { + ret = -EINVAL; + goto out_unlock; + } + + /* + * Here we pass the KVM pointer with the group under the lock. If the + * device driver will use it, it must obtain a reference and release it + * during close_device. 
+ */ + ret = vfio_device_open(device, device->group->iommufd, + device->group->kvm); + +out_unlock: + mutex_unlock(&device->group->group_lock); + return ret; +} + +void vfio_device_group_close(struct vfio_device *device) +{ + mutex_lock(&device->group->group_lock); + vfio_device_close(device, device->group->iommufd); + mutex_unlock(&device->group->group_lock); +} + +static struct file *vfio_device_open_file(struct vfio_device *device) +{ + struct file *filep; + int ret; + + ret = vfio_device_group_open(device); + if (ret) + goto err_out; + + /* + * We can't use anon_inode_getfd() because we need to modify + * the f_mode flags directly to allow more than just ioctls + */ + filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops, + device, O_RDWR); + if (IS_ERR(filep)) { + ret = PTR_ERR(filep); + goto err_close_device; + } + + /* + * TODO: add an anon_inode interface to do this. + * Appears to be missing by lack of need rather than + * explicitly prevented. Now there's need. + */ + filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE); + + if (device->group->type == VFIO_NO_IOMMU) + dev_warn(device->dev, "vfio-noiommu device opened by user " + "(%s:%d)\n", current->comm, task_pid_nr(current)); + /* + * On success the ref of device is moved to the file and + * put in vfio_device_fops_release() + */ + return filep; + +err_close_device: + vfio_device_group_close(device); +err_out: + return ERR_PTR(ret); +} + +static int vfio_group_ioctl_get_device_fd(struct vfio_group *group, + char __user *arg) +{ + struct vfio_device *device; + struct file *filep; + char *buf; + int fdno; + int ret; + + buf = strndup_user(arg, PAGE_SIZE); + if (IS_ERR(buf)) + return PTR_ERR(buf); + + device = vfio_device_get_from_name(group, buf); + kfree(buf); + if (IS_ERR(device)) + return PTR_ERR(device); + + fdno = get_unused_fd_flags(O_CLOEXEC); + if (fdno < 0) { + ret = fdno; + goto err_put_device; + } + + filep = vfio_device_open_file(device); + if (IS_ERR(filep)) { + ret = PTR_ERR(filep); + goto 
err_put_fdno; + } + + fd_install(fdno, filep); + return fdno; + +err_put_fdno: + put_unused_fd(fdno); +err_put_device: + vfio_device_put_registration(device); + return ret; +} + +static int vfio_group_ioctl_get_status(struct vfio_group *group, + struct vfio_group_status __user *arg) +{ + unsigned long minsz = offsetofend(struct vfio_group_status, flags); + struct vfio_group_status status; + + if (copy_from_user(&status, arg, minsz)) + return -EFAULT; + + if (status.argsz < minsz) + return -EINVAL; + + status.flags = 0; + + mutex_lock(&group->group_lock); + if (!group->iommu_group) { + mutex_unlock(&group->group_lock); + return -ENODEV; + } + + /* + * With the container FD the iommu_group_claim_dma_owner() is done + * during SET_CONTAINER but for IOMMFD this is done during + * VFIO_GROUP_GET_DEVICE_FD. Meaning that with iommufd + * VFIO_GROUP_FLAGS_VIABLE could be set but GET_DEVICE_FD will fail due + * to viability. + */ + if (vfio_group_has_iommu(group)) + status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET | + VFIO_GROUP_FLAGS_VIABLE; + else if (!iommu_group_dma_owner_claimed(group->iommu_group)) + status.flags |= VFIO_GROUP_FLAGS_VIABLE; + mutex_unlock(&group->group_lock); + + if (copy_to_user(arg, &status, minsz)) + return -EFAULT; + return 0; +} + +static long vfio_group_fops_unl_ioctl(struct file *filep, + unsigned int cmd, unsigned long arg) +{ + struct vfio_group *group = filep->private_data; + void __user *uarg = (void __user *)arg; + + switch (cmd) { + case VFIO_GROUP_GET_DEVICE_FD: + return vfio_group_ioctl_get_device_fd(group, uarg); + case VFIO_GROUP_GET_STATUS: + return vfio_group_ioctl_get_status(group, uarg); + case VFIO_GROUP_SET_CONTAINER: + return vfio_group_ioctl_set_container(group, uarg); + case VFIO_GROUP_UNSET_CONTAINER: + return vfio_group_ioctl_unset_container(group); + default: + return -ENOTTY; + } +} + +static int vfio_group_fops_open(struct inode *inode, struct file *filep) +{ + struct vfio_group *group = + container_of(inode->i_cdev, 
struct vfio_group, cdev); + int ret; + + mutex_lock(&group->group_lock); + + /* + * drivers can be zero if this races with vfio_device_remove_group(), it + * will be stable at 0 under the group rwsem + */ + if (refcount_read(&group->drivers) == 0) { + ret = -ENODEV; + goto out_unlock; + } + + if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) { + ret = -EPERM; + goto out_unlock; + } + + /* + * Do we need multiple instances of the group open? Seems not. + */ + if (group->opened_file) { + ret = -EBUSY; + goto out_unlock; + } + group->opened_file = filep; + filep->private_data = group; + ret = 0; +out_unlock: + mutex_unlock(&group->group_lock); + return ret; +} + +static int vfio_group_fops_release(struct inode *inode, struct file *filep) +{ + struct vfio_group *group = filep->private_data; + + filep->private_data = NULL; + + mutex_lock(&group->group_lock); + /* + * Device FDs hold a group file reference, therefore the group release + * is only called when there are no open devices. + */ + WARN_ON(group->notifier.head); + if (group->container) + vfio_group_detach_container(group); + if (group->iommufd) { + iommufd_ctx_put(group->iommufd); + group->iommufd = NULL; + } + group->opened_file = NULL; + mutex_unlock(&group->group_lock); + return 0; +} + +static const struct file_operations vfio_group_fops = { + .owner = THIS_MODULE, + .unlocked_ioctl = vfio_group_fops_unl_ioctl, + .compat_ioctl = compat_ptr_ioctl, + .open = vfio_group_fops_open, + .release = vfio_group_fops_release, +}; + +/* + * Group objects - create, release, get, put, search + */ +static struct vfio_group * +vfio_group_find_from_iommu(struct iommu_group *iommu_group) +{ + struct vfio_group *group; + + lockdep_assert_held(&vfio.group_lock); + + /* + * group->iommu_group from the vfio.group_list cannot be NULL + * under the vfio.group_lock. 
+ */ + list_for_each_entry(group, &vfio.group_list, vfio_next) { + if (group->iommu_group == iommu_group) + return group; + } + return NULL; +} + +static void vfio_group_release(struct device *dev) +{ + struct vfio_group *group = container_of(dev, struct vfio_group, dev); + + mutex_destroy(&group->device_lock); + mutex_destroy(&group->group_lock); + WARN_ON(group->iommu_group); + ida_free(&vfio.group_ida, MINOR(group->dev.devt)); + kfree(group); +} + +static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group, + enum vfio_group_type type) +{ + struct vfio_group *group; + int minor; + + group = kzalloc(sizeof(*group), GFP_KERNEL); + if (!group) + return ERR_PTR(-ENOMEM); + + minor = ida_alloc_max(&vfio.group_ida, MINORMASK, GFP_KERNEL); + if (minor < 0) { + kfree(group); + return ERR_PTR(minor); + } + + device_initialize(&group->dev); + group->dev.devt = MKDEV(MAJOR(vfio.group_devt), minor); + group->dev.class = vfio.class; + group->dev.release = vfio_group_release; + cdev_init(&group->cdev, &vfio_group_fops); + group->cdev.owner = THIS_MODULE; + + refcount_set(&group->drivers, 1); + mutex_init(&group->group_lock); + INIT_LIST_HEAD(&group->device_list); + mutex_init(&group->device_lock); + group->iommu_group = iommu_group; + /* put in vfio_group_release() */ + iommu_group_ref_get(iommu_group); + group->type = type; + BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier); + + return group; +} + +static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group, + enum vfio_group_type type) +{ + struct vfio_group *group; + struct vfio_group *ret; + int err; + + lockdep_assert_held(&vfio.group_lock); + + group = vfio_group_alloc(iommu_group, type); + if (IS_ERR(group)) + return group; + + err = dev_set_name(&group->dev, "%s%d", + group->type == VFIO_NO_IOMMU ? 
"noiommu-" : "", + iommu_group_id(iommu_group)); + if (err) { + ret = ERR_PTR(err); + goto err_put; + } + + err = cdev_device_add(&group->cdev, &group->dev); + if (err) { + ret = ERR_PTR(err); + goto err_put; + } + + list_add(&group->vfio_next, &vfio.group_list); + + return group; + +err_put: + put_device(&group->dev); + return ret; +} + +static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev, + enum vfio_group_type type) +{ + struct iommu_group *iommu_group; + struct vfio_group *group; + int ret; + + iommu_group = iommu_group_alloc(); + if (IS_ERR(iommu_group)) + return ERR_CAST(iommu_group); + + ret = iommu_group_set_name(iommu_group, "vfio-noiommu"); + if (ret) + goto out_put_group; + ret = iommu_group_add_device(iommu_group, dev); + if (ret) + goto out_put_group; + + mutex_lock(&vfio.group_lock); + group = vfio_create_group(iommu_group, type); + mutex_unlock(&vfio.group_lock); + if (IS_ERR(group)) { + ret = PTR_ERR(group); + goto out_remove_device; + } + iommu_group_put(iommu_group); + return group; + +out_remove_device: + iommu_group_remove_device(dev); +out_put_group: + iommu_group_put(iommu_group); + return ERR_PTR(ret); +} + +static bool vfio_group_has_device(struct vfio_group *group, struct device *dev) +{ + struct vfio_device *device; + + mutex_lock(&group->device_lock); + list_for_each_entry(device, &group->device_list, group_next) { + if (device->dev == dev) { + mutex_unlock(&group->device_lock); + return true; + } + } + mutex_unlock(&group->device_lock); + return false; +} + +static struct vfio_group *vfio_group_find_or_alloc(struct device *dev) +{ + struct iommu_group *iommu_group; + struct vfio_group *group; + + iommu_group = iommu_group_get(dev); + if (!iommu_group && vfio_noiommu) { + /* + * With noiommu enabled, create an IOMMU group for devices that + * don't already have one, implying no IOMMU hardware/driver + * exists. Taint the kernel because we're about to give a DMA + * capable device to a user without IOMMU protection. 
+ */ + group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU); + if (!IS_ERR(group)) { + add_taint(TAINT_USER, LOCKDEP_STILL_OK); + dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n"); + } + return group; + } + + if (!iommu_group) + return ERR_PTR(-EINVAL); + + /* + * VFIO always sets IOMMU_CACHE because we offer no way for userspace to + * restore cache coherency. It has to be checked here because it is only + * valid for cases where we are using iommu groups. + */ + if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) { + iommu_group_put(iommu_group); + return ERR_PTR(-EINVAL); + } + + mutex_lock(&vfio.group_lock); + group = vfio_group_find_from_iommu(iommu_group); + if (group) { + if (WARN_ON(vfio_group_has_device(group, dev))) + group = ERR_PTR(-EINVAL); + else + refcount_inc(&group->drivers); + } else { + group = vfio_create_group(iommu_group, VFIO_IOMMU); + } + mutex_unlock(&vfio.group_lock); + + /* The vfio_group holds a reference to the iommu_group */ + iommu_group_put(iommu_group); + return group; +} + +int vfio_device_set_group(struct vfio_device *device, + enum vfio_group_type type) +{ + struct vfio_group *group; + + if (type == VFIO_IOMMU) + group = vfio_group_find_or_alloc(device->dev); + else + group = vfio_noiommu_group_alloc(device->dev, type); + + if (IS_ERR(group)) + return PTR_ERR(group); + + /* Our reference on group is moved to the device */ + device->group = group; + return 0; +} + +void vfio_device_remove_group(struct vfio_device *device) +{ + struct vfio_group *group = device->group; + struct iommu_group *iommu_group; + + if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU) + iommu_group_remove_device(device->dev); + + /* Pairs with vfio_create_group() / vfio_group_get_from_iommu() */ + if (!refcount_dec_and_mutex_lock(&group->drivers, &vfio.group_lock)) + return; + list_del(&group->vfio_next); + + /* + * We could concurrently probe another driver in the group that might + * race 
vfio_device_remove_group() with vfio_get_group(), so we have to + * ensure that the sysfs is all cleaned up under lock otherwise the + * cdev_device_add() will fail due to the name aready existing. + */ + cdev_device_del(&group->cdev, &group->dev); + + mutex_lock(&group->group_lock); + /* + * These data structures all have paired operations that can only be + * undone when the caller holds a live reference on the device. Since + * all pairs must be undone these WARN_ON's indicate some caller did not + * properly hold the group reference. + */ + WARN_ON(!list_empty(&group->device_list)); + WARN_ON(group->notifier.head); + + /* + * Revoke all users of group->iommu_group. At this point we know there + * are no devices active because we are unplugging the last one. Setting + * iommu_group to NULL blocks all new users. + */ + if (group->container) + vfio_group_detach_container(group); + iommu_group = group->iommu_group; + group->iommu_group = NULL; + mutex_unlock(&group->group_lock); + mutex_unlock(&vfio.group_lock); + + iommu_group_put(iommu_group); + put_device(&group->dev); +} + +void vfio_device_group_register(struct vfio_device *device) +{ + mutex_lock(&device->group->device_lock); + list_add(&device->group_next, &device->group->device_list); + mutex_unlock(&device->group->device_lock); +} + +void vfio_device_group_unregister(struct vfio_device *device) +{ + mutex_lock(&device->group->device_lock); + list_del(&device->group_next); + mutex_unlock(&device->group->device_lock); +} + +int vfio_device_group_use_iommu(struct vfio_device *device) +{ + struct vfio_group *group = device->group; + int ret = 0; + + lockdep_assert_held(&group->group_lock); + + if (WARN_ON(!group->container)) + return -EINVAL; + + ret = vfio_group_use_container(group); + if (ret) + return ret; + vfio_device_container_register(device); + return 0; +} + +void vfio_device_group_unuse_iommu(struct vfio_device *device) +{ + struct vfio_group *group = device->group; + + 
lockdep_assert_held(&group->group_lock); + + if (WARN_ON(!group->container)) + return; + + vfio_device_container_unregister(device); + vfio_group_unuse_container(group); +} + +bool vfio_device_has_container(struct vfio_device *device) +{ + return device->group->container; +} + +/** + * vfio_file_iommu_group - Return the struct iommu_group for the vfio group file + * @file: VFIO group file + * + * The returned iommu_group is valid as long as a ref is held on the file. This + * returns a reference on the group. This function is deprecated, only the SPAPR + * path in kvm should call it. + */ +struct iommu_group *vfio_file_iommu_group(struct file *file) +{ + struct vfio_group *group = file->private_data; + struct iommu_group *iommu_group = NULL; + + if (!IS_ENABLED(CONFIG_SPAPR_TCE_IOMMU)) + return NULL; + + if (!vfio_file_is_group(file)) + return NULL; + + mutex_lock(&group->group_lock); + if (group->iommu_group) { + iommu_group = group->iommu_group; + iommu_group_ref_get(iommu_group); + } + mutex_unlock(&group->group_lock); + return iommu_group; +} +EXPORT_SYMBOL_GPL(vfio_file_iommu_group); + +/** + * vfio_file_is_group - True if the file is usable with VFIO aPIS + * @file: VFIO group file + */ +bool vfio_file_is_group(struct file *file) +{ + return file->f_op == &vfio_group_fops; +} +EXPORT_SYMBOL_GPL(vfio_file_is_group); + +/** + * vfio_file_enforced_coherent - True if the DMA associated with the VFIO file + * is always CPU cache coherent + * @file: VFIO group file + * + * Enforced coherency means that the IOMMU ignores things like the PCIe no-snoop + * bit in DMA transactions. A return of false indicates that the user has + * rights to access additional instructions such as wbinvd on x86. 
+ */ +bool vfio_file_enforced_coherent(struct file *file) +{ + struct vfio_group *group = file->private_data; + struct vfio_device *device; + bool ret = true; + + if (!vfio_file_is_group(file)) + return true; + + /* + * If the device does not have IOMMU_CAP_ENFORCE_CACHE_COHERENCY then + * any domain later attached to it will also not support it. If the cap + * is set then the iommu_domain eventually attached to the device/group + * must use a domain with enforce_cache_coherency(). + */ + mutex_lock(&group->device_lock); + list_for_each_entry(device, &group->device_list, group_next) { + if (!device_iommu_capable(device->dev, + IOMMU_CAP_ENFORCE_CACHE_COHERENCY)) { + ret = false; + break; + } + } + mutex_unlock(&group->device_lock); + return ret; +} +EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent); + +/** + * vfio_file_set_kvm - Link a kvm with VFIO drivers + * @file: VFIO group file + * @kvm: KVM to link + * + * When a VFIO device is first opened the KVM will be available in + * device->kvm if one was associated with the group. + */ +void vfio_file_set_kvm(struct file *file, struct kvm *kvm) +{ + struct vfio_group *group = file->private_data; + + if (!vfio_file_is_group(file)) + return; + + mutex_lock(&group->group_lock); + group->kvm = kvm; + mutex_unlock(&group->group_lock); +} +EXPORT_SYMBOL_GPL(vfio_file_set_kvm); + +/** + * vfio_file_has_dev - True if the VFIO file is a handle for device + * @file: VFIO file to check + * @device: Device that must be part of the file + * + * Returns true if given file has permission to manipulate the given device. 
+ */ +bool vfio_file_has_dev(struct file *file, struct vfio_device *device) +{ + struct vfio_group *group = file->private_data; + + if (!vfio_file_is_group(file)) + return false; + + return group == device->group; +} +EXPORT_SYMBOL_GPL(vfio_file_has_dev); + +static char *vfio_devnode(struct device *dev, umode_t *mode) +{ + return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev)); +} + +int __init vfio_group_init(void) +{ + int ret; + + ida_init(&vfio.group_ida); + mutex_init(&vfio.group_lock); + INIT_LIST_HEAD(&vfio.group_list); + + ret = vfio_container_init(); + if (ret) + return ret; + + /* /dev/vfio/$GROUP */ + vfio.class = class_create(THIS_MODULE, "vfio"); + if (IS_ERR(vfio.class)) { + ret = PTR_ERR(vfio.class); + goto err_group_class; + } + + vfio.class->devnode = vfio_devnode; + + ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio"); + if (ret) + goto err_alloc_chrdev; + return 0; + +err_alloc_chrdev: + class_destroy(vfio.class); + vfio.class = NULL; +err_group_class: + vfio_container_cleanup(); + return ret; +} + +void vfio_group_cleanup(void) +{ + WARN_ON(!list_empty(&vfio.group_list)); + ida_destroy(&vfio.group_ida); + unregister_chrdev_region(vfio.group_devt, MINORMASK + 1); + class_destroy(vfio.class); + vfio.class = NULL; + vfio_container_cleanup(); +} diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h index a112e8f2b291..2e05418fd18d 100644 --- a/drivers/vfio/vfio.h +++ b/drivers/vfio/vfio.h @@ -6,6 +6,7 @@ #ifndef __VFIO_VFIO_H__ #define __VFIO_VFIO_H__ +#include #include #include #include @@ -15,6 +16,15 @@ struct iommu_group; struct vfio_device; struct vfio_container; +void vfio_device_put_registration(struct vfio_device *device); +bool vfio_device_try_get_registration(struct vfio_device *device); +int vfio_device_open(struct vfio_device *device, + struct iommufd_ctx *iommufd, struct kvm *kvm); +void vfio_device_close(struct vfio_device *device, + struct iommufd_ctx *iommufd); + +extern const struct file_operations 
vfio_device_fops; + enum vfio_group_type { /* * Physical device with IOMMU backing. @@ -66,6 +76,18 @@ struct vfio_group { struct iommufd_ctx *iommufd; }; +int vfio_device_set_group(struct vfio_device *device, + enum vfio_group_type type); +void vfio_device_remove_group(struct vfio_device *device); +void vfio_device_group_register(struct vfio_device *device); +void vfio_device_group_unregister(struct vfio_device *device); +int vfio_device_group_use_iommu(struct vfio_device *device); +void vfio_device_group_unuse_iommu(struct vfio_device *device); +void vfio_device_group_close(struct vfio_device *device); +bool vfio_device_has_container(struct vfio_device *device); +int __init vfio_group_init(void); +void vfio_group_cleanup(void); + #if IS_ENABLED(CONFIG_VFIO_CONTAINER) /* events for the backend driver notify callback */ enum vfio_iommu_notify_type { diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index b7f94ace7b10..e21ff965141e 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -13,8 +13,6 @@ #include #include #include -#include -#include #include #include #include @@ -43,17 +41,11 @@ #define DRIVER_DESC "VFIO - User Level meta-driver" static struct vfio { - struct class *class; - struct list_head group_list; - struct mutex group_lock; /* locks group_list */ - struct ida group_ida; - dev_t group_devt; struct class *device_class; struct ida device_ida; } vfio; static DEFINE_XARRAY(vfio_device_set_xa); -static const struct file_operations vfio_group_fops; int vfio_assign_device_set(struct vfio_device *device, void *set_id) { @@ -139,168 +131,17 @@ unsigned int vfio_device_set_open_count(struct vfio_device_set *dev_set) } EXPORT_SYMBOL_GPL(vfio_device_set_open_count); -/* - * Group objects - create, release, get, put, search - */ -static struct vfio_group * -vfio_group_find_from_iommu(struct iommu_group *iommu_group) -{ - struct vfio_group *group; - - lockdep_assert_held(&vfio.group_lock); - - /* - * group->iommu_group from the 
vfio.group_list cannot be NULL - * under the vfio.group_lock. - */ - list_for_each_entry(group, &vfio.group_list, vfio_next) { - if (group->iommu_group == iommu_group) - return group; - } - return NULL; -} - -static void vfio_group_release(struct device *dev) -{ - struct vfio_group *group = container_of(dev, struct vfio_group, dev); - - mutex_destroy(&group->device_lock); - mutex_destroy(&group->group_lock); - WARN_ON(group->iommu_group); - ida_free(&vfio.group_ida, MINOR(group->dev.devt)); - kfree(group); -} - -static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group, - enum vfio_group_type type) -{ - struct vfio_group *group; - int minor; - - group = kzalloc(sizeof(*group), GFP_KERNEL); - if (!group) - return ERR_PTR(-ENOMEM); - - minor = ida_alloc_max(&vfio.group_ida, MINORMASK, GFP_KERNEL); - if (minor < 0) { - kfree(group); - return ERR_PTR(minor); - } - - device_initialize(&group->dev); - group->dev.devt = MKDEV(MAJOR(vfio.group_devt), minor); - group->dev.class = vfio.class; - group->dev.release = vfio_group_release; - cdev_init(&group->cdev, &vfio_group_fops); - group->cdev.owner = THIS_MODULE; - - refcount_set(&group->drivers, 1); - mutex_init(&group->group_lock); - INIT_LIST_HEAD(&group->device_list); - mutex_init(&group->device_lock); - group->iommu_group = iommu_group; - /* put in vfio_group_release() */ - iommu_group_ref_get(iommu_group); - group->type = type; - BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier); - - return group; -} - -static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group, - enum vfio_group_type type) -{ - struct vfio_group *group; - struct vfio_group *ret; - int err; - - lockdep_assert_held(&vfio.group_lock); - - group = vfio_group_alloc(iommu_group, type); - if (IS_ERR(group)) - return group; - - err = dev_set_name(&group->dev, "%s%d", - group->type == VFIO_NO_IOMMU ? 
"noiommu-" : "", - iommu_group_id(iommu_group)); - if (err) { - ret = ERR_PTR(err); - goto err_put; - } - - err = cdev_device_add(&group->cdev, &group->dev); - if (err) { - ret = ERR_PTR(err); - goto err_put; - } - - list_add(&group->vfio_next, &vfio.group_list); - - return group; - -err_put: - put_device(&group->dev); - return ret; -} - -static void vfio_device_remove_group(struct vfio_device *device) -{ - struct vfio_group *group = device->group; - struct iommu_group *iommu_group; - - if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU) - iommu_group_remove_device(device->dev); - - /* Pairs with vfio_create_group() / vfio_group_get_from_iommu() */ - if (!refcount_dec_and_mutex_lock(&group->drivers, &vfio.group_lock)) - return; - list_del(&group->vfio_next); - - /* - * We could concurrently probe another driver in the group that might - * race vfio_device_remove_group() with vfio_get_group(), so we have to - * ensure that the sysfs is all cleaned up under lock otherwise the - * cdev_device_add() will fail due to the name aready existing. - */ - cdev_device_del(&group->cdev, &group->dev); - - mutex_lock(&group->group_lock); - /* - * These data structures all have paired operations that can only be - * undone when the caller holds a live reference on the device. Since - * all pairs must be undone these WARN_ON's indicate some caller did not - * properly hold the group reference. - */ - WARN_ON(!list_empty(&group->device_list)); - WARN_ON(group->notifier.head); - - /* - * Revoke all users of group->iommu_group. At this point we know there - * are no devices active because we are unplugging the last one. Setting - * iommu_group to NULL blocks all new users. 
- */ - if (group->container) - vfio_group_detach_container(group); - iommu_group = group->iommu_group; - group->iommu_group = NULL; - mutex_unlock(&group->group_lock); - mutex_unlock(&vfio.group_lock); - - iommu_group_put(iommu_group); - put_device(&group->dev); -} - /* * Device objects - create, release, get, put, search */ /* Device reference always implies a group reference */ -static void vfio_device_put_registration(struct vfio_device *device) +void vfio_device_put_registration(struct vfio_device *device) { if (refcount_dec_and_test(&device->refcount)) complete(&device->comp); } -static bool vfio_device_try_get_registration(struct vfio_device *device) +bool vfio_device_try_get_registration(struct vfio_device *device) { return refcount_inc_not_zero(&device->refcount); } @@ -413,139 +254,6 @@ void vfio_free_device(struct vfio_device *device) } EXPORT_SYMBOL_GPL(vfio_free_device); -static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev, - enum vfio_group_type type) -{ - struct iommu_group *iommu_group; - struct vfio_group *group; - int ret; - - iommu_group = iommu_group_alloc(); - if (IS_ERR(iommu_group)) - return ERR_CAST(iommu_group); - - ret = iommu_group_set_name(iommu_group, "vfio-noiommu"); - if (ret) - goto out_put_group; - ret = iommu_group_add_device(iommu_group, dev); - if (ret) - goto out_put_group; - - mutex_lock(&vfio.group_lock); - group = vfio_create_group(iommu_group, type); - mutex_unlock(&vfio.group_lock); - if (IS_ERR(group)) { - ret = PTR_ERR(group); - goto out_remove_device; - } - iommu_group_put(iommu_group); - return group; - -out_remove_device: - iommu_group_remove_device(dev); -out_put_group: - iommu_group_put(iommu_group); - return ERR_PTR(ret); -} - -static bool vfio_group_has_device(struct vfio_group *group, struct device *dev) -{ - struct vfio_device *device; - - mutex_lock(&group->device_lock); - list_for_each_entry(device, &group->device_list, group_next) { - if (device->dev == dev) { - 
mutex_unlock(&group->device_lock); - return true; - } - } - mutex_unlock(&group->device_lock); - return false; -} - -static struct vfio_group *vfio_group_find_or_alloc(struct device *dev) -{ - struct iommu_group *iommu_group; - struct vfio_group *group; - - iommu_group = iommu_group_get(dev); - if (!iommu_group && vfio_noiommu) { - /* - * With noiommu enabled, create an IOMMU group for devices that - * don't already have one, implying no IOMMU hardware/driver - * exists. Taint the kernel because we're about to give a DMA - * capable device to a user without IOMMU protection. - */ - group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU); - if (!IS_ERR(group)) { - add_taint(TAINT_USER, LOCKDEP_STILL_OK); - dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n"); - } - return group; - } - - if (!iommu_group) - return ERR_PTR(-EINVAL); - - /* - * VFIO always sets IOMMU_CACHE because we offer no way for userspace to - * restore cache coherency. It has to be checked here because it is only - * valid for cases where we are using iommu groups. 
- */ - if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) { - iommu_group_put(iommu_group); - return ERR_PTR(-EINVAL); - } - - mutex_lock(&vfio.group_lock); - group = vfio_group_find_from_iommu(iommu_group); - if (group) { - if (WARN_ON(vfio_group_has_device(group, dev))) - group = ERR_PTR(-EINVAL); - else - refcount_inc(&group->drivers); - } else { - group = vfio_create_group(iommu_group, VFIO_IOMMU); - } - mutex_unlock(&vfio.group_lock); - - /* The vfio_group holds a reference to the iommu_group */ - iommu_group_put(iommu_group); - return group; -} - -static void vfio_device_group_register(struct vfio_device *device) -{ - mutex_lock(&device->group->device_lock); - list_add(&device->group_next, &device->group->device_list); - mutex_unlock(&device->group->device_lock); -} - -static void vfio_device_group_unregister(struct vfio_device *device) -{ - mutex_lock(&device->group->device_lock); - list_del(&device->group_next); - mutex_unlock(&device->group->device_lock); -} - -static int vfio_device_set_group(struct vfio_device *device, - enum vfio_group_type type) -{ - struct vfio_group *group; - - if (type == VFIO_IOMMU) - group = vfio_group_find_or_alloc(device->dev); - else - group = vfio_noiommu_group_alloc(device->dev, type); - - if (IS_ERR(group)) - return PTR_ERR(group); - - /* Our reference on group is moved to the device */ - device->group = group; - return 0; -} - static int __vfio_register_dev(struct vfio_device *device, enum vfio_group_type type) { @@ -602,35 +310,6 @@ int vfio_register_emulated_iommu_dev(struct vfio_device *device) } EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev); -static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group, - char *buf) -{ - struct vfio_device *it, *device = ERR_PTR(-ENODEV); - - mutex_lock(&group->device_lock); - list_for_each_entry(it, &group->device_list, group_next) { - int ret; - - if (it->ops->match) { - ret = it->ops->match(it, buf); - if (ret < 0) { - device = ERR_PTR(ret); - break; - 
} - } else { - ret = !strcmp(dev_name(it->dev), buf); - } - - if (ret && vfio_device_try_get_registration(it)) { - device = it; - break; - } - } - mutex_unlock(&group->device_lock); - - return device; -} - /* * Decrement the device reference count and wait for the device to be * removed. Open file descriptors for the device... */ @@ -673,146 +352,12 @@ void vfio_unregister_group_dev(struct vfio_device *device) } EXPORT_SYMBOL_GPL(vfio_unregister_group_dev); -/* - * VFIO Group fd, /dev/vfio/$GROUP - */ -static bool vfio_group_has_iommu(struct vfio_group *group) -{ - lockdep_assert_held(&group->group_lock); - /* - * There can only be users if there is a container, and if there is a - * container there must be users. - */ - WARN_ON(!group->container != !group->container_users); - - return group->container || group->iommufd; -} - -/* - * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or - * if there was no container to unset. Since the ioctl is called on - * the group, we know that still exists, therefore the only valid - * transition here is 1->0. 
- */ -static int vfio_group_ioctl_unset_container(struct vfio_group *group) -{ - int ret = 0; - - mutex_lock(&group->group_lock); - if (!vfio_group_has_iommu(group)) { - ret = -EINVAL; - goto out_unlock; - } - if (group->container) { - if (group->container_users != 1) { - ret = -EBUSY; - goto out_unlock; - } - vfio_group_detach_container(group); - } - if (group->iommufd) { - iommufd_ctx_put(group->iommufd); - group->iommufd = NULL; - } - -out_unlock: - mutex_unlock(&group->group_lock); - return ret; -} - -static int vfio_group_ioctl_set_container(struct vfio_group *group, - int __user *arg) -{ - struct vfio_container *container; - struct iommufd_ctx *iommufd; - struct fd f; - int ret; - int fd; - - if (get_user(fd, arg)) - return -EFAULT; - - f = fdget(fd); - if (!f.file) - return -EBADF; - - mutex_lock(&group->group_lock); - if (vfio_group_has_iommu(group)) { - ret = -EINVAL; - goto out_unlock; - } - if (!group->iommu_group) { - ret = -ENODEV; - goto out_unlock; - } - - container = vfio_container_from_file(f.file); - if (container) { - ret = vfio_container_attach_group(container, group); - goto out_unlock; - } - - iommufd = iommufd_ctx_from_file(f.file); - if (!IS_ERR(iommufd)) { - u32 ioas_id; - - ret = iommufd_vfio_compat_ioas_id(iommufd, &ioas_id); - if (ret) { - iommufd_ctx_put(group->iommufd); - goto out_unlock; - } - - group->iommufd = iommufd; - goto out_unlock; - } - - /* The FD passed is not recognized. 
*/ - ret = -EBADFD; - -out_unlock: - mutex_unlock(&group->group_lock); - fdput(f); - return ret; -} - -static const struct file_operations vfio_device_fops; - /* true if the vfio_device has open_device() called but not close_device() */ static bool vfio_assert_device_open(struct vfio_device *device) { return !WARN_ON_ONCE(!READ_ONCE(device->open_count)); } -static int vfio_device_group_use_iommu(struct vfio_device *device) -{ - struct vfio_group *group = device->group; - int ret = 0; - - lockdep_assert_held(&group->group_lock); - - if (WARN_ON(!group->container)) - return -EINVAL; - - ret = vfio_group_use_container(group); - if (ret) - return ret; - vfio_device_container_register(device); - return 0; -} - -static void vfio_device_group_unuse_iommu(struct vfio_device *device) -{ - struct vfio_group *group = device->group; - - lockdep_assert_held(&group->group_lock); - - if (WARN_ON(!group->container)) - return; - - vfio_device_container_unregister(device); - vfio_group_unuse_container(group); -} - static int vfio_device_first_open(struct vfio_device *device, struct iommufd_ctx *iommufd, struct kvm *kvm) { @@ -864,8 +409,8 @@ static void vfio_device_last_close(struct vfio_device *device, module_put(device->dev->driver->owner); } -static int vfio_device_open(struct vfio_device *device, - struct iommufd_ctx *iommufd, struct kvm *kvm) +int vfio_device_open(struct vfio_device *device, + struct iommufd_ctx *iommufd, struct kvm *kvm) { int ret = 0; @@ -881,8 +426,8 @@ static int vfio_device_open(struct vfio_device *device, return ret; } -static void vfio_device_close(struct vfio_device *device, - struct iommufd_ctx *iommufd) +void vfio_device_close(struct vfio_device *device, + struct iommufd_ctx *iommufd) { mutex_lock(&device->dev_set->lock); vfio_assert_device_open(device); @@ -892,245 +437,6 @@ static void vfio_device_close(struct vfio_device *device, mutex_unlock(&device->dev_set->lock); } -static int vfio_device_group_open(struct vfio_device *device) -{ - int ret; - - 
mutex_lock(&device->group->group_lock); - if (!vfio_group_has_iommu(device->group)) { - ret = -EINVAL; - goto out_unlock; - } - - /* - * Here we pass the KVM pointer with the group under the lock. If the - * device driver will use it, it must obtain a reference and release it - * during close_device. - */ - ret = vfio_device_open(device, device->group->iommufd, - device->group->kvm); - -out_unlock: - mutex_unlock(&device->group->group_lock); - return ret; -} - -static void vfio_device_group_close(struct vfio_device *device) -{ - mutex_lock(&device->group->group_lock); - vfio_device_close(device, device->group->iommufd); - mutex_unlock(&device->group->group_lock); -} - -static struct file *vfio_device_open_file(struct vfio_device *device) -{ - struct file *filep; - int ret; - - ret = vfio_device_group_open(device); - if (ret) - goto err_out; - - /* - * We can't use anon_inode_getfd() because we need to modify - * the f_mode flags directly to allow more than just ioctls - */ - filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops, - device, O_RDWR); - if (IS_ERR(filep)) { - ret = PTR_ERR(filep); - goto err_close_device; - } - - /* - * TODO: add an anon_inode interface to do this. - * Appears to be missing by lack of need rather than - * explicitly prevented. Now there's need. 
- */ - filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE); - - if (device->group->type == VFIO_NO_IOMMU) - dev_warn(device->dev, "vfio-noiommu device opened by user " - "(%s:%d)\n", current->comm, task_pid_nr(current)); - /* - * On success the ref of device is moved to the file and - * put in vfio_device_fops_release() - */ - return filep; - -err_close_device: - vfio_device_group_close(device); -err_out: - return ERR_PTR(ret); -} - -static int vfio_group_ioctl_get_device_fd(struct vfio_group *group, - char __user *arg) -{ - struct vfio_device *device; - struct file *filep; - char *buf; - int fdno; - int ret; - - buf = strndup_user(arg, PAGE_SIZE); - if (IS_ERR(buf)) - return PTR_ERR(buf); - - device = vfio_device_get_from_name(group, buf); - kfree(buf); - if (IS_ERR(device)) - return PTR_ERR(device); - - fdno = get_unused_fd_flags(O_CLOEXEC); - if (fdno < 0) { - ret = fdno; - goto err_put_device; - } - - filep = vfio_device_open_file(device); - if (IS_ERR(filep)) { - ret = PTR_ERR(filep); - goto err_put_fdno; - } - - fd_install(fdno, filep); - return fdno; - -err_put_fdno: - put_unused_fd(fdno); -err_put_device: - vfio_device_put_registration(device); - return ret; -} - -static int vfio_group_ioctl_get_status(struct vfio_group *group, - struct vfio_group_status __user *arg) -{ - unsigned long minsz = offsetofend(struct vfio_group_status, flags); - struct vfio_group_status status; - - if (copy_from_user(&status, arg, minsz)) - return -EFAULT; - - if (status.argsz < minsz) - return -EINVAL; - - status.flags = 0; - - mutex_lock(&group->group_lock); - if (!group->iommu_group) { - mutex_unlock(&group->group_lock); - return -ENODEV; - } - - /* - * With the container FD the iommu_group_claim_dma_owner() is done - * during SET_CONTAINER but for IOMMFD this is done during - * VFIO_GROUP_GET_DEVICE_FD. Meaning that with iommufd - * VFIO_GROUP_FLAGS_VIABLE could be set but GET_DEVICE_FD will fail due - * to viability. 
- */ - if (vfio_group_has_iommu(group)) - status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET | - VFIO_GROUP_FLAGS_VIABLE; - else if (!iommu_group_dma_owner_claimed(group->iommu_group)) - status.flags |= VFIO_GROUP_FLAGS_VIABLE; - mutex_unlock(&group->group_lock); - - if (copy_to_user(arg, &status, minsz)) - return -EFAULT; - return 0; -} - -static long vfio_group_fops_unl_ioctl(struct file *filep, - unsigned int cmd, unsigned long arg) -{ - struct vfio_group *group = filep->private_data; - void __user *uarg = (void __user *)arg; - - switch (cmd) { - case VFIO_GROUP_GET_DEVICE_FD: - return vfio_group_ioctl_get_device_fd(group, uarg); - case VFIO_GROUP_GET_STATUS: - return vfio_group_ioctl_get_status(group, uarg); - case VFIO_GROUP_SET_CONTAINER: - return vfio_group_ioctl_set_container(group, uarg); - case VFIO_GROUP_UNSET_CONTAINER: - return vfio_group_ioctl_unset_container(group); - default: - return -ENOTTY; - } -} - -static int vfio_group_fops_open(struct inode *inode, struct file *filep) -{ - struct vfio_group *group = - container_of(inode->i_cdev, struct vfio_group, cdev); - int ret; - - mutex_lock(&group->group_lock); - - /* - * drivers can be zero if this races with vfio_device_remove_group(), it - * will be stable at 0 under the group rwsem - */ - if (refcount_read(&group->drivers) == 0) { - ret = -ENODEV; - goto out_unlock; - } - - if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) { - ret = -EPERM; - goto out_unlock; - } - - /* - * Do we need multiple instances of the group open? Seems not. 
- */ - if (group->opened_file) { - ret = -EBUSY; - goto out_unlock; - } - group->opened_file = filep; - filep->private_data = group; - ret = 0; -out_unlock: - mutex_unlock(&group->group_lock); - return ret; -} - -static int vfio_group_fops_release(struct inode *inode, struct file *filep) -{ - struct vfio_group *group = filep->private_data; - - filep->private_data = NULL; - - mutex_lock(&group->group_lock); - /* - * Device FDs hold a group file reference, therefore the group release - * is only called when there are no open devices. - */ - WARN_ON(group->notifier.head); - if (group->container) - vfio_group_detach_container(group); - if (group->iommufd) { - iommufd_ctx_put(group->iommufd); - group->iommufd = NULL; - } - group->opened_file = NULL; - mutex_unlock(&group->group_lock); - return 0; -} - -static const struct file_operations vfio_group_fops = { - .owner = THIS_MODULE, - .unlocked_ioctl = vfio_group_fops_unl_ioctl, - .compat_ioctl = compat_ptr_ioctl, - .open = vfio_group_fops_open, - .release = vfio_group_fops_release, -}; - /* * Wrapper around pm_runtime_resume_and_get(). * Return error code on failure or 0 on success. @@ -1694,7 +1000,7 @@ static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma) return device->ops->mmap(device, vma); } -static const struct file_operations vfio_device_fops = { +const struct file_operations vfio_device_fops = { .owner = THIS_MODULE, .release = vfio_device_fops_release, .read = vfio_device_fops_read, @@ -1704,121 +1010,6 @@ static const struct file_operations vfio_device_fops = { .mmap = vfio_device_fops_mmap, }; -/** - * vfio_file_iommu_group - Return the struct iommu_group for the vfio group file - * @file: VFIO group file - * - * The returned iommu_group is valid as long as a ref is held on the file. This - * returns a reference on the group. This function is deprecated, only the SPAPR - * path in kvm should call it. 
- */ -struct iommu_group *vfio_file_iommu_group(struct file *file) -{ - struct vfio_group *group = file->private_data; - struct iommu_group *iommu_group = NULL; - - if (!IS_ENABLED(CONFIG_SPAPR_TCE_IOMMU)) - return NULL; - - if (!vfio_file_is_group(file)) - return NULL; - - mutex_lock(&group->group_lock); - if (group->iommu_group) { - iommu_group = group->iommu_group; - iommu_group_ref_get(iommu_group); - } - mutex_unlock(&group->group_lock); - return iommu_group; -} -EXPORT_SYMBOL_GPL(vfio_file_iommu_group); - -/** - * vfio_file_is_group - True if the file is usable with VFIO aPIS - * @file: VFIO group file - */ -bool vfio_file_is_group(struct file *file) -{ - return file->f_op == &vfio_group_fops; -} -EXPORT_SYMBOL_GPL(vfio_file_is_group); - -/** - * vfio_file_enforced_coherent - True if the DMA associated with the VFIO file - * is always CPU cache coherent - * @file: VFIO group file - * - * Enforced coherency means that the IOMMU ignores things like the PCIe no-snoop - * bit in DMA transactions. A return of false indicates that the user has - * rights to access additional instructions such as wbinvd on x86. - */ -bool vfio_file_enforced_coherent(struct file *file) -{ - struct vfio_group *group = file->private_data; - struct vfio_device *device; - bool ret = true; - - if (!vfio_file_is_group(file)) - return true; - - /* - * If the device does not have IOMMU_CAP_ENFORCE_CACHE_COHERENCY then - * any domain later attached to it will also not support it. If the cap - * is set then the iommu_domain eventually attached to the device/group - * must use a domain with enforce_cache_coherency(). 
- */ - mutex_lock(&group->device_lock); - list_for_each_entry(device, &group->device_list, group_next) { - if (!device_iommu_capable(device->dev, - IOMMU_CAP_ENFORCE_CACHE_COHERENCY)) { - ret = false; - break; - } - } - mutex_unlock(&group->device_lock); - return ret; -} -EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent); - -/** - * vfio_file_set_kvm - Link a kvm with VFIO drivers - * @file: VFIO group file - * @kvm: KVM to link - * - * When a VFIO device is first opened the KVM will be available in - * device->kvm if one was associated with the group. - */ -void vfio_file_set_kvm(struct file *file, struct kvm *kvm) -{ - struct vfio_group *group = file->private_data; - - if (!vfio_file_is_group(file)) - return; - - mutex_lock(&group->group_lock); - group->kvm = kvm; - mutex_unlock(&group->group_lock); -} -EXPORT_SYMBOL_GPL(vfio_file_set_kvm); - -/** - * vfio_file_has_dev - True if the VFIO file is a handle for device - * @file: VFIO file to check - * @device: Device that must be part of the file - * - * Returns true if given file has permission to manipulate the given device. - */ -bool vfio_file_has_dev(struct file *file, struct vfio_device *device) -{ - struct vfio_group *group = file->private_data; - - if (!vfio_file_is_group(file)) - return false; - - return group == device->group; -} -EXPORT_SYMBOL_GPL(vfio_file_has_dev); - /* * Sub-module support */ @@ -1938,11 +1129,6 @@ int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs, } EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare); -static bool vfio_device_has_container(struct vfio_device *device) -{ - return device->group->container; -} - /* * Pin contiguous user pages and return their associated host pages for local * domain only. 
@@ -2064,55 +1250,6 @@ EXPORT_SYMBOL(vfio_dma_rw); /* * Module/class support */ -static char *vfio_devnode(struct device *dev, umode_t *mode) -{ - return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev)); -} - -static int __init vfio_group_init(void) -{ - int ret; - - ida_init(&vfio.group_ida); - mutex_init(&vfio.group_lock); - INIT_LIST_HEAD(&vfio.group_list); - - ret = vfio_container_init(); - if (ret) - return ret; - - /* /dev/vfio/$GROUP */ - vfio.class = class_create(THIS_MODULE, "vfio"); - if (IS_ERR(vfio.class)) { - ret = PTR_ERR(vfio.class); - goto err_group_class; - } - - vfio.class->devnode = vfio_devnode; - - ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio"); - if (ret) - goto err_alloc_chrdev; - return 0; - -err_alloc_chrdev: - class_destroy(vfio.class); - vfio.class = NULL; -err_group_class: - vfio_container_cleanup(); - return ret; -} - -static void vfio_group_cleanup(void) -{ - WARN_ON(!list_empty(&vfio.group_list)); - ida_destroy(&vfio.group_ida); - unregister_chrdev_region(vfio.group_devt, MINORMASK + 1); - class_destroy(vfio.class); - vfio.class = NULL; - vfio_container_cleanup(); -} - static int __init vfio_init(void) { int ret; From 219072c09abde0f1d0a6ce091be375e8eb7d08f0 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Mon, 5 Dec 2022 11:40:31 +0000 Subject: [PATCH 3404/4122] KVM: arm64: Fix benign bug with incorrect use of VA_BITS get_user_mapping_size() uses kvm's pgtable library to walk a user space page table created by the kernel, and in doing so, passes metadata that the library needs, including ia_bits, which defines the size of the input address. For the case where the kernel is compiled for 52 VA bits but runs on HW that does not support LVA, it will fall back to 48 VA bits at runtime. Therefore we must use vabits_actual rather than VA_BITS to get the true address size. This is benign in the current code base because the pgtable library only uses it for error checking. 
Fixes: 6011cf68c885 ("KVM: arm64: Walk userspace page tables to compute the THP mapping size") Signed-off-by: Ryan Roberts Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221205114031.3972780-1-ryan.roberts@arm.com --- arch/arm64/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 4efb983cff43..1ef0704420d9 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -641,7 +641,7 @@ static int get_user_mapping_size(struct kvm *kvm, u64 addr) { struct kvm_pgtable pgt = { .pgd = (kvm_pte_t *)kvm->mm->pgd, - .ia_bits = VA_BITS, + .ia_bits = vabits_actual, .start_level = (KVM_PGTABLE_MAX_LEVELS - CONFIG_PGTABLE_LEVELS), .mm_ops = &kvm_user_mm_ops, From 96df59b1ae23f5c11698c3c2159aeb2ecd4944a4 Mon Sep 17 00:00:00 2001 From: Li Huafei Date: Fri, 4 Nov 2022 17:56:57 +0800 Subject: [PATCH 3405/4122] RISC-V: kexec: Fix memory leak of fdt buffer This is reported by kmemleak detector: unreferenced object 0xff60000082864000 (size 9588): comm "kexec", pid 146, jiffies 4294900634 (age 64.788s) hex dump (first 32 bytes): d0 0d fe ed 00 00 12 ed 00 00 00 48 00 00 11 40 ...........H...@ 00 00 00 28 00 00 00 11 00 00 00 02 00 00 00 00 ...(............ backtrace: [<00000000f95b17c4>] kmemleak_alloc+0x34/0x3e [<00000000b9ec8e3e>] kmalloc_order+0x9c/0xc4 [<00000000a95cf02e>] kmalloc_order_trace+0x34/0xb6 [<00000000f01e68b4>] __kmalloc+0x5c2/0x62a [<000000002bd497b2>] kvmalloc_node+0x66/0xd6 [<00000000906542fa>] of_kexec_alloc_and_setup_fdt+0xa6/0x6ea [<00000000e1166bde>] elf_kexec_load+0x206/0x4ec [<0000000036548e09>] kexec_image_load_default+0x40/0x4c [<0000000079fbe1b4>] sys_kexec_file_load+0x1c4/0x322 [<0000000040c62c03>] ret_from_syscall+0x0/0x2 In elf_kexec_load(), a buffer is allocated via kvmalloc() to store the fdt. However, it is not freed back to the system when the kexec kernel is reloaded or unloaded, which causes a memory leak.
Fix it by introducing riscv specific function arch_kimage_file_post_load_cleanup(), and freeing the buffer there. Fixes: 6261586e0c91 ("RISC-V: Add kexec_file support") Signed-off-by: Li Huafei Reviewed-by: Conor Dooley Reviewed-by: Liao Chang Link: https://lore.kernel.org/r/20221104095658.141222-1-lihuafei1@huawei.com Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/kexec.h | 5 +++++ arch/riscv/kernel/elf_kexec.c | 10 ++++++++++ 2 files changed, 15 insertions(+) diff --git a/arch/riscv/include/asm/kexec.h b/arch/riscv/include/asm/kexec.h index eee260e8ab30..2b56769cb530 100644 --- a/arch/riscv/include/asm/kexec.h +++ b/arch/riscv/include/asm/kexec.h @@ -39,6 +39,7 @@ crash_setup_regs(struct pt_regs *newregs, #define ARCH_HAS_KIMAGE_ARCH struct kimage_arch { + void *fdt; /* For CONFIG_KEXEC_FILE */ unsigned long fdt_addr; }; @@ -62,6 +63,10 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, const Elf_Shdr *relsec, const Elf_Shdr *symtab); #define arch_kexec_apply_relocations_add arch_kexec_apply_relocations_add + +struct kimage; +int arch_kimage_file_post_load_cleanup(struct kimage *image); +#define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup #endif #endif diff --git a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c index 0cb94992c15b..ff30fcb43f47 100644 --- a/arch/riscv/kernel/elf_kexec.c +++ b/arch/riscv/kernel/elf_kexec.c @@ -21,6 +21,14 @@ #include #include +int arch_kimage_file_post_load_cleanup(struct kimage *image) +{ + kvfree(image->arch.fdt); + image->arch.fdt = NULL; + + return kexec_image_post_load_cleanup_default(image); +} + static int riscv_kexec_elf_load(struct kimage *image, struct elfhdr *ehdr, struct kexec_elf_info *elf_info, unsigned long old_pbase, unsigned long new_pbase) @@ -298,6 +306,8 @@ static void *elf_kexec_load(struct kimage *image, char *kernel_buf, pr_err("Error add DTB kbuf ret=%d\n", ret); goto out_free_fdt; } + /* Cache the fdt buffer 
address for memory cleanup */ + image->arch.fdt = fdt; pr_notice("Loaded device tree at 0x%lx\n", kbuf.mem); goto out; From cbc32023ddbdf4baa3d9dc513a2184a84080a5a2 Mon Sep 17 00:00:00 2001 From: Li Huafei Date: Fri, 4 Nov 2022 17:56:58 +0800 Subject: [PATCH 3406/4122] RISC-V: kexec: Fix memory leak of elf header buffer This is reported by kmemleak detector: unreferenced object 0xff2000000403d000 (size 4096): comm "kexec", pid 146, jiffies 4294900633 (age 64.792s) hex dump (first 32 bytes): 7f 45 4c 46 02 01 01 00 00 00 00 00 00 00 00 00 .ELF............ 04 00 f3 00 01 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000566ca97c>] kmemleak_vmalloc+0x3c/0xbe [<00000000979283d8>] __vmalloc_node_range+0x3ac/0x560 [<00000000b4b3712a>] __vmalloc_node+0x56/0x62 [<00000000854f75e2>] vzalloc+0x2c/0x34 [<00000000e9a00db9>] crash_prepare_elf64_headers+0x80/0x30c [<0000000067e8bf48>] elf_kexec_load+0x3e8/0x4ec [<0000000036548e09>] kexec_image_load_default+0x40/0x4c [<0000000079fbe1b4>] sys_kexec_file_load+0x1c4/0x322 [<0000000040c62c03>] ret_from_syscall+0x0/0x2 In elf_kexec_load(), a buffer is allocated via vzalloc() to store elf headers. However, it is not freed back to the system when the kdump kernel is reloaded or unloaded, or when image->elf_headers is successfully set and loading the kdump kernel then fails for some reason. Fix it by freeing the buffer in arch_kimage_file_post_load_cleanup().
Fixes: 8acea455fafa ("RISC-V: Support for kexec_file on panic") Signed-off-by: Li Huafei Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20221104095658.141222-2-lihuafei1@huawei.com Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/elf_kexec.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c index ff30fcb43f47..5372b708fae2 100644 --- a/arch/riscv/kernel/elf_kexec.c +++ b/arch/riscv/kernel/elf_kexec.c @@ -26,6 +26,10 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image) kvfree(image->arch.fdt); image->arch.fdt = NULL; + vfree(image->elf_headers); + image->elf_headers = NULL; + image->elf_headers_sz = 0; + return kexec_image_post_load_cleanup_default(image); } From 8f8bcc8c720c360885639de66fe69756febed824 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 5 Dec 2022 11:29:16 -0400 Subject: [PATCH 3407/4122] vfio/pci: Move all the SPAPR PCI specific logic to vfio_pci_core.ko The vfio_spapr_pci_eeh_open/release() functions are one line wrappers around an arch function. Just call them directly. This eliminates some weird exported symbols that don't need to exist. 
Reviewed-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe Reviewed-by: Cornelia Huck Link: https://lore.kernel.org/r/1-v5-fc5346cacfd4+4c482-vfio_modules_jgg@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_core.c | 11 +++++++++-- drivers/vfio/vfio_spapr_eeh.c | 13 ------------- include/linux/vfio.h | 11 ----------- 3 files changed, 9 insertions(+), 26 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 189d4930c276..56501e7ef564 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -27,6 +27,9 @@ #include #include #include +#if IS_ENABLED(CONFIG_VFIO_SPAPR_EEH) +#include +#endif #include "vfio_pci_priv.h" @@ -686,7 +689,9 @@ void vfio_pci_core_close_device(struct vfio_device *core_vdev) vdev->sriov_pf_core_dev->vf_token->users--; mutex_unlock(&vdev->sriov_pf_core_dev->vf_token->lock); } - vfio_spapr_pci_eeh_release(vdev->pdev); +#if IS_ENABLED(CONFIG_VFIO_SPAPR_EEH) + eeh_dev_release(vdev->pdev); +#endif vfio_pci_core_disable(vdev); mutex_lock(&vdev->igate); @@ -705,7 +710,9 @@ EXPORT_SYMBOL_GPL(vfio_pci_core_close_device); void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev) { vfio_pci_probe_mmaps(vdev); - vfio_spapr_pci_eeh_open(vdev->pdev); +#if IS_ENABLED(CONFIG_VFIO_SPAPR_EEH) + eeh_dev_open(vdev->pdev); +#endif if (vdev->sriov_pf_core_dev) { mutex_lock(&vdev->sriov_pf_core_dev->vf_token->lock); diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c index 67f55ac1d459..c9d102aafbcd 100644 --- a/drivers/vfio/vfio_spapr_eeh.c +++ b/drivers/vfio/vfio_spapr_eeh.c @@ -15,19 +15,6 @@ #define DRIVER_AUTHOR "Gavin Shan, IBM Corporation" #define DRIVER_DESC "VFIO IOMMU SPAPR EEH" -/* We might build address mapping here for "fast" path later */ -void vfio_spapr_pci_eeh_open(struct pci_dev *pdev) -{ - eeh_dev_open(pdev); -} -EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_open); - -void vfio_spapr_pci_eeh_release(struct pci_dev *pdev) -{ - 
eeh_dev_release(pdev); -} -EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_release); - long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, unsigned int cmd, unsigned long arg) { diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 43b67e46a2cb..9378ca79d548 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -233,21 +233,10 @@ int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs, int max_irq_type, size_t *data_size); -struct pci_dev; #if IS_ENABLED(CONFIG_VFIO_SPAPR_EEH) -void vfio_spapr_pci_eeh_open(struct pci_dev *pdev); -void vfio_spapr_pci_eeh_release(struct pci_dev *pdev); long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, unsigned int cmd, unsigned long arg); #else -static inline void vfio_spapr_pci_eeh_open(struct pci_dev *pdev) -{ -} - -static inline void vfio_spapr_pci_eeh_release(struct pci_dev *pdev) -{ -} - static inline long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, unsigned int cmd, unsigned long arg) From e5c38a203eb4343993e889eb69f5386f085f25ef Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 5 Dec 2022 11:29:17 -0400 Subject: [PATCH 3408/4122] vfio/spapr: Move VFIO_CHECK_EXTENSION into tce_iommu_ioctl() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PPC64 kconfig is a bit of a rat's nest, but it turns out that if CONFIG_SPAPR_TCE_IOMMU is on then EEH must be too: config SPAPR_TCE_IOMMU bool "sPAPR TCE IOMMU Support" depends on PPC_POWERNV || PPC_PSERIES select IOMMU_API help Enables bits of IOMMU API required by VFIO. The iommu_ops is not implemented as it is not necessary for VFIO. config PPC_POWERNV select FORCE_PCI config PPC_PSERIES select FORCE_PCI config EEH bool depends on (PPC_POWERNV || PPC_PSERIES) && PCI default y So, just open code the call to eeh_enabled() into tce_iommu_ioctl().
Reviewed-by: Christoph Hellwig Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Cornelia Huck Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/2-v5-fc5346cacfd4+4c482-vfio_modules_jgg@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_spapr_tce.c | 10 ++++------ drivers/vfio/vfio_spapr_eeh.c | 6 ------ 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 169f07ac162d..73cec2beae70 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -785,14 +785,12 @@ static long tce_iommu_ioctl(void *iommu_data, switch (arg) { case VFIO_SPAPR_TCE_IOMMU: case VFIO_SPAPR_TCE_v2_IOMMU: - ret = 1; - break; + return 1; + case VFIO_EEH: + return eeh_enabled(); default: - ret = vfio_spapr_iommu_eeh_ioctl(NULL, cmd, arg); - break; + return 0; } - - return (ret < 0) ? 0 : ret; } /* diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c index c9d102aafbcd..221b1b637e18 100644 --- a/drivers/vfio/vfio_spapr_eeh.c +++ b/drivers/vfio/vfio_spapr_eeh.c @@ -24,12 +24,6 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, long ret = -EINVAL; switch (cmd) { - case VFIO_CHECK_EXTENSION: - if (arg == VFIO_EEH) - ret = eeh_enabled() ? 1 : 0; - else - ret = 0; - break; case VFIO_EEH_PE_OP: pe = eeh_iommu_group_to_pe(group); if (!pe) From e276e25819b8a173a21947720bb0a548c0b724b7 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 5 Dec 2022 11:29:18 -0400 Subject: [PATCH 3409/4122] vfio: Move vfio_spapr_iommu_eeh_ioctl into vfio_iommu_spapr_tce.c As with the previous patch EEH is always enabled if SPAPR_TCE_IOMMU, so move this last bit of code into the main module. Now that this function only processes VFIO_EEH_PE_OP remove a level of indenting as well, it is only called by a case statement that already checked VFIO_EEH_PE_OP. This eliminates an unnecessary module and SPAPR code in a global header. 
Reviewed-by: Christoph Hellwig Reviewed-by: Cornelia Huck Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/3-v5-fc5346cacfd4+4c482-vfio_modules_jgg@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/Makefile | 1 - drivers/vfio/vfio_iommu_spapr_tce.c | 55 +++++++++++++++++- drivers/vfio/vfio_spapr_eeh.c | 88 ----------------------------- include/linux/vfio.h | 12 ---- 4 files changed, 53 insertions(+), 103 deletions(-) delete mode 100644 drivers/vfio/vfio_spapr_eeh.c diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index b693a1169286..50b8e8e3fb10 100644 --- a/drivers/vfio/Makefile +++ b/drivers/vfio/Makefile @@ -10,7 +10,6 @@ vfio-y += vfio_main.o \ obj-$(CONFIG_VFIO_VIRQFD) += vfio_virqfd.o obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o -obj-$(CONFIG_VFIO_SPAPR_EEH) += vfio_spapr_eeh.o obj-$(CONFIG_VFIO_PCI) += pci/ obj-$(CONFIG_VFIO_PLATFORM) += platform/ obj-$(CONFIG_VFIO_MDEV) += mdev/ diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 73cec2beae70..60a50ce8701e 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -4,6 +4,7 @@ * * Copyright (C) 2013 IBM Corp. All rights reserved. * Author: Alexey Kardashevskiy + * Copyright Gavin Shan, IBM Corporation 2014. * * Derived from original vfio_iommu_type1.c: * Copyright (C) 2012 Red Hat, Inc. All rights reserved. 
@@ -773,6 +774,57 @@ static long tce_iommu_create_default_window(struct tce_container *container) return ret; } +static long vfio_spapr_ioctl_eeh_pe_op(struct iommu_group *group, + unsigned long arg) +{ + struct eeh_pe *pe; + struct vfio_eeh_pe_op op; + unsigned long minsz; + + pe = eeh_iommu_group_to_pe(group); + if (!pe) + return -ENODEV; + + minsz = offsetofend(struct vfio_eeh_pe_op, op); + if (copy_from_user(&op, (void __user *)arg, minsz)) + return -EFAULT; + if (op.argsz < minsz || op.flags) + return -EINVAL; + + switch (op.op) { + case VFIO_EEH_PE_DISABLE: + return eeh_pe_set_option(pe, EEH_OPT_DISABLE); + case VFIO_EEH_PE_ENABLE: + return eeh_pe_set_option(pe, EEH_OPT_ENABLE); + case VFIO_EEH_PE_UNFREEZE_IO: + return eeh_pe_set_option(pe, EEH_OPT_THAW_MMIO); + case VFIO_EEH_PE_UNFREEZE_DMA: + return eeh_pe_set_option(pe, EEH_OPT_THAW_DMA); + case VFIO_EEH_PE_GET_STATE: + return eeh_pe_get_state(pe); + break; + case VFIO_EEH_PE_RESET_DEACTIVATE: + return eeh_pe_reset(pe, EEH_RESET_DEACTIVATE, true); + case VFIO_EEH_PE_RESET_HOT: + return eeh_pe_reset(pe, EEH_RESET_HOT, true); + case VFIO_EEH_PE_RESET_FUNDAMENTAL: + return eeh_pe_reset(pe, EEH_RESET_FUNDAMENTAL, true); + case VFIO_EEH_PE_CONFIGURE: + return eeh_pe_configure(pe); + case VFIO_EEH_PE_INJECT_ERR: + minsz = offsetofend(struct vfio_eeh_pe_op, err.mask); + if (op.argsz < minsz) + return -EINVAL; + if (copy_from_user(&op, (void __user *)arg, minsz)) + return -EFAULT; + + return eeh_pe_inject_err(pe, op.err.type, op.err.func, + op.err.addr, op.err.mask); + default: + return -EINVAL; + } +} + static long tce_iommu_ioctl(void *iommu_data, unsigned int cmd, unsigned long arg) { @@ -1044,8 +1096,7 @@ static long tce_iommu_ioctl(void *iommu_data, ret = 0; list_for_each_entry(tcegrp, &container->group_list, next) { - ret = vfio_spapr_iommu_eeh_ioctl(tcegrp->grp, - cmd, arg); + ret = vfio_spapr_ioctl_eeh_pe_op(tcegrp->grp, arg); if (ret) return ret; } diff --git a/drivers/vfio/vfio_spapr_eeh.c 
b/drivers/vfio/vfio_spapr_eeh.c deleted file mode 100644 index 221b1b637e18..000000000000 --- a/drivers/vfio/vfio_spapr_eeh.c +++ /dev/null @@ -1,88 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * EEH functionality support for VFIO devices. The feature is only - * available on sPAPR compatible platforms. - * - * Copyright Gavin Shan, IBM Corporation 2014. - */ - -#include -#include -#include -#include - -#define DRIVER_VERSION "0.1" -#define DRIVER_AUTHOR "Gavin Shan, IBM Corporation" -#define DRIVER_DESC "VFIO IOMMU SPAPR EEH" - -long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, - unsigned int cmd, unsigned long arg) -{ - struct eeh_pe *pe; - struct vfio_eeh_pe_op op; - unsigned long minsz; - long ret = -EINVAL; - - switch (cmd) { - case VFIO_EEH_PE_OP: - pe = eeh_iommu_group_to_pe(group); - if (!pe) - return -ENODEV; - - minsz = offsetofend(struct vfio_eeh_pe_op, op); - if (copy_from_user(&op, (void __user *)arg, minsz)) - return -EFAULT; - if (op.argsz < minsz || op.flags) - return -EINVAL; - - switch (op.op) { - case VFIO_EEH_PE_DISABLE: - ret = eeh_pe_set_option(pe, EEH_OPT_DISABLE); - break; - case VFIO_EEH_PE_ENABLE: - ret = eeh_pe_set_option(pe, EEH_OPT_ENABLE); - break; - case VFIO_EEH_PE_UNFREEZE_IO: - ret = eeh_pe_set_option(pe, EEH_OPT_THAW_MMIO); - break; - case VFIO_EEH_PE_UNFREEZE_DMA: - ret = eeh_pe_set_option(pe, EEH_OPT_THAW_DMA); - break; - case VFIO_EEH_PE_GET_STATE: - ret = eeh_pe_get_state(pe); - break; - case VFIO_EEH_PE_RESET_DEACTIVATE: - ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE, true); - break; - case VFIO_EEH_PE_RESET_HOT: - ret = eeh_pe_reset(pe, EEH_RESET_HOT, true); - break; - case VFIO_EEH_PE_RESET_FUNDAMENTAL: - ret = eeh_pe_reset(pe, EEH_RESET_FUNDAMENTAL, true); - break; - case VFIO_EEH_PE_CONFIGURE: - ret = eeh_pe_configure(pe); - break; - case VFIO_EEH_PE_INJECT_ERR: - minsz = offsetofend(struct vfio_eeh_pe_op, err.mask); - if (op.argsz < minsz) - return -EINVAL; - if (copy_from_user(&op, (void __user *)arg, 
minsz)) - return -EFAULT; - - ret = eeh_pe_inject_err(pe, op.err.type, op.err.func, - op.err.addr, op.err.mask); - break; - default: - ret = -EINVAL; - } - } - - return ret; -} -EXPORT_SYMBOL_GPL(vfio_spapr_iommu_eeh_ioctl); - -MODULE_VERSION(DRIVER_VERSION); -MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR(DRIVER_AUTHOR); -MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 9378ca79d548..b4d5d4ca3d7d 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -233,18 +233,6 @@ int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs, int max_irq_type, size_t *data_size); -#if IS_ENABLED(CONFIG_VFIO_SPAPR_EEH) -long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, unsigned int cmd, - unsigned long arg); -#else -static inline long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, - unsigned int cmd, - unsigned long arg) -{ - return -ENOTTY; -} -#endif /* CONFIG_VFIO_SPAPR_EEH */ - /* * IRQfd - generic */ From 20601c45a0fa20bbb5545f4dd69f4f18448f4973 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 5 Dec 2022 11:29:19 -0400 Subject: [PATCH 3410/4122] vfio: Remove CONFIG_VFIO_SPAPR_EEH We don't need a kconfig symbol for this, just directly test CONFIG_EEH in the few places that need it. 
Reviewed-by: Christoph Hellwig Reviewed-by: Cornelia Huck Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/4-v5-fc5346cacfd4+4c482-vfio_modules_jgg@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/Kconfig | 5 ----- drivers/vfio/pci/vfio_pci_core.c | 6 +++--- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index 86c381ceb9a1..d25b91adfd64 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -20,11 +20,6 @@ config VFIO_IOMMU_SPAPR_TCE depends on SPAPR_TCE_IOMMU default VFIO -config VFIO_SPAPR_EEH - tristate - depends on EEH && VFIO_IOMMU_SPAPR_TCE - default VFIO - config VFIO_VIRQFD tristate select EVENTFD diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 56501e7ef564..f9365a5bc961 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -27,7 +27,7 @@ #include #include #include -#if IS_ENABLED(CONFIG_VFIO_SPAPR_EEH) +#if IS_ENABLED(CONFIG_EEH) #include #endif @@ -689,7 +689,7 @@ void vfio_pci_core_close_device(struct vfio_device *core_vdev) vdev->sriov_pf_core_dev->vf_token->users--; mutex_unlock(&vdev->sriov_pf_core_dev->vf_token->lock); } -#if IS_ENABLED(CONFIG_VFIO_SPAPR_EEH) +#if IS_ENABLED(CONFIG_EEH) eeh_dev_release(vdev->pdev); #endif vfio_pci_core_disable(vdev); @@ -710,7 +710,7 @@ EXPORT_SYMBOL_GPL(vfio_pci_core_close_device); void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev) { vfio_pci_probe_mmaps(vdev); -#if IS_ENABLED(CONFIG_VFIO_SPAPR_EEH) +#if IS_ENABLED(CONFIG_EEH) eeh_dev_open(vdev->pdev); #endif From e2d55709398e62cf53e5c7df3758ae52cc62d63a Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 5 Dec 2022 11:29:20 -0400 Subject: [PATCH 3411/4122] vfio: Fold vfio_virqfd.ko into vfio.ko This is only 1.8k, putting it in its own module is not really necessary. The kconfig infrastructure is still there to completely remove it for systems that are trying for small footprint. 
Put it in the main vfio.ko module now that kbuild can support multiple .c files. Signed-off-by: Jason Gunthorpe Reviewed-by: Cornelia Huck Link: https://lore.kernel.org/r/5-v5-fc5346cacfd4+4c482-vfio_modules_jgg@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/Kconfig | 2 +- drivers/vfio/Makefile | 4 +--- drivers/vfio/vfio.h | 13 +++++++++++++ drivers/vfio/vfio_main.c | 7 +++++++ drivers/vfio/virqfd.c | 17 +++-------------- 5 files changed, 25 insertions(+), 18 deletions(-) diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index d25b91adfd64..0b8d53f63c7e 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -21,7 +21,7 @@ config VFIO_IOMMU_SPAPR_TCE default VFIO config VFIO_VIRQFD - tristate + bool select EVENTFD default n diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index 50b8e8e3fb10..0721ed4831c9 100644 --- a/drivers/vfio/Makefile +++ b/drivers/vfio/Makefile @@ -1,13 +1,11 @@ # SPDX-License-Identifier: GPL-2.0 -vfio_virqfd-y := virqfd.o - obj-$(CONFIG_VFIO) += vfio.o vfio-y += vfio_main.o \ iova_bitmap.o \ container.o +vfio-$(CONFIG_VFIO_VIRQFD) += virqfd.o -obj-$(CONFIG_VFIO_VIRQFD) += vfio_virqfd.o obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o obj-$(CONFIG_VFIO_PCI) += pci/ diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h index bcad54bbab08..a7113b4baaa2 100644 --- a/drivers/vfio/vfio.h +++ b/drivers/vfio/vfio.h @@ -124,6 +124,19 @@ long vfio_container_ioctl_check_extension(struct vfio_container *container, int __init vfio_container_init(void); void vfio_container_cleanup(void); +#if IS_ENABLED(CONFIG_VFIO_VIRQFD) +int __init vfio_virqfd_init(void); +void vfio_virqfd_exit(void); +#else +static inline int __init vfio_virqfd_init(void) +{ + return 0; +} +static inline void vfio_virqfd_exit(void) +{ +} +#endif + #ifdef CONFIG_VFIO_NOIOMMU extern bool vfio_noiommu __read_mostly; #else diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c 
index 662e267a3e13..7f88569c3eba 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -1832,6 +1832,10 @@ static int __init vfio_init(void) if (ret) return ret; + ret = vfio_virqfd_init(); + if (ret) + goto err_virqfd; + /* /dev/vfio/$GROUP */ vfio.class = class_create(THIS_MODULE, "vfio"); if (IS_ERR(vfio.class)) { @@ -1862,6 +1866,8 @@ err_dev_class: class_destroy(vfio.class); vfio.class = NULL; err_group_class: + vfio_virqfd_exit(); +err_virqfd: vfio_container_cleanup(); return ret; } @@ -1876,6 +1882,7 @@ static void __exit vfio_cleanup(void) class_destroy(vfio.device_class); vfio.device_class = NULL; class_destroy(vfio.class); + vfio_virqfd_exit(); vfio_container_cleanup(); vfio.class = NULL; xa_destroy(&vfio_device_set_xa); diff --git a/drivers/vfio/virqfd.c b/drivers/vfio/virqfd.c index 414e98d82b02..497a17b37865 100644 --- a/drivers/vfio/virqfd.c +++ b/drivers/vfio/virqfd.c @@ -12,15 +12,12 @@ #include #include #include - -#define DRIVER_VERSION "0.1" -#define DRIVER_AUTHOR "Alex Williamson " -#define DRIVER_DESC "IRQFD support for VFIO bus drivers" +#include "vfio.h" static struct workqueue_struct *vfio_irqfd_cleanup_wq; static DEFINE_SPINLOCK(virqfd_lock); -static int __init vfio_virqfd_init(void) +int __init vfio_virqfd_init(void) { vfio_irqfd_cleanup_wq = create_singlethread_workqueue("vfio-irqfd-cleanup"); @@ -30,7 +27,7 @@ static int __init vfio_virqfd_init(void) return 0; } -static void __exit vfio_virqfd_exit(void) +void vfio_virqfd_exit(void) { destroy_workqueue(vfio_irqfd_cleanup_wq); } @@ -216,11 +213,3 @@ void vfio_virqfd_disable(struct virqfd **pvirqfd) flush_workqueue(vfio_irqfd_cleanup_wq); } EXPORT_SYMBOL_GPL(vfio_virqfd_disable); - -module_init(vfio_virqfd_init); -module_exit(vfio_virqfd_exit); - -MODULE_VERSION(DRIVER_VERSION); -MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR(DRIVER_AUTHOR); -MODULE_DESCRIPTION(DRIVER_DESC); From ce3895735cc26957dc6b2a8f5af07ddab09483ae Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 
2 Dec 2022 09:46:15 -0700 Subject: [PATCH 3412/4122] vfio/ap/ccw/samples: Fix device_register() unwind path We always need to call put_device() if device_register() fails. All vfio drivers calling device_register() include a similar unwind stack via gotos, therefore split device_unregister() into its device_del() and put_device() components in the unwind path, and add a goto target to handle only the put_device() requirement. Reported-by: Ruan Jinjie Link: https://lore.kernel.org/all/20221118032827.3725190-1-ruanjinjie@huawei.com Fixes: d61fc96f47fd ("sample: vfio mdev display - host device") Fixes: 9d1a546c53b4 ("docs: Sample driver to demonstrate how to use Mediated device framework.") Fixes: a5e6e6505f38 ("sample: vfio bochs vbe display (host device for bochs-drm)") Fixes: 9e6f07cd1eaa ("vfio/ccw: create a parent struct") Fixes: 36360658eb5a ("s390: vfio_ap: link the vfio_ap devices to the vfio_ap bus subsystem") Cc: Tony Krowiak Cc: Halil Pasic Cc: Jason Herne Cc: Kirti Wankhede Reviewed-by: Kevin Tian Reviewed-by: Eric Farman Reviewed-by: Tony Krowiak Reviewed-by: Jason J. 
Herne Link: https://lore.kernel.org/r/166999942139.645727.12439756512449846442.stgit@omen Signed-off-by: Alex Williamson --- drivers/s390/cio/vfio_ccw_drv.c | 3 ++- drivers/s390/crypto/vfio_ap_drv.c | 2 +- samples/vfio-mdev/mbochs.c | 7 ++++--- samples/vfio-mdev/mdpy.c | 7 ++++--- samples/vfio-mdev/mtty.c | 7 ++++--- 5 files changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c index c2a65808605a..54aba7cceb33 100644 --- a/drivers/s390/cio/vfio_ccw_drv.c +++ b/drivers/s390/cio/vfio_ccw_drv.c @@ -199,8 +199,9 @@ static int vfio_ccw_sch_probe(struct subchannel *sch) return 0; out_unreg: - device_unregister(&parent->dev); + device_del(&parent->dev); out_free: + put_device(&parent->dev); dev_set_drvdata(&sch->dev, NULL); return ret; } diff --git a/drivers/s390/crypto/vfio_ap_drv.c b/drivers/s390/crypto/vfio_ap_drv.c index f43cfeabd2cc..997b524bdd2b 100644 --- a/drivers/s390/crypto/vfio_ap_drv.c +++ b/drivers/s390/crypto/vfio_ap_drv.c @@ -122,7 +122,7 @@ static int vfio_ap_matrix_dev_create(void) return 0; matrix_drv_err: - device_unregister(&matrix_dev->device); + device_del(&matrix_dev->device); matrix_reg_err: put_device(&matrix_dev->device); matrix_alloc_err: diff --git a/samples/vfio-mdev/mbochs.c b/samples/vfio-mdev/mbochs.c index 8b5a3a778a25..e54eb752e1ba 100644 --- a/samples/vfio-mdev/mbochs.c +++ b/samples/vfio-mdev/mbochs.c @@ -1430,7 +1430,7 @@ static int __init mbochs_dev_init(void) ret = device_register(&mbochs_dev); if (ret) - goto err_class; + goto err_put; ret = mdev_register_parent(&mbochs_parent, &mbochs_dev, &mbochs_driver, mbochs_mdev_types, @@ -1441,8 +1441,9 @@ static int __init mbochs_dev_init(void) return 0; err_device: - device_unregister(&mbochs_dev); -err_class: + device_del(&mbochs_dev); +err_put: + put_device(&mbochs_dev); class_destroy(mbochs_class); err_driver: mdev_unregister_driver(&mbochs_driver); diff --git a/samples/vfio-mdev/mdpy.c b/samples/vfio-mdev/mdpy.c index 
721fb06c6413..e8400fdab71d 100644 --- a/samples/vfio-mdev/mdpy.c +++ b/samples/vfio-mdev/mdpy.c @@ -717,7 +717,7 @@ static int __init mdpy_dev_init(void) ret = device_register(&mdpy_dev); if (ret) - goto err_class; + goto err_put; ret = mdev_register_parent(&mdpy_parent, &mdpy_dev, &mdpy_driver, mdpy_mdev_types, @@ -728,8 +728,9 @@ static int __init mdpy_dev_init(void) return 0; err_device: - device_unregister(&mdpy_dev); -err_class: + device_del(&mdpy_dev); +err_put: + put_device(&mdpy_dev); class_destroy(mdpy_class); err_driver: mdev_unregister_driver(&mdpy_driver); diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c index 3c2a421b9b69..e887de672c52 100644 --- a/samples/vfio-mdev/mtty.c +++ b/samples/vfio-mdev/mtty.c @@ -1330,7 +1330,7 @@ static int __init mtty_dev_init(void) ret = device_register(&mtty_dev.dev); if (ret) - goto err_class; + goto err_put; ret = mdev_register_parent(&mtty_dev.parent, &mtty_dev.dev, &mtty_driver, mtty_mdev_types, @@ -1340,8 +1340,9 @@ static int __init mtty_dev_init(void) return 0; err_device: - device_unregister(&mtty_dev.dev); -err_class: + device_del(&mtty_dev.dev); +err_put: + put_device(&mtty_dev.dev); class_destroy(mtty_dev.vd_class); err_driver: mdev_unregister_driver(&mtty_driver); From 4ec3c19d058f7391ec631b8a1b0a690422b246a9 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sat, 5 Nov 2022 17:12:34 +0100 Subject: [PATCH 3413/4122] gfs2: Handle -EBUSY result of insert_inode_locked4 When creating a new inode, there is a small chance that an inode lookup for a previous version of the same inode is still in progress. In that case, that previous lookup will eventually fail, but we may still need to retry here. 
Signed-off-by: Andreas Gruenbacher --- fs/gfs2/inode.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index c057f3bd475f..9fbbc365a404 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -734,8 +734,12 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, goto fail_free_inode; gfs2_cancel_delete_work(io_gl); +retry: error = insert_inode_locked4(inode, ip->i_no_addr, iget_test, &ip->i_no_addr); - BUG_ON(error); + if (error == -EBUSY) + goto retry; + if (error) + goto fail_gunlock2; error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT | GL_NOPID, &ip->i_iopen_gh); From 104bb8a663451404a26331263ce5b96c34504049 Mon Sep 17 00:00:00 2001 From: ruanjinjie Date: Mon, 5 Dec 2022 15:51:53 +0800 Subject: [PATCH 3414/4122] power: supply: fix null pointer dereferencing in power_supply_get_battery_info When kmalloc() fails to allocate memory in kasprintf(), propname will be NULL, and strcmp() called by of_get_property() will cause a null pointer dereference. So return ENOMEM if kasprintf() returns a NULL pointer. 
Fixes: 3afb50d7125b ("power: supply: core: Add some helpers to use the battery OCV capacity table") Signed-off-by: ruanjinjie Reviewed-by: Baolin Wang Signed-off-by: Sebastian Reichel --- drivers/power/supply/power_supply_core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c index 00cb19b46001..7c790c41e2fe 100644 --- a/drivers/power/supply/power_supply_core.c +++ b/drivers/power/supply/power_supply_core.c @@ -750,6 +750,11 @@ int power_supply_get_battery_info(struct power_supply *psy, int i, tab_len, size; propname = kasprintf(GFP_KERNEL, "ocv-capacity-table-%d", index); + if (!propname) { + power_supply_put_battery_info(psy, info); + err = -ENOMEM; + goto out_put_node; + } list = of_get_property(battery_np, propname, &size); if (!list || !size) { dev_err(&psy->dev, "failed to get %s\n", propname); From 5c3022e4a616d800cf5f4c3a981d7992179e44a1 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Wed, 9 Nov 2022 01:49:36 -0500 Subject: [PATCH 3415/4122] riscv: stacktrace: Fixup ftrace_graph_ret_addr retp argument The 'retp' is a pointer to the return address on the stack, so we must pass the current return address pointer as the 'retp' argument to ftrace_push_return_trace(). Not parent function's return address on the stack. 
Fixes: b785ec129bd9 ("riscv/ftrace: Add HAVE_FUNCTION_GRAPH_RET_ADDR_PTR support") Signed-off-by: Guo Ren Signed-off-by: Guo Ren Link: https://lore.kernel.org/r/20221109064937.3643993-2-guoren@kernel.org Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/stacktrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 08d11a53f39e..bcfe9eb55f80 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -58,7 +58,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, } else { fp = frame->fp; pc = ftrace_graph_ret_addr(current, NULL, frame->ra, - (unsigned long *)(fp - 8)); + &frame->ra); } } From 7ecdadf7f8c659524f6b2aebf6be7bf619764d90 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Wed, 9 Nov 2022 01:49:37 -0500 Subject: [PATCH 3416/4122] riscv: stacktrace: Make walk_stackframe cross pt_regs frame The current walk_stackframe with FRAME_POINTER would stop unwinding at ret_from_exception: BUG: sleeping function called from invalid context at kernel/locking/rwsem.c:1518 in_atomic(): 0, irqs_disabled(): 1, non_block: 0, pid: 1, name: init CPU: 0 PID: 1 Comm: init Not tainted 5.10.113-00021-g15c15974895c-dirty #192 Call Trace: [] walk_stackframe+0x0/0xee [] show_stack+0x32/0x4a [] dump_stack_lvl+0x72/0x8e [] dump_stack+0x14/0x1c [] ___might_sleep+0x12e/0x138 [] __might_sleep+0x10/0x18 [] down_read+0x22/0xa4 [] do_page_fault+0xb0/0x2fe [] ret_from_exception+0x0/0xc The optimization would help walk_stackframe cross the pt_regs frame and get more backtrace of debug info: BUG: sleeping function called from invalid context at kernel/locking/rwsem.c:1518 in_atomic(): 0, irqs_disabled(): 1, non_block: 0, pid: 1, name: init CPU: 0 PID: 1 Comm: init Not tainted 5.10.113-00021-g15c15974895c-dirty #192 Call Trace: [] walk_stackframe+0x0/0xee [] show_stack+0x32/0x4a [] dump_stack_lvl+0x72/0x8e [] dump_stack+0x14/0x1c [] 
___might_sleep+0x12e/0x138 [] __might_sleep+0x10/0x18 [] down_read+0x22/0xa4 [] do_page_fault+0xb0/0x2fe [] ret_from_exception+0x0/0xc [] riscv_intc_irq+0x1a/0x72 [] ret_from_exception+0x0/0xc [] vma_link+0x54/0x160 [] mmap_region+0x2cc/0x4d0 [] do_mmap+0x2d8/0x3ac [] vm_mmap_pgoff+0x70/0xb8 [] vm_mmap+0x2a/0x36 [] elf_map+0x72/0x84 [] load_elf_binary+0x69a/0xec8 [] bprm_execve+0x246/0x53a [] kernel_execve+0xe8/0x124 [] run_init_process+0xfa/0x10c [] try_to_run_init_process+0x12/0x3c [] kernel_init+0xb4/0xf8 [] ret_from_exception+0x0/0xc Here is the error injection test code for the above output: drivers/irqchip/irq-riscv-intc.c: static asmlinkage void riscv_intc_irq(struct pt_regs *regs) { unsigned long cause = regs->cause & ~CAUSE_IRQ_FLAG; + u32 tmp; __get_user(tmp, (u32 *)0); Signed-off-by: Guo Ren Signed-off-by: Guo Ren Link: https://lore.kernel.org/r/20221109064937.3643993-3-guoren@kernel.org [Palmer: use SYM_CODE_*] Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/entry.S | 3 ++- arch/riscv/kernel/stacktrace.c | 9 +++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index b9eda3fcbd6d..da44fe2d0d82 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -248,7 +248,7 @@ ret_from_syscall_rejected: andi t0, t0, _TIF_SYSCALL_WORK bnez t0, handle_syscall_trace_exit -ret_from_exception: +SYM_CODE_START_NOALIGN(ret_from_exception) REG_L s0, PT_STATUS(sp) csrc CSR_STATUS, SR_IE #ifdef CONFIG_TRACE_IRQFLAGS @@ -262,6 +262,7 @@ ret_from_exception: andi s0, s0, SR_SPP #endif bnez s0, resume_kernel +SYM_CODE_END(ret_from_exception) resume_userspace: /* Interrupts must be disabled here so flags are checked atomically */ diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index bcfe9eb55f80..75c8dd64fc48 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -16,6 +16,8 @@ #ifdef CONFIG_FRAME_POINTER +extern asmlinkage void 
ret_from_exception(void); + void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, bool (*fn)(void *, unsigned long), void *arg) { @@ -59,6 +61,13 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, fp = frame->fp; pc = ftrace_graph_ret_addr(current, NULL, frame->ra, &frame->ra); + if (pc == (unsigned long)ret_from_exception) { + if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc))) + break; + + pc = ((struct pt_regs *)sp)->epc; + fp = ((struct pt_regs *)sp)->s0; + } } } From 0a584655ef89541dae4d48d2c523b1480ae80284 Mon Sep 17 00:00:00 2001 From: Francisco Munoz Date: Mon, 5 Dec 2022 17:16:37 -0700 Subject: [PATCH 3417/4122] PCI: vmd: Fix secondary bus reset for Intel bridges MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The reset was never applied in the current implementation because Intel Bridges owned by VMD are parentless. Internally, pci_reset_bus() applies a reset to the parent of the PCI device supplied as argument, but in this case it failed because there wasn't a parent. In more detail, this change allows the VMD driver to enumerate NVMe devices in pass-through configurations when guest reboots are performed. There was an attempt to fix this, but later we discovered that the code inside pci_reset_bus() wasn’t triggering secondary bus resets. Therefore, we updated the parameters passed to it, and now NVMe SSDs attached to VMD bridges are properly enumerated in VT-d pass-through scenarios. 
Link: https://lore.kernel.org/r/20221206001637.4744-1-francisco.munoz.ruiz@linux.intel.com Fixes: 6aab5622296b ("PCI: vmd: Clean up domain before enumeration") Signed-off-by: Francisco Munoz Signed-off-by: Lorenzo Pieralisi Reviewed-by: Nirmal Patel Reviewed-by: Jonathan Derrick --- drivers/pci/controller/vmd.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c index 98e0746e681c..769eedeb8802 100644 --- a/drivers/pci/controller/vmd.c +++ b/drivers/pci/controller/vmd.c @@ -719,6 +719,7 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features) resource_size_t offset[2] = {0}; resource_size_t membar2_offset = 0x2000; struct pci_bus *child; + struct pci_dev *dev; int ret; /* @@ -859,8 +860,25 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features) pci_scan_child_bus(vmd->bus); vmd_domain_reset(vmd); - list_for_each_entry(child, &vmd->bus->children, node) - pci_reset_bus(child->self); + + /* When Intel VMD is enabled, the OS does not discover the Root Ports + * owned by Intel VMD within the MMCFG space. pci_reset_bus() applies + * a reset to the parent of the PCI device supplied as argument. This + * is why we pass a child device, so the reset can be triggered at + * the Intel bridge level and propagated to all the children in the + * hierarchy. 
+ */ + list_for_each_entry(child, &vmd->bus->children, node) { + if (!list_empty(&child->devices)) { + dev = list_first_entry(&child->devices, + struct pci_dev, bus_list); + if (pci_reset_bus(dev)) + pci_warn(dev, "can't reset device: %d\n", ret); + + break; + } + } + pci_assign_unassigned_bus_resources(vmd->bus); /* From 19098934f910b4d47cb30251dd39ffa57bef9523 Mon Sep 17 00:00:00 2001 From: John Thomson Date: Tue, 6 Dec 2022 06:46:45 +1000 Subject: [PATCH 3418/4122] PCI: mt7621: Add sentinel to quirks table Current driver is missing a sentinel in the struct soc_device_attribute array, which causes an oops when assessed by the soc_device_match(mt7621_pcie_quirks_match) call. This was only exposed once the CONFIG_SOC_MT7621 mt7621 soc_dev_attr was fixed to register the SOC as a device, in: commit 7c18b64bba3b ("mips: ralink: mt7621: do not use kzalloc too early") Fix it by adding the required sentinel. Link: https://lore.kernel.org/lkml/26ebbed1-0fe9-4af9-8466-65f841d0b382@app.fastmail.com Link: https://lore.kernel.org/r/20221205204645.301301-1-git@johnthomson.fastmail.com.au Fixes: b483b4e4d3f6 ("staging: mt7621-pci: add quirks for 'E2' revision using 'soc_device_attribute'") Signed-off-by: John Thomson Signed-off-by: Lorenzo Pieralisi Acked-by: Sergio Paracuellos --- drivers/pci/controller/pcie-mt7621.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/pci/controller/pcie-mt7621.c b/drivers/pci/controller/pcie-mt7621.c index 4bd1abf26008..ee7aad09d627 100644 --- a/drivers/pci/controller/pcie-mt7621.c +++ b/drivers/pci/controller/pcie-mt7621.c @@ -466,7 +466,8 @@ static int mt7621_pcie_register_host(struct pci_host_bridge *host) } static const struct soc_device_attribute mt7621_pcie_quirks_match[] = { - { .soc_id = "mt7621", .revision = "E2" } + { .soc_id = "mt7621", .revision = "E2" }, + { /* sentinel */ } }; static int mt7621_pcie_probe(struct platform_device *pdev) From 6aecc0a59e07ba895b5473e0c916ba5f3d556c15 Mon Sep 17 00:00:00 2001 
From: Bjorn Helgaas Date: Mon, 5 Dec 2022 16:32:31 -0600 Subject: [PATCH 3419/4122] cxl: Remove unnecessary cxl_pci_window_alignment() cxl_pci_window_alignment() is referenced only via the struct pci_controller_ops.window_alignment function pointer, and only in the powerpc implementation of pcibios_window_alignment(). pcibios_window_alignment() defaults to returning 1 if the function pointer is NULL, which is the same as what cxl_pci_window_alignment() does. cxl_pci_window_alignment() is unnecessary, so remove it. No functional change intended. Signed-off-by: Bjorn Helgaas Acked-by: Frederic Barrat Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221205223231.1268085-1-helgaas@kernel.org --- drivers/misc/cxl/vphb.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c index 1264253cc07b..6332db8044bd 100644 --- a/drivers/misc/cxl/vphb.c +++ b/drivers/misc/cxl/vphb.c @@ -67,12 +67,6 @@ static void cxl_pci_disable_device(struct pci_dev *dev) } } -static resource_size_t cxl_pci_window_alignment(struct pci_bus *bus, - unsigned long type) -{ - return 1; -} - static void cxl_pci_reset_secondary_bus(struct pci_dev *dev) { /* Should we do an AFU reset here ? 
*/ @@ -200,7 +194,6 @@ static struct pci_controller_ops cxl_pci_controller_ops = .enable_device_hook = cxl_pci_enable_device_hook, .disable_device = cxl_pci_disable_device, .release_device = cxl_pci_disable_device, - .window_alignment = cxl_pci_window_alignment, .reset_secondary_bus = cxl_pci_reset_secondary_bus, .setup_msi_irqs = cxl_setup_msi_irqs, .teardown_msi_irqs = cxl_teardown_msi_irqs, From 3ae7c96dd51025550c8001c6f833337f11d00807 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 2 Dec 2022 17:49:26 +0100 Subject: [PATCH 3420/4122] powerpc/dts/fsl: Fix pca954x i2c-mux node names "make dtbs_check": arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dtb: pca9546@77: $nodename:0: 'pca9546@77' does not match '^(i2c-?)?mux' From schema: Documentation/devicetree/bindings/i2c/i2c-mux-pca954x.yaml arch/powerpc/boot/dts/fsl/t1024qds.dtb: pca9547@77: Unevaluated properties are not allowed ('#address-cells', '#size-cells', 'i2c@0', 'i2c@2', 'i2c@3' were unexpected) From schema: Documentation/devicetree/bindings/i2c/i2c-mux-pca954x.yaml ... Fix this by renaming pca954x nodes to "i2c-mux", to match the I2C bus multiplexer/switch DT bindings and the Generic Names Recommendation in the Devicetree Specification. 
Signed-off-by: Geert Uytterhoeven Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/6c5d86c49ac170e9d56ab121ea0602f3873849ca.1669999298.git.geert+renesas@glider.be --- arch/powerpc/boot/dts/fsl/t1024qds.dts | 2 +- arch/powerpc/boot/dts/fsl/t1024rdb.dts | 2 +- arch/powerpc/boot/dts/fsl/t104xqds.dtsi | 2 +- arch/powerpc/boot/dts/fsl/t104xrdb.dtsi | 2 +- arch/powerpc/boot/dts/fsl/t208xqds.dtsi | 2 +- arch/powerpc/boot/dts/fsl/t208xrdb.dtsi | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/boot/dts/fsl/t1024qds.dts b/arch/powerpc/boot/dts/fsl/t1024qds.dts index d6858b7cd93f..9ea7942f914e 100644 --- a/arch/powerpc/boot/dts/fsl/t1024qds.dts +++ b/arch/powerpc/boot/dts/fsl/t1024qds.dts @@ -151,7 +151,7 @@ }; i2c@118000 { - pca9547@77 { + i2c-mux@77 { compatible = "nxp,pca9547"; reg = <0x77>; #address-cells = <1>; diff --git a/arch/powerpc/boot/dts/fsl/t1024rdb.dts b/arch/powerpc/boot/dts/fsl/t1024rdb.dts index dbcd31cc35dc..270aaf631f2a 100644 --- a/arch/powerpc/boot/dts/fsl/t1024rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t1024rdb.dts @@ -165,7 +165,7 @@ }; i2c@118100 { - pca9546@77 { + i2c-mux@77 { compatible = "nxp,pca9546"; reg = <0x77>; #address-cells = <1>; diff --git a/arch/powerpc/boot/dts/fsl/t104xqds.dtsi b/arch/powerpc/boot/dts/fsl/t104xqds.dtsi index 615479732252..1c329f076f64 100644 --- a/arch/powerpc/boot/dts/fsl/t104xqds.dtsi +++ b/arch/powerpc/boot/dts/fsl/t104xqds.dtsi @@ -268,7 +268,7 @@ }; i2c@118000 { - pca9547@77 { + i2c-mux@77 { compatible = "nxp,pca9547"; reg = <0x77>; }; diff --git a/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi index bfe1ed5be337..fc7bec5dcb90 100644 --- a/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi +++ b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi @@ -128,7 +128,7 @@ }; i2c@118100 { - pca9546@77 { + i2c-mux@77 { compatible = "nxp,pca9546"; reg = <0x77>; #address-cells = <1>; diff --git a/arch/powerpc/boot/dts/fsl/t208xqds.dtsi 
b/arch/powerpc/boot/dts/fsl/t208xqds.dtsi index db4139999b28..962c99941645 100644 --- a/arch/powerpc/boot/dts/fsl/t208xqds.dtsi +++ b/arch/powerpc/boot/dts/fsl/t208xqds.dtsi @@ -135,7 +135,7 @@ }; i2c@118000 { - pca9547@77 { + i2c-mux@77 { compatible = "nxp,pca9547"; reg = <0x77>; #address-cells = <1>; diff --git a/arch/powerpc/boot/dts/fsl/t208xrdb.dtsi b/arch/powerpc/boot/dts/fsl/t208xrdb.dtsi index ff87e67c70da..ecc3e8c7394c 100644 --- a/arch/powerpc/boot/dts/fsl/t208xrdb.dtsi +++ b/arch/powerpc/boot/dts/fsl/t208xrdb.dtsi @@ -138,7 +138,7 @@ }; i2c@118100 { - pca9546@77 { + i2c-mux@77 { compatible = "nxp,pca9546"; reg = <0x77>; }; From 5ddcc03a07ae1ab5062f89a946d9495f1fd8eaa4 Mon Sep 17 00:00:00 2001 From: Aboorva Devarajan Date: Mon, 14 Nov 2022 20:26:11 +0530 Subject: [PATCH 3421/4122] powerpc/cpuidle: Set CPUIDLE_FLAG_POLLING for snooze state During the comparative study of cpuidle governors, it is noticed that the menu governor does not select CEDE state in some scenarios even when the sleep duration of the CPU exceeds the target residency of the CEDE idle state. This is because the CPU exits the snooze "polling" state when snooze time limit is reached in the snooze_loop(), which is not a real wake up and it just means that the polling state selection was not adequate. cpuidle governors rely on CPUIDLE_FLAG_POLLING flag to be set for the polling states to handle the condition mentioned above. Hence, set the CPUIDLE_FLAG_POLLING flag for snooze state (polling state) in powerpc arch to make the cpuidle governor work as expected. 
Reference Commits: - Timeout enabled for snooze state: commit 78eaa10f027c ("cpuidle: powernv/pseries: Auto-promotion of snooze to deeper idle state") - commit dc2251bf98c6 ("cpuidle: Eliminate the CPUIDLE_DRIVER_STATE_START symbol") - Fix wakeup stats in governor for polling states commit 5f26bdceb9c0 ("cpuidle: menu: Fix wakeup statistics updates for polling state") Signed-off-by: Aboorva Devarajan Tested-by: Vishal Chourasia Reviewed-by: Vaidyanathan Srinivasan Reviewed-by: Vishal Chourasia Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221114145611.37669-1-aboorvad@linux.vnet.ibm.com --- drivers/cpuidle/cpuidle-powernv.c | 5 ++++- drivers/cpuidle/cpuidle-pseries.c | 8 ++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c index 0b5461b3d7dd..9ebedd972df0 100644 --- a/drivers/cpuidle/cpuidle-powernv.c +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -76,6 +76,7 @@ static int snooze_loop(struct cpuidle_device *dev, local_irq_enable(); snooze_exit_time = get_tb() + get_snooze_timeout(dev, drv, index); + dev->poll_time_limit = false; ppc64_runlatch_off(); HMT_very_low(); while (!need_resched()) { @@ -86,6 +87,7 @@ static int snooze_loop(struct cpuidle_device *dev, * cleared to order subsequent test of need_resched(). 
*/ clear_thread_flag(TIF_POLLING_NRFLAG); + dev->poll_time_limit = true; smp_mb(); break; } @@ -155,7 +157,8 @@ static struct cpuidle_state powernv_states[CPUIDLE_STATE_MAX] = { .desc = "snooze", .exit_latency = 0, .target_residency = 0, - .enter = snooze_loop }, + .enter = snooze_loop, + .flags = CPUIDLE_FLAG_POLLING }, }; static int powernv_cpuidle_cpu_online(unsigned int cpu) diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c index 7e7ab5597d7a..1bad4d2b7be3 100644 --- a/drivers/cpuidle/cpuidle-pseries.c +++ b/drivers/cpuidle/cpuidle-pseries.c @@ -44,6 +44,7 @@ static int snooze_loop(struct cpuidle_device *dev, pseries_idle_prolog(); local_irq_enable(); snooze_exit_time = get_tb() + snooze_timeout; + dev->poll_time_limit = false; while (!need_resched()) { HMT_low(); @@ -54,6 +55,7 @@ static int snooze_loop(struct cpuidle_device *dev, * loop anyway. Require a barrier after polling is * cleared to order subsequent test of need_resched(). */ + dev->poll_time_limit = true; clear_thread_flag(TIF_POLLING_NRFLAG); smp_mb(); break; @@ -268,7 +270,8 @@ static struct cpuidle_state dedicated_states[NR_DEDICATED_STATES] = { .desc = "snooze", .exit_latency = 0, .target_residency = 0, - .enter = &snooze_loop }, + .enter = &snooze_loop, + .flags = CPUIDLE_FLAG_POLLING }, { /* CEDE */ .name = "CEDE", .desc = "CEDE", @@ -286,7 +289,8 @@ static struct cpuidle_state shared_states[] = { .desc = "snooze", .exit_latency = 0, .target_residency = 0, - .enter = &snooze_loop }, + .enter = &snooze_loop, + .flags = CPUIDLE_FLAG_POLLING }, { /* Shared Cede */ .name = "Shared Cede", .desc = "Shared Cede", From 74eac50391ce42c5d0038d6f0e580576e53aec4e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 5 Dec 2022 10:45:30 +0100 Subject: [PATCH 3422/4122] dt-bindings: PCI: qcom: Allow 'dma-coherent' property Devices on some PCIe buses may be cache coherent and must be marked as such in the devicetree to avoid data corruption. 
This is specifically needed on recent Qualcomm platforms like SC8280XP. Link: https://lore.kernel.org/r/20221205094530.12883-1-johan+linaro@kernel.org Signed-off-by: Johan Hovold Signed-off-by: Lorenzo Pieralisi Acked-by: Rob Herring --- Documentation/devicetree/bindings/pci/qcom,pcie.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/pci/qcom,pcie.yaml b/Documentation/devicetree/bindings/pci/qcom,pcie.yaml index 2f851c804bb0..a5859bb3dc28 100644 --- a/Documentation/devicetree/bindings/pci/qcom,pcie.yaml +++ b/Documentation/devicetree/bindings/pci/qcom,pcie.yaml @@ -62,6 +62,8 @@ properties: minItems: 3 maxItems: 13 + dma-coherent: true + interconnects: maxItems: 2 From 7db354444ad8429e660b0f8145d425285d4f90ff Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 4 Dec 2022 16:50:41 +0100 Subject: [PATCH 3423/4122] gfs2: Cosmetic gfs2_dinode_{in,out} cleanup In each of the two functions, add an inode variable that points to &ip->i_inode and use that throughout the rest of the function. 
Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glops.c | 41 +++++++++++++++++++++-------------------- fs/gfs2/super.c | 27 ++++++++++++++------------- 2 files changed, 35 insertions(+), 33 deletions(-) diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 49210a2e7ce7..af69a1bacd55 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -397,38 +397,39 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) struct timespec64 atime; u16 height, depth; umode_t mode = be32_to_cpu(str->di_mode); - bool is_new = ip->i_inode.i_state & I_NEW; + struct inode *inode = &ip->i_inode; + bool is_new = inode->i_state & I_NEW; if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr))) goto corrupt; - if (unlikely(!is_new && inode_wrong_type(&ip->i_inode, mode))) + if (unlikely(!is_new && inode_wrong_type(inode, mode))) goto corrupt; ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino); - ip->i_inode.i_mode = mode; + inode->i_mode = mode; if (is_new) { - ip->i_inode.i_rdev = 0; + inode->i_rdev = 0; switch (mode & S_IFMT) { case S_IFBLK: case S_IFCHR: - ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major), - be32_to_cpu(str->di_minor)); + inode->i_rdev = MKDEV(be32_to_cpu(str->di_major), + be32_to_cpu(str->di_minor)); break; } } - i_uid_write(&ip->i_inode, be32_to_cpu(str->di_uid)); - i_gid_write(&ip->i_inode, be32_to_cpu(str->di_gid)); - set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink)); - i_size_write(&ip->i_inode, be64_to_cpu(str->di_size)); - gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); + i_uid_write(inode, be32_to_cpu(str->di_uid)); + i_gid_write(inode, be32_to_cpu(str->di_gid)); + set_nlink(inode, be32_to_cpu(str->di_nlink)); + i_size_write(inode, be64_to_cpu(str->di_size)); + gfs2_set_inode_blocks(inode, be64_to_cpu(str->di_blocks)); atime.tv_sec = be64_to_cpu(str->di_atime); atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); - if (timespec64_compare(&ip->i_inode.i_atime, &atime) < 0) - ip->i_inode.i_atime = atime; - 
ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime); - ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec); - ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime); - ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec); + if (timespec64_compare(&inode->i_atime, &atime) < 0) + inode->i_atime = atime; + inode->i_mtime.tv_sec = be64_to_cpu(str->di_mtime); + inode->i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec); + inode->i_ctime.tv_sec = be64_to_cpu(str->di_ctime); + inode->i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec); ip->i_goal = be64_to_cpu(str->di_goal_meta); ip->i_generation = be64_to_cpu(str->di_generation); @@ -436,7 +437,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) ip->i_diskflags = be32_to_cpu(str->di_flags); ip->i_eattr = be64_to_cpu(str->di_eattr); /* i_diskflags and i_eattr must be set before gfs2_set_inode_flags() */ - gfs2_set_inode_flags(&ip->i_inode); + gfs2_set_inode_flags(inode); height = be16_to_cpu(str->di_height); if (unlikely(height > GFS2_MAX_META_HEIGHT)) goto corrupt; @@ -448,8 +449,8 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) ip->i_depth = (u8)depth; ip->i_entries = be32_to_cpu(str->di_entries); - if (S_ISREG(ip->i_inode.i_mode)) - gfs2_set_aops(&ip->i_inode); + if (S_ISREG(inode->i_mode)) + gfs2_set_aops(inode); return 0; corrupt: diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index eac9b0c34aac..075fad8fb1d1 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -379,6 +379,7 @@ out: void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) { + const struct inode *inode = &ip->i_inode; struct gfs2_dinode *str = buf; str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC); @@ -386,15 +387,15 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI); str->di_num.no_addr = cpu_to_be64(ip->i_no_addr); str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino); - str->di_mode = 
cpu_to_be32(ip->i_inode.i_mode); - str->di_uid = cpu_to_be32(i_uid_read(&ip->i_inode)); - str->di_gid = cpu_to_be32(i_gid_read(&ip->i_inode)); - str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink); - str->di_size = cpu_to_be64(i_size_read(&ip->i_inode)); - str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); - str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); - str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec); - str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec); + str->di_mode = cpu_to_be32(inode->i_mode); + str->di_uid = cpu_to_be32(i_uid_read(inode)); + str->di_gid = cpu_to_be32(i_gid_read(inode)); + str->di_nlink = cpu_to_be32(inode->i_nlink); + str->di_size = cpu_to_be64(i_size_read(inode)); + str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(inode)); + str->di_atime = cpu_to_be64(inode->i_atime.tv_sec); + str->di_mtime = cpu_to_be64(inode->i_mtime.tv_sec); + str->di_ctime = cpu_to_be64(inode->i_ctime.tv_sec); str->di_goal_meta = cpu_to_be64(ip->i_goal); str->di_goal_data = cpu_to_be64(ip->i_goal); @@ -402,16 +403,16 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) str->di_flags = cpu_to_be32(ip->i_diskflags); str->di_height = cpu_to_be16(ip->i_height); - str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) && + str->di_payload_format = cpu_to_be32(S_ISDIR(inode->i_mode) && !(ip->i_diskflags & GFS2_DIF_EXHASH) ? 
GFS2_FORMAT_DE : 0); str->di_depth = cpu_to_be16(ip->i_depth); str->di_entries = cpu_to_be32(ip->i_entries); str->di_eattr = cpu_to_be64(ip->i_eattr); - str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec); - str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec); - str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec); + str->di_atime_nsec = cpu_to_be32(inode->i_atime.tv_nsec); + str->di_mtime_nsec = cpu_to_be32(inode->i_mtime.tv_nsec); + str->di_ctime_nsec = cpu_to_be32(inode->i_ctime.tv_nsec); } /** From 70376c7ff31221f1d21db5611d8209e677781d3a Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 4 Dec 2022 17:00:04 +0100 Subject: [PATCH 3424/4122] gfs2: Always check inode size of inline inodes Check if the inode size of stuffed (inline) inodes is within the allowed range when reading inodes from disk (gfs2_dinode_in()). This prevents us from on-disk corruption. The two checks in stuffed_readpage() and gfs2_unstuffer_page() that just truncate inline data to the maximum allowed size don't actually make sense, and they can be removed now as well. 
Reported-by: syzbot+7bb81dfa9cda07d9cd9d@syzkaller.appspotmail.com Signed-off-by: Andreas Gruenbacher --- fs/gfs2/aops.c | 2 -- fs/gfs2/bmap.c | 3 --- fs/gfs2/glops.c | 3 +++ 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 05bee80ac7de..e782b4f1d104 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -427,8 +427,6 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) return error; kaddr = kmap_atomic(page); - if (dsize > gfs2_max_stuffed_size(ip)) - dsize = gfs2_max_stuffed_size(ip); memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize); memset(kaddr + dsize, 0, PAGE_SIZE - dsize); kunmap_atomic(kaddr); diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 3bdb2c668a71..e7537fd305dd 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -61,9 +61,6 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh, void *kaddr = kmap(page); u64 dsize = i_size_read(inode); - if (dsize > gfs2_max_stuffed_size(ip)) - dsize = gfs2_max_stuffed_size(ip); - memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize); memset(kaddr + dsize, 0, PAGE_SIZE - dsize); kunmap(page); diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index af69a1bacd55..d78b61ecc1cd 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -449,6 +449,9 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) ip->i_depth = (u8)depth; ip->i_entries = be32_to_cpu(str->di_entries); + if (gfs2_is_stuffed(ip) && inode->i_size > gfs2_max_stuffed_size(ip)) + goto corrupt; + if (S_ISREG(inode->i_mode)) gfs2_set_aops(inode); From 4ad02083a092b497f35804de03eaa62cf81fada6 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 2 Dec 2022 18:00:15 +0100 Subject: [PATCH 3425/4122] gfs2: Make gfs2_glock_hold return its glock argument This allows code like 'gl = gfs2_glock_hold(...)'. 
Signed-off-by: Andreas Gruenbacher --- fs/gfs2/file.c | 3 +-- fs/gfs2/glock.c | 6 +++--- fs/gfs2/glock.h | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 60c6fb91fb58..eea5be4fbf0e 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -1445,14 +1445,13 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl) static void __flock_holder_uninit(struct file *file, struct gfs2_holder *fl_gh) { - struct gfs2_glock *gl = fl_gh->gh_gl; + struct gfs2_glock *gl = gfs2_glock_hold(fl_gh->gh_gl); /* * Make sure gfs2_glock_put() won't sleep under the file->f_lock * spinlock. */ - gfs2_glock_hold(gl); spin_lock(&file->f_lock); gfs2_holder_uninit(fl_gh); spin_unlock(&file->f_lock); diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index df335c258eb0..1a6c1eb7bd6b 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -186,10 +186,11 @@ void gfs2_glock_free(struct gfs2_glock *gl) * */ -void gfs2_glock_hold(struct gfs2_glock *gl) +struct gfs2_glock *gfs2_glock_hold(struct gfs2_glock *gl) { GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref)); lockref_get(&gl->gl_lockref); + return gl; } /** @@ -1256,13 +1257,12 @@ void __gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, u16 flags, struct gfs2_holder *gh, unsigned long ip) { INIT_LIST_HEAD(&gh->gh_list); - gh->gh_gl = gl; + gh->gh_gl = gfs2_glock_hold(gl); gh->gh_ip = ip; gh->gh_owner_pid = get_pid(task_pid(current)); gh->gh_state = state; gh->gh_flags = flags; gh->gh_iflags = 0; - gfs2_glock_hold(gl); } /** diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 0d068f4fd7d6..76cd2fabc668 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -196,7 +196,7 @@ static inline struct address_space *gfs2_glock2aspace(struct gfs2_glock *gl) extern int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, const struct gfs2_glock_operations *glops, int create, struct gfs2_glock **glp); -extern void gfs2_glock_hold(struct gfs2_glock *gl); +extern struct gfs2_glock 
*gfs2_glock_hold(struct gfs2_glock *gl); extern void gfs2_glock_put(struct gfs2_glock *gl); extern void gfs2_glock_queue_put(struct gfs2_glock *gl); From 97236ad5a68c6b7603cea2ad01c588887e5cb961 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 4 Dec 2022 13:02:39 +0100 Subject: [PATCH 3426/4122] gfs2: Avoid dequeuing GL_ASYNC glock holders twice When a locking request fails, the associated glock holder is automatically dequeued from the list of active and waiting holders. For GL_ASYNC locking requests, this will obviously happen asynchronously and it can race with attempts to cancel that locking request via gfs2_glock_dq(). Therefore, don't forget to check if a locking request has already been dequeued in gfs2_glock_dq(). Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 1a6c1eb7bd6b..0f5c5c12d8c6 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1707,6 +1707,13 @@ void gfs2_glock_dq(struct gfs2_holder *gh) struct gfs2_glock *gl = gh->gh_gl; spin_lock(&gl->gl_lockref.lock); + if (!gfs2_holder_queued(gh)) { + /* + * May have already been dequeued because the locking request + * was GL_ASYNC and it has failed in the meantime. 
+ */ + goto out; + } if (list_is_first(&gh->gh_list, &gl->gl_holders) && !test_bit(HIF_HOLDER, &gh->gh_iflags)) { spin_unlock(&gl->gl_lockref.lock); @@ -1716,6 +1723,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh) } __gfs2_glock_dq(gh); +out: spin_unlock(&gl->gl_lockref.lock); } From 764665c6775251d4569ba9f09981459bbb166359 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 4 Dec 2022 03:48:52 +0100 Subject: [PATCH 3427/4122] gfs2: Clean up after gfs2_create_inode rework Since commit 3d36e57ff768 ("gfs2: gfs2_create_inode rework"), gfs2_evict_inode() and gfs2_create_inode() / gfs2_inode_lookup() will synchronize via the inode hash table and we can be certain that once a new inode is inserted into the inode hash table(), gfs2_evict_inode() has completely destroyed any previous versions. We no longer need to worry about overlapping inode object lifespans. Update the code and comments accordingly. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.h | 14 -------------- fs/gfs2/super.c | 21 ++++++++++++++------- 2 files changed, 14 insertions(+), 21 deletions(-) diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 76cd2fabc668..d561126cfb47 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -322,20 +322,6 @@ static inline void glock_set_object(struct gfs2_glock *gl, void *object) /** * glock_clear_object - clear the gl_object field of a glock * @gl: the glock - * @object: the object - * - * I'd love to similarly add this: - * else if (gfs2_assert_warn(gl->gl_sbd, gl->gl_object == object)) - * gfs2_dump_glock(NULL, gl, true); - * Unfortunately, that's not possible because as soon as gfs2_delete_inode - * frees the block in the rgrp, another process can reassign it for an I_NEW - * inode in gfs2_create_inode because that calls new_inode, not gfs2_iget. - * That means gfs2_delete_inode may subsequently try to call this function - * for a glock that's already pointing to a brand new inode. 
If we clear the - * new inode's gl_object, we'll introduce metadata corruption. Function - * gfs2_delete_inode calls clear_inode which calls gfs2_clear_inode which also - * tries to clear gl_object, so it's more than just gfs2_delete_inode. - * */ static inline void glock_clear_object(struct gfs2_glock *gl, void *object) { diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 075fad8fb1d1..02f1b5f2d7f2 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1304,14 +1304,21 @@ static int evict_unlinked_inode(struct inode *inode) goto out; } - /* We're about to clear the bitmap for the dinode, but as soon as we - do, gfs2_create_inode can create another inode at the same block - location and try to set gl_object again. We clear gl_object here so - that subsequent inode creates don't see an old gl_object. */ - if (ip->i_gl) { - glock_clear_object(ip->i_gl, ip); + if (ip->i_gl) gfs2_inode_remember_delete(ip->i_gl, ip->i_no_formal_ino); - } + + /* + * As soon as we clear the bitmap for the dinode, gfs2_create_inode() + * can get called to recreate it, or even gfs2_inode_lookup() if the + * inode was recreated on another node in the meantime. + * + * However, inserting the new inode into the inode hash table will not + * succeed until the old inode is removed, and that only happens after + * ->evict_inode() returns. The new inode is attached to its inode and + * iopen glocks after inserting it into the inode hash table, so at + * that point we can be sure that both glocks are unused. + */ + ret = gfs2_dinode_dealloc(ip); out: return ret; From fe1bff6517de789d491844f53e61e4ff02e8f8b1 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 4 Dec 2022 13:27:11 +0100 Subject: [PATCH 3428/4122] gfs2: Simply dequeue iopen glock in gfs2_evict_inode With the previous change, to simplify things, we can always just dequeue and uninitialize the iopen glock in gfs2_evict_inode() even if it isn't queued anymore. 
Signed-off-by: Andreas Gruenbacher --- fs/gfs2/super.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 02f1b5f2d7f2..999cc146d708 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1419,12 +1419,9 @@ out: struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; glock_clear_object(gl, ip); - if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) { - ip->i_iopen_gh.gh_flags |= GL_NOCACHE; - gfs2_glock_dq(&ip->i_iopen_gh); - } gfs2_glock_hold(gl); - gfs2_holder_uninit(&ip->i_iopen_gh); + ip->i_iopen_gh.gh_flags |= GL_NOCACHE; + gfs2_glock_dq_uninit(&ip->i_iopen_gh); gfs2_glock_put_eventually(gl); } if (ip->i_gl) { From 3781ec9e09123d955b93fc8522ffb683a51f865d Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 5 Dec 2022 14:44:37 +0100 Subject: [PATCH 3429/4122] gfs2: Uninline and improve glock_{set,clear}_object Those functions have reached a size at which having them inline isn't useful anymore, so uninline them. In addition, report the glock name on assertion failures. 
Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 42 ++++++++++++++++++++++++++++++++++++++++++ fs/gfs2/glock.h | 29 +++-------------------------- 2 files changed, 45 insertions(+), 26 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 0f5c5c12d8c6..76432efe6e02 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -928,6 +928,48 @@ out_unlock: return; } +/** + * glock_set_object - set the gl_object field of a glock + * @gl: the glock + * @object: the object + */ +void glock_set_object(struct gfs2_glock *gl, void *object) +{ + void *prev_object; + + spin_lock(&gl->gl_lockref.lock); + prev_object = gl->gl_object; + gl->gl_object = object; + spin_unlock(&gl->gl_lockref.lock); + if (gfs2_assert_warn(gl->gl_name.ln_sbd, prev_object == NULL)) { + pr_warn("glock=%u/%llx\n", + gl->gl_name.ln_type, + (unsigned long long)gl->gl_name.ln_number); + gfs2_dump_glock(NULL, gl, true); + } +} + +/** + * glock_clear_object - clear the gl_object field of a glock + * @gl: the glock + */ +void glock_clear_object(struct gfs2_glock *gl, void *object) +{ + void *prev_object; + + spin_lock(&gl->gl_lockref.lock); + prev_object = gl->gl_object; + gl->gl_object = NULL; + spin_unlock(&gl->gl_lockref.lock); + if (gfs2_assert_warn(gl->gl_name.ln_sbd, + prev_object == object || prev_object == NULL)) { + pr_warn("glock=%u/%llx\n", + gl->gl_name.ln_type, + (unsigned long long)gl->gl_name.ln_number); + gfs2_dump_glock(NULL, gl, true); + } +} + void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation) { struct gfs2_inode_lvb *ri = (void *)gl->gl_lksb.sb_lvbptr; diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index d561126cfb47..e4be9e4bc979 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -288,6 +288,9 @@ extern void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp); extern void gfs2_register_debugfs(void); extern void gfs2_unregister_debugfs(void); +extern void glock_set_object(struct gfs2_glock *gl, void *object); +extern void glock_clear_object(struct 
gfs2_glock *gl, void *object); + extern const struct lm_lockops gfs2_dlm_ops; static inline void gfs2_holder_mark_uninitialized(struct gfs2_holder *gh) @@ -305,32 +308,6 @@ static inline bool gfs2_holder_queued(struct gfs2_holder *gh) return !list_empty(&gh->gh_list); } -/** - * glock_set_object - set the gl_object field of a glock - * @gl: the glock - * @object: the object - */ -static inline void glock_set_object(struct gfs2_glock *gl, void *object) -{ - spin_lock(&gl->gl_lockref.lock); - if (gfs2_assert_warn(gl->gl_name.ln_sbd, gl->gl_object == NULL)) - gfs2_dump_glock(NULL, gl, true); - gl->gl_object = object; - spin_unlock(&gl->gl_lockref.lock); -} - -/** - * glock_clear_object - clear the gl_object field of a glock - * @gl: the glock - */ -static inline void glock_clear_object(struct gfs2_glock *gl, void *object) -{ - spin_lock(&gl->gl_lockref.lock); - if (gl->gl_object == object) - gl->gl_object = NULL; - spin_unlock(&gl->gl_lockref.lock); -} - static inline void gfs2_holder_allow_demote(struct gfs2_holder *gh) { struct gfs2_glock *gl = gh->gh_gl; From 2ec750a01d189cf1872cd79490d0911a7bd519f8 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 4 Dec 2022 12:51:55 +0100 Subject: [PATCH 3430/4122] gfs2: Add gfs2_inode_lookup comment Add comment on when and why gfs2_cancel_delete_work() needs to be skipped in gfs2_inode_lookup(). Signed-off-by: Andreas Gruenbacher --- fs/gfs2/inode.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 9fbbc365a404..8d4c4b5c4c0d 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -142,6 +142,11 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, if (unlikely(error)) goto fail; + /* + * The only caller that sets @blktype to GFS2_BLKST_UNLINKED is + * delete_work_func(). Make sure not to cancel the delete work + * from within itself here. 
+ */ if (blktype == GFS2_BLKST_UNLINKED) extra_flags |= LM_FLAG_TRY; else From 88f4a9f813c549f6b8a6fbf12030949b48a4d5a4 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 5 Dec 2022 22:27:28 +0100 Subject: [PATCH 3431/4122] gfs2: Partially revert gfs2_inode_lookup change Commit c412a97cf6c5 changed delete_work_func() to always perform an inode lookup when gfs2_try_evict() fails. This doesn't make sense as a gfs2_try_evict() failure indicates that the inode is likely still in use. Revert that change. Fixes: c412a97cf6c5 ("gfs2: Use TRY lock in gfs2_inode_lookup for UNLINKED inodes") Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 76432efe6e02..6f2de8c0b2d0 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1082,6 +1082,7 @@ static void delete_work_func(struct work_struct *work) if (gfs2_queue_delete_work(gl, 5 * HZ)) return; } + goto out; } inode = gfs2_lookup_by_inum(sdp, no_addr, gl->gl_no_formal_ino, @@ -1094,6 +1095,7 @@ static void delete_work_func(struct work_struct *work) d_prune_aliases(inode); iput(inode); } +out: gfs2_glock_put(gl); } From ec9eaf68c1dcd1b0d4e0bad0630ddac49c20bbe8 Mon Sep 17 00:00:00 2001 From: Frank Wunderlich Date: Sun, 27 Nov 2022 12:41:37 +0100 Subject: [PATCH 3432/4122] dt-bindings: PCI: mediatek-gen3: add SoC based clock config The PCIe driver covers different SoCs which need different clock configs. Define them based on compatible. 
Link: https://lore.kernel.org/r/20221127114142.156573-4-linux@fw-web.de Signed-off-by: Frank Wunderlich Signed-off-by: Lorenzo Pieralisi Reviewed-by: Rob Herring Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Acked-by: Jianjun Wang --- .../bindings/pci/mediatek-pcie-gen3.yaml | 47 ++++++++++++++----- 1 file changed, 35 insertions(+), 12 deletions(-) diff --git a/Documentation/devicetree/bindings/pci/mediatek-pcie-gen3.yaml b/Documentation/devicetree/bindings/pci/mediatek-pcie-gen3.yaml index bc90f0ec7bd9..ef5cc1fc4d10 100644 --- a/Documentation/devicetree/bindings/pci/mediatek-pcie-gen3.yaml +++ b/Documentation/devicetree/bindings/pci/mediatek-pcie-gen3.yaml @@ -43,9 +43,6 @@ description: |+ each set has its own address for MSI message, and supports 32 MSI vectors to generate interrupt. -allOf: - - $ref: /schemas/pci/pci-bus.yaml# - properties: compatible: oneOf: @@ -90,15 +87,7 @@ properties: maxItems: 6 clock-names: - items: - - const: pl_250m - - const: tl_26m - - const: tl_96m - - const: tl_32k - - const: peri_26m - - enum: - - top_133m # for MT8192 - - peri_mem # for MT8188/MT8195 + maxItems: 6 assigned-clocks: maxItems: 1 @@ -147,6 +136,40 @@ required: - '#interrupt-cells' - interrupt-controller +allOf: + - $ref: /schemas/pci/pci-bus.yaml# + - if: + properties: + compatible: + const: mediatek,mt8192-pcie + then: + properties: + clock-names: + items: + - const: pl_250m + - const: tl_26m + - const: tl_96m + - const: tl_32k + - const: peri_26m + - const: top_133m + - if: + properties: + compatible: + contains: + enum: + - mediatek,mt8188-pcie + - mediatek,mt8195-pcie + then: + properties: + clock-names: + items: + - const: pl_250m + - const: tl_26m + - const: tl_96m + - const: tl_32k + - const: peri_26m + - const: peri_mem + unevaluatedProperties: false examples: From d3fd0ee7a4a1e796413fab7affc72eeec31bed13 Mon Sep 17 00:00:00 2001 From: Frank Wunderlich Date: Sun, 27 Nov 2022 12:41:38 +0100 Subject: [PATCH 3433/4122] dt-bindings: PCI: 
mediatek-gen3: add support for mt7986 Add compatible string and clock-definition for mt7986. It needs 4 clocks for PCIe, define them in binding. Link: https://lore.kernel.org/r/20221127114142.156573-5-linux@fw-web.de Signed-off-by: Frank Wunderlich Signed-off-by: Lorenzo Pieralisi Reviewed-by: Rob Herring Reviewed-by: Matthias Brugger Acked-by: Jianjun Wang --- .../bindings/pci/mediatek-pcie-gen3.yaml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/Documentation/devicetree/bindings/pci/mediatek-pcie-gen3.yaml b/Documentation/devicetree/bindings/pci/mediatek-pcie-gen3.yaml index ef5cc1fc4d10..7e8c7a2a5f9b 100644 --- a/Documentation/devicetree/bindings/pci/mediatek-pcie-gen3.yaml +++ b/Documentation/devicetree/bindings/pci/mediatek-pcie-gen3.yaml @@ -48,6 +48,7 @@ properties: oneOf: - items: - enum: + - mediatek,mt7986-pcie - mediatek,mt8188-pcie - mediatek,mt8195-pcie - const: mediatek,mt8192-pcie @@ -84,9 +85,11 @@ properties: enum: [ phy, mac ] clocks: + minItems: 4 maxItems: 6 clock-names: + minItems: 4 maxItems: 6 assigned-clocks: @@ -169,6 +172,20 @@ allOf: - const: tl_32k - const: peri_26m - const: peri_mem + - if: + properties: + compatible: + contains: + enum: + - mediatek,mt7986-pcie + then: + properties: + clock-names: + items: + - const: pl_250m + - const: tl_26m + - const: peri_26m + - const: top_133m unevaluatedProperties: false From 64f6a5d1922bf6d2b2d845de20d4563a6f328e2d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 5 Dec 2022 13:12:03 +0100 Subject: [PATCH 3434/4122] container_of: add container_of_const() that preserves const-ness of the pointer container_of does not preserve the const-ness of a pointer that is passed into it, which can cause C code that passes in a const pointer to get a pointer back that is not const and then scribble all over the data in it. To prevent this, container_of_const() will preserve the const status of the pointer passed into it using the newly available _Generic() method. 
Suggested-by: Jason Gunthorpe Suggested-by: Sakari Ailus Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: Jason Gunthorpe Reviewed-by: Andy Shevchenko Reviewed-by: Sakari Ailus Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20221205121206.166576-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/container_of.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/include/linux/container_of.h b/include/linux/container_of.h index 2008e9f4058c..1d898f9158b4 100644 --- a/include/linux/container_of.h +++ b/include/linux/container_of.h @@ -22,4 +22,17 @@ "pointer type mismatch in container_of()"); \ ((type *)(__mptr - offsetof(type, member))); }) +/** + * container_of_const - cast a member of a structure out to the containing + * structure and preserve the const-ness of the pointer + * @ptr: the pointer to the member + * @type: the type of the container struct this is embedded in. + * @member: the name of the member within the struct. + */ +#define container_of_const(ptr, type, member) \ + _Generic(ptr, \ + const typeof(*(ptr)) *: ((const type *)container_of(ptr, type, member)),\ + default: ((type *)container_of(ptr, type, member)) \ + ) + #endif /* _LINUX_CONTAINER_OF_H */ From 6149f83b3165955e9519de483b30b26d1518ad0f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 5 Dec 2022 13:12:04 +0100 Subject: [PATCH 3435/4122] device.h: move kobj_to_dev() to use container_of_const() Instead of rolling our own const-checking logic, use the newly introduced container_of_const() to handle it all for us automatically. Cc: Thomas Gleixner Reviewed-by: Sakari Ailus Reviewed-by: Jason Gunthorpe Acked-by: Rafael J. 
Wysocki Link: https://lore.kernel.org/r/20221205121206.166576-2-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/include/linux/device.h b/include/linux/device.h index 84ae52de6746..8d172d06b8c1 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -680,26 +680,7 @@ struct device_link { bool supplier_preactivated; /* Owned by consumer probe. */ }; -static inline struct device *__kobj_to_dev(struct kobject *kobj) -{ - return container_of(kobj, struct device, kobj); -} - -static inline const struct device *__kobj_to_dev_const(const struct kobject *kobj) -{ - return container_of(kobj, const struct device, kobj); -} - -/* - * container_of() will happily take a const * and spit back a non-const * as it - * is just doing pointer math. But we want to be a bit more careful in the - * driver code, so manually force any const * of a kobject to also be a const * - * to a device. - */ -#define kobj_to_dev(kobj) \ - _Generic((kobj), \ - const struct kobject *: __kobj_to_dev_const, \ - struct kobject *: __kobj_to_dev)(kobj) +#define kobj_to_dev(__kobj) container_of_const(__kobj, struct device, kobj) /** * device_iommu_mapped - Returns true when the device DMA is translated From c3da679286bee1d897bb24a804cca4ff58781bec Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 5 Dec 2022 13:12:05 +0100 Subject: [PATCH 3436/4122] usb.h: take advantage of container_of_const() Instead of rolling our own const-checking logic in to_usb_interface() and to_usb_device() use the newly added container_of_const() instead, making the logic much simpler overall. 
Reviewed-by: Sakari Ailus Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20221205121206.166576-3-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 42 ++---------------------------------------- 1 file changed, 2 insertions(+), 40 deletions(-) diff --git a/include/linux/usb.h b/include/linux/usb.h index 4b463a5e4ba2..010c681b8822 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -259,26 +259,7 @@ struct usb_interface { struct work_struct reset_ws; /* for resets in atomic context */ }; -static inline struct usb_interface *__to_usb_interface(struct device *d) -{ - return container_of(d, struct usb_interface, dev); -} - -static inline const struct usb_interface *__to_usb_interface_const(const struct device *d) -{ - return container_of(d, struct usb_interface, dev); -} - -/* - * container_of() will happily take a const * and spit back a non-const * as it - * is just doing pointer math. But we want to be a bit more careful in the USB - * driver code, so manually force any const * of a device to also be a const * - * to a usb_device. - */ -#define to_usb_interface(dev) \ - _Generic((dev), \ - const struct device *: __to_usb_interface_const, \ - struct device *: __to_usb_interface)(dev) +#define to_usb_interface(__dev) container_of_const(__dev, struct usb_interface, dev) static inline void *usb_get_intfdata(struct usb_interface *intf) { @@ -730,26 +711,7 @@ struct usb_device { unsigned use_generic_driver:1; }; -static inline struct usb_device *__to_usb_device(struct device *d) -{ - return container_of(d, struct usb_device, dev); -} - -static inline const struct usb_device *__to_usb_device_const(const struct device *d) -{ - return container_of(d, struct usb_device, dev); -} - -/* - * container_of() will happily take a const * and spit back a non-const * as it - * is just doing pointer math. 
But we want to be a bit more careful in the USB - * driver code, so manually force any const * of a device to also be a const * - * to a usb_device. - */ -#define to_usb_device(dev) \ - _Generic((dev), \ - const struct device *: __to_usb_device_const, \ - struct device *: __to_usb_device)(dev) +#define to_usb_device(__dev) container_of_const(__dev, struct usb_device, dev) static inline struct usb_device *__intf_to_usbdev(struct usb_interface *intf) { From 47446b50ad2549af4fcc93f82a148ab107e6ef6a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 5 Dec 2022 13:12:06 +0100 Subject: [PATCH 3437/4122] firmware_loader: fix up to_fw_sysfs() to preserve const to_fw_sysfs() was changed in commit 23680f0b7d7f ("driver core: make struct class.dev_uevent() take a const *") to pass in a const pointer but not pass it back out to handle some changes in the driver core. That isn't the best idea as it could cause problems if used incorrectly, so switch to use the container_of_const() macro instead which will preserve the const status of the pointer and enforce it by the compiler. Fixes: 23680f0b7d7f ("driver core: make struct class.dev_uevent() take a const *") Cc: Luis Chamberlain Reviewed-by: Sakari Ailus Reviewed-by: Jason Gunthorpe Acked-by: Russ Weight Acked-by: Rafael J. 
Wysocki Link: https://lore.kernel.org/r/20221205121206.166576-4-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/base/firmware_loader/sysfs.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/base/firmware_loader/sysfs.h b/drivers/base/firmware_loader/sysfs.h index fd0b4ad9bdbb..2060add8ef81 100644 --- a/drivers/base/firmware_loader/sysfs.h +++ b/drivers/base/firmware_loader/sysfs.h @@ -80,11 +80,7 @@ struct fw_sysfs { struct firmware *fw; void *fw_upload_priv; }; - -static inline struct fw_sysfs *to_fw_sysfs(const struct device *dev) -{ - return container_of(dev, struct fw_sysfs, dev); -} +#define to_fw_sysfs(__dev) container_of_const(__dev, struct fw_sysfs, dev) void __fw_load_abort(struct fw_priv *fw_priv); From c943a9374d12bebca2d0de7e273b1c723b58c122 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 6 Dec 2022 10:34:25 +0200 Subject: [PATCH 3438/4122] net/mlx5: Introduce ifc bits for pre_copy Introduce ifc related stuff to enable PRE_COPY of VF during migration. 
Signed-off-by: Shay Drory Acked-by: Leon Romanovsky Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20221206083438.37807-2-yishaih@nvidia.com Signed-off-by: Alex Williamson --- include/linux/mlx5/mlx5_ifc.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5a4e914e2a6f..230a96626a5f 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1882,7 +1882,12 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 max_reformat_remove_size[0x8]; u8 max_reformat_remove_offset[0x8]; - u8 reserved_at_c0[0xe0]; + u8 reserved_at_c0[0x8]; + u8 migration_multi_load[0x1]; + u8 migration_tracking_state[0x1]; + u8 reserved_at_ca[0x16]; + + u8 reserved_at_e0[0xc0]; u8 reserved_at_1a0[0xb]; u8 log_min_mkey_entity_size[0x5]; @@ -11918,7 +11923,8 @@ struct mlx5_ifc_query_vhca_migration_state_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x10]; + u8 incremental[0x1]; + u8 reserved_at_41[0xf]; u8 vhca_id[0x10]; u8 reserved_at_60[0x20]; @@ -11944,7 +11950,9 @@ struct mlx5_ifc_save_vhca_state_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x10]; + u8 incremental[0x1]; + u8 set_track[0x1]; + u8 reserved_at_42[0xe]; u8 vhca_id[0x10]; u8 reserved_at_60[0x20]; From 4db52602a6074e9cc523500b8304600ff63e7b85 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 6 Dec 2022 10:34:26 +0200 Subject: [PATCH 3439/4122] vfio: Extend the device migration protocol with PRE_COPY The optional PRE_COPY states open the saving data transfer FD before reaching STOP_COPY and allows the device to dirty track internal state changes with the general idea to reduce the volume of data transferred in the STOP_COPY stage. While in PRE_COPY the device remains RUNNING, but the saving FD is open. Only if the device also supports RUNNING_P2P can it support PRE_COPY_P2P, which halts P2P transfers while continuing the saving FD. 
PRE_COPY, with P2P support, requires the driver to implement 7 new arcs and exists as an optional FSM branch between RUNNING and STOP_COPY: RUNNING -> PRE_COPY -> PRE_COPY_P2P -> STOP_COPY A new ioctl VFIO_MIG_GET_PRECOPY_INFO is provided to allow userspace to query the progress of the precopy operation in the driver with the idea it will judge to move to STOP_COPY at least once the initial data set is transferred, and possibly after the dirty size has shrunk appropriately. This ioctl is valid only in PRE_COPY states and kernel driver should return -EINVAL from any other migration state. Compared to the v1 clarification, STOP_COPY -> PRE_COPY is blocked and to be defined in future. We also split the pending_bytes report into the initial and sustaining values, e.g.: initial_bytes and dirty_bytes. initial_bytes: Amount of initial precopy data. dirty_bytes: Device state changes relative to data previously retrieved. These fields are not required to have any bearing to STOP_COPY phase. It is recommended to leave PRE_COPY for STOP_COPY only after the initial_bytes field reaches zero. Leaving PRE_COPY earlier might make things slower. 
Signed-off-by: Jason Gunthorpe Signed-off-by: Shay Drory Reviewed-by: Kevin Tian Reviewed-by: Shameer Kolothum Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20221206083438.37807-3-yishaih@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/vfio_main.c | 74 +++++++++++++++++++++- include/uapi/linux/vfio.h | 125 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 192 insertions(+), 7 deletions(-) diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index 7f88569c3eba..03dbcd3d96f0 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -1042,7 +1042,7 @@ int vfio_mig_get_next_state(struct vfio_device *device, enum vfio_device_mig_state new_fsm, enum vfio_device_mig_state *next_fsm) { - enum { VFIO_DEVICE_NUM_STATES = VFIO_DEVICE_STATE_RUNNING_P2P + 1 }; + enum { VFIO_DEVICE_NUM_STATES = VFIO_DEVICE_STATE_PRE_COPY_P2P + 1 }; /* * The coding in this table requires the driver to implement the * following FSM arcs: @@ -1057,30 +1057,65 @@ int vfio_mig_get_next_state(struct vfio_device *device, * RUNNING_P2P -> RUNNING * RUNNING_P2P -> STOP * STOP -> RUNNING_P2P - * Without P2P the driver must implement: + * + * If precopy is supported then the driver must support these additional + * FSM arcs: + * RUNNING -> PRE_COPY + * PRE_COPY -> RUNNING + * PRE_COPY -> STOP_COPY + * However, if precopy and P2P are supported together then the driver + * must support these additional arcs beyond the P2P arcs above: + * PRE_COPY -> RUNNING + * PRE_COPY -> PRE_COPY_P2P + * PRE_COPY_P2P -> PRE_COPY + * PRE_COPY_P2P -> RUNNING_P2P + * PRE_COPY_P2P -> STOP_COPY + * RUNNING -> PRE_COPY + * RUNNING_P2P -> PRE_COPY_P2P + * + * Without P2P and precopy the driver must implement: * RUNNING -> STOP * STOP -> RUNNING * * The coding will step through multiple states for some combination * transitions; if all optional features are supported, this means the * following ones: + * PRE_COPY -> PRE_COPY_P2P -> STOP_COPY + * PRE_COPY -> RUNNING -> 
RUNNING_P2P + * PRE_COPY -> RUNNING -> RUNNING_P2P -> STOP + * PRE_COPY -> RUNNING -> RUNNING_P2P -> STOP -> RESUMING + * PRE_COPY_P2P -> RUNNING_P2P -> RUNNING + * PRE_COPY_P2P -> RUNNING_P2P -> STOP + * PRE_COPY_P2P -> RUNNING_P2P -> STOP -> RESUMING * RESUMING -> STOP -> RUNNING_P2P + * RESUMING -> STOP -> RUNNING_P2P -> PRE_COPY_P2P * RESUMING -> STOP -> RUNNING_P2P -> RUNNING + * RESUMING -> STOP -> RUNNING_P2P -> RUNNING -> PRE_COPY * RESUMING -> STOP -> STOP_COPY + * RUNNING -> RUNNING_P2P -> PRE_COPY_P2P * RUNNING -> RUNNING_P2P -> STOP * RUNNING -> RUNNING_P2P -> STOP -> RESUMING * RUNNING -> RUNNING_P2P -> STOP -> STOP_COPY + * RUNNING_P2P -> RUNNING -> PRE_COPY * RUNNING_P2P -> STOP -> RESUMING * RUNNING_P2P -> STOP -> STOP_COPY + * STOP -> RUNNING_P2P -> PRE_COPY_P2P * STOP -> RUNNING_P2P -> RUNNING + * STOP -> RUNNING_P2P -> RUNNING -> PRE_COPY * STOP_COPY -> STOP -> RESUMING * STOP_COPY -> STOP -> RUNNING_P2P * STOP_COPY -> STOP -> RUNNING_P2P -> RUNNING + * + * The following transitions are blocked: + * STOP_COPY -> PRE_COPY + * STOP_COPY -> PRE_COPY_P2P */ static const u8 vfio_from_fsm_table[VFIO_DEVICE_NUM_STATES][VFIO_DEVICE_NUM_STATES] = { [VFIO_DEVICE_STATE_STOP] = { [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P, + [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P, + [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY, [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING, [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, @@ -1089,14 +1124,38 @@ int vfio_mig_get_next_state(struct vfio_device *device, [VFIO_DEVICE_STATE_RUNNING] = { [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P, [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING, + [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_PRE_COPY, + [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, 
[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P, [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P, [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, }, + [VFIO_DEVICE_STATE_PRE_COPY] = { + [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING, + [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING, + [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_PRE_COPY, + [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_PRE_COPY_P2P, + [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_PRE_COPY_P2P, + [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING, + [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING, + [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, + }, + [VFIO_DEVICE_STATE_PRE_COPY_P2P] = { + [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P, + [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P, + [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_PRE_COPY, + [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_PRE_COPY_P2P, + [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY, + [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P, + [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, + [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, + }, [VFIO_DEVICE_STATE_STOP_COPY] = { [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP, + [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_ERROR, + [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_ERROR, [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY, [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP, @@ -1105,6 +1164,8 @@ int vfio_mig_get_next_state(struct vfio_device *device, [VFIO_DEVICE_STATE_RESUMING] = { [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP, + 
[VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_STOP, + [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING, [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP, @@ -1113,6 +1174,8 @@ int vfio_mig_get_next_state(struct vfio_device *device, [VFIO_DEVICE_STATE_RUNNING_P2P] = { [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING, + [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_RUNNING, + [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_PRE_COPY_P2P, [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, @@ -1121,6 +1184,8 @@ int vfio_mig_get_next_state(struct vfio_device *device, [VFIO_DEVICE_STATE_ERROR] = { [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_ERROR, [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_ERROR, + [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_ERROR, + [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_ERROR, [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_ERROR, [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_ERROR, [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_ERROR, @@ -1131,6 +1196,11 @@ int vfio_mig_get_next_state(struct vfio_device *device, static const unsigned int state_flags_table[VFIO_DEVICE_NUM_STATES] = { [VFIO_DEVICE_STATE_STOP] = VFIO_MIGRATION_STOP_COPY, [VFIO_DEVICE_STATE_RUNNING] = VFIO_MIGRATION_STOP_COPY, + [VFIO_DEVICE_STATE_PRE_COPY] = + VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_PRE_COPY, + [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_MIGRATION_STOP_COPY | + VFIO_MIGRATION_P2P | + VFIO_MIGRATION_PRE_COPY, [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_MIGRATION_STOP_COPY, [VFIO_DEVICE_STATE_RESUMING] = VFIO_MIGRATION_STOP_COPY, [VFIO_DEVICE_STATE_RUNNING_P2P] = diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h 
index 3e45dbaf190e..23105eb036fa 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -819,12 +819,20 @@ struct vfio_device_feature { * VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P means that RUNNING_P2P * is supported in addition to the STOP_COPY states. * + * VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_PRE_COPY means that + * PRE_COPY is supported in addition to the STOP_COPY states. + * + * VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P | VFIO_MIGRATION_PRE_COPY + * means that RUNNING_P2P, PRE_COPY and PRE_COPY_P2P are supported + * in addition to the STOP_COPY states. + * * Other combinations of flags have behavior to be defined in the future. */ struct vfio_device_feature_migration { __aligned_u64 flags; #define VFIO_MIGRATION_STOP_COPY (1 << 0) #define VFIO_MIGRATION_P2P (1 << 1) +#define VFIO_MIGRATION_PRE_COPY (1 << 2) }; #define VFIO_DEVICE_FEATURE_MIGRATION 1 @@ -875,8 +883,13 @@ struct vfio_device_feature_mig_state { * RESUMING - The device is stopped and is loading a new internal state * ERROR - The device has failed and must be reset * - * And 1 optional state to support VFIO_MIGRATION_P2P: + * And optional states to support VFIO_MIGRATION_P2P: * RUNNING_P2P - RUNNING, except the device cannot do peer to peer DMA + * And VFIO_MIGRATION_PRE_COPY: + * PRE_COPY - The device is running normally but tracking internal state + * changes + * And VFIO_MIGRATION_P2P | VFIO_MIGRATION_PRE_COPY: + * PRE_COPY_P2P - PRE_COPY, except the device cannot do peer to peer DMA * * The FSM takes actions on the arcs between FSM states. The driver implements * the following behavior for the FSM arcs: @@ -908,20 +921,48 @@ struct vfio_device_feature_mig_state { * * To abort a RESUMING session the device must be reset. 
* + * PRE_COPY -> RUNNING * RUNNING_P2P -> RUNNING * While in RUNNING the device is fully operational, the device may generate * interrupts, DMA, respond to MMIO, all vfio device regions are functional, * and the device may advance its internal state. * + * The PRE_COPY arc will terminate a data transfer session. + * + * PRE_COPY_P2P -> RUNNING_P2P * RUNNING -> RUNNING_P2P * STOP -> RUNNING_P2P * While in RUNNING_P2P the device is partially running in the P2P quiescent * state defined below. * - * STOP -> STOP_COPY - * This arc begin the process of saving the device state and will return a - * new data_fd. + * The PRE_COPY_P2P arc will terminate a data transfer session. * + * RUNNING -> PRE_COPY + * RUNNING_P2P -> PRE_COPY_P2P + * STOP -> STOP_COPY + * PRE_COPY, PRE_COPY_P2P and STOP_COPY form the "saving group" of states + * which share a data transfer session. Moving between these states alters + * what is streamed in session, but does not terminate or otherwise affect + * the associated fd. + * + * These arcs begin the process of saving the device state and will return a + * new data_fd. The migration driver may perform actions such as enabling + * dirty logging of device state when entering PRE_COPY or PRE_COPY_P2P. + * + * Each arc does not change the device operation, the device remains + * RUNNING, P2P quiesced or in STOP. The STOP_COPY state is described below + * in PRE_COPY_P2P -> STOP_COPY. + * + * PRE_COPY -> PRE_COPY_P2P + * Entering PRE_COPY_P2P continues all the behaviors of PRE_COPY above. + * However, while in the PRE_COPY_P2P state, the device is partially running + * in the P2P quiescent state defined below, like RUNNING_P2P. + * + * PRE_COPY_P2P -> PRE_COPY + * This arc allows returning the device to a full RUNNING behavior while + * continuing all the behaviors of PRE_COPY.
+ * + * PRE_COPY_P2P -> STOP_COPY * While in the STOP_COPY state the device has the same behavior as STOP * with the addition that the data transfers session continues to stream the * migration state. End of stream on the FD indicates the entire device @@ -939,6 +980,13 @@ struct vfio_device_feature_mig_state { * device state for this arc if required to prepare the device to receive the * migration data. * + * STOP_COPY -> PRE_COPY + * STOP_COPY -> PRE_COPY_P2P + * These arcs are not permitted and return error if requested. Future + * revisions of this API may define behaviors for these arcs, in this case + * support will be discoverable by a new flag in + * VFIO_DEVICE_FEATURE_MIGRATION. + * * any -> ERROR * ERROR cannot be specified as a device state, however any transition request * can be failed with an errno return and may then move the device_state into @@ -950,7 +998,7 @@ struct vfio_device_feature_mig_state { * The optional peer to peer (P2P) quiescent state is intended to be a quiescent * state for the device for the purposes of managing multiple devices within a * user context where peer-to-peer DMA between devices may be active. The - * RUNNING_P2P states must prevent the device from initiating + * RUNNING_P2P and PRE_COPY_P2P states must prevent the device from initiating * any new P2P DMA transactions. If the device can identify P2P transactions * then it can stop only P2P DMA, otherwise it must stop all DMA. The migration * driver must complete any such outstanding operations prior to completing the @@ -963,6 +1011,8 @@ struct vfio_device_feature_mig_state { * above FSM arcs. As there are multiple paths through the FSM arcs the path * should be selected based on the following rules: * - Select the shortest path. + * - The path cannot have saving group states as interior arcs, only + * starting/end states. * Refer to vfio_mig_get_next_state() for the result of the algorithm. 
* * The automatic transit through the FSM arcs that make up the combination @@ -976,6 +1026,9 @@ struct vfio_device_feature_mig_state { * support them. The user can discover if these states are supported by using * VFIO_DEVICE_FEATURE_MIGRATION. By using combination transitions the user can * avoid knowing about these optional states if the kernel driver supports them. + * + * Arcs touching PRE_COPY and PRE_COPY_P2P are removed if support for PRE_COPY + * is not present. */ enum vfio_device_mig_state { VFIO_DEVICE_STATE_ERROR = 0, @@ -984,8 +1037,70 @@ enum vfio_device_mig_state { VFIO_DEVICE_STATE_STOP_COPY = 3, VFIO_DEVICE_STATE_RESUMING = 4, VFIO_DEVICE_STATE_RUNNING_P2P = 5, + VFIO_DEVICE_STATE_PRE_COPY = 6, + VFIO_DEVICE_STATE_PRE_COPY_P2P = 7, }; +/** + * VFIO_MIG_GET_PRECOPY_INFO - _IO(VFIO_TYPE, VFIO_BASE + 21) + * + * This ioctl is used on the migration data FD in the precopy phase of the + * migration data transfer. It returns an estimate of the current data sizes + * remaining to be transferred. It allows the user to judge when it is + * appropriate to leave PRE_COPY for STOP_COPY. + * + * This ioctl is valid only in PRE_COPY states and kernel driver should + * return -EINVAL from any other migration state. + * + * The vfio_precopy_info data structure returned by this ioctl provides + * estimates of data available from the device during the PRE_COPY states. + * This estimate is split into two categories, initial_bytes and + * dirty_bytes. + * + * The initial_bytes field indicates the amount of initial precopy + * data available from the device. This field should have a non-zero initial + * value and decrease as migration data is read from the device. + * It is recommended to leave PRE_COPY for STOP_COPY only after this field + * reaches zero. Leaving PRE_COPY earlier might make things slower. + * + * The dirty_bytes field tracks device state changes relative to data + * previously retrieved. 
This field starts at zero and may increase as + * the internal device state is modified or decrease as that modified + * state is read from the device. + * + * Userspace may use the combination of these fields to estimate the + * potential data size available during the PRE_COPY phases, as well as + * trends relative to the rate the device is dirtying its internal + * state, but these fields are not required to have any bearing relative + * to the data size available during the STOP_COPY phase. + * + * Drivers have a lot of flexibility in when and what they transfer during the + * PRE_COPY phase, and how they report this from VFIO_MIG_GET_PRECOPY_INFO. + * + * During pre-copy the migration data FD has a temporary "end of stream" that is + * reached when both initial_bytes and dirty_bytes are zero. For instance, this + * may indicate that the device is idle and not currently dirtying any internal + * state. When read() is done on this temporary end of stream the kernel driver + * should return ENOMSG from read(). Userspace can wait for more data (which may + * never come) by using poll. + * + * Once in STOP_COPY the migration data FD has a permanent end of stream + * signaled in the usual way by read() always returning 0 and poll always + * returning readable. ENOMSG may not be returned in STOP_COPY. + * Support for this ioctl is mandatory if a driver claims to support + * VFIO_MIGRATION_PRE_COPY. + * + * Return: 0 on success, -1 and errno set on failure. + */ +struct vfio_precopy_info { + __u32 argsz; + __u32 flags; + __aligned_u64 initial_bytes; + __aligned_u64 dirty_bytes; +}; + +#define VFIO_MIG_GET_PRECOPY_INFO _IO(VFIO_TYPE, VFIO_BASE + 21) + /* * Upon VFIO_DEVICE_FEATURE_SET, allow the device to be moved into a low power * state with the platform-based power management.
Device use of lower power From 0e7caa65d707b93fbb4322c6313f739fa9103dfa Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 6 Dec 2022 10:34:27 +0200 Subject: [PATCH 3440/4122] vfio/mlx5: Enforce a single SAVE command at a time Enforce a single SAVE command at a time. As the SAVE command is an asynchronous one, we must enforce running only a single command at a time. This will preserve ordering between multiple calls and protect from races on the migration file data structure. This is a must for the next patches from the series where as part of PRE_COPY we may have multiple images to be saved and multiple SAVE commands may be issued from different flows. Reviewed-by: Jason Gunthorpe Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20221206083438.37807-4-yishaih@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/mlx5/cmd.c | 6 ++++++ drivers/vfio/pci/mlx5/cmd.h | 1 + drivers/vfio/pci/mlx5/main.c | 7 +++++++ 3 files changed, 14 insertions(+) diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c index 0848bc905d3e..55ee8036f59c 100644 --- a/drivers/vfio/pci/mlx5/cmd.c +++ b/drivers/vfio/pci/mlx5/cmd.c @@ -281,6 +281,7 @@ void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work) dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0); mlx5_core_dealloc_pd(mdev, async_data->pdn); kvfree(async_data->out); + complete(&migf->save_comp); fput(migf->filp); } @@ -321,6 +322,10 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, return -ENOTCONN; mdev = mvdev->mdev; + err = wait_for_completion_interruptible(&migf->save_comp); + if (err) + return err; + err = mlx5_core_alloc_pd(mdev, &pdn); if (err) return err; @@ -371,6 +376,7 @@ err_create_mkey: dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0); err_dma_map: mlx5_core_dealloc_pd(mdev, pdn); + complete(&migf->save_comp); return err; } diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h index 
921d5720a1e5..8ffa7699872c 100644 --- a/drivers/vfio/pci/mlx5/cmd.h +++ b/drivers/vfio/pci/mlx5/cmd.h @@ -37,6 +37,7 @@ struct mlx5_vf_migration_file { unsigned long last_offset; struct mlx5vf_pci_core_device *mvdev; wait_queue_head_t poll_wait; + struct completion save_comp; struct mlx5_async_ctx async_ctx; struct mlx5vf_async_data async_data; }; diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c index 6e9cf2aacc52..0d71ebb2a972 100644 --- a/drivers/vfio/pci/mlx5/main.c +++ b/drivers/vfio/pci/mlx5/main.c @@ -245,6 +245,13 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev) stream_open(migf->filp->f_inode, migf->filp); mutex_init(&migf->lock); init_waitqueue_head(&migf->poll_wait); + init_completion(&migf->save_comp); + /* + * save_comp is being used as a binary semaphore built from + * a completion. A normal mutex cannot be used because the lock is + * passed between kernel threads and lockdep can't model this. + */ + complete(&migf->save_comp); mlx5_cmd_init_async_ctx(mvdev->mdev, &migf->async_ctx); INIT_WORK(&migf->async_data.work, mlx5vf_mig_file_cleanup_cb); ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, From 9945a67ea4b30657dd998c7fbbea1b3950747168 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 6 Dec 2022 10:34:28 +0200 Subject: [PATCH 3441/4122] vfio/mlx5: Refactor PD usage This patch refactors PD usage so that its life cycle matches that of the migration file instead of allocating/destroying it upon each SAVE/LOAD command. This is a preparation step towards the PRE_COPY series where multiple images will be SAVED/LOADED and a single PD can be simply reused.
Reviewed-by: Jason Gunthorpe Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20221206083438.37807-5-yishaih@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/mlx5/cmd.c | 53 ++++++++++++++++++++++++------------ drivers/vfio/pci/mlx5/cmd.h | 5 +++- drivers/vfio/pci/mlx5/main.c | 44 ++++++++++++++++++++++-------- 3 files changed, 71 insertions(+), 31 deletions(-) diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c index 55ee8036f59c..a97eac49e3d6 100644 --- a/drivers/vfio/pci/mlx5/cmd.c +++ b/drivers/vfio/pci/mlx5/cmd.c @@ -279,7 +279,6 @@ void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work) mlx5_core_destroy_mkey(mdev, async_data->mkey); dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0); - mlx5_core_dealloc_pd(mdev, async_data->pdn); kvfree(async_data->out); complete(&migf->save_comp); fput(migf->filp); @@ -314,7 +313,7 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {}; struct mlx5vf_async_data *async_data; struct mlx5_core_dev *mdev; - u32 pdn, mkey; + u32 mkey; int err; lockdep_assert_held(&mvdev->state_mutex); @@ -326,16 +325,12 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, if (err) return err; - err = mlx5_core_alloc_pd(mdev, &pdn); - if (err) - return err; - err = dma_map_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0); if (err) goto err_dma_map; - err = _create_mkey(mdev, pdn, migf, NULL, &mkey); + err = _create_mkey(mdev, migf->pdn, migf, NULL, &mkey); if (err) goto err_create_mkey; @@ -357,7 +352,6 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, migf->total_length = 0; get_file(migf->filp); async_data->mkey = mkey; - async_data->pdn = pdn; err = mlx5_cmd_exec_cb(&migf->async_ctx, in, sizeof(in), async_data->out, out_size, mlx5vf_save_callback, @@ -375,7 +369,6 @@ err_out: err_create_mkey: dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0); 
err_dma_map: - mlx5_core_dealloc_pd(mdev, pdn); complete(&migf->save_comp); return err; } @@ -386,7 +379,7 @@ int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev, struct mlx5_core_dev *mdev; u32 out[MLX5_ST_SZ_DW(load_vhca_state_out)] = {}; u32 in[MLX5_ST_SZ_DW(load_vhca_state_in)] = {}; - u32 pdn, mkey; + u32 mkey; int err; lockdep_assert_held(&mvdev->state_mutex); @@ -400,15 +393,11 @@ int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev, } mdev = mvdev->mdev; - err = mlx5_core_alloc_pd(mdev, &pdn); + err = dma_map_sgtable(mdev->device, &migf->table.sgt, DMA_TO_DEVICE, 0); if (err) goto end; - err = dma_map_sgtable(mdev->device, &migf->table.sgt, DMA_TO_DEVICE, 0); - if (err) - goto err_reg; - - err = _create_mkey(mdev, pdn, migf, NULL, &mkey); + err = _create_mkey(mdev, migf->pdn, migf, NULL, &mkey); if (err) goto err_mkey; @@ -424,13 +413,41 @@ int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev, mlx5_core_destroy_mkey(mdev, mkey); err_mkey: dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_TO_DEVICE, 0); -err_reg: - mlx5_core_dealloc_pd(mdev, pdn); end: mutex_unlock(&migf->lock); return err; } +int mlx5vf_cmd_alloc_pd(struct mlx5_vf_migration_file *migf) +{ + int err; + + lockdep_assert_held(&migf->mvdev->state_mutex); + if (migf->mvdev->mdev_detach) + return -ENOTCONN; + + err = mlx5_core_alloc_pd(migf->mvdev->mdev, &migf->pdn); + return err; +} + +void mlx5vf_cmd_dealloc_pd(struct mlx5_vf_migration_file *migf) +{ + lockdep_assert_held(&migf->mvdev->state_mutex); + if (migf->mvdev->mdev_detach) + return; + + mlx5_core_dealloc_pd(migf->mvdev->mdev, migf->pdn); +} + +void mlx5fv_cmd_clean_migf_resources(struct mlx5_vf_migration_file *migf) +{ + lockdep_assert_held(&migf->mvdev->state_mutex); + + WARN_ON(migf->mvdev->mdev_detach); + + mlx5vf_cmd_dealloc_pd(migf); +} + static void combine_ranges(struct rb_root_cached *root, u32 cur_nodes, u32 req_nodes) { diff --git a/drivers/vfio/pci/mlx5/cmd.h 
b/drivers/vfio/pci/mlx5/cmd.h index 8ffa7699872c..ba760f956d53 100644 --- a/drivers/vfio/pci/mlx5/cmd.h +++ b/drivers/vfio/pci/mlx5/cmd.h @@ -16,7 +16,6 @@ struct mlx5vf_async_data { struct mlx5_async_work cb_work; struct work_struct work; int status; - u32 pdn; u32 mkey; void *out; }; @@ -27,6 +26,7 @@ struct mlx5_vf_migration_file { u8 disabled:1; u8 is_err:1; + u32 pdn; struct sg_append_table table; size_t total_length; size_t allocated_length; @@ -127,6 +127,9 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, struct mlx5_vf_migration_file *migf); int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev, struct mlx5_vf_migration_file *migf); +int mlx5vf_cmd_alloc_pd(struct mlx5_vf_migration_file *migf); +void mlx5vf_cmd_dealloc_pd(struct mlx5_vf_migration_file *migf); +void mlx5fv_cmd_clean_migf_resources(struct mlx5_vf_migration_file *migf); void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev); void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev); void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work); diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c index 0d71ebb2a972..1916f7c1468c 100644 --- a/drivers/vfio/pci/mlx5/main.c +++ b/drivers/vfio/pci/mlx5/main.c @@ -236,12 +236,15 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev) migf->filp = anon_inode_getfile("mlx5vf_mig", &mlx5vf_save_fops, migf, O_RDONLY); if (IS_ERR(migf->filp)) { - int err = PTR_ERR(migf->filp); - - kfree(migf); - return ERR_PTR(err); + ret = PTR_ERR(migf->filp); + goto end; } + migf->mvdev = mvdev; + ret = mlx5vf_cmd_alloc_pd(migf); + if (ret) + goto out_free; + stream_open(migf->filp->f_inode, migf->filp); mutex_init(&migf->lock); init_waitqueue_head(&migf->poll_wait); @@ -257,20 +260,25 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev) ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &migf->total_length); if (ret) - goto out_free; + goto out_pd; ret = 
mlx5vf_add_migration_pages( migf, DIV_ROUND_UP_ULL(migf->total_length, PAGE_SIZE)); if (ret) - goto out_free; + goto out_pd; - migf->mvdev = mvdev; ret = mlx5vf_cmd_save_vhca_state(mvdev, migf); if (ret) - goto out_free; + goto out_save; return migf; +out_save: + mlx5vf_disable_fd(migf); +out_pd: + mlx5vf_cmd_dealloc_pd(migf); out_free: fput(migf->filp); +end: + kfree(migf); return ERR_PTR(ret); } @@ -352,6 +360,7 @@ static struct mlx5_vf_migration_file * mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev) { struct mlx5_vf_migration_file *migf; + int ret; migf = kzalloc(sizeof(*migf), GFP_KERNEL); if (!migf) @@ -360,20 +369,30 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev) migf->filp = anon_inode_getfile("mlx5vf_mig", &mlx5vf_resume_fops, migf, O_WRONLY); if (IS_ERR(migf->filp)) { - int err = PTR_ERR(migf->filp); - - kfree(migf); - return ERR_PTR(err); + ret = PTR_ERR(migf->filp); + goto end; } + + migf->mvdev = mvdev; + ret = mlx5vf_cmd_alloc_pd(migf); + if (ret) + goto out_free; + stream_open(migf->filp->f_inode, migf->filp); mutex_init(&migf->lock); return migf; +out_free: + fput(migf->filp); +end: + kfree(migf); + return ERR_PTR(ret); } void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev) { if (mvdev->resuming_migf) { mlx5vf_disable_fd(mvdev->resuming_migf); + mlx5fv_cmd_clean_migf_resources(mvdev->resuming_migf); fput(mvdev->resuming_migf->filp); mvdev->resuming_migf = NULL; } @@ -381,6 +400,7 @@ void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev) mlx5_cmd_cleanup_async_ctx(&mvdev->saving_migf->async_ctx); cancel_work_sync(&mvdev->saving_migf->async_data.work); mlx5vf_disable_fd(mvdev->saving_migf); + mlx5fv_cmd_clean_migf_resources(mvdev->saving_migf); fput(mvdev->saving_migf->filp); mvdev->saving_migf = NULL; } From 91454f8b9bf4ce6be1d9a0b4de401bc3c6313a95 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 6 Dec 2022 10:34:29 +0200 Subject: [PATCH 3442/4122] vfio/mlx5: Refactor MKEY usage This 
patch refactors MKEY usage so that its life cycle matches that of the migration file, instead of allocating/destroying it upon each SAVE/LOAD command. This is a preparation step towards the PRE_COPY series where multiple images will be SAVED/LOADED. We achieve it by having a new struct named mlx5_vhca_data_buffer which holds the mkey and its related fields such as sg_append_table, allocated_length, etc. The above fields were taken out from the migration file main struct, into mlx5_vhca_data_buffer dedicated struct with the proper helpers in place. For now we have a single mlx5_vhca_data_buffer per migration file. However, in coming patches we'll have multiple of them to support multiple images. Reviewed-by: Jason Gunthorpe Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20221206083438.37807-6-yishaih@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/mlx5/cmd.c | 162 ++++++++++++++++++++++------------- drivers/vfio/pci/mlx5/cmd.h | 37 +++++--- drivers/vfio/pci/mlx5/main.c | 92 +++++++++++--------- 3 files changed, 178 insertions(+), 113 deletions(-) diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c index a97eac49e3d6..ed4c472d2eae 100644 --- a/drivers/vfio/pci/mlx5/cmd.c +++ b/drivers/vfio/pci/mlx5/cmd.c @@ -210,11 +210,11 @@ err_exec: } static int _create_mkey(struct mlx5_core_dev *mdev, u32 pdn, - struct mlx5_vf_migration_file *migf, + struct mlx5_vhca_data_buffer *buf, struct mlx5_vhca_recv_buf *recv_buf, u32 *mkey) { - size_t npages = migf ? DIV_ROUND_UP(migf->total_length, PAGE_SIZE) : + size_t npages = buf ? 
DIV_ROUND_UP(buf->allocated_length, PAGE_SIZE) : recv_buf->npages; int err = 0, inlen; __be64 *mtt; @@ -232,10 +232,10 @@ static int _create_mkey(struct mlx5_core_dev *mdev, u32 pdn, DIV_ROUND_UP(npages, 2)); mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); - if (migf) { + if (buf) { struct sg_dma_page_iter dma_iter; - for_each_sgtable_dma_page(&migf->table.sgt, &dma_iter, 0) + for_each_sgtable_dma_page(&buf->table.sgt, &dma_iter, 0) *mtt++ = cpu_to_be64(sg_page_iter_dma_address(&dma_iter)); } else { int i; @@ -255,20 +255,99 @@ static int _create_mkey(struct mlx5_core_dev *mdev, u32 pdn, MLX5_SET(mkc, mkc, qpn, 0xffffff); MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT); MLX5_SET(mkc, mkc, translations_octword_size, DIV_ROUND_UP(npages, 2)); - MLX5_SET64(mkc, mkc, len, - migf ? migf->total_length : (npages * PAGE_SIZE)); + MLX5_SET64(mkc, mkc, len, npages * PAGE_SIZE); err = mlx5_core_create_mkey(mdev, mkey, in, inlen); kvfree(in); return err; } +static int mlx5vf_dma_data_buffer(struct mlx5_vhca_data_buffer *buf) +{ + struct mlx5vf_pci_core_device *mvdev = buf->migf->mvdev; + struct mlx5_core_dev *mdev = mvdev->mdev; + int ret; + + lockdep_assert_held(&mvdev->state_mutex); + if (mvdev->mdev_detach) + return -ENOTCONN; + + if (buf->dmaed || !buf->allocated_length) + return -EINVAL; + + ret = dma_map_sgtable(mdev->device, &buf->table.sgt, buf->dma_dir, 0); + if (ret) + return ret; + + ret = _create_mkey(mdev, buf->migf->pdn, buf, NULL, &buf->mkey); + if (ret) + goto err; + + buf->dmaed = true; + + return 0; +err: + dma_unmap_sgtable(mdev->device, &buf->table.sgt, buf->dma_dir, 0); + return ret; +} + +void mlx5vf_free_data_buffer(struct mlx5_vhca_data_buffer *buf) +{ + struct mlx5_vf_migration_file *migf = buf->migf; + struct sg_page_iter sg_iter; + + lockdep_assert_held(&migf->mvdev->state_mutex); + WARN_ON(migf->mvdev->mdev_detach); + + if (buf->dmaed) { + mlx5_core_destroy_mkey(migf->mvdev->mdev, buf->mkey); + 
dma_unmap_sgtable(migf->mvdev->mdev->device, &buf->table.sgt, + buf->dma_dir, 0); + } + + /* Undo alloc_pages_bulk_array() */ + for_each_sgtable_page(&buf->table.sgt, &sg_iter, 0) + __free_page(sg_page_iter_page(&sg_iter)); + sg_free_append_table(&buf->table); + kfree(buf); +} + +struct mlx5_vhca_data_buffer * +mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf, + size_t length, + enum dma_data_direction dma_dir) +{ + struct mlx5_vhca_data_buffer *buf; + int ret; + + buf = kzalloc(sizeof(*buf), GFP_KERNEL); + if (!buf) + return ERR_PTR(-ENOMEM); + + buf->dma_dir = dma_dir; + buf->migf = migf; + if (length) { + ret = mlx5vf_add_migration_pages(buf, + DIV_ROUND_UP_ULL(length, PAGE_SIZE)); + if (ret) + goto end; + + ret = mlx5vf_dma_data_buffer(buf); + if (ret) + goto end; + } + + return buf; +end: + mlx5vf_free_data_buffer(buf); + return ERR_PTR(ret); +} + void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work) { struct mlx5vf_async_data *async_data = container_of(_work, struct mlx5vf_async_data, work); struct mlx5_vf_migration_file *migf = container_of(async_data, struct mlx5_vf_migration_file, async_data); - struct mlx5_core_dev *mdev = migf->mvdev->mdev; mutex_lock(&migf->lock); if (async_data->status) { @@ -276,9 +355,6 @@ void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work) wake_up_interruptible(&migf->poll_wait); } mutex_unlock(&migf->lock); - - mlx5_core_destroy_mkey(mdev, async_data->mkey); - dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0); kvfree(async_data->out); complete(&migf->save_comp); fput(migf->filp); @@ -292,7 +368,7 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context) struct mlx5_vf_migration_file, async_data); if (!status) { - WRITE_ONCE(migf->total_length, + WRITE_ONCE(migf->buf->length, MLX5_GET(save_vhca_state_out, async_data->out, actual_image_size)); wake_up_interruptible(&migf->poll_wait); @@ -307,39 +383,28 @@ static void mlx5vf_save_callback(int status, struct 
mlx5_async_work *context) } int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, - struct mlx5_vf_migration_file *migf) + struct mlx5_vf_migration_file *migf, + struct mlx5_vhca_data_buffer *buf) { u32 out_size = MLX5_ST_SZ_BYTES(save_vhca_state_out); u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {}; struct mlx5vf_async_data *async_data; - struct mlx5_core_dev *mdev; - u32 mkey; int err; lockdep_assert_held(&mvdev->state_mutex); if (mvdev->mdev_detach) return -ENOTCONN; - mdev = mvdev->mdev; err = wait_for_completion_interruptible(&migf->save_comp); if (err) return err; - err = dma_map_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, - 0); - if (err) - goto err_dma_map; - - err = _create_mkey(mdev, migf->pdn, migf, NULL, &mkey); - if (err) - goto err_create_mkey; - MLX5_SET(save_vhca_state_in, in, opcode, MLX5_CMD_OP_SAVE_VHCA_STATE); MLX5_SET(save_vhca_state_in, in, op_mod, 0); MLX5_SET(save_vhca_state_in, in, vhca_id, mvdev->vhca_id); - MLX5_SET(save_vhca_state_in, in, mkey, mkey); - MLX5_SET(save_vhca_state_in, in, size, migf->total_length); + MLX5_SET(save_vhca_state_in, in, mkey, buf->mkey); + MLX5_SET(save_vhca_state_in, in, size, buf->allocated_length); async_data = &migf->async_data; async_data->out = kvzalloc(out_size, GFP_KERNEL); @@ -348,10 +413,7 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, goto err_out; } - /* no data exists till the callback comes back */ - migf->total_length = 0; get_file(migf->filp); - async_data->mkey = mkey; err = mlx5_cmd_exec_cb(&migf->async_ctx, in, sizeof(in), async_data->out, out_size, mlx5vf_save_callback, @@ -365,57 +427,33 @@ err_exec: fput(migf->filp); kvfree(async_data->out); err_out: - mlx5_core_destroy_mkey(mdev, mkey); -err_create_mkey: - dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0); -err_dma_map: complete(&migf->save_comp); return err; } int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev, - struct mlx5_vf_migration_file *migf) 
+ struct mlx5_vf_migration_file *migf, + struct mlx5_vhca_data_buffer *buf) { - struct mlx5_core_dev *mdev; u32 out[MLX5_ST_SZ_DW(load_vhca_state_out)] = {}; u32 in[MLX5_ST_SZ_DW(load_vhca_state_in)] = {}; - u32 mkey; int err; lockdep_assert_held(&mvdev->state_mutex); if (mvdev->mdev_detach) return -ENOTCONN; - mutex_lock(&migf->lock); - if (!migf->total_length) { - err = -EINVAL; - goto end; - } - - mdev = mvdev->mdev; - err = dma_map_sgtable(mdev->device, &migf->table.sgt, DMA_TO_DEVICE, 0); + err = mlx5vf_dma_data_buffer(buf); if (err) - goto end; - - err = _create_mkey(mdev, migf->pdn, migf, NULL, &mkey); - if (err) - goto err_mkey; + return err; MLX5_SET(load_vhca_state_in, in, opcode, MLX5_CMD_OP_LOAD_VHCA_STATE); MLX5_SET(load_vhca_state_in, in, op_mod, 0); MLX5_SET(load_vhca_state_in, in, vhca_id, mvdev->vhca_id); - MLX5_SET(load_vhca_state_in, in, mkey, mkey); - MLX5_SET(load_vhca_state_in, in, size, migf->total_length); - - err = mlx5_cmd_exec_inout(mdev, load_vhca_state, in, out); - - mlx5_core_destroy_mkey(mdev, mkey); -err_mkey: - dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_TO_DEVICE, 0); -end: - mutex_unlock(&migf->lock); - return err; + MLX5_SET(load_vhca_state_in, in, mkey, buf->mkey); + MLX5_SET(load_vhca_state_in, in, size, buf->length); + return mlx5_cmd_exec_inout(mvdev->mdev, load_vhca_state, in, out); } int mlx5vf_cmd_alloc_pd(struct mlx5_vf_migration_file *migf) @@ -445,6 +483,10 @@ void mlx5fv_cmd_clean_migf_resources(struct mlx5_vf_migration_file *migf) WARN_ON(migf->mvdev->mdev_detach); + if (migf->buf) { + mlx5vf_free_data_buffer(migf->buf); + migf->buf = NULL; + } mlx5vf_cmd_dealloc_pd(migf); } diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h index ba760f956d53..b0f08dfc8120 100644 --- a/drivers/vfio/pci/mlx5/cmd.h +++ b/drivers/vfio/pci/mlx5/cmd.h @@ -12,11 +12,25 @@ #include #include +struct mlx5_vhca_data_buffer { + struct sg_append_table table; + loff_t start_pos; + u64 length; + u64 allocated_length; 
+ u32 mkey; + enum dma_data_direction dma_dir; + u8 dmaed:1; + struct mlx5_vf_migration_file *migf; + /* Optimize mlx5vf_get_migration_page() for sequential access */ + struct scatterlist *last_offset_sg; + unsigned int sg_last_entry; + unsigned long last_offset; +}; + struct mlx5vf_async_data { struct mlx5_async_work cb_work; struct work_struct work; int status; - u32 mkey; void *out; }; @@ -27,14 +41,7 @@ struct mlx5_vf_migration_file { u8 is_err:1; u32 pdn; - struct sg_append_table table; - size_t total_length; - size_t allocated_length; - - /* Optimize mlx5vf_get_migration_page() for sequential access */ - struct scatterlist *last_offset_sg; - unsigned int sg_last_entry; - unsigned long last_offset; + struct mlx5_vhca_data_buffer *buf; struct mlx5vf_pci_core_device *mvdev; wait_queue_head_t poll_wait; struct completion save_comp; @@ -124,12 +131,20 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev, void mlx5vf_cmd_remove_migratable(struct mlx5vf_pci_core_device *mvdev); void mlx5vf_cmd_close_migratable(struct mlx5vf_pci_core_device *mvdev); int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, - struct mlx5_vf_migration_file *migf); + struct mlx5_vf_migration_file *migf, + struct mlx5_vhca_data_buffer *buf); int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev, - struct mlx5_vf_migration_file *migf); + struct mlx5_vf_migration_file *migf, + struct mlx5_vhca_data_buffer *buf); int mlx5vf_cmd_alloc_pd(struct mlx5_vf_migration_file *migf); void mlx5vf_cmd_dealloc_pd(struct mlx5_vf_migration_file *migf); void mlx5fv_cmd_clean_migf_resources(struct mlx5_vf_migration_file *migf); +struct mlx5_vhca_data_buffer * +mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf, + size_t length, enum dma_data_direction dma_dir); +void mlx5vf_free_data_buffer(struct mlx5_vhca_data_buffer *buf); +int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf, + unsigned int npages); void mlx5vf_state_mutex_unlock(struct 
mlx5vf_pci_core_device *mvdev); void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev); void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work); diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c index 1916f7c1468c..5f694fce854c 100644 --- a/drivers/vfio/pci/mlx5/main.c +++ b/drivers/vfio/pci/mlx5/main.c @@ -33,7 +33,7 @@ static struct mlx5vf_pci_core_device *mlx5vf_drvdata(struct pci_dev *pdev) } static struct page * -mlx5vf_get_migration_page(struct mlx5_vf_migration_file *migf, +mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf, unsigned long offset) { unsigned long cur_offset = 0; @@ -41,20 +41,20 @@ mlx5vf_get_migration_page(struct mlx5_vf_migration_file *migf, unsigned int i; /* All accesses are sequential */ - if (offset < migf->last_offset || !migf->last_offset_sg) { - migf->last_offset = 0; - migf->last_offset_sg = migf->table.sgt.sgl; - migf->sg_last_entry = 0; + if (offset < buf->last_offset || !buf->last_offset_sg) { + buf->last_offset = 0; + buf->last_offset_sg = buf->table.sgt.sgl; + buf->sg_last_entry = 0; } - cur_offset = migf->last_offset; + cur_offset = buf->last_offset; - for_each_sg(migf->last_offset_sg, sg, - migf->table.sgt.orig_nents - migf->sg_last_entry, i) { + for_each_sg(buf->last_offset_sg, sg, + buf->table.sgt.orig_nents - buf->sg_last_entry, i) { if (offset < sg->length + cur_offset) { - migf->last_offset_sg = sg; - migf->sg_last_entry += i; - migf->last_offset = cur_offset; + buf->last_offset_sg = sg; + buf->sg_last_entry += i; + buf->last_offset = cur_offset; return nth_page(sg_page(sg), (offset - cur_offset) / PAGE_SIZE); } @@ -63,8 +63,8 @@ mlx5vf_get_migration_page(struct mlx5_vf_migration_file *migf, return NULL; } -static int mlx5vf_add_migration_pages(struct mlx5_vf_migration_file *migf, - unsigned int npages) +int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf, + unsigned int npages) { unsigned int to_alloc = npages; struct page **page_list; @@ -85,13 +85,13 @@ static int 
mlx5vf_add_migration_pages(struct mlx5_vf_migration_file *migf, } to_alloc -= filled; ret = sg_alloc_append_table_from_pages( - &migf->table, page_list, filled, 0, + &buf->table, page_list, filled, 0, filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC, GFP_KERNEL); if (ret) goto err; - migf->allocated_length += filled * PAGE_SIZE; + buf->allocated_length += filled * PAGE_SIZE; /* clean input for another bulk allocation */ memset(page_list, 0, filled * sizeof(*page_list)); to_fill = min_t(unsigned int, to_alloc, @@ -108,16 +108,8 @@ err: static void mlx5vf_disable_fd(struct mlx5_vf_migration_file *migf) { - struct sg_page_iter sg_iter; - mutex_lock(&migf->lock); - /* Undo alloc_pages_bulk_array() */ - for_each_sgtable_page(&migf->table.sgt, &sg_iter, 0) - __free_page(sg_page_iter_page(&sg_iter)); - sg_free_append_table(&migf->table); migf->disabled = true; - migf->total_length = 0; - migf->allocated_length = 0; migf->filp->f_pos = 0; mutex_unlock(&migf->lock); } @@ -136,6 +128,7 @@ static ssize_t mlx5vf_save_read(struct file *filp, char __user *buf, size_t len, loff_t *pos) { struct mlx5_vf_migration_file *migf = filp->private_data; + struct mlx5_vhca_data_buffer *vhca_buf = migf->buf; ssize_t done = 0; if (pos) @@ -144,16 +137,16 @@ static ssize_t mlx5vf_save_read(struct file *filp, char __user *buf, size_t len, if (!(filp->f_flags & O_NONBLOCK)) { if (wait_event_interruptible(migf->poll_wait, - READ_ONCE(migf->total_length) || migf->is_err)) + READ_ONCE(vhca_buf->length) || migf->is_err)) return -ERESTARTSYS; } mutex_lock(&migf->lock); - if ((filp->f_flags & O_NONBLOCK) && !READ_ONCE(migf->total_length)) { + if ((filp->f_flags & O_NONBLOCK) && !READ_ONCE(vhca_buf->length)) { done = -EAGAIN; goto out_unlock; } - if (*pos > migf->total_length) { + if (*pos > vhca_buf->length) { done = -EINVAL; goto out_unlock; } @@ -162,7 +155,7 @@ static ssize_t mlx5vf_save_read(struct file *filp, char __user *buf, size_t len, goto out_unlock; } - len = min_t(size_t, 
migf->total_length - *pos, len); + len = min_t(size_t, vhca_buf->length - *pos, len); while (len) { size_t page_offset; struct page *page; @@ -171,7 +164,7 @@ static ssize_t mlx5vf_save_read(struct file *filp, char __user *buf, size_t len, int ret; page_offset = (*pos) % PAGE_SIZE; - page = mlx5vf_get_migration_page(migf, *pos - page_offset); + page = mlx5vf_get_migration_page(vhca_buf, *pos - page_offset); if (!page) { if (done == 0) done = -EINVAL; @@ -208,7 +201,7 @@ static __poll_t mlx5vf_save_poll(struct file *filp, mutex_lock(&migf->lock); if (migf->disabled || migf->is_err) pollflags = EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; - else if (READ_ONCE(migf->total_length)) + else if (READ_ONCE(migf->buf->length)) pollflags = EPOLLIN | EPOLLRDNORM; mutex_unlock(&migf->lock); @@ -227,6 +220,8 @@ static struct mlx5_vf_migration_file * mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev) { struct mlx5_vf_migration_file *migf; + struct mlx5_vhca_data_buffer *buf; + size_t length; int ret; migf = kzalloc(sizeof(*migf), GFP_KERNEL); @@ -257,22 +252,23 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev) complete(&migf->save_comp); mlx5_cmd_init_async_ctx(mvdev->mdev, &migf->async_ctx); INIT_WORK(&migf->async_data.work, mlx5vf_mig_file_cleanup_cb); - ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, - &migf->total_length); + ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &length); if (ret) goto out_pd; - ret = mlx5vf_add_migration_pages( - migf, DIV_ROUND_UP_ULL(migf->total_length, PAGE_SIZE)); - if (ret) + buf = mlx5vf_alloc_data_buffer(migf, length, DMA_FROM_DEVICE); + if (IS_ERR(buf)) { + ret = PTR_ERR(buf); goto out_pd; + } - ret = mlx5vf_cmd_save_vhca_state(mvdev, migf); + ret = mlx5vf_cmd_save_vhca_state(mvdev, migf, buf); if (ret) goto out_save; + migf->buf = buf; return migf; out_save: - mlx5vf_disable_fd(migf); + mlx5vf_free_data_buffer(buf); out_pd: mlx5vf_cmd_dealloc_pd(migf); out_free: @@ -286,6 +282,7 @@ static ssize_t 
mlx5vf_resume_write(struct file *filp, const char __user *buf, size_t len, loff_t *pos) { struct mlx5_vf_migration_file *migf = filp->private_data; + struct mlx5_vhca_data_buffer *vhca_buf = migf->buf; loff_t requested_length; ssize_t done = 0; @@ -306,10 +303,10 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf, goto out_unlock; } - if (migf->allocated_length < requested_length) { + if (vhca_buf->allocated_length < requested_length) { done = mlx5vf_add_migration_pages( - migf, - DIV_ROUND_UP(requested_length - migf->allocated_length, + vhca_buf, + DIV_ROUND_UP(requested_length - vhca_buf->allocated_length, PAGE_SIZE)); if (done) goto out_unlock; @@ -323,7 +320,7 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf, int ret; page_offset = (*pos) % PAGE_SIZE; - page = mlx5vf_get_migration_page(migf, *pos - page_offset); + page = mlx5vf_get_migration_page(vhca_buf, *pos - page_offset); if (!page) { if (done == 0) done = -EINVAL; @@ -342,7 +339,7 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf, len -= page_len; done += page_len; buf += page_len; - migf->total_length += page_len; + vhca_buf->length += page_len; } out_unlock: mutex_unlock(&migf->lock); @@ -360,6 +357,7 @@ static struct mlx5_vf_migration_file * mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev) { struct mlx5_vf_migration_file *migf; + struct mlx5_vhca_data_buffer *buf; int ret; migf = kzalloc(sizeof(*migf), GFP_KERNEL); @@ -378,9 +376,18 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev) if (ret) goto out_free; + buf = mlx5vf_alloc_data_buffer(migf, 0, DMA_TO_DEVICE); + if (IS_ERR(buf)) { + ret = PTR_ERR(buf); + goto out_pd; + } + + migf->buf = buf; stream_open(migf->filp->f_inode, migf->filp); mutex_init(&migf->lock); return migf; +out_pd: + mlx5vf_cmd_dealloc_pd(migf); out_free: fput(migf->filp); end: @@ -474,7 +481,8 @@ mlx5vf_pci_step_device_state_locked(struct 
mlx5vf_pci_core_device *mvdev, if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) { ret = mlx5vf_cmd_load_vhca_state(mvdev, - mvdev->resuming_migf); + mvdev->resuming_migf, + mvdev->resuming_migf->buf); if (ret) return ERR_PTR(ret); mlx5vf_disable_fds(mvdev); From 8b599d143419669e57da3881d8293f17809688d7 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 6 Dec 2022 10:34:30 +0200 Subject: [PATCH 3443/4122] vfio/mlx5: Refactor migration file state Refactor migration file state to be an enum whose values are mutually exclusive. As part of that, drop the 'disabled' state, as 'error' is the same from a functional point of view. Next patches from the series will extend this enum for other relevant states. Reviewed-by: Jason Gunthorpe Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20221206083438.37807-7-yishaih@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/mlx5/cmd.c | 2 +- drivers/vfio/pci/mlx5/cmd.h | 7 +++++-- drivers/vfio/pci/mlx5/main.c | 11 ++++++----- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c index ed4c472d2eae..fcba12326185 100644 --- a/drivers/vfio/pci/mlx5/cmd.c +++ b/drivers/vfio/pci/mlx5/cmd.c @@ -351,7 +351,7 @@ void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work) mutex_lock(&migf->lock); if (async_data->status) { - migf->is_err = true; + migf->state = MLX5_MIGF_STATE_ERROR; wake_up_interruptible(&migf->poll_wait); } mutex_unlock(&migf->lock); diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h index b0f08dfc8120..14403e654e4e 100644 --- a/drivers/vfio/pci/mlx5/cmd.h +++ b/drivers/vfio/pci/mlx5/cmd.h @@ -12,6 +12,10 @@ #include #include +enum mlx5_vf_migf_state { + MLX5_MIGF_STATE_ERROR = 1, +}; + struct mlx5_vhca_data_buffer { struct sg_append_table table; loff_t start_pos; @@ -37,8 +41,7 @@ struct mlx5vf_async_data { struct mlx5_vf_migration_file { struct file *filp; struct mutex lock; - u8 disabled:1; - u8 
is_err:1; + enum mlx5_vf_migf_state state; u32 pdn; struct mlx5_vhca_data_buffer *buf; diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c index 5f694fce854c..d95646c2f010 100644 --- a/drivers/vfio/pci/mlx5/main.c +++ b/drivers/vfio/pci/mlx5/main.c @@ -109,7 +109,7 @@ err: static void mlx5vf_disable_fd(struct mlx5_vf_migration_file *migf) { mutex_lock(&migf->lock); - migf->disabled = true; + migf->state = MLX5_MIGF_STATE_ERROR; migf->filp->f_pos = 0; mutex_unlock(&migf->lock); } @@ -137,7 +137,8 @@ static ssize_t mlx5vf_save_read(struct file *filp, char __user *buf, size_t len, if (!(filp->f_flags & O_NONBLOCK)) { if (wait_event_interruptible(migf->poll_wait, - READ_ONCE(vhca_buf->length) || migf->is_err)) + READ_ONCE(vhca_buf->length) || + migf->state == MLX5_MIGF_STATE_ERROR)) return -ERESTARTSYS; } @@ -150,7 +151,7 @@ static ssize_t mlx5vf_save_read(struct file *filp, char __user *buf, size_t len, done = -EINVAL; goto out_unlock; } - if (migf->disabled || migf->is_err) { + if (migf->state == MLX5_MIGF_STATE_ERROR) { done = -ENODEV; goto out_unlock; } @@ -199,7 +200,7 @@ static __poll_t mlx5vf_save_poll(struct file *filp, poll_wait(filp, &migf->poll_wait, wait); mutex_lock(&migf->lock); - if (migf->disabled || migf->is_err) + if (migf->state == MLX5_MIGF_STATE_ERROR) pollflags = EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; else if (READ_ONCE(migf->buf->length)) pollflags = EPOLLIN | EPOLLRDNORM; @@ -298,7 +299,7 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf, return -ENOMEM; mutex_lock(&migf->lock); - if (migf->disabled) { + if (migf->state == MLX5_MIGF_STATE_ERROR) { done = -ENODEV; goto out_unlock; } From c668878381b5702f867ec7f43ee3b74259c6ea03 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 6 Dec 2022 10:34:31 +0200 Subject: [PATCH 3444/4122] vfio/mlx5: Refactor to use queue based data chunks Refactor to use queue based data chunks on the migration file. 
The SAVE command adds a chunk to the tail of the queue while the read() API finds the required chunk and returns its data. In case the queue is empty but the state of the migration file is MLX5_MIGF_STATE_COMPLETE, read() may not be blocked but will return 0 to indicate end of file. This is a step towards maintaining multiple images and their metadata (i.e. headers) on the migration file as part of next patches from the series. Note: At this point, we still use a single chunk on the migration file, but the code becomes ready to support multiple chunks. Reviewed-by: Jason Gunthorpe Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20221206083438.37807-8-yishaih@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/mlx5/cmd.c | 24 +++++- drivers/vfio/pci/mlx5/cmd.h | 5 ++ drivers/vfio/pci/mlx5/main.c | 145 +++++++++++++++++++++++++++-------- 3 files changed, 136 insertions(+), 38 deletions(-) diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c index fcba12326185..0e36b4c8c816 100644 --- a/drivers/vfio/pci/mlx5/cmd.c +++ b/drivers/vfio/pci/mlx5/cmd.c @@ -351,6 +351,7 @@ void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work) mutex_lock(&migf->lock); if (async_data->status) { + migf->buf = async_data->buf; migf->state = MLX5_MIGF_STATE_ERROR; wake_up_interruptible(&migf->poll_wait); } @@ -368,9 +369,15 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context) struct mlx5_vf_migration_file, async_data); if (!status) { - WRITE_ONCE(migf->buf->length, - MLX5_GET(save_vhca_state_out, async_data->out, - actual_image_size)); + unsigned long flags; + + async_data->buf->length = + MLX5_GET(save_vhca_state_out, async_data->out, + actual_image_size); + spin_lock_irqsave(&migf->list_lock, flags); + list_add_tail(&async_data->buf->buf_elm, &migf->buf_list); + spin_unlock_irqrestore(&migf->list_lock, flags); + migf->state = MLX5_MIGF_STATE_COMPLETE; wake_up_interruptible(&migf->poll_wait); } @@ -407,6 +414,7 @@ int 
mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, MLX5_SET(save_vhca_state_in, in, size, buf->allocated_length); async_data = &migf->async_data; + async_data->buf = buf; async_data->out = kvzalloc(out_size, GFP_KERNEL); if (!async_data->out) { err = -ENOMEM; @@ -479,14 +487,22 @@ void mlx5vf_cmd_dealloc_pd(struct mlx5_vf_migration_file *migf) void mlx5fv_cmd_clean_migf_resources(struct mlx5_vf_migration_file *migf) { - lockdep_assert_held(&migf->mvdev->state_mutex); + struct mlx5_vhca_data_buffer *entry; + lockdep_assert_held(&migf->mvdev->state_mutex); WARN_ON(migf->mvdev->mdev_detach); if (migf->buf) { mlx5vf_free_data_buffer(migf->buf); migf->buf = NULL; } + + while ((entry = list_first_entry_or_null(&migf->buf_list, + struct mlx5_vhca_data_buffer, buf_elm))) { + list_del(&entry->buf_elm); + mlx5vf_free_data_buffer(entry); + } + mlx5vf_cmd_dealloc_pd(migf); } diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h index 14403e654e4e..6e594689566e 100644 --- a/drivers/vfio/pci/mlx5/cmd.h +++ b/drivers/vfio/pci/mlx5/cmd.h @@ -14,6 +14,7 @@ enum mlx5_vf_migf_state { MLX5_MIGF_STATE_ERROR = 1, + MLX5_MIGF_STATE_COMPLETE, }; struct mlx5_vhca_data_buffer { @@ -24,6 +25,7 @@ struct mlx5_vhca_data_buffer { u32 mkey; enum dma_data_direction dma_dir; u8 dmaed:1; + struct list_head buf_elm; struct mlx5_vf_migration_file *migf; /* Optimize mlx5vf_get_migration_page() for sequential access */ struct scatterlist *last_offset_sg; @@ -34,6 +36,7 @@ struct mlx5_vhca_data_buffer { struct mlx5vf_async_data { struct mlx5_async_work cb_work; struct work_struct work; + struct mlx5_vhca_data_buffer *buf; int status; void *out; }; @@ -45,6 +48,8 @@ struct mlx5_vf_migration_file { u32 pdn; struct mlx5_vhca_data_buffer *buf; + spinlock_t list_lock; + struct list_head buf_list; struct mlx5vf_pci_core_device *mvdev; wait_queue_head_t poll_wait; struct completion save_comp; diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c index 
d95646c2f010..ca16425811c4 100644 --- a/drivers/vfio/pci/mlx5/main.c +++ b/drivers/vfio/pci/mlx5/main.c @@ -124,11 +124,90 @@ static int mlx5vf_release_file(struct inode *inode, struct file *filp) return 0; } +static struct mlx5_vhca_data_buffer * +mlx5vf_get_data_buff_from_pos(struct mlx5_vf_migration_file *migf, loff_t pos, + bool *end_of_data) +{ + struct mlx5_vhca_data_buffer *buf; + bool found = false; + + *end_of_data = false; + spin_lock_irq(&migf->list_lock); + if (list_empty(&migf->buf_list)) { + *end_of_data = true; + goto end; + } + + buf = list_first_entry(&migf->buf_list, struct mlx5_vhca_data_buffer, + buf_elm); + if (pos >= buf->start_pos && + pos < buf->start_pos + buf->length) { + found = true; + goto end; + } + + /* + * As we use a stream based FD we may expect having the data always + * on first chunk + */ + migf->state = MLX5_MIGF_STATE_ERROR; + +end: + spin_unlock_irq(&migf->list_lock); + return found ? buf : NULL; +} + +static ssize_t mlx5vf_buf_read(struct mlx5_vhca_data_buffer *vhca_buf, + char __user **buf, size_t *len, loff_t *pos) +{ + unsigned long offset; + ssize_t done = 0; + size_t copy_len; + + copy_len = min_t(size_t, + vhca_buf->start_pos + vhca_buf->length - *pos, *len); + while (copy_len) { + size_t page_offset; + struct page *page; + size_t page_len; + u8 *from_buff; + int ret; + + offset = *pos - vhca_buf->start_pos; + page_offset = offset % PAGE_SIZE; + offset -= page_offset; + page = mlx5vf_get_migration_page(vhca_buf, offset); + if (!page) + return -EINVAL; + page_len = min_t(size_t, copy_len, PAGE_SIZE - page_offset); + from_buff = kmap_local_page(page); + ret = copy_to_user(*buf, from_buff + page_offset, page_len); + kunmap_local(from_buff); + if (ret) + return -EFAULT; + *pos += page_len; + *len -= page_len; + *buf += page_len; + done += page_len; + copy_len -= page_len; + } + + if (*pos >= vhca_buf->start_pos + vhca_buf->length) { + spin_lock_irq(&vhca_buf->migf->list_lock); + list_del_init(&vhca_buf->buf_elm); + 
spin_unlock_irq(&vhca_buf->migf->list_lock); + } + + return done; +} + static ssize_t mlx5vf_save_read(struct file *filp, char __user *buf, size_t len, loff_t *pos) { struct mlx5_vf_migration_file *migf = filp->private_data; - struct mlx5_vhca_data_buffer *vhca_buf = migf->buf; + struct mlx5_vhca_data_buffer *vhca_buf; + bool first_loop_call = true; + bool end_of_data; ssize_t done = 0; if (pos) @@ -137,53 +216,47 @@ static ssize_t mlx5vf_save_read(struct file *filp, char __user *buf, size_t len, if (!(filp->f_flags & O_NONBLOCK)) { if (wait_event_interruptible(migf->poll_wait, - READ_ONCE(vhca_buf->length) || - migf->state == MLX5_MIGF_STATE_ERROR)) + !list_empty(&migf->buf_list) || + migf->state == MLX5_MIGF_STATE_ERROR || + migf->state == MLX5_MIGF_STATE_COMPLETE)) return -ERESTARTSYS; } mutex_lock(&migf->lock); - if ((filp->f_flags & O_NONBLOCK) && !READ_ONCE(vhca_buf->length)) { - done = -EAGAIN; - goto out_unlock; - } - if (*pos > vhca_buf->length) { - done = -EINVAL; - goto out_unlock; - } if (migf->state == MLX5_MIGF_STATE_ERROR) { done = -ENODEV; goto out_unlock; } - len = min_t(size_t, vhca_buf->length - *pos, len); while (len) { - size_t page_offset; - struct page *page; - size_t page_len; - u8 *from_buff; - int ret; + ssize_t count; - page_offset = (*pos) % PAGE_SIZE; - page = mlx5vf_get_migration_page(vhca_buf, *pos - page_offset); - if (!page) { - if (done == 0) - done = -EINVAL; + vhca_buf = mlx5vf_get_data_buff_from_pos(migf, *pos, + &end_of_data); + if (first_loop_call) { + first_loop_call = false; + if (end_of_data && migf->state != MLX5_MIGF_STATE_COMPLETE) { + if (filp->f_flags & O_NONBLOCK) { + done = -EAGAIN; + goto out_unlock; + } + } + } + + if (end_of_data) + goto out_unlock; + + if (!vhca_buf) { + done = -EINVAL; goto out_unlock; } - page_len = min_t(size_t, len, PAGE_SIZE - page_offset); - from_buff = kmap_local_page(page); - ret = copy_to_user(buf, from_buff + page_offset, page_len); - kunmap_local(from_buff); - if (ret) { - done = 
-EFAULT; + count = mlx5vf_buf_read(vhca_buf, &buf, &len, pos); + if (count < 0) { + done = count; goto out_unlock; } - *pos += page_len; - len -= page_len; - done += page_len; - buf += page_len; + done += count; } out_unlock: @@ -202,7 +275,8 @@ static __poll_t mlx5vf_save_poll(struct file *filp, mutex_lock(&migf->lock); if (migf->state == MLX5_MIGF_STATE_ERROR) pollflags = EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; - else if (READ_ONCE(migf->buf->length)) + else if (!list_empty(&migf->buf_list) || + migf->state == MLX5_MIGF_STATE_COMPLETE) pollflags = EPOLLIN | EPOLLRDNORM; mutex_unlock(&migf->lock); @@ -253,6 +327,8 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev) complete(&migf->save_comp); mlx5_cmd_init_async_ctx(mvdev->mdev, &migf->async_ctx); INIT_WORK(&migf->async_data.work, mlx5vf_mig_file_cleanup_cb); + INIT_LIST_HEAD(&migf->buf_list); + spin_lock_init(&migf->list_lock); ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &length); if (ret) goto out_pd; @@ -266,7 +342,6 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev) ret = mlx5vf_cmd_save_vhca_state(mvdev, migf, buf); if (ret) goto out_save; - migf->buf = buf; return migf; out_save: mlx5vf_free_data_buffer(buf); @@ -386,6 +461,8 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev) migf->buf = buf; stream_open(migf->filp->f_inode, migf->filp); mutex_init(&migf->lock); + INIT_LIST_HEAD(&migf->buf_list); + spin_lock_init(&migf->list_lock); return migf; out_pd: mlx5vf_cmd_dealloc_pd(migf); From 3319d287f4c04b9deece8ea00e27a70bbe32941b Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 6 Dec 2022 10:34:32 +0200 Subject: [PATCH 3445/4122] vfio/mlx5: Introduce device transitions of PRE_COPY In order to support PRE_COPY, mlx5 driver is transferring multiple states (images) of the device. e.g.: the source VF can save and transfer multiple states, and the target VF will load them by that order. 
The device is saving three kinds of states: 1) Initial state - when the device moves to PRE_COPY state. 2) Middle state - during PRE_COPY phase via VFIO_MIG_GET_PRECOPY_INFO. There can be multiple states of this type. 3) Final state - when the device moves to STOP_COPY state. After moving to PRE_COPY state, user is holding the saving migf FD and can use it. For example: user can start transferring data via read() callback. Also, user can switch from PRE_COPY to STOP_COPY whenever he sees it fits. This will invoke saving of final state. This means that mlx5 VFIO device can be switched to STOP_COPY without transferring any data in PRE_COPY state. Therefore, when the device moves to STOP_COPY, mlx5 will store the final state on a dedicated queue entry on the list. Co-developed-by: Shay Drory Signed-off-by: Shay Drory Reviewed-by: Jason Gunthorpe Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20221206083438.37807-9-yishaih@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/mlx5/cmd.c | 96 +++++++++++++++++++++++++++++++++--- drivers/vfio/pci/mlx5/cmd.h | 16 +++++- drivers/vfio/pci/mlx5/main.c | 90 ++++++++++++++++++++++++++++++--- 3 files changed, 184 insertions(+), 18 deletions(-) diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c index 0e36b4c8c816..5fcece201d4c 100644 --- a/drivers/vfio/pci/mlx5/cmd.c +++ b/drivers/vfio/pci/mlx5/cmd.c @@ -14,18 +14,36 @@ _mlx5vf_free_page_tracker_resources(struct mlx5vf_pci_core_device *mvdev); int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod) { + struct mlx5_vf_migration_file *migf = mvdev->saving_migf; u32 out[MLX5_ST_SZ_DW(suspend_vhca_out)] = {}; u32 in[MLX5_ST_SZ_DW(suspend_vhca_in)] = {}; + int err; lockdep_assert_held(&mvdev->state_mutex); if (mvdev->mdev_detach) return -ENOTCONN; + /* + * In case PRE_COPY is used, saving_migf is exposed while the device is + * running. Make sure to run only once there is no active save command. 
+ * Running both in parallel, might end-up with a failure in the save + * command once it will try to turn on 'tracking' on a suspended device. + */ + if (migf) { + err = wait_for_completion_interruptible(&migf->save_comp); + if (err) + return err; + } + MLX5_SET(suspend_vhca_in, in, opcode, MLX5_CMD_OP_SUSPEND_VHCA); MLX5_SET(suspend_vhca_in, in, vhca_id, mvdev->vhca_id); MLX5_SET(suspend_vhca_in, in, op_mod, op_mod); - return mlx5_cmd_exec_inout(mvdev->mdev, suspend_vhca, in, out); + err = mlx5_cmd_exec_inout(mvdev->mdev, suspend_vhca, in, out); + if (migf) + complete(&migf->save_comp); + + return err; } int mlx5vf_cmd_resume_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod) @@ -45,7 +63,7 @@ int mlx5vf_cmd_resume_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod) } int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev, - size_t *state_size) + size_t *state_size, u8 query_flags) { u32 out[MLX5_ST_SZ_DW(query_vhca_migration_state_out)] = {}; u32 in[MLX5_ST_SZ_DW(query_vhca_migration_state_in)] = {}; @@ -59,6 +77,8 @@ int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev, MLX5_CMD_OP_QUERY_VHCA_MIGRATION_STATE); MLX5_SET(query_vhca_migration_state_in, in, vhca_id, mvdev->vhca_id); MLX5_SET(query_vhca_migration_state_in, in, op_mod, 0); + MLX5_SET(query_vhca_migration_state_in, in, incremental, + query_flags & MLX5VF_QUERY_INC); ret = mlx5_cmd_exec_inout(mvdev->mdev, query_vhca_migration_state, in, out); @@ -342,6 +362,56 @@ end: return ERR_PTR(ret); } +void mlx5vf_put_data_buffer(struct mlx5_vhca_data_buffer *buf) +{ + spin_lock_irq(&buf->migf->list_lock); + list_add_tail(&buf->buf_elm, &buf->migf->avail_list); + spin_unlock_irq(&buf->migf->list_lock); +} + +struct mlx5_vhca_data_buffer * +mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf, + size_t length, enum dma_data_direction dma_dir) +{ + struct mlx5_vhca_data_buffer *buf, *temp_buf; + struct list_head free_list; + + 
lockdep_assert_held(&migf->mvdev->state_mutex); + if (migf->mvdev->mdev_detach) + return ERR_PTR(-ENOTCONN); + + INIT_LIST_HEAD(&free_list); + + spin_lock_irq(&migf->list_lock); + list_for_each_entry_safe(buf, temp_buf, &migf->avail_list, buf_elm) { + if (buf->dma_dir == dma_dir) { + list_del_init(&buf->buf_elm); + if (buf->allocated_length >= length) { + spin_unlock_irq(&migf->list_lock); + goto found; + } + /* + * Prevent holding redundant buffers. Put in a free + * list and call at the end not under the spin lock + * (&migf->list_lock) to mlx5vf_free_data_buffer which + * might sleep. + */ + list_add(&buf->buf_elm, &free_list); + } + } + spin_unlock_irq(&migf->list_lock); + buf = mlx5vf_alloc_data_buffer(migf, length, dma_dir); + +found: + while ((temp_buf = list_first_entry_or_null(&free_list, + struct mlx5_vhca_data_buffer, buf_elm))) { + list_del(&temp_buf->buf_elm); + mlx5vf_free_data_buffer(temp_buf); + } + + return buf; +} + void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work) { struct mlx5vf_async_data *async_data = container_of(_work, @@ -351,7 +421,7 @@ void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work) mutex_lock(&migf->lock); if (async_data->status) { - migf->buf = async_data->buf; + mlx5vf_put_data_buffer(async_data->buf); migf->state = MLX5_MIGF_STATE_ERROR; wake_up_interruptible(&migf->poll_wait); } @@ -369,15 +439,19 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context) struct mlx5_vf_migration_file, async_data); if (!status) { + size_t image_size; unsigned long flags; - async_data->buf->length = - MLX5_GET(save_vhca_state_out, async_data->out, - actual_image_size); + image_size = MLX5_GET(save_vhca_state_out, async_data->out, + actual_image_size); + async_data->buf->length = image_size; + async_data->buf->start_pos = migf->max_pos; + migf->max_pos += async_data->buf->length; spin_lock_irqsave(&migf->list_lock, flags); list_add_tail(&async_data->buf->buf_elm, &migf->buf_list); 
spin_unlock_irqrestore(&migf->list_lock, flags); - migf->state = MLX5_MIGF_STATE_COMPLETE; + if (async_data->last_chunk) + migf->state = MLX5_MIGF_STATE_COMPLETE; wake_up_interruptible(&migf->poll_wait); } @@ -391,7 +465,8 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context) int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, struct mlx5_vf_migration_file *migf, - struct mlx5_vhca_data_buffer *buf) + struct mlx5_vhca_data_buffer *buf, bool inc, + bool track) { u32 out_size = MLX5_ST_SZ_BYTES(save_vhca_state_out); u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {}; @@ -412,9 +487,12 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, MLX5_SET(save_vhca_state_in, in, vhca_id, mvdev->vhca_id); MLX5_SET(save_vhca_state_in, in, mkey, buf->mkey); MLX5_SET(save_vhca_state_in, in, size, buf->allocated_length); + MLX5_SET(save_vhca_state_in, in, incremental, inc); + MLX5_SET(save_vhca_state_in, in, set_track, track); async_data = &migf->async_data; async_data->buf = buf; + async_data->last_chunk = !track; async_data->out = kvzalloc(out_size, GFP_KERNEL); if (!async_data->out) { err = -ENOMEM; @@ -497,6 +575,8 @@ void mlx5fv_cmd_clean_migf_resources(struct mlx5_vf_migration_file *migf) migf->buf = NULL; } + list_splice(&migf->avail_list, &migf->buf_list); + while ((entry = list_first_entry_or_null(&migf->buf_list, struct mlx5_vhca_data_buffer, buf_elm))) { list_del(&entry->buf_elm); diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h index 6e594689566e..34e61c7aa23d 100644 --- a/drivers/vfio/pci/mlx5/cmd.h +++ b/drivers/vfio/pci/mlx5/cmd.h @@ -38,6 +38,7 @@ struct mlx5vf_async_data { struct work_struct work; struct mlx5_vhca_data_buffer *buf; int status; + u8 last_chunk:1; void *out; }; @@ -47,9 +48,11 @@ struct mlx5_vf_migration_file { enum mlx5_vf_migf_state state; u32 pdn; + loff_t max_pos; struct mlx5_vhca_data_buffer *buf; spinlock_t list_lock; struct list_head buf_list; + struct list_head 
avail_list; struct mlx5vf_pci_core_device *mvdev; wait_queue_head_t poll_wait; struct completion save_comp; @@ -129,10 +132,14 @@ struct mlx5vf_pci_core_device { struct mlx5_core_dev *mdev; }; +enum { + MLX5VF_QUERY_INC = (1UL << 0), +}; + int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod); int mlx5vf_cmd_resume_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod); int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev, - size_t *state_size); + size_t *state_size, u8 query_flags); void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev, const struct vfio_migration_ops *mig_ops, const struct vfio_log_ops *log_ops); @@ -140,7 +147,8 @@ void mlx5vf_cmd_remove_migratable(struct mlx5vf_pci_core_device *mvdev); void mlx5vf_cmd_close_migratable(struct mlx5vf_pci_core_device *mvdev); int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, struct mlx5_vf_migration_file *migf, - struct mlx5_vhca_data_buffer *buf); + struct mlx5_vhca_data_buffer *buf, bool inc, + bool track); int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev, struct mlx5_vf_migration_file *migf, struct mlx5_vhca_data_buffer *buf); @@ -151,6 +159,10 @@ struct mlx5_vhca_data_buffer * mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf, size_t length, enum dma_data_direction dma_dir); void mlx5vf_free_data_buffer(struct mlx5_vhca_data_buffer *buf); +struct mlx5_vhca_data_buffer * +mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf, + size_t length, enum dma_data_direction dma_dir); +void mlx5vf_put_data_buffer(struct mlx5_vhca_data_buffer *buf); int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf, unsigned int npages); void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev); diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c index ca16425811c4..9cabba456044 100644 --- a/drivers/vfio/pci/mlx5/main.c +++ b/drivers/vfio/pci/mlx5/main.c @@ -195,6 +195,7 @@ 
static ssize_t mlx5vf_buf_read(struct mlx5_vhca_data_buffer *vhca_buf, if (*pos >= vhca_buf->start_pos + vhca_buf->length) { spin_lock_irq(&vhca_buf->migf->list_lock); list_del_init(&vhca_buf->buf_elm); + list_add_tail(&vhca_buf->buf_elm, &vhca_buf->migf->avail_list); spin_unlock_irq(&vhca_buf->migf->list_lock); } @@ -283,6 +284,16 @@ static __poll_t mlx5vf_save_poll(struct file *filp, return pollflags; } +/* + * FD is exposed and user can use it after receiving an error. + * Mark migf in error, and wake the user. + */ +static void mlx5vf_mark_err(struct mlx5_vf_migration_file *migf) +{ + migf->state = MLX5_MIGF_STATE_ERROR; + wake_up_interruptible(&migf->poll_wait); +} + static const struct file_operations mlx5vf_save_fops = { .owner = THIS_MODULE, .read = mlx5vf_save_read, @@ -291,8 +302,42 @@ static const struct file_operations mlx5vf_save_fops = { .llseek = no_llseek, }; +static int mlx5vf_pci_save_device_inc_data(struct mlx5vf_pci_core_device *mvdev) +{ + struct mlx5_vf_migration_file *migf = mvdev->saving_migf; + struct mlx5_vhca_data_buffer *buf; + size_t length; + int ret; + + if (migf->state == MLX5_MIGF_STATE_ERROR) + return -ENODEV; + + ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &length, + MLX5VF_QUERY_INC); + if (ret) + goto err; + + buf = mlx5vf_get_data_buffer(migf, length, DMA_FROM_DEVICE); + if (IS_ERR(buf)) { + ret = PTR_ERR(buf); + goto err; + } + + ret = mlx5vf_cmd_save_vhca_state(mvdev, migf, buf, true, false); + if (ret) + goto err_save; + + return 0; + +err_save: + mlx5vf_put_data_buffer(buf); +err: + mlx5vf_mark_err(migf); + return ret; +} + static struct mlx5_vf_migration_file * -mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev) +mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev, bool track) { struct mlx5_vf_migration_file *migf; struct mlx5_vhca_data_buffer *buf; @@ -328,8 +373,9 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev) mlx5_cmd_init_async_ctx(mvdev->mdev, &migf->async_ctx); 
INIT_WORK(&migf->async_data.work, mlx5vf_mig_file_cleanup_cb); INIT_LIST_HEAD(&migf->buf_list); + INIT_LIST_HEAD(&migf->avail_list); spin_lock_init(&migf->list_lock); - ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &length); + ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &length, 0); if (ret) goto out_pd; @@ -339,7 +385,7 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev) goto out_pd; } - ret = mlx5vf_cmd_save_vhca_state(mvdev, migf, buf); + ret = mlx5vf_cmd_save_vhca_state(mvdev, migf, buf, false, track); if (ret) goto out_save; return migf; @@ -462,6 +508,7 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev) stream_open(migf->filp->f_inode, migf->filp); mutex_init(&migf->lock); INIT_LIST_HEAD(&migf->buf_list); + INIT_LIST_HEAD(&migf->avail_list); spin_lock_init(&migf->list_lock); return migf; out_pd: @@ -514,7 +561,8 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev, return NULL; } - if (cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_RUNNING_P2P) { + if ((cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_RUNNING_P2P) || + (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_PRE_COPY_P2P)) { ret = mlx5vf_cmd_suspend_vhca(mvdev, MLX5_SUSPEND_VHCA_IN_OP_MOD_SUSPEND_INITIATOR); if (ret) @@ -522,7 +570,8 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev, return NULL; } - if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_RUNNING) { + if ((cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_RUNNING) || + (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_PRE_COPY)) { ret = mlx5vf_cmd_resume_vhca(mvdev, MLX5_RESUME_VHCA_IN_OP_MOD_RESUME_INITIATOR); if (ret) @@ -533,7 +582,7 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev, if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_STOP_COPY) { struct mlx5_vf_migration_file *migf; - migf = 
mlx5vf_pci_save_device_data(mvdev); + migf = mlx5vf_pci_save_device_data(mvdev, false); if (IS_ERR(migf)) return ERR_CAST(migf); get_file(migf->filp); @@ -541,7 +590,10 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev, return migf->filp; } - if ((cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP)) { + if ((cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP) || + (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_RUNNING) || + (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && + new == VFIO_DEVICE_STATE_RUNNING_P2P)) { mlx5vf_disable_fds(mvdev); return NULL; } @@ -567,6 +619,28 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev, return NULL; } + if ((cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_PRE_COPY) || + (cur == VFIO_DEVICE_STATE_RUNNING_P2P && + new == VFIO_DEVICE_STATE_PRE_COPY_P2P)) { + struct mlx5_vf_migration_file *migf; + + migf = mlx5vf_pci_save_device_data(mvdev, true); + if (IS_ERR(migf)) + return ERR_CAST(migf); + get_file(migf->filp); + mvdev->saving_migf = migf; + return migf->filp; + } + + if (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_STOP_COPY) { + ret = mlx5vf_cmd_suspend_vhca(mvdev, + MLX5_SUSPEND_VHCA_IN_OP_MOD_SUSPEND_RESPONDER); + if (ret) + return ERR_PTR(ret); + ret = mlx5vf_pci_save_device_inc_data(mvdev); + return ret ? 
ERR_PTR(ret) : NULL; + } + /* * vfio_mig_get_next_state() does not use arcs other than the above */ @@ -635,7 +709,7 @@ static int mlx5vf_pci_get_data_size(struct vfio_device *vdev, mutex_lock(&mvdev->state_mutex); ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, - &state_size); + &state_size, 0); if (!ret) *stop_copy_length = state_size; mlx5vf_state_mutex_unlock(mvdev); From 0c9a38fee8b210a8dfd3f177526daac567ec9265 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 6 Dec 2022 10:34:33 +0200 Subject: [PATCH 3446/4122] vfio/mlx5: Introduce SW headers for migration states As mentioned in the previous patches, mlx5 is transferring multiple states when the PRE_COPY protocol is used. This multi-state mechanism requires the target VM to know the states' size in order to execute multiple loads. Therefore, add a SW header, with the needed information, for each saved state the source VM is transferring to the target VM. This patch implements the source VM handling of the headers; the following patch will implement the target VM handling of the headers.
Reviewed-by: Jason Gunthorpe Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20221206083438.37807-10-yishaih@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/mlx5/cmd.c | 56 ++++++++++++++++++++++++++++++++++-- drivers/vfio/pci/mlx5/cmd.h | 13 +++++++++ drivers/vfio/pci/mlx5/main.c | 2 +- 3 files changed, 67 insertions(+), 4 deletions(-) diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c index 5fcece201d4c..160fa38fc78d 100644 --- a/drivers/vfio/pci/mlx5/cmd.c +++ b/drivers/vfio/pci/mlx5/cmd.c @@ -351,9 +351,11 @@ mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf, if (ret) goto end; - ret = mlx5vf_dma_data_buffer(buf); - if (ret) - goto end; + if (dma_dir != DMA_NONE) { + ret = mlx5vf_dma_data_buffer(buf); + if (ret) + goto end; + } } return buf; @@ -422,6 +424,8 @@ void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work) mutex_lock(&migf->lock); if (async_data->status) { mlx5vf_put_data_buffer(async_data->buf); + if (async_data->header_buf) + mlx5vf_put_data_buffer(async_data->header_buf); migf->state = MLX5_MIGF_STATE_ERROR; wake_up_interruptible(&migf->poll_wait); } @@ -431,6 +435,32 @@ void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work) fput(migf->filp); } +static int add_buf_header(struct mlx5_vhca_data_buffer *header_buf, + size_t image_size) +{ + struct mlx5_vf_migration_file *migf = header_buf->migf; + struct mlx5_vf_migration_header header = {}; + unsigned long flags; + struct page *page; + u8 *to_buff; + + header.image_size = cpu_to_le64(image_size); + page = mlx5vf_get_migration_page(header_buf, 0); + if (!page) + return -EINVAL; + to_buff = kmap_local_page(page); + memcpy(to_buff, &header, sizeof(header)); + kunmap_local(to_buff); + header_buf->length = sizeof(header); + header_buf->header_image_size = image_size; + header_buf->start_pos = header_buf->migf->max_pos; + migf->max_pos += header_buf->length; + spin_lock_irqsave(&migf->list_lock, flags); + list_add_tail(&header_buf->buf_elm, 
&migf->buf_list); + spin_unlock_irqrestore(&migf->list_lock, flags); + return 0; +} + static void mlx5vf_save_callback(int status, struct mlx5_async_work *context) { struct mlx5vf_async_data *async_data = container_of(context, @@ -444,6 +474,11 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context) image_size = MLX5_GET(save_vhca_state_out, async_data->out, actual_image_size); + if (async_data->header_buf) { + status = add_buf_header(async_data->header_buf, image_size); + if (status) + goto err; + } async_data->buf->length = image_size; async_data->buf->start_pos = migf->max_pos; migf->max_pos += async_data->buf->length; @@ -455,6 +490,7 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context) wake_up_interruptible(&migf->poll_wait); } +err: /* * The error and the cleanup flows can't run from an * interrupt context @@ -470,6 +506,7 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, { u32 out_size = MLX5_ST_SZ_BYTES(save_vhca_state_out); u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {}; + struct mlx5_vhca_data_buffer *header_buf = NULL; struct mlx5vf_async_data *async_data; int err; @@ -499,6 +536,16 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, goto err_out; } + if (MLX5VF_PRE_COPY_SUPP(mvdev)) { + header_buf = mlx5vf_get_data_buffer(migf, + sizeof(struct mlx5_vf_migration_header), DMA_NONE); + if (IS_ERR(header_buf)) { + err = PTR_ERR(header_buf); + goto err_free; + } + } + + async_data->header_buf = header_buf; get_file(migf->filp); err = mlx5_cmd_exec_cb(&migf->async_ctx, in, sizeof(in), async_data->out, @@ -510,7 +557,10 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, return 0; err_exec: + if (header_buf) + mlx5vf_put_data_buffer(header_buf); fput(migf->filp); +err_free: kvfree(async_data->out); err_out: complete(&migf->save_comp); diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h index 34e61c7aa23d..3e36ccca820a 100644 --- 
a/drivers/vfio/pci/mlx5/cmd.h +++ b/drivers/vfio/pci/mlx5/cmd.h @@ -12,16 +12,26 @@ #include #include +#define MLX5VF_PRE_COPY_SUPP(mvdev) \ + ((mvdev)->core_device.vdev.migration_flags & VFIO_MIGRATION_PRE_COPY) + enum mlx5_vf_migf_state { MLX5_MIGF_STATE_ERROR = 1, MLX5_MIGF_STATE_COMPLETE, }; +struct mlx5_vf_migration_header { + __le64 image_size; + /* For future use in case we may need to change the kernel protocol */ + __le64 flags; +}; + struct mlx5_vhca_data_buffer { struct sg_append_table table; loff_t start_pos; u64 length; u64 allocated_length; + u64 header_image_size; u32 mkey; enum dma_data_direction dma_dir; u8 dmaed:1; @@ -37,6 +47,7 @@ struct mlx5vf_async_data { struct mlx5_async_work cb_work; struct work_struct work; struct mlx5_vhca_data_buffer *buf; + struct mlx5_vhca_data_buffer *header_buf; int status; u8 last_chunk:1; void *out; @@ -165,6 +176,8 @@ mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf, void mlx5vf_put_data_buffer(struct mlx5_vhca_data_buffer *buf); int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf, unsigned int npages); +struct page *mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf, + unsigned long offset); void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev); void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev); void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work); diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c index 9cabba456044..9a36e36ec33b 100644 --- a/drivers/vfio/pci/mlx5/main.c +++ b/drivers/vfio/pci/mlx5/main.c @@ -32,7 +32,7 @@ static struct mlx5vf_pci_core_device *mlx5vf_drvdata(struct pci_dev *pdev) core_device); } -static struct page * +struct page * mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf, unsigned long offset) { From 0dce165b1adf8d7f67030bb257e00107db8022de Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 6 Dec 2022 10:34:34 +0200 Subject: [PATCH 3447/4122] vfio/mlx5: Introduce vfio precopy ioctl 
implementation vfio precopy ioctl returns an estimation of data available for transferring from the device. Whenever a user is using VFIO_MIG_GET_PRECOPY_INFO, track the current state of the device, and if needed, append the dirty data to the transfer FD data. This is done by saving a middle state. As mlx5 runs the SAVE command asynchronously, make sure to query for incremental data only once there is no active save command. Running both in parallel might end up with a failure in the incremental query command on an un-tracked vhca. Also, a middle state will be saved only after the previous state has finished its SAVE command and has been fully transferred; this prevents endless use of resources. Co-developed-by: Shay Drory Signed-off-by: Shay Drory Reviewed-by: Jason Gunthorpe Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20221206083438.37807-11-yishaih@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/mlx5/cmd.c | 16 +++++ drivers/vfio/pci/mlx5/main.c | 111 +++++++++++++++++++++++++++++++++++ 2 files changed, 127 insertions(+) diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c index 160fa38fc78d..12e74ecebe64 100644 --- a/drivers/vfio/pci/mlx5/cmd.c +++ b/drivers/vfio/pci/mlx5/cmd.c @@ -67,12 +67,25 @@ int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev, { u32 out[MLX5_ST_SZ_DW(query_vhca_migration_state_out)] = {}; u32 in[MLX5_ST_SZ_DW(query_vhca_migration_state_in)] = {}; + bool inc = query_flags & MLX5VF_QUERY_INC; int ret; lockdep_assert_held(&mvdev->state_mutex); if (mvdev->mdev_detach) return -ENOTCONN; + /* + * In case PRE_COPY is used, saving_migf is exposed while device is + * running. Make sure to run only once there is no active save command. + * Running both in parallel, might end-up with a failure in the + * incremental query command on un-tracked vhca.
+ */ + if (inc) { + ret = wait_for_completion_interruptible(&mvdev->saving_migf->save_comp); + if (ret) + return ret; + } + MLX5_SET(query_vhca_migration_state_in, in, opcode, MLX5_CMD_OP_QUERY_VHCA_MIGRATION_STATE); MLX5_SET(query_vhca_migration_state_in, in, vhca_id, mvdev->vhca_id); @@ -82,6 +95,9 @@ int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev, ret = mlx5_cmd_exec_inout(mvdev->mdev, query_vhca_migration_state, in, out); + if (inc) + complete(&mvdev->saving_migf->save_comp); + if (ret) return ret; diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c index 9a36e36ec33b..2c8ac763057c 100644 --- a/drivers/vfio/pci/mlx5/main.c +++ b/drivers/vfio/pci/mlx5/main.c @@ -294,10 +294,121 @@ static void mlx5vf_mark_err(struct mlx5_vf_migration_file *migf) wake_up_interruptible(&migf->poll_wait); } +static long mlx5vf_precopy_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + struct mlx5_vf_migration_file *migf = filp->private_data; + struct mlx5vf_pci_core_device *mvdev = migf->mvdev; + struct mlx5_vhca_data_buffer *buf; + struct vfio_precopy_info info = {}; + loff_t *pos = &filp->f_pos; + unsigned long minsz; + size_t inc_length = 0; + bool end_of_data; + int ret; + + if (cmd != VFIO_MIG_GET_PRECOPY_INFO) + return -ENOTTY; + + minsz = offsetofend(struct vfio_precopy_info, dirty_bytes); + + if (copy_from_user(&info, (void __user *)arg, minsz)) + return -EFAULT; + + if (info.argsz < minsz) + return -EINVAL; + + mutex_lock(&mvdev->state_mutex); + if (mvdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY && + mvdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY_P2P) { + ret = -EINVAL; + goto err_state_unlock; + } + + /* + * We can't issue a SAVE command when the device is suspended, so as + * part of VFIO_DEVICE_STATE_PRE_COPY_P2P no reason to query for extra + * bytes that can't be read. 
+ */ + if (mvdev->mig_state == VFIO_DEVICE_STATE_PRE_COPY) { + /* + * Once the query returns it's guaranteed that there is no + * active SAVE command. + * As so, the other code below is safe with the proper locks. + */ + ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &inc_length, + MLX5VF_QUERY_INC); + if (ret) + goto err_state_unlock; + } + + mutex_lock(&migf->lock); + if (migf->state == MLX5_MIGF_STATE_ERROR) { + ret = -ENODEV; + goto err_migf_unlock; + } + + buf = mlx5vf_get_data_buff_from_pos(migf, *pos, &end_of_data); + if (buf) { + if (buf->start_pos == 0) { + info.initial_bytes = buf->header_image_size - *pos; + } else if (buf->start_pos == + sizeof(struct mlx5_vf_migration_header)) { + /* First data buffer following the header */ + info.initial_bytes = buf->start_pos + + buf->length - *pos; + } else { + info.dirty_bytes = buf->start_pos + buf->length - *pos; + } + } else { + if (!end_of_data) { + ret = -EINVAL; + goto err_migf_unlock; + } + + info.dirty_bytes = inc_length; + } + + if (!end_of_data || !inc_length) { + mutex_unlock(&migf->lock); + goto done; + } + + mutex_unlock(&migf->lock); + /* + * We finished transferring the current state and the device has a + * dirty state, save a new state to be ready for. 
+ */ + buf = mlx5vf_get_data_buffer(migf, inc_length, DMA_FROM_DEVICE); + if (IS_ERR(buf)) { + ret = PTR_ERR(buf); + mlx5vf_mark_err(migf); + goto err_state_unlock; + } + + ret = mlx5vf_cmd_save_vhca_state(mvdev, migf, buf, true, true); + if (ret) { + mlx5vf_mark_err(migf); + mlx5vf_put_data_buffer(buf); + goto err_state_unlock; + } + +done: + mlx5vf_state_mutex_unlock(mvdev); + return copy_to_user((void __user *)arg, &info, minsz); +err_migf_unlock: + mutex_unlock(&migf->lock); +err_state_unlock: + mlx5vf_state_mutex_unlock(mvdev); + return ret; +} + static const struct file_operations mlx5vf_save_fops = { .owner = THIS_MODULE, .read = mlx5vf_save_read, .poll = mlx5vf_save_poll, + .unlocked_ioctl = mlx5vf_precopy_ioctl, + .compat_ioctl = compat_ptr_ioctl, .release = mlx5vf_release_file, .llseek = no_llseek, }; From 81156c27271c4a6c594e492c8d119fbacfc99f36 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 6 Dec 2022 10:34:35 +0200 Subject: [PATCH 3448/4122] vfio/mlx5: Consider temporary end of stream as part of PRE_COPY During PRE_COPY the migration data FD may have a temporary "end of stream" that is reached when the initial_bytes were read and no other dirty data exists yet. For instance, this may indicate that the device is idle and not currently dirtying any internal state. When read() is done on this temporary end of stream the kernel driver should return ENOMSG from read(). Userspace can wait for more data or consider moving to STOP_COPY. To not block the user upon read() and let it get ENOMSG we add a new state named MLX5_MIGF_STATE_PRE_COPY on the migration file. In addition, we add the MLX5_MIGF_STATE_SAVE_LAST state to block the read() once we call the last SAVE upon moving to STOP_COPY. Any further error will be marked with MLX5_MIGF_STATE_ERROR and the user won't be blocked. 
Reviewed-by: Jason Gunthorpe Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20221206083438.37807-12-yishaih@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/mlx5/cmd.c | 7 +++++-- drivers/vfio/pci/mlx5/cmd.h | 2 ++ drivers/vfio/pci/mlx5/main.c | 7 +++++++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c index 12e74ecebe64..f6293da033cc 100644 --- a/drivers/vfio/pci/mlx5/cmd.c +++ b/drivers/vfio/pci/mlx5/cmd.c @@ -501,8 +501,8 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context) spin_lock_irqsave(&migf->list_lock, flags); list_add_tail(&async_data->buf->buf_elm, &migf->buf_list); spin_unlock_irqrestore(&migf->list_lock, flags); - if (async_data->last_chunk) - migf->state = MLX5_MIGF_STATE_COMPLETE; + migf->state = async_data->last_chunk ? + MLX5_MIGF_STATE_COMPLETE : MLX5_MIGF_STATE_PRE_COPY; wake_up_interruptible(&migf->poll_wait); } @@ -561,6 +561,9 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, } } + if (async_data->last_chunk) + migf->state = MLX5_MIGF_STATE_SAVE_LAST; + async_data->header_buf = header_buf; get_file(migf->filp); err = mlx5_cmd_exec_cb(&migf->async_ctx, in, sizeof(in), diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h index 3e36ccca820a..d048f23977dd 100644 --- a/drivers/vfio/pci/mlx5/cmd.h +++ b/drivers/vfio/pci/mlx5/cmd.h @@ -17,6 +17,8 @@ enum mlx5_vf_migf_state { MLX5_MIGF_STATE_ERROR = 1, + MLX5_MIGF_STATE_PRE_COPY, + MLX5_MIGF_STATE_SAVE_LAST, MLX5_MIGF_STATE_COMPLETE, }; diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c index 2c8ac763057c..44b1543c751c 100644 --- a/drivers/vfio/pci/mlx5/main.c +++ b/drivers/vfio/pci/mlx5/main.c @@ -219,6 +219,7 @@ static ssize_t mlx5vf_save_read(struct file *filp, char __user *buf, size_t len, if (wait_event_interruptible(migf->poll_wait, !list_empty(&migf->buf_list) || migf->state == MLX5_MIGF_STATE_ERROR || + 
migf->state == MLX5_MIGF_STATE_PRE_COPY || migf->state == MLX5_MIGF_STATE_COMPLETE)) return -ERESTARTSYS; } @@ -236,6 +237,12 @@ static ssize_t mlx5vf_save_read(struct file *filp, char __user *buf, size_t len, &end_of_data); if (first_loop_call) { first_loop_call = false; + /* Temporary end of file as part of PRE_COPY */ + if (end_of_data && migf->state == MLX5_MIGF_STATE_PRE_COPY) { + done = -ENOMSG; + goto out_unlock; + } + if (end_of_data && migf->state != MLX5_MIGF_STATE_COMPLETE) { if (filp->f_flags & O_NONBLOCK) { done = -EAGAIN; From 34e2f27143d1b373f088e805f7e11cdf778f791d Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 6 Dec 2022 10:34:36 +0200 Subject: [PATCH 3449/4122] vfio/mlx5: Introduce multiple loads In order to support PRE_COPY, mlx5 driver transfers multiple states (images) of the device. e.g.: the source VF can save and transfer multiple states, and the target VF will load them by that order. This patch implements the changes for the target VF to decompose the header for each state and to write and load multiple states. 
Reviewed-by: Jason Gunthorpe Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20221206083438.37807-13-yishaih@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/mlx5/cmd.c | 13 +- drivers/vfio/pci/mlx5/cmd.h | 10 ++ drivers/vfio/pci/mlx5/main.c | 287 +++++++++++++++++++++++++++++------ 3 files changed, 261 insertions(+), 49 deletions(-) diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c index f6293da033cc..993749818d90 100644 --- a/drivers/vfio/pci/mlx5/cmd.c +++ b/drivers/vfio/pci/mlx5/cmd.c @@ -598,9 +598,11 @@ int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev, if (mvdev->mdev_detach) return -ENOTCONN; - err = mlx5vf_dma_data_buffer(buf); - if (err) - return err; + if (!buf->dmaed) { + err = mlx5vf_dma_data_buffer(buf); + if (err) + return err; + } MLX5_SET(load_vhca_state_in, in, opcode, MLX5_CMD_OP_LOAD_VHCA_STATE); @@ -644,6 +646,11 @@ void mlx5fv_cmd_clean_migf_resources(struct mlx5_vf_migration_file *migf) migf->buf = NULL; } + if (migf->buf_header) { + mlx5vf_free_data_buffer(migf->buf_header); + migf->buf_header = NULL; + } + list_splice(&migf->avail_list, &migf->buf_list); while ((entry = list_first_entry_or_null(&migf->buf_list, diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h index d048f23977dd..7729eac8c78c 100644 --- a/drivers/vfio/pci/mlx5/cmd.h +++ b/drivers/vfio/pci/mlx5/cmd.h @@ -22,6 +22,14 @@ enum mlx5_vf_migf_state { MLX5_MIGF_STATE_COMPLETE, }; +enum mlx5_vf_load_state { + MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER, + MLX5_VF_LOAD_STATE_READ_HEADER, + MLX5_VF_LOAD_STATE_PREP_IMAGE, + MLX5_VF_LOAD_STATE_READ_IMAGE, + MLX5_VF_LOAD_STATE_LOAD_IMAGE, +}; + struct mlx5_vf_migration_header { __le64 image_size; /* For future use in case we may need to change the kernel protocol */ @@ -60,9 +68,11 @@ struct mlx5_vf_migration_file { struct mutex lock; enum mlx5_vf_migf_state state; + enum mlx5_vf_load_state load_state; u32 pdn; loff_t max_pos; struct mlx5_vhca_data_buffer 
*buf; + struct mlx5_vhca_data_buffer *buf_header; spinlock_t list_lock; struct list_head buf_list; struct list_head avail_list; diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c index 44b1543c751c..5a669b73994a 100644 --- a/drivers/vfio/pci/mlx5/main.c +++ b/drivers/vfio/pci/mlx5/main.c @@ -518,13 +518,162 @@ end: return ERR_PTR(ret); } +static int +mlx5vf_append_page_to_mig_buf(struct mlx5_vhca_data_buffer *vhca_buf, + const char __user **buf, size_t *len, + loff_t *pos, ssize_t *done) +{ + unsigned long offset; + size_t page_offset; + struct page *page; + size_t page_len; + u8 *to_buff; + int ret; + + offset = *pos - vhca_buf->start_pos; + page_offset = offset % PAGE_SIZE; + + page = mlx5vf_get_migration_page(vhca_buf, offset - page_offset); + if (!page) + return -EINVAL; + page_len = min_t(size_t, *len, PAGE_SIZE - page_offset); + to_buff = kmap_local_page(page); + ret = copy_from_user(to_buff + page_offset, *buf, page_len); + kunmap_local(to_buff); + if (ret) + return -EFAULT; + + *pos += page_len; + *done += page_len; + *buf += page_len; + *len -= page_len; + vhca_buf->length += page_len; + return 0; +} + +static int +mlx5vf_resume_read_image_no_header(struct mlx5_vhca_data_buffer *vhca_buf, + loff_t requested_length, + const char __user **buf, size_t *len, + loff_t *pos, ssize_t *done) +{ + int ret; + + if (requested_length > MAX_MIGRATION_SIZE) + return -ENOMEM; + + if (vhca_buf->allocated_length < requested_length) { + ret = mlx5vf_add_migration_pages( + vhca_buf, + DIV_ROUND_UP(requested_length - vhca_buf->allocated_length, + PAGE_SIZE)); + if (ret) + return ret; + } + + while (*len) { + ret = mlx5vf_append_page_to_mig_buf(vhca_buf, buf, len, pos, + done); + if (ret) + return ret; + } + + return 0; +} + +static ssize_t +mlx5vf_resume_read_image(struct mlx5_vf_migration_file *migf, + struct mlx5_vhca_data_buffer *vhca_buf, + size_t image_size, const char __user **buf, + size_t *len, loff_t *pos, ssize_t *done, + bool *has_work) +{ + 
size_t copy_len, to_copy; + int ret; + + to_copy = min_t(size_t, *len, image_size - vhca_buf->length); + copy_len = to_copy; + while (to_copy) { + ret = mlx5vf_append_page_to_mig_buf(vhca_buf, buf, &to_copy, pos, + done); + if (ret) + return ret; + } + + *len -= copy_len; + if (vhca_buf->length == image_size) { + migf->load_state = MLX5_VF_LOAD_STATE_LOAD_IMAGE; + migf->max_pos += image_size; + *has_work = true; + } + + return 0; +} + +static int +mlx5vf_resume_read_header(struct mlx5_vf_migration_file *migf, + struct mlx5_vhca_data_buffer *vhca_buf, + const char __user **buf, + size_t *len, loff_t *pos, + ssize_t *done, bool *has_work) +{ + struct page *page; + size_t copy_len; + u8 *to_buff; + int ret; + + copy_len = min_t(size_t, *len, + sizeof(struct mlx5_vf_migration_header) - vhca_buf->length); + page = mlx5vf_get_migration_page(vhca_buf, 0); + if (!page) + return -EINVAL; + to_buff = kmap_local_page(page); + ret = copy_from_user(to_buff + vhca_buf->length, *buf, copy_len); + if (ret) { + ret = -EFAULT; + goto end; + } + + *buf += copy_len; + *pos += copy_len; + *done += copy_len; + *len -= copy_len; + vhca_buf->length += copy_len; + if (vhca_buf->length == sizeof(struct mlx5_vf_migration_header)) { + u64 flags; + + vhca_buf->header_image_size = le64_to_cpup((__le64 *)to_buff); + if (vhca_buf->header_image_size > MAX_MIGRATION_SIZE) { + ret = -ENOMEM; + goto end; + } + + flags = le64_to_cpup((__le64 *)(to_buff + + offsetof(struct mlx5_vf_migration_header, flags))); + if (flags) { + ret = -EOPNOTSUPP; + goto end; + } + + migf->load_state = MLX5_VF_LOAD_STATE_PREP_IMAGE; + migf->max_pos += vhca_buf->length; + *has_work = true; + } +end: + kunmap_local(to_buff); + return ret; +} + static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf, size_t len, loff_t *pos) { struct mlx5_vf_migration_file *migf = filp->private_data; struct mlx5_vhca_data_buffer *vhca_buf = migf->buf; + struct mlx5_vhca_data_buffer *vhca_buf_header = migf->buf_header; 
loff_t requested_length; + bool has_work = false; ssize_t done = 0; + int ret = 0; if (pos) return -ESPIPE; @@ -534,56 +683,83 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf, check_add_overflow((loff_t)len, *pos, &requested_length)) return -EINVAL; - if (requested_length > MAX_MIGRATION_SIZE) - return -ENOMEM; - + mutex_lock(&migf->mvdev->state_mutex); mutex_lock(&migf->lock); if (migf->state == MLX5_MIGF_STATE_ERROR) { - done = -ENODEV; + ret = -ENODEV; goto out_unlock; } - if (vhca_buf->allocated_length < requested_length) { - done = mlx5vf_add_migration_pages( - vhca_buf, - DIV_ROUND_UP(requested_length - vhca_buf->allocated_length, - PAGE_SIZE)); - if (done) - goto out_unlock; + while (len || has_work) { + has_work = false; + switch (migf->load_state) { + case MLX5_VF_LOAD_STATE_READ_HEADER: + ret = mlx5vf_resume_read_header(migf, vhca_buf_header, + &buf, &len, pos, + &done, &has_work); + if (ret) + goto out_unlock; + break; + case MLX5_VF_LOAD_STATE_PREP_IMAGE: + { + u64 size = vhca_buf_header->header_image_size; + + if (vhca_buf->allocated_length < size) { + mlx5vf_free_data_buffer(vhca_buf); + + migf->buf = mlx5vf_alloc_data_buffer(migf, + size, DMA_TO_DEVICE); + if (IS_ERR(migf->buf)) { + ret = PTR_ERR(migf->buf); + migf->buf = NULL; + goto out_unlock; + } + + vhca_buf = migf->buf; + } + + vhca_buf->start_pos = migf->max_pos; + migf->load_state = MLX5_VF_LOAD_STATE_READ_IMAGE; + break; + } + case MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER: + ret = mlx5vf_resume_read_image_no_header(vhca_buf, + requested_length, + &buf, &len, pos, &done); + if (ret) + goto out_unlock; + break; + case MLX5_VF_LOAD_STATE_READ_IMAGE: + ret = mlx5vf_resume_read_image(migf, vhca_buf, + vhca_buf_header->header_image_size, + &buf, &len, pos, &done, &has_work); + if (ret) + goto out_unlock; + break; + case MLX5_VF_LOAD_STATE_LOAD_IMAGE: + ret = mlx5vf_cmd_load_vhca_state(migf->mvdev, migf, vhca_buf); + if (ret) + goto out_unlock; + migf->load_state = 
MLX5_VF_LOAD_STATE_READ_HEADER; + + /* prep header buf for next image */ + vhca_buf_header->length = 0; + vhca_buf_header->header_image_size = 0; + /* prep data buf for next image */ + vhca_buf->length = 0; + + break; + default: + break; + } } - while (len) { - size_t page_offset; - struct page *page; - size_t page_len; - u8 *to_buff; - int ret; - - page_offset = (*pos) % PAGE_SIZE; - page = mlx5vf_get_migration_page(vhca_buf, *pos - page_offset); - if (!page) { - if (done == 0) - done = -EINVAL; - goto out_unlock; - } - - page_len = min_t(size_t, len, PAGE_SIZE - page_offset); - to_buff = kmap_local_page(page); - ret = copy_from_user(to_buff + page_offset, buf, page_len); - kunmap_local(to_buff); - if (ret) { - done = -EFAULT; - goto out_unlock; - } - *pos += page_len; - len -= page_len; - done += page_len; - buf += page_len; - vhca_buf->length += page_len; - } out_unlock: + if (ret) + migf->state = MLX5_MIGF_STATE_ERROR; mutex_unlock(&migf->lock); - return done; + mlx5vf_state_mutex_unlock(migf->mvdev); + return ret ? 
ret : done; } static const struct file_operations mlx5vf_resume_fops = { @@ -623,12 +799,29 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev) } migf->buf = buf; + if (MLX5VF_PRE_COPY_SUPP(mvdev)) { + buf = mlx5vf_alloc_data_buffer(migf, + sizeof(struct mlx5_vf_migration_header), DMA_NONE); + if (IS_ERR(buf)) { + ret = PTR_ERR(buf); + goto out_buf; + } + + migf->buf_header = buf; + migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER; + } else { + /* Initial state will be to read the image */ + migf->load_state = MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER; + } + stream_open(migf->filp->f_inode, migf->filp); mutex_init(&migf->lock); INIT_LIST_HEAD(&migf->buf_list); INIT_LIST_HEAD(&migf->avail_list); spin_lock_init(&migf->list_lock); return migf; +out_buf: + mlx5vf_free_data_buffer(buf); out_pd: mlx5vf_cmd_dealloc_pd(migf); out_free: @@ -728,11 +921,13 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev, } if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) { - ret = mlx5vf_cmd_load_vhca_state(mvdev, - mvdev->resuming_migf, - mvdev->resuming_migf->buf); - if (ret) - return ERR_PTR(ret); + if (!MLX5VF_PRE_COPY_SUPP(mvdev)) { + ret = mlx5vf_cmd_load_vhca_state(mvdev, + mvdev->resuming_migf, + mvdev->resuming_migf->buf); + if (ret) + return ERR_PTR(ret); + } mlx5vf_disable_fds(mvdev); return NULL; } From d6e18a4bec431c181a60d32876c6c89955b2a4f8 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 6 Dec 2022 10:34:37 +0200 Subject: [PATCH 3450/4122] vfio/mlx5: Fallback to STOP_COPY upon specific PRE_COPY error Before a SAVE command is issued, a QUERY command is issued in order to know the device data size. In case PRE_COPY is used, the above commands are issued while the device is running. Thus, it is possible that between the QUERY and the SAVE commands the state of the device will be changed significantly and thus the SAVE will fail. Currently, if a SAVE command is failing, the driver will fail the migration. 
In the above case, don't fail the migration, but don't allow for new SAVEs to be executed while the device is in a RUNNING state. Once the device will be moved to STOP_COPY, SAVE can be executed again and the full device state will be read. Signed-off-by: Shay Drory Reviewed-by: Jason Gunthorpe Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20221206083438.37807-14-yishaih@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/mlx5/cmd.c | 27 ++++++++++++++++++++++++++- drivers/vfio/pci/mlx5/cmd.h | 2 ++ drivers/vfio/pci/mlx5/main.c | 6 ++++-- 3 files changed, 32 insertions(+), 3 deletions(-) diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c index 993749818d90..01ef695e9441 100644 --- a/drivers/vfio/pci/mlx5/cmd.c +++ b/drivers/vfio/pci/mlx5/cmd.c @@ -84,6 +84,19 @@ int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev, ret = wait_for_completion_interruptible(&mvdev->saving_migf->save_comp); if (ret) return ret; + if (mvdev->saving_migf->state == + MLX5_MIGF_STATE_PRE_COPY_ERROR) { + /* + * In case we had a PRE_COPY error, only query full + * image for final image + */ + if (!(query_flags & MLX5VF_QUERY_FINAL)) { + *state_size = 0; + complete(&mvdev->saving_migf->save_comp); + return 0; + } + query_flags &= ~MLX5VF_QUERY_INC; + } } MLX5_SET(query_vhca_migration_state_in, in, opcode, @@ -442,7 +455,10 @@ void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work) mlx5vf_put_data_buffer(async_data->buf); if (async_data->header_buf) mlx5vf_put_data_buffer(async_data->header_buf); - migf->state = MLX5_MIGF_STATE_ERROR; + if (async_data->status == MLX5_CMD_STAT_BAD_RES_STATE_ERR) + migf->state = MLX5_MIGF_STATE_PRE_COPY_ERROR; + else + migf->state = MLX5_MIGF_STATE_ERROR; wake_up_interruptible(&migf->poll_wait); } mutex_unlock(&migf->lock); @@ -511,6 +527,8 @@ err: * The error and the cleanup flows can't run from an * interrupt context */ + if (status == -EREMOTEIO) + status = MLX5_GET(save_vhca_state_out, 
async_data->out, status); async_data->status = status; queue_work(migf->mvdev->cb_wq, &async_data->work); } @@ -534,6 +552,13 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, if (err) return err; + if (migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR) + /* + * In case we had a PRE_COPY error, SAVE is triggered only for + * the final image, read device full image. + */ + inc = false; + MLX5_SET(save_vhca_state_in, in, opcode, MLX5_CMD_OP_SAVE_VHCA_STATE); MLX5_SET(save_vhca_state_in, in, op_mod, 0); diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h index 7729eac8c78c..5483171d57ad 100644 --- a/drivers/vfio/pci/mlx5/cmd.h +++ b/drivers/vfio/pci/mlx5/cmd.h @@ -17,6 +17,7 @@ enum mlx5_vf_migf_state { MLX5_MIGF_STATE_ERROR = 1, + MLX5_MIGF_STATE_PRE_COPY_ERROR, MLX5_MIGF_STATE_PRE_COPY, MLX5_MIGF_STATE_SAVE_LAST, MLX5_MIGF_STATE_COMPLETE, @@ -157,6 +158,7 @@ struct mlx5vf_pci_core_device { enum { MLX5VF_QUERY_INC = (1UL << 0), + MLX5VF_QUERY_FINAL = (1UL << 1), }; int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod); diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c index 5a669b73994a..cd90eb86128c 100644 --- a/drivers/vfio/pci/mlx5/main.c +++ b/drivers/vfio/pci/mlx5/main.c @@ -219,6 +219,7 @@ static ssize_t mlx5vf_save_read(struct file *filp, char __user *buf, size_t len, if (wait_event_interruptible(migf->poll_wait, !list_empty(&migf->buf_list) || migf->state == MLX5_MIGF_STATE_ERROR || + migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR || migf->state == MLX5_MIGF_STATE_PRE_COPY || migf->state == MLX5_MIGF_STATE_COMPLETE)) return -ERESTARTSYS; @@ -238,7 +239,8 @@ static ssize_t mlx5vf_save_read(struct file *filp, char __user *buf, size_t len, if (first_loop_call) { first_loop_call = false; /* Temporary end of file as part of PRE_COPY */ - if (end_of_data && migf->state == MLX5_MIGF_STATE_PRE_COPY) { + if (end_of_data && (migf->state == MLX5_MIGF_STATE_PRE_COPY || + migf->state == 
MLX5_MIGF_STATE_PRE_COPY_ERROR)) { done = -ENOMSG; goto out_unlock; } @@ -431,7 +433,7 @@ static int mlx5vf_pci_save_device_inc_data(struct mlx5vf_pci_core_device *mvdev) return -ENODEV; ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &length, - MLX5VF_QUERY_INC); + MLX5VF_QUERY_INC | MLX5VF_QUERY_FINAL); if (ret) goto err; From ccc2a52e464d1c983efede1a6d44728c151cb2ed Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 6 Dec 2022 10:34:38 +0200 Subject: [PATCH 3451/4122] vfio/mlx5: Enable MIGRATION_PRE_COPY flag Now that everything has been set up for MIGRATION_PRE_COPY, enable it. Signed-off-by: Shay Drory Reviewed-by: Jason Gunthorpe Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20221206083438.37807-15-yishaih@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/mlx5/cmd.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c index 01ef695e9441..64e68d13cb98 100644 --- a/drivers/vfio/pci/mlx5/cmd.c +++ b/drivers/vfio/pci/mlx5/cmd.c @@ -222,6 +222,11 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev, if (MLX5_CAP_GEN(mvdev->mdev, adv_virtualization)) mvdev->core_device.vdev.log_ops = log_ops; + if (MLX5_CAP_GEN_2(mvdev->mdev, migration_multi_load) && + MLX5_CAP_GEN_2(mvdev->mdev, migration_tracking_state)) + mvdev->core_device.vdev.migration_flags |= + VFIO_MIGRATION_PRE_COPY; + end: mlx5_vf_put_core_dev(mvdev->mdev); } From 64ffbbb1e948876dd9044c32a3ab8a790662b9bb Mon Sep 17 00:00:00 2001 From: Shameer Kolothum Date: Wed, 23 Nov 2022 11:32:33 +0000 Subject: [PATCH 3452/4122] hisi_acc_vfio_pci: Add support for precopy IOCTL PRECOPY IOCTL in the case of HiSilicon ACC driver can be used to perform the device compatibility check earlier during migration. 
Signed-off-by: Shameer Kolothum Link: https://lore.kernel.org/r/20221123113236.896-2-shameerali.kolothum.thodi@huawei.com Signed-off-by: Alex Williamson --- .../vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 52 +++++++++++++++++++ .../vfio/pci/hisilicon/hisi_acc_vfio_pci.h | 1 + 2 files changed, 53 insertions(+) diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c index 0c0c0c7f0521..f3b74a06edb6 100644 --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c @@ -764,9 +764,58 @@ hisi_acc_vf_pci_resume(struct hisi_acc_vf_core_device *hisi_acc_vdev) stream_open(migf->filp->f_inode, migf->filp); mutex_init(&migf->lock); + migf->hisi_acc_vdev = hisi_acc_vdev; return migf; } +static long hisi_acc_vf_precopy_ioctl(struct file *filp, + unsigned int cmd, unsigned long arg) +{ + struct hisi_acc_vf_migration_file *migf = filp->private_data; + struct hisi_acc_vf_core_device *hisi_acc_vdev = migf->hisi_acc_vdev; + loff_t *pos = &filp->f_pos; + struct vfio_precopy_info info; + unsigned long minsz; + int ret; + + if (cmd != VFIO_MIG_GET_PRECOPY_INFO) + return -ENOTTY; + + minsz = offsetofend(struct vfio_precopy_info, dirty_bytes); + + if (copy_from_user(&info, (void __user *)arg, minsz)) + return -EFAULT; + if (info.argsz < minsz) + return -EINVAL; + + mutex_lock(&hisi_acc_vdev->state_mutex); + if (hisi_acc_vdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY) { + mutex_unlock(&hisi_acc_vdev->state_mutex); + return -EINVAL; + } + + mutex_lock(&migf->lock); + + if (migf->disabled) { + ret = -ENODEV; + goto out; + } + + if (*pos > migf->total_length) { + ret = -EINVAL; + goto out; + } + + info.dirty_bytes = 0; + info.initial_bytes = migf->total_length - *pos; + + ret = copy_to_user((void __user *)arg, &info, minsz) ? 
-EFAULT : 0; +out: + mutex_unlock(&migf->lock); + mutex_unlock(&hisi_acc_vdev->state_mutex); + return ret; +} + static ssize_t hisi_acc_vf_save_read(struct file *filp, char __user *buf, size_t len, loff_t *pos) { @@ -807,6 +856,8 @@ out_unlock: static const struct file_operations hisi_acc_vf_save_fops = { .owner = THIS_MODULE, .read = hisi_acc_vf_save_read, + .unlocked_ioctl = hisi_acc_vf_precopy_ioctl, + .compat_ioctl = compat_ptr_ioctl, .release = hisi_acc_vf_release_file, .llseek = no_llseek, }; @@ -832,6 +883,7 @@ hisi_acc_vf_stop_copy(struct hisi_acc_vf_core_device *hisi_acc_vdev) stream_open(migf->filp->f_inode, migf->filp); mutex_init(&migf->lock); + migf->hisi_acc_vdev = hisi_acc_vdev; ret = vf_qm_state_save(hisi_acc_vdev, migf); if (ret) { diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h index 67343325b320..11d51345f5b5 100644 --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h @@ -91,6 +91,7 @@ struct hisi_acc_vf_migration_file { struct mutex lock; bool disabled; + struct hisi_acc_vf_core_device *hisi_acc_vdev; struct acc_vf_data vf_data; size_t total_length; }; From d9a871e4a143047d1d84a606772af319f11516f9 Mon Sep 17 00:00:00 2001 From: Shameer Kolothum Date: Wed, 23 Nov 2022 11:32:34 +0000 Subject: [PATCH 3453/4122] hisi_acc_vfio_pci: Introduce support for PRE_COPY state transitions The saving_migf is open in PRE_COPY state if it is supported and reads initial device match data. hisi_acc_vf_stop_copy() is refactored to make use of common code. 
Signed-off-by: Shameer Kolothum Link: https://lore.kernel.org/r/20221123113236.896-3-shameerali.kolothum.thodi@huawei.com Signed-off-by: Alex Williamson --- .../vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 74 ++++++++++++++++++- 1 file changed, 71 insertions(+), 3 deletions(-) diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c index f3b74a06edb6..c8658636a84c 100644 --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c @@ -863,7 +863,7 @@ static const struct file_operations hisi_acc_vf_save_fops = { }; static struct hisi_acc_vf_migration_file * -hisi_acc_vf_stop_copy(struct hisi_acc_vf_core_device *hisi_acc_vdev) +hisi_acc_open_saving_migf(struct hisi_acc_vf_core_device *hisi_acc_vdev) { struct hisi_acc_vf_migration_file *migf; int ret; @@ -885,7 +885,7 @@ hisi_acc_vf_stop_copy(struct hisi_acc_vf_core_device *hisi_acc_vdev) mutex_init(&migf->lock); migf->hisi_acc_vdev = hisi_acc_vdev; - ret = vf_qm_state_save(hisi_acc_vdev, migf); + ret = vf_qm_get_match_data(hisi_acc_vdev, &migf->vf_data); if (ret) { fput(migf->filp); return ERR_PTR(ret); @@ -894,6 +894,44 @@ hisi_acc_vf_stop_copy(struct hisi_acc_vf_core_device *hisi_acc_vdev) return migf; } +static struct hisi_acc_vf_migration_file * +hisi_acc_vf_pre_copy(struct hisi_acc_vf_core_device *hisi_acc_vdev) +{ + struct hisi_acc_vf_migration_file *migf; + + migf = hisi_acc_open_saving_migf(hisi_acc_vdev); + if (IS_ERR(migf)) + return migf; + + migf->total_length = QM_MATCH_SIZE; + return migf; +} + +static struct hisi_acc_vf_migration_file * +hisi_acc_vf_stop_copy(struct hisi_acc_vf_core_device *hisi_acc_vdev, bool open) +{ + int ret; + struct hisi_acc_vf_migration_file *migf = NULL; + + if (open) { + /* + * Userspace didn't use PRECOPY support. Hence saving_migf + * is not opened yet. 
+ */ + migf = hisi_acc_open_saving_migf(hisi_acc_vdev); + if (IS_ERR(migf)) + return migf; + } else { + migf = hisi_acc_vdev->saving_migf; + } + + ret = vf_qm_state_save(hisi_acc_vdev, migf); + if (ret) + return ERR_PTR(ret); + + return open ? migf : NULL; +} + static int hisi_acc_vf_stop_device(struct hisi_acc_vf_core_device *hisi_acc_vdev) { struct device *dev = &hisi_acc_vdev->vf_dev->dev; @@ -921,6 +959,31 @@ hisi_acc_vf_set_device_state(struct hisi_acc_vf_core_device *hisi_acc_vdev, u32 cur = hisi_acc_vdev->mig_state; int ret; + if (cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_PRE_COPY) { + struct hisi_acc_vf_migration_file *migf; + + migf = hisi_acc_vf_pre_copy(hisi_acc_vdev); + if (IS_ERR(migf)) + return ERR_CAST(migf); + get_file(migf->filp); + hisi_acc_vdev->saving_migf = migf; + return migf->filp; + } + + if (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_STOP_COPY) { + struct hisi_acc_vf_migration_file *migf; + + ret = hisi_acc_vf_stop_device(hisi_acc_vdev); + if (ret) + return ERR_PTR(ret); + + migf = hisi_acc_vf_stop_copy(hisi_acc_vdev, false); + if (IS_ERR(migf)) + return ERR_CAST(migf); + + return NULL; + } + if (cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_STOP) { ret = hisi_acc_vf_stop_device(hisi_acc_vdev); if (ret) @@ -931,7 +994,7 @@ hisi_acc_vf_set_device_state(struct hisi_acc_vf_core_device *hisi_acc_vdev, if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_STOP_COPY) { struct hisi_acc_vf_migration_file *migf; - migf = hisi_acc_vf_stop_copy(hisi_acc_vdev); + migf = hisi_acc_vf_stop_copy(hisi_acc_vdev, true); if (IS_ERR(migf)) return ERR_CAST(migf); get_file(migf->filp); @@ -963,6 +1026,11 @@ hisi_acc_vf_set_device_state(struct hisi_acc_vf_core_device *hisi_acc_vdev, return NULL; } + if (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_RUNNING) { + hisi_acc_vf_disable_fds(hisi_acc_vdev); + return NULL; + } + if (cur == VFIO_DEVICE_STATE_STOP && new == 
VFIO_DEVICE_STATE_RUNNING) { hisi_acc_vf_start_device(hisi_acc_vdev); return NULL; From 190125adcad4c5850fd74ecd697e20a446b74ed8 Mon Sep 17 00:00:00 2001 From: Shameer Kolothum Date: Wed, 23 Nov 2022 11:32:35 +0000 Subject: [PATCH 3454/4122] hisi_acc_vfio_pci: Move the dev compatibility tests for early check Instead of waiting till data transfer is complete to perform dev compatibility, do it as soon as we have enough data to perform the check. This will be useful when we enable the support for PRE_COPY. Signed-off-by: Shameer Kolothum Link: https://lore.kernel.org/r/20221123113236.896-4-shameerali.kolothum.thodi@huawei.com Signed-off-by: Alex Williamson --- .../vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 19 +++++++------------ .../vfio/pci/hisilicon/hisi_acc_vfio_pci.h | 1 + 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c index c8658636a84c..9a51f41e1d2a 100644 --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c @@ -360,8 +360,8 @@ static int vf_qm_check_match(struct hisi_acc_vf_core_device *hisi_acc_vdev, u32 que_iso_state; int ret; - if (migf->total_length < QM_MATCH_SIZE) - return -EINVAL; + if (migf->total_length < QM_MATCH_SIZE || hisi_acc_vdev->match_done) + return 0; if (vf_data->acc_magic != ACC_DEV_MAGIC) { dev_err(dev, "failed to match ACC_DEV_MAGIC\n"); @@ -406,6 +406,7 @@ static int vf_qm_check_match(struct hisi_acc_vf_core_device *hisi_acc_vdev, } hisi_acc_vdev->vf_qm_state = vf_data->vf_qm_state; + hisi_acc_vdev->match_done = true; return 0; } @@ -493,10 +494,6 @@ static int vf_qm_state_save(struct hisi_acc_vf_core_device *hisi_acc_vdev, struct device *dev = &vf_qm->pdev->dev; int ret; - ret = vf_qm_get_match_data(hisi_acc_vdev, vf_data); - if (ret) - return ret; - if (unlikely(qm_wait_dev_not_ready(vf_qm))) { /* Update state and return with match data */ vf_data->vf_qm_state = QM_NOT_READY; 
@@ -673,12 +670,6 @@ static int hisi_acc_vf_load_state(struct hisi_acc_vf_core_device *hisi_acc_vdev) struct hisi_acc_vf_migration_file *migf = hisi_acc_vdev->resuming_migf; int ret; - /* Check dev compatibility */ - ret = vf_qm_check_match(hisi_acc_vdev, migf); - if (ret) { - dev_err(dev, "failed to match the VF!\n"); - return ret; - } /* Recover data to VF */ ret = vf_qm_load_data(hisi_acc_vdev, migf); if (ret) { @@ -732,6 +723,10 @@ static ssize_t hisi_acc_vf_resume_write(struct file *filp, const char __user *bu *pos += len; done = len; migf->total_length += len; + + ret = vf_qm_check_match(migf->hisi_acc_vdev, migf); + if (ret) + done = -EFAULT; out_unlock: mutex_unlock(&migf->lock); return done; diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h index 11d51345f5b5..dcabfeec6ca1 100644 --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h @@ -98,6 +98,7 @@ struct hisi_acc_vf_migration_file { struct hisi_acc_vf_core_device { struct vfio_pci_core_device core_device; + u8 match_done:1; u8 deferred_reset:1; /* For migration state */ struct mutex state_mutex; From f2240b4441cc5bd87820371ee79ad7c9125e76b6 Mon Sep 17 00:00:00 2001 From: Shameer Kolothum Date: Wed, 23 Nov 2022 11:32:36 +0000 Subject: [PATCH 3455/4122] hisi_acc_vfio_pci: Enable PRE_COPY flag Now that we have everything to support the PRE_COPY state, enable it. 
Signed-off-by: Shameer Kolothum Link: https://lore.kernel.org/r/20221123113236.896-5-shameerali.kolothum.thodi@huawei.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c index 9a51f41e1d2a..51941bb4f31f 100644 --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c @@ -1351,7 +1351,7 @@ static int hisi_acc_vfio_pci_migrn_init_dev(struct vfio_device *core_vdev) hisi_acc_vdev->vf_dev = pdev; mutex_init(&hisi_acc_vdev->state_mutex); - core_vdev->migration_flags = VFIO_MIGRATION_STOP_COPY; + core_vdev->migration_flags = VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_PRE_COPY; core_vdev->mig_ops = &hisi_acc_vfio_pci_migrn_state_ops; return vfio_pci_core_init_dev(core_vdev); From 533aae7c94dbc2b14301cfd68ae7e0e90f0c8438 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 5 Dec 2022 13:39:02 +0100 Subject: [PATCH 3456/4122] gpiolib: cdev: fix NULL-pointer dereferences There are several places where we can crash the kernel by requesting lines, unbinding the GPIO device, then calling any of the system calls relevant to the GPIO character device's anonymous file descriptors: ioctl(), read(), poll(). While I observed it with the GPIO simulator, it will also happen for any of the GPIO devices that can be hot-unplugged - for instance any HID GPIO expander (e.g. CP2112). This affects both v1 and v2 uAPI. This fixes it partially by checking if gdev->chip is not NULL but it doesn't entirely remedy the situation as we still have a race condition in which another thread can remove the device after the check.
Fixes: d7c51b47ac11 ("gpio: userspace ABI for reading/writing GPIO lines") Fixes: 3c0d9c635ae2 ("gpiolib: cdev: support GPIO_V2_GET_LINE_IOCTL and GPIO_V2_LINE_GET_VALUES_IOCTL") Fixes: aad955842d1c ("gpiolib: cdev: support GPIO_V2_GET_LINEINFO_IOCTL and GPIO_V2_GET_LINEINFO_WATCH_IOCTL") Fixes: a54756cb24ea ("gpiolib: cdev: support GPIO_V2_LINE_SET_CONFIG_IOCTL") Fixes: 7b8e00d98168 ("gpiolib: cdev: support GPIO_V2_LINE_SET_VALUES_IOCTL") Signed-off-by: Bartosz Golaszewski Reviewed-by: Andy Shevchenko Reviewed-by: Linus Walleij --- drivers/gpio/gpiolib-cdev.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c index 08606f32372c..ac10c9494bf0 100644 --- a/drivers/gpio/gpiolib-cdev.c +++ b/drivers/gpio/gpiolib-cdev.c @@ -201,6 +201,9 @@ static long linehandle_ioctl(struct file *file, unsigned int cmd, unsigned int i; int ret; + if (!lh->gdev->chip) + return -ENODEV; + switch (cmd) { case GPIOHANDLE_GET_LINE_VALUES_IOCTL: /* NOTE: It's okay to read values of output lines */ @@ -1384,6 +1387,9 @@ static long linereq_ioctl(struct file *file, unsigned int cmd, struct linereq *lr = file->private_data; void __user *ip = (void __user *)arg; + if (!lr->gdev->chip) + return -ENODEV; + switch (cmd) { case GPIO_V2_LINE_GET_VALUES_IOCTL: return linereq_get_values(lr, ip); @@ -1410,6 +1416,9 @@ static __poll_t linereq_poll(struct file *file, struct linereq *lr = file->private_data; __poll_t events = 0; + if (!lr->gdev->chip) + return EPOLLHUP | EPOLLERR; + poll_wait(file, &lr->wait, wait); if (!kfifo_is_empty_spinlocked_noirqsave(&lr->events, @@ -1429,6 +1438,9 @@ static ssize_t linereq_read(struct file *file, ssize_t bytes_read = 0; int ret; + if (!lr->gdev->chip) + return -ENODEV; + if (count < sizeof(le)) return -EINVAL; @@ -1716,6 +1728,9 @@ static __poll_t lineevent_poll(struct file *file, struct lineevent_state *le = file->private_data; __poll_t events = 0; + if (!le->gdev->chip) + return 
EPOLLHUP | EPOLLERR; + poll_wait(file, &le->wait, wait); if (!kfifo_is_empty_spinlocked_noirqsave(&le->events, &le->wait.lock)) @@ -1740,6 +1755,9 @@ static ssize_t lineevent_read(struct file *file, ssize_t ge_size; int ret; + if (!le->gdev->chip) + return -ENODEV; + /* * When compatible system call is being used the struct gpioevent_data, * in case of at least ia32, has different size due to the alignment @@ -1821,6 +1839,9 @@ static long lineevent_ioctl(struct file *file, unsigned int cmd, void __user *ip = (void __user *)arg; struct gpiohandle_data ghd; + if (!le->gdev->chip) + return -ENODEV; + /* * We can get the value for an event line but not set it, * because it is input by definition. @@ -2407,6 +2428,9 @@ static __poll_t lineinfo_watch_poll(struct file *file, struct gpio_chardev_data *cdev = file->private_data; __poll_t events = 0; + if (!cdev->gdev->chip) + return EPOLLHUP | EPOLLERR; + poll_wait(file, &cdev->wait, pollt); if (!kfifo_is_empty_spinlocked_noirqsave(&cdev->events, @@ -2425,6 +2449,9 @@ static ssize_t lineinfo_watch_read(struct file *file, char __user *buf, int ret; size_t event_size; + if (!cdev->gdev->chip) + return -ENODEV; + #ifndef CONFIG_GPIO_CDEV_V1 event_size = sizeof(struct gpio_v2_line_info_changed); if (count < event_size) From bdbbae241a04f387ba910b8609f95fad5f1470c7 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 5 Dec 2022 13:39:03 +0100 Subject: [PATCH 3457/4122] gpiolib: protect the GPIO device against being dropped while in use by user-space While any of the GPIO cdev syscalls is in progress, the kernel can call gpiochip_remove() (for instance, when a USB GPIO expander is disconnected) which will set gdev->chip to NULL after which any subsequent access will cause a crash. 
To avoid that: use an RW-semaphore in which the syscalls take it for reading (so that we don't needlessly prohibit the user-space from calling syscalls simultaneously) while gpiochip_remove() takes it for writing so that it can only happen once all syscalls return. Fixes: d7c51b47ac11 ("gpio: userspace ABI for reading/writing GPIO lines") Fixes: 3c0d9c635ae2 ("gpiolib: cdev: support GPIO_V2_GET_LINE_IOCTL and GPIO_V2_LINE_GET_VALUES_IOCTL") Fixes: aad955842d1c ("gpiolib: cdev: support GPIO_V2_GET_LINEINFO_IOCTL and GPIO_V2_GET_LINEINFO_WATCH_IOCTL") Fixes: a54756cb24ea ("gpiolib: cdev: support GPIO_V2_LINE_SET_CONFIG_IOCTL") Fixes: 7b8e00d98168 ("gpiolib: cdev: support GPIO_V2_LINE_SET_VALUES_IOCTL") Signed-off-by: Bartosz Golaszewski [Nick: fixed a build failure with CDEV_V1 disabled] Co-authored-by: Nick Hainke Reviewed-by: Kent Gibson Reviewed-by: Andy Shevchenko Reviewed-by: Linus Walleij --- drivers/gpio/gpiolib-cdev.c | 177 +++++++++++++++++++++++++++++++----- drivers/gpio/gpiolib.c | 4 + drivers/gpio/gpiolib.h | 5 + 3 files changed, 161 insertions(+), 25 deletions(-) diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c index ac10c9494bf0..f7289c2f3a3c 100644 --- a/drivers/gpio/gpiolib-cdev.c +++ b/drivers/gpio/gpiolib-cdev.c @@ -55,6 +55,50 @@ static_assert(IS_ALIGNED(sizeof(struct gpio_v2_line_values), 8)); * interface to gpiolib GPIOs via ioctl()s. 
*/ +typedef __poll_t (*poll_fn)(struct file *, struct poll_table_struct *); +typedef long (*ioctl_fn)(struct file *, unsigned int, unsigned long); +typedef ssize_t (*read_fn)(struct file *, char __user *, + size_t count, loff_t *); + +static __poll_t call_poll_locked(struct file *file, + struct poll_table_struct *wait, + struct gpio_device *gdev, poll_fn func) +{ + __poll_t ret; + + down_read(&gdev->sem); + ret = func(file, wait); + up_read(&gdev->sem); + + return ret; +} + +static long call_ioctl_locked(struct file *file, unsigned int cmd, + unsigned long arg, struct gpio_device *gdev, + ioctl_fn func) +{ + long ret; + + down_read(&gdev->sem); + ret = func(file, cmd, arg); + up_read(&gdev->sem); + + return ret; +} + +static ssize_t call_read_locked(struct file *file, char __user *buf, + size_t count, loff_t *f_ps, + struct gpio_device *gdev, read_fn func) +{ + ssize_t ret; + + down_read(&gdev->sem); + ret = func(file, buf, count, f_ps); + up_read(&gdev->sem); + + return ret; +} + /* * GPIO line handle management */ @@ -191,8 +235,8 @@ static long linehandle_set_config(struct linehandle_state *lh, return 0; } -static long linehandle_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) +static long linehandle_ioctl_unlocked(struct file *file, unsigned int cmd, + unsigned long arg) { struct linehandle_state *lh = file->private_data; void __user *ip = (void __user *)arg; @@ -250,6 +294,15 @@ static long linehandle_ioctl(struct file *file, unsigned int cmd, } } +static long linehandle_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct linehandle_state *lh = file->private_data; + + return call_ioctl_locked(file, cmd, arg, lh->gdev, + linehandle_ioctl_unlocked); +} + #ifdef CONFIG_COMPAT static long linehandle_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg) @@ -1381,8 +1434,8 @@ static long linereq_set_config(struct linereq *lr, void __user *ip) return ret; } -static long linereq_ioctl(struct file *file, unsigned 
int cmd, - unsigned long arg) +static long linereq_ioctl_unlocked(struct file *file, unsigned int cmd, + unsigned long arg) { struct linereq *lr = file->private_data; void __user *ip = (void __user *)arg; @@ -1402,6 +1455,15 @@ static long linereq_ioctl(struct file *file, unsigned int cmd, } } +static long linereq_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct linereq *lr = file->private_data; + + return call_ioctl_locked(file, cmd, arg, lr->gdev, + linereq_ioctl_unlocked); +} + #ifdef CONFIG_COMPAT static long linereq_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg) @@ -1410,8 +1472,8 @@ static long linereq_ioctl_compat(struct file *file, unsigned int cmd, } #endif -static __poll_t linereq_poll(struct file *file, - struct poll_table_struct *wait) +static __poll_t linereq_poll_unlocked(struct file *file, + struct poll_table_struct *wait) { struct linereq *lr = file->private_data; __poll_t events = 0; @@ -1428,10 +1490,16 @@ static __poll_t linereq_poll(struct file *file, return events; } -static ssize_t linereq_read(struct file *file, - char __user *buf, - size_t count, - loff_t *f_ps) +static __poll_t linereq_poll(struct file *file, + struct poll_table_struct *wait) +{ + struct linereq *lr = file->private_data; + + return call_poll_locked(file, wait, lr->gdev, linereq_poll_unlocked); +} + +static ssize_t linereq_read_unlocked(struct file *file, char __user *buf, + size_t count, loff_t *f_ps) { struct linereq *lr = file->private_data; struct gpio_v2_line_event le; @@ -1485,6 +1553,15 @@ static ssize_t linereq_read(struct file *file, return bytes_read; } +static ssize_t linereq_read(struct file *file, char __user *buf, + size_t count, loff_t *f_ps) +{ + struct linereq *lr = file->private_data; + + return call_read_locked(file, buf, count, f_ps, lr->gdev, + linereq_read_unlocked); +} + static void linereq_free(struct linereq *lr) { unsigned int i; @@ -1722,8 +1799,8 @@ struct lineevent_state { 
(GPIOEVENT_REQUEST_RISING_EDGE | \ GPIOEVENT_REQUEST_FALLING_EDGE) -static __poll_t lineevent_poll(struct file *file, - struct poll_table_struct *wait) +static __poll_t lineevent_poll_unlocked(struct file *file, + struct poll_table_struct *wait) { struct lineevent_state *le = file->private_data; __poll_t events = 0; @@ -1739,15 +1816,21 @@ static __poll_t lineevent_poll(struct file *file, return events; } +static __poll_t lineevent_poll(struct file *file, + struct poll_table_struct *wait) +{ + struct lineevent_state *le = file->private_data; + + return call_poll_locked(file, wait, le->gdev, lineevent_poll_unlocked); +} + struct compat_gpioeevent_data { compat_u64 timestamp; u32 id; }; -static ssize_t lineevent_read(struct file *file, - char __user *buf, - size_t count, - loff_t *f_ps) +static ssize_t lineevent_read_unlocked(struct file *file, char __user *buf, + size_t count, loff_t *f_ps) { struct lineevent_state *le = file->private_data; struct gpioevent_data ge; @@ -1815,6 +1898,15 @@ static ssize_t lineevent_read(struct file *file, return bytes_read; } +static ssize_t lineevent_read(struct file *file, char __user *buf, + size_t count, loff_t *f_ps) +{ + struct lineevent_state *le = file->private_data; + + return call_read_locked(file, buf, count, f_ps, le->gdev, + lineevent_read_unlocked); +} + static void lineevent_free(struct lineevent_state *le) { if (le->irq) @@ -1832,8 +1924,8 @@ static int lineevent_release(struct inode *inode, struct file *file) return 0; } -static long lineevent_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) +static long lineevent_ioctl_unlocked(struct file *file, unsigned int cmd, + unsigned long arg) { struct lineevent_state *le = file->private_data; void __user *ip = (void __user *)arg; @@ -1864,6 +1956,15 @@ static long lineevent_ioctl(struct file *file, unsigned int cmd, return -EINVAL; } +static long lineevent_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct lineevent_state *le = 
file->private_data; + + return call_ioctl_locked(file, cmd, arg, le->gdev, + lineevent_ioctl_unlocked); +} + #ifdef CONFIG_COMPAT static long lineevent_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg) @@ -2422,8 +2523,8 @@ static int lineinfo_changed_notify(struct notifier_block *nb, return NOTIFY_OK; } -static __poll_t lineinfo_watch_poll(struct file *file, - struct poll_table_struct *pollt) +static __poll_t lineinfo_watch_poll_unlocked(struct file *file, + struct poll_table_struct *pollt) { struct gpio_chardev_data *cdev = file->private_data; __poll_t events = 0; @@ -2440,8 +2541,17 @@ static __poll_t lineinfo_watch_poll(struct file *file, return events; } -static ssize_t lineinfo_watch_read(struct file *file, char __user *buf, - size_t count, loff_t *off) +static __poll_t lineinfo_watch_poll(struct file *file, + struct poll_table_struct *pollt) +{ + struct gpio_chardev_data *cdev = file->private_data; + + return call_poll_locked(file, pollt, cdev->gdev, + lineinfo_watch_poll_unlocked); +} + +static ssize_t lineinfo_watch_read_unlocked(struct file *file, char __user *buf, + size_t count, loff_t *off) { struct gpio_chardev_data *cdev = file->private_data; struct gpio_v2_line_info_changed event; @@ -2519,6 +2629,15 @@ static ssize_t lineinfo_watch_read(struct file *file, char __user *buf, return bytes_read; } +static ssize_t lineinfo_watch_read(struct file *file, char __user *buf, + size_t count, loff_t *off) +{ + struct gpio_chardev_data *cdev = file->private_data; + + return call_read_locked(file, buf, count, off, cdev->gdev, + lineinfo_watch_read_unlocked); +} + /** * gpio_chrdev_open() - open the chardev for ioctl operations * @inode: inode for this chardev @@ -2532,13 +2651,17 @@ static int gpio_chrdev_open(struct inode *inode, struct file *file) struct gpio_chardev_data *cdev; int ret = -ENOMEM; + down_read(&gdev->sem); + /* Fail on open if the backing gpiochip is gone */ - if (!gdev->chip) - return -ENODEV; + if (!gdev->chip) { + ret = 
-ENODEV; + goto out_unlock; + } cdev = kzalloc(sizeof(*cdev), GFP_KERNEL); if (!cdev) - return -ENOMEM; + goto out_unlock; cdev->watched_lines = bitmap_zalloc(gdev->chip->ngpio, GFP_KERNEL); if (!cdev->watched_lines) @@ -2561,6 +2684,8 @@ static int gpio_chrdev_open(struct inode *inode, struct file *file) if (ret) goto out_unregister_notifier; + up_read(&gdev->sem); + return ret; out_unregister_notifier: @@ -2570,6 +2695,8 @@ out_free_bitmap: bitmap_free(cdev->watched_lines); out_free_cdev: kfree(cdev); +out_unlock: + up_read(&gdev->sem); return ret; } diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 0058ee83989d..b8e17a4e38c5 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -790,6 +790,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, spin_unlock_irqrestore(&gpio_lock, flags); BLOCKING_INIT_NOTIFIER_HEAD(&gdev->notifier); + init_rwsem(&gdev->sem); #ifdef CONFIG_PINCTRL INIT_LIST_HEAD(&gdev->pin_ranges); @@ -924,6 +925,8 @@ void gpiochip_remove(struct gpio_chip *gc) unsigned long flags; unsigned int i; + down_write(&gdev->sem); + /* FIXME: should the legacy sysfs handling be moved to gpio_device? */ gpiochip_sysfs_unregister(gdev); gpiochip_free_hogs(gc); @@ -958,6 +961,7 @@ void gpiochip_remove(struct gpio_chip *gc) * gone. 
*/ gcdev_unregister(gdev); + up_write(&gdev->sem); put_device(&gdev->dev); } EXPORT_SYMBOL_GPL(gpiochip_remove); diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h index e443c1023a37..b3c2db6eba80 100644 --- a/drivers/gpio/gpiolib.h +++ b/drivers/gpio/gpiolib.h @@ -15,6 +15,7 @@ #include #include #include +#include #define GPIOCHIP_NAME "gpiochip" @@ -39,6 +40,9 @@ * @list: links gpio_device:s together for traversal * @notifier: used to notify subscribers about lines being requested, released * or reconfigured + * @sem: protects the structure from a NULL-pointer dereference of @chip by + * user-space operations when the device gets unregistered during + * a hot-unplug event * @pin_ranges: range of pins served by the GPIO driver * * This state container holds most of the runtime variable data @@ -60,6 +64,7 @@ struct gpio_device { void *data; struct list_head list; struct blocking_notifier_head notifier; + struct rw_semaphore sem; #ifdef CONFIG_PINCTRL /* From d074f0aebde5649f7a9f1807551efc019b8e81c4 Mon Sep 17 00:00:00 2001 From: ye xingchen Date: Wed, 7 Dec 2022 16:32:18 +0800 Subject: [PATCH 3458/4122] RDMA/hfi1: use sysfs_emit() instead of scnprintf() Follow the advice of Documentation/filesystems/sysfs.rst: show() should only use sysfs_emit() or sysfs_emit_at() when formatting the value to be returned to user space.
Signed-off-by: ye xingchen Link: https://lore.kernel.org/r/202212071632188074249@zte.com.cn Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hfi1/driver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 8e71bef9d982..bcc6bc0540f0 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -112,7 +112,7 @@ static int hfi1_caps_get(char *buffer, const struct kernel_param *kp) cap_mask &= ~HFI1_CAP_LOCKED_SMASK; cap_mask |= ((cap_mask & HFI1_CAP_K2U) << HFI1_CAP_USER_SHIFT); - return scnprintf(buffer, PAGE_SIZE, "0x%lx", cap_mask); + return sysfs_emit(buffer, "0x%lx\n", cap_mask); } struct pci_dev *get_pci_dev(struct rvt_dev_info *rdi) From e13d23a404f2e6dfaf8b1ef7d161a0836fce4fa5 Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Thu, 10 Nov 2022 19:06:18 +0100 Subject: [PATCH 3459/4122] powerpc: export the CPU node count At boot time, the FDT is parsed to compute the number of CPUs. In addition count the number of CPU nodes and export it. This is useful when building the FDT for a kexeced kernel since we need to take in account the CPU node added since the boot time during CPU hotplug operations. Signed-off-by: Laurent Dufour Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221110180619.15796-2-ldufour@linux.ibm.com --- arch/powerpc/include/asm/prom.h | 1 + arch/powerpc/kernel/prom.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index 2e82820fbd64..c0107d8ddd8c 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -85,6 +85,7 @@ struct of_drc_info { extern int of_read_drc_info_cell(struct property **prop, const __be32 **curval, struct of_drc_info *data); +extern unsigned int boot_cpu_node_count; /* * There are two methods for telling firmware what our capabilities are. 
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 1eed87d954ba..645f4450dfc3 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -72,6 +72,7 @@ int __initdata iommu_is_off; int __initdata iommu_force_on; unsigned long tce_alloc_start, tce_alloc_end; u64 ppc64_rma_size; +unsigned int boot_cpu_node_count __ro_after_init; #endif static phys_addr_t first_memblock_size; static int __initdata boot_cpu_count; @@ -335,6 +336,8 @@ static int __init early_init_dt_scan_cpus(unsigned long node, if (type == NULL || strcmp(type, "cpu") != 0) return 0; + boot_cpu_node_count++; + /* Get physical cpuid */ intserv = of_get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s", &len); if (!intserv) From 340a4a9f8773e102cc5ef531665970a686dfa245 Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Thu, 10 Nov 2022 19:06:19 +0100 Subject: [PATCH 3460/4122] powerpc: Take into account additional CPU nodes when building kexec FDT On a system with a large number of CPUs, the creation of the FDT for a kexec kernel may fail because the allocated FDT is not large enough. When this happens, a message like the following is displayed on the console: Unable to add ibm,processor-vadd-size property: FDT_ERR_NOSPACE The property's name may change depending on when the buffer overwrite is detected. Obviously the created FDT is missing information, and it is expected that the system dump or kexec kernel will fail to run properly. When the FDT is allocated, the size of the FDT the kernel received at boot time is used and an extra size can be applied. Currently, only memory added after boot time is taken into account, not the CPU nodes. The extra size should take into account these additional CPU nodes and compute the required extra space. To achieve that, the size of a CPU node, including its subnodes, is computed once and multiplied by the number of additional CPU nodes.
The assumption is that the size of the CPU node is the _same_ for all the nodes; the only variable part should be the name "PowerPC,POWERxx@##" where "##" may vary a little. Signed-off-by: Laurent Dufour [mpe: Don't shadow function name w/variable, minor coding style changes] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221110180619.15796-3-ldufour@linux.ibm.com --- arch/powerpc/kexec/file_load_64.c | 59 ++++++++++++++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c index 349a781cea0b..2500c37c628c 100644 --- a/arch/powerpc/kexec/file_load_64.c +++ b/arch/powerpc/kexec/file_load_64.c @@ -26,6 +26,7 @@ #include #include #include +#include struct umem_info { u64 *buf; /* data buffer for usable-memory property */ @@ -928,6 +929,45 @@ out: return ret; } +/** + * cpu_node_size - Compute the size of a CPU node in the FDT. + * This should be done only once and the value is stored in + * a static variable. + * Returns the max size of a CPU node in the FDT. + */ +static unsigned int cpu_node_size(void) +{ + static unsigned int size; + struct device_node *dn; + struct property *pp; + + /* + * Don't compute it twice, we are assuming that the per CPU node size + * doesn't change during the system's life. + */ + if (size) + return size; + + dn = of_find_node_by_type(NULL, "cpu"); + if (WARN_ON_ONCE(!dn)) { + // Unlikely to happen + return 0; + } + + /* + * We compute the sub node size for a CPU node, assuming it + * will be the same for all. + */ + size += strlen(dn->name) + 5; + for_each_property_of_node(dn, pp) { + size += strlen(pp->name); + size += pp->length; + } + + of_node_put(dn); + return size; +} + /** * kexec_extra_fdt_size_ppc64 - Return the estimated additional size needed to * setup FDT for kexec/kdump kernel.
@@ -937,6 +977,8 @@ out: */ unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image) { + unsigned int cpu_nodes, extra_size; + struct device_node *dn; u64 usm_entries; if (image->type != KEXEC_TYPE_CRASH) @@ -949,7 +991,22 @@ unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image) */ usm_entries = ((memblock_end_of_DRAM() / drmem_lmb_size()) + (2 * (resource_size(&crashk_res) / drmem_lmb_size()))); - return (unsigned int)(usm_entries * sizeof(u64)); + + extra_size = (unsigned int)(usm_entries * sizeof(u64)); + + /* + * Get the number of CPU nodes in the current DT. This allows to + * reserve places for CPU nodes added since the boot time. + */ + cpu_nodes = 0; + for_each_node_by_type(dn, "cpu") { + cpu_nodes++; + } + + if (cpu_nodes > boot_cpu_node_count) + extra_size += (cpu_nodes - boot_cpu_node_count) * cpu_node_size(); + + return extra_size; } /** From 9b574cfab7d4e68c67c4ee4fcde912ef54a25b88 Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Fri, 25 Nov 2022 18:32:04 +0100 Subject: [PATCH 3461/4122] powerpc/pseries: reset the RCU watchdogs after a LPM The RCU watchdog timer should be reset when restarting the CPU after a Live Partition Mobility operation. Signed-off-by: Laurent Dufour Acked-by: Nicholas Piggin [mpe: Combine comments into a single comment block] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221125173204.15329-1-ldufour@linux.ibm.com --- arch/powerpc/platforms/pseries/mobility.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 634fac5db3f9..4cea71aa0f41 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -635,10 +635,13 @@ retry: prod_others(); } /* - * Execution may have been suspended for several seconds, so - * reset the watchdog. + * Execution may have been suspended for several seconds, so reset + * the watchdogs. 
touch_nmi_watchdog() also touches the soft lockup + * watchdog. */ + rcu_cpu_stall_reset(); touch_nmi_watchdog(); + return ret; } From f6aa37c51ec0d053ee34c235bfe0e666618a3baf Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Mon, 14 Nov 2022 17:01:50 +0100 Subject: [PATCH 3462/4122] powerpc/pseries: unregister VPA when hot unplugging a CPU The VPA should be unregistered when offlining a CPU. Otherwise there could be a short window where 2 CPUs could share the same VPA. This happens because the hypervisor is still keeping the VPA attached to the vCPU even if it became offline. Here is a potential situation: 1. remove proc A, 2. add proc B. If proc B gets proc A's place in cpu_present_mask, then it registers proc A's VPAs. 3. If proc A is then re-added to the LP, its threads are sharing VPAs with proc B briefly as they come online. As the hypervisor may check for the VPA's yield_count field oddity, it may detect an unexpected value and kill the LPAR. Suggested-by: Nathan Lynch Signed-off-by: Laurent Dufour Reviewed-by: Nathan Lynch [mpe: s/cpu_present_map/cpu_present_mask/ in change log] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221114160150.13554-1-ldufour@linux.ibm.com --- arch/powerpc/platforms/pseries/hotplug-cpu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index e0a7ac5db15d..090ae5a1e0f5 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -70,6 +70,7 @@ static void pseries_cpu_offline_self(void) xics_teardown_cpu(); unregister_slb_shadow(hwcpu); + unregister_vpa(hwcpu); rtas_stop_self(); /* Should never get here...
*/ From 336e2554ec99eb97616004c791ee89abe96bdab2 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 18 Nov 2022 09:07:39 -0600 Subject: [PATCH 3463/4122] powerpc/rtas: document rtas_call() rtas_call() has a complex calling convention, non-standard return values, and many users. Add kernel-doc for it and remove the less structured commentary from rtas.h. Signed-off-by: Nathan Lynch Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221118150751.469393-2-nathanl@linux.ibm.com --- arch/powerpc/include/asm/rtas.h | 15 --------- arch/powerpc/kernel/rtas.c | 58 +++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 56319aea646e..479a95cb2770 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -33,21 +33,6 @@ #define RTAS_THREADS_ACTIVE -9005 /* Multiple processor threads active */ #define RTAS_OUTSTANDING_COPROC -9006 /* Outstanding coprocessor operations */ -/* - * In general to call RTAS use rtas_token("string") to lookup - * an RTAS token for the given string (e.g. "event-scan"). - * To actually perform the call use - * ret = rtas_call(token, n_in, n_out, ...) - * Where n_in is the number of input parameters and - * n_out is the number of output parameters - * - * If the "string" is invalid on this system, RTAS_UNKNOWN_SERVICE - * will be returned as a token. rtas_call() does look for this - * token and error out gracefully so rtas_call(rtas_token("str"), ...) - * may be safely used for one-shot calls to RTAS. 
- * - */ - /* RTAS event classes */ #define RTAS_INTERNAL_ERROR 0x80000000 /* set bit 0 */ #define RTAS_EPOW_WARNING 0x40000000 /* set bit 1 */ diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index e847f9b1c5b9..c12dd5ed5e00 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -467,6 +467,64 @@ void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, static int ibm_open_errinjct_token; static int ibm_errinjct_token; +/** + * rtas_call() - Invoke an RTAS firmware function. + * @token: Identifies the function being invoked. + * @nargs: Number of input parameters. Does not include token. + * @nret: Number of output parameters, including the call status. + * @outputs: Array of @nret output words. + * @....: List of @nargs input parameters. + * + * Invokes the RTAS function indicated by @token, which the caller + * should obtain via rtas_token(). + * + * The @nargs and @nret arguments must match the number of input and + * output parameters specified for the RTAS function. + * + * rtas_call() returns RTAS status codes, not conventional Linux errno + * values. Callers must translate any failure to an appropriate errno + * in syscall context. Most callers of RTAS functions that can return + * -2 or 990x should use rtas_busy_delay() to correctly handle those + * statuses before calling again. + * + * The return value descriptions are adapted from 7.2.8 [RTAS] Return + * Codes of the PAPR and CHRP specifications. + * + * Context: Process context preferably, interrupt context if + * necessary. Acquires an internal spinlock and may perform + * GFP_ATOMIC slab allocation in error path. Unsafe for NMI + * context. + * Return: + * * 0 - RTAS function call succeeded. + * * -1 - RTAS function encountered a hardware or + * platform error, or the token is invalid, + * or the function is restricted by kernel policy. 
+ * * -2 - Specs say "A necessary hardware device was busy, + * and the requested function could not be + * performed. The operation should be retried at + * a later time." This is misleading, at least with + * respect to current RTAS implementations. What it + * usually means in practice is that the function + * could not be completed while meeting RTAS's + * deadline for returning control to the OS (250us + * for PAPR/PowerVM, typically), but the call may be + * immediately reattempted to resume work on it. + * * -3 - Parameter error. + * * -7 - Unexpected state change. + * * 9000...9899 - Vendor-specific success codes. + * * 9900...9905 - Advisory extended delay. Caller should try + * again after ~10^x ms has elapsed, where x is + * the last digit of the status [0-5]. Again going + * beyond the PAPR text, 990x on PowerVM indicates + * contention for RTAS-internal resources. Other + * RTAS call sequences in progress should be + * allowed to complete before reattempting the + * call. + * * -9000 - Multi-level isolation error. + * * -9999...-9004 - Vendor-specific error codes. + * * Additional negative values - Function-specific error. + * * Additional positive values - Function-specific success. + */ int rtas_call(int token, int nargs, int nret, int *outputs, ...) { va_list list; From b10af504a2015d12c566b6b0a4c7e3b602949eeb Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 18 Nov 2022 09:07:40 -0600 Subject: [PATCH 3464/4122] powerpc/rtasd: use correct OF API for event scan rate rtas_token() should be used only for properties that are RTAS function tokens. "rtas-event-scan-rate" does not contain a function token, but it has the same size/format as token properties so reading it with rtas_token() happens to work. Convert to of_property_read_u32(). 
Signed-off-by: Nathan Lynch Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221118150751.469393-3-nathanl@linux.ibm.com --- arch/powerpc/kernel/rtasd.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 5270b450bbde..cc56ac6ba4b0 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -499,6 +500,8 @@ EXPORT_SYMBOL_GPL(rtas_cancel_event_scan); static int __init rtas_event_scan_init(void) { + int err; + if (!machine_is(pseries) && !machine_is(chrp)) return 0; @@ -509,8 +512,8 @@ static int __init rtas_event_scan_init(void) return -ENODEV; } - rtas_event_scan_rate = rtas_token("rtas-event-scan-rate"); - if (rtas_event_scan_rate == RTAS_UNKNOWN_SERVICE) { + err = of_property_read_u32(rtas.dev, "rtas-event-scan-rate", &rtas_event_scan_rate); + if (err) { printk(KERN_ERR "rtasd: no rtas-event-scan-rate on system\n"); return -ENODEV; } From ed2213bfb192ab51f09f12e9b49b5d482c6493f3 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 18 Nov 2022 09:07:41 -0600 Subject: [PATCH 3465/4122] powerpc/rtas: avoid device tree lookups in rtas_os_term() rtas_os_term() is called during panic. Its behavior depends on a couple of conditions in the /rtas node of the device tree, the traversal of which entails locking and local IRQ state changes. If the kernel panics while devtree_lock is held, rtas_os_term() as currently written could hang. Instead of discovering the relevant characteristics at panic time, cache them in file-static variables at boot. Note the lookup for "ibm,extended-os-term" is converted to of_property_read_bool() since it is a boolean property, not an RTAS function token. 
Signed-off-by: Nathan Lynch Reviewed-by: Nicholas Piggin Reviewed-by: Andrew Donnellan [mpe: Incorporate suggested change from Nick] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221118150751.469393-4-nathanl@linux.ibm.com --- arch/powerpc/kernel/rtas.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index c12dd5ed5e00..db43cbdcc74c 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -947,6 +947,7 @@ void __noreturn rtas_halt(void) /* Must be in the RMO region, so we place it here */ static char rtas_os_term_buf[2048]; +static s32 ibm_os_term_token = RTAS_UNKNOWN_SERVICE; void rtas_os_term(char *str) { @@ -958,14 +959,13 @@ void rtas_os_term(char *str) * this property may terminate the partition which we want to avoid * since it interferes with panic_timeout. */ - if (RTAS_UNKNOWN_SERVICE == rtas_token("ibm,os-term") || - RTAS_UNKNOWN_SERVICE == rtas_token("ibm,extended-os-term")) + if (ibm_os_term_token == RTAS_UNKNOWN_SERVICE) return; snprintf(rtas_os_term_buf, 2048, "OS panic: %s", str); do { - status = rtas_call(rtas_token("ibm,os-term"), 1, 1, NULL, + status = rtas_call(ibm_os_term_token, 1, 1, NULL, __pa(rtas_os_term_buf)); } while (rtas_busy_delay(status)); @@ -1335,6 +1335,13 @@ void __init rtas_initialize(void) no_entry = of_property_read_u32(rtas.dev, "linux,rtas-entry", &entry); rtas.entry = no_entry ? rtas.base : entry; + /* + * Discover these now to avoid device tree lookups in the + * panic path. 
+ */ + if (of_property_read_bool(rtas.dev, "ibm,extended-os-term")) + ibm_os_term_token = rtas_token("ibm,os-term"); + /* If RTAS was found, allocate the RMO buffer for it and look for * the stop-self token if any */ From 6c606e57eecc37d6b36d732b1ff7e55b7dc32dd4 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 18 Nov 2022 09:07:42 -0600 Subject: [PATCH 3466/4122] powerpc/rtas: avoid scheduling in rtas_os_term() It's unsafe to use rtas_busy_delay() to handle a busy status from the ibm,os-term RTAS function in rtas_os_term(): Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b BUG: sleeping function called from invalid context at arch/powerpc/kernel/rtas.c:618 in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 1, name: swapper/0 preempt_count: 2, expected: 0 CPU: 7 PID: 1 Comm: swapper/0 Tainted: G D 6.0.0-rc5-02182-gf8553a572277-dirty #9 Call Trace: [c000000007b8f000] [c000000001337110] dump_stack_lvl+0xb4/0x110 (unreliable) [c000000007b8f040] [c0000000002440e4] __might_resched+0x394/0x3c0 [c000000007b8f0e0] [c00000000004f680] rtas_busy_delay+0x120/0x1b0 [c000000007b8f100] [c000000000052d04] rtas_os_term+0xb8/0xf4 [c000000007b8f180] [c0000000001150fc] pseries_panic+0x50/0x68 [c000000007b8f1f0] [c000000000036354] ppc_panic_platform_handler+0x34/0x50 [c000000007b8f210] [c0000000002303c4] notifier_call_chain+0xd4/0x1c0 [c000000007b8f2b0] [c0000000002306cc] atomic_notifier_call_chain+0xac/0x1c0 [c000000007b8f2f0] [c0000000001d62b8] panic+0x228/0x4d0 [c000000007b8f390] [c0000000001e573c] do_exit+0x140c/0x1420 [c000000007b8f480] [c0000000001e586c] make_task_dead+0xdc/0x200 Use rtas_busy_delay_time() instead, which signals without side effects whether to attempt the ibm,os-term RTAS call again. 
Signed-off-by: Nathan Lynch Reviewed-by: Nicholas Piggin Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221118150751.469393-5-nathanl@linux.ibm.com --- arch/powerpc/kernel/rtas.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index db43cbdcc74c..f21b39fcaf99 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -964,10 +964,15 @@ void rtas_os_term(char *str) snprintf(rtas_os_term_buf, 2048, "OS panic: %s", str); + /* + * Keep calling as long as RTAS returns a "try again" status, + * but don't use rtas_busy_delay(), which potentially + * schedules. + */ do { status = rtas_call(ibm_os_term_token, 1, 1, NULL, __pa(rtas_os_term_buf)); - } while (rtas_busy_delay(status)); + } while (rtas_busy_delay_time(status)); if (status != 0) printk(KERN_EMERG "ibm,os-term call failed %d\n", status); From 9aafbfa5f57a4b75bafd3bed0191e8429c5fa618 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 18 Nov 2022 09:07:43 -0600 Subject: [PATCH 3467/4122] powerpc/pseries/eeh: use correct API for error log size rtas-error-log-max is not the name of an RTAS function, so rtas_token() is not the appropriate API for retrieving its value. We already have rtas_get_error_log_max() which returns a sensible value if the property is absent for any reason, so use that instead. 
Fixes: 8d633291b4fc ("powerpc/eeh: pseries platform EEH error log retrieval") Signed-off-by: Nathan Lynch [mpe: Drop no-longer possible error handling as noticed by ajd] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221118150751.469393-6-nathanl@linux.ibm.com --- arch/powerpc/platforms/pseries/eeh_pseries.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index ea890037843c..6b507b62ce8f 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -848,16 +848,7 @@ static int __init eeh_pseries_init(void) } /* Initialize error log size */ - eeh_error_buf_size = rtas_token("rtas-error-log-max"); - if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) { - pr_info("%s: unknown EEH error log size\n", - __func__); - eeh_error_buf_size = 1024; - } else if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) { - pr_info("%s: EEH error log size %d exceeds the maximal %d\n", - __func__, eeh_error_buf_size, RTAS_ERROR_LOG_MAX); - eeh_error_buf_size = RTAS_ERROR_LOG_MAX; - } + eeh_error_buf_size = rtas_get_error_log_max(); /* Set EEH probe mode */ eeh_add_flag(EEH_PROBE_MODE_DEVTREE | EEH_ENABLE_IO_FOR_LOG); From c67a0e411d0ffe0648fe84e25e9f899ce770feb3 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 18 Nov 2022 09:07:44 -0600 Subject: [PATCH 3468/4122] powerpc/rtas: clean up rtas_error_log_max initialization The code in rtas_get_error_log_max() doesn't cause problems in practice, but there are no measures to ensure that the lazy initialization of the static rtas_error_log_max variable is atomic, and it's not worth adding them. Initialize the static rtas_error_log_max variable at boot when we're single-threaded instead of lazily on first use. 
Use the more appropriate of_property_read_u32() API instead of rtas_token() to consult the "rtas-error-log-max" property, which is not the name of an RTAS function. Convert use of printk() to pr_warn() and distinguish the possible error cases. Signed-off-by: Nathan Lynch Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221118150751.469393-7-nathanl@linux.ibm.com --- arch/powerpc/kernel/rtas.c | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index f21b39fcaf99..a1b637259e3d 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -353,6 +353,9 @@ int rtas_service_present(const char *service) EXPORT_SYMBOL(rtas_service_present); #ifdef CONFIG_RTAS_ERROR_LOGGING + +static u32 rtas_error_log_max __ro_after_init = RTAS_ERROR_LOG_MAX; + /* * Return the firmware-specified size of the error log buffer * for all rtas calls that require an error buffer argument. 
@@ -360,21 +363,30 @@ EXPORT_SYMBOL(rtas_service_present); */ int rtas_get_error_log_max(void) { - static int rtas_error_log_max; - if (rtas_error_log_max) - return rtas_error_log_max; - - rtas_error_log_max = rtas_token ("rtas-error-log-max"); - if ((rtas_error_log_max == RTAS_UNKNOWN_SERVICE) || - (rtas_error_log_max > RTAS_ERROR_LOG_MAX)) { - printk (KERN_WARNING "RTAS: bad log buffer size %d\n", - rtas_error_log_max); - rtas_error_log_max = RTAS_ERROR_LOG_MAX; - } return rtas_error_log_max; } EXPORT_SYMBOL(rtas_get_error_log_max); +static void __init init_error_log_max(void) +{ + static const char propname[] __initconst = "rtas-error-log-max"; + u32 max; + + if (of_property_read_u32(rtas.dev, propname, &max)) { + pr_warn("%s not found, using default of %u\n", + propname, RTAS_ERROR_LOG_MAX); + max = RTAS_ERROR_LOG_MAX; + } + + if (max > RTAS_ERROR_LOG_MAX) { + pr_warn("%s = %u, clamping max error log size to %u\n", + propname, max, RTAS_ERROR_LOG_MAX); + max = RTAS_ERROR_LOG_MAX; + } + + rtas_error_log_max = max; +} + static char rtas_err_buf[RTAS_ERROR_LOG_MAX]; static int rtas_last_error_token; @@ -432,6 +444,7 @@ static char *__fetch_rtas_last_error(char *altbuf) #else /* CONFIG_RTAS_ERROR_LOGGING */ #define __fetch_rtas_last_error(x) NULL #define get_errorlog_buffer() NULL +static void __init init_error_log_max(void) {} #endif @@ -1340,6 +1353,8 @@ void __init rtas_initialize(void) no_entry = of_property_read_u32(rtas.dev, "linux,rtas-entry", &entry); rtas.entry = no_entry ? rtas.base : entry; + init_error_log_max(); + /* * Discover these now to avoid device tree lookups in the * panic path. From 9581f8a00777a073fdd8146659a51ca007cae8d6 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 18 Nov 2022 09:07:45 -0600 Subject: [PATCH 3469/4122] powerpc/rtas: clean up includes rtas.c used to host complex code related to pseries-specific guest migration and suspend, which used atomics, completions, hcalls, and CPU hotplug APIs. 
That's all been deleted or moved, so remove the include directives that have been rendered unnecessary. Sort the remainder (with linux/ before asm/) to impose some order on where future additions go. Signed-off-by: Nathan Lynch Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221118150751.469393-8-nathanl@linux.ibm.com --- arch/powerpc/kernel/rtas.c | 50 +++++++++++++++----------------------- 1 file changed, 20 insertions(+), 30 deletions(-) diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index a1b637259e3d..5ce17b4bd7eb 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -7,43 +7,33 @@ * Copyright (C) 2001 IBM. */ -#include -#include -#include -#include -#include -#include #include #include -#include -#include -#include -#include -#include +#include +#include +#include #include -#include -#include -#include -#include #include #include - -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include #include -#include -#include -#include -#include -#include + +#include +#include +#include +#include #include -#include +#include +#include +#include +#include /* This is here deliberately so it's only used in this file */ void enter_rtas(unsigned long); From f975b6559bac510f1b1b39637997bb240f0a9969 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 18 Nov 2022 09:07:46 -0600 Subject: [PATCH 3470/4122] powerpc/rtas: define pr_fmt and convert printk call sites Set pr_fmt to "rtas: " and convert the handful of printk() uses in rtas.c, adjusting the messages to remove now-redundant "RTAS" strings. Note that rtas_restart(), rtas_power_off(), and rtas_halt() all currently use printk() without specifying a log level. These have been changed to use pr_emerg(), which matches the behavior of rtas_os_term(). 
Signed-off-by: Nathan Lynch Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221118150751.469393-9-nathanl@linux.ibm.com --- arch/powerpc/kernel/rtas.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 5ce17b4bd7eb..10c19228aaa3 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -7,6 +7,8 @@ * Copyright (C) 2001 IBM. */ +#define pr_fmt(fmt) "rtas: " fmt + #include #include #include @@ -718,8 +720,7 @@ static int rtas_error_rc(int rtas_rc) rc = -ENODEV; break; default: - printk(KERN_ERR "%s: unexpected RTAS error %d\n", - __func__, rtas_rc); + pr_err("%s: unexpected error %d\n", __func__, rtas_rc); rc = -ERANGE; break; } @@ -923,8 +924,8 @@ void __noreturn rtas_restart(char *cmd) { if (rtas_flash_term_hook) rtas_flash_term_hook(SYS_RESTART); - printk("RTAS system-reboot returned %d\n", - rtas_call(rtas_token("system-reboot"), 0, 1, NULL)); + pr_emerg("system-reboot returned %d\n", + rtas_call(rtas_token("system-reboot"), 0, 1, NULL)); for (;;); } @@ -933,8 +934,8 @@ void rtas_power_off(void) if (rtas_flash_term_hook) rtas_flash_term_hook(SYS_POWER_OFF); /* allow power on only with power button press */ - printk("RTAS power-off returned %d\n", - rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1)); + pr_emerg("power-off returned %d\n", + rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1)); for (;;); } @@ -943,8 +944,8 @@ void __noreturn rtas_halt(void) if (rtas_flash_term_hook) rtas_flash_term_hook(SYS_HALT); /* allow power on only with power button press */ - printk("RTAS power-off returned %d\n", - rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1)); + pr_emerg("power-off returned %d\n", + rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1)); for (;;); } @@ -978,7 +979,7 @@ void rtas_os_term(char *str) } while (rtas_busy_delay_time(status)); if (status != 0) - printk(KERN_EMERG 
"ibm,os-term call failed %d\n", status); + pr_emerg("ibm,os-term call failed %d\n", status); } /** From 98c738c8cee6e5a58d4060862e2f8cf3cdc8a328 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 18 Nov 2022 09:07:47 -0600 Subject: [PATCH 3471/4122] powerpc/rtas: mandate RTAS syscall filtering CONFIG_PPC_RTAS_FILTER has been optional but default-enabled since its introduction. It's been enabled in enterprise distro kernels for a while without causing ABI breakage that wasn't easily fixed, and it prevents harmful abuses of the rtas syscall. Let's make it unconditional. Signed-off-by: Nathan Lynch Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221118150751.469393-10-nathanl@linux.ibm.com --- arch/powerpc/Kconfig | 13 ------------- arch/powerpc/kernel/rtas.c | 16 ---------------- 2 files changed, 29 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index e21d6de797d6..65952f62ea4b 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -1044,19 +1044,6 @@ config PPC_SECVAR_SYSFS read/write operations on these variables. Say Y if you have secure boot enabled and want to expose variables to userspace. -config PPC_RTAS_FILTER - bool "Enable filtering of RTAS syscalls" - default y - depends on PPC_RTAS - help - The RTAS syscall API has security issues that could be used to - compromise system integrity. This option enforces restrictions on the - RTAS calls and arguments passed by userspace programs to mitigate - these issues. - - Say Y unless you know what you are doing and the filter is causing - problems for you. 
- endmenu config ISA_DMA_API diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 10c19228aaa3..deded51a7978 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1050,8 +1050,6 @@ noinstr struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log return NULL; } -#ifdef CONFIG_PPC_RTAS_FILTER - /* * The sys_rtas syscall, as originally designed, allows root to pass * arbitrary physical addresses to RTAS calls. A number of RTAS calls @@ -1200,20 +1198,6 @@ static void __init rtas_syscall_filter_init(void) rtas_filters[i].token = rtas_token(rtas_filters[i].name); } -#else - -static bool block_rtas_call(int token, int nargs, - struct rtas_args *args) -{ - return false; -} - -static void __init rtas_syscall_filter_init(void) -{ -} - -#endif /* CONFIG_PPC_RTAS_FILTER */ - /* We assume to be passed big endian arguments */ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) { From 7ccb966779645636679a723588b7bae4f0a8d7d5 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 14 Nov 2022 10:42:25 -0800 Subject: [PATCH 3472/4122] PCI: aardvark: Switch to using devm_gpiod_get_optional() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Switch the driver to the generic version of gpiod API (and away from OF-specific variant), so that we can stop exporting devm_gpiod_get_from_of_node(). 
Link: https://lore.kernel.org/r/Y3KMEZFv6dpxA+Gv@google.com Signed-off-by: Dmitry Torokhov Signed-off-by: Bjorn Helgaas Reviewed-by: Linus Walleij Acked-by: Pali Rohár --- drivers/pci/controller/pci-aardvark.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/drivers/pci/controller/pci-aardvark.c b/drivers/pci/controller/pci-aardvark.c index ba36bbc5897d..513d8edf3a5c 100644 --- a/drivers/pci/controller/pci-aardvark.c +++ b/drivers/pci/controller/pci-aardvark.c @@ -1859,20 +1859,18 @@ static int advk_pcie_probe(struct platform_device *pdev) return ret; } - pcie->reset_gpio = devm_gpiod_get_from_of_node(dev, dev->of_node, - "reset-gpios", 0, - GPIOD_OUT_LOW, - "pcie1-reset"); + pcie->reset_gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW); ret = PTR_ERR_OR_ZERO(pcie->reset_gpio); if (ret) { - if (ret == -ENOENT) { - pcie->reset_gpio = NULL; - } else { - if (ret != -EPROBE_DEFER) - dev_err(dev, "Failed to get reset-gpio: %i\n", - ret); - return ret; - } + if (ret != -EPROBE_DEFER) + dev_err(dev, "Failed to get reset-gpio: %i\n", ret); + return ret; + } + + ret = gpiod_set_consumer_name(pcie->reset_gpio, "pcie1-reset"); + if (ret) { + dev_err(dev, "Failed to set reset gpio name: %d\n", ret); + return ret; } ret = of_pci_get_max_link_speed(dev->of_node); From 6d4671b534f6c084e92ef167a52dc47e55f636c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Tue, 27 Sep 2022 16:19:17 +0200 Subject: [PATCH 3473/4122] PCI: pciehp: Enable Command Completed Interrupt only if supported MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The No Command Completed Support bit in the Slot Capabilities register indicates whether Command Completed Interrupt Enable is unsupported. We already check whether No Command Completed Support bit is set in pcie_wait_cmd(), and do not wait in this case. 
Don't enable this Command Completed Interrupt at all if NCCS is set, so that when users dump configuration space from userspace, the dump does not confuse them by saying that Command Completed Interrupt is not supported, but it is enabled. Link: https://lore.kernel.org/r/20220927141926.8895-2-kabel@kernel.org Signed-off-by: Pali Rohár Signed-off-by: Marek Behún Signed-off-by: Bjorn Helgaas Reviewed-by: Lukas Wunner --- drivers/pci/hotplug/pciehp_hpc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 040ae076ec0e..10e9670eea0b 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -811,7 +811,9 @@ static void pcie_enable_notification(struct controller *ctrl) else cmd |= PCI_EXP_SLTCTL_PDCE; if (!pciehp_poll_mode) - cmd |= PCI_EXP_SLTCTL_HPIE | PCI_EXP_SLTCTL_CCIE; + cmd |= PCI_EXP_SLTCTL_HPIE; + if (!pciehp_poll_mode && !NO_CMD_CMPL(ctrl)) + cmd |= PCI_EXP_SLTCTL_CCIE; mask = (PCI_EXP_SLTCTL_PDCE | PCI_EXP_SLTCTL_ABPE | PCI_EXP_SLTCTL_PFDE | From f18caf261398a7f2de4fa3f600deb87072fe7b8d Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Wed, 7 Dec 2022 15:22:18 +0400 Subject: [PATCH 3474/4122] device property: Fix documentation for fwnode_get_next_parent() Use fwnode_handle_put() on the node pointer to release the refcount. Change fwnode_handle_node() to fwnode_handle_put(). 
Fixes: 233872585de1 ("device property: Add fwnode_get_next_parent()") Reviewed-by: Andy Shevchenko Reviewed-by: Daniel Scally Acked-by: Sakari Ailus Signed-off-by: Miaoqian Lin Link: https://lore.kernel.org/r/20221207112219.2652411-1-linmq006@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/property.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/property.c b/drivers/base/property.c index ed74083c179d..bbb3e499ff4a 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -609,7 +609,7 @@ EXPORT_SYMBOL_GPL(fwnode_get_parent); * node's parents. * * Returns a node pointer with refcount incremented, use - * fwnode_handle_node() on it when done. + * fwnode_handle_put() on it when done. */ struct fwnode_handle *fwnode_get_next_parent(struct fwnode_handle *fwnode) { From 447242e1292e8aa22e1371110867b9c543bf0373 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 6 Dec 2022 10:45:10 +0000 Subject: [PATCH 3475/4122] MAINTAINERS: Add additional co-maintainer to LEDs Add myself as co-maintainer for the LED subsystem, in support of Pavel during busy times. 
Suggested-by: Pavel Machek Signed-off-by: Lee Jones Acked-by: Krzysztof Kozlowski Acked-by: Andy Shevchenko Signed-off-by: Pavel Machek --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index cf0f18502372..e12755e75c82 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11555,6 +11555,7 @@ F: scripts/leaking_addresses.pl LED SUBSYSTEM M: Pavel Machek +M: Lee Jones L: linux-leds@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/pavel/linux-leds.git From 135780f1048b3f956f5b10bb23dec9c2d2c4ef6d Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Tue, 29 Nov 2022 22:29:01 +0100 Subject: [PATCH 3476/4122] leds: is31fl319x: Fix setting current limit for is31fl319{0,1,3} The current setting lives in bits 4:2 (as also defined by the mask) but the current limit defines in the driver use bits 2:0 which should be shifted over so they don't get masked out completely (except for 17.5mA which became 10mA). Now checking /sys/kernel/debug/regmap/1-0068/registers shows that the current limit is applied correctly and doesn't take the default b000 = 42mA. 
Fixes: fa877cf1abb9 ("leds: is31fl319x: Add support for is31fl319{0,1,3} chips") Signed-off-by: Luca Weiss Reviewed-by: Vincent Knecht Signed-off-by: Pavel Machek --- drivers/leds/leds-is31fl319x.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/leds/leds-is31fl319x.c b/drivers/leds/leds-is31fl319x.c index 52b59b62f437..b2f4c4ec7c56 100644 --- a/drivers/leds/leds-is31fl319x.c +++ b/drivers/leds/leds-is31fl319x.c @@ -38,6 +38,7 @@ #define IS31FL3190_CURRENT_uA_MIN 5000 #define IS31FL3190_CURRENT_uA_DEFAULT 42000 #define IS31FL3190_CURRENT_uA_MAX 42000 +#define IS31FL3190_CURRENT_SHIFT 2 #define IS31FL3190_CURRENT_MASK GENMASK(4, 2) #define IS31FL3190_CURRENT_5_mA 0x02 #define IS31FL3190_CURRENT_10_mA 0x01 @@ -553,7 +554,7 @@ static int is31fl319x_probe(struct i2c_client *client) is31fl3196_db_to_gain(is31->audio_gain_db)); else regmap_update_bits(is31->regmap, IS31FL3190_CURRENT, IS31FL3190_CURRENT_MASK, - is31fl3190_microamp_to_cs(dev, aggregated_led_microamp)); + is31fl3190_microamp_to_cs(dev, aggregated_led_microamp) << IS31FL3190_CURRENT_SHIFT); for (i = 0; i < is31->cdef->num_leds; i++) { struct is31fl319x_led *led = &is31->leds[i]; From 3256412fc57b6cabbc1cf1317d020e42f6d7aeab Mon Sep 17 00:00:00 2001 From: Weilong Chen Date: Tue, 8 Nov 2022 09:58:11 +0800 Subject: [PATCH 3477/4122] i2c: hisi: Add support to get clock frequency from clock The clk_rate property is not part of the generic device tree bindings for I2C busses described in Documentation/devicetree/bindings/i2c/i2c.txt; the clock frequency can instead be described through the clock binding. Let the driver obtain the clock frequency from either a clock or the clk_rate property: look for a clock first and, if none is found, fall back to clk_rate.
Signed-off-by: Weilong Chen Acked-by: Yicong Yang Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-hisi.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/i2c-hisi.c b/drivers/i2c/busses/i2c-hisi.c index bcc97e4fcb65..8c6c7075c765 100644 --- a/drivers/i2c/busses/i2c-hisi.c +++ b/drivers/i2c/busses/i2c-hisi.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -88,6 +89,7 @@ struct hisi_i2c_controller { struct i2c_adapter adapter; void __iomem *iobase; struct device *dev; + struct clk *clk; int irq; /* Intermediates for recording the transfer process */ @@ -454,10 +456,15 @@ static int hisi_i2c_probe(struct platform_device *pdev) return ret; } - ret = device_property_read_u64(dev, "clk_rate", &clk_rate_hz); - if (ret) { - dev_err(dev, "failed to get clock frequency, ret = %d\n", ret); - return ret; + ctlr->clk = devm_clk_get_optional_enabled(&pdev->dev, NULL); + if (IS_ERR_OR_NULL(ctlr->clk)) { + ret = device_property_read_u64(dev, "clk_rate", &clk_rate_hz); + if (ret) { + dev_err(dev, "failed to get clock frequency, ret = %d\n", ret); + return ret; + } + } else { + clk_rate_hz = clk_get_rate(ctlr->clk); } ctlr->clk_rate_khz = DIV_ROUND_UP_ULL(clk_rate_hz, HZ_PER_KHZ); From 810199f7315604bd969409109f1c96b4ebe772ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 19 Oct 2022 22:28:08 +0200 Subject: [PATCH 3478/4122] i2c: xiic: Make sure to disable clock on .remove() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If for whatever reasons pm_runtime_resume_and_get() failed, .remove() is exited early, the clock isn't freed and runtime PM state isn't reset. The right thing to do however is to free all resources that don't need HW access after a problem with runtime PM. Also issue a warning in that case and return 0 to suppress a less helpful warning by the driver core. 
Signed-off-by: Uwe Kleine-König Acked-by: Michal Simek Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-xiic.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c index 277a02455cdd..bee5a2ef1f22 100644 --- a/drivers/i2c/busses/i2c-xiic.c +++ b/drivers/i2c/busses/i2c-xiic.c @@ -858,11 +858,14 @@ static int xiic_i2c_remove(struct platform_device *pdev) /* remove adapter & data */ i2c_del_adapter(&i2c->adap); - ret = pm_runtime_resume_and_get(i2c->dev); - if (ret < 0) - return ret; + ret = pm_runtime_get_sync(i2c->dev); + + if (ret < 0) + dev_warn(&pdev->dev, "Failed to activate device for removal (%pe)\n", + ERR_PTR(ret)); + else + xiic_deinit(i2c); - xiic_deinit(i2c); pm_runtime_put_sync(i2c->dev); clk_disable_unprepare(i2c->clk); pm_runtime_disable(&pdev->dev); From 3f6fb1cfaf30d0d701d877ffacfd88dd6bcc5841 Mon Sep 17 00:00:00 2001 From: ye xingchen Date: Thu, 1 Dec 2022 16:11:24 +0800 Subject: [PATCH 3479/4122] leds: use sysfs_emit() to instead of scnprintf() Replace the open-coded scnprintf(buf, PAGE_SIZE, ...) calls in the sysfs show handlers with sysfs_emit() to simplify the code.
Signed-off-by: ye xingchen Signed-off-by: Pavel Machek --- drivers/leds/leds-blinkm.c | 8 ++++---- drivers/leds/leds-lm3533.c | 12 ++++++------ drivers/leds/leds-lp5521.c | 2 +- drivers/leds/leds-lp55xx-common.c | 4 ++-- drivers/leds/trigger/ledtrig-pattern.c | 2 +- 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/leds/leds-blinkm.c b/drivers/leds/leds-blinkm.c index 3fb6a2fdaefa..e19cc8a7b7ca 100644 --- a/drivers/leds/leds-blinkm.c +++ b/drivers/leds/leds-blinkm.c @@ -139,11 +139,11 @@ static ssize_t show_color_common(struct device *dev, char *buf, int color) return ret; switch (color) { case RED: - return scnprintf(buf, PAGE_SIZE, "%02X\n", data->red); + return sysfs_emit(buf, "%02X\n", data->red); case GREEN: - return scnprintf(buf, PAGE_SIZE, "%02X\n", data->green); + return sysfs_emit(buf, "%02X\n", data->green); case BLUE: - return scnprintf(buf, PAGE_SIZE, "%02X\n", data->blue); + return sysfs_emit(buf, "%02X\n", data->blue); default: return -EINVAL; } @@ -253,7 +253,7 @@ static DEVICE_ATTR_RW(blue); static ssize_t test_show(struct device *dev, struct device_attribute *attr, char *buf) { - return scnprintf(buf, PAGE_SIZE, + return sysfs_emit(buf, "#Write into test to start test sequence!#\n"); } diff --git a/drivers/leds/leds-lm3533.c b/drivers/leds/leds-lm3533.c index 43d5970d96aa..bcd414eb4724 100644 --- a/drivers/leds/leds-lm3533.c +++ b/drivers/leds/leds-lm3533.c @@ -314,7 +314,7 @@ static ssize_t show_id(struct device *dev, struct led_classdev *led_cdev = dev_get_drvdata(dev); struct lm3533_led *led = to_lm3533_led(led_cdev); - return scnprintf(buf, PAGE_SIZE, "%d\n", led->id); + return sysfs_emit(buf, "%d\n", led->id); } /* @@ -344,7 +344,7 @@ static ssize_t show_risefalltime(struct device *dev, if (ret) return ret; - return scnprintf(buf, PAGE_SIZE, "%x\n", val); + return sysfs_emit(buf, "%x\n", val); } static ssize_t show_risetime(struct device *dev, @@ -415,7 +415,7 @@ static ssize_t show_als_channel(struct device *dev, 
channel = (val & LM3533_REG_CTRLBANK_BCONF_ALS_CHANNEL_MASK) + 1; - return scnprintf(buf, PAGE_SIZE, "%u\n", channel); + return sysfs_emit(buf, "%u\n", channel); } static ssize_t store_als_channel(struct device *dev, @@ -465,7 +465,7 @@ static ssize_t show_als_en(struct device *dev, enable = val & LM3533_REG_CTRLBANK_BCONF_ALS_EN_MASK; - return scnprintf(buf, PAGE_SIZE, "%d\n", enable); + return sysfs_emit(buf, "%d\n", enable); } static ssize_t store_als_en(struct device *dev, @@ -518,7 +518,7 @@ static ssize_t show_linear(struct device *dev, else linear = 0; - return scnprintf(buf, PAGE_SIZE, "%x\n", linear); + return sysfs_emit(buf, "%x\n", linear); } static ssize_t store_linear(struct device *dev, @@ -564,7 +564,7 @@ static ssize_t show_pwm(struct device *dev, if (ret) return ret; - return scnprintf(buf, PAGE_SIZE, "%u\n", val); + return sysfs_emit(buf, "%u\n", val); } static ssize_t store_pwm(struct device *dev, diff --git a/drivers/leds/leds-lp5521.c b/drivers/leds/leds-lp5521.c index 7ff20c260504..19478d9c19a7 100644 --- a/drivers/leds/leds-lp5521.c +++ b/drivers/leds/leds-lp5521.c @@ -469,7 +469,7 @@ static ssize_t lp5521_selftest(struct device *dev, ret = lp5521_run_selftest(chip, buf); mutex_unlock(&chip->lock); - return scnprintf(buf, PAGE_SIZE, "%s\n", ret ? "FAIL" : "OK"); + return sysfs_emit(buf, "%s\n", ret ? 
"FAIL" : "OK"); } /* device attributes */ diff --git a/drivers/leds/leds-lp55xx-common.c b/drivers/leds/leds-lp55xx-common.c index ca2e28fb843f..c1940964067a 100644 --- a/drivers/leds/leds-lp55xx-common.c +++ b/drivers/leds/leds-lp55xx-common.c @@ -88,7 +88,7 @@ static ssize_t led_current_show(struct device *dev, { struct lp55xx_led *led = dev_to_lp55xx_led(dev); - return scnprintf(buf, PAGE_SIZE, "%d\n", led->led_current); + return sysfs_emit(buf, "%d\n", led->led_current); } static ssize_t led_current_store(struct device *dev, @@ -121,7 +121,7 @@ static ssize_t max_current_show(struct device *dev, { struct lp55xx_led *led = dev_to_lp55xx_led(dev); - return scnprintf(buf, PAGE_SIZE, "%d\n", led->max_current); + return sysfs_emit(buf, "%d\n", led->max_current); } static DEVICE_ATTR_RW(led_current); diff --git a/drivers/leds/trigger/ledtrig-pattern.c b/drivers/leds/trigger/ledtrig-pattern.c index 43a265dc4696..885ca63f383f 100644 --- a/drivers/leds/trigger/ledtrig-pattern.c +++ b/drivers/leds/trigger/ledtrig-pattern.c @@ -155,7 +155,7 @@ static ssize_t repeat_show(struct device *dev, struct device_attribute *attr, mutex_unlock(&data->lock); - return scnprintf(buf, PAGE_SIZE, "%d\n", repeat); + return sysfs_emit(buf, "%d\n", repeat); } static ssize_t repeat_store(struct device *dev, struct device_attribute *attr, From 6afd8bd5db7c5f734f24a2df8c1093df9f2ec38f Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 1 Dec 2022 14:15:05 +0100 Subject: [PATCH 3480/4122] leds: qcom,pm8058-led: Convert to DT schema Convert the Qualcomm PM8058 PMIC LED bindings to DT schema. 
Signed-off-by: Krzysztof Kozlowski Reviewed-by: Rob Herring Signed-off-by: Pavel Machek --- .../devicetree/bindings/leds/leds-pm8058.txt | 67 ------------------- .../bindings/leds/qcom,pm8058-led.yaml | 57 ++++++++++++++++ .../devicetree/bindings/mfd/qcom-pm8xxx.yaml | 4 ++ 3 files changed, 61 insertions(+), 67 deletions(-) delete mode 100644 Documentation/devicetree/bindings/leds/leds-pm8058.txt create mode 100644 Documentation/devicetree/bindings/leds/qcom,pm8058-led.yaml diff --git a/Documentation/devicetree/bindings/leds/leds-pm8058.txt b/Documentation/devicetree/bindings/leds/leds-pm8058.txt deleted file mode 100644 index 89584c49aab2..000000000000 --- a/Documentation/devicetree/bindings/leds/leds-pm8058.txt +++ /dev/null @@ -1,67 +0,0 @@ -Qualcomm PM8058 LED driver - -The Qualcomm PM8058 is a multi-functional device which contains -an LED driver block for up to six LEDs: three normal LEDs, two -"flash" LEDs and one "keypad backlight" LED. The names are -quoted because sometimes these LED drivers are used for wildly -different things than flash or keypad backlight: their names -are more of a suggestion than a hard-wired usecase. - -Hardware-wise the different LEDs support slightly different -output currents. The "flash" LEDs do not need to charge nor -do they support external triggers. They are just powerful LED -drivers. - -The LEDs appear as children to the PM8058 device, with the -proper compatible string. For the PM8058 bindings see: -mfd/qcom-pm8xxx.txt. - -Each LED is represented as a sub-node of the syscon device. Each -node's name represents the name of the corresponding LED. 
- -LED sub-node properties: - -Required properties: -- compatible: one of - "qcom,pm8058-led" (for the normal LEDs at 0x131, 0x132 and 0x133) - "qcom,pm8058-keypad-led" (for the "keypad" LED at 0x48) - "qcom,pm8058-flash-led" (for the "flash" LEDs at 0x49 and 0xFB) - -Optional properties: -- label: see Documentation/devicetree/bindings/leds/common.txt -- default-state: see Documentation/devicetree/bindings/leds/common.txt -- linux,default-trigger: see Documentation/devicetree/bindings/leds/common.txt - -Example: - -qcom,ssbi@500000 { - pmicintc: pmic@0 { - compatible = "qcom,pm8058"; - led@48 { - compatible = "qcom,pm8058-keypad-led"; - reg = <0x48>; - label = "pm8050:white:keypad"; - default-state = "off"; - }; - led@131 { - compatible = "qcom,pm8058-led"; - reg = <0x131>; - label = "pm8058:red"; - default-state = "off"; - }; - led@132 { - compatible = "qcom,pm8058-led"; - reg = <0x132>; - label = "pm8058:yellow"; - default-state = "off"; - linux,default-trigger = "mmc0"; - }; - led@133 { - compatible = "qcom,pm8058-led"; - reg = <0x133>; - label = "pm8058:green"; - default-state = "on"; - linux,default-trigger = "heartbeat"; - }; - }; -}; diff --git a/Documentation/devicetree/bindings/leds/qcom,pm8058-led.yaml b/Documentation/devicetree/bindings/leds/qcom,pm8058-led.yaml new file mode 100644 index 000000000000..fa03e73622d4 --- /dev/null +++ b/Documentation/devicetree/bindings/leds/qcom,pm8058-led.yaml @@ -0,0 +1,57 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/leds/qcom,pm8058-led.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm PM8058 PMIC LED + +maintainers: + - Krzysztof Kozlowski + +description: | + The Qualcomm PM8058 contains an LED block for up to six LEDs:: three normal + LEDs, two "flash" LEDs and one "keypad backlight" LED. 
The names are quoted + because sometimes these LED drivers are used for wildly different things than + flash or keypad backlight:: their names are more of a suggestion than a + hard-wired usecase. + + Hardware-wise the different LEDs support slightly different output currents. + The "flash" LEDs do not need to charge nor do they support external triggers. + They are just powerful LED drivers. + +allOf: + - $ref: common.yaml# + +properties: + compatible: + enum: + - qcom,pm8058-led + - qcom,pm8058-keypad-led + - qcom,pm8058-flash-led + + reg: + maxItems: 1 + +required: + - compatible + - reg + +unevaluatedProperties: false + +examples: + - | + #include <dt-bindings/leds/common.h> + + pmic { + #address-cells = <1>; + #size-cells = <0>; + + led@131 { + compatible = "qcom,pm8058-led"; + reg = <0x131>; + label = "pm8058:red"; + color = <LED_COLOR_ID_RED>; + default-state = "off"; + }; + }; diff --git a/Documentation/devicetree/bindings/mfd/qcom-pm8xxx.yaml b/Documentation/devicetree/bindings/mfd/qcom-pm8xxx.yaml index 61bd0b3ce02f..bd6e4aecfe2b 100644 --- a/Documentation/devicetree/bindings/mfd/qcom-pm8xxx.yaml +++ b/Documentation/devicetree/bindings/mfd/qcom-pm8xxx.yaml @@ -39,6 +39,10 @@ properties: interrupt-controller: true patternProperties: + "led@[0-9a-f]+$": + type: object + $ref: /schemas/leds/qcom,pm8058-led.yaml# + "rtc@[0-9a-f]+$": type: object $ref: "../rtc/qcom-pm8xxx-rtc.yaml" From 7cb092a0336c5770656c6742e7a7ce3042c8c44e Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 30 Aug 2022 11:36:25 +0300 Subject: [PATCH 3481/4122] leds: MAINTAINERS: include dt-bindings headers Include the Devicetree binding headers in LED SUBSYSTEM entry.
Signed-off-by: Krzysztof Kozlowski Acked-by: Alexander Dahl Signed-off-by: Pavel Machek --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index e12755e75c82..ce0f246d0544 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11561,6 +11561,7 @@ S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/pavel/linux-leds.git F: Documentation/devicetree/bindings/leds/ F: drivers/leds/ +F: include/dt-bindings/leds/ F: include/linux/leds.h LEGACY EEPROM DRIVER From b3525072835b523b397d459fadd0785d9c24bbd1 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 24 Oct 2022 14:29:39 +0100 Subject: [PATCH 3482/4122] orangefs: remove variable i Variable i is just being incremented and it's never used anywhere else. The variable and the increment are redundant so remove it. Signed-off-by: Colin Ian King Signed-off-by: Mike Marshall --- fs/orangefs/inode.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index 7a8c0c6e698d..eaa35a966115 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -530,7 +530,6 @@ static ssize_t orangefs_direct_IO(struct kiocb *iocb, size_t count = iov_iter_count(iter); ssize_t total_count = 0; ssize_t ret = -EINVAL; - int i = 0; gossip_debug(GOSSIP_FILE_DEBUG, "%s-BEGIN(%pU): count(%d) after estimate_max_iovecs.\n", @@ -556,7 +555,6 @@ static ssize_t orangefs_direct_IO(struct kiocb *iocb, while (iov_iter_count(iter)) { size_t each_count = iov_iter_count(iter); size_t amt_complete; - i++; /* how much to transfer in this loop iteration */ if (each_count > orangefs_bufmap_size_query()) From 610defdccff7b955fe899a825990c2202153a22e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 17 Oct 2022 22:49:37 +0100 Subject: [PATCH 3483/4122] orangefs: remove redundant assignment to variable buffer_index The variable buffer_index is assigned a value that is never read, it is assigned just before the function returns. 
The assignment is redundant and can be removed. Cleans up clang scan build warning: fs/orangefs/file.c:276:3: warning: Value stored to 'buffer_index' is never read [deadcode.DeadStores] Signed-off-by: Colin Ian King Signed-off-by: Mike Marshall --- fs/orangefs/file.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index 732661aa2680..167fa43b24f9 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -273,7 +273,6 @@ out: gossip_debug(GOSSIP_FILE_DEBUG, "%s(%pU): PUT buffer_index %d\n", __func__, handle, buffer_index); - buffer_index = -1; } op_release(new_op); return ret; From ea60a4ad0cf88b411cde6888b8c890935686ecd7 Mon Sep 17 00:00:00 2001 From: Zhang Xiaoxu Date: Tue, 18 Oct 2022 12:40:04 +0800 Subject: [PATCH 3484/4122] orangefs: Fix sysfs not cleanup when dev init failed When the dev init failed, should cleanup the sysfs, otherwise, the module will never be loaded since can not create duplicate sysfs directory: sysfs: cannot create duplicate filename '/fs/orangefs' CPU: 1 PID: 6549 Comm: insmod Tainted: G W 6.0.0+ #44 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1.fc33 04/01/2014 Call Trace: dump_stack_lvl+0x34/0x44 sysfs_warn_dup.cold+0x17/0x24 sysfs_create_dir_ns+0x16d/0x180 kobject_add_internal+0x156/0x3a0 kobject_init_and_add+0xcf/0x120 orangefs_sysfs_init+0x7e/0x3a0 [orangefs] orangefs_init+0xfe/0x1000 [orangefs] do_one_initcall+0x87/0x2a0 do_init_module+0xdf/0x320 load_module+0x2f98/0x3330 __do_sys_finit_module+0x113/0x1b0 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 kobject_add_internal failed for orangefs with -EEXIST, don't try to register things with the same name in the same directory. 
Fixes: 2f83ace37181 ("orangefs: put register_chrdev immediately before register_filesystem") Signed-off-by: Zhang Xiaoxu Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-mod.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/orangefs/orangefs-mod.c b/fs/orangefs/orangefs-mod.c index cd7297815f91..5ab741c60b7e 100644 --- a/fs/orangefs/orangefs-mod.c +++ b/fs/orangefs/orangefs-mod.c @@ -141,7 +141,7 @@ static int __init orangefs_init(void) gossip_err("%s: could not initialize device subsystem %d!\n", __func__, ret); - goto cleanup_device; + goto cleanup_sysfs; } ret = register_filesystem(&orangefs_fs_type); @@ -152,11 +152,11 @@ static int __init orangefs_init(void) goto out; } - orangefs_sysfs_exit(); - -cleanup_device: orangefs_dev_cleanup(); +cleanup_sysfs: + orangefs_sysfs_exit(); + sysfs_init_failed: orangefs_debugfs_cleanup(); From d23417a5bf3a3afc55de5442eb46e1e60458b0a1 Mon Sep 17 00:00:00 2001 From: Zhang Xiaoxu Date: Tue, 18 Oct 2022 12:40:05 +0800 Subject: [PATCH 3485/4122] orangefs: Fix kmemleak in orangefs_prepare_debugfs_help_string() When insert and remove the orangefs module, then debug_help_string will be leaked: unreferenced object 0xffff8881652ba000 (size 4096): comm "insmod", pid 1701, jiffies 4294893639 (age 13218.530s) hex dump (first 32 bytes): 43 6c 69 65 6e 74 20 44 65 62 75 67 20 4b 65 79 Client Debug Key 77 6f 72 64 73 20 61 72 65 20 75 6e 6b 6e 6f 77 words are unknow backtrace: [<0000000004e6f8e3>] kmalloc_trace+0x27/0xa0 [<0000000006f75d85>] orangefs_prepare_debugfs_help_string+0x5e/0x480 [orangefs] [<0000000091270a2a>] _sub_I_65535_1+0x57/0xf70 [crc_itu_t] [<000000004b1ee1a3>] do_one_initcall+0x87/0x2a0 [<000000001d0614ae>] do_init_module+0xdf/0x320 [<00000000efef068c>] load_module+0x2f98/0x3330 [<000000006533b44d>] __do_sys_finit_module+0x113/0x1b0 [<00000000a0da6f99>] do_syscall_64+0x35/0x80 [<000000007790b19b>] entry_SYSCALL_64_after_hwframe+0x46/0xb0 When remove the module, should always free 
debug_help_string. Should always free the allocated buffer when change the free_debug_help_string. Signed-off-by: Zhang Xiaoxu Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-debugfs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c index 29eaa4544372..a848b6ef9599 100644 --- a/fs/orangefs/orangefs-debugfs.c +++ b/fs/orangefs/orangefs-debugfs.c @@ -222,6 +222,8 @@ out: void orangefs_debugfs_cleanup(void) { debugfs_remove_recursive(debug_dir); + kfree(debug_help_string); + debug_help_string = NULL; } /* open ORANGEFS_KMOD_DEBUG_HELP_FILE */ @@ -671,6 +673,7 @@ int orangefs_prepare_debugfs_help_string(int at_boot) memset(debug_help_string, 0, DEBUG_HELP_STRING_SIZE); strlcat(debug_help_string, new, string_size); mutex_unlock(&orangefs_help_file_lock); + kfree(new); } rc = 0; From 1f2c0e8a587bcafad85019a2d80f158d8d41a868 Mon Sep 17 00:00:00 2001 From: Zhang Xiaoxu Date: Tue, 18 Oct 2022 12:40:06 +0800 Subject: [PATCH 3486/4122] orangefs: Fix kmemleak in orangefs_sysfs_init() When insert and remove the orangefs module, there are kobjects memory leaked as below: unreferenced object 0xffff88810f95af00 (size 64): comm "insmod", pid 783, jiffies 4294813439 (age 65.512s) hex dump (first 32 bytes): a0 83 af 01 81 88 ff ff 08 af 95 0f 81 88 ff ff ................ 08 af 95 0f 81 88 ff ff 00 00 00 00 00 00 00 00 ................ 
backtrace: [<0000000031ab7788>] kmalloc_trace+0x27/0xa0 [<000000005a6e4dfe>] orangefs_sysfs_init+0x42/0x3a0 [<00000000722645ca>] 0xffffffffa02780fe [<000000004232d9f7>] do_one_initcall+0x87/0x2a0 [<0000000054f22384>] do_init_module+0xdf/0x320 [<000000003263bdea>] load_module+0x2f98/0x3330 [<0000000052cd4153>] __do_sys_finit_module+0x113/0x1b0 [<00000000250ae02b>] do_syscall_64+0x35/0x80 [<00000000f11c03c7>] entry_SYSCALL_64_after_hwframe+0x46/0xb0 unreferenced object 0xffff88810f95ae80 (size 64): comm "insmod", pid 783, jiffies 4294813439 (age 65.512s) hex dump (first 32 bytes): c8 90 0f 02 81 88 ff ff 88 ae 95 0f 81 88 ff ff ................ 88 ae 95 0f 81 88 ff ff 00 00 00 00 00 00 00 00 ................ backtrace: [<0000000031ab7788>] kmalloc_trace+0x27/0xa0 [<000000001a4841fa>] orangefs_sysfs_init+0xc7/0x3a0 [<00000000722645ca>] 0xffffffffa02780fe [<000000004232d9f7>] do_one_initcall+0x87/0x2a0 [<0000000054f22384>] do_init_module+0xdf/0x320 [<000000003263bdea>] load_module+0x2f98/0x3330 [<0000000052cd4153>] __do_sys_finit_module+0x113/0x1b0 [<00000000250ae02b>] do_syscall_64+0x35/0x80 [<00000000f11c03c7>] entry_SYSCALL_64_after_hwframe+0x46/0xb0 unreferenced object 0xffff88810f95ae00 (size 64): comm "insmod", pid 783, jiffies 4294813440 (age 65.511s) hex dump (first 32 bytes): 60 87 a1 00 81 88 ff ff 08 ae 95 0f 81 88 ff ff `............... 08 ae 95 0f 81 88 ff ff 00 00 00 00 00 00 00 00 ................ 
backtrace: [<0000000031ab7788>] kmalloc_trace+0x27/0xa0 [<000000005915e797>] orangefs_sysfs_init+0x12b/0x3a0 [<00000000722645ca>] 0xffffffffa02780fe [<000000004232d9f7>] do_one_initcall+0x87/0x2a0 [<0000000054f22384>] do_init_module+0xdf/0x320 [<000000003263bdea>] load_module+0x2f98/0x3330 [<0000000052cd4153>] __do_sys_finit_module+0x113/0x1b0 [<00000000250ae02b>] do_syscall_64+0x35/0x80 [<00000000f11c03c7>] entry_SYSCALL_64_after_hwframe+0x46/0xb0 unreferenced object 0xffff88810f95ad80 (size 64): comm "insmod", pid 783, jiffies 4294813440 (age 65.511s) hex dump (first 32 bytes): 78 90 0f 02 81 88 ff ff 88 ad 95 0f 81 88 ff ff x............... 88 ad 95 0f 81 88 ff ff 00 00 00 00 00 00 00 00 ................ backtrace: [<0000000031ab7788>] kmalloc_trace+0x27/0xa0 [<000000007a14eb35>] orangefs_sysfs_init+0x1ac/0x3a0 [<00000000722645ca>] 0xffffffffa02780fe [<000000004232d9f7>] do_one_initcall+0x87/0x2a0 [<0000000054f22384>] do_init_module+0xdf/0x320 [<000000003263bdea>] load_module+0x2f98/0x3330 [<0000000052cd4153>] __do_sys_finit_module+0x113/0x1b0 [<00000000250ae02b>] do_syscall_64+0x35/0x80 [<00000000f11c03c7>] entry_SYSCALL_64_after_hwframe+0x46/0xb0 unreferenced object 0xffff88810f95ac00 (size 64): comm "insmod", pid 783, jiffies 4294813440 (age 65.531s) hex dump (first 32 bytes): e0 ff 67 02 81 88 ff ff 08 ac 95 0f 81 88 ff ff ..g............. 08 ac 95 0f 81 88 ff ff 00 00 00 00 00 00 00 00 ................ 
backtrace: [<0000000031ab7788>] kmalloc_trace+0x27/0xa0 [<000000001f38adcb>] orangefs_sysfs_init+0x291/0x3a0 [<00000000722645ca>] 0xffffffffa02780fe [<000000004232d9f7>] do_one_initcall+0x87/0x2a0 [<0000000054f22384>] do_init_module+0xdf/0x320 [<000000003263bdea>] load_module+0x2f98/0x3330 [<0000000052cd4153>] __do_sys_finit_module+0x113/0x1b0 [<00000000250ae02b>] do_syscall_64+0x35/0x80 [<00000000f11c03c7>] entry_SYSCALL_64_after_hwframe+0x46/0xb0 unreferenced object 0xffff88810f95ab80 (size 64): comm "insmod", pid 783, jiffies 4294813441 (age 65.530s) hex dump (first 32 bytes): 50 bf 2f 02 81 88 ff ff 88 ab 95 0f 81 88 ff ff P./............. 88 ab 95 0f 81 88 ff ff 00 00 00 00 00 00 00 00 ................ backtrace: [<0000000031ab7788>] kmalloc_trace+0x27/0xa0 [<000000009cc7d95b>] orangefs_sysfs_init+0x2f5/0x3a0 [<00000000722645ca>] 0xffffffffa02780fe [<000000004232d9f7>] do_one_initcall+0x87/0x2a0 [<0000000054f22384>] do_init_module+0xdf/0x320 [<000000003263bdea>] load_module+0x2f98/0x3330 [<0000000052cd4153>] __do_sys_finit_module+0x113/0x1b0 [<00000000250ae02b>] do_syscall_64+0x35/0x80 [<00000000f11c03c7>] entry_SYSCALL_64_after_hwframe+0x46/0xb0 Should add release function for each kobject_type to free the memory. 
Signed-off-by: Zhang Xiaoxu Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-sysfs.c | 71 ++++++++++++++++++++++++++++++++---- 1 file changed, 63 insertions(+), 8 deletions(-) diff --git a/fs/orangefs/orangefs-sysfs.c b/fs/orangefs/orangefs-sysfs.c index de80b62553bb..be4ba03a01a0 100644 --- a/fs/orangefs/orangefs-sysfs.c +++ b/fs/orangefs/orangefs-sysfs.c @@ -896,9 +896,18 @@ static struct attribute *orangefs_default_attrs[] = { }; ATTRIBUTE_GROUPS(orangefs_default); +static struct kobject *orangefs_obj; + +static void orangefs_obj_release(struct kobject *kobj) +{ + kfree(orangefs_obj); + orangefs_obj = NULL; +} + static struct kobj_type orangefs_ktype = { .sysfs_ops = &orangefs_sysfs_ops, .default_groups = orangefs_default_groups, + .release = orangefs_obj_release, }; static struct orangefs_attribute acache_hard_limit_attribute = @@ -934,9 +943,18 @@ static struct attribute *acache_orangefs_default_attrs[] = { }; ATTRIBUTE_GROUPS(acache_orangefs_default); +static struct kobject *acache_orangefs_obj; + +static void acache_orangefs_obj_release(struct kobject *kobj) +{ + kfree(acache_orangefs_obj); + acache_orangefs_obj = NULL; +} + static struct kobj_type acache_orangefs_ktype = { .sysfs_ops = &orangefs_sysfs_ops, .default_groups = acache_orangefs_default_groups, + .release = acache_orangefs_obj_release, }; static struct orangefs_attribute capcache_hard_limit_attribute = @@ -972,9 +990,18 @@ static struct attribute *capcache_orangefs_default_attrs[] = { }; ATTRIBUTE_GROUPS(capcache_orangefs_default); +static struct kobject *capcache_orangefs_obj; + +static void capcache_orangefs_obj_release(struct kobject *kobj) +{ + kfree(capcache_orangefs_obj); + capcache_orangefs_obj = NULL; +} + static struct kobj_type capcache_orangefs_ktype = { .sysfs_ops = &orangefs_sysfs_ops, .default_groups = capcache_orangefs_default_groups, + .release = capcache_orangefs_obj_release, }; static struct orangefs_attribute ccache_hard_limit_attribute = @@ -1010,9 +1037,18 @@ static 
struct attribute *ccache_orangefs_default_attrs[] = { }; ATTRIBUTE_GROUPS(ccache_orangefs_default); +static struct kobject *ccache_orangefs_obj; + +static void ccache_orangefs_obj_release(struct kobject *kobj) +{ + kfree(ccache_orangefs_obj); + ccache_orangefs_obj = NULL; +} + static struct kobj_type ccache_orangefs_ktype = { .sysfs_ops = &orangefs_sysfs_ops, .default_groups = ccache_orangefs_default_groups, + .release = ccache_orangefs_obj_release, }; static struct orangefs_attribute ncache_hard_limit_attribute = @@ -1048,9 +1084,18 @@ static struct attribute *ncache_orangefs_default_attrs[] = { }; ATTRIBUTE_GROUPS(ncache_orangefs_default); +static struct kobject *ncache_orangefs_obj; + +static void ncache_orangefs_obj_release(struct kobject *kobj) +{ + kfree(ncache_orangefs_obj); + ncache_orangefs_obj = NULL; +} + static struct kobj_type ncache_orangefs_ktype = { .sysfs_ops = &orangefs_sysfs_ops, .default_groups = ncache_orangefs_default_groups, + .release = ncache_orangefs_obj_release, }; static struct orangefs_attribute pc_acache_attribute = @@ -1079,9 +1124,18 @@ static struct attribute *pc_orangefs_default_attrs[] = { }; ATTRIBUTE_GROUPS(pc_orangefs_default); +static struct kobject *pc_orangefs_obj; + +static void pc_orangefs_obj_release(struct kobject *kobj) +{ + kfree(pc_orangefs_obj); + pc_orangefs_obj = NULL; +} + static struct kobj_type pc_orangefs_ktype = { .sysfs_ops = &orangefs_sysfs_ops, .default_groups = pc_orangefs_default_groups, + .release = pc_orangefs_obj_release, }; static struct orangefs_attribute stats_reads_attribute = @@ -1103,19 +1157,20 @@ static struct attribute *stats_orangefs_default_attrs[] = { }; ATTRIBUTE_GROUPS(stats_orangefs_default); +static struct kobject *stats_orangefs_obj; + +static void stats_orangefs_obj_release(struct kobject *kobj) +{ + kfree(stats_orangefs_obj); + stats_orangefs_obj = NULL; +} + static struct kobj_type stats_orangefs_ktype = { .sysfs_ops = &orangefs_sysfs_ops, .default_groups = 
stats_orangefs_default_groups, + .release = stats_orangefs_obj_release, }; -static struct kobject *orangefs_obj; -static struct kobject *acache_orangefs_obj; -static struct kobject *capcache_orangefs_obj; -static struct kobject *ccache_orangefs_obj; -static struct kobject *ncache_orangefs_obj; -static struct kobject *pc_orangefs_obj; -static struct kobject *stats_orangefs_obj; - int orangefs_sysfs_init(void) { int rc = -EINVAL; From 31720a2b109b3080eb77e97b8f6f50a27b4ae599 Mon Sep 17 00:00:00 2001 From: Zhang Xiaoxu Date: Tue, 18 Oct 2022 12:40:07 +0800 Subject: [PATCH 3487/4122] orangefs: Fix kmemleak in orangefs_{kernel,client}_debug_init() When inserting and removing the orangefs module, memory is leaked as below: unreferenced object 0xffff88816b0cc000 (size 2048): comm "insmod", pid 783, jiffies 4294813439 (age 65.512s) hex dump (first 32 bytes): 6e 6f 6e 65 0a 00 00 00 00 00 00 00 00 00 00 00 none............ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<0000000031ab7788>] kmalloc_trace+0x27/0xa0 [<000000005b405fee>] orangefs_debugfs_init.cold+0xaf/0x17f [<00000000e5a0085b>] 0xffffffffa02780f9 [<000000004232d9f7>] do_one_initcall+0x87/0x2a0 [<0000000054f22384>] do_init_module+0xdf/0x320 [<000000003263bdea>] load_module+0x2f98/0x3330 [<0000000052cd4153>] __do_sys_finit_module+0x113/0x1b0 [<00000000250ae02b>] do_syscall_64+0x35/0x80 [<00000000f11c03c7>] entry_SYSCALL_64_after_hwframe+0x46/0xb0 Use a global variable as the buffer rather than dynamically allocating it, to solve the problem.
Signed-off-by: Zhang Xiaoxu Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-debugfs.c | 26 +++----------------------- 1 file changed, 3 insertions(+), 23 deletions(-) diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c index a848b6ef9599..1b508f543384 100644 --- a/fs/orangefs/orangefs-debugfs.c +++ b/fs/orangefs/orangefs-debugfs.c @@ -194,15 +194,10 @@ void orangefs_debugfs_init(int debug_mask) */ static void orangefs_kernel_debug_init(void) { - int rc = -ENOMEM; - char *k_buffer = NULL; + static char k_buffer[ORANGEFS_MAX_DEBUG_STRING_LEN] = { }; gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: start\n", __func__); - k_buffer = kzalloc(ORANGEFS_MAX_DEBUG_STRING_LEN, GFP_KERNEL); - if (!k_buffer) - goto out; - if (strlen(kernel_debug_string) + 1 < ORANGEFS_MAX_DEBUG_STRING_LEN) { strcpy(k_buffer, kernel_debug_string); strcat(k_buffer, "\n"); @@ -213,9 +208,6 @@ static void orangefs_kernel_debug_init(void) debugfs_create_file(ORANGEFS_KMOD_DEBUG_FILE, 0444, debug_dir, k_buffer, &kernel_debug_fops); - -out: - gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: rc:%d:\n", __func__, rc); } @@ -299,18 +291,13 @@ static int help_show(struct seq_file *m, void *v) /* * initialize the client-debug file. 
*/ -static int orangefs_client_debug_init(void) +static void orangefs_client_debug_init(void) { - int rc = -ENOMEM; - char *c_buffer = NULL; + static char c_buffer[ORANGEFS_MAX_DEBUG_STRING_LEN] = { }; gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: start\n", __func__); - c_buffer = kzalloc(ORANGEFS_MAX_DEBUG_STRING_LEN, GFP_KERNEL); - if (!c_buffer) - goto out; - if (strlen(client_debug_string) + 1 < ORANGEFS_MAX_DEBUG_STRING_LEN) { strcpy(c_buffer, client_debug_string); strcat(c_buffer, "\n"); @@ -324,13 +311,6 @@ static int orangefs_client_debug_init(void) debug_dir, c_buffer, &kernel_debug_fops); - - rc = 0; - -out: - - gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: rc:%d:\n", __func__, rc); - return rc; } /* open ORANGEFS_KMOD_DEBUG_FILE or ORANGEFS_CLIENT_DEBUG_FILE.*/ From 2d47b79d2bd39cc6369eccf94a06568d84c906ae Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Sat, 29 Oct 2022 17:38:25 +0800 Subject: [PATCH 3488/4122] i2c: mux: reg: check return value after calling platform_get_resource() It will cause null-ptr-deref in resource_size(), if platform_get_resource() returns NULL, move calling resource_size() after devm_ioremap_resource() that will check 'res' to avoid null-ptr-deref. And use devm_platform_get_and_ioremap_resource() to simplify code. 
Fixes: b3fdd32799d8 ("i2c: mux: Add register-based mux i2c-mux-reg") Signed-off-by: Yang Yingliang Signed-off-by: Wolfram Sang --- drivers/i2c/muxes/i2c-mux-reg.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/muxes/i2c-mux-reg.c b/drivers/i2c/muxes/i2c-mux-reg.c index 0e0679f65cf7..30a6de1694e0 100644 --- a/drivers/i2c/muxes/i2c-mux-reg.c +++ b/drivers/i2c/muxes/i2c-mux-reg.c @@ -183,13 +183,12 @@ static int i2c_mux_reg_probe(struct platform_device *pdev) if (!mux->data.reg) { dev_info(&pdev->dev, "Register not set, using platform resource\n"); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - mux->data.reg_size = resource_size(res); - mux->data.reg = devm_ioremap_resource(&pdev->dev, res); + mux->data.reg = devm_platform_get_and_ioremap_resource(pdev, 0, &res); if (IS_ERR(mux->data.reg)) { ret = PTR_ERR(mux->data.reg); goto err_put_parent; } + mux->data.reg_size = resource_size(res); } if (mux->data.reg_size != 4 && mux->data.reg_size != 2 && From 39244cc754829bf707dccd12e2ce37510f5b1f8d Mon Sep 17 00:00:00 2001 From: Zheyu Ma Date: Fri, 29 Jul 2022 19:02:16 +0800 Subject: [PATCH 3489/4122] i2c: ismt: Fix an out-of-bounds bug in ismt_access() When the driver does not check the data from the user, the variable 'data->block[0]' may be very large to cause an out-of-bounds bug. 
The following log can reveal it: [ 33.995542] i2c i2c-1: ioctl, cmd=0x720, arg=0x7ffcb3dc3a20 [ 33.995978] ismt_smbus 0000:00:05.0: I2C_SMBUS_BLOCK_DATA: WRITE [ 33.996475] ================================================================== [ 33.996995] BUG: KASAN: out-of-bounds in ismt_access.cold+0x374/0x214b [ 33.997473] Read of size 18446744073709551615 at addr ffff88810efcfdb1 by task ismt_poc/485 [ 33.999450] Call Trace: [ 34.001849] memcpy+0x20/0x60 [ 34.002077] ismt_access.cold+0x374/0x214b [ 34.003382] __i2c_smbus_xfer+0x44f/0xfb0 [ 34.004007] i2c_smbus_xfer+0x10a/0x390 [ 34.004291] i2cdev_ioctl_smbus+0x2c8/0x710 [ 34.005196] i2cdev_ioctl+0x5ec/0x74c Fix this bug by checking the size of 'data->block[0]' first. Fixes: 13f35ac14cd0 ("i2c: Adding support for Intel iSMT SMBus 2.0 host controller") Signed-off-by: Zheyu Ma Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-ismt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/i2c/busses/i2c-ismt.c b/drivers/i2c/busses/i2c-ismt.c index fe2349590f75..c74985d77b0e 100644 --- a/drivers/i2c/busses/i2c-ismt.c +++ b/drivers/i2c/busses/i2c-ismt.c @@ -509,6 +509,9 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr, if (read_write == I2C_SMBUS_WRITE) { /* Block Write */ dev_dbg(dev, "I2C_SMBUS_BLOCK_DATA: WRITE\n"); + if (data->block[0] < 1 || data->block[0] > I2C_SMBUS_BLOCK_MAX) + return -EINVAL; + dma_size = data->block[0] + 1; dma_direction = DMA_TO_DEVICE; desc->wr_len_cmd = dma_size; From 76007ccc5727f86c105e96697e96dcf2df6b1634 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 7 Dec 2022 13:07:10 -0800 Subject: [PATCH 3490/4122] PCI: mvebu: Switch to using gpiod API Switch the driver away from legacy gpio/of_gpio API to gpiod API, and remove use of of_get_named_gpio_flags() which I want to make private to gpiolib. 
Link: https://lore.kernel.org/r/Y5EAft42YiT66mVj@google.com Signed-off-by: Dmitry Torokhov Signed-off-by: Bjorn Helgaas --- drivers/pci/controller/pci-mvebu.c | 51 ++++++++++-------------------- 1 file changed, 17 insertions(+), 34 deletions(-) diff --git a/drivers/pci/controller/pci-mvebu.c b/drivers/pci/controller/pci-mvebu.c index 1ced73726a26..b12e128fc5d7 100644 --- a/drivers/pci/controller/pci-mvebu.c +++ b/drivers/pci/controller/pci-mvebu.c @@ -11,14 +11,14 @@ #include #include #include -#include +#include #include +#include #include #include #include #include #include -#include #include #include @@ -1261,9 +1261,8 @@ static int mvebu_pcie_parse_port(struct mvebu_pcie *pcie, struct mvebu_pcie_port *port, struct device_node *child) { struct device *dev = &pcie->pdev->dev; - enum of_gpio_flags flags; u32 slot_power_limit; - int reset_gpio, ret; + int ret; u32 num_lanes; port->pcie = pcie; @@ -1327,40 +1326,24 @@ static int mvebu_pcie_parse_port(struct mvebu_pcie *pcie, port->name, child); } - reset_gpio = of_get_named_gpio_flags(child, "reset-gpios", 0, &flags); - if (reset_gpio == -EPROBE_DEFER) { - ret = reset_gpio; + port->reset_name = devm_kasprintf(dev, GFP_KERNEL, "%s-reset", + port->name); + if (!port->reset_name) { + ret = -ENOMEM; goto err; } - if (gpio_is_valid(reset_gpio)) { - unsigned long gpio_flags; - - port->reset_name = devm_kasprintf(dev, GFP_KERNEL, "%s-reset", - port->name); - if (!port->reset_name) { - ret = -ENOMEM; + port->reset_gpio = devm_fwnode_gpiod_get(dev, of_fwnode_handle(child), + "reset", GPIOD_OUT_HIGH, + port->name); + ret = PTR_ERR_OR_ZERO(port->reset_gpio); + if (ret) { + if (ret != -ENOENT) goto err; - } - - if (flags & OF_GPIO_ACTIVE_LOW) { - dev_info(dev, "%pOF: reset gpio is active low\n", - child); - gpio_flags = GPIOF_ACTIVE_LOW | - GPIOF_OUT_INIT_LOW; - } else { - gpio_flags = GPIOF_OUT_INIT_HIGH; - } - - ret = devm_gpio_request_one(dev, reset_gpio, gpio_flags, - port->reset_name); - if (ret) { - if (ret == 
-EPROBE_DEFER) - goto err; - goto skip; - } - - port->reset_gpio = gpio_to_desc(reset_gpio); + /* reset gpio is optional */ + port->reset_gpio = NULL; + devm_kfree(dev, port->reset_name); + port->reset_name = NULL; } slot_power_limit = of_pci_get_slot_power_limit(child, From 64fdcbcc064966bbf261bb455876dffa58858d32 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 8 Dec 2022 09:43:15 +1100 Subject: [PATCH 3491/4122] powerpc/prom: Fix 32-bit build Add an IS_ENABLED() check to fix the build error: arch/powerpc/kernel/prom.o: in function `early_init_dt_scan_cpus': prom.c:(.init.text+0x2ea): undefined reference to `boot_cpu_node_count' Fixes: e13d23a404f2 ("powerpc: export the CPU node count") Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/prom.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 645f4450dfc3..4f1c920aa13e 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -336,7 +336,8 @@ static int __init early_init_dt_scan_cpus(unsigned long node, if (type == NULL || strcmp(type, "cpu") != 0) return 0; - boot_cpu_node_count++; + if (IS_ENABLED(CONFIG_PPC64)) + boot_cpu_node_count++; /* Get physical cpuid */ intserv = of_get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s", &len); From fb4907f487254375830f135dcfe5dd7e6f8b705f Mon Sep 17 00:00:00 2001 From: Chao Leng Date: Fri, 25 Nov 2022 09:00:26 +0800 Subject: [PATCH 3492/4122] RDMA/cma: Change RoCE packet life time from 18 to 16 The ack timeout retransmission time is affected by the following two factors: one is packet life time, another is the HCA processing time. Now the default packet lifetime(CMA_IBOE_PACKET_LIFETIME) is 18. That means the minimum ack timeout is 2 seconds (2^(18+1)*4us=2.097seconds). The packet lifetime means the maximum transmission time of packets on the network, 2 seconds is too long. 
Assume the network is a clos topology with three layers, every packet will pass through five hops of switches. Assume the buffer of every switch is 128MB and the port transmission rate is 25 Gbit/s, the maximum transmission time of the packet is 200ms(128MB*5/25Gbit/s). Add double redundancy, it is less than 500ms. So change the CMA_IBOE_PACKET_LIFETIME to 16, the maximum transmission time of the packet will be about 500+ms, it is long enough. Link: https://lore.kernel.org/r/20221125010026.755-1-lengchao@huawei.com Signed-off-by: Chao Leng Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index cc2222b85c88..2f5b5e6f3d11 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -47,7 +47,7 @@ MODULE_LICENSE("Dual BSD/GPL"); #define CMA_CM_RESPONSE_TIMEOUT 20 #define CMA_MAX_CM_RETRIES 15 #define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24) -#define CMA_IBOE_PACKET_LIFETIME 18 +#define CMA_IBOE_PACKET_LIFETIME 16 #define CMA_PREFERRED_ROCE_GID_TYPE IB_GID_TYPE_ROCE_UDP_ENCAP static const char * const cma_events[] = { From 487d65090a3dce1ae54946aded55d0f8ac87cbab Mon Sep 17 00:00:00 2001 From: Yixing Liu Date: Sat, 26 Nov 2022 18:29:06 +0800 Subject: [PATCH 3493/4122] RDMA/hns: Fix the gid problem caused by free mr After the hns roce driver is loaded, if you modify the mac address of the network port, the following error will appear: __ib_cache_gid_add: unable to add gid fe80:0000:0000:0000:4600:4dff:fe22:abb5 error=-28 hns3 0000:7d:00.0 hns_0: attr path_mtu(1) invalid while modify qp The reason for the error is that the gid being occupied will cause the failure to modify the gid. The gid is occupied by the loopback QP used by free mr. When the mac address is modified, the gid will change. If there is a busy QP at this time, the gid will not be released and the modification will fail. 
The QP of free mr is created using the ib interface. The ib interface will add a reference count to the gid, resulting in this error scenario. Considering that free mr is solving a bug in HIP08, not an actual business, it is not necessary to use ib interfaces. Fixes: 70f92521584f ("RDMA/hns: Use the reserved loopback QPs to free MR before destroying MPT") Link: https://lore.kernel.org/r/20221126102911.2921820-2-xuhaoyue1@hisilicon.com Signed-off-by: Yixing Liu Signed-off-by: Haoyue Xu Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 175 ++++++++++++++++----- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 8 +- 2 files changed, 137 insertions(+), 46 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 939811867249..93c677239686 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2634,31 +2634,124 @@ static void free_dip_list(struct hns_roce_dev *hr_dev) spin_unlock_irqrestore(&hr_dev->dip_list_lock, flags); } +static struct ib_pd *free_mr_init_pd(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_v2_priv *priv = hr_dev->priv; + struct hns_roce_v2_free_mr *free_mr = &priv->free_mr; + struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_pd *hr_pd; + struct ib_pd *pd; + + hr_pd = kzalloc(sizeof(*hr_pd), GFP_KERNEL); + if (ZERO_OR_NULL_PTR(hr_pd)) + return NULL; + pd = &hr_pd->ibpd; + pd->device = ibdev; + + if (hns_roce_alloc_pd(pd, NULL)) { + ibdev_err(ibdev, "failed to create pd for free mr.\n"); + kfree(hr_pd); + return NULL; + } + free_mr->rsv_pd = to_hr_pd(pd); + free_mr->rsv_pd->ibpd.device = &hr_dev->ib_dev; + free_mr->rsv_pd->ibpd.uobject = NULL; + free_mr->rsv_pd->ibpd.__internal_mr = NULL; + atomic_set(&free_mr->rsv_pd->ibpd.usecnt, 0); + + return pd; +} + +static struct ib_cq *free_mr_init_cq(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_v2_priv *priv = hr_dev->priv; + struct hns_roce_v2_free_mr 
*free_mr = &priv->free_mr; + struct ib_device *ibdev = &hr_dev->ib_dev; + struct ib_cq_init_attr cq_init_attr = {}; + struct hns_roce_cq *hr_cq; + struct ib_cq *cq; + + cq_init_attr.cqe = HNS_ROCE_FREE_MR_USED_CQE_NUM; + + hr_cq = kzalloc(sizeof(*hr_cq), GFP_KERNEL); + if (ZERO_OR_NULL_PTR(hr_cq)) + return NULL; + + cq = &hr_cq->ib_cq; + cq->device = ibdev; + + if (hns_roce_create_cq(cq, &cq_init_attr, NULL)) { + ibdev_err(ibdev, "failed to create cq for free mr.\n"); + kfree(hr_cq); + return NULL; + } + free_mr->rsv_cq = to_hr_cq(cq); + free_mr->rsv_cq->ib_cq.device = &hr_dev->ib_dev; + free_mr->rsv_cq->ib_cq.uobject = NULL; + free_mr->rsv_cq->ib_cq.comp_handler = NULL; + free_mr->rsv_cq->ib_cq.event_handler = NULL; + free_mr->rsv_cq->ib_cq.cq_context = NULL; + atomic_set(&free_mr->rsv_cq->ib_cq.usecnt, 0); + + return cq; +} + +static int free_mr_init_qp(struct hns_roce_dev *hr_dev, struct ib_cq *cq, + struct ib_qp_init_attr *init_attr, int i) +{ + struct hns_roce_v2_priv *priv = hr_dev->priv; + struct hns_roce_v2_free_mr *free_mr = &priv->free_mr; + struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_qp *hr_qp; + struct ib_qp *qp; + int ret; + + hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL); + if (ZERO_OR_NULL_PTR(hr_qp)) + return -ENOMEM; + + qp = &hr_qp->ibqp; + qp->device = ibdev; + + ret = hns_roce_create_qp(qp, init_attr, NULL); + if (ret) { + ibdev_err(ibdev, "failed to create qp for free mr.\n"); + kfree(hr_qp); + return ret; + } + + free_mr->rsv_qp[i] = hr_qp; + free_mr->rsv_qp[i]->ibqp.recv_cq = cq; + free_mr->rsv_qp[i]->ibqp.send_cq = cq; + + return 0; +} + static void free_mr_exit(struct hns_roce_dev *hr_dev) { struct hns_roce_v2_priv *priv = hr_dev->priv; struct hns_roce_v2_free_mr *free_mr = &priv->free_mr; - int ret; + struct ib_qp *qp; int i; for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) { if (free_mr->rsv_qp[i]) { - ret = ib_destroy_qp(free_mr->rsv_qp[i]); - if (ret) - ibdev_err(&hr_dev->ib_dev, - "failed to destroy qp in free mr.\n"); 
- + qp = &free_mr->rsv_qp[i]->ibqp; + hns_roce_v2_destroy_qp(qp, NULL); + kfree(free_mr->rsv_qp[i]); free_mr->rsv_qp[i] = NULL; } } if (free_mr->rsv_cq) { - ib_destroy_cq(free_mr->rsv_cq); + hns_roce_destroy_cq(&free_mr->rsv_cq->ib_cq, NULL); + kfree(free_mr->rsv_cq); free_mr->rsv_cq = NULL; } if (free_mr->rsv_pd) { - ib_dealloc_pd(free_mr->rsv_pd); + hns_roce_dealloc_pd(&free_mr->rsv_pd->ibpd, NULL); + kfree(free_mr->rsv_pd); free_mr->rsv_pd = NULL; } } @@ -2667,55 +2760,46 @@ static int free_mr_alloc_res(struct hns_roce_dev *hr_dev) { struct hns_roce_v2_priv *priv = hr_dev->priv; struct hns_roce_v2_free_mr *free_mr = &priv->free_mr; - struct ib_device *ibdev = &hr_dev->ib_dev; - struct ib_cq_init_attr cq_init_attr = {}; struct ib_qp_init_attr qp_init_attr = {}; struct ib_pd *pd; struct ib_cq *cq; - struct ib_qp *qp; int ret; int i; - pd = ib_alloc_pd(ibdev, 0); - if (IS_ERR(pd)) { - ibdev_err(ibdev, "failed to create pd for free mr.\n"); - return PTR_ERR(pd); - } - free_mr->rsv_pd = pd; + pd = free_mr_init_pd(hr_dev); + if (!pd) + return -ENOMEM; - cq_init_attr.cqe = HNS_ROCE_FREE_MR_USED_CQE_NUM; - cq = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_init_attr); - if (IS_ERR(cq)) { - ibdev_err(ibdev, "failed to create cq for free mr.\n"); - ret = PTR_ERR(cq); - goto create_failed; + cq = free_mr_init_cq(hr_dev); + if (!cq) { + ret = -ENOMEM; + goto create_failed_cq; } - free_mr->rsv_cq = cq; qp_init_attr.qp_type = IB_QPT_RC; qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; - qp_init_attr.send_cq = free_mr->rsv_cq; - qp_init_attr.recv_cq = free_mr->rsv_cq; + qp_init_attr.send_cq = cq; + qp_init_attr.recv_cq = cq; for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) { qp_init_attr.cap.max_send_wr = HNS_ROCE_FREE_MR_USED_SQWQE_NUM; qp_init_attr.cap.max_send_sge = HNS_ROCE_FREE_MR_USED_SQSGE_NUM; qp_init_attr.cap.max_recv_wr = HNS_ROCE_FREE_MR_USED_RQWQE_NUM; qp_init_attr.cap.max_recv_sge = HNS_ROCE_FREE_MR_USED_RQSGE_NUM; - qp = ib_create_qp(free_mr->rsv_pd, &qp_init_attr); - 
if (IS_ERR(qp)) { - ibdev_err(ibdev, "failed to create qp for free mr.\n"); - ret = PTR_ERR(qp); - goto create_failed; - } - - free_mr->rsv_qp[i] = qp; + ret = free_mr_init_qp(hr_dev, cq, &qp_init_attr, i); + if (ret) + goto create_failed_qp; } return 0; -create_failed: - free_mr_exit(hr_dev); +create_failed_qp: + hns_roce_destroy_cq(cq, NULL); + kfree(cq); + +create_failed_cq: + hns_roce_dealloc_pd(pd, NULL); + kfree(pd); return ret; } @@ -2731,14 +2815,17 @@ static int free_mr_modify_rsv_qp(struct hns_roce_dev *hr_dev, int mask; int ret; - hr_qp = to_hr_qp(free_mr->rsv_qp[sl_num]); + hr_qp = to_hr_qp(&free_mr->rsv_qp[sl_num]->ibqp); hr_qp->free_mr_en = 1; + hr_qp->ibqp.device = ibdev; + hr_qp->ibqp.qp_type = IB_QPT_RC; mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS; attr->qp_state = IB_QPS_INIT; attr->port_num = 1; attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE; - ret = ib_modify_qp(&hr_qp->ibqp, attr, mask); + ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, attr, mask, IB_QPS_INIT, + IB_QPS_INIT); if (ret) { ibdev_err(ibdev, "failed to modify qp to init, ret = %d.\n", ret); @@ -2759,7 +2846,8 @@ static int free_mr_modify_rsv_qp(struct hns_roce_dev *hr_dev, rdma_ah_set_sl(&attr->ah_attr, (u8)sl_num); - ret = ib_modify_qp(&hr_qp->ibqp, attr, mask); + ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, attr, mask, IB_QPS_INIT, + IB_QPS_RTR); hr_dev->loop_idc = loopback; if (ret) { ibdev_err(ibdev, "failed to modify qp to rtr, ret = %d.\n", @@ -2773,7 +2861,8 @@ static int free_mr_modify_rsv_qp(struct hns_roce_dev *hr_dev, attr->sq_psn = HNS_ROCE_FREE_MR_USED_PSN; attr->retry_cnt = HNS_ROCE_FREE_MR_USED_QP_RETRY_CNT; attr->timeout = HNS_ROCE_FREE_MR_USED_QP_TIMEOUT; - ret = ib_modify_qp(&hr_qp->ibqp, attr, mask); + ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, attr, mask, IB_QPS_RTR, + IB_QPS_RTS); if (ret) ibdev_err(ibdev, "failed to modify qp to rts, ret = %d.\n", ret); @@ -3416,7 +3505,7 @@ static void free_mr_send_cmd_to_hw(struct hns_roce_dev *hr_dev) 
mutex_lock(&free_mr->mutex); for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) { - hr_qp = to_hr_qp(free_mr->rsv_qp[i]); + hr_qp = free_mr->rsv_qp[i]; ret = free_mr_post_send_lp_wqe(hr_qp); if (ret) { @@ -3431,7 +3520,7 @@ static void free_mr_send_cmd_to_hw(struct hns_roce_dev *hr_dev) end = msecs_to_jiffies(HNS_ROCE_V2_FREE_MR_TIMEOUT) + jiffies; while (cqe_cnt) { - npolled = hns_roce_v2_poll_cq(free_mr->rsv_cq, cqe_cnt, wc); + npolled = hns_roce_v2_poll_cq(&free_mr->rsv_cq->ib_cq, cqe_cnt, wc); if (npolled < 0) { ibdev_err(ibdev, "failed to poll cqe for free mr, remain %d cqe.\n", @@ -5474,7 +5563,7 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, return ret; } -static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) +int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index b11579027e82..017462e52843 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -1329,9 +1329,9 @@ struct hns_roce_link_table { #define HNS_ROCE_EXT_LLM_MIN_PAGES(que_num) ((que_num) * 4 + 2) struct hns_roce_v2_free_mr { - struct ib_qp *rsv_qp[HNS_ROCE_FREE_MR_USED_QP_NUM]; - struct ib_cq *rsv_cq; - struct ib_pd *rsv_pd; + struct hns_roce_qp *rsv_qp[HNS_ROCE_FREE_MR_USED_QP_NUM]; + struct hns_roce_cq *rsv_cq; + struct hns_roce_pd *rsv_pd; struct mutex mutex; }; @@ -1461,6 +1461,8 @@ struct hns_roce_sccc_clr_done { __le32 rsv[5]; }; +int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata); + static inline void hns_roce_write64(struct hns_roce_dev *hr_dev, __le32 val[2], void __iomem *dest) { From bc34c04f7b97c3794dec5a6d6d27ffd5f0e4f5c8 Mon Sep 17 00:00:00 2001 From: Chengchang Tang Date: Sat, 26 Nov 2022 18:29:07 +0800 Subject: [PATCH 3494/4122] RDMA/hns: Fix AH 
attr queried by query_qp The queried AH attr is invalid. This patch fixes it. This problem is found by rdma-core test test_mr_rereg_pd ERROR: test_mr_rereg_pd (tests.test_mr.MRTest) Test that cover rereg MR's PD with this flow: ---------------------------------------------------------------------- Traceback (most recent call last): File "./tests/test_mr.py", line 157, in test_mr_rereg_pd self.restate_qps() File "./tests/test_mr.py", line 113, in restate_qps self.server.qp.to_rts(self.server_qp_attr) File "qp.pyx", line 1137, in pyverbs.qp.QP.to_rts File "qp.pyx", line 1123, in pyverbs.qp.QP.to_rtr pyverbs.pyverbs_error.PyverbsRDMAError: Failed to modify QP state to RTR. Errno: 22, Invalid argument Fixes: 926a01dc000d ("RDMA/hns: Add QP operations support for hip08 SoC") Link: https://lore.kernel.org/r/20221126102911.2921820-3-xuhaoyue1@hisilicon.com Signed-off-by: Chengchang Tang Signed-off-by: Haoyue Xu Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 93c677239686..a8f8c790d31d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -5470,6 +5470,8 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, rdma_ah_set_sl(&qp_attr->ah_attr, hr_reg_read(&context, QPC_SL)); + rdma_ah_set_port_num(&qp_attr->ah_attr, hr_qp->port + 1); + rdma_ah_set_ah_flags(&qp_attr->ah_attr, IB_AH_GRH); grh->flow_label = hr_reg_read(&context, QPC_FL); grh->sgid_index = hr_reg_read(&context, QPC_GMV_IDX); grh->hop_limit = hr_reg_read(&context, QPC_HOPLIMIT); From 9fb39ef2ff3e18f1740625ba04093dfbef086d2b Mon Sep 17 00:00:00 2001 From: Chengchang Tang Date: Sat, 26 Nov 2022 18:29:08 +0800 Subject: [PATCH 3495/4122] RDMA/hns: Fix PBL page MTR find Now, the addresses of the first two pages in the MR will be searched, which is used to speed up the
lookup of the pbl table for hardware. An exception will occur when there is only one page in this MR. This patch fixes the number of pages to search. Fixes: 9b2cf76c9f05 ("RDMA/hns: Optimize PBL buffer allocation process") Link: https://lore.kernel.org/r/20221126102911.2921820-4-xuhaoyue1@hisilicon.com Signed-off-by: Chengchang Tang Signed-off-by: Haoyue Xu Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index a8f8c790d31d..41835cb05983 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -3274,7 +3274,8 @@ static int set_mtpt_pbl(struct hns_roce_dev *hr_dev, int i, count; count = hns_roce_mtr_find(hr_dev, &mr->pbl_mtr, 0, pages, - ARRAY_SIZE(pages), &pbl_ba); + min_t(int, ARRAY_SIZE(pages), mr->npages), + &pbl_ba); if (count < 1) { ibdev_err(ibdev, "failed to find PBL mtr, count = %d.\n", count); From 99dc5a0712883d5d13b620d25b3759d429577bc8 Mon Sep 17 00:00:00 2001 From: Chengchang Tang Date: Sat, 26 Nov 2022 18:29:09 +0800 Subject: [PATCH 3496/4122] RDMA/hns: Fix page size cap from firmware Add verification to make sure the roce page size cap is supported by the system page size. 
Fixes: ba6bb7e97421 ("RDMA/hns: Add interfaces to get pf capabilities from firmware") Link: https://lore.kernel.org/r/20221126102911.2921820-5-xuhaoyue1@hisilicon.com Signed-off-by: Chengchang Tang Signed-off-by: Haoyue Xu Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 41835cb05983..6e74735bbcf8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2345,6 +2345,9 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) caps->wqe_sge_hop_num = hr_reg_read(resp_d, PF_CAPS_D_EX_SGE_HOP_NUM); caps->wqe_rq_hop_num = hr_reg_read(resp_d, PF_CAPS_D_RQWQE_HOP_NUM); + if (!(caps->page_size_cap & PAGE_SIZE)) + caps->page_size_cap = HNS_ROCE_V2_PAGE_SIZE_SUPPORTED; + return 0; } From 667d6164b84884c64de3fc18670cd5a98b0b10cf Mon Sep 17 00:00:00 2001 From: Chengchang Tang Date: Sat, 26 Nov 2022 18:29:10 +0800 Subject: [PATCH 3497/4122] RDMA/hns: Fix error code of CMD The error code is fixed to EIO when CMD fails to execute. This patch converts the error status reported by firmware to linux errno. 
Fixes: a04ff739f2a9 ("RDMA/hns: Add command queue support for hip08 RoCE driver") Link: https://lore.kernel.org/r/20221126102911.2921820-6-xuhaoyue1@hisilicon.com Signed-off-by: Chengchang Tang Signed-off-by: Haoyue Xu Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 26 +++++++++++++++++++++- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 5 +++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 6e74735bbcf8..f32100c6f1d9 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1277,6 +1277,30 @@ static void update_cmdq_status(struct hns_roce_dev *hr_dev) hr_dev->cmd.state = HNS_ROCE_CMDQ_STATE_FATAL_ERR; } +static int hns_roce_cmd_err_convert_errno(u16 desc_ret) +{ + struct hns_roce_cmd_errcode errcode_table[] = { + {CMD_EXEC_SUCCESS, 0}, + {CMD_NO_AUTH, -EPERM}, + {CMD_NOT_EXIST, -EOPNOTSUPP}, + {CMD_CRQ_FULL, -EXFULL}, + {CMD_NEXT_ERR, -ENOSR}, + {CMD_NOT_EXEC, -ENOTBLK}, + {CMD_PARA_ERR, -EINVAL}, + {CMD_RESULT_ERR, -ERANGE}, + {CMD_TIMEOUT, -ETIME}, + {CMD_HILINK_ERR, -ENOLINK}, + {CMD_INFO_ILLEGAL, -ENXIO}, + {CMD_INVALID, -EBADR}, + }; + u16 i; + + for (i = 0; i < ARRAY_SIZE(errcode_table); i++) + if (desc_ret == errcode_table[i].return_status) + return errcode_table[i].errno; + return -EIO; +} + static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, struct hns_roce_cmq_desc *desc, int num) { @@ -1322,7 +1346,7 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, dev_err_ratelimited(hr_dev->dev, "Cmdq IO error, opcode = 0x%x, return = 0x%x.\n", desc->opcode, desc_ret); - ret = -EIO; + ret = hns_roce_cmd_err_convert_errno(desc_ret); } } else { /* FW/HW reset or incorrect number of desc */ diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 017462e52843..47fad456839d 100644 --- 
a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -273,6 +273,11 @@ enum hns_roce_cmd_return_status { CMD_OTHER_ERR = 0xff }; +struct hns_roce_cmd_errcode { + enum hns_roce_cmd_return_status return_status; + int errno; +}; + enum hns_roce_sgid_type { GID_TYPE_FLAG_ROCE_V1 = 0, GID_TYPE_FLAG_ROCE_V2_IPV4, From 682c0722addae4b4a1440c9db9d8c86cb8e09ce5 Mon Sep 17 00:00:00 2001 From: Chengchang Tang Date: Sat, 26 Nov 2022 18:29:11 +0800 Subject: [PATCH 3498/4122] RDMA/hns: Fix XRC caps on HIP08 XRC caps has been set by default. But in fact, XRC is not supported in HIP08. Fixes: 32548870d438 ("RDMA/hns: Add support for XRC on HIP09") Link: https://lore.kernel.org/r/20221126102911.2921820-7-xuhaoyue1@hisilicon.com Signed-off-by: Chengchang Tang Signed-off-by: Haoyue Xu Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index f32100c6f1d9..2716852f5e92 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2051,13 +2051,14 @@ static void set_default_caps(struct hns_roce_dev *hr_dev) caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC | HNS_ROCE_CAP_FLAG_MW | HNS_ROCE_CAP_FLAG_SRQ | HNS_ROCE_CAP_FLAG_FRMR | - HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL | HNS_ROCE_CAP_FLAG_XRC; + HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL; caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM; if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) { caps->flags |= HNS_ROCE_CAP_FLAG_STASH | - HNS_ROCE_CAP_FLAG_DIRECT_WQE; + HNS_ROCE_CAP_FLAG_DIRECT_WQE | + HNS_ROCE_CAP_FLAG_XRC; caps->max_sq_inline = HNS_ROCE_V3_MAX_SQ_INLINE; } else { caps->max_sq_inline = HNS_ROCE_V2_MAX_SQ_INLINE; From 6cfe7bd0dfd33033683639039b5608d6534c19eb Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Thu, 8 Dec 2022 05:19:54 -0500 Subject: [PATCH 3499/4122] RDMA/mlx5: 
Remove not-used IB_FLOW_SPEC_IB define IB_FLOW_SPEC_IB is not used in mlx5 and can be deleted. Signed-off-by: Zhu Yanjun Link: https://lore.kernel.org/r/20221208101954.687960-1-yanjun.zhu@intel.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/fs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c index 490ec308e309..3008632a6c20 100644 --- a/drivers/infiniband/hw/mlx5/fs.c +++ b/drivers/infiniband/hw/mlx5/fs.c @@ -127,7 +127,6 @@ static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask) } #define LAST_ETH_FIELD vlan_tag -#define LAST_IB_FIELD sl #define LAST_IPV4_FIELD tos #define LAST_IPV6_FIELD traffic_class #define LAST_TCP_UDP_FIELD src_port From 74d58cd48a8f5848dfda8bc09d11c90f3ea42b0e Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Thu, 8 Dec 2022 10:07:46 +0100 Subject: [PATCH 3500/4122] USB: sisusbvga: remove console support It was marked as BROKEN since commit 862ee699fefe (USB: sisusbvga: Make console support depend on BROKEN) 2 years ago. Since no one stepped up to fix it, remove it completely. 
Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Christophe Leroy Cc: Yoshinori Sato Cc: Rich Felker Cc: Thomas Winischhofer Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-sh@vger.kernel.org Cc: linux-usb@vger.kernel.org Signed-off-by: Jiri Slaby (SUSE) Link: https://lore.kernel.org/r/20221208090749.28056-1-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/configs/ppc6xx_defconfig | 1 - arch/sh/configs/landisk_defconfig | 1 - drivers/usb/misc/sisusbvga/Kconfig | 34 - drivers/usb/misc/sisusbvga/Makefile | 1 - drivers/usb/misc/sisusbvga/sisusb.c | 276 +--- drivers/usb/misc/sisusbvga/sisusb.h | 21 - drivers/usb/misc/sisusbvga/sisusb_con.c | 1496 ---------------------- drivers/usb/misc/sisusbvga/sisusb_init.c | 955 -------------- drivers/usb/misc/sisusbvga/sisusb_init.h | 180 --- 9 files changed, 6 insertions(+), 2959 deletions(-) delete mode 100644 drivers/usb/misc/sisusbvga/sisusb_con.c delete mode 100644 drivers/usb/misc/sisusbvga/sisusb_init.c delete mode 100644 drivers/usb/misc/sisusbvga/sisusb_init.h diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index d23deb94b36e..f73c98be56c8 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -912,7 +912,6 @@ CONFIG_USB_IDMOUSE=m CONFIG_USB_FTDI_ELAN=m CONFIG_USB_APPLEDISPLAY=m CONFIG_USB_SISUSBVGA=m -CONFIG_USB_SISUSBVGA_CON=y CONFIG_USB_LD=m CONFIG_USB_TRANCEVIBRATOR=m CONFIG_USB_IOWARRIOR=m diff --git a/arch/sh/configs/landisk_defconfig b/arch/sh/configs/landisk_defconfig index 492a0a2e0e36..7037320b654a 100644 --- a/arch/sh/configs/landisk_defconfig +++ b/arch/sh/configs/landisk_defconfig @@ -92,7 +92,6 @@ CONFIG_USB_SERIAL_PL2303=m CONFIG_USB_EMI62=m CONFIG_USB_EMI26=m CONFIG_USB_SISUSBVGA=m -CONFIG_USB_SISUSBVGA_CON=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set diff --git a/drivers/usb/misc/sisusbvga/Kconfig b/drivers/usb/misc/sisusbvga/Kconfig index c12cdd015410..42f81c8eaa92 
100644 --- a/drivers/usb/misc/sisusbvga/Kconfig +++ b/drivers/usb/misc/sisusbvga/Kconfig @@ -3,7 +3,6 @@ config USB_SISUSBVGA tristate "USB 2.0 SVGA dongle support (Net2280/SiS315)" depends on (USB_MUSB_HDRC || USB_EHCI_HCD) - select FONT_SUPPORT if USB_SISUSBVGA_CON help Say Y here if you intend to attach a USB2VGA dongle based on a Net2280 and a SiS315 chip. @@ -13,36 +12,3 @@ config USB_SISUSBVGA To compile this driver as a module, choose M here; the module will be called sisusbvga. If unsure, say N. - -config USB_SISUSBVGA_CON - bool "Text console and mode switching support" if USB_SISUSBVGA - depends on VT && BROKEN - select FONT_8x16 - help - Say Y here if you want a VGA text console via the USB dongle or - want to support userland applications that utilize the driver's - display mode switching capabilities. - - Note that this console supports VGA/EGA text mode only. - - By default, the console part of the driver will not kick in when - the driver is initialized. If you want the driver to take over - one or more of the consoles, you need to specify the number of - the first and last consoles (starting at 1) as driver parameters. - - For example, if the driver is compiled as a module: - - modprobe sisusbvga first=1 last=5 - - If you use hotplug, add this to your modutils config files with - the "options" keyword, such as eg. 
- - options sisusbvga first=1 last=5 - - If the driver is compiled into the kernel image, the parameters - must be given in the kernel command like, such as - - sisusbvga.first=1 sisusbvga.last=5 - - - diff --git a/drivers/usb/misc/sisusbvga/Makefile b/drivers/usb/misc/sisusbvga/Makefile index 6551bce68ac5..93265de80eb9 100644 --- a/drivers/usb/misc/sisusbvga/Makefile +++ b/drivers/usb/misc/sisusbvga/Makefile @@ -6,4 +6,3 @@ obj-$(CONFIG_USB_SISUSBVGA) += sisusbvga.o sisusbvga-y := sisusb.o -sisusbvga-$(CONFIG_USB_SISUSBVGA_CON) += sisusb_con.o sisusb_init.o diff --git a/drivers/usb/misc/sisusbvga/sisusb.c b/drivers/usb/misc/sisusbvga/sisusb.c index f08de33d9ff3..a0d5ba8058f8 100644 --- a/drivers/usb/misc/sisusbvga/sisusb.c +++ b/drivers/usb/misc/sisusbvga/sisusb.c @@ -51,25 +51,11 @@ #include #include "sisusb.h" -#include "sisusb_init.h" - -#ifdef CONFIG_USB_SISUSBVGA_CON -#include -#endif #define SISUSB_DONTSYNC /* Forward declarations / clean-up routines */ -#ifdef CONFIG_USB_SISUSBVGA_CON -static int sisusb_first_vc; -static int sisusb_last_vc; -module_param_named(first, sisusb_first_vc, int, 0); -module_param_named(last, sisusb_last_vc, int, 0); -MODULE_PARM_DESC(first, "Number of first console to take over (1 - MAX_NR_CONSOLES)"); -MODULE_PARM_DESC(last, "Number of last console to take over (1 - MAX_NR_CONSOLES)"); -#endif - static struct usb_driver sisusb_driver; static void sisusb_free_buffers(struct sisusb_usb_data *sisusb) @@ -1198,19 +1184,7 @@ static int sisusb_read_mem_bulk(struct sisusb_usb_data *sisusb, u32 addr, /* High level: Gfx (indexed) register access */ -#ifdef CONFIG_USB_SISUSBVGA_CON -int sisusb_setreg(struct sisusb_usb_data *sisusb, u32 port, u8 data) -{ - return sisusb_write_memio_byte(sisusb, SISUSB_TYPE_IO, port, data); -} - -int sisusb_getreg(struct sisusb_usb_data *sisusb, u32 port, u8 *data) -{ - return sisusb_read_memio_byte(sisusb, SISUSB_TYPE_IO, port, data); -} -#endif - -int sisusb_setidxreg(struct sisusb_usb_data *sisusb, u32 
port, +static int sisusb_setidxreg(struct sisusb_usb_data *sisusb, u32 port, u8 index, u8 data) { int ret; @@ -1220,7 +1194,7 @@ int sisusb_setidxreg(struct sisusb_usb_data *sisusb, u32 port, return ret; } -int sisusb_getidxreg(struct sisusb_usb_data *sisusb, u32 port, +static int sisusb_getidxreg(struct sisusb_usb_data *sisusb, u32 port, u8 index, u8 *data) { int ret; @@ -1230,7 +1204,7 @@ int sisusb_getidxreg(struct sisusb_usb_data *sisusb, u32 port, return ret; } -int sisusb_setidxregandor(struct sisusb_usb_data *sisusb, u32 port, u8 idx, +static int sisusb_setidxregandor(struct sisusb_usb_data *sisusb, u32 port, u8 idx, u8 myand, u8 myor) { int ret; @@ -1258,13 +1232,13 @@ static int sisusb_setidxregmask(struct sisusb_usb_data *sisusb, return ret; } -int sisusb_setidxregor(struct sisusb_usb_data *sisusb, u32 port, +static int sisusb_setidxregor(struct sisusb_usb_data *sisusb, u32 port, u8 index, u8 myor) { return sisusb_setidxregandor(sisusb, port, index, 0xff, myor); } -int sisusb_setidxregand(struct sisusb_usb_data *sisusb, u32 port, +static int sisusb_setidxregand(struct sisusb_usb_data *sisusb, u32 port, u8 idx, u8 myand) { return sisusb_setidxregandor(sisusb, port, idx, myand, 0x00); @@ -1272,38 +1246,6 @@ int sisusb_setidxregand(struct sisusb_usb_data *sisusb, u32 port, /* Write/read video ram */ -#ifdef CONFIG_USB_SISUSBVGA_CON -int sisusb_writeb(struct sisusb_usb_data *sisusb, u32 adr, u8 data) -{ - return sisusb_write_memio_byte(sisusb, SISUSB_TYPE_MEM, adr, data); -} - -int sisusb_readb(struct sisusb_usb_data *sisusb, u32 adr, u8 *data) -{ - return sisusb_read_memio_byte(sisusb, SISUSB_TYPE_MEM, adr, data); -} - -int sisusb_copy_memory(struct sisusb_usb_data *sisusb, u8 *src, - u32 dest, int length) -{ - size_t dummy; - - return sisusb_write_mem_bulk(sisusb, dest, src, length, - NULL, 0, &dummy); -} - -#ifdef SISUSBENDIANTEST -static int sisusb_read_memory(struct sisusb_usb_data *sisusb, char *dest, - u32 src, int length) -{ - size_t dummy; - - return 
sisusb_read_mem_bulk(sisusb, src, dest, length, - NULL, &dummy); -} -#endif -#endif - #ifdef SISUSBENDIANTEST static void sisusb_testreadwrite(struct sisusb_usb_data *sisusb) { @@ -2252,131 +2194,6 @@ static int sisusb_init_gfxdevice(struct sisusb_usb_data *sisusb, int initscreen) return ret; } - -#ifdef CONFIG_USB_SISUSBVGA_CON - -/* Set up default text mode: - * - Set text mode (0x03) - * - Upload default font - * - Upload user font (if available) - */ - -int sisusb_reset_text_mode(struct sisusb_usb_data *sisusb, int init) -{ - int ret = 0, slot = sisusb->font_slot, i; - const struct font_desc *myfont; - u8 *tempbuf; - u16 *tempbufb; - static const char bootstring[] = - "SiSUSB VGA text console, (C) 2005 Thomas Winischhofer."; - static const char bootlogo[] = "(o_ //\\ V_/_"; - - /* sisusb->lock is down */ - - if (!sisusb->SiS_Pr) - return 1; - - sisusb->SiS_Pr->IOAddress = SISUSB_PCI_IOPORTBASE + 0x30; - sisusb->SiS_Pr->sisusb = (void *)sisusb; - - /* Set mode 0x03 */ - SiSUSBSetMode(sisusb->SiS_Pr, 0x03); - - myfont = find_font("VGA8x16"); - if (!myfont) - return 1; - - tempbuf = vmalloc(8192); - if (!tempbuf) - return 1; - - for (i = 0; i < 256; i++) - memcpy(tempbuf + (i * 32), myfont->data + (i * 16), 16); - - /* Upload default font */ - ret = sisusbcon_do_font_op(sisusb, 1, 0, tempbuf, 8192, - 0, 1, NULL, 16, 0); - - vfree(tempbuf); - - /* Upload user font (and reset current slot) */ - if (sisusb->font_backup) { - ret |= sisusbcon_do_font_op(sisusb, 1, 2, sisusb->font_backup, - 8192, sisusb->font_backup_512, 1, NULL, - sisusb->font_backup_height, 0); - if (slot != 2) - sisusbcon_do_font_op(sisusb, 1, 0, NULL, 0, 0, 1, - NULL, 16, 0); - } - - if (init && !sisusb->scrbuf) { - - tempbuf = vmalloc(8192); - if (tempbuf) { - - i = 4096; - tempbufb = (u16 *)tempbuf; - while (i--) - *(tempbufb++) = 0x0720; - - i = 0; - tempbufb = (u16 *)tempbuf; - while (bootlogo[i]) { - *(tempbufb++) = 0x0700 | bootlogo[i++]; - if (!(i % 4)) - tempbufb += 76; - } - - i = 0; - 
tempbufb = (u16 *)tempbuf + 6; - while (bootstring[i]) - *(tempbufb++) = 0x0700 | bootstring[i++]; - - ret |= sisusb_copy_memory(sisusb, tempbuf, - sisusb->vrambase, 8192); - - vfree(tempbuf); - - } - - } else if (sisusb->scrbuf) { - ret |= sisusb_copy_memory(sisusb, (u8 *)sisusb->scrbuf, - sisusb->vrambase, sisusb->scrbuf_size); - } - - if (sisusb->sisusb_cursor_size_from >= 0 && - sisusb->sisusb_cursor_size_to >= 0) { - sisusb_setidxreg(sisusb, SISCR, 0x0a, - sisusb->sisusb_cursor_size_from); - sisusb_setidxregandor(sisusb, SISCR, 0x0b, 0xe0, - sisusb->sisusb_cursor_size_to); - } else { - sisusb_setidxreg(sisusb, SISCR, 0x0a, 0x2d); - sisusb_setidxreg(sisusb, SISCR, 0x0b, 0x0e); - sisusb->sisusb_cursor_size_to = -1; - } - - slot = sisusb->sisusb_cursor_loc; - if (slot < 0) - slot = 0; - - sisusb->sisusb_cursor_loc = -1; - sisusb->bad_cursor_pos = 1; - - sisusb_set_cursor(sisusb, slot); - - sisusb_setidxreg(sisusb, SISCR, 0x0c, (sisusb->cur_start_addr >> 8)); - sisusb_setidxreg(sisusb, SISCR, 0x0d, (sisusb->cur_start_addr & 0xff)); - - sisusb->textmodedestroyed = 0; - - /* sisusb->lock is down */ - - return ret; -} - -#endif - /* fops */ static int sisusb_open(struct inode *inode, struct file *file) @@ -2434,7 +2251,7 @@ static int sisusb_open(struct inode *inode, struct file *file) return 0; } -void sisusb_delete(struct kref *kref) +static void sisusb_delete(struct kref *kref) { struct sisusb_usb_data *sisusb = to_sisusb_dev(kref); @@ -2446,9 +2263,6 @@ void sisusb_delete(struct kref *kref) sisusb->sisusb_dev = NULL; sisusb_free_buffers(sisusb); sisusb_free_urbs(sisusb); -#ifdef CONFIG_USB_SISUSBVGA_CON - kfree(sisusb->SiS_Pr); -#endif kfree(sisusb); } @@ -2842,54 +2656,8 @@ static int sisusb_handle_command(struct sisusb_usb_data *sisusb, case SUCMD_HANDLETEXTMODE: retval = 0; -#ifdef CONFIG_USB_SISUSBVGA_CON - /* Gfx core must be initialized, SiS_Pr must exist */ - if (!sisusb->gfxinit || !sisusb->SiS_Pr) - return -ENODEV; - - switch (y->data0) { - case 0: - 
retval = sisusb_reset_text_mode(sisusb, 0); - break; - case 1: - sisusb->textmodedestroyed = 1; - break; - } -#endif break; -#ifdef CONFIG_USB_SISUSBVGA_CON - case SUCMD_SETMODE: - /* Gfx core must be initialized, SiS_Pr must exist */ - if (!sisusb->gfxinit || !sisusb->SiS_Pr) - return -ENODEV; - - retval = 0; - - sisusb->SiS_Pr->IOAddress = SISUSB_PCI_IOPORTBASE + 0x30; - sisusb->SiS_Pr->sisusb = (void *)sisusb; - - if (SiSUSBSetMode(sisusb->SiS_Pr, y->data3)) - retval = -EINVAL; - - break; - - case SUCMD_SETVESAMODE: - /* Gfx core must be initialized, SiS_Pr must exist */ - if (!sisusb->gfxinit || !sisusb->SiS_Pr) - return -ENODEV; - - retval = 0; - - sisusb->SiS_Pr->IOAddress = SISUSB_PCI_IOPORTBASE + 0x30; - sisusb->SiS_Pr->sisusb = (void *)sisusb; - - if (SiSUSBSetVESAMode(sisusb->SiS_Pr, y->data3)) - retval = -EINVAL; - - break; -#endif - default: retval = -EINVAL; } @@ -2942,11 +2710,7 @@ static long sisusb_ioctl(struct file *file, unsigned int cmd, unsigned long arg) x.sisusb_vramsize = sisusb->vramsize; x.sisusb_minor = sisusb->minor; x.sisusb_fbdevactive = 0; -#ifdef CONFIG_USB_SISUSBVGA_CON - x.sisusb_conactive = sisusb->haveconsole ? 
1 : 0; -#else x.sisusb_conactive = 0; -#endif memset(x.sisusb_reserved, 0, sizeof(x.sisusb_reserved)); if (copy_to_user((void __user *)arg, &x, sizeof(x))) @@ -3090,15 +2854,6 @@ static int sisusb_probe(struct usb_interface *intf, dev_info(&sisusb->sisusb_dev->dev, "Allocated %d output buffers\n", sisusb->numobufs); -#ifdef CONFIG_USB_SISUSBVGA_CON - /* Allocate our SiS_Pr */ - sisusb->SiS_Pr = kmalloc(sizeof(struct SiS_Private), GFP_KERNEL); - if (!sisusb->SiS_Pr) { - retval = -ENOMEM; - goto error_4; - } -#endif - /* Do remaining init stuff */ init_waitqueue_head(&sisusb->wait_q); @@ -3111,12 +2866,6 @@ static int sisusb_probe(struct usb_interface *intf, if (dev->speed == USB_SPEED_HIGH || dev->speed >= USB_SPEED_SUPER) { int initscreen = 1; -#ifdef CONFIG_USB_SISUSBVGA_CON - if (sisusb_first_vc > 0 && sisusb_last_vc > 0 && - sisusb_first_vc <= sisusb_last_vc && - sisusb_last_vc <= MAX_NR_CONSOLES) - initscreen = 0; -#endif if (sisusb_init_gfxdevice(sisusb, initscreen)) dev_err(&sisusb->sisusb_dev->dev, "Failed to early initialize device\n"); @@ -3133,10 +2882,6 @@ static int sisusb_probe(struct usb_interface *intf, dev_dbg(&sisusb->sisusb_dev->dev, "*** RWTEST END ***\n"); #endif -#ifdef CONFIG_USB_SISUSBVGA_CON - sisusb_console_init(sisusb, sisusb_first_vc, sisusb_last_vc); -#endif - return 0; error_4: @@ -3159,10 +2904,6 @@ static void sisusb_disconnect(struct usb_interface *intf) if (!sisusb) return; -#ifdef CONFIG_USB_SISUSBVGA_CON - sisusb_console_exit(sisusb); -#endif - usb_deregister_dev(intf, &usb_sisusb_class); mutex_lock(&sisusb->lock); @@ -3208,11 +2949,6 @@ static struct usb_driver sisusb_driver = { static int __init usb_sisusb_init(void) { - -#ifdef CONFIG_USB_SISUSBVGA_CON - sisusb_init_concode(); -#endif - return usb_register(&sisusb_driver); } diff --git a/drivers/usb/misc/sisusbvga/sisusb.h b/drivers/usb/misc/sisusbvga/sisusb.h index c0fb9e1c5361..e5b1228655d0 100644 --- a/drivers/usb/misc/sisusbvga/sisusb.h +++ 
b/drivers/usb/misc/sisusbvga/sisusb.h @@ -48,7 +48,6 @@ /* Include console and mode switching code? */ -#include #include #include "sisusb_struct.h" @@ -126,26 +125,6 @@ struct sisusb_usb_data { unsigned char gfxinit; /* graphics core initialized? */ unsigned short chipid, chipvendor; unsigned short chiprevision; -#ifdef CONFIG_USB_SISUSBVGA_CON - struct SiS_Private *SiS_Pr; - unsigned long scrbuf; - unsigned int scrbuf_size; - int haveconsole, con_first, con_last; - int havethisconsole[MAX_NR_CONSOLES]; - int textmodedestroyed; - unsigned int sisusb_num_columns; /* real number, not vt's idea */ - int cur_start_addr, con_rolled_over; - int sisusb_cursor_loc, bad_cursor_pos; - int sisusb_cursor_size_from; - int sisusb_cursor_size_to; - int current_font_height, current_font_512; - int font_backup_size, font_backup_height, font_backup_512; - char *font_backup; - int font_slot; - struct vc_data *sisusb_display_fg; - int is_gfx; - int con_blanked; -#endif }; #define to_sisusb_dev(d) container_of(d, struct sisusb_usb_data, kref) diff --git a/drivers/usb/misc/sisusbvga/sisusb_con.c b/drivers/usb/misc/sisusbvga/sisusb_con.c deleted file mode 100644 index fcb95fb639e0..000000000000 --- a/drivers/usb/misc/sisusbvga/sisusb_con.c +++ /dev/null @@ -1,1496 +0,0 @@ -// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) -/* - * sisusb - usb kernel driver for SiS315(E) based USB2VGA dongles - * - * VGA text mode console part - * - * Copyright (C) 2005 by Thomas Winischhofer, Vienna, Austria - * - * If distributed as part of the Linux kernel, this code is licensed under the - * terms of the GPL v2. - * - * Otherwise, the following license terms apply: - * - * * Redistribution and use in source and binary forms, with or without - * * modification, are permitted provided that the following conditions - * * are met: - * * 1) Redistributions of source code must retain the above copyright - * * notice, this list of conditions and the following disclaimer. 
- * * 2) Redistributions in binary form must reproduce the above copyright - * * notice, this list of conditions and the following disclaimer in the - * * documentation and/or other materials provided with the distribution. - * * 3) The name of the author may not be used to endorse or promote products - * * derived from this software without specific psisusbr written permission. - * * - * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESSED OR - * * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Author: Thomas Winischhofer - * - * Portions based on vgacon.c which are - * Created 28 Sep 1997 by Geert Uytterhoeven - * Rewritten by Martin Mares , July 1998 - * based on code Copyright (C) 1991, 1992 Linus Torvalds - * 1995 Jay Estabrook - * - * A note on using in_atomic() in here: We can't handle console - * calls from non-schedulable context due to our USB-dependend - * nature. For now, this driver just ignores any calls if it - * detects this state. 
- * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "sisusb.h" -#include "sisusb_init.h" - -/* vc_data -> sisusb conversion table */ -static struct sisusb_usb_data *mysisusbs[MAX_NR_CONSOLES]; - -/* Forward declaration */ -static const struct consw sisusb_con; - -static inline void -sisusbcon_memsetw(u16 *s, u16 c, unsigned int count) -{ - memset16(s, c, count / 2); -} - -static inline void -sisusb_initialize(struct sisusb_usb_data *sisusb) -{ - /* Reset cursor and start address */ - if (sisusb_setidxreg(sisusb, SISCR, 0x0c, 0x00)) - return; - if (sisusb_setidxreg(sisusb, SISCR, 0x0d, 0x00)) - return; - if (sisusb_setidxreg(sisusb, SISCR, 0x0e, 0x00)) - return; - sisusb_setidxreg(sisusb, SISCR, 0x0f, 0x00); -} - -static inline void -sisusbcon_set_start_address(struct sisusb_usb_data *sisusb, struct vc_data *c) -{ - sisusb->cur_start_addr = (c->vc_visible_origin - sisusb->scrbuf) / 2; - - sisusb_setidxreg(sisusb, SISCR, 0x0c, (sisusb->cur_start_addr >> 8)); - sisusb_setidxreg(sisusb, SISCR, 0x0d, (sisusb->cur_start_addr & 0xff)); -} - -void -sisusb_set_cursor(struct sisusb_usb_data *sisusb, unsigned int location) -{ - if (sisusb->sisusb_cursor_loc == location) - return; - - sisusb->sisusb_cursor_loc = location; - - /* Hardware bug: Text cursor appears twice or not at all - * at some positions. Work around it with the cursor skew - * bits. 
- */ - - if ((location & 0x0007) == 0x0007) { - sisusb->bad_cursor_pos = 1; - location--; - if (sisusb_setidxregandor(sisusb, SISCR, 0x0b, 0x1f, 0x20)) - return; - } else if (sisusb->bad_cursor_pos) { - if (sisusb_setidxregand(sisusb, SISCR, 0x0b, 0x1f)) - return; - sisusb->bad_cursor_pos = 0; - } - - if (sisusb_setidxreg(sisusb, SISCR, 0x0e, (location >> 8))) - return; - sisusb_setidxreg(sisusb, SISCR, 0x0f, (location & 0xff)); -} - -static inline struct sisusb_usb_data * -sisusb_get_sisusb(unsigned short console) -{ - return mysisusbs[console]; -} - -static inline int -sisusb_sisusb_valid(struct sisusb_usb_data *sisusb) -{ - if (!sisusb->present || !sisusb->ready || !sisusb->sisusb_dev) - return 0; - - return 1; -} - -static struct sisusb_usb_data * -sisusb_get_sisusb_lock_and_check(unsigned short console) -{ - struct sisusb_usb_data *sisusb; - - /* We can't handle console calls in non-schedulable - * context due to our locks and the USB transport. - * So we simply ignore them. This should only affect - * some calls to printk. - */ - if (in_atomic()) - return NULL; - - sisusb = sisusb_get_sisusb(console); - if (!sisusb) - return NULL; - - mutex_lock(&sisusb->lock); - - if (!sisusb_sisusb_valid(sisusb) || - !sisusb->havethisconsole[console]) { - mutex_unlock(&sisusb->lock); - return NULL; - } - - return sisusb; -} - -static int -sisusb_is_inactive(struct vc_data *c, struct sisusb_usb_data *sisusb) -{ - if (sisusb->is_gfx || - sisusb->textmodedestroyed || - c->vc_mode != KD_TEXT) - return 1; - - return 0; -} - -/* con_startup console interface routine */ -static const char * -sisusbcon_startup(void) -{ - return "SISUSBCON"; -} - -/* con_init console interface routine */ -static void -sisusbcon_init(struct vc_data *c, int init) -{ - struct sisusb_usb_data *sisusb; - int cols, rows; - - /* This is called by do_take_over_console(), - * ie by us/under our control. It is - * only called after text mode and fonts - * are set up/restored. 
- */ - - sisusb = sisusb_get_sisusb(c->vc_num); - if (!sisusb) - return; - - mutex_lock(&sisusb->lock); - - if (!sisusb_sisusb_valid(sisusb)) { - mutex_unlock(&sisusb->lock); - return; - } - - c->vc_can_do_color = 1; - - c->vc_complement_mask = 0x7700; - - c->vc_hi_font_mask = sisusb->current_font_512 ? 0x0800 : 0; - - sisusb->haveconsole = 1; - - sisusb->havethisconsole[c->vc_num] = 1; - - /* We only support 640x400 */ - c->vc_scan_lines = 400; - - c->vc_font.height = sisusb->current_font_height; - - /* We only support width = 8 */ - cols = 80; - rows = c->vc_scan_lines / c->vc_font.height; - - /* Increment usage count for our sisusb. - * Doing so saves us from upping/downing - * the disconnect semaphore; we can't - * lose our sisusb until this is undone - * in con_deinit. For all other console - * interface functions, it suffices to - * use sisusb->lock and do a quick check - * of sisusb for device disconnection. - */ - kref_get(&sisusb->kref); - - if (!*c->uni_pagedict_loc) - con_set_default_unimap(c); - - mutex_unlock(&sisusb->lock); - - if (init) { - c->vc_cols = cols; - c->vc_rows = rows; - } else - vc_resize(c, cols, rows); -} - -/* con_deinit console interface routine */ -static void -sisusbcon_deinit(struct vc_data *c) -{ - struct sisusb_usb_data *sisusb; - int i; - - /* This is called by do_take_over_console() - * and others, ie not under our control. 
- */ - - sisusb = sisusb_get_sisusb(c->vc_num); - if (!sisusb) - return; - - mutex_lock(&sisusb->lock); - - /* Clear ourselves in mysisusbs */ - mysisusbs[c->vc_num] = NULL; - - sisusb->havethisconsole[c->vc_num] = 0; - - /* Free our font buffer if all consoles are gone */ - if (sisusb->font_backup) { - for(i = 0; i < MAX_NR_CONSOLES; i++) { - if (sisusb->havethisconsole[c->vc_num]) - break; - } - if (i == MAX_NR_CONSOLES) { - vfree(sisusb->font_backup); - sisusb->font_backup = NULL; - } - } - - mutex_unlock(&sisusb->lock); - - /* decrement the usage count on our sisusb */ - kref_put(&sisusb->kref, sisusb_delete); -} - -/* interface routine */ -static u8 -sisusbcon_build_attr(struct vc_data *c, u8 color, enum vc_intensity intensity, - bool blink, bool underline, bool reverse, - bool unused) -{ - u8 attr = color; - - if (underline) - attr = (attr & 0xf0) | c->vc_ulcolor; - else if (intensity == VCI_HALF_BRIGHT) - attr = (attr & 0xf0) | c->vc_halfcolor; - - if (reverse) - attr = ((attr) & 0x88) | - ((((attr) >> 4) | - ((attr) << 4)) & 0x77); - - if (blink) - attr ^= 0x80; - - if (intensity == VCI_BOLD) - attr ^= 0x08; - - return attr; -} - -/* Interface routine */ -static void -sisusbcon_invert_region(struct vc_data *vc, u16 *p, int count) -{ - /* Invert a region. This is called with a pointer - * to the console's internal screen buffer. So we - * simply do the inversion there and rely on - * a call to putc(s) to update the real screen. 
- */ - - while (count--) { - u16 a = *p; - - *p++ = ((a) & 0x88ff) | - (((a) & 0x7000) >> 4) | - (((a) & 0x0700) << 4); - } -} - -static inline void *sisusb_vaddr(const struct sisusb_usb_data *sisusb, - const struct vc_data *c, unsigned int x, unsigned int y) -{ - return (u16 *)c->vc_origin + y * sisusb->sisusb_num_columns + x; -} - -static inline unsigned long sisusb_haddr(const struct sisusb_usb_data *sisusb, - const struct vc_data *c, unsigned int x, unsigned int y) -{ - unsigned long offset = c->vc_origin - sisusb->scrbuf; - - /* 2 bytes per each character */ - offset += 2 * (y * sisusb->sisusb_num_columns + x); - - return sisusb->vrambase + offset; -} - -/* Interface routine */ -static void -sisusbcon_putc(struct vc_data *c, int ch, int y, int x) -{ - struct sisusb_usb_data *sisusb; - - sisusb = sisusb_get_sisusb_lock_and_check(c->vc_num); - if (!sisusb) - return; - - /* sisusb->lock is down */ - if (sisusb_is_inactive(c, sisusb)) { - mutex_unlock(&sisusb->lock); - return; - } - - sisusb_copy_memory(sisusb, sisusb_vaddr(sisusb, c, x, y), - sisusb_haddr(sisusb, c, x, y), 2); - - mutex_unlock(&sisusb->lock); -} - -/* Interface routine */ -static void -sisusbcon_putcs(struct vc_data *c, const unsigned short *s, - int count, int y, int x) -{ - struct sisusb_usb_data *sisusb; - - sisusb = sisusb_get_sisusb_lock_and_check(c->vc_num); - if (!sisusb) - return; - - /* sisusb->lock is down */ - - /* Need to put the characters into the buffer ourselves, - * because the vt does this AFTER calling us. 
- */ - - memcpy(sisusb_vaddr(sisusb, c, x, y), s, count * 2); - - if (sisusb_is_inactive(c, sisusb)) { - mutex_unlock(&sisusb->lock); - return; - } - - sisusb_copy_memory(sisusb, sisusb_vaddr(sisusb, c, x, y), - sisusb_haddr(sisusb, c, x, y), count * 2); - - mutex_unlock(&sisusb->lock); -} - -/* Interface routine */ -static void -sisusbcon_clear(struct vc_data *c, int y, int x, int height, int width) -{ - struct sisusb_usb_data *sisusb; - u16 eattr = c->vc_video_erase_char; - int i, length, cols; - u16 *dest; - - if (width <= 0 || height <= 0) - return; - - sisusb = sisusb_get_sisusb_lock_and_check(c->vc_num); - if (!sisusb) - return; - - /* sisusb->lock is down */ - - /* Need to clear buffer ourselves, because the vt does - * this AFTER calling us. - */ - - dest = sisusb_vaddr(sisusb, c, x, y); - - cols = sisusb->sisusb_num_columns; - - if (width > cols) - width = cols; - - if (x == 0 && width >= c->vc_cols) { - - sisusbcon_memsetw(dest, eattr, height * cols * 2); - - } else { - - for (i = height; i > 0; i--, dest += cols) - sisusbcon_memsetw(dest, eattr, width * 2); - - } - - if (sisusb_is_inactive(c, sisusb)) { - mutex_unlock(&sisusb->lock); - return; - } - - length = ((height * cols) - x - (cols - width - x)) * 2; - - - sisusb_copy_memory(sisusb, sisusb_vaddr(sisusb, c, x, y), - sisusb_haddr(sisusb, c, x, y), length); - - mutex_unlock(&sisusb->lock); -} - -/* interface routine */ -static int -sisusbcon_switch(struct vc_data *c) -{ - struct sisusb_usb_data *sisusb; - int length; - - /* Returnvalue 0 means we have fully restored screen, - * and vt doesn't need to call do_update_region(). - * Returnvalue != 0 naturally means the opposite. - */ - - sisusb = sisusb_get_sisusb_lock_and_check(c->vc_num); - if (!sisusb) - return 0; - - /* sisusb->lock is down */ - - /* Don't write to screen if in gfx mode */ - if (sisusb_is_inactive(c, sisusb)) { - mutex_unlock(&sisusb->lock); - return 0; - } - - /* That really should not happen. 
It would mean we are - * being called while the vc is using its private buffer - * as origin. - */ - if (c->vc_origin == (unsigned long)c->vc_screenbuf) { - mutex_unlock(&sisusb->lock); - dev_dbg(&sisusb->sisusb_dev->dev, "ASSERT ORIGIN != SCREENBUF!\n"); - return 0; - } - - /* Check that we don't copy too much */ - length = min((int)c->vc_screenbuf_size, - (int)(sisusb->scrbuf + sisusb->scrbuf_size - c->vc_origin)); - - /* Restore the screen contents */ - memcpy((u16 *)c->vc_origin, (u16 *)c->vc_screenbuf, length); - - sisusb_copy_memory(sisusb, (u8 *)c->vc_origin, - sisusb_haddr(sisusb, c, 0, 0), length); - - mutex_unlock(&sisusb->lock); - - return 0; -} - -/* interface routine */ -static void -sisusbcon_save_screen(struct vc_data *c) -{ - struct sisusb_usb_data *sisusb; - int length; - - /* Save the current screen contents to vc's private - * buffer. - */ - - sisusb = sisusb_get_sisusb_lock_and_check(c->vc_num); - if (!sisusb) - return; - - /* sisusb->lock is down */ - - if (sisusb_is_inactive(c, sisusb)) { - mutex_unlock(&sisusb->lock); - return; - } - - /* Check that we don't copy too much */ - length = min((int)c->vc_screenbuf_size, - (int)(sisusb->scrbuf + sisusb->scrbuf_size - c->vc_origin)); - - /* Save the screen contents to vc's private buffer */ - memcpy((u16 *)c->vc_screenbuf, (u16 *)c->vc_origin, length); - - mutex_unlock(&sisusb->lock); -} - -/* interface routine */ -static void -sisusbcon_set_palette(struct vc_data *c, const unsigned char *table) -{ - struct sisusb_usb_data *sisusb; - int i, j; - - /* Return value not used by vt */ - - if (!con_is_visible(c)) - return; - - sisusb = sisusb_get_sisusb_lock_and_check(c->vc_num); - if (!sisusb) - return; - - /* sisusb->lock is down */ - - if (sisusb_is_inactive(c, sisusb)) { - mutex_unlock(&sisusb->lock); - return; - } - - for (i = j = 0; i < 16; i++) { - if (sisusb_setreg(sisusb, SISCOLIDX, table[i])) - break; - if (sisusb_setreg(sisusb, SISCOLDATA, c->vc_palette[j++] >> 2)) - break; - if 
(sisusb_setreg(sisusb, SISCOLDATA, c->vc_palette[j++] >> 2)) - break; - if (sisusb_setreg(sisusb, SISCOLDATA, c->vc_palette[j++] >> 2)) - break; - } - - mutex_unlock(&sisusb->lock); -} - -/* interface routine */ -static int -sisusbcon_blank(struct vc_data *c, int blank, int mode_switch) -{ - struct sisusb_usb_data *sisusb; - u8 sr1, cr17, pmreg, cr63; - int ret = 0; - - sisusb = sisusb_get_sisusb_lock_and_check(c->vc_num); - if (!sisusb) - return 0; - - /* sisusb->lock is down */ - - if (mode_switch) - sisusb->is_gfx = blank ? 1 : 0; - - if (sisusb_is_inactive(c, sisusb)) { - mutex_unlock(&sisusb->lock); - return 0; - } - - switch (blank) { - - case 1: /* Normal blanking: Clear screen */ - case -1: - sisusbcon_memsetw((u16 *)c->vc_origin, - c->vc_video_erase_char, - c->vc_screenbuf_size); - sisusb_copy_memory(sisusb, (u8 *)c->vc_origin, - sisusb_haddr(sisusb, c, 0, 0), - c->vc_screenbuf_size); - sisusb->con_blanked = 1; - ret = 1; - break; - - default: /* VESA blanking */ - switch (blank) { - case 0: /* Unblank */ - sr1 = 0x00; - cr17 = 0x80; - pmreg = 0x00; - cr63 = 0x00; - ret = 1; - sisusb->con_blanked = 0; - break; - case VESA_VSYNC_SUSPEND + 1: - sr1 = 0x20; - cr17 = 0x80; - pmreg = 0x80; - cr63 = 0x40; - break; - case VESA_HSYNC_SUSPEND + 1: - sr1 = 0x20; - cr17 = 0x80; - pmreg = 0x40; - cr63 = 0x40; - break; - case VESA_POWERDOWN + 1: - sr1 = 0x20; - cr17 = 0x00; - pmreg = 0xc0; - cr63 = 0x40; - break; - default: - mutex_unlock(&sisusb->lock); - return -EINVAL; - } - - sisusb_setidxregandor(sisusb, SISSR, 0x01, ~0x20, sr1); - sisusb_setidxregandor(sisusb, SISCR, 0x17, 0x7f, cr17); - sisusb_setidxregandor(sisusb, SISSR, 0x1f, 0x3f, pmreg); - sisusb_setidxregandor(sisusb, SISCR, 0x63, 0xbf, cr63); - - } - - mutex_unlock(&sisusb->lock); - - return ret; -} - -/* interface routine */ -static void -sisusbcon_scrolldelta(struct vc_data *c, int lines) -{ - struct sisusb_usb_data *sisusb; - - sisusb = sisusb_get_sisusb_lock_and_check(c->vc_num); - if (!sisusb) - 
return; - - /* sisusb->lock is down */ - - if (sisusb_is_inactive(c, sisusb)) { - mutex_unlock(&sisusb->lock); - return; - } - - vc_scrolldelta_helper(c, lines, sisusb->con_rolled_over, - (void *)sisusb->scrbuf, sisusb->scrbuf_size); - - sisusbcon_set_start_address(sisusb, c); - - mutex_unlock(&sisusb->lock); -} - -/* Interface routine */ -static void -sisusbcon_cursor(struct vc_data *c, int mode) -{ - struct sisusb_usb_data *sisusb; - int from, to, baseline; - - sisusb = sisusb_get_sisusb_lock_and_check(c->vc_num); - if (!sisusb) - return; - - /* sisusb->lock is down */ - - if (sisusb_is_inactive(c, sisusb)) { - mutex_unlock(&sisusb->lock); - return; - } - - if (c->vc_origin != c->vc_visible_origin) { - c->vc_visible_origin = c->vc_origin; - sisusbcon_set_start_address(sisusb, c); - } - - if (mode == CM_ERASE) { - sisusb_setidxregor(sisusb, SISCR, 0x0a, 0x20); - sisusb->sisusb_cursor_size_to = -1; - mutex_unlock(&sisusb->lock); - return; - } - - sisusb_set_cursor(sisusb, (c->vc_pos - sisusb->scrbuf) / 2); - - baseline = c->vc_font.height - (c->vc_font.height < 10 ? 
1 : 2); - - switch (CUR_SIZE(c->vc_cursor_type)) { - case CUR_BLOCK: from = 1; - to = c->vc_font.height; - break; - case CUR_TWO_THIRDS: from = c->vc_font.height / 3; - to = baseline; - break; - case CUR_LOWER_HALF: from = c->vc_font.height / 2; - to = baseline; - break; - case CUR_LOWER_THIRD: from = (c->vc_font.height * 2) / 3; - to = baseline; - break; - case CUR_NONE: from = 31; - to = 30; - break; - default: - case CUR_UNDERLINE: from = baseline - 1; - to = baseline; - break; - } - - if (sisusb->sisusb_cursor_size_from != from || - sisusb->sisusb_cursor_size_to != to) { - - sisusb_setidxreg(sisusb, SISCR, 0x0a, from); - sisusb_setidxregandor(sisusb, SISCR, 0x0b, 0xe0, to); - - sisusb->sisusb_cursor_size_from = from; - sisusb->sisusb_cursor_size_to = to; - } - - mutex_unlock(&sisusb->lock); -} - -static bool -sisusbcon_scroll_area(struct vc_data *c, struct sisusb_usb_data *sisusb, - unsigned int t, unsigned int b, enum con_scroll dir, - unsigned int lines) -{ - int cols = sisusb->sisusb_num_columns; - int length = ((b - t) * cols) * 2; - u16 eattr = c->vc_video_erase_char; - - /* sisusb->lock is down */ - - /* Scroll an area which does not match the - * visible screen's dimensions. This needs - * to be done separately, as it does not - * use hardware panning. 
- */ - - switch (dir) { - - case SM_UP: - memmove(sisusb_vaddr(sisusb, c, 0, t), - sisusb_vaddr(sisusb, c, 0, t + lines), - (b - t - lines) * cols * 2); - sisusbcon_memsetw(sisusb_vaddr(sisusb, c, 0, b - lines), - eattr, lines * cols * 2); - break; - - case SM_DOWN: - memmove(sisusb_vaddr(sisusb, c, 0, t + lines), - sisusb_vaddr(sisusb, c, 0, t), - (b - t - lines) * cols * 2); - sisusbcon_memsetw(sisusb_vaddr(sisusb, c, 0, t), eattr, - lines * cols * 2); - break; - } - - sisusb_copy_memory(sisusb, sisusb_vaddr(sisusb, c, 0, t), - sisusb_haddr(sisusb, c, 0, t), length); - - mutex_unlock(&sisusb->lock); - - return true; -} - -/* Interface routine */ -static bool -sisusbcon_scroll(struct vc_data *c, unsigned int t, unsigned int b, - enum con_scroll dir, unsigned int lines) -{ - struct sisusb_usb_data *sisusb; - u16 eattr = c->vc_video_erase_char; - int copyall = 0; - unsigned long oldorigin; - unsigned int delta = lines * c->vc_size_row; - - /* Returning != 0 means we have done the scrolling successfully. - * Returning 0 makes vt do the scrolling on its own. - * Note that con_scroll is only called if the console is - * visible. In that case, the origin should be our buffer, - * not the vt's private one. 
- */ - - if (!lines) - return true; - - sisusb = sisusb_get_sisusb_lock_and_check(c->vc_num); - if (!sisusb) - return false; - - /* sisusb->lock is down */ - - if (sisusb_is_inactive(c, sisusb)) { - mutex_unlock(&sisusb->lock); - return false; - } - - /* Special case */ - if (t || b != c->vc_rows) - return sisusbcon_scroll_area(c, sisusb, t, b, dir, lines); - - if (c->vc_origin != c->vc_visible_origin) { - c->vc_visible_origin = c->vc_origin; - sisusbcon_set_start_address(sisusb, c); - } - - /* limit amount to maximum realistic size */ - if (lines > c->vc_rows) - lines = c->vc_rows; - - oldorigin = c->vc_origin; - - switch (dir) { - - case SM_UP: - - if (c->vc_scr_end + delta >= - sisusb->scrbuf + sisusb->scrbuf_size) { - memcpy((u16 *)sisusb->scrbuf, - (u16 *)(oldorigin + delta), - c->vc_screenbuf_size - delta); - c->vc_origin = sisusb->scrbuf; - sisusb->con_rolled_over = oldorigin - sisusb->scrbuf; - copyall = 1; - } else - c->vc_origin += delta; - - sisusbcon_memsetw( - (u16 *)(c->vc_origin + c->vc_screenbuf_size - delta), - eattr, delta); - - break; - - case SM_DOWN: - - if (oldorigin - delta < sisusb->scrbuf) { - memmove((void *)sisusb->scrbuf + sisusb->scrbuf_size - - c->vc_screenbuf_size + delta, - (u16 *)oldorigin, - c->vc_screenbuf_size - delta); - c->vc_origin = sisusb->scrbuf + - sisusb->scrbuf_size - - c->vc_screenbuf_size; - sisusb->con_rolled_over = 0; - copyall = 1; - } else - c->vc_origin -= delta; - - c->vc_scr_end = c->vc_origin + c->vc_screenbuf_size; - - scr_memsetw((u16 *)(c->vc_origin), eattr, delta); - - break; - } - - if (copyall) - sisusb_copy_memory(sisusb, - (u8 *)c->vc_origin, - sisusb_haddr(sisusb, c, 0, 0), - c->vc_screenbuf_size); - else if (dir == SM_UP) - sisusb_copy_memory(sisusb, - (u8 *)c->vc_origin + c->vc_screenbuf_size - delta, - sisusb_haddr(sisusb, c, 0, 0) + - c->vc_screenbuf_size - delta, - delta); - else - sisusb_copy_memory(sisusb, - (u8 *)c->vc_origin, - sisusb_haddr(sisusb, c, 0, 0), - delta); - - c->vc_scr_end = 
c->vc_origin + c->vc_screenbuf_size; - c->vc_visible_origin = c->vc_origin; - - sisusbcon_set_start_address(sisusb, c); - - c->vc_pos = c->vc_pos - oldorigin + c->vc_origin; - - mutex_unlock(&sisusb->lock); - - return true; -} - -/* Interface routine */ -static int -sisusbcon_set_origin(struct vc_data *c) -{ - struct sisusb_usb_data *sisusb; - - /* Returning != 0 means we were successful. - * Returning 0 will vt make to use its own - * screenbuffer as the origin. - */ - - sisusb = sisusb_get_sisusb_lock_and_check(c->vc_num); - if (!sisusb) - return 0; - - /* sisusb->lock is down */ - - if (sisusb_is_inactive(c, sisusb) || sisusb->con_blanked) { - mutex_unlock(&sisusb->lock); - return 0; - } - - c->vc_origin = c->vc_visible_origin = sisusb->scrbuf; - - sisusbcon_set_start_address(sisusb, c); - - sisusb->con_rolled_over = 0; - - mutex_unlock(&sisusb->lock); - - return true; -} - -/* Interface routine */ -static int -sisusbcon_resize(struct vc_data *c, unsigned int newcols, unsigned int newrows, - unsigned int user) -{ - struct sisusb_usb_data *sisusb; - int fh; - - sisusb = sisusb_get_sisusb_lock_and_check(c->vc_num); - if (!sisusb) - return -ENODEV; - - fh = sisusb->current_font_height; - - mutex_unlock(&sisusb->lock); - - /* We are quite unflexible as regards resizing. The vt code - * handles sizes where the line length isn't equal the pitch - * quite badly. As regards the rows, our panning tricks only - * work well if the number of rows equals the visible number - * of rows. - */ - - if (newcols != 80 || c->vc_scan_lines / fh != newrows) - return -EINVAL; - - return 0; -} - -int -sisusbcon_do_font_op(struct sisusb_usb_data *sisusb, int set, int slot, - u8 *arg, int cmapsz, int ch512, int dorecalc, - struct vc_data *c, int fh, int uplock) -{ - int font_select = 0x00, i, err = 0; - u32 offset = 0; - u8 dummy; - - /* sisusb->lock is down */ - - /* - * The default font is kept in slot 0. - * A user font is loaded in slot 2 (256 ch) - * or 2+3 (512 ch). 
- */ - - if ((slot != 0 && slot != 2) || !fh) { - if (uplock) - mutex_unlock(&sisusb->lock); - return -EINVAL; - } - - if (set) - sisusb->font_slot = slot; - - /* Default font is always 256 */ - if (slot == 0) - ch512 = 0; - else - offset = 4 * cmapsz; - - font_select = (slot == 0) ? 0x00 : (ch512 ? 0x0e : 0x0a); - - err |= sisusb_setidxreg(sisusb, SISSR, 0x00, 0x01); /* Reset */ - err |= sisusb_setidxreg(sisusb, SISSR, 0x02, 0x04); /* Write to plane 2 */ - err |= sisusb_setidxreg(sisusb, SISSR, 0x04, 0x07); /* Memory mode a0-bf */ - err |= sisusb_setidxreg(sisusb, SISSR, 0x00, 0x03); /* Reset */ - - if (err) - goto font_op_error; - - err |= sisusb_setidxreg(sisusb, SISGR, 0x04, 0x03); /* Select plane read 2 */ - err |= sisusb_setidxreg(sisusb, SISGR, 0x05, 0x00); /* Disable odd/even */ - err |= sisusb_setidxreg(sisusb, SISGR, 0x06, 0x00); /* Address range a0-bf */ - - if (err) - goto font_op_error; - - if (arg) { - if (set) - for (i = 0; i < cmapsz; i++) { - err |= sisusb_writeb(sisusb, - sisusb->vrambase + offset + i, - arg[i]); - if (err) - break; - } - else - for (i = 0; i < cmapsz; i++) { - err |= sisusb_readb(sisusb, - sisusb->vrambase + offset + i, - &arg[i]); - if (err) - break; - } - - /* - * In 512-character mode, the character map is not contiguous if - * we want to remain EGA compatible -- which we do - */ - - if (ch512) { - if (set) - for (i = 0; i < cmapsz; i++) { - err |= sisusb_writeb(sisusb, - sisusb->vrambase + offset + - (2 * cmapsz) + i, - arg[cmapsz + i]); - if (err) - break; - } - else - for (i = 0; i < cmapsz; i++) { - err |= sisusb_readb(sisusb, - sisusb->vrambase + offset + - (2 * cmapsz) + i, - &arg[cmapsz + i]); - if (err) - break; - } - } - } - - if (err) - goto font_op_error; - - err |= sisusb_setidxreg(sisusb, SISSR, 0x00, 0x01); /* Reset */ - err |= sisusb_setidxreg(sisusb, SISSR, 0x02, 0x03); /* Write to planes 0+1 */ - err |= sisusb_setidxreg(sisusb, SISSR, 0x04, 0x03); /* Memory mode a0-bf */ - if (set) - sisusb_setidxreg(sisusb, 
SISSR, 0x03, font_select); - err |= sisusb_setidxreg(sisusb, SISSR, 0x00, 0x03); /* Reset end */ - - if (err) - goto font_op_error; - - err |= sisusb_setidxreg(sisusb, SISGR, 0x04, 0x00); /* Select plane read 0 */ - err |= sisusb_setidxreg(sisusb, SISGR, 0x05, 0x10); /* Enable odd/even */ - err |= sisusb_setidxreg(sisusb, SISGR, 0x06, 0x06); /* Address range b8-bf */ - - if (err) - goto font_op_error; - - if ((set) && (ch512 != sisusb->current_font_512)) { - - /* Font is shared among all our consoles. - * And so is the hi_font_mask. - */ - for (i = 0; i < MAX_NR_CONSOLES; i++) { - struct vc_data *d = vc_cons[i].d; - if (d && d->vc_sw == &sisusb_con) - d->vc_hi_font_mask = ch512 ? 0x0800 : 0; - } - - sisusb->current_font_512 = ch512; - - /* color plane enable register: - 256-char: enable intensity bit - 512-char: disable intensity bit */ - sisusb_getreg(sisusb, SISINPSTAT, &dummy); - sisusb_setreg(sisusb, SISAR, 0x12); - sisusb_setreg(sisusb, SISAR, ch512 ? 0x07 : 0x0f); - - sisusb_getreg(sisusb, SISINPSTAT, &dummy); - sisusb_setreg(sisusb, SISAR, 0x20); - sisusb_getreg(sisusb, SISINPSTAT, &dummy); - } - - if (dorecalc) { - - /* - * Adjust the screen to fit a font of a certain height - */ - - unsigned char ovr, vde, fsr; - int rows = 0, maxscan = 0; - - if (c) { - - /* Number of video rows */ - rows = c->vc_scan_lines / fh; - /* Scan lines to actually display-1 */ - maxscan = rows * fh - 1; - - /*printk(KERN_DEBUG "sisusb recalc rows %d maxscan %d fh %d sl %d\n", - rows, maxscan, fh, c->vc_scan_lines);*/ - - sisusb_getidxreg(sisusb, SISCR, 0x07, &ovr); - vde = maxscan & 0xff; - ovr = (ovr & 0xbd) | - ((maxscan & 0x100) >> 7) | - ((maxscan & 0x200) >> 3); - sisusb_setidxreg(sisusb, SISCR, 0x07, ovr); - sisusb_setidxreg(sisusb, SISCR, 0x12, vde); - - } - - sisusb_getidxreg(sisusb, SISCR, 0x09, &fsr); - fsr = (fsr & 0xe0) | (fh - 1); - sisusb_setidxreg(sisusb, SISCR, 0x09, fsr); - sisusb->current_font_height = fh; - - sisusb->sisusb_cursor_size_from = -1; - 
sisusb->sisusb_cursor_size_to = -1; - - } - - if (uplock) - mutex_unlock(&sisusb->lock); - - if (dorecalc && c) { - int rows = c->vc_scan_lines / fh; - - /* Now adjust our consoles' size */ - - for (i = 0; i < MAX_NR_CONSOLES; i++) { - struct vc_data *vc = vc_cons[i].d; - - if (vc && vc->vc_sw == &sisusb_con) { - if (con_is_visible(vc)) { - vc->vc_sw->con_cursor(vc, CM_DRAW); - } - vc->vc_font.height = fh; - vc_resize(vc, 0, rows); - } - } - } - - return 0; - -font_op_error: - if (uplock) - mutex_unlock(&sisusb->lock); - - return -EIO; -} - -/* Interface routine */ -static int -sisusbcon_font_set(struct vc_data *c, struct console_font *font, - unsigned int flags) -{ - struct sisusb_usb_data *sisusb; - unsigned charcount = font->charcount; - - if (font->width != 8 || (charcount != 256 && charcount != 512)) - return -EINVAL; - - sisusb = sisusb_get_sisusb_lock_and_check(c->vc_num); - if (!sisusb) - return -ENODEV; - - /* sisusb->lock is down */ - - /* Save the user-provided font into a buffer. This - * is used for restoring text mode after quitting - * from X and for the con_getfont routine. - */ - if (sisusb->font_backup) { - if (sisusb->font_backup_size < charcount) { - vfree(sisusb->font_backup); - sisusb->font_backup = NULL; - } - } - - if (!sisusb->font_backup) - sisusb->font_backup = vmalloc(array_size(charcount, 32)); - - if (sisusb->font_backup) { - memcpy(sisusb->font_backup, font->data, array_size(charcount, 32)); - sisusb->font_backup_size = charcount; - sisusb->font_backup_height = font->height; - sisusb->font_backup_512 = (charcount == 512) ? 1 : 0; - } - - /* do_font_op ups sisusb->lock */ - - return sisusbcon_do_font_op(sisusb, 1, 2, font->data, - 8192, (charcount == 512), - (!(flags & KD_FONT_FLAG_DONT_RECALC)) ? 
1 : 0, - c, font->height, 1); -} - -/* Interface routine */ -static int -sisusbcon_font_get(struct vc_data *c, struct console_font *font) -{ - struct sisusb_usb_data *sisusb; - - sisusb = sisusb_get_sisusb_lock_and_check(c->vc_num); - if (!sisusb) - return -ENODEV; - - /* sisusb->lock is down */ - - font->width = 8; - font->height = c->vc_font.height; - font->charcount = 256; - - if (!font->data) { - mutex_unlock(&sisusb->lock); - return 0; - } - - if (!sisusb->font_backup) { - mutex_unlock(&sisusb->lock); - return -ENODEV; - } - - /* Copy 256 chars only, like vgacon */ - memcpy(font->data, sisusb->font_backup, 256 * 32); - - mutex_unlock(&sisusb->lock); - - return 0; -} - -/* - * The console `switch' structure for the sisusb console - */ - -static const struct consw sisusb_con = { - .owner = THIS_MODULE, - .con_startup = sisusbcon_startup, - .con_init = sisusbcon_init, - .con_deinit = sisusbcon_deinit, - .con_clear = sisusbcon_clear, - .con_putc = sisusbcon_putc, - .con_putcs = sisusbcon_putcs, - .con_cursor = sisusbcon_cursor, - .con_scroll = sisusbcon_scroll, - .con_switch = sisusbcon_switch, - .con_blank = sisusbcon_blank, - .con_font_set = sisusbcon_font_set, - .con_font_get = sisusbcon_font_get, - .con_set_palette = sisusbcon_set_palette, - .con_scrolldelta = sisusbcon_scrolldelta, - .con_build_attr = sisusbcon_build_attr, - .con_invert_region = sisusbcon_invert_region, - .con_set_origin = sisusbcon_set_origin, - .con_save_screen = sisusbcon_save_screen, - .con_resize = sisusbcon_resize, -}; - -/* Our very own dummy console driver */ - -static const char *sisusbdummycon_startup(void) -{ - return "SISUSBVGADUMMY"; -} - -static void sisusbdummycon_init(struct vc_data *vc, int init) -{ - vc->vc_can_do_color = 1; - if (init) { - vc->vc_cols = 80; - vc->vc_rows = 25; - } else - vc_resize(vc, 80, 25); -} - -static void sisusbdummycon_deinit(struct vc_data *vc) { } -static void sisusbdummycon_clear(struct vc_data *vc, int sy, int sx, - int height, int width) { } 
-static void sisusbdummycon_putc(struct vc_data *vc, int c, int ypos, - int xpos) { } -static void sisusbdummycon_putcs(struct vc_data *vc, const unsigned short *s, - int count, int ypos, int xpos) { } -static void sisusbdummycon_cursor(struct vc_data *vc, int mode) { } - -static bool sisusbdummycon_scroll(struct vc_data *vc, unsigned int top, - unsigned int bottom, enum con_scroll dir, - unsigned int lines) -{ - return false; -} - -static int sisusbdummycon_switch(struct vc_data *vc) -{ - return 0; -} - -static int sisusbdummycon_blank(struct vc_data *vc, int blank, int mode_switch) -{ - return 0; -} - -static const struct consw sisusb_dummy_con = { - .owner = THIS_MODULE, - .con_startup = sisusbdummycon_startup, - .con_init = sisusbdummycon_init, - .con_deinit = sisusbdummycon_deinit, - .con_clear = sisusbdummycon_clear, - .con_putc = sisusbdummycon_putc, - .con_putcs = sisusbdummycon_putcs, - .con_cursor = sisusbdummycon_cursor, - .con_scroll = sisusbdummycon_scroll, - .con_switch = sisusbdummycon_switch, - .con_blank = sisusbdummycon_blank, -}; - -int -sisusb_console_init(struct sisusb_usb_data *sisusb, int first, int last) -{ - int i, ret; - - mutex_lock(&sisusb->lock); - - /* Erm.. 
that should not happen */ - if (sisusb->haveconsole || !sisusb->SiS_Pr) { - mutex_unlock(&sisusb->lock); - return 1; - } - - sisusb->con_first = first; - sisusb->con_last = last; - - if (first > last || - first > MAX_NR_CONSOLES || - last > MAX_NR_CONSOLES) { - mutex_unlock(&sisusb->lock); - return 1; - } - - /* If gfxcore not initialized or no consoles given, quit graciously */ - if (!sisusb->gfxinit || first < 1 || last < 1) { - mutex_unlock(&sisusb->lock); - return 0; - } - - sisusb->sisusb_cursor_loc = -1; - sisusb->sisusb_cursor_size_from = -1; - sisusb->sisusb_cursor_size_to = -1; - - /* Set up text mode (and upload default font) */ - if (sisusb_reset_text_mode(sisusb, 1)) { - mutex_unlock(&sisusb->lock); - dev_err(&sisusb->sisusb_dev->dev, "Failed to set up text mode\n"); - return 1; - } - - /* Initialize some gfx registers */ - sisusb_initialize(sisusb); - - for (i = first - 1; i <= last - 1; i++) { - /* Save sisusb for our interface routines */ - mysisusbs[i] = sisusb; - } - - /* Initial console setup */ - sisusb->sisusb_num_columns = 80; - - /* Use a 32K buffer (matches b8000-bffff area) */ - sisusb->scrbuf_size = 32 * 1024; - - /* Allocate screen buffer */ - if (!(sisusb->scrbuf = (unsigned long)vmalloc(sisusb->scrbuf_size))) { - mutex_unlock(&sisusb->lock); - dev_err(&sisusb->sisusb_dev->dev, "Failed to allocate screen buffer\n"); - return 1; - } - - mutex_unlock(&sisusb->lock); - - /* Now grab the desired console(s) */ - console_lock(); - ret = do_take_over_console(&sisusb_con, first - 1, last - 1, 0); - console_unlock(); - if (!ret) - sisusb->haveconsole = 1; - else { - for (i = first - 1; i <= last - 1; i++) - mysisusbs[i] = NULL; - } - - return ret; -} - -void -sisusb_console_exit(struct sisusb_usb_data *sisusb) -{ - int i; - - /* This is called if the device is disconnected - * and while disconnect and lock semaphores - * are up. 
This should be save because we - * can't lose our sisusb any other way but by - * disconnection (and hence, the disconnect - * sema is for protecting all other access - * functions from disconnection, not the - * other way round). - */ - - /* Now what do we do in case of disconnection: - * One alternative would be to simply call - * give_up_console(). Nah, not a good idea. - * give_up_console() is obviously buggy as it - * only discards the consw pointer from the - * driver_map, but doesn't adapt vc->vc_sw - * of the affected consoles. Hence, the next - * call to any of the console functions will - * eventually take a trip to oops county. - * Also, give_up_console for some reason - * doesn't decrement our module refcount. - * Instead, we switch our consoles to a private - * dummy console. This, of course, keeps our - * refcount up as well, but it works perfectly. - */ - - if (sisusb->haveconsole) { - for (i = 0; i < MAX_NR_CONSOLES; i++) - if (sisusb->havethisconsole[i]) { - console_lock(); - do_take_over_console(&sisusb_dummy_con, i, i, 0); - console_unlock(); - /* At this point, con_deinit for all our - * consoles is executed by do_take_over_console(). 
- */ - } - sisusb->haveconsole = 0; - } - - vfree((void *)sisusb->scrbuf); - sisusb->scrbuf = 0; - - vfree(sisusb->font_backup); - sisusb->font_backup = NULL; -} - -void __init sisusb_init_concode(void) -{ - int i; - - for (i = 0; i < MAX_NR_CONSOLES; i++) - mysisusbs[i] = NULL; -} diff --git a/drivers/usb/misc/sisusbvga/sisusb_init.c b/drivers/usb/misc/sisusbvga/sisusb_init.c deleted file mode 100644 index 7c11198d5dda..000000000000 --- a/drivers/usb/misc/sisusbvga/sisusb_init.c +++ /dev/null @@ -1,955 +0,0 @@ -// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) -/* - * sisusb - usb kernel driver for SiS315(E) based USB2VGA dongles - * - * Display mode initializing code - * - * Copyright (C) 2001-2005 by Thomas Winischhofer, Vienna, Austria - * - * If distributed as part of the Linux kernel, this code is licensed under the - * terms of the GPL v2. - * - * Otherwise, the following license terms apply: - * - * * Redistribution and use in source and binary forms, with or without - * * modification, are permitted provided that the following conditions - * * are met: - * * 1) Redistributions of source code must retain the above copyright - * * notice, this list of conditions and the following disclaimer. - * * 2) Redistributions in binary form must reproduce the above copyright - * * notice, this list of conditions and the following disclaimer in the - * * documentation and/or other materials provided with the distribution. - * * 3) The name of the author may not be used to endorse or promote products - * * derived from this software without specific prior written permission. - * * - * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- * * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Author: Thomas Winischhofer - * - */ - -#include -#include -#include -#include -#include - -#include "sisusb.h" -#include "sisusb_init.h" -#include "sisusb_tables.h" - -/*********************************************/ -/* POINTER INITIALIZATION */ -/*********************************************/ - -static void SiSUSB_InitPtr(struct SiS_Private *SiS_Pr) -{ - SiS_Pr->SiS_ModeResInfo = SiSUSB_ModeResInfo; - SiS_Pr->SiS_StandTable = SiSUSB_StandTable; - - SiS_Pr->SiS_SModeIDTable = SiSUSB_SModeIDTable; - SiS_Pr->SiS_EModeIDTable = SiSUSB_EModeIDTable; - SiS_Pr->SiS_RefIndex = SiSUSB_RefIndex; - SiS_Pr->SiS_CRT1Table = SiSUSB_CRT1Table; - - SiS_Pr->SiS_VCLKData = SiSUSB_VCLKData; -} - -/*********************************************/ -/* HELPER: SetReg, GetReg */ -/*********************************************/ - -static void -SiS_SetReg(struct SiS_Private *SiS_Pr, unsigned long port, - unsigned short index, unsigned short data) -{ - sisusb_setidxreg(SiS_Pr->sisusb, port, index, data); -} - -static void -SiS_SetRegByte(struct SiS_Private *SiS_Pr, unsigned long port, - unsigned short data) -{ - sisusb_setreg(SiS_Pr->sisusb, port, data); -} - -static unsigned char -SiS_GetReg(struct SiS_Private *SiS_Pr, unsigned long port, unsigned short index) -{ - u8 data; - - sisusb_getidxreg(SiS_Pr->sisusb, port, index, &data); - - return data; -} - -static unsigned char -SiS_GetRegByte(struct SiS_Private *SiS_Pr, unsigned long port) -{ - u8 data; - - 
sisusb_getreg(SiS_Pr->sisusb, port, &data); - - return data; -} - -static void -SiS_SetRegANDOR(struct SiS_Private *SiS_Pr, unsigned long port, - unsigned short index, unsigned short DataAND, - unsigned short DataOR) -{ - sisusb_setidxregandor(SiS_Pr->sisusb, port, index, DataAND, DataOR); -} - -static void -SiS_SetRegAND(struct SiS_Private *SiS_Pr, unsigned long port, - unsigned short index, unsigned short DataAND) -{ - sisusb_setidxregand(SiS_Pr->sisusb, port, index, DataAND); -} - -static void -SiS_SetRegOR(struct SiS_Private *SiS_Pr, unsigned long port, - unsigned short index, unsigned short DataOR) -{ - sisusb_setidxregor(SiS_Pr->sisusb, port, index, DataOR); -} - -/*********************************************/ -/* HELPER: DisplayOn, DisplayOff */ -/*********************************************/ - -static void SiS_DisplayOn(struct SiS_Private *SiS_Pr) -{ - SiS_SetRegAND(SiS_Pr, SiS_Pr->SiS_P3c4, 0x01, 0xDF); -} - -/*********************************************/ -/* HELPER: Init Port Addresses */ -/*********************************************/ - -static void SiSUSBRegInit(struct SiS_Private *SiS_Pr, unsigned long BaseAddr) -{ - SiS_Pr->SiS_P3c4 = BaseAddr + 0x14; - SiS_Pr->SiS_P3d4 = BaseAddr + 0x24; - SiS_Pr->SiS_P3c0 = BaseAddr + 0x10; - SiS_Pr->SiS_P3ce = BaseAddr + 0x1e; - SiS_Pr->SiS_P3c2 = BaseAddr + 0x12; - SiS_Pr->SiS_P3ca = BaseAddr + 0x1a; - SiS_Pr->SiS_P3c6 = BaseAddr + 0x16; - SiS_Pr->SiS_P3c7 = BaseAddr + 0x17; - SiS_Pr->SiS_P3c8 = BaseAddr + 0x18; - SiS_Pr->SiS_P3c9 = BaseAddr + 0x19; - SiS_Pr->SiS_P3cb = BaseAddr + 0x1b; - SiS_Pr->SiS_P3cc = BaseAddr + 0x1c; - SiS_Pr->SiS_P3cd = BaseAddr + 0x1d; - SiS_Pr->SiS_P3da = BaseAddr + 0x2a; - SiS_Pr->SiS_Part1Port = BaseAddr + SIS_CRT2_PORT_04; -} - -/*********************************************/ -/* HELPER: GetSysFlags */ -/*********************************************/ - -static void SiS_GetSysFlags(struct SiS_Private *SiS_Pr) -{ - SiS_Pr->SiS_MyCR63 = 0x63; -} - 
-/*********************************************/ -/* HELPER: Init PCI & Engines */ -/*********************************************/ - -static void SiSInitPCIetc(struct SiS_Private *SiS_Pr) -{ - SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3c4, 0x20, 0xa1); - /* - Enable 2D (0x40) - * - Enable 3D (0x02) - * - Enable 3D vertex command fetch (0x10) - * - Enable 3D command parser (0x08) - * - Enable 3D G/L transformation engine (0x80) - */ - SiS_SetRegOR(SiS_Pr, SiS_Pr->SiS_P3c4, 0x1E, 0xDA); -} - -/*********************************************/ -/* HELPER: SET SEGMENT REGISTERS */ -/*********************************************/ - -static void SiS_SetSegRegLower(struct SiS_Private *SiS_Pr, unsigned short value) -{ - unsigned short temp; - - value &= 0x00ff; - temp = SiS_GetRegByte(SiS_Pr, SiS_Pr->SiS_P3cb) & 0xf0; - temp |= (value >> 4); - SiS_SetRegByte(SiS_Pr, SiS_Pr->SiS_P3cb, temp); - temp = SiS_GetRegByte(SiS_Pr, SiS_Pr->SiS_P3cd) & 0xf0; - temp |= (value & 0x0f); - SiS_SetRegByte(SiS_Pr, SiS_Pr->SiS_P3cd, temp); -} - -static void SiS_SetSegRegUpper(struct SiS_Private *SiS_Pr, unsigned short value) -{ - unsigned short temp; - - value &= 0x00ff; - temp = SiS_GetRegByte(SiS_Pr, SiS_Pr->SiS_P3cb) & 0x0f; - temp |= (value & 0xf0); - SiS_SetRegByte(SiS_Pr, SiS_Pr->SiS_P3cb, temp); - temp = SiS_GetRegByte(SiS_Pr, SiS_Pr->SiS_P3cd) & 0x0f; - temp |= (value << 4); - SiS_SetRegByte(SiS_Pr, SiS_Pr->SiS_P3cd, temp); -} - -static void SiS_SetSegmentReg(struct SiS_Private *SiS_Pr, unsigned short value) -{ - SiS_SetSegRegLower(SiS_Pr, value); - SiS_SetSegRegUpper(SiS_Pr, value); -} - -static void SiS_ResetSegmentReg(struct SiS_Private *SiS_Pr) -{ - SiS_SetSegmentReg(SiS_Pr, 0); -} - -static void -SiS_SetSegmentRegOver(struct SiS_Private *SiS_Pr, unsigned short value) -{ - unsigned short temp = value >> 8; - - temp &= 0x07; - temp |= (temp << 4); - SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3c4, 0x1d, temp); - SiS_SetSegmentReg(SiS_Pr, value); -} - -static void SiS_ResetSegmentRegOver(struct 
SiS_Private *SiS_Pr) -{ - SiS_SetSegmentRegOver(SiS_Pr, 0); -} - -static void SiS_ResetSegmentRegisters(struct SiS_Private *SiS_Pr) -{ - SiS_ResetSegmentReg(SiS_Pr); - SiS_ResetSegmentRegOver(SiS_Pr); -} - -/*********************************************/ -/* HELPER: SearchModeID */ -/*********************************************/ - -static int -SiS_SearchModeID(struct SiS_Private *SiS_Pr, unsigned short *ModeNo, - unsigned short *ModeIdIndex) -{ - if ((*ModeNo) <= 0x13) { - - if ((*ModeNo) != 0x03) - return 0; - - (*ModeIdIndex) = 0; - - } else { - - for (*ModeIdIndex = 0;; (*ModeIdIndex)++) { - - if (SiS_Pr->SiS_EModeIDTable[*ModeIdIndex].Ext_ModeID == - (*ModeNo)) - break; - - if (SiS_Pr->SiS_EModeIDTable[*ModeIdIndex].Ext_ModeID == - 0xFF) - return 0; - } - - } - - return 1; -} - -/*********************************************/ -/* HELPER: ENABLE CRT1 */ -/*********************************************/ - -static void SiS_HandleCRT1(struct SiS_Private *SiS_Pr) -{ - /* Enable CRT1 gating */ - SiS_SetRegAND(SiS_Pr, SiS_Pr->SiS_P3d4, SiS_Pr->SiS_MyCR63, 0xbf); -} - -/*********************************************/ -/* HELPER: GetColorDepth */ -/*********************************************/ - -static unsigned short -SiS_GetColorDepth(struct SiS_Private *SiS_Pr, unsigned short ModeNo, - unsigned short ModeIdIndex) -{ - static const unsigned short ColorDepth[6] = { 1, 2, 4, 4, 6, 8 }; - unsigned short modeflag; - short index; - - if (ModeNo <= 0x13) { - modeflag = SiS_Pr->SiS_SModeIDTable[ModeIdIndex].St_ModeFlag; - } else { - modeflag = SiS_Pr->SiS_EModeIDTable[ModeIdIndex].Ext_ModeFlag; - } - - index = (modeflag & ModeTypeMask) - ModeEGA; - if (index < 0) - index = 0; - return ColorDepth[index]; -} - -/*********************************************/ -/* HELPER: GetOffset */ -/*********************************************/ - -static unsigned short -SiS_GetOffset(struct SiS_Private *SiS_Pr, unsigned short ModeNo, - unsigned short ModeIdIndex, unsigned short rrti) -{ - 
unsigned short xres, temp, colordepth, infoflag; - - infoflag = SiS_Pr->SiS_RefIndex[rrti].Ext_InfoFlag; - xres = SiS_Pr->SiS_RefIndex[rrti].XRes; - - colordepth = SiS_GetColorDepth(SiS_Pr, ModeNo, ModeIdIndex); - - temp = xres / 16; - - if (infoflag & InterlaceMode) - temp <<= 1; - - temp *= colordepth; - - if (xres % 16) - temp += (colordepth >> 1); - - return temp; -} - -/*********************************************/ -/* SEQ */ -/*********************************************/ - -static void -SiS_SetSeqRegs(struct SiS_Private *SiS_Pr, unsigned short StandTableIndex) -{ - unsigned char SRdata; - int i; - - SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3c4, 0x00, 0x03); - - SRdata = SiS_Pr->SiS_StandTable[StandTableIndex].SR[0] | 0x20; - SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3c4, 0x01, SRdata); - - for (i = 2; i <= 4; i++) { - SRdata = SiS_Pr->SiS_StandTable[StandTableIndex].SR[i - 1]; - SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3c4, i, SRdata); - } -} - -/*********************************************/ -/* MISC */ -/*********************************************/ - -static void -SiS_SetMiscRegs(struct SiS_Private *SiS_Pr, unsigned short StandTableIndex) -{ - unsigned char Miscdata = SiS_Pr->SiS_StandTable[StandTableIndex].MISC; - - SiS_SetRegByte(SiS_Pr, SiS_Pr->SiS_P3c2, Miscdata); -} - -/*********************************************/ -/* CRTC */ -/*********************************************/ - -static void -SiS_SetCRTCRegs(struct SiS_Private *SiS_Pr, unsigned short StandTableIndex) -{ - unsigned char CRTCdata; - unsigned short i; - - SiS_SetRegAND(SiS_Pr, SiS_Pr->SiS_P3d4, 0x11, 0x7f); - - for (i = 0; i <= 0x18; i++) { - CRTCdata = SiS_Pr->SiS_StandTable[StandTableIndex].CRTC[i]; - SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3d4, i, CRTCdata); - } -} - -/*********************************************/ -/* ATT */ -/*********************************************/ - -static void -SiS_SetATTRegs(struct SiS_Private *SiS_Pr, unsigned short StandTableIndex) -{ - unsigned char ARdata; - unsigned short i; - - for 
(i = 0; i <= 0x13; i++) { - ARdata = SiS_Pr->SiS_StandTable[StandTableIndex].ATTR[i]; - SiS_GetRegByte(SiS_Pr, SiS_Pr->SiS_P3da); - SiS_SetRegByte(SiS_Pr, SiS_Pr->SiS_P3c0, i); - SiS_SetRegByte(SiS_Pr, SiS_Pr->SiS_P3c0, ARdata); - } - SiS_GetRegByte(SiS_Pr, SiS_Pr->SiS_P3da); - SiS_SetRegByte(SiS_Pr, SiS_Pr->SiS_P3c0, 0x14); - SiS_SetRegByte(SiS_Pr, SiS_Pr->SiS_P3c0, 0x00); - - SiS_GetRegByte(SiS_Pr, SiS_Pr->SiS_P3da); - SiS_SetRegByte(SiS_Pr, SiS_Pr->SiS_P3c0, 0x20); - SiS_GetRegByte(SiS_Pr, SiS_Pr->SiS_P3da); -} - -/*********************************************/ -/* GRC */ -/*********************************************/ - -static void -SiS_SetGRCRegs(struct SiS_Private *SiS_Pr, unsigned short StandTableIndex) -{ - unsigned char GRdata; - unsigned short i; - - for (i = 0; i <= 0x08; i++) { - GRdata = SiS_Pr->SiS_StandTable[StandTableIndex].GRC[i]; - SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3ce, i, GRdata); - } - - if (SiS_Pr->SiS_ModeType > ModeVGA) { - /* 256 color disable */ - SiS_SetRegAND(SiS_Pr, SiS_Pr->SiS_P3ce, 0x05, 0xBF); - } -} - -/*********************************************/ -/* CLEAR EXTENDED REGISTERS */ -/*********************************************/ - -static void SiS_ClearExt1Regs(struct SiS_Private *SiS_Pr, unsigned short ModeNo) -{ - int i; - - for (i = 0x0A; i <= 0x0E; i++) { - SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3c4, i, 0x00); - } - - SiS_SetRegAND(SiS_Pr, SiS_Pr->SiS_P3c4, 0x37, 0xFE); -} - -/*********************************************/ -/* Get rate index */ -/*********************************************/ - -static unsigned short -SiS_GetRatePtr(struct SiS_Private *SiS_Pr, unsigned short ModeNo, - unsigned short ModeIdIndex) -{ - unsigned short rrti, i, index, temp; - - if (ModeNo <= 0x13) - return 0xFFFF; - - index = SiS_GetReg(SiS_Pr, SiS_Pr->SiS_P3d4, 0x33) & 0x0F; - if (index > 0) - index--; - - rrti = SiS_Pr->SiS_EModeIDTable[ModeIdIndex].REFindex; - ModeNo = SiS_Pr->SiS_RefIndex[rrti].ModeID; - - i = 0; - do { - if (SiS_Pr->SiS_RefIndex[rrti + 
i].ModeID != ModeNo) - break; - - temp = - SiS_Pr->SiS_RefIndex[rrti + i].Ext_InfoFlag & ModeTypeMask; - if (temp < SiS_Pr->SiS_ModeType) - break; - - i++; - index--; - } while (index != 0xFFFF); - - i--; - - return (rrti + i); -} - -/*********************************************/ -/* SYNC */ -/*********************************************/ - -static void SiS_SetCRT1Sync(struct SiS_Private *SiS_Pr, unsigned short rrti) -{ - unsigned short sync = SiS_Pr->SiS_RefIndex[rrti].Ext_InfoFlag >> 8; - sync &= 0xC0; - sync |= 0x2f; - SiS_SetRegByte(SiS_Pr, SiS_Pr->SiS_P3c2, sync); -} - -/*********************************************/ -/* CRTC/2 */ -/*********************************************/ - -static void -SiS_SetCRT1CRTC(struct SiS_Private *SiS_Pr, unsigned short ModeNo, - unsigned short ModeIdIndex, unsigned short rrti) -{ - unsigned char index; - unsigned short temp, i, j, modeflag; - - SiS_SetRegAND(SiS_Pr, SiS_Pr->SiS_P3d4, 0x11, 0x7f); - - modeflag = SiS_Pr->SiS_EModeIDTable[ModeIdIndex].Ext_ModeFlag; - - index = SiS_Pr->SiS_RefIndex[rrti].Ext_CRT1CRTC; - - for (i = 0, j = 0; i <= 7; i++, j++) { - SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3d4, j, - SiS_Pr->SiS_CRT1Table[index].CR[i]); - } - for (j = 0x10; i <= 10; i++, j++) { - SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3d4, j, - SiS_Pr->SiS_CRT1Table[index].CR[i]); - } - for (j = 0x15; i <= 12; i++, j++) { - SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3d4, j, - SiS_Pr->SiS_CRT1Table[index].CR[i]); - } - for (j = 0x0A; i <= 15; i++, j++) { - SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3c4, j, - SiS_Pr->SiS_CRT1Table[index].CR[i]); - } - - temp = SiS_Pr->SiS_CRT1Table[index].CR[16] & 0xE0; - SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3c4, 0x0E, temp); - - temp = ((SiS_Pr->SiS_CRT1Table[index].CR[16]) & 0x01) << 5; - if (modeflag & DoubleScanMode) - temp |= 0x80; - SiS_SetRegANDOR(SiS_Pr, SiS_Pr->SiS_P3d4, 0x09, 0x5F, temp); - - if (SiS_Pr->SiS_ModeType > ModeVGA) - SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3d4, 0x14, 0x4F); -} - -/*********************************************/ 
-/* OFFSET & PITCH */ -/*********************************************/ -/* (partly overruled by SetPitch() in XF86) */ -/*********************************************/ - -static void -SiS_SetCRT1Offset(struct SiS_Private *SiS_Pr, unsigned short ModeNo, - unsigned short ModeIdIndex, unsigned short rrti) -{ - unsigned short du = SiS_GetOffset(SiS_Pr, ModeNo, ModeIdIndex, rrti); - unsigned short infoflag = SiS_Pr->SiS_RefIndex[rrti].Ext_InfoFlag; - unsigned short temp; - - temp = (du >> 8) & 0x0f; - SiS_SetRegANDOR(SiS_Pr, SiS_Pr->SiS_P3c4, 0x0E, 0xF0, temp); - - SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3d4, 0x13, (du & 0xFF)); - - if (infoflag & InterlaceMode) - du >>= 1; - - du <<= 5; - temp = (du >> 8) & 0xff; - if (du & 0xff) - temp++; - temp++; - SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3c4, 0x10, temp); -} - -/*********************************************/ -/* VCLK */ -/*********************************************/ - -static void -SiS_SetCRT1VCLK(struct SiS_Private *SiS_Pr, unsigned short ModeNo, - unsigned short rrti) -{ - unsigned short index = SiS_Pr->SiS_RefIndex[rrti].Ext_CRTVCLK; - unsigned short clka = SiS_Pr->SiS_VCLKData[index].SR2B; - unsigned short clkb = SiS_Pr->SiS_VCLKData[index].SR2C; - - SiS_SetRegAND(SiS_Pr, SiS_Pr->SiS_P3c4, 0x31, 0xCF); - - SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3c4, 0x2B, clka); - SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3c4, 0x2C, clkb); - SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3c4, 0x2D, 0x01); -} - -/*********************************************/ -/* FIFO */ -/*********************************************/ - -static void -SiS_SetCRT1FIFO_310(struct SiS_Private *SiS_Pr, unsigned short ModeNo, - unsigned short mi) -{ - unsigned short modeflag = SiS_Pr->SiS_EModeIDTable[mi].Ext_ModeFlag; - - /* disable auto-threshold */ - SiS_SetRegAND(SiS_Pr, SiS_Pr->SiS_P3c4, 0x3D, 0xFE); - - SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3c4, 0x08, 0xAE); - SiS_SetRegAND(SiS_Pr, SiS_Pr->SiS_P3c4, 0x09, 0xF0); - - if (ModeNo <= 0x13) - return; - - if ((!(modeflag & DoubleScanMode)) || 
(!(modeflag & HalfDCLK))) { - SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3c4, 0x08, 0x34); - SiS_SetRegOR(SiS_Pr, SiS_Pr->SiS_P3c4, 0x3D, 0x01); - } -} - -/*********************************************/ -/* MODE REGISTERS */ -/*********************************************/ - -static void -SiS_SetVCLKState(struct SiS_Private *SiS_Pr, unsigned short ModeNo, - unsigned short rrti) -{ - unsigned short data = 0, VCLK = 0, index = 0; - - if (ModeNo > 0x13) { - index = SiS_Pr->SiS_RefIndex[rrti].Ext_CRTVCLK; - VCLK = SiS_Pr->SiS_VCLKData[index].CLOCK; - } - - if (VCLK >= 166) - data |= 0x0c; - SiS_SetRegANDOR(SiS_Pr, SiS_Pr->SiS_P3c4, 0x32, 0xf3, data); - - if (VCLK >= 166) - SiS_SetRegAND(SiS_Pr, SiS_Pr->SiS_P3c4, 0x1f, 0xe7); - - /* DAC speed */ - data = 0x03; - if (VCLK >= 260) - data = 0x00; - else if (VCLK >= 160) - data = 0x01; - else if (VCLK >= 135) - data = 0x02; - - SiS_SetRegANDOR(SiS_Pr, SiS_Pr->SiS_P3c4, 0x07, 0xF8, data); -} - -static void -SiS_SetCRT1ModeRegs(struct SiS_Private *SiS_Pr, unsigned short ModeNo, - unsigned short ModeIdIndex, unsigned short rrti) -{ - unsigned short data, infoflag = 0, modeflag; - - if (ModeNo <= 0x13) - modeflag = SiS_Pr->SiS_SModeIDTable[ModeIdIndex].St_ModeFlag; - else { - modeflag = SiS_Pr->SiS_EModeIDTable[ModeIdIndex].Ext_ModeFlag; - infoflag = SiS_Pr->SiS_RefIndex[rrti].Ext_InfoFlag; - } - - /* Disable DPMS */ - SiS_SetRegAND(SiS_Pr, SiS_Pr->SiS_P3c4, 0x1F, 0x3F); - - data = 0; - if (ModeNo > 0x13) { - if (SiS_Pr->SiS_ModeType > ModeEGA) { - data |= 0x02; - data |= ((SiS_Pr->SiS_ModeType - ModeVGA) << 2); - } - if (infoflag & InterlaceMode) - data |= 0x20; - } - SiS_SetRegANDOR(SiS_Pr, SiS_Pr->SiS_P3c4, 0x06, 0xC0, data); - - data = 0; - if (infoflag & InterlaceMode) { - /* data = (Hsync / 8) - ((Htotal / 8) / 2) + 3 */ - unsigned short hrs = - (SiS_GetReg(SiS_Pr, SiS_Pr->SiS_P3d4, 0x04) | - ((SiS_GetReg(SiS_Pr, SiS_Pr->SiS_P3c4, 0x0b) & 0xc0) << 2)) - - 3; - unsigned short hto = - (SiS_GetReg(SiS_Pr, SiS_Pr->SiS_P3d4, 0x00) | - 
((SiS_GetReg(SiS_Pr, SiS_Pr->SiS_P3c4, 0x0b) & 0x03) << 8)) - + 5; - data = hrs - (hto >> 1) + 3; - } - SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3d4, 0x19, (data & 0xFF)); - SiS_SetRegANDOR(SiS_Pr, SiS_Pr->SiS_P3d4, 0x1a, 0xFC, (data >> 8)); - - if (modeflag & HalfDCLK) - SiS_SetRegOR(SiS_Pr, SiS_Pr->SiS_P3c4, 0x01, 0x08); - - data = 0; - if (modeflag & LineCompareOff) - data = 0x08; - SiS_SetRegANDOR(SiS_Pr, SiS_Pr->SiS_P3c4, 0x0F, 0xB7, data); - - if ((SiS_Pr->SiS_ModeType == ModeEGA) && (ModeNo > 0x13)) - SiS_SetRegOR(SiS_Pr, SiS_Pr->SiS_P3c4, 0x0F, 0x40); - - SiS_SetRegAND(SiS_Pr, SiS_Pr->SiS_P3c4, 0x31, 0xfb); - - data = 0x60; - if (SiS_Pr->SiS_ModeType != ModeText) { - data ^= 0x60; - if (SiS_Pr->SiS_ModeType != ModeEGA) - data ^= 0xA0; - } - SiS_SetRegANDOR(SiS_Pr, SiS_Pr->SiS_P3c4, 0x21, 0x1F, data); - - SiS_SetVCLKState(SiS_Pr, ModeNo, rrti); - - if (SiS_GetReg(SiS_Pr, SiS_Pr->SiS_P3d4, 0x31) & 0x40) - SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3d4, 0x52, 0x2c); - else - SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3d4, 0x52, 0x6c); -} - -/*********************************************/ -/* LOAD DAC */ -/*********************************************/ - -static void -SiS_WriteDAC(struct SiS_Private *SiS_Pr, unsigned long DACData, - unsigned short shiftflag, unsigned short dl, unsigned short ah, - unsigned short al, unsigned short dh) -{ - unsigned short d1, d2, d3; - - switch (dl) { - case 0: - d1 = dh; - d2 = ah; - d3 = al; - break; - case 1: - d1 = ah; - d2 = al; - d3 = dh; - break; - default: - d1 = al; - d2 = dh; - d3 = ah; - } - SiS_SetRegByte(SiS_Pr, DACData, (d1 << shiftflag)); - SiS_SetRegByte(SiS_Pr, DACData, (d2 << shiftflag)); - SiS_SetRegByte(SiS_Pr, DACData, (d3 << shiftflag)); -} - -static void -SiS_LoadDAC(struct SiS_Private *SiS_Pr, unsigned short ModeNo, - unsigned short mi) -{ - unsigned short data, data2, time, i, j, k, m, n, o; - unsigned short si, di, bx, sf; - unsigned long DACAddr, DACData; - const unsigned char *table = NULL; - - if (ModeNo < 0x13) - data = 
SiS_Pr->SiS_SModeIDTable[mi].St_ModeFlag; - else - data = SiS_Pr->SiS_EModeIDTable[mi].Ext_ModeFlag; - - data &= DACInfoFlag; - - j = time = 64; - if (data == 0x00) - table = SiS_MDA_DAC; - else if (data == 0x08) - table = SiS_CGA_DAC; - else if (data == 0x10) - table = SiS_EGA_DAC; - else { - j = 16; - time = 256; - table = SiS_VGA_DAC; - } - - DACAddr = SiS_Pr->SiS_P3c8; - DACData = SiS_Pr->SiS_P3c9; - sf = 0; - SiS_SetRegByte(SiS_Pr, SiS_Pr->SiS_P3c6, 0xFF); - - SiS_SetRegByte(SiS_Pr, DACAddr, 0x00); - - for (i = 0; i < j; i++) { - data = table[i]; - for (k = 0; k < 3; k++) { - data2 = 0; - if (data & 0x01) - data2 += 0x2A; - if (data & 0x02) - data2 += 0x15; - SiS_SetRegByte(SiS_Pr, DACData, (data2 << sf)); - data >>= 2; - } - } - - if (time == 256) { - for (i = 16; i < 32; i++) { - data = table[i] << sf; - for (k = 0; k < 3; k++) - SiS_SetRegByte(SiS_Pr, DACData, data); - } - si = 32; - for (m = 0; m < 9; m++) { - di = si; - bx = si + 4; - for (n = 0; n < 3; n++) { - for (o = 0; o < 5; o++) { - SiS_WriteDAC(SiS_Pr, DACData, sf, n, - table[di], table[bx], - table[si]); - si++; - } - si -= 2; - for (o = 0; o < 3; o++) { - SiS_WriteDAC(SiS_Pr, DACData, sf, n, - table[di], table[si], - table[bx]); - si--; - } - } - si += 5; - } - } -} - -/*********************************************/ -/* SET CRT1 REGISTER GROUP */ -/*********************************************/ - -static void -SiS_SetCRT1Group(struct SiS_Private *SiS_Pr, unsigned short ModeNo, - unsigned short ModeIdIndex) -{ - unsigned short StandTableIndex, rrti; - - SiS_Pr->SiS_CRT1Mode = ModeNo; - - if (ModeNo <= 0x13) - StandTableIndex = 0; - else - StandTableIndex = 1; - - SiS_ResetSegmentRegisters(SiS_Pr); - SiS_SetSeqRegs(SiS_Pr, StandTableIndex); - SiS_SetMiscRegs(SiS_Pr, StandTableIndex); - SiS_SetCRTCRegs(SiS_Pr, StandTableIndex); - SiS_SetATTRegs(SiS_Pr, StandTableIndex); - SiS_SetGRCRegs(SiS_Pr, StandTableIndex); - SiS_ClearExt1Regs(SiS_Pr, ModeNo); - - rrti = SiS_GetRatePtr(SiS_Pr, ModeNo, 
ModeIdIndex); - - if (rrti != 0xFFFF) { - SiS_SetCRT1Sync(SiS_Pr, rrti); - SiS_SetCRT1CRTC(SiS_Pr, ModeNo, ModeIdIndex, rrti); - SiS_SetCRT1Offset(SiS_Pr, ModeNo, ModeIdIndex, rrti); - SiS_SetCRT1VCLK(SiS_Pr, ModeNo, rrti); - } - - SiS_SetCRT1FIFO_310(SiS_Pr, ModeNo, ModeIdIndex); - - SiS_SetCRT1ModeRegs(SiS_Pr, ModeNo, ModeIdIndex, rrti); - - SiS_LoadDAC(SiS_Pr, ModeNo, ModeIdIndex); - - SiS_DisplayOn(SiS_Pr); -} - -/*********************************************/ -/* SiSSetMode() */ -/*********************************************/ - -int SiSUSBSetMode(struct SiS_Private *SiS_Pr, unsigned short ModeNo) -{ - unsigned short ModeIdIndex; - unsigned long BaseAddr = SiS_Pr->IOAddress; - - SiSUSB_InitPtr(SiS_Pr); - SiSUSBRegInit(SiS_Pr, BaseAddr); - SiS_GetSysFlags(SiS_Pr); - - if (!(SiS_SearchModeID(SiS_Pr, &ModeNo, &ModeIdIndex))) - return 0; - - SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3c4, 0x05, 0x86); - - SiSInitPCIetc(SiS_Pr); - - ModeNo &= 0x7f; - - SiS_Pr->SiS_ModeType = - SiS_Pr->SiS_EModeIDTable[ModeIdIndex].Ext_ModeFlag & ModeTypeMask; - - SiS_Pr->SiS_SetFlag = LowModeTests; - - /* Set mode on CRT1 */ - SiS_SetCRT1Group(SiS_Pr, ModeNo, ModeIdIndex); - - SiS_HandleCRT1(SiS_Pr); - - SiS_DisplayOn(SiS_Pr); - SiS_SetRegByte(SiS_Pr, SiS_Pr->SiS_P3c6, 0xFF); - - /* Store mode number */ - SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3d4, 0x34, ModeNo); - - return 1; -} - -int SiSUSBSetVESAMode(struct SiS_Private *SiS_Pr, unsigned short VModeNo) -{ - unsigned short ModeNo = 0; - int i; - - SiSUSB_InitPtr(SiS_Pr); - - if (VModeNo == 0x03) { - - ModeNo = 0x03; - - } else { - - i = 0; - do { - - if (SiS_Pr->SiS_EModeIDTable[i].Ext_VESAID == VModeNo) { - ModeNo = SiS_Pr->SiS_EModeIDTable[i].Ext_ModeID; - break; - } - - } while (SiS_Pr->SiS_EModeIDTable[i++].Ext_ModeID != 0xff); - - } - - if (!ModeNo) - return 0; - - return SiSUSBSetMode(SiS_Pr, ModeNo); -} diff --git a/drivers/usb/misc/sisusbvga/sisusb_init.h b/drivers/usb/misc/sisusbvga/sisusb_init.h deleted file mode 100644 index 
b5cd77ae941d..000000000000 --- a/drivers/usb/misc/sisusbvga/sisusb_init.h +++ /dev/null @@ -1,180 +0,0 @@ -/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ -/* $XFree86$ */ -/* $XdotOrg$ */ -/* - * Data and prototypes for init.c - * - * Copyright (C) 2001-2005 by Thomas Winischhofer, Vienna, Austria - * - * If distributed as part of the Linux kernel, the following license terms - * apply: - * - * * This program is free software; you can redistribute it and/or modify - * * it under the terms of the GNU General Public License as published by - * * the Free Software Foundation; either version 2 of the named License, - * * or any later version. - * * - * * This program is distributed in the hope that it will be useful, - * * but WITHOUT ANY WARRANTY; without even the implied warranty of - * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * * GNU General Public License for more details. - * * - * * You should have received a copy of the GNU General Public License - * * along with this program; if not, write to the Free Software - * * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA - * - * Otherwise, the following license terms apply: - * - * * Redistribution and use in source and binary forms, with or without - * * modification, are permitted provided that the following conditions - * * are met: - * * 1) Redistributions of source code must retain the above copyright - * * notice, this list of conditions and the following disclaimer. - * * 2) Redistributions in binary form must reproduce the above copyright - * * notice, this list of conditions and the following disclaimer in the - * * documentation and/or other materials provided with the distribution. - * * 3) The name of the author may not be used to endorse or promote products - * * derived from this software without specific prior written permission. 
- * * - * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Author: Thomas Winischhofer - * - */ - -#ifndef _SISUSB_INIT_H_ -#define _SISUSB_INIT_H_ - -/* SiS_ModeType */ -#define ModeText 0x00 -#define ModeCGA 0x01 -#define ModeEGA 0x02 -#define ModeVGA 0x03 -#define Mode15Bpp 0x04 -#define Mode16Bpp 0x05 -#define Mode24Bpp 0x06 -#define Mode32Bpp 0x07 - -#define ModeTypeMask 0x07 -#define IsTextMode 0x07 - -#define DACInfoFlag 0x0018 -#define MemoryInfoFlag 0x01E0 -#define MemorySizeShift 5 - -/* modeflag */ -#define Charx8Dot 0x0200 -#define LineCompareOff 0x0400 -#define CRT2Mode 0x0800 -#define HalfDCLK 0x1000 -#define NoSupportSimuTV 0x2000 -#define NoSupportLCDScale 0x4000 /* SiS bridge: No scaling possible (no matter what panel) */ -#define DoubleScanMode 0x8000 - -/* Infoflag */ -#define SupportTV 0x0008 -#define SupportTV1024 0x0800 -#define SupportCHTV 0x0800 -#define Support64048060Hz 0x0800 /* Special for 640x480 LCD */ -#define SupportHiVision 0x0010 -#define SupportYPbPr750p 0x1000 -#define SupportLCD 0x0020 -#define SupportRAMDAC2 0x0040 /* All (<= 100Mhz) */ -#define SupportRAMDAC2_135 0x0100 /* All except DH (<= 135Mhz) */ -#define SupportRAMDAC2_162 0x0200 /* B, C (<= 162Mhz) */ -#define SupportRAMDAC2_202 0x0400 /* C (<= 202Mhz) */ -#define 
InterlaceMode 0x0080 -#define SyncPP 0x0000 -#define SyncPN 0x4000 -#define SyncNP 0x8000 -#define SyncNN 0xc000 - -/* SetFlag */ -#define ProgrammingCRT2 0x0001 -#define LowModeTests 0x0002 -#define LCDVESATiming 0x0008 -#define EnableLVDSDDA 0x0010 -#define SetDispDevSwitchFlag 0x0020 -#define CheckWinDos 0x0040 -#define SetDOSMode 0x0080 - -/* Index in ModeResInfo table */ -#define SIS_RI_320x200 0 -#define SIS_RI_320x240 1 -#define SIS_RI_320x400 2 -#define SIS_RI_400x300 3 -#define SIS_RI_512x384 4 -#define SIS_RI_640x400 5 -#define SIS_RI_640x480 6 -#define SIS_RI_800x600 7 -#define SIS_RI_1024x768 8 -#define SIS_RI_1280x1024 9 -#define SIS_RI_1600x1200 10 -#define SIS_RI_1920x1440 11 -#define SIS_RI_2048x1536 12 -#define SIS_RI_720x480 13 -#define SIS_RI_720x576 14 -#define SIS_RI_1280x960 15 -#define SIS_RI_800x480 16 -#define SIS_RI_1024x576 17 -#define SIS_RI_1280x720 18 -#define SIS_RI_856x480 19 -#define SIS_RI_1280x768 20 -#define SIS_RI_1400x1050 21 -#define SIS_RI_1152x864 22 /* Up to here SiS conforming */ -#define SIS_RI_848x480 23 -#define SIS_RI_1360x768 24 -#define SIS_RI_1024x600 25 -#define SIS_RI_1152x768 26 -#define SIS_RI_768x576 27 -#define SIS_RI_1360x1024 28 -#define SIS_RI_1680x1050 29 -#define SIS_RI_1280x800 30 -#define SIS_RI_1920x1080 31 -#define SIS_RI_960x540 32 -#define SIS_RI_960x600 33 - -#define SIS_VIDEO_CAPTURE 0x00 - 0x30 -#define SIS_VIDEO_PLAYBACK 0x02 - 0x30 -#define SIS_CRT2_PORT_04 0x04 - 0x30 - -int SiSUSBSetMode(struct SiS_Private *SiS_Pr, unsigned short ModeNo); -int SiSUSBSetVESAMode(struct SiS_Private *SiS_Pr, unsigned short VModeNo); - -extern int sisusb_setreg(struct sisusb_usb_data *sisusb, u32 port, u8 data); -extern int sisusb_getreg(struct sisusb_usb_data *sisusb, u32 port, u8 * data); -extern int sisusb_setidxreg(struct sisusb_usb_data *sisusb, u32 port, - u8 index, u8 data); -extern int sisusb_getidxreg(struct sisusb_usb_data *sisusb, u32 port, - u8 index, u8 * data); -extern int 
sisusb_setidxregandor(struct sisusb_usb_data *sisusb, u32 port, - u8 idx, u8 myand, u8 myor); -extern int sisusb_setidxregor(struct sisusb_usb_data *sisusb, u32 port, - u8 index, u8 myor); -extern int sisusb_setidxregand(struct sisusb_usb_data *sisusb, u32 port, - u8 idx, u8 myand); - -void sisusb_delete(struct kref *kref); -int sisusb_writeb(struct sisusb_usb_data *sisusb, u32 adr, u8 data); -int sisusb_readb(struct sisusb_usb_data *sisusb, u32 adr, u8 * data); -int sisusb_copy_memory(struct sisusb_usb_data *sisusb, u8 *src, - u32 dest, int length); -int sisusb_reset_text_mode(struct sisusb_usb_data *sisusb, int init); -int sisusbcon_do_font_op(struct sisusb_usb_data *sisusb, int set, int slot, - u8 * arg, int cmapsz, int ch512, int dorecalc, - struct vc_data *c, int fh, int uplock); -void sisusb_set_cursor(struct sisusb_usb_data *sisusb, unsigned int location); -int sisusb_console_init(struct sisusb_usb_data *sisusb, int first, int last); -void sisusb_console_exit(struct sisusb_usb_data *sisusb); -void sisusb_init_concode(void); - -#endif From a2f3d83cd74eb7cfc69c92d086ec4509cd9c58fb Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Thu, 8 Dec 2022 10:07:47 +0100 Subject: [PATCH 3501/4122] USB: sisusbvga: rename sisusb.c to sisusbvga.c As it's the only source for the sisusbvga module, there is no need for a 2-steps build. 
Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Christophe Leroy Cc: Yoshinori Sato Cc: Rich Felker Cc: Thomas Winischhofer Cc: Greg Kroah-Hartman Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-sh@vger.kernel.org Cc: linux-usb@vger.kernel.org Signed-off-by: Jiri Slaby (SUSE) Link: https://lore.kernel.org/r/20221208090749.28056-2-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/sisusbvga/Makefile | 2 -- drivers/usb/misc/sisusbvga/{sisusb.c => sisusbvga.c} | 0 2 files changed, 2 deletions(-) rename drivers/usb/misc/sisusbvga/{sisusb.c => sisusbvga.c} (100%) diff --git a/drivers/usb/misc/sisusbvga/Makefile b/drivers/usb/misc/sisusbvga/Makefile index 93265de80eb9..28aa1e6ef823 100644 --- a/drivers/usb/misc/sisusbvga/Makefile +++ b/drivers/usb/misc/sisusbvga/Makefile @@ -4,5 +4,3 @@ # obj-$(CONFIG_USB_SISUSBVGA) += sisusbvga.o - -sisusbvga-y := sisusb.o diff --git a/drivers/usb/misc/sisusbvga/sisusb.c b/drivers/usb/misc/sisusbvga/sisusbvga.c similarity index 100% rename from drivers/usb/misc/sisusbvga/sisusb.c rename to drivers/usb/misc/sisusbvga/sisusbvga.c From 4b6be020bd6b126112c06648de17ead360919ab4 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Thu, 8 Dec 2022 10:07:48 +0100 Subject: [PATCH 3502/4122] USB: sisusbvga: use module_usb_driver() Now that we only do usb_register() and usb_sisusb_exit() in module_init() and module_exit() respectively, we can simply use module_usb_driver().
Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Christophe Leroy Cc: Yoshinori Sato Cc: Rich Felker Cc: Thomas Winischhofer Cc: Greg Kroah-Hartman Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-sh@vger.kernel.org Cc: linux-usb@vger.kernel.org Signed-off-by: Jiri Slaby (SUSE) Link: https://lore.kernel.org/r/20221208090749.28056-3-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/sisusbvga/sisusbvga.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/drivers/usb/misc/sisusbvga/sisusbvga.c b/drivers/usb/misc/sisusbvga/sisusbvga.c index a0d5ba8058f8..654a79fd3231 100644 --- a/drivers/usb/misc/sisusbvga/sisusbvga.c +++ b/drivers/usb/misc/sisusbvga/sisusbvga.c @@ -2947,18 +2947,7 @@ static struct usb_driver sisusb_driver = { .id_table = sisusb_table, }; -static int __init usb_sisusb_init(void) -{ - return usb_register(&sisusb_driver); -} - -static void __exit usb_sisusb_exit(void) -{ - usb_deregister(&sisusb_driver); -} - -module_init(usb_sisusb_init); -module_exit(usb_sisusb_exit); +module_usb_driver(sisusb_driver); MODULE_AUTHOR("Thomas Winischhofer "); MODULE_DESCRIPTION("sisusbvga - Driver for Net2280/SiS315-based USB2VGA dongles"); From c35ca10f53c51eeb610d3f8fbc6dd6d511b58a58 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Thu, 8 Dec 2022 19:00:58 +0800 Subject: [PATCH 3503/4122] usb: storage: Add check for kcalloc As kcalloc may return NULL pointer, the return value should be checked and return error if fails as same as the ones in alauda_read_map. 
Fixes: e80b0fade09e ("[PATCH] USB Storage: add alauda support") Acked-by: Alan Stern Signed-off-by: Jiasheng Jiang Link: https://lore.kernel.org/r/20221208110058.12983-1-jiasheng@iscas.ac.cn Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/alauda.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/storage/alauda.c b/drivers/usb/storage/alauda.c index 747be69e5e69..5e912dd29b4c 100644 --- a/drivers/usb/storage/alauda.c +++ b/drivers/usb/storage/alauda.c @@ -438,6 +438,8 @@ static int alauda_init_media(struct us_data *us) + MEDIA_INFO(us).blockshift + MEDIA_INFO(us).pageshift); MEDIA_INFO(us).pba_to_lba = kcalloc(num_zones, sizeof(u16*), GFP_NOIO); MEDIA_INFO(us).lba_to_pba = kcalloc(num_zones, sizeof(u16*), GFP_NOIO); + if (MEDIA_INFO(us).pba_to_lba == NULL || MEDIA_INFO(us).lba_to_pba == NULL) + return USB_STOR_TRANSPORT_ERROR; if (alauda_reset_media(us) != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; From dc18a4c7b3bd447cef2395deeb1f6ac16dfaca0e Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Sat, 3 Dec 2022 15:10:27 +0800 Subject: [PATCH 3504/4122] usb: typec: wusb3801: fix fwnode refcount leak in wusb3801_probe() I got the following report while doing fault injection test: OF: ERROR: memory leak, expected refcount 1 instead of 4, of_node_get()/of_node_put() unbalanced - destroy cset entry: attach overlay node /i2c/tcpc@60/connector If wusb3801_hw_init() fails, fwnode_handle_put() needs to be called to avoid refcount leak.
Fixes: d016cbe4d7ac ("usb: typec: Support the WUSB3801 port controller") Reviewed-by: Heikki Krogerus Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221203071027.3808308-1-yangyingliang@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/wusb3801.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/wusb3801.c b/drivers/usb/typec/wusb3801.c index 3cc7a15ecbd3..a43a18d4b02e 100644 --- a/drivers/usb/typec/wusb3801.c +++ b/drivers/usb/typec/wusb3801.c @@ -364,7 +364,7 @@ static int wusb3801_probe(struct i2c_client *client) /* Initialize the hardware with the devicetree settings. */ ret = wusb3801_hw_init(wusb3801); if (ret) - return ret; + goto err_put_connector; wusb3801->cap.revision = USB_TYPEC_REV_1_2; wusb3801->cap.accessory[0] = TYPEC_ACCESSORY_AUDIO; From 97a48da1619ba6bd42a0e5da0a03aa490a9496b1 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Tue, 6 Dec 2022 12:17:31 +0400 Subject: [PATCH 3505/4122] usb: dwc3: qcom: Fix memory leak in dwc3_qcom_interconnect_init of_icc_get() allocates resources for the path handle; we should release them when they are no longer needed, like the release in the dwc3_qcom_interconnect_exit() function. Add icc_put() in error handling to fix this.
Fixes: bea46b981515 ("usb: dwc3: qcom: Add interconnect support in dwc3 driver") Cc: stable Acked-by: Thinh Nguyen Signed-off-by: Miaoqian Lin Link: https://lore.kernel.org/r/20221206081731.818107-1-linmq006@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-qcom.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c index 7c40f3ffc054..b0a0351d2d8b 100644 --- a/drivers/usb/dwc3/dwc3-qcom.c +++ b/drivers/usb/dwc3/dwc3-qcom.c @@ -261,7 +261,8 @@ static int dwc3_qcom_interconnect_init(struct dwc3_qcom *qcom) if (IS_ERR(qcom->icc_path_apps)) { dev_err(dev, "failed to get apps-usb path: %ld\n", PTR_ERR(qcom->icc_path_apps)); - return PTR_ERR(qcom->icc_path_apps); + ret = PTR_ERR(qcom->icc_path_apps); + goto put_path_ddr; } max_speed = usb_get_maximum_speed(&qcom->dwc3->dev); @@ -274,16 +275,22 @@ static int dwc3_qcom_interconnect_init(struct dwc3_qcom *qcom) } if (ret) { dev_err(dev, "failed to set bandwidth for usb-ddr path: %d\n", ret); - return ret; + goto put_path_apps; } ret = icc_set_bw(qcom->icc_path_apps, APPS_USB_AVG_BW, APPS_USB_PEAK_BW); if (ret) { dev_err(dev, "failed to set bandwidth for apps-usb path: %d\n", ret); - return ret; + goto put_path_apps; } return 0; + +put_path_apps: + icc_put(qcom->icc_path_apps); +put_path_ddr: + icc_put(qcom->icc_path_ddr); + return ret; } /** From 4c92670b16727365699fe4b19ed32013bab2c107 Mon Sep 17 00:00:00 2001 From: Szymon Heidrich Date: Tue, 6 Dec 2022 15:13:01 +0100 Subject: [PATCH 3506/4122] usb: gadget: uvc: Prevent buffer overflow in setup handler Setup function uvc_function_setup permits control transfer requests with up to 64 bytes of payload (UVC_MAX_REQUEST_SIZE), data stage handler for OUT transfer uses memcpy to copy req->actual bytes to uvc_event->data.data array of size 60. This may result in an overflow of 4 bytes. 
Fixes: cdda479f15cd ("USB gadget: video class function driver") Cc: stable Reviewed-by: Laurent Pinchart Reviewed-by: Daniel Scally Signed-off-by: Szymon Heidrich Link: https://lore.kernel.org/r/20221206141301.51305-1-szymon.heidrich@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_uvc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c index 6e131624011a..32f2c1645467 100644 --- a/drivers/usb/gadget/function/f_uvc.c +++ b/drivers/usb/gadget/function/f_uvc.c @@ -213,8 +213,9 @@ uvc_function_ep0_complete(struct usb_ep *ep, struct usb_request *req) memset(&v4l2_event, 0, sizeof(v4l2_event)); v4l2_event.type = UVC_EVENT_DATA; - uvc_event->data.length = req->actual; - memcpy(&uvc_event->data.data, req->buf, req->actual); + uvc_event->data.length = min_t(unsigned int, req->actual, + sizeof(uvc_event->data.data)); + memcpy(&uvc_event->data.data, req->buf, uvc_event->data.length); v4l2_event_queue(&uvc->vdev, &v4l2_event); } } From ecec4b20d29c3d6922dafe7d2555254a454272d2 Mon Sep 17 00:00:00 2001 From: Ivaylo Dimitrov Date: Fri, 25 Nov 2022 20:21:15 +0200 Subject: [PATCH 3507/4122] usb: musb: remove extra check in musb_gadget_vbus_draw The checks for musb->xceiv and musb->xceiv->set_power duplicate those in usb_phy_set_power(), so there is no need of them. Moreover, not calling usb_phy_set_power() results in usb_phy_set_charger_current() not being called, so current USB config max current is not propagated through USB charger framework and charger drivers may try to draw more current than allowed or possible. Fix that by removing those extra checks and calling usb_phy_set_power() directly. 
Tested on Motorola Droid4 and Nokia N900 Fixes: a9081a008f84 ("usb: phy: Add USB charger support") Cc: stable Signed-off-by: Ivaylo Dimitrov Link: https://lore.kernel.org/r/1669400475-4762-1-git-send-email-ivo.g.dimitrov.75@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_gadget.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c index 6cb9514ef340..31c44325e828 100644 --- a/drivers/usb/musb/musb_gadget.c +++ b/drivers/usb/musb/musb_gadget.c @@ -1630,8 +1630,6 @@ static int musb_gadget_vbus_draw(struct usb_gadget *gadget, unsigned mA) { struct musb *musb = gadget_to_musb(gadget); - if (!musb->xceiv || !musb->xceiv->set_power) - return -EOPNOTSUPP; return usb_phy_set_power(musb->xceiv, mA); } From 6f1f0ad910f73f5533b65e1748448d334e0ec697 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Fri, 25 Nov 2022 17:04:44 +0100 Subject: [PATCH 3508/4122] usb: gadget: udc: drop obsolete dependencies on COMPILE_TEST Since commit 0166dc11be91 ("of: make CONFIG_OF user selectable"), it is possible to test-build any driver which depends on OF on any architecture by explicitly selecting OF. Therefore depending on COMPILE_TEST as an alternative is no longer needed. It is actually better to always build such drivers with OF enabled, so that the test builds are closer to how each driver will actually be built on its intended target. Building them without OF may not test much as the compiler will optimize out potentially large parts of the code. In the worst case, this could even pop false positive warnings. Dropping COMPILE_TEST here improves the quality of our testing and avoids wasting time on non-existent issues. 
Cc: Greg Kroah-Hartman Cc: Nicolas Ferre Cc: Alexandre Belloni Cc: Claudiu Beznea Cc: Michal Simek Acked-by: Nicolas Ferre Signed-off-by: Jean Delvare Link: https://lore.kernel.org/r/20221125170444.36620123@endymion.delvare Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/udc/Kconfig b/drivers/usb/gadget/udc/Kconfig index 16243964b1cd..b3006d8b04ab 100644 --- a/drivers/usb/gadget/udc/Kconfig +++ b/drivers/usb/gadget/udc/Kconfig @@ -33,7 +33,7 @@ menu "USB Peripheral Controller" config USB_AT91 tristate "Atmel AT91 USB Device Port" depends on ARCH_AT91 - depends on OF || COMPILE_TEST + depends on OF help Many Atmel AT91 processors (such as the AT91RM2000) have a full speed USB Device Port with support for five configurable @@ -419,7 +419,7 @@ config USB_EG20T config USB_GADGET_XILINX tristate "Xilinx USB Driver" depends on HAS_DMA - depends on OF || COMPILE_TEST + depends on OF help USB peripheral controller driver for Xilinx USB2 device. Xilinx USB2 device is a soft IP which supports both full From 59d54aa09020fe52061d4cda51d474f5bd5e6be1 Mon Sep 17 00:00:00 2001 From: Sven Peter Date: Mon, 28 Nov 2022 17:23:04 +0100 Subject: [PATCH 3509/4122] usb: typec: tipd: Set mode of operation for USB Type-C connector Forward the mode of operation to the typec subsystem such that it can configure the mux correctly. 
Reviewed-by: Heikki Krogerus Signed-off-by: Sven Peter Link: https://lore.kernel.org/r/20221128162304.80125-1-sven@svenpeter.dev Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tipd/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/typec/tipd/core.c b/drivers/usb/typec/tipd/core.c index 982bd2cad931..46a4d8b128f0 100644 --- a/drivers/usb/typec/tipd/core.c +++ b/drivers/usb/typec/tipd/core.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include "tps6598x.h" @@ -257,6 +258,7 @@ static int tps6598x_connect(struct tps6598x *tps, u32 status) typec_set_orientation(tps->port, TYPEC_ORIENTATION_REVERSE); else typec_set_orientation(tps->port, TYPEC_ORIENTATION_NORMAL); + typec_set_mode(tps->port, TYPEC_STATE_USB); tps6598x_set_data_role(tps, TPS_STATUS_TO_TYPEC_DATAROLE(status), true); tps->partner = typec_register_partner(tps->port, &desc); @@ -280,6 +282,7 @@ static void tps6598x_disconnect(struct tps6598x *tps, u32 status) typec_set_pwr_role(tps->port, TPS_STATUS_TO_TYPEC_PORTROLE(status)); typec_set_vconn_role(tps->port, TPS_STATUS_TO_TYPEC_VCONN(status)); typec_set_orientation(tps->port, TYPEC_ORIENTATION_NONE); + typec_set_mode(tps->port, TYPEC_STATE_SAFE); tps6598x_set_data_role(tps, TPS_STATUS_TO_TYPEC_DATAROLE(status), false); power_supply_changed(tps->psy); From 0cd142b4665ee3133cd80539b5a430be5fd326c6 Mon Sep 17 00:00:00 2001 From: Yi Yang Date: Fri, 2 Dec 2022 09:21:26 +0800 Subject: [PATCH 3510/4122] usb: fotg210-udc: fix potential memory leak in fotg210_udc_probe() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In fotg210_udc_probe(), if devm_clk_get() or clk_prepare_enable() fails, 'fotg210' will not be freed, which will lead to a memory leak. Fix it by moving kfree() to a proper location. In addition,we can use "return -ENOMEM" instead of "goto err" to simplify the code. 
Fixes: 718a38d092ec ("fotg210-udc: Handle PCLK") Reviewed-by: Andrzej Pietrasiewicz Reviewed-by: Linus Walleij Signed-off-by: Yi Yang Link: https://lore.kernel.org/r/20221202012126.246953-1-yiyang13@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/fotg210/fotg210-udc.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/usb/fotg210/fotg210-udc.c b/drivers/usb/fotg210/fotg210-udc.c index b9ea6c6d931c..66e1b7ee3346 100644 --- a/drivers/usb/fotg210/fotg210-udc.c +++ b/drivers/usb/fotg210/fotg210-udc.c @@ -1163,12 +1163,10 @@ int fotg210_udc_probe(struct platform_device *pdev) return -ENODEV; } - ret = -ENOMEM; - /* initialize udc */ fotg210 = kzalloc(sizeof(struct fotg210_udc), GFP_KERNEL); if (fotg210 == NULL) - goto err; + return -ENOMEM; fotg210->dev = dev; @@ -1178,7 +1176,7 @@ int fotg210_udc_probe(struct platform_device *pdev) ret = clk_prepare_enable(fotg210->pclk); if (ret) { dev_err(dev, "failed to enable PCLK\n"); - return ret; + goto err; } } else if (PTR_ERR(fotg210->pclk) == -EPROBE_DEFER) { /* @@ -1302,8 +1300,7 @@ err_pclk: if (!IS_ERR(fotg210->pclk)) clk_disable_unprepare(fotg210->pclk); - kfree(fotg210); - err: + kfree(fotg210); return ret; } From 38cea8e31e9ef143187135d714aed4d7bd18463c Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Tue, 6 Dec 2022 13:52:23 +0800 Subject: [PATCH 3511/4122] dt-bindings: vendor-prefixes: add Genesys Logic Genesys Logic, Inc. is a manufacturer for interface chips, especially USB hubs. 
https://www.genesyslogic.com.tw/ Signed-off-by: Icenowy Zheng Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221206055228.306074-2-uwu@icenowy.me Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/vendor-prefixes.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml index 6e323a380294..43359c0ccaf5 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.yaml +++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml @@ -488,6 +488,8 @@ patternProperties: description: GE Fanuc Intelligent Platforms Embedded Systems, Inc. "^gemei,.*": description: Gemei Digital Technology Co., Ltd. + "^genesys,.*": + description: Genesys Logic, Inc. "^geniatech,.*": description: Geniatech, Inc. "^giantec,.*": From 4e3a4fcd871274c0233ea498c685b118a21ff3d0 Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Tue, 6 Dec 2022 13:52:24 +0800 Subject: [PATCH 3512/4122] dt-bindings: usb: Add binding for Genesys Logic GL850G hub controller The Genesys Logic GL850G is a USB 2.0 Single TT hub controller that features 4 downstream ports, an internal 5V-to-3.3V LDO regulator (can be bypassed) and an external reset pin. Add a device tree binding for its USB protocol part. The internal LDO is not covered by this and can just be modelled as a fixed regulator. 
Signed-off-by: Icenowy Zheng Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221206055228.306074-3-uwu@icenowy.me Signed-off-by: Greg Kroah-Hartman --- .../bindings/usb/genesys,gl850g.yaml | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 Documentation/devicetree/bindings/usb/genesys,gl850g.yaml diff --git a/Documentation/devicetree/bindings/usb/genesys,gl850g.yaml b/Documentation/devicetree/bindings/usb/genesys,gl850g.yaml new file mode 100644 index 000000000000..a9f831448cca --- /dev/null +++ b/Documentation/devicetree/bindings/usb/genesys,gl850g.yaml @@ -0,0 +1,48 @@ +# SPDX-License-Identifier: GPL-2.0-only or BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/usb/genesys,gl850g.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Genesys Logic GL850G USB 2.0 hub controller + +maintainers: + - Icenowy Zheng + +allOf: + - $ref: usb-device.yaml# + +properties: + compatible: + enum: + - usb5e3,608 + + reg: true + + reset-gpios: + description: GPIO controlling the RESET# pin. + + vdd-supply: + description: + the regulator that provides 3.3V core power to the hub. + +required: + - compatible + - reg + +additionalProperties: false + +examples: + - | + #include + usb { + dr_mode = "host"; + #address-cells = <1>; + #size-cells = <0>; + + hub: hub@1 { + compatible = "usb5e3,608"; + reg = <1>; + reset-gpios = <&pio 7 2 GPIO_ACTIVE_LOW>; + }; + }; From 9bae996ffa28ac03b6d95382a2a082eb219e745a Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Tue, 6 Dec 2022 13:52:25 +0800 Subject: [PATCH 3513/4122] usb: misc: onboard_usb_hub: add Genesys Logic GL850G hub support Genesys Logic GL850G is a 4-port USB 2.0 STT hub that has a reset pin to toggle and a 3.3V core supply exported (although an integrated LDO is available for powering it with 5V). Add the support for this hub, for controlling the reset pin and the core power supply. 
Signed-off-by: Icenowy Zheng Acked-by: Matthias Kaehlcke Link: https://lore.kernel.org/r/20221206055228.306074-4-uwu@icenowy.me Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/onboard_usb_hub.c | 2 ++ drivers/usb/misc/onboard_usb_hub.h | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/drivers/usb/misc/onboard_usb_hub.c b/drivers/usb/misc/onboard_usb_hub.c index d63c63942af1..94e7966e199d 100644 --- a/drivers/usb/misc/onboard_usb_hub.c +++ b/drivers/usb/misc/onboard_usb_hub.c @@ -331,6 +331,7 @@ static struct platform_driver onboard_hub_driver = { /************************** USB driver **************************/ +#define VENDOR_ID_GENESYS 0x05e3 #define VENDOR_ID_MICROCHIP 0x0424 #define VENDOR_ID_REALTEK 0x0bda #define VENDOR_ID_TI 0x0451 @@ -407,6 +408,7 @@ static void onboard_hub_usbdev_disconnect(struct usb_device *udev) } static const struct usb_device_id onboard_hub_id_table[] = { + { USB_DEVICE(VENDOR_ID_GENESYS, 0x0608) }, /* Genesys Logic GL850G USB 2.0 */ { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2514) }, /* USB2514B USB 2.0 */ { USB_DEVICE(VENDOR_ID_REALTEK, 0x0411) }, /* RTS5411 USB 3.1 */ { USB_DEVICE(VENDOR_ID_REALTEK, 0x5411) }, /* RTS5411 USB 2.1 */ diff --git a/drivers/usb/misc/onboard_usb_hub.h b/drivers/usb/misc/onboard_usb_hub.h index 34beab8bce3d..62129a6a1ba5 100644 --- a/drivers/usb/misc/onboard_usb_hub.h +++ b/drivers/usb/misc/onboard_usb_hub.h @@ -22,10 +22,15 @@ static const struct onboard_hub_pdata ti_tusb8041_data = { .reset_us = 3000, }; +static const struct onboard_hub_pdata genesys_gl850g_data = { + .reset_us = 3, +}; + static const struct of_device_id onboard_hub_match[] = { { .compatible = "usb424,2514", .data = &microchip_usb424_data, }, { .compatible = "usb451,8140", .data = &ti_tusb8041_data, }, { .compatible = "usb451,8142", .data = &ti_tusb8041_data, }, + { .compatible = "usb5e3,608", .data = &genesys_gl850g_data, }, { .compatible = "usbbda,411", .data = &realtek_rts5411_data, }, { .compatible = "usbbda,5411", .data =
&realtek_rts5411_data, }, { .compatible = "usbbda,414", .data = &realtek_rts5411_data, }, From 8a7b31d545d3a15f0e6f5984ae16f0ca4fd76aac Mon Sep 17 00:00:00 2001 From: Ferry Toth Date: Mon, 5 Dec 2022 21:15:26 +0100 Subject: [PATCH 3514/4122] usb: ulpi: defer ulpi_register on ulpi_read_id timeout Since commit 0f0101719138 ("usb: dwc3: Don't switch OTG -> peripheral if extcon is present") Dual Role support on Intel Merrifield platform broke due to rearranging the call to dwc3_get_extcon(). It appears to be caused by ulpi_read_id() on the first test write failing with -ETIMEDOUT. Currently ulpi_read_id() expects to discover the phy via DT when the test write fails and returns 0 in that case, even if DT does not provide the phy. As a result usb probe completes without phy. Make ulpi_read_id() return -ETIMEDOUT to its user if the first test write fails. The user should then handle it appropriately. A follow up patch will make dwc3_core_init() set -EPROBE_DEFER in this case and bail out. Fixes: ef6a7bcfb01c ("usb: ulpi: Support device discovery via DT") Cc: stable@vger.kernel.org Acked-by: Heikki Krogerus Signed-off-by: Ferry Toth Link: https://lore.kernel.org/r/20221205201527.13525-2-ftoth@exalondelft.nl Signed-off-by: Greg Kroah-Hartman --- drivers/usb/common/ulpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/common/ulpi.c b/drivers/usb/common/ulpi.c index d7c8461976ce..60e8174686a1 100644 --- a/drivers/usb/common/ulpi.c +++ b/drivers/usb/common/ulpi.c @@ -207,7 +207,7 @@ static int ulpi_read_id(struct ulpi *ulpi) /* Test the interface */ ret = ulpi_write(ulpi, ULPI_SCRATCH, 0xaa); if (ret < 0) - goto err; + return ret; ret = ulpi_read(ulpi, ULPI_SCRATCH); if (ret < 0) From 63130462c919ece0ad0d9bb5a1f795ef8d79687e Mon Sep 17 00:00:00 2001 From: Ferry Toth Date: Mon, 5 Dec 2022 21:15:27 +0100 Subject: [PATCH 3515/4122] usb: dwc3: core: defer probe on ulpi_read_id timeout Since commit 0f0101719138 ("usb: dwc3: Don't switch OTG -> 
peripheral if extcon is present"), Dual Role support on Intel Merrifield platform broke due to rearranging the call to dwc3_get_extcon(). It appears to be caused by ulpi_read_id() masking the timeout on the first test write. In the past dwc3 probe continued by calling dwc3_core_soft_reset() followed by dwc3_get_extcon() which happened to return -EPROBE_DEFER. On deferred probe ulpi_read_id() finally succeeded. Due to the above-mentioned rearranging, -EPROBE_DEFER is not returned and probe completes without phy. On Intel Merrifield the timeout on the first test write issue is reproducible but it is difficult to find the root cause. Using a mainline kernel and rootfs with buildroot ulpi_read_id() succeeds. As soon as adding ftrace / bootconfig to find out why, ulpi_read_id() fails and we can't analyze the flow. Using another rootfs ulpi_read_id() fails even without adding ftrace. We suspect the issue is some kind of timing / race, but merely retrying ulpi_read_id() does not resolve the issue. As we now changed ulpi_read_id() to return -ETIMEDOUT in this case, we need to handle the error by calling dwc3_core_soft_reset() and request -EPROBE_DEFER. On deferred probe ulpi_read_id() is retried and succeeds.
Fixes: ef6a7bcfb01c ("usb: ulpi: Support device discovery via DT") Cc: stable@vger.kernel.org Acked-by: Thinh Nguyen Signed-off-by: Ferry Toth Link: https://lore.kernel.org/r/20221205201527.13525-3-ftoth@exalondelft.nl Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index fc38a8b13efa..476b63618511 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1100,8 +1100,13 @@ static int dwc3_core_init(struct dwc3 *dwc) if (!dwc->ulpi_ready) { ret = dwc3_core_ulpi_init(dwc); - if (ret) + if (ret) { + if (ret == -ETIMEDOUT) { + dwc3_core_soft_reset(dwc); + ret = -EPROBE_DEFER; + } goto err0; + } dwc->ulpi_ready = true; } From 2a81a7aa420b80865fdd82ec383fe365e18f922b Mon Sep 17 00:00:00 2001 From: Frank Wunderlich Date: Sun, 27 Nov 2022 12:41:36 +0100 Subject: [PATCH 3516/4122] dt-bindings: usb: mtk-xhci: add support for mt7986 Add compatible string for mt7986. 
Signed-off-by: Frank Wunderlich Acked-by: Krzysztof Kozlowski Reviewed-by: Chunfeng Yun Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20221127114142.156573-3-linux@fw-web.de Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/usb/mediatek,mtk-xhci.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/usb/mediatek,mtk-xhci.yaml b/Documentation/devicetree/bindings/usb/mediatek,mtk-xhci.yaml index 939623867a64..a3c37944c630 100644 --- a/Documentation/devicetree/bindings/usb/mediatek,mtk-xhci.yaml +++ b/Documentation/devicetree/bindings/usb/mediatek,mtk-xhci.yaml @@ -28,6 +28,7 @@ properties: - mediatek,mt7622-xhci - mediatek,mt7623-xhci - mediatek,mt7629-xhci + - mediatek,mt7986-xhci - mediatek,mt8173-xhci - mediatek,mt8183-xhci - mediatek,mt8186-xhci From 42a317d076b58f08413219b1679d211783c2e5f3 Mon Sep 17 00:00:00 2001 From: Quentin Schulz Date: Wed, 7 Dec 2022 14:19:16 +0100 Subject: [PATCH 3517/4122] usb: dwc2: disable lpm feature on Rockchip SoCs LPM feature of DWC2 module integrated in Rockchip SoCs doesn't work properly or needs some additional handling, so disable it for now. Without disabling LPM feature, the USB ADB communication fail with the following error log: dwc2 ff580000.usb: new address 27 dwc2 ff580000.usb: Failed to exit L1 sleep state in 200us. dwc2 ff580000.usb: dwc2_hsotg_send_reply: cannot queue req dwc2 ff580000.usb: dwc2_hsotg_process_req_status: failed to send reply dwc2 ff580000.usb: dwc2_hsotg_enqueue_setup: failed queue (-11) dwc2 ff580000.usb: Failed to exit L1 sleep state in 200us. 
[diff vs vendor kernel: added lpm_clock_gating, besl and hird_threshold_en settings as seen in commit 53febc956900 ("usb: dwc2: disable Link Power Management on STM32MP15 HS OTG")] Signed-off-by: William Wu Signed-off-by: Frank Wang Signed-off-by: Quentin Schulz Link: https://lore.kernel.org/r/20221206-dwc2-gadget-dual-role-v1-1-36515e1092cd@theobroma-systems.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/params.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/dwc2/params.c b/drivers/usb/dwc2/params.c index 8eab5f38b110..9ed9fd956940 100644 --- a/drivers/usb/dwc2/params.c +++ b/drivers/usb/dwc2/params.c @@ -113,6 +113,10 @@ static void dwc2_set_rk_params(struct dwc2_hsotg *hsotg) p->ahbcfg = GAHBCFG_HBSTLEN_INCR16 << GAHBCFG_HBSTLEN_SHIFT; p->power_down = DWC2_POWER_DOWN_PARAM_NONE; + p->lpm = false; + p->lpm_clock_gating = false; + p->besl = false; + p->hird_threshold_en = false; } static void dwc2_set_ltq_params(struct dwc2_hsotg *hsotg) From ade23d7b7ec5c38bd43ec44ccb753cb7ea8ac08a Mon Sep 17 00:00:00 2001 From: Quentin Schulz Date: Wed, 7 Dec 2022 14:19:17 +0100 Subject: [PATCH 3518/4122] usb: dwc2: power on/off phy for peripheral mode in dual-role mode The PHY power is handled for peripheral mode but only when the device is forced into this peripheral mode. It is missing when the device is operating in peripheral mode when dual-role mode is enabled, so let's update the condition to match this scenario. 
Signed-off-by: Quentin Schulz Link: https://lore.kernel.org/r/20221206-dwc2-gadget-dual-role-v1-2-36515e1092cd@theobroma-systems.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/gadget.c | 6 ++++-- drivers/usb/dwc2/platform.c | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index 8b15742d9e8a..62fa6378d2d7 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -4549,7 +4549,8 @@ static int dwc2_hsotg_udc_start(struct usb_gadget *gadget, hsotg->gadget.dev.of_node = hsotg->dev->of_node; hsotg->gadget.speed = USB_SPEED_UNKNOWN; - if (hsotg->dr_mode == USB_DR_MODE_PERIPHERAL) { + if (hsotg->dr_mode == USB_DR_MODE_PERIPHERAL || + (hsotg->dr_mode == USB_DR_MODE_OTG && dwc2_is_device_mode(hsotg))) { ret = dwc2_lowlevel_hw_enable(hsotg); if (ret) goto err; @@ -4611,7 +4612,8 @@ static int dwc2_hsotg_udc_stop(struct usb_gadget *gadget) if (!IS_ERR_OR_NULL(hsotg->uphy)) otg_set_peripheral(hsotg->uphy->otg, NULL); - if (hsotg->dr_mode == USB_DR_MODE_PERIPHERAL) + if (hsotg->dr_mode == USB_DR_MODE_PERIPHERAL || + (hsotg->dr_mode == USB_DR_MODE_OTG && dwc2_is_device_mode(hsotg))) dwc2_lowlevel_hw_disable(hsotg); return 0; diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c index 262c13b6362a..23ef75996823 100644 --- a/drivers/usb/dwc2/platform.c +++ b/drivers/usb/dwc2/platform.c @@ -576,7 +576,8 @@ static int dwc2_driver_probe(struct platform_device *dev) dwc2_debugfs_init(hsotg); /* Gadget code manages lowlevel hw on its own */ - if (hsotg->dr_mode == USB_DR_MODE_PERIPHERAL) + if (hsotg->dr_mode == USB_DR_MODE_PERIPHERAL || + (hsotg->dr_mode == USB_DR_MODE_OTG && dwc2_is_device_mode(hsotg))) dwc2_lowlevel_hw_disable(hsotg); #if IS_ENABLED(CONFIG_USB_DWC2_PERIPHERAL) || \ From 0bf588274f73b29d7058042a6b7cc2b764502cc1 Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Wed, 23 Nov 2022 17:51:21 +0100 Subject: [PATCH 3519/4122] Fix path in cifs/usage.rst 
/sys/module/... not /proc/module/... Signed-off-by: Volker Lendecke Signed-off-by: Steve French --- Documentation/admin-guide/cifs/usage.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/admin-guide/cifs/usage.rst b/Documentation/admin-guide/cifs/usage.rst index 3766bf8a1c20..ed3b8dc854ec 100644 --- a/Documentation/admin-guide/cifs/usage.rst +++ b/Documentation/admin-guide/cifs/usage.rst @@ -858,7 +858,7 @@ CIFS kernel module parameters These module parameters can be specified or modified either during the time of module loading or during the runtime by using the interface:: - /proc/module/cifs/parameters/ + /sys/module/cifs/parameters/ i.e.:: From 83fb8abec29383eb0cf35495d21669e38548771b Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Fri, 25 Nov 2022 12:26:00 +0100 Subject: [PATCH 3520/4122] cifs: Add "extbuf" and "extbuflen" args to smb2_compound_op() Will carry the variable-sized reply from SMB_FIND_FILE_POSIX_INFO Signed-off-by: Volker Lendecke Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/smb2inode.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 68e08c85fbb8..1be86ba950b3 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -59,6 +59,7 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, __u32 desired_access, __u32 create_disposition, __u32 create_options, umode_t mode, void *ptr, int command, struct cifsFileInfo *cfile, + __u8 **extbuf, size_t *extbuflen, struct kvec *err_iov, int *err_buftype) { struct cop_vars *vars = NULL; @@ -539,7 +540,7 @@ int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, cifs_get_readable_path(tcon, full_path, &cfile); rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, FILE_READ_ATTRIBUTES, FILE_OPEN, create_options, ACL_NO_MODE, data, SMB2_OP_QUERY_INFO, cfile, - 
err_iov, err_buftype); + NULL, NULL, err_iov, err_buftype); if (rc == -EOPNOTSUPP) { if (err_iov[0].iov_base && err_buftype[0] != CIFS_NO_BUFFER && ((struct smb2_hdr *)err_iov[0].iov_base)->Command == SMB2_CREATE && @@ -555,7 +556,7 @@ int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, cifs_get_readable_path(tcon, full_path, &cfile); rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, FILE_READ_ATTRIBUTES, FILE_OPEN, create_options, ACL_NO_MODE, data, - SMB2_OP_QUERY_INFO, cfile, NULL, NULL); + SMB2_OP_QUERY_INFO, cfile, NULL, NULL, NULL, NULL); } out: @@ -589,7 +590,7 @@ int smb311_posix_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, cifs_get_readable_path(tcon, full_path, &cfile); rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, FILE_READ_ATTRIBUTES, FILE_OPEN, create_options, ACL_NO_MODE, data, SMB2_OP_POSIX_QUERY_INFO, cfile, - err_iov, err_buftype); + NULL, NULL, err_iov, err_buftype); if (rc == -EOPNOTSUPP) { /* BB TODO: When support for special files added to Samba re-verify this path */ if (err_iov[0].iov_base && err_buftype[0] != CIFS_NO_BUFFER && @@ -606,7 +607,7 @@ int smb311_posix_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, cifs_get_readable_path(tcon, full_path, &cfile); rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, FILE_READ_ATTRIBUTES, FILE_OPEN, create_options, ACL_NO_MODE, data, - SMB2_OP_POSIX_QUERY_INFO, cfile, NULL, NULL); + SMB2_OP_POSIX_QUERY_INFO, cfile, NULL, NULL, NULL, NULL); } out: @@ -624,7 +625,7 @@ smb2_mkdir(const unsigned int xid, struct inode *parent_inode, umode_t mode, return smb2_compound_op(xid, tcon, cifs_sb, name, FILE_WRITE_ATTRIBUTES, FILE_CREATE, CREATE_NOT_FILE, mode, NULL, SMB2_OP_MKDIR, - NULL, NULL, NULL); + NULL, NULL, NULL, NULL, NULL); } void @@ -646,7 +647,7 @@ smb2_mkdir_setinfo(struct inode *inode, const char *name, tmprc = smb2_compound_op(xid, tcon, cifs_sb, name, FILE_WRITE_ATTRIBUTES, FILE_CREATE, CREATE_NOT_FILE, ACL_NO_MODE, - &data, 
SMB2_OP_SET_INFO, cfile, NULL, NULL); + &data, SMB2_OP_SET_INFO, cfile, NULL, NULL, NULL, NULL); if (tmprc == 0) cifs_i->cifsAttrs = dosattrs; } @@ -658,7 +659,7 @@ smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, drop_cached_dir_by_name(xid, tcon, name, cifs_sb); return smb2_compound_op(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN, CREATE_NOT_FILE, ACL_NO_MODE, - NULL, SMB2_OP_RMDIR, NULL, NULL, NULL); + NULL, SMB2_OP_RMDIR, NULL, NULL, NULL, NULL, NULL); } int @@ -667,7 +668,7 @@ smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon, const char *name, { return smb2_compound_op(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN, CREATE_DELETE_ON_CLOSE | OPEN_REPARSE_POINT, - ACL_NO_MODE, NULL, SMB2_OP_DELETE, NULL, NULL, NULL); + ACL_NO_MODE, NULL, SMB2_OP_DELETE, NULL, NULL, NULL, NULL, NULL); } static int @@ -686,7 +687,7 @@ smb2_set_path_attr(const unsigned int xid, struct cifs_tcon *tcon, } rc = smb2_compound_op(xid, tcon, cifs_sb, from_name, access, FILE_OPEN, 0, ACL_NO_MODE, smb2_to_name, - command, cfile, NULL, NULL); + command, cfile, NULL, NULL, NULL, NULL); smb2_rename_path: kfree(smb2_to_name); return rc; @@ -727,7 +728,7 @@ smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon, cifs_get_writable_path(tcon, full_path, FIND_WR_ANY, &cfile); return smb2_compound_op(xid, tcon, cifs_sb, full_path, FILE_WRITE_DATA, FILE_OPEN, 0, ACL_NO_MODE, - &eof, SMB2_OP_SET_EOF, cfile, NULL, NULL); + &eof, SMB2_OP_SET_EOF, cfile, NULL, NULL, NULL, NULL); } int @@ -754,7 +755,7 @@ smb2_set_file_info(struct inode *inode, const char *full_path, rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, FILE_WRITE_ATTRIBUTES, FILE_OPEN, 0, ACL_NO_MODE, buf, SMB2_OP_SET_INFO, cfile, - NULL, NULL); + NULL, NULL, NULL, NULL); cifs_put_tlink(tlink); return rc; } From 64ce47cb1b29d7d6aab6dcc287ae1fddb4876bd5 Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Fri, 25 Nov 2022 12:37:44 +0100 Subject: [PATCH 3521/4122] cifs: Parse owner/group for 
stat in smb311 posix extensions stat was returning default owner and group (unlike readdir) for SMB3.1.1 POSIX extensions Signed-off-by: Volker Lendecke Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/inode.c | 13 ++++++++---- fs/cifs/smb2inode.c | 49 ++++++++++++++++++++++++++++++++++++++++++--- fs/cifs/smb2proto.h | 5 ++++- 3 files changed, 59 insertions(+), 8 deletions(-) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 4e2ca3c6e5c0..286a5400b94e 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -632,6 +632,8 @@ static int cifs_sfu_mode(struct cifs_fattr *fattr, const unsigned char *path, /* Fill a cifs_fattr struct with info from POSIX info struct */ static void smb311_posix_info_to_fattr(struct cifs_fattr *fattr, struct cifs_open_info_data *data, + struct cifs_sid *owner, + struct cifs_sid *group, struct super_block *sb, bool adjust_tz, bool symlink) { struct smb311_posix_qinfo *info = &data->posix_fi; @@ -680,8 +682,8 @@ static void smb311_posix_info_to_fattr(struct cifs_fattr *fattr, struct cifs_ope } /* else if reparse point ... 
TODO: add support for FIFO and blk dev; special file types */ - fattr->cf_uid = cifs_sb->ctx->linux_uid; /* TODO: map uid and gid from SID */ - fattr->cf_gid = cifs_sb->ctx->linux_gid; + sid_to_id(cifs_sb, owner, fattr, SIDOWNER); + sid_to_id(cifs_sb, group, fattr, SIDGROUP); cifs_dbg(FYI, "POSIX query info: mode 0x%x uniqueid 0x%llx nlink %d\n", fattr->cf_mode, fattr->cf_uniqueid, fattr->cf_nlink); @@ -1175,6 +1177,7 @@ smb311_posix_get_inode_info(struct inode **inode, struct cifs_fattr fattr = {0}; bool symlink = false; struct cifs_open_info_data data = {}; + struct cifs_sid owner, group; int rc = 0; int tmprc = 0; @@ -1192,7 +1195,8 @@ smb311_posix_get_inode_info(struct inode **inode, goto out; } - rc = smb311_posix_query_path_info(xid, tcon, cifs_sb, full_path, &data, &adjust_tz, + rc = smb311_posix_query_path_info(xid, tcon, cifs_sb, full_path, &data, + &owner, &group, &adjust_tz, &symlink); /* @@ -1201,7 +1205,8 @@ smb311_posix_get_inode_info(struct inode **inode, switch (rc) { case 0: - smb311_posix_info_to_fattr(&fattr, &data, sb, adjust_tz, symlink); + smb311_posix_info_to_fattr(&fattr, &data, &owner, &group, + sb, adjust_tz, symlink); break; case -EREMOTE: /* DFS link, no metadata available on this server */ diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 1be86ba950b3..fbd46db1023a 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -431,6 +431,21 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, &rsp_iov[1], sizeof(idata->posix_fi) /* add SIDs */, (char *)&idata->posix_fi); } + if (rc == 0) { + unsigned int length = le32_to_cpu(qi_rsp->OutputBufferLength); + + if (length > sizeof(idata->posix_fi)) { + char *base = (char *)rsp_iov[1].iov_base + + le16_to_cpu(qi_rsp->OutputBufferOffset) + + sizeof(idata->posix_fi); + *extbuflen = length - sizeof(idata->posix_fi); + *extbuf = kmemdup(base, *extbuflen, GFP_KERNEL); + if (!*extbuf) + rc = -ENOMEM; + } else { + rc = -EINVAL; + } + } if (rqst[1].rq_iov) 
SMB2_query_info_free(&rqst[1]); if (rqst[2].rq_iov) @@ -569,13 +584,20 @@ out: int smb311_posix_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, - struct cifs_open_info_data *data, bool *adjust_tz, bool *reparse) + struct cifs_open_info_data *data, + struct cifs_sid *owner, + struct cifs_sid *group, + bool *adjust_tz, bool *reparse) { int rc; __u32 create_options = 0; struct cifsFileInfo *cfile; struct kvec err_iov[3] = {}; int err_buftype[3] = {}; + __u8 *sidsbuf = NULL; + __u8 *sidsbuf_end = NULL; + size_t sidsbuflen = 0; + size_t owner_len, group_len; *adjust_tz = false; *reparse = false; @@ -590,7 +612,7 @@ int smb311_posix_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, cifs_get_readable_path(tcon, full_path, &cfile); rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, FILE_READ_ATTRIBUTES, FILE_OPEN, create_options, ACL_NO_MODE, data, SMB2_OP_POSIX_QUERY_INFO, cfile, - NULL, NULL, err_iov, err_buftype); + &sidsbuf, &sidsbuflen, err_iov, err_buftype); if (rc == -EOPNOTSUPP) { /* BB TODO: When support for special files added to Samba re-verify this path */ if (err_iov[0].iov_base && err_buftype[0] != CIFS_NO_BUFFER && @@ -607,10 +629,31 @@ int smb311_posix_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, cifs_get_readable_path(tcon, full_path, &cfile); rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, FILE_READ_ATTRIBUTES, FILE_OPEN, create_options, ACL_NO_MODE, data, - SMB2_OP_POSIX_QUERY_INFO, cfile, NULL, NULL, NULL, NULL); + SMB2_OP_POSIX_QUERY_INFO, cfile, + &sidsbuf, &sidsbuflen, NULL, NULL); + } + + if (rc == 0) { + sidsbuf_end = sidsbuf + sidsbuflen; + + owner_len = posix_info_sid_size(sidsbuf, sidsbuf_end); + if (owner_len == -1) { + rc = -EINVAL; + goto out; + } + memcpy(owner, sidsbuf, owner_len); + + group_len = posix_info_sid_size( + sidsbuf + owner_len, sidsbuf_end); + if (group_len == -1) { + rc = -EINVAL; + goto out; + } + memcpy(group, sidsbuf 
+ owner_len, group_len); } out: + kfree(sidsbuf); free_rsp_buf(err_buftype[0], err_iov[0].iov_base); free_rsp_buf(err_buftype[1], err_iov[1].iov_base); free_rsp_buf(err_buftype[2], err_iov[2].iov_base); diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index be21b5d26f67..d5d7ffb7711c 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -277,7 +277,10 @@ extern int smb2_query_info_compound(const unsigned int xid, /* query path info from the server using SMB311 POSIX extensions*/ int smb311_posix_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, - struct cifs_open_info_data *data, bool *adjust_tz, bool *reparse); + struct cifs_open_info_data *data, + struct cifs_sid *owner, + struct cifs_sid *group, + bool *adjust_tz, bool *reparse); int posix_info_parse(const void *beg, const void *end, struct smb2_posix_info_parsed *out); int posix_info_sid_size(const void *beg, const void *end); From 9381666e289852f93be9d7f4f7844017e04f6315 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Nov 2022 14:18:33 +0100 Subject: [PATCH 3522/4122] cifs: wire up >migrate_folio CIFS does not use page private data that needs migration, so it can just wire up filemap_migrate_folio. This prepares for removing ->writepage, which is used as a fallback if no migrate_folio method is set. 
Signed-off-by: Christoph Hellwig Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/file.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index cd9698209930..6be924caed39 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -5240,10 +5240,10 @@ const struct address_space_operations cifs_addr_ops = { .direct_IO = cifs_direct_io, .invalidate_folio = cifs_invalidate_folio, .launder_folio = cifs_launder_folio, + .migrate_folio = filemap_migrate_folio, /* - * TODO: investigate and if useful we could add an cifs_migratePage - * helper (under an CONFIG_MIGRATION) in the future, and also - * investigate and add an is_dirty_writeback helper if needed + * TODO: investigate and if useful we could add an is_dirty_writeback + * helper if needed */ .swap_activate = cifs_swap_activate, .swap_deactivate = cifs_swap_deactivate, @@ -5264,4 +5264,5 @@ const struct address_space_operations cifs_addr_ops_smallbuf = { .release_folio = cifs_release_folio, .invalidate_folio = cifs_invalidate_folio, .launder_folio = cifs_launder_folio, + .migrate_folio = filemap_migrate_folio, }; From bff9018d3a52c45711bd63c446a2c80c0275e935 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Nov 2022 14:18:34 +0100 Subject: [PATCH 3523/4122] cifs: stop using generic_writepages generic_writepages is just a wrapper that calls ->writepages on a range, and thus in the way of eventually removing ->writepage. Switch cifs to just open code it in preparation of removing ->writepage. 
[note: I suspect just integrating the small wsize case with the rest of the writeback code might be a better idea here, but that needs someone more familiar with the code] Signed-off-by: Christoph Hellwig Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/file.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 6be924caed39..ec14e38411a1 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2646,6 +2646,21 @@ wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages, return rc; } +static int +cifs_writepage_locked(struct page *page, struct writeback_control *wbc); + +static int cifs_write_one_page(struct page *page, struct writeback_control *wbc, + void *data) +{ + struct address_space *mapping = data; + int ret; + + ret = cifs_writepage_locked(page, wbc); + unlock_page(page); + mapping_set_error(mapping, ret); + return ret; +} + static int cifs_writepages(struct address_space *mapping, struct writeback_control *wbc) { @@ -2662,10 +2677,11 @@ static int cifs_writepages(struct address_space *mapping, /* * If wsize is smaller than the page cache size, default to writing - * one page at a time via cifs_writepage + * one page at a time. */ if (cifs_sb->ctx->wsize < PAGE_SIZE) - return generic_writepages(mapping, wbc); + return write_cache_pages(mapping, wbc, cifs_write_one_page, + mapping); xid = get_xid(); if (wbc->range_cyclic) { From ebaad77c89921c8237ca17791d5462bd289052d0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Nov 2022 14:18:35 +0100 Subject: [PATCH 3524/4122] cifs: remove ->writepage ->writepage is a very inefficient method to write back data, and only used through write_cache_pages or a a fallback when no ->migrate_folio method is present. Now that cifs implements ->migrate_folio and doesn't call generic_writepages, the writepage method can be removed. 
Signed-off-by: Christoph Hellwig Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/file.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index ec14e38411a1..6701257541ab 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2868,13 +2868,6 @@ retry_write: return rc; } -static int cifs_writepage(struct page *page, struct writeback_control *wbc) -{ - int rc = cifs_writepage_locked(page, wbc); - unlock_page(page); - return rc; -} - static int cifs_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata) @@ -5247,7 +5240,6 @@ static bool cifs_dirty_folio(struct address_space *mapping, struct folio *folio) const struct address_space_operations cifs_addr_ops = { .read_folio = cifs_read_folio, .readahead = cifs_readahead, - .writepage = cifs_writepage, .writepages = cifs_writepages, .write_begin = cifs_write_begin, .write_end = cifs_write_end, @@ -5272,7 +5264,6 @@ const struct address_space_operations cifs_addr_ops = { */ const struct address_space_operations cifs_addr_ops_smallbuf = { .read_folio = cifs_read_folio, - .writepage = cifs_writepage, .writepages = cifs_writepages, .write_begin = cifs_write_begin, .write_end = cifs_write_end, From d406d26745aba3365ab9171b2d5cbea9c1757305 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Mon, 5 Dec 2022 23:31:53 -0300 Subject: [PATCH 3525/4122] cifs: skip alloc when request has no pages When smb3_init_transform_rq() was being called with requests (@old_rq) which had no pages, it was unnecessarily allocating a single page for every request in @new_rq. Fix this by skipping page array allocation when requests have no pages (e.g. !smb_rqst::rq_npages). Also get rid of deprecated kmap() and use kmap_local_page() instead while we're at it. 
Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index bfaafd02fb1f..72b22d033ed5 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -4445,21 +4445,27 @@ smb3_init_transform_rq(struct TCP_Server_Info *server, int num_rqst, int rc = -ENOMEM; for (i = 1; i < num_rqst; i++) { - npages = old_rq[i - 1].rq_npages; + struct smb_rqst *old = &old_rq[i - 1]; + struct smb_rqst *new = &new_rq[i]; + + orig_len += smb_rqst_len(server, old); + new->rq_iov = old->rq_iov; + new->rq_nvec = old->rq_nvec; + + npages = old->rq_npages; + if (!npages) + continue; + pages = kmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); if (!pages) goto err_free; - new_rq[i].rq_pages = pages; - new_rq[i].rq_npages = npages; - new_rq[i].rq_offset = old_rq[i - 1].rq_offset; - new_rq[i].rq_pagesz = old_rq[i - 1].rq_pagesz; - new_rq[i].rq_tailsz = old_rq[i - 1].rq_tailsz; - new_rq[i].rq_iov = old_rq[i - 1].rq_iov; - new_rq[i].rq_nvec = old_rq[i - 1].rq_nvec; - - orig_len += smb_rqst_len(server, &old_rq[i - 1]); + new->rq_pages = pages; + new->rq_npages = npages; + new->rq_offset = old->rq_offset; + new->rq_pagesz = old->rq_pagesz; + new->rq_tailsz = old->rq_tailsz; for (j = 0; j < npages; j++) { pages[j] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM); @@ -4472,14 +4478,14 @@ smb3_init_transform_rq(struct TCP_Server_Info *server, int num_rqst, char *dst, *src; unsigned int offset, len; - rqst_page_get_length(&new_rq[i], j, &len, &offset); + rqst_page_get_length(new, j, &len, &offset); - dst = (char *) kmap(new_rq[i].rq_pages[j]) + offset; - src = (char *) kmap(old_rq[i - 1].rq_pages[j]) + offset; + dst = kmap_local_page(new->rq_pages[j]) + offset; + src = kmap_local_page(old->rq_pages[j]) + offset; memcpy(dst, src, len); - kunmap(new_rq[i].rq_pages[j]); - kunmap(old_rq[i - 1].rq_pages[j]); + kunmap(new->rq_pages[j]); + 
kunmap(old->rq_pages[j]); } } From 81c25247a2a03a0f97e4805d7aff7541ccff6baa Mon Sep 17 00:00:00 2001 From: Daniel Scally Date: Tue, 6 Dec 2022 16:12:03 +0000 Subject: [PATCH 3526/4122] usb: gadget: uvc: Rename bmInterfaceFlags -> bmInterlaceFlags In the specification documents for the Uncompressed and MJPEG USB Video Payloads, the field name is bmInterlaceFlags - it has been misnamed within the kernel. Although renaming the field does break the kernel's interface to userspace it should be low-risk in this instance. The field is read only and hardcoded to 0, so there was never any value in anyone reading it. A search of the uvc-gadget application and all the forks that I could find for it did not reveal any users either. Fixes: cdda479f15cd ("USB gadget: video class function driver") Reviewed-by: Laurent Pinchart Reviewed-by: Kieran Bingham Signed-off-by: Daniel Scally Link: https://lore.kernel.org/r/20221206161203.1562827-1-dan.scally@ideasonboard.com Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/configfs-usb-gadget-uvc | 4 ++-- drivers/usb/gadget/function/uvc_configfs.c | 12 ++++++------ drivers/usb/gadget/legacy/webcam.c | 4 ++-- include/uapi/linux/usb/video.h | 4 ++-- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/Documentation/ABI/testing/configfs-usb-gadget-uvc b/Documentation/ABI/testing/configfs-usb-gadget-uvc index 611b23e6488d..f00cff6d8c5c 100644 --- a/Documentation/ABI/testing/configfs-usb-gadget-uvc +++ b/Documentation/ABI/testing/configfs-usb-gadget-uvc @@ -197,7 +197,7 @@ Description: Specific MJPEG format descriptors read-only bmaControls this format's data for bmaControls in the streaming header - bmInterfaceFlags specifies interlace information, + bmInterlaceFlags specifies interlace information, read-only bAspectRatioY the X dimension of the picture aspect ratio, read-only @@ -253,7 +253,7 @@ Description: Specific uncompressed format descriptors read-only bmaControls this format's data for bmaControls in the 
streaming header - bmInterfaceFlags specifies interlace information, + bmInterlaceFlags specifies interlace information, read-only bAspectRatioY the X dimension of the picture aspect ratio, read-only diff --git a/drivers/usb/gadget/function/uvc_configfs.c b/drivers/usb/gadget/function/uvc_configfs.c index 4303a3283ba0..76cb60d13049 100644 --- a/drivers/usb/gadget/function/uvc_configfs.c +++ b/drivers/usb/gadget/function/uvc_configfs.c @@ -1512,7 +1512,7 @@ UVCG_UNCOMPRESSED_ATTR(b_bits_per_pixel, bBitsPerPixel, 8); UVCG_UNCOMPRESSED_ATTR(b_default_frame_index, bDefaultFrameIndex, 8); UVCG_UNCOMPRESSED_ATTR_RO(b_aspect_ratio_x, bAspectRatioX, 8); UVCG_UNCOMPRESSED_ATTR_RO(b_aspect_ratio_y, bAspectRatioY, 8); -UVCG_UNCOMPRESSED_ATTR_RO(bm_interface_flags, bmInterfaceFlags, 8); +UVCG_UNCOMPRESSED_ATTR_RO(bm_interlace_flags, bmInterlaceFlags, 8); #undef UVCG_UNCOMPRESSED_ATTR #undef UVCG_UNCOMPRESSED_ATTR_RO @@ -1541,7 +1541,7 @@ static struct configfs_attribute *uvcg_uncompressed_attrs[] = { &uvcg_uncompressed_attr_b_default_frame_index, &uvcg_uncompressed_attr_b_aspect_ratio_x, &uvcg_uncompressed_attr_b_aspect_ratio_y, - &uvcg_uncompressed_attr_bm_interface_flags, + &uvcg_uncompressed_attr_bm_interlace_flags, &uvcg_uncompressed_attr_bma_controls, NULL, }; @@ -1574,7 +1574,7 @@ static struct config_group *uvcg_uncompressed_make(struct config_group *group, h->desc.bDefaultFrameIndex = 1; h->desc.bAspectRatioX = 0; h->desc.bAspectRatioY = 0; - h->desc.bmInterfaceFlags = 0; + h->desc.bmInterlaceFlags = 0; h->desc.bCopyProtect = 0; INIT_LIST_HEAD(&h->fmt.frames); @@ -1700,7 +1700,7 @@ UVCG_MJPEG_ATTR(b_default_frame_index, bDefaultFrameIndex, 8); UVCG_MJPEG_ATTR_RO(bm_flags, bmFlags, 8); UVCG_MJPEG_ATTR_RO(b_aspect_ratio_x, bAspectRatioX, 8); UVCG_MJPEG_ATTR_RO(b_aspect_ratio_y, bAspectRatioY, 8); -UVCG_MJPEG_ATTR_RO(bm_interface_flags, bmInterfaceFlags, 8); +UVCG_MJPEG_ATTR_RO(bm_interlace_flags, bmInterlaceFlags, 8); #undef UVCG_MJPEG_ATTR #undef UVCG_MJPEG_ATTR_RO @@ 
-1728,7 +1728,7 @@ static struct configfs_attribute *uvcg_mjpeg_attrs[] = { &uvcg_mjpeg_attr_bm_flags, &uvcg_mjpeg_attr_b_aspect_ratio_x, &uvcg_mjpeg_attr_b_aspect_ratio_y, - &uvcg_mjpeg_attr_bm_interface_flags, + &uvcg_mjpeg_attr_bm_interlace_flags, &uvcg_mjpeg_attr_bma_controls, NULL, }; @@ -1755,7 +1755,7 @@ static struct config_group *uvcg_mjpeg_make(struct config_group *group, h->desc.bDefaultFrameIndex = 1; h->desc.bAspectRatioX = 0; h->desc.bAspectRatioY = 0; - h->desc.bmInterfaceFlags = 0; + h->desc.bmInterlaceFlags = 0; h->desc.bCopyProtect = 0; INIT_LIST_HEAD(&h->fmt.frames); diff --git a/drivers/usb/gadget/legacy/webcam.c b/drivers/usb/gadget/legacy/webcam.c index 94e22867da1d..53e38f87472b 100644 --- a/drivers/usb/gadget/legacy/webcam.c +++ b/drivers/usb/gadget/legacy/webcam.c @@ -171,7 +171,7 @@ static const struct uvc_format_uncompressed uvc_format_yuv = { .bDefaultFrameIndex = 1, .bAspectRatioX = 0, .bAspectRatioY = 0, - .bmInterfaceFlags = 0, + .bmInterlaceFlags = 0, .bCopyProtect = 0, }; @@ -222,7 +222,7 @@ static const struct uvc_format_mjpeg uvc_format_mjpg = { .bDefaultFrameIndex = 1, .bAspectRatioX = 0, .bAspectRatioY = 0, - .bmInterfaceFlags = 0, + .bmInterlaceFlags = 0, .bCopyProtect = 0, }; diff --git a/include/uapi/linux/usb/video.h b/include/uapi/linux/usb/video.h index bfdae12cdacf..6e8e572c2980 100644 --- a/include/uapi/linux/usb/video.h +++ b/include/uapi/linux/usb/video.h @@ -466,7 +466,7 @@ struct uvc_format_uncompressed { __u8 bDefaultFrameIndex; __u8 bAspectRatioX; __u8 bAspectRatioY; - __u8 bmInterfaceFlags; + __u8 bmInterlaceFlags; __u8 bCopyProtect; } __attribute__((__packed__)); @@ -522,7 +522,7 @@ struct uvc_format_mjpeg { __u8 bDefaultFrameIndex; __u8 bAspectRatioX; __u8 bAspectRatioY; - __u8 bmInterfaceFlags; + __u8 bmInterlaceFlags; __u8 bCopyProtect; } __attribute__((__packed__)); From c7912f27dedd874d49eadf78b5b6fbfdec52c7c3 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 7 Dec 2022 09:33:31 +0000 Subject: [PATCH 
3527/4122] staging: rtl8192e: Fix spelling mistake "ContryIE" -> "CountryIE" There is a spelling mistake in a netdev_info message. Fix it. Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20221207093331.2280355-1-colin.i.king@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtllib_rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rtl8192e/rtllib_rx.c b/drivers/staging/rtl8192e/rtllib_rx.c index f8965afab767..669e74a67190 100644 --- a/drivers/staging/rtl8192e/rtllib_rx.c +++ b/drivers/staging/rtl8192e/rtllib_rx.c @@ -1776,7 +1776,7 @@ static inline void rtllib_extract_country_ie( if (rtllib_act_scanning(ieee, false) && ieee->FirstIe_InScan) netdev_info(ieee->dev, - "Received beacon ContryIE, SSID: <%s>\n", + "Received beacon CountryIE, SSID: <%s>\n", network->ssid); dot11d_update_country(ieee, addr2, info_element->len, From 37aa6b98237082a4bf882b4d986329b94ebd447d Mon Sep 17 00:00:00 2001 From: Chen Zhongjin Date: Tue, 6 Dec 2022 09:50:33 +0800 Subject: [PATCH 3528/4122] vme: Use root_device_register() not underlined version root_device_register and __root_device_register have exactly same effect. Use the not underlined version to keep usage consistency. 
Signed-off-by: Chen Zhongjin Link: https://lore.kernel.org/r/20221206015033.125827-1-chenzhongjin@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vme_user/vme_fake.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/vme_user/vme_fake.c b/drivers/staging/vme_user/vme_fake.c index 6454ccf6e5b4..f5d2c345978a 100644 --- a/drivers/staging/vme_user/vme_fake.c +++ b/drivers/staging/vme_user/vme_fake.c @@ -1069,7 +1069,7 @@ static int __init fake_init(void) struct vme_lm_resource *lm; /* We need a fake parent device */ - vme_root = __root_device_register("vme", THIS_MODULE); + vme_root = root_device_register("vme"); if (IS_ERR(vme_root)) return PTR_ERR(vme_root); From 52f31ed228212ba572c44e15e818a3a5c74122c0 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 8 Dec 2022 08:29:22 -0800 Subject: [PATCH 3529/4122] xfs: dquot shrinker doesn't check for XFS_DQFLAG_FREEING Resulting in a UAF if the shrinker races with some other dquot freeing mechanism that sets XFS_DQFLAG_FREEING before the dquot is removed from the LRU. This can occur if a dquot purge races with drop_caches. Reported-by: syzbot+912776840162c13db1a3@syzkaller.appspotmail.com Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_qm.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 18bb4ec4d7c9..ff53d40a2dae 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -422,6 +422,14 @@ xfs_qm_dquot_isolate( if (!xfs_dqlock_nowait(dqp)) goto out_miss_busy; + /* + * If something else is freeing this dquot and hasn't yet removed it + * from the LRU, leave it for the freeing task to complete the freeing + * process rather than risk it being free from under us here. + */ + if (dqp->q_flags & XFS_DQFLAG_FREEING) + goto out_miss_unlock; + /* * This dquot has acquired a reference in the meantime remove it from * the freelist and try again. 
@@ -441,10 +449,8 @@ xfs_qm_dquot_isolate( * skip it so there is time for the IO to complete before we try to * reclaim it again on the next LRU pass. */ - if (!xfs_dqflock_nowait(dqp)) { - xfs_dqunlock(dqp); - goto out_miss_busy; - } + if (!xfs_dqflock_nowait(dqp)) + goto out_miss_unlock; if (XFS_DQ_IS_DIRTY(dqp)) { struct xfs_buf *bp = NULL; @@ -478,6 +484,8 @@ xfs_qm_dquot_isolate( XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaims); return LRU_REMOVED; +out_miss_unlock: + xfs_dqunlock(dqp); out_miss_busy: trace_xfs_dqreclaim_busy(dqp); XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses); From 7dfb216eda99bbfc2a8c3b03d2eec63314f52b3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amadeusz=20S=C5=82awi=C5=84ski?= Date: Thu, 8 Dec 2022 23:40:16 +0100 Subject: [PATCH 3530/4122] ACPICA: Fix operand resolution MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In our tests we get UBSAN warning coming from ACPI parser. This is caused by trying to resolve operands when there is none. [ 0.000000] Linux version 5.15.0-rc3chromeavsrel1.0.184+ (root@...) (gcc (Ubuntu 10.3.0-1ubuntu1~20.04) 10.3.0, GNU ld (GNU Binutils for Ubuntu) 2.34) #1 SMP PREEMPT Sat Oct 16 00:08:27 UTC 2021 ... [ 14.719508] ================================================================================ [ 14.719551] UBSAN: array-index-out-of-bounds in /.../linux/drivers/acpi/acpica/dswexec.c:401:12 [ 14.719594] index -1 is out of range for type 'acpi_operand_object *[9]' [ 14.719621] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.15.0-rc3chromeavsrel1.0.184+ #1 [ 14.719657] Hardware name: Intel Corp. Geminilake/GLK RVP2 LP4SD (07), BIOS GELKRVPA.X64.0214.B50.2009111159 09/11/2020 [ 14.719694] Call Trace: [ 14.719712] dump_stack_lvl+0x38/0x49 [ 14.719749] dump_stack+0x10/0x12 [ 14.719775] ubsan_epilogue+0x9/0x45 [ 14.719801] __ubsan_handle_out_of_bounds.cold+0x44/0x49 [ 14.719835] acpi_ds_exec_end_op+0x1d7/0x6b5 [ 14.719870] acpi_ps_parse_loop+0x942/0xb34 ... 
Problem happens because WalkState->NumOperands is 0 and it is used when trying to access into operands table. Actual code is: WalkState->Operands [WalkState->NumOperands -1] which causes out of bound access. Improve the check before above access to check if ACPI opcode should have any arguments (operands) at all. Link: https://github.com/acpica/acpica/pull/745 Signed-off-by: Amadeusz Sławiński Reviewed-by: Cezary Rojewski Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/dswexec.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/acpica/dswexec.c b/drivers/acpi/acpica/dswexec.c index e8ad41387f84..b082eb942a0f 100644 --- a/drivers/acpi/acpica/dswexec.c +++ b/drivers/acpi/acpica/dswexec.c @@ -389,9 +389,11 @@ acpi_status acpi_ds_exec_end_op(struct acpi_walk_state *walk_state) /* * All opcodes require operand resolution, with the only exceptions - * being the object_type and size_of operators. + * being the object_type and size_of operators as well as opcodes that + * take no arguments. */ - if (!(walk_state->op_info->flags & AML_NO_OPERAND_RESOLVE)) { + if (!(walk_state->op_info->flags & AML_NO_OPERAND_RESOLVE) && + (walk_state->op_info->flags & AML_HAS_ARGS)) { /* Resolve all operands */ From 7a9d74e7e403cb2e60d4d00c05f2f3ab2a33d0c3 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 8 Dec 2022 15:23:32 +0100 Subject: [PATCH 3531/4122] ACPICA: include/acpi/acpixf.h: Fix indentation A bunch of the functions declared in include/acpi/acpixf.h have their name aligned a space after the '(' of e.g. the `ACPI_EXTERNAL_RETURN_STATUS(acpi_status` line above rather than being directly aligned after the '('. This breaks applying patches generated from the ACPICA upstream git, remove the extra space before the function-names and all the arguments to fix this. Signed-off-by: Hans de Goede Signed-off-by: Rafael J.
Wysocki --- include/acpi/acpixf.h | 128 +++++++++++++++++++++--------------------- 1 file changed, 64 insertions(+), 64 deletions(-) diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 9e49b37fc869..d1329d6d526d 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -589,82 +589,82 @@ ACPI_EXTERNAL_RETURN_STATUS(acpi_status acpi_install_initialization_handler (acpi_init_handler handler, u32 function)) ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status - acpi_install_sci_handler(acpi_sci_handler - address, - void *context)) -ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status - acpi_remove_sci_handler(acpi_sci_handler - address)) -ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status - acpi_install_global_event_handler - (acpi_gbl_event_handler handler, - void *context)) -ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status - acpi_install_fixed_event_handler(u32 - acpi_event, - acpi_event_handler - handler, - void - *context)) -ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status - acpi_remove_fixed_event_handler(u32 acpi_event, - acpi_event_handler - handler)) -ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status - acpi_install_gpe_handler(acpi_handle - gpe_device, - u32 gpe_number, - u32 type, - acpi_gpe_handler - address, - void *context)) -ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status - acpi_install_gpe_raw_handler(acpi_handle - gpe_device, - u32 gpe_number, - u32 type, - acpi_gpe_handler - address, - void *context)) -ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status - acpi_remove_gpe_handler(acpi_handle gpe_device, - u32 gpe_number, - acpi_gpe_handler - address)) -ACPI_EXTERNAL_RETURN_STATUS(acpi_status - acpi_install_notify_handler(acpi_handle device, - u32 handler_type, - acpi_notify_handler - handler, + acpi_install_sci_handler(acpi_sci_handler + address, void *context)) +ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status + acpi_remove_sci_handler(acpi_sci_handler + address)) +ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status + acpi_install_global_event_handler + (acpi_gbl_event_handler handler, + void 
*context)) +ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status + acpi_install_fixed_event_handler(u32 + acpi_event, + acpi_event_handler + handler, + void + *context)) +ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status + acpi_remove_fixed_event_handler(u32 acpi_event, + acpi_event_handler + handler)) +ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status + acpi_install_gpe_handler(acpi_handle + gpe_device, + u32 gpe_number, + u32 type, + acpi_gpe_handler + address, + void *context)) +ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status + acpi_install_gpe_raw_handler(acpi_handle + gpe_device, + u32 gpe_number, + u32 type, + acpi_gpe_handler + address, + void *context)) +ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status + acpi_remove_gpe_handler(acpi_handle gpe_device, + u32 gpe_number, + acpi_gpe_handler + address)) ACPI_EXTERNAL_RETURN_STATUS(acpi_status - acpi_remove_notify_handler(acpi_handle device, + acpi_install_notify_handler(acpi_handle device, u32 handler_type, acpi_notify_handler - handler)) + handler, + void *context)) ACPI_EXTERNAL_RETURN_STATUS(acpi_status - acpi_install_address_space_handler(acpi_handle - device, - acpi_adr_space_type - space_id, - acpi_adr_space_handler - handler, - acpi_adr_space_setup - setup, - void *context)) + acpi_remove_notify_handler(acpi_handle device, + u32 handler_type, + acpi_notify_handler + handler)) ACPI_EXTERNAL_RETURN_STATUS(acpi_status - acpi_remove_address_space_handler(acpi_handle + acpi_install_address_space_handler(acpi_handle device, acpi_adr_space_type space_id, acpi_adr_space_handler - handler)) + handler, + acpi_adr_space_setup + setup, + void *context)) ACPI_EXTERNAL_RETURN_STATUS(acpi_status - acpi_install_exception_handler - (acpi_exception_handler handler)) + acpi_remove_address_space_handler(acpi_handle + device, + acpi_adr_space_type + space_id, + acpi_adr_space_handler + handler)) ACPI_EXTERNAL_RETURN_STATUS(acpi_status - acpi_install_interface_handler - (acpi_interface_handler handler)) + acpi_install_exception_handler + 
(acpi_exception_handler handler)) +ACPI_EXTERNAL_RETURN_STATUS(acpi_status + acpi_install_interface_handler + (acpi_interface_handler handler)) /* * Global Lock interfaces From 54c516aeb8b39eeae6450b7d8076d381568dca46 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 8 Dec 2022 15:23:33 +0100 Subject: [PATCH 3532/4122] ACPICA: Allow address_space_handler Install and _REG execution as 2 separate steps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ACPI-2.0 says that the EC op_region handler must be available immediately (like the standard default op_region handlers): Quoting from the ACPI spec version 6.3: "6.5.4 _REG (Region) ... 2. OSPM must make Embedded Controller operation regions, accessed via the Embedded Controllers described in ECDT, available before executing any control method. These operation regions may become inaccessible after OSPM runs _REG(EmbeddedControl, 0)." So the OS must probe the ECDT described EC and install the OpRegion handler before calling acpi_enable_subsystem() and acpi_initialize_objects(). This is a problem because calling acpi_install_address_space_handler() does not just install the op_region handler, it also runs the EC's _REG method. This _REG method may rely on initialization done by the _INI methods of one of the PCI / _SB root devices. For the other early/default op_region handlers the op_region handler install and the _REG execution is split into 2 separate steps: 1. acpi_ev_install_region_handlers(), called early from acpi_load_tables() 2. acpi_ev_initialize_op_regions(), called from acpi_initialize_objects() To fix the EC op_region issue, add 2 new functions: 1. acpi_install_address_space_handler_no_reg() 2. acpi_execute_reg_methods() to allow doing things in 2 steps for other op_region handlers, like the EC handler, too. Note that the comment describing acpi_ev_install_region_handlers() even has a paragraph describing this problem.
Using the new methods allows users to avoid this problem. Link: https://github.com/acpica/acpica/pull/786 Link: https://bugzilla.kernel.org/show_bug.cgi?id=214899 Reported-and-tested-by: Johannes Penßel Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/evxfregn.c | 92 +++++++++++++++++++++++++++++++--- include/acpi/acpixf.h | 10 ++++ 2 files changed, 95 insertions(+), 7 deletions(-) diff --git a/drivers/acpi/acpica/evxfregn.c b/drivers/acpi/acpica/evxfregn.c index 0a8372bf6a77..a5c19f46ec17 100644 --- a/drivers/acpi/acpica/evxfregn.c +++ b/drivers/acpi/acpica/evxfregn.c @@ -20,13 +20,14 @@ ACPI_MODULE_NAME("evxfregn") /******************************************************************************* * - * FUNCTION: acpi_install_address_space_handler + * FUNCTION: acpi_install_address_space_handler_internal * * PARAMETERS: device - Handle for the device * space_id - The address space ID * handler - Address of the handler * setup - Address of the setup function * context - Value passed to the handler on each access + * Run_reg - Run _REG methods for this address space? * * RETURN: Status * @@ -37,13 +38,16 @@ ACPI_MODULE_NAME("evxfregn") * are executed here, and these methods can only be safely executed after * the default handlers have been installed and the hardware has been * initialized (via acpi_enable_subsystem.) + * To avoid this problem pass FALSE for Run_Reg and later on call + * acpi_execute_reg_methods() to execute _REG. 
* ******************************************************************************/ -acpi_status -acpi_install_address_space_handler(acpi_handle device, - acpi_adr_space_type space_id, - acpi_adr_space_handler handler, - acpi_adr_space_setup setup, void *context) +static acpi_status +acpi_install_address_space_handler_internal(acpi_handle device, + acpi_adr_space_type space_id, + acpi_adr_space_handler handler, + acpi_adr_space_setup setup, + void *context, u8 run_reg) { struct acpi_namespace_node *node; acpi_status status; @@ -80,14 +84,40 @@ acpi_install_address_space_handler(acpi_handle device, /* Run all _REG methods for this address space */ - acpi_ev_execute_reg_methods(node, space_id, ACPI_REG_CONNECT); + if (run_reg) { + acpi_ev_execute_reg_methods(node, space_id, ACPI_REG_CONNECT); + } unlock_and_exit: (void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE); return_ACPI_STATUS(status); } +acpi_status +acpi_install_address_space_handler(acpi_handle device, + acpi_adr_space_type space_id, + acpi_adr_space_handler handler, + acpi_adr_space_setup setup, void *context) +{ + return acpi_install_address_space_handler_internal(device, space_id, + handler, setup, + context, TRUE); +} + ACPI_EXPORT_SYMBOL(acpi_install_address_space_handler) +acpi_status +acpi_install_address_space_handler_no_reg(acpi_handle device, + acpi_adr_space_type space_id, + acpi_adr_space_handler handler, + acpi_adr_space_setup setup, + void *context) +{ + return acpi_install_address_space_handler_internal(device, space_id, + handler, setup, + context, FALSE); +} + +ACPI_EXPORT_SYMBOL(acpi_install_address_space_handler_no_reg) /******************************************************************************* * @@ -228,3 +258,51 @@ unlock_and_exit: } ACPI_EXPORT_SYMBOL(acpi_remove_address_space_handler) +/******************************************************************************* + * + * FUNCTION: acpi_execute_reg_methods + * + * PARAMETERS: device - Handle for the device + * space_id - The address 
space ID + * + * RETURN: Status + * + * DESCRIPTION: Execute _REG for all op_regions of a given space_id. + * + ******************************************************************************/ +acpi_status +acpi_execute_reg_methods(acpi_handle device, acpi_adr_space_type space_id) +{ + struct acpi_namespace_node *node; + acpi_status status; + + ACPI_FUNCTION_TRACE(acpi_execute_reg_methods); + + /* Parameter validation */ + + if (!device) { + return_ACPI_STATUS(AE_BAD_PARAMETER); + } + + status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE); + if (ACPI_FAILURE(status)) { + return_ACPI_STATUS(status); + } + + /* Convert and validate the device handle */ + + node = acpi_ns_validate_handle(device); + if (node) { + + /* Run all _REG methods for this address space */ + + acpi_ev_execute_reg_methods(node, space_id, ACPI_REG_CONNECT); + } else { + status = AE_BAD_PARAMETER; + } + + (void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE); + return_ACPI_STATUS(status); +} + +ACPI_EXPORT_SYMBOL(acpi_execute_reg_methods) diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index d1329d6d526d..9778408f8db4 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -652,6 +652,16 @@ ACPI_EXTERNAL_RETURN_STATUS(acpi_status acpi_adr_space_setup setup, void *context)) +ACPI_EXTERNAL_RETURN_STATUS(acpi_status + acpi_install_address_space_handler_no_reg + (acpi_handle device, acpi_adr_space_type space_id, + acpi_adr_space_handler handler, + acpi_adr_space_setup setup, + void *context)) +ACPI_EXTERNAL_RETURN_STATUS(acpi_status + acpi_execute_reg_methods(acpi_handle device, + acpi_adr_space_type + space_id)) ACPI_EXTERNAL_RETURN_STATUS(acpi_status acpi_remove_address_space_handler(acpi_handle device, From a5072078dbfaa9d70130805766dfa34bbb7bf2a7 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 8 Dec 2022 15:23:34 +0100 Subject: [PATCH 3533/4122] ACPI: EC: Fix EC address space handler unregistration When an ECDT table is present the EC address space handler gets registered on 
the root node. So to unregister it properly the unregister call also must be done on the root node. Store the ACPI handle used for the acpi_install_address_space_handler() call and use the same handle for the acpi_remove_address_space_handler() call. Reported-by: Rafael J. Wysocki Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/acpi/ec.c | 4 +++- drivers/acpi/internal.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 9751b84c1b22..5a21e4d58322 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1475,6 +1475,7 @@ static int ec_install_handlers(struct acpi_ec *ec, struct acpi_device *device) return -ENODEV; } set_bit(EC_FLAGS_EC_HANDLER_INSTALLED, &ec->flags); + ec->address_space_handler_holder = ec->handle; } if (!device) @@ -1526,7 +1527,8 @@ static int ec_install_handlers(struct acpi_ec *ec, struct acpi_device *device) static void ec_remove_handlers(struct acpi_ec *ec) { if (test_bit(EC_FLAGS_EC_HANDLER_INSTALLED, &ec->flags)) { - if (ACPI_FAILURE(acpi_remove_address_space_handler(ec->handle, + if (ACPI_FAILURE(acpi_remove_address_space_handler( + ec->address_space_handler_holder, ACPI_ADR_SPACE_EC, &acpi_ec_space_handler))) pr_err("failed to remove space handler\n"); clear_bit(EC_FLAGS_EC_HANDLER_INSTALLED, &ec->flags); diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 219c02df9a08..ec584442fb29 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -173,6 +173,7 @@ enum acpi_ec_event_state { struct acpi_ec { acpi_handle handle; + acpi_handle address_space_handler_holder; int gpe; int irq; unsigned long command_addr; From ab4620f58d38206687b9f99d9d2cc1d5a2640985 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 8 Dec 2022 15:23:35 +0100 Subject: [PATCH 3534/4122] ACPI: EC: Fix ECDT probe ordering issues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ACPI-2.0 says that the EC OpRegion
handler must be available immediately (like the standard default OpRegion handlers): Quoting from the ACPI spec version 6.3: "6.5.4 _REG (Region) ... 2. OSPM must make Embedded Controller operation regions, accessed via the Embedded Controllers described in ECDT, available before executing any control method. These operation regions may become inaccessible after OSPM runs _REG(EmbeddedControl, 0)." So acpi_bus_init() calls acpi_ec_ecdt_probe(), which calls acpi_install_address_space_handler() to install the EC's OpRegion handler, early on. This not only installs the OpRegion handler, but also calls the EC's _REG method. The _REG method call is a problem because it may rely on initialization done by the _INI methods of one of the PCI / _SB root devs, see for example: https://bugzilla.kernel.org/show_bug.cgi?id=214899 . Generally speaking _REG methods are executed when the ACPI-device they are part of has a driver bound to it, whereas _INI methods must be executed at table load time (according to the spec). The problem here is that the early acpi_install_address_space_handler() call causes the _REG handler to run too early. To allow fixing this the ACPICA code now allows splitting the OpRegion handler installation and the executing of _REG into 2 separate steps. This commit uses this ACPICA functionality to fix the EC probe ordering by delaying the executing of _REG for ECDT described ECs till the matching EC device in the DSDT gets parsed and acpi_ec_add() for it gets called. This moves the calling of _REG for the EC on devices with an ECDT to the same point in time where it is called on devices without an ECDT table. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=214899 Reported-and-tested-by: Johannes Penßel Signed-off-by: Hans de Goede Signed-off-by: Rafael J.
Wysocki --- drivers/acpi/ec.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 5a21e4d58322..73ac2f222897 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -94,6 +94,7 @@ enum { EC_FLAGS_QUERY_ENABLED, /* Query is enabled */ EC_FLAGS_EVENT_HANDLER_INSTALLED, /* Event handler installed */ EC_FLAGS_EC_HANDLER_INSTALLED, /* OpReg handler installed */ + EC_FLAGS_EC_REG_CALLED, /* OpReg ACPI _REG method called */ EC_FLAGS_QUERY_METHODS_INSTALLED, /* _Qxx handlers installed */ EC_FLAGS_STARTED, /* Driver is started */ EC_FLAGS_STOPPED, /* Driver is stopped */ @@ -1446,6 +1447,7 @@ static bool install_gpio_irq_event_handler(struct acpi_ec *ec) * ec_install_handlers - Install service callbacks and register query methods. * @ec: Target EC. * @device: ACPI device object corresponding to @ec. + * @call_reg: If _REG should be called to notify OpRegion availability * * Install a handler for the EC address space type unless it has been installed * already. If @device is not NULL, also look for EC query methods in the @@ -1458,7 +1460,8 @@ static bool install_gpio_irq_event_handler(struct acpi_ec *ec) * -EPROBE_DEFER if GPIO IRQ acquisition needs to be deferred, * or 0 (success) otherwise. 
*/ -static int ec_install_handlers(struct acpi_ec *ec, struct acpi_device *device) +static int ec_install_handlers(struct acpi_ec *ec, struct acpi_device *device, + bool call_reg) { acpi_status status; @@ -1466,10 +1469,10 @@ static int ec_install_handlers(struct acpi_ec *ec, struct acpi_device *device) if (!test_bit(EC_FLAGS_EC_HANDLER_INSTALLED, &ec->flags)) { acpi_ec_enter_noirq(ec); - status = acpi_install_address_space_handler(ec->handle, - ACPI_ADR_SPACE_EC, - &acpi_ec_space_handler, - NULL, ec); + status = acpi_install_address_space_handler_no_reg(ec->handle, + ACPI_ADR_SPACE_EC, + &acpi_ec_space_handler, + NULL, ec); if (ACPI_FAILURE(status)) { acpi_ec_stop(ec, false); return -ENODEV; @@ -1478,6 +1481,11 @@ static int ec_install_handlers(struct acpi_ec *ec, struct acpi_device *device) ec->address_space_handler_holder = ec->handle; } + if (call_reg && !test_bit(EC_FLAGS_EC_REG_CALLED, &ec->flags)) { + acpi_execute_reg_methods(ec->handle, ACPI_ADR_SPACE_EC); + set_bit(EC_FLAGS_EC_REG_CALLED, &ec->flags); + } + if (!device) return 0; @@ -1564,11 +1572,11 @@ static void ec_remove_handlers(struct acpi_ec *ec) } } -static int acpi_ec_setup(struct acpi_ec *ec, struct acpi_device *device) +static int acpi_ec_setup(struct acpi_ec *ec, struct acpi_device *device, bool call_reg) { int ret; - ret = ec_install_handlers(ec, device); + ret = ec_install_handlers(ec, device, call_reg); if (ret) return ret; @@ -1633,7 +1641,7 @@ static int acpi_ec_add(struct acpi_device *device) } } - ret = acpi_ec_setup(ec, device); + ret = acpi_ec_setup(ec, device, true); if (ret) goto err; @@ -1753,7 +1761,7 @@ void __init acpi_ec_dsdt_probe(void) * At this point, the GPE is not fully initialized, so do not to * handle the events. 
*/ - ret = acpi_ec_setup(ec, NULL); + ret = acpi_ec_setup(ec, NULL, true); if (ret) { acpi_ec_free(ec); return; @@ -1947,7 +1955,7 @@ void __init acpi_ec_ecdt_probe(void) * At this point, the namespace is not initialized, so do not find * the namespace objects, or handle the events. */ - ret = acpi_ec_setup(ec, NULL); + ret = acpi_ec_setup(ec, NULL, false); if (ret) { acpi_ec_free(ec); goto out; From c1ddc3dad85dda4421e852c72f7596cdb10e9fc6 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Thu, 8 Dec 2022 13:38:50 +0100 Subject: [PATCH 3535/4122] PCI: xilinx-nwl: Fix coding style violations Fix code alignments and remove additional newline. Link: https://lore.kernel.org/r/17c75e7003bb8c43a0f45ae3d7c45cac230ef852.1670503129.git.michal.simek@amd.com Signed-off-by: Michal Simek Signed-off-by: Bjorn Helgaas --- drivers/pci/controller/pcie-xilinx-nwl.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/pci/controller/pcie-xilinx-nwl.c b/drivers/pci/controller/pcie-xilinx-nwl.c index 40d070e54ad2..e10a58649bf5 100644 --- a/drivers/pci/controller/pcie-xilinx-nwl.c +++ b/drivers/pci/controller/pcie-xilinx-nwl.c @@ -474,15 +474,15 @@ static int nwl_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, for (i = 0; i < nr_irqs; i++) { irq_domain_set_info(domain, virq + i, bit + i, &nwl_irq_chip, - domain->host_data, handle_simple_irq, - NULL, NULL); + domain->host_data, handle_simple_irq, + NULL, NULL); } mutex_unlock(&msi->lock); return 0; } static void nwl_irq_domain_free(struct irq_domain *domain, unsigned int virq, - unsigned int nr_irqs) + unsigned int nr_irqs) { struct irq_data *data = irq_domain_get_irq_data(domain, virq); struct nwl_pcie *pcie = irq_data_get_irq_chip_data(data); @@ -722,7 +722,6 @@ static int nwl_pcie_bridge_init(struct nwl_pcie *pcie) /* Enable all misc interrupts */ nwl_bridge_writel(pcie, MSGF_MISC_SR_MASKALL, MSGF_MISC_MASK); - /* Disable all legacy interrupts */ nwl_bridge_writel(pcie, 
(u32)~MSGF_LEG_SR_MASKALL, MSGF_LEG_MASK); From 1c8a8ec0a0e9a1176022a35c4daf04fe1594d270 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 28 Nov 2022 10:43:44 +0100 Subject: [PATCH 3536/4122] f2fs: remove struct segment_allocation default_salloc_ops There is only single instance of these ops, so remove the indirection and call allocate_segment_by_default directly. Signed-off-by: Christoph Hellwig Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 11 ++--------- fs/f2fs/segment.h | 6 ------ 2 files changed, 2 insertions(+), 15 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 0ff451ea18f6..bbe6556799ce 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2926,7 +2926,7 @@ static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type, return; alloc: old_segno = curseg->segno; - SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true); + allocate_segment_by_default(sbi, type, true); locate_dirty_segment(sbi, old_segno); } @@ -2957,10 +2957,6 @@ void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi) f2fs_up_read(&SM_I(sbi)->curseg_lock); } -static const struct segment_allocation default_salloc_ops = { - .allocate_segment = allocate_segment_by_default, -}; - bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc) { @@ -3284,7 +3280,7 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, get_atssr_segment(sbi, type, se->type, AT_SSR, se->mtime); else - sit_i->s_ops->allocate_segment(sbi, type, false); + allocate_segment_by_default(sbi, type, false); } /* * segment dirty status should be updated after segment allocation, @@ -4270,9 +4266,6 @@ static int build_sit_info(struct f2fs_sb_info *sbi) return -ENOMEM; #endif - /* init SIT information */ - sit_i->s_ops = &default_salloc_ops; - sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr); sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg; sit_i->written_valid_blocks = 0; diff --git 
a/fs/f2fs/segment.h b/fs/f2fs/segment.h index be8f2d7d007b..3ad1b7b6fa94 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -222,10 +222,6 @@ struct sec_entry { unsigned int valid_blocks; /* # of valid blocks in a section */ }; -struct segment_allocation { - void (*allocate_segment)(struct f2fs_sb_info *, int, bool); -}; - #define MAX_SKIP_GC_COUNT 16 struct revoke_entry { @@ -235,8 +231,6 @@ struct revoke_entry { }; struct sit_info { - const struct segment_allocation *s_ops; - block_t sit_base_addr; /* start block address of SIT area */ block_t sit_blocks; /* # of blocks used by SIT area */ block_t written_valid_blocks; /* # of valid blocks in main area */ From 8442d94b8ac8d5d8300725a9ffa9def526b71170 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 28 Nov 2022 10:43:45 +0100 Subject: [PATCH 3537/4122] f2fs: open code allocate_segment_by_default allocate_segment_by_default has just two callers, which use very different code paths inside it based on the force parameter. Just open code the logic in the two callers using a new helper to decide if a new segment should be allocated.
Signed-off-by: Christoph Hellwig Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 50 +++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index bbe6556799ce..c4e118eb7d19 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2849,31 +2849,20 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type, return 0; } -/* - * flush out current segment and replace it with new segment - * This function should be returned with success, otherwise BUG - */ -static void allocate_segment_by_default(struct f2fs_sb_info *sbi, - int type, bool force) +static bool need_new_seg(struct f2fs_sb_info *sbi, int type) { struct curseg_info *curseg = CURSEG_I(sbi, type); - if (force) - new_curseg(sbi, type, true); - else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) && - curseg->seg_type == CURSEG_WARM_NODE) - new_curseg(sbi, type, false); - else if (curseg->alloc_type == LFS && - is_next_segment_free(sbi, curseg, type) && - likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED))) - new_curseg(sbi, type, false); - else if (f2fs_need_SSR(sbi) && - get_ssr_segment(sbi, type, SSR, 0)) - change_curseg(sbi, type, true); - else - new_curseg(sbi, type, false); - - stat_inc_seg_type(sbi, curseg); + if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) && + curseg->seg_type == CURSEG_WARM_NODE) + return true; + if (curseg->alloc_type == LFS && + is_next_segment_free(sbi, curseg, type) && + likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED))) + return true; + if (!f2fs_need_SSR(sbi) || !get_ssr_segment(sbi, type, SSR, 0)) + return true; + return false; } void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, @@ -2926,7 +2915,8 @@ static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type, return; alloc: old_segno = curseg->segno; - allocate_segment_by_default(sbi, type, true); + new_curseg(sbi, type, true); + stat_inc_seg_type(sbi, curseg); 
locate_dirty_segment(sbi, old_segno); } @@ -3276,11 +3266,19 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, update_sit_entry(sbi, old_blkaddr, -1); if (!__has_curseg_space(sbi, curseg)) { - if (from_gc) + /* + * Flush out current segment and replace it with new segment. + */ + if (from_gc) { get_atssr_segment(sbi, type, se->type, AT_SSR, se->mtime); - else - allocate_segment_by_default(sbi, type, false); + } else { + if (need_new_seg(sbi, type)) + new_curseg(sbi, type, false); + else + change_curseg(sbi, type, true); + stat_inc_seg_type(sbi, curseg); + } } /* * segment dirty status should be updated after segment allocation, From 5bcd655fffaec24e849bda1207446f5cc821713e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 28 Nov 2022 10:43:46 +0100 Subject: [PATCH 3538/4122] f2fs: remove the unused flush argument to change_curseg Signed-off-by: Christoph Hellwig Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c4e118eb7d19..9486ca49ecb1 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2656,7 +2656,7 @@ bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno) * This function always allocates a used segment(from dirty seglist) by SSR * manner, so it should recover the existing segment information of valid blocks */ -static void change_curseg(struct f2fs_sb_info *sbi, int type, bool flush) +static void change_curseg(struct f2fs_sb_info *sbi, int type) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, type); @@ -2664,9 +2664,7 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type, bool flush) struct f2fs_summary_block *sum_node; struct page *sum_page; - if (flush) - write_sum_page(sbi, curseg->sum_blk, - GET_SUM_BLOCK(sbi, curseg->segno)); + write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, 
curseg->segno)); __set_test_and_inuse(sbi, new_segno); @@ -2705,7 +2703,7 @@ static void get_atssr_segment(struct f2fs_sb_info *sbi, int type, struct seg_entry *se = get_seg_entry(sbi, curseg->next_segno); curseg->seg_type = se->type; - change_curseg(sbi, type, true); + change_curseg(sbi, type); } else { /* allocate cold segment by default */ curseg->seg_type = CURSEG_COLD_DATA; @@ -2880,7 +2878,7 @@ void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, goto unlock; if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type, SSR, 0)) - change_curseg(sbi, type, true); + change_curseg(sbi, type); else new_curseg(sbi, type, true); @@ -3276,7 +3274,7 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, if (need_new_seg(sbi, type)) new_curseg(sbi, type, false); else - change_curseg(sbi, type, true); + change_curseg(sbi, type); stat_inc_seg_type(sbi, curseg); } } @@ -3539,7 +3537,7 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, /* change the current segment */ if (segno != curseg->segno) { curseg->next_segno = segno; - change_curseg(sbi, type, true); + change_curseg(sbi, type); } curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr); @@ -3567,7 +3565,7 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, if (recover_curseg) { if (old_cursegno != curseg->segno) { curseg->next_segno = old_cursegno; - change_curseg(sbi, type, true); + change_curseg(sbi, type); } curseg->next_blkoff = old_blkoff; curseg->alloc_type = old_alloc_type; From 398bb30d4f4e857ee1352130c6935f0fb16d7af2 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 26 Nov 2022 10:38:07 +0800 Subject: [PATCH 3539/4122] MAINTAINERS: Add f2fs bug tracker link As f2fs component in bugzilla.kernel.org was created and used since 2018-7. 
Signed-off-by: Chao Yu Signed-off-by: Yangtao Li Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.rst | 6 +++++- MAINTAINERS | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst index 6e67c5e6c7c3..67e1f3e86f32 100644 --- a/Documentation/filesystems/f2fs.rst +++ b/Documentation/filesystems/f2fs.rst @@ -25,10 +25,14 @@ a consistency checking tool (fsck.f2fs), and a debugging tool (dump.f2fs). - git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs-tools.git -For reporting bugs and sending patches, please use the following mailing list: +For sending patches, please use the following mailing list: - linux-f2fs-devel@lists.sourceforge.net +For reporting bugs, please use the following f2fs bug tracker link: + +- https://bugzilla.kernel.org/enter_bug.cgi?product=File%20System&component=f2fs + Background and Design issues ============================ diff --git a/MAINTAINERS b/MAINTAINERS index cf0f18502372..01fdbb592ea7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7828,6 +7828,7 @@ M: Chao Yu L: linux-f2fs-devel@lists.sourceforge.net S: Maintained W: https://f2fs.wiki.kernel.org/ +B: https://bugzilla.kernel.org/enter_bug.cgi?product=File%20System&component=f2fs T: git git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git F: Documentation/ABI/testing/sysfs-fs-f2fs F: Documentation/filesystems/f2fs.rst From 870af777da22505851174a34c0228042d7ed5f5f Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Fri, 25 Nov 2022 19:47:36 +0800 Subject: [PATCH 3540/4122] f2fs: do some cleanup for f2fs module init Just for cleanup, no functional changes. 
Signed-off-by: Yangtao Li Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 46 ++++++---------------------------------------- fs/f2fs/data.c | 14 ++++---------- fs/f2fs/gc.c | 4 +--- fs/f2fs/recovery.c | 4 +--- fs/f2fs/super.c | 8 ++------ 5 files changed, 14 insertions(+), 62 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 74d3f2d2271f..9723f0bed923 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -567,10 +567,7 @@ MODULE_PARM_DESC(num_compress_pages, int f2fs_init_compress_mempool(void) { compress_page_pool = mempool_create_page_pool(num_compress_pages, 0); - if (!compress_page_pool) - return -ENOMEM; - - return 0; + return compress_page_pool ? 0 : -ENOMEM; } void f2fs_destroy_compress_mempool(void) @@ -1983,9 +1980,7 @@ int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi) sbi->page_array_slab = f2fs_kmem_cache_create(slab_name, sbi->page_array_slab_size); - if (!sbi->page_array_slab) - return -ENOMEM; - return 0; + return sbi->page_array_slab ? 
0 : -ENOMEM; } void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi) @@ -1993,53 +1988,24 @@ void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi) kmem_cache_destroy(sbi->page_array_slab); } -static int __init f2fs_init_cic_cache(void) +int __init f2fs_init_compress_cache(void) { cic_entry_slab = f2fs_kmem_cache_create("f2fs_cic_entry", sizeof(struct compress_io_ctx)); if (!cic_entry_slab) return -ENOMEM; - return 0; -} - -static void f2fs_destroy_cic_cache(void) -{ - kmem_cache_destroy(cic_entry_slab); -} - -static int __init f2fs_init_dic_cache(void) -{ dic_entry_slab = f2fs_kmem_cache_create("f2fs_dic_entry", sizeof(struct decompress_io_ctx)); if (!dic_entry_slab) - return -ENOMEM; - return 0; -} - -static void f2fs_destroy_dic_cache(void) -{ - kmem_cache_destroy(dic_entry_slab); -} - -int __init f2fs_init_compress_cache(void) -{ - int err; - - err = f2fs_init_cic_cache(); - if (err) - goto out; - err = f2fs_init_dic_cache(); - if (err) goto free_cic; return 0; free_cic: - f2fs_destroy_cic_cache(); -out: + kmem_cache_destroy(cic_entry_slab); return -ENOMEM; } void f2fs_destroy_compress_cache(void) { - f2fs_destroy_dic_cache(); - f2fs_destroy_cic_cache(); + kmem_cache_destroy(dic_entry_slab); + kmem_cache_destroy(cic_entry_slab); } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 560fa80590e9..35c19248b1e2 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -39,10 +39,8 @@ static struct bio_set f2fs_bioset; int __init f2fs_init_bioset(void) { - if (bioset_init(&f2fs_bioset, F2FS_BIO_POOL_SIZE, - 0, BIOSET_NEED_BVECS)) - return -ENOMEM; - return 0; + return bioset_init(&f2fs_bioset, F2FS_BIO_POOL_SIZE, + 0, BIOSET_NEED_BVECS); } void f2fs_destroy_bioset(void) @@ -4090,9 +4088,7 @@ int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi) sbi->post_read_wq = alloc_workqueue("f2fs_post_read_wq", WQ_UNBOUND | WQ_HIGHPRI, num_online_cpus()); - if (!sbi->post_read_wq) - return -ENOMEM; - return 0; + return sbi->post_read_wq ? 
0 : -ENOMEM; } void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi) @@ -4105,9 +4101,7 @@ int __init f2fs_init_bio_entry_cache(void) { bio_entry_slab = f2fs_kmem_cache_create("f2fs_bio_entry_slab", sizeof(struct bio_entry)); - if (!bio_entry_slab) - return -ENOMEM; - return 0; + return bio_entry_slab ? 0 : -ENOMEM; } void f2fs_destroy_bio_entry_cache(void) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index f1b68eda2235..d19e26b2e875 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1904,9 +1904,7 @@ int __init f2fs_create_garbage_collection_cache(void) { victim_entry_slab = f2fs_kmem_cache_create("f2fs_victim_entry", sizeof(struct victim_entry)); - if (!victim_entry_slab) - return -ENOMEM; - return 0; + return victim_entry_slab ? 0 : -ENOMEM; } void f2fs_destroy_garbage_collection_cache(void) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index dea95b48b647..77fd453949b1 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -923,9 +923,7 @@ int __init f2fs_create_recovery_cache(void) { fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", sizeof(struct fsync_inode_entry)); - if (!fsync_entry_slab) - return -ENOMEM; - return 0; + return fsync_entry_slab ? 0 : -ENOMEM; } void f2fs_destroy_recovery_cache(void) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index daf14b55a972..a5f6f632cf7c 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -288,9 +288,7 @@ static int __init f2fs_create_casefold_cache(void) { f2fs_cf_name_slab = f2fs_kmem_cache_create("f2fs_casefolded_name", F2FS_NAME_LEN); - if (!f2fs_cf_name_slab) - return -ENOMEM; - return 0; + return f2fs_cf_name_slab ? 0 : -ENOMEM; } static void f2fs_destroy_casefold_cache(void) @@ -4647,9 +4645,7 @@ static int __init init_inodecache(void) f2fs_inode_cachep = kmem_cache_create("f2fs_inode_cache", sizeof(struct f2fs_inode_info), 0, SLAB_RECLAIM_ACCOUNT|SLAB_ACCOUNT, NULL); - if (!f2fs_inode_cachep) - return -ENOMEM; - return 0; + return f2fs_inode_cachep ? 
0 : -ENOMEM; } static void destroy_inodecache(void) From 5fb45f95eec682621748b7cb012c6a8f0f981e6a Mon Sep 17 00:00:00 2001 From: Qingfang DENG Date: Thu, 8 Dec 2022 20:35:29 +0800 Subject: [PATCH 3541/4122] netfilter: flowtable: really fix NAT IPv6 offload The for-loop was broken from the start. It translates to: for (i = 0; i < 4; i += 4) which means the loop statement is run only once, so only the highest 32-bit of the IPv6 address gets mangled. Fix the loop increment. Fixes: 0e07e25b481a ("netfilter: flowtable: fix NAT IPv6 offload mangling") Fixes: 5c27d8d76ce8 ("netfilter: nf_flow_table_offload: add IPv6 support") Signed-off-by: Qingfang DENG Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_offload.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index 0fdcdb2c9ae4..4d9b99abe37d 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -383,12 +383,12 @@ static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule, const __be32 *addr, const __be32 *mask) { struct flow_action_entry *entry; - int i, j; + int i; - for (i = 0, j = 0; i < sizeof(struct in6_addr) / sizeof(u32); i += sizeof(u32), j++) { + for (i = 0; i < sizeof(struct in6_addr) / sizeof(u32); i++) { entry = flow_action_entry_next(flow_rule); flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6, - offset + i, &addr[j], mask); + offset + i * sizeof(u32), &addr[i], mask); } } From 4bd1d80efb5af640f99157f39b50fb11326ce641 Mon Sep 17 00:00:00 2001 From: Sergey Matyukevich Date: Mon, 29 Aug 2022 23:52:19 +0300 Subject: [PATCH 3542/4122] riscv: mm: notify remote harts about mmu cache updates Current implementation of update_mmu_cache function performs local TLB flush. It does not take into account ASID information. 
Besides, it does not take into account other harts currently running the same mm context or possible migration of the running context to other harts. Meanwhile TLB flush is not performed for every context switch if ASID support is enabled. Patch [1] proposed to add ASID support to update_mmu_cache to avoid flushing local TLB entirely. This patch takes into account other harts currently running the same mm context as well as possible migration of this context to other harts. For this purpose the approach from flush_icache_mm is reused. Remote harts currently running the same mm context are informed via SBI calls that they need to flush their local TLBs. All the other harts are marked as needing a deferred TLB flush when this mm context runs on them. [1] https://lore.kernel.org/linux-riscv/20220821013926.8968-1-tjytimi@163.com/ Signed-off-by: Sergey Matyukevich Fixes: 65d4b9c53017 ("RISC-V: Implement ASID allocator") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/linux-riscv/20220829205219.283543-1-geomatsi@gmail.com/#t Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/mmu.h | 2 ++ arch/riscv/include/asm/pgtable.h | 2 +- arch/riscv/include/asm/tlbflush.h | 18 ++++++++++++++++++ arch/riscv/mm/context.c | 10 ++++++++++ arch/riscv/mm/tlbflush.c | 28 +++++++++++----------------- 5 files changed, 42 insertions(+), 18 deletions(-) diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h index 0099dc116168..5ff1f19fd45c 100644 --- a/arch/riscv/include/asm/mmu.h +++ b/arch/riscv/include/asm/mmu.h @@ -19,6 +19,8 @@ typedef struct { #ifdef CONFIG_SMP /* A local icache flush is needed before user execution can resume. */ cpumask_t icache_stale_mask; + /* A local tlb flush is needed before user execution can resume. 
*/ + cpumask_t tlb_stale_mask; #endif } mm_context_t; diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index c61ae83aadee..2359f1f9bda9 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -415,7 +415,7 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, * Relying on flush_tlb_fix_spurious_fault would suffice, but * the extra traps reduce performance. So, eagerly SFENCE.VMA. */ - local_flush_tlb_page(address); + flush_tlb_page(vma, address); } #define __HAVE_ARCH_UPDATE_MMU_TLB diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 801019381dea..907b9efd39a8 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -22,6 +22,24 @@ static inline void local_flush_tlb_page(unsigned long addr) { ALT_FLUSH_TLB_PAGE(__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory")); } + +static inline void local_flush_tlb_all_asid(unsigned long asid) +{ + __asm__ __volatile__ ("sfence.vma x0, %0" + : + : "r" (asid) + : "memory"); +} + +static inline void local_flush_tlb_page_asid(unsigned long addr, + unsigned long asid) +{ + __asm__ __volatile__ ("sfence.vma %0, %1" + : + : "r" (addr), "r" (asid) + : "memory"); +} + #else /* CONFIG_MMU */ #define local_flush_tlb_all() do { } while (0) #define local_flush_tlb_page(addr) do { } while (0) diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c index 7acbfbd14557..80ce9caba8d2 100644 --- a/arch/riscv/mm/context.c +++ b/arch/riscv/mm/context.c @@ -196,6 +196,16 @@ switch_mm_fast: if (need_flush_tlb) local_flush_tlb_all(); +#ifdef CONFIG_SMP + else { + cpumask_t *mask = &mm->context.tlb_stale_mask; + + if (cpumask_test_cpu(cpu, mask)) { + cpumask_clear_cpu(cpu, mask); + local_flush_tlb_all_asid(cntx & asid_mask); + } + } +#endif } static void set_mm_noasid(struct mm_struct *mm) diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index 37ed760d007c..ce7dfc81bb3f 
100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -5,23 +5,7 @@ #include #include #include - -static inline void local_flush_tlb_all_asid(unsigned long asid) -{ - __asm__ __volatile__ ("sfence.vma x0, %0" - : - : "r" (asid) - : "memory"); -} - -static inline void local_flush_tlb_page_asid(unsigned long addr, - unsigned long asid) -{ - __asm__ __volatile__ ("sfence.vma %0, %1" - : - : "r" (addr), "r" (asid) - : "memory"); -} +#include void flush_tlb_all(void) { @@ -31,6 +15,7 @@ void flush_tlb_all(void) static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start, unsigned long size, unsigned long stride) { + struct cpumask *pmask = &mm->context.tlb_stale_mask; struct cpumask *cmask = mm_cpumask(mm); unsigned int cpuid; bool broadcast; @@ -44,6 +29,15 @@ static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start, if (static_branch_unlikely(&use_asid_allocator)) { unsigned long asid = atomic_long_read(&mm->context.id); + /* + * TLB will be immediately flushed on harts concurrently + * executing this MM context. TLB flush on other harts + * is deferred until this MM context migrates there. + */ + cpumask_setall(pmask); + cpumask_clear_cpu(cpuid, pmask); + cpumask_andnot(pmask, pmask, cmask); + if (broadcast) { sbi_remote_sfence_vma_asid(cmask, start, size, asid); } else if (size <= stride) { From b0f4c74eadbf69a3298f38566bfaa2e202541f2f Mon Sep 17 00:00:00 2001 From: Andrew Bresticker Date: Fri, 11 Nov 2022 17:31:08 -0500 Subject: [PATCH 3543/4122] RISC-V: Fix unannotated hardirqs-on in return to userspace slow-path The return to userspace path in entry.S may enable interrupts without the corresponding lockdep annotation, producing a splat[0] when DEBUG_LOCKDEP is enabled. Simply calling __trace_hardirqs_on() here gets a bit messy due to the use of RA to point back to ret_from_exception, so just move the whole slow-path loop into C.
It's more readable and it lets us use local_irq_{enable,disable}(), avoiding the need for manual annotations altogether. [0]: ------------[ cut here ]------------ DEBUG_LOCKS_WARN_ON(!lockdep_hardirqs_enabled()) WARNING: CPU: 2 PID: 1 at kernel/locking/lockdep.c:5512 check_flags+0x10a/0x1e0 Modules linked in: CPU: 2 PID: 1 Comm: init Not tainted 6.1.0-rc4-00160-gb56b6e2b4f31 #53 Hardware name: riscv-virtio,qemu (DT) epc : check_flags+0x10a/0x1e0 ra : check_flags+0x10a/0x1e0 status: 0000000200000100 badaddr: 0000000000000000 cause: 0000000000000003 [] lock_is_held_type+0x78/0x14e [] __might_resched+0x26/0x22c [] __might_sleep+0x3c/0x66 [] get_signal+0x9e/0xa70 [] do_notify_resume+0x6e/0x422 [] ret_from_exception+0x0/0x10 irq event stamp: 44512 hardirqs last enabled at (44511): [] _raw_spin_unlock_irqrestore+0x54/0x62 hardirqs last disabled at (44512): [] __trace_hardirqs_off+0xc/0x14 softirqs last enabled at (44472): [] __do_softirq+0x3de/0x51e softirqs last disabled at (44467): [] irq_exit+0xd6/0x104 ---[ end trace 0000000000000000 ]--- possible reason: unannotated irqs-on. 
Signed-off-by: Andrew Bresticker Fixes: 3c4697982982 ("riscv: Enable LOCKDEP_SUPPORT & fixup TRACE_IRQFLAGS_SUPPORT") Link: https://lore.kernel.org/r/20221111223108.1976562-1-abrestic@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/entry.S | 18 +++++------------- arch/riscv/kernel/signal.c | 34 +++++++++++++++++++++------------- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index b9eda3fcbd6d..58dfa8595e19 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -263,12 +263,11 @@ ret_from_exception: #endif bnez s0, resume_kernel -resume_userspace: /* Interrupts must be disabled here so flags are checked atomically */ REG_L s0, TASK_TI_FLAGS(tp) /* current_thread_info->flags */ andi s1, s0, _TIF_WORK_MASK - bnez s1, work_pending - + bnez s1, resume_userspace_slow +resume_userspace: #ifdef CONFIG_CONTEXT_TRACKING_USER call user_enter_callable #endif @@ -368,19 +367,12 @@ resume_kernel: j restore_all #endif -work_pending: +resume_userspace_slow: /* Enter slow path for supplementary processing */ - la ra, ret_from_exception - andi s1, s0, _TIF_NEED_RESCHED - bnez s1, work_resched -work_notifysig: - /* Handle pending signals and notify-resume requests */ - csrs CSR_STATUS, SR_IE /* Enable interrupts for do_notify_resume() */ move a0, sp /* pt_regs */ move a1, s0 /* current_thread_info->flags */ - tail do_notify_resume -work_resched: - tail schedule + call do_work_pending + j resume_userspace /* Slow paths for ptrace. 
*/ handle_syscall_trace_enter: diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index 5c591123c440..bfb2afa4135f 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -313,19 +313,27 @@ static void do_signal(struct pt_regs *regs) } /* - * notification of userspace execution resumption - * - triggered by the _TIF_WORK_MASK flags + * Handle any pending work on the resume-to-userspace path, as indicated by + * _TIF_WORK_MASK. Entered from assembly with IRQs off. */ -asmlinkage __visible void do_notify_resume(struct pt_regs *regs, - unsigned long thread_info_flags) +asmlinkage __visible void do_work_pending(struct pt_regs *regs, + unsigned long thread_info_flags) { - if (thread_info_flags & _TIF_UPROBE) - uprobe_notify_resume(regs); - - /* Handle pending signal delivery */ - if (thread_info_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) - do_signal(regs); - - if (thread_info_flags & _TIF_NOTIFY_RESUME) - resume_user_mode_work(regs); + do { + if (thread_info_flags & _TIF_NEED_RESCHED) { + schedule(); + } else { + local_irq_enable(); + if (thread_info_flags & _TIF_UPROBE) + uprobe_notify_resume(regs); + /* Handle pending signal delivery */ + if (thread_info_flags & (_TIF_SIGPENDING | + _TIF_NOTIFY_SIGNAL)) + do_signal(regs); + if (thread_info_flags & _TIF_NOTIFY_RESUME) + resume_user_mode_work(regs); + } + local_irq_disable(); + thread_info_flags = read_thread_flags(); + } while (thread_info_flags & _TIF_WORK_MASK); } From b91676fc16cd384a81e3af52c641aa61985cc231 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 14 Nov 2022 14:35:34 +0530 Subject: [PATCH 3544/4122] RISC-V: Fix MEMREMAP_WB for systems with Svpbmt Currently, memremap() called with MEMREMAP_WB maps memory using the generic ioremap() function, which breaks on systems with Svpbmt because memory mapped using _PAGE_IOREMAP page attributes is treated as strongly-ordered non-cacheable IO memory.
To address this, we implement RISC-V specific arch_memremap_wb() which maps memory using _PAGE_KERNEL page attributes resulting in write-back cacheable mapping on systems with Svpbmt. Fixes: ff689fd21cb1 ("riscv: add RISC-V Svpbmt extension support") Co-developed-by: Mayuresh Chitale Signed-off-by: Mayuresh Chitale Signed-off-by: Anup Patel Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20221114090536.1662624-2-apatel@ventanamicro.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/io.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h index 92080a227937..42497d487a17 100644 --- a/arch/riscv/include/asm/io.h +++ b/arch/riscv/include/asm/io.h @@ -135,4 +135,9 @@ __io_writes_outs(outs, u64, q, __io_pbr(), __io_paw()) #include +#ifdef CONFIG_MMU +#define arch_memremap_wb(addr, size) \ + ((__force void *)ioremap_prot((addr), (size), _PAGE_KERNEL)) +#endif + #endif /* _ASM_RISCV_IO_H */ From a49ab905a1fc8630a94221f9a06ce0dafb266576 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 14 Nov 2022 14:35:35 +0530 Subject: [PATCH 3545/4122] RISC-V: Implement arch specific PMEM APIs The NVDIMM PMEM driver expects arch specific APIs for cache maintenance and if arch does not provide these APIs then NVDIMM PMEM driver will always use MEMREMAP_WT to map persistent memory which in-turn maps as UC memory type defined by the RISC-V Svpbmt specification. Now that the Svpbmt and Zicbom support is available in RISC-V kernel, we implement PMEM APIs using ALT_CMO_OP() macros so that the NVDIMM PMEM driver can use MEMREMAP_WB to map persistent memory. 
Co-developed-by: Mayuresh Chitale Signed-off-by: Mayuresh Chitale Signed-off-by: Anup Patel Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20221114090536.1662624-3-apatel@ventanamicro.com Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + arch/riscv/mm/Makefile | 1 + arch/riscv/mm/pmem.c | 21 +++++++++++++++++++++ 3 files changed, 23 insertions(+) create mode 100644 arch/riscv/mm/pmem.c diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 6b48a3ae9843..025e2a1b1c60 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -25,6 +25,7 @@ config RISCV select ARCH_HAS_GIGANTIC_PAGE select ARCH_HAS_KCOV select ARCH_HAS_MMIOWB + select ARCH_HAS_PMEM_API select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SET_DIRECT_MAP if MMU select ARCH_HAS_SET_MEMORY if MMU diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index d76aabf4b94d..b4f35da889bf 100644 --- a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -13,6 +13,7 @@ obj-y += extable.o obj-$(CONFIG_MMU) += fault.o pageattr.o obj-y += cacheflush.o obj-y += context.o +obj-y += pmem.o ifeq ($(CONFIG_MMU),y) obj-$(CONFIG_SMP) += tlbflush.o diff --git a/arch/riscv/mm/pmem.c b/arch/riscv/mm/pmem.c new file mode 100644 index 000000000000..089df92ae876 --- /dev/null +++ b/arch/riscv/mm/pmem.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022 Ventana Micro Systems Inc. 
+ */ + +#include +#include + +#include + +void arch_wb_cache_pmem(void *addr, size_t size) +{ + ALT_CMO_OP(clean, addr, size, riscv_cbom_block_size); +} +EXPORT_SYMBOL_GPL(arch_wb_cache_pmem); + +void arch_invalidate_pmem(void *addr, size_t size) +{ + ALT_CMO_OP(inval, addr, size, riscv_cbom_block_size); +} +EXPORT_SYMBOL_GPL(arch_invalidate_pmem); From 497bcbe3ce0466123a834f2777a8a762bd5d7aae Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 14 Nov 2022 14:35:36 +0530 Subject: [PATCH 3546/4122] RISC-V: Enable PMEM drivers We now have PMEM arch support available in RISC-V kernel so let us enable relevant drivers in defconfig. Signed-off-by: Anup Patel Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20221114090536.1662624-4-apatel@ventanamicro.com Signed-off-by: Palmer Dabbelt --- arch/riscv/configs/defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 05fd5fcf24f9..462da9f7410d 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -159,6 +159,7 @@ CONFIG_VIRTIO_MMIO=y CONFIG_RPMSG_CHAR=y CONFIG_RPMSG_CTRL=y CONFIG_RPMSG_VIRTIO=y +CONFIG_LIBNVDIMM=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y From fdb1742aff436399f5769a7559bbb71c7f37a85f Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Fri, 18 Nov 2022 10:42:59 +0000 Subject: [PATCH 3547/4122] irqchip/sifive-plic: remove user selectability of SIFIVE_PLIC The SiFive PLIC driver is used by all current implementations, including those that do not have a SiFive PLIC. The current driver supports more than just SiFive PLICs at present and, where possible, future PLIC implementations will also use this driver. As every supported RISC-V SoC selects the driver directly in Kconfig.socs there's no point in exposing this kconfig option to users. The Kconfig help text, in its current form, is misleading. There's no point doing anything about that though, as it will no longer be user selectable. 
Remove it. Suggested-by: Marc Zyngier Signed-off-by: Conor Dooley Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221118104300.85016-2-conor@kernel.org Signed-off-by: Palmer Dabbelt --- drivers/irqchip/Kconfig | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 7ef9f5e696d3..ecb3e3119d2e 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -551,18 +551,10 @@ config RISCV_INTC If you don't know what to do here, say Y. config SIFIVE_PLIC - bool "SiFive Platform-Level Interrupt Controller" + bool depends on RISCV select IRQ_DOMAIN_HIERARCHY select GENERIC_IRQ_EFFECTIVE_AFF_MASK if SMP - help - This enables support for the PLIC chip found in SiFive (and - potentially other) RISC-V systems. The PLIC controls devices - interrupts and connects them to each core's local interrupt - controller. Aside from timer and software interrupts, all other - interrupt sources are subordinate to the PLIC. - - If you don't know what to do here, say Y. config EXYNOS_IRQ_COMBINER bool "Samsung Exynos IRQ combiner support" if COMPILE_TEST From d8fb13070c3c99b6a17b75fda28943f9261e23e7 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Fri, 18 Nov 2022 10:43:00 +0000 Subject: [PATCH 3548/4122] irqchip/riscv-intc: remove user selectability of RISCV_INTC Since commit e71ee06e3ca3 ("RISC-V: Force select RISCV_INTC for CONFIG_RISCV") the driver has been enabled at the arch level - and is mandatory anyway. There's no point exposing this as a choice to users, so stop bothering. 
Signed-off-by: Conor Dooley Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221118104300.85016-3-conor@kernel.org Signed-off-by: Palmer Dabbelt --- drivers/irqchip/Kconfig | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index ecb3e3119d2e..4633a549ebbf 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -538,17 +538,8 @@ config TI_PRUSS_INTC different processors within the SoC. config RISCV_INTC - bool "RISC-V Local Interrupt Controller" + bool depends on RISCV - default y - help - This enables support for the per-HART local interrupt controller - found in standard RISC-V systems. The per-HART local interrupt - controller handles timer interrupts, software interrupts, and - hardware interrupts. Without a per-HART local interrupt controller, - a RISC-V system will be unable to handle any interrupts. - - If you don't know what to do here, say Y. config SIFIVE_PLIC bool From bf3d7b1d8499ca46874c7373d2043ecbe252cccc Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Fri, 18 Nov 2022 10:43:01 +0000 Subject: [PATCH 3549/4122] RISC-V: stop selecting SIFIVE_PLIC at the SoC level The SIFIVE_PLIC driver is used by all current RISC-V SoCs & will be, where possible, used for future implementations. Rather than having each driver select the option on a case-by-case basis, do so at the arch level. 
Signed-off-by: Conor Dooley Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221118104300.85016-4-conor@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + arch/riscv/Kconfig.socs | 5 ----- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 6b48a3ae9843..3ee67dc4e98b 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -123,6 +123,7 @@ config RISCV select PCI_MSI if PCI select RISCV_INTC select RISCV_TIMER if RISCV_SBI + select SIFIVE_PLIC select SPARSE_IRQ select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs index 69774bb362d6..15e391f38f75 100644 --- a/arch/riscv/Kconfig.socs +++ b/arch/riscv/Kconfig.socs @@ -3,7 +3,6 @@ menu "SoC selection" config SOC_MICROCHIP_POLARFIRE bool "Microchip PolarFire SoCs" select MCHP_CLK_MPFS - select SIFIVE_PLIC help This enables support for Microchip PolarFire SoC platforms. @@ -13,7 +12,6 @@ config SOC_SIFIVE select SERIAL_SIFIVE_CONSOLE if TTY select CLK_SIFIVE select CLK_SIFIVE_PRCI - select SIFIVE_PLIC select ERRATA_SIFIVE if !XIP_KERNEL help This enables support for SiFive SoC platform hardware. @@ -22,7 +20,6 @@ config SOC_STARFIVE bool "StarFive SoCs" select PINCTRL select RESET_CONTROLLER - select SIFIVE_PLIC help This enables support for StarFive SoC platform hardware. 
@@ -34,7 +31,6 @@ config SOC_VIRT select POWER_RESET_SYSCON_POWEROFF select GOLDFISH select RTC_DRV_GOLDFISH if RTC_CLASS - select SIFIVE_PLIC select PM_GENERIC_DOMAINS if PM select PM_GENERIC_DOMAINS_OF if PM && OF select RISCV_SBI_CPUIDLE if CPU_IDLE && RISCV_SBI @@ -47,7 +43,6 @@ config SOC_CANAAN select CLINT_TIMER if RISCV_M_MODE select SERIAL_SIFIVE if TTY select SERIAL_SIFIVE_CONSOLE if TTY - select SIFIVE_PLIC select ARCH_HAS_RESET_CONTROLLER select PINCTRL select COMMON_CLK From de59b6ed0618b909be78f6bc60874a57dd016063 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Wed, 23 Nov 2022 23:02:57 +0800 Subject: [PATCH 3550/4122] riscv: boot: add zstd support Support building the zstd-compressed Image.zst. As with other compressed formats, the Image.zst is not self-decompressing and the bootloader still needs to handle decompression before launching the kernel image. Signed-off-by: Jisheng Zhang Link: https://lore.kernel.org/r/20221123150257.3108-1-jszhang@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/boot/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/riscv/boot/Makefile b/arch/riscv/boot/Makefile index d1a49adcb1d7..c72de7232abb 100644 --- a/arch/riscv/boot/Makefile +++ b/arch/riscv/boot/Makefile @@ -56,6 +56,9 @@ $(obj)/Image.lzma: $(obj)/Image FORCE $(obj)/Image.lzo: $(obj)/Image FORCE $(call if_changed,lzo) +$(obj)/Image.zst: $(obj)/Image FORCE + $(call if_changed,zstd) + $(obj)/loader.bin: $(obj)/loader FORCE $(call if_changed,objcopy) From 0c49688174f5347c3f8012e84c0ffa0d2b2890c8 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sat, 26 Nov 2022 00:09:19 -0600 Subject: [PATCH 3551/4122] riscv: Fix crash during early errata patching The patch function for the T-Head PBMT errata calls __pa_symbol() before relocation. This crashes when CONFIG_DEBUG_VIRTUAL is enabled, because __pa_symbol() forwards to __phys_addr_symbol(), and __phys_addr_symbol() checks against the absolute kernel start/end address.
Fix this by checking against the kernel map instead of a symbol address. Fixes: a35707c3d850 ("riscv: add memory-type errata for T-Head") Reviewed-by: Heiko Stuebner Tested-by: Heiko Stuebner Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20221126060920.65009-1-samuel@sholland.org Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/physaddr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/mm/physaddr.c b/arch/riscv/mm/physaddr.c index 19cf25a74ee2..9b18bda74154 100644 --- a/arch/riscv/mm/physaddr.c +++ b/arch/riscv/mm/physaddr.c @@ -22,7 +22,7 @@ EXPORT_SYMBOL(__virt_to_phys); phys_addr_t __phys_addr_symbol(unsigned long x) { unsigned long kernel_start = kernel_map.virt_addr; - unsigned long kernel_end = (unsigned long)_end; + unsigned long kernel_end = kernel_start + kernel_map.size; /* * Boundary checking aginst the kernel image mapping. From 583286e2072ed25c31b7db14d69fdf03f1fae7ba Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sat, 26 Nov 2022 00:09:20 -0600 Subject: [PATCH 3552/4122] riscv: Move cast inside kernel_mapping_[pv]a_to_[vp]a Before commit 44c922572952 ("RISC-V: enable XIP"), these macros cast their argument to unsigned long. That commit moved the cast after an assignment to an unsigned long variable, rendering it ineffectual. Move the cast back, so we can remove the cast at each call site. 
Reviewed-by: Alexandre Ghiti Reviewed-by: Heiko Stuebner Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20221126060920.65009-2-samuel@sholland.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/page.h | 18 +++++++++--------- arch/riscv/mm/init.c | 16 ++++++++-------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index ac70b0fd9a9a..9f432c1b5289 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -123,20 +123,20 @@ extern phys_addr_t phys_ram_base; ((x) >= PAGE_OFFSET && (!IS_ENABLED(CONFIG_64BIT) || (x) < PAGE_OFFSET + KERN_VIRT_SIZE)) #define linear_mapping_pa_to_va(x) ((void *)((unsigned long)(x) + kernel_map.va_pa_offset)) -#define kernel_mapping_pa_to_va(y) ({ \ - unsigned long _y = y; \ - (IS_ENABLED(CONFIG_XIP_KERNEL) && _y < phys_ram_base) ? \ - (void *)((unsigned long)(_y) + kernel_map.va_kernel_xip_pa_offset) : \ - (void *)((unsigned long)(_y) + kernel_map.va_kernel_pa_offset + XIP_OFFSET); \ +#define kernel_mapping_pa_to_va(y) ({ \ + unsigned long _y = (unsigned long)(y); \ + (IS_ENABLED(CONFIG_XIP_KERNEL) && _y < phys_ram_base) ? \ + (void *)(_y + kernel_map.va_kernel_xip_pa_offset) : \ + (void *)(_y + kernel_map.va_kernel_pa_offset + XIP_OFFSET); \ }) #define __pa_to_va_nodebug(x) linear_mapping_pa_to_va(x) #define linear_mapping_va_to_pa(x) ((unsigned long)(x) - kernel_map.va_pa_offset) #define kernel_mapping_va_to_pa(y) ({ \ - unsigned long _y = y; \ - (IS_ENABLED(CONFIG_XIP_KERNEL) && _y < kernel_map.virt_addr + XIP_OFFSET) ? \ - ((unsigned long)(_y) - kernel_map.va_kernel_xip_pa_offset) : \ - ((unsigned long)(_y) - kernel_map.va_kernel_pa_offset - XIP_OFFSET); \ + unsigned long _y = (unsigned long)(y); \ + (IS_ENABLED(CONFIG_XIP_KERNEL) && _y < kernel_map.virt_addr + XIP_OFFSET) ? 
\ + (_y - kernel_map.va_kernel_xip_pa_offset) : \ + (_y - kernel_map.va_kernel_pa_offset - XIP_OFFSET); \ }) #define __va_to_pa_nodebug(x) ({ \ diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index b56a0a75533f..7d59516ce6b3 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -927,15 +927,15 @@ static void __init pt_ops_set_early(void) */ static void __init pt_ops_set_fixmap(void) { - pt_ops.alloc_pte = kernel_mapping_pa_to_va((uintptr_t)alloc_pte_fixmap); - pt_ops.get_pte_virt = kernel_mapping_pa_to_va((uintptr_t)get_pte_virt_fixmap); + pt_ops.alloc_pte = kernel_mapping_pa_to_va(alloc_pte_fixmap); + pt_ops.get_pte_virt = kernel_mapping_pa_to_va(get_pte_virt_fixmap); #ifndef __PAGETABLE_PMD_FOLDED - pt_ops.alloc_pmd = kernel_mapping_pa_to_va((uintptr_t)alloc_pmd_fixmap); - pt_ops.get_pmd_virt = kernel_mapping_pa_to_va((uintptr_t)get_pmd_virt_fixmap); - pt_ops.alloc_pud = kernel_mapping_pa_to_va((uintptr_t)alloc_pud_fixmap); - pt_ops.get_pud_virt = kernel_mapping_pa_to_va((uintptr_t)get_pud_virt_fixmap); - pt_ops.alloc_p4d = kernel_mapping_pa_to_va((uintptr_t)alloc_p4d_fixmap); - pt_ops.get_p4d_virt = kernel_mapping_pa_to_va((uintptr_t)get_p4d_virt_fixmap); + pt_ops.alloc_pmd = kernel_mapping_pa_to_va(alloc_pmd_fixmap); + pt_ops.get_pmd_virt = kernel_mapping_pa_to_va(get_pmd_virt_fixmap); + pt_ops.alloc_pud = kernel_mapping_pa_to_va(alloc_pud_fixmap); + pt_ops.get_pud_virt = kernel_mapping_pa_to_va(get_pud_virt_fixmap); + pt_ops.alloc_p4d = kernel_mapping_pa_to_va(alloc_p4d_fixmap); + pt_ops.get_p4d_virt = kernel_mapping_pa_to_va(get_p4d_virt_fixmap); #endif } From e8b9a055fa0481679132781db574ecb771960f16 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 7 Dec 2022 21:48:07 +0000 Subject: [PATCH 3553/4122] KVM: arm64: selftests: Align VA space allocator with TTBR0 An interesting feature of the Arm architecture is that the stage-1 MMU supports two distinct VA regions, controlled by TTBR{0,1}_EL1. 
As KVM selftests on arm64 only use TTBR0_EL1, the VA space is constrained to [0, 2^(va_bits-1)). This is different from other architectures that allow for addressing low and high regions of the VA space from a single page table. KVM selftests' VA space allocator presumes the valid address range is split between low and high memory based on the MSB, which of course is a poor match for arm64's TTBR0 region. Allow architectures to override the default VA space layout. Make use of the override to align vpages_valid with the behavior of TTBR0 on arm64. Signed-off-by: Oliver Upton Message-Id: <20221207214809.489070-4-oliver.upton@linux.dev> Signed-off-by: Paolo Bonzini --- .../testing/selftests/kvm/include/kvm_util_base.h | 1 + .../testing/selftests/kvm/lib/aarch64/processor.c | 10 ++++++++++ tools/testing/selftests/kvm/lib/kvm_util.c | 15 ++++++++++----- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h index 37500c92dd0a..2e267cd69288 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -408,6 +408,7 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags); void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa); void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot); struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id); +void vm_populate_vaddr_bitmap(struct kvm_vm *vm); vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min); vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min); vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index 316de70db91d..5972a23b2765 100644 ---
a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -541,3 +541,13 @@ void kvm_selftest_arch_init(void) */ guest_modes_append_default(); } + +void vm_vaddr_populate_bitmap(struct kvm_vm *vm) +{ + /* + * arm64 selftests use only TTBR0_EL1, meaning that the valid VA space + * is [0, 2^(64 - TCR_EL1.T0SZ)). + */ + sparsebit_set_num(vm->vpages_valid, 0, + (1ULL << vm->va_bits) >> vm->page_shift); +} diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index e9607eb089be..c88c3ace16d2 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -186,6 +186,15 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = { _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES, "Missing new mode params?"); +__weak void vm_vaddr_populate_bitmap(struct kvm_vm *vm) +{ + sparsebit_set_num(vm->vpages_valid, + 0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift); + sparsebit_set_num(vm->vpages_valid, + (~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift, + (1ULL << (vm->va_bits - 1)) >> vm->page_shift); +} + struct kvm_vm *____vm_create(enum vm_guest_mode mode) { struct kvm_vm *vm; @@ -274,11 +283,7 @@ struct kvm_vm *____vm_create(enum vm_guest_mode mode) /* Limit to VA-bit canonical virtual addresses. */ vm->vpages_valid = sparsebit_alloc(); - sparsebit_set_num(vm->vpages_valid, - 0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift); - sparsebit_set_num(vm->vpages_valid, - (~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift, - (1ULL << (vm->va_bits - 1)) >> vm->page_shift); + vm_vaddr_populate_bitmap(vm); /* Limit physical addresses to PA-bits. 
*/ vm->max_gfn = vm_compute_max_gfn(vm); From 2afc1fbbdab2aee831561f09f859989dcd5ed648 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 7 Dec 2022 21:48:08 +0000 Subject: [PATCH 3554/4122] KVM: selftests: Allocate ucall pool from MEM_REGION_DATA MEM_REGION_TEST_DATA is meant to hold data explicitly used by a selftest, not implicit allocations due to the selftests infrastructure. Allocate the ucall pool from MEM_REGION_DATA much like the rest of the selftests library allocations. Fixes: 426729b2cf2e ("KVM: selftests: Add ucall pool based implementation") Signed-off-by: Oliver Upton Message-Id: <20221207214809.489070-5-oliver.upton@linux.dev> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/lib/ucall_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/lib/ucall_common.c b/tools/testing/selftests/kvm/lib/ucall_common.c index fcae96461e46..72420171c0d4 100644 --- a/tools/testing/selftests/kvm/lib/ucall_common.c +++ b/tools/testing/selftests/kvm/lib/ucall_common.c @@ -22,7 +22,7 @@ void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) vm_vaddr_t vaddr; int i; - vaddr = vm_vaddr_alloc(vm, sizeof(*hdr), KVM_UTIL_MIN_VADDR); + vaddr = __vm_vaddr_alloc(vm, sizeof(*hdr), KVM_UTIL_MIN_VADDR, MEM_REGION_DATA); hdr = (struct ucall_header *)addr_gva2hva(vm, vaddr); memset(hdr, 0, sizeof(*hdr)); From e9612987e437b7ada686f472c7596686fabecb2b Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Mon, 28 Nov 2022 12:21:12 +0000 Subject: [PATCH 3555/4122] crypto: qat - relocate bufferlist logic Move the logic that maps, unmaps and converts scatterlists into QAT bufferlists from qat_algs.c to a new module, qat_bl. This is to allow reuse of the logic by the data compression service. This commit does not implement any functional change. 
Signed-off-by: Giovanni Cabiddu Reviewed-by: Wojciech Ziemba Reviewed-by: Adam Guerin Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/Makefile | 3 +- drivers/crypto/qat/qat_common/qat_algs.c | 184 +-------------------- drivers/crypto/qat/qat_common/qat_bl.c | 194 +++++++++++++++++++++++ drivers/crypto/qat/qat_common/qat_bl.h | 17 ++ 4 files changed, 214 insertions(+), 184 deletions(-) create mode 100644 drivers/crypto/qat/qat_common/qat_bl.c create mode 100644 drivers/crypto/qat/qat_common/qat_bl.h diff --git a/drivers/crypto/qat/qat_common/Makefile b/drivers/crypto/qat/qat_common/Makefile index 80919cfcc29d..b0587d03eac2 100644 --- a/drivers/crypto/qat/qat_common/Makefile +++ b/drivers/crypto/qat/qat_common/Makefile @@ -19,7 +19,8 @@ intel_qat-objs := adf_cfg.o \ qat_asym_algs.o \ qat_algs_send.o \ qat_uclo.o \ - qat_hal.o + qat_hal.o \ + qat_bl.o intel_qat-$(CONFIG_DEBUG_FS) += adf_transport_debug.o intel_qat-$(CONFIG_PCI_IOV) += adf_sriov.o adf_vf_isr.o adf_pfvf_utils.o \ diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c index cad9c58caab1..2ee4fa64032f 100644 --- a/drivers/crypto/qat/qat_common/qat_algs.c +++ b/drivers/crypto/qat/qat_common/qat_algs.c @@ -23,6 +23,7 @@ #include "icp_qat_hw.h" #include "icp_qat_fw.h" #include "icp_qat_fw_la.h" +#include "qat_bl.h" #define QAT_AES_HW_CONFIG_ENC(alg, mode) \ ICP_QAT_HW_CIPHER_CONFIG_BUILD(mode, alg, \ @@ -663,189 +664,6 @@ static int qat_alg_aead_setkey(struct crypto_aead *tfm, const u8 *key, return qat_alg_aead_newkey(tfm, key, keylen); } -static void qat_alg_free_bufl(struct qat_crypto_instance *inst, - struct qat_crypto_request *qat_req) -{ - struct device *dev = &GET_DEV(inst->accel_dev); - struct qat_alg_buf_list *bl = qat_req->buf.bl; - struct qat_alg_buf_list *blout = qat_req->buf.blout; - dma_addr_t blp = qat_req->buf.blp; - dma_addr_t blpout = qat_req->buf.bloutp; - size_t sz = qat_req->buf.sz; - size_t sz_out = qat_req->buf.sz_out; - int 
bl_dma_dir; - int i; - - bl_dma_dir = blp != blpout ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL; - - for (i = 0; i < bl->num_bufs; i++) - dma_unmap_single(dev, bl->bufers[i].addr, - bl->bufers[i].len, bl_dma_dir); - - dma_unmap_single(dev, blp, sz, DMA_TO_DEVICE); - - if (!qat_req->buf.sgl_src_valid) - kfree(bl); - - if (blp != blpout) { - /* If out of place operation dma unmap only data */ - int bufless = blout->num_bufs - blout->num_mapped_bufs; - - for (i = bufless; i < blout->num_bufs; i++) { - dma_unmap_single(dev, blout->bufers[i].addr, - blout->bufers[i].len, - DMA_FROM_DEVICE); - } - dma_unmap_single(dev, blpout, sz_out, DMA_TO_DEVICE); - - if (!qat_req->buf.sgl_dst_valid) - kfree(blout); - } -} - -static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst, - struct scatterlist *sgl, - struct scatterlist *sglout, - struct qat_crypto_request *qat_req, - gfp_t flags) -{ - struct device *dev = &GET_DEV(inst->accel_dev); - int i, sg_nctr = 0; - int n = sg_nents(sgl); - struct qat_alg_buf_list *bufl; - struct qat_alg_buf_list *buflout = NULL; - dma_addr_t blp = DMA_MAPPING_ERROR; - dma_addr_t bloutp = DMA_MAPPING_ERROR; - struct scatterlist *sg; - size_t sz_out, sz = struct_size(bufl, bufers, n); - int node = dev_to_node(&GET_DEV(inst->accel_dev)); - int bufl_dma_dir; - - if (unlikely(!n)) - return -EINVAL; - - qat_req->buf.sgl_src_valid = false; - qat_req->buf.sgl_dst_valid = false; - - if (n > QAT_MAX_BUFF_DESC) { - bufl = kzalloc_node(sz, flags, node); - if (unlikely(!bufl)) - return -ENOMEM; - } else { - bufl = &qat_req->buf.sgl_src.sgl_hdr; - memset(bufl, 0, sizeof(struct qat_alg_buf_list)); - qat_req->buf.sgl_src_valid = true; - } - - bufl_dma_dir = sgl != sglout ? 
DMA_TO_DEVICE : DMA_BIDIRECTIONAL; - - for_each_sg(sgl, sg, n, i) - bufl->bufers[i].addr = DMA_MAPPING_ERROR; - - for_each_sg(sgl, sg, n, i) { - int y = sg_nctr; - - if (!sg->length) - continue; - - bufl->bufers[y].addr = dma_map_single(dev, sg_virt(sg), - sg->length, - bufl_dma_dir); - bufl->bufers[y].len = sg->length; - if (unlikely(dma_mapping_error(dev, bufl->bufers[y].addr))) - goto err_in; - sg_nctr++; - } - bufl->num_bufs = sg_nctr; - blp = dma_map_single(dev, bufl, sz, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(dev, blp))) - goto err_in; - qat_req->buf.bl = bufl; - qat_req->buf.blp = blp; - qat_req->buf.sz = sz; - /* Handle out of place operation */ - if (sgl != sglout) { - struct qat_alg_buf *bufers; - - n = sg_nents(sglout); - sz_out = struct_size(buflout, bufers, n); - sg_nctr = 0; - - if (n > QAT_MAX_BUFF_DESC) { - buflout = kzalloc_node(sz_out, flags, node); - if (unlikely(!buflout)) - goto err_in; - } else { - buflout = &qat_req->buf.sgl_dst.sgl_hdr; - memset(buflout, 0, sizeof(struct qat_alg_buf_list)); - qat_req->buf.sgl_dst_valid = true; - } - - bufers = buflout->bufers; - for_each_sg(sglout, sg, n, i) - bufers[i].addr = DMA_MAPPING_ERROR; - - for_each_sg(sglout, sg, n, i) { - int y = sg_nctr; - - if (!sg->length) - continue; - - bufers[y].addr = dma_map_single(dev, sg_virt(sg), - sg->length, - DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(dev, bufers[y].addr))) - goto err_out; - bufers[y].len = sg->length; - sg_nctr++; - } - buflout->num_bufs = sg_nctr; - buflout->num_mapped_bufs = sg_nctr; - bloutp = dma_map_single(dev, buflout, sz_out, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(dev, bloutp))) - goto err_out; - qat_req->buf.blout = buflout; - qat_req->buf.bloutp = bloutp; - qat_req->buf.sz_out = sz_out; - } else { - /* Otherwise set the src and dst to the same address */ - qat_req->buf.bloutp = qat_req->buf.blp; - qat_req->buf.sz_out = 0; - } - return 0; - -err_out: - if (!dma_mapping_error(dev, bloutp)) - dma_unmap_single(dev, 
bloutp, sz_out, DMA_TO_DEVICE); - - n = sg_nents(sglout); - for (i = 0; i < n; i++) - if (!dma_mapping_error(dev, buflout->bufers[i].addr)) - dma_unmap_single(dev, buflout->bufers[i].addr, - buflout->bufers[i].len, - DMA_FROM_DEVICE); - - if (!qat_req->buf.sgl_dst_valid) - kfree(buflout); - -err_in: - if (!dma_mapping_error(dev, blp)) - dma_unmap_single(dev, blp, sz, DMA_TO_DEVICE); - - n = sg_nents(sgl); - for (i = 0; i < n; i++) - if (!dma_mapping_error(dev, bufl->bufers[i].addr)) - dma_unmap_single(dev, bufl->bufers[i].addr, - bufl->bufers[i].len, - bufl_dma_dir); - - if (!qat_req->buf.sgl_src_valid) - kfree(bufl); - - dev_err(dev, "Failed to map buf for dma\n"); - return -ENOMEM; -} - static void qat_aead_alg_callback(struct icp_qat_fw_la_resp *qat_resp, struct qat_crypto_request *qat_req) { diff --git a/drivers/crypto/qat/qat_common/qat_bl.c b/drivers/crypto/qat/qat_common/qat_bl.c new file mode 100644 index 000000000000..6d0a39f8ce10 --- /dev/null +++ b/drivers/crypto/qat/qat_common/qat_bl.c @@ -0,0 +1,194 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2014 - 2022 Intel Corporation */ +#include +#include +#include +#include +#include +#include +#include "adf_accel_devices.h" +#include "qat_bl.h" +#include "qat_crypto.h" + +void qat_alg_free_bufl(struct qat_crypto_instance *inst, + struct qat_crypto_request *qat_req) +{ + struct device *dev = &GET_DEV(inst->accel_dev); + struct qat_alg_buf_list *bl = qat_req->buf.bl; + struct qat_alg_buf_list *blout = qat_req->buf.blout; + dma_addr_t blp = qat_req->buf.blp; + dma_addr_t blpout = qat_req->buf.bloutp; + size_t sz = qat_req->buf.sz; + size_t sz_out = qat_req->buf.sz_out; + int bl_dma_dir; + int i; + + bl_dma_dir = blp != blpout ? 
DMA_TO_DEVICE : DMA_BIDIRECTIONAL; + + for (i = 0; i < bl->num_bufs; i++) + dma_unmap_single(dev, bl->bufers[i].addr, + bl->bufers[i].len, bl_dma_dir); + + dma_unmap_single(dev, blp, sz, DMA_TO_DEVICE); + + if (!qat_req->buf.sgl_src_valid) + kfree(bl); + + if (blp != blpout) { + /* If out of place operation dma unmap only data */ + int bufless = blout->num_bufs - blout->num_mapped_bufs; + + for (i = bufless; i < blout->num_bufs; i++) { + dma_unmap_single(dev, blout->bufers[i].addr, + blout->bufers[i].len, + DMA_FROM_DEVICE); + } + dma_unmap_single(dev, blpout, sz_out, DMA_TO_DEVICE); + + if (!qat_req->buf.sgl_dst_valid) + kfree(blout); + } +} + +int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst, + struct scatterlist *sgl, + struct scatterlist *sglout, + struct qat_crypto_request *qat_req, + gfp_t flags) +{ + struct device *dev = &GET_DEV(inst->accel_dev); + int i, sg_nctr = 0; + int n = sg_nents(sgl); + struct qat_alg_buf_list *bufl; + struct qat_alg_buf_list *buflout = NULL; + dma_addr_t blp = DMA_MAPPING_ERROR; + dma_addr_t bloutp = DMA_MAPPING_ERROR; + struct scatterlist *sg; + size_t sz_out, sz = struct_size(bufl, bufers, n); + int node = dev_to_node(&GET_DEV(inst->accel_dev)); + int bufl_dma_dir; + + if (unlikely(!n)) + return -EINVAL; + + qat_req->buf.sgl_src_valid = false; + qat_req->buf.sgl_dst_valid = false; + + if (n > QAT_MAX_BUFF_DESC) { + bufl = kzalloc_node(sz, flags, node); + if (unlikely(!bufl)) + return -ENOMEM; + } else { + bufl = &qat_req->buf.sgl_src.sgl_hdr; + memset(bufl, 0, sizeof(struct qat_alg_buf_list)); + qat_req->buf.sgl_src_valid = true; + } + + bufl_dma_dir = sgl != sglout ? 
DMA_TO_DEVICE : DMA_BIDIRECTIONAL; + + for_each_sg(sgl, sg, n, i) + bufl->bufers[i].addr = DMA_MAPPING_ERROR; + + for_each_sg(sgl, sg, n, i) { + int y = sg_nctr; + + if (!sg->length) + continue; + + bufl->bufers[y].addr = dma_map_single(dev, sg_virt(sg), + sg->length, + bufl_dma_dir); + bufl->bufers[y].len = sg->length; + if (unlikely(dma_mapping_error(dev, bufl->bufers[y].addr))) + goto err_in; + sg_nctr++; + } + bufl->num_bufs = sg_nctr; + blp = dma_map_single(dev, bufl, sz, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, blp))) + goto err_in; + qat_req->buf.bl = bufl; + qat_req->buf.blp = blp; + qat_req->buf.sz = sz; + /* Handle out of place operation */ + if (sgl != sglout) { + struct qat_alg_buf *bufers; + + n = sg_nents(sglout); + sz_out = struct_size(buflout, bufers, n); + sg_nctr = 0; + + if (n > QAT_MAX_BUFF_DESC) { + buflout = kzalloc_node(sz_out, flags, node); + if (unlikely(!buflout)) + goto err_in; + } else { + buflout = &qat_req->buf.sgl_dst.sgl_hdr; + memset(buflout, 0, sizeof(struct qat_alg_buf_list)); + qat_req->buf.sgl_dst_valid = true; + } + + bufers = buflout->bufers; + for_each_sg(sglout, sg, n, i) + bufers[i].addr = DMA_MAPPING_ERROR; + + for_each_sg(sglout, sg, n, i) { + int y = sg_nctr; + + if (!sg->length) + continue; + + bufers[y].addr = dma_map_single(dev, sg_virt(sg), + sg->length, + DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(dev, bufers[y].addr))) + goto err_out; + bufers[y].len = sg->length; + sg_nctr++; + } + buflout->num_bufs = sg_nctr; + buflout->num_mapped_bufs = sg_nctr; + bloutp = dma_map_single(dev, buflout, sz_out, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, bloutp))) + goto err_out; + qat_req->buf.blout = buflout; + qat_req->buf.bloutp = bloutp; + qat_req->buf.sz_out = sz_out; + } else { + /* Otherwise set the src and dst to the same address */ + qat_req->buf.bloutp = qat_req->buf.blp; + qat_req->buf.sz_out = 0; + } + return 0; + +err_out: + if (!dma_mapping_error(dev, bloutp)) + dma_unmap_single(dev, 
bloutp, sz_out, DMA_TO_DEVICE); + + n = sg_nents(sglout); + for (i = 0; i < n; i++) + if (!dma_mapping_error(dev, buflout->bufers[i].addr)) + dma_unmap_single(dev, buflout->bufers[i].addr, + buflout->bufers[i].len, + DMA_FROM_DEVICE); + + if (!qat_req->buf.sgl_dst_valid) + kfree(buflout); + +err_in: + if (!dma_mapping_error(dev, blp)) + dma_unmap_single(dev, blp, sz, DMA_TO_DEVICE); + + n = sg_nents(sgl); + for (i = 0; i < n; i++) + if (!dma_mapping_error(dev, bufl->bufers[i].addr)) + dma_unmap_single(dev, bufl->bufers[i].addr, + bufl->bufers[i].len, + bufl_dma_dir); + + if (!qat_req->buf.sgl_src_valid) + kfree(bufl); + + dev_err(dev, "Failed to map buf for dma\n"); + return -ENOMEM; +} diff --git a/drivers/crypto/qat/qat_common/qat_bl.h b/drivers/crypto/qat/qat_common/qat_bl.h new file mode 100644 index 000000000000..7a916f1ec645 --- /dev/null +++ b/drivers/crypto/qat/qat_common/qat_bl.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2014 - 2022 Intel Corporation */ +#ifndef QAT_BL_H +#define QAT_BL_H +#include +#include +#include "qat_crypto.h" + +void qat_alg_free_bufl(struct qat_crypto_instance *inst, + struct qat_crypto_request *qat_req); +int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst, + struct scatterlist *sgl, + struct scatterlist *sglout, + struct qat_crypto_request *qat_req, + gfp_t flags); + +#endif From b0cd997f35598c4fc01bf22061e1eb88fc10afad Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Mon, 28 Nov 2022 12:21:13 +0000 Subject: [PATCH 3556/4122] crypto: qat - rename bufferlist functions Rename the functions qat_alg_sgl_to_bufl() and qat_alg_free_bufl() as qat_bl_sgl_to_bufl() and qat_bl_free_bufl() after their relocation into the qat_bl module. This commit does not implement any functional change. 
Signed-off-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/qat_algs.c | 20 ++++++++++---------- drivers/crypto/qat/qat_common/qat_bl.c | 14 +++++++------- drivers/crypto/qat/qat_common/qat_bl.h | 14 +++++++------- 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c index 2ee4fa64032f..3e7e9fffe28b 100644 --- a/drivers/crypto/qat/qat_common/qat_algs.c +++ b/drivers/crypto/qat/qat_common/qat_algs.c @@ -673,7 +673,7 @@ static void qat_aead_alg_callback(struct icp_qat_fw_la_resp *qat_resp, u8 stat_filed = qat_resp->comn_resp.comn_status; int res = 0, qat_res = ICP_QAT_FW_COMN_RESP_CRYPTO_STAT_GET(stat_filed); - qat_alg_free_bufl(inst, qat_req); + qat_bl_free_bufl(inst, qat_req); if (unlikely(qat_res != ICP_QAT_FW_COMN_STATUS_FLAG_OK)) res = -EBADMSG; areq->base.complete(&areq->base, res); @@ -743,7 +743,7 @@ static void qat_skcipher_alg_callback(struct icp_qat_fw_la_resp *qat_resp, u8 stat_filed = qat_resp->comn_resp.comn_status; int res = 0, qat_res = ICP_QAT_FW_COMN_RESP_CRYPTO_STAT_GET(stat_filed); - qat_alg_free_bufl(inst, qat_req); + qat_bl_free_bufl(inst, qat_req); if (unlikely(qat_res != ICP_QAT_FW_COMN_STATUS_FLAG_OK)) res = -EINVAL; @@ -799,7 +799,7 @@ static int qat_alg_aead_dec(struct aead_request *areq) if (cipher_len % AES_BLOCK_SIZE != 0) return -EINVAL; - ret = qat_alg_sgl_to_bufl(ctx->inst, areq->src, areq->dst, qat_req, f); + ret = qat_bl_sgl_to_bufl(ctx->inst, areq->src, areq->dst, qat_req, f); if (unlikely(ret)) return ret; @@ -821,7 +821,7 @@ static int qat_alg_aead_dec(struct aead_request *areq) ret = qat_alg_send_sym_message(qat_req, ctx->inst, &areq->base); if (ret == -ENOSPC) - qat_alg_free_bufl(ctx->inst, qat_req); + qat_bl_free_bufl(ctx->inst, qat_req); return ret; } @@ -842,7 +842,7 @@ static int qat_alg_aead_enc(struct aead_request *areq) if (areq->cryptlen % AES_BLOCK_SIZE != 0) return -EINVAL; - ret = 
qat_alg_sgl_to_bufl(ctx->inst, areq->src, areq->dst, qat_req, f); + ret = qat_bl_sgl_to_bufl(ctx->inst, areq->src, areq->dst, qat_req, f); if (unlikely(ret)) return ret; @@ -866,7 +866,7 @@ static int qat_alg_aead_enc(struct aead_request *areq) ret = qat_alg_send_sym_message(qat_req, ctx->inst, &areq->base); if (ret == -ENOSPC) - qat_alg_free_bufl(ctx->inst, qat_req); + qat_bl_free_bufl(ctx->inst, qat_req); return ret; } @@ -1027,7 +1027,7 @@ static int qat_alg_skcipher_encrypt(struct skcipher_request *req) if (req->cryptlen == 0) return 0; - ret = qat_alg_sgl_to_bufl(ctx->inst, req->src, req->dst, qat_req, f); + ret = qat_bl_sgl_to_bufl(ctx->inst, req->src, req->dst, qat_req, f); if (unlikely(ret)) return ret; @@ -1048,7 +1048,7 @@ static int qat_alg_skcipher_encrypt(struct skcipher_request *req) ret = qat_alg_send_sym_message(qat_req, ctx->inst, &req->base); if (ret == -ENOSPC) - qat_alg_free_bufl(ctx->inst, qat_req); + qat_bl_free_bufl(ctx->inst, qat_req); return ret; } @@ -1093,7 +1093,7 @@ static int qat_alg_skcipher_decrypt(struct skcipher_request *req) if (req->cryptlen == 0) return 0; - ret = qat_alg_sgl_to_bufl(ctx->inst, req->src, req->dst, qat_req, f); + ret = qat_bl_sgl_to_bufl(ctx->inst, req->src, req->dst, qat_req, f); if (unlikely(ret)) return ret; @@ -1115,7 +1115,7 @@ static int qat_alg_skcipher_decrypt(struct skcipher_request *req) ret = qat_alg_send_sym_message(qat_req, ctx->inst, &req->base); if (ret == -ENOSPC) - qat_alg_free_bufl(ctx->inst, qat_req); + qat_bl_free_bufl(ctx->inst, qat_req); return ret; } diff --git a/drivers/crypto/qat/qat_common/qat_bl.c b/drivers/crypto/qat/qat_common/qat_bl.c index 6d0a39f8ce10..8f7743f3c89b 100644 --- a/drivers/crypto/qat/qat_common/qat_bl.c +++ b/drivers/crypto/qat/qat_common/qat_bl.c @@ -10,8 +10,8 @@ #include "qat_bl.h" #include "qat_crypto.h" -void qat_alg_free_bufl(struct qat_crypto_instance *inst, - struct qat_crypto_request *qat_req) +void qat_bl_free_bufl(struct qat_crypto_instance *inst, + struct 
qat_crypto_request *qat_req) { struct device *dev = &GET_DEV(inst->accel_dev); struct qat_alg_buf_list *bl = qat_req->buf.bl; @@ -50,11 +50,11 @@ void qat_alg_free_bufl(struct qat_crypto_instance *inst, } } -int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst, - struct scatterlist *sgl, - struct scatterlist *sglout, - struct qat_crypto_request *qat_req, - gfp_t flags) +int qat_bl_sgl_to_bufl(struct qat_crypto_instance *inst, + struct scatterlist *sgl, + struct scatterlist *sglout, + struct qat_crypto_request *qat_req, + gfp_t flags) { struct device *dev = &GET_DEV(inst->accel_dev); int i, sg_nctr = 0; diff --git a/drivers/crypto/qat/qat_common/qat_bl.h b/drivers/crypto/qat/qat_common/qat_bl.h index 7a916f1ec645..ed4c200ac619 100644 --- a/drivers/crypto/qat/qat_common/qat_bl.h +++ b/drivers/crypto/qat/qat_common/qat_bl.h @@ -6,12 +6,12 @@ #include #include "qat_crypto.h" -void qat_alg_free_bufl(struct qat_crypto_instance *inst, - struct qat_crypto_request *qat_req); -int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst, - struct scatterlist *sgl, - struct scatterlist *sglout, - struct qat_crypto_request *qat_req, - gfp_t flags); +void qat_bl_free_bufl(struct qat_crypto_instance *inst, + struct qat_crypto_request *qat_req); +int qat_bl_sgl_to_bufl(struct qat_crypto_instance *inst, + struct scatterlist *sgl, + struct scatterlist *sglout, + struct qat_crypto_request *qat_req, + gfp_t flags); #endif From 3ed330d0dba61d2e08a0eed7aa3d5def3f0c749b Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Mon, 28 Nov 2022 12:21:14 +0000 Subject: [PATCH 3557/4122] crypto: qat - change bufferlist logic interface The functions qat_alg_sgl_to_bufl() and qat_alg_free_bufl() take as argument a qat_crypto_instance and a qat_crypto_request structure. These two structures are used only to get a reference to the adf_accel_dev and qat_crypto_request_buffs. 
In order to reuse these functions for the compression service, change the signature so that they take adf_accel_dev and qat_crypto_request_buffs. Signed-off-by: Giovanni Cabiddu Reviewed-by: Wojciech Ziemba Reviewed-by: Adam Guerin Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/qat_algs.c | 24 +++++---- drivers/crypto/qat/qat_common/qat_bl.c | 62 ++++++++++++------------ drivers/crypto/qat/qat_common/qat_bl.h | 8 +-- 3 files changed, 49 insertions(+), 45 deletions(-) diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c index 3e7e9fffe28b..dfa65e42db78 100644 --- a/drivers/crypto/qat/qat_common/qat_algs.c +++ b/drivers/crypto/qat/qat_common/qat_algs.c @@ -673,7 +673,7 @@ static void qat_aead_alg_callback(struct icp_qat_fw_la_resp *qat_resp, u8 stat_filed = qat_resp->comn_resp.comn_status; int res = 0, qat_res = ICP_QAT_FW_COMN_RESP_CRYPTO_STAT_GET(stat_filed); - qat_bl_free_bufl(inst, qat_req); + qat_bl_free_bufl(inst->accel_dev, &qat_req->buf); if (unlikely(qat_res != ICP_QAT_FW_COMN_STATUS_FLAG_OK)) res = -EBADMSG; areq->base.complete(&areq->base, res); @@ -743,7 +743,7 @@ static void qat_skcipher_alg_callback(struct icp_qat_fw_la_resp *qat_resp, u8 stat_filed = qat_resp->comn_resp.comn_status; int res = 0, qat_res = ICP_QAT_FW_COMN_RESP_CRYPTO_STAT_GET(stat_filed); - qat_bl_free_bufl(inst, qat_req); + qat_bl_free_bufl(inst->accel_dev, &qat_req->buf); if (unlikely(qat_res != ICP_QAT_FW_COMN_STATUS_FLAG_OK)) res = -EINVAL; @@ -799,7 +799,8 @@ static int qat_alg_aead_dec(struct aead_request *areq) if (cipher_len % AES_BLOCK_SIZE != 0) return -EINVAL; - ret = qat_bl_sgl_to_bufl(ctx->inst, areq->src, areq->dst, qat_req, f); + ret = qat_bl_sgl_to_bufl(ctx->inst->accel_dev, areq->src, areq->dst, + &qat_req->buf, f); if (unlikely(ret)) return ret; @@ -821,7 +822,7 @@ static int qat_alg_aead_dec(struct aead_request *areq) ret = qat_alg_send_sym_message(qat_req, ctx->inst, &areq->base); if (ret == -ENOSPC) - 
qat_bl_free_bufl(ctx->inst, qat_req); + qat_bl_free_bufl(ctx->inst->accel_dev, &qat_req->buf); return ret; } @@ -842,7 +843,8 @@ static int qat_alg_aead_enc(struct aead_request *areq) if (areq->cryptlen % AES_BLOCK_SIZE != 0) return -EINVAL; - ret = qat_bl_sgl_to_bufl(ctx->inst, areq->src, areq->dst, qat_req, f); + ret = qat_bl_sgl_to_bufl(ctx->inst->accel_dev, areq->src, areq->dst, + &qat_req->buf, f); if (unlikely(ret)) return ret; @@ -866,7 +868,7 @@ static int qat_alg_aead_enc(struct aead_request *areq) ret = qat_alg_send_sym_message(qat_req, ctx->inst, &areq->base); if (ret == -ENOSPC) - qat_bl_free_bufl(ctx->inst, qat_req); + qat_bl_free_bufl(ctx->inst->accel_dev, &qat_req->buf); return ret; } @@ -1027,7 +1029,8 @@ static int qat_alg_skcipher_encrypt(struct skcipher_request *req) if (req->cryptlen == 0) return 0; - ret = qat_bl_sgl_to_bufl(ctx->inst, req->src, req->dst, qat_req, f); + ret = qat_bl_sgl_to_bufl(ctx->inst->accel_dev, req->src, req->dst, + &qat_req->buf, f); if (unlikely(ret)) return ret; @@ -1048,7 +1051,7 @@ static int qat_alg_skcipher_encrypt(struct skcipher_request *req) ret = qat_alg_send_sym_message(qat_req, ctx->inst, &req->base); if (ret == -ENOSPC) - qat_bl_free_bufl(ctx->inst, qat_req); + qat_bl_free_bufl(ctx->inst->accel_dev, &qat_req->buf); return ret; } @@ -1093,7 +1096,8 @@ static int qat_alg_skcipher_decrypt(struct skcipher_request *req) if (req->cryptlen == 0) return 0; - ret = qat_bl_sgl_to_bufl(ctx->inst, req->src, req->dst, qat_req, f); + ret = qat_bl_sgl_to_bufl(ctx->inst->accel_dev, req->src, req->dst, + &qat_req->buf, f); if (unlikely(ret)) return ret; @@ -1115,7 +1119,7 @@ static int qat_alg_skcipher_decrypt(struct skcipher_request *req) ret = qat_alg_send_sym_message(qat_req, ctx->inst, &req->base); if (ret == -ENOSPC) - qat_bl_free_bufl(ctx->inst, qat_req); + qat_bl_free_bufl(ctx->inst->accel_dev, &qat_req->buf); return ret; } diff --git a/drivers/crypto/qat/qat_common/qat_bl.c b/drivers/crypto/qat/qat_common/qat_bl.c 
index 8f7743f3c89b..5e319887f8d6 100644 --- a/drivers/crypto/qat/qat_common/qat_bl.c +++ b/drivers/crypto/qat/qat_common/qat_bl.c @@ -10,16 +10,16 @@ #include "qat_bl.h" #include "qat_crypto.h" -void qat_bl_free_bufl(struct qat_crypto_instance *inst, - struct qat_crypto_request *qat_req) +void qat_bl_free_bufl(struct adf_accel_dev *accel_dev, + struct qat_crypto_request_buffs *buf) { - struct device *dev = &GET_DEV(inst->accel_dev); - struct qat_alg_buf_list *bl = qat_req->buf.bl; - struct qat_alg_buf_list *blout = qat_req->buf.blout; - dma_addr_t blp = qat_req->buf.blp; - dma_addr_t blpout = qat_req->buf.bloutp; - size_t sz = qat_req->buf.sz; - size_t sz_out = qat_req->buf.sz_out; + struct device *dev = &GET_DEV(accel_dev); + struct qat_alg_buf_list *bl = buf->bl; + struct qat_alg_buf_list *blout = buf->blout; + dma_addr_t blp = buf->blp; + dma_addr_t blpout = buf->bloutp; + size_t sz = buf->sz; + size_t sz_out = buf->sz_out; int bl_dma_dir; int i; @@ -31,7 +31,7 @@ void qat_bl_free_bufl(struct qat_crypto_instance *inst, dma_unmap_single(dev, blp, sz, DMA_TO_DEVICE); - if (!qat_req->buf.sgl_src_valid) + if (!buf->sgl_src_valid) kfree(bl); if (blp != blpout) { @@ -45,18 +45,18 @@ void qat_bl_free_bufl(struct qat_crypto_instance *inst, } dma_unmap_single(dev, blpout, sz_out, DMA_TO_DEVICE); - if (!qat_req->buf.sgl_dst_valid) + if (!buf->sgl_dst_valid) kfree(blout); } } -int qat_bl_sgl_to_bufl(struct qat_crypto_instance *inst, +int qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev, struct scatterlist *sgl, struct scatterlist *sglout, - struct qat_crypto_request *qat_req, + struct qat_crypto_request_buffs *buf, gfp_t flags) { - struct device *dev = &GET_DEV(inst->accel_dev); + struct device *dev = &GET_DEV(accel_dev); int i, sg_nctr = 0; int n = sg_nents(sgl); struct qat_alg_buf_list *bufl; @@ -65,23 +65,23 @@ int qat_bl_sgl_to_bufl(struct qat_crypto_instance *inst, dma_addr_t bloutp = DMA_MAPPING_ERROR; struct scatterlist *sg; size_t sz_out, sz = struct_size(bufl, 
bufers, n); - int node = dev_to_node(&GET_DEV(inst->accel_dev)); + int node = dev_to_node(&GET_DEV(accel_dev)); int bufl_dma_dir; if (unlikely(!n)) return -EINVAL; - qat_req->buf.sgl_src_valid = false; - qat_req->buf.sgl_dst_valid = false; + buf->sgl_src_valid = false; + buf->sgl_dst_valid = false; if (n > QAT_MAX_BUFF_DESC) { bufl = kzalloc_node(sz, flags, node); if (unlikely(!bufl)) return -ENOMEM; } else { - bufl = &qat_req->buf.sgl_src.sgl_hdr; + bufl = &buf->sgl_src.sgl_hdr; memset(bufl, 0, sizeof(struct qat_alg_buf_list)); - qat_req->buf.sgl_src_valid = true; + buf->sgl_src_valid = true; } bufl_dma_dir = sgl != sglout ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL; @@ -107,9 +107,9 @@ int qat_bl_sgl_to_bufl(struct qat_crypto_instance *inst, blp = dma_map_single(dev, bufl, sz, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(dev, blp))) goto err_in; - qat_req->buf.bl = bufl; - qat_req->buf.blp = blp; - qat_req->buf.sz = sz; + buf->bl = bufl; + buf->blp = blp; + buf->sz = sz; /* Handle out of place operation */ if (sgl != sglout) { struct qat_alg_buf *bufers; @@ -123,9 +123,9 @@ int qat_bl_sgl_to_bufl(struct qat_crypto_instance *inst, if (unlikely(!buflout)) goto err_in; } else { - buflout = &qat_req->buf.sgl_dst.sgl_hdr; + buflout = &buf->sgl_dst.sgl_hdr; memset(buflout, 0, sizeof(struct qat_alg_buf_list)); - qat_req->buf.sgl_dst_valid = true; + buf->sgl_dst_valid = true; } bufers = buflout->bufers; @@ -151,13 +151,13 @@ int qat_bl_sgl_to_bufl(struct qat_crypto_instance *inst, bloutp = dma_map_single(dev, buflout, sz_out, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(dev, bloutp))) goto err_out; - qat_req->buf.blout = buflout; - qat_req->buf.bloutp = bloutp; - qat_req->buf.sz_out = sz_out; + buf->blout = buflout; + buf->bloutp = bloutp; + buf->sz_out = sz_out; } else { /* Otherwise set the src and dst to the same address */ - qat_req->buf.bloutp = qat_req->buf.blp; - qat_req->buf.sz_out = 0; + buf->bloutp = buf->blp; + buf->sz_out = 0; } return 0; @@ -172,7 +172,7 @@ 
err_out: buflout->bufers[i].len, DMA_FROM_DEVICE); - if (!qat_req->buf.sgl_dst_valid) + if (!buf->sgl_dst_valid) kfree(buflout); err_in: @@ -186,7 +186,7 @@ err_in: bufl->bufers[i].len, bufl_dma_dir); - if (!qat_req->buf.sgl_src_valid) + if (!buf->sgl_src_valid) kfree(bufl); dev_err(dev, "Failed to map buf for dma\n"); diff --git a/drivers/crypto/qat/qat_common/qat_bl.h b/drivers/crypto/qat/qat_common/qat_bl.h index ed4c200ac619..241299c219dd 100644 --- a/drivers/crypto/qat/qat_common/qat_bl.h +++ b/drivers/crypto/qat/qat_common/qat_bl.h @@ -6,12 +6,12 @@ #include #include "qat_crypto.h" -void qat_bl_free_bufl(struct qat_crypto_instance *inst, - struct qat_crypto_request *qat_req); -int qat_bl_sgl_to_bufl(struct qat_crypto_instance *inst, +void qat_bl_free_bufl(struct adf_accel_dev *accel_dev, + struct qat_crypto_request_buffs *buf); +int qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev, struct scatterlist *sgl, struct scatterlist *sglout, - struct qat_crypto_request *qat_req, + struct qat_crypto_request_buffs *buf, gfp_t flags); #endif From 36ebc7472afeb58f1eb1d4c1f0546b9e98acea46 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Mon, 28 Nov 2022 12:21:15 +0000 Subject: [PATCH 3558/4122] crypto: qat - generalize crypto request buffers The structure qat_crypto_request_buffs which contains the source and destination buffer lists and corresponding sizes and dma addresses is also required for the compression service. Rename it as qat_request_buffs and move it to qat_bl.h.
Signed-off-by: Giovanni Cabiddu Reviewed-by: Wojciech Ziemba Reviewed-by: Adam Guerin Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/qat_bl.c | 4 +-- drivers/crypto/qat/qat_common/qat_bl.h | 38 ++++++++++++++++++++-- drivers/crypto/qat/qat_common/qat_crypto.h | 36 ++------------------ 3 files changed, 39 insertions(+), 39 deletions(-) diff --git a/drivers/crypto/qat/qat_common/qat_bl.c b/drivers/crypto/qat/qat_common/qat_bl.c index 5e319887f8d6..c32b12d386f0 100644 --- a/drivers/crypto/qat/qat_common/qat_bl.c +++ b/drivers/crypto/qat/qat_common/qat_bl.c @@ -11,7 +11,7 @@ #include "qat_crypto.h" void qat_bl_free_bufl(struct adf_accel_dev *accel_dev, - struct qat_crypto_request_buffs *buf) + struct qat_request_buffs *buf) { struct device *dev = &GET_DEV(accel_dev); struct qat_alg_buf_list *bl = buf->bl; @@ -53,7 +53,7 @@ void qat_bl_free_bufl(struct adf_accel_dev *accel_dev, int qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev, struct scatterlist *sgl, struct scatterlist *sglout, - struct qat_crypto_request_buffs *buf, + struct qat_request_buffs *buf, gfp_t flags) { struct device *dev = &GET_DEV(accel_dev); diff --git a/drivers/crypto/qat/qat_common/qat_bl.h b/drivers/crypto/qat/qat_common/qat_bl.h index 241299c219dd..1c534c57a36b 100644 --- a/drivers/crypto/qat/qat_common/qat_bl.h +++ b/drivers/crypto/qat/qat_common/qat_bl.h @@ -4,14 +4,46 @@ #define QAT_BL_H #include #include -#include "qat_crypto.h" + +#define QAT_MAX_BUFF_DESC 4 + +struct qat_alg_buf { + u32 len; + u32 resrvd; + u64 addr; +} __packed; + +struct qat_alg_buf_list { + u64 resrvd; + u32 num_bufs; + u32 num_mapped_bufs; + struct qat_alg_buf bufers[]; +} __packed; + +struct qat_alg_fixed_buf_list { + struct qat_alg_buf_list sgl_hdr; + struct qat_alg_buf descriptors[QAT_MAX_BUFF_DESC]; +} __packed __aligned(64); + +struct qat_request_buffs { + struct qat_alg_buf_list *bl; + dma_addr_t blp; + struct qat_alg_buf_list *blout; + dma_addr_t bloutp; + size_t sz; + size_t sz_out; + bool 
sgl_src_valid; + bool sgl_dst_valid; + struct qat_alg_fixed_buf_list sgl_src; + struct qat_alg_fixed_buf_list sgl_dst; +}; void qat_bl_free_bufl(struct adf_accel_dev *accel_dev, - struct qat_crypto_request_buffs *buf); + struct qat_request_buffs *buf); int qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev, struct scatterlist *sgl, struct scatterlist *sglout, - struct qat_crypto_request_buffs *buf, + struct qat_request_buffs *buf, gfp_t flags); #endif diff --git a/drivers/crypto/qat/qat_common/qat_crypto.h b/drivers/crypto/qat/qat_common/qat_crypto.h index df3c738ce323..bb116357a568 100644 --- a/drivers/crypto/qat/qat_common/qat_crypto.h +++ b/drivers/crypto/qat/qat_common/qat_crypto.h @@ -8,6 +8,7 @@ #include #include "adf_accel_devices.h" #include "icp_qat_fw_la.h" +#include "qat_bl.h" struct qat_instance_backlog { struct list_head list; @@ -35,39 +36,6 @@ struct qat_crypto_instance { struct qat_instance_backlog backlog; }; -#define QAT_MAX_BUFF_DESC 4 - -struct qat_alg_buf { - u32 len; - u32 resrvd; - u64 addr; -} __packed; - -struct qat_alg_buf_list { - u64 resrvd; - u32 num_bufs; - u32 num_mapped_bufs; - struct qat_alg_buf bufers[]; -} __packed; - -struct qat_alg_fixed_buf_list { - struct qat_alg_buf_list sgl_hdr; - struct qat_alg_buf descriptors[QAT_MAX_BUFF_DESC]; -} __packed __aligned(64); - -struct qat_crypto_request_buffs { - struct qat_alg_buf_list *bl; - dma_addr_t blp; - struct qat_alg_buf_list *blout; - dma_addr_t bloutp; - size_t sz; - size_t sz_out; - bool sgl_src_valid; - bool sgl_dst_valid; - struct qat_alg_fixed_buf_list sgl_src; - struct qat_alg_fixed_buf_list sgl_dst; -}; - struct qat_crypto_request; struct qat_crypto_request { @@ -80,7 +48,7 @@ struct qat_crypto_request { struct aead_request *aead_req; struct skcipher_request *skcipher_req; }; - struct qat_crypto_request_buffs buf; + struct qat_request_buffs buf; void (*cb)(struct icp_qat_fw_la_resp *resp, struct qat_crypto_request *req); union { From cf692906bd61af2eec06a32a83d2a8ec3acf3548 
Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Mon, 28 Nov 2022 12:21:16 +0000 Subject: [PATCH 3559/4122] crypto: qat - extend buffer list interface The compression service requires an additional pre-allocated buffer for each destination scatter list. Extend the function qat_bl_sgl_to_bufl() to take an additional structure that contains the dma address and the size of the extra buffer which will be appended in the destination FW SGL. The logic that unmaps buffers in qat_bl_free_bufl() has been changed to start unmapping from buffer 0 instead of skipping the initial buffers num_bufs - num_mapped_bufs as that functionality was not used in the code. Signed-off-by: Giovanni Cabiddu Reviewed-by: Wojciech Ziemba Reviewed-by: Adam Guerin Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/qat_algs.c | 8 ++-- drivers/crypto/qat/qat_common/qat_bl.c | 58 ++++++++++++++++++------ drivers/crypto/qat/qat_common/qat_bl.h | 6 +++ 3 files changed, 54 insertions(+), 18 deletions(-) diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c index dfa65e42db78..b4b9f0aa59b9 100644 --- a/drivers/crypto/qat/qat_common/qat_algs.c +++ b/drivers/crypto/qat/qat_common/qat_algs.c @@ -800,7 +800,7 @@ static int qat_alg_aead_dec(struct aead_request *areq) return -EINVAL; ret = qat_bl_sgl_to_bufl(ctx->inst->accel_dev, areq->src, areq->dst, - &qat_req->buf, f); + &qat_req->buf, NULL, f); if (unlikely(ret)) return ret; @@ -844,7 +844,7 @@ static int qat_alg_aead_enc(struct aead_request *areq) return -EINVAL; ret = qat_bl_sgl_to_bufl(ctx->inst->accel_dev, areq->src, areq->dst, - &qat_req->buf, f); + &qat_req->buf, NULL, f); if (unlikely(ret)) return ret; @@ -1030,7 +1030,7 @@ static int qat_alg_skcipher_encrypt(struct skcipher_request *req) return 0; ret = qat_bl_sgl_to_bufl(ctx->inst->accel_dev, req->src, req->dst, - &qat_req->buf, f); + &qat_req->buf, NULL, f); if (unlikely(ret)) return ret; @@ -1097,7 +1097,7 @@ static int
qat_alg_skcipher_decrypt(struct skcipher_request *req) return 0; ret = qat_bl_sgl_to_bufl(ctx->inst->accel_dev, req->src, req->dst, - &qat_req->buf, f); + &qat_req->buf, NULL, f); if (unlikely(ret)) return ret; diff --git a/drivers/crypto/qat/qat_common/qat_bl.c b/drivers/crypto/qat/qat_common/qat_bl.c index c32b12d386f0..221a4eb610a3 100644 --- a/drivers/crypto/qat/qat_common/qat_bl.c +++ b/drivers/crypto/qat/qat_common/qat_bl.c @@ -35,10 +35,7 @@ void qat_bl_free_bufl(struct adf_accel_dev *accel_dev, kfree(bl); if (blp != blpout) { - /* If out of place operation dma unmap only data */ - int bufless = blout->num_bufs - blout->num_mapped_bufs; - - for (i = bufless; i < blout->num_bufs; i++) { + for (i = 0; i < blout->num_mapped_bufs; i++) { dma_unmap_single(dev, blout->bufers[i].addr, blout->bufers[i].len, DMA_FROM_DEVICE); @@ -50,11 +47,13 @@ void qat_bl_free_bufl(struct adf_accel_dev *accel_dev, } } -int qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev, - struct scatterlist *sgl, - struct scatterlist *sglout, - struct qat_request_buffs *buf, - gfp_t flags) +static int __qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev, + struct scatterlist *sgl, + struct scatterlist *sglout, + struct qat_request_buffs *buf, + dma_addr_t extra_dst_buff, + size_t sz_extra_dst_buff, + gfp_t flags) { struct device *dev = &GET_DEV(accel_dev); int i, sg_nctr = 0; @@ -86,7 +85,7 @@ int qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev, bufl_dma_dir = sgl != sglout ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL; - for_each_sg(sgl, sg, n, i) + for (i = 0; i < n; i++) bufl->bufers[i].addr = DMA_MAPPING_ERROR; for_each_sg(sgl, sg, n, i) { @@ -113,8 +112,10 @@ int qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev, /* Handle out of place operation */ if (sgl != sglout) { struct qat_alg_buf *bufers; + int extra_buff = extra_dst_buff ? 
1 : 0; + int n_sglout = sg_nents(sglout); - n = sg_nents(sglout); + n = n_sglout + extra_buff; sz_out = struct_size(buflout, bufers, n); sg_nctr = 0; @@ -129,10 +130,10 @@ int qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev, } bufers = buflout->bufers; - for_each_sg(sglout, sg, n, i) + for (i = 0; i < n; i++) bufers[i].addr = DMA_MAPPING_ERROR; - for_each_sg(sglout, sg, n, i) { + for_each_sg(sglout, sg, n_sglout, i) { int y = sg_nctr; if (!sg->length) @@ -146,7 +147,13 @@ int qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev, bufers[y].len = sg->length; sg_nctr++; } + if (extra_buff) { + bufers[sg_nctr].addr = extra_dst_buff; + bufers[sg_nctr].len = sz_extra_dst_buff; + } + buflout->num_bufs = sg_nctr; + buflout->num_bufs += extra_buff; buflout->num_mapped_bufs = sg_nctr; bloutp = dma_map_single(dev, buflout, sz_out, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(dev, bloutp))) @@ -166,11 +173,14 @@ err_out: dma_unmap_single(dev, bloutp, sz_out, DMA_TO_DEVICE); n = sg_nents(sglout); - for (i = 0; i < n; i++) + for (i = 0; i < n; i++) { + if (buflout->bufers[i].addr == extra_dst_buff) + break; if (!dma_mapping_error(dev, buflout->bufers[i].addr)) dma_unmap_single(dev, buflout->bufers[i].addr, buflout->bufers[i].len, DMA_FROM_DEVICE); + } if (!buf->sgl_dst_valid) kfree(buflout); @@ -192,3 +202,23 @@ err_in: dev_err(dev, "Failed to map buf for dma\n"); return -ENOMEM; } + +int qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev, + struct scatterlist *sgl, + struct scatterlist *sglout, + struct qat_request_buffs *buf, + struct qat_sgl_to_bufl_params *params, + gfp_t flags) +{ + dma_addr_t extra_dst_buff = 0; + size_t sz_extra_dst_buff = 0; + + if (params) { + extra_dst_buff = params->extra_dst_buff; + sz_extra_dst_buff = params->sz_extra_dst_buff; + } + + return __qat_bl_sgl_to_bufl(accel_dev, sgl, sglout, buf, + extra_dst_buff, sz_extra_dst_buff, + flags); +} diff --git a/drivers/crypto/qat/qat_common/qat_bl.h b/drivers/crypto/qat/qat_common/qat_bl.h index 
1c534c57a36b..0c174fee9e64 100644 --- a/drivers/crypto/qat/qat_common/qat_bl.h +++ b/drivers/crypto/qat/qat_common/qat_bl.h @@ -38,12 +38,18 @@ struct qat_request_buffs { struct qat_alg_fixed_buf_list sgl_dst; }; +struct qat_sgl_to_bufl_params { + dma_addr_t extra_dst_buff; + size_t sz_extra_dst_buff; +}; + void qat_bl_free_bufl(struct adf_accel_dev *accel_dev, struct qat_request_buffs *buf); int qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev, struct scatterlist *sgl, struct scatterlist *sglout, struct qat_request_buffs *buf, + struct qat_sgl_to_bufl_params *params, gfp_t flags); #endif From 4d76f3880987a00da79f455876488ac3c7343e83 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Mon, 28 Nov 2022 12:21:17 +0000 Subject: [PATCH 3560/4122] crypto: qat - relocate backlog related structures Move the structures qat_instance_backlog and qat_alg_req from qat_crypto.h to qat_algs_send.h since they are not unique to crypto. Both structures will be used by the compression service to support requests with the CRYPTO_TFM_REQ_MAY_BACKLOG flag set. 
Signed-off-by: Giovanni Cabiddu Reviewed-by: Wojciech Ziemba Reviewed-by: Adam Guerin Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/qat_algs_send.h | 16 +++++++++++++++- drivers/crypto/qat/qat_common/qat_crypto.h | 14 +------------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/drivers/crypto/qat/qat_common/qat_algs_send.h b/drivers/crypto/qat/qat_common/qat_algs_send.h index 5ce9f4f69d8f..0baca16e1eff 100644 --- a/drivers/crypto/qat/qat_common/qat_algs_send.h +++ b/drivers/crypto/qat/qat_common/qat_algs_send.h @@ -3,7 +3,21 @@ #ifndef QAT_ALGS_SEND_H #define QAT_ALGS_SEND_H -#include "qat_crypto.h" +#include +#include "adf_transport_internal.h" + +struct qat_instance_backlog { + struct list_head list; + spinlock_t lock; /* protects backlog list */ +}; + +struct qat_alg_req { + u32 *fw_req; + struct adf_etr_ring_data *tx_ring; + struct crypto_async_request *base; + struct list_head list; + struct qat_instance_backlog *backlog; +}; int qat_alg_send_message(struct qat_alg_req *req); void qat_alg_send_backlog(struct qat_instance_backlog *backlog); diff --git a/drivers/crypto/qat/qat_common/qat_crypto.h b/drivers/crypto/qat/qat_common/qat_crypto.h index bb116357a568..505e881022a7 100644 --- a/drivers/crypto/qat/qat_common/qat_crypto.h +++ b/drivers/crypto/qat/qat_common/qat_crypto.h @@ -8,21 +8,9 @@ #include #include "adf_accel_devices.h" #include "icp_qat_fw_la.h" +#include "qat_algs_send.h" #include "qat_bl.h" -struct qat_instance_backlog { - struct list_head list; - spinlock_t lock; /* protects backlog list */ -}; - -struct qat_alg_req { - u32 *fw_req; - struct adf_etr_ring_data *tx_ring; - struct crypto_async_request *base; - struct list_head list; - struct qat_instance_backlog *backlog; -}; - struct qat_crypto_instance { struct adf_etr_ring_data *sym_tx; struct adf_etr_ring_data *sym_rx; From 79d8dbf155d4e670b6ac20acbb6b22f02c728da5 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Mon, 28 Nov 2022 12:21:18 +0000 Subject: [PATCH 
3561/4122] crypto: qat - relocate qat_algs_alloc_flags() Move qat_algs_alloc_flags() from qat_crypto.h to qat_bl.h as this will be used also by the compression logic. Signed-off-by: Giovanni Cabiddu Reviewed-by: Wojciech Ziemba Reviewed-by: Adam Guerin Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/qat_bl.h | 6 ++++++ drivers/crypto/qat/qat_common/qat_crypto.h | 5 ----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/crypto/qat/qat_common/qat_bl.h b/drivers/crypto/qat/qat_common/qat_bl.h index 0c174fee9e64..5f2ea8f352f7 100644 --- a/drivers/crypto/qat/qat_common/qat_bl.h +++ b/drivers/crypto/qat/qat_common/qat_bl.h @@ -2,6 +2,7 @@ /* Copyright(c) 2014 - 2022 Intel Corporation */ #ifndef QAT_BL_H #define QAT_BL_H +#include #include #include @@ -52,4 +53,9 @@ int qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev, struct qat_sgl_to_bufl_params *params, gfp_t flags); +static inline gfp_t qat_algs_alloc_flags(struct crypto_async_request *req) +{ + return req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC; +} + #endif diff --git a/drivers/crypto/qat/qat_common/qat_crypto.h b/drivers/crypto/qat/qat_common/qat_crypto.h index 505e881022a7..6a0e961bb9dc 100644 --- a/drivers/crypto/qat/qat_common/qat_crypto.h +++ b/drivers/crypto/qat/qat_common/qat_crypto.h @@ -65,9 +65,4 @@ static inline bool adf_hw_dev_has_crypto(struct adf_accel_dev *accel_dev) return true; } -static inline gfp_t qat_algs_alloc_flags(struct crypto_async_request *req) -{ - return req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC; -} - #endif From 93b2f5799cee57814a36882e61ef5f03d5dc5392 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Mon, 28 Nov 2022 12:21:19 +0000 Subject: [PATCH 3562/4122] crypto: qat - rename and relocate GEN2 config function Rename qat_crypto_dev_config() to adf_gen2_dev_config() and relocate it to the newly created file adf_gen2_config.c.
This function is specific to QAT GEN2 devices and will be used also to configure the compression service. In addition change the drivers to use the dev_config() in the hardware data structure (which for GEN2 devices now points to adf_gen2_dev_config()), for consistency. Signed-off-by: Giovanni Cabiddu Reviewed-by: Wojciech Ziemba Reviewed-by: Adam Guerin Signed-off-by: Herbert Xu --- .../crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c | 2 + drivers/crypto/qat/qat_c3xxx/adf_drv.c | 2 +- .../qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c | 2 + .../crypto/qat/qat_c62x/adf_c62x_hw_data.c | 2 + drivers/crypto/qat/qat_c62x/adf_drv.c | 2 +- .../qat/qat_c62xvf/adf_c62xvf_hw_data.c | 2 + drivers/crypto/qat/qat_common/Makefile | 1 + .../crypto/qat/qat_common/adf_common_drv.h | 1 - .../crypto/qat/qat_common/adf_gen2_config.c | 131 ++++++++++++++++++ .../crypto/qat/qat_common/adf_gen2_config.h | 10 ++ drivers/crypto/qat/qat_common/qat_crypto.c | 120 +--------------- .../qat/qat_dh895xcc/adf_dh895xcc_hw_data.c | 2 + drivers/crypto/qat/qat_dh895xcc/adf_drv.c | 2 +- .../qat_dh895xccvf/adf_dh895xccvf_hw_data.c | 2 + 14 files changed, 158 insertions(+), 123 deletions(-) create mode 100644 drivers/crypto/qat/qat_common/adf_gen2_config.c create mode 100644 drivers/crypto/qat/qat_common/adf_gen2_config.h diff --git a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c index 50d5afa26a9b..c0519a79060a 100644 --- a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c +++ b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c @@ -2,6 +2,7 @@ /* Copyright(c) 2014 - 2021 Intel Corporation */ #include #include +#include #include #include #include "adf_c3xxx_hw_data.h" @@ -124,6 +125,7 @@ void adf_init_hw_data_c3xxx(struct adf_hw_device_data *hw_data) hw_data->reset_device = adf_reset_flr; hw_data->set_ssm_wdtimer = adf_gen2_set_ssm_wdtimer; hw_data->disable_iov = adf_disable_sriov; + hw_data->dev_config = adf_gen2_dev_config; 
adf_gen2_init_pf_pfvf_ops(&hw_data->pfvf_ops); adf_gen2_init_hw_csr_ops(&hw_data->csr_ops); diff --git a/drivers/crypto/qat/qat_c3xxx/adf_drv.c b/drivers/crypto/qat/qat_c3xxx/adf_drv.c index 2aef0bb791df..1f4fbf4562b2 100644 --- a/drivers/crypto/qat/qat_c3xxx/adf_drv.c +++ b/drivers/crypto/qat/qat_c3xxx/adf_drv.c @@ -201,7 +201,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto out_err_disable_aer; } - ret = qat_crypto_dev_config(accel_dev); + ret = hw_data->dev_config(accel_dev); if (ret) goto out_err_disable_aer; diff --git a/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c b/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c index a9fbe57b32ae..6c37dda6da2e 100644 --- a/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c +++ b/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c @@ -2,6 +2,7 @@ /* Copyright(c) 2015 - 2021 Intel Corporation */ #include #include +#include #include #include #include @@ -86,6 +87,7 @@ void adf_init_hw_data_c3xxxiov(struct adf_hw_device_data *hw_data) hw_data->get_sku = get_sku; hw_data->enable_ints = adf_vf_void_noop; hw_data->dev_class->instances++; + hw_data->dev_config = adf_gen2_dev_config; adf_devmgr_update_class_index(hw_data); adf_gen2_init_vf_pfvf_ops(&hw_data->pfvf_ops); adf_gen2_init_hw_csr_ops(&hw_data->csr_ops); diff --git a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c index c00386fe6587..689358cb7bb0 100644 --- a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c +++ b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c @@ -2,6 +2,7 @@ /* Copyright(c) 2014 - 2021 Intel Corporation */ #include #include +#include #include #include #include "adf_c62x_hw_data.h" @@ -126,6 +127,7 @@ void adf_init_hw_data_c62x(struct adf_hw_device_data *hw_data) hw_data->reset_device = adf_reset_flr; hw_data->set_ssm_wdtimer = adf_gen2_set_ssm_wdtimer; hw_data->disable_iov = adf_disable_sriov; + hw_data->dev_config = adf_gen2_dev_config; 
adf_gen2_init_pf_pfvf_ops(&hw_data->pfvf_ops); adf_gen2_init_hw_csr_ops(&hw_data->csr_ops); diff --git a/drivers/crypto/qat/qat_c62x/adf_drv.c b/drivers/crypto/qat/qat_c62x/adf_drv.c index 56163083f161..4ccaf298250c 100644 --- a/drivers/crypto/qat/qat_c62x/adf_drv.c +++ b/drivers/crypto/qat/qat_c62x/adf_drv.c @@ -201,7 +201,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto out_err_disable_aer; } - ret = qat_crypto_dev_config(accel_dev); + ret = hw_data->dev_config(accel_dev); if (ret) goto out_err_disable_aer; diff --git a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c index 0282038fca54..521110ecd07f 100644 --- a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c +++ b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c @@ -2,6 +2,7 @@ /* Copyright(c) 2015 - 2021 Intel Corporation */ #include #include +#include #include #include #include @@ -86,6 +87,7 @@ void adf_init_hw_data_c62xiov(struct adf_hw_device_data *hw_data) hw_data->get_sku = get_sku; hw_data->enable_ints = adf_vf_void_noop; hw_data->dev_class->instances++; + hw_data->dev_config = adf_gen2_dev_config; adf_devmgr_update_class_index(hw_data); adf_gen2_init_vf_pfvf_ops(&hw_data->pfvf_ops); adf_gen2_init_hw_csr_ops(&hw_data->csr_ops); diff --git a/drivers/crypto/qat/qat_common/Makefile b/drivers/crypto/qat/qat_common/Makefile index b0587d03eac2..b59b6315134b 100644 --- a/drivers/crypto/qat/qat_common/Makefile +++ b/drivers/crypto/qat/qat_common/Makefile @@ -12,6 +12,7 @@ intel_qat-objs := adf_cfg.o \ adf_hw_arbiter.o \ adf_sysfs.o \ adf_gen2_hw_data.o \ + adf_gen2_config.o \ adf_gen4_hw_data.o \ adf_gen4_pm.o \ qat_crypto.o \ diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h index 7bb477c3ce25..b8ec0268d2d2 100644 --- a/drivers/crypto/qat/qat_common/adf_common_drv.h +++ b/drivers/crypto/qat/qat_common/adf_common_drv.h @@ -110,7 +110,6 @@ int 
adf_init_etr_data(struct adf_accel_dev *accel_dev); void adf_cleanup_etr_data(struct adf_accel_dev *accel_dev); int qat_crypto_register(void); int qat_crypto_unregister(void); -int qat_crypto_dev_config(struct adf_accel_dev *accel_dev); int qat_crypto_vf_dev_config(struct adf_accel_dev *accel_dev); struct qat_crypto_instance *qat_crypto_get_instance_node(int node); void qat_crypto_put_instance(struct qat_crypto_instance *inst); diff --git a/drivers/crypto/qat/qat_common/adf_gen2_config.c b/drivers/crypto/qat/qat_common/adf_gen2_config.c new file mode 100644 index 000000000000..1c490e1859a7 --- /dev/null +++ b/drivers/crypto/qat/qat_common/adf_gen2_config.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2022 Intel Corporation */ +#include "adf_accel_devices.h" +#include "adf_cfg.h" +#include "adf_cfg_strings.h" +#include "adf_gen2_config.h" +#include "adf_common_drv.h" +#include "qat_crypto.h" +#include "adf_transport_access_macros.h" + +static int adf_gen2_crypto_dev_config(struct adf_accel_dev *accel_dev) +{ + char key[ADF_CFG_MAX_KEY_LEN_IN_BYTES]; + int banks = GET_MAX_BANKS(accel_dev); + int cpus = num_online_cpus(); + unsigned long val; + int instances; + int ret; + int i; + + if (adf_hw_dev_has_crypto(accel_dev)) + instances = min(cpus, banks); + else + instances = 0; + + ret = adf_cfg_section_add(accel_dev, ADF_KERNEL_SEC); + if (ret) + goto err; + + ret = adf_cfg_section_add(accel_dev, "Accelerator0"); + if (ret) + goto err; + + for (i = 0; i < instances; i++) { + val = i; + snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_BANK_NUM, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_BANK_NUM, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + snprintf(key, sizeof(key), ADF_CY "%d" ADF_ETRMGR_CORE_AFFINITY, + i); + ret = 
adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_SIZE, i); + val = 128; + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + val = 512; + snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_SIZE, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + val = 0; + snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_TX, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + val = 2; + snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_TX, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + val = 8; + snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_RX, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + val = 10; + snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_RX, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + val = ADF_COALESCING_DEF_TIME; + snprintf(key, sizeof(key), ADF_ETRMGR_COALESCE_TIMER_FORMAT, i); + ret = adf_cfg_add_key_value_param(accel_dev, "Accelerator0", + key, &val, ADF_DEC); + if (ret) + goto err; + } + + val = i; + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_CY, + &val, ADF_DEC); + if (ret) + goto err; + + set_bit(ADF_STATUS_CONFIGURED, &accel_dev->status); + return 0; +err: + dev_err(&GET_DEV(accel_dev), "Failed to start QAT accel dev\n"); + return ret; +} + +/** + * adf_gen2_dev_config() - create dev config required to create instances + * + * @accel_dev: Pointer to acceleration device. + * + * Function creates device configuration required to create instances + * + * Return: 0 on success, error code otherwise. 
+ */ +int adf_gen2_dev_config(struct adf_accel_dev *accel_dev) +{ + return adf_gen2_crypto_dev_config(accel_dev); +} +EXPORT_SYMBOL_GPL(adf_gen2_dev_config); diff --git a/drivers/crypto/qat/qat_common/adf_gen2_config.h b/drivers/crypto/qat/qat_common/adf_gen2_config.h new file mode 100644 index 000000000000..4bf9da2de68a --- /dev/null +++ b/drivers/crypto/qat/qat_common/adf_gen2_config.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2022 Intel Corporation */ +#ifndef ADF_GEN2_CONFIG_H_ +#define ADF_GEN2_CONFIG_H_ + +#include "adf_accel_devices.h" + +int adf_gen2_dev_config(struct adf_accel_dev *accel_dev); + +#endif diff --git a/drivers/crypto/qat/qat_common/qat_crypto.c b/drivers/crypto/qat/qat_common/qat_crypto.c index 9341d892533a..e31199eade5b 100644 --- a/drivers/crypto/qat/qat_common/qat_crypto.c +++ b/drivers/crypto/qat/qat_common/qat_crypto.c @@ -5,7 +5,6 @@ #include "adf_accel_devices.h" #include "adf_common_drv.h" #include "adf_transport.h" -#include "adf_transport_access_macros.h" #include "adf_cfg.h" #include "adf_cfg_strings.h" #include "adf_gen2_hw_data.h" @@ -126,126 +125,9 @@ int qat_crypto_vf_dev_config(struct adf_accel_dev *accel_dev) return -EFAULT; } - return qat_crypto_dev_config(accel_dev); + return GET_HW_DATA(accel_dev)->dev_config(accel_dev); } -/** - * qat_crypto_dev_config() - create dev config required to create crypto inst. - * - * @accel_dev: Pointer to acceleration device. - * - * Function creates device configuration required to create crypto instances - * - * Return: 0 on success, error code otherwise. 
- */ -int qat_crypto_dev_config(struct adf_accel_dev *accel_dev) -{ - char key[ADF_CFG_MAX_KEY_LEN_IN_BYTES]; - int banks = GET_MAX_BANKS(accel_dev); - int cpus = num_online_cpus(); - unsigned long val; - int instances; - int ret; - int i; - - if (adf_hw_dev_has_crypto(accel_dev)) - instances = min(cpus, banks); - else - instances = 0; - - ret = adf_cfg_section_add(accel_dev, ADF_KERNEL_SEC); - if (ret) - goto err; - - ret = adf_cfg_section_add(accel_dev, "Accelerator0"); - if (ret) - goto err; - - for (i = 0; i < instances; i++) { - val = i; - snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_BANK_NUM, i); - ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, - key, &val, ADF_DEC); - if (ret) - goto err; - - snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_BANK_NUM, i); - ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, - key, &val, ADF_DEC); - if (ret) - goto err; - - snprintf(key, sizeof(key), ADF_CY "%d" ADF_ETRMGR_CORE_AFFINITY, - i); - ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, - key, &val, ADF_DEC); - if (ret) - goto err; - - snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_SIZE, i); - val = 128; - ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, - key, &val, ADF_DEC); - if (ret) - goto err; - - val = 512; - snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_SIZE, i); - ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, - key, &val, ADF_DEC); - if (ret) - goto err; - - val = 0; - snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_TX, i); - ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, - key, &val, ADF_DEC); - if (ret) - goto err; - - val = 2; - snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_TX, i); - ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, - key, &val, ADF_DEC); - if (ret) - goto err; - - val = 8; - snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_RX, i); - ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, - key, &val, ADF_DEC); - 
if (ret) - goto err; - - val = 10; - snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_RX, i); - ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, - key, &val, ADF_DEC); - if (ret) - goto err; - - val = ADF_COALESCING_DEF_TIME; - snprintf(key, sizeof(key), ADF_ETRMGR_COALESCE_TIMER_FORMAT, i); - ret = adf_cfg_add_key_value_param(accel_dev, "Accelerator0", - key, &val, ADF_DEC); - if (ret) - goto err; - } - - val = i; - ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_CY, - &val, ADF_DEC); - if (ret) - goto err; - - set_bit(ADF_STATUS_CONFIGURED, &accel_dev->status); - return 0; -err: - dev_err(&GET_DEV(accel_dev), "Failed to start QAT accel dev\n"); - return ret; -} -EXPORT_SYMBOL_GPL(qat_crypto_dev_config); - static int qat_crypto_create_instances(struct adf_accel_dev *accel_dev) { unsigned long num_inst, num_msg_sym, num_msg_asym; diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c index cb3bdd3618fb..baacf817abf6 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c @@ -2,6 +2,7 @@ /* Copyright(c) 2014 - 2021 Intel Corporation */ #include #include +#include #include #include #include "adf_dh895xcc_hw_data.h" @@ -234,6 +235,7 @@ void adf_init_hw_data_dh895xcc(struct adf_hw_device_data *hw_data) hw_data->enable_ints = adf_gen2_enable_ints; hw_data->reset_device = adf_reset_sbr; hw_data->disable_iov = adf_disable_sriov; + hw_data->dev_config = adf_gen2_dev_config; adf_gen2_init_pf_pfvf_ops(&hw_data->pfvf_ops); hw_data->pfvf_ops.enable_vf2pf_interrupts = enable_vf2pf_interrupts; diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c index acca56752aa0..ebeb17b67fcd 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c @@ -201,7 +201,7 @@ static int adf_probe(struct pci_dev *pdev, const struct 
pci_device_id *ent) goto out_err_disable_aer; } - ret = qat_crypto_dev_config(accel_dev); + ret = hw_data->dev_config(accel_dev); if (ret) goto out_err_disable_aer; diff --git a/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c b/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c index 31c14d7e1c11..b933a00fb91b 100644 --- a/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c +++ b/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c @@ -2,6 +2,7 @@ /* Copyright(c) 2015 - 2021 Intel Corporation */ #include #include +#include #include #include #include @@ -86,6 +87,7 @@ void adf_init_hw_data_dh895xcciov(struct adf_hw_device_data *hw_data) hw_data->get_sku = get_sku; hw_data->enable_ints = adf_vf_void_noop; hw_data->dev_class->instances++; + hw_data->dev_config = adf_gen2_dev_config; adf_devmgr_update_class_index(hw_data); adf_gen2_init_vf_pfvf_ops(&hw_data->pfvf_ops); adf_gen2_init_hw_csr_ops(&hw_data->csr_ops); From 1198ae56c9a520384dcf53f01cd9adecd73751d0 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Mon, 28 Nov 2022 12:21:20 +0000 Subject: [PATCH 3563/4122] crypto: qat - expose deflate through acomp api for QAT GEN2 Add infrastructure for implementing the acomp APIs in the QAT driver and expose the deflate algorithm for QAT GEN2 devices. This adds (1) the compression service which includes logic to create, allocate and handle compression instances; (2) logic to create configuration entries at probe time for the compression instances; (3) updates to the firmware API for allowing the compression service; and (4) a back-end for deflate that implements the acomp api for QAT GEN2 devices. The implementation configures the device to produce data compressed statically, optimized for throughput over compression ratio.
Signed-off-by: Giovanni Cabiddu Reviewed-by: Wojciech Ziemba Reviewed-by: Adam Guerin Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_4xxx/adf_drv.c | 6 + .../crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c | 2 + .../qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c | 2 + .../crypto/qat/qat_c62x/adf_c62x_hw_data.c | 2 + .../qat/qat_c62xvf/adf_c62xvf_hw_data.c | 2 + drivers/crypto/qat/qat_common/Makefile | 3 + .../crypto/qat/qat_common/adf_accel_devices.h | 14 + .../crypto/qat/qat_common/adf_cfg_strings.h | 1 + .../crypto/qat/qat_common/adf_common_drv.h | 8 + drivers/crypto/qat/qat_common/adf_ctl_drv.c | 6 + .../crypto/qat/qat_common/adf_gen2_config.c | 99 ++++- drivers/crypto/qat/qat_common/adf_gen2_dc.c | 70 +++ drivers/crypto/qat/qat_common/adf_gen2_dc.h | 10 + drivers/crypto/qat/qat_common/adf_init.c | 11 + drivers/crypto/qat/qat_common/adf_sriov.c | 4 + drivers/crypto/qat/qat_common/icp_qat_fw.h | 24 ++ .../crypto/qat/qat_common/icp_qat_fw_comp.h | 404 ++++++++++++++++++ drivers/crypto/qat/qat_common/icp_qat_hw.h | 66 +++ drivers/crypto/qat/qat_common/qat_comp_algs.c | 274 ++++++++++++ drivers/crypto/qat/qat_common/qat_comp_req.h | 113 +++++ .../crypto/qat/qat_common/qat_compression.c | 297 +++++++++++++ .../crypto/qat/qat_common/qat_compression.h | 37 ++ .../qat/qat_dh895xcc/adf_dh895xcc_hw_data.c | 2 + .../qat_dh895xccvf/adf_dh895xccvf_hw_data.c | 2 + 24 files changed, 1447 insertions(+), 12 deletions(-) create mode 100644 drivers/crypto/qat/qat_common/adf_gen2_dc.c create mode 100644 drivers/crypto/qat/qat_common/adf_gen2_dc.h create mode 100644 drivers/crypto/qat/qat_common/icp_qat_fw_comp.h create mode 100644 drivers/crypto/qat/qat_common/qat_comp_algs.c create mode 100644 drivers/crypto/qat/qat_common/qat_comp_req.h create mode 100644 drivers/crypto/qat/qat_common/qat_compression.c create mode 100644 drivers/crypto/qat/qat_common/qat_compression.h diff --git a/drivers/crypto/qat/qat_4xxx/adf_drv.c b/drivers/crypto/qat/qat_4xxx/adf_drv.c index 670a58b25cb1..8496c451b48e 
100644 --- a/drivers/crypto/qat/qat_4xxx/adf_drv.c +++ b/drivers/crypto/qat/qat_4xxx/adf_drv.c @@ -155,6 +155,12 @@ int adf_crypto_dev_config(struct adf_accel_dev *accel_dev) if (ret) goto err; + val = 0; + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_DC, + &val, ADF_DEC); + if (ret) + goto err; + set_bit(ADF_STATUS_CONFIGURED, &accel_dev->status); return 0; err: diff --git a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c index c0519a79060a..c55c51a07677 100644 --- a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c +++ b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include "adf_c3xxx_hw_data.h" @@ -129,6 +130,7 @@ void adf_init_hw_data_c3xxx(struct adf_hw_device_data *hw_data) adf_gen2_init_pf_pfvf_ops(&hw_data->pfvf_ops); adf_gen2_init_hw_csr_ops(&hw_data->csr_ops); + adf_gen2_init_dc_ops(&hw_data->dc_ops); } void adf_clean_hw_data_c3xxx(struct adf_hw_device_data *hw_data) diff --git a/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c b/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c index 6c37dda6da2e..84d9486e04de 100644 --- a/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c +++ b/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -91,6 +92,7 @@ void adf_init_hw_data_c3xxxiov(struct adf_hw_device_data *hw_data) adf_devmgr_update_class_index(hw_data); adf_gen2_init_vf_pfvf_ops(&hw_data->pfvf_ops); adf_gen2_init_hw_csr_ops(&hw_data->csr_ops); + adf_gen2_init_dc_ops(&hw_data->dc_ops); } void adf_clean_hw_data_c3xxxiov(struct adf_hw_device_data *hw_data) diff --git a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c index 689358cb7bb0..b7aa19d2fa80 100644 --- a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c +++ b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c @@ -3,6 +3,7 @@ 
#include #include #include +#include #include #include #include "adf_c62x_hw_data.h" @@ -131,6 +132,7 @@ void adf_init_hw_data_c62x(struct adf_hw_device_data *hw_data) adf_gen2_init_pf_pfvf_ops(&hw_data->pfvf_ops); adf_gen2_init_hw_csr_ops(&hw_data->csr_ops); + adf_gen2_init_dc_ops(&hw_data->dc_ops); } void adf_clean_hw_data_c62x(struct adf_hw_device_data *hw_data) diff --git a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c index 521110ecd07f..751d7aa57fc7 100644 --- a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c +++ b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -91,6 +92,7 @@ void adf_init_hw_data_c62xiov(struct adf_hw_device_data *hw_data) adf_devmgr_update_class_index(hw_data); adf_gen2_init_vf_pfvf_ops(&hw_data->pfvf_ops); adf_gen2_init_hw_csr_ops(&hw_data->csr_ops); + adf_gen2_init_dc_ops(&hw_data->dc_ops); } void adf_clean_hw_data_c62xiov(struct adf_hw_device_data *hw_data) diff --git a/drivers/crypto/qat/qat_common/Makefile b/drivers/crypto/qat/qat_common/Makefile index b59b6315134b..e3db4786738f 100644 --- a/drivers/crypto/qat/qat_common/Makefile +++ b/drivers/crypto/qat/qat_common/Makefile @@ -15,7 +15,10 @@ intel_qat-objs := adf_cfg.o \ adf_gen2_config.o \ adf_gen4_hw_data.o \ adf_gen4_pm.o \ + adf_gen2_dc.o \ qat_crypto.o \ + qat_compression.o \ + qat_comp_algs.o \ qat_algs.o \ qat_asym_algs.o \ qat_algs_send.o \ diff --git a/drivers/crypto/qat/qat_common/adf_accel_devices.h b/drivers/crypto/qat/qat_common/adf_accel_devices.h index 0a55a4f34dcf..284f5aad3ee0 100644 --- a/drivers/crypto/qat/qat_common/adf_accel_devices.h +++ b/drivers/crypto/qat/qat_common/adf_accel_devices.h @@ -163,6 +163,10 @@ struct adf_pfvf_ops { u32 pfvf_offset, u8 compat_ver); }; +struct adf_dc_ops { + void (*build_deflate_ctx)(void *ctx); +}; + struct adf_hw_device_data { struct adf_hw_device_class *dev_class; u32 
(*get_accel_mask)(struct adf_hw_device_data *self); @@ -202,6 +206,7 @@ struct adf_hw_device_data { int (*dev_config)(struct adf_accel_dev *accel_dev); struct adf_pfvf_ops pfvf_ops; struct adf_hw_csr_ops csr_ops; + struct adf_dc_ops dc_ops; const char *fw_name; const char *fw_mmp_name; u32 fuses; @@ -247,6 +252,7 @@ struct adf_hw_device_data { #define GET_MAX_ACCELENGINES(accel_dev) (GET_HW_DATA(accel_dev)->num_engines) #define GET_CSR_OPS(accel_dev) (&(accel_dev)->hw_device->csr_ops) #define GET_PFVF_OPS(accel_dev) (&(accel_dev)->hw_device->pfvf_ops) +#define GET_DC_OPS(accel_dev) (&(accel_dev)->hw_device->dc_ops) #define accel_to_pci_dev(accel_ptr) accel_ptr->accel_pci_dev.pci_dev struct adf_admin_comms; @@ -266,13 +272,21 @@ struct adf_accel_vf_info { u8 vf_compat_ver; }; +struct adf_dc_data { + u8 *ovf_buff; + size_t ovf_buff_sz; + dma_addr_t ovf_buff_p; +}; + struct adf_accel_dev { struct adf_etr_data *transport; struct adf_hw_device_data *hw_device; struct adf_cfg_device_data *cfg; struct adf_fw_loader_data *fw_loader; struct adf_admin_comms *admin; + struct adf_dc_data *dc_data; struct list_head crypto_list; + struct list_head compression_list; unsigned long status; atomic_t ref_count; struct dentry *debugfs_dir; diff --git a/drivers/crypto/qat/qat_common/adf_cfg_strings.h b/drivers/crypto/qat/qat_common/adf_cfg_strings.h index 655248dbf962..5d8c3bdb258c 100644 --- a/drivers/crypto/qat/qat_common/adf_cfg_strings.h +++ b/drivers/crypto/qat/qat_common/adf_cfg_strings.h @@ -20,6 +20,7 @@ #define ADF_ETRMGR_BANK "Bank" #define ADF_RING_SYM_BANK_NUM "BankSymNumber" #define ADF_RING_ASYM_BANK_NUM "BankAsymNumber" +#define ADF_RING_DC_BANK_NUM "BankDcNumber" #define ADF_CY "Cy" #define ADF_DC "Dc" #define ADF_CFG_DC "dc" diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h index b8ec0268d2d2..7189265573c0 100644 --- a/drivers/crypto/qat/qat_common/adf_common_drv.h +++ 
b/drivers/crypto/qat/qat_common/adf_common_drv.h @@ -120,6 +120,14 @@ void qat_algs_unregister(void); int qat_asym_algs_register(void); void qat_asym_algs_unregister(void); +struct qat_compression_instance *qat_compression_get_instance_node(int node); +void qat_compression_put_instance(struct qat_compression_instance *inst); +int qat_compression_register(void); +int qat_compression_unregister(void); +int qat_comp_algs_register(void); +void qat_comp_algs_unregister(void); +void qat_comp_alg_callback(void *resp); + int adf_isr_resource_alloc(struct adf_accel_dev *accel_dev); void adf_isr_resource_free(struct adf_accel_dev *accel_dev); int adf_vf_isr_resource_alloc(struct adf_accel_dev *accel_dev); diff --git a/drivers/crypto/qat/qat_common/adf_ctl_drv.c b/drivers/crypto/qat/qat_common/adf_ctl_drv.c index 82b69e1f725b..9190532b27eb 100644 --- a/drivers/crypto/qat/qat_common/adf_ctl_drv.c +++ b/drivers/crypto/qat/qat_common/adf_ctl_drv.c @@ -438,8 +438,13 @@ static int __init adf_register_ctl_device_driver(void) if (qat_crypto_register()) goto err_crypto_register; + if (qat_compression_register()) + goto err_compression_register; + return 0; +err_compression_register: + qat_crypto_unregister(); err_crypto_register: adf_exit_vf_wq(); err_vf_wq: @@ -463,6 +468,7 @@ static void __exit adf_unregister_ctl_device_driver(void) adf_exit_vf_wq(); adf_exit_pf_wq(); qat_crypto_unregister(); + qat_compression_unregister(); adf_clean_vf_map(false); mutex_destroy(&adf_ctl_lock); } diff --git a/drivers/crypto/qat/qat_common/adf_gen2_config.c b/drivers/crypto/qat/qat_common/adf_gen2_config.c index 1c490e1859a7..eeb30da7587a 100644 --- a/drivers/crypto/qat/qat_common/adf_gen2_config.c +++ b/drivers/crypto/qat/qat_common/adf_gen2_config.c @@ -6,6 +6,7 @@ #include "adf_gen2_config.h" #include "adf_common_drv.h" #include "qat_crypto.h" +#include "qat_compression.h" #include "adf_transport_access_macros.h" static int adf_gen2_crypto_dev_config(struct adf_accel_dev *accel_dev) @@ -23,14 
+24,6 @@ static int adf_gen2_crypto_dev_config(struct adf_accel_dev *accel_dev) else instances = 0; - ret = adf_cfg_section_add(accel_dev, ADF_KERNEL_SEC); - if (ret) - goto err; - - ret = adf_cfg_section_add(accel_dev, "Accelerator0"); - if (ret) - goto err; - for (i = 0; i < instances; i++) { val = i; snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_BANK_NUM, i); @@ -108,10 +101,68 @@ static int adf_gen2_crypto_dev_config(struct adf_accel_dev *accel_dev) if (ret) goto err; - set_bit(ADF_STATUS_CONFIGURED, &accel_dev->status); - return 0; + return ret; + err: - dev_err(&GET_DEV(accel_dev), "Failed to start QAT accel dev\n"); + dev_err(&GET_DEV(accel_dev), "Failed to add configuration for crypto\n"); + return ret; +} + +static int adf_gen2_comp_dev_config(struct adf_accel_dev *accel_dev) +{ + char key[ADF_CFG_MAX_KEY_LEN_IN_BYTES]; + int banks = GET_MAX_BANKS(accel_dev); + int cpus = num_online_cpus(); + unsigned long val; + int instances; + int ret; + int i; + + if (adf_hw_dev_has_compression(accel_dev)) + instances = min(cpus, banks); + else + instances = 0; + + for (i = 0; i < instances; i++) { + val = i; + snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_BANK_NUM, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + val = 512; + snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_SIZE, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + val = 6; + snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_TX, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + val = 14; + snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_RX, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + } + + val = i; + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_DC, + &val, ADF_DEC); + if (ret) + return ret; + + 
return ret; + +err: + dev_err(&GET_DEV(accel_dev), "Failed to add configuration for compression\n"); return ret; } @@ -126,6 +177,30 @@ err: */ int adf_gen2_dev_config(struct adf_accel_dev *accel_dev) { - return adf_gen2_crypto_dev_config(accel_dev); + int ret; + + ret = adf_cfg_section_add(accel_dev, ADF_KERNEL_SEC); + if (ret) + goto err; + + ret = adf_cfg_section_add(accel_dev, "Accelerator0"); + if (ret) + goto err; + + ret = adf_gen2_crypto_dev_config(accel_dev); + if (ret) + goto err; + + ret = adf_gen2_comp_dev_config(accel_dev); + if (ret) + goto err; + + set_bit(ADF_STATUS_CONFIGURED, &accel_dev->status); + + return ret; + +err: + dev_err(&GET_DEV(accel_dev), "Failed to configure QAT driver\n"); + return ret; } EXPORT_SYMBOL_GPL(adf_gen2_dev_config); diff --git a/drivers/crypto/qat/qat_common/adf_gen2_dc.c b/drivers/crypto/qat/qat_common/adf_gen2_dc.c new file mode 100644 index 000000000000..47261b1c1da6 --- /dev/null +++ b/drivers/crypto/qat/qat_common/adf_gen2_dc.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2022 Intel Corporation */ +#include "adf_accel_devices.h" +#include "adf_gen2_dc.h" +#include "icp_qat_fw_comp.h" + +static void qat_comp_build_deflate_ctx(void *ctx) +{ + struct icp_qat_fw_comp_req *req_tmpl = (struct icp_qat_fw_comp_req *)ctx; + struct icp_qat_fw_comn_req_hdr *header = &req_tmpl->comn_hdr; + struct icp_qat_fw_comp_req_hdr_cd_pars *cd_pars = &req_tmpl->cd_pars; + struct icp_qat_fw_comp_req_params *req_pars = &req_tmpl->comp_pars; + struct icp_qat_fw_comp_cd_hdr *comp_cd_ctrl = &req_tmpl->comp_cd_ctrl; + + memset(req_tmpl, 0, sizeof(*req_tmpl)); + header->hdr_flags = + ICP_QAT_FW_COMN_HDR_FLAGS_BUILD(ICP_QAT_FW_COMN_REQ_FLAG_SET); + header->service_type = ICP_QAT_FW_COMN_REQ_CPM_FW_COMP; + header->service_cmd_id = ICP_QAT_FW_COMP_CMD_STATIC; + header->comn_req_flags = + ICP_QAT_FW_COMN_FLAGS_BUILD(QAT_COMN_CD_FLD_TYPE_16BYTE_DATA, + QAT_COMN_PTR_TYPE_SGL); + header->serv_specif_flags = + 
ICP_QAT_FW_COMP_FLAGS_BUILD(ICP_QAT_FW_COMP_STATELESS_SESSION, + ICP_QAT_FW_COMP_NOT_AUTO_SELECT_BEST, + ICP_QAT_FW_COMP_NOT_ENH_AUTO_SELECT_BEST, + ICP_QAT_FW_COMP_NOT_DISABLE_TYPE0_ENH_AUTO_SELECT_BEST, + ICP_QAT_FW_COMP_ENABLE_SECURE_RAM_USED_AS_INTMD_BUF); + cd_pars->u.sl.comp_slice_cfg_word[0] = + ICP_QAT_HW_COMPRESSION_CONFIG_BUILD(ICP_QAT_HW_COMPRESSION_DIR_COMPRESS, + ICP_QAT_HW_COMPRESSION_DELAYED_MATCH_DISABLED, + ICP_QAT_HW_COMPRESSION_ALGO_DEFLATE, + ICP_QAT_HW_COMPRESSION_DEPTH_1, + ICP_QAT_HW_COMPRESSION_FILE_TYPE_0); + req_pars->crc.legacy.initial_adler = COMP_CPR_INITIAL_ADLER; + req_pars->crc.legacy.initial_crc32 = COMP_CPR_INITIAL_CRC; + req_pars->req_par_flags = + ICP_QAT_FW_COMP_REQ_PARAM_FLAGS_BUILD(ICP_QAT_FW_COMP_SOP, + ICP_QAT_FW_COMP_EOP, + ICP_QAT_FW_COMP_BFINAL, + ICP_QAT_FW_COMP_CNV, + ICP_QAT_FW_COMP_CNV_RECOVERY, + ICP_QAT_FW_COMP_NO_CNV_DFX, + ICP_QAT_FW_COMP_CRC_MODE_LEGACY, + ICP_QAT_FW_COMP_NO_XXHASH_ACC, + ICP_QAT_FW_COMP_CNV_ERROR_NONE, + ICP_QAT_FW_COMP_NO_APPEND_CRC, + ICP_QAT_FW_COMP_NO_DROP_DATA); + ICP_QAT_FW_COMN_NEXT_ID_SET(comp_cd_ctrl, ICP_QAT_FW_SLICE_DRAM_WR); + ICP_QAT_FW_COMN_CURR_ID_SET(comp_cd_ctrl, ICP_QAT_FW_SLICE_COMP); + + /* Fill second half of the template for decompression */ + memcpy(req_tmpl + 1, req_tmpl, sizeof(*req_tmpl)); + req_tmpl++; + header = &req_tmpl->comn_hdr; + header->service_cmd_id = ICP_QAT_FW_COMP_CMD_DECOMPRESS; + cd_pars = &req_tmpl->cd_pars; + cd_pars->u.sl.comp_slice_cfg_word[0] = + ICP_QAT_HW_COMPRESSION_CONFIG_BUILD(ICP_QAT_HW_COMPRESSION_DIR_DECOMPRESS, + ICP_QAT_HW_COMPRESSION_DELAYED_MATCH_DISABLED, + ICP_QAT_HW_COMPRESSION_ALGO_DEFLATE, + ICP_QAT_HW_COMPRESSION_DEPTH_1, + ICP_QAT_HW_COMPRESSION_FILE_TYPE_0); +} + +void adf_gen2_init_dc_ops(struct adf_dc_ops *dc_ops) +{ + dc_ops->build_deflate_ctx = qat_comp_build_deflate_ctx; +} +EXPORT_SYMBOL_GPL(adf_gen2_init_dc_ops); diff --git a/drivers/crypto/qat/qat_common/adf_gen2_dc.h b/drivers/crypto/qat/qat_common/adf_gen2_dc.h new file 
mode 100644 index 000000000000..6eae023354d7 --- /dev/null +++ b/drivers/crypto/qat/qat_common/adf_gen2_dc.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2022 Intel Corporation */ +#ifndef ADF_GEN2_DC_H +#define ADF_GEN2_DC_H + +#include "adf_accel_devices.h" + +void adf_gen2_init_dc_ops(struct adf_dc_ops *dc_ops); + +#endif /* ADF_GEN2_DC_H */ diff --git a/drivers/crypto/qat/qat_common/adf_init.c b/drivers/crypto/qat/qat_common/adf_init.c index 33a9a46d6949..cef7bb8ec007 100644 --- a/drivers/crypto/qat/qat_common/adf_init.c +++ b/drivers/crypto/qat/qat_common/adf_init.c @@ -209,6 +209,14 @@ int adf_dev_start(struct adf_accel_dev *accel_dev) clear_bit(ADF_STATUS_STARTED, &accel_dev->status); return -EFAULT; } + + if (!list_empty(&accel_dev->compression_list) && qat_comp_algs_register()) { + dev_err(&GET_DEV(accel_dev), + "Failed to register compression algs\n"); + set_bit(ADF_STATUS_STARTING, &accel_dev->status); + clear_bit(ADF_STATUS_STARTED, &accel_dev->status); + return -EFAULT; + } return 0; } EXPORT_SYMBOL_GPL(adf_dev_start); @@ -242,6 +250,9 @@ void adf_dev_stop(struct adf_accel_dev *accel_dev) qat_asym_algs_unregister(); } + if (!list_empty(&accel_dev->compression_list)) + qat_comp_algs_unregister(); + list_for_each(list_itr, &service_table) { service = list_entry(list_itr, struct service_hndl, list); if (!test_bit(accel_dev->accel_id, service->start_status)) diff --git a/drivers/crypto/qat/qat_common/adf_sriov.c b/drivers/crypto/qat/qat_common/adf_sriov.c index b2db1d70d71f..d85a90cc387b 100644 --- a/drivers/crypto/qat/qat_common/adf_sriov.c +++ b/drivers/crypto/qat/qat_common/adf_sriov.c @@ -170,6 +170,10 @@ int adf_sriov_configure(struct pci_dev *pdev, int numvfs) if (adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_CY, (void *)&val, ADF_DEC)) return -EFAULT; + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_DC, + &val, ADF_DEC); + if (ret) + return ret; set_bit(ADF_STATUS_CONFIGURED, 
&accel_dev->status); diff --git a/drivers/crypto/qat/qat_common/icp_qat_fw.h b/drivers/crypto/qat/qat_common/icp_qat_fw.h index 6dc09d270082..c141160421e1 100644 --- a/drivers/crypto/qat/qat_common/icp_qat_fw.h +++ b/drivers/crypto/qat/qat_common/icp_qat_fw.h @@ -116,6 +116,10 @@ struct icp_qat_fw_comn_resp { #define ICP_QAT_FW_COMN_VALID_FLAG_BITPOS 7 #define ICP_QAT_FW_COMN_VALID_FLAG_MASK 0x1 #define ICP_QAT_FW_COMN_HDR_RESRVD_FLD_MASK 0x7F +#define ICP_QAT_FW_COMN_CNV_FLAG_BITPOS 6 +#define ICP_QAT_FW_COMN_CNV_FLAG_MASK 0x1 +#define ICP_QAT_FW_COMN_CNVNR_FLAG_BITPOS 5 +#define ICP_QAT_FW_COMN_CNVNR_FLAG_MASK 0x1 #define ICP_QAT_FW_COMN_OV_SRV_TYPE_GET(icp_qat_fw_comn_req_hdr_t) \ icp_qat_fw_comn_req_hdr_t.service_type @@ -132,6 +136,26 @@ struct icp_qat_fw_comn_resp { #define ICP_QAT_FW_COMN_HDR_VALID_FLAG_GET(hdr_t) \ ICP_QAT_FW_COMN_VALID_FLAG_GET(hdr_t.hdr_flags) +#define ICP_QAT_FW_COMN_HDR_CNVNR_FLAG_GET(hdr_flags) \ + QAT_FIELD_GET(hdr_flags, \ + ICP_QAT_FW_COMN_CNVNR_FLAG_BITPOS, \ + ICP_QAT_FW_COMN_CNVNR_FLAG_MASK) + +#define ICP_QAT_FW_COMN_HDR_CNVNR_FLAG_SET(hdr_t, val) \ + QAT_FIELD_SET((hdr_t.hdr_flags), (val), \ + ICP_QAT_FW_COMN_CNVNR_FLAG_BITPOS, \ + ICP_QAT_FW_COMN_CNVNR_FLAG_MASK) + +#define ICP_QAT_FW_COMN_HDR_CNV_FLAG_GET(hdr_flags) \ + QAT_FIELD_GET(hdr_flags, \ + ICP_QAT_FW_COMN_CNV_FLAG_BITPOS, \ + ICP_QAT_FW_COMN_CNV_FLAG_MASK) + +#define ICP_QAT_FW_COMN_HDR_CNV_FLAG_SET(hdr_t, val) \ + QAT_FIELD_SET((hdr_t.hdr_flags), (val), \ + ICP_QAT_FW_COMN_CNV_FLAG_BITPOS, \ + ICP_QAT_FW_COMN_CNV_FLAG_MASK) + #define ICP_QAT_FW_COMN_HDR_VALID_FLAG_SET(hdr_t, val) \ ICP_QAT_FW_COMN_VALID_FLAG_SET(hdr_t, val) diff --git a/drivers/crypto/qat/qat_common/icp_qat_fw_comp.h b/drivers/crypto/qat/qat_common/icp_qat_fw_comp.h new file mode 100644 index 000000000000..a03d43fef2b3 --- /dev/null +++ b/drivers/crypto/qat/qat_common/icp_qat_fw_comp.h @@ -0,0 +1,404 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2022 Intel Corporation */ +#ifndef 
_ICP_QAT_FW_COMP_H_ +#define _ICP_QAT_FW_COMP_H_ +#include "icp_qat_fw.h" + +enum icp_qat_fw_comp_cmd_id { + ICP_QAT_FW_COMP_CMD_STATIC = 0, + ICP_QAT_FW_COMP_CMD_DYNAMIC = 1, + ICP_QAT_FW_COMP_CMD_DECOMPRESS = 2, + ICP_QAT_FW_COMP_CMD_DELIMITER +}; + +enum icp_qat_fw_comp_20_cmd_id { + ICP_QAT_FW_COMP_20_CMD_LZ4_COMPRESS = 3, + ICP_QAT_FW_COMP_20_CMD_LZ4_DECOMPRESS = 4, + ICP_QAT_FW_COMP_20_CMD_LZ4S_COMPRESS = 5, + ICP_QAT_FW_COMP_20_CMD_LZ4S_DECOMPRESS = 6, + ICP_QAT_FW_COMP_20_CMD_XP10_COMPRESS = 7, + ICP_QAT_FW_COMP_20_CMD_XP10_DECOMPRESS = 8, + ICP_QAT_FW_COMP_20_CMD_RESERVED_9 = 9, + ICP_QAT_FW_COMP_23_CMD_ZSTD_COMPRESS = 10, + ICP_QAT_FW_COMP_23_CMD_ZSTD_DECOMPRESS = 11, + ICP_QAT_FW_COMP_20_CMD_DELIMITER +}; + +#define ICP_QAT_FW_COMP_STATELESS_SESSION 0 +#define ICP_QAT_FW_COMP_STATEFUL_SESSION 1 +#define ICP_QAT_FW_COMP_NOT_AUTO_SELECT_BEST 0 +#define ICP_QAT_FW_COMP_AUTO_SELECT_BEST 1 +#define ICP_QAT_FW_COMP_NOT_ENH_AUTO_SELECT_BEST 0 +#define ICP_QAT_FW_COMP_ENH_AUTO_SELECT_BEST 1 +#define ICP_QAT_FW_COMP_NOT_DISABLE_TYPE0_ENH_AUTO_SELECT_BEST 0 +#define ICP_QAT_FW_COMP_DISABLE_TYPE0_ENH_AUTO_SELECT_BEST 1 +#define ICP_QAT_FW_COMP_DISABLE_SECURE_RAM_USED_AS_INTMD_BUF 1 +#define ICP_QAT_FW_COMP_ENABLE_SECURE_RAM_USED_AS_INTMD_BUF 0 +#define ICP_QAT_FW_COMP_SESSION_TYPE_BITPOS 2 +#define ICP_QAT_FW_COMP_SESSION_TYPE_MASK 0x1 +#define ICP_QAT_FW_COMP_AUTO_SELECT_BEST_BITPOS 3 +#define ICP_QAT_FW_COMP_AUTO_SELECT_BEST_MASK 0x1 +#define ICP_QAT_FW_COMP_ENHANCED_AUTO_SELECT_BEST_BITPOS 4 +#define ICP_QAT_FW_COMP_ENHANCED_AUTO_SELECT_BEST_MASK 0x1 +#define ICP_QAT_FW_COMP_RET_DISABLE_TYPE0_HEADER_DATA_BITPOS 5 +#define ICP_QAT_FW_COMP_RET_DISABLE_TYPE0_HEADER_DATA_MASK 0x1 +#define ICP_QAT_FW_COMP_DISABLE_SECURE_RAM_AS_INTMD_BUF_BITPOS 7 +#define ICP_QAT_FW_COMP_DISABLE_SECURE_RAM_AS_INTMD_BUF_MASK 0x1 + +#define ICP_QAT_FW_COMP_FLAGS_BUILD(sesstype, autoselect, enhanced_asb, \ + ret_uncomp, secure_ram) \ + ((((sesstype) & ICP_QAT_FW_COMP_SESSION_TYPE_MASK) 
<< \ + ICP_QAT_FW_COMP_SESSION_TYPE_BITPOS) | \ + (((autoselect) & ICP_QAT_FW_COMP_AUTO_SELECT_BEST_MASK) << \ + ICP_QAT_FW_COMP_AUTO_SELECT_BEST_BITPOS) | \ + (((enhanced_asb) & ICP_QAT_FW_COMP_ENHANCED_AUTO_SELECT_BEST_MASK) << \ + ICP_QAT_FW_COMP_ENHANCED_AUTO_SELECT_BEST_BITPOS) | \ + (((ret_uncomp) & ICP_QAT_FW_COMP_RET_DISABLE_TYPE0_HEADER_DATA_MASK) << \ + ICP_QAT_FW_COMP_RET_DISABLE_TYPE0_HEADER_DATA_BITPOS) | \ + (((secure_ram) & ICP_QAT_FW_COMP_DISABLE_SECURE_RAM_AS_INTMD_BUF_MASK) << \ + ICP_QAT_FW_COMP_DISABLE_SECURE_RAM_AS_INTMD_BUF_BITPOS)) + +#define ICP_QAT_FW_COMP_SESSION_TYPE_GET(flags) \ + QAT_FIELD_GET(flags, ICP_QAT_FW_COMP_SESSION_TYPE_BITPOS, \ + ICP_QAT_FW_COMP_SESSION_TYPE_MASK) + +#define ICP_QAT_FW_COMP_SESSION_TYPE_SET(flags, val) \ + QAT_FIELD_SET(flags, val, ICP_QAT_FW_COMP_SESSION_TYPE_BITPOS, \ + ICP_QAT_FW_COMP_SESSION_TYPE_MASK) + +#define ICP_QAT_FW_COMP_AUTO_SELECT_BEST_GET(flags) \ + QAT_FIELD_GET(flags, ICP_QAT_FW_COMP_AUTO_SELECT_BEST_BITPOS, \ + ICP_QAT_FW_COMP_AUTO_SELECT_BEST_MASK) + +#define ICP_QAT_FW_COMP_EN_ASB_GET(flags) \ + QAT_FIELD_GET(flags, ICP_QAT_FW_COMP_ENHANCED_AUTO_SELECT_BEST_BITPOS, \ + ICP_QAT_FW_COMP_ENHANCED_AUTO_SELECT_BEST_MASK) + +#define ICP_QAT_FW_COMP_RET_UNCOMP_GET(flags) \ + QAT_FIELD_GET(flags, \ + ICP_QAT_FW_COMP_RET_DISABLE_TYPE0_HEADER_DATA_BITPOS, \ + ICP_QAT_FW_COMP_RET_DISABLE_TYPE0_HEADER_DATA_MASK) + +#define ICP_QAT_FW_COMP_SECURE_RAM_USE_GET(flags) \ + QAT_FIELD_GET(flags, \ + ICP_QAT_FW_COMP_DISABLE_SECURE_RAM_AS_INTMD_BUF_BITPOS, \ + ICP_QAT_FW_COMP_DISABLE_SECURE_RAM_AS_INTMD_BUF_MASK) + +struct icp_qat_fw_comp_req_hdr_cd_pars { + union { + struct { + __u64 content_desc_addr; + __u16 content_desc_resrvd1; + __u8 content_desc_params_sz; + __u8 content_desc_hdr_resrvd2; + __u32 content_desc_resrvd3; + } s; + struct { + __u32 comp_slice_cfg_word[ICP_QAT_FW_NUM_LONGWORDS_2]; + __u32 content_desc_resrvd4; + } sl; + } u; +}; + +struct icp_qat_fw_comp_req_params { + __u32 comp_len; + __u32 
out_buffer_sz; + union { + struct { + __u32 initial_crc32; + __u32 initial_adler; + } legacy; + __u64 crc_data_addr; + } crc; + __u32 req_par_flags; + __u32 rsrvd; +}; + +#define ICP_QAT_FW_COMP_REQ_PARAM_FLAGS_BUILD(sop, eop, bfinal, cnv, cnvnr, \ + cnvdfx, crc, xxhash_acc, \ + cnv_error_type, append_crc, \ + drop_data) \ + ((((sop) & ICP_QAT_FW_COMP_SOP_MASK) << \ + ICP_QAT_FW_COMP_SOP_BITPOS) | \ + (((eop) & ICP_QAT_FW_COMP_EOP_MASK) << \ + ICP_QAT_FW_COMP_EOP_BITPOS) | \ + (((bfinal) & ICP_QAT_FW_COMP_BFINAL_MASK) \ + << ICP_QAT_FW_COMP_BFINAL_BITPOS) | \ + (((cnv) & ICP_QAT_FW_COMP_CNV_MASK) << \ + ICP_QAT_FW_COMP_CNV_BITPOS) | \ + (((cnvnr) & ICP_QAT_FW_COMP_CNVNR_MASK) \ + << ICP_QAT_FW_COMP_CNVNR_BITPOS) | \ + (((cnvdfx) & ICP_QAT_FW_COMP_CNV_DFX_MASK) \ + << ICP_QAT_FW_COMP_CNV_DFX_BITPOS) | \ + (((crc) & ICP_QAT_FW_COMP_CRC_MODE_MASK) \ + << ICP_QAT_FW_COMP_CRC_MODE_BITPOS) | \ + (((xxhash_acc) & ICP_QAT_FW_COMP_XXHASH_ACC_MODE_MASK) \ + << ICP_QAT_FW_COMP_XXHASH_ACC_MODE_BITPOS) | \ + (((cnv_error_type) & ICP_QAT_FW_COMP_CNV_ERROR_MASK) \ + << ICP_QAT_FW_COMP_CNV_ERROR_BITPOS) | \ + (((append_crc) & ICP_QAT_FW_COMP_APPEND_CRC_MASK) \ + << ICP_QAT_FW_COMP_APPEND_CRC_BITPOS) | \ + (((drop_data) & ICP_QAT_FW_COMP_DROP_DATA_MASK) \ + << ICP_QAT_FW_COMP_DROP_DATA_BITPOS)) + +#define ICP_QAT_FW_COMP_NOT_SOP 0 +#define ICP_QAT_FW_COMP_SOP 1 +#define ICP_QAT_FW_COMP_NOT_EOP 0 +#define ICP_QAT_FW_COMP_EOP 1 +#define ICP_QAT_FW_COMP_NOT_BFINAL 0 +#define ICP_QAT_FW_COMP_BFINAL 1 +#define ICP_QAT_FW_COMP_NO_CNV 0 +#define ICP_QAT_FW_COMP_CNV 1 +#define ICP_QAT_FW_COMP_NO_CNV_RECOVERY 0 +#define ICP_QAT_FW_COMP_CNV_RECOVERY 1 +#define ICP_QAT_FW_COMP_NO_CNV_DFX 0 +#define ICP_QAT_FW_COMP_CNV_DFX 1 +#define ICP_QAT_FW_COMP_CRC_MODE_LEGACY 0 +#define ICP_QAT_FW_COMP_CRC_MODE_E2E 1 +#define ICP_QAT_FW_COMP_NO_XXHASH_ACC 0 +#define ICP_QAT_FW_COMP_XXHASH_ACC 1 +#define ICP_QAT_FW_COMP_APPEND_CRC 1 +#define ICP_QAT_FW_COMP_NO_APPEND_CRC 0 +#define 
ICP_QAT_FW_COMP_DROP_DATA 1 +#define ICP_QAT_FW_COMP_NO_DROP_DATA 0 +#define ICP_QAT_FW_COMP_SOP_BITPOS 0 +#define ICP_QAT_FW_COMP_SOP_MASK 0x1 +#define ICP_QAT_FW_COMP_EOP_BITPOS 1 +#define ICP_QAT_FW_COMP_EOP_MASK 0x1 +#define ICP_QAT_FW_COMP_BFINAL_BITPOS 6 +#define ICP_QAT_FW_COMP_BFINAL_MASK 0x1 +#define ICP_QAT_FW_COMP_CNV_BITPOS 16 +#define ICP_QAT_FW_COMP_CNV_MASK 0x1 +#define ICP_QAT_FW_COMP_CNVNR_BITPOS 17 +#define ICP_QAT_FW_COMP_CNVNR_MASK 0x1 +#define ICP_QAT_FW_COMP_CNV_DFX_BITPOS 18 +#define ICP_QAT_FW_COMP_CNV_DFX_MASK 0x1 +#define ICP_QAT_FW_COMP_CRC_MODE_BITPOS 19 +#define ICP_QAT_FW_COMP_CRC_MODE_MASK 0x1 +#define ICP_QAT_FW_COMP_XXHASH_ACC_MODE_BITPOS 20 +#define ICP_QAT_FW_COMP_XXHASH_ACC_MODE_MASK 0x1 +#define ICP_QAT_FW_COMP_CNV_ERROR_BITPOS 21 +#define ICP_QAT_FW_COMP_CNV_ERROR_MASK 0b111 +#define ICP_QAT_FW_COMP_CNV_ERROR_NONE 0b000 +#define ICP_QAT_FW_COMP_CNV_ERROR_CHECKSUM 0b001 +#define ICP_QAT_FW_COMP_CNV_ERROR_DCPR_OBC_DIFF 0b010 +#define ICP_QAT_FW_COMP_CNV_ERROR_DCPR 0b011 +#define ICP_QAT_FW_COMP_CNV_ERROR_XLT 0b100 +#define ICP_QAT_FW_COMP_CNV_ERROR_DCPR_IBC_DIFF 0b101 +#define ICP_QAT_FW_COMP_APPEND_CRC_BITPOS 24 +#define ICP_QAT_FW_COMP_APPEND_CRC_MASK 0x1 +#define ICP_QAT_FW_COMP_DROP_DATA_BITPOS 25 +#define ICP_QAT_FW_COMP_DROP_DATA_MASK 0x1 + +#define ICP_QAT_FW_COMP_SOP_GET(flags) \ + QAT_FIELD_GET(flags, ICP_QAT_FW_COMP_SOP_BITPOS, \ + ICP_QAT_FW_COMP_SOP_MASK) + +#define ICP_QAT_FW_COMP_SOP_SET(flags, val) \ + QAT_FIELD_SET(flags, val, ICP_QAT_FW_COMP_SOP_BITPOS, \ + ICP_QAT_FW_COMP_SOP_MASK) + +#define ICP_QAT_FW_COMP_EOP_GET(flags) \ + QAT_FIELD_GET(flags, ICP_QAT_FW_COMP_EOP_BITPOS, \ + ICP_QAT_FW_COMP_EOP_MASK) + +#define ICP_QAT_FW_COMP_EOP_SET(flags, val) \ + QAT_FIELD_SET(flags, val, ICP_QAT_FW_COMP_EOP_BITPOS, \ + ICP_QAT_FW_COMP_EOP_MASK) + +#define ICP_QAT_FW_COMP_BFINAL_GET(flags) \ + QAT_FIELD_GET(flags, ICP_QAT_FW_COMP_BFINAL_BITPOS, \ + ICP_QAT_FW_COMP_BFINAL_MASK) + +#define ICP_QAT_FW_COMP_BFINAL_SET(flags, 
val) \ + QAT_FIELD_SET(flags, val, ICP_QAT_FW_COMP_BFINAL_BITPOS, \ + ICP_QAT_FW_COMP_BFINAL_MASK) + +#define ICP_QAT_FW_COMP_CNV_GET(flags) \ + QAT_FIELD_GET(flags, ICP_QAT_FW_COMP_CNV_BITPOS, \ + ICP_QAT_FW_COMP_CNV_MASK) + +#define ICP_QAT_FW_COMP_CNVNR_GET(flags) \ + QAT_FIELD_GET(flags, ICP_QAT_FW_COMP_CNVNR_BITPOS, \ + ICP_QAT_FW_COMP_CNVNR_MASK) + +#define ICP_QAT_FW_COMP_CNV_DFX_GET(flags) \ + QAT_FIELD_GET(flags, ICP_QAT_FW_COMP_CNV_DFX_BITPOS, \ + ICP_QAT_FW_COMP_CNV_DFX_MASK) + +#define ICP_QAT_FW_COMP_CNV_DFX_SET(flags, val) \ + QAT_FIELD_SET(flags, val, ICP_QAT_FW_COMP_CNV_DFX_BITPOS, \ + ICP_QAT_FW_COMP_CNV_DFX_MASK) + +#define ICP_QAT_FW_COMP_CRC_MODE_GET(flags) \ + QAT_FIELD_GET(flags, ICP_QAT_FW_COMP_CRC_MODE_BITPOS, \ + ICP_QAT_FW_COMP_CRC_MODE_MASK) + +#define ICP_QAT_FW_COMP_XXHASH_ACC_MODE_GET(flags) \ + QAT_FIELD_GET(flags, ICP_QAT_FW_COMP_XXHASH_ACC_MODE_BITPOS, \ + ICP_QAT_FW_COMP_XXHASH_ACC_MODE_MASK) + +#define ICP_QAT_FW_COMP_XXHASH_ACC_MODE_SET(flags, val) \ + QAT_FIELD_SET(flags, val, ICP_QAT_FW_COMP_XXHASH_ACC_MODE_BITPOS, \ + ICP_QAT_FW_COMP_XXHASH_ACC_MODE_MASK) + +#define ICP_QAT_FW_COMP_CNV_ERROR_TYPE_GET(flags) \ + QAT_FIELD_GET(flags, ICP_QAT_FW_COMP_CNV_ERROR_BITPOS, \ + ICP_QAT_FW_COMP_CNV_ERROR_MASK) + +#define ICP_QAT_FW_COMP_CNV_ERROR_TYPE_SET(flags, val) \ + QAT_FIELD_SET(flags, val, ICP_QAT_FW_COMP_CNV_ERROR_BITPOS, \ + ICP_QAT_FW_COMP_CNV_ERROR_MASK) + +struct icp_qat_fw_xlt_req_params { + __u64 inter_buff_ptr; +}; + +struct icp_qat_fw_comp_cd_hdr { + __u16 ram_bank_flags; + __u8 comp_cfg_offset; + __u8 next_curr_id; + __u32 resrvd; + __u64 comp_state_addr; + __u64 ram_banks_addr; +}; + +#define COMP_CPR_INITIAL_CRC 0 +#define COMP_CPR_INITIAL_ADLER 1 + +struct icp_qat_fw_xlt_cd_hdr { + __u16 resrvd1; + __u8 resrvd2; + __u8 next_curr_id; + __u32 resrvd3; +}; + +struct icp_qat_fw_comp_req { + struct icp_qat_fw_comn_req_hdr comn_hdr; + struct icp_qat_fw_comp_req_hdr_cd_pars cd_pars; + struct icp_qat_fw_comn_req_mid 
comn_mid; + struct icp_qat_fw_comp_req_params comp_pars; + union { + struct icp_qat_fw_xlt_req_params xlt_pars; + __u32 resrvd1[ICP_QAT_FW_NUM_LONGWORDS_2]; + } u1; + __u32 resrvd2[ICP_QAT_FW_NUM_LONGWORDS_2]; + struct icp_qat_fw_comp_cd_hdr comp_cd_ctrl; + union { + struct icp_qat_fw_xlt_cd_hdr xlt_cd_ctrl; + __u32 resrvd3[ICP_QAT_FW_NUM_LONGWORDS_2]; + } u2; +}; + +struct icp_qat_fw_resp_comp_pars { + __u32 input_byte_counter; + __u32 output_byte_counter; + union { + struct { + __u32 curr_crc32; + __u32 curr_adler_32; + } legacy; + __u32 resrvd[ICP_QAT_FW_NUM_LONGWORDS_2]; + } crc; +}; + +struct icp_qat_fw_comp_state { + __u32 rd8_counter; + __u32 status_flags; + __u32 in_counter; + __u32 out_counter; + __u64 intermediate_state; + __u32 lobc; + __u32 replaybc; + __u64 pcrc64_poly; + __u32 crc32; + __u32 adler_xxhash32; + __u64 pcrc64_xorout; + __u32 out_buf_size; + __u32 in_buf_size; + __u64 in_pcrc64; + __u64 out_pcrc64; + __u32 lobs; + __u32 libc; + __u64 reserved; + __u32 xxhash_state[4]; + __u32 cleartext[4]; +}; + +struct icp_qat_fw_comp_resp { + struct icp_qat_fw_comn_resp_hdr comn_resp; + __u64 opaque_data; + struct icp_qat_fw_resp_comp_pars comp_resp_pars; +}; + +#define QAT_FW_COMP_BANK_FLAG_MASK 0x1 +#define QAT_FW_COMP_BANK_I_BITPOS 8 +#define QAT_FW_COMP_BANK_H_BITPOS 7 +#define QAT_FW_COMP_BANK_G_BITPOS 6 +#define QAT_FW_COMP_BANK_F_BITPOS 5 +#define QAT_FW_COMP_BANK_E_BITPOS 4 +#define QAT_FW_COMP_BANK_D_BITPOS 3 +#define QAT_FW_COMP_BANK_C_BITPOS 2 +#define QAT_FW_COMP_BANK_B_BITPOS 1 +#define QAT_FW_COMP_BANK_A_BITPOS 0 + +enum icp_qat_fw_comp_bank_enabled { + ICP_QAT_FW_COMP_BANK_DISABLED = 0, + ICP_QAT_FW_COMP_BANK_ENABLED = 1, + ICP_QAT_FW_COMP_BANK_DELIMITER = 2 +}; + +#define ICP_QAT_FW_COMP_RAM_FLAGS_BUILD(bank_i_enable, bank_h_enable, \ + bank_g_enable, bank_f_enable, \ + bank_e_enable, bank_d_enable, \ + bank_c_enable, bank_b_enable, \ + bank_a_enable) \ + ((((bank_i_enable) & QAT_FW_COMP_BANK_FLAG_MASK) << \ + QAT_FW_COMP_BANK_I_BITPOS) | 
\ + (((bank_h_enable) & QAT_FW_COMP_BANK_FLAG_MASK) << \ + QAT_FW_COMP_BANK_H_BITPOS) | \ + (((bank_g_enable) & QAT_FW_COMP_BANK_FLAG_MASK) << \ + QAT_FW_COMP_BANK_G_BITPOS) | \ + (((bank_f_enable) & QAT_FW_COMP_BANK_FLAG_MASK) << \ + QAT_FW_COMP_BANK_F_BITPOS) | \ + (((bank_e_enable) & QAT_FW_COMP_BANK_FLAG_MASK) << \ + QAT_FW_COMP_BANK_E_BITPOS) | \ + (((bank_d_enable) & QAT_FW_COMP_BANK_FLAG_MASK) << \ + QAT_FW_COMP_BANK_D_BITPOS) | \ + (((bank_c_enable) & QAT_FW_COMP_BANK_FLAG_MASK) << \ + QAT_FW_COMP_BANK_C_BITPOS) | \ + (((bank_b_enable) & QAT_FW_COMP_BANK_FLAG_MASK) << \ + QAT_FW_COMP_BANK_B_BITPOS) | \ + (((bank_a_enable) & QAT_FW_COMP_BANK_FLAG_MASK) << \ + QAT_FW_COMP_BANK_A_BITPOS)) + +struct icp_qat_fw_comp_crc_data_struct { + __u32 crc32; + union { + __u32 adler; + __u32 xxhash; + } adler_xxhash_u; + __u32 cpr_in_crc_lo; + __u32 cpr_in_crc_hi; + __u32 cpr_out_crc_lo; + __u32 cpr_out_crc_hi; + __u32 xlt_in_crc_lo; + __u32 xlt_in_crc_hi; + __u32 xlt_out_crc_lo; + __u32 xlt_out_crc_hi; + __u32 prog_crc_poly_lo; + __u32 prog_crc_poly_hi; + __u32 xor_out_lo; + __u32 xor_out_hi; + __u32 append_crc_lo; + __u32 append_crc_hi; +}; + +struct xxhash_acc_state_buff { + __u32 in_counter; + __u32 out_counter; + __u32 xxhash_state[4]; + __u32 clear_txt[4]; +}; + +#endif diff --git a/drivers/crypto/qat/qat_common/icp_qat_hw.h b/drivers/crypto/qat/qat_common/icp_qat_hw.h index 433304cad2ed..4042739bb6fa 100644 --- a/drivers/crypto/qat/qat_common/icp_qat_hw.h +++ b/drivers/crypto/qat/qat_common/icp_qat_hw.h @@ -307,4 +307,70 @@ struct icp_qat_hw_cipher_algo_blk { struct icp_qat_hw_ucs_cipher_aes256_f8 ucs_aes; }; } __aligned(64); + +enum icp_qat_hw_compression_direction { + ICP_QAT_HW_COMPRESSION_DIR_COMPRESS = 0, + ICP_QAT_HW_COMPRESSION_DIR_DECOMPRESS = 1, + ICP_QAT_HW_COMPRESSION_DIR_DELIMITER = 2 +}; + +enum icp_qat_hw_compression_delayed_match { + ICP_QAT_HW_COMPRESSION_DELAYED_MATCH_DISABLED = 0, + ICP_QAT_HW_COMPRESSION_DELAYED_MATCH_ENABLED = 1, + 
ICP_QAT_HW_COMPRESSION_DELAYED_MATCH_DELIMITER = 2 +}; + +enum icp_qat_hw_compression_algo { + ICP_QAT_HW_COMPRESSION_ALGO_DEFLATE = 0, + ICP_QAT_HW_COMPRESSION_ALGO_LZS = 1, + ICP_QAT_HW_COMPRESSION_ALGO_DELIMITER = 2 +}; + +enum icp_qat_hw_compression_depth { + ICP_QAT_HW_COMPRESSION_DEPTH_1 = 0, + ICP_QAT_HW_COMPRESSION_DEPTH_4 = 1, + ICP_QAT_HW_COMPRESSION_DEPTH_8 = 2, + ICP_QAT_HW_COMPRESSION_DEPTH_16 = 3, + ICP_QAT_HW_COMPRESSION_DEPTH_128 = 4, + ICP_QAT_HW_COMPRESSION_DEPTH_DELIMITER = 5 +}; + +enum icp_qat_hw_compression_file_type { + ICP_QAT_HW_COMPRESSION_FILE_TYPE_0 = 0, + ICP_QAT_HW_COMPRESSION_FILE_TYPE_1 = 1, + ICP_QAT_HW_COMPRESSION_FILE_TYPE_2 = 2, + ICP_QAT_HW_COMPRESSION_FILE_TYPE_3 = 3, + ICP_QAT_HW_COMPRESSION_FILE_TYPE_4 = 4, + ICP_QAT_HW_COMPRESSION_FILE_TYPE_DELIMITER = 5 +}; + +struct icp_qat_hw_compression_config { + __u32 lower_val; + __u32 upper_val; +}; + +#define QAT_COMPRESSION_DIR_BITPOS 4 +#define QAT_COMPRESSION_DIR_MASK 0x7 +#define QAT_COMPRESSION_DELAYED_MATCH_BITPOS 16 +#define QAT_COMPRESSION_DELAYED_MATCH_MASK 0x1 +#define QAT_COMPRESSION_ALGO_BITPOS 31 +#define QAT_COMPRESSION_ALGO_MASK 0x1 +#define QAT_COMPRESSION_DEPTH_BITPOS 28 +#define QAT_COMPRESSION_DEPTH_MASK 0x7 +#define QAT_COMPRESSION_FILE_TYPE_BITPOS 24 +#define QAT_COMPRESSION_FILE_TYPE_MASK 0xF + +#define ICP_QAT_HW_COMPRESSION_CONFIG_BUILD(dir, delayed, \ + algo, depth, filetype) \ + ((((dir) & QAT_COMPRESSION_DIR_MASK) << \ + QAT_COMPRESSION_DIR_BITPOS) | \ + (((delayed) & QAT_COMPRESSION_DELAYED_MATCH_MASK) << \ + QAT_COMPRESSION_DELAYED_MATCH_BITPOS) | \ + (((algo) & QAT_COMPRESSION_ALGO_MASK) << \ + QAT_COMPRESSION_ALGO_BITPOS) | \ + (((depth) & QAT_COMPRESSION_DEPTH_MASK) << \ + QAT_COMPRESSION_DEPTH_BITPOS) | \ + (((filetype) & QAT_COMPRESSION_FILE_TYPE_MASK) << \ + QAT_COMPRESSION_FILE_TYPE_BITPOS)) + #endif diff --git a/drivers/crypto/qat/qat_common/qat_comp_algs.c b/drivers/crypto/qat/qat_common/qat_comp_algs.c new file mode 100644 index 
000000000000..63fd4ac33dbf --- /dev/null +++ b/drivers/crypto/qat/qat_common/qat_comp_algs.c @@ -0,0 +1,274 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2022 Intel Corporation */ +#include +#include +#include +#include +#include +#include "adf_accel_devices.h" +#include "adf_common_drv.h" +#include "qat_bl.h" +#include "qat_comp_req.h" +#include "qat_compression.h" +#include "qat_algs_send.h" + +static DEFINE_MUTEX(algs_lock); +static unsigned int active_devs; + +enum direction { + DECOMPRESSION = 0, + COMPRESSION = 1, +}; + +struct qat_compression_ctx { + u8 comp_ctx[QAT_COMP_CTX_SIZE]; + struct qat_compression_instance *inst; +}; + +struct qat_compression_req { + u8 req[QAT_COMP_REQ_SIZE]; + struct qat_compression_ctx *qat_compression_ctx; + struct acomp_req *acompress_req; + struct qat_request_buffs buf; + enum direction dir; + int actual_dlen; + struct qat_alg_req alg_req; +}; + +static int qat_alg_send_dc_message(struct qat_compression_req *qat_req, + struct qat_compression_instance *inst, + struct crypto_async_request *base) +{ + struct qat_alg_req *alg_req = &qat_req->alg_req; + + alg_req->fw_req = (u32 *)&qat_req->req; + alg_req->tx_ring = inst->dc_tx; + alg_req->base = base; + alg_req->backlog = &inst->backlog; + + return qat_alg_send_message(alg_req); +} + +static void qat_comp_generic_callback(struct qat_compression_req *qat_req, + void *resp) +{ + struct acomp_req *areq = qat_req->acompress_req; + struct qat_compression_ctx *ctx = qat_req->qat_compression_ctx; + struct adf_accel_dev *accel_dev = ctx->inst->accel_dev; + struct crypto_acomp *tfm = crypto_acomp_reqtfm(areq); + struct qat_compression_instance *inst = ctx->inst; + int consumed, produced; + s8 cmp_err, xlt_err; + int res = -EBADMSG; + int status; + u8 cnv; + + status = qat_comp_get_cmp_status(resp); + status |= qat_comp_get_xlt_status(resp); + cmp_err = qat_comp_get_cmp_err(resp); + xlt_err = qat_comp_get_xlt_err(resp); + + consumed = qat_comp_get_consumed_ctr(resp); + 
produced = qat_comp_get_produced_ctr(resp); + + dev_dbg(&GET_DEV(accel_dev), + "[%s][%s][%s] slen = %8d dlen = %8d consumed = %8d produced = %8d cmp_err = %3d xlt_err = %3d", + crypto_tfm_alg_driver_name(crypto_acomp_tfm(tfm)), + qat_req->dir == COMPRESSION ? "comp " : "decomp", + status ? "ERR" : "OK ", + areq->slen, areq->dlen, consumed, produced, cmp_err, xlt_err); + + areq->dlen = 0; + + if (unlikely(status != ICP_QAT_FW_COMN_STATUS_FLAG_OK)) + goto end; + + if (qat_req->dir == COMPRESSION) { + cnv = qat_comp_get_cmp_cnv_flag(resp); + if (unlikely(!cnv)) { + dev_err(&GET_DEV(accel_dev), + "Verified compression not supported\n"); + goto end; + } + + if (unlikely(produced > qat_req->actual_dlen)) { + memset(inst->dc_data->ovf_buff, 0, + inst->dc_data->ovf_buff_sz); + dev_dbg(&GET_DEV(accel_dev), + "Actual buffer overflow: produced=%d, dlen=%d\n", + produced, qat_req->actual_dlen); + goto end; + } + } + + res = 0; + areq->dlen = produced; + +end: + qat_bl_free_bufl(accel_dev, &qat_req->buf); + areq->base.complete(&areq->base, res); +} + +void qat_comp_alg_callback(void *resp) +{ + struct qat_compression_req *qat_req = + (void *)(__force long)qat_comp_get_opaque(resp); + struct qat_instance_backlog *backlog = qat_req->alg_req.backlog; + + qat_comp_generic_callback(qat_req, resp); + + qat_alg_send_backlog(backlog); +} + +static int qat_comp_alg_init_tfm(struct crypto_acomp *acomp_tfm) +{ + struct crypto_tfm *tfm = crypto_acomp_tfm(acomp_tfm); + struct qat_compression_ctx *ctx = crypto_tfm_ctx(tfm); + struct qat_compression_instance *inst; + int node; + + if (tfm->node == NUMA_NO_NODE) + node = numa_node_id(); + else + node = tfm->node; + + memset(ctx, 0, sizeof(*ctx)); + inst = qat_compression_get_instance_node(node); + if (!inst) + return -EINVAL; + ctx->inst = inst; + + ctx->inst->build_deflate_ctx(ctx->comp_ctx); + + return 0; +} + +static void qat_comp_alg_exit_tfm(struct crypto_acomp *acomp_tfm) +{ + struct crypto_tfm *tfm = crypto_acomp_tfm(acomp_tfm); + 
struct qat_compression_ctx *ctx = crypto_tfm_ctx(tfm); + + qat_compression_put_instance(ctx->inst); + memset(ctx, 0, sizeof(*ctx)); +} + +static int qat_comp_alg_compress_decompress(struct acomp_req *areq, + enum direction dir) +{ + struct qat_compression_req *qat_req = acomp_request_ctx(areq); + struct crypto_acomp *acomp_tfm = crypto_acomp_reqtfm(areq); + struct crypto_tfm *tfm = crypto_acomp_tfm(acomp_tfm); + struct qat_compression_ctx *ctx = crypto_tfm_ctx(tfm); + struct qat_compression_instance *inst = ctx->inst; + struct qat_sgl_to_bufl_params *p_params = NULL; + gfp_t f = qat_algs_alloc_flags(&areq->base); + struct qat_sgl_to_bufl_params params; + unsigned int slen = areq->slen; + unsigned int dlen = areq->dlen; + dma_addr_t sfbuf, dfbuf; + u8 *req = qat_req->req; + size_t ovf_buff_sz; + int ret; + + if (!areq->src || !slen) + return -EINVAL; + + if (areq->dst && !dlen) + return -EINVAL; + + /* Handle acomp requests that require the allocation of a destination + * buffer. The size of the destination buffer is double the source + * buffer (rounded up to the size of a page) to fit the decompressed + * output or an expansion on the data for compression. 
+ */ + if (!areq->dst) { + dlen = round_up(2 * slen, PAGE_SIZE); + areq->dst = sgl_alloc(dlen, f, NULL); + if (!areq->dst) + return -ENOMEM; + } + + if (dir == COMPRESSION) { + params.extra_dst_buff = inst->dc_data->ovf_buff_p; + ovf_buff_sz = inst->dc_data->ovf_buff_sz; + params.sz_extra_dst_buff = ovf_buff_sz; + p_params = ¶ms; + } + + ret = qat_bl_sgl_to_bufl(ctx->inst->accel_dev, areq->src, areq->dst, + &qat_req->buf, p_params, f); + if (unlikely(ret)) + return ret; + + sfbuf = qat_req->buf.blp; + dfbuf = qat_req->buf.bloutp; + qat_req->qat_compression_ctx = ctx; + qat_req->acompress_req = areq; + qat_req->dir = dir; + + if (dir == COMPRESSION) { + qat_req->actual_dlen = dlen; + dlen += ovf_buff_sz; + qat_comp_create_compression_req(ctx->comp_ctx, req, + (u64)(__force long)sfbuf, slen, + (u64)(__force long)dfbuf, dlen, + (u64)(__force long)qat_req); + } else { + qat_comp_create_decompression_req(ctx->comp_ctx, req, + (u64)(__force long)sfbuf, slen, + (u64)(__force long)dfbuf, dlen, + (u64)(__force long)qat_req); + } + + ret = qat_alg_send_dc_message(qat_req, inst, &areq->base); + if (ret == -ENOSPC) + qat_bl_free_bufl(inst->accel_dev, &qat_req->buf); + + return ret; +} + +static int qat_comp_alg_compress(struct acomp_req *req) +{ + return qat_comp_alg_compress_decompress(req, COMPRESSION); +} + +static int qat_comp_alg_decompress(struct acomp_req *req) +{ + return qat_comp_alg_compress_decompress(req, DECOMPRESSION); +} + +static struct acomp_alg qat_acomp[] = { { + .base = { + .cra_name = "deflate", + .cra_driver_name = "qat_deflate", + .cra_priority = 4001, + .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY, + .cra_ctxsize = sizeof(struct qat_compression_ctx), + .cra_module = THIS_MODULE, + }, + .init = qat_comp_alg_init_tfm, + .exit = qat_comp_alg_exit_tfm, + .compress = qat_comp_alg_compress, + .decompress = qat_comp_alg_decompress, + .dst_free = sgl_free, + .reqsize = sizeof(struct qat_compression_req), +} }; + +int qat_comp_algs_register(void) 
+{ + int ret = 0; + + mutex_lock(&algs_lock); + if (++active_devs == 1) + ret = crypto_register_acomps(qat_acomp, ARRAY_SIZE(qat_acomp)); + mutex_unlock(&algs_lock); + return ret; +} + +void qat_comp_algs_unregister(void) +{ + mutex_lock(&algs_lock); + if (--active_devs == 0) + crypto_unregister_acomps(qat_acomp, ARRAY_SIZE(qat_acomp)); + mutex_unlock(&algs_lock); +} diff --git a/drivers/crypto/qat/qat_common/qat_comp_req.h b/drivers/crypto/qat/qat_common/qat_comp_req.h new file mode 100644 index 000000000000..18a1f33a6db9 --- /dev/null +++ b/drivers/crypto/qat/qat_common/qat_comp_req.h @@ -0,0 +1,113 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2022 Intel Corporation */ +#ifndef _QAT_COMP_REQ_H_ +#define _QAT_COMP_REQ_H_ + +#include "icp_qat_fw_comp.h" + +#define QAT_COMP_REQ_SIZE (sizeof(struct icp_qat_fw_comp_req)) +#define QAT_COMP_CTX_SIZE (QAT_COMP_REQ_SIZE * 2) + +static inline void qat_comp_create_req(void *ctx, void *req, u64 src, u32 slen, + u64 dst, u32 dlen, u64 opaque) +{ + struct icp_qat_fw_comp_req *fw_tmpl = ctx; + struct icp_qat_fw_comp_req *fw_req = req; + struct icp_qat_fw_comp_req_params *req_pars = &fw_req->comp_pars; + + memcpy(fw_req, fw_tmpl, sizeof(*fw_req)); + fw_req->comn_mid.src_data_addr = src; + fw_req->comn_mid.src_length = slen; + fw_req->comn_mid.dest_data_addr = dst; + fw_req->comn_mid.dst_length = dlen; + fw_req->comn_mid.opaque_data = opaque; + req_pars->comp_len = slen; + req_pars->out_buffer_sz = dlen; +} + +static inline void qat_comp_create_compression_req(void *ctx, void *req, + u64 src, u32 slen, + u64 dst, u32 dlen, + u64 opaque) +{ + qat_comp_create_req(ctx, req, src, slen, dst, dlen, opaque); +} + +static inline void qat_comp_create_decompression_req(void *ctx, void *req, + u64 src, u32 slen, + u64 dst, u32 dlen, + u64 opaque) +{ + struct icp_qat_fw_comp_req *fw_tmpl = ctx; + + fw_tmpl++; + qat_comp_create_req(fw_tmpl, req, src, slen, dst, dlen, opaque); +} + +static inline u32 
qat_comp_get_consumed_ctr(void *resp) +{ + struct icp_qat_fw_comp_resp *qat_resp = resp; + + return qat_resp->comp_resp_pars.input_byte_counter; +} + +static inline u32 qat_comp_get_produced_ctr(void *resp) +{ + struct icp_qat_fw_comp_resp *qat_resp = resp; + + return qat_resp->comp_resp_pars.output_byte_counter; +} + +static inline u32 qat_comp_get_produced_adler32(void *resp) +{ + struct icp_qat_fw_comp_resp *qat_resp = resp; + + return qat_resp->comp_resp_pars.crc.legacy.curr_adler_32; +} + +static inline u64 qat_comp_get_opaque(void *resp) +{ + struct icp_qat_fw_comp_resp *qat_resp = resp; + + return qat_resp->opaque_data; +} + +static inline s8 qat_comp_get_cmp_err(void *resp) +{ + struct icp_qat_fw_comp_resp *qat_resp = resp; + + return qat_resp->comn_resp.comn_error.cmp_err_code; +} + +static inline s8 qat_comp_get_xlt_err(void *resp) +{ + struct icp_qat_fw_comp_resp *qat_resp = resp; + + return qat_resp->comn_resp.comn_error.xlat_err_code; +} + +static inline s8 qat_comp_get_cmp_status(void *resp) +{ + struct icp_qat_fw_comp_resp *qat_resp = resp; + u8 stat_filed = qat_resp->comn_resp.comn_status; + + return ICP_QAT_FW_COMN_RESP_CMP_STAT_GET(stat_filed); +} + +static inline s8 qat_comp_get_xlt_status(void *resp) +{ + struct icp_qat_fw_comp_resp *qat_resp = resp; + u8 stat_filed = qat_resp->comn_resp.comn_status; + + return ICP_QAT_FW_COMN_RESP_XLAT_STAT_GET(stat_filed); +} + +static inline u8 qat_comp_get_cmp_cnv_flag(void *resp) +{ + struct icp_qat_fw_comp_resp *qat_resp = resp; + u8 flags = qat_resp->comn_resp.hdr_flags; + + return ICP_QAT_FW_COMN_HDR_CNV_FLAG_GET(flags); +} + +#endif diff --git a/drivers/crypto/qat/qat_common/qat_compression.c b/drivers/crypto/qat/qat_common/qat_compression.c new file mode 100644 index 000000000000..9fd10f4242f8 --- /dev/null +++ b/drivers/crypto/qat/qat_common/qat_compression.c @@ -0,0 +1,297 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2022 Intel Corporation */ +#include +#include +#include 
"adf_accel_devices.h" +#include "adf_common_drv.h" +#include "adf_transport.h" +#include "adf_transport_access_macros.h" +#include "adf_cfg.h" +#include "adf_cfg_strings.h" +#include "qat_compression.h" +#include "icp_qat_fw.h" + +#define SEC ADF_KERNEL_SEC + +static struct service_hndl qat_compression; + +void qat_compression_put_instance(struct qat_compression_instance *inst) +{ + atomic_dec(&inst->refctr); + adf_dev_put(inst->accel_dev); +} + +static int qat_compression_free_instances(struct adf_accel_dev *accel_dev) +{ + struct qat_compression_instance *inst; + struct list_head *list_ptr, *tmp; + int i; + + list_for_each_safe(list_ptr, tmp, &accel_dev->compression_list) { + inst = list_entry(list_ptr, + struct qat_compression_instance, list); + + for (i = 0; i < atomic_read(&inst->refctr); i++) + qat_compression_put_instance(inst); + + if (inst->dc_tx) + adf_remove_ring(inst->dc_tx); + + if (inst->dc_rx) + adf_remove_ring(inst->dc_rx); + + list_del(list_ptr); + kfree(inst); + } + return 0; +} + +struct qat_compression_instance *qat_compression_get_instance_node(int node) +{ + struct qat_compression_instance *inst = NULL; + struct adf_accel_dev *accel_dev = NULL; + unsigned long best = ~0; + struct list_head *itr; + + list_for_each(itr, adf_devmgr_get_head()) { + struct adf_accel_dev *tmp_dev; + unsigned long ctr; + int tmp_dev_node; + + tmp_dev = list_entry(itr, struct adf_accel_dev, list); + tmp_dev_node = dev_to_node(&GET_DEV(tmp_dev)); + + if ((node == tmp_dev_node || tmp_dev_node < 0) && + adf_dev_started(tmp_dev) && !list_empty(&tmp_dev->compression_list)) { + ctr = atomic_read(&tmp_dev->ref_count); + if (best > ctr) { + accel_dev = tmp_dev; + best = ctr; + } + } + } + + if (!accel_dev) { + pr_info("QAT: Could not find a device on node %d\n", node); + /* Get any started device */ + list_for_each(itr, adf_devmgr_get_head()) { + struct adf_accel_dev *tmp_dev; + + tmp_dev = list_entry(itr, struct adf_accel_dev, list); + if (adf_dev_started(tmp_dev) && + 
!list_empty(&tmp_dev->compression_list)) { + accel_dev = tmp_dev; + break; + } + } + } + + if (!accel_dev) + return NULL; + + best = ~0; + list_for_each(itr, &accel_dev->compression_list) { + struct qat_compression_instance *tmp_inst; + unsigned long ctr; + + tmp_inst = list_entry(itr, struct qat_compression_instance, list); + ctr = atomic_read(&tmp_inst->refctr); + if (best > ctr) { + inst = tmp_inst; + best = ctr; + } + } + if (inst) { + if (adf_dev_get(accel_dev)) { + dev_err(&GET_DEV(accel_dev), "Could not increment dev refctr\n"); + return NULL; + } + atomic_inc(&inst->refctr); + } + return inst; +} + +static int qat_compression_create_instances(struct adf_accel_dev *accel_dev) +{ + struct qat_compression_instance *inst; + char key[ADF_CFG_MAX_KEY_LEN_IN_BYTES]; + char val[ADF_CFG_MAX_VAL_LEN_IN_BYTES]; + unsigned long num_inst, num_msg_dc; + unsigned long bank; + int msg_size; + int ret; + int i; + + INIT_LIST_HEAD(&accel_dev->compression_list); + strscpy(key, ADF_NUM_DC, sizeof(key)); + ret = adf_cfg_get_param_value(accel_dev, SEC, key, val); + if (ret) + return ret; + + ret = kstrtoul(val, 10, &num_inst); + if (ret) + return ret; + + for (i = 0; i < num_inst; i++) { + inst = kzalloc_node(sizeof(*inst), GFP_KERNEL, + dev_to_node(&GET_DEV(accel_dev))); + if (!inst) { + ret = -ENOMEM; + goto err; + } + + list_add_tail(&inst->list, &accel_dev->compression_list); + inst->id = i; + atomic_set(&inst->refctr, 0); + inst->accel_dev = accel_dev; + inst->build_deflate_ctx = GET_DC_OPS(accel_dev)->build_deflate_ctx; + + snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_BANK_NUM, i); + ret = adf_cfg_get_param_value(accel_dev, SEC, key, val); + if (ret) + return ret; + + ret = kstrtoul(val, 10, &bank); + if (ret) + return ret; + + snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_SIZE, i); + ret = adf_cfg_get_param_value(accel_dev, SEC, key, val); + if (ret) + return ret; + + ret = kstrtoul(val, 10, &num_msg_dc); + if (ret) + return ret; + + msg_size = 
ICP_QAT_FW_REQ_DEFAULT_SZ; + snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_TX, i); + ret = adf_create_ring(accel_dev, SEC, bank, num_msg_dc, + msg_size, key, NULL, 0, &inst->dc_tx); + if (ret) + return ret; + + msg_size = ICP_QAT_FW_RESP_DEFAULT_SZ; + snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_RX, i); + ret = adf_create_ring(accel_dev, SEC, bank, num_msg_dc, + msg_size, key, qat_comp_alg_callback, 0, + &inst->dc_rx); + if (ret) + return ret; + + inst->dc_data = accel_dev->dc_data; + INIT_LIST_HEAD(&inst->backlog.list); + spin_lock_init(&inst->backlog.lock); + } + return 0; +err: + qat_compression_free_instances(accel_dev); + return ret; +} + +static int qat_compression_alloc_dc_data(struct adf_accel_dev *accel_dev) +{ + struct device *dev = &GET_DEV(accel_dev); + dma_addr_t obuff_p = DMA_MAPPING_ERROR; + size_t ovf_buff_sz = QAT_COMP_MAX_SKID; + struct adf_dc_data *dc_data = NULL; + u8 *obuff = NULL; + + dc_data = devm_kzalloc(dev, sizeof(*dc_data), GFP_KERNEL); + if (!dc_data) + goto err; + + obuff = kzalloc_node(ovf_buff_sz, GFP_KERNEL, dev_to_node(dev)); + if (!obuff) + goto err; + + obuff_p = dma_map_single(dev, obuff, ovf_buff_sz, DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(dev, obuff_p))) + goto err; + + dc_data->ovf_buff = obuff; + dc_data->ovf_buff_p = obuff_p; + dc_data->ovf_buff_sz = ovf_buff_sz; + + accel_dev->dc_data = dc_data; + + return 0; + +err: + accel_dev->dc_data = NULL; + kfree(obuff); + devm_kfree(dev, dc_data); + return -ENOMEM; +} + +static void qat_free_dc_data(struct adf_accel_dev *accel_dev) +{ + struct adf_dc_data *dc_data = accel_dev->dc_data; + struct device *dev = &GET_DEV(accel_dev); + + if (!dc_data) + return; + + dma_unmap_single(dev, dc_data->ovf_buff_p, dc_data->ovf_buff_sz, + DMA_FROM_DEVICE); + memset(dc_data->ovf_buff, 0, dc_data->ovf_buff_sz); + kfree(dc_data->ovf_buff); + devm_kfree(dev, dc_data); + accel_dev->dc_data = NULL; +} + +static int qat_compression_init(struct adf_accel_dev *accel_dev) +{ + int 
ret; + + ret = qat_compression_alloc_dc_data(accel_dev); + if (ret) + return ret; + + ret = qat_compression_create_instances(accel_dev); + if (ret) + qat_free_dc_data(accel_dev); + + return ret; +} + +static int qat_compression_shutdown(struct adf_accel_dev *accel_dev) +{ + qat_free_dc_data(accel_dev); + return qat_compression_free_instances(accel_dev); +} + +static int qat_compression_event_handler(struct adf_accel_dev *accel_dev, + enum adf_event event) +{ + int ret; + + switch (event) { + case ADF_EVENT_INIT: + ret = qat_compression_init(accel_dev); + break; + case ADF_EVENT_SHUTDOWN: + ret = qat_compression_shutdown(accel_dev); + break; + case ADF_EVENT_RESTARTING: + case ADF_EVENT_RESTARTED: + case ADF_EVENT_START: + case ADF_EVENT_STOP: + default: + ret = 0; + } + return ret; +} + +int qat_compression_register(void) +{ + memset(&qat_compression, 0, sizeof(qat_compression)); + qat_compression.event_hld = qat_compression_event_handler; + qat_compression.name = "qat_compression"; + return adf_service_register(&qat_compression); +} + +int qat_compression_unregister(void) +{ + return adf_service_unregister(&qat_compression); +} diff --git a/drivers/crypto/qat/qat_common/qat_compression.h b/drivers/crypto/qat/qat_common/qat_compression.h new file mode 100644 index 000000000000..aebac2302dcf --- /dev/null +++ b/drivers/crypto/qat/qat_common/qat_compression.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2022 Intel Corporation */ +#ifndef _QAT_COMPRESSION_H_ +#define _QAT_COMPRESSION_H_ + +#include +#include +#include "adf_accel_devices.h" +#include "qat_algs_send.h" + +#define QAT_COMP_MAX_SKID 4096 + +struct qat_compression_instance { + struct adf_etr_ring_data *dc_tx; + struct adf_etr_ring_data *dc_rx; + struct adf_accel_dev *accel_dev; + struct list_head list; + unsigned long state; + int id; + atomic_t refctr; + struct qat_instance_backlog backlog; + struct adf_dc_data *dc_data; + void (*build_deflate_ctx)(void *ctx); +}; + 
+static inline bool adf_hw_dev_has_compression(struct adf_accel_dev *accel_dev) +{ + struct adf_hw_device_data *hw_device = accel_dev->hw_device; + u32 mask = ~hw_device->accel_capabilities_mask; + + if (mask & ADF_ACCEL_CAPABILITIES_COMPRESSION) + return false; + + return true; +} + +#endif diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c index baacf817abf6..bc80bb475118 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include "adf_dh895xcc_hw_data.h" @@ -242,6 +243,7 @@ void adf_init_hw_data_dh895xcc(struct adf_hw_device_data *hw_data) hw_data->pfvf_ops.disable_all_vf2pf_interrupts = disable_all_vf2pf_interrupts; hw_data->pfvf_ops.disable_pending_vf2pf_interrupts = disable_pending_vf2pf_interrupts; adf_gen2_init_hw_csr_ops(&hw_data->csr_ops); + adf_gen2_init_dc_ops(&hw_data->dc_ops); } void adf_clean_hw_data_dh895xcc(struct adf_hw_device_data *hw_data) diff --git a/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c b/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c index b933a00fb91b..70e56cc16ece 100644 --- a/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c +++ b/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -91,6 +92,7 @@ void adf_init_hw_data_dh895xcciov(struct adf_hw_device_data *hw_data) adf_devmgr_update_class_index(hw_data); adf_gen2_init_vf_pfvf_ops(&hw_data->pfvf_ops); adf_gen2_init_hw_csr_ops(&hw_data->csr_ops); + adf_gen2_init_dc_ops(&hw_data->dc_ops); } void adf_clean_hw_data_dh895xcciov(struct adf_hw_device_data *hw_data) From 5b14b2b307e4045b38a4961718cbe9c17cef2bf4 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Mon, 28 Nov 2022 12:21:21 +0000 Subject: [PATCH 3564/4122] crypto: qat - enable 
deflate for QAT GEN4 Enable deflate for QAT GEN4 devices. This adds (1) logic to create configuration entries at probe time for the compression instances for QAT GEN4 devices; (2) the implementation of QAT GEN4 specific compression operations, required since the creation of the compression request template is different between GEN2 and GEN4; and (3) updates to the firmware API related to compression for GEN4. The implementation configures the device to produce data compressed dynamically, optimized for throughput over compression ratio. Signed-off-by: Giovanni Cabiddu Reviewed-by: Wojciech Ziemba Reviewed-by: Adam Guerin Signed-off-by: Herbert Xu --- .../crypto/qat/qat_4xxx/adf_4xxx_hw_data.c | 4 +- .../crypto/qat/qat_4xxx/adf_4xxx_hw_data.h | 2 +- drivers/crypto/qat/qat_4xxx/adf_drv.c | 139 +++++++- drivers/crypto/qat/qat_common/Makefile | 1 + drivers/crypto/qat/qat_common/adf_gen4_dc.c | 83 +++++ drivers/crypto/qat/qat_common/adf_gen4_dc.h | 10 + .../qat/qat_common/icp_qat_hw_20_comp.h | 164 ++++++++++ .../qat/qat_common/icp_qat_hw_20_comp_defs.h | 300 ++++++++++++++++++ 8 files changed, 689 insertions(+), 14 deletions(-) create mode 100644 drivers/crypto/qat/qat_common/adf_gen4_dc.c create mode 100644 drivers/crypto/qat/qat_common/adf_gen4_dc.h create mode 100644 drivers/crypto/qat/qat_common/icp_qat_hw_20_comp.h create mode 100644 drivers/crypto/qat/qat_common/icp_qat_hw_20_comp_defs.h diff --git a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c index fda5f699ff57..834a705180c0 100644 --- a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c +++ b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -357,10 +358,11 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data) hw_data->ring_pair_reset = adf_gen4_ring_pair_reset; hw_data->enable_pm = adf_gen4_enable_pm; hw_data->handle_pm_interrupt = adf_gen4_handle_pm_interrupt; - 
hw_data->dev_config = adf_crypto_dev_config; + hw_data->dev_config = adf_gen4_dev_config; adf_gen4_init_hw_csr_ops(&hw_data->csr_ops); adf_gen4_init_pf_pfvf_ops(&hw_data->pfvf_ops); + adf_gen4_init_dc_ops(&hw_data->dc_ops); } void adf_clean_hw_data_4xxx(struct adf_hw_device_data *hw_data) diff --git a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h index 9d49248931f6..e98428ba78e2 100644 --- a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h +++ b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h @@ -70,6 +70,6 @@ enum icp_qat_4xxx_slice_mask { void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data); void adf_clean_hw_data_4xxx(struct adf_hw_device_data *hw_data); -int adf_crypto_dev_config(struct adf_accel_dev *accel_dev); +int adf_gen4_dev_config(struct adf_accel_dev *accel_dev); #endif diff --git a/drivers/crypto/qat/qat_4xxx/adf_drv.c b/drivers/crypto/qat/qat_4xxx/adf_drv.c index 8496c451b48e..b3a4c7b23864 100644 --- a/drivers/crypto/qat/qat_4xxx/adf_drv.c +++ b/drivers/crypto/qat/qat_4xxx/adf_drv.c @@ -9,6 +9,7 @@ #include #include "adf_4xxx_hw_data.h" +#include "qat_compression.h" #include "qat_crypto.h" #include "adf_transport_access_macros.h" @@ -19,6 +20,16 @@ static const struct pci_device_id adf_pci_tbl[] = { }; MODULE_DEVICE_TABLE(pci, adf_pci_tbl); +enum configs { + DEV_CFG_CY = 0, + DEV_CFG_DC, +}; + +static const char * const services_operations[] = { + ADF_CFG_CY, + ADF_CFG_DC, +}; + static void adf_cleanup_accel(struct adf_accel_dev *accel_dev) { if (accel_dev->hw_device) { @@ -53,7 +64,7 @@ static int adf_cfg_dev_init(struct adf_accel_dev *accel_dev) return 0; } -int adf_crypto_dev_config(struct adf_accel_dev *accel_dev) +static int adf_crypto_dev_config(struct adf_accel_dev *accel_dev) { char key[ADF_CFG_MAX_KEY_LEN_IN_BYTES]; int banks = GET_MAX_BANKS(accel_dev); @@ -68,14 +79,6 @@ int adf_crypto_dev_config(struct adf_accel_dev *accel_dev) else instances = 0; - ret = 
adf_cfg_section_add(accel_dev, ADF_KERNEL_SEC); - if (ret) - goto err; - - ret = adf_cfg_section_add(accel_dev, "Accelerator0"); - if (ret) - goto err; - for (i = 0; i < instances; i++) { val = i; bank = i * 2; @@ -161,10 +164,122 @@ int adf_crypto_dev_config(struct adf_accel_dev *accel_dev) if (ret) goto err; - set_bit(ADF_STATUS_CONFIGURED, &accel_dev->status); return 0; err: - dev_err(&GET_DEV(accel_dev), "Failed to start QAT accel dev\n"); + dev_err(&GET_DEV(accel_dev), "Failed to add configuration for crypto\n"); + return ret; +} + +static int adf_comp_dev_config(struct adf_accel_dev *accel_dev) +{ + char key[ADF_CFG_MAX_KEY_LEN_IN_BYTES]; + int banks = GET_MAX_BANKS(accel_dev); + int cpus = num_online_cpus(); + unsigned long val; + int instances; + int ret; + int i; + + if (adf_hw_dev_has_compression(accel_dev)) + instances = min(cpus, banks); + else + instances = 0; + + for (i = 0; i < instances; i++) { + val = i; + snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_BANK_NUM, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + val = 512; + snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_SIZE, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + val = 0; + snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_TX, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + val = 1; + snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_RX, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + val = ADF_COALESCING_DEF_TIME; + snprintf(key, sizeof(key), ADF_ETRMGR_COALESCE_TIMER_FORMAT, i); + ret = adf_cfg_add_key_value_param(accel_dev, "Accelerator0", + key, &val, ADF_DEC); + if (ret) + goto err; + } + + val = i; + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_DC, + &val, ADF_DEC); + if (ret) + 
goto err; + + val = 0; + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_CY, + &val, ADF_DEC); + if (ret) + goto err; + + return 0; +err: + dev_err(&GET_DEV(accel_dev), "Failed to add configuration for compression\n"); + return ret; +} + +int adf_gen4_dev_config(struct adf_accel_dev *accel_dev) +{ + char services[ADF_CFG_MAX_VAL_LEN_IN_BYTES] = {0}; + int ret; + + ret = adf_cfg_section_add(accel_dev, ADF_KERNEL_SEC); + if (ret) + goto err; + + ret = adf_cfg_section_add(accel_dev, "Accelerator0"); + if (ret) + goto err; + + ret = adf_cfg_get_param_value(accel_dev, ADF_GENERAL_SEC, + ADF_SERVICES_ENABLED, services); + if (ret) + goto err; + + ret = sysfs_match_string(services_operations, services); + if (ret < 0) + goto err; + + switch (ret) { + case DEV_CFG_CY: + ret = adf_crypto_dev_config(accel_dev); + break; + case DEV_CFG_DC: + ret = adf_comp_dev_config(accel_dev); + break; + } + + if (ret) + goto err; + + set_bit(ADF_STATUS_CONFIGURED, &accel_dev->status); + + return ret; + +err: + dev_err(&GET_DEV(accel_dev), "Failed to configure QAT driver\n"); return ret; } @@ -300,7 +415,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (ret) goto out_err_disable_aer; - ret = adf_crypto_dev_config(accel_dev); + ret = hw_data->dev_config(accel_dev); if (ret) goto out_err_disable_aer; diff --git a/drivers/crypto/qat/qat_common/Makefile b/drivers/crypto/qat/qat_common/Makefile index e3db4786738f..1fb8d50f509f 100644 --- a/drivers/crypto/qat/qat_common/Makefile +++ b/drivers/crypto/qat/qat_common/Makefile @@ -16,6 +16,7 @@ intel_qat-objs := adf_cfg.o \ adf_gen4_hw_data.o \ adf_gen4_pm.o \ adf_gen2_dc.o \ + adf_gen4_dc.o \ qat_crypto.o \ qat_compression.o \ qat_comp_algs.o \ diff --git a/drivers/crypto/qat/qat_common/adf_gen4_dc.c b/drivers/crypto/qat/qat_common/adf_gen4_dc.c new file mode 100644 index 000000000000..5859238e37de --- /dev/null +++ b/drivers/crypto/qat/qat_common/adf_gen4_dc.c @@ -0,0 +1,83 @@ +// 
SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2022 Intel Corporation */ +#include "adf_accel_devices.h" +#include "icp_qat_fw_comp.h" +#include "icp_qat_hw_20_comp.h" +#include "adf_gen4_dc.h" + +static void qat_comp_build_deflate(void *ctx) +{ + struct icp_qat_fw_comp_req *req_tmpl = + (struct icp_qat_fw_comp_req *)ctx; + struct icp_qat_fw_comn_req_hdr *header = &req_tmpl->comn_hdr; + struct icp_qat_fw_comp_req_hdr_cd_pars *cd_pars = &req_tmpl->cd_pars; + struct icp_qat_fw_comp_req_params *req_pars = &req_tmpl->comp_pars; + struct icp_qat_hw_comp_20_config_csr_upper hw_comp_upper_csr = {0}; + struct icp_qat_hw_comp_20_config_csr_lower hw_comp_lower_csr = {0}; + struct icp_qat_hw_decomp_20_config_csr_lower hw_decomp_lower_csr = {0}; + u32 upper_val; + u32 lower_val; + + memset(req_tmpl, 0, sizeof(*req_tmpl)); + header->hdr_flags = + ICP_QAT_FW_COMN_HDR_FLAGS_BUILD(ICP_QAT_FW_COMN_REQ_FLAG_SET); + header->service_type = ICP_QAT_FW_COMN_REQ_CPM_FW_COMP; + header->service_cmd_id = ICP_QAT_FW_COMP_CMD_DYNAMIC; /* GEN4 template requests dynamically compressed output */ + header->comn_req_flags = + ICP_QAT_FW_COMN_FLAGS_BUILD(QAT_COMN_CD_FLD_TYPE_16BYTE_DATA, + QAT_COMN_PTR_TYPE_SGL); + header->serv_specif_flags = + ICP_QAT_FW_COMP_FLAGS_BUILD(ICP_QAT_FW_COMP_STATELESS_SESSION, + ICP_QAT_FW_COMP_AUTO_SELECT_BEST, + ICP_QAT_FW_COMP_NOT_ENH_AUTO_SELECT_BEST, + ICP_QAT_FW_COMP_NOT_DISABLE_TYPE0_ENH_AUTO_SELECT_BEST, + ICP_QAT_FW_COMP_ENABLE_SECURE_RAM_USED_AS_INTMD_BUF); + hw_comp_lower_csr.skip_ctrl = ICP_QAT_HW_COMP_20_BYTE_SKIP_3BYTE_LITERAL; + hw_comp_lower_csr.algo = ICP_QAT_HW_COMP_20_HW_COMP_FORMAT_ILZ77; + hw_comp_lower_csr.lllbd = ICP_QAT_HW_COMP_20_LLLBD_CTRL_LLLBD_ENABLED; + hw_comp_lower_csr.sd = ICP_QAT_HW_COMP_20_SEARCH_DEPTH_LEVEL_1; + hw_comp_lower_csr.hash_update = ICP_QAT_HW_COMP_20_SKIP_HASH_UPDATE_DONT_ALLOW; + hw_comp_lower_csr.edmm = ICP_QAT_HW_COMP_20_EXTENDED_DELAY_MATCH_MODE_EDMM_ENABLED; + hw_comp_upper_csr.nice = ICP_QAT_HW_COMP_20_CONFIG_CSR_NICE_PARAM_DEFAULT_VAL; + hw_comp_upper_csr.lazy =
ICP_QAT_HW_COMP_20_CONFIG_CSR_LAZY_PARAM_DEFAULT_VAL; + + upper_val = ICP_QAT_FW_COMP_20_BUILD_CONFIG_UPPER(hw_comp_upper_csr); + lower_val = ICP_QAT_FW_COMP_20_BUILD_CONFIG_LOWER(hw_comp_lower_csr); + + cd_pars->u.sl.comp_slice_cfg_word[0] = lower_val; + cd_pars->u.sl.comp_slice_cfg_word[1] = upper_val; + + req_pars->crc.legacy.initial_adler = COMP_CPR_INITIAL_ADLER; + req_pars->crc.legacy.initial_crc32 = COMP_CPR_INITIAL_CRC; + req_pars->req_par_flags = + ICP_QAT_FW_COMP_REQ_PARAM_FLAGS_BUILD(ICP_QAT_FW_COMP_SOP, + ICP_QAT_FW_COMP_EOP, + ICP_QAT_FW_COMP_BFINAL, + ICP_QAT_FW_COMP_CNV, + ICP_QAT_FW_COMP_CNV_RECOVERY, + ICP_QAT_FW_COMP_NO_CNV_DFX, + ICP_QAT_FW_COMP_CRC_MODE_LEGACY, + ICP_QAT_FW_COMP_NO_XXHASH_ACC, + ICP_QAT_FW_COMP_CNV_ERROR_NONE, + ICP_QAT_FW_COMP_NO_APPEND_CRC, + ICP_QAT_FW_COMP_NO_DROP_DATA); + + /* Fill second half of the template for decompression */ + memcpy(req_tmpl + 1, req_tmpl, sizeof(*req_tmpl)); + req_tmpl++; + header = &req_tmpl->comn_hdr; + header->service_cmd_id = ICP_QAT_FW_COMP_CMD_DECOMPRESS; + cd_pars = &req_tmpl->cd_pars; + + hw_decomp_lower_csr.algo = ICP_QAT_HW_DECOMP_20_HW_DECOMP_FORMAT_DEFLATE; + lower_val = ICP_QAT_FW_DECOMP_20_BUILD_CONFIG_LOWER(hw_decomp_lower_csr); + + cd_pars->u.sl.comp_slice_cfg_word[0] = lower_val; + cd_pars->u.sl.comp_slice_cfg_word[1] = 0; +} + +void adf_gen4_init_dc_ops(struct adf_dc_ops *dc_ops) +{ + dc_ops->build_deflate_ctx = qat_comp_build_deflate; +} +EXPORT_SYMBOL_GPL(adf_gen4_init_dc_ops); diff --git a/drivers/crypto/qat/qat_common/adf_gen4_dc.h b/drivers/crypto/qat/qat_common/adf_gen4_dc.h new file mode 100644 index 000000000000..0b1a6774412e --- /dev/null +++ b/drivers/crypto/qat/qat_common/adf_gen4_dc.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2022 Intel Corporation */ +#ifndef ADF_GEN4_DC_H +#define ADF_GEN4_DC_H + +#include "adf_accel_devices.h" + +void adf_gen4_init_dc_ops(struct adf_dc_ops *dc_ops); + +#endif /* ADF_GEN4_DC_H */ diff --git
a/drivers/crypto/qat/qat_common/icp_qat_hw_20_comp.h b/drivers/crypto/qat/qat_common/icp_qat_hw_20_comp.h new file mode 100644 index 000000000000..7ea8962272f2 --- /dev/null +++ b/drivers/crypto/qat/qat_common/icp_qat_hw_20_comp.h @@ -0,0 +1,164 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2022 Intel Corporation */ +#ifndef _ICP_QAT_HW_20_COMP_H_ +#define _ICP_QAT_HW_20_COMP_H_ + +#include "icp_qat_hw_20_comp_defs.h" +#include "icp_qat_fw.h" + +struct icp_qat_hw_comp_20_config_csr_lower { + enum icp_qat_hw_comp_20_extended_delay_match_mode edmm; + enum icp_qat_hw_comp_20_hw_comp_format algo; + enum icp_qat_hw_comp_20_search_depth sd; + enum icp_qat_hw_comp_20_hbs_control hbs; + enum icp_qat_hw_comp_20_abd abd; + enum icp_qat_hw_comp_20_lllbd_ctrl lllbd; + enum icp_qat_hw_comp_20_min_match_control mmctrl; + enum icp_qat_hw_comp_20_skip_hash_collision hash_col; + enum icp_qat_hw_comp_20_skip_hash_update hash_update; + enum icp_qat_hw_comp_20_byte_skip skip_ctrl; +}; + +static inline __u32 +ICP_QAT_FW_COMP_20_BUILD_CONFIG_LOWER(struct icp_qat_hw_comp_20_config_csr_lower csr) +{ + u32 val32 = 0; + + QAT_FIELD_SET(val32, csr.algo, + ICP_QAT_HW_COMP_20_CONFIG_CSR_HW_COMP_FORMAT_BITPOS, + ICP_QAT_HW_COMP_20_CONFIG_CSR_HW_COMP_FORMAT_MASK); + QAT_FIELD_SET(val32, csr.sd, + ICP_QAT_HW_COMP_20_CONFIG_CSR_SEARCH_DEPTH_BITPOS, + ICP_QAT_HW_COMP_20_CONFIG_CSR_SEARCH_DEPTH_MASK); + QAT_FIELD_SET(val32, csr.edmm, + ICP_QAT_HW_COMP_20_CONFIG_CSR_EXTENDED_DELAY_MATCH_MODE_BITPOS, + ICP_QAT_HW_COMP_20_CONFIG_CSR_EXTENDED_DELAY_MATCH_MODE_MASK); + QAT_FIELD_SET(val32, csr.hbs, + ICP_QAT_HW_COMP_20_CONFIG_CSR_HBS_CONTROL_BITPOS, + ICP_QAT_HW_COMP_20_CONFIG_CSR_HBS_CONTROL_MASK); + QAT_FIELD_SET(val32, csr.lllbd, + ICP_QAT_HW_COMP_20_CONFIG_CSR_LLLBD_CTRL_BITPOS, + ICP_QAT_HW_COMP_20_CONFIG_CSR_LLLBD_CTRL_MASK); + QAT_FIELD_SET(val32, csr.mmctrl, + ICP_QAT_HW_COMP_20_CONFIG_CSR_MIN_MATCH_CONTROL_BITPOS, + ICP_QAT_HW_COMP_20_CONFIG_CSR_MIN_MATCH_CONTROL_MASK); + 
QAT_FIELD_SET(val32, csr.hash_col, + ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_COLLISION_BITPOS, + ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_COLLISION_MASK); + QAT_FIELD_SET(val32, csr.hash_update, + ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_UPDATE_BITPOS, + ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_UPDATE_MASK); + QAT_FIELD_SET(val32, csr.skip_ctrl, + ICP_QAT_HW_COMP_20_CONFIG_CSR_BYTE_SKIP_BITPOS, + ICP_QAT_HW_COMP_20_CONFIG_CSR_BYTE_SKIP_MASK); + QAT_FIELD_SET(val32, csr.abd, ICP_QAT_HW_COMP_20_CONFIG_CSR_ABD_BITPOS, + ICP_QAT_HW_COMP_20_CONFIG_CSR_ABD_MASK); + + return __builtin_bswap32(val32); +} + +struct icp_qat_hw_comp_20_config_csr_upper { + enum icp_qat_hw_comp_20_scb_control scb_ctrl; + enum icp_qat_hw_comp_20_rmb_control rmb_ctrl; + enum icp_qat_hw_comp_20_som_control som_ctrl; + enum icp_qat_hw_comp_20_skip_hash_rd_control skip_hash_ctrl; + enum icp_qat_hw_comp_20_scb_unload_control scb_unload_ctrl; + enum icp_qat_hw_comp_20_disable_token_fusion_control disable_token_fusion_ctrl; + enum icp_qat_hw_comp_20_lbms lbms; + enum icp_qat_hw_comp_20_scb_mode_reset_mask scb_mode_reset; + __u16 lazy; + __u16 nice; +}; + +static inline __u32 +ICP_QAT_FW_COMP_20_BUILD_CONFIG_UPPER(struct icp_qat_hw_comp_20_config_csr_upper csr) +{ + u32 val32 = 0; + + QAT_FIELD_SET(val32, csr.scb_ctrl, + ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_CONTROL_BITPOS, + ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_CONTROL_MASK); + QAT_FIELD_SET(val32, csr.rmb_ctrl, + ICP_QAT_HW_COMP_20_CONFIG_CSR_RMB_CONTROL_BITPOS, + ICP_QAT_HW_COMP_20_CONFIG_CSR_RMB_CONTROL_MASK); + QAT_FIELD_SET(val32, csr.som_ctrl, + ICP_QAT_HW_COMP_20_CONFIG_CSR_SOM_CONTROL_BITPOS, + ICP_QAT_HW_COMP_20_CONFIG_CSR_SOM_CONTROL_MASK); + QAT_FIELD_SET(val32, csr.skip_hash_ctrl, + ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_RD_CONTROL_BITPOS, + ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_RD_CONTROL_MASK); + QAT_FIELD_SET(val32, csr.scb_unload_ctrl, + ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_UNLOAD_CONTROL_BITPOS, + 
ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_UNLOAD_CONTROL_MASK); + QAT_FIELD_SET(val32, csr.disable_token_fusion_ctrl, + ICP_QAT_HW_COMP_20_CONFIG_CSR_DISABLE_TOKEN_FUSION_CONTROL_BITPOS, + ICP_QAT_HW_COMP_20_CONFIG_CSR_DISABLE_TOKEN_FUSION_CONTROL_MASK); + QAT_FIELD_SET(val32, csr.lbms, + ICP_QAT_HW_COMP_20_CONFIG_CSR_LBMS_BITPOS, + ICP_QAT_HW_COMP_20_CONFIG_CSR_LBMS_MASK); + QAT_FIELD_SET(val32, csr.scb_mode_reset, + ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_MODE_RESET_MASK_BITPOS, + ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_MODE_RESET_MASK_MASK); + QAT_FIELD_SET(val32, csr.lazy, + ICP_QAT_HW_COMP_20_CONFIG_CSR_LAZY_PARAM_BITPOS, + ICP_QAT_HW_COMP_20_CONFIG_CSR_LAZY_PARAM_MASK); + QAT_FIELD_SET(val32, csr.nice, + ICP_QAT_HW_COMP_20_CONFIG_CSR_NICE_PARAM_BITPOS, + ICP_QAT_HW_COMP_20_CONFIG_CSR_NICE_PARAM_MASK); + + return __builtin_bswap32(val32); +} + +struct icp_qat_hw_decomp_20_config_csr_lower { + enum icp_qat_hw_decomp_20_hbs_control hbs; + enum icp_qat_hw_decomp_20_lbms lbms; + enum icp_qat_hw_decomp_20_hw_comp_format algo; + enum icp_qat_hw_decomp_20_min_match_control mmctrl; + enum icp_qat_hw_decomp_20_lz4_block_checksum_present lbc; +}; + +static inline __u32 +ICP_QAT_FW_DECOMP_20_BUILD_CONFIG_LOWER(struct icp_qat_hw_decomp_20_config_csr_lower csr) +{ + u32 val32 = 0; + + QAT_FIELD_SET(val32, csr.hbs, + ICP_QAT_HW_DECOMP_20_CONFIG_CSR_HBS_CONTROL_BITPOS, + ICP_QAT_HW_DECOMP_20_CONFIG_CSR_HBS_CONTROL_MASK); + QAT_FIELD_SET(val32, csr.lbms, + ICP_QAT_HW_DECOMP_20_CONFIG_CSR_LBMS_BITPOS, + ICP_QAT_HW_DECOMP_20_CONFIG_CSR_LBMS_MASK); + QAT_FIELD_SET(val32, csr.algo, + ICP_QAT_HW_DECOMP_20_CONFIG_CSR_HW_DECOMP_FORMAT_BITPOS, + ICP_QAT_HW_DECOMP_20_CONFIG_CSR_HW_DECOMP_FORMAT_MASK); + QAT_FIELD_SET(val32, csr.mmctrl, + ICP_QAT_HW_DECOMP_20_CONFIG_CSR_MIN_MATCH_CONTROL_BITPOS, + ICP_QAT_HW_DECOMP_20_CONFIG_CSR_MIN_MATCH_CONTROL_MASK); + QAT_FIELD_SET(val32, csr.lbc, + ICP_QAT_HW_DECOMP_20_CONFIG_CSR_LZ4_BLOCK_CHECKSUM_PRESENT_BITPOS, + 
ICP_QAT_HW_DECOMP_20_CONFIG_CSR_LZ4_BLOCK_CHECKSUM_PRESENT_MASK); + + return __builtin_bswap32(val32); +} + +struct icp_qat_hw_decomp_20_config_csr_upper { + enum icp_qat_hw_decomp_20_speculative_decoder_control sdc; + enum icp_qat_hw_decomp_20_mini_cam_control mcc; +}; + +static inline __u32 +ICP_QAT_FW_DECOMP_20_BUILD_CONFIG_UPPER(struct icp_qat_hw_decomp_20_config_csr_upper csr) +{ + u32 val32 = 0; + + QAT_FIELD_SET(val32, csr.sdc, + ICP_QAT_HW_DECOMP_20_CONFIG_CSR_SPECULATIVE_DECODER_CONTROL_BITPOS, + ICP_QAT_HW_DECOMP_20_CONFIG_CSR_SPECULATIVE_DECODER_CONTROL_MASK); + QAT_FIELD_SET(val32, csr.mcc, + ICP_QAT_HW_DECOMP_20_CONFIG_CSR_MINI_CAM_CONTROL_BITPOS, + ICP_QAT_HW_DECOMP_20_CONFIG_CSR_MINI_CAM_CONTROL_MASK); + + return __builtin_bswap32(val32); +} + +#endif diff --git a/drivers/crypto/qat/qat_common/icp_qat_hw_20_comp_defs.h b/drivers/crypto/qat/qat_common/icp_qat_hw_20_comp_defs.h new file mode 100644 index 000000000000..208d4554283b --- /dev/null +++ b/drivers/crypto/qat/qat_common/icp_qat_hw_20_comp_defs.h @@ -0,0 +1,300 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2022 Intel Corporation */ +#ifndef _ICP_QAT_HW_20_COMP_DEFS_H +#define _ICP_QAT_HW_20_COMP_DEFS_H + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_CONTROL_BITPOS 31 +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_CONTROL_MASK 0x1 + +enum icp_qat_hw_comp_20_scb_control { + ICP_QAT_HW_COMP_20_SCB_CONTROL_ENABLE = 0x0, + ICP_QAT_HW_COMP_20_SCB_CONTROL_DISABLE = 0x1, +}; + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_CONTROL_DEFAULT_VAL \ + ICP_QAT_HW_COMP_20_SCB_CONTROL_DISABLE + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_RMB_CONTROL_BITPOS 30 +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_RMB_CONTROL_MASK 0x1 + +enum icp_qat_hw_comp_20_rmb_control { + ICP_QAT_HW_COMP_20_RMB_CONTROL_RESET_ALL = 0x0, + ICP_QAT_HW_COMP_20_RMB_CONTROL_RESET_FC_ONLY = 0x1, +}; + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_RMB_CONTROL_DEFAULT_VAL \ + ICP_QAT_HW_COMP_20_RMB_CONTROL_RESET_ALL + +#define 
ICP_QAT_HW_COMP_20_CONFIG_CSR_SOM_CONTROL_BITPOS 28 +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SOM_CONTROL_MASK 0x3 + +enum icp_qat_hw_comp_20_som_control { + ICP_QAT_HW_COMP_20_SOM_CONTROL_NORMAL_MODE = 0x0, + ICP_QAT_HW_COMP_20_SOM_CONTROL_REPLAY_MODE = 0x1, + ICP_QAT_HW_COMP_20_SOM_CONTROL_INPUT_CRC = 0x2, + ICP_QAT_HW_COMP_20_SOM_CONTROL_RESERVED_MODE = 0x3, +}; + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SOM_CONTROL_DEFAULT_VAL \ + ICP_QAT_HW_COMP_20_SOM_CONTROL_NORMAL_MODE + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_RD_CONTROL_BITPOS 27 +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_RD_CONTROL_MASK 0x1 + +enum icp_qat_hw_comp_20_skip_hash_rd_control { + ICP_QAT_HW_COMP_20_SKIP_HASH_RD_CONTROL_NO_SKIP = 0x0, + ICP_QAT_HW_COMP_20_SKIP_HASH_RD_CONTROL_SKIP_HASH_READS = 0x1, +}; + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_RD_CONTROL_DEFAULT_VAL \ + ICP_QAT_HW_COMP_20_SKIP_HASH_RD_CONTROL_NO_SKIP + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_UNLOAD_CONTROL_BITPOS 26 +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_UNLOAD_CONTROL_MASK 0x1 + +enum icp_qat_hw_comp_20_scb_unload_control { + ICP_QAT_HW_COMP_20_SCB_UNLOAD_CONTROL_UNLOAD = 0x0, + ICP_QAT_HW_COMP_20_SCB_UNLOAD_CONTROL_NO_UNLOAD = 0x1, +}; + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_UNLOAD_CONTROL_DEFAULT_VAL \ + ICP_QAT_HW_COMP_20_SCB_UNLOAD_CONTROL_UNLOAD + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_DISABLE_TOKEN_FUSION_CONTROL_BITPOS 21 +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_DISABLE_TOKEN_FUSION_CONTROL_MASK 0x1 + +enum icp_qat_hw_comp_20_disable_token_fusion_control { + ICP_QAT_HW_COMP_20_DISABLE_TOKEN_FUSION_CONTROL_ENABLE = 0x0, + ICP_QAT_HW_COMP_20_DISABLE_TOKEN_FUSION_CONTROL_DISABLE = 0x1, +}; + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_DISABLE_TOKEN_FUSION_CONTROL_DEFAULT_VAL \ + ICP_QAT_HW_COMP_20_DISABLE_TOKEN_FUSION_CONTROL_ENABLE + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_LBMS_BITPOS 19 +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_LBMS_MASK 0x3 + +enum icp_qat_hw_comp_20_lbms { + 
ICP_QAT_HW_COMP_20_LBMS_LBMS_64KB = 0x0, + ICP_QAT_HW_COMP_20_LBMS_LBMS_256KB = 0x1, + ICP_QAT_HW_COMP_20_LBMS_LBMS_1MB = 0x2, + ICP_QAT_HW_COMP_20_LBMS_LBMS_4MB = 0x3, +}; + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_LBMS_DEFAULT_VAL \ + ICP_QAT_HW_COMP_20_LBMS_LBMS_64KB + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_MODE_RESET_MASK_BITPOS 18 +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_MODE_RESET_MASK_MASK 0x1 + +enum icp_qat_hw_comp_20_scb_mode_reset_mask { + ICP_QAT_HW_COMP_20_SCB_MODE_RESET_MASK_RESET_COUNTERS = 0x0, + ICP_QAT_HW_COMP_20_SCB_MODE_RESET_MASK_RESET_COUNTERS_AND_HISTORY = 0x1, +}; + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_MODE_RESET_MASK_DEFAULT_VAL \ + ICP_QAT_HW_COMP_20_SCB_MODE_RESET_MASK_RESET_COUNTERS + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_LAZY_PARAM_BITPOS 9 +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_LAZY_PARAM_MASK 0x1ff +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_LAZY_PARAM_DEFAULT_VAL 258 + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_NICE_PARAM_BITPOS 0 +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_NICE_PARAM_MASK 0x1ff +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_NICE_PARAM_DEFAULT_VAL 259 + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_HBS_CONTROL_BITPOS 14 +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_HBS_CONTROL_MASK 0x7 + +enum icp_qat_hw_comp_20_hbs_control { + ICP_QAT_HW_COMP_20_HBS_CONTROL_HBS_IS_32KB = 0x0, + ICP_QAT_HW_COMP_23_HBS_CONTROL_HBS_IS_64KB = 0x1, +}; + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_HBS_CONTROL_DEFAULT_VAL \ + ICP_QAT_HW_COMP_20_HBS_CONTROL_HBS_IS_32KB + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_ABD_BITPOS 13 +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_ABD_MASK 0x1 + +enum icp_qat_hw_comp_20_abd { + ICP_QAT_HW_COMP_20_ABD_ABD_ENABLED = 0x0, + ICP_QAT_HW_COMP_20_ABD_ABD_DISABLED = 0x1, +}; + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_ABD_DEFAULT_VAL \ + ICP_QAT_HW_COMP_20_ABD_ABD_ENABLED + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_LLLBD_CTRL_BITPOS 12 +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_LLLBD_CTRL_MASK 0x1 + +enum icp_qat_hw_comp_20_lllbd_ctrl { + 
ICP_QAT_HW_COMP_20_LLLBD_CTRL_LLLBD_ENABLED = 0x0, + ICP_QAT_HW_COMP_20_LLLBD_CTRL_LLLBD_DISABLED = 0x1, +}; + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_LLLBD_CTRL_DEFAULT_VAL \ + ICP_QAT_HW_COMP_20_LLLBD_CTRL_LLLBD_ENABLED + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SEARCH_DEPTH_BITPOS 8 +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SEARCH_DEPTH_MASK 0xf + +enum icp_qat_hw_comp_20_search_depth { + ICP_QAT_HW_COMP_20_SEARCH_DEPTH_LEVEL_1 = 0x1, + ICP_QAT_HW_COMP_20_SEARCH_DEPTH_LEVEL_6 = 0x3, + ICP_QAT_HW_COMP_20_SEARCH_DEPTH_LEVEL_9 = 0x4, +}; + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SEARCH_DEPTH_DEFAULT_VAL \ + ICP_QAT_HW_COMP_20_SEARCH_DEPTH_LEVEL_1 + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_HW_COMP_FORMAT_BITPOS 5 +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_HW_COMP_FORMAT_MASK 0x7 + +enum icp_qat_hw_comp_20_hw_comp_format { + ICP_QAT_HW_COMP_20_HW_COMP_FORMAT_ILZ77 = 0x0, + ICP_QAT_HW_COMP_20_HW_COMP_FORMAT_DEFLATE = 0x1, + ICP_QAT_HW_COMP_20_HW_COMP_FORMAT_LZ4 = 0x2, + ICP_QAT_HW_COMP_20_HW_COMP_FORMAT_LZ4S = 0x3, + ICP_QAT_HW_COMP_23_HW_COMP_FORMAT_ZSTD = 0x4, +}; + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_HW_COMP_FORMAT_DEFAULT_VAL \ + ICP_QAT_HW_COMP_20_HW_COMP_FORMAT_DEFLATE + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_MIN_MATCH_CONTROL_BITPOS 4 +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_MIN_MATCH_CONTROL_MASK 0x1 + +enum icp_qat_hw_comp_20_min_match_control { + ICP_QAT_HW_COMP_20_MIN_MATCH_CONTROL_MATCH_3B = 0x0, + ICP_QAT_HW_COMP_20_MIN_MATCH_CONTROL_MATCH_4B = 0x1, +}; + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_MIN_MATCH_CONTROL_DEFAULT_VAL \ + ICP_QAT_HW_COMP_20_MIN_MATCH_CONTROL_MATCH_3B + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_COLLISION_BITPOS 3 +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_COLLISION_MASK 0x1 + +enum icp_qat_hw_comp_20_skip_hash_collision { + ICP_QAT_HW_COMP_20_SKIP_HASH_COLLISION_ALLOW = 0x0, + ICP_QAT_HW_COMP_20_SKIP_HASH_COLLISION_DONT_ALLOW = 0x1, +}; + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_COLLISION_DEFAULT_VAL \ + 
ICP_QAT_HW_COMP_20_SKIP_HASH_COLLISION_ALLOW + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_UPDATE_BITPOS 2 +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_UPDATE_MASK 0x1 + +enum icp_qat_hw_comp_20_skip_hash_update { + ICP_QAT_HW_COMP_20_SKIP_HASH_UPDATE_ALLOW = 0x0, + ICP_QAT_HW_COMP_20_SKIP_HASH_UPDATE_DONT_ALLOW = 0x1, +}; + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_UPDATE_DEFAULT_VAL \ + ICP_QAT_HW_COMP_20_SKIP_HASH_UPDATE_ALLOW + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_BYTE_SKIP_BITPOS 1 +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_BYTE_SKIP_MASK 0x1 + +enum icp_qat_hw_comp_20_byte_skip { + ICP_QAT_HW_COMP_20_BYTE_SKIP_3BYTE_TOKEN = 0x0, + ICP_QAT_HW_COMP_20_BYTE_SKIP_3BYTE_LITERAL = 0x1, +}; + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_BYTE_SKIP_DEFAULT_VAL \ + ICP_QAT_HW_COMP_20_BYTE_SKIP_3BYTE_TOKEN + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_EXTENDED_DELAY_MATCH_MODE_BITPOS 0 +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_EXTENDED_DELAY_MATCH_MODE_MASK 0x1 + +enum icp_qat_hw_comp_20_extended_delay_match_mode { + ICP_QAT_HW_COMP_20_EXTENDED_DELAY_MATCH_MODE_EDMM_DISABLED = 0x0, + ICP_QAT_HW_COMP_20_EXTENDED_DELAY_MATCH_MODE_EDMM_ENABLED = 0x1, +}; + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_EXTENDED_DELAY_MATCH_MODE_DEFAULT_VAL \ + ICP_QAT_HW_COMP_20_EXTENDED_DELAY_MATCH_MODE_EDMM_DISABLED + +#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_SPECULATIVE_DECODER_CONTROL_BITPOS 31 +#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_SPECULATIVE_DECODER_CONTROL_MASK 0x1 + +enum icp_qat_hw_decomp_20_speculative_decoder_control { + ICP_QAT_HW_DECOMP_20_SPECULATIVE_DECODER_CONTROL_ENABLE = 0x0, + ICP_QAT_HW_DECOMP_20_SPECULATIVE_DECODER_CONTROL_DISABLE = 0x1, +}; + +#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_SPECULATIVE_DECODER_CONTROL_DEFAULT_VAL \ + ICP_QAT_HW_DECOMP_20_SPECULATIVE_DECODER_CONTROL_ENABLE + +#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_MINI_CAM_CONTROL_BITPOS 30 +#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_MINI_CAM_CONTROL_MASK 0x1 + +enum icp_qat_hw_decomp_20_mini_cam_control { + 
ICP_QAT_HW_DECOMP_20_MINI_CAM_CONTROL_ENABLE = 0x0, + ICP_QAT_HW_DECOMP_20_MINI_CAM_CONTROL_DISABLE = 0x1, +}; + +#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_MINI_CAM_CONTROL_DEFAULT_VAL \ + ICP_QAT_HW_DECOMP_20_MINI_CAM_CONTROL_ENABLE + +#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_HBS_CONTROL_BITPOS 14 +#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_HBS_CONTROL_MASK 0x7 + +enum icp_qat_hw_decomp_20_hbs_control { + ICP_QAT_HW_DECOMP_20_HBS_CONTROL_HBS_IS_32KB = 0x0, +}; + +#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_HBS_CONTROL_DEFAULT_VAL \ + ICP_QAT_HW_DECOMP_20_HBS_CONTROL_HBS_IS_32KB + +#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_LBMS_BITPOS 8 +#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_LBMS_MASK 0x3 + +enum icp_qat_hw_decomp_20_lbms { + ICP_QAT_HW_DECOMP_20_LBMS_LBMS_64KB = 0x0, + ICP_QAT_HW_DECOMP_20_LBMS_LBMS_256KB = 0x1, + ICP_QAT_HW_DECOMP_20_LBMS_LBMS_1MB = 0x2, + ICP_QAT_HW_DECOMP_20_LBMS_LBMS_4MB = 0x3, +}; + +#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_LBMS_DEFAULT_VAL \ + ICP_QAT_HW_DECOMP_20_LBMS_LBMS_64KB + +#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_HW_DECOMP_FORMAT_BITPOS 5 +#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_HW_DECOMP_FORMAT_MASK 0x7 + +enum icp_qat_hw_decomp_20_hw_comp_format { + ICP_QAT_HW_DECOMP_20_HW_DECOMP_FORMAT_DEFLATE = 0x1, + ICP_QAT_HW_DECOMP_20_HW_DECOMP_FORMAT_LZ4 = 0x2, + ICP_QAT_HW_DECOMP_20_HW_DECOMP_FORMAT_LZ4S = 0x3, + ICP_QAT_HW_DECOMP_23_HW_DECOMP_FORMAT_ZSTD = 0x4, +}; + +#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_HW_DECOMP_FORMAT_DEFAULT_VAL \ + ICP_QAT_HW_DECOMP_20_HW_DECOMP_FORMAT_DEFLATE + +#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_MIN_MATCH_CONTROL_BITPOS 4 +#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_MIN_MATCH_CONTROL_MASK 0x1 + +enum icp_qat_hw_decomp_20_min_match_control { + ICP_QAT_HW_DECOMP_20_MIN_MATCH_CONTROL_MATCH_3B = 0x0, + ICP_QAT_HW_DECOMP_20_MIN_MATCH_CONTROL_MATCH_4B = 0x1, +}; + +#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_MIN_MATCH_CONTROL_DEFAULT_VAL \ + ICP_QAT_HW_DECOMP_20_MIN_MATCH_CONTROL_MATCH_3B + +#define 
ICP_QAT_HW_DECOMP_20_CONFIG_CSR_LZ4_BLOCK_CHECKSUM_PRESENT_BITPOS 3 +#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_LZ4_BLOCK_CHECKSUM_PRESENT_MASK 0x1 + +enum icp_qat_hw_decomp_20_lz4_block_checksum_present { + ICP_QAT_HW_DECOMP_20_LZ4_BLOCK_CHKSUM_ABSENT = 0x0, + ICP_QAT_HW_DECOMP_20_LZ4_BLOCK_CHKSUM_PRESENT = 0x1, +}; + +#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_LZ4_BLOCK_CHECKSUM_PRESENT_DEFAULT_VAL \ + ICP_QAT_HW_DECOMP_20_LZ4_BLOCK_CHKSUM_ABSENT + +#endif From 5fc8041e56782e4d44682f8c2e4d822817a4dae6 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Mon, 28 Nov 2022 12:21:22 +0000 Subject: [PATCH 3565/4122] crypto: acomp - define max size for destination The acomp API allows to send requests with a NULL destination buffer. In this case, the algorithm implementation needs to allocate the destination scatter list, perform the operation and return the buffer to the user. For decompression, data is likely to expand and be bigger than the allocated buffer. Define the maximum size (128KB) that acomp implementations will allocate for decompression operations as destination buffer when they receive a request with a NULL destination buffer. Suggested-by: Herbert Xu Signed-off-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- include/crypto/acompress.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/crypto/acompress.h b/include/crypto/acompress.h index cb3d6b1c655d..e4bc96528902 100644 --- a/include/crypto/acompress.h +++ b/include/crypto/acompress.h @@ -11,6 +11,7 @@ #include #define CRYPTO_ACOMP_ALLOC_OUTPUT 0x00000001 +#define CRYPTO_ACOMP_DST_MAX 131072 /** * struct acomp_req - asynchronous (de)compression request From 3112d0f1b0b32daac97d170dbc9d3cce69f7ff49 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Mon, 28 Nov 2022 12:21:23 +0000 Subject: [PATCH 3566/4122] crypto: qat - add resubmit logic for decompression The acomp API allows to send requests with a NULL destination buffer. 
In this case, the algorithm implementation needs to allocate the destination scatter list, perform the operation and return the buffer to the user. For decompression, data is likely to expand and be bigger than the allocated buffer. This implements a re-submission mechanism for decompression requests that is triggered if the destination buffer, allocated by the driver, is not sufficiently big to store the output from decompression. If an overflow is detected when processing the callback for a decompression request with a NULL destination buffer, a workqueue is scheduled. This allocates a new scatter list of size CRYPTO_ACOMP_DST_MAX, now 128KB, creates a new firmware scatter list and resubmits the job to the hardware accelerator. Suggested-by: Herbert Xu Signed-off-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/qat_bl.c | 159 ++++++++++++++++++ drivers/crypto/qat/qat_common/qat_bl.h | 6 + drivers/crypto/qat/qat_common/qat_comp_algs.c | 70 ++++++++ drivers/crypto/qat/qat_common/qat_comp_req.h | 10 ++ 4 files changed, 245 insertions(+) diff --git a/drivers/crypto/qat/qat_common/qat_bl.c b/drivers/crypto/qat/qat_common/qat_bl.c index 221a4eb610a3..2e89ff08041b 100644 --- a/drivers/crypto/qat/qat_common/qat_bl.c +++ b/drivers/crypto/qat/qat_common/qat_bl.c @@ -222,3 +222,162 @@ int qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev, extra_dst_buff, sz_extra_dst_buff, flags); } + +static void qat_bl_sgl_unmap(struct adf_accel_dev *accel_dev, + struct qat_alg_buf_list *bl) +{ + struct device *dev = &GET_DEV(accel_dev); + int n = bl->num_bufs; + int i; + + for (i = 0; i < n; i++) + if (!dma_mapping_error(dev, bl->bufers[i].addr)) + dma_unmap_single(dev, bl->bufers[i].addr, + bl->bufers[i].len, DMA_FROM_DEVICE); +} + +static int qat_bl_sgl_map(struct adf_accel_dev *accel_dev, + struct scatterlist *sgl, + struct qat_alg_buf_list **bl) +{ + struct device *dev = &GET_DEV(accel_dev); + struct qat_alg_buf_list *bufl; + int node = 
dev_to_node(dev); + struct scatterlist *sg; + int n, i, sg_nctr; + size_t sz; + + n = sg_nents(sgl); + sz = struct_size(bufl, bufers, n); + bufl = kzalloc_node(sz, GFP_KERNEL, node); + if (unlikely(!bufl)) + return -ENOMEM; + + for (i = 0; i < n; i++) + bufl->bufers[i].addr = DMA_MAPPING_ERROR; + + sg_nctr = 0; + for_each_sg(sgl, sg, n, i) { + int y = sg_nctr; + + if (!sg->length) + continue; + + bufl->bufers[y].addr = dma_map_single(dev, sg_virt(sg), + sg->length, + DMA_FROM_DEVICE); + bufl->bufers[y].len = sg->length; + if (unlikely(dma_mapping_error(dev, bufl->bufers[y].addr))) + goto err_map; + sg_nctr++; + } + bufl->num_bufs = sg_nctr; + bufl->num_mapped_bufs = sg_nctr; + + *bl = bufl; + + return 0; + +err_map: + for (i = 0; i < n; i++) + if (!dma_mapping_error(dev, bufl->bufers[i].addr)) + dma_unmap_single(dev, bufl->bufers[i].addr, + bufl->bufers[i].len, + DMA_FROM_DEVICE); + kfree(bufl); + *bl = NULL; + + return -ENOMEM; +} + +static void qat_bl_sgl_free_unmap(struct adf_accel_dev *accel_dev, + struct scatterlist *sgl, + struct qat_alg_buf_list *bl, + bool free_bl) +{ + if (bl) { + qat_bl_sgl_unmap(accel_dev, bl); + + if (free_bl) + kfree(bl); + } + if (sgl) + sgl_free(sgl); +} + +static int qat_bl_sgl_alloc_map(struct adf_accel_dev *accel_dev, + struct scatterlist **sgl, + struct qat_alg_buf_list **bl, + unsigned int dlen, + gfp_t gfp) +{ + struct scatterlist *dst; + int ret; + + dst = sgl_alloc(dlen, gfp, NULL); + if (!dst) { + dev_err(&GET_DEV(accel_dev), "sg_alloc failed\n"); + return -ENOMEM; + } + + ret = qat_bl_sgl_map(accel_dev, dst, bl); + if (ret) + goto err; + + *sgl = dst; + + return 0; + +err: + sgl_free(dst); + *sgl = NULL; + return ret; +} + +int qat_bl_realloc_map_new_dst(struct adf_accel_dev *accel_dev, + struct scatterlist **sg, + unsigned int dlen, + struct qat_request_buffs *qat_bufs, + gfp_t gfp) +{ + struct device *dev = &GET_DEV(accel_dev); + dma_addr_t new_blp = DMA_MAPPING_ERROR; + struct qat_alg_buf_list *new_bl; + struct 
scatterlist *new_sg; + size_t new_bl_size; + int ret; + + ret = qat_bl_sgl_alloc_map(accel_dev, &new_sg, &new_bl, dlen, gfp); + if (ret) + return ret; + + new_bl_size = struct_size(new_bl, bufers, new_bl->num_bufs); + + /* Map new firmware SGL descriptor */ + new_blp = dma_map_single(dev, new_bl, new_bl_size, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, new_blp))) + goto err; + + /* Unmap old firmware SGL descriptor */ + dma_unmap_single(dev, qat_bufs->bloutp, qat_bufs->sz_out, DMA_TO_DEVICE); + + /* Free and unmap old scatterlist */ + qat_bl_sgl_free_unmap(accel_dev, *sg, qat_bufs->blout, + !qat_bufs->sgl_dst_valid); + + qat_bufs->sgl_dst_valid = false; + qat_bufs->blout = new_bl; + qat_bufs->bloutp = new_blp; + qat_bufs->sz_out = new_bl_size; + + *sg = new_sg; + + return 0; +err: + qat_bl_sgl_free_unmap(accel_dev, new_sg, new_bl, true); + + if (!dma_mapping_error(dev, new_blp)) + dma_unmap_single(dev, new_blp, new_bl_size, DMA_TO_DEVICE); + + return -ENOMEM; +} diff --git a/drivers/crypto/qat/qat_common/qat_bl.h b/drivers/crypto/qat/qat_common/qat_bl.h index 5f2ea8f352f7..8ca5e52ee9e2 100644 --- a/drivers/crypto/qat/qat_common/qat_bl.h +++ b/drivers/crypto/qat/qat_common/qat_bl.h @@ -58,4 +58,10 @@ static inline gfp_t qat_algs_alloc_flags(struct crypto_async_request *req) return req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ? 
GFP_KERNEL : GFP_ATOMIC; } +int qat_bl_realloc_map_new_dst(struct adf_accel_dev *accel_dev, + struct scatterlist **newd, + unsigned int dlen, + struct qat_request_buffs *qat_bufs, + gfp_t gfp); + #endif diff --git a/drivers/crypto/qat/qat_common/qat_comp_algs.c b/drivers/crypto/qat/qat_common/qat_comp_algs.c index 63fd4ac33dbf..1480d36a8d2b 100644 --- a/drivers/crypto/qat/qat_common/qat_comp_algs.c +++ b/drivers/crypto/qat/qat_common/qat_comp_algs.c @@ -5,6 +5,7 @@ #include #include #include +#include #include "adf_accel_devices.h" #include "adf_common_drv.h" #include "qat_bl.h" @@ -25,6 +26,11 @@ struct qat_compression_ctx { struct qat_compression_instance *inst; }; +struct qat_dst { + bool is_null; + int resubmitted; +}; + struct qat_compression_req { u8 req[QAT_COMP_REQ_SIZE]; struct qat_compression_ctx *qat_compression_ctx; @@ -33,6 +39,8 @@ struct qat_compression_req { enum direction dir; int actual_dlen; struct qat_alg_req alg_req; + struct work_struct resubmit; + struct qat_dst dst; }; static int qat_alg_send_dc_message(struct qat_compression_req *qat_req, @@ -49,6 +57,46 @@ static int qat_alg_send_dc_message(struct qat_compression_req *qat_req, return qat_alg_send_message(alg_req); } +static void qat_comp_resubmit(struct work_struct *work) +{ + struct qat_compression_req *qat_req = + container_of(work, struct qat_compression_req, resubmit); + struct qat_compression_ctx *ctx = qat_req->qat_compression_ctx; + struct adf_accel_dev *accel_dev = ctx->inst->accel_dev; + struct qat_request_buffs *qat_bufs = &qat_req->buf; + struct qat_compression_instance *inst = ctx->inst; + struct acomp_req *areq = qat_req->acompress_req; + struct crypto_acomp *tfm = crypto_acomp_reqtfm(areq); + unsigned int dlen = CRYPTO_ACOMP_DST_MAX; + u8 *req = qat_req->req; + dma_addr_t dfbuf; + int ret; + + areq->dlen = dlen; + + dev_dbg(&GET_DEV(accel_dev), "[%s][%s] retry NULL dst request - dlen = %d\n", + crypto_tfm_alg_driver_name(crypto_acomp_tfm(tfm)), + qat_req->dir == COMPRESSION ? 
"comp" : "decomp", dlen); + + ret = qat_bl_realloc_map_new_dst(accel_dev, &areq->dst, dlen, qat_bufs, + qat_algs_alloc_flags(&areq->base)); + if (ret) + goto err; + + qat_req->dst.resubmitted = true; + + dfbuf = qat_req->buf.bloutp; + qat_comp_override_dst(req, dfbuf, dlen); + + ret = qat_alg_send_dc_message(qat_req, inst, &areq->base); + if (ret != -ENOSPC) + return; + +err: + qat_bl_free_bufl(accel_dev, qat_bufs); + areq->base.complete(&areq->base, ret); +} + static void qat_comp_generic_callback(struct qat_compression_req *qat_req, void *resp) { @@ -80,6 +128,21 @@ static void qat_comp_generic_callback(struct qat_compression_req *qat_req, areq->dlen = 0; + if (qat_req->dir == DECOMPRESSION && qat_req->dst.is_null) { + if (cmp_err == ERR_CODE_OVERFLOW_ERROR) { + if (qat_req->dst.resubmitted) { + dev_dbg(&GET_DEV(accel_dev), + "Output does not fit destination buffer\n"); + res = -EOVERFLOW; + goto end; + } + + INIT_WORK(&qat_req->resubmit, qat_comp_resubmit); + adf_misc_wq_queue_work(&qat_req->resubmit); + return; + } + } + if (unlikely(status != ICP_QAT_FW_COMN_STATUS_FLAG_OK)) goto end; @@ -176,16 +239,23 @@ static int qat_comp_alg_compress_decompress(struct acomp_req *areq, if (areq->dst && !dlen) return -EINVAL; + qat_req->dst.is_null = false; + /* Handle acomp requests that require the allocation of a destination * buffer. The size of the destination buffer is double the source * buffer (rounded up to the size of a page) to fit the decompressed * output or an expansion on the data for compression. 
*/ if (!areq->dst) { + qat_req->dst.is_null = true; + dlen = round_up(2 * slen, PAGE_SIZE); areq->dst = sgl_alloc(dlen, f, NULL); if (!areq->dst) return -ENOMEM; + + areq->dlen = dlen; + qat_req->dst.resubmitted = false; } if (dir == COMPRESSION) { diff --git a/drivers/crypto/qat/qat_common/qat_comp_req.h b/drivers/crypto/qat/qat_common/qat_comp_req.h index 18a1f33a6db9..404e32c5e778 100644 --- a/drivers/crypto/qat/qat_common/qat_comp_req.h +++ b/drivers/crypto/qat/qat_common/qat_comp_req.h @@ -25,6 +25,16 @@ static inline void qat_comp_create_req(void *ctx, void *req, u64 src, u32 slen, req_pars->out_buffer_sz = dlen; } +static inline void qat_comp_override_dst(void *req, u64 dst, u32 dlen) +{ + struct icp_qat_fw_comp_req *fw_req = req; + struct icp_qat_fw_comp_req_params *req_pars = &fw_req->comp_pars; + + fw_req->comn_mid.dest_data_addr = dst; + fw_req->comn_mid.dst_length = dlen; + req_pars->out_buffer_sz = dlen; +} + static inline void qat_comp_create_compression_req(void *ctx, void *req, u64 src, u32 slen, u64 dst, u32 dlen, From 3564f5a2144355cadb4f0c5c14d2bc7fcd2418b9 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 29 Nov 2022 17:52:35 +0800 Subject: [PATCH 3567/4122] crypto: chelsio - Fix flexible struct array warning This patch fixes the sparse warning about arrays of flexible structures by removing an unnecessary use of them in struct __crypto_ctx. 
Signed-off-by: Herbert Xu --- drivers/crypto/chelsio/chcr_algo.c | 6 +++--- drivers/crypto/chelsio/chcr_crypto.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c index 6933546f87b1..9fac1e758406 100644 --- a/drivers/crypto/chelsio/chcr_algo.c +++ b/drivers/crypto/chelsio/chcr_algo.c @@ -98,17 +98,17 @@ static int chcr_handle_cipher_resp(struct skcipher_request *req, static inline struct chcr_aead_ctx *AEAD_CTX(struct chcr_context *ctx) { - return ctx->crypto_ctx->aeadctx; + return &ctx->crypto_ctx->aeadctx; } static inline struct ablk_ctx *ABLK_CTX(struct chcr_context *ctx) { - return ctx->crypto_ctx->ablkctx; + return &ctx->crypto_ctx->ablkctx; } static inline struct hmac_ctx *HMAC_CTX(struct chcr_context *ctx) { - return ctx->crypto_ctx->hmacctx; + return &ctx->crypto_ctx->hmacctx; } static inline struct chcr_gcm_ctx *GCM_CTX(struct chcr_aead_ctx *gctx) diff --git a/drivers/crypto/chelsio/chcr_crypto.h b/drivers/crypto/chelsio/chcr_crypto.h index c7816c83e324..7f88ddb08631 100644 --- a/drivers/crypto/chelsio/chcr_crypto.h +++ b/drivers/crypto/chelsio/chcr_crypto.h @@ -248,9 +248,9 @@ struct hmac_ctx { struct __crypto_ctx { union { - DECLARE_FLEX_ARRAY(struct hmac_ctx, hmacctx); - DECLARE_FLEX_ARRAY(struct ablk_ctx, ablkctx); - DECLARE_FLEX_ARRAY(struct chcr_aead_ctx, aeadctx); + struct hmac_ctx hmacctx; + struct ablk_ctx ablkctx; + struct chcr_aead_ctx aeadctx; }; }; From 67ab02dce3adad3ea399e824b37f8e1c2453449f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 29 Nov 2022 17:48:49 +0100 Subject: [PATCH 3568/4122] crypto: arm64/aes-neonbs - use frame_push/pop consistently Use the frame_push and frame_pop macros consistently to create the stack frame, so that we will get PAC and/or shadow call stack handling as well when enabled. 
Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/aes-neonbs-core.S | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/arch/arm64/crypto/aes-neonbs-core.S b/arch/arm64/crypto/aes-neonbs-core.S index d427f4556b6e..7278a37c2d5c 100644 --- a/arch/arm64/crypto/aes-neonbs-core.S +++ b/arch/arm64/crypto/aes-neonbs-core.S @@ -760,7 +760,7 @@ SYM_FUNC_START_LOCAL(__xts_crypt8) eor v6.16b, v6.16b, v31.16b eor v7.16b, v7.16b, v16.16b - stp q16, q17, [sp, #16] + stp q16, q17, [x6] mov bskey, x2 mov rounds, x3 @@ -768,8 +768,8 @@ SYM_FUNC_START_LOCAL(__xts_crypt8) SYM_FUNC_END(__xts_crypt8) .macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7 - stp x29, x30, [sp, #-48]! - mov x29, sp + frame_push 0, 32 + add x6, sp, #.Lframe_local_offset ld1 {v25.16b}, [x5] @@ -781,7 +781,7 @@ SYM_FUNC_END(__xts_crypt8) eor v18.16b, \o2\().16b, v27.16b eor v19.16b, \o3\().16b, v28.16b - ldp q24, q25, [sp, #16] + ldp q24, q25, [x6] eor v20.16b, \o4\().16b, v29.16b eor v21.16b, \o5\().16b, v30.16b @@ -795,7 +795,7 @@ SYM_FUNC_END(__xts_crypt8) b.gt 0b st1 {v25.16b}, [x5] - ldp x29, x30, [sp], #48 + frame_pop ret .endm @@ -820,9 +820,7 @@ SYM_FUNC_END(aesbs_xts_decrypt) * int rounds, int blocks, u8 iv[]) */ SYM_FUNC_START(aesbs_ctr_encrypt) - stp x29, x30, [sp, #-16]! - mov x29, sp - + frame_push 0 ldp x7, x8, [x5] ld1 {v0.16b}, [x5] CPU_LE( rev x7, x7 ) @@ -862,6 +860,6 @@ CPU_LE( rev x8, x8 ) b.gt 0b st1 {v0.16b}, [x5] - ldp x29, x30, [sp], #16 + frame_pop ret SYM_FUNC_END(aesbs_ctr_encrypt) From 7d709af18054bc9e2043499bb35eb1809c2a316f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 29 Nov 2022 17:48:50 +0100 Subject: [PATCH 3569/4122] crypto: arm64/aes-modes - use frame_push/pop macros consistently Use the frame_push and frame_pop macros to create the stack frames in the AES chaining mode wrappers so that they will get PAC and/or shadow call stack protection when configured. 
Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/aes-modes.S | 34 ++++++++++++---------------------- 1 file changed, 12 insertions(+), 22 deletions(-) diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S index 5abc834271f4..0e834a2c062c 100644 --- a/arch/arm64/crypto/aes-modes.S +++ b/arch/arm64/crypto/aes-modes.S @@ -52,8 +52,7 @@ SYM_FUNC_END(aes_decrypt_block5x) */ AES_FUNC_START(aes_ecb_encrypt) - stp x29, x30, [sp, #-16]! - mov x29, sp + frame_push 0 enc_prepare w3, x2, x5 @@ -77,14 +76,13 @@ ST5( st1 {v4.16b}, [x0], #16 ) subs w4, w4, #1 bne .Lecbencloop .Lecbencout: - ldp x29, x30, [sp], #16 + frame_pop ret AES_FUNC_END(aes_ecb_encrypt) AES_FUNC_START(aes_ecb_decrypt) - stp x29, x30, [sp, #-16]! - mov x29, sp + frame_push 0 dec_prepare w3, x2, x5 @@ -108,7 +106,7 @@ ST5( st1 {v4.16b}, [x0], #16 ) subs w4, w4, #1 bne .Lecbdecloop .Lecbdecout: - ldp x29, x30, [sp], #16 + frame_pop ret AES_FUNC_END(aes_ecb_decrypt) @@ -171,9 +169,6 @@ AES_FUNC_END(aes_cbc_encrypt) AES_FUNC_END(aes_essiv_cbc_encrypt) AES_FUNC_START(aes_essiv_cbc_decrypt) - stp x29, x30, [sp, #-16]! - mov x29, sp - ld1 {cbciv.16b}, [x5] /* get iv */ mov w8, #14 /* AES-256: 14 rounds */ @@ -182,11 +177,9 @@ AES_FUNC_START(aes_essiv_cbc_decrypt) b .Lessivcbcdecstart AES_FUNC_START(aes_cbc_decrypt) - stp x29, x30, [sp, #-16]! - mov x29, sp - ld1 {cbciv.16b}, [x5] /* get iv */ .Lessivcbcdecstart: + frame_push 0 dec_prepare w3, x2, x6 .LcbcdecloopNx: @@ -236,7 +229,7 @@ ST5( st1 {v4.16b}, [x0], #16 ) bne .Lcbcdecloop .Lcbcdecout: st1 {cbciv.16b}, [x5] /* return iv */ - ldp x29, x30, [sp], #16 + frame_pop ret AES_FUNC_END(aes_cbc_decrypt) AES_FUNC_END(aes_essiv_cbc_decrypt) @@ -337,8 +330,7 @@ AES_FUNC_END(aes_cbc_cts_decrypt) BLOCKS .req x13 BLOCKS_W .req w13 - stp x29, x30, [sp, #-16]! 
- mov x29, sp + frame_push 0 enc_prepare ROUNDS_W, KEY, IV_PART ld1 {vctr.16b}, [IV] @@ -481,7 +473,7 @@ ST5( st1 {v4.16b}, [OUT], #16 ) .if !\xctr st1 {vctr.16b}, [IV] /* return next CTR value */ .endif - ldp x29, x30, [sp], #16 + frame_pop ret .Lctrtail\xctr: @@ -645,8 +637,7 @@ AES_FUNC_END(aes_xctr_encrypt) .endm AES_FUNC_START(aes_xts_encrypt) - stp x29, x30, [sp, #-16]! - mov x29, sp + frame_push 0 ld1 {v4.16b}, [x6] xts_load_mask v8 @@ -704,7 +695,7 @@ AES_FUNC_START(aes_xts_encrypt) st1 {v0.16b}, [x0] .Lxtsencret: st1 {v4.16b}, [x6] - ldp x29, x30, [sp], #16 + frame_pop ret .LxtsencctsNx: @@ -732,8 +723,7 @@ AES_FUNC_START(aes_xts_encrypt) AES_FUNC_END(aes_xts_encrypt) AES_FUNC_START(aes_xts_decrypt) - stp x29, x30, [sp, #-16]! - mov x29, sp + frame_push 0 /* subtract 16 bytes if we are doing CTS */ sub w8, w4, #0x10 @@ -794,7 +784,7 @@ AES_FUNC_START(aes_xts_decrypt) b .Lxtsdecloop .Lxtsdecout: st1 {v4.16b}, [x6] - ldp x29, x30, [sp], #16 + frame_pop ret .Lxtsdeccts: From 489a4a05fe6d544f1f1052d2c6cd5bffbd89ddb6 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 29 Nov 2022 17:48:51 +0100 Subject: [PATCH 3570/4122] crypto: arm64/crct10dif - use frame_push/pop macros consistently Use the frame_push and frame_pop macros to set up the stack frame so that return address protections will be enabled automatically when configured. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/crct10dif-ce-core.S | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/arm64/crypto/crct10dif-ce-core.S b/arch/arm64/crypto/crct10dif-ce-core.S index dce6dcebfca1..5604de61d06d 100644 --- a/arch/arm64/crypto/crct10dif-ce-core.S +++ b/arch/arm64/crypto/crct10dif-ce-core.S @@ -429,7 +429,7 @@ CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 ) umov w0, v0.h[0] .ifc \p, p8 - ldp x29, x30, [sp], #16 + frame_pop .endif ret @@ -466,8 +466,7 @@ CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 ) // Assumes len >= 16.
// SYM_FUNC_START(crc_t10dif_pmull_p8) - stp x29, x30, [sp, #-16]! - mov x29, sp + frame_push 1 crc_t10dif_pmull p8 SYM_FUNC_END(crc_t10dif_pmull_p8) From a428636d4c827ebe967aa31a83684b4c8e742ed1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 29 Nov 2022 17:48:52 +0100 Subject: [PATCH 3571/4122] crypto: arm64/ghash-ce - use frame_push/pop macros consistently Use the frame_push and frame_pop macros to set up the stack frame so that return address protections will be enabled automatically when configured. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/ghash-ce-core.S | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S index ebe5558929b7..23ee9a5eaf27 100644 --- a/arch/arm64/crypto/ghash-ce-core.S +++ b/arch/arm64/crypto/ghash-ce-core.S @@ -436,9 +436,7 @@ SYM_FUNC_END(pmull_ghash_update_p8) .align 6 .macro pmull_gcm_do_crypt, enc - stp x29, x30, [sp, #-32]! - mov x29, sp - str x19, [sp, #24] + frame_push 1 load_round_keys x7, x6, x8 @@ -529,7 +527,7 @@ CPU_LE( rev w8, w8 ) .endif bne 0b -3: ldp x19, x10, [sp, #24] +3: ldr x10, [sp, #.Lframe_local_offset] cbz x10, 5f // output tag? ld1 {INP3.16b}, [x10] // load lengths[] @@ -562,7 +560,7 @@ CPU_LE( rev w8, w8 ) smov w0, v0.b[0] // return b0 .endif -4: ldp x29, x30, [sp], #32 +4: frame_pop ret 5: From 04ba54e5af8f8f0137b08cb51a0b3a2e1ea46c94 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Thu, 1 Dec 2022 14:25:26 +0800 Subject: [PATCH 3572/4122] crypto: img-hash - Fix variable dereferenced before check 'hdev->req' Smatch reports a warning as follows: drivers/crypto/img-hash.c:366 img_hash_dma_task() warn: variable dereferenced before check 'hdev->req' The variable should be dereferenced only after checking 'hdev->req'; fix it.
Fixes: d358f1abbf71 ("crypto: img-hash - Add Imagination Technologies hw hash accelerator") Fixes: 10badea259fa ("crypto: img-hash - Fix null pointer exception") Signed-off-by: Gaosheng Cui Signed-off-by: Herbert Xu --- drivers/crypto/img-hash.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/img-hash.c b/drivers/crypto/img-hash.c index d8e82d69745d..9629e98bd68b 100644 --- a/drivers/crypto/img-hash.c +++ b/drivers/crypto/img-hash.c @@ -358,12 +358,16 @@ static int img_hash_dma_init(struct img_hash_dev *hdev) static void img_hash_dma_task(unsigned long d) { struct img_hash_dev *hdev = (struct img_hash_dev *)d; - struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req); + struct img_hash_request_ctx *ctx; u8 *addr; size_t nbytes, bleft, wsend, len, tbc; struct scatterlist tsg; - if (!hdev->req || !ctx->sg) + if (!hdev->req) + return; + + ctx = ahash_request_ctx(hdev->req); + if (!ctx->sg) return; addr = sg_virt(ctx->sg); From 1c64a7e1f931821acadf964c5ddb0dc41abf9e20 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 2 Dec 2022 17:20:47 +0800 Subject: [PATCH 3573/4122] crypto: cavium - Set DMA alignment explicitly This driver has been implicitly relying on kmalloc alignment to be sufficient for DMA. This may no longer be the case with upcoming arm64 changes. This patch changes it to explicitly request DMA alignment from the Crypto API. 
Signed-off-by: Herbert Xu --- drivers/crypto/cavium/cpt/cptvf_algs.c | 10 +++++----- drivers/crypto/cavium/nitrox/nitrox_aead.c | 12 ++++++------ 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/crypto/cavium/cpt/cptvf_algs.c b/drivers/crypto/cavium/cpt/cptvf_algs.c index ce3b91c612f0..9eca0c302186 100644 --- a/drivers/crypto/cavium/cpt/cptvf_algs.c +++ b/drivers/crypto/cavium/cpt/cptvf_algs.c @@ -97,7 +97,7 @@ static inline u32 create_ctx_hdr(struct skcipher_request *req, u32 enc, { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct cvm_enc_ctx *ctx = crypto_skcipher_ctx(tfm); - struct cvm_req_ctx *rctx = skcipher_request_ctx(req); + struct cvm_req_ctx *rctx = skcipher_request_ctx_dma(req); struct fc_context *fctx = &rctx->fctx; u32 enc_iv_len = crypto_skcipher_ivsize(tfm); struct cpt_request_info *req_info = &rctx->cpt_req; @@ -151,7 +151,7 @@ static inline u32 create_ctx_hdr(struct skcipher_request *req, u32 enc, static inline u32 create_input_list(struct skcipher_request *req, u32 enc, u32 enc_iv_len) { - struct cvm_req_ctx *rctx = skcipher_request_ctx(req); + struct cvm_req_ctx *rctx = skcipher_request_ctx_dma(req); struct cpt_request_info *req_info = &rctx->cpt_req; u32 argcnt = 0; @@ -173,7 +173,7 @@ static inline void store_cb_info(struct skcipher_request *req, static inline void create_output_list(struct skcipher_request *req, u32 enc_iv_len) { - struct cvm_req_ctx *rctx = skcipher_request_ctx(req); + struct cvm_req_ctx *rctx = skcipher_request_ctx_dma(req); struct cpt_request_info *req_info = &rctx->cpt_req; u32 argcnt = 0; @@ -193,7 +193,7 @@ static inline void create_output_list(struct skcipher_request *req, static inline int cvm_enc_dec(struct skcipher_request *req, u32 enc) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct cvm_req_ctx *rctx = skcipher_request_ctx(req); + struct cvm_req_ctx *rctx = skcipher_request_ctx_dma(req); u32 enc_iv_len = crypto_skcipher_ivsize(tfm); struct fc_context 
*fctx = &rctx->fctx; struct cpt_request_info *req_info = &rctx->cpt_req; @@ -335,7 +335,7 @@ static int cvm_ecb_des3_setkey(struct crypto_skcipher *cipher, const u8 *key, static int cvm_enc_dec_init(struct crypto_skcipher *tfm) { - crypto_skcipher_set_reqsize(tfm, sizeof(struct cvm_req_ctx)); + crypto_skcipher_set_reqsize_dma(tfm, sizeof(struct cvm_req_ctx)); return 0; } diff --git a/drivers/crypto/cavium/nitrox/nitrox_aead.c b/drivers/crypto/cavium/nitrox/nitrox_aead.c index c93c4e41d267..0653484df23f 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_aead.c +++ b/drivers/crypto/cavium/nitrox/nitrox_aead.c @@ -392,7 +392,7 @@ static int nitrox_rfc4106_setauthsize(struct crypto_aead *aead, static int nitrox_rfc4106_set_aead_rctx_sglist(struct aead_request *areq) { - struct nitrox_rfc4106_rctx *rctx = aead_request_ctx(areq); + struct nitrox_rfc4106_rctx *rctx = aead_request_ctx_dma(areq); struct nitrox_aead_rctx *aead_rctx = &rctx->base; unsigned int assoclen = areq->assoclen - GCM_RFC4106_IV_SIZE; struct scatterlist *sg; @@ -424,7 +424,7 @@ static int nitrox_rfc4106_set_aead_rctx_sglist(struct aead_request *areq) static void nitrox_rfc4106_callback(void *arg, int err) { struct aead_request *areq = arg; - struct nitrox_rfc4106_rctx *rctx = aead_request_ctx(areq); + struct nitrox_rfc4106_rctx *rctx = aead_request_ctx_dma(areq); struct nitrox_kcrypt_request *nkreq = &rctx->base.nkreq; free_src_sglist(nkreq); @@ -441,7 +441,7 @@ static int nitrox_rfc4106_enc(struct aead_request *areq) { struct crypto_aead *aead = crypto_aead_reqtfm(areq); struct nitrox_crypto_ctx *nctx = crypto_aead_ctx(aead); - struct nitrox_rfc4106_rctx *rctx = aead_request_ctx(areq); + struct nitrox_rfc4106_rctx *rctx = aead_request_ctx_dma(areq); struct nitrox_aead_rctx *aead_rctx = &rctx->base; struct se_crypto_request *creq = &aead_rctx->nkreq.creq; int ret; @@ -472,7 +472,7 @@ static int nitrox_rfc4106_enc(struct aead_request *areq) static int nitrox_rfc4106_dec(struct aead_request *areq) { struct 
crypto_aead *aead = crypto_aead_reqtfm(areq); - struct nitrox_crypto_ctx *nctx = crypto_aead_ctx(aead); + struct nitrox_crypto_ctx *nctx = crypto_aead_ctx_dma(aead); struct nitrox_rfc4106_rctx *rctx = aead_request_ctx(areq); struct nitrox_aead_rctx *aead_rctx = &rctx->base; struct se_crypto_request *creq = &aead_rctx->nkreq.creq; @@ -510,8 +510,8 @@ static int nitrox_rfc4106_init(struct crypto_aead *aead) if (ret) return ret; - crypto_aead_set_reqsize(aead, sizeof(struct aead_request) + - sizeof(struct nitrox_rfc4106_rctx)); + crypto_aead_set_reqsize_dma(aead, sizeof(struct aead_request) + + sizeof(struct nitrox_rfc4106_rctx)); return 0; } From 99c6b20edfc031610240afca97ba9be5ec6f5750 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 2 Dec 2022 17:20:49 +0800 Subject: [PATCH 3574/4122] crypto: ccp - Set DMA alignment explicitly This driver has been implicitly relying on kmalloc alignment to be sufficient for DMA. This may no longer be the case with upcoming arm64 changes. This patch changes it to explicitly request DMA alignment from the Crypto API. 
Signed-off-by: Herbert Xu --- drivers/crypto/ccp/ccp-crypto-aes-cmac.c | 21 ++++++++-------- drivers/crypto/ccp/ccp-crypto-aes-galois.c | 12 ++++----- drivers/crypto/ccp/ccp-crypto-aes-xts.c | 20 ++++++++------- drivers/crypto/ccp/ccp-crypto-aes.c | 29 +++++++++++----------- drivers/crypto/ccp/ccp-crypto-des3.c | 17 +++++++------ drivers/crypto/ccp/ccp-crypto-main.c | 4 +-- drivers/crypto/ccp/ccp-crypto-rsa.c | 18 +++++++------- drivers/crypto/ccp/ccp-crypto-sha.c | 26 +++++++++---------- 8 files changed, 76 insertions(+), 71 deletions(-) diff --git a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c index 11a305fa19e6..d8426bdf3190 100644 --- a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c +++ b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c @@ -25,7 +25,7 @@ static int ccp_aes_cmac_complete(struct crypto_async_request *async_req, { struct ahash_request *req = ahash_request_cast(async_req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req); + struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx_dma(req); unsigned int digest_size = crypto_ahash_digestsize(tfm); if (ret) @@ -56,8 +56,8 @@ static int ccp_do_cmac_update(struct ahash_request *req, unsigned int nbytes, unsigned int final) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct ccp_ctx *ctx = crypto_ahash_ctx(tfm); - struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req); + struct ccp_ctx *ctx = crypto_ahash_ctx_dma(tfm); + struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx_dma(req); struct scatterlist *sg, *cmac_key_sg = NULL; unsigned int block_size = crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm)); @@ -182,7 +182,7 @@ e_free: static int ccp_aes_cmac_init(struct ahash_request *req) { - struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req); + struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx_dma(req); memset(rctx, 0, sizeof(*rctx)); @@ -219,7 +219,7 @@ static int ccp_aes_cmac_digest(struct 
ahash_request *req) static int ccp_aes_cmac_export(struct ahash_request *req, void *out) { - struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req); + struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx_dma(req); struct ccp_aes_cmac_exp_ctx state; /* Don't let anything leak to 'out' */ @@ -238,7 +238,7 @@ static int ccp_aes_cmac_export(struct ahash_request *req, void *out) static int ccp_aes_cmac_import(struct ahash_request *req, const void *in) { - struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req); + struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx_dma(req); struct ccp_aes_cmac_exp_ctx state; /* 'in' may not be aligned so memcpy to local variable */ @@ -256,7 +256,7 @@ static int ccp_aes_cmac_import(struct ahash_request *req, const void *in) static int ccp_aes_cmac_setkey(struct crypto_ahash *tfm, const u8 *key, unsigned int key_len) { - struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm)); + struct ccp_ctx *ctx = crypto_ahash_ctx_dma(tfm); struct ccp_crypto_ahash_alg *alg = ccp_crypto_ahash_alg(crypto_ahash_tfm(tfm)); u64 k0_hi, k0_lo, k1_hi, k1_lo, k2_hi, k2_lo; @@ -334,13 +334,14 @@ static int ccp_aes_cmac_setkey(struct crypto_ahash *tfm, const u8 *key, static int ccp_aes_cmac_cra_init(struct crypto_tfm *tfm) { - struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); + struct ccp_ctx *ctx = crypto_tfm_ctx_dma(tfm); struct crypto_ahash *ahash = __crypto_ahash_cast(tfm); ctx->complete = ccp_aes_cmac_complete; ctx->u.aes.key_len = 0; - crypto_ahash_set_reqsize(ahash, sizeof(struct ccp_aes_cmac_req_ctx)); + crypto_ahash_set_reqsize_dma(ahash, + sizeof(struct ccp_aes_cmac_req_ctx)); return 0; } @@ -382,7 +383,7 @@ int ccp_register_aes_cmac_algs(struct list_head *head) CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_NEED_FALLBACK; base->cra_blocksize = AES_BLOCK_SIZE; - base->cra_ctxsize = sizeof(struct ccp_ctx); + base->cra_ctxsize = sizeof(struct ccp_ctx) + crypto_dma_padding(); base->cra_priority = CCP_CRA_PRIORITY; base->cra_init = ccp_aes_cmac_cra_init; 
base->cra_module = THIS_MODULE; diff --git a/drivers/crypto/ccp/ccp-crypto-aes-galois.c b/drivers/crypto/ccp/ccp-crypto-aes-galois.c index 1c1c939f5c39..b1dbb8cea559 100644 --- a/drivers/crypto/ccp/ccp-crypto-aes-galois.c +++ b/drivers/crypto/ccp/ccp-crypto-aes-galois.c @@ -29,7 +29,7 @@ static int ccp_aes_gcm_complete(struct crypto_async_request *async_req, int ret) static int ccp_aes_gcm_setkey(struct crypto_aead *tfm, const u8 *key, unsigned int key_len) { - struct ccp_ctx *ctx = crypto_aead_ctx(tfm); + struct ccp_ctx *ctx = crypto_aead_ctx_dma(tfm); switch (key_len) { case AES_KEYSIZE_128: @@ -76,8 +76,8 @@ static int ccp_aes_gcm_setauthsize(struct crypto_aead *tfm, static int ccp_aes_gcm_crypt(struct aead_request *req, bool encrypt) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct ccp_ctx *ctx = crypto_aead_ctx(tfm); - struct ccp_aes_req_ctx *rctx = aead_request_ctx(req); + struct ccp_ctx *ctx = crypto_aead_ctx_dma(tfm); + struct ccp_aes_req_ctx *rctx = aead_request_ctx_dma(req); struct scatterlist *iv_sg = NULL; unsigned int iv_len = 0; int i; @@ -148,12 +148,12 @@ static int ccp_aes_gcm_decrypt(struct aead_request *req) static int ccp_aes_gcm_cra_init(struct crypto_aead *tfm) { - struct ccp_ctx *ctx = crypto_aead_ctx(tfm); + struct ccp_ctx *ctx = crypto_aead_ctx_dma(tfm); ctx->complete = ccp_aes_gcm_complete; ctx->u.aes.key_len = 0; - crypto_aead_set_reqsize(tfm, sizeof(struct ccp_aes_req_ctx)); + crypto_aead_set_reqsize_dma(tfm, sizeof(struct ccp_aes_req_ctx)); return 0; } @@ -176,7 +176,7 @@ static struct aead_alg ccp_aes_gcm_defaults = { CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_NEED_FALLBACK, .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct ccp_ctx), + .cra_ctxsize = sizeof(struct ccp_ctx) + CRYPTO_DMA_PADDING, .cra_priority = CCP_CRA_PRIORITY, .cra_exit = ccp_aes_gcm_cra_exit, .cra_module = THIS_MODULE, diff --git a/drivers/crypto/ccp/ccp-crypto-aes-xts.c b/drivers/crypto/ccp/ccp-crypto-aes-xts.c index 
6849261ca47d..93f735d6b02b 100644 --- a/drivers/crypto/ccp/ccp-crypto-aes-xts.c +++ b/drivers/crypto/ccp/ccp-crypto-aes-xts.c @@ -62,7 +62,7 @@ static struct ccp_unit_size_map xts_unit_sizes[] = { static int ccp_aes_xts_complete(struct crypto_async_request *async_req, int ret) { struct skcipher_request *req = skcipher_request_cast(async_req); - struct ccp_aes_req_ctx *rctx = skcipher_request_ctx(req); + struct ccp_aes_req_ctx *rctx = skcipher_request_ctx_dma(req); if (ret) return ret; @@ -75,7 +75,7 @@ static int ccp_aes_xts_complete(struct crypto_async_request *async_req, int ret) static int ccp_aes_xts_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int key_len) { - struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm); + struct ccp_ctx *ctx = crypto_skcipher_ctx_dma(tfm); unsigned int ccpversion = ccp_version(); int ret; @@ -105,8 +105,8 @@ static int ccp_aes_xts_crypt(struct skcipher_request *req, unsigned int encrypt) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm); - struct ccp_aes_req_ctx *rctx = skcipher_request_ctx(req); + struct ccp_ctx *ctx = crypto_skcipher_ctx_dma(tfm); + struct ccp_aes_req_ctx *rctx = skcipher_request_ctx_dma(req); unsigned int ccpversion = ccp_version(); unsigned int fallback = 0; unsigned int unit; @@ -196,7 +196,7 @@ static int ccp_aes_xts_decrypt(struct skcipher_request *req) static int ccp_aes_xts_init_tfm(struct crypto_skcipher *tfm) { - struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm); + struct ccp_ctx *ctx = crypto_skcipher_ctx_dma(tfm); struct crypto_skcipher *fallback_tfm; ctx->complete = ccp_aes_xts_complete; @@ -210,15 +210,16 @@ static int ccp_aes_xts_init_tfm(struct crypto_skcipher *tfm) } ctx->u.aes.tfm_skcipher = fallback_tfm; - crypto_skcipher_set_reqsize(tfm, sizeof(struct ccp_aes_req_ctx) + - crypto_skcipher_reqsize(fallback_tfm)); + crypto_skcipher_set_reqsize_dma(tfm, + sizeof(struct ccp_aes_req_ctx) + + crypto_skcipher_reqsize(fallback_tfm)); return 
0; } static void ccp_aes_xts_exit_tfm(struct crypto_skcipher *tfm) { - struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm); + struct ccp_ctx *ctx = crypto_skcipher_ctx_dma(tfm); crypto_free_skcipher(ctx->u.aes.tfm_skcipher); } @@ -246,7 +247,8 @@ static int ccp_register_aes_xts_alg(struct list_head *head, CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_NEED_FALLBACK; alg->base.cra_blocksize = AES_BLOCK_SIZE; - alg->base.cra_ctxsize = sizeof(struct ccp_ctx); + alg->base.cra_ctxsize = sizeof(struct ccp_ctx) + + crypto_dma_padding(); alg->base.cra_priority = CCP_CRA_PRIORITY; alg->base.cra_module = THIS_MODULE; diff --git a/drivers/crypto/ccp/ccp-crypto-aes.c b/drivers/crypto/ccp/ccp-crypto-aes.c index bed331953ff9..918e223f21b6 100644 --- a/drivers/crypto/ccp/ccp-crypto-aes.c +++ b/drivers/crypto/ccp/ccp-crypto-aes.c @@ -22,8 +22,9 @@ static int ccp_aes_complete(struct crypto_async_request *async_req, int ret) { struct skcipher_request *req = skcipher_request_cast(async_req); - struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm); - struct ccp_aes_req_ctx *rctx = skcipher_request_ctx(req); + struct ccp_ctx *ctx = crypto_skcipher_ctx_dma( + crypto_skcipher_reqtfm(req)); + struct ccp_aes_req_ctx *rctx = skcipher_request_ctx_dma(req); if (ret) return ret; @@ -38,7 +39,7 @@ static int ccp_aes_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int key_len) { struct ccp_crypto_skcipher_alg *alg = ccp_crypto_skcipher_alg(tfm); - struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm); + struct ccp_ctx *ctx = crypto_skcipher_ctx_dma(tfm); switch (key_len) { case AES_KEYSIZE_128: @@ -65,8 +66,8 @@ static int ccp_aes_setkey(struct crypto_skcipher *tfm, const u8 *key, static int ccp_aes_crypt(struct skcipher_request *req, bool encrypt) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm); - struct ccp_aes_req_ctx *rctx = skcipher_request_ctx(req); + struct ccp_ctx *ctx = crypto_skcipher_ctx_dma(tfm); + struct ccp_aes_req_ctx *rctx = 
skcipher_request_ctx_dma(req); struct scatterlist *iv_sg = NULL; unsigned int iv_len = 0; @@ -118,7 +119,7 @@ static int ccp_aes_decrypt(struct skcipher_request *req) static int ccp_aes_init_tfm(struct crypto_skcipher *tfm) { - struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm); + struct ccp_ctx *ctx = crypto_skcipher_ctx_dma(tfm); ctx->complete = ccp_aes_complete; ctx->u.aes.key_len = 0; @@ -132,7 +133,7 @@ static int ccp_aes_rfc3686_complete(struct crypto_async_request *async_req, int ret) { struct skcipher_request *req = skcipher_request_cast(async_req); - struct ccp_aes_req_ctx *rctx = skcipher_request_ctx(req); + struct ccp_aes_req_ctx *rctx = skcipher_request_ctx_dma(req); /* Restore the original pointer */ req->iv = rctx->rfc3686_info; @@ -143,7 +144,7 @@ static int ccp_aes_rfc3686_complete(struct crypto_async_request *async_req, static int ccp_aes_rfc3686_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int key_len) { - struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm); + struct ccp_ctx *ctx = crypto_skcipher_ctx_dma(tfm); if (key_len < CTR_RFC3686_NONCE_SIZE) return -EINVAL; @@ -157,8 +158,8 @@ static int ccp_aes_rfc3686_setkey(struct crypto_skcipher *tfm, const u8 *key, static int ccp_aes_rfc3686_crypt(struct skcipher_request *req, bool encrypt) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm); - struct ccp_aes_req_ctx *rctx = skcipher_request_ctx(req); + struct ccp_ctx *ctx = crypto_skcipher_ctx_dma(tfm); + struct ccp_aes_req_ctx *rctx = skcipher_request_ctx_dma(req); u8 *iv; /* Initialize the CTR block */ @@ -190,12 +191,12 @@ static int ccp_aes_rfc3686_decrypt(struct skcipher_request *req) static int ccp_aes_rfc3686_init_tfm(struct crypto_skcipher *tfm) { - struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm); + struct ccp_ctx *ctx = crypto_skcipher_ctx_dma(tfm); ctx->complete = ccp_aes_rfc3686_complete; ctx->u.aes.key_len = 0; - crypto_skcipher_set_reqsize(tfm, sizeof(struct ccp_aes_req_ctx)); 
+ crypto_skcipher_set_reqsize_dma(tfm, sizeof(struct ccp_aes_req_ctx)); return 0; } @@ -213,7 +214,7 @@ static const struct skcipher_alg ccp_aes_defaults = { CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_NEED_FALLBACK, .base.cra_blocksize = AES_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct ccp_ctx), + .base.cra_ctxsize = sizeof(struct ccp_ctx) + CRYPTO_DMA_PADDING, .base.cra_priority = CCP_CRA_PRIORITY, .base.cra_module = THIS_MODULE, }; @@ -231,7 +232,7 @@ static const struct skcipher_alg ccp_aes_rfc3686_defaults = { CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_NEED_FALLBACK, .base.cra_blocksize = CTR_RFC3686_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct ccp_ctx), + .base.cra_ctxsize = sizeof(struct ccp_ctx) + CRYPTO_DMA_PADDING, .base.cra_priority = CCP_CRA_PRIORITY, .base.cra_module = THIS_MODULE, }; diff --git a/drivers/crypto/ccp/ccp-crypto-des3.c b/drivers/crypto/ccp/ccp-crypto-des3.c index 278636ed251a..afae30adb703 100644 --- a/drivers/crypto/ccp/ccp-crypto-des3.c +++ b/drivers/crypto/ccp/ccp-crypto-des3.c @@ -21,8 +21,9 @@ static int ccp_des3_complete(struct crypto_async_request *async_req, int ret) { struct skcipher_request *req = skcipher_request_cast(async_req); - struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm); - struct ccp_des3_req_ctx *rctx = skcipher_request_ctx(req); + struct ccp_ctx *ctx = crypto_skcipher_ctx_dma( + crypto_skcipher_reqtfm(req)); + struct ccp_des3_req_ctx *rctx = skcipher_request_ctx_dma(req); if (ret) return ret; @@ -37,7 +38,7 @@ static int ccp_des3_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int key_len) { struct ccp_crypto_skcipher_alg *alg = ccp_crypto_skcipher_alg(tfm); - struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm); + struct ccp_ctx *ctx = crypto_skcipher_ctx_dma(tfm); int err; err = verify_skcipher_des3_key(tfm, key); @@ -60,8 +61,8 @@ static int ccp_des3_setkey(struct crypto_skcipher *tfm, const u8 *key, static int ccp_des3_crypt(struct skcipher_request *req, bool encrypt) { struct crypto_skcipher *tfm = 
crypto_skcipher_reqtfm(req); - struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm); - struct ccp_des3_req_ctx *rctx = skcipher_request_ctx(req); + struct ccp_ctx *ctx = crypto_skcipher_ctx_dma(tfm); + struct ccp_des3_req_ctx *rctx = skcipher_request_ctx_dma(req); struct scatterlist *iv_sg = NULL; unsigned int iv_len = 0; @@ -114,12 +115,12 @@ static int ccp_des3_decrypt(struct skcipher_request *req) static int ccp_des3_init_tfm(struct crypto_skcipher *tfm) { - struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm); + struct ccp_ctx *ctx = crypto_skcipher_ctx_dma(tfm); ctx->complete = ccp_des3_complete; ctx->u.des3.key_len = 0; - crypto_skcipher_set_reqsize(tfm, sizeof(struct ccp_des3_req_ctx)); + crypto_skcipher_set_reqsize_dma(tfm, sizeof(struct ccp_des3_req_ctx)); return 0; } @@ -137,7 +138,7 @@ static const struct skcipher_alg ccp_des3_defaults = { CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_NEED_FALLBACK, .base.cra_blocksize = DES3_EDE_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct ccp_ctx), + .base.cra_ctxsize = sizeof(struct ccp_ctx) + CRYPTO_DMA_PADDING, .base.cra_priority = CCP_CRA_PRIORITY, .base.cra_module = THIS_MODULE, }; diff --git a/drivers/crypto/ccp/ccp-crypto-main.c b/drivers/crypto/ccp/ccp-crypto-main.c index dd86d2650bea..73442a382f68 100644 --- a/drivers/crypto/ccp/ccp-crypto-main.c +++ b/drivers/crypto/ccp/ccp-crypto-main.c @@ -139,7 +139,7 @@ static void ccp_crypto_complete(void *data, int err) struct ccp_crypto_cmd *crypto_cmd = data; struct ccp_crypto_cmd *held, *next, *backlog; struct crypto_async_request *req = crypto_cmd->req; - struct ccp_ctx *ctx = crypto_tfm_ctx(req->tfm); + struct ccp_ctx *ctx = crypto_tfm_ctx_dma(req->tfm); int ret; if (err == -EINPROGRESS) { @@ -183,7 +183,7 @@ static void ccp_crypto_complete(void *data, int err) break; /* Error occurred, report it and get the next entry */ - ctx = crypto_tfm_ctx(held->req->tfm); + ctx = crypto_tfm_ctx_dma(held->req->tfm); if (ctx->complete) ret = ctx->complete(held->req, ret); 
held->req->complete(held->req, ret); diff --git a/drivers/crypto/ccp/ccp-crypto-rsa.c b/drivers/crypto/ccp/ccp-crypto-rsa.c index 1223ac70aea2..a14f85512cf4 100644 --- a/drivers/crypto/ccp/ccp-crypto-rsa.c +++ b/drivers/crypto/ccp/ccp-crypto-rsa.c @@ -44,7 +44,7 @@ static inline int ccp_copy_and_save_keypart(u8 **kpbuf, unsigned int *kplen, static int ccp_rsa_complete(struct crypto_async_request *async_req, int ret) { struct akcipher_request *req = akcipher_request_cast(async_req); - struct ccp_rsa_req_ctx *rctx = akcipher_request_ctx(req); + struct ccp_rsa_req_ctx *rctx = akcipher_request_ctx_dma(req); if (ret) return ret; @@ -56,7 +56,7 @@ static int ccp_rsa_complete(struct crypto_async_request *async_req, int ret) static unsigned int ccp_rsa_maxsize(struct crypto_akcipher *tfm) { - struct ccp_ctx *ctx = akcipher_tfm_ctx(tfm); + struct ccp_ctx *ctx = akcipher_tfm_ctx_dma(tfm); return ctx->u.rsa.n_len; } @@ -64,8 +64,8 @@ static unsigned int ccp_rsa_maxsize(struct crypto_akcipher *tfm) static int ccp_rsa_crypt(struct akcipher_request *req, bool encrypt) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - struct ccp_ctx *ctx = akcipher_tfm_ctx(tfm); - struct ccp_rsa_req_ctx *rctx = akcipher_request_ctx(req); + struct ccp_ctx *ctx = akcipher_tfm_ctx_dma(tfm); + struct ccp_rsa_req_ctx *rctx = akcipher_request_ctx_dma(req); int ret = 0; memset(&rctx->cmd, 0, sizeof(rctx->cmd)); @@ -126,7 +126,7 @@ static void ccp_rsa_free_key_bufs(struct ccp_ctx *ctx) static int ccp_rsa_setkey(struct crypto_akcipher *tfm, const void *key, unsigned int keylen, bool private) { - struct ccp_ctx *ctx = akcipher_tfm_ctx(tfm); + struct ccp_ctx *ctx = akcipher_tfm_ctx_dma(tfm); struct rsa_key raw_key; int ret; @@ -192,9 +192,9 @@ static int ccp_rsa_setpubkey(struct crypto_akcipher *tfm, const void *key, static int ccp_rsa_init_tfm(struct crypto_akcipher *tfm) { - struct ccp_ctx *ctx = akcipher_tfm_ctx(tfm); + struct ccp_ctx *ctx = akcipher_tfm_ctx_dma(tfm); - 
akcipher_set_reqsize(tfm, sizeof(struct ccp_rsa_req_ctx)); + akcipher_set_reqsize_dma(tfm, sizeof(struct ccp_rsa_req_ctx)); ctx->complete = ccp_rsa_complete; return 0; @@ -202,7 +202,7 @@ static int ccp_rsa_init_tfm(struct crypto_akcipher *tfm) static void ccp_rsa_exit_tfm(struct crypto_akcipher *tfm) { - struct ccp_ctx *ctx = crypto_tfm_ctx(&tfm->base); + struct ccp_ctx *ctx = akcipher_tfm_ctx_dma(tfm); ccp_rsa_free_key_bufs(ctx); } @@ -220,7 +220,7 @@ static struct akcipher_alg ccp_rsa_defaults = { .cra_driver_name = "rsa-ccp", .cra_priority = CCP_CRA_PRIORITY, .cra_module = THIS_MODULE, - .cra_ctxsize = 2 * sizeof(struct ccp_ctx), + .cra_ctxsize = 2 * sizeof(struct ccp_ctx) + CRYPTO_DMA_PADDING, }, }; diff --git a/drivers/crypto/ccp/ccp-crypto-sha.c b/drivers/crypto/ccp/ccp-crypto-sha.c index 74fa5360e722..fa3ae8e78f6f 100644 --- a/drivers/crypto/ccp/ccp-crypto-sha.c +++ b/drivers/crypto/ccp/ccp-crypto-sha.c @@ -28,7 +28,7 @@ static int ccp_sha_complete(struct crypto_async_request *async_req, int ret) { struct ahash_request *req = ahash_request_cast(async_req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req); + struct ccp_sha_req_ctx *rctx = ahash_request_ctx_dma(req); unsigned int digest_size = crypto_ahash_digestsize(tfm); if (ret) @@ -59,8 +59,8 @@ static int ccp_do_sha_update(struct ahash_request *req, unsigned int nbytes, unsigned int final) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct ccp_ctx *ctx = crypto_ahash_ctx(tfm); - struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req); + struct ccp_ctx *ctx = crypto_ahash_ctx_dma(tfm); + struct ccp_sha_req_ctx *rctx = ahash_request_ctx_dma(req); struct scatterlist *sg; unsigned int block_size = crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm)); @@ -182,8 +182,8 @@ e_free: static int ccp_sha_init(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct ccp_ctx *ctx = crypto_ahash_ctx(tfm); - struct 
ccp_sha_req_ctx *rctx = ahash_request_ctx(req); + struct ccp_ctx *ctx = crypto_ahash_ctx_dma(tfm); + struct ccp_sha_req_ctx *rctx = ahash_request_ctx_dma(req); struct ccp_crypto_ahash_alg *alg = ccp_crypto_ahash_alg(crypto_ahash_tfm(tfm)); unsigned int block_size = @@ -231,7 +231,7 @@ static int ccp_sha_digest(struct ahash_request *req) static int ccp_sha_export(struct ahash_request *req, void *out) { - struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req); + struct ccp_sha_req_ctx *rctx = ahash_request_ctx_dma(req); struct ccp_sha_exp_ctx state; /* Don't let anything leak to 'out' */ @@ -252,7 +252,7 @@ static int ccp_sha_export(struct ahash_request *req, void *out) static int ccp_sha_import(struct ahash_request *req, const void *in) { - struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req); + struct ccp_sha_req_ctx *rctx = ahash_request_ctx_dma(req); struct ccp_sha_exp_ctx state; /* 'in' may not be aligned so memcpy to local variable */ @@ -272,7 +272,7 @@ static int ccp_sha_import(struct ahash_request *req, const void *in) static int ccp_sha_setkey(struct crypto_ahash *tfm, const u8 *key, unsigned int key_len) { - struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm)); + struct ccp_ctx *ctx = crypto_ahash_ctx_dma(tfm); struct crypto_shash *shash = ctx->u.sha.hmac_tfm; unsigned int block_size = crypto_shash_blocksize(shash); unsigned int digest_size = crypto_shash_digestsize(shash); @@ -313,13 +313,13 @@ static int ccp_sha_setkey(struct crypto_ahash *tfm, const u8 *key, static int ccp_sha_cra_init(struct crypto_tfm *tfm) { - struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); struct crypto_ahash *ahash = __crypto_ahash_cast(tfm); + struct ccp_ctx *ctx = crypto_ahash_ctx_dma(ahash); ctx->complete = ccp_sha_complete; ctx->u.sha.key_len = 0; - crypto_ahash_set_reqsize(ahash, sizeof(struct ccp_sha_req_ctx)); + crypto_ahash_set_reqsize_dma(ahash, sizeof(struct ccp_sha_req_ctx)); return 0; } @@ -330,7 +330,7 @@ static void ccp_sha_cra_exit(struct crypto_tfm *tfm) static 
int ccp_hmac_sha_cra_init(struct crypto_tfm *tfm) { - struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); + struct ccp_ctx *ctx = crypto_tfm_ctx_dma(tfm); struct ccp_crypto_ahash_alg *alg = ccp_crypto_ahash_alg(tfm); struct crypto_shash *hmac_tfm; @@ -348,7 +348,7 @@ static int ccp_hmac_sha_cra_init(struct crypto_tfm *tfm) static void ccp_hmac_sha_cra_exit(struct crypto_tfm *tfm) { - struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); + struct ccp_ctx *ctx = crypto_tfm_ctx_dma(tfm); if (ctx->u.sha.hmac_tfm) crypto_free_shash(ctx->u.sha.hmac_tfm); @@ -492,7 +492,7 @@ static int ccp_register_sha_alg(struct list_head *head, CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_NEED_FALLBACK; base->cra_blocksize = def->block_size; - base->cra_ctxsize = sizeof(struct ccp_ctx); + base->cra_ctxsize = sizeof(struct ccp_ctx) + crypto_dma_padding(); base->cra_priority = CCP_CRA_PRIORITY; base->cra_init = ccp_sha_cra_init; base->cra_exit = ccp_sha_cra_exit; From 07547fa73e4645363165e662f50427a7d302dcf1 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 2 Dec 2022 17:20:51 +0800 Subject: [PATCH 3575/4122] crypto: ccree - Set DMA alignment explicitly This driver has been implicitly relying on kmalloc alignment to be sufficient for DMA. This may no longer be the case with upcoming arm64 changes. This patch changes it to explicitly request DMA alignment from the Crypto API. 
Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_aead.c | 62 ++++++++++---------- drivers/crypto/ccree/cc_buffer_mgr.c | 18 +++--- drivers/crypto/ccree/cc_hash.c | 86 ++++++++++++++-------------- 3 files changed, 83 insertions(+), 83 deletions(-) diff --git a/drivers/crypto/ccree/cc_aead.c b/drivers/crypto/ccree/cc_aead.c index 35794c7271fb..109ffb375fc6 100644 --- a/drivers/crypto/ccree/cc_aead.c +++ b/drivers/crypto/ccree/cc_aead.c @@ -138,7 +138,7 @@ static int cc_aead_init(struct crypto_aead *tfm) ctx->flow_mode = cc_alg->flow_mode; ctx->auth_mode = cc_alg->auth_mode; ctx->drvdata = cc_alg->drvdata; - crypto_aead_set_reqsize(tfm, sizeof(struct aead_req_ctx)); + crypto_aead_set_reqsize_dma(tfm, sizeof(struct aead_req_ctx)); /* Allocate key buffer, cache line aligned */ ctx->enckey = dma_alloc_coherent(dev, AES_MAX_KEY_SIZE, @@ -208,7 +208,7 @@ init_failed: static void cc_aead_complete(struct device *dev, void *cc_req, int err) { struct aead_request *areq = (struct aead_request *)cc_req; - struct aead_req_ctx *areq_ctx = aead_request_ctx(areq); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(areq); struct crypto_aead *tfm = crypto_aead_reqtfm(cc_req); struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); @@ -723,7 +723,7 @@ static void cc_set_assoc_desc(struct aead_request *areq, unsigned int flow_mode, { struct crypto_aead *tfm = crypto_aead_reqtfm(areq); struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct aead_req_ctx *areq_ctx = aead_request_ctx(areq); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(areq); enum cc_req_dma_buf_type assoc_dma_type = areq_ctx->assoc_buff_type; unsigned int idx = *seq_size; struct device *dev = drvdata_to_dev(ctx->drvdata); @@ -762,7 +762,7 @@ static void cc_proc_authen_desc(struct aead_request *areq, struct cc_hw_desc desc[], unsigned int *seq_size, int direct) { - struct aead_req_ctx *areq_ctx = aead_request_ctx(areq); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(areq); enum cc_req_dma_buf_type 
data_dma_type = areq_ctx->data_buff_type; unsigned int idx = *seq_size; struct crypto_aead *tfm = crypto_aead_reqtfm(areq); @@ -827,7 +827,7 @@ static void cc_proc_cipher_desc(struct aead_request *areq, unsigned int *seq_size) { unsigned int idx = *seq_size; - struct aead_req_ctx *areq_ctx = aead_request_ctx(areq); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(areq); enum cc_req_dma_buf_type data_dma_type = areq_ctx->data_buff_type; struct crypto_aead *tfm = crypto_aead_reqtfm(areq); struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); @@ -873,7 +873,7 @@ static void cc_proc_digest_desc(struct aead_request *req, { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct aead_req_ctx *req_ctx = aead_request_ctx(req); + struct aead_req_ctx *req_ctx = aead_request_ctx_dma(req); unsigned int idx = *seq_size; unsigned int hash_mode = (ctx->auth_mode == DRV_HASH_SHA1) ? DRV_HASH_HW_SHA1 : DRV_HASH_HW_SHA256; @@ -923,7 +923,7 @@ static void cc_set_cipher_desc(struct aead_request *req, { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct aead_req_ctx *req_ctx = aead_request_ctx(req); + struct aead_req_ctx *req_ctx = aead_request_ctx_dma(req); unsigned int hw_iv_size = req_ctx->hw_iv_size; unsigned int idx = *seq_size; int direct = req_ctx->gen_ctx.op_type; @@ -965,7 +965,7 @@ static void cc_set_cipher_desc(struct aead_request *req, static void cc_proc_cipher(struct aead_request *req, struct cc_hw_desc desc[], unsigned int *seq_size, unsigned int data_flow_mode) { - struct aead_req_ctx *req_ctx = aead_request_ctx(req); + struct aead_req_ctx *req_ctx = aead_request_ctx_dma(req); int direct = req_ctx->gen_ctx.op_type; unsigned int idx = *seq_size; @@ -1082,7 +1082,7 @@ static void cc_proc_header_desc(struct aead_request *req, struct cc_hw_desc desc[], unsigned int *seq_size) { - struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = 
aead_request_ctx_dma(req); unsigned int idx = *seq_size; /* Hash associated data */ @@ -1158,7 +1158,7 @@ static void cc_proc_scheme_desc(struct aead_request *req, static void cc_mlli_to_sram(struct aead_request *req, struct cc_hw_desc desc[], unsigned int *seq_size) { - struct aead_req_ctx *req_ctx = aead_request_ctx(req); + struct aead_req_ctx *req_ctx = aead_request_ctx_dma(req); struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); struct device *dev = drvdata_to_dev(ctx->drvdata); @@ -1212,7 +1212,7 @@ static void cc_hmac_authenc(struct aead_request *req, struct cc_hw_desc desc[], { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct aead_req_ctx *req_ctx = aead_request_ctx(req); + struct aead_req_ctx *req_ctx = aead_request_ctx_dma(req); int direct = req_ctx->gen_ctx.op_type; unsigned int data_flow_mode = cc_get_data_flow(direct, ctx->flow_mode, @@ -1265,7 +1265,7 @@ cc_xcbc_authenc(struct aead_request *req, struct cc_hw_desc desc[], { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct aead_req_ctx *req_ctx = aead_request_ctx(req); + struct aead_req_ctx *req_ctx = aead_request_ctx_dma(req); int direct = req_ctx->gen_ctx.op_type; unsigned int data_flow_mode = cc_get_data_flow(direct, ctx->flow_mode, @@ -1312,7 +1312,7 @@ static int validate_data_size(struct cc_aead_ctx *ctx, enum drv_crypto_direction direct, struct aead_request *req) { - struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req); struct device *dev = drvdata_to_dev(ctx->drvdata); unsigned int assoclen = areq_ctx->assoclen; unsigned int cipherlen = (direct == DRV_CRYPTO_DIRECTION_DECRYPT) ? 
@@ -1411,7 +1411,7 @@ static int cc_ccm(struct aead_request *req, struct cc_hw_desc desc[], { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct aead_req_ctx *req_ctx = aead_request_ctx(req); + struct aead_req_ctx *req_ctx = aead_request_ctx_dma(req); unsigned int idx = *seq_size; unsigned int cipher_flow_mode; dma_addr_t mac_result; @@ -1533,7 +1533,7 @@ static int config_ccm_adata(struct aead_request *req) struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); struct device *dev = drvdata_to_dev(ctx->drvdata); - struct aead_req_ctx *req_ctx = aead_request_ctx(req); + struct aead_req_ctx *req_ctx = aead_request_ctx_dma(req); //unsigned int size_of_a = 0, rem_a_size = 0; unsigned int lp = req->iv[0]; /* Note: The code assume that req->iv[0] already contains the value @@ -1591,7 +1591,7 @@ static void cc_proc_rfc4309_ccm(struct aead_request *req) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req); /* L' */ memset(areq_ctx->ctr_iv, 0, AES_BLOCK_SIZE); @@ -1615,7 +1615,7 @@ static void cc_set_ghash_desc(struct aead_request *req, { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct aead_req_ctx *req_ctx = aead_request_ctx(req); + struct aead_req_ctx *req_ctx = aead_request_ctx_dma(req); unsigned int idx = *seq_size; /* load key to AES*/ @@ -1693,7 +1693,7 @@ static void cc_set_gctr_desc(struct aead_request *req, struct cc_hw_desc desc[], { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct aead_req_ctx *req_ctx = aead_request_ctx(req); + struct aead_req_ctx *req_ctx = aead_request_ctx_dma(req); unsigned int idx = *seq_size; /* load key to AES*/ @@ -1730,7 +1730,7 @@ static void 
cc_proc_gcm_result(struct aead_request *req, { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct aead_req_ctx *req_ctx = aead_request_ctx(req); + struct aead_req_ctx *req_ctx = aead_request_ctx_dma(req); dma_addr_t mac_result; unsigned int idx = *seq_size; @@ -1792,7 +1792,7 @@ static void cc_proc_gcm_result(struct aead_request *req, static int cc_gcm(struct aead_request *req, struct cc_hw_desc desc[], unsigned int *seq_size) { - struct aead_req_ctx *req_ctx = aead_request_ctx(req); + struct aead_req_ctx *req_ctx = aead_request_ctx_dma(req); unsigned int cipher_flow_mode; //in RFC4543 no data to encrypt. just copy data from src to dest. @@ -1830,7 +1830,7 @@ static int config_gcm_context(struct aead_request *req) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct aead_req_ctx *req_ctx = aead_request_ctx(req); + struct aead_req_ctx *req_ctx = aead_request_ctx_dma(req); struct device *dev = drvdata_to_dev(ctx->drvdata); unsigned int cryptlen = (req_ctx->gen_ctx.op_type == @@ -1879,7 +1879,7 @@ static void cc_proc_rfc4_gcm(struct aead_request *req) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req); memcpy(areq_ctx->ctr_iv + GCM_BLOCK_RFC4_NONCE_OFFSET, ctx->ctr_nonce, GCM_BLOCK_RFC4_NONCE_SIZE); @@ -1896,7 +1896,7 @@ static int cc_proc_aead(struct aead_request *req, struct cc_hw_desc desc[MAX_AEAD_PROCESS_SEQ]; struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req); struct device *dev = drvdata_to_dev(ctx->drvdata); struct cc_crypto_req cc_req = {}; @@ -2019,7 +2019,7 @@ exit: static int cc_aead_encrypt(struct aead_request *req) { 
- struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req); int rc; memset(areq_ctx, 0, sizeof(*areq_ctx)); @@ -2039,7 +2039,7 @@ static int cc_rfc4309_ccm_encrypt(struct aead_request *req) { /* Very similar to cc_aead_encrypt() above. */ - struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req); int rc; rc = crypto_ipsec_check_assoclen(req->assoclen); @@ -2063,7 +2063,7 @@ out: static int cc_aead_decrypt(struct aead_request *req) { - struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req); int rc; memset(areq_ctx, 0, sizeof(*areq_ctx)); @@ -2081,7 +2081,7 @@ static int cc_aead_decrypt(struct aead_request *req) static int cc_rfc4309_ccm_decrypt(struct aead_request *req) { - struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req); int rc; rc = crypto_ipsec_check_assoclen(req->assoclen); @@ -2193,7 +2193,7 @@ static int cc_rfc4543_gcm_setauthsize(struct crypto_aead *authenc, static int cc_rfc4106_gcm_encrypt(struct aead_request *req) { - struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req); int rc; rc = crypto_ipsec_check_assoclen(req->assoclen); @@ -2217,7 +2217,7 @@ out: static int cc_rfc4543_gcm_encrypt(struct aead_request *req) { - struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req); int rc; rc = crypto_ipsec_check_assoclen(req->assoclen); @@ -2244,7 +2244,7 @@ out: static int cc_rfc4106_gcm_decrypt(struct aead_request *req) { - struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req); int rc; rc = crypto_ipsec_check_assoclen(req->assoclen); @@ -2268,7 +2268,7 @@ out: static int cc_rfc4543_gcm_decrypt(struct aead_request *req) { - struct aead_req_ctx 
*areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req); int rc; rc = crypto_ipsec_check_assoclen(req->assoclen); diff --git a/drivers/crypto/ccree/cc_buffer_mgr.c b/drivers/crypto/ccree/cc_buffer_mgr.c index 9efd88f871d1..bcca55bff910 100644 --- a/drivers/crypto/ccree/cc_buffer_mgr.c +++ b/drivers/crypto/ccree/cc_buffer_mgr.c @@ -52,7 +52,7 @@ static inline char *cc_dma_buf_type(enum cc_req_dma_buf_type type) static void cc_copy_mac(struct device *dev, struct aead_request *req, enum cc_sg_cpy_direct dir) { - struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req); u32 skip = req->assoclen + req->cryptlen; cc_copy_sg_portion(dev, areq_ctx->backup_mac, req->src, @@ -456,7 +456,7 @@ cipher_exit: void cc_unmap_aead_request(struct device *dev, struct aead_request *req) { - struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req); unsigned int hw_iv_size = areq_ctx->hw_iv_size; struct cc_drvdata *drvdata = dev_get_drvdata(dev); int src_direction = (req->src != req->dst ? 
DMA_TO_DEVICE : DMA_BIDIRECTIONAL); @@ -546,7 +546,7 @@ static int cc_aead_chain_iv(struct cc_drvdata *drvdata, struct buffer_array *sg_data, bool is_last, bool do_chain) { - struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req); unsigned int hw_iv_size = areq_ctx->hw_iv_size; struct device *dev = drvdata_to_dev(drvdata); gfp_t flags = cc_gfp_flags(&req->base); @@ -586,7 +586,7 @@ static int cc_aead_chain_assoc(struct cc_drvdata *drvdata, struct buffer_array *sg_data, bool is_last, bool do_chain) { - struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req); int rc = 0; int mapped_nents = 0; struct device *dev = drvdata_to_dev(drvdata); @@ -652,7 +652,7 @@ chain_assoc_exit: static void cc_prepare_aead_data_dlli(struct aead_request *req, u32 *src_last_bytes, u32 *dst_last_bytes) { - struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req); enum drv_crypto_direction direct = areq_ctx->gen_ctx.op_type; unsigned int authsize = areq_ctx->req_authsize; struct scatterlist *sg; @@ -678,7 +678,7 @@ static void cc_prepare_aead_data_mlli(struct cc_drvdata *drvdata, u32 *src_last_bytes, u32 *dst_last_bytes, bool is_last_table) { - struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req); enum drv_crypto_direction direct = areq_ctx->gen_ctx.op_type; unsigned int authsize = areq_ctx->req_authsize; struct device *dev = drvdata_to_dev(drvdata); @@ -790,7 +790,7 @@ static int cc_aead_chain_data(struct cc_drvdata *drvdata, struct buffer_array *sg_data, bool is_last_table, bool do_chain) { - struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req); struct device *dev = drvdata_to_dev(drvdata); enum drv_crypto_direction direct = areq_ctx->gen_ctx.op_type; unsigned int authsize = 
areq_ctx->req_authsize; @@ -895,7 +895,7 @@ chain_data_exit: static void cc_update_aead_mlli_nents(struct cc_drvdata *drvdata, struct aead_request *req) { - struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req); u32 curr_mlli_size = 0; if (areq_ctx->assoc_buff_type == CC_DMA_BUF_MLLI) { @@ -945,7 +945,7 @@ static void cc_update_aead_mlli_nents(struct cc_drvdata *drvdata, int cc_map_aead_request(struct cc_drvdata *drvdata, struct aead_request *req) { - struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req); struct mlli_params *mlli_params = &areq_ctx->mlli_params; struct device *dev = drvdata_to_dev(drvdata); struct buffer_array sg_data; diff --git a/drivers/crypto/ccree/cc_hash.c b/drivers/crypto/ccree/cc_hash.c index 683c9a430e11..f418162932fe 100644 --- a/drivers/crypto/ccree/cc_hash.c +++ b/drivers/crypto/ccree/cc_hash.c @@ -283,9 +283,9 @@ static void cc_unmap_result(struct device *dev, struct ahash_req_ctx *state, static void cc_update_complete(struct device *dev, void *cc_req, int err) { struct ahash_request *req = (struct ahash_request *)cc_req; - struct ahash_req_ctx *state = ahash_request_ctx(req); + struct ahash_req_ctx *state = ahash_request_ctx_dma(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm); dev_dbg(dev, "req=%pK\n", req); @@ -301,9 +301,9 @@ static void cc_update_complete(struct device *dev, void *cc_req, int err) static void cc_digest_complete(struct device *dev, void *cc_req, int err) { struct ahash_request *req = (struct ahash_request *)cc_req; - struct ahash_req_ctx *state = ahash_request_ctx(req); + struct ahash_req_ctx *state = ahash_request_ctx_dma(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct cc_hash_ctx *ctx = 
crypto_ahash_ctx_dma(tfm); u32 digestsize = crypto_ahash_digestsize(tfm); dev_dbg(dev, "req=%pK\n", req); @@ -321,9 +321,9 @@ static void cc_digest_complete(struct device *dev, void *cc_req, int err) static void cc_hash_complete(struct device *dev, void *cc_req, int err) { struct ahash_request *req = (struct ahash_request *)cc_req; - struct ahash_req_ctx *state = ahash_request_ctx(req); + struct ahash_req_ctx *state = ahash_request_ctx_dma(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm); u32 digestsize = crypto_ahash_digestsize(tfm); dev_dbg(dev, "req=%pK\n", req); @@ -341,9 +341,9 @@ static void cc_hash_complete(struct device *dev, void *cc_req, int err) static int cc_fin_result(struct cc_hw_desc *desc, struct ahash_request *req, int idx) { - struct ahash_req_ctx *state = ahash_request_ctx(req); + struct ahash_req_ctx *state = ahash_request_ctx_dma(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm); u32 digestsize = crypto_ahash_digestsize(tfm); /* Get final MAC result */ @@ -364,9 +364,9 @@ static int cc_fin_result(struct cc_hw_desc *desc, struct ahash_request *req, static int cc_fin_hmac(struct cc_hw_desc *desc, struct ahash_request *req, int idx) { - struct ahash_req_ctx *state = ahash_request_ctx(req); + struct ahash_req_ctx *state = ahash_request_ctx_dma(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm); u32 digestsize = crypto_ahash_digestsize(tfm); /* store the hash digest result in the context */ @@ -417,9 +417,9 @@ static int cc_fin_hmac(struct cc_hw_desc *desc, struct ahash_request *req, static int cc_hash_digest(struct ahash_request *req) { - struct ahash_req_ctx *state = ahash_request_ctx(req); + struct 
ahash_req_ctx *state = ahash_request_ctx_dma(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm); u32 digestsize = crypto_ahash_digestsize(tfm); struct scatterlist *src = req->src; unsigned int nbytes = req->nbytes; @@ -555,9 +555,9 @@ static int cc_restore_hash(struct cc_hw_desc *desc, struct cc_hash_ctx *ctx, static int cc_hash_update(struct ahash_request *req) { - struct ahash_req_ctx *state = ahash_request_ctx(req); + struct ahash_req_ctx *state = ahash_request_ctx_dma(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm); unsigned int block_size = crypto_tfm_alg_blocksize(&tfm->base); struct scatterlist *src = req->src; unsigned int nbytes = req->nbytes; @@ -631,9 +631,9 @@ static int cc_hash_update(struct ahash_request *req) static int cc_do_finup(struct ahash_request *req, bool update) { - struct ahash_req_ctx *state = ahash_request_ctx(req); + struct ahash_req_ctx *state = ahash_request_ctx_dma(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm); u32 digestsize = crypto_ahash_digestsize(tfm); struct scatterlist *src = req->src; unsigned int nbytes = req->nbytes; @@ -711,9 +711,9 @@ static int cc_hash_final(struct ahash_request *req) static int cc_hash_init(struct ahash_request *req) { - struct ahash_req_ctx *state = ahash_request_ctx(req); + struct ahash_req_ctx *state = ahash_request_ctx_dma(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm); struct device *dev = drvdata_to_dev(ctx->drvdata); dev_dbg(dev, "===== init (%d) ====\n", req->nbytes); @@ -736,7 +736,7 @@ static int cc_hash_setkey(struct crypto_ahash *ahash, 
const u8 *key, u32 larval_addr; struct device *dev; - ctx = crypto_ahash_ctx(ahash); + ctx = crypto_ahash_ctx_dma(ahash); dev = drvdata_to_dev(ctx->drvdata); dev_dbg(dev, "start keylen: %d", keylen); @@ -922,7 +922,7 @@ static int cc_xcbc_setkey(struct crypto_ahash *ahash, const u8 *key, unsigned int keylen) { struct cc_crypto_req cc_req = {}; - struct cc_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); struct device *dev = drvdata_to_dev(ctx->drvdata); int rc = 0; unsigned int idx = 0; @@ -1007,7 +1007,7 @@ static int cc_xcbc_setkey(struct crypto_ahash *ahash, static int cc_cmac_setkey(struct crypto_ahash *ahash, const u8 *key, unsigned int keylen) { - struct cc_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); struct device *dev = drvdata_to_dev(ctx->drvdata); dev_dbg(dev, "===== setkey (%d) ====\n", keylen); @@ -1109,7 +1109,7 @@ fail: static int cc_get_hash_len(struct crypto_tfm *tfm) { - struct cc_hash_ctx *ctx = crypto_tfm_ctx(tfm); + struct cc_hash_ctx *ctx = crypto_tfm_ctx_dma(tfm); if (ctx->hash_mode == DRV_HASH_SM3) return CC_SM3_HASH_LEN_SIZE; @@ -1119,7 +1119,7 @@ static int cc_get_hash_len(struct crypto_tfm *tfm) static int cc_cra_init(struct crypto_tfm *tfm) { - struct cc_hash_ctx *ctx = crypto_tfm_ctx(tfm); + struct cc_hash_ctx *ctx = crypto_tfm_ctx_dma(tfm); struct hash_alg_common *hash_alg_common = container_of(tfm->__crt_alg, struct hash_alg_common, base); struct ahash_alg *ahash_alg = @@ -1127,8 +1127,8 @@ static int cc_cra_init(struct crypto_tfm *tfm) struct cc_hash_alg *cc_alg = container_of(ahash_alg, struct cc_hash_alg, ahash_alg); - crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), - sizeof(struct ahash_req_ctx)); + crypto_ahash_set_reqsize_dma(__crypto_ahash_cast(tfm), + sizeof(struct ahash_req_ctx)); ctx->hash_mode = cc_alg->hash_mode; ctx->hw_mode = cc_alg->hw_mode; @@ -1140,7 +1140,7 @@ static int cc_cra_init(struct crypto_tfm *tfm) static 
void cc_cra_exit(struct crypto_tfm *tfm) { - struct cc_hash_ctx *ctx = crypto_tfm_ctx(tfm); + struct cc_hash_ctx *ctx = crypto_tfm_ctx_dma(tfm); struct device *dev = drvdata_to_dev(ctx->drvdata); dev_dbg(dev, "cc_cra_exit"); @@ -1149,9 +1149,9 @@ static void cc_cra_exit(struct crypto_tfm *tfm) static int cc_mac_update(struct ahash_request *req) { - struct ahash_req_ctx *state = ahash_request_ctx(req); + struct ahash_req_ctx *state = ahash_request_ctx_dma(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm); struct device *dev = drvdata_to_dev(ctx->drvdata); unsigned int block_size = crypto_tfm_alg_blocksize(&tfm->base); struct cc_crypto_req cc_req = {}; @@ -1217,9 +1217,9 @@ static int cc_mac_update(struct ahash_request *req) static int cc_mac_final(struct ahash_request *req) { - struct ahash_req_ctx *state = ahash_request_ctx(req); + struct ahash_req_ctx *state = ahash_request_ctx_dma(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm); struct device *dev = drvdata_to_dev(ctx->drvdata); struct cc_crypto_req cc_req = {}; struct cc_hw_desc desc[CC_MAX_HASH_SEQ_LEN]; @@ -1338,9 +1338,9 @@ static int cc_mac_final(struct ahash_request *req) static int cc_mac_finup(struct ahash_request *req) { - struct ahash_req_ctx *state = ahash_request_ctx(req); + struct ahash_req_ctx *state = ahash_request_ctx_dma(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm); struct device *dev = drvdata_to_dev(ctx->drvdata); struct cc_crypto_req cc_req = {}; struct cc_hw_desc desc[CC_MAX_HASH_SEQ_LEN]; @@ -1419,9 +1419,9 @@ static int cc_mac_finup(struct ahash_request *req) static int cc_mac_digest(struct ahash_request *req) { - struct ahash_req_ctx *state = 
ahash_request_ctx(req); + struct ahash_req_ctx *state = ahash_request_ctx_dma(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm); struct device *dev = drvdata_to_dev(ctx->drvdata); u32 digestsize = crypto_ahash_digestsize(tfm); struct cc_crypto_req cc_req = {}; @@ -1499,8 +1499,8 @@ static int cc_mac_digest(struct ahash_request *req) static int cc_hash_export(struct ahash_request *req, void *out) { struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct cc_hash_ctx *ctx = crypto_ahash_ctx(ahash); - struct ahash_req_ctx *state = ahash_request_ctx(req); + struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); + struct ahash_req_ctx *state = ahash_request_ctx_dma(req); u8 *curr_buff = cc_hash_buf(state); u32 curr_buff_cnt = *cc_hash_buf_cnt(state); const u32 tmp = CC_EXPORT_MAGIC; @@ -1525,9 +1525,9 @@ static int cc_hash_export(struct ahash_request *req, void *out) static int cc_hash_import(struct ahash_request *req, const void *in) { struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct cc_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); struct device *dev = drvdata_to_dev(ctx->drvdata); - struct ahash_req_ctx *state = ahash_request_ctx(req); + struct ahash_req_ctx *state = ahash_request_ctx_dma(req); u32 tmp; memcpy(&tmp, in, sizeof(u32)); @@ -1846,7 +1846,7 @@ static struct cc_hash_alg *cc_alloc_hash_alg(struct cc_hash_template *template, template->driver_name); } alg->cra_module = THIS_MODULE; - alg->cra_ctxsize = sizeof(struct cc_hash_ctx); + alg->cra_ctxsize = sizeof(struct cc_hash_ctx) + crypto_dma_padding(); alg->cra_priority = CC_CRA_PRIO; alg->cra_blocksize = template->blocksize; alg->cra_alignmask = 0; @@ -2073,9 +2073,9 @@ static void cc_setup_xcbc(struct ahash_request *areq, struct cc_hw_desc desc[], unsigned int *seq_size) { unsigned int idx = *seq_size; - struct ahash_req_ctx 
*state = ahash_request_ctx(areq); + struct ahash_req_ctx *state = ahash_request_ctx_dma(areq); struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); - struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm); /* Setup XCBC MAC K1 */ hw_desc_init(&desc[idx]); @@ -2130,9 +2130,9 @@ static void cc_setup_cmac(struct ahash_request *areq, struct cc_hw_desc desc[], unsigned int *seq_size) { unsigned int idx = *seq_size; - struct ahash_req_ctx *state = ahash_request_ctx(areq); + struct ahash_req_ctx *state = ahash_request_ctx_dma(areq); struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); - struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm); /* Setup CMAC Key */ hw_desc_init(&desc[idx]); From e055bffaa390042d73fed56a0ef9bfe71a675614 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 2 Dec 2022 17:20:53 +0800 Subject: [PATCH 3576/4122] crypto: chelsio - Set DMA alignment explicitly This driver has been implicitly relying on kmalloc alignment to be sufficient for DMA. This may no longer be the case with upcoming arm64 changes. This patch changes it to explicitly request DMA alignment from the Crypto API. 
Signed-off-by: Herbert Xu --- drivers/crypto/chelsio/chcr_algo.c | 43 +++++++++++++++--------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c index 9fac1e758406..68d65773ef2b 100644 --- a/drivers/crypto/chelsio/chcr_algo.c +++ b/drivers/crypto/chelsio/chcr_algo.c @@ -210,7 +210,7 @@ static inline int chcr_handle_aead_resp(struct aead_request *req, unsigned char *input, int err) { - struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req); struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct chcr_dev *dev = a_ctx(tfm)->dev; @@ -718,7 +718,7 @@ static inline int get_qidxs(struct crypto_async_request *req, { struct aead_request *aead_req = container_of(req, struct aead_request, base); - struct chcr_aead_reqctx *reqctx = aead_request_ctx(aead_req); + struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(aead_req); *txqidx = reqctx->txqidx; *rxqidx = reqctx->rxqidx; break; @@ -2362,7 +2362,7 @@ static void chcr_hmac_cra_exit(struct crypto_tfm *tfm) inline void chcr_aead_common_exit(struct aead_request *req) { - struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req); struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct uld_ctx *u_ctx = ULD_CTX(a_ctx(tfm)); @@ -2373,7 +2373,7 @@ static int chcr_aead_common_init(struct aead_request *req) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct chcr_aead_ctx *aeadctx = AEAD_CTX(a_ctx(tfm)); - struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req); unsigned int authsize = crypto_aead_authsize(tfm); int error = -EINVAL; @@ -2417,7 +2417,7 @@ static int chcr_aead_fallback(struct aead_request *req, unsigned short op_type) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct chcr_aead_ctx *aeadctx = AEAD_CTX(a_ctx(tfm)); - struct 
aead_request *subreq = aead_request_ctx(req); + struct aead_request *subreq = aead_request_ctx_dma(req); aead_request_set_tfm(subreq, aeadctx->sw_cipher); aead_request_set_callback(subreq, req->base.flags, @@ -2438,7 +2438,7 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req, struct uld_ctx *u_ctx = ULD_CTX(ctx); struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); struct chcr_authenc_ctx *actx = AUTHENC_CTX(aeadctx); - struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req); struct sk_buff *skb = NULL; struct chcr_wr *chcr_req; struct cpl_rx_phys_dsgl *phys_cpl; @@ -2576,7 +2576,7 @@ int chcr_aead_dma_map(struct device *dev, unsigned short op_type) { int error; - struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req); struct crypto_aead *tfm = crypto_aead_reqtfm(req); unsigned int authsize = crypto_aead_authsize(tfm); int src_len, dst_len; @@ -2637,7 +2637,7 @@ void chcr_aead_dma_unmap(struct device *dev, struct aead_request *req, unsigned short op_type) { - struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req); struct crypto_aead *tfm = crypto_aead_reqtfm(req); unsigned int authsize = crypto_aead_authsize(tfm); int src_len, dst_len; @@ -2678,7 +2678,7 @@ void chcr_add_aead_src_ent(struct aead_request *req, struct ulptx_sgl *ulptx) { struct ulptx_walk ulp_walk; - struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req); if (reqctx->imm) { u8 *buf = (u8 *)ulptx; @@ -2704,7 +2704,7 @@ void chcr_add_aead_dst_ent(struct aead_request *req, struct cpl_rx_phys_dsgl *phys_cpl, unsigned short qid) { - struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req); struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct dsgl_walk dsgl_walk; unsigned int 
authsize = crypto_aead_authsize(tfm); @@ -2894,7 +2894,7 @@ static int generate_b0(struct aead_request *req, u8 *ivptr, unsigned int l, lp, m; int rc; struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req); u8 *b0 = reqctx->scratch_pad; m = crypto_aead_authsize(aead); @@ -2932,7 +2932,7 @@ static int ccm_format_packet(struct aead_request *req, unsigned short op_type, unsigned int assoclen) { - struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req); struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct chcr_aead_ctx *aeadctx = AEAD_CTX(a_ctx(tfm)); int rc = 0; @@ -2963,7 +2963,7 @@ static void fill_sec_cpl_for_aead(struct cpl_tx_sec_pdu *sec_cpl, struct chcr_context *ctx = a_ctx(tfm); struct uld_ctx *u_ctx = ULD_CTX(ctx); struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); - struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req); unsigned int cipher_mode = CHCR_SCMD_CIPHER_MODE_AES_CCM; unsigned int mac_mode = CHCR_SCMD_AUTH_MODE_CBCMAC; unsigned int rx_channel_id = reqctx->rxqidx / ctx->rxq_perchan; @@ -3036,7 +3036,7 @@ static struct sk_buff *create_aead_ccm_wr(struct aead_request *req, { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct chcr_aead_ctx *aeadctx = AEAD_CTX(a_ctx(tfm)); - struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req); struct sk_buff *skb = NULL; struct chcr_wr *chcr_req; struct cpl_rx_phys_dsgl *phys_cpl; @@ -3135,7 +3135,7 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req, struct chcr_context *ctx = a_ctx(tfm); struct uld_ctx *u_ctx = ULD_CTX(ctx); struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); - struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req); 
struct sk_buff *skb = NULL; struct chcr_wr *chcr_req; struct cpl_rx_phys_dsgl *phys_cpl; @@ -3255,9 +3255,10 @@ static int chcr_aead_cra_init(struct crypto_aead *tfm) CRYPTO_ALG_ASYNC); if (IS_ERR(aeadctx->sw_cipher)) return PTR_ERR(aeadctx->sw_cipher); - crypto_aead_set_reqsize(tfm, max(sizeof(struct chcr_aead_reqctx), - sizeof(struct aead_request) + - crypto_aead_reqsize(aeadctx->sw_cipher))); + crypto_aead_set_reqsize_dma( + tfm, max(sizeof(struct chcr_aead_reqctx), + sizeof(struct aead_request) + + crypto_aead_reqsize(aeadctx->sw_cipher))); return chcr_device_init(a_ctx(tfm)); } @@ -3735,7 +3736,7 @@ static int chcr_aead_op(struct aead_request *req, create_wr_t create_wr_fn) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req); struct chcr_context *ctx = a_ctx(tfm); struct uld_ctx *u_ctx = ULD_CTX(ctx); struct sk_buff *skb; @@ -3785,7 +3786,7 @@ static int chcr_aead_op(struct aead_request *req, static int chcr_aead_encrypt(struct aead_request *req) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req); struct chcr_context *ctx = a_ctx(tfm); unsigned int cpu; @@ -3816,7 +3817,7 @@ static int chcr_aead_decrypt(struct aead_request *req) struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct chcr_context *ctx = a_ctx(tfm); struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); - struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req); int size; unsigned int cpu; From 80b61baca4c8698139881f41473e652bedc65a73 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 2 Dec 2022 17:20:55 +0800 Subject: [PATCH 3577/4122] crypto: hisilicon/hpre - Set DMA alignment explicitly This driver has been implicitly relying on kmalloc alignment to be sufficient for DMA. 
This may no longer be the case with upcoming arm64 changes. This patch changes it to explicitly request DMA alignment from the Crypto API. Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_crypto.c | 40 +++++++++++++-------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c index 5f6d363c9435..8ede77310dc5 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_crypto.c +++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c @@ -147,6 +147,16 @@ struct hpre_asym_request { struct timespec64 req_time; }; +static inline unsigned int hpre_align_sz(void) +{ + return ((crypto_dma_align() - 1) | (HPRE_ALIGN_SZ - 1)) + 1; +} + +static inline unsigned int hpre_align_pd(void) +{ + return (hpre_align_sz() - 1) & ~(crypto_tfm_ctx_alignment() - 1); +} + static int hpre_alloc_req_id(struct hpre_ctx *ctx) { unsigned long flags; @@ -517,7 +527,7 @@ static int hpre_msg_request_set(struct hpre_ctx *ctx, void *req, bool is_rsa) } tmp = akcipher_request_ctx(akreq); - h_req = PTR_ALIGN(tmp, HPRE_ALIGN_SZ); + h_req = PTR_ALIGN(tmp, hpre_align_sz()); h_req->cb = hpre_rsa_cb; h_req->areq.rsa = akreq; msg = &h_req->req; @@ -531,7 +541,7 @@ static int hpre_msg_request_set(struct hpre_ctx *ctx, void *req, bool is_rsa) } tmp = kpp_request_ctx(kreq); - h_req = PTR_ALIGN(tmp, HPRE_ALIGN_SZ); + h_req = PTR_ALIGN(tmp, hpre_align_sz()); h_req->cb = hpre_dh_cb; h_req->areq.dh = kreq; msg = &h_req->req; @@ -582,7 +592,7 @@ static int hpre_dh_compute_value(struct kpp_request *req) struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); struct hpre_ctx *ctx = kpp_tfm_ctx(tfm); void *tmp = kpp_request_ctx(req); - struct hpre_asym_request *hpre_req = PTR_ALIGN(tmp, HPRE_ALIGN_SZ); + struct hpre_asym_request *hpre_req = PTR_ALIGN(tmp, hpre_align_sz()); struct hpre_sqe *msg = &hpre_req->req; int ret; @@ -740,7 +750,7 @@ static int hpre_dh_init_tfm(struct crypto_kpp *tfm) { struct hpre_ctx *ctx = 
kpp_tfm_ctx(tfm); - kpp_set_reqsize(tfm, sizeof(struct hpre_asym_request) + HPRE_ALIGN_SZ); + kpp_set_reqsize(tfm, sizeof(struct hpre_asym_request) + hpre_align_pd()); return hpre_ctx_init(ctx, HPRE_V2_ALG_TYPE); } @@ -785,7 +795,7 @@ static int hpre_rsa_enc(struct akcipher_request *req) struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); struct hpre_ctx *ctx = akcipher_tfm_ctx(tfm); void *tmp = akcipher_request_ctx(req); - struct hpre_asym_request *hpre_req = PTR_ALIGN(tmp, HPRE_ALIGN_SZ); + struct hpre_asym_request *hpre_req = PTR_ALIGN(tmp, hpre_align_sz()); struct hpre_sqe *msg = &hpre_req->req; int ret; @@ -833,7 +843,7 @@ static int hpre_rsa_dec(struct akcipher_request *req) struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); struct hpre_ctx *ctx = akcipher_tfm_ctx(tfm); void *tmp = akcipher_request_ctx(req); - struct hpre_asym_request *hpre_req = PTR_ALIGN(tmp, HPRE_ALIGN_SZ); + struct hpre_asym_request *hpre_req = PTR_ALIGN(tmp, hpre_align_sz()); struct hpre_sqe *msg = &hpre_req->req; int ret; @@ -1168,7 +1178,7 @@ static int hpre_rsa_init_tfm(struct crypto_akcipher *tfm) } akcipher_set_reqsize(tfm, sizeof(struct hpre_asym_request) + - HPRE_ALIGN_SZ); + hpre_align_pd()); ret = hpre_ctx_init(ctx, HPRE_V2_ALG_TYPE); if (ret) @@ -1490,7 +1500,7 @@ static int hpre_ecdh_msg_request_set(struct hpre_ctx *ctx, } tmp = kpp_request_ctx(req); - h_req = PTR_ALIGN(tmp, HPRE_ALIGN_SZ); + h_req = PTR_ALIGN(tmp, hpre_align_sz()); h_req->cb = hpre_ecdh_cb; h_req->areq.ecdh = req; msg = &h_req->req; @@ -1571,7 +1581,7 @@ static int hpre_ecdh_compute_value(struct kpp_request *req) struct hpre_ctx *ctx = kpp_tfm_ctx(tfm); struct device *dev = ctx->dev; void *tmp = kpp_request_ctx(req); - struct hpre_asym_request *hpre_req = PTR_ALIGN(tmp, HPRE_ALIGN_SZ); + struct hpre_asym_request *hpre_req = PTR_ALIGN(tmp, hpre_align_sz()); struct hpre_sqe *msg = &hpre_req->req; int ret; @@ -1622,7 +1632,7 @@ static int hpre_ecdh_nist_p192_init_tfm(struct crypto_kpp *tfm) 
ctx->curve_id = ECC_CURVE_NIST_P192; - kpp_set_reqsize(tfm, sizeof(struct hpre_asym_request) + HPRE_ALIGN_SZ); + kpp_set_reqsize(tfm, sizeof(struct hpre_asym_request) + hpre_align_pd()); return hpre_ctx_init(ctx, HPRE_V3_ECC_ALG_TYPE); } @@ -1633,7 +1643,7 @@ static int hpre_ecdh_nist_p256_init_tfm(struct crypto_kpp *tfm) ctx->curve_id = ECC_CURVE_NIST_P256; - kpp_set_reqsize(tfm, sizeof(struct hpre_asym_request) + HPRE_ALIGN_SZ); + kpp_set_reqsize(tfm, sizeof(struct hpre_asym_request) + hpre_align_pd()); return hpre_ctx_init(ctx, HPRE_V3_ECC_ALG_TYPE); } @@ -1644,7 +1654,7 @@ static int hpre_ecdh_nist_p384_init_tfm(struct crypto_kpp *tfm) ctx->curve_id = ECC_CURVE_NIST_P384; - kpp_set_reqsize(tfm, sizeof(struct hpre_asym_request) + HPRE_ALIGN_SZ); + kpp_set_reqsize(tfm, sizeof(struct hpre_asym_request) + hpre_align_pd()); return hpre_ctx_init(ctx, HPRE_V3_ECC_ALG_TYPE); } @@ -1802,7 +1812,7 @@ static int hpre_curve25519_msg_request_set(struct hpre_ctx *ctx, } tmp = kpp_request_ctx(req); - h_req = PTR_ALIGN(tmp, HPRE_ALIGN_SZ); + h_req = PTR_ALIGN(tmp, hpre_align_sz()); h_req->cb = hpre_curve25519_cb; h_req->areq.curve25519 = req; msg = &h_req->req; @@ -1923,7 +1933,7 @@ static int hpre_curve25519_compute_value(struct kpp_request *req) struct hpre_ctx *ctx = kpp_tfm_ctx(tfm); struct device *dev = ctx->dev; void *tmp = kpp_request_ctx(req); - struct hpre_asym_request *hpre_req = PTR_ALIGN(tmp, HPRE_ALIGN_SZ); + struct hpre_asym_request *hpre_req = PTR_ALIGN(tmp, hpre_align_sz()); struct hpre_sqe *msg = &hpre_req->req; int ret; @@ -1972,7 +1982,7 @@ static int hpre_curve25519_init_tfm(struct crypto_kpp *tfm) { struct hpre_ctx *ctx = kpp_tfm_ctx(tfm); - kpp_set_reqsize(tfm, sizeof(struct hpre_asym_request) + HPRE_ALIGN_SZ); + kpp_set_reqsize(tfm, sizeof(struct hpre_asym_request) + hpre_align_pd()); return hpre_ctx_init(ctx, HPRE_V3_ECC_ALG_TYPE); } From b2e2e2da7b4f62c54ce0d6a66c54e9fb05a8d514 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 2 Dec 2022 17:20:57 
+0800 Subject: [PATCH 3578/4122] crypto: safexcel - Set DMA alignment explicitly This driver has been implicitly relying on kmalloc alignment to be sufficient for DMA. This may no longer be the case with upcoming arm64 changes. This patch changes it to explicitly request DMA alignment from the Crypto API. Signed-off-by: Herbert Xu --- drivers/crypto/inside-secure/safexcel_hash.c | 99 ++++++++++---------- 1 file changed, 50 insertions(+), 49 deletions(-) diff --git a/drivers/crypto/inside-secure/safexcel_hash.c b/drivers/crypto/inside-secure/safexcel_hash.c index 103fc551d2af..ca46328472d4 100644 --- a/drivers/crypto/inside-secure/safexcel_hash.c +++ b/drivers/crypto/inside-secure/safexcel_hash.c @@ -231,7 +231,7 @@ static int safexcel_handle_req_result(struct safexcel_crypto_priv *priv, struct safexcel_result_desc *rdesc; struct ahash_request *areq = ahash_request_cast(async); struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq); - struct safexcel_ahash_req *sreq = ahash_request_ctx(areq); + struct safexcel_ahash_req *sreq = ahash_request_ctx_dma(areq); struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(ahash); u64 cache_len; @@ -312,7 +312,7 @@ static int safexcel_ahash_send_req(struct crypto_async_request *async, int ring, int *commands, int *results) { struct ahash_request *areq = ahash_request_cast(async); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq)); struct safexcel_crypto_priv *priv = ctx->base.priv; struct safexcel_command_desc *cdesc, *first_cdesc = NULL; @@ -569,7 +569,7 @@ static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring, bool *should_complete, int *ret) { struct ahash_request *areq = ahash_request_cast(async); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); int err; BUG_ON(!(priv->flags & 
EIP197_TRC_CACHE) && req->needs_inv); @@ -608,7 +608,7 @@ static int safexcel_ahash_send(struct crypto_async_request *async, int ring, int *commands, int *results) { struct ahash_request *areq = ahash_request_cast(async); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); int ret; if (req->needs_inv) @@ -624,7 +624,7 @@ static int safexcel_ahash_exit_inv(struct crypto_tfm *tfm) struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(tfm); struct safexcel_crypto_priv *priv = ctx->base.priv; EIP197_REQUEST_ON_STACK(req, ahash, EIP197_AHASH_REQ_SIZE); - struct safexcel_ahash_req *rctx = ahash_request_ctx(req); + struct safexcel_ahash_req *rctx = ahash_request_ctx_dma(req); struct safexcel_inv_result result = {}; int ring = ctx->base.ring; @@ -663,7 +663,7 @@ static int safexcel_ahash_exit_inv(struct crypto_tfm *tfm) */ static int safexcel_ahash_cache(struct ahash_request *areq) { - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); u64 cache_len; /* cache_len: everything accepted by the driver but not sent yet, @@ -689,7 +689,7 @@ static int safexcel_ahash_cache(struct ahash_request *areq) static int safexcel_ahash_enqueue(struct ahash_request *areq) { struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq)); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); struct safexcel_crypto_priv *priv = ctx->base.priv; int ret, ring; @@ -741,7 +741,7 @@ static int safexcel_ahash_enqueue(struct ahash_request *areq) static int safexcel_ahash_update(struct ahash_request *areq) { - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); int ret; /* If the request is 0 length, do nothing */ @@ -766,7 +766,7 @@ static int safexcel_ahash_update(struct ahash_request *areq) static int 
safexcel_ahash_final(struct ahash_request *areq) { - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq)); req->finish = true; @@ -870,7 +870,7 @@ static int safexcel_ahash_final(struct ahash_request *areq) static int safexcel_ahash_finup(struct ahash_request *areq) { - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); req->finish = true; @@ -880,7 +880,7 @@ static int safexcel_ahash_finup(struct ahash_request *areq) static int safexcel_ahash_export(struct ahash_request *areq, void *out) { - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); struct safexcel_ahash_export_state *export = out; export->len = req->len; @@ -896,7 +896,7 @@ static int safexcel_ahash_export(struct ahash_request *areq, void *out) static int safexcel_ahash_import(struct ahash_request *areq, const void *in) { - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); const struct safexcel_ahash_export_state *export = in; int ret; @@ -927,15 +927,15 @@ static int safexcel_ahash_cra_init(struct crypto_tfm *tfm) ctx->base.handle_result = safexcel_handle_result; ctx->fb_do_setkey = false; - crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), - sizeof(struct safexcel_ahash_req)); + crypto_ahash_set_reqsize_dma(__crypto_ahash_cast(tfm), + sizeof(struct safexcel_ahash_req)); return 0; } static int safexcel_sha1_init(struct ahash_request *areq) { struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq)); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -1012,7 +1012,7 @@ struct safexcel_alg_template safexcel_alg_sha1 = { static 
int safexcel_hmac_sha1_init(struct ahash_request *areq) { struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq)); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -1124,7 +1124,7 @@ static int safexcel_hmac_init_iv(struct ahash_request *areq, if (ret) return ret; - req = ahash_request_ctx(areq); + req = ahash_request_ctx_dma(areq); req->hmac = true; req->last_req = true; @@ -1264,7 +1264,7 @@ struct safexcel_alg_template safexcel_alg_hmac_sha1 = { static int safexcel_sha256_init(struct ahash_request *areq) { struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq)); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -1321,7 +1321,7 @@ struct safexcel_alg_template safexcel_alg_sha256 = { static int safexcel_sha224_init(struct ahash_request *areq) { struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq)); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -1385,7 +1385,7 @@ static int safexcel_hmac_sha224_setkey(struct crypto_ahash *tfm, const u8 *key, static int safexcel_hmac_sha224_init(struct ahash_request *areq) { struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq)); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -1457,7 +1457,7 @@ static int safexcel_hmac_sha256_setkey(struct crypto_ahash *tfm, const u8 *key, static int safexcel_hmac_sha256_init(struct ahash_request *areq) { struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq)); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = 
ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -1522,7 +1522,7 @@ struct safexcel_alg_template safexcel_alg_hmac_sha256 = { static int safexcel_sha512_init(struct ahash_request *areq) { struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq)); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -1579,7 +1579,7 @@ struct safexcel_alg_template safexcel_alg_sha512 = { static int safexcel_sha384_init(struct ahash_request *areq) { struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq)); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -1643,7 +1643,7 @@ static int safexcel_hmac_sha512_setkey(struct crypto_ahash *tfm, const u8 *key, static int safexcel_hmac_sha512_init(struct ahash_request *areq) { struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq)); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -1715,7 +1715,7 @@ static int safexcel_hmac_sha384_setkey(struct crypto_ahash *tfm, const u8 *key, static int safexcel_hmac_sha384_init(struct ahash_request *areq) { struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq)); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -1780,7 +1780,7 @@ struct safexcel_alg_template safexcel_alg_hmac_sha384 = { static int safexcel_md5_init(struct ahash_request *areq) { struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq)); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -1837,7 +1837,7 @@ 
struct safexcel_alg_template safexcel_alg_md5 = { static int safexcel_hmac_md5_init(struct ahash_request *areq) { struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq)); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -1920,7 +1920,7 @@ static int safexcel_crc32_cra_init(struct crypto_tfm *tfm) static int safexcel_crc32_init(struct ahash_request *areq) { struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq)); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -1992,7 +1992,7 @@ struct safexcel_alg_template safexcel_alg_crc32 = { static int safexcel_cbcmac_init(struct ahash_request *areq) { struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq)); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -2252,7 +2252,7 @@ struct safexcel_alg_template safexcel_alg_cmac = { static int safexcel_sm3_init(struct ahash_request *areq) { struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq)); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -2316,7 +2316,7 @@ static int safexcel_hmac_sm3_setkey(struct crypto_ahash *tfm, const u8 *key, static int safexcel_hmac_sm3_init(struct ahash_request *areq) { struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq)); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -2382,7 +2382,7 @@ static int safexcel_sha3_224_init(struct ahash_request *areq) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); struct 
safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -2400,7 +2400,7 @@ static int safexcel_sha3_fbcheck(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm); - struct ahash_request *subreq = ahash_request_ctx(req); + struct ahash_request *subreq = ahash_request_ctx_dma(req); int ret = 0; if (ctx->do_fallback) { @@ -2437,7 +2437,7 @@ static int safexcel_sha3_update(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm); - struct ahash_request *subreq = ahash_request_ctx(req); + struct ahash_request *subreq = ahash_request_ctx_dma(req); ctx->do_fallback = true; return safexcel_sha3_fbcheck(req) ?: crypto_ahash_update(subreq); @@ -2447,7 +2447,7 @@ static int safexcel_sha3_final(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm); - struct ahash_request *subreq = ahash_request_ctx(req); + struct ahash_request *subreq = ahash_request_ctx_dma(req); ctx->do_fallback = true; return safexcel_sha3_fbcheck(req) ?: crypto_ahash_final(subreq); @@ -2457,7 +2457,7 @@ static int safexcel_sha3_finup(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm); - struct ahash_request *subreq = ahash_request_ctx(req); + struct ahash_request *subreq = ahash_request_ctx_dma(req); ctx->do_fallback |= !req->nbytes; if (ctx->do_fallback) @@ -2472,7 +2472,7 @@ static int safexcel_sha3_digest_fallback(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm); - struct ahash_request *subreq = ahash_request_ctx(req); + struct ahash_request 
*subreq = ahash_request_ctx_dma(req); ctx->do_fallback = true; ctx->fb_init_done = false; @@ -2492,7 +2492,7 @@ static int safexcel_sha3_export(struct ahash_request *req, void *out) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm); - struct ahash_request *subreq = ahash_request_ctx(req); + struct ahash_request *subreq = ahash_request_ctx_dma(req); ctx->do_fallback = true; return safexcel_sha3_fbcheck(req) ?: crypto_ahash_export(subreq, out); @@ -2502,7 +2502,7 @@ static int safexcel_sha3_import(struct ahash_request *req, const void *in) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm); - struct ahash_request *subreq = ahash_request_ctx(req); + struct ahash_request *subreq = ahash_request_ctx_dma(req); ctx->do_fallback = true; return safexcel_sha3_fbcheck(req) ?: crypto_ahash_import(subreq, in); @@ -2526,9 +2526,10 @@ static int safexcel_sha3_cra_init(struct crypto_tfm *tfm) /* Update statesize from fallback algorithm! 
*/ crypto_hash_alg_common(ahash)->statesize = crypto_ahash_statesize(ctx->fback); - crypto_ahash_set_reqsize(ahash, max(sizeof(struct safexcel_ahash_req), - sizeof(struct ahash_request) + - crypto_ahash_reqsize(ctx->fback))); + crypto_ahash_set_reqsize_dma( + ahash, max(sizeof(struct safexcel_ahash_req), + sizeof(struct ahash_request) + + crypto_ahash_reqsize(ctx->fback))); return 0; } @@ -2575,7 +2576,7 @@ static int safexcel_sha3_256_init(struct ahash_request *areq) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -2633,7 +2634,7 @@ static int safexcel_sha3_384_init(struct ahash_request *areq) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -2691,7 +2692,7 @@ static int safexcel_sha3_512_init(struct ahash_request *areq) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -2841,7 +2842,7 @@ static int safexcel_hmac_sha3_224_init(struct ahash_request *areq) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -2912,7 +2913,7 @@ static int safexcel_hmac_sha3_256_init(struct ahash_request *areq) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm); - 
struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -2983,7 +2984,7 @@ static int safexcel_hmac_sha3_384_init(struct ahash_request *areq) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -3054,7 +3055,7 @@ static int safexcel_hmac_sha3_512_init(struct ahash_request *areq) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); From be75969c81d9a6e13487e1c043e62ed5432d9fa1 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 2 Dec 2022 17:20:59 +0800 Subject: [PATCH 3579/4122] crypto: keembay - Set DMA alignment explicitly This driver has been implicitly relying on kmalloc alignment to be sufficient for DMA. This may no longer be the case with upcoming arm64 changes. This patch changes it to explicitly request DMA alignment from the Crypto API. 
Signed-off-by: Herbert Xu --- drivers/crypto/keembay/keembay-ocs-hcu-core.c | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/crypto/keembay/keembay-ocs-hcu-core.c b/drivers/crypto/keembay/keembay-ocs-hcu-core.c index 0379dbf32a4c..d4bcbed1f546 100644 --- a/drivers/crypto/keembay/keembay-ocs-hcu-core.c +++ b/drivers/crypto/keembay/keembay-ocs-hcu-core.c @@ -226,7 +226,7 @@ static void kmb_ocs_hcu_dma_cleanup(struct ahash_request *req, */ static int kmb_ocs_dma_prepare(struct ahash_request *req) { - struct ocs_hcu_rctx *rctx = ahash_request_ctx(req); + struct ocs_hcu_rctx *rctx = ahash_request_ctx_dma(req); struct device *dev = rctx->hcu_dev->dev; unsigned int remainder = 0; unsigned int total; @@ -356,7 +356,7 @@ cleanup: static void kmb_ocs_hcu_secure_cleanup(struct ahash_request *req) { - struct ocs_hcu_rctx *rctx = ahash_request_ctx(req); + struct ocs_hcu_rctx *rctx = ahash_request_ctx_dma(req); /* Clear buffer of any data. */ memzero_explicit(rctx->buffer, sizeof(rctx->buffer)); @@ -374,7 +374,7 @@ static int kmb_ocs_hcu_handle_queue(struct ahash_request *req) static int prepare_ipad(struct ahash_request *req) { - struct ocs_hcu_rctx *rctx = ahash_request_ctx(req); + struct ocs_hcu_rctx *rctx = ahash_request_ctx_dma(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct ocs_hcu_ctx *ctx = crypto_ahash_ctx(tfm); int i; @@ -414,7 +414,7 @@ static int kmb_ocs_hcu_do_one_request(struct crypto_engine *engine, void *areq) base); struct ocs_hcu_dev *hcu_dev = kmb_ocs_hcu_find_dev(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct ocs_hcu_rctx *rctx = ahash_request_ctx(req); + struct ocs_hcu_rctx *rctx = ahash_request_ctx_dma(req); struct ocs_hcu_ctx *tctx = crypto_ahash_ctx(tfm); int rc; int i; @@ -561,7 +561,7 @@ error: static int kmb_ocs_hcu_init(struct ahash_request *req) { struct ocs_hcu_dev *hcu_dev = kmb_ocs_hcu_find_dev(req); - struct ocs_hcu_rctx *rctx = ahash_request_ctx(req); + struct 
ocs_hcu_rctx *rctx = ahash_request_ctx_dma(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct ocs_hcu_ctx *ctx = crypto_ahash_ctx(tfm); @@ -614,7 +614,7 @@ static int kmb_ocs_hcu_init(struct ahash_request *req) static int kmb_ocs_hcu_update(struct ahash_request *req) { - struct ocs_hcu_rctx *rctx = ahash_request_ctx(req); + struct ocs_hcu_rctx *rctx = ahash_request_ctx_dma(req); int rc; if (!req->nbytes) @@ -650,7 +650,7 @@ static int kmb_ocs_hcu_update(struct ahash_request *req) /* Common logic for kmb_ocs_hcu_final() and kmb_ocs_hcu_finup(). */ static int kmb_ocs_hcu_fin_common(struct ahash_request *req) { - struct ocs_hcu_rctx *rctx = ahash_request_ctx(req); + struct ocs_hcu_rctx *rctx = ahash_request_ctx_dma(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct ocs_hcu_ctx *ctx = crypto_ahash_ctx(tfm); int rc; @@ -687,7 +687,7 @@ static int kmb_ocs_hcu_fin_common(struct ahash_request *req) static int kmb_ocs_hcu_final(struct ahash_request *req) { - struct ocs_hcu_rctx *rctx = ahash_request_ctx(req); + struct ocs_hcu_rctx *rctx = ahash_request_ctx_dma(req); rctx->sg_data_total = 0; rctx->sg_data_offset = 0; @@ -698,7 +698,7 @@ static int kmb_ocs_hcu_final(struct ahash_request *req) static int kmb_ocs_hcu_finup(struct ahash_request *req) { - struct ocs_hcu_rctx *rctx = ahash_request_ctx(req); + struct ocs_hcu_rctx *rctx = ahash_request_ctx_dma(req); rctx->sg_data_total = req->nbytes; rctx->sg_data_offset = 0; @@ -726,7 +726,7 @@ static int kmb_ocs_hcu_digest(struct ahash_request *req) static int kmb_ocs_hcu_export(struct ahash_request *req, void *out) { - struct ocs_hcu_rctx *rctx = ahash_request_ctx(req); + struct ocs_hcu_rctx *rctx = ahash_request_ctx_dma(req); /* Intermediate data is always stored and applied per request. 
*/ memcpy(out, rctx, sizeof(*rctx)); @@ -736,7 +736,7 @@ static int kmb_ocs_hcu_export(struct ahash_request *req, void *out) static int kmb_ocs_hcu_import(struct ahash_request *req, const void *in) { - struct ocs_hcu_rctx *rctx = ahash_request_ctx(req); + struct ocs_hcu_rctx *rctx = ahash_request_ctx_dma(req); /* Intermediate data is always stored and applied per request. */ memcpy(rctx, in, sizeof(*rctx)); @@ -822,8 +822,8 @@ err_free_ahash: /* Set request size and initialize tfm context. */ static void __cra_init(struct crypto_tfm *tfm, struct ocs_hcu_ctx *ctx) { - crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), - sizeof(struct ocs_hcu_rctx)); + crypto_ahash_set_reqsize_dma(__crypto_ahash_cast(tfm), + sizeof(struct ocs_hcu_rctx)); /* Init context to 0. */ memzero_explicit(ctx, sizeof(*ctx)); From 0a55f4e38556f7e59b0f30fac0751e3a04be44c2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 2 Dec 2022 17:21:01 +0800 Subject: [PATCH 3580/4122] crypto: octeontx - Set DMA alignment explicitly This driver has been implicitly relying on kmalloc alignment to be sufficient for DMA. This may no longer be the case with upcoming arm64 changes. This patch changes it to explicitly request DMA alignment from the Crypto API. 
Signed-off-by: Herbert Xu --- .../crypto/marvell/octeontx/otx_cptvf_algs.c | 69 ++++++++++--------- 1 file changed, 35 insertions(+), 34 deletions(-) diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c index 01c48ddc4eeb..80ba77c793a7 100644 --- a/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c +++ b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c @@ -103,7 +103,7 @@ static inline int validate_hmac_cipher_null(struct otx_cpt_req_info *cpt_req) req = container_of(cpt_req->areq, struct aead_request, base); tfm = crypto_aead_reqtfm(req); - rctx = aead_request_ctx(req); + rctx = aead_request_ctx_dma(req); if (memcmp(rctx->fctx.hmac.s.hmac_calc, rctx->fctx.hmac.s.hmac_recv, crypto_aead_authsize(tfm)) != 0) @@ -155,7 +155,7 @@ static void output_iv_copyback(struct crypto_async_request *areq) ctx = crypto_skcipher_ctx(stfm); if (ctx->cipher_type == OTX_CPT_AES_CBC || ctx->cipher_type == OTX_CPT_DES3_CBC) { - rctx = skcipher_request_ctx(sreq); + rctx = skcipher_request_ctx_dma(sreq); req_info = &rctx->cpt_req; ivsize = crypto_skcipher_ivsize(stfm); start = sreq->cryptlen - ivsize; @@ -233,7 +233,7 @@ static inline u32 create_ctx_hdr(struct skcipher_request *req, u32 enc, u32 *argcnt) { struct crypto_skcipher *stfm = crypto_skcipher_reqtfm(req); - struct otx_cpt_req_ctx *rctx = skcipher_request_ctx(req); + struct otx_cpt_req_ctx *rctx = skcipher_request_ctx_dma(req); struct otx_cpt_req_info *req_info = &rctx->cpt_req; struct crypto_tfm *tfm = crypto_skcipher_tfm(stfm); struct otx_cpt_enc_ctx *ctx = crypto_tfm_ctx(tfm); @@ -303,7 +303,7 @@ static inline u32 create_ctx_hdr(struct skcipher_request *req, u32 enc, static inline u32 create_input_list(struct skcipher_request *req, u32 enc, u32 enc_iv_len) { - struct otx_cpt_req_ctx *rctx = skcipher_request_ctx(req); + struct otx_cpt_req_ctx *rctx = skcipher_request_ctx_dma(req); struct otx_cpt_req_info *req_info = &rctx->cpt_req; u32 argcnt = 0; int ret; @@ -321,7 
+321,7 @@ static inline u32 create_input_list(struct skcipher_request *req, u32 enc, static inline void create_output_list(struct skcipher_request *req, u32 enc_iv_len) { - struct otx_cpt_req_ctx *rctx = skcipher_request_ctx(req); + struct otx_cpt_req_ctx *rctx = skcipher_request_ctx_dma(req); struct otx_cpt_req_info *req_info = &rctx->cpt_req; u32 argcnt = 0; @@ -340,7 +340,7 @@ static inline void create_output_list(struct skcipher_request *req, static inline int cpt_enc_dec(struct skcipher_request *req, u32 enc) { struct crypto_skcipher *stfm = crypto_skcipher_reqtfm(req); - struct otx_cpt_req_ctx *rctx = skcipher_request_ctx(req); + struct otx_cpt_req_ctx *rctx = skcipher_request_ctx_dma(req); struct otx_cpt_req_info *req_info = &rctx->cpt_req; u32 enc_iv_len = crypto_skcipher_ivsize(stfm); struct pci_dev *pdev; @@ -501,15 +501,16 @@ static int otx_cpt_enc_dec_init(struct crypto_skcipher *tfm) * allocated since the cryptd daemon uses * this memory for request_ctx information */ - crypto_skcipher_set_reqsize(tfm, sizeof(struct otx_cpt_req_ctx) + - sizeof(struct skcipher_request)); + crypto_skcipher_set_reqsize_dma( + tfm, sizeof(struct otx_cpt_req_ctx) + + sizeof(struct skcipher_request)); return 0; } static int cpt_aead_init(struct crypto_aead *tfm, u8 cipher_type, u8 mac_type) { - struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm); ctx->cipher_type = cipher_type; ctx->mac_type = mac_type; @@ -551,7 +552,7 @@ static int cpt_aead_init(struct crypto_aead *tfm, u8 cipher_type, u8 mac_type) } } - crypto_aead_set_reqsize(tfm, sizeof(struct otx_cpt_req_ctx)); + crypto_aead_set_reqsize_dma(tfm, sizeof(struct otx_cpt_req_ctx)); return 0; } @@ -603,7 +604,7 @@ static int otx_cpt_aead_gcm_aes_init(struct crypto_aead *tfm) static void otx_cpt_aead_exit(struct crypto_aead *tfm) { - struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm); kfree(ctx->ipad); 
kfree(ctx->opad); @@ -619,7 +620,7 @@ static void otx_cpt_aead_exit(struct crypto_aead *tfm) static int otx_cpt_aead_set_authsize(struct crypto_aead *tfm, unsigned int authsize) { - struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm); switch (ctx->mac_type) { case OTX_CPT_SHA1: @@ -739,7 +740,7 @@ static int copy_pad(u8 mac_type, u8 *out_pad, u8 *in_pad) static int aead_hmac_init(struct crypto_aead *cipher) { - struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher); + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(cipher); int state_size = crypto_shash_statesize(ctx->hashalg); int ds = crypto_shash_digestsize(ctx->hashalg); int bs = crypto_shash_blocksize(ctx->hashalg); @@ -837,7 +838,7 @@ static int otx_cpt_aead_cbc_aes_sha_setkey(struct crypto_aead *cipher, const unsigned char *key, unsigned int keylen) { - struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher); + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(cipher); struct crypto_authenc_key_param *param; int enckeylen = 0, authkeylen = 0; struct rtattr *rta = (void *)key; @@ -896,7 +897,7 @@ static int otx_cpt_aead_ecb_null_sha_setkey(struct crypto_aead *cipher, const unsigned char *key, unsigned int keylen) { - struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher); + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(cipher); struct crypto_authenc_key_param *param; struct rtattr *rta = (void *)key; int enckeylen = 0; @@ -932,7 +933,7 @@ static int otx_cpt_aead_gcm_aes_setkey(struct crypto_aead *cipher, const unsigned char *key, unsigned int keylen) { - struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher); + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(cipher); /* * For aes gcm we expect to get encryption key (16, 24, 32 bytes) @@ -965,9 +966,9 @@ static int otx_cpt_aead_gcm_aes_setkey(struct crypto_aead *cipher, static inline u32 create_aead_ctx_hdr(struct aead_request *req, u32 enc, u32 *argcnt) { - struct otx_cpt_req_ctx *rctx = 
aead_request_ctx(req); + struct otx_cpt_req_ctx *rctx = aead_request_ctx_dma(req); struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm); struct otx_cpt_req_info *req_info = &rctx->cpt_req; struct otx_cpt_fc_ctx *fctx = &rctx->fctx; int mac_len = crypto_aead_authsize(tfm); @@ -1050,9 +1051,9 @@ static inline u32 create_aead_ctx_hdr(struct aead_request *req, u32 enc, static inline u32 create_hmac_ctx_hdr(struct aead_request *req, u32 *argcnt, u32 enc) { - struct otx_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx_cpt_req_ctx *rctx = aead_request_ctx_dma(req); struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm); struct otx_cpt_req_info *req_info = &rctx->cpt_req; req_info->ctrl.s.dma_mode = OTX_CPT_DMA_GATHER_SCATTER; @@ -1076,7 +1077,7 @@ static inline u32 create_hmac_ctx_hdr(struct aead_request *req, u32 *argcnt, static inline u32 create_aead_input_list(struct aead_request *req, u32 enc) { - struct otx_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx_cpt_req_ctx *rctx = aead_request_ctx_dma(req); struct otx_cpt_req_info *req_info = &rctx->cpt_req; u32 inputlen = req->cryptlen + req->assoclen; u32 status, argcnt = 0; @@ -1093,7 +1094,7 @@ static inline u32 create_aead_input_list(struct aead_request *req, u32 enc) static inline u32 create_aead_output_list(struct aead_request *req, u32 enc, u32 mac_len) { - struct otx_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx_cpt_req_ctx *rctx = aead_request_ctx_dma(req); struct otx_cpt_req_info *req_info = &rctx->cpt_req; u32 argcnt = 0, outputlen = 0; @@ -1111,7 +1112,7 @@ static inline u32 create_aead_output_list(struct aead_request *req, u32 enc, static inline u32 create_aead_null_input_list(struct aead_request *req, u32 enc, u32 mac_len) { - struct otx_cpt_req_ctx *rctx = 
aead_request_ctx(req); + struct otx_cpt_req_ctx *rctx = aead_request_ctx_dma(req); struct otx_cpt_req_info *req_info = &rctx->cpt_req; u32 inputlen, argcnt = 0; @@ -1130,7 +1131,7 @@ static inline u32 create_aead_null_input_list(struct aead_request *req, static inline u32 create_aead_null_output_list(struct aead_request *req, u32 enc, u32 mac_len) { - struct otx_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx_cpt_req_ctx *rctx = aead_request_ctx_dma(req); struct otx_cpt_req_info *req_info = &rctx->cpt_req; struct scatterlist *dst; u8 *ptr = NULL; @@ -1217,7 +1218,7 @@ error: static u32 cpt_aead_enc_dec(struct aead_request *req, u8 reg_type, u8 enc) { - struct otx_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx_cpt_req_ctx *rctx = aead_request_ctx_dma(req); struct otx_cpt_req_info *req_info = &rctx->cpt_req; struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct pci_dev *pdev; @@ -1409,7 +1410,7 @@ static struct aead_alg otx_cpt_aeads[] = { { .cra_driver_name = "cpt_hmac_sha1_cbc_aes", .cra_blocksize = AES_BLOCK_SIZE, .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY, - .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx) + CRYPTO_DMA_PADDING, .cra_priority = 4001, .cra_alignmask = 0, .cra_module = THIS_MODULE, @@ -1428,7 +1429,7 @@ static struct aead_alg otx_cpt_aeads[] = { { .cra_driver_name = "cpt_hmac_sha256_cbc_aes", .cra_blocksize = AES_BLOCK_SIZE, .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY, - .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx) + CRYPTO_DMA_PADDING, .cra_priority = 4001, .cra_alignmask = 0, .cra_module = THIS_MODULE, @@ -1447,7 +1448,7 @@ static struct aead_alg otx_cpt_aeads[] = { { .cra_driver_name = "cpt_hmac_sha384_cbc_aes", .cra_blocksize = AES_BLOCK_SIZE, .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY, - .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_ctxsize = sizeof(struct 
otx_cpt_aead_ctx) + CRYPTO_DMA_PADDING, .cra_priority = 4001, .cra_alignmask = 0, .cra_module = THIS_MODULE, @@ -1466,7 +1467,7 @@ static struct aead_alg otx_cpt_aeads[] = { { .cra_driver_name = "cpt_hmac_sha512_cbc_aes", .cra_blocksize = AES_BLOCK_SIZE, .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY, - .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx) + CRYPTO_DMA_PADDING, .cra_priority = 4001, .cra_alignmask = 0, .cra_module = THIS_MODULE, @@ -1485,7 +1486,7 @@ static struct aead_alg otx_cpt_aeads[] = { { .cra_driver_name = "cpt_hmac_sha1_ecb_null", .cra_blocksize = 1, .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY, - .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx) + CRYPTO_DMA_PADDING, .cra_priority = 4001, .cra_alignmask = 0, .cra_module = THIS_MODULE, @@ -1504,7 +1505,7 @@ static struct aead_alg otx_cpt_aeads[] = { { .cra_driver_name = "cpt_hmac_sha256_ecb_null", .cra_blocksize = 1, .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY, - .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx) + CRYPTO_DMA_PADDING, .cra_priority = 4001, .cra_alignmask = 0, .cra_module = THIS_MODULE, @@ -1523,7 +1524,7 @@ static struct aead_alg otx_cpt_aeads[] = { { .cra_driver_name = "cpt_hmac_sha384_ecb_null", .cra_blocksize = 1, .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY, - .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx) + CRYPTO_DMA_PADDING, .cra_priority = 4001, .cra_alignmask = 0, .cra_module = THIS_MODULE, @@ -1542,7 +1543,7 @@ static struct aead_alg otx_cpt_aeads[] = { { .cra_driver_name = "cpt_hmac_sha512_ecb_null", .cra_blocksize = 1, .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY, - .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx) + CRYPTO_DMA_PADDING, .cra_priority = 4001, 
.cra_alignmask = 0, .cra_module = THIS_MODULE, @@ -1561,7 +1562,7 @@ static struct aead_alg otx_cpt_aeads[] = { { .cra_driver_name = "cpt_rfc4106_gcm_aes", .cra_blocksize = 1, .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY, - .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx) + CRYPTO_DMA_PADDING, .cra_priority = 4001, .cra_alignmask = 0, .cra_module = THIS_MODULE, From d887dec105cdeda6b8da0e84d96c7a07d80269bc Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 2 Dec 2022 17:21:03 +0800 Subject: [PATCH 3581/4122] crypto: octeontx2 - Set DMA alignment explicitly This driver has been implicitly relying on kmalloc alignment to be sufficient for DMA. This may no longer be the case with upcoming arm64 changes. This patch changes it to explicitly request DMA alignment from the Crypto API. Signed-off-by: Herbert Xu --- .../marvell/octeontx2/otx2_cptvf_algs.c | 79 ++++++++++--------- 1 file changed, 40 insertions(+), 39 deletions(-) diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c index 67530e90bbfe..30b423605c9c 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c @@ -87,7 +87,7 @@ static inline int validate_hmac_cipher_null(struct otx2_cpt_req_info *cpt_req) req = container_of(cpt_req->areq, struct aead_request, base); tfm = crypto_aead_reqtfm(req); - rctx = aead_request_ctx(req); + rctx = aead_request_ctx_dma(req); if (memcmp(rctx->fctx.hmac.s.hmac_calc, rctx->fctx.hmac.s.hmac_recv, crypto_aead_authsize(tfm)) != 0) @@ -137,7 +137,7 @@ static void output_iv_copyback(struct crypto_async_request *areq) ctx = crypto_skcipher_ctx(stfm); if (ctx->cipher_type == OTX2_CPT_AES_CBC || ctx->cipher_type == OTX2_CPT_DES3_CBC) { - rctx = skcipher_request_ctx(sreq); + rctx = skcipher_request_ctx_dma(sreq); req_info = &rctx->cpt_req; ivsize = crypto_skcipher_ivsize(stfm); start = 
sreq->cryptlen - ivsize; @@ -219,7 +219,7 @@ static inline int create_ctx_hdr(struct skcipher_request *req, u32 enc, u32 *argcnt) { struct crypto_skcipher *stfm = crypto_skcipher_reqtfm(req); - struct otx2_cpt_req_ctx *rctx = skcipher_request_ctx(req); + struct otx2_cpt_req_ctx *rctx = skcipher_request_ctx_dma(req); struct otx2_cpt_enc_ctx *ctx = crypto_skcipher_ctx(stfm); struct otx2_cpt_req_info *req_info = &rctx->cpt_req; struct otx2_cpt_fc_ctx *fctx = &rctx->fctx; @@ -288,7 +288,7 @@ static inline int create_ctx_hdr(struct skcipher_request *req, u32 enc, static inline int create_input_list(struct skcipher_request *req, u32 enc, u32 enc_iv_len) { - struct otx2_cpt_req_ctx *rctx = skcipher_request_ctx(req); + struct otx2_cpt_req_ctx *rctx = skcipher_request_ctx_dma(req); struct otx2_cpt_req_info *req_info = &rctx->cpt_req; u32 argcnt = 0; int ret; @@ -306,7 +306,7 @@ static inline int create_input_list(struct skcipher_request *req, u32 enc, static inline void create_output_list(struct skcipher_request *req, u32 enc_iv_len) { - struct otx2_cpt_req_ctx *rctx = skcipher_request_ctx(req); + struct otx2_cpt_req_ctx *rctx = skcipher_request_ctx_dma(req); struct otx2_cpt_req_info *req_info = &rctx->cpt_req; u32 argcnt = 0; @@ -325,7 +325,7 @@ static inline void create_output_list(struct skcipher_request *req, static int skcipher_do_fallback(struct skcipher_request *req, bool is_enc) { struct crypto_skcipher *stfm = crypto_skcipher_reqtfm(req); - struct otx2_cpt_req_ctx *rctx = skcipher_request_ctx(req); + struct otx2_cpt_req_ctx *rctx = skcipher_request_ctx_dma(req); struct otx2_cpt_enc_ctx *ctx = crypto_skcipher_ctx(stfm); int ret; @@ -348,7 +348,7 @@ static int skcipher_do_fallback(struct skcipher_request *req, bool is_enc) static inline int cpt_enc_dec(struct skcipher_request *req, u32 enc) { struct crypto_skcipher *stfm = crypto_skcipher_reqtfm(req); - struct otx2_cpt_req_ctx *rctx = skcipher_request_ctx(req); + struct otx2_cpt_req_ctx *rctx = 
skcipher_request_ctx_dma(req); struct otx2_cpt_enc_ctx *ctx = crypto_skcipher_ctx(stfm); struct otx2_cpt_req_info *req_info = &rctx->cpt_req; u32 enc_iv_len = crypto_skcipher_ivsize(stfm); @@ -537,8 +537,9 @@ static int otx2_cpt_enc_dec_init(struct crypto_skcipher *stfm) * allocated since the cryptd daemon uses * this memory for request_ctx information */ - crypto_skcipher_set_reqsize(stfm, sizeof(struct otx2_cpt_req_ctx) + - sizeof(struct skcipher_request)); + crypto_skcipher_set_reqsize_dma( + stfm, sizeof(struct otx2_cpt_req_ctx) + + sizeof(struct skcipher_request)); return cpt_skcipher_fallback_init(ctx, alg); } @@ -572,7 +573,7 @@ static int cpt_aead_fallback_init(struct otx2_cpt_aead_ctx *ctx, static int cpt_aead_init(struct crypto_aead *atfm, u8 cipher_type, u8 mac_type) { - struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(atfm); + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(atfm); struct crypto_tfm *tfm = crypto_aead_tfm(atfm); struct crypto_alg *alg = tfm->__crt_alg; @@ -629,7 +630,7 @@ static int cpt_aead_init(struct crypto_aead *atfm, u8 cipher_type, u8 mac_type) ctx->enc_align_len = 1; break; } - crypto_aead_set_reqsize(atfm, sizeof(struct otx2_cpt_req_ctx)); + crypto_aead_set_reqsize_dma(atfm, sizeof(struct otx2_cpt_req_ctx)); return cpt_aead_fallback_init(ctx, alg); } @@ -681,7 +682,7 @@ static int otx2_cpt_aead_gcm_aes_init(struct crypto_aead *tfm) static void otx2_cpt_aead_exit(struct crypto_aead *tfm) { - struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm); kfree(ctx->ipad); kfree(ctx->opad); @@ -698,7 +699,7 @@ static void otx2_cpt_aead_exit(struct crypto_aead *tfm) static int otx2_cpt_aead_gcm_set_authsize(struct crypto_aead *tfm, unsigned int authsize) { - struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm); if (crypto_rfc4106_check_authsize(authsize)) return -EINVAL; @@ -722,7 +723,7 @@ static int 
otx2_cpt_aead_set_authsize(struct crypto_aead *tfm, static int otx2_cpt_aead_null_set_authsize(struct crypto_aead *tfm, unsigned int authsize) { - struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm); ctx->is_trunc_hmac = true; tfm->authsize = authsize; @@ -794,7 +795,7 @@ static int copy_pad(u8 mac_type, u8 *out_pad, u8 *in_pad) static int aead_hmac_init(struct crypto_aead *cipher) { - struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher); + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(cipher); int state_size = crypto_shash_statesize(ctx->hashalg); int ds = crypto_shash_digestsize(ctx->hashalg); int bs = crypto_shash_blocksize(ctx->hashalg); @@ -892,7 +893,7 @@ static int otx2_cpt_aead_cbc_aes_sha_setkey(struct crypto_aead *cipher, const unsigned char *key, unsigned int keylen) { - struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher); + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(cipher); struct crypto_authenc_key_param *param; int enckeylen = 0, authkeylen = 0; struct rtattr *rta = (void *)key; @@ -944,7 +945,7 @@ static int otx2_cpt_aead_ecb_null_sha_setkey(struct crypto_aead *cipher, const unsigned char *key, unsigned int keylen) { - struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher); + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(cipher); struct crypto_authenc_key_param *param; struct rtattr *rta = (void *)key; int enckeylen = 0; @@ -979,7 +980,7 @@ static int otx2_cpt_aead_gcm_aes_setkey(struct crypto_aead *cipher, const unsigned char *key, unsigned int keylen) { - struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher); + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(cipher); /* * For aes gcm we expect to get encryption key (16, 24, 32 bytes) @@ -1012,9 +1013,9 @@ static int otx2_cpt_aead_gcm_aes_setkey(struct crypto_aead *cipher, static inline int create_aead_ctx_hdr(struct aead_request *req, u32 enc, u32 *argcnt) { - struct otx2_cpt_req_ctx *rctx = 
aead_request_ctx(req); + struct otx2_cpt_req_ctx *rctx = aead_request_ctx_dma(req); struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm); struct otx2_cpt_req_info *req_info = &rctx->cpt_req; struct otx2_cpt_fc_ctx *fctx = &rctx->fctx; int mac_len = crypto_aead_authsize(tfm); @@ -1103,9 +1104,9 @@ static inline int create_aead_ctx_hdr(struct aead_request *req, u32 enc, static inline void create_hmac_ctx_hdr(struct aead_request *req, u32 *argcnt, u32 enc) { - struct otx2_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx2_cpt_req_ctx *rctx = aead_request_ctx_dma(req); struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm); struct otx2_cpt_req_info *req_info = &rctx->cpt_req; req_info->ctrl.s.dma_mode = OTX2_CPT_DMA_MODE_SG; @@ -1127,7 +1128,7 @@ static inline void create_hmac_ctx_hdr(struct aead_request *req, u32 *argcnt, static inline int create_aead_input_list(struct aead_request *req, u32 enc) { - struct otx2_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx2_cpt_req_ctx *rctx = aead_request_ctx_dma(req); struct otx2_cpt_req_info *req_info = &rctx->cpt_req; u32 inputlen = req->cryptlen + req->assoclen; u32 status, argcnt = 0; @@ -1144,7 +1145,7 @@ static inline int create_aead_input_list(struct aead_request *req, u32 enc) static inline void create_aead_output_list(struct aead_request *req, u32 enc, u32 mac_len) { - struct otx2_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx2_cpt_req_ctx *rctx = aead_request_ctx_dma(req); struct otx2_cpt_req_info *req_info = &rctx->cpt_req; u32 argcnt = 0, outputlen = 0; @@ -1160,7 +1161,7 @@ static inline void create_aead_output_list(struct aead_request *req, u32 enc, static inline void create_aead_null_input_list(struct aead_request *req, u32 enc, u32 mac_len) { - struct otx2_cpt_req_ctx 
*rctx = aead_request_ctx(req); + struct otx2_cpt_req_ctx *rctx = aead_request_ctx_dma(req); struct otx2_cpt_req_info *req_info = &rctx->cpt_req; u32 inputlen, argcnt = 0; @@ -1177,7 +1178,7 @@ static inline void create_aead_null_input_list(struct aead_request *req, static inline int create_aead_null_output_list(struct aead_request *req, u32 enc, u32 mac_len) { - struct otx2_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx2_cpt_req_ctx *rctx = aead_request_ctx_dma(req); struct otx2_cpt_req_info *req_info = &rctx->cpt_req; struct scatterlist *dst; u8 *ptr = NULL; @@ -1257,9 +1258,9 @@ error_free: static int aead_do_fallback(struct aead_request *req, bool is_enc) { - struct otx2_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx2_cpt_req_ctx *rctx = aead_request_ctx_dma(req); struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(aead); + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(aead); int ret; if (ctx->fbk_cipher) { @@ -1281,10 +1282,10 @@ static int aead_do_fallback(struct aead_request *req, bool is_enc) static int cpt_aead_enc_dec(struct aead_request *req, u8 reg_type, u8 enc) { - struct otx2_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx2_cpt_req_ctx *rctx = aead_request_ctx_dma(req); struct otx2_cpt_req_info *req_info = &rctx->cpt_req; struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm); struct pci_dev *pdev; int status, cpu_num; @@ -1458,7 +1459,7 @@ static struct aead_alg otx2_cpt_aeads[] = { { .cra_driver_name = "cpt_hmac_sha1_cbc_aes", .cra_blocksize = AES_BLOCK_SIZE, .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, - .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx), + .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx) + CRYPTO_DMA_PADDING, .cra_priority = 4001, .cra_alignmask = 0, .cra_module = THIS_MODULE, @@ -1477,7 +1478,7 @@ static struct aead_alg 
otx2_cpt_aeads[] = { { .cra_driver_name = "cpt_hmac_sha256_cbc_aes", .cra_blocksize = AES_BLOCK_SIZE, .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, - .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx), + .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx) + CRYPTO_DMA_PADDING, .cra_priority = 4001, .cra_alignmask = 0, .cra_module = THIS_MODULE, @@ -1496,7 +1497,7 @@ static struct aead_alg otx2_cpt_aeads[] = { { .cra_driver_name = "cpt_hmac_sha384_cbc_aes", .cra_blocksize = AES_BLOCK_SIZE, .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, - .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx), + .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx) + CRYPTO_DMA_PADDING, .cra_priority = 4001, .cra_alignmask = 0, .cra_module = THIS_MODULE, @@ -1515,7 +1516,7 @@ static struct aead_alg otx2_cpt_aeads[] = { { .cra_driver_name = "cpt_hmac_sha512_cbc_aes", .cra_blocksize = AES_BLOCK_SIZE, .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, - .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx), + .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx) + CRYPTO_DMA_PADDING, .cra_priority = 4001, .cra_alignmask = 0, .cra_module = THIS_MODULE, @@ -1534,7 +1535,7 @@ static struct aead_alg otx2_cpt_aeads[] = { { .cra_driver_name = "cpt_hmac_sha1_ecb_null", .cra_blocksize = 1, .cra_flags = CRYPTO_ALG_ASYNC, - .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx), + .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx) + CRYPTO_DMA_PADDING, .cra_priority = 4001, .cra_alignmask = 0, .cra_module = THIS_MODULE, @@ -1553,7 +1554,7 @@ static struct aead_alg otx2_cpt_aeads[] = { { .cra_driver_name = "cpt_hmac_sha256_ecb_null", .cra_blocksize = 1, .cra_flags = CRYPTO_ALG_ASYNC, - .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx), + .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx) + CRYPTO_DMA_PADDING, .cra_priority = 4001, .cra_alignmask = 0, .cra_module = THIS_MODULE, @@ -1572,7 +1573,7 @@ static struct aead_alg otx2_cpt_aeads[] = { { .cra_driver_name = "cpt_hmac_sha384_ecb_null", .cra_blocksize = 1, 
.cra_flags = CRYPTO_ALG_ASYNC, - .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx), + .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx) + CRYPTO_DMA_PADDING, .cra_priority = 4001, .cra_alignmask = 0, .cra_module = THIS_MODULE, @@ -1591,7 +1592,7 @@ static struct aead_alg otx2_cpt_aeads[] = { { .cra_driver_name = "cpt_hmac_sha512_ecb_null", .cra_blocksize = 1, .cra_flags = CRYPTO_ALG_ASYNC, - .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx), + .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx) + CRYPTO_DMA_PADDING, .cra_priority = 4001, .cra_alignmask = 0, .cra_module = THIS_MODULE, @@ -1610,7 +1611,7 @@ static struct aead_alg otx2_cpt_aeads[] = { { .cra_driver_name = "cpt_rfc4106_gcm_aes", .cra_blocksize = 1, .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, - .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx), + .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx) + CRYPTO_DMA_PADDING, .cra_priority = 4001, .cra_alignmask = 0, .cra_module = THIS_MODULE, From 18daae5b0c41bf54af5f162a3e205858c9771400 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 2 Dec 2022 17:21:05 +0800 Subject: [PATCH 3582/4122] crypto: qce - Set DMA alignment explicitly This driver has been implicitly relying on kmalloc alignment to be sufficient for DMA. This may no longer be the case with upcoming arm64 changes. This patch changes it to explicitly request DMA alignment from the Crypto API. 
Signed-off-by: Herbert Xu --- drivers/crypto/qce/aead.c | 22 +++++++++++----------- drivers/crypto/qce/common.c | 5 +++-- drivers/crypto/qce/sha.c | 18 +++++++++--------- 3 files changed, 23 insertions(+), 22 deletions(-) diff --git a/drivers/crypto/qce/aead.c b/drivers/crypto/qce/aead.c index 6eb4d2e35629..7d811728f047 100644 --- a/drivers/crypto/qce/aead.c +++ b/drivers/crypto/qce/aead.c @@ -24,7 +24,7 @@ static void qce_aead_done(void *data) { struct crypto_async_request *async_req = data; struct aead_request *req = aead_request_cast(async_req); - struct qce_aead_reqctx *rctx = aead_request_ctx(req); + struct qce_aead_reqctx *rctx = aead_request_ctx_dma(req); struct qce_aead_ctx *ctx = crypto_tfm_ctx(async_req->tfm); struct qce_alg_template *tmpl = to_aead_tmpl(crypto_aead_reqtfm(req)); struct qce_device *qce = tmpl->qce; @@ -92,7 +92,7 @@ static void qce_aead_done(void *data) static struct scatterlist * qce_aead_prepare_result_buf(struct sg_table *tbl, struct aead_request *req) { - struct qce_aead_reqctx *rctx = aead_request_ctx(req); + struct qce_aead_reqctx *rctx = aead_request_ctx_dma(req); struct qce_alg_template *tmpl = to_aead_tmpl(crypto_aead_reqtfm(req)); struct qce_device *qce = tmpl->qce; @@ -103,7 +103,7 @@ qce_aead_prepare_result_buf(struct sg_table *tbl, struct aead_request *req) static struct scatterlist * qce_aead_prepare_ccm_result_buf(struct sg_table *tbl, struct aead_request *req) { - struct qce_aead_reqctx *rctx = aead_request_ctx(req); + struct qce_aead_reqctx *rctx = aead_request_ctx_dma(req); sg_init_one(&rctx->result_sg, rctx->ccmresult_buf, QCE_BAM_BURST_SIZE); return qce_sgtable_add(tbl, &rctx->result_sg, QCE_BAM_BURST_SIZE); @@ -112,7 +112,7 @@ qce_aead_prepare_ccm_result_buf(struct sg_table *tbl, struct aead_request *req) static struct scatterlist * qce_aead_prepare_dst_buf(struct aead_request *req) { - struct qce_aead_reqctx *rctx = aead_request_ctx(req); + struct qce_aead_reqctx *rctx = aead_request_ctx_dma(req); struct 
qce_alg_template *tmpl = to_aead_tmpl(crypto_aead_reqtfm(req)); struct qce_device *qce = tmpl->qce; struct scatterlist *sg, *msg_sg, __sg[2]; @@ -186,7 +186,7 @@ qce_aead_ccm_prepare_buf_assoclen(struct aead_request *req) { struct scatterlist *sg, *msg_sg, __sg[2]; struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct qce_aead_reqctx *rctx = aead_request_ctx(req); + struct qce_aead_reqctx *rctx = aead_request_ctx_dma(req); struct qce_aead_ctx *ctx = crypto_aead_ctx(tfm); unsigned int assoclen = rctx->assoclen; unsigned int adata_header_len, cryptlen, totallen; @@ -300,7 +300,7 @@ err_free: static int qce_aead_prepare_buf(struct aead_request *req) { - struct qce_aead_reqctx *rctx = aead_request_ctx(req); + struct qce_aead_reqctx *rctx = aead_request_ctx_dma(req); struct qce_alg_template *tmpl = to_aead_tmpl(crypto_aead_reqtfm(req)); struct qce_device *qce = tmpl->qce; struct scatterlist *sg; @@ -328,7 +328,7 @@ static int qce_aead_prepare_buf(struct aead_request *req) static int qce_aead_ccm_prepare_buf(struct aead_request *req) { - struct qce_aead_reqctx *rctx = aead_request_ctx(req); + struct qce_aead_reqctx *rctx = aead_request_ctx_dma(req); struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct qce_aead_ctx *ctx = crypto_aead_ctx(tfm); struct scatterlist *sg; @@ -408,7 +408,7 @@ static int qce_aead_async_req_handle(struct crypto_async_request *async_req) { struct aead_request *req = aead_request_cast(async_req); - struct qce_aead_reqctx *rctx = aead_request_ctx(req); + struct qce_aead_reqctx *rctx = aead_request_ctx_dma(req); struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct qce_aead_ctx *ctx = crypto_tfm_ctx(async_req->tfm); struct qce_alg_template *tmpl = to_aead_tmpl(crypto_aead_reqtfm(req)); @@ -502,7 +502,7 @@ error_free: static int qce_aead_crypt(struct aead_request *req, int encrypt) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct qce_aead_reqctx *rctx = aead_request_ctx(req); + struct qce_aead_reqctx *rctx = 
aead_request_ctx_dma(req); struct qce_aead_ctx *ctx = crypto_aead_ctx(tfm); struct qce_alg_template *tmpl = to_aead_tmpl(tfm); unsigned int blocksize = crypto_aead_blocksize(tfm); @@ -675,8 +675,8 @@ static int qce_aead_init(struct crypto_aead *tfm) if (IS_ERR(ctx->fallback)) return PTR_ERR(ctx->fallback); - crypto_aead_set_reqsize(tfm, sizeof(struct qce_aead_reqctx) + - crypto_aead_reqsize(ctx->fallback)); + crypto_aead_set_reqsize_dma(tfm, sizeof(struct qce_aead_reqctx) + + crypto_aead_reqsize(ctx->fallback)); return 0; } diff --git a/drivers/crypto/qce/common.c b/drivers/crypto/qce/common.c index 7c612ba5068f..04253a8d3340 100644 --- a/drivers/crypto/qce/common.c +++ b/drivers/crypto/qce/common.c @@ -3,6 +3,7 @@ * Copyright (c) 2012-2014, The Linux Foundation. All rights reserved. */ +#include #include #include #include @@ -147,7 +148,7 @@ static int qce_setup_regs_ahash(struct crypto_async_request *async_req) { struct ahash_request *req = ahash_request_cast(async_req); struct crypto_ahash *ahash = __crypto_ahash_cast(async_req->tfm); - struct qce_sha_reqctx *rctx = ahash_request_ctx(req); + struct qce_sha_reqctx *rctx = ahash_request_ctx_dma(req); struct qce_alg_template *tmpl = to_ahash_tmpl(async_req->tfm); struct qce_device *qce = tmpl->qce; unsigned int digestsize = crypto_ahash_digestsize(ahash); @@ -419,7 +420,7 @@ static unsigned int qce_be32_to_cpu_array(u32 *dst, const u8 *src, unsigned int static int qce_setup_regs_aead(struct crypto_async_request *async_req) { struct aead_request *req = aead_request_cast(async_req); - struct qce_aead_reqctx *rctx = aead_request_ctx(req); + struct qce_aead_reqctx *rctx = aead_request_ctx_dma(req); struct qce_aead_ctx *ctx = crypto_tfm_ctx(async_req->tfm); struct qce_alg_template *tmpl = to_aead_tmpl(crypto_aead_reqtfm(req)); struct qce_device *qce = tmpl->qce; diff --git a/drivers/crypto/qce/sha.c b/drivers/crypto/qce/sha.c index 37bafd7aeb79..fc72af8aa9a7 100644 --- a/drivers/crypto/qce/sha.c +++ 
b/drivers/crypto/qce/sha.c @@ -38,7 +38,7 @@ static void qce_ahash_done(void *data) struct crypto_async_request *async_req = data; struct ahash_request *req = ahash_request_cast(async_req); struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct qce_sha_reqctx *rctx = ahash_request_ctx(req); + struct qce_sha_reqctx *rctx = ahash_request_ctx_dma(req); struct qce_alg_template *tmpl = to_ahash_tmpl(async_req->tfm); struct qce_device *qce = tmpl->qce; struct qce_result_dump *result = qce->dma.result_buf; @@ -75,7 +75,7 @@ static void qce_ahash_done(void *data) static int qce_ahash_async_req_handle(struct crypto_async_request *async_req) { struct ahash_request *req = ahash_request_cast(async_req); - struct qce_sha_reqctx *rctx = ahash_request_ctx(req); + struct qce_sha_reqctx *rctx = ahash_request_ctx_dma(req); struct qce_sha_ctx *ctx = crypto_tfm_ctx(async_req->tfm); struct qce_alg_template *tmpl = to_ahash_tmpl(async_req->tfm); struct qce_device *qce = tmpl->qce; @@ -132,7 +132,7 @@ error_unmap_src: static int qce_ahash_init(struct ahash_request *req) { - struct qce_sha_reqctx *rctx = ahash_request_ctx(req); + struct qce_sha_reqctx *rctx = ahash_request_ctx_dma(req); struct qce_alg_template *tmpl = to_ahash_tmpl(req->base.tfm); const u32 *std_iv = tmpl->std_iv; @@ -147,7 +147,7 @@ static int qce_ahash_init(struct ahash_request *req) static int qce_ahash_export(struct ahash_request *req, void *out) { - struct qce_sha_reqctx *rctx = ahash_request_ctx(req); + struct qce_sha_reqctx *rctx = ahash_request_ctx_dma(req); struct qce_sha_saved_state *export_state = out; memcpy(export_state->pending_buf, rctx->buf, rctx->buflen); @@ -164,7 +164,7 @@ static int qce_ahash_export(struct ahash_request *req, void *out) static int qce_ahash_import(struct ahash_request *req, const void *in) { - struct qce_sha_reqctx *rctx = ahash_request_ctx(req); + struct qce_sha_reqctx *rctx = ahash_request_ctx_dma(req); const struct qce_sha_saved_state *import_state = in; memset(rctx, 0, 
sizeof(*rctx)); @@ -183,7 +183,7 @@ static int qce_ahash_import(struct ahash_request *req, const void *in) static int qce_ahash_update(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct qce_sha_reqctx *rctx = ahash_request_ctx(req); + struct qce_sha_reqctx *rctx = ahash_request_ctx_dma(req); struct qce_alg_template *tmpl = to_ahash_tmpl(req->base.tfm); struct qce_device *qce = tmpl->qce; struct scatterlist *sg_last, *sg; @@ -275,7 +275,7 @@ static int qce_ahash_update(struct ahash_request *req) static int qce_ahash_final(struct ahash_request *req) { - struct qce_sha_reqctx *rctx = ahash_request_ctx(req); + struct qce_sha_reqctx *rctx = ahash_request_ctx_dma(req); struct qce_alg_template *tmpl = to_ahash_tmpl(req->base.tfm); struct qce_device *qce = tmpl->qce; @@ -302,7 +302,7 @@ static int qce_ahash_final(struct ahash_request *req) static int qce_ahash_digest(struct ahash_request *req) { - struct qce_sha_reqctx *rctx = ahash_request_ctx(req); + struct qce_sha_reqctx *rctx = ahash_request_ctx_dma(req); struct qce_alg_template *tmpl = to_ahash_tmpl(req->base.tfm); struct qce_device *qce = tmpl->qce; int ret; @@ -395,7 +395,7 @@ static int qce_ahash_cra_init(struct crypto_tfm *tfm) struct crypto_ahash *ahash = __crypto_ahash_cast(tfm); struct qce_sha_ctx *ctx = crypto_tfm_ctx(tfm); - crypto_ahash_set_reqsize(ahash, sizeof(struct qce_sha_reqctx)); + crypto_ahash_set_reqsize_dma(ahash, sizeof(struct qce_sha_reqctx)); memset(ctx, 0, sizeof(*ctx)); return 0; } From ecadb5b0111ea19fc7c240bb25d424a94471eb7d Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Fri, 2 Dec 2022 21:22:33 +0800 Subject: [PATCH 3583/4122] hwrng: amd - Fix PCI device refcount leak for_each_pci_dev() is implemented by pci_get_device(). The comment of pci_get_device() says that it will increase the reference count for the returned pci_dev and also decrease the reference count for the input pci_dev @from if it is not NULL. 
If we break for_each_pci_dev() loop with pdev not NULL, we need to call pci_dev_put() to decrease the reference count. Add the missing pci_dev_put() for the normal and error path. Fixes: 96d63c0297cc ("[PATCH] Add AMD HW RNG driver") Signed-off-by: Xiongfeng Wang Signed-off-by: Herbert Xu --- drivers/char/hw_random/amd-rng.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/char/hw_random/amd-rng.c b/drivers/char/hw_random/amd-rng.c index c22d4184bb61..0555e3838bce 100644 --- a/drivers/char/hw_random/amd-rng.c +++ b/drivers/char/hw_random/amd-rng.c @@ -143,15 +143,19 @@ static int __init amd_rng_mod_init(void) found: err = pci_read_config_dword(pdev, 0x58, &pmbase); if (err) - return err; + goto put_dev; pmbase &= 0x0000FF00; - if (pmbase == 0) - return -EIO; + if (pmbase == 0) { + err = -EIO; + goto put_dev; + } priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (!priv) - return -ENOMEM; + if (!priv) { + err = -ENOMEM; + goto put_dev; + } if (!request_region(pmbase + PMBASE_OFFSET, PMBASE_SIZE, DRV_NAME)) { dev_err(&pdev->dev, DRV_NAME " region 0x%x already in use!\n", @@ -185,6 +189,8 @@ err_iomap: release_region(pmbase + PMBASE_OFFSET, PMBASE_SIZE); out: kfree(priv); +put_dev: + pci_dev_put(pdev); return err; } @@ -200,6 +206,8 @@ static void __exit amd_rng_mod_exit(void) release_region(priv->pmbase + PMBASE_OFFSET, PMBASE_SIZE); + pci_dev_put(priv->pcidev); + kfree(priv); } From 9f6ec8dc574efb7f4f3d7ee9cd59ae307e78f445 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Fri, 2 Dec 2022 21:22:34 +0800 Subject: [PATCH 3584/4122] hwrng: geode - Fix PCI device refcount leak for_each_pci_dev() is implemented by pci_get_device(). The comment of pci_get_device() says that it will increase the reference count for the returned pci_dev and also decrease the reference count for the input pci_dev @from if it is not NULL. 
If we break for_each_pci_dev() loop with pdev not NULL, we need to call pci_dev_put() to decrease the reference count. We add a new struct 'amd_geode_priv' to record pointer of the pci_dev and membase, and then add missing pci_dev_put() for the normal and error path. Fixes: ef5d862734b8 ("[PATCH] Add Geode HW RNG driver") Signed-off-by: Xiongfeng Wang Signed-off-by: Herbert Xu --- drivers/char/hw_random/geode-rng.c | 36 +++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/drivers/char/hw_random/geode-rng.c b/drivers/char/hw_random/geode-rng.c index 138ce434f86b..12fbe8091831 100644 --- a/drivers/char/hw_random/geode-rng.c +++ b/drivers/char/hw_random/geode-rng.c @@ -51,6 +51,10 @@ static const struct pci_device_id pci_tbl[] = { }; MODULE_DEVICE_TABLE(pci, pci_tbl); +struct amd_geode_priv { + struct pci_dev *pcidev; + void __iomem *membase; +}; static int geode_rng_data_read(struct hwrng *rng, u32 *data) { @@ -90,6 +94,7 @@ static int __init geode_rng_init(void) const struct pci_device_id *ent; void __iomem *mem; unsigned long rng_base; + struct amd_geode_priv *priv; for_each_pci_dev(pdev) { ent = pci_match_id(pci_tbl, pdev); @@ -97,17 +102,26 @@ static int __init geode_rng_init(void) goto found; } /* Device not found. 
*/ - goto out; + return err; found: + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) { + err = -ENOMEM; + goto put_dev; + } + rng_base = pci_resource_start(pdev, 0); if (rng_base == 0) - goto out; + goto free_priv; err = -ENOMEM; mem = ioremap(rng_base, 0x58); if (!mem) - goto out; - geode_rng.priv = (unsigned long)mem; + goto free_priv; + + geode_rng.priv = (unsigned long)priv; + priv->membase = mem; + priv->pcidev = pdev; pr_info("AMD Geode RNG detected\n"); err = hwrng_register(&geode_rng); @@ -116,20 +130,26 @@ found: err); goto err_unmap; } -out: return err; err_unmap: iounmap(mem); - goto out; +free_priv: + kfree(priv); +put_dev: + pci_dev_put(pdev); + return err; } static void __exit geode_rng_exit(void) { - void __iomem *mem = (void __iomem *)geode_rng.priv; + struct amd_geode_priv *priv; + priv = (struct amd_geode_priv *)geode_rng.priv; hwrng_unregister(&geode_rng); - iounmap(mem); + iounmap(priv->membase); + pci_dev_put(priv->pcidev); + kfree(priv); } module_init(geode_rng_init); From 6c013679eb5c7e0b09cbcb64276f6dd97b473d12 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 3 Dec 2022 10:15:15 +0100 Subject: [PATCH 3585/4122] dt-bindings: crypto: Let STM32 define Ux500 CRYP This adds device tree bindings for the Ux500 CRYP block as a compatible in the STM32 CRYP bindings. The Ux500 CRYP binding has been used for ages in the kernel device tree for Ux500 but was never documented, so fill in the gap by making it a sibling of the STM32 CRYP block, which is what it is. The relationship to the existing STM32 CRYP block is pretty obvious when looking at the register map, and I have written patches to reuse the STM32 CRYP driver on the Ux500. The two properties added are DMA channels and power domain. Power domains are a generic SoC feature and the STM32 variant also has DMA channels. 
Cc: devicetree@vger.kernel.org Cc: Rob Herring Cc: Krzysztof Kozlowski Cc: Lionel Debieve Cc: Maxime Coquelin Cc: Alexandre Torgue Acked-by: Krzysztof Kozlowski Signed-off-by: Linus Walleij Signed-off-by: Herbert Xu --- .../bindings/crypto/st,stm32-cryp.yaml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/Documentation/devicetree/bindings/crypto/st,stm32-cryp.yaml b/Documentation/devicetree/bindings/crypto/st,stm32-cryp.yaml index ed23bf94a8e0..6759c5bf3e57 100644 --- a/Documentation/devicetree/bindings/crypto/st,stm32-cryp.yaml +++ b/Documentation/devicetree/bindings/crypto/st,stm32-cryp.yaml @@ -6,12 +6,18 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: STMicroelectronics STM32 CRYP bindings +description: The STM32 CRYP block is built on the CRYP block found in + the STn8820 SoC introduced in 2007, and subsequently used in the U8500 + SoC in 2010. + maintainers: - Lionel Debieve properties: compatible: enum: + - st,stn8820-cryp + - stericsson,ux500-cryp - st,stm32f756-cryp - st,stm32mp1-cryp @@ -27,6 +33,19 @@ properties: resets: maxItems: 1 + dmas: + items: + - description: mem2cryp DMA channel + - description: cryp2mem DMA channel + + dma-names: + items: + - const: mem2cryp + - const: cryp2mem + + power-domains: + maxItems: 1 + required: - compatible - reg From fe867538c1620738bda5328a14179a3c2bc95ab1 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 3 Dec 2022 10:15:16 +0100 Subject: [PATCH 3586/4122] crypto: stm32 - enable drivers to be used on Ux500 The Ux500 cryp and hash drivers are older versions of the hardware managed by the stm32 driver. Instead of trying to improve the Ux500 cryp and hash drivers, start to switch over to the modern and more well-maintained STM32 drivers. 
Cc: Maxime Coquelin Cc: Alexandre Torgue Acked-by: Lionel Debieve Signed-off-by: Linus Walleij Signed-off-by: Herbert Xu --- drivers/crypto/Makefile | 2 +- drivers/crypto/stm32/Kconfig | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index 116de173a66c..fa8bf1be1a8c 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -41,7 +41,7 @@ obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o obj-$(CONFIG_CRYPTO_DEV_SA2UL) += sa2ul.o obj-$(CONFIG_CRYPTO_DEV_SAHARA) += sahara.o obj-$(CONFIG_CRYPTO_DEV_SL3516) += gemini/ -obj-$(CONFIG_ARCH_STM32) += stm32/ +obj-y += stm32/ obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/ obj-$(CONFIG_CRYPTO_DEV_VIRTIO) += virtio/ diff --git a/drivers/crypto/stm32/Kconfig b/drivers/crypto/stm32/Kconfig index 4a4c3284ae1f..4fc581e9e595 100644 --- a/drivers/crypto/stm32/Kconfig +++ b/drivers/crypto/stm32/Kconfig @@ -10,7 +10,7 @@ config CRYPTO_DEV_STM32_CRC config CRYPTO_DEV_STM32_HASH tristate "Support for STM32 hash accelerators" - depends on ARCH_STM32 + depends on ARCH_STM32 || ARCH_U8500 depends on HAS_DMA select CRYPTO_HASH select CRYPTO_MD5 @@ -23,7 +23,7 @@ config CRYPTO_DEV_STM32_HASH config CRYPTO_DEV_STM32_CRYP tristate "Support for STM32 cryp accelerators" - depends on ARCH_STM32 + depends on ARCH_STM32 || ARCH_U8500 select CRYPTO_HASH select CRYPTO_ENGINE select CRYPTO_LIB_DES From 0b496efbd2d00f658dbf906882d935e7fa3dfd03 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 3 Dec 2022 10:15:17 +0100 Subject: [PATCH 3587/4122] crypto: stm32/cryp - enable for use with Ux500 This adds a few small quirks to handle the differences between the STM32 and Ux500 cryp blocks. The following differences are handled with special bool switch bits in the capabilities: - The main difference is that some registers are removed, so we add register offsets for all registers in the per-variant data. 
Then we assign the right offsets for Ux500 vs the STM32 variants. - The Ux500 does not support the aeads algorithms; gcm(aes) and ccm(aes). Avoid registering them when running on Ux500. - The Ux500 has a special "linear" key format and does some elaborate bit swizzling of the key bits before writing them into the key registers. This is written as an "application note" inside the DB8500 design specification, and seems to be the result of some mishap when assigning the data lines to register bits. (STM32 has clearly fixed this.) - The Ux500 does not have the KP "key prepare" bit in the CR register. Instead, we need to set the KSE bit, "key schedule encryption" bit which does the same thing but is in bit 11 rather than being a special "algorithm type" as on STM32. The algorithm must however be specified as AES ECB while doing this. - The Ux500 cannot just read out IV registers, we need to set the KEYRDEN "key read enable" bit, as this protects not just the key but also the IV from being read out. Enable this bit before reading out the IV and disable it afterwards. 
Cc: Maxime Coquelin Cc: Alexandre Torgue Acked-by: Lionel Debieve Signed-off-by: Linus Walleij Signed-off-by: Herbert Xu --- drivers/crypto/stm32/stm32-cryp.c | 415 +++++++++++++++++++++++------- 1 file changed, 323 insertions(+), 92 deletions(-) diff --git a/drivers/crypto/stm32/stm32-cryp.c b/drivers/crypto/stm32/stm32-cryp.c index 59638dfce573..4208338e72b6 100644 --- a/drivers/crypto/stm32/stm32-cryp.c +++ b/drivers/crypto/stm32/stm32-cryp.c @@ -2,6 +2,7 @@ /* * Copyright (C) STMicroelectronics SA 2017 * Author: Fabien Dessenne + * Ux500 support taken from snippets in the old Ux500 cryp driver */ #include @@ -62,6 +63,29 @@ #define CRYP_CSGCMCCM0R 0x00000050 #define CRYP_CSGCM0R 0x00000070 +#define UX500_CRYP_CR 0x00000000 +#define UX500_CRYP_SR 0x00000004 +#define UX500_CRYP_DIN 0x00000008 +#define UX500_CRYP_DINSIZE 0x0000000C +#define UX500_CRYP_DOUT 0x00000010 +#define UX500_CRYP_DOUSIZE 0x00000014 +#define UX500_CRYP_DMACR 0x00000018 +#define UX500_CRYP_IMSC 0x0000001C +#define UX500_CRYP_RIS 0x00000020 +#define UX500_CRYP_MIS 0x00000024 +#define UX500_CRYP_K1L 0x00000028 +#define UX500_CRYP_K1R 0x0000002C +#define UX500_CRYP_K2L 0x00000030 +#define UX500_CRYP_K2R 0x00000034 +#define UX500_CRYP_K3L 0x00000038 +#define UX500_CRYP_K3R 0x0000003C +#define UX500_CRYP_K4L 0x00000040 +#define UX500_CRYP_K4R 0x00000044 +#define UX500_CRYP_IV0L 0x00000048 +#define UX500_CRYP_IV0R 0x0000004C +#define UX500_CRYP_IV1L 0x00000050 +#define UX500_CRYP_IV1R 0x00000054 + /* Registers values */ #define CR_DEC_NOT_ENC 0x00000004 #define CR_TDES_ECB 0x00000000 @@ -71,7 +95,8 @@ #define CR_AES_ECB 0x00000020 #define CR_AES_CBC 0x00000028 #define CR_AES_CTR 0x00000030 -#define CR_AES_KP 0x00000038 +#define CR_AES_KP 0x00000038 /* Not on Ux500 */ +#define CR_AES_XTS 0x00000038 /* Only on Ux500 */ #define CR_AES_GCM 0x00080000 #define CR_AES_CCM 0x00080008 #define CR_AES_UNKNOWN 0xFFFFFFFF @@ -83,6 +108,8 @@ #define CR_KEY128 0x00000000 #define CR_KEY192 0x00000100 #define 
CR_KEY256 0x00000200 +#define CR_KEYRDEN 0x00000400 /* Only on Ux500 */ +#define CR_KSE 0x00000800 /* Only on Ux500 */ #define CR_FFLUSH 0x00004000 #define CR_CRYPEN 0x00008000 #define CR_PH_INIT 0x00000000 @@ -107,8 +134,25 @@ #define CRYP_AUTOSUSPEND_DELAY 50 struct stm32_cryp_caps { - bool swap_final; - bool padding_wa; + bool aeads_support; + bool linear_aes_key; + bool kp_mode; + bool iv_protection; + bool swap_final; + bool padding_wa; + u32 cr; + u32 sr; + u32 din; + u32 dout; + u32 imsc; + u32 mis; + u32 k1l; + u32 k1r; + u32 k3r; + u32 iv0l; + u32 iv0r; + u32 iv1l; + u32 iv1r; }; struct stm32_cryp_ctx { @@ -228,20 +272,21 @@ static inline int stm32_cryp_wait_busy(struct stm32_cryp *cryp) { u32 status; - return readl_relaxed_poll_timeout(cryp->regs + CRYP_SR, status, + return readl_relaxed_poll_timeout(cryp->regs + cryp->caps->sr, status, !(status & SR_BUSY), 10, 100000); } static inline void stm32_cryp_enable(struct stm32_cryp *cryp) { - writel_relaxed(readl_relaxed(cryp->regs + CRYP_CR) | CR_CRYPEN, cryp->regs + CRYP_CR); + writel_relaxed(readl_relaxed(cryp->regs + cryp->caps->cr) | CR_CRYPEN, + cryp->regs + cryp->caps->cr); } static inline int stm32_cryp_wait_enable(struct stm32_cryp *cryp) { u32 status; - return readl_relaxed_poll_timeout(cryp->regs + CRYP_CR, status, + return readl_relaxed_poll_timeout(cryp->regs + cryp->caps->cr, status, !(status & CR_CRYPEN), 10, 100000); } @@ -249,10 +294,22 @@ static inline int stm32_cryp_wait_output(struct stm32_cryp *cryp) { u32 status; - return readl_relaxed_poll_timeout(cryp->regs + CRYP_SR, status, + return readl_relaxed_poll_timeout(cryp->regs + cryp->caps->sr, status, status & SR_OFNE, 10, 100000); } +static inline void stm32_cryp_key_read_enable(struct stm32_cryp *cryp) +{ + writel_relaxed(readl_relaxed(cryp->regs + cryp->caps->cr) | CR_KEYRDEN, + cryp->regs + cryp->caps->cr); +} + +static inline void stm32_cryp_key_read_disable(struct stm32_cryp *cryp) +{ + writel_relaxed(readl_relaxed(cryp->regs + 
cryp->caps->cr) & ~CR_KEYRDEN, + cryp->regs + cryp->caps->cr); +} + static int stm32_cryp_read_auth_tag(struct stm32_cryp *cryp); static void stm32_cryp_finish_req(struct stm32_cryp *cryp, int err); @@ -281,12 +338,12 @@ static void stm32_cryp_hw_write_iv(struct stm32_cryp *cryp, __be32 *iv) if (!iv) return; - stm32_cryp_write(cryp, CRYP_IV0LR, be32_to_cpu(*iv++)); - stm32_cryp_write(cryp, CRYP_IV0RR, be32_to_cpu(*iv++)); + stm32_cryp_write(cryp, cryp->caps->iv0l, be32_to_cpu(*iv++)); + stm32_cryp_write(cryp, cryp->caps->iv0r, be32_to_cpu(*iv++)); if (is_aes(cryp)) { - stm32_cryp_write(cryp, CRYP_IV1LR, be32_to_cpu(*iv++)); - stm32_cryp_write(cryp, CRYP_IV1RR, be32_to_cpu(*iv++)); + stm32_cryp_write(cryp, cryp->caps->iv1l, be32_to_cpu(*iv++)); + stm32_cryp_write(cryp, cryp->caps->iv1r, be32_to_cpu(*iv++)); } } @@ -298,12 +355,102 @@ static void stm32_cryp_get_iv(struct stm32_cryp *cryp) if (!tmp) return; - *tmp++ = cpu_to_be32(stm32_cryp_read(cryp, CRYP_IV0LR)); - *tmp++ = cpu_to_be32(stm32_cryp_read(cryp, CRYP_IV0RR)); + if (cryp->caps->iv_protection) + stm32_cryp_key_read_enable(cryp); + + *tmp++ = cpu_to_be32(stm32_cryp_read(cryp, cryp->caps->iv0l)); + *tmp++ = cpu_to_be32(stm32_cryp_read(cryp, cryp->caps->iv0r)); if (is_aes(cryp)) { - *tmp++ = cpu_to_be32(stm32_cryp_read(cryp, CRYP_IV1LR)); - *tmp++ = cpu_to_be32(stm32_cryp_read(cryp, CRYP_IV1RR)); + *tmp++ = cpu_to_be32(stm32_cryp_read(cryp, cryp->caps->iv1l)); + *tmp++ = cpu_to_be32(stm32_cryp_read(cryp, cryp->caps->iv1r)); + } + + if (cryp->caps->iv_protection) + stm32_cryp_key_read_disable(cryp); +} + +/** + * ux500_swap_bits_in_byte() - mirror the bits in a byte + * @b: the byte to be mirrored + * + * The bits are swapped the following way: + * Byte b include bits 0-7, nibble 1 (n1) include bits 0-3 and + * nibble 2 (n2) bits 4-7. + * + * Nibble 1 (n1): + * (The "old" (moved) bit is replaced with a zero) + * 1. Move bit 6 and 7, 4 positions to the left. + * 2. Move bit 3 and 5, 2 positions to the left. 
+ * 3. Move bit 1-4, 1 position to the left. + * + * Nibble 2 (n2): + * 1. Move bit 0 and 1, 4 positions to the right. + * 2. Move bit 2 and 4, 2 positions to the right. + * 3. Move bit 3-6, 1 position to the right. + * + * Combine the two nibbles to a complete and swapped byte. + */ +static inline u8 ux500_swap_bits_in_byte(u8 b) +{ +#define R_SHIFT_4_MASK 0xc0 /* Bits 6 and 7, right shift 4 */ +#define R_SHIFT_2_MASK 0x28 /* (After right shift 4) Bits 3 and 5, + right shift 2 */ +#define R_SHIFT_1_MASK 0x1e /* (After right shift 2) Bits 1-4, + right shift 1 */ +#define L_SHIFT_4_MASK 0x03 /* Bits 0 and 1, left shift 4 */ +#define L_SHIFT_2_MASK 0x14 /* (After left shift 4) Bits 2 and 4, + left shift 2 */ +#define L_SHIFT_1_MASK 0x78 /* (After left shift 2) Bits 3-6, + left shift 1 */ + + u8 n1; + u8 n2; + + /* Swap most significant nibble */ + /* Right shift 4, bits 6 and 7 */ + n1 = ((b & R_SHIFT_4_MASK) >> 4) | (b & ~(R_SHIFT_4_MASK >> 4)); + /* Right shift 2, bits 3 and 5 */ + n1 = ((n1 & R_SHIFT_2_MASK) >> 2) | (n1 & ~(R_SHIFT_2_MASK >> 2)); + /* Right shift 1, bits 1-4 */ + n1 = (n1 & R_SHIFT_1_MASK) >> 1; + + /* Swap least significant nibble */ + /* Left shift 4, bits 0 and 1 */ + n2 = ((b & L_SHIFT_4_MASK) << 4) | (b & ~(L_SHIFT_4_MASK << 4)); + /* Left shift 2, bits 2 and 4 */ + n2 = ((n2 & L_SHIFT_2_MASK) << 2) | (n2 & ~(L_SHIFT_2_MASK << 2)); + /* Left shift 1, bits 3-6 */ + n2 = (n2 & L_SHIFT_1_MASK) << 1; + + return n1 | n2; +} + +/** + * ux500_swizzle_key() - Shuffle around words and bits in the AES key + * @in: key to swizzle + * @out: swizzled key + * @len: length of key, in bytes + * + * This "key swizzling procedure" is described in the examples in the + * DB8500 design specification. There is no real description of why + * the bits have been arranged like this in the hardware.
+ */ +static inline void ux500_swizzle_key(const u8 *in, u8 *out, u32 len) +{ + int i = 0; + int bpw = sizeof(u32); + int j; + int index = 0; + + j = len - bpw; + while (j >= 0) { + for (i = 0; i < bpw; i++) { + index = len - j - bpw + i; + out[j + i] = + ux500_swap_bits_in_byte(in[index]); + } + j -= bpw; } } @@ -313,14 +460,33 @@ static void stm32_cryp_hw_write_key(struct stm32_cryp *c) int r_id; if (is_des(c)) { - stm32_cryp_write(c, CRYP_K1LR, be32_to_cpu(c->ctx->key[0])); - stm32_cryp_write(c, CRYP_K1RR, be32_to_cpu(c->ctx->key[1])); - } else { - r_id = CRYP_K3RR; - for (i = c->ctx->keylen / sizeof(u32); i > 0; i--, r_id -= 4) - stm32_cryp_write(c, r_id, - be32_to_cpu(c->ctx->key[i - 1])); + stm32_cryp_write(c, c->caps->k1l, be32_to_cpu(c->ctx->key[0])); + stm32_cryp_write(c, c->caps->k1r, be32_to_cpu(c->ctx->key[1])); + return; } + + /* + * On the Ux500 the AES key is considered as a single bit sequence + * of 128, 192 or 256 bits length. It is written linearly into the + * registers from K1L and down, and needs to be processed to become + * a proper big-endian bit sequence.
+ */ + if (is_aes(c) && c->caps->linear_aes_key) { + u32 tmpkey[8]; + + ux500_swizzle_key((u8 *)c->ctx->key, + (u8 *)tmpkey, c->ctx->keylen); + + r_id = c->caps->k1l; + for (i = 0; i < c->ctx->keylen / sizeof(u32); i++, r_id += 4) + stm32_cryp_write(c, r_id, tmpkey[i]); + + return; + } + + r_id = c->caps->k3r; + for (i = c->ctx->keylen / sizeof(u32); i > 0; i--, r_id -= 4) + stm32_cryp_write(c, r_id, be32_to_cpu(c->ctx->key[i - 1])); } static u32 stm32_cryp_get_hw_mode(struct stm32_cryp *cryp) @@ -373,7 +539,7 @@ static int stm32_cryp_gcm_init(struct stm32_cryp *cryp, u32 cfg) cryp->gcm_ctr = GCM_CTR_INIT; stm32_cryp_hw_write_iv(cryp, iv); - stm32_cryp_write(cryp, CRYP_CR, cfg | CR_PH_INIT | CR_CRYPEN); + stm32_cryp_write(cryp, cryp->caps->cr, cfg | CR_PH_INIT | CR_CRYPEN); /* Wait for end of processing */ ret = stm32_cryp_wait_enable(cryp); @@ -385,10 +551,10 @@ static int stm32_cryp_gcm_init(struct stm32_cryp *cryp, u32 cfg) /* Prepare next phase */ if (cryp->areq->assoclen) { cfg |= CR_PH_HEADER; - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, cryp->caps->cr, cfg); } else if (stm32_cryp_get_input_text_len(cryp)) { cfg |= CR_PH_PAYLOAD; - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, cryp->caps->cr, cfg); } return 0; @@ -405,20 +571,20 @@ static void stm32_crypt_gcmccm_end_header(struct stm32_cryp *cryp) err = stm32_cryp_wait_busy(cryp); if (err) { dev_err(cryp->dev, "Timeout (gcm/ccm header)\n"); - stm32_cryp_write(cryp, CRYP_IMSCR, 0); + stm32_cryp_write(cryp, cryp->caps->imsc, 0); stm32_cryp_finish_req(cryp, err); return; } if (stm32_cryp_get_input_text_len(cryp)) { /* Phase 3 : payload */ - cfg = stm32_cryp_read(cryp, CRYP_CR); + cfg = stm32_cryp_read(cryp, cryp->caps->cr); cfg &= ~CR_CRYPEN; - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, cryp->caps->cr, cfg); cfg &= ~CR_PH_MASK; cfg |= CR_PH_PAYLOAD | CR_CRYPEN; - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, cryp->caps->cr, cfg); } else { 
/* * Phase 4 : tag. @@ -458,7 +624,7 @@ static void stm32_cryp_write_ccm_first_header(struct stm32_cryp *cryp) scatterwalk_copychunks((char *)block + len, &cryp->in_walk, written, 0); for (i = 0; i < AES_BLOCK_32; i++) - stm32_cryp_write(cryp, CRYP_DIN, block[i]); + stm32_cryp_write(cryp, cryp->caps->din, block[i]); cryp->header_in -= written; @@ -494,7 +660,7 @@ static int stm32_cryp_ccm_init(struct stm32_cryp *cryp, u32 cfg) b0[AES_BLOCK_SIZE - 1] = textlen & 0xFF; /* Enable HW */ - stm32_cryp_write(cryp, CRYP_CR, cfg | CR_PH_INIT | CR_CRYPEN); + stm32_cryp_write(cryp, cryp->caps->cr, cfg | CR_PH_INIT | CR_CRYPEN); /* Write B0 */ d = (u32 *)b0; @@ -505,7 +671,7 @@ static int stm32_cryp_ccm_init(struct stm32_cryp *cryp, u32 cfg) if (!cryp->caps->padding_wa) xd = be32_to_cpu(bd[i]); - stm32_cryp_write(cryp, CRYP_DIN, xd); + stm32_cryp_write(cryp, cryp->caps->din, xd); } /* Wait for end of processing */ @@ -518,13 +684,13 @@ static int stm32_cryp_ccm_init(struct stm32_cryp *cryp, u32 cfg) /* Prepare next phase */ if (cryp->areq->assoclen) { cfg |= CR_PH_HEADER | CR_CRYPEN; - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, cryp->caps->cr, cfg); /* Write first (special) block (may move to next phase [payload]) */ stm32_cryp_write_ccm_first_header(cryp); } else if (stm32_cryp_get_input_text_len(cryp)) { cfg |= CR_PH_PAYLOAD; - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, cryp->caps->cr, cfg); } return 0; @@ -538,7 +704,7 @@ static int stm32_cryp_hw_init(struct stm32_cryp *cryp) pm_runtime_get_sync(cryp->dev); /* Disable interrupt */ - stm32_cryp_write(cryp, CRYP_IMSCR, 0); + stm32_cryp_write(cryp, cryp->caps->imsc, 0); /* Set configuration */ cfg = CR_DATA8 | CR_FFLUSH; @@ -566,7 +732,12 @@ static int stm32_cryp_hw_init(struct stm32_cryp *cryp) if (is_decrypt(cryp) && ((hw_mode == CR_AES_ECB) || (hw_mode == CR_AES_CBC))) { /* Configure in key preparation mode */ - stm32_cryp_write(cryp, CRYP_CR, cfg | CR_AES_KP); + if 
(cryp->caps->kp_mode) + stm32_cryp_write(cryp, cryp->caps->cr, + cfg | CR_AES_KP); + else + stm32_cryp_write(cryp, + cryp->caps->cr, cfg | CR_AES_ECB | CR_KSE); /* Set key only after full configuration done */ stm32_cryp_hw_write_key(cryp); @@ -583,14 +754,14 @@ static int stm32_cryp_hw_init(struct stm32_cryp *cryp) cfg |= hw_mode | CR_DEC_NOT_ENC; /* Apply updated config (Decrypt + algo) and flush */ - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, cryp->caps->cr, cfg); } else { cfg |= hw_mode; if (is_decrypt(cryp)) cfg |= CR_DEC_NOT_ENC; /* Apply config and flush */ - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, cryp->caps->cr, cfg); /* Set key only after configuration done */ stm32_cryp_hw_write_key(cryp); @@ -649,7 +820,7 @@ static void stm32_cryp_finish_req(struct stm32_cryp *cryp, int err) static int stm32_cryp_cpu_start(struct stm32_cryp *cryp) { /* Enable interrupt and let the IRQ handler do everything */ - stm32_cryp_write(cryp, CRYP_IMSCR, IMSCR_IN | IMSCR_OUT); + stm32_cryp_write(cryp, cryp->caps->imsc, IMSCR_IN | IMSCR_OUT); return 0; } @@ -1137,14 +1308,14 @@ static int stm32_cryp_read_auth_tag(struct stm32_cryp *cryp) int ret = 0; /* Update Config */ - cfg = stm32_cryp_read(cryp, CRYP_CR); + cfg = stm32_cryp_read(cryp, cryp->caps->cr); cfg &= ~CR_PH_MASK; cfg |= CR_PH_FINAL; cfg &= ~CR_DEC_NOT_ENC; cfg |= CR_CRYPEN; - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, cryp->caps->cr, cfg); if (is_gcm(cryp)) { /* GCM: write aad and payload size (in bits) */ @@ -1152,8 +1323,8 @@ static int stm32_cryp_read_auth_tag(struct stm32_cryp *cryp) if (cryp->caps->swap_final) size_bit = (__force u32)cpu_to_be32(size_bit); - stm32_cryp_write(cryp, CRYP_DIN, 0); - stm32_cryp_write(cryp, CRYP_DIN, size_bit); + stm32_cryp_write(cryp, cryp->caps->din, 0); + stm32_cryp_write(cryp, cryp->caps->din, size_bit); size_bit = is_encrypt(cryp) ? 
cryp->areq->cryptlen : cryp->areq->cryptlen - cryp->authsize; @@ -1161,8 +1332,8 @@ static int stm32_cryp_read_auth_tag(struct stm32_cryp *cryp) if (cryp->caps->swap_final) size_bit = (__force u32)cpu_to_be32(size_bit); - stm32_cryp_write(cryp, CRYP_DIN, 0); - stm32_cryp_write(cryp, CRYP_DIN, size_bit); + stm32_cryp_write(cryp, cryp->caps->din, 0); + stm32_cryp_write(cryp, cryp->caps->din, size_bit); } else { /* CCM: write CTR0 */ u32 iv32[AES_BLOCK_32]; @@ -1177,7 +1348,7 @@ static int stm32_cryp_read_auth_tag(struct stm32_cryp *cryp) if (!cryp->caps->padding_wa) xiv = be32_to_cpu(biv[i]); - stm32_cryp_write(cryp, CRYP_DIN, xiv); + stm32_cryp_write(cryp, cryp->caps->din, xiv); } } @@ -1193,7 +1364,7 @@ static int stm32_cryp_read_auth_tag(struct stm32_cryp *cryp) /* Get and write tag */ for (i = 0; i < AES_BLOCK_32; i++) - out_tag[i] = stm32_cryp_read(cryp, CRYP_DOUT); + out_tag[i] = stm32_cryp_read(cryp, cryp->caps->dout); scatterwalk_copychunks(out_tag, &cryp->out_walk, cryp->authsize, 1); } else { @@ -1203,7 +1374,7 @@ static int stm32_cryp_read_auth_tag(struct stm32_cryp *cryp) scatterwalk_copychunks(in_tag, &cryp->in_walk, cryp->authsize, 0); for (i = 0; i < AES_BLOCK_32; i++) - out_tag[i] = stm32_cryp_read(cryp, CRYP_DOUT); + out_tag[i] = stm32_cryp_read(cryp, cryp->caps->dout); if (crypto_memneq(in_tag, out_tag, cryp->authsize)) ret = -EBADMSG; @@ -1211,7 +1382,7 @@ static int stm32_cryp_read_auth_tag(struct stm32_cryp *cryp) /* Disable cryp */ cfg &= ~CR_CRYPEN; - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, cryp->caps->cr, cfg); return ret; } @@ -1227,19 +1398,19 @@ static void stm32_cryp_check_ctr_counter(struct stm32_cryp *cryp) */ crypto_inc((u8 *)cryp->last_ctr, sizeof(cryp->last_ctr)); - cr = stm32_cryp_read(cryp, CRYP_CR); - stm32_cryp_write(cryp, CRYP_CR, cr & ~CR_CRYPEN); + cr = stm32_cryp_read(cryp, cryp->caps->cr); + stm32_cryp_write(cryp, cryp->caps->cr, cr & ~CR_CRYPEN); stm32_cryp_hw_write_iv(cryp, cryp->last_ctr); - 
stm32_cryp_write(cryp, CRYP_CR, cr); + stm32_cryp_write(cryp, cryp->caps->cr, cr); } /* The IV registers are BE */ - cryp->last_ctr[0] = cpu_to_be32(stm32_cryp_read(cryp, CRYP_IV0LR)); - cryp->last_ctr[1] = cpu_to_be32(stm32_cryp_read(cryp, CRYP_IV0RR)); - cryp->last_ctr[2] = cpu_to_be32(stm32_cryp_read(cryp, CRYP_IV1LR)); - cryp->last_ctr[3] = cpu_to_be32(stm32_cryp_read(cryp, CRYP_IV1RR)); + cryp->last_ctr[0] = cpu_to_be32(stm32_cryp_read(cryp, cryp->caps->iv0l)); + cryp->last_ctr[1] = cpu_to_be32(stm32_cryp_read(cryp, cryp->caps->iv0r)); + cryp->last_ctr[2] = cpu_to_be32(stm32_cryp_read(cryp, cryp->caps->iv1l)); + cryp->last_ctr[3] = cpu_to_be32(stm32_cryp_read(cryp, cryp->caps->iv1r)); } static void stm32_cryp_irq_read_data(struct stm32_cryp *cryp) @@ -1248,7 +1419,7 @@ static void stm32_cryp_irq_read_data(struct stm32_cryp *cryp) u32 block[AES_BLOCK_32]; for (i = 0; i < cryp->hw_blocksize / sizeof(u32); i++) - block[i] = stm32_cryp_read(cryp, CRYP_DOUT); + block[i] = stm32_cryp_read(cryp, cryp->caps->dout); scatterwalk_copychunks(block, &cryp->out_walk, min_t(size_t, cryp->hw_blocksize, cryp->payload_out), 1); @@ -1264,7 +1435,7 @@ static void stm32_cryp_irq_write_block(struct stm32_cryp *cryp) scatterwalk_copychunks(block, &cryp->in_walk, min_t(size_t, cryp->hw_blocksize, cryp->payload_in), 0); for (i = 0; i < cryp->hw_blocksize / sizeof(u32); i++) - stm32_cryp_write(cryp, CRYP_DIN, block[i]); + stm32_cryp_write(cryp, cryp->caps->din, block[i]); cryp->payload_in -= min_t(size_t, cryp->hw_blocksize, cryp->payload_in); } @@ -1278,22 +1449,22 @@ static void stm32_cryp_irq_write_gcm_padded_data(struct stm32_cryp *cryp) /* 'Special workaround' procedure described in the datasheet */ /* a) disable ip */ - stm32_cryp_write(cryp, CRYP_IMSCR, 0); - cfg = stm32_cryp_read(cryp, CRYP_CR); + stm32_cryp_write(cryp, cryp->caps->imsc, 0); + cfg = stm32_cryp_read(cryp, cryp->caps->cr); cfg &= ~CR_CRYPEN; - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, 
cryp->caps->cr, cfg); /* b) Update IV1R */ - stm32_cryp_write(cryp, CRYP_IV1RR, cryp->gcm_ctr - 2); + stm32_cryp_write(cryp, cryp->caps->iv1r, cryp->gcm_ctr - 2); /* c) change mode to CTR */ cfg &= ~CR_ALGO_MASK; cfg |= CR_AES_CTR; - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, cryp->caps->cr, cfg); /* a) enable IP */ cfg |= CR_CRYPEN; - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, cryp->caps->cr, cfg); /* b) pad and write the last block */ stm32_cryp_irq_write_block(cryp); @@ -1310,7 +1481,7 @@ static void stm32_cryp_irq_write_gcm_padded_data(struct stm32_cryp *cryp) * block value */ for (i = 0; i < cryp->hw_blocksize / sizeof(u32); i++) - block[i] = stm32_cryp_read(cryp, CRYP_DOUT); + block[i] = stm32_cryp_read(cryp, cryp->caps->dout); scatterwalk_copychunks(block, &cryp->out_walk, min_t(size_t, cryp->hw_blocksize, cryp->payload_out), 1); @@ -1320,16 +1491,16 @@ static void stm32_cryp_irq_write_gcm_padded_data(struct stm32_cryp *cryp) /* d) change mode back to AES GCM */ cfg &= ~CR_ALGO_MASK; cfg |= CR_AES_GCM; - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, cryp->caps->cr, cfg); /* e) change phase to Final */ cfg &= ~CR_PH_MASK; cfg |= CR_PH_FINAL; - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, cryp->caps->cr, cfg); /* f) write padded data */ for (i = 0; i < AES_BLOCK_32; i++) - stm32_cryp_write(cryp, CRYP_DIN, block[i]); + stm32_cryp_write(cryp, cryp->caps->din, block[i]); /* g) Empty fifo out */ err = stm32_cryp_wait_output(cryp); @@ -1339,7 +1510,7 @@ static void stm32_cryp_irq_write_gcm_padded_data(struct stm32_cryp *cryp) } for (i = 0; i < AES_BLOCK_32; i++) - stm32_cryp_read(cryp, CRYP_DOUT); + stm32_cryp_read(cryp, cryp->caps->dout); /* h) run the he normal Final phase */ stm32_cryp_finish_req(cryp, 0); @@ -1350,13 +1521,13 @@ static void stm32_cryp_irq_set_npblb(struct stm32_cryp *cryp) u32 cfg; /* disable ip, set NPBLB and reneable ip */ - cfg = stm32_cryp_read(cryp, CRYP_CR); + cfg 
= stm32_cryp_read(cryp, cryp->caps->cr); cfg &= ~CR_CRYPEN; - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, cryp->caps->cr, cfg); cfg |= (cryp->hw_blocksize - cryp->payload_in) << CR_NBPBL_SHIFT; cfg |= CR_CRYPEN; - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, cryp->caps->cr, cfg); } static void stm32_cryp_irq_write_ccm_padded_data(struct stm32_cryp *cryp) @@ -1370,11 +1541,11 @@ static void stm32_cryp_irq_write_ccm_padded_data(struct stm32_cryp *cryp) /* 'Special workaround' procedure described in the datasheet */ /* a) disable ip */ - stm32_cryp_write(cryp, CRYP_IMSCR, 0); + stm32_cryp_write(cryp, cryp->caps->imsc, 0); - cfg = stm32_cryp_read(cryp, CRYP_CR); + cfg = stm32_cryp_read(cryp, cryp->caps->cr); cfg &= ~CR_CRYPEN; - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, cryp->caps->cr, cfg); /* b) get IV1 from CRYP_CSGCMCCM7 */ iv1tmp = stm32_cryp_read(cryp, CRYP_CSGCMCCM0R + 7 * 4); @@ -1384,16 +1555,16 @@ static void stm32_cryp_irq_write_ccm_padded_data(struct stm32_cryp *cryp) cstmp1[i] = stm32_cryp_read(cryp, CRYP_CSGCMCCM0R + i * 4); /* d) Write IV1R */ - stm32_cryp_write(cryp, CRYP_IV1RR, iv1tmp); + stm32_cryp_write(cryp, cryp->caps->iv1r, iv1tmp); /* e) change mode to CTR */ cfg &= ~CR_ALGO_MASK; cfg |= CR_AES_CTR; - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, cryp->caps->cr, cfg); /* a) enable IP */ cfg |= CR_CRYPEN; - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, cryp->caps->cr, cfg); /* b) pad and write the last block */ stm32_cryp_irq_write_block(cryp); @@ -1410,7 +1581,7 @@ static void stm32_cryp_irq_write_ccm_padded_data(struct stm32_cryp *cryp) * block value */ for (i = 0; i < cryp->hw_blocksize / sizeof(u32); i++) - block[i] = stm32_cryp_read(cryp, CRYP_DOUT); + block[i] = stm32_cryp_read(cryp, cryp->caps->dout); scatterwalk_copychunks(block, &cryp->out_walk, min_t(size_t, cryp->hw_blocksize, cryp->payload_out), 1); @@ -1423,18 +1594,18 @@ static void 
stm32_cryp_irq_write_ccm_padded_data(struct stm32_cryp *cryp) /* e) change mode back to AES CCM */ cfg &= ~CR_ALGO_MASK; cfg |= CR_AES_CCM; - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, cryp->caps->cr, cfg); /* f) change phase to header */ cfg &= ~CR_PH_MASK; cfg |= CR_PH_HEADER; - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, cryp->caps->cr, cfg); /* g) XOR and write padded data */ for (i = 0; i < ARRAY_SIZE(block); i++) { block[i] ^= cstmp1[i]; block[i] ^= cstmp2[i]; - stm32_cryp_write(cryp, CRYP_DIN, block[i]); + stm32_cryp_write(cryp, cryp->caps->din, block[i]); } /* h) wait for completion */ @@ -1497,7 +1668,7 @@ static void stm32_cryp_irq_write_gcmccm_header(struct stm32_cryp *cryp) scatterwalk_copychunks(block, &cryp->in_walk, written, 0); for (i = 0; i < AES_BLOCK_32; i++) - stm32_cryp_write(cryp, CRYP_DIN, block[i]); + stm32_cryp_write(cryp, cryp->caps->din, block[i]); cryp->header_in -= written; @@ -1508,7 +1679,7 @@ static irqreturn_t stm32_cryp_irq_thread(int irq, void *arg) { struct stm32_cryp *cryp = arg; u32 ph; - u32 it_mask = stm32_cryp_read(cryp, CRYP_IMSCR); + u32 it_mask = stm32_cryp_read(cryp, cryp->caps->imsc); if (cryp->irq_status & MISR_OUT) /* Output FIFO IRQ: read data */ @@ -1516,7 +1687,7 @@ static irqreturn_t stm32_cryp_irq_thread(int irq, void *arg) if (cryp->irq_status & MISR_IN) { if (is_gcm(cryp) || is_ccm(cryp)) { - ph = stm32_cryp_read(cryp, CRYP_CR) & CR_PH_MASK; + ph = stm32_cryp_read(cryp, cryp->caps->cr) & CR_PH_MASK; if (unlikely(ph == CR_PH_HEADER)) /* Write Header */ stm32_cryp_irq_write_gcmccm_header(cryp); @@ -1536,7 +1707,7 @@ static irqreturn_t stm32_cryp_irq_thread(int irq, void *arg) it_mask &= ~IMSCR_IN; if (!cryp->payload_out) it_mask &= ~IMSCR_OUT; - stm32_cryp_write(cryp, CRYP_IMSCR, it_mask); + stm32_cryp_write(cryp, cryp->caps->imsc, it_mask); if (!cryp->payload_in && !cryp->header_in && !cryp->payload_out) stm32_cryp_finish_req(cryp, 0); @@ -1548,7 +1719,7 @@ static 
irqreturn_t stm32_cryp_irq(int irq, void *arg) { struct stm32_cryp *cryp = arg; - cryp->irq_status = stm32_cryp_read(cryp, CRYP_MISR); + cryp->irq_status = stm32_cryp_read(cryp, cryp->caps->mis); return IRQ_WAKE_THREAD; } @@ -1722,17 +1893,74 @@ static struct aead_alg aead_algs[] = { }, }; -static const struct stm32_cryp_caps f7_data = { +static const struct stm32_cryp_caps ux500_data = { + .aeads_support = false, + .linear_aes_key = true, + .kp_mode = false, + .iv_protection = true, .swap_final = true, .padding_wa = true, + .cr = UX500_CRYP_CR, + .sr = UX500_CRYP_SR, + .din = UX500_CRYP_DIN, + .dout = UX500_CRYP_DOUT, + .imsc = UX500_CRYP_IMSC, + .mis = UX500_CRYP_MIS, + .k1l = UX500_CRYP_K1L, + .k1r = UX500_CRYP_K1R, + .k3r = UX500_CRYP_K3R, + .iv0l = UX500_CRYP_IV0L, + .iv0r = UX500_CRYP_IV0R, + .iv1l = UX500_CRYP_IV1L, + .iv1r = UX500_CRYP_IV1R, +}; + +static const struct stm32_cryp_caps f7_data = { + .aeads_support = true, + .linear_aes_key = false, + .kp_mode = true, + .iv_protection = false, + .swap_final = true, + .padding_wa = true, + .cr = CRYP_CR, + .sr = CRYP_SR, + .din = CRYP_DIN, + .dout = CRYP_DOUT, + .imsc = CRYP_IMSCR, + .mis = CRYP_MISR, + .k1l = CRYP_K1LR, + .k1r = CRYP_K1RR, + .k3r = CRYP_K3RR, + .iv0l = CRYP_IV0LR, + .iv0r = CRYP_IV0RR, + .iv1l = CRYP_IV1LR, + .iv1r = CRYP_IV1RR, }; static const struct stm32_cryp_caps mp1_data = { + .aeads_support = true, + .linear_aes_key = false, + .kp_mode = true, + .iv_protection = false, .swap_final = false, .padding_wa = false, + .cr = CRYP_CR, + .sr = CRYP_SR, + .din = CRYP_DIN, + .dout = CRYP_DOUT, + .imsc = CRYP_IMSCR, + .mis = CRYP_MISR, + .k1l = CRYP_K1LR, + .k1r = CRYP_K1RR, + .k3r = CRYP_K3RR, + .iv0l = CRYP_IV0LR, + .iv0r = CRYP_IV0RR, + .iv1l = CRYP_IV1LR, + .iv1r = CRYP_IV1RR, }; static const struct of_device_id stm32_dt_ids[] = { + { .compatible = "stericsson,ux500-cryp", .data = &ux500_data}, { .compatible = "st,stm32f756-cryp", .data = &f7_data}, { .compatible = "st,stm32mp1-cryp", .data = 
&mp1_data}, {}, @@ -1829,9 +2057,11 @@ static int stm32_cryp_probe(struct platform_device *pdev) goto err_algs; } - ret = crypto_register_aeads(aead_algs, ARRAY_SIZE(aead_algs)); - if (ret) - goto err_aead_algs; + if (cryp->caps->aeads_support) { + ret = crypto_register_aeads(aead_algs, ARRAY_SIZE(aead_algs)); + if (ret) + goto err_aead_algs; + } dev_info(dev, "Initialized\n"); @@ -1869,7 +2099,8 @@ static int stm32_cryp_remove(struct platform_device *pdev) if (ret < 0) return ret; - crypto_unregister_aeads(aead_algs, ARRAY_SIZE(aead_algs)); + if (cryp->caps->aeads_support) + crypto_unregister_aeads(aead_algs, ARRAY_SIZE(aead_algs)); crypto_unregister_skciphers(crypto_algs, ARRAY_SIZE(crypto_algs)); crypto_engine_exit(cryp->engine); From 453de3eb08c4b7e31b3019a4b0cc3ebce51a6219 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 3 Dec 2022 10:15:18 +0100 Subject: [PATCH 3588/4122] crypto: ux500/cryp - delete driver It turns out we can just modify the newer STM32 CRYP driver to be used with Ux500 and now that we have done that, delete the old and sparsely maintained Ux500 CRYP driver. 
Cc: Lionel Debieve Cc: Maxime Coquelin Cc: Alexandre Torgue Signed-off-by: Linus Walleij Signed-off-by: Herbert Xu --- drivers/crypto/ux500/Kconfig | 10 - drivers/crypto/ux500/Makefile | 1 - drivers/crypto/ux500/cryp/Makefile | 10 - drivers/crypto/ux500/cryp/cryp.c | 394 ------ drivers/crypto/ux500/cryp/cryp.h | 315 ----- drivers/crypto/ux500/cryp/cryp_core.c | 1600 ------------------------- drivers/crypto/ux500/cryp/cryp_irq.c | 45 - drivers/crypto/ux500/cryp/cryp_irq.h | 31 - drivers/crypto/ux500/cryp/cryp_irqp.h | 125 -- drivers/crypto/ux500/cryp/cryp_p.h | 122 -- 10 files changed, 2653 deletions(-) delete mode 100644 drivers/crypto/ux500/cryp/Makefile delete mode 100644 drivers/crypto/ux500/cryp/cryp.c delete mode 100644 drivers/crypto/ux500/cryp/cryp.h delete mode 100644 drivers/crypto/ux500/cryp/cryp_core.c delete mode 100644 drivers/crypto/ux500/cryp/cryp_irq.c delete mode 100644 drivers/crypto/ux500/cryp/cryp_irq.h delete mode 100644 drivers/crypto/ux500/cryp/cryp_irqp.h delete mode 100644 drivers/crypto/ux500/cryp/cryp_p.h diff --git a/drivers/crypto/ux500/Kconfig b/drivers/crypto/ux500/Kconfig index f56d65c56ccf..dcbd7404768f 100644 --- a/drivers/crypto/ux500/Kconfig +++ b/drivers/crypto/ux500/Kconfig @@ -4,16 +4,6 @@ # Author: Shujuan Chen (shujuan.chen@stericsson.com) # -config CRYPTO_DEV_UX500_CRYP - tristate "UX500 crypto driver for CRYP block" - depends on CRYPTO_DEV_UX500 - select CRYPTO_ALGAPI - select CRYPTO_SKCIPHER - select CRYPTO_LIB_DES - help - This selects the crypto driver for the UX500_CRYP hardware. It supports - AES-ECB, CBC and CTR with keys sizes of 128, 192 and 256 bit sizes. 
- config CRYPTO_DEV_UX500_HASH tristate "UX500 crypto driver for HASH block" depends on CRYPTO_DEV_UX500 diff --git a/drivers/crypto/ux500/Makefile b/drivers/crypto/ux500/Makefile index f014eb01710a..f1aa4edf66f4 100644 --- a/drivers/crypto/ux500/Makefile +++ b/drivers/crypto/ux500/Makefile @@ -5,4 +5,3 @@ # obj-$(CONFIG_CRYPTO_DEV_UX500_HASH) += hash/ -obj-$(CONFIG_CRYPTO_DEV_UX500_CRYP) += cryp/ diff --git a/drivers/crypto/ux500/cryp/Makefile b/drivers/crypto/ux500/cryp/Makefile deleted file mode 100644 index 3e67531f484c..000000000000 --- a/drivers/crypto/ux500/cryp/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -#/* -# * Copyright (C) ST-Ericsson SA 2010 -# * Author: shujuan.chen@stericsson.com for ST-Ericsson. -# */ - -ccflags-$(CONFIG_CRYPTO_DEV_UX500_DEBUG) += -DDEBUG - -obj-$(CONFIG_CRYPTO_DEV_UX500_CRYP) += ux500_cryp.o -ux500_cryp-objs := cryp.o cryp_irq.o cryp_core.o diff --git a/drivers/crypto/ux500/cryp/cryp.c b/drivers/crypto/ux500/cryp/cryp.c deleted file mode 100644 index 759d0d9786fd..000000000000 --- a/drivers/crypto/ux500/cryp/cryp.c +++ /dev/null @@ -1,394 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) ST-Ericsson SA 2010 - * Author: Shujuan Chen for ST-Ericsson. - * Author: Jonas Linde for ST-Ericsson. - * Author: Niklas Hernaeus for ST-Ericsson. - * Author: Joakim Bech for ST-Ericsson. - * Author: Berne Hebark for ST-Ericsson. - */ - -#include -#include -#include - -#include "cryp_p.h" -#include "cryp.h" - -/* - * cryp_wait_until_done - wait until the device logic is not busy - */ -void cryp_wait_until_done(struct cryp_device_data *device_data) -{ - while (cryp_is_logic_busy(device_data)) - cpu_relax(); -} - -/** - * cryp_check - This routine checks Peripheral and PCell Id - * @device_data: Pointer to the device data struct for base address. 
- */ -int cryp_check(struct cryp_device_data *device_data) -{ - int peripheralid2 = 0; - - if (NULL == device_data) - return -EINVAL; - - peripheralid2 = readl_relaxed(&device_data->base->periphId2); - - if (peripheralid2 != CRYP_PERIPHERAL_ID2_DB8500) - return -EPERM; - - /* Check Peripheral and Pcell Id Register for CRYP */ - if ((CRYP_PERIPHERAL_ID0 == - readl_relaxed(&device_data->base->periphId0)) - && (CRYP_PERIPHERAL_ID1 == - readl_relaxed(&device_data->base->periphId1)) - && (CRYP_PERIPHERAL_ID3 == - readl_relaxed(&device_data->base->periphId3)) - && (CRYP_PCELL_ID0 == - readl_relaxed(&device_data->base->pcellId0)) - && (CRYP_PCELL_ID1 == - readl_relaxed(&device_data->base->pcellId1)) - && (CRYP_PCELL_ID2 == - readl_relaxed(&device_data->base->pcellId2)) - && (CRYP_PCELL_ID3 == - readl_relaxed(&device_data->base->pcellId3))) { - return 0; - } - - return -EPERM; -} - -/** - * cryp_activity - This routine enables/disable the cryptography function. - * @device_data: Pointer to the device data struct for base address. - * @cryp_crypen: Enable/Disable functionality - */ -void cryp_activity(struct cryp_device_data *device_data, - enum cryp_crypen cryp_crypen) -{ - CRYP_PUT_BITS(&device_data->base->cr, - cryp_crypen, - CRYP_CR_CRYPEN_POS, - CRYP_CR_CRYPEN_MASK); -} - -/** - * cryp_flush_inoutfifo - Resets both the input and the output FIFOs - * @device_data: Pointer to the device data struct for base address. - */ -void cryp_flush_inoutfifo(struct cryp_device_data *device_data) -{ - /* - * We always need to disable the hardware before trying to flush the - * FIFO. This is something that isn't written in the design - * specification, but we have been informed by the hardware designers - * that this must be done. 
- */ - cryp_activity(device_data, CRYP_CRYPEN_DISABLE); - cryp_wait_until_done(device_data); - - CRYP_SET_BITS(&device_data->base->cr, CRYP_CR_FFLUSH_MASK); - /* - * CRYP_SR_INFIFO_READY_MASK is the expected value on the status - * register when starting a new calculation, which means Input FIFO is - * not full and input FIFO is empty. - */ - while (readl_relaxed(&device_data->base->sr) != - CRYP_SR_INFIFO_READY_MASK) - cpu_relax(); -} - -/** - * cryp_set_configuration - This routine set the cr CRYP IP - * @device_data: Pointer to the device data struct for base address. - * @cryp_config: Pointer to the configuration parameter - * @control_register: The control register to be written later on. - */ -int cryp_set_configuration(struct cryp_device_data *device_data, - struct cryp_config *cryp_config, - u32 *control_register) -{ - u32 cr_for_kse; - - if (NULL == device_data || NULL == cryp_config) - return -EINVAL; - - *control_register |= (cryp_config->keysize << CRYP_CR_KEYSIZE_POS); - - /* Prepare key for decryption in AES_ECB and AES_CBC mode. */ - if ((CRYP_ALGORITHM_DECRYPT == cryp_config->algodir) && - ((CRYP_ALGO_AES_ECB == cryp_config->algomode) || - (CRYP_ALGO_AES_CBC == cryp_config->algomode))) { - cr_for_kse = *control_register; - /* - * This seems a bit odd, but it is indeed needed to set this to - * encrypt even though it is a decryption that we are doing. It - * also mentioned in the design spec that you need to do this. - * After the keyprepartion for decrypting is done you should set - * algodir back to decryption, which is done outside this if - * statement. - * - * According to design specification we should set mode ECB - * during key preparation even though we might be running CBC - * when enter this function. - * - * Writing to KSE_ENABLED will drop CRYPEN when key preparation - * is done. Therefore we need to set CRYPEN again outside this - * if statement when running decryption. 
- */ - cr_for_kse |= ((CRYP_ALGORITHM_ENCRYPT << CRYP_CR_ALGODIR_POS) | - (CRYP_ALGO_AES_ECB << CRYP_CR_ALGOMODE_POS) | - (CRYP_CRYPEN_ENABLE << CRYP_CR_CRYPEN_POS) | - (KSE_ENABLED << CRYP_CR_KSE_POS)); - - writel_relaxed(cr_for_kse, &device_data->base->cr); - cryp_wait_until_done(device_data); - } - - *control_register |= - ((cryp_config->algomode << CRYP_CR_ALGOMODE_POS) | - (cryp_config->algodir << CRYP_CR_ALGODIR_POS)); - - return 0; -} - -/** - * cryp_configure_protection - set the protection bits in the CRYP logic. - * @device_data: Pointer to the device data struct for base address. - * @p_protect_config: Pointer to the protection mode and - * secure mode configuration - */ -int cryp_configure_protection(struct cryp_device_data *device_data, - struct cryp_protection_config *p_protect_config) -{ - if (NULL == p_protect_config) - return -EINVAL; - - CRYP_WRITE_BIT(&device_data->base->cr, - (u32) p_protect_config->secure_access, - CRYP_CR_SECURE_MASK); - CRYP_PUT_BITS(&device_data->base->cr, - p_protect_config->privilege_access, - CRYP_CR_PRLG_POS, - CRYP_CR_PRLG_MASK); - - return 0; -} - -/** - * cryp_is_logic_busy - returns the busy status of the CRYP logic - * @device_data: Pointer to the device data struct for base address. - */ -int cryp_is_logic_busy(struct cryp_device_data *device_data) -{ - return CRYP_TEST_BITS(&device_data->base->sr, - CRYP_SR_BUSY_MASK); -} - -/** - * cryp_configure_for_dma - configures the CRYP IP for DMA operation - * @device_data: Pointer to the device data struct for base address. - * @dma_req: Specifies the DMA request type value. - */ -void cryp_configure_for_dma(struct cryp_device_data *device_data, - enum cryp_dma_req_type dma_req) -{ - CRYP_SET_BITS(&device_data->base->dmacr, - (u32) dma_req); -} - -/** - * cryp_configure_key_values - configures the key values for CRYP operations - * @device_data: Pointer to the device data struct for base address. 
- * @key_reg_index: Key value index register - * @key_value: The key value struct - */ -int cryp_configure_key_values(struct cryp_device_data *device_data, - enum cryp_key_reg_index key_reg_index, - struct cryp_key_value key_value) -{ - while (cryp_is_logic_busy(device_data)) - cpu_relax(); - - switch (key_reg_index) { - case CRYP_KEY_REG_1: - writel_relaxed(key_value.key_value_left, - &device_data->base->key_1_l); - writel_relaxed(key_value.key_value_right, - &device_data->base->key_1_r); - break; - case CRYP_KEY_REG_2: - writel_relaxed(key_value.key_value_left, - &device_data->base->key_2_l); - writel_relaxed(key_value.key_value_right, - &device_data->base->key_2_r); - break; - case CRYP_KEY_REG_3: - writel_relaxed(key_value.key_value_left, - &device_data->base->key_3_l); - writel_relaxed(key_value.key_value_right, - &device_data->base->key_3_r); - break; - case CRYP_KEY_REG_4: - writel_relaxed(key_value.key_value_left, - &device_data->base->key_4_l); - writel_relaxed(key_value.key_value_right, - &device_data->base->key_4_r); - break; - default: - return -EINVAL; - } - - return 0; -} - -/** - * cryp_configure_init_vector - configures the initialization vector register - * @device_data: Pointer to the device data struct for base address. - * @init_vector_index: Specifies the index of the init vector. - * @init_vector_value: Specifies the value for the init vector. 
- */ -int cryp_configure_init_vector(struct cryp_device_data *device_data, - enum cryp_init_vector_index - init_vector_index, - struct cryp_init_vector_value - init_vector_value) -{ - while (cryp_is_logic_busy(device_data)) - cpu_relax(); - - switch (init_vector_index) { - case CRYP_INIT_VECTOR_INDEX_0: - writel_relaxed(init_vector_value.init_value_left, - &device_data->base->init_vect_0_l); - writel_relaxed(init_vector_value.init_value_right, - &device_data->base->init_vect_0_r); - break; - case CRYP_INIT_VECTOR_INDEX_1: - writel_relaxed(init_vector_value.init_value_left, - &device_data->base->init_vect_1_l); - writel_relaxed(init_vector_value.init_value_right, - &device_data->base->init_vect_1_r); - break; - default: - return -EINVAL; - } - - return 0; -} - -/** - * cryp_save_device_context - Store hardware registers and - * other device context parameter - * @device_data: Pointer to the device data struct for base address. - * @ctx: Crypto device context - * @cryp_mode: Mode: Polling, Interrupt or DMA - */ -void cryp_save_device_context(struct cryp_device_data *device_data, - struct cryp_device_context *ctx, - int cryp_mode) -{ - enum cryp_algo_mode algomode; - struct cryp_register __iomem *src_reg = device_data->base; - struct cryp_config *config = - (struct cryp_config *)device_data->current_ctx; - - /* - * Always start by disable the hardware and wait for it to finish the - * ongoing calculations before trying to reprogram it. 
- */ - cryp_activity(device_data, CRYP_CRYPEN_DISABLE); - cryp_wait_until_done(device_data); - - if (cryp_mode == CRYP_MODE_DMA) - cryp_configure_for_dma(device_data, CRYP_DMA_DISABLE_BOTH); - - if (CRYP_TEST_BITS(&src_reg->sr, CRYP_SR_IFEM_MASK) == 0) - ctx->din = readl_relaxed(&src_reg->din); - - ctx->cr = readl_relaxed(&src_reg->cr) & CRYP_CR_CONTEXT_SAVE_MASK; - - switch (config->keysize) { - case CRYP_KEY_SIZE_256: - ctx->key_4_l = readl_relaxed(&src_reg->key_4_l); - ctx->key_4_r = readl_relaxed(&src_reg->key_4_r); - fallthrough; - - case CRYP_KEY_SIZE_192: - ctx->key_3_l = readl_relaxed(&src_reg->key_3_l); - ctx->key_3_r = readl_relaxed(&src_reg->key_3_r); - fallthrough; - - case CRYP_KEY_SIZE_128: - ctx->key_2_l = readl_relaxed(&src_reg->key_2_l); - ctx->key_2_r = readl_relaxed(&src_reg->key_2_r); - fallthrough; - - default: - ctx->key_1_l = readl_relaxed(&src_reg->key_1_l); - ctx->key_1_r = readl_relaxed(&src_reg->key_1_r); - } - - /* Save IV for CBC mode for both AES and DES. */ - algomode = ((ctx->cr & CRYP_CR_ALGOMODE_MASK) >> CRYP_CR_ALGOMODE_POS); - if (algomode == CRYP_ALGO_TDES_CBC || - algomode == CRYP_ALGO_DES_CBC || - algomode == CRYP_ALGO_AES_CBC) { - ctx->init_vect_0_l = readl_relaxed(&src_reg->init_vect_0_l); - ctx->init_vect_0_r = readl_relaxed(&src_reg->init_vect_0_r); - ctx->init_vect_1_l = readl_relaxed(&src_reg->init_vect_1_l); - ctx->init_vect_1_r = readl_relaxed(&src_reg->init_vect_1_r); - } -} - -/** - * cryp_restore_device_context - Restore hardware registers and - * other device context parameter - * @device_data: Pointer to the device data struct for base address. - * @ctx: Crypto device context - */ -void cryp_restore_device_context(struct cryp_device_data *device_data, - struct cryp_device_context *ctx) -{ - struct cryp_register __iomem *reg = device_data->base; - struct cryp_config *config = - (struct cryp_config *)device_data->current_ctx; - - /* - * Fall through for all items in switch statement. 
DES is captured in - * the default. - */ - switch (config->keysize) { - case CRYP_KEY_SIZE_256: - writel_relaxed(ctx->key_4_l, ®->key_4_l); - writel_relaxed(ctx->key_4_r, ®->key_4_r); - fallthrough; - - case CRYP_KEY_SIZE_192: - writel_relaxed(ctx->key_3_l, ®->key_3_l); - writel_relaxed(ctx->key_3_r, ®->key_3_r); - fallthrough; - - case CRYP_KEY_SIZE_128: - writel_relaxed(ctx->key_2_l, ®->key_2_l); - writel_relaxed(ctx->key_2_r, ®->key_2_r); - fallthrough; - - default: - writel_relaxed(ctx->key_1_l, ®->key_1_l); - writel_relaxed(ctx->key_1_r, ®->key_1_r); - } - - /* Restore IV for CBC mode for AES and DES. */ - if (config->algomode == CRYP_ALGO_TDES_CBC || - config->algomode == CRYP_ALGO_DES_CBC || - config->algomode == CRYP_ALGO_AES_CBC) { - writel_relaxed(ctx->init_vect_0_l, ®->init_vect_0_l); - writel_relaxed(ctx->init_vect_0_r, ®->init_vect_0_r); - writel_relaxed(ctx->init_vect_1_l, ®->init_vect_1_l); - writel_relaxed(ctx->init_vect_1_r, ®->init_vect_1_r); - } -} diff --git a/drivers/crypto/ux500/cryp/cryp.h b/drivers/crypto/ux500/cryp/cryp.h deleted file mode 100644 index 59e1557a620a..000000000000 --- a/drivers/crypto/ux500/cryp/cryp.h +++ /dev/null @@ -1,315 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) ST-Ericsson SA 2010 - * Author: Shujuan Chen for ST-Ericsson. - * Author: Jonas Linde for ST-Ericsson. - * Author: Joakim Bech for ST-Ericsson. - * Author: Berne Hebark for ST-Ericsson. - * Author: Niklas Hernaeus for ST-Ericsson. 
- */ - -#ifndef _CRYP_H_ -#define _CRYP_H_ - -#include -#include -#include -#include - -#define DEV_DBG_NAME "crypX crypX:" - -/* CRYP enable/disable */ -enum cryp_crypen { - CRYP_CRYPEN_DISABLE = 0, - CRYP_CRYPEN_ENABLE = 1 -}; - -/* CRYP Start Computation enable/disable */ -enum cryp_start { - CRYP_START_DISABLE = 0, - CRYP_START_ENABLE = 1 -}; - -/* CRYP Init Signal enable/disable */ -enum cryp_init { - CRYP_INIT_DISABLE = 0, - CRYP_INIT_ENABLE = 1 -}; - -/* Cryp State enable/disable */ -enum cryp_state { - CRYP_STATE_DISABLE = 0, - CRYP_STATE_ENABLE = 1 -}; - -/* Key preparation bit enable */ -enum cryp_key_prep { - KSE_DISABLED = 0, - KSE_ENABLED = 1 -}; - -/* Key size for AES */ -#define CRYP_KEY_SIZE_128 (0) -#define CRYP_KEY_SIZE_192 (1) -#define CRYP_KEY_SIZE_256 (2) - -/* AES modes */ -enum cryp_algo_mode { - CRYP_ALGO_TDES_ECB, - CRYP_ALGO_TDES_CBC, - CRYP_ALGO_DES_ECB, - CRYP_ALGO_DES_CBC, - CRYP_ALGO_AES_ECB, - CRYP_ALGO_AES_CBC, - CRYP_ALGO_AES_CTR, - CRYP_ALGO_AES_XTS -}; - -/* Cryp Encryption or Decryption */ -enum cryp_algorithm_dir { - CRYP_ALGORITHM_ENCRYPT, - CRYP_ALGORITHM_DECRYPT -}; - -/* Hardware access method */ -enum cryp_mode { - CRYP_MODE_POLLING, - CRYP_MODE_INTERRUPT, - CRYP_MODE_DMA -}; - -/** - * struct cryp_config - - * @keysize: Key size for AES - * @algomode: AES modes - * @algodir: Cryp Encryption or Decryption - * - * CRYP configuration structure to be passed to set configuration - */ -struct cryp_config { - int keysize; - enum cryp_algo_mode algomode; - enum cryp_algorithm_dir algodir; -}; - -/** - * struct cryp_protection_config - - * @privilege_access: Privileged cryp state enable/disable - * @secure_access: Secure cryp state enable/disable - * - * Protection configuration structure for setting privilage access - */ -struct cryp_protection_config { - enum cryp_state privilege_access; - enum cryp_state secure_access; -}; - -/* Cryp status */ -enum cryp_status_id { - CRYP_STATUS_BUSY = 0x10, - CRYP_STATUS_OUTPUT_FIFO_FULL = 
0x08, - CRYP_STATUS_OUTPUT_FIFO_NOT_EMPTY = 0x04, - CRYP_STATUS_INPUT_FIFO_NOT_FULL = 0x02, - CRYP_STATUS_INPUT_FIFO_EMPTY = 0x01 -}; - -/* Cryp DMA interface */ -#define CRYP_DMA_TX_FIFO 0x08 -#define CRYP_DMA_RX_FIFO 0x10 - -enum cryp_dma_req_type { - CRYP_DMA_DISABLE_BOTH, - CRYP_DMA_ENABLE_IN_DATA, - CRYP_DMA_ENABLE_OUT_DATA, - CRYP_DMA_ENABLE_BOTH_DIRECTIONS -}; - -enum cryp_dma_channel { - CRYP_DMA_RX = 0, - CRYP_DMA_TX -}; - -/* Key registers */ -enum cryp_key_reg_index { - CRYP_KEY_REG_1, - CRYP_KEY_REG_2, - CRYP_KEY_REG_3, - CRYP_KEY_REG_4 -}; - -/* Key register left and right */ -struct cryp_key_value { - u32 key_value_left; - u32 key_value_right; -}; - -/* Cryp Initialization structure */ -enum cryp_init_vector_index { - CRYP_INIT_VECTOR_INDEX_0, - CRYP_INIT_VECTOR_INDEX_1 -}; - -/* struct cryp_init_vector_value - - * @init_value_left - * @init_value_right - * */ -struct cryp_init_vector_value { - u32 init_value_left; - u32 init_value_right; -}; - -/** - * struct cryp_device_context - structure for a cryp context. - * @cr: control register - * @dmacr: DMA control register - * @imsc: Interrupt mask set/clear register - * @key_1_l: Key 1l register - * @key_1_r: Key 1r register - * @key_2_l: Key 2l register - * @key_2_r: Key 2r register - * @key_3_l: Key 3l register - * @key_3_r: Key 3r register - * @key_4_l: Key 4l register - * @key_4_r: Key 4r register - * @init_vect_0_l: Initialization vector 0l register - * @init_vect_0_r: Initialization vector 0r register - * @init_vect_1_l: Initialization vector 1l register - * @init_vect_1_r: Initialization vector 0r register - * @din: Data in register - * @dout: Data out register - * - * CRYP power management specifc structure. 
- */ -struct cryp_device_context { - u32 cr; - u32 dmacr; - u32 imsc; - - u32 key_1_l; - u32 key_1_r; - u32 key_2_l; - u32 key_2_r; - u32 key_3_l; - u32 key_3_r; - u32 key_4_l; - u32 key_4_r; - - u32 init_vect_0_l; - u32 init_vect_0_r; - u32 init_vect_1_l; - u32 init_vect_1_r; - - u32 din; - u32 dout; -}; - -struct cryp_dma { - dma_cap_mask_t mask; - struct completion cryp_dma_complete; - struct dma_chan *chan_cryp2mem; - struct dma_chan *chan_mem2cryp; - struct stedma40_chan_cfg *cfg_cryp2mem; - struct stedma40_chan_cfg *cfg_mem2cryp; - int sg_src_len; - int sg_dst_len; - struct scatterlist *sg_src; - struct scatterlist *sg_dst; - int nents_src; - int nents_dst; -}; - -/** - * struct cryp_device_data - structure for a cryp device. - * @base: Pointer to virtual base address of the cryp device. - * @phybase: Pointer to physical memory location of the cryp device. - * @dev: Pointer to the devices dev structure. - * @clk: Pointer to the device's clock control. - * @irq: IRQ number - * @pwr_regulator: Pointer to the device's power control. - * @power_status: Current status of the power. - * @ctx_lock: Lock for current_ctx. - * @current_ctx: Pointer to the currently allocated context. - * @list_node: For inclusion into a klist. - * @dma: The dma structure holding channel configuration. - * @power_state: TRUE = power state on, FALSE = power state off. - * @power_state_spinlock: Spinlock for power_state. - * @restore_dev_ctx: TRUE = saved ctx, FALSE = no saved ctx. 
- */ -struct cryp_device_data { - struct cryp_register __iomem *base; - phys_addr_t phybase; - struct device *dev; - struct clk *clk; - int irq; - struct regulator *pwr_regulator; - int power_status; - spinlock_t ctx_lock; - struct cryp_ctx *current_ctx; - struct klist_node list_node; - struct cryp_dma dma; - bool power_state; - spinlock_t power_state_spinlock; - bool restore_dev_ctx; -}; - -void cryp_wait_until_done(struct cryp_device_data *device_data); - -/* Initialization functions */ - -int cryp_check(struct cryp_device_data *device_data); - -void cryp_activity(struct cryp_device_data *device_data, - enum cryp_crypen cryp_crypen); - -void cryp_flush_inoutfifo(struct cryp_device_data *device_data); - -int cryp_set_configuration(struct cryp_device_data *device_data, - struct cryp_config *cryp_config, - u32 *control_register); - -void cryp_configure_for_dma(struct cryp_device_data *device_data, - enum cryp_dma_req_type dma_req); - -int cryp_configure_key_values(struct cryp_device_data *device_data, - enum cryp_key_reg_index key_reg_index, - struct cryp_key_value key_value); - -int cryp_configure_init_vector(struct cryp_device_data *device_data, - enum cryp_init_vector_index - init_vector_index, - struct cryp_init_vector_value - init_vector_value); - -int cryp_configure_protection(struct cryp_device_data *device_data, - struct cryp_protection_config *p_protect_config); - -/* Power management funtions */ -void cryp_save_device_context(struct cryp_device_data *device_data, - struct cryp_device_context *ctx, - int cryp_mode); - -void cryp_restore_device_context(struct cryp_device_data *device_data, - struct cryp_device_context *ctx); - -/* Data transfer and status bits. */ -int cryp_is_logic_busy(struct cryp_device_data *device_data); - -int cryp_get_status(struct cryp_device_data *device_data); - -/** - * cryp_write_indata - This routine writes 32 bit data into the data input - * register of the cryptography IP. 
- * @device_data: Pointer to the device data struct for base address. - * @write_data: Data to write. - */ -int cryp_write_indata(struct cryp_device_data *device_data, u32 write_data); - -/** - * cryp_read_outdata - This routine reads the data from the data output - * register of the CRYP logic - * @device_data: Pointer to the device data struct for base address. - * @read_data: Read the data from the output FIFO. - */ -int cryp_read_outdata(struct cryp_device_data *device_data, u32 *read_data); - -#endif /* _CRYP_H_ */ diff --git a/drivers/crypto/ux500/cryp/cryp_core.c b/drivers/crypto/ux500/cryp/cryp_core.c deleted file mode 100644 index 5a57c9afd8c8..000000000000 --- a/drivers/crypto/ux500/cryp/cryp_core.c +++ /dev/null @@ -1,1600 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) ST-Ericsson SA 2010 - * Author: Shujuan Chen for ST-Ericsson. - * Author: Joakim Bech for ST-Ericsson. - * Author: Berne Hebark for ST-Ericsson. - * Author: Niklas Hernaeus for ST-Ericsson. - * Author: Jonas Linde for ST-Ericsson. - * Author: Andreas Westin for ST-Ericsson. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include - -#include "cryp_p.h" -#include "cryp.h" - -#define CRYP_MAX_KEY_SIZE 32 -#define BYTES_PER_WORD 4 - -static int cryp_mode; -static atomic_t session_id; - -static struct stedma40_chan_cfg *mem_to_engine; -static struct stedma40_chan_cfg *engine_to_mem; - -/** - * struct cryp_driver_data - data specific to the driver. - * - * @device_list: A list of registered devices to choose from. - * @device_allocation: A semaphore initialized with number of devices. - */ -struct cryp_driver_data { - struct klist device_list; - struct semaphore device_allocation; -}; - -/** - * struct cryp_ctx - Crypto context - * @config: Crypto mode. - * @key: Key array. 
- * @keylen: Length of key. - * @iv: Pointer to initialization vector. - * @indata: Pointer to indata. - * @outdata: Pointer to outdata. - * @datalen: Length of indata. - * @outlen: Length of outdata. - * @blocksize: Size of blocks. - * @updated: Updated flag. - * @dev_ctx: Device dependent context. - * @device: Pointer to the device. - * @session_id: Atomic session ID. - */ -struct cryp_ctx { - struct cryp_config config; - u8 key[CRYP_MAX_KEY_SIZE]; - u32 keylen; - u8 *iv; - const u8 *indata; - u8 *outdata; - u32 datalen; - u32 outlen; - u32 blocksize; - u8 updated; - struct cryp_device_context dev_ctx; - struct cryp_device_data *device; - u32 session_id; -}; - -static struct cryp_driver_data driver_data; - -/** - * swap_bits_in_byte - mirror the bits in a byte - * @b: the byte to be mirrored - * - * The bits are swapped the following way: - * Byte b include bits 0-7, nibble 1 (n1) include bits 0-3 and - * nibble 2 (n2) bits 4-7. - * - * Nibble 1 (n1): - * (The "old" (moved) bit is replaced with a zero) - * 1. Move bit 6 and 7, 4 positions to the left. - * 2. Move bit 3 and 5, 2 positions to the left. - * 3. Move bit 1-4, 1 position to the left. - * - * Nibble 2 (n2): - * 1. Move bit 0 and 1, 4 positions to the right. - * 2. Move bit 2 and 4, 2 positions to the right. - * 3. Move bit 3-6, 1 position to the right. - * - * Combine the two nibbles to a complete and swapped byte. 
- */ - -static inline u8 swap_bits_in_byte(u8 b) -{ -#define R_SHIFT_4_MASK 0xc0 /* Bits 6 and 7, right shift 4 */ -#define R_SHIFT_2_MASK 0x28 /* (After right shift 4) Bits 3 and 5, - right shift 2 */ -#define R_SHIFT_1_MASK 0x1e /* (After right shift 2) Bits 1-4, - right shift 1 */ -#define L_SHIFT_4_MASK 0x03 /* Bits 0 and 1, left shift 4 */ -#define L_SHIFT_2_MASK 0x14 /* (After left shift 4) Bits 2 and 4, - left shift 2 */ -#define L_SHIFT_1_MASK 0x78 /* (After left shift 1) Bits 3-6, - left shift 1 */ - - u8 n1; - u8 n2; - - /* Swap most significant nibble */ - /* Right shift 4, bits 6 and 7 */ - n1 = ((b & R_SHIFT_4_MASK) >> 4) | (b & ~(R_SHIFT_4_MASK >> 4)); - /* Right shift 2, bits 3 and 5 */ - n1 = ((n1 & R_SHIFT_2_MASK) >> 2) | (n1 & ~(R_SHIFT_2_MASK >> 2)); - /* Right shift 1, bits 1-4 */ - n1 = (n1 & R_SHIFT_1_MASK) >> 1; - - /* Swap least significant nibble */ - /* Left shift 4, bits 0 and 1 */ - n2 = ((b & L_SHIFT_4_MASK) << 4) | (b & ~(L_SHIFT_4_MASK << 4)); - /* Left shift 2, bits 2 and 4 */ - n2 = ((n2 & L_SHIFT_2_MASK) << 2) | (n2 & ~(L_SHIFT_2_MASK << 2)); - /* Left shift 1, bits 3-6 */ - n2 = (n2 & L_SHIFT_1_MASK) << 1; - - return n1 | n2; -} - -static inline void swap_words_in_key_and_bits_in_byte(const u8 *in, - u8 *out, u32 len) -{ - unsigned int i = 0; - int j; - int index = 0; - - j = len - BYTES_PER_WORD; - while (j >= 0) { - for (i = 0; i < BYTES_PER_WORD; i++) { - index = len - j - BYTES_PER_WORD + i; - out[j + i] = - swap_bits_in_byte(in[index]); - } - j -= BYTES_PER_WORD; - } -} - -static void add_session_id(struct cryp_ctx *ctx) -{ - /* - * We never want 0 to be a valid value, since this is the default value - * for the software context. 
- */ - if (unlikely(atomic_inc_and_test(&session_id))) - atomic_inc(&session_id); - - ctx->session_id = atomic_read(&session_id); -} - -static irqreturn_t cryp_interrupt_handler(int irq, void *param) -{ - struct cryp_ctx *ctx; - int count; - struct cryp_device_data *device_data; - - if (param == NULL) { - BUG_ON(!param); - return IRQ_HANDLED; - } - - /* The device is coming from the one found in hw_crypt_noxts. */ - device_data = (struct cryp_device_data *)param; - - ctx = device_data->current_ctx; - - if (ctx == NULL) { - BUG_ON(!ctx); - return IRQ_HANDLED; - } - - dev_dbg(ctx->device->dev, "[%s] (len: %d) %s, ", __func__, ctx->outlen, - cryp_pending_irq_src(device_data, CRYP_IRQ_SRC_OUTPUT_FIFO) ? - "out" : "in"); - - if (cryp_pending_irq_src(device_data, - CRYP_IRQ_SRC_OUTPUT_FIFO)) { - if (ctx->outlen / ctx->blocksize > 0) { - count = ctx->blocksize / 4; - - readsl(&device_data->base->dout, ctx->outdata, count); - ctx->outdata += count; - ctx->outlen -= count; - - if (ctx->outlen == 0) { - cryp_disable_irq_src(device_data, - CRYP_IRQ_SRC_OUTPUT_FIFO); - } - } - } else if (cryp_pending_irq_src(device_data, - CRYP_IRQ_SRC_INPUT_FIFO)) { - if (ctx->datalen / ctx->blocksize > 0) { - count = ctx->blocksize / 4; - - writesl(&device_data->base->din, ctx->indata, count); - - ctx->indata += count; - ctx->datalen -= count; - - if (ctx->datalen == 0) - cryp_disable_irq_src(device_data, - CRYP_IRQ_SRC_INPUT_FIFO); - - if (ctx->config.algomode == CRYP_ALGO_AES_XTS) { - CRYP_PUT_BITS(&device_data->base->cr, - CRYP_START_ENABLE, - CRYP_CR_START_POS, - CRYP_CR_START_MASK); - - cryp_wait_until_done(device_data); - } - } - } - - return IRQ_HANDLED; -} - -static int mode_is_aes(enum cryp_algo_mode mode) -{ - return CRYP_ALGO_AES_ECB == mode || - CRYP_ALGO_AES_CBC == mode || - CRYP_ALGO_AES_CTR == mode || - CRYP_ALGO_AES_XTS == mode; -} - -static int cfg_iv(struct cryp_device_data *device_data, u32 left, u32 right, - enum cryp_init_vector_index index) -{ - struct 
cryp_init_vector_value vector_value; - - dev_dbg(device_data->dev, "[%s]", __func__); - - vector_value.init_value_left = left; - vector_value.init_value_right = right; - - return cryp_configure_init_vector(device_data, - index, - vector_value); -} - -static int cfg_ivs(struct cryp_device_data *device_data, struct cryp_ctx *ctx) -{ - int i; - int status = 0; - int num_of_regs = ctx->blocksize / 8; - __be32 *civ = (__be32 *)ctx->iv; - u32 iv[AES_BLOCK_SIZE / 4]; - - dev_dbg(device_data->dev, "[%s]", __func__); - - /* - * Since we loop on num_of_regs we need to have a check in case - * someone provides an incorrect blocksize which would force calling - * cfg_iv with i greater than 2 which is an error. - */ - if (num_of_regs > 2) { - dev_err(device_data->dev, "[%s] Incorrect blocksize %d", - __func__, ctx->blocksize); - return -EINVAL; - } - - for (i = 0; i < ctx->blocksize / 4; i++) - iv[i] = be32_to_cpup(civ + i); - - for (i = 0; i < num_of_regs; i++) { - status = cfg_iv(device_data, iv[i*2], iv[i*2+1], - (enum cryp_init_vector_index) i); - if (status != 0) - return status; - } - return status; -} - -static int set_key(struct cryp_device_data *device_data, - u32 left_key, - u32 right_key, - enum cryp_key_reg_index index) -{ - struct cryp_key_value key_value; - int cryp_error; - - dev_dbg(device_data->dev, "[%s]", __func__); - - key_value.key_value_left = left_key; - key_value.key_value_right = right_key; - - cryp_error = cryp_configure_key_values(device_data, - index, - key_value); - if (cryp_error != 0) - dev_err(device_data->dev, "[%s]: " - "cryp_configure_key_values() failed!", __func__); - - return cryp_error; -} - -static int cfg_keys(struct cryp_ctx *ctx) -{ - int i; - int num_of_regs = ctx->keylen / 8; - u32 swapped_key[CRYP_MAX_KEY_SIZE / 4]; - __be32 *ckey = (__be32 *)ctx->key; - int cryp_error = 0; - - dev_dbg(ctx->device->dev, "[%s]", __func__); - - if (mode_is_aes(ctx->config.algomode)) { - swap_words_in_key_and_bits_in_byte((u8 *)ckey, - (u8 
*)swapped_key, - ctx->keylen); - } else { - for (i = 0; i < ctx->keylen / 4; i++) - swapped_key[i] = be32_to_cpup(ckey + i); - } - - for (i = 0; i < num_of_regs; i++) { - cryp_error = set_key(ctx->device, - swapped_key[i * 2], - swapped_key[i * 2 + 1], - (enum cryp_key_reg_index) i); - - if (cryp_error != 0) { - dev_err(ctx->device->dev, "[%s]: set_key() failed!", - __func__); - return cryp_error; - } - } - return cryp_error; -} - -static int cryp_setup_context(struct cryp_ctx *ctx, - struct cryp_device_data *device_data) -{ - u32 control_register = CRYP_CR_DEFAULT; - - switch (cryp_mode) { - case CRYP_MODE_INTERRUPT: - writel_relaxed(CRYP_IMSC_DEFAULT, &device_data->base->imsc); - break; - - case CRYP_MODE_DMA: - writel_relaxed(CRYP_DMACR_DEFAULT, &device_data->base->dmacr); - break; - - default: - break; - } - - if (ctx->updated == 0) { - cryp_flush_inoutfifo(device_data); - if (cfg_keys(ctx) != 0) { - dev_err(ctx->device->dev, "[%s]: cfg_keys failed!", - __func__); - return -EINVAL; - } - - if (ctx->iv && - CRYP_ALGO_AES_ECB != ctx->config.algomode && - CRYP_ALGO_DES_ECB != ctx->config.algomode && - CRYP_ALGO_TDES_ECB != ctx->config.algomode) { - if (cfg_ivs(device_data, ctx) != 0) - return -EPERM; - } - - cryp_set_configuration(device_data, &ctx->config, - &control_register); - add_session_id(ctx); - } else if (ctx->updated == 1 && - ctx->session_id != atomic_read(&session_id)) { - cryp_flush_inoutfifo(device_data); - cryp_restore_device_context(device_data, &ctx->dev_ctx); - - add_session_id(ctx); - control_register = ctx->dev_ctx.cr; - } else - control_register = ctx->dev_ctx.cr; - - writel(control_register | - (CRYP_CRYPEN_ENABLE << CRYP_CR_CRYPEN_POS), - &device_data->base->cr); - - return 0; -} - -static int cryp_get_device_data(struct cryp_ctx *ctx, - struct cryp_device_data **device_data) -{ - int ret; - struct klist_iter device_iterator; - struct klist_node *device_node; - struct cryp_device_data *local_device_data = NULL; - pr_debug(DEV_DBG_NAME " 
[%s]", __func__); - - /* Wait until a device is available */ - ret = down_interruptible(&driver_data.device_allocation); - if (ret) - return ret; /* Interrupted */ - - /* Select a device */ - klist_iter_init(&driver_data.device_list, &device_iterator); - - device_node = klist_next(&device_iterator); - while (device_node) { - local_device_data = container_of(device_node, - struct cryp_device_data, list_node); - spin_lock(&local_device_data->ctx_lock); - /* current_ctx allocates a device, NULL = unallocated */ - if (local_device_data->current_ctx) { - device_node = klist_next(&device_iterator); - } else { - local_device_data->current_ctx = ctx; - ctx->device = local_device_data; - spin_unlock(&local_device_data->ctx_lock); - break; - } - spin_unlock(&local_device_data->ctx_lock); - } - klist_iter_exit(&device_iterator); - - if (!device_node) { - /** - * No free device found. - * Since we allocated a device with down_interruptible, this - * should not be able to happen. - * Number of available devices, which are contained in - * device_allocation, is therefore decremented by not doing - * an up(device_allocation). 
- */ - return -EBUSY; - } - - *device_data = local_device_data; - - return 0; -} - -static void cryp_dma_setup_channel(struct cryp_device_data *device_data, - struct device *dev) -{ - struct dma_slave_config mem2cryp = { - .direction = DMA_MEM_TO_DEV, - .dst_addr = device_data->phybase + CRYP_DMA_TX_FIFO, - .dst_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES, - .dst_maxburst = 4, - }; - struct dma_slave_config cryp2mem = { - .direction = DMA_DEV_TO_MEM, - .src_addr = device_data->phybase + CRYP_DMA_RX_FIFO, - .src_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES, - .src_maxburst = 4, - }; - - dma_cap_zero(device_data->dma.mask); - dma_cap_set(DMA_SLAVE, device_data->dma.mask); - - device_data->dma.cfg_mem2cryp = mem_to_engine; - device_data->dma.chan_mem2cryp = - dma_request_channel(device_data->dma.mask, - stedma40_filter, - device_data->dma.cfg_mem2cryp); - - device_data->dma.cfg_cryp2mem = engine_to_mem; - device_data->dma.chan_cryp2mem = - dma_request_channel(device_data->dma.mask, - stedma40_filter, - device_data->dma.cfg_cryp2mem); - - dmaengine_slave_config(device_data->dma.chan_mem2cryp, &mem2cryp); - dmaengine_slave_config(device_data->dma.chan_cryp2mem, &cryp2mem); - - init_completion(&device_data->dma.cryp_dma_complete); -} - -static void cryp_dma_out_callback(void *data) -{ - struct cryp_ctx *ctx = (struct cryp_ctx *) data; - dev_dbg(ctx->device->dev, "[%s]: ", __func__); - - complete(&ctx->device->dma.cryp_dma_complete); -} - -static int cryp_set_dma_transfer(struct cryp_ctx *ctx, - struct scatterlist *sg, - int len, - enum dma_data_direction direction) -{ - struct dma_async_tx_descriptor *desc; - struct dma_chan *channel = NULL; - dma_cookie_t cookie; - - dev_dbg(ctx->device->dev, "[%s]: ", __func__); - - if (unlikely(!IS_ALIGNED((unsigned long)sg, 4))) { - dev_err(ctx->device->dev, "[%s]: Data in sg list isn't " - "aligned! 
Addr: 0x%08lx", __func__, (unsigned long)sg); - return -EFAULT; - } - - switch (direction) { - case DMA_TO_DEVICE: - channel = ctx->device->dma.chan_mem2cryp; - ctx->device->dma.sg_src = sg; - ctx->device->dma.sg_src_len = dma_map_sg(channel->device->dev, - ctx->device->dma.sg_src, - ctx->device->dma.nents_src, - direction); - - if (!ctx->device->dma.sg_src_len) { - dev_dbg(ctx->device->dev, - "[%s]: Could not map the sg list (TO_DEVICE)", - __func__); - return -EFAULT; - } - - dev_dbg(ctx->device->dev, "[%s]: Setting up DMA for buffer " - "(TO_DEVICE)", __func__); - - desc = dmaengine_prep_slave_sg(channel, - ctx->device->dma.sg_src, - ctx->device->dma.sg_src_len, - DMA_MEM_TO_DEV, DMA_CTRL_ACK); - break; - - case DMA_FROM_DEVICE: - channel = ctx->device->dma.chan_cryp2mem; - ctx->device->dma.sg_dst = sg; - ctx->device->dma.sg_dst_len = dma_map_sg(channel->device->dev, - ctx->device->dma.sg_dst, - ctx->device->dma.nents_dst, - direction); - - if (!ctx->device->dma.sg_dst_len) { - dev_dbg(ctx->device->dev, - "[%s]: Could not map the sg list (FROM_DEVICE)", - __func__); - return -EFAULT; - } - - dev_dbg(ctx->device->dev, "[%s]: Setting up DMA for buffer " - "(FROM_DEVICE)", __func__); - - desc = dmaengine_prep_slave_sg(channel, - ctx->device->dma.sg_dst, - ctx->device->dma.sg_dst_len, - DMA_DEV_TO_MEM, - DMA_CTRL_ACK | - DMA_PREP_INTERRUPT); - - desc->callback = cryp_dma_out_callback; - desc->callback_param = ctx; - break; - - default: - dev_dbg(ctx->device->dev, "[%s]: Invalid DMA direction", - __func__); - return -EFAULT; - } - - cookie = dmaengine_submit(desc); - if (dma_submit_error(cookie)) { - dev_dbg(ctx->device->dev, "[%s]: DMA submission failed\n", - __func__); - return cookie; - } - - dma_async_issue_pending(channel); - - return 0; -} - -static void cryp_dma_done(struct cryp_ctx *ctx) -{ - struct dma_chan *chan; - - dev_dbg(ctx->device->dev, "[%s]: ", __func__); - - chan = ctx->device->dma.chan_mem2cryp; - dmaengine_terminate_all(chan); - 
dma_unmap_sg(chan->device->dev, ctx->device->dma.sg_src, - ctx->device->dma.nents_src, DMA_TO_DEVICE); - - chan = ctx->device->dma.chan_cryp2mem; - dmaengine_terminate_all(chan); - dma_unmap_sg(chan->device->dev, ctx->device->dma.sg_dst, - ctx->device->dma.nents_dst, DMA_FROM_DEVICE); -} - -static int cryp_dma_write(struct cryp_ctx *ctx, struct scatterlist *sg, - int len) -{ - int error = cryp_set_dma_transfer(ctx, sg, len, DMA_TO_DEVICE); - dev_dbg(ctx->device->dev, "[%s]: ", __func__); - - if (error) { - dev_dbg(ctx->device->dev, "[%s]: cryp_set_dma_transfer() " - "failed", __func__); - return error; - } - - return len; -} - -static int cryp_dma_read(struct cryp_ctx *ctx, struct scatterlist *sg, int len) -{ - int error = cryp_set_dma_transfer(ctx, sg, len, DMA_FROM_DEVICE); - if (error) { - dev_dbg(ctx->device->dev, "[%s]: cryp_set_dma_transfer() " - "failed", __func__); - return error; - } - - return len; -} - -static void cryp_polling_mode(struct cryp_ctx *ctx, - struct cryp_device_data *device_data) -{ - int len = ctx->blocksize / BYTES_PER_WORD; - int remaining_length = ctx->datalen; - u32 *indata = (u32 *)ctx->indata; - u32 *outdata = (u32 *)ctx->outdata; - - while (remaining_length > 0) { - writesl(&device_data->base->din, indata, len); - indata += len; - remaining_length -= (len * BYTES_PER_WORD); - cryp_wait_until_done(device_data); - - readsl(&device_data->base->dout, outdata, len); - outdata += len; - cryp_wait_until_done(device_data); - } -} - -static int cryp_disable_power(struct device *dev, - struct cryp_device_data *device_data, - bool save_device_context) -{ - int ret = 0; - - dev_dbg(dev, "[%s]", __func__); - - spin_lock(&device_data->power_state_spinlock); - if (!device_data->power_state) - goto out; - - spin_lock(&device_data->ctx_lock); - if (save_device_context && device_data->current_ctx) { - cryp_save_device_context(device_data, - &device_data->current_ctx->dev_ctx, - cryp_mode); - device_data->restore_dev_ctx = true; - } - 
spin_unlock(&device_data->ctx_lock); - - clk_disable(device_data->clk); - ret = regulator_disable(device_data->pwr_regulator); - if (ret) - dev_err(dev, "[%s]: " - "regulator_disable() failed!", - __func__); - - device_data->power_state = false; - -out: - spin_unlock(&device_data->power_state_spinlock); - - return ret; -} - -static int cryp_enable_power( - struct device *dev, - struct cryp_device_data *device_data, - bool restore_device_context) -{ - int ret = 0; - - dev_dbg(dev, "[%s]", __func__); - - spin_lock(&device_data->power_state_spinlock); - if (!device_data->power_state) { - ret = regulator_enable(device_data->pwr_regulator); - if (ret) { - dev_err(dev, "[%s]: regulator_enable() failed!", - __func__); - goto out; - } - - ret = clk_enable(device_data->clk); - if (ret) { - dev_err(dev, "[%s]: clk_enable() failed!", - __func__); - regulator_disable(device_data->pwr_regulator); - goto out; - } - device_data->power_state = true; - } - - if (device_data->restore_dev_ctx) { - spin_lock(&device_data->ctx_lock); - if (restore_device_context && device_data->current_ctx) { - device_data->restore_dev_ctx = false; - cryp_restore_device_context(device_data, - &device_data->current_ctx->dev_ctx); - } - spin_unlock(&device_data->ctx_lock); - } -out: - spin_unlock(&device_data->power_state_spinlock); - - return ret; -} - -static int hw_crypt_noxts(struct cryp_ctx *ctx, - struct cryp_device_data *device_data) -{ - int ret = 0; - - const u8 *indata = ctx->indata; - u8 *outdata = ctx->outdata; - u32 datalen = ctx->datalen; - u32 outlen = datalen; - - pr_debug(DEV_DBG_NAME " [%s]", __func__); - - ctx->outlen = ctx->datalen; - - if (unlikely(!IS_ALIGNED((unsigned long)indata, 4))) { - pr_debug(DEV_DBG_NAME " [%s]: Data isn't aligned! 
Addr: " - "0x%08lx", __func__, (unsigned long)indata); - return -EINVAL; - } - - ret = cryp_setup_context(ctx, device_data); - - if (ret) - goto out; - - if (cryp_mode == CRYP_MODE_INTERRUPT) { - cryp_enable_irq_src(device_data, CRYP_IRQ_SRC_INPUT_FIFO | - CRYP_IRQ_SRC_OUTPUT_FIFO); - - /* - * ctx->outlen is decremented in the cryp_interrupt_handler - * function. We had to add cpu_relax() (barrier) to make sure - * that gcc didn't optimze away this variable. - */ - while (ctx->outlen > 0) - cpu_relax(); - } else if (cryp_mode == CRYP_MODE_POLLING || - cryp_mode == CRYP_MODE_DMA) { - /* - * The reason for having DMA in this if case is that if we are - * running cryp_mode = 2, then we separate DMA routines for - * handling cipher/plaintext > blocksize, except when - * running the normal CRYPTO_ALG_TYPE_CIPHER, then we still use - * the polling mode. Overhead of doing DMA setup eats up the - * benefits using it. - */ - cryp_polling_mode(ctx, device_data); - } else { - dev_err(ctx->device->dev, "[%s]: Invalid operation mode!", - __func__); - ret = -EPERM; - goto out; - } - - cryp_save_device_context(device_data, &ctx->dev_ctx, cryp_mode); - ctx->updated = 1; - -out: - ctx->indata = indata; - ctx->outdata = outdata; - ctx->datalen = datalen; - ctx->outlen = outlen; - - return ret; -} - -static int get_nents(struct scatterlist *sg, int nbytes) -{ - int nents = 0; - - while (nbytes > 0) { - nbytes -= sg->length; - sg = sg_next(sg); - nents++; - } - - return nents; -} - -static int ablk_dma_crypt(struct skcipher_request *areq) -{ - struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(areq); - struct cryp_ctx *ctx = crypto_skcipher_ctx(cipher); - struct cryp_device_data *device_data; - - int bytes_written = 0; - int bytes_read = 0; - int ret; - - pr_debug(DEV_DBG_NAME " [%s]", __func__); - - ctx->datalen = areq->cryptlen; - ctx->outlen = areq->cryptlen; - - ret = cryp_get_device_data(ctx, &device_data); - if (ret) - return ret; - - ret = cryp_setup_context(ctx, 
device_data); - if (ret) - goto out; - - /* We have the device now, so store the nents in the dma struct. */ - ctx->device->dma.nents_src = get_nents(areq->src, ctx->datalen); - ctx->device->dma.nents_dst = get_nents(areq->dst, ctx->outlen); - - /* Enable DMA in- and output. */ - cryp_configure_for_dma(device_data, CRYP_DMA_ENABLE_BOTH_DIRECTIONS); - - bytes_written = cryp_dma_write(ctx, areq->src, ctx->datalen); - bytes_read = cryp_dma_read(ctx, areq->dst, bytes_written); - - wait_for_completion(&ctx->device->dma.cryp_dma_complete); - cryp_dma_done(ctx); - - cryp_save_device_context(device_data, &ctx->dev_ctx, cryp_mode); - ctx->updated = 1; - -out: - spin_lock(&device_data->ctx_lock); - device_data->current_ctx = NULL; - ctx->device = NULL; - spin_unlock(&device_data->ctx_lock); - - /* - * The down_interruptible part for this semaphore is called in - * cryp_get_device_data. - */ - up(&driver_data.device_allocation); - - if (unlikely(bytes_written != bytes_read)) - return -EPERM; - - return 0; -} - -static int ablk_crypt(struct skcipher_request *areq) -{ - struct skcipher_walk walk; - struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(areq); - struct cryp_ctx *ctx = crypto_skcipher_ctx(cipher); - struct cryp_device_data *device_data; - unsigned long src_paddr; - unsigned long dst_paddr; - int ret; - int nbytes; - - pr_debug(DEV_DBG_NAME " [%s]", __func__); - - ret = cryp_get_device_data(ctx, &device_data); - if (ret) - goto out; - - ret = skcipher_walk_async(&walk, areq); - - if (ret) { - pr_err(DEV_DBG_NAME "[%s]: skcipher_walk_async() failed!", - __func__); - goto out; - } - - while ((nbytes = walk.nbytes) > 0) { - ctx->iv = walk.iv; - src_paddr = (page_to_phys(walk.src.phys.page) + walk.src.phys.offset); - ctx->indata = phys_to_virt(src_paddr); - - dst_paddr = (page_to_phys(walk.dst.phys.page) + walk.dst.phys.offset); - ctx->outdata = phys_to_virt(dst_paddr); - - ctx->datalen = nbytes - (nbytes % ctx->blocksize); - - ret = hw_crypt_noxts(ctx, device_data); 
- if (ret) - goto out; - - nbytes -= ctx->datalen; - ret = skcipher_walk_done(&walk, nbytes); - if (ret) - goto out; - } - -out: - /* Release the device */ - spin_lock(&device_data->ctx_lock); - device_data->current_ctx = NULL; - ctx->device = NULL; - spin_unlock(&device_data->ctx_lock); - - /* - * The down_interruptible part for this semaphore is called in - * cryp_get_device_data. - */ - up(&driver_data.device_allocation); - - return ret; -} - -static int aes_skcipher_setkey(struct crypto_skcipher *cipher, - const u8 *key, unsigned int keylen) -{ - struct cryp_ctx *ctx = crypto_skcipher_ctx(cipher); - - pr_debug(DEV_DBG_NAME " [%s]", __func__); - - switch (keylen) { - case AES_KEYSIZE_128: - ctx->config.keysize = CRYP_KEY_SIZE_128; - break; - - case AES_KEYSIZE_192: - ctx->config.keysize = CRYP_KEY_SIZE_192; - break; - - case AES_KEYSIZE_256: - ctx->config.keysize = CRYP_KEY_SIZE_256; - break; - - default: - pr_err(DEV_DBG_NAME "[%s]: Unknown keylen!", __func__); - return -EINVAL; - } - - memcpy(ctx->key, key, keylen); - ctx->keylen = keylen; - - ctx->updated = 0; - - return 0; -} - -static int des_skcipher_setkey(struct crypto_skcipher *cipher, - const u8 *key, unsigned int keylen) -{ - struct cryp_ctx *ctx = crypto_skcipher_ctx(cipher); - int err; - - pr_debug(DEV_DBG_NAME " [%s]", __func__); - - err = verify_skcipher_des_key(cipher, key); - if (err) - return err; - - memcpy(ctx->key, key, keylen); - ctx->keylen = keylen; - - ctx->updated = 0; - return 0; -} - -static int des3_skcipher_setkey(struct crypto_skcipher *cipher, - const u8 *key, unsigned int keylen) -{ - struct cryp_ctx *ctx = crypto_skcipher_ctx(cipher); - int err; - - pr_debug(DEV_DBG_NAME " [%s]", __func__); - - err = verify_skcipher_des3_key(cipher, key); - if (err) - return err; - - memcpy(ctx->key, key, keylen); - ctx->keylen = keylen; - - ctx->updated = 0; - return 0; -} - -static int cryp_blk_encrypt(struct skcipher_request *areq) -{ - struct crypto_skcipher *cipher = 
crypto_skcipher_reqtfm(areq); - struct cryp_ctx *ctx = crypto_skcipher_ctx(cipher); - - pr_debug(DEV_DBG_NAME " [%s]", __func__); - - ctx->config.algodir = CRYP_ALGORITHM_ENCRYPT; - - /* - * DMA does not work for DES due to a hw bug */ - if (cryp_mode == CRYP_MODE_DMA && mode_is_aes(ctx->config.algomode)) - return ablk_dma_crypt(areq); - - /* For everything except DMA, we run the non DMA version. */ - return ablk_crypt(areq); -} - -static int cryp_blk_decrypt(struct skcipher_request *areq) -{ - struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(areq); - struct cryp_ctx *ctx = crypto_skcipher_ctx(cipher); - - pr_debug(DEV_DBG_NAME " [%s]", __func__); - - ctx->config.algodir = CRYP_ALGORITHM_DECRYPT; - - /* DMA does not work for DES due to a hw bug */ - if (cryp_mode == CRYP_MODE_DMA && mode_is_aes(ctx->config.algomode)) - return ablk_dma_crypt(areq); - - /* For everything except DMA, we run the non DMA version. */ - return ablk_crypt(areq); -} - -struct cryp_algo_template { - enum cryp_algo_mode algomode; - struct skcipher_alg skcipher; -}; - -static int cryp_init_tfm(struct crypto_skcipher *tfm) -{ - struct cryp_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_alg *alg = crypto_skcipher_alg(tfm); - struct cryp_algo_template *cryp_alg = container_of(alg, - struct cryp_algo_template, - skcipher); - - ctx->config.algomode = cryp_alg->algomode; - ctx->blocksize = crypto_skcipher_blocksize(tfm); - - return 0; -} - -static struct cryp_algo_template cryp_algs[] = { - { - .algomode = CRYP_ALGO_AES_ECB, - .skcipher = { - .base.cra_name = "ecb(aes)", - .base.cra_driver_name = "ecb-aes-ux500", - .base.cra_priority = 300, - .base.cra_flags = CRYPTO_ALG_ASYNC, - .base.cra_blocksize = AES_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct cryp_ctx), - .base.cra_alignmask = 3, - .base.cra_module = THIS_MODULE, - - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .setkey = aes_skcipher_setkey, - .encrypt = cryp_blk_encrypt, - .decrypt = 
cryp_blk_decrypt, - .init = cryp_init_tfm, - } - }, - { - .algomode = CRYP_ALGO_AES_CBC, - .skcipher = { - .base.cra_name = "cbc(aes)", - .base.cra_driver_name = "cbc-aes-ux500", - .base.cra_priority = 300, - .base.cra_flags = CRYPTO_ALG_ASYNC, - .base.cra_blocksize = AES_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct cryp_ctx), - .base.cra_alignmask = 3, - .base.cra_module = THIS_MODULE, - - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .setkey = aes_skcipher_setkey, - .encrypt = cryp_blk_encrypt, - .decrypt = cryp_blk_decrypt, - .init = cryp_init_tfm, - .ivsize = AES_BLOCK_SIZE, - } - }, - { - .algomode = CRYP_ALGO_AES_CTR, - .skcipher = { - .base.cra_name = "ctr(aes)", - .base.cra_driver_name = "ctr-aes-ux500", - .base.cra_priority = 300, - .base.cra_flags = CRYPTO_ALG_ASYNC, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct cryp_ctx), - .base.cra_alignmask = 3, - .base.cra_module = THIS_MODULE, - - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .setkey = aes_skcipher_setkey, - .encrypt = cryp_blk_encrypt, - .decrypt = cryp_blk_decrypt, - .init = cryp_init_tfm, - .ivsize = AES_BLOCK_SIZE, - .chunksize = AES_BLOCK_SIZE, - } - }, - { - .algomode = CRYP_ALGO_DES_ECB, - .skcipher = { - .base.cra_name = "ecb(des)", - .base.cra_driver_name = "ecb-des-ux500", - .base.cra_priority = 300, - .base.cra_flags = CRYPTO_ALG_ASYNC, - .base.cra_blocksize = DES_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct cryp_ctx), - .base.cra_alignmask = 3, - .base.cra_module = THIS_MODULE, - - .min_keysize = DES_KEY_SIZE, - .max_keysize = DES_KEY_SIZE, - .setkey = des_skcipher_setkey, - .encrypt = cryp_blk_encrypt, - .decrypt = cryp_blk_decrypt, - .init = cryp_init_tfm, - } - }, - { - .algomode = CRYP_ALGO_TDES_ECB, - .skcipher = { - .base.cra_name = "ecb(des3_ede)", - .base.cra_driver_name = "ecb-des3_ede-ux500", - .base.cra_priority = 300, - .base.cra_flags = CRYPTO_ALG_ASYNC, - .base.cra_blocksize = DES3_EDE_BLOCK_SIZE, - 
.base.cra_ctxsize = sizeof(struct cryp_ctx), - .base.cra_alignmask = 3, - .base.cra_module = THIS_MODULE, - - .min_keysize = DES3_EDE_KEY_SIZE, - .max_keysize = DES3_EDE_KEY_SIZE, - .setkey = des3_skcipher_setkey, - .encrypt = cryp_blk_encrypt, - .decrypt = cryp_blk_decrypt, - .init = cryp_init_tfm, - } - }, - { - .algomode = CRYP_ALGO_DES_CBC, - .skcipher = { - .base.cra_name = "cbc(des)", - .base.cra_driver_name = "cbc-des-ux500", - .base.cra_priority = 300, - .base.cra_flags = CRYPTO_ALG_ASYNC, - .base.cra_blocksize = DES_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct cryp_ctx), - .base.cra_alignmask = 3, - .base.cra_module = THIS_MODULE, - - .min_keysize = DES_KEY_SIZE, - .max_keysize = DES_KEY_SIZE, - .setkey = des_skcipher_setkey, - .encrypt = cryp_blk_encrypt, - .decrypt = cryp_blk_decrypt, - .ivsize = DES_BLOCK_SIZE, - .init = cryp_init_tfm, - } - }, - { - .algomode = CRYP_ALGO_TDES_CBC, - .skcipher = { - .base.cra_name = "cbc(des3_ede)", - .base.cra_driver_name = "cbc-des3_ede-ux500", - .base.cra_priority = 300, - .base.cra_flags = CRYPTO_ALG_ASYNC, - .base.cra_blocksize = DES3_EDE_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct cryp_ctx), - .base.cra_alignmask = 3, - .base.cra_module = THIS_MODULE, - - .min_keysize = DES3_EDE_KEY_SIZE, - .max_keysize = DES3_EDE_KEY_SIZE, - .setkey = des3_skcipher_setkey, - .encrypt = cryp_blk_encrypt, - .decrypt = cryp_blk_decrypt, - .ivsize = DES3_EDE_BLOCK_SIZE, - .init = cryp_init_tfm, - } - } -}; - -/** - * cryp_algs_register_all - - */ -static int cryp_algs_register_all(void) -{ - int ret; - int i; - int count; - - pr_debug("[%s]", __func__); - - for (i = 0; i < ARRAY_SIZE(cryp_algs); i++) { - ret = crypto_register_skcipher(&cryp_algs[i].skcipher); - if (ret) { - count = i; - pr_err("[%s] alg registration failed", - cryp_algs[i].skcipher.base.cra_driver_name); - goto unreg; - } - } - return 0; -unreg: - for (i = 0; i < count; i++) - crypto_unregister_skcipher(&cryp_algs[i].skcipher); - return ret; -} - -/** - * 
cryp_algs_unregister_all - - */ -static void cryp_algs_unregister_all(void) -{ - int i; - - pr_debug(DEV_DBG_NAME " [%s]", __func__); - - for (i = 0; i < ARRAY_SIZE(cryp_algs); i++) - crypto_unregister_skcipher(&cryp_algs[i].skcipher); -} - -static int ux500_cryp_probe(struct platform_device *pdev) -{ - int ret; - struct resource *res; - struct cryp_device_data *device_data; - struct cryp_protection_config prot = { - .privilege_access = CRYP_STATE_ENABLE - }; - struct device *dev = &pdev->dev; - - dev_dbg(dev, "[%s]", __func__); - device_data = devm_kzalloc(dev, sizeof(*device_data), GFP_KERNEL); - if (!device_data) { - ret = -ENOMEM; - goto out; - } - - device_data->dev = dev; - device_data->current_ctx = NULL; - - /* Grab the DMA configuration from platform data. */ - mem_to_engine = &((struct cryp_platform_data *) - dev->platform_data)->mem_to_engine; - engine_to_mem = &((struct cryp_platform_data *) - dev->platform_data)->engine_to_mem; - - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { - dev_err(dev, "[%s]: platform_get_resource() failed", - __func__); - ret = -ENODEV; - goto out; - } - - device_data->phybase = res->start; - device_data->base = devm_ioremap_resource(dev, res); - if (IS_ERR(device_data->base)) { - ret = PTR_ERR(device_data->base); - goto out; - } - - spin_lock_init(&device_data->ctx_lock); - spin_lock_init(&device_data->power_state_spinlock); - - /* Enable power for CRYP hardware block */ - device_data->pwr_regulator = regulator_get(&pdev->dev, "v-ape"); - if (IS_ERR(device_data->pwr_regulator)) { - dev_err(dev, "[%s]: could not get cryp regulator", __func__); - ret = PTR_ERR(device_data->pwr_regulator); - device_data->pwr_regulator = NULL; - goto out; - } - - /* Enable the clk for CRYP hardware block */ - device_data->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(device_data->clk)) { - dev_err(dev, "[%s]: clk_get() failed!", __func__); - ret = PTR_ERR(device_data->clk); - goto out_regulator; - } - - ret = 
clk_prepare(device_data->clk); - if (ret) { - dev_err(dev, "[%s]: clk_prepare() failed!", __func__); - goto out_regulator; - } - - /* Enable device power (and clock) */ - ret = cryp_enable_power(device_data->dev, device_data, false); - if (ret) { - dev_err(dev, "[%s]: cryp_enable_power() failed!", __func__); - goto out_clk_unprepare; - } - - if (cryp_check(device_data)) { - dev_err(dev, "[%s]: cryp_check() failed!", __func__); - ret = -EINVAL; - goto out_power; - } - - if (cryp_configure_protection(device_data, &prot)) { - dev_err(dev, "[%s]: cryp_configure_protection() failed!", - __func__); - ret = -EINVAL; - goto out_power; - } - - device_data->irq = platform_get_irq(pdev, 0); - if (device_data->irq <= 0) { - ret = device_data->irq ? device_data->irq : -ENXIO; - goto out_power; - } - - ret = devm_request_irq(&pdev->dev, device_data->irq, - cryp_interrupt_handler, 0, "cryp1", device_data); - if (ret) { - dev_err(dev, "[%s]: Unable to request IRQ", __func__); - goto out_power; - } - - if (cryp_mode == CRYP_MODE_DMA) - cryp_dma_setup_channel(device_data, dev); - - platform_set_drvdata(pdev, device_data); - - /* Put the new device into the device list... */ - klist_add_tail(&device_data->list_node, &driver_data.device_list); - - /* ... and signal that a new device is available. 
*/ - up(&driver_data.device_allocation); - - atomic_set(&session_id, 1); - - ret = cryp_algs_register_all(); - if (ret) { - dev_err(dev, "[%s]: cryp_algs_register_all() failed!", - __func__); - goto out_power; - } - - dev_info(dev, "successfully registered\n"); - - return 0; - -out_power: - cryp_disable_power(device_data->dev, device_data, false); - -out_clk_unprepare: - clk_unprepare(device_data->clk); - -out_regulator: - regulator_put(device_data->pwr_regulator); - -out: - return ret; -} - -static int ux500_cryp_remove(struct platform_device *pdev) -{ - struct cryp_device_data *device_data; - - dev_dbg(&pdev->dev, "[%s]", __func__); - device_data = platform_get_drvdata(pdev); - if (!device_data) { - dev_err(&pdev->dev, "[%s]: platform_get_drvdata() failed!", - __func__); - return -ENOMEM; - } - - /* Try to decrease the number of available devices. */ - if (down_trylock(&driver_data.device_allocation)) - return -EBUSY; - - /* Check that the device is free */ - spin_lock(&device_data->ctx_lock); - /* current_ctx allocates a device, NULL = unallocated */ - if (device_data->current_ctx) { - /* The device is busy */ - spin_unlock(&device_data->ctx_lock); - /* Return the device to the pool. 
*/ - up(&driver_data.device_allocation); - return -EBUSY; - } - - spin_unlock(&device_data->ctx_lock); - - /* Remove the device from the list */ - if (klist_node_attached(&device_data->list_node)) - klist_remove(&device_data->list_node); - - /* If this was the last device, remove the services */ - if (list_empty(&driver_data.device_list.k_list)) - cryp_algs_unregister_all(); - - if (cryp_disable_power(&pdev->dev, device_data, false)) - dev_err(&pdev->dev, "[%s]: cryp_disable_power() failed", - __func__); - - clk_unprepare(device_data->clk); - regulator_put(device_data->pwr_regulator); - - return 0; -} - -static void ux500_cryp_shutdown(struct platform_device *pdev) -{ - struct cryp_device_data *device_data; - - dev_dbg(&pdev->dev, "[%s]", __func__); - - device_data = platform_get_drvdata(pdev); - if (!device_data) { - dev_err(&pdev->dev, "[%s]: platform_get_drvdata() failed!", - __func__); - return; - } - - /* Check that the device is free */ - spin_lock(&device_data->ctx_lock); - /* current_ctx allocates a device, NULL = unallocated */ - if (!device_data->current_ctx) { - if (down_trylock(&driver_data.device_allocation)) - dev_dbg(&pdev->dev, "[%s]: Cryp still in use!" - "Shutting down anyway...", __func__); - /** - * (Allocate the device) - * Need to set this to non-null (dummy) value, - * to avoid usage if context switching. 
- */ - device_data->current_ctx++; - } - spin_unlock(&device_data->ctx_lock); - - /* Remove the device from the list */ - if (klist_node_attached(&device_data->list_node)) - klist_remove(&device_data->list_node); - - /* If this was the last device, remove the services */ - if (list_empty(&driver_data.device_list.k_list)) - cryp_algs_unregister_all(); - - if (cryp_disable_power(&pdev->dev, device_data, false)) - dev_err(&pdev->dev, "[%s]: cryp_disable_power() failed", - __func__); - -} - -#ifdef CONFIG_PM_SLEEP -static int ux500_cryp_suspend(struct device *dev) -{ - int ret; - struct platform_device *pdev = to_platform_device(dev); - struct cryp_device_data *device_data; - struct cryp_ctx *temp_ctx = NULL; - - dev_dbg(dev, "[%s]", __func__); - - /* Handle state? */ - device_data = platform_get_drvdata(pdev); - if (!device_data) { - dev_err(dev, "[%s]: platform_get_drvdata() failed!", __func__); - return -ENOMEM; - } - - disable_irq(device_data->irq); - - spin_lock(&device_data->ctx_lock); - if (!device_data->current_ctx) - device_data->current_ctx++; - spin_unlock(&device_data->ctx_lock); - - if (device_data->current_ctx == ++temp_ctx) { - if (down_interruptible(&driver_data.device_allocation)) - dev_dbg(dev, "[%s]: down_interruptible() failed", - __func__); - ret = cryp_disable_power(dev, device_data, false); - - } else - ret = cryp_disable_power(dev, device_data, true); - - if (ret) - dev_err(dev, "[%s]: cryp_disable_power()", __func__); - - return ret; -} - -static int ux500_cryp_resume(struct device *dev) -{ - int ret = 0; - struct platform_device *pdev = to_platform_device(dev); - struct cryp_device_data *device_data; - struct cryp_ctx *temp_ctx = NULL; - - dev_dbg(dev, "[%s]", __func__); - - device_data = platform_get_drvdata(pdev); - if (!device_data) { - dev_err(dev, "[%s]: platform_get_drvdata() failed!", __func__); - return -ENOMEM; - } - - spin_lock(&device_data->ctx_lock); - if (device_data->current_ctx == ++temp_ctx) - device_data->current_ctx = NULL; - 
spin_unlock(&device_data->ctx_lock); - - - if (!device_data->current_ctx) - up(&driver_data.device_allocation); - else - ret = cryp_enable_power(dev, device_data, true); - - if (ret) - dev_err(dev, "[%s]: cryp_enable_power() failed!", __func__); - else - enable_irq(device_data->irq); - - return ret; -} -#endif - -static SIMPLE_DEV_PM_OPS(ux500_cryp_pm, ux500_cryp_suspend, ux500_cryp_resume); - -static const struct of_device_id ux500_cryp_match[] = { - { .compatible = "stericsson,ux500-cryp" }, - { }, -}; -MODULE_DEVICE_TABLE(of, ux500_cryp_match); - -static struct platform_driver cryp_driver = { - .probe = ux500_cryp_probe, - .remove = ux500_cryp_remove, - .shutdown = ux500_cryp_shutdown, - .driver = { - .name = "cryp1", - .of_match_table = ux500_cryp_match, - .pm = &ux500_cryp_pm, - } -}; - -static int __init ux500_cryp_mod_init(void) -{ - pr_debug("[%s] is called!", __func__); - klist_init(&driver_data.device_list, NULL, NULL); - /* Initialize the semaphore to 0 devices (locked state) */ - sema_init(&driver_data.device_allocation, 0); - return platform_driver_register(&cryp_driver); -} - -static void __exit ux500_cryp_mod_fini(void) -{ - pr_debug("[%s] is called!", __func__); - platform_driver_unregister(&cryp_driver); -} - -module_init(ux500_cryp_mod_init); -module_exit(ux500_cryp_mod_fini); - -module_param(cryp_mode, int, 0); - -MODULE_DESCRIPTION("Driver for ST-Ericsson UX500 CRYP crypto engine."); -MODULE_ALIAS_CRYPTO("aes-all"); -MODULE_ALIAS_CRYPTO("des-all"); - -MODULE_LICENSE("GPL"); diff --git a/drivers/crypto/ux500/cryp/cryp_irq.c b/drivers/crypto/ux500/cryp/cryp_irq.c deleted file mode 100644 index 6d2f07bec98a..000000000000 --- a/drivers/crypto/ux500/cryp/cryp_irq.c +++ /dev/null @@ -1,45 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) ST-Ericsson SA 2010 - * Author: Shujuan Chen for ST-Ericsson. - * Author: Jonas Linde for ST-Ericsson. - * Author: Joakim Bech for ST-Ericsson. - * Author: Berne Hebark for ST-Ericsson. 
- * Author: Niklas Hernaeus for ST-Ericsson. - */ - -#include -#include -#include - -#include "cryp.h" -#include "cryp_p.h" -#include "cryp_irq.h" -#include "cryp_irqp.h" - -void cryp_enable_irq_src(struct cryp_device_data *device_data, u32 irq_src) -{ - u32 i; - - dev_dbg(device_data->dev, "[%s]", __func__); - - i = readl_relaxed(&device_data->base->imsc); - i = i | irq_src; - writel_relaxed(i, &device_data->base->imsc); -} - -void cryp_disable_irq_src(struct cryp_device_data *device_data, u32 irq_src) -{ - u32 i; - - dev_dbg(device_data->dev, "[%s]", __func__); - - i = readl_relaxed(&device_data->base->imsc); - i = i & ~irq_src; - writel_relaxed(i, &device_data->base->imsc); -} - -bool cryp_pending_irq_src(struct cryp_device_data *device_data, u32 irq_src) -{ - return (readl_relaxed(&device_data->base->mis) & irq_src) > 0; -} diff --git a/drivers/crypto/ux500/cryp/cryp_irq.h b/drivers/crypto/ux500/cryp/cryp_irq.h deleted file mode 100644 index da90029ea141..000000000000 --- a/drivers/crypto/ux500/cryp/cryp_irq.h +++ /dev/null @@ -1,31 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) ST-Ericsson SA 2010 - * Author: Shujuan Chen for ST-Ericsson. - * Author: Jonas Linde for ST-Ericsson. - * Author: Joakim Bech for ST-Ericsson. - * Author: Berne Hebark for ST-Ericsson. - * Author: Niklas Hernaeus for ST-Ericsson. 
- */ - -#ifndef _CRYP_IRQ_H_ -#define _CRYP_IRQ_H_ - -#include "cryp.h" - -enum cryp_irq_src_id { - CRYP_IRQ_SRC_INPUT_FIFO = 0x1, - CRYP_IRQ_SRC_OUTPUT_FIFO = 0x2, - CRYP_IRQ_SRC_ALL = 0x3 -}; - -/* - * M0 Funtions - */ -void cryp_enable_irq_src(struct cryp_device_data *device_data, u32 irq_src); - -void cryp_disable_irq_src(struct cryp_device_data *device_data, u32 irq_src); - -bool cryp_pending_irq_src(struct cryp_device_data *device_data, u32 irq_src); - -#endif /* _CRYP_IRQ_H_ */ diff --git a/drivers/crypto/ux500/cryp/cryp_irqp.h b/drivers/crypto/ux500/cryp/cryp_irqp.h deleted file mode 100644 index 4981a3f461e5..000000000000 --- a/drivers/crypto/ux500/cryp/cryp_irqp.h +++ /dev/null @@ -1,125 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) ST-Ericsson SA 2010 - * Author: Shujuan Chen for ST-Ericsson. - * Author: Jonas Linde for ST-Ericsson. - * Author: Joakim Bech for ST-Ericsson. - * Author: Berne Hebark for ST-Ericsson. - * Author: Niklas Hernaeus for ST-Ericsson. - */ - -#ifndef __CRYP_IRQP_H_ -#define __CRYP_IRQP_H_ - -#include "cryp_irq.h" - -/* - * - * CRYP Registers - Offset mapping - * +-----------------+ - * 00h | CRYP_CR | Configuration register - * +-----------------+ - * 04h | CRYP_SR | Status register - * +-----------------+ - * 08h | CRYP_DIN | Data In register - * +-----------------+ - * 0ch | CRYP_DOUT | Data out register - * +-----------------+ - * 10h | CRYP_DMACR | DMA control register - * +-----------------+ - * 14h | CRYP_IMSC | IMSC - * +-----------------+ - * 18h | CRYP_RIS | Raw interrupt status - * +-----------------+ - * 1ch | CRYP_MIS | Masked interrupt status. 
- * +-----------------+ - * Key registers - * IVR registers - * Peripheral - * Cell IDs - * - * Refer data structure for other register map - */ - -/** - * struct cryp_register - * @cr - Configuration register - * @status - Status register - * @din - Data input register - * @din_size - Data input size register - * @dout - Data output register - * @dout_size - Data output size register - * @dmacr - Dma control register - * @imsc - Interrupt mask set/clear register - * @ris - Raw interrupt status - * @mis - Masked interrupt statu register - * @key_1_l - Key register 1 L - * @key_1_r - Key register 1 R - * @key_2_l - Key register 2 L - * @key_2_r - Key register 2 R - * @key_3_l - Key register 3 L - * @key_3_r - Key register 3 R - * @key_4_l - Key register 4 L - * @key_4_r - Key register 4 R - * @init_vect_0_l - init vector 0 L - * @init_vect_0_r - init vector 0 R - * @init_vect_1_l - init vector 1 L - * @init_vect_1_r - init vector 1 R - * @cryp_unused1 - unused registers - * @itcr - Integration test control register - * @itip - Integration test input register - * @itop - Integration test output register - * @cryp_unused2 - unused registers - * @periphId0 - FE0 CRYP Peripheral Identication Register - * @periphId1 - FE4 - * @periphId2 - FE8 - * @periphId3 - FEC - * @pcellId0 - FF0 CRYP PCell Identication Register - * @pcellId1 - FF4 - * @pcellId2 - FF8 - * @pcellId3 - FFC - */ -struct cryp_register { - u32 cr; /* Configuration register */ - u32 sr; /* Status register */ - u32 din; /* Data input register */ - u32 din_size; /* Data input size register */ - u32 dout; /* Data output register */ - u32 dout_size; /* Data output size register */ - u32 dmacr; /* Dma control register */ - u32 imsc; /* Interrupt mask set/clear register */ - u32 ris; /* Raw interrupt status */ - u32 mis; /* Masked interrupt statu register */ - - u32 key_1_l; /*Key register 1 L */ - u32 key_1_r; /*Key register 1 R */ - u32 key_2_l; /*Key register 2 L */ - u32 key_2_r; /*Key register 2 R */ - u32 
key_3_l; /*Key register 3 L */ - u32 key_3_r; /*Key register 3 R */ - u32 key_4_l; /*Key register 4 L */ - u32 key_4_r; /*Key register 4 R */ - - u32 init_vect_0_l; /*init vector 0 L */ - u32 init_vect_0_r; /*init vector 0 R */ - u32 init_vect_1_l; /*init vector 1 L */ - u32 init_vect_1_r; /*init vector 1 R */ - - u32 cryp_unused1[(0x80 - 0x58) / sizeof(u32)]; /* unused registers */ - u32 itcr; /*Integration test control register */ - u32 itip; /*Integration test input register */ - u32 itop; /*Integration test output register */ - u32 cryp_unused2[(0xFE0 - 0x8C) / sizeof(u32)]; /* unused registers */ - - u32 periphId0; /* FE0 CRYP Peripheral Identication Register */ - u32 periphId1; /* FE4 */ - u32 periphId2; /* FE8 */ - u32 periphId3; /* FEC */ - - u32 pcellId0; /* FF0 CRYP PCell Identication Register */ - u32 pcellId1; /* FF4 */ - u32 pcellId2; /* FF8 */ - u32 pcellId3; /* FFC */ -}; - -#endif diff --git a/drivers/crypto/ux500/cryp/cryp_p.h b/drivers/crypto/ux500/cryp/cryp_p.h deleted file mode 100644 index 60b47fe4de35..000000000000 --- a/drivers/crypto/ux500/cryp/cryp_p.h +++ /dev/null @@ -1,122 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) ST-Ericsson SA 2010 - * Author: Shujuan Chen for ST-Ericsson. - * Author: Jonas Linde for ST-Ericsson. - * Author: Joakim Bech for ST-Ericsson. - * Author: Berne Hebark for ST-Ericsson. - * Author: Niklas Hernaeus for ST-Ericsson. 
- */ - -#ifndef _CRYP_P_H_ -#define _CRYP_P_H_ - -#include -#include - -#include "cryp.h" -#include "cryp_irqp.h" - -/* - * Generic Macros - */ -#define CRYP_SET_BITS(reg_name, mask) \ - writel_relaxed((readl_relaxed(reg_name) | mask), reg_name) - -#define CRYP_WRITE_BIT(reg_name, val, mask) \ - writel_relaxed(((readl_relaxed(reg_name) & ~(mask)) |\ - ((val) & (mask))), reg_name) - -#define CRYP_TEST_BITS(reg_name, val) \ - (readl_relaxed(reg_name) & (val)) - -#define CRYP_PUT_BITS(reg, val, shift, mask) \ - writel_relaxed(((readl_relaxed(reg) & ~(mask)) | \ - (((u32)val << shift) & (mask))), reg) - -/* - * CRYP specific Macros - */ -#define CRYP_PERIPHERAL_ID0 0xE3 -#define CRYP_PERIPHERAL_ID1 0x05 - -#define CRYP_PERIPHERAL_ID2_DB8500 0x28 -#define CRYP_PERIPHERAL_ID3 0x00 - -#define CRYP_PCELL_ID0 0x0D -#define CRYP_PCELL_ID1 0xF0 -#define CRYP_PCELL_ID2 0x05 -#define CRYP_PCELL_ID3 0xB1 - -/* - * CRYP register default values - */ -#define MAX_DEVICE_SUPPORT 2 - -/* Priv set, keyrden set and datatype 8bits swapped set as default. 
*/ -#define CRYP_CR_DEFAULT 0x0482 -#define CRYP_DMACR_DEFAULT 0x0 -#define CRYP_IMSC_DEFAULT 0x0 -#define CRYP_DIN_DEFAULT 0x0 -#define CRYP_DOUT_DEFAULT 0x0 -#define CRYP_KEY_DEFAULT 0x0 -#define CRYP_INIT_VECT_DEFAULT 0x0 - -/* - * CRYP Control register specific mask - */ -#define CRYP_CR_SECURE_MASK BIT(0) -#define CRYP_CR_PRLG_MASK BIT(1) -#define CRYP_CR_ALGODIR_MASK BIT(2) -#define CRYP_CR_ALGOMODE_MASK (BIT(5) | BIT(4) | BIT(3)) -#define CRYP_CR_DATATYPE_MASK (BIT(7) | BIT(6)) -#define CRYP_CR_KEYSIZE_MASK (BIT(9) | BIT(8)) -#define CRYP_CR_KEYRDEN_MASK BIT(10) -#define CRYP_CR_KSE_MASK BIT(11) -#define CRYP_CR_START_MASK BIT(12) -#define CRYP_CR_INIT_MASK BIT(13) -#define CRYP_CR_FFLUSH_MASK BIT(14) -#define CRYP_CR_CRYPEN_MASK BIT(15) -#define CRYP_CR_CONTEXT_SAVE_MASK (CRYP_CR_SECURE_MASK |\ - CRYP_CR_PRLG_MASK |\ - CRYP_CR_ALGODIR_MASK |\ - CRYP_CR_ALGOMODE_MASK |\ - CRYP_CR_KEYSIZE_MASK |\ - CRYP_CR_KEYRDEN_MASK |\ - CRYP_CR_DATATYPE_MASK) - - -#define CRYP_SR_INFIFO_READY_MASK (BIT(0) | BIT(1)) -#define CRYP_SR_IFEM_MASK BIT(0) -#define CRYP_SR_BUSY_MASK BIT(4) - -/* - * Bit position used while setting bits in register - */ -#define CRYP_CR_PRLG_POS 1 -#define CRYP_CR_ALGODIR_POS 2 -#define CRYP_CR_ALGOMODE_POS 3 -#define CRYP_CR_DATATYPE_POS 6 -#define CRYP_CR_KEYSIZE_POS 8 -#define CRYP_CR_KEYRDEN_POS 10 -#define CRYP_CR_KSE_POS 11 -#define CRYP_CR_START_POS 12 -#define CRYP_CR_INIT_POS 13 -#define CRYP_CR_CRYPEN_POS 15 - -#define CRYP_SR_BUSY_POS 4 - -/* - * CRYP PCRs------PC_NAND control register - * BIT_MASK - */ -#define CRYP_DMA_REQ_MASK (BIT(1) | BIT(0)) -#define CRYP_DMA_REQ_MASK_POS 0 - - -struct cryp_system_context { - /* CRYP Register structure */ - struct cryp_register *p_cryp_reg[MAX_DEVICE_SUPPORT]; -}; - -#endif From fd3dc56253acbe9c641a66d312d8393cd55eb04c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Fri, 9 Dec 2022 10:52:47 -0500 Subject: [PATCH 3589/4122] ftrace/x86: Add back ftrace_expected for ftrace bug reports 
After someone reported a bug report with a failed modification due to the expected value not matching what was found, it came to my attention that the ftrace_expected is no longer set when that happens. This makes for debugging the issue a bit more difficult. Set ftrace_expected to the expected code before calling ftrace_bug, so that it shows what was expected and why it failed. Link: https://lore.kernel.org/all/CA+wXwBQ-VhK+hpBtYtyZP-NiX4g8fqRRWithFOHQW-0coQ3vLg@mail.gmail.com/ Link: https://lore.kernel.org/linux-trace-kernel/20221209105247.01d4e51d@gandalf.local.home Cc: Masami Hiramatsu Cc: Andrew Morton Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: "x86@kernel.org" Cc: Borislav Petkov Cc: Ingo Molnar Cc: stable@vger.kernel.org Fixes: 768ae4406a5c ("x86/ftrace: Use text_poke()") Signed-off-by: Steven Rostedt (Google) --- arch/x86/kernel/ftrace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index bd165004776d..e07234ec7e23 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -217,7 +217,9 @@ void ftrace_replace_code(int enable) ret = ftrace_verify_code(rec->ip, old); if (ret) { + ftrace_expected = old; ftrace_bug(ret, rec); + ftrace_expected = NULL; return; } } From c9b8a83a8f2dca9f82288a621595a6a5970cdc5e Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 7 Dec 2022 16:44:41 -0400 Subject: [PATCH 3590/4122] iommufd: Fix comment typos Repair some typos in comments that were noticed late in the review cycle. 
Fixes: f394576eb11d ("iommufd: PFN handling for iopt_pages") Link: https://lore.kernel.org/r/1-v1-0362a1a1c034+98-iommufd_fixes1_jgg@nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Eric Auger Reported-by: Binbin Wu Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/io_pagetable.h | 2 +- drivers/iommu/iommufd/pages.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/iommufd/io_pagetable.h b/drivers/iommu/iommufd/io_pagetable.h index 83e7c175f2a2..0ec3509b7e33 100644 --- a/drivers/iommu/iommufd/io_pagetable.h +++ b/drivers/iommu/iommufd/io_pagetable.h @@ -33,7 +33,7 @@ struct iommu_domain; * * The io_pagetable::iova_rwsem protects node * The iopt_pages::mutex protects pages_node - * iopt and immu_prot are immutable + * iopt and iommu_prot are immutable * The pages::mutex protects num_accesses */ struct iopt_area { diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c index 429fa3b0a239..fccdba782cb6 100644 --- a/drivers/iommu/iommufd/pages.c +++ b/drivers/iommu/iommufd/pages.c @@ -342,7 +342,7 @@ static void batch_destroy(struct pfn_batch *batch, void *backup) kfree(batch->pfns); } -/* true if the pfn could be added, false otherwise */ +/* true if the pfn was added, false otherwise */ static bool batch_add_pfn(struct pfn_batch *batch, unsigned long pfn) { const unsigned int MAX_NPFNS = type_max(typeof(*batch->npfns)); @@ -418,7 +418,7 @@ static struct page **raw_pages_from_domain(struct iommu_domain *domain, return out_pages; } -/* Continues reading a domain until we reach a discontiguity in the pfns. */ +/* Continues reading a domain until we reach a discontinuity in the pfns. 
*/ static void batch_from_domain_continue(struct pfn_batch *batch, struct iommu_domain *domain, struct iopt_area *area, From a26fa392068d1dcdf781397b7a7dd908dd68f030 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 7 Dec 2022 16:44:42 -0400 Subject: [PATCH 3591/4122] iommufd: Improve a few unclear bits of code Correct a few items noticed late in review: - We should assert that the math in batch_clear_carry() doesn't underflow - user->locked should be -1 not 0 since we just did mmput - npages should not have been recalculated, it already has that value No functional change. Fixes: 8d160cd4d506 ("iommufd: Algorithms for PFN storage") Link: https://lore.kernel.org/r/2-v1-0362a1a1c034+98-iommufd_fixes1_jgg@nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Eric Auger Reported-by: Binbin Wu Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/pages.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c index fccdba782cb6..c77177229648 100644 --- a/drivers/iommu/iommufd/pages.c +++ b/drivers/iommu/iommufd/pages.c @@ -289,6 +289,10 @@ static void batch_clear_carry(struct pfn_batch *batch, unsigned int keep_pfns) if (!keep_pfns) return batch_clear(batch); + if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) + WARN_ON(!batch->end || + batch->npfns[batch->end - 1] < keep_pfns); + batch->total_pfns = keep_pfns; batch->npfns[0] = keep_pfns; batch->pfns[0] = batch->pfns[batch->end - 1] + @@ -723,7 +727,7 @@ static void pfn_reader_user_destroy(struct pfn_reader_user *user, mmap_read_unlock(pages->source_mm); if (pages->source_mm != current->mm) mmput(pages->source_mm); - user->locked = 0; + user->locked = -1; } kfree(user->upages); @@ -810,7 +814,6 @@ static int incr_user_locked_vm(struct iopt_pages *pages, unsigned long npages) lock_limit = task_rlimit(pages->source_task, RLIMIT_MEMLOCK) >> PAGE_SHIFT; - npages = pages->npinned - pages->last_npinned; do { cur_pages =
atomic_long_read(&pages->source_user->locked_vm); new_pages = cur_pages + npages; From d6c55c0a20e5059abdde81713ddf6324a946eb3c Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 7 Dec 2022 16:44:43 -0400 Subject: [PATCH 3592/4122] iommufd: Change the order of MSI setup Eric points out this is wrong for the rare case of someone using allow_unsafe_interrupts on ARM. We always have to setup the MSI window in the domain if the iommu driver asks for it. Move the iommu_get_msi_cookie() setup to the top of the function and always do it, regardless of the security mode. Add checks to iommufd_device_setup_msi() to ensure the driver is not doing something incomprehensible. No current driver will set both a HW and SW MSI window, or have more than one SW MSI window. Fixes: e8d57210035b ("iommufd: Add kAPI toward external drivers for physical devices") Link: https://lore.kernel.org/r/3-v1-0362a1a1c034+98-iommufd_fixes1_jgg@nvidia.com Reviewed-by: Kevin Tian Reported-by: Eric Auger Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/device.c | 56 +++++++++++++--------------- drivers/iommu/iommufd/io_pagetable.c | 24 +++++++----- 2 files changed, 39 insertions(+), 41 deletions(-) diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index dd2a415b603e..d81f93a321af 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -139,19 +139,11 @@ static int iommufd_device_setup_msi(struct iommufd_device *idev, int rc; /* - * IOMMU_CAP_INTR_REMAP means that the platform is isolating MSI, and it - * creates the MSI window by default in the iommu domain. Nothing - * further to do. - */ - if (device_iommu_capable(idev->dev, IOMMU_CAP_INTR_REMAP)) - return 0; - - /* - * On ARM systems that set the global IRQ_DOMAIN_FLAG_MSI_REMAP every - * allocated iommu_domain will block interrupts by default and this - * special flow is needed to turn them back on. 
iommu_dma_prepare_msi() - * will install pages into our domain after request_irq() to make this - * work. + * If the IOMMU driver gives a IOMMU_RESV_SW_MSI then it is asking us to + * call iommu_get_msi_cookie() on its behalf. This is necessary to setup + * the MSI window so iommu_dma_prepare_msi() can install pages into our + * domain after request_irq(). If it is not done interrupts will not + * work on this domain. * * FIXME: This is conceptually broken for iommufd since we want to allow * userspace to change the domains, eg switch from an identity IOAS to a @@ -159,33 +151,35 @@ static int iommufd_device_setup_msi(struct iommufd_device *idev, * matches what the IRQ layer actually expects in a newly created * domain. */ - if (irq_domain_check_msi_remap()) { - if (WARN_ON(!sw_msi_start)) - return -EPERM; + if (sw_msi_start != PHYS_ADDR_MAX && !hwpt->msi_cookie) { + rc = iommu_get_msi_cookie(hwpt->domain, sw_msi_start); + if (rc) + return rc; + /* * iommu_get_msi_cookie() can only be called once per domain, * it returns -EBUSY on later calls. */ - if (hwpt->msi_cookie) - return 0; - rc = iommu_get_msi_cookie(hwpt->domain, sw_msi_start); - if (rc) - return rc; hwpt->msi_cookie = true; - return 0; } /* - * Otherwise the platform has a MSI window that is not isolated. For - * historical compat with VFIO allow a module parameter to ignore the - * insecurity. + * For historical compat with VFIO the insecure interrupt path is + * allowed if the module parameter is set. Insecure means that a MemWr + * operation from the device (eg a simple DMA) cannot trigger an + * interrupt outside this iommufd context. */ - if (!allow_unsafe_interrupts) - return -EPERM; + if (!device_iommu_capable(idev->dev, IOMMU_CAP_INTR_REMAP) && + !irq_domain_check_msi_remap()) { + if (!allow_unsafe_interrupts) + return -EPERM; - dev_warn( - idev->dev, - "MSI interrupt window cannot be isolated by the IOMMU, this platform is insecure. 
Use the \"allow_unsafe_interrupts\" module parameter to override\n"); + dev_warn( + idev->dev, + "MSI interrupts are not secure, they cannot be isolated by the platform. " + "Check that platform features like interrupt remapping are enabled. " + "Use the \"allow_unsafe_interrupts\" module parameter to override\n"); + } return 0; } @@ -203,7 +197,7 @@ static bool iommufd_hw_pagetable_has_group(struct iommufd_hw_pagetable *hwpt, static int iommufd_device_do_attach(struct iommufd_device *idev, struct iommufd_hw_pagetable *hwpt) { - phys_addr_t sw_msi_start = 0; + phys_addr_t sw_msi_start = PHYS_ADDR_MAX; int rc; mutex_lock(&hwpt->devices_lock); diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c index 3467cea79568..e0ae72b9e67f 100644 --- a/drivers/iommu/iommufd/io_pagetable.c +++ b/drivers/iommu/iommufd/io_pagetable.c @@ -1170,6 +1170,8 @@ int iopt_table_enforce_group_resv_regions(struct io_pagetable *iopt, struct iommu_resv_region *resv; struct iommu_resv_region *tmp; LIST_HEAD(group_resv_regions); + unsigned int num_hw_msi = 0; + unsigned int num_sw_msi = 0; int rc; down_write(&iopt->iova_rwsem); @@ -1181,23 +1183,25 @@ int iopt_table_enforce_group_resv_regions(struct io_pagetable *iopt, if (resv->type == IOMMU_RESV_DIRECT_RELAXABLE) continue; - /* - * The presence of any 'real' MSI regions should take precedence - * over the software-managed one if the IOMMU driver happens to - * advertise both types. 
- */ - if (sw_msi_start && resv->type == IOMMU_RESV_MSI) { - *sw_msi_start = 0; - sw_msi_start = NULL; - } - if (sw_msi_start && resv->type == IOMMU_RESV_SW_MSI) + if (sw_msi_start && resv->type == IOMMU_RESV_MSI) + num_hw_msi++; + if (sw_msi_start && resv->type == IOMMU_RESV_SW_MSI) { *sw_msi_start = resv->start; + num_sw_msi++; + } rc = iopt_reserve_iova(iopt, resv->start, resv->length - 1 + resv->start, device); if (rc) goto out_reserved; } + + /* Drivers must offer sane combinations of regions */ + if (WARN_ON(num_sw_msi && num_hw_msi) || WARN_ON(num_sw_msi > 1)) { + rc = -EINVAL; + goto out_reserved; + } + rc = 0; goto out_free_resv; From 3282a549cf9b300e2d1b007925ed007ab24e4131 Mon Sep 17 00:00:00 2001 From: Daisuke Matsuda Date: Fri, 9 Dec 2022 13:59:26 +0900 Subject: [PATCH 3593/4122] RDMA/rxe: Fix oops with zero length reads The commit 686d348476ee ("RDMA/rxe: Remove unnecessary mr testing") causes a kernel crash. If the responder gets a zero-byte RDMA Read request, qp->resp.mr is not set in check_rkey() (see IBA C9-88). The mr is NULL in this case, and a NULL pointer dereference occurs as shown below.
BUG: kernel NULL pointer dereference, address: 0000000000000010 #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page PGD 0 P4D 0 Oops: 0002 [#1] PREEMPT SMP PTI CPU: 2 PID: 3622 Comm: python3 Kdump: loaded Not tainted 6.1.0-rc3+ #34 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 RIP: 0010:__rxe_put+0xc/0x60 [rdma_rxe] Code: cc cc cc 31 f6 e8 64 36 1b d3 41 b8 01 00 00 00 44 89 c0 c3 cc cc cc cc 41 89 c0 eb c1 90 0f 1f 44 00 00 41 54 b8 ff ff ff ff 0f c1 47 10 83 f8 01 74 11 45 31 e4 85 c0 7e 20 44 89 e0 41 5c RSP: 0018:ffffb27bc012ce78 EFLAGS: 00010246 RAX: 00000000ffffffff RBX: ffff9790857b0580 RCX: 0000000000000000 RDX: ffff979080fe145a RSI: 000055560e3e0000 RDI: 0000000000000000 RBP: ffff97909c7dd800 R08: 0000000000000001 R09: e7ce43d97f7bed0f R10: ffff97908b29c300 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: ffff97908b29c300 R15: 0000000000000000 FS: 00007f276f7bd740(0000) GS:ffff9792b5c80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000010 CR3: 0000000114230002 CR4: 0000000000060ee0 Call Trace: read_reply+0xda/0x310 [rdma_rxe] rxe_responder+0x82d/0xe50 [rdma_rxe] do_task+0x84/0x170 [rdma_rxe] tasklet_action_common.constprop.0+0xa7/0x120 __do_softirq+0xcb/0x2ac do_softirq+0x63/0x90 Support a NULL mr during read_reply() Fixes: 686d348476ee ("RDMA/rxe: Remove unnecessary mr testing") Fixes: b5f9a01fae42 ("RDMA/rxe: Fix mr leak in RESPST_ERR_RNR") Link: https://lore.kernel.org/r/20221209045926.531689-1-matsuda-daisuke@fujitsu.com Link: https://lore.kernel.org/r/20221202145713.13152-1-lizhijian@fujitsu.com Signed-off-by: Daisuke Matsuda Signed-off-by: Li Zhijian Reviewed-by: Li Zhijian Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_resp.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 
12eb85e8d415..0c8bec281937 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -460,7 +460,7 @@ static enum resp_states check_rkey(struct rxe_qp *qp, return RESPST_EXECUTE; } - /* A zero-byte op is not required to set an addr or rkey. */ + /* A zero-byte op is not required to set an addr or rkey. See C9-88 */ if ((pkt->mask & RXE_READ_OR_WRITE_MASK) && (pkt->mask & RXE_RETH_MASK) && reth_len(pkt) == 0) { @@ -880,13 +880,15 @@ static enum resp_states read_reply(struct rxe_qp *qp, skb = prepare_ack_packet(qp, &ack_pkt, opcode, payload, res->cur_psn, AETH_ACK_UNLIMITED); if (!skb) { - rxe_put(mr); + if (mr) + rxe_put(mr); return RESPST_ERR_RNR; } err = rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt), payload, RXE_FROM_MR_OBJ); - rxe_put(mr); + if (mr) + rxe_put(mr); if (err) { kfree_skb(skb); return RESPST_ERR_RKEY_VIOLATION; From 6ff8ca3f93d3cd2a77f051d2d971cf3638d39546 Mon Sep 17 00:00:00 2001 From: Qinglin Pan Date: Mon, 28 Nov 2022 10:36:43 +0800 Subject: [PATCH 3594/4122] riscv: mm: call best_map_size many times during linear-mapping Modify the best_map_size function to give map_size many times instead of only once, so a memory region can be mapped by both PMD_SIZE and PAGE_SIZE. 
Signed-off-by: Qinglin Pan Reviewed-by: Andrew Jones Reviewed-by: Alexandre Ghiti Tested-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20221128023643.329091-1-panqinglin2020@iscas.ac.cn Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 7d59516ce6b3..bb0028c07ef3 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -673,10 +673,11 @@ void __init create_pgd_mapping(pgd_t *pgdp, static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size) { /* Upgrade to PMD_SIZE mappings whenever possible */ - if ((base & (PMD_SIZE - 1)) || (size & (PMD_SIZE - 1))) - return PAGE_SIZE; + base &= PMD_SIZE - 1; + if (!base && size >= PMD_SIZE) + return PMD_SIZE; - return PMD_SIZE; + return PAGE_SIZE; } #ifdef CONFIG_XIP_KERNEL @@ -1111,9 +1112,9 @@ static void __init setup_vm_final(void) if (end >= __pa(PAGE_OFFSET) + memory_limit) end = __pa(PAGE_OFFSET) + memory_limit; - map_size = best_map_size(start, end - start); for (pa = start; pa < end; pa += map_size) { va = (uintptr_t)__va(pa); + map_size = best_map_size(pa, end - pa); create_pgd_mapping(swapper_pg_dir, va, pa, map_size, pgprot_from_va(va)); From 689c5421bfe0eac65526bd97a466b9590a6aad3c Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 8 Dec 2022 15:09:46 -0600 Subject: [PATCH 3595/4122] RDMA/rxe: Fix incorrect responder length checking The code in rxe_resp.c at check_length() is incorrect as it compares pkt->opcode an 8 bit value against various mask bits which are all higher than 256 so nothing is ever reported. This patch rewrites this to compare against pkt->mask which is correct. However this now exposes another error. For UD send packets the value of the pmtu cannot be determined from qp->mtu. All that is required here is to later check if the payload fits into the posted receive buffer in that case. 
Fixes: 837a55847ead ("RDMA/rxe: Implement packet length validation on responder") Link: https://lore.kernel.org/r/20221208210945.28607-1-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Reviewed-by: Daisuke Matsuda Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_resp.c | 60 ++++++++++++++++------------ 1 file changed, 35 insertions(+), 25 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 0c8bec281937..abbaa41017e8 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -392,36 +392,46 @@ static enum resp_states check_resource(struct rxe_qp *qp, return RESPST_CHK_LENGTH; } -static enum resp_states check_length(struct rxe_qp *qp, - struct rxe_pkt_info *pkt) +static enum resp_states rxe_resp_check_length(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) { - int mtu = qp->mtu; - u32 payload = payload_size(pkt); - u32 dmalen = reth_len(pkt); - - /* RoCEv2 packets do not have LRH. - * Let's skip checking it. + /* + * See IBA C9-92 + * For UD QPs we only check if the packet will fit in the + * receive buffer later. For rmda operations additional + * length checks are performed in check_rkey. 
*/ + if (pkt->mask & RXE_PAYLOAD_MASK && ((qp_type(qp) == IB_QPT_RC) || + (qp_type(qp) == IB_QPT_UC))) { + unsigned int mtu = qp->mtu; + unsigned int payload = payload_size(pkt); - if ((pkt->opcode & RXE_START_MASK) && - (pkt->opcode & RXE_END_MASK)) { - /* "only" packets */ - if (payload > mtu) - return RESPST_ERR_LENGTH; - } else if ((pkt->opcode & RXE_START_MASK) || - (pkt->opcode & RXE_MIDDLE_MASK)) { - /* "first" or "middle" packets */ - if (payload != mtu) - return RESPST_ERR_LENGTH; - } else if (pkt->opcode & RXE_END_MASK) { - /* "last" packets */ - if ((payload == 0) || (payload > mtu)) - return RESPST_ERR_LENGTH; + if ((pkt->mask & RXE_START_MASK) && + (pkt->mask & RXE_END_MASK)) { + if (unlikely(payload > mtu)) { + rxe_dbg_qp(qp, "only packet too long"); + return RESPST_ERR_LENGTH; + } + } else if ((pkt->mask & RXE_START_MASK) || + (pkt->mask & RXE_MIDDLE_MASK)) { + if (unlikely(payload != mtu)) { + rxe_dbg_qp(qp, "first or middle packet not mtu"); + return RESPST_ERR_LENGTH; + } + } else if (pkt->mask & RXE_END_MASK) { + if (unlikely((payload == 0) || (payload > mtu))) { + rxe_dbg_qp(qp, "last packet zero or too long"); + return RESPST_ERR_LENGTH; + } + } } - if (pkt->opcode & (RXE_WRITE_MASK | RXE_READ_MASK)) { - if (dmalen > (1 << 31)) + /* See IBA C9-94 */ + if (pkt->mask & RXE_RETH_MASK) { + if (reth_len(pkt) > (1U << 31)) { + rxe_dbg_qp(qp, "dma length too long"); return RESPST_ERR_LENGTH; + } } return RESPST_CHK_RKEY; @@ -1401,7 +1411,7 @@ int rxe_responder(void *arg) state = check_resource(qp, pkt); break; case RESPST_CHK_LENGTH: - state = check_length(qp, pkt); + state = rxe_resp_check_length(qp, pkt); break; case RESPST_CHK_RKEY: state = check_rkey(qp, pkt); From 0c17da492dc6c33cc5b99633adb4bd7b2587153c Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Tue, 6 Dec 2022 21:01:52 +0800 Subject: [PATCH 3596/4122] RDMA: Extend RDMA user ABI to support flush This commit extends the RDMA user ABI to support the flush operation defined in IBA A19.4.1. 
These changes are backward compatible with the existing RDMA user ABI. Link: https://lore.kernel.org/r/20221206130201.30986-2-lizhijian@fujitsu.com Reviewed-by: Zhu Yanjun Signed-off-by: Li Zhijian Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/ib_user_ioctl_verbs.h | 2 ++ include/uapi/rdma/ib_user_verbs.h | 17 +++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h index e0c25537fd2e..d7c5aaa32744 100644 --- a/include/uapi/rdma/ib_user_ioctl_verbs.h +++ b/include/uapi/rdma/ib_user_ioctl_verbs.h @@ -57,6 +57,8 @@ enum ib_uverbs_access_flags { IB_UVERBS_ACCESS_ZERO_BASED = 1 << 5, IB_UVERBS_ACCESS_ON_DEMAND = 1 << 6, IB_UVERBS_ACCESS_HUGETLB = 1 << 7, + IB_UVERBS_ACCESS_FLUSH_GLOBAL = 1 << 8, + IB_UVERBS_ACCESS_FLUSH_PERSISTENT = 1 << 9, IB_UVERBS_ACCESS_RELAXED_ORDERING = IB_UVERBS_ACCESS_OPTIONAL_FIRST, IB_UVERBS_ACCESS_OPTIONAL_RANGE = diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 237814815544..e16650f0c85d 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -105,6 +105,18 @@ enum { IB_USER_VERBS_EX_CMD_MODIFY_CQ }; +/* see IBA A19.4.1.1 Placement Types */ +enum ib_placement_type { + IB_FLUSH_GLOBAL = 1U << 0, + IB_FLUSH_PERSISTENT = 1U << 1, +}; + +/* see IBA A19.4.1.2 Selectivity Level */ +enum ib_selectivity_level { + IB_FLUSH_RANGE = 0, + IB_FLUSH_MR, +}; + /* * Make sure that all structs defined in this file remain laid out so * that they pack the same way on 32-bit and 64-bit architectures (to @@ -466,6 +478,7 @@ enum ib_uverbs_wc_opcode { IB_UVERBS_WC_BIND_MW = 5, IB_UVERBS_WC_LOCAL_INV = 6, IB_UVERBS_WC_TSO = 7, + IB_UVERBS_WC_FLUSH = 8, IB_UVERBS_WC_ATOMIC_WRITE = 9, }; @@ -785,6 +798,7 @@ enum ib_uverbs_wr_opcode { IB_UVERBS_WR_RDMA_READ_WITH_INV = 11, IB_UVERBS_WR_MASKED_ATOMIC_CMP_AND_SWP = 12, IB_UVERBS_WR_MASKED_ATOMIC_FETCH_AND_ADD = 13, + IB_UVERBS_WR_FLUSH = 14, 
IB_UVERBS_WR_ATOMIC_WRITE = 15, /* Review enum ib_wr_opcode before modifying this */ }; @@ -1333,6 +1347,9 @@ enum ib_uverbs_device_cap_flags { /* Deprecated. Please use IB_UVERBS_RAW_PACKET_CAP_SCATTER_FCS. */ IB_UVERBS_DEVICE_RAW_SCATTER_FCS = 1ULL << 34, IB_UVERBS_DEVICE_PCI_WRITE_END_PADDING = 1ULL << 36, + /* Flush placement types */ + IB_UVERBS_DEVICE_FLUSH_GLOBAL = 1ULL << 38, + IB_UVERBS_DEVICE_FLUSH_PERSISTENT = 1ULL << 39, /* Atomic write attributes */ IB_UVERBS_DEVICE_ATOMIC_WRITE = 1ULL << 40, }; From 208e3a134b50d95ea3962d7a37b4d8a8f5368376 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Tue, 6 Dec 2022 21:01:53 +0800 Subject: [PATCH 3597/4122] RDMA: Extend RDMA kernel verbs ABI to support flush This commit extends the RDMA kernel verbs ABI to support the flush operation defined in IBA A19.4.1. These changes are backward compatible with the existing RDMA kernel verbs ABI. It makes device/HCA support new FLUSH attributes/capabilities, and it also makes memory region support new FLUSH access flags. Users can use ibv_reg_mr(3) to register flush access flags. Only the access flags also supported by device's capabilities can be registered successfully. Once registered successfully, it means the MR is flushable. Similarly, A flushable MR should also have one or both of GLOBAL_VISIBILITY and PERSISTENT attributes/capabilities like device/HCA. 
Link: https://lore.kernel.org/r/20221206130201.30986-3-lizhijian@fujitsu.com Reviewed-by: Zhu Yanjun Signed-off-by: Li Zhijian Signed-off-by: Jason Gunthorpe --- include/rdma/ib_pack.h | 3 +++ include/rdma/ib_verbs.h | 18 +++++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h index f932d164af63..b8c56d7dc35d 100644 --- a/include/rdma/ib_pack.h +++ b/include/rdma/ib_pack.h @@ -84,6 +84,7 @@ enum { /* opcode 0x15 is reserved */ IB_OPCODE_SEND_LAST_WITH_INVALIDATE = 0x16, IB_OPCODE_SEND_ONLY_WITH_INVALIDATE = 0x17, + IB_OPCODE_FLUSH = 0x1C, IB_OPCODE_ATOMIC_WRITE = 0x1D, /* real constants follow -- see comment about above IB_OPCODE() @@ -113,6 +114,7 @@ enum { IB_OPCODE(RC, FETCH_ADD), IB_OPCODE(RC, SEND_LAST_WITH_INVALIDATE), IB_OPCODE(RC, SEND_ONLY_WITH_INVALIDATE), + IB_OPCODE(RC, FLUSH), IB_OPCODE(RC, ATOMIC_WRITE), /* UC */ @@ -151,6 +153,7 @@ enum { IB_OPCODE(RD, ATOMIC_ACKNOWLEDGE), IB_OPCODE(RD, COMPARE_SWAP), IB_OPCODE(RD, FETCH_ADD), + IB_OPCODE(RD, FLUSH), /* UD */ IB_OPCODE(UD, SEND_ONLY), diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index df6bb26ba0be..a9a429172c0a 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -270,6 +270,9 @@ enum ib_device_cap_flags { /* The device supports padding incoming writes to cacheline. */ IB_DEVICE_PCI_WRITE_END_PADDING = IB_UVERBS_DEVICE_PCI_WRITE_END_PADDING, + /* Placement type attributes */ + IB_DEVICE_FLUSH_GLOBAL = IB_UVERBS_DEVICE_FLUSH_GLOBAL, + IB_DEVICE_FLUSH_PERSISTENT = IB_UVERBS_DEVICE_FLUSH_PERSISTENT, IB_DEVICE_ATOMIC_WRITE = IB_UVERBS_DEVICE_ATOMIC_WRITE, }; @@ -987,6 +990,7 @@ enum ib_wc_opcode { IB_WC_REG_MR, IB_WC_MASKED_COMP_SWAP, IB_WC_MASKED_FETCH_ADD, + IB_WC_FLUSH = IB_UVERBS_WC_FLUSH, /* * Set value of IB_WC_RECV so consumers can test if a completion is a * receive by testing (opcode & IB_WC_RECV). 
@@ -1327,6 +1331,7 @@ enum ib_wr_opcode { IB_UVERBS_WR_MASKED_ATOMIC_CMP_AND_SWP, IB_WR_MASKED_ATOMIC_FETCH_AND_ADD = IB_UVERBS_WR_MASKED_ATOMIC_FETCH_AND_ADD, + IB_WR_FLUSH = IB_UVERBS_WR_FLUSH, IB_WR_ATOMIC_WRITE = IB_UVERBS_WR_ATOMIC_WRITE, /* These are kernel only and can not be issued by userspace */ @@ -1461,10 +1466,12 @@ enum ib_access_flags { IB_ACCESS_ON_DEMAND = IB_UVERBS_ACCESS_ON_DEMAND, IB_ACCESS_HUGETLB = IB_UVERBS_ACCESS_HUGETLB, IB_ACCESS_RELAXED_ORDERING = IB_UVERBS_ACCESS_RELAXED_ORDERING, + IB_ACCESS_FLUSH_GLOBAL = IB_UVERBS_ACCESS_FLUSH_GLOBAL, + IB_ACCESS_FLUSH_PERSISTENT = IB_UVERBS_ACCESS_FLUSH_PERSISTENT, IB_ACCESS_OPTIONAL = IB_UVERBS_ACCESS_OPTIONAL_RANGE, IB_ACCESS_SUPPORTED = - ((IB_ACCESS_HUGETLB << 1) - 1) | IB_ACCESS_OPTIONAL, + ((IB_ACCESS_FLUSH_PERSISTENT << 1) - 1) | IB_ACCESS_OPTIONAL, }; /* @@ -4325,6 +4332,8 @@ int ib_dealloc_xrcd_user(struct ib_xrcd *xrcd, struct ib_udata *udata); static inline int ib_check_mr_access(struct ib_device *ib_dev, unsigned int flags) { + u64 device_cap = ib_dev->attrs.device_cap_flags; + /* * Local write permission is required if remote write or * remote atomic permission is also requested. @@ -4339,6 +4348,13 @@ static inline int ib_check_mr_access(struct ib_device *ib_dev, if (flags & IB_ACCESS_ON_DEMAND && !(ib_dev->attrs.kernel_cap_flags & IBK_ON_DEMAND_PAGING)) return -EOPNOTSUPP; + + if ((flags & IB_ACCESS_FLUSH_GLOBAL && + !(device_cap & IB_DEVICE_FLUSH_GLOBAL)) || + (flags & IB_ACCESS_FLUSH_PERSISTENT && + !(device_cap & IB_DEVICE_FLUSH_PERSISTENT))) + return -EOPNOTSUPP; + return 0; } From 668ce52d5eef477c0def757610768a1a3ccc9785 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Tue, 6 Dec 2022 21:01:54 +0800 Subject: [PATCH 3598/4122] RDMA/rxe: Extend rxe user ABI to support flush This commit extends the rxe user ABI to support the flush operation defined in IBA A19.4.1. These changes are backward compatible with the existing rxe user ABI. 
The user API requests a flush by filling this structure. Link: https://lore.kernel.org/r/20221206130201.30986-4-lizhijian@fujitsu.com Reviewed-by: Zhu Yanjun Signed-off-by: Li Zhijian Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/rdma_user_rxe.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/uapi/rdma/rdma_user_rxe.h b/include/uapi/rdma/rdma_user_rxe.h index d20d1ecf046f..bb092fccb813 100644 --- a/include/uapi/rdma/rdma_user_rxe.h +++ b/include/uapi/rdma/rdma_user_rxe.h @@ -82,6 +82,13 @@ struct rxe_send_wr { __u32 invalidate_rkey; } ex; union { + struct { + __aligned_u64 remote_addr; + __u32 length; + __u32 rkey; + __u8 type; + __u8 level; + } flush; struct { __aligned_u64 remote_addr; __u32 rkey; From 02ea0a511558c907bde0e01fdebcd4536924d996 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Tue, 6 Dec 2022 21:01:55 +0800 Subject: [PATCH 3599/4122] RDMA/rxe: Allow registering persistent flag for pmem MR only Memory region could support at most 2 flush access flags: IB_ACCESS_FLUSH_PERSISTENT and IB_ACCESS_FLUSH_GLOBAL But we only allow user to register persistent flush flags to the pmem MR where it has the ability of persisting data across power cycles. So registering a persistent access flag to a non-pmem MR will be rejected.
Link: https://lore.kernel.org/r/20221206130201.30986-5-lizhijian@fujitsu.com CC: Dan Williams Signed-off-by: Li Zhijian Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_mr.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index b7c9ff1ddf0e..81a438e5010a 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -111,6 +111,15 @@ void rxe_mr_init_dma(int access, struct rxe_mr *mr) mr->ibmr.type = IB_MR_TYPE_DMA; } +static bool is_pmem_page(struct page *pg) +{ + unsigned long paddr = page_to_phys(pg); + + return REGION_INTERSECTS == + region_intersects(paddr, PAGE_SIZE, IORESOURCE_MEM, + IORES_DESC_PERSISTENT_MEMORY); +} + int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova, int access, struct rxe_mr *mr) { @@ -146,16 +155,25 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova, num_buf = 0; map = mr->map; if (length > 0) { - buf = map[0]->buf; + bool persistent_access = access & IB_ACCESS_FLUSH_PERSISTENT; + buf = map[0]->buf; for_each_sgtable_page (&umem->sgt_append.sgt, &sg_iter, 0) { + struct page *pg = sg_page_iter_page(&sg_iter); + + if (persistent_access && !is_pmem_page(pg)) { + rxe_dbg_mr(mr, "Unable to register persistent access to non-pmem device\n"); + err = -EINVAL; + goto err_release_umem; + } + if (num_buf >= RXE_BUF_PER_MAP) { map++; buf = map[0]->buf; num_buf = 0; } - vaddr = page_address(sg_page_iter_page(&sg_iter)); + vaddr = page_address(pg); if (!vaddr) { rxe_dbg_mr(mr, "Unable to get virtual address\n"); err = -ENOMEM; From 02e9a31c897d17981508ceaac4430b93ff56ffc7 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Tue, 6 Dec 2022 21:01:56 +0800 Subject: [PATCH 3600/4122] RDMA/rxe: Extend rxe packet format to support flush Extend rxe opcode tables, headers, helper and constants to support flush operations. 
Refer to the IBA A19.4.1 for more FETH definition details Link: https://lore.kernel.org/r/20221206130201.30986-6-lizhijian@fujitsu.com Reviewed-by: Zhu Yanjun Signed-off-by: Li Zhijian Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_hdr.h | 47 ++++++++++++++++++++++++++ drivers/infiniband/sw/rxe/rxe_opcode.c | 17 ++++++++++ drivers/infiniband/sw/rxe/rxe_opcode.h | 14 +++++--- 3 files changed, 73 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_hdr.h b/drivers/infiniband/sw/rxe/rxe_hdr.h index 804594b76040..46f82b27fcd2 100644 --- a/drivers/infiniband/sw/rxe/rxe_hdr.h +++ b/drivers/infiniband/sw/rxe/rxe_hdr.h @@ -607,6 +607,52 @@ static inline void reth_set_len(struct rxe_pkt_info *pkt, u32 len) rxe_opcode[pkt->opcode].offset[RXE_RETH], len); } +/****************************************************************************** + * FLUSH Extended Transport Header + ******************************************************************************/ + +struct rxe_feth { + __be32 bits; +}; + +#define FETH_PLT_MASK (0x0000000f) /* bits 3-0 */ +#define FETH_SEL_MASK (0x00000030) /* bits 5-4 */ +#define FETH_SEL_SHIFT (4U) + +static inline u32 __feth_plt(void *arg) +{ + struct rxe_feth *feth = arg; + + return be32_to_cpu(feth->bits) & FETH_PLT_MASK; +} + +static inline u32 __feth_sel(void *arg) +{ + struct rxe_feth *feth = arg; + + return (be32_to_cpu(feth->bits) & FETH_SEL_MASK) >> FETH_SEL_SHIFT; +} + +static inline u32 feth_plt(struct rxe_pkt_info *pkt) +{ + return __feth_plt(pkt->hdr + rxe_opcode[pkt->opcode].offset[RXE_FETH]); +} + +static inline u32 feth_sel(struct rxe_pkt_info *pkt) +{ + return __feth_sel(pkt->hdr + rxe_opcode[pkt->opcode].offset[RXE_FETH]); +} + +static inline void feth_init(struct rxe_pkt_info *pkt, u8 type, u8 level) +{ + struct rxe_feth *feth = (struct rxe_feth *) + (pkt->hdr + rxe_opcode[pkt->opcode].offset[RXE_FETH]); + u32 bits = ((level << FETH_SEL_SHIFT) & FETH_SEL_MASK) | + (type & FETH_PLT_MASK); + + 
feth->bits = cpu_to_be32(bits); +} + /****************************************************************************** * Atomic Extended Transport Header ******************************************************************************/ @@ -909,6 +955,7 @@ enum rxe_hdr_length { RXE_ATMETH_BYTES = sizeof(struct rxe_atmeth), RXE_IETH_BYTES = sizeof(struct rxe_ieth), RXE_RDETH_BYTES = sizeof(struct rxe_rdeth), + RXE_FETH_BYTES = sizeof(struct rxe_feth), }; static inline size_t header_size(struct rxe_pkt_info *pkt) diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.c b/drivers/infiniband/sw/rxe/rxe_opcode.c index fb196029048e..5c0d5c6ffda4 100644 --- a/drivers/infiniband/sw/rxe/rxe_opcode.c +++ b/drivers/infiniband/sw/rxe/rxe_opcode.c @@ -101,6 +101,12 @@ struct rxe_wr_opcode_info rxe_wr_opcode_info[] = { [IB_QPT_UC] = WR_LOCAL_OP_MASK, }, }, + [IB_WR_FLUSH] = { + .name = "IB_WR_FLUSH", + .mask = { + [IB_QPT_RC] = WR_FLUSH_MASK, + }, + }, [IB_WR_ATOMIC_WRITE] = { .name = "IB_WR_ATOMIC_WRITE", .mask = { @@ -384,6 +390,17 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = { RXE_IETH_BYTES, } }, + [IB_OPCODE_RC_FLUSH] = { + .name = "IB_OPCODE_RC_FLUSH", + .mask = RXE_FETH_MASK | RXE_RETH_MASK | RXE_FLUSH_MASK | + RXE_START_MASK | RXE_END_MASK | RXE_REQ_MASK, + .length = RXE_BTH_BYTES + RXE_FETH_BYTES + RXE_RETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_FETH] = RXE_BTH_BYTES, + [RXE_RETH] = RXE_BTH_BYTES + RXE_FETH_BYTES, + } + }, [IB_OPCODE_RC_ATOMIC_WRITE] = { .name = "IB_OPCODE_RC_ATOMIC_WRITE", .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK | diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.h b/drivers/infiniband/sw/rxe/rxe_opcode.h index a470e9b0b884..cea4e0a63919 100644 --- a/drivers/infiniband/sw/rxe/rxe_opcode.h +++ b/drivers/infiniband/sw/rxe/rxe_opcode.h @@ -20,6 +20,7 @@ enum rxe_wr_mask { WR_READ_MASK = BIT(3), WR_WRITE_MASK = BIT(4), WR_LOCAL_OP_MASK = BIT(5), + WR_FLUSH_MASK = BIT(6), WR_ATOMIC_WRITE_MASK = BIT(7), WR_READ_OR_WRITE_MASK = 
WR_READ_MASK | WR_WRITE_MASK, @@ -48,6 +49,7 @@ enum rxe_hdr_type { RXE_RDETH, RXE_DETH, RXE_IMMDT, + RXE_FETH, RXE_PAYLOAD, NUM_HDR_TYPES }; @@ -64,6 +66,7 @@ enum rxe_hdr_mask { RXE_IETH_MASK = BIT(RXE_IETH), RXE_RDETH_MASK = BIT(RXE_RDETH), RXE_DETH_MASK = BIT(RXE_DETH), + RXE_FETH_MASK = BIT(RXE_FETH), RXE_PAYLOAD_MASK = BIT(RXE_PAYLOAD), RXE_REQ_MASK = BIT(NUM_HDR_TYPES + 0), @@ -72,13 +75,14 @@ enum rxe_hdr_mask { RXE_WRITE_MASK = BIT(NUM_HDR_TYPES + 3), RXE_READ_MASK = BIT(NUM_HDR_TYPES + 4), RXE_ATOMIC_MASK = BIT(NUM_HDR_TYPES + 5), + RXE_FLUSH_MASK = BIT(NUM_HDR_TYPES + 6), - RXE_RWR_MASK = BIT(NUM_HDR_TYPES + 6), - RXE_COMP_MASK = BIT(NUM_HDR_TYPES + 7), + RXE_RWR_MASK = BIT(NUM_HDR_TYPES + 7), + RXE_COMP_MASK = BIT(NUM_HDR_TYPES + 8), - RXE_START_MASK = BIT(NUM_HDR_TYPES + 8), - RXE_MIDDLE_MASK = BIT(NUM_HDR_TYPES + 9), - RXE_END_MASK = BIT(NUM_HDR_TYPES + 10), + RXE_START_MASK = BIT(NUM_HDR_TYPES + 9), + RXE_MIDDLE_MASK = BIT(NUM_HDR_TYPES + 10), + RXE_END_MASK = BIT(NUM_HDR_TYPES + 11), RXE_LOOPBACK_MASK = BIT(NUM_HDR_TYPES + 12), From fa1fd682ad3ef35b0e532c3bb14140786d17527c Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Tue, 6 Dec 2022 21:01:57 +0800 Subject: [PATCH 3601/4122] RDMA/rxe: Implement RC RDMA FLUSH service in requester side Implement FLUSH request operation in the requester. 
Link: https://lore.kernel.org/r/20221206130201.30986-7-lizhijian@fujitsu.com Reviewed-by: Zhu Yanjun Signed-off-by: Li Zhijian Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_req.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 2713e9058922..899c8779f800 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -241,6 +241,9 @@ static int next_opcode_rc(struct rxe_qp *qp, u32 opcode, int fits) IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE : IB_OPCODE_RC_SEND_FIRST; + case IB_WR_FLUSH: + return IB_OPCODE_RC_FLUSH; + case IB_WR_RDMA_READ: return IB_OPCODE_RC_RDMA_READ_REQUEST; @@ -425,11 +428,18 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp, /* init optional headers */ if (pkt->mask & RXE_RETH_MASK) { - reth_set_rkey(pkt, ibwr->wr.rdma.rkey); + if (pkt->mask & RXE_FETH_MASK) + reth_set_rkey(pkt, ibwr->wr.flush.rkey); + else + reth_set_rkey(pkt, ibwr->wr.rdma.rkey); reth_set_va(pkt, wqe->iova); reth_set_len(pkt, wqe->dma.resid); } + /* Fill Flush Extension Transport Header */ + if (pkt->mask & RXE_FETH_MASK) + feth_init(pkt, ibwr->wr.flush.type, ibwr->wr.flush.level); + if (pkt->mask & RXE_IMMDT_MASK) immdt_set_imm(pkt, ibwr->ex.imm_data); @@ -488,6 +498,9 @@ static int finish_packet(struct rxe_qp *qp, struct rxe_av *av, memset(pad, 0, bth_pad(pkt)); } + } else if (pkt->mask & RXE_FLUSH_MASK) { + /* oA19-2: shall have no payload. */ + wqe->dma.resid = 0; } if (pkt->mask & RXE_ATOMIC_WRITE_MASK) { From ea1bb00ee9a5527b032a6efebe4a879db4cb42bb Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Tue, 6 Dec 2022 21:01:58 +0800 Subject: [PATCH 3602/4122] RDMA/rxe: Implement flush execution in responder side Only the requested placement types that are also registered in the destination memory region are acceptable.
Otherwise, responder will also reply NAK "Remote Access Error" if it found a placement type violation. We will persist data via arch_wb_cache_pmem(), which could be architecture specific. This commit also adds 2 helpers to update qp.resp from the incoming packet. Link: https://lore.kernel.org/r/20221206130201.30986-8-lizhijian@fujitsu.com Reviewed-by: Zhu Yanjun Signed-off-by: Li Zhijian Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_loc.h | 1 + drivers/infiniband/sw/rxe/rxe_mr.c | 36 ++++++ drivers/infiniband/sw/rxe/rxe_resp.c | 160 ++++++++++++++++++++++---- drivers/infiniband/sw/rxe/rxe_verbs.h | 6 + 4 files changed, 183 insertions(+), 20 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index a22476d27b38..948ce4902b10 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -64,6 +64,7 @@ void rxe_mr_init_dma(int access, struct rxe_mr *mr); int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova, int access, struct rxe_mr *mr); int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr); +int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 iova, int length); int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length, enum rxe_mr_copy_dir dir); int copy_data(struct rxe_pd *pd, int access, struct rxe_dma_info *dma, diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index 81a438e5010a..072eac4b65d2 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -4,6 +4,8 @@ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 
*/ +#include + #include "rxe.h" #include "rxe_loc.h" @@ -192,6 +194,7 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova, mr->offset = ib_umem_offset(umem); mr->state = RXE_MR_STATE_VALID; mr->ibmr.type = IB_MR_TYPE_USER; + mr->ibmr.page_size = PAGE_SIZE; return 0; @@ -295,6 +298,39 @@ out: return addr; } +int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 iova, int length) +{ + size_t offset; + + if (length == 0) + return 0; + + if (mr->ibmr.type == IB_MR_TYPE_DMA) + return -EFAULT; + + offset = (iova - mr->ibmr.iova + mr->offset) & mr->page_mask; + while (length > 0) { + u8 *va; + int bytes; + + bytes = mr->ibmr.page_size - offset; + if (bytes > length) + bytes = length; + + va = iova_to_vaddr(mr, iova, length); + if (!va) + return -EFAULT; + + arch_wb_cache_pmem(va, bytes); + + length -= bytes; + iova += bytes; + offset = 0; + } + + return 0; +} + /* copy data from a range (vaddr, vaddr+length-1) to or from * a mr object starting at iova. */ diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index abbaa41017e8..7a60c7709da0 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -23,6 +23,7 @@ enum resp_states { RESPST_READ_REPLY, RESPST_ATOMIC_REPLY, RESPST_ATOMIC_WRITE_REPLY, + RESPST_PROCESS_FLUSH, RESPST_COMPLETE, RESPST_ACKNOWLEDGE, RESPST_CLEANUP, @@ -59,6 +60,7 @@ static char *resp_state_name[] = { [RESPST_READ_REPLY] = "READ_REPLY", [RESPST_ATOMIC_REPLY] = "ATOMIC_REPLY", [RESPST_ATOMIC_WRITE_REPLY] = "ATOMIC_WRITE_REPLY", + [RESPST_PROCESS_FLUSH] = "PROCESS_FLUSH", [RESPST_COMPLETE] = "COMPLETE", [RESPST_ACKNOWLEDGE] = "ACKNOWLEDGE", [RESPST_CLEANUP] = "CLEANUP", @@ -258,19 +260,37 @@ static enum resp_states check_op_seq(struct rxe_qp *qp, } } +static bool check_qp_attr_access(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) +{ + if (((pkt->mask & RXE_READ_MASK) && + !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) || + ((pkt->mask & (RXE_WRITE_MASK | 
RXE_ATOMIC_WRITE_MASK)) && + !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) || + ((pkt->mask & RXE_ATOMIC_MASK) && + !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) + return false; + + if (pkt->mask & RXE_FLUSH_MASK) { + u32 flush_type = feth_plt(pkt); + + if ((flush_type & IB_FLUSH_GLOBAL && + !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_GLOBAL)) || + (flush_type & IB_FLUSH_PERSISTENT && + !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_PERSISTENT))) + return false; + } + + return true; +} + static enum resp_states check_op_valid(struct rxe_qp *qp, struct rxe_pkt_info *pkt) { switch (qp_type(qp)) { case IB_QPT_RC: - if (((pkt->mask & RXE_READ_MASK) && - !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) || - ((pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) && - !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) || - ((pkt->mask & RXE_ATOMIC_MASK) && - !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) { + if (!check_qp_attr_access(qp, pkt)) return RESPST_ERR_UNSUPPORTED_OPCODE; - } break; @@ -437,6 +457,23 @@ static enum resp_states rxe_resp_check_length(struct rxe_qp *qp, return RESPST_CHK_RKEY; } +static void qp_resp_from_reth(struct rxe_qp *qp, struct rxe_pkt_info *pkt) +{ + qp->resp.va = reth_va(pkt); + qp->resp.offset = 0; + qp->resp.rkey = reth_rkey(pkt); + qp->resp.resid = reth_len(pkt); + qp->resp.length = reth_len(pkt); +} + +static void qp_resp_from_atmeth(struct rxe_qp *qp, struct rxe_pkt_info *pkt) +{ + qp->resp.va = atmeth_va(pkt); + qp->resp.offset = 0; + qp->resp.rkey = atmeth_rkey(pkt); + qp->resp.resid = sizeof(u64); +} + static enum resp_states check_rkey(struct rxe_qp *qp, struct rxe_pkt_info *pkt) { @@ -448,23 +485,26 @@ static enum resp_states check_rkey(struct rxe_qp *qp, u32 pktlen; int mtu = qp->mtu; enum resp_states state; - int access; + int access = 0; if (pkt->mask & (RXE_READ_OR_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) { - if (pkt->mask & RXE_RETH_MASK) { - qp->resp.va = reth_va(pkt); - qp->resp.offset = 0; - 
qp->resp.rkey = reth_rkey(pkt); - qp->resp.resid = reth_len(pkt); - qp->resp.length = reth_len(pkt); - } + if (pkt->mask & RXE_RETH_MASK) + qp_resp_from_reth(qp, pkt); + access = (pkt->mask & RXE_READ_MASK) ? IB_ACCESS_REMOTE_READ : IB_ACCESS_REMOTE_WRITE; + } else if (pkt->mask & RXE_FLUSH_MASK) { + u32 flush_type = feth_plt(pkt); + + if (pkt->mask & RXE_RETH_MASK) + qp_resp_from_reth(qp, pkt); + + if (flush_type & IB_FLUSH_GLOBAL) + access |= IB_ACCESS_FLUSH_GLOBAL; + if (flush_type & IB_FLUSH_PERSISTENT) + access |= IB_ACCESS_FLUSH_PERSISTENT; } else if (pkt->mask & RXE_ATOMIC_MASK) { - qp->resp.va = atmeth_va(pkt); - qp->resp.offset = 0; - qp->resp.rkey = atmeth_rkey(pkt); - qp->resp.resid = sizeof(u64); + qp_resp_from_atmeth(qp, pkt); access = IB_ACCESS_REMOTE_ATOMIC; } else { return RESPST_EXECUTE; @@ -511,11 +551,20 @@ static enum resp_states check_rkey(struct rxe_qp *qp, } } + if (pkt->mask & RXE_FLUSH_MASK) { + /* FLUSH MR may not set va or resid + * no need to check range since we will flush whole mr + */ + if (feth_sel(pkt) == IB_FLUSH_MR) + goto skip_check_range; + } + if (mr_check_range(mr, va + qp->resp.offset, resid)) { state = RESPST_ERR_RKEY_VIOLATION; goto err; } +skip_check_range: if (pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) { if (resid > mtu) { if (pktlen != mtu || bth_pad(pkt)) { @@ -621,11 +670,61 @@ static struct resp_res *rxe_prepare_res(struct rxe_qp *qp, res->last_psn = pkt->psn; res->cur_psn = pkt->psn; break; + case RXE_FLUSH_MASK: + res->flush.va = qp->resp.va + qp->resp.offset; + res->flush.length = qp->resp.length; + res->flush.type = feth_plt(pkt); + res->flush.level = feth_sel(pkt); } return res; } +static enum resp_states process_flush(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) +{ + u64 length, start; + struct rxe_mr *mr = qp->resp.mr; + struct resp_res *res = qp->resp.res; + + /* oA19-14, oA19-15 */ + if (res && res->replay) + return RESPST_ACKNOWLEDGE; + else if (!res) { + res = rxe_prepare_res(qp, pkt, 
RXE_FLUSH_MASK); + qp->resp.res = res; + } + + if (res->flush.level == IB_FLUSH_RANGE) { + start = res->flush.va; + length = res->flush.length; + } else { /* level == IB_FLUSH_MR */ + start = mr->ibmr.iova; + length = mr->ibmr.length; + } + + if (res->flush.type & IB_FLUSH_PERSISTENT) { + if (rxe_flush_pmem_iova(mr, start, length)) + return RESPST_ERR_RKEY_VIOLATION; + /* Make data persistent. */ + wmb(); + } else if (res->flush.type & IB_FLUSH_GLOBAL) { + /* Make data global visibility. */ + wmb(); + } + + qp->resp.msn++; + + /* next expected psn, read handles this separately */ + qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK; + qp->resp.ack_psn = qp->resp.psn; + + qp->resp.opcode = pkt->opcode; + qp->resp.status = IB_WC_SUCCESS; + + return RESPST_ACKNOWLEDGE; +} + /* Guarantee atomicity of atomic operations at the machine level. */ static DEFINE_SPINLOCK(atomic_ops_lock); @@ -980,6 +1079,8 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt) return RESPST_ATOMIC_REPLY; } else if (pkt->mask & RXE_ATOMIC_WRITE_MASK) { return RESPST_ATOMIC_WRITE_REPLY; + } else if (pkt->mask & RXE_FLUSH_MASK) { + return RESPST_PROCESS_FLUSH; } else { /* Unreachable */ WARN_ON_ONCE(1); @@ -1176,7 +1277,7 @@ static enum resp_states acknowledge(struct rxe_qp *qp, send_ack(qp, qp->resp.aeth_syndrome, pkt->psn); else if (pkt->mask & RXE_ATOMIC_MASK) send_atomic_ack(qp, AETH_ACK_UNLIMITED, pkt->psn); - else if (pkt->mask & RXE_ATOMIC_WRITE_MASK) + else if (pkt->mask & (RXE_FLUSH_MASK | RXE_ATOMIC_WRITE_MASK)) send_read_response_ack(qp, AETH_ACK_UNLIMITED, pkt->psn); else if (bth_ack(pkt)) send_ack(qp, AETH_ACK_UNLIMITED, pkt->psn); @@ -1234,6 +1335,22 @@ static enum resp_states duplicate_request(struct rxe_qp *qp, /* SEND. Ack again and cleanup. C9-105. */ send_ack(qp, AETH_ACK_UNLIMITED, prev_psn); return RESPST_CLEANUP; + } else if (pkt->mask & RXE_FLUSH_MASK) { + struct resp_res *res; + + /* Find the operation in our list of responder resources. 
*/ + res = find_resource(qp, pkt->psn); + if (res) { + res->replay = 1; + res->cur_psn = pkt->psn; + qp->resp.res = res; + rc = RESPST_PROCESS_FLUSH; + goto out; + } + + /* Resource not found. Class D error. Drop the request. */ + rc = RESPST_CLEANUP; + goto out; } else if (pkt->mask & RXE_READ_MASK) { struct resp_res *res; @@ -1431,6 +1548,9 @@ int rxe_responder(void *arg) case RESPST_ATOMIC_WRITE_REPLY: state = atomic_write_reply(qp, pkt); break; + case RESPST_PROCESS_FLUSH: + state = process_flush(qp, pkt); + break; case RESPST_ACKNOWLEDGE: state = acknowledge(qp, pkt); break; diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index 22a299b0a9f0..19ddfa890480 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -165,6 +165,12 @@ struct resp_res { u64 va; u32 resid; } read; + struct { + u32 length; + u64 va; + u8 type; + u8 level; + } flush; }; }; From 70aad902ce8aeb094dd3ef14988a652f24cce7c8 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Tue, 6 Dec 2022 21:01:59 +0800 Subject: [PATCH 3603/4122] RDMA/rxe: Implement flush completion Per IBA SPEC, FLUSH will ack in rdma read response with 0 length. Use IB_WC_FLUSH (aka IB_UVERBS_WC_FLUSH) code to tell userspace a FLUSH completion. 
Link: https://lore.kernel.org/r/20221206130201.30986-9-lizhijian@fujitsu.com Reviewed-by: Zhu Yanjun Signed-off-by: Li Zhijian Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_comp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index 1c525325e271..20737fec392b 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -105,6 +105,7 @@ static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode) case IB_WR_REG_MR: return IB_WC_REG_MR; case IB_WR_BIND_MW: return IB_WC_BIND_MW; case IB_WR_ATOMIC_WRITE: return IB_WC_ATOMIC_WRITE; + case IB_WR_FLUSH: return IB_WC_FLUSH; default: return 0xff; @@ -278,7 +279,8 @@ static inline enum comp_state check_ack(struct rxe_qp *qp, */ case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE: if (wqe->wr.opcode != IB_WR_RDMA_READ && - wqe->wr.opcode != IB_WR_RDMA_READ_WITH_INV) { + wqe->wr.opcode != IB_WR_RDMA_READ_WITH_INV && + wqe->wr.opcode != IB_WR_FLUSH) { wqe->status = IB_WC_FATAL_ERR; return COMPST_ERROR; } From 8b4d379b399d19f4c803e565bfe13f07b66b5ad7 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Tue, 6 Dec 2022 21:02:00 +0800 Subject: [PATCH 3604/4122] RDMA/cm: Make QP FLUSHABLE for supported device Similar to RDMA and Atomic qp attributes enabled by default in CM, enable FLUSH attribute for supported device. That makes applications built with the rdma_create_ep and rdma_accept APIs have the FLUSH qp attribute natively, so that users can request FLUSH operations more easily. Note that a FLUSH operation requires FLUSH to be supported by the device (HCA), the memory region (MR) and the QP at the same time, so it's safe to enable the FLUSH qp attribute by default here. The FLUSH attribute can be disabled through the modify_qp() interface.
Link: https://lore.kernel.org/r/20221206130201.30986-10-lizhijian@fujitsu.com Signed-off-by: Li Zhijian Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 1f9938a2c475..603c0aecc361 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -4094,9 +4094,18 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv, *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PORT; qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE; - if (cm_id_priv->responder_resources) + if (cm_id_priv->responder_resources) { + struct ib_device *ib_dev = cm_id_priv->id.device; + u64 support_flush = ib_dev->attrs.device_cap_flags & + (IB_DEVICE_FLUSH_GLOBAL | IB_DEVICE_FLUSH_PERSISTENT); + u32 flushable = support_flush ? + (IB_ACCESS_FLUSH_GLOBAL | + IB_ACCESS_FLUSH_PERSISTENT) : 0; + qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ | - IB_ACCESS_REMOTE_ATOMIC; + IB_ACCESS_REMOTE_ATOMIC | + flushable; + } qp_attr->pkey_index = cm_id_priv->av.pkey_index; if (cm_id_priv->av.port) qp_attr->port_num = cm_id_priv->av.port->port_num; From 124011e6e933bead5852c3f69b32dec43919fe1a Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Tue, 6 Dec 2022 21:02:01 +0800 Subject: [PATCH 3605/4122] RDMA/rxe: Enable RDMA FLUSH capability for rxe device Now we are ready to enable RDMA FLUSH capability for RXE. It can support Global Visibility and Persistence placement types. 
Link: https://lore.kernel.org/r/20221206130201.30986-11-lizhijian@fujitsu.com Reviewed-by: Zhu Yanjun Signed-off-by: Li Zhijian Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_param.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h index bbc88cd71d95..a754fc902e3d 100644 --- a/drivers/infiniband/sw/rxe/rxe_param.h +++ b/drivers/infiniband/sw/rxe/rxe_param.h @@ -51,6 +51,8 @@ enum rxe_device_param { | IB_DEVICE_SRQ_RESIZE | IB_DEVICE_MEM_MGT_EXTENSIONS | IB_DEVICE_MEM_WINDOW + | IB_DEVICE_FLUSH_GLOBAL + | IB_DEVICE_FLUSH_PERSISTENT #ifdef CONFIG_64BIT | IB_DEVICE_MEM_WINDOW_TYPE_2B | IB_DEVICE_ATOMIC_WRITE, From bd328def2f987ebd4e20725a490f005556d737bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sat, 26 Nov 2022 06:09:59 +0100 Subject: [PATCH 3606/4122] firmware_loader: remove #include MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit utsrelease.h is potentially generated on each build. By removing this unused include we can get rid of some spurious recompilations. 
Signed-off-by: Thomas Weißschuh Reviewed-by: Russ Weight Signed-off-by: Masahiro Yamada --- drivers/base/firmware_loader/firmware.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/base/firmware_loader/firmware.h b/drivers/base/firmware_loader/firmware.h index fe77e91c38a2..bf549d6500d7 100644 --- a/drivers/base/firmware_loader/firmware.h +++ b/drivers/base/firmware_loader/firmware.h @@ -9,8 +9,6 @@ #include #include -#include - /** * enum fw_opt - options to control firmware loading behaviour * From 9edb4fd3d70a9dffd8ac6af6d060e97672b4a22f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sat, 26 Nov 2022 06:10:01 +0100 Subject: [PATCH 3607/4122] init/version.c: remove #include MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 2df8220cc511 ("kbuild: build init/built-in.a just once") moved the usage of the define UTS_RELEASE to the file version-timestamp.c. version-timestamp.c in turn is included from version.c but already includes utsrelease.h itself properly. The unneeded include of utsrelease.h from version.c can be dropped. Fixes: 2df8220cc511 ("kbuild: build init/built-in.a just once") Signed-off-by: Thomas Weißschuh Signed-off-by: Masahiro Yamada --- init/version.c | 1 - 1 file changed, 1 deletion(-) diff --git a/init/version.c b/init/version.c index 01d4ab05f0ba..f117921811b4 100644 --- a/init/version.c +++ b/init/version.c @@ -15,7 +15,6 @@ #include #include #include -#include #include static int __init early_hostname(char *arg) From 4bf73588165ba7d32131a043775557a54b6e1db5 Mon Sep 17 00:00:00 2001 From: Dmitry Goncharov Date: Mon, 5 Dec 2022 16:48:19 -0500 Subject: [PATCH 3608/4122] kbuild: Port silent mode detection to future gnu make. Port silent mode detection to the future (post make-4.4) versions of gnu make. Makefile contains the following piece of make code to detect if option -s is specified on the command line. 
ifneq ($(findstring s,$(filter-out --%,$(MAKEFLAGS))),) This code is executed by make at parse time and assumes that MAKEFLAGS does not contain command line variable definitions. Currently if the user defines a=s on the command line, then only at build time MAKEFLAGS contains " -- a=s". However, starting with commit dc2d963989b96161472b2cd38cef5d1f4851ea34 MAKEFLAGS contains command line definitions at both parse time and build time. This '-s' detection code then confuses a command line variable definition which contains letter 's' with option -s. $ # old make $ make net/wireless/ocb.o a=s CALL scripts/checksyscalls.sh DESCEND objtool $ # this is a new make which defines makeflags at parse time $ ~/src/gmake/make/l64/make net/wireless/ocb.o a=s $ We can see here that the letter 's' from 'a=s' was confused with -s. This patch checks for presence of -s using a method recommended by the make manual here https://www.gnu.org/software/make/manual/make.html#Testing-Flags. Link: https://lists.gnu.org/archive/html/bug-make/2022-11/msg00190.html Reported-by: Jan Palus Signed-off-by: Dmitry Goncharov Signed-off-by: Masahiro Yamada --- Makefile | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 6f846b1f2618..fbd9ff4a61e7 100644 --- a/Makefile +++ b/Makefile @@ -93,10 +93,17 @@ endif # If the user is running make -s (silent mode), suppress echoing of # commands +# make-4.0 (and later) keep single letter options in the 1st word of MAKEFLAGS.
-ifneq ($(findstring s,$(filter-out --%,$(MAKEFLAGS))),) - quiet=silent_ - KBUILD_VERBOSE = 0 +ifeq ($(filter 3.%,$(MAKE_VERSION)),) +silence:=$(findstring s,$(firstword -$(MAKEFLAGS))) +else +silence:=$(findstring s,$(filter-out --%,$(MAKEFLAGS))) +endif + +ifeq ($(silence),s) +quiet=silent_ +KBUILD_VERBOSE = 0 endif export quiet Q KBUILD_VERBOSE From 2ba8c7dc71c098935977528747b82ffae43f3f18 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 29 Nov 2022 16:00:50 +0100 Subject: [PATCH 3609/4122] riscv: Don't duplicate __ALTERNATIVE_CFG in __ALTERNATIVE_CFG_2 Build __ALTERNATIVE_CFG_2 by adding on to __ALTERNATIVE_CFG rather than duplicating it. Signed-off-by: Andrew Jones Tested-by: Lad Prabhakar Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20221129150053.50464-2-ajones@ventanamicro.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/alternative-macros.h | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/arch/riscv/include/asm/alternative-macros.h b/arch/riscv/include/asm/alternative-macros.h index ec2f3f1b836f..c7036166af2c 100644 --- a/arch/riscv/include/asm/alternative-macros.h +++ b/arch/riscv/include/asm/alternative-macros.h @@ -49,14 +49,7 @@ .macro __ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, errata_id_1, enable_1, \ new_c_2, vendor_id_2, errata_id_2, enable_2 -886 : - .option push - .option norvc - .option norelax - \old_c - .option pop -887 : - ALT_NEW_CONTENT \vendor_id_1, \errata_id_1, \enable_1, \new_c_1 + __ALTERNATIVE_CFG \old_c, \new_c_1, \vendor_id_1, \errata_id_1, \enable_1 ALT_NEW_CONTENT \vendor_id_2, \errata_id_2, \enable_2, \new_c_2 .endm @@ -116,14 +109,7 @@ enable_1, \ new_c_2, vendor_id_2, errata_id_2, \ enable_2) \ - "886 :\n" \ - ".option push\n" \ - ".option norvc\n" \ - ".option norelax\n" \ - old_c "\n" \ - ".option pop\n" \ - "887 :\n" \ - ALT_NEW_CONTENT(vendor_id_1, errata_id_1, enable_1, new_c_1) \ + __ALTERNATIVE_CFG(old_c, new_c_1, vendor_id_1, errata_id_1, enable_1) \ 
ALT_NEW_CONTENT(vendor_id_2, errata_id_2, enable_2, new_c_2) #define _ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, \ From 7d52eace1bf5c55704bb0ca5dc8f2489927683ff Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 29 Nov 2022 16:00:51 +0100 Subject: [PATCH 3610/4122] riscv: alternatives: Don't name unused macro parameters Without CONFIG_RISCV_ALTERNATIVE only the first parameter of the ALTERNATIVE macros is needed. Use ... for the rest to cut down on clutter. While there, fix a couple space vs. tab issues. Signed-off-by: Andrew Jones Tested-by: Lad Prabhakar Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20221129150053.50464-3-ajones@ventanamicro.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/alternative-macros.h | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/arch/riscv/include/asm/alternative-macros.h b/arch/riscv/include/asm/alternative-macros.h index c7036166af2c..7cc2b587c5c4 100644 --- a/arch/riscv/include/asm/alternative-macros.h +++ b/arch/riscv/include/asm/alternative-macros.h @@ -130,28 +130,22 @@ \old_c .endm -#define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k) \ +#define _ALTERNATIVE_CFG(old_c, ...) \ __ALTERNATIVE_CFG old_c -#define _ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, \ - CONFIG_k_1, \ - new_c_2, vendor_id_2, errata_id_2, \ - CONFIG_k_2) \ - __ALTERNATIVE_CFG old_c +#define _ALTERNATIVE_CFG_2(old_c, ...) \ + __ALTERNATIVE_CFG old_c #else /* !__ASSEMBLY__ */ -#define __ALTERNATIVE_CFG(old_c) \ +#define __ALTERNATIVE_CFG(old_c) \ old_c "\n" -#define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k) \ +#define _ALTERNATIVE_CFG(old_c, ...) \ __ALTERNATIVE_CFG(old_c) -#define _ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, \ - CONFIG_k_1, \ - new_c_2, vendor_id_2, errata_id_2, \ - CONFIG_k_2) \ - __ALTERNATIVE_CFG(old_c) +#define _ALTERNATIVE_CFG_2(old_c, ...) 
\ + __ALTERNATIVE_CFG(old_c) #endif /* __ASSEMBLY__ */ #endif /* CONFIG_RISCV_ALTERNATIVE */ From bb2efcde594628ae08ee6e4be51b2047df9d2d06 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 29 Nov 2022 16:00:52 +0100 Subject: [PATCH 3611/4122] riscv: alternatives: Drop the underscores from the assembly macro names The underscores aren't needed because there isn't anything already named without them and the _CFG extension. This is a bit of a cleanup by itself, but the real motivation is for a coming patch which would otherwise need to add two more underscores to these macro names, i.e. ____ALTERNATIVE_CFG, and that'd be gross. Signed-off-by: Andrew Jones Tested-by: Lad Prabhakar Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20221129150053.50464-4-ajones@ventanamicro.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/alternative-macros.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/riscv/include/asm/alternative-macros.h b/arch/riscv/include/asm/alternative-macros.h index 7cc2b587c5c4..9ea95331a280 100644 --- a/arch/riscv/include/asm/alternative-macros.h +++ b/arch/riscv/include/asm/alternative-macros.h @@ -33,7 +33,7 @@ .endif .endm -.macro __ALTERNATIVE_CFG old_c, new_c, vendor_id, errata_id, enable +.macro ALTERNATIVE_CFG old_c, new_c, vendor_id, errata_id, enable 886 : .option push .option norvc @@ -45,11 +45,11 @@ .endm #define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k) \ - __ALTERNATIVE_CFG old_c, new_c, vendor_id, errata_id, IS_ENABLED(CONFIG_k) + ALTERNATIVE_CFG old_c, new_c, vendor_id, errata_id, IS_ENABLED(CONFIG_k) -.macro __ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, errata_id_1, enable_1, \ - new_c_2, vendor_id_2, errata_id_2, enable_2 - __ALTERNATIVE_CFG \old_c, \new_c_1, \vendor_id_1, \errata_id_1, \enable_1 +.macro ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, errata_id_1, enable_1, \ + new_c_2, vendor_id_2, errata_id_2, enable_2 + ALTERNATIVE_CFG \old_c, 
\new_c_1, \vendor_id_1, \errata_id_1, \enable_1 ALT_NEW_CONTENT \vendor_id_2, \errata_id_2, \enable_2, \new_c_2 .endm @@ -57,9 +57,9 @@ CONFIG_k_1, \ new_c_2, vendor_id_2, errata_id_2, \ CONFIG_k_2) \ - __ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, errata_id_1, \ + ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, errata_id_1, \ IS_ENABLED(CONFIG_k_1), \ - new_c_2, vendor_id_2, errata_id_2, \ + new_c_2, vendor_id_2, errata_id_2, \ IS_ENABLED(CONFIG_k_2) #else /* !__ASSEMBLY__ */ @@ -126,15 +126,15 @@ #else /* CONFIG_RISCV_ALTERNATIVE */ #ifdef __ASSEMBLY__ -.macro __ALTERNATIVE_CFG old_c +.macro ALTERNATIVE_CFG old_c \old_c .endm #define _ALTERNATIVE_CFG(old_c, ...) \ - __ALTERNATIVE_CFG old_c + ALTERNATIVE_CFG old_c #define _ALTERNATIVE_CFG_2(old_c, ...) \ - __ALTERNATIVE_CFG old_c + ALTERNATIVE_CFG old_c #else /* !__ASSEMBLY__ */ From 26fb4b90b745a808e94a81dc732d440c285fa74b Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 29 Nov 2022 16:00:53 +0100 Subject: [PATCH 3612/4122] riscv: Don't duplicate _ALTERNATIVE_CFG* macros Reduce clutter by only defining the _ALTERNATIVE_CFG* macros once, rather than once for assembly and once for C. To do that, we need to add __ALTERNATIVE_CFG* macros to the assembly side, but those are one-liners. Also take the opportunity to do a bit of reformatting, taking full advantage of the fact checkpatch gives us 100 char lines. 
Signed-off-by: Andrew Jones Tested-by: Lad Prabhakar Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20221129150053.50464-5-ajones@ventanamicro.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/alternative-macros.h | 53 +++++++-------------- 1 file changed, 17 insertions(+), 36 deletions(-) diff --git a/arch/riscv/include/asm/alternative-macros.h b/arch/riscv/include/asm/alternative-macros.h index 9ea95331a280..7226e2462584 100644 --- a/arch/riscv/include/asm/alternative-macros.h +++ b/arch/riscv/include/asm/alternative-macros.h @@ -44,23 +44,14 @@ ALT_NEW_CONTENT \vendor_id, \errata_id, \enable, \new_c .endm -#define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k) \ - ALTERNATIVE_CFG old_c, new_c, vendor_id, errata_id, IS_ENABLED(CONFIG_k) - .macro ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, errata_id_1, enable_1, \ new_c_2, vendor_id_2, errata_id_2, enable_2 ALTERNATIVE_CFG \old_c, \new_c_1, \vendor_id_1, \errata_id_1, \enable_1 ALT_NEW_CONTENT \vendor_id_2, \errata_id_2, \enable_2, \new_c_2 .endm -#define _ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, \ - CONFIG_k_1, \ - new_c_2, vendor_id_2, errata_id_2, \ - CONFIG_k_2) \ - ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, errata_id_1, \ - IS_ENABLED(CONFIG_k_1), \ - new_c_2, vendor_id_2, errata_id_2, \ - IS_ENABLED(CONFIG_k_2) +#define __ALTERNATIVE_CFG(...) ALTERNATIVE_CFG __VA_ARGS__ +#define __ALTERNATIVE_CFG_2(...) 
ALTERNATIVE_CFG_2 __VA_ARGS__ #else /* !__ASSEMBLY__ */ @@ -102,26 +93,20 @@ "887 :\n" \ ALT_NEW_CONTENT(vendor_id, errata_id, enable, new_c) +#define __ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, enable_1, \ + new_c_2, vendor_id_2, errata_id_2, enable_2) \ + __ALTERNATIVE_CFG(old_c, new_c_1, vendor_id_1, errata_id_1, enable_1) \ + ALT_NEW_CONTENT(vendor_id_2, errata_id_2, enable_2, new_c_2) + +#endif /* __ASSEMBLY__ */ + #define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k) \ __ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, IS_ENABLED(CONFIG_k)) -#define __ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, \ - enable_1, \ - new_c_2, vendor_id_2, errata_id_2, \ - enable_2) \ - __ALTERNATIVE_CFG(old_c, new_c_1, vendor_id_1, errata_id_1, enable_1) \ - ALT_NEW_CONTENT(vendor_id_2, errata_id_2, enable_2, new_c_2) - -#define _ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, \ - CONFIG_k_1, \ - new_c_2, vendor_id_2, errata_id_2, \ - CONFIG_k_2) \ - __ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, \ - IS_ENABLED(CONFIG_k_1), \ - new_c_2, vendor_id_2, errata_id_2, \ - IS_ENABLED(CONFIG_k_2)) - -#endif /* __ASSEMBLY__ */ +#define _ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, CONFIG_k_1, \ + new_c_2, vendor_id_2, errata_id_2, CONFIG_k_2) \ + __ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, IS_ENABLED(CONFIG_k_1), \ + new_c_2, vendor_id_2, errata_id_2, IS_ENABLED(CONFIG_k_2)) #else /* CONFIG_RISCV_ALTERNATIVE */ #ifdef __ASSEMBLY__ @@ -173,13 +158,9 @@ * on the following sample code and then replace ALTERNATIVE() with * ALTERNATIVE_2() to append its customized content. 
*/ -#define ALTERNATIVE_2(old_content, new_content_1, vendor_id_1, \ - errata_id_1, CONFIG_k_1, \ - new_content_2, vendor_id_2, \ - errata_id_2, CONFIG_k_2) \ - _ALTERNATIVE_CFG_2(old_content, new_content_1, vendor_id_1, \ - errata_id_1, CONFIG_k_1, \ - new_content_2, vendor_id_2, \ - errata_id_2, CONFIG_k_2) +#define ALTERNATIVE_2(old_content, new_content_1, vendor_id_1, errata_id_1, CONFIG_k_1, \ + new_content_2, vendor_id_2, errata_id_2, CONFIG_k_2) \ + _ALTERNATIVE_CFG_2(old_content, new_content_1, vendor_id_1, errata_id_1, CONFIG_k_1, \ + new_content_2, vendor_id_2, errata_id_2, CONFIG_k_2) #endif From 726855549cf8d5c6b05795cf74a9c23584f45544 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 29 Nov 2022 15:34:45 +0100 Subject: [PATCH 3613/4122] RISC-V: Improve use of isa2hwcap[] Improve isa2hwcap[] by removing it from static storage, as riscv_fill_hwcap() is only called once, and by reducing its size from 256 bytes to 26. The latter improvement is possible because isa2hwcap[] will never be indexed with capital letters and we can precompute the offsets from 'a'. No functional change intended. 
Signed-off-by: Andrew Jones Reviewed-by: Conor Dooley Reviewed-by: Heiko Stuebner Link: https://lore.kernel.org/r/20221129143447.49714-2-ajones@ventanamicro.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/cpufeature.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 694267d1fe81..4677320d7e31 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -74,15 +74,15 @@ void __init riscv_fill_hwcap(void) const char *isa; char print_str[NUM_ALPHA_EXTS + 1]; int i, j, rc; - static unsigned long isa2hwcap[256] = {0}; + unsigned long isa2hwcap[26] = {0}; unsigned long hartid; - isa2hwcap['i'] = isa2hwcap['I'] = COMPAT_HWCAP_ISA_I; - isa2hwcap['m'] = isa2hwcap['M'] = COMPAT_HWCAP_ISA_M; - isa2hwcap['a'] = isa2hwcap['A'] = COMPAT_HWCAP_ISA_A; - isa2hwcap['f'] = isa2hwcap['F'] = COMPAT_HWCAP_ISA_F; - isa2hwcap['d'] = isa2hwcap['D'] = COMPAT_HWCAP_ISA_D; - isa2hwcap['c'] = isa2hwcap['C'] = COMPAT_HWCAP_ISA_C; + isa2hwcap['i' - 'a'] = COMPAT_HWCAP_ISA_I; + isa2hwcap['m' - 'a'] = COMPAT_HWCAP_ISA_M; + isa2hwcap['a' - 'a'] = COMPAT_HWCAP_ISA_A; + isa2hwcap['f' - 'a'] = COMPAT_HWCAP_ISA_F; + isa2hwcap['d' - 'a'] = COMPAT_HWCAP_ISA_D; + isa2hwcap['c' - 'a'] = COMPAT_HWCAP_ISA_C; elf_hwcap = 0; @@ -196,8 +196,10 @@ void __init riscv_fill_hwcap(void) if (unlikely(ext_err)) continue; if (!ext_long) { - this_hwcap |= isa2hwcap[(unsigned char)(*ext)]; - set_bit(*ext - 'a', this_isa); + int nr = *ext - 'a'; + + this_hwcap |= isa2hwcap[nr]; + set_bit(nr, this_isa); } else { SET_ISA_EXT_MAP("sscofpmf", RISCV_ISA_EXT_SSCOFPMF); SET_ISA_EXT_MAP("svpbmt", RISCV_ISA_EXT_SVPBMT); From fb0ff0a95d61f69415cb8d8f2d921e1f7eed75af Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 29 Nov 2022 15:34:46 +0100 Subject: [PATCH 3614/4122] RISC-V: Introduce riscv_isa_extension_check Currently any isa extension found in the isa string is set in the isa bitmap. 
An isa extension set in the bitmap indicates that the extension is present and may be used (a.k.a is enabled). However, when an extension cannot be used due to missing dependencies or errata it should not be added to the bitmap. Introduce a function where additional checks may be placed in order to determine if an extension should be enabled or not. Note, the checks may simply indicate an issue with the DT, but, since extensions may be used in early boot, it's not always possible to simply produce an error at the point the issue is determined. It's best to keep the extension disabled and produce an error. No functional change intended, as the function is only introduced and always returns true. A later patch will provide checks for an isa extension. Signed-off-by: Andrew Jones Reviewed-by: Conor Dooley Reviewed-by: Heiko Stuebner Link: https://lore.kernel.org/r/20221129143447.49714-3-ajones@ventanamicro.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/cpufeature.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 4677320d7e31..220be7222129 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -68,6 +68,11 @@ bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit) } EXPORT_SYMBOL_GPL(__riscv_isa_extension_available); +static bool riscv_isa_extension_check(int id) +{ + return true; +} + void __init riscv_fill_hwcap(void) { struct device_node *node; @@ -189,7 +194,8 @@ void __init riscv_fill_hwcap(void) #define SET_ISA_EXT_MAP(name, bit) \ do { \ if ((ext_end - ext == sizeof(name) - 1) && \ - !memcmp(ext, name, sizeof(name) - 1)) \ + !memcmp(ext, name, sizeof(name) - 1) && \ + riscv_isa_extension_check(bit)) \ set_bit(bit, this_isa); \ } while (false) \ @@ -198,8 +204,10 @@ void __init riscv_fill_hwcap(void) if (!ext_long) { int nr = *ext - 'a'; - this_hwcap |= isa2hwcap[nr]; - set_bit(nr, this_isa); + if 
(riscv_isa_extension_check(nr)) { + this_hwcap |= isa2hwcap[nr]; + set_bit(nr, this_isa); + } } else { SET_ISA_EXT_MAP("sscofpmf", RISCV_ISA_EXT_SSCOFPMF); SET_ISA_EXT_MAP("svpbmt", RISCV_ISA_EXT_SVPBMT); From 9daaca4a44d6f0741060e67c54a0175c035edb1f Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 29 Nov 2022 15:34:47 +0100 Subject: [PATCH 3615/4122] RISC-V: Ensure Zicbom has a valid block size When a DT puts zicbom in the isa string, but does not provide a block size, ALT_CMO_OP() will attempt to do cache operations on address zero since the start address will be ANDed with zero. We can't simply BUG() in riscv_init_cbom_blocksize() when we fail to find a block size because the failure will happen before logging works, leaving users to scratch their heads as to why the boot hung. Instead, ensure Zicbom is disabled and output an error which will hopefully alert people that the DT needs to be fixed. While at it, add a check that the block size is a power-of-2 too. Signed-off-by: Andrew Jones Reviewed-by: Conor Dooley Reviewed-by: Heiko Stuebner Link: https://lore.kernel.org/r/20221129143447.49714-4-ajones@ventanamicro.com [Palmer: base on 5c20a3a9df19 ("RISC-V: Fix compilation without RISCV_ISA_ZICBOM"] Reported-by: kernel test robot Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/cpufeature.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 220be7222129..93e45560af30 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -70,6 +71,18 @@ EXPORT_SYMBOL_GPL(__riscv_isa_extension_available); static bool riscv_isa_extension_check(int id) { + switch (id) { + case RISCV_ISA_EXT_ZICBOM: + if (!riscv_cbom_block_size) { + pr_err("Zicbom detected in ISA string, but no cbom-block-size found\n"); + return false; + } else if (!is_power_of_2(riscv_cbom_block_size)) { + 
pr_err("cbom-block-size present, but is not a power-of-2\n"); + return false; + } + return true; + } + return true; } From 5f2e094ed2592abb3d1f49e263957188e121c2ac Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Thu, 20 Oct 2022 00:31:55 +0900 Subject: [PATCH 3616/4122] tracing: Allow multiple hitcount values in histograms The hitcount is treated specially in the histograms - since it's always expected to be there regardless of whether the user specified anything or not, it's always added as the first histogram value. Currently the code doesn't allow it to be added more than once as a value, which is inconsistent with all the other possible values. It would seem to be a pointless thing to want to do, but other features being added such as percent and graph modifiers don't work properly with the current hitcount restrictions. Fix this by allowing multiple hitcounts to be added. Link: https://lore.kernel.org/linux-trace-kernel/166610812248.56030.16754785928712505251.stgit@devnote2 Signed-off-by: Tom Zanussi Signed-off-by: Masami Hiramatsu (Google) Tested-by: Tom Zanussi --- kernel/trace/trace_events_hist.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 1c82478e8dff..31d58ddcc1d9 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -1356,6 +1356,8 @@ static const char *hist_field_name(struct hist_field *field, field_name = field->name; } else if (field->flags & HIST_FIELD_FL_TIMESTAMP) field_name = "common_timestamp"; + else if (field->flags & HIST_FIELD_FL_HITCOUNT) + field_name = "hitcount"; if (field_name == NULL) field_name = ""; @@ -2328,6 +2330,8 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file, hist_data->attrs->ts_in_usecs = true; } else if (strcmp(field_name, "common_cpu") == 0) *flags |= HIST_FIELD_FL_CPU; + else if (strcmp(field_name, "hitcount") == 0) + *flags |= 
HIST_FIELD_FL_HITCOUNT; else { field = trace_find_event_field(file->event_call, field_name); if (!field || !field->size) { @@ -4328,8 +4332,8 @@ static int create_var_field(struct hist_trigger_data *hist_data, static int create_val_fields(struct hist_trigger_data *hist_data, struct trace_event_file *file) { + unsigned int i, j = 1, n_hitcount = 0; char *fields_str, *field_str; - unsigned int i, j = 1; int ret; ret = create_hitcount_val(hist_data); @@ -4346,8 +4350,10 @@ static int create_val_fields(struct hist_trigger_data *hist_data, if (!field_str) break; - if (strcmp(field_str, "hitcount") == 0) - continue; + if (strcmp(field_str, "hitcount") == 0) { + if (!n_hitcount++) + continue; + } ret = create_val_field(hist_data, j++, file, field_str); if (ret) From abaa5258ce5e5887a9de049f50a85dc023391a1c Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Thu, 20 Oct 2022 00:31:55 +0900 Subject: [PATCH 3617/4122] tracing: Add .percent suffix option to histogram values Add .percent suffix option to show the histogram values in percentage. This feature is useful when we need to understand the overall trend for the histograms of large values. E.g. this shows the runtime percentage for each task.
------ # cd /sys/kernel/debug/tracing/ # echo hist:keys=pid:vals=hitcount,runtime.percent:sort=pid > \ events/sched/sched_stat_runtime/trigger # sleep 10 # cat events/sched/sched_stat_runtime/hist # event histogram # # trigger info: hist:keys=pid:vals=hitcount,runtime.percent:sort=pid:size=2048 [active] # { pid: 8 } hitcount: 7 runtime (%): 4.14 { pid: 14 } hitcount: 5 runtime (%): 3.69 { pid: 16 } hitcount: 11 runtime (%): 3.41 { pid: 61 } hitcount: 41 runtime (%): 19.75 { pid: 65 } hitcount: 4 runtime (%): 1.48 { pid: 70 } hitcount: 6 runtime (%): 3.60 { pid: 72 } hitcount: 2 runtime (%): 1.10 { pid: 144 } hitcount: 10 runtime (%): 32.01 { pid: 151 } hitcount: 8 runtime (%): 22.66 { pid: 152 } hitcount: 2 runtime (%): 8.10 Totals: Hits: 96 Entries: 10 Dropped: 0 ----- Link: https://lore.kernel.org/linux-trace-kernel/166610813077.56030.4238090506973562347.stgit@devnote2 Signed-off-by: Masami Hiramatsu (Google) Reviewed-by: Tom Zanussi Tested-by: Tom Zanussi --- kernel/trace/trace.c | 3 +- kernel/trace/trace_events_hist.c | 90 +++++++++++++++++++++++++++----- 2 files changed, 78 insertions(+), 15 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 93a75a97118f..08e9568849b1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5724,7 +5724,8 @@ static const char readme_msg[] = "\t .syscall display a syscall id as a syscall name\n" "\t .log2 display log2 value rather than raw number\n" "\t .buckets=size display values in groups of size rather than raw number\n" - "\t .usecs display a common_timestamp in microseconds\n\n" + "\t .usecs display a common_timestamp in microseconds\n" + "\t .percent display a number of percentage value\n\n" "\t The 'pause' parameter can be used to pause an existing hist\n" "\t trigger or to start a hist trigger but not log any events\n" "\t until told to do so. 
'continue' can be used to start or\n" diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 31d58ddcc1d9..35b0e956f06e 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -506,6 +506,7 @@ enum hist_field_flags { HIST_FIELD_FL_ALIAS = 1 << 16, HIST_FIELD_FL_BUCKET = 1 << 17, HIST_FIELD_FL_CONST = 1 << 18, + HIST_FIELD_FL_PERCENT = 1 << 19, }; struct var_defs { @@ -1707,6 +1708,8 @@ static const char *get_hist_field_flags(struct hist_field *hist_field) flags_str = "buckets"; else if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP_USECS) flags_str = "usecs"; + else if (hist_field->flags & HIST_FIELD_FL_PERCENT) + flags_str = "percent"; return flags_str; } @@ -2315,6 +2318,10 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file, if (ret || !(*buckets)) goto error; *flags |= HIST_FIELD_FL_BUCKET; + } else if (strncmp(modifier, "percent", 7) == 0) { + if (*flags & (HIST_FIELD_FL_VAR | HIST_FIELD_FL_KEY)) + goto error; + *flags |= HIST_FIELD_FL_PERCENT; } else { error: hist_err(tr, HIST_ERR_BAD_FIELD_MODIFIER, errpos(modifier)); @@ -5291,33 +5298,69 @@ static void hist_trigger_print_key(struct seq_file *m, seq_puts(m, "}"); } +/* Get the 100 times of the percentage of @val in @total */ +static inline unsigned int __get_percentage(u64 val, u64 total) +{ + if (!total) + goto div0; + + if (val < (U64_MAX / 10000)) + return (unsigned int)div64_ul(val * 10000, total); + + total = div64_u64(total, 10000); + if (!total) + goto div0; + + return (unsigned int)div64_ul(val, total); +div0: + return val ? 
UINT_MAX : 0; +} + +static void hist_trigger_print_val(struct seq_file *m, unsigned int idx, + const char *field_name, unsigned long flags, + u64 *totals, struct tracing_map_elt *elt) +{ + u64 val = tracing_map_read_sum(elt, idx); + unsigned int pc; + + if (flags & HIST_FIELD_FL_PERCENT) { + pc = __get_percentage(val, totals[idx]); + if (pc == UINT_MAX) + seq_printf(m, " %s (%%):[ERROR]", field_name); + else + seq_printf(m, " %s (%%): %3u.%02u", field_name, + pc / 100, pc % 100); + } else if (flags & HIST_FIELD_FL_HEX) { + seq_printf(m, " %s: %10llx", field_name, val); + } else { + seq_printf(m, " %s: %10llu", field_name, val); + } +} + static void hist_trigger_entry_print(struct seq_file *m, struct hist_trigger_data *hist_data, + u64 *totals, void *key, struct tracing_map_elt *elt) { const char *field_name; - unsigned int i; + unsigned int i = HITCOUNT_IDX; + unsigned long flags; hist_trigger_print_key(m, hist_data, key, elt); - seq_printf(m, " hitcount: %10llu", - tracing_map_read_sum(elt, HITCOUNT_IDX)); + /* At first, show the raw hitcount always */ + hist_trigger_print_val(m, i, "hitcount", 0, totals, elt); for (i = 1; i < hist_data->n_vals; i++) { field_name = hist_field_name(hist_data->fields[i], 0); + flags = hist_data->fields[i]->flags; - if (hist_data->fields[i]->flags & HIST_FIELD_FL_VAR || - hist_data->fields[i]->flags & HIST_FIELD_FL_EXPR) + if (flags & HIST_FIELD_FL_VAR || flags & HIST_FIELD_FL_EXPR) continue; - if (hist_data->fields[i]->flags & HIST_FIELD_FL_HEX) { - seq_printf(m, " %s: %10llx", field_name, - tracing_map_read_sum(elt, i)); - } else { - seq_printf(m, " %s: %10llu", field_name, - tracing_map_read_sum(elt, i)); - } + seq_puts(m, " "); + hist_trigger_print_val(m, i, field_name, flags, totals, elt); } print_actions(m, hist_data, elt); @@ -5330,7 +5373,8 @@ static int print_entries(struct seq_file *m, { struct tracing_map_sort_entry **sort_entries = NULL; struct tracing_map *map = hist_data->map; - int i, n_entries; + int i, j, n_entries; 
+ u64 *totals = NULL; n_entries = tracing_map_sort_entries(map, hist_data->sort_keys, hist_data->n_sort_keys, @@ -5338,11 +5382,29 @@ static int print_entries(struct seq_file *m, if (n_entries < 0) return n_entries; + for (j = 0; j < hist_data->n_vals; j++) { + if (!(hist_data->fields[j]->flags & HIST_FIELD_FL_PERCENT)) + continue; + if (!totals) { + totals = kcalloc(hist_data->n_vals, sizeof(u64), + GFP_KERNEL); + if (!totals) { + n_entries = -ENOMEM; + goto out; + } + } + for (i = 0; i < n_entries; i++) + totals[j] += tracing_map_read_sum( + sort_entries[i]->elt, j); + } + for (i = 0; i < n_entries; i++) - hist_trigger_entry_print(m, hist_data, + hist_trigger_entry_print(m, hist_data, totals, sort_entries[i]->key, sort_entries[i]->elt); + kfree(totals); +out: tracing_map_destroy_sort_entries(sort_entries, n_entries); return n_entries; From a2c54256dec7510477e2b4f4db187e638f7cac37 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Thu, 20 Oct 2022 00:31:55 +0900 Subject: [PATCH 3618/4122] tracing: Add .graph suffix option to histogram value Add the .graph suffix which shows the bar graph of the histogram value. For example, the below example shows the bar graph of the histogram of the runtime for each task.
------ # cd /sys/kernel/debug/tracing/ # echo hist:keys=pid:vals=runtime.graph:sort=pid > \ events/sched/sched_stat_runtime/trigger # sleep 10 # cat events/sched/sched_stat_runtime/hist # event histogram # # trigger info: hist:keys=pid:vals=hitcount,runtime.graph:sort=pid:size=2048 [active] # { pid: 14 } hitcount: 2 runtime: { pid: 16 } hitcount: 8 runtime: { pid: 26 } hitcount: 1 runtime: { pid: 57 } hitcount: 3 runtime: { pid: 61 } hitcount: 20 runtime: ### { pid: 66 } hitcount: 2 runtime: { pid: 70 } hitcount: 3 runtime: { pid: 72 } hitcount: 2 runtime: { pid: 145 } hitcount: 14 runtime: #################### { pid: 152 } hitcount: 5 runtime: ####### { pid: 153 } hitcount: 2 runtime: #### Totals: Hits: 62 Entries: 11 Dropped: 0 ------- Link: https://lore.kernel.org/linux-trace-kernel/166610813953.56030.10944148382315789485.stgit@devnote2 Signed-off-by: Masami Hiramatsu (Google) Reviewed-by: Tom Zanussi Tested-by: Tom Zanussi --- kernel/trace/trace.c | 3 +- kernel/trace/trace_events_hist.c | 77 +++++++++++++++++++++++++------- 2 files changed, 63 insertions(+), 17 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 08e9568849b1..55aec4616d8b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5725,7 +5725,8 @@ static const char readme_msg[] = "\t .log2 display log2 value rather than raw number\n" "\t .buckets=size display values in groups of size rather than raw number\n" "\t .usecs display a common_timestamp in microseconds\n" - "\t .percent display a number of percentage value\n\n" + "\t .percent display a number of percentage value\n" + "\t .graph display a bar-graph of a value\n\n" "\t The 'pause' parameter can be used to pause an existing hist\n" "\t trigger or to start a hist trigger but not log any events\n" "\t until told to do so. 
'continue' can be used to start or\n" diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 35b0e956f06e..946b2b8f0f2c 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -507,6 +507,7 @@ enum hist_field_flags { HIST_FIELD_FL_BUCKET = 1 << 17, HIST_FIELD_FL_CONST = 1 << 18, HIST_FIELD_FL_PERCENT = 1 << 19, + HIST_FIELD_FL_GRAPH = 1 << 20, }; struct var_defs { @@ -1710,6 +1711,8 @@ static const char *get_hist_field_flags(struct hist_field *hist_field) flags_str = "usecs"; else if (hist_field->flags & HIST_FIELD_FL_PERCENT) flags_str = "percent"; + else if (hist_field->flags & HIST_FIELD_FL_GRAPH) + flags_str = "graph"; return flags_str; } @@ -2322,6 +2325,10 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file, if (*flags & (HIST_FIELD_FL_VAR | HIST_FIELD_FL_KEY)) goto error; *flags |= HIST_FIELD_FL_PERCENT; + } else if (strncmp(modifier, "graph", 5) == 0) { + if (*flags & (HIST_FIELD_FL_VAR | HIST_FIELD_FL_KEY)) + goto error; + *flags |= HIST_FIELD_FL_GRAPH; } else { error: hist_err(tr, HIST_ERR_BAD_FIELD_MODIFIER, errpos(modifier)); @@ -5316,20 +5323,52 @@ div0: return val ? 
UINT_MAX : 0; } +#define BAR_CHAR '#' + +static inline const char *__fill_bar_str(char *buf, int size, u64 val, u64 max) +{ + unsigned int len = __get_percentage(val, max); + int i; + + if (len == UINT_MAX) { + snprintf(buf, size, "[ERROR]"); + return buf; + } + + len = len * size / 10000; + for (i = 0; i < len && i < size; i++) + buf[i] = BAR_CHAR; + while (i < size) + buf[i++] = ' '; + buf[size] = '\0'; + + return buf; +} + +struct hist_val_stat { + u64 max; + u64 total; +}; + static void hist_trigger_print_val(struct seq_file *m, unsigned int idx, const char *field_name, unsigned long flags, - u64 *totals, struct tracing_map_elt *elt) + struct hist_val_stat *stats, + struct tracing_map_elt *elt) { u64 val = tracing_map_read_sum(elt, idx); unsigned int pc; + char bar[21]; if (flags & HIST_FIELD_FL_PERCENT) { - pc = __get_percentage(val, totals[idx]); + pc = __get_percentage(val, stats[idx].total); if (pc == UINT_MAX) seq_printf(m, " %s (%%):[ERROR]", field_name); else seq_printf(m, " %s (%%): %3u.%02u", field_name, pc / 100, pc % 100); + } else if (flags & HIST_FIELD_FL_GRAPH) { + seq_printf(m, " %s: %20s", field_name, + __fill_bar_str(bar, 20, val, stats[idx].max)); } else if (flags & HIST_FIELD_FL_HEX) { seq_printf(m, " %s: %10llx", field_name, val); } else { @@ -5339,7 +5378,7 @@ static void hist_trigger_print_val(struct seq_file *m, unsigned int idx, static void hist_trigger_entry_print(struct seq_file *m, struct hist_trigger_data *hist_data, - u64 *totals, + struct hist_val_stat *stats, void *key, struct tracing_map_elt *elt) { @@ -5350,7 +5389,7 @@ static void hist_trigger_entry_print(struct seq_file *m, hist_trigger_print_key(m, hist_data, key, elt); /* At first, show the raw hitcount always */ - hist_trigger_print_val(m, i, "hitcount", 0, totals, elt); + hist_trigger_print_val(m, i, "hitcount", 0, stats, elt); for (i = 1; i < hist_data->n_vals; i++) { field_name = hist_field_name(hist_data->fields[i], 0); @@ -5360,7 +5399,7 @@ static void 
hist_trigger_entry_print(struct seq_file *m, continue; seq_puts(m, " "); - hist_trigger_print_val(m, i, field_name, flags, totals, elt); + hist_trigger_print_val(m, i, field_name, flags, stats, elt); } print_actions(m, hist_data, elt); @@ -5374,7 +5413,8 @@ static int print_entries(struct seq_file *m, struct tracing_map_sort_entry **sort_entries = NULL; struct tracing_map *map = hist_data->map; int i, j, n_entries; - u64 *totals = NULL; + struct hist_val_stat *stats = NULL; + u64 val; n_entries = tracing_map_sort_entries(map, hist_data->sort_keys, hist_data->n_sort_keys, @@ -5382,28 +5422,33 @@ static int print_entries(struct seq_file *m, if (n_entries < 0) return n_entries; + /* Calculate the max and the total for each field if needed. */ for (j = 0; j < hist_data->n_vals; j++) { - if (!(hist_data->fields[j]->flags & HIST_FIELD_FL_PERCENT)) + if (!(hist_data->fields[j]->flags & + (HIST_FIELD_FL_PERCENT | HIST_FIELD_FL_GRAPH))) continue; - if (!totals) { - totals = kcalloc(hist_data->n_vals, sizeof(u64), - GFP_KERNEL); - if (!totals) { + if (!stats) { + stats = kcalloc(hist_data->n_vals, sizeof(*stats), + GFP_KERNEL); + if (!stats) { n_entries = -ENOMEM; goto out; } } - for (i = 0; i < n_entries; i++) - totals[j] += tracing_map_read_sum( - sort_entries[i]->elt, j); + for (i = 0; i < n_entries; i++) { + val = tracing_map_read_sum(sort_entries[i]->elt, j); + stats[j].total += val; + if (stats[j].max < val) + stats[j].max = val; + } } for (i = 0; i < n_entries; i++) - hist_trigger_entry_print(m, hist_data, totals, + hist_trigger_entry_print(m, hist_data, stats, sort_entries[i]->key, sort_entries[i]->elt); - kfree(totals); + kfree(stats); out: tracing_map_destroy_sort_entries(sort_entries, n_entries); From ccf47f5cc4cebdba7444cf1c04fc1b02d4b3cd15 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Thu, 20 Oct 2022 00:31:55 +0900 Subject: [PATCH 3619/4122] tracing: Add nohitcount option for suppressing display of raw hitcount Add 'nohitcount' ('NOHC' for 
short) option for suppressing display of the raw hitcount column in the histogram. Note that you must specify at least one value except raw 'hitcount' when you specify this nohitcount option. # cd /sys/kernel/debug/tracing/ # echo hist:keys=pid:vals=runtime.percent,runtime.graph:sort=pid:NOHC > \ events/sched/sched_stat_runtime/trigger # sleep 10 # cat events/sched/sched_stat_runtime/hist # event histogram # # trigger info: hist:keys=pid:vals=runtime.percent,runtime.graph:sort=pid:size=2048:nohitcount [active] # { pid: 8 } runtime (%): 3.02 runtime: # { pid: 14 } runtime (%): 2.25 runtime: { pid: 16 } runtime (%): 2.25 runtime: { pid: 26 } runtime (%): 0.17 runtime: { pid: 61 } runtime (%): 11.52 runtime: #### { pid: 67 } runtime (%): 1.56 runtime: { pid: 68 } runtime (%): 0.84 runtime: { pid: 76 } runtime (%): 0.92 runtime: { pid: 117 } runtime (%): 2.50 runtime: # { pid: 146 } runtime (%): 49.88 runtime: #################### { pid: 157 } runtime (%): 16.63 runtime: ###### { pid: 158 } runtime (%): 8.38 runtime: ### Link: https://lore.kernel.org/linux-trace-kernel/166610814787.56030.4980636083486339906.stgit@devnote2 Signed-off-by: Masami Hiramatsu (Google) Reviewed-by: Tom Zanussi Tested-by: Tom Zanussi --- kernel/trace/trace.c | 3 +++ kernel/trace/trace_events_hist.c | 34 ++++++++++++++++++++++++-------- 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 55aec4616d8b..948f321b9df1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5678,6 +5678,7 @@ static const char readme_msg[] = "\t [:size=#entries]\n" "\t [:pause][:continue][:clear]\n" "\t [:name=histname1]\n" + "\t [:nohitcount]\n" "\t [:.]\n" "\t [if ]\n\n" "\t Note, special fields can be used as well:\n" @@ -5734,6 +5735,8 @@ static const char readme_msg[] = "\t The 'clear' parameter will clear the contents of a running\n" "\t hist trigger and leave its current paused/active state\n" "\t unchanged.\n\n" + "\t The 'nohitcount' (or 
NOHC) parameter will suppress display of\n" + "\t raw hitcount in the histogram.\n\n" "\t The enable_hist and disable_hist triggers can be used to\n" "\t have one event conditionally start and stop another event's\n" "\t already-attached hist trigger. The syntax is analogous to\n" diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 946b2b8f0f2c..a0cd118af527 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -69,7 +69,8 @@ C(INVALID_STR_OPERAND, "String type can not be an operand in expression"), \ C(EXPECT_NUMBER, "Expecting numeric literal"), \ C(UNARY_MINUS_SUBEXPR, "Unary minus not supported in sub-expressions"), \ - C(DIVISION_BY_ZERO, "Division by zero"), + C(DIVISION_BY_ZERO, "Division by zero"), \ + C(NEED_NOHC_VAL, "Non-hitcount value is required for 'nohitcount'"), #undef C #define C(a, b) HIST_ERR_##a @@ -526,6 +527,7 @@ struct hist_trigger_attrs { bool cont; bool clear; bool ts_in_usecs; + bool no_hitcount; unsigned int map_bits; char *assignment_str[TRACING_MAP_VARS_MAX]; @@ -1550,7 +1552,10 @@ parse_hist_trigger_attrs(struct trace_array *tr, char *trigger_str) ret = parse_assignment(tr, str, attrs); if (ret) goto free; - } else if (strcmp(str, "pause") == 0) + } else if (strcmp(str, "nohitcount") == 0 || + strcmp(str, "NOHC") == 0) + attrs->no_hitcount = true; + else if (strcmp(str, "pause") == 0) attrs->pause = true; else if ((strcmp(str, "cont") == 0) || (strcmp(str, "continue") == 0)) @@ -4377,6 +4382,12 @@ static int create_val_fields(struct hist_trigger_data *hist_data, if (fields_str && (strcmp(fields_str, "hitcount") != 0)) ret = -EINVAL; out: + /* There is only raw hitcount but nohitcount suppresses it. 
*/ + if (j == 1 && hist_data->attrs->no_hitcount) { + hist_err(hist_data->event_file->tr, HIST_ERR_NEED_NOHC_VAL, 0); + ret = -ENOENT; + } + return ret; } @@ -5388,13 +5399,13 @@ static void hist_trigger_entry_print(struct seq_file *m, hist_trigger_print_key(m, hist_data, key, elt); - /* At first, show the raw hitcount always */ - hist_trigger_print_val(m, i, "hitcount", 0, stats, elt); + /* At first, show the raw hitcount if !nohitcount */ + if (!hist_data->attrs->no_hitcount) + hist_trigger_print_val(m, i, "hitcount", 0, stats, elt); for (i = 1; i < hist_data->n_vals; i++) { field_name = hist_field_name(hist_data->fields[i], 0); flags = hist_data->fields[i]->flags; - if (flags & HIST_FIELD_FL_VAR || flags & HIST_FIELD_FL_EXPR) continue; @@ -5839,6 +5850,7 @@ static int event_hist_trigger_print(struct seq_file *m, struct hist_trigger_data *hist_data = data->private_data; struct hist_field *field; bool have_var = false; + bool show_val = false; unsigned int i; seq_puts(m, HIST_PREFIX); @@ -5869,12 +5881,16 @@ static int event_hist_trigger_print(struct seq_file *m, continue; } - if (i == HITCOUNT_IDX) + if (i == HITCOUNT_IDX) { + if (hist_data->attrs->no_hitcount) + continue; seq_puts(m, "hitcount"); - else { - seq_puts(m, ","); + } else { + if (show_val) + seq_puts(m, ","); hist_field_print(m, field); } + show_val = true; } if (have_var) { @@ -5925,6 +5941,8 @@ static int event_hist_trigger_print(struct seq_file *m, seq_printf(m, ":size=%u", (1 << hist_data->map->map_bits)); if (hist_data->enable_timestamps) seq_printf(m, ":clock=%s", hist_data->attrs->clock); + if (hist_data->attrs->no_hitcount) + seq_puts(m, ":nohitcount"); print_actions_spec(m, hist_data); From 8c2b99790196d34a58a2b00a4c9862d2de3af3e2 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Thu, 20 Oct 2022 00:31:56 +0900 Subject: [PATCH 3620/4122] tracing: docs: Update histogram doc for .percent/.graph and 'nohitcount' Update histogram document for .percent/.graph suffixes and 
'nohitcount' option. Link: https://lore.kernel.org/linux-trace-kernel/166610815604.56030.4124933216911828519.stgit@devnote2 Signed-off-by: Masami Hiramatsu (Google) Reviewed-by: Tom Zanussi Tested-by: Tom Zanussi --- Documentation/trace/histogram.rst | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/Documentation/trace/histogram.rst b/Documentation/trace/histogram.rst index 87bd772836c0..f95459aa984f 100644 --- a/Documentation/trace/histogram.rst +++ b/Documentation/trace/histogram.rst @@ -25,7 +25,7 @@ Documentation written by Tom Zanussi hist:keys=[:values=] [:sort=][:size=#entries][:pause][:continue] - [:clear][:name=histname1][:.] [if ] + [:clear][:name=histname1][:nohitcount][:.] [if ] When a matching event is hit, an entry is added to a hash table using the key(s) and value(s) named. Keys and values correspond to @@ -79,6 +79,8 @@ Documentation written by Tom Zanussi .log2 display log2 value rather than raw number .buckets=size display grouping of values rather than raw number .usecs display a common_timestamp in microseconds + .percent display a number of percentage value + .graph display a bar-graph of a value ============= ================================================= Note that in general the semantics of a given field aren't @@ -137,6 +139,12 @@ Documentation written by Tom Zanussi existing trigger, rather than via the '>' operator, which will cause the trigger to be removed through truncation. + The 'nohitcount' (or NOHC) parameter will suppress display of + raw hitcount in the histogram. This option requires at least one + value field which is not a 'raw hitcount'. For example, + 'hist:...:vals=hitcount:nohitcount' is rejected, but + 'hist:...:vals=hitcount.percent:nohitcount' is OK. 
+ - enable_hist/disable_hist The enable_hist and disable_hist triggers can be used to have one From fff1787adaeebe66f27c01d5c40d8d2e4d79d5ee Mon Sep 17 00:00:00 2001 From: Song Chen Date: Wed, 19 Oct 2022 10:11:18 +0800 Subject: [PATCH 3621/4122] trace/kprobe: remove duplicated calls of ring_buffer_event_data Function __kprobe_trace_func calls ring_buffer_event_data to get a ring buffer, however, it has been done in above call trace_event_buffer_reserve. So does __kretprobe_trace_func. This patch removes those duplicated calls. Link: https://lore.kernel.org/all/1666145478-4706-1-git-send-email-chensong_2000@189.cn/ Reviewed-by: Steven Rostedt (Google) Acked-by: Masami Hiramatsu (Google) Signed-off-by: Song Chen Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/trace_kprobe.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 5a75b039e586..ee77c8203bd5 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1344,7 +1344,6 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs, return; fbuffer.regs = regs; - entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event); entry->ip = (unsigned long)tk->rp.kp.addr; store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize); @@ -1385,7 +1384,6 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, return; fbuffer.regs = regs; - entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event); entry->func = (unsigned long)tk->rp.kp.addr; entry->ret_ip = get_kretprobe_retaddr(ri); store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize); From 575b76cb885532aae13a9d979fd476bb2b156cb9 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 22 Nov 2022 12:23:45 -0500 Subject: [PATCH 3622/4122] tracing/probes: Handle system names with hyphens When creating probe names, a check is done to make sure it matches basic C standard variable naming standards. 
Basically, it starts with an alphabetic character or underscore, and the rest of the characters are alpha-numeric or underscores. But system names do not have any true naming conventions, as they are created by the TRACE_SYSTEM macro and nothing tests to see what they are. The "xhci-hcd" trace events have a '-' in the system name. When trying to attach an eprobe to one of these trace points, it fails because the system name does not follow the variable naming convention because of the hyphen, and the eprobe checks fail on this. Allow hyphens in the system name so that eprobes can attach to the "xhci-hcd" trace events. Link: https://lore.kernel.org/all/Y3eJ8GiGnEvVd8%2FN@macondo/ Link: https://lore.kernel.org/linux-trace-kernel/20221122122345.160f5077@gandalf.local.home Cc: Masami Hiramatsu Cc: stable@vger.kernel.org Fixes: 5b7a96220900e ("tracing/probe: Check event/group naming rule at parsing") Reported-by: Rafael Mendonca Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.h | 19 ++++++++++++++++--- kernel/trace/trace_probe.c | 2 +- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 48643f07bc01..8f37ff032b4f 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1954,17 +1954,30 @@ static __always_inline void trace_iterator_reset(struct trace_iterator *iter) } /* Check the name is good for event/group/fields */ -static inline bool is_good_name(const char *name) +static inline bool __is_good_name(const char *name, bool hash_ok) { - if (!isalpha(*name) && *name != '_') + if (!isalpha(*name) && *name != '_' && (!hash_ok || *name != '-')) return false; while (*++name != '\0') { - if (!isalpha(*name) && !isdigit(*name) && *name != '_') + if (!isalpha(*name) && !isdigit(*name) && *name != '_' && + (!hash_ok || *name != '-')) return false; } return true; } +/* Check the name is good for event/group/fields */ +static inline bool is_good_name(const char *name) +{ + return __is_good_name(name, 
false); +} + +/* Check the name is good for system */ +static inline bool is_good_system_name(const char *name) +{ + return __is_good_name(name, true); +} + /* Convert certain expected symbols into '_' when generating event names */ static inline void sanitize_event_name(char *name) { diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 36dff277de46..bb2f95d7175c 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -246,7 +246,7 @@ int traceprobe_parse_event_name(const char **pevent, const char **pgroup, return -EINVAL; } strlcpy(buf, event, slash - event + 1); - if (!is_good_name(buf)) { + if (!is_good_system_name(buf)) { trace_probe_log_err(offset, BAD_GROUP_NAME); return -EINVAL; } From e25e43a4e5d8cb2323553d8b6a7ba08d2ebab21f Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Tue, 6 Dec 2022 23:18:01 +0900 Subject: [PATCH 3623/4122] tracing: Fix complicated dependency of CONFIG_TRACER_MAX_TRACE Both CONFIG_OSNOISE_TRACER and CONFIG_HWLAT_TRACER partially enable the CONFIG_TRACER_MAX_TRACE code, but that is complicated and has introduced a bug; It declares tracing_max_lat_fops data structure outside of #ifdefs, but since it is defined only when CONFIG_TRACER_MAX_TRACE=y or CONFIG_HWLAT_TRACER=y, if only CONFIG_OSNOISE_TRACER=y, that declaration becomes a definition(!). To fix this issue, and to avoid repeating a similar problem, make CONFIG_OSNOISE_TRACER and CONFIG_HWLAT_TRACER always enable CONFIG_TRACER_MAX_TRACE. It has three benefits: - Fix the tracing_max_lat_fops bug - Simplify the #ifdefs - CONFIG_TRACER_MAX_TRACE code is fully enabled, or not. 
Link: https://lore.kernel.org/linux-trace-kernel/167033628155.4111793.12185405690820208159.stgit@devnote3 Fixes: 424b650f35c7 ("tracing: Fix missing osnoise tracer on max_latency") Cc: Daniel Bristot de Oliveira Cc: stable@vger.kernel.org Reported-by: David Howells Reported-by: kernel test robot Signed-off-by: Masami Hiramatsu (Google) Link: https://lore.kernel.org/all/166992525941.1716618.13740663757583361463.stgit@warthog.procyon.org.uk/ (original thread and v1) Link: https://lore.kernel.org/all/202212052253.VuhZ2ulJ-lkp@intel.com/T/#u (v1 error report) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/Kconfig | 2 ++ kernel/trace/trace.c | 23 +++++++++++++---------- kernel/trace/trace.h | 8 +++----- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index e9e95c790b8e..93d724996283 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -375,6 +375,7 @@ config SCHED_TRACER config HWLAT_TRACER bool "Tracer to detect hardware latencies (like SMIs)" select GENERIC_TRACER + select TRACER_MAX_TRACE help This tracer, when enabled will create one or more kernel threads, depending on what the cpumask file is set to, which each thread @@ -410,6 +411,7 @@ config HWLAT_TRACER config OSNOISE_TRACER bool "OS Noise tracer" select GENERIC_TRACER + select TRACER_MAX_TRACE help In the context of high-performance computing (HPC), the Operating System Noise (osnoise) refers to the interference experienced by an diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 948f321b9df1..664619b3f1e1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1421,6 +1421,7 @@ int tracing_snapshot_cond_disable(struct trace_array *tr) return false; } EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable); +#define free_snapshot(tr) do { } while (0) #endif /* CONFIG_TRACER_SNAPSHOT */ void tracer_tracing_off(struct trace_array *tr) @@ -1692,6 +1693,8 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void 
*buf, size_t cnt) } unsigned long __read_mostly tracing_thresh; + +#ifdef CONFIG_TRACER_MAX_TRACE static const struct file_operations tracing_max_lat_fops; #ifdef LATENCY_FS_NOTIFY @@ -1748,18 +1751,14 @@ void latency_fsnotify(struct trace_array *tr) irq_work_queue(&tr->fsnotify_irqwork); } -#elif defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER) \ - || defined(CONFIG_OSNOISE_TRACER) +#else /* !LATENCY_FS_NOTIFY */ #define trace_create_maxlat_file(tr, d_tracer) \ trace_create_file("tracing_max_latency", TRACE_MODE_WRITE, \ d_tracer, &tr->max_latency, &tracing_max_lat_fops) -#else -#define trace_create_maxlat_file(tr, d_tracer) do { } while (0) #endif -#ifdef CONFIG_TRACER_MAX_TRACE /* * Copy the new maximum trace into the separate maximum-trace * structure. (this way the maximum trace is permanently saved, @@ -1834,14 +1833,15 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu, ring_buffer_record_off(tr->max_buffer.buffer); #ifdef CONFIG_TRACER_SNAPSHOT - if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) - goto out_unlock; + if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) { + arch_spin_unlock(&tr->max_lock); + return; + } #endif swap(tr->array_buffer.buffer, tr->max_buffer.buffer); __update_max_tr(tr, tsk, cpu); - out_unlock: arch_spin_unlock(&tr->max_lock); } @@ -1888,6 +1888,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) __update_max_tr(tr, tsk, cpu); arch_spin_unlock(&tr->max_lock); } + #endif /* CONFIG_TRACER_MAX_TRACE */ static int wait_on_pipe(struct trace_iterator *iter, int full) @@ -6577,7 +6578,7 @@ out: return ret; } -#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER) +#ifdef CONFIG_TRACER_MAX_TRACE static ssize_t tracing_max_lat_read(struct file *filp, char __user *ubuf, @@ -7592,7 +7593,7 @@ static const struct file_operations tracing_thresh_fops = { .llseek = generic_file_llseek, }; -#if defined(CONFIG_TRACER_MAX_TRACE) || 
defined(CONFIG_HWLAT_TRACER) +#ifdef CONFIG_TRACER_MAX_TRACE static const struct file_operations tracing_max_lat_fops = { .open = tracing_open_generic, .read = tracing_max_lat_read, @@ -9606,7 +9607,9 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) create_trace_options_dir(tr); +#ifdef CONFIG_TRACER_MAX_TRACE trace_create_maxlat_file(tr, d_tracer); +#endif if (ftrace_create_function_files(tr, d_tracer)) MEM_FAIL(1, "Could not allocate function filter files"); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 8f37ff032b4f..9dc920b01c17 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -308,8 +308,7 @@ struct trace_array { struct array_buffer max_buffer; bool allocated_snapshot; #endif -#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER) \ - || defined(CONFIG_OSNOISE_TRACER) +#ifdef CONFIG_TRACER_MAX_TRACE unsigned long max_latency; #ifdef CONFIG_FSNOTIFY struct dentry *d_max_latency; @@ -688,12 +687,11 @@ void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu, void *cond_data); void update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu); -#endif /* CONFIG_TRACER_MAX_TRACE */ -#if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER) \ - || defined(CONFIG_OSNOISE_TRACER)) && defined(CONFIG_FSNOTIFY) +#ifdef CONFIG_FSNOTIFY #define LATENCY_FS_NOTIFY #endif +#endif /* CONFIG_TRACER_MAX_TRACE */ #ifdef LATENCY_FS_NOTIFY void latency_fsnotify(struct trace_array *tr); From 4994e387d7332f03cd4eab55f7896ddf04cab1c0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 6 Dec 2022 14:12:02 -0500 Subject: [PATCH 3624/4122] x86/mm/kmmio: Switch to arch_spin_lock() The mmiotrace tracer is "special". 
The purpose is to help reverse engineer binary drivers by removing the memory allocated by the driver and when the driver goes to access it, a fault occurs, the mmiotracer will record what the driver was doing and then do the work on its behalf by single stepping through the process. But to achieve this ability, it must do some special things. One is it needs to grab a lock while in the breakpoint handler. This is considered an NMI state, and then lockdep warns that the lock is being held in both an NMI state (really a breakpoint handler) and also in normal context. As the breakpoint/NMI state only happens when the driver is accessing memory, there's no concern of a race condition against the setup and tear-down of mmiotracer. To make lockdep and mmiotrace work together, convert the locks used in the breakpoint handler into arch_spin_lock(). Link: https://lkml.kernel.org/r/20221206191229.656244029@goodmis.org Link: https://lore.kernel.org/lkml/20221201213126.620b7dd3@gandalf.local.home/ Cc: Masami Hiramatsu Cc: Andrew Morton Cc: Karol Herbst Cc: Pekka Paalanen Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Borislav Petkov Suggested-by: Thomas Gleixner Signed-off-by: Steven Rostedt (Google) --- arch/x86/mm/kmmio.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index d3efbc5b3449..edb486450158 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -62,7 +62,13 @@ struct kmmio_context { int active; }; -static DEFINE_SPINLOCK(kmmio_lock); +/* + * The kmmio_lock is taken in int3 context, which is treated as NMI context. + * This causes lockdep to complain about it being in both NMI and normal + * context. Hide it from lockdep, as it should not have any other locks + * taken under it, and this is only enabled for debugging mmio anyway. 
+ */ +static arch_spinlock_t kmmio_lock = __ARCH_SPIN_LOCK_UNLOCKED; /* Protected by kmmio_lock */ unsigned int kmmio_count; @@ -346,10 +352,10 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) ctx->probe->post_handler(ctx->probe, condition, regs); /* Prevent racing against release_kmmio_fault_page(). */ - spin_lock(&kmmio_lock); + arch_spin_lock(&kmmio_lock); if (ctx->fpage->count) arm_kmmio_fault_page(ctx->fpage); - spin_unlock(&kmmio_lock); + arch_spin_unlock(&kmmio_lock); regs->flags &= ~X86_EFLAGS_TF; regs->flags |= ctx->saved_flags; @@ -440,7 +446,8 @@ int register_kmmio_probe(struct kmmio_probe *p) unsigned int l; pte_t *pte; - spin_lock_irqsave(&kmmio_lock, flags); + local_irq_save(flags); + arch_spin_lock(&kmmio_lock); if (get_kmmio_probe(addr)) { ret = -EEXIST; goto out; @@ -460,7 +467,9 @@ int register_kmmio_probe(struct kmmio_probe *p) size += page_level_size(l); } out: - spin_unlock_irqrestore(&kmmio_lock, flags); + arch_spin_unlock(&kmmio_lock); + local_irq_restore(flags); + /* * XXX: What should I do here? * Here was a call to global_flush_tlb(), but it does not exist @@ -494,7 +503,8 @@ static void remove_kmmio_fault_pages(struct rcu_head *head) struct kmmio_fault_page **prevp = &dr->release_list; unsigned long flags; - spin_lock_irqsave(&kmmio_lock, flags); + local_irq_save(flags); + arch_spin_lock(&kmmio_lock); while (f) { if (!f->count) { list_del_rcu(&f->list); @@ -506,7 +516,8 @@ static void remove_kmmio_fault_pages(struct rcu_head *head) } f = *prevp; } - spin_unlock_irqrestore(&kmmio_lock, flags); + arch_spin_unlock(&kmmio_lock); + local_irq_restore(flags); /* This is the real RCU destroy call. 
*/ call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages); @@ -540,14 +551,16 @@ void unregister_kmmio_probe(struct kmmio_probe *p) if (!pte) return; - spin_lock_irqsave(&kmmio_lock, flags); + local_irq_save(flags); + arch_spin_lock(&kmmio_lock); while (size < size_lim) { release_kmmio_fault_page(addr + size, &release_list); size += page_level_size(l); } list_del_rcu(&p->list); kmmio_count--; - spin_unlock_irqrestore(&kmmio_lock, flags); + arch_spin_unlock(&kmmio_lock); + local_irq_restore(flags); if (!release_list) return; From f0c0ade8d874fb127f9b451d415bee8cbb6bf7a6 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 5 Dec 2022 22:27:38 +0100 Subject: [PATCH 3625/4122] gfs2: Minor gfs2_try_evict cleanup In gfs2_try_evict(), when an inode can't be evicted, we are grabbing a temporary reference on the inode glock to poke that glock. That should be safe, but it's easier to just grab an inode reference as we already do earlier in this function. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 6f2de8c0b2d0..c32c25b4c37c 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1023,8 +1023,6 @@ static bool gfs2_try_evict(struct gfs2_glock *gl) ip = NULL; spin_unlock(&gl->gl_lockref.lock); if (ip) { - struct gfs2_glock *inode_gl = NULL; - gl->gl_no_formal_ino = ip->i_no_formal_ino; set_bit(GIF_DEFERRED_DELETE, &ip->i_flags); d_prune_aliases(&ip->i_inode); @@ -1034,14 +1032,14 @@ static bool gfs2_try_evict(struct gfs2_glock *gl) spin_lock(&gl->gl_lockref.lock); ip = gl->gl_object; if (ip) { - inode_gl = ip->i_gl; - lockref_get(&inode_gl->gl_lockref); clear_bit(GIF_DEFERRED_DELETE, &ip->i_flags); + if (!igrab(&ip->i_inode)) + ip = NULL; } spin_unlock(&gl->gl_lockref.lock); - if (inode_gl) { - gfs2_glock_poke(inode_gl); - gfs2_glock_put(inode_gl); + if (ip) { + gfs2_glock_poke(ip->i_gl); + iput(&ip->i_inode); } evicted = !ip; } From 
d8d2b65a940bb497749d66bdab59b530901d3854 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 9 Dec 2022 11:01:00 -0600 Subject: [PATCH 3626/4122] PCI/portdrv: Allow AER service only for Root Ports & RCECs Previously portdrv allowed the AER service for any device with an AER capability (assuming Linux had control of AER) even though the AER service driver only attaches to Root Port and RCECs. Because get_port_device_capability() included AER for non-RP, non-RCEC devices, we tried to initialize the AER IRQ even though these devices don't generate AER interrupts. Intel DG1 and DG2 discrete graphics cards contain a switch leading to a GPU. The switch supports AER but not MSI, so initializing an AER IRQ failed, and portdrv failed to claim the switch port at all. The GPU itself could be suspended, but the switch could not be put in a low-power state because it had no driver. Don't allow the AER service on non-Root Port, non-Root Complex Event Collector devices. This means we won't enable Bus Mastering if the device doesn't require MSI, the AER service will not appear in sysfs, and the AER service driver will not bind to the device. 
Link: https://lore.kernel.org/r/20221207084105.84947-1-mika.westerberg@linux.intel.com Link: https://lore.kernel.org/r/20221210002922.1749403-1-helgaas@kernel.org Based-on-patch-by: Mika Westerberg Signed-off-by: Bjorn Helgaas Reviewed-by: Kuppuswamy Sathyanarayanan --- drivers/pci/pcie/portdrv.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pci/pcie/portdrv.c b/drivers/pci/pcie/portdrv.c index a6c4225505d5..8b16e96ec15c 100644 --- a/drivers/pci/pcie/portdrv.c +++ b/drivers/pci/pcie/portdrv.c @@ -232,7 +232,9 @@ static int get_port_device_capability(struct pci_dev *dev) } #ifdef CONFIG_PCIEAER - if (dev->aer_cap && pci_aer_available() && + if ((pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT || + pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC) && + dev->aer_cap && pci_aer_available() && (pcie_ports_native || host->native_aer)) services |= PCIE_PORT_SERVICE_AER; #endif From 07eab0901ede8b7540c52160663bd300cc238164 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 8 Dec 2022 13:03:38 -0600 Subject: [PATCH 3627/4122] efi/x86: Remove EfiMemoryMappedIO from E820 map Firmware can use EfiMemoryMappedIO to request that MMIO regions be mapped by the OS so they can be accessed by EFI runtime services, but should have no other significance to the OS (UEFI r2.10, sec 7.2). However, most bootloaders and EFI stubs convert EfiMemoryMappedIO regions to E820_TYPE_RESERVED entries, which prevent Linux from allocating space from them (see remove_e820_regions()). Some platforms use EfiMemoryMappedIO entries for PCI MMCONFIG space and PCI host bridge windows, which means Linux can't allocate BAR space for hot-added devices. Remove large EfiMemoryMappedIO regions from the E820 map to avoid this problem. Leave small (< 256KB) EfiMemoryMappedIO regions alone because on some platforms, these describe non-window space that's included in host bridge _CRS. If we assign that space to PCI devices, they don't work. 
On the Lenovo X1 Carbon, this leads to suspend/resume failures. The previous solution to the problem of allocating BARs in these regions was to add pci_crs_quirks[] entries to disable E820 checking for these machines (see d341838d776a ("x86/PCI: Disable E820 reserved region clipping via quirks")): Acer DMI_PRODUCT_NAME Spin SP513-54N Clevo DMI_BOARD_NAME X170KM-G Lenovo DMI_PRODUCT_VERSION *IIL* Florent reported the BAR allocation issue on the Clevo NL4XLU. We could add another quirk for the NL4XLU, but I hope this generic change can solve it for many machines without having to add quirks. This change has been tested on Clevo X170KM-G (Konrad) and Lenovo Ideapad Slim 3 (Matt) and solves the problem even when overriding the existing quirks by booting with "pci=use_e820". Link: https://bugzilla.kernel.org/show_bug.cgi?id=216565 Clevo NL4XLU Link: https://bugzilla.kernel.org/show_bug.cgi?id=206459#c78 Clevo X170KM-G Link: https://bugzilla.redhat.com/show_bug.cgi?id=1868899 Ideapad Slim 3 Link: https://bugzilla.redhat.com/show_bug.cgi?id=2029207 X1 Carbon Link: https://lore.kernel.org/r/20221208190341.1560157-2-helgaas@kernel.org Reported-by: Florent DELAHAYE Tested-by: Konrad J Hambrick Tested-by: Matt Hansen <2lprbe78@duck.com> Signed-off-by: Bjorn Helgaas Acked-by: Hans de Goede --- arch/x86/platform/efi/efi.c | 46 +++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index ebc98a68c400..75bf0e56bb53 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -303,6 +303,50 @@ static void __init efi_clean_memmap(void) } } +/* + * Firmware can use EfiMemoryMappedIO to request that MMIO regions be + * mapped by the OS so they can be accessed by EFI runtime services, but + * should have no other significance to the OS (UEFI r2.10, sec 7.2). 
+ * However, most bootloaders and EFI stubs convert EfiMemoryMappedIO + * regions to E820_TYPE_RESERVED entries, which prevent Linux from + * allocating space from them (see remove_e820_regions()). + * + * Some platforms use EfiMemoryMappedIO entries for PCI MMCONFIG space and + * PCI host bridge windows, which means Linux can't allocate BAR space for + * hot-added devices. + * + * Remove large EfiMemoryMappedIO regions from the E820 map to avoid this + * problem. + * + * Retain small EfiMemoryMappedIO regions because on some platforms, these + * describe non-window space that's included in host bridge _CRS. If we + * assign that space to PCI devices, they don't work. + */ +static void __init efi_remove_e820_mmio(void) +{ + efi_memory_desc_t *md; + u64 size, start, end; + int i = 0; + + for_each_efi_memory_desc(md) { + if (md->type == EFI_MEMORY_MAPPED_IO) { + size = md->num_pages << EFI_PAGE_SHIFT; + start = md->phys_addr; + end = start + size - 1; + if (size >= 256*1024) { + pr_info("Remove mem%02u: MMIO range=[0x%08llx-0x%08llx] (%lluMB) from e820 map\n", + i, start, end, size >> 20); + e820__range_remove(start, size, + E820_TYPE_RESERVED, 1); + } else { + pr_info("Not removing mem%02u: MMIO range=[0x%08llx-0x%08llx] (%lluKB) from e820 map\n", + i, start, end, size >> 10); + } + } + i++; + } +} + void __init efi_print_memmap(void) { efi_memory_desc_t *md; @@ -474,6 +518,8 @@ void __init efi_init(void) set_bit(EFI_RUNTIME_SERVICES, &efi.flags); efi_clean_memmap(); + efi_remove_e820_mmio(); + if (efi_enabled(EFI_DBG)) efi_print_memmap(); } From 5c5fb3c3a793b34554e1d21f07cda34308b082cd Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 8 Dec 2022 13:03:39 -0600 Subject: [PATCH 3628/4122] PCI: Skip allocate_resource() if too little space available pci_bus_alloc_from_region() allocates MMIO space by iterating through all the resources available on the bus. 
The available resource might be reduced if the caller requires 32-bit space or we're avoiding BIOS or E820 areas. Don't bother calling allocate_resource() if we need more space than is available in this resource. This prevents some pointless and annoying messages about avoided areas. Link: https://lore.kernel.org/r/20221208190341.1560157-3-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Acked-by: Hans de Goede --- drivers/pci/bus.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 3cef835b375f..83ae838ceb5f 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -197,6 +197,10 @@ static int pci_bus_alloc_from_region(struct pci_bus *bus, struct resource *res, max = avail.end; + /* Don't bother if available space isn't large enough */ + if (size > max - min_used + 1) + continue; + /* Ok, try it out.. */ ret = allocate_resource(r, res, size, min_used, max, align, alignf, alignf_data); From 00904bf64c2819d90a76407d79bdbc8918541320 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 8 Dec 2022 13:03:40 -0600 Subject: [PATCH 3629/4122] x86/PCI: Tidy E820 removal messages These messages: clipped [mem size 0x00000000 64bit] to [mem size 0xfffffffffffa0000 64bit] for e820 entry [mem 0x0009f000-0x000fffff] aren't as useful as they could be because (a) the resource is often IORESOURCE_UNSET, so we print the size instead of the start/end and (b) we print the available resource even if it is empty after removing the E820 entry. Print the available space by hand to avoid the IORESOURCE_UNSET problem and only if it's non-empty. No functional change intended. 
Link: https://lore.kernel.org/r/20221208190341.1560157-4-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Acked-by: Hans de Goede --- arch/x86/kernel/resource.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/resource.c b/arch/x86/kernel/resource.c index bba1abd05bfe..79bc8a97a083 100644 --- a/arch/x86/kernel/resource.c +++ b/arch/x86/kernel/resource.c @@ -42,8 +42,16 @@ static void remove_e820_regions(struct resource *avail) resource_clip(avail, e820_start, e820_end); if (orig.start != avail->start || orig.end != avail->end) { - pr_info("clipped %pR to %pR for e820 entry [mem %#010Lx-%#010Lx]\n", - &orig, avail, e820_start, e820_end); + pr_info("resource: avoiding allocation from e820 entry [mem %#010Lx-%#010Lx]\n", + e820_start, e820_end); + if (avail->end > avail->start) + /* + * Use %pa instead of %pR because "avail" + * is typically IORESOURCE_UNSET, so %pR + * shows the size instead of addresses. + */ + pr_info("resource: remaining [mem %pa-%pa] available\n", + &avail->start, &avail->end); orig = *avail; } } From 2bfa89fab5ff06703c034c4702e6ea4418194ffe Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 8 Dec 2022 13:03:41 -0600 Subject: [PATCH 3630/4122] x86/PCI: Fix log message typo Add missing word in the log message: - ... so future kernels can this automatically + ... so future kernels can do this automatically Suggested-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221208190341.1560157-5-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Reviewed-by: Andy Shevchenko Acked-by: Hans de Goede --- arch/x86/pci/acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 2f82480fd430..83dfea9e9894 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -245,7 +245,7 @@ void __init pci_acpi_crs_quirks(void) printk(KERN_INFO "PCI: %s E820 reservations for host bridge windows\n", pci_use_e820 ? 
"Using" : "Ignoring"); if (pci_probe & (PCI_NO_E820 | PCI_USE_E820)) - printk(KERN_INFO "PCI: Please notify linux-pci@vger.kernel.org so future kernels can this automatically\n"); + printk(KERN_INFO "PCI: Please notify linux-pci@vger.kernel.org so future kernels can do this automatically\n"); } #ifdef CONFIG_PCI_MMCONFIG From d91482bb212b36354b0e46d7a5c0adae807e7a12 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 9 Dec 2022 14:41:27 -0600 Subject: [PATCH 3631/4122] x86/PCI: Use pr_info() when possible Use pr_info() and similar when possible. No functional change intended. Link: https://lore.kernel.org/r/20221209205131.GA1726524@bhelgaas Suggested-by: Andy Shevchenko Signed-off-by: Bjorn Helgaas Reviewed-by: Andy Shevchenko --- arch/x86/pci/acpi.c | 39 ++++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 83dfea9e9894..ea2eb2ec90e2 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -1,4 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 + +#define pr_fmt(fmt) "PCI: " fmt + #include #include #include @@ -37,15 +40,15 @@ static int __init set_nouse_crs(const struct dmi_system_id *id) static int __init set_ignore_seg(const struct dmi_system_id *id) { - printk(KERN_INFO "PCI: %s detected: ignoring ACPI _SEG\n", id->ident); + pr_info("%s detected: ignoring ACPI _SEG\n", id->ident); pci_ignore_seg = true; return 0; } static int __init set_no_e820(const struct dmi_system_id *id) { - printk(KERN_INFO "PCI: %s detected: not clipping E820 regions from _CRS\n", - id->ident); + pr_info("%s detected: not clipping E820 regions from _CRS\n", + id->ident); pci_use_e820 = false; return 0; } @@ -231,10 +234,9 @@ void __init pci_acpi_crs_quirks(void) else if (pci_probe & PCI_USE__CRS) pci_use_crs = true; - printk(KERN_INFO "PCI: %s host bridge windows from ACPI; " - "if necessary, use \"pci=%s\" and report a bug\n", - pci_use_crs ? "Using" : "Ignoring", - pci_use_crs ? 
"nocrs" : "use_crs"); + pr_info("%s host bridge windows from ACPI; if necessary, use \"pci=%s\" and report a bug\n", + pci_use_crs ? "Using" : "Ignoring", + pci_use_crs ? "nocrs" : "use_crs"); /* "pci=use_e820"/"pci=no_e820" on the kernel cmdline takes precedence */ if (pci_probe & PCI_NO_E820) @@ -242,19 +244,17 @@ void __init pci_acpi_crs_quirks(void) else if (pci_probe & PCI_USE_E820) pci_use_e820 = true; - printk(KERN_INFO "PCI: %s E820 reservations for host bridge windows\n", - pci_use_e820 ? "Using" : "Ignoring"); + pr_info("%s E820 reservations for host bridge windows\n", + pci_use_e820 ? "Using" : "Ignoring"); if (pci_probe & (PCI_NO_E820 | PCI_USE_E820)) - printk(KERN_INFO "PCI: Please notify linux-pci@vger.kernel.org so future kernels can do this automatically\n"); + pr_info("Please notify linux-pci@vger.kernel.org so future kernels can do this automatically\n"); } #ifdef CONFIG_PCI_MMCONFIG static int check_segment(u16 seg, struct device *dev, char *estr) { if (seg) { - dev_err(dev, - "%s can't access PCI configuration " - "space under this host bridge.\n", + dev_err(dev, "%s can't access configuration space under this host bridge\n", estr); return -EIO; } @@ -264,9 +264,7 @@ static int check_segment(u16 seg, struct device *dev, char *estr) * just can't access extended configuration space of * devices under this host bridge. 
*/ - dev_warn(dev, - "%s can't access extended PCI configuration " - "space under this bridge.\n", + dev_warn(dev, "%s can't access extended configuration space under this bridge\n", estr); return 0; @@ -421,9 +419,8 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) root->segment = domain = 0; if (domain && !pci_domains_supported) { - printk(KERN_WARNING "pci_bus %04x:%02x: " - "ignored (multiple domains not supported)\n", - domain, busnum); + pr_warn("pci_bus %04x:%02x: ignored (multiple domains not supported)\n", + domain, busnum); return NULL; } @@ -491,7 +488,7 @@ int __init pci_acpi_init(void) if (acpi_noirq) return -ENODEV; - printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n"); + pr_info("Using ACPI for IRQ routing\n"); acpi_irq_penalty_init(); pcibios_enable_irq = acpi_pci_irq_enable; pcibios_disable_irq = acpi_pci_irq_disable; @@ -503,7 +500,7 @@ int __init pci_acpi_init(void) * also do it here in case there are still broken drivers that * don't use pci_enable_device(). */ - printk(KERN_INFO "PCI: Routing PCI interrupts for all devices because \"pci=routeirq\" specified\n"); + pr_info("Routing PCI interrupts for all devices because \"pci=routeirq\" specified\n"); for_each_pci_dev(dev) acpi_pci_irq_enable(dev); } From 20fb6c997600012209699564a79911584dd5876b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 9 Dec 2022 13:41:44 -0500 Subject: [PATCH 3632/4122] x86/mm/kmmio: Use rcu_read_lock_sched_notrace() The mmiotrace tracer is "special". The purpose is to help reverse engineer binary drivers by removing the memory allocated by the driver and when the driver goes to access it, a fault occurs, the mmiotracer will record what the driver was doing and then do the work on its behalf by single stepping through the process. But to achieve this ability, it must do some special things. One is to take the rcu_read_lock() when the fault occurs, and then release it in the breakpoint that is single stepping. 
This makes lockdep unhappy, as it changes the state of RCU from within an exception that is not contained in that exception, and we get a nasty splat from lockdep. Instead, switch to rcu_read_lock_sched_notrace() as the RCU sched variant has the same grace period as normal RCU. This is basically the same as rcu_read_lock() but does not make lockdep complain about it. Note, the preempt_disable() is still needed as it uses preempt_enable_no_resched(). Link: https://lore.kernel.org/linux-trace-kernel/20221209134144.04f33626@gandalf.local.home Cc: Masami Hiramatsu Cc: Andrew Morton Cc: Karol Herbst Cc: Pekka Paalanen Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Acked-by: Paul E. McKenney Signed-off-by: Steven Rostedt (Google) --- arch/x86/mm/kmmio.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index edb486450158..853c49877c16 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -254,7 +254,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) * again. */ preempt_disable(); - rcu_read_lock(); + rcu_read_lock_sched_notrace(); faultpage = get_kmmio_fault_page(page_base); if (!faultpage) { @@ -323,7 +323,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) return 1; /* fault handled */ no_kmmio: - rcu_read_unlock(); + rcu_read_unlock_sched_notrace(); preempt_enable_no_resched(); return ret; } @@ -363,7 +363,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) /* These were acquired in kmmio_handler(). 
*/ ctx->active--; BUG_ON(ctx->active); - rcu_read_unlock(); + rcu_read_unlock_sched_notrace(); preempt_enable_no_resched(); /* From 2cc6a528882d0e0ccbc1bca5f95b8c963cedac54 Mon Sep 17 00:00:00 2001 From: Zheng Yejian Date: Wed, 7 Dec 2022 11:46:35 +0800 Subject: [PATCH 3633/4122] tracing/hist: Fix wrong return value in parse_action_params() When number of synth fields is more than SYNTH_FIELDS_MAX, parse_action_params() should return -EINVAL. Link: https://lore.kernel.org/linux-trace-kernel/20221207034635.2253990-1-zhengyejian1@huawei.com Cc: Cc: Cc: stable@vger.kernel.org Fixes: c282a386a397 ("tracing: Add 'onmatch' hist trigger action support") Signed-off-by: Zheng Yejian Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_hist.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index a0cd118af527..b4ad86c22b43 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -3609,6 +3609,7 @@ static int parse_action_params(struct trace_array *tr, char *params, while (params) { if (data->n_params >= SYNTH_FIELDS_MAX) { hist_err(tr, HIST_ERR_TOO_MANY_PARAMS, 0); + ret = -EINVAL; goto out; } From 82470f7d9044842618c847a7166de2b7458157a7 Mon Sep 17 00:00:00 2001 From: Zheng Yejian Date: Wed, 7 Dec 2022 11:51:43 +0800 Subject: [PATCH 3634/4122] tracing/hist: Fix out-of-bound write on 'action_data.var_ref_idx' When generate a synthetic event with many params and then create a trace action for it [1], kernel panic happened [2]. It is because that in trace_action_create() 'data->n_params' is up to SYNTH_FIELDS_MAX (current value is 64), and array 'data->var_ref_idx' keeps indices into array 'hist_data->var_refs' for each synthetic event param, but the length of 'data->var_ref_idx' is TRACING_MAP_VARS_MAX (current value is 16), so out-of-bound write happened when 'data->n_params' more than 16. 
In this case, 'data->match_data.event' is overwritten and eventually cause the panic. To solve the issue, adjust the length of 'data->var_ref_idx' to be SYNTH_FIELDS_MAX and add sanity checks to avoid out-of-bound write. [1] # cd /sys/kernel/tracing/ # echo "my_synth_event int v1; int v2; int v3; int v4; int v5; int v6;\ int v7; int v8; int v9; int v10; int v11; int v12; int v13; int v14;\ int v15; int v16; int v17; int v18; int v19; int v20; int v21; int v22;\ int v23; int v24; int v25; int v26; int v27; int v28; int v29; int v30;\ int v31; int v32; int v33; int v34; int v35; int v36; int v37; int v38;\ int v39; int v40; int v41; int v42; int v43; int v44; int v45; int v46;\ int v47; int v48; int v49; int v50; int v51; int v52; int v53; int v54;\ int v55; int v56; int v57; int v58; int v59; int v60; int v61; int v62;\ int v63" >> synthetic_events # echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="bash"' >> \ events/sched/sched_waking/trigger # echo "hist:keys=next_pid:onmatch(sched.sched_waking).my_synth_event(\ pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,\ pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,\ pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,\ pid,pid,pid,pid,pid,pid,pid,pid,pid)" >> events/sched/sched_switch/trigger [2] BUG: unable to handle page fault for address: ffff91c900000000 PGD 61001067 P4D 61001067 PUD 0 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 2 PID: 322 Comm: bash Tainted: G W 6.1.0-rc8+ #229 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.15.0-0-g2dd4b9b3f840-prebuilt.qemu.org 04/01/2014 RIP: 0010:strcmp+0xc/0x30 Code: 75 f7 31 d2 44 0f b6 04 16 44 88 04 11 48 83 c2 01 45 84 c0 75 ee c3 cc cc cc cc 0f 1f 00 31 c0 eb 08 48 83 c0 01 84 d2 74 13 <0f> b6 14 07 3a 14 06 74 ef 19 c0 83 c8 01 c3 cc cc cc cc 31 c3 RSP: 0018:ffff9b3b00f53c48 EFLAGS: 00000246 RAX: 0000000000000000 RBX: ffffffffba958a68 RCX: 0000000000000000 RDX: 0000000000000010 RSI: 
ffff91c943d33a90 RDI: ffff91c900000000 RBP: ffff91c900000000 R08: 00000018d604b529 R09: 0000000000000000 R10: ffff91c9483eddb1 R11: ffff91ca483eddab R12: ffff91c946171580 R13: ffff91c9479f0538 R14: ffff91c9457c2848 R15: ffff91c9479f0538 FS: 00007f1d1cfbe740(0000) GS:ffff91c9bdc80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffff91c900000000 CR3: 0000000006316000 CR4: 00000000000006e0 Call Trace: __find_event_file+0x55/0x90 action_create+0x76c/0x1060 event_hist_trigger_parse+0x146d/0x2060 ? event_trigger_write+0x31/0xd0 trigger_process_regex+0xbb/0x110 event_trigger_write+0x6b/0xd0 vfs_write+0xc8/0x3e0 ? alloc_fd+0xc0/0x160 ? preempt_count_add+0x4d/0xa0 ? preempt_count_add+0x70/0xa0 ksys_write+0x5f/0xe0 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f1d1d0cf077 Code: 64 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00 00 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 RSP: 002b:00007ffcebb0e568 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000000143 RCX: 00007f1d1d0cf077 RDX: 0000000000000143 RSI: 00005639265aa7e0 RDI: 0000000000000001 RBP: 00005639265aa7e0 R08: 000000000000000a R09: 0000000000000142 R10: 000056392639c017 R11: 0000000000000246 R12: 0000000000000143 R13: 00007f1d1d1ae6a0 R14: 00007f1d1d1aa4a0 R15: 00007f1d1d1a98a0 Modules linked in: CR2: ffff91c900000000 ---[ end trace 0000000000000000 ]--- RIP: 0010:strcmp+0xc/0x30 Code: 75 f7 31 d2 44 0f b6 04 16 44 88 04 11 48 83 c2 01 45 84 c0 75 ee c3 cc cc cc cc 0f 1f 00 31 c0 eb 08 48 83 c0 01 84 d2 74 13 <0f> b6 14 07 3a 14 06 74 ef 19 c0 83 c8 01 c3 cc cc cc cc 31 c3 RSP: 0018:ffff9b3b00f53c48 EFLAGS: 00000246 RAX: 0000000000000000 RBX: ffffffffba958a68 RCX: 0000000000000000 RDX: 0000000000000010 RSI: ffff91c943d33a90 RDI: ffff91c900000000 RBP: ffff91c900000000 R08: 00000018d604b529 R09: 0000000000000000 R10: 
ffff91c9483eddb1 R11: ffff91ca483eddab R12: ffff91c946171580 R13: ffff91c9479f0538 R14: ffff91c9457c2848 R15: ffff91c9479f0538 FS: 00007f1d1cfbe740(0000) GS:ffff91c9bdc80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffff91c900000000 CR3: 0000000006316000 CR4: 00000000000006e0 Link: https://lore.kernel.org/linux-trace-kernel/20221207035143.2278781-1-zhengyejian1@huawei.com Cc: Cc: Cc: stable@vger.kernel.org Fixes: d380dcde9a07 ("tracing: Fix now invalid var_ref_vals assumption in trace action") Signed-off-by: Zheng Yejian Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_hist.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index b4ad86c22b43..8264b28d5a57 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -621,7 +621,7 @@ struct action_data { * event param, and is passed to the synthetic event * invocation. 
*/ - unsigned int var_ref_idx[TRACING_MAP_VARS_MAX]; + unsigned int var_ref_idx[SYNTH_FIELDS_MAX]; struct synth_event *synth_event; bool use_trace_keyword; char *synth_event_name; @@ -2186,7 +2186,9 @@ static struct hist_field *create_var_ref(struct hist_trigger_data *hist_data, return ref_field; } } - + /* Sanity check to avoid out-of-bound write on 'hist_data->var_refs' */ + if (hist_data->n_var_refs >= TRACING_MAP_VARS_MAX) + return NULL; ref_field = create_hist_field(var_field->hist_data, NULL, flags, NULL); if (ref_field) { if (init_var_ref(ref_field, var_field, system, event_name)) { @@ -3946,6 +3948,10 @@ static int trace_action_create(struct hist_trigger_data *hist_data, lockdep_assert_held(&event_mutex); + /* Sanity check to avoid out-of-bound write on 'data->var_ref_idx' */ + if (data->n_params > SYNTH_FIELDS_MAX) + return -EINVAL; + if (data->use_trace_keyword) synth_event_name = data->synth_event_name; else From ff4837f7fe59ff018eca4705a70eca5e0b486b97 Mon Sep 17 00:00:00 2001 From: Zheng Yejian Date: Wed, 7 Dec 2022 17:15:57 +0800 Subject: [PATCH 3635/4122] tracing: Fix issue of missing one synthetic field The maximum number of synthetic fields supported is defined as SYNTH_FIELDS_MAX which value currently is 64, but it actually fails when try to generate a synthetic event with 64 fields by executing like: # echo "my_synth_event int v1; int v2; int v3; int v4; int v5; int v6;\ int v7; int v8; int v9; int v10; int v11; int v12; int v13; int v14;\ int v15; int v16; int v17; int v18; int v19; int v20; int v21; int v22;\ int v23; int v24; int v25; int v26; int v27; int v28; int v29; int v30;\ int v31; int v32; int v33; int v34; int v35; int v36; int v37; int v38;\ int v39; int v40; int v41; int v42; int v43; int v44; int v45; int v46;\ int v47; int v48; int v49; int v50; int v51; int v52; int v53; int v54;\ int v55; int v56; int v57; int v58; int v59; int v60; int v61; int v62;\ int v63; int v64" >> /sys/kernel/tracing/synthetic_events Correct the field 
counting to fix it. Link: https://lore.kernel.org/linux-trace-kernel/20221207091557.3137904-1-zhengyejian1@huawei.com Cc: Cc: Cc: stable@vger.kernel.org Fixes: c9e759b1e845 ("tracing: Rework synthetic event command parsing") Signed-off-by: Zheng Yejian Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_synth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index c3b582d19b62..67592eed0be8 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -1282,12 +1282,12 @@ static int __create_synth_event(const char *name, const char *raw_fields) goto err_free_arg; } - fields[n_fields++] = field; if (n_fields == SYNTH_FIELDS_MAX) { synth_err(SYNTH_ERR_TOO_MANY_FIELDS, 0); ret = -EINVAL; goto err_free_arg; } + fields[n_fields++] = field; n_fields_this_loop++; } From 608c6ed3337850c767ab0dd6c583477922233e29 Mon Sep 17 00:00:00 2001 From: Zheng Yejian Date: Wed, 7 Dec 2022 21:53:26 +0800 Subject: [PATCH 3636/4122] tracing/hist: Fix issue of losting command info in error_log When some constructed invalid 'trigger' command is input, the command info in 'error_log' is lost [1]. The root cause is that there is a path in which event_hist_trigger_parse() is called recursively once and 'last_cmd', which saves the original command, is cleared; later calls of hist_err() then no longer record the original command info: event_hist_trigger_parse() { last_cmd_set() // <1> 'last_cmd' saves the original command here at first create_actions() { onmatch_create() { action_create() { trace_action_create() { trace_action_create_field_var() { create_field_var_hist() { event_hist_trigger_parse() { // <2> recursively called once hist_err_clear() // <3> 'last_cmd' is cleared here } hist_err() // <4> No longer finds the original command!!! Since 'glob' is an empty string in the recursive call, we can check for that and bypass the call to hist_err_clear() to solve it.
[1] # cd /sys/kernel/tracing # echo "my_synth_event int v1; int v2; int v3;" >> synthetic_events # echo 'hist:keys=pid' >> events/sched/sched_waking/trigger # echo "hist:keys=next_pid:onmatch(sched.sched_waking).my_synth_event(\ pid,pid1)" >> events/sched/sched_switch/trigger # cat error_log [ 8.405018] hist:sched:sched_switch: error: Couldn't find synthetic event Command: hist:keys=next_pid:onmatch(sched.sched_waking).my_synth_event(pid,pid1) ^ [ 8.816902] hist:sched:sched_switch: error: Couldn't find field Command: hist:keys=next_pid:onmatch(sched.sched_waking).my_synth_event(pid,pid1) ^ [ 8.816902] hist:sched:sched_switch: error: Couldn't parse field variable Command: hist:keys=next_pid:onmatch(sched.sched_waking).my_synth_event(pid,pid1) ^ [ 8.999880] : error: Couldn't find field Command: ^ [ 8.999880] : error: Couldn't parse field variable Command: ^ [ 8.999880] : error: Couldn't find field Command: ^ [ 8.999880] : error: Couldn't create histogram for field Command: ^ Link: https://lore.kernel.org/linux-trace-kernel/20221207135326.3483216-1-zhengyejian1@huawei.com Cc: Cc: Fixes: f404da6e1d46 ("tracing: Add 'last error' error facility for hist triggers") Signed-off-by: Zheng Yejian Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_hist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 8264b28d5a57..fcaf226b7744 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -6576,7 +6576,7 @@ enable: if (se) se->ref++; out: - if (ret == 0) + if (ret == 0 && glob[0]) hist_err_clear(); return ret; From 88ca6a71dcab4a4ba6e6e2ff66415a5c4f86e874 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 9 Dec 2022 10:11:51 -0500 Subject: [PATCH 3637/4122] ring-buffer: Handle resize in early boot up With the new command line option that allows trace event triggers to be added at boot, the "snapshot" trigger will allocate the snapshot 
buffer very early, when interrupts can not be enabled. Allocating the ring buffer is not the problem, but it also resizes it, which is the problem, as the resize code does synchronization that cannot be performed at early boot. To handle this, first change the raw_spin_lock_irq() in rb_insert_pages() to raw_spin_lock_irqsave(), such that the unlocking of that spin lock will not enable interrupts. Next, where it calls schedule_work_on(), disable migration and check if the CPU to update is the current CPU, and if so, perform the work directly, otherwise re-enable migration and call the schedule_work_on() to the CPU that is being updated. The rb_insert_pages() just needs to be run on the CPU that it is updating, and does not need preemption nor interrupts disabled when calling it. Link: https://lore.kernel.org/lkml/Y5J%2FCajlNh1gexvo@google.com/ Link: https://lore.kernel.org/linux-trace-kernel/20221209101151.1fec1167@gandalf.local.home Cc: Masami Hiramatsu Cc: Andrew Morton Fixes: a01fdc897fa5 ("tracing: Add trace_trigger kernel command line option") Reported-by: Ross Zwisler Signed-off-by: Steven Rostedt Tested-by: Ross Zwisler Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ring_buffer.c | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 843818ee4814..c366a0a9ddba 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2062,8 +2062,10 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer) { struct list_head *pages = &cpu_buffer->new_pages; int retries, success; + unsigned long flags; - raw_spin_lock_irq(&cpu_buffer->reader_lock); + /* Can be called at early boot up, where interrupts must not been enabled */ + raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); /* * We are holding the reader lock, so the reader page won't be swapped * in the ring buffer.
Now we are racing with the writer trying to @@ -2120,7 +2122,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer) * tracing */ RB_WARN_ON(cpu_buffer, !success); - raw_spin_unlock_irq(&cpu_buffer->reader_lock); + raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); /* free pages if they weren't inserted */ if (!success) { @@ -2248,8 +2250,16 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size, rb_update_pages(cpu_buffer); cpu_buffer->nr_pages_to_update = 0; } else { - schedule_work_on(cpu, - &cpu_buffer->update_pages_work); + /* Run directly if possible. */ + migrate_disable(); + if (cpu != smp_processor_id()) { + migrate_enable(); + schedule_work_on(cpu, + &cpu_buffer->update_pages_work); + } else { + update_pages_handler(&cpu_buffer->update_pages_work); + migrate_enable(); + } } } @@ -2298,9 +2308,17 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size, if (!cpu_online(cpu_id)) rb_update_pages(cpu_buffer); else { - schedule_work_on(cpu_id, - &cpu_buffer->update_pages_work); - wait_for_completion(&cpu_buffer->update_done); + /* Run directly if possible. */ + migrate_disable(); + if (cpu_id == smp_processor_id()) { + rb_update_pages(cpu_buffer); + migrate_enable(); + } else { + migrate_enable(); + schedule_work_on(cpu_id, + &cpu_buffer->update_pages_work); + wait_for_completion(&cpu_buffer->update_done); + } } cpu_buffer->nr_pages_to_update = 0; From 98629dadcd020cba690ce0c49a564a1166444646 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Thu, 8 Dec 2022 17:33:10 -0700 Subject: [PATCH 3638/4122] tracing: remove unnecessary trace_trigger ifdef The trace_trigger command line option introduced by commit a01fdc897fa5 ("tracing: Add trace_trigger kernel command line option") doesn't need to depend on the CONFIG_HIST_TRIGGERS kernel config option. This code doesn't depend on the histogram code, and the run-time selection of triggers is usable without CONFIG_HIST_TRIGGERS. 
Link: https://lore.kernel.org/linux-trace-kernel/20221209003310.1737039-1-zwisler@google.com Cc: Masami Hiramatsu Cc: Andrew Morton Cc: Mathieu Desnoyers Cc: Joel Fernandes Cc: Tom Zanussi Fixes: a01fdc897fa5 ("tracing: Add trace_trigger kernel command line option") Signed-off-by: Ross Zwisler Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 3bfaf560ecc4..33e0b4f8ebe6 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2796,7 +2796,6 @@ trace_create_new_event(struct trace_event_call *call, return file; } -#ifdef CONFIG_HIST_TRIGGERS #define MAX_BOOT_TRIGGERS 32 static struct boot_triggers { @@ -2832,7 +2831,6 @@ static __init int setup_trace_triggers(char *str) return 1; } __setup("trace_trigger=", setup_trace_triggers); -#endif /* Add an event to a trace directory */ static int @@ -2850,7 +2848,6 @@ __trace_add_new_event(struct trace_event_call *call, struct trace_array *tr) return event_define_fields(call); } -#ifdef CONFIG_HIST_TRIGGERS static void trace_early_triggers(struct trace_event_file *file, const char *name) { int ret; @@ -2868,9 +2865,6 @@ static void trace_early_triggers(struct trace_event_file *file, const char *name bootup_triggers[i].event); } } -#else -static inline void trace_early_triggers(struct trace_event_file *file, const char *name) { } -#endif /* * Just create a descriptor for early init. A descriptor is required From ec370890f92ba8ad5476a34068655b06ba48def7 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Mon, 5 Dec 2022 09:27:38 +0100 Subject: [PATCH 3639/4122] tracing/osnoise: Make osnoise_options static Make osnoise_options static, as reported by the kernel test robot. 
Link: https://lkml.kernel.org/r/63255826485400d7a2270e9c5e66111079671e7a.1670228712.git.bristot@kernel.org Reported-by: kernel test robot Cc: Daniel Bristot de Oliveira Cc: Steven Rostedt Cc: Masami Hiramatsu Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_osnoise.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index 3f10dd1f2f1c..8ba82c71268f 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -59,8 +59,8 @@ enum osnoise_options_index { static const char * const osnoise_options_str[OSN_MAX] = { "DEFAULTS", "OSNOISE_WORKLOAD" }; -#define OSN_DEFAULT_OPTIONS 0x2 -unsigned long osnoise_options = OSN_DEFAULT_OPTIONS; +#define OSN_DEFAULT_OPTIONS 0x2 +static unsigned long osnoise_options = OSN_DEFAULT_OPTIONS; /* * trace_array of the enabled osnoise/timerlat instances. From bfd5a5e82d22da43afa0e2bb9fb72339aa79c6cc Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 5 Dec 2022 10:21:52 +0000 Subject: [PATCH 3640/4122] tracing: Fix some checker warnings Fix some checker warnings in the trace code by adding __printf attributes to a number of trace functions and their declarations. Changes: ======== ver #2) - Dropped the fix for the unconditional tracing_max_lat_fops decl[1]. 
Link: https://lore.kernel.org/r/20221205180617.9b9d3971cbe06ee536603523@kernel.org/ [1] Link: https://lore.kernel.org/r/166992525941.1716618.13740663757583361463.stgit@warthog.procyon.org.uk/ # v1 Link: https://lkml.kernel.org/r/167023571258.382307.15314866482834835192.stgit@warthog.procyon.org.uk Signed-off-by: David Howells Signed-off-by: Steven Rostedt (Google) --- include/linux/trace_events.h | 3 ++- include/linux/trace_seq.h | 3 ++- kernel/trace/trace.h | 2 +- kernel/trace/trace_output.c | 5 +++-- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index bb2053246d6a..4342e996bcdb 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -234,7 +234,8 @@ void tracing_record_taskinfo_sched_switch(struct task_struct *prev, void tracing_record_cmdline(struct task_struct *task); void tracing_record_tgid(struct task_struct *task); -int trace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...); +int trace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...) + __printf(3, 4); struct event_filter; diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index 5a2c650d9e1c..0c4c7587d6c3 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -97,7 +97,8 @@ extern int trace_seq_hex_dump(struct trace_seq *s, const char *prefix_str, const void *buf, size_t len, bool ascii); #else /* CONFIG_TRACING */ -static inline void trace_seq_printf(struct trace_seq *s, const char *fmt, ...) +static inline __printf(2, 3) +void trace_seq_printf(struct trace_seq *s, const char *fmt, ...) 
{ } static inline void diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 9dc920b01c17..e46a49269be2 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -614,7 +614,7 @@ void trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer, bool trace_is_tracepoint_string(const char *str); const char *trace_event_format(struct trace_iterator *iter, const char *fmt); void trace_check_vprintf(struct trace_iterator *iter, const char *fmt, - va_list ap); + va_list ap) __printf(2, 0); int trace_empty(struct trace_iterator *iter); diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index f0ba97121345..57a13b61f186 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -322,8 +322,9 @@ void trace_event_printf(struct trace_iterator *iter, const char *fmt, ...) } EXPORT_SYMBOL(trace_event_printf); -static int trace_output_raw(struct trace_iterator *iter, char *name, - char *fmt, va_list ap) +static __printf(3, 0) +int trace_output_raw(struct trace_iterator *iter, char *name, + char *fmt, va_list ap) { struct trace_seq *s = &iter->seq; From 0e162c6f1c8f7f502b3978f5a37ed6fa2e4d21f9 Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Fri, 25 Nov 2022 10:43:00 +0700 Subject: [PATCH 3641/4122] Documentation/osnoise: Escape underscore of NO_ prefix kernel test robot reported unknown target name warning: Documentation/trace/osnoise-tracer.rst:112: WARNING: Unknown target name: "no". The warning causes NO_ prefix to be rendered as link text instead, which points to non-existent link target. Escape the prefix underscore to fix the warning. 
Link: https://lkml.kernel.org/r/20221125034300.24168-1-bagasdotme@gmail.com Cc: Masami Hiramatsu Cc: Jonathan Corbet Cc: Ammar Faizi Cc: GNU/Weeb Mailing List Link: https://lore.kernel.org/linux-doc/202211240447.HxRNftE5-lkp@intel.com/ Fixes: 67543cd6b8eee5 ("Documentation/osnoise: Add osnoise/options documentation") Reported-by: kernel test robot Signed-off-by: Bagas Sanjaya Acked-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (Google) --- Documentation/trace/osnoise-tracer.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/trace/osnoise-tracer.rst b/Documentation/trace/osnoise-tracer.rst index 3c675ed82b27..fdd562d7c22d 100644 --- a/Documentation/trace/osnoise-tracer.rst +++ b/Documentation/trace/osnoise-tracer.rst @@ -111,7 +111,7 @@ The tracer has a set of options inside the osnoise directory, they are: be used, which is currently 5 us. - osnoise/options: a set of on/off options that can be enabled by writing the option name to the file or disabled by writing the option - name preceded with the 'NO_' prefix. For example, writing + name preceded with the 'NO\_' prefix. For example, writing NO_OSNOISE_WORKLOAD disables the OSNOISE_WORKLOAD option. The special DEAFAULTS option resets all options to the default value. From 1603dda47714cebe8a29b2154407da7a929d13f4 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Fri, 9 Dec 2022 23:05:52 +0100 Subject: [PATCH 3642/4122] tracing/osnoise: Add PANIC_ON_STOP option Often the latency observed in a CPU is not caused by the work being done in the CPU itself, but by work done on another CPU that causes the hardware to stall all CPUs. In this case, it is interesting to know what is happening on ALL CPUs, and the best way to do this is via crash dump analysis. Add the PANIC_ON_STOP option to osnoise/timerlat tracers. The default behavior is having this option off. When enabled by the user, the system will panic after hitting a stop tracing condition. 
This option was motivated by a real scenario that Juri Lelli and I were debugging. Link: https://lkml.kernel.org/r/249ce4287c6725543e6db845a6e0df621dc67db5.1670623111.git.bristot@kernel.org Cc: Juri Lelli Cc: Clark Williams Cc: Bagas Sanjaya Cc: Daniel Bristot de Oliveira Cc: Masami Hiramatsu Cc: Jonathan Corbet Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_osnoise.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index 8ba82c71268f..5a7613942223 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -54,10 +54,11 @@ enum osnoise_options_index { OSN_DEFAULTS = 0, OSN_WORKLOAD, + OSN_PANIC_ON_STOP, OSN_MAX }; -static const char * const osnoise_options_str[OSN_MAX] = { "DEFAULTS", "OSNOISE_WORKLOAD" }; +static const char * const osnoise_options_str[OSN_MAX] = { "DEFAULTS", "OSNOISE_WORKLOAD", "PANIC_ON_STOP" }; #define OSN_DEFAULT_OPTIONS 0x2 static unsigned long osnoise_options = OSN_DEFAULT_OPTIONS; @@ -1270,6 +1271,9 @@ static __always_inline void osnoise_stop_tracing(void) trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_, "stop tracing hit on cpu %d\n", smp_processor_id()); + if (test_bit(OSN_PANIC_ON_STOP, &osnoise_options)) + panic("tracer hit stop condition on CPU %d\n", smp_processor_id()); + tracer_tracing_off(tr); } rcu_read_unlock(); From b5dce2002567a9b1a83ef3e3a8678d8c32be2a78 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Fri, 9 Dec 2022 23:05:53 +0100 Subject: [PATCH 3643/4122] tracing/osnoise: Add preempt and/or irq disabled options The osnoise workload runs with preemption and IRQs enabled in such a way as to allow all sorts of noise to disturb osnoise's execution. hwlat tracer has a similar workload but works with irq disabled, allowing only NMIs and the hardware to generate noise. 
While thinking about adding an options file to hwlat tracer to allow the system to panic, and other features I was thinking to add, like having a tracepoint at each noise detection, it came to my mind that it is easier to make osnoise also do hardware latency detection than to make hwlat "feature compatible" with osnoise. Other points are: - osnoise already has an independent cpu file. - osnoise has a more intuitive interface, e.g., runtime/period vs. window/width (and people often need help remembering what it is). - osnoise: tracepoints - osnoise stop options - osnoise options file itself Moreover, the user-space side (in rtla) is simplified by reusing the existing osnoise code. Finally, people have been asking me about using osnoise for hw latency detection, and I have to explain that it was sufficient but not necessary. These options make it sufficient and necessary. Adding a Suggested-by Clark, as he often asked me about this possibility. Link: https://lkml.kernel.org/r/d9c6c19135497054986900f94c8e47410b15316a.1670623111.git.bristot@kernel.org Cc: Suggested-by: Clark Williams Cc: Juri Lelli Cc: Bagas Sanjaya Cc: Daniel Bristot de Oliveira Cc: Masami Hiramatsu Cc: Jonathan Corbet Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_osnoise.c | 48 ++++++++++++++++++++++++++++++++---- 1 file changed, 43 insertions(+), 5 deletions(-) diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index 5a7613942223..94c1b5eb1dc0 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -55,10 +55,17 @@ enum osnoise_options_index { OSN_DEFAULTS = 0, OSN_WORKLOAD, OSN_PANIC_ON_STOP, + OSN_PREEMPT_DISABLE, + OSN_IRQ_DISABLE, OSN_MAX }; -static const char * const osnoise_options_str[OSN_MAX] = { "DEFAULTS", "OSNOISE_WORKLOAD", "PANIC_ON_STOP" }; +static const char * const osnoise_options_str[OSN_MAX] = { + "DEFAULTS", + "OSNOISE_WORKLOAD", + "PANIC_ON_STOP", + "OSNOISE_PREEMPT_DISABLE",
+ "OSNOISE_IRQ_DISABLE" }; #define OSN_DEFAULT_OPTIONS 0x2 static unsigned long osnoise_options = OSN_DEFAULT_OPTIONS; @@ -1308,18 +1315,26 @@ static void notify_new_max_latency(u64 latency) */ static int run_osnoise(void) { + bool disable_irq = test_bit(OSN_IRQ_DISABLE, &osnoise_options); struct osnoise_variables *osn_var = this_cpu_osn_var(); u64 start, sample, last_sample; u64 last_int_count, int_count; s64 noise = 0, max_noise = 0; s64 total, last_total = 0; struct osnoise_sample s; + bool disable_preemption; unsigned int threshold; u64 runtime, stop_in; u64 sum_noise = 0; int hw_count = 0; int ret = -1; + /* + * Disabling preemption is only required if IRQs are enabled, + * and the options is set on. + */ + disable_preemption = !disable_irq && test_bit(OSN_PREEMPT_DISABLE, &osnoise_options); + /* * Considers the current thread as the workload. */ @@ -1335,6 +1350,15 @@ static int run_osnoise(void) */ threshold = tracing_thresh ? : 5000; + /* + * Apply PREEMPT and IRQ disabled options. + */ + if (disable_irq) + local_irq_disable(); + + if (disable_preemption) + preempt_disable(); + /* * Make sure NMIs see sampling first */ @@ -1422,16 +1446,21 @@ static int run_osnoise(void) * cond_resched() */ if (IS_ENABLED(CONFIG_PREEMPT_RCU)) { - local_irq_disable(); + if (!disable_irq) + local_irq_disable(); + rcu_momentary_dyntick_idle(); - local_irq_enable(); + + if (!disable_irq) + local_irq_enable(); } /* * For the non-preemptive kernel config: let threads runs, if - * they so wish. + * they so wish, unless set not do to so. */ - cond_resched(); + if (!disable_irq && !disable_preemption) + cond_resched(); last_sample = sample; last_int_count = int_count; @@ -1450,6 +1479,15 @@ static int run_osnoise(void) */ barrier(); + /* + * Return to the preemptive state. + */ + if (disable_preemption) + preempt_enable(); + + if (disable_irq) + local_irq_enable(); + /* * Save noise info. 
*/ From d358dfe60b7724ad0acb8cf8375a608b983e2b59 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Fri, 9 Dec 2022 23:05:54 +0100 Subject: [PATCH 3644/4122] Documentation/osnoise: Add osnoise/options documentation Add the documentation about the osnoise/options file, the options, and some additional explanation about the OSNOISE_WORKLOAD option. Link: https://lkml.kernel.org/r/fde5567a4bae364f67fd1e9a644d1d62862618a6.1670623111.git.bristot@kernel.org Cc: Daniel Bristot de Oliveira Cc: Juri Lelli Cc: Clark Williams Cc: Masami Hiramatsu Cc: Jonathan Corbet Cc: Bagas Sanjaya Signed-off-by: Daniel Bristot de Oliveira Reviewed-by: Bagas Sanjaya Signed-off-by: Steven Rostedt (Google) --- Documentation/trace/osnoise-tracer.rst | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/Documentation/trace/osnoise-tracer.rst b/Documentation/trace/osnoise-tracer.rst index fdd562d7c22d..140ef2533d26 100644 --- a/Documentation/trace/osnoise-tracer.rst +++ b/Documentation/trace/osnoise-tracer.rst @@ -92,8 +92,8 @@ Note that the example above shows a high number of HW noise samples. The reason being is that this sample was taken on a virtual machine, and the host interference is detected as a hardware interference. -Tracer options ---------------------- +Tracer Configuration +-------------------- The tracer has a set of options inside the osnoise directory, they are: @@ -115,6 +115,22 @@ The tracer has a set of options inside the osnoise directory, they are: NO_OSNOISE_WORKLOAD disables the OSNOISE_WORKLOAD option. The special DEAFAULTS option resets all options to the default value. +Tracer Options +-------------- + +The osnoise/options file exposes a set of on/off configuration options for +the osnoise tracer. These options are: + + - DEFAULTS: reset the options to the default value. + - OSNOISE_WORKLOAD: do not dispatch osnoise workload (see dedicated + section below). + - PANIC_ON_STOP: call panic() if the tracer stops. 
This option serves to + capture a vmcore. + - OSNOISE_PREEMPT_DISABLE: disable preemption while running the osnoise + workload, allowing only IRQ and hardware-related noise. + - OSNOISE_IRQ_DISABLE: disable IRQs while running the osnoise workload, + allowing only NMIs and hardware-related noise, like hwlat tracer. + Additional Tracing ------------------ From 5fb733d7bd6949e90028efdce8bd528c6ab7cf1e Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Wed, 23 Nov 2022 09:48:05 +0800 Subject: [PATCH 3645/4122] rtc: st-lpc: Add missing clk_disable_unprepare in st_rtc_probe() The clk_disable_unprepare() should be called in the error handling of clk_get_rate(), fix it. Fixes: b5b2bdfc2893 ("rtc: st: Add new driver for ST's LPC RTC") Signed-off-by: Gaosheng Cui Link: https://lore.kernel.org/r/20221123014805.1993052-1-cuigaosheng1@huawei.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-st-lpc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/rtc/rtc-st-lpc.c b/drivers/rtc/rtc-st-lpc.c index bdb20f63254e..0f8e4231098e 100644 --- a/drivers/rtc/rtc-st-lpc.c +++ b/drivers/rtc/rtc-st-lpc.c @@ -238,6 +238,7 @@ static int st_rtc_probe(struct platform_device *pdev) rtc->clkrate = clk_get_rate(rtc->clk); if (!rtc->clkrate) { + clk_disable_unprepare(rtc->clk); dev_err(&pdev->dev, "Unable to fetch clock rate\n"); return -EINVAL; } From 90cd5c88830140c9fade92a8027e0fb2c6e4cc49 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Wed, 23 Nov 2022 09:59:53 +0800 Subject: [PATCH 3646/4122] rtc: pic32: Move devm_rtc_allocate_device earlier in pic32_rtc_probe() The pic32_rtc_enable(pdata, 0) and clk_disable_unprepare(pdata->clk) should be called in the error handling of devm_rtc_allocate_device(), so we should move devm_rtc_allocate_device earlier in pic32_rtc_probe() to fix it. 
Fixes: 6515e23b9fde ("rtc: pic32: convert to devm_rtc_allocate_device") Signed-off-by: Gaosheng Cui Link: https://lore.kernel.org/r/20221123015953.1998521-1-cuigaosheng1@huawei.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-pic32.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/rtc/rtc-pic32.c b/drivers/rtc/rtc-pic32.c index 7fb9145c43bd..fa351ac20158 100644 --- a/drivers/rtc/rtc-pic32.c +++ b/drivers/rtc/rtc-pic32.c @@ -324,16 +324,16 @@ static int pic32_rtc_probe(struct platform_device *pdev) spin_lock_init(&pdata->alarm_lock); + pdata->rtc = devm_rtc_allocate_device(&pdev->dev); + if (IS_ERR(pdata->rtc)) + return PTR_ERR(pdata->rtc); + clk_prepare_enable(pdata->clk); pic32_rtc_enable(pdata, 1); device_init_wakeup(&pdev->dev, 1); - pdata->rtc = devm_rtc_allocate_device(&pdev->dev); - if (IS_ERR(pdata->rtc)) - return PTR_ERR(pdata->rtc); - pdata->rtc->ops = &pic32_rtcops; pdata->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; pdata->rtc->range_max = RTC_TIMESTAMP_END_2099; From 800b55b4dc62c4348fbc1f7570a8ac8be3f0eb66 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Fri, 18 Nov 2022 15:33:32 +0100 Subject: [PATCH 3647/4122] dt-bindings: rtc: convert rtc-meson.txt to dt-schema Convert the Amlogic Meson6 RTC bindings to dt-schema. 
Signed-off-by: Neil Armstrong Reviewed-by: Martin Blumenstingl Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221117-b4-amlogic-bindings-convert-v1-6-3f025599b968@linaro.org Signed-off-by: Alexandre Belloni --- .../bindings/rtc/amlogic,meson6-rtc.yaml | 62 +++++++++++++++++++ .../devicetree/bindings/rtc/rtc-meson.txt | 35 ----------- 2 files changed, 62 insertions(+), 35 deletions(-) create mode 100644 Documentation/devicetree/bindings/rtc/amlogic,meson6-rtc.yaml delete mode 100644 Documentation/devicetree/bindings/rtc/rtc-meson.txt diff --git a/Documentation/devicetree/bindings/rtc/amlogic,meson6-rtc.yaml b/Documentation/devicetree/bindings/rtc/amlogic,meson6-rtc.yaml new file mode 100644 index 000000000000..8bf7d3a9be98 --- /dev/null +++ b/Documentation/devicetree/bindings/rtc/amlogic,meson6-rtc.yaml @@ -0,0 +1,62 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/rtc/amlogic,meson6-rtc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Amlogic Meson6, Meson8, Meson8b and Meson8m2 RTC + +maintainers: + - Neil Armstrong + - Martin Blumenstingl + +allOf: + - $ref: rtc.yaml# + - $ref: /schemas/nvmem/nvmem.yaml# + +properties: + compatible: + enum: + - amlogic,meson6-rtc + - amlogic,meson8-rtc + - amlogic,meson8b-rtc + - amlogic,meson8m2-rtc + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + + interrupts: + maxItems: 1 + + resets: + maxItems: 1 + + vdd-supply: true + +required: + - compatible + - reg + +unevaluatedProperties: false + +examples: + - | + #include + #include + rtc: rtc@740 { + compatible = "amlogic,meson6-rtc"; + reg = <0x740 0x14>; + interrupts = ; + clocks = <&rtc32k_xtal>; + vdd-supply = <&rtc_vdd>; + resets = <&reset_rtc>; + #address-cells = <1>; + #size-cells = <1>; + + mac@0 { + reg = <0 6>; + }; + }; diff --git a/Documentation/devicetree/bindings/rtc/rtc-meson.txt b/Documentation/devicetree/bindings/rtc/rtc-meson.txt deleted file mode 100644 
index e921fe66a362..000000000000 --- a/Documentation/devicetree/bindings/rtc/rtc-meson.txt +++ /dev/null @@ -1,35 +0,0 @@ -* Amlogic Meson6, Meson8, Meson8b and Meson8m2 RTC - -Required properties: -- compatible: should be one of the following describing the hardware: - * "amlogic,meson6-rtc" - * "amlogic,meson8-rtc" - * "amlogic,meson8b-rtc" - * "amlogic,meson8m2-rtc" - -- reg: physical register space for the controller's memory mapped registers. -- interrupts: the interrupt line of the RTC block. -- clocks: reference to the external 32.768kHz crystal oscillator. -- vdd-supply: reference to the power supply of the RTC block. -- resets: reset controller reference to allow reset of the controller - -Optional properties for the battery-backed non-volatile memory: -- #address-cells: should be 1 to address the battery-backed non-volatile memory -- #size-cells: should be 1 to reference the battery-backed non-volatile memory - -Optional child nodes: -- see ../nvmem/nvmem.txt - -Example: - - rtc: rtc@740 { - compatible = "amlogic,meson6-rtc"; - reg = <0x740 0x14>; - interrupts = ; - clocks = <&rtc32k_xtal>; - vdd-supply = <&rtc_vdd>; - resets = <&reset RESET_RTC>; - - #address-cells = <1>; - #size-cells = <1>; - }; From e42f9c2e6aad583986e91979bf2fce47aaced1c2 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 9 Dec 2022 10:21:56 -0400 Subject: [PATCH 3648/4122] RDMA: Add missed netdev_put() for the netdevice_tracker The netdev core will detect if any untracked puts are done on tracked pointers and throw refcount warnings: refcount_t: decrement hit 0; leaking memory. 
WARNING: CPU: 1 PID: 33 at lib/refcount.c:31 refcount_warn_saturate+0x1d7/0x1f0 lib/refcount.c:31 Modules linked in: CPU: 1 PID: 33 Comm: kworker/u4:2 Not tainted 6.1.0-rc8-next-20221207-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/26/2022 Workqueue: ib-unreg-wq ib_unregister_work RIP: 0010:refcount_warn_saturate+0x1d7/0x1f0 lib/refcount.c:31 Code: 05 5a 60 51 0a 01 e8 35 0a b5 05 0f 0b e9 d3 fe ff ff e8 6c 9b 75 fd 48 c7 c7 c0 6d a6 8a c6 05 37 60 51 0a 01 e8 16 0a b5 05 <0f> 0b e9 b4 fe +ff ff 48 89 ef e8 5a b5 c3 fd e9 5c fe ff ff 0f 1f RSP: 0018:ffffc90000aa7b30 EFLAGS: 00010082 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: ffff8880172f9d40 RSI: ffffffff8166b1dc RDI: fffff52000154f58 RBP: ffff88807906c600 R08: 0000000000000005 R09: 0000000000000000 R10: 0000000080000001 R11: 0000000000000000 R12: 1ffff92000154f6b R13: 0000000000000000 R14: ffff88807906c600 R15: ffff888046894000 FS: 0000000000000000(0000) GS:ffff8880b9900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffe350a8ff8 CR3: 000000007a9e7000 CR4: 00000000003526e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __refcount_dec include/linux/refcount.h:344 [inline] refcount_dec include/linux/refcount.h:359 [inline] ref_tracker_free+0x539/0x6b0 lib/ref_tracker.c:118 netdev_tracker_free include/linux/netdevice.h:4039 [inline] netdev_put include/linux/netdevice.h:4056 [inline] dev_put include/linux/netdevice.h:4082 [inline] free_netdevs+0x1f8/0x470 drivers/infiniband/core/device.c:2204 __ib_unregister_device+0xa0/0x1a0 drivers/infiniband/core/device.c:1478 ib_unregister_work+0x19/0x30 drivers/infiniband/core/device.c:1586 process_one_work+0x9bf/0x1710 kernel/workqueue.c:2289 worker_thread+0x669/0x1090 kernel/workqueue.c:2436 kthread+0x2e8/0x3a0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 
arch/x86/entry/entry_64.S:308 So change the missed dev_put for pdata->netdev to also follow the tracker. Fixes: 09f530f0c6d6 ("RDMA: Add netdevice_tracker to ib_device_set_netdev()") Reported-by: syzbot+3fd8326d9a0812d19218@syzkaller.appspotmail.com Reported-by: syzbot+a1ed8ffe3121380cd5dd@syzkaller.appspotmail.com Reported-by: syzbot+8d0a099c8a6d1e4e601c@syzkaller.appspotmail.com Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/0-v1-e99919867b8d+1e2-netdev_tracker2_jgg@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 9211d2794aac..894c06846224 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2201,7 +2201,7 @@ static void free_netdevs(struct ib_device *ib_dev) * comparisons after the put */ rcu_assign_pointer(pdata->netdev, NULL); - dev_put(ndev); + netdev_put(ndev, &pdata->netdev_tracker); } spin_unlock_irqrestore(&pdata->netdev_lock, flags); } From efa80b028c7a9c74fd875517aa0fc9fd8d610ed0 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 6 Dec 2022 13:07:30 +0900 Subject: [PATCH 3649/4122] kbuild: move -Werror from KBUILD_CFLAGS to KBUILD_CPPFLAGS CONFIG_WERROR turns warnings into errors, which happens only for *.c files because -Werror is added to KBUILD_CFLAGS. Adding it to KBUILD_CPPFLAGS makes more sense because preprocessors understand the -Werror option. For example, you can put a #warning directive in any preprocessed code. warning: #warning "this is a warning message" [-Wcpp] If -Werror is added, it is promoted to an error. error: #warning "this is a warning message" [-Werror=cpp] This commit moves -Werror to KBUILD_CPPFLAGS so it works in the same way for *.c, *.S, *.lds.S or whatever needs preprocessing. 
Signed-off-by: Masahiro Yamada Reviewed-by: Nick Desaulniers Reviewed-by: Nathan Chancellor --- Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index fbd9ff4a61e7..6b047daa46cc 100644 --- a/Makefile +++ b/Makefile @@ -866,7 +866,8 @@ stackp-flags-$(CONFIG_STACKPROTECTOR_STRONG) := -fstack-protector-strong KBUILD_CFLAGS += $(stackp-flags-y) -KBUILD_CFLAGS-$(CONFIG_WERROR) += -Werror +KBUILD_CPPFLAGS-$(CONFIG_WERROR) += -Werror +KBUILD_CPPFLAGS += $(KBUILD_CPPFLAGS-y) KBUILD_CFLAGS-$(CONFIG_CC_NO_ARRAY_BOUNDS) += -Wno-array-bounds KBUILD_RUSTFLAGS-$(CONFIG_WERROR) += -Dwarnings From 80b6093b55e31c2c40ff082fb32523d4e852954f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 6 Dec 2022 13:07:31 +0900 Subject: [PATCH 3650/4122] kbuild: add -Wundef to KBUILD_CPPFLAGS for W=1 builds The use of an undefined macro in an #if directive is warned, but only in *.c files. No warning from other files such as *.S, *.lds.S. Since -Wundef is a preprocessor-related warning, it should be added to KBUILD_CPPFLAGS instead of KBUILD_CFLAGS. My previous attempt [1] uncovered several issues. I could not finish fixing them all. This commit adds -Wundef to KBUILD_CPPFLAGS for W=1 builds in order to block new breakages. (The kbuild test robot tests with W=1) We can fix the warnings one by one. After fixing all of them, we can make it default in the top Makefile, and remove -Wundef from KBUILD_CFLAGS. 
[1]: https://lore.kernel.org/all/20221012180118.331005-2-masahiroy@kernel.org/ Signed-off-by: Masahiro Yamada Reviewed-by: Nathan Chancellor Reviewed-by: Nick Desaulniers --- scripts/Makefile.extrawarn | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index 6bbba36c5969..40cd13eca82e 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -38,6 +38,7 @@ KBUILD_CFLAGS += -Wno-sign-compare KBUILD_CFLAGS += -Wno-type-limits KBUILD_CFLAGS += -Wno-shift-negative-value +KBUILD_CPPFLAGS += -Wundef KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1 else From dbc94a0fb81771a38733c0e8f2ea8c4fa6934dc1 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Thu, 8 Dec 2022 09:52:54 +0200 Subject: [PATCH 3651/4122] IB/IPoIB: Fix queue count inconsistency for PKEY child interfaces There are 2 ways to create IPoIB PKEY child interfaces: 1) Writing a PKEY to /sys/class/net//create_child. 2) Using netlink with iproute. While with sysfs the child interface has the same number of tx and rx queues as the parent, with netlink there will always be 1 tx and 1 rx queue for the child interface. That's because the get_num_tx/rx_queues() netlink ops are missing and the default value of 1 is taken for the number of queues (in rtnl_create_link()). This change adds the get_num_tx/rx_queues() ops which allows for interfaces with multiple queues to be created over netlink. This constant only represents the max number of tx and rx queues on that net device. 
Fixes: 9baa0b036410 ("IB/ipoib: Add rtnl_link_ops support") Signed-off-by: Dragos Tatulea Link: https://lore.kernel.org/r/f4a42c8aa43c02d5ae5559a60c3e5e0f18c82531.1670485816.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/ipoib/ipoib_netlink.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c index ea16ba5d8da6..9ad8d9856275 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c @@ -41,6 +41,11 @@ static const struct nla_policy ipoib_policy[IFLA_IPOIB_MAX + 1] = { [IFLA_IPOIB_UMCAST] = { .type = NLA_U16 }, }; +static unsigned int ipoib_get_max_num_queues(void) +{ + return min_t(unsigned int, num_possible_cpus(), 128); +} + static int ipoib_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); @@ -172,6 +177,8 @@ static struct rtnl_link_ops ipoib_link_ops __read_mostly = { .changelink = ipoib_changelink, .get_size = ipoib_get_size, .fill_info = ipoib_fill_info, + .get_num_rx_queues = ipoib_get_max_num_queues, + .get_num_tx_queues = ipoib_get_max_num_queues, }; struct rtnl_link_ops *ipoib_get_link_ops(void) From 37ba7b005a7a4454046bd8659c7a9c5330552396 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sat, 29 Oct 2022 00:01:38 +0900 Subject: [PATCH 3652/4122] ksmbd: set SMB2_SESSION_FLAG_ENCRYPT_DATA when enforcing data encryption for this share Currently, SMB2_SESSION_FLAG_ENCRYPT_DATA is always set in the session setup response. Since this forces data encryption from the client, there is a problem that data is always encrypted regardless of the use of the cifs seal mount option. SMB2_SESSION_FLAG_ENCRYPT_DATA should be set according to KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION flags, and in case of KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF, encryption mode is turned off for all connections.
Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/ksmbd_netlink.h | 1 + fs/ksmbd/smb2ops.c | 10 ++++++++-- fs/ksmbd/smb2pdu.c | 8 +++++--- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/fs/ksmbd/ksmbd_netlink.h b/fs/ksmbd/ksmbd_netlink.h index ff07c67f4565..b6bd8311e6b4 100644 --- a/fs/ksmbd/ksmbd_netlink.h +++ b/fs/ksmbd/ksmbd_netlink.h @@ -74,6 +74,7 @@ struct ksmbd_heartbeat { #define KSMBD_GLOBAL_FLAG_SMB2_LEASES BIT(0) #define KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION BIT(1) #define KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL BIT(2) +#define KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF BIT(3) /* * IPC request for ksmbd server startup diff --git a/fs/ksmbd/smb2ops.c b/fs/ksmbd/smb2ops.c index ab23da2120b9..e401302478c3 100644 --- a/fs/ksmbd/smb2ops.c +++ b/fs/ksmbd/smb2ops.c @@ -247,8 +247,9 @@ void init_smb3_02_server(struct ksmbd_conn *conn) if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES) conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING; - if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION && - conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION) + if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION || + (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF) && + conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION)) conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION; if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL) @@ -271,6 +272,11 @@ int init_smb3_11_server(struct ksmbd_conn *conn) if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES) conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING; + if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION || + (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF) && + conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION)) + conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION; + if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL) conn->vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL; diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 
b2fc85d440d0..56d68ddc409c 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -903,7 +903,7 @@ static void decode_encrypt_ctxt(struct ksmbd_conn *conn, return; } - if (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION)) + if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF) return; for (i = 0; i < cph_cnt; i++) { @@ -1508,7 +1508,8 @@ static int ntlm_authenticate(struct ksmbd_work *work) return -EINVAL; } sess->enc = true; - rsp->SessionFlags = SMB2_SESSION_FLAG_ENCRYPT_DATA_LE; + if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION) + rsp->SessionFlags = SMB2_SESSION_FLAG_ENCRYPT_DATA_LE; /* * signing is disable if encryption is enable * on this session @@ -1599,7 +1600,8 @@ static int krb5_authenticate(struct ksmbd_work *work) return -EINVAL; } sess->enc = true; - rsp->SessionFlags = SMB2_SESSION_FLAG_ENCRYPT_DATA_LE; + if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION) + rsp->SessionFlags = SMB2_SESSION_FLAG_ENCRYPT_DATA_LE; sess->sign = false; } From 7ecbe92696bb7fe32c80b6cf64736a0d157717a9 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 11 Nov 2022 08:11:53 -0500 Subject: [PATCH 3653/4122] ksmbd: use F_SETLK when unlocking a file ksmbd seems to be trying to use a cmd value of 0 when unlocking a file. That activity requires a type of F_UNLCK with a cmd of F_SETLK. For local POSIX locking, it doesn't matter much since vfs_lock_file ignores @cmd, but filesystems that define their own ->lock operation expect to see it set sanely. 
Cc: David Howells Signed-off-by: Jeff Layton Reviewed-by: David Howells Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/smb2pdu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 56d68ddc409c..de8e367095c9 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -6753,7 +6753,7 @@ static int smb2_set_flock_flags(struct file_lock *flock, int flags) case SMB2_LOCKFLAG_UNLOCK: ksmbd_debug(SMB, "received unlock request\n"); flock->fl_type = F_UNLCK; - cmd = 0; + cmd = F_SETLK; break; } @@ -7131,7 +7131,7 @@ out: rlock->fl_start = smb_lock->start; rlock->fl_end = smb_lock->end; - rc = vfs_lock_file(filp, 0, rlock, NULL); + rc = vfs_lock_file(filp, F_SETLK, rlock, NULL); if (rc) pr_err("rollback unlock fail : %d\n", rc); From 30429388531b120902126a39cf64f877fc5c7773 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 15 Nov 2022 09:35:10 -0600 Subject: [PATCH 3654/4122] ksmbd: replace one-element arrays with flexible-array members One-element arrays are deprecated, and we are replacing them with flexible array members instead. So, replace one-element arrays with flexible-array members in multiple structs in fs/ksmbd/smb_common.h and one in fs/ksmbd/smb2pdu.h. Important to mention is that doing a build before/after this patch results in no binary output differences. This helps with the ongoing efforts to tighten the FORTIFY_SOURCE routines on memcpy() and help us make progress towards globally enabling -fstrict-flex-arrays=3 [1]. Link: https://github.com/KSPP/linux/issues/242 Link: https://github.com/KSPP/linux/issues/79 Link: https://gcc.gnu.org/pipermail/gcc-patches/2022-October/602902.html [1] Signed-off-by: Gustavo A. R. 
Silva Reviewed-by: Kees Cook Reviewed-by: Sergey Senozhatsky Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/smb2pdu.c | 4 ++-- fs/ksmbd/smb2pdu.h | 2 +- fs/ksmbd/smb_common.h | 12 ++++++------ 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index de8e367095c9..79261493a212 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -3440,7 +3440,7 @@ static int smb2_populate_readdir_entry(struct ksmbd_conn *conn, int info_level, goto free_conv_name; } - struct_sz = readdir_info_level_struct_sz(info_level) - 1 + conv_len; + struct_sz = readdir_info_level_struct_sz(info_level) + conv_len; next_entry_offset = ALIGN(struct_sz, KSMBD_DIR_INFO_ALIGNMENT); d_info->last_entry_off_align = next_entry_offset - struct_sz; @@ -3692,7 +3692,7 @@ static int reserve_populate_dentry(struct ksmbd_dir_info *d_info, return -EOPNOTSUPP; conv_len = (d_info->name_len + 1) * 2; - next_entry_offset = ALIGN(struct_sz - 1 + conv_len, + next_entry_offset = ALIGN(struct_sz + conv_len, KSMBD_DIR_INFO_ALIGNMENT); if (next_entry_offset > d_info->out_buf_len) { diff --git a/fs/ksmbd/smb2pdu.h b/fs/ksmbd/smb2pdu.h index 092fdd3f8750..aa5dbe54f5a1 100644 --- a/fs/ksmbd/smb2pdu.h +++ b/fs/ksmbd/smb2pdu.h @@ -443,7 +443,7 @@ struct smb2_posix_info { /* SidBuffer contain two sids (UNIX user sid(16), UNIX group sid(16)) */ u8 SidBuffer[32]; __le32 name_len; - u8 name[1]; + u8 name[]; /* * var sized owner SID * var sized group SID diff --git a/fs/ksmbd/smb_common.h b/fs/ksmbd/smb_common.h index 318c16fa81da..e663ab9ea759 100644 --- a/fs/ksmbd/smb_common.h +++ b/fs/ksmbd/smb_common.h @@ -277,14 +277,14 @@ struct file_directory_info { __le64 AllocationSize; __le32 ExtFileAttributes; __le32 FileNameLength; - char FileName[1]; + char FileName[]; } __packed; /* level 0x101 FF resp data */ struct file_names_info { __le32 NextEntryOffset; __u32 FileIndex; __le32 FileNameLength; - char FileName[1]; + char FileName[]; } __packed; /* level 0xc 
FF resp data */ struct file_full_directory_info { @@ -299,7 +299,7 @@ struct file_full_directory_info { __le32 ExtFileAttributes; __le32 FileNameLength; __le32 EaSize; - char FileName[1]; + char FileName[]; } __packed; /* level 0x102 FF resp */ struct file_both_directory_info { @@ -317,7 +317,7 @@ struct file_both_directory_info { __u8 ShortNameLength; __u8 Reserved; __u8 ShortName[24]; - char FileName[1]; + char FileName[]; } __packed; /* level 0x104 FFrsp data */ struct file_id_both_directory_info { @@ -337,7 +337,7 @@ struct file_id_both_directory_info { __u8 ShortName[24]; __le16 Reserved2; __le64 UniqueId; - char FileName[1]; + char FileName[]; } __packed; struct file_id_full_dir_info { @@ -354,7 +354,7 @@ struct file_id_full_dir_info { __le32 EaSize; /* EA size */ __le32 Reserved; __le64 UniqueId; /* inode num - le since Samba puts ino in low 32 bit*/ - char FileName[1]; + char FileName[]; } __packed; /* level 0x105 FF rsp data */ struct smb_version_values { From bc044414fa0326a4e5c3c509c00b1fcaf621b5f4 Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Wed, 16 Nov 2022 20:22:37 +0800 Subject: [PATCH 3655/4122] ksmbd: Fix resource leak in ksmbd_session_rpc_open() When ksmbd_rpc_open() fails then it must call ksmbd_rpc_id_free() to undo the result of ksmbd_ipc_id_alloc(). 
Fixes: e2f34481b24d ("cifsd: add server-side procedures for SMB3") Signed-off-by: Xiu Jianfeng Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/mgmt/user_session.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/ksmbd/mgmt/user_session.c b/fs/ksmbd/mgmt/user_session.c index 3fa2139a0b30..92b1603b5abe 100644 --- a/fs/ksmbd/mgmt/user_session.c +++ b/fs/ksmbd/mgmt/user_session.c @@ -108,15 +108,17 @@ int ksmbd_session_rpc_open(struct ksmbd_session *sess, char *rpc_name) entry->method = method; entry->id = ksmbd_ipc_id_alloc(); if (entry->id < 0) - goto error; + goto free_entry; resp = ksmbd_rpc_open(sess, entry->id); if (!resp) - goto error; + goto free_id; kvfree(resp); return entry->id; -error: +free_id: + ksmbd_rpc_id_free(entry->id); +free_entry: list_del(&entry->list); kfree(entry); return -EINVAL; From 01f6c61bae3d658058ee6322af77acea26a5ee3a Mon Sep 17 00:00:00 2001 From: Marios Makassikis Date: Tue, 29 Nov 2022 12:19:33 +0100 Subject: [PATCH 3656/4122] ksmbd: Fix resource leak in smb2_lock() "flock" is leaked if an error happens before smb2_lock_init(), as the lock is not added to the lock_list to be cleaned up. 
Signed-off-by: Marios Makassikis Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/smb2pdu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 79261493a212..88d2c23369fe 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -6857,6 +6857,7 @@ int smb2_lock(struct ksmbd_work *work) if (lock_start > U64_MAX - lock_length) { pr_err("Invalid lock range requested\n"); rsp->hdr.Status = STATUS_INVALID_LOCK_RANGE; + locks_free_lock(flock); goto out; } @@ -6876,6 +6877,7 @@ int smb2_lock(struct ksmbd_work *work) "the end offset(%llx) is smaller than the start offset(%llx)\n", flock->fl_end, flock->fl_start); rsp->hdr.Status = STATUS_INVALID_LOCK_RANGE; + locks_free_lock(flock); goto out; } @@ -6887,6 +6889,7 @@ int smb2_lock(struct ksmbd_work *work) flock->fl_type != F_UNLCK) { pr_err("conflict two locks in one request\n"); err = -EINVAL; + locks_free_lock(flock); goto out; } } @@ -6895,6 +6898,7 @@ int smb2_lock(struct ksmbd_work *work) smb_lock = smb2_lock_init(flock, cmd, flags, &lock_list); if (!smb_lock) { err = -EINVAL; + locks_free_lock(flock); goto out; } } From 72ee45fd46d0d3578c4e6046f66fae3218543ce3 Mon Sep 17 00:00:00 2001 From: ye xingchen Date: Wed, 7 Dec 2022 09:29:27 +0800 Subject: [PATCH 3657/4122] ksmbd: Convert to use sysfs_emit()/sysfs_emit_at() APIs Follow the advice of the Documentation/filesystems/sysfs.rst and show() should only use sysfs_emit() or sysfs_emit_at() when formatting the value to be returned to user space. 
Signed-off-by: ye xingchen Reviewed-by: Sergey Senozhatsky Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/server.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/fs/ksmbd/server.c b/fs/ksmbd/server.c index a0d635304754..394b6ceac431 100644 --- a/fs/ksmbd/server.c +++ b/fs/ksmbd/server.c @@ -432,11 +432,9 @@ static ssize_t stats_show(struct class *class, struct class_attribute *attr, "reset", "shutdown" }; - - ssize_t sz = scnprintf(buf, PAGE_SIZE, "%d %s %d %lu\n", stats_version, - state[server_conf.state], server_conf.tcp_port, - server_conf.ipc_last_active / HZ); - return sz; + return sysfs_emit(buf, "%d %s %d %lu\n", stats_version, + state[server_conf.state], server_conf.tcp_port, + server_conf.ipc_last_active / HZ); } static ssize_t kill_server_store(struct class *class, @@ -468,19 +466,13 @@ static ssize_t debug_show(struct class *class, struct class_attribute *attr, for (i = 0; i < ARRAY_SIZE(debug_type_strings); i++) { if ((ksmbd_debug_types >> i) & 1) { - pos = scnprintf(buf + sz, - PAGE_SIZE - sz, - "[%s] ", - debug_type_strings[i]); + pos = sysfs_emit_at(buf, sz, "[%s] ", debug_type_strings[i]); } else { - pos = scnprintf(buf + sz, - PAGE_SIZE - sz, - "%s ", - debug_type_strings[i]); + pos = sysfs_emit_at(buf, sz, "%s ", debug_type_strings[i]); } sz += pos; } - sz += scnprintf(buf + sz, PAGE_SIZE - sz, "\n"); + sz += sysfs_emit_at(buf, sz, "\n"); return sz; } From ac3a2585f018f10039b4a856dcb122da88c1c1c9 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sun, 11 Dec 2022 06:19:33 -0500 Subject: [PATCH 3658/4122] nfsd: rework refcounting in filecache The filecache refcounting is a bit non-standard for something searchable by RCU, in that we maintain a sentinel reference while it's hashed. This in turn requires that we have to do things differently in the "put" depending on whether it's hashed, which we believe to have led to races. There are other problems in here too.
nfsd_file_close_inode_sync can end up freeing an nfsd_file while there are still outstanding references to it, and there are a number of subtle ToC/ToU races. Rework the code so that the refcount is what drives the lifecycle. When the refcount goes to zero, then unhash and rcu free the object. A task searching for a nfsd_file is allowed to bump its refcount, but only if it's not already 0. Ensure that we don't make any other changes to it until a reference is held. With this change, the LRU carries a reference. Take special care to deal with it when removing an entry from the list, and ensure that we only repurpose the nf_lru list_head when the refcount is 0 to ensure exclusive access to it. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 328 +++++++++++++++++++++++--------------------- fs/nfsd/trace.h | 51 +++---- 2 files changed, 194 insertions(+), 185 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 1998b4d5f692..45b2c9e3f636 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -324,8 +324,7 @@ nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may) if (key->gc) __set_bit(NFSD_FILE_GC, &nf->nf_flags); nf->nf_inode = key->inode; - /* nf_ref is pre-incremented for hash table */ - refcount_set(&nf->nf_ref, 2); + refcount_set(&nf->nf_ref, 1); nf->nf_may = key->need; nf->nf_mark = NULL; } @@ -377,24 +376,35 @@ nfsd_file_unhash(struct nfsd_file *nf) return false; } -static bool +static void nfsd_file_free(struct nfsd_file *nf) { s64 age = ktime_to_ms(ktime_sub(ktime_get(), nf->nf_birthtime)); - bool flush = false; trace_nfsd_file_free(nf); this_cpu_inc(nfsd_file_releases); this_cpu_add(nfsd_file_total_age, age); + nfsd_file_unhash(nf); + + /* + * We call fsync here in order to catch writeback errors. It's not + * strictly required by the protocol, but an nfsd_file could get + * evicted from the cache before a COMMIT comes in. 
If another + * task were to open that file in the interim and scrape the error, + * then the client may never see it. By calling fsync here, we ensure + * that writeback happens before the entry is freed, and that any + * errors reported result in the write verifier changing. + */ + nfsd_file_fsync(nf); + if (nf->nf_mark) nfsd_file_mark_put(nf->nf_mark); if (nf->nf_file) { get_file(nf->nf_file); filp_close(nf->nf_file, NULL); fput(nf->nf_file); - flush = true; } /* @@ -402,10 +412,9 @@ nfsd_file_free(struct nfsd_file *nf) * WARN and leak it to preserve system stability. */ if (WARN_ON_ONCE(!list_empty(&nf->nf_lru))) - return flush; + return; call_rcu(&nf->nf_rcu, nfsd_file_slab_free); - return flush; } static bool @@ -421,17 +430,23 @@ nfsd_file_check_writeback(struct nfsd_file *nf) mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK); } -static void nfsd_file_lru_add(struct nfsd_file *nf) +static bool nfsd_file_lru_add(struct nfsd_file *nf) { set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); - if (list_lru_add(&nfsd_file_lru, &nf->nf_lru)) + if (list_lru_add(&nfsd_file_lru, &nf->nf_lru)) { trace_nfsd_file_lru_add(nf); + return true; + } + return false; } -static void nfsd_file_lru_remove(struct nfsd_file *nf) +static bool nfsd_file_lru_remove(struct nfsd_file *nf) { - if (list_lru_del(&nfsd_file_lru, &nf->nf_lru)) + if (list_lru_del(&nfsd_file_lru, &nf->nf_lru)) { trace_nfsd_file_lru_del(nf); + return true; + } + return false; } struct nfsd_file * @@ -442,54 +457,48 @@ nfsd_file_get(struct nfsd_file *nf) return NULL; } -static void -nfsd_file_unhash_and_queue(struct nfsd_file *nf, struct list_head *dispose) -{ - trace_nfsd_file_unhash_and_queue(nf); - if (nfsd_file_unhash(nf)) { - /* caller must call nfsd_file_dispose_list() later */ - nfsd_file_lru_remove(nf); - list_add(&nf->nf_lru, dispose); - } -} - -static void -nfsd_file_put_noref(struct nfsd_file *nf) -{ - trace_nfsd_file_put(nf); - - if (refcount_dec_and_test(&nf->nf_ref)) { - WARN_ON(test_bit(NFSD_FILE_HASHED, 
&nf->nf_flags)); - nfsd_file_lru_remove(nf); - nfsd_file_free(nf); - } -} - -static void -nfsd_file_unhash_and_put(struct nfsd_file *nf) -{ - if (nfsd_file_unhash(nf)) - nfsd_file_put_noref(nf); -} - +/** + * nfsd_file_put - put the reference to a nfsd_file + * @nf: nfsd_file of which to put the reference + * + * Put a reference to a nfsd_file. In the non-GC case, we just put the + * reference immediately. In the GC case, if the reference would be + * the last one, the put it on the LRU instead to be cleaned up later. + */ void nfsd_file_put(struct nfsd_file *nf) { might_sleep(); + trace_nfsd_file_put(nf); - if (test_bit(NFSD_FILE_GC, &nf->nf_flags)) - nfsd_file_lru_add(nf); - else if (refcount_read(&nf->nf_ref) == 2) - nfsd_file_unhash_and_put(nf); + if (test_bit(NFSD_FILE_GC, &nf->nf_flags) && + test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { + /* + * If this is the last reference (nf_ref == 1), then try to + * transfer it to the LRU. + */ + if (refcount_dec_not_one(&nf->nf_ref)) + return; - if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { - nfsd_file_fsync(nf); - nfsd_file_put_noref(nf); - } else if (nf->nf_file && test_bit(NFSD_FILE_GC, &nf->nf_flags)) { - nfsd_file_put_noref(nf); - nfsd_file_schedule_laundrette(); - } else - nfsd_file_put_noref(nf); + /* Try to add it to the LRU. If that fails, decrement. */ + if (nfsd_file_lru_add(nf)) { + /* If it's still hashed, we're done */ + if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { + nfsd_file_schedule_laundrette(); + return; + } + + /* + * We're racing with unhashing, so try to remove it from + * the LRU. If removal fails, then someone else already + * has our reference. 
+ */ + if (!nfsd_file_lru_remove(nf)) + return; + } + } + if (refcount_dec_and_test(&nf->nf_ref)) + nfsd_file_free(nf); } static void @@ -497,33 +506,13 @@ nfsd_file_dispose_list(struct list_head *dispose) { struct nfsd_file *nf; - while(!list_empty(dispose)) { + while (!list_empty(dispose)) { nf = list_first_entry(dispose, struct nfsd_file, nf_lru); list_del_init(&nf->nf_lru); - nfsd_file_fsync(nf); - nfsd_file_put_noref(nf); + nfsd_file_free(nf); } } -static void -nfsd_file_dispose_list_sync(struct list_head *dispose) -{ - bool flush = false; - struct nfsd_file *nf; - - while(!list_empty(dispose)) { - nf = list_first_entry(dispose, struct nfsd_file, nf_lru); - list_del_init(&nf->nf_lru); - nfsd_file_fsync(nf); - if (!refcount_dec_and_test(&nf->nf_ref)) - continue; - if (nfsd_file_free(nf)) - flush = true; - } - if (flush) - flush_delayed_fput(); -} - static void nfsd_file_list_remove_disposal(struct list_head *dst, struct nfsd_fcache_disposal *l) @@ -591,21 +580,8 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, struct list_head *head = arg; struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru); - /* - * Do a lockless refcount check. The hashtable holds one reference, so - * we look to see if anything else has a reference, or if any have - * been put since the shrinker last ran. Those don't get unhashed and - * released. - * - * Note that in the put path, we set the flag and then decrement the - * counter. Here we check the counter and then test and clear the flag. - * That order is deliberate to ensure that we can do this locklessly. 
- */ - if (refcount_read(&nf->nf_ref) > 1) { - list_lru_isolate(lru, &nf->nf_lru); - trace_nfsd_file_gc_in_use(nf); - return LRU_REMOVED; - } + /* We should only be dealing with GC entries here */ + WARN_ON_ONCE(!test_bit(NFSD_FILE_GC, &nf->nf_flags)); /* * Don't throw out files that are still undergoing I/O or @@ -616,40 +592,30 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, return LRU_SKIP; } + /* If it was recently added to the list, skip it */ if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) { trace_nfsd_file_gc_referenced(nf); return LRU_ROTATE; } - if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { - trace_nfsd_file_gc_hashed(nf); - return LRU_SKIP; + /* + * Put the reference held on behalf of the LRU. If it wasn't the last + * one, then just remove it from the LRU and ignore it. + */ + if (!refcount_dec_and_test(&nf->nf_ref)) { + trace_nfsd_file_gc_in_use(nf); + list_lru_isolate(lru, &nf->nf_lru); + return LRU_REMOVED; } + /* Refcount went to zero. Unhash it and queue it to the dispose list */ + nfsd_file_unhash(nf); list_lru_isolate_move(lru, &nf->nf_lru, head); this_cpu_inc(nfsd_file_evictions); trace_nfsd_file_gc_disposed(nf); return LRU_REMOVED; } -/* - * Unhash items on @dispose immediately, then queue them on the - * disposal workqueue to finish releasing them in the background. - * - * cel: Note that between the time list_lru_shrink_walk runs and - * now, these items are in the hash table but marked unhashed. - * Why release these outside of lru_cb ? There's no lock ordering - * problem since lru_cb currently takes no lock. 
- */ -static void nfsd_file_gc_dispose_list(struct list_head *dispose) -{ - struct nfsd_file *nf; - - list_for_each_entry(nf, dispose, nf_lru) - nfsd_file_hash_remove(nf); - nfsd_file_dispose_list_delayed(dispose); -} - static void nfsd_file_gc(void) { @@ -659,7 +625,7 @@ nfsd_file_gc(void) ret = list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, &dispose, list_lru_count(&nfsd_file_lru)); trace_nfsd_file_gc_removed(ret, list_lru_count(&nfsd_file_lru)); - nfsd_file_gc_dispose_list(&dispose); + nfsd_file_dispose_list_delayed(&dispose); } static void @@ -685,7 +651,7 @@ nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc) ret = list_lru_shrink_walk(&nfsd_file_lru, sc, nfsd_file_lru_cb, &dispose); trace_nfsd_file_shrinker_removed(ret, list_lru_count(&nfsd_file_lru)); - nfsd_file_gc_dispose_list(&dispose); + nfsd_file_dispose_list_delayed(&dispose); return ret; } @@ -695,72 +661,111 @@ static struct shrinker nfsd_file_shrinker = { .seeks = 1, }; -/* - * Find all cache items across all net namespaces that match @inode and - * move them to @dispose. The lookup is atomic wrt nfsd_file_acquire(). +/** + * nfsd_file_queue_for_close: try to close out any open nfsd_files for an inode + * @inode: inode on which to close out nfsd_files + * @dispose: list on which to gather nfsd_files to close out + * + * An nfsd_file represents a struct file being held open on behalf of nfsd. An + * open file however can block other activity (such as leases), or cause + * undesirable behavior (e.g. spurious silly-renames when reexporting NFS). + * + * This function is intended to find open nfsd_files when this sort of + * conflicting access occurs and then attempt to close those files out. + * + * Populates the dispose list with entries that have already had their + * refcounts go to zero. The actual free of an nfsd_file can be expensive, + * so we leave it up to the caller whether it wants to wait or not. 
*/ -static unsigned int -__nfsd_file_close_inode(struct inode *inode, struct list_head *dispose) +static void +nfsd_file_queue_for_close(struct inode *inode, struct list_head *dispose) { struct nfsd_file_lookup_key key = { .type = NFSD_FILE_KEY_INODE, .inode = inode, }; - unsigned int count = 0; struct nfsd_file *nf; rcu_read_lock(); do { + int decrement = 1; + nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key, nfsd_file_rhash_params); if (!nf) break; - nfsd_file_unhash_and_queue(nf, dispose); - count++; + + /* If we raced with someone else unhashing, ignore it */ + if (!nfsd_file_unhash(nf)) + continue; + + /* If we can't get a reference, ignore it */ + if (!nfsd_file_get(nf)) + continue; + + /* Extra decrement if we remove from the LRU */ + if (nfsd_file_lru_remove(nf)) + ++decrement; + + /* If refcount goes to 0, then put on the dispose list */ + if (refcount_sub_and_test(decrement, &nf->nf_ref)) { + list_add(&nf->nf_lru, dispose); + trace_nfsd_file_closing(nf); + } } while (1); rcu_read_unlock(); - return count; -} - -/** - * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file - * @inode: inode of the file to attempt to remove - * - * Unhash and put, then flush and fput all cache items associated with @inode. - */ -void -nfsd_file_close_inode_sync(struct inode *inode) -{ - LIST_HEAD(dispose); - unsigned int count; - - count = __nfsd_file_close_inode(inode, &dispose); - trace_nfsd_file_close_inode_sync(inode, count); - nfsd_file_dispose_list_sync(&dispose); } /** * nfsd_file_close_inode - attempt a delayed close of a nfsd_file * @inode: inode of the file to attempt to remove * - * Unhash and put all cache item associated with @inode. + * Close out any open nfsd_files that can be reaped for @inode. The + * actual freeing is deferred to the dispose_list_delayed infrastructure. + * + * This is used by the fsnotify callbacks and setlease notifier. 
*/ static void nfsd_file_close_inode(struct inode *inode) { LIST_HEAD(dispose); - unsigned int count; - count = __nfsd_file_close_inode(inode, &dispose); - trace_nfsd_file_close_inode(inode, count); + nfsd_file_queue_for_close(inode, &dispose); nfsd_file_dispose_list_delayed(&dispose); } +/** + * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file + * @inode: inode of the file to attempt to remove + * + * Close out any open nfsd_files that can be reaped for @inode. The + * nfsd_files are closed out synchronously. + * + * This is called from nfsd_rename and nfsd_unlink to avoid silly-renames + * when reexporting NFS. + */ +void +nfsd_file_close_inode_sync(struct inode *inode) +{ + struct nfsd_file *nf; + LIST_HEAD(dispose); + + trace_nfsd_file_close(inode); + + nfsd_file_queue_for_close(inode, &dispose); + while (!list_empty(&dispose)) { + nf = list_first_entry(&dispose, struct nfsd_file, nf_lru); + list_del_init(&nf->nf_lru); + nfsd_file_free(nf); + } + flush_delayed_fput(); +} + /** * nfsd_file_delayed_close - close unused nfsd_files * @work: dummy * - * Walk the LRU list and close any entries that have not been used since + * Walk the LRU list and destroy any entries that have not been used since * the last scan. */ static void @@ -782,7 +787,7 @@ nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg, /* Only close files for F_SETLEASE leases */ if (fl->fl_flags & FL_LEASE) - nfsd_file_close_inode_sync(file_inode(fl->fl_file)); + nfsd_file_close_inode(file_inode(fl->fl_file)); return 0; } @@ -903,6 +908,13 @@ out_err: goto out; } +/** + * __nfsd_file_cache_purge: clean out the cache for shutdown + * @net: net-namespace to shut down the cache (may be NULL) + * + * Walk the nfsd_file cache and close out any that match @net. If @net is NULL, + * then close out everything. Called when an nfsd instance is being shut down. 
+ */ static void __nfsd_file_cache_purge(struct net *net) { @@ -916,8 +928,11 @@ __nfsd_file_cache_purge(struct net *net) nf = rhashtable_walk_next(&iter); while (!IS_ERR_OR_NULL(nf)) { - if (!net || nf->nf_net == net) - nfsd_file_unhash_and_queue(nf, &dispose); + if (!net || nf->nf_net == net) { + nfsd_file_unhash(nf); + nfsd_file_lru_remove(nf); + list_add(&nf->nf_lru, &dispose); + } nf = rhashtable_walk_next(&iter); } @@ -1084,8 +1099,12 @@ retry: if (nf) nf = nfsd_file_get(nf); rcu_read_unlock(); - if (nf) + + if (nf) { + if (nfsd_file_lru_remove(nf)) + WARN_ON_ONCE(refcount_dec_and_test(&nf->nf_ref)); goto wait_for_construction; + } nf = nfsd_file_alloc(&key, may_flags); if (!nf) { @@ -1118,11 +1137,11 @@ wait_for_construction: goto out; } open_retry = false; - nfsd_file_put_noref(nf); + if (refcount_dec_and_test(&nf->nf_ref)) + nfsd_file_free(nf); goto retry; } - nfsd_file_lru_remove(nf); this_cpu_inc(nfsd_file_cache_hits); status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags)); @@ -1132,7 +1151,8 @@ out: this_cpu_inc(nfsd_file_acquisitions); *pnf = nf; } else { - nfsd_file_put(nf); + if (refcount_dec_and_test(&nf->nf_ref)) + nfsd_file_free(nf); nf = NULL; } @@ -1158,8 +1178,10 @@ open_file: * If construction failed, or we raced with a call to unlink() * then unhash. 
*/ - if (status != nfs_ok || key.inode->i_nlink == 0) - nfsd_file_unhash_and_put(nf); + if (status == nfs_ok && key.inode->i_nlink == 0) + status = nfserr_jukebox; + if (status != nfs_ok) + nfsd_file_unhash(nf); clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags); smp_mb__after_atomic(); wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING); diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 46b8f68a2497..c852ae8eaf37 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -876,8 +876,8 @@ DEFINE_CLID_EVENT(confirmed_r); __print_flags(val, "|", \ { 1 << NFSD_FILE_HASHED, "HASHED" }, \ { 1 << NFSD_FILE_PENDING, "PENDING" }, \ - { 1 << NFSD_FILE_REFERENCED, "REFERENCED"}, \ - { 1 << NFSD_FILE_GC, "GC"}) + { 1 << NFSD_FILE_REFERENCED, "REFERENCED" }, \ + { 1 << NFSD_FILE_GC, "GC" }) DECLARE_EVENT_CLASS(nfsd_file_class, TP_PROTO(struct nfsd_file *nf), @@ -912,6 +912,7 @@ DEFINE_EVENT(nfsd_file_class, name, \ DEFINE_NFSD_FILE_EVENT(nfsd_file_free); DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash); DEFINE_NFSD_FILE_EVENT(nfsd_file_put); +DEFINE_NFSD_FILE_EVENT(nfsd_file_closing); DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_queue); TRACE_EVENT(nfsd_file_alloc, @@ -1103,35 +1104,6 @@ TRACE_EVENT(nfsd_file_open, __entry->nf_file) ) -DECLARE_EVENT_CLASS(nfsd_file_search_class, - TP_PROTO( - const struct inode *inode, - unsigned int count - ), - TP_ARGS(inode, count), - TP_STRUCT__entry( - __field(const struct inode *, inode) - __field(unsigned int, count) - ), - TP_fast_assign( - __entry->inode = inode; - __entry->count = count; - ), - TP_printk("inode=%p count=%u", - __entry->inode, __entry->count) -); - -#define DEFINE_NFSD_FILE_SEARCH_EVENT(name) \ -DEFINE_EVENT(nfsd_file_search_class, name, \ - TP_PROTO( \ - const struct inode *inode, \ - unsigned int count \ - ), \ - TP_ARGS(inode, count)) - -DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode_sync); -DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode); - TRACE_EVENT(nfsd_file_is_cached, TP_PROTO( const struct inode *inode, @@ -1209,7 
+1181,6 @@ DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_del_disposed); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_in_use); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_writeback); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_referenced); -DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_hashed); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_disposed); DECLARE_EVENT_CLASS(nfsd_file_lruwalk_class, @@ -1241,6 +1212,22 @@ DEFINE_EVENT(nfsd_file_lruwalk_class, name, \ DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_gc_removed); DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_shrinker_removed); +TRACE_EVENT(nfsd_file_close, + TP_PROTO( + const struct inode *inode + ), + TP_ARGS(inode), + TP_STRUCT__entry( + __field(const void *, inode) + ), + TP_fast_assign( + __entry->inode = inode; + ), + TP_printk("inode=%p", + __entry->inode + ) +); + TRACE_EVENT(nfsd_file_fsync, TP_PROTO( const struct nfsd_file *nf, From 21b8a1dd56a163825e5749b303858fb902ebf198 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 2 Dec 2022 10:45:30 -0800 Subject: [PATCH 3659/4122] rtc: msc313: Fix function prototype mismatch in msc313_rtc_probe() With clang's kernel control flow integrity (kCFI, CONFIG_CFI_CLANG), indirect call targets are validated against the expected function pointer prototype to make sure the call target is valid to help mitigate ROP attacks. If they are not identical, there is a failure at run time, which manifests as either a kernel panic or thread getting killed. msc313_rtc_probe() was passing clk_disable_unprepare() directly, which did not have matching prototypes for devm_add_action_or_reset()'s callback argument. Refactor to use devm_clk_get_enabled() instead. This was found as a result of Clang's new -Wcast-function-type-strict flag, which is more sensitive than the simpler -Wcast-function-type, which only checks for type width mismatches. 
Reported-by: kernel test robot Link: https://lore.kernel.org/lkml/202211041527.HD8TLSE1-lkp@intel.com Suggested-by: Christophe JAILLET Cc: Daniel Palmer Cc: Romain Perier Cc: Alessandro Zummo Cc: Alexandre Belloni Cc: linux-arm-kernel@lists.infradead.org Cc: linux-rtc@vger.kernel.org Signed-off-by: Kees Cook Reviewed-by: Daniel Palmer Tested-by: Daniel Palmer Link: https://lore.kernel.org/r/20221202184525.gonna.423-kees@kernel.org Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-msc313.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/rtc/rtc-msc313.c b/drivers/rtc/rtc-msc313.c index f3fde013c4b8..8d7737e0e2e0 100644 --- a/drivers/rtc/rtc-msc313.c +++ b/drivers/rtc/rtc-msc313.c @@ -212,22 +212,12 @@ static int msc313_rtc_probe(struct platform_device *pdev) return ret; } - clk = devm_clk_get(dev, NULL); + clk = devm_clk_get_enabled(dev, NULL); if (IS_ERR(clk)) { dev_err(dev, "No input reference clock\n"); return PTR_ERR(clk); } - ret = clk_prepare_enable(clk); - if (ret) { - dev_err(dev, "Failed to enable the reference clock, %d\n", ret); - return ret; - } - - ret = devm_add_action_or_reset(dev, (void (*) (void *))clk_disable_unprepare, clk); - if (ret) - return ret; - rate = clk_get_rate(clk); writew(rate & 0xFFFF, priv->rtc_base + REG_RTC_FREQ_CW_L); writew((rate >> 16) & 0xFFFF, priv->rtc_base + REG_RTC_FREQ_CW_H); From 2e830ccc21eb67a4c2490279d907e5e9199e5156 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Thu, 20 Oct 2022 22:42:41 +0200 Subject: [PATCH 3660/4122] rtc: rk808: reduce 'struct rk808' usage Reduce usage of 'struct rk808' (driver data of the parent MFD), so that only the chip variant field is still being accessed directly. This allows restructuring the MFD driver to support SPI based PMICs. 
Acked-by: Alexandre Belloni Signed-off-by: Sebastian Reichel Link: https://lore.kernel.org/r/20221020204251.108565-4-sebastian.reichel@collabora.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-rk808.c | 47 ++++++++++++++++++----------------------- 1 file changed, 20 insertions(+), 27 deletions(-) diff --git a/drivers/rtc/rtc-rk808.c b/drivers/rtc/rtc-rk808.c index e920da8c08da..2d9bcb3ce1e3 100644 --- a/drivers/rtc/rtc-rk808.c +++ b/drivers/rtc/rtc-rk808.c @@ -14,7 +14,6 @@ #include #include #include -#include /* RTC_CTRL_REG bitfields */ #define BIT_RTC_CTRL_REG_STOP_RTC_M BIT(0) @@ -51,7 +50,7 @@ struct rk_rtc_compat_reg { }; struct rk808_rtc { - struct rk808 *rk808; + struct regmap *regmap; struct rtc_device *rtc; struct rk_rtc_compat_reg *creg; int irq; @@ -97,12 +96,11 @@ static void gregorian_to_rockchip(struct rtc_time *tm) static int rk808_rtc_readtime(struct device *dev, struct rtc_time *tm) { struct rk808_rtc *rk808_rtc = dev_get_drvdata(dev); - struct rk808 *rk808 = rk808_rtc->rk808; u8 rtc_data[NUM_TIME_REGS]; int ret; /* Force an update of the shadowed registers right now */ - ret = regmap_update_bits(rk808->regmap, rk808_rtc->creg->ctrl_reg, + ret = regmap_update_bits(rk808_rtc->regmap, rk808_rtc->creg->ctrl_reg, BIT_RTC_CTRL_REG_RTC_GET_TIME, BIT_RTC_CTRL_REG_RTC_GET_TIME); if (ret) { @@ -116,7 +114,7 @@ static int rk808_rtc_readtime(struct device *dev, struct rtc_time *tm) * 32khz. If we clear the GET_TIME bit here, the time of i2c transfer * certainly more than 31.25us: 16 * 2.5us at 400kHz bus frequency. 
*/ - ret = regmap_update_bits(rk808->regmap, rk808_rtc->creg->ctrl_reg, + ret = regmap_update_bits(rk808_rtc->regmap, rk808_rtc->creg->ctrl_reg, BIT_RTC_CTRL_REG_RTC_GET_TIME, 0); if (ret) { @@ -124,7 +122,7 @@ static int rk808_rtc_readtime(struct device *dev, struct rtc_time *tm) return ret; } - ret = regmap_bulk_read(rk808->regmap, rk808_rtc->creg->seconds_reg, + ret = regmap_bulk_read(rk808_rtc->regmap, rk808_rtc->creg->seconds_reg, rtc_data, NUM_TIME_REGS); if (ret) { dev_err(dev, "Failed to bulk read rtc_data: %d\n", ret); @@ -148,7 +146,6 @@ static int rk808_rtc_readtime(struct device *dev, struct rtc_time *tm) static int rk808_rtc_set_time(struct device *dev, struct rtc_time *tm) { struct rk808_rtc *rk808_rtc = dev_get_drvdata(dev); - struct rk808 *rk808 = rk808_rtc->rk808; u8 rtc_data[NUM_TIME_REGS]; int ret; @@ -163,7 +160,7 @@ static int rk808_rtc_set_time(struct device *dev, struct rtc_time *tm) rtc_data[6] = bin2bcd(tm->tm_wday); /* Stop RTC while updating the RTC registers */ - ret = regmap_update_bits(rk808->regmap, rk808_rtc->creg->ctrl_reg, + ret = regmap_update_bits(rk808_rtc->regmap, rk808_rtc->creg->ctrl_reg, BIT_RTC_CTRL_REG_STOP_RTC_M, BIT_RTC_CTRL_REG_STOP_RTC_M); if (ret) { @@ -171,14 +168,14 @@ static int rk808_rtc_set_time(struct device *dev, struct rtc_time *tm) return ret; } - ret = regmap_bulk_write(rk808->regmap, rk808_rtc->creg->seconds_reg, + ret = regmap_bulk_write(rk808_rtc->regmap, rk808_rtc->creg->seconds_reg, rtc_data, NUM_TIME_REGS); if (ret) { dev_err(dev, "Failed to bull write rtc_data: %d\n", ret); return ret; } /* Start RTC again */ - ret = regmap_update_bits(rk808->regmap, rk808_rtc->creg->ctrl_reg, + ret = regmap_update_bits(rk808_rtc->regmap, rk808_rtc->creg->ctrl_reg, BIT_RTC_CTRL_REG_STOP_RTC_M, 0); if (ret) { dev_err(dev, "Failed to update RTC control: %d\n", ret); @@ -191,12 +188,11 @@ static int rk808_rtc_set_time(struct device *dev, struct rtc_time *tm) static int rk808_rtc_readalarm(struct device *dev, struct 
rtc_wkalrm *alrm) { struct rk808_rtc *rk808_rtc = dev_get_drvdata(dev); - struct rk808 *rk808 = rk808_rtc->rk808; u8 alrm_data[NUM_ALARM_REGS]; uint32_t int_reg; int ret; - ret = regmap_bulk_read(rk808->regmap, + ret = regmap_bulk_read(rk808_rtc->regmap, rk808_rtc->creg->alarm_seconds_reg, alrm_data, NUM_ALARM_REGS); if (ret) { @@ -212,7 +208,7 @@ static int rk808_rtc_readalarm(struct device *dev, struct rtc_wkalrm *alrm) alrm->time.tm_year = (bcd2bin(alrm_data[5] & YEARS_REG_MSK)) + 100; rockchip_to_gregorian(&alrm->time); - ret = regmap_read(rk808->regmap, rk808_rtc->creg->int_reg, &int_reg); + ret = regmap_read(rk808_rtc->regmap, rk808_rtc->creg->int_reg, &int_reg); if (ret) { dev_err(dev, "Failed to read RTC INT REG: %d\n", ret); return ret; @@ -228,10 +224,9 @@ static int rk808_rtc_readalarm(struct device *dev, struct rtc_wkalrm *alrm) static int rk808_rtc_stop_alarm(struct rk808_rtc *rk808_rtc) { - struct rk808 *rk808 = rk808_rtc->rk808; int ret; - ret = regmap_update_bits(rk808->regmap, rk808_rtc->creg->int_reg, + ret = regmap_update_bits(rk808_rtc->regmap, rk808_rtc->creg->int_reg, BIT_RTC_INTERRUPTS_REG_IT_ALARM_M, 0); return ret; @@ -239,10 +234,9 @@ static int rk808_rtc_stop_alarm(struct rk808_rtc *rk808_rtc) static int rk808_rtc_start_alarm(struct rk808_rtc *rk808_rtc) { - struct rk808 *rk808 = rk808_rtc->rk808; int ret; - ret = regmap_update_bits(rk808->regmap, rk808_rtc->creg->int_reg, + ret = regmap_update_bits(rk808_rtc->regmap, rk808_rtc->creg->int_reg, BIT_RTC_INTERRUPTS_REG_IT_ALARM_M, BIT_RTC_INTERRUPTS_REG_IT_ALARM_M); @@ -252,7 +246,6 @@ static int rk808_rtc_start_alarm(struct rk808_rtc *rk808_rtc) static int rk808_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm) { struct rk808_rtc *rk808_rtc = dev_get_drvdata(dev); - struct rk808 *rk808 = rk808_rtc->rk808; u8 alrm_data[NUM_ALARM_REGS]; int ret; @@ -272,7 +265,7 @@ static int rk808_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm) alrm_data[4] = bin2bcd(alrm->time.tm_mon + 1); 
alrm_data[5] = bin2bcd(alrm->time.tm_year - 100); - ret = regmap_bulk_write(rk808->regmap, + ret = regmap_bulk_write(rk808_rtc->regmap, rk808_rtc->creg->alarm_seconds_reg, alrm_data, NUM_ALARM_REGS); if (ret) { @@ -313,20 +306,18 @@ static int rk808_rtc_alarm_irq_enable(struct device *dev, static irqreturn_t rk808_alarm_irq(int irq, void *data) { struct rk808_rtc *rk808_rtc = data; - struct rk808 *rk808 = rk808_rtc->rk808; - struct i2c_client *client = rk808->i2c; int ret; - ret = regmap_write(rk808->regmap, rk808_rtc->creg->status_reg, + ret = regmap_write(rk808_rtc->regmap, rk808_rtc->creg->status_reg, RTC_STATUS_MASK); if (ret) { - dev_err(&client->dev, + dev_err(&rk808_rtc->rtc->dev, "%s:Failed to update RTC status: %d\n", __func__, ret); return ret; } rtc_update_irq(rk808_rtc->rtc, 1, RTC_IRQF | RTC_AF); - dev_dbg(&client->dev, + dev_dbg(&rk808_rtc->rtc->dev, "%s:irq=%d\n", __func__, irq); return IRQ_HANDLED; } @@ -404,10 +395,12 @@ static int rk808_rtc_probe(struct platform_device *pdev) break; } platform_set_drvdata(pdev, rk808_rtc); - rk808_rtc->rk808 = rk808; + rk808_rtc->regmap = dev_get_regmap(pdev->dev.parent, NULL); + if (!rk808_rtc->regmap) + return -ENODEV; /* start rtc running by default, and use shadowed timer. 
*/ - ret = regmap_update_bits(rk808->regmap, rk808_rtc->creg->ctrl_reg, + ret = regmap_update_bits(rk808_rtc->regmap, rk808_rtc->creg->ctrl_reg, BIT_RTC_CTRL_REG_STOP_RTC_M | BIT_RTC_CTRL_REG_RTC_READSEL_M, BIT_RTC_CTRL_REG_RTC_READSEL_M); @@ -417,7 +410,7 @@ static int rk808_rtc_probe(struct platform_device *pdev) return ret; } - ret = regmap_write(rk808->regmap, rk808_rtc->creg->status_reg, + ret = regmap_write(rk808_rtc->regmap, rk808_rtc->creg->status_reg, RTC_STATUS_MASK); if (ret) { dev_err(&pdev->dev, From 01744ce9f07f0b76b0b2d30adba2a7c104f1ff2a Mon Sep 17 00:00:00 2001 From: Naveen Krishna Chatradhi Date: Mon, 5 Dec 2022 10:54:13 +0000 Subject: [PATCH 3661/4122] i3c: Correct the macro module_i3c_i2c_driver Present definition for module_i3c_i2c_driver uses only the 1st argument i.e., struct i3c_driver. Irrespective of CONFIG_I3C being enabled/disabled, struct i2c_driver is never passed to module_driver(). Passing struct i2c_driver as the 4th argument works. Signed-off-by: Akshay Gupta Signed-off-by: Naveen Krishna Chatradhi Link: https://lore.kernel.org/r/20221205105413.937704-1-naveenkrishna.chatradhi@amd.com Signed-off-by: Alexandre Belloni --- include/linux/i3c/device.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/i3c/device.h b/include/linux/i3c/device.h index 8242e13e7b0b..419192b5cc4d 100644 --- a/include/linux/i3c/device.h +++ b/include/linux/i3c/device.h @@ -287,7 +287,8 @@ static inline void i3c_i2c_driver_unregister(struct i3c_driver *i3cdrv, #define module_i3c_i2c_driver(__i3cdrv, __i2cdrv) \ module_driver(__i3cdrv, \ i3c_i2c_driver_register, \ - i3c_i2c_driver_unregister) + i3c_i2c_driver_unregister, \ + __i2cdrv) int i3c_device_do_priv_xfers(struct i3c_device *dev, struct i3c_priv_xfer *xfers, From 672825cd2823a0cee4687ce80fef5b702ff3caa3 Mon Sep 17 00:00:00 2001 From: Jack Chen Date: Wed, 7 Dec 2022 15:50:59 -0500 Subject: [PATCH 3662/4122] i3c: export SETDASA method Because not all I3C drivers have the
hot-join feature ready, and especially not all I3C devices support hot-join feature, exporting SETDASA method could be useful. With this function, the I3C controller could perform a DAA to I3C devices when users decide to turn these I3C devices off and on again during run-time. Tested: This change has been tested with turning off an I3C device and turning it on again during run-time. The device driver calls SETDASA method to perform DAA to the device. And communication between I3C controller and device is set up again correctly. Signed-off-by: Jack Chen Link: https://lore.kernel.org/r/20221207205059.3848851-1-zenghuchen@google.com Signed-off-by: Alexandre Belloni --- drivers/i3c/device.c | 20 ++++++++++++++++++++ drivers/i3c/internals.h | 1 + drivers/i3c/master.c | 19 +++++++++++++++++++ include/linux/i3c/device.h | 2 ++ 4 files changed, 42 insertions(+) diff --git a/drivers/i3c/device.c b/drivers/i3c/device.c index e92d3e9a52bd..9762630b917e 100644 --- a/drivers/i3c/device.c +++ b/drivers/i3c/device.c @@ -50,6 +50,26 @@ int i3c_device_do_priv_xfers(struct i3c_device *dev, } EXPORT_SYMBOL_GPL(i3c_device_do_priv_xfers); +/** + * i3c_device_do_setdasa() - do I3C dynamic address assignement with + * static address + * + * @dev: device with which the DAA should be done + * + * Return: 0 in case of success, a negative error core otherwise.
+ */ +int i3c_device_do_setdasa(struct i3c_device *dev) +{ + int ret; + + i3c_bus_normaluse_lock(dev->bus); + ret = i3c_dev_setdasa_locked(dev->desc); + i3c_bus_normaluse_unlock(dev->bus); + + return ret; +} +EXPORT_SYMBOL_GPL(i3c_device_do_setdasa); + /** * i3c_device_get_info() - get I3C device information * diff --git a/drivers/i3c/internals.h b/drivers/i3c/internals.h index 86b7b44cfca2..908a807badaf 100644 --- a/drivers/i3c/internals.h +++ b/drivers/i3c/internals.h @@ -15,6 +15,7 @@ extern struct bus_type i3c_bus_type; void i3c_bus_normaluse_lock(struct i3c_bus *bus); void i3c_bus_normaluse_unlock(struct i3c_bus *bus); +int i3c_dev_setdasa_locked(struct i3c_dev_desc *dev); int i3c_dev_do_priv_xfers_locked(struct i3c_dev_desc *dev, struct i3c_priv_xfer *xfers, int nxfers); diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c index 351c81a929a6..d7e6f6c99aea 100644 --- a/drivers/i3c/master.c +++ b/drivers/i3c/master.c @@ -2708,6 +2708,25 @@ int i3c_master_unregister(struct i3c_master_controller *master) } EXPORT_SYMBOL_GPL(i3c_master_unregister); +int i3c_dev_setdasa_locked(struct i3c_dev_desc *dev) +{ + struct i3c_master_controller *master; + + if (!dev) + return -ENOENT; + + master = i3c_dev_get_master(dev); + if (!master) + return -EINVAL; + + if (!dev->boardinfo || !dev->boardinfo->init_dyn_addr || + !dev->boardinfo->static_addr) + return -EINVAL; + + return i3c_master_setdasa_locked(master, dev->info.static_addr, + dev->boardinfo->init_dyn_addr); +} + int i3c_dev_do_priv_xfers_locked(struct i3c_dev_desc *dev, struct i3c_priv_xfer *xfers, int nxfers) diff --git a/include/linux/i3c/device.h b/include/linux/i3c/device.h index 419192b5cc4d..1c997abe868c 100644 --- a/include/linux/i3c/device.h +++ b/include/linux/i3c/device.h @@ -294,6 +294,8 @@ int i3c_device_do_priv_xfers(struct i3c_device *dev, struct i3c_priv_xfer *xfers, int nxfers); +int i3c_device_do_setdasa(struct i3c_device *dev); + void i3c_device_get_info(struct i3c_device *dev, struct 
i3c_device_info *info); struct i3c_ibi_payload { From 08dcf0732cb4d97b85493d9f60470e48eebf87fe Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Sun, 11 Dec 2022 21:55:38 +0100 Subject: [PATCH 3663/4122] MAINTAINERS: mark I3C DRIVER FOR SYNOPSYS DESIGNWARE orphan Vitor left Synopsys and the email address is now bouncing. Link: https://lore.kernel.org/r/20221211205539.19353-1-alexandre.belloni@bootlin.com Signed-off-by: Alexandre Belloni --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index cf0f18502372..ebd7f7c957ea 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9697,8 +9697,7 @@ F: Documentation/devicetree/bindings/i3c/cdns,i3c-master.yaml F: drivers/i3c/master/i3c-master-cdns.c I3C DRIVER FOR SYNOPSYS DESIGNWARE -M: Vitor Soares -S: Maintained +S: Orphan F: Documentation/devicetree/bindings/i3c/snps,dw-i3c-master.yaml F: drivers/i3c/master/dw* From 103c14db61a24cc0cd344dc5d93d264a36687c35 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Sun, 11 Dec 2022 22:57:55 +0100 Subject: [PATCH 3664/4122] rtc: rx6110: fix warning with !OF rx6110_spi_of_match is not used when !OF, leading to a warning: >> drivers/rtc/rtc-rx6110.c:384:34: warning: 'rx6110_spi_of_match' defined but not used [-Wunused-const-variable=] 384 | static const struct of_device_id rx6110_spi_of_match[] = { | ^~~~~~~~~~~~~~~~~~~ Reported-by: kernel test robot Link: https://lore.kernel.org/r/20221211215756.54002-1-alexandre.belloni@bootlin.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-rx6110.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-rx6110.c b/drivers/rtc/rtc-rx6110.c index cc634558b928..76a49838014b 100644 --- a/drivers/rtc/rtc-rx6110.c +++ b/drivers/rtc/rtc-rx6110.c @@ -376,7 +376,7 @@ static const struct spi_device_id rx6110_spi_id[] = { }; MODULE_DEVICE_TABLE(spi, rx6110_spi_id); -static const struct of_device_id rx6110_spi_of_match[] = { +static const __maybe_unused struct 
of_device_id rx6110_spi_of_match[] = { { .compatible = "epson,rx6110" }, { }, }; From c2d12e85336f6d4172fb2bab5935027c446d7343 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Sun, 11 Dec 2022 23:35:53 +0100 Subject: [PATCH 3665/4122] rtc: pcf85063: fix pcf85063_clkout_control pcf85063_clkout_control reads the wrong register but then updates the correct one. Reported-by: Janne Terho Fixes: 8c229ab6048b ("rtc: pcf85063: Add pcf85063 clkout control to common clock framework") Link: https://lore.kernel.org/r/20221211223553.59955-1-alexandre.belloni@bootlin.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-pcf85063.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-pcf85063.c b/drivers/rtc/rtc-pcf85063.c index 99f9cc57c7b3..754e03984f98 100644 --- a/drivers/rtc/rtc-pcf85063.c +++ b/drivers/rtc/rtc-pcf85063.c @@ -424,7 +424,7 @@ static int pcf85063_clkout_control(struct clk_hw *hw, bool enable) unsigned int buf; int ret; - ret = regmap_read(pcf85063->regmap, PCF85063_REG_OFFSET, &buf); + ret = regmap_read(pcf85063->regmap, PCF85063_REG_CTRL2, &buf); if (ret < 0) return ret; buf &= PCF85063_REG_CLKO_F_MASK; From 13959373e9c9021cc80730c7bd1242e07b10b328 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 8 Dec 2022 22:32:25 +1000 Subject: [PATCH 3666/4122] powerpc/qspinlock: Fix 32-bit build Some 32-bit configurations don't pull in the spin_begin/end/relax definitions. The fix is to restore a lost include.
Reported-by: kernel test robot Fixes: 84990b169557 ("powerpc/qspinlock: add mcs queueing for contended waiters") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/oe-kbuild-all/202212050224.i7uh9fOh-lkp@intel.com Link: https://lore.kernel.org/r/20221208123225.1566113-1-npiggin@gmail.com --- arch/powerpc/lib/qspinlock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c index 1cf5d3e75250..e4bd145255d0 100644 --- a/arch/powerpc/lib/qspinlock.c +++ b/arch/powerpc/lib/qspinlock.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include From ff39899be80b9d90d5e13775eb9fd150338b6e15 Mon Sep 17 00:00:00 2001 From: Yuezhang Mo Date: Thu, 21 Jul 2022 09:59:32 +0800 Subject: [PATCH 3667/4122] exfat: simplify empty entry hint This commit adds exfat_set_empty_hint()/exfat_reset_empty_hint() to reduce code complexity and make code more readable. Signed-off-by: Yuezhang Mo Reviewed-by: Andy Wu Reviewed-by: Aoyama Wataru Reviewed-by: Sungjong Seo Signed-off-by: Namjae Jeon --- fs/exfat/dir.c | 58 +++++++++++++++++++++++++++----------------------- 1 file changed, 31 insertions(+), 27 deletions(-) diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c index 0fc08fdcba73..c0e60ff9ec7d 100644 --- a/fs/exfat/dir.c +++ b/fs/exfat/dir.c @@ -897,6 +897,29 @@ free_es: return NULL; } +static inline void exfat_reset_empty_hint(struct exfat_hint_femp *hint_femp) +{ + hint_femp->eidx = EXFAT_HINT_NONE; + hint_femp->count = 0; +} + +static inline void exfat_set_empty_hint(struct exfat_inode_info *ei, + struct exfat_hint_femp *candi_empty, struct exfat_chain *clu, + int dentry, int num_entries) +{ + if (ei->hint_femp.eidx == EXFAT_HINT_NONE || + ei->hint_femp.eidx > dentry) { + if (candi_empty->count == 0) { + candi_empty->cur = *clu; + candi_empty->eidx = dentry; + } + + candi_empty->count++; + if (candi_empty->count == num_entries) + ei->hint_femp = *candi_empty; + } +} + enum { 
DIRENT_STEP_FILE, DIRENT_STEP_STRM, @@ -921,7 +944,7 @@ int exfat_find_dir_entry(struct super_block *sb, struct exfat_inode_info *ei, { int i, rewind = 0, dentry = 0, end_eidx = 0, num_ext = 0, len; int order, step, name_len = 0; - int dentries_per_clu, num_empty = 0; + int dentries_per_clu; unsigned int entry_type; unsigned short *uniname = NULL; struct exfat_chain clu; @@ -939,10 +962,13 @@ int exfat_find_dir_entry(struct super_block *sb, struct exfat_inode_info *ei, end_eidx = dentry; } - candi_empty.eidx = EXFAT_HINT_NONE; + exfat_reset_empty_hint(&ei->hint_femp); + rewind: order = 0; step = DIRENT_STEP_FILE; + exfat_reset_empty_hint(&candi_empty); + while (clu.dir != EXFAT_EOF_CLUSTER) { i = dentry & (dentries_per_clu - 1); for (; i < dentries_per_clu; i++, dentry++) { @@ -962,26 +988,8 @@ rewind: entry_type == TYPE_DELETED) { step = DIRENT_STEP_FILE; - num_empty++; - if (candi_empty.eidx == EXFAT_HINT_NONE && - num_empty == 1) { - exfat_chain_set(&candi_empty.cur, - clu.dir, clu.size, clu.flags); - } - - if (candi_empty.eidx == EXFAT_HINT_NONE && - num_empty >= num_entries) { - candi_empty.eidx = - dentry - (num_empty - 1); - WARN_ON(candi_empty.eidx < 0); - candi_empty.count = num_empty; - - if (ei->hint_femp.eidx == - EXFAT_HINT_NONE || - candi_empty.eidx <= - ei->hint_femp.eidx) - ei->hint_femp = candi_empty; - } + exfat_set_empty_hint(ei, &candi_empty, &clu, + dentry, num_entries); brelse(bh); if (entry_type == TYPE_UNUSED) @@ -989,8 +997,7 @@ rewind: continue; } - num_empty = 0; - candi_empty.eidx = EXFAT_HINT_NONE; + exfat_reset_empty_hint(&candi_empty); if (entry_type == TYPE_FILE || entry_type == TYPE_DIR) { step = DIRENT_STEP_FILE; @@ -1090,9 +1097,6 @@ not_found: rewind = 1; dentry = 0; clu.dir = p_dir->dir; - /* reset empty hint */ - num_empty = 0; - candi_empty.eidx = EXFAT_HINT_NONE; goto rewind; } From e298c8a818a3e517582e60c412f4a41b3a1647c5 Mon Sep 17 00:00:00 2001 From: Yuezhang Mo Date: Mon, 7 Nov 2022 17:22:13 +0900 Subject: [PATCH 
3668/4122] exfat: hint the empty entry which at the end of cluster chain After traversing all directory entries, hint the empty directory entry no matter whether or not there are enough empty directory entries. After this commit, hint the empty directory entries like this: 1. Hint the deleted directory entries if enough; 2. Hint the deleted and unused directory entries which are at the end of the cluster chain no matter whether enough or not (added by this commit); 3. If there are no empty directory entries, hint the empty directory entries in the new cluster (added by this commit). This avoids repeated traversal of directory entries, reduces CPU usage, and improves the performance of creating files and directories(especially on low-performance CPUs). Test: create 5000 files on a class 4 SD card on imx6q-sabrelite with: for ((i=0;i<5;i++)); do sync time (for ((j=1;j<=1000;j++)); do touch file$((i*1000+j)); done) done The more files, the more performance improvements. Before After Improvement 1~1000 25.360s 22.168s 14.40% 1001~2000 38.242s 28.72s 33.15% 2001~3000 49.134s 35.037s 40.23% 3001~4000 62.042s 41.624s 49.05% 4001~5000 73.629s 46.772s 57.42% Signed-off-by: Yuezhang Mo Reviewed-by: Andy Wu Reviewed-by: Aoyama Wataru Reviewed-by: Sungjong Seo Signed-off-by: Namjae Jeon --- fs/exfat/dir.c | 26 ++++++++++++++++++++++---- fs/exfat/namei.c | 33 +++++++++++++++++++++------------ 2 files changed, 43 insertions(+), 16 deletions(-) diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c index c0e60ff9ec7d..30d0ac43b66c 100644 --- a/fs/exfat/dir.c +++ b/fs/exfat/dir.c @@ -905,17 +905,24 @@ static inline void exfat_reset_empty_hint(struct exfat_hint_femp *hint_femp) static inline void exfat_set_empty_hint(struct exfat_inode_info *ei, struct exfat_hint_femp *candi_empty, struct exfat_chain *clu, - int dentry, int num_entries) + int dentry, int num_entries, int entry_type) { if (ei->hint_femp.eidx == EXFAT_HINT_NONE || ei->hint_femp.eidx > dentry) { + int total_entries = 
EXFAT_B_TO_DEN(i_size_read(&ei->vfs_inode)); + if (candi_empty->count == 0) { candi_empty->cur = *clu; candi_empty->eidx = dentry; } - candi_empty->count++; - if (candi_empty->count == num_entries) + if (entry_type == TYPE_UNUSED) + candi_empty->count += total_entries - dentry; + else + candi_empty->count++; + + if (candi_empty->count == num_entries || + candi_empty->count + candi_empty->eidx == total_entries) ei->hint_femp = *candi_empty; } } @@ -989,7 +996,8 @@ rewind: step = DIRENT_STEP_FILE; exfat_set_empty_hint(ei, &candi_empty, &clu, - dentry, num_entries); + dentry, num_entries, + entry_type); brelse(bh); if (entry_type == TYPE_UNUSED) @@ -1100,6 +1108,16 @@ not_found: goto rewind; } + /* + * set the EXFAT_EOF_CLUSTER flag to avoid search + * from the beginning again when allocated a new cluster + */ + if (ei->hint_femp.eidx == EXFAT_HINT_NONE) { + ei->hint_femp.cur.dir = EXFAT_EOF_CLUSTER; + ei->hint_femp.eidx = p_dir->size * dentries_per_clu; + ei->hint_femp.count = 0; + } + /* initialized hint_stat */ hint_stat->clu = p_dir->dir; hint_stat->eidx = 0; diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index b617bebc3d0f..99e00a36c029 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -224,11 +224,18 @@ static int exfat_search_empty_slot(struct super_block *sb, if (hint_femp->eidx != EXFAT_HINT_NONE) { dentry = hint_femp->eidx; - if (num_entries <= hint_femp->count) { - hint_femp->eidx = EXFAT_HINT_NONE; - return dentry; - } + /* + * If hint_femp->count is enough, it is needed to check if + * there are actual empty entries. + * Otherwise, and if "dentry + hint_famp->count" is also equal + * to "p_dir->size * dentries_per_clu", it means ENOSPC. 
+ */ + if (dentry + hint_femp->count == p_dir->size * dentries_per_clu && + num_entries > hint_femp->count) + return -ENOSPC; + + hint_femp->eidx = EXFAT_HINT_NONE; exfat_chain_dup(&clu, &hint_femp->cur); } else { exfat_chain_dup(&clu, p_dir); @@ -293,6 +300,12 @@ static int exfat_search_empty_slot(struct super_block *sb, } } + hint_femp->eidx = p_dir->size * dentries_per_clu - num_empty; + hint_femp->count = num_empty; + if (num_empty == 0) + exfat_chain_set(&hint_femp->cur, EXFAT_EOF_CLUSTER, 0, + clu.flags); + return -ENOSPC; } @@ -369,15 +382,11 @@ static int exfat_find_empty_entry(struct inode *inode, if (exfat_ent_set(sb, last_clu, clu.dir)) return -EIO; - if (hint_femp.eidx == EXFAT_HINT_NONE) { - /* the special case that new dentry - * should be allocated from the start of new cluster - */ - hint_femp.eidx = EXFAT_B_TO_DEN_IDX(p_dir->size, sbi); - hint_femp.count = sbi->dentries_per_clu; - exfat_chain_set(&hint_femp.cur, clu.dir, 0, clu.flags); - } + if (hint_femp.cur.dir == EXFAT_EOF_CLUSTER) exfat_chain_set(&hint_femp.cur, clu.dir, 0, clu.flags); + + hint_femp.count += sbi->dentries_per_clu; + hint_femp.cur.size++; p_dir->size++; size = EXFAT_CLU_TO_B(p_dir->size, sbi); From f83d8a3b532097276266b5e81073ea46e27b17ab Mon Sep 17 00:00:00 2001 From: Yuezhang Mo Date: Thu, 7 Apr 2022 15:55:56 +0800 Subject: [PATCH 3669/4122] exfat: reduce the size of exfat_entry_set_cache Normally, there are at most 19 directory entries for a file or a directory. - A file directory entry - A stream extension directory entry - 1~17 file name directory entries So the directory entries span at most 3 sectors, and it is enough for struct exfat_entry_set_cache to pre-allocate 3 bh. 
This commit changes the size of struct exfat_entry_set_cache as: Before After 32-bit system 88 32 bytes 64-bit system 168 48 bytes Signed-off-by: Yuezhang Mo Reviewed-by: Andy Wu Reviewed-by: Aoyama Wataru Reviewed-by: Sungjong Seo Signed-off-by: Namjae Jeon --- fs/exfat/exfat_fs.h | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index a8f8eee4937c..af55018ff22e 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -9,6 +9,7 @@ #include #include #include +#include #define EXFAT_ROOT_INO 1 @@ -41,6 +42,14 @@ enum { #define ES_2_ENTRIES 2 #define ES_ALL_ENTRIES 0 +#define ES_IDX_FILE 0 +#define ES_IDX_STREAM 1 +#define ES_IDX_FIRST_FILENAME 2 +#define EXFAT_FILENAME_ENTRY_NUM(name_len) \ + DIV_ROUND_UP(name_len, EXFAT_FILE_NAME_LEN) +#define ES_IDX_LAST_FILENAME(name_len) \ + (ES_IDX_FIRST_FILENAME + EXFAT_FILENAME_ENTRY_NUM(name_len) - 1) + #define DIR_DELETED 0xFFFF0321 /* type values */ @@ -68,9 +77,6 @@ enum { #define MAX_NAME_LENGTH 255 /* max len of file name excluding NULL */ #define MAX_VFSNAME_BUF_SIZE ((MAX_NAME_LENGTH + 1) * MAX_CHARSET_SIZE) -/* Enough size to hold 256 dentry (even 512 Byte sector) */ -#define DIR_CACHE_SIZE (256*sizeof(struct exfat_dentry)/512+1) - #define EXFAT_HINT_NONE -1 #define EXFAT_MIN_SUBDIR 2 @@ -125,6 +131,17 @@ enum { #define BITS_PER_BYTE_MASK 0x7 #define IGNORED_BITS_REMAINED(clu, clu_base) ((1 << ((clu) - (clu_base))) - 1) +#define ES_ENTRY_NUM(name_len) (ES_IDX_LAST_FILENAME(name_len) + 1) +/* 19 entries = 1 file entry + 1 stream entry + 17 filename entries */ +#define ES_MAX_ENTRY_NUM ES_ENTRY_NUM(MAX_NAME_LENGTH) + +/* + * 19 entries x 32 bytes/entry = 608 bytes. + * The 608 bytes are in 3 sectors at most (even 512 Byte sector). 
+ */ +#define DIR_CACHE_SIZE \ + (DIV_ROUND_UP(EXFAT_DEN_TO_B(ES_MAX_ENTRY_NUM), SECTOR_SIZE) + 1) + struct exfat_dentry_namebuf { char *lfn; int lfnbuf_len; /* usually MAX_UNINAME_BUF_SIZE */ @@ -166,11 +183,11 @@ struct exfat_hint { struct exfat_entry_set_cache { struct super_block *sb; - bool modified; unsigned int start_off; int num_bh; struct buffer_head *bh[DIR_CACHE_SIZE]; unsigned int num_entries; + bool modified; }; struct exfat_dir_entry { From a3ff29a95fde16906304455aa8c0bd84eb770258 Mon Sep 17 00:00:00 2001 From: Yuezhang Mo Date: Wed, 9 Nov 2022 13:50:22 +0800 Subject: [PATCH 3670/4122] exfat: support dynamic allocate bh for exfat_entry_set_cache In special cases, a file or a directory may occupy more than 19 directory entries, so pre-allocating 3 bh is not enough. For example: - Vendor secondary directory entries may be supported in the future. - The file directory entry is damaged and its SecondaryCount field is bigger than 18. So this commit supports dynamic allocation of bh. Signed-off-by: Yuezhang Mo Reviewed-by: Andy Wu Reviewed-by: Aoyama Wataru Reviewed-by: Sungjong Seo Signed-off-by: Namjae Jeon --- fs/exfat/dir.c | 15 +++++++++++++++ fs/exfat/exfat_fs.h | 5 ++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c index 30d0ac43b66c..03e9c9e3966e 100644 --- a/fs/exfat/dir.c +++ b/fs/exfat/dir.c @@ -615,6 +615,10 @@ int exfat_free_dentry_set(struct exfat_entry_set_cache *es, int sync) bforget(es->bh[i]); else brelse(es->bh[i]); + + if (IS_DYNAMIC_ES(es)) + kfree(es->bh); + kfree(es); return err; } @@ -847,6 +851,7 @@ struct exfat_entry_set_cache *exfat_get_dentry_set(struct super_block *sb, /* byte offset in sector */ off = EXFAT_BLK_OFFSET(byte_offset, sb); es->start_off = off; + es->bh = es->__bh; /* sector offset in cluster */ sec = EXFAT_B_TO_BLK(byte_offset, sb); @@ -866,6 +871,16 @@ struct exfat_entry_set_cache *exfat_get_dentry_set(struct super_block *sb, es->num_entries = num_entries; num_bh = 
EXFAT_B_TO_BLK_ROUND_UP(off + num_entries * DENTRY_SIZE, sb); + if (num_bh > ARRAY_SIZE(es->__bh)) { + es->bh = kmalloc_array(num_bh, sizeof(*es->bh), GFP_KERNEL); + if (!es->bh) { + brelse(bh); + kfree(es); + return NULL; + } + es->bh[0] = bh; + } + for (i = 1; i < num_bh; i++) { /* get the next sector */ if (exfat_is_last_sector_in_cluster(sbi, sec)) { diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index af55018ff22e..82395ae80dba 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -185,11 +185,14 @@ struct exfat_entry_set_cache { struct super_block *sb; unsigned int start_off; int num_bh; - struct buffer_head *bh[DIR_CACHE_SIZE]; + struct buffer_head *__bh[DIR_CACHE_SIZE]; + struct buffer_head **bh; unsigned int num_entries; bool modified; }; +#define IS_DYNAMIC_ES(es) ((es)->__bh != (es)->bh) + struct exfat_dir_entry { struct exfat_chain dir; int entry; From 20914ff6dd56dd6b548bf5dd90bff09ef89999e4 Mon Sep 17 00:00:00 2001 From: Yuezhang Mo Date: Thu, 17 Nov 2022 11:37:13 +0800 Subject: [PATCH 3671/4122] exfat: move exfat_entry_set_cache from heap to stack The size of struct exfat_entry_set_cache is only 56 bytes on 64-bit system, and allocating from stack is more efficient than allocating from heap. 
Signed-off-by: Yuezhang Mo Reviewed-by: Andy Wu Reviewed-by: Aoyama Wataru Reviewed-by: Sungjong Seo Signed-off-by: Namjae Jeon --- fs/exfat/dir.c | 35 +++++++++++++++-------------------- fs/exfat/exfat_fs.h | 5 +++-- fs/exfat/inode.c | 13 ++++++------- fs/exfat/namei.c | 11 +++++------ 4 files changed, 29 insertions(+), 35 deletions(-) diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c index 03e9c9e3966e..a3fb609dd129 100644 --- a/fs/exfat/dir.c +++ b/fs/exfat/dir.c @@ -33,10 +33,9 @@ static void exfat_get_uniname_from_ext_entry(struct super_block *sb, struct exfat_chain *p_dir, int entry, unsigned short *uniname) { int i; - struct exfat_entry_set_cache *es; + struct exfat_entry_set_cache es; - es = exfat_get_dentry_set(sb, p_dir, entry, ES_ALL_ENTRIES); - if (!es) + if (exfat_get_dentry_set(&es, sb, p_dir, entry, ES_ALL_ENTRIES)) return; /* @@ -45,8 +44,8 @@ static void exfat_get_uniname_from_ext_entry(struct super_block *sb, * Third entry : first file-name entry * So, the index of first file-name dentry should start from 2. */ - for (i = 2; i < es->num_entries; i++) { - struct exfat_dentry *ep = exfat_get_dentry_cached(es, i); + for (i = 2; i < es.num_entries; i++) { + struct exfat_dentry *ep = exfat_get_dentry_cached(&es, i); /* end of name entry */ if (exfat_get_entry_type(ep) != TYPE_EXTEND) @@ -56,7 +55,7 @@ static void exfat_get_uniname_from_ext_entry(struct super_block *sb, uniname += EXFAT_FILE_NAME_LEN; } - exfat_free_dentry_set(es, false); + exfat_free_dentry_set(&es, false); } /* read a directory entry from the opened directory */ @@ -619,7 +618,6 @@ int exfat_free_dentry_set(struct exfat_entry_set_cache *es, int sync) if (IS_DYNAMIC_ES(es)) kfree(es->bh); - kfree(es); return err; } @@ -816,14 +814,14 @@ struct exfat_dentry *exfat_get_dentry_cached( * pointer of entry set on success, * NULL on failure. 
*/ -struct exfat_entry_set_cache *exfat_get_dentry_set(struct super_block *sb, - struct exfat_chain *p_dir, int entry, unsigned int type) +int exfat_get_dentry_set(struct exfat_entry_set_cache *es, + struct super_block *sb, struct exfat_chain *p_dir, int entry, + unsigned int type) { int ret, i, num_bh; unsigned int off, byte_offset, clu = 0; sector_t sec; struct exfat_sb_info *sbi = EXFAT_SB(sb); - struct exfat_entry_set_cache *es; struct exfat_dentry *ep; int num_entries; enum exfat_validate_dentry_mode mode = ES_MODE_STARTED; @@ -831,17 +829,15 @@ struct exfat_entry_set_cache *exfat_get_dentry_set(struct super_block *sb, if (p_dir->dir == DIR_DELETED) { exfat_err(sb, "access to deleted dentry"); - return NULL; + return -EIO; } byte_offset = EXFAT_DEN_TO_B(entry); ret = exfat_walk_fat_chain(sb, p_dir, byte_offset, &clu); if (ret) - return NULL; + return ret; - es = kzalloc(sizeof(*es), GFP_KERNEL); - if (!es) - return NULL; + memset(es, 0, sizeof(*es)); es->sb = sb; es->modified = false; @@ -859,7 +855,7 @@ struct exfat_entry_set_cache *exfat_get_dentry_set(struct super_block *sb, bh = sb_bread(sb, sec); if (!bh) - goto free_es; + return -EIO; es->bh[es->num_bh++] = bh; ep = exfat_get_dentry_cached(es, 0); @@ -875,8 +871,7 @@ struct exfat_entry_set_cache *exfat_get_dentry_set(struct super_block *sb, es->bh = kmalloc_array(num_bh, sizeof(*es->bh), GFP_KERNEL); if (!es->bh) { brelse(bh); - kfree(es); - return NULL; + return -ENOMEM; } es->bh[0] = bh; } @@ -905,11 +900,11 @@ struct exfat_entry_set_cache *exfat_get_dentry_set(struct super_block *sb, if (!exfat_validate_entry(exfat_get_entry_type(ep), &mode)) goto free_es; } - return es; + return 0; free_es: exfat_free_dentry_set(es, false); - return NULL; + return -EIO; } static inline void exfat_reset_empty_hint(struct exfat_hint_femp *hint_femp) diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index 82395ae80dba..2ffe5792b1a9 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -490,8 +490,9 @@ 
struct exfat_dentry *exfat_get_dentry(struct super_block *sb, struct exfat_chain *p_dir, int entry, struct buffer_head **bh); struct exfat_dentry *exfat_get_dentry_cached(struct exfat_entry_set_cache *es, int num); -struct exfat_entry_set_cache *exfat_get_dentry_set(struct super_block *sb, - struct exfat_chain *p_dir, int entry, unsigned int type); +int exfat_get_dentry_set(struct exfat_entry_set_cache *es, + struct super_block *sb, struct exfat_chain *p_dir, int entry, + unsigned int type); int exfat_free_dentry_set(struct exfat_entry_set_cache *es, int sync); int exfat_count_dir_entries(struct super_block *sb, struct exfat_chain *p_dir); diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c index 5590a1e83126..cdcf037a304f 100644 --- a/fs/exfat/inode.c +++ b/fs/exfat/inode.c @@ -21,7 +21,7 @@ int __exfat_write_inode(struct inode *inode, int sync) { unsigned long long on_disk_size; struct exfat_dentry *ep, *ep2; - struct exfat_entry_set_cache *es = NULL; + struct exfat_entry_set_cache es; struct super_block *sb = inode->i_sb; struct exfat_sb_info *sbi = EXFAT_SB(sb); struct exfat_inode_info *ei = EXFAT_I(inode); @@ -42,11 +42,10 @@ int __exfat_write_inode(struct inode *inode, int sync) exfat_set_volume_dirty(sb); /* get the directory entry of given file or directory */ - es = exfat_get_dentry_set(sb, &(ei->dir), ei->entry, ES_ALL_ENTRIES); - if (!es) + if (exfat_get_dentry_set(&es, sb, &(ei->dir), ei->entry, ES_ALL_ENTRIES)) return -EIO; - ep = exfat_get_dentry_cached(es, 0); - ep2 = exfat_get_dentry_cached(es, 1); + ep = exfat_get_dentry_cached(&es, 0); + ep2 = exfat_get_dentry_cached(&es, 1); ep->dentry.file.attr = cpu_to_le16(exfat_make_attr(inode)); @@ -83,8 +82,8 @@ int __exfat_write_inode(struct inode *inode, int sync) ep2->dentry.stream.start_clu = EXFAT_FREE_CLUSTER; } - exfat_update_dir_chksum_with_entry_set(es); - return exfat_free_dentry_set(es, sync); + exfat_update_dir_chksum_with_entry_set(&es); + return exfat_free_dentry_set(&es, sync); } int 
exfat_write_inode(struct inode *inode, struct writeback_control *wbc) diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index 99e00a36c029..8d72527dfb78 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -604,7 +604,7 @@ static int exfat_find(struct inode *dir, struct qstr *qname, struct exfat_sb_info *sbi = EXFAT_SB(sb); struct exfat_inode_info *ei = EXFAT_I(dir); struct exfat_dentry *ep, *ep2; - struct exfat_entry_set_cache *es; + struct exfat_entry_set_cache es; /* for optimized dir & entry to prevent long traverse of cluster chain */ struct exfat_hint hint_opt; @@ -644,11 +644,10 @@ static int exfat_find(struct inode *dir, struct qstr *qname, if (cdir.flags & ALLOC_NO_FAT_CHAIN) cdir.size -= dentry / sbi->dentries_per_clu; dentry = hint_opt.eidx; - es = exfat_get_dentry_set(sb, &cdir, dentry, ES_2_ENTRIES); - if (!es) + if (exfat_get_dentry_set(&es, sb, &cdir, dentry, ES_2_ENTRIES)) return -EIO; - ep = exfat_get_dentry_cached(es, 0); - ep2 = exfat_get_dentry_cached(es, 1); + ep = exfat_get_dentry_cached(&es, 0); + ep2 = exfat_get_dentry_cached(&es, 1); info->type = exfat_get_entry_type(ep); info->attr = le16_to_cpu(ep->dentry.file.attr); @@ -677,7 +676,7 @@ static int exfat_find(struct inode *dir, struct qstr *qname, ep->dentry.file.access_time, ep->dentry.file.access_date, 0); - exfat_free_dentry_set(es, false); + exfat_free_dentry_set(&es, false); if (ei->start_clu == EXFAT_FREE_CLUSTER) { exfat_fs_error(sb, From 3b9681acb0ef739343d8cfd35e054aab9597f1dc Mon Sep 17 00:00:00 2001 From: Yuezhang Mo Date: Thu, 17 Mar 2022 18:12:40 +0800 Subject: [PATCH 3672/4122] exfat: rename exfat_free_dentry_set() to exfat_put_dentry_set() Since struct exfat_entry_set_cache is allocated from stack, no need to free, so rename exfat_free_dentry_set() to exfat_put_dentry_set(). After renaming, the new function pair is exfat_get_dentry_set()/exfat_put_dentry_set(). 
Signed-off-by: Yuezhang Mo Reviewed-by: Andy Wu Reviewed-by: Aoyama Wataru Reviewed-by: Sungjong Seo Signed-off-by: Namjae Jeon --- fs/exfat/dir.c | 16 ++++++++-------- fs/exfat/exfat_fs.h | 2 +- fs/exfat/inode.c | 2 +- fs/exfat/namei.c | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c index a3fb609dd129..a9a0b3e46af2 100644 --- a/fs/exfat/dir.c +++ b/fs/exfat/dir.c @@ -55,7 +55,7 @@ static void exfat_get_uniname_from_ext_entry(struct super_block *sb, uniname += EXFAT_FILE_NAME_LEN; } - exfat_free_dentry_set(&es, false); + exfat_put_dentry_set(&es, false); } /* read a directory entry from the opened directory */ @@ -602,7 +602,7 @@ void exfat_update_dir_chksum_with_entry_set(struct exfat_entry_set_cache *es) es->modified = true; } -int exfat_free_dentry_set(struct exfat_entry_set_cache *es, int sync) +int exfat_put_dentry_set(struct exfat_entry_set_cache *es, int sync) { int i, err = 0; @@ -860,7 +860,7 @@ int exfat_get_dentry_set(struct exfat_entry_set_cache *es, ep = exfat_get_dentry_cached(es, 0); if (!exfat_validate_entry(exfat_get_entry_type(ep), &mode)) - goto free_es; + goto put_es; num_entries = type == ES_ALL_ENTRIES ? 
ep->dentry.file.num_ext + 1 : type; @@ -882,7 +882,7 @@ int exfat_get_dentry_set(struct exfat_entry_set_cache *es, if (p_dir->flags == ALLOC_NO_FAT_CHAIN) clu++; else if (exfat_get_next_cluster(sb, &clu)) - goto free_es; + goto put_es; sec = exfat_cluster_to_sector(sbi, clu); } else { sec++; @@ -890,7 +890,7 @@ int exfat_get_dentry_set(struct exfat_entry_set_cache *es, bh = sb_bread(sb, sec); if (!bh) - goto free_es; + goto put_es; es->bh[es->num_bh++] = bh; } @@ -898,12 +898,12 @@ int exfat_get_dentry_set(struct exfat_entry_set_cache *es, for (i = 1; i < num_entries; i++) { ep = exfat_get_dentry_cached(es, i); if (!exfat_validate_entry(exfat_get_entry_type(ep), &mode)) - goto free_es; + goto put_es; } return 0; -free_es: - exfat_free_dentry_set(es, false); +put_es: + exfat_put_dentry_set(es, false); return -EIO; } diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index 2ffe5792b1a9..324acc57d029 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -493,7 +493,7 @@ struct exfat_dentry *exfat_get_dentry_cached(struct exfat_entry_set_cache *es, int exfat_get_dentry_set(struct exfat_entry_set_cache *es, struct super_block *sb, struct exfat_chain *p_dir, int entry, unsigned int type); -int exfat_free_dentry_set(struct exfat_entry_set_cache *es, int sync); +int exfat_put_dentry_set(struct exfat_entry_set_cache *es, int sync); int exfat_count_dir_entries(struct super_block *sb, struct exfat_chain *p_dir); /* inode.c */ diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c index cdcf037a304f..a84eae72556d 100644 --- a/fs/exfat/inode.c +++ b/fs/exfat/inode.c @@ -83,7 +83,7 @@ int __exfat_write_inode(struct inode *inode, int sync) } exfat_update_dir_chksum_with_entry_set(&es); - return exfat_free_dentry_set(&es, sync); + return exfat_put_dentry_set(&es, sync); } int exfat_write_inode(struct inode *inode, struct writeback_control *wbc) diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index 8d72527dfb78..57510d7f58cf 100644 --- a/fs/exfat/namei.c +++ 
b/fs/exfat/namei.c @@ -676,7 +676,7 @@ static int exfat_find(struct inode *dir, struct qstr *qname, ep->dentry.file.access_time, ep->dentry.file.access_date, 0); - exfat_free_dentry_set(&es, false); + exfat_put_dentry_set(&es, false); if (ei->start_clu == EXFAT_FREE_CLUSTER) { exfat_fs_error(sb, From f3fe3954c09f97d8227d9d2edc807796a8b228ab Mon Sep 17 00:00:00 2001 From: Yuezhang Mo Date: Thu, 17 Mar 2022 19:39:20 +0800 Subject: [PATCH 3673/4122] exfat: replace magic numbers with Macros Code refinement, no functional changes. Signed-off-by: Yuezhang Mo Reviewed-by: Andy Wu Reviewed-by: Aoyama Wataru Reviewed-by: Sungjong Seo Signed-off-by: Namjae Jeon --- fs/exfat/dir.c | 12 ++++++------ fs/exfat/inode.c | 4 ++-- fs/exfat/namei.c | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c index a9a0b3e46af2..c05493fc9124 100644 --- a/fs/exfat/dir.c +++ b/fs/exfat/dir.c @@ -44,7 +44,7 @@ static void exfat_get_uniname_from_ext_entry(struct super_block *sb, * Third entry : first file-name entry * So, the index of first file-name dentry should start from 2. 
*/ - for (i = 2; i < es.num_entries; i++) { + for (i = ES_IDX_FIRST_FILENAME; i < es.num_entries; i++) { struct exfat_dentry *ep = exfat_get_dentry_cached(&es, i); /* end of name entry */ @@ -336,7 +336,7 @@ int exfat_calc_num_entries(struct exfat_uni_name *p_uniname) return -EINVAL; /* 1 file entry + 1 stream entry + name entries */ - return ((len - 1) / EXFAT_FILE_NAME_LEN + 3); + return ES_ENTRY_NUM(len); } unsigned int exfat_get_entry_type(struct exfat_dentry *ep) @@ -591,13 +591,13 @@ void exfat_update_dir_chksum_with_entry_set(struct exfat_entry_set_cache *es) unsigned short chksum = 0; struct exfat_dentry *ep; - for (i = 0; i < es->num_entries; i++) { + for (i = ES_IDX_FILE; i < es->num_entries; i++) { ep = exfat_get_dentry_cached(es, i); chksum = exfat_calc_chksum16(ep, DENTRY_SIZE, chksum, chksum_type); chksum_type = CS_DEFAULT; } - ep = exfat_get_dentry_cached(es, 0); + ep = exfat_get_dentry_cached(es, ES_IDX_FILE); ep->dentry.file.checksum = cpu_to_le16(chksum); es->modified = true; } @@ -858,7 +858,7 @@ int exfat_get_dentry_set(struct exfat_entry_set_cache *es, return -EIO; es->bh[es->num_bh++] = bh; - ep = exfat_get_dentry_cached(es, 0); + ep = exfat_get_dentry_cached(es, ES_IDX_FILE); if (!exfat_validate_entry(exfat_get_entry_type(ep), &mode)) goto put_es; @@ -895,7 +895,7 @@ int exfat_get_dentry_set(struct exfat_entry_set_cache *es, } /* validate cached dentries */ - for (i = 1; i < num_entries; i++) { + for (i = ES_IDX_STREAM; i < num_entries; i++) { ep = exfat_get_dentry_cached(es, i); if (!exfat_validate_entry(exfat_get_entry_type(ep), &mode)) goto put_es; diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c index a84eae72556d..dac5001bae9e 100644 --- a/fs/exfat/inode.c +++ b/fs/exfat/inode.c @@ -44,8 +44,8 @@ int __exfat_write_inode(struct inode *inode, int sync) /* get the directory entry of given file or directory */ if (exfat_get_dentry_set(&es, sb, &(ei->dir), ei->entry, ES_ALL_ENTRIES)) return -EIO; - ep = exfat_get_dentry_cached(&es, 0); - ep2 
= exfat_get_dentry_cached(&es, 1); + ep = exfat_get_dentry_cached(&es, ES_IDX_FILE); + ep2 = exfat_get_dentry_cached(&es, ES_IDX_STREAM); ep->dentry.file.attr = cpu_to_le16(exfat_make_attr(inode)); diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index 57510d7f58cf..01e4e8c60bbe 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -646,8 +646,8 @@ static int exfat_find(struct inode *dir, struct qstr *qname, dentry = hint_opt.eidx; if (exfat_get_dentry_set(&es, sb, &cdir, dentry, ES_2_ENTRIES)) return -EIO; - ep = exfat_get_dentry_cached(&es, 0); - ep2 = exfat_get_dentry_cached(&es, 1); + ep = exfat_get_dentry_cached(&es, ES_IDX_FILE); + ep2 = exfat_get_dentry_cached(&es, ES_IDX_STREAM); info->type = exfat_get_entry_type(ep); info->attr = le16_to_cpu(ep->dentry.file.attr); From 088f1343d9108c16fca064951d85e6de9f5cab42 Mon Sep 17 00:00:00 2001 From: Yuezhang Mo Date: Tue, 16 Aug 2022 16:55:06 +0800 Subject: [PATCH 3674/4122] exfat: remove call ilog2() from exfat_readdir() There is no need to call ilog2() for the conversions between cluster and dentry in exfat_readdir(), because these conversions can be replaced with EXFAT_DEN_TO_CLU()/EXFAT_CLU_TO_DEN(). Code refinement, no functional changes. 
Signed-off-by: Yuezhang Mo Reviewed-by: Andy Wu Reviewed-by: Aoyama Wataru Reviewed-by: Sungjong Seo Signed-off-by: Namjae Jeon --- fs/exfat/dir.c | 9 ++++----- fs/exfat/exfat_fs.h | 10 ++++++++-- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c index c05493fc9124..397ea2d98848 100644 --- a/fs/exfat/dir.c +++ b/fs/exfat/dir.c @@ -61,7 +61,7 @@ static void exfat_get_uniname_from_ext_entry(struct super_block *sb, /* read a directory entry from the opened directory */ static int exfat_readdir(struct inode *inode, loff_t *cpos, struct exfat_dir_entry *dir_entry) { - int i, dentries_per_clu, dentries_per_clu_bits = 0, num_ext; + int i, dentries_per_clu, num_ext; unsigned int type, clu_offset, max_dentries; struct exfat_chain dir, clu; struct exfat_uni_name uni_name; @@ -83,11 +83,10 @@ static int exfat_readdir(struct inode *inode, loff_t *cpos, struct exfat_dir_ent EXFAT_B_TO_CLU(i_size_read(inode), sbi), ei->flags); dentries_per_clu = sbi->dentries_per_clu; - dentries_per_clu_bits = ilog2(dentries_per_clu); max_dentries = (unsigned int)min_t(u64, MAX_EXFAT_DENTRIES, - (u64)sbi->num_clusters << dentries_per_clu_bits); + (u64)EXFAT_CLU_TO_DEN(sbi->num_clusters, sbi)); - clu_offset = dentry >> dentries_per_clu_bits; + clu_offset = EXFAT_DEN_TO_CLU(dentry, sbi); exfat_chain_dup(&clu, &dir); if (clu.flags == ALLOC_NO_FAT_CHAIN) { @@ -162,7 +161,7 @@ static int exfat_readdir(struct inode *inode, loff_t *cpos, struct exfat_dir_ent dir_entry->entry = dentry; brelse(bh); - ei->hint_bmap.off = dentry >> dentries_per_clu_bits; + ei->hint_bmap.off = EXFAT_DEN_TO_CLU(dentry, sbi); ei->hint_bmap.clu = clu.dir; *cpos = EXFAT_DEN_TO_B(dentry + 1 + num_ext); diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index 324acc57d029..37e8af8042aa 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -101,11 +101,17 @@ enum { /* * helpers for block size to dentry size conversion. 
*/ -#define EXFAT_B_TO_DEN_IDX(b, sbi) \ - ((b) << ((sbi)->cluster_size_bits - DENTRY_SIZE_BITS)) #define EXFAT_B_TO_DEN(b) ((b) >> DENTRY_SIZE_BITS) #define EXFAT_DEN_TO_B(b) ((b) << DENTRY_SIZE_BITS) +/* + * helpers for cluster size to dentry size conversion. + */ +#define EXFAT_CLU_TO_DEN(clu, sbi) \ + ((clu) << ((sbi)->cluster_size_bits - DENTRY_SIZE_BITS)) +#define EXFAT_DEN_TO_CLU(dentry, sbi) \ + ((dentry) >> ((sbi)->cluster_size_bits - DENTRY_SIZE_BITS)) + /* * helpers for fat entry. */ From 015c0d4f6b1e65857de88279f07d7ecc5e305137 Mon Sep 17 00:00:00 2001 From: Yuezhang Mo Date: Mon, 15 Aug 2022 10:15:16 +0800 Subject: [PATCH 3675/4122] exfat: remove unneeded codes from __exfat_rename() The code gets the dentry, but the dentry is not used, remove the code. Code refinement, no functional changes. Signed-off-by: Yuezhang Mo Reviewed-by: Andy Wu Reviewed-by: Aoyama Wataru Reviewed-by: Sungjong Seo Signed-off-by: Namjae Jeon --- fs/exfat/namei.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index 01e4e8c60bbe..347c8df45bd0 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -1175,7 +1175,7 @@ static int __exfat_rename(struct inode *old_parent_inode, struct exfat_inode_info *new_ei = NULL; unsigned int new_entry_type = TYPE_UNUSED; int new_entry = 0; - struct buffer_head *old_bh, *new_bh = NULL; + struct buffer_head *new_bh = NULL; /* check the validity of pointer parameters */ if (new_path == NULL || strlen(new_path) == 0) @@ -1191,13 +1191,6 @@ static int __exfat_rename(struct inode *old_parent_inode, EXFAT_I(old_parent_inode)->flags); dentry = ei->entry; - ep = exfat_get_dentry(sb, &olddir, dentry, &old_bh); - if (!ep) { - ret = -EIO; - goto out; - } - brelse(old_bh); - /* check whether new dir is existing directory and empty */ if (new_inode) { ret = -EIO; From 72880cb5f157514d797d5f6ab3184bbde671a18a Mon Sep 17 00:00:00 2001 From: Yuezhang Mo Date: Sun, 10 Apr 2022 16:12:14 +0800 Subject: 
[PATCH 3676/4122] exfat: remove unnecessary arguments from exfat_find_dir_entry() This commit removes argument 'num_entries' and 'type' from exfat_find_dir_entry(). Code refinement, no functional changes. Signed-off-by: Yuezhang Mo Reviewed-by: Andy Wu Reviewed-by: Aoyama Wataru Reviewed-by: Sungjong Seo Signed-off-by: Namjae Jeon --- fs/exfat/dir.c | 12 +++++++----- fs/exfat/exfat_fs.h | 3 +-- fs/exfat/namei.c | 10 ++-------- 3 files changed, 10 insertions(+), 15 deletions(-) diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c index 397ea2d98848..8121a7e073bc 100644 --- a/fs/exfat/dir.c +++ b/fs/exfat/dir.c @@ -956,7 +956,7 @@ enum { */ int exfat_find_dir_entry(struct super_block *sb, struct exfat_inode_info *ei, struct exfat_chain *p_dir, struct exfat_uni_name *p_uniname, - int num_entries, unsigned int type, struct exfat_hint *hint_opt) + struct exfat_hint *hint_opt) { int i, rewind = 0, dentry = 0, end_eidx = 0, num_ext = 0, len; int order, step, name_len = 0; @@ -967,6 +967,10 @@ int exfat_find_dir_entry(struct super_block *sb, struct exfat_inode_info *ei, struct exfat_hint *hint_stat = &ei->hint_stat; struct exfat_hint_femp candi_empty; struct exfat_sb_info *sbi = EXFAT_SB(sb); + int num_entries = exfat_calc_num_entries(p_uniname); + + if (num_entries < 0) + return num_entries; dentries_per_clu = sbi->dentries_per_clu; @@ -1020,10 +1024,8 @@ rewind: step = DIRENT_STEP_FILE; hint_opt->clu = clu.dir; hint_opt->eidx = i; - if (type == TYPE_ALL || type == entry_type) { - num_ext = ep->dentry.file.num_ext; - step = DIRENT_STEP_STRM; - } + num_ext = ep->dentry.file.num_ext; + step = DIRENT_STEP_STRM; brelse(bh); continue; } diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index 37e8af8042aa..21fec01d68ff 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -71,7 +71,6 @@ enum { #define TYPE_PADDING 0x0402 #define TYPE_ACLTAB 0x0403 #define TYPE_BENIGN_SEC 0x0800 -#define TYPE_ALL 0x0FFF #define MAX_CHARSET_SIZE 6 /* max size of multi-byte character */ 
#define MAX_NAME_LENGTH 255 /* max len of file name excluding NULL */ @@ -490,7 +489,7 @@ void exfat_update_dir_chksum_with_entry_set(struct exfat_entry_set_cache *es); int exfat_calc_num_entries(struct exfat_uni_name *p_uniname); int exfat_find_dir_entry(struct super_block *sb, struct exfat_inode_info *ei, struct exfat_chain *p_dir, struct exfat_uni_name *p_uniname, - int num_entries, unsigned int type, struct exfat_hint *hint_opt); + struct exfat_hint *hint_opt); int exfat_alloc_new_dir(struct inode *inode, struct exfat_chain *clu); struct exfat_dentry *exfat_get_dentry(struct super_block *sb, struct exfat_chain *p_dir, int entry, struct buffer_head **bh); diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index 347c8df45bd0..5f995eba5dbb 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -597,7 +597,7 @@ unlock: static int exfat_find(struct inode *dir, struct qstr *qname, struct exfat_dir_entry *info) { - int ret, dentry, num_entries, count; + int ret, dentry, count; struct exfat_chain cdir; struct exfat_uni_name uni_name; struct super_block *sb = dir->i_sb; @@ -616,10 +616,6 @@ static int exfat_find(struct inode *dir, struct qstr *qname, if (ret) return ret; - num_entries = exfat_calc_num_entries(&uni_name); - if (num_entries < 0) - return num_entries; - /* check the validation of hint_stat and initialize it if required */ if (ei->version != (inode_peek_iversion_raw(dir) & 0xffffffff)) { ei->hint_stat.clu = cdir.dir; @@ -629,9 +625,7 @@ static int exfat_find(struct inode *dir, struct qstr *qname, } /* search the file name for directories */ - dentry = exfat_find_dir_entry(sb, ei, &cdir, &uni_name, - num_entries, TYPE_ALL, &hint_opt); - + dentry = exfat_find_dir_entry(sb, ei, &cdir, &uni_name, &hint_opt); if (dentry < 0) return dentry; /* -error value */ From e981917b3fae689e9372647a38746444205bb905 Mon Sep 17 00:00:00 2001 From: Yuezhang Mo Date: Thu, 17 Nov 2022 10:36:21 +0800 Subject: [PATCH 3677/4122] exfat: remove argument 'size' from exfat_truncate() 
argument 'size' is not used in exfat_truncate(), remove it. Code refinement, no functional changes. Signed-off-by: Yuezhang Mo Reviewed-by: Andy Wu Reviewed-by: Aoyama Wataru Reviewed-by: Sungjong Seo Signed-off-by: Namjae Jeon --- fs/exfat/exfat_fs.h | 2 +- fs/exfat/file.c | 4 ++-- fs/exfat/inode.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index 21fec01d68ff..ae048802f9db 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -449,7 +449,7 @@ int exfat_trim_fs(struct inode *inode, struct fstrim_range *range); /* file.c */ extern const struct file_operations exfat_file_operations; int __exfat_truncate(struct inode *inode, loff_t new_size); -void exfat_truncate(struct inode *inode, loff_t size); +void exfat_truncate(struct inode *inode); int exfat_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, struct iattr *attr); int exfat_getattr(struct user_namespace *mnt_userns, const struct path *path, diff --git a/fs/exfat/file.c b/fs/exfat/file.c index 4e0793f35e8f..7c97c1df1305 100644 --- a/fs/exfat/file.c +++ b/fs/exfat/file.c @@ -189,7 +189,7 @@ int __exfat_truncate(struct inode *inode, loff_t new_size) return 0; } -void exfat_truncate(struct inode *inode, loff_t size) +void exfat_truncate(struct inode *inode) { struct super_block *sb = inode->i_sb; struct exfat_sb_info *sbi = EXFAT_SB(sb); @@ -310,7 +310,7 @@ int exfat_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, * __exfat_write_inode() is called from exfat_truncate(), inode * is already written by it, so mark_inode_dirty() is unneeded. 
*/ - exfat_truncate(inode, attr->ia_size); + exfat_truncate(inode); up_write(&EXFAT_I(inode)->truncate_lock); } else mark_inode_dirty(inode); diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c index dac5001bae9e..0d147f8a1f7c 100644 --- a/fs/exfat/inode.c +++ b/fs/exfat/inode.c @@ -362,7 +362,7 @@ static void exfat_write_failed(struct address_space *mapping, loff_t to) if (to > i_size_read(inode)) { truncate_pagecache(inode, i_size_read(inode)); inode->i_mtime = inode->i_ctime = current_time(inode); - exfat_truncate(inode, EXFAT_I(inode)->i_size_aligned); + exfat_truncate(inode); } } From f7cde96710a4362dca199458d3de04f631178453 Mon Sep 17 00:00:00 2001 From: Yuezhang Mo Date: Mon, 28 Mar 2022 16:37:58 +0800 Subject: [PATCH 3678/4122] exfat: remove i_size_write() from __exfat_truncate() The file/directory size is updated into inode by i_size_write() before __exfat_truncate() is called, so it is redundant to re-update by i_size_write() in __exfat_truncate(). Code refinement, no functional changes. 
Signed-off-by: Yuezhang Mo Reviewed-by: Andy Wu Reviewed-by: Aoyama Wataru Reviewed-by: Sungjong Seo Signed-off-by: Namjae Jeon --- fs/exfat/exfat_fs.h | 2 +- fs/exfat/file.c | 8 +++----- fs/exfat/inode.c | 2 +- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index ae048802f9db..a1e7feb22079 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -448,7 +448,7 @@ int exfat_trim_fs(struct inode *inode, struct fstrim_range *range); /* file.c */ extern const struct file_operations exfat_file_operations; -int __exfat_truncate(struct inode *inode, loff_t new_size); +int __exfat_truncate(struct inode *inode); void exfat_truncate(struct inode *inode); int exfat_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, struct iattr *attr); diff --git a/fs/exfat/file.c b/fs/exfat/file.c index 7c97c1df1305..f5b29072775d 100644 --- a/fs/exfat/file.c +++ b/fs/exfat/file.c @@ -93,7 +93,7 @@ static int exfat_sanitize_mode(const struct exfat_sb_info *sbi, } /* resize the file length */ -int __exfat_truncate(struct inode *inode, loff_t new_size) +int __exfat_truncate(struct inode *inode) { unsigned int num_clusters_new, num_clusters_phys; unsigned int last_clu = EXFAT_FREE_CLUSTER; @@ -113,7 +113,7 @@ int __exfat_truncate(struct inode *inode, loff_t new_size) exfat_chain_set(&clu, ei->start_clu, num_clusters_phys, ei->flags); - if (new_size > 0) { + if (i_size_read(inode) > 0) { /* * Truncate FAT chain num_clusters after the first cluster * num_clusters = min(new, phys); @@ -143,8 +143,6 @@ int __exfat_truncate(struct inode *inode, loff_t new_size) ei->start_clu = EXFAT_EOF_CLUSTER; } - i_size_write(inode, new_size); - if (ei->type == TYPE_FILE) ei->attr |= ATTR_ARCHIVE; @@ -207,7 +205,7 @@ void exfat_truncate(struct inode *inode) goto write_size; } - err = __exfat_truncate(inode, i_size_read(inode)); + err = __exfat_truncate(inode); if (err) goto write_size; diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c 
index 0d147f8a1f7c..95adc4b2e436 100644 --- a/fs/exfat/inode.c +++ b/fs/exfat/inode.c @@ -626,7 +626,7 @@ void exfat_evict_inode(struct inode *inode) if (!inode->i_nlink) { i_size_write(inode, 0); mutex_lock(&EXFAT_SB(inode->i_sb)->s_lock); - __exfat_truncate(inode, 0); + __exfat_truncate(inode); mutex_unlock(&EXFAT_SB(inode->i_sb)->s_lock); } From 85463321e726fe59873bbc21f2f480747810aef8 Mon Sep 17 00:00:00 2001 From: Joel Savitz Date: Tue, 18 Oct 2022 19:12:22 -0400 Subject: [PATCH 3679/4122] selftests/vm: enable running select groups of tests Our memory management kernel CI testing at Red Hat uses the VM selftests and we have run into two problems: First, our LTP tests overlap with the VM selftests. We want to avoid unhelpful redundancy in our testing practices. Second, we have observed the current run_vmtests.sh to report overall failure/ambiguous results in the case that a machine lacks the necessary hardware to perform one or more of the tests. E.g. ksm tests that require more than one numa node. We want to be able to run the vm selftests suitable to particular hardware. Add the ability to run one or more groups of vm tests via run_vmtests.sh instead of simply all-or-none in order to solve these problems. Preserve existing default behavior of running all tests when the script is invoked with no arguments. Documentation of test groups is included in the patch as follows: # ./run_vmtests.sh [ -h || --help ] usage: ./tools/testing/selftests/vm/run_vmtests.sh [ -h | -t ""] -t: specify specific categories to tests to run -h: display this message The default behavior is to run all tests. 
Alternatively, specific groups tests can be run by passing a string to the -t argument containing one or more of the following categories separated by spaces: - mmap tests for mmap(2) - gup_test tests for gup using gup_test interface - userfaultfd tests for userfaultfd(2) - compaction a test for the patch "Allow compaction of unevictable pages" - mlock tests for mlock(2) - mremap tests for mremap(2) - hugevm tests for very large virtual address space - vmalloc vmalloc smoke tests - hmm hmm smoke tests - madv_populate test memadvise(2) MADV_POPULATE_{READ,WRITE} options - memfd_secret test memfd_secret(2) - process_mrelease test process_mrelease(2) - ksm ksm tests that do not require >=2 NUMA nodes - ksm_numa ksm tests that require >=2 NUMA nodes - pkey memory protection key tests - soft_dirty test soft dirty page bit semantics - anon_cow test anonymous copy-on-write semantics example: ./run_vmtests.sh -t "hmm mmap ksm" Link: https://lkml.kernel.org/r/20221018231222.1884715-1-jsavitz@redhat.com Signed-off-by: Joel Savitz Cc: Joel Savitz Cc: Nico Pache Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/run_vmtests.sh | 205 +++++++++++++++------- 1 file changed, 143 insertions(+), 62 deletions(-) diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh index 54d7a822c2ce..e26661feacf5 100755 --- a/tools/testing/selftests/vm/run_vmtests.sh +++ b/tools/testing/selftests/vm/run_vmtests.sh @@ -1,13 +1,88 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -#please run as root +# Please run as root # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 exitcode=0 -#get huge pagesize and freepages from /proc/meminfo +usage() { + cat <"] + -t: specify specific categories to tests to run + -h: display this message + +The default behavior is to run all tests. 
+ +Alternatively, specific groups tests can be run by passing a string +to the -t argument containing one or more of the following categories +separated by spaces: +- mmap + tests for mmap(2) +- gup_test + tests for gup using gup_test interface +- userfaultfd + tests for userfaultfd(2) +- compaction + a test for the patch "Allow compaction of unevictable pages" +- mlock + tests for mlock(2) +- mremap + tests for mremap(2) +- hugevm + tests for very large virtual address space +- vmalloc + vmalloc smoke tests +- hmm + hmm smoke tests +- madv_populate + test memadvise(2) MADV_POPULATE_{READ,WRITE} options +- memfd_secret + test memfd_secret(2) +- process_mrelease + test process_mrelease(2) +- ksm + ksm tests that do not require >=2 NUMA nodes +- ksm_numa + ksm tests that require >=2 NUMA nodes +- pkey + memory protection key tests +- soft_dirty + test soft dirty page bit semantics +- cow + test copy-on-write semantics +example: ./run_vmtests.sh -t "hmm mmap ksm" +EOF + exit 0 +} + + +while getopts "ht:" OPT; do + case ${OPT} in + "h") usage ;; + "t") VM_SELFTEST_ITEMS=${OPTARG} ;; + esac +done +shift $((OPTIND -1)) + +# default behavior: run all tests +VM_SELFTEST_ITEMS=${VM_SELFTEST_ITEMS:-default} + +test_selected() { + if [ "$VM_SELFTEST_ITEMS" == "default" ]; then + # If no VM_SELFTEST_ITEMS are specified, run all tests + return 0 + fi + # If test selected argument is one of the test items + if [[ " ${VM_SELFTEST_ITEMS[*]} " =~ " ${1} " ]]; then + return 0 + else + return 1 + fi +} + +# get huge pagesize and freepages from /proc/meminfo while read -r name size unit; do if [ "$name" = "HugePages_Free:" ]; then freepgs="$size" @@ -27,7 +102,7 @@ hpgsize_MB=$((hpgsize_KB / 1024)) half_ufd_size_MB=$((((nr_cpus * hpgsize_MB + 127) / 128) * 128)) needmem_KB=$((half_ufd_size_MB * 2 * 1024)) -#set proper nr_hugepages +# set proper nr_hugepages if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages) needpgs=$((needmem_KB / 
hpgsize_KB)) @@ -56,136 +131,142 @@ else exit 1 fi -#filter 64bit architectures +# filter 64bit architectures ARCH64STR="arm64 ia64 mips64 parisc64 ppc64 ppc64le riscv64 s390x sh64 sparc64 x86_64" if [ -z "$ARCH" ]; then ARCH=$(uname -m 2>/dev/null | sed -e 's/aarch64.*/arm64/') fi VADDR64=0 -echo "$ARCH64STR" | grep "$ARCH" && VADDR64=1 +echo "$ARCH64STR" | grep "$ARCH" &>/dev/null && VADDR64=1 # Usage: run_test [test binary] [arbitrary test arguments...] run_test() { - local title="running $*" - local sep=$(echo -n "$title" | tr "[:graph:][:space:]" -) - printf "%s\n%s\n%s\n" "$sep" "$title" "$sep" + if test_selected ${CATEGORY}; then + local title="running $*" + local sep=$(echo -n "$title" | tr "[:graph:][:space:]" -) + printf "%s\n%s\n%s\n" "$sep" "$title" "$sep" - "$@" - local ret=$? - if [ $ret -eq 0 ]; then - echo "[PASS]" - elif [ $ret -eq $ksft_skip ]; then - echo "[SKIP]" - exitcode=$ksft_skip - else - echo "[FAIL]" - exitcode=1 - fi + "$@" + local ret=$? + if [ $ret -eq 0 ]; then + echo "[PASS]" + elif [ $ret -eq $ksft_skip ]; then + echo "[SKIP]" + exitcode=$ksft_skip + else + echo "[FAIL]" + exitcode=1 + fi + fi # test_selected } -run_test ./hugepage-mmap +CATEGORY="hugetlb" run_test ./hugepage-mmap shmmax=$(cat /proc/sys/kernel/shmmax) shmall=$(cat /proc/sys/kernel/shmall) echo 268435456 > /proc/sys/kernel/shmmax echo 4194304 > /proc/sys/kernel/shmall -run_test ./hugepage-shm +CATEGORY="hugetlb" run_test ./hugepage-shm echo "$shmmax" > /proc/sys/kernel/shmmax echo "$shmall" > /proc/sys/kernel/shmall -run_test ./map_hugetlb -run_test ./hugepage-mremap -run_test ./hugepage-vmemmap -run_test ./hugetlb-madvise +CATEGORY="hugetlb" run_test ./map_hugetlb +CATEGORY="hugetlb" run_test ./hugepage-mremap +CATEGORY="hugetlb" run_test ./hugepage-vmemmap +CATEGORY="hugetlb" run_test ./hugetlb-madvise -echo "NOTE: The above hugetlb tests provide minimal coverage. 
Use" -echo " https://github.com/libhugetlbfs/libhugetlbfs.git for" -echo " hugetlb regression testing." +if test_selected "hugetlb"; then + echo "NOTE: These hugetlb tests provide minimal coverage. Use" + echo " https://github.com/libhugetlbfs/libhugetlbfs.git for" + echo " hugetlb regression testing." +fi -run_test ./map_fixed_noreplace +CATEGORY="mmap" run_test ./map_fixed_noreplace # get_user_pages_fast() benchmark -run_test ./gup_test -u +CATEGORY="gup_test" run_test ./gup_test -u # pin_user_pages_fast() benchmark -run_test ./gup_test -a +CATEGORY="gup_test" run_test ./gup_test -a # Dump pages 0, 19, and 4096, using pin_user_pages: -run_test ./gup_test -ct -F 0x1 0 19 0x1000 +CATEGORY="gup_test" run_test ./gup_test -ct -F 0x1 0 19 0x1000 uffd_mods=("" ":dev") for mod in "${uffd_mods[@]}"; do - run_test ./userfaultfd anon${mod} 20 16 + CATEGORY="userfaultfd" run_test ./userfaultfd anon${mod} 20 16 # Hugetlb tests require source and destination huge pages. Pass in half # the size ($half_ufd_size_MB), which is used for *each*. 
- run_test ./userfaultfd hugetlb${mod} "$half_ufd_size_MB" 32 - run_test ./userfaultfd hugetlb_shared${mod} "$half_ufd_size_MB" 32 - run_test ./userfaultfd shmem${mod} 20 16 + CATEGORY="userfaultfd" run_test ./userfaultfd hugetlb${mod} "$half_ufd_size_MB" 32 + CATEGORY="userfaultfd" run_test ./userfaultfd hugetlb_shared${mod} "$half_ufd_size_MB" 32 + CATEGORY="userfaultfd" run_test ./userfaultfd shmem${mod} 20 16 done #cleanup echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages -run_test ./compaction_test +CATEGORY="compaction" run_test ./compaction_test -run_test sudo -u nobody ./on-fault-limit +CATEGORY="mlock" run_test sudo -u nobody ./on-fault-limit -run_test ./map_populate +CATEGORY="mmap" run_test ./map_populate -run_test ./mlock-random-test +CATEGORY="mlock" run_test ./mlock-random-test -run_test ./mlock2-tests +CATEGORY="mlock" run_test ./mlock2-tests -run_test ./mrelease_test +CATEGORY="process_mrelease" run_test ./mrelease_test -run_test ./mremap_test +CATEGORY="mremap" run_test ./mremap_test -run_test ./thuge-gen +CATEGORY="hugetlb" run_test ./thuge-gen if [ $VADDR64 -ne 0 ]; then - run_test ./virtual_address_range + CATEGORY="hugevm" run_test ./virtual_address_range # virtual address 128TB switch test - run_test ./va_128TBswitch.sh + CATEGORY="hugevm" run_test ./va_128TBswitch.sh fi # VADDR64 # vmalloc stability smoke test -run_test ./test_vmalloc.sh smoke +CATEGORY="vmalloc" run_test ./test_vmalloc.sh smoke -run_test ./mremap_dontunmap +CATEGORY="mremap" run_test ./mremap_dontunmap -run_test ./test_hmm.sh smoke +CATEGORY="hmm" run_test ./test_hmm.sh smoke # MADV_POPULATE_READ and MADV_POPULATE_WRITE tests -run_test ./madv_populate +CATEGORY="madv_populate" run_test ./madv_populate -run_test ./memfd_secret +CATEGORY="memfd_secret" run_test ./memfd_secret # KSM MADV_MERGEABLE test with 10 identical pages -run_test ./ksm_tests -M -p 10 +CATEGORY="ksm" run_test ./ksm_tests -M -p 10 # KSM unmerge test -run_test ./ksm_tests -U +CATEGORY="ksm" run_test ./ksm_tests 
-U # KSM test with 10 zero pages and use_zero_pages = 0 -run_test ./ksm_tests -Z -p 10 -z 0 +CATEGORY="ksm" run_test ./ksm_tests -Z -p 10 -z 0 # KSM test with 10 zero pages and use_zero_pages = 1 -run_test ./ksm_tests -Z -p 10 -z 1 +CATEGORY="ksm" run_test ./ksm_tests -Z -p 10 -z 1 # KSM test with 2 NUMA nodes and merge_across_nodes = 1 -run_test ./ksm_tests -N -m 1 +CATEGORY="ksm_numa" run_test ./ksm_tests -N -m 1 # KSM test with 2 NUMA nodes and merge_across_nodes = 0 -run_test ./ksm_tests -N -m 0 +CATEGORY="ksm_numa" run_test ./ksm_tests -N -m 0 + +CATEGORY="ksm" run_test ./ksm_functional_tests # protection_keys tests if [ -x ./protection_keys_32 ] then - run_test ./protection_keys_32 + CATEGORY="pkey" run_test ./protection_keys_32 fi if [ -x ./protection_keys_64 ] then - run_test ./protection_keys_64 + CATEGORY="pkey" run_test ./protection_keys_64 fi -run_test ./soft-dirty +CATEGORY="soft_dirty" run_test ./soft-dirty -# COW tests for anonymous memory -run_test ./cow +# COW tests +CATEGORY="cow" run_test ./cow exit $exitcode From 93fb70aa5904c2577fab8100fa990ecfa4f5b4c7 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 21 Oct 2022 12:11:36 +0200 Subject: [PATCH 3680/4122] selftests/vm: add KSM unmerge tests Patch series "mm/ksm: break_ksm() cleanups and fixes", v2. This series cleans up and fixes break_ksm(). In summary, we no longer use fake write faults to break COW but instead FAULT_FLAG_UNSHARE. Further, we move away from using follow_page() --- that we can hopefully remove completely at one point --- and use new walk_page_range_vma() instead. Fortunately, we can get rid of VM_FAULT_WRITE and FOLL_MIGRATION in common code now. Extend the existing ksm tests by an unmerge benchmark, and some new unmerge tests. Also, add a selftest to measure MADV_UNMERGEABLE performance.
In my setup (AMD Ryzen 9 3900X), running the KSM selftest to test unmerge performance on 2 GiB (taskset 0x8 ./ksm_tests -D -s 2048), this results in a performance degradation of ~6% -- 7% (old: ~5250 MiB/s, new: ~4900 MiB/s). I don't think we particularly care for now, but it's good to be aware of the implication. This patch (of 9): Let's add three unmerge tests (MADV_UNMERGEABLE unmerging all pages in the range). test_unmerge(): basic unmerge tests test_unmerge_discarded(): have some pte_none() entries in the range test_unmerge_uffd_wp(): protect the merged pages using uffd-wp ksm_tests.c currently contains a mixture of benchmarks and tests, whereby each test is carried out by executing the ksm_tests binary with specific parameters. Let's add new ksm_functional_tests.c that performs multiple, smaller functional tests all at once. Link: https://lkml.kernel.org/r/20221021101141.84170-1-david@redhat.com Link: https://lkml.kernel.org/r/20221021101141.84170-5-david@redhat.com Signed-off-by: David Hildenbrand Cc: Andrea Arcangeli Cc: Hugh Dickins Cc: Jason Gunthorpe Cc: John Hubbard Cc: Matthew Wilcox (Oracle) Cc: Peter Xu Cc: Shuah Khan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/Makefile | 2 + .../selftests/vm/ksm_functional_tests.c | 279 ++++++++++++++++++ tools/testing/selftests/vm/run_vmtests.sh | 2 + tools/testing/selftests/vm/vm_util.c | 10 + tools/testing/selftests/vm/vm_util.h | 1 + 5 files changed, 294 insertions(+) create mode 100644 tools/testing/selftests/vm/ksm_functional_tests.c diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index a4d764efd6e3..89c14e41bd43 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -55,6 +55,7 @@ TEST_GEN_FILES += userfaultfd TEST_GEN_PROGS += soft-dirty TEST_GEN_PROGS += split_huge_page_test TEST_GEN_FILES += ksm_tests +TEST_GEN_PROGS += ksm_functional_tests ifeq ($(MACHINE),x86_64) CAN_BUILD_I386 := $(shell 
./../x86/check_cc.sh "$(CC)" ../x86/trivial_32bit_program.c -m32) @@ -100,6 +101,7 @@ include ../lib.mk $(OUTPUT)/cow: vm_util.c $(OUTPUT)/khugepaged: vm_util.c +$(OUTPUT)/ksm_functional_tests: vm_util.c $(OUTPUT)/madv_populate: vm_util.c $(OUTPUT)/soft-dirty: vm_util.c $(OUTPUT)/split_huge_page_test: vm_util.c diff --git a/tools/testing/selftests/vm/ksm_functional_tests.c b/tools/testing/selftests/vm/ksm_functional_tests.c new file mode 100644 index 000000000000..96644be68962 --- /dev/null +++ b/tools/testing/selftests/vm/ksm_functional_tests.c @@ -0,0 +1,279 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * KSM functional tests + * + * Copyright 2022, Red Hat, Inc. + * + * Author(s): David Hildenbrand + */ +#define _GNU_SOURCE +#include <stdlib.h> +#include <string.h> +#include <stdbool.h> +#include <stdint.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <sys/mman.h> +#include <sys/syscall.h> +#include <sys/ioctl.h> +#include <linux/userfaultfd.h> + +#include "../kselftest.h" +#include "vm_util.h" + +#define KiB 1024u +#define MiB (1024 * KiB) + +static int ksm_fd; +static int ksm_full_scans_fd; +static int pagemap_fd; +static size_t pagesize; + +static bool range_maps_duplicates(char *addr, unsigned long size) +{ + unsigned long offs_a, offs_b, pfn_a, pfn_b; + + /* + * There is no easy way to check if there are KSM pages mapped into + * this range. We only check that the range does not map the same PFN + * twice by comparing each pair of mapped pages. + */ + for (offs_a = 0; offs_a < size; offs_a += pagesize) { + pfn_a = pagemap_get_pfn(pagemap_fd, addr + offs_a); + /* Page not present or PFN not exposed by the kernel.
*/ + if (pfn_a == -1ull || !pfn_a) + continue; + + for (offs_b = offs_a + pagesize; offs_b < size; + offs_b += pagesize) { + pfn_b = pagemap_get_pfn(pagemap_fd, addr + offs_b); + if (pfn_b == -1ull || !pfn_b) + continue; + if (pfn_a == pfn_b) + return true; + } + } + return false; +} + +static long ksm_get_full_scans(void) +{ + char buf[10]; + ssize_t ret; + + ret = pread(ksm_full_scans_fd, buf, sizeof(buf) - 1, 0); + if (ret <= 0) + return -errno; + buf[ret] = 0; + + return strtol(buf, NULL, 10); +} + +static int ksm_merge(void) +{ + long start_scans, end_scans; + + /* Wait for two full scans such that any possible merging happened. */ + start_scans = ksm_get_full_scans(); + if (start_scans < 0) + return start_scans; + if (write(ksm_fd, "1", 1) != 1) + return -errno; + do { + end_scans = ksm_get_full_scans(); + if (end_scans < 0) + return end_scans; + } while (end_scans < start_scans + 2); + + return 0; +} + +static char *mmap_and_merge_range(char val, unsigned long size) +{ + char *map; + + map = mmap(NULL, size, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANON, -1, 0); + if (map == MAP_FAILED) { + ksft_test_result_fail("mmap() failed\n"); + return MAP_FAILED; + } + + /* Don't use THP. Ignore if THP are not around on a kernel. */ + if (madvise(map, size, MADV_NOHUGEPAGE) && errno != EINVAL) { + ksft_test_result_fail("MADV_NOHUGEPAGE failed\n"); + goto unmap; + } + + /* Make sure each page contains the same values to merge them. */ + memset(map, val, size); + if (madvise(map, size, MADV_MERGEABLE)) { + ksft_test_result_fail("MADV_MERGEABLE failed\n"); + goto unmap; + } + + /* Run KSM to trigger merging and wait. 
*/ + if (ksm_merge()) { + ksft_test_result_fail("Running KSM failed\n"); + goto unmap; + } + return map; +unmap: + munmap(map, size); + return MAP_FAILED; +} + +static void test_unmerge(void) +{ + const unsigned int size = 2 * MiB; + char *map; + + ksft_print_msg("[RUN] %s\n", __func__); + + map = mmap_and_merge_range(0xcf, size); + if (map == MAP_FAILED) + return; + + if (madvise(map, size, MADV_UNMERGEABLE)) { + ksft_test_result_fail("MADV_UNMERGEABLE failed\n"); + goto unmap; + } + + ksft_test_result(!range_maps_duplicates(map, size), + "Pages were unmerged\n"); +unmap: + munmap(map, size); +} + +static void test_unmerge_discarded(void) +{ + const unsigned int size = 2 * MiB; + char *map; + + ksft_print_msg("[RUN] %s\n", __func__); + + map = mmap_and_merge_range(0xcf, size); + if (map == MAP_FAILED) + return; + + /* Discard half of all mapped pages so we have pte_none() entries. */ + if (madvise(map, size / 2, MADV_DONTNEED)) { + ksft_test_result_fail("MADV_DONTNEED failed\n"); + goto unmap; + } + + if (madvise(map, size, MADV_UNMERGEABLE)) { + ksft_test_result_fail("MADV_UNMERGEABLE failed\n"); + goto unmap; + } + + ksft_test_result(!range_maps_duplicates(map, size), + "Pages were unmerged\n"); +unmap: + munmap(map, size); +} + +#ifdef __NR_userfaultfd +static void test_unmerge_uffd_wp(void) +{ + struct uffdio_writeprotect uffd_writeprotect; + struct uffdio_register uffdio_register; + const unsigned int size = 2 * MiB; + struct uffdio_api uffdio_api; + char *map; + int uffd; + + ksft_print_msg("[RUN] %s\n", __func__); + + map = mmap_and_merge_range(0xcf, size); + if (map == MAP_FAILED) + return; + + /* See if UFFD is around. */ + uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); + if (uffd < 0) { + ksft_test_result_skip("__NR_userfaultfd failed\n"); + goto unmap; + } + + /* See if UFFD-WP is around. 
*/ + uffdio_api.api = UFFD_API; + uffdio_api.features = UFFD_FEATURE_PAGEFAULT_FLAG_WP; + if (ioctl(uffd, UFFDIO_API, &uffdio_api) < 0) { + ksft_test_result_fail("UFFDIO_API failed\n"); + goto close_uffd; + } + if (!(uffdio_api.features & UFFD_FEATURE_PAGEFAULT_FLAG_WP)) { + ksft_test_result_skip("UFFD_FEATURE_PAGEFAULT_FLAG_WP not available\n"); + goto close_uffd; + } + + /* Register UFFD-WP, no need for an actual handler. */ + uffdio_register.range.start = (unsigned long) map; + uffdio_register.range.len = size; + uffdio_register.mode = UFFDIO_REGISTER_MODE_WP; + if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) < 0) { + ksft_test_result_fail("UFFDIO_REGISTER_MODE_WP failed\n"); + goto close_uffd; + } + + /* Write-protect the range using UFFD-WP. */ + uffd_writeprotect.range.start = (unsigned long) map; + uffd_writeprotect.range.len = size; + uffd_writeprotect.mode = UFFDIO_WRITEPROTECT_MODE_WP; + if (ioctl(uffd, UFFDIO_WRITEPROTECT, &uffd_writeprotect)) { + ksft_test_result_fail("UFFDIO_WRITEPROTECT failed\n"); + goto close_uffd; + } + + if (madvise(map, size, MADV_UNMERGEABLE)) { + ksft_test_result_fail("MADV_UNMERGEABLE failed\n"); + goto close_uffd; + } + + ksft_test_result(!range_maps_duplicates(map, size), + "Pages were unmerged\n"); +close_uffd: + close(uffd); +unmap: + munmap(map, size); +} +#endif + +int main(int argc, char **argv) +{ + unsigned int tests = 2; + int err; + +#ifdef __NR_userfaultfd + tests++; +#endif + + ksft_print_header(); + ksft_set_plan(tests); + + pagesize = getpagesize(); + + ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR); + if (ksm_fd < 0) + ksft_exit_skip("open(\"/sys/kernel/mm/ksm/run\") failed\n"); + ksm_full_scans_fd = open("/sys/kernel/mm/ksm/full_scans", O_RDONLY); + if (ksm_full_scans_fd < 0) + ksft_exit_skip("open(\"/sys/kernel/mm/ksm/full_scans\") failed\n"); + pagemap_fd = open("/proc/self/pagemap", O_RDONLY); + if (pagemap_fd < 0) + ksft_exit_skip("open(\"/proc/self/pagemap\") failed\n"); + + test_unmerge(); + 
test_unmerge_discarded(); +#ifdef __NR_userfaultfd + test_unmerge_uffd_wp(); +#endif + + err = ksft_get_fail_cnt(); + if (err) + ksft_exit_fail_msg("%d out of %d tests failed\n", + err, ksft_test_num()); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh index e26661feacf5..8984e0bb58c7 100755 --- a/tools/testing/selftests/vm/run_vmtests.sh +++ b/tools/testing/selftests/vm/run_vmtests.sh @@ -253,6 +253,8 @@ CATEGORY="ksm_numa" run_test ./ksm_tests -N -m 0 CATEGORY="ksm" run_test ./ksm_functional_tests +run_test ./ksm_functional_tests + # protection_keys tests if [ -x ./protection_keys_32 ] then diff --git a/tools/testing/selftests/vm/vm_util.c b/tools/testing/selftests/vm/vm_util.c index 5bbf7641a0f0..710571902743 100644 --- a/tools/testing/selftests/vm/vm_util.c +++ b/tools/testing/selftests/vm/vm_util.c @@ -43,6 +43,16 @@ bool pagemap_is_populated(int fd, char *start) return entry & 0xc000000000000000ull; } +unsigned long pagemap_get_pfn(int fd, char *start) +{ + uint64_t entry = pagemap_get_entry(fd, start); + + /* If present (bit 63), PFN is at bit 0 -- 54.
*/ + if (entry & 0x8000000000000000ull) + return entry & 0x007fffffffffffffull; + return -1ull; +} + void clear_softdirty(void) { int ret; diff --git a/tools/testing/selftests/vm/vm_util.h b/tools/testing/selftests/vm/vm_util.h index 80d5a6ad413b..1995ee911ef2 100644 --- a/tools/testing/selftests/vm/vm_util.h +++ b/tools/testing/selftests/vm/vm_util.h @@ -6,6 +6,7 @@ uint64_t pagemap_get_entry(int fd, char *start); bool pagemap_is_softdirty(int fd, char *start); bool pagemap_is_swapped(int fd, char *start); bool pagemap_is_populated(int fd, char *start); +unsigned long pagemap_get_pfn(int fd, char *start); void clear_softdirty(void); bool check_for_pattern(FILE *fp, const char *pattern, char *buf, size_t len); uint64_t read_pmd_pagesize(void); From c31783eeae7b22dc3f6edde7339de6112959225d Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 21 Oct 2022 12:11:38 +0200 Subject: [PATCH 3681/4122] mm/pagewalk: don't trigger test_walk() in walk_page_vma() As Peter points out, the caller passes a single VMA and can just do that check itself. And in fact, no existing users rely on test_walk() getting called. So let's just remove it and make the implementation slightly more efficient. 
Link: https://lkml.kernel.org/r/20221021101141.84170-7-david@redhat.com Signed-off-by: David Hildenbrand Cc: Andrea Arcangeli Cc: Hugh Dickins Cc: Jason Gunthorpe Cc: John Hubbard Cc: Matthew Wilcox (Oracle) Cc: Peter Xu Cc: Shuah Khan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/pagewalk.h | 2 ++ mm/pagewalk.c | 7 ------- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h index f3fafb731ffd..37dc0208862d 100644 --- a/include/linux/pagewalk.h +++ b/include/linux/pagewalk.h @@ -27,6 +27,8 @@ struct mm_walk; * "do page table walk over the current vma", returning * a negative value means "abort current page table walk * right now" and returning 1 means "skip the current vma" + * Note that this callback is not called when the caller + * passes in a single VMA as for walk_page_vma(). * @pre_vma: if set, called before starting walk on a non-null vma. * @post_vma: if set, called after a walk on a non-null vma, provided * that @pre_vma and the vma walk succeeded. diff --git a/mm/pagewalk.c b/mm/pagewalk.c index 2ff3a5bebceb..0a5d71aaf9c7 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c @@ -526,18 +526,11 @@ int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops, .vma = vma, .private = private, }; - int err; if (!walk.mm) return -EINVAL; mmap_assert_locked(walk.mm); - - err = walk_page_test(vma->vm_start, vma->vm_end, &walk); - if (err > 0) - return 0; - if (err < 0) - return err; return __walk_page_range(vma->vm_start, vma->vm_end, &walk); } From 5036880efdad976165c817dcb6a1c8c24fb16caa Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 21 Oct 2022 12:11:33 +0200 Subject: [PATCH 3682/4122] selftests/vm: add test to measure MADV_UNMERGEABLE performance Let's add a test to measure performance of KSM breaking not triggered via COW, but triggered by disabling KSM on an area filled with KSM pages via MADV_UNMERGEABLE. 
Link: https://lkml.kernel.org/r/20221021101141.84170-2-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Peter Xu Cc: Andrea Arcangeli Cc: Hugh Dickins Cc: Jason Gunthorpe Cc: John Hubbard Cc: Matthew Wilcox (Oracle) Cc: Shuah Khan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/ksm_tests.c | 76 +++++++++++++++++++++++++- 1 file changed, 74 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/vm/ksm_tests.c b/tools/testing/selftests/vm/ksm_tests.c index 0d85be2350fa..f9eb4d67e0dd 100644 --- a/tools/testing/selftests/vm/ksm_tests.c +++ b/tools/testing/selftests/vm/ksm_tests.c @@ -40,6 +40,7 @@ enum ksm_test_name { CHECK_KSM_NUMA_MERGE, KSM_MERGE_TIME, KSM_MERGE_TIME_HUGE_PAGES, + KSM_UNMERGE_TIME, KSM_COW_TIME }; @@ -108,7 +109,10 @@ static void print_help(void) " -P evaluate merging time and speed.\n" " For this test, the size of duplicated memory area (in MiB)\n" " must be provided using -s option\n" - " -H evaluate merging time and speed of area allocated mostly with huge pages\n" + " -H evaluate merging time and speed of area allocated mostly with huge pages\n" + " For this test, the size of duplicated memory area (in MiB)\n" + " must be provided using -s option\n" + " -D evaluate unmerging time and speed when disabling KSM.\n" " For this test, the size of duplicated memory area (in MiB)\n" " must be provided using -s option\n" " -C evaluate the time required to break COW of merged pages.\n\n"); @@ -188,6 +192,16 @@ static int ksm_merge_pages(void *addr, size_t size, struct timespec start_time, return 0; } +static int ksm_unmerge_pages(void *addr, size_t size, + struct timespec start_time, int timeout) +{ + if (madvise(addr, size, MADV_UNMERGEABLE)) { + perror("madvise"); + return 1; + } + return 0; +} + static bool assert_ksm_pages_count(long dupl_page_count) { unsigned long max_page_sharing, pages_sharing, pages_shared; @@ -560,6 +574,53 @@ err_out: return KSFT_FAIL; } +static int ksm_unmerge_time(int 
mapping, int prot, int timeout, size_t map_size) +{ + void *map_ptr; + struct timespec start_time, end_time; + unsigned long scan_time_ns; + + map_size *= MB; + + map_ptr = allocate_memory(NULL, prot, mapping, '*', map_size); + if (!map_ptr) + return KSFT_FAIL; + if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) { + perror("clock_gettime"); + goto err_out; + } + if (ksm_merge_pages(map_ptr, map_size, start_time, timeout)) + goto err_out; + + if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) { + perror("clock_gettime"); + goto err_out; + } + if (ksm_unmerge_pages(map_ptr, map_size, start_time, timeout)) + goto err_out; + if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time)) { + perror("clock_gettime"); + goto err_out; + } + + scan_time_ns = (end_time.tv_sec - start_time.tv_sec) * NSEC_PER_SEC + + (end_time.tv_nsec - start_time.tv_nsec); + + printf("Total size: %lu MiB\n", map_size / MB); + printf("Total time: %ld.%09ld s\n", scan_time_ns / NSEC_PER_SEC, + scan_time_ns % NSEC_PER_SEC); + printf("Average speed: %.3f MiB/s\n", (map_size / MB) / + ((double)scan_time_ns / NSEC_PER_SEC)); + + munmap(map_ptr, map_size); + return KSFT_PASS; + +err_out: + printf("Not OK\n"); + munmap(map_ptr, map_size); + return KSFT_FAIL; +} + static int ksm_cow_time(int mapping, int prot, int timeout, size_t page_size) { void *map_ptr; @@ -644,7 +705,7 @@ int main(int argc, char *argv[]) bool merge_across_nodes = KSM_MERGE_ACROSS_NODES_DEFAULT; long size_MB = 0; - while ((opt = getopt(argc, argv, "ha:p:l:z:m:s:MUZNPCH")) != -1) { + while ((opt = getopt(argc, argv, "ha:p:l:z:m:s:MUZNPCHD")) != -1) { switch (opt) { case 'a': prot = str_to_prot(optarg); @@ -701,6 +762,9 @@ int main(int argc, char *argv[]) case 'H': test_name = KSM_MERGE_TIME_HUGE_PAGES; break; + case 'D': + test_name = KSM_UNMERGE_TIME; + break; case 'C': test_name = KSM_COW_TIME; break; @@ -762,6 +826,14 @@ int main(int argc, char *argv[]) ret = ksm_merge_hugepages_time(MAP_PRIVATE | MAP_ANONYMOUS, prot, 
ksm_scan_limit_sec, size_MB); break; + case KSM_UNMERGE_TIME: + if (size_MB == 0) { + printf("Option '-s' is required.\n"); + return KSFT_FAIL; + } + ret = ksm_unmerge_time(MAP_PRIVATE | MAP_ANONYMOUS, prot, + ksm_scan_limit_sec, size_MB); + break; case KSM_COW_TIME: ret = ksm_cow_time(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec, page_size); From 58f595c6659198e1ad0ed431a408ddd79b21e579 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 21 Oct 2022 12:11:34 +0200 Subject: [PATCH 3683/4122] mm/ksm: simplify break_ksm() to not rely on VM_FAULT_WRITE Now that GUP no longer requires VM_FAULT_WRITE, break_ksm() is the sole remaining user of VM_FAULT_WRITE. As we also want to stop triggering a fake write fault and instead use FAULT_FLAG_UNSHARE -- similar to GUP-triggered unsharing when taking a R/O pin on a shared anonymous page (including KSM pages), let's stop relying on VM_FAULT_WRITE. Let's rework break_ksm() to not rely on the return value of handle_mm_fault() anymore to figure out whether COW-breaking was successful. Simply perform another follow_page() lookup to verify the result. While this makes break_ksm() slightly less efficient, we can simplify handle_mm_fault() a little and easily switch to FAULT_FLAG_UNSHARE without introducing similar KSM-specific behavior for FAULT_FLAG_UNSHARE. In my setup (AMD Ryzen 9 3900X), running the KSM selftest to test unmerge performance on 2 GiB (taskset 0x8 ./ksm_tests -D -s 2048), this results in a performance degradation of ~4% -- 5% (old: ~5250 MiB/s, new: ~5010 MiB/s). I don't think that we particularly care about that performance drop when unmerging. If it ever turns out to be an actual performance issue, we can think about a better alternative for FAULT_FLAG_UNSHARE -- let's just keep it simple for now. 
Link: https://lkml.kernel.org/r/20221021101141.84170-3-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Peter Xu Cc: Andrea Arcangeli Cc: Hugh Dickins Cc: Jason Gunthorpe Cc: John Hubbard Cc: Matthew Wilcox (Oracle) Cc: Shuah Khan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- mm/ksm.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/mm/ksm.c b/mm/ksm.c index a71245241d22..4efdc424a3fc 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -440,26 +440,27 @@ static int break_ksm(struct vm_area_struct *vma, unsigned long addr) vm_fault_t ret = 0; do { + bool ksm_page = false; + cond_resched(); page = follow_page(vma, addr, FOLL_GET | FOLL_MIGRATION | FOLL_REMOTE); if (IS_ERR_OR_NULL(page)) break; if (PageKsm(page)) - ret = handle_mm_fault(vma, addr, - FAULT_FLAG_WRITE | FAULT_FLAG_REMOTE, - NULL); - else - ret = VM_FAULT_WRITE; + ksm_page = true; put_page(page); - } while (!(ret & (VM_FAULT_WRITE | VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV | VM_FAULT_OOM))); + + if (!ksm_page) + return 0; + ret = handle_mm_fault(vma, addr, + FAULT_FLAG_WRITE | FAULT_FLAG_REMOTE, + NULL); + } while (!(ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV | VM_FAULT_OOM))); /* - * We must loop because handle_mm_fault() may back out if there's - * any difficulty e.g. if pte accessed bit gets updated concurrently. - * - * VM_FAULT_WRITE is what we have been hoping for: it indicates that - * COW has been broken, even if the vma does not permit VM_WRITE; - * but note that a concurrent fault might break PageKsm for us. + * We must loop until we no longer find a KSM page because + * handle_mm_fault() may back out if there's any difficulty e.g. if + * pte accessed bit gets updated concurrently. 
* * VM_FAULT_SIGBUS could occur if we race with truncation of the * backing file, which also invalidates anonymous pages: that's From cb8d863313436339fb60f7dd5131af2e5854621e Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 21 Oct 2022 12:11:35 +0200 Subject: [PATCH 3684/4122] mm: remove VM_FAULT_WRITE All users -- GUP and KSM -- are gone, let's just remove it. Link: https://lkml.kernel.org/r/20221021101141.84170-4-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Peter Xu Cc: Andrea Arcangeli Cc: Hugh Dickins Cc: Jason Gunthorpe Cc: John Hubbard Cc: Matthew Wilcox (Oracle) Cc: Shuah Khan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 3 --- mm/huge_memory.c | 2 +- mm/memory.c | 9 ++++----- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 018b1c098173..199f98be6f9c 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -929,7 +929,6 @@ typedef __bitwise unsigned int vm_fault_t; * @VM_FAULT_OOM: Out Of Memory * @VM_FAULT_SIGBUS: Bad access * @VM_FAULT_MAJOR: Page read from storage - * @VM_FAULT_WRITE: Special case for get_user_pages * @VM_FAULT_HWPOISON: Hit poisoned small page * @VM_FAULT_HWPOISON_LARGE: Hit poisoned large page. 
Index encoded * in upper bits @@ -950,7 +949,6 @@ enum vm_fault_reason { VM_FAULT_OOM = (__force vm_fault_t)0x000001, VM_FAULT_SIGBUS = (__force vm_fault_t)0x000002, VM_FAULT_MAJOR = (__force vm_fault_t)0x000004, - VM_FAULT_WRITE = (__force vm_fault_t)0x000008, VM_FAULT_HWPOISON = (__force vm_fault_t)0x000010, VM_FAULT_HWPOISON_LARGE = (__force vm_fault_t)0x000020, VM_FAULT_SIGSEGV = (__force vm_fault_t)0x000040, @@ -976,7 +974,6 @@ enum vm_fault_reason { { VM_FAULT_OOM, "OOM" }, \ { VM_FAULT_SIGBUS, "SIGBUS" }, \ { VM_FAULT_MAJOR, "MAJOR" }, \ - { VM_FAULT_WRITE, "WRITE" }, \ { VM_FAULT_HWPOISON, "HWPOISON" }, \ { VM_FAULT_HWPOISON_LARGE, "HWPOISON_LARGE" }, \ { VM_FAULT_SIGSEGV, "SIGSEGV" }, \ diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 8f10afba17a6..1d9ad909c87c 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1376,7 +1376,7 @@ reuse: if (pmdp_set_access_flags(vma, haddr, vmf->pmd, entry, 1)) update_mmu_cache_pmd(vma, vmf->address, vmf->pmd); spin_unlock(vmf->ptl); - return VM_FAULT_WRITE; + return 0; } unlock_fallback: diff --git a/mm/memory.c b/mm/memory.c index 815d2ff05c62..aad226daf41b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3213,7 +3213,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) } delayacct_wpcopy_end(); - return (page_copied && !unshare) ? 
VM_FAULT_WRITE : 0; + return 0; oom_free_new: put_page(new_page); oom: @@ -3277,14 +3277,14 @@ static vm_fault_t wp_pfn_shared(struct vm_fault *vmf) return finish_mkwrite_fault(vmf); } wp_page_reuse(vmf); - return VM_FAULT_WRITE; + return 0; } static vm_fault_t wp_page_shared(struct vm_fault *vmf) __releases(vmf->ptl) { struct vm_area_struct *vma = vmf->vma; - vm_fault_t ret = VM_FAULT_WRITE; + vm_fault_t ret = 0; get_page(vmf->page); @@ -3430,7 +3430,7 @@ reuse: return 0; } wp_page_reuse(vmf); - return VM_FAULT_WRITE; + return 0; } copy: /* @@ -3944,7 +3944,6 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) if (vmf->flags & FAULT_FLAG_WRITE) { pte = maybe_mkwrite(pte_mkdirty(pte), vma); vmf->flags &= ~FAULT_FLAG_WRITE; - ret |= VM_FAULT_WRITE; } rmap_flags |= RMAP_EXCLUSIVE; } From 6cce3314b928b2db7d5f48171e18314226551c3f Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 21 Oct 2022 12:11:37 +0200 Subject: [PATCH 3685/4122] mm/ksm: fix KSM COW breaking with userfaultfd-wp via FAULT_FLAG_UNSHARE Let's stop breaking COW via a fake write fault and let's use FAULT_FLAG_UNSHARE instead. This avoids any wrong side effects of the fake write fault, such as mapping the PTE writable and marking the pte dirty/softdirty. Consequently, we will no longer trigger a fake write fault and break COW without any such side-effects. Also, this fixes KSM interaction with userfaultfd-wp: when we have a KSM page that's write-protected by userfaultfd, break_ksm()->handle_mm_fault() will fail with VM_FAULT_SIGBUS and will simply return in break_ksm() with 0 instead of actually breaking COW. For now, the KSM unmerge tests can trigger that: $ sudo ./ksm_functional_tests TAP version 13 1..3 # [RUN] test_unmerge ok 1 Pages were unmerged # [RUN] test_unmerge_discarded ok 2 Pages were unmerged # [RUN] test_unmerge_uffd_wp not ok 3 Pages were unmerged Bail out! 
1 out of 3 tests failed # Planned tests != run tests (2 != 3) # Totals: pass:2 fail:1 xfail:0 xpass:0 skip:0 error:0 The warning in dmesg also indicates this wrong handling: [ 230.096368] FAULT_FLAG_ALLOW_RETRY missing 881 [ 230.100822] CPU: 1 PID: 1643 Comm: ksm-uffd-wp [...] [ 230.110124] Hardware name: [...] [ 230.117775] Call Trace: [ 230.120227] [ 230.122334] dump_stack_lvl+0x44/0x5c [ 230.126010] handle_userfault.cold+0x14/0x19 [ 230.130281] ? tlb_finish_mmu+0x65/0x170 [ 230.134207] ? uffd_wp_range+0x65/0xa0 [ 230.137959] ? _raw_spin_unlock+0x15/0x30 [ 230.141972] ? do_wp_page+0x50/0x590 [ 230.145551] __handle_mm_fault+0x9f5/0xf50 [ 230.149652] ? mmput+0x1f/0x40 [ 230.152712] handle_mm_fault+0xb9/0x2a0 [ 230.156550] break_ksm+0x141/0x180 [ 230.159964] unmerge_ksm_pages+0x60/0x90 [ 230.163890] ksm_madvise+0x3c/0xb0 [ 230.167295] do_madvise.part.0+0x10c/0xeb0 [ 230.171396] ? do_syscall_64+0x67/0x80 [ 230.175157] __x64_sys_madvise+0x5a/0x70 [ 230.179082] do_syscall_64+0x58/0x80 [ 230.182661] ? do_syscall_64+0x67/0x80 [ 230.186413] entry_SYSCALL_64_after_hwframe+0x63/0xcd This is primarily a fix for KSM+userfaultfd-wp, however, the fake write fault was always questionable. As this fix is not easy to backport and it's not very critical, let's not cc stable. 
Link: https://lkml.kernel.org/r/20221021101141.84170-6-david@redhat.com Fixes: 529b930b87d9 ("userfaultfd: wp: hook userfault handler to write protection fault") Signed-off-by: David Hildenbrand Acked-by: Peter Xu Cc: Andrea Arcangeli Cc: Hugh Dickins Cc: Jason Gunthorpe Cc: John Hubbard Cc: Matthew Wilcox (Oracle) Cc: Shuah Khan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- mm/ksm.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/mm/ksm.c b/mm/ksm.c index 4efdc424a3fc..0805221e1d4c 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -420,17 +420,15 @@ static inline bool ksm_test_exit(struct mm_struct *mm) } /* - * We use break_ksm to break COW on a ksm page: it's a stripped down + * We use break_ksm to break COW on a ksm page by triggering unsharing, + * such that the ksm page will get replaced by an exclusive anonymous page. * - * if (get_user_pages(addr, 1, FOLL_WRITE, &page, NULL) == 1) - * put_page(page); - * - * but taking great care only to touch a ksm page, in a VM_MERGEABLE vma, + * We take great care only to touch a ksm page, in a VM_MERGEABLE vma, * in case the application has unmapped and remapped mm,addr meanwhile. * Could a ksm page appear anywhere else? Actually yes, in a VM_PFNMAP * mmap of /dev/mem, where we would not want to touch it. * - * FAULT_FLAG/FOLL_REMOTE are because we do this outside the context + * FAULT_FLAG_REMOTE/FOLL_REMOTE are because we do this outside the context * of the process that owns 'vma'. We also do not want to enforce * protection keys here anyway. 
*/ @@ -454,7 +452,7 @@ static int break_ksm(struct vm_area_struct *vma, unsigned long addr) if (!ksm_page) return 0; ret = handle_mm_fault(vma, addr, - FAULT_FLAG_WRITE | FAULT_FLAG_REMOTE, + FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE, NULL); } while (!(ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV | VM_FAULT_OOM))); /* From e07cda5f232fac4de0925d8a4c92e51e41fa2f6e Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 21 Oct 2022 12:11:39 +0200 Subject: [PATCH 3686/4122] mm/pagewalk: add walk_page_range_vma() Let's add walk_page_range_vma(), which is similar to walk_page_vma(), however, is only interested in a subset of the VMA range. To be used in KSM code to stop using follow_page() next. Link: https://lkml.kernel.org/r/20221021101141.84170-8-david@redhat.com Signed-off-by: David Hildenbrand Cc: Andrea Arcangeli Cc: Hugh Dickins Cc: Jason Gunthorpe Cc: John Hubbard Cc: Matthew Wilcox (Oracle) Cc: Peter Xu Cc: Shuah Khan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/pagewalk.h | 3 +++ mm/pagewalk.c | 20 ++++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h index 37dc0208862d..959f52e5867d 100644 --- a/include/linux/pagewalk.h +++ b/include/linux/pagewalk.h @@ -101,6 +101,9 @@ int walk_page_range_novma(struct mm_struct *mm, unsigned long start, unsigned long end, const struct mm_walk_ops *ops, pgd_t *pgd, void *private); +int walk_page_range_vma(struct vm_area_struct *vma, unsigned long start, + unsigned long end, const struct mm_walk_ops *ops, + void *private); int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops, void *private); int walk_page_mapping(struct address_space *mapping, pgoff_t first_index, diff --git a/mm/pagewalk.c b/mm/pagewalk.c index 0a5d71aaf9c7..7f1c9b274906 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c @@ -517,6 +517,26 @@ int walk_page_range_novma(struct mm_struct *mm, unsigned long start, return walk_pgd_range(start, end, &walk); } 
+int walk_page_range_vma(struct vm_area_struct *vma, unsigned long start, + unsigned long end, const struct mm_walk_ops *ops, + void *private) +{ + struct mm_walk walk = { + .ops = ops, + .mm = vma->vm_mm, + .vma = vma, + .private = private, + }; + + if (start >= end || !walk.mm) + return -EINVAL; + if (start < vma->vm_start || end > vma->vm_end) + return -EINVAL; + + mmap_assert_locked(walk.mm); + return __walk_page_range(start, end, &walk); +} + int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops, void *private) { From d7c0e68dab98f0f5a2af501eaefeb90cc855fc80 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 21 Oct 2022 12:11:40 +0200 Subject: [PATCH 3687/4122] mm/ksm: convert break_ksm() to use walk_page_range_vma() FOLL_MIGRATION exists only for the purpose of break_ksm(), and actually, there is not even the need to wait for the migration to finish, we only want to know if we're dealing with a KSM page. Using follow_page() just to identify a KSM page overcomplicates GUP code. Let's use walk_page_range_vma() instead, because we don't actually care about the page itself, we only need to know a single property -- no need to even grab a reference. So, get rid of follow_page() usage such that we can get rid of FOLL_MIGRATION now and eventually be able to get rid of follow_page() in the future. In my setup (AMD Ryzen 9 3900X), running the KSM selftest to test unmerge performance on 2 GiB (taskset 0x8 ./ksm_tests -D -s 2048), this results in a performance degradation of ~2% (old: ~5010 MiB/s, new: ~4900 MiB/s). I don't think we particularly care for now. Interestingly, the benchmark reduction is due to the single callback. Adding a second callback (e.g., pud_entry()) reduces the benchmark by another 100-200 MiB/s. 
Link: https://lkml.kernel.org/r/20221021101141.84170-9-david@redhat.com Signed-off-by: David Hildenbrand Cc: Andrea Arcangeli Cc: Hugh Dickins Cc: Jason Gunthorpe Cc: John Hubbard Cc: Matthew Wilcox (Oracle) Cc: Peter Xu Cc: Shuah Khan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- mm/ksm.c | 49 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 39 insertions(+), 10 deletions(-) diff --git a/mm/ksm.c b/mm/ksm.c index 0805221e1d4c..dd02780c387f 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include "internal.h" @@ -419,6 +420,39 @@ static inline bool ksm_test_exit(struct mm_struct *mm) return atomic_read(&mm->mm_users) == 0; } +static int break_ksm_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long next, + struct mm_walk *walk) +{ + struct page *page = NULL; + spinlock_t *ptl; + pte_t *pte; + int ret; + + if (pmd_leaf(*pmd) || !pmd_present(*pmd)) + return 0; + + pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); + if (pte_present(*pte)) { + page = vm_normal_page(walk->vma, addr, *pte); + } else if (!pte_none(*pte)) { + swp_entry_t entry = pte_to_swp_entry(*pte); + + /* + * As KSM pages remain KSM pages until freed, no need to wait + * here for migration to end. + */ + if (is_migration_entry(entry)) + page = pfn_swap_entry_to_page(entry); + } + ret = page && PageKsm(page); + pte_unmap_unlock(pte, ptl); + return ret; +} + +static const struct mm_walk_ops break_ksm_ops = { + .pmd_entry = break_ksm_pmd_entry, +}; + /* * We use break_ksm to break COW on a ksm page by triggering unsharing, * such that the ksm page will get replaced by an exclusive anonymous page. 
@@ -434,21 +468,16 @@ static inline bool ksm_test_exit(struct mm_struct *mm) */ static int break_ksm(struct vm_area_struct *vma, unsigned long addr) { - struct page *page; vm_fault_t ret = 0; do { - bool ksm_page = false; + int ksm_page; cond_resched(); - page = follow_page(vma, addr, - FOLL_GET | FOLL_MIGRATION | FOLL_REMOTE); - if (IS_ERR_OR_NULL(page)) - break; - if (PageKsm(page)) - ksm_page = true; - put_page(page); - + ksm_page = walk_page_range_vma(vma, addr, addr + 1, + &break_ksm_ops, NULL); + if (WARN_ON_ONCE(ksm_page < 0)) + return ksm_page; if (!ksm_page) return 0; ret = handle_mm_fault(vma, addr, From f7355e99d9f71fcde093193fd4b569a648ba5ce3 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 21 Oct 2022 12:11:41 +0200 Subject: [PATCH 3688/4122] mm/gup: remove FOLL_MIGRATION Fortunately, the last user (KSM) is gone, so let's just remove this rather special code from generic GUP handling -- especially because KSM never required the PMD handling as KSM only deals with individual base pages. 
[akpm@linux-foundation.org: fix merge snafu] Link: https://lkml.kernel.org/r/20221021101141.84170-10-david@redhat.com Signed-off-by: David Hildenbrand Cc: Andrea Arcangeli Cc: Hugh Dickins Cc: Jason Gunthorpe Cc: John Hubbard Cc: Matthew Wilcox (Oracle) Cc: Peter Xu Cc: Shuah Khan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/mm.h | 1 - mm/gup.c | 55 +++++----------------------------------------- 2 files changed, 5 insertions(+), 51 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 8df5cae69c80..767c8c522e70 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3057,7 +3057,6 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, * and return without waiting upon it */ #define FOLL_NOFAULT 0x80 /* do not fault in pages */ #define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */ -#define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */ #define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */ #define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */ #define FOLL_ANON 0x8000 /* don't do file mappings */ diff --git a/mm/gup.c b/mm/gup.c index 2860cf4a85e1..82b275bbaad5 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -537,30 +537,13 @@ static struct page *follow_page_pte(struct vm_area_struct *vma, if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) == (FOLL_PIN | FOLL_GET))) return ERR_PTR(-EINVAL); -retry: if (unlikely(pmd_bad(*pmd))) return no_page_table(vma, flags); ptep = pte_offset_map_lock(mm, pmd, address, &ptl); pte = *ptep; - if (!pte_present(pte)) { - swp_entry_t entry; - /* - * KSM's break_ksm() relies upon recognizing a ksm page - * even while it is being migrated, so for that case we - * need migration_entry_wait(). 
- */ - if (likely(!(flags & FOLL_MIGRATION))) - goto no_page; - if (pte_none(pte)) - goto no_page; - entry = pte_to_swp_entry(pte); - if (!is_migration_entry(entry)) - goto no_page; - pte_unmap_unlock(ptep, ptl); - migration_entry_wait(mm, pmd, address); - goto retry; - } + if (!pte_present(pte)) + goto no_page; if (pte_protnone(pte) && !gup_can_follow_protnone(flags)) goto no_page; @@ -668,28 +651,8 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma, pmdval = READ_ONCE(*pmd); if (pmd_none(pmdval)) return no_page_table(vma, flags); -retry: - if (!pmd_present(pmdval)) { - /* - * Should never reach here, if thp migration is not supported; - * Otherwise, it must be a thp migration entry. - */ - VM_BUG_ON(!thp_migration_supported() || - !is_pmd_migration_entry(pmdval)); - - if (likely(!(flags & FOLL_MIGRATION))) - return no_page_table(vma, flags); - - pmd_migration_entry_wait(mm, pmd); - pmdval = READ_ONCE(*pmd); - /* - * MADV_DONTNEED may convert the pmd to null because - * mmap_lock is held in read mode - */ - if (pmd_none(pmdval)) - return no_page_table(vma, flags); - goto retry; - } + if (!pmd_present(pmdval)) + return no_page_table(vma, flags); if (pmd_devmap(pmdval)) { ptl = pmd_lock(mm, pmd); page = follow_devmap_pmd(vma, address, pmd, flags, &ctx->pgmap); @@ -703,18 +666,10 @@ retry: if (pmd_protnone(pmdval) && !gup_can_follow_protnone(flags)) return no_page_table(vma, flags); -retry_locked: ptl = pmd_lock(mm, pmd); - if (unlikely(pmd_none(*pmd))) { - spin_unlock(ptl); - return no_page_table(vma, flags); - } if (unlikely(!pmd_present(*pmd))) { spin_unlock(ptl); - if (likely(!(flags & FOLL_MIGRATION))) - return no_page_table(vma, flags); - pmd_migration_entry_wait(mm, pmd); - goto retry_locked; + return no_page_table(vma, flags); } if (unlikely(!pmd_trans_huge(*pmd))) { spin_unlock(ptl); From 4c9473e87e75a2a77ccd02e55c91ffe6a52b5df6 Mon Sep 17 00:00:00 2001 From: Gautam Menghani Date: Wed, 26 Oct 2022 10:52:18 +0530 Subject: [PATCH 3689/4122] 
mm/khugepaged: add tracepoint to collapse_file() "mm_khugepaged_collapse_file" for capturing is_shmem. Currently, is_shmem is not being captured. Capturing is_shmem is useful as it can indicate if tmpfs is being used as a backing store instead of persistent storage. Add the tracepoint in collapse_file() named "mm_khugepaged_collapse_file" for capturing is_shmem. [gautammenghani201@gmail.com: swap is_shmem and addr to save space, per Steven Rostedt] Link: https://lkml.kernel.org/r/20221202201807.182829-1-gautammenghani201@gmail.com Link: https://lkml.kernel.org/r/20221026052218.148234-1-gautammenghani201@gmail.com Signed-off-by: Gautam Menghani Reviewed-by: Steven Rostedt (Google) [tracing] Cc: David Hildenbrand Cc: Masami Hiramatsu (Google) Cc: Yang Shi Cc: Zach O'Keefe Signed-off-by: Andrew Morton --- include/trace/events/huge_memory.h | 38 ++++++++++++++++++++++++++++++ mm/khugepaged.c | 7 +++--- 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h index 760455dfa860..3e6fb05852f9 100644 --- a/include/trace/events/huge_memory.h +++ b/include/trace/events/huge_memory.h @@ -203,5 +203,43 @@ TRACE_EVENT(mm_khugepaged_scan_file, __print_symbolic(__entry->result, SCAN_STATUS)) ); +TRACE_EVENT(mm_khugepaged_collapse_file, + TP_PROTO(struct mm_struct *mm, struct page *hpage, pgoff_t index, + bool is_shmem, unsigned long addr, struct file *file, + int nr, int result), + TP_ARGS(mm, hpage, index, addr, is_shmem, file, nr, result), + TP_STRUCT__entry( + __field(struct mm_struct *, mm) + __field(unsigned long, hpfn) + __field(pgoff_t, index) + __field(unsigned long, addr) + __field(bool, is_shmem) + __string(filename, file->f_path.dentry->d_iname) + __field(int, nr) + __field(int, result) + ), + + TP_fast_assign( + __entry->mm = mm; + __entry->hpfn = hpage ? 
page_to_pfn(hpage) : -1; + __entry->index = index; + __entry->addr = addr; + __entry->is_shmem = is_shmem; + __assign_str(filename, file->f_path.dentry->d_iname); + __entry->nr = nr; + __entry->result = result; + ), + + TP_printk("mm=%p, hpage_pfn=0x%lx, index=%ld, addr=%ld, is_shmem=%d, filename=%s, nr=%d, result=%s", + __entry->mm, + __entry->hpfn, + __entry->index, + __entry->addr, + __entry->is_shmem, + __get_str(filename), + __entry->nr, + __print_symbolic(__entry->result, SCAN_STATUS)) +); + #endif /* __HUGE_MEMORY_H */ #include diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 913b0f489352..78ec2771cc65 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1744,12 +1744,12 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, { struct address_space *mapping = file->f_mapping; struct page *hpage; - pgoff_t index, end = start + HPAGE_PMD_NR; + pgoff_t index = 0, end = start + HPAGE_PMD_NR; LIST_HEAD(pagelist); XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER); int nr_none = 0, result = SCAN_SUCCEED; bool is_shmem = shmem_file(file); - int nr; + int nr = 0; VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem); VM_BUG_ON(start & (HPAGE_PMD_NR - 1)); @@ -2102,7 +2102,8 @@ out: mem_cgroup_uncharge(page_folio(hpage)); put_page(hpage); } - /* TODO: tracepoints */ + + trace_mm_khugepaged_collapse_file(mm, hpage, index, is_shmem, addr, file, nr, result); return result; } From fd3b1bc3c86ee11ba77421b00c70280605b521c6 Mon Sep 17 00:00:00 2001 From: Pavankumar Kondeti Date: Wed, 9 Nov 2022 10:48:36 +0530 Subject: [PATCH 3690/4122] mm/madvise: fix madvise_pageout for private file mappings When MADV_PAGEOUT is called on a private file mapping VMA region, we bail out early if the process is neither owner nor write capable of the file. However, this VMA may have both private/shared clean pages and private dirty pages. The opportunity of paging out the private dirty pages (Anon pages) is missed. 
Fix this behavior by allowing private file mappings pageout further and perform the file access check along with PageAnon() during page walk. We observe ~10% improvement in zram usage, thus leaving more available memory on a 4GB RAM system running Android. [quic_pkondeti@quicinc.com: v2] Link: https://lkml.kernel.org/r/1669962597-27724-1-git-send-email-quic_pkondeti@quicinc.com Link: https://lkml.kernel.org/r/1667971116-12900-1-git-send-email-quic_pkondeti@quicinc.com Signed-off-by: Pavankumar Kondeti Cc: Charan Teja Kalla Cc: Minchan Kim Cc: Suren Baghdasaryan Cc: David Hildenbrand Signed-off-by: Andrew Morton --- mm/madvise.c | 53 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 35 insertions(+), 18 deletions(-) diff --git a/mm/madvise.c b/mm/madvise.c index 83b0c91a126b..2573ea3ed684 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -318,6 +318,21 @@ static long madvise_willneed(struct vm_area_struct *vma, return 0; } +static inline bool can_do_file_pageout(struct vm_area_struct *vma) +{ + if (!vma->vm_file) + return false; + /* + * paging out pagecache only for non-anonymous mappings that correspond + * to the files the calling process could (if tried) open for writing; + * otherwise we'd be including shared non-exclusive mappings, which + * opens a side channel. 
+ */ + return inode_owner_or_capable(&init_user_ns, + file_inode(vma->vm_file)) || + file_permission(vma->vm_file, MAY_WRITE) == 0; +} + static int madvise_cold_or_pageout_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, struct mm_walk *walk) @@ -331,10 +346,14 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd, spinlock_t *ptl; struct page *page = NULL; LIST_HEAD(page_list); + bool pageout_anon_only_filter; if (fatal_signal_pending(current)) return -EINTR; + pageout_anon_only_filter = pageout && !vma_is_anonymous(vma) && + !can_do_file_pageout(vma); + #ifdef CONFIG_TRANSPARENT_HUGEPAGE if (pmd_trans_huge(*pmd)) { pmd_t orig_pmd; @@ -361,6 +380,9 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd, if (page_mapcount(page) != 1) goto huge_unlock; + if (pageout_anon_only_filter && !PageAnon(page)) + goto huge_unlock; + if (next - addr != HPAGE_PMD_SIZE) { int err; @@ -429,6 +451,8 @@ regular_page: if (PageTransCompound(page)) { if (page_mapcount(page) != 1) break; + if (pageout_anon_only_filter && !PageAnon(page)) + break; get_page(page); if (!trylock_page(page)) { put_page(page); @@ -456,6 +480,9 @@ regular_page: if (!PageLRU(page) || page_mapcount(page) != 1) continue; + if (pageout_anon_only_filter && !PageAnon(page)) + continue; + VM_BUG_ON_PAGE(PageTransCompound(page), page); if (pte_young(ptent)) { @@ -550,23 +577,6 @@ static void madvise_pageout_page_range(struct mmu_gather *tlb, tlb_end_vma(tlb, vma); } -static inline bool can_do_pageout(struct vm_area_struct *vma) -{ - if (vma_is_anonymous(vma)) - return true; - if (!vma->vm_file) - return false; - /* - * paging out pagecache only for non-anonymous mappings that correspond - * to the files the calling process could (if tried) open for writing; - * otherwise we'd be including shared non-exclusive mappings, which - * opens a side channel. 
- */ - return inode_owner_or_capable(&init_user_ns, - file_inode(vma->vm_file)) || - file_permission(vma->vm_file, MAY_WRITE) == 0; -} - static long madvise_pageout(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start_addr, unsigned long end_addr) @@ -578,7 +588,14 @@ static long madvise_pageout(struct vm_area_struct *vma, if (!can_madv_lru_vma(vma)) return -EINVAL; - if (!can_do_pageout(vma)) + /* + * If the VMA belongs to a private file mapping, there can be private + * dirty pages which can be paged out if even this process is neither + * owner nor write capable of the file. We allow private file mappings + * further to pageout dirty anon pages. + */ + if (!vma_is_anonymous(vma) && (!can_do_file_pageout(vma) && + (vma->vm_flags & VM_MAYSHARE))) return 0; lru_add_drain(); From 6b3379e8dcbea09b7e27bf0eea2f53fd15a164ac Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 28 Nov 2022 11:16:10 -0800 Subject: [PATCH 3691/4122] zswap: fix writeback lock ordering for zsmalloc Patch series "Implement writeback for zsmalloc", v7. Unlike other zswap allocators such as zbud or z3fold, zsmalloc currently lacks the writeback mechanism. This means that when the zswap pool is full, it will simply reject further allocations, and the pages will be written directly to swap. This series of patches implements writeback for zsmalloc. When the zswap pool becomes full, zsmalloc will attempt to evict all the compressed objects in the least-recently used zspages. This patch (of 6): zswap's customary lock order is tree->lock before pool->lock, because the tree->lock protects the entries' refcount, and the free callbacks in the backends acquire their respective pool locks to dispatch the backing object. zsmalloc's map callback takes the pool lock, so zswap must not grab the tree->lock while a handle is mapped. This currently only happens during writeback, which isn't implemented for zsmalloc. 
In preparation for it, move the tree->lock section out of the mapped entry section Link: https://lkml.kernel.org/r/20221128191616.1261026-1-nphamcs@gmail.com Link: https://lkml.kernel.org/r/20221128191616.1261026-2-nphamcs@gmail.com Signed-off-by: Johannes Weiner Signed-off-by: Nhat Pham Reviewed-by: Sergey Senozhatsky Cc: Dan Streetman Cc: Minchan Kim Cc: Nitin Gupta Cc: Seth Jennings Cc: Vitaly Wool Signed-off-by: Andrew Morton --- mm/zswap.c | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/mm/zswap.c b/mm/zswap.c index 3019f0bde194..f6c89049cf70 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -968,6 +968,7 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle) swpentry = zhdr->swpentry; /* here */ tree = zswap_trees[swp_type(swpentry)]; offset = swp_offset(swpentry); + zpool_unmap_handle(pool, handle); /* find and ref zswap entry */ spin_lock(&tree->lock); @@ -975,20 +976,12 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle) if (!entry) { /* entry was invalidated */ spin_unlock(&tree->lock); - zpool_unmap_handle(pool, handle); kfree(tmp); return 0; } spin_unlock(&tree->lock); BUG_ON(offset != entry->offset); - src = (u8 *)zhdr + sizeof(struct zswap_header); - if (!zpool_can_sleep_mapped(pool)) { - memcpy(tmp, src, entry->length); - src = tmp; - zpool_unmap_handle(pool, handle); - } - /* try to allocate swap cache page */ switch (zswap_get_swap_cache_page(swpentry, &page)) { case ZSWAP_SWAPCACHE_FAIL: /* no memory or invalidate happened */ @@ -1006,6 +999,14 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle) acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx); dlen = PAGE_SIZE; + zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO); + src = (u8 *)zhdr + sizeof(struct zswap_header); + if (!zpool_can_sleep_mapped(pool)) { + memcpy(tmp, src, entry->length); + src = tmp; + zpool_unmap_handle(pool, handle); + } + mutex_lock(acomp_ctx->mutex); 
sg_init_one(&input, src, entry->length); sg_init_table(&output, 1); @@ -1015,6 +1016,11 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle) dlen = acomp_ctx->req->dlen; mutex_unlock(acomp_ctx->mutex); + if (!zpool_can_sleep_mapped(pool)) + kfree(tmp); + else + zpool_unmap_handle(pool, handle); + BUG_ON(ret); BUG_ON(dlen != PAGE_SIZE); @@ -1045,7 +1051,11 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle) zswap_entry_put(tree, entry); spin_unlock(&tree->lock); - goto end; + return ret; + +fail: + if (!zpool_can_sleep_mapped(pool)) + kfree(tmp); /* * if we get here due to ZSWAP_SWAPCACHE_EXIST @@ -1054,17 +1064,10 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle) * if we free the entry in the following put * it is also okay to return !0 */ -fail: spin_lock(&tree->lock); zswap_entry_put(tree, entry); spin_unlock(&tree->lock); -end: - if (zpool_can_sleep_mapped(pool)) - zpool_unmap_handle(pool, handle); - else - kfree(tmp); - return ret; } From 6a05aa30109d5cd4bebfb89415c58fa4599ef875 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 28 Nov 2022 11:16:11 -0800 Subject: [PATCH 3692/4122] zpool: clean out dead code There is a lot of provision for flexibility that isn't actually needed or used. Zswap (the only zpool user) always passes zpool_ops with an .evict method set. The backends who reclaim only do so for zswap, so they can also directly call zpool_ops without indirection or checks. Finally, there is no need to check the retries parameters and bail with -EINVAL in the reclaim function, when that's called just a few lines below with a hard-coded 8. There is no need to duplicate the evictable and sleep_mapped attrs from the driver in zpool_ops. 
Link: https://lkml.kernel.org/r/20221128191616.1261026-3-nphamcs@gmail.com Reviewed-by: Sergey Senozhatsky Signed-off-by: Johannes Weiner Signed-off-by: Nhat Pham Cc: Dan Streetman Cc: Minchan Kim Cc: Nitin Gupta Cc: Seth Jennings Cc: Vitaly Wool Signed-off-by: Andrew Morton --- mm/z3fold.c | 36 +++++------------------------------- mm/zbud.c | 32 +++++--------------------------- mm/zpool.c | 10 ++-------- 3 files changed, 12 insertions(+), 66 deletions(-) diff --git a/mm/z3fold.c b/mm/z3fold.c index cf71da10d04e..a4de0c317ac7 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -68,9 +68,6 @@ * Structures *****************/ struct z3fold_pool; -struct z3fold_ops { - int (*evict)(struct z3fold_pool *pool, unsigned long handle); -}; enum buddy { HEADLESS = 0, @@ -138,8 +135,6 @@ struct z3fold_header { * @stale: list of pages marked for freeing * @pages_nr: number of z3fold pages in the pool. * @c_handle: cache for z3fold_buddy_slots allocation - * @ops: pointer to a structure of user defined operations specified at - * pool creation time. * @zpool: zpool driver * @zpool_ops: zpool operations structure with an evict callback * @compact_wq: workqueue for page layout background optimization @@ -158,7 +153,6 @@ struct z3fold_pool { struct list_head stale; atomic64_t pages_nr; struct kmem_cache *c_handle; - const struct z3fold_ops *ops; struct zpool *zpool; const struct zpool_ops *zpool_ops; struct workqueue_struct *compact_wq; @@ -907,13 +901,11 @@ out_fail: * z3fold_create_pool() - create a new z3fold pool * @name: pool name * @gfp: gfp flags when allocating the z3fold pool structure - * @ops: user-defined operations for the z3fold pool * * Return: pointer to the new z3fold pool or NULL if the metadata allocation * failed. 
*/ -static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp, - const struct z3fold_ops *ops) +static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp) { struct z3fold_pool *pool = NULL; int i, cpu; @@ -949,7 +941,6 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp, if (!pool->release_wq) goto out_wq; INIT_WORK(&pool->work, free_pages_work); - pool->ops = ops; return pool; out_wq: @@ -1230,10 +1221,6 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) slots.pool = (unsigned long)pool | (1 << HANDLES_NOFREE); spin_lock(&pool->lock); - if (!pool->ops || !pool->ops->evict || retries == 0) { - spin_unlock(&pool->lock); - return -EINVAL; - } for (i = 0; i < retries; i++) { if (list_empty(&pool->lru)) { spin_unlock(&pool->lock); @@ -1319,17 +1306,17 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) } /* Issue the eviction callback(s) */ if (middle_handle) { - ret = pool->ops->evict(pool, middle_handle); + ret = pool->zpool_ops->evict(pool->zpool, middle_handle); if (ret) goto next; } if (first_handle) { - ret = pool->ops->evict(pool, first_handle); + ret = pool->zpool_ops->evict(pool->zpool, first_handle); if (ret) goto next; } if (last_handle) { - ret = pool->ops->evict(pool, last_handle); + ret = pool->zpool_ops->evict(pool->zpool, last_handle); if (ret) goto next; } @@ -1593,26 +1580,13 @@ static const struct movable_operations z3fold_mops = { * zpool ****************/ -static int z3fold_zpool_evict(struct z3fold_pool *pool, unsigned long handle) -{ - if (pool->zpool && pool->zpool_ops && pool->zpool_ops->evict) - return pool->zpool_ops->evict(pool->zpool, handle); - else - return -ENOENT; -} - -static const struct z3fold_ops z3fold_zpool_ops = { - .evict = z3fold_zpool_evict -}; - static void *z3fold_zpool_create(const char *name, gfp_t gfp, const struct zpool_ops *zpool_ops, struct zpool *zpool) { struct z3fold_pool *pool; - pool = 
z3fold_create_pool(name, gfp, - zpool_ops ? &z3fold_zpool_ops : NULL); + pool = z3fold_create_pool(name, gfp); if (pool) { pool->zpool = zpool; pool->zpool_ops = zpool_ops; diff --git a/mm/zbud.c b/mm/zbud.c index 6348932430b8..3acd26193920 100644 --- a/mm/zbud.c +++ b/mm/zbud.c @@ -74,10 +74,6 @@ struct zbud_pool; -struct zbud_ops { - int (*evict)(struct zbud_pool *pool, unsigned long handle); -}; - /** * struct zbud_pool - stores metadata for each zbud pool * @lock: protects all pool fields and first|last_chunk fields of any @@ -90,8 +86,6 @@ struct zbud_ops { * @lru: list tracking the zbud pages in LRU order by most recently * added buddy. * @pages_nr: number of zbud pages in the pool. - * @ops: pointer to a structure of user defined operations specified at - * pool creation time. * @zpool: zpool driver * @zpool_ops: zpool operations structure with an evict callback * @@ -110,7 +104,6 @@ struct zbud_pool { }; struct list_head lru; u64 pages_nr; - const struct zbud_ops *ops; struct zpool *zpool; const struct zpool_ops *zpool_ops; }; @@ -212,12 +205,11 @@ static int num_free_chunks(struct zbud_header *zhdr) /** * zbud_create_pool() - create a new zbud pool * @gfp: gfp flags when allocating the zbud pool structure - * @ops: user-defined operations for the zbud pool * * Return: pointer to the new zbud pool or NULL if the metadata allocation * failed. 
*/ -static struct zbud_pool *zbud_create_pool(gfp_t gfp, const struct zbud_ops *ops) +static struct zbud_pool *zbud_create_pool(gfp_t gfp) { struct zbud_pool *pool; int i; @@ -231,7 +223,6 @@ static struct zbud_pool *zbud_create_pool(gfp_t gfp, const struct zbud_ops *ops) INIT_LIST_HEAD(&pool->buddied); INIT_LIST_HEAD(&pool->lru); pool->pages_nr = 0; - pool->ops = ops; return pool; } @@ -419,8 +410,7 @@ static int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries) unsigned long first_handle = 0, last_handle = 0; spin_lock(&pool->lock); - if (!pool->ops || !pool->ops->evict || list_empty(&pool->lru) || - retries == 0) { + if (list_empty(&pool->lru)) { spin_unlock(&pool->lock); return -EINVAL; } @@ -444,12 +434,12 @@ static int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries) /* Issue the eviction callback(s) */ if (first_handle) { - ret = pool->ops->evict(pool, first_handle); + ret = pool->zpool_ops->evict(pool->zpool, first_handle); if (ret) goto next; } if (last_handle) { - ret = pool->ops->evict(pool, last_handle); + ret = pool->zpool_ops->evict(pool->zpool, last_handle); if (ret) goto next; } @@ -524,25 +514,13 @@ static u64 zbud_get_pool_size(struct zbud_pool *pool) * zpool ****************/ -static int zbud_zpool_evict(struct zbud_pool *pool, unsigned long handle) -{ - if (pool->zpool && pool->zpool_ops && pool->zpool_ops->evict) - return pool->zpool_ops->evict(pool->zpool, handle); - else - return -ENOENT; -} - -static const struct zbud_ops zbud_zpool_ops = { - .evict = zbud_zpool_evict -}; - static void *zbud_zpool_create(const char *name, gfp_t gfp, const struct zpool_ops *zpool_ops, struct zpool *zpool) { struct zbud_pool *pool; - pool = zbud_create_pool(gfp, zpool_ops ? 
&zbud_zpool_ops : NULL); + pool = zbud_create_pool(gfp); if (pool) { pool->zpool = zpool; pool->zpool_ops = zpool_ops; diff --git a/mm/zpool.c b/mm/zpool.c index f46c0d5e766c..571f5c5031dd 100644 --- a/mm/zpool.c +++ b/mm/zpool.c @@ -21,9 +21,6 @@ struct zpool { struct zpool_driver *driver; void *pool; - const struct zpool_ops *ops; - bool evictable; - bool can_sleep_mapped; }; static LIST_HEAD(drivers_head); @@ -177,9 +174,6 @@ struct zpool *zpool_create_pool(const char *type, const char *name, gfp_t gfp, zpool->driver = driver; zpool->pool = driver->create(name, gfp, ops, zpool); - zpool->ops = ops; - zpool->evictable = driver->shrink && ops && ops->evict; - zpool->can_sleep_mapped = driver->sleep_mapped; if (!zpool->pool) { pr_err("couldn't create %s pool\n", type); @@ -380,7 +374,7 @@ u64 zpool_get_total_size(struct zpool *zpool) */ bool zpool_evictable(struct zpool *zpool) { - return zpool->evictable; + return zpool->driver->shrink; } /** @@ -398,7 +392,7 @@ bool zpool_evictable(struct zpool *zpool) */ bool zpool_can_sleep_mapped(struct zpool *zpool) { - return zpool->can_sleep_mapped; + return zpool->driver->sleep_mapped; } MODULE_LICENSE("GPL"); From c0547d0b6a4b637db05406b90ba82e1b2e71de56 Mon Sep 17 00:00:00 2001 From: Nhat Pham Date: Mon, 28 Nov 2022 11:16:12 -0800 Subject: [PATCH 3693/4122] zsmalloc: consolidate zs_pool's migrate_lock and size_class's locks Currently, zsmalloc has a hierarchy of locks, which includes a pool-level migrate_lock, and a lock for each size class. We have to obtain both locks in the hotpath in most cases anyway, except for zs_malloc. This exception will no longer exist when we introduce a LRU into the zs_pool for the new writeback functionality - we will need to obtain a pool-level lock to synchronize LRU handling even in zs_malloc. In preparation for zsmalloc writeback, consolidate these locks into a single pool-level lock, which drastically reduces the complexity of synchronization in zsmalloc. 
We have also benchmarked the lock consolidation to see the performance effect of this change on zram. First, we ran a synthetic FS workload on a server machine with 36 cores (same machine for all runs), using fs_mark -d ../zram1mnt -s 100000 -n 2500 -t 32 -k before and after for btrfs and ext4 on zram (FS usage is 80%). Here is the result (unit is files/second): With lock consolidation (btrfs): Average: 13520.2, Median: 13531.0, Stddev: 137.5961482019028 Without lock consolidation (btrfs): Average: 13487.2, Median: 13575.0, Stddev: 309.08283679298665 With lock consolidation (ext4): Average: 16824.4, Median: 16839.0, Stddev: 89.97388510006668 Without lock consolidation (ext4): Average: 16958.0, Median: 16986.0, Stddev: 194.7370021336469 As you can see, we observe a 0.3% regression for btrfs, and a 0.9% regression for ext4 (comparing medians). This is a small, barely measurable difference in my opinion. For a more realistic scenario, we also tried building the kernel on zram. Here is the time it takes (in seconds): With lock consolidation (btrfs): real Average: 319.6, Median: 320.0, Stddev: 0.8944271909999159 user Average: 6894.2, Median: 6895.0, Stddev: 25.528415540334656 sys Average: 521.4, Median: 522.0, Stddev: 1.51657508881031 Without lock consolidation (btrfs): real Average: 319.8, Median: 320.0, Stddev: 0.8366600265340756 user Average: 6896.6, Median: 6899.0, Stddev: 16.04057355583023 sys Average: 520.6, Median: 521.0, Stddev: 1.140175425099138 With lock consolidation (ext4): real Average: 320.0, Median: 319.0, Stddev: 1.4142135623730951 user Average: 6896.8, Median: 6878.0, Stddev: 28.621670111997307 sys Average: 521.2, Median: 521.0, Stddev: 1.7888543819998317 Without lock consolidation (ext4): real Average: 319.6, Median: 319.0, Stddev: 0.8944271909999159 user Average: 6886.2, Median: 6887.0, Stddev: 16.93221781102523 sys Average: 520.4, Median: 520.0, Stddev: 1.140175425099138 The difference is entirely within the noise of a typical run on zram.
This hardly justifies the complexity of maintaining both the pool lock and the class lock. In fact, for writeback, we would need to introduce yet another lock to prevent data races on the pool's LRU, further complicating the lock handling logic. IMHO, it is just better to collapse all of these into a single pool-level lock. Link: https://lkml.kernel.org/r/20221128191616.1261026-4-nphamcs@gmail.com Signed-off-by: Nhat Pham Suggested-by: Johannes Weiner Acked-by: Minchan Kim Acked-by: Johannes Weiner Reviewed-by: Sergey Senozhatsky Cc: Dan Streetman Cc: Nitin Gupta Cc: Seth Jennings Cc: Vitaly Wool Signed-off-by: Andrew Morton --- mm/zsmalloc.c | 87 ++++++++++++++++++++++----------------------------- 1 file changed, 37 insertions(+), 50 deletions(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 78feda34ad9a..5427a00a0518 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -33,8 +33,7 @@ /* * lock ordering: * page_lock - * pool->migrate_lock - * class->lock + * pool->lock * zspage->lock */ @@ -192,7 +191,6 @@ static const int fullness_threshold_frac = 4; static size_t huge_class_size; struct size_class { - spinlock_t lock; struct list_head fullness_list[NR_ZS_FULLNESS]; /* * Size of objects stored in this class. 
Must be multiple @@ -247,8 +245,7 @@ struct zs_pool { #ifdef CONFIG_COMPACTION struct work_struct free_work; #endif - /* protect page/zspage migration */ - rwlock_t migrate_lock; + spinlock_t lock; }; struct zspage { @@ -355,7 +352,7 @@ static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage) kmem_cache_free(pool->zspage_cachep, zspage); } -/* class->lock(which owns the handle) synchronizes races */ +/* pool->lock(which owns the handle) synchronizes races */ static void record_obj(unsigned long handle, unsigned long obj) { *(unsigned long *)handle = obj; @@ -452,7 +449,7 @@ static __maybe_unused int is_first_page(struct page *page) return PagePrivate(page); } -/* Protected by class->lock */ +/* Protected by pool->lock */ static inline int get_zspage_inuse(struct zspage *zspage) { return zspage->inuse; @@ -597,13 +594,13 @@ static int zs_stats_size_show(struct seq_file *s, void *v) if (class->index != i) continue; - spin_lock(&class->lock); + spin_lock(&pool->lock); class_almost_full = zs_stat_get(class, CLASS_ALMOST_FULL); class_almost_empty = zs_stat_get(class, CLASS_ALMOST_EMPTY); obj_allocated = zs_stat_get(class, OBJ_ALLOCATED); obj_used = zs_stat_get(class, OBJ_USED); freeable = zs_can_compact(class); - spin_unlock(&class->lock); + spin_unlock(&pool->lock); objs_per_zspage = class->objs_per_zspage; pages_used = obj_allocated / objs_per_zspage * @@ -916,7 +913,7 @@ static void __free_zspage(struct zs_pool *pool, struct size_class *class, get_zspage_mapping(zspage, &class_idx, &fg); - assert_spin_locked(&class->lock); + assert_spin_locked(&pool->lock); VM_BUG_ON(get_zspage_inuse(zspage)); VM_BUG_ON(fg != ZS_EMPTY); @@ -1268,19 +1265,19 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle, BUG_ON(in_interrupt()); /* It guarantees it can get zspage from handle safely */ - read_lock(&pool->migrate_lock); + spin_lock(&pool->lock); obj = handle_to_obj(handle); obj_to_location(obj, &page, &obj_idx); zspage = get_zspage(page); /* - * 
migration cannot move any zpages in this zspage. Here, class->lock + * migration cannot move any zpages in this zspage. Here, pool->lock * is too heavy since callers would take some time until they calls * zs_unmap_object API so delegate the locking from class to zspage * which is smaller granularity. */ migrate_read_lock(zspage); - read_unlock(&pool->migrate_lock); + spin_unlock(&pool->lock); class = zspage_class(pool, zspage); off = (class->size * obj_idx) & ~PAGE_MASK; @@ -1433,8 +1430,8 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp) size += ZS_HANDLE_SIZE; class = pool->size_class[get_size_class_index(size)]; - /* class->lock effectively protects the zpage migration */ - spin_lock(&class->lock); + /* pool->lock effectively protects the zpage migration */ + spin_lock(&pool->lock); zspage = find_get_zspage(class); if (likely(zspage)) { obj = obj_malloc(pool, zspage, handle); @@ -1442,12 +1439,12 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp) fix_fullness_group(class, zspage); record_obj(handle, obj); class_stat_inc(class, OBJ_USED, 1); - spin_unlock(&class->lock); + spin_unlock(&pool->lock); return handle; } - spin_unlock(&class->lock); + spin_unlock(&pool->lock); zspage = alloc_zspage(pool, class, gfp); if (!zspage) { @@ -1455,7 +1452,7 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp) return (unsigned long)ERR_PTR(-ENOMEM); } - spin_lock(&class->lock); + spin_lock(&pool->lock); obj = obj_malloc(pool, zspage, handle); newfg = get_fullness_group(class, zspage); insert_zspage(class, zspage, newfg); @@ -1468,7 +1465,7 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp) /* We completely set up zspage so mark them as movable */ SetZsPageMovable(pool, zspage); - spin_unlock(&class->lock); + spin_unlock(&pool->lock); return handle; } @@ -1512,16 +1509,14 @@ void zs_free(struct zs_pool *pool, unsigned long handle) return; /* - * The pool->migrate_lock protects the race with 
zpage's migration + * The pool->lock protects the race with zpage's migration * so it's safe to get the page from handle. */ - read_lock(&pool->migrate_lock); + spin_lock(&pool->lock); obj = handle_to_obj(handle); obj_to_page(obj, &f_page); zspage = get_zspage(f_page); class = zspage_class(pool, zspage); - spin_lock(&class->lock); - read_unlock(&pool->migrate_lock); obj_free(class->size, obj); class_stat_dec(class, OBJ_USED, 1); @@ -1531,7 +1526,7 @@ void zs_free(struct zs_pool *pool, unsigned long handle) free_zspage(pool, class, zspage); out: - spin_unlock(&class->lock); + spin_unlock(&pool->lock); cache_free_handle(pool, handle); } EXPORT_SYMBOL_GPL(zs_free); @@ -1888,16 +1883,12 @@ static int zs_page_migrate(struct page *newpage, struct page *page, pool = zspage->pool; /* - * The pool migrate_lock protects the race between zpage migration + * The pool's lock protects the race between zpage migration * and zs_free. */ - write_lock(&pool->migrate_lock); + spin_lock(&pool->lock); class = zspage_class(pool, zspage); - /* - * the class lock protects zpage alloc/free in the zspage. - */ - spin_lock(&class->lock); /* the migrate_write_lock protects zpage access via zs_map_object */ migrate_write_lock(zspage); @@ -1927,10 +1918,9 @@ static int zs_page_migrate(struct page *newpage, struct page *page, replace_sub_page(class, zspage, newpage, page); /* * Since we complete the data copy and set up new zspage structure, - * it's okay to release migration_lock. + * it's okay to release the pool's lock. 
*/ - write_unlock(&pool->migrate_lock); - spin_unlock(&class->lock); + spin_unlock(&pool->lock); dec_zspage_isolation(zspage); migrate_write_unlock(zspage); @@ -1985,9 +1975,9 @@ static void async_free_zspage(struct work_struct *work) if (class->index != i) continue; - spin_lock(&class->lock); + spin_lock(&pool->lock); list_splice_init(&class->fullness_list[ZS_EMPTY], &free_pages); - spin_unlock(&class->lock); + spin_unlock(&pool->lock); } list_for_each_entry_safe(zspage, tmp, &free_pages, list) { @@ -1997,9 +1987,9 @@ static void async_free_zspage(struct work_struct *work) get_zspage_mapping(zspage, &class_idx, &fullness); VM_BUG_ON(fullness != ZS_EMPTY); class = pool->size_class[class_idx]; - spin_lock(&class->lock); + spin_lock(&pool->lock); __free_zspage(pool, class, zspage); - spin_unlock(&class->lock); + spin_unlock(&pool->lock); } }; @@ -2060,10 +2050,11 @@ static unsigned long __zs_compact(struct zs_pool *pool, struct zspage *dst_zspage = NULL; unsigned long pages_freed = 0; - /* protect the race between zpage migration and zs_free */ - write_lock(&pool->migrate_lock); - /* protect zpage allocation/free */ - spin_lock(&class->lock); + /* + * protect the race between zpage migration and zs_free + * as well as zpage allocation/free + */ + spin_lock(&pool->lock); while ((src_zspage = isolate_zspage(class, true))) { /* protect someone accessing the zspage(i.e., zs_map_object) */ migrate_write_lock(src_zspage); @@ -2088,7 +2079,7 @@ static unsigned long __zs_compact(struct zs_pool *pool, putback_zspage(class, dst_zspage); migrate_write_unlock(dst_zspage); dst_zspage = NULL; - if (rwlock_is_contended(&pool->migrate_lock)) + if (spin_is_contended(&pool->lock)) break; } @@ -2105,11 +2096,9 @@ static unsigned long __zs_compact(struct zs_pool *pool, pages_freed += class->pages_per_zspage; } else migrate_write_unlock(src_zspage); - spin_unlock(&class->lock); - write_unlock(&pool->migrate_lock); + spin_unlock(&pool->lock); cond_resched(); - 
write_lock(&pool->migrate_lock); - spin_lock(&class->lock); + spin_lock(&pool->lock); } if (src_zspage) { @@ -2117,8 +2106,7 @@ static unsigned long __zs_compact(struct zs_pool *pool, migrate_write_unlock(src_zspage); } - spin_unlock(&class->lock); - write_unlock(&pool->migrate_lock); + spin_unlock(&pool->lock); return pages_freed; } @@ -2221,7 +2209,7 @@ struct zs_pool *zs_create_pool(const char *name) return NULL; init_deferred_free(pool); - rwlock_init(&pool->migrate_lock); + spin_lock_init(&pool->lock); pool->name = kstrdup(name, GFP_KERNEL); if (!pool->name) @@ -2292,7 +2280,6 @@ struct zs_pool *zs_create_pool(const char *name) class->index = i; class->pages_per_zspage = pages_per_zspage; class->objs_per_zspage = objs_per_zspage; - spin_lock_init(&class->lock); pool->size_class[i] = class; for (fullness = ZS_EMPTY; fullness < NR_ZS_FULLNESS; fullness++) From 64f768c6b32e1957e2b65b70e97cb4cb62344bc4 Mon Sep 17 00:00:00 2001 From: Nhat Pham Date: Mon, 28 Nov 2022 11:16:13 -0800 Subject: [PATCH 3694/4122] zsmalloc: add a LRU to zs_pool to keep track of zspages in LRU order This helps determines the coldest zspages as candidates for writeback. 
Link: https://lkml.kernel.org/r/20221128191616.1261026-5-nphamcs@gmail.com Signed-off-by: Nhat Pham Acked-by: Johannes Weiner Reviewed-by: Sergey Senozhatsky Cc: Dan Streetman Cc: Minchan Kim Cc: Nitin Gupta Cc: Seth Jennings Cc: Vitaly Wool Signed-off-by: Andrew Morton --- mm/zsmalloc.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 5427a00a0518..b1bc231d94a3 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -239,6 +239,11 @@ struct zs_pool { /* Compact classes */ struct shrinker shrinker; +#ifdef CONFIG_ZPOOL + /* List tracking the zspages in LRU order by most recently added object */ + struct list_head lru; +#endif + #ifdef CONFIG_ZSMALLOC_STAT struct dentry *stat_dentry; #endif @@ -260,6 +265,12 @@ struct zspage { unsigned int freeobj; struct page *first_page; struct list_head list; /* fullness list */ + +#ifdef CONFIG_ZPOOL + /* links the zspage to the lru list in the pool */ + struct list_head lru; +#endif + struct zs_pool *pool; #ifdef CONFIG_COMPACTION rwlock_t lock; @@ -953,6 +964,9 @@ static void free_zspage(struct zs_pool *pool, struct size_class *class, } remove_zspage(class, zspage, ZS_EMPTY); +#ifdef CONFIG_ZPOOL + list_del(&zspage->lru); +#endif __free_zspage(pool, class, zspage); } @@ -998,6 +1012,10 @@ static void init_zspage(struct size_class *class, struct zspage *zspage) off %= PAGE_SIZE; } +#ifdef CONFIG_ZPOOL + INIT_LIST_HEAD(&zspage->lru); +#endif + set_freeobj(zspage, 0); } @@ -1270,6 +1288,31 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle, obj_to_location(obj, &page, &obj_idx); zspage = get_zspage(page); +#ifdef CONFIG_ZPOOL + /* + * Move the zspage to front of pool's LRU. + * + * Note that this is swap-specific, so by definition there are no ongoing + * accesses to the memory while the page is swapped out that would make + * it "hot". A new entry is hot, then ages to the tail until it gets either + * written back or swaps back in. 
+ * + * Furthermore, map is also called during writeback. We must not put an + * isolated page on the LRU mid-reclaim. + * + * As a result, only update the LRU when the page is mapped for write + * when it's first instantiated. + * + * This is a deviation from the other backends, which perform this update + * in the allocation function (zbud_alloc, z3fold_alloc). + */ + if (mm == ZS_MM_WO) { + if (!list_empty(&zspage->lru)) + list_del(&zspage->lru); + list_add(&zspage->lru, &pool->lru); + } +#endif + /* * migration cannot move any zpages in this zspage. Here, pool->lock * is too heavy since callers would take some time until they calls @@ -1988,6 +2031,9 @@ static void async_free_zspage(struct work_struct *work) VM_BUG_ON(fullness != ZS_EMPTY); class = pool->size_class[class_idx]; spin_lock(&pool->lock); +#ifdef CONFIG_ZPOOL + list_del(&zspage->lru); +#endif __free_zspage(pool, class, zspage); spin_unlock(&pool->lock); } @@ -2299,6 +2345,10 @@ struct zs_pool *zs_create_pool(const char *name) */ zs_register_shrinker(pool); +#ifdef CONFIG_ZPOOL + INIT_LIST_HEAD(&pool->lru); +#endif + return pool; err: From bd0fded29689a762e6a749a1258a59cc2b99a18a Mon Sep 17 00:00:00 2001 From: Nhat Pham Date: Mon, 28 Nov 2022 11:16:14 -0800 Subject: [PATCH 3695/4122] zsmalloc: add zpool_ops field to zs_pool to store evict handlers This adds a new field to zs_pool to store evict handlers for writeback, analogous to the zbud allocator. 
Link: https://lkml.kernel.org/r/20221128191616.1261026-6-nphamcs@gmail.com Signed-off-by: Nhat Pham Acked-by: Minchan Kim Acked-by: Johannes Weiner Reviewed-by: Sergey Senozhatsky Cc: Dan Streetman Cc: Nitin Gupta Cc: Seth Jennings Cc: Vitaly Wool Signed-off-by: Andrew Morton --- mm/zsmalloc.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index b1bc231d94a3..d06f9150b9da 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -242,6 +242,8 @@ struct zs_pool { #ifdef CONFIG_ZPOOL /* List tracking the zspages in LRU order by most recently added object */ struct list_head lru; + struct zpool *zpool; + const struct zpool_ops *zpool_ops; #endif #ifdef CONFIG_ZSMALLOC_STAT @@ -382,7 +384,14 @@ static void *zs_zpool_create(const char *name, gfp_t gfp, * different contexts and its caller must provide a valid * gfp mask. */ - return zs_create_pool(name); + struct zs_pool *pool = zs_create_pool(name); + + if (pool) { + pool->zpool = zpool; + pool->zpool_ops = zpool_ops; + } + + return pool; } static void zs_zpool_destroy(void *pool) From 9997bc017549acd6425e32300eff28424ffeeb6b Mon Sep 17 00:00:00 2001 From: Nhat Pham Date: Mon, 28 Nov 2022 11:16:15 -0800 Subject: [PATCH 3696/4122] zsmalloc: implement writeback mechanism for zsmalloc This commit adds the writeback mechanism for zsmalloc, analogous to the zbud allocator. Zsmalloc will attempt to determine the coldest zspage (i.e least recently used) in the pool, and attempt to write back all the stored compressed objects via the pool's evict handler. 
Link: https://lkml.kernel.org/r/20221128191616.1261026-7-nphamcs@gmail.com Signed-off-by: Nhat Pham Acked-by: Johannes Weiner Reviewed-by: Sergey Senozhatsky Cc: Dan Streetman Cc: Minchan Kim Cc: Nitin Gupta Cc: Seth Jennings Cc: Vitaly Wool Signed-off-by: Andrew Morton --- mm/zsmalloc.c | 196 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 184 insertions(+), 12 deletions(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index d06f9150b9da..9445bee6b014 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -271,12 +271,13 @@ struct zspage { #ifdef CONFIG_ZPOOL /* links the zspage to the lru list in the pool */ struct list_head lru; + bool under_reclaim; + /* list of unfreed handles whose objects have been reclaimed */ + unsigned long *deferred_handles; #endif struct zs_pool *pool; -#ifdef CONFIG_COMPACTION rwlock_t lock; -#endif }; struct mapping_area { @@ -297,10 +298,11 @@ static bool ZsHugePage(struct zspage *zspage) return zspage->huge; } -#ifdef CONFIG_COMPACTION static void migrate_lock_init(struct zspage *zspage); static void migrate_read_lock(struct zspage *zspage); static void migrate_read_unlock(struct zspage *zspage); + +#ifdef CONFIG_COMPACTION static void migrate_write_lock(struct zspage *zspage); static void migrate_write_lock_nested(struct zspage *zspage); static void migrate_write_unlock(struct zspage *zspage); @@ -308,9 +310,6 @@ static void kick_deferred_free(struct zs_pool *pool); static void init_deferred_free(struct zs_pool *pool); static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage); #else -static void migrate_lock_init(struct zspage *zspage) {} -static void migrate_read_lock(struct zspage *zspage) {} -static void migrate_read_unlock(struct zspage *zspage) {} static void migrate_write_lock(struct zspage *zspage) {} static void migrate_write_lock_nested(struct zspage *zspage) {} static void migrate_write_unlock(struct zspage *zspage) {} @@ -413,6 +412,27 @@ static void zs_zpool_free(void *pool, unsigned long 
handle) zs_free(pool, handle); } +static int zs_reclaim_page(struct zs_pool *pool, unsigned int retries); + +static int zs_zpool_shrink(void *pool, unsigned int pages, + unsigned int *reclaimed) +{ + unsigned int total = 0; + int ret = -EINVAL; + + while (total < pages) { + ret = zs_reclaim_page(pool, 8); + if (ret < 0) + break; + total++; + } + + if (reclaimed) + *reclaimed = total; + + return ret; +} + static void *zs_zpool_map(void *pool, unsigned long handle, enum zpool_mapmode mm) { @@ -451,6 +471,7 @@ static struct zpool_driver zs_zpool_driver = { .malloc_support_movable = true, .malloc = zs_zpool_malloc, .free = zs_zpool_free, + .shrink = zs_zpool_shrink, .map = zs_zpool_map, .unmap = zs_zpool_unmap, .total_size = zs_zpool_total_size, @@ -924,6 +945,25 @@ unlock: return 0; } +#ifdef CONFIG_ZPOOL +/* + * Free all the deferred handles whose objects are freed in zs_free. + */ +static void free_handles(struct zs_pool *pool, struct zspage *zspage) +{ + unsigned long handle = (unsigned long)zspage->deferred_handles; + + while (handle) { + unsigned long nxt_handle = handle_to_obj(handle); + + cache_free_handle(pool, handle); + handle = nxt_handle; + } +} +#else +static inline void free_handles(struct zs_pool *pool, struct zspage *zspage) {} +#endif + static void __free_zspage(struct zs_pool *pool, struct size_class *class, struct zspage *zspage) { @@ -938,6 +978,9 @@ static void __free_zspage(struct zs_pool *pool, struct size_class *class, VM_BUG_ON(get_zspage_inuse(zspage)); VM_BUG_ON(fg != ZS_EMPTY); + /* Free all deferred handles from zs_free */ + free_handles(pool, zspage); + next = page = get_first_page(zspage); do { VM_BUG_ON_PAGE(!PageLocked(page), page); @@ -1023,6 +1066,8 @@ static void init_zspage(struct size_class *class, struct zspage *zspage) #ifdef CONFIG_ZPOOL INIT_LIST_HEAD(&zspage->lru); + zspage->under_reclaim = false; + zspage->deferred_handles = NULL; #endif set_freeobj(zspage, 0); @@ -1572,12 +1617,26 @@ void zs_free(struct zs_pool *pool, 
unsigned long handle) obj_free(class->size, obj); class_stat_dec(class, OBJ_USED, 1); - fullness = fix_fullness_group(class, zspage); - if (fullness != ZS_EMPTY) - goto out; - free_zspage(pool, class, zspage); -out: +#ifdef CONFIG_ZPOOL + if (zspage->under_reclaim) { + /* + * Reclaim needs the handles during writeback. It'll free + * them along with the zspage when it's done with them. + * + * Record current deferred handle at the memory location + * whose address is given by handle. + */ + record_obj(handle, (unsigned long)zspage->deferred_handles); + zspage->deferred_handles = (unsigned long *)handle; + spin_unlock(&pool->lock); + return; + } +#endif + fullness = fix_fullness_group(class, zspage); + if (fullness == ZS_EMPTY) + free_zspage(pool, class, zspage); + spin_unlock(&pool->lock); cache_free_handle(pool, handle); } @@ -1777,7 +1836,7 @@ static enum fullness_group putback_zspage(struct size_class *class, return fullness; } -#ifdef CONFIG_COMPACTION +#if defined(CONFIG_ZPOOL) || defined(CONFIG_COMPACTION) /* * To prevent zspage destroy during migration, zspage freeing should * hold locks of all pages in the zspage. @@ -1819,6 +1878,24 @@ static void lock_zspage(struct zspage *zspage) } migrate_read_unlock(zspage); } +#endif /* defined(CONFIG_ZPOOL) || defined(CONFIG_COMPACTION) */ + +#ifdef CONFIG_ZPOOL +/* + * Unlocks all the pages of the zspage. + * + * pool->lock must be held before this function is called + * to prevent the underlying pages from migrating. 
+ */ +static void unlock_zspage(struct zspage *zspage) +{ + struct page *page = get_first_page(zspage); + + do { + unlock_page(page); + } while ((page = get_next_page(page)) != NULL); +} +#endif /* CONFIG_ZPOOL */ static void migrate_lock_init(struct zspage *zspage) { @@ -1835,6 +1912,7 @@ static void migrate_read_unlock(struct zspage *zspage) __releases(&zspage->lock) read_unlock(&zspage->lock); } +#ifdef CONFIG_COMPACTION static void migrate_write_lock(struct zspage *zspage) { write_lock(&zspage->lock); @@ -2399,6 +2477,100 @@ void zs_destroy_pool(struct zs_pool *pool) } EXPORT_SYMBOL_GPL(zs_destroy_pool); +#ifdef CONFIG_ZPOOL +static int zs_reclaim_page(struct zs_pool *pool, unsigned int retries) +{ + int i, obj_idx, ret = 0; + unsigned long handle; + struct zspage *zspage; + struct page *page; + enum fullness_group fullness; + + /* Lock LRU and fullness list */ + spin_lock(&pool->lock); + if (list_empty(&pool->lru)) { + spin_unlock(&pool->lock); + return -EINVAL; + } + + for (i = 0; i < retries; i++) { + struct size_class *class; + + zspage = list_last_entry(&pool->lru, struct zspage, lru); + list_del(&zspage->lru); + + /* zs_free may free objects, but not the zspage and handles */ + zspage->under_reclaim = true; + + class = zspage_class(pool, zspage); + fullness = get_fullness_group(class, zspage); + + /* Lock out object allocations and object compaction */ + remove_zspage(class, zspage, fullness); + + spin_unlock(&pool->lock); + cond_resched(); + + /* Lock backing pages into place */ + lock_zspage(zspage); + + obj_idx = 0; + page = get_first_page(zspage); + while (1) { + handle = find_alloced_obj(class, page, &obj_idx); + if (!handle) { + page = get_next_page(page); + if (!page) + break; + obj_idx = 0; + continue; + } + + /* + * This will write the object and call zs_free. + * + * zs_free will free the object, but the + * under_reclaim flag prevents it from freeing + * the zspage altogether. 
This is necessary so + * that we can continue working with the + * zspage potentially after the last object + * has been freed. + */ + ret = pool->zpool_ops->evict(pool->zpool, handle); + if (ret) + goto next; + + obj_idx++; + } + +next: + /* For freeing the zspage, or putting it back in the pool and LRU list. */ + spin_lock(&pool->lock); + zspage->under_reclaim = false; + + if (!get_zspage_inuse(zspage)) { + /* + * Fullness went stale as zs_free() won't touch it + * while the page is removed from the pool. Fix it + * up for the check in __free_zspage(). + */ + zspage->fullness = ZS_EMPTY; + + __free_zspage(pool, class, zspage); + spin_unlock(&pool->lock); + return 0; + } + + putback_zspage(class, zspage); + list_add(&zspage->lru, &pool->lru); + unlock_zspage(zspage); + } + + spin_unlock(&pool->lock); + return -EAGAIN; +} +#endif /* CONFIG_ZPOOL */ + static int __init zs_init(void) { int ret; From feeb9b26952367bc1171592ee476f95aa81ee588 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 28 Nov 2022 10:56:31 -0500 Subject: [PATCH 3697/4122] filemap: skip write and wait if end offset precedes start Patch series "filemap: skip write and wait if end offset precedes start", v2. A fix for the odd write and wait behavior described in the patch 1 commit log. Technically patch 1 could simply remove the check rather than lift it into the callers, but this seemed a bit more user friendly to me. Patch 2 is appended after observation that fadvise() interacted poorly with the v1 patch. This is no longer a problem with v2, making patch 2 purely a cleanup. This series survived both fstests and ltp regression runs without observable problems. I had (end < start) warning checks in each relevant function, with fadvise() being the only caller that triggered them. That said, I dropped the warnings after testing because there seemed too much potential for noise from the various other callers.
This patch (of 2): A call to file[map]_write_and_wait_range() with an end offset that precedes the start offset but happens to land in the same page can trigger writeback submission but fails to wait on the submitted page. Writeback submission occurs because __filemap_fdatawrite_range() passes both offsets down into write_cache_pages(), which rounds down to page indexes before it starts processing writeback. However, __filemap_fdatawait_range() immediately returns if the byte-granular end offset precedes the start offset. This behavior was observed in the form of unpredictable latency from a frequent write and wait call with incorrect parameters. The behavior gave the impression that the fdatawait path might occasionally fail to wait on writeback, but further investigation showed the latency was from write_cache_pages() waiting on writeback state to clear for a page already under writeback. Therefore, this indicated that fdatawait actually never waits on writeback in this particular situation. The byte granular check in __filemap_fdatawait_range() goes all the way back to the old wait_on_page_writeback() helper. It originally used page offsets and so would have waited in this problematic case. That changed to byte granularity file offsets in commit 94004ed726f3 ("kill wait_on_page_writeback_range"), which subtly changed this behavior. The check itself has become somewhat redundant since the error checking code that used to follow the wait loop (at the time of the aforementioned commit) has now been removed and lifted into the higher level callers. Therefore, we can restore historical fdatawait behavior by simply removing the check. Since the current fdatawait behavior has been in place for quite some time and is consistent with other interfaces that use file offsets, instead lift the check into the file[map]_write_and_wait_range() helpers to provide consistent behavior between the write and wait. 
Link: https://lkml.kernel.org/r/20221128155632.3950447-1-bfoster@redhat.com Link: https://lkml.kernel.org/r/20221128155632.3950447-2-bfoster@redhat.com Signed-off-by: Brian Foster Reviewed-by: Christoph Hellwig Signed-off-by: Andrew Morton --- mm/filemap.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index 65eee6ec1066..242cd8bd8330 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -506,9 +506,6 @@ static void __filemap_fdatawait_range(struct address_space *mapping, struct pagevec pvec; int nr_pages; - if (end_byte < start_byte) - return; - pagevec_init(&pvec); while (index <= end) { unsigned i; @@ -670,6 +667,9 @@ int filemap_write_and_wait_range(struct address_space *mapping, { int err = 0, err2; + if (lend < lstart) + return 0; + if (mapping_needs_writeback(mapping)) { err = __filemap_fdatawrite_range(mapping, lstart, lend, WB_SYNC_ALL); @@ -770,6 +770,9 @@ int file_write_and_wait_range(struct file *file, loff_t lstart, loff_t lend) int err = 0, err2; struct address_space *mapping = file->f_mapping; + if (lend < lstart) + return 0; + if (mapping_needs_writeback(mapping)) { err = __filemap_fdatawrite_range(mapping, lstart, lend, WB_SYNC_ALL); From 3cd629e5775397103e0428f62ce64747741dbfe5 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 28 Nov 2022 10:56:32 -0500 Subject: [PATCH 3698/4122] mm/fadvise: use LLONG_MAX instead of -1 for eof generic_fadvise() sets endbyte = -1 to specify end of file (i.e. if length == 0 is passed from userspace). Most other callers to filemap_fdatawrite_range() use LLONG_MAX for this purpose, particularly if they also call fdatawait_range() (which requires end >= start). For example, sync_file_range(), vfs_fsync() (where the range is passed down through per-fs ->fsync() callbacks), filemap_flush(), etc. generic_fadvise() does not currently wait on writeback, but fix the call up to be consistent with other callers. 
Link: https://lkml.kernel.org/r/20221128155632.3950447-3-bfoster@redhat.com Signed-off-by: Brian Foster Reviewed-by: Christoph Hellwig Signed-off-by: Andrew Morton --- mm/fadvise.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/fadvise.c b/mm/fadvise.c index c76ee665355a..bf04fec87f35 100644 --- a/mm/fadvise.c +++ b/mm/fadvise.c @@ -72,7 +72,7 @@ int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice) */ endbyte = (u64)offset + (u64)len; if (!len || endbyte < len) - endbyte = -1; + endbyte = LLONG_MAX; else endbyte--; /* inclusive */ From d3a89233583bf8edab18ac09732759c71dbe0173 Mon Sep 17 00:00:00 2001 From: zhang songyi Date: Mon, 28 Nov 2022 21:07:43 +0800 Subject: [PATCH 3699/4122] include/linux/pgtable.h: : remove redundant pte variable Return value from ptep_get_and_clear_full() directly instead of taking this in another redundant variable. Link: https://lkml.kernel.org/r/202211282107437343474@zte.com.cn Signed-off-by: zhang songyi Signed-off-by: Andrew Morton --- include/linux/pgtable.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index c74cce67eec8..dfabd549d2e7 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -425,9 +425,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long address, pte_t *ptep, int full) { - pte_t pte; - pte = ptep_get_and_clear(mm, address, ptep); - return pte; + return ptep_get_and_clear(mm, address, ptep); } #endif From 1e8e4a7cc2fa3017b1daf02612e095d51924ce1e Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Mon, 28 Nov 2022 10:45:40 +0100 Subject: [PATCH 3700/4122] lockdep: allow instrumenting lockdep.c with KMSAN Lockdep and KMSAN used to play badly together, causing deadlocks when KMSAN instrumentation of lockdep.c called lockdep functions recursively. Looks like this is no more the case, and a kernel can run (yet slower) with both KMSAN and lockdep enabled. 
This patch should fix false positives on wq_head->lock->dep_map, which KMSAN used to consider uninitialized because of lockdep.c not being instrumented. Link: https://lore.kernel.org/lkml/Y3b9AAEKp2Vr3e6O@sol.localdomain/ Link: https://lkml.kernel.org/r/20221128094541.2645890-1-glider@google.com Signed-off-by: Alexander Potapenko Reported-by: Eric Biggers Cc: Dmitry Vyukov Cc: Ingo Molnar Cc: Marco Elver Cc: Peter Zijlstra Cc: Will Deacon Signed-off-by: Andrew Morton --- kernel/locking/Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index ea925731fa40..0db4093d17b8 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -7,7 +7,6 @@ obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o # Avoid recursion lockdep -> sanitizer -> ... -> lockdep. KCSAN_SANITIZE_lockdep.o := n -KMSAN_SANITIZE_lockdep.o := n ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE) From 85716a80c16dd6b6d1aaed87cd4b91c9b1d9b9b2 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Mon, 28 Nov 2022 10:45:41 +0100 Subject: [PATCH 3701/4122] kmsan: allow using __msan_instrument_asm_store() inside runtime In certain cases (e.g. when handling a softirq) __msan_instrument_asm_store(&var, sizeof(var)) may be called from within the KMSAN runtime, but later the value of @var is used with !kmsan_in_runtime(), leading to false positives. Because kmsan_internal_unpoison_memory() doesn't take locks, it should be fine to call it without kmsan_in_runtime() checks, which fixes the mentioned false positives.
Link: https://lkml.kernel.org/r/20221128094541.2645890-2-glider@google.com Signed-off-by: Alexander Potapenko Cc: Dmitry Vyukov Cc: Eric Biggers Cc: Ingo Molnar Cc: Marco Elver Cc: Peter Zijlstra Cc: Will Deacon Signed-off-by: Andrew Morton --- mm/kmsan/instrumentation.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/mm/kmsan/instrumentation.c b/mm/kmsan/instrumentation.c index 271f135f97a1..770fe02904f3 100644 --- a/mm/kmsan/instrumentation.c +++ b/mm/kmsan/instrumentation.c @@ -81,12 +81,16 @@ DECLARE_METADATA_PTR_GETTER(8); * Handle a memory store performed by inline assembly. KMSAN conservatively * attempts to unpoison the outputs of asm() directives to prevent false * positives caused by missed stores. + * + * __msan_instrument_asm_store() may be called for inline assembly code when + * entering or leaving IRQ. We omit the check for kmsan_in_runtime() to ensure + * the memory written to in these cases is also marked as initialized. */ void __msan_instrument_asm_store(void *addr, uintptr_t size) { unsigned long ua_flags; - if (!kmsan_enabled || kmsan_in_runtime()) + if (!kmsan_enabled) return; ua_flags = user_access_save(); @@ -103,10 +107,8 @@ void __msan_instrument_asm_store(void *addr, uintptr_t size) user_access_restore(ua_flags); return; } - kmsan_enter_runtime(); /* Unpoisoning the memory on best effort. */ kmsan_internal_unpoison_memory(addr, size, /*checked*/ false); - kmsan_leave_runtime(); user_access_restore(ua_flags); } EXPORT_SYMBOL(__msan_instrument_asm_store); From 22c4e80466eb88cff283ed50a5d0b0ff1654d0c3 Mon Sep 17 00:00:00 2001 From: Feiyang Chen Date: Thu, 27 Oct 2022 20:52:50 +0800 Subject: [PATCH 3702/4122] MIPS&LoongArch&NIOS2: adjust prototypes of p?d_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "mm/sparse-vmemmap: Generalise helpers and enable for LoongArch", v14. This series is in order to enable sparse-vmemmap for LoongArch. 
But LoongArch cannot use generic helpers directly because MIPS&LoongArch need to call pgd_init()/pud_init()/pmd_init() when populating page tables. So we adjust the prototypes of p?d_init() so that generic helpers can call them, then enable sparse-vmemmap with generic helpers, and, going further, generalise vmemmap_populate_hugepages() for ARM64, X86 and LoongArch. This patch (of 4): We are preparing to add sparse vmemmap support to LoongArch. MIPS and LoongArch need to call pgd_init()/pud_init()/pmd_init() when populating page tables, so adjust their prototypes so that generic helpers can call them. NIOS2 declares pmd_init() but doesn't use it, so just remove it to avoid build errors. Link: https://lkml.kernel.org/r/20221027125253.3458989-1-chenhuacai@loongson.cn Link: https://lkml.kernel.org/r/20221027125253.3458989-2-chenhuacai@loongson.cn Signed-off-by: Feiyang Chen Signed-off-by: Huacai Chen Reviewed-by: Jiaxun Yang Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Arnd Bergmann Cc: Andy Lutomirski Cc: Catalin Marinas Cc: Dave Hansen Cc: Dinh Nguyen Cc: Guo Ren Cc: Peter Zijlstra Cc: Thomas Bogendoerfer Cc: Will Deacon Cc: Xuefeng Li Cc: Xuerui Wang Cc: Min Zhou Cc: Muchun Song Signed-off-by: Andrew Morton --- arch/loongarch/include/asm/pgalloc.h | 13 ++----------- arch/loongarch/include/asm/pgtable.h | 8 ++++---- arch/loongarch/kernel/numa.c | 4 ++-- arch/loongarch/mm/pgtable.c | 23 +++++++++++++---------- arch/mips/include/asm/pgalloc.h | 10 +++++----- arch/mips/include/asm/pgtable-64.h | 8 ++++---- arch/mips/kvm/mmu.c | 3 +-- arch/mips/mm/pgtable-32.c | 9 ++++----- arch/mips/mm/pgtable-64.c | 18 ++++++++++-------- arch/mips/mm/pgtable.c | 2 +- arch/nios2/include/asm/pgalloc.h | 5 ----- 11 files changed, 46 insertions(+), 57 deletions(-) diff --git a/arch/loongarch/include/asm/pgalloc.h b/arch/loongarch/include/asm/pgalloc.h index 4bfeb3c9c9ac..af1d1e4a6965 100644 --- a/arch/loongarch/include/asm/pgalloc.h +++ b/arch/loongarch/include/asm/pgalloc.h @@ -42,15 +42,6
@@ static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud) extern void pagetable_init(void); -/* - * Initialize a new pmd table with invalid pointers. - */ -extern void pmd_init(unsigned long page, unsigned long pagetable); - -/* - * Initialize a new pgd / pmd table with invalid pointers. - */ -extern void pgd_init(unsigned long page); extern pgd_t *pgd_alloc(struct mm_struct *mm); #define __pte_free_tlb(tlb, pte, address) \ @@ -76,7 +67,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) } pmd = (pmd_t *)page_address(pg); - pmd_init((unsigned long)pmd, (unsigned long)invalid_pte_table); + pmd_init(pmd); return pmd; } @@ -92,7 +83,7 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) pud = (pud_t *) __get_free_page(GFP_KERNEL); if (pud) - pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table); + pud_init(pud); return pud; } diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index b8d837ee6910..9e6651846db9 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -237,11 +237,11 @@ extern void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pm #define pfn_pmd(pfn, prot) __pmd(((pfn) << _PFN_SHIFT) | pgprot_val(prot)) /* - * Initialize a new pgd / pmd table with invalid pointers. + * Initialize a new pgd / pud / pmd table with invalid pointers. 
*/ -extern void pgd_init(unsigned long page); -extern void pud_init(unsigned long page, unsigned long pagetable); -extern void pmd_init(unsigned long page, unsigned long pagetable); +extern void pgd_init(void *addr); +extern void pud_init(void *addr); +extern void pmd_init(void *addr); /* * Non-present pages: high 40 bits are offset, next 8 bits type, diff --git a/arch/loongarch/kernel/numa.c b/arch/loongarch/kernel/numa.c index a13f92593cfd..eb5d3a4c8a7a 100644 --- a/arch/loongarch/kernel/numa.c +++ b/arch/loongarch/kernel/numa.c @@ -78,7 +78,7 @@ void __init pcpu_populate_pte(unsigned long addr) new = memblock_alloc(PAGE_SIZE, PAGE_SIZE); pgd_populate(&init_mm, pgd, new); #ifndef __PAGETABLE_PUD_FOLDED - pud_init((unsigned long)new, (unsigned long)invalid_pmd_table); + pud_init(new); #endif } @@ -89,7 +89,7 @@ void __init pcpu_populate_pte(unsigned long addr) new = memblock_alloc(PAGE_SIZE, PAGE_SIZE); pud_populate(&init_mm, pud, new); #ifndef __PAGETABLE_PMD_FOLDED - pmd_init((unsigned long)new, (unsigned long)invalid_pte_table); + pmd_init(new); #endif } diff --git a/arch/loongarch/mm/pgtable.c b/arch/loongarch/mm/pgtable.c index ee179ccd3e3f..36a6dc0148ae 100644 --- a/arch/loongarch/mm/pgtable.c +++ b/arch/loongarch/mm/pgtable.c @@ -16,7 +16,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm) ret = (pgd_t *) __get_free_page(GFP_KERNEL); if (ret) { init = pgd_offset(&init_mm, 0UL); - pgd_init((unsigned long)ret); + pgd_init(ret); memcpy(ret + USER_PTRS_PER_PGD, init + USER_PTRS_PER_PGD, (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); } @@ -25,7 +25,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm) } EXPORT_SYMBOL_GPL(pgd_alloc); -void pgd_init(unsigned long page) +void pgd_init(void *addr) { unsigned long *p, *end; unsigned long entry; @@ -38,7 +38,7 @@ void pgd_init(unsigned long page) entry = (unsigned long)invalid_pte_table; #endif - p = (unsigned long *) page; + p = (unsigned long *)addr; end = p + PTRS_PER_PGD; do { @@ -56,11 +56,12 @@ void pgd_init(unsigned long 
page) EXPORT_SYMBOL_GPL(pgd_init); #ifndef __PAGETABLE_PMD_FOLDED -void pmd_init(unsigned long addr, unsigned long pagetable) +void pmd_init(void *addr) { unsigned long *p, *end; + unsigned long pagetable = (unsigned long)invalid_pte_table; - p = (unsigned long *) addr; + p = (unsigned long *)addr; end = p + PTRS_PER_PMD; do { @@ -79,9 +80,10 @@ EXPORT_SYMBOL_GPL(pmd_init); #endif #ifndef __PAGETABLE_PUD_FOLDED -void pud_init(unsigned long addr, unsigned long pagetable) +void pud_init(void *addr) { unsigned long *p, *end; + unsigned long pagetable = (unsigned long)invalid_pmd_table; p = (unsigned long *)addr; end = p + PTRS_PER_PUD; @@ -98,6 +100,7 @@ void pud_init(unsigned long addr, unsigned long pagetable) p[-1] = pagetable; } while (p != end); } +EXPORT_SYMBOL_GPL(pud_init); #endif pmd_t mk_pmd(struct page *page, pgprot_t prot) @@ -119,12 +122,12 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, void __init pagetable_init(void) { /* Initialize the entire pgd. */ - pgd_init((unsigned long)swapper_pg_dir); - pgd_init((unsigned long)invalid_pg_dir); + pgd_init(swapper_pg_dir); + pgd_init(invalid_pg_dir); #ifndef __PAGETABLE_PUD_FOLDED - pud_init((unsigned long)invalid_pud_table, (unsigned long)invalid_pmd_table); + pud_init(invalid_pud_table); #endif #ifndef __PAGETABLE_PMD_FOLDED - pmd_init((unsigned long)invalid_pmd_table, (unsigned long)invalid_pte_table); + pmd_init(invalid_pmd_table); #endif } diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h index 796035784c73..f72e737dda21 100644 --- a/arch/mips/include/asm/pgalloc.h +++ b/arch/mips/include/asm/pgalloc.h @@ -33,7 +33,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, /* * Initialize a new pmd table with invalid pointers. 
*/ -extern void pmd_init(unsigned long page, unsigned long pagetable); +extern void pmd_init(void *addr); #ifndef __PAGETABLE_PMD_FOLDED @@ -44,9 +44,9 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) #endif /* - * Initialize a new pgd / pmd table with invalid pointers. + * Initialize a new pgd table with invalid pointers. */ -extern void pgd_init(unsigned long page); +extern void pgd_init(void *addr); extern pgd_t *pgd_alloc(struct mm_struct *mm); static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) @@ -77,7 +77,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) } pmd = (pmd_t *)page_address(pg); - pmd_init((unsigned long)pmd, (unsigned long)invalid_pte_table); + pmd_init(pmd); return pmd; } @@ -93,7 +93,7 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_TABLE_ORDER); if (pud) - pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table); + pud_init(pud); return pud; } diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h index 436c29d698fa..c6310192b654 100644 --- a/arch/mips/include/asm/pgtable-64.h +++ b/arch/mips/include/asm/pgtable-64.h @@ -313,11 +313,11 @@ static inline pmd_t *pud_pgtable(pud_t pud) #endif /* - * Initialize a new pgd / pmd table with invalid pointers. + * Initialize a new pgd / pud / pmd table with invalid pointers. 
*/ -extern void pgd_init(unsigned long page); -extern void pud_init(unsigned long page, unsigned long pagetable); -extern void pmd_init(unsigned long page, unsigned long pagetable); +extern void pgd_init(void *addr); +extern void pud_init(void *addr); +extern void pmd_init(void *addr); /* * Non-present pages: high 40 bits are offset, next 8 bits type, diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c index 74cd64a24d05..e8c08988ed37 100644 --- a/arch/mips/kvm/mmu.c +++ b/arch/mips/kvm/mmu.c @@ -122,8 +122,7 @@ static pte_t *kvm_mips_walk_pgd(pgd_t *pgd, struct kvm_mmu_memory_cache *cache, if (!cache) return NULL; new_pmd = kvm_mmu_memory_cache_alloc(cache); - pmd_init((unsigned long)new_pmd, - (unsigned long)invalid_pte_table); + pmd_init(new_pmd); pud_populate(NULL, pud, new_pmd); } pmd = pmd_offset(pud, addr); diff --git a/arch/mips/mm/pgtable-32.c b/arch/mips/mm/pgtable-32.c index 61891af25019..f57fb69472f8 100644 --- a/arch/mips/mm/pgtable-32.c +++ b/arch/mips/mm/pgtable-32.c @@ -13,9 +13,9 @@ #include #include -void pgd_init(unsigned long page) +void pgd_init(void *addr) { - unsigned long *p = (unsigned long *) page; + unsigned long *p = (unsigned long *)addr; int i; for (i = 0; i < USER_PTRS_PER_PGD; i+=8) { @@ -61,9 +61,8 @@ void __init pagetable_init(void) #endif /* Initialize the entire pgd. 
*/ - pgd_init((unsigned long)swapper_pg_dir); - pgd_init((unsigned long)swapper_pg_dir - + sizeof(pgd_t) * USER_PTRS_PER_PGD); + pgd_init(swapper_pg_dir); + pgd_init(&swapper_pg_dir[USER_PTRS_PER_PGD]); pgd_base = swapper_pg_dir; diff --git a/arch/mips/mm/pgtable-64.c b/arch/mips/mm/pgtable-64.c index 7536f7804c44..b4386a0e2ef8 100644 --- a/arch/mips/mm/pgtable-64.c +++ b/arch/mips/mm/pgtable-64.c @@ -13,7 +13,7 @@ #include #include -void pgd_init(unsigned long page) +void pgd_init(void *addr) { unsigned long *p, *end; unsigned long entry; @@ -26,7 +26,7 @@ void pgd_init(unsigned long page) entry = (unsigned long)invalid_pte_table; #endif - p = (unsigned long *) page; + p = (unsigned long *) addr; end = p + PTRS_PER_PGD; do { @@ -43,11 +43,12 @@ void pgd_init(unsigned long page) } #ifndef __PAGETABLE_PMD_FOLDED -void pmd_init(unsigned long addr, unsigned long pagetable) +void pmd_init(void *addr) { unsigned long *p, *end; + unsigned long pagetable = (unsigned long)invalid_pte_table; - p = (unsigned long *) addr; + p = (unsigned long *)addr; end = p + PTRS_PER_PMD; do { @@ -66,9 +67,10 @@ EXPORT_SYMBOL_GPL(pmd_init); #endif #ifndef __PAGETABLE_PUD_FOLDED -void pud_init(unsigned long addr, unsigned long pagetable) +void pud_init(void *addr) { unsigned long *p, *end; + unsigned long pagetable = (unsigned long)invalid_pmd_table; p = (unsigned long *)addr; end = p + PTRS_PER_PUD; @@ -108,12 +110,12 @@ void __init pagetable_init(void) pgd_t *pgd_base; /* Initialize the entire pgd. 
*/ - pgd_init((unsigned long)swapper_pg_dir); + pgd_init(swapper_pg_dir); #ifndef __PAGETABLE_PUD_FOLDED - pud_init((unsigned long)invalid_pud_table, (unsigned long)invalid_pmd_table); + pud_init(invalid_pud_table); #endif #ifndef __PAGETABLE_PMD_FOLDED - pmd_init((unsigned long)invalid_pmd_table, (unsigned long)invalid_pte_table); + pmd_init(invalid_pmd_table); #endif pgd_base = swapper_pg_dir; /* diff --git a/arch/mips/mm/pgtable.c b/arch/mips/mm/pgtable.c index 3b7590660a04..b13314be5d0e 100644 --- a/arch/mips/mm/pgtable.c +++ b/arch/mips/mm/pgtable.c @@ -15,7 +15,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm) ret = (pgd_t *) __get_free_pages(GFP_KERNEL, PGD_TABLE_ORDER); if (ret) { init = pgd_offset(&init_mm, 0UL); - pgd_init((unsigned long)ret); + pgd_init(ret); memcpy(ret + USER_PTRS_PER_PGD, init + USER_PTRS_PER_PGD, (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); } diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h index 3c4ae74d5798..ecd1657bb2ce 100644 --- a/arch/nios2/include/asm/pgalloc.h +++ b/arch/nios2/include/asm/pgalloc.h @@ -26,11 +26,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, set_pmd(pmd, __pmd((unsigned long)page_address(pte))); } -/* - * Initialize a new pmd table with invalid pointers. - */ -extern void pmd_init(unsigned long page, unsigned long pagetable); - extern pgd_t *pgd_alloc(struct mm_struct *mm); #define __pte_free_tlb(tlb, pte, addr) \ From 7b09f5af01ede480cbe7abcb281cf17550a46ff5 Mon Sep 17 00:00:00 2001 From: Feiyang Chen Date: Thu, 27 Oct 2022 20:52:51 +0800 Subject: [PATCH 3703/4122] LoongArch: add sparse memory vmemmap support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add sparse memory vmemmap support for LoongArch. SPARSEMEM_VMEMMAP uses a virtually mapped memmap to optimise pfn_to_page and page_to_pfn operations. This is the most efficient option when sufficient kernel resources are available. 
Link: https://lkml.kernel.org/r/20221027125253.3458989-3-chenhuacai@loongson.cn Signed-off-by: Min Zhou Signed-off-by: Feiyang Chen Signed-off-by: Huacai Chen Reviewed-by: Arnd Bergmann Cc: Andy Lutomirski Cc: Catalin Marinas Cc: Dave Hansen Cc: Dinh Nguyen Cc: Guo Ren Cc: Jiaxun Yang Cc: Peter Zijlstra Cc: Philippe Mathieu-Daudé Cc: Thomas Bogendoerfer Cc: Will Deacon Cc: Xuefeng Li Cc: Xuerui Wang Cc: Muchun Song Signed-off-by: Andrew Morton --- arch/loongarch/Kconfig | 1 + arch/loongarch/include/asm/pgtable.h | 7 ++- arch/loongarch/include/asm/sparsemem.h | 8 +++ arch/loongarch/mm/init.c | 72 ++++++++++++++++++++++++-- include/linux/mm.h | 2 + mm/sparse-vmemmap.c | 10 ++++ 6 files changed, 96 insertions(+), 4 deletions(-) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 903096bd87f8..6f7fa0c0ca08 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -487,6 +487,7 @@ config ARCH_FLATMEM_ENABLE config ARCH_SPARSEMEM_ENABLE def_bool y + select SPARSEMEM_VMEMMAP_ENABLE help Say Y to support efficient handling of sparse physical memory, for architectures which are either NUMA (Non-Uniform Memory Access) diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index 9e6651846db9..022ec6be3602 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -11,6 +11,7 @@ #include #include +#include #include #if CONFIG_PGTABLE_LEVELS == 2 @@ -59,6 +60,7 @@ #include #include #include +#include struct mm_struct; struct vm_area_struct; @@ -86,7 +88,10 @@ extern unsigned long zero_page_mask; #define VMALLOC_START MODULES_END #define VMALLOC_END \ (vm_map_base + \ - min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE) + min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE) + +#define vmemmap ((struct page *)((VMALLOC_END + PMD_SIZE) & PMD_MASK)) +#define 
VMEMMAP_END ((unsigned long)vmemmap + VMEMMAP_SIZE - 1) #define pte_ERROR(e) \ pr_err("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e)) diff --git a/arch/loongarch/include/asm/sparsemem.h b/arch/loongarch/include/asm/sparsemem.h index 3d18cdf1b069..8d4af6aff8a8 100644 --- a/arch/loongarch/include/asm/sparsemem.h +++ b/arch/loongarch/include/asm/sparsemem.h @@ -11,8 +11,16 @@ #define SECTION_SIZE_BITS 29 /* 2^29 = Largest Huge Page Size */ #define MAX_PHYSMEM_BITS 48 +#ifdef CONFIG_SPARSEMEM_VMEMMAP +#define VMEMMAP_SIZE (sizeof(struct page) * (1UL << (cpu_pabits + 1 - PAGE_SHIFT))) +#endif + #endif /* CONFIG_SPARSEMEM */ +#ifndef VMEMMAP_SIZE +#define VMEMMAP_SIZE 0 /* 1, For FLATMEM; 2, For SPARSEMEM without VMEMMAP. */ +#endif + #ifdef CONFIG_MEMORY_HOTPLUG int memory_add_physaddr_to_nid(u64 addr); #define memory_add_physaddr_to_nid memory_add_physaddr_to_nid diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index 080061793c85..451d93667bcc 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include @@ -152,6 +152,72 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif #endif +#ifdef CONFIG_SPARSEMEM_VMEMMAP +static int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end, + int node, struct vmem_altmap *altmap) +{ + unsigned long addr = start; + unsigned long next; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + for (addr = start; addr < end; addr = next) { + next = pmd_addr_end(addr, end); + + pgd = vmemmap_pgd_populate(addr, node); + if (!pgd) + return -ENOMEM; + p4d = vmemmap_p4d_populate(pgd, addr, node); + if (!p4d) + return -ENOMEM; + pud = vmemmap_pud_populate(p4d, addr, node); + if (!pud) + return -ENOMEM; + + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) { + void *p = NULL; + + p = vmemmap_alloc_block_buf(PMD_SIZE, node, NULL); + if (p) { + pmd_t entry; + + entry = pfn_pmd(virt_to_pfn(p), 
PAGE_KERNEL); + pmd_val(entry) |= _PAGE_HUGE | _PAGE_HGLOBAL; + set_pmd_at(&init_mm, addr, pmd, entry); + + continue; + } + } else if (pmd_val(*pmd) & _PAGE_HUGE) { + vmemmap_verify((pte_t *)pmd, node, addr, next); + continue; + } + if (vmemmap_populate_basepages(addr, next, node, NULL)) + return -ENOMEM; + } + + return 0; +} + +int __meminit vmemmap_populate(unsigned long start, unsigned long end, + int node, struct vmem_altmap *altmap) +{ +#if CONFIG_PGTABLE_LEVELS == 2 + return vmemmap_populate_basepages(start, end, node, NULL); +#else + return vmemmap_populate_hugepages(start, end, node, NULL); +#endif +} + +#ifdef CONFIG_MEMORY_HOTPLUG +void vmemmap_free(unsigned long start, unsigned long end, struct vmem_altmap *altmap) +{ +} +#endif +#endif + static pte_t *fixmap_pte(unsigned long addr) { pgd_t *pgd; @@ -168,7 +234,7 @@ static pte_t *fixmap_pte(unsigned long addr) new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); pgd_populate(&init_mm, pgd, new); #ifndef __PAGETABLE_PUD_FOLDED - pud_init((unsigned long)new, (unsigned long)invalid_pmd_table); + pud_init(new); #endif } @@ -179,7 +245,7 @@ static pte_t *fixmap_pte(unsigned long addr) new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); pud_populate(&init_mm, pud, new); #ifndef __PAGETABLE_PMD_FOLDED - pmd_init((unsigned long)new, (unsigned long)invalid_pte_table); + pmd_init(new); #endif } diff --git a/include/linux/mm.h b/include/linux/mm.h index 767c8c522e70..7c31f898337c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3360,6 +3360,8 @@ void *sparse_buffer_alloc(unsigned long size); struct page * __populate_section_memmap(unsigned long pfn, unsigned long nr_pages, int nid, struct vmem_altmap *altmap, struct dev_pagemap *pgmap); +void pmd_init(void *addr); +void pud_init(void *addr); pgd_t *vmemmap_pgd_populate(unsigned long addr, int node); p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node); pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node); diff --git 
a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 46ae542118c0..797b30e9050c 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -196,6 +196,10 @@ pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node) return pmd; } +void __weak __meminit pmd_init(void *addr) +{ +} + pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node) { pud_t *pud = pud_offset(p4d, addr); @@ -203,11 +207,16 @@ pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node) void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node); if (!p) return NULL; + pmd_init(p); pud_populate(&init_mm, pud, p); } return pud; } +void __weak __meminit pud_init(void *addr) +{ +} + p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node) { p4d_t *p4d = p4d_offset(pgd, addr); @@ -215,6 +224,7 @@ p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node) void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node); if (!p) return NULL; + pud_init(p); p4d_populate(&init_mm, p4d, p); } return p4d; From 2045a3b8911b6ee64dd9b522d61abc468ecdcdb5 Mon Sep 17 00:00:00 2001 From: Feiyang Chen Date: Thu, 27 Oct 2022 20:52:52 +0800 Subject: [PATCH 3704/4122] mm/sparse-vmemmap: generalise vmemmap_populate_hugepages() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Generalise vmemmap_populate_hugepages() so ARM64 & X86 & LoongArch can share its implementation. 
Link: https://lkml.kernel.org/r/20221027125253.3458989-4-chenhuacai@loongson.cn Signed-off-by: Feiyang Chen Signed-off-by: Huacai Chen Acked-by: Will Deacon Acked-by: Dave Hansen Reviewed-by: Arnd Bergmann Cc: Andy Lutomirski Cc: Catalin Marinas Cc: Dinh Nguyen Cc: Guo Ren Cc: Jiaxun Yang Cc: Min Zhou Cc: Peter Zijlstra Cc: Philippe Mathieu-Daudé Cc: Thomas Bogendoerfer Cc: Xuefeng Li Cc: Xuerui Wang Cc: Muchun Song Signed-off-by: Andrew Morton --- arch/arm64/mm/mmu.c | 55 +++++++---------------- arch/loongarch/mm/init.c | 55 ++++++----------------- arch/x86/mm/init_64.c | 96 ++++++++++++++-------------------------- include/linux/mm.h | 6 +++ mm/sparse-vmemmap.c | 63 ++++++++++++++++++++++++++ 5 files changed, 132 insertions(+), 143 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 556154d821bf..27217ba12e57 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1137,53 +1137,28 @@ static void free_empty_tables(unsigned long addr, unsigned long end, } #endif +void __meminit vmemmap_set_pmd(pmd_t *pmdp, void *p, int node, + unsigned long addr, unsigned long next) +{ + pmd_set_huge(pmdp, __pa(p), __pgprot(PROT_SECT_NORMAL)); +} + +int __meminit vmemmap_check_pmd(pmd_t *pmdp, int node, + unsigned long addr, unsigned long next) +{ + vmemmap_verify((pte_t *)pmdp, node, addr, next); + return 1; +} + int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap) { - unsigned long addr = start; - unsigned long next; - pgd_t *pgdp; - p4d_t *p4dp; - pud_t *pudp; - pmd_t *pmdp; - WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END)); if (!ARM64_KERNEL_USES_PMD_MAPS) return vmemmap_populate_basepages(start, end, node, altmap); - - do { - next = pmd_addr_end(addr, end); - - pgdp = vmemmap_pgd_populate(addr, node); - if (!pgdp) - return -ENOMEM; - - p4dp = vmemmap_p4d_populate(pgdp, addr, node); - if (!p4dp) - return -ENOMEM; - - pudp = vmemmap_pud_populate(p4dp, addr, node); - if (!pudp) - return 
-ENOMEM; - - pmdp = pmd_offset(pudp, addr); - if (pmd_none(READ_ONCE(*pmdp))) { - void *p = NULL; - - p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap); - if (!p) { - if (vmemmap_populate_basepages(addr, next, node, altmap)) - return -ENOMEM; - continue; - } - - pmd_set_huge(pmdp, __pa(p), __pgprot(PROT_SECT_NORMAL)); - } else - vmemmap_verify((pte_t *)pmdp, node, addr, next); - } while (addr = next, addr != end); - - return 0; + else + return vmemmap_populate_hugepages(start, end, node, altmap); } #ifdef CONFIG_MEMORY_HOTPLUG diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index 451d93667bcc..e018aed34586 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -153,52 +153,25 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif #ifdef CONFIG_SPARSEMEM_VMEMMAP -static int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end, - int node, struct vmem_altmap *altmap) +void __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node, + unsigned long addr, unsigned long next) { - unsigned long addr = start; - unsigned long next; - pgd_t *pgd; - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; + pmd_t entry; - for (addr = start; addr < end; addr = next) { - next = pmd_addr_end(addr, end); + entry = pfn_pmd(virt_to_pfn(p), PAGE_KERNEL); + pmd_val(entry) |= _PAGE_HUGE | _PAGE_HGLOBAL; + set_pmd_at(&init_mm, addr, pmd, entry); +} - pgd = vmemmap_pgd_populate(addr, node); - if (!pgd) - return -ENOMEM; - p4d = vmemmap_p4d_populate(pgd, addr, node); - if (!p4d) - return -ENOMEM; - pud = vmemmap_pud_populate(p4d, addr, node); - if (!pud) - return -ENOMEM; +int __meminit vmemmap_check_pmd(pmd_t *pmd, int node, + unsigned long addr, unsigned long next) +{ + int huge = pmd_val(*pmd) & _PAGE_HUGE; - pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd)) { - void *p = NULL; + if (huge) + vmemmap_verify((pte_t *)pmd, node, addr, next); - p = vmemmap_alloc_block_buf(PMD_SIZE, node, NULL); - if (p) { - pmd_t entry; - - entry = 
pfn_pmd(virt_to_pfn(p), PAGE_KERNEL); - pmd_val(entry) |= _PAGE_HUGE | _PAGE_HGLOBAL; - set_pmd_at(&init_mm, addr, pmd, entry); - - continue; - } - } else if (pmd_val(*pmd) & _PAGE_HUGE) { - vmemmap_verify((pte_t *)pmd, node, addr, next); - continue; - } - if (vmemmap_populate_basepages(addr, next, node, NULL)) - return -ENOMEM; - } - - return 0; + return huge; } int __meminit vmemmap_populate(unsigned long start, unsigned long end, diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index e8db4edd7cc9..a190aae8ceaf 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1492,72 +1492,44 @@ static long __meminitdata addr_start, addr_end; static void __meminitdata *p_start, *p_end; static int __meminitdata node_start; -static int __meminit vmemmap_populate_hugepages(unsigned long start, - unsigned long end, int node, struct vmem_altmap *altmap) +void __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node, + unsigned long addr, unsigned long next) { - unsigned long addr; - unsigned long next; - pgd_t *pgd; - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; + pte_t entry; - for (addr = start; addr < end; addr = next) { - next = pmd_addr_end(addr, end); + entry = pfn_pte(__pa(p) >> PAGE_SHIFT, + PAGE_KERNEL_LARGE); + set_pmd(pmd, __pmd(pte_val(entry))); - pgd = vmemmap_pgd_populate(addr, node); - if (!pgd) - return -ENOMEM; - - p4d = vmemmap_p4d_populate(pgd, addr, node); - if (!p4d) - return -ENOMEM; - - pud = vmemmap_pud_populate(p4d, addr, node); - if (!pud) - return -ENOMEM; - - pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd)) { - void *p; - - p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap); - if (p) { - pte_t entry; - - entry = pfn_pte(__pa(p) >> PAGE_SHIFT, - PAGE_KERNEL_LARGE); - set_pmd(pmd, __pmd(pte_val(entry))); - - /* check to see if we have contiguous blocks */ - if (p_end != p || node_start != node) { - if (p_start) - pr_debug(" [%lx-%lx] PMD -> [%p-%p] on node %d\n", - addr_start, addr_end-1, p_start, p_end-1, node_start); - addr_start = 
addr; - node_start = node; - p_start = p; - } - - addr_end = addr + PMD_SIZE; - p_end = p + PMD_SIZE; - - if (!IS_ALIGNED(addr, PMD_SIZE) || - !IS_ALIGNED(next, PMD_SIZE)) - vmemmap_use_new_sub_pmd(addr, next); - - continue; - } else if (altmap) - return -ENOMEM; /* no fallback */ - } else if (pmd_large(*pmd)) { - vmemmap_verify((pte_t *)pmd, node, addr, next); - vmemmap_use_sub_pmd(addr, next); - continue; - } - if (vmemmap_populate_basepages(addr, next, node, NULL)) - return -ENOMEM; + /* check to see if we have contiguous blocks */ + if (p_end != p || node_start != node) { + if (p_start) + pr_debug(" [%lx-%lx] PMD -> [%p-%p] on node %d\n", + addr_start, addr_end-1, p_start, p_end-1, node_start); + addr_start = addr; + node_start = node; + p_start = p; } - return 0; + + addr_end = addr + PMD_SIZE; + p_end = p + PMD_SIZE; + + if (!IS_ALIGNED(addr, PMD_SIZE) || + !IS_ALIGNED(next, PMD_SIZE)) + vmemmap_use_new_sub_pmd(addr, next); +} + +int __meminit vmemmap_check_pmd(pmd_t *pmd, int node, + unsigned long addr, unsigned long next) +{ + int large = pmd_large(*pmd); + + if (pmd_large(*pmd)) { + vmemmap_verify((pte_t *)pmd, node, addr, next); + vmemmap_use_sub_pmd(addr, next); + } + + return large; } int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, diff --git a/include/linux/mm.h b/include/linux/mm.h index 7c31f898337c..472cb60ace07 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3373,8 +3373,14 @@ struct vmem_altmap; void *vmemmap_alloc_block_buf(unsigned long size, int node, struct vmem_altmap *altmap); void vmemmap_verify(pte_t *, int, unsigned long, unsigned long); +void vmemmap_set_pmd(pmd_t *pmd, void *p, int node, + unsigned long addr, unsigned long next); +int vmemmap_check_pmd(pmd_t *pmd, int node, + unsigned long addr, unsigned long next); int vmemmap_populate_basepages(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap); +int vmemmap_populate_hugepages(unsigned long start, unsigned long 
end, + int node, struct vmem_altmap *altmap); int vmemmap_populate(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap); void vmemmap_populate_print_last(void); diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 797b30e9050c..c5398a5960d0 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -295,6 +295,69 @@ int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end, return vmemmap_populate_range(start, end, node, altmap, NULL); } +void __weak __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node, + unsigned long addr, unsigned long next) +{ +} + +int __weak __meminit vmemmap_check_pmd(pmd_t *pmd, int node, + unsigned long addr, unsigned long next) +{ + return 0; +} + +int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end, + int node, struct vmem_altmap *altmap) +{ + unsigned long addr; + unsigned long next; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + for (addr = start; addr < end; addr = next) { + next = pmd_addr_end(addr, end); + + pgd = vmemmap_pgd_populate(addr, node); + if (!pgd) + return -ENOMEM; + + p4d = vmemmap_p4d_populate(pgd, addr, node); + if (!p4d) + return -ENOMEM; + + pud = vmemmap_pud_populate(p4d, addr, node); + if (!pud) + return -ENOMEM; + + pmd = pmd_offset(pud, addr); + if (pmd_none(READ_ONCE(*pmd))) { + void *p; + + p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap); + if (p) { + vmemmap_set_pmd(pmd, p, node, addr, next); + continue; + } else if (altmap) { + /* + * No fallback: In any case we care about, the + * altmap should be reasonably sized and aligned + * such that vmemmap_alloc_block_buf() will always + * succeed. For consistency with the PTE case, + * return an error here as failure could indicate + * a configuration issue with the size of the altmap. 
+ */ return -ENOMEM; + } + } else if (vmemmap_check_pmd(pmd, node, addr, next)) + continue; + if (vmemmap_populate_basepages(addr, next, node, altmap)) + return -ENOMEM; + } + return 0; +} + /* * For compound pages bigger than section size (e.g. x86 1G compound * pages with 2M subsection size) fill the rest of sections as tail From c5a303a51b9ca85b52250fd8d92bf4918fbbdf0d Mon Sep 17 00:00:00 2001 From: Feiyang Chen Date: Thu, 27 Oct 2022 20:52:53 +0800 Subject: [PATCH 3705/4122] LoongArch: enable ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The feature of minimizing overhead of struct page associated with each HugeTLB page is implemented on x86_64. However, the infrastructure of this feature is already there, so just selecting ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP is enough to enable this feature for LoongArch. Link: https://lkml.kernel.org/r/20221027125253.3458989-5-chenhuacai@loongson.cn Signed-off-by: Feiyang Chen Signed-off-by: Huacai Chen Reviewed-by: Philippe Mathieu-Daudé Acked-by: Muchun Song Reviewed-by: Arnd Bergmann Cc: Andy Lutomirski Cc: Catalin Marinas Cc: Dave Hansen Cc: Dinh Nguyen Cc: Guo Ren Cc: Jiaxun Yang Cc: Min Zhou Cc: Peter Zijlstra Cc: Thomas Bogendoerfer Cc: Will Deacon Cc: Xuefeng Li Cc: Xuerui Wang Signed-off-by: Andrew Morton --- arch/loongarch/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 6f7fa0c0ca08..0a6ef613124c 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -52,6 +52,7 @@ config LOONGARCH select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT + select ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP select ARCH_WANT_LD_ORPHAN_WARN select ARCH_WANTS_NO_INSTR select BUILDTIME_TABLE_SORT From 3720dd6dcac38d03424d6ba38107f39af5318bcf Mon Sep 17 00:00:00 2001 From: "Vishal Moola (Oracle)" Date: Tue, 1 Nov 2022 10:53:22
-0700 Subject: [PATCH 3706/4122] filemap: convert replace_page_cache_page() to replace_page_cache_folio() Patch series "Removing the lru_cache_add() wrapper". This patchset replaces all calls of lru_cache_add() with the folio equivalent: folio_add_lru(). This allows us to get rid of the wrapper. The series passes xfstests and the userfaultfd selftests. This patch (of 5): Eliminates 7 calls to compound_head(). Link: https://lkml.kernel.org/r/20221101175326.13265-1-vishal.moola@gmail.com Link: https://lkml.kernel.org/r/20221101175326.13265-2-vishal.moola@gmail.com Signed-off-by: Vishal Moola (Oracle) Reviewed-by: Matthew Wilcox (Oracle) Cc: Mike Kravetz Cc: Miklos Szeredi Signed-off-by: Andrew Morton --- fs/fuse/dev.c | 2 +- include/linux/pagemap.h | 2 +- mm/filemap.c | 50 ++++++++++++++++++++--------------------- 3 files changed, 26 insertions(+), 28 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index b4a6e0a1b945..26817a2db463 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -837,7 +837,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) if (WARN_ON(PageMlocked(oldpage))) goto out_fallback_unlock; - replace_page_cache_page(oldpage, newpage); + replace_page_cache_folio(page_folio(oldpage), page_folio(newpage)); get_page(newpage); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 2ec0ca1f3d38..29e1f9e76eb6 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -1102,7 +1102,7 @@ int filemap_add_folio(struct address_space *mapping, struct folio *folio, pgoff_t index, gfp_t gfp); void filemap_remove_folio(struct folio *folio); void __filemap_remove_folio(struct folio *folio, void *shadow); -void replace_page_cache_page(struct page *old, struct page *new); +void replace_page_cache_folio(struct folio *old, struct folio *new); void delete_from_page_cache_batch(struct address_space *mapping, struct folio_batch *fbatch); bool filemap_release_folio(struct folio *folio, gfp_t gfp); diff --git
a/mm/filemap.c b/mm/filemap.c index 242cd8bd8330..c4d4ace9cc70 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -788,56 +788,54 @@ int file_write_and_wait_range(struct file *file, loff_t lstart, loff_t lend) EXPORT_SYMBOL(file_write_and_wait_range); /** - * replace_page_cache_page - replace a pagecache page with a new one - * @old: page to be replaced - * @new: page to replace with + * replace_page_cache_folio - replace a pagecache folio with a new one + * @old: folio to be replaced + * @new: folio to replace with * - * This function replaces a page in the pagecache with a new one. On - * success it acquires the pagecache reference for the new page and - * drops it for the old page. Both the old and new pages must be - * locked. This function does not add the new page to the LRU, the + * This function replaces a folio in the pagecache with a new one. On + * success it acquires the pagecache reference for the new folio and + * drops it for the old folio. Both the old and new folios must be + * locked. This function does not add the new folio to the LRU, the * caller must do that. * * The remove + add is atomic. This function cannot fail. 
*/ -void replace_page_cache_page(struct page *old, struct page *new) +void replace_page_cache_folio(struct folio *old, struct folio *new) { - struct folio *fold = page_folio(old); - struct folio *fnew = page_folio(new); struct address_space *mapping = old->mapping; void (*free_folio)(struct folio *) = mapping->a_ops->free_folio; pgoff_t offset = old->index; XA_STATE(xas, &mapping->i_pages, offset); - VM_BUG_ON_PAGE(!PageLocked(old), old); - VM_BUG_ON_PAGE(!PageLocked(new), new); - VM_BUG_ON_PAGE(new->mapping, new); + VM_BUG_ON_FOLIO(!folio_test_locked(old), old); + VM_BUG_ON_FOLIO(!folio_test_locked(new), new); + VM_BUG_ON_FOLIO(new->mapping, new); - get_page(new); + folio_get(new); new->mapping = mapping; new->index = offset; - mem_cgroup_migrate(fold, fnew); + mem_cgroup_migrate(old, new); xas_lock_irq(&xas); xas_store(&xas, new); old->mapping = NULL; /* hugetlb pages do not participate in page cache accounting. */ - if (!PageHuge(old)) - __dec_lruvec_page_state(old, NR_FILE_PAGES); - if (!PageHuge(new)) - __inc_lruvec_page_state(new, NR_FILE_PAGES); - if (PageSwapBacked(old)) - __dec_lruvec_page_state(old, NR_SHMEM); - if (PageSwapBacked(new)) - __inc_lruvec_page_state(new, NR_SHMEM); + if (!folio_test_hugetlb(old)) + __lruvec_stat_sub_folio(old, NR_FILE_PAGES); + if (!folio_test_hugetlb(new)) + __lruvec_stat_add_folio(new, NR_FILE_PAGES); + if (folio_test_swapbacked(old)) + __lruvec_stat_sub_folio(old, NR_SHMEM); + if (folio_test_swapbacked(new)) + __lruvec_stat_add_folio(new, NR_SHMEM); xas_unlock_irq(&xas); if (free_folio) - free_folio(fold); - folio_put(fold); + free_folio(old); + folio_put(old); } -EXPORT_SYMBOL_GPL(replace_page_cache_page); +EXPORT_SYMBOL_GPL(replace_page_cache_folio); noinline int __filemap_add_folio(struct address_space *mapping, struct folio *folio, pgoff_t index, gfp_t gfp, void **shadowp) From 063aaad792eef49a11d7575dc9914b43c0fa3792 Mon Sep 17 00:00:00 2001 From: "Vishal Moola (Oracle)" Date: Tue, 1 Nov 2022 10:53:23 -0700 Subject: 
[PATCH 3707/4122] fuse: convert fuse_try_move_page() to use folios Converts the function to try to move folios instead of pages. Also converts fuse_check_page() to fuse_check_folio() since this is its only caller. This change removes 15 calls to compound_head(). Link: https://lkml.kernel.org/r/20221101175326.13265-3-vishal.moola@gmail.com Signed-off-by: Vishal Moola (Oracle) Acked-by: Miklos Szeredi Cc: Matthew Wilcox (Oracle) Cc: Mike Kravetz Signed-off-by: Andrew Morton --- fs/fuse/dev.c | 55 ++++++++++++++++++++++++++------------------------- 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 26817a2db463..204c332cd343 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -764,11 +764,11 @@ static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size) return ncpy; } -static int fuse_check_page(struct page *page) +static int fuse_check_folio(struct folio *folio) { - if (page_mapcount(page) || - page->mapping != NULL || - (page->flags & PAGE_FLAGS_CHECK_AT_PREP & + if (folio_mapped(folio) || + folio->mapping != NULL || + (folio->flags & PAGE_FLAGS_CHECK_AT_PREP & ~(1 << PG_locked | 1 << PG_referenced | 1 << PG_uptodate | @@ -778,7 +778,7 @@ static int fuse_check_page(struct page *page) 1 << PG_reclaim | 1 << PG_waiters | LRU_GEN_MASK | LRU_REFS_MASK))) { - dump_page(page, "fuse: trying to steal weird page"); + dump_page(&folio->page, "fuse: trying to steal weird page"); return 1; } return 0; @@ -787,11 +787,11 @@ static int fuse_check_page(struct page *page) static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) { int err; - struct page *oldpage = *pagep; - struct page *newpage; + struct folio *oldfolio = page_folio(*pagep); + struct folio *newfolio; struct pipe_buffer *buf = cs->pipebufs; - get_page(oldpage); + folio_get(oldfolio); err = unlock_request(cs->req); if (err) goto out_put_old; @@ -814,35 +814,36 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page
**pagep) if (!pipe_buf_try_steal(cs->pipe, buf)) goto out_fallback; - newpage = buf->page; + newfolio = page_folio(buf->page); - if (!PageUptodate(newpage)) - SetPageUptodate(newpage); + if (!folio_test_uptodate(newfolio)) + folio_mark_uptodate(newfolio); - ClearPageMappedToDisk(newpage); + folio_clear_mappedtodisk(newfolio); - if (fuse_check_page(newpage) != 0) + if (fuse_check_folio(newfolio) != 0) goto out_fallback_unlock; /* * This is a new and locked page, it shouldn't be mapped or * have any special flags on it */ - if (WARN_ON(page_mapped(oldpage))) + if (WARN_ON(folio_mapped(oldfolio))) goto out_fallback_unlock; - if (WARN_ON(page_has_private(oldpage))) + if (WARN_ON(folio_has_private(oldfolio))) goto out_fallback_unlock; - if (WARN_ON(PageDirty(oldpage) || PageWriteback(oldpage))) + if (WARN_ON(folio_test_dirty(oldfolio) || + folio_test_writeback(oldfolio))) goto out_fallback_unlock; - if (WARN_ON(PageMlocked(oldpage))) + if (WARN_ON(folio_test_mlocked(oldfolio))) goto out_fallback_unlock; - replace_page_cache_folio(page_folio(oldpage), page_folio(newpage)); + replace_page_cache_folio(oldfolio, newfolio); - get_page(newpage); + folio_get(newfolio); if (!(buf->flags & PIPE_BUF_FLAG_LRU)) - lru_cache_add(newpage); + folio_add_lru(newfolio); /* * Release while we have extra ref on stolen page. 
Otherwise @@ -855,28 +856,28 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) if (test_bit(FR_ABORTED, &cs->req->flags)) err = -ENOENT; else - *pagep = newpage; + *pagep = &newfolio->page; spin_unlock(&cs->req->waitq.lock); if (err) { - unlock_page(newpage); - put_page(newpage); + folio_unlock(newfolio); + folio_put(newfolio); goto out_put_old; } - unlock_page(oldpage); + folio_unlock(oldfolio); /* Drop ref for ap->pages[] array */ - put_page(oldpage); + folio_put(oldfolio); cs->len = 0; err = 0; out_put_old: /* Drop ref obtained in this function */ - put_page(oldpage); + folio_put(oldfolio); return err; out_fallback_unlock: - unlock_page(newpage); + folio_unlock(newfolio); out_fallback: cs->pg = buf->page; cs->offset = buf->offset; From 28965f0f8be62e1ed8296fe0240b5d5dc064b681 Mon Sep 17 00:00:00 2001 From: "Vishal Moola (Oracle)" Date: Tue, 1 Nov 2022 10:53:24 -0700 Subject: [PATCH 3708/4122] userfaultfd: replace lru_cache functions with folio_add functions Replaces lru_cache_add() and lru_cache_add_inactive_or_unevictable() with folio_add_lru() and folio_add_lru_vma(). This is in preparation for the removal of lru_cache_add(). 
Link: https://lkml.kernel.org/r/20221101175326.13265-4-vishal.moola@gmail.com Signed-off-by: Vishal Moola (Oracle) Cc: Matthew Wilcox (Oracle) Cc: Mike Kravetz Cc: Miklos Szeredi Signed-off-by: Andrew Morton --- mm/userfaultfd.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 650ab6cfd5f4..b7a9479bece2 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -66,6 +66,7 @@ int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd, bool vm_shared = dst_vma->vm_flags & VM_SHARED; bool page_in_cache = page_mapping(page); spinlock_t *ptl; + struct folio *folio; struct inode *inode; pgoff_t offset, max_off; @@ -113,14 +114,15 @@ int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd, if (!pte_none_mostly(*dst_pte)) goto out_unlock; + folio = page_folio(page); if (page_in_cache) { /* Usually, cache pages are already added to LRU */ if (newly_allocated) - lru_cache_add(page); + folio_add_lru(folio); page_add_file_rmap(page, dst_vma, false); } else { page_add_new_anon_rmap(page, dst_vma, dst_addr); - lru_cache_add_inactive_or_unevictable(page, dst_vma); + folio_add_lru_vma(folio, dst_vma); } /* From 284a344ed19dc92526024d062b30f90774fea50f Mon Sep 17 00:00:00 2001 From: "Vishal Moola (Oracle)" Date: Tue, 1 Nov 2022 10:53:25 -0700 Subject: [PATCH 3709/4122] khugepage: replace lru_cache_add() with folio_add_lru() Replaces some calls with their folio equivalents. This is in preparation for the removal of lru_cache_add(). This replaces 3 calls to compound_head() with 1. 
Link: https://lkml.kernel.org/r/20221101175326.13265-5-vishal.moola@gmail.com Signed-off-by: Vishal Moola (Oracle) Reviewed-by: Matthew Wilcox (Oracle) Cc: Mike Kravetz Cc: Miklos Szeredi Signed-off-by: Andrew Morton --- mm/khugepaged.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 78ec2771cc65..5a7d2d5093f9 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -2013,6 +2013,7 @@ xa_unlocked: if (result == SCAN_SUCCEED) { struct page *page, *tmp; + struct folio *folio; /* * Replacing old pages with new one has succeeded, now we @@ -2040,11 +2041,13 @@ xa_unlocked: index++; } - SetPageUptodate(hpage); - page_ref_add(hpage, HPAGE_PMD_NR - 1); + folio = page_folio(hpage); + folio_mark_uptodate(folio); + folio_ref_add(folio, HPAGE_PMD_NR - 1); + if (is_shmem) - set_page_dirty(hpage); - lru_cache_add(hpage); + folio_mark_dirty(folio); + folio_add_lru(folio); /* * Remove pte page tables, so we can re-fault the page as huge. From 6e1ca48d0669b0f5efcbaa051b23cd8e651a1614 Mon Sep 17 00:00:00 2001 From: "Vishal Moola (Oracle)" Date: Tue, 1 Nov 2022 10:53:26 -0700 Subject: [PATCH 3710/4122] folio-compat: remove lru_cache_add() There are no longer any callers of lru_cache_add(), so remove it. This saves 79 bytes of kernel text. Also cleanup some comments such that they reference the new folio_add_lru() instead. 
Link: https://lkml.kernel.org/r/20221101175326.13265-6-vishal.moola@gmail.com Signed-off-by: Vishal Moola (Oracle) Reviewed-by: Matthew Wilcox (Oracle) Cc: Mike Kravetz Cc: Miklos Szeredi Signed-off-by: Andrew Morton --- include/linux/swap.h | 1 - mm/folio-compat.c | 6 ------ mm/truncate.c | 2 +- mm/workingset.c | 5 ++++- 4 files changed, 5 insertions(+), 9 deletions(-) diff --git a/include/linux/swap.h b/include/linux/swap.h index b61e2007d156..0ceed49516ad 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -381,7 +381,6 @@ void lru_note_cost(struct lruvec *lruvec, bool file, void lru_note_cost_refault(struct folio *); void folio_add_lru(struct folio *); void folio_add_lru_vma(struct folio *, struct vm_area_struct *); -void lru_cache_add(struct page *); void mark_page_accessed(struct page *); void folio_mark_accessed(struct folio *); diff --git a/mm/folio-compat.c b/mm/folio-compat.c index 86933fa8f3e1..69ed25790c68 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -76,12 +76,6 @@ bool redirty_page_for_writepage(struct writeback_control *wbc, } EXPORT_SYMBOL(redirty_page_for_writepage); -void lru_cache_add(struct page *page) -{ - folio_add_lru(page_folio(page)); -} -EXPORT_SYMBOL(lru_cache_add); - void lru_cache_add_inactive_or_unevictable(struct page *page, struct vm_area_struct *vma) { diff --git a/mm/truncate.c b/mm/truncate.c index c7bfd247a651..7b4ea4c4a46b 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -565,7 +565,7 @@ EXPORT_SYMBOL(invalidate_mapping_pages); * refcount. We do this because invalidate_inode_pages2() needs stronger * invalidation guarantees, and cannot afford to leave pages behind because * shrink_page_list() has a temp ref on them, or because they're transiently - * sitting in the lru_cache_add() pagevecs. + * sitting in the folio_add_lru() pagevecs. 
*/ static int invalidate_complete_folio2(struct address_space *mapping, struct folio *folio) diff --git a/mm/workingset.c b/mm/workingset.c index d2d02978588c..1a86645b7b3c 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -492,7 +492,10 @@ void workingset_refault(struct folio *folio, void *shadow) /* Folio was active prior to eviction */ if (workingset) { folio_set_workingset(folio); - /* XXX: Move to lru_cache_add() when it supports new vs putback */ + /* + * XXX: Move to folio_add_lru() when it supports new vs + * putback + */ lru_note_cost_refault(folio); mod_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + file, nr); } From 9fd330582b2fe43c49ebcd02b2480f051f85aad4 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Tue, 29 Nov 2022 14:50:30 -0800 Subject: [PATCH 3711/4122] mm: add folio dtor and order setter functions Patch series "convert core hugetlb functions to folios", v5. ============== OVERVIEW =========================== Now that many hugetlb helper functions that deal with hugetlb specific flags[1] and hugetlb cgroups[2] are converted to folios, higher level allocation, prep, and freeing functions within hugetlb can also be converted to operate in folios. Patch 1 of this series implements the wrapper functions around setting the compound destructor and compound order for a folio. Besides the user added in patch 1, patch 2 and patch 9 also use these helper functions. Patches 2-10 convert the higher level hugetlb functions to folios. ============== TESTING =========================== LTP: Ran 10 back to back rounds of the LTP hugetlb test suite. 
Gigantic Huge Pages: Test allocation and freeing via hugeadm commands: hugeadm --pool-pages-min 1GB:10 hugeadm --pool-pages-min 1GB:0 Demote: Demote 1 1GB hugepages to 512 2MB hugepages echo 1 > /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages echo 1 > /sys/kernel/mm/hugepages/hugepages-1048576kB/demote cat /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages # 512 cat /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages # 0 [1] https://lore.kernel.org/lkml/20220922154207.1575343-1-sidhartha.kumar@oracle.com/ [2] https://lore.kernel.org/linux-mm/20221101223059.460937-1-sidhartha.kumar@oracle.com/ This patch (of 10): Add folio equivalents for set_compound_order() and set_compound_page_dtor(). Also remove extra new-lines introduced by mm/hugetlb: convert move_hugetlb_state() to folios and mm/hugetlb_cgroup: convert hugetlb_cgroup_uncharge_page() to folios. [sidhartha.kumar@oracle.com: clarify folio_set_compound_order() zero support] Link: https://lkml.kernel.org/r/20221207223731.32784-1-sidhartha.kumar@oracle.com Link: https://lkml.kernel.org/r/20221129225039.82257-1-sidhartha.kumar@oracle.com Link: https://lkml.kernel.org/r/20221129225039.82257-2-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Suggested-by: Mike Kravetz Suggested-by: Muchun Song Reviewed-by: Mike Kravetz Cc: David Hildenbrand Cc: John Hubbard Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Mina Almasry Cc: Tarun Sahu Cc: Rasmus Villemoes Cc: Wei Chen Signed-off-by: Andrew Morton --- include/linux/mm.h | 23 +++++++++++++++++++++++ mm/hugetlb.c | 4 +--- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 472cb60ace07..7dc376052d40 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -997,6 +997,13 @@ static inline void set_compound_page_dtor(struct page *page, page[1].compound_dtor = compound_dtor; } +static inline void folio_set_compound_dtor(struct folio *folio, + enum compound_dtor_id compound_dtor) +{ + 
VM_BUG_ON_FOLIO(compound_dtor >= NR_COMPOUND_DTORS, folio); + folio->_folio_dtor = compound_dtor; +} + void destroy_large_folio(struct folio *folio); static inline int head_compound_pincount(struct page *head) @@ -1012,6 +1019,22 @@ static inline void set_compound_order(struct page *page, unsigned int order) #endif } +/* + * folio_set_compound_order is generally passed a non-zero order to + * initialize a large folio. However, hugetlb code abuses this by + * passing in zero when 'dissolving' a large folio. + */ +static inline void folio_set_compound_order(struct folio *folio, + unsigned int order) +{ + VM_BUG_ON_FOLIO(!folio_test_large(folio), folio); + + folio->_folio_order = order; +#ifdef CONFIG_64BIT + folio->_folio_nr_pages = order ? 1U << order : 0; +#endif +} + /* Returns the number of pages in this potentially compound page. */ static inline unsigned long compound_nr(struct page *page) { diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 9d97c9a2a15d..22512f7b0237 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1780,7 +1780,7 @@ static void __prep_new_hugetlb_folio(struct hstate *h, struct folio *folio) { hugetlb_vmemmap_optimize(h, &folio->page); INIT_LIST_HEAD(&folio->lru); - folio->_folio_dtor = HUGETLB_PAGE_DTOR; + folio_set_compound_dtor(folio, HUGETLB_PAGE_DTOR); hugetlb_set_folio_subpool(folio, NULL); set_hugetlb_cgroup(folio, NULL); set_hugetlb_cgroup_rsvd(folio, NULL); @@ -2938,7 +2938,6 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, * a reservation exists for the allocation. 
*/ page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve, gbl_chg); - if (!page) { spin_unlock_irq(&hugetlb_lock); page = alloc_buddy_huge_page_with_mpol(h, vma, addr); @@ -7343,7 +7342,6 @@ void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int re int old_nid = folio_nid(old_folio); int new_nid = folio_nid(new_folio); - folio_set_hugetlb_temporary(old_folio); folio_clear_hugetlb_temporary(new_folio); From 911565b8285381e62d3bfd0cae2889a022737c37 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Tue, 29 Nov 2022 14:50:31 -0800 Subject: [PATCH 3712/4122] mm/hugetlb: convert destroy_compound_gigantic_page() to folios Convert page operations within __destroy_compound_gigantic_page() to the corresponding folio operations. Link: https://lkml.kernel.org/r/20221129225039.82257-3-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Mike Kravetz Cc: David Hildenbrand Cc: John Hubbard Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Mina Almasry Cc: Muchun Song Cc: Rasmus Villemoes Cc: Tarun Sahu Cc: Wei Chen Signed-off-by: Andrew Morton --- mm/hugetlb.c | 43 +++++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 22512f7b0237..5960a05cb370 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1325,43 +1325,40 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed) nr_nodes--) /* used to demote non-gigantic_huge pages as well */ -static void __destroy_compound_gigantic_page(struct page *page, +static void __destroy_compound_gigantic_folio(struct folio *folio, unsigned int order, bool demote) { int i; int nr_pages = 1 << order; struct page *p; - atomic_set(compound_mapcount_ptr(page), 0); - atomic_set(subpages_mapcount_ptr(page), 0); - atomic_set(compound_pincount_ptr(page), 0); + atomic_set(folio_mapcount_ptr(folio), 0); + atomic_set(folio_subpages_mapcount_ptr(folio), 0); + atomic_set(folio_pincount_ptr(folio), 0); for (i = 1; i < 
nr_pages; i++) { - p = nth_page(page, i); + p = folio_page(folio, i); p->mapping = NULL; clear_compound_head(p); if (!demote) set_page_refcounted(p); } - set_compound_order(page, 0); -#ifdef CONFIG_64BIT - page[1].compound_nr = 0; -#endif - __ClearPageHead(page); + folio_set_compound_order(folio, 0); + __folio_clear_head(folio); } -static void destroy_compound_hugetlb_page_for_demote(struct page *page, +static void destroy_compound_hugetlb_folio_for_demote(struct folio *folio, unsigned int order) { - __destroy_compound_gigantic_page(page, order, true); + __destroy_compound_gigantic_folio(folio, order, true); } #ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE -static void destroy_compound_gigantic_page(struct page *page, +static void destroy_compound_gigantic_folio(struct folio *folio, unsigned int order) { - __destroy_compound_gigantic_page(page, order, false); + __destroy_compound_gigantic_folio(folio, order, false); } static void free_gigantic_page(struct page *page, unsigned int order) @@ -1430,7 +1427,7 @@ static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask, return NULL; } static inline void free_gigantic_page(struct page *page, unsigned int order) { } -static inline void destroy_compound_gigantic_page(struct page *page, +static inline void destroy_compound_gigantic_folio(struct folio *folio, unsigned int order) { } #endif @@ -1477,8 +1474,8 @@ static void __remove_hugetlb_page(struct hstate *h, struct page *page, * * For gigantic pages set the destructor to the null dtor. This * destructor will never be called. Before freeing the gigantic - * page destroy_compound_gigantic_page will turn the compound page - * into a simple group of pages. After this the destructor does not + * page destroy_compound_gigantic_folio will turn the folio into a + * simple group of pages. After this the destructor does not * apply. 
* * This handles the case where more than one ref is held when and @@ -1559,6 +1556,7 @@ static void add_hugetlb_page(struct hstate *h, struct page *page, static void __update_and_free_page(struct hstate *h, struct page *page) { int i; + struct folio *folio = page_folio(page); struct page *subpage; if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported()) @@ -1587,8 +1585,8 @@ static void __update_and_free_page(struct hstate *h, struct page *page) * Move PageHWPoison flag from head page to the raw error pages, * which makes any healthy subpages reusable. */ - if (unlikely(PageHWPoison(page))) - hugetlb_clear_page_hwpoison(page); + if (unlikely(folio_test_hwpoison(folio))) + hugetlb_clear_page_hwpoison(&folio->page); for (i = 0; i < pages_per_huge_page(h); i++) { subpage = nth_page(page, i); @@ -1604,7 +1602,7 @@ static void __update_and_free_page(struct hstate *h, struct page *page) */ if (hstate_is_gigantic(h) || hugetlb_cma_page(page, huge_page_order(h))) { - destroy_compound_gigantic_page(page, huge_page_order(h)); + destroy_compound_gigantic_folio(folio, huge_page_order(h)); free_gigantic_page(page, huge_page_order(h)); } else { __free_pages(page, huge_page_order(h)); @@ -3437,6 +3435,7 @@ static int demote_free_huge_page(struct hstate *h, struct page *page) { int i, nid = page_to_nid(page); struct hstate *target_hstate; + struct folio *folio = page_folio(page); struct page *subpage; int rc = 0; @@ -3455,10 +3454,10 @@ static int demote_free_huge_page(struct hstate *h, struct page *page) } /* - * Use destroy_compound_hugetlb_page_for_demote for all huge page + * Use destroy_compound_hugetlb_folio_for_demote for all huge page * sizes as it will not ref count pages. */ - destroy_compound_hugetlb_page_for_demote(page, huge_page_order(h)); + destroy_compound_hugetlb_folio_for_demote(folio, huge_page_order(h)); /* * Taking target hstate mutex synchronizes with set_max_huge_pages. 
From 1a7cdab59b22465b850501e3897a3f3aa01670d8 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Tue, 29 Nov 2022 14:50:32 -0800 Subject: [PATCH 3713/4122] mm/hugetlb: convert dissolve_free_huge_page() to folios Removes compound_head() call by using a folio rather than a head page. Link: https://lkml.kernel.org/r/20221129225039.82257-4-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Mike Kravetz Cc: David Hildenbrand Cc: John Hubbard Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Mina Almasry Cc: Muchun Song Cc: Rasmus Villemoes Cc: Tarun Sahu Cc: Wei Chen Signed-off-by: Andrew Morton --- mm/hugetlb.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 5960a05cb370..d02293fd2e64 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2128,21 +2128,21 @@ static struct page *remove_pool_huge_page(struct hstate *h, int dissolve_free_huge_page(struct page *page) { int rc = -EBUSY; + struct folio *folio = page_folio(page); retry: /* Not to disrupt normal path by vainly holding hugetlb_lock */ - if (!PageHuge(page)) + if (!folio_test_hugetlb(folio)) return 0; spin_lock_irq(&hugetlb_lock); - if (!PageHuge(page)) { + if (!folio_test_hugetlb(folio)) { rc = 0; goto out; } - if (!page_count(page)) { - struct page *head = compound_head(page); - struct hstate *h = page_hstate(head); + if (!folio_ref_count(folio)) { + struct hstate *h = folio_hstate(folio); if (!available_huge_pages(h)) goto out; @@ -2150,7 +2150,7 @@ retry: * We should make sure that the page is already on the free list * when it is dissolved. 
*/ - if (unlikely(!HPageFreed(head))) { + if (unlikely(!folio_test_hugetlb_freed(folio))) { spin_unlock_irq(&hugetlb_lock); cond_resched(); @@ -2165,7 +2165,7 @@ retry: goto retry; } - remove_hugetlb_page(h, head, false); + remove_hugetlb_page(h, &folio->page, false); h->max_huge_pages--; spin_unlock_irq(&hugetlb_lock); @@ -2177,12 +2177,12 @@ retry: * Attempt to allocate vmemmmap here so that we can take * appropriate action on failure. */ - rc = hugetlb_vmemmap_restore(h, head); + rc = hugetlb_vmemmap_restore(h, &folio->page); if (!rc) { - update_and_free_page(h, head, false); + update_and_free_page(h, &folio->page, false); } else { spin_lock_irq(&hugetlb_lock); - add_hugetlb_page(h, head, false); + add_hugetlb_page(h, &folio->page, false); h->max_huge_pages++; spin_unlock_irq(&hugetlb_lock); } From cfd5082b514765f873504cc60a50cce30738bfd3 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Tue, 29 Nov 2022 14:50:33 -0800 Subject: [PATCH 3714/4122] mm/hugetlb: convert remove_hugetlb_page() to folios Removes page_folio() call by converting callers to directly pass a folio into __remove_hugetlb_page(). Link: https://lkml.kernel.org/r/20221129225039.82257-5-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Mike Kravetz Cc: David Hildenbrand Cc: John Hubbard Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Mina Almasry Cc: Muchun Song Cc: Rasmus Villemoes Cc: Tarun Sahu Cc: Wei Chen Signed-off-by: Andrew Morton --- mm/hugetlb.c | 48 +++++++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index d02293fd2e64..9b1c9d05ba34 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1432,19 +1432,18 @@ static inline void destroy_compound_gigantic_folio(struct folio *folio, #endif /* - * Remove hugetlb page from lists, and update dtor so that page appears + * Remove hugetlb folio from lists, and update dtor so that the folio appears * as just a compound page. 
* - * A reference is held on the page, except in the case of demote. + * A reference is held on the folio, except in the case of demote. * * Must be called with hugetlb lock held. */ -static void __remove_hugetlb_page(struct hstate *h, struct page *page, +static void __remove_hugetlb_folio(struct hstate *h, struct folio *folio, bool adjust_surplus, bool demote) { - int nid = page_to_nid(page); - struct folio *folio = page_folio(page); + int nid = folio_nid(folio); VM_BUG_ON_FOLIO(hugetlb_cgroup_from_folio(folio), folio); VM_BUG_ON_FOLIO(hugetlb_cgroup_from_folio_rsvd(folio), folio); @@ -1453,9 +1452,9 @@ static void __remove_hugetlb_page(struct hstate *h, struct page *page, if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported()) return; - list_del(&page->lru); + list_del(&folio->lru); - if (HPageFreed(page)) { + if (folio_test_hugetlb_freed(folio)) { h->free_huge_pages--; h->free_huge_pages_node[nid]--; } @@ -1485,26 +1484,26 @@ static void __remove_hugetlb_page(struct hstate *h, struct page *page, * be turned into a page of smaller size. 
*/ if (!demote) - set_page_refcounted(page); + folio_ref_unfreeze(folio, 1); if (hstate_is_gigantic(h)) - set_compound_page_dtor(page, NULL_COMPOUND_DTOR); + folio_set_compound_dtor(folio, NULL_COMPOUND_DTOR); else - set_compound_page_dtor(page, COMPOUND_PAGE_DTOR); + folio_set_compound_dtor(folio, COMPOUND_PAGE_DTOR); h->nr_huge_pages--; h->nr_huge_pages_node[nid]--; } -static void remove_hugetlb_page(struct hstate *h, struct page *page, +static void remove_hugetlb_folio(struct hstate *h, struct folio *folio, bool adjust_surplus) { - __remove_hugetlb_page(h, page, adjust_surplus, false); + __remove_hugetlb_folio(h, folio, adjust_surplus, false); } -static void remove_hugetlb_page_for_demote(struct hstate *h, struct page *page, +static void remove_hugetlb_folio_for_demote(struct hstate *h, struct folio *folio, bool adjust_surplus) { - __remove_hugetlb_page(h, page, adjust_surplus, true); + __remove_hugetlb_folio(h, folio, adjust_surplus, true); } static void add_hugetlb_page(struct hstate *h, struct page *page, @@ -1639,8 +1638,9 @@ static void free_hpage_workfn(struct work_struct *work) /* * The VM_BUG_ON_PAGE(!PageHuge(page), page) in page_hstate() * is going to trigger because a previous call to - * remove_hugetlb_page() will set_compound_page_dtor(page, - * NULL_COMPOUND_DTOR), so do not use page_hstate() directly. + * remove_hugetlb_folio() will call folio_set_compound_dtor + * (folio, NULL_COMPOUND_DTOR), so do not use page_hstate() + * directly. 
*/ h = size_to_hstate(page_size(page)); @@ -1749,12 +1749,12 @@ void free_huge_page(struct page *page) h->resv_huge_pages++; if (folio_test_hugetlb_temporary(folio)) { - remove_hugetlb_page(h, page, false); + remove_hugetlb_folio(h, folio, false); spin_unlock_irqrestore(&hugetlb_lock, flags); update_and_free_page(h, page, true); } else if (h->surplus_huge_pages_node[nid]) { /* remove the page from active list */ - remove_hugetlb_page(h, page, true); + remove_hugetlb_folio(h, folio, true); spin_unlock_irqrestore(&hugetlb_lock, flags); update_and_free_page(h, page, true); } else { @@ -2092,6 +2092,7 @@ static struct page *remove_pool_huge_page(struct hstate *h, { int nr_nodes, node; struct page *page = NULL; + struct folio *folio; lockdep_assert_held(&hugetlb_lock); for_each_node_mask_to_free(h, nr_nodes, node, nodes_allowed) { @@ -2103,7 +2104,8 @@ static struct page *remove_pool_huge_page(struct hstate *h, !list_empty(&h->hugepage_freelists[node])) { page = list_entry(h->hugepage_freelists[node].next, struct page, lru); - remove_hugetlb_page(h, page, acct_surplus); + folio = page_folio(page); + remove_hugetlb_folio(h, folio, acct_surplus); break; } } @@ -2165,7 +2167,7 @@ retry: goto retry; } - remove_hugetlb_page(h, &folio->page, false); + remove_hugetlb_folio(h, folio, false); h->max_huge_pages--; spin_unlock_irq(&hugetlb_lock); @@ -2803,7 +2805,7 @@ retry: * and enqueue_huge_page() for new_page. The counters will remain * stable since this happens under the lock. 
*/ - remove_hugetlb_page(h, old_page, false); + remove_hugetlb_folio(h, old_folio, false); /* * Ref count on new page is already zero as it was dropped @@ -3230,7 +3232,7 @@ static void try_to_free_low(struct hstate *h, unsigned long count, goto out; if (PageHighMem(page)) continue; - remove_hugetlb_page(h, page, false); + remove_hugetlb_folio(h, page_folio(page), false); list_add(&page->lru, &page_list); } } @@ -3441,7 +3443,7 @@ static int demote_free_huge_page(struct hstate *h, struct page *page) target_hstate = size_to_hstate(PAGE_SIZE << h->demote_order); - remove_hugetlb_page_for_demote(h, page, false); + remove_hugetlb_folio_for_demote(h, folio, false); spin_unlock_irq(&hugetlb_lock); rc = hugetlb_vmemmap_restore(h, page); From d6ef19e25df2aa50f932a78c368d7bb710eaaa1b Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Tue, 29 Nov 2022 14:50:34 -0800 Subject: [PATCH 3715/4122] mm/hugetlb: convert update_and_free_page() to folios Make more progress on converting the free_huge_page() destructor to operate on folios by converting update_and_free_page() to folios. Link: https://lkml.kernel.org/r/20221129225039.82257-6-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Mike Kravetz Cc: David Hildenbrand Cc: John Hubbard Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Mina Almasry Cc: Muchun Song Cc: Rasmus Villemoes Cc: Tarun Sahu Cc: Wei Chen Signed-off-by: Andrew Morton --- mm/hugetlb.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 9b1c9d05ba34..5f3622ce791f 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1478,7 +1478,7 @@ static void __remove_hugetlb_folio(struct hstate *h, struct folio *folio, * apply. * * This handles the case where more than one ref is held when and - * after update_and_free_page is called. + * after update_and_free_hugetlb_folio is called.
* * In the case of demote we do not ref count the page as it will soon * be turned into a page of smaller size. @@ -1609,7 +1609,7 @@ static void __update_and_free_page(struct hstate *h, struct page *page) } /* - * As update_and_free_page() can be called under any context, so we cannot + * As update_and_free_hugetlb_folio() can be called under any context, so we cannot * use GFP_KERNEL to allocate vmemmap pages. However, we can defer the * actual freeing in a workqueue to prevent from using GFP_ATOMIC to allocate * the vmemmap pages. @@ -1657,11 +1657,11 @@ static inline void flush_free_hpage_work(struct hstate *h) flush_work(&free_hpage_work); } -static void update_and_free_page(struct hstate *h, struct page *page, +static void update_and_free_hugetlb_folio(struct hstate *h, struct folio *folio, bool atomic) { - if (!HPageVmemmapOptimized(page) || !atomic) { - __update_and_free_page(h, page); + if (!folio_test_hugetlb_vmemmap_optimized(folio) || !atomic) { + __update_and_free_page(h, &folio->page); return; } @@ -1672,16 +1672,18 @@ static void update_and_free_page(struct hstate *h, struct page *page, * empty. Otherwise, schedule_work() had been called but the workfn * hasn't retrieved the list yet. 
*/ - if (llist_add((struct llist_node *)&page->mapping, &hpage_freelist)) + if (llist_add((struct llist_node *)&folio->mapping, &hpage_freelist)) schedule_work(&free_hpage_work); } static void update_and_free_pages_bulk(struct hstate *h, struct list_head *list) { struct page *page, *t_page; + struct folio *folio; list_for_each_entry_safe(page, t_page, list, lru) { - update_and_free_page(h, page, false); + folio = page_folio(page); + update_and_free_hugetlb_folio(h, folio, false); cond_resched(); } } @@ -1751,12 +1753,12 @@ void free_huge_page(struct page *page) if (folio_test_hugetlb_temporary(folio)) { remove_hugetlb_folio(h, folio, false); spin_unlock_irqrestore(&hugetlb_lock, flags); - update_and_free_page(h, page, true); + update_and_free_hugetlb_folio(h, folio, true); } else if (h->surplus_huge_pages_node[nid]) { /* remove the page from active list */ remove_hugetlb_folio(h, folio, true); spin_unlock_irqrestore(&hugetlb_lock, flags); - update_and_free_page(h, page, true); + update_and_free_hugetlb_folio(h, folio, true); } else { arch_clear_hugepage_flags(page); enqueue_huge_page(h, page); @@ -2172,8 +2174,8 @@ retry: spin_unlock_irq(&hugetlb_lock); /* - * Normally update_and_free_page will allocate required vmemmmap - * before freeing the page. update_and_free_page will fail to + * Normally update_and_free_hugetlb_folio will allocate required vmemmmap + * before freeing the page. update_and_free_hugetlb_folio will fail to * free the page if it can not allocate required vmemmap. We * need to adjust max_huge_pages if the page is not freed. * Attempt to allocate vmemmmap here so that we can take @@ -2181,7 +2183,7 @@ retry: */ rc = hugetlb_vmemmap_restore(h, &folio->page); if (!rc) { - update_and_free_page(h, &folio->page, false); + update_and_free_hugetlb_folio(h, folio, false); } else { spin_lock_irq(&hugetlb_lock); add_hugetlb_page(h, &folio->page, false); @@ -2818,7 +2820,7 @@ retry: * Pages have been replaced, we can safely free the old one.
*/ spin_unlock_irq(&hugetlb_lock); - update_and_free_page(h, old_page, false); + update_and_free_hugetlb_folio(h, old_folio, false); } return ret; @@ -2827,7 +2829,7 @@ free_new: spin_unlock_irq(&hugetlb_lock); /* Page has a zero ref count, but needs a ref to be freed */ folio_ref_unfreeze(new_folio, 1); - update_and_free_page(h, new_page, false); + update_and_free_hugetlb_folio(h, new_folio, false); return ret; } From 2f6c57d696abcd2d27d07b8506d5e6bcc060e77a Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Tue, 29 Nov 2022 14:50:35 -0800 Subject: [PATCH 3716/4122] mm/hugetlb: convert add_hugetlb_page() to folios and add hugetlb_cma_folio() Convert add_hugetlb_page() to take in a folio, also convert hugetlb_cma_page() to take in a folio. Link: https://lkml.kernel.org/r/20221129225039.82257-7-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Mike Kravetz Cc: David Hildenbrand Cc: John Hubbard Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Mina Almasry Cc: Muchun Song Cc: Rasmus Villemoes Cc: Tarun Sahu Cc: Wei Chen Signed-off-by: Andrew Morton --- mm/hugetlb.c | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 5f3622ce791f..d80a83490b9a 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -54,13 +54,13 @@ struct hstate hstates[HUGE_MAX_HSTATE]; #ifdef CONFIG_CMA static struct cma *hugetlb_cma[MAX_NUMNODES]; static unsigned long hugetlb_cma_size_in_node[MAX_NUMNODES] __initdata; -static bool hugetlb_cma_page(struct page *page, unsigned int order) +static bool hugetlb_cma_folio(struct folio *folio, unsigned int order) { - return cma_pages_valid(hugetlb_cma[page_to_nid(page)], page, + return cma_pages_valid(hugetlb_cma[folio_nid(folio)], &folio->page, 1 << order); } #else -static bool hugetlb_cma_page(struct page *page, unsigned int order) +static bool hugetlb_cma_folio(struct folio *folio, unsigned int order) { return false; } @@ -1506,17 +1506,17 @@ static void 
remove_hugetlb_folio_for_demote(struct hstate *h, struct folio *foli __remove_hugetlb_folio(h, folio, adjust_surplus, true); } -static void add_hugetlb_page(struct hstate *h, struct page *page, +static void add_hugetlb_folio(struct hstate *h, struct folio *folio, bool adjust_surplus) { int zeroed; - int nid = page_to_nid(page); + int nid = folio_nid(folio); - VM_BUG_ON_PAGE(!HPageVmemmapOptimized(page), page); + VM_BUG_ON_FOLIO(!folio_test_hugetlb_vmemmap_optimized(folio), folio); lockdep_assert_held(&hugetlb_lock); - INIT_LIST_HEAD(&page->lru); + INIT_LIST_HEAD(&folio->lru); h->nr_huge_pages++; h->nr_huge_pages_node[nid]++; @@ -1525,21 +1525,21 @@ static void add_hugetlb_page(struct hstate *h, struct page *page, h->surplus_huge_pages_node[nid]++; } - set_compound_page_dtor(page, HUGETLB_PAGE_DTOR); - set_page_private(page, 0); + folio_set_compound_dtor(folio, HUGETLB_PAGE_DTOR); + folio_change_private(folio, NULL); /* - * We have to set HPageVmemmapOptimized again as above - * set_page_private(page, 0) cleared it. + * We have to set hugetlb_vmemmap_optimized again as above + * folio_change_private(folio, NULL) cleared it. */ - SetHPageVmemmapOptimized(page); + folio_set_hugetlb_vmemmap_optimized(folio); /* - * This page is about to be managed by the hugetlb allocator and + * This folio is about to be managed by the hugetlb allocator and * should have no users. Drop our reference, and check for others * just in case. */ - zeroed = put_page_testzero(page); - if (!zeroed) + zeroed = folio_put_testzero(folio); + if (unlikely(!zeroed)) /* * It is VERY unlikely soneone else has taken a ref on * the page. 
In this case, we simply return as the @@ -1548,8 +1548,8 @@ static void add_hugetlb_page(struct hstate *h, struct page *page, */ return; - arch_clear_hugepage_flags(page); - enqueue_huge_page(h, page); + arch_clear_hugepage_flags(&folio->page); + enqueue_huge_page(h, &folio->page); } static void __update_and_free_page(struct hstate *h, struct page *page) @@ -1575,7 +1575,7 @@ static void __update_and_free_page(struct hstate *h, struct page *page) * page and put the page back on the hugetlb free list and treat * as a surplus page. */ - add_hugetlb_page(h, page, true); + add_hugetlb_folio(h, page_folio(page), true); spin_unlock_irq(&hugetlb_lock); return; } @@ -1600,7 +1600,7 @@ static void __update_and_free_page(struct hstate *h, struct page *page) * need to be given back to CMA in free_gigantic_page. */ if (hstate_is_gigantic(h) || - hugetlb_cma_page(page, huge_page_order(h))) { + hugetlb_cma_folio(folio, huge_page_order(h))) { destroy_compound_gigantic_folio(folio, huge_page_order(h)); free_gigantic_page(page, huge_page_order(h)); } else { @@ -2186,7 +2186,7 @@ retry: update_and_free_hugetlb_folio(h, folio, false); } else { spin_lock_irq(&hugetlb_lock); - add_hugetlb_page(h, &folio->page, false); + add_hugetlb_folio(h, folio, false); h->max_huge_pages++; spin_unlock_irq(&hugetlb_lock); } @@ -3453,7 +3453,7 @@ static int demote_free_huge_page(struct hstate *h, struct page *page) /* Allocation of vmemmmap failed, we can not demote page */ spin_lock_irq(&hugetlb_lock); set_page_refcounted(page); - add_hugetlb_page(h, page, false); + add_hugetlb_folio(h, page_folio(page), false); return rc; } From 240d67a86ecb0fa18863821a0cb55783ad50ef30 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Tue, 29 Nov 2022 14:50:36 -0800 Subject: [PATCH 3717/4122] mm/hugetlb: convert enqueue_huge_page() to folios Convert callers of enqueue_huge_page() to pass in a folio, function is renamed to enqueue_hugetlb_folio(). 
Link: https://lkml.kernel.org/r/20221129225039.82257-8-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Mike Kravetz Cc: David Hildenbrand Cc: John Hubbard Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Mina Almasry Cc: Muchun Song Cc: Rasmus Villemoes Cc: Tarun Sahu Cc: Wei Chen Signed-off-by: Andrew Morton --- mm/hugetlb.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index d80a83490b9a..6da673f1d830 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1127,17 +1127,17 @@ static bool vma_has_reserves(struct vm_area_struct *vma, long chg) return false; } -static void enqueue_huge_page(struct hstate *h, struct page *page) +static void enqueue_hugetlb_folio(struct hstate *h, struct folio *folio) { - int nid = page_to_nid(page); + int nid = folio_nid(folio); lockdep_assert_held(&hugetlb_lock); - VM_BUG_ON_PAGE(page_count(page), page); + VM_BUG_ON_FOLIO(folio_ref_count(folio), folio); - list_move(&page->lru, &h->hugepage_freelists[nid]); + list_move(&folio->lru, &h->hugepage_freelists[nid]); h->free_huge_pages++; h->free_huge_pages_node[nid]++; - SetHPageFreed(page); + folio_set_hugetlb_freed(folio); } static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid) @@ -1549,7 +1549,7 @@ static void add_hugetlb_folio(struct hstate *h, struct folio *folio, return; arch_clear_hugepage_flags(&folio->page); - enqueue_huge_page(h, &folio->page); + enqueue_hugetlb_folio(h, folio); } static void __update_and_free_page(struct hstate *h, struct page *page) @@ -1761,7 +1761,7 @@ void free_huge_page(struct page *page) update_and_free_hugetlb_folio(h, folio, true); } else { arch_clear_hugepage_flags(page); - enqueue_huge_page(h, page); + enqueue_hugetlb_folio(h, folio); spin_unlock_irqrestore(&hugetlb_lock, flags); } } @@ -2438,7 +2438,7 @@ retry: if ((--needed) < 0) break; /* Add the page to the hugetlb allocator */ - enqueue_huge_page(h, page); + enqueue_hugetlb_folio(h, 
page_folio(page)); } free: spin_unlock_irq(&hugetlb_lock); @@ -2804,8 +2804,8 @@ retry: * Ok, old_page is still a genuine free hugepage. Remove it from * the freelist and decrease the counters. These will be * incremented again when calling __prep_account_new_huge_page() - * and enqueue_huge_page() for new_page. The counters will remain - * stable since this happens under the lock. + * and enqueue_hugetlb_folio() for new_folio. The counters will + * remain stable since this happens under the lock. */ remove_hugetlb_folio(h, old_folio, false); @@ -2814,7 +2814,7 @@ retry: * earlier. It can be directly added to the pool free list. */ __prep_account_new_huge_page(h, nid); - enqueue_huge_page(h, new_page); + enqueue_hugetlb_folio(h, new_folio); /* * Pages have been replaced, we can safely free the old one. From 7f325a8d25631e68cd75afaeaf330187e45e0eb5 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Tue, 29 Nov 2022 14:50:37 -0800 Subject: [PATCH 3718/4122] mm/hugetlb: convert free_gigantic_page() to folios Convert callers of free_gigantic_page() to use folios, function is then renamed to free_gigantic_folio(). 
Link: https://lkml.kernel.org/r/20221129225039.82257-9-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Mike Kravetz Cc: David Hildenbrand Cc: John Hubbard Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Mina Almasry Cc: Muchun Song Cc: Rasmus Villemoes Cc: Tarun Sahu Cc: Wei Chen Signed-off-by: Andrew Morton --- mm/hugetlb.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 6da673f1d830..eb58b0f38222 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1361,18 +1361,20 @@ static void destroy_compound_gigantic_folio(struct folio *folio, __destroy_compound_gigantic_folio(folio, order, false); } -static void free_gigantic_page(struct page *page, unsigned int order) +static void free_gigantic_folio(struct folio *folio, unsigned int order) { /* * If the page isn't allocated using the cma allocator, * cma_release() returns false. */ #ifdef CONFIG_CMA - if (cma_release(hugetlb_cma[page_to_nid(page)], page, 1 << order)) + int nid = folio_nid(folio); + + if (cma_release(hugetlb_cma[nid], &folio->page, 1 << order)) return; #endif - free_contig_range(page_to_pfn(page), 1 << order); + free_contig_range(folio_pfn(folio), 1 << order); } #ifdef CONFIG_CONTIG_ALLOC @@ -1426,7 +1428,8 @@ static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask, { return NULL; } -static inline void free_gigantic_page(struct page *page, unsigned int order) { } +static inline void free_gigantic_folio(struct folio *folio, + unsigned int order) { } static inline void destroy_compound_gigantic_folio(struct folio *folio, unsigned int order) { } #endif @@ -1565,7 +1568,7 @@ static void __update_and_free_page(struct hstate *h, struct page *page) * If we don't know which subpages are hwpoisoned, we can't free * the hugepage, so it's leaked intentionally. 
*/ - if (HPageRawHwpUnreliable(page)) + if (folio_test_hugetlb_raw_hwp_unreliable(folio)) return; if (hugetlb_vmemmap_restore(h, page)) { @@ -1575,7 +1578,7 @@ static void __update_and_free_page(struct hstate *h, struct page *page) * page and put the page back on the hugetlb free list and treat * as a surplus page. */ - add_hugetlb_folio(h, page_folio(page), true); + add_hugetlb_folio(h, folio, true); spin_unlock_irq(&hugetlb_lock); return; } @@ -1588,7 +1591,7 @@ static void __update_and_free_page(struct hstate *h, struct page *page) hugetlb_clear_page_hwpoison(&folio->page); for (i = 0; i < pages_per_huge_page(h); i++) { - subpage = nth_page(page, i); + subpage = folio_page(folio, i); subpage->flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced | 1 << PG_dirty | 1 << PG_active | 1 << PG_private | @@ -1597,12 +1600,12 @@ static void __update_and_free_page(struct hstate *h, struct page *page) /* * Non-gigantic pages demoted from CMA allocated gigantic pages - * need to be given back to CMA in free_gigantic_page. + * need to be given back to CMA in free_gigantic_folio. */ if (hstate_is_gigantic(h) || hugetlb_cma_folio(folio, huge_page_order(h))) { destroy_compound_gigantic_folio(folio, huge_page_order(h)); - free_gigantic_page(page, huge_page_order(h)); + free_gigantic_folio(folio, huge_page_order(h)); } else { __free_pages(page, huge_page_order(h)); } @@ -2025,6 +2028,7 @@ static struct page *alloc_fresh_huge_page(struct hstate *h, nodemask_t *node_alloc_noretry) { struct page *page; + struct folio *folio; bool retry = false; retry: @@ -2035,14 +2039,14 @@ retry: nid, nmask, node_alloc_noretry); if (!page) return NULL; - + folio = page_folio(page); if (hstate_is_gigantic(h)) { if (!prep_compound_gigantic_page(page, huge_page_order(h))) { /* * Rare failure to convert pages to compound page. * Free pages and try again - ONCE! 
*/ - free_gigantic_page(page, huge_page_order(h)); + free_gigantic_folio(folio, huge_page_order(h)); if (!retry) { retry = true; goto retry; @@ -3050,6 +3054,7 @@ static void __init gather_bootmem_prealloc(void) list_for_each_entry(m, &huge_boot_pages, list) { struct page *page = virt_to_page(m); + struct folio *folio = page_folio(page); struct hstate *h = m->hstate; VM_BUG_ON(!hstate_is_gigantic(h)); @@ -3060,7 +3065,7 @@ static void __init gather_bootmem_prealloc(void) free_huge_page(page); /* add to the hugepage allocator */ } else { /* VERY unlikely inflated ref count on a tail page */ - free_gigantic_page(page, huge_page_order(h)); + free_gigantic_folio(folio, huge_page_order(h)); } /* From d1c6095572d0cf00c0cd30378639ff9387b34edd Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Tue, 29 Nov 2022 14:50:38 -0800 Subject: [PATCH 3719/4122] mm/hugetlb: convert hugetlb prep functions to folios Convert prep_new_huge_page() and __prep_compound_gigantic_page() to folios. Link: https://lkml.kernel.org/r/20221129225039.82257-10-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Mike Kravetz Cc: David Hildenbrand Cc: John Hubbard Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Mina Almasry Cc: Muchun Song Cc: Rasmus Villemoes Cc: Tarun Sahu Cc: Wei Chen Signed-off-by: Andrew Morton --- mm/hugetlb.c | 69 +++++++++++++++++++++++++--------------------------- 1 file changed, 33 insertions(+), 36 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index eb58b0f38222..903ee75cccd6 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1789,29 +1789,27 @@ static void __prep_new_hugetlb_folio(struct hstate *h, struct folio *folio) set_hugetlb_cgroup_rsvd(folio, NULL); } -static void prep_new_huge_page(struct hstate *h, struct page *page, int nid) +static void prep_new_hugetlb_folio(struct hstate *h, struct folio *folio, int nid) { - struct folio *folio = page_folio(page); - __prep_new_hugetlb_folio(h, folio); spin_lock_irq(&hugetlb_lock); 
__prep_account_new_huge_page(h, nid); spin_unlock_irq(&hugetlb_lock); } -static bool __prep_compound_gigantic_page(struct page *page, unsigned int order, - bool demote) +static bool __prep_compound_gigantic_folio(struct folio *folio, + unsigned int order, bool demote) { int i, j; int nr_pages = 1 << order; struct page *p; - /* we rely on prep_new_huge_page to set the destructor */ - set_compound_order(page, order); - __ClearPageReserved(page); - __SetPageHead(page); + /* we rely on prep_new_hugetlb_folio to set the destructor */ + folio_set_compound_order(folio, order); + __folio_clear_reserved(folio); + __folio_set_head(folio); for (i = 0; i < nr_pages; i++) { - p = nth_page(page, i); + p = folio_page(folio, i); /* * For gigantic hugepages allocated through bootmem at @@ -1853,43 +1851,41 @@ static bool __prep_compound_gigantic_page(struct page *page, unsigned int order, VM_BUG_ON_PAGE(page_count(p), p); } if (i != 0) - set_compound_head(p, page); + set_compound_head(p, &folio->page); } - atomic_set(compound_mapcount_ptr(page), -1); - atomic_set(subpages_mapcount_ptr(page), 0); - atomic_set(compound_pincount_ptr(page), 0); + atomic_set(folio_mapcount_ptr(folio), -1); + atomic_set(folio_subpages_mapcount_ptr(folio), 0); + atomic_set(folio_pincount_ptr(folio), 0); return true; out_error: /* undo page modifications made above */ for (j = 0; j < i; j++) { - p = nth_page(page, j); + p = folio_page(folio, j); if (j != 0) clear_compound_head(p); set_page_refcounted(p); } /* need to clear PG_reserved on remaining tail pages */ for (; j < nr_pages; j++) { - p = nth_page(page, j); + p = folio_page(folio, j); __ClearPageReserved(p); } - set_compound_order(page, 0); -#ifdef CONFIG_64BIT - page[1].compound_nr = 0; -#endif - __ClearPageHead(page); + folio_set_compound_order(folio, 0); + __folio_clear_head(folio); return false; } -static bool prep_compound_gigantic_page(struct page *page, unsigned int order) -{ - return __prep_compound_gigantic_page(page, order, false); -} - 
-static bool prep_compound_gigantic_page_for_demote(struct page *page, +static bool prep_compound_gigantic_folio(struct folio *folio, unsigned int order) { - return __prep_compound_gigantic_page(page, order, true); + return __prep_compound_gigantic_folio(folio, order, false); +} + +static bool prep_compound_gigantic_folio_for_demote(struct folio *folio, + unsigned int order) +{ + return __prep_compound_gigantic_folio(folio, order, true); } /* @@ -2041,7 +2037,7 @@ retry: return NULL; folio = page_folio(page); if (hstate_is_gigantic(h)) { - if (!prep_compound_gigantic_page(page, huge_page_order(h))) { + if (!prep_compound_gigantic_folio(folio, huge_page_order(h))) { /* * Rare failure to convert pages to compound page. * Free pages and try again - ONCE! @@ -2054,7 +2050,7 @@ retry: return NULL; } } - prep_new_huge_page(h, page, page_to_nid(page)); + prep_new_hugetlb_folio(h, folio, folio_nid(folio)); return page; } @@ -3058,10 +3054,10 @@ static void __init gather_bootmem_prealloc(void) struct hstate *h = m->hstate; VM_BUG_ON(!hstate_is_gigantic(h)); - WARN_ON(page_count(page) != 1); - if (prep_compound_gigantic_page(page, huge_page_order(h))) { - WARN_ON(PageReserved(page)); - prep_new_huge_page(h, page, page_to_nid(page)); + WARN_ON(folio_ref_count(folio) != 1); + if (prep_compound_gigantic_folio(folio, huge_page_order(h))) { + WARN_ON(folio_test_reserved(folio)); + prep_new_hugetlb_folio(h, folio, folio_nid(folio)); free_huge_page(page); /* add to the hugepage allocator */ } else { /* VERY unlikely inflated ref count on a tail page */ @@ -3480,13 +3476,14 @@ static int demote_free_huge_page(struct hstate *h, struct page *page) for (i = 0; i < pages_per_huge_page(h); i += pages_per_huge_page(target_hstate)) { subpage = nth_page(page, i); + folio = page_folio(subpage); if (hstate_is_gigantic(target_hstate)) - prep_compound_gigantic_page_for_demote(subpage, + prep_compound_gigantic_folio_for_demote(folio, target_hstate->order); else prep_compound_page(subpage, 
target_hstate->order); set_page_private(subpage, 0); - prep_new_huge_page(target_hstate, subpage, nid); + prep_new_hugetlb_folio(target_hstate, folio, nid); free_huge_page(subpage); } mutex_unlock(&target_hstate->resize_lock); From 19fc1a7e8b2b3b0e18fbea84ee26517e1b0f1a6e Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Tue, 29 Nov 2022 14:50:39 -0800 Subject: [PATCH 3720/4122] mm/hugetlb: change hugetlb allocation functions to return a folio Many hugetlb allocation helper functions have now been converted to folios; update their higher level callers to be compatible with folios. alloc_pool_huge_page is reorganized to avoid a smatch warning reporting the folio variable is uninitialized. [sidhartha.kumar@oracle.com: update alloc_and_dissolve_hugetlb_folio comments] Link: https://lkml.kernel.org/r/20221206233512.146535-1-sidhartha.kumar@oracle.com Link: https://lkml.kernel.org/r/20221129225039.82257-11-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reported-by: Wei Chen Suggested-by: John Hubbard Suggested-by: Rasmus Villemoes Reviewed-by: Mike Kravetz Cc: David Hildenbrand Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Mina Almasry Cc: Muchun Song Cc: Tarun Sahu Signed-off-by: Andrew Morton --- mm/hugetlb.c | 134 ++++++++++++++++++++++++--------------------------- 1 file changed, 64 insertions(+), 70 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 903ee75cccd6..8c6fe2286814 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1378,23 +1378,23 @@ static void free_gigantic_folio(struct folio *folio, unsigned int order) } #ifdef CONFIG_CONTIG_ALLOC -static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask, +static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask, int nid, nodemask_t *nodemask) { + struct page *page; unsigned long nr_pages = pages_per_huge_page(h); if (nid == NUMA_NO_NODE) nid = numa_mem_id(); #ifdef CONFIG_CMA { - struct page *page; int node; if (hugetlb_cma[nid]) { page = cma_alloc(hugetlb_cma[nid],
nr_pages, huge_page_order(h), true); if (page) - return page; + return page_folio(page); } if (!(gfp_mask & __GFP_THISNODE)) { @@ -1405,17 +1405,18 @@ static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask, page = cma_alloc(hugetlb_cma[node], nr_pages, huge_page_order(h), true); if (page) - return page; + return page_folio(page); } } } #endif - return alloc_contig_pages(nr_pages, gfp_mask, nid, nodemask); + page = alloc_contig_pages(nr_pages, gfp_mask, nid, nodemask); + return page ? page_folio(page) : NULL; } #else /* !CONFIG_CONTIG_ALLOC */ -static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask, +static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask, int nid, nodemask_t *nodemask) { return NULL; @@ -1423,7 +1424,7 @@ static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask, #endif /* CONFIG_CONTIG_ALLOC */ #else /* !CONFIG_ARCH_HAS_GIGANTIC_PAGE */ -static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask, +static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask, int nid, nodemask_t *nodemask) { return NULL; @@ -1950,7 +1951,7 @@ pgoff_t hugetlb_basepage_index(struct page *page) return (index << compound_order(page_head)) + compound_idx; } -static struct page *alloc_buddy_huge_page(struct hstate *h, +static struct folio *alloc_buddy_hugetlb_folio(struct hstate *h, gfp_t gfp_mask, int nid, nodemask_t *nmask, nodemask_t *node_alloc_noretry) { @@ -1988,11 +1989,6 @@ retry: page = NULL; } - if (page) - __count_vm_event(HTLB_BUDDY_PGALLOC); - else - __count_vm_event(HTLB_BUDDY_PGALLOC_FAIL); - /* * If we did not specify __GFP_RETRY_MAYFAIL, but still got a page this * indicates an overall state change. 
Clear bit so that we resume @@ -2009,7 +2005,13 @@ retry: if (node_alloc_noretry && !page && alloc_try_hard) node_set(nid, *node_alloc_noretry); - return page; + if (!page) { + __count_vm_event(HTLB_BUDDY_PGALLOC_FAIL); + return NULL; + } + + __count_vm_event(HTLB_BUDDY_PGALLOC); + return page_folio(page); } /* @@ -2019,23 +2021,21 @@ retry: * Note that returned page is 'frozen': ref count of head page and all tail * pages is zero. */ -static struct page *alloc_fresh_huge_page(struct hstate *h, +static struct folio *alloc_fresh_hugetlb_folio(struct hstate *h, gfp_t gfp_mask, int nid, nodemask_t *nmask, nodemask_t *node_alloc_noretry) { - struct page *page; struct folio *folio; bool retry = false; retry: if (hstate_is_gigantic(h)) - page = alloc_gigantic_page(h, gfp_mask, nid, nmask); + folio = alloc_gigantic_folio(h, gfp_mask, nid, nmask); else - page = alloc_buddy_huge_page(h, gfp_mask, + folio = alloc_buddy_hugetlb_folio(h, gfp_mask, nid, nmask, node_alloc_noretry); - if (!page) + if (!folio) return NULL; - folio = page_folio(page); if (hstate_is_gigantic(h)) { if (!prep_compound_gigantic_folio(folio, huge_page_order(h))) { /* @@ -2052,7 +2052,7 @@ retry: } prep_new_hugetlb_folio(h, folio, folio_nid(folio)); - return page; + return folio; } /* @@ -2062,23 +2062,20 @@ retry: static int alloc_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed, nodemask_t *node_alloc_noretry) { - struct page *page; + struct folio *folio; int nr_nodes, node; gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE; for_each_node_mask_to_alloc(h, nr_nodes, node, nodes_allowed) { - page = alloc_fresh_huge_page(h, gfp_mask, node, nodes_allowed, - node_alloc_noretry); - if (page) - break; + folio = alloc_fresh_hugetlb_folio(h, gfp_mask, node, + nodes_allowed, node_alloc_noretry); + if (folio) { + free_huge_page(&folio->page); /* free it into the hugepage allocator */ + return 1; + } } - if (!page) - return 0; - - free_huge_page(page); /* free it into the hugepage allocator */ - - 
return 1; + return 0; } /* @@ -2237,7 +2234,7 @@ int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn) static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask, int nid, nodemask_t *nmask) { - struct page *page = NULL; + struct folio *folio = NULL; if (hstate_is_gigantic(h)) return NULL; @@ -2247,8 +2244,8 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask, goto out_unlock; spin_unlock_irq(&hugetlb_lock); - page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask, NULL); - if (!page) + folio = alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask, NULL); + if (!folio) return NULL; spin_lock_irq(&hugetlb_lock); @@ -2260,43 +2257,42 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask, * codeflow */ if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages) { - SetHPageTemporary(page); + folio_set_hugetlb_temporary(folio); spin_unlock_irq(&hugetlb_lock); - free_huge_page(page); + free_huge_page(&folio->page); return NULL; } h->surplus_huge_pages++; - h->surplus_huge_pages_node[page_to_nid(page)]++; + h->surplus_huge_pages_node[folio_nid(folio)]++; out_unlock: spin_unlock_irq(&hugetlb_lock); - return page; + return &folio->page; } static struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask, int nid, nodemask_t *nmask) { - struct page *page; + struct folio *folio; if (hstate_is_gigantic(h)) return NULL; - page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask, NULL); - if (!page) + folio = alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask, NULL); + if (!folio) return NULL; /* fresh huge pages are frozen */ - set_page_refcounted(page); - + folio_ref_unfreeze(folio, 1); /* * We do not account these pages as surplus because they are only * temporary and will be released properly on the last reference */ - SetHPageTemporary(page); + folio_set_hugetlb_temporary(folio); - return page; + return &folio->page; } /* @@ -2745,54 +2741,52 @@ void restore_reserve_on_error(struct 
hstate *h, struct vm_area_struct *vma, } /* - * alloc_and_dissolve_huge_page - Allocate a new page and dissolve the old one + * alloc_and_dissolve_hugetlb_folio - Allocate a new folio and dissolve + * the old one * @h: struct hstate old page belongs to - * @old_page: Old page to dissolve + * @old_folio: Old folio to dissolve * @list: List to isolate the page in case we need to * Returns 0 on success, otherwise negated error. */ -static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page, - struct list_head *list) +static int alloc_and_dissolve_hugetlb_folio(struct hstate *h, + struct folio *old_folio, struct list_head *list) { gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE; - struct folio *old_folio = page_folio(old_page); int nid = folio_nid(old_folio); - struct page *new_page; struct folio *new_folio; int ret = 0; /* - * Before dissolving the page, we need to allocate a new one for the - * pool to remain stable. Here, we allocate the page and 'prep' it + * Before dissolving the folio, we need to allocate a new one for the + * pool to remain stable. Here, we allocate the folio and 'prep' it * by doing everything but actually updating counters and adding to * the pool. This simplifies and let us do most of the processing * under the lock. */ - new_page = alloc_buddy_huge_page(h, gfp_mask, nid, NULL, NULL); - if (!new_page) + new_folio = alloc_buddy_hugetlb_folio(h, gfp_mask, nid, NULL, NULL); + if (!new_folio) return -ENOMEM; - new_folio = page_folio(new_page); __prep_new_hugetlb_folio(h, new_folio); retry: spin_lock_irq(&hugetlb_lock); if (!folio_test_hugetlb(old_folio)) { /* - * Freed from under us. Drop new_page too. + * Freed from under us. Drop new_folio too. */ goto free_new; } else if (folio_ref_count(old_folio)) { /* - * Someone has grabbed the page, try to isolate it here. + * Someone has grabbed the folio, try to isolate it here. * Fail with -EBUSY if not possible. 
*/ spin_unlock_irq(&hugetlb_lock); - ret = isolate_hugetlb(old_page, list); + ret = isolate_hugetlb(&old_folio->page, list); spin_lock_irq(&hugetlb_lock); goto free_new; } else if (!folio_test_hugetlb_freed(old_folio)) { /* - * Page's refcount is 0 but it has not been enqueued in the + * Folio's refcount is 0 but it has not been enqueued in the * freelist yet. Race window is small, so we can succeed here if * we retry. */ @@ -2801,7 +2795,7 @@ retry: goto retry; } else { /* - * Ok, old_page is still a genuine free hugepage. Remove it from + * Ok, old_folio is still a genuine free hugepage. Remove it from * the freelist and decrease the counters. These will be * incremented again when calling __prep_account_new_huge_page() * and enqueue_hugetlb_folio() for new_folio. The counters will @@ -2810,14 +2804,14 @@ retry: remove_hugetlb_folio(h, old_folio, false); /* - * Ref count on new page is already zero as it was dropped + * Ref count on new_folio is already zero as it was dropped * earlier. It can be directly added to the pool free list. */ __prep_account_new_huge_page(h, nid); enqueue_hugetlb_folio(h, new_folio); /* - * Pages have been replaced, we can safely free the old one. + * Folio has been replaced, we can safely free the old one. 
*/ spin_unlock_irq(&hugetlb_lock); update_and_free_hugetlb_folio(h, old_folio, false); @@ -2827,7 +2821,7 @@ retry: free_new: spin_unlock_irq(&hugetlb_lock); - /* Page has a zero ref count, but needs a ref to be freed */ + /* Folio has a zero ref count, but needs a ref to be freed */ folio_ref_unfreeze(new_folio, 1); update_and_free_hugetlb_folio(h, new_folio, false); @@ -2865,7 +2859,7 @@ int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list) if (folio_ref_count(folio) && !isolate_hugetlb(&folio->page, list)) ret = 0; else if (!folio_ref_count(folio)) - ret = alloc_and_dissolve_huge_page(h, &folio->page, list); + ret = alloc_and_dissolve_hugetlb_folio(h, folio, list); return ret; } @@ -3083,14 +3077,14 @@ static void __init hugetlb_hstate_alloc_pages_onenode(struct hstate *h, int nid) if (!alloc_bootmem_huge_page(h, nid)) break; } else { - struct page *page; + struct folio *folio; gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE; - page = alloc_fresh_huge_page(h, gfp_mask, nid, + folio = alloc_fresh_hugetlb_folio(h, gfp_mask, nid, &node_states[N_MEMORY], NULL); - if (!page) + if (!folio) break; - free_huge_page(page); /* free it into the hugepage allocator */ + free_huge_page(&folio->page); /* free it into the hugepage allocator */ } cond_resched(); } From c8c7016f50c85688d71feea2dba1bd955d5f5358 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 30 Nov 2022 16:02:03 +0100 Subject: [PATCH 3721/4122] kasan: fail non-kasan KUnit tests on KASAN reports After the recent changes done to KUnit-enabled KASAN tests, non-KASAN KUnit tests stopped being failed when KASAN report is detected. Recover that property by failing the currently running non-KASAN KUnit test when KASAN detects and prints a report for a bad memory access. Note that if the bad accesses happened in a kernel thread that doesn't have a reference to the currently running KUnit-test available via current->kunit_test, the test won't be failed. 
This is a limitation of KUnit, which doesn't yet provide a thread-agnostic way to find the reference to the currently running test. Link: https://lkml.kernel.org/r/7be29a8ea967cee6b7e48d3d5a242d1d0bd96851.1669820505.git.andreyknvl@google.com Fixes: 49d9977ac909 ("kasan: check CONFIG_KASAN_KUNIT_TEST instead of CONFIG_KUNIT") Fixes: 7ce0ea19d50e ("kasan: switch kunit tests to console tracepoints") Signed-off-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: David Gow Cc: Dmitry Vyukov Cc: Marco Elver Signed-off-by: Andrew Morton --- mm/kasan/kasan.h | 12 ++++++++++ mm/kasan/kasan_test.c | 4 ++++ mm/kasan/report.c | 53 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 69 insertions(+) diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index a84491bc4867..ea8cf1310b1e 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -541,6 +541,18 @@ static inline bool kasan_arch_is_ready(void) { return true; } #error kasan_arch_is_ready only works in KASAN generic outline mode! #endif +#if IS_ENABLED(CONFIG_KASAN_KUNIT_TEST) + +void kasan_kunit_test_suite_start(void); +void kasan_kunit_test_suite_end(void); + +#else /* CONFIG_KASAN_KUNIT_TEST */ + +static inline void kasan_kunit_test_suite_start(void) { } +static inline void kasan_kunit_test_suite_end(void) { } + +#endif /* CONFIG_KASAN_KUNIT_TEST */ + #if IS_ENABLED(CONFIG_KASAN_KUNIT_TEST) || IS_ENABLED(CONFIG_KASAN_MODULE_TEST) bool kasan_save_enable_multi_shot(void); diff --git a/mm/kasan/kasan_test.c b/mm/kasan/kasan_test.c index e27591ef2777..9aa892e7b76c 100644 --- a/mm/kasan/kasan_test.c +++ b/mm/kasan/kasan_test.c @@ -76,6 +76,9 @@ static int kasan_suite_init(struct kunit_suite *suite) return -1; } + /* Stop failing KUnit tests on KASAN reports. */ + kasan_kunit_test_suite_start(); + /* * Temporarily enable multi-shot mode.
Otherwise, KASAN would only * report the first detected bug and panic the kernel if panic_on_warn @@ -94,6 +97,7 @@ static int kasan_suite_init(struct kunit_suite *suite) static void kasan_suite_exit(struct kunit_suite *suite) { + kasan_kunit_test_suite_end(); kasan_restore_multi_shot(multishot); for_each_kernel_tracepoint(unregister_tracepoints, NULL); tracepoint_synchronize_unregister(); diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 31355851a5ec..f2db8605ee0f 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -9,6 +9,7 @@ * Andrey Konovalov */ +#include #include #include #include @@ -112,10 +113,62 @@ EXPORT_SYMBOL_GPL(kasan_restore_multi_shot); #endif +#if IS_ENABLED(CONFIG_KASAN_KUNIT_TEST) + +/* + * Whether the KASAN KUnit test suite is currently being executed. + * Updated in kasan_test.c. + */ +bool kasan_kunit_executing; + +void kasan_kunit_test_suite_start(void) +{ + WRITE_ONCE(kasan_kunit_executing, true); +} +EXPORT_SYMBOL_GPL(kasan_kunit_test_suite_start); + +void kasan_kunit_test_suite_end(void) +{ + WRITE_ONCE(kasan_kunit_executing, false); +} +EXPORT_SYMBOL_GPL(kasan_kunit_test_suite_end); + +static bool kasan_kunit_test_suite_executing(void) +{ + return READ_ONCE(kasan_kunit_executing); +} + +#else /* CONFIG_KASAN_KUNIT_TEST */ + +static inline bool kasan_kunit_test_suite_executing(void) { return false; } + +#endif /* CONFIG_KASAN_KUNIT_TEST */ + +#if IS_ENABLED(CONFIG_KUNIT) + +static void fail_non_kasan_kunit_test(void) +{ + struct kunit *test; + + if (kasan_kunit_test_suite_executing()) + return; + + test = current->kunit_test; + if (test) + kunit_set_failure(test); +} + +#else /* CONFIG_KUNIT */ + +static inline void fail_non_kasan_kunit_test(void) { } + +#endif /* CONFIG_KUNIT */ + static DEFINE_SPINLOCK(report_lock); static void start_report(unsigned long *flags, bool sync) { + fail_non_kasan_kunit_test(); /* Respect the /proc/sys/kernel/traceoff_on_warning interface. 
*/ disable_trace_on_warning(); /* Do not allow LOCKDEP mangling KASAN reports. */ From 0b7623bdf89b9f6d320784e929acb78291aaf5f6 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 1 Dec 2022 17:08:34 +0000 Subject: [PATCH 3722/4122] selftests/damon: test removed scheme sysfs dir access bug A DAMON sysfs user could start DAMON with a scheme, remove the sysfs directory for the scheme, and then ask for a stats or schemes tried regions update. The related logic was not aware of the already removed directory situation, so it could result in invalid memory accesses. The fix was made with commit 8468b486612c ("mm/damon/sysfs-schemes: skip stats update if the scheme directory is removed"), though. Add a selftest to prevent such kinds of bugs from being introduced again. Link: https://lkml.kernel.org/r/20221201170834.62823-1-sj@kernel.org Signed-off-by: SeongJae Park Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/Makefile | 2 +- .../damon/sysfs_update_removed_scheme_dir.sh | 58 +++++++++++++++++++ 2 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile index 838a8e49f77b..b71247ba7196 100644 --- a/tools/testing/selftests/damon/Makefile +++ b/tools/testing/selftests/damon/Makefile @@ -8,7 +8,7 @@ TEST_PROGS = debugfs_attrs.sh debugfs_schemes.sh debugfs_target_ids.sh TEST_PROGS += debugfs_empty_targets.sh debugfs_huge_count_read_write.sh TEST_PROGS += debugfs_duplicate_context_creation.sh TEST_PROGS += debugfs_rm_non_contexts.sh -TEST_PROGS += sysfs.sh +TEST_PROGS += sysfs.sh sysfs_update_removed_scheme_dir.sh TEST_PROGS += reclaim.sh lru_sort.sh include ../lib.mk diff --git a/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh b/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh new file mode 100644 index 000000000000..ade35576e748 --- /dev/null
+++ b/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh @@ -0,0 +1,58 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +if [ $EUID -ne 0 ] +then + echo "Run as root" + exit $ksft_skip +fi + +damon_sysfs="/sys/kernel/mm/damon/admin" +if [ ! -d "$damon_sysfs" ] +then + echo "damon sysfs not found" + exit $ksft_skip +fi + +# clear log +dmesg -C + +# start DAMON with a scheme +echo 1 > "$damon_sysfs/kdamonds/nr_kdamonds" +echo 1 > "$damon_sysfs/kdamonds/0/contexts/nr_contexts" +echo "vaddr" > "$damon_sysfs/kdamonds/0/contexts/0/operations" +echo 1 > "$damon_sysfs/kdamonds/0/contexts/0/targets/nr_targets" +echo $$ > "$damon_sysfs/kdamonds/0/contexts/0/targets/0/pid_target" +echo 1 > "$damon_sysfs/kdamonds/0/contexts/0/schemes/nr_schemes" +scheme_dir="$damon_sysfs/kdamonds/0/contexts/0/schemes/0" +echo 4096000 > "$scheme_dir/access_pattern/sz/max" +echo 20 > "$scheme_dir/access_pattern/nr_accesses/max" +echo 1024 > "$scheme_dir/access_pattern/age/max" +echo "on" > "$damon_sysfs/kdamonds/0/state" +sleep 0.3 + +# remove scheme sysfs dir +echo 0 > "$damon_sysfs/kdamonds/0/contexts/0/schemes/nr_schemes" + +# try to update stat of already removed scheme sysfs dir +echo "update_schemes_stats" > "$damon_sysfs/kdamonds/0/state" +if dmesg | grep -q BUG +then + echo "update_schemes_stats triggers a kernel bug" + dmesg + exit 1 +fi + +# try to update tried regions of already removed scheme sysfs dir +echo "update_schemes_tried_regions" > "$damon_sysfs/kdamonds/0/state" +if dmesg | grep -q BUG +then + echo "update_schemes_tried_regions triggers a kernel bug" + dmesg + exit 1 +fi + +echo "off" > "$damon_sysfs/kdamonds/0/state" From 169004265860327182ecf92297b25b6271e81e96 Mon Sep 17 00:00:00 2001 From: Shiyang Ruan Date: Thu, 1 Dec 2022 15:28:51 +0000 Subject: [PATCH 3723/4122] fsdax: introduce page->share for fsdax in reflink mode Patch series "fsdax,xfs: fix warning messages", v2. 
Many testcases failed in dax+reflink mode with warning message in dmesg. Such as generic/051,075,127. The warning message is like this: [ 775.509337] ------------[ cut here ]------------ [ 775.509636] WARNING: CPU: 1 PID: 16815 at fs/dax.c:386 dax_insert_entry.cold+0x2e/0x69 [ 775.510151] Modules linked in: auth_rpcgss oid_registry nfsv4 algif_hash af_alg af_packet nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set nf_tables nfnetlink ip6table_filter ip6_tables iptable_filter ip_tables x_tables dax_pmem nd_pmem nd_btt sch_fq_codel configfs xfs libcrc32c fuse [ 775.524288] CPU: 1 PID: 16815 Comm: fsx Kdump: loaded Tainted: G W 6.1.0-rc4+ #164 eb34e4ee4200c7cbbb47de2b1892c5a3e027fd6d [ 775.524904] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS Arch Linux 1.16.0-3-3 04/01/2014 [ 775.525460] RIP: 0010:dax_insert_entry.cold+0x2e/0x69 [ 775.525797] Code: c7 c7 18 eb e0 81 48 89 4c 24 20 48 89 54 24 10 e8 73 6d ff ff 48 83 7d 18 00 48 8b 54 24 10 48 8b 4c 24 20 0f 84 e3 e9 b9 ff <0f> 0b e9 dc e9 b9 ff 48 c7 c6 a0 20 c3 81 48 c7 c7 f0 ea e0 81 48 [ 775.526708] RSP: 0000:ffffc90001d57b30 EFLAGS: 00010082 [ 775.527042] RAX: 000000000000002a RBX: 0000000000000000 RCX: 0000000000000042 [ 775.527396] RDX: ffffea000a0f6c80 RSI: ffffffff81dfab1b RDI: 00000000ffffffff [ 775.527819] RBP: ffffea000a0f6c40 R08: 0000000000000000 R09: ffffffff820625e0 [ 775.528241] R10: ffffc90001d579d8 R11: ffffffff820d2628 R12: ffff88815fc98320 [ 775.528598] R13: ffffc90001d57c18 R14: 0000000000000000 R15: 0000000000000001 [ 775.528997] FS: 00007f39fc75d740(0000) GS:ffff88817bc80000(0000) knlGS:0000000000000000 [ 775.529474] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 775.529800] CR2: 00007f39fc772040 CR3: 0000000107eb6001 CR4: 00000000003706e0 [ 775.530214] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 775.530592] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 
0000000000000400 [ 775.531002] Call Trace: [ 775.531230] [ 775.531444] dax_fault_iter+0x267/0x6c0 [ 775.531719] dax_iomap_pte_fault+0x198/0x3d0 [ 775.532002] __xfs_filemap_fault+0x24a/0x2d0 [xfs aa8d25411432b306d9554da38096f4ebb86bdfe7] [ 775.532603] __do_fault+0x30/0x1e0 [ 775.532903] do_fault+0x314/0x6c0 [ 775.533166] __handle_mm_fault+0x646/0x1250 [ 775.533480] handle_mm_fault+0xc1/0x230 [ 775.533810] do_user_addr_fault+0x1ac/0x610 [ 775.534110] exc_page_fault+0x63/0x140 [ 775.534389] asm_exc_page_fault+0x22/0x30 [ 775.534678] RIP: 0033:0x7f39fc55820a [ 775.534950] Code: 00 01 00 00 00 74 99 83 f9 c0 0f 87 7b fe ff ff c5 fe 6f 4e 20 48 29 fe 48 83 c7 3f 49 8d 0c 10 48 83 e7 c0 48 01 fe 48 29 f9 a4 c4 c1 7e 7f 00 c4 c1 7e 7f 48 20 c5 f8 77 c3 0f 1f 44 00 00 [ 775.535839] RSP: 002b:00007ffc66a08118 EFLAGS: 00010202 [ 775.536157] RAX: 00007f39fc772001 RBX: 0000000000042001 RCX: 00000000000063c1 [ 775.536537] RDX: 0000000000006400 RSI: 00007f39fac42050 RDI: 00007f39fc772040 [ 775.536919] RBP: 0000000000006400 R08: 00007f39fc772001 R09: 0000000000042000 [ 775.537304] R10: 0000000000000001 R11: 0000000000000246 R12: 0000000000000001 [ 775.537694] R13: 00007f39fc772000 R14: 0000000000006401 R15: 0000000000000003 [ 775.538086] [ 775.538333] ---[ end trace 0000000000000000 ]--- This also affects dax+noreflink mode if we run the test after a dax+reflink test. So, the most urgent thing is solving the warning messages. With these fixes, most warning messages in dax_associate_entry() are gone. But honestly, generic/388 will randomly failed with the warning. The case shutdown the xfs when fsstress is running, and do it for many times. I think the reason is that dax pages in use are not able to be invalidated in time when fs is shutdown. The next time dax page to be associated, it still remains the mapping value set last time. I'll keep on solving it. The warning message in dax_writeback_one() can also be fixed because of the dax unshare. 
This patch (of 8): fsdax page is used not only when CoW, but also mapread. To make it easier to understand, use 'share' to indicate that the dax page is shared by more than one extent. And add helper functions to use it. Also, the flag needs to be renamed to PAGE_MAPPING_DAX_SHARED. [ruansy.fnst@fujitsu.com: rename several functions] Link: https://lkml.kernel.org/r/1669972991-246-1-git-send-email-ruansy.fnst@fujitsu.com [ruansy.fnst@fujitsu.com: v2.2] Link: https://lkml.kernel.org/r/1670381359-53-1-git-send-email-ruansy.fnst@fujitsu.com Link: https://lkml.kernel.org/r/1669908538-55-1-git-send-email-ruansy.fnst@fujitsu.com Link: https://lkml.kernel.org/r/1669908538-55-2-git-send-email-ruansy.fnst@fujitsu.com Signed-off-by: Shiyang Ruan Reviewed-by: Allison Henderson Reviewed-by: Darrick J. Wong Cc: Dan Williams Cc: Dave Chinner Cc: Jason Gunthorpe Cc: Alistair Popple Cc: John Hubbard Signed-off-by: Andrew Morton --- fs/dax.c | 38 ++++++++++++++++++++++---------------- include/linux/mm_types.h | 5 ++++- include/linux/page-flags.h | 2 +- 3 files changed, 27 insertions(+), 18 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index 1c6867810cbd..84fadea08705 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -334,35 +334,41 @@ static unsigned long dax_end_pfn(void *entry) for (pfn = dax_to_pfn(entry); \ pfn < dax_end_pfn(entry); pfn++) -static inline bool dax_mapping_is_cow(struct address_space *mapping) +static inline bool dax_page_is_shared(struct page *page) { - return (unsigned long)mapping == PAGE_MAPPING_DAX_COW; + return page->mapping == PAGE_MAPPING_DAX_SHARED; } /* - * Set the page->mapping with FS_DAX_MAPPING_COW flag, increase the refcount. + * Set the page->mapping with PAGE_MAPPING_DAX_SHARED flag, increase the + * refcount.
*/ -static inline void dax_mapping_set_cow(struct page *page) +static inline void dax_page_share_get(struct page *page) { - if ((uintptr_t)page->mapping != PAGE_MAPPING_DAX_COW) { + if (page->mapping != PAGE_MAPPING_DAX_SHARED) { /* * Reset the index if the page was already mapped * regularly before. */ if (page->mapping) - page->index = 1; - page->mapping = (void *)PAGE_MAPPING_DAX_COW; + page->share = 1; + page->mapping = PAGE_MAPPING_DAX_SHARED; } - page->index++; + page->share++; +} + +static inline unsigned long dax_page_share_put(struct page *page) +{ + return --page->share; } /* - * When it is called in dax_insert_entry(), the cow flag will indicate that + * When it is called in dax_insert_entry(), the shared flag will indicate that * whether this entry is shared by multiple files. If so, set the page->mapping - * FS_DAX_MAPPING_COW, and use page->index as refcount. + * PAGE_MAPPING_DAX_SHARED, and use page->share as refcount. */ static void dax_associate_entry(void *entry, struct address_space *mapping, - struct vm_area_struct *vma, unsigned long address, bool cow) + struct vm_area_struct *vma, unsigned long address, bool shared) { unsigned long size = dax_entry_size(entry), pfn, index; int i = 0; @@ -374,8 +380,8 @@ static void dax_associate_entry(void *entry, struct address_space *mapping, for_each_mapped_pfn(entry, pfn) { struct page *page = pfn_to_page(pfn); - if (cow) { - dax_mapping_set_cow(page); + if (shared) { + dax_page_share_get(page); } else { WARN_ON_ONCE(page->mapping); page->mapping = mapping; @@ -396,9 +402,9 @@ static void dax_disassociate_entry(void *entry, struct address_space *mapping, struct page *page = pfn_to_page(pfn); WARN_ON_ONCE(trunc && page_ref_count(page) > 1); - if (dax_mapping_is_cow(page->mapping)) { - /* keep the CoW flag if this page is still shared */ - if (page->index-- > 0) + if (dax_page_is_shared(page)) { + /* keep the shared flag if this page is still shared */ + if (dax_page_share_put(page) > 0) continue; } else 
WARN_ON_ONCE(page->mapping && page->mapping != mapping); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 199f98be6f9c..3b8475007734 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -104,7 +104,10 @@ struct page { }; /* See page-flags.h for PAGE_MAPPING_FLAGS */ struct address_space *mapping; - pgoff_t index; /* Our offset within mapping. */ + union { + pgoff_t index; /* Our offset within mapping. */ + unsigned long share; /* share count for fsdax */ + }; /** * @private: Mapping-private opaque data. * Usually used for buffer_heads if PagePrivate. diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index e42c55a7e012..9aec9fd8c50b 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -638,7 +638,7 @@ PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted) * Different with flags above, this flag is used only for fsdax mode. It * indicates that this page->mapping is now under reflink case. */ -#define PAGE_MAPPING_DAX_COW 0x1 +#define PAGE_MAPPING_DAX_SHARED ((void *)0x1) static __always_inline bool folio_mapping_flags(struct folio *folio) { From f80e1668888f34c0764822e74953c997daf2ccdb Mon Sep 17 00:00:00 2001 From: Shiyang Ruan Date: Thu, 1 Dec 2022 15:28:52 +0000 Subject: [PATCH 3724/4122] fsdax: invalidate pages when CoW CoW changes the share state of a dax page, but the share count of the page isn't updated. The next time access this page, it should have been a newly accessed, but old association exists. So, we need to clear the share state when CoW happens, in both dax_iomap_rw() and dax_zero_iter(). Link: https://lkml.kernel.org/r/1669908538-55-3-git-send-email-ruansy.fnst@fujitsu.com Signed-off-by: Shiyang Ruan Reviewed-by: Darrick J. 
Wong Cc: Alistair Popple Cc: Dan Williams Cc: Dave Chinner Cc: Jason Gunthorpe Cc: John Hubbard Signed-off-by: Andrew Morton --- fs/dax.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index 84fadea08705..c975d075e77b 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1264,6 +1264,15 @@ static s64 dax_zero_iter(struct iomap_iter *iter, bool *did_zero) if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN) return length; + /* + * invalidate the pages whose sharing state is to be changed + * because of CoW. + */ + if (iomap->flags & IOMAP_F_SHARED) + invalidate_inode_pages2_range(iter->inode->i_mapping, + pos >> PAGE_SHIFT, + (pos + length - 1) >> PAGE_SHIFT); + do { unsigned offset = offset_in_page(pos); unsigned size = min_t(u64, PAGE_SIZE - offset, length); @@ -1324,12 +1333,13 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi, struct iov_iter *iter) { const struct iomap *iomap = &iomi->iomap; - const struct iomap *srcmap = &iomi->srcmap; + const struct iomap *srcmap = iomap_iter_srcmap(iomi); loff_t length = iomap_length(iomi); loff_t pos = iomi->pos; struct dax_device *dax_dev = iomap->dax_dev; loff_t end = pos + length, done = 0; bool write = iov_iter_rw(iter) == WRITE; + bool cow = write && iomap->flags & IOMAP_F_SHARED; ssize_t ret = 0; size_t xfer; int id; @@ -1356,7 +1366,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi, * into page tables. We have to tear down these mappings so that data * written by write(2) is visible in mmap. 
*/ - if (iomap->flags & IOMAP_F_NEW) { + if (iomap->flags & IOMAP_F_NEW || cow) { invalidate_inode_pages2_range(iomi->inode->i_mapping, pos >> PAGE_SHIFT, (end - 1) >> PAGE_SHIFT); @@ -1390,8 +1400,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi, break; } - if (write && - srcmap->type != IOMAP_HOLE && srcmap->addr != iomap->addr) { + if (cow) { ret = dax_iomap_cow_copy(pos, length, PAGE_SIZE, srcmap, kaddr); if (ret) From 708dfad2eb4169324189782edd6d3763237e0489 Mon Sep 17 00:00:00 2001 From: Shiyang Ruan Date: Thu, 1 Dec 2022 15:28:53 +0000 Subject: [PATCH 3725/4122] fsdax: zero the edges if source is HOLE or UNWRITTEN If srcmap contains invalid data, such as HOLE and UNWRITTEN, the dest page should be zeroed. Otherwise, since it's a pmem, old data may remains on the dest page, the result of CoW will be incorrect. The function name is also not easy to understand, rename it to "dax_iomap_copy_around()", which means it copies data around the range. [akpm@linux-foundation.org: update dax_iomap_copy_around() kerneldoc, per Darrick] Link: https://lkml.kernel.org/r/1669973145-318-1-git-send-email-ruansy.fnst@fujitsu.com Link: https://lkml.kernel.org/r/1669908538-55-4-git-send-email-ruansy.fnst@fujitsu.com Signed-off-by: Shiyang Ruan Reviewed-by: Darrick J. Wong Reviewed-by: Allison Henderson Cc: Alistair Popple Cc: Dan Williams Cc: Dave Chinner Cc: Jason Gunthorpe Cc: John Hubbard Signed-off-by: Andrew Morton --- fs/dax.c | 79 +++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 49 insertions(+), 30 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index c975d075e77b..359b958eb835 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1092,7 +1092,8 @@ out: } /** - * dax_iomap_cow_copy - Copy the data from source to destination before write + * dax_iomap_copy_around - Prepare for an unaligned write to a shared/cow page + * by copying the data before and after the range to be written. * @pos: address to do copy from. * @length: size of copy operation. 
* @align_size: aligned w.r.t align_size (either PMD_SIZE or PAGE_SIZE) @@ -1101,35 +1102,50 @@ out: * * This can be called from two places. Either during DAX write fault (page * aligned), to copy the length size data to daddr. Or, while doing normal DAX - * write operation, dax_iomap_actor() might call this to do the copy of either + * write operation, dax_iomap_iter() might call this to do the copy of either * start or end unaligned address. In the latter case the rest of the copy of - * aligned ranges is taken care by dax_iomap_actor() itself. + * aligned ranges is taken care by dax_iomap_iter() itself. + * If the srcmap contains invalid data, such as HOLE and UNWRITTEN, zero the + * area to make sure no old data remains. */ -static int dax_iomap_cow_copy(loff_t pos, uint64_t length, size_t align_size, +static int dax_iomap_copy_around(loff_t pos, uint64_t length, size_t align_size, const struct iomap *srcmap, void *daddr) { loff_t head_off = pos & (align_size - 1); size_t size = ALIGN(head_off + length, align_size); loff_t end = pos + length; loff_t pg_end = round_up(end, align_size); + /* copy_all is usually in page fault case */ bool copy_all = head_off == 0 && end == pg_end; + /* zero the edges if srcmap is a HOLE or IOMAP_UNWRITTEN */ + bool zero_edge = srcmap->flags & IOMAP_F_SHARED || + srcmap->type == IOMAP_UNWRITTEN; void *saddr = 0; int ret = 0; - ret = dax_iomap_direct_access(srcmap, pos, size, &saddr, NULL); - if (ret) - return ret; + if (!zero_edge) { + ret = dax_iomap_direct_access(srcmap, pos, size, &saddr, NULL); + if (ret) + return ret; + } if (copy_all) { - ret = copy_mc_to_kernel(daddr, saddr, length); - return ret ? 
-EIO : 0; + if (zero_edge) + memset(daddr, 0, size); + else + ret = copy_mc_to_kernel(daddr, saddr, length); + goto out; } /* Copy the head part of the range */ if (head_off) { - ret = copy_mc_to_kernel(daddr, saddr, head_off); - if (ret) - return -EIO; + if (zero_edge) + memset(daddr, 0, head_off); + else { + ret = copy_mc_to_kernel(daddr, saddr, head_off); + if (ret) + return -EIO; + } } /* Copy the tail part of the range */ @@ -1137,12 +1153,19 @@ static int dax_iomap_cow_copy(loff_t pos, uint64_t length, size_t align_size, loff_t tail_off = head_off + length; loff_t tail_len = pg_end - end; - ret = copy_mc_to_kernel(daddr + tail_off, saddr + tail_off, - tail_len); - if (ret) - return -EIO; + if (zero_edge) + memset(daddr + tail_off, 0, tail_len); + else { + ret = copy_mc_to_kernel(daddr + tail_off, + saddr + tail_off, tail_len); + if (ret) + return -EIO; + } } - return 0; +out: + if (zero_edge) + dax_flush(srcmap->dax_dev, daddr, size); + return ret ? -EIO : 0; } /* @@ -1241,13 +1264,10 @@ static int dax_memzero(struct iomap_iter *iter, loff_t pos, size_t size) if (ret < 0) return ret; memset(kaddr + offset, 0, size); - if (srcmap->addr != iomap->addr) { - ret = dax_iomap_cow_copy(pos, size, PAGE_SIZE, srcmap, - kaddr); - if (ret < 0) - return ret; - dax_flush(iomap->dax_dev, kaddr, PAGE_SIZE); - } else + if (iomap->flags & IOMAP_F_SHARED) + ret = dax_iomap_copy_around(pos, size, PAGE_SIZE, srcmap, + kaddr); + else dax_flush(iomap->dax_dev, kaddr + offset, size); return ret; } @@ -1401,8 +1421,8 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi, } if (cow) { - ret = dax_iomap_cow_copy(pos, length, PAGE_SIZE, srcmap, - kaddr); + ret = dax_iomap_copy_around(pos, length, PAGE_SIZE, + srcmap, kaddr); if (ret) break; } @@ -1547,7 +1567,7 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf, struct xa_state *xas, void **entry, bool pmd) { const struct iomap *iomap = &iter->iomap; - const struct iomap *srcmap = &iter->srcmap; + const struct iomap 
*srcmap = iomap_iter_srcmap(iter); size_t size = pmd ? PMD_SIZE : PAGE_SIZE; loff_t pos = (loff_t)xas->xa_index << PAGE_SHIFT; bool write = iter->flags & IOMAP_WRITE; @@ -1578,9 +1598,8 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf, *entry = dax_insert_entry(xas, vmf, iter, *entry, pfn, entry_flags); - if (write && - srcmap->type != IOMAP_HOLE && srcmap->addr != iomap->addr) { - err = dax_iomap_cow_copy(pos, size, size, srcmap, kaddr); + if (write && iomap->flags & IOMAP_F_SHARED) { + err = dax_iomap_copy_around(pos, size, size, srcmap, kaddr); if (err) return dax_fault_return(err); } From c6f0b395b2110aa26a134a9a395875b1ec0a5aae Mon Sep 17 00:00:00 2001 From: Shiyang Ruan Date: Thu, 1 Dec 2022 15:28:54 +0000 Subject: [PATCH 3726/4122] fsdax,xfs: set the shared flag when file extent is shared If a dax page is shared, mapread at different offsets can also trigger page fault on same dax page. So, change the flag from "cow" to "shared". And get the shared flag from filesystem when read. Link: https://lkml.kernel.org/r/1669908538-55-5-git-send-email-ruansy.fnst@fujitsu.com Signed-off-by: Shiyang Ruan Reviewed-by: Darrick J. 
Wong Cc: Alistair Popple Cc: Dan Williams Cc: Dave Chinner Cc: Jason Gunthorpe Cc: John Hubbard Signed-off-by: Andrew Morton --- fs/dax.c | 19 +++++++------------ fs/xfs/xfs_iomap.c | 2 +- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index 359b958eb835..fa547ce41add 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -846,12 +846,6 @@ static bool dax_fault_is_synchronous(const struct iomap_iter *iter, (iter->iomap.flags & IOMAP_F_DIRTY); } -static bool dax_fault_is_cow(const struct iomap_iter *iter) -{ - return (iter->flags & IOMAP_WRITE) && - (iter->iomap.flags & IOMAP_F_SHARED); -} - /* * By this point grab_mapping_entry() has ensured that we have a locked entry * of the appropriate size so we don't have to worry about downgrading PMDs to @@ -865,13 +859,14 @@ static void *dax_insert_entry(struct xa_state *xas, struct vm_fault *vmf, { struct address_space *mapping = vmf->vma->vm_file->f_mapping; void *new_entry = dax_make_entry(pfn, flags); - bool dirty = !dax_fault_is_synchronous(iter, vmf->vma); - bool cow = dax_fault_is_cow(iter); + bool write = iter->flags & IOMAP_WRITE; + bool dirty = write && !dax_fault_is_synchronous(iter, vmf->vma); + bool shared = iter->iomap.flags & IOMAP_F_SHARED; if (dirty) __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); - if (cow || (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE))) { + if (shared || (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE))) { unsigned long index = xas->xa_index; /* we are replacing a zero page with block mapping */ if (dax_is_pmd_entry(entry)) @@ -883,12 +878,12 @@ static void *dax_insert_entry(struct xa_state *xas, struct vm_fault *vmf, xas_reset(xas); xas_lock_irq(xas); - if (cow || dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) { + if (shared || dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) { void *old; dax_disassociate_entry(entry, mapping, false); dax_associate_entry(new_entry, mapping, vmf->vma, vmf->address, - cow); + shared); /* * Only swap our 
new entry into the page cache if the current * entry is a zero page or an empty entry. If a normal PTE or @@ -908,7 +903,7 @@ static void *dax_insert_entry(struct xa_state *xas, struct vm_fault *vmf, if (dirty) xas_set_mark(xas, PAGECACHE_TAG_DIRTY); - if (cow) + if (write && shared) xas_set_mark(xas, PAGECACHE_TAG_TOWRITE); xas_unlock_irq(xas); diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 07da03976ec1..881de99766ca 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1215,7 +1215,7 @@ xfs_read_iomap_begin( return error; error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, &nimaps, 0); - if (!error && (flags & IOMAP_REPORT)) + if (!error && ((flags & IOMAP_REPORT) || IS_DAX(inode))) error = xfs_reflink_trim_around_shared(ip, &imap, &shared); xfs_iunlock(ip, lockmode); From 0e79e3736d54bb8efbc9fb29cc3b54a132783565 Mon Sep 17 00:00:00 2001 From: Shiyang Ruan Date: Thu, 1 Dec 2022 15:31:41 +0000 Subject: [PATCH 3727/4122] fsdax: dedupe: iter two files at the same time The iomap_iter() on a range of one file may loop more than once. In this case, the inner dst_iter can update its iomap but the outer src_iter can't. This may cause the wrong remapping in filesystem. Let them called at the same time. Link: https://lkml.kernel.org/r/1669908701-93-1-git-send-email-ruansy.fnst@fujitsu.com Signed-off-by: Shiyang Ruan Reviewed-by: Darrick J. 
Wong Cc: Alistair Popple Cc: Dan Williams Cc: Dave Chinner Cc: Jason Gunthorpe Cc: John Hubbard Signed-off-by: Andrew Morton --- fs/dax.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index fa547ce41add..8fb928cd9dce 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1965,15 +1965,15 @@ int dax_dedupe_file_range_compare(struct inode *src, loff_t srcoff, .len = len, .flags = IOMAP_DAX, }; - int ret; + int ret, compared = 0; - while ((ret = iomap_iter(&src_iter, ops)) > 0) { - while ((ret = iomap_iter(&dst_iter, ops)) > 0) { - dst_iter.processed = dax_range_compare_iter(&src_iter, - &dst_iter, len, same); - } - if (ret <= 0) - src_iter.processed = ret; + while ((ret = iomap_iter(&src_iter, ops)) > 0 && + (ret = iomap_iter(&dst_iter, ops)) > 0) { + compared = dax_range_compare_iter(&src_iter, &dst_iter, len, + same); + if (compared < 0) + return ret; + src_iter.processed = dst_iter.processed = compared; } return ret; } From 64e6edc185da7e101e867c4732c097fedb1da08e Mon Sep 17 00:00:00 2001 From: Shiyang Ruan Date: Thu, 1 Dec 2022 15:32:10 +0000 Subject: [PATCH 3728/4122] xfs: use dax ops for zero and truncate in fsdax mode Zero and truncate on a dax file may execute CoW. So use dax ops which contains end work for CoW. Link: https://lkml.kernel.org/r/1669908730-131-1-git-send-email-ruansy.fnst@fujitsu.com Signed-off-by: Shiyang Ruan Reviewed-by: Darrick J. 
Wong Cc: Alistair Popple Cc: Dan Williams Cc: Dave Chinner Cc: Jason Gunthorpe Cc: John Hubbard Signed-off-by: Andrew Morton --- fs/xfs/xfs_iomap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 881de99766ca..d9401d0300ad 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1370,7 +1370,7 @@ xfs_zero_range( if (IS_DAX(inode)) return dax_zero_range(inode, pos, len, did_zero, - &xfs_direct_write_iomap_ops); + &xfs_dax_write_iomap_ops); return iomap_zero_range(inode, pos, len, did_zero, &xfs_buffered_write_iomap_ops); } @@ -1385,7 +1385,7 @@ xfs_truncate_page( if (IS_DAX(inode)) return dax_truncate_page(inode, pos, did_zero, - &xfs_direct_write_iomap_ops); + &xfs_dax_write_iomap_ops); return iomap_truncate_page(inode, pos, did_zero, &xfs_buffered_write_iomap_ops); } From d984648e428bf88cbd94ebe346c73632cb92fffb Mon Sep 17 00:00:00 2001 From: Shiyang Ruan Date: Thu, 1 Dec 2022 15:32:33 +0000 Subject: [PATCH 3729/4122] fsdax,xfs: port unshare to fsdax Implement unshare in fsdax mode: copy data from srcmap to iomap. Link: https://lkml.kernel.org/r/1669908753-169-1-git-send-email-ruansy.fnst@fujitsu.com Signed-off-by: Shiyang Ruan Reviewed-by: Darrick J. 
Wong Cc: Alistair Popple Cc: Dan Williams Cc: Dave Chinner Cc: Jason Gunthorpe Cc: John Hubbard Signed-off-by: Andrew Morton --- fs/dax.c | 52 ++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_reflink.c | 8 +++++-- include/linux/dax.h | 2 ++ 3 files changed, 60 insertions(+), 2 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index 8fb928cd9dce..c48a3a93ab29 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1245,6 +1245,58 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf, } #endif /* CONFIG_FS_DAX_PMD */ +static s64 dax_unshare_iter(struct iomap_iter *iter) +{ + struct iomap *iomap = &iter->iomap; + const struct iomap *srcmap = iomap_iter_srcmap(iter); + loff_t pos = iter->pos; + loff_t length = iomap_length(iter); + int id = 0; + s64 ret = 0; + void *daddr = NULL, *saddr = NULL; + + /* don't bother with blocks that are not shared to start with */ + if (!(iomap->flags & IOMAP_F_SHARED)) + return length; + /* don't bother with holes or unwritten extents */ + if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN) + return length; + + id = dax_read_lock(); + ret = dax_iomap_direct_access(iomap, pos, length, &daddr, NULL); + if (ret < 0) + goto out_unlock; + + ret = dax_iomap_direct_access(srcmap, pos, length, &saddr, NULL); + if (ret < 0) + goto out_unlock; + + ret = copy_mc_to_kernel(daddr, saddr, length); + if (ret) + ret = -EIO; + +out_unlock: + dax_read_unlock(id); + return ret; +} + +int dax_file_unshare(struct inode *inode, loff_t pos, loff_t len, + const struct iomap_ops *ops) +{ + struct iomap_iter iter = { + .inode = inode, + .pos = pos, + .len = len, + .flags = IOMAP_WRITE | IOMAP_UNSHARE | IOMAP_DAX, + }; + int ret; + + while ((ret = iomap_iter(&iter, ops)) > 0) + iter.processed = dax_unshare_iter(&iter); + return ret; +} +EXPORT_SYMBOL_GPL(dax_file_unshare); + static int dax_memzero(struct iomap_iter *iter, loff_t pos, size_t size) { const struct iomap *iomap = &iter->iomap; diff --git a/fs/xfs/xfs_reflink.c 
b/fs/xfs/xfs_reflink.c index 93bdd25680bc..fe46bce8cae6 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1693,8 +1693,12 @@ xfs_reflink_unshare( inode_dio_wait(inode); - error = iomap_file_unshare(inode, offset, len, - &xfs_buffered_write_iomap_ops); + if (IS_DAX(inode)) + error = dax_file_unshare(inode, offset, len, + &xfs_dax_write_iomap_ops); + else + error = iomap_file_unshare(inode, offset, len, + &xfs_buffered_write_iomap_ops); if (error) goto out; diff --git a/include/linux/dax.h b/include/linux/dax.h index ba985333e26b..2b5ecb591059 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -205,6 +205,8 @@ static inline void dax_unlock_mapping_entry(struct address_space *mapping, } #endif +int dax_file_unshare(struct inode *inode, loff_t pos, loff_t len, + const struct iomap_ops *ops); int dax_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero, const struct iomap_ops *ops); int dax_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, From 480017957d6380d3336a8e80ad90f70415bb86f7 Mon Sep 17 00:00:00 2001 From: Shiyang Ruan Date: Thu, 1 Dec 2022 15:32:53 +0000 Subject: [PATCH 3730/4122] xfs: remove restrictions for fsdax and reflink Since the basic function for fsdax and reflink has been implemented, remove the restrictions on them to allow wider testing. Link: https://lkml.kernel.org/r/1669908773-207-1-git-send-email-ruansy.fnst@fujitsu.com Signed-off-by: Shiyang Ruan Reviewed-by: Darrick J.
Wong Cc: Alistair Popple Cc: Dan Williams Cc: Dave Chinner Cc: Jason Gunthorpe Cc: John Hubbard Signed-off-by: Andrew Morton --- fs/xfs/xfs_ioctl.c | 4 ---- fs/xfs/xfs_iops.c | 4 ---- 2 files changed, 8 deletions(-) diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 1f783e979629..13f1b2add390 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1138,10 +1138,6 @@ xfs_ioctl_setattr_xflags( if ((fa->fsx_xflags & FS_XFLAG_REALTIME) && xfs_is_reflink_inode(ip)) ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK; - /* Don't allow us to set DAX mode for a reflinked file for now. */ - if ((fa->fsx_xflags & FS_XFLAG_DAX) && xfs_is_reflink_inode(ip)) - return -EINVAL; - /* diflags2 only valid for v3 inodes. */ i_flags2 = xfs_flags2diflags2(ip, fa->fsx_xflags); if (i_flags2 && !xfs_has_v3inodes(mp)) diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 2e10e1c66ad6..bf0495f7a5e1 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -1185,10 +1185,6 @@ xfs_inode_supports_dax( if (!S_ISREG(VFS_I(ip)->i_mode)) return false; - /* Only supported on non-reflinked files. */ - if (xfs_is_reflink_inode(ip)) - return false; - /* Block size must match page size */ if (mp->m_sb.sb_blocksize != PAGE_SIZE) return false; From ac4b2901a112e4dcee1455c96d89ef83cc7aa545 Mon Sep 17 00:00:00 2001 From: Deyan Wang Date: Thu, 1 Dec 2022 21:50:45 +0800 Subject: [PATCH 3731/4122] mm/page_alloc: update comments in __free_pages_ok() Add a comment to explain why we call get_pfnblock_migratetype() twice in __free_pages_ok(). 
Link: https://lkml.kernel.org/r/20221201135045.31663-1-wonder_rock@126.com Signed-off-by: Deyan Wang Signed-off-by: Andrew Morton --- mm/page_alloc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 5ab9dd29ef7e..0745aedebb37 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1702,6 +1702,11 @@ static void __free_pages_ok(struct page *page, unsigned int order, if (!free_pages_prepare(page, order, true, fpi_flags)) return; + /* + * Calling get_pfnblock_migratetype() without spin_lock_irqsave() here + * is used to avoid calling get_pfnblock_migratetype() under the lock. + * This will reduce the lock holding time. + */ migratetype = get_pfnblock_migratetype(page, pfn); spin_lock_irqsave(&zone->lock, flags); From a11774122180a782b327b0a9a5091d99c91a4db7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 2 Dec 2022 11:26:38 +0100 Subject: [PATCH 3732/4122] extfat: remove ->writepage Patch series "start removing writepage instances v2". The VM doesn't need or want ->writepage for writeback and is fine with just having ->writepages as long as ->migrate_folio is implemented. This series removes all ->writepage instances that use block_write_full_page directly and also have a plain mpage_writepages based ->writepages. This patch (of 7): ->writepage is a very inefficient method to write back data, and only used through write_cache_pages or as a fallback when no ->migrate_folio method is present. Set ->migrate_folio to the generic buffer_head based helper, and remove the ->writepage implementation.
Link: https://lkml.kernel.org/r/20221202102644.770505-1-hch@lst.de Link: https://lkml.kernel.org/r/20221202102644.770505-2-hch@lst.de Signed-off-by: Christoph Hellwig Acked-by: Namjae Jeon Acked-by: Johannes Weiner Cc: Bob Copeland Cc: Dave Kleikamp Cc: Jan Kara Cc: Mikulas Patocka Cc: OGAWA Hirofumi Cc: Sungjong Seo Signed-off-by: Andrew Morton --- fs/exfat/inode.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c index 5590a1e83126..eac95bcd9a8a 100644 --- a/fs/exfat/inode.c +++ b/fs/exfat/inode.c @@ -345,11 +345,6 @@ static void exfat_readahead(struct readahead_control *rac) mpage_readahead(rac, exfat_get_block); } -static int exfat_writepage(struct page *page, struct writeback_control *wbc) -{ - return block_write_full_page(page, exfat_get_block, wbc); -} - static int exfat_writepages(struct address_space *mapping, struct writeback_control *wbc) { @@ -473,12 +468,12 @@ static const struct address_space_operations exfat_aops = { .invalidate_folio = block_invalidate_folio, .read_folio = exfat_read_folio, .readahead = exfat_readahead, - .writepage = exfat_writepage, .writepages = exfat_writepages, .write_begin = exfat_write_begin, .write_end = exfat_write_end, .direct_IO = exfat_direct_IO, - .bmap = exfat_aop_bmap + .bmap = exfat_aop_bmap, + .migrate_folio = buffer_migrate_folio, }; static inline unsigned long exfat_hash(loff_t i_pos) From ee649af0d9a60ea61a5dad99ef5d6b4aa346f0a0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 2 Dec 2022 11:26:39 +0100 Subject: [PATCH 3733/4122] fat: remove ->writepage ->writepage is a very inefficient method to write back data, and only used through write_cache_pages or as a fallback when no ->migrate_folio method is present. Set ->migrate_folio to the generic buffer_head based helper, and remove the ->writepage implementation.
Link: https://lkml.kernel.org/r/20221202102644.770505-3-hch@lst.de Signed-off-by: Christoph Hellwig Acked-by: Johannes Weiner Signed-off-by: Andrew Morton --- fs/fat/inode.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 1cbcc4608dc7..d99b8549ec8f 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -194,11 +194,6 @@ static int fat_get_block(struct inode *inode, sector_t iblock, return 0; } -static int fat_writepage(struct page *page, struct writeback_control *wbc) -{ - return block_write_full_page(page, fat_get_block, wbc); -} - static int fat_writepages(struct address_space *mapping, struct writeback_control *wbc) { @@ -346,12 +341,12 @@ static const struct address_space_operations fat_aops = { .invalidate_folio = block_invalidate_folio, .read_folio = fat_read_folio, .readahead = fat_readahead, - .writepage = fat_writepage, .writepages = fat_writepages, .write_begin = fat_write_begin, .write_end = fat_write_end, .direct_IO = fat_direct_IO, - .bmap = _fat_bmap + .bmap = _fat_bmap, + .migrate_folio = buffer_migrate_folio, }; /* From ba195d9f14829690b8e4f67549960d83169a314e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 2 Dec 2022 11:26:40 +0100 Subject: [PATCH 3734/4122] hfs: remove ->writepage ->writepage is a very inefficient method to write back data, and only used through write_cache_pages or as a fallback when no ->migrate_folio method is present. Set ->migrate_folio to the generic buffer_head based helper, and stop wiring up ->writepage for hfs_aops.
Link: https://lkml.kernel.org/r/20221202102644.770505-4-hch@lst.de Signed-off-by: Christoph Hellwig Acked-by: Johannes Weiner Signed-off-by: Andrew Morton --- fs/hfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index c4526f16355d..16466a5e88b4 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -173,12 +173,12 @@ const struct address_space_operations hfs_aops = { .dirty_folio = block_dirty_folio, .invalidate_folio = block_invalidate_folio, .read_folio = hfs_read_folio, - .writepage = hfs_writepage, .write_begin = hfs_write_begin, .write_end = generic_write_end, .bmap = hfs_bmap, .direct_IO = hfs_direct_IO, .writepages = hfs_writepages, + .migrate_folio = buffer_migrate_folio, }; /* From 12f9b9a73dc603e658bf24eed2777cecdaf4103e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 2 Dec 2022 11:26:41 +0100 Subject: [PATCH 3735/4122] hfsplus: remove ->writepage ->writepage is a very inefficient method to write back data, and only used through write_cache_pages or as a fallback when no ->migrate_folio method is present. Set ->migrate_folio to the generic buffer_head based helper, and stop wiring up ->writepage for hfsplus_aops.
Link: https://lkml.kernel.org/r/20221202102644.770505-5-hch@lst.de Signed-off-by: Christoph Hellwig Acked-by: Johannes Weiner Signed-off-by: Andrew Morton --- fs/hfsplus/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index aeab83ed1c9c..d6572ad2407a 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -170,12 +170,12 @@ const struct address_space_operations hfsplus_aops = { .dirty_folio = block_dirty_folio, .invalidate_folio = block_invalidate_folio, .read_folio = hfsplus_read_folio, - .writepage = hfsplus_writepage, .write_begin = hfsplus_write_begin, .write_end = generic_write_end, .bmap = hfsplus_bmap, .direct_IO = hfsplus_direct_IO, .writepages = hfsplus_writepages, + .migrate_folio = buffer_migrate_folio, }; const struct dentry_operations hfsplus_dentry_operations = { From cd2e6024260de27a523e0af6ee47a20a6b8b8aa8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 2 Dec 2022 11:26:42 +0100 Subject: [PATCH 3736/4122] hpfs: remove ->writepage ->writepage is a very inefficient method to write back data, and only used through write_cache_pages or as a fallback when no ->migrate_folio method is present. Set ->migrate_folio to the generic buffer_head based helper, and remove the ->writepage implementation.
Link: https://lkml.kernel.org/r/20221202102644.770505-6-hch@lst.de Signed-off-by: Christoph Hellwig Acked-by: Johannes Weiner Signed-off-by: Andrew Morton --- fs/hpfs/file.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index f7547a62c81f..88952d4a631e 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c @@ -163,11 +163,6 @@ static int hpfs_read_folio(struct file *file, struct folio *folio) return mpage_read_folio(folio, hpfs_get_block); } -static int hpfs_writepage(struct page *page, struct writeback_control *wbc) -{ - return block_write_full_page(page, hpfs_get_block, wbc); -} - static void hpfs_readahead(struct readahead_control *rac) { mpage_readahead(rac, hpfs_get_block); @@ -248,12 +243,12 @@ const struct address_space_operations hpfs_aops = { .dirty_folio = block_dirty_folio, .invalidate_folio = block_invalidate_folio, .read_folio = hpfs_read_folio, - .writepage = hpfs_writepage, .readahead = hpfs_readahead, .writepages = hpfs_writepages, .write_begin = hpfs_write_begin, .write_end = hpfs_write_end, - .bmap = _hpfs_bmap + .bmap = _hpfs_bmap, + .migrate_folio = buffer_migrate_folio, }; const struct file_operations hpfs_file_ops = From 2274c3b281bb47e6980ae42fb8dc93b7a38192d5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 2 Dec 2022 11:26:43 +0100 Subject: [PATCH 3737/4122] jfs: remove ->writepage ->writepage is a very inefficient method to write back data, and only used through write_cache_pages or as a fallback when no ->migrate_folio method is present. Set ->migrate_folio to the generic buffer_head based helper, and remove the ->writepage implementation.
Link: https://lkml.kernel.org/r/20221202102644.770505-7-hch@lst.de Signed-off-by: Christoph Hellwig Acked-by: Dave Kleikamp Acked-by: Johannes Weiner Signed-off-by: Andrew Morton --- fs/jfs/inode.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index d1ec920aa030..8ac10e396050 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -264,11 +264,6 @@ int jfs_get_block(struct inode *ip, sector_t lblock, return rc; } -static int jfs_writepage(struct page *page, struct writeback_control *wbc) -{ - return block_write_full_page(page, jfs_get_block, wbc); -} - static int jfs_writepages(struct address_space *mapping, struct writeback_control *wbc) { @@ -355,12 +350,12 @@ const struct address_space_operations jfs_aops = { .invalidate_folio = block_invalidate_folio, .read_folio = jfs_read_folio, .readahead = jfs_readahead, - .writepage = jfs_writepage, .writepages = jfs_writepages, .write_begin = jfs_write_begin, .write_end = jfs_write_end, .bmap = jfs_bmap, .direct_IO = jfs_direct_IO, + .migrate_folio = buffer_migrate_folio, }; /* From 1bda9dad5aa0199c8592bac32b91afbf8ea236ff Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 2 Dec 2022 11:26:44 +0100 Subject: [PATCH 3738/4122] omfs: remove ->writepage ->writepage is a very inefficient method to write back data, and only used through write_cache_pages or as a fallback when no ->migrate_folio method is present. Set ->migrate_folio to the generic buffer_head based helper, and remove the ->writepage implementation.
Link: https://lkml.kernel.org/r/20221202102644.770505-8-hch@lst.de Signed-off-by: Christoph Hellwig Acked-by: Bob Copeland Acked-by: Johannes Weiner Signed-off-by: Andrew Morton --- fs/omfs/file.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/fs/omfs/file.c b/fs/omfs/file.c index fa7fe2393ff6..3a5b4b88a583 100644 --- a/fs/omfs/file.c +++ b/fs/omfs/file.c @@ -294,11 +294,6 @@ static void omfs_readahead(struct readahead_control *rac) mpage_readahead(rac, omfs_get_block); } -static int omfs_writepage(struct page *page, struct writeback_control *wbc) -{ - return block_write_full_page(page, omfs_get_block, wbc); -} - static int omfs_writepages(struct address_space *mapping, struct writeback_control *wbc) { @@ -375,10 +370,10 @@ const struct address_space_operations omfs_aops = { .invalidate_folio = block_invalidate_folio, .read_folio = omfs_read_folio, .readahead = omfs_readahead, - .writepage = omfs_writepage, .writepages = omfs_writepages, .write_begin = omfs_write_begin, .write_end = generic_write_end, .bmap = omfs_bmap, + .migrate_folio = buffer_migrate_folio, }; From 675eaca1f441acd4f0d403d71036b100cd49036a Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 2 Dec 2022 15:53:39 +1100 Subject: [PATCH 3739/4122] mm/mmap: properly unaccount memory on mas_preallocate() failure security_vm_enough_memory_mm() accounts memory via a call to vm_acct_memory(). Therefore any subsequent failures should unaccount for this memory prior to returning the error. Link: https://lkml.kernel.org/r/20221202045339.2999017-1-apopple@nvidia.com Fixes: 28c5609fb236 ("mm/mmap: preallocate maple nodes for brk vma expansion") Signed-off-by: Alistair Popple Reviewed-by: Liam R. 
Howlett Signed-off-by: Andrew Morton --- mm/mmap.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index 1741273ac34c..7d24fc478ffa 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2953,7 +2953,7 @@ static int do_brk_flags(struct ma_state *mas, struct vm_area_struct *vma, addr >> PAGE_SHIFT, NULL_VM_UFFD_CTX, NULL)) { mas_set_range(mas, vma->vm_start, addr + len - 1); if (mas_preallocate(mas, vma, GFP_KERNEL)) - return -ENOMEM; + goto unacct_fail; vma_adjust_trans_huge(vma, vma->vm_start, addr + len, 0); if (vma->anon_vma) { @@ -2975,7 +2975,7 @@ static int do_brk_flags(struct ma_state *mas, struct vm_area_struct *vma, /* create a vma struct for an anonymous mapping */ vma = vm_area_alloc(mm); if (!vma) - goto vma_alloc_fail; + goto unacct_fail; vma_set_anonymous(vma); vma->vm_start = addr; @@ -3000,7 +3000,7 @@ out: mas_store_fail: vm_area_free(vma); -vma_alloc_fail: +unacct_fail: vm_unacct_memory(len >> PAGE_SHIFT); return -ENOMEM; } From adb8213014b25c7f1d75d5b219becaadcd695efb Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Fri, 2 Dec 2022 03:15:10 +0000 Subject: [PATCH 3740/4122] mm: memcg: fix stale protection of reclaim target memcg Patch series "mm: memcg: fix protection of reclaim target memcg", v3. This series fixes a bug in calculating the protection of the reclaim target memcg where we end up using stale effective protection values from the last reclaim operation, instead of completely ignoring the protection of the reclaim target as intended. More detailed explanation and examples in patch 1, which includes the fix. Patches 2 & 3 introduce a selftest case that catches the bug. This patch (of 3): When we are doing memcg reclaim, the intended behavior is that we ignore any protection (memory.min, memory.low) of the target memcg (but not its children). 
Ever since the patch pointed to by the "Fixes" tag, we actually read a stale value for the target memcg protection when deciding whether to skip the memcg or not because it is protected. If the stale value happens to be high enough, we don't reclaim from the target memcg. Essentially, in some cases we may falsely skip reclaiming from the target memcg of reclaim because we read a stale protection value from last time we reclaimed from it. During reclaim, mem_cgroup_calculate_protection() is used to determine the effective protection (emin and elow) values of a memcg. The protection of the reclaim target is ignored, but we cannot set their effective protection to 0 due to a limitation of the current implementation (see comment in mem_cgroup_protection()). Instead, we leave their effective protection values unchanged, and later ignore it in mem_cgroup_protection(). However, mem_cgroup_protection() is called later in shrink_lruvec()->get_scan_count(), which is after the mem_cgroup_below_{min/low}() checks in shrink_node_memcgs(). As a result, the stale effective protection values of the target memcg may lead us to skip reclaiming from the target memcg entirely, before calling shrink_lruvec(). This can be even worse with recursive protection, where the stale target memcg protection can be higher than its standalone protection. See two examples below (a similar version of example (a) is added to test_memcontrol in a later patch). (a) A simple example with proactive reclaim is as follows. Consider the following hierarchy: ROOT | A | B (memory.min = 10M) Consider the following scenario: - B has memory.current = 10M. - The system undergoes global reclaim (or memcg reclaim in A). - In shrink_node_memcgs(): - mem_cgroup_calculate_protection() calculates the effective min (emin) of B as 10M. - mem_cgroup_below_min() returns true for B, we do not reclaim from B.
- Now if we want to reclaim 5M from B using proactive reclaim (memory.reclaim), we should be able to, as the protection of the target memcg should be ignored. - In shrink_node_memcgs(): - mem_cgroup_calculate_protection() immediately returns for B without doing anything, as B is the target memcg, relying on mem_cgroup_protection() to ignore B's stale effective min (still 10M). - mem_cgroup_below_min() reads the stale effective min for B and we skip it instead of ignoring its protection as intended, as we never reach mem_cgroup_protection(). (b) A more complex example with recursive protection is as follows. Consider the following hierarchy with memory_recursiveprot: ROOT | A (memory.min = 50M) | B (memory.min = 10M, memory.high = 40M) Consider the following scenario: - B has memory.current = 35M. - The system undergoes global reclaim (target memcg is NULL). - B will have an effective min of 50M (all of A's unclaimed protection). - B will not be reclaimed from. - Now allocate 10M more memory in B, pushing it above its high limit. - The system undergoes memcg reclaim from B (target memcg is B). - Like example (a), we do nothing in mem_cgroup_calculate_protection(), then call mem_cgroup_below_min(), which will read the stale effective min for B (50M) and skip it. In this case, it's even worse because we are not just considering B's standalone protection (10M), but we are reading a much higher stale protection (50M) which will cause us to not reclaim from B at all. This is an artifact of commit 45c7f7e1ef17 ("mm, memcg: decouple e{low,min} state mutations from protection checks") which made mem_cgroup_calculate_protection() only change the state without returning any value. Before that commit, we used to return MEMCG_PROT_NONE for the target memcg, which would cause us to skip the mem_cgroup_below_{min/low}() checks. After that commit we do not return anything and we end up checking the min & low effective protections for the target memcg, which are stale.
Update mem_cgroup_supports_protection() to also check if we are reclaiming from the target, and rename it to mem_cgroup_unprotected() (now returns true if we should not protect the memcg, much simpler logic). Link: https://lkml.kernel.org/r/20221202031512.1365483-1-yosryahmed@google.com Link: https://lkml.kernel.org/r/20221202031512.1365483-2-yosryahmed@google.com Fixes: 45c7f7e1ef17 ("mm, memcg: decouple e{low,min} state mutations from protection checks") Signed-off-by: Yosry Ahmed Reviewed-by: Roman Gushchin Cc: Chris Down Cc: David Rientjes Cc: Johannes Weiner Cc: Matthew Wilcox Cc: Michal Hocko Cc: Muchun Song Cc: Shakeel Butt Cc: Tejun Heo Cc: Vasily Averin Cc: Vlastimil Babka Cc: Yu Zhao Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 31 +++++++++++++++++++++---------- mm/vmscan.c | 11 ++++++----- 2 files changed, 27 insertions(+), 15 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e1644a24009c..d3c8203cab6c 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -615,28 +615,32 @@ static inline void mem_cgroup_protection(struct mem_cgroup *root, void mem_cgroup_calculate_protection(struct mem_cgroup *root, struct mem_cgroup *memcg); -static inline bool mem_cgroup_supports_protection(struct mem_cgroup *memcg) +static inline bool mem_cgroup_unprotected(struct mem_cgroup *target, + struct mem_cgroup *memcg) { /* * The root memcg doesn't account charges, and doesn't support - * protection. + * protection. 
The target memcg's protection is ignored, see + * mem_cgroup_calculate_protection() and mem_cgroup_protection() */ - return !mem_cgroup_disabled() && !mem_cgroup_is_root(memcg); - + return mem_cgroup_disabled() || mem_cgroup_is_root(memcg) || + memcg == target; } -static inline bool mem_cgroup_below_low(struct mem_cgroup *memcg) +static inline bool mem_cgroup_below_low(struct mem_cgroup *target, + struct mem_cgroup *memcg) { - if (!mem_cgroup_supports_protection(memcg)) + if (mem_cgroup_unprotected(target, memcg)) return false; return READ_ONCE(memcg->memory.elow) >= page_counter_read(&memcg->memory); } -static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg) +static inline bool mem_cgroup_below_min(struct mem_cgroup *target, + struct mem_cgroup *memcg) { - if (!mem_cgroup_supports_protection(memcg)) + if (mem_cgroup_unprotected(target, memcg)) return false; return READ_ONCE(memcg->memory.emin) >= @@ -1209,12 +1213,19 @@ static inline void mem_cgroup_calculate_protection(struct mem_cgroup *root, { } -static inline bool mem_cgroup_below_low(struct mem_cgroup *memcg) +static inline bool mem_cgroup_unprotected(struct mem_cgroup *target, + struct mem_cgroup *memcg) +{ + return true; +} +static inline bool mem_cgroup_below_low(struct mem_cgroup *target, + struct mem_cgroup *memcg) { return false; } -static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg) +static inline bool mem_cgroup_below_min(struct mem_cgroup *target, + struct mem_cgroup *memcg) { return false; } diff --git a/mm/vmscan.c b/mm/vmscan.c index 9356a3ee639c..dcd476a66a59 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -4513,7 +4513,7 @@ static bool age_lruvec(struct lruvec *lruvec, struct scan_control *sc, unsigned mem_cgroup_calculate_protection(NULL, memcg); - if (mem_cgroup_below_min(memcg)) + if (mem_cgroup_below_min(NULL, memcg)) return false; need_aging = should_run_aging(lruvec, max_seq, min_seq, sc, swappiness, &nr_to_scan); @@ -5100,8 +5100,9 @@ static unsigned long 
get_nr_to_scan(struct lruvec *lruvec, struct scan_control * DEFINE_MAX_SEQ(lruvec); DEFINE_MIN_SEQ(lruvec); - if (mem_cgroup_below_min(memcg) || - (mem_cgroup_below_low(memcg) && !sc->memcg_low_reclaim)) + if (mem_cgroup_below_min(sc->target_mem_cgroup, memcg) || + (mem_cgroup_below_low(sc->target_mem_cgroup, memcg) && + !sc->memcg_low_reclaim)) return 0; *need_aging = should_run_aging(lruvec, max_seq, min_seq, sc, can_swap, &nr_to_scan); @@ -6096,13 +6097,13 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc) mem_cgroup_calculate_protection(target_memcg, memcg); - if (mem_cgroup_below_min(memcg)) { + if (mem_cgroup_below_min(target_memcg, memcg)) { /* * Hard protection. * If there is no reclaimable memory, OOM. */ continue; - } else if (mem_cgroup_below_low(memcg)) { + } else if (mem_cgroup_below_low(target_memcg, memcg)) { /* * Soft protection. * Respect the protection only as long as From e5d64edac64531375716fabe35c9e0a502ca2894 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Fri, 2 Dec 2022 03:15:11 +0000 Subject: [PATCH 3741/4122] selftests: cgroup: refactor proactive reclaim code to reclaim_until() Refactor the code that drives writing to memory.reclaim (retrying, error handling, etc) from test_memcg_reclaim() to a helper called reclaim_until(), which proactively reclaims from a memcg until its usage reaches a certain value. While we are at it, refactor and simplify the reclaim loop. This will be used in a following patch in another test. 
Link: https://lkml.kernel.org/r/20221202031512.1365483-3-yosryahmed@google.com Signed-off-by: Yosry Ahmed Suggested-by: Roman Gushchin Reviewed-by: Roman Gushchin Cc: Chris Down Cc: David Rientjes Cc: Johannes Weiner Cc: Matthew Wilcox Cc: Michal Hocko Cc: Muchun Song Cc: Shakeel Butt Cc: Tejun Heo Cc: Vasily Averin Cc: Vlastimil Babka Cc: Yu Zhao Signed-off-by: Andrew Morton --- .../selftests/cgroup/test_memcontrol.c | 80 ++++++++++--------- 1 file changed, 44 insertions(+), 36 deletions(-) diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c index 8833359556f3..a8f4700353a4 100644 --- a/tools/testing/selftests/cgroup/test_memcontrol.c +++ b/tools/testing/selftests/cgroup/test_memcontrol.c @@ -645,6 +645,48 @@ cleanup: return ret; } +/* + * Reclaim from @memcg until usage reaches @goal by writing to + * memory.reclaim. + * + * This function will return false if the usage is already below the + * goal. + * + * This function assumes that writing to memory.reclaim is the only + * source of change in memory.current (no concurrent allocations or + * reclaim). + * + * This function makes sure memory.reclaim is sane. It will return + * false if memory.reclaim's error codes do not make sense, even if + * the usage goal was satisfied. + */ +static bool reclaim_until(const char *memcg, long goal) +{ + char buf[64]; + int retries, err; + long current, to_reclaim; + bool reclaimed = false; + + for (retries = 5; retries > 0; retries--) { + current = cg_read_long(memcg, "memory.current"); + + if (current < goal || values_close(current, goal, 3)) + break; + /* Did memory.reclaim return 0 incorrectly? 
*/ + else if (reclaimed) + return false; + + to_reclaim = current - goal; + snprintf(buf, sizeof(buf), "%ld", to_reclaim); + err = cg_write(memcg, "memory.reclaim", buf); + if (!err) + reclaimed = true; + else if (err != -EAGAIN) + return false; + } + return reclaimed; +} + /* * This test checks that memory.reclaim reclaims the given * amount of memory (from both anon and file, if possible). @@ -653,8 +695,7 @@ static int test_memcg_reclaim(const char *root) { int ret = KSFT_FAIL, fd, retries; char *memcg; - long current, expected_usage, to_reclaim; - char buf[64]; + long current, expected_usage; memcg = cg_name(root, "memcg_test"); if (!memcg) @@ -705,41 +746,8 @@ static int test_memcg_reclaim(const char *root) * Reclaim until current reaches 30M, this makes sure we hit both anon * and file if swap is enabled. */ - retries = 5; - while (true) { - int err; - - current = cg_read_long(memcg, "memory.current"); - to_reclaim = current - MB(30); - - /* - * We only keep looping if we get EAGAIN, which means we could - * not reclaim the full amount. - */ - if (to_reclaim <= 0) - goto cleanup; - - - snprintf(buf, sizeof(buf), "%ld", to_reclaim); - err = cg_write(memcg, "memory.reclaim", buf); - if (!err) { - /* - * If writing succeeds, then the written amount should have been - * fully reclaimed (and maybe more). - */ - current = cg_read_long(memcg, "memory.current"); - if (!values_close(current, MB(30), 3) && current > MB(30)) - goto cleanup; - break; - } - - /* The kernel could not reclaim the full amount, try again. */ - if (err == -EAGAIN && retries--) - continue; - - /* We got an unexpected error or ran out of retries. 
*/ + if (!reclaim_until(memcg, MB(30))) goto cleanup; - } ret = KSFT_PASS; cleanup: From 1c74697776e17619e485a40cf8cfdb4bf18fd18e Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Fri, 2 Dec 2022 03:15:12 +0000 Subject: [PATCH 3742/4122] selftests: cgroup: make sure reclaim target memcg is unprotected Make sure that we ignore protection of a memcg that is the target of memcg reclaim. Link: https://lkml.kernel.org/r/20221202031512.1365483-4-yosryahmed@google.com Signed-off-by: Yosry Ahmed Reviewed-by: Roman Gushchin Cc: Chris Down Cc: David Rientjes Cc: Johannes Weiner Cc: Matthew Wilcox Cc: Michal Hocko Cc: Muchun Song Cc: Shakeel Butt Cc: Tejun Heo Cc: Vasily Averin Cc: Vlastimil Babka Cc: Yu Zhao Signed-off-by: Andrew Morton --- tools/testing/selftests/cgroup/test_memcontrol.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c index a8f4700353a4..1e616a8c6a9c 100644 --- a/tools/testing/selftests/cgroup/test_memcontrol.c +++ b/tools/testing/selftests/cgroup/test_memcontrol.c @@ -238,6 +238,8 @@ static int cg_test_proc_killed(const char *cgroup) return -1; } +static bool reclaim_until(const char *memcg, long goal); + /* * First, this test creates the following hierarchy: * A memory.min = 0, memory.max = 200M @@ -266,6 +268,12 @@ static int cg_test_proc_killed(const char *cgroup) * unprotected memory in A available, and checks that: * a) memory.min protects pagecache even in this case, * b) memory.low allows reclaiming page cache with low events. + * + * Then we try to reclaim from A/B/C using memory.reclaim until its + * usage reaches 10M. + * This makes sure that: + * (a) We ignore the protection of the reclaim target memcg. + * (b) The previously calculated emin value (~29M) should be dismissed. 
*/ static int test_memcg_protection(const char *root, bool min) { @@ -385,6 +393,9 @@ static int test_memcg_protection(const char *root, bool min) if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3)) goto cleanup; + if (!reclaim_until(children[0], MB(10))) + goto cleanup; + if (min) { ret = KSFT_PASS; goto cleanup; From 6b426d071419a40f61fe41fe1bd9e1b4fa5aeb37 Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Thu, 1 Dec 2022 15:33:17 -0800 Subject: [PATCH 3743/4122] mm: disable top-tier fallback to reclaim on proactive reclaim Reclaiming directly from top tier nodes breaks the aging pipeline of memory tiers. If we have a RAM -> CXL -> storage hierarchy, we should demote from RAM to CXL and from CXL to storage. If we reclaim a page from RAM, it means we 'demote' it directly from RAM to storage, bypassing potentially a huge amount of pages colder than it in CXL. However disabling reclaim from top tier nodes entirely would cause ooms in edge scenarios where lower tier memory is unreclaimable for whatever reason, e.g. memory being mlocked() or too hot to reclaim. In these cases we would rather the job run with a performance regression rather than it oom altogether. However, we can disable reclaim from top tier nodes for proactive reclaim. That reclaim is not real memory pressure, and we don't have any cause to be breaking the aging pipeline. 
[akpm@linux-foundation.org: restore comment layout, per Ying Huang] Link: https://lkml.kernel.org/r/20221201233317.1394958-1-almasrymina@google.com Signed-off-by: Mina Almasry Reviewed-by: "Huang, Ying" Reviewed-by: Yang Shi Cc: Greg Thelen Cc: Shakeel Butt Cc: Tim Chen Cc: Wei Xu Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- mm/vmscan.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index dcd476a66a59..1a59171c6695 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2088,10 +2088,29 @@ keep: nr_reclaimed += demote_folio_list(&demote_folios, pgdat); /* Folios that could not be demoted are still in @demote_folios */ if (!list_empty(&demote_folios)) { - /* Folios which weren't demoted go back on @folio_list for retry: */ + /* Folios which weren't demoted go back on @folio_list */ list_splice_init(&demote_folios, folio_list); - do_demote_pass = false; - goto retry; + + /* + * goto retry to reclaim the undemoted folios in folio_list if + * desired. + * + * Reclaiming directly from top tier nodes is not often desired + * due to it breaking the LRU ordering: in general memory + * should be reclaimed from lower tier nodes and demoted from + * top tier nodes. + * + * However, disabling reclaim from top tier nodes entirely + * would cause ooms in edge scenarios where lower tier memory + * is unreclaimable for whatever reason, eg memory being + * mlocked or too hot to reclaim. We can disable reclaim + * from top tier nodes in proactive reclaim though as that is + * not real memory pressure. + */ + if (!sc->proactive) { + do_demote_pass = false; + goto retry; + } } pgactivate = stat->nr_activate[0] + stat->nr_activate[1]; From 12a5d3955227b0d7e04fb793ccceeb2a1dd275c5 Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Fri, 2 Dec 2022 14:35:31 -0800 Subject: [PATCH 3744/4122] mm: add nodes= arg to memory.reclaim The nodes= arg instructs the kernel to only scan the given nodes for proactive reclaim. 
For example use cases, consider a 2 tier memory system: nodes 0,1 -> top tier nodes 2,3 -> second tier $ echo "1m nodes=0" > memory.reclaim This instructs the kernel to attempt to reclaim 1m memory from node 0. Since node 0 is a top tier node, demotion will be attempted first. This is useful to direct proactive reclaim to specific nodes that are under pressure. $ echo "1m nodes=2,3" > memory.reclaim This instructs the kernel to attempt to reclaim 1m memory in the second tier, since this tier of memory has no demotion targets the memory will be reclaimed. $ echo "1m nodes=0,1" > memory.reclaim Instructs the kernel to reclaim memory from the top tier nodes, which can be desirable according to the userspace policy if there is pressure on the top tiers. Since these nodes have demotion targets, the kernel will attempt demotion first. Since commit 3f1509c57b1b ("Revert "mm/vmscan: never demote for memcg reclaim""), the proactive reclaim interface memory.reclaim does both reclaim and demotion. Reclaim and demotion incur different latency costs to the jobs in the cgroup. Demoted memory would still be addressable by the userspace at a higher latency, but reclaimed memory would need to incur a pagefault. The 'nodes' arg is useful to allow the userspace to control demotion and reclaim independently according to its policy: if the memory.reclaim is called on a node with demotion targets, it will attempt demotion first; if it is called on a node without demotion targets, it will only attempt reclaim. 
Link: https://lkml.kernel.org/r/20221202223533.1785418-1-almasrymina@google.com Signed-off-by: Mina Almasry Acked-by: Michal Hocko Acked-by: Shakeel Butt Acked-by: Muchun Song Cc: Bagas Sanjaya Cc: "Huang, Ying" Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Roman Gushchin Cc: Tejun Heo Cc: Wei Xu Cc: Yang Shi Cc: Yosry Ahmed Cc: zefan li Signed-off-by: Andrew Morton --- Documentation/admin-guide/cgroup-v2.rst | 15 +++--- include/linux/swap.h | 3 +- mm/memcontrol.c | 67 ++++++++++++++++++++----- mm/vmscan.c | 4 +- 4 files changed, 68 insertions(+), 21 deletions(-) diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 74cec76be9f2..c8ae7c897f14 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1245,17 +1245,13 @@ PAGE_SIZE multiple when read back. This is a simple interface to trigger memory reclaim in the target cgroup. - This file accepts a single key, the number of bytes to reclaim. - No nested keys are currently supported. + This file accepts a string which contains the number of bytes to + reclaim. Example:: echo "1G" > memory.reclaim - The interface can be later extended with nested keys to - configure the reclaim behavior. For example, specify the - type of memory to reclaim from (anon, file, ..). - Please note that the kernel can over or under reclaim from the target cgroup. If less bytes are reclaimed than the specified amount, -EAGAIN is returned. @@ -1267,6 +1263,13 @@ PAGE_SIZE multiple when read back. This means that the networking layer will not adapt based on reclaim induced by memory.reclaim. + This file also allows the user to specify the nodes to reclaim from, + via the 'nodes=' key, for example:: + + echo "1G nodes=0,1" > memory.reclaim + + The above instructs the kernel to reclaim memory from nodes 0,1. + memory.peak A read-only single value file which exists on non-root cgroups. 
diff --git a/include/linux/swap.h b/include/linux/swap.h index 0ceed49516ad..2787b84eaf12 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -418,7 +418,8 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, unsigned long nr_pages, gfp_t gfp_mask, - unsigned int reclaim_options); + unsigned int reclaim_options, + nodemask_t *nodemask); extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem, gfp_t gfp_mask, bool noswap, pg_data_t *pgdat, diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 2c7a91689fef..ff65bc23be13 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -63,6 +63,7 @@ #include #include #include +#include #include "internal.h" #include #include @@ -2392,7 +2393,8 @@ static unsigned long reclaim_high(struct mem_cgroup *memcg, psi_memstall_enter(&pflags); nr_reclaimed += try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, - MEMCG_RECLAIM_MAY_SWAP); + MEMCG_RECLAIM_MAY_SWAP, + NULL); psi_memstall_leave(&pflags); } while ((memcg = parent_mem_cgroup(memcg)) && !mem_cgroup_is_root(memcg)); @@ -2683,7 +2685,8 @@ retry: psi_memstall_enter(&pflags); nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages, - gfp_mask, reclaim_options); + gfp_mask, reclaim_options, + NULL); psi_memstall_leave(&pflags); if (mem_cgroup_margin(mem_over_limit) >= nr_pages) @@ -3503,7 +3506,8 @@ static int mem_cgroup_resize_max(struct mem_cgroup *memcg, } if (!try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL, - memsw ? 0 : MEMCG_RECLAIM_MAY_SWAP)) { + memsw ? 
0 : MEMCG_RECLAIM_MAY_SWAP, + NULL)) { ret = -EBUSY; break; } @@ -3614,7 +3618,8 @@ static int mem_cgroup_force_empty(struct mem_cgroup *memcg) return -EINTR; if (!try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL, - MEMCG_RECLAIM_MAY_SWAP)) + MEMCG_RECLAIM_MAY_SWAP, + NULL)) nr_retries--; } @@ -6418,7 +6423,8 @@ static ssize_t memory_high_write(struct kernfs_open_file *of, } reclaimed = try_to_free_mem_cgroup_pages(memcg, nr_pages - high, - GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP); + GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP, + NULL); if (!reclaimed && !nr_retries--) break; @@ -6467,7 +6473,8 @@ static ssize_t memory_max_write(struct kernfs_open_file *of, if (nr_reclaims) { if (!try_to_free_mem_cgroup_pages(memcg, nr_pages - max, - GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP)) + GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP, + NULL)) nr_reclaims--; continue; } @@ -6590,21 +6597,54 @@ static ssize_t memory_oom_group_write(struct kernfs_open_file *of, return nbytes; } +enum { + MEMORY_RECLAIM_NODES = 0, + MEMORY_RECLAIM_NULL, +}; + +static const match_table_t if_tokens = { + { MEMORY_RECLAIM_NODES, "nodes=%s" }, + { MEMORY_RECLAIM_NULL, NULL }, +}; + static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); unsigned int nr_retries = MAX_RECLAIM_RETRIES; unsigned long nr_to_reclaim, nr_reclaimed = 0; - unsigned int reclaim_options; - int err; + unsigned int reclaim_options = MEMCG_RECLAIM_MAY_SWAP | + MEMCG_RECLAIM_PROACTIVE; + char *old_buf, *start; + substring_t args[MAX_OPT_ARGS]; + int token; + char value[256]; + nodemask_t nodemask = NODE_MASK_ALL; buf = strstrip(buf); - err = page_counter_memparse(buf, "", &nr_to_reclaim); - if (err) - return err; - reclaim_options = MEMCG_RECLAIM_MAY_SWAP | MEMCG_RECLAIM_PROACTIVE; + old_buf = buf; + nr_to_reclaim = memparse(buf, &buf) / PAGE_SIZE; + if (buf == old_buf) + return -EINVAL; + + buf = strstrip(buf); + + while ((start = strsep(&buf, " ")) != NULL) 
{ + if (!strlen(start)) + continue; + token = match_token(start, if_tokens, args); + match_strlcpy(value, args, sizeof(value)); + switch (token) { + case MEMORY_RECLAIM_NODES: + if (nodelist_parse(value, nodemask) < 0) + return -EINVAL; + break; + default: + return -EINVAL; + } + } + while (nr_reclaimed < nr_to_reclaim) { unsigned long reclaimed; @@ -6621,7 +6661,8 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf, reclaimed = try_to_free_mem_cgroup_pages(memcg, nr_to_reclaim - nr_reclaimed, - GFP_KERNEL, reclaim_options); + GFP_KERNEL, reclaim_options, + &nodemask); if (!reclaimed && !nr_retries--) return -EAGAIN; diff --git a/mm/vmscan.c b/mm/vmscan.c index 1a59171c6695..2b42ac9ad755 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -6758,7 +6758,8 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg, unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, unsigned long nr_pages, gfp_t gfp_mask, - unsigned int reclaim_options) + unsigned int reclaim_options, + nodemask_t *nodemask) { unsigned long nr_reclaimed; unsigned int noreclaim_flag; @@ -6773,6 +6774,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, .may_unmap = 1, .may_swap = !!(reclaim_options & MEMCG_RECLAIM_MAY_SWAP), .proactive = !!(reclaim_options & MEMCG_RECLAIM_PROACTIVE), + .nodemask = nodemask, }; /* * Traverse the ZONELIST_FALLBACK zonelist of the current node to put From c449deb2b99ff2458214ed4a3526277bc9e40757 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sun, 4 Dec 2022 17:01:03 -0800 Subject: [PATCH 3745/4122] mm: memcg: fix swapcached stat accounting I'd been worried by high "swapcached" counts in memcg OOM reports, thought we had a problem freeing swapcache, but it was just the accounting that was wrong. Two issues: 1. When __remove_mapping() removes swapcache, __delete_from_swap_cache() relies on memcg_data for the right counts to be updated; but that had already been reset by mem_cgroup_swapout(). 
Swap those calls around - mem_cgroup_swapout() does not require the swapcached flag to be set. 6.1 commit ac35a4902374 ("mm: multi-gen LRU: minimal implementation") already made a similar swap for workingset_eviction(), but not for this. 2. memcg's "swapcached" count was added for memcg v2 stats, but displayed on OOM even for memcg v1: so mem_cgroup_move_account() ought to move it. Link: https://lkml.kernel.org/r/b8b96ee0-1e1e-85f8-df97-c82a11d7cd14@google.com Fixes: b6038942480e ("mm: memcg: add swapcache stat for memcg v2") Signed-off-by: Hugh Dickins Acked-by: Johannes Weiner Acked-by: Shakeel Butt Cc: Michal Hocko Cc: Roman Gushchin Cc: Yu Zhao Signed-off-by: Andrew Morton --- mm/memcontrol.c | 6 ++++++ mm/vmscan.c | 3 +-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index ff65bc23be13..ab457f0394ab 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5767,6 +5767,12 @@ static int mem_cgroup_move_account(struct page *page, } } +#ifdef CONFIG_SWAP + if (folio_test_swapcache(folio)) { + __mod_lruvec_state(from_vec, NR_SWAPCACHE, -nr_pages); + __mod_lruvec_state(to_vec, NR_SWAPCACHE, nr_pages); + } +#endif if (folio_test_writeback(folio)) { __mod_lruvec_state(from_vec, NR_WRITEBACK, -nr_pages); __mod_lruvec_state(to_vec, NR_WRITEBACK, nr_pages); diff --git a/mm/vmscan.c b/mm/vmscan.c index 2b42ac9ad755..aba991c505f1 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1368,11 +1368,10 @@ static int __remove_mapping(struct address_space *mapping, struct folio *folio, if (folio_test_swapcache(folio)) { swp_entry_t swap = folio_swap_entry(folio); - /* get a shadow entry before mem_cgroup_swapout() clears folio_memcg() */ if (reclaimed && !mapping_exiting(mapping)) shadow = workingset_eviction(folio, target_memcg); - mem_cgroup_swapout(folio, swap); __delete_from_swap_cache(folio, swap, shadow); + mem_cgroup_swapout(folio, swap); xa_unlock_irq(&mapping->i_pages); put_swap_folio(folio, swap); } else { From 
6287b7dae80944bfa37784a8f9d6861a4facaa6e Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sun, 4 Dec 2022 17:57:07 -0800 Subject: [PATCH 3746/4122] mm,thp,rmap: fix races between updates of subpages_mapcount Commit 4b51634cd16a, introducing the COMPOUND_MAPPED bit, paid attention to the impossibility of subpages_mapcount ever appearing negative; but did not attend to those races in which it can momentarily appear larger than thought possible. These arise from how page_remove_rmap() first decrements page->_mapcount or compound_mapcount, then, if that transition goes negative (logical 0), decrements subpages_mapcount. The initial decrement lets a racing page_add_*_rmap() reincrement _mapcount or compound_mapcount immediately, and then in rare cases its corresponding increment of subpages_mapcount may be completed before page_remove_rmap()'s decrement. There could even (with increasing unlikelihood) be a series of increments intermixed with the decrements. In practice, checking subpages_mapcount with a temporary WARN on range, has caught values of 0x1000000 (2*COMPOUND_MAPPED, when move_pages() was using remove_migration_pmd()) and 0x800201 (do_huge_pmd_wp_page() using __split_huge_pmd()): page_add_anon_rmap() racing page_remove_rmap(), as predicted. I certainly found it harder to reason about than when bit_spin_locked, but the easy case gives a clue to how to handle the harder case. The easy case being the three !(nr & COMPOUND_MAPPED) checks, which should obviously be replaced by (nr < COMPOUND_MAPPED) checks - to count a page as compound mapped, even while the bit in that position is 0. The harder case is when trying to decide how many subpages are newly covered or uncovered, when compound map is first added or last removed: not knowing all that racily happened between first and second atomic ops. 
But the easy way to handle that, is again to count the page as compound mapped all the while that its subpages_mapcount indicates so - ignoring the _mapcount or compound_mapcount transition while it is on the way to being reversed. Link: https://lkml.kernel.org/r/4388158-3092-a960-ff2d-55f2b0fe4ef8@google.com Fixes: 4b51634cd16a ("mm,thp,rmap: subpages_mapcount COMPOUND_MAPPED if PMD-mapped") Signed-off-by: Hugh Dickins Cc: David Hildenbrand Cc: James Houghton Cc: Johannes Weiner Cc: John Hubbard Cc: "Kirill A . Shutemov" Cc: Linus Torvalds Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Mike Kravetz Cc: Mina Almasry Cc: Muchun Song Cc: Naoya Horiguchi Cc: Peter Xu Cc: Sidhartha Kumar Cc: Vlastimil Babka Cc: Yang Shi Cc: Zach O'Keefe Signed-off-by: Andrew Morton --- mm/rmap.c | 42 +++++++++++++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/mm/rmap.c b/mm/rmap.c index 459dc1c44d8a..b616870a09be 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1232,7 +1232,7 @@ void page_add_anon_rmap(struct page *page, if (first && PageCompound(page)) { mapped = subpages_mapcount_ptr(compound_head(page)); nr = atomic_inc_return_relaxed(mapped); - nr = !(nr & COMPOUND_MAPPED); + nr = (nr < COMPOUND_MAPPED); } } else if (PageTransHuge(page)) { /* That test is redundant: it's for safety or to optimize out */ @@ -1241,8 +1241,16 @@ void page_add_anon_rmap(struct page *page, if (first) { mapped = subpages_mapcount_ptr(page); nr = atomic_add_return_relaxed(COMPOUND_MAPPED, mapped); - nr_pmdmapped = thp_nr_pages(page); - nr = nr_pmdmapped - (nr & SUBPAGES_MAPPED); + if (likely(nr < COMPOUND_MAPPED + COMPOUND_MAPPED)) { + nr_pmdmapped = thp_nr_pages(page); + nr = nr_pmdmapped - (nr & SUBPAGES_MAPPED); + /* Raced ahead of a remove and another add? 
*/ + if (unlikely(nr < 0)) + nr = 0; + } else { + /* Raced ahead of a remove of COMPOUND_MAPPED */ + nr = 0; + } } } @@ -1330,7 +1338,7 @@ void page_add_file_rmap(struct page *page, if (first && PageCompound(page)) { mapped = subpages_mapcount_ptr(compound_head(page)); nr = atomic_inc_return_relaxed(mapped); - nr = !(nr & COMPOUND_MAPPED); + nr = (nr < COMPOUND_MAPPED); } } else if (PageTransHuge(page)) { /* That test is redundant: it's for safety or to optimize out */ @@ -1339,8 +1347,16 @@ void page_add_file_rmap(struct page *page, if (first) { mapped = subpages_mapcount_ptr(page); nr = atomic_add_return_relaxed(COMPOUND_MAPPED, mapped); - nr_pmdmapped = thp_nr_pages(page); - nr = nr_pmdmapped - (nr & SUBPAGES_MAPPED); + if (likely(nr < COMPOUND_MAPPED + COMPOUND_MAPPED)) { + nr_pmdmapped = thp_nr_pages(page); + nr = nr_pmdmapped - (nr & SUBPAGES_MAPPED); + /* Raced ahead of a remove and another add? */ + if (unlikely(nr < 0)) + nr = 0; + } else { + /* Raced ahead of a remove of COMPOUND_MAPPED */ + nr = 0; + } } } @@ -1387,7 +1403,7 @@ void page_remove_rmap(struct page *page, if (last && PageCompound(page)) { mapped = subpages_mapcount_ptr(compound_head(page)); nr = atomic_dec_return_relaxed(mapped); - nr = !(nr & COMPOUND_MAPPED); + nr = (nr < COMPOUND_MAPPED); } } else if (PageTransHuge(page)) { /* That test is redundant: it's for safety or to optimize out */ @@ -1396,8 +1412,16 @@ void page_remove_rmap(struct page *page, if (last) { mapped = subpages_mapcount_ptr(page); nr = atomic_sub_return_relaxed(COMPOUND_MAPPED, mapped); - nr_pmdmapped = thp_nr_pages(page); - nr = nr_pmdmapped - (nr & SUBPAGES_MAPPED); + if (likely(nr < COMPOUND_MAPPED)) { + nr_pmdmapped = thp_nr_pages(page); + nr = nr_pmdmapped - (nr & SUBPAGES_MAPPED); + /* Raced ahead of another remove and an add? 
*/ + if (unlikely(nr < 0)) + nr = 0; + } else { + /* An add of COMPOUND_MAPPED raced ahead */ + nr = 0; + } } } From a0ac9b3598fac36907bd1baec28c99656d2ed0b6 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 5 Dec 2022 20:37:13 +0100 Subject: [PATCH 3747/4122] mm/gup_test: fix PIN_LONGTERM_TEST_READ with highmem Patch series "selftests/vm: fix some tests on 32bit". I finally had the time to run some of the selftests written by me (especially "cow") on x86 PAE. I found some unexpected "surprises" :) With these changes, and with [1] on top of mm-unstable, the "cow" tests and the "ksm_functional_tests" compile and pass as expected (expected failures with hugetlb in the "cow" tests). "madv_populate" has one expected test failure -- x86 does not support softdirty tracking. #1-#3 fix commits with stable commit ids. #4 fixes a test that is not in mm-stable yet. A note that there are many other compile errors/warnings when compiling on 32bit and with older Linux headers ... something for another day. [1] https://lkml.kernel.org/r/20221205150857.167583-1-david@redhat.com This patch (of 4): ... we have to kmap()/kunmap(), otherwise this won't work as expected with highmem. 
Link: https://lkml.kernel.org/r/20221205193716.276024-1-david@redhat.com Link: https://lkml.kernel.org/r/20221205193716.276024-2-david@redhat.com Fixes: c77369b437f9 ("mm/gup_test: start/stop/read functionality for PIN LONGTERM test") Signed-off-by: David Hildenbrand Cc: Shuah Khan , Cc: Yang Li Signed-off-by: Andrew Morton --- mm/gup_test.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/mm/gup_test.c b/mm/gup_test.c index 0d76d9b4bb5a..33f431e0da60 100644 --- a/mm/gup_test.c +++ b/mm/gup_test.c @@ -4,6 +4,7 @@ #include #include #include +#include #include "gup_test.h" static void put_back_pages(unsigned int cmd, struct page **pages, @@ -297,10 +298,13 @@ static inline int pin_longterm_test_read(unsigned long arg) return -EFAULT; for (i = 0; i < pin_longterm_test_nr_pages; i++) { - void *addr = page_to_virt(pin_longterm_test_pages[i]); + void *addr = kmap_local_page(pin_longterm_test_pages[i]); + unsigned long ret; - if (copy_to_user((void __user *)(unsigned long)user_addr, addr, - PAGE_SIZE)) + ret = copy_to_user((void __user *)(unsigned long)user_addr, addr, + PAGE_SIZE); + kunmap_local(addr); + if (ret) return -EFAULT; user_addr += PAGE_SIZE; } From d88825f22b8f1cad8acb429b6bf0a54c85d7b93f Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 5 Dec 2022 20:37:14 +0100 Subject: [PATCH 3748/4122] selftests/vm: madv_populate: fix missing MADV_POPULATE_(READ|WRITE) definitions The tests fail to compile in some environments (e.g., Debian 11.5 on x86). Let's simply conditionally define MADV_POPULATE_(READ|WRITE) if not already defined, similar to how the khugepaged.c test handles it. 
Link: https://lkml.kernel.org/r/20221205193716.276024-3-david@redhat.com Fixes: 39b2e5cae43d ("selftests/vm: make MADV_POPULATE_(READ|WRITE) use in-tree headers") Signed-off-by: David Hildenbrand Cc: Shuah Khan Cc: Yang Li Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/madv_populate.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/testing/selftests/vm/madv_populate.c b/tools/testing/selftests/vm/madv_populate.c index 60547245e479..262eae6b58f2 100644 --- a/tools/testing/selftests/vm/madv_populate.c +++ b/tools/testing/selftests/vm/madv_populate.c @@ -20,6 +20,13 @@ #include "../kselftest.h" #include "vm_util.h" +#ifndef MADV_POPULATE_READ +#define MADV_POPULATE_READ 22 +#endif /* MADV_POPULATE_READ */ +#ifndef MADV_POPULATE_WRITE +#define MADV_POPULATE_WRITE 23 +#endif /* MADV_POPULATE_WRITE */ + /* * For now, we're using 2 MiB of private anonymous memory for all tests. */ From 380969fe5aacdea661d3f9ab32e84327e8624ae2 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 5 Dec 2022 20:37:15 +0100 Subject: [PATCH 3749/4122] selftests/vm: cow: fix compile warning on 32bit The compiler complains about the conversion of a pointer to an int of different width. Link: https://lkml.kernel.org/r/20221205193716.276024-4-david@redhat.com Fixes: 6f1405efc61b ("selftests/vm: anon_cow: add R/O longterm tests via gup_test") Signed-off-by: David Hildenbrand Cc: Shuah Khan Cc: Yang Li Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/cow.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/vm/cow.c b/tools/testing/selftests/vm/cow.c index 73e05b52c49e..26f6ea3079e2 100644 --- a/tools/testing/selftests/vm/cow.c +++ b/tools/testing/selftests/vm/cow.c @@ -650,7 +650,7 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test, } /* Take a R/O pin. This should trigger unsharing. */ - args.addr = (__u64)mem; + args.addr = (__u64)(uintptr_t)mem; args.size = size; args.flags = fast ? 
PIN_LONGTERM_TEST_FLAG_USE_FAST : 0; ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args); @@ -669,7 +669,7 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test, * Read back the content via the pin to the temporary buffer and * test if we observed the modification. */ - tmp_val = (__u64)tmp; + tmp_val = (__u64)(uintptr_t)tmp; ret = ioctl(gup_fd, PIN_LONGTERM_TEST_READ, &tmp_val); if (ret) ksft_test_result_fail("PIN_LONGTERM_TEST_READ failed\n"); From 9d789c3b4170574baa4242dd8cae5988cf97d48d Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 5 Dec 2022 20:37:16 +0100 Subject: [PATCH 3750/4122] selftests/vm: ksm_functional_tests: fixes for 32bit The test currently fails on 32bit. Fixing the "-1ull" vs. "-1ul" seems to make the test pass and the compiler happy. Note: This test is not in mm-stable yet. This fix should be squashed into "selftests/vm: add KSM unmerge tests". Link: https://lkml.kernel.org/r/20221205193716.276024-5-david@redhat.com Signed-off-by: David Hildenbrand Cc: Shuah Khan Cc: Yang Li Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/ksm_functional_tests.c | 4 ++-- tools/testing/selftests/vm/vm_util.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/vm/ksm_functional_tests.c b/tools/testing/selftests/vm/ksm_functional_tests.c index 96644be68962..b11b7e5115dc 100644 --- a/tools/testing/selftests/vm/ksm_functional_tests.c +++ b/tools/testing/selftests/vm/ksm_functional_tests.c @@ -42,13 +42,13 @@ static bool range_maps_duplicates(char *addr, unsigned long size) for (offs_a = 0; offs_a < size; offs_a += pagesize) { pfn_a = pagemap_get_pfn(pagemap_fd, addr + offs_a); /* Page not present or PFN not exposed by the kernel. 
*/ - if (pfn_a == -1ull || !pfn_a) + if (pfn_a == -1ul || !pfn_a) continue; for (offs_b = offs_a + pagesize; offs_b < size; offs_b += pagesize) { pfn_b = pagemap_get_pfn(pagemap_fd, addr + offs_b); - if (pfn_b == -1ull || !pfn_b) + if (pfn_b == -1ul || !pfn_b) continue; if (pfn_a == pfn_b) return true; diff --git a/tools/testing/selftests/vm/vm_util.c b/tools/testing/selftests/vm/vm_util.c index 710571902743..40e795624ff3 100644 --- a/tools/testing/selftests/vm/vm_util.c +++ b/tools/testing/selftests/vm/vm_util.c @@ -50,7 +50,7 @@ unsigned long pagemap_get_pfn(int fd, char *start) /* If present (63th bit), PFN is at bit 0 -- 54. */ if (entry & 0x8000000000000000ull) return entry & 0x007fffffffffffffull; - return -1ull; + return -1ul; } void clear_softdirty(void) From 8614d6c5eda005ad72b37afeaae2879d7c101b18 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 5 Dec 2022 18:30:07 +0100 Subject: [PATCH 3751/4122] mm: do not show fs mm pc for VM_LOCKONFAULT pages When VM_LOCKONFAULT was added, /proc/PID/smaps wasn't hooked up to it, so looking at /proc/PID/smaps, it shows '??' instead of something intelligible. This can be reached by userspace by simply calling `mlock2(..., MLOCK_ONFAULT);`. Fix this by adding "lf" to denote VM_LOCKONFAULT. Link: https://lkml.kernel.org/r/20221205173007.580210-1-Jason@zx2c4.com Fixes: de60f5f10c58 ("mm: introduce VM_LOCKONFAULT") Signed-off-by: Jason A. Donenfeld Acked-by: Vlastimil Babka Cc: Eric B Munson Cc: Kirill A. 
Shutemov Signed-off-by: Andrew Morton --- fs/proc/task_mmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 89338950afd3..e35a0398db63 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -674,6 +674,7 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) [ilog2(VM_RAND_READ)] = "rr", [ilog2(VM_DONTCOPY)] = "dc", [ilog2(VM_DONTEXPAND)] = "de", + [ilog2(VM_LOCKONFAULT)] = "lf", [ilog2(VM_ACCOUNT)] = "ac", [ilog2(VM_NORESERVE)] = "nr", [ilog2(VM_HUGETLB)] = "ht", From de2e5171433126d340573cb7d0d4fcac084ab2a0 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Mon, 5 Dec 2022 22:03:27 +0800 Subject: [PATCH 3752/4122] mm: add cond_resched() in swapin_walk_pmd_entry() When handling MADV_WILLNEED in madvise(), a softlockup may occur in swapin_walk_pmd_entry() if swapping in lots of memory on a slow device. Add a cond_resched() to avoid the possible softlockup. Link: https://lkml.kernel.org/r/20221205140327.72304-1-wangkefeng.wang@huawei.com Fixes: 1998cc048901 ("mm: make madvise(MADV_WILLNEED) support swap file prefetch") Signed-off-by: Kefeng Wang Cc: Shaohua Li Cc: Hugh Dickins Cc: Rik van Riel Signed-off-by: Andrew Morton --- mm/madvise.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/madvise.c b/mm/madvise.c index 2573ea3ed684..b7d9b1a1c135 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -223,6 +223,7 @@ static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start, put_page(page); } swap_read_unplug(splug); + cond_resched(); return 0; } From 5478afc55a2104caaef5b78c7c1f9acb9ec1f92a Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Mon, 5 Dec 2022 15:57:40 +0100 Subject: [PATCH 3753/4122] kmsan: fix memcpy tests Recent Clang changes may cause it to delete calls of memcpy(), if the source is an uninitialized volatile local. 
This happens because passing a pointer to a volatile local into memcpy() discards the volatile qualifier, giving the compiler a free hand to optimize the memcpy() call away. Use OPTIMIZER_HIDE_VAR() to hide the uninitialized var from the too-smart compiler. Link: https://lkml.kernel.org/r/20221205145740.694038-1-glider@google.com Signed-off-by: Alexander Potapenko Suggested-by: Marco Elver Reviewed-by: Marco Elver Cc: Dmitry Vyukov Signed-off-by: Andrew Morton --- mm/kmsan/kmsan_test.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/kmsan/kmsan_test.c b/mm/kmsan/kmsan_test.c index 9a29ea2dbfb9..eb44ef3c5f29 100644 --- a/mm/kmsan/kmsan_test.c +++ b/mm/kmsan/kmsan_test.c @@ -419,6 +419,7 @@ static void test_memcpy_aligned_to_aligned(struct kunit *test) kunit_info( test, "memcpy()ing aligned uninit src to aligned dst (UMR report)\n"); + OPTIMIZER_HIDE_VAR(uninit_src); memcpy((void *)&dst, (void *)&uninit_src, sizeof(uninit_src)); kmsan_check_memory((void *)&dst, sizeof(dst)); KUNIT_EXPECT_TRUE(test, report_matches(&expect)); @@ -441,6 +442,7 @@ static void test_memcpy_aligned_to_unaligned(struct kunit *test) kunit_info( test, "memcpy()ing aligned uninit src to unaligned dst (UMR report)\n"); + OPTIMIZER_HIDE_VAR(uninit_src); memcpy((void *)&dst[1], (void *)&uninit_src, sizeof(uninit_src)); kmsan_check_memory((void *)dst, 4); KUNIT_EXPECT_TRUE(test, report_matches(&expect)); @@ -464,6 +466,7 @@ static void test_memcpy_aligned_to_unaligned2(struct kunit *test) kunit_info( test, "memcpy()ing aligned uninit src to unaligned dst - part 2 (UMR report)\n"); + OPTIMIZER_HIDE_VAR(uninit_src); memcpy((void *)&dst[1], (void *)&uninit_src, sizeof(uninit_src)); kmsan_check_memory((void *)&dst[4], sizeof(uninit_src)); KUNIT_EXPECT_TRUE(test, report_matches(&expect)); From c7cdf94e9cd7a03549e61b0f85949959191b8a10 Mon Sep 17 00:00:00 2001 From: Wang Yong Date: Wed, 7 Dec 2022 07:40:11 +0000 Subject: [PATCH 3754/4122] mm: fix typo in struct pglist_data code comment change 
"stat" to "start". Link: https://lkml.kernel.org/r/20221207074011.GA151242@cloud Fixes: c959924b0dc5 ("memory tiering: adjust hot threshold automatically") Signed-off-by: Wang Yong Reviewed-by: "Huang, Ying" Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 5f74891556f3..128f3cde800c 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1200,7 +1200,7 @@ typedef struct pglist_data { /* start time in ms of current promote threshold adjustment period */ unsigned int nbp_th_start; /* - * number of promote candidate pages at stat time of current promote + * number of promote candidate pages at start time of current promote * threshold adjustment period */ unsigned long nbp_th_nr_cand; From c47454823bd4e3ab34ed3f795afd4479ab938a3f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 6 Dec 2022 11:15:09 -0800 Subject: [PATCH 3755/4122] mm: mmu_gather: allow more than one batch of delayed rmaps Commit 5df397dec7c4 ("mm: delay page_remove_rmap() until after the TLB has been flushed") limited the page batching for the mmu gather operation when a dirty shared page needed to delay rmap removal until after the TLB had been flushed. It did so because it needs to walk that array of pages while still holding the page table lock, and our mmu_gather infrastructure allows for batching quite a lot of pages. We may have thousands on pages queued up for freeing, and we wanted to walk only the last batch if we then added a dirty page to the queue. However, when I limited it to one batch, I didn't think of the degenerate case of the special first batch that is embedded on-stack in the mmu_gather structure (called "local") and that only has eight entries. So with the right pattern, that "limit delayed rmap to just one batch" will trigger over and over in that first small batch, and we'll waste a lot of time flushing TLB's every eight pages. 
And those right patterns are trivially triggered by just having a shared mapping with lots of adjacent dirty pages. Like the 'page_fault3' subtest of the 'will-it-scale' benchmark, that just maps a shared area, dirties all pages, and unmaps it. Rinse and repeat. We still want to limit the batching, but to fix this (easily triggered) degenerate case, just expand the "only one batch" logic to instead be "only one batch that isn't the special first on-stack ('local') batch". That way, when we need to flush the delayed rmaps, we can still limit our walk to just the last batch - and that first small one. Link: https://lkml.kernel.org/r/CAHk-=whkL5aM1fR7kYUmhHQHBcMUc-bDoFP7EwYjTxy64DGtvw@mail.gmail.com Fixes: 5df397dec7c4 ("mm: delay page_remove_rmap() until after the TLB has been flushed") Signed-off-by: Linus Torvalds Reported-by: kernel test robot Link: https://lore.kernel.org/oe-lkp/202212051534.852804af-yujie.liu@intel.com Tested-by: Huang, Ying Tested-by: Hugh Dickins Cc: Feng Tang Cc: Johannes Weiner Cc: Nadav Amit Cc: Xing Zhengjun Cc: "Yin, Fengwei" Signed-off-by: Andrew Morton --- mm/mmu_gather.c | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c index 8247553a69c2..2b93cf6ac9ae 100644 --- a/mm/mmu_gather.c +++ b/mm/mmu_gather.c @@ -19,8 +19,8 @@ static bool tlb_next_batch(struct mmu_gather *tlb) { struct mmu_gather_batch *batch; - /* No more batching if we have delayed rmaps pending */ - if (tlb->delayed_rmap) + /* Limit batching if we have delayed rmaps pending */ + if (tlb->delayed_rmap && tlb->active != &tlb->local) return false; batch = tlb->active; @@ -48,22 +48,8 @@ static bool tlb_next_batch(struct mmu_gather *tlb) } #ifdef CONFIG_SMP -/** - * tlb_flush_rmaps - do pending rmap removals after we have flushed the TLB - * @tlb: the current mmu_gather - * - * Note that because of how tlb_next_batch() above works, we will - * never start new batches with pending 
delayed rmaps, so we only - * need to walk through the current active batch. - */ -void tlb_flush_rmaps(struct mmu_gather *tlb, struct vm_area_struct *vma) +static void tlb_flush_rmap_batch(struct mmu_gather_batch *batch, struct vm_area_struct *vma) { - struct mmu_gather_batch *batch; - - if (!tlb->delayed_rmap) - return; - - batch = tlb->active; for (int i = 0; i < batch->nr; i++) { struct encoded_page *enc = batch->encoded_pages[i]; @@ -72,7 +58,25 @@ void tlb_flush_rmaps(struct mmu_gather *tlb, struct vm_area_struct *vma) page_remove_rmap(page, vma, false); } } +} +/** + * tlb_flush_rmaps - do pending rmap removals after we have flushed the TLB + * @tlb: the current mmu_gather + * + * Note that because of how tlb_next_batch() above works, we will + * never start multiple new batches with pending delayed rmaps, so + * we only need to walk through the current active batch and the + * original local one. + */ +void tlb_flush_rmaps(struct mmu_gather *tlb, struct vm_area_struct *vma) +{ + if (!tlb->delayed_rmap) + return; + + tlb_flush_rmap_batch(&tlb->local, vma); + if (tlb->active != &tlb->local) + tlb_flush_rmap_batch(tlb->active, vma); tlb->delayed_rmap = 0; } #endif From 3e39f7971d75cafe1c90dec60526ad45484657c0 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sun, 11 Dec 2022 21:51:23 +0100 Subject: [PATCH 3756/4122] dt-bindings: rtc: m41t80: Convert text schema to YAML one Convert the m41t80 text schema to YAML schema. Add "#clock-cells" requirement, which is required by clock-output-names. 
Signed-off-by: Marek Vasut Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221211205124.23823-1-marex@denx.de Signed-off-by: Alexandre Belloni --- .../devicetree/bindings/rtc/rtc-m41t80.txt | 39 ---------- .../devicetree/bindings/rtc/st,m41t80.yaml | 73 +++++++++++++++++++ 2 files changed, 73 insertions(+), 39 deletions(-) delete mode 100644 Documentation/devicetree/bindings/rtc/rtc-m41t80.txt create mode 100644 Documentation/devicetree/bindings/rtc/st,m41t80.yaml diff --git a/Documentation/devicetree/bindings/rtc/rtc-m41t80.txt b/Documentation/devicetree/bindings/rtc/rtc-m41t80.txt deleted file mode 100644 index cdd196b1e9bd..000000000000 --- a/Documentation/devicetree/bindings/rtc/rtc-m41t80.txt +++ /dev/null @@ -1,39 +0,0 @@ -ST M41T80 family of RTC and compatible - -Required properties: -- compatible: should be one of: - "st,m41t62", - "st,m41t65", - "st,m41t80", - "st,m41t81", - "st,m41t81s", - "st,m41t82", - "st,m41t83", - "st,m41t84", - "st,m41t85", - "st,m41t87", - "microcrystal,rv4162", -- reg: I2C bus address of the device - -Optional properties: -- interrupts: rtc alarm interrupt. -- clock-output-names: From common clock binding to override the default output - clock name -- wakeup-source: Enables wake up of host system on alarm - -Optional child node: -- clock: Provide this if the square wave pin is used as boot-enabled fixed clock. 
- -Example: - rtc@68 { - compatible = "st,m41t80"; - reg = <0x68>; - interrupt-parent = <&UIC0>; - interrupts = <0x9 0x8>; - - clock { - compatible = "fixed-clock"; - #clock-cells = <0>; - clock-frequency = <32768>; - }; - }; diff --git a/Documentation/devicetree/bindings/rtc/st,m41t80.yaml b/Documentation/devicetree/bindings/rtc/st,m41t80.yaml new file mode 100644 index 000000000000..fc9c6da6483f --- /dev/null +++ b/Documentation/devicetree/bindings/rtc/st,m41t80.yaml @@ -0,0 +1,73 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/rtc/st,m41t80.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: ST M41T80 family of RTC and compatible + +maintainers: + - Alexandre Belloni + +properties: + compatible: + enum: + - st,m41t62 + - st,m41t65 + - st,m41t80 + - st,m41t81 + - st,m41t81s + - st,m41t82 + - st,m41t83 + - st,m41t84 + - st,m41t85 + - st,m41t87 + - microcrystal,rv4162 + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + "#clock-cells": + const: 1 + + clock-output-names: + maxItems: 1 + description: From common clock binding to override the default output clock name. + + clock: + type: object + $ref: /schemas/clock/fixed-clock.yaml# + properties: + clock-frequency: + const: 32768 + +allOf: + - $ref: rtc.yaml + +unevaluatedProperties: false + +required: + - compatible + - reg + +examples: + - | + i2c { + #address-cells = <1>; + #size-cells = <0>; + rtc@68 { + compatible = "st,m41t80"; + reg = <0x68>; + interrupt-parent = <&UIC0>; + interrupts = <0x9 0x8>; + + clock { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <32768>; + }; + }; + }; From 462e768b55a2331324ff72e74706261134369826 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 7 Dec 2022 18:56:09 +0300 Subject: [PATCH 3757/4122] iommu/mediatek: Fix forever loop in error handling There is a typo so this loop does i++ where i-- was intended. It will result in looping until the kernel crashes. 
Fixes: 26593928564c ("iommu/mediatek: Add error path for loop of mm_dts_parse") Signed-off-by: Dan Carpenter Reviewed-by: Yong Wu Link: https://lore.kernel.org/r/Y5C3mTam2nkbaz6o@kili Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 392b8c167c44..e9b3b794811d 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -1159,8 +1159,7 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m return 0; err_larbdev_put: - /* id may be not linear mapping, loop whole the array */ - for (i = MTK_LARB_NR_MAX - 1; i >= 0; i++) { + for (i = MTK_LARB_NR_MAX - 1; i >= 0; i--) { if (!data->larb_imu[i].dev) continue; put_device(data->larb_imu[i].dev); From 3bc8edc98bd43540dbe648e4ef91f443d6d20a24 Mon Sep 17 00:00:00 2001 From: Dan Aloni Date: Mon, 12 Dec 2022 13:11:06 +0200 Subject: [PATCH 3758/4122] nfsd: under NFSv4.1, fix double svc_xprt_put on rpc_create failure On error situation `clp->cl_cb_conn.cb_xprt` should not be given a reference to the xprt otherwise both client cleanup and the error handling path of the caller call to put it. Better to delay handing over the reference to a later branch. [ 72.530665] refcount_t: underflow; use-after-free. 
[ 72.531933] WARNING: CPU: 0 PID: 173 at lib/refcount.c:28 refcount_warn_saturate+0xcf/0x120 [ 72.533075] Modules linked in: nfsd(OE) nfsv4(OE) nfsv3(OE) nfs(OE) lockd(OE) compat_nfs_ssc(OE) nfs_acl(OE) rpcsec_gss_krb5(OE) auth_rpcgss(OE) rpcrdma(OE) dns_resolver fscache netfs grace rdma_cm iw_cm ib_cm sunrpc(OE) mlx5_ib mlx5_core mlxfw pci_hyperv_intf ib_uverbs ib_core xt_MASQUERADE nf_conntrack_netlink nft_counter xt_addrtype nft_compat br_netfilter bridge stp llc nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set overlay nf_tables nfnetlink crct10dif_pclmul crc32_pclmul ghash_clmulni_intel xfs serio_raw virtio_net virtio_blk net_failover failover fuse [last unloaded: sunrpc] [ 72.540389] CPU: 0 PID: 173 Comm: kworker/u16:5 Tainted: G OE 5.15.82-dan #1 [ 72.541511] Hardware name: Red Hat KVM/RHEL-AV, BIOS 1.16.0-3.module+el8.7.0+1084+97b81f61 04/01/2014 [ 72.542717] Workqueue: nfsd4_callbacks nfsd4_run_cb_work [nfsd] [ 72.543575] RIP: 0010:refcount_warn_saturate+0xcf/0x120 [ 72.544299] Code: 55 00 0f 0b 5d e9 01 50 98 00 80 3d 75 9e 39 08 00 0f 85 74 ff ff ff 48 c7 c7 e8 d1 60 8e c6 05 61 9e 39 08 01 e8 f6 51 55 00 <0f> 0b 5d e9 d9 4f 98 00 80 3d 4b 9e 39 08 00 0f 85 4c ff ff ff 48 [ 72.546666] RSP: 0018:ffffb3f841157cf0 EFLAGS: 00010286 [ 72.547393] RAX: 0000000000000026 RBX: ffff89ac6231d478 RCX: 0000000000000000 [ 72.548324] RDX: ffff89adb7c2c2c0 RSI: ffff89adb7c205c0 RDI: ffff89adb7c205c0 [ 72.549271] RBP: ffffb3f841157cf0 R08: 0000000000000000 R09: c0000000ffefffff [ 72.550209] R10: 0000000000000001 R11: ffffb3f841157ad0 R12: ffff89ac6231d180 [ 72.551142] R13: ffff89ac6231d478 R14: ffff89ac40c06180 R15: ffff89ac6231d4b0 [ 72.552089] FS: 0000000000000000(0000) GS:ffff89adb7c00000(0000) knlGS:0000000000000000 [ 72.553175] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 72.553934] CR2: 0000563a310506a8 CR3: 0000000109a66000 CR4: 0000000000350ef0 [ 72.554874] Call Trace: [ 
72.555278] [ 72.555614] svc_xprt_put+0xaf/0xe0 [sunrpc] [ 72.556276] nfsd4_process_cb_update.isra.11+0xb7/0x410 [nfsd] [ 72.557087] ? update_load_avg+0x82/0x610 [ 72.557652] ? cpuacct_charge+0x60/0x70 [ 72.558212] ? dequeue_entity+0xdb/0x3e0 [ 72.558765] ? queued_spin_unlock+0x9/0x20 [ 72.559358] nfsd4_run_cb_work+0xfc/0x270 [nfsd] [ 72.560031] process_one_work+0x1df/0x390 [ 72.560600] worker_thread+0x37/0x3b0 [ 72.561644] ? process_one_work+0x390/0x390 [ 72.562247] kthread+0x12f/0x150 [ 72.562710] ? set_kthread_struct+0x50/0x50 [ 72.563309] ret_from_fork+0x22/0x30 [ 72.563818] [ 72.564189] ---[ end trace 031117b1c72ec616 ]--- [ 72.566019] list_add corruption. next->prev should be prev (ffff89ac4977e538), but was ffff89ac4763e018. (next=ffff89ac4763e018). [ 72.567647] ------------[ cut here ]------------ Fixes: a4abc6b12eb1 ("nfsd: Fix svc_xprt refcnt leak when setup callback client failed") Cc: Xiyu Yang Cc: J. Bruce Fields Signed-off-by: Dan Aloni Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4callback.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 1b57f2c2f0bb..905d66acf6ab 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -988,7 +988,6 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c } else { if (!conn->cb_xprt) return -EINVAL; - clp->cl_cb_conn.cb_xprt = conn->cb_xprt; clp->cl_cb_session = ses; args.bc_xprt = conn->cb_xprt; args.prognumber = clp->cl_cb_session->se_cb_prog; @@ -1008,6 +1007,9 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c rpc_shutdown_client(client); return -ENOMEM; } + + if (clp->cl_minorversion != 0) + clp->cl_cb_conn.cb_xprt = conn->cb_xprt; clp->cl_cb_client = client; clp->cl_cb_cred = cred; rcu_read_lock(); From c1ac03af6ed45d05786c219d102f37eb44880f28 Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Tue, 29 Nov 2022 19:30:09 +0800 Subject: [PATCH 
3759/4122] tracing: Fix infinite loop in tracing_read_pipe on overflowed print_trace_line print_trace_line may overflow seq_file buffer. If the event is not consumed, the while loop keeps peeking this event, causing an infinite loop. Link: https://lkml.kernel.org/r/20221129113009.182425-1-yangjihong1@huawei.com Cc: Masami Hiramatsu Cc: stable@vger.kernel.org Fixes: 088b1e427dbba ("ftrace: pipe fixes") Signed-off-by: Yang Jihong Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 664619b3f1e1..548890c7c0f5 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6802,7 +6802,20 @@ waitagain: ret = print_trace_line(iter); if (ret == TRACE_TYPE_PARTIAL_LINE) { - /* don't print partial lines */ + /* + * If one print_trace_line() fills entire trace_seq in one shot, + * trace_seq_to_user() will returns -EBUSY because save_len == 0, + * In this case, we need to consume it, otherwise, loop will peek + * this event next time, resulting in an infinite loop. + */ + if (save_len == 0) { + iter->seq.full = 0; + trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n"); + trace_consume(iter); + break; + } + + /* In other cases, don't print partial lines */ iter->seq.seq.len = save_len; break; } From 3e12758392bee50135301b0189c064ab80980aca Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Mon, 12 Dec 2022 10:37:03 -0500 Subject: [PATCH 3760/4122] x86/mm/kmmio: Remove redundant preempt_disable() Now that kmmio uses rcu_read_lock_sched_notrace() there's no reason to call preempt_disable() as the read_lock_sched_notrace() already does that and is redundant. This also removes the preempt_enable_no_resched() as the "no_resched()" portion was bogus as there's no reason to do that. 
Link: https://lkml.kernel.org/r/20221212103703.7129cc5d@gandalf.local.home Cc: Masami Hiramatsu Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Borislav Petkov Cc: "x86@kernel.org" Cc: Karol Herbst Cc: Pekka Paalanen Cc: Dave Hansen Cc: Andy Lutomirski Cc: Ingo Molnar Signed-off-by: Steven Rostedt (Google) --- arch/x86/mm/kmmio.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 853c49877c16..9f82019179e1 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -246,14 +246,13 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) page_base &= page_level_mask(l); /* - * Preemption is now disabled to prevent process switch during - * single stepping. We can only handle one active kmmio trace + * Hold the RCU read lock over single stepping to avoid looking + * up the probe and kmmio_fault_page again. The rcu_read_lock_sched() + * also disables preemption and prevents process switch during + * the single stepping. We can only handle one active kmmio trace * per cpu, so ensure that we finish it before something else - * gets to run. We also hold the RCU read lock over single - * stepping to avoid looking up the probe and kmmio_fault_page - * again. + * gets to run. 
*/ - preempt_disable(); rcu_read_lock_sched_notrace(); faultpage = get_kmmio_fault_page(page_base); @@ -324,7 +323,6 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) no_kmmio: rcu_read_unlock_sched_notrace(); - preempt_enable_no_resched(); return ret; } @@ -364,7 +362,6 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) ctx->active--; BUG_ON(ctx->active); rcu_read_unlock_sched_notrace(); - preempt_enable_no_resched(); /* * if somebody else is singlestepping across a probe point, flags From b003b3b77d65133a0011ae3b7b255347438c12f6 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 29 Nov 2022 18:35:14 -0800 Subject: [PATCH 3761/4122] RISC-V: Align the shadow stack The standard RISC-V ABIs all require 16-byte stack alignment. We're only calling that one function on the shadow stack so I doubt it'd result in a real issue, but might as well keep this lined up. Fixes: 31da94c25aea ("riscv: add VMAP_STACK overflow detection") Reviewed-by: Jisheng Zhang Link: https://lore.kernel.org/r/20221130023515.20217-1-palmer@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/traps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index be54ccea8c47..acdfcacd7e57 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -206,7 +206,7 @@ static DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], * shadow stack, handled_ kernel_ stack_ overflow(in kernel/entry.S) is used * to get per-cpu overflow stack(get_overflow_stack). 
*/ -long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE/sizeof(long)]; +long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE/sizeof(long)] __aligned(16); asmlinkage unsigned long get_overflow_stack(void) { return (unsigned long)this_cpu_ptr(overflow_stack) + From de57ecc476103179e93fd85091770921f76a19af Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 29 Nov 2022 18:35:15 -0800 Subject: [PATCH 3762/4122] RISC-V: Add some comments about the shadow and overflow stacks It took me a while to page all this back in when trying to review the recent spin_shadow_stack, so I figured I'd just write up some comments. Reviewed-by: Guo Ren Reviewed-by: Jisheng Zhang Link: https://lore.kernel.org/r/20221130023515.20217-2-palmer@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/traps.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index acdfcacd7e57..336d4aadadb1 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -200,18 +200,18 @@ void __init trap_init(void) } #ifdef CONFIG_VMAP_STACK +/* + * Extra stack space that allows us to provide panic messages when the kernel + * has overflowed its stack. + */ static DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack)__aligned(16); /* - * shadow stack, handled_ kernel_ stack_ overflow(in kernel/entry.S) is used - * to get per-cpu overflow stack(get_overflow_stack). + * A temporary stack for use by handle_kernel_stack_overflow. This is used so + * we can call into C code to get the per-hart overflow stack. Usage of this + * stack must be protected by spin_shadow_stack. 
*/ long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE/sizeof(long)] __aligned(16); -asmlinkage unsigned long get_overflow_stack(void) -{ - return (unsigned long)this_cpu_ptr(overflow_stack) + - OVERFLOW_STACK_SIZE; -} /* * A pseudo spinlock to protect the shadow stack from being used by multiple @@ -222,6 +222,12 @@ asmlinkage unsigned long get_overflow_stack(void) */ unsigned long spin_shadow_stack; +asmlinkage unsigned long get_overflow_stack(void) +{ + return (unsigned long)this_cpu_ptr(overflow_stack) + + OVERFLOW_STACK_SIZE; +} + asmlinkage void handle_bad_stack(struct pt_regs *regs) { unsigned long tsk_stk = (unsigned long)current->stack; From e4b731ccb0975fd97283e0c0d9841a89063ec31a Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Tue, 18 Oct 2022 09:03:29 +0800 Subject: [PATCH 3763/4122] ceph: remove useless session parameter for check_caps() The session parameter makes no sense any more. Signed-off-by: Xiubo Li Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- fs/ceph/addr.c | 2 +- fs/ceph/caps.c | 23 +++++++++-------------- fs/ceph/file.c | 17 +++++++---------- fs/ceph/inode.c | 6 +++--- fs/ceph/ioctl.c | 2 +- fs/ceph/super.h | 3 +-- 6 files changed, 22 insertions(+), 31 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index dcf701b05cc1..ff6e3c279a79 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1367,7 +1367,7 @@ out: folio_put(folio); if (check_cap) - ceph_check_caps(ceph_inode(inode), CHECK_CAPS_AUTHONLY, NULL); + ceph_check_caps(ceph_inode(inode), CHECK_CAPS_AUTHONLY); return copied; } diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index e54814d0c2f7..1323fa28ab01 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1898,8 +1898,7 @@ bool __ceph_should_report_size(struct ceph_inode_info *ci) * CHECK_CAPS_FLUSH - we should flush any dirty caps immediately, without * further delay. 
*/ -void ceph_check_caps(struct ceph_inode_info *ci, int flags, - struct ceph_mds_session *session) +void ceph_check_caps(struct ceph_inode_info *ci, int flags) { struct inode *inode = &ci->netfs.inode; struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); @@ -1913,15 +1912,12 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, bool queue_invalidate = false; bool tried_invalidate = false; bool queue_writeback = false; - - if (session) - ceph_get_mds_session(session); + struct ceph_mds_session *session = NULL; spin_lock(&ci->i_ceph_lock); if (ci->i_ceph_flags & CEPH_I_ASYNC_CREATE) { /* Don't send messages until we get async create reply */ spin_unlock(&ci->i_ceph_lock); - ceph_put_mds_session(session); return; } @@ -2851,7 +2847,7 @@ static void check_max_size(struct inode *inode, loff_t endoff) check = 1; spin_unlock(&ci->i_ceph_lock); if (check) - ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL); + ceph_check_caps(ci, CHECK_CAPS_AUTHONLY); } static inline int get_used_fmode(int caps) @@ -3140,7 +3136,7 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had, switch (mode) { case PUT_CAP_REFS_SYNC: if (last) - ceph_check_caps(ci, 0, NULL); + ceph_check_caps(ci, 0); else if (flushsnaps) ceph_flush_snaps(ci, NULL); break; @@ -3255,7 +3251,7 @@ unlock: spin_unlock(&ci->i_ceph_lock); if (last) { - ceph_check_caps(ci, 0, NULL); + ceph_check_caps(ci, 0); } else if (flush_snaps) { ceph_flush_snaps(ci, NULL); } @@ -3604,10 +3600,9 @@ static void handle_cap_grant(struct inode *inode, mutex_unlock(&session->s_mutex); if (check_caps == 1) - ceph_check_caps(ci, CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL, - session); + ceph_check_caps(ci, CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL); else if (check_caps == 2) - ceph_check_caps(ci, CHECK_CAPS_NOINVAL, session); + ceph_check_caps(ci, CHECK_CAPS_NOINVAL); } /* @@ -4333,7 +4328,7 @@ unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc) if (inode) { spin_unlock(&mdsc->cap_delay_lock); 
dout("check_delayed_caps on %p\n", inode); - ceph_check_caps(ci, 0, NULL); + ceph_check_caps(ci, 0); iput(inode); spin_lock(&mdsc->cap_delay_lock); } @@ -4362,7 +4357,7 @@ static void flush_dirty_session_caps(struct ceph_mds_session *s) dout("flush_dirty_caps %llx.%llx\n", ceph_vinop(inode)); spin_unlock(&mdsc->cap_dirty_lock); ceph_wait_on_async_create(inode); - ceph_check_caps(ci, CHECK_CAPS_FLUSH, NULL); + ceph_check_caps(ci, CHECK_CAPS_FLUSH); iput(inode); spin_lock(&mdsc->cap_dirty_lock); } diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 04fd34557de8..4e68220bc06d 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -313,7 +313,7 @@ int ceph_renew_caps(struct inode *inode, int fmode) spin_unlock(&ci->i_ceph_lock); dout("renew caps %p want %s issued %s updating mds_wanted\n", inode, ceph_cap_string(wanted), ceph_cap_string(issued)); - ceph_check_caps(ci, 0, NULL); + ceph_check_caps(ci, 0); return 0; } spin_unlock(&ci->i_ceph_lock); @@ -408,7 +408,7 @@ int ceph_open(struct inode *inode, struct file *file) if ((issued & wanted) != wanted && (mds_wanted & wanted) != wanted && ceph_snap(inode) != CEPH_SNAPDIR) - ceph_check_caps(ci, 0, NULL); + ceph_check_caps(ci, 0); return ceph_init_file(inode, file, fmode); } else if (ceph_snap(inode) != CEPH_NOSNAP && @@ -1092,7 +1092,7 @@ static void ceph_aio_complete(struct inode *inode, loff_t endoff = aio_req->iocb->ki_pos + aio_req->total_len; if (endoff > i_size_read(inode)) { if (ceph_inode_set_size(inode, endoff)) - ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL); + ceph_check_caps(ci, CHECK_CAPS_AUTHONLY); } spin_lock(&ci->i_ceph_lock); @@ -1421,8 +1421,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, if (write && pos > size) { if (ceph_inode_set_size(inode, pos)) ceph_check_caps(ceph_inode(inode), - CHECK_CAPS_AUTHONLY, - NULL); + CHECK_CAPS_AUTHONLY); } } @@ -1577,8 +1576,7 @@ out: check_caps = ceph_inode_set_size(inode, pos); if (check_caps) ceph_check_caps(ceph_inode(inode), - 
CHECK_CAPS_AUTHONLY, - NULL); + CHECK_CAPS_AUTHONLY); } } @@ -1906,7 +1904,7 @@ retry_snap: if (dirty) __mark_inode_dirty(inode, dirty); if (ceph_quota_is_max_bytes_approaching(inode, iocb->ki_pos)) - ceph_check_caps(ci, CHECK_CAPS_FLUSH, NULL); + ceph_check_caps(ci, CHECK_CAPS_FLUSH); } dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n", @@ -2521,8 +2519,7 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, /* Let the MDS know about dst file size change */ if (ceph_inode_set_size(dst_inode, dst_off) || ceph_quota_is_max_bytes_approaching(dst_inode, dst_off)) - ceph_check_caps(dst_ci, CHECK_CAPS_AUTHONLY | CHECK_CAPS_FLUSH, - NULL); + ceph_check_caps(dst_ci, CHECK_CAPS_AUTHONLY | CHECK_CAPS_FLUSH); } /* Mark Fw dirty */ spin_lock(&dst_ci->i_ceph_lock); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index bad9eeb6a1a5..12173c00129f 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1909,7 +1909,7 @@ static void ceph_do_invalidate_pages(struct inode *inode) mutex_unlock(&ci->i_truncate_mutex); out: if (check) - ceph_check_caps(ci, 0, NULL); + ceph_check_caps(ci, 0); } /* @@ -1969,7 +1969,7 @@ retry: mutex_unlock(&ci->i_truncate_mutex); if (wrbuffer_refs == 0) - ceph_check_caps(ci, 0, NULL); + ceph_check_caps(ci, 0); wake_up_all(&ci->i_cap_wq); } @@ -1991,7 +1991,7 @@ static void ceph_inode_work(struct work_struct *work) __ceph_do_pending_vmtruncate(inode); if (test_and_clear_bit(CEPH_I_WORK_CHECK_CAPS, &ci->i_work_mask)) - ceph_check_caps(ci, 0, NULL); + ceph_check_caps(ci, 0); if (test_and_clear_bit(CEPH_I_WORK_FLUSH_SNAPS, &ci->i_work_mask)) ceph_flush_snaps(ci, NULL); diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index 6e061bf62ad4..deac817647eb 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -253,7 +253,7 @@ static long ceph_ioctl_lazyio(struct file *file) spin_unlock(&ci->i_ceph_lock); dout("ioctl_layzio: file %p marked lazy\n", file); - ceph_check_caps(ci, 0, NULL); + ceph_check_caps(ci, 0); } else { 
dout("ioctl_layzio: file %p already lazy\n", file); } diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 40630e6f691c..e8bc1d0d2614 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -1200,8 +1200,7 @@ extern void ceph_remove_capsnap(struct inode *inode, extern void ceph_flush_snaps(struct ceph_inode_info *ci, struct ceph_mds_session **psession); extern bool __ceph_should_report_size(struct ceph_inode_info *ci); -extern void ceph_check_caps(struct ceph_inode_info *ci, int flags, - struct ceph_mds_session *session); +extern void ceph_check_caps(struct ceph_inode_info *ci, int flags); extern unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc); extern void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc); extern int ceph_drop_caps_for_unlink(struct inode *inode); From 68c62bee9d081cf815310b3a96e38d94fc16007d Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Mon, 17 Oct 2022 22:17:35 +0800 Subject: [PATCH 3764/4122] ceph: try to check caps immediately after async creating finishes We should call the check_caps() again immediately after the async creating finishes in case the MDS is waiting for caps revocation to finish. 
Link: https://tracker.ceph.com/issues/46904 Signed-off-by: Xiubo Li Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 2 ++ fs/ceph/file.c | 9 +++++++++ fs/ceph/super.h | 2 ++ 3 files changed, 13 insertions(+) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 1323fa28ab01..4b159f97fe7b 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1916,6 +1916,8 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) spin_lock(&ci->i_ceph_lock); if (ci->i_ceph_flags & CEPH_I_ASYNC_CREATE) { + ci->i_ceph_flags |= CEPH_I_ASYNC_CHECK_CAPS; + /* Don't send messages until we get async create reply */ spin_unlock(&ci->i_ceph_lock); return; diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 4e68220bc06d..a6681b12e280 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -534,14 +534,23 @@ static void wake_async_create_waiters(struct inode *inode, struct ceph_mds_session *session) { struct ceph_inode_info *ci = ceph_inode(inode); + bool check_cap = false; spin_lock(&ci->i_ceph_lock); if (ci->i_ceph_flags & CEPH_I_ASYNC_CREATE) { ci->i_ceph_flags &= ~CEPH_I_ASYNC_CREATE; wake_up_bit(&ci->i_ceph_flags, CEPH_ASYNC_CREATE_BIT); + + if (ci->i_ceph_flags & CEPH_I_ASYNC_CHECK_CAPS) { + ci->i_ceph_flags &= ~CEPH_I_ASYNC_CHECK_CAPS; + check_cap = true; + } } ceph_kick_flushing_inode_caps(session, ci); spin_unlock(&ci->i_ceph_lock); + + if (check_cap) + ceph_check_caps(ci, CHECK_CAPS_FLUSH); } static void ceph_async_create_cb(struct ceph_mds_client *mdsc, diff --git a/fs/ceph/super.h b/fs/ceph/super.h index e8bc1d0d2614..e1bd6f487226 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -593,6 +593,8 @@ static inline struct inode *ceph_find_inode(struct super_block *sb, #define CEPH_ASYNC_CREATE_BIT (12) /* async create in flight for this */ #define CEPH_I_ASYNC_CREATE (1 << CEPH_ASYNC_CREATE_BIT) #define CEPH_I_SHUTDOWN (1 << 13) /* inode is no longer usable */ +#define CEPH_I_ASYNC_CHECK_CAPS (1 << 14) /* check caps immediately after async + creating finishes 
*/ /* * Masks of ceph inode work. From c19204cbd65c12fdcd34fb8f5d645007238ed5cd Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 8 Dec 2022 16:11:00 -0600 Subject: [PATCH 3765/4122] cifs: minor cleanup of some headers checkpatch showed formatting problems with extra spaces, and extra semicolon and some missing blank lines in some cifs headers. Reviewed-by: Paulo Alcantara (SUSE) Reviewed-by: Germano Percossi Signed-off-by: Steve French --- fs/cifs/cifs_ioctl.h | 2 +- fs/cifs/cifsfs.h | 4 ++-- fs/cifs/cifsglob.h | 7 +++++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/cifs/cifs_ioctl.h b/fs/cifs/cifs_ioctl.h index d86d78d5bfdc..332588e77c31 100644 --- a/fs/cifs/cifs_ioctl.h +++ b/fs/cifs/cifs_ioctl.h @@ -108,7 +108,7 @@ struct smb3_notify_info { #define CIFS_IOC_NOTIFY _IOW(CIFS_IOCTL_MAGIC, 9, struct smb3_notify) #define CIFS_DUMP_FULL_KEY _IOWR(CIFS_IOCTL_MAGIC, 10, struct smb3_full_key_debug_info) #define CIFS_IOC_NOTIFY_INFO _IOWR(CIFS_IOCTL_MAGIC, 11, struct smb3_notify_info) -#define CIFS_IOC_SHUTDOWN _IOR ('X', 125, __u32) +#define CIFS_IOC_SHUTDOWN _IOR('X', 125, __u32) /* * Flags for going down operation diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 388b745a978e..00a573e0ad0e 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -105,8 +105,8 @@ extern int cifs_lock(struct file *, int, struct file_lock *); extern int cifs_fsync(struct file *, loff_t, loff_t, int); extern int cifs_strict_fsync(struct file *, loff_t, loff_t, int); extern int cifs_flush(struct file *, fl_owner_t id); -extern int cifs_file_mmap(struct file * , struct vm_area_struct *); -extern int cifs_file_strict_mmap(struct file * , struct vm_area_struct *); +extern int cifs_file_mmap(struct file *file, struct vm_area_struct *vma); +extern int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma); extern const struct file_operations cifs_dir_ops; extern int cifs_dir_open(struct inode *inode, struct file *file); extern int cifs_readdir(struct 
file *file, struct dir_context *ctx); diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 1420acf987f0..cd3a173e65b1 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -785,6 +785,7 @@ static inline unsigned int in_flight(struct TCP_Server_Info *server) { unsigned int num; + spin_lock(&server->req_lock); num = server->in_flight; spin_unlock(&server->req_lock); @@ -795,6 +796,7 @@ static inline bool has_credits(struct TCP_Server_Info *server, int *credits, int num_credits) { int num; + spin_lock(&server->req_lock); num = *credits; spin_unlock(&server->req_lock); @@ -1025,7 +1027,7 @@ struct cifs_ses { struct TCP_Server_Info *server; /* pointer to server info */ int ses_count; /* reference counter */ enum ses_status_enum ses_status; /* updates protected by cifs_tcp_ses_lock */ - unsigned overrideSecFlg; /* if non-zero override global sec flags */ + unsigned int overrideSecFlg; /* if non-zero override global sec flags */ char *serverOS; /* name of operating system underlying server */ char *serverNOS; /* name of network operating system of server */ char *serverDomain; /* security realm of server */ @@ -1381,7 +1383,7 @@ struct cifsFileInfo { __u32 pid; /* process id who opened file */ struct cifs_fid fid; /* file id from remote */ struct list_head rlist; /* reconnect list */ - /* BB add lock scope info here if needed */ ; + /* BB add lock scope info here if needed */ /* lock scope id (0 if none) */ struct dentry *dentry; struct tcon_link *tlink; @@ -1769,6 +1771,7 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param, int number_of_items) { int i; + if ((number_of_items == 0) || (param == NULL)) return; for (i = 0; i < number_of_items; i++) { From 9544597b5b63ff1674d60e069f93555ab924b62b Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 9 Dec 2022 16:55:51 -0600 Subject: [PATCH 3766/4122] cifs: fix various whitespace errors in headers Fix some extra spaces and a few comments that were unnecessarily split over two lines. 
These were some trivial issues pointed out by checkpatch) Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/cifspdu.h | 50 +++++++++++++++++++-------------------------- fs/cifs/cifsproto.h | 2 +- 2 files changed, 22 insertions(+), 30 deletions(-) diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index d1abaeea974a..623caece2b10 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -1429,7 +1429,7 @@ typedef struct smb_com_transaction_change_notify_req { __u8 WatchTree; /* 1 = Monitor subdirectories */ __u8 Reserved2; __le16 ByteCount; -/* __u8 Pad[3];*/ +/* __u8 Pad[3];*/ /* __u8 Data[1];*/ } __attribute__((packed)) TRANSACT_CHANGE_NOTIFY_REQ; @@ -1752,8 +1752,7 @@ struct smb_com_transaction2_sfi_rsp { struct smb_hdr hdr; /* wct = 10 + SetupCount */ struct trans2_resp t2; __u16 ByteCount; - __u16 Reserved2; /* parameter word reserved - - present for infolevels > 100 */ + __u16 Reserved2; /* parameter word reserved - present for infolevels > 100 */ } __attribute__((packed)); struct smb_t2_qfi_req { @@ -1768,8 +1767,7 @@ struct smb_t2_qfi_rsp { struct smb_hdr hdr; /* wct = 10 + SetupCount */ struct trans2_resp t2; __u16 ByteCount; - __u16 Reserved2; /* parameter word reserved - - present for infolevels > 100 */ + __u16 Reserved2; /* parameter word reserved - present for infolevels > 100 */ } __attribute__((packed)); /* @@ -2146,13 +2144,11 @@ typedef struct { #define CIFS_UNIX_POSIX_PATH_OPS_CAP 0x00000020 /* Allow new POSIX path based calls including posix open and posix unlink */ -#define CIFS_UNIX_LARGE_READ_CAP 0x00000040 /* support reads >128K (up - to 0xFFFF00 */ +#define CIFS_UNIX_LARGE_READ_CAP 0x00000040 /* support reads >128K (up to 0xFFFF00 */ #define CIFS_UNIX_LARGE_WRITE_CAP 0x00000080 #define CIFS_UNIX_TRANSPORT_ENCRYPTION_CAP 0x00000100 /* can do SPNEGO crypt */ #define CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP 0x00000200 /* must do */ -#define CIFS_UNIX_PROXY_CAP 0x00000400 /* Proxy cap: 0xACE ioctl and - QFS PROXY 
call */ +#define CIFS_UNIX_PROXY_CAP 0x00000400 /* Proxy cap: 0xACE ioctl and QFS PROXY call */ #ifdef CONFIG_CIFS_POSIX /* presumably don't need the 0x20 POSIX_PATH_OPS_CAP since we never send LockingX instead of posix locking call on unix sess (and we do not expect @@ -2368,8 +2364,7 @@ typedef struct { struct file_allocation_info { __le64 AllocationSize; /* Note old Samba srvr rounds this up too much */ -} __attribute__((packed)); /* size used on disk, for level 0x103 for set, - 0x105 for query */ +} __packed; /* size used on disk, for level 0x103 for set, 0x105 for query */ struct file_end_of_file_info { __le64 FileSize; /* offset to end of file */ @@ -2409,8 +2404,7 @@ struct cifs_posix_acl { /* access conrol list (ACL) */ __le16 access_entry_count; /* access ACL - count of entries */ __le16 default_entry_count; /* default ACL - count of entries */ struct cifs_posix_ace ace_array[]; - /* followed by - struct cifs_posix_ace default_ace_arraay[] */ + /* followed by struct cifs_posix_ace default_ace_array[] */ } __attribute__((packed)); /* level 0x204 */ /* types of access control entries already defined in posix_acl.h */ @@ -2429,17 +2423,17 @@ struct cifs_posix_acl { /* access conrol list (ACL) */ /* end of POSIX ACL definitions */ /* POSIX Open Flags */ -#define SMB_O_RDONLY 0x1 -#define SMB_O_WRONLY 0x2 -#define SMB_O_RDWR 0x4 -#define SMB_O_CREAT 0x10 -#define SMB_O_EXCL 0x20 -#define SMB_O_TRUNC 0x40 -#define SMB_O_APPEND 0x80 -#define SMB_O_SYNC 0x100 -#define SMB_O_DIRECTORY 0x200 -#define SMB_O_NOFOLLOW 0x400 -#define SMB_O_DIRECT 0x800 +#define SMB_O_RDONLY 0x1 +#define SMB_O_WRONLY 0x2 +#define SMB_O_RDWR 0x4 +#define SMB_O_CREAT 0x10 +#define SMB_O_EXCL 0x20 +#define SMB_O_TRUNC 0x40 +#define SMB_O_APPEND 0x80 +#define SMB_O_SYNC 0x100 +#define SMB_O_DIRECTORY 0x200 +#define SMB_O_NOFOLLOW 0x400 +#define SMB_O_DIRECT 0x800 typedef struct { __le32 OpenFlags; /* same as NT CreateX */ @@ -2716,15 +2710,13 @@ typedef struct file_xattr_info { __u32 
xattr_value_len; char xattr_name[]; /* followed by xattr_value[xattr_value_len], no pad */ -} __attribute__((packed)) FILE_XATTR_INFO; /* extended attribute info - level 0x205 */ +} __packed FILE_XATTR_INFO; /* extended attribute info level 0x205 */ /* flags for lsattr and chflags commands removed arein uapi/linux/fs.h */ typedef struct file_chattr_info { __le64 mask; /* list of all possible attribute bits */ __le64 mode; /* list of actual attribute bits on this inode */ -} __attribute__((packed)) FILE_CHATTR_INFO; /* ext attributes - (chattr, chflags) level 0x206 */ -#endif /* POSIX */ +} __packed FILE_CHATTR_INFO; /* ext attributes (chattr, chflags) level 0x206 */ +#endif /* POSIX */ #endif /* _CIFSPDU_H */ diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 83e83d8beabb..f216fa269c85 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -124,7 +124,7 @@ extern int SendReceive2(const unsigned int /* xid */ , struct cifs_ses *, struct kvec * /* resp vec */); extern int SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *ptcon, - struct smb_hdr *in_buf , + struct smb_hdr *in_buf, struct smb_hdr *out_buf, int *bytes_returned); void From 2bfd81043e944af0e52835ef6d9b41795af22341 Mon Sep 17 00:00:00 2001 From: Steve French Date: Sun, 11 Dec 2022 13:54:21 -0600 Subject: [PATCH 3767/4122] cifs: fix missing display of three mount options Three mount options: "tcpnodelay" and "noautotune" and "noblocksend" were not displayed when passed in on cifs/smb3 mounts (e.g. displayed in /proc/mounts e.g.). No change to defaults so these are not displayed if not specified on mount. 
Cc: stable@vger.kernel.org Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 712a43161448..6094cb2ff099 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -678,9 +678,15 @@ cifs_show_options(struct seq_file *s, struct dentry *root) seq_printf(s, ",echo_interval=%lu", tcon->ses->server->echo_interval / HZ); - /* Only display max_credits if it was overridden on mount */ + /* Only display the following if overridden on mount */ if (tcon->ses->server->max_credits != SMB2_MAX_CREDITS_AVAILABLE) seq_printf(s, ",max_credits=%u", tcon->ses->server->max_credits); + if (tcon->ses->server->tcp_nodelay) + seq_puts(s, ",tcpnodelay"); + if (tcon->ses->server->noautotune) + seq_puts(s, ",noautotune"); + if (tcon->ses->server->noblocksnd) + seq_puts(s, ",noblocksend"); if (tcon->snapshot_time) seq_printf(s, ",snapshot=%llu", tcon->snapshot_time); From 9d91f8108ebfed54284332e04d2073107df18794 Mon Sep 17 00:00:00 2001 From: Steve French Date: Sun, 11 Dec 2022 14:44:31 -0600 Subject: [PATCH 3768/4122] cifs: print warning when conflicting soft vs. hard mount options specified If the user specifies conflicting hard vs. soft mount options (or nosoft vs. nohard) print a warning to dmesg. We were missing a warning when a user e.g. mounted with both "hard,soft" mount options. Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/fs_context.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c index 45119597c765..2c92a821e028 100644 --- a/fs/cifs/fs_context.c +++ b/fs/cifs/fs_context.c @@ -884,16 +884,21 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, ctx->nodfs = 1; break; case Opt_hard: - if (result.negated) + if (result.negated) { + if (ctx->retry == 1) + cifs_dbg(VFS, "conflicting hard vs. 
soft mount options\n"); ctx->retry = 0; - else + } else ctx->retry = 1; break; case Opt_soft: if (result.negated) ctx->retry = 1; - else + else { + if (ctx->retry == 1) + cifs_dbg(VFS, "conflicting hard vs soft mount options\n"); ctx->retry = 0; + } break; case Opt_mapposix: if (result.negated) From f7f291e14dde32a07b1f0aa06921d28f875a7b54 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Sun, 11 Dec 2022 18:18:55 -0300 Subject: [PATCH 3769/4122] cifs: fix oops during encryption When running xfstests against Azure the following oops occurred on an arm64 system Unable to handle kernel write to read-only memory at virtual address ffff0001221cf000 Mem abort info: ESR = 0x9600004f EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x0f: level 3 permission fault Data abort info: ISV = 0, ISS = 0x0000004f CM = 0, WnR = 1 swapper pgtable: 4k pages, 48-bit VAs, pgdp=00000000294f3000 [ffff0001221cf000] pgd=18000001ffff8003, p4d=18000001ffff8003, pud=18000001ff82e003, pmd=18000001ff71d003, pte=00600001221cf787 Internal error: Oops: 9600004f [#1] PREEMPT SMP ... 
pstate: 80000005 (Nzcv daif -PAN -UAO -TCO BTYPE=--) pc : __memcpy+0x40/0x230 lr : scatterwalk_copychunks+0xe0/0x200 sp : ffff800014e92de0 x29: ffff800014e92de0 x28: ffff000114f9de80 x27: 0000000000000008 x26: 0000000000000008 x25: ffff800014e92e78 x24: 0000000000000008 x23: 0000000000000001 x22: 0000040000000000 x21: ffff000000000000 x20: 0000000000000001 x19: ffff0001037c4488 x18: 0000000000000014 x17: 235e1c0d6efa9661 x16: a435f9576b6edd6c x15: 0000000000000058 x14: 0000000000000001 x13: 0000000000000008 x12: ffff000114f2e590 x11: ffffffffffffffff x10: 0000040000000000 x9 : ffff8000105c3580 x8 : 2e9413b10000001a x7 : 534b4410fb86b005 x6 : 534b4410fb86b005 x5 : ffff0001221cf008 x4 : ffff0001037c4490 x3 : 0000000000000001 x2 : 0000000000000008 x1 : ffff0001037c4488 x0 : ffff0001221cf000 Call trace: __memcpy+0x40/0x230 scatterwalk_map_and_copy+0x98/0x100 crypto_ccm_encrypt+0x150/0x180 crypto_aead_encrypt+0x2c/0x40 crypt_message+0x750/0x880 smb3_init_transform_rq+0x298/0x340 smb_send_rqst.part.11+0xd8/0x180 smb_send_rqst+0x3c/0x100 compound_send_recv+0x534/0xbc0 smb2_query_info_compound+0x32c/0x440 smb2_set_ea+0x438/0x4c0 cifs_xattr_set+0x5d4/0x7c0 This is because in scatterwalk_copychunks(), we attempted to write to a buffer (@sign) that was allocated in the stack (vmalloc area) by crypt_message() and thus accessing its remaining 8 (x2) bytes ended up crossing a page boundary. To simply fix it, we could just pass @sign kmalloc'd from crypt_message() and then we're done. Luckily, we don't seem to pass any other vmalloc'd buffers in smb_rqst::rq_iov... Instead, let's map the correct pages and offsets from vmalloc buffers as well in cifs_sg_set_buf() and then avoiding such oopses. 
Signed-off-by: Paulo Alcantara (SUSE) Cc: stable@vger.kernel.org Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 68 ++++++++++++++++++++ fs/cifs/cifsproto.h | 4 +- fs/cifs/misc.c | 4 +- fs/cifs/smb2ops.c | 147 +++++++++++++++++++++----------------------- 4 files changed, 142 insertions(+), 81 deletions(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index cd3a173e65b1..703685e2db5e 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -13,6 +13,8 @@ #include #include #include +#include +#include #include #include #include @@ -2140,4 +2142,70 @@ static inline void move_cifs_info_to_smb2(struct smb2_file_all_info *dst, const dst->FileNameLength = src->FileNameLength; } +static inline unsigned int cifs_get_num_sgs(const struct smb_rqst *rqst, + int num_rqst, + const u8 *sig) +{ + unsigned int len, skip; + unsigned int nents = 0; + unsigned long addr; + int i, j; + + /* Assumes the first rqst has a transform header as the first iov. + * I.e. + * rqst[0].rq_iov[0] is transform header + * rqst[0].rq_iov[1+] data to be encrypted/decrypted + * rqst[1+].rq_iov[0+] data to be encrypted/decrypted + */ + for (i = 0; i < num_rqst; i++) { + /* + * The first rqst has a transform header where the + * first 20 bytes are not part of the encrypted blob. + */ + for (j = 0; j < rqst[i].rq_nvec; j++) { + struct kvec *iov = &rqst[i].rq_iov[j]; + + skip = (i == 0) && (j == 0) ? 20 : 0; + addr = (unsigned long)iov->iov_base + skip; + if (unlikely(is_vmalloc_addr((void *)addr))) { + len = iov->iov_len - skip; + nents += DIV_ROUND_UP(offset_in_page(addr) + len, + PAGE_SIZE); + } else { + nents++; + } + } + nents += rqst[i].rq_npages; + } + nents += DIV_ROUND_UP(offset_in_page(sig) + SMB2_SIGNATURE_SIZE, PAGE_SIZE); + return nents; +} + +/* We can not use the normal sg_set_buf() as we will sometimes pass a + * stack object as buf. 
+ */ +static inline struct scatterlist *cifs_sg_set_buf(struct scatterlist *sg, + const void *buf, + unsigned int buflen) +{ + unsigned long addr = (unsigned long)buf; + unsigned int off = offset_in_page(addr); + + addr &= PAGE_MASK; + if (unlikely(is_vmalloc_addr((void *)addr))) { + do { + unsigned int len = min_t(unsigned int, buflen, PAGE_SIZE - off); + + sg_set_page(sg++, vmalloc_to_page((void *)addr), len, off); + + off = 0; + addr += PAGE_SIZE; + buflen -= len; + } while (buflen); + } else { + sg_set_page(sg++, virt_to_page(addr), buflen, off); + } + return sg; +} + #endif /* _CIFS_GLOB_H */ diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index f216fa269c85..9c6147ca029d 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -600,8 +600,8 @@ int setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw); int cifs_alloc_hash(const char *name, struct shash_desc **sdesc); void cifs_free_hash(struct shash_desc **sdesc); -extern void rqst_page_get_length(struct smb_rqst *rqst, unsigned int page, - unsigned int *len, unsigned int *offset); +void rqst_page_get_length(const struct smb_rqst *rqst, unsigned int page, + unsigned int *len, unsigned int *offset); struct cifs_chan * cifs_ses_find_chan(struct cifs_ses *ses, struct TCP_Server_Info *server); int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses); diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 3e68d8208cf5..1cbecd64d697 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -1136,8 +1136,8 @@ cifs_free_hash(struct shash_desc **sdesc) * @len: Where to store the length for this page: * @offset: Where to store the offset for this page */ -void rqst_page_get_length(struct smb_rqst *rqst, unsigned int page, - unsigned int *len, unsigned int *offset) +void rqst_page_get_length(const struct smb_rqst *rqst, unsigned int page, + unsigned int *len, unsigned int *offset) { *len = rqst->rq_pagesz; *offset = (page == 0) ? 
rqst->rq_offset : 0; diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 72b22d033ed5..6e772b31e02a 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -4204,69 +4204,82 @@ fill_transform_hdr(struct smb2_transform_hdr *tr_hdr, unsigned int orig_len, memcpy(&tr_hdr->SessionId, &shdr->SessionId, 8); } -/* We can not use the normal sg_set_buf() as we will sometimes pass a - * stack object as buf. - */ -static inline void smb2_sg_set_buf(struct scatterlist *sg, const void *buf, - unsigned int buflen) +static void *smb2_aead_req_alloc(struct crypto_aead *tfm, const struct smb_rqst *rqst, + int num_rqst, const u8 *sig, u8 **iv, + struct aead_request **req, struct scatterlist **sgl, + unsigned int *num_sgs) { - void *addr; - /* - * VMAP_STACK (at least) puts stack into the vmalloc address space - */ - if (is_vmalloc_addr(buf)) - addr = vmalloc_to_page(buf); - else - addr = virt_to_page(buf); - sg_set_page(sg, addr, buflen, offset_in_page(buf)); -} + unsigned int req_size = sizeof(**req) + crypto_aead_reqsize(tfm); + unsigned int iv_size = crypto_aead_ivsize(tfm); + unsigned int len; + u8 *p; -/* Assumes the first rqst has a transform header as the first iov. - * I.e. 
- * rqst[0].rq_iov[0] is transform header - * rqst[0].rq_iov[1+] data to be encrypted/decrypted - * rqst[1+].rq_iov[0+] data to be encrypted/decrypted - */ -static struct scatterlist * -init_sg(int num_rqst, struct smb_rqst *rqst, u8 *sign) -{ - unsigned int sg_len; - struct scatterlist *sg; - unsigned int i; - unsigned int j; - unsigned int idx = 0; - int skip; + *num_sgs = cifs_get_num_sgs(rqst, num_rqst, sig); - sg_len = 1; - for (i = 0; i < num_rqst; i++) - sg_len += rqst[i].rq_nvec + rqst[i].rq_npages; + len = iv_size; + len += crypto_aead_alignmask(tfm) & ~(crypto_tfm_ctx_alignment() - 1); + len = ALIGN(len, crypto_tfm_ctx_alignment()); + len += req_size; + len = ALIGN(len, __alignof__(struct scatterlist)); + len += *num_sgs * sizeof(**sgl); - sg = kmalloc_array(sg_len, sizeof(struct scatterlist), GFP_KERNEL); - if (!sg) + p = kmalloc(len, GFP_ATOMIC); + if (!p) return NULL; - sg_init_table(sg, sg_len); + *iv = (u8 *)PTR_ALIGN(p, crypto_aead_alignmask(tfm) + 1); + *req = (struct aead_request *)PTR_ALIGN(*iv + iv_size, + crypto_tfm_ctx_alignment()); + *sgl = (struct scatterlist *)PTR_ALIGN((u8 *)*req + req_size, + __alignof__(struct scatterlist)); + return p; +} + +static void *smb2_get_aead_req(struct crypto_aead *tfm, const struct smb_rqst *rqst, + int num_rqst, const u8 *sig, u8 **iv, + struct aead_request **req, struct scatterlist **sgl) +{ + unsigned int off, len, skip; + struct scatterlist *sg; + unsigned int num_sgs; + unsigned long addr; + int i, j; + void *p; + + p = smb2_aead_req_alloc(tfm, rqst, num_rqst, sig, iv, req, sgl, &num_sgs); + if (!p) + return NULL; + + sg_init_table(*sgl, num_sgs); + sg = *sgl; + + /* Assumes the first rqst has a transform header as the first iov. + * I.e. 
+ * rqst[0].rq_iov[0] is transform header + * rqst[0].rq_iov[1+] data to be encrypted/decrypted + * rqst[1+].rq_iov[0+] data to be encrypted/decrypted + */ for (i = 0; i < num_rqst; i++) { + /* + * The first rqst has a transform header where the + * first 20 bytes are not part of the encrypted blob. + */ for (j = 0; j < rqst[i].rq_nvec; j++) { - /* - * The first rqst has a transform header where the - * first 20 bytes are not part of the encrypted blob - */ + struct kvec *iov = &rqst[i].rq_iov[j]; + skip = (i == 0) && (j == 0) ? 20 : 0; - smb2_sg_set_buf(&sg[idx++], - rqst[i].rq_iov[j].iov_base + skip, - rqst[i].rq_iov[j].iov_len - skip); - } - + addr = (unsigned long)iov->iov_base + skip; + len = iov->iov_len - skip; + sg = cifs_sg_set_buf(sg, (void *)addr, len); + } for (j = 0; j < rqst[i].rq_npages; j++) { - unsigned int len, offset; - - rqst_page_get_length(&rqst[i], j, &len, &offset); - sg_set_page(&sg[idx++], rqst[i].rq_pages[j], len, offset); + rqst_page_get_length(&rqst[i], j, &len, &off); + sg_set_page(sg++, rqst[i].rq_pages[j], len, off); } } - smb2_sg_set_buf(&sg[idx], sign, SMB2_SIGNATURE_SIZE); - return sg; + cifs_sg_set_buf(sg, sig, SMB2_SIGNATURE_SIZE); + + return p; } static int @@ -4314,11 +4327,11 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst, u8 sign[SMB2_SIGNATURE_SIZE] = {}; u8 key[SMB3_ENC_DEC_KEY_SIZE]; struct aead_request *req; - char *iv; - unsigned int iv_len; + u8 *iv; DECLARE_CRYPTO_WAIT(wait); struct crypto_aead *tfm; unsigned int crypt_len = le32_to_cpu(tr_hdr->OriginalMessageSize); + void *creq; rc = smb2_get_enc_key(server, le64_to_cpu(tr_hdr->SessionId), enc, key); if (rc) { @@ -4352,32 +4365,15 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst, return rc; } - req = aead_request_alloc(tfm, GFP_KERNEL); - if (!req) { - cifs_server_dbg(VFS, "%s: Failed to alloc aead request\n", __func__); + creq = smb2_get_aead_req(tfm, rqst, num_rqst, sign, &iv, &req, &sg); + if (unlikely(!creq)) return -ENOMEM; - } if 
(!enc) { memcpy(sign, &tr_hdr->Signature, SMB2_SIGNATURE_SIZE); crypt_len += SMB2_SIGNATURE_SIZE; } - sg = init_sg(num_rqst, rqst, sign); - if (!sg) { - cifs_server_dbg(VFS, "%s: Failed to init sg\n", __func__); - rc = -ENOMEM; - goto free_req; - } - - iv_len = crypto_aead_ivsize(tfm); - iv = kzalloc(iv_len, GFP_KERNEL); - if (!iv) { - cifs_server_dbg(VFS, "%s: Failed to alloc iv\n", __func__); - rc = -ENOMEM; - goto free_sg; - } - if ((server->cipher_type == SMB2_ENCRYPTION_AES128_GCM) || (server->cipher_type == SMB2_ENCRYPTION_AES256_GCM)) memcpy(iv, (char *)tr_hdr->Nonce, SMB3_AES_GCM_NONCE); @@ -4386,6 +4382,7 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst, memcpy(iv + 1, (char *)tr_hdr->Nonce, SMB3_AES_CCM_NONCE); } + aead_request_set_tfm(req, tfm); aead_request_set_crypt(req, sg, sg, crypt_len, iv); aead_request_set_ad(req, assoc_data_len); @@ -4398,11 +4395,7 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst, if (!rc && enc) memcpy(&tr_hdr->Signature, sign, SMB2_SIGNATURE_SIZE); - kfree_sensitive(iv); -free_sg: - kfree_sensitive(sg); -free_req: - kfree_sensitive(req); + kfree_sensitive(creq); return rc; } From a9438b44bc7015b18931e312bbd249a25bb59a65 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 12 Dec 2022 12:36:33 +0100 Subject: [PATCH 3770/4122] writeback: Add asserts for adding freed inode to lists In the past we had several use-after-free issues with inodes getting added to writeback lists after evict() removed them. These are painful to debug so add some asserts to catch the problem earlier. The only non-obvious change in the commit is that we need to tweak redirty_tail_locked() to avoid triggering assertion in inode_io_list_move_locked(). 
Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20221212113633.29181-1-jack@suse.cz Signed-off-by: Jens Axboe --- fs/fs-writeback.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 443f83382b9b..6cd172c4cb3e 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -121,6 +121,7 @@ static bool inode_io_list_move_locked(struct inode *inode, { assert_spin_locked(&wb->list_lock); assert_spin_locked(&inode->i_lock); + WARN_ON_ONCE(inode->i_state & I_FREEING); list_move(&inode->i_io_list, head); @@ -280,6 +281,7 @@ static void inode_cgwb_move_to_attached(struct inode *inode, { assert_spin_locked(&wb->list_lock); assert_spin_locked(&inode->i_lock); + WARN_ON_ONCE(inode->i_state & I_FREEING); inode->i_state &= ~I_SYNC_QUEUED; if (wb != &wb->bdi->wb) @@ -1129,6 +1131,7 @@ static void inode_cgwb_move_to_attached(struct inode *inode, { assert_spin_locked(&wb->list_lock); assert_spin_locked(&inode->i_lock); + WARN_ON_ONCE(inode->i_state & I_FREEING); inode->i_state &= ~I_SYNC_QUEUED; list_del_init(&inode->i_io_list); @@ -1294,6 +1297,17 @@ static void redirty_tail_locked(struct inode *inode, struct bdi_writeback *wb) { assert_spin_locked(&inode->i_lock); + inode->i_state &= ~I_SYNC_QUEUED; + /* + * When the inode is being freed just don't bother with dirty list + * tracking. Flush worker will ignore this inode anyway and it will + * trigger assertions in inode_io_list_move_locked(). 
+ */ + if (inode->i_state & I_FREEING) { + list_del_init(&inode->i_io_list); + wb_io_lists_depopulated(wb); + return; + } if (!list_empty(&wb->b_dirty)) { struct inode *tail; @@ -1302,7 +1316,6 @@ static void redirty_tail_locked(struct inode *inode, struct bdi_writeback *wb) inode->dirtied_when = jiffies; } inode_io_list_move_locked(inode, wb, &wb->b_dirty); - inode->i_state &= ~I_SYNC_QUEUED; } static void redirty_tail(struct inode *inode, struct bdi_writeback *wb) From 23e188a16423a6e65290abf39dd427ff047e6843 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Sat, 10 Dec 2022 18:10:42 +0800 Subject: [PATCH 3771/4122] writeback: remove obsolete macro EXPIRE_DIRTY_ATIME EXPIRE_DIRTY_ATIME is not used anymore. Remove it. Signed-off-by: Miaohe Lin Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20221210101042.2012931-1-linmiaohe@huawei.com Signed-off-by: Jens Axboe --- fs/fs-writeback.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 6cd172c4cb3e..6c113585e782 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1358,8 +1358,6 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t) return ret; } -#define EXPIRE_DIRTY_ATIME 0x0001 - /* * Move expired (dirtied before dirtied_before) dirty inodes from * @delaying_queue to @dispatch_queue. From d1f0f50fbbbbca1e3e8157e51934613bf88f6d44 Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Thu, 8 Dec 2022 09:33:41 +0800 Subject: [PATCH 3772/4122] samples: vfio-mdev: Fix missing pci_disable_device() in mdpy_fb_probe() Add missing pci_disable_device() in fail path of mdpy_fb_probe(). Besides, fix missing release functions in mdpy_fb_remove(). 
Fixes: cacade1946a4 ("sample: vfio mdev display - guest driver") Signed-off-by: Shang XiaoJing Link: https://lore.kernel.org/r/20221208013341.3999-1-shangxiaojing@huawei.com Signed-off-by: Alex Williamson --- samples/vfio-mdev/mdpy-fb.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/samples/vfio-mdev/mdpy-fb.c b/samples/vfio-mdev/mdpy-fb.c index 9ec93d90e8a5..4eb7aa11cfbb 100644 --- a/samples/vfio-mdev/mdpy-fb.c +++ b/samples/vfio-mdev/mdpy-fb.c @@ -109,7 +109,7 @@ static int mdpy_fb_probe(struct pci_dev *pdev, ret = pci_request_regions(pdev, "mdpy-fb"); if (ret < 0) - return ret; + goto err_disable_dev; pci_read_config_dword(pdev, MDPY_FORMAT_OFFSET, &format); pci_read_config_dword(pdev, MDPY_WIDTH_OFFSET, &width); @@ -191,6 +191,9 @@ err_release_fb: err_release_regions: pci_release_regions(pdev); +err_disable_dev: + pci_disable_device(pdev); + return ret; } @@ -199,7 +202,10 @@ static void mdpy_fb_remove(struct pci_dev *pdev) struct fb_info *info = pci_get_drvdata(pdev); unregister_framebuffer(info); + iounmap(info->screen_base); framebuffer_release(info); + pci_release_regions(pdev); + pci_disable_device(pdev); } static struct pci_device_id mdpy_fb_pci_table[] = { From fe3dd71db2b81c202bc80532bbe0e07238a45ed9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 8 Dec 2022 19:01:26 +0300 Subject: [PATCH 3773/4122] vfio/mlx5: fix error code in mlx5vf_precopy_ioctl() The copy_to_user() function returns the number of bytes remaining to be copied but we want to return a negative error code here. 
Fixes: 0dce165b1adf ("vfio/mlx5: Introduce vfio precopy ioctl implementation") Signed-off-by: Dan Carpenter Reviewed-by: Yishai Hadas Link: https://lore.kernel.org/r/Y5IKVknlf5Z5NPtU@kili Signed-off-by: Alex Williamson --- drivers/vfio/pci/mlx5/main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c index cd90eb86128c..94f7a0fd10e8 100644 --- a/drivers/vfio/pci/mlx5/main.c +++ b/drivers/vfio/pci/mlx5/main.c @@ -404,7 +404,10 @@ static long mlx5vf_precopy_ioctl(struct file *filp, unsigned int cmd, done: mlx5vf_state_mutex_unlock(mvdev); - return copy_to_user((void __user *)arg, &info, minsz); + if (copy_to_user((void __user *)arg, &info, minsz)) + return -EFAULT; + return 0; + err_migf_unlock: mutex_unlock(&migf->lock); err_state_unlock: From 70be6f322860d322ebcd120cf0c05402ead5c6de Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 8 Dec 2022 19:02:17 +0300 Subject: [PATCH 3774/4122] vfio/mlx5: error pointer dereference in error handling This code frees the wrong "buf" variable and results in an error pointer dereference. 
Fixes: 34e2f27143d1 ("vfio/mlx5: Introduce multiple loads") Signed-off-by: Dan Carpenter Reviewed-by: Yishai Hadas Link: https://lore.kernel.org/r/Y5IKia5SaiVxYmG5@kili Signed-off-by: Alex Williamson --- drivers/vfio/pci/mlx5/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c index 94f7a0fd10e8..031ac8cc215d 100644 --- a/drivers/vfio/pci/mlx5/main.c +++ b/drivers/vfio/pci/mlx5/main.c @@ -826,7 +826,7 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev) spin_lock_init(&migf->list_lock); return migf; out_buf: - mlx5vf_free_data_buffer(buf); + mlx5vf_free_data_buffer(migf->buf); out_pd: mlx5vf_cmd_dealloc_pd(migf); out_free: From e480751970e84bc13ab5c288dbbe16b0638cc088 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Thu, 24 Nov 2022 11:37:08 +0800 Subject: [PATCH 3775/4122] f2fs: remove F2FS_SET_FEATURE() and F2FS_CLEAR_FEATURE() macro F2FS_SET_FEATURE() and F2FS_CLEAR_FEATURE() have never been used since they were introduced by this commit 76f105a2dbcd("f2fs: add feature facility in superblock"). So let's remove them. BTW, convert f2fs_sb_has_##name to return bool. 
Signed-off-by: Yangtao Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 296683648d4f..cf738f1275b2 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -203,10 +203,6 @@ struct f2fs_mount_info { #define __F2FS_HAS_FEATURE(raw_super, mask) \ ((raw_super->feature & cpu_to_le32(mask)) != 0) #define F2FS_HAS_FEATURE(sbi, mask) __F2FS_HAS_FEATURE(sbi->raw_super, mask) -#define F2FS_SET_FEATURE(sbi, mask) \ - (sbi->raw_super->feature |= cpu_to_le32(mask)) -#define F2FS_CLEAR_FEATURE(sbi, mask) \ - (sbi->raw_super->feature &= ~cpu_to_le32(mask)) /* * Default values for user and/or group using reserved blocks @@ -4387,7 +4383,7 @@ static inline bool f2fs_disable_compressed_file(struct inode *inode) } #define F2FS_FEATURE_FUNCS(name, flagname) \ -static inline int f2fs_sb_has_##name(struct f2fs_sb_info *sbi) \ +static inline bool f2fs_sb_has_##name(struct f2fs_sb_info *sbi) \ { \ return F2FS_HAS_FEATURE(sbi, F2FS_FEATURE_##flagname); \ } From ed8ac22b6b75804743f1dae6563d75f85cfd1483 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Thu, 24 Nov 2022 10:48:42 +0800 Subject: [PATCH 3776/4122] f2fs: introduce f2fs_is_readonly() for readability Introduce f2fs_is_readonly() and use it to simplify code. 
Signed-off-by: Yangtao Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 5 +++++ fs/f2fs/super.c | 5 ++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index cf738f1275b2..eb8c27c4e5fc 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4575,6 +4575,11 @@ static inline void f2fs_handle_page_eio(struct f2fs_sb_info *sbi, pgoff_t ofs, } } +static inline bool f2fs_is_readonly(struct f2fs_sb_info *sbi) +{ + return f2fs_sb_has_readonly(sbi) || f2fs_readonly(sbi->sb); +} + #define EFSBADCRC EBADMSG /* Bad CRC detected */ #define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index a5f6f632cf7c..79bf1faf4161 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1351,8 +1351,7 @@ default_check: return -EINVAL; } - if ((f2fs_sb_has_readonly(sbi) || f2fs_readonly(sbi->sb)) && - test_opt(sbi, FLUSH_MERGE)) { + if (f2fs_is_readonly(sbi) && test_opt(sbi, FLUSH_MERGE)) { f2fs_err(sbi, "FLUSH_MERGE not compatible with readonly mode"); return -EINVAL; } @@ -2083,7 +2082,7 @@ static void default_options(struct f2fs_sb_info *sbi) set_opt(sbi, MERGE_CHECKPOINT); F2FS_OPTION(sbi).unusable_cap = 0; sbi->sb->s_flags |= SB_LAZYTIME; - if (!f2fs_sb_has_readonly(sbi) && !f2fs_readonly(sbi->sb)) + if (!f2fs_is_readonly(sbi)) set_opt(sbi, FLUSH_MERGE); if (f2fs_hw_support_discard(sbi) || f2fs_hw_should_discard(sbi)) set_opt(sbi, DISCARD); From 12607c1ba7637e750402f555b6695c50fce77a2b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 30 Nov 2022 09:36:43 -0800 Subject: [PATCH 3777/4122] f2fs: specify extent cache for read explicitly Let's describe it's read extent cache.
Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 4 ++-- fs/f2fs/f2fs.h | 10 +++++----- fs/f2fs/inode.c | 2 +- fs/f2fs/node.c | 2 +- fs/f2fs/node.h | 2 +- fs/f2fs/segment.c | 4 ++-- fs/f2fs/super.c | 12 ++++++------ 7 files changed, 18 insertions(+), 18 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 932c070173b9..8cd87aee0292 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -383,7 +383,7 @@ static void __f2fs_init_extent_tree(struct inode *inode, struct page *ipage) if (!i_ext || !i_ext->len) return; - get_extent_info(&ei, i_ext); + get_read_extent_info(&ei, i_ext); write_lock(&et->lock); if (atomic_read(&et->node_cnt)) @@ -710,7 +710,7 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) unsigned int node_cnt = 0, tree_cnt = 0; int remained; - if (!test_opt(sbi, EXTENT_CACHE)) + if (!test_opt(sbi, READ_EXTENT_CACHE)) return 0; if (!atomic_read(&sbi->total_zombie_tree)) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index eb8c27c4e5fc..1c39f8145b61 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -92,7 +92,7 @@ extern const char *f2fs_fault_name[FAULT_MAX]; #define F2FS_MOUNT_FLUSH_MERGE 0x00000400 #define F2FS_MOUNT_NOBARRIER 0x00000800 #define F2FS_MOUNT_FASTBOOT 0x00001000 -#define F2FS_MOUNT_EXTENT_CACHE 0x00002000 +#define F2FS_MOUNT_READ_EXTENT_CACHE 0x00002000 #define F2FS_MOUNT_DATA_FLUSH 0x00008000 #define F2FS_MOUNT_FAULT_INJECTION 0x00010000 #define F2FS_MOUNT_USRQUOTA 0x00080000 @@ -600,7 +600,7 @@ enum { #define F2FS_MIN_EXTENT_LEN 64 /* minimum extent length */ /* number of extent info in extent cache we try to shrink */ -#define EXTENT_CACHE_SHRINK_NUMBER 128 +#define READ_EXTENT_CACHE_SHRINK_NUMBER 128 #define RECOVERY_MAX_RA_BLOCKS BIO_MAX_VECS #define RECOVERY_MIN_RA_BLOCKS 1 @@ -830,7 +830,7 @@ struct f2fs_inode_info { loff_t original_i_size; /* original i_size before atomic write */ }; -static inline void get_extent_info(struct extent_info 
*ext, +static inline void get_read_extent_info(struct extent_info *ext, struct f2fs_extent *i_ext) { ext->fofs = le32_to_cpu(i_ext->fofs); @@ -838,7 +838,7 @@ static inline void get_extent_info(struct extent_info *ext, ext->len = le32_to_cpu(i_ext->len); } -static inline void set_raw_extent(struct extent_info *ext, +static inline void set_raw_read_extent(struct extent_info *ext, struct f2fs_extent *i_ext) { i_ext->fofs = cpu_to_le32(ext->fofs); @@ -4407,7 +4407,7 @@ static inline bool f2fs_may_extent_tree(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - if (!test_opt(sbi, EXTENT_CACHE) || + if (!test_opt(sbi, READ_EXTENT_CACHE) || is_inode_flag_set(inode, FI_NO_EXTENT) || (is_inode_flag_set(inode, FI_COMPRESSED_FILE) && !f2fs_sb_has_readonly(sbi))) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 577f109b4e1d..2c705c60019b 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -629,7 +629,7 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page) if (et) { read_lock(&et->lock); - set_raw_extent(&et->largest, &ri->i_ext); + set_raw_read_extent(&et->largest, &ri->i_ext); read_unlock(&et->lock); } else { memset(&ri->i_ext, 0, sizeof(ri->i_ext)); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b9ee5a1176a0..84b147966080 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -85,7 +85,7 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type) sizeof(struct ino_entry); mem_size >>= PAGE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); - } else if (type == EXTENT_CACHE) { + } else if (type == READ_EXTENT_CACHE) { mem_size = (atomic_read(&sbi->total_ext_tree) * sizeof(struct extent_tree) + atomic_read(&sbi->total_ext_node) * diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 3c09cae058b0..0aa48704c77a 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -146,7 +146,7 @@ enum mem_type { NAT_ENTRIES, /* indicates the cached nat entry */ DIRTY_DENTS, /* indicates dirty dentry pages */ INO_ENTRIES, /* 
indicates inode entries */ - EXTENT_CACHE, /* indicates extent cache */ + READ_EXTENT_CACHE, /* indicates read extent cache */ DISCARD_CACHE, /* indicates memory of cached discard cmds */ COMPRESS_PAGE, /* indicates memory of cached compressed pages */ BASE_CHECK, /* check kernel status */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 9486ca49ecb1..51de358bc452 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -449,8 +449,8 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg) return; /* try to shrink extent cache when there is no enough memory */ - if (!f2fs_available_free_memory(sbi, EXTENT_CACHE)) - f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER); + if (!f2fs_available_free_memory(sbi, READ_EXTENT_CACHE)) + f2fs_shrink_extent_tree(sbi, READ_EXTENT_CACHE_SHRINK_NUMBER); /* check the # of cached NAT entries */ if (!f2fs_available_free_memory(sbi, NAT_ENTRIES)) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 79bf1faf4161..412c2e7352c0 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -814,10 +814,10 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) set_opt(sbi, FASTBOOT); break; case Opt_extent_cache: - set_opt(sbi, EXTENT_CACHE); + set_opt(sbi, READ_EXTENT_CACHE); break; case Opt_noextent_cache: - clear_opt(sbi, EXTENT_CACHE); + clear_opt(sbi, READ_EXTENT_CACHE); break; case Opt_noinline_data: clear_opt(sbi, INLINE_DATA); @@ -1954,7 +1954,7 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",barrier"); if (test_opt(sbi, FASTBOOT)) seq_puts(seq, ",fastboot"); - if (test_opt(sbi, EXTENT_CACHE)) + if (test_opt(sbi, READ_EXTENT_CACHE)) seq_puts(seq, ",extent_cache"); else seq_puts(seq, ",noextent_cache"); @@ -2076,7 +2076,7 @@ static void default_options(struct f2fs_sb_info *sbi) set_opt(sbi, INLINE_XATTR); set_opt(sbi, INLINE_DATA); set_opt(sbi, INLINE_DENTRY); - set_opt(sbi, EXTENT_CACHE); + set_opt(sbi, READ_EXTENT_CACHE); set_opt(sbi, NOHEAP); 
clear_opt(sbi, DISABLE_CHECKPOINT); set_opt(sbi, MERGE_CHECKPOINT); @@ -2218,7 +2218,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) bool need_restart_ckpt = false, need_stop_ckpt = false; bool need_restart_flush = false, need_stop_flush = false; bool need_restart_discard = false, need_stop_discard = false; - bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE); + bool no_read_extent_cache = !test_opt(sbi, READ_EXTENT_CACHE); bool enable_checkpoint = !test_opt(sbi, DISABLE_CHECKPOINT); bool no_io_align = !F2FS_IO_ALIGNED(sbi); bool no_atgc = !test_opt(sbi, ATGC); @@ -2308,7 +2308,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) } /* disallow enable/disable extent_cache dynamically */ - if (no_extent_cache == !!test_opt(sbi, EXTENT_CACHE)) { + if (no_read_extent_cache == !!test_opt(sbi, READ_EXTENT_CACHE)) { err = -EINVAL; f2fs_warn(sbi, "switch extent_cache option is not allowed"); goto restore_opts; From 3bac20a8f011b8ed4012b43f4f33010432b3c647 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 30 Nov 2022 09:44:58 -0800 Subject: [PATCH 3778/4122] f2fs: move internal functions into extent_cache.c No functional change. 
Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 88 +++++++++++++++++++++++++++++++++++++----- fs/f2fs/f2fs.h | 69 +-------------------------------- 2 files changed, 81 insertions(+), 76 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 8cd87aee0292..2a8e31e6d518 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -15,6 +15,77 @@ #include "node.h" #include +static void __set_extent_info(struct extent_info *ei, + unsigned int fofs, unsigned int len, + block_t blk, bool keep_clen) +{ + ei->fofs = fofs; + ei->blk = blk; + ei->len = len; + + if (keep_clen) + return; + +#ifdef CONFIG_F2FS_FS_COMPRESSION + ei->c_len = 0; +#endif +} + +static bool f2fs_may_extent_tree(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + /* + * for recovered files during mount do not create extents + * if shrinker is not registered. + */ + if (list_empty(&sbi->s_list)) + return false; + + if (!test_opt(sbi, READ_EXTENT_CACHE) || + is_inode_flag_set(inode, FI_NO_EXTENT) || + (is_inode_flag_set(inode, FI_COMPRESSED_FILE) && + !f2fs_sb_has_readonly(sbi))) + return false; + + return S_ISREG(inode->i_mode); +} + +static void __try_update_largest_extent(struct extent_tree *et, + struct extent_node *en) +{ + if (en->ei.len <= et->largest.len) + return; + + et->largest = en->ei; + et->largest_updated = true; +} + +static bool __is_extent_mergeable(struct extent_info *back, + struct extent_info *front) +{ +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (back->c_len && back->len != back->c_len) + return false; + if (front->c_len && front->len != front->c_len) + return false; +#endif + return (back->fofs + back->len == front->fofs && + back->blk + back->len == front->blk); +} + +static bool __is_back_mergeable(struct extent_info *cur, + struct extent_info *back) +{ + return __is_extent_mergeable(back, cur); +} + +static bool __is_front_mergeable(struct extent_info *cur, + struct extent_info *front) +{ + return 
__is_extent_mergeable(cur, front); +} + static struct rb_entry *__lookup_rb_tree_fast(struct rb_entry *cached_re, unsigned int ofs) { @@ -591,16 +662,16 @@ static void f2fs_update_extent_tree_range(struct inode *inode, if (end < org_end && org_end - end >= F2FS_MIN_EXTENT_LEN) { if (parts) { - set_extent_info(&ei, end, - end - dei.fofs + dei.blk, - org_end - end); + __set_extent_info(&ei, + end, org_end - end, + end - dei.fofs + dei.blk, false); en1 = __insert_extent_tree(sbi, et, &ei, NULL, NULL, true); next_en = en1; } else { - en->ei.fofs = end; - en->ei.blk += end - dei.fofs; - en->ei.len -= end - dei.fofs; + __set_extent_info(&en->ei, + end, en->ei.len - (end - dei.fofs), + en->ei.blk + (end - dei.fofs), true); next_en = en; } parts++; @@ -632,8 +703,7 @@ static void f2fs_update_extent_tree_range(struct inode *inode, /* 3. update extent in extent cache */ if (blkaddr) { - - set_extent_info(&ei, fofs, blkaddr, len); + __set_extent_info(&ei, fofs, len, blkaddr, false); if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en)) __insert_extent_tree(sbi, et, &ei, insert_p, insert_parent, leftmost); @@ -692,7 +762,7 @@ void f2fs_update_extent_tree_range_compressed(struct inode *inode, if (en) goto unlock_out; - set_extent_info(&ei, fofs, blkaddr, llen); + __set_extent_info(&ei, fofs, llen, blkaddr, true); ei.c_len = c_len; if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en)) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 1c39f8145b61..04fdf010bb77 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -621,7 +621,7 @@ struct rb_entry { struct extent_info { unsigned int fofs; /* start offset in a file */ unsigned int len; /* length of the extent */ - u32 blk; /* start block address of the extent */ + block_t blk; /* start block address of the extent */ #ifdef CONFIG_F2FS_FS_COMPRESSION unsigned int c_len; /* physical extent length of compressed blocks */ #endif @@ -846,17 +846,6 @@ static inline void set_raw_read_extent(struct extent_info *ext, 
i_ext->len = cpu_to_le32(ext->len); } -static inline void set_extent_info(struct extent_info *ei, unsigned int fofs, - u32 blk, unsigned int len) -{ - ei->fofs = fofs; - ei->blk = blk; - ei->len = len; -#ifdef CONFIG_F2FS_FS_COMPRESSION - ei->c_len = 0; -#endif -} - static inline bool __is_discard_mergeable(struct discard_info *back, struct discard_info *front, unsigned int max_len) { @@ -876,41 +865,6 @@ static inline bool __is_discard_front_mergeable(struct discard_info *cur, return __is_discard_mergeable(cur, front, max_len); } -static inline bool __is_extent_mergeable(struct extent_info *back, - struct extent_info *front) -{ -#ifdef CONFIG_F2FS_FS_COMPRESSION - if (back->c_len && back->len != back->c_len) - return false; - if (front->c_len && front->len != front->c_len) - return false; -#endif - return (back->fofs + back->len == front->fofs && - back->blk + back->len == front->blk); -} - -static inline bool __is_back_mergeable(struct extent_info *cur, - struct extent_info *back) -{ - return __is_extent_mergeable(back, cur); -} - -static inline bool __is_front_mergeable(struct extent_info *cur, - struct extent_info *front) -{ - return __is_extent_mergeable(cur, front); -} - -extern void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync); -static inline void __try_update_largest_extent(struct extent_tree *et, - struct extent_node *en) -{ - if (en->ei.len > et->largest.len) { - et->largest = en->ei; - et->largest_updated = true; - } -} - /* * For free nid management */ @@ -2581,6 +2535,7 @@ static inline block_t __start_sum_addr(struct f2fs_sb_info *sbi) return le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum); } +extern void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync); static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, struct inode *inode, bool is_inode) { @@ -4403,26 +4358,6 @@ F2FS_FEATURE_FUNCS(casefold, CASEFOLD); F2FS_FEATURE_FUNCS(compression, COMPRESSION); F2FS_FEATURE_FUNCS(readonly, RO); -static inline bool 
f2fs_may_extent_tree(struct inode *inode) -{ - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - - if (!test_opt(sbi, READ_EXTENT_CACHE) || - is_inode_flag_set(inode, FI_NO_EXTENT) || - (is_inode_flag_set(inode, FI_COMPRESSED_FILE) && - !f2fs_sb_has_readonly(sbi))) - return false; - - /* - * for recovered files during mount do not create extents - * if shrinker is not registered. - */ - if (list_empty(&sbi->s_list)) - return false; - - return S_ISREG(inode->i_mode); -} - #ifdef CONFIG_BLK_DEV_ZONED static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi, block_t blkaddr) From 749d543c0d451fff31e8f7a3e0a031ffcbf1ebb1 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 30 Nov 2022 10:01:18 -0800 Subject: [PATCH 3779/4122] f2fs: remove unnecessary __init_extent_tree Added into the caller. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 2a8e31e6d518..c6810347e205 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -386,21 +386,6 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode) return et; } -static struct extent_node *__init_extent_tree(struct f2fs_sb_info *sbi, - struct extent_tree *et, struct extent_info *ei) -{ - struct rb_node **p = &et->root.rb_root.rb_node; - struct extent_node *en; - - en = __attach_extent_node(sbi, et, ei, NULL, p, true); - if (!en) - return NULL; - - et->largest = en->ei; - et->cached_en = en; - return en; -} - static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi, struct extent_tree *et) { @@ -460,8 +445,12 @@ static void __f2fs_init_extent_tree(struct inode *inode, struct page *ipage) if (atomic_read(&et->node_cnt)) goto out; - en = __init_extent_tree(sbi, et, &ei); + en = __attach_extent_node(sbi, et, &ei, NULL, + &et->root.rb_root.rb_node, true); if (en) { + et->largest = en->ei; + et->cached_en = en; + 
spin_lock(&sbi->extent_lock); list_add_tail(&en->list, &sbi->extent_list); spin_unlock(&sbi->extent_lock); From e7547daccd6a37522f0af74ec4b5a3036f3dd328 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 30 Nov 2022 09:26:29 -0800 Subject: [PATCH 3780/4122] f2fs: refactor extent_cache to support for read and more This patch prepares extent_cache to be ready for addition. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 20 +- fs/f2fs/debug.c | 65 +++-- fs/f2fs/extent_cache.c | 465 +++++++++++++++++++++--------------- fs/f2fs/f2fs.h | 119 +++++---- fs/f2fs/file.c | 8 +- fs/f2fs/gc.c | 4 +- fs/f2fs/inode.c | 6 +- fs/f2fs/node.c | 8 +- fs/f2fs/segment.c | 3 +- fs/f2fs/shrinker.c | 19 +- include/trace/events/f2fs.h | 62 +++-- 11 files changed, 471 insertions(+), 308 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 35c19248b1e2..75abd450730b 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1126,7 +1126,7 @@ void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr) { dn->data_blkaddr = blkaddr; f2fs_set_data_blkaddr(dn); - f2fs_update_extent_cache(dn); + f2fs_update_read_extent_cache(dn); } /* dn->ofs_in_node will be returned with up-to-date last block pointer */ @@ -1195,7 +1195,7 @@ int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index) struct extent_info ei = {0, }; struct inode *inode = dn->inode; - if (f2fs_lookup_extent_cache(inode, index, &ei)) { + if (f2fs_lookup_read_extent_cache(inode, index, &ei)) { dn->data_blkaddr = ei.blk + index - ei.fofs; return 0; } @@ -1217,7 +1217,7 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, if (!page) return ERR_PTR(-ENOMEM); - if (f2fs_lookup_extent_cache(inode, index, &ei)) { + if (f2fs_lookup_read_extent_cache(inode, index, &ei)) { dn.data_blkaddr = ei.blk + index - ei.fofs; if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr, DATA_GENERIC_ENHANCE_READ)) { @@ -1485,7 +1485,7 @@ int f2fs_map_blocks(struct inode *inode, struct 
f2fs_map_blocks *map, pgofs = (pgoff_t)map->m_lblk; end = pgofs + maxblocks; - if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) { + if (!create && f2fs_lookup_read_extent_cache(inode, pgofs, &ei)) { if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO && map->m_may_create) goto next_dnode; @@ -1695,7 +1695,7 @@ skip: if (map->m_flags & F2FS_MAP_MAPPED) { unsigned int ofs = start_pgofs - map->m_lblk; - f2fs_update_extent_cache_range(&dn, + f2fs_update_read_extent_cache_range(&dn, start_pgofs, map->m_pblk + ofs, map->m_len - ofs); } @@ -1740,7 +1740,7 @@ sync_out: if (map->m_flags & F2FS_MAP_MAPPED) { unsigned int ofs = start_pgofs - map->m_lblk; - f2fs_update_extent_cache_range(&dn, + f2fs_update_read_extent_cache_range(&dn, start_pgofs, map->m_pblk + ofs, map->m_len - ofs); } @@ -2201,7 +2201,7 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, if (f2fs_cluster_is_empty(cc)) goto out; - if (f2fs_lookup_extent_cache(inode, start_idx, &ei)) + if (f2fs_lookup_read_extent_cache(inode, start_idx, &ei)) from_dnode = false; if (!from_dnode) @@ -2635,7 +2635,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) set_new_dnode(&dn, inode, NULL, NULL, 0); if (need_inplace_update(fio) && - f2fs_lookup_extent_cache(inode, page->index, &ei)) { + f2fs_lookup_read_extent_cache(inode, page->index, &ei)) { fio->old_blkaddr = ei.blk + page->index - ei.fofs; if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr, @@ -3359,7 +3359,7 @@ restart: } else if (locked) { err = f2fs_get_block(&dn, index); } else { - if (f2fs_lookup_extent_cache(inode, index, &ei)) { + if (f2fs_lookup_read_extent_cache(inode, index, &ei)) { dn.data_blkaddr = ei.blk + index - ei.fofs; } else { /* hole case */ @@ -3400,7 +3400,7 @@ static int __find_data_block(struct inode *inode, pgoff_t index, set_new_dnode(&dn, inode, ipage, ipage, 0); - if (f2fs_lookup_extent_cache(inode, index, &ei)) { + if (f2fs_lookup_read_extent_cache(inode, index, &ei)) { dn.data_blkaddr = ei.blk 
+ index - ei.fofs; } else { /* hole case */ diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index a216dcdf6941..a9baa121d829 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -72,15 +72,23 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->main_area_zones = si->main_area_sections / le32_to_cpu(raw_super->secs_per_zone); - /* validation check of the segment numbers */ + /* general extent cache stats */ + for (i = 0; i < NR_EXTENT_CACHES; i++) { + struct extent_tree_info *eti = &sbi->extent_tree[i]; + + si->hit_cached[i] = atomic64_read(&sbi->read_hit_cached[i]); + si->hit_rbtree[i] = atomic64_read(&sbi->read_hit_rbtree[i]); + si->total_ext[i] = atomic64_read(&sbi->total_hit_ext[i]); + si->hit_total[i] = si->hit_cached[i] + si->hit_rbtree[i]; + si->ext_tree[i] = atomic_read(&eti->total_ext_tree); + si->zombie_tree[i] = atomic_read(&eti->total_zombie_tree); + si->ext_node[i] = atomic_read(&eti->total_ext_node); + } + /* read extent_cache only */ si->hit_largest = atomic64_read(&sbi->read_hit_largest); - si->hit_cached = atomic64_read(&sbi->read_hit_cached); - si->hit_rbtree = atomic64_read(&sbi->read_hit_rbtree); - si->hit_total = si->hit_largest + si->hit_cached + si->hit_rbtree; - si->total_ext = atomic64_read(&sbi->total_hit_ext); - si->ext_tree = atomic_read(&sbi->total_ext_tree); - si->zombie_tree = atomic_read(&sbi->total_zombie_tree); - si->ext_node = atomic_read(&sbi->total_ext_node); + si->hit_total[EX_READ] += si->hit_largest; + + /* validation check of the segment numbers */ si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES); si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS); si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META); @@ -294,10 +302,16 @@ get_cache: sizeof(struct nat_entry_set); for (i = 0; i < MAX_INO_ENTRY; i++) si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry); - si->cache_mem += atomic_read(&sbi->total_ext_tree) * + + for (i = 0; i < NR_EXTENT_CACHES; i++) { + struct extent_tree_info *eti = &sbi->extent_tree[i]; 
+ + si->ext_mem[i] = atomic_read(&eti->total_ext_tree) * sizeof(struct extent_tree); - si->cache_mem += atomic_read(&sbi->total_ext_node) * + si->ext_mem[i] += atomic_read(&eti->total_ext_node) * sizeof(struct extent_node); + si->cache_mem += si->ext_mem[i]; + } si->page_mem = 0; if (sbi->node_inode) { @@ -490,16 +504,18 @@ static int stat_show(struct seq_file *s, void *v) si->bg_node_blks); seq_printf(s, "BG skip : IO: %u, Other: %u\n", si->io_skip_bggc, si->other_skip_bggc); - seq_puts(s, "\nExtent Cache:\n"); + seq_puts(s, "\nExtent Cache (Read):\n"); seq_printf(s, " - Hit Count: L1-1:%llu L1-2:%llu L2:%llu\n", - si->hit_largest, si->hit_cached, - si->hit_rbtree); + si->hit_largest, si->hit_cached[EX_READ], + si->hit_rbtree[EX_READ]); seq_printf(s, " - Hit Ratio: %llu%% (%llu / %llu)\n", - !si->total_ext ? 0 : - div64_u64(si->hit_total * 100, si->total_ext), - si->hit_total, si->total_ext); + !si->total_ext[EX_READ] ? 0 : + div64_u64(si->hit_total[EX_READ] * 100, + si->total_ext[EX_READ]), + si->hit_total[EX_READ], si->total_ext[EX_READ]); seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n", - si->ext_tree, si->zombie_tree, si->ext_node); + si->ext_tree[EX_READ], si->zombie_tree[EX_READ], + si->ext_node[EX_READ]); seq_puts(s, "\nBalancing F2FS Async:\n"); seq_printf(s, " - DIO (R: %4d, W: %4d)\n", si->nr_dio_read, si->nr_dio_write); @@ -566,8 +582,10 @@ static int stat_show(struct seq_file *s, void *v) (si->base_mem + si->cache_mem + si->page_mem) >> 10); seq_printf(s, " - static: %llu KB\n", si->base_mem >> 10); - seq_printf(s, " - cached: %llu KB\n", + seq_printf(s, " - cached all: %llu KB\n", si->cache_mem >> 10); + seq_printf(s, " - read extent cache: %llu KB\n", + si->ext_mem[EX_READ] >> 10); seq_printf(s, " - paged : %llu KB\n", si->page_mem >> 10); } @@ -600,10 +618,15 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) si->sbi = sbi; sbi->stat_info = si; - atomic64_set(&sbi->total_hit_ext, 0); - atomic64_set(&sbi->read_hit_rbtree, 0); + /* 
general extent cache stats */ + for (i = 0; i < NR_EXTENT_CACHES; i++) { + atomic64_set(&sbi->total_hit_ext[i], 0); + atomic64_set(&sbi->read_hit_rbtree[i], 0); + atomic64_set(&sbi->read_hit_cached[i], 0); + } + + /* read extent_cache only */ atomic64_set(&sbi->read_hit_largest, 0); - atomic64_set(&sbi->read_hit_cached, 0); atomic_set(&sbi->inline_xattr, 0); atomic_set(&sbi->inline_inode, 0); diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index c6810347e205..654a14ab8977 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -17,21 +17,37 @@ static void __set_extent_info(struct extent_info *ei, unsigned int fofs, unsigned int len, - block_t blk, bool keep_clen) + block_t blk, bool keep_clen, + enum extent_type type) { ei->fofs = fofs; - ei->blk = blk; ei->len = len; - if (keep_clen) - return; - + if (type == EX_READ) { + ei->blk = blk; + if (keep_clen) + return; #ifdef CONFIG_F2FS_FS_COMPRESSION - ei->c_len = 0; + ei->c_len = 0; #endif + } } -static bool f2fs_may_extent_tree(struct inode *inode) +static bool __may_read_extent_tree(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + if (!test_opt(sbi, READ_EXTENT_CACHE)) + return false; + if (is_inode_flag_set(inode, FI_NO_EXTENT)) + return false; + if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) && + !f2fs_sb_has_readonly(sbi)) + return false; + return S_ISREG(inode->i_mode); +} + +static bool __may_extent_tree(struct inode *inode, enum extent_type type) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -42,18 +58,16 @@ static bool f2fs_may_extent_tree(struct inode *inode) if (list_empty(&sbi->s_list)) return false; - if (!test_opt(sbi, READ_EXTENT_CACHE) || - is_inode_flag_set(inode, FI_NO_EXTENT) || - (is_inode_flag_set(inode, FI_COMPRESSED_FILE) && - !f2fs_sb_has_readonly(sbi))) - return false; - - return S_ISREG(inode->i_mode); + if (type == EX_READ) + return __may_read_extent_tree(inode); + return false; } static void __try_update_largest_extent(struct 
extent_tree *et, struct extent_node *en) { + if (et->type != EX_READ) + return; if (en->ei.len <= et->largest.len) return; @@ -62,28 +76,31 @@ static void __try_update_largest_extent(struct extent_tree *et, } static bool __is_extent_mergeable(struct extent_info *back, - struct extent_info *front) + struct extent_info *front, enum extent_type type) { + if (type == EX_READ) { #ifdef CONFIG_F2FS_FS_COMPRESSION - if (back->c_len && back->len != back->c_len) - return false; - if (front->c_len && front->len != front->c_len) - return false; + if (back->c_len && back->len != back->c_len) + return false; + if (front->c_len && front->len != front->c_len) + return false; #endif - return (back->fofs + back->len == front->fofs && - back->blk + back->len == front->blk); + return (back->fofs + back->len == front->fofs && + back->blk + back->len == front->blk); + } + return false; } static bool __is_back_mergeable(struct extent_info *cur, - struct extent_info *back) + struct extent_info *back, enum extent_type type) { - return __is_extent_mergeable(back, cur); + return __is_extent_mergeable(back, cur, type); } static bool __is_front_mergeable(struct extent_info *cur, - struct extent_info *front) + struct extent_info *front, enum extent_type type) { - return __is_extent_mergeable(cur, front); + return __is_extent_mergeable(cur, front, type); } static struct rb_entry *__lookup_rb_tree_fast(struct rb_entry *cached_re, @@ -308,6 +325,7 @@ static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi, struct rb_node *parent, struct rb_node **p, bool leftmost) { + struct extent_tree_info *eti = &sbi->extent_tree[et->type]; struct extent_node *en; en = f2fs_kmem_cache_alloc(extent_node_slab, GFP_ATOMIC, false, sbi); @@ -321,16 +339,18 @@ static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi, rb_link_node(&en->rb_node, parent, p); rb_insert_color_cached(&en->rb_node, &et->root, leftmost); atomic_inc(&et->node_cnt); - atomic_inc(&sbi->total_ext_node); + 
atomic_inc(&eti->total_ext_node); return en; } static void __detach_extent_node(struct f2fs_sb_info *sbi, struct extent_tree *et, struct extent_node *en) { + struct extent_tree_info *eti = &sbi->extent_tree[et->type]; + rb_erase_cached(&en->rb_node, &et->root); atomic_dec(&et->node_cnt); - atomic_dec(&sbi->total_ext_node); + atomic_dec(&eti->total_ext_node); if (et->cached_en == en) et->cached_en = NULL; @@ -346,42 +366,47 @@ static void __detach_extent_node(struct f2fs_sb_info *sbi, static void __release_extent_node(struct f2fs_sb_info *sbi, struct extent_tree *et, struct extent_node *en) { - spin_lock(&sbi->extent_lock); + struct extent_tree_info *eti = &sbi->extent_tree[et->type]; + + spin_lock(&eti->extent_lock); f2fs_bug_on(sbi, list_empty(&en->list)); list_del_init(&en->list); - spin_unlock(&sbi->extent_lock); + spin_unlock(&eti->extent_lock); __detach_extent_node(sbi, et, en); } -static struct extent_tree *__grab_extent_tree(struct inode *inode) +static struct extent_tree *__grab_extent_tree(struct inode *inode, + enum extent_type type) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct extent_tree_info *eti = &sbi->extent_tree[type]; struct extent_tree *et; nid_t ino = inode->i_ino; - mutex_lock(&sbi->extent_tree_lock); - et = radix_tree_lookup(&sbi->extent_tree_root, ino); + mutex_lock(&eti->extent_tree_lock); + et = radix_tree_lookup(&eti->extent_tree_root, ino); if (!et) { et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS, true, NULL); - f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et); + f2fs_radix_tree_insert(&eti->extent_tree_root, ino, et); memset(et, 0, sizeof(struct extent_tree)); et->ino = ino; + et->type = type; et->root = RB_ROOT_CACHED; et->cached_en = NULL; rwlock_init(&et->lock); INIT_LIST_HEAD(&et->list); atomic_set(&et->node_cnt, 0); - atomic_inc(&sbi->total_ext_tree); + atomic_inc(&eti->total_ext_tree); } else { - atomic_dec(&sbi->total_zombie_tree); + atomic_dec(&eti->total_zombie_tree); list_del_init(&et->list); } - 
mutex_unlock(&sbi->extent_tree_lock); + mutex_unlock(&eti->extent_tree_lock); /* never died until evict_inode */ - F2FS_I(inode)->extent_tree = et; + F2FS_I(inode)->extent_tree[type] = et; return et; } @@ -415,35 +440,38 @@ static void __drop_largest_extent(struct extent_tree *et, } /* return true, if inode page is changed */ -static void __f2fs_init_extent_tree(struct inode *inode, struct page *ipage) +static void __f2fs_init_extent_tree(struct inode *inode, struct page *ipage, + enum extent_type type) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct extent_tree_info *eti = &sbi->extent_tree[type]; struct f2fs_extent *i_ext = ipage ? &F2FS_INODE(ipage)->i_ext : NULL; struct extent_tree *et; struct extent_node *en; struct extent_info ei; - if (!f2fs_may_extent_tree(inode)) { - /* drop largest extent */ - if (i_ext && i_ext->len) { + if (!__may_extent_tree(inode, type)) { + /* drop largest read extent */ + if (type == EX_READ && i_ext && i_ext->len) { f2fs_wait_on_page_writeback(ipage, NODE, true, true); i_ext->len = 0; set_page_dirty(ipage); - return; } - return; + goto out; } - et = __grab_extent_tree(inode); + et = __grab_extent_tree(inode, type); if (!i_ext || !i_ext->len) - return; + goto out; + + BUG_ON(type != EX_READ); get_read_extent_info(&ei, i_ext); write_lock(&et->lock); if (atomic_read(&et->node_cnt)) - goto out; + goto unlock_out; en = __attach_extent_node(sbi, et, &ei, NULL, &et->root.rb_root.rb_node, true); @@ -451,37 +479,40 @@ static void __f2fs_init_extent_tree(struct inode *inode, struct page *ipage) et->largest = en->ei; et->cached_en = en; - spin_lock(&sbi->extent_lock); - list_add_tail(&en->list, &sbi->extent_list); - spin_unlock(&sbi->extent_lock); + spin_lock(&eti->extent_lock); + list_add_tail(&en->list, &eti->extent_list); + spin_unlock(&eti->extent_lock); } -out: +unlock_out: write_unlock(&et->lock); +out: + if (type == EX_READ && !F2FS_I(inode)->extent_tree[EX_READ]) + set_inode_flag(inode, FI_NO_EXTENT); } void 
f2fs_init_extent_tree(struct inode *inode, struct page *ipage) { - __f2fs_init_extent_tree(inode, ipage); - - if (!F2FS_I(inode)->extent_tree) - set_inode_flag(inode, FI_NO_EXTENT); + /* initialize read cache */ + __f2fs_init_extent_tree(inode, ipage, EX_READ); } -static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, - struct extent_info *ei) +static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs, + struct extent_info *ei, enum extent_type type) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct extent_tree *et = F2FS_I(inode)->extent_tree; + struct extent_tree_info *eti = &sbi->extent_tree[type]; + struct extent_tree *et = F2FS_I(inode)->extent_tree[type]; struct extent_node *en; bool ret = false; f2fs_bug_on(sbi, !et); - trace_f2fs_lookup_extent_tree_start(inode, pgofs); + trace_f2fs_lookup_extent_tree_start(inode, pgofs, type); read_lock(&et->lock); - if (et->largest.fofs <= pgofs && + if (type == EX_READ && + et->largest.fofs <= pgofs && et->largest.fofs + et->largest.len > pgofs) { *ei = et->largest; ret = true; @@ -495,23 +526,24 @@ static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, goto out; if (en == et->cached_en) - stat_inc_cached_node_hit(sbi); + stat_inc_cached_node_hit(sbi, type); else - stat_inc_rbtree_node_hit(sbi); + stat_inc_rbtree_node_hit(sbi, type); *ei = en->ei; - spin_lock(&sbi->extent_lock); + spin_lock(&eti->extent_lock); if (!list_empty(&en->list)) { - list_move_tail(&en->list, &sbi->extent_list); + list_move_tail(&en->list, &eti->extent_list); et->cached_en = en; } - spin_unlock(&sbi->extent_lock); + spin_unlock(&eti->extent_lock); ret = true; out: - stat_inc_total_hit(sbi); + stat_inc_total_hit(sbi, type); read_unlock(&et->lock); - trace_f2fs_lookup_extent_tree_end(inode, pgofs, ei); + if (type == EX_READ) + trace_f2fs_lookup_read_extent_tree_end(inode, pgofs, ei); return ret; } @@ -520,18 +552,20 @@ static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi, 
struct extent_node *prev_ex, struct extent_node *next_ex) { + struct extent_tree_info *eti = &sbi->extent_tree[et->type]; struct extent_node *en = NULL; - if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei)) { + if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei, et->type)) { prev_ex->ei.len += ei->len; ei = &prev_ex->ei; en = prev_ex; } - if (next_ex && __is_front_mergeable(ei, &next_ex->ei)) { + if (next_ex && __is_front_mergeable(ei, &next_ex->ei, et->type)) { next_ex->ei.fofs = ei->fofs; - next_ex->ei.blk = ei->blk; next_ex->ei.len += ei->len; + if (et->type == EX_READ) + next_ex->ei.blk = ei->blk; if (en) __release_extent_node(sbi, et, prev_ex); @@ -543,12 +577,12 @@ static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi, __try_update_largest_extent(et, en); - spin_lock(&sbi->extent_lock); + spin_lock(&eti->extent_lock); if (!list_empty(&en->list)) { - list_move_tail(&en->list, &sbi->extent_list); + list_move_tail(&en->list, &eti->extent_list); et->cached_en = en; } - spin_unlock(&sbi->extent_lock); + spin_unlock(&eti->extent_lock); return en; } @@ -558,6 +592,7 @@ static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi, struct rb_node *insert_parent, bool leftmost) { + struct extent_tree_info *eti = &sbi->extent_tree[et->type]; struct rb_node **p; struct rb_node *parent = NULL; struct extent_node *en = NULL; @@ -580,48 +615,51 @@ do_insert: __try_update_largest_extent(et, en); /* update in global extent list */ - spin_lock(&sbi->extent_lock); - list_add_tail(&en->list, &sbi->extent_list); + spin_lock(&eti->extent_lock); + list_add_tail(&en->list, &eti->extent_list); et->cached_en = en; - spin_unlock(&sbi->extent_lock); + spin_unlock(&eti->extent_lock); return en; } -static void f2fs_update_extent_tree_range(struct inode *inode, - pgoff_t fofs, block_t blkaddr, unsigned int len) +static void __update_extent_tree_range(struct inode *inode, + struct extent_info *tei, enum extent_type type) { struct f2fs_sb_info *sbi = 
F2FS_I_SB(inode); - struct extent_tree *et = F2FS_I(inode)->extent_tree; + struct extent_tree *et = F2FS_I(inode)->extent_tree[type]; struct extent_node *en = NULL, *en1 = NULL; struct extent_node *prev_en = NULL, *next_en = NULL; struct extent_info ei, dei, prev; struct rb_node **insert_p = NULL, *insert_parent = NULL; + unsigned int fofs = tei->fofs, len = tei->len; unsigned int end = fofs + len; - unsigned int pos = (unsigned int)fofs; bool updated = false; bool leftmost = false; if (!et) return; - trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, len, 0); - + if (type == EX_READ) + trace_f2fs_update_read_extent_tree_range(inode, fofs, len, + tei->blk, 0); write_lock(&et->lock); - if (is_inode_flag_set(inode, FI_NO_EXTENT)) { - write_unlock(&et->lock); - return; + if (type == EX_READ) { + if (is_inode_flag_set(inode, FI_NO_EXTENT)) { + write_unlock(&et->lock); + return; + } + + prev = et->largest; + dei.len = 0; + + /* + * drop largest extent before lookup, in case it's already + * been shrunk from extent tree + */ + __drop_largest_extent(et, fofs, len); } - prev = et->largest; - dei.len = 0; - - /* - * drop largest extent before lookup, in case it's already - * been shrunk from extent tree - */ - __drop_largest_extent(et, fofs, len); - /* 1. 
lookup first extent node in range [fofs, fofs + len - 1] */ en = (struct extent_node *)f2fs_lookup_rb_tree_ret(&et->root, (struct rb_entry *)et->cached_en, fofs, @@ -641,26 +679,30 @@ static void f2fs_update_extent_tree_range(struct inode *inode, dei = en->ei; org_end = dei.fofs + dei.len; - f2fs_bug_on(sbi, pos >= org_end); + f2fs_bug_on(sbi, fofs >= org_end); - if (pos > dei.fofs && pos - dei.fofs >= F2FS_MIN_EXTENT_LEN) { - en->ei.len = pos - en->ei.fofs; + if (fofs > dei.fofs && (type != EX_READ || + fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN)) { + en->ei.len = fofs - en->ei.fofs; prev_en = en; parts = 1; } - if (end < org_end && org_end - end >= F2FS_MIN_EXTENT_LEN) { + if (end < org_end && (type != EX_READ || + org_end - end >= F2FS_MIN_EXTENT_LEN)) { if (parts) { __set_extent_info(&ei, end, org_end - end, - end - dei.fofs + dei.blk, false); + end - dei.fofs + dei.blk, false, + type); en1 = __insert_extent_tree(sbi, et, &ei, NULL, NULL, true); next_en = en1; } else { __set_extent_info(&en->ei, end, en->ei.len - (end - dei.fofs), - en->ei.blk + (end - dei.fofs), true); + en->ei.blk + (end - dei.fofs), true, + type); next_en = en; } parts++; @@ -690,9 +732,11 @@ static void f2fs_update_extent_tree_range(struct inode *inode, en = next_en; } - /* 3. update extent in extent cache */ - if (blkaddr) { - __set_extent_info(&ei, fofs, len, blkaddr, false); + /* 3. 
update extent in read extent cache */ + BUG_ON(type != EX_READ); + + if (tei->blk) { + __set_extent_info(&ei, fofs, len, tei->blk, false, EX_READ); if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en)) __insert_extent_tree(sbi, et, &ei, insert_p, insert_parent, leftmost); @@ -722,19 +766,20 @@ static void f2fs_update_extent_tree_range(struct inode *inode, } #ifdef CONFIG_F2FS_FS_COMPRESSION -void f2fs_update_extent_tree_range_compressed(struct inode *inode, +void f2fs_update_read_extent_tree_range_compressed(struct inode *inode, pgoff_t fofs, block_t blkaddr, unsigned int llen, unsigned int c_len) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct extent_tree *et = F2FS_I(inode)->extent_tree; + struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ]; struct extent_node *en = NULL; struct extent_node *prev_en = NULL, *next_en = NULL; struct extent_info ei; struct rb_node **insert_p = NULL, *insert_parent = NULL; bool leftmost = false; - trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, llen, c_len); + trace_f2fs_update_read_extent_tree_range(inode, fofs, llen, + blkaddr, c_len); /* it is safe here to check FI_NO_EXTENT w/o et->lock in ro image */ if (is_inode_flag_set(inode, FI_NO_EXTENT)) @@ -751,7 +796,7 @@ void f2fs_update_extent_tree_range_compressed(struct inode *inode, if (en) goto unlock_out; - __set_extent_info(&ei, fofs, llen, blkaddr, true); + __set_extent_info(&ei, fofs, llen, blkaddr, true, EX_READ); ei.c_len = c_len; if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en)) @@ -762,24 +807,43 @@ unlock_out: } #endif -unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) +static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type type) { + struct extent_info ei; + + if (!__may_extent_tree(dn->inode, type)) + return; + + ei.fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) + + dn->ofs_in_node; + ei.len = 1; + + if (type == EX_READ) { + if (dn->data_blkaddr == 
NEW_ADDR) + ei.blk = NULL_ADDR; + else + ei.blk = dn->data_blkaddr; + } + __update_extent_tree_range(dn->inode, &ei, type); +} + +static unsigned int __shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink, + enum extent_type type) +{ + struct extent_tree_info *eti = &sbi->extent_tree[type]; struct extent_tree *et, *next; struct extent_node *en; unsigned int node_cnt = 0, tree_cnt = 0; int remained; - if (!test_opt(sbi, READ_EXTENT_CACHE)) - return 0; - - if (!atomic_read(&sbi->total_zombie_tree)) + if (!atomic_read(&eti->total_zombie_tree)) goto free_node; - if (!mutex_trylock(&sbi->extent_tree_lock)) + if (!mutex_trylock(&eti->extent_tree_lock)) goto out; /* 1. remove unreferenced extent tree */ - list_for_each_entry_safe(et, next, &sbi->zombie_list, list) { + list_for_each_entry_safe(et, next, &eti->zombie_list, list) { if (atomic_read(&et->node_cnt)) { write_lock(&et->lock); node_cnt += __free_extent_tree(sbi, et); @@ -787,61 +851,100 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) } f2fs_bug_on(sbi, atomic_read(&et->node_cnt)); list_del_init(&et->list); - radix_tree_delete(&sbi->extent_tree_root, et->ino); + radix_tree_delete(&eti->extent_tree_root, et->ino); kmem_cache_free(extent_tree_slab, et); - atomic_dec(&sbi->total_ext_tree); - atomic_dec(&sbi->total_zombie_tree); + atomic_dec(&eti->total_ext_tree); + atomic_dec(&eti->total_zombie_tree); tree_cnt++; if (node_cnt + tree_cnt >= nr_shrink) goto unlock_out; cond_resched(); } - mutex_unlock(&sbi->extent_tree_lock); + mutex_unlock(&eti->extent_tree_lock); free_node: /* 2. 
remove LRU extent entries */ - if (!mutex_trylock(&sbi->extent_tree_lock)) + if (!mutex_trylock(&eti->extent_tree_lock)) goto out; remained = nr_shrink - (node_cnt + tree_cnt); - spin_lock(&sbi->extent_lock); + spin_lock(&eti->extent_lock); for (; remained > 0; remained--) { - if (list_empty(&sbi->extent_list)) + if (list_empty(&eti->extent_list)) break; - en = list_first_entry(&sbi->extent_list, + en = list_first_entry(&eti->extent_list, struct extent_node, list); et = en->et; if (!write_trylock(&et->lock)) { /* refresh this extent node's position in extent list */ - list_move_tail(&en->list, &sbi->extent_list); + list_move_tail(&en->list, &eti->extent_list); continue; } list_del_init(&en->list); - spin_unlock(&sbi->extent_lock); + spin_unlock(&eti->extent_lock); __detach_extent_node(sbi, et, en); write_unlock(&et->lock); node_cnt++; - spin_lock(&sbi->extent_lock); + spin_lock(&eti->extent_lock); } - spin_unlock(&sbi->extent_lock); + spin_unlock(&eti->extent_lock); unlock_out: - mutex_unlock(&sbi->extent_tree_lock); + mutex_unlock(&eti->extent_tree_lock); out: - trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt); + trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt, type); return node_cnt + tree_cnt; } -unsigned int f2fs_destroy_extent_node(struct inode *inode) +/* read extent cache operations */ +bool f2fs_lookup_read_extent_cache(struct inode *inode, pgoff_t pgofs, + struct extent_info *ei) +{ + if (!__may_extent_tree(inode, EX_READ)) + return false; + + return __lookup_extent_tree(inode, pgofs, ei, EX_READ); +} + +void f2fs_update_read_extent_cache(struct dnode_of_data *dn) +{ + return __update_extent_cache(dn, EX_READ); +} + +void f2fs_update_read_extent_cache_range(struct dnode_of_data *dn, + pgoff_t fofs, block_t blkaddr, unsigned int len) +{ + struct extent_info ei = { + .fofs = fofs, + .len = len, + .blk = blkaddr, + }; + + if (!__may_extent_tree(dn->inode, EX_READ)) + return; + + __update_extent_tree_range(dn->inode, &ei, EX_READ); +} + +unsigned 
int f2fs_shrink_read_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) +{ + if (!test_opt(sbi, READ_EXTENT_CACHE)) + return 0; + + return __shrink_extent_tree(sbi, nr_shrink, EX_READ); +} + +static unsigned int __destroy_extent_node(struct inode *inode, + enum extent_type type) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct extent_tree *et = F2FS_I(inode)->extent_tree; + struct extent_tree *et = F2FS_I(inode)->extent_tree[type]; unsigned int node_cnt = 0; if (!et || !atomic_read(&et->node_cnt)) @@ -854,31 +957,44 @@ unsigned int f2fs_destroy_extent_node(struct inode *inode) return node_cnt; } -void f2fs_drop_extent_tree(struct inode *inode) +void f2fs_destroy_extent_node(struct inode *inode) +{ + __destroy_extent_node(inode, EX_READ); +} + +static void __drop_extent_tree(struct inode *inode, enum extent_type type) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct extent_tree *et = F2FS_I(inode)->extent_tree; + struct extent_tree *et = F2FS_I(inode)->extent_tree[type]; bool updated = false; - if (!f2fs_may_extent_tree(inode)) + if (!__may_extent_tree(inode, type)) return; write_lock(&et->lock); - set_inode_flag(inode, FI_NO_EXTENT); __free_extent_tree(sbi, et); - if (et->largest.len) { - et->largest.len = 0; - updated = true; + if (type == EX_READ) { + set_inode_flag(inode, FI_NO_EXTENT); + if (et->largest.len) { + et->largest.len = 0; + updated = true; + } } write_unlock(&et->lock); if (updated) f2fs_mark_inode_dirty_sync(inode, true); } -void f2fs_destroy_extent_tree(struct inode *inode) +void f2fs_drop_extent_tree(struct inode *inode) +{ + __drop_extent_tree(inode, EX_READ); +} + +static void __destroy_extent_tree(struct inode *inode, enum extent_type type) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct extent_tree *et = F2FS_I(inode)->extent_tree; + struct extent_tree_info *eti = &sbi->extent_tree[type]; + struct extent_tree *et = F2FS_I(inode)->extent_tree[type]; unsigned int node_cnt = 0; if (!et) @@ -886,76 +1002,49 @@ void 
f2fs_destroy_extent_tree(struct inode *inode) if (inode->i_nlink && !is_bad_inode(inode) && atomic_read(&et->node_cnt)) { - mutex_lock(&sbi->extent_tree_lock); - list_add_tail(&et->list, &sbi->zombie_list); - atomic_inc(&sbi->total_zombie_tree); - mutex_unlock(&sbi->extent_tree_lock); + mutex_lock(&eti->extent_tree_lock); + list_add_tail(&et->list, &eti->zombie_list); + atomic_inc(&eti->total_zombie_tree); + mutex_unlock(&eti->extent_tree_lock); return; } /* free all extent info belong to this extent tree */ - node_cnt = f2fs_destroy_extent_node(inode); + node_cnt = __destroy_extent_node(inode, type); /* delete extent tree entry in radix tree */ - mutex_lock(&sbi->extent_tree_lock); + mutex_lock(&eti->extent_tree_lock); f2fs_bug_on(sbi, atomic_read(&et->node_cnt)); - radix_tree_delete(&sbi->extent_tree_root, inode->i_ino); + radix_tree_delete(&eti->extent_tree_root, inode->i_ino); kmem_cache_free(extent_tree_slab, et); - atomic_dec(&sbi->total_ext_tree); - mutex_unlock(&sbi->extent_tree_lock); + atomic_dec(&eti->total_ext_tree); + mutex_unlock(&eti->extent_tree_lock); - F2FS_I(inode)->extent_tree = NULL; + F2FS_I(inode)->extent_tree[type] = NULL; - trace_f2fs_destroy_extent_tree(inode, node_cnt); + trace_f2fs_destroy_extent_tree(inode, node_cnt, type); } -bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs, - struct extent_info *ei) +void f2fs_destroy_extent_tree(struct inode *inode) { - if (!f2fs_may_extent_tree(inode)) - return false; - - return f2fs_lookup_extent_tree(inode, pgofs, ei); + __destroy_extent_tree(inode, EX_READ); } -void f2fs_update_extent_cache(struct dnode_of_data *dn) +static void __init_extent_tree_info(struct extent_tree_info *eti) { - pgoff_t fofs; - block_t blkaddr; - - if (!f2fs_may_extent_tree(dn->inode)) - return; - - if (dn->data_blkaddr == NEW_ADDR) - blkaddr = NULL_ADDR; - else - blkaddr = dn->data_blkaddr; - - fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) + - dn->ofs_in_node; - 
f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, 1); -} - -void f2fs_update_extent_cache_range(struct dnode_of_data *dn, - pgoff_t fofs, block_t blkaddr, unsigned int len) - -{ - if (!f2fs_may_extent_tree(dn->inode)) - return; - - f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, len); + INIT_RADIX_TREE(&eti->extent_tree_root, GFP_NOIO); + mutex_init(&eti->extent_tree_lock); + INIT_LIST_HEAD(&eti->extent_list); + spin_lock_init(&eti->extent_lock); + atomic_set(&eti->total_ext_tree, 0); + INIT_LIST_HEAD(&eti->zombie_list); + atomic_set(&eti->total_zombie_tree, 0); + atomic_set(&eti->total_ext_node, 0); } void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi) { - INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO); - mutex_init(&sbi->extent_tree_lock); - INIT_LIST_HEAD(&sbi->extent_list); - spin_lock_init(&sbi->extent_lock); - atomic_set(&sbi->total_ext_tree, 0); - INIT_LIST_HEAD(&sbi->zombie_list); - atomic_set(&sbi->total_zombie_tree, 0); - atomic_set(&sbi->total_ext_node, 0); + __init_extent_tree_info(&sbi->extent_tree[EX_READ]); } int __init f2fs_create_extent_cache(void) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 04fdf010bb77..7c68bedee649 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -596,16 +596,22 @@ enum { /* dirty segments threshold for triggering CP */ #define DEFAULT_DIRTY_THRESHOLD 4 +#define RECOVERY_MAX_RA_BLOCKS BIO_MAX_VECS +#define RECOVERY_MIN_RA_BLOCKS 1 + +#define F2FS_ONSTACK_PAGES 16 /* nr of onstack pages */ + /* for in-memory extent cache entry */ #define F2FS_MIN_EXTENT_LEN 64 /* minimum extent length */ /* number of extent info in extent cache we try to shrink */ #define READ_EXTENT_CACHE_SHRINK_NUMBER 128 -#define RECOVERY_MAX_RA_BLOCKS BIO_MAX_VECS -#define RECOVERY_MIN_RA_BLOCKS 1 - -#define F2FS_ONSTACK_PAGES 16 /* nr of onstack pages */ +/* extent cache type */ +enum extent_type { + EX_READ, + NR_EXTENT_CACHES, +}; struct rb_entry { struct rb_node rb_node; /* rb node located in rb-tree */ @@ -621,10 +627,17 
@@ struct rb_entry { struct extent_info { unsigned int fofs; /* start offset in a file */ unsigned int len; /* length of the extent */ - block_t blk; /* start block address of the extent */ + union { + /* read extent_cache */ + struct { + /* start block address of the extent */ + block_t blk; #ifdef CONFIG_F2FS_FS_COMPRESSION - unsigned int c_len; /* physical extent length of compressed blocks */ + /* physical extent length of compressed blocks */ + unsigned int c_len; #endif + }; + }; }; struct extent_node { @@ -636,13 +649,25 @@ struct extent_node { struct extent_tree { nid_t ino; /* inode number */ + enum extent_type type; /* keep the extent tree type */ struct rb_root_cached root; /* root of extent info rb-tree */ struct extent_node *cached_en; /* recently accessed extent node */ - struct extent_info largest; /* largested extent info */ struct list_head list; /* to be used by sbi->zombie_list */ rwlock_t lock; /* protect extent info rb-tree */ atomic_t node_cnt; /* # of extent node in rb-tree*/ bool largest_updated; /* largest extent updated */ + struct extent_info largest; /* largest cached extent for EX_READ */ +}; + +struct extent_tree_info { + struct radix_tree_root extent_tree_root;/* cache extent cache entries */ + struct mutex extent_tree_lock; /* locking extent radix tree */ + struct list_head extent_list; /* lru list for shrinker */ + spinlock_t extent_lock; /* locking extent lru list */ + atomic_t total_ext_tree; /* extent tree count */ + struct list_head zombie_list; /* extent zombie tree list */ + atomic_t total_zombie_tree; /* extent zombie tree count */ + atomic_t total_ext_node; /* extent info count */ }; /* @@ -805,7 +830,8 @@ struct f2fs_inode_info { struct list_head dirty_list; /* dirty list for dirs and files */ struct list_head gdirty_list; /* linked in global dirty list */ struct task_struct *atomic_write_task; /* store atomic write task */ - struct extent_tree *extent_tree; /* cached extent_tree entry */ + struct extent_tree 
*extent_tree[NR_EXTENT_CACHES]; + /* cached extent_tree entry */ struct inode *cow_inode; /* copy-on-write inode for atomic write */ /* avoid racing between foreground op and gc */ @@ -1626,14 +1652,7 @@ struct f2fs_sb_info { struct mutex flush_lock; /* for flush exclusion */ /* for extent tree cache */ - struct radix_tree_root extent_tree_root;/* cache extent cache entries */ - struct mutex extent_tree_lock; /* locking extent radix tree */ - struct list_head extent_list; /* lru list for shrinker */ - spinlock_t extent_lock; /* locking extent lru list */ - atomic_t total_ext_tree; /* extent tree count */ - struct list_head zombie_list; /* extent zombie tree list */ - atomic_t total_zombie_tree; /* extent zombie tree count */ - atomic_t total_ext_node; /* extent info count */ + struct extent_tree_info extent_tree[NR_EXTENT_CACHES]; /* basic filesystem units */ unsigned int log_sectors_per_block; /* log2 sectors per block */ @@ -1718,10 +1737,14 @@ struct f2fs_sb_info { unsigned int segment_count[2]; /* # of allocated segments */ unsigned int block_count[2]; /* # of allocated blocks */ atomic_t inplace_count; /* # of inplace update */ - atomic64_t total_hit_ext; /* # of lookup extent cache */ - atomic64_t read_hit_rbtree; /* # of hit rbtree extent node */ - atomic64_t read_hit_largest; /* # of hit largest extent node */ - atomic64_t read_hit_cached; /* # of hit cached extent node */ + /* # of lookup extent cache */ + atomic64_t total_hit_ext[NR_EXTENT_CACHES]; + /* # of hit rbtree extent node */ + atomic64_t read_hit_rbtree[NR_EXTENT_CACHES]; + /* # of hit cached extent node */ + atomic64_t read_hit_cached[NR_EXTENT_CACHES]; + /* # of hit largest extent node in read extent cache */ + atomic64_t read_hit_largest; atomic_t inline_xattr; /* # of inline_xattr inodes */ atomic_t inline_inode; /* # of inline_data inodes */ atomic_t inline_dir; /* # of inline_dentry inodes */ @@ -3823,9 +3846,17 @@ struct f2fs_stat_info { struct f2fs_sb_info *sbi; int all_area_segs, 
sit_area_segs, nat_area_segs, ssa_area_segs; int main_area_segs, main_area_sections, main_area_zones; - unsigned long long hit_largest, hit_cached, hit_rbtree; - unsigned long long hit_total, total_ext; - int ext_tree, zombie_tree, ext_node; + unsigned long long hit_cached[NR_EXTENT_CACHES]; + unsigned long long hit_rbtree[NR_EXTENT_CACHES]; + unsigned long long total_ext[NR_EXTENT_CACHES]; + unsigned long long hit_total[NR_EXTENT_CACHES]; + int ext_tree[NR_EXTENT_CACHES]; + int zombie_tree[NR_EXTENT_CACHES]; + int ext_node[NR_EXTENT_CACHES]; + /* to count memory footprint */ + unsigned long long ext_mem[NR_EXTENT_CACHES]; + /* for read extent cache */ + unsigned long long hit_largest; int ndirty_node, ndirty_dent, ndirty_meta, ndirty_imeta; int ndirty_data, ndirty_qdata; unsigned int ndirty_dirs, ndirty_files, nquota_files, ndirty_all; @@ -3884,10 +3915,10 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) #define stat_other_skip_bggc_count(sbi) ((sbi)->other_skip_bggc++) #define stat_inc_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]++) #define stat_dec_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]--) -#define stat_inc_total_hit(sbi) (atomic64_inc(&(sbi)->total_hit_ext)) -#define stat_inc_rbtree_node_hit(sbi) (atomic64_inc(&(sbi)->read_hit_rbtree)) +#define stat_inc_total_hit(sbi, type) (atomic64_inc(&(sbi)->total_hit_ext[type])) +#define stat_inc_rbtree_node_hit(sbi, type) (atomic64_inc(&(sbi)->read_hit_rbtree[type])) #define stat_inc_largest_node_hit(sbi) (atomic64_inc(&(sbi)->read_hit_largest)) -#define stat_inc_cached_node_hit(sbi) (atomic64_inc(&(sbi)->read_hit_cached)) +#define stat_inc_cached_node_hit(sbi, type) (atomic64_inc(&(sbi)->read_hit_cached[type])) #define stat_inc_inline_xattr(inode) \ do { \ if (f2fs_has_inline_xattr(inode)) \ @@ -4010,10 +4041,10 @@ void f2fs_update_sit_info(struct f2fs_sb_info *sbi); #define stat_other_skip_bggc_count(sbi) do { } while (0) #define stat_inc_dirty_inode(sbi, type) do { } while (0) 
#define stat_dec_dirty_inode(sbi, type) do { } while (0) -#define stat_inc_total_hit(sbi) do { } while (0) -#define stat_inc_rbtree_node_hit(sbi) do { } while (0) +#define stat_inc_total_hit(sbi, type) do { } while (0) +#define stat_inc_rbtree_node_hit(sbi, type) do { } while (0) #define stat_inc_largest_node_hit(sbi) do { } while (0) -#define stat_inc_cached_node_hit(sbi) do { } while (0) +#define stat_inc_cached_node_hit(sbi, type) do { } while (0) #define stat_inc_inline_xattr(inode) do { } while (0) #define stat_dec_inline_xattr(inode) do { } while (0) #define stat_inc_inline_inode(inode) do { } while (0) @@ -4119,20 +4150,23 @@ struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root, bool force, bool *leftmost); bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi, struct rb_root_cached *root, bool check_key); -unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink); void f2fs_init_extent_tree(struct inode *inode, struct page *ipage); void f2fs_drop_extent_tree(struct inode *inode); -unsigned int f2fs_destroy_extent_node(struct inode *inode); +void f2fs_destroy_extent_node(struct inode *inode); void f2fs_destroy_extent_tree(struct inode *inode); -bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs, - struct extent_info *ei); -void f2fs_update_extent_cache(struct dnode_of_data *dn); -void f2fs_update_extent_cache_range(struct dnode_of_data *dn, - pgoff_t fofs, block_t blkaddr, unsigned int len); void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi); int __init f2fs_create_extent_cache(void); void f2fs_destroy_extent_cache(void); +/* read extent cache ops */ +bool f2fs_lookup_read_extent_cache(struct inode *inode, pgoff_t pgofs, + struct extent_info *ei); +void f2fs_update_read_extent_cache(struct dnode_of_data *dn); +void f2fs_update_read_extent_cache_range(struct dnode_of_data *dn, + pgoff_t fofs, block_t blkaddr, unsigned int len); +unsigned int f2fs_shrink_read_extent_tree(struct f2fs_sb_info *sbi, 
+ int nr_shrink); + /* * sysfs.c */ @@ -4202,9 +4236,9 @@ int f2fs_write_multi_pages(struct compress_ctx *cc, struct writeback_control *wbc, enum iostat_type io_type); int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index); -void f2fs_update_extent_tree_range_compressed(struct inode *inode, - pgoff_t fofs, block_t blkaddr, unsigned int llen, - unsigned int c_len); +void f2fs_update_read_extent_tree_range_compressed(struct inode *inode, + pgoff_t fofs, block_t blkaddr, + unsigned int llen, unsigned int c_len); int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, unsigned nr_pages, sector_t *last_block_in_bio, bool is_readahead, bool for_write); @@ -4285,9 +4319,10 @@ static inline bool f2fs_load_compressed_page(struct f2fs_sb_info *sbi, static inline void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi, nid_t ino) { } #define inc_compr_inode_stat(inode) do { } while (0) -static inline void f2fs_update_extent_tree_range_compressed(struct inode *inode, - pgoff_t fofs, block_t blkaddr, unsigned int llen, - unsigned int c_len) { } +static inline void f2fs_update_read_extent_tree_range_compressed( + struct inode *inode, + pgoff_t fofs, block_t blkaddr, + unsigned int llen, unsigned int c_len) { } #endif static inline int set_compress_context(struct inode *inode) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index ab0a0d3730f6..cbe7c24065c7 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -618,7 +618,7 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) */ fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) + ofs; - f2fs_update_extent_cache_range(dn, fofs, 0, len); + f2fs_update_read_extent_cache_range(dn, fofs, 0, len); dec_valid_block_count(sbi, dn->inode, nr_free); } dn->ofs_in_node = ofs; @@ -1496,7 +1496,7 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start, f2fs_set_data_blkaddr(dn); } - f2fs_update_extent_cache_range(dn, start, 0, index - start); + 
f2fs_update_read_extent_cache_range(dn, start, 0, index - start); return ret; } @@ -2558,7 +2558,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, struct f2fs_map_blocks map = { .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE, .m_may_create = false }; - struct extent_info ei = {0, 0, 0}; + struct extent_info ei = {0, }; pgoff_t pg_start, pg_end, next_pgofs; unsigned int blk_per_seg = sbi->blocks_per_seg; unsigned int total = 0, sec_num; @@ -2590,7 +2590,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, * lookup mapping info in extent cache, skip defragmenting if physical * block addresses are continuous. */ - if (f2fs_lookup_extent_cache(inode, pg_start, &ei)) { + if (f2fs_lookup_read_extent_cache(inode, pg_start, &ei)) { if (ei.fofs + ei.len >= pg_end) goto out; } diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index d19e26b2e875..f0c6506d8975 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1146,7 +1146,7 @@ static int ra_data_block(struct inode *inode, pgoff_t index) struct address_space *mapping = inode->i_mapping; struct dnode_of_data dn; struct page *page; - struct extent_info ei = {0, 0, 0}; + struct extent_info ei = {0, }; struct f2fs_io_info fio = { .sbi = sbi, .ino = inode->i_ino, @@ -1164,7 +1164,7 @@ static int ra_data_block(struct inode *inode, pgoff_t index) if (!page) return -ENOMEM; - if (f2fs_lookup_extent_cache(inode, index, &ei)) { + if (f2fs_lookup_read_extent_cache(inode, index, &ei)) { dn.data_blkaddr = ei.blk + index - ei.fofs; if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr, DATA_GENERIC_ENHANCE_READ))) { diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 2c705c60019b..086f201f15a0 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -262,8 +262,8 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) return false; } - if (fi->extent_tree) { - struct extent_info *ei = &fi->extent_tree->largest; + if (fi->extent_tree[EX_READ]) { + struct extent_info *ei = 
&fi->extent_tree[EX_READ]->largest; if (ei->len && (!f2fs_is_valid_blkaddr(sbi, ei->blk, @@ -607,7 +607,7 @@ retry: void f2fs_update_inode(struct inode *inode, struct page *node_page) { struct f2fs_inode *ri; - struct extent_tree *et = F2FS_I(inode)->extent_tree; + struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ]; f2fs_wait_on_page_writeback(node_page, NODE, true, true); set_page_dirty(node_page); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 84b147966080..07419c3e42a5 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -86,9 +86,11 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type) mem_size >>= PAGE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); } else if (type == READ_EXTENT_CACHE) { - mem_size = (atomic_read(&sbi->total_ext_tree) * + struct extent_tree_info *eti = &sbi->extent_tree[EX_READ]; + + mem_size = (atomic_read(&eti->total_ext_tree) * sizeof(struct extent_tree) + - atomic_read(&sbi->total_ext_node) * + atomic_read(&eti->total_ext_node) * sizeof(struct extent_node)) >> PAGE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); } else if (type == DISCARD_CACHE) { @@ -859,7 +861,7 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) blkaddr = data_blkaddr(dn->inode, dn->node_page, dn->ofs_in_node + 1); - f2fs_update_extent_tree_range_compressed(dn->inode, + f2fs_update_read_extent_tree_range_compressed(dn->inode, index, blkaddr, F2FS_I(dn->inode)->i_cluster_size, c_len); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 51de358bc452..8722d1a13c17 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -450,7 +450,8 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg) /* try to shrink extent cache when there is no enough memory */ if (!f2fs_available_free_memory(sbi, READ_EXTENT_CACHE)) - f2fs_shrink_extent_tree(sbi, READ_EXTENT_CACHE_SHRINK_NUMBER); + f2fs_shrink_read_extent_tree(sbi, + READ_EXTENT_CACHE_SHRINK_NUMBER); /* check the # 
of cached NAT entries */ if (!f2fs_available_free_memory(sbi, NAT_ENTRIES)) diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c index dd3c3c7a90ec..33c490e69ae3 100644 --- a/fs/f2fs/shrinker.c +++ b/fs/f2fs/shrinker.c @@ -28,10 +28,13 @@ static unsigned long __count_free_nids(struct f2fs_sb_info *sbi) return count > 0 ? count : 0; } -static unsigned long __count_extent_cache(struct f2fs_sb_info *sbi) +static unsigned long __count_extent_cache(struct f2fs_sb_info *sbi, + enum extent_type type) { - return atomic_read(&sbi->total_zombie_tree) + - atomic_read(&sbi->total_ext_node); + struct extent_tree_info *eti = &sbi->extent_tree[type]; + + return atomic_read(&eti->total_zombie_tree) + + atomic_read(&eti->total_ext_node); } unsigned long f2fs_shrink_count(struct shrinker *shrink, @@ -53,8 +56,8 @@ unsigned long f2fs_shrink_count(struct shrinker *shrink, } spin_unlock(&f2fs_list_lock); - /* count extent cache entries */ - count += __count_extent_cache(sbi); + /* count read extent cache entries */ + count += __count_extent_cache(sbi, EX_READ); /* count clean nat cache entries */ count += __count_nat_entries(sbi); @@ -99,8 +102,8 @@ unsigned long f2fs_shrink_scan(struct shrinker *shrink, sbi->shrinker_run_no = run_no; - /* shrink extent cache entries */ - freed += f2fs_shrink_extent_tree(sbi, nr >> 1); + /* shrink read extent cache entries */ + freed += f2fs_shrink_read_extent_tree(sbi, nr >> 1); /* shrink clean nat cache entries */ if (freed < nr) @@ -130,7 +133,7 @@ void f2fs_join_shrinker(struct f2fs_sb_info *sbi) void f2fs_leave_shrinker(struct f2fs_sb_info *sbi) { - f2fs_shrink_extent_tree(sbi, __count_extent_cache(sbi)); + f2fs_shrink_read_extent_tree(sbi, __count_extent_cache(sbi, EX_READ)); spin_lock(&f2fs_list_lock); list_del_init(&sbi->s_list); diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 7fbfce498472..2bb37892d2ba 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -48,6 +48,7 @@ 
TRACE_DEFINE_ENUM(CP_DISCARD); TRACE_DEFINE_ENUM(CP_TRIMMED); TRACE_DEFINE_ENUM(CP_PAUSE); TRACE_DEFINE_ENUM(CP_RESIZE); +TRACE_DEFINE_ENUM(EX_READ); #define show_block_type(type) \ __print_symbolic(type, \ @@ -1522,28 +1523,31 @@ TRACE_EVENT(f2fs_issue_flush, TRACE_EVENT(f2fs_lookup_extent_tree_start, - TP_PROTO(struct inode *inode, unsigned int pgofs), + TP_PROTO(struct inode *inode, unsigned int pgofs, enum extent_type type), - TP_ARGS(inode, pgofs), + TP_ARGS(inode, pgofs, type), TP_STRUCT__entry( __field(dev_t, dev) __field(ino_t, ino) __field(unsigned int, pgofs) + __field(enum extent_type, type) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->pgofs = pgofs; + __entry->type = type; ), - TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u", + TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, type = %s", show_dev_ino(__entry), - __entry->pgofs) + __entry->pgofs, + __entry->type == EX_READ ? "Read" : "N/A") ); -TRACE_EVENT_CONDITION(f2fs_lookup_extent_tree_end, +TRACE_EVENT_CONDITION(f2fs_lookup_read_extent_tree_end, TP_PROTO(struct inode *inode, unsigned int pgofs, struct extent_info *ei), @@ -1557,8 +1561,8 @@ TRACE_EVENT_CONDITION(f2fs_lookup_extent_tree_end, __field(ino_t, ino) __field(unsigned int, pgofs) __field(unsigned int, fofs) - __field(u32, blk) __field(unsigned int, len) + __field(u32, blk) ), TP_fast_assign( @@ -1566,26 +1570,26 @@ TRACE_EVENT_CONDITION(f2fs_lookup_extent_tree_end, __entry->ino = inode->i_ino; __entry->pgofs = pgofs; __entry->fofs = ei->fofs; - __entry->blk = ei->blk; __entry->len = ei->len; + __entry->blk = ei->blk; ), TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, " - "ext_info(fofs: %u, blk: %u, len: %u)", + "read_ext_info(fofs: %u, len: %u, blk: %u)", show_dev_ino(__entry), __entry->pgofs, __entry->fofs, - __entry->blk, - __entry->len) + __entry->len, + __entry->blk) ); -TRACE_EVENT(f2fs_update_extent_tree_range, +TRACE_EVENT(f2fs_update_read_extent_tree_range, - TP_PROTO(struct inode 
*inode, unsigned int pgofs, block_t blkaddr, - unsigned int len, + TP_PROTO(struct inode *inode, unsigned int pgofs, unsigned int len, + block_t blkaddr, unsigned int c_len), - TP_ARGS(inode, pgofs, blkaddr, len, c_len), + TP_ARGS(inode, pgofs, len, blkaddr, c_len), TP_STRUCT__entry( __field(dev_t, dev) @@ -1600,67 +1604,73 @@ TRACE_EVENT(f2fs_update_extent_tree_range, __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->pgofs = pgofs; - __entry->blk = blkaddr; __entry->len = len; + __entry->blk = blkaddr; __entry->c_len = c_len; ), TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, " - "blkaddr = %u, len = %u, " - "c_len = %u", + "len = %u, blkaddr = %u, c_len = %u", show_dev_ino(__entry), __entry->pgofs, - __entry->blk, __entry->len, + __entry->blk, __entry->c_len) ); TRACE_EVENT(f2fs_shrink_extent_tree, TP_PROTO(struct f2fs_sb_info *sbi, unsigned int node_cnt, - unsigned int tree_cnt), + unsigned int tree_cnt, enum extent_type type), - TP_ARGS(sbi, node_cnt, tree_cnt), + TP_ARGS(sbi, node_cnt, tree_cnt, type), TP_STRUCT__entry( __field(dev_t, dev) __field(unsigned int, node_cnt) __field(unsigned int, tree_cnt) + __field(enum extent_type, type) ), TP_fast_assign( __entry->dev = sbi->sb->s_dev; __entry->node_cnt = node_cnt; __entry->tree_cnt = tree_cnt; + __entry->type = type; ), - TP_printk("dev = (%d,%d), shrunk: node_cnt = %u, tree_cnt = %u", + TP_printk("dev = (%d,%d), shrunk: node_cnt = %u, tree_cnt = %u, type = %s", show_dev(__entry->dev), __entry->node_cnt, - __entry->tree_cnt) + __entry->tree_cnt, + __entry->type == EX_READ ? 
"Read" : "N/A") ); TRACE_EVENT(f2fs_destroy_extent_tree, - TP_PROTO(struct inode *inode, unsigned int node_cnt), + TP_PROTO(struct inode *inode, unsigned int node_cnt, + enum extent_type type), - TP_ARGS(inode, node_cnt), + TP_ARGS(inode, node_cnt, type), TP_STRUCT__entry( __field(dev_t, dev) __field(ino_t, ino) __field(unsigned int, node_cnt) + __field(enum extent_type, type) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->node_cnt = node_cnt; + __entry->type = type; ), - TP_printk("dev = (%d,%d), ino = %lu, destroyed: node_cnt = %u", + TP_printk("dev = (%d,%d), ino = %lu, destroyed: node_cnt = %u, type = %s", show_dev_ino(__entry), - __entry->node_cnt) + __entry->node_cnt, + __entry->type == EX_READ ? "Read" : "N/A") ); DECLARE_EVENT_CLASS(f2fs_sync_dirty_inodes, From 72840cccc0a1a0a0dc1bb27b669a9111be6d0f6a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 2 Dec 2022 13:51:09 -0800 Subject: [PATCH 3781/4122] f2fs: allocate the extent_cache by default Let's allocate it to remove the runtime complexity. Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 38 +++++++++++++++++++------------------- fs/f2fs/f2fs.h | 3 ++- fs/f2fs/inode.c | 6 ++++-- fs/f2fs/namei.c | 4 ++-- 4 files changed, 27 insertions(+), 24 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 654a14ab8977..305f969e3ad1 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -47,20 +47,23 @@ static bool __may_read_extent_tree(struct inode *inode) return S_ISREG(inode->i_mode); } +static bool __init_may_extent_tree(struct inode *inode, enum extent_type type) +{ + if (type == EX_READ) + return __may_read_extent_tree(inode); + return false; +} + static bool __may_extent_tree(struct inode *inode, enum extent_type type) { - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - /* * for recovered files during mount do not create extents * if shrinker is not registered. 
*/ - if (list_empty(&sbi->s_list)) + if (list_empty(&F2FS_I_SB(inode)->s_list)) return false; - if (type == EX_READ) - return __may_read_extent_tree(inode); - return false; + return __init_may_extent_tree(inode, type); } static void __try_update_largest_extent(struct extent_tree *et, @@ -439,20 +442,18 @@ static void __drop_largest_extent(struct extent_tree *et, } } -/* return true, if inode page is changed */ -static void __f2fs_init_extent_tree(struct inode *inode, struct page *ipage, - enum extent_type type) +void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct extent_tree_info *eti = &sbi->extent_tree[type]; - struct f2fs_extent *i_ext = ipage ? &F2FS_INODE(ipage)->i_ext : NULL; + struct extent_tree_info *eti = &sbi->extent_tree[EX_READ]; + struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext; struct extent_tree *et; struct extent_node *en; struct extent_info ei; - if (!__may_extent_tree(inode, type)) { + if (!__may_extent_tree(inode, EX_READ)) { /* drop largest read extent */ - if (type == EX_READ && i_ext && i_ext->len) { + if (i_ext && i_ext->len) { f2fs_wait_on_page_writeback(ipage, NODE, true, true); i_ext->len = 0; set_page_dirty(ipage); @@ -460,13 +461,11 @@ static void __f2fs_init_extent_tree(struct inode *inode, struct page *ipage, goto out; } - et = __grab_extent_tree(inode, type); + et = __grab_extent_tree(inode, EX_READ); if (!i_ext || !i_ext->len) goto out; - BUG_ON(type != EX_READ); - get_read_extent_info(&ei, i_ext); write_lock(&et->lock); @@ -486,14 +485,15 @@ static void __f2fs_init_extent_tree(struct inode *inode, struct page *ipage, unlock_out: write_unlock(&et->lock); out: - if (type == EX_READ && !F2FS_I(inode)->extent_tree[EX_READ]) + if (!F2FS_I(inode)->extent_tree[EX_READ]) set_inode_flag(inode, FI_NO_EXTENT); } -void f2fs_init_extent_tree(struct inode *inode, struct page *ipage) +void f2fs_init_extent_tree(struct inode *inode) { /* initialize read cache */ - 
__f2fs_init_extent_tree(inode, ipage, EX_READ); + if (__init_may_extent_tree(inode, EX_READ)) + __grab_extent_tree(inode, EX_READ); } static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 7c68bedee649..ec52e06f8e61 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4150,7 +4150,7 @@ struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root, bool force, bool *leftmost); bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi, struct rb_root_cached *root, bool check_key); -void f2fs_init_extent_tree(struct inode *inode, struct page *ipage); +void f2fs_init_extent_tree(struct inode *inode); void f2fs_drop_extent_tree(struct inode *inode); void f2fs_destroy_extent_node(struct inode *inode); void f2fs_destroy_extent_tree(struct inode *inode); @@ -4159,6 +4159,7 @@ int __init f2fs_create_extent_cache(void); void f2fs_destroy_extent_cache(void); /* read extent cache ops */ +void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage); bool f2fs_lookup_read_extent_cache(struct inode *inode, pgoff_t pgofs, struct extent_info *ei); void f2fs_update_read_extent_cache(struct dnode_of_data *dn); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 086f201f15a0..c845c16f97d0 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -392,8 +392,6 @@ static int do_read_inode(struct inode *inode) fi->i_pino = le32_to_cpu(ri->i_pino); fi->i_dir_level = ri->i_dir_level; - f2fs_init_extent_tree(inode, node_page); - get_inline_info(inode, ri); fi->i_extra_isize = f2fs_has_extra_attr(inode) ? 
@@ -479,6 +477,10 @@ static int do_read_inode(struct inode *inode) } init_idisk_time(inode); + + /* Need all the flag bits */ + f2fs_init_read_extent_tree(inode, node_page); + f2fs_put_page(node_page, 1); stat_inc_inline_xattr(inode); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 58a91ce8fe08..46de782c2baa 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -284,8 +284,6 @@ static struct inode *f2fs_new_inode(struct user_namespace *mnt_userns, } F2FS_I(inode)->i_inline_xattr_size = xattr_size; - f2fs_init_extent_tree(inode, NULL); - F2FS_I(inode)->i_flags = f2fs_mask_flags(mode, F2FS_I(dir)->i_flags & F2FS_FL_INHERITED); @@ -311,6 +309,8 @@ static struct inode *f2fs_new_inode(struct user_namespace *mnt_userns, f2fs_set_inode_flags(inode); + f2fs_init_extent_tree(inode); + trace_f2fs_new_inode(inode, 0); return inode; From 71644dff481180ba024ac4f5cb1f068756357adf Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 1 Dec 2022 17:37:15 -0800 Subject: [PATCH 3782/4122] f2fs: add block_age-based extent cache This patch introduces a runtime hot/cold data separation method for f2fs, in order to improve the accuracy for data temperature classification, reduce the garbage collection overhead after long-term data updates. Enhanced hot/cold data separation can record data block update frequency as "age" of the extent per inode, and take use of the age info to indicate better temperature type for data block allocation: - It records total data blocks allocated since mount; - When file extent has been updated, it calculate the count of data blocks allocated since last update as the age of the extent; - Before the data block allocated, it searches for the age info and chooses the suitable segment for allocation. 
Test and result: - Prepare: create about 30000 files * 3% for cold files (with cold file extension like .apk, from 3M to 10M) * 50% for warm files (with random file extension like .FcDxq, from 1K to 4M) * 47% for hot files (with hot file extension like .db, from 1K to 256K) - create(5%)/random update(90%)/delete(5%) the files * total write amount is about 70G * fsync will be called for .db files, and buffered write will be used for other files The storage of the test device is large enough (128G) that it will not switch to SSR mode during the test. Benefit: the dirty segment count increment is reduced by about 14% - before: Dirty +21110 - after: Dirty +18286 Signed-off-by: qixiaoyu1 Signed-off-by: xiongping1 Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 14 ++ Documentation/filesystems/f2fs.rst | 4 + fs/f2fs/debug.c | 21 +++ fs/f2fs/extent_cache.c | 183 +++++++++++++++++++++++- fs/f2fs/f2fs.h | 38 +++++ fs/f2fs/file.c | 1 + fs/f2fs/inode.c | 1 + fs/f2fs/node.c | 10 +- fs/f2fs/node.h | 1 + fs/f2fs/segment.c | 33 +++++ fs/f2fs/shrinker.c | 10 +- fs/f2fs/super.c | 14 ++ fs/f2fs/sysfs.c | 24 ++++ include/trace/events/f2fs.h | 86 ++++++++++- 14 files changed, 430 insertions(+), 10 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 84a009aab1a1..9e3756625a81 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -655,3 +655,17 @@ Description: When space utilization exceeds this, do background DISCARD aggressi Does DISCARD forcibly in a period of given min_discard_issue_time when the number of discards is not 0 and set discard granularity to 1. Default: 80 + +What: /sys/fs/f2fs//hot_data_age_threshold +Date: November 2022 +Contact: "Ping Xiong" +Description: When DATA SEPARATION is on, it controls the age threshold to indicate + the data blocks as hot. By default it was initialized as 262144 blocks + (equals to 1GB).
+ +What: /sys/fs/f2fs//warm_data_age_threshold +Date: November 2022 +Contact: "Ping Xiong" +Description: When DATA SEPARATION is on, it controls the age threshold to indicate + the data blocks as warm. By default it was initialized as 2621440 blocks + (equals to 10GB). diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst index 67e1f3e86f32..220f3e0d3f55 100644 --- a/Documentation/filesystems/f2fs.rst +++ b/Documentation/filesystems/f2fs.rst @@ -347,6 +347,10 @@ memory=%s Control memory mode. This supports "normal" and "low" modes. Because of the nature of low memory devices, in this mode, f2fs will try to save memory sometimes by sacrificing performance. "normal" mode is the default mode and same as before. +age_extent_cache Enable an age extent cache based on rb-tree. It records + data block update frequency of the extent per inode, in + order to provide better temperature hints for data block + allocation. ======================== ============================================================ Debugfs Entries diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index a9baa121d829..8f1ef742551f 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -88,6 +88,9 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->hit_largest = atomic64_read(&sbi->read_hit_largest); si->hit_total[EX_READ] += si->hit_largest; + /* block age extent_cache only */ + si->allocated_data_blocks = atomic64_read(&sbi->allocated_data_blocks); + /* validation check of the segment numbers */ si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES); si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS); @@ -516,6 +519,22 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n", si->ext_tree[EX_READ], si->zombie_tree[EX_READ], si->ext_node[EX_READ]); + seq_puts(s, "\nExtent Cache (Block Age):\n"); + seq_printf(s, " - Allocated Data Blocks: %llu\n", + si->allocated_data_blocks); + seq_printf(s, " - Hit Count: 
L1:%llu L2:%llu\n", + si->hit_cached[EX_BLOCK_AGE], + si->hit_rbtree[EX_BLOCK_AGE]); + seq_printf(s, " - Hit Ratio: %llu%% (%llu / %llu)\n", + !si->total_ext[EX_BLOCK_AGE] ? 0 : + div64_u64(si->hit_total[EX_BLOCK_AGE] * 100, + si->total_ext[EX_BLOCK_AGE]), + si->hit_total[EX_BLOCK_AGE], + si->total_ext[EX_BLOCK_AGE]); + seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n", + si->ext_tree[EX_BLOCK_AGE], + si->zombie_tree[EX_BLOCK_AGE], + si->ext_node[EX_BLOCK_AGE]); seq_puts(s, "\nBalancing F2FS Async:\n"); seq_printf(s, " - DIO (R: %4d, W: %4d)\n", si->nr_dio_read, si->nr_dio_write); @@ -586,6 +605,8 @@ static int stat_show(struct seq_file *s, void *v) si->cache_mem >> 10); seq_printf(s, " - read extent cache: %llu KB\n", si->ext_mem[EX_READ] >> 10); + seq_printf(s, " - block age extent cache: %llu KB\n", + si->ext_mem[EX_BLOCK_AGE] >> 10); seq_printf(s, " - paged : %llu KB\n", si->page_mem >> 10); } diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 305f969e3ad1..1bd38a78ebba 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -6,6 +6,10 @@ * Copyright (c) 2015 Samsung Electronics * Authors: Jaegeuk Kim * Chao Yu + * + * block_age-based extent cache added by: + * Copyright (c) 2022 xiaomi Co., Ltd. 
+ * http://www.xiaomi.com/ */ #include @@ -18,6 +22,7 @@ static void __set_extent_info(struct extent_info *ei, unsigned int fofs, unsigned int len, block_t blk, bool keep_clen, + unsigned long age, unsigned long last_blocks, enum extent_type type) { ei->fofs = fofs; @@ -30,6 +35,9 @@ static void __set_extent_info(struct extent_info *ei, #ifdef CONFIG_F2FS_FS_COMPRESSION ei->c_len = 0; #endif + } else if (type == EX_BLOCK_AGE) { + ei->age = age; + ei->last_blocks = last_blocks; } } @@ -47,10 +55,27 @@ static bool __may_read_extent_tree(struct inode *inode) return S_ISREG(inode->i_mode); } +static bool __may_age_extent_tree(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + if (!test_opt(sbi, AGE_EXTENT_CACHE)) + return false; + /* don't cache block age info for cold file */ + if (is_inode_flag_set(inode, FI_COMPRESSED_FILE)) + return false; + if (file_is_cold(inode)) + return false; + + return S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode); +} + static bool __init_may_extent_tree(struct inode *inode, enum extent_type type) { if (type == EX_READ) return __may_read_extent_tree(inode); + else if (type == EX_BLOCK_AGE) + return __may_age_extent_tree(inode); return false; } @@ -90,6 +115,11 @@ static bool __is_extent_mergeable(struct extent_info *back, #endif return (back->fofs + back->len == front->fofs && back->blk + back->len == front->blk); + } else if (type == EX_BLOCK_AGE) { + return (back->fofs + back->len == front->fofs && + abs(back->age - front->age) <= SAME_AGE_REGION && + abs(back->last_blocks - front->last_blocks) <= + SAME_AGE_REGION); } return false; } @@ -489,11 +519,22 @@ out: set_inode_flag(inode, FI_NO_EXTENT); } +void f2fs_init_age_extent_tree(struct inode *inode) +{ + if (!__init_may_extent_tree(inode, EX_BLOCK_AGE)) + return; + __grab_extent_tree(inode, EX_BLOCK_AGE); +} + void f2fs_init_extent_tree(struct inode *inode) { /* initialize read cache */ if (__init_may_extent_tree(inode, EX_READ)) __grab_extent_tree(inode, 
EX_READ); + + /* initialize block age cache */ + if (__init_may_extent_tree(inode, EX_BLOCK_AGE)) + __grab_extent_tree(inode, EX_BLOCK_AGE); } static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs, @@ -544,6 +585,8 @@ out: if (type == EX_READ) trace_f2fs_lookup_read_extent_tree_end(inode, pgofs, ei); + else if (type == EX_BLOCK_AGE) + trace_f2fs_lookup_age_extent_tree_end(inode, pgofs, ei); return ret; } @@ -642,6 +685,10 @@ static void __update_extent_tree_range(struct inode *inode, if (type == EX_READ) trace_f2fs_update_read_extent_tree_range(inode, fofs, len, tei->blk, 0); + else if (type == EX_BLOCK_AGE) + trace_f2fs_update_age_extent_tree_range(inode, fofs, len, + tei->age, tei->last_blocks); + write_lock(&et->lock); if (type == EX_READ) { @@ -694,6 +741,7 @@ static void __update_extent_tree_range(struct inode *inode, __set_extent_info(&ei, end, org_end - end, end - dei.fofs + dei.blk, false, + dei.age, dei.last_blocks, type); en1 = __insert_extent_tree(sbi, et, &ei, NULL, NULL, true); @@ -702,6 +750,7 @@ static void __update_extent_tree_range(struct inode *inode, __set_extent_info(&en->ei, end, en->ei.len - (end - dei.fofs), en->ei.blk + (end - dei.fofs), true, + dei.age, dei.last_blocks, type); next_en = en; } @@ -732,11 +781,15 @@ static void __update_extent_tree_range(struct inode *inode, en = next_en; } + if (type == EX_BLOCK_AGE) + goto update_age_extent_cache; + /* 3. 
update extent in read extent cache */ BUG_ON(type != EX_READ); if (tei->blk) { - __set_extent_info(&ei, fofs, len, tei->blk, false, EX_READ); + __set_extent_info(&ei, fofs, len, tei->blk, false, + 0, 0, EX_READ); if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en)) __insert_extent_tree(sbi, et, &ei, insert_p, insert_parent, leftmost); @@ -758,7 +811,17 @@ static void __update_extent_tree_range(struct inode *inode, et->largest_updated = false; updated = true; } + goto out_read_extent_cache; +update_age_extent_cache: + if (!tei->last_blocks) + goto out_read_extent_cache; + __set_extent_info(&ei, fofs, len, 0, false, + tei->age, tei->last_blocks, EX_BLOCK_AGE); + if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en)) + __insert_extent_tree(sbi, et, &ei, + insert_p, insert_parent, leftmost); +out_read_extent_cache: write_unlock(&et->lock); if (updated) @@ -796,7 +859,7 @@ void f2fs_update_read_extent_tree_range_compressed(struct inode *inode, if (en) goto unlock_out; - __set_extent_info(&ei, fofs, llen, blkaddr, true, EX_READ); + __set_extent_info(&ei, fofs, llen, blkaddr, true, 0, 0, EX_READ); ei.c_len = c_len; if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en)) @@ -807,6 +870,72 @@ unlock_out: } #endif +static unsigned long long __calculate_block_age(unsigned long long new, + unsigned long long old) +{ + unsigned long long diff; + + diff = (new >= old) ? new - (new - old) : new + (old - new); + + return div_u64(diff * LAST_AGE_WEIGHT, 100); +} + +/* This returns a new age and allocated blocks in ei */ +static int __get_new_block_age(struct inode *inode, struct extent_info *ei) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + loff_t f_size = i_size_read(inode); + unsigned long long cur_blocks = + atomic64_read(&sbi->allocated_data_blocks); + + /* + * When I/O is not aligned to a PAGE_SIZE, update will happen to the last + * file block even in seq write. So don't record age for newly last file + * block here. 
+ */ + if ((f_size >> PAGE_SHIFT) == ei->fofs && f_size & (PAGE_SIZE - 1) && + ei->blk == NEW_ADDR) + return -EINVAL; + + if (__lookup_extent_tree(inode, ei->fofs, ei, EX_BLOCK_AGE)) { + unsigned long long cur_age; + + if (cur_blocks >= ei->last_blocks) + cur_age = cur_blocks - ei->last_blocks; + else + /* allocated_data_blocks overflow */ + cur_age = ULLONG_MAX - ei->last_blocks + cur_blocks; + + if (ei->age) + ei->age = __calculate_block_age(cur_age, ei->age); + else + ei->age = cur_age; + ei->last_blocks = cur_blocks; + WARN_ON(ei->age > cur_blocks); + return 0; + } + + f2fs_bug_on(sbi, ei->blk == NULL_ADDR); + + /* the data block was allocated for the first time */ + if (ei->blk == NEW_ADDR) + goto out; + + if (__is_valid_data_blkaddr(ei->blk) && + !f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC_ENHANCE)) { + f2fs_bug_on(sbi, 1); + return -EINVAL; + } +out: + /* + * init block age with zero, this can happen when the block age extent + * was reclaimed due to memory constraint or system reboot + */ + ei->age = 0; + ei->last_blocks = cur_blocks; + return 0; +} + static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type type) { struct extent_info ei; @@ -823,6 +952,10 @@ static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type typ ei.blk = NULL_ADDR; else ei.blk = dn->data_blkaddr; + } else if (type == EX_BLOCK_AGE) { + ei.blk = dn->data_blkaddr; + if (__get_new_block_age(dn->inode, &ei)) + return; } __update_extent_tree_range(dn->inode, &ei, type); } @@ -940,6 +1073,43 @@ unsigned int f2fs_shrink_read_extent_tree(struct f2fs_sb_info *sbi, int nr_shrin return __shrink_extent_tree(sbi, nr_shrink, EX_READ); } +/* block age extent cache operations */ +bool f2fs_lookup_age_extent_cache(struct inode *inode, pgoff_t pgofs, + struct extent_info *ei) +{ + if (!__may_extent_tree(inode, EX_BLOCK_AGE)) + return false; + + return __lookup_extent_tree(inode, pgofs, ei, EX_BLOCK_AGE); +} + +void f2fs_update_age_extent_cache(struct 
dnode_of_data *dn) +{ + return __update_extent_cache(dn, EX_BLOCK_AGE); +} + +void f2fs_update_age_extent_cache_range(struct dnode_of_data *dn, + pgoff_t fofs, unsigned int len) +{ + struct extent_info ei = { + .fofs = fofs, + .len = len, + }; + + if (!__may_extent_tree(dn->inode, EX_BLOCK_AGE)) + return; + + __update_extent_tree_range(dn->inode, &ei, EX_BLOCK_AGE); +} + +unsigned int f2fs_shrink_age_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) +{ + if (!test_opt(sbi, AGE_EXTENT_CACHE)) + return 0; + + return __shrink_extent_tree(sbi, nr_shrink, EX_BLOCK_AGE); +} + static unsigned int __destroy_extent_node(struct inode *inode, enum extent_type type) { @@ -960,6 +1130,7 @@ static unsigned int __destroy_extent_node(struct inode *inode, void f2fs_destroy_extent_node(struct inode *inode) { __destroy_extent_node(inode, EX_READ); + __destroy_extent_node(inode, EX_BLOCK_AGE); } static void __drop_extent_tree(struct inode *inode, enum extent_type type) @@ -988,6 +1159,7 @@ static void __drop_extent_tree(struct inode *inode, enum extent_type type) void f2fs_drop_extent_tree(struct inode *inode) { __drop_extent_tree(inode, EX_READ); + __drop_extent_tree(inode, EX_BLOCK_AGE); } static void __destroy_extent_tree(struct inode *inode, enum extent_type type) @@ -1028,6 +1200,7 @@ static void __destroy_extent_tree(struct inode *inode, enum extent_type type) void f2fs_destroy_extent_tree(struct inode *inode) { __destroy_extent_tree(inode, EX_READ); + __destroy_extent_tree(inode, EX_BLOCK_AGE); } static void __init_extent_tree_info(struct extent_tree_info *eti) @@ -1045,6 +1218,12 @@ static void __init_extent_tree_info(struct extent_tree_info *eti) void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi) { __init_extent_tree_info(&sbi->extent_tree[EX_READ]); + __init_extent_tree_info(&sbi->extent_tree[EX_BLOCK_AGE]); + + /* initialize for block age extents */ + atomic64_set(&sbi->allocated_data_blocks, 0); + sbi->hot_data_age_threshold = DEF_HOT_DATA_AGE_THRESHOLD; + 
sbi->warm_data_age_threshold = DEF_WARM_DATA_AGE_THRESHOLD; } int __init f2fs_create_extent_cache(void) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ec52e06f8e61..e8953c3dc81a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -107,6 +107,7 @@ extern const char *f2fs_fault_name[FAULT_MAX]; #define F2FS_MOUNT_MERGE_CHECKPOINT 0x10000000 #define F2FS_MOUNT_GC_MERGE 0x20000000 #define F2FS_MOUNT_COMPRESS_CACHE 0x40000000 +#define F2FS_MOUNT_AGE_EXTENT_CACHE 0x80000000 #define F2FS_OPTION(sbi) ((sbi)->mount_opt) #define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option) @@ -607,9 +608,22 @@ enum { /* number of extent info in extent cache we try to shrink */ #define READ_EXTENT_CACHE_SHRINK_NUMBER 128 +/* number of age extent info in extent cache we try to shrink */ +#define AGE_EXTENT_CACHE_SHRINK_NUMBER 128 +#define LAST_AGE_WEIGHT 30 +#define SAME_AGE_REGION 1024 + +/* + * Define data block with age less than 1GB as hot data + * define data block with age less than 10GB but more than 1GB as warm data + */ +#define DEF_HOT_DATA_AGE_THRESHOLD 262144 +#define DEF_WARM_DATA_AGE_THRESHOLD 2621440 + /* extent cache type */ enum extent_type { EX_READ, + EX_BLOCK_AGE, NR_EXTENT_CACHES, }; @@ -637,6 +651,13 @@ struct extent_info { unsigned int c_len; #endif }; + /* block age extent_cache */ + struct { + /* block age of the extent */ + unsigned long long age; + /* last total blocks allocated */ + unsigned long long last_blocks; + }; }; }; @@ -1653,6 +1674,11 @@ struct f2fs_sb_info { /* for extent tree cache */ struct extent_tree_info extent_tree[NR_EXTENT_CACHES]; + atomic64_t allocated_data_blocks; /* for block age extent_cache */ + + /* The threshold used for hot and warm data seperation*/ + unsigned int hot_data_age_threshold; + unsigned int warm_data_age_threshold; /* basic filesystem units */ unsigned int log_sectors_per_block; /* log2 sectors per block */ @@ -3857,6 +3883,8 @@ struct f2fs_stat_info { unsigned long long ext_mem[NR_EXTENT_CACHES]; 
/* for read extent cache */ unsigned long long hit_largest; + /* for block age extent cache */ + unsigned long long allocated_data_blocks; int ndirty_node, ndirty_dent, ndirty_meta, ndirty_imeta; int ndirty_data, ndirty_qdata; unsigned int ndirty_dirs, ndirty_files, nquota_files, ndirty_all; @@ -4168,6 +4196,16 @@ void f2fs_update_read_extent_cache_range(struct dnode_of_data *dn, unsigned int f2fs_shrink_read_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink); +/* block age extent cache ops */ +void f2fs_init_age_extent_tree(struct inode *inode); +bool f2fs_lookup_age_extent_cache(struct inode *inode, pgoff_t pgofs, + struct extent_info *ei); +void f2fs_update_age_extent_cache(struct dnode_of_data *dn); +void f2fs_update_age_extent_cache_range(struct dnode_of_data *dn, + pgoff_t fofs, unsigned int len); +unsigned int f2fs_shrink_age_extent_tree(struct f2fs_sb_info *sbi, + int nr_shrink); + /* * sysfs.c */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index cbe7c24065c7..56c23b5e9d65 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -619,6 +619,7 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) + ofs; f2fs_update_read_extent_cache_range(dn, fofs, 0, len); + f2fs_update_age_extent_cache_range(dn, fofs, nr_free); dec_valid_block_count(sbi, dn->inode, nr_free); } dn->ofs_in_node = ofs; diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index c845c16f97d0..ff6cf66ed46b 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -480,6 +480,7 @@ static int do_read_inode(struct inode *inode) /* Need all the flag bits */ f2fs_init_read_extent_tree(inode, node_page); + f2fs_init_age_extent_tree(inode); f2fs_put_page(node_page, 1); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 07419c3e42a5..dde4c0458704 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -60,7 +60,7 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type) avail_ram = val.totalram - val.totalhigh; /* 
- * give 25%, 25%, 50%, 50%, 50% memory for each components respectively + * give 25%, 25%, 50%, 50%, 25%, 25% memory for each components respectively */ if (type == FREE_NIDS) { mem_size = (nm_i->nid_cnt[FREE_NID] * @@ -85,14 +85,16 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type) sizeof(struct ino_entry); mem_size >>= PAGE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); - } else if (type == READ_EXTENT_CACHE) { - struct extent_tree_info *eti = &sbi->extent_tree[EX_READ]; + } else if (type == READ_EXTENT_CACHE || type == AGE_EXTENT_CACHE) { + enum extent_type etype = type == READ_EXTENT_CACHE ? + EX_READ : EX_BLOCK_AGE; + struct extent_tree_info *eti = &sbi->extent_tree[etype]; mem_size = (atomic_read(&eti->total_ext_tree) * sizeof(struct extent_tree) + atomic_read(&eti->total_ext_node) * sizeof(struct extent_node)) >> PAGE_SHIFT; - res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); + res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); } else if (type == DISCARD_CACHE) { mem_size = (atomic_read(&dcc->discard_cmd_cnt) * sizeof(struct discard_cmd)) >> PAGE_SHIFT; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 0aa48704c77a..99454d46a939 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -147,6 +147,7 @@ enum mem_type { DIRTY_DENTS, /* indicates dirty dentry pages */ INO_ENTRIES, /* indicates inode entries */ READ_EXTENT_CACHE, /* indicates read extent cache */ + AGE_EXTENT_CACHE, /* indicates age extent cache */ DISCARD_CACHE, /* indicates memory of cached discard cmds */ COMPRESS_PAGE, /* indicates memory of cached compressed pages */ BASE_CHECK, /* check kernel status */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 8722d1a13c17..dee712f7225f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -453,6 +453,11 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg) f2fs_shrink_read_extent_tree(sbi, READ_EXTENT_CACHE_SHRINK_NUMBER); + /* try to shrink age extent cache when 
there is no enough memory */ + if (!f2fs_available_free_memory(sbi, AGE_EXTENT_CACHE)) + f2fs_shrink_age_extent_tree(sbi, + AGE_EXTENT_CACHE_SHRINK_NUMBER); + /* check the # of cached NAT entries */ if (!f2fs_available_free_memory(sbi, NAT_ENTRIES)) f2fs_try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK); @@ -3151,10 +3156,28 @@ static int __get_segment_type_4(struct f2fs_io_info *fio) } } +static int __get_age_segment_type(struct inode *inode, pgoff_t pgofs) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct extent_info ei; + + if (f2fs_lookup_age_extent_cache(inode, pgofs, &ei)) { + if (!ei.age) + return NO_CHECK_TYPE; + if (ei.age <= sbi->hot_data_age_threshold) + return CURSEG_HOT_DATA; + if (ei.age <= sbi->warm_data_age_threshold) + return CURSEG_WARM_DATA; + return CURSEG_COLD_DATA; + } + return NO_CHECK_TYPE; +} + static int __get_segment_type_6(struct f2fs_io_info *fio) { if (fio->type == DATA) { struct inode *inode = fio->page->mapping->host; + int type; if (is_inode_flag_set(inode, FI_ALIGNED_WRITE)) return CURSEG_COLD_DATA_PINNED; @@ -3169,6 +3192,11 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) } if (file_is_cold(inode) || f2fs_need_compress_data(inode)) return CURSEG_COLD_DATA; + + type = __get_age_segment_type(inode, fio->page->index); + if (type != NO_CHECK_TYPE) + return type; + if (file_is_hot(inode) || is_inode_flag_set(inode, FI_HOT_DATA) || f2fs_is_cow_file(inode)) @@ -3287,6 +3315,9 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr)); + if (IS_DATASEG(type)) + atomic64_inc(&sbi->allocated_data_blocks); + up_write(&sit_i->sentry_lock); if (page && IS_NODESEG(type)) { @@ -3414,6 +3445,8 @@ void f2fs_outplace_write_data(struct dnode_of_data *dn, struct f2fs_summary sum; f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR); + if (fio->io_type == FS_DATA_IO || fio->io_type == FS_CP_DATA_IO) + 
f2fs_update_age_extent_cache(dn); set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version); do_write_page(&sum, fio); f2fs_update_data_blkaddr(dn, fio->new_blkaddr); diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c index 33c490e69ae3..83d6fb97dcae 100644 --- a/fs/f2fs/shrinker.c +++ b/fs/f2fs/shrinker.c @@ -59,6 +59,9 @@ unsigned long f2fs_shrink_count(struct shrinker *shrink, /* count read extent cache entries */ count += __count_extent_cache(sbi, EX_READ); + /* count block age extent cache entries */ + count += __count_extent_cache(sbi, EX_BLOCK_AGE); + /* count clean nat cache entries */ count += __count_nat_entries(sbi); @@ -102,8 +105,11 @@ unsigned long f2fs_shrink_scan(struct shrinker *shrink, sbi->shrinker_run_no = run_no; + /* shrink extent cache entries */ + freed += f2fs_shrink_age_extent_tree(sbi, nr >> 2); + /* shrink read extent cache entries */ - freed += f2fs_shrink_read_extent_tree(sbi, nr >> 1); + freed += f2fs_shrink_read_extent_tree(sbi, nr >> 2); /* shrink clean nat cache entries */ if (freed < nr) @@ -134,6 +140,8 @@ void f2fs_join_shrinker(struct f2fs_sb_info *sbi) void f2fs_leave_shrinker(struct f2fs_sb_info *sbi) { f2fs_shrink_read_extent_tree(sbi, __count_extent_cache(sbi, EX_READ)); + f2fs_shrink_age_extent_tree(sbi, + __count_extent_cache(sbi, EX_BLOCK_AGE)); spin_lock(&f2fs_list_lock); list_del_init(&sbi->s_list); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 412c2e7352c0..180d8b804d13 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -163,6 +163,7 @@ enum { Opt_nogc_merge, Opt_discard_unit, Opt_memory_mode, + Opt_age_extent_cache, Opt_err, }; @@ -241,6 +242,7 @@ static match_table_t f2fs_tokens = { {Opt_nogc_merge, "nogc_merge"}, {Opt_discard_unit, "discard_unit=%s"}, {Opt_memory_mode, "memory=%s"}, + {Opt_age_extent_cache, "age_extent_cache"}, {Opt_err, NULL}, }; @@ -1257,6 +1259,9 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) } kfree(name); break; + case Opt_age_extent_cache: + 
set_opt(sbi, AGE_EXTENT_CACHE); + break; default: f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value", p); @@ -1958,6 +1963,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",extent_cache"); else seq_puts(seq, ",noextent_cache"); + if (test_opt(sbi, AGE_EXTENT_CACHE)) + seq_puts(seq, ",age_extent_cache"); if (test_opt(sbi, DATA_FLUSH)) seq_puts(seq, ",data_flush"); @@ -2219,6 +2226,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) bool need_restart_flush = false, need_stop_flush = false; bool need_restart_discard = false, need_stop_discard = false; bool no_read_extent_cache = !test_opt(sbi, READ_EXTENT_CACHE); + bool no_age_extent_cache = !test_opt(sbi, AGE_EXTENT_CACHE); bool enable_checkpoint = !test_opt(sbi, DISABLE_CHECKPOINT); bool no_io_align = !F2FS_IO_ALIGNED(sbi); bool no_atgc = !test_opt(sbi, ATGC); @@ -2313,6 +2321,12 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) f2fs_warn(sbi, "switch extent_cache option is not allowed"); goto restore_opts; } + /* disallow enable/disable age extent_cache dynamically */ + if (no_age_extent_cache == !!test_opt(sbi, AGE_EXTENT_CACHE)) { + err = -EINVAL; + f2fs_warn(sbi, "switch age_extent_cache option is not allowed"); + goto restore_opts; + } if (no_io_align == !!F2FS_IO_ALIGNED(sbi)) { err = -EINVAL; diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index a4745d596310..2ab215110596 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -668,6 +668,24 @@ out: return count; } + if (!strcmp(a->attr.name, "hot_data_age_threshold")) { + if (t == 0 || t >= sbi->warm_data_age_threshold) + return -EINVAL; + if (t == *ui) + return count; + *ui = (unsigned int)t; + return count; + } + + if (!strcmp(a->attr.name, "warm_data_age_threshold")) { + if (t == 0 || t <= sbi->hot_data_age_threshold) + return -EINVAL; + if (t == *ui) + return count; + *ui = (unsigned int)t; + return count; + } + *ui = (unsigned int)t; return count; @@ 
-923,6 +941,10 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, peak_atomic_write, peak_atomic_write); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, committed_atomic_block, committed_atomic_block); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, revoked_atomic_block, revoked_atomic_block); +/* For block age extent cache */ +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, hot_data_age_threshold, hot_data_age_threshold); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, warm_data_age_threshold, warm_data_age_threshold); + #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { ATTR_LIST(gc_urgent_sleep_time), @@ -1018,6 +1040,8 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(peak_atomic_write), ATTR_LIST(committed_atomic_block), ATTR_LIST(revoked_atomic_block), + ATTR_LIST(hot_data_age_threshold), + ATTR_LIST(warm_data_age_threshold), NULL, }; ATTRIBUTE_GROUPS(f2fs); diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 2bb37892d2ba..31d994e6b4ca 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -49,6 +49,7 @@ TRACE_DEFINE_ENUM(CP_TRIMMED); TRACE_DEFINE_ENUM(CP_PAUSE); TRACE_DEFINE_ENUM(CP_RESIZE); TRACE_DEFINE_ENUM(EX_READ); +TRACE_DEFINE_ENUM(EX_BLOCK_AGE); #define show_block_type(type) \ __print_symbolic(type, \ @@ -155,6 +156,11 @@ TRACE_DEFINE_ENUM(EX_READ); { COMPRESS_ZSTD, "ZSTD" }, \ { COMPRESS_LZORLE, "LZO-RLE" }) +#define show_extent_type(type) \ + __print_symbolic(type, \ + { EX_READ, "Read" }, \ + { EX_BLOCK_AGE, "Block Age" }) + struct f2fs_sb_info; struct f2fs_io_info; struct extent_info; @@ -1544,7 +1550,7 @@ TRACE_EVENT(f2fs_lookup_extent_tree_start, TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, type = %s", show_dev_ino(__entry), __entry->pgofs, - __entry->type == EX_READ ? 
"Read" : "N/A") + show_extent_type(__entry->type)) ); TRACE_EVENT_CONDITION(f2fs_lookup_read_extent_tree_end, @@ -1583,6 +1589,45 @@ TRACE_EVENT_CONDITION(f2fs_lookup_read_extent_tree_end, __entry->blk) ); +TRACE_EVENT_CONDITION(f2fs_lookup_age_extent_tree_end, + + TP_PROTO(struct inode *inode, unsigned int pgofs, + struct extent_info *ei), + + TP_ARGS(inode, pgofs, ei), + + TP_CONDITION(ei), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(unsigned int, pgofs) + __field(unsigned int, fofs) + __field(unsigned int, len) + __field(unsigned long long, age) + __field(unsigned long long, blocks) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->pgofs = pgofs; + __entry->fofs = ei->fofs; + __entry->len = ei->len; + __entry->age = ei->age; + __entry->blocks = ei->last_blocks; + ), + + TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, " + "age_ext_info(fofs: %u, len: %u, age: %llu, blocks: %llu)", + show_dev_ino(__entry), + __entry->pgofs, + __entry->fofs, + __entry->len, + __entry->age, + __entry->blocks) +); + TRACE_EVENT(f2fs_update_read_extent_tree_range, TP_PROTO(struct inode *inode, unsigned int pgofs, unsigned int len, @@ -1618,6 +1663,41 @@ TRACE_EVENT(f2fs_update_read_extent_tree_range, __entry->c_len) ); +TRACE_EVENT(f2fs_update_age_extent_tree_range, + + TP_PROTO(struct inode *inode, unsigned int pgofs, unsigned int len, + unsigned long long age, + unsigned long long last_blks), + + TP_ARGS(inode, pgofs, len, age, last_blks), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(unsigned int, pgofs) + __field(unsigned int, len) + __field(unsigned long long, age) + __field(unsigned long long, blocks) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->pgofs = pgofs; + __entry->len = len; + __entry->age = age; + __entry->blocks = last_blks; + ), + + TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, " + "len = %u, 
age = %llu, blocks = %llu", + show_dev_ino(__entry), + __entry->pgofs, + __entry->len, + __entry->age, + __entry->blocks) +); + TRACE_EVENT(f2fs_shrink_extent_tree, TP_PROTO(struct f2fs_sb_info *sbi, unsigned int node_cnt, @@ -1643,7 +1723,7 @@ TRACE_EVENT(f2fs_shrink_extent_tree, show_dev(__entry->dev), __entry->node_cnt, __entry->tree_cnt, - __entry->type == EX_READ ? "Read" : "N/A") + show_extent_type(__entry->type)) ); TRACE_EVENT(f2fs_destroy_extent_tree, @@ -1670,7 +1750,7 @@ TRACE_EVENT(f2fs_destroy_extent_tree, TP_printk("dev = (%d,%d), ino = %lu, destroyed: node_cnt = %u, type = %s", show_dev_ino(__entry), __entry->node_cnt, - __entry->type == EX_READ ? "Read" : "N/A") + show_extent_type(__entry->type)) ); DECLARE_EVENT_CLASS(f2fs_sync_dirty_inodes, From db8dcd25ec84120d4e57a7f17a566825cec17ae8 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 7 Dec 2022 13:42:17 +0000 Subject: [PATCH 3783/4122] f2fs: Fix spelling mistake in label: free_bio_enrty_cache -> free_bio_entry_cache There is a spelling mistake in a label name. Fix it. 
Signed-off-by: Colin Ian King Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 180d8b804d13..c02a717cf880 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -4723,7 +4723,7 @@ static int __init init_f2fs_fs(void) goto free_iostat; err = f2fs_init_bioset(); if (err) - goto free_bio_enrty_cache; + goto free_bio_entry_cache; err = f2fs_init_compress_mempool(); if (err) goto free_bioset; @@ -4740,7 +4740,7 @@ free_compress_mempool: f2fs_destroy_compress_mempool(); free_bioset: f2fs_destroy_bioset(); -free_bio_enrty_cache: +free_bio_entry_cache: f2fs_destroy_bio_entry_cache(); free_iostat: f2fs_destroy_iostat_processing(); From 15e38ee44d50cad264da80ef75626b9224ddc4a3 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Mon, 5 Dec 2022 22:56:03 +0800 Subject: [PATCH 3784/4122] f2fs: fix iostat parameter for discard All other I/O types we account for use the number of bytes as the basic unit, but discard uses the number of cmds as its statistical unit. Since each discard command carries the number of blocks it covers, let's change discard accounting to use the number of bytes as the base unit as well.
Fixes: b0af6d491a6b ("f2fs: add app/fs io stat") Signed-off-by: Yangtao Li Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index dee712f7225f..f1845a032885 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1187,7 +1187,7 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi, atomic_inc(&dcc->issued_discard); - f2fs_update_iostat(sbi, NULL, FS_DISCARD, 1); + f2fs_update_iostat(sbi, NULL, FS_DISCARD, len * F2FS_BLKSIZE); lstart += len; start += len; From 25547439f1dcc3def6062bd3e69165cd806a594e Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Fri, 2 Dec 2022 12:58:41 +0800 Subject: [PATCH 3785/4122] f2fs: don't call f2fs_issue_discard_timeout() when discard_cmd_cnt is 0 in f2fs_put_super() No need to call f2fs_issue_discard_timeout() in f2fs_put_super, when no discard command requires issue. Since the caller of f2fs_issue_discard_timeout() usually judges the number of discard commands before using it. Let's move this logic to f2fs_issue_discard_timeout(). By the way, use f2fs_realtime_discard_enable to simplify the code. Reported-by: kernel test robot Signed-off-by: Yangtao Li Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 6 ++++-- fs/f2fs/super.c | 8 ++------ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f1845a032885..a9099a754dd2 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1661,6 +1661,9 @@ bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi) struct discard_policy dpolicy; bool dropped; + if (!atomic_read(&dcc->discard_cmd_cnt)) + return false; + __init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT, dcc->discard_granularity); __issue_discard_cmd(sbi, &dpolicy); @@ -2116,8 +2119,7 @@ static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi) * Recovery can cache discard commands, so in error path of * fill_super(), it needs to give a chance to handle them. 
*/ - if (unlikely(atomic_read(&dcc->discard_cmd_cnt))) - f2fs_issue_discard_timeout(sbi); + f2fs_issue_discard_timeout(sbi); kfree(dcc); SM_I(sbi)->dcc_info = NULL; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index c02a717cf880..1f812b9ce985 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1581,8 +1581,7 @@ static void f2fs_put_super(struct super_block *sb) /* be sure to wait for any on-going discard commands */ dropped = f2fs_issue_discard_timeout(sbi); - if ((f2fs_hw_support_discard(sbi) || f2fs_hw_should_discard(sbi)) && - !sbi->discard_blks && !dropped) { + if (f2fs_realtime_discard_enable(sbi) && !sbi->discard_blks && !dropped) { struct cp_control cpc = { .reason = CP_UMOUNT | CP_TRIMMED, }; @@ -2233,7 +2232,6 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) bool no_discard = !test_opt(sbi, DISCARD); bool no_compress_cache = !test_opt(sbi, COMPRESS_CACHE); bool block_unit_discard = f2fs_block_unit_discard(sbi); - struct discard_cmd_control *dcc; #ifdef CONFIG_QUOTA int i, j; #endif @@ -2420,10 +2418,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) goto restore_flush; need_stop_discard = true; } else { - dcc = SM_I(sbi)->dcc_info; f2fs_stop_discard_thread(sbi); - if (atomic_read(&dcc->discard_cmd_cnt)) - f2fs_issue_discard_timeout(sbi); + f2fs_issue_discard_timeout(sbi); need_restart_discard = true; } } From 7411143f2021530d7641fbb40daaada4ee63f7e6 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Tue, 29 Nov 2022 12:15:23 +0800 Subject: [PATCH 3786/4122] f2fs: fix some format WARNING in debug.c and sysfs.c To fix: WARNING: function definition argument 'struct f2fs_attr *' should also have an identifier name + ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *); WARNING: return sysfs_emit(...) 
formats should include a terminating newline + return sysfs_emit(buf, "(none)"); WARNING: Prefer 'unsigned int' to bare use of 'unsigned' + unsigned npages = NODE_MAPPING(sbi)->nrpages; WARNING: Missing a blank line after declarations + unsigned npages = COMPRESS_MAPPING(sbi)->nrpages; + si->page_mem += (unsigned long long)npages << PAGE_SHIFT; WARNING: quoted string split across lines + seq_printf(s, "CP merge (Queued: %4d, Issued: %4d, Total: %4d, " + "Cur time: %4d(ms), Peak time: %4d(ms))\n", Signed-off-by: Yangtao Li Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 45 +++++++++++++++++++++++---------------------- fs/f2fs/sysfs.c | 10 +++++----- 2 files changed, 28 insertions(+), 27 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 8f1ef742551f..32af4f0c5735 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -318,18 +318,19 @@ get_cache: si->page_mem = 0; if (sbi->node_inode) { - unsigned npages = NODE_MAPPING(sbi)->nrpages; + unsigned long npages = NODE_MAPPING(sbi)->nrpages; si->page_mem += (unsigned long long)npages << PAGE_SHIFT; } if (sbi->meta_inode) { - unsigned npages = META_MAPPING(sbi)->nrpages; + unsigned long npages = META_MAPPING(sbi)->nrpages; si->page_mem += (unsigned long long)npages << PAGE_SHIFT; } #ifdef CONFIG_F2FS_FS_COMPRESSION if (sbi->compress_inode) { - unsigned npages = COMPRESS_MAPPING(sbi)->nrpages; + unsigned long npages = COMPRESS_MAPPING(sbi)->nrpages; + si->page_mem += (unsigned long long)npages << PAGE_SHIFT; } #endif @@ -477,28 +478,28 @@ static int stat_show(struct seq_file *s, void *v) si->meta_count[META_NAT]); seq_printf(s, " - ssa blocks : %u\n", si->meta_count[META_SSA]); - seq_printf(s, "CP merge (Queued: %4d, Issued: %4d, Total: %4d, " - "Cur time: %4d(ms), Peak time: %4d(ms))\n", - si->nr_queued_ckpt, si->nr_issued_ckpt, - si->nr_total_ckpt, si->cur_ckpt_time, - si->peak_ckpt_time); + seq_puts(s, "CP merge:\n"); + seq_printf(s, " - Queued : %4d\n", si->nr_queued_ckpt); + seq_printf(s, " - Issued : 
%4d\n", si->nr_issued_ckpt); + seq_printf(s, " - Total : %4d\n", si->nr_total_ckpt); + seq_printf(s, " - Cur time : %4d(ms)\n", si->cur_ckpt_time); + seq_printf(s, " - Peak time : %4d(ms)\n", si->peak_ckpt_time); seq_printf(s, "GC calls: %d (BG: %d)\n", si->call_count, si->bg_gc); seq_printf(s, " - data segments : %d (%d)\n", si->data_segs, si->bg_data_segs); seq_printf(s, " - node segments : %d (%d)\n", si->node_segs, si->bg_node_segs); - seq_printf(s, " - Reclaimed segs : Normal (%d), Idle CB (%d), " - "Idle Greedy (%d), Idle AT (%d), " - "Urgent High (%d), Urgent Mid (%d), " - "Urgent Low (%d)\n", - si->sbi->gc_reclaimed_segs[GC_NORMAL], - si->sbi->gc_reclaimed_segs[GC_IDLE_CB], - si->sbi->gc_reclaimed_segs[GC_IDLE_GREEDY], - si->sbi->gc_reclaimed_segs[GC_IDLE_AT], - si->sbi->gc_reclaimed_segs[GC_URGENT_HIGH], - si->sbi->gc_reclaimed_segs[GC_URGENT_MID], - si->sbi->gc_reclaimed_segs[GC_URGENT_LOW]); + seq_puts(s, " - Reclaimed segs :\n"); + seq_printf(s, " - Normal : %d\n", si->sbi->gc_reclaimed_segs[GC_NORMAL]); + seq_printf(s, " - Idle CB : %d\n", si->sbi->gc_reclaimed_segs[GC_IDLE_CB]); + seq_printf(s, " - Idle Greedy : %d\n", + si->sbi->gc_reclaimed_segs[GC_IDLE_GREEDY]); + seq_printf(s, " - Idle AT : %d\n", si->sbi->gc_reclaimed_segs[GC_IDLE_AT]); + seq_printf(s, " - Urgent High : %d\n", + si->sbi->gc_reclaimed_segs[GC_URGENT_HIGH]); + seq_printf(s, " - Urgent Mid : %d\n", si->sbi->gc_reclaimed_segs[GC_URGENT_MID]); + seq_printf(s, " - Urgent Low : %d\n", si->sbi->gc_reclaimed_segs[GC_URGENT_LOW]); seq_printf(s, "Try to move %d blocks (BG: %d)\n", si->tot_blks, si->bg_data_blks + si->bg_node_blks); seq_printf(s, " - data blocks : %d (%d)\n", si->data_blks, @@ -540,11 +541,11 @@ static int stat_show(struct seq_file *s, void *v) si->nr_dio_read, si->nr_dio_write); seq_printf(s, " - IO_R (Data: %4d, Node: %4d, Meta: %4d\n", si->nr_rd_data, si->nr_rd_node, si->nr_rd_meta); - seq_printf(s, " - IO_W (CP: %4d, Data: %4d, Flush: (%4d %4d %4d), " - "Discard: (%4d 
%4d)) cmd: %4d undiscard:%4u\n", + seq_printf(s, " - IO_W (CP: %4d, Data: %4d, Flush: (%4d %4d %4d), ", si->nr_wb_cp_data, si->nr_wb_data, si->nr_flushing, si->nr_flushed, - si->flush_list_empty, + si->flush_list_empty); + seq_printf(s, "Discard: (%4d %4d)) cmd: %4d undiscard:%4u\n", si->nr_discarding, si->nr_discarded, si->nr_discard_cmd, si->undiscard_blks); seq_printf(s, " - atomic IO: %4d (Max. %4d)\n", diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 2ab215110596..83a366f3ee80 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -53,9 +53,9 @@ static const char *gc_mode_names[MAX_GC_MODE] = { struct f2fs_attr { struct attribute attr; - ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *); - ssize_t (*store)(struct f2fs_attr *, struct f2fs_sb_info *, - const char *, size_t); + ssize_t (*show)(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf); + ssize_t (*store)(struct f2fs_attr *a, struct f2fs_sb_info *sbi, + const char *buf, size_t len); int struct_type; int offset; int id; @@ -232,13 +232,13 @@ static ssize_t encoding_show(struct f2fs_attr *a, (sb->s_encoding->version >> 8) & 0xff, sb->s_encoding->version & 0xff); #endif - return sysfs_emit(buf, "(none)"); + return sysfs_emit(buf, "(none)\n"); } static ssize_t mounted_time_sec_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { - return sysfs_emit(buf, "%llu", SIT_I(sbi)->mounted_time); + return sysfs_emit(buf, "%llu\n", SIT_I(sbi)->mounted_time); } #ifdef CONFIG_F2FS_STAT_FS From 26a8057a1ada97b528b93fdf3ac4fd03170f1900 Mon Sep 17 00:00:00 2001 From: Yuwei Guan Date: Sun, 11 Dec 2022 21:08:41 +0800 Subject: [PATCH 3787/4122] f2fs: reset wait_ms to default if any of the victims have been selected In non-foreground gc mode, if no victim is selected, the gc process will wait for no_gc_sleep_time before waking up again. In this subsequent time, even though a victim will be selected, the gc process still waits for no_gc_sleep_time before waking up. 
The configuration of wait_ms is not reasonable. After any of the victims have been selected, we need to reset wait_ms to default sleep time from no_gc_sleep_time. Signed-off-by: Yuwei Guan Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index f0c6506d8975..d7a9d84ba57c 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -141,6 +141,10 @@ do_gc: /* don't bother wait_ms by foreground gc */ if (!foreground) wait_ms = gc_th->no_gc_sleep_time; + } else { + /* reset wait_ms to default sleep time */ + if (wait_ms == gc_th->no_gc_sleep_time) + wait_ms = gc_th->min_sleep_time; } if (foreground) From c45bc55a99957b20e4e0333bcd42e12d1833a7f5 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Mon, 12 Dec 2022 14:55:29 -0800 Subject: [PATCH 3788/4122] mm/hugetlb: set head flag before setting compound_order in __prep_compound_gigantic_folio folio_set_compound_order() checks if the passed in folio is a large folio. A large folio is indicated by the PG_head flag. Call __folio_set_head() before setting the order. 
Link: https://lkml.kernel.org/r/20221212225529.22493-1-sidhartha.kumar@oracle.com Fixes: d1c6095572d0 ("mm/hugetlb: convert hugetlb prep functions to folios") Signed-off-by: Sidhartha Kumar Reported-by: David Hildenbrand Signed-off-by: Andrew Morton --- mm/hugetlb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 8c6fe2286814..7cdbcc22587b 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1805,10 +1805,10 @@ static bool __prep_compound_gigantic_folio(struct folio *folio, int nr_pages = 1 << order; struct page *p; - /* we rely on prep_new_hugetlb_folio to set the destructor */ - folio_set_compound_order(folio, order); __folio_clear_reserved(folio); __folio_set_head(folio); + /* we rely on prep_new_hugetlb_folio to set the destructor */ + folio_set_compound_order(folio, order); for (i = 0; i < nr_pages; i++) { p = folio_page(folio, i); From e923f4625ed3ad7656c3f9f086c898798bafbbc5 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Thu, 1 Dec 2022 12:37:50 +0100 Subject: [PATCH 3789/4122] riscv: Apply a static assert to riscv_isa_ext_id Add a static assert to ensure a RISCV_ISA_EXT_* enum is never created with a value >= RISCV_ISA_EXT_MAX. We can do this by putting RISCV_ISA_EXT_ID_MAX to more work. Before it was redundant with RISCV_ISA_EXT_MAX and hence only used to document the limit. Now it grows with the enum and is used to check the limit. 
Signed-off-by: Andrew Jones Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20221201113750.18021-1-ajones@ventanamicro.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/hwcap.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index b22525290073..86328e3acb02 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -59,8 +59,9 @@ enum riscv_isa_ext_id { RISCV_ISA_EXT_ZIHINTPAUSE, RISCV_ISA_EXT_SSTC, RISCV_ISA_EXT_SVINVAL, - RISCV_ISA_EXT_ID_MAX = RISCV_ISA_EXT_MAX, + RISCV_ISA_EXT_ID_MAX }; +static_assert(RISCV_ISA_EXT_ID_MAX <= RISCV_ISA_EXT_MAX); /* * This enum represents the logical ID for each RISC-V ISA extension static From 71fc3621efc38ace9640ee6a0db3300900689592 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Thu, 1 Dec 2022 14:51:28 +0100 Subject: [PATCH 3790/4122] riscv: Fix P4D_SHIFT definition for 3-level page table mode RISC-V kernels support 3,4,5-level page tables at runtime by folding upper levels. In case of a 3-level page table, PGDIR is folded into P4D which in turn is folded into PUD: PGDIR_SHIFT value is correctly set to the same value as PUD_SHIFT, but P4D_SHIFT is not, then any use of P4D_SHIFT will access invalid address bits (all set to 1). Fix this by dynamically defining P4D_SHIFT value, like we already do for PGDIR_SHIFT. 
Fixes: d10efa21a937 ("riscv: mm: Control p4d's folding by pgtable_l5_enabled") Signed-off-by: Alexandre Ghiti Reviewed-by: Palmer Dabbelt Link: https://lore.kernel.org/r/20221201135128.1482189-2-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/pgtable-64.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h index dc42375c2357..42a042c0e13e 100644 --- a/arch/riscv/include/asm/pgtable-64.h +++ b/arch/riscv/include/asm/pgtable-64.h @@ -25,7 +25,11 @@ extern bool pgtable_l5_enabled; #define PGDIR_MASK (~(PGDIR_SIZE - 1)) /* p4d is folded into pgd in case of 4-level page table */ -#define P4D_SHIFT 39 +#define P4D_SHIFT_L3 30 +#define P4D_SHIFT_L4 39 +#define P4D_SHIFT_L5 39 +#define P4D_SHIFT (pgtable_l5_enabled ? P4D_SHIFT_L5 : \ + (pgtable_l4_enabled ? P4D_SHIFT_L4 : P4D_SHIFT_L3)) #define P4D_SIZE (_AC(1, UL) << P4D_SHIFT) #define P4D_MASK (~(P4D_SIZE - 1)) From 40306b4d1ba25970dafd53432e8daa5d591ebd99 Mon Sep 17 00:00:00 2001 From: Yuezhang Mo Date: Tue, 13 Dec 2022 09:40:32 +0800 Subject: [PATCH 3791/4122] exfat: fix overflow in sector and cluster conversion According to the exFAT specification, there are at most 2^32-11 clusters in a volume. so using 'int' is not enough for cluster index, the return value type of exfat_sector_to_cluster() should be 'unsigned int'. 
Signed-off-by: Yuezhang Mo Reviewed-by: Sungjong Seo Signed-off-by: Namjae Jeon --- fs/exfat/exfat_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index a1e7feb22079..bc6d21d7c5ad 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -400,7 +400,7 @@ static inline sector_t exfat_cluster_to_sector(struct exfat_sb_info *sbi, sbi->data_start_sector; } -static inline int exfat_sector_to_cluster(struct exfat_sb_info *sbi, +static inline unsigned int exfat_sector_to_cluster(struct exfat_sb_info *sbi, sector_t sec) { return ((sec - sbi->data_start_sector) >> sbi->sect_per_clus_bits) + From 36955d368dc101be885ad2c71618e3c3a93cd8ee Mon Sep 17 00:00:00 2001 From: Yuezhang Mo Date: Thu, 17 Nov 2022 11:31:30 +0800 Subject: [PATCH 3792/4122] exfat: reuse exfat_find_location() to simplify exfat_get_dentry_set() In exfat_get_dentry_set(), part of the code is the same as exfat_find_location(), reuse exfat_find_location() to simplify exfat_get_dentry_set(). Code refinement, no functional changes. 
Signed-off-by: Yuezhang Mo Reviewed-by: Andy Wu Reviewed-by: Aoyama Wataru Reviewed-by: Sungjong Seo Signed-off-by: Namjae Jeon --- fs/exfat/dir.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c index 8121a7e073bc..1dfa67f307f1 100644 --- a/fs/exfat/dir.c +++ b/fs/exfat/dir.c @@ -818,7 +818,7 @@ int exfat_get_dentry_set(struct exfat_entry_set_cache *es, unsigned int type) { int ret, i, num_bh; - unsigned int off, byte_offset, clu = 0; + unsigned int off; sector_t sec; struct exfat_sb_info *sbi = EXFAT_SB(sb); struct exfat_dentry *ep; @@ -831,27 +831,16 @@ int exfat_get_dentry_set(struct exfat_entry_set_cache *es, return -EIO; } - byte_offset = EXFAT_DEN_TO_B(entry); - ret = exfat_walk_fat_chain(sb, p_dir, byte_offset, &clu); + ret = exfat_find_location(sb, p_dir, entry, &sec, &off); if (ret) return ret; memset(es, 0, sizeof(*es)); es->sb = sb; es->modified = false; - - /* byte offset in cluster */ - byte_offset = EXFAT_CLU_OFFSET(byte_offset, sbi); - - /* byte offset in sector */ - off = EXFAT_BLK_OFFSET(byte_offset, sb); es->start_off = off; es->bh = es->__bh; - /* sector offset in cluster */ - sec = EXFAT_B_TO_BLK(byte_offset, sb); - sec += exfat_cluster_to_sector(sbi, clu); - bh = sb_bread(sb, sec); if (!bh) return -EIO; @@ -878,6 +867,8 @@ int exfat_get_dentry_set(struct exfat_entry_set_cache *es, for (i = 1; i < num_bh; i++) { /* get the next sector */ if (exfat_is_last_sector_in_cluster(sbi, sec)) { + unsigned int clu = exfat_sector_to_cluster(sbi, sec); + if (p_dir->flags == ALLOC_NO_FAT_CHAIN) clu++; else if (exfat_get_next_cluster(sb, &clu)) From ba57ee0944ff0085652cf8df91f9c571883debe6 Mon Sep 17 00:00:00 2001 From: Li Qiong Date: Mon, 12 Dec 2022 15:43:51 +0800 Subject: [PATCH 3793/4122] ipvs: add a 'default' case in do_ip_vs_set_ctl() It is better to return the default switch case with '-EINVAL', in case new commands are added. Otherwise, an uninitialized value of ret is returned. 
Signed-off-by: Li Qiong Reviewed-by: Simon Horman Acked-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_ctl.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 988222fff9f0..97f6a1c8933a 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2590,6 +2590,11 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, sockptr_t ptr, unsigned int len) break; case IP_VS_SO_SET_DELDEST: ret = ip_vs_del_dest(svc, &udest); + break; + default: + WARN_ON_ONCE(1); + ret = -EINVAL; + break; } out_unlock: From f9645abe4255bd79e4c63799634c996dd53db321 Mon Sep 17 00:00:00 2001 From: Sriram Yagnaraman Date: Mon, 12 Dec 2022 11:07:05 +0100 Subject: [PATCH 3794/4122] netfilter: conntrack: document sctp timeouts Exposed through sysctl, update documentation to describe sctp states and their default timeouts. Signed-off-by: Sriram Yagnaraman Signed-off-by: Pablo Neira Ayuso --- .../networking/nf_conntrack-sysctl.rst | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/Documentation/networking/nf_conntrack-sysctl.rst b/Documentation/networking/nf_conntrack-sysctl.rst index 1120d71f28d7..49db1d11d7c4 100644 --- a/Documentation/networking/nf_conntrack-sysctl.rst +++ b/Documentation/networking/nf_conntrack-sysctl.rst @@ -163,6 +163,39 @@ nf_conntrack_timestamp - BOOLEAN Enable connection tracking flow timestamping. 
+nf_conntrack_sctp_timeout_closed - INTEGER (seconds) + default 10 + +nf_conntrack_sctp_timeout_cookie_wait - INTEGER (seconds) + default 3 + +nf_conntrack_sctp_timeout_cookie_echoed - INTEGER (seconds) + default 3 + +nf_conntrack_sctp_timeout_established - INTEGER (seconds) + default 432000 (5 days) + +nf_conntrack_sctp_timeout_shutdown_sent - INTEGER (seconds) + default 0.3 + +nf_conntrack_sctp_timeout_shutdown_recd - INTEGER (seconds) + default 0.3 + +nf_conntrack_sctp_timeout_shutdown_ack_sent - INTEGER (seconds) + default 3 + +nf_conntrack_sctp_timeout_heartbeat_sent - INTEGER (seconds) + default 30 + + This timeout is used to setup conntrack entry on secondary paths. + Default is set to hb_interval. + +nf_conntrack_sctp_timeout_heartbeat_acked - INTEGER (seconds) + default 210 + + This timeout is used to setup conntrack entry on secondary paths. + Default is set to (hb_interval * path_max_retrans + rto_max) + nf_conntrack_udp_timeout - INTEGER (seconds) default 30 From 2c05bf3aa0741f4f3c72432db7801371dbbcf289 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 13 Dec 2022 12:28:51 +0100 Subject: [PATCH 3795/4122] mnt_idmapping: move ima-only helpers to ima The vfs{g,u}id_{gt,lt}_* helpers are currently not needed outside of ima and we shouldn't incentivize people to use them by placing them into the header. Let's just define them locally in the one file in ima where they are used. 
Suggested-by: Linus Torvalds Signed-off-by: Christian Brauner (Microsoft) --- include/linux/mnt_idmapping.h | 20 -------------------- security/integrity/ima/ima_policy.c | 24 ++++++++++++++++++++++++ 2 files changed, 24 insertions(+), 20 deletions(-) diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h index 092c52aa6c2c..0ccca33a7a6d 100644 --- a/include/linux/mnt_idmapping.h +++ b/include/linux/mnt_idmapping.h @@ -96,26 +96,6 @@ static inline bool vfsgid_eq_kgid(vfsgid_t vfsgid, kgid_t kgid) return vfsgid_valid(vfsgid) && __vfsgid_val(vfsgid) == __kgid_val(kgid); } -static inline bool vfsuid_gt_kuid(vfsuid_t vfsuid, kuid_t kuid) -{ - return __vfsuid_val(vfsuid) > __kuid_val(kuid); -} - -static inline bool vfsgid_gt_kgid(vfsgid_t vfsgid, kgid_t kgid) -{ - return __vfsgid_val(vfsgid) > __kgid_val(kgid); -} - -static inline bool vfsuid_lt_kuid(vfsuid_t vfsuid, kuid_t kuid) -{ - return __vfsuid_val(vfsuid) < __kuid_val(kuid); -} - -static inline bool vfsgid_lt_kgid(vfsgid_t vfsgid, kgid_t kgid) -{ - return __vfsgid_val(vfsgid) < __kgid_val(kgid); -} - /* * vfs{g,u}ids are created from k{g,u}ids. * We don't allow them to be created from regular {u,g}id. diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index 54c475f98ce1..edd95ba02c11 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -71,6 +71,30 @@ struct ima_rule_opt_list { char *items[]; }; +/* + * These comparators are needed nowhere outside of ima so just define them here. + * This pattern should hopefully never be needed outside of ima. 
+ */ +static inline bool vfsuid_gt_kuid(vfsuid_t vfsuid, kuid_t kuid) +{ + return __vfsuid_val(vfsuid) > __kuid_val(kuid); +} + +static inline bool vfsgid_gt_kgid(vfsgid_t vfsgid, kgid_t kgid) +{ + return __vfsgid_val(vfsgid) > __kgid_val(kgid); +} + +static inline bool vfsuid_lt_kuid(vfsuid_t vfsuid, kuid_t kuid) +{ + return __vfsuid_val(vfsuid) < __kuid_val(kuid); +} + +static inline bool vfsgid_lt_kgid(vfsgid_t vfsgid, kgid_t kgid) +{ + return __vfsgid_val(vfsgid) < __kgid_val(kgid); +} + struct ima_rule_entry { struct list_head list; int action; From e4412739472b743e18860ad8d979a7ceb3071652 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 13 Oct 2022 03:18:41 +0900 Subject: [PATCH 3796/4122] Documentation: raise minimum supported version of binutils to 2.25 Binutils 2.23 was released in 2012. Almost 10 years old. We already require GCC 5.1, released in 2015. Bump the binutils version to 2.25, which was released some months before GCC 5.1. With this applied, some subsystems can start to clean up code. Examples: arch/arm/Kconfig.assembler arch/mips/vdso/Kconfig arch/powerpc/Makefile arch/x86/Kconfig.assembler Signed-off-by: Masahiro Yamada Acked-by: Linus Torvalds Reviewed-by: Nick Desaulniers --- Documentation/process/changes.rst | 4 ++-- scripts/min-tool-version.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst index 9844ca3a71a6..ef540865ad22 100644 --- a/Documentation/process/changes.rst +++ b/Documentation/process/changes.rst @@ -35,7 +35,7 @@ Rust (optional) 1.62.0 rustc --version bindgen (optional) 0.56.0 bindgen --version GNU make 3.82 make --version bash 4.2 bash --version -binutils 2.23 ld -v +binutils 2.25 ld -v flex 2.5.35 flex --version bison 2.0 bison --version pahole 1.16 pahole --version @@ -119,7 +119,7 @@ Bash 4.2 or newer is needed. Binutils -------- -Binutils 2.23 or newer is needed to build the kernel. 
+Binutils 2.25 or newer is needed to build the kernel. pkg-config ---------- diff --git a/scripts/min-tool-version.sh b/scripts/min-tool-version.sh index 201bccfbc678..a814f1efb39d 100755 --- a/scripts/min-tool-version.sh +++ b/scripts/min-tool-version.sh @@ -14,7 +14,7 @@ fi case "$1" in binutils) - echo 2.23.0 + echo 2.25.0 ;; gcc) echo 5.1.0 From fccb3d3eda8d19b893e1fd18e8c70b78784b2a72 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 11 Dec 2022 11:46:47 +0900 Subject: [PATCH 3797/4122] kbuild: add test-{ge,gt,le,lt} macros GNU Make 4.4 introduced $(intcmp ...), which is useful to compare two integers without forking a new process. Add test-{ge,gt,le,lt} macros, which work more efficiently with GNU Make >= 4.4. For older Make versions, they fall back to the 'test' shell command. The first two parameters to $(intcmp ...) must not be empty. To avoid the syntax error, I appended '0' to them. Fortunately, '00' is treated as '0'. This is needed because CONFIG options may expand to an empty string when the kernel configuration is not included. Signed-off-by: Masahiro Yamada Acked-by: Palmer Dabbelt # RISC-V Reviewed-by: Nathan Chancellor Reviewed-by: Nicolas Schier --- Makefile | 2 +- arch/riscv/Makefile | 2 +- arch/x86/Makefile | 2 +- scripts/Kbuild.include | 16 ++++++++++++++++ scripts/Makefile.compiler | 4 ++-- 5 files changed, 21 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 6b047daa46cc..ff36288ae671 100644 --- a/Makefile +++ b/Makefile @@ -994,7 +994,7 @@ KBUILD_LDFLAGS += -mllvm -import-instr-limit=5 # Check for frame size exceeding threshold during prolog/epilog insertion # when using lld < 13.0.0. 
ifneq ($(CONFIG_FRAME_WARN),0) -ifeq ($(shell test $(CONFIG_LLD_VERSION) -lt 130000; echo $$?),0) +ifeq ($(call test-lt, $(CONFIG_LLD_VERSION), 130000),y) KBUILD_LDFLAGS += -plugin-opt=-warn-stack-size=$(CONFIG_FRAME_WARN) endif endif diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 0d13b597cb55..faf2c2177094 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -37,7 +37,7 @@ else endif ifeq ($(CONFIG_LD_IS_LLD),y) -ifeq ($(shell test $(CONFIG_LLD_VERSION) -lt 150000; echo $$?),0) +ifeq ($(call test-lt, $(CONFIG_LLD_VERSION), 150000),y) KBUILD_CFLAGS += -mno-relax KBUILD_AFLAGS += -mno-relax ifndef CONFIG_AS_IS_LLVM diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 415a5d138de4..e72c7a49cd59 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -211,7 +211,7 @@ endif KBUILD_LDFLAGS += -m elf_$(UTS_MACHINE) ifdef CONFIG_LTO_CLANG -ifeq ($(shell test $(CONFIG_LLD_VERSION) -lt 130000; echo $$?),0) +ifeq ($(call test-lt, $(CONFIG_LLD_VERSION), 130000),y) KBUILD_LDFLAGS += -plugin-opt=-stack-alignment=$(if $(CONFIG_X86_32),4,8) endif endif diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index cbe28744637b..3be7c2d75667 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -11,6 +11,22 @@ space := $(empty) $(empty) space_escape := _-_SPACE_-_ pound := \# +### +# Comparison macros. +# Usage: $(call test-lt, $(CONFIG_LLD_VERSION), 150000) +# +# Use $(intcmp ...) if supported. (Make >= 4.4) +# Otherwise, fall back to the 'test' shell command. +ifeq ($(intcmp 1,0,,,y),y) +test-ge = $(intcmp $(strip $1)0, $(strip $2)0,,y,y) +test-gt = $(intcmp $(strip $1)0, $(strip $2)0,,,y) +else +test-ge = $(shell test $(strip $1)0 -ge $(strip $2)0 && echo y) +test-gt = $(shell test $(strip $1)0 -gt $(strip $2)0 && echo y) +endif +test-le = $(call test-ge, $2, $1) +test-lt = $(call test-gt, $2, $1) + ### # Name of target with a '.' as filename prefix. 
foo/bar.o => foo/.bar.o dot-target = $(dir $@).$(notdir $@) diff --git a/scripts/Makefile.compiler b/scripts/Makefile.compiler index 20d353dcabfb..3d8adfd34af1 100644 --- a/scripts/Makefile.compiler +++ b/scripts/Makefile.compiler @@ -63,11 +63,11 @@ cc-disable-warning = $(call try-run,\ # gcc-min-version # Usage: cflags-$(call gcc-min-version, 70100) += -foo -gcc-min-version = $(shell [ $(CONFIG_GCC_VERSION)0 -ge $(1)0 ] && echo y) +gcc-min-version = $(call test-ge, $(CONFIG_GCC_VERSION), $1) # clang-min-version # Usage: cflags-$(call clang-min-version, 110000) += -foo -clang-min-version = $(shell [ $(CONFIG_CLANG_VERSION)0 -ge $(1)0 ] && echo y) +clang-min-version = $(call test-ge, $(CONFIG_CLANG_VERSION), $1) # ld-option # Usage: KBUILD_LDFLAGS += $(call ld-option, -X, -Y) From a5db80c65dbf9144de155f8a0f08becc9c307db0 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 11 Dec 2022 18:49:18 +0900 Subject: [PATCH 3798/4122] kbuild: do not sort after reading modules.order modules.order lists modules in the deterministic order (that is why "modules order"), and there is no duplication in the list. $(sort ) is pointless. 
Signed-off-by: Masahiro Yamada Reviewed-by: Nicolas Schier --- scripts/Makefile.modfinal | 2 +- scripts/Makefile.modinst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal index 25bedd83644b..4705d32388f3 100644 --- a/scripts/Makefile.modfinal +++ b/scripts/Makefile.modfinal @@ -13,7 +13,7 @@ include $(srctree)/scripts/Kbuild.include include $(srctree)/scripts/Makefile.lib # find all modules listed in modules.order -modules := $(sort $(shell cat $(MODORDER))) +modules := $(shell cat $(MODORDER)) __modfinal: $(modules) @: diff --git a/scripts/Makefile.modinst b/scripts/Makefile.modinst index a4c987c23750..f4cff42069ad 100644 --- a/scripts/Makefile.modinst +++ b/scripts/Makefile.modinst @@ -9,7 +9,7 @@ __modinst: include include/config/auto.conf include $(srctree)/scripts/Kbuild.include -modules := $(sort $(shell cat $(MODORDER))) +modules := $(shell cat $(MODORDER)) ifeq ($(KBUILD_EXTMOD),) dst := $(MODLIB)/kernel From 6768fa4bcb6c1618248f135d04b9287ba2724ae0 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 11 Dec 2022 11:54:47 +0900 Subject: [PATCH 3799/4122] kbuild: add read-file macro Since GNU Make 4.2, $(file ...) supports the read operator '<', which is useful to read a file without forking a new process. No warning is shown even if the input file is missing. For older Make versions, it falls back to the cat command. 
Signed-off-by: Masahiro Yamada Reviewed-by: Nicolas Schier Reviewed-by: Alexander Lobakin Tested-by: Alexander Lobakin --- Makefile | 2 +- scripts/Kbuild.include | 14 ++++++++++++++ scripts/Makefile.modfinal | 2 +- scripts/Makefile.modinst | 2 +- 4 files changed, 17 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index ff36288ae671..591485152a95 100644 --- a/Makefile +++ b/Makefile @@ -376,7 +376,7 @@ else # !mixed-build include $(srctree)/scripts/Kbuild.include # Read KERNELRELEASE from include/config/kernel.release (if it exists) -KERNELRELEASE = $(shell cat include/config/kernel.release 2> /dev/null) +KERNELRELEASE = $(call read-file, include/config/kernel.release) KERNELVERSION = $(VERSION)$(if $(PATCHLEVEL),.$(PATCHLEVEL)$(if $(SUBLEVEL),.$(SUBLEVEL)))$(EXTRAVERSION) export VERSION PATCHLEVEL SUBLEVEL KERNELRELEASE KERNELVERSION diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index 3be7c2d75667..21e76ba0de17 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -10,6 +10,10 @@ empty := space := $(empty) $(empty) space_escape := _-_SPACE_-_ pound := \# +define newline + + +endef ### # Comparison macros. @@ -61,6 +65,16 @@ stringify = $(squote)$(quote)$1$(quote)$(squote) kbuild-dir = $(if $(filter /%,$(src)),$(src),$(srctree)/$(src)) kbuild-file = $(or $(wildcard $(kbuild-dir)/Kbuild),$(kbuild-dir)/Makefile) +### +# Read a file, replacing newlines with spaces +# +# Make 4.2 or later can read a file by using its builtin function. 
+ifneq ($(filter-out 3.% 4.0 4.1, $(MAKE_VERSION)),) +read-file = $(subst $(newline),$(space),$(file < $1)) +else +read-file = $(shell cat $1 2>/dev/null) +endif + ### # Easy method for doing a status message kecho := : diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal index 4705d32388f3..83f2797e530c 100644 --- a/scripts/Makefile.modfinal +++ b/scripts/Makefile.modfinal @@ -13,7 +13,7 @@ include $(srctree)/scripts/Kbuild.include include $(srctree)/scripts/Makefile.lib # find all modules listed in modules.order -modules := $(shell cat $(MODORDER)) +modules := $(call read-file, $(MODORDER)) __modfinal: $(modules) @: diff --git a/scripts/Makefile.modinst b/scripts/Makefile.modinst index f4cff42069ad..65aac6be78ec 100644 --- a/scripts/Makefile.modinst +++ b/scripts/Makefile.modinst @@ -9,7 +9,7 @@ __modinst: include include/config/auto.conf include $(srctree)/scripts/Kbuild.include -modules := $(shell cat $(MODORDER)) +modules := $(call read-file, $(MODORDER)) ifeq ($(KBUILD_EXTMOD),) dst := $(MODLIB)/kernel From 3122c84409d578a5df8bcb1953547e0b871ac4c2 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 11 Dec 2022 11:54:48 +0900 Subject: [PATCH 3800/4122] kconfig: refactor Makefile to reduce process forks Refactor Makefile and use read-file macro. For Make >= 4.2, it can read out a file by using the built-in function. 
Signed-off-by: Masahiro Yamada Reviewed-by: Nicolas Schier --- scripts/kconfig/.gitignore | 4 +++- scripts/kconfig/Makefile | 45 +++++++++++++++++++----------------- scripts/kconfig/gconf-cfg.sh | 7 ++++-- scripts/kconfig/mconf-cfg.sh | 25 +++++++++++--------- scripts/kconfig/nconf-cfg.sh | 23 ++++++++++-------- scripts/kconfig/qconf-cfg.sh | 10 +++++--- scripts/remove-stale-files | 2 ++ 7 files changed, 68 insertions(+), 48 deletions(-) diff --git a/scripts/kconfig/.gitignore b/scripts/kconfig/.gitignore index 500e7424b3ef..c8a3f9cd52f0 100644 --- a/scripts/kconfig/.gitignore +++ b/scripts/kconfig/.gitignore @@ -1,5 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only /conf /[gmnq]conf -/[gmnq]conf-cfg +/[gmnq]conf-cflags +/[gmnq]conf-libs +/qconf-bin /qconf-moc.cc diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile index b8ef0fb4bbef..0b1d15efaeb0 100644 --- a/scripts/kconfig/Makefile +++ b/scripts/kconfig/Makefile @@ -159,11 +159,12 @@ conf-objs := conf.o $(common-objs) hostprogs += nconf nconf-objs := nconf.o nconf.gui.o $(common-objs) -HOSTLDLIBS_nconf = $(shell . $(obj)/nconf-cfg && echo $$libs) -HOSTCFLAGS_nconf.o = $(shell . $(obj)/nconf-cfg && echo $$cflags) -HOSTCFLAGS_nconf.gui.o = $(shell . $(obj)/nconf-cfg && echo $$cflags) +HOSTLDLIBS_nconf = $(call read-file, $(obj)/nconf-libs) +HOSTCFLAGS_nconf.o = $(call read-file, $(obj)/nconf-cflags) +HOSTCFLAGS_nconf.gui.o = $(call read-file, $(obj)/nconf-cflags) -$(obj)/nconf.o $(obj)/nconf.gui.o: $(obj)/nconf-cfg +$(obj)/nconf: | $(obj)/nconf-libs +$(obj)/nconf.o $(obj)/nconf.gui.o: | $(obj)/nconf-cflags # mconf: Used for the menuconfig target based on lxdialog hostprogs += mconf @@ -171,27 +172,28 @@ lxdialog := $(addprefix lxdialog/, \ checklist.o inputbox.o menubox.o textbox.o util.o yesno.o) mconf-objs := mconf.o $(lxdialog) $(common-objs) -HOSTLDLIBS_mconf = $(shell . 
$(obj)/mconf-cfg && echo $$libs) +HOSTLDLIBS_mconf = $(call read-file, $(obj)/mconf-libs) $(foreach f, mconf.o $(lxdialog), \ - $(eval HOSTCFLAGS_$f = $$(shell . $(obj)/mconf-cfg && echo $$$$cflags))) + $(eval HOSTCFLAGS_$f = $$(call read-file, $(obj)/mconf-cflags))) -$(addprefix $(obj)/, mconf.o $(lxdialog)): $(obj)/mconf-cfg +$(obj)/mconf: | $(obj)/mconf-libs +$(addprefix $(obj)/, mconf.o $(lxdialog)): | $(obj)/mconf-cflags # qconf: Used for the xconfig target based on Qt hostprogs += qconf qconf-cxxobjs := qconf.o qconf-moc.o qconf-objs := images.o $(common-objs) -HOSTLDLIBS_qconf = $(shell . $(obj)/qconf-cfg && echo $$libs) -HOSTCXXFLAGS_qconf.o = $(shell . $(obj)/qconf-cfg && echo $$cflags) -HOSTCXXFLAGS_qconf-moc.o = $(shell . $(obj)/qconf-cfg && echo $$cflags) - -$(obj)/qconf.o: $(obj)/qconf-cfg +HOSTLDLIBS_qconf = $(call read-file, $(obj)/qconf-libs) +HOSTCXXFLAGS_qconf.o = -std=c++11 -fPIC $(call read-file, $(obj)/qconf-cflags) +HOSTCXXFLAGS_qconf-moc.o = -std=c++11 -fPIC $(call read-file, $(obj)/qconf-cflags) +$(obj)/qconf: | $(obj)/qconf-libs +$(obj)/qconf.o $(obj)/qconf-moc.o: | $(obj)/qconf-cflags quiet_cmd_moc = MOC $@ - cmd_moc = $(shell . $(obj)/qconf-cfg && echo $$moc) $< -o $@ + cmd_moc = $(call read-file, $(obj)/qconf-bin)/moc $< -o $@ -$(obj)/qconf-moc.cc: $(src)/qconf.h $(obj)/qconf-cfg FORCE +$(obj)/qconf-moc.cc: $(src)/qconf.h FORCE | $(obj)/qconf-bin $(call if_changed,moc) targets += qconf-moc.cc @@ -200,15 +202,16 @@ targets += qconf-moc.cc hostprogs += gconf gconf-objs := gconf.o images.o $(common-objs) -HOSTLDLIBS_gconf = $(shell . $(obj)/gconf-cfg && echo $$libs) -HOSTCFLAGS_gconf.o = $(shell . 
$(obj)/gconf-cfg && echo $$cflags) +HOSTLDLIBS_gconf = $(call read-file, $(obj)/gconf-libs) +HOSTCFLAGS_gconf.o = $(call read-file, $(obj)/gconf-cflags) -$(obj)/gconf.o: $(obj)/gconf-cfg +$(obj)/gconf: | $(obj)/gconf-libs +$(obj)/gconf.o: | $(obj)/gconf-cflags # check if necessary packages are available, and configure build flags -filechk_conf_cfg = $(CONFIG_SHELL) $< +cmd_conf_cfg = $< $(addprefix $(obj)/$*conf-, cflags libs bin) -$(obj)/%conf-cfg: $(src)/%conf-cfg.sh FORCE - $(call filechk,conf_cfg) +$(obj)/%conf-cflags $(obj)/%conf-libs $(obj)/%conf-bin: $(src)/%conf-cfg.sh + $(call cmd,conf_cfg) -clean-files += *conf-cfg +clean-files += *conf-cflags *conf-libs *conf-bin diff --git a/scripts/kconfig/gconf-cfg.sh b/scripts/kconfig/gconf-cfg.sh index cbd90c28c05f..040d8f338820 100755 --- a/scripts/kconfig/gconf-cfg.sh +++ b/scripts/kconfig/gconf-cfg.sh @@ -1,6 +1,9 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 +cflags=$1 +libs=$2 + PKG="gtk+-2.0 gmodule-2.0 libglade-2.0" if [ -z "$(command -v ${HOSTPKG_CONFIG})" ]; then @@ -26,5 +29,5 @@ if ! 
${HOSTPKG_CONFIG} --atleast-version=2.0.0 gtk+-2.0; then exit 1 fi -echo cflags=\"$(${HOSTPKG_CONFIG} --cflags $PKG)\" -echo libs=\"$(${HOSTPKG_CONFIG} --libs $PKG)\" +${HOSTPKG_CONFIG} --cflags ${PKG} > ${cflags} +${HOSTPKG_CONFIG} --libs ${PKG} > ${libs} diff --git a/scripts/kconfig/mconf-cfg.sh b/scripts/kconfig/mconf-cfg.sh index 025b565e0b7c..1e61f50a5905 100755 --- a/scripts/kconfig/mconf-cfg.sh +++ b/scripts/kconfig/mconf-cfg.sh @@ -1,19 +1,22 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 +cflags=$1 +libs=$2 + PKG="ncursesw" PKG2="ncurses" if [ -n "$(command -v ${HOSTPKG_CONFIG})" ]; then if ${HOSTPKG_CONFIG} --exists $PKG; then - echo cflags=\"$(${HOSTPKG_CONFIG} --cflags $PKG)\" - echo libs=\"$(${HOSTPKG_CONFIG} --libs $PKG)\" + ${HOSTPKG_CONFIG} --cflags ${PKG} > ${cflags} + ${HOSTPKG_CONFIG} --libs ${PKG} > ${libs} exit 0 fi - if ${HOSTPKG_CONFIG} --exists $PKG2; then - echo cflags=\"$(${HOSTPKG_CONFIG} --cflags $PKG2)\" - echo libs=\"$(${HOSTPKG_CONFIG} --libs $PKG2)\" + if ${HOSTPKG_CONFIG} --exists ${PKG2}; then + ${HOSTPKG_CONFIG} --cflags ${PKG2} > ${cflags} + ${HOSTPKG_CONFIG} --libs ${PKG2} > ${libs} exit 0 fi fi @@ -22,22 +25,22 @@ fi # (Even if it is installed, some distributions such as openSUSE cannot # find ncurses by pkg-config.) if [ -f /usr/include/ncursesw/ncurses.h ]; then - echo cflags=\"-D_GNU_SOURCE -I/usr/include/ncursesw\" - echo libs=\"-lncursesw\" + echo -D_GNU_SOURCE -I/usr/include/ncursesw > ${cflags} + echo -lncursesw > ${libs} exit 0 fi if [ -f /usr/include/ncurses/ncurses.h ]; then - echo cflags=\"-D_GNU_SOURCE -I/usr/include/ncurses\" - echo libs=\"-lncurses\" + echo -D_GNU_SOURCE -I/usr/include/ncurses > ${cflags} + echo -lncurses > ${libs} exit 0 fi # As a final fallback before giving up, check if $HOSTCC knows of a default # ncurses installation (e.g. from a vendor-specific sysroot). 
if echo '#include ' | ${HOSTCC} -E - >/dev/null 2>&1; then - echo cflags=\"-D_GNU_SOURCE\" - echo libs=\"-lncurses\" + echo -D_GNU_SOURCE > ${cflags} + echo -lncurses > ${libs} exit 0 fi diff --git a/scripts/kconfig/nconf-cfg.sh b/scripts/kconfig/nconf-cfg.sh index 3a10bac2adb3..f871a2160e36 100755 --- a/scripts/kconfig/nconf-cfg.sh +++ b/scripts/kconfig/nconf-cfg.sh @@ -1,19 +1,22 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 +cflags=$1 +libs=$2 + PKG="ncursesw menuw panelw" PKG2="ncurses menu panel" if [ -n "$(command -v ${HOSTPKG_CONFIG})" ]; then if ${HOSTPKG_CONFIG} --exists $PKG; then - echo cflags=\"$(${HOSTPKG_CONFIG} --cflags $PKG)\" - echo libs=\"$(${HOSTPKG_CONFIG} --libs $PKG)\" + ${HOSTPKG_CONFIG} --cflags ${PKG} > ${cflags} + ${HOSTPKG_CONFIG} --libs ${PKG} > ${libs} exit 0 fi if ${HOSTPKG_CONFIG} --exists $PKG2; then - echo cflags=\"$(${HOSTPKG_CONFIG} --cflags $PKG2)\" - echo libs=\"$(${HOSTPKG_CONFIG} --libs $PKG2)\" + ${HOSTPKG_CONFIG} --cflags ${PKG2} > ${cflags} + ${HOSTPKG_CONFIG} --libs ${PKG2} > ${libs} exit 0 fi fi @@ -22,20 +25,20 @@ fi # (Even if it is installed, some distributions such as openSUSE cannot # find ncurses by pkg-config.) 
if [ -f /usr/include/ncursesw/ncurses.h ]; then - echo cflags=\"-D_GNU_SOURCE -I/usr/include/ncursesw\" - echo libs=\"-lncursesw -lmenuw -lpanelw\" + echo -D_GNU_SOURCE -I/usr/include/ncursesw > ${cflags} + echo -lncursesw -lmenuw -lpanelw > ${libs} exit 0 fi if [ -f /usr/include/ncurses/ncurses.h ]; then - echo cflags=\"-D_GNU_SOURCE -I/usr/include/ncurses\" - echo libs=\"-lncurses -lmenu -lpanel\" + echo -D_GNU_SOURCE -I/usr/include/ncurses > ${cflags} + echo -lncurses -lmenu -lpanel > ${libs} exit 0 fi if [ -f /usr/include/ncurses.h ]; then - echo cflags=\"-D_GNU_SOURCE\" - echo libs=\"-lncurses -lmenu -lpanel\" + echo -D_GNU_SOURCE > ${cflags} + echo -lncurses -lmenu -lpanel > ${libs} exit 0 fi diff --git a/scripts/kconfig/qconf-cfg.sh b/scripts/kconfig/qconf-cfg.sh index ad652cb53947..117f36e568fc 100755 --- a/scripts/kconfig/qconf-cfg.sh +++ b/scripts/kconfig/qconf-cfg.sh @@ -1,6 +1,10 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 +cflags=$1 +libs=$2 +bin=$3 + PKG="Qt5Core Qt5Gui Qt5Widgets" if [ -z "$(command -v ${HOSTPKG_CONFIG})" ]; then @@ -11,9 +15,9 @@ if [ -z "$(command -v ${HOSTPKG_CONFIG})" ]; then fi if ${HOSTPKG_CONFIG} --exists $PKG; then - echo cflags=\"-std=c++11 -fPIC $(${HOSTPKG_CONFIG} --cflags $PKG)\" - echo libs=\"$(${HOSTPKG_CONFIG} --libs $PKG)\" - echo moc=\"$(${HOSTPKG_CONFIG} --variable=host_bins Qt5Core)/moc\" + ${HOSTPKG_CONFIG} --cflags ${PKG} > ${cflags} + ${HOSTPKG_CONFIG} --libs ${PKG} > ${libs} + ${HOSTPKG_CONFIG} --variable=host_bins Qt5Core > ${bin} exit 0 fi diff --git a/scripts/remove-stale-files b/scripts/remove-stale-files index ccadfa3afb2b..64b14aa5aebf 100755 --- a/scripts/remove-stale-files +++ b/scripts/remove-stale-files @@ -47,3 +47,5 @@ rm -f arch/riscv/purgatory/kexec-purgatory.c rm -f scripts/extract-cert rm -f arch/x86/purgatory/kexec-purgatory.c + +rm -f scripts/kconfig/[gmnq]conf-cfg From 875ef1a57f32fcb91010dc9bc8bd1166956a579e Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 11 Dec 2022 12:10:59 
+0900 Subject: [PATCH 3801/4122] kbuild: use .NOTINTERMEDIATE for future GNU Make versions In Kbuild, some files are generated by chains of pattern/implicit rules. For example, *.dtb.o files in drivers/of/unittest-data/Makefile are generated by the chain of 3 pattern rules, like this: %.dts -> %.dtb -> %.dtb.S -> %.dtb.o Here, %.dts is the real source, %.dtb.o is the final target. %.dtb and %.dtb.S are called "intermediate files". As GNU Make manual [1] says, intermediate files are treated differently in two ways: (a) The first difference is what happens if the intermediate file does not exist. If an ordinary file 'b' does not exist, and make considers a target that depends on 'b', it invariably creates 'b' and then updates the target from 'b'. But if 'b' is an intermediate file, then make can leave well enough alone: it won't create 'b' unless one of its prerequisites is out of date. This means the target depending on 'b' won't be rebuilt either, unless there is some other reason to update that target: for example the target doesn't exist or a different prerequisite is newer than the target. (b) The second difference is that if make does create 'b' in order to update something else, it deletes 'b' later on after it is no longer needed. Therefore, an intermediate file which did not exist before make also does not exist after make. make reports the deletion to you by printing a 'rm' command showing which file it is deleting. The combination of these is problematic for Kbuild because most of the build rules depend on FORCE and the if_changed* macros really determine if the target should be updated. So, all missing files, whether they are intermediate or not, are always rebuilt. To see the problem, delete ".SECONDARY:" from scripts/Kbuild.include, and repeat this command: $ make allmodconfig drivers/of/unittest-data/ The intermediate files will be deleted, which results in rebuilding intermediate and final objects in the next run of make. 
In the old days, people suppressed (b) in inconsistent ways. As commit 54a702f70589 ("kbuild: mark $(targets) as .SECONDARY and remove .PRECIOUS markers") noted, you should not use .PRECIOUS because .PRECIOUS has the following behavior (c), which is not likely what you want. (c) If make is killed or interrupted during the execution of their recipes, the target is not deleted. Also, the target is not deleted on error even if .DELETE_ON_ERROR is specified. .SECONDARY is a much better way to disable (b), but a small problem is that .SECONDARY enables (a), which gives a side-effect to $?; prerequisites marked as .SECONDARY do not appear in $?. This is a drawback for Kbuild. I thought it was a bug and opened a bug report. As Paul, the GNU Make maintainer, concluded in [2], this is not a bug. The good news is that GNU Make 4.4 added the perfect solution, .NOTINTERMEDIATE, which cancels both (a) and (b). For clarification, my understanding of .INTERMEDIATE, .SECONDARY, .PRECIOUS and .NOTINTERMEDIATE is as follows: (a) (b) (c) .INTERMEDIATE enable enable disable .SECONDARY enable disable disable .PRECIOUS disable disable enable .NOTINTERMEDIATE disable disable disable However, GNU Make 4.4 has a bug for the global .NOTINTERMEDIATE. [3] It was fixed by commit 6164608900ad ("[SV 63417] Ensure global .NOTINTERMEDIATE disables all intermediates"), and will be available in the next release of GNU Make. The following is the gain for .NOTINTERMEDIATE: [Current Make] $ make allnoconfig vmlinux [ full build ] $ rm include/linux/device.h $ make vmlinux CALL scripts/checksyscalls.sh Make does not notice the removal of <linux/device.h>. 
[Future Make] $ make-latest allnoconfig vmlinux [ full build ] $ rm include/linux/device.h $ make-latest vmlinux CC arch/x86/kernel/asm-offsets.s In file included from ./include/linux/writeback.h:13, from ./include/linux/memcontrol.h:22, from ./include/linux/swap.h:9, from ./include/linux/suspend.h:5, from arch/x86/kernel/asm-offsets.c:13: ./include/linux/blk_types.h:11:10: fatal error: linux/device.h: No such file or directory 11 | #include <linux/device.h> | ^~~~~~~~~~~~~~~~ compilation terminated. make-latest[1]: *** [scripts/Makefile.build:114: arch/x86/kernel/asm-offsets.s] Error 1 make-latest: *** [Makefile:1282: prepare0] Error 2 Make notices the removal of <linux/device.h>, and rebuilds objects that depended on <linux/device.h>. There exists a source file that includes <linux/device.h>, and it raises an error. To see detailed background information, refer to commit 2d3b1b8f0da7 ("kbuild: drop $(wildcard $^) check in if_changed* for faster rebuild"). [1]: https://www.gnu.org/software/make/manual/make.html#Chained-Rules [2]: https://savannah.gnu.org/bugs/?55532 [3]: https://savannah.gnu.org/bugs/?63417 Signed-off-by: Masahiro Yamada --- scripts/Kbuild.include | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index 21e76ba0de17..2f7356b2990b 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -185,9 +185,6 @@ endif make-cmd = $(call escsq,$(subst $(pound),$$(pound),$(subst $$,$$$$,$(cmd_$(1))))) # Find any prerequisites that are newer than target or that do not exist. -# (This is not true for now; $? should contain any non-existent prerequisites, -# but it does not work as expected when .SECONDARY is present. This seems a bug -# of GNU Make.) # PHONY targets skipped in both cases. newer-prereqs = $(filter-out $(PHONY),$?) @@ -263,4 +260,14 @@ endif .DELETE_ON_ERROR: # do not delete intermediate files automatically +# +# .NOTINTERMEDIATE is more correct, but only available on newer Make versions. 
+# Make 4.4 introduced .NOTINTERMEDIATE, and it appears in .FEATURES, but the +# global .NOTINTERMEDIATE does not work. We can use it on Make > 4.4. +# Use .SECONDARY for older Make versions, but "newer-prereq" cannot detect +# deleted files. +ifneq ($(and $(filter notintermediate, $(.FEATURES)),$(filter-out 4.4,$(MAKE_VERSION))),) +.NOTINTERMEDIATE: +else .SECONDARY: +endif From c528ef0888b75f673f7d48022de8d31d5b451e8c Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Wed, 7 Dec 2022 04:11:12 -0500 Subject: [PATCH 3802/4122] riscv: Fixup compile error with !MMU Current nommu_virt_defconfig can't compile: In file included from arch/riscv/kernel/crash_core.c:3: arch/riscv/kernel/crash_core.c: In function 'arch_crash_save_vmcoreinfo': arch/riscv/kernel/crash_core.c:8:27: error: 'VA_BITS' undeclared (first use in this function) 8 | VMCOREINFO_NUMBER(VA_BITS); | ^~~~~~~ Add MMU dependency for KEXEC_FILE. Fixes: 6261586e0c91 ("RISC-V: Add kexec_file support") Reported-by: Conor Dooley Reported-by: kernel test robot Signed-off-by: Guo Ren Signed-off-by: Guo Ren Tested-by: Conor Dooley Link: https://lore.kernel.org/r/20221207091112.2258674-1-guoren@kernel.org Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 67ef08d33d3a..e1a9fa47f012 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -498,7 +498,7 @@ config KEXEC_FILE select KEXEC_CORE select KEXEC_ELF select HAVE_IMA_KEXEC if IMA - depends on 64BIT + depends on 64BIT && MMU help This is new version of kexec system call. This system call is file based and takes file descriptors as system call argument From 37f0ab1477994a0d0dc3c1e0de030fae07d37965 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 6 Dec 2022 18:08:12 -0800 Subject: [PATCH 3803/4122] Documentation: RISC-V: Fix a typo in patch-acceptance I just stumbled on this when modifying the docs. 
Reviewed-by: Anup Patel Link: https://lore.kernel.org/r/20221207020815.16214-2-palmer@rivosinc.com Signed-off-by: Palmer Dabbelt --- Documentation/riscv/patch-acceptance.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/riscv/patch-acceptance.rst b/Documentation/riscv/patch-acceptance.rst index dfe0ac5624fb..5da6f9b273d6 100644 --- a/Documentation/riscv/patch-acceptance.rst +++ b/Documentation/riscv/patch-acceptance.rst @@ -29,7 +29,7 @@ their own custom extensions. These custom extensions aren't required to go through any review or ratification process by the RISC-V Foundation. To avoid the maintenance complexity and potential performance impact of adding kernel code for implementor-specific -RISC-V extensions, we'll only to accept patches for extensions that +RISC-V extensions, we'll only accept patches for extensions that have been officially frozen or ratified by the RISC-V Foundation. (Implementors, may, of course, maintain their own Linux kernel trees containing code for any custom extensions that they wish.) From 936100d4507f2e9f0be4621b0c698180d65e8264 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 6 Dec 2022 18:08:13 -0800 Subject: [PATCH 3804/4122] Documentation: RISC-V: Allow patches for non-standard behavior The patch acceptance policy forbids accepting support for non-standard behavior. This policy was written in order to both steer implementers towards the standards and to avoid coupling the upstream kernel too tightly to vendor-specific features. Those were good goals, but in practice the policy just isn't working: every RISC-V system we have needs vendor-specific behavior in the kernel and we end up taking that support which violates the policy. That's confusing for contributors, which is the main reason we have a written policy in the first place. So let's just start taking code for vendor-defined behavior. 
Reviewed-by: Conor Dooley Reviewed-by: Anup Patel Signed-off-by: Paul Walmsley Link: https://lore.kernel.org/all/alpine.DEB.2.21.999.2211181027590.4480@utopia.booyaka.com/ [Palmer: merge in Paul's suggestions] Link: https://lore.kernel.org/r/20221207020815.16214-3-palmer@rivosinc.com Signed-off-by: Palmer Dabbelt --- Documentation/riscv/patch-acceptance.rst | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/Documentation/riscv/patch-acceptance.rst b/Documentation/riscv/patch-acceptance.rst index 5da6f9b273d6..d9d628505cd8 100644 --- a/Documentation/riscv/patch-acceptance.rst +++ b/Documentation/riscv/patch-acceptance.rst @@ -29,7 +29,11 @@ their own custom extensions. These custom extensions aren't required to go through any review or ratification process by the RISC-V Foundation. To avoid the maintenance complexity and potential performance impact of adding kernel code for implementor-specific -RISC-V extensions, we'll only accept patches for extensions that -have been officially frozen or ratified by the RISC-V Foundation. -(Implementors, may, of course, maintain their own Linux kernel trees -containing code for any custom extensions that they wish.) +RISC-V extensions, we'll only consider patches for extensions that either: + +- Have been officially frozen or ratified by the RISC-V Foundation, or +- Have been implemented in hardware that is widely available, per standard + Linux practice. + +(Implementors, may, of course, maintain their own Linux kernel trees containing +code for any custom extensions that they wish.) From 68eabc72023f2c1cdbf7932fde57a7811c65b414 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 6 Dec 2022 18:08:14 -0800 Subject: [PATCH 3805/4122] Documentation: RISC-V: Mention the UEFI Standards The current patch acceptance policy requires that specifications are approved by the RISC-V foundation, but we rely on external specifications as well. 
This explicitly calls out the UEFI specifications that we're starting to depend on. Reviewed-by: Conor Dooley Reviewed-by: Atish Patra Reviewed-by: Anup Patel Link: https://lore.kernel.org/r/20221207020815.16214-4-palmer@rivosinc.com Signed-off-by: Palmer Dabbelt --- Documentation/riscv/patch-acceptance.rst | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Documentation/riscv/patch-acceptance.rst b/Documentation/riscv/patch-acceptance.rst index d9d628505cd8..389a45584386 100644 --- a/Documentation/riscv/patch-acceptance.rst +++ b/Documentation/riscv/patch-acceptance.rst @@ -20,9 +20,11 @@ Submit Checklist Addendum ------------------------- We'll only accept patches for new modules or extensions if the specifications for those modules or extensions are listed as being -"Frozen" or "Ratified" by the RISC-V Foundation. (Developers may, of -course, maintain their own Linux kernel trees that contain code for -any draft extensions that they wish.) +unlikely to be incompatibly changed in the future. For +specifications from the RISC-V foundation this means "Frozen" or +"Ratified", for the UEFI forum specifications this means a published +ECR. (Developers may, of course, maintain their own Linux kernel trees +that contain code for any draft extensions that they wish.) Additionally, the RISC-V specification allows implementors to create their own custom extensions. These custom extensions aren't required From a39c636506cb90b9ba25cbb0a78bbcc3725ea227 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 6 Dec 2022 18:08:15 -0800 Subject: [PATCH 3806/4122] Documentation: RISC-V: patch-acceptance: s/implementor/implementer Implementor does appear to be a word, but it's not very common. 
Suggested-by: Conor Dooley Reviewed-by: Anup Patel Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20221207020815.16214-5-palmer@rivosinc.com Signed-off-by: Palmer Dabbelt --- Documentation/riscv/patch-acceptance.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/riscv/patch-acceptance.rst b/Documentation/riscv/patch-acceptance.rst index 389a45584386..07d5a5623e2a 100644 --- a/Documentation/riscv/patch-acceptance.rst +++ b/Documentation/riscv/patch-acceptance.rst @@ -26,7 +26,7 @@ specifications from the RISC-V foundation this means "Frozen" or ECR. (Developers may, of course, maintain their own Linux kernel trees that contain code for any draft extensions that they wish.) -Additionally, the RISC-V specification allows implementors to create +Additionally, the RISC-V specification allows implementers to create their own custom extensions. These custom extensions aren't required to go through any review or ratification process by the RISC-V Foundation. To avoid the maintenance complexity and potential @@ -37,5 +37,5 @@ RISC-V extensions, we'll only consider patches for extensions that either: - Have been implemented in hardware that is widely available, per standard Linux practice. -(Implementors, may, of course, maintain their own Linux kernel trees containing +(Implementers, may, of course, maintain their own Linux kernel trees containing code for any custom extensions that they wish.) From a785736d7e587abbfd59df44e9a815d8d109c28c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 13 Dec 2022 09:56:02 -0500 Subject: [PATCH 3807/4122] tracing: Have trigger filter parsing errors show up in error_log It is annoying that the filter parsing of triggers do not show up in the error_log. Trying to figure out what is incorrect in the input is difficult when it fails for a typo. Have the errors of filter parsing show up in error_log as well. 
Link: https://lore.kernel.org/linux-trace-kernel/20221213095602.083fa9fd@gandalf.local.home Cc: Masami Hiramatsu Cc: Tom Zanussi Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_trigger.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 918730d74932..19ce9d22bfd7 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -1067,7 +1067,14 @@ int set_trigger_filter(char *filter_str, /* The filter is for the 'trigger' event, not the triggered event */ ret = create_event_filter(file->tr, file->event_call, - filter_str, false, &filter); + filter_str, true, &filter); + + /* Only enabled set_str for error handling */ + if (filter) { + kfree(filter->filter_string); + filter->filter_string = NULL; + } + /* * If create_event_filter() fails, filter still needs to be freed. * Which the calling code will do with data->filter. From fab89a09c86f948adfc7e20a7d608bd9f323bbe1 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Mon, 12 Dec 2022 19:38:14 -0500 Subject: [PATCH 3808/4122] tracing: Remove pointer (asterisk) and brackets from cpumask_t field To differentiate between long arrays and cpumasks, the __cpumask() field was created. Part of the TRACE_EVENT() macros test if the type is signed or not by using the is_signed_type() macro. The __cpumask() field used the __dynamic_array() helper but because cpumask_t is a structure, it could not be used in the is_signed_type() macro as that would fail to build, so instead it passed in the pointer to cpumask_t. Unfortunately, that creates in the format file: field:__data_loc cpumask_t *[] mask; offset:36; size:4; signed:0; Which looks like an array of pointers to cpumask_t and not a cpumask_t type, which is misleading to user space parsers. Douglas Raillard pointed out that the "[]" are also misleading, as cpumask_t is not an array. 
Since cpumask() hasn't been created yet, and the parsers currently fail on it (but will still produce the raw output), make it be: field:__data_loc cpumask_t mask; offset:36; size:4; signed:0; Which is the correct type of the field. Then the parsers can be updated to handle this. Link: https://lore.kernel.org/lkml/6dda5e1d-9416-b55e-88f3-31d148bc925f@arm.com/ Link: https://lore.kernel.org/linux-trace-kernel/20221212193814.0e3f1e43@gandalf.local.home Cc: Masami Hiramatsu Cc: Valentin Schneider Cc: Andrew Morton Fixes: 8230f27b1ccc ("tracing: Add __cpumask to denote a trace event field that is a cpumask_t") Reported-by: Douglas Raillard Signed-off-by: Steven Rostedt (Google) --- include/trace/stages/stage4_event_fields.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/include/trace/stages/stage4_event_fields.h b/include/trace/stages/stage4_event_fields.h index f2990d22313c..affd541fd25e 100644 --- a/include/trace/stages/stage4_event_fields.h +++ b/include/trace/stages/stage4_event_fields.h @@ -47,7 +47,10 @@ #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) #undef __cpumask -#define __cpumask(item) __dynamic_array(cpumask_t *, item, -1) +#define __cpumask(item) { \ + .type = "__data_loc cpumask_t", .name = #item, \ + .size = 4, .align = 4, \ + .is_signed = 0, .filter_type = FILTER_OTHER }, #undef __sockaddr #define __sockaddr(field, len) __dynamic_array(u8, field, len) @@ -68,7 +71,10 @@ #define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1) #undef __rel_cpumask -#define __rel_cpumask(item) __rel_dynamic_array(cpumask_t *, item, -1) +#define __rel_cpumask(item) { \ + .type = "__rel_loc cpumask_t", .name = #item, \ + .size = 4, .align = 4, \ + .is_signed = 0, .filter_type = FILTER_OTHER }, #undef __rel_sockaddr #define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len) From 508f28c67171e276356650f407dd87d42b6913ef Mon Sep 17 00:00:00 2001 From: Youling Tang Date: Sat, 10 Dec 
2022 22:39:48 +0800 Subject: [PATCH 3809/4122] LoongArch: Consolidate __ex_table construction Consolidate all the __ex_table construction code with a _ASM_EXTABLE or _asm_extable helper. There should be no functional change as a result of this patch. Signed-off-by: Youling Tang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/asm-extable.h | 35 ++++++++++++++++++++++++ arch/loongarch/include/asm/futex.h | 13 ++++----- arch/loongarch/include/asm/uaccess.h | 9 ++---- arch/loongarch/kernel/fpu.S | 5 ++-- arch/loongarch/lib/clear_user.S | 5 ++-- arch/loongarch/lib/copy_user.S | 5 ++-- 6 files changed, 49 insertions(+), 23 deletions(-) create mode 100644 arch/loongarch/include/asm/asm-extable.h diff --git a/arch/loongarch/include/asm/asm-extable.h b/arch/loongarch/include/asm/asm-extable.h new file mode 100644 index 000000000000..4f615bf56727 --- /dev/null +++ b/arch/loongarch/include/asm/asm-extable.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_ASM_EXTABLE_H +#define __ASM_ASM_EXTABLE_H + +#ifdef __ASSEMBLY__ + +#define __ASM_EXTABLE_RAW(insn, fixup) \ + .pushsection __ex_table, "a"; \ + .balign 8; \ + .quad (insn); \ + .quad (fixup); \ + .popsection; + + .macro _asm_extable, insn, fixup + __ASM_EXTABLE_RAW(\insn, \fixup) + .endm + +#else /* __ASSEMBLY__ */ + +#include +#include + +#define __ASM_EXTABLE_RAW(insn, fixup) \ + ".pushsection __ex_table, \"a\"\n" \ + ".balign 8\n" \ + ".quad ((" insn "))\n" \ + ".quad ((" fixup "))\n" \ + ".popsection\n" + +#define _ASM_EXTABLE(insn, fixup) \ + __ASM_EXTABLE_RAW(#insn, #fixup) + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_ASM_EXTABLE_H */ diff --git a/arch/loongarch/include/asm/futex.h b/arch/loongarch/include/asm/futex.h index feb6658c84ff..bdcd1c613299 100644 --- a/arch/loongarch/include/asm/futex.h +++ b/arch/loongarch/include/asm/futex.h @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -22,10 +23,8 @@ "4: li.w %0, %6 \n" \ " b 3b \n" \ " .previous \n" \ - " 
.section __ex_table,\"a\" \n" \ - " "__UA_ADDR "\t1b, 4b \n" \ - " "__UA_ADDR "\t2b, 4b \n" \ - " .previous \n" \ + _ASM_EXTABLE(1b, 4b) \ + _ASM_EXTABLE(2b, 4b) \ : "=r" (ret), "=&r" (oldval), \ "=ZC" (*uaddr) \ : "0" (0), "ZC" (*uaddr), "Jr" (oparg), \ @@ -90,10 +89,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newv "4: li.d %0, %6 \n" " b 3b \n" " .previous \n" - " .section __ex_table,\"a\" \n" - " "__UA_ADDR "\t1b, 4b \n" - " "__UA_ADDR "\t2b, 4b \n" - " .previous \n" + _ASM_EXTABLE(1b, 4b) + _ASM_EXTABLE(2b, 4b) : "+r" (ret), "=&r" (val), "=ZC" (*uaddr) : "ZC" (*uaddr), "Jr" (oldval), "Jr" (newval), "i" (-EFAULT) diff --git a/arch/loongarch/include/asm/uaccess.h b/arch/loongarch/include/asm/uaccess.h index a8ae2af4025a..bf9a4e218ac0 100644 --- a/arch/loongarch/include/asm/uaccess.h +++ b/arch/loongarch/include/asm/uaccess.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -165,9 +166,7 @@ do { \ " move %1, $zero \n" \ " b 2b \n" \ " .previous \n" \ - " .section __ex_table,\"a\" \n" \ - " "__UA_ADDR "\t1b, 3b \n" \ - " .previous \n" \ + _ASM_EXTABLE(1b, 3b) \ : "+r" (__gu_err), "=r" (__gu_tmp) \ : "m" (__m(ptr)), "i" (-EFAULT)); \ \ @@ -196,9 +195,7 @@ do { \ "3: li.w %0, %3 \n" \ " b 2b \n" \ " .previous \n" \ - " .section __ex_table,\"a\" \n" \ - " " __UA_ADDR " 1b, 3b \n" \ - " .previous \n" \ + _ASM_EXTABLE(1b, 3b) \ : "+r" (__pu_err), "=m" (__m(ptr)) \ : "Jr" (__pu_val), "i" (-EFAULT)); \ } diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S index 576b3370a296..ccde94140c89 100644 --- a/arch/loongarch/kernel/fpu.S +++ b/arch/loongarch/kernel/fpu.S @@ -8,6 +8,7 @@ */ #include #include +#include #include #include #include @@ -21,9 +22,7 @@ .macro EX insn, reg, src, offs .ex\@: \insn \reg, \src, \offs - .section __ex_table,"a" - PTR .ex\@, fault - .previous + _asm_extable .ex\@, fault .endm .macro sc_save_fp base diff --git a/arch/loongarch/lib/clear_user.S 
b/arch/loongarch/lib/clear_user.S index 16ba2b8dd68a..7a066d6a41b8 100644 --- a/arch/loongarch/lib/clear_user.S +++ b/arch/loongarch/lib/clear_user.S @@ -5,6 +5,7 @@ #include #include +#include #include #include @@ -15,9 +16,7 @@ jr ra .previous .endif - .section __ex_table, "a" - PTR \from\()b, \to\()b - .previous + _asm_extable \from\()b, \to\()b .endm /* diff --git a/arch/loongarch/lib/copy_user.S b/arch/loongarch/lib/copy_user.S index 97d20327a69e..f8ace04586c2 100644 --- a/arch/loongarch/lib/copy_user.S +++ b/arch/loongarch/lib/copy_user.S @@ -5,6 +5,7 @@ #include #include +#include #include #include @@ -15,9 +16,7 @@ jr ra .previous .endif - .section __ex_table, "a" - PTR \from\()b, \to\()b - .previous + _asm_extable \from\()b, \to\()b .endm /* From 3d36f4298ba91fbdec6bc56aa7bb0663cba6ab0c Mon Sep 17 00:00:00 2001 From: Youling Tang Date: Sat, 10 Dec 2022 22:39:48 +0800 Subject: [PATCH 3810/4122] LoongArch: Switch to relative exception tables Similar to other architectures such as arm64, x86, riscv and so on, use offsets relative to the exception table entry values rather than their absolute addresses for both the exception location and the fixup. However, a label difference on LoongArch will actually produce two relocations, a pair of R_LARCH_ADD32 and R_LARCH_SUB32. Take the simple code below as an example: $ cat test_ex_table.S .section .text 1: nop .section __ex_table,"a" .balign 4 .long (1b - .) .previous $ loongarch64-unknown-linux-gnu-gcc -c test_ex_table.S $ loongarch64-unknown-linux-gnu-readelf -Wr test_ex_table.o Relocation section '.rela__ex_table' at offset 0x100 contains 2 entries: Offset Info Type Symbol's Value Symbol's Name + Addend 0000000000000000 0000000600000032 R_LARCH_ADD32 0000000000000000 .L1^B1 + 0 0000000000000000 0000000500000037 R_LARCH_SUB32 0000000000000000 L0^A + 0 modpost will complain about the R_LARCH_SUB32 relocation, so we need to patch modpost.c to skip this relocation for the .rela__ex_table section. 
Signed-off-by: Youling Tang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/asm-extable.h | 12 ++++---- arch/loongarch/include/asm/extable.h | 26 ++++++++++++++++ arch/loongarch/include/asm/uaccess.h | 2 +- arch/loongarch/mm/extable.c | 38 ++++++++++++++++-------- scripts/mod/modpost.c | 13 ++++++++ scripts/sorttable.c | 2 +- 6 files changed, 72 insertions(+), 21 deletions(-) create mode 100644 arch/loongarch/include/asm/extable.h diff --git a/arch/loongarch/include/asm/asm-extable.h b/arch/loongarch/include/asm/asm-extable.h index 4f615bf56727..74f8bc75472a 100644 --- a/arch/loongarch/include/asm/asm-extable.h +++ b/arch/loongarch/include/asm/asm-extable.h @@ -6,9 +6,9 @@ #define __ASM_EXTABLE_RAW(insn, fixup) \ .pushsection __ex_table, "a"; \ - .balign 8; \ - .quad (insn); \ - .quad (fixup); \ + .balign 4; \ + .long ((insn) - .); \ + .long ((fixup) - .); \ .popsection; .macro _asm_extable, insn, fixup @@ -22,9 +22,9 @@ #define __ASM_EXTABLE_RAW(insn, fixup) \ ".pushsection __ex_table, \"a\"\n" \ - ".balign 8\n" \ - ".quad ((" insn "))\n" \ - ".quad ((" fixup "))\n" \ + ".balign 4\n" \ + ".long ((" insn ") - .)\n" \ + ".long ((" fixup ") - .)\n" \ ".popsection\n" #define _ASM_EXTABLE(insn, fixup) \ diff --git a/arch/loongarch/include/asm/extable.h b/arch/loongarch/include/asm/extable.h new file mode 100644 index 000000000000..b571c89705d1 --- /dev/null +++ b/arch/loongarch/include/asm/extable.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_LOONGARCH_EXTABLE_H +#define _ASM_LOONGARCH_EXTABLE_H + +/* + * The exception table consists of pairs of relative offsets: the first + * is the relative offset to an instruction that is allowed to fault, + * and the second is the relative offset at which the program should + * continue. No registers are modified, so it is entirely up to the + * continuation code to figure out what to do. 
+ * + * All the routines below use bits of fixup code that are out of line + * with the main instruction path. This means when everything is well, + * we don't even have to jump over them. Further, they do not intrude + * on our cache or tlb entries. + */ + +struct exception_table_entry { + int insn, fixup; +}; + +#define ARCH_HAS_RELATIVE_EXTABLE + +bool fixup_exception(struct pt_regs *regs); + +#endif diff --git a/arch/loongarch/include/asm/uaccess.h b/arch/loongarch/include/asm/uaccess.h index bf9a4e218ac0..e33282e0bdef 100644 --- a/arch/loongarch/include/asm/uaccess.h +++ b/arch/loongarch/include/asm/uaccess.h @@ -15,8 +15,8 @@ #include #include #include +#include #include -#include #include extern u64 __ua_limit; diff --git a/arch/loongarch/mm/extable.c b/arch/loongarch/mm/extable.c index bc20988f2b87..08a9a7d6357a 100644 --- a/arch/loongarch/mm/extable.c +++ b/arch/loongarch/mm/extable.c @@ -3,20 +3,32 @@ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ #include -#include -#include #include +#include +#include -int fixup_exception(struct pt_regs *regs) +static inline unsigned long +get_ex_fixup(const struct exception_table_entry *ex) { - const struct exception_table_entry *fixup; - - fixup = search_exception_tables(exception_era(regs)); - if (fixup) { - regs->csr_era = fixup->fixup; - - return 1; - } - - return 0; + return ((unsigned long)&ex->fixup + ex->fixup); +} + +static bool ex_handler_fixup(const struct exception_table_entry *ex, + struct pt_regs *regs) +{ + regs->csr_era = get_ex_fixup(ex); + + return true; +} + + +bool fixup_exception(struct pt_regs *regs) +{ + const struct exception_table_entry *ex; + + ex = search_exception_tables(exception_era(regs)); + if (!ex) + return false; + + return ex_handler_fixup(ex, regs); } diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 2c80da0220c3..9321c0a05ffd 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1523,6 +1523,14 @@ static int addend_mips_rel(struct 
elf_info *elf, Elf_Shdr *sechdr, Elf_Rela *r) #define R_RISCV_SUB32 39 #endif +#ifndef EM_LOONGARCH +#define EM_LOONGARCH 258 +#endif + +#ifndef R_LARCH_SUB32 +#define R_LARCH_SUB32 55 +#endif + static void section_rela(const char *modname, struct elf_info *elf, Elf_Shdr *sechdr) { @@ -1564,6 +1572,11 @@ static void section_rela(const char *modname, struct elf_info *elf, ELF_R_TYPE(r.r_info) == R_RISCV_SUB32) continue; break; + case EM_LOONGARCH: + if (!strcmp("__ex_table", fromsec) && + ELF_R_TYPE(r.r_info) == R_LARCH_SUB32) + continue; + break; } sym = elf->symtab_start + r_sym; /* Skip special sections */ diff --git a/scripts/sorttable.c b/scripts/sorttable.c index fba40e99f354..0f2beda80478 100644 --- a/scripts/sorttable.c +++ b/scripts/sorttable.c @@ -312,12 +312,12 @@ static int do_file(char const *const fname, void *addr) case EM_PARISC: case EM_PPC: case EM_PPC64: + case EM_LOONGARCH: custom_sort = sort_relative_table; break; case EM_ARCOMPACT: case EM_ARCV2: case EM_ARM: - case EM_LOONGARCH: case EM_MICROBLAZE: case EM_MIPS: case EM_XTENSA: From 26bc82441250f2e01621f5b26606a4f6926ee3ad Mon Sep 17 00:00:00 2001 From: Youling Tang Date: Sat, 10 Dec 2022 22:39:59 +0800 Subject: [PATCH 3811/4122] LoongArch: extable: Add `type` and `data` fields This is a LoongArch port of commit d6e2cc564775 ("arm64: extable: add `type` and `data` fields"). Subsequent patches will add specialized handlers for fixups, in addition to the simple PC fixup we have today. In preparation, this patch adds a new `type` field to struct exception_table_entry, and uses this to distinguish the fixup and other cases. A `data` field is also added so that subsequent patches can associate data specific to each exception site (e.g. register numbers). Handlers are named ex_handler_*() for consistency, following the example of x86. At the same time, get_ex_fixup() is split out into a helper so that it can be used by other ex_handler_*() functions in the subsequent patches. 
Signed-off-by: Youling Tang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/asm-extable.h | 15 +++++++++++---- arch/loongarch/include/asm/extable.h | 11 +++++++++++ arch/loongarch/kernel/vmlinux.lds.S | 3 +-- arch/loongarch/mm/extable.c | 7 ++++++- scripts/sorttable.c | 2 +- 5 files changed, 30 insertions(+), 8 deletions(-) diff --git a/arch/loongarch/include/asm/asm-extable.h b/arch/loongarch/include/asm/asm-extable.h index 74f8bc75472a..634bd770e3c4 100644 --- a/arch/loongarch/include/asm/asm-extable.h +++ b/arch/loongarch/include/asm/asm-extable.h @@ -2,17 +2,22 @@ #ifndef __ASM_ASM_EXTABLE_H #define __ASM_ASM_EXTABLE_H +#define EX_TYPE_NONE 0 +#define EX_TYPE_FIXUP 1 + #ifdef __ASSEMBLY__ -#define __ASM_EXTABLE_RAW(insn, fixup) \ +#define __ASM_EXTABLE_RAW(insn, fixup, type, data) \ .pushsection __ex_table, "a"; \ .balign 4; \ .long ((insn) - .); \ .long ((fixup) - .); \ + .short (type); \ + .short (data); \ .popsection; .macro _asm_extable, insn, fixup - __ASM_EXTABLE_RAW(\insn, \fixup) + __ASM_EXTABLE_RAW(\insn, \fixup, EX_TYPE_FIXUP, 0) .endm #else /* __ASSEMBLY__ */ @@ -20,15 +25,17 @@ #include #include -#define __ASM_EXTABLE_RAW(insn, fixup) \ +#define __ASM_EXTABLE_RAW(insn, fixup, type, data) \ ".pushsection __ex_table, \"a\"\n" \ ".balign 4\n" \ ".long ((" insn ") - .)\n" \ ".long ((" fixup ") - .)\n" \ + ".short (" type ")\n" \ + ".short (" data ")\n" \ ".popsection\n" #define _ASM_EXTABLE(insn, fixup) \ - __ASM_EXTABLE_RAW(#insn, #fixup) + __ASM_EXTABLE_RAW(#insn, #fixup, __stringify(EX_TYPE_FIXUP), "0") #endif /* __ASSEMBLY__ */ diff --git a/arch/loongarch/include/asm/extable.h b/arch/loongarch/include/asm/extable.h index b571c89705d1..92612b4364a1 100644 --- a/arch/loongarch/include/asm/extable.h +++ b/arch/loongarch/include/asm/extable.h @@ -17,10 +17,21 @@ struct exception_table_entry { int insn, fixup; + short type, data; }; #define ARCH_HAS_RELATIVE_EXTABLE +#define swap_ex_entry_fixup(a, b, tmp, delta) \ +do { \ + (a)->fixup = 
(b)->fixup + (delta); \ + (b)->fixup = (tmp).fixup - (delta); \ + (a)->type = (b)->type; \ + (b)->type = (tmp).type; \ + (a)->data = (b)->data; \ + (b)->data = (tmp).data; \ +} while (0) + bool fixup_exception(struct pt_regs *regs); #endif diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S index b3309a5e695b..efecda0c2361 100644 --- a/arch/loongarch/kernel/vmlinux.lds.S +++ b/arch/loongarch/kernel/vmlinux.lds.S @@ -4,6 +4,7 @@ #include #define PAGE_SIZE _PAGE_SIZE +#define RO_EXCEPTION_TABLE_ALIGN 4 /* * Put .bss..swapper_pg_dir as the first thing in .bss. This will @@ -53,8 +54,6 @@ SECTIONS . = ALIGN(PECOFF_SEGMENT_ALIGN); _etext = .; - EXCEPTION_TABLE(16) - .got : ALIGN(16) { *(.got) } .plt : ALIGN(16) { *(.plt) } .got.plt : ALIGN(16) { *(.got.plt) } diff --git a/arch/loongarch/mm/extable.c b/arch/loongarch/mm/extable.c index 08a9a7d6357a..fd2395221cff 100644 --- a/arch/loongarch/mm/extable.c +++ b/arch/loongarch/mm/extable.c @@ -30,5 +30,10 @@ bool fixup_exception(struct pt_regs *regs) if (!ex) return false; - return ex_handler_fixup(ex, regs); + switch (ex->type) { + case EX_TYPE_FIXUP: + return ex_handler_fixup(ex, regs); + } + + BUG(); } diff --git a/scripts/sorttable.c b/scripts/sorttable.c index 0f2beda80478..83cdb843d92f 100644 --- a/scripts/sorttable.c +++ b/scripts/sorttable.c @@ -304,6 +304,7 @@ static int do_file(char const *const fname, void *addr) switch (r2(&ehdr->e_machine)) { case EM_386: case EM_AARCH64: + case EM_LOONGARCH: case EM_RISCV: case EM_S390: case EM_X86_64: @@ -312,7 +313,6 @@ static int do_file(char const *const fname, void *addr) case EM_PARISC: case EM_PPC: case EM_PPC64: - case EM_LOONGARCH: custom_sort = sort_relative_table; break; case EM_ARCOMPACT: From 672999cfae3e830a64c4996362a26934fd555ff9 Mon Sep 17 00:00:00 2001 From: Youling Tang Date: Sat, 10 Dec 2022 22:39:59 +0800 Subject: [PATCH 3812/4122] LoongArch: extable: Add a dedicated uaccess handler Inspired by commit 2e77a62cb3a6("arm64: 
extable: add a dedicated uaccess handler"), do similar to LoongArch to add a dedicated uaccess exception handler to update registers in exception context and subsequently return back into the function which faulted, so we remove the need for fixups specialized to each faulting instruction. Add gpr-num.h here because we need to map the same GPR names to integer constants, so that we can use this to build meta-data for the exception fixups. The compiler treats gpr 0 as zero rather than $r0, so set it separately to .L__gpr_num_zero, otherwise the following assembly error will occur: {standard input}: Assembler messages: {standard input}:1074: Error: invalid operands (*UND* and *ABS* sections) for `<<' {standard input}:1160: Error: invalid operands (*UND* and *ABS* sections) for `<<' make[1]: *** [scripts/Makefile.build:249: fs/fcntl.o] Error 1 Signed-off-by: Youling Tang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/asm-extable.h | 22 ++++++++++++++++++++++ arch/loongarch/include/asm/futex.h | 22 ++++++---------------- arch/loongarch/include/asm/gpr-num.h | 22 ++++++++++++++++++++++ arch/loongarch/include/asm/uaccess.h | 17 ++++------------- arch/loongarch/mm/extable.c | 22 ++++++++++++++++++++++ 5 files changed, 76 insertions(+), 29 deletions(-) create mode 100644 arch/loongarch/include/asm/gpr-num.h diff --git a/arch/loongarch/include/asm/asm-extable.h b/arch/loongarch/include/asm/asm-extable.h index 634bd770e3c4..f5502cb50c6e 100644 --- a/arch/loongarch/include/asm/asm-extable.h +++ b/arch/loongarch/include/asm/asm-extable.h @@ -4,6 +4,7 @@ #define EX_TYPE_NONE 0 #define EX_TYPE_FIXUP 1 +#define EX_TYPE_UACCESS_ERR_ZERO 2 #ifdef __ASSEMBLY__ @@ -24,6 +25,7 @@ #include #include +#include #define __ASM_EXTABLE_RAW(insn, fixup, type, data) \ ".pushsection __ex_table, \"a\"\n" \ @@ -37,6 +39,26 @@ #define _ASM_EXTABLE(insn, fixup) \ __ASM_EXTABLE_RAW(#insn, #fixup, __stringify(EX_TYPE_FIXUP), "0") +#define EX_DATA_REG_ERR_SHIFT 0 +#define EX_DATA_REG_ERR
GENMASK(4, 0) +#define EX_DATA_REG_ZERO_SHIFT 5 +#define EX_DATA_REG_ZERO GENMASK(9, 5) + +#define EX_DATA_REG(reg, gpr) \ + "((.L__gpr_num_" #gpr ") << " __stringify(EX_DATA_REG_##reg##_SHIFT) ")" + +#define _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero) \ + __DEFINE_ASM_GPR_NUMS \ + __ASM_EXTABLE_RAW(#insn, #fixup, \ + __stringify(EX_TYPE_UACCESS_ERR_ZERO), \ + "(" \ + EX_DATA_REG(ERR, err) " | " \ + EX_DATA_REG(ZERO, zero) \ + ")") + +#define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err) \ + _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero) + #endif /* __ASSEMBLY__ */ #endif /* __ASM_ASM_EXTABLE_H */ diff --git a/arch/loongarch/include/asm/futex.h b/arch/loongarch/include/asm/futex.h index bdcd1c613299..042ca4448e4d 100644 --- a/arch/loongarch/include/asm/futex.h +++ b/arch/loongarch/include/asm/futex.h @@ -19,16 +19,11 @@ "2: sc.w $t0, %2 \n" \ " beqz $t0, 1b \n" \ "3: \n" \ - " .section .fixup,\"ax\" \n" \ - "4: li.w %0, %6 \n" \ - " b 3b \n" \ - " .previous \n" \ - _ASM_EXTABLE(1b, 4b) \ - _ASM_EXTABLE(2b, 4b) \ + _ASM_EXTABLE_UACCESS_ERR(1b, 3b, %0) \ + _ASM_EXTABLE_UACCESS_ERR(2b, 3b, %0) \ : "=r" (ret), "=&r" (oldval), \ "=ZC" (*uaddr) \ - : "0" (0), "ZC" (*uaddr), "Jr" (oparg), \ - "i" (-EFAULT) \ + : "0" (0), "ZC" (*uaddr), "Jr" (oparg) \ : "memory", "t0"); \ } @@ -85,15 +80,10 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newv " beqz $t0, 1b \n" "3: \n" __WEAK_LLSC_MB - " .section .fixup,\"ax\" \n" - "4: li.d %0, %6 \n" - " b 3b \n" - " .previous \n" - _ASM_EXTABLE(1b, 4b) - _ASM_EXTABLE(2b, 4b) + _ASM_EXTABLE_UACCESS_ERR(1b, 3b, %0) + _ASM_EXTABLE_UACCESS_ERR(2b, 3b, %0) : "+r" (ret), "=&r" (val), "=ZC" (*uaddr) - : "ZC" (*uaddr), "Jr" (oldval), "Jr" (newval), - "i" (-EFAULT) + : "ZC" (*uaddr), "Jr" (oldval), "Jr" (newval) : "memory", "t0"); *uval = val; diff --git a/arch/loongarch/include/asm/gpr-num.h b/arch/loongarch/include/asm/gpr-num.h new file mode 100644 index 000000000000..e0941af20c7e --- /dev/null +++ 
b/arch/loongarch/include/asm/gpr-num.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_GPR_NUM_H +#define __ASM_GPR_NUM_H + +#ifdef __ASSEMBLY__ + + .equ .L__gpr_num_zero, 0 + .irp num,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 + .equ .L__gpr_num_$r\num, \num + .endr + +#else /* __ASSEMBLY__ */ + +#define __DEFINE_ASM_GPR_NUMS \ +" .equ .L__gpr_num_zero, 0\n" \ +" .irp num,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31\n" \ +" .equ .L__gpr_num_$r\\num, \\num\n" \ +" .endr\n" \ + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_GPR_NUM_H */ diff --git a/arch/loongarch/include/asm/uaccess.h b/arch/loongarch/include/asm/uaccess.h index e33282e0bdef..255899d4a7c3 100644 --- a/arch/loongarch/include/asm/uaccess.h +++ b/arch/loongarch/include/asm/uaccess.h @@ -161,14 +161,9 @@ do { \ __asm__ __volatile__( \ "1: " insn " %1, %2 \n" \ "2: \n" \ - " .section .fixup,\"ax\" \n" \ - "3: li.w %0, %3 \n" \ - " move %1, $zero \n" \ - " b 2b \n" \ - " .previous \n" \ - _ASM_EXTABLE(1b, 3b) \ + _ASM_EXTABLE_UACCESS_ERR_ZERO(1b, 2b, %0, %1) \ : "+r" (__gu_err), "=r" (__gu_tmp) \ - : "m" (__m(ptr)), "i" (-EFAULT)); \ + : "m" (__m(ptr))); \ \ (val) = (__typeof__(*(ptr))) __gu_tmp; \ } @@ -191,13 +186,9 @@ do { \ __asm__ __volatile__( \ "1: " insn " %z2, %1 # __put_user_asm\n" \ "2: \n" \ - " .section .fixup,\"ax\" \n" \ - "3: li.w %0, %3 \n" \ - " b 2b \n" \ - " .previous \n" \ - _ASM_EXTABLE(1b, 3b) \ + _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %0) \ : "+r" (__pu_err), "=m" (__m(ptr)) \ - : "Jr" (__pu_val), "i" (-EFAULT)); \ + : "Jr" (__pu_val)); \ } #define __get_kernel_nofault(dst, src, type, err_label) \ diff --git a/arch/loongarch/mm/extable.c b/arch/loongarch/mm/extable.c index fd2395221cff..9b0cfd898940 100644 --- a/arch/loongarch/mm/extable.c +++ b/arch/loongarch/mm/extable.c @@ -2,6 +2,7 @@ /* * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ +#include #include 
#include #include @@ -13,6 +14,13 @@ get_ex_fixup(const struct exception_table_entry *ex) return ((unsigned long)&ex->fixup + ex->fixup); } +static inline void regs_set_gpr(struct pt_regs *regs, + unsigned int offset, unsigned long val) +{ + if (offset && offset <= MAX_REG_OFFSET) + *(unsigned long *)((unsigned long)regs + offset) = val; +} + static bool ex_handler_fixup(const struct exception_table_entry *ex, struct pt_regs *regs) { @@ -21,6 +29,18 @@ static bool ex_handler_fixup(const struct exception_table_entry *ex, return true; } +static bool ex_handler_uaccess_err_zero(const struct exception_table_entry *ex, + struct pt_regs *regs) +{ + int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data); + int reg_zero = FIELD_GET(EX_DATA_REG_ZERO, ex->data); + + regs_set_gpr(regs, reg_err * sizeof(unsigned long), -EFAULT); + regs_set_gpr(regs, reg_zero * sizeof(unsigned long), 0); + regs->csr_era = get_ex_fixup(ex); + + return true; +} bool fixup_exception(struct pt_regs *regs) { @@ -33,6 +53,8 @@ bool fixup_exception(struct pt_regs *regs) switch (ex->type) { case EX_TYPE_FIXUP: return ex_handler_fixup(ex, regs); + case EX_TYPE_UACCESS_ERR_ZERO: + return ex_handler_uaccess_err_zero(ex, regs); } BUG(); From 912bcfaf36771a2bf7a83799ce5454850d1c3f40 Mon Sep 17 00:00:00 2001 From: Youling Tang Date: Sat, 10 Dec 2022 22:39:59 +0800 Subject: [PATCH 3813/4122] LoongArch: Remove the .fixup section usage Use the `.L_xxx` label to improve fixup code and then remove the .fixup section usage. 
Signed-off-by: Youling Tang Signed-off-by: Huacai Chen --- arch/loongarch/lib/clear_user.S | 14 +++++--------- arch/loongarch/lib/copy_user.S | 16 ++++++---------- 2 files changed, 11 insertions(+), 19 deletions(-) diff --git a/arch/loongarch/lib/clear_user.S b/arch/loongarch/lib/clear_user.S index 7a066d6a41b8..d5c9e44ac8c4 100644 --- a/arch/loongarch/lib/clear_user.S +++ b/arch/loongarch/lib/clear_user.S @@ -9,15 +9,11 @@ #include #include -.macro fixup_ex from, to, offset, fix -.if \fix - .section .fixup, "ax" -\to: addi.d a0, a1, \offset +.irp to, 0 +.L_fixup_handle_\to\(): + addi.d a0, a1, (\to) * (-8) jr ra - .previous -.endif - _asm_extable \from\()b, \to\()b -.endm +.endr /* * unsigned long __clear_user(void *addr, size_t size) @@ -36,7 +32,7 @@ SYM_FUNC_START(__clear_user) 2: move a0, a1 jr ra - fixup_ex 1, 3, 0, 1 + _asm_extable 1b, .L_fixup_handle_0 SYM_FUNC_END(__clear_user) EXPORT_SYMBOL(__clear_user) diff --git a/arch/loongarch/lib/copy_user.S b/arch/loongarch/lib/copy_user.S index f8ace04586c2..61933d964da0 100644 --- a/arch/loongarch/lib/copy_user.S +++ b/arch/loongarch/lib/copy_user.S @@ -9,15 +9,11 @@ #include #include -.macro fixup_ex from, to, offset, fix -.if \fix - .section .fixup, "ax" -\to: addi.d a0, a2, \offset +.irp to, 0 +.L_fixup_handle_\to\(): + addi.d a0, a2, (\to) * (-8) jr ra - .previous -.endif - _asm_extable \from\()b, \to\()b -.endm +.endr /* * unsigned long __copy_user(void *to, const void *from, size_t n) @@ -39,8 +35,8 @@ SYM_FUNC_START(__copy_user) 3: move a0, a2 jr ra - fixup_ex 1, 4, 0, 1 - fixup_ex 2, 4, 0, 0 + _asm_extable 1b, .L_fixup_handle_0 + _asm_extable 2b, .L_fixup_handle_0 SYM_FUNC_END(__copy_user) EXPORT_SYMBOL(__copy_user) From dbcd7f5fafea64dbe588c4ec18bc309fde5d1e1c Mon Sep 17 00:00:00 2001 From: Youling Tang Date: Sat, 10 Dec 2022 22:39:59 +0800 Subject: [PATCH 3814/4122] LoongArch: BPF: Add BPF exception tables Inspired by commit 800834285361("bpf, arm64: Add BPF exception tables"), do similar to LoongArch 
to add BPF exception tables. When a tracing BPF program attempts to read memory without using the bpf_probe_read() helper, the verifier marks the load instruction with the BPF_PROBE_MEM flag. Since the LoongArch JIT does not currently recognize this flag it falls back to the interpreter. Add support for BPF_PROBE_MEM, by appending an exception table to the BPF program. If the load instruction causes a data abort, the fixup infrastructure finds the exception table and fixes up the fault, by clearing the destination register and jumping over the faulting instruction. To keep the compact exception table entry format, inspect the pc in fixup_exception(). A more generic solution would add a "handler" field to the table entry, like on x86, s390 and arm64, etc. Signed-off-by: Youling Tang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/asm-extable.h | 1 + arch/loongarch/include/asm/extable.h | 10 +++ arch/loongarch/mm/extable.c | 2 + arch/loongarch/net/bpf_jit.c | 86 ++++++++++++++++++++++-- arch/loongarch/net/bpf_jit.h | 2 + 5 files changed, 96 insertions(+), 5 deletions(-) diff --git a/arch/loongarch/include/asm/asm-extable.h b/arch/loongarch/include/asm/asm-extable.h index f5502cb50c6e..df05005f2b80 100644 --- a/arch/loongarch/include/asm/asm-extable.h +++ b/arch/loongarch/include/asm/asm-extable.h @@ -5,6 +5,7 @@ #define EX_TYPE_NONE 0 #define EX_TYPE_FIXUP 1 #define EX_TYPE_UACCESS_ERR_ZERO 2 +#define EX_TYPE_BPF 3 #ifdef __ASSEMBLY__ diff --git a/arch/loongarch/include/asm/extable.h b/arch/loongarch/include/asm/extable.h index 92612b4364a1..5abf29f1bc91 100644 --- a/arch/loongarch/include/asm/extable.h +++ b/arch/loongarch/include/asm/extable.h @@ -32,6 +32,16 @@ do { \ (b)->data = (tmp).data; \ } while (0) +#ifdef CONFIG_BPF_JIT +bool ex_handler_bpf(const struct exception_table_entry *ex, struct pt_regs *regs); +#else +static inline +bool ex_handler_bpf(const struct exception_table_entry *ex, struct pt_regs *regs) +{ + return false; +} +#endif /* 
!CONFIG_BPF_JIT */ + bool fixup_exception(struct pt_regs *regs); #endif diff --git a/arch/loongarch/mm/extable.c b/arch/loongarch/mm/extable.c index 9b0cfd898940..9ab69872dcff 100644 --- a/arch/loongarch/mm/extable.c +++ b/arch/loongarch/mm/extable.c @@ -55,6 +55,8 @@ bool fixup_exception(struct pt_regs *regs) return ex_handler_fixup(ex, regs); case EX_TYPE_UACCESS_ERR_ZERO: return ex_handler_uaccess_err_zero(ex, regs); + case EX_TYPE_BPF: + return ex_handler_bpf(ex, regs); } BUG(); diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index bdcd0c7719a9..c4b1947ebf76 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -387,6 +387,65 @@ static bool is_signed_bpf_cond(u8 cond) cond == BPF_JSGE || cond == BPF_JSLE; } +#define BPF_FIXUP_REG_MASK GENMASK(31, 27) +#define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0) + +bool ex_handler_bpf(const struct exception_table_entry *ex, + struct pt_regs *regs) +{ + int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup); + off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup); + + regs->regs[dst_reg] = 0; + regs->csr_era = (unsigned long)&ex->fixup - offset; + + return true; +} + +/* For accesses to BTF pointers, add an entry to the exception table */ +static int add_exception_handler(const struct bpf_insn *insn, + struct jit_ctx *ctx, + int dst_reg) +{ + unsigned long pc; + off_t offset; + struct exception_table_entry *ex; + + if (!ctx->image || !ctx->prog->aux->extable || BPF_MODE(insn->code) != BPF_PROBE_MEM) + return 0; + + if (WARN_ON_ONCE(ctx->num_exentries >= ctx->prog->aux->num_exentries)) + return -EINVAL; + + ex = &ctx->prog->aux->extable[ctx->num_exentries]; + pc = (unsigned long)&ctx->image[ctx->idx - 1]; + + offset = pc - (long)&ex->insn; + if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN)) + return -ERANGE; + + ex->insn = offset; + + /* + * Since the extable follows the program, the fixup offset is always + * negative and limited to BPF_JIT_REGION_SIZE. 
Store a positive value + * to keep things simple, and put the destination register in the upper + * bits. We don't need to worry about buildtime or runtime sort + * modifying the upper bits because the table is already sorted, and + * isn't part of the main exception table. + */ + offset = (long)&ex->fixup - (pc + LOONGARCH_INSN_SIZE); + if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, offset)) + return -ERANGE; + + ex->type = EX_TYPE_BPF; + ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, offset) | FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg); + + ctx->num_exentries++; + + return 0; +} + static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool extra_pass) { u8 tm = -1; @@ -816,6 +875,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext case BPF_LDX | BPF_MEM | BPF_H: case BPF_LDX | BPF_MEM | BPF_W: case BPF_LDX | BPF_MEM | BPF_DW: + case BPF_LDX | BPF_PROBE_MEM | BPF_DW: + case BPF_LDX | BPF_PROBE_MEM | BPF_W: + case BPF_LDX | BPF_PROBE_MEM | BPF_H: + case BPF_LDX | BPF_PROBE_MEM | BPF_B: switch (BPF_SIZE(code)) { case BPF_B: if (is_signed_imm12(off)) { @@ -854,6 +917,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext } break; } + + ret = add_exception_handler(insn, ctx, dst); + if (ret) + return ret; break; /* *(size *)(dst + off) = imm */ @@ -1018,6 +1085,9 @@ static int validate_code(struct jit_ctx *ctx) return -1; } + if (WARN_ON_ONCE(ctx->num_exentries != ctx->prog->aux->num_exentries)) + return -1; + return 0; } @@ -1025,7 +1095,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) { bool tmp_blinded = false, extra_pass = false; u8 *image_ptr; - int image_size; + int image_size, prog_size, extable_size; struct jit_ctx ctx; struct jit_data *jit_data; struct bpf_binary_header *header; @@ -1066,7 +1136,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) image_ptr = jit_data->image; header = jit_data->header; extra_pass = true; - image_size = sizeof(u32) * ctx.idx; + 
prog_size = sizeof(u32) * ctx.idx; goto skip_init_ctx; } @@ -1088,12 +1158,15 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) ctx.epilogue_offset = ctx.idx; build_epilogue(&ctx); + extable_size = prog->aux->num_exentries * sizeof(struct exception_table_entry); + /* Now we know the actual image size. * As each LoongArch instruction is of length 32bit, * we are translating number of JITed intructions into * the size required to store these JITed code. */ - image_size = sizeof(u32) * ctx.idx; + prog_size = sizeof(u32) * ctx.idx; + image_size = prog_size + extable_size; /* Now we know the size of the structure to make */ header = bpf_jit_binary_alloc(image_size, &image_ptr, sizeof(u32), jit_fill_hole); @@ -1104,9 +1177,12 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) /* 2. Now, the actual pass to generate final JIT code */ ctx.image = (union loongarch_instruction *)image_ptr; + if (extable_size) + prog->aux->extable = (void *)image_ptr + prog_size; skip_init_ctx: ctx.idx = 0; + ctx.num_exentries = 0; build_prologue(&ctx); if (build_body(&ctx, extra_pass)) { @@ -1125,7 +1201,7 @@ skip_init_ctx: /* And we're done */ if (bpf_jit_enable > 1) - bpf_jit_dump(prog->len, image_size, 2, ctx.image); + bpf_jit_dump(prog->len, prog_size, 2, ctx.image); /* Update the icache */ flush_icache_range((unsigned long)header, (unsigned long)(ctx.image + ctx.idx)); @@ -1147,7 +1223,7 @@ skip_init_ctx: jit_data->header = header; } prog->jited = 1; - prog->jited_len = image_size; + prog->jited_len = prog_size; prog->bpf_func = (void *)ctx.image; if (!prog->is_func || extra_pass) { diff --git a/arch/loongarch/net/bpf_jit.h b/arch/loongarch/net/bpf_jit.h index e665ddb0aeb8..ca708024fdd3 100644 --- a/arch/loongarch/net/bpf_jit.h +++ b/arch/loongarch/net/bpf_jit.h @@ -4,6 +4,7 @@ * * Copyright (C) 2022 Loongson Technology Corporation Limited */ +#include #include #include #include @@ -15,6 +16,7 @@ struct jit_ctx { unsigned int flags; unsigned int epilogue_offset; 
u32 *offset; + int num_exentries; union loongarch_instruction *image; u32 stack_size; }; From 61a6fccc0bd2e8030b2672a52ef3f6706b2b2ee4 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 10 Dec 2022 22:39:59 +0800 Subject: [PATCH 3815/4122] LoongArch: Add unaligned access support Loongson-2 series (Loongson-2K500, Loongson-2K1000) don't support unaligned access in hardware, while Loongson-3 series (Loongson-3A5000, Loongson-3C5000) can be configured to support unaligned access in hardware or not. This patch adds unaligned access emulation for those LoongArch processors without hardware support. Signed-off-by: Huacai Chen --- Documentation/admin-guide/sysctl/kernel.rst | 8 +- arch/loongarch/Kconfig | 2 + arch/loongarch/include/asm/inst.h | 14 + arch/loongarch/include/asm/thread_info.h | 2 +- arch/loongarch/kernel/Makefile | 3 +- arch/loongarch/kernel/traps.c | 27 ++ arch/loongarch/kernel/unaligned.c | 499 ++++++++++++++++++++ arch/loongarch/lib/Makefile | 2 +- arch/loongarch/lib/unaligned.S | 84 ++++ 9 files changed, 634 insertions(+), 7 deletions(-) create mode 100644 arch/loongarch/kernel/unaligned.c create mode 100644 arch/loongarch/lib/unaligned.S diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 98d1b198b2b4..f2b802cd6208 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -433,8 +433,8 @@ ignore-unaligned-usertrap On architectures where unaligned accesses cause traps, and where this feature is supported (``CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN``; -currently, ``arc`` and ``ia64``), controls whether all unaligned traps -are logged. +currently, ``arc``, ``ia64`` and ``loongarch``), controls whether all +unaligned traps are logged. = ============================================================= 0 Log all unaligned accesses.
@@ -1457,8 +1457,8 @@ unaligned-trap On architectures where unaligned accesses cause traps, and where this feature is supported (``CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW``; currently, -``arc`` and ``parisc``), controls whether unaligned traps are caught -and emulated (instead of failing). +``arc``, ``parisc`` and ``loongarch``), controls whether unaligned traps +are caught and emulated (instead of failing). = ======================================================== 0 Do not emulate unaligned accesses. diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 903096bd87f8..0daf6263655b 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -121,6 +121,8 @@ config LOONGARCH select RTC_LIB select SMP select SPARSE_IRQ + select SYSCTL_ARCH_UNALIGN_ALLOW + select SYSCTL_ARCH_UNALIGN_NO_WARN select SYSCTL_EXCEPTION_TRACE select SWIOTLB select TRACE_IRQFLAGS_SUPPORT diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h index fce1843ceebb..889d6c9fc2b6 100644 --- a/arch/loongarch/include/asm/inst.h +++ b/arch/loongarch/include/asm/inst.h @@ -76,6 +76,10 @@ enum reg2i12_op { ldbu_op = 0xa8, ldhu_op = 0xa9, ldwu_op = 0xaa, + flds_op = 0xac, + fsts_op = 0xad, + fldd_op = 0xae, + fstd_op = 0xaf, }; enum reg2i14_op { @@ -146,6 +150,10 @@ enum reg3_op { ldxbu_op = 0x7040, ldxhu_op = 0x7048, ldxwu_op = 0x7050, + fldxs_op = 0x7060, + fldxd_op = 0x7068, + fstxs_op = 0x7070, + fstxd_op = 0x7078, amswapw_op = 0x70c0, amswapd_op = 0x70c1, amaddw_op = 0x70c2, @@ -566,4 +574,10 @@ static inline void emit_##NAME(union loongarch_instruction *insn, \ DEF_EMIT_REG3SA2_FORMAT(alsld, alsld_op) +struct pt_regs; + +void emulate_load_store_insn(struct pt_regs *regs, void __user *addr, unsigned int *pc); +unsigned long unaligned_read(void __user *addr, void *value, unsigned long n, bool sign); +unsigned long unaligned_write(void __user *addr, unsigned long value, unsigned long n); + #endif /* _ASM_INST_H */ diff --git 
a/arch/loongarch/include/asm/thread_info.h b/arch/loongarch/include/asm/thread_info.h index b7dd9f19a5a9..1a3354ca056e 100644 --- a/arch/loongarch/include/asm/thread_info.h +++ b/arch/loongarch/include/asm/thread_info.h @@ -38,7 +38,7 @@ struct thread_info { #define INIT_THREAD_INFO(tsk) \ { \ .task = &tsk, \ - .flags = 0, \ + .flags = _TIF_FIXADE, \ .cpu = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ } diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 42be564278fa..2ad2555b53ea 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -7,7 +7,8 @@ extra-y := vmlinux.lds obj-y += head.o cpu-probe.o cacheinfo.o env.o setup.o entry.o genex.o \ traps.o irq.o idle.o process.o dma.o mem.o io.o reset.o switch.o \ - elf.o syscall.o signal.o time.o topology.o inst.o ptrace.o vdso.o + elf.o syscall.o signal.o time.o topology.o inst.o ptrace.o vdso.o \ + unaligned.o obj-$(CONFIG_ACPI) += acpi.o obj-$(CONFIG_EFI) += efi.o diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index 1a4dce84ebc6..7ea62faeeadb 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -368,13 +368,40 @@ asmlinkage void noinstr do_ade(struct pt_regs *regs) irqentry_exit(regs, state); } +/* sysctl hooks */ +int unaligned_enabled __read_mostly = 1; /* Enabled by default */ +int no_unaligned_warning __read_mostly = 1; /* Only 1 warning by default */ + asmlinkage void noinstr do_ale(struct pt_regs *regs) { + unsigned int *pc; irqentry_state_t state = irqentry_enter(regs); + perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, regs->csr_badvaddr); + + /* + * Did we catch a fault trying to load an instruction? 
+ */ + if (regs->csr_badvaddr == regs->csr_era) + goto sigbus; + if (user_mode(regs) && !test_thread_flag(TIF_FIXADE)) + goto sigbus; + if (!unaligned_enabled) + goto sigbus; + if (!no_unaligned_warning) + show_registers(regs); + + pc = (unsigned int *)exception_era(regs); + + emulate_load_store_insn(regs, (void __user *)regs->csr_badvaddr, pc); + + goto out; + +sigbus: die_if_kernel("Kernel ale access", regs); force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)regs->csr_badvaddr); +out: irqentry_exit(regs, state); } diff --git a/arch/loongarch/kernel/unaligned.c b/arch/loongarch/kernel/unaligned.c new file mode 100644 index 000000000000..bdff825d29ef --- /dev/null +++ b/arch/loongarch/kernel/unaligned.c @@ -0,0 +1,499 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Handle unaligned accesses by emulation. + * + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + * + * Derived from MIPS: + * Copyright (C) 1996, 1998, 1999, 2002 by Ralf Baechle + * Copyright (C) 1999 Silicon Graphics, Inc. + * Copyright (C) 2014 Imagination Technologies Ltd. 
+ */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "access-helper.h" + +#ifdef CONFIG_DEBUG_FS +static u32 unaligned_instructions_user; +static u32 unaligned_instructions_kernel; +#endif + +static inline unsigned long read_fpr(unsigned int idx) +{ +#define READ_FPR(idx, __value) \ + __asm__ __volatile__("movfr2gr.d %0, $f"#idx"\n\t" : "=r"(__value)); + + unsigned long __value; + + switch (idx) { + case 0: + READ_FPR(0, __value); + break; + case 1: + READ_FPR(1, __value); + break; + case 2: + READ_FPR(2, __value); + break; + case 3: + READ_FPR(3, __value); + break; + case 4: + READ_FPR(4, __value); + break; + case 5: + READ_FPR(5, __value); + break; + case 6: + READ_FPR(6, __value); + break; + case 7: + READ_FPR(7, __value); + break; + case 8: + READ_FPR(8, __value); + break; + case 9: + READ_FPR(9, __value); + break; + case 10: + READ_FPR(10, __value); + break; + case 11: + READ_FPR(11, __value); + break; + case 12: + READ_FPR(12, __value); + break; + case 13: + READ_FPR(13, __value); + break; + case 14: + READ_FPR(14, __value); + break; + case 15: + READ_FPR(15, __value); + break; + case 16: + READ_FPR(16, __value); + break; + case 17: + READ_FPR(17, __value); + break; + case 18: + READ_FPR(18, __value); + break; + case 19: + READ_FPR(19, __value); + break; + case 20: + READ_FPR(20, __value); + break; + case 21: + READ_FPR(21, __value); + break; + case 22: + READ_FPR(22, __value); + break; + case 23: + READ_FPR(23, __value); + break; + case 24: + READ_FPR(24, __value); + break; + case 25: + READ_FPR(25, __value); + break; + case 26: + READ_FPR(26, __value); + break; + case 27: + READ_FPR(27, __value); + break; + case 28: + READ_FPR(28, __value); + break; + case 29: + READ_FPR(29, __value); + break; + case 30: + READ_FPR(30, __value); + break; + case 31: + READ_FPR(31, __value); + break; + default: + panic("unexpected idx '%d'", idx); + } +#undef READ_FPR + return __value; +} + +static inline void 
write_fpr(unsigned int idx, unsigned long value) +{ +#define WRITE_FPR(idx, value) \ + __asm__ __volatile__("movgr2fr.d $f"#idx", %0\n\t" :: "r"(value)); + + switch (idx) { + case 0: + WRITE_FPR(0, value); + break; + case 1: + WRITE_FPR(1, value); + break; + case 2: + WRITE_FPR(2, value); + break; + case 3: + WRITE_FPR(3, value); + break; + case 4: + WRITE_FPR(4, value); + break; + case 5: + WRITE_FPR(5, value); + break; + case 6: + WRITE_FPR(6, value); + break; + case 7: + WRITE_FPR(7, value); + break; + case 8: + WRITE_FPR(8, value); + break; + case 9: + WRITE_FPR(9, value); + break; + case 10: + WRITE_FPR(10, value); + break; + case 11: + WRITE_FPR(11, value); + break; + case 12: + WRITE_FPR(12, value); + break; + case 13: + WRITE_FPR(13, value); + break; + case 14: + WRITE_FPR(14, value); + break; + case 15: + WRITE_FPR(15, value); + break; + case 16: + WRITE_FPR(16, value); + break; + case 17: + WRITE_FPR(17, value); + break; + case 18: + WRITE_FPR(18, value); + break; + case 19: + WRITE_FPR(19, value); + break; + case 20: + WRITE_FPR(20, value); + break; + case 21: + WRITE_FPR(21, value); + break; + case 22: + WRITE_FPR(22, value); + break; + case 23: + WRITE_FPR(23, value); + break; + case 24: + WRITE_FPR(24, value); + break; + case 25: + WRITE_FPR(25, value); + break; + case 26: + WRITE_FPR(26, value); + break; + case 27: + WRITE_FPR(27, value); + break; + case 28: + WRITE_FPR(28, value); + break; + case 29: + WRITE_FPR(29, value); + break; + case 30: + WRITE_FPR(30, value); + break; + case 31: + WRITE_FPR(31, value); + break; + default: + panic("unexpected idx '%d'", idx); + } +#undef WRITE_FPR +} + +void emulate_load_store_insn(struct pt_regs *regs, void __user *addr, unsigned int *pc) +{ + bool fp = false; + bool sign, write; + bool user = user_mode(regs); + unsigned int res, size = 0; + unsigned long value = 0; + union loongarch_instruction insn; + + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0); + + __get_inst(&insn.word, pc, user); + + 
switch (insn.reg2i12_format.opcode) { + case ldh_op: + size = 2; + sign = true; + write = false; + break; + case ldhu_op: + size = 2; + sign = false; + write = false; + break; + case sth_op: + size = 2; + sign = true; + write = true; + break; + case ldw_op: + size = 4; + sign = true; + write = false; + break; + case ldwu_op: + size = 4; + sign = false; + write = false; + break; + case stw_op: + size = 4; + sign = true; + write = true; + break; + case ldd_op: + size = 8; + sign = true; + write = false; + break; + case std_op: + size = 8; + sign = true; + write = true; + break; + case flds_op: + size = 4; + fp = true; + sign = true; + write = false; + break; + case fsts_op: + size = 4; + fp = true; + sign = true; + write = true; + break; + case fldd_op: + size = 8; + fp = true; + sign = true; + write = false; + break; + case fstd_op: + size = 8; + fp = true; + sign = true; + write = true; + break; + } + + switch (insn.reg2i14_format.opcode) { + case ldptrw_op: + size = 4; + sign = true; + write = false; + break; + case stptrw_op: + size = 4; + sign = true; + write = true; + break; + case ldptrd_op: + size = 8; + sign = true; + write = false; + break; + case stptrd_op: + size = 8; + sign = true; + write = true; + break; + } + + switch (insn.reg3_format.opcode) { + case ldxh_op: + size = 2; + sign = true; + write = false; + break; + case ldxhu_op: + size = 2; + sign = false; + write = false; + break; + case stxh_op: + size = 2; + sign = true; + write = true; + break; + case ldxw_op: + size = 4; + sign = true; + write = false; + break; + case ldxwu_op: + size = 4; + sign = false; + write = false; + break; + case stxw_op: + size = 4; + sign = true; + write = true; + break; + case ldxd_op: + size = 8; + sign = true; + write = false; + break; + case stxd_op: + size = 8; + sign = true; + write = true; + break; + case fldxs_op: + size = 4; + fp = true; + sign = true; + write = false; + break; + case fstxs_op: + size = 4; + fp = true; + sign = true; + write = true; + break; + 
case fldxd_op: + size = 8; + fp = true; + sign = true; + write = false; + break; + case fstxd_op: + size = 8; + fp = true; + sign = true; + write = true; + break; + } + + if (!size) + goto sigbus; + if (user && !access_ok(addr, size)) + goto sigbus; + + if (!write) { + res = unaligned_read(addr, &value, size, sign); + if (res) + goto fault; + + /* Rd is the same field in any formats */ + if (!fp) + regs->regs[insn.reg3_format.rd] = value; + else { + if (is_fpu_owner()) + write_fpr(insn.reg3_format.rd, value); + else + set_fpr64(&current->thread.fpu.fpr[insn.reg3_format.rd], 0, value); + } + } else { + /* Rd is the same field in any formats */ + if (!fp) + value = regs->regs[insn.reg3_format.rd]; + else { + if (is_fpu_owner()) + value = read_fpr(insn.reg3_format.rd); + else + value = get_fpr64(&current->thread.fpu.fpr[insn.reg3_format.rd], 0); + } + + res = unaligned_write(addr, value, size); + if (res) + goto fault; + } + +#ifdef CONFIG_DEBUG_FS + if (user) + unaligned_instructions_user++; + else + unaligned_instructions_kernel++; +#endif + + compute_return_era(regs); + + return; + +fault: + /* Did we have an exception handler installed? 
*/ + if (fixup_exception(regs)) + return; + + die_if_kernel("Unhandled kernel unaligned access", regs); + force_sig(SIGSEGV); + + return; + +sigbus: + die_if_kernel("Unhandled kernel unaligned access", regs); + force_sig(SIGBUS); + + return; +} + +#ifdef CONFIG_DEBUG_FS +static int __init debugfs_unaligned(void) +{ + struct dentry *d; + + d = debugfs_create_dir("loongarch", NULL); + if (!d) + return -ENOMEM; + + debugfs_create_u32("unaligned_instructions_user", + S_IRUGO, d, &unaligned_instructions_user); + debugfs_create_u32("unaligned_instructions_kernel", + S_IRUGO, d, &unaligned_instructions_kernel); + + return 0; +} +arch_initcall(debugfs_unaligned); +#endif diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile index e36635fccb69..867895530340 100644 --- a/arch/loongarch/lib/Makefile +++ b/arch/loongarch/lib/Makefile @@ -3,4 +3,4 @@ # Makefile for LoongArch-specific library files. # -lib-y += delay.o clear_user.o copy_user.o dump_tlb.o +lib-y += delay.o clear_user.o copy_user.o dump_tlb.o unaligned.o diff --git a/arch/loongarch/lib/unaligned.S b/arch/loongarch/lib/unaligned.S new file mode 100644 index 000000000000..9177fd638f07 --- /dev/null +++ b/arch/loongarch/lib/unaligned.S @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ + +#include + +#include +#include +#include +#include +#include +#include + +.L_fixup_handle_unaligned: + li.w a0, -EFAULT + jr ra + +/* + * unsigned long unaligned_read(void *addr, void *value, unsigned long n, bool sign) + * + * a0: addr + * a1: value + * a2: n + * a3: sign + */ +SYM_FUNC_START(unaligned_read) + beqz a2, 5f + + li.w t2, 0 + addi.d t0, a2, -1 + slli.d t1, t0, 3 + add.d a0, a0, t0 + + beqz a3, 2f +1: ld.b t3, a0, 0 + b 3f + +2: ld.bu t3, a0, 0 +3: sll.d t3, t3, t1 + or t2, t2, t3 + addi.d t1, t1, -8 + addi.d a0, a0, -1 + addi.d a2, a2, -1 + bgtz a2, 2b +4: st.d t2, a1, 0 + + move a0, a2 + jr ra + +5: li.w a0, -EFAULT + 
jr ra + + _asm_extable 1b, .L_fixup_handle_unaligned + _asm_extable 2b, .L_fixup_handle_unaligned + _asm_extable 4b, .L_fixup_handle_unaligned +SYM_FUNC_END(unaligned_read) + +/* + * unsigned long unaligned_write(void *addr, unsigned long value, unsigned long n) + * + * a0: addr + * a1: value + * a2: n + */ +SYM_FUNC_START(unaligned_write) + beqz a2, 3f + + li.w t0, 0 +1: srl.d t1, a1, t0 +2: st.b t1, a0, 0 + addi.d t0, t0, 8 + addi.d a2, a2, -1 + addi.d a0, a0, 1 + bgtz a2, 1b + + move a0, a2 + jr ra + +3: li.w a0, -EFAULT + jr ra + + _asm_extable 2b, .L_fixup_handle_unaligned +SYM_FUNC_END(unaligned_write) From 19e5eb15b00c5841b4b9bd9777af2865a40d2f39 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 10 Dec 2022 22:39:59 +0800 Subject: [PATCH 3816/4122] LoongArch: Add alternative runtime patching mechanism Introduce the "alternative" mechanism from ARM64 and x86 for LoongArch to apply runtime patching. The main purpose of this patch is to provide a framework. In future we can use this mechanism (i.e., the ALTERNATIVE and ALTERNATIVE_2 macros) to optimize hotspot functions according to cpu features. 
Signed-off-by: Jun Yi Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/alternative-asm.h | 82 +++++++ arch/loongarch/include/asm/alternative.h | 111 +++++++++ arch/loongarch/include/asm/bugs.h | 15 ++ arch/loongarch/include/asm/inst.h | 18 ++ arch/loongarch/kernel/Makefile | 2 +- arch/loongarch/kernel/alternative.c | 246 +++++++++++++++++++ arch/loongarch/kernel/module.c | 15 ++ arch/loongarch/kernel/setup.c | 7 + arch/loongarch/kernel/vmlinux.lds.S | 12 + 9 files changed, 507 insertions(+), 1 deletion(-) create mode 100644 arch/loongarch/include/asm/alternative-asm.h create mode 100644 arch/loongarch/include/asm/alternative.h create mode 100644 arch/loongarch/include/asm/bugs.h create mode 100644 arch/loongarch/kernel/alternative.c diff --git a/arch/loongarch/include/asm/alternative-asm.h b/arch/loongarch/include/asm/alternative-asm.h new file mode 100644 index 000000000000..ff3d10ac393f --- /dev/null +++ b/arch/loongarch/include/asm/alternative-asm.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_ALTERNATIVE_ASM_H +#define _ASM_ALTERNATIVE_ASM_H + +#ifdef __ASSEMBLY__ + +#include + +/* + * Issue one struct alt_instr descriptor entry (need to put it into + * the section .altinstructions, see below). This entry contains + * enough information for the alternatives patching code to patch an + * instruction. See apply_alternatives(). + */ +.macro altinstruction_entry orig alt feature orig_len alt_len + .long \orig - . + .long \alt - . + .short \feature + .byte \orig_len + .byte \alt_len +.endm + +/* + * Define an alternative between two instructions. If @feature is + * present, early code in apply_alternatives() replaces @oldinstr with + * @newinstr. ".fill" directive takes care of proper instruction padding + * in case @newinstr is longer than @oldinstr. 
+ */ +.macro ALTERNATIVE oldinstr, newinstr, feature +140 : + \oldinstr +141 : + .fill - (((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)) / 4, 4, 0x03400000 +142 : + + .pushsection .altinstructions, "a" + altinstruction_entry 140b, 143f, \feature, 142b-140b, 144f-143f + .popsection + + .subsection 1 +143 : + \newinstr +144 : + .previous +.endm + +#define old_len (141b-140b) +#define new_len1 (144f-143f) +#define new_len2 (145f-144f) + +#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b))))) + +/* + * Same as ALTERNATIVE macro above but for two alternatives. If CPU + * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has + * @feature2, it replaces @oldinstr with @newinstr2. + */ +.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2 +140 : + \oldinstr +141 : + .fill - ((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \ + (alt_max_short(new_len1, new_len2) - (old_len)) / 4, 4, 0x03400000 +142 : + + .pushsection .altinstructions, "a" + altinstruction_entry 140b, 143f, \feature1, 142b-140b, 144f-143f, 142b-141b + altinstruction_entry 140b, 144f, \feature2, 142b-140b, 145f-144f, 142b-141b + .popsection + + .subsection 1 +143 : + \newinstr1 +144 : + \newinstr2 +145 : + .previous +.endm + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_ALTERNATIVE_ASM_H */ diff --git a/arch/loongarch/include/asm/alternative.h b/arch/loongarch/include/asm/alternative.h new file mode 100644 index 000000000000..cee7b29785ab --- /dev/null +++ b/arch/loongarch/include/asm/alternative.h @@ -0,0 +1,111 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_ALTERNATIVE_H +#define _ASM_ALTERNATIVE_H + +#ifndef __ASSEMBLY__ + +#include +#include +#include +#include + +struct alt_instr { + s32 instr_offset; /* offset to original instruction */ + s32 replace_offset; /* offset to replacement instruction */ + u16 feature; /* feature bit set for replacement */ + u8 instrlen; /* length of original instruction */ + u8 replacementlen; /* length of 
new instruction */ +} __packed; + +/* + * Debug flag that can be tested to see whether alternative + * instructions were patched in already: + */ +extern int alternatives_patched; +extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; + +extern void alternative_instructions(void); +extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); + +#define b_replacement(num) "664"#num +#define e_replacement(num) "665"#num + +#define alt_end_marker "663" +#define alt_slen "662b-661b" +#define alt_total_slen alt_end_marker"b-661b" +#define alt_rlen(num) e_replacement(num)"f-"b_replacement(num)"f" + +#define __OLDINSTR(oldinstr, num) \ + "661:\n\t" oldinstr "\n662:\n" \ + ".fill -(((" alt_rlen(num) ")-(" alt_slen ")) > 0) * " \ + "((" alt_rlen(num) ")-(" alt_slen ")) / 4, 4, 0x03400000\n" + +#define OLDINSTR(oldinstr, num) \ + __OLDINSTR(oldinstr, num) \ + alt_end_marker ":\n" + +#define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") < (" b ")))))" + +/* + * Pad the second replacement alternative with additional NOPs if it is + * additionally longer than the first replacement alternative. 
+ */ +#define OLDINSTR_2(oldinstr, num1, num2) \ + "661:\n\t" oldinstr "\n662:\n" \ + ".fill -((" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) > 0) * " \ + "(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) / 4, " \ + "4, 0x03400000\n" \ + alt_end_marker ":\n" + +#define ALTINSTR_ENTRY(feature, num) \ + " .long 661b - .\n" /* label */ \ + " .long " b_replacement(num)"f - .\n" /* new instruction */ \ + " .short " __stringify(feature) "\n" /* feature bit */ \ + " .byte " alt_total_slen "\n" /* source len */ \ + " .byte " alt_rlen(num) "\n" /* replacement len */ + +#define ALTINSTR_REPLACEMENT(newinstr, feature, num) /* replacement */ \ + b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n\t" + +/* alternative assembly primitive: */ +#define ALTERNATIVE(oldinstr, newinstr, feature) \ + OLDINSTR(oldinstr, 1) \ + ".pushsection .altinstructions,\"a\"\n" \ + ALTINSTR_ENTRY(feature, 1) \ + ".popsection\n" \ + ".subsection 1\n" \ + ALTINSTR_REPLACEMENT(newinstr, feature, 1) \ + ".previous\n" + +#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\ + OLDINSTR_2(oldinstr, 1, 2) \ + ".pushsection .altinstructions,\"a\"\n" \ + ALTINSTR_ENTRY(feature1, 1) \ + ALTINSTR_ENTRY(feature2, 2) \ + ".popsection\n" \ + ".subsection 1\n" \ + ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \ + ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \ + ".previous\n" + +/* + * Alternative instructions for different CPU types or capabilities. + * + * This allows to use optimized instructions even on generic binary + * kernels. + * + * length of oldinstr must be longer or equal the length of newinstr + * It can be padded with nops as needed. + * + * For non barrier like inlines please define new variants + * without volatile and memory clobber. 
+ */ +#define alternative(oldinstr, newinstr, feature) \ + (asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory")) + +#define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \ + (asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory")) + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_ALTERNATIVE_H */ diff --git a/arch/loongarch/include/asm/bugs.h b/arch/loongarch/include/asm/bugs.h new file mode 100644 index 000000000000..98396535163b --- /dev/null +++ b/arch/loongarch/include/asm/bugs.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This is included by init/main.c to check for architecture-dependent bugs. + * + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ +#ifndef _ASM_BUGS_H +#define _ASM_BUGS_H + +#include +#include + +extern void check_bugs(void); + +#endif /* _ASM_BUGS_H */ diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h index 889d6c9fc2b6..67215af47b3d 100644 --- a/arch/loongarch/include/asm/inst.h +++ b/arch/loongarch/include/asm/inst.h @@ -8,6 +8,7 @@ #include #include +#define INSN_NOP 0x03400000 #define INSN_BREAK 0x002a0000 #define ADDR_IMMMASK_LU52ID 0xFFF0000000000000 @@ -28,6 +29,7 @@ enum reg0i26_op { enum reg1i20_op { lu12iw_op = 0x0a, lu32id_op = 0x0b, + pcaddi_op = 0x0c, pcaddu12i_op = 0x0e, pcaddu18i_op = 0x0f, }; @@ -35,6 +37,8 @@ enum reg1i20_op { enum reg1i21_op { beqz_op = 0x10, bnez_op = 0x11, + bceqz_op = 0x12, /* bits[9:8] = 0x00 */ + bcnez_op = 0x12, /* bits[9:8] = 0x01 */ }; enum reg2_op { @@ -315,6 +319,12 @@ static inline bool is_imm_negative(unsigned long val, unsigned int bit) return val & (1UL << (bit - 1)); } +static inline bool is_pc_ins(union loongarch_instruction *ip) +{ + return ip->reg1i20_format.opcode >= pcaddi_op && + ip->reg1i20_format.opcode <= pcaddu18i_op; +} + static inline bool is_branch_ins(union loongarch_instruction *ip) { return ip->reg1i21_format.opcode >= beqz_op && 
@@ -353,6 +363,14 @@ static inline bool unsigned_imm_check(unsigned long val, unsigned int bit) return val < (1UL << bit); } +static inline unsigned long sign_extend(unsigned long val, unsigned int idx) +{ + if (!is_imm_negative(val, idx + 1)) + return ((1UL << idx) - 1) & val; + else + return ~((1UL << idx) - 1) | val; +} + #define DEF_EMIT_REG0I26_FORMAT(NAME, OP) \ static inline void emit_##NAME(union loongarch_instruction *insn, \ int offset) \ diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 2ad2555b53ea..86744531b100 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -8,7 +8,7 @@ extra-y := vmlinux.lds obj-y += head.o cpu-probe.o cacheinfo.o env.o setup.o entry.o genex.o \ traps.o irq.o idle.o process.o dma.o mem.o io.o reset.o switch.o \ elf.o syscall.o signal.o time.o topology.o inst.o ptrace.o vdso.o \ - unaligned.o + alternative.o unaligned.o obj-$(CONFIG_ACPI) += acpi.o obj-$(CONFIG_EFI) += efi.o diff --git a/arch/loongarch/kernel/alternative.c b/arch/loongarch/kernel/alternative.c new file mode 100644 index 000000000000..c5aebeac960b --- /dev/null +++ b/arch/loongarch/kernel/alternative.c @@ -0,0 +1,246 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include +#include +#include + +int __read_mostly alternatives_patched; + +EXPORT_SYMBOL_GPL(alternatives_patched); + +#define MAX_PATCH_SIZE (((u8)(-1)) / LOONGARCH_INSN_SIZE) + +static int __initdata_or_module debug_alternative; + +static int __init debug_alt(char *str) +{ + debug_alternative = 1; + return 1; +} +__setup("debug-alternative", debug_alt); + +#define DPRINTK(fmt, args...) \ +do { \ + if (debug_alternative) \ + printk(KERN_DEBUG "%s: " fmt "\n", __func__, ##args); \ +} while (0) + +#define DUMP_WORDS(buf, count, fmt, args...) 
\ +do { \ + if (unlikely(debug_alternative)) { \ + int _j; \ + union loongarch_instruction *_buf = buf; \ + \ + if (!(count)) \ + break; \ + \ + printk(KERN_DEBUG fmt, ##args); \ + for (_j = 0; _j < count - 1; _j++) \ + printk(KERN_CONT "<%08x> ", _buf[_j].word); \ + printk(KERN_CONT "<%08x>\n", _buf[_j].word); \ + } \ +} while (0) + +/* Use this to add nops to a buffer, then text_poke the whole buffer. */ +static void __init_or_module add_nops(union loongarch_instruction *insn, int count) +{ + while (count--) { + insn->word = INSN_NOP; + insn++; + } +} + +/* Is the jump addr in local .altinstructions */ +static inline bool in_alt_jump(unsigned long jump, void *start, void *end) +{ + return jump >= (unsigned long)start && jump < (unsigned long)end; +} + +static void __init_or_module recompute_jump(union loongarch_instruction *buf, + union loongarch_instruction *dest, union loongarch_instruction *src, + void *start, void *end) +{ + unsigned int si, si_l, si_h; + unsigned long cur_pc, jump_addr, pc; + long offset; + + cur_pc = (unsigned long)src; + pc = (unsigned long)dest; + + si_l = src->reg0i26_format.immediate_l; + si_h = src->reg0i26_format.immediate_h; + switch (src->reg0i26_format.opcode) { + case b_op: + case bl_op: + jump_addr = cur_pc + sign_extend((si_h << 16 | si_l) << 2, 27); + if (in_alt_jump(jump_addr, start, end)) + return; + offset = jump_addr - pc; + BUG_ON(offset < -SZ_128M || offset >= SZ_128M); + offset >>= 2; + buf->reg0i26_format.immediate_h = offset >> 16; + buf->reg0i26_format.immediate_l = offset; + return; + } + + si_l = src->reg1i21_format.immediate_l; + si_h = src->reg1i21_format.immediate_h; + switch (src->reg1i21_format.opcode) { + case bceqz_op: /* bceqz_op = bcnez_op */ + BUG_ON(buf->reg1i21_format.rj & BIT(4)); + fallthrough; + case beqz_op: + case bnez_op: + jump_addr = cur_pc + sign_extend((si_h << 16 | si_l) << 2, 22); + if (in_alt_jump(jump_addr, start, end)) + return; + offset = jump_addr - pc; + BUG_ON(offset < -SZ_4M || offset 
>= SZ_4M); + offset >>= 2; + buf->reg1i21_format.immediate_h = offset >> 16; + buf->reg1i21_format.immediate_l = offset; + return; + } + + si = src->reg2i16_format.immediate; + switch (src->reg2i16_format.opcode) { + case beq_op: + case bne_op: + case blt_op: + case bge_op: + case bltu_op: + case bgeu_op: + jump_addr = cur_pc + sign_extend(si << 2, 17); + if (in_alt_jump(jump_addr, start, end)) + return; + offset = jump_addr - pc; + BUG_ON(offset < -SZ_128K || offset >= SZ_128K); + offset >>= 2; + buf->reg2i16_format.immediate = offset; + return; + } +} + +static int __init_or_module copy_alt_insns(union loongarch_instruction *buf, + union loongarch_instruction *dest, union loongarch_instruction *src, int nr) +{ + int i; + + for (i = 0; i < nr; i++) { + buf[i].word = src[i].word; + + if (is_pc_ins(&src[i])) { + pr_err("Not support pcrel instruction at present!"); + return -EINVAL; + } + + if (is_branch_ins(&src[i]) && + src[i].reg2i16_format.opcode != jirl_op) { + recompute_jump(&buf[i], &dest[i], &src[i], src, src + nr); + } + } + + return 0; +} + +/* + * text_poke_early - Update instructions on a live kernel at boot time + * + * When you use this code to patch more than one byte of an instruction + * you need to make sure that other CPUs cannot execute this code in parallel. + * Also no thread must be currently preempted in the middle of these + * instructions. And on the local CPU you need to be protected against NMI or MCE + * handlers seeing an inconsistent instruction while you patch. 
+ */ +static void *__init_or_module text_poke_early(union loongarch_instruction *insn, + union loongarch_instruction *buf, unsigned int nr) +{ + int i; + unsigned long flags; + + local_irq_save(flags); + + for (i = 0; i < nr; i++) + insn[i].word = buf[i].word; + + local_irq_restore(flags); + + wbflush(); + flush_icache_range((unsigned long)insn, (unsigned long)(insn + nr)); + + return insn; +} + +/* + * Replace instructions with better alternatives for this CPU type. 
This runs + * before SMP is initialized to avoid SMP problems with self-modifying code. + * This implies that asymmetric systems where APs have fewer capabilities than + * the boot processor are not handled. Tough. Make sure you disable such + * features by hand. + */ +void __init_or_module apply_alternatives(struct alt_instr *start, struct alt_instr *end) +{ + struct alt_instr *a; + unsigned int nr_instr, nr_repl, nr_insnbuf; + union loongarch_instruction *instr, *replacement; + union loongarch_instruction insnbuf[MAX_PATCH_SIZE]; + + DPRINTK("alt table %px, -> %px", start, end); + /* + * The scan order should be from start to end. A later scanned + * alternative code can overwrite previously scanned alternative code. + * Some kernel functions (e.g. memcpy, memset, etc) use this order to + * patch code. + * + * So be careful if you want to change the scan order to any other + * order. + */ + for (a = start; a < end; a++) { + nr_insnbuf = 0; + + instr = (void *)&a->instr_offset + a->instr_offset; + replacement = (void *)&a->replace_offset + a->replace_offset; + + BUG_ON(a->instrlen > sizeof(insnbuf)); + BUG_ON(a->instrlen & 0x3); + BUG_ON(a->replacementlen & 0x3); + + nr_instr = a->instrlen / LOONGARCH_INSN_SIZE; + nr_repl = a->replacementlen / LOONGARCH_INSN_SIZE; + + if (!cpu_has(a->feature)) { + DPRINTK("feat not exist: %d, old: (%px len: %d), repl: (%px, len: %d)", + a->feature, instr, a->instrlen, + replacement, a->replacementlen); + + continue; + } + + DPRINTK("feat: %d, old: (%px len: %d), repl: (%px, len: %d)", + a->feature, instr, a->instrlen, + replacement, a->replacementlen); + + DUMP_WORDS(instr, nr_instr, "%px: old_insn: ", instr); + DUMP_WORDS(replacement, nr_repl, "%px: rpl_insn: ", replacement); + + copy_alt_insns(insnbuf, instr, replacement, nr_repl); + nr_insnbuf = nr_repl; + + if (nr_instr > nr_repl) { + add_nops(insnbuf + nr_repl, nr_instr - nr_repl); + nr_insnbuf += nr_instr - nr_repl; + } + DUMP_WORDS(insnbuf, nr_insnbuf, "%px: final_insn: ", 
instr); + + text_poke_early(instr, insnbuf, nr_insnbuf); + } +} + +void __init alternative_instructions(void) +{ + apply_alternatives(__alt_instructions, __alt_instructions_end); + + alternatives_patched = 1; +} diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c index 097595b2fc14..825fcf77f9e7 100644 --- a/arch/loongarch/kernel/module.c +++ b/arch/loongarch/kernel/module.c @@ -17,6 +17,7 @@ #include #include #include +#include static int rela_stack_push(s64 stack_value, s64 *rela_stack, size_t *rela_stack_top) { @@ -456,3 +457,17 @@ void *module_alloc(unsigned long size) return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); } + +int module_finalize(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, struct module *mod) +{ + const Elf_Shdr *s, *se; + const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + + for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) { + if (!strcmp(".altinstructions", secstrs + s->sh_name)) + apply_alternatives((void *)s->sh_addr, (void *)s->sh_addr + s->sh_size); + } + + return 0; +} diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index ae436def7ee9..53831bcb11ca 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -31,7 +31,9 @@ #include #include +#include #include +#include #include #include #include @@ -80,6 +82,11 @@ const char *get_system_type(void) return "generic-loongson-machine"; } +void __init check_bugs(void) +{ + alternative_instructions(); +} + static const char *dmi_string_parse(const struct dmi_header *dm, u8 s) { const u8 *bp = ((u8 *) dm) + dm->length; diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S index efecda0c2361..733b16e8d55d 100644 --- a/arch/loongarch/kernel/vmlinux.lds.S +++ b/arch/loongarch/kernel/vmlinux.lds.S @@ -54,6 +54,18 @@ SECTIONS . 
= ALIGN(PECOFF_SEGMENT_ALIGN); _etext = .; + /* + * struct alt_instr entries. From the header (alternative.h): + * "Alternative instructions for different CPU types or capabilities" + * Think locking instructions on spinlocks. + */ + . = ALIGN(4); + .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { + __alt_instructions = .; + *(.altinstructions) + __alt_instructions_end = .; + } + .got : ALIGN(16) { *(.got) } .plt : ALIGN(16) { *(.plt) } .got.plt : ALIGN(16) { *(.got.plt) } From a275a82dcd4024c75337db15d59ed039c31e21da Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 10 Dec 2022 22:39:59 +0800 Subject: [PATCH 3817/4122] LoongArch: Use alternative to optimize libraries Use the alternative to optimize common libraries according to whether the CPU has UAL (hardware unaligned access support) feature, including memset(), memcpy(), memmove(), copy_user() and clear_user(). We have tested UnixBench on a Loongson-3A5000 quad-core machine (1.6GHz): 1, One copy, before patch: System Benchmarks Index Values BASELINE RESULT INDEX Dhrystone 2 using register variables 116700.0 9566582.0 819.8 Double-Precision Whetstone 55.0 2805.3 510.1 Execl Throughput 43.0 2120.0 493.0 File Copy 1024 bufsize 2000 maxblocks 3960.0 209833.0 529.9 File Copy 256 bufsize 500 maxblocks 1655.0 89400.0 540.2 File Copy 4096 bufsize 8000 maxblocks 5800.0 320036.0 551.8 Pipe Throughput 12440.0 340624.0 273.8 Pipe-based Context Switching 4000.0 109939.1 274.8 Process Creation 126.0 4728.7 375.3 Shell Scripts (1 concurrent) 42.4 2223.1 524.3 Shell Scripts (8 concurrent) 6.0 883.1 1471.9 System Call Overhead 15000.0 518639.1 345.8 ======== System Benchmarks Index Score 500.2 2, One copy, after patch: System Benchmarks Index Values BASELINE RESULT INDEX Dhrystone 2 using register variables 116700.0 9567674.7 819.9 Double-Precision Whetstone 55.0 2805.5 510.1 Execl Throughput 43.0 2392.7 556.4 File Copy 1024 bufsize 2000 maxblocks 3960.0 417804.0 1055.1 File Copy 256 bufsize 500 maxblocks 1655.0 
112909.5 682.2 File Copy 4096 bufsize 8000 maxblocks 5800.0 1255207.4 2164.2 Pipe Throughput 12440.0 555712.0 446.7 Pipe-based Context Switching 4000.0 99964.5 249.9 Process Creation 126.0 5192.5 412.1 Shell Scripts (1 concurrent) 42.4 2302.4 543.0 Shell Scripts (8 concurrent) 6.0 919.6 1532.6 System Call Overhead 15000.0 511159.3 340.8 ======== System Benchmarks Index Score 640.1 3, Four copies, before patch: System Benchmarks Index Values BASELINE RESULT INDEX Dhrystone 2 using register variables 116700.0 38268610.5 3279.2 Double-Precision Whetstone 55.0 11222.2 2040.4 Execl Throughput 43.0 7892.0 1835.3 File Copy 1024 bufsize 2000 maxblocks 3960.0 235149.6 593.8 File Copy 256 bufsize 500 maxblocks 1655.0 74959.6 452.9 File Copy 4096 bufsize 8000 maxblocks 5800.0 545048.5 939.7 Pipe Throughput 12440.0 1337359.0 1075.0 Pipe-based Context Switching 4000.0 473663.9 1184.2 Process Creation 126.0 17491.2 1388.2 Shell Scripts (1 concurrent) 42.4 6865.7 1619.3 Shell Scripts (8 concurrent) 6.0 1015.9 1693.1 System Call Overhead 15000.0 1899535.2 1266.4 ======== System Benchmarks Index Score 1278.3 4, Four copies, after patch: System Benchmarks Index Values BASELINE RESULT INDEX Dhrystone 2 using register variables 116700.0 38272815.5 3279.6 Double-Precision Whetstone 55.0 11222.8 2040.5 Execl Throughput 43.0 8839.2 2055.6 File Copy 1024 bufsize 2000 maxblocks 3960.0 313912.9 792.7 File Copy 256 bufsize 500 maxblocks 1655.0 80976.1 489.3 File Copy 4096 bufsize 8000 maxblocks 5800.0 1176594.3 2028.6 Pipe Throughput 12440.0 2100941.9 1688.9 Pipe-based Context Switching 4000.0 476696.4 1191.7 Process Creation 126.0 18394.7 1459.9 Shell Scripts (1 concurrent) 42.4 7172.2 1691.6 Shell Scripts (8 concurrent) 6.0 1058.3 1763.9 System Call Overhead 15000.0 1874714.7 1249.8 ======== System Benchmarks Index Score 1488.8 Signed-off-by: Jun Yi Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/string.h | 5 ++ arch/loongarch/lib/Makefile | 3 +- arch/loongarch/lib/clear_user.S | 
70 ++++++++++++++-- arch/loongarch/lib/copy_user.S | 91 +++++++++++++++++++-- arch/loongarch/lib/memcpy.S | 95 ++++++++++++++++++++++ arch/loongarch/lib/memmove.S | 121 ++++++++++++++++++++++++++++ arch/loongarch/lib/memset.S | 91 +++++++++++++++++++++ 7 files changed, 465 insertions(+), 11 deletions(-) create mode 100644 arch/loongarch/lib/memcpy.S create mode 100644 arch/loongarch/lib/memmove.S create mode 100644 arch/loongarch/lib/memset.S diff --git a/arch/loongarch/include/asm/string.h b/arch/loongarch/include/asm/string.h index b07e60ded957..7b29cc9c70aa 100644 --- a/arch/loongarch/include/asm/string.h +++ b/arch/loongarch/include/asm/string.h @@ -5,8 +5,13 @@ #ifndef _ASM_STRING_H #define _ASM_STRING_H +#define __HAVE_ARCH_MEMSET extern void *memset(void *__s, int __c, size_t __count); + +#define __HAVE_ARCH_MEMCPY extern void *memcpy(void *__to, __const__ void *__from, size_t __n); + +#define __HAVE_ARCH_MEMMOVE extern void *memmove(void *__dest, __const__ void *__src, size_t __n); #endif /* _ASM_STRING_H */ diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile index 867895530340..40bde632900f 100644 --- a/arch/loongarch/lib/Makefile +++ b/arch/loongarch/lib/Makefile @@ -3,4 +3,5 @@ # Makefile for LoongArch-specific library files. 
# -lib-y += delay.o clear_user.o copy_user.o dump_tlb.o unaligned.o +lib-y += delay.o memset.o memcpy.o memmove.o \ + clear_user.o copy_user.o dump_tlb.o unaligned.o diff --git a/arch/loongarch/lib/clear_user.S b/arch/loongarch/lib/clear_user.S index d5c9e44ac8c4..2dc48e61a2c8 100644 --- a/arch/loongarch/lib/clear_user.S +++ b/arch/loongarch/lib/clear_user.S @@ -3,25 +3,37 @@ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ +#include #include #include #include +#include #include #include -.irp to, 0 +.irp to, 0, 1, 2, 3, 4, 5, 6, 7 .L_fixup_handle_\to\(): addi.d a0, a1, (\to) * (-8) jr ra .endr +SYM_FUNC_START(__clear_user) + /* + * Some CPUs support hardware unaligned access + */ + ALTERNATIVE "b __clear_user_generic", \ + "b __clear_user_fast", CPU_FEATURE_UAL +SYM_FUNC_END(__clear_user) + +EXPORT_SYMBOL(__clear_user) + /* - * unsigned long __clear_user(void *addr, size_t size) + * unsigned long __clear_user_generic(void *addr, size_t size) * * a0: addr * a1: size */ -SYM_FUNC_START(__clear_user) +SYM_FUNC_START(__clear_user_generic) beqz a1, 2f 1: st.b zero, a0, 0 @@ -33,6 +45,54 @@ SYM_FUNC_START(__clear_user) jr ra _asm_extable 1b, .L_fixup_handle_0 -SYM_FUNC_END(__clear_user) +SYM_FUNC_END(__clear_user_generic) -EXPORT_SYMBOL(__clear_user) +/* + * unsigned long __clear_user_fast(void *addr, unsigned long size) + * + * a0: addr + * a1: size + */ +SYM_FUNC_START(__clear_user_fast) + beqz a1, 10f + + ori a2, zero, 64 + blt a1, a2, 9f + + /* set 64 bytes at a time */ +1: st.d zero, a0, 0 +2: st.d zero, a0, 8 +3: st.d zero, a0, 16 +4: st.d zero, a0, 24 +5: st.d zero, a0, 32 +6: st.d zero, a0, 40 +7: st.d zero, a0, 48 +8: st.d zero, a0, 56 + + addi.d a0, a0, 64 + addi.d a1, a1, -64 + bge a1, a2, 1b + + beqz a1, 10f + + /* set the remaining bytes */ +9: st.b zero, a0, 0 + addi.d a0, a0, 1 + addi.d a1, a1, -1 + bgt a1, zero, 9b + + /* return */ +10: move a0, a1 + jr ra + + /* fixup and ex_table */ + _asm_extable 1b, .L_fixup_handle_0 + 
_asm_extable 2b, .L_fixup_handle_1 + _asm_extable 3b, .L_fixup_handle_2 + _asm_extable 4b, .L_fixup_handle_3 + _asm_extable 5b, .L_fixup_handle_4 + _asm_extable 6b, .L_fixup_handle_5 + _asm_extable 7b, .L_fixup_handle_6 + _asm_extable 8b, .L_fixup_handle_7 + _asm_extable 9b, .L_fixup_handle_0 +SYM_FUNC_END(__clear_user_fast) diff --git a/arch/loongarch/lib/copy_user.S b/arch/loongarch/lib/copy_user.S index 61933d964da0..55ac6020a1ad 100644 --- a/arch/loongarch/lib/copy_user.S +++ b/arch/loongarch/lib/copy_user.S @@ -3,26 +3,38 @@ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ +#include #include #include #include +#include #include #include -.irp to, 0 +.irp to, 0, 1, 2, 3, 4, 5, 6, 7 .L_fixup_handle_\to\(): addi.d a0, a2, (\to) * (-8) jr ra .endr +SYM_FUNC_START(__copy_user) + /* + * Some CPUs support hardware unaligned access + */ + ALTERNATIVE "b __copy_user_generic", \ + "b __copy_user_fast", CPU_FEATURE_UAL +SYM_FUNC_END(__copy_user) + +EXPORT_SYMBOL(__copy_user) + /* - * unsigned long __copy_user(void *to, const void *from, size_t n) + * unsigned long __copy_user_generic(void *to, const void *from, size_t n) * * a0: to * a1: from * a2: n */ -SYM_FUNC_START(__copy_user) +SYM_FUNC_START(__copy_user_generic) beqz a2, 3f 1: ld.b t0, a1, 0 @@ -37,6 +49,75 @@ SYM_FUNC_START(__copy_user) _asm_extable 1b, .L_fixup_handle_0 _asm_extable 2b, .L_fixup_handle_0 -SYM_FUNC_END(__copy_user) +SYM_FUNC_END(__copy_user_generic) -EXPORT_SYMBOL(__copy_user) +/* + * unsigned long __copy_user_fast(void *to, const void *from, unsigned long n) + * + * a0: to + * a1: from + * a2: n + */ +SYM_FUNC_START(__copy_user_fast) + beqz a2, 19f + + ori a3, zero, 64 + blt a2, a3, 17f + + /* copy 64 bytes at a time */ +1: ld.d t0, a1, 0 +2: ld.d t1, a1, 8 +3: ld.d t2, a1, 16 +4: ld.d t3, a1, 24 +5: ld.d t4, a1, 32 +6: ld.d t5, a1, 40 +7: ld.d t6, a1, 48 +8: ld.d t7, a1, 56 +9: st.d t0, a0, 0 +10: st.d t1, a0, 8 +11: st.d t2, a0, 16 +12: st.d t3, a0, 24 +13: st.d t4, a0, 32 
+14: st.d t5, a0, 40 +15: st.d t6, a0, 48 +16: st.d t7, a0, 56 + + addi.d a0, a0, 64 + addi.d a1, a1, 64 + addi.d a2, a2, -64 + bge a2, a3, 1b + + beqz a2, 19f + + /* copy the remaining bytes */ +17: ld.b t0, a1, 0 +18: st.b t0, a0, 0 + addi.d a0, a0, 1 + addi.d a1, a1, 1 + addi.d a2, a2, -1 + bgt a2, zero, 17b + + /* return */ +19: move a0, a2 + jr ra + + /* fixup and ex_table */ + _asm_extable 1b, .L_fixup_handle_0 + _asm_extable 2b, .L_fixup_handle_1 + _asm_extable 3b, .L_fixup_handle_2 + _asm_extable 4b, .L_fixup_handle_3 + _asm_extable 5b, .L_fixup_handle_4 + _asm_extable 6b, .L_fixup_handle_5 + _asm_extable 7b, .L_fixup_handle_6 + _asm_extable 8b, .L_fixup_handle_7 + _asm_extable 9b, .L_fixup_handle_0 + _asm_extable 10b, .L_fixup_handle_1 + _asm_extable 11b, .L_fixup_handle_2 + _asm_extable 12b, .L_fixup_handle_3 + _asm_extable 13b, .L_fixup_handle_4 + _asm_extable 14b, .L_fixup_handle_5 + _asm_extable 15b, .L_fixup_handle_6 + _asm_extable 16b, .L_fixup_handle_7 + _asm_extable 17b, .L_fixup_handle_0 + _asm_extable 18b, .L_fixup_handle_0 +SYM_FUNC_END(__copy_user_fast) diff --git a/arch/loongarch/lib/memcpy.S b/arch/loongarch/lib/memcpy.S new file mode 100644 index 000000000000..7c07d595ee89 --- /dev/null +++ b/arch/loongarch/lib/memcpy.S @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include +#include +#include + +SYM_FUNC_START(memcpy) + /* + * Some CPUs support hardware unaligned access + */ + ALTERNATIVE "b __memcpy_generic", \ + "b __memcpy_fast", CPU_FEATURE_UAL +SYM_FUNC_END(memcpy) + +EXPORT_SYMBOL(memcpy) + +/* + * void *__memcpy_generic(void *dst, const void *src, size_t n) + * + * a0: dst + * a1: src + * a2: n + */ +SYM_FUNC_START(__memcpy_generic) + move a3, a0 + beqz a2, 2f + +1: ld.b t0, a1, 0 + st.b t0, a0, 0 + addi.d a0, a0, 1 + addi.d a1, a1, 1 + addi.d a2, a2, -1 + bgt a2, zero, 1b + +2: move a0, a3 + jr ra 
+SYM_FUNC_END(__memcpy_generic) + +/* + * void *__memcpy_fast(void *dst, const void *src, size_t n) + * + * a0: dst + * a1: src + * a2: n + */ +SYM_FUNC_START(__memcpy_fast) + move a3, a0 + beqz a2, 3f + + ori a4, zero, 64 + blt a2, a4, 2f + + /* copy 64 bytes at a time */ +1: ld.d t0, a1, 0 + ld.d t1, a1, 8 + ld.d t2, a1, 16 + ld.d t3, a1, 24 + ld.d t4, a1, 32 + ld.d t5, a1, 40 + ld.d t6, a1, 48 + ld.d t7, a1, 56 + st.d t0, a0, 0 + st.d t1, a0, 8 + st.d t2, a0, 16 + st.d t3, a0, 24 + st.d t4, a0, 32 + st.d t5, a0, 40 + st.d t6, a0, 48 + st.d t7, a0, 56 + + addi.d a0, a0, 64 + addi.d a1, a1, 64 + addi.d a2, a2, -64 + bge a2, a4, 1b + + beqz a2, 3f + + /* copy the remaining bytes */ +2: ld.b t0, a1, 0 + st.b t0, a0, 0 + addi.d a0, a0, 1 + addi.d a1, a1, 1 + addi.d a2, a2, -1 + bgt a2, zero, 2b + + /* return */ +3: move a0, a3 + jr ra +SYM_FUNC_END(__memcpy_fast) diff --git a/arch/loongarch/lib/memmove.S b/arch/loongarch/lib/memmove.S new file mode 100644 index 000000000000..6ffdb46da78f --- /dev/null +++ b/arch/loongarch/lib/memmove.S @@ -0,0 +1,121 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include +#include +#include + +SYM_FUNC_START(memmove) + blt a0, a1, 1f /* dst < src, memcpy */ + blt a1, a0, 3f /* src < dst, rmemcpy */ + jr ra /* dst == src, return */ + + /* if (src - dst) < 64, copy 1 byte at a time */ +1: ori a3, zero, 64 + sub.d t0, a1, a0 + blt t0, a3, 2f + b memcpy +2: b __memcpy_generic + + /* if (dst - src) < 64, copy 1 byte at a time */ +3: ori a3, zero, 64 + sub.d t0, a0, a1 + blt t0, a3, 4f + b rmemcpy +4: b __rmemcpy_generic +SYM_FUNC_END(memmove) + +EXPORT_SYMBOL(memmove) + +SYM_FUNC_START(rmemcpy) + /* + * Some CPUs support hardware unaligned access + */ + ALTERNATIVE "b __rmemcpy_generic", \ + "b __rmemcpy_fast", CPU_FEATURE_UAL +SYM_FUNC_END(rmemcpy) + +/* + * void *__rmemcpy_generic(void *dst, const void *src, size_t n) + * + * a0: 
dst + * a1: src + * a2: n + */ +SYM_FUNC_START(__rmemcpy_generic) + move a3, a0 + beqz a2, 2f + + add.d a0, a0, a2 + add.d a1, a1, a2 + +1: ld.b t0, a1, -1 + st.b t0, a0, -1 + addi.d a0, a0, -1 + addi.d a1, a1, -1 + addi.d a2, a2, -1 + bgt a2, zero, 1b + +2: move a0, a3 + jr ra +SYM_FUNC_END(__rmemcpy_generic) + +/* + * void *__rmemcpy_fast(void *dst, const void *src, size_t n) + * + * a0: dst + * a1: src + * a2: n + */ +SYM_FUNC_START(__rmemcpy_fast) + move a3, a0 + beqz a2, 3f + + add.d a0, a0, a2 + add.d a1, a1, a2 + + ori a4, zero, 64 + blt a2, a4, 2f + + /* copy 64 bytes at a time */ +1: ld.d t0, a1, -8 + ld.d t1, a1, -16 + ld.d t2, a1, -24 + ld.d t3, a1, -32 + ld.d t4, a1, -40 + ld.d t5, a1, -48 + ld.d t6, a1, -56 + ld.d t7, a1, -64 + st.d t0, a0, -8 + st.d t1, a0, -16 + st.d t2, a0, -24 + st.d t3, a0, -32 + st.d t4, a0, -40 + st.d t5, a0, -48 + st.d t6, a0, -56 + st.d t7, a0, -64 + + addi.d a0, a0, -64 + addi.d a1, a1, -64 + addi.d a2, a2, -64 + bge a2, a4, 1b + + beqz a2, 3f + + /* copy the remaining bytes */ +2: ld.b t0, a1, -1 + st.b t0, a0, -1 + addi.d a0, a0, -1 + addi.d a1, a1, -1 + addi.d a2, a2, -1 + bgt a2, zero, 2b + + /* return */ +3: move a0, a3 + jr ra +SYM_FUNC_END(__rmemcpy_fast) diff --git a/arch/loongarch/lib/memset.S b/arch/loongarch/lib/memset.S new file mode 100644 index 000000000000..e7cb4ea3747d --- /dev/null +++ b/arch/loongarch/lib/memset.S @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include +#include +#include + +.macro fill_to_64 r0 + bstrins.d \r0, \r0, 15, 8 + bstrins.d \r0, \r0, 31, 16 + bstrins.d \r0, \r0, 63, 32 +.endm + +SYM_FUNC_START(memset) + /* + * Some CPUs support hardware unaligned access + */ + ALTERNATIVE "b __memset_generic", \ + "b __memset_fast", CPU_FEATURE_UAL +SYM_FUNC_END(memset) + +EXPORT_SYMBOL(memset) + +/* + * void *__memset_generic(void *s, int c, size_t n) + * + * a0: s + * a1: c + * 
a2: n + */ +SYM_FUNC_START(__memset_generic) + move a3, a0 + beqz a2, 2f + +1: st.b a1, a0, 0 + addi.d a0, a0, 1 + addi.d a2, a2, -1 + bgt a2, zero, 1b + +2: move a0, a3 + jr ra +SYM_FUNC_END(__memset_generic) + +/* + * void *__memset_fast(void *s, int c, size_t n) + * + * a0: s + * a1: c + * a2: n + */ +SYM_FUNC_START(__memset_fast) + move a3, a0 + beqz a2, 3f + + ori a4, zero, 64 + blt a2, a4, 2f + + /* fill a1 to 64 bits */ + fill_to_64 a1 + + /* set 64 bytes at a time */ +1: st.d a1, a0, 0 + st.d a1, a0, 8 + st.d a1, a0, 16 + st.d a1, a0, 24 + st.d a1, a0, 32 + st.d a1, a0, 40 + st.d a1, a0, 48 + st.d a1, a0, 56 + + addi.d a0, a0, 64 + addi.d a2, a2, -64 + bge a2, a4, 1b + + beqz a2, 3f + + /* set the remaining bytes */ +2: st.b a1, a0, 0 + addi.d a0, a0, 1 + addi.d a2, a2, -1 + bgt a2, zero, 2b + + /* return */ +3: move a0, a3 + jr ra +SYM_FUNC_END(__memset_fast) From 88d4d957edc707e037449ef71a58c6530a39d01e Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Sat, 10 Dec 2022 22:40:05 +0800 Subject: [PATCH 3818/4122] LoongArch: Add FDT booting support from efi system table Since commit 40cd01a9c324("efi/loongarch: libstub: remove dependency on flattened DT"), we can parse the FDT from efi system table. And now, LoongArch is coming to support booting with FDT, so we add the relevant booting support as well as parameter parsing. 
Signed-off-by: Binbin Zhou Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 2 + arch/loongarch/include/asm/efi.h | 1 + arch/loongarch/include/asm/setup.h | 1 + arch/loongarch/kernel/acpi.c | 11 +++++- arch/loongarch/kernel/efi.c | 15 +++++++- arch/loongarch/kernel/env.c | 2 + arch/loongarch/kernel/numa.c | 17 +++++++- arch/loongarch/kernel/setup.c | 62 +++++++++++++++++++++++++++++- arch/loongarch/kernel/smp.c | 34 ++++++++++++++++ arch/loongarch/pci/acpi.c | 7 +++- 10 files changed, 145 insertions(+), 7 deletions(-) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 0daf6263655b..48db4b27b9af 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -111,6 +111,8 @@ config LOONGARCH select MODULES_USE_ELF_RELA if MODULES select NEED_PER_CPU_EMBED_FIRST_CHUNK select NEED_PER_CPU_PAGE_FIRST_CHUNK + select OF + select OF_EARLY_FLATTREE select PCI select PCI_DOMAINS_GENERIC select PCI_ECAM if ACPI diff --git a/arch/loongarch/include/asm/efi.h b/arch/loongarch/include/asm/efi.h index 174567b00ddb..81e5d3371868 100644 --- a/arch/loongarch/include/asm/efi.h +++ b/arch/loongarch/include/asm/efi.h @@ -9,6 +9,7 @@ void __init efi_init(void); void __init efi_runtime_init(void); +void __init *efi_fdt_pointer(void); void efifb_setup_from_dmi(struct screen_info *si, const char *opt); #define ARCH_EFI_IRQ_FLAGS_MASK 0x00000004 /* Bit 2: CSR.CRMD.IE */ diff --git a/arch/loongarch/include/asm/setup.h b/arch/loongarch/include/asm/setup.h index ca373f8e3c4d..72ead58039f3 100644 --- a/arch/loongarch/include/asm/setup.h +++ b/arch/loongarch/include/asm/setup.h @@ -13,6 +13,7 @@ extern unsigned long eentry; extern unsigned long tlbrentry; +extern char init_command_line[COMMAND_LINE_SIZE]; extern void tlb_init(int cpu); extern void cpu_cache_init(void); extern void cache_error_setup(void); diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c index 8319cc409009..5a63f85f5798 100644 --- a/arch/loongarch/kernel/acpi.c +++ 
b/arch/loongarch/kernel/acpi.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -145,14 +146,14 @@ void __init acpi_boot_table_init(void) * If acpi_disabled, bail out */ if (acpi_disabled) - return; + goto fdt_earlycon; /* * Initialize the ACPI boot-time table parser. */ if (acpi_table_init()) { disable_acpi(); - return; + goto fdt_earlycon; } loongson_sysconf.boot_cpu_id = read_csr_cpuid(); @@ -164,6 +165,12 @@ void __init acpi_boot_table_init(void) /* Do not enable ACPI SPCR console by default */ acpi_parse_spcr(earlycon_acpi_spcr_enable, false); + + return; + +fdt_earlycon: + if (earlycon_acpi_spcr_enable) + early_init_dt_scan_chosen_stdout(); } #ifdef CONFIG_ACPI_NUMA diff --git a/arch/loongarch/kernel/efi.c b/arch/loongarch/kernel/efi.c index a31329971133..ea485b0e1e7f 100644 --- a/arch/loongarch/kernel/efi.c +++ b/arch/loongarch/kernel/efi.c @@ -28,16 +28,29 @@ static unsigned long efi_nr_tables; static unsigned long efi_config_table; static unsigned long __initdata boot_memmap = EFI_INVALID_TABLE_ADDR; +static unsigned long __initdata fdt_pointer = EFI_INVALID_TABLE_ADDR; static efi_system_table_t *efi_systab; static efi_config_table_type_t arch_tables[] __initdata = { {LINUX_EFI_BOOT_MEMMAP_GUID, &boot_memmap, "MEMMAP" }, + {DEVICE_TREE_GUID, &fdt_pointer, "FDTPTR" }, {}, }; +void __init *efi_fdt_pointer(void) +{ + if (!efi_systab) + return NULL; + + if (fdt_pointer == EFI_INVALID_TABLE_ADDR) + return NULL; + + return early_memremap_ro(fdt_pointer, SZ_64K); +} + void __init efi_runtime_init(void) { - if (!efi_enabled(EFI_BOOT)) + if (!efi_enabled(EFI_BOOT) || !efi_systab->runtime) return; if (efi_runtime_disabled()) { diff --git a/arch/loongarch/kernel/env.c b/arch/loongarch/kernel/env.c index 6d56a463b091..6b3bfb0092e6 100644 --- a/arch/loongarch/kernel/env.c +++ b/arch/loongarch/kernel/env.c @@ -11,6 +11,7 @@ #include #include #include +#include u64 efi_system_table; struct loongson_system_configuration 
loongson_sysconf; @@ -27,6 +28,7 @@ void __init init_environ(void) clear_bit(EFI_BOOT, &efi.flags); strscpy(boot_command_line, cmdline, COMMAND_LINE_SIZE); + strscpy(init_command_line, cmdline, COMMAND_LINE_SIZE); early_memunmap(cmdline, COMMAND_LINE_SIZE); efi_system_table = fw_arg2; diff --git a/arch/loongarch/kernel/numa.c b/arch/loongarch/kernel/numa.c index a13f92593cfd..3019ca14c760 100644 --- a/arch/loongarch/kernel/numa.c +++ b/arch/loongarch/kernel/numa.c @@ -388,6 +388,21 @@ static void __init numa_default_distance(void) } } +/* + * fake_numa_init() - For Non-ACPI systems + * Return: 0 on success, -errno on failure. + */ +static int __init fake_numa_init(void) +{ + phys_addr_t start = memblock_start_of_DRAM(); + phys_addr_t end = memblock_end_of_DRAM() - 1; + + node_set(0, numa_nodes_parsed); + pr_info("Faking a node at [mem %pap-%pap]\n", &start, &end); + + return numa_add_memblk(0, start, end + 1); +} + int __init init_numa_memory(void) { int i; @@ -404,7 +419,7 @@ int __init init_numa_memory(void) memset(&numa_meminfo, 0, sizeof(numa_meminfo)); /* Parse SRAT and SLIT if provided by firmware. */ - ret = acpi_numa_init(); + ret = acpi_disabled ? 
fake_numa_init() : acpi_numa_init(); if (ret < 0) return ret; diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 53831bcb11ca..f9f5a130710c 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -28,6 +28,9 @@ #include #include #include +#include +#include +#include #include #include @@ -69,6 +72,7 @@ static const char dmi_empty_string[] = " "; * * These are initialized so they are in the .data section */ +char init_command_line[COMMAND_LINE_SIZE] __initdata; static int num_standard_resources; static struct resource *standard_resources; @@ -253,6 +257,58 @@ static void __init arch_parse_crashkernel(void) #endif } +static void __init fdt_setup(void) +{ +#ifdef CONFIG_OF_EARLY_FLATTREE + void *fdt_pointer; + + /* ACPI-based systems do not require parsing fdt */ + if (acpi_os_get_root_pointer()) + return; + + /* Look for a device tree configuration table entry */ + fdt_pointer = efi_fdt_pointer(); + if (!fdt_pointer || fdt_check_header(fdt_pointer)) + return; + + early_init_dt_scan(fdt_pointer); + early_init_fdt_reserve_self(); + + max_low_pfn = PFN_PHYS(memblock_end_of_DRAM()); +#endif +} + +static void __init bootcmdline_init(char **cmdline_p) +{ + /* + * If CONFIG_CMDLINE_FORCE is enabled then initializing the command line + * is trivial - we simply use the built-in command line unconditionally & + * unmodified. + */ + if (IS_ENABLED(CONFIG_CMDLINE_FORCE)) { + strscpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); + goto out; + } + +#ifdef CONFIG_OF_FLATTREE + /* + * If CONFIG_CMDLINE_BOOTLOADER is enabled and we are in FDT-based system, + * the boot_command_line will be overwritten by early_init_dt_scan_chosen(). + * So we need to append init_command_line (the original copy of boot_command_line) + * to boot_command_line. 
+ */ + if (initial_boot_params) { + if (boot_command_line[0]) + strlcat(boot_command_line, " ", COMMAND_LINE_SIZE); + + strlcat(boot_command_line, init_command_line, COMMAND_LINE_SIZE); + } +#endif + +out: + *cmdline_p = boot_command_line; +} + void __init platform_init(void) { arch_reserve_vmcore(); @@ -265,6 +321,7 @@ void __init platform_init(void) acpi_gbl_use_default_register_widths = false; acpi_boot_table_init(); #endif + unflatten_and_copy_device_tree(); #ifdef CONFIG_NUMA init_numa_memory(); @@ -297,6 +354,8 @@ static void __init arch_mem_init(char **cmdline_p) check_kernel_sections_mem(); + early_init_fdt_scan_reserved_mem(); + /* * In order to reduce the possibility of kernel panic when failed to * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate @@ -422,12 +481,13 @@ static void __init prefill_possible_map(void) void __init setup_arch(char **cmdline_p) { cpu_probe(); - *cmdline_p = boot_command_line; init_environ(); efi_init(); + fdt_setup(); memblock_init(); pagetable_init(); + bootcmdline_init(cmdline_p); parse_early_param(); reserve_initrd_mem(); diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index 14508d429ffa..b78816cf74ae 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -180,8 +180,42 @@ irqreturn_t loongson_ipi_interrupt(int irq, void *dev) return IRQ_HANDLED; } +static void __init fdt_smp_setup(void) +{ +#ifdef CONFIG_OF + unsigned int cpu, cpuid; + struct device_node *node = NULL; + + for_each_of_cpu_node(node) { + if (!of_device_is_available(node)) + continue; + + cpuid = of_get_cpu_hwid(node, 0); + if (cpuid >= nr_cpu_ids) + continue; + + if (cpuid == loongson_sysconf.boot_cpu_id) { + cpu = 0; + numa_add_cpu(cpu); + } else { + cpu = cpumask_next_zero(-1, cpu_present_mask); + } + + num_processors++; + set_cpu_possible(cpu, true); + set_cpu_present(cpu, true); + __cpu_number_map[cpuid] = cpu; + __cpu_logical_map[cpu] = cpuid; + } + + loongson_sysconf.nr_cpus = num_processors; 
+#endif +} + void __init loongson_smp_setup(void) { + fdt_smp_setup(); + cpu_data[0].core = cpu_logical_map(0) % loongson_sysconf.cores_per_package; cpu_data[0].package = cpu_logical_map(0) / loongson_sysconf.cores_per_package; diff --git a/arch/loongarch/pci/acpi.c b/arch/loongarch/pci/acpi.c index 8235ec92b41f..365f7de771cb 100644 --- a/arch/loongarch/pci/acpi.c +++ b/arch/loongarch/pci/acpi.c @@ -26,9 +26,12 @@ void pcibios_add_bus(struct pci_bus *bus) int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) { - struct pci_config_window *cfg = bridge->bus->sysdata; - struct acpi_device *adev = to_acpi_device(cfg->parent); + struct acpi_device *adev = NULL; struct device *bus_dev = &bridge->bus->dev; + struct pci_config_window *cfg = bridge->bus->sysdata; + + if (!acpi_disabled) + adev = to_acpi_device(cfg->parent); ACPI_COMPANION_SET(&bridge->dev, adev); set_dev_node(bus_dev, pa_to_nid(cfg->res.start)); From 27cab431564edba9919d1a82c2d9636d622a2493 Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Sat, 10 Dec 2022 22:40:05 +0800 Subject: [PATCH 3819/4122] LoongArch: Add processing ISA Node in DeviceTree Similar to commit 6d0068ad15e4f771b3 ("MIPS: Loongson64: Process ISA Node in DeviceTree"), we process the ISA node in DeviceTree for FDT-based systems. Previously, the reserved ISA I/O space was hardcoded; now it is taken directly from the DeviceTree. The ranges property of the ISA node is used to determine the size and address of the reserved I/O space. 
Signed-off-by: Binbin Zhou Signed-off-by: Huacai Chen --- arch/loongarch/kernel/setup.c | 75 +++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index f9f5a130710c..fdabf2ac1927 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -429,6 +429,81 @@ static void __init resource_init(void) #endif } +static int __init add_legacy_isa_io(struct fwnode_handle *fwnode, + resource_size_t hw_start, resource_size_t size) +{ + int ret = 0; + unsigned long vaddr; + struct logic_pio_hwaddr *range; + + range = kzalloc(sizeof(*range), GFP_ATOMIC); + if (!range) + return -ENOMEM; + + range->fwnode = fwnode; + range->size = size = round_up(size, PAGE_SIZE); + range->hw_start = hw_start; + range->flags = LOGIC_PIO_CPU_MMIO; + + ret = logic_pio_register_range(range); + if (ret) { + kfree(range); + return ret; + } + + /* Legacy ISA must be placed at the start of PCI_IOBASE */ + if (range->io_start != 0) { + logic_pio_unregister_range(range); + kfree(range); + return -EINVAL; + } + + vaddr = (unsigned long)(PCI_IOBASE + range->io_start); + ioremap_page_range(vaddr, vaddr + size, hw_start, pgprot_device(PAGE_KERNEL)); + + return 0; +} + +static __init int arch_reserve_pio_range(void) +{ + struct device_node *np; + + for_each_node_by_name(np, "isa") { + struct of_range range; + struct of_range_parser parser; + + pr_info("ISA Bridge: %pOF\n", np); + + if (of_range_parser_init(&parser, np)) { + pr_info("Failed to parse resources.\n"); + of_node_put(np); + break; + } + + for_each_of_range(&parser, &range) { + switch (range.flags & IORESOURCE_TYPE_BITS) { + case IORESOURCE_IO: + pr_info(" IO 0x%016llx..0x%016llx -> 0x%016llx\n", + range.cpu_addr, + range.cpu_addr + range.size - 1, + range.bus_addr); + if (add_legacy_isa_io(&np->fwnode, range.cpu_addr, range.size)) + pr_warn("Failed to reserve legacy IO in Logic PIO\n"); + break; + case IORESOURCE_MEM: + pr_info(" MEM 
0x%016llx..0x%016llx -> 0x%016llx\n", + range.cpu_addr, + range.cpu_addr + range.size - 1, + range.bus_addr); + break; + } + } + } + + return 0; +} +arch_initcall(arch_reserve_pio_range); + static int __init reserve_memblock_reserved_regions(void) { u64 i, j; From 366bb35a8e48198cefcd3484ac6b2374d1347873 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 10 Dec 2022 22:40:15 +0800 Subject: [PATCH 3820/4122] LoongArch: Add suspend (ACPI S3) support Add suspend (Suspend To RAM, aka ACPI S3) support for LoongArch. Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 5 ++ arch/loongarch/Makefile | 3 + arch/loongarch/include/asm/acpi.h | 10 +++ arch/loongarch/include/asm/bootinfo.h | 1 + arch/loongarch/include/asm/loongson.h | 3 + arch/loongarch/include/asm/time.h | 1 + arch/loongarch/kernel/acpi.c | 6 ++ arch/loongarch/kernel/smp.c | 1 + arch/loongarch/kernel/time.c | 11 +++- arch/loongarch/power/Makefile | 3 + arch/loongarch/power/platform.c | 57 +++++++++++++++++ arch/loongarch/power/suspend.c | 73 ++++++++++++++++++++++ arch/loongarch/power/suspend_asm.S | 89 +++++++++++++++++++++++++++ 13 files changed, 260 insertions(+), 3 deletions(-) create mode 100644 arch/loongarch/power/Makefile create mode 100644 arch/loongarch/power/platform.c create mode 100644 arch/loongarch/power/suspend.c create mode 100644 arch/loongarch/power/suspend_asm.S diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 48db4b27b9af..0c8b2b1a9626 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -56,6 +56,7 @@ config LOONGARCH select ARCH_WANTS_NO_INSTR select BUILDTIME_TABLE_SORT select COMMON_CLK + select CPU_PM select EFI select GENERIC_CLOCKEVENTS select GENERIC_CMOS_UPDATE @@ -517,6 +518,10 @@ config ARCH_MMAP_RND_BITS_MAX menu "Power management options" +config ARCH_SUSPEND_POSSIBLE + def_bool y + +source "kernel/power/Kconfig" source "drivers/acpi/Kconfig" endmenu diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index 
01b57b726322..5232d8c0f9ca 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -104,6 +104,9 @@ endif libs-y += arch/loongarch/lib/ libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a +# suspend and hibernation support +drivers-$(CONFIG_PM) += arch/loongarch/power/ + ifeq ($(KBUILD_EXTMOD),) prepare: vdso_prepare vdso_prepare: prepare0 diff --git a/arch/loongarch/include/asm/acpi.h b/arch/loongarch/include/asm/acpi.h index 825c2519b9d1..4198753aa1d0 100644 --- a/arch/loongarch/include/asm/acpi.h +++ b/arch/loongarch/include/asm/acpi.h @@ -35,4 +35,14 @@ extern struct list_head acpi_wakeup_device_list; #define ACPI_TABLE_UPGRADE_MAX_PHYS ARCH_LOW_ADDRESS_LIMIT +extern int loongarch_acpi_suspend(void); +extern int (*acpi_suspend_lowlevel)(void); +extern void loongarch_suspend_enter(void); + +static inline unsigned long acpi_get_wakeup_address(void) +{ + extern void loongarch_wakeup_start(void); + return (unsigned long)loongarch_wakeup_start; +} + #endif /* _ASM_LOONGARCH_ACPI_H */ diff --git a/arch/loongarch/include/asm/bootinfo.h b/arch/loongarch/include/asm/bootinfo.h index ed0910e8b856..0051b526ac6d 100644 --- a/arch/loongarch/include/asm/bootinfo.h +++ b/arch/loongarch/include/asm/bootinfo.h @@ -32,6 +32,7 @@ struct loongson_system_configuration { int cores_per_node; int cores_per_package; unsigned long cores_io_master; + unsigned long suspend_addr; const char *cpuname; }; diff --git a/arch/loongarch/include/asm/loongson.h b/arch/loongarch/include/asm/loongson.h index 00db93edae1b..12494cffffd1 100644 --- a/arch/loongarch/include/asm/loongson.h +++ b/arch/loongarch/include/asm/loongson.h @@ -136,4 +136,7 @@ typedef enum { #define ls7a_writel(val, addr) *(volatile unsigned int *)TO_UNCACHE(addr) = (val) #define ls7a_writeq(val, addr) *(volatile unsigned long *)TO_UNCACHE(addr) = (val) +void enable_gpe_wakeup(void); +void enable_pci_wakeup(void); + #endif /* __ASM_LOONGSON_H */ diff --git a/arch/loongarch/include/asm/time.h 
b/arch/loongarch/include/asm/time.h index 2eae219301d0..037a2d1b8ff4 100644 --- a/arch/loongarch/include/asm/time.h +++ b/arch/loongarch/include/asm/time.h @@ -12,6 +12,7 @@ extern u64 cpu_clock_freq; extern u64 const_clock_freq; +extern void save_counter(void); extern void sync_counter(void); static inline unsigned int calc_const_freq(void) diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c index 5a63f85f5798..98f431157e4c 100644 --- a/arch/loongarch/kernel/acpi.c +++ b/arch/loongarch/kernel/acpi.c @@ -140,6 +140,12 @@ static void __init acpi_process_madt(void) loongson_sysconf.nr_cpus = num_processors; } +#ifndef CONFIG_SUSPEND +int (*acpi_suspend_lowlevel)(void); +#else +int (*acpi_suspend_lowlevel)(void) = loongarch_acpi_suspend; +#endif + void __init acpi_boot_table_init(void) { /* diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index b78816cf74ae..8c6e227cb29d 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c index 786735dcc8d6..a6576dea590c 100644 --- a/arch/loongarch/kernel/time.c +++ b/arch/loongarch/kernel/time.c @@ -115,12 +115,17 @@ static unsigned long __init get_loops_per_jiffy(void) return lpj; } -static long init_timeval; +static long init_offset __nosavedata; + +void save_counter(void) +{ + init_offset = drdtime(); +} void sync_counter(void) { /* Ensure counter begin at 0 */ - csr_write64(-init_timeval, LOONGARCH_CSR_CNTC); + csr_write64(init_offset, LOONGARCH_CSR_CNTC); } static int get_timer_irq(void) @@ -219,7 +224,7 @@ void __init time_init(void) else const_clock_freq = calc_const_freq(); - init_timeval = drdtime() - csr_read64(LOONGARCH_CSR_CNTC); + init_offset = -(drdtime() - csr_read64(LOONGARCH_CSR_CNTC)); constant_clockevent_init(); constant_clocksource_init(); diff --git a/arch/loongarch/power/Makefile 
b/arch/loongarch/power/Makefile new file mode 100644 index 000000000000..6740117decaa --- /dev/null +++ b/arch/loongarch/power/Makefile @@ -0,0 +1,3 @@ +obj-y += platform.o + +obj-$(CONFIG_SUSPEND) += suspend.o suspend_asm.o diff --git a/arch/loongarch/power/platform.c b/arch/loongarch/power/platform.c new file mode 100644 index 000000000000..3ea8e07aa225 --- /dev/null +++ b/arch/loongarch/power/platform.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Author: Huacai Chen + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ +#include +#include + +#include +#include + +void enable_gpe_wakeup(void) +{ + if (acpi_disabled) + return; + + if (acpi_gbl_reduced_hardware) + return; + + acpi_enable_all_wakeup_gpes(); +} + +void enable_pci_wakeup(void) +{ + if (acpi_disabled) + return; + + if (acpi_gbl_reduced_hardware) + return; + + acpi_write_bit_register(ACPI_BITREG_PCIEXP_WAKE_STATUS, 1); + + if (acpi_gbl_FADT.flags & ACPI_FADT_PCI_EXPRESS_WAKE) + acpi_write_bit_register(ACPI_BITREG_PCIEXP_WAKE_DISABLE, 0); +} + +static int __init loongson3_acpi_suspend_init(void) +{ +#ifdef CONFIG_ACPI + acpi_status status; + uint64_t suspend_addr = 0; + + if (acpi_disabled || acpi_gbl_reduced_hardware) + return 0; + + acpi_write_bit_register(ACPI_BITREG_SCI_ENABLE, 1); + status = acpi_evaluate_integer(NULL, "\\SADR", NULL, &suspend_addr); + if (ACPI_FAILURE(status) || !suspend_addr) { + pr_err("ACPI S3 is not support!\n"); + return -1; + } + loongson_sysconf.suspend_addr = (u64)phys_to_virt(PHYSADDR(suspend_addr)); +#endif + return 0; +} + +device_initcall(loongson3_acpi_suspend_init); diff --git a/arch/loongarch/power/suspend.c b/arch/loongarch/power/suspend.c new file mode 100644 index 000000000000..5e19733e5e05 --- /dev/null +++ b/arch/loongarch/power/suspend.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * loongson-specific suspend support + * + * Author: Huacai Chen + * Copyright (C) 2020-2022 Loongson Technology Corporation 
Limited + */ +#include +#include +#include + +#include +#include +#include +#include +#include + +u64 loongarch_suspend_addr; + +struct saved_registers { + u32 ecfg; + u32 euen; + u64 pgd; + u64 kpgd; + u32 pwctl0; + u32 pwctl1; +}; +static struct saved_registers saved_regs; + +static void arch_common_suspend(void) +{ + save_counter(); + saved_regs.pgd = csr_read64(LOONGARCH_CSR_PGDL); + saved_regs.kpgd = csr_read64(LOONGARCH_CSR_PGDH); + saved_regs.pwctl0 = csr_read32(LOONGARCH_CSR_PWCTL0); + saved_regs.pwctl1 = csr_read32(LOONGARCH_CSR_PWCTL1); + saved_regs.ecfg = csr_read32(LOONGARCH_CSR_ECFG); + saved_regs.euen = csr_read32(LOONGARCH_CSR_EUEN); + + loongarch_suspend_addr = loongson_sysconf.suspend_addr; +} + +static void arch_common_resume(void) +{ + sync_counter(); + local_flush_tlb_all(); + csr_write64(per_cpu_offset(0), PERCPU_BASE_KS); + csr_write64(eentry, LOONGARCH_CSR_EENTRY); + csr_write64(eentry, LOONGARCH_CSR_MERRENTRY); + csr_write64(tlbrentry, LOONGARCH_CSR_TLBRENTRY); + + csr_write64(saved_regs.pgd, LOONGARCH_CSR_PGDL); + csr_write64(saved_regs.kpgd, LOONGARCH_CSR_PGDH); + csr_write32(saved_regs.pwctl0, LOONGARCH_CSR_PWCTL0); + csr_write32(saved_regs.pwctl1, LOONGARCH_CSR_PWCTL1); + csr_write32(saved_regs.ecfg, LOONGARCH_CSR_ECFG); + csr_write32(saved_regs.euen, LOONGARCH_CSR_EUEN); +} + +int loongarch_acpi_suspend(void) +{ + enable_gpe_wakeup(); + enable_pci_wakeup(); + + arch_common_suspend(); + + /* processor specific suspend */ + loongarch_suspend_enter(); + + arch_common_resume(); + + return 0; +} diff --git a/arch/loongarch/power/suspend_asm.S b/arch/loongarch/power/suspend_asm.S new file mode 100644 index 000000000000..eb2675642f9f --- /dev/null +++ b/arch/loongarch/power/suspend_asm.S @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Sleep helper for Loongson-3 sleep mode. 
+ * + * Author: Huacai Chen + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include +#include + +/* preparatory stuff */ +.macro SETUP_SLEEP + addi.d sp, sp, -PT_SIZE + st.d $r1, sp, PT_R1 + st.d $r2, sp, PT_R2 + st.d $r3, sp, PT_R3 + st.d $r4, sp, PT_R4 + st.d $r21, sp, PT_R21 + st.d $r22, sp, PT_R22 + st.d $r23, sp, PT_R23 + st.d $r24, sp, PT_R24 + st.d $r25, sp, PT_R25 + st.d $r26, sp, PT_R26 + st.d $r27, sp, PT_R27 + st.d $r28, sp, PT_R28 + st.d $r29, sp, PT_R29 + st.d $r30, sp, PT_R30 + st.d $r31, sp, PT_R31 + + la.pcrel t0, acpi_saved_sp + st.d sp, t0, 0 +.endm + +.macro SETUP_WAKEUP + ld.d $r1, sp, PT_R1 + ld.d $r2, sp, PT_R2 + ld.d $r3, sp, PT_R3 + ld.d $r4, sp, PT_R4 + ld.d $r21, sp, PT_R21 + ld.d $r22, sp, PT_R22 + ld.d $r23, sp, PT_R23 + ld.d $r24, sp, PT_R24 + ld.d $r25, sp, PT_R25 + ld.d $r26, sp, PT_R26 + ld.d $r27, sp, PT_R27 + ld.d $r28, sp, PT_R28 + ld.d $r29, sp, PT_R29 + ld.d $r30, sp, PT_R30 + ld.d $r31, sp, PT_R31 +.endm + + .text + .align 12 + +/* Sleep/wakeup code for Loongson-3 */ +SYM_FUNC_START(loongarch_suspend_enter) + SETUP_SLEEP + bl __flush_cache_all + + /* Pass RA and SP to BIOS */ + addi.d a1, sp, 0 + la.pcrel a0, loongarch_wakeup_start + la.pcrel t0, loongarch_suspend_addr + ld.d t0, t0, 0 + jirl a0, t0, 0 /* Call BIOS's STR sleep routine */ + + /* + * This is where we return upon wakeup. + * Reload all of the registers and return. 
+ */ +SYM_INNER_LABEL(loongarch_wakeup_start, SYM_L_GLOBAL) + li.d t0, CSR_DMW0_INIT # UC, PLV0 + csrwr t0, LOONGARCH_CSR_DMWIN0 + li.d t0, CSR_DMW1_INIT # CA, PLV0 + csrwr t0, LOONGARCH_CSR_DMWIN1 + + la.abs t0, 0f + jr t0 +0: + la.pcrel t0, acpi_saved_sp + ld.d sp, t0, 0 + SETUP_WAKEUP + addi.d sp, sp, PT_SIZE + jr ra +SYM_FUNC_END(loongarch_suspend_enter) From 7db54bfe44a662c8f2c10277bccfa02c2f4c719c Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 10 Dec 2022 22:40:15 +0800 Subject: [PATCH 3821/4122] LoongArch: Add hibernation (ACPI S4) support Add hibernation (Suspend to Disk, aka ACPI S4) support for LoongArch. Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 3 ++ arch/loongarch/kernel/asm-offsets.c | 12 +++++ arch/loongarch/kernel/reset.c | 5 +++ arch/loongarch/kernel/setup.c | 5 +++ arch/loongarch/power/Makefile | 1 + arch/loongarch/power/hibernate.c | 62 ++++++++++++++++++++++++++ arch/loongarch/power/hibernate_asm.S | 66 ++++++++++++++++++++++++++++ 7 files changed, 154 insertions(+) create mode 100644 arch/loongarch/power/hibernate.c create mode 100644 arch/loongarch/power/hibernate_asm.S diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 0c8b2b1a9626..576a649ac13c 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -521,6 +521,9 @@ menu "Power management options" config ARCH_SUSPEND_POSSIBLE def_bool y +config ARCH_HIBERNATION_POSSIBLE + def_bool y + source "kernel/power/Kconfig" source "drivers/acpi/Kconfig" diff --git a/arch/loongarch/kernel/asm-offsets.c b/arch/loongarch/kernel/asm-offsets.c index bdd88eda9513..4ef494577813 100644 --- a/arch/loongarch/kernel/asm-offsets.c +++ b/arch/loongarch/kernel/asm-offsets.c @@ -257,3 +257,15 @@ void output_smpboot_defines(void) BLANK(); } #endif + +#ifdef CONFIG_HIBERNATION +void output_pbe_defines(void) +{ + COMMENT(" Linux struct pbe offsets. 
"); + OFFSET(PBE_ADDRESS, pbe, address); + OFFSET(PBE_ORIG_ADDRESS, pbe, orig_address); + OFFSET(PBE_NEXT, pbe, next); + DEFINE(PBE_SIZE, sizeof(struct pbe)); + BLANK(); +} +#endif diff --git a/arch/loongarch/kernel/reset.c b/arch/loongarch/kernel/reset.c index 8c82021eb2f4..1ef8c6383535 100644 --- a/arch/loongarch/kernel/reset.c +++ b/arch/loongarch/kernel/reset.c @@ -15,6 +15,7 @@ #include #include #include +#include void (*pm_power_off)(void); EXPORT_SYMBOL(pm_power_off); @@ -41,6 +42,10 @@ void machine_power_off(void) #ifdef CONFIG_SMP preempt_disable(); smp_send_stop(); +#endif +#ifdef CONFIG_PM + if (!acpi_disabled) + enable_pci_wakeup(); #endif do_kernel_power_off(); #ifdef CONFIG_EFI diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index fdabf2ac1927..4344502c0b31 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -370,6 +371,10 @@ static void __init arch_mem_init(char **cmdline_p) dma_contiguous_reserve(PFN_PHYS(max_low_pfn)); + /* Reserve for hibernation. 
*/ + register_nosave_region(PFN_DOWN(__pa_symbol(&__nosave_begin)), + PFN_UP(__pa_symbol(&__nosave_end))); + memblock_dump_all(); early_memtest(PFN_PHYS(ARCH_PFN_OFFSET), PFN_PHYS(max_low_pfn)); diff --git a/arch/loongarch/power/Makefile b/arch/loongarch/power/Makefile index 6740117decaa..58151d003e40 100644 --- a/arch/loongarch/power/Makefile +++ b/arch/loongarch/power/Makefile @@ -1,3 +1,4 @@ obj-y += platform.o obj-$(CONFIG_SUSPEND) += suspend.o suspend_asm.o +obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate_asm.o diff --git a/arch/loongarch/power/hibernate.c b/arch/loongarch/power/hibernate.c new file mode 100644 index 000000000000..1e0590542f98 --- /dev/null +++ b/arch/loongarch/power/hibernate.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include + +static u32 saved_crmd; +static u32 saved_prmd; +static u32 saved_euen; +static u32 saved_ecfg; +static u64 saved_pcpu_base; +struct pt_regs saved_regs; + +void save_processor_state(void) +{ + saved_crmd = csr_read32(LOONGARCH_CSR_CRMD); + saved_prmd = csr_read32(LOONGARCH_CSR_PRMD); + saved_euen = csr_read32(LOONGARCH_CSR_EUEN); + saved_ecfg = csr_read32(LOONGARCH_CSR_ECFG); + saved_pcpu_base = csr_read64(PERCPU_BASE_KS); + + if (is_fpu_owner()) + save_fp(current); +} + +void restore_processor_state(void) +{ + csr_write32(saved_crmd, LOONGARCH_CSR_CRMD); + csr_write32(saved_prmd, LOONGARCH_CSR_PRMD); + csr_write32(saved_euen, LOONGARCH_CSR_EUEN); + csr_write32(saved_ecfg, LOONGARCH_CSR_ECFG); + csr_write64(saved_pcpu_base, PERCPU_BASE_KS); + + if (is_fpu_owner()) + restore_fp(current); +} + +int pfn_is_nosave(unsigned long pfn) +{ + unsigned long nosave_begin_pfn = PFN_DOWN(__pa(&__nosave_begin)); + unsigned long nosave_end_pfn = PFN_UP(__pa(&__nosave_end)); + + return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); +} + +extern int swsusp_asm_suspend(void); + +int swsusp_arch_suspend(void) +{ + enable_pci_wakeup(); + return swsusp_asm_suspend(); +} + 
+extern int swsusp_asm_resume(void); + +int swsusp_arch_resume(void) +{ + /* Avoid TLB mismatch during and after kernel resume */ + local_flush_tlb_all(); + return swsusp_asm_resume(); +} diff --git a/arch/loongarch/power/hibernate_asm.S b/arch/loongarch/power/hibernate_asm.S new file mode 100644 index 000000000000..3c747c08d65d --- /dev/null +++ b/arch/loongarch/power/hibernate_asm.S @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Hibernation support specific for LoongArch + * + * Author: Huacai Chen + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include + +.text +SYM_FUNC_START(swsusp_asm_suspend) + la.pcrel t0, saved_regs + PTR_S ra, t0, PT_R1 + PTR_S tp, t0, PT_R2 + PTR_S sp, t0, PT_R3 + PTR_S u0, t0, PT_R21 + PTR_S fp, t0, PT_R22 + PTR_S s0, t0, PT_R23 + PTR_S s1, t0, PT_R24 + PTR_S s2, t0, PT_R25 + PTR_S s3, t0, PT_R26 + PTR_S s4, t0, PT_R27 + PTR_S s5, t0, PT_R28 + PTR_S s6, t0, PT_R29 + PTR_S s7, t0, PT_R30 + PTR_S s8, t0, PT_R31 + b swsusp_save +SYM_FUNC_END(swsusp_asm_suspend) + +SYM_FUNC_START(swsusp_asm_resume) + la.pcrel t0, restore_pblist + PTR_L t0, t0, 0 +0: + PTR_L t1, t0, PBE_ADDRESS /* source */ + PTR_L t2, t0, PBE_ORIG_ADDRESS /* destination */ + PTR_LI t3, _PAGE_SIZE + PTR_ADD t3, t3, t1 +1: + REG_L t8, t1, 0 + REG_S t8, t2, 0 + PTR_ADDI t1, t1, SZREG + PTR_ADDI t2, t2, SZREG + bne t1, t3, 1b + PTR_L t0, t0, PBE_NEXT + bnez t0, 0b + la.pcrel t0, saved_regs + PTR_L ra, t0, PT_R1 + PTR_L tp, t0, PT_R2 + PTR_L sp, t0, PT_R3 + PTR_L u0, t0, PT_R21 + PTR_L fp, t0, PT_R22 + PTR_L s0, t0, PT_R23 + PTR_L s1, t0, PT_R24 + PTR_L s2, t0, PT_R25 + PTR_L s3, t0, PT_R26 + PTR_L s4, t0, PT_R27 + PTR_L s5, t0, PT_R28 + PTR_L s6, t0, PT_R29 + PTR_L s7, t0, PT_R30 + PTR_L s8, t0, PT_R31 + PTR_LI a0, 0x0 + jirl zero, ra, 0 +SYM_FUNC_END(swsusp_asm_resume) From 09f33601bf940f955c10a6e75a1c1b7bcadee5e2 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 10 Dec 2022 22:40:15 +0800 Subject: 
[PATCH 3822/4122] LoongArch: Add basic STACKPROTECTOR support Add basic stack protector support similar to other architectures. A constant canary value is set at boot time, and with help of compiler's -fstack-protector we can detect stack corruption. Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 1 + arch/loongarch/include/asm/stackprotector.h | 38 +++++++++++++++++++++ arch/loongarch/kernel/asm-offsets.c | 3 ++ arch/loongarch/kernel/process.c | 6 ++++ arch/loongarch/kernel/switch.S | 5 +++ 5 files changed, 53 insertions(+) create mode 100644 arch/loongarch/include/asm/stackprotector.h diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 576a649ac13c..28d827c6abb3 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -103,6 +103,7 @@ config LOONGARCH select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RSEQ select HAVE_SETUP_PER_CPU_AREA if NUMA + select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS select HAVE_TIF_NOHZ select HAVE_VIRT_CPU_ACCOUNTING_GEN if !SMP diff --git a/arch/loongarch/include/asm/stackprotector.h b/arch/loongarch/include/asm/stackprotector.h new file mode 100644 index 000000000000..a1a965751a7b --- /dev/null +++ b/arch/loongarch/include/asm/stackprotector.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * GCC stack protector support. + * + * Stack protector works by putting predefined pattern at the start of + * the stack frame and verifying that it hasn't been overwritten when + * returning from the function. The pattern is called stack canary and + * on LoongArch gcc expects it to be defined by a global variable called + * "__stack_chk_guard". + */ + +#ifndef _ASM_STACKPROTECTOR_H +#define _ASM_STACKPROTECTOR_H + +#include +#include + +extern unsigned long __stack_chk_guard; + +/* + * Initialize the stackprotector canary value. + * + * NOTE: this must only be called from functions that never return, + * and it must always be inlined. 
+ */ +static __always_inline void boot_init_stack_canary(void) +{ + unsigned long canary; + + /* Try to get a semi random initial value. */ + get_random_bytes(&canary, sizeof(canary)); + canary ^= LINUX_VERSION_CODE; + + current->stack_canary = canary; + __stack_chk_guard = current->stack_canary; +} + +#endif /* _ASM_STACKPROTECTOR_H */ diff --git a/arch/loongarch/kernel/asm-offsets.c b/arch/loongarch/kernel/asm-offsets.c index 4ef494577813..4bdb203fc66e 100644 --- a/arch/loongarch/kernel/asm-offsets.c +++ b/arch/loongarch/kernel/asm-offsets.c @@ -68,6 +68,9 @@ void output_task_defines(void) OFFSET(TASK_FLAGS, task_struct, flags); OFFSET(TASK_MM, task_struct, mm); OFFSET(TASK_PID, task_struct, pid); +#if defined(CONFIG_STACKPROTECTOR) + OFFSET(TASK_STACK_CANARY, task_struct, stack_canary); +#endif DEFINE(TASK_STRUCT_SIZE, sizeof(struct task_struct)); BLANK(); } diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c index ddb8ba4eb399..790cc14c5f06 100644 --- a/arch/loongarch/kernel/process.c +++ b/arch/loongarch/kernel/process.c @@ -47,6 +47,12 @@ #include #include +#ifdef CONFIG_STACKPROTECTOR +#include +unsigned long __stack_chk_guard __read_mostly; +EXPORT_SYMBOL(__stack_chk_guard); +#endif + /* * Idle related variables and functions */ diff --git a/arch/loongarch/kernel/switch.S b/arch/loongarch/kernel/switch.S index 202a163cb32f..31dd8199b245 100644 --- a/arch/loongarch/kernel/switch.S +++ b/arch/loongarch/kernel/switch.S @@ -23,6 +23,11 @@ SYM_FUNC_START(__switch_to) stptr.d ra, a0, THREAD_REG01 stptr.d a3, a0, THREAD_SCHED_RA stptr.d a4, a0, THREAD_SCHED_CFA +#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_SMP) + la t7, __stack_chk_guard + LONG_L t8, a1, TASK_STACK_CANARY + LONG_S t8, t7, 0 +#endif move tp, a2 cpu_restore_nonscratch a1 From 9151dde40356880bb445f719f5ebbb1319054d5f Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 10 Dec 2022 22:40:15 +0800 Subject: [PATCH 3823/4122] LoongArch: module: Use got/plt section 
indices for relocations Instead of saving a pointer to the .got, .plt and .plt_idx sections to apply {got,plt}-based relocations, save and use their section indices instead. The mod->arch.{core,init}.{got,plt} pointers were problematic for livepatch because they pointed within temporary section headers (provided by the module loader via info->sechdrs) that would be freed after module load. Since livepatch modules may need to apply relocations post-module-load (for example, to patch a module that is loaded later), using section indices to offset into the section headers (instead of accessing them through a saved pointer) allows livepatch modules on LoongArch to pass in their own copy of the section headers to apply_relocate_add() to apply delayed relocations. The method used is the same as commit c8ebf64eab743 ("arm64/module: use plt section indices for relocations"). Signed-off-by: Hongchen Zhang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/module.h | 22 +++++----- arch/loongarch/kernel/module-sections.c | 54 +++++++++++++------------ arch/loongarch/kernel/module.c | 39 ++++++++++++------ 3 files changed, 68 insertions(+), 47 deletions(-) diff --git a/arch/loongarch/include/asm/module.h b/arch/loongarch/include/asm/module.h index b29b19a46f42..60dc62a1146e 100644 --- a/arch/loongarch/include/asm/module.h +++ b/arch/loongarch/include/asm/module.h @@ -11,7 +11,7 @@ #define RELA_STACK_DEPTH 16 struct mod_section { - Elf_Shdr *shdr; + int shndx; int num_entries; int max_entries; }; @@ -37,8 +37,8 @@ struct plt_idx_entry { Elf_Addr symbol_addr; }; -Elf_Addr module_emit_got_entry(struct module *mod, Elf_Addr val); -Elf_Addr module_emit_plt_entry(struct module *mod, Elf_Addr val); +Elf_Addr module_emit_got_entry(struct module *mod, Elf_Shdr *sechdrs, Elf_Addr val); +Elf_Addr module_emit_plt_entry(struct module *mod, Elf_Shdr *sechdrs, Elf_Addr val); static inline struct got_entry emit_got_entry(Elf_Addr val) { @@ -62,10 +62,10 @@ static inline struct 
plt_idx_entry emit_plt_idx_entry(unsigned long val) return (struct plt_idx_entry) { val }; } -static inline int get_plt_idx(unsigned long val, const struct mod_section *sec) +static inline int get_plt_idx(unsigned long val, Elf_Shdr *sechdrs, const struct mod_section *sec) { int i; - struct plt_idx_entry *plt_idx = (struct plt_idx_entry *)sec->shdr->sh_addr; + struct plt_idx_entry *plt_idx = (struct plt_idx_entry *)sechdrs[sec->shndx].sh_addr; for (i = 0; i < sec->num_entries; i++) { if (plt_idx[i].symbol_addr == val) @@ -76,11 +76,12 @@ static inline int get_plt_idx(unsigned long val, const struct mod_section *sec) } static inline struct plt_entry *get_plt_entry(unsigned long val, - const struct mod_section *sec_plt, - const struct mod_section *sec_plt_idx) + Elf_Shdr *sechdrs, + const struct mod_section *sec_plt, + const struct mod_section *sec_plt_idx) { - int plt_idx = get_plt_idx(val, sec_plt_idx); - struct plt_entry *plt = (struct plt_entry *)sec_plt->shdr->sh_addr; + int plt_idx = get_plt_idx(val, sechdrs, sec_plt_idx); + struct plt_entry *plt = (struct plt_entry *)sechdrs[sec_plt->shndx].sh_addr; if (plt_idx < 0) return NULL; @@ -89,10 +90,11 @@ static inline struct plt_entry *get_plt_entry(unsigned long val, } static inline struct got_entry *get_got_entry(Elf_Addr val, + Elf_Shdr *sechdrs, const struct mod_section *sec) { - struct got_entry *got = (struct got_entry *)sec->shdr->sh_addr; int i; + struct got_entry *got = (struct got_entry *)sechdrs[sec->shndx].sh_addr; for (i = 0; i < sec->num_entries; i++) if (got[i].symbol_addr == val) diff --git a/arch/loongarch/kernel/module-sections.c b/arch/loongarch/kernel/module-sections.c index d296a70b758f..13d9a427325a 100644 --- a/arch/loongarch/kernel/module-sections.c +++ b/arch/loongarch/kernel/module-sections.c @@ -7,17 +7,17 @@ #include #include -Elf_Addr module_emit_got_entry(struct module *mod, Elf_Addr val) +Elf_Addr module_emit_got_entry(struct module *mod, Elf_Shdr *sechdrs, Elf_Addr val) { struct 
mod_section *got_sec = &mod->arch.got; int i = got_sec->num_entries; - struct got_entry *got = get_got_entry(val, got_sec); + struct got_entry *got = get_got_entry(val, sechdrs, got_sec); if (got) return (Elf_Addr)got; /* There is no GOT entry for val yet, create a new one. */ - got = (struct got_entry *)got_sec->shdr->sh_addr; + got = (struct got_entry *)sechdrs[got_sec->shndx].sh_addr; got[i] = emit_got_entry(val); got_sec->num_entries++; @@ -33,12 +33,12 @@ Elf_Addr module_emit_got_entry(struct module *mod, Elf_Addr val) return (Elf_Addr)&got[i]; } -Elf_Addr module_emit_plt_entry(struct module *mod, Elf_Addr val) +Elf_Addr module_emit_plt_entry(struct module *mod, Elf_Shdr *sechdrs, Elf_Addr val) { int nr; struct mod_section *plt_sec = &mod->arch.plt; struct mod_section *plt_idx_sec = &mod->arch.plt_idx; - struct plt_entry *plt = get_plt_entry(val, plt_sec, plt_idx_sec); + struct plt_entry *plt = get_plt_entry(val, sechdrs, plt_sec, plt_idx_sec); struct plt_idx_entry *plt_idx; if (plt) @@ -47,9 +47,9 @@ Elf_Addr module_emit_plt_entry(struct module *mod, Elf_Addr val) nr = plt_sec->num_entries; /* There is no duplicate entry, create a new one */ - plt = (struct plt_entry *)plt_sec->shdr->sh_addr; + plt = (struct plt_entry *)sechdrs[plt_sec->shndx].sh_addr; plt[nr] = emit_plt_entry(val); - plt_idx = (struct plt_idx_entry *)plt_idx_sec->shdr->sh_addr; + plt_idx = (struct plt_idx_entry *)sechdrs[plt_idx_sec->shndx].sh_addr; plt_idx[nr] = emit_plt_idx_entry(val); plt_sec->num_entries++; @@ -103,28 +103,29 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings, struct module *mod) { unsigned int i, num_plts = 0, num_gots = 0; + Elf_Shdr *got_sec, *plt_sec, *plt_idx_sec; /* * Find the empty .plt sections. 
*/ for (i = 0; i < ehdr->e_shnum; i++) { if (!strcmp(secstrings + sechdrs[i].sh_name, ".got")) - mod->arch.got.shdr = sechdrs + i; + mod->arch.got.shndx = i; else if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt")) - mod->arch.plt.shdr = sechdrs + i; + mod->arch.plt.shndx = i; else if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt.idx")) - mod->arch.plt_idx.shdr = sechdrs + i; + mod->arch.plt_idx.shndx = i; } - if (!mod->arch.got.shdr) { + if (!mod->arch.got.shndx) { pr_err("%s: module GOT section(s) missing\n", mod->name); return -ENOEXEC; } - if (!mod->arch.plt.shdr) { + if (!mod->arch.plt.shndx) { pr_err("%s: module PLT section(s) missing\n", mod->name); return -ENOEXEC; } - if (!mod->arch.plt_idx.shdr) { + if (!mod->arch.plt_idx.shndx) { pr_err("%s: module PLT.IDX section(s) missing\n", mod->name); return -ENOEXEC; } @@ -145,24 +146,27 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, count_max_entries(relas, num_rela, &num_plts, &num_gots); } - mod->arch.got.shdr->sh_type = SHT_NOBITS; - mod->arch.got.shdr->sh_flags = SHF_ALLOC; - mod->arch.got.shdr->sh_addralign = L1_CACHE_BYTES; - mod->arch.got.shdr->sh_size = (num_gots + 1) * sizeof(struct got_entry); + got_sec = sechdrs + mod->arch.got.shndx; + got_sec->sh_type = SHT_NOBITS; + got_sec->sh_flags = SHF_ALLOC; + got_sec->sh_addralign = L1_CACHE_BYTES; + got_sec->sh_size = (num_gots + 1) * sizeof(struct got_entry); mod->arch.got.num_entries = 0; mod->arch.got.max_entries = num_gots; - mod->arch.plt.shdr->sh_type = SHT_NOBITS; - mod->arch.plt.shdr->sh_flags = SHF_EXECINSTR | SHF_ALLOC; - mod->arch.plt.shdr->sh_addralign = L1_CACHE_BYTES; - mod->arch.plt.shdr->sh_size = (num_plts + 1) * sizeof(struct plt_entry); + plt_sec = sechdrs + mod->arch.plt.shndx; + plt_sec->sh_type = SHT_NOBITS; + plt_sec->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + plt_sec->sh_addralign = L1_CACHE_BYTES; + plt_sec->sh_size = (num_plts + 1) * sizeof(struct plt_entry); mod->arch.plt.num_entries = 0; 
mod->arch.plt.max_entries = num_plts; - mod->arch.plt_idx.shdr->sh_type = SHT_NOBITS; - mod->arch.plt_idx.shdr->sh_flags = SHF_ALLOC; - mod->arch.plt_idx.shdr->sh_addralign = L1_CACHE_BYTES; - mod->arch.plt_idx.shdr->sh_size = (num_plts + 1) * sizeof(struct plt_idx_entry); + plt_idx_sec = sechdrs + mod->arch.plt_idx.shndx; + plt_idx_sec->sh_type = SHT_NOBITS; + plt_idx_sec->sh_flags = SHF_ALLOC; + plt_idx_sec->sh_addralign = L1_CACHE_BYTES; + plt_idx_sec->sh_size = (num_plts + 1) * sizeof(struct plt_idx_entry); mod->arch.plt_idx.num_entries = 0; mod->arch.plt_idx.max_entries = num_plts; diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c index 825fcf77f9e7..899dc677cec3 100644 --- a/arch/loongarch/kernel/module.c +++ b/arch/loongarch/kernel/module.c @@ -99,16 +99,17 @@ static int apply_r_larch_sop_push_dup(struct module *mod, u32 *location, Elf_Add return 0; } -static int apply_r_larch_sop_push_plt_pcrel(struct module *mod, u32 *location, Elf_Addr v, +static int apply_r_larch_sop_push_plt_pcrel(struct module *mod, + Elf_Shdr *sechdrs, u32 *location, Elf_Addr v, s64 *rela_stack, size_t *rela_stack_top, unsigned int type) { ptrdiff_t offset = (void *)v - (void *)location; if (offset >= SZ_128M) - v = module_emit_plt_entry(mod, v); + v = module_emit_plt_entry(mod, sechdrs, v); if (offset < -SZ_128M) - v = module_emit_plt_entry(mod, v); + v = module_emit_plt_entry(mod, sechdrs, v); return apply_r_larch_sop_push_pcrel(mod, location, v, rela_stack, rela_stack_top, type); } @@ -272,17 +273,18 @@ static int apply_r_larch_add_sub(struct module *mod, u32 *location, Elf_Addr v, } } -static int apply_r_larch_b26(struct module *mod, u32 *location, Elf_Addr v, +static int apply_r_larch_b26(struct module *mod, + Elf_Shdr *sechdrs, u32 *location, Elf_Addr v, s64 *rela_stack, size_t *rela_stack_top, unsigned int type) { ptrdiff_t offset = (void *)v - (void *)location; union loongarch_instruction *insn = (union loongarch_instruction *)location; if (offset >= 
SZ_128M) - v = module_emit_plt_entry(mod, v); + v = module_emit_plt_entry(mod, sechdrs, v); if (offset < -SZ_128M) - v = module_emit_plt_entry(mod, v); + v = module_emit_plt_entry(mod, sechdrs, v); offset = (void *)v - (void *)location; @@ -339,10 +341,11 @@ static int apply_r_larch_pcala(struct module *mod, u32 *location, Elf_Addr v, return 0; } -static int apply_r_larch_got_pc(struct module *mod, u32 *location, Elf_Addr v, +static int apply_r_larch_got_pc(struct module *mod, + Elf_Shdr *sechdrs, u32 *location, Elf_Addr v, s64 *rela_stack, size_t *rela_stack_top, unsigned int type) { - Elf_Addr got = module_emit_got_entry(mod, v); + Elf_Addr got = module_emit_got_entry(mod, sechdrs, v); if (!got) return -EINVAL; @@ -387,13 +390,10 @@ static reloc_rela_handler reloc_rela_handlers[] = { [R_LARCH_SOP_PUSH_PCREL] = apply_r_larch_sop_push_pcrel, [R_LARCH_SOP_PUSH_ABSOLUTE] = apply_r_larch_sop_push_absolute, [R_LARCH_SOP_PUSH_DUP] = apply_r_larch_sop_push_dup, - [R_LARCH_SOP_PUSH_PLT_PCREL] = apply_r_larch_sop_push_plt_pcrel, [R_LARCH_SOP_SUB ... R_LARCH_SOP_IF_ELSE] = apply_r_larch_sop, [R_LARCH_SOP_POP_32_S_10_5 ... R_LARCH_SOP_POP_32_U] = apply_r_larch_sop_imm_field, [R_LARCH_ADD32 ... 
R_LARCH_SUB64] = apply_r_larch_add_sub, - [R_LARCH_B26] = apply_r_larch_b26, [R_LARCH_PCALA_HI20...R_LARCH_PCALA64_HI12] = apply_r_larch_pcala, - [R_LARCH_GOT_PC_HI20...R_LARCH_GOT_PC_LO12] = apply_r_larch_got_pc, }; int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, @@ -444,7 +444,22 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, sym->st_value, rel[i].r_addend, (u64)location); v = sym->st_value + rel[i].r_addend; - err = handler(mod, location, v, rela_stack, &rela_stack_top, type); + switch (type) { + case R_LARCH_B26: + err = apply_r_larch_b26(mod, sechdrs, location, + v, rela_stack, &rela_stack_top, type); + break; + case R_LARCH_GOT_PC_HI20...R_LARCH_GOT_PC_LO12: + err = apply_r_larch_got_pc(mod, sechdrs, location, + v, rela_stack, &rela_stack_top, type); + break; + case R_LARCH_SOP_PUSH_PLT_PCREL: + err = apply_r_larch_sop_push_plt_pcrel(mod, sechdrs, location, + v, rela_stack, &rela_stack_top, type); + break; + default: + err = handler(mod, location, v, rela_stack, &rela_stack_top, type); + } if (err) return err; } From dbe3ba3018ec1fc53ea0d0adf0f687f5d438039d Mon Sep 17 00:00:00 2001 From: Qing Zhang Date: Sat, 10 Dec 2022 22:40:15 +0800 Subject: [PATCH 3824/4122] LoongArch/ftrace: Add basic support This patch contains basic ftrace support for LoongArch. Specifically, function tracer (HAVE_FUNCTION_TRACER), function graph tracer (HAVE_FUNCTION_GRAPH_TRACER) are implemented following the instructions in Documentation/trace/ftrace-design.txt. Using `-pg` makes the compiler emit a stub that calls `void _mcount(void *ra)` like a child function. Thus, the `store RA` and `alloc stack` instructions can be found before `call _mcount`. Find `alloc stack` at first, and then find `store RA`. Note that the functions in both inst.c and time.c should not be hooked with the compiler's -pg option: to prevent infinite self-referencing for the former, and to ignore early setup stuff for the latter. 
Co-developed-by: Jinyang He Signed-off-by: Jinyang He Signed-off-by: Qing Zhang Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 2 + arch/loongarch/include/asm/ftrace.h | 21 +++++++ arch/loongarch/kernel/Makefile | 8 +++ arch/loongarch/kernel/ftrace.c | 73 ++++++++++++++++++++++ arch/loongarch/kernel/mcount.S | 96 +++++++++++++++++++++++++++++ 5 files changed, 200 insertions(+) create mode 100644 arch/loongarch/include/asm/ftrace.h create mode 100644 arch/loongarch/kernel/ftrace.c create mode 100644 arch/loongarch/kernel/mcount.S diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 28d827c6abb3..72a41e08f9f4 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -90,6 +90,8 @@ config LOONGARCH select HAVE_EBPF_JIT select HAVE_EXIT_THREAD select HAVE_FAST_GUP + select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FUNCTION_TRACER select HAVE_GENERIC_VDSO select HAVE_IOREMAP_PROT select HAVE_IRQ_EXIT_ON_IRQ_STACK diff --git a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h new file mode 100644 index 000000000000..c3f5cde40464 --- /dev/null +++ b/arch/loongarch/include/asm/ftrace.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2022 Loongson Technology Corporation Limited + */ + +#ifndef _ASM_LOONGARCH_FTRACE_H +#define _ASM_LOONGARCH_FTRACE_H + +#ifdef CONFIG_FUNCTION_TRACER + +#define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */ + +#ifndef __ASSEMBLY__ +#define mcount _mcount +extern void _mcount(void); +extern void prepare_ftrace_return(unsigned long self_addr, unsigned long callsite_sp, unsigned long old); +#endif /* __ASSEMBLY__ */ + +#endif /* CONFIG_FUNCTION_TRACER */ + +#endif /* _ASM_LOONGARCH_FTRACE_H */ diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 86744531b100..3f71bce1c7ce 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -15,6 +15,14 @@ obj-$(CONFIG_EFI) += efi.o obj-$(CONFIG_CPU_HAS_FPU) += fpu.o 
+ifdef CONFIG_FUNCTION_TRACER +obj-y += mcount.o ftrace.o +CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_inst.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_time.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_perf_event.o = $(CC_FLAGS_FTRACE) +endif + obj-$(CONFIG_MODULES) += module.o module-sections.o obj-$(CONFIG_STACKTRACE) += stacktrace.o diff --git a/arch/loongarch/kernel/ftrace.c b/arch/loongarch/kernel/ftrace.c new file mode 100644 index 000000000000..8c3ec1bc7aad --- /dev/null +++ b/arch/loongarch/kernel/ftrace.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + +/* + * As `call _mcount` follows LoongArch psABI, ra-saved operation and + * stack operation can be found before this insn. + */ + +static int ftrace_get_parent_ra_addr(unsigned long insn_addr, int *ra_off) +{ + int limit = 32; + union loongarch_instruction *insn; + + insn = (union loongarch_instruction *)insn_addr; + + do { + insn--; + limit--; + + if (is_ra_save_ins(insn)) + *ra_off = -((1 << 12) - insn->reg2i12_format.immediate); + + } while (!is_stack_alloc_ins(insn) && limit); + + if (!limit) + return -EINVAL; + + return 0; +} + +void prepare_ftrace_return(unsigned long self_addr, + unsigned long callsite_sp, unsigned long old) +{ + int ra_off; + unsigned long return_hooker = (unsigned long)&return_to_handler; + + if (unlikely(ftrace_graph_is_dead())) + return; + + if (unlikely(atomic_read(&current->tracing_graph_pause))) + return; + + if (ftrace_get_parent_ra_addr(self_addr, &ra_off)) + goto out; + + if (!function_graph_enter(old, self_addr, 0, NULL)) + *(unsigned long *)(callsite_sp + ra_off) = return_hooker; + + return; + +out: + ftrace_graph_stop(); + WARN_ON(1); +} +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/loongarch/kernel/mcount.S 
b/arch/loongarch/kernel/mcount.S new file mode 100644 index 000000000000..8cdc1563cd33 --- /dev/null +++ b/arch/loongarch/kernel/mcount.S @@ -0,0 +1,96 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * LoongArch specific _mcount support + * + * Copyright (C) 2022 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include + + .text + +#define MCOUNT_S0_OFFSET (0) +#define MCOUNT_RA_OFFSET (SZREG) +#define MCOUNT_STACK_SIZE (2 * SZREG) + + .macro MCOUNT_SAVE_REGS + PTR_ADDI sp, sp, -MCOUNT_STACK_SIZE + PTR_S s0, sp, MCOUNT_S0_OFFSET + PTR_S ra, sp, MCOUNT_RA_OFFSET + move s0, a0 + .endm + + .macro MCOUNT_RESTORE_REGS + move a0, s0 + PTR_L ra, sp, MCOUNT_RA_OFFSET + PTR_L s0, sp, MCOUNT_S0_OFFSET + PTR_ADDI sp, sp, MCOUNT_STACK_SIZE + .endm + +SYM_FUNC_START(_mcount) + la.pcrel t1, ftrace_stub + la.pcrel t2, ftrace_trace_function /* Prepare t2 for (1) */ + PTR_L t2, t2, 0 + beq t1, t2, fgraph_trace + + MCOUNT_SAVE_REGS + + move a0, ra /* arg0: self return address */ + move a1, s0 /* arg1: parent's return address */ + jirl ra, t2, 0 /* (1) call *ftrace_trace_function */ + + MCOUNT_RESTORE_REGS + +fgraph_trace: +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + la.pcrel t1, ftrace_stub + la.pcrel t3, ftrace_graph_return + PTR_L t3, t3, 0 + bne t1, t3, ftrace_graph_caller + la.pcrel t1, ftrace_graph_entry_stub + la.pcrel t3, ftrace_graph_entry + PTR_L t3, t3, 0 + bne t1, t3, ftrace_graph_caller +#endif + +SYM_INNER_LABEL(ftrace_stub, SYM_L_GLOBAL) + jr ra +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +SYM_INNER_LABEL(ftrace_graph_func, SYM_L_GLOBAL) + bl ftrace_stub +#endif +SYM_FUNC_END(_mcount) +EXPORT_SYMBOL(_mcount) + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +SYM_FUNC_START(ftrace_graph_caller) + MCOUNT_SAVE_REGS + + PTR_ADDI a0, ra, -4 /* arg0: Callsite self return addr */ + PTR_ADDI a1, sp, MCOUNT_STACK_SIZE /* arg1: Callsite sp */ + move a2, s0 /* arg2: Callsite parent ra */ + bl prepare_ftrace_return + + MCOUNT_RESTORE_REGS + jr ra 
+SYM_FUNC_END(ftrace_graph_caller) + +SYM_FUNC_START(return_to_handler) + PTR_ADDI sp, sp, -2 * SZREG + PTR_S a0, sp, 0 + PTR_S a1, sp, SZREG + + bl ftrace_return_to_handler + + /* Restore the real parent address: a0 -> ra */ + move ra, a0 + + PTR_L a0, sp, 0 + PTR_L a1, sp, SZREG + PTR_ADDI sp, sp, 2 * SZREG + jr ra +SYM_FUNC_END(return_to_handler) +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ From a0a458fbd6f2317832e2d74acdbfa2451c3f4b8f Mon Sep 17 00:00:00 2001 From: Qing Zhang Date: Sat, 10 Dec 2022 22:40:15 +0800 Subject: [PATCH 3825/4122] LoongArch/ftrace: Add recordmcount support The recordmcount utility under scripts is run, after compiling each object, to find out all the locations of calling _mcount() and put them into a specific section named __mcount_loc. Then the linker collects all such information into a table in the kernel image (between __start_mcount_loc and __stop_mcount_loc) for later use by ftrace. This patch adds LoongArch specific definitions to identify such locations. And on LoongArch, only the C version is used to build the kernel now that CONFIG_HAVE_C_RECORDMCOUNT is on. 
Acked-by: Steven Rostedt (Google) Signed-off-by: Qing Zhang Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 2 ++ scripts/recordmcount.c | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 72a41e08f9f4..8b81cf9a6503 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -85,11 +85,13 @@ config LOONGARCH select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_ASM_MODVERSIONS select HAVE_CONTEXT_TRACKING_USER + select HAVE_C_RECORDMCOUNT select HAVE_DEBUG_STACKOVERFLOW select HAVE_DMA_CONTIGUOUS select HAVE_EBPF_JIT select HAVE_EXIT_THREAD select HAVE_FAST_GUP + select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER select HAVE_GENERIC_VDSO diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c index cce12e1971d8..e30216525325 100644 --- a/scripts/recordmcount.c +++ b/scripts/recordmcount.c @@ -38,6 +38,14 @@ #define R_AARCH64_ABS64 257 #endif +#ifndef EM_LOONGARCH +#define EM_LOONGARCH 258 +#define R_LARCH_32 1 +#define R_LARCH_64 2 +#define R_LARCH_MARK_LA 20 +#define R_LARCH_SOP_PUSH_PLT_PCREL 29 +#endif + #define R_ARM_PC24 1 #define R_ARM_THM_CALL 10 #define R_ARM_CALL 28 @@ -441,6 +449,28 @@ static int arm64_is_fake_mcount(Elf64_Rel const *rp) return ELF64_R_TYPE(w8(rp->r_info)) != R_AARCH64_CALL26; } +static int LARCH32_is_fake_mcount(Elf32_Rel const *rp) +{ + switch (ELF64_R_TYPE(w(rp->r_info))) { + case R_LARCH_MARK_LA: + case R_LARCH_SOP_PUSH_PLT_PCREL: + return 0; + } + + return 1; +} + +static int LARCH64_is_fake_mcount(Elf64_Rel const *rp) +{ + switch (ELF64_R_TYPE(w(rp->r_info))) { + case R_LARCH_MARK_LA: + case R_LARCH_SOP_PUSH_PLT_PCREL: + return 0; + } + + return 1; +} + /* 64-bit EM_MIPS has weird ELF64_Rela.r_info. 
* http://techpubs.sgi.com/library/manuals/4000/007-4658-001/pdf/007-4658-001.pdf * We interpret Table 29 Relocation Operation (Elf64_Rel, Elf64_Rela) [p.40] @@ -558,6 +588,7 @@ static int do_file(char const *const fname) break; case EM_IA_64: reltype = R_IA64_IMM64; break; case EM_MIPS: /* reltype: e_class */ break; + case EM_LOONGARCH: /* reltype: e_class */ break; case EM_PPC: reltype = R_PPC_ADDR32; break; case EM_PPC64: reltype = R_PPC64_ADDR64; break; case EM_S390: /* reltype: e_class */ break; @@ -589,6 +620,10 @@ static int do_file(char const *const fname) reltype = R_MIPS_32; is_fake_mcount32 = MIPS32_is_fake_mcount; } + if (w2(ehdr->e_machine) == EM_LOONGARCH) { + reltype = R_LARCH_32; + is_fake_mcount32 = LARCH32_is_fake_mcount; + } if (do32(ehdr, fname, reltype) < 0) goto out; break; @@ -610,6 +645,10 @@ static int do_file(char const *const fname) Elf64_r_info = MIPS64_r_info; is_fake_mcount64 = MIPS64_is_fake_mcount; } + if (w2(ghdr->e_machine) == EM_LOONGARCH) { + reltype = R_LARCH_64; + is_fake_mcount64 = LARCH64_is_fake_mcount; + } if (do64(ghdr, fname, reltype) < 0) goto out; break; From 4733f09d880745953b88c3358b49ad495aecd8e9 Mon Sep 17 00:00:00 2001 From: Qing Zhang Date: Sat, 10 Dec 2022 22:40:15 +0800 Subject: [PATCH 3826/4122] LoongArch/ftrace: Add dynamic function tracer support The compiler has inserted 2 NOPs before the regular function prologue. T series registers are available and safe because of LoongArch's psABI. At runtime, we can replace nop with bl to enable ftrace call and replace bl with nop to disable ftrace call. The bl instruction requires us to save the original RA value, so it saves RA at t0 here. 
Details are: | Compiled | Disabled | Enabled | +------------+------------------------+------------------------+ | nop | move t0, ra | move t0, ra | | nop | nop | bl ftrace_caller | | func_body | func_body | func_body | The RA value will be recovered by ftrace_regs_entry, and restored into RA before returning to the regular function prologue. When a function is not being traced, the "move t0, ra" is not harmful. 1) ftrace_make_call, ftrace_make_nop (in kernel/ftrace.c) The two functions turn each recorded call site of filtered functions into a call to ftrace_caller or nops. 2) ftrace_update_ftrace_func (in kernel/ftrace.c) turns the nops at ftrace_call into a call to a generic entry for function tracers. 3) ftrace_caller (in kernel/mcount_dyn.S) The entry that each _mcount call site calls into once it is filtered to be traced. Co-developed-by: Jinyang He Signed-off-by: Jinyang He Signed-off-by: Qing Zhang Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 1 + arch/loongarch/Makefile | 5 ++ arch/loongarch/include/asm/ftrace.h | 21 +++++ arch/loongarch/include/asm/inst.h | 11 +++ arch/loongarch/include/asm/unwind.h | 2 +- arch/loongarch/kernel/Makefile | 15 ++-- arch/loongarch/kernel/ftrace_dyn.c | 110 ++++++++++++++++++++++++ arch/loongarch/kernel/inst.c | 92 ++++++++++++++++++++ arch/loongarch/kernel/mcount_dyn.S | 85 ++++++++++++++++++ arch/loongarch/kernel/unwind_prologue.c | 35 +++++++- 10 files changed, 367 insertions(+), 10 deletions(-) create mode 100644 arch/loongarch/kernel/ftrace_dyn.c create mode 100644 arch/loongarch/kernel/mcount_dyn.S diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 8b81cf9a6503..6e9aaa747ef7 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -88,6 +88,7 @@ config LOONGARCH select HAVE_C_RECORDMCOUNT select HAVE_DEBUG_STACKOVERFLOW select HAVE_DMA_CONTIGUOUS + select HAVE_DYNAMIC_FTRACE select HAVE_EBPF_JIT select HAVE_EXIT_THREAD select HAVE_FAST_GUP diff --git a/arch/loongarch/Makefile
b/arch/loongarch/Makefile index 5232d8c0f9ca..4402387d2755 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -25,6 +25,11 @@ endif 32bit-emul = elf32loongarch 64bit-emul = elf64loongarch +ifdef CONFIG_DYNAMIC_FTRACE +KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY +CC_FLAGS_FTRACE := -fpatchable-function-entry=2 +endif + ifdef CONFIG_64BIT tool-archpref = $(64bit-tool-archpref) UTS_MACHINE := loongarch64 diff --git a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h index c3f5cde40464..09ff0f84663d 100644 --- a/arch/loongarch/include/asm/ftrace.h +++ b/arch/loongarch/include/asm/ftrace.h @@ -11,9 +11,30 @@ #define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */ #ifndef __ASSEMBLY__ + +#ifndef CONFIG_DYNAMIC_FTRACE + #define mcount _mcount extern void _mcount(void); extern void prepare_ftrace_return(unsigned long self_addr, unsigned long callsite_sp, unsigned long old); + +#else + +struct dyn_ftrace; +struct dyn_arch_ftrace { }; + +#define ftrace_init_nop ftrace_init_nop +int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec); + +static inline unsigned long ftrace_call_adjust(unsigned long addr) +{ + return addr; +} + +void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent); + +#endif /* CONFIG_DYNAMIC_FTRACE */ + #endif /* __ASSEMBLY__ */ #endif /* CONFIG_FUNCTION_TRACER */ diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h index 67215af47b3d..88e1673524e1 100644 --- a/arch/loongarch/include/asm/inst.h +++ b/arch/loongarch/include/asm/inst.h @@ -349,6 +349,17 @@ static inline bool is_stack_alloc_ins(union loongarch_instruction *ip) is_imm12_negative(ip->reg2i12_format.immediate); } +int larch_insn_read(void *addr, u32 *insnp); +int larch_insn_write(void *addr, u32 insn); +int larch_insn_patch_text(void *addr, u32 insn); + +u32 larch_insn_gen_nop(void); +u32 larch_insn_gen_b(unsigned long pc, unsigned long dest); +u32 larch_insn_gen_bl(unsigned long pc, 
unsigned long dest); + +u32 larch_insn_gen_or(enum loongarch_gpr rd, enum loongarch_gpr rj, enum loongarch_gpr rk); +u32 larch_insn_gen_move(enum loongarch_gpr rd, enum loongarch_gpr rj); + u32 larch_insn_gen_lu32id(enum loongarch_gpr rd, int imm); u32 larch_insn_gen_lu52id(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm); u32 larch_insn_gen_jirl(enum loongarch_gpr rd, enum loongarch_gpr rj, unsigned long pc, unsigned long dest); diff --git a/arch/loongarch/include/asm/unwind.h b/arch/loongarch/include/asm/unwind.h index 6af4718bdf01..a51eec00efb8 100644 --- a/arch/loongarch/include/asm/unwind.h +++ b/arch/loongarch/include/asm/unwind.h @@ -20,7 +20,7 @@ struct unwind_state { char type; /* UNWINDER_XXX */ struct stack_info stack_info; struct task_struct *task; - bool first, error; + bool first, error, is_ftrace; unsigned long sp, pc, ra; }; diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 3f71bce1c7ce..fcaa024a685e 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -16,11 +16,16 @@ obj-$(CONFIG_EFI) += efi.o obj-$(CONFIG_CPU_HAS_FPU) += fpu.o ifdef CONFIG_FUNCTION_TRACER -obj-y += mcount.o ftrace.o -CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) -CFLAGS_REMOVE_inst.o = $(CC_FLAGS_FTRACE) -CFLAGS_REMOVE_time.o = $(CC_FLAGS_FTRACE) -CFLAGS_REMOVE_perf_event.o = $(CC_FLAGS_FTRACE) + ifndef CONFIG_DYNAMIC_FTRACE + obj-y += mcount.o ftrace.o + CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) + else + obj-y += mcount_dyn.o ftrace_dyn.o + CFLAGS_REMOVE_ftrace_dyn.o = $(CC_FLAGS_FTRACE) + endif + CFLAGS_REMOVE_inst.o = $(CC_FLAGS_FTRACE) + CFLAGS_REMOVE_time.o = $(CC_FLAGS_FTRACE) + CFLAGS_REMOVE_perf_event.o = $(CC_FLAGS_FTRACE) endif obj-$(CONFIG_MODULES) += module.o module-sections.o diff --git a/arch/loongarch/kernel/ftrace_dyn.c b/arch/loongarch/kernel/ftrace_dyn.c new file mode 100644 index 000000000000..3b82bface840 --- /dev/null +++ b/arch/loongarch/kernel/ftrace_dyn.c @@ -0,0 +1,110 @@ +// 
SPDX-License-Identifier: GPL-2.0 +/* + * Based on arch/arm64/kernel/ftrace.c + * + * Copyright (C) 2022 Loongson Technology Corporation Limited + */ + +#include +#include + +#include + +static int ftrace_modify_code(unsigned long pc, u32 old, u32 new, bool validate) +{ + u32 replaced; + + if (validate) { + if (larch_insn_read((void *)pc, &replaced)) + return -EFAULT; + + if (replaced != old) + return -EINVAL; + } + + if (larch_insn_patch_text((void *)pc, new)) + return -EPERM; + + return 0; +} + +int ftrace_update_ftrace_func(ftrace_func_t func) +{ + u32 new; + unsigned long pc; + + pc = (unsigned long)&ftrace_call; + new = larch_insn_gen_bl(pc, (unsigned long)func); + + return ftrace_modify_code(pc, 0, new, false); +} + +/* + * The compiler has inserted 2 NOPs before the regular function prologue. + * T series registers are available and safe because of LoongArch's psABI. + * + * At runtime, we can replace nop with bl to enable ftrace call and replace bl + * with nop to disable ftrace call. The bl requires us to save the original RA + * value, so it saves RA at t0 here. + * + * Details are: + * + * | Compiled | Disabled | Enabled | + * +------------+------------------------+------------------------+ + * | nop | move t0, ra | move t0, ra | + * | nop | nop | bl ftrace_caller | + * | func_body | func_body | func_body | + * + * The RA value will be recovered by ftrace_regs_entry, and restored into RA + * before returning to the regular function prologue. When a function is not + * being traced, the "move t0, ra" is not harmful. 
+ */ + +int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) +{ + u32 old, new; + unsigned long pc; + + pc = rec->ip; + old = larch_insn_gen_nop(); + new = larch_insn_gen_move(LOONGARCH_GPR_T0, LOONGARCH_GPR_RA); + + return ftrace_modify_code(pc, old, new, true); +} + +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + u32 old, new; + unsigned long pc; + + pc = rec->ip + LOONGARCH_INSN_SIZE; + + old = larch_insn_gen_nop(); + new = larch_insn_gen_bl(pc, addr); + + return ftrace_modify_code(pc, old, new, true); +} + +int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) +{ + u32 old, new; + unsigned long pc; + + pc = rec->ip + LOONGARCH_INSN_SIZE; + + new = larch_insn_gen_nop(); + old = larch_insn_gen_bl(pc, addr); + + return ftrace_modify_code(pc, old, new, true); +} + +void arch_ftrace_update_code(int command) +{ + command |= FTRACE_MAY_SLEEP; + ftrace_modify_all_code(command); +} + +int __init ftrace_dyn_arch_init(void) +{ + return 0; +} diff --git a/arch/loongarch/kernel/inst.c b/arch/loongarch/kernel/inst.c index b1df0ec34bd1..4fd22b4413d0 100644 --- a/arch/loongarch/kernel/inst.c +++ b/arch/loongarch/kernel/inst.c @@ -2,8 +2,100 @@ /* * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ +#include +#include + +#include #include +static DEFINE_RAW_SPINLOCK(patch_lock); + +int larch_insn_read(void *addr, u32 *insnp) +{ + int ret; + u32 val; + + ret = copy_from_kernel_nofault(&val, addr, LOONGARCH_INSN_SIZE); + if (!ret) + *insnp = val; + + return ret; +} + +int larch_insn_write(void *addr, u32 insn) +{ + int ret; + unsigned long flags = 0; + + raw_spin_lock_irqsave(&patch_lock, flags); + ret = copy_to_kernel_nofault(addr, &insn, LOONGARCH_INSN_SIZE); + raw_spin_unlock_irqrestore(&patch_lock, flags); + + return ret; +} + +int larch_insn_patch_text(void *addr, u32 insn) +{ + int ret; + u32 *tp = addr; + + if ((unsigned long)tp & 3) + return -EINVAL; + + ret = larch_insn_write(tp, insn); + if 
(!ret) + flush_icache_range((unsigned long)tp, + (unsigned long)tp + LOONGARCH_INSN_SIZE); + + return ret; +} + +u32 larch_insn_gen_nop(void) +{ + return INSN_NOP; +} + +u32 larch_insn_gen_bl(unsigned long pc, unsigned long dest) +{ + long offset = dest - pc; + unsigned int immediate_l, immediate_h; + union loongarch_instruction insn; + + if ((offset & 3) || offset < -SZ_128M || offset >= SZ_128M) { + pr_warn("The generated bl instruction is out of range.\n"); + return INSN_BREAK; + } + + offset >>= 2; + + immediate_l = offset & 0xffff; + offset >>= 16; + immediate_h = offset & 0x3ff; + + insn.reg0i26_format.opcode = bl_op; + insn.reg0i26_format.immediate_l = immediate_l; + insn.reg0i26_format.immediate_h = immediate_h; + + return insn.word; +} + +u32 larch_insn_gen_or(enum loongarch_gpr rd, enum loongarch_gpr rj, enum loongarch_gpr rk) +{ + union loongarch_instruction insn; + + insn.reg3_format.opcode = or_op; + insn.reg3_format.rd = rd; + insn.reg3_format.rj = rj; + insn.reg3_format.rk = rk; + + return insn.word; +} + +u32 larch_insn_gen_move(enum loongarch_gpr rd, enum loongarch_gpr rj) +{ + return larch_insn_gen_or(rd, rj, 0); +} + u32 larch_insn_gen_lu32id(enum loongarch_gpr rd, int imm) { union loongarch_instruction insn; diff --git a/arch/loongarch/kernel/mcount_dyn.S b/arch/loongarch/kernel/mcount_dyn.S new file mode 100644 index 000000000000..45ba88d2aacc --- /dev/null +++ b/arch/loongarch/kernel/mcount_dyn.S @@ -0,0 +1,85 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2022 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include + + .text +/* + * Due to -fpatchable-function-entry=2: the compiler inserted 2 NOPs before the + * regular C function prologue. 
When PC arrived here, the last 2 instructions + * are as follows: + * move t0, ra + * bl callsite (for modules, callsite is a tramplione) + * + * modules trampoline is as follows: + * lu12i.w t1, callsite[31:12] + * lu32i.d t1, callsite[51:32] + * lu52i.d t1, t1, callsite[63:52] + * jirl zero, t1, callsite[11:0] >> 2 + * + * See arch/loongarch/kernel/ftrace_dyn.c for details. Here, pay attention to + * that the T series regs are available and safe because each C functions + * follows the LoongArch's psABI as well. + */ + + .macro ftrace_regs_entry + PTR_ADDI sp, sp, -PT_SIZE + PTR_S t0, sp, PT_R1 /* Save parent ra at PT_R1(RA) */ + PTR_S a0, sp, PT_R4 + PTR_S a1, sp, PT_R5 + PTR_S a2, sp, PT_R6 + PTR_S a3, sp, PT_R7 + PTR_S a4, sp, PT_R8 + PTR_S a5, sp, PT_R9 + PTR_S a6, sp, PT_R10 + PTR_S a7, sp, PT_R11 + PTR_S fp, sp, PT_R22 + PTR_S ra, sp, PT_ERA /* Save trace function ra at PT_ERA */ + PTR_ADDI t8, sp, PT_SIZE + PTR_S t8, sp, PT_R3 + .endm + +SYM_FUNC_START(ftrace_stub) + jr ra +SYM_FUNC_END(ftrace_stub) + +SYM_CODE_START(ftrace_common) + PTR_ADDI a0, ra, -8 /* arg0: ip */ + move a1, t0 /* arg1: parent_ip */ + la.pcrel t1, function_trace_op + PTR_L a2, t1, 0 /* arg2: op */ + move a3, sp /* arg3: regs */ + +SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) + bl ftrace_stub +/* + * As we didn't use S series regs in this assmembly code and all calls + * are C function which will save S series regs by themselves, there is + * no need to restore S series regs. The T series is available and safe + * at the callsite, so there is no need to restore the T series regs. 
+ */ +ftrace_common_return: + PTR_L ra, sp, PT_R1 + PTR_L a0, sp, PT_R4 + PTR_L a1, sp, PT_R5 + PTR_L a2, sp, PT_R6 + PTR_L a3, sp, PT_R7 + PTR_L a4, sp, PT_R8 + PTR_L a5, sp, PT_R9 + PTR_L a6, sp, PT_R10 + PTR_L a7, sp, PT_R11 + PTR_L fp, sp, PT_R22 + PTR_L t0, sp, PT_ERA + PTR_ADDI sp, sp, PT_SIZE + jr t0 +SYM_CODE_END(ftrace_common) + +SYM_CODE_START(ftrace_caller) + ftrace_regs_entry + b ftrace_common +SYM_CODE_END(ftrace_caller) diff --git a/arch/loongarch/kernel/unwind_prologue.c b/arch/loongarch/kernel/unwind_prologue.c index 4571c3c87cd4..46fe344d7fba 100644 --- a/arch/loongarch/kernel/unwind_prologue.c +++ b/arch/loongarch/kernel/unwind_prologue.c @@ -8,6 +8,16 @@ #include #include +static inline void unwind_state_fixup(struct unwind_state *state) +{ +#ifdef CONFIG_DYNAMIC_FTRACE + static unsigned long ftrace = (unsigned long)ftrace_call + 4; + + if (state->pc == ftrace) + state->is_ftrace = true; +#endif +} + unsigned long unwind_get_return_address(struct unwind_state *state) { @@ -41,15 +51,30 @@ static bool unwind_by_guess(struct unwind_state *state) static bool unwind_by_prologue(struct unwind_state *state) { - struct stack_info *info = &state->stack_info; - union loongarch_instruction *ip, *ip_end; long frame_ra = -1; unsigned long frame_size = 0; unsigned long size, offset, pc = state->pc; + struct pt_regs *regs; + struct stack_info *info = &state->stack_info; + union loongarch_instruction *ip, *ip_end; if (state->sp >= info->end || state->sp < info->begin) return false; + if (state->is_ftrace) { + /* + * As we meet ftrace_regs_entry, reset first flag like first doing + * tracing. Prologue analysis will stop soon because PC is at entry. 
+ */ + regs = (struct pt_regs *)state->sp; + state->first = true; + state->is_ftrace = false; + state->pc = regs->csr_era; + state->ra = regs->regs[1]; + state->sp = regs->regs[3]; + return true; + } + if (!kallsyms_lookup_size_offset(pc, &size, &offset)) return false; @@ -95,7 +120,7 @@ static bool unwind_by_prologue(struct unwind_state *state) state->pc = *(unsigned long *)(state->sp + frame_ra); state->sp = state->sp + frame_size; - return !!__kernel_text_address(state->pc); + goto out; first: state->first = false; @@ -104,7 +129,9 @@ first: state->pc = state->ra; - return !!__kernel_text_address(state->ra); +out: + unwind_state_fixup(state); + return !!__kernel_text_address(state->pc); } void unwind_start(struct unwind_state *state, struct task_struct *task, From 5fcfad3d41cc70f39fb31e7ee314989cc4c5f02c Mon Sep 17 00:00:00 2001 From: Qing Zhang Date: Sat, 10 Dec 2022 22:40:15 +0800 Subject: [PATCH 3827/4122] LoongArch/ftrace: Add dynamic function graph tracer support Once the function_graph tracer is enabled, a filtered function has the following call sequence: 1) ftrace_caller ==> on/off by ftrace_make_call/ftrace_make_nop 2) ftrace_graph_caller 3) ftrace_graph_call ==> on/off by ftrace_en/disable_ftrace_graph_caller 4) prepare_ftrace_return Considering the following DYNAMIC_FTRACE_WITH_REGS feature, it would be more extendable to have a ftrace_graph_caller function, instead of calling prepare_ftrace_return directly in ftrace_caller.
Co-developed-by: Jinyang He Signed-off-by: Jinyang He Signed-off-by: Qing Zhang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/ftrace_dyn.c | 44 ++++++++++++++++++++++++++++++ arch/loongarch/kernel/inst.c | 24 ++++++++++++++++ arch/loongarch/kernel/mcount_dyn.S | 33 ++++++++++++++++++++++ 3 files changed, 101 insertions(+) diff --git a/arch/loongarch/kernel/ftrace_dyn.c b/arch/loongarch/kernel/ftrace_dyn.c index 3b82bface840..5a801c328e2a 100644 --- a/arch/loongarch/kernel/ftrace_dyn.c +++ b/arch/loongarch/kernel/ftrace_dyn.c @@ -108,3 +108,47 @@ int __init ftrace_dyn_arch_init(void) { return 0; } + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent) +{ + unsigned long old; + unsigned long return_hooker = (unsigned long)&return_to_handler; + + if (unlikely(atomic_read(&current->tracing_graph_pause))) + return; + + old = *parent; + + if (!function_graph_enter(old, self_addr, 0, NULL)) + *parent = return_hooker; +} + +static int ftrace_modify_graph_caller(bool enable) +{ + u32 branch, nop; + unsigned long pc, func; + extern void ftrace_graph_call(void); + + pc = (unsigned long)&ftrace_graph_call; + func = (unsigned long)&ftrace_graph_caller; + + nop = larch_insn_gen_nop(); + branch = larch_insn_gen_b(pc, func); + + if (enable) + return ftrace_modify_code(pc, nop, branch, true); + else + return ftrace_modify_code(pc, branch, nop, true); +} + +int ftrace_enable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(true); +} + +int ftrace_disable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(false); +} +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/loongarch/kernel/inst.c b/arch/loongarch/kernel/inst.c index 4fd22b4413d0..39671e87e31c 100644 --- a/arch/loongarch/kernel/inst.c +++ b/arch/loongarch/kernel/inst.c @@ -55,6 +55,30 @@ u32 larch_insn_gen_nop(void) return INSN_NOP; } +u32 larch_insn_gen_b(unsigned long pc, unsigned long dest) +{ + long offset = dest - pc; +
unsigned int immediate_l, immediate_h; + union loongarch_instruction insn; + + if ((offset & 3) || offset < -SZ_128M || offset >= SZ_128M) { + pr_warn("The generated b instruction is out of range.\n"); + return INSN_BREAK; + } + + offset >>= 2; + + immediate_l = offset & 0xffff; + offset >>= 16; + immediate_h = offset & 0x3ff; + + insn.reg0i26_format.opcode = b_op; + insn.reg0i26_format.immediate_l = immediate_l; + insn.reg0i26_format.immediate_h = immediate_h; + + return insn.word; +} + u32 larch_insn_gen_bl(unsigned long pc, unsigned long dest) { long offset = dest - pc; diff --git a/arch/loongarch/kernel/mcount_dyn.S b/arch/loongarch/kernel/mcount_dyn.S index 45ba88d2aacc..cce3daa2eb1e 100644 --- a/arch/loongarch/kernel/mcount_dyn.S +++ b/arch/loongarch/kernel/mcount_dyn.S @@ -57,6 +57,11 @@ SYM_CODE_START(ftrace_common) SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) bl ftrace_stub +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL) + nop /* b ftrace_graph_caller */ +#endif + /* * As we didn't use S series regs in this assmembly code and all calls * are C function which will save S series regs by themselves, there is @@ -83,3 +88,31 @@ SYM_CODE_START(ftrace_caller) ftrace_regs_entry b ftrace_common SYM_CODE_END(ftrace_caller) + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +SYM_CODE_START(ftrace_graph_caller) + PTR_L a0, sp, PT_ERA + PTR_ADDI a0, a0, -8 /* arg0: self_addr */ + PTR_ADDI a1, sp, PT_R1 /* arg1: parent */ + bl prepare_ftrace_return + b ftrace_common_return +SYM_CODE_END(ftrace_graph_caller) + +SYM_CODE_START(return_to_handler) + /* Save return value regs */ + PTR_ADDI sp, sp, -2 * SZREG + PTR_S a0, sp, 0 + PTR_S a1, sp, SZREG + + move a0, zero + bl ftrace_return_to_handler + move ra, a0 + + /* Restore return value regs */ + PTR_L a0, sp, 0 + PTR_L a1, sp, SZREG + PTR_ADDI sp, sp, 2 * SZREG + + jr ra +SYM_CODE_END(return_to_handler) +#endif From 8778ba2c8a5df11859dc6f2b2205700388b63fd3 Mon Sep 17 00:00:00 2001 From: Qing Zhang 
Date: Sat, 10 Dec 2022 22:40:15 +0800 Subject: [PATCH 3828/4122] LoongArch/ftrace: Add HAVE_DYNAMIC_FTRACE_WITH_REGS support This patch implements CONFIG_DYNAMIC_FTRACE_WITH_REGS on LoongArch, which allows a traced function's arguments (and some other registers) to be captured into a struct pt_regs, allowing these to be inspected and modified. Co-developed-by: Jinyang He Signed-off-by: Jinyang He Signed-off-by: Qing Zhang Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 1 + arch/loongarch/include/asm/ftrace.h | 2 ++ arch/loongarch/kernel/ftrace_dyn.c | 15 +++++++++++++ arch/loongarch/kernel/mcount_dyn.S | 35 +++++++++++++++++++++++++++-- 4 files changed, 51 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 6e9aaa747ef7..f2d1b2aef2d4 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -89,6 +89,7 @@ config LOONGARCH select HAVE_DEBUG_STACKOVERFLOW select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE + select HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_EBPF_JIT select HAVE_EXIT_THREAD select HAVE_FAST_GUP diff --git a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h index 09ff0f84663d..dd4a0c8efd24 100644 --- a/arch/loongarch/include/asm/ftrace.h +++ b/arch/loongarch/include/asm/ftrace.h @@ -23,6 +23,8 @@ extern void prepare_ftrace_return(unsigned long self_addr, unsigned long callsit struct dyn_ftrace; struct dyn_arch_ftrace { }; +#define ARCH_SUPPORTS_FTRACE_OPS 1 + #define ftrace_init_nop ftrace_init_nop int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec); diff --git a/arch/loongarch/kernel/ftrace_dyn.c b/arch/loongarch/kernel/ftrace_dyn.c index 5a801c328e2a..d6f30918f94f 100644 --- a/arch/loongarch/kernel/ftrace_dyn.c +++ b/arch/loongarch/kernel/ftrace_dyn.c @@ -28,6 +28,21 @@ static int ftrace_modify_code(unsigned long pc, u32 old, u32 new, bool validate) return 0; } +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +int ftrace_modify_call(struct dyn_ftrace *rec, 
unsigned long old_addr, unsigned long addr) +{ + u32 old, new; + unsigned long pc; + + pc = rec->ip + LOONGARCH_INSN_SIZE; + + new = larch_insn_gen_bl(pc, addr); + old = larch_insn_gen_bl(pc, old_addr); + + return ftrace_modify_code(pc, old, new, true); +} +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ + int ftrace_update_ftrace_func(ftrace_func_t func) { u32 new; diff --git a/arch/loongarch/kernel/mcount_dyn.S b/arch/loongarch/kernel/mcount_dyn.S index cce3daa2eb1e..bbabf06244c2 100644 --- a/arch/loongarch/kernel/mcount_dyn.S +++ b/arch/loongarch/kernel/mcount_dyn.S @@ -27,7 +27,7 @@ * follows the LoongArch's psABI as well. */ - .macro ftrace_regs_entry + .macro ftrace_regs_entry allregs=0 PTR_ADDI sp, sp, -PT_SIZE PTR_S t0, sp, PT_R1 /* Save parent ra at PT_R1(RA) */ PTR_S a0, sp, PT_R4 @@ -39,6 +39,30 @@ PTR_S a6, sp, PT_R10 PTR_S a7, sp, PT_R11 PTR_S fp, sp, PT_R22 + .if \allregs + PTR_S tp, sp, PT_R2 + PTR_S t0, sp, PT_R12 + PTR_S t1, sp, PT_R13 + PTR_S t2, sp, PT_R14 + PTR_S t3, sp, PT_R15 + PTR_S t4, sp, PT_R16 + PTR_S t5, sp, PT_R17 + PTR_S t6, sp, PT_R18 + PTR_S t7, sp, PT_R19 + PTR_S t8, sp, PT_R20 + PTR_S u0, sp, PT_R21 + PTR_S s0, sp, PT_R23 + PTR_S s1, sp, PT_R24 + PTR_S s2, sp, PT_R25 + PTR_S s3, sp, PT_R26 + PTR_S s4, sp, PT_R27 + PTR_S s5, sp, PT_R28 + PTR_S s6, sp, PT_R29 + PTR_S s7, sp, PT_R30 + PTR_S s8, sp, PT_R31 + /* Clear it for later use as a flag sometimes. 
*/ + PTR_S zero, sp, PT_R0 + .endif PTR_S ra, sp, PT_ERA /* Save trace function ra at PT_ERA */ PTR_ADDI t8, sp, PT_SIZE PTR_S t8, sp, PT_R3 @@ -85,10 +109,17 @@ ftrace_common_return: SYM_CODE_END(ftrace_common) SYM_CODE_START(ftrace_caller) - ftrace_regs_entry + ftrace_regs_entry allregs=0 b ftrace_common SYM_CODE_END(ftrace_caller) +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +SYM_CODE_START(ftrace_regs_caller) + ftrace_regs_entry allregs=1 + b ftrace_common +SYM_CODE_END(ftrace_regs_caller) +#endif + #ifdef CONFIG_FUNCTION_GRAPH_TRACER SYM_CODE_START(ftrace_graph_caller) PTR_L a0, sp, PT_ERA From ac7127e1cc65aeb578998c992a05dbc80fa18f0f Mon Sep 17 00:00:00 2001 From: Qing Zhang Date: Sat, 10 Dec 2022 22:40:16 +0800 Subject: [PATCH 3829/4122] LoongArch/ftrace: Add HAVE_DYNAMIC_FTRACE_WITH_ARGS support Allow for arguments to be passed in to ftrace_regs by default. If this is set, then arguments and stack can be found from the pt_regs. 1. HAVE_DYNAMIC_FTRACE_WITH_ARGS doesn't need a special hook for the graph tracer entry point; instead we can use the graph_ops::func function to install the return_hooker. 2. Livepatch requires this option in the future.
Signed-off-by: Qing Zhang Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 1 + arch/loongarch/include/asm/ftrace.h | 17 +++++++++++++++++ arch/loongarch/kernel/ftrace_dyn.c | 11 +++++++++++ 3 files changed, 29 insertions(+) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index f2d1b2aef2d4..a36bb0fe1977 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -89,6 +89,7 @@ config LOONGARCH select HAVE_DEBUG_STACKOVERFLOW select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE + select HAVE_DYNAMIC_FTRACE_WITH_ARGS select HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_EBPF_JIT select HAVE_EXIT_THREAD diff --git a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h index dd4a0c8efd24..ee7feface27a 100644 --- a/arch/loongarch/include/asm/ftrace.h +++ b/arch/loongarch/include/asm/ftrace.h @@ -37,6 +37,23 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent); #endif /* CONFIG_DYNAMIC_FTRACE */ +#ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS +struct ftrace_ops; + +struct ftrace_regs { + struct pt_regs regs; +}; + +static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs) +{ + return &fregs->regs; +} + +#define ftrace_graph_func ftrace_graph_func +void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs); +#endif + #endif /* __ASSEMBLY__ */ #endif /* CONFIG_FUNCTION_TRACER */ diff --git a/arch/loongarch/kernel/ftrace_dyn.c b/arch/loongarch/kernel/ftrace_dyn.c index d6f30918f94f..439ba829b9fd 100644 --- a/arch/loongarch/kernel/ftrace_dyn.c +++ b/arch/loongarch/kernel/ftrace_dyn.c @@ -139,6 +139,16 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent) *parent = return_hooker; } +#ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS +void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs) +{ + struct pt_regs *regs = &fregs->regs; + unsigned long 
*parent = (unsigned long *)&regs->regs[1]; + + prepare_ftrace_return(ip, (unsigned long *)parent); +} +#else static int ftrace_modify_graph_caller(bool enable) { u32 branch, nop; @@ -166,4 +176,5 @@ int ftrace_disable_ftrace_graph_caller(void) { return ftrace_modify_graph_caller(false); } +#endif /* CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */ #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ From a51ac5246d2505b58229242959d2bc73d113ca50 Mon Sep 17 00:00:00 2001 From: Qing Zhang Date: Sat, 10 Dec 2022 22:40:21 +0800 Subject: [PATCH 3830/4122] LoongArch/ftrace: Add HAVE_FUNCTION_GRAPH_RET_ADDR_PTR support ftrace_graph_ret_addr() can be called by stack unwinding code to convert a found stack return address ('ret') to its original value, in case the function graph tracer has modified it to be 'return_to_handler'. If it hasn't been modified, the unchanged value of 'ret' is returned. Signed-off-by: Qing Zhang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/ftrace.h | 3 +++ arch/loongarch/include/asm/unwind.h | 1 + arch/loongarch/kernel/ftrace_dyn.c | 2 +- arch/loongarch/kernel/unwind_guess.c | 4 +++- arch/loongarch/kernel/unwind_prologue.c | 15 +++++++++++---- 5 files changed, 19 insertions(+), 6 deletions(-) diff --git a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h index ee7feface27a..8c7d137e4871 100644 --- a/arch/loongarch/include/asm/ftrace.h +++ b/arch/loongarch/include/asm/ftrace.h @@ -6,6 +6,8 @@ #ifndef _ASM_LOONGARCH_FTRACE_H #define _ASM_LOONGARCH_FTRACE_H +#define GRAPH_FAKE_OFFSET (sizeof(struct pt_regs) - offsetof(struct pt_regs, regs[1])) + #ifdef CONFIG_FUNCTION_TRACER #define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */ @@ -24,6 +26,7 @@ struct dyn_ftrace; struct dyn_arch_ftrace { }; #define ARCH_SUPPORTS_FTRACE_OPS 1 +#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR #define ftrace_init_nop ftrace_init_nop int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec); diff --git a/arch/loongarch/include/asm/unwind.h
b/arch/loongarch/include/asm/unwind.h index a51eec00efb8..f2b52b9ea93d 100644 --- a/arch/loongarch/include/asm/unwind.h +++ b/arch/loongarch/include/asm/unwind.h @@ -21,6 +21,7 @@ struct unwind_state { struct stack_info stack_info; struct task_struct *task; bool first, error, is_ftrace; + int graph_idx; unsigned long sp, pc, ra; }; diff --git a/arch/loongarch/kernel/ftrace_dyn.c b/arch/loongarch/kernel/ftrace_dyn.c index 439ba829b9fd..e23c3be29baa 100644 --- a/arch/loongarch/kernel/ftrace_dyn.c +++ b/arch/loongarch/kernel/ftrace_dyn.c @@ -135,7 +135,7 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent) old = *parent; - if (!function_graph_enter(old, self_addr, 0, NULL)) + if (!function_graph_enter(old, self_addr, 0, parent)) *parent = return_hooker; } diff --git a/arch/loongarch/kernel/unwind_guess.c b/arch/loongarch/kernel/unwind_guess.c index 5afa6064d73e..e2d2e4f3001f 100644 --- a/arch/loongarch/kernel/unwind_guess.c +++ b/arch/loongarch/kernel/unwind_guess.c @@ -3,6 +3,7 @@ * Copyright (C) 2022 Loongson Technology Corporation Limited */ #include +#include #include @@ -53,7 +54,8 @@ bool unwind_next_frame(struct unwind_state *state) state->sp < info->end; state->sp += sizeof(unsigned long)) { addr = *(unsigned long *)(state->sp); - + state->pc = ftrace_graph_ret_addr(state->task, &state->graph_idx, + addr, (unsigned long *)(state->sp - GRAPH_FAKE_OFFSET)); if (__kernel_text_address(addr)) return true; } diff --git a/arch/loongarch/kernel/unwind_prologue.c b/arch/loongarch/kernel/unwind_prologue.c index 46fe344d7fba..0f8d1451ebb8 100644 --- a/arch/loongarch/kernel/unwind_prologue.c +++ b/arch/loongarch/kernel/unwind_prologue.c @@ -2,6 +2,7 @@ /* * Copyright (C) 2022 Loongson Technology Corporation Limited */ +#include #include #include @@ -42,6 +43,8 @@ static bool unwind_by_guess(struct unwind_state *state) state->sp < info->end; state->sp += sizeof(unsigned long)) { addr = *(unsigned long *)(state->sp); + state->pc = 
ftrace_graph_ret_addr(state->task, &state->graph_idx, + addr, (unsigned long *)(state->sp - GRAPH_FAKE_OFFSET)); if (__kernel_text_address(addr)) return true; } @@ -174,8 +177,11 @@ bool unwind_next_frame(struct unwind_state *state) break; case UNWINDER_PROLOGUE: - if (unwind_by_prologue(state)) + if (unwind_by_prologue(state)) { + state->pc = ftrace_graph_ret_addr(state->task, &state->graph_idx, + state->pc, (unsigned long *)(state->sp - GRAPH_FAKE_OFFSET)); return true; + } if (info->type == STACK_TYPE_IRQ && info->end == state->sp) { @@ -185,10 +191,11 @@ bool unwind_next_frame(struct unwind_state *state) if (user_mode(regs) || !__kernel_text_address(pc)) return false; - state->pc = pc; - state->sp = regs->regs[3]; - state->ra = regs->regs[1]; state->first = true; + state->ra = regs->regs[1]; + state->sp = regs->regs[3]; + state->pc = ftrace_graph_ret_addr(state->task, &state->graph_idx, + pc, (unsigned long *)(state->sp - GRAPH_FAKE_OFFSET)); get_stack_info(state->sp, state->task, info); return true; From 28ac0a9e04d7dfb42220dc9d221164d93f20fb3a Mon Sep 17 00:00:00 2001 From: Qing Zhang Date: Sat, 10 Dec 2022 22:40:21 +0800 Subject: [PATCH 3831/4122] LoongArch: modules/ftrace: Initialize PLT at load time This patch implements ftrace trampolines through plt entry. Tested by forcing ftrace_make_call() to use the module PLT, and then loading up a module after setting up ftrace with: | echo ":mod:<module>" > set_ftrace_filter; | echo function > current_tracer; | modprobe <module> Since FTRACE_ADDR/FTRACE_REGS_ADDR are only defined when CONFIG_DYNAMIC_FTRACE is selected, we wrap their usage in module_init_ftrace_plt() with ifdeffery rather than using IS_ENABLED().
Signed-off-by: Qing Zhang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/ftrace.h | 4 ++ arch/loongarch/include/asm/inst.h | 3 + arch/loongarch/include/asm/module.h | 5 +- arch/loongarch/include/asm/module.lds.h | 1 + arch/loongarch/kernel/ftrace_dyn.c | 93 +++++++++++++++++++++++++ arch/loongarch/kernel/inst.c | 11 +++ arch/loongarch/kernel/module-sections.c | 12 +++- arch/loongarch/kernel/module.c | 21 ++++++ 8 files changed, 148 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h index 8c7d137e4871..90f9d3399b2a 100644 --- a/arch/loongarch/include/asm/ftrace.h +++ b/arch/loongarch/include/asm/ftrace.h @@ -6,6 +6,10 @@ #ifndef _ASM_LOONGARCH_FTRACE_H #define _ASM_LOONGARCH_FTRACE_H +#define FTRACE_PLT_IDX 0 +#define FTRACE_REGS_PLT_IDX 1 +#define NR_FTRACE_PLTS 2 + #define GRAPH_FAKE_OFFSET (sizeof(struct pt_regs) - offsetof(struct pt_regs, regs[1])) #ifdef CONFIG_FUNCTION_TRACER diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h index 88e1673524e1..c00e1512d4fa 100644 --- a/arch/loongarch/include/asm/inst.h +++ b/arch/loongarch/include/asm/inst.h @@ -13,10 +13,12 @@ #define ADDR_IMMMASK_LU52ID 0xFFF0000000000000 #define ADDR_IMMMASK_LU32ID 0x000FFFFF00000000 +#define ADDR_IMMMASK_LU12IW 0x00000000FFFFF000 #define ADDR_IMMMASK_ADDU16ID 0x00000000FFFF0000 #define ADDR_IMMSHIFT_LU52ID 52 #define ADDR_IMMSHIFT_LU32ID 32 +#define ADDR_IMMSHIFT_LU12IW 12 #define ADDR_IMMSHIFT_ADDU16ID 16 #define ADDR_IMM(addr, INSN) ((addr & ADDR_IMMMASK_##INSN) >> ADDR_IMMSHIFT_##INSN) @@ -360,6 +362,7 @@ u32 larch_insn_gen_bl(unsigned long pc, unsigned long dest); u32 larch_insn_gen_or(enum loongarch_gpr rd, enum loongarch_gpr rj, enum loongarch_gpr rk); u32 larch_insn_gen_move(enum loongarch_gpr rd, enum loongarch_gpr rj); +u32 larch_insn_gen_lu12iw(enum loongarch_gpr rd, int imm); u32 larch_insn_gen_lu32id(enum loongarch_gpr rd, int imm); u32 larch_insn_gen_lu52id(enum 
loongarch_gpr rd, enum loongarch_gpr rj, int imm); u32 larch_insn_gen_jirl(enum loongarch_gpr rd, enum loongarch_gpr rj, unsigned long pc, unsigned long dest); diff --git a/arch/loongarch/include/asm/module.h b/arch/loongarch/include/asm/module.h index 60dc62a1146e..12a0f1e66916 100644 --- a/arch/loongarch/include/asm/module.h +++ b/arch/loongarch/include/asm/module.h @@ -20,6 +20,9 @@ struct mod_arch_specific { struct mod_section got; struct mod_section plt; struct mod_section plt_idx; + + /* For CONFIG_DYNAMIC_FTRACE */ + struct plt_entry *ftrace_trampolines; }; struct got_entry { @@ -49,7 +52,7 @@ static inline struct plt_entry emit_plt_entry(unsigned long val) { u32 lu12iw, lu32id, lu52id, jirl; - lu12iw = (lu12iw_op << 25 | (((val >> 12) & 0xfffff) << 5) | LOONGARCH_GPR_T1); + lu12iw = larch_insn_gen_lu12iw(LOONGARCH_GPR_T1, ADDR_IMM(val, LU12IW)); lu32id = larch_insn_gen_lu32id(LOONGARCH_GPR_T1, ADDR_IMM(val, LU32ID)); lu52id = larch_insn_gen_lu52id(LOONGARCH_GPR_T1, LOONGARCH_GPR_T1, ADDR_IMM(val, LU52ID)); jirl = larch_insn_gen_jirl(0, LOONGARCH_GPR_T1, 0, (val & 0xfff)); diff --git a/arch/loongarch/include/asm/module.lds.h b/arch/loongarch/include/asm/module.lds.h index a3d1bc0fcc72..438f09d4ccf4 100644 --- a/arch/loongarch/include/asm/module.lds.h +++ b/arch/loongarch/include/asm/module.lds.h @@ -5,4 +5,5 @@ SECTIONS { .got : { BYTE(0) } .plt : { BYTE(0) } .plt.idx : { BYTE(0) } + .ftrace_trampoline : { BYTE(0) } } diff --git a/arch/loongarch/kernel/ftrace_dyn.c b/arch/loongarch/kernel/ftrace_dyn.c index e23c3be29baa..0f07591cab30 100644 --- a/arch/loongarch/kernel/ftrace_dyn.c +++ b/arch/loongarch/kernel/ftrace_dyn.c @@ -9,6 +9,7 @@ #include #include +#include static int ftrace_modify_code(unsigned long pc, u32 old, u32 new, bool validate) { @@ -29,18 +30,78 @@ static int ftrace_modify_code(unsigned long pc, u32 old, u32 new, bool validate) } #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + +#ifdef CONFIG_MODULES +static inline int __get_mod(struct module **mod, 
unsigned long addr) +{ + preempt_disable(); + *mod = __module_text_address(addr); + preempt_enable(); + + if (WARN_ON(!(*mod))) + return -EINVAL; + + return 0; +} + +static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr) +{ + struct plt_entry *plt = mod->arch.ftrace_trampolines; + + if (addr == FTRACE_ADDR) + return &plt[FTRACE_PLT_IDX]; + if (addr == FTRACE_REGS_ADDR && + IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) + return &plt[FTRACE_REGS_PLT_IDX]; + + return NULL; +} + +static unsigned long get_plt_addr(struct module *mod, unsigned long addr) +{ + struct plt_entry *plt; + + plt = get_ftrace_plt(mod, addr); + if (!plt) { + pr_err("ftrace: no module PLT for %ps\n", (void *)addr); + return -EINVAL; + } + + return (unsigned long)plt; +} +#endif + int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { u32 old, new; unsigned long pc; + long offset __maybe_unused; pc = rec->ip + LOONGARCH_INSN_SIZE; +#ifdef CONFIG_MODULES + offset = (long)pc - (long)addr; + + if (offset < -SZ_128M || offset >= SZ_128M) { + int ret; + struct module *mod; + + ret = __get_mod(&mod, pc); + if (ret) + return ret; + + addr = get_plt_addr(mod, addr); + + old_addr = get_plt_addr(mod, old_addr); + } +#endif + new = larch_insn_gen_bl(pc, addr); old = larch_insn_gen_bl(pc, old_addr); return ftrace_modify_code(pc, old, new, true); } + #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ int ftrace_update_ftrace_func(ftrace_func_t func) @@ -91,9 +152,25 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { u32 old, new; unsigned long pc; + long offset __maybe_unused; pc = rec->ip + LOONGARCH_INSN_SIZE; +#ifdef CONFIG_MODULES + offset = (long)pc - (long)addr; + + if (offset < -SZ_128M || offset >= SZ_128M) { + int ret; + struct module *mod; + + ret = __get_mod(&mod, pc); + if (ret) + return ret; + + addr = get_plt_addr(mod, addr); + } +#endif + old = larch_insn_gen_nop(); new = larch_insn_gen_bl(pc, addr); @@ -104,9 +181,25 @@ 
int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long ad { u32 old, new; unsigned long pc; + long offset __maybe_unused; pc = rec->ip + LOONGARCH_INSN_SIZE; +#ifdef CONFIG_MODULES + offset = (long)pc - (long)addr; + + if (offset < -SZ_128M || offset >= SZ_128M) { + int ret; + struct module *mod; + + ret = __get_mod(&mod, pc); + if (ret) + return ret; + + addr = get_plt_addr(mod, addr); + } +#endif + new = larch_insn_gen_nop(); old = larch_insn_gen_bl(pc, addr); diff --git a/arch/loongarch/kernel/inst.c b/arch/loongarch/kernel/inst.c index 39671e87e31c..512579d79b22 100644 --- a/arch/loongarch/kernel/inst.c +++ b/arch/loongarch/kernel/inst.c @@ -120,6 +120,17 @@ u32 larch_insn_gen_move(enum loongarch_gpr rd, enum loongarch_gpr rj) return larch_insn_gen_or(rd, rj, 0); } +u32 larch_insn_gen_lu12iw(enum loongarch_gpr rd, int imm) +{ + union loongarch_instruction insn; + + insn.reg1i20_format.opcode = lu12iw_op; + insn.reg1i20_format.rd = rd; + insn.reg1i20_format.immediate = imm; + + return insn.word; +} + u32 larch_insn_gen_lu32id(enum loongarch_gpr rd, int imm) { union loongarch_instruction insn; diff --git a/arch/loongarch/kernel/module-sections.c b/arch/loongarch/kernel/module-sections.c index 13d9a427325a..d4dbcda1c4b0 100644 --- a/arch/loongarch/kernel/module-sections.c +++ b/arch/loongarch/kernel/module-sections.c @@ -6,6 +6,7 @@ #include #include #include +#include Elf_Addr module_emit_got_entry(struct module *mod, Elf_Shdr *sechdrs, Elf_Addr val) { @@ -103,7 +104,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings, struct module *mod) { unsigned int i, num_plts = 0, num_gots = 0; - Elf_Shdr *got_sec, *plt_sec, *plt_idx_sec; + Elf_Shdr *got_sec, *plt_sec, *plt_idx_sec, *tramp = NULL; /* * Find the empty .plt sections. 
@@ -115,6 +116,8 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, mod->arch.plt.shndx = i; else if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt.idx")) mod->arch.plt_idx.shndx = i; + else if (!strcmp(secstrings + sechdrs[i].sh_name, ".ftrace_trampoline")) + tramp = sechdrs + i; } if (!mod->arch.got.shndx) { @@ -170,5 +173,12 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, mod->arch.plt_idx.num_entries = 0; mod->arch.plt_idx.max_entries = num_plts; + if (tramp) { + tramp->sh_type = SHT_NOBITS; + tramp->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + tramp->sh_addralign = __alignof__(struct plt_entry); + tramp->sh_size = NR_FTRACE_PLTS * sizeof(struct plt_entry); + } + return 0; } diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c index 899dc677cec3..b8b86088b2dd 100644 --- a/arch/loongarch/kernel/module.c +++ b/arch/loongarch/kernel/module.c @@ -15,9 +15,11 @@ #include #include #include +#include #include #include #include +#include static int rela_stack_push(s64 stack_value, s64 *rela_stack, size_t *rela_stack_top) { @@ -473,6 +475,23 @@ void *module_alloc(unsigned long size) GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); } +static void module_init_ftrace_plt(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, struct module *mod) +{ +#ifdef CONFIG_DYNAMIC_FTRACE + struct plt_entry *ftrace_plts; + + ftrace_plts = (void *)sechdrs->sh_addr; + + ftrace_plts[FTRACE_PLT_IDX] = emit_plt_entry(FTRACE_ADDR); + + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) + ftrace_plts[FTRACE_REGS_PLT_IDX] = emit_plt_entry(FTRACE_REGS_ADDR); + + mod->arch.ftrace_trampolines = ftrace_plts; +#endif +} + int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod) { @@ -482,6 +501,8 @@ int module_finalize(const Elf_Ehdr *hdr, for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) { if (!strcmp(".altinstructions", secstrs + s->sh_name)) apply_alternatives((void *)s->sh_addr, (void 
*)s->sh_addr + s->sh_size); + if (!strcmp(".ftrace_trampoline", secstrs + s->sh_name)) + module_init_ftrace_plt(hdr, s, mod); } return 0; From 5535f4f70cfc15ef55b6ea7c7e17337b17337cb6 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 10 Dec 2022 22:40:21 +0800 Subject: [PATCH 3832/4122] LoongArch: Update Loongson-3 default config file 1, Enable suspend (ACPI S3) and hibernation (ACPI S4). 2, Enable some options for FDT-based systems (e.g., SERIAL_OF_PLATFORM). 3, Enable CONFIG_KALLSYMS_ALL and CONFIG_DEBUG_FS to convenient ftrace. 4, Regenerate the whole file to keep the order of options be the same as the latest source code. Signed-off-by: Qing Zhang Signed-off-by: Huacai Chen --- arch/loongarch/configs/loongson3_defconfig | 56 ++++++++++++---------- 1 file changed, 32 insertions(+), 24 deletions(-) diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index 3540e9c0a631..eb84cae642e5 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -34,12 +34,13 @@ CONFIG_SYSFS_DEPRECATED=y CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y -CONFIG_USERFAULTFD=y +CONFIG_KALLSYMS_ALL=y CONFIG_PERF_EVENTS=y -# CONFIG_COMPAT_BRK is not set CONFIG_LOONGARCH=y CONFIG_64BIT=y CONFIG_MACH_LOONGSON64=y +CONFIG_PAGE_SIZE_16KB=y +CONFIG_HZ_250=y CONFIG_DMI=y CONFIG_EFI=y CONFIG_SMP=y @@ -47,14 +48,14 @@ CONFIG_HOTPLUG_CPU=y CONFIG_NR_CPUS=64 CONFIG_NUMA=y CONFIG_KEXEC=y -CONFIG_PAGE_SIZE_16KB=y -CONFIG_HZ_250=y +CONFIG_SUSPEND=y +CONFIG_HIBERNATION=y CONFIG_ACPI=y CONFIG_ACPI_SPCR_TABLE=y -CONFIG_ACPI_HOTPLUG_CPU=y CONFIG_ACPI_TAD=y CONFIG_ACPI_DOCK=y CONFIG_ACPI_IPMI=m +CONFIG_ACPI_HOTPLUG_CPU=y CONFIG_ACPI_PCI_SLOT=y CONFIG_ACPI_HOTPLUG_MEMORY=y CONFIG_EFI_ZBOOT=y @@ -73,17 +74,19 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_IOSCHED_BFQ=y CONFIG_BFQ_GROUP_IOSCHED=y CONFIG_BINFMT_MISC=m +CONFIG_ZPOOL=y +CONFIG_ZSWAP=y +CONFIG_ZSWAP_COMPRESSOR_DEFAULT_ZSTD=y +CONFIG_ZBUD=y 
+CONFIG_Z3FOLD=y +CONFIG_ZSMALLOC=m +# CONFIG_COMPAT_BRK is not set CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y -CONFIG_ZSWAP=y -CONFIG_ZSWAP_COMPRESSOR_DEFAULT_ZSTD=y -CONFIG_ZPOOL=y -CONFIG_ZBUD=y -CONFIG_Z3FOLD=y -CONFIG_ZSMALLOC=m +CONFIG_USERFAULTFD=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -118,7 +121,6 @@ CONFIG_NETFILTER=y CONFIG_BRIDGE_NETFILTER=m CONFIG_NETFILTER_NETLINK_LOG=m CONFIG_NF_CONNTRACK=m -CONFIG_NF_LOG_NETDEV=m CONFIG_NF_CONNTRACK_AMANDA=m CONFIG_NF_CONNTRACK_FTP=m CONFIG_NF_CONNTRACK_NETBIOS_NS=m @@ -416,6 +418,7 @@ CONFIG_SCSI_VIRTIO=m CONFIG_ATA=y CONFIG_SATA_AHCI=y CONFIG_SATA_AHCI_PLATFORM=y +CONFIG_AHCI_DWC=y CONFIG_PATA_ATIIXP=y CONFIG_PATA_PCMCIA=m CONFIG_MD=y @@ -469,13 +472,11 @@ CONFIG_VIRTIO_NET=m # CONFIG_NET_VENDOR_ARC is not set # CONFIG_NET_VENDOR_ATHEROS is not set CONFIG_BNX2=y -# CONFIG_NET_VENDOR_BROCADE is not set # CONFIG_NET_VENDOR_CAVIUM is not set CONFIG_CHELSIO_T1=m CONFIG_CHELSIO_T1_1G=y CONFIG_CHELSIO_T3=m CONFIG_CHELSIO_T4=m -# CONFIG_NET_VENDOR_CIRRUS is not set # CONFIG_NET_VENDOR_CISCO is not set # CONFIG_NET_VENDOR_DEC is not set # CONFIG_NET_VENDOR_DLINK is not set @@ -496,6 +497,7 @@ CONFIG_IXGBE=y # CONFIG_NET_VENDOR_NVIDIA is not set # CONFIG_NET_VENDOR_OKI is not set # CONFIG_NET_VENDOR_QLOGIC is not set +# CONFIG_NET_VENDOR_BROCADE is not set # CONFIG_NET_VENDOR_QUALCOMM is not set # CONFIG_NET_VENDOR_RDC is not set CONFIG_8139CP=m @@ -505,9 +507,9 @@ CONFIG_R8169=y # CONFIG_NET_VENDOR_ROCKER is not set # CONFIG_NET_VENDOR_SAMSUNG is not set # CONFIG_NET_VENDOR_SEEQ is not set -# CONFIG_NET_VENDOR_SOLARFLARE is not set # CONFIG_NET_VENDOR_SILAN is not set # CONFIG_NET_VENDOR_SIS is not set +# CONFIG_NET_VENDOR_SOLARFLARE is not set # CONFIG_NET_VENDOR_SMSC is not set CONFIG_STMMAC_ETH=y # CONFIG_NET_VENDOR_SUN is not set @@ -588,6 +590,7 @@ CONFIG_SERIAL_8250_EXTENDED=y CONFIG_SERIAL_8250_MANY_PORTS=y 
CONFIG_SERIAL_8250_SHARE_IRQ=y CONFIG_SERIAL_8250_RSA=y +CONFIG_SERIAL_OF_PLATFORM=y CONFIG_SERIAL_NONSTANDARD=y CONFIG_PRINTER=m CONFIG_VIRTIO_CONSOLE=y @@ -602,6 +605,11 @@ CONFIG_I2C_GPIO=y CONFIG_SPI=y CONFIG_GPIO_SYSFS=y CONFIG_GPIO_LOONGSON=y +CONFIG_POWER_RESET=y +CONFIG_POWER_RESET_RESTART=y +CONFIG_POWER_RESET_SYSCON=y +CONFIG_POWER_RESET_SYSCON_POWEROFF=y +CONFIG_SYSCON_REBOOT_MODE=y CONFIG_SENSORS_LM75=m CONFIG_SENSORS_LM93=m CONFIG_SENSORS_W83795=m @@ -609,16 +617,16 @@ CONFIG_SENSORS_W83627HF=m CONFIG_RC_CORE=m CONFIG_LIRC=y CONFIG_RC_DECODERS=y +CONFIG_IR_IMON_DECODER=m +CONFIG_IR_JVC_DECODER=m +CONFIG_IR_MCE_KBD_DECODER=m CONFIG_IR_NEC_DECODER=m CONFIG_IR_RC5_DECODER=m CONFIG_IR_RC6_DECODER=m -CONFIG_IR_JVC_DECODER=m -CONFIG_IR_SONY_DECODER=m CONFIG_IR_SANYO_DECODER=m CONFIG_IR_SHARP_DECODER=m -CONFIG_IR_MCE_KBD_DECODER=m +CONFIG_IR_SONY_DECODER=m CONFIG_IR_XMP_DECODER=m -CONFIG_IR_IMON_DECODER=m CONFIG_MEDIA_SUPPORT=m CONFIG_MEDIA_USB_SUPPORT=y CONFIG_USB_VIDEO_CLASS=m @@ -638,6 +646,7 @@ CONFIG_DRM_VIRTIO_GPU=m CONFIG_FB=y CONFIG_FB_EFI=y CONFIG_FB_RADEON=y +CONFIG_LCD_CLASS_DEVICE=y CONFIG_LCD_PLATFORM=m # CONFIG_VGA_CONSOLE is not set CONFIG_FRAMEBUFFER_CONSOLE=y @@ -647,7 +656,6 @@ CONFIG_SOUND=y CONFIG_SND=y CONFIG_SND_SEQUENCER=m CONFIG_SND_SEQ_DUMMY=m -# CONFIG_SND_ISA is not set CONFIG_SND_BT87X=m CONFIG_SND_BT87X_OVERCLOCK=y CONFIG_SND_HDA_INTEL=y @@ -818,10 +826,6 @@ CONFIG_CRYPTO_USER=m # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set CONFIG_CRYPTO_PCRYPT=m CONFIG_CRYPTO_CRYPTD=m -CONFIG_CRYPTO_CHACHA20POLY1305=m -CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAST5=m @@ -831,6 +835,9 @@ CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_CHACHA20POLY1305=m +CONFIG_CRYPTO_VMAC=m +CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_DEFLATE=m CONFIG_CRYPTO_LZO=m CONFIG_CRYPTO_842=m @@ -844,6 +851,7 @@ 
CONFIG_CRYPTO_DEV_VIRTIO=m CONFIG_PRINTK_TIME=y CONFIG_STRIP_ASM_SYMS=y CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_FS=y # CONFIG_SCHED_DEBUG is not set CONFIG_SCHEDSTATS=y # CONFIG_DEBUG_PREEMPT is not set From f3b4a00f0f62da252c598310698dfc82ef2f2e2e Mon Sep 17 00:00:00 2001 From: Emeel Hakim Date: Sun, 11 Dec 2022 09:55:32 +0200 Subject: [PATCH 3833/4122] net: macsec: fix net device access prior to holding a lock Currently macsec offload selection update routine accesses the net device prior to holding the relevant lock. Fix by holding the lock prior to the device access. Fixes: dcb780fb2795 ("net: macsec: add nla support for changing the offloading selection") Reviewed-by: Raed Salem Signed-off-by: Emeel Hakim Link: https://lore.kernel.org/r/20221211075532.28099-1-ehakim@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/macsec.c | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 937f5b1f04ff..bf8ac7a3ded7 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -2593,7 +2593,7 @@ static int macsec_upd_offload(struct sk_buff *skb, struct genl_info *info) const struct macsec_ops *ops; struct macsec_context ctx; struct macsec_dev *macsec; - int ret; + int ret = 0; if (!attrs[MACSEC_ATTR_IFINDEX]) return -EINVAL; @@ -2606,28 +2606,36 @@ static int macsec_upd_offload(struct sk_buff *skb, struct genl_info *info) macsec_genl_offload_policy, NULL)) return -EINVAL; + rtnl_lock(); + dev = get_dev_from_nl(genl_info_net(info), attrs); - if (IS_ERR(dev)) - return PTR_ERR(dev); + if (IS_ERR(dev)) { + ret = PTR_ERR(dev); + goto out; + } macsec = macsec_priv(dev); - if (!tb_offload[MACSEC_OFFLOAD_ATTR_TYPE]) - return -EINVAL; + if (!tb_offload[MACSEC_OFFLOAD_ATTR_TYPE]) { + ret = -EINVAL; + goto out; + } offload = nla_get_u8(tb_offload[MACSEC_OFFLOAD_ATTR_TYPE]); if (macsec->offload == offload) - return 0; + goto out; /* Check if the offloading mode is supported by the 
underlying layers */ if (offload != MACSEC_OFFLOAD_OFF && - !macsec_check_offload(offload, macsec)) - return -EOPNOTSUPP; + !macsec_check_offload(offload, macsec)) { + ret = -EOPNOTSUPP; + goto out; + } /* Check if the net device is busy. */ - if (netif_running(dev)) - return -EBUSY; - - rtnl_lock(); + if (netif_running(dev)) { + ret = -EBUSY; + goto out; + } prev_offload = macsec->offload; macsec->offload = offload; @@ -2662,7 +2670,7 @@ static int macsec_upd_offload(struct sk_buff *skb, struct genl_info *info) rollback: macsec->offload = prev_offload; - +out: rtnl_unlock(); return ret; } From 3d0b738fc5adf9f380702ac1424672e4b32c3781 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 12 Dec 2022 11:56:45 +0800 Subject: [PATCH 3834/4122] bonding: add missed __rcu annotation for curr_active_slave There is one direct access to bond->curr_active_slave in bond_miimon_commit(). Protect it with rcu_access_pointer(), since the later part of this function also uses it. Signed-off-by: Hangbin Liu Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index f7767afe116b..6a4bbd5aa3e0 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2699,7 +2699,7 @@ static void bond_miimon_commit(struct bonding *bond) bond_miimon_link_change(bond, slave, BOND_LINK_UP); - if (!bond->curr_active_slave || slave == primary) + if (!rcu_access_pointer(bond->curr_active_slave) || slave == primary) goto do_failover; continue; From e95cc44763a41d5c715ef16742bcb1d8e6524a62 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 12 Dec 2022 11:56:46 +0800 Subject: [PATCH 3835/4122] bonding: do failover when high prio link up Currently, when a high prio link is enslaved, or when the current link goes down, the high prio port can be selected. But when a high prio link comes up, reselection of the new active slave is not triggered.
Fix it by checking link's prio when getting up. Making the do_failover after looping all slaves as there may be multi high prio slaves up. Reported-by: Liang Li Fixes: 0a2ff7cc8ad4 ("Bonding: add per-port priority for failover re-selection") Signed-off-by: Hangbin Liu Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_main.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 6a4bbd5aa3e0..b4c65783960a 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2654,8 +2654,9 @@ static void bond_miimon_link_change(struct bonding *bond, static void bond_miimon_commit(struct bonding *bond) { - struct list_head *iter; struct slave *slave, *primary; + bool do_failover = false; + struct list_head *iter; bond_for_each_slave(bond, slave, iter) { switch (slave->link_new_state) { @@ -2699,8 +2700,9 @@ static void bond_miimon_commit(struct bonding *bond) bond_miimon_link_change(bond, slave, BOND_LINK_UP); - if (!rcu_access_pointer(bond->curr_active_slave) || slave == primary) - goto do_failover; + if (!rcu_access_pointer(bond->curr_active_slave) || slave == primary || + slave->prio > rcu_dereference(bond->curr_active_slave)->prio) + do_failover = true; continue; @@ -2721,7 +2723,7 @@ static void bond_miimon_commit(struct bonding *bond) bond_miimon_link_change(bond, slave, BOND_LINK_DOWN); if (slave == rcu_access_pointer(bond->curr_active_slave)) - goto do_failover; + do_failover = true; continue; @@ -2732,8 +2734,9 @@ static void bond_miimon_commit(struct bonding *bond) continue; } + } -do_failover: + if (do_failover) { block_netpoll_tx(); bond_select_active_slave(bond); unblock_netpoll_tx(); @@ -3531,6 +3534,7 @@ static int bond_ab_arp_inspect(struct bonding *bond) */ static void bond_ab_arp_commit(struct bonding *bond) { + bool do_failover = false; struct list_head *iter; unsigned long last_tx; struct slave *slave; @@ -3560,8 
+3564,9 @@ static void bond_ab_arp_commit(struct bonding *bond) slave_info(bond->dev, slave->dev, "link status definitely up\n"); if (!rtnl_dereference(bond->curr_active_slave) || - slave == rtnl_dereference(bond->primary_slave)) - goto do_failover; + slave == rtnl_dereference(bond->primary_slave) || + slave->prio > rtnl_dereference(bond->curr_active_slave)->prio) + do_failover = true; } @@ -3580,7 +3585,7 @@ static void bond_ab_arp_commit(struct bonding *bond) if (slave == rtnl_dereference(bond->curr_active_slave)) { RCU_INIT_POINTER(bond->current_arp_slave, NULL); - goto do_failover; + do_failover = true; } continue; @@ -3604,8 +3609,9 @@ static void bond_ab_arp_commit(struct bonding *bond) slave->link_new_state); continue; } + } -do_failover: + if (do_failover) { block_netpoll_tx(); bond_select_active_slave(bond); unblock_netpoll_tx(); From 42a8d4aaea8414f60eb2ed2d92df89a6e2db4615 Mon Sep 17 00:00:00 2001 From: Liang Li Date: Mon, 12 Dec 2022 11:56:47 +0800 Subject: [PATCH 3836/4122] selftests: bonding: add bonding prio option test Add a test for bonding prio option. 
Here is the test result: ]# ./option_prio.sh TEST: prio_test (Test bonding option 'prio' with mode=1 monitor=arp_ip_target and primary_reselect=0) [ OK ] TEST: prio_test (Test bonding option 'prio' with mode=1 monitor=arp_ip_target and primary_reselect=1) [ OK ] TEST: prio_test (Test bonding option 'prio' with mode=1 monitor=arp_ip_target and primary_reselect=2) [ OK ] TEST: prio_test (Test bonding option 'prio' with mode=1 monitor=miimon and primary_reselect=0) [ OK ] TEST: prio_test (Test bonding option 'prio' with mode=1 monitor=miimon and primary_reselect=1) [ OK ] TEST: prio_test (Test bonding option 'prio' with mode=1 monitor=miimon and primary_reselect=2) [ OK ] TEST: prio_test (Test bonding option 'prio' with mode=5 monitor=miimon and primary_reselect=0) [ OK ] TEST: prio_test (Test bonding option 'prio' with mode=5 monitor=miimon and primary_reselect=1) [ OK ] TEST: prio_test (Test bonding option 'prio' with mode=5 monitor=miimon and primary_reselect=2) [ OK ] TEST: prio_test (Test bonding option 'prio' with mode=6 monitor=miimon and primary_reselect=0) [ OK ] TEST: prio_test (Test bonding option 'prio' with mode=6 monitor=miimon and primary_reselect=1) [ OK ] TEST: prio_test (Test bonding option 'prio' with mode=6 monitor=miimon and primary_reselect=2) [ OK ] Signed-off-by: Liang Li Signed-off-by: Hangbin Liu Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/bonding/Makefile | 3 +- .../drivers/net/bonding/option_prio.sh | 245 ++++++++++++++++++ 2 files changed, 247 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/drivers/net/bonding/option_prio.sh diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile index 0f3921908b07..8e3b786a748f 100644 --- a/tools/testing/selftests/drivers/net/bonding/Makefile +++ b/tools/testing/selftests/drivers/net/bonding/Makefile @@ -7,7 +7,8 @@ TEST_PROGS := \ bond-lladdr-target.sh \ dev_addr_lists.sh \ mode-1-recovery-updelay.sh 
\ - mode-2-recovery-updelay.sh + mode-2-recovery-updelay.sh \ + option_prio.sh TEST_FILES := \ lag_lib.sh \ diff --git a/tools/testing/selftests/drivers/net/bonding/option_prio.sh b/tools/testing/selftests/drivers/net/bonding/option_prio.sh new file mode 100755 index 000000000000..c32eebff5005 --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/option_prio.sh @@ -0,0 +1,245 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test bonding option prio +# + +ALL_TESTS=" + prio_arp_ip_target_test + prio_miimon_test +" + +REQUIRE_MZ=no +REQUIRE_JQ=no +NUM_NETIFS=0 +lib_dir=$(dirname "$0") +source "$lib_dir"/net_forwarding_lib.sh + +destroy() +{ + ip link del bond0 &>/dev/null + ip link del br0 &>/dev/null + ip link del veth0 &>/dev/null + ip link del veth1 &>/dev/null + ip link del veth2 &>/dev/null + ip netns del ns1 &>/dev/null + ip link del veth3 &>/dev/null +} + +cleanup() +{ + pre_cleanup + + destroy +} + +skip() +{ + local skip=1 + ip link add name bond0 type bond mode 1 miimon 100 &>/dev/null + ip link add name veth0 type veth peer name veth0_p + ip link set veth0 master bond0 + + # check if iproute support prio option + ip link set dev veth0 type bond_slave prio 10 + [[ $? -ne 0 ]] && skip=0 + + # check if bonding support prio option + ip -d link show veth0 | grep -q "prio 10" + [[ $? -ne 0 ]] && skip=0 + + ip link del bond0 &>/dev/null + ip link del veth0 + + return $skip +} + +active_slave="" +check_active_slave() +{ + local target_active_slave=$1 + active_slave="$(cat /sys/class/net/bond0/bonding/active_slave)" + test "$active_slave" = "$target_active_slave" + check_err $? 
"Current active slave is $active_slave but not $target_active_slave" +} + + +# Test bonding prio option with mode=$mode monitor=$monitor +# and primary_reselect=$primary_reselect +prio_test() +{ + RET=0 + + local monitor=$1 + local mode=$2 + local primary_reselect=$3 + + local bond_ip4="192.169.1.2" + local peer_ip4="192.169.1.1" + local bond_ip6="2009:0a:0b::02" + local peer_ip6="2009:0a:0b::01" + + + # create veths + ip link add name veth0 type veth peer name veth0_p + ip link add name veth1 type veth peer name veth1_p + ip link add name veth2 type veth peer name veth2_p + + # create bond + if [[ "$monitor" == "miimon" ]];then + ip link add name bond0 type bond mode $mode miimon 100 primary veth1 primary_reselect $primary_reselect + elif [[ "$monitor" == "arp_ip_target" ]];then + ip link add name bond0 type bond mode $mode arp_interval 1000 arp_ip_target $peer_ip4 primary veth1 primary_reselect $primary_reselect + elif [[ "$monitor" == "ns_ip6_target" ]];then + ip link add name bond0 type bond mode $mode arp_interval 1000 ns_ip6_target $peer_ip6 primary veth1 primary_reselect $primary_reselect + fi + ip link set bond0 up + ip link set veth0 master bond0 + ip link set veth1 master bond0 + ip link set veth2 master bond0 + # check bonding member prio value + ip link set dev veth0 type bond_slave prio 0 + ip link set dev veth1 type bond_slave prio 10 + ip link set dev veth2 type bond_slave prio 11 + ip -d link show veth0 | grep -q 'prio 0' + check_err $? "veth0 prio is not 0" + ip -d link show veth1 | grep -q 'prio 10' + check_err $? "veth0 prio is not 10" + ip -d link show veth2 | grep -q 'prio 11' + check_err $? 
"veth0 prio is not 11" + + ip link set veth0 up + ip link set veth1 up + ip link set veth2 up + ip link set veth0_p up + ip link set veth1_p up + ip link set veth2_p up + + # prepare ping target + ip link add name br0 type bridge + ip link set br0 up + ip link set veth0_p master br0 + ip link set veth1_p master br0 + ip link set veth2_p master br0 + ip link add name veth3 type veth peer name veth3_p + ip netns add ns1 + ip link set veth3_p master br0 up + ip link set veth3 netns ns1 up + ip netns exec ns1 ip addr add $peer_ip4/24 dev veth3 + ip netns exec ns1 ip addr add $peer_ip6/64 dev veth3 + ip addr add $bond_ip4/24 dev bond0 + ip addr add $bond_ip6/64 dev bond0 + sleep 5 + + ping $peer_ip4 -c5 -I bond0 &>/dev/null + check_err $? "ping failed 1." + ping6 $peer_ip6 -c5 -I bond0 &>/dev/null + check_err $? "ping6 failed 1." + + # active salve should be the primary slave + check_active_slave veth1 + + # active slave should be the higher prio slave + ip link set $active_slave down + ping $peer_ip4 -c5 -I bond0 &>/dev/null + check_err $? "ping failed 2." + check_active_slave veth2 + + # when only 1 slave is up + ip link set $active_slave down + ping $peer_ip4 -c5 -I bond0 &>/dev/null + check_err $? "ping failed 3." + check_active_slave veth0 + + # when a higher prio slave change to up + ip link set veth2 up + ping $peer_ip4 -c5 -I bond0 &>/dev/null + check_err $? "ping failed 4." + case $primary_reselect in + "0") + check_active_slave "veth2" + ;; + "1") + check_active_slave "veth0" + ;; + "2") + check_active_slave "veth0" + ;; + esac + local pre_active_slave=$active_slave + + # when the primary slave change to up + ip link set veth1 up + ping $peer_ip4 -c5 -I bond0 &>/dev/null + check_err $? "ping failed 5." + case $primary_reselect in + "0") + check_active_slave "veth1" + ;; + "1") + check_active_slave "$pre_active_slave" + ;; + "2") + check_active_slave "$pre_active_slave" + ip link set $active_slave down + ping $peer_ip4 -c5 -I bond0 &>/dev/null + check_err $? 
"ping failed 6." + check_active_slave "veth1" + ;; + esac + + # Test changing bond salve prio + if [[ "$primary_reselect" == "0" ]];then + ip link set dev veth0 type bond_slave prio 1000000 + ip link set dev veth1 type bond_slave prio 0 + ip link set dev veth2 type bond_slave prio -50 + ip -d link show veth0 | grep -q 'prio 1000000' + check_err $? "veth0 prio is not 1000000" + ip -d link show veth1 | grep -q 'prio 0' + check_err $? "veth1 prio is not 0" + ip -d link show veth2 | grep -q 'prio -50' + check_err $? "veth3 prio is not -50" + check_active_slave "veth1" + + ip link set $active_slave down + ping $peer_ip4 -c5 -I bond0 &>/dev/null + check_err $? "ping failed 7." + check_active_slave "veth0" + fi + + cleanup + + log_test "prio_test" "Test bonding option 'prio' with mode=$mode monitor=$monitor and primary_reselect=$primary_reselect" +} + +prio_miimon_test() +{ + local mode + local primary_reselect + + for mode in 1 5 6; do + for primary_reselect in 0 1 2; do + prio_test "miimon" $mode $primary_reselect + done + done +} + +prio_arp_ip_target_test() +{ + local primary_reselect + + for primary_reselect in 0 1 2; do + prio_test "arp_ip_target" 1 $primary_reselect + done +} + +if skip;then + log_test_skip "option_prio.sh" "Current iproute doesn't support 'prio'." + exit 0 +fi + +trap cleanup EXIT + +tests_run + +exit "$EXIT_STATUS" From ddc9648db162eee556edd5222d2808fe33730203 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 12 Dec 2022 16:41:37 +0800 Subject: [PATCH 3837/4122] mISDN: hfcsusb: don't call dev_kfree_skb/kfree_skb() under spin_lock_irqsave() It is not allowed to call kfree_skb() or consume_skb() from hardware interrupt context or with hardware interrupts being disabled. It should use dev_kfree_skb_irq() or dev_consume_skb_irq() instead. The difference between them is free reason, dev_kfree_skb_irq() means the SKB is dropped in error and dev_consume_skb_irq() means the SKB is consumed in normal. 
skb_queue_purge() is called under spin_lock_irqsave() in hfcusb_l2l1D(), kfree_skb() is called in it, to fix this, use skb_queue_splice_init() to move the dch->squeue to a free queue, also enqueue the tx_skb and rx_skb, at last calling __skb_queue_purge() to free the SKBs afer unlock. In tx_iso_complete(), dev_kfree_skb() is called to consume the transmitted SKB, so replace it with dev_consume_skb_irq(). Fixes: 69f52adb2d53 ("mISDN: Add HFC USB driver") Signed-off-by: Yang Yingliang Reviewed-by: Alexander Duyck Signed-off-by: Jakub Kicinski --- drivers/isdn/hardware/mISDN/hfcsusb.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/isdn/hardware/mISDN/hfcsusb.c b/drivers/isdn/hardware/mISDN/hfcsusb.c index 651f2f8f685b..1efd17979f24 100644 --- a/drivers/isdn/hardware/mISDN/hfcsusb.c +++ b/drivers/isdn/hardware/mISDN/hfcsusb.c @@ -326,20 +326,24 @@ hfcusb_l2l1D(struct mISDNchannel *ch, struct sk_buff *skb) test_and_clear_bit(FLG_L2_ACTIVATED, &dch->Flags); if (hw->protocol == ISDN_P_NT_S0) { + struct sk_buff_head free_queue; + + __skb_queue_head_init(&free_queue); hfcsusb_ph_command(hw, HFC_L1_DEACTIVATE_NT); spin_lock_irqsave(&hw->lock, flags); - skb_queue_purge(&dch->squeue); + skb_queue_splice_init(&dch->squeue, &free_queue); if (dch->tx_skb) { - dev_kfree_skb(dch->tx_skb); + __skb_queue_tail(&free_queue, dch->tx_skb); dch->tx_skb = NULL; } dch->tx_idx = 0; if (dch->rx_skb) { - dev_kfree_skb(dch->rx_skb); + __skb_queue_tail(&free_queue, dch->rx_skb); dch->rx_skb = NULL; } test_and_clear_bit(FLG_TX_BUSY, &dch->Flags); spin_unlock_irqrestore(&hw->lock, flags); + __skb_queue_purge(&free_queue); #ifdef FIXME if (test_and_clear_bit(FLG_L1_BUSY, &dch->Flags)) dchannel_sched_event(&hc->dch, D_CLEARBUSY); @@ -1330,7 +1334,7 @@ tx_iso_complete(struct urb *urb) printk("\n"); } - dev_kfree_skb(tx_skb); + dev_consume_skb_irq(tx_skb); tx_skb = NULL; if (fifo->dch && get_next_dframe(fifo->dch)) tx_skb = fifo->dch->tx_skb; From 
f0f596bd75a9d573ca9b587abb39cee0b916bb82 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 12 Dec 2022 16:41:38 +0800 Subject: [PATCH 3838/4122] mISDN: hfcpci: don't call dev_kfree_skb/kfree_skb() under spin_lock_irqsave() It is not allowed to call kfree_skb() or consume_skb() from hardware interrupt context or with hardware interrupts being disabled. skb_queue_purge() is called under spin_lock_irqsave() in hfcpci_l2l1D(), kfree_skb() is called in it, to fix this, use skb_queue_splice_init() to move the dch->squeue to a free queue, also enqueue the tx_skb and rx_skb, at last calling __skb_queue_purge() to free the SKBs afer unlock. Fixes: 1700fe1a10dc ("Add mISDN HFC PCI driver") Signed-off-by: Yang Yingliang Reviewed-by: Alexander Duyck Signed-off-by: Jakub Kicinski --- drivers/isdn/hardware/mISDN/hfcpci.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/isdn/hardware/mISDN/hfcpci.c b/drivers/isdn/hardware/mISDN/hfcpci.c index e964a8dd8512..c0331b268010 100644 --- a/drivers/isdn/hardware/mISDN/hfcpci.c +++ b/drivers/isdn/hardware/mISDN/hfcpci.c @@ -1617,16 +1617,19 @@ hfcpci_l2l1D(struct mISDNchannel *ch, struct sk_buff *skb) test_and_clear_bit(FLG_L2_ACTIVATED, &dch->Flags); spin_lock_irqsave(&hc->lock, flags); if (hc->hw.protocol == ISDN_P_NT_S0) { + struct sk_buff_head free_queue; + + __skb_queue_head_init(&free_queue); /* prepare deactivation */ Write_hfc(hc, HFCPCI_STATES, 0x40); - skb_queue_purge(&dch->squeue); + skb_queue_splice_init(&dch->squeue, &free_queue); if (dch->tx_skb) { - dev_kfree_skb(dch->tx_skb); + __skb_queue_tail(&free_queue, dch->tx_skb); dch->tx_skb = NULL; } dch->tx_idx = 0; if (dch->rx_skb) { - dev_kfree_skb(dch->rx_skb); + __skb_queue_tail(&free_queue, dch->rx_skb); dch->rx_skb = NULL; } test_and_clear_bit(FLG_TX_BUSY, &dch->Flags); @@ -1639,10 +1642,12 @@ hfcpci_l2l1D(struct mISDNchannel *ch, struct sk_buff *skb) hc->hw.mst_m &= ~HFCPCI_MASTER; Write_hfc(hc, HFCPCI_MST_MODE, hc->hw.mst_m); 
ret = 0; + spin_unlock_irqrestore(&hc->lock, flags); + __skb_queue_purge(&free_queue); } else { ret = l1_event(dch->l1, hh->prim); + spin_unlock_irqrestore(&hc->lock, flags); } - spin_unlock_irqrestore(&hc->lock, flags); break; } if (!ret) From 1232946cf522b8de9e398828bde325d7c41f29dd Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 12 Dec 2022 16:41:39 +0800 Subject: [PATCH 3839/4122] mISDN: hfcmulti: don't call dev_kfree_skb/kfree_skb() under spin_lock_irqsave() It is not allowed to call kfree_skb() or consume_skb() from hardware interrupt context or with hardware interrupts being disabled. skb_queue_purge() is called under spin_lock_irqsave() in handle_dmsg() and hfcm_l1callback(), kfree_skb() is called in them, to fix this, use skb_queue_splice_init() to move the dch->squeue to a free queue, also enqueue the tx_skb and rx_skb, at last calling __skb_queue_purge() to free the SKBs afer unlock. Fixes: af69fb3a8ffa ("Add mISDN HFC multiport driver") Signed-off-by: Yang Yingliang Reviewed-by: Alexander Duyck Signed-off-by: Jakub Kicinski --- drivers/isdn/hardware/mISDN/hfcmulti.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/isdn/hardware/mISDN/hfcmulti.c b/drivers/isdn/hardware/mISDN/hfcmulti.c index 4f7eaa17fb27..e840609c50eb 100644 --- a/drivers/isdn/hardware/mISDN/hfcmulti.c +++ b/drivers/isdn/hardware/mISDN/hfcmulti.c @@ -3217,6 +3217,7 @@ static int hfcm_l1callback(struct dchannel *dch, u_int cmd) { struct hfc_multi *hc = dch->hw; + struct sk_buff_head free_queue; u_long flags; switch (cmd) { @@ -3245,6 +3246,7 @@ hfcm_l1callback(struct dchannel *dch, u_int cmd) l1_event(dch->l1, HW_POWERUP_IND); break; case HW_DEACT_REQ: + __skb_queue_head_init(&free_queue); /* start deactivation */ spin_lock_irqsave(&hc->lock, flags); if (hc->ctype == HFC_TYPE_E1) { @@ -3264,20 +3266,21 @@ hfcm_l1callback(struct dchannel *dch, u_int cmd) plxsd_checksync(hc, 0); } } - skb_queue_purge(&dch->squeue); + 
skb_queue_splice_init(&dch->squeue, &free_queue); if (dch->tx_skb) { - dev_kfree_skb(dch->tx_skb); + __skb_queue_tail(&free_queue, dch->tx_skb); dch->tx_skb = NULL; } dch->tx_idx = 0; if (dch->rx_skb) { - dev_kfree_skb(dch->rx_skb); + __skb_queue_tail(&free_queue, dch->rx_skb); dch->rx_skb = NULL; } test_and_clear_bit(FLG_TX_BUSY, &dch->Flags); if (test_and_clear_bit(FLG_BUSY_TIMER, &dch->Flags)) del_timer(&dch->timer); spin_unlock_irqrestore(&hc->lock, flags); + __skb_queue_purge(&free_queue); break; case HW_POWERUP_REQ: spin_lock_irqsave(&hc->lock, flags); @@ -3384,6 +3387,9 @@ handle_dmsg(struct mISDNchannel *ch, struct sk_buff *skb) case PH_DEACTIVATE_REQ: test_and_clear_bit(FLG_L2_ACTIVATED, &dch->Flags); if (dch->dev.D.protocol != ISDN_P_TE_S0) { + struct sk_buff_head free_queue; + + __skb_queue_head_init(&free_queue); spin_lock_irqsave(&hc->lock, flags); if (debug & DEBUG_HFCMULTI_MSG) printk(KERN_DEBUG @@ -3405,14 +3411,14 @@ handle_dmsg(struct mISDNchannel *ch, struct sk_buff *skb) /* deactivate */ dch->state = 1; } - skb_queue_purge(&dch->squeue); + skb_queue_splice_init(&dch->squeue, &free_queue); if (dch->tx_skb) { - dev_kfree_skb(dch->tx_skb); + __skb_queue_tail(&free_queue, dch->tx_skb); dch->tx_skb = NULL; } dch->tx_idx = 0; if (dch->rx_skb) { - dev_kfree_skb(dch->rx_skb); + __skb_queue_tail(&free_queue, dch->rx_skb); dch->rx_skb = NULL; } test_and_clear_bit(FLG_TX_BUSY, &dch->Flags); @@ -3424,6 +3430,7 @@ handle_dmsg(struct mISDNchannel *ch, struct sk_buff *skb) #endif ret = 0; spin_unlock_irqrestore(&hc->lock, flags); + __skb_queue_purge(&free_queue); } else ret = l1_event(dch->l1, hh->prim); break; From de5dc44370fbd6b46bd7f1a1e00369be54a041c8 Mon Sep 17 00:00:00 2001 From: Tony Nguyen Date: Mon, 12 Dec 2022 11:00:31 -0800 Subject: [PATCH 3840/4122] igb: Initialize mailbox message for VF reset When a MAC address is not assigned to the VF, that portion of the message sent to the VF is not set. 
The memory, however, is allocated from the stack meaning that information may be leaked to the VM. Initialize the message buffer to 0 so that no information is passed to the VM in this case. Fixes: 6ddbc4cf1f4d ("igb: Indicate failure on vf reset for empty mac address") Reported-by: Akihiko Odaki Signed-off-by: Tony Nguyen Reviewed-by: Akihiko Odaki Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/20221212190031.3983342-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/igb/igb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 97290fc0fddd..3c0c35ecea10 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -7525,7 +7525,7 @@ static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf) { struct e1000_hw *hw = &adapter->hw; unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses; - u32 reg, msgbuf[3]; + u32 reg, msgbuf[3] = {}; u8 *addr = (u8 *)(&msgbuf[1]); /* process all the same items cleared in a function level reset */ From 2d4ee16d969c97996e80e4c9cb6de0acaff22c9f Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Tue, 13 Dec 2022 15:52:08 -0700 Subject: [PATCH 3841/4122] wireguard: timers: cast enum limits members to int in prints Since gcc13, each member of an enum has the same type as the enum. And that is inherited from its members. Provided "REKEY_AFTER_MESSAGES = 1ULL << 60", the named type is unsigned long. This generates warnings with gcc-13: error: format '%d' expects argument of type 'int', but argument 6 has type 'long unsigned int' Cast those particular enum members to int when printing them. Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=36113 Cc: Martin Liska Signed-off-by: Jiri Slaby (SUSE) Signed-off-by: Jason A. 
Donenfeld Link: https://lore.kernel.org/all/20221213225208.3343692-2-Jason@zx2c4.com/ Signed-off-by: Jakub Kicinski --- drivers/net/wireguard/timers.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireguard/timers.c b/drivers/net/wireguard/timers.c index b5706b6718b1..53d8a57a0dfa 100644 --- a/drivers/net/wireguard/timers.c +++ b/drivers/net/wireguard/timers.c @@ -46,7 +46,7 @@ static void wg_expired_retransmit_handshake(struct timer_list *timer) if (peer->timer_handshake_attempts > MAX_TIMER_HANDSHAKES) { pr_debug("%s: Handshake for peer %llu (%pISpfsc) did not complete after %d attempts, giving up\n", peer->device->dev->name, peer->internal_id, - &peer->endpoint.addr, MAX_TIMER_HANDSHAKES + 2); + &peer->endpoint.addr, (int)MAX_TIMER_HANDSHAKES + 2); del_timer(&peer->timer_send_keepalive); /* We drop all packets without a keypair and don't try again, @@ -64,7 +64,7 @@ static void wg_expired_retransmit_handshake(struct timer_list *timer) ++peer->timer_handshake_attempts; pr_debug("%s: Handshake for peer %llu (%pISpfsc) did not complete after %d seconds, retrying (try %d)\n", peer->device->dev->name, peer->internal_id, - &peer->endpoint.addr, REKEY_TIMEOUT, + &peer->endpoint.addr, (int)REKEY_TIMEOUT, peer->timer_handshake_attempts + 1); /* We clear the endpoint address src address, in case this is @@ -94,7 +94,7 @@ static void wg_expired_new_handshake(struct timer_list *timer) pr_debug("%s: Retrying handshake with peer %llu (%pISpfsc) because we stopped hearing back after %d seconds\n", peer->device->dev->name, peer->internal_id, - &peer->endpoint.addr, KEEPALIVE_TIMEOUT + REKEY_TIMEOUT); + &peer->endpoint.addr, (int)(KEEPALIVE_TIMEOUT + REKEY_TIMEOUT)); /* We clear the endpoint address src address, in case this is the cause * of trouble. 
*/ @@ -126,7 +126,7 @@ static void wg_queued_expired_zero_key_material(struct work_struct *work) pr_debug("%s: Zeroing out all keys for peer %llu (%pISpfsc), since we haven't received a new one in %d seconds\n", peer->device->dev->name, peer->internal_id, - &peer->endpoint.addr, REJECT_AFTER_TIME * 3); + &peer->endpoint.addr, (int)REJECT_AFTER_TIME * 3); wg_noise_handshake_clear(&peer->handshake); wg_noise_keypairs_clear(&peer->keypairs); wg_peer_put(peer); From d74f4a3f6d88a2416564bc6bf937e423a4ae8f8e Mon Sep 17 00:00:00 2001 From: Yang Li Date: Wed, 14 Dec 2022 10:39:11 +0800 Subject: [PATCH 3842/4122] cifs: Remove duplicated include in cifsglob.h ./fs/cifs/cifsglob.h: linux/scatterlist.h is included more than once. Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=3459 Fixes: f7f291e14dde ("cifs: fix oops during encryption") Reported-by: Abaci Robot Signed-off-by: Yang Li Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 703685e2db5e..82f2d3070c26 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -23,7 +23,6 @@ #include "cifs_fs_sb.h" #include "cifsacl.h" #include -#include #include #include "../smbfs_common/smb2pdu.h" #include "smb2pdu.h" From f65a486821cfd363833079b2a7b0769250ee21c9 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 11 Dec 2022 22:04:07 +0900 Subject: [PATCH 3843/4122] kbuild: change module.order to list *.o instead of *.ko scripts/Makefile.build replaces the suffix .o with .ko, then scripts/Makefile.modpost calls the sed command to change .ko back to the original .o suffix. Instead of converting the suffixes back-and-forth, store the .o paths in modules.order, and replace it with .ko in 'make modules_install'. This avoids the unneeded sed command. 
Signed-off-by: Masahiro Yamada Reviewed-by: Luis Chamberlain --- Makefile | 2 +- scripts/Makefile.build | 2 +- scripts/Makefile.modfinal | 6 +++--- scripts/Makefile.modinst | 2 +- scripts/Makefile.modpost | 7 +++++-- scripts/clang-tools/gen_compile_commands.py | 8 ++++---- scripts/gen_autoksyms.sh | 2 +- scripts/mod/modpost.c | 11 ++++------- scripts/modules-check.sh | 2 +- 9 files changed, 21 insertions(+), 21 deletions(-) diff --git a/Makefile b/Makefile index 591485152a95..f506879e7452 100644 --- a/Makefile +++ b/Makefile @@ -1564,7 +1564,7 @@ __modinst_pre: rm -f $(MODLIB)/build ; \ ln -s $(CURDIR) $(MODLIB)/build ; \ fi - @sed 's:^:kernel/:' modules.order > $(MODLIB)/modules.order + @sed 's:^\(.*\)\.o$$:kernel/\1.ko:' modules.order > $(MODLIB)/modules.order @cp -f modules.builtin $(MODLIB)/ @cp -f $(objtree)/modules.builtin.modinfo $(MODLIB)/ diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 799df12b53f3..267eb7aac5b2 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -435,7 +435,7 @@ $(obj)/built-in.a: $(real-obj-y) FORCE # modules.order unless contained modules are updated. 
cmd_modules_order = { $(foreach m, $(real-prereqs), \ - $(if $(filter %/modules.order, $m), cat $m, echo $(patsubst %.o,%.ko,$m));) :; } \ + $(if $(filter %/modules.order, $m), cat $m, echo $m);) :; } \ > $@ $(obj)/modules.order: $(obj-m) FORCE diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal index 83f2797e530c..a30d5b08eee9 100644 --- a/scripts/Makefile.modfinal +++ b/scripts/Makefile.modfinal @@ -15,7 +15,7 @@ include $(srctree)/scripts/Makefile.lib # find all modules listed in modules.order modules := $(call read-file, $(MODORDER)) -__modfinal: $(modules) +__modfinal: $(modules:%.o=%.ko) @: # modname and part-of-module are set to make c_flags define proper module flags @@ -57,13 +57,13 @@ if_changed_except = $(if $(call newer_prereqs_except,$(2))$(cmd-check), \ printf '%s\n' 'cmd_$@ := $(make-cmd)' > $(dot-target).cmd, @:) # Re-generate module BTFs if either module's .ko or vmlinux changed -$(modules): %.ko: %.o %.mod.o scripts/module.lds $(and $(CONFIG_DEBUG_INFO_BTF_MODULES),$(KBUILD_BUILTIN),vmlinux) FORCE +%.ko: %.o %.mod.o scripts/module.lds $(and $(CONFIG_DEBUG_INFO_BTF_MODULES),$(KBUILD_BUILTIN),vmlinux) FORCE +$(call if_changed_except,ld_ko_o,vmlinux) ifdef CONFIG_DEBUG_INFO_BTF_MODULES +$(if $(newer-prereqs),$(call cmd,btf_ko)) endif -targets += $(modules) $(modules:.ko=.mod.o) +targets += $(modules:%.o=%.ko) $(modules:%.o=%.mod.o) # Add FORCE to the prequisites of a target to force it to be always rebuilt. 
# --------------------------------------------------------------------------- diff --git a/scripts/Makefile.modinst b/scripts/Makefile.modinst index 65aac6be78ec..836391e5d209 100644 --- a/scripts/Makefile.modinst +++ b/scripts/Makefile.modinst @@ -26,7 +26,7 @@ suffix-$(CONFIG_MODULE_COMPRESS_GZIP) := .gz suffix-$(CONFIG_MODULE_COMPRESS_XZ) := .xz suffix-$(CONFIG_MODULE_COMPRESS_ZSTD) := .zst -modules := $(patsubst $(extmod_prefix)%, $(dst)/%$(suffix-y), $(modules)) +modules := $(patsubst $(extmod_prefix)%.o, $(dst)/%.ko$(suffix-y), $(modules)) __modinst: $(modules) @: diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index 55a72f5eb76d..f814a6acd200 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -107,7 +107,10 @@ ifneq ($(KBUILD_MODPOST_WARN)$(missing-input),) modpost-args += -w endif -modorder-if-needed := $(if $(KBUILD_MODULES), $(MODORDER)) +ifdef KBUILD_MODULES +modorder-if-needed := $(MODORDER) +modpost-args += -T $(MODORDER) +endif MODPOST = scripts/mod/modpost @@ -119,7 +122,7 @@ quiet_cmd_modpost = MODPOST $@ echo >&2 "WARNING: $(missing-input) is missing."; \ echo >&2 " Modules may not have dependencies or modversions."; \ echo >&2 " You may get many unresolved symbol warnings.";) \ - sed 's/ko$$/o/' $(or $(modorder-if-needed), /dev/null) | $(MODPOST) $(modpost-args) -T - $(vmlinux.o-if-present) + $(MODPOST) $(modpost-args) $(vmlinux.o-if-present) targets += $(output-symdump) $(output-symdump): $(modorder-if-needed) $(vmlinux.o-if-present) $(module.symvers-if-present) $(MODPOST) FORCE diff --git a/scripts/clang-tools/gen_compile_commands.py b/scripts/clang-tools/gen_compile_commands.py index d800b2c0af97..0227522959a4 100755 --- a/scripts/clang-tools/gen_compile_commands.py +++ b/scripts/clang-tools/gen_compile_commands.py @@ -138,10 +138,10 @@ def cmdfiles_for_modorder(modorder): """ with open(modorder) as f: for line in f: - ko = line.rstrip() - base, ext = os.path.splitext(ko) - if ext != '.ko': - sys.exit('{}: 
module path must end with .ko'.format(ko)) + obj = line.rstrip() + base, ext = os.path.splitext(obj) + if ext != '.o': + sys.exit('{}: module path must end with .o'.format(obj)) mod = base + '.mod' # Read from *.mod, to get a list of objects that compose the module. with open(mod) as m: diff --git a/scripts/gen_autoksyms.sh b/scripts/gen_autoksyms.sh index 653fadbad302..12bcfae940ee 100755 --- a/scripts/gen_autoksyms.sh +++ b/scripts/gen_autoksyms.sh @@ -48,7 +48,7 @@ cat > "$output_file" << EOT EOT { - [ -n "${read_modorder}" ] && sed 's/ko$/usyms/' modules.order | xargs cat + [ -n "${read_modorder}" ] && sed 's/o$/usyms/' modules.order | xargs cat echo "$needed_symbols" [ -n "$ksym_wl" ] && cat "$ksym_wl" } | sed -e 's/ /\n/g' | sed -n -e '/^$/!p' | diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 56d856f2e511..b48838a71bf6 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1856,11 +1856,9 @@ static void read_symbols_from_files(const char *filename) FILE *in = stdin; char fname[PATH_MAX]; - if (strcmp(filename, "-") != 0) { - in = fopen(filename, "r"); - if (!in) - fatal("Can't open filenames file %s: %m", filename); - } + in = fopen(filename, "r"); + if (!in) + fatal("Can't open filenames file %s: %m", filename); while (fgets(fname, PATH_MAX, in) != NULL) { if (strends(fname, "\n")) @@ -1868,8 +1866,7 @@ static void read_symbols_from_files(const char *filename) read_symbols(fname); } - if (in != stdin) - fclose(in); + fclose(in); } #define SZ 500 diff --git a/scripts/modules-check.sh b/scripts/modules-check.sh index e06327722263..4c8da90de78e 100755 --- a/scripts/modules-check.sh +++ b/scripts/modules-check.sh @@ -16,7 +16,7 @@ check_same_name_modules() for m in $(sed 's:.*/::' "$1" | sort | uniq -d) do echo "error: the following would cause module name conflict:" >&2 - sed -n "/\/$m/s:^: :p" "$1" >&2 + sed -n "/\/$m/s:^\(.*\)\.o\$: \1.ko:p" "$1" >&2 exit_code=1 done } From 3d57e1b7b1d42d4040f0d993b66ff06beda02c54 Mon Sep 17 
00:00:00 2001 From: Masahiro Yamada Date: Sun, 11 Dec 2022 22:04:08 +0900 Subject: [PATCH 3844/4122] kbuild: refactor the prerequisites of the modpost rule The prerequisites of modpost are cluttered. The variables *-if-present and *-if-needed are unreadable. It is cleaner to append them into modpost-deps. Signed-off-by: Masahiro Yamada --- scripts/Makefile.modpost | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index f814a6acd200..5eb5e8280379 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -38,6 +38,8 @@ __modpost: include include/config/auto.conf include $(srctree)/scripts/Kbuild.include +MODPOST = scripts/mod/modpost + modpost-args = \ $(if $(CONFIG_MODVERSIONS),-m) \ $(if $(CONFIG_MODULE_SRCVERSION_ALL),-a) \ @@ -46,6 +48,8 @@ modpost-args = \ $(if $(CONFIG_MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS)$(KBUILD_NSDEPS),-N) \ -o $@ +modpost-deps := $(MODPOST) + # 'make -i -k' ignores compile errors, and builds as many modules as possible. 
ifneq ($(findstring i,$(filter-out --%,$(MAKEFLAGS))),) modpost-args += -n @@ -78,12 +82,13 @@ targets += .vmlinux.objs .vmlinux.objs: vmlinux.a $(KBUILD_VMLINUX_LIBS) FORCE $(call if_changed,vmlinux_objs) -vmlinux.o-if-present := $(wildcard vmlinux.o) -output-symdump := vmlinux.symvers - -ifdef KBUILD_MODULES -output-symdump := $(if $(vmlinux.o-if-present), Module.symvers, modules-only.symvers) -missing-input := $(filter-out $(vmlinux.o-if-present),vmlinux.o) +ifeq ($(wildcard vmlinux.o),) +missing-input := vmlinux.o +output-symdump := modules-only.symvers +else +modpost-args += vmlinux.o +modpost-deps += vmlinux.o +output-symdump := $(if $(KBUILD_MODULES), Module.symvers, vmlinux.symvers) endif else @@ -95,11 +100,16 @@ src := $(obj) # Include the module's Makefile to find KBUILD_EXTRA_SYMBOLS include $(kbuild-file) -module.symvers-if-present := $(wildcard Module.symvers) output-symdump := $(KBUILD_EXTMOD)/Module.symvers -missing-input := $(filter-out $(module.symvers-if-present), Module.symvers) -modpost-args += -e $(addprefix -i ,$(module.symvers-if-present) $(KBUILD_EXTRA_SYMBOLS)) +ifeq ($(wildcard Module.symvers),) +missing-input := Module.symvers +else +modpost-args += -i Module.symvers +modpost-deps += Module.symvers +endif + +modpost-args += -e $(addprefix -i , $(KBUILD_EXTRA_SYMBOLS)) endif # ($(KBUILD_EXTMOD),) @@ -108,12 +118,10 @@ modpost-args += -w endif ifdef KBUILD_MODULES -modorder-if-needed := $(MODORDER) modpost-args += -T $(MODORDER) +modpost-deps += $(MODORDER) endif -MODPOST = scripts/mod/modpost - # Read out modules.order to pass in modpost. # Otherwise, allmodconfig would fail with "Argument list too long". 
quiet_cmd_modpost = MODPOST $@ @@ -122,10 +130,10 @@ quiet_cmd_modpost = MODPOST $@ echo >&2 "WARNING: $(missing-input) is missing."; \ echo >&2 " Modules may not have dependencies or modversions."; \ echo >&2 " You may get many unresolved symbol warnings.";) \ - $(MODPOST) $(modpost-args) $(vmlinux.o-if-present) + $(MODPOST) $(modpost-args) targets += $(output-symdump) -$(output-symdump): $(modorder-if-needed) $(vmlinux.o-if-present) $(module.symvers-if-present) $(MODPOST) FORCE +$(output-symdump): $(modpost-deps) FORCE $(call if_changed,modpost) __modpost: $(output-symdump) From 87d599fc3955e59b1ed30f350321a4be5353f945 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 13 Dec 2022 20:24:20 +0900 Subject: [PATCH 3845/4122] kbuild: ensure Make >= 3.82 is used Documentation/process/changes.rst notes the minimal GNU Make version, but it is not checked anywhere. We could check $(MAKE_VERSION), but another simple way is to check $(.FEATURES) since the feature list always grows. GNU Make 3.81 expands $(.FEATURES) to: target-specific order-only second-expansion else-if archives jobserver check-symlink GNU Make 3.82 expands $(.FEATURES) to: target-specific order-only second-expansion else-if shortest-stem undefine archives jobserver check-symlink To ensure Make >= 3.82, you can check either 'shortest-stem' or 'undefine'. This way is not always possible. For example, Make 4.0 through 4.2 have the same set of $(.FEATURES). At that point, we will need to come up with a different approach. Signed-off-by: Masahiro Yamada Reviewed-by: Nathan Chancellor Reviewed-by: Nicolas Schier --- Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Makefile b/Makefile index f506879e7452..5f015f206e12 100644 --- a/Makefile +++ b/Makefile @@ -11,6 +11,10 @@ NAME = Hurr durr I'ma ninja sloth # Comments in this file are targeted only to the developer, do not # expect to learn how to build the kernel reading this file. 
+ifeq ($(filter undefine,$(.FEATURES)),) +$(error GNU Make >= 3.82 is required. Your Make version is $(MAKE_VERSION)) +endif + $(if $(filter __%, $(MAKECMDGOALS)), \ $(error targets prefixed with '__' are only for internal use)) From 0d24f1b7cc65ee73ea8d04e0d10f77a7cb7a83f3 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 13 Dec 2022 11:35:28 -0700 Subject: [PATCH 3846/4122] padata: Mark padata_work_init() as __ref When building arm64 allmodconfig + ThinLTO with clang and a proposed modpost update to account for -ffuncton-sections, the following warning appears: WARNING: modpost: vmlinux.o: section mismatch in reference: padata_work_init (section: .text.padata_work_init) -> padata_mt_helper (section: .init.text) WARNING: modpost: vmlinux.o: section mismatch in reference: padata_work_init (section: .text.padata_work_init) -> padata_mt_helper (section: .init.text) LLVM has optimized padata_work_init() to include the address of padata_mt_helper() directly because it inlined the other call to padata_work_init() with padata_parallel_worker(), meaning the remaining uses of padata_work_init() use padata_mt_helper() as the work_fn argument. This optimization causes modpost to complain since padata_work_init() is not __init, whereas padata_mt_helper() is. Since padata_work_init() is only called from __init code when padata_mt_helper() is passed as the work_fn argument, mark padata_work_init() as __ref, which makes it clear to modpost that this scenario is okay. 
Suggested-by: Daniel Jordan Signed-off-by: Nathan Chancellor Acked-by: Daniel Jordan Signed-off-by: Masahiro Yamada --- kernel/padata.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/kernel/padata.c b/kernel/padata.c index e5819bb8bd1d..d175cc000453 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -83,8 +83,16 @@ static struct padata_work *padata_work_alloc(void) return pw; } -static void padata_work_init(struct padata_work *pw, work_func_t work_fn, - void *data, int flags) +/* + * This function is marked __ref because this function may be optimized in such + * a way that it directly refers to work_fn's address, which causes modpost to + * complain when work_fn is marked __init. This scenario was observed with clang + * LTO, where padata_work_init() was optimized to refer directly to + * padata_mt_helper() because the calls to padata_work_init() with other work_fn + * values were eliminated or inlined. + */ +static void __ref padata_work_init(struct padata_work *pw, work_func_t work_fn, + void *data, int flags) { if (flags & PADATA_WORK_ONSTACK) INIT_WORK_ONSTACK(&pw->pw_work, work_fn); From 19331e84c3873256537d446afec1f6c507f8c4ef Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 13 Dec 2022 11:35:29 -0700 Subject: [PATCH 3847/4122] modpost: Include '.text.*' in TEXT_SECTIONS Commit 6c730bfc894f ("modpost: handle -ffunction-sections") added ".text.*" to the OTHER_TEXT_SECTIONS macro to fix certain section mismatch warnings. Unfortunately, this makes it impossible for modpost to warn about section mismatches with LTO, which implies '-ffunction-sections', as all functions are put in their own '.text.' sections, which may still reference functions in sections they are not supposed to, such as __init. Fix this by moving ".text.*" into TEXT_SECTIONS, so that configurations with '-ffunction-sections' will see warnings about mismatched sections. 
Link: https://lore.kernel.org/Y39kI3MOtVI5BAnV@google.com/ Reported-by: Vincent Donnefort Reviewed-and-tested-by: Alexander Lobakin Reviewed-by: Sami Tolvanen Tested-by: Vincent Donnefort Signed-off-by: Nathan Chancellor Signed-off-by: Masahiro Yamada --- scripts/mod/modpost.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index b48838a71bf6..640e1a244ba9 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -822,10 +822,10 @@ static void check_section(const char *modname, struct elf_info *elf, #define ALL_EXIT_SECTIONS EXIT_SECTIONS, ALL_XXXEXIT_SECTIONS #define DATA_SECTIONS ".data", ".data.rel" -#define TEXT_SECTIONS ".text", ".text.unlikely", ".sched.text", \ +#define TEXT_SECTIONS ".text", ".text.*", ".sched.text", \ ".kprobes.text", ".cpuidle.text", ".noinstr.text" #define OTHER_TEXT_SECTIONS ".ref.text", ".head.text", ".spinlock.text", \ - ".fixup", ".entry.text", ".exception.text", ".text.*", \ + ".fixup", ".entry.text", ".exception.text", \ ".coldtext", ".softirqentry.text" #define INIT_SECTIONS ".init.*" From 11e47bbd700f31bd1ee9f8863381bc9e741c0e97 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 12 Dec 2022 14:07:48 +0100 Subject: [PATCH 3848/4122] gpio: sim: set a limit on the number of GPIOs With the removal of ARCH_NR_GPIOS in commit 7b61212f2a07 ("gpiolib: Get rid of ARCH_NR_GPIOS") the gpiolib core no longer sanitizes the number of GPIOs for us. This causes the gpio-sim selftests to now fail when setting the number of GPIOs to 99999 and expecting the probe() to fail. Set a sane limit of 1024 on the number of simulated GPIOs and bail out of probe if it's exceeded. 
Reported-by: kernel test robot Link: https://lore.kernel.org/oe-lkp/202212112236.756f5db9-oliver.sang@intel.com Fixes: 7b61212f2a07 ("gpiolib: Get rid of ARCH_NR_GPIOS") Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-sim.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpio/gpio-sim.c b/drivers/gpio/gpio-sim.c index 1020c2feb249..60514bc5454f 100644 --- a/drivers/gpio/gpio-sim.c +++ b/drivers/gpio/gpio-sim.c @@ -31,6 +31,7 @@ #include "gpiolib.h" +#define GPIO_SIM_NGPIO_MAX 1024 #define GPIO_SIM_PROP_MAX 4 /* Max 3 properties + sentinel. */ #define GPIO_SIM_NUM_ATTRS 3 /* value, pull and sentinel */ @@ -371,6 +372,9 @@ static int gpio_sim_add_bank(struct fwnode_handle *swnode, struct device *dev) if (ret) return ret; + if (num_lines > GPIO_SIM_NGPIO_MAX) + return -ERANGE; + ret = fwnode_property_read_string(swnode, "gpio-sim,label", &label); if (ret) { label = devm_kasprintf(dev, GFP_KERNEL, "%s-%s", From 904f309ae7edaadc9fd0ee04be8281d7781d97e4 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 13 Dec 2022 18:06:51 -0800 Subject: [PATCH 3849/4122] thermal: intel: Don't set HFI status bit to 1 When CPU doesn't support HFI (Hardware Feedback Interface), don't include BIT 26 in the mask to prevent clearing. otherwise this results in: unchecked MSR access error: WRMSR to 0x1b1 (tried to write 0x0000000004000aa8) at rIP: 0xffffffff8b8559fe (throttle_active_work+0xbe/0x1b0) Fixes: 6fe1e64b6026 ("thermal: intel: Prevent accidental clearing of HFI status") Reported-by: Linus Torvalds Tested-by: Linus Torvalds Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. 
Wysocki --- drivers/thermal/intel/therm_throt.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/thermal/intel/therm_throt.c b/drivers/thermal/intel/therm_throt.c index 4bb7fddaa143..2e22bb82b738 100644 --- a/drivers/thermal/intel/therm_throt.c +++ b/drivers/thermal/intel/therm_throt.c @@ -194,7 +194,7 @@ static const struct attribute_group thermal_attr_group = { #define THERM_STATUS_PROCHOT_LOG BIT(1) #define THERM_STATUS_CLEAR_CORE_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11) | BIT(13) | BIT(15)) -#define THERM_STATUS_CLEAR_PKG_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11) | BIT(26)) +#define THERM_STATUS_CLEAR_PKG_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11)) /* * Clear the bits in package thermal status register for bit = 1 @@ -211,6 +211,9 @@ void thermal_clear_package_intr_status(int level, u64 bit_mask) } else { msr = MSR_IA32_PACKAGE_THERM_STATUS; msr_val = THERM_STATUS_CLEAR_PKG_MASK; + if (boot_cpu_has(X86_FEATURE_HFI)) + msr_val |= BIT(26); + } msr_val &= ~bit_mask; From fb9f5ee9bf5cf7ebc8731a7033e57d98832dc650 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 13 Dec 2022 17:24:29 -0500 Subject: [PATCH 3850/4122] tracing: Do not synchronize freeing of trigger filter on boot up If a trigger filter on the kernel command line fails to apply (due to syntax error), it will be freed. The freeing will call tracepoint_synchronize_unregister(), but this is not needed during early boot up, and will even trigger a lockdep splat. Avoid calling the synchronization function when system_state is SYSTEM_BOOTING. 
Link: https://lore.kernel.org/linux-trace-kernel/20221213172429.7774f4ba@gandalf.local.home Cc: Andrew Morton Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_trigger.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 19ce9d22bfd7..e535959939d3 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -1085,8 +1085,14 @@ int set_trigger_filter(char *filter_str, rcu_assign_pointer(data->filter, filter); if (tmp) { - /* Make sure the call is done with the filter */ - tracepoint_synchronize_unregister(); + /* + * Make sure the call is done with the filter. + * It is possible that a filter could fail at boot up, + * and then this path will be called. Avoid the synchronization + * in that case. + */ + if (system_state != SYSTEM_BOOTING) + tracepoint_synchronize_unregister(); free_event_filter(tmp); } From 3c97d25ceb75fd3e660ba9fcd4c630d0b057a5a2 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 5 Dec 2022 09:34:04 -0300 Subject: [PATCH 3851/4122] perf stat: Check existence of os->prefix, fixing a segfault We need to check whether os->prefix is set, otherwise we stumble on a metric segv that I'm now seeing in Arnaldo's tree: $ gdb --args perf stat -M Backend true ... Performance counter stats for 'true': 4,712,355 TOPDOWN.SLOTS # 17.3 % tma_core_bound Program received signal SIGSEGV, Segmentation fault. __strlen_evex () at ../sysdeps/x86_64/multiarch/strlen-evex.S:77 77 ../sysdeps/x86_64/multiarch/strlen-evex.S: No such file or directory. 
(gdb) bt #0 __strlen_evex () at ../sysdeps/x86_64/multiarch/strlen-evex.S:77 #1 0x00007ffff74749a5 in __GI__IO_fputs (str=0x0, fp=0x7ffff75f5680 <_IO_2_1_stderr_>) #2 0x0000555555779f28 in do_new_line_std (config=0x555555e077c0 , os=0x7fffffffbf10) at util/stat-display.c:356 #3 0x000055555577a081 in print_metric_std (config=0x555555e077c0 , ctx=0x7fffffffbf10, color=0x0, fmt=0x5555558b77b5 "%8.1f", unit=0x7fffffffbb10 "% tma_memory_bound", val=13.165355724442199) at util/stat-display.c:380 #4 0x00005555557768b6 in generic_metric (config=0x555555e077c0 , metric_expr=0x55555593d5b7 "((CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES))"..., metric_events=0x555555f334e0, metric_refs=0x555555ec81d0, name=0x555555f32e80 "TOPDOWN.SLOTS", metric_name=0x555555f26c80 "tma_memory_bound", metric_unit=0x55555593d5b1 "100%", runtime=0, map_idx=0, out=0x7fffffffbd90, st=0x555555e9e620 ) at util/stat-shadow.c:934 #5 0x0000555555778cac in perf_stat__print_shadow_stats (config=0x555555e077c0 , evsel=0x555555f289d0, avg=4712355, map_idx=0, out=0x7fffffffbd90, metric_events=0x555555e078e8 , st=0x555555e9e620 ) at util/stat-shadow.c:1329 #6 0x000055555577b6a0 in printout (config=0x555555e077c0 , os=0x7fffffffbf10, uval=4712355, run=325322, ena=325322, noise=4712355, map_idx=0) at util/stat-display.c:741 #7 0x000055555577bc74 in print_counter_aggrdata (config=0x555555e077c0 , counter=0x555555f289d0, s=0, os=0x7fffffffbf10) at util/stat-display.c:838 #8 0x000055555577c1d8 in print_counter (config=0x555555e077c0 , counter=0x555555f289d0, os=0x7fffffffbf10) at util/stat-display.c:957 #9 0x000055555577dba0 in evlist__print_counters (evlist=0x555555ec3610, config=0x555555e077c0 , _target=0x555555e01c80 , ts=0x0, argc=1, argv=0x7fffffffe450) at util/stat-display.c:1413 #10 0x00005555555fc821 in print_counters (ts=0x0, argc=1, argv=0x7fffffffe450) 
at builtin-stat.c:1040 #11 0x000055555560091a in cmd_stat (argc=1, argv=0x7fffffffe450) at builtin-stat.c:2665 #12 0x00005555556b1eea in run_builtin (p=0x555555e11f70 , argc=4, argv=0x7fffffffe450) at perf.c:322 #13 0x00005555556b2181 in handle_internal_command (argc=4, argv=0x7fffffffe450) at perf.c:376 #14 0x00005555556b22d7 in run_argv (argcp=0x7fffffffe27c, argv=0x7fffffffe270) at perf.c:420 #15 0x00005555556b26ef in main (argc=4, argv=0x7fffffffe450) at perf.c:550 (gdb) Fixes: f123b2d84ecec9a3 ("perf stat: Remove prefix argument in print_metric_headers()") Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Xing Zhengjun Link: http://lore.kernel.org/lkml/CAP-5=fUOjSM5HajU9TCD6prY39LbX4OQbkEbtKPPGRBPBN=_VQ@mail.gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index f1ee4b052198..9b7772e6abf6 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -353,7 +353,8 @@ static void do_new_line_std(struct perf_stat_config *config, struct outstate *os) { fputc('\n', os->fh); - fputs(os->prefix, os->fh); + if (os->prefix) + fputs(os->prefix, os->fh); aggr_printout(config, os->evsel, os->id, os->nr); if (config->aggr_mode == AGGR_NONE) fprintf(os->fh, " "); From 955f6def5590ce6ca11a1c1ced0d2d1c95421059 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Mon, 5 Dec 2022 12:14:43 +0530 Subject: [PATCH 3852/4122] perf record: Add remaining branch filters: "no_cycles", "no_flags" & "hw_index" This adds all remaining branch filters i.e "no_cycles", "no_flags" and "hw_index". While here, also updates the documentation. 
Signed-off-by: Anshuman Khandual Cc: James Clark Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20221205064443.533587-1-anshuman.khandual@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 5 +++++ tools/perf/util/parse-branch-options.c | 3 +++ 2 files changed, 8 insertions(+) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 9ea6d44aca58..5f9fa07b3dde 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -388,6 +388,7 @@ following filters are defined: - any_call: any function call or system call - any_ret: any function return or system call return - ind_call: any indirect branch + - ind_jmp: any indirect jump - call: direct calls, including far (to/from kernel) calls - u: only when the branch target is at the user level - k: only when the branch target is in the kernel @@ -396,6 +397,10 @@ following filters are defined: - no_tx: only when the target is not in a hardware transaction - abort_tx: only when the target is a hardware transaction abort - cond: conditional branches + - call_stack: save call stack + - no_flags: don't save branch flags e.g prediction, misprediction etc + - no_cycles: don't save branch cycles + - hw_index: save branch hardware index - save_type: save branch type during sampling in case binary is not available later For the platforms with Intel Arch LBR support (12th-Gen+ client or 4th-Gen Xeon+ server), the save branch type is unconditionally enabled diff --git a/tools/perf/util/parse-branch-options.c b/tools/perf/util/parse-branch-options.c index 31faf2bb49ff..fd67d204d720 100644 --- a/tools/perf/util/parse-branch-options.c +++ b/tools/perf/util/parse-branch-options.c @@ -30,8 +30,11 @@ static const struct branch_mode branch_modes[] = { BRANCH_OPT("cond", PERF_SAMPLE_BRANCH_COND), BRANCH_OPT("ind_jmp", PERF_SAMPLE_BRANCH_IND_JUMP), BRANCH_OPT("call", PERF_SAMPLE_BRANCH_CALL), + 
BRANCH_OPT("no_flags", PERF_SAMPLE_BRANCH_NO_FLAGS), + BRANCH_OPT("no_cycles", PERF_SAMPLE_BRANCH_NO_CYCLES), BRANCH_OPT("save_type", PERF_SAMPLE_BRANCH_TYPE_SAVE), BRANCH_OPT("stack", PERF_SAMPLE_BRANCH_CALL_STACK), + BRANCH_OPT("hw_index", PERF_SAMPLE_BRANCH_HW_INDEX), BRANCH_OPT("priv", PERF_SAMPLE_BRANCH_PRIV_SAVE), BRANCH_END }; From 8f4b1e3cebce5d12048409393de751e4d663ce42 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Mon, 5 Dec 2022 09:58:52 +0530 Subject: [PATCH 3853/4122] perf stat: Fix printing field separator in CSV metrics output In 'perf stat' with CSV output option, number of fields in metrics output is not matching with number of fields in other event output lines. Sample output below after applying patch to fix printing os->prefix. # ./perf stat -x, --per-socket -a -C 1 ls S0,1,82.11,msec,cpu-clock,82111626,100.00,1.000,CPUs utilized S0,1,2,,context-switches,82109314,100.00,24.358,/sec ------ ====> S0,1,,,,,,,1.71,stalled cycles per insn The above command line uses field separator as "," via "-x," option and per-socket option displays socket value as first field. But here the last line for "stalled cycles per insn" has more separators. Each csv output line is expected to have 8 field separators (for the 9 fields), whereas the last line has 9 "," in the result. Patch fixes this issue. The counter stats are displayed by function "perf_stat__print_shadow_stats" in code "util/stat-shadow.c". While printing the stats info for "stalled cycles per insn", function "new_line_csv" is used as new_line callback. The fields printed in each line contain: "Socket_id,aggr nr,Avg,unit,event_name,run,enable_percent,ratio,unit" The metric output prints Socket_id, aggr nr, ratio and unit. It has to skip through remaining five fields ie, Avg,unit,event_name,run,enable_percent. The csv line callback uses "os->nfields" to know the number of fields to skip to match with other lines. 
Currently it is set as: os.nfields = 3 + aggr_fields[config->aggr_mode] + (counter->cgrp ? 1 : 0); But in case of aggregation modes, csv_sep already gets printed along with each field (Function "aggr_printout" in util/stat-display.c). So aggr_fields can be removed from nfields. And fixed number of fields to skip has to be "4". This is to skip fields for: "avg, unit, event name, run, enable_percent" This needs 4 csv separators. Patch removes aggr_fields and uses 4 as fixed number of os->nfields to skip. After the patch: # ./perf stat -x, --per-socket -a -C 1 ls S0,1,79.08,msec,cpu-clock,79085956,100.00,1.000,CPUs utilized S0,1,7,,context-switches,79084176,100.00,88.514,/sec ------ ====> S0,1,,,,,,0.81,stalled cycles per insn Fixes: 92a61f6412d3a09d ("perf stat: Implement CSV metrics output") Reported-by: Disha Goel Reviewed-by: Kajol Jain Signed-off-by: Athira Jajeev Tested-by: Arnaldo Carvalho de Melo Tested-by: Disha Goel Cc: Andi Kleen Cc: Ian Rogers Cc: James Clark Cc: Jiri Olsa Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Nageswara R Sastry Cc: Namhyung Kim Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20221205042852.83382-1-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 9b7772e6abf6..660e4f6616f5 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -687,20 +687,9 @@ static void printout(struct perf_stat_config *config, struct outstate *os, struct evsel *counter = os->evsel; if (config->csv_output) { - static const int aggr_fields[AGGR_MAX] = { - [AGGR_NONE] = 1, - [AGGR_GLOBAL] = 0, - [AGGR_SOCKET] = 2, - [AGGR_DIE] = 2, - [AGGR_CORE] = 2, - [AGGR_THREAD] = 1, - [AGGR_UNSET] = 0, - [AGGR_NODE] = 1, - }; - pm = config->metric_only ? print_metric_only_csv : print_metric_csv; nl = config->metric_only ? 
new_line_metric : new_line_csv; - os->nfields = 3 + aggr_fields[config->aggr_mode] + (counter->cgrp ? 1 : 0); + os->nfields = 4 + (counter->cgrp ? 1 : 0); } else if (config->json_output) { pm = config->metric_only ? print_metric_only_json : print_metric_json; nl = config->metric_only ? new_line_metric : new_line_json; From 1849f9f00926c54fa284be3b7f801de8b010572b Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 1 Dec 2022 20:57:39 -0800 Subject: [PATCH 3854/4122] tools lib api: Add dependency test to install_headers Compute the headers to be installed from their source headers and make each have its own build target to install it. Using dependencies avoids headers being reinstalled and getting a new timestamp which then causes files that depend on the header to be rebuilt. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Josh Poimboeuf Cc: Mark Rutland Cc: Masahiro Yamada Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Rix Cc: bpf@vger.kernel.org Cc: llvm@lists.linux.dev Link: https://lore.kernel.org/r/20221202045743.2639466-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/api/Makefile | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile index 3649c7f7ea65..044860ac1ed1 100644 --- a/tools/lib/api/Makefile +++ b/tools/lib/api/Makefile @@ -88,10 +88,10 @@ define do_install_mkdir endef define do_install - if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ - fi; \ - $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2' + if [ ! 
-d '$2' ]; then \ + $(INSTALL) -d -m 755 '$2'; \ + fi; \ + $(INSTALL) $1 $(if $3,-m $3,) '$2' endef install_lib: $(LIBFILE) @@ -99,14 +99,28 @@ install_lib: $(LIBFILE) $(call do_install_mkdir,$(libdir_SQ)); \ cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ) -install_headers: - $(call QUIET_INSTALL, libapi_headers) \ - $(call do_install,cpu.h,$(prefix)/include/api,644); \ - $(call do_install,debug.h,$(prefix)/include/api,644); \ - $(call do_install,io.h,$(prefix)/include/api,644); \ - $(call do_install,fd/array.h,$(prefix)/include/api/fd,644); \ - $(call do_install,fs/fs.h,$(prefix)/include/api/fs,644); \ - $(call do_install,fs/tracing_path.h,$(prefix)/include/api/fs,644); +HDRS := cpu.h debug.h io.h +FD_HDRS := fd/array.h +FS_HDRS := fs/fs.h fs/tracing_path.h +INSTALL_HDRS_PFX := $(DESTDIR)$(prefix)/include/api +INSTALL_HDRS := $(addprefix $(INSTALL_HDRS_PFX)/, $(HDRS)) +INSTALL_FD_HDRS := $(addprefix $(INSTALL_HDRS_PFX)/, $(FD_HDRS)) +INSTALL_FS_HDRS := $(addprefix $(INSTALL_HDRS_PFX)/, $(FS_HDRS)) + +$(INSTALL_HDRS): $(INSTALL_HDRS_PFX)/%.h: %.h + $(call QUIET_INSTALL, $@) \ + $(call do_install,$<,$(INSTALL_HDRS_PFX)/,644) + +$(INSTALL_FD_HDRS): $(INSTALL_HDRS_PFX)/fd/%.h: fd/%.h + $(call QUIET_INSTALL, $@) \ + $(call do_install,$<,$(INSTALL_HDRS_PFX)/fd/,644) + +$(INSTALL_FS_HDRS): $(INSTALL_HDRS_PFX)/fs/%.h: fs/%.h + $(call QUIET_INSTALL, $@) \ + $(call do_install,$<,$(INSTALL_HDRS_PFX)/fs/,644) + +install_headers: $(INSTALL_HDRS) $(INSTALL_FD_HDRS) $(INSTALL_FS_HDRS) + $(call QUIET_INSTALL, libapi_headers) install: install_lib install_headers From 47e02b94a4c98dcc8072e56efaae5057174050fa Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 1 Dec 2022 20:57:40 -0800 Subject: [PATCH 3855/4122] tools lib perf: Add dependency test to install_headers Compute the headers to be installed from their source headers and make each have its own build target to install it. 
Using dependencies avoids headers being reinstalled and getting a new timestamp which then causes files that depend on the header to be rebuilt. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Josh Poimboeuf Cc: Mark Rutland Cc: Masahiro Yamada Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Rix Cc: bpf@vger.kernel.org Cc: llvm@lists.linux.dev Link: https://lore.kernel.org/r/20221202045743.2639466-3-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/Makefile | 43 +++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile index a90fb8c6bed4..30b7f91e7147 100644 --- a/tools/lib/perf/Makefile +++ b/tools/lib/perf/Makefile @@ -176,10 +176,10 @@ define do_install_mkdir endef define do_install - if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ - fi; \ - $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2' + if [ ! 
-d '$2' ]; then \ + $(INSTALL) -d -m 755 '$2'; \ + fi; \ + $(INSTALL) $1 $(if $3,-m $3,) '$2' endef install_lib: libs @@ -187,23 +187,24 @@ install_lib: libs $(call do_install_mkdir,$(libdir_SQ)); \ cp -fpR $(LIBPERF_ALL) $(DESTDIR)$(libdir_SQ) -install_headers: - $(call QUIET_INSTALL, libperf_headers) \ - $(call do_install,include/perf/bpf_perf.h,$(prefix)/include/perf,644); \ - $(call do_install,include/perf/core.h,$(prefix)/include/perf,644); \ - $(call do_install,include/perf/cpumap.h,$(prefix)/include/perf,644); \ - $(call do_install,include/perf/threadmap.h,$(prefix)/include/perf,644); \ - $(call do_install,include/perf/evlist.h,$(prefix)/include/perf,644); \ - $(call do_install,include/perf/evsel.h,$(prefix)/include/perf,644); \ - $(call do_install,include/perf/event.h,$(prefix)/include/perf,644); \ - $(call do_install,include/perf/mmap.h,$(prefix)/include/perf,644); \ - $(call do_install,include/internal/cpumap.h,$(prefix)/include/internal,644); \ - $(call do_install,include/internal/evlist.h,$(prefix)/include/internal,644); \ - $(call do_install,include/internal/evsel.h,$(prefix)/include/internal,644); \ - $(call do_install,include/internal/lib.h,$(prefix)/include/internal,644); \ - $(call do_install,include/internal/mmap.h,$(prefix)/include/internal,644); \ - $(call do_install,include/internal/threadmap.h,$(prefix)/include/internal,644); \ - $(call do_install,include/internal/xyarray.h,$(prefix)/include/internal,644); +HDRS := bpf_perf.h core.h cpumap.h threadmap.h evlist.h evsel.h event.h mmap.h +INTERNAL_HDRS := cpumap.h evlist.h evsel.h lib.h mmap.h threadmap.h xyarray.h + +INSTALL_HDRS_PFX := $(DESTDIR)$(prefix)/include/perf +INSTALL_HDRS := $(addprefix $(INSTALL_HDRS_PFX)/, $(HDRS)) +INSTALL_INTERNAL_HDRS_PFX := $(DESTDIR)$(prefix)/include/internal +INSTALL_INTERNAL_HDRS := $(addprefix $(INSTALL_INTERNAL_HDRS_PFX)/, $(INTERNAL_HDRS)) + +$(INSTALL_HDRS): $(INSTALL_HDRS_PFX)/%.h: include/perf/%.h + $(call QUIET_INSTALL, $@) \ + $(call 
do_install,$<,$(INSTALL_HDRS_PFX)/,644) + +$(INSTALL_INTERNAL_HDRS): $(INSTALL_INTERNAL_HDRS_PFX)/%.h: include/internal/%.h + $(call QUIET_INSTALL, $@) \ + $(call do_install,$<,$(INSTALL_INTERNAL_HDRS_PFX)/,644) + +install_headers: $(INSTALL_HDRS) $(INSTALL_INTERNAL_HDRS) + $(call QUIET_INSTALL, libperf_headers) install_pkgconfig: $(LIBPERF_PC) $(call QUIET_INSTALL, $(LIBPERF_PC)) \ From 5d890591db6bed8ca69bd4bfe0cdaca372973033 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 1 Dec 2022 20:57:41 -0800 Subject: [PATCH 3856/4122] tools lib subcmd: Add dependency test to install_headers Compute the headers to be installed from their source headers and make each have its own build target to install it. Using dependencies avoids headers being reinstalled and getting a new timestamp which then causes files that depend on the header to be rebuilt. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Josh Poimboeuf Cc: Mark Rutland Cc: Masahiro Yamada Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Rix Cc: bpf@vger.kernel.org Cc: llvm@lists.linux.dev Link: https://lore.kernel.org/r/20221202045743.2639466-4-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/subcmd/Makefile | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/tools/lib/subcmd/Makefile b/tools/lib/subcmd/Makefile index 9a316d8b89df..b87213263a5e 100644 --- a/tools/lib/subcmd/Makefile +++ b/tools/lib/subcmd/Makefile @@ -89,10 +89,10 @@ define do_install_mkdir endef define do_install - if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ + if [ ! 
-d '$2' ]; then \ + $(INSTALL) -d -m 755 '$2'; \ fi; \ - $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2' + $(INSTALL) $1 $(if $3,-m $3,) '$2' endef install_lib: $(LIBFILE) @@ -100,13 +100,16 @@ install_lib: $(LIBFILE) $(call do_install_mkdir,$(libdir_SQ)); \ cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ) -install_headers: - $(call QUIET_INSTALL, libsubcmd_headers) \ - $(call do_install,exec-cmd.h,$(prefix)/include/subcmd,644); \ - $(call do_install,help.h,$(prefix)/include/subcmd,644); \ - $(call do_install,pager.h,$(prefix)/include/subcmd,644); \ - $(call do_install,parse-options.h,$(prefix)/include/subcmd,644); \ - $(call do_install,run-command.h,$(prefix)/include/subcmd,644); +HDRS := exec-cmd.h help.h pager.h parse-options.h run-command.h +INSTALL_HDRS_PFX := $(DESTDIR)$(prefix)/include/subcmd +INSTALL_HDRS := $(addprefix $(INSTALL_HDRS_PFX)/, $(HDRS)) + +$(INSTALL_HDRS): $(INSTALL_HDRS_PFX)/%.h: %.h + $(call QUIET_INSTALL, $@) \ + $(call do_install,$<,$(INSTALL_HDRS_PFX)/,644) + +install_headers: $(INSTALL_HDRS) + $(call QUIET_INSTALL, libsubcmd_headers) install: install_lib install_headers From 113bb3964297467baeb1fd2c4f86d0a4142e4259 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 1 Dec 2022 20:57:42 -0800 Subject: [PATCH 3857/4122] tools lib symbol: Add dependency test to install_headers Compute the headers to be installed from their source headers and make each have its own build target to install it. Using dependencies avoids headers being reinstalled and getting a new timestamp which then causes files that depend on the header to be rebuilt. 
Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Josh Poimboeuf Cc: Mark Rutland Cc: Masahiro Yamada Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Rix Cc: bpf@vger.kernel.org Cc: llvm@lists.linux.dev Link: https://lore.kernel.org/r/20221202045743.2639466-5-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/symbol/Makefile | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/tools/lib/symbol/Makefile b/tools/lib/symbol/Makefile index ea8707b3442a..13d43c6f92b4 100644 --- a/tools/lib/symbol/Makefile +++ b/tools/lib/symbol/Makefile @@ -89,10 +89,10 @@ define do_install_mkdir endef define do_install - if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ - fi; \ - $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2' + if [ ! -d '$2' ]; then \ + $(INSTALL) -d -m 755 '$2'; \ + fi; \ + $(INSTALL) $1 $(if $3,-m $3,) '$2' endef install_lib: $(LIBFILE) @@ -100,9 +100,16 @@ install_lib: $(LIBFILE) $(call do_install_mkdir,$(libdir_SQ)); \ cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ) -install_headers: - $(call QUIET_INSTALL, libsymbol_headers) \ - $(call do_install,kallsyms.h,$(prefix)/include/symbol,644); +HDRS := kallsyms.h +INSTALL_HDRS_PFX := $(DESTDIR)$(prefix)/include/symbol +INSTALL_HDRS := $(addprefix $(INSTALL_HDRS_PFX)/, $(HDRS)) + +$(INSTALL_HDRS): $(INSTALL_HDRS_PFX)/%.h: %.h + $(call QUIET_INSTALL, $@) \ + $(call do_install,$<,$(INSTALL_HDRS_PFX)/,644) + +install_headers: $(INSTALL_HDRS) + $(call QUIET_INSTALL, libsymbol_headers) install: install_lib install_headers From 117195d9f8af74c65bf57d9b56f496b5b3655bcb Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 2 Dec 2022 11:04:47 -0800 Subject: [PATCH 3858/4122] perf stat: Fix multi-line metric output in JSON When a metric produces more than one value, the opening bracket was not printed. 
Fixes: ab6baaae27357290 ("perf stat: Fix JSON output in metric-only mode") Reported-by: Weilin Wang Signed-off-by: Namhyung Kim Tested-by: Weilin Wang Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221202190447.1588680-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 660e4f6616f5..7f885b04c23f 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -441,7 +441,7 @@ static void new_line_json(struct perf_stat_config *config, void *ctx) { struct outstate *os = ctx; - fputc('\n', os->fh); + fputs("\n{", os->fh); if (os->prefix) fprintf(os->fh, "%s", os->prefix); aggr_printout(config, os->evsel, os->id, os->nr); From 3f81f72d30b46efb614d93f430684c0deb8439b7 Mon Sep 17 00:00:00 2001 From: James Clark Date: Wed, 30 Nov 2022 11:15:21 +0000 Subject: [PATCH 3859/4122] perf stat: Fix invalid output handle In this context, 'os' is already a pointer so the extra dereference isn't required. This fixes the following test failure on aarch64: $ ./perf test "json output" -vvv 92: perf stat JSON output linter : --- start --- Checking json output: no args Test failed for input: ... Fatal error: glibc detected an invalid stdio handle ---- end ---- perf stat JSON output linter: FAILED! 
Fixes: e7f4da312259e618 ("perf stat: Pass struct outstate to printout()") Signed-off-by: James Clark Tested-by: Athira Jajeev Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221130111521.334152-2-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 7f885b04c23f..ead4915c4a03 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -731,7 +731,7 @@ static void printout(struct perf_stat_config *config, struct outstate *os, perf_stat__print_shadow_stats(config, counter, uval, map_idx, &out, &config->metric_events, &rt_stat); } else { - pm(config, &os, /*color=*/NULL, /*format=*/NULL, /*unit=*/"", /*val=*/0); + pm(config, os, /*color=*/NULL, /*format=*/NULL, /*unit=*/"", /*val=*/0); } if (!config->metric_only) { From fce9a619145181ca6a41253f7de3df56b1e4ad59 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 29 Nov 2022 22:29:32 -0800 Subject: [PATCH 3860/4122] perf util: Make header guard consistent with tool Remove git reference by changing GIT_COMPAT_UTIL_H to __PERF_UTIL_H. 
Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Steven Rostedt (VMware) Link: https://lore.kernel.org/r/20221130062935.2219247-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/util.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index c1f2d423a9ec..63cdab0e5314 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef GIT_COMPAT_UTIL_H -#define GIT_COMPAT_UTIL_H +#ifndef __PERF_UTIL_H +#define __PERF_UTIL_H #define _BSD_SOURCE 1 /* glibc 2.20 deprecates _BSD_SOURCE in favour of _DEFAULT_SOURCE */ @@ -94,4 +94,4 @@ int do_realloc_array_as_needed(void **arr, size_t *arr_sz, size_t x, 0; \ }) -#endif /* GIT_COMPAT_UTIL_H */ +#endif /* __PERF_UTIL_H */ From 5b7a29fb0b7d67e5d40cd6557e073afb6a7466ab Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 29 Nov 2022 22:29:33 -0800 Subject: [PATCH 3861/4122] perf util: Add host_is_bigendian to util.h Avoid libtraceevent dependency for tep_is_bigendian or trace-event.h dependency for bigendian. Add a new host_is_bigendian to util.h, using the compiler defined __BYTE_ORDER__ when available. Committer notes: Added: #else /* !__BYTE_ORDER__ */ On that nested #ifdef block, as per Namhyung's suggestion. 
Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Steven Rostedt (VMware) Link: https://lore.kernel.org/r/20221130062935.2219247-3-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/code-reading.c | 4 ++-- tools/perf/tests/sample-parsing.c | 4 ++-- tools/perf/util/evsel.c | 5 +---- tools/perf/util/trace-event-info.c | 14 +++----------- tools/perf/util/trace-event-read.c | 3 ++- tools/perf/util/trace-event.h | 2 -- tools/perf/util/util.h | 19 +++++++++++++++++++ 7 files changed, 29 insertions(+), 22 deletions(-) diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 95feb6ef34a0..cb8cd09938d5 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -16,7 +16,6 @@ #include "dso.h" #include "env.h" #include "parse-events.h" -#include "trace-event.h" #include "evlist.h" #include "evsel.h" #include "thread_map.h" @@ -28,6 +27,7 @@ #include "util/mmap.h" #include "util/string2.h" #include "util/synthetic-events.h" +#include "util/util.h" #include "thread.h" #include "tests.h" @@ -79,7 +79,7 @@ static size_t read_objdump_chunk(const char **line, unsigned char **buf, * see disassemble_bytes() at binutils/objdump.c for details * how objdump chooses display endian) */ - if (bytes_read > 1 && !bigendian()) { + if (bytes_read > 1 && !host_is_bigendian()) { unsigned char *chunk_end = chunk_start + bytes_read - 1; unsigned char tmp; diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 20930dd48ee0..927c7f0cc4cc 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -13,7 +13,7 @@ #include "evsel.h" #include "debug.h" #include "util/synthetic-events.h" -#include "util/trace-event.h" +#include "util/util.h" #include "tests.h" @@ -117,7 +117,7 @@ 
static bool samples_same(const struct perf_sample *s1, COMP(branch_stack->hw_idx); for (i = 0; i < s1->branch_stack->nr; i++) { if (needs_swap) - return ((tep_is_bigendian()) ? + return ((host_is_bigendian()) ? (FLAG(s2).value == BS_EXPECTED_BE) : (FLAG(s2).value == BS_EXPECTED_LE)); else diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 45f4f08399ae..0f617359a82f 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2320,11 +2320,8 @@ u64 evsel__bitfield_swap_branch_flags(u64 value) * as it has variable bit-field sizes. Instead the * macro takes the bit-field position/size, * swaps it based on the host endianness. - * - * tep_is_bigendian() is used here instead of - * bigendian() to avoid python test fails. */ - if (tep_is_bigendian()) { + if (host_is_bigendian()) { new_val = bitfield_swap(value, 0, 1); new_val |= bitfield_swap(value, 1, 1); new_val |= bitfield_swap(value, 2, 1); diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c index 892c323b4ac9..c24b3a15e319 100644 --- a/tools/perf/util/trace-event-info.c +++ b/tools/perf/util/trace-event-info.c @@ -26,6 +26,7 @@ #include #include "evsel.h" #include "debug.h" +#include "util.h" #define VERSION "0.6" #define MAX_EVENT_LENGTH 512 @@ -38,15 +39,6 @@ struct tracepoint_path { struct tracepoint_path *next; }; -int bigendian(void) -{ - unsigned char str[] = { 0x1, 0x2, 0x3, 0x4, 0x0, 0x0, 0x0, 0x0}; - unsigned int *ptr; - - ptr = (unsigned int *)(void *)str; - return *ptr == 0x01020304; -} - /* unfortunately, you can not stat debugfs or proc files for size */ static int record_file(const char *file, ssize_t hdr_sz) { @@ -79,7 +71,7 @@ static int record_file(const char *file, ssize_t hdr_sz) /* ugh, handle big-endian hdr_size == 4 */ sizep = (char*)&size; - if (bigendian()) + if (host_is_bigendian()) sizep += sizeof(u64) - hdr_sz; if (hdr_sz && pwrite(output_fd, sizep, hdr_sz, hdr_pos) < 0) { @@ -564,7 +556,7 @@ static int tracing_data_header(void) 
return -1; /* save endian */ - if (bigendian()) + if (host_is_bigendian()) buf[0] = 1; else buf[0] = 0; diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index 8a01af783310..43146a4ce2fb 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -17,6 +17,7 @@ #include "trace-event.h" #include "debug.h" +#include "util.h" static int input_fd; @@ -414,7 +415,7 @@ ssize_t trace_report(int fd, struct trace_event *tevent, bool __repipe) return -1; } file_bigendian = buf[0]; - host_bigendian = bigendian(); + host_bigendian = host_is_bigendian() ? 1 : 0; if (trace_event__init(tevent)) { pr_debug("trace_event__init failed"); diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index 640981105788..8f39f5bcb2c2 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -27,8 +27,6 @@ trace_event__tp_format(const char *sys, const char *name); struct tep_event *trace_event__tp_format_id(int id); -int bigendian(void); - void event_format__fprintf(struct tep_event *event, int cpu, void *data, int size, FILE *fp); diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 63cdab0e5314..1d3b300af5a1 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -94,4 +94,23 @@ int do_realloc_array_as_needed(void **arr, size_t *arr_sz, size_t x, 0; \ }) +static inline bool host_is_bigendian(void) +{ +#ifdef __BYTE_ORDER__ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return false; +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + return true; +#else +#error "Unrecognized __BYTE_ORDER__" +#endif +#else /* !__BYTE_ORDER__ */ + unsigned char str[] = { 0x1, 0x2, 0x3, 0x4, 0x0, 0x0, 0x0, 0x0}; + unsigned int *ptr; + + ptr = (unsigned int *)(void *)str; + return *ptr == 0x01020304; +#endif +} + #endif /* __PERF_UTIL_H */ From 336b92da1aa4228a664f27972f61e6186f369e79 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 6 Dec 2022 10:02:36 +0530 Subject: [PATCH 
3862/4122] perf tool: Move pmus list variable to a new file The 'pmus' list variable is defined as static variable under pmu.c file. Introduce a new pmus.c file and migrate this variable to it. Also make it non static so that it can be accessed from outside. Suggested-by: Ian Rogers Signed-off-by: Ravi Bangoria Acked-by: Ian Rogers Acked-by: Kan Liang Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ananth Narayan Cc: Athira Jajeev Cc: Jiri Olsa Cc: Kajol Jain Cc: Leo Yan Cc: Madhavan Srinivasan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sandipan Das Cc: Santosh Shukla Cc: Thomas Richter Cc: carsten.haitzler@arm.com Link: https://lore.kernel.org/r/20221206043237.12159-2-ravi.bangoria@amd.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/Build | 1 + tools/perf/util/pmu.c | 2 +- tools/perf/util/pmus.c | 5 +++++ tools/perf/util/pmus.h | 9 +++++++++ 4 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 tools/perf/util/pmus.c create mode 100644 tools/perf/util/pmus.h diff --git a/tools/perf/util/Build b/tools/perf/util/Build index ab37f588ee8b..d04802bfa23f 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -73,6 +73,7 @@ perf-y += trace-event-parse.o perf-y += parse-events-flex.o perf-y += parse-events-bison.o perf-y += pmu.o +perf-y += pmus.o perf-y += pmu-flex.o perf-y += pmu-bison.o perf-y += pmu-hybrid.o diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 8ff6462f051e..2bdeb89352e7 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -22,6 +22,7 @@ #include "debug.h" #include "evsel.h" #include "pmu.h" +#include "pmus.h" #include "parse-events.h" #include "print-events.h" #include "header.h" @@ -58,7 +59,6 @@ struct perf_pmu_format { int perf_pmu_parse(struct list_head *list, char *name); extern FILE *perf_pmu_in; -static LIST_HEAD(pmus); static bool hybrid_scanned; /* diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c new file mode 100644 index 000000000000..7f3b93c4d229 --- 
/dev/null +++ b/tools/perf/util/pmus.c @@ -0,0 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include + +LIST_HEAD(pmus); diff --git a/tools/perf/util/pmus.h b/tools/perf/util/pmus.h new file mode 100644 index 000000000000..5ec12007eb5c --- /dev/null +++ b/tools/perf/util/pmus.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PMUS_H +#define __PMUS_H + +extern struct list_head pmus; + +#define perf_pmus__for_each_pmu(pmu) list_for_each_entry(pmu, &pmus, list) + +#endif /* __PMUS_H */ From 9d9b22bedad13d96b34fe005ef44b4523c4eb786 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 6 Dec 2022 10:02:37 +0530 Subject: [PATCH 3863/4122] perf test: Add event group test for events in multiple PMUs Multiple events in a group can belong to one or more PMUs, however there are some limitations. One of the limitations is that perf doesn't allow creating a group of events from different hw PMUs. Write a simple test to create various combinations of hw, sw and uncore PMU events and verify group creation succeeds or fails as expected. 
Signed-off-by: Ravi Bangoria Acked-by: Ian Rogers Acked-by: Kan Liang Acked-by: Madhavan Srinivasan Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ananth Narayan Cc: Athira Jajeev Cc: Carsten Haitzler Cc: Jiri Olsa Cc: Kajol Jain Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sandipan Das Cc: Santosh Shukla Cc: Thomas Richter Link: https://lore.kernel.org/r/20221206043237.12159-3-ravi.bangoria@amd.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/Build | 1 + tools/perf/tests/builtin-test.c | 1 + tools/perf/tests/event_groups.c | 127 ++++++++++++++++++++++++++++++++ tools/perf/tests/tests.h | 1 + 4 files changed, 130 insertions(+) create mode 100644 tools/perf/tests/event_groups.c diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 11b69023011b..658b5052c24d 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -67,6 +67,7 @@ perf-y += expand-cgroup.o perf-y += perf-time-to-tsc.o perf-y += dlfilter-test.o perf-y += sigtrap.o +perf-y += event_groups.o $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build $(call rule_mkdir) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 4c6ae59a4dfd..ddd8262bfa26 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -110,6 +110,7 @@ static struct test_suite *generic_tests[] = { &suite__perf_time_to_tsc, &suite__dlfilter, &suite__sigtrap, + &suite__event_groups, NULL, }; diff --git a/tools/perf/tests/event_groups.c b/tools/perf/tests/event_groups.c new file mode 100644 index 000000000000..612c0444aaa8 --- /dev/null +++ b/tools/perf/tests/event_groups.c @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include "linux/perf_event.h" +#include "tests.h" +#include "debug.h" +#include "pmu.h" +#include "pmus.h" +#include "header.h" +#include "../perf-sys.h" + +/* hw: cycles, sw: context-switch, uncore: [arch dependent] */ +static int types[] = {0, 1, -1}; 
+static unsigned long configs[] = {0, 3, 0}; + +#define NR_UNCORE_PMUS 5 + +/* Uncore pmus that support more than 3 counters */ +static struct uncore_pmus { + const char *name; + __u64 config; +} uncore_pmus[NR_UNCORE_PMUS] = { + { "amd_l3", 0x0 }, + { "amd_df", 0x0 }, + { "uncore_imc_0", 0x1 }, /* Intel */ + { "core_imc", 0x318 }, /* PowerPC: core_imc/CPM_STCX_FIN/ */ + { "hv_24x7", 0x22000000003 }, /* PowerPC: hv_24x7/CPM_STCX_FIN/ */ +}; + +static int event_open(int type, unsigned long config, int group_fd) +{ + struct perf_event_attr attr; + + memset(&attr, 0, sizeof(struct perf_event_attr)); + attr.type = type; + attr.size = sizeof(struct perf_event_attr); + attr.config = config; + /* + * When creating an event group, typically the group leader is + * initialized with disabled set to 1 and any child events are + * initialized with disabled set to 0. Despite disabled being 0, + * the child events will not start until the group leader is + * enabled. + */ + attr.disabled = group_fd == -1 ? 1 : 0; + + return sys_perf_event_open(&attr, -1, 0, group_fd, 0); +} + +static int setup_uncore_event(void) +{ + struct perf_pmu *pmu; + int i; + + if (list_empty(&pmus)) + perf_pmu__scan(NULL); + + perf_pmus__for_each_pmu(pmu) { + for (i = 0; i < NR_UNCORE_PMUS; i++) { + if (!strcmp(uncore_pmus[i].name, pmu->name)) { + pr_debug("Using %s for uncore pmu event\n", pmu->name); + types[2] = pmu->type; + configs[2] = uncore_pmus[i].config; + return 0; + } + } + } + return -1; +} + +static int run_test(int i, int j, int k) +{ + int erroneous = ((((1 << i) | (1 << j) | (1 << k)) & 5) == 5); + int group_fd, sibling_fd1, sibling_fd2; + + group_fd = event_open(types[i], configs[i], -1); + if (group_fd == -1) + return -1; + + sibling_fd1 = event_open(types[j], configs[j], group_fd); + if (sibling_fd1 == -1) { + close(group_fd); + return erroneous ? 
0 : -1; + } + + sibling_fd2 = event_open(types[k], configs[k], group_fd); + if (sibling_fd2 == -1) { + close(sibling_fd1); + close(group_fd); + return erroneous ? 0 : -1; + } + + close(sibling_fd2); + close(sibling_fd1); + close(group_fd); + return erroneous ? -1 : 0; +} + +static int test__event_groups(struct test_suite *text __maybe_unused, int subtest __maybe_unused) +{ + int i, j, k; + int ret; + int r; + + ret = setup_uncore_event(); + if (ret || types[2] == -1) + return TEST_SKIP; + + ret = TEST_OK; + for (i = 0; i < 3; i++) { + for (j = 0; j < 3; j++) { + for (k = 0; k < 3; k++) { + r = run_test(i, j, k); + if (r) + ret = TEST_FAIL; + + pr_debug("0x%x 0x%lx, 0x%x 0x%lx, 0x%x 0x%lx: %s\n", + types[i], configs[i], types[j], configs[j], + types[k], configs[k], r ? "Fail" : "Pass"); + } + } + } + return ret; +} + +DEFINE_SUITE("Event groups", event_groups); diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index e15f24cfc909..fb4b5ad4dd0f 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -147,6 +147,7 @@ DECLARE_SUITE(expand_cgroup_events); DECLARE_SUITE(perf_time_to_tsc); DECLARE_SUITE(dlfilter); DECLARE_SUITE(sigtrap); +DECLARE_SUITE(event_groups); /* * PowerPC and S390 do not support creation of instruction breakpoints using the From cc2367eebb0c3c5501cddd5823e5feda7b57f706 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 6 Dec 2022 13:49:04 -0300 Subject: [PATCH 3864/4122] machine: Adopt is_lock_function() from builtin-lock.c It is used in bpf_lock_contention.c and builtin-lock.c will be made CONFIG_LIBTRACEEVENT=y conditional, so move it to machine.c, that is always available. This makes those 4 global variables for sched and lock text start and end to move to 'struct machine' too, as conceivably we can have that info for several machine instances, say some 'perf diff' like tool. 
Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Stephane Eranian Cc: bpf@vger.kernel.org Link: http://lore.kernel.org/lkml/ Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-lock.c | 58 +-------------------------- tools/perf/util/bpf_lock_contention.c | 2 +- tools/perf/util/lock-contention.h | 2 - tools/perf/util/machine.c | 40 ++++++++++++++++++ tools/perf/util/machine.h | 5 +++ 5 files changed, 48 insertions(+), 59 deletions(-) diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 0d280093b19a..15ce6358f127 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -67,11 +67,6 @@ static enum { LOCK_AGGR_CALLER, } aggr_mode = LOCK_AGGR_ADDR; -static u64 sched_text_start; -static u64 sched_text_end; -static u64 lock_text_start; -static u64 lock_text_end; - static struct thread_stat *thread_stat_find(u32 tid) { struct rb_node *node; @@ -854,55 +849,6 @@ end: return 0; } -bool is_lock_function(struct machine *machine, u64 addr) -{ - if (!sched_text_start) { - struct map *kmap; - struct symbol *sym; - - sym = machine__find_kernel_symbol_by_name(machine, - "__sched_text_start", - &kmap); - if (!sym) { - /* to avoid retry */ - sched_text_start = 1; - return false; - } - - sched_text_start = kmap->unmap_ip(kmap, sym->start); - - /* should not fail from here */ - sym = machine__find_kernel_symbol_by_name(machine, - "__sched_text_end", - &kmap); - sched_text_end = kmap->unmap_ip(kmap, sym->start); - - sym = machine__find_kernel_symbol_by_name(machine, - "__lock_text_start", - &kmap); - lock_text_start = kmap->unmap_ip(kmap, sym->start); - - sym = machine__find_kernel_symbol_by_name(machine, - "__lock_text_end", - &kmap); - lock_text_end = kmap->unmap_ip(kmap, sym->start); - } - - /* failed to get kernel symbols */ - if (sched_text_start == 1) - return false; - - /* mutex and rwsem functions are in sched text section */ - if 
(sched_text_start <= addr && addr < sched_text_end) - return true; - - /* spinlock functions are in lock text section */ - if (lock_text_start <= addr && addr < lock_text_end) - return true; - - return false; -} - static int get_symbol_name_offset(struct map *map, struct symbol *sym, u64 ip, char *buf, int size) { @@ -961,7 +907,7 @@ static int lock_contention_caller(struct evsel *evsel, struct perf_sample *sampl goto next; sym = node->ms.sym; - if (sym && !is_lock_function(machine, node->ip)) { + if (sym && !machine__is_lock_function(machine, node->ip)) { get_symbol_name_offset(node->ms.map, sym, node->ip, buf, size); return 0; @@ -1007,7 +953,7 @@ static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample) if (++skip <= stack_skip) goto next; - if (node->ms.sym && is_lock_function(machine, node->ip)) + if (node->ms.sym && machine__is_lock_function(machine, node->ip)) goto next; hash ^= hash_long((unsigned long)node->ip, 64); diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c index 4db9ad3d50c4..f4ebb9a2e380 100644 --- a/tools/perf/util/bpf_lock_contention.c +++ b/tools/perf/util/bpf_lock_contention.c @@ -153,7 +153,7 @@ int lock_contention_read(struct lock_contention *con) bpf_map_lookup_elem(stack, &key, stack_trace); /* skip lock internal functions */ - while (is_lock_function(machine, stack_trace[idx]) && + while (machine__is_lock_function(machine, stack_trace[idx]) && idx < con->max_stack - 1) idx++; diff --git a/tools/perf/util/lock-contention.h b/tools/perf/util/lock-contention.h index e3c061b1795b..a2346875098d 100644 --- a/tools/perf/util/lock-contention.h +++ b/tools/perf/util/lock-contention.h @@ -145,6 +145,4 @@ static inline int lock_contention_read(struct lock_contention *con __maybe_unuse #endif /* HAVE_BPF_SKEL */ -bool is_lock_function(struct machine *machine, u64 addr); - #endif /* PERF_LOCK_CONTENTION_H */ diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 
76316e459c3d..803c9d1803dd 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -3336,3 +3336,43 @@ int machine__for_each_kernel_map(struct machine *machine, machine__map_t fn, voi } return err; } + +bool machine__is_lock_function(struct machine *machine, u64 addr) +{ + if (!machine->sched.text_start) { + struct map *kmap; + struct symbol *sym = machine__find_kernel_symbol_by_name(machine, "__sched_text_start", &kmap); + + if (!sym) { + /* to avoid retry */ + machine->sched.text_start = 1; + return false; + } + + machine->sched.text_start = kmap->unmap_ip(kmap, sym->start); + + /* should not fail from here */ + sym = machine__find_kernel_symbol_by_name(machine, "__sched_text_end", &kmap); + machine->sched.text_end = kmap->unmap_ip(kmap, sym->start); + + sym = machine__find_kernel_symbol_by_name(machine, "__lock_text_start", &kmap); + machine->lock.text_start = kmap->unmap_ip(kmap, sym->start); + + sym = machine__find_kernel_symbol_by_name(machine, "__lock_text_end", &kmap); + machine->lock.text_end = kmap->unmap_ip(kmap, sym->start); + } + + /* failed to get kernel symbols */ + if (machine->sched.text_start == 1) + return false; + + /* mutex and rwsem functions are in sched text section */ + if (machine->sched.text_start <= addr && addr < machine->sched.text_end) + return true; + + /* spinlock functions are in lock text section */ + if (machine->lock.text_start <= addr && addr < machine->lock.text_end) + return true; + + return false; +} diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 6267c1d6f232..d034ecaf89c1 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -56,6 +56,10 @@ struct machine { struct maps *kmaps; struct map *vmlinux_map; u64 kernel_start; + struct { + u64 text_start; + u64 text_end; + } sched, lock; pid_t *current_tid; size_t current_tid_sz; union { /* Tool specific area */ @@ -212,6 +216,7 @@ static inline bool machine__is_host(struct machine *machine) return machine ? 
machine->pid == HOST_KERNEL_ID : false; } +bool machine__is_lock_function(struct machine *machine, u64 addr); bool machine__is(struct machine *machine, const char *arch); bool machine__normalized_is(struct machine *machine, const char *arch); int machine__nr_cpus_avail(struct machine *machine); From 616aa32d6f221faa0235d2586ec1706dca70a439 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 5 Dec 2022 14:59:38 -0800 Subject: [PATCH 3865/4122] perf build: Fixes for LIBTRACEEVENT_DYNAMIC If LIBTRACEEVENT_DYNAMIC is enabled then avoid the install step for the plugins. If disabled correct DESTDIR so that the plugins are installed under /traceevent/plugins. Fixes: ef019df01e207971 ("perf build: Install libtraceevent locally when building") Reported-by: Alexander Gordeev Signed-off-by: Ian Rogers Cc: Alexander Gordeev Cc: Alexander Shishkin Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20221205225940.3079667-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index a17a6ea85e81..6689f644782f 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -884,7 +884,7 @@ $(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins install-traceevent-plugins: libtraceevent_plugins $(Q)$(MAKE) -C $(LIBTRACEEVENT_PLUGINS_DIR) O=$(LIBTRACEEVENT_PLUGINS_OUTPUT) \ - DESTDIR=$(LIBTRACEEVENT_PLUGINS_DESTDIR) prefix= \ + DESTDIR=$(DESTDIR_SQ)$(prefix) prefix= \ $(LIBTRACEEVENT_FLAGS) install endif @@ -1093,7 +1093,11 @@ install-tests: all install-gtk $(INSTALL) tests/shell/coresight/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/coresight' $(Q)$(MAKE) -C tests/shell/coresight install-tests -install-bin: install-tools install-tests install-traceevent-plugins +install-bin: install-tools install-tests + 
+ifndef LIBTRACEEVENT_DYNAMIC +install-bin: install-traceevent-plugins +endif install: install-bin try-install-man From b897613510890d6e92b6a276a20f6c3d96fe90e8 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 6 Dec 2022 09:58:04 -0800 Subject: [PATCH 3866/4122] perf stat: Update event skip condition for system-wide per-thread mode and merged uncore and hybrid events In print_counter_aggrdata(), it skips some events that have no aggregate count. It's actually for system-wide per-thread mode and merged uncore and hybrid events. Let's update the condition to check them explicitly. Fixes: 91f85f98da7ab8c3 ("perf stat: Display event stats using aggr counts") Reported-by: Athira Jajeev Signed-off-by: Namhyung Kim Acked-by: Athira Jajeev Cc: Adrian Hunter Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221206175804.391387-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index ead4915c4a03..8bd8b0142630 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -804,7 +804,8 @@ static void print_counter_aggrdata(struct perf_stat_config *config, os->nr = aggr->nr; os->evsel = counter; - if (counter->supported && aggr->nr == 0) + /* Skip already merged uncore/hybrid events */ + if (counter->merged_stat) return; uniquify_counter(config, counter); @@ -813,6 +814,13 @@ static void print_counter_aggrdata(struct perf_stat_config *config, ena = aggr->counts.ena; run = aggr->counts.run; + /* + * Skip value 0 when enabling --per-thread globally, otherwise it will + * have too many 0 output.
+ */ + if (val == 0 && config->aggr_mode == AGGR_THREAD && config->system_wide) + return; + if (!metric_only) { if (config->json_output) fputc('{', output); @@ -889,9 +897,6 @@ static void print_aggr(struct perf_stat_config *config, print_metric_begin(config, evlist, os, s); evlist__for_each_entry(evlist, counter) { - if (counter->merged_stat) - continue; - print_counter_aggrdata(config, counter, s, os); } print_metric_end(config, os); @@ -918,9 +923,6 @@ static void print_aggr_cgroup(struct perf_stat_config *config, print_metric_begin(config, evlist, os, s); evlist__for_each_entry(evlist, counter) { - if (counter->merged_stat) - continue; - if (counter->cgrp != os->cgrp) continue; @@ -940,9 +942,6 @@ static void print_counter(struct perf_stat_config *config, if (!config->aggr_map) return; - if (counter->merged_stat) - return; - for (s = 0; s < config->aggr_map->nr; s++) { print_counter_aggrdata(config, counter, s, os); } From 40769665b63d8c84b5b1c63fee404d4c20cff751 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 6 Dec 2022 21:59:08 -0800 Subject: [PATCH 3867/4122] perf jevents: Parse metrics during conversion Currently the 'MetricExpr' json value is passed from the json file to the pmu-events.c. This change introduces an expression tree that the value is parsed into. The parsing is done largely by using operator overloading and python's 'eval' function. Two advantages in doing this are: 1) Broken metrics fail at compile time rather than relying on `perf test` to detect them. `perf test` remains relevant for checking event encoding and actual metric use. 2) The conversion to a string from the tree can minimize the metric's string size, for example, preferring 1e6 over 1000000, avoiding multiplication by 1 and removing unnecessary whitespace. On x86 this reduces the string size by 2,930 bytes (0.07%).
In future changes it would be possible to programmatically generate the json expressions (a single line of text and so a pain to write manually) for an architecture using the expression tree. This could avoid copy-pasting metrics for all architecture variants. v4. Doesn't simplify "0*SLOTS" to 0, as the pattern is used to fix Intel metrics with topdown events. v3. Avoids generic types on standard types like set that aren't supported until Python 3.9, fixing an issue with Python 3.6 reported by John Garry. v3 also fixes minor pylint issues and adds a call to Simplify on the read expression tree. v2. Improvements to type information. Committer notes: Added one-line fixer from Ian, see first Link: tag below. Signed-off-by: Ian Rogers Reviewed-by: John Garry Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Sumanth Korikkar Cc: Thomas Richter Link: https://lore.kernel.org/r/CAP-5=fWa=zNK_ecpWGoGggHCQx7z-oW0eGMQf19Maywg0QK=4g@mail.gmail.com Link: https://lore.kernel.org/r/20221207055908.1385448-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/Build | 2 +- tools/perf/pmu-events/jevents.py | 12 +- tools/perf/pmu-events/metric.py | 502 +++++++++++++++++++++++++++ tools/perf/pmu-events/metric_test.py | 157 +++++++++ 4 files changed, 669 insertions(+), 4 deletions(-) create mode 100644 tools/perf/pmu-events/metric.py create mode 100644 tools/perf/pmu-events/metric_test.py diff --git a/tools/perf/pmu-events/Build b/tools/perf/pmu-events/Build index 04ef95174660..15b9e8fdbffa 100644 --- a/tools/perf/pmu-events/Build +++ b/tools/perf/pmu-events/Build @@ -21,7 +21,7 @@ $(OUTPUT)pmu-events/pmu-events.c: pmu-events/empty-pmu-events.c $(call rule_mkdir) $(Q)$(call echo-cmd,gen)cp $< $@ else -$(OUTPUT)pmu-events/pmu-events.c: $(JSON) $(JSON_TEST) $(JEVENTS_PY) +$(OUTPUT)pmu-events/pmu-events.c: $(JSON) $(JSON_TEST) $(JEVENTS_PY) pmu-events/metric.py $(call
rule_mkdir) $(Q)$(call echo-cmd,gen)$(PYTHON) $(JEVENTS_PY) $(JEVENTS_ARCH) pmu-events/arch $@ endif diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py index 0daa3e007528..4c398e0eeb2f 100755 --- a/tools/perf/pmu-events/jevents.py +++ b/tools/perf/pmu-events/jevents.py @@ -4,6 +4,7 @@ import argparse import csv import json +import metric import os import sys from typing import (Callable, Dict, Optional, Sequence, Set, Tuple) @@ -268,9 +269,10 @@ class JsonEvent: self.metric_name = jd.get('MetricName') self.metric_group = jd.get('MetricGroup') self.metric_constraint = jd.get('MetricConstraint') - self.metric_expr = jd.get('MetricExpr') - if self.metric_expr: - self.metric_expr = self.metric_expr.replace('\\', '\\\\') + self.metric_expr = None + if 'MetricExpr' in jd: + self.metric_expr = metric.ParsePerfJson(jd['MetricExpr']).Simplify() + arch_std = jd.get('ArchStdEvent') if precise and self.desc and '(Precise Event)' not in self.desc: extra_desc += ' (Must be precise)' if precise == '2' else (' (Precise ' @@ -322,6 +324,10 @@ class JsonEvent: s = '' for attr in _json_event_attributes: x = getattr(self, attr) + if x and attr == 'metric_expr': + # Convert parsed metric expressions into a string. Slashes + # must be doubled in the file. 
+ x = x.ToPerfJson().replace('\\', '\\\\') s += f'{x}\\000' if x else '\\000' return s diff --git a/tools/perf/pmu-events/metric.py b/tools/perf/pmu-events/metric.py new file mode 100644 index 000000000000..4797ed4fd817 --- /dev/null +++ b/tools/perf/pmu-events/metric.py @@ -0,0 +1,502 @@ +# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +"""Parse or generate representations of perf metrics.""" +import ast +import decimal +import json +import re +from typing import Dict, List, Optional, Set, Union + + +class Expression: + """Abstract base class of elements in a metric expression.""" + + def ToPerfJson(self) -> str: + """Returns a perf json file encoded representation.""" + raise NotImplementedError() + + def ToPython(self) -> str: + """Returns a python expr parseable representation.""" + raise NotImplementedError() + + def Simplify(self): + """Returns a simplified version of self.""" + raise NotImplementedError() + + def Equals(self, other) -> bool: + """Returns true when two expressions are the same.""" + raise NotImplementedError() + + def __str__(self) -> str: + return self.ToPerfJson() + + def __or__(self, other: Union[int, float, 'Expression']) -> 'Operator': + return Operator('|', self, other) + + def __ror__(self, other: Union[int, float, 'Expression']) -> 'Operator': + return Operator('|', other, self) + + def __xor__(self, other: Union[int, float, 'Expression']) -> 'Operator': + return Operator('^', self, other) + + def __and__(self, other: Union[int, float, 'Expression']) -> 'Operator': + return Operator('&', self, other) + + def __lt__(self, other: Union[int, float, 'Expression']) -> 'Operator': + return Operator('<', self, other) + + def __gt__(self, other: Union[int, float, 'Expression']) -> 'Operator': + return Operator('>', self, other) + + def __add__(self, other: Union[int, float, 'Expression']) -> 'Operator': + return Operator('+', self, other) + + def __radd__(self, other: Union[int, float, 'Expression']) -> 'Operator': + return 
Operator('+', other, self) + + def __sub__(self, other: Union[int, float, 'Expression']) -> 'Operator': + return Operator('-', self, other) + + def __rsub__(self, other: Union[int, float, 'Expression']) -> 'Operator': + return Operator('-', other, self) + + def __mul__(self, other: Union[int, float, 'Expression']) -> 'Operator': + return Operator('*', self, other) + + def __rmul__(self, other: Union[int, float, 'Expression']) -> 'Operator': + return Operator('*', other, self) + + def __truediv__(self, other: Union[int, float, 'Expression']) -> 'Operator': + return Operator('/', self, other) + + def __rtruediv__(self, other: Union[int, float, 'Expression']) -> 'Operator': + return Operator('/', other, self) + + def __mod__(self, other: Union[int, float, 'Expression']) -> 'Operator': + return Operator('%', self, other) + + +def _Constify(val: Union[bool, int, float, Expression]) -> Expression: + """Used to ensure that the nodes in the expression tree are all Expression.""" + if isinstance(val, bool): + return Constant(1 if val else 0) + if isinstance(val, (int, float)): + return Constant(val) + return val + + +# Simple lookup for operator precedence, used to avoid unnecessary +# brackets. Precedence matches that of python and the simple expression parser. +_PRECEDENCE = { + '|': 0, + '^': 1, + '&': 2, + '<': 3, + '>': 3, + '+': 4, + '-': 4, + '*': 5, + '/': 5, + '%': 5, +} + + +class Operator(Expression): + """Represents a binary operator in the parse tree.""" + + def __init__(self, operator: str, lhs: Union[int, float, Expression], + rhs: Union[int, float, Expression]): + self.operator = operator + self.lhs = _Constify(lhs) + self.rhs = _Constify(rhs) + + def Bracket(self, + other: Expression, + other_str: str, + rhs: bool = False) -> str: + """If necessary brackets the given other value. + + If ``other`` is an operator then a bracket is necessary when + this/self operator has higher precedence. Consider: '(a + b) * c', + ``other_str`` will be 'a + b'. 
A bracket is necessary as without + the bracket 'a + b * c' will evaluate 'b * c' first. However, '(a + * b) + c' doesn't need a bracket as 'a * b' will always be + evaluated first. For 'a / (b * c)' (ie the same precedence level + operations) then we add the bracket to best match the original + input, but not for '(a / b) * c' where the bracket is unnecessary. + + Args: + other (Expression): is a lhs or rhs operator + other_str (str): ``other`` in the appropriate string form + rhs (bool): is ``other`` on the RHS + + Returns: + str: possibly bracketed other_str + """ + if isinstance(other, Operator): + if _PRECEDENCE.get(self.operator, -1) > _PRECEDENCE.get( + other.operator, -1): + return f'({other_str})' + if rhs and _PRECEDENCE.get(self.operator, -1) == _PRECEDENCE.get( + other.operator, -1): + return f'({other_str})' + return other_str + + def ToPerfJson(self): + return (f'{self.Bracket(self.lhs, self.lhs.ToPerfJson())} {self.operator} ' + f'{self.Bracket(self.rhs, self.rhs.ToPerfJson(), True)}') + + def ToPython(self): + return (f'{self.Bracket(self.lhs, self.lhs.ToPython())} {self.operator} ' + f'{self.Bracket(self.rhs, self.rhs.ToPython(), True)}') + + def Simplify(self) -> Expression: + lhs = self.lhs.Simplify() + rhs = self.rhs.Simplify() + if isinstance(lhs, Constant) and isinstance(rhs, Constant): + return Constant(ast.literal_eval(lhs + self.operator + rhs)) + + if isinstance(self.lhs, Constant): + if self.operator in ('+', '|') and lhs.value == '0': + return rhs + + # Simplify multiplication by 0 except for the slot event which + # is deliberately introduced using this pattern. 
+ if self.operator == '*' and lhs.value == '0' and ( + not isinstance(rhs, Event) or 'slots' not in rhs.name.lower()): + return Constant(0) + + if self.operator == '*' and lhs.value == '1': + return rhs + + if isinstance(rhs, Constant): + if self.operator in ('+', '|') and rhs.value == '0': + return lhs + + if self.operator == '*' and rhs.value == '0': + return Constant(0) + + if self.operator == '*' and self.rhs.value == '1': + return lhs + + return Operator(self.operator, lhs, rhs) + + def Equals(self, other: Expression) -> bool: + if isinstance(other, Operator): + return self.operator == other.operator and self.lhs.Equals( + other.lhs) and self.rhs.Equals(other.rhs) + return False + + +class Select(Expression): + """Represents a select ternary in the parse tree.""" + + def __init__(self, true_val: Union[int, float, Expression], + cond: Union[int, float, Expression], + false_val: Union[int, float, Expression]): + self.true_val = _Constify(true_val) + self.cond = _Constify(cond) + self.false_val = _Constify(false_val) + + def ToPerfJson(self): + true_str = self.true_val.ToPerfJson() + cond_str = self.cond.ToPerfJson() + false_str = self.false_val.ToPerfJson() + return f'({true_str} if {cond_str} else {false_str})' + + def ToPython(self): + return (f'Select({self.true_val.ToPython()}, {self.cond.ToPython()}, ' + f'{self.false_val.ToPython()})') + + def Simplify(self) -> Expression: + cond = self.cond.Simplify() + true_val = self.true_val.Simplify() + false_val = self.false_val.Simplify() + if isinstance(cond, Constant): + return false_val if cond.value == '0' else true_val + + if true_val.Equals(false_val): + return true_val + + return Select(true_val, cond, false_val) + + def Equals(self, other: Expression) -> bool: + if isinstance(other, Select): + return self.cond.Equals(other.cond) and self.false_val.Equals( + other.false_val) and self.true_val.Equals(other.true_val) + return False + + +class Function(Expression): + """A function in an expression like min, max, 
d_ratio.""" + + def __init__(self, + fn: str, + lhs: Union[int, float, Expression], + rhs: Optional[Union[int, float, Expression]] = None): + self.fn = fn + self.lhs = _Constify(lhs) + self.rhs = _Constify(rhs) + + def ToPerfJson(self): + if self.rhs: + return f'{self.fn}({self.lhs.ToPerfJson()}, {self.rhs.ToPerfJson()})' + return f'{self.fn}({self.lhs.ToPerfJson()})' + + def ToPython(self): + if self.rhs: + return f'{self.fn}({self.lhs.ToPython()}, {self.rhs.ToPython()})' + return f'{self.fn}({self.lhs.ToPython()})' + + def Simplify(self) -> Expression: + lhs = self.lhs.Simplify() + rhs = self.rhs.Simplify() if self.rhs else None + if isinstance(lhs, Constant) and isinstance(rhs, Constant): + if self.fn == 'd_ratio': + if rhs.value == '0': + return Constant(0) + Constant(ast.literal_eval(f'{lhs} / {rhs}')) + return Constant(ast.literal_eval(f'{self.fn}({lhs}, {rhs})')) + + return Function(self.fn, lhs, rhs) + + def Equals(self, other: Expression) -> bool: + if isinstance(other, Function): + return self.fn == other.fn and self.lhs.Equals( + other.lhs) and self.rhs.Equals(other.rhs) + return False + + +def _FixEscapes(s: str) -> str: + s = re.sub(r'([^\\]),', r'\1\\,', s) + return re.sub(r'([^\\])=', r'\1\\=', s) + + +class Event(Expression): + """An event in an expression.""" + + def __init__(self, name: str, legacy_name: str = ''): + self.name = _FixEscapes(name) + self.legacy_name = _FixEscapes(legacy_name) + + def ToPerfJson(self): + result = re.sub('/', '@', self.name) + return result + + def ToPython(self): + return f'Event(r"{self.name}")' + + def Simplify(self) -> Expression: + return self + + def Equals(self, other: Expression) -> bool: + return isinstance(other, Event) and self.name == other.name + + +class Constant(Expression): + """A constant within the expression tree.""" + + def __init__(self, value: Union[float, str]): + ctx = decimal.Context() + ctx.prec = 20 + dec = ctx.create_decimal(repr(value) if isinstance(value, float) else value) + self.value 
= dec.normalize().to_eng_string() + self.value = self.value.replace('+', '') + self.value = self.value.replace('E', 'e') + + def ToPerfJson(self): + return self.value + + def ToPython(self): + return f'Constant({self.value})' + + def Simplify(self) -> Expression: + return self + + def Equals(self, other: Expression) -> bool: + return isinstance(other, Constant) and self.value == other.value + + +class Literal(Expression): + """A runtime literal within the expression tree.""" + + def __init__(self, value: str): + self.value = value + + def ToPerfJson(self): + return self.value + + def ToPython(self): + return f'Literal({self.value})' + + def Simplify(self) -> Expression: + return self + + def Equals(self, other: Expression) -> bool: + return isinstance(other, Literal) and self.value == other.value + + +def min(lhs: Union[int, float, Expression], rhs: Union[int, float, + Expression]) -> Function: + # pylint: disable=redefined-builtin + # pylint: disable=invalid-name + return Function('min', lhs, rhs) + + +def max(lhs: Union[int, float, Expression], rhs: Union[int, float, + Expression]) -> Function: + # pylint: disable=redefined-builtin + # pylint: disable=invalid-name + return Function('max', lhs, rhs) + + +def d_ratio(lhs: Union[int, float, Expression], + rhs: Union[int, float, Expression]) -> Function: + # pylint: disable=redefined-builtin + # pylint: disable=invalid-name + return Function('d_ratio', lhs, rhs) + + +def source_count(event: Event) -> Function: + # pylint: disable=redefined-builtin + # pylint: disable=invalid-name + return Function('source_count', event) + + +class Metric: + """An individual metric that will specifiable on the perf command line.""" + groups: Set[str] + expr: Expression + scale_unit: str + constraint: bool + + def __init__(self, + name: str, + description: str, + expr: Expression, + scale_unit: str, + constraint: bool = False): + self.name = name + self.description = description + self.expr = expr.Simplify() + # Workraound 
valid_only_metric hiding certain metrics based on unit. + scale_unit = scale_unit.replace('/sec', ' per sec') + if scale_unit[0].isdigit(): + self.scale_unit = scale_unit + else: + self.scale_unit = f'1{scale_unit}' + self.constraint = constraint + self.groups = set() + + def __lt__(self, other): + """Sort order.""" + return self.name < other.name + + def AddToMetricGroup(self, group): + """Callback used when being added to a MetricGroup.""" + self.groups.add(group.name) + + def Flatten(self) -> Set['Metric']: + """Return a leaf metric.""" + return set([self]) + + def ToPerfJson(self) -> Dict[str, str]: + """Return as dictionary for Json generation.""" + result = { + 'MetricName': self.name, + 'MetricGroup': ';'.join(sorted(self.groups)), + 'BriefDescription': self.description, + 'MetricExpr': self.expr.ToPerfJson(), + 'ScaleUnit': self.scale_unit + } + if self.constraint: + result['MetricConstraint'] = 'NO_NMI_WATCHDOG' + + return result + + +class _MetricJsonEncoder(json.JSONEncoder): + """Special handling for Metric objects.""" + + def default(self, o): + if isinstance(o, Metric): + return o.ToPerfJson() + return json.JSONEncoder.default(self, o) + + +class MetricGroup: + """A group of metrics. + + Metric groups may be specificd on the perf command line, but within + the json they aren't encoded. Metrics may be in multiple groups + which can facilitate arrangements similar to trees. 
+ """ + + def __init__(self, name: str, metric_list: List[Union[Metric, + 'MetricGroup']]): + self.name = name + self.metric_list = metric_list + for metric in metric_list: + metric.AddToMetricGroup(self) + + def AddToMetricGroup(self, group): + """Callback used when a MetricGroup is added into another.""" + for metric in self.metric_list: + metric.AddToMetricGroup(group) + + def Flatten(self) -> Set[Metric]: + """Returns a set of all leaf metrics.""" + result = set() + for x in self.metric_list: + result = result.union(x.Flatten()) + + return result + + def ToPerfJson(self) -> str: + return json.dumps(sorted(self.Flatten()), indent=2, cls=_MetricJsonEncoder) + + def __str__(self) -> str: + return self.ToPerfJson() + + +class _RewriteIfExpToSelect(ast.NodeTransformer): + + def visit_IfExp(self, node): + # pylint: disable=invalid-name + self.generic_visit(node) + call = ast.Call( + func=ast.Name(id='Select', ctx=ast.Load()), + args=[node.body, node.test, node.orelse], + keywords=[]) + ast.copy_location(call, node.test) + return call + + +def ParsePerfJson(orig: str) -> Expression: + """A simple json metric expression decoder. + + Converts a json encoded metric expression by way of python's ast and + eval routine. First tokens are mapped to Event calls, then + accidentally converted keywords or literals are mapped to their + appropriate calls. Python's ast is used to match if-else that can't + be handled via operator overloading. Finally the ast is evaluated. + + Args: + orig (str): String to parse. + + Returns: + Expression: The parsed string. 
+ """ + # pylint: disable=eval-used + py = orig.strip() + py = re.sub(r'([a-zA-Z][^-+/\* \\\(\),]*(?:\\.[^-+/\* \\\(\),]*)*)', + r'Event(r"\1")', py) + py = re.sub(r'#Event\(r"([^"]*)"\)', r'Literal("#\1")', py) + py = re.sub(r'([0-9]+)Event\(r"(e[0-9]+)"\)', r'\1\2', py) + keywords = ['if', 'else', 'min', 'max', 'd_ratio', 'source_count'] + for kw in keywords: + py = re.sub(rf'Event\(r"{kw}"\)', kw, py) + + parsed = ast.parse(py, mode='eval') + _RewriteIfExpToSelect().visit(parsed) + parsed = ast.fix_missing_locations(parsed) + return _Constify(eval(compile(parsed, orig, 'eval'))) diff --git a/tools/perf/pmu-events/metric_test.py b/tools/perf/pmu-events/metric_test.py new file mode 100644 index 000000000000..15315d0f716c --- /dev/null +++ b/tools/perf/pmu-events/metric_test.py @@ -0,0 +1,157 @@ +# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +import unittest +from metric import Constant +from metric import Event +from metric import ParsePerfJson + + +class TestMetricExpressions(unittest.TestCase): + + def test_Operators(self): + a = Event('a') + b = Event('b') + self.assertEqual((a | b).ToPerfJson(), 'a | b') + self.assertEqual((a ^ b).ToPerfJson(), 'a ^ b') + self.assertEqual((a & b).ToPerfJson(), 'a & b') + self.assertEqual((a < b).ToPerfJson(), 'a < b') + self.assertEqual((a > b).ToPerfJson(), 'a > b') + self.assertEqual((a + b).ToPerfJson(), 'a + b') + self.assertEqual((a - b).ToPerfJson(), 'a - b') + self.assertEqual((a * b).ToPerfJson(), 'a * b') + self.assertEqual((a / b).ToPerfJson(), 'a / b') + self.assertEqual((a % b).ToPerfJson(), 'a % b') + one = Constant(1) + self.assertEqual((a + one).ToPerfJson(), 'a + 1') + + def test_Brackets(self): + a = Event('a') + b = Event('b') + c = Event('c') + self.assertEqual((a * b + c).ToPerfJson(), 'a * b + c') + self.assertEqual((a + b * c).ToPerfJson(), 'a + b * c') + self.assertEqual(((a + a) + a).ToPerfJson(), 'a + a + a') + self.assertEqual(((a + b) * c).ToPerfJson(), '(a + b) * c') + self.assertEqual((a + 
(b * c)).ToPerfJson(), 'a + b * c') + self.assertEqual(((a / b) * c).ToPerfJson(), 'a / b * c') + self.assertEqual((a / (b * c)).ToPerfJson(), 'a / (b * c)') + + def test_ParsePerfJson(self): + # Based on an example of a real metric. + before = '(a + b + c + d) / (2 * e)' + after = before + self.assertEqual(ParsePerfJson(before).ToPerfJson(), after) + + # Parsing should handle events with '-' in their name. Note, in + # the json file the '\' are doubled to '\\'. + before = r'topdown\-fe\-bound / topdown\-slots - 1' + after = before + self.assertEqual(ParsePerfJson(before).ToPerfJson(), after) + + # Parsing should handle escaped modifiers. Note, in the json file + # the '\' are doubled to '\\'. + before = r'arb@event\=0x81\,umask\=0x1@ + arb@event\=0x84\,umask\=0x1@' + after = before + self.assertEqual(ParsePerfJson(before).ToPerfJson(), after) + + # Parsing should handle exponents in numbers. + before = r'a + 1e12 + b' + after = before + self.assertEqual(ParsePerfJson(before).ToPerfJson(), after) + + def test_IfElseTests(self): + # if-else needs rewriting to Select and back. + before = r'Event1 if #smt_on else Event2' + after = f'({before})' + self.assertEqual(ParsePerfJson(before).ToPerfJson(), after) + + before = r'Event1 if 0 else Event2' + after = f'({before})' + self.assertEqual(ParsePerfJson(before).ToPerfJson(), after) + + before = r'Event1 if 1 else Event2' + after = f'({before})' + self.assertEqual(ParsePerfJson(before).ToPerfJson(), after) + + # Ensure the select is evaluate last. 
+ before = r'Event1 + 1 if Event2 < 2 else Event3 + 3' + after = (r'Select(Event(r"Event1") + Constant(1), Event(r"Event2") < ' + r'Constant(2), Event(r"Event3") + Constant(3))') + self.assertEqual(ParsePerfJson(before).ToPython(), after) + + before = r'Event1 > 1 if Event2 < 2 else Event3 > 3' + after = (r'Select(Event(r"Event1") > Constant(1), Event(r"Event2") < ' + r'Constant(2), Event(r"Event3") > Constant(3))') + self.assertEqual(ParsePerfJson(before).ToPython(), after) + + before = r'min(a + b if c > 1 else c + d, e + f)' + after = r'min((a + b if c > 1 else c + d), e + f)' + self.assertEqual(ParsePerfJson(before).ToPerfJson(), after) + + before =3D r'a if b else c if d else e' + after =3D r'(a if b else (c if d else e))' + self.assertEqual(ParsePerfJson(before).ToPerfJson(), after) + + def test_ToPython(self): + # pylint: disable=eval-used + # Based on an example of a real metric. + before = '(a + b + c + d) / (2 * e)' + py = ParsePerfJson(before).ToPython() + after = eval(py).ToPerfJson() + self.assertEqual(before, after) + + def test_Simplify(self): + before = '1 + 2 + 3' + after = '6' + self.assertEqual(ParsePerfJson(before).Simplify().ToPerfJson(), after) + + before = 'a + 0' + after = 'a' + self.assertEqual(ParsePerfJson(before).Simplify().ToPerfJson(), after) + + before = '0 + a' + after = 'a' + self.assertEqual(ParsePerfJson(before).Simplify().ToPerfJson(), after) + + before = 'a | 0' + after = 'a' + self.assertEqual(ParsePerfJson(before).Simplify().ToPerfJson(), after) + + before = '0 | a' + after = 'a' + self.assertEqual(ParsePerfJson(before).Simplify().ToPerfJson(), after) + + before = 'a * 0' + after = '0' + self.assertEqual(ParsePerfJson(before).Simplify().ToPerfJson(), after) + + before = '0 * a' + after = '0' + self.assertEqual(ParsePerfJson(before).Simplify().ToPerfJson(), after) + + before = 'a * 1' + after = 'a' + self.assertEqual(ParsePerfJson(before).Simplify().ToPerfJson(), after) + + before = '1 * a' + after = 'a' + 
self.assertEqual(ParsePerfJson(before).Simplify().ToPerfJson(), after) + + before = 'a if 0 else b' + after = 'b' + self.assertEqual(ParsePerfJson(before).Simplify().ToPerfJson(), after) + + before = 'a if 1 else b' + after = 'a' + self.assertEqual(ParsePerfJson(before).Simplify().ToPerfJson(), after) + + before = 'a if b else a' + after = 'a' + self.assertEqual(ParsePerfJson(before).Simplify().ToPerfJson(), after) + + # Pattern used to add a slots event to metrics that require it. + before = '0 * SLOTS' + after = '0 * SLOTS' + self.assertEqual(ParsePerfJson(before).Simplify().ToPerfJson(), after) + +if __name__ == '__main__': + unittest.main() From 378ef0f5d9d7f4652d7a40e0711e8b845ada1cbd Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 5 Dec 2022 14:59:39 -0800 Subject: [PATCH 3868/4122] perf build: Use libtraceevent from the system MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the LIBTRACEEVENT_DYNAMIC and LIBTRACEFS_DYNAMIC make command line variables. If libtraceevent isn't installed or NO_LIBTRACEEVENT=1 is passed to the build, don't compile in libtraceevent and libtracefs support. This also disables CONFIG_TRACE that controls "perf trace". CONFIG_LIBTRACEEVENT is used to control enablement in Build/Makefiles, HAVE_LIBTRACEEVENT is used in C code. Without HAVE_LIBTRACEEVENT tracepoints are disabled and as such the commands kmem, kwork, lock, sched and timechart are removed. The majority of commands continue to work including "perf test". Committer notes: Fixed up a tools/perf/util/Build reject and added: #include to tools/perf/util/scripting-engines/trace-event-perl.c. 
Committer testing: $ rpm -qi libtraceevent-devel Name : libtraceevent-devel Version : 1.5.3 Release : 2.fc36 Architecture: x86_64 Install Date: Mon 25 Jul 2022 03:20:19 PM -03 Group : Unspecified Size : 27728 License : LGPLv2+ and GPLv2+ Signature : RSA/SHA256, Fri 15 Apr 2022 02:11:58 PM -03, Key ID 999f7cbf38ab71f4 Source RPM : libtraceevent-1.5.3-2.fc36.src.rpm Build Date : Fri 15 Apr 2022 10:57:01 AM -03 Build Host : buildvm-x86-05.iad2.fedoraproject.org Packager : Fedora Project Vendor : Fedora Project URL : https://git.kernel.org/pub/scm/libs/libtrace/libtraceevent.git/ Bug URL : https://bugz.fedoraproject.org/libtraceevent Summary : Development headers of libtraceevent Description : Development headers of libtraceevent-libs $ Default build: $ ldd ~/bin/perf | grep tracee libtraceevent.so.1 => /lib64/libtraceevent.so.1 (0x00007f1dcaf8f000) $ # perf trace -e sched:* --max-events 10 0.000 migration/0/17 sched:sched_migrate_task(comm: "", pid: 1603763 (perf), prio: 120, dest_cpu: 1) 0.005 migration/0/17 sched:sched_wake_idle_without_ipi(cpu: 1) 0.011 migration/0/17 sched:sched_switch(prev_comm: "", prev_pid: 17 (migration/0), prev_state: 1, next_comm: "", next_prio: 120) 1.173 :0/0 sched:sched_wakeup(comm: "", pid: 3138 (gnome-terminal-), prio: 120) 1.180 :0/0 sched:sched_switch(prev_comm: "", prev_prio: 120, next_comm: "", next_pid: 3138 (gnome-terminal-), next_prio: 120) 0.156 migration/1/21 sched:sched_migrate_task(comm: "", pid: 1603763 (perf), prio: 120, orig_cpu: 1, dest_cpu: 2) 0.160 migration/1/21 sched:sched_wake_idle_without_ipi(cpu: 2) 0.166 migration/1/21 sched:sched_switch(prev_comm: "", prev_pid: 21 (migration/1), prev_state: 1, next_comm: "", next_prio: 120) 1.183 :0/0 sched:sched_wakeup(comm: "", pid: 1602985 (kworker/u16:0-f), prio: 120, target_cpu: 1) 1.186 :0/0 sched:sched_switch(prev_comm: "", prev_prio: 120, next_comm: "", next_pid: 1602985 (kworker/u16:0-f), next_prio: 120) # Had to tweak tools/perf/util/setup.py to make sure the python 
binding shared object links with libtraceevent if -DHAVE_LIBTRACEEVENT is present in CFLAGS. Building with NO_LIBTRACEEVENT=1 uncovered some more build failures: - Make building of data-convert-bt.c conditional on CONFIG_LIBTRACEEVENT=y - perf-$(CONFIG_LIBTRACEEVENT) += scripts/ - bpf_kwork.o needs also to be dependent on CONFIG_LIBTRACEEVENT=y - The python binding needed some fixups and util/trace-event.c can't be built and linked with the python binding shared object, so remove it in tools/perf/util/setup.py and exclude it from the list of dependencies in the python/perf.so Makefile.perf target. Building without libtraceevent-devel installed uncovered more build failures: - The python binding tools/perf/util/python.c was assuming that traceevent/parse-events.h was always available, which was the case when we defaulted to using the in-kernel tools/lib/traceevent/ files, now we need to enclose it under ifdef HAVE_LIBTRACEEVENT, just like the other parts of it that deal with tracepoints. - We have to ifdef the rules in the Build files with CONFIG_LIBTRACEEVENT=y to build builtin-trace.c and tools/perf/trace/beauty/ as we only ifdef setting CONFIG_TRACE=y when setting NO_LIBTRACEEVENT=1 in the make command line, not when we don't detect libtraceevent-devel installed in the system. Simplification here to avoid these two ways of disabling builtin-trace.c and not having CONFIG_TRACE=y when libtraceevent-devel isn't installed is the clean way. From Athira: tools/perf/arch/powerpc/util/Build -perf-y += kvm-stat.o +perf-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o Then, ditto for arm64 and s390, detected by container cross build tests. - s/390 uses test__checkevent_tracepoint() that is now only available if HAVE_LIBTRACEEVENT is defined, enclose the callsite with ifdef HAVE_LIBTRACEEVENT. 
Also from Athira: With this change, I could successfully compile in these environment: - Without libtraceevent-devel installed - With libtraceevent-devel installed - With “make NO_LIBTRACEEVENT=1” Then, finally rename CONFIG_TRACEEVENT to CONFIG_LIBTRACEEVENT for consistency with other libraries detected in tools/perf/. Signed-off-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Tested-by: Athira Rajeev Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Stephane Eranian Cc: bpf@vger.kernel.org Link: http://lore.kernel.org/lkml/20221205225940.3079667-3-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Build | 20 ++-- tools/perf/Makefile.config | 37 ++++--- tools/perf/Makefile.perf | 104 ++---------------- tools/perf/arch/arm64/util/Build | 2 +- tools/perf/arch/powerpc/util/Build | 2 +- tools/perf/arch/s390/util/Build | 2 +- tools/perf/arch/x86/util/Build | 2 +- tools/perf/arch/x86/util/intel-pt.c | 4 + tools/perf/builtin-annotate.c | 2 + tools/perf/builtin-data.c | 5 +- tools/perf/builtin-inject.c | 8 ++ tools/perf/builtin-kmem.c | 1 + tools/perf/builtin-kvm.c | 12 +- tools/perf/builtin-kwork.c | 1 + tools/perf/builtin-record.c | 2 + tools/perf/builtin-report.c | 9 +- tools/perf/builtin-script.c | 19 +++- tools/perf/builtin-timechart.c | 1 + tools/perf/builtin-trace.c | 5 +- tools/perf/builtin-version.c | 1 + tools/perf/perf.c | 24 +++- .../perf/scripts/python/Perf-Trace-Util/Build | 2 +- tools/perf/tests/Build | 12 +- tools/perf/tests/builtin-test.c | 6 + tools/perf/tests/parse-events.c | 23 +++- tools/perf/util/Build | 21 ++-- tools/perf/util/data-convert-bt.c | 5 +- tools/perf/util/data-convert-json.c | 9 +- tools/perf/util/evlist.c | 6 +- tools/perf/util/evlist.h | 4 + tools/perf/util/evsel.c | 11 +- tools/perf/util/evsel.h | 12 +- tools/perf/util/evsel_fprintf.c | 7 +- tools/perf/util/header.c | 19 ++++ tools/perf/util/header.h | 2 + tools/perf/util/intel-pt.c | 7 +- 
tools/perf/util/parse-events.c | 15 +++ tools/perf/util/parse-events.h | 1 - tools/perf/util/python.c | 10 ++ tools/perf/util/scripting-engines/Build | 6 +- .../util/scripting-engines/trace-event-perl.c | 1 + .../scripting-engines/trace-event-python.c | 1 + tools/perf/util/session.c | 2 + tools/perf/util/session.h | 2 + tools/perf/util/setup.py | 10 +- tools/perf/util/sort.c | 60 ++++++++-- tools/perf/util/synthetic-events.c | 6 + tools/perf/util/trace-event-parse.c | 2 + tools/perf/util/trace-event-read.c | 1 + tools/perf/util/trace-event-scripting.c | 1 + tools/perf/util/trace-event.c | 1 - tools/perf/util/trace-event.h | 11 +- 52 files changed, 355 insertions(+), 184 deletions(-) diff --git a/tools/perf/Build b/tools/perf/Build index 496b096153bb..6dd67e502295 100644 --- a/tools/perf/Build +++ b/tools/perf/Build @@ -5,7 +5,6 @@ perf-y += builtin-diff.o perf-y += builtin-evlist.o perf-y += builtin-ftrace.o perf-y += builtin-help.o -perf-y += builtin-sched.o perf-y += builtin-buildid-list.o perf-y += builtin-buildid-cache.o perf-y += builtin-kallsyms.o @@ -13,11 +12,8 @@ perf-y += builtin-list.o perf-y += builtin-record.o perf-y += builtin-report.o perf-y += builtin-stat.o -perf-y += builtin-timechart.o perf-y += builtin-top.o perf-y += builtin-script.o -perf-y += builtin-kmem.o -perf-y += builtin-lock.o perf-y += builtin-kvm.o perf-y += builtin-inject.o perf-y += builtin-mem.o @@ -25,9 +21,18 @@ perf-y += builtin-data.o perf-y += builtin-version.o perf-y += builtin-c2c.o perf-y += builtin-daemon.o -perf-y += builtin-kwork.o -perf-$(CONFIG_TRACE) += builtin-trace.o +perf-$(CONFIG_LIBTRACEEVENT) += builtin-kmem.o +perf-$(CONFIG_LIBTRACEEVENT) += builtin-kwork.o +perf-$(CONFIG_LIBTRACEEVENT) += builtin-lock.o +perf-$(CONFIG_LIBTRACEEVENT) += builtin-sched.o +perf-$(CONFIG_LIBTRACEEVENT) += builtin-timechart.o + +ifeq ($(CONFIG_LIBTRACEEVENT),y) + perf-$(CONFIG_TRACE) += builtin-trace.o + perf-$(CONFIG_TRACE) += trace/beauty/ +endif + perf-$(CONFIG_LIBELF) += 
builtin-probe.o perf-y += bench/ @@ -51,7 +56,6 @@ CFLAGS_builtin-report.o += -DDOCDIR="BUILD_STR($(srcdir_SQ)/Documentation)" perf-y += util/ perf-y += arch/ perf-y += ui/ -perf-y += scripts/ -perf-$(CONFIG_TRACE) += trace/beauty/ +perf-$(CONFIG_LIBTRACEEVENT) += scripts/ gtk-y += ui/gtk/ diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 9cc3c48f3288..680228e19c1a 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -763,18 +763,20 @@ ifndef NO_LIBUNWIND EXTLIBS += $(EXTLIBS_LIBUNWIND) endif -ifeq ($(NO_SYSCALL_TABLE),0) - $(call detected,CONFIG_TRACE) -else - ifndef NO_LIBAUDIT - $(call feature_check,libaudit) - ifneq ($(feature-libaudit), 1) - msg := $(warning No libaudit.h found, disables 'trace' tool, please install audit-libs-devel or libaudit-dev); - NO_LIBAUDIT := 1 - else - CFLAGS += -DHAVE_LIBAUDIT_SUPPORT - EXTLIBS += -laudit - $(call detected,CONFIG_TRACE) +ifneq ($(NO_LIBTRACEEVENT),1) + ifeq ($(NO_SYSCALL_TABLE),0) + $(call detected,CONFIG_TRACE) + else + ifndef NO_LIBAUDIT + $(call feature_check,libaudit) + ifneq ($(feature-libaudit), 1) + msg := $(warning No libaudit.h found, disables 'trace' tool, please install audit-libs-devel or libaudit-dev); + NO_LIBAUDIT := 1 + else + CFLAGS += -DHAVE_LIBAUDIT_SUPPORT + EXTLIBS += -laudit + $(call detected,CONFIG_TRACE) + endif endif endif endif @@ -1182,9 +1184,11 @@ ifdef LIBPFM4 endif endif -ifdef LIBTRACEEVENT_DYNAMIC +# libtraceevent is a recommended dependency picked up from the system. 
+ifneq ($(NO_LIBTRACEEVENT),1) $(call feature_check,libtraceevent) ifeq ($(feature-libtraceevent), 1) + CFLAGS += -DHAVE_LIBTRACEEVENT EXTLIBS += -ltraceevent LIBTRACEEVENT_VERSION := $(shell $(PKG_CONFIG) --modversion libtraceevent) LIBTRACEEVENT_VERSION_1 := $(word 1, $(subst ., ,$(LIBTRACEEVENT_VERSION))) @@ -1192,12 +1196,11 @@ ifdef LIBTRACEEVENT_DYNAMIC LIBTRACEEVENT_VERSION_3 := $(word 3, $(subst ., ,$(LIBTRACEEVENT_VERSION))) LIBTRACEEVENT_VERSION_CPP := $(shell expr $(LIBTRACEEVENT_VERSION_1) \* 255 \* 255 + $(LIBTRACEEVENT_VERSION_2) \* 255 + $(LIBTRACEEVENT_VERSION_3)) CFLAGS += -DLIBTRACEEVENT_VERSION=$(LIBTRACEEVENT_VERSION_CPP) + $(call detected,CONFIG_LIBTRACEEVENT) else - dummy := $(error Error: No libtraceevent devel library found, please install libtraceevent-devel); + dummy := $(warning Warning: libtraceevent is missing limiting functionality, please install libtraceevent-dev/libtraceevent-devel) endif -endif -ifdef LIBTRACEFS_DYNAMIC $(call feature_check,libtracefs) ifeq ($(feature-libtracefs), 1) EXTLIBS += -ltracefs @@ -1207,8 +1210,6 @@ ifdef LIBTRACEFS_DYNAMIC LIBTRACEFS_VERSION_3 := $(word 3, $(subst ., ,$(LIBTRACEFS_VERSION))) LIBTRACEFS_VERSION_CPP := $(shell expr $(LIBTRACEFS_VERSION_1) \* 255 \* 255 + $(LIBTRACEFS_VERSION_2) \* 255 + $(LIBTRACEFS_VERSION_3)) CFLAGS += -DLIBTRACEFS_VERSION=$(LIBTRACEFS_VERSION_CPP) - else - dummy := $(error Error: No libtracefs devel library found, please install libtracefs-dev); endif endif diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 6689f644782f..98f629bbd1aa 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -128,10 +128,6 @@ include ../scripts/utilities.mak # # Define BUILD_BPF_SKEL to enable BPF skeletons # -# Define LIBTRACEEVENT_DYNAMIC to enable libtraceevent dynamic linking -# -# Define LIBTRACEFS_DYNAMIC to enable libtracefs dynamic linking -# # As per kernel Makefile, avoid funny character set dependencies unexport LC_ALL @@ -242,10 +238,6 @@ 
sub-make: fixdep else # force_fixdep LIBAPI_DIR = $(srctree)/tools/lib/api/ -ifndef LIBTRACEEVENT_DYNAMIC -LIBTRACEEVENT_DIR = $(srctree)/tools/lib/traceevent/ -LIBTRACEEVENT_PLUGINS_DIR = $(LIBTRACEEVENT_DIR)/plugins -endif LIBBPF_DIR = $(srctree)/tools/lib/bpf/ LIBSUBCMD_DIR = $(srctree)/tools/lib/subcmd/ LIBSYMBOL_DIR = $(srctree)/tools/lib/symbol/ @@ -295,31 +287,6 @@ SCRIPT_SH += perf-iostat.sh grep-libs = $(filter -l%,$(1)) strip-libs = $(filter-out -l%,$(1)) -ifndef LIBTRACEEVENT_DYNAMIC -ifneq ($(OUTPUT),) - LIBTRACEEVENT_OUTPUT = $(abspath $(OUTPUT))/libtraceevent -else - LIBTRACEEVENT_OUTPUT = $(CURDIR)/libtraceevent -endif -LIBTRACEEVENT_PLUGINS_OUTPUT = $(LIBTRACEEVENT_OUTPUT)_plugins -LIBTRACEEVENT_DESTDIR = $(LIBTRACEEVENT_OUTPUT) -LIBTRACEEVENT_PLUGINS_DESTDIR = $(LIBTRACEEVENT_PLUGINS_OUTPUT) -LIBTRACEEVENT_INCLUDE = $(LIBTRACEEVENT_DESTDIR)/include -LIBTRACEEVENT = $(LIBTRACEEVENT_OUTPUT)/libtraceevent.a -export LIBTRACEEVENT -LIBTRACEEVENT_DYNAMIC_LIST = $(LIBTRACEEVENT_PLUGINS_OUTPUT)/libtraceevent-dynamic-list -CFLAGS += -I$(LIBTRACEEVENT_OUTPUT)/include -# -# The static build has no dynsym table, so this does not work for -# static build. Looks like linker starts to scream about that now -# (in Fedora 26) so we need to switch it off for static build. 
-DYNAMIC_LIST_LDFLAGS = -Xlinker --dynamic-list=$(LIBTRACEEVENT_DYNAMIC_LIST) -LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS = $(if $(findstring -static,$(LDFLAGS)),,$(DYNAMIC_LIST_LDFLAGS)) -else -LIBTRACEEVENT_DYNAMIC_LIST = -LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS = -endif - ifneq ($(OUTPUT),) LIBAPI_OUTPUT = $(abspath $(OUTPUT))/libapi else @@ -380,13 +347,14 @@ export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP python-clean := $(call QUIET_CLEAN, python) $(RM) -r $(PYTHON_EXTBUILD) $(OUTPUT)python/perf*.so -PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources) -ifndef LIBTRACEEVENT_DYNAMIC -PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) $(LIBAPI) +ifeq ($(CONFIG_LIBTRACEEVENT),y) + PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources) else -PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBAPI) + PYTHON_EXT_SRCS := $(shell grep -v '^\#\|util/trace-event.c' util/python-ext-sources) endif +PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBAPI) + SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH)) PROGRAMS += $(OUTPUT)perf @@ -430,9 +398,6 @@ ifndef NO_LIBBPF PERFLIBS += $(LIBBPF) endif endif -ifndef LIBTRACEEVENT_DYNAMIC - PERFLIBS += $(LIBTRACEEVENT) -endif # We choose to avoid "if .. else if .. else .. endif endif" # because maintaining the nesting to match is a pain. 
If @@ -682,9 +647,9 @@ all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS) # Create python binding output directory if not already present _dummy := $(shell [ -d '$(OUTPUT)python' ] || mkdir -p '$(OUTPUT)python') -$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBTRACEEVENT_DYNAMIC_LIST) $(LIBPERF) +$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBPERF) $(QUIET_GEN)LDSHARED="$(CC) -pthread -shared" \ - CFLAGS='$(CFLAGS)' LDFLAGS='$(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS)' \ + CFLAGS='$(CFLAGS)' LDFLAGS='$(LDFLAGS)' \ $(PYTHON_WORD) util/setup.py \ --quiet build_ext; \ cp $(PYTHON_EXTBUILD_LIB)perf*.so $(OUTPUT)python/ @@ -710,8 +675,8 @@ $(PERF_IN): prepare FORCE $(PMU_EVENTS_IN): FORCE prepare $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=pmu-events obj=pmu-events -$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(PMU_EVENTS_IN) $(LIBTRACEEVENT_DYNAMIC_LIST) - $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS) \ +$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(PMU_EVENTS_IN) + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) \ $(PERF_IN) $(PMU_EVENTS_IN) $(LIBS) -o $@ $(GTK_IN): FORCE prepare @@ -797,10 +762,6 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc $(LIBSYMBOL) \ bpf-skel -ifndef LIBTRACEEVENT_DYNAMIC -prepare: $(LIBTRACEEVENT) -endif - $(OUTPUT)%.o: %.c prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ @@ -856,38 +817,6 @@ endif $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h) -ifndef LIBTRACEEVENT_DYNAMIC -LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) 'EXTRA_CFLAGS=$(EXTRA_CFLAGS)' 'LDFLAGS=$(filter-out -static,$(LDFLAGS))' - -$(LIBTRACEEVENT): FORCE | $(LIBTRACEEVENT_OUTPUT) - $(Q)$(MAKE) -C $(LIBTRACEEVENT_DIR) O=$(LIBTRACEEVENT_OUTPUT) \ - DESTDIR=$(LIBTRACEEVENT_DESTDIR) prefix= \ - $@ install_headers - -$(LIBTRACEEVENT)-clean: - $(call QUIET_CLEAN, libtraceevent) - $(Q)$(RM) -r -- 
$(LIBTRACEEVENT_OUTPUT) - -libtraceevent_plugins: FORCE | $(LIBTRACEEVENT_PLUGINS_OUTPUT) - $(Q)$(MAKE) -C $(LIBTRACEEVENT_PLUGINS_DIR) O=$(LIBTRACEEVENT_PLUGINS_OUTPUT) \ - DESTDIR=$(LIBTRACEEVENT_PLUGINS_DESTDIR) prefix= \ - plugins - -libtraceevent_plugins-clean: - $(call QUIET_CLEAN, libtraceevent_plugins) - $(Q)$(RM) -r -- $(LIBTRACEEVENT_PLUGINS_OUTPUT) - -$(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins - $(Q)$(MAKE) -C $(LIBTRACEEVENT_PLUGINS_DIR) O=$(LIBTRACEEVENT_PLUGINS_OUTPUT) \ - DESTDIR=$(LIBTRACEEVENT_PLUGINS_DESTDIR) prefix= \ - $(LIBTRACEEVENT_FLAGS) $@ - -install-traceevent-plugins: libtraceevent_plugins - $(Q)$(MAKE) -C $(LIBTRACEEVENT_PLUGINS_DIR) O=$(LIBTRACEEVENT_PLUGINS_OUTPUT) \ - DESTDIR=$(DESTDIR_SQ)$(prefix) prefix= \ - $(LIBTRACEEVENT_FLAGS) install -endif - $(LIBAPI): FORCE | $(LIBAPI_OUTPUT) $(Q)$(MAKE) -C $(LIBAPI_DIR) O=$(LIBAPI_OUTPUT) \ DESTDIR=$(LIBAPI_DESTDIR) prefix= \ @@ -1095,10 +1024,6 @@ install-tests: all install-gtk install-bin: install-tools install-tests -ifndef LIBTRACEEVENT_DYNAMIC -install-bin: install-traceevent-plugins -endif - install: install-bin try-install-man install-python_ext: @@ -1124,11 +1049,6 @@ SKELETONS += $(SKEL_OUT)/kwork_trace.skel.h $(SKEL_TMP_OUT) $(LIBAPI_OUTPUT) $(LIBBPF_OUTPUT) $(LIBPERF_OUTPUT) $(LIBSUBCMD_OUTPUT) $(LIBSYMBOL_OUTPUT): $(Q)$(MKDIR) -p $@ -ifndef LIBTRACEEVENT_DYNAMIC -$(LIBTRACEEVENT_OUTPUT) $(LIBTRACEEVENT_PLUGINS_OUTPUT): - $(Q)$(MKDIR) -p $@ -endif - ifdef BUILD_BPF_SKEL BPFTOOL := $(SKEL_TMP_OUT)/bootstrap/bpftool BPF_INCLUDE := -I$(SKEL_TMP_OUT)/.. -I$(LIBBPF_INCLUDE) @@ -1211,10 +1131,6 @@ clean:: $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBSYMBOL)-clean $( $(call QUIET_CLEAN, Documentation) \ $(MAKE) -C $(DOC_DIR) O=$(OUTPUT) clean >/dev/null -ifndef LIBTRACEEVENT_DYNAMIC -clean:: $(LIBTRACEEVENT)-clean libtraceevent_plugins-clean -endif - # # To provide FEATURE-DUMP into $(FEATURE_DUMP_COPY) # file if defined, with no further action. 
@@ -1232,6 +1148,6 @@ FORCE: .PHONY: all install clean config-clean strip install-gtk .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell .PHONY: .FORCE-PERF-VERSION-FILE TAGS tags cscope FORCE prepare -.PHONY: libtraceevent_plugins archheaders +.PHONY: archheaders endif # force_fixdep diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index 337aa9bdf905..78ef7115be3d 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build @@ -3,7 +3,7 @@ perf-y += machine.o perf-y += perf_regs.o perf-y += tsc.o perf-y += pmu.o -perf-y += kvm-stat.o +perf-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build index 0115f3166568..9889245c555c 100644 --- a/tools/perf/arch/powerpc/util/Build +++ b/tools/perf/arch/powerpc/util/Build @@ -1,5 +1,5 @@ perf-y += header.o -perf-y += kvm-stat.o +perf-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o perf-y += perf_regs.o perf-y += mem-events.o perf-y += sym-handling.o diff --git a/tools/perf/arch/s390/util/Build b/tools/perf/arch/s390/util/Build index 3d9d0f4f72ca..db6884086997 100644 --- a/tools/perf/arch/s390/util/Build +++ b/tools/perf/arch/s390/util/Build @@ -1,5 +1,5 @@ perf-y += header.o -perf-y += kvm-stat.o +perf-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o perf-y += perf_regs.o perf-$(CONFIG_DWARF) += dwarf-regs.o diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build index dbeb04cb336e..195ccfdef7aa 100644 --- a/tools/perf/arch/x86/util/Build +++ b/tools/perf/arch/x86/util/Build @@ -1,7 +1,7 @@ perf-y += header.o perf-y += tsc.o perf-y += pmu.o -perf-y += kvm-stat.o +perf-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o perf-y += perf_regs.o perf-y += topdown.o perf-y += machine.o diff --git a/tools/perf/arch/x86/util/intel-pt.c 
b/tools/perf/arch/x86/util/intel-pt.c index af102f471e9f..1e39a034cee9 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -418,6 +418,7 @@ static int intel_pt_info_fill(struct auxtrace_record *itr, return 0; } +#ifdef HAVE_LIBTRACEEVENT static int intel_pt_track_switches(struct evlist *evlist) { const char *sched_switch = "sched:sched_switch"; @@ -439,6 +440,7 @@ static int intel_pt_track_switches(struct evlist *evlist) return 0; } +#endif static void intel_pt_valid_str(char *str, size_t len, u64 valid) { @@ -829,6 +831,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, ptr->have_sched_switch = 2; } } else { +#ifdef HAVE_LIBTRACEEVENT err = intel_pt_track_switches(evlist); if (err == -EPERM) pr_debug2("Unable to select sched:sched_switch\n"); @@ -836,6 +839,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, return err; else ptr->have_sched_switch = 1; +#endif } } diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 517d928c00e3..90458ca6933f 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -499,7 +499,9 @@ int cmd_annotate(int argc, const char **argv) .namespaces = perf_event__process_namespaces, .attr = perf_event__process_attr, .build_id = perf_event__process_build_id, +#ifdef HAVE_LIBTRACEEVENT .tracing_data = perf_event__process_tracing_data, +#endif .id_index = perf_event__process_id_index, .auxtrace_info = perf_event__process_auxtrace_info, .auxtrace = perf_event__process_auxtrace, diff --git a/tools/perf/builtin-data.c b/tools/perf/builtin-data.c index c22d82d2a73c..b2a9a3b7f68d 100644 --- a/tools/perf/builtin-data.c +++ b/tools/perf/builtin-data.c @@ -78,12 +78,13 @@ static int cmd_data_convert(int argc, const char **argv) return bt_convert__perf2json(input_name, to_json, &opts); if (to_ctf) { -#ifdef HAVE_LIBBABELTRACE_SUPPORT +#if defined(HAVE_LIBBABELTRACE_SUPPORT) && defined(HAVE_LIBTRACEEVENT) return 
bt_convert__perf2ctf(input_name, to_ctf, &opts); #else pr_err("The libbabeltrace support is not compiled in. perf should be " "compiled with environment variables LIBBABELTRACE=1 and " - "LIBBABELTRACE_DIR=/path/to/libbabeltrace/\n"); + "LIBBABELTRACE_DIR=/path/to/libbabeltrace/.\n" + "Check also if libbtraceevent devel files are available.\n"); return -1; #endif } diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index e254f18986f7..3f4e4dd5abf3 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -607,6 +607,7 @@ static int perf_event__repipe_exit(struct perf_tool *tool, return err; } +#ifdef HAVE_LIBTRACEEVENT static int perf_event__repipe_tracing_data(struct perf_session *session, union perf_event *event) { @@ -614,6 +615,7 @@ static int perf_event__repipe_tracing_data(struct perf_session *session, return perf_event__process_tracing_data(session, event); } +#endif static int dso__read_build_id(struct dso *dso) { @@ -807,6 +809,7 @@ static int perf_inject__sched_switch(struct perf_tool *tool, return 0; } +#ifdef HAVE_LIBTRACEEVENT static int perf_inject__sched_stat(struct perf_tool *tool, union perf_event *event __maybe_unused, struct perf_sample *sample, @@ -836,6 +839,7 @@ found: build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine); return perf_event__repipe(tool, event_sw, &sample_sw, machine); } +#endif static struct guest_vcpu *guest_session__vcpu(struct guest_session *gs, u32 vcpu) { @@ -1961,7 +1965,9 @@ static int __cmd_inject(struct perf_inject *inject) inject->tool.mmap = perf_event__repipe_mmap; inject->tool.mmap2 = perf_event__repipe_mmap2; inject->tool.fork = perf_event__repipe_fork; +#ifdef HAVE_LIBTRACEEVENT inject->tool.tracing_data = perf_event__repipe_tracing_data; +#endif } output_data_offset = perf_session__data_offset(session->evlist); @@ -1984,8 +1990,10 @@ static int __cmd_inject(struct perf_inject *inject) evsel->handler = perf_inject__sched_switch; } else if (!strcmp(name, 
"sched:sched_process_exit")) evsel->handler = perf_inject__sched_process_exit; +#ifdef HAVE_LIBTRACEEVENT else if (!strncmp(name, "sched:sched_stat_", 17)) evsel->handler = perf_inject__sched_stat; +#endif } } else if (inject->itrace_synth_opts.vm_time_correlation) { session->itrace_synth_opts = &inject->itrace_synth_opts; diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index ebfab2ca1702..e20656c431a4 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -35,6 +35,7 @@ #include #include +#include static int kmem_slab; static int kmem_page; diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 7d9ec1bac1a2..641e739c717c 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -63,7 +63,7 @@ static const char *get_filename_for_perf_kvm(void) return filename; } -#ifdef HAVE_KVM_STAT_SUPPORT +#if defined(HAVE_KVM_STAT_SUPPORT) && defined(HAVE_LIBTRACEEVENT) void exit_event_get_key(struct evsel *evsel, struct perf_sample *sample, @@ -654,7 +654,7 @@ static void print_result(struct perf_kvm_stat *kvm) pr_info("\nLost events: %" PRIu64 "\n\n", kvm->lost_events); } -#ifdef HAVE_TIMERFD_SUPPORT +#if defined(HAVE_TIMERFD_SUPPORT) && defined(HAVE_LIBTRACEEVENT) static int process_lost_event(struct perf_tool *tool, union perf_event *event __maybe_unused, struct perf_sample *sample __maybe_unused, @@ -742,7 +742,7 @@ static bool verify_vcpu(int vcpu) return true; } -#ifdef HAVE_TIMERFD_SUPPORT +#if defined(HAVE_TIMERFD_SUPPORT) && defined(HAVE_LIBTRACEEVENT) /* keeping the max events to a modest level to keep * the processing of samples per mmap smooth. 
*/ @@ -1290,7 +1290,7 @@ kvm_events_report(struct perf_kvm_stat *kvm, int argc, const char **argv) return kvm_events_report_vcpu(kvm); } -#ifdef HAVE_TIMERFD_SUPPORT +#if defined(HAVE_TIMERFD_SUPPORT) && defined(HAVE_LIBTRACEEVENT) static struct evlist *kvm_live_event_list(void) { struct evlist *evlist; @@ -1507,7 +1507,7 @@ static int kvm_cmd_stat(const char *file_name, int argc, const char **argv) if (strlen(argv[1]) > 2 && strstarts("report", argv[1])) return kvm_events_report(&kvm, argc - 1 , argv + 1); -#ifdef HAVE_TIMERFD_SUPPORT +#if defined(HAVE_TIMERFD_SUPPORT) && defined(HAVE_LIBTRACEEVENT) if (!strncmp(argv[1], "live", 4)) return kvm_events_live(&kvm, argc - 1 , argv + 1); #endif @@ -1644,7 +1644,7 @@ int cmd_kvm(int argc, const char **argv) return cmd_top(argc, argv); else if (strlen(argv[0]) > 2 && strstarts("buildid-list", argv[0])) return __cmd_buildid_list(file_name, argc, argv); -#ifdef HAVE_KVM_STAT_SUPPORT +#if defined(HAVE_KVM_STAT_SUPPORT) && defined(HAVE_LIBTRACEEVENT) else if (strlen(argv[0]) > 2 && strstarts("stat", argv[0])) return kvm_cmd_stat(file_name, argc, argv); #endif diff --git a/tools/perf/builtin-kwork.c b/tools/perf/builtin-kwork.c index 0e02b8098644..dc59d75180d1 100644 --- a/tools/perf/builtin-kwork.c +++ b/tools/perf/builtin-kwork.c @@ -23,6 +23,7 @@ #include #include +#include #include #include diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index b7fd7ec586fb..7e17374f6c1a 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1701,8 +1701,10 @@ static void record__init_features(struct record *rec) if (rec->no_buildid) perf_header__clear_feat(&session->header, HEADER_BUILD_ID); +#ifdef HAVE_LIBTRACEEVENT if (!have_tracepoints(&rec->evlist->core.entries)) perf_header__clear_feat(&session->header, HEADER_TRACING_DATA); +#endif if (!rec->opts.branch_stack) perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); diff --git a/tools/perf/builtin-report.c 
b/tools/perf/builtin-report.c index b6d77d3da64f..2ee2ecca208e 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -67,6 +67,10 @@ #include #include +#ifdef HAVE_LIBTRACEEVENT +#include +#endif + struct report { struct perf_tool tool; struct perf_session *session; @@ -1199,7 +1203,9 @@ int cmd_report(int argc, const char **argv) .lost = perf_event__process_lost, .read = process_read_event, .attr = process_attr, +#ifdef HAVE_LIBTRACEEVENT .tracing_data = perf_event__process_tracing_data, +#endif .build_id = perf_event__process_build_id, .id_index = perf_event__process_id_index, .auxtrace_info = perf_event__process_auxtrace_info, @@ -1660,6 +1666,7 @@ repeat: report.range_num); } +#ifdef HAVE_LIBTRACEEVENT if (session->tevent.pevent && tep_set_function_resolver(session->tevent.pevent, machine__resolve_kernel_addr, @@ -1668,7 +1675,7 @@ repeat: __func__); return -1; } - +#endif sort__setup_elide(stdout); ret = __cmd_report(&report); diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index d7ec8c1af293..88888fb885c8 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -62,6 +62,9 @@ #include "perf.h" #include +#ifdef HAVE_LIBTRACEEVENT +#include +#endif static char const *script_name; static char const *generate_script_lang; @@ -2154,12 +2157,12 @@ static void process_event(struct perf_script *script, perf_sample__fprintf_bts(sample, evsel, thread, al, addr_al, machine, fp); return; } - +#ifdef HAVE_LIBTRACEEVENT if (PRINT_FIELD(TRACE) && sample->raw_data) { event_format__fprintf(evsel->tp_format, sample->cpu, sample->raw_data, sample->raw_size, fp); } - +#endif if (attr->type == PERF_TYPE_SYNTH && PRINT_FIELD(SYNTH)) perf_sample__fprintf_synth(sample, evsel, fp); @@ -2283,8 +2286,10 @@ static void process_stat_interval(u64 tstamp) static void setup_scripting(void) { +#ifdef HAVE_LIBTRACEEVENT setup_perl_scripting(); setup_python_scripting(); +#endif } static int flush_scripting(void) @@ -3784,7 
+3789,9 @@ int cmd_script(int argc, const char **argv) .fork = perf_event__process_fork, .attr = process_attr, .event_update = perf_event__process_event_update, +#ifdef HAVE_LIBTRACEEVENT .tracing_data = perf_event__process_tracing_data, +#endif .feature = process_feature_event, .build_id = perf_event__process_build_id, .id_index = perf_event__process_id_index, @@ -4215,6 +4222,7 @@ script_found: else symbol_conf.use_callchain = false; +#ifdef HAVE_LIBTRACEEVENT if (session->tevent.pevent && tep_set_function_resolver(session->tevent.pevent, machine__resolve_kernel_addr, @@ -4223,7 +4231,7 @@ script_found: err = -1; goto out_delete; } - +#endif if (generate_script_lang) { struct stat perf_stat; int input; @@ -4259,9 +4267,12 @@ script_found: err = -ENOENT; goto out_delete; } - +#ifdef HAVE_LIBTRACEEVENT err = scripting_ops->generate_script(session->tevent.pevent, "perf-script"); +#else + err = scripting_ops->generate_script(NULL, "perf-script"); +#endif goto out_delete; } diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index c36296bb7637..6c629e7d370a 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -38,6 +38,7 @@ #include "util/string2.h" #include "util/tracepoint.h" #include +#include #ifdef LACKS_OPEN_MEMSTREAM_PROTOTYPE FILE *open_memstream(char **ptr, size_t *sizeloc); diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 543c379d2a57..6909cd9f48d1 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -15,7 +15,6 @@ */ #include "util/record.h" -#include #include #include #include "util/bpf_map.h" @@ -80,6 +79,10 @@ #include #include +#ifdef HAVE_LIBTRACEEVENT +#include +#endif + #ifndef O_CLOEXEC # define O_CLOEXEC 02000000 #endif diff --git a/tools/perf/builtin-version.c b/tools/perf/builtin-version.c index a71f491224da..a886929ec6e5 100644 --- a/tools/perf/builtin-version.c +++ b/tools/perf/builtin-version.c @@ -82,6 +82,7 @@ static void 
library_status(void) STATUS(HAVE_AIO_SUPPORT, aio); STATUS(HAVE_ZSTD_SUPPORT, zstd); STATUS(HAVE_LIBPFM, libpfm4); + STATUS(HAVE_LIBTRACEEVENT, libtraceevent); } int cmd_version(int argc, const char **argv) diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 7af135dea1cd..82bbe0ca858b 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -70,20 +70,26 @@ static struct cmd_struct commands[] = { { "report", cmd_report, 0 }, { "bench", cmd_bench, 0 }, { "stat", cmd_stat, 0 }, +#ifdef HAVE_LIBTRACEEVENT { "timechart", cmd_timechart, 0 }, +#endif { "top", cmd_top, 0 }, { "annotate", cmd_annotate, 0 }, { "version", cmd_version, 0 }, { "script", cmd_script, 0 }, +#ifdef HAVE_LIBTRACEEVENT { "sched", cmd_sched, 0 }, +#endif #ifdef HAVE_LIBELF_SUPPORT { "probe", cmd_probe, 0 }, #endif +#ifdef HAVE_LIBTRACEEVENT { "kmem", cmd_kmem, 0 }, { "lock", cmd_lock, 0 }, +#endif { "kvm", cmd_kvm, 0 }, { "test", cmd_test, 0 }, -#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT) +#if defined(HAVE_LIBTRACEEVENT) && (defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT)) { "trace", cmd_trace, 0 }, #endif { "inject", cmd_inject, 0 }, @@ -91,7 +97,9 @@ static struct cmd_struct commands[] = { { "data", cmd_data, 0 }, { "ftrace", cmd_ftrace, 0 }, { "daemon", cmd_daemon, 0 }, +#ifdef HAVE_LIBTRACEEVENT { "kwork", cmd_kwork, 0 }, +#endif }; struct pager_config { @@ -500,14 +508,18 @@ int main(int argc, const char **argv) argv[0] = cmd; } if (strstarts(cmd, "trace")) { -#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT) - setup_path(); - argv[0] = "trace"; - return cmd_trace(argc, argv); -#else +#ifndef HAVE_LIBTRACEEVENT + fprintf(stderr, + "trace command not available: missing libtraceevent devel package at build time.\n"); + goto out; +#elif !defined(HAVE_LIBAUDIT_SUPPORT) && !defined(HAVE_SYSCALL_TABLE_SUPPORT) fprintf(stderr, "trace command not available: missing audit-libs devel package at build time.\n"); goto out; 
+#else + setup_path(); + argv[0] = "trace"; + return cmd_trace(argc, argv); #endif } /* Look for flags.. */ diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Build b/tools/perf/scripts/python/Perf-Trace-Util/Build index 7d0e33ce6aba..d5fed4e42617 100644 --- a/tools/perf/scripts/python/Perf-Trace-Util/Build +++ b/tools/perf/scripts/python/Perf-Trace-Util/Build @@ -1,3 +1,3 @@ -perf-y += Context.o +perf-$(CONFIG_LIBTRACEEVENT) += Context.o CFLAGS_Context.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 658b5052c24d..90fd1eb317bb 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -6,13 +6,13 @@ perf-y += parse-events.o perf-y += dso-data.o perf-y += attr.o perf-y += vmlinux-kallsyms.o -perf-y += openat-syscall.o -perf-y += openat-syscall-all-cpus.o -perf-y += openat-syscall-tp-fields.o -perf-y += mmap-basic.o +perf-$(CONFIG_LIBTRACEEVENT) += openat-syscall.o +perf-$(CONFIG_LIBTRACEEVENT) += openat-syscall-all-cpus.o +perf-$(CONFIG_LIBTRACEEVENT) += openat-syscall-tp-fields.o +perf-$(CONFIG_LIBTRACEEVENT) += mmap-basic.o perf-y += perf-record.o perf-y += evsel-roundtrip-name.o -perf-y += evsel-tp-sched.o +perf-$(CONFIG_LIBTRACEEVENT) += evsel-tp-sched.o perf-y += fdarray.o perf-y += pmu.o perf-y += pmu-events.o @@ -30,7 +30,7 @@ perf-y += task-exit.o perf-y += sw-clock.o perf-y += mmap-thread-lookup.o perf-y += thread-maps-share.o -perf-y += switch-tracking.o +perf-$(CONFIG_LIBTRACEEVENT) += switch-tracking.o perf-y += keep-tracking.o perf-y += code-reading.o perf-y += sample-parsing.o diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index ddd8262bfa26..f6c16ad8ed50 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -38,9 +38,11 @@ struct test_suite *__weak arch_tests[] = { static struct test_suite *generic_tests[] = { &suite__vmlinux_matches_kallsyms, 
+#ifdef HAVE_LIBTRACEEVENT &suite__openat_syscall_event, &suite__openat_syscall_event_on_all_cpus, &suite__basic_mmap, +#endif &suite__mem, &suite__parse_events, &suite__expr, @@ -51,8 +53,10 @@ static struct test_suite *generic_tests[] = { &suite__dso_data_cache, &suite__dso_data_reopen, &suite__perf_evsel__roundtrip_name_test, +#ifdef HAVE_LIBTRACEEVENT &suite__perf_evsel__tp_sched_test, &suite__syscall_openat_tp_fields, +#endif &suite__attr, &suite__hists_link, &suite__python_use, @@ -71,7 +75,9 @@ static struct test_suite *generic_tests[] = { &suite__thread_maps_share, &suite__hists_output, &suite__hists_cumulate, +#ifdef HAVE_LIBTRACEEVENT &suite__switch_tracking, +#endif &suite__fdarray__filter, &suite__fdarray__add, &suite__kmod_path__parse, diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 3440dd2616b0..71a5cb343311 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -20,6 +20,8 @@ #define PERF_TP_SAMPLE_TYPE (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | \ PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD) +#ifdef HAVE_LIBTRACEEVENT + #if defined(__s390x__) /* Return true if kvm module is available and loaded. 
Test this * and return success when trace point kvm_s390_create_vm @@ -76,6 +78,7 @@ static int test__checkevent_tracepoint_multi(struct evlist *evlist) } return TEST_OK; } +#endif /* HAVE_LIBTRACEEVENT */ static int test__checkevent_raw(struct evlist *evlist) { @@ -222,6 +225,7 @@ static int test__checkevent_breakpoint_rw(struct evlist *evlist) return TEST_OK; } +#ifdef HAVE_LIBTRACEEVENT static int test__checkevent_tracepoint_modifier(struct evlist *evlist) { struct evsel *evsel = evlist__first(evlist); @@ -252,6 +256,7 @@ test__checkevent_tracepoint_multi_modifier(struct evlist *evlist) return test__checkevent_tracepoint_multi(evlist); } +#endif /* HAVE_LIBTRACEEVENT */ static int test__checkevent_raw_modifier(struct evlist *evlist) { @@ -453,6 +458,7 @@ static int test__checkevent_pmu(struct evlist *evlist) return TEST_OK; } +#ifdef HAVE_LIBTRACEEVENT static int test__checkevent_list(struct evlist *evlist) { struct evsel *evsel = evlist__first(evlist); @@ -491,6 +497,7 @@ static int test__checkevent_list(struct evlist *evlist) return TEST_OK; } +#endif static int test__checkevent_pmu_name(struct evlist *evlist) { @@ -762,6 +769,7 @@ static int test__group2(struct evlist *evlist) return TEST_OK; } +#ifdef HAVE_LIBTRACEEVENT static int test__group3(struct evlist *evlist __maybe_unused) { struct evsel *evsel, *leader; @@ -853,6 +861,7 @@ static int test__group3(struct evlist *evlist __maybe_unused) return TEST_OK; } +#endif static int test__group4(struct evlist *evlist __maybe_unused) { @@ -1460,6 +1469,7 @@ static int test__sym_event_dc(struct evlist *evlist) return TEST_OK; } +#ifdef HAVE_LIBTRACEEVENT static int count_tracepoints(void) { struct dirent *events_ent; @@ -1513,6 +1523,7 @@ static int test__all_tracepoints(struct evlist *evlist) return test__checkevent_tracepoint_multi(evlist); } +#endif /* HAVE_LIBTRACEVENT */ static int test__hybrid_hw_event_with_pmu(struct evlist *evlist) { @@ -1642,6 +1653,7 @@ struct evlist_test { }; static const struct 
evlist_test test__events[] = { +#ifdef HAVE_LIBTRACEEVENT { .name = "syscalls:sys_enter_openat", .check = test__checkevent_tracepoint, @@ -1652,6 +1664,7 @@ static const struct evlist_test test__events[] = { .check = test__checkevent_tracepoint_multi, /* 1 */ }, +#endif { .name = "r1a", .check = test__checkevent_raw, @@ -1702,6 +1715,7 @@ static const struct evlist_test test__events[] = { .check = test__checkevent_breakpoint_w, /* 1 */ }, +#ifdef HAVE_LIBTRACEEVENT { .name = "syscalls:sys_enter_openat:k", .check = test__checkevent_tracepoint_modifier, @@ -1712,6 +1726,7 @@ static const struct evlist_test test__events[] = { .check = test__checkevent_tracepoint_multi_modifier, /* 3 */ }, +#endif { .name = "r1a:kp", .check = test__checkevent_raw_modifier, @@ -1757,11 +1772,13 @@ static const struct evlist_test test__events[] = { .check = test__checkevent_breakpoint_w_modifier, /* 2 */ }, +#ifdef HAVE_LIBTRACEEVENT { .name = "r1,syscalls:sys_enter_openat:k,1:1:hp", .check = test__checkevent_list, /* 3 */ }, +#endif { .name = "instructions:G", .check = test__checkevent_exclude_host_modifier, @@ -1792,11 +1809,13 @@ static const struct evlist_test test__events[] = { .check = test__group2, /* 9 */ }, +#ifdef HAVE_LIBTRACEEVENT { .name = "group1{syscalls:sys_enter_openat:H,cycles:kppp},group2{cycles,1:3}:G,instructions:u", .check = test__group3, /* 0 */ }, +#endif { .name = "{cycles:u,instructions:kp}:p", .check = test__group4, @@ -1807,11 +1826,13 @@ static const struct evlist_test test__events[] = { .check = test__group5, /* 2 */ }, +#ifdef HAVE_LIBTRACEEVENT { .name = "*:*", .check = test__all_tracepoints, /* 3 */ }, +#endif { .name = "{cycles,cache-misses:G}:H", .check = test__group_gh1, @@ -1867,7 +1888,7 @@ static const struct evlist_test test__events[] = { .check = test__checkevent_breakpoint_len_rw_modifier, /* 4 */ }, -#if defined(__s390x__) +#if defined(__s390x__) && defined(HAVE_LIBTRACEEVENT) { .name = "kvm-s390:kvm_s390_create_vm", .check = 
test__checkevent_tracepoint, diff --git a/tools/perf/util/Build b/tools/perf/util/Build index d04802bfa23f..8345464d0130 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -69,7 +69,6 @@ perf-y += namespaces.o perf-y += comm.o perf-y += thread.o perf-y += thread_map.o -perf-y += trace-event-parse.o perf-y += parse-events-flex.o perf-y += parse-events-bison.o perf-y += pmu.o @@ -77,11 +76,12 @@ perf-y += pmus.o perf-y += pmu-flex.o perf-y += pmu-bison.o perf-y += pmu-hybrid.o -perf-y += trace-event-read.o -perf-y += trace-event-info.o -perf-y += trace-event-scripting.o -perf-y += trace-event.o perf-y += svghelper.o +perf-$(CONFIG_LIBTRACEEVENT) += trace-event-info.o +perf-$(CONFIG_LIBTRACEEVENT) += trace-event-scripting.o +perf-$(CONFIG_LIBTRACEEVENT) += trace-event.o +perf-$(CONFIG_LIBTRACEEVENT) += trace-event-parse.o +perf-$(CONFIG_LIBTRACEEVENT) += trace-event-read.o perf-y += sort.o perf-y += hist.o perf-y += util.o @@ -153,8 +153,12 @@ perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter_cgroup.o perf-$(CONFIG_PERF_BPF_SKEL) += bpf_ftrace.o perf-$(CONFIG_PERF_BPF_SKEL) += bpf_off_cpu.o -perf-$(CONFIG_PERF_BPF_SKEL) += bpf_kwork.o perf-$(CONFIG_PERF_BPF_SKEL) += bpf_lock_contention.o + +ifeq ($(CONFIG_LIBTRACEEVENT),y) + perf-$(CONFIG_PERF_BPF_SKEL) += bpf_kwork.o +endif + perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o perf-$(CONFIG_LIBELF) += symbol-elf.o perf-$(CONFIG_LIBELF) += probe-file.o @@ -189,7 +193,10 @@ perf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o perf-$(CONFIG_LIBUNWIND_X86) += libunwind/x86_32.o perf-$(CONFIG_LIBUNWIND_AARCH64) += libunwind/arm64.o -perf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o +ifeq ($(CONFIG_LIBTRACEEVENT),y) + perf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o +endif + perf-y += data-convert-json.o perf-y += scripting-engines/ diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index c65cdaf6975e..8031b586e813 100644 --- 
a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -19,7 +19,6 @@ #include #include #include -#include #include "asm/bug.h" #include "data-convert.h" #include "session.h" @@ -36,6 +35,10 @@ #include "clockid.h" #include "util/sample.h" +#ifdef HAVE_LIBTRACEEVENT +#include +#endif + #define pr_N(n, fmt, ...) \ eprintf(n, debug_data_convert, fmt, ##__VA_ARGS__) diff --git a/tools/perf/util/data-convert-json.c b/tools/perf/util/data-convert-json.c index 57db59068cb6..ba9d93ce9463 100644 --- a/tools/perf/util/data-convert-json.c +++ b/tools/perf/util/data-convert-json.c @@ -27,6 +27,10 @@ #include "util/thread.h" #include "util/tool.h" +#ifdef HAVE_LIBTRACEEVENT +#include +#endif + struct convert_json { struct perf_tool tool; FILE *out; @@ -217,6 +221,7 @@ static int process_sample_event(struct perf_tool *tool, } output_json_format(out, false, 3, "]"); +#ifdef HAVE_LIBTRACEEVENT if (sample->raw_data) { int i; struct tep_format_field **fields; @@ -236,7 +241,7 @@ static int process_sample_event(struct perf_tool *tool, free(fields); } } - +#endif output_json_format(out, false, 2, "}"); return 0; } @@ -313,7 +318,9 @@ int bt_convert__perf2json(const char *input_name, const char *output_name, .exit = perf_event__process_exit, .fork = perf_event__process_fork, .lost = perf_event__process_lost, +#ifdef HAVE_LIBTRACEEVENT .tracing_data = perf_event__process_tracing_data, +#endif .build_id = perf_event__process_build_id, .id_index = perf_event__process_id_index, .auxtrace_info = perf_event__process_auxtrace_info, diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index fbf3192bced9..590d4e77effc 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -290,6 +290,7 @@ struct evsel *evlist__add_aux_dummy(struct evlist *evlist, bool system_wide) return evsel; } +#ifdef HAVE_LIBTRACEEVENT struct evsel *evlist__add_sched_switch(struct evlist *evlist, bool system_wide) { struct evsel *evsel = evsel__newtp_idx("sched", 
"sched_switch", 0); @@ -305,7 +306,8 @@ struct evsel *evlist__add_sched_switch(struct evlist *evlist, bool system_wide) evlist__add(evlist, evsel); return evsel; -}; +} +#endif int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs) { @@ -376,6 +378,7 @@ struct evsel *evlist__find_tracepoint_by_name(struct evlist *evlist, const char return NULL; } +#ifdef HAVE_LIBTRACEEVENT int evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name, void *handler) { struct evsel *evsel = evsel__newtp(sys, name); @@ -387,6 +390,7 @@ int evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name, evlist__add(evlist, evsel); return 0; } +#endif struct evlist_cpu_iterator evlist__cpu_begin(struct evlist *evlist, struct affinity *affinity) { diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 16734c6756b3..e5b84ead566c 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -127,7 +127,9 @@ static inline struct evsel *evlist__add_dummy_on_all_cpus(struct evlist *evlist) { return evlist__add_aux_dummy(evlist, true); } +#ifdef HAVE_LIBTRACEEVENT struct evsel *evlist__add_sched_switch(struct evlist *evlist, bool system_wide); +#endif int evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr, evsel__sb_cb_t cb, void *data); @@ -135,7 +137,9 @@ void evlist__set_cb(struct evlist *evlist, evsel__sb_cb_t cb, void *data); int evlist__start_sb_thread(struct evlist *evlist, struct target *target); void evlist__stop_sb_thread(struct evlist *evlist); +#ifdef HAVE_LIBTRACEEVENT int evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name, void *handler); +#endif int __evlist__set_tracepoints_handlers(struct evlist *evlist, const struct evsel_str_handler *assocs, diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 0f617359a82f..ca911856c4b1 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -12,7 +12,6 @@ #include #include 
#include -#include #include #include #include @@ -57,6 +56,10 @@ #include +#ifdef HAVE_LIBTRACEEVENT +#include +#endif + struct perf_missing_features perf_missing_features; static clockid_t clockid; @@ -439,7 +442,9 @@ struct evsel *evsel__clone(struct evsel *orig) goto out_err; } evsel->cgrp = cgroup__get(orig->cgrp); +#ifdef HAVE_LIBTRACEEVENT evsel->tp_format = orig->tp_format; +#endif evsel->handler = orig->handler; evsel->core.leader = orig->core.leader; @@ -479,6 +484,7 @@ out_err: /* * Returns pointer with encoded error via interface. */ +#ifdef HAVE_LIBTRACEEVENT struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx) { struct evsel *evsel = zalloc(perf_evsel__object.size); @@ -516,6 +522,7 @@ out_free: out_err: return ERR_PTR(err); } +#endif const char *const evsel__hw_names[PERF_COUNT_HW_MAX] = { "cycles", @@ -2758,6 +2765,7 @@ u16 evsel__id_hdr_size(struct evsel *evsel) return size; } +#ifdef HAVE_LIBTRACEEVENT struct tep_format_field *evsel__field(struct evsel *evsel, const char *name) { return tep_find_field(evsel->tp_format, name); @@ -2831,6 +2839,7 @@ u64 evsel__intval(struct evsel *evsel, struct perf_sample *sample, const char *n return field ? 
format_field__intval(field, sample, evsel->needs_swap) : 0; } +#endif bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize) { diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index f3485799ddf9..d572be41b960 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -72,7 +72,9 @@ struct evsel { char *name; char *group_name; const char *pmu_name; +#ifdef HAVE_LIBTRACEEVENT struct tep_event *tp_format; +#endif char *filter; unsigned long max_events; double scale; @@ -223,11 +225,14 @@ static inline struct evsel *evsel__new(struct perf_event_attr *attr) } struct evsel *evsel__clone(struct evsel *orig); -struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx); int copy_config_terms(struct list_head *dst, struct list_head *src); void free_config_terms(struct list_head *config_terms); + +#ifdef HAVE_LIBTRACEEVENT +struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx); + /* * Returns pointer with encoded error via interface. 
*/ @@ -235,10 +240,13 @@ static inline struct evsel *evsel__newtp(const char *sys, const char *name) { return evsel__newtp_idx(sys, name, 0); } +#endif struct evsel *evsel__new_cycles(bool precise, __u32 type, __u64 config); +#ifdef HAVE_LIBTRACEEVENT struct tep_event *event_format__new(const char *sys, const char *name); +#endif void evsel__init(struct evsel *evsel, struct perf_event_attr *attr, int idx); void evsel__exit(struct evsel *evsel); @@ -323,6 +331,7 @@ bool evsel__precise_ip_fallback(struct evsel *evsel); struct perf_sample; +#ifdef HAVE_LIBTRACEEVENT void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char *name); u64 evsel__intval(struct evsel *evsel, struct perf_sample *sample, const char *name); @@ -330,6 +339,7 @@ static inline char *evsel__strval(struct evsel *evsel, struct perf_sample *sampl { return evsel__rawptr(evsel, sample, name); } +#endif struct tep_format_field; diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c index 8c2ea8001329..bd22c4932d10 100644 --- a/tools/perf/util/evsel_fprintf.c +++ b/tools/perf/util/evsel_fprintf.c @@ -2,7 +2,6 @@ #include #include #include -#include #include "evsel.h" #include "util/evsel_fprintf.h" #include "util/event.h" @@ -13,6 +12,10 @@ #include "srcline.h" #include "dso.h" +#ifdef HAVE_LIBTRACEEVENT +#include +#endif + static int comma_fprintf(FILE *fp, bool *first, const char *fmt, ...) 
{ va_list args; @@ -74,6 +77,7 @@ int evsel__fprintf(struct evsel *evsel, struct perf_attr_details *details, FILE term, (u64)evsel->core.attr.sample_freq); } +#ifdef HAVE_LIBTRACEEVENT if (details->trace_fields) { struct tep_format_field *field; @@ -96,6 +100,7 @@ int evsel__fprintf(struct evsel *evsel, struct perf_attr_details *details, FILE field = field->next; } } +#endif out: fputc('\n', fp); return ++printed; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index dc2ae397d400..404d816ca124 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -55,6 +56,10 @@ #include #include +#ifdef HAVE_LIBTRACEEVENT +#include +#endif + /* * magic2 = "PERFILE2" * must be a numerical value to let the endianness @@ -298,6 +303,7 @@ static int do_read_bitmap(struct feat_fd *ff, unsigned long **pset, u64 *psize) return 0; } +#ifdef HAVE_LIBTRACEEVENT static int write_tracing_data(struct feat_fd *ff, struct evlist *evlist) { @@ -306,6 +312,7 @@ static int write_tracing_data(struct feat_fd *ff, return read_tracing_data(ff->fd, &evlist->core.entries); } +#endif static int write_build_id(struct feat_fd *ff, struct evlist *evlist __maybe_unused) @@ -2394,12 +2401,14 @@ FEAT_PROCESS_STR_FUN(arch, arch); FEAT_PROCESS_STR_FUN(cpudesc, cpu_desc); FEAT_PROCESS_STR_FUN(cpuid, cpuid); +#ifdef HAVE_LIBTRACEEVENT static int process_tracing_data(struct feat_fd *ff, void *data) { ssize_t ret = trace_report(ff->fd, data, false); return ret < 0 ? 
-1 : 0; } +#endif static int process_build_id(struct feat_fd *ff, void *data __maybe_unused) { @@ -3366,7 +3375,9 @@ err: const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE]; const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = { +#ifdef HAVE_LIBTRACEEVENT FEAT_OPN(TRACING_DATA, tracing_data, false), +#endif FEAT_OPN(BUILD_ID, build_id, false), FEAT_OPR(HOSTNAME, hostname, false), FEAT_OPR(OSRELEASE, osrelease, false), @@ -4082,6 +4093,7 @@ static int read_attr(int fd, struct perf_header *ph, return ret <= 0 ? -1 : 0; } +#ifdef HAVE_LIBTRACEEVENT static int evsel__prepare_tracepoint_event(struct evsel *evsel, struct tep_handle *pevent) { struct tep_event *event; @@ -4125,6 +4137,7 @@ static int evlist__prepare_tracepoint_events(struct evlist *evlist, struct tep_h return 0; } +#endif int perf_session__read_header(struct perf_session *session, int repipe_fd) { @@ -4230,11 +4243,15 @@ int perf_session__read_header(struct perf_session *session, int repipe_fd) lseek(fd, tmp, SEEK_SET); } +#ifdef HAVE_LIBTRACEEVENT perf_header__process_sections(header, fd, &session->tevent, perf_file_section__process); if (evlist__prepare_tracepoint_events(session->evlist, session->tevent.pevent)) goto out_delete_evlist; +#else + perf_header__process_sections(header, fd, NULL, perf_file_section__process); +#endif return 0; out_errno: @@ -4412,6 +4429,7 @@ int perf_event__process_event_update(struct perf_tool *tool __maybe_unused, return 0; } +#ifdef HAVE_LIBTRACEEVENT int perf_event__process_tracing_data(struct perf_session *session, union perf_event *event) { @@ -4459,6 +4477,7 @@ int perf_event__process_tracing_data(struct perf_session *session, return size_read + padding; } +#endif int perf_event__process_build_id(struct perf_session *session, union perf_event *event) diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 2d5e601ba60f..e3861ae62172 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -160,8 +160,10 @@ 
int perf_event__process_event_update(struct perf_tool *tool, union perf_event *event, struct evlist **pevlist); size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp); +#ifdef HAVE_LIBTRACEEVENT int perf_event__process_tracing_data(struct perf_session *session, union perf_event *event); +#endif int perf_event__process_build_id(struct perf_session *session, union perf_event *event); bool is_perf_magic(u64 magic); diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index e3548ddef254..6d3921627e33 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -3142,6 +3142,7 @@ static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid, return 1; } +#ifdef HAVE_LIBTRACEEVENT static int intel_pt_process_switch(struct intel_pt *pt, struct perf_sample *sample) { @@ -3165,6 +3166,7 @@ static int intel_pt_process_switch(struct intel_pt *pt, return machine__set_current_tid(pt->machine, cpu, -1, tid); } +#endif /* HAVE_LIBTRACEEVENT */ static int intel_pt_context_switch_in(struct intel_pt *pt, struct perf_sample *sample) @@ -3433,9 +3435,12 @@ static int intel_pt_process_event(struct perf_session *session, return err; } +#ifdef HAVE_LIBTRACEEVENT if (pt->switch_evsel && event->header.type == PERF_RECORD_SAMPLE) err = intel_pt_process_switch(pt, sample); - else if (event->header.type == PERF_RECORD_ITRACE_START) + else +#endif + if (event->header.type == PERF_RECORD_ITRACE_START) err = intel_pt_process_itrace_start(pt, event, sample); else if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID) err = intel_pt_process_aux_output_hw_id(pt, event, sample); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 6502cd679f57..21cce83462b3 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -445,6 +445,7 @@ out_free_terms: return ret; } +#ifdef HAVE_LIBTRACEEVENT static void tracepoint_error(struct parse_events_error *e, int err, const char *sys, const char 
*name) { @@ -593,6 +594,7 @@ static int add_tracepoint_multi_sys(struct list_head *list, int *idx, closedir(events_dir); return ret; } +#endif /* HAVE_LIBTRACEEVENT */ #ifdef HAVE_LIBBPF_SUPPORT struct __add_bpf_event_param { @@ -1143,6 +1145,7 @@ static int config_term_pmu(struct perf_event_attr *attr, return config_term_common(attr, term, err); } +#ifdef HAVE_LIBTRACEEVENT static int config_term_tracepoint(struct perf_event_attr *attr, struct parse_events_term *term, struct parse_events_error *err) @@ -1170,6 +1173,7 @@ static int config_term_tracepoint(struct perf_event_attr *attr, return 0; } +#endif static int config_attr(struct perf_event_attr *attr, struct list_head *head, @@ -1325,6 +1329,7 @@ int parse_events_add_tracepoint(struct list_head *list, int *idx, struct parse_events_error *err, struct list_head *head_config) { +#ifdef HAVE_LIBTRACEEVENT if (head_config) { struct perf_event_attr attr; @@ -1339,6 +1344,16 @@ int parse_events_add_tracepoint(struct list_head *list, int *idx, else return add_tracepoint_event(list, idx, sys, event, err, head_config); +#else + (void)list; + (void)idx; + (void)sys; + (void)event; + (void)head_config; + parse_events_error__handle(err, 0, strdup("unsupported tracepoint"), + strdup("libtraceevent is necessary for tracepoint support")); + return -1; +#endif } int parse_events_add_numeric(struct parse_events_state *parse_state, diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 07df7bb7b042..428e72eaafcc 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -18,7 +18,6 @@ struct parse_events_error; struct option; struct perf_pmu; -bool have_tracepoints(struct list_head *evlist); bool is_event_supported(u8 type, u64 config); const char *event_type(int type); diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index b5941c74a0d6..6fb84b7455b8 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -5,7 +5,9 @@ #include #include #include 
+#ifdef HAVE_LIBTRACEEVENT #include +#endif #include #include "evlist.h" #include "callchain.h" @@ -417,6 +419,7 @@ static PyObject *pyrf_sample_event__repr(struct pyrf_event *pevent) return ret; } +#ifdef HAVE_LIBTRACEEVENT static bool is_tracepoint(struct pyrf_event *pevent) { return pevent->evsel->core.attr.type == PERF_TYPE_TRACEPOINT; @@ -486,14 +489,17 @@ get_tracepoint_field(struct pyrf_event *pevent, PyObject *attr_name) return tracepoint_field(pevent, field); } +#endif /* HAVE_LIBTRACEEVENT */ static PyObject* pyrf_sample_event__getattro(struct pyrf_event *pevent, PyObject *attr_name) { PyObject *obj = NULL; +#ifdef HAVE_LIBTRACEEVENT if (is_tracepoint(pevent)) obj = get_tracepoint_field(pevent, attr_name); +#endif return obj ?: PyObject_GenericGetAttr((PyObject *) pevent, attr_name); } @@ -1326,6 +1332,9 @@ static struct { static PyObject *pyrf__tracepoint(struct pyrf_evsel *pevsel, PyObject *args, PyObject *kwargs) { +#ifndef HAVE_LIBTRACEEVENT + return NULL; +#else struct tep_event *tp_format; static char *kwlist[] = { "sys", "name", NULL }; char *sys = NULL; @@ -1340,6 +1349,7 @@ static PyObject *pyrf__tracepoint(struct pyrf_evsel *pevsel, return _PyLong_FromLong(-1); return _PyLong_FromLong(tp_format->id); +#endif // HAVE_LIBTRACEEVENT } static PyMethodDef perf__methods[] = { diff --git a/tools/perf/util/scripting-engines/Build b/tools/perf/util/scripting-engines/Build index 0f5ba28339cf..d47820c0b4d4 100644 --- a/tools/perf/util/scripting-engines/Build +++ b/tools/perf/util/scripting-engines/Build @@ -1,5 +1,7 @@ -perf-$(CONFIG_LIBPERL) += trace-event-perl.o -perf-$(CONFIG_LIBPYTHON) += trace-event-python.o +ifeq ($(CONFIG_LIBTRACEEVENT),y) + perf-$(CONFIG_LIBPERL) += trace-event-perl.o + perf-$(CONFIG_LIBPYTHON) += trace-event-python.o +endif CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-nested-externs -Wno-undef -Wno-switch-default -Wno-bad-function-cast 
-Wno-declaration-after-statement -Wno-switch-enum diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index 5b602b6d4685..0bacb49408f8 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -27,6 +27,7 @@ #include #include #include +#include #include /* perl needs the following define, right after including stdbool.h */ diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index d685a7399ee2..fabba21919b8 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "../build-id.h" #include "../counts.h" diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 1facd4616317..7c021c6cedb9 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -314,7 +314,9 @@ void perf_session__delete(struct perf_session *session) evlist__delete(session->evlist); perf_data__close(session->data); } +#ifdef HAVE_LIBTRACEEVENT trace_event__cleanup(&session->tevent); +#endif free(session); } diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index be5871ea558f..ee3715e8563b 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -33,7 +33,9 @@ struct perf_session { struct auxtrace *auxtrace; struct itrace_synth_opts *itrace_synth_opts; struct list_head auxtrace_index; +#ifdef HAVE_LIBTRACEEVENT struct trace_event tevent; +#endif struct perf_record_time_conv time_conv; bool repipe; bool one_mmap; diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 43e7ca40b2ec..e80ffbbfacfb 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -63,12 +63,18 @@ libperf = getenv('LIBPERF') ext_sources = [f.strip() for f in open('util/python-ext-sources') if 
len(f.strip()) > 0 and f[0] != '#'] +extra_libraries = [] + +if '-DHAVE_LIBTRACEEVENT' in cflags: + extra_libraries += [ 'traceevent' ] +else: + ext_sources.remove('util/trace-event.c') + # use full paths with source files ext_sources = list(map(lambda x: '%s/%s' % (src_perf, x) , ext_sources)) -extra_libraries = [] if '-DHAVE_LIBNUMA_SUPPORT' in cflags: - extra_libraries = [ 'numa' ] + extra_libraries += [ 'numa' ] if '-DHAVE_LIBCAP_SUPPORT' in cflags: extra_libraries += [ 'cap' ] diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 2e7330867e2e..c7a97b33e134 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -22,7 +22,6 @@ #include "srcline.h" #include "strlist.h" #include "strbuf.h" -#include #include "mem-events.h" #include "annotate.h" #include "event.h" @@ -32,6 +31,10 @@ #include #include +#ifdef HAVE_LIBTRACEEVENT +#include +#endif + regex_t parent_regex; const char default_parent_pattern[] = "^sys_|^do_page_fault"; const char *parent_pattern = default_parent_pattern; @@ -743,6 +746,7 @@ struct sort_entry sort_time = { /* --sort trace */ +#ifdef HAVE_LIBTRACEEVENT static char *get_trace_output(struct hist_entry *he) { struct trace_seq seq; @@ -806,6 +810,7 @@ struct sort_entry sort_trace = { .se_snprintf = hist_entry__trace_snprintf, .se_width_idx = HISTC_TRACE, }; +#endif /* HAVE_LIBTRACEEVENT */ /* sort keys for branch stacks */ @@ -2022,7 +2027,9 @@ static struct sort_dimension common_sort_dimensions[] = { DIM(SORT_LOCAL_WEIGHT, "local_weight", sort_local_weight), DIM(SORT_GLOBAL_WEIGHT, "weight", sort_global_weight), DIM(SORT_TRANSACTION, "transaction", sort_transaction), +#ifdef HAVE_LIBTRACEEVENT DIM(SORT_TRACE, "trace", sort_trace), +#endif DIM(SORT_SYM_SIZE, "symbol_size", sort_sym_size), DIM(SORT_DSO_SIZE, "dso_size", sort_dso_size), DIM(SORT_CGROUP, "cgroup", sort_cgroup), @@ -2206,7 +2213,14 @@ bool perf_hpp__is_ ## key ## _entry(struct perf_hpp_fmt *fmt) \ return hse->se == &sort_ ## key ; \ } +#ifdef 
HAVE_LIBTRACEEVENT MK_SORT_ENTRY_CHK(trace) +#else +bool perf_hpp__is_trace_entry(struct perf_hpp_fmt *fmt __maybe_unused) +{ + return false; +} +#endif MK_SORT_ENTRY_CHK(srcline) MK_SORT_ENTRY_CHK(srcfile) MK_SORT_ENTRY_CHK(thread) @@ -2347,6 +2361,17 @@ static int __sort_dimension__add_hpp_output(struct sort_dimension *sd, return 0; } +#ifndef HAVE_LIBTRACEEVENT +bool perf_hpp__is_dynamic_entry(struct perf_hpp_fmt *fmt __maybe_unused) +{ + return false; +} +bool perf_hpp__defined_dynamic_entry(struct perf_hpp_fmt *fmt __maybe_unused, + struct hists *hists __maybe_unused) +{ + return false; +} +#else struct hpp_dynamic_entry { struct perf_hpp_fmt hpp; struct evsel *evsel; @@ -2621,6 +2646,7 @@ __alloc_dynamic_entry(struct evsel *evsel, struct tep_format_field *field, return hde; } +#endif /* HAVE_LIBTRACEEVENT */ struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt) { @@ -2633,6 +2659,7 @@ struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt) new_hse = memdup(hse, sizeof(*hse)); if (new_hse) new_fmt = &new_hse->hpp; +#ifdef HAVE_LIBTRACEEVENT } else if (perf_hpp__is_dynamic_entry(fmt)) { struct hpp_dynamic_entry *hde, *new_hde; @@ -2640,6 +2667,7 @@ struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt) new_hde = memdup(hde, sizeof(*hde)); if (new_hde) new_fmt = &new_hde->hpp; +#endif } else { new_fmt = memdup(fmt, sizeof(*fmt)); } @@ -2719,6 +2747,7 @@ static struct evsel *find_evsel(struct evlist *evlist, char *event_name) return evsel; } +#ifdef HAVE_LIBTRACEEVENT static int __dynamic_dimension__add(struct evsel *evsel, struct tep_format_field *field, bool raw_trace, int level) @@ -2789,13 +2818,13 @@ static int add_all_matching_fields(struct evlist *evlist, } return ret; } +#endif /* HAVE_LIBTRACEEVENT */ static int add_dynamic_entry(struct evlist *evlist, const char *tok, int level) { char *str, *event_name, *field_name, *opt_name; struct evsel *evsel; - struct tep_format_field *field; bool raw_trace = symbol_conf.raw_trace; 
int ret = 0; @@ -2820,6 +2849,7 @@ static int add_dynamic_entry(struct evlist *evlist, const char *tok, raw_trace = true; } +#ifdef HAVE_LIBTRACEEVENT if (!strcmp(field_name, "trace_fields")) { ret = add_all_dynamic_fields(evlist, raw_trace, level); goto out; @@ -2829,6 +2859,7 @@ static int add_dynamic_entry(struct evlist *evlist, const char *tok, ret = add_all_matching_fields(evlist, field_name, raw_trace, level); goto out; } +#endif evsel = find_evsel(evlist, event_name); if (evsel == NULL) { @@ -2843,10 +2874,12 @@ static int add_dynamic_entry(struct evlist *evlist, const char *tok, goto out; } +#ifdef HAVE_LIBTRACEEVENT if (!strcmp(field_name, "*")) { ret = add_evsel_fields(evsel, raw_trace, level); } else { - field = tep_find_any_field(evsel->tp_format, field_name); + struct tep_format_field *field = tep_find_any_field(evsel->tp_format, field_name); + if (field == NULL) { pr_debug("Cannot find event field for %s.%s\n", event_name, field_name); @@ -2855,6 +2888,10 @@ static int add_dynamic_entry(struct evlist *evlist, const char *tok, ret = __dynamic_dimension__add(evsel, field, raw_trace, level); } +#else + (void)level; + (void)raw_trace; +#endif /* HAVE_LIBTRACEEVENT */ out: free(str); @@ -2955,11 +2992,11 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok, for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) { struct sort_dimension *sd = &common_sort_dimensions[i]; - if (strncasecmp(tok, sd->name, strlen(tok))) + if (!sd->name || strncasecmp(tok, sd->name, strlen(tok))) continue; for (j = 0; j < ARRAY_SIZE(dynamic_headers); j++) { - if (!strcmp(dynamic_headers[j], sd->name)) + if (sd->name && !strcmp(dynamic_headers[j], sd->name)) sort_dimension_add_dynamic_header(sd); } @@ -3009,7 +3046,7 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok, for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) { struct sort_dimension *sd = &bstack_sort_dimensions[i]; - if (strncasecmp(tok, sd->name, strlen(tok))) + if 
(!sd->name || strncasecmp(tok, sd->name, strlen(tok))) continue; if (sort__mode != SORT_MODE__BRANCH) @@ -3025,7 +3062,7 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok, for (i = 0; i < ARRAY_SIZE(memory_sort_dimensions); i++) { struct sort_dimension *sd = &memory_sort_dimensions[i]; - if (strncasecmp(tok, sd->name, strlen(tok))) + if (!sd->name || strncasecmp(tok, sd->name, strlen(tok))) continue; if (sort__mode != SORT_MODE__MEMORY) @@ -3339,7 +3376,7 @@ int output_field_add(struct perf_hpp_list *list, char *tok) for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) { struct sort_dimension *sd = &common_sort_dimensions[i]; - if (strncasecmp(tok, sd->name, strlen(tok))) + if (!sd->name || strncasecmp(tok, sd->name, strlen(tok))) continue; return __sort_dimension__add_output(list, sd); @@ -3357,7 +3394,7 @@ int output_field_add(struct perf_hpp_list *list, char *tok) for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) { struct sort_dimension *sd = &bstack_sort_dimensions[i]; - if (strncasecmp(tok, sd->name, strlen(tok))) + if (!sd->name || strncasecmp(tok, sd->name, strlen(tok))) continue; if (sort__mode != SORT_MODE__BRANCH) @@ -3369,7 +3406,7 @@ int output_field_add(struct perf_hpp_list *list, char *tok) for (i = 0; i < ARRAY_SIZE(memory_sort_dimensions); i++) { struct sort_dimension *sd = &memory_sort_dimensions[i]; - if (strncasecmp(tok, sd->name, strlen(tok))) + if (!sd->name || strncasecmp(tok, sd->name, strlen(tok))) continue; if (sort__mode != SORT_MODE__MEMORY) @@ -3508,6 +3545,9 @@ void reset_output_field(void) static void add_key(struct strbuf *sb, const char *str, int *llen) { + if (!str) + return; + if (*llen >= 75) { strbuf_addstr(sb, "\n\t\t\t "); *llen = INDENT; diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 0645795ff080..3ab6a92b1a6d 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -2157,6 +2157,7 @@ int 
perf_event__synthesize_attr(struct perf_tool *tool, struct perf_event_attr * return err; } +#ifdef HAVE_LIBTRACEEVENT int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct evlist *evlist, perf_event__handler_t process) { @@ -2203,6 +2204,7 @@ int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct e return aligned_size; } +#endif int perf_event__synthesize_build_id(struct perf_tool *tool, struct dso *pos, u16 misc, perf_event__handler_t process, struct machine *machine) @@ -2355,6 +2357,7 @@ int perf_event__synthesize_for_pipe(struct perf_tool *tool, } ret += err; +#ifdef HAVE_LIBTRACEEVENT if (have_tracepoints(&evlist->core.entries)) { int fd = perf_data__fd(data); @@ -2374,6 +2377,9 @@ int perf_event__synthesize_for_pipe(struct perf_tool *tool, } ret += err; } +#else + (void)data; +#endif return ret; } diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index c9c83a40647c..2d3c2576bab7 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -11,6 +11,8 @@ #include "trace-event.h" #include +#include +#include static int get_common_field(struct scripting_context *context, int *offset, int *size, const char *type) diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index 43146a4ce2fb..1162c49b8082 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c index 636a010d929b..56175c53f9af 100644 --- a/tools/perf/util/trace-event-scripting.c +++ b/tools/perf/util/trace-event-scripting.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "debug.h" #include "trace-event.h" diff --git a/tools/perf/util/trace-event.c b/tools/perf/util/trace-event.c index b3ee651e3d91..8ad75b31e09b 100644 --- 
a/tools/perf/util/trace-event.c +++ b/tools/perf/util/trace-event.c @@ -1,5 +1,4 @@ // SPDX-License-Identifier: GPL-2.0 - #include #include #include diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index 8f39f5bcb2c2..add6c5d9531c 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -2,9 +2,11 @@ #ifndef _PERF_UTIL_TRACE_EVENT_H #define _PERF_UTIL_TRACE_EVENT_H -#include -#include "parse-events.h" +#include +#include +#include +struct evlist; struct machine; struct perf_sample; union perf_event; @@ -18,6 +20,11 @@ struct trace_event { struct tep_plugin_list *plugin_list; }; +typedef char *(tep_func_resolver_t)(void *priv, + unsigned long long *addrp, char **modp); + +bool have_tracepoints(struct list_head *evlist); + int trace_event__init(struct trace_event *t); void trace_event__cleanup(struct trace_event *t); int trace_event__register_resolver(struct machine *machine, From 4171925aa9f3f7bf57b100238f148b50c45c3b1b Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 29 Nov 2022 22:29:35 -0800 Subject: [PATCH 3869/4122] tools lib traceevent: Remove libtraceevent libtraceevent is now out-of-date and it is better to depend on the system version. Remove this code that is no longer depended upon by any builds. Committer notes: Removed the removed tools/lib/traceevent/ from tools/perf/MANIFEST, so that 'make perf-tar-src-pkg' works. 
Signed-off-by: Ian Rogers Acked-by: Steven Rostedt (VMware) Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20221130062935.2219247-5-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/.gitignore | 4 - tools/lib/traceevent/Build | 8 - tools/lib/traceevent/Documentation/Makefile | 207 - .../traceevent/Documentation/asciidoc.conf | 120 - .../Documentation/libtraceevent-commands.txt | 153 - .../Documentation/libtraceevent-cpus.txt | 77 - .../libtraceevent-endian_read.txt | 78 - .../libtraceevent-event_find.txt | 103 - .../Documentation/libtraceevent-event_get.txt | 99 - .../libtraceevent-event_list.txt | 122 - .../libtraceevent-event_print.txt | 130 - .../libtraceevent-field_find.txt | 118 - .../libtraceevent-field_get_val.txt | 122 - .../libtraceevent-field_print.txt | 126 - .../libtraceevent-field_read.txt | 81 - .../Documentation/libtraceevent-fields.txt | 105 - .../libtraceevent-file_endian.txt | 91 - .../Documentation/libtraceevent-filter.txt | 209 - .../Documentation/libtraceevent-func_apis.txt | 183 - .../Documentation/libtraceevent-func_find.txt | 88 - .../Documentation/libtraceevent-handle.txt | 101 - .../libtraceevent-header_page.txt | 102 - .../libtraceevent-host_endian.txt | 104 - .../Documentation/libtraceevent-long_size.txt | 78 - .../Documentation/libtraceevent-page_size.txt | 82 - .../libtraceevent-parse_event.txt | 90 - .../libtraceevent-parse_head.txt | 82 - .../Documentation/libtraceevent-plugins.txt | 122 - .../libtraceevent-record_parse.txt | 137 - .../libtraceevent-reg_event_handler.txt | 156 - .../libtraceevent-reg_print_func.txt | 155 - .../Documentation/libtraceevent-set_flag.txt | 104 - .../Documentation/libtraceevent-strerror.txt | 85 - .../Documentation/libtraceevent-tseq.txt | 158 - .../Documentation/libtraceevent.txt | 192 - 
.../traceevent/Documentation/manpage-1.72.xsl | 14 - .../traceevent/Documentation/manpage-base.xsl | 35 - .../Documentation/manpage-bold-literal.xsl | 17 - .../Documentation/manpage-normal.xsl | 13 - .../Documentation/manpage-suppress-sp.xsl | 21 - tools/lib/traceevent/Makefile | 300 - tools/lib/traceevent/event-parse-api.c | 333 - tools/lib/traceevent/event-parse-local.h | 123 - tools/lib/traceevent/event-parse.c | 7624 ----------------- tools/lib/traceevent/event-parse.h | 750 -- tools/lib/traceevent/event-plugin.c | 711 -- tools/lib/traceevent/event-utils.h | 67 - tools/lib/traceevent/kbuffer-parse.c | 809 -- tools/lib/traceevent/kbuffer.h | 68 - .../lib/traceevent/libtraceevent.pc.template | 10 - tools/lib/traceevent/parse-filter.c | 2281 ----- tools/lib/traceevent/parse-utils.c | 71 - tools/lib/traceevent/plugins/Build | 12 - tools/lib/traceevent/plugins/Makefile | 225 - .../lib/traceevent/plugins/plugin_cfg80211.c | 43 - .../lib/traceevent/plugins/plugin_function.c | 282 - tools/lib/traceevent/plugins/plugin_futex.c | 123 - tools/lib/traceevent/plugins/plugin_hrtimer.c | 74 - tools/lib/traceevent/plugins/plugin_jbd2.c | 61 - tools/lib/traceevent/plugins/plugin_kmem.c | 80 - tools/lib/traceevent/plugins/plugin_kvm.c | 527 -- .../lib/traceevent/plugins/plugin_mac80211.c | 88 - .../traceevent/plugins/plugin_sched_switch.c | 146 - tools/lib/traceevent/plugins/plugin_scsi.c | 434 - tools/lib/traceevent/plugins/plugin_tlb.c | 66 - tools/lib/traceevent/plugins/plugin_xen.c | 138 - tools/lib/traceevent/tep_strerror.c | 53 - tools/lib/traceevent/trace-seq.c | 249 - tools/lib/traceevent/trace-seq.h | 55 - tools/perf/MANIFEST | 1 - 70 files changed, 19876 deletions(-) delete mode 100644 tools/lib/traceevent/.gitignore delete mode 100644 tools/lib/traceevent/Build delete mode 100644 tools/lib/traceevent/Documentation/Makefile delete mode 100644 tools/lib/traceevent/Documentation/asciidoc.conf delete mode 100644 
tools/lib/traceevent/Documentation/libtraceevent-commands.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-cpus.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-endian_read.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-event_find.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-event_get.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-event_list.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-event_print.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-field_find.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-field_get_val.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-field_print.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-field_read.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-fields.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-file_endian.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-filter.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-func_apis.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-func_find.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-handle.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-header_page.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-host_endian.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-long_size.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-page_size.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-parse_event.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-parse_head.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-plugins.txt delete mode 100644 
tools/lib/traceevent/Documentation/libtraceevent-record_parse.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-reg_event_handler.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-reg_print_func.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-set_flag.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-strerror.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-tseq.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent.txt delete mode 100644 tools/lib/traceevent/Documentation/manpage-1.72.xsl delete mode 100644 tools/lib/traceevent/Documentation/manpage-base.xsl delete mode 100644 tools/lib/traceevent/Documentation/manpage-bold-literal.xsl delete mode 100644 tools/lib/traceevent/Documentation/manpage-normal.xsl delete mode 100644 tools/lib/traceevent/Documentation/manpage-suppress-sp.xsl delete mode 100644 tools/lib/traceevent/Makefile delete mode 100644 tools/lib/traceevent/event-parse-api.c delete mode 100644 tools/lib/traceevent/event-parse-local.h delete mode 100644 tools/lib/traceevent/event-parse.c delete mode 100644 tools/lib/traceevent/event-parse.h delete mode 100644 tools/lib/traceevent/event-plugin.c delete mode 100644 tools/lib/traceevent/event-utils.h delete mode 100644 tools/lib/traceevent/kbuffer-parse.c delete mode 100644 tools/lib/traceevent/kbuffer.h delete mode 100644 tools/lib/traceevent/libtraceevent.pc.template delete mode 100644 tools/lib/traceevent/parse-filter.c delete mode 100644 tools/lib/traceevent/parse-utils.c delete mode 100644 tools/lib/traceevent/plugins/Build delete mode 100644 tools/lib/traceevent/plugins/Makefile delete mode 100644 tools/lib/traceevent/plugins/plugin_cfg80211.c delete mode 100644 tools/lib/traceevent/plugins/plugin_function.c delete mode 100644 tools/lib/traceevent/plugins/plugin_futex.c delete mode 100644 tools/lib/traceevent/plugins/plugin_hrtimer.c delete mode 100644 
tools/lib/traceevent/plugins/plugin_jbd2.c delete mode 100644 tools/lib/traceevent/plugins/plugin_kmem.c delete mode 100644 tools/lib/traceevent/plugins/plugin_kvm.c delete mode 100644 tools/lib/traceevent/plugins/plugin_mac80211.c delete mode 100644 tools/lib/traceevent/plugins/plugin_sched_switch.c delete mode 100644 tools/lib/traceevent/plugins/plugin_scsi.c delete mode 100644 tools/lib/traceevent/plugins/plugin_tlb.c delete mode 100644 tools/lib/traceevent/plugins/plugin_xen.c delete mode 100644 tools/lib/traceevent/tep_strerror.c delete mode 100644 tools/lib/traceevent/trace-seq.c delete mode 100644 tools/lib/traceevent/trace-seq.h diff --git a/tools/lib/traceevent/.gitignore b/tools/lib/traceevent/.gitignore deleted file mode 100644 index 7123c70b9ebc..000000000000 --- a/tools/lib/traceevent/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -TRACEEVENT-CFLAGS -libtraceevent-dynamic-list -libtraceevent.so.* diff --git a/tools/lib/traceevent/Build b/tools/lib/traceevent/Build deleted file mode 100644 index f9a5d79578f5..000000000000 --- a/tools/lib/traceevent/Build +++ /dev/null @@ -1,8 +0,0 @@ -libtraceevent-y += event-parse.o -libtraceevent-y += event-plugin.o -libtraceevent-y += trace-seq.o -libtraceevent-y += parse-filter.o -libtraceevent-y += parse-utils.o -libtraceevent-y += kbuffer-parse.o -libtraceevent-y += tep_strerror.o -libtraceevent-y += event-parse-api.o diff --git a/tools/lib/traceevent/Documentation/Makefile b/tools/lib/traceevent/Documentation/Makefile deleted file mode 100644 index aa72ab96c3c1..000000000000 --- a/tools/lib/traceevent/Documentation/Makefile +++ /dev/null @@ -1,207 +0,0 @@ -include ../../../scripts/Makefile.include -include ../../../scripts/utilities.mak - -# This Makefile and manpage XSL files were taken from tools/perf/Documentation -# and modified for libtraceevent. 
- -MAN3_TXT= \ - $(wildcard libtraceevent-*.txt) \ - libtraceevent.txt - -MAN_TXT = $(MAN3_TXT) -_MAN_XML=$(patsubst %.txt,%.xml,$(MAN_TXT)) -_MAN_HTML=$(patsubst %.txt,%.html,$(MAN_TXT)) -_DOC_MAN3=$(patsubst %.txt,%.3,$(MAN3_TXT)) - -MAN_XML=$(addprefix $(OUTPUT),$(_MAN_XML)) -MAN_HTML=$(addprefix $(OUTPUT),$(_MAN_HTML)) -DOC_MAN3=$(addprefix $(OUTPUT),$(_DOC_MAN3)) - -# Make the path relative to DESTDIR, not prefix -ifndef DESTDIR -prefix?=$(HOME) -endif -bindir?=$(prefix)/bin -htmldir?=$(prefix)/share/doc/libtraceevent-doc -pdfdir?=$(prefix)/share/doc/libtraceevent-doc -mandir?=$(prefix)/share/man -man3dir=$(mandir)/man3 - -ASCIIDOC=asciidoc -ASCIIDOC_EXTRA = --unsafe -f asciidoc.conf -ASCIIDOC_HTML = xhtml11 -MANPAGE_XSL = manpage-normal.xsl -XMLTO_EXTRA = -INSTALL?=install -RM ?= rm -f - -ifdef USE_ASCIIDOCTOR -ASCIIDOC = asciidoctor -ASCIIDOC_EXTRA = -a compat-mode -ASCIIDOC_EXTRA += -I. -rasciidoctor-extensions -ASCIIDOC_EXTRA += -a mansource="libtraceevent" -a manmanual="libtraceevent Manual" -ASCIIDOC_HTML = xhtml5 -endif - -XMLTO=xmlto - -_tmp_tool_path := $(call get-executable,$(ASCIIDOC)) -ifeq ($(_tmp_tool_path),) - missing_tools = $(ASCIIDOC) -endif - -ifndef USE_ASCIIDOCTOR -_tmp_tool_path := $(call get-executable,$(XMLTO)) -ifeq ($(_tmp_tool_path),) - missing_tools += $(XMLTO) -endif -endif - -# -# For asciidoc ... -# -7.1.2, no extra settings are needed. -# 8.0-, set ASCIIDOC8. -# - -# -# For docbook-xsl ... -# -1.68.1, set ASCIIDOC_NO_ROFF? (based on changelog from 1.73.0) -# 1.69.0, no extra settings are needed? -# 1.69.1-1.71.0, set DOCBOOK_SUPPRESS_SP? -# 1.71.1, no extra settings are needed? -# 1.72.0, set DOCBOOK_XSL_172. -# 1.73.0-, set ASCIIDOC_NO_ROFF -# - -# -# If you had been using DOCBOOK_XSL_172 in an attempt to get rid -# of 'the ".ft C" problem' in your generated manpages, and you -# instead ended up with weird characters around callouts, try -# using ASCIIDOC_NO_ROFF instead (it works fine with ASCIIDOC8). 
-# - -ifdef ASCIIDOC8 -ASCIIDOC_EXTRA += -a asciidoc7compatible -endif -ifdef DOCBOOK_XSL_172 -ASCIIDOC_EXTRA += -a libtraceevent-asciidoc-no-roff -MANPAGE_XSL = manpage-1.72.xsl -else - ifdef ASCIIDOC_NO_ROFF - # docbook-xsl after 1.72 needs the regular XSL, but will not - # pass-thru raw roff codes from asciidoc.conf, so turn them off. - ASCIIDOC_EXTRA += -a libtraceevent-asciidoc-no-roff - endif -endif -ifdef MAN_BOLD_LITERAL -XMLTO_EXTRA += -m manpage-bold-literal.xsl -endif -ifdef DOCBOOK_SUPPRESS_SP -XMLTO_EXTRA += -m manpage-suppress-sp.xsl -endif - -SHELL_PATH ?= $(SHELL) -# Shell quote; -SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH)) - -DESTDIR ?= -DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))' - -export DESTDIR DESTDIR_SQ - -# -# Please note that there is a minor bug in asciidoc. -# The version after 6.0.3 _will_ include the patch found here: -# http://marc.theaimsgroup.com/?l=libtraceevent&m=111558757202243&w=2 -# -# Until that version is released you may have to apply the patch -# yourself - yes, all 6 characters of it! 
-# -QUIET_SUBDIR0 = +$(MAKE) -C # space to separate -C and subdir -QUIET_SUBDIR1 = - -ifneq ($(findstring $(MAKEFLAGS),w),w) -PRINT_DIR = --no-print-directory -else # "make -w" -NO_SUBDIR = : -endif - -ifneq ($(findstring $(MAKEFLAGS),s),s) -ifneq ($(V),1) - QUIET_ASCIIDOC = @echo ' ASCIIDOC '$@; - QUIET_XMLTO = @echo ' XMLTO '$@; - QUIET_SUBDIR0 = +@subdir= - QUIET_SUBDIR1 = ;$(NO_SUBDIR) \ - echo ' SUBDIR ' $$subdir; \ - $(MAKE) $(PRINT_DIR) -C $$subdir - export V -endif -endif - -all: html man - -man: man3 -man3: $(DOC_MAN3) - -html: $(MAN_HTML) - -$(MAN_HTML) $(DOC_MAN3): asciidoc.conf - -install: install-man - -check-man-tools: -ifdef missing_tools - $(error "You need to install $(missing_tools) for man pages") -endif - -do-install-man: man - $(call QUIET_INSTALL, Documentation-man) \ - $(INSTALL) -d -m 755 $(DESTDIR)$(man3dir); \ - $(INSTALL) -m 644 $(DOC_MAN3) $(DESTDIR)$(man3dir); - -install-man: check-man-tools man do-install-man - -uninstall: uninstall-man - -uninstall-man: - $(call QUIET_UNINST, Documentation-man) \ - $(Q)$(RM) $(addprefix $(DESTDIR)$(man3dir)/,$(DOC_MAN3)) - - -ifdef missing_tools - DO_INSTALL_MAN = $(warning Please install $(missing_tools) to have the man pages installed) -else - DO_INSTALL_MAN = do-install-man -endif - -CLEAN_FILES = \ - $(MAN_XML) $(addsuffix +,$(MAN_XML)) \ - $(MAN_HTML) $(addsuffix +,$(MAN_HTML)) \ - $(DOC_MAN3) *.3 - -clean: - $(call QUIET_CLEAN, Documentation) $(RM) $(CLEAN_FILES) - -ifdef USE_ASCIIDOCTOR -$(OUTPUT)%.3 : $(OUTPUT)%.txt - $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ - $(ASCIIDOC) -b manpage -d manpage \ - $(ASCIIDOC_EXTRA) -alibtraceevent_version=$(EVENT_PARSE_VERSION) -o $@+ $< && \ - mv $@+ $@ -endif - -$(OUTPUT)%.3 : $(OUTPUT)%.xml - $(QUIET_XMLTO)$(RM) $@ && \ - $(XMLTO) -o $(OUTPUT). 
-m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $< - -$(OUTPUT)%.xml : %.txt - $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ - $(ASCIIDOC) -b docbook -d manpage \ - $(ASCIIDOC_EXTRA) -alibtraceevent_version=$(EVENT_PARSE_VERSION) -o $@+ $< && \ - mv $@+ $@ - -$(MAN_HTML): $(OUTPUT)%.html : %.txt - $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ - $(ASCIIDOC) -b $(ASCIIDOC_HTML) -d manpage \ - $(ASCIIDOC_EXTRA) -aperf_version=$(EVENT_PARSE_VERSION) -o $@+ $< && \ - mv $@+ $@ diff --git a/tools/lib/traceevent/Documentation/asciidoc.conf b/tools/lib/traceevent/Documentation/asciidoc.conf deleted file mode 100644 index 07595717f06e..000000000000 --- a/tools/lib/traceevent/Documentation/asciidoc.conf +++ /dev/null @@ -1,120 +0,0 @@ -## linktep: macro -# -# Usage: linktep:command[manpage-section] -# -# Note, {0} is the manpage section, while {target} is the command. -# -# Show TEP link as: (
); if section is defined, else just show -# the command. - -[macros] -(?su)[\\]?(?Plinktep):(?P\S*?)\[(?P.*?)\]= - -[attributes] -asterisk=* -plus=+ -caret=^ -startsb=[ -endsb=] -tilde=~ - -ifdef::backend-docbook[] -[linktep-inlinemacro] -{0%{target}} -{0#} -{0#{target}{0}} -{0#} -endif::backend-docbook[] - -ifdef::backend-docbook[] -ifndef::tep-asciidoc-no-roff[] -# "unbreak" docbook-xsl v1.68 for manpages. v1.69 works with or without this. -# v1.72 breaks with this because it replaces dots not in roff requests. -[listingblock] -{title} - -ifdef::doctype-manpage[] - .ft C -endif::doctype-manpage[] -| -ifdef::doctype-manpage[] - .ft -endif::doctype-manpage[] - -{title#} -endif::tep-asciidoc-no-roff[] - -ifdef::tep-asciidoc-no-roff[] -ifdef::doctype-manpage[] -# The following two small workarounds insert a simple paragraph after screen -[listingblock] -{title} - -| - -{title#} - -[verseblock] -{title} -{title%} -{title#} -| - -{title#} -{title%} -endif::doctype-manpage[] -endif::tep-asciidoc-no-roff[] -endif::backend-docbook[] - -ifdef::doctype-manpage[] -ifdef::backend-docbook[] -[header] -template::[header-declarations] - - -{mantitle} -{manvolnum} -libtraceevent -{libtraceevent_version} -libtraceevent Manual - - - {manname1} - {manname2} - {manname3} - {manname4} - {manname5} - {manname6} - {manname7} - {manname8} - {manname9} - {manname10} - {manname11} - {manname12} - {manname13} - {manname14} - {manname15} - {manname16} - {manname17} - {manname18} - {manname19} - {manname20} - {manname21} - {manname22} - {manname23} - {manname24} - {manname25} - {manname26} - {manname27} - {manname28} - {manname29} - {manname30} - {manpurpose} - -endif::backend-docbook[] -endif::doctype-manpage[] - -ifdef::backend-xhtml11[] -[linktep-inlinemacro] -{target}{0?({0})} -endif::backend-xhtml11[] diff --git a/tools/lib/traceevent/Documentation/libtraceevent-commands.txt b/tools/lib/traceevent/Documentation/libtraceevent-commands.txt deleted file mode 100644 index 
bec552001f8e..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-commands.txt +++ /dev/null @@ -1,153 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_register_comm, tep_override_comm, tep_is_pid_registered, -tep_data_comm_from_pid, tep_data_pid_from_comm, tep_cmdline_pid - -Manage pid to process name mappings. - -SYNOPSIS --------- -[verse] --- -*#include <event-parse.h>* - -int *tep_register_comm*(struct tep_handle pass:[*]_tep_, const char pass:[*]_comm_, int _pid_); -int *tep_override_comm*(struct tep_handle pass:[*]_tep_, const char pass:[*]_comm_, int _pid_); -bool *tep_is_pid_registered*(struct tep_handle pass:[*]_tep_, int _pid_); -const char pass:[*]*tep_data_comm_from_pid*(struct tep_handle pass:[*]_tep_, int _pid_); -struct cmdline pass:[*]*tep_data_pid_from_comm*(struct tep_handle pass:[*]_tep_, const char pass:[*]_comm_, struct cmdline pass:[*]_next_); -int *tep_cmdline_pid*(struct tep_handle pass:[*]_tep_, struct cmdline pass:[*]_cmdline_); --- - -DESCRIPTION ------------ -These functions can be used to handle the mapping between pid and process name. -The library builds a cache of these mappings, which is used to display the name -of the process, instead of its pid. This information can be retrieved from -tracefs/saved_cmdlines file. - -The _tep_register_comm()_ function registers a _pid_ / process name mapping. -If a command with the same _pid_ is already registered, an error is returned. -The _pid_ argument is the process ID, the _comm_ argument is the process name, -_tep_ is the event context. The _comm_ is duplicated internally. - -The _tep_override_comm()_ function registers a _pid_ / process name mapping. -If a process with the same pid is already registered, the process name string is -updated with the new one. The _pid_ argument is the process ID, the _comm_ -argument is the process name, _tep_ is the event context. The _comm_ is -duplicated internally.
- -The _tep_is_pid_registered()_ function checks if a pid has a process name -mapping registered. The _pid_ argument is the process ID, _tep_ is the event -context. - -The _tep_data_comm_from_pid()_ function returns the process name for a given -pid. The _pid_ argument is the process ID, _tep_ is the event context. -The returned string should not be freed, but will be freed when the _tep_ -handler is closed. - -The _tep_data_pid_from_comm()_ function returns a pid for a given process name. -The _comm_ argument is the process name, _tep_ is the event context. -The argument _next_ is the cmdline structure to search for the next pid. -As there may be more than one pid for a given process, the result of this call -can be passed back into a recurring call in the _next_ parameter, to search for -the next pid. If _next_ is NULL, it will return the first pid associated with -the _comm_. The function performs a linear search, so it may be slow. - -The _tep_cmdline_pid()_ function returns the pid associated with a given -_cmdline_. The _tep_ argument is the event context. - -RETURN VALUE ------------- -_tep_register_comm()_ function returns 0 on success. In case of an error -1 is -returned and errno is set to indicate the cause of the problem: ENOMEM, if there -is not enough memory to duplicate the _comm_ or EEXIST if a mapping for this -_pid_ is already registered. - -_tep_override_comm()_ function returns 0 on success. In case of an error -1 is -returned and errno is set to indicate the cause of the problem: ENOMEM, if there -is not enough memory to duplicate the _comm_. - -_tep_is_pid_registered()_ function returns true if the _pid_ has a process name -mapped to it, false otherwise. - -_tep_data_comm_from_pid()_ function returns the process name as string, or the -string "<...>" if there is no mapping for the given pid. - -_tep_data_pid_from_comm()_ function returns a pointer to a struct cmdline, that -holds a pid for a given process, or NULL if none is found. 
This result can be -passed back into a recurring call as the _next_ parameter of the function. - -_tep_cmdline_pid()_ function returns the pid for the given cmdline. If _cmdline_ - is NULL, then -1 is returned. - -EXAMPLE -------- -The following example registers pid for command "ls", in context of event _tep_ -and performs various searches for pid / process name mappings: -[source,c] --- -#include <event-parse.h> -... -int ret; -int ls_pid = 1021; -struct tep_handle *tep = tep_alloc(); -... - ret = tep_register_comm(tep, "ls", ls_pid); - if (ret != 0 && errno == EEXIST) - ret = tep_override_comm(tep, "ls", ls_pid); - if (ret != 0) { - /* Failed to register pid / command mapping */ - } -... - if (tep_is_pid_registered(tep, ls_pid) == 0) { - /* Command mapping for ls_pid is not registered */ - } -... - const char *comm = tep_data_comm_from_pid(tep, ls_pid); - if (comm) { - /* Found process name for ls_pid */ - } -... - int pid; - struct cmdline *cmd = tep_data_pid_from_comm(tep, "ls", NULL); - while (cmd) { - pid = tep_cmdline_pid(tep, cmd); - /* Found pid for process "ls" */ - cmd = tep_data_pid_from_comm(tep, "ls", cmd); - } --- -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page.
--- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-cpus.txt b/tools/lib/traceevent/Documentation/libtraceevent-cpus.txt deleted file mode 100644 index 5ad70e43b752..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-cpus.txt +++ /dev/null @@ -1,77 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_get_cpus, tep_set_cpus - Get / set the number of CPUs, which have a tracing -buffer representing it. Note, the buffer may be empty. - -SYNOPSIS --------- -[verse] --- -*#include <event-parse.h>* - -int *tep_get_cpus*(struct tep_handle pass:[*]_tep_); -void *tep_set_cpus*(struct tep_handle pass:[*]_tep_, int _cpus_); --- - -DESCRIPTION ------------ -The _tep_get_cpus()_ function gets the number of CPUs, which have a tracing -buffer representing it. The _tep_ argument is trace event parser context. - -The _tep_set_cpus()_ function sets the number of CPUs, which have a tracing -buffer representing it. The _tep_ argument is trace event parser context. -The _cpus_ argument is the number of CPUs with tracing data. - -RETURN VALUE ------------- -The _tep_get_cpus()_ function returns the number of CPUs, which have tracing -data recorded. - -EXAMPLE -------- -[source,c] --- -#include <event-parse.h> -... -struct tep_handle *tep = tep_alloc(); -... - tep_set_cpus(tep, 5); -... - printf("We have tracing data for %d CPUs", tep_get_cpus(tep)); --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page.
--- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-endian_read.txt b/tools/lib/traceevent/Documentation/libtraceevent-endian_read.txt deleted file mode 100644 index e64851b6e189..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-endian_read.txt +++ /dev/null @@ -1,78 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_read_number - Reads a number from raw data. - -SYNOPSIS --------- -[verse] --- -*#include * - -unsigned long long *tep_read_number*(struct tep_handle pass:[*]_tep_, const void pass:[*]_ptr_, int _size_); --- - -DESCRIPTION ------------ -The _tep_read_number()_ function reads an integer from raw data, taking into -account the endianness of the raw data and the current host. The _tep_ argument -is the trace event parser context. The _ptr_ is a pointer to the raw data, where -the integer is, and the _size_ is the size of the integer. - -RETURN VALUE ------------- -The _tep_read_number()_ function returns the integer in the byte order of -the current host. In case of an error, 0 is returned. - -EXAMPLE -------- -[source,c] --- -#include -... -struct tep_handle *tep = tep_alloc(); -... -void process_record(struct tep_record *record) -{ - int offset = 24; - int data = tep_read_number(tep, record->data + offset, 4); - - /* Read the 4 bytes at the offset 24 of data as an integer */ -} -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. 
--- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-event_find.txt b/tools/lib/traceevent/Documentation/libtraceevent-event_find.txt deleted file mode 100644 index 7bc062c9f76f..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-event_find.txt +++ /dev/null @@ -1,103 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_find_event, tep_find_event_by_name, tep_find_event_by_record - -Find events by given key. - -SYNOPSIS --------- -[verse] --- -*#include <event-parse.h>* - -struct tep_event pass:[*]*tep_find_event*(struct tep_handle pass:[*]_tep_, int _id_); -struct tep_event pass:[*]*tep_find_event_by_name*(struct tep_handle pass:[*]_tep_, const char pass:[*]_sys_, const char pass:[*]_name_); -struct tep_event pass:[*]*tep_find_event_by_record*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_record_); --- - -DESCRIPTION ------------ -This set of functions can be used to search for an event, based on a given -criterion. All functions require a pointer to a _tep_, trace event parser -context. - -The _tep_find_event()_ function searches for an event by given event _id_. The -event ID is assigned dynamically and can be viewed in event's format file, -"ID" field. - -The _tep_find_event_by_name()_ function searches for an event by given -event _name_, under the system _sys_. If the _sys_ is NULL (not specified), -the first event with _name_ is returned. - -The _tep_find_event_by_record()_ function searches for an event from a given -_record_. - -RETURN VALUE ------------- -All these functions return a pointer to the found event, or NULL if there is no -such event. - -EXAMPLE -------- -[source,c] --- -#include <event-parse.h> -... -struct tep_handle *tep = tep_alloc(); -...
-struct tep_event *event; - -event = tep_find_event(tep, 1857); -if (event == NULL) { - /* There is no event with ID 1857 */ -} - -event = tep_find_event_by_name(tep, "kvm", "kvm_exit"); -if (event == NULL) { - /* There is no kvm_exit event, from kvm system */ -} - -void event_from_record(struct tep_record *record) -{ - struct tep_event *event = tep_find_event_by_record(tep, record); - if (event == NULL) { - /* There is no event from given record */ - } -} -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-event_get.txt b/tools/lib/traceevent/Documentation/libtraceevent-event_get.txt deleted file mode 100644 index 6525092fc417..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-event_get.txt +++ /dev/null @@ -1,99 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_get_event, tep_get_first_event, tep_get_events_count - Access events. - -SYNOPSIS --------- -[verse] --- -*#include * - -struct tep_event pass:[*]*tep_get_event*(struct tep_handle pass:[*]_tep_, int _index_); -struct tep_event pass:[*]*tep_get_first_event*(struct tep_handle pass:[*]_tep_); -int *tep_get_events_count*(struct tep_handle pass:[*]_tep_); --- - -DESCRIPTION ------------ -The _tep_get_event()_ function returns a pointer to event at the given _index_. 
-The _tep_ argument is trace event parser context, the _index_ is the index of -the requested event. - -The _tep_get_first_event()_ function returns a pointer to the first event. -As events are stored in an array, this function returns the pointer to the -beginning of the array. The _tep_ argument is trace event parser context. - -The _tep_get_events_count()_ function returns the number of the events -in the array. The _tep_ argument is trace event parser context. - -RETURN VALUE ------------- -The _tep_get_event()_ returns a pointer to the event located at _index_. -NULL is returned in case of error, in case there are no events or _index_ is -out of range. - -The _tep_get_first_event()_ returns a pointer to the first event. NULL is -returned in case of error, or in case there are no events. - -The _tep_get_events_count()_ returns the number of the events. 0 is -returned in case of error, or in case there are no events. - -EXAMPLE -------- -[source,c] --- -#include -... -struct tep_handle *tep = tep_alloc(); -... -int i,count = tep_get_events_count(tep); -struct tep_event *event, *events = tep_get_first_event(tep); - -if (events == NULL) { - /* There are no events */ -} else { - for (i = 0; i < count; i++) { - event = (events+i); - /* process events[i] */ - } - - /* Get the last event */ - event = tep_get_event(tep, count-1); -} --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. 
--- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-event_list.txt b/tools/lib/traceevent/Documentation/libtraceevent-event_list.txt deleted file mode 100644 index fba350e5a4cb..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-event_list.txt +++ /dev/null @@ -1,122 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_list_events, tep_list_events_copy - -Get list of events, sorted by given criteria. - -SYNOPSIS --------- -[verse] --- -*#include * - -enum *tep_event_sort_type* { - _TEP_EVENT_SORT_ID_, - _TEP_EVENT_SORT_NAME_, - _TEP_EVENT_SORT_SYSTEM_, -}; - -struct tep_event pass:[*]pass:[*]*tep_list_events*(struct tep_handle pass:[*]_tep_, enum tep_event_sort_type _sort_type_); -struct tep_event pass:[*]pass:[*]*tep_list_events_copy*(struct tep_handle pass:[*]_tep_, enum tep_event_sort_type _sort_type_); --- - -DESCRIPTION ------------ -The _tep_list_events()_ function returns an array of pointers to the events, -sorted by the _sort_type_ criteria. The last element of the array is NULL. -The returned memory must not be freed, it is managed by the library. -The function is not thread safe. The _tep_ argument is trace event parser -context. The _sort_type_ argument is the required sort criteria: -[verse] --- - _TEP_EVENT_SORT_ID_ - sort by the event ID. - _TEP_EVENT_SORT_NAME_ - sort by the event (name, system, id) triplet. - _TEP_EVENT_SORT_SYSTEM_ - sort by the event (system, name, id) triplet. --- - -The _tep_list_events_copy()_ is a thread safe version of _tep_list_events()_. -It has the same behavior, but the returned array is allocated internally and -must be freed by the caller. Note that the content of the array must not be -freed (see the EXAMPLE below). 
- -RETURN VALUE ------------- -The _tep_list_events()_ function returns an array of pointers to events. -In case of an error, NULL is returned. The returned array must not be freed, -it is managed by the library. - -The _tep_list_events_copy()_ function returns an array of pointers to events. -In case of an error, NULL is returned. The returned array must be freed by -the caller. - -EXAMPLE -------- -[source,c] --- -#include <event-parse.h> -... -struct tep_handle *tep = tep_alloc(); -... -int i; -struct tep_event **events; - -i=0; -events = tep_list_events(tep, TEP_EVENT_SORT_ID); -if (events == NULL) { - /* Failed to get the events, sorted by ID */ -} else { - while(events[i]) { - /* walk through the list of the events, sorted by ID */ - i++; - } -} - -i=0; -events = tep_list_events_copy(tep, TEP_EVENT_SORT_NAME); -if (events == NULL) { - /* Failed to get the events, sorted by name */ -} else { - while(events[i]) { - /* walk through the list of the events, sorted by name */ - i++; - } - free(events); -} - -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page.
--- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-event_print.txt b/tools/lib/traceevent/Documentation/libtraceevent-event_print.txt deleted file mode 100644 index 2c6a61811118..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-event_print.txt +++ /dev/null @@ -1,130 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_print_event - Writes event information into a trace sequence. - -SYNOPSIS --------- -[verse] --- -*#include * -*#include * - -void *tep_print_event*(struct tep_handle pass:[*]_tep_, struct trace_seqpass:[*]_s_, struct tep_record pass:[*]_record_, const char pass:[*]_fmt_, _..._) --- - -DESCRIPTION ------------ - -The _tep_print_event()_ function parses the event information of the given -_record_ and writes it into the trace sequence _s_, according to the format -string _fmt_. The desired information is specified after the format string. -The _fmt_ is printf-like format string, following arguments are supported: -[verse] --- - TEP_PRINT_PID, "%d" - PID of the event. - TEP_PRINT_CPU, "%d" - Event CPU. - TEP_PRINT_COMM, "%s" - Event command string. - TEP_PRINT_NAME, "%s" - Event name. - TEP_PRINT_LATENCY, "%s" - Latency of the event. It prints 4 or more - fields - interrupt state, scheduling state, - current context, and preemption count. - Field 1 is the interrupt enabled state: - d : Interrupts are disabled - . : Interrupts are enabled - X : The architecture does not support this - information - Field 2 is the "need resched" state. - N : The task is set to call the scheduler when - possible, as another higher priority task - may need to be scheduled in. - . : The task is not set to call the scheduler. - Field 3 is the context state. - . 
: Normal context - s : Soft interrupt context - h : Hard interrupt context - H : Hard interrupt context which triggered - during soft interrupt context. - z : NMI context - Z : NMI context which triggered during hard - interrupt context - Field 4 is the preemption count. - . : The preempt count is zero. - On preemptible kernels (where the task can be scheduled - out in arbitrary locations while in kernel context), the - preempt count, when non zero, will prevent the kernel - from scheduling out the current task. The preempt count - number is displayed when it is not zero. - Depending on the kernel, it may show other fields - (lock depth, or migration disabled, which are unique to - specialized kernels). - TEP_PRINT_TIME, "%d" - event time stamp. A divisor and precision can be - specified as part of this format string: - "%precision.divisord". Example: - "%3.1000d" - divide the time by 1000 and print the first - 3 digits before the dot. Thus, the time stamp - "123456000" will be printed as "123.456" - TEP_PRINT_INFO, "%s" - event information. - TEP_PRINT_INFO_RAW, "%s" - event information, in raw format. - --- -EXAMPLE -------- -[source,c] --- -#include <event-parse.h> -#include <trace-seq.h> -... -struct trace_seq seq; -trace_seq_init(&seq); -struct tep_handle *tep = tep_alloc(); -... -void print_my_event(struct tep_record *record) -{ - trace_seq_reset(&seq); - tep_print_event(tep, &seq, record, "%16s-%-5d [%03d] %s %6.1000d %s %s", - TEP_PRINT_COMM, TEP_PRINT_PID, TEP_PRINT_CPU, - TEP_PRINT_LATENCY, TEP_PRINT_TIME, TEP_PRINT_NAME, - TEP_PRINT_INFO); -} -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*trace-seq.h* - Header file to include in order to have access to trace sequences related APIs. - Trace sequences are used to allow a function to call several other functions - to create a string of data to use. -*-ltraceevent* - Linker switch to add when building a program that uses the library.
--- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-field_find.txt b/tools/lib/traceevent/Documentation/libtraceevent-field_find.txt deleted file mode 100644 index 0896af5b9eff..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-field_find.txt +++ /dev/null @@ -1,118 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_find_common_field, tep_find_field, tep_find_any_field - -Search for a field in an event. - -SYNOPSIS --------- -[verse] --- -*#include <event-parse.h>* - -struct tep_format_field pass:[*]*tep_find_common_field*(struct tep_event pass:[*]_event_, const char pass:[*]_name_); -struct tep_format_field pass:[*]*tep_find_field*(struct tep_event pass:[*]_event_, const char pass:[*]_name_); -struct tep_format_field pass:[*]*tep_find_any_field*(struct tep_event pass:[*]_event_, const char pass:[*]_name_); --- - -DESCRIPTION ------------ -These functions search for a field with given name in an event. The field -returned can be used to find the field content from within a data record. - -The _tep_find_common_field()_ function searches for a common field with _name_ -in the _event_. - -The _tep_find_field()_ function searches for an event specific field with -_name_ in the _event_. - -The _tep_find_any_field()_ function searches for any field with _name_ in the -_event_. - -RETURN VALUE ------------- -The _tep_find_common_field()_, _tep_find_field()_ and _tep_find_any_field()_ -functions return a pointer to the found field, or NULL in case there is no field -with the requested name.
- -EXAMPLE -------- -[source,c] --- -#include -... -void get_htimer_info(struct tep_handle *tep, struct tep_record *record) -{ - struct tep_format_field *field; - struct tep_event *event; - long long softexpires; - int mode; - int pid; - - event = tep_find_event_by_name(tep, "timer", "hrtimer_start"); - - field = tep_find_common_field(event, "common_pid"); - if (field == NULL) { - /* Cannot find "common_pid" field in the event */ - } else { - /* Get pid from the data record */ - pid = tep_read_number(tep, record->data + field->offset, - field->size); - } - - field = tep_find_field(event, "softexpires"); - if (field == NULL) { - /* Cannot find "softexpires" event specific field in the event */ - } else { - /* Get softexpires parameter from the data record */ - softexpires = tep_read_number(tep, record->data + field->offset, - field->size); - } - - field = tep_find_any_field(event, "mode"); - if (field == NULL) { - /* Cannot find "mode" field in the event */ - } else - { - /* Get mode parameter from the data record */ - mode = tep_read_number(tep, record->data + field->offset, - field->size); - } -} -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. 
--- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-field_get_val.txt b/tools/lib/traceevent/Documentation/libtraceevent-field_get_val.txt deleted file mode 100644 index 6324f0d48aeb..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-field_get_val.txt +++ /dev/null @@ -1,122 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_get_any_field_val, tep_get_common_field_val, tep_get_field_val, -tep_get_field_raw - Get value of a field. - -SYNOPSIS --------- -[verse] --- -*#include * -*#include * - -int *tep_get_any_field_val*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, unsigned long long pass:[*]_val_, int _err_); -int *tep_get_common_field_val*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, unsigned long long pass:[*]_val_, int _err_); -int *tep_get_field_val*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, unsigned long long pass:[*]_val_, int _err_); -void pass:[*]*tep_get_field_raw*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, int pass:[*]_len_, int _err_); --- - -DESCRIPTION ------------ -These functions can be used to find a field and retrieve its value. - -The _tep_get_any_field_val()_ function searches in the _record_ for a field -with _name_, part of the _event_. If the field is found, its value is stored in -_val_. If there is an error and _err_ is not zero, then an error string is -written into _s_. 
- -The _tep_get_common_field_val()_ function does the same as -_tep_get_any_field_val()_, but searches only in the common fields. This works -for any event as all events include the common fields. - -The _tep_get_field_val()_ function does the same as _tep_get_any_field_val()_, -but searches only in the event specific fields. - -The _tep_get_field_raw()_ function searches in the _record_ for a field with -_name_, part of the _event_. If the field is found, a pointer to where the field -exists in the record's raw data is returned. The size of the data is stored in -_len_. If there is an error and _err_ is not zero, then an error string is -written into _s_. - -RETURN VALUE ------------- -The _tep_get_any_field_val()_, _tep_get_common_field_val()_ and -_tep_get_field_val()_ functions return 0 on success, or -1 in case of an error. - -The _tep_get_field_raw()_ function returns a pointer to field's raw data, and -places the length of this data in _len_. In case of an error NULL is returned. - -EXAMPLE -------- -[source,c] --- -#include -#include -... -struct tep_handle *tep = tep_alloc(); -... -struct tep_event *event = tep_find_event_by_name(tep, "kvm", "kvm_exit"); -... -void process_record(struct tep_record *record) -{ - int len; - char *comm; - struct tep_event_format *event; - unsigned long long val; - - event = tep_find_event_by_record(pevent, record); - if (event != NULL) { - if (tep_get_common_field_val(NULL, event, "common_type", - record, &val, 0) == 0) { - /* Got the value of common type field */ - } - if (tep_get_field_val(NULL, event, "pid", record, &val, 0) == 0) { - /* Got the value of pid specific field */ - } - comm = tep_get_field_raw(NULL, event, "comm", record, &len, 0); - if (comm != NULL) { - /* Got a pointer to the comm event specific field */ - } - } -} --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. 
-*trace-seq.h* - Header file to include in order to have access to trace sequences - related APIs. Trace sequences are used to allow a function to call - several other functions to create a string of data to use. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-field_print.txt b/tools/lib/traceevent/Documentation/libtraceevent-field_print.txt deleted file mode 100644 index 9a9df98ac44d..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-field_print.txt +++ /dev/null @@ -1,126 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_print_field, tep_print_fields, tep_print_num_field, tep_print_func_field - -Print the field content. - -SYNOPSIS --------- -[verse] --- -*#include * -*#include * - -void *tep_print_field*(struct trace_seq pass:[*]_s_, void pass:[*]_data_, struct tep_format_field pass:[*]_field_); -void *tep_print_fields*(struct trace_seq pass:[*]_s_, void pass:[*]_data_, int _size_, struct tep_event pass:[*]_event_); -int *tep_print_num_field*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, int _err_); -int *tep_print_func_field*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, int _err_); --- - -DESCRIPTION ------------ -These functions print recorded field's data, according to the field's type. 
- -The _tep_print_field()_ function extracts from the recorded raw _data_ value of -the _field_ and prints it into _s_, according to the field type. - -The _tep_print_fields()_ prints each field name followed by the record's field -value according to the field's type: -[verse] --- -"field1_name=field1_value field2_name=field2_value ..." --- -It iterates all fields of the _event_, and calls _tep_print_field()_ for each of -them. - -The _tep_print_num_field()_ function prints a numeric field with given format -string. A search is performed in the _event_ for a field with _name_. If such -field is found, its value is extracted from the _record_ and is printed in the -_s_, according to the given format string _fmt_. If the argument _err_ is -non-zero, and an error occures - it is printed in the _s_. - -The _tep_print_func_field()_ function prints a function field with given format -string. A search is performed in the _event_ for a field with _name_. If such -field is found, its value is extracted from the _record_. The value is assumed -to be a function address, and a search is perform to find the name of this -function. The function name (if found) and its address are printed in the _s_, -according to the given format string _fmt_. If the argument _err_ is non-zero, -and an error occures - it is printed in _s_. - -RETURN VALUE ------------- -The _tep_print_num_field()_ and _tep_print_func_field()_ functions return 1 -on success, -1 in case of an error or 0 if the print buffer _s_ is full. - -EXAMPLE -------- -[source,c] --- -#include -#include -... -struct tep_handle *tep = tep_alloc(); -... -struct trace_seq seq; -trace_seq_init(&seq); -struct tep_event *event = tep_find_event_by_name(tep, "timer", "hrtimer_start"); -... 
-void process_record(struct tep_record *record) -{ - struct tep_format_field *field_pid = tep_find_common_field(event, "common_pid"); - - trace_seq_reset(&seq); - - /* Print the value of "common_pid" */ - tep_print_field(&seq, record->data, field_pid); - - /* Print all fields of the "hrtimer_start" event */ - tep_print_fields(&seq, record->data, record->size, event); - - /* Print the value of "expires" field with custom format string */ - tep_print_num_field(&seq, " timer expires in %llu ", event, "expires", record, 0); - - /* Print the address and the name of "function" field with custom format string */ - tep_print_func_field(&seq, " timer function is %s ", event, "function", record, 0); - } - ... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*trace-seq.h* - Header file to include in order to have access to trace sequences related APIs. - Trace sequences are used to allow a function to call several other functions - to create a string of data to use. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-field_read.txt b/tools/lib/traceevent/Documentation/libtraceevent-field_read.txt deleted file mode 100644 index 64e9e25d3fd9..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-field_read.txt +++ /dev/null @@ -1,81 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_read_number_field - Reads a number from raw data. 
- -SYNOPSIS --------- -[verse] --- -*#include * - -int *tep_read_number_field*(struct tep_format_field pass:[*]_field_, const void pass:[*]_data_, unsigned long long pass:[*]_value_); --- - -DESCRIPTION ------------ -The _tep_read_number_field()_ function reads the value of the _field_ from the -raw _data_ and stores it in the _value_. The function sets the _value_ according -to the endianness of the raw data and the current machine and stores it in -_value_. - -RETURN VALUE ------------- -The _tep_read_number_field()_ function retunrs 0 in case of success, or -1 in -case of an error. - -EXAMPLE -------- -[source,c] --- -#include -... -struct tep_handle *tep = tep_alloc(); -... -struct tep_event *event = tep_find_event_by_name(tep, "timer", "hrtimer_start"); -... -void process_record(struct tep_record *record) -{ - unsigned long long pid; - struct tep_format_field *field_pid = tep_find_common_field(event, "common_pid"); - - if (tep_read_number_field(field_pid, record->data, &pid) != 0) { - /* Failed to get "common_pid" value */ - } -} -... --- -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. 
--- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-fields.txt b/tools/lib/traceevent/Documentation/libtraceevent-fields.txt deleted file mode 100644 index 1ccb531d5114..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-fields.txt +++ /dev/null @@ -1,105 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_event_common_fields, tep_event_fields - Get a list of fields for an event. - -SYNOPSIS --------- -[verse] --- -*#include * - -struct tep_format_field pass:[*]pass:[*]*tep_event_common_fields*(struct tep_event pass:[*]_event_); -struct tep_format_field pass:[*]pass:[*]*tep_event_fields*(struct tep_event pass:[*]_event_); --- - -DESCRIPTION ------------ -The _tep_event_common_fields()_ function returns an array of pointers to common -fields for the _event_. The array is allocated in the function and must be freed -by free(). The last element of the array is NULL. - -The _tep_event_fields()_ function returns an array of pointers to event specific -fields for the _event_. The array is allocated in the function and must be freed -by free(). The last element of the array is NULL. - -RETURN VALUE ------------- -Both _tep_event_common_fields()_ and _tep_event_fields()_ functions return -an array of pointers to tep_format_field structures in case of success, or -NULL in case of an error. - -EXAMPLE -------- -[source,c] --- -#include -... -struct tep_handle *tep = tep_alloc(); -... 
-int i; -struct tep_format_field **fields; -struct tep_event *event = tep_find_event_by_name(tep, "kvm", "kvm_exit"); -if (event != NULL) { - fields = tep_event_common_fields(event); - if (fields != NULL) { - i = 0; - while (fields[i]) { - /* - walk through the list of the common fields - of the kvm_exit event - */ - i++; - } - free(fields); - } - fields = tep_event_fields(event); - if (fields != NULL) { - i = 0; - while (fields[i]) { - /* - walk through the list of the event specific - fields of the kvm_exit event - */ - i++; - } - free(fields); - } -} -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-file_endian.txt b/tools/lib/traceevent/Documentation/libtraceevent-file_endian.txt deleted file mode 100644 index f401ad311047..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-file_endian.txt +++ /dev/null @@ -1,91 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_is_file_bigendian, tep_set_file_bigendian - Get / set the endianness of the -raw data being accessed by the tep handler. 
- -SYNOPSIS --------- -[verse] --- -*#include * - -enum *tep_endian* { - TEP_LITTLE_ENDIAN = 0, - TEP_BIG_ENDIAN -}; - -bool *tep_is_file_bigendian*(struct tep_handle pass:[*]_tep_); -void *tep_set_file_bigendian*(struct tep_handle pass:[*]_tep_, enum tep_endian _endian_); - --- -DESCRIPTION ------------ -The _tep_is_file_bigendian()_ function gets the endianness of the raw data, -being accessed by the tep handler. The _tep_ argument is trace event parser -context. - -The _tep_set_file_bigendian()_ function sets the endianness of raw data being -accessed by the tep handler. The _tep_ argument is trace event parser context. -[verse] --- -The _endian_ argument is the endianness: - _TEP_LITTLE_ENDIAN_ - the raw data is in little endian format, - _TEP_BIG_ENDIAN_ - the raw data is in big endian format. --- -RETURN VALUE ------------- -The _tep_is_file_bigendian()_ function returns true if the data is in bigendian -format, false otherwise. - -EXAMPLE -------- -[source,c] --- -#include -... -struct tep_handle *tep = tep_alloc(); -... - tep_set_file_bigendian(tep, TEP_LITTLE_ENDIAN); -... - if (tep_is_file_bigendian(tep)) { - /* The raw data is in big endian */ - } else { - /* The raw data is in little endian */ - } --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. 
--- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-filter.txt b/tools/lib/traceevent/Documentation/libtraceevent-filter.txt deleted file mode 100644 index 4a9962d8cb59..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-filter.txt +++ /dev/null @@ -1,209 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_filter_alloc, tep_filter_free, tep_filter_reset, tep_filter_make_string, -tep_filter_copy, tep_filter_compare, tep_filter_match, tep_event_filtered, -tep_filter_remove_event, tep_filter_strerror, tep_filter_add_filter_str - -Event filter related APIs. - -SYNOPSIS --------- -[verse] --- -*#include * - -struct tep_event_filter pass:[*]*tep_filter_alloc*(struct tep_handle pass:[*]_tep_); -void *tep_filter_free*(struct tep_event_filter pass:[*]_filter_); -void *tep_filter_reset*(struct tep_event_filter pass:[*]_filter_); -enum tep_errno *tep_filter_add_filter_str*(struct tep_event_filter pass:[*]_filter_, const char pass:[*]_filter_str_); -int *tep_event_filtered*(struct tep_event_filter pass:[*]_filter_, int _event_id_); -int *tep_filter_remove_event*(struct tep_event_filter pass:[*]_filter_, int _event_id_); -enum tep_errno *tep_filter_match*(struct tep_event_filter pass:[*]_filter_, struct tep_record pass:[*]_record_); -int *tep_filter_copy*(struct tep_event_filter pass:[*]_dest_, struct tep_event_filter pass:[*]_source_); -int *tep_filter_compare*(struct tep_event_filter pass:[*]_filter1_, struct tep_event_filter pass:[*]_filter2_); -char pass:[*]*tep_filter_make_string*(struct tep_event_filter pass:[*]_filter_, int _event_id_); -int *tep_filter_strerror*(struct tep_event_filter pass:[*]_filter_, enum tep_errno _err_, char pass:[*]buf, size_t _buflen_); --- - -DESCRIPTION ------------ -Filters can be 
attached to traced events. They can be used to filter out various -events when outputting them. Each event can be filtered based on its parameters, -described in the event's format file. This set of functions can be used to -create, delete, modify and attach event filters. - -The _tep_filter_alloc()_ function creates a new event filter. The _tep_ argument -is the trace event parser context. - -The _tep_filter_free()_ function frees an event filter and all resources that it -had used. - -The _tep_filter_reset()_ function removes all rules from an event filter and -resets it. - -The _tep_filter_add_filter_str()_ function adds a new rule to the _filter_. The -_filter_str_ argument is the filter string, that contains the rule. - -The _tep_event_filtered()_ function checks if the event with _event_id_ has -_filter_. - -The _tep_filter_remove_event()_ function removes a _filter_ for an event with -_event_id_. - -The _tep_filter_match()_ function tests if a _record_ matches given _filter_. - -The _tep_filter_copy()_ function copies a _source_ filter into a _dest_ filter. - -The _tep_filter_compare()_ function compares two filers - _filter1_ and _filter2_. - -The _tep_filter_make_string()_ function constructs a string, displaying -the _filter_ contents for given _event_id_. - -The _tep_filter_strerror()_ function copies the _filter_ error buffer into the -given _buf_ with the size _buflen_. If the error buffer is empty, in the _buf_ -is copied a string, describing the error _err_. - -RETURN VALUE ------------- -The _tep_filter_alloc()_ function returns a pointer to the newly created event -filter, or NULL in case of an error. - -The _tep_filter_add_filter_str()_ function returns 0 if the rule was -successfully added or a negative error code. Use _tep_filter_strerror()_ to see -actual error message in case of an error. - -The _tep_event_filtered()_ function returns 1 if the filter is found for given -event, or 0 otherwise. 
- -The _tep_filter_remove_event()_ function returns 1 if the vent was removed, or -0 if the event was not found. - -The _tep_filter_match()_ function returns _tep_errno_, according to the result: -[verse] --- -_pass:[TEP_ERRNO__FILTER_MATCH]_ - filter found for event, the record matches. -_pass:[TEP_ERRNO__FILTER_MISS]_ - filter found for event, the record does not match. -_pass:[TEP_ERRNO__FILTER_NOT_FOUND]_ - no filter found for record's event. -_pass:[TEP_ERRNO__NO_FILTER]_ - no rules in the filter. --- -or any other _tep_errno_, if an error occurred during the test. - -The _tep_filter_copy()_ function returns 0 on success or -1 if not all rules - were copied. - -The _tep_filter_compare()_ function returns 1 if the two filters hold the same -content, or 0 if they do not. - -The _tep_filter_make_string()_ function returns a string, which must be freed -with free(), or NULL in case of an error. - -The _tep_filter_strerror()_ function returns 0 if message was filled -successfully, or -1 in case of an error. - -EXAMPLE -------- -[source,c] --- -#include -... -struct tep_handle *tep = tep_alloc(); -... -char errstr[200]; -int ret; - -struct tep_event_filter *filter = tep_filter_alloc(tep); -struct tep_event_filter *filter1 = tep_filter_alloc(tep); -ret = tep_filter_add_filter_str(filter, "sched/sched_wakeup:target_cpu==1"); -if(ret < 0) { - tep_filter_strerror(filter, ret, errstr, sizeof(errstr)); - /* Failed to add a new rule to the filter, the error string is in errstr */ -} -if (tep_filter_copy(filter1, filter) != 0) { - /* Failed to copy filter in filter1 */ -} -... -if (tep_filter_compare(filter, filter1) != 1) { - /* Both filters are different */ -} -... 
-void process_record(struct tep_handle *tep, struct tep_record *record) -{ - struct tep_event *event; - char *fstring; - - event = tep_find_event_by_record(tep, record); - - if (tep_event_filtered(filter, event->id) == 1) { - /* The event has filter */ - fstring = tep_filter_make_string(filter, event->id); - if (fstring != NULL) { - /* The filter for the event is in fstring */ - free(fstring); - } - } - - switch (tep_filter_match(filter, record)) { - case TEP_ERRNO__FILTER_MATCH: - /* The filter matches the record */ - break; - case TEP_ERRNO__FILTER_MISS: - /* The filter does not match the record */ - break; - case TEP_ERRNO__FILTER_NOT_FOUND: - /* No filter found for record's event */ - break; - case TEP_ERRNO__NO_FILTER: - /* There are no rules in the filter */ - break - default: - /* An error occurred during the test */ - break; - } - - if (tep_filter_remove_event(filter, event->id) == 1) { - /* The event was removed from the filter */ - } -} - -... -tep_filter_reset(filter); -... -tep_filter_free(filter); -tep_filter_free(filter1); -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. 
--- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-func_apis.txt b/tools/lib/traceevent/Documentation/libtraceevent-func_apis.txt deleted file mode 100644 index f6aca0df2151..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-func_apis.txt +++ /dev/null @@ -1,183 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_find_function, tep_find_function_address, tep_set_function_resolver, -tep_reset_function_resolver, tep_register_function, tep_register_print_string - -function related tep APIs - -SYNOPSIS --------- -[verse] --- -*#include * - -typedef char pass:[*](*tep_func_resolver_t*)(void pass:[*]_priv_, unsigned long long pass:[*]_addrp_, char pass:[**]_modp_); -int *tep_set_function_resolver*(struct tep_handle pass:[*]_tep_, tep_func_resolver_t pass:[*]_func_, void pass:[*]_priv_); -void *tep_reset_function_resolver*(struct tep_handle pass:[*]_tep_); -const char pass:[*]*tep_find_function*(struct tep_handle pass:[*]_tep_, unsigned long long _addr_); -unsigned long long *tep_find_function_address*(struct tep_handle pass:[*]_tep_, unsigned long long _addr_); -int *tep_register_function*(struct tep_handle pass:[*]_tep_, char pass:[*]_name_, unsigned long long _addr_, char pass:[*]_mod_); -int *tep_register_print_string*(struct tep_handle pass:[*]_tep_, const char pass:[*]_fmt_, unsigned long long _addr_); --- - -DESCRIPTION ------------ -Some tools may have already a way to resolve the kernel functions. These APIs -allow them to keep using it instead of duplicating all the entries inside. - -The _tep_func_resolver_t_ type is the prototype of the alternative kernel -functions resolver. 
This function receives a pointer to its custom context -(set with the _tep_set_function_resolver()_ call ) and the address of a kernel -function, which has to be resolved. In case of success, it should return -the name of the function and its module (if any) in _modp_. - -The _tep_set_function_resolver()_ function registers _func_ as an alternative -kernel functions resolver. The _tep_ argument is trace event parser context. -The _priv_ argument is a custom context of the _func_ function. The function -resolver is used by the APIs _tep_find_function()_, -_tep_find_function_address()_, and _tep_print_func_field()_ to resolve -a function address to a function name. - -The _tep_reset_function_resolver()_ function resets the kernel functions -resolver to the default function. The _tep_ argument is trace event parser -context. - - -These APIs can be used to find function name and start address, by given -address. The given address does not have to be exact, it will select -the function that would contain it. - -The _tep_find_function()_ function returns the function name, which contains the -given address _addr_. The _tep_ argument is the trace event parser context. - -The _tep_find_function_address()_ function returns the function start address, -by given address _addr_. The _addr_ does not have to be exact, it will select -the function that would contain it. The _tep_ argument is the trace event -parser context. - -The _tep_register_function()_ function registers a function name mapped to an -address and (optional) module. This mapping is used in case the function tracer -or events have "%pS" parameter in its format string. It is common to pass in -the kallsyms function names with their corresponding addresses with this -function. The _tep_ argument is the trace event parser context. The _name_ is -the name of the function, the string is copied internally. The _addr_ is the -start address of the function. 
The _mod_ is the kernel module the function may -be in (NULL for none). - -The _tep_register_print_string()_ function registers a string by the address -it was stored in the kernel. Some strings internal to the kernel with static -address are passed to certain events. The "%s" in the event's format field -which has an address needs to know what string would be at that address. The -tep_register_print_string() supplies the parsing with the mapping between kernel -addresses and those strings. The _tep_ argument is the trace event parser -context. The _fmt_ is the string to register, it is copied internally. -The _addr_ is the address the string was located at. - - -RETURN VALUE ------------- -The _tep_set_function_resolver()_ function returns 0 in case of success, or -1 -in case of an error. - -The _tep_find_function()_ function returns the function name, or NULL in case -it cannot be found. - -The _tep_find_function_address()_ function returns the function start address, -or 0 in case it cannot be found. - -The _tep_register_function()_ function returns 0 in case of success. In case of -an error -1 is returned, and errno is set to the appropriate error number. - -The _tep_register_print_string()_ function returns 0 in case of success. In case -of an error -1 is returned, and errno is set to the appropriate error number. - -EXAMPLE -------- -[source,c] --- -#include -... -struct tep_handle *tep = tep_alloc(); -... 
-char *my_resolve_kernel_addr(void *context, - unsigned long long *addrp, char **modp) -{ - struct db *function_database = context; - struct symbol *sym = sql_lookup(function_database, *addrp); - - if (!sym) - return NULL; - - *modp = sym->module_name; - return sym->name; -} - -void show_function( unsigned long long addr) -{ - unsigned long long fstart; - const char *fname; - - if (tep_set_function_resolver(tep, my_resolve_kernel_addr, - function_database) != 0) { - /* failed to register my_resolve_kernel_addr */ - } - - /* These APIs use my_resolve_kernel_addr() to resolve the addr */ - fname = tep_find_function(tep, addr); - fstart = tep_find_function_address(tep, addr); - - /* - addr is in function named fname, starting at fstart address, - at offset (addr - fstart) - */ - - tep_reset_function_resolver(tep); - -} -... - if (tep_register_function(tep, "kvm_exit", - (unsigned long long) 0x12345678, "kvm") != 0) { - /* Failed to register kvm_exit address mapping */ - } -... - if (tep_register_print_string(tep, "print string", - (unsigned long long) 0x87654321, NULL) != 0) { - /* Failed to register "print string" address mapping */ - } -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. 
--- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-func_find.txt b/tools/lib/traceevent/Documentation/libtraceevent-func_find.txt deleted file mode 100644 index 04840e244445..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-func_find.txt +++ /dev/null @@ -1,88 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_find_function,tep_find_function_address - Find function name / start address. - -SYNOPSIS --------- -[verse] --- -*#include * - -const char pass:[*]*tep_find_function*(struct tep_handle pass:[*]_tep_, unsigned long long _addr_); -unsigned long long *tep_find_function_address*(struct tep_handle pass:[*]_tep_, unsigned long long _addr_); --- - -DESCRIPTION ------------ -These functions can be used to find function name and start address, by given -address. The given address does not have to be exact, it will select the function -that would contain it. - -The _tep_find_function()_ function returns the function name, which contains the -given address _addr_. The _tep_ argument is the trace event parser context. - -The _tep_find_function_address()_ function returns the function start address, -by given address _addr_. The _addr_ does not have to be exact, it will select the -function that would contain it. The _tep_ argument is the trace event parser context. - -RETURN VALUE ------------- -The _tep_find_function()_ function returns the function name, or NULL in case -it cannot be found. - -The _tep_find_function_address()_ function returns the function start address, -or 0 in case it cannot be found. - -EXAMPLE -------- -[source,c] --- -#include -... -struct tep_handle *tep = tep_alloc(); -... 
-void show_function( unsigned long long addr) -{ - const char *fname = tep_find_function(tep, addr); - unsigned long long fstart = tep_find_function_address(tep, addr); - - /* addr is in function named fname, starting at fstart address, at offset (addr - fstart) */ -} -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-handle.txt b/tools/lib/traceevent/Documentation/libtraceevent-handle.txt deleted file mode 100644 index 45b20172e262..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-handle.txt +++ /dev/null @@ -1,101 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_alloc, tep_free,tep_ref, tep_unref,tep_get_ref - Create, destroy, manage -references of trace event parser context. - -SYNOPSIS --------- -[verse] --- -*#include * - -struct tep_handle pass:[*]*tep_alloc*(void); -void *tep_free*(struct tep_handle pass:[*]_tep_); -void *tep_ref*(struct tep_handle pass:[*]_tep_); -void *tep_unref*(struct tep_handle pass:[*]_tep_); -int *tep_get_ref*(struct tep_handle pass:[*]_tep_); --- - -DESCRIPTION ------------ -These are the main functions to create and destroy tep_handle - the main -structure, representing the trace event parser context. This context is used as -the input parameter of most library APIs. - -The _tep_alloc()_ function allocates and initializes the tep context. 
- -The _tep_free()_ function will decrement the reference of the _tep_ handler. -When there is no more references, then it will free the handler, as well -as clean up all its resources that it had used. The argument _tep_ is -the pointer to the trace event parser context. - -The _tep_ref()_ function adds a reference to the _tep_ handler. - -The _tep_unref()_ function removes a reference from the _tep_ handler. When -the last reference is removed, the _tep_ is destroyed, and all resources that -it had used are cleaned up. - -The _tep_ref_get()_ functions gets the current references of the _tep_ handler. - -RETURN VALUE ------------- -_tep_alloc()_ returns a pointer to a newly created tep_handle structure. -NULL is returned in case there is not enough free memory to allocate it. - -_tep_ref_get()_ returns the current references of _tep_. -If _tep_ is NULL, 0 is returned. - -EXAMPLE -------- -[source,c] --- -#include - -... -struct tep_handle *tep = tep_alloc(); -... -int ref = tep_get_ref(tep); -tep_ref(tep); -if ( (ref+1) != tep_get_ref(tep)) { - /* Something wrong happened, the counter is not incremented by 1 */ -} -tep_unref(tep); -... -tep_free(tep); -... --- -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. 
--- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-header_page.txt b/tools/lib/traceevent/Documentation/libtraceevent-header_page.txt deleted file mode 100644 index 615d117dc39f..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-header_page.txt +++ /dev/null @@ -1,102 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_get_header_page_size, tep_get_header_timestamp_size, tep_is_old_format - -Get the data stored in the header page, in kernel context. - -SYNOPSIS --------- -[verse] --- -*#include * - -int *tep_get_header_page_size*(struct tep_handle pass:[*]_tep_); -int *tep_get_header_timestamp_size*(struct tep_handle pass:[*]_tep_); -bool *tep_is_old_format*(struct tep_handle pass:[*]_tep_); --- -DESCRIPTION ------------ -These functions retrieve information from kernel context, stored in tracefs -events/header_page. Old kernels do not have header page info, so default values -from user space context are used. - -The _tep_get_header_page_size()_ function returns the size of a long integer, -in kernel context. The _tep_ argument is trace event parser context. -This information is retrieved from tracefs events/header_page, "commit" field. - -The _tep_get_header_timestamp_size()_ function returns the size of timestamps, -in kernel context. The _tep_ argument is trace event parser context. This -information is retrieved from tracefs events/header_page, "timestamp" field. - -The _tep_is_old_format()_ function returns true if the kernel predates -the addition of events/header_page, otherwise it returns false. - -RETURN VALUE ------------- -The _tep_get_header_page_size()_ function returns the size of a long integer, -in bytes. 
- -The _tep_get_header_timestamp_size()_ function returns the size of timestamps, -in bytes. - -The _tep_is_old_format()_ function returns true, if an old kernel is used to -generate the tracing data, which has no event/header_page. If the kernel is new, -or _tep_ is NULL, false is returned. - -EXAMPLE -------- -[source,c] --- -#include -... -struct tep_handle *tep = tep_alloc(); -... - int longsize; - int timesize; - bool old; - - longsize = tep_get_header_page_size(tep); - timesize = tep_get_header_timestamp_size(tep); - old = tep_is_old_format(tep); - - printf ("%s kernel is used to generate the tracing data.\n", - old?"Old":"New"); - printf("The size of a long integer is %d bytes.\n", longsize); - printf("The timestamps size is %d bytes.\n", timesize); -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-host_endian.txt b/tools/lib/traceevent/Documentation/libtraceevent-host_endian.txt deleted file mode 100644 index d5d375eb8d1e..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-host_endian.txt +++ /dev/null @@ -1,104 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_is_bigendian, tep_is_local_bigendian, tep_set_local_bigendian - Get / set -the endianness of the local machine. 
- -SYNOPSIS --------- -[verse] --- -*#include * - -enum *tep_endian* { - TEP_LITTLE_ENDIAN = 0, - TEP_BIG_ENDIAN -}; - -int *tep_is_bigendian*(void); -bool *tep_is_local_bigendian*(struct tep_handle pass:[*]_tep_); -void *tep_set_local_bigendian*(struct tep_handle pass:[*]_tep_, enum tep_endian _endian_); --- - -DESCRIPTION ------------ - -The _tep_is_bigendian()_ gets the endianness of the machine, executing -the function. - -The _tep_is_local_bigendian()_ function gets the endianness of the local -machine, saved in the _tep_ handler. The _tep_ argument is the trace event -parser context. This API is a bit faster than _tep_is_bigendian()_, as it -returns cached endianness of the local machine instead of checking it each time. - -The _tep_set_local_bigendian()_ function sets the endianness of the local -machine in the _tep_ handler. The _tep_ argument is trace event parser context. -The _endian_ argument is the endianness: -[verse] --- - _TEP_LITTLE_ENDIAN_ - the machine is little endian, - _TEP_BIG_ENDIAN_ - the machine is big endian. --- - -RETURN VALUE ------------- -The _tep_is_bigendian()_ function returns non zero if the endianness of the -machine, executing the code, is big endian and zero otherwise. - -The _tep_is_local_bigendian()_ function returns true, if the endianness of the -local machine, saved in the _tep_ handler, is big endian, or false otherwise. - -EXAMPLE -------- -[source,c] --- -#include -... -struct tep_handle *tep = tep_alloc(); -... - if (tep_is_bigendian()) - tep_set_local_bigendian(tep, TEP_BIG_ENDIAN); - else - tep_set_local_bigendian(tep, TEP_LITTLE_ENDIAN); -... - if (tep_is_local_bigendian(tep)) - printf("This machine you are running on is bigendian\n"); - else - printf("This machine you are running on is little endian\n"); - --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. 
--- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-long_size.txt b/tools/lib/traceevent/Documentation/libtraceevent-long_size.txt deleted file mode 100644 index 01d78ea2519a..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-long_size.txt +++ /dev/null @@ -1,78 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_get_long_size, tep_set_long_size - Get / set the size of a long integer on -the machine, where the trace is generated, in bytes - -SYNOPSIS --------- -[verse] --- -*#include * - -int *tep_get_long_size*(strucqt tep_handle pass:[*]_tep_); -void *tep_set_long_size*(struct tep_handle pass:[*]_tep_, int _long_size_); --- - -DESCRIPTION ------------ -The _tep_get_long_size()_ function returns the size of a long integer on the machine, -where the trace is generated. The _tep_ argument is trace event parser context. - -The _tep_set_long_size()_ function sets the size of a long integer on the machine, -where the trace is generated. The _tep_ argument is trace event parser context. -The _long_size_ is the size of a long integer, in bytes. - -RETURN VALUE ------------- -The _tep_get_long_size()_ function returns the size of a long integer on the machine, -where the trace is generated, in bytes. - -EXAMPLE -------- -[source,c] --- -#include -... -struct tep_handle *tep = tep_alloc(); -... -tep_set_long_size(tep, 4); -... -int long_size = tep_get_long_size(tep); -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. 
-*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-page_size.txt b/tools/lib/traceevent/Documentation/libtraceevent-page_size.txt deleted file mode 100644 index 452c0cfa1822..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-page_size.txt +++ /dev/null @@ -1,82 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_get_page_size, tep_set_page_size - Get / set the size of a memory page on -the machine, where the trace is generated - -SYNOPSIS --------- -[verse] --- -*#include * - -int *tep_get_page_size*(struct tep_handle pass:[*]_tep_); -void *tep_set_page_size*(struct tep_handle pass:[*]_tep_, int _page_size_); --- - -DESCRIPTION ------------ -The _tep_get_page_size()_ function returns the size of a memory page on -the machine, where the trace is generated. The _tep_ argument is trace -event parser context. - -The _tep_set_page_size()_ function stores in the _tep_ context the size of a -memory page on the machine, where the trace is generated. -The _tep_ argument is trace event parser context. -The _page_size_ argument is the size of a memory page, in bytes. - -RETURN VALUE ------------- -The _tep_get_page_size()_ function returns size of the memory page, in bytes. - -EXAMPLE -------- -[source,c] --- -#include -#include -... -struct tep_handle *tep = tep_alloc(); -... 
- int page_size = getpagesize(); - - tep_set_page_size(tep, page_size); - - printf("The page size for this machine is %d\n", tep_get_page_size(tep)); - --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-parse_event.txt b/tools/lib/traceevent/Documentation/libtraceevent-parse_event.txt deleted file mode 100644 index f248114ca1ff..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-parse_event.txt +++ /dev/null @@ -1,90 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_parse_event, tep_parse_format - Parse the event format information - -SYNOPSIS --------- -[verse] --- -*#include * - -enum tep_errno *tep_parse_event*(struct tep_handle pass:[*]_tep_, const char pass:[*]_buf_, unsigned long _size_, const char pass:[*]_sys_); -enum tep_errno *tep_parse_format*(struct tep_handle pass:[*]_tep_, struct tep_event pass:[*]pass:[*]_eventp_, const char pass:[*]_buf_, unsigned long _size_, const char pass:[*]_sys_); --- - -DESCRIPTION ------------ -The _tep_parse_event()_ function parses the event format and creates an event -structure to quickly parse raw data for a given event. The _tep_ argument is -the trace event parser context. The created event structure is stored in the -_tep_ context. The _buf_ argument is a buffer with _size_, where the event -format data is. 
The event format data can be taken from -tracefs/events/.../.../format files. The _sys_ argument is the system of -the event. - -The _tep_parse_format()_ function does the same as _tep_parse_event()_. The only -difference is in the extra _eventp_ argument, where the newly created event -structure is returned. - -RETURN VALUE ------------- -Both _tep_parse_event()_ and _tep_parse_format()_ functions return 0 on success, -or TEP_ERRNO__... in case of an error. - -EXAMPLE -------- -[source,c] --- -#include -... -struct tep_handle *tep = tep_alloc(); -... -char *buf; -int size; -struct tep_event *event = NULL; -buf = read_file("/sys/kernel/tracing/events/ftrace/print/format", &size); -if (tep_parse_event(tep, buf, size, "ftrace") != 0) { - /* Failed to parse the ftrace print format */ -} - -if (tep_parse_format(tep, &event, buf, size, "ftrace") != 0) { - /* Failed to parse the ftrace print format */ -} -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-parse_head.txt b/tools/lib/traceevent/Documentation/libtraceevent-parse_head.txt deleted file mode 100644 index c90f16c7d8e6..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-parse_head.txt +++ /dev/null @@ -1,82 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_parse_header_page - Parses the data stored in the header page. 
- -SYNOPSIS --------- -[verse] --- -*#include * - -int *tep_parse_header_page*(struct tep_handle pass:[*]_tep_, char pass:[*]_buf_, unsigned long _size_, int _long_size_); --- - -DESCRIPTION ------------ -The _tep_parse_header_page()_ function parses the header page data from _buf_, -and initializes the _tep_, trace event parser context, with it. The buffer -_buf_ is with _size_, and is supposed to be copied from -tracefs/events/header_page. - -Some old kernels do not have header page info, in this case the -_tep_parse_header_page()_ function can be called with _size_ equal to 0. The -_tep_ context is initialized with default values. The _long_size_ can be used in -this use case, to set the size of a long integer to be used. - -RETURN VALUE ------------- -The _tep_parse_header_page()_ function returns 0 in case of success, or -1 -in case of an error. - -EXAMPLE -------- -[source,c] --- -#include -... -struct tep_handle *tep = tep_alloc(); -... -char *buf; -int size; -buf = read_file("/sys/kernel/tracing/events/header_page", &size); -if (tep_parse_header_page(tep, buf, size, sizeof(unsigned long)) != 0) { - /* Failed to parse the header page */ -} -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. 
--- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt b/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt deleted file mode 100644 index 4d6394397d92..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt +++ /dev/null @@ -1,122 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_load_plugins, tep_unload_plugins, tep_load_plugins_hook - Load / unload traceevent plugins. - -SYNOPSIS --------- -[verse] --- -*#include * - -struct tep_plugin_list pass:[*]*tep_load_plugins*(struct tep_handle pass:[*]_tep_); -void *tep_unload_plugins*(struct tep_plugin_list pass:[*]_plugin_list_, struct tep_handle pass:[*]_tep_); -void *tep_load_plugins_hook*(struct tep_handle pass:[*]_tep_, const char pass:[*]_suffix_, - void (pass:[*]_load_plugin_)(struct tep_handle pass:[*]tep, - const char pass:[*]path, - const char pass:[*]name, - void pass:[*]data), - void pass:[*]_data_); --- - -DESCRIPTION ------------ -The _tep_load_plugins()_ function loads all plugins, located in the plugin -directories. The _tep_ argument is trace event parser context. -The plugin directories are : -[verse] --- - - Directories, specified in _tep_->plugins_dir with priority TEP_PLUGIN_FIRST - - System's plugin directory, defined at the library compile time. 
It - depends on the library installation prefix and usually is - _(install_preffix)/lib/traceevent/plugins_ - - Directory, defined by the environment variable _TRACEEVENT_PLUGIN_DIR_ - - User's plugin directory, located at _~/.local/lib/traceevent/plugins_ - - Directories, specified in _tep_->plugins_dir with priority TEP_PLUGIN_LAST --- -Loading of plugins can be controlled by the _tep_flags_, using the -_tep_set_flag()_ API: -[verse] --- - _TEP_DISABLE_SYS_PLUGINS_ - do not load plugins, located in - the system's plugin directory. - _TEP_DISABLE_PLUGINS_ - do not load any plugins. --- -The _tep_set_flag()_ API needs to be called before _tep_load_plugins()_, if -loading of all plugins is not the desired case. - -The _tep_unload_plugins()_ function unloads the plugins, previously loaded by -_tep_load_plugins()_. The _tep_ argument is trace event parser context. The -_plugin_list_ is the list of loaded plugins, returned by -the _tep_load_plugins()_ function. - -The _tep_load_plugins_hook_ function walks through all directories with plugins -and calls user specified _load_plugin()_ hook for each plugin file. Only files -with given _suffix_ are considered to be plugins. The _data_ is a user specified -context, passed to _load_plugin()_. Directories and the walk order are the same -as in _tep_load_plugins()_ API. - -RETURN VALUE ------------- -The _tep_load_plugins()_ function returns a list of successfully loaded plugins, -or NULL in case no plugins are loaded. - -EXAMPLE -------- -[source,c] --- -#include -... -struct tep_handle *tep = tep_alloc(); -... -struct tep_plugin_list *plugins = tep_load_plugins(tep); -if (plugins == NULL) { - /* no plugins are loaded */ -} -... -tep_unload_plugins(plugins, tep); -... -void print_plugin(struct tep_handle *tep, const char *path, - const char *name, void *data) -{ - pritnf("Found libtraceevent plugin %s/%s\n", path, name); -} -... -tep_load_plugins_hook(tep, ".so", print_plugin, NULL); -... 
--- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_, _tep_set_flag(3)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-record_parse.txt b/tools/lib/traceevent/Documentation/libtraceevent-record_parse.txt deleted file mode 100644 index e9a69116c78b..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-record_parse.txt +++ /dev/null @@ -1,137 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_data_type, tep_data_pid,tep_data_preempt_count, tep_data_flags - -Extract common fields from a record. - -SYNOPSIS --------- -[verse] --- -*#include * - -enum *trace_flag_type* { - _TRACE_FLAG_IRQS_OFF_, - _TRACE_FLAG_IRQS_NOSUPPORT_, - _TRACE_FLAG_NEED_RESCHED_, - _TRACE_FLAG_HARDIRQ_, - _TRACE_FLAG_SOFTIRQ_, -}; - -int *tep_data_type*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_); -int *tep_data_pid*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_); -int *tep_data_preempt_count*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_); -int *tep_data_flags*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_); --- - -DESCRIPTION ------------ -This set of functions can be used to extract common fields from a record. - -The _tep_data_type()_ function gets the event id from the record _rec_. -It reads the "common_type" field. The _tep_ argument is the trace event parser -context. 
- -The _tep_data_pid()_ function gets the process id from the record _rec_. -It reads the "common_pid" field. The _tep_ argument is the trace event parser -context. - -The _tep_data_preempt_count()_ function gets the preemption count from the -record _rec_. It reads the "common_preempt_count" field. The _tep_ argument is -the trace event parser context. - -The _tep_data_flags()_ function gets the latency flags from the record _rec_. -It reads the "common_flags" field. The _tep_ argument is the trace event parser -context. Supported latency flags are: -[verse] --- - _TRACE_FLAG_IRQS_OFF_, Interrupts are disabled. - _TRACE_FLAG_IRQS_NOSUPPORT_, Reading IRQ flag is not supported by the architecture. - _TRACE_FLAG_NEED_RESCHED_, Task needs rescheduling. - _TRACE_FLAG_HARDIRQ_, Hard IRQ is running. - _TRACE_FLAG_SOFTIRQ_, Soft IRQ is running. --- - -RETURN VALUE ------------- -The _tep_data_type()_ function returns an integer, representing the event id. - -The _tep_data_pid()_ function returns an integer, representing the process id - -The _tep_data_preempt_count()_ function returns an integer, representing the -preemption count. - -The _tep_data_flags()_ function returns an integer, representing the latency -flags. Look at the _trace_flag_type_ enum for supported flags. - -All these functions in case of an error return a negative integer. - -EXAMPLE -------- -[source,c] --- -#include -... -struct tep_handle *tep = tep_alloc(); -... -void process_record(struct tep_record *record) -{ - int data; - - data = tep_data_type(tep, record); - if (data >= 0) { - /* Got the ID of the event */ - } - - data = tep_data_pid(tep, record); - if (data >= 0) { - /* Got the process ID */ - } - - data = tep_data_preempt_count(tep, record); - if (data >= 0) { - /* Got the preemption count */ - } - - data = tep_data_flags(tep, record); - if (data >= 0) { - /* Got the latency flags */ - } -} -... 
--- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-reg_event_handler.txt b/tools/lib/traceevent/Documentation/libtraceevent-reg_event_handler.txt deleted file mode 100644 index 53d37d72a1c1..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-reg_event_handler.txt +++ /dev/null @@ -1,156 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_register_event_handler, tep_unregister_event_handler - Register / -unregisters a callback function to parse an event information. 
- -SYNOPSIS --------- -[verse] --- -*#include * - -enum *tep_reg_handler* { - _TEP_REGISTER_SUCCESS_, - _TEP_REGISTER_SUCCESS_OVERWRITE_, -}; - -int *tep_register_event_handler*(struct tep_handle pass:[*]_tep_, int _id_, const char pass:[*]_sys_name_, const char pass:[*]_event_name_, tep_event_handler_func _func_, void pass:[*]_context_); -int *tep_unregister_event_handler*(struct tep_handle pass:[*]tep, int id, const char pass:[*]sys_name, const char pass:[*]event_name, tep_event_handler_func func, void pass:[*]_context_); - -typedef int (*pass:[*]tep_event_handler_func*)(struct trace_seq pass:[*]s, struct tep_record pass:[*]record, struct tep_event pass:[*]event, void pass:[*]context); --- - -DESCRIPTION ------------ -The _tep_register_event_handler()_ function registers a handler function, -which is going to be called to parse the information for a given event. -The _tep_ argument is the trace event parser context. The _id_ argument is -the id of the event. The _sys_name_ argument is the name of the system, -the event belongs to. The _event_name_ argument is the name of the event. -If _id_ is >= 0, it is used to find the event, otherwise _sys_name_ and -_event_name_ are used. The _func_ is a pointer to the function, which is going -to be called to parse the event information. The _context_ argument is a pointer -to the context data, which will be passed to the _func_. If a handler function -for the same event is already registered, it will be overridden with the new -one. This mechanism allows a developer to override the parsing of a given event. -If for some reason the default print format is not sufficient, the developer -can register a function for an event to be used to parse the data instead. - -The _tep_unregister_event_handler()_ function unregisters the handler function, -previously registered with _tep_register_event_handler()_. The _tep_ argument -is the trace event parser context. 
The _id_, _sys_name_, _event_name_, _func_, -and _context_ are the same arguments, as when the callback function _func_ was -registered. - -The _tep_event_handler_func_ is the type of the custom event handler -function. The _s_ argument is the trace sequence, it can be used to create a -custom string, describing the event. A _record_ to get the event from is passed -as input parameter and also the _event_ - the handle to the record's event. The -_context_ is custom context, set when the custom event handler is registered. - -RETURN VALUE ------------- -The _tep_register_event_handler()_ function returns _TEP_REGISTER_SUCCESS_ -if the new handler is registered successfully or -_TEP_REGISTER_SUCCESS_OVERWRITE_ if an existing handler is overwritten. -If there is not enough memory to complete the registration, -TEP_ERRNO__MEM_ALLOC_FAILED is returned. - -The _tep_unregister_event_handler()_ function returns 0 if _func_ was removed -successful or, -1 if the event was not found. - -The _tep_event_handler_func_ should return -1 in case of an error, -or 0 otherwise. - -EXAMPLE -------- -[source,c] --- -#include -#include -... -struct tep_handle *tep = tep_alloc(); -... -int timer_expire_handler(struct trace_seq *s, struct tep_record *record, - struct tep_event *event, void *context) -{ - trace_seq_printf(s, "hrtimer="); - - if (tep_print_num_field(s, "0x%llx", event, "timer", record, 0) == -1) - tep_print_num_field(s, "0x%llx", event, "hrtimer", record, 1); - - trace_seq_printf(s, " now="); - - tep_print_num_field(s, "%llu", event, "now", record, 1); - - tep_print_func_field(s, " function=%s", event, "function", record, 0); - - return 0; -} -... 
- int ret; - - ret = tep_register_event_handler(tep, -1, "timer", "hrtimer_expire_entry", - timer_expire_handler, NULL); - if (ret < 0) { - char buf[32]; - - tep_strerror(tep, ret, buf, 32) - printf("Failed to register handler for hrtimer_expire_entry: %s\n", buf); - } else { - switch (ret) { - case TEP_REGISTER_SUCCESS: - printf ("Registered handler for hrtimer_expire_entry\n"); - break; - case TEP_REGISTER_SUCCESS_OVERWRITE: - printf ("Overwrote handler for hrtimer_expire_entry\n"); - break; - } - } -... - ret = tep_unregister_event_handler(tep, -1, "timer", "hrtimer_expire_entry", - timer_expire_handler, NULL); - if ( ret ) - printf ("Failed to unregister handler for hrtimer_expire_entry\n"); - --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*trace-seq.h* - Header file to include in order to have access to trace sequences - related APIs. Trace sequences are used to allow a function to call - several other functions to create a string of data to use. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. 
--- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-reg_print_func.txt b/tools/lib/traceevent/Documentation/libtraceevent-reg_print_func.txt deleted file mode 100644 index 708dce91ebd8..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-reg_print_func.txt +++ /dev/null @@ -1,155 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_register_print_function,tep_unregister_print_function - -Registers / Unregisters a helper function. - -SYNOPSIS --------- -[verse] --- -*#include * - -enum *tep_func_arg_type* { - TEP_FUNC_ARG_VOID, - TEP_FUNC_ARG_INT, - TEP_FUNC_ARG_LONG, - TEP_FUNC_ARG_STRING, - TEP_FUNC_ARG_PTR, - TEP_FUNC_ARG_MAX_TYPES -}; - -typedef unsigned long long (*pass:[*]tep_func_handler*)(struct trace_seq pass:[*]s, unsigned long long pass:[*]args); - -int *tep_register_print_function*(struct tep_handle pass:[*]_tep_, tep_func_handler _func_, enum tep_func_arg_type _ret_type_, char pass:[*]_name_, _..._); -int *tep_unregister_print_function*(struct tep_handle pass:[*]_tep_, tep_func_handler _func_, char pass:[*]_name_); --- - -DESCRIPTION ------------ -Some events may have helper functions in the print format arguments. -This allows a plugin to dynamically create a way to process one of -these functions. - -The _tep_register_print_function()_ registers such helper function. The _tep_ -argument is the trace event parser context. The _func_ argument is a pointer -to the helper function. The _ret_type_ argument is the return type of the -helper function, value from the _tep_func_arg_type_ enum. The _name_ is the name -of the helper function, as seen in the print format arguments. The _..._ is a -variable list of _tep_func_arg_type_ enums, the _func_ function arguments. 
-This list must end with _TEP_FUNC_ARG_VOID_. See 'EXAMPLE' section. - -The _tep_unregister_print_function()_ unregisters a helper function, previously -registered with _tep_register_print_function()_. The _tep_ argument is the -trace event parser context. The _func_ and _name_ arguments are the same, used -when the helper function was registered. - -The _tep_func_handler_ is the type of the helper function. The _s_ argument is -the trace sequence, it can be used to create a custom string. -The _args_ is a list of arguments, defined when the helper function was -registered. - -RETURN VALUE ------------- -The _tep_register_print_function()_ function returns 0 in case of success. -In case of an error, TEP_ERRNO_... code is returned. - -The _tep_unregister_print_function()_ returns 0 in case of success, or -1 in -case of an error. - -EXAMPLE -------- -Some events have internal functions calls, that appear in the print format -output. For example "tracefs/events/i915/g4x_wm/format" has: -[source,c] --- -print fmt: "pipe %c, frame=%u, scanline=%u, wm %d/%d/%d, sr %s/%d/%d/%d, hpll %s/%d/%d/%d, fbc %s", - ((REC->pipe) + 'A'), REC->frame, REC->scanline, REC->primary, - REC->sprite, REC->cursor, yesno(REC->cxsr), REC->sr_plane, - REC->sr_cursor, REC->sr_fbc, yesno(REC->hpll), REC->hpll_plane, - REC->hpll_cursor, REC->hpll_fbc, yesno(REC->fbc) --- -Notice the call to function _yesno()_ in the print arguments. In the kernel -context, this function has the following implementation: -[source,c] --- -static const char *yesno(int x) -{ - static const char *yes = "yes"; - static const char *no = "no"; - - return x ? yes : no; -} --- -The user space event parser has no idea how to handle this _yesno()_ function. -The _tep_register_print_function()_ API can be used to register a user space -helper function, mapped to the kernel's _yesno()_: -[source,c] --- -#include -#include -... -struct tep_handle *tep = tep_alloc(); -... -static const char *yes_no_helper(int x) -{ - return x ? 
"yes" : "no"; -} -... - if ( tep_register_print_function(tep, - yes_no_helper, - TEP_FUNC_ARG_STRING, - "yesno", - TEP_FUNC_ARG_INT, - TEP_FUNC_ARG_VOID) != 0) { - /* Failed to register yes_no_helper function */ - } - -/* - Now, when the event parser encounters this yesno() function, it will know - how to handle it. -*/ -... - if (tep_unregister_print_function(tep, yes_no_helper, "yesno") != 0) { - /* Failed to unregister yes_no_helper function */ - } --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*trace-seq.h* - Header file to include in order to have access to trace sequences - related APIs. Trace sequences are used to allow a function to call - several other functions to create a string of data to use. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-set_flag.txt b/tools/lib/traceevent/Documentation/libtraceevent-set_flag.txt deleted file mode 100644 index b0599780b9a6..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-set_flag.txt +++ /dev/null @@ -1,104 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_set_flag, tep_clear_flag, tep_test_flag - -Manage flags of trace event parser context. 
- -SYNOPSIS --------- -[verse] --- -*#include * - -enum *tep_flag* { - _TEP_NSEC_OUTPUT_, - _TEP_DISABLE_SYS_PLUGINS_, - _TEP_DISABLE_PLUGINS_ -}; -void *tep_set_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flag_); -void *tep_clear_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flag_); -bool *tep_test_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flag_); --- - -DESCRIPTION ------------ -Trace event parser context flags are defined in *enum tep_flag*: -[verse] --- -_TEP_NSEC_OUTPUT_ - print event's timestamp in nano seconds, instead of micro seconds. -_TEP_DISABLE_SYS_PLUGINS_ - disable plugins, located in system's plugin - directory. This directory is defined at library compile - time, and usually depends on library installation - prefix: (install_preffix)/lib/traceevent/plugins -_TEP_DISABLE_PLUGINS_ - disable all library plugins: - - in system's plugin directory - - in directory, defined by the environment variable _TRACEEVENT_PLUGIN_DIR_ - - in user's home directory, _~/.traceevent/plugins_ --- -Note: plugin related flags must me set before calling _tep_load_plugins()_ API. - -The _tep_set_flag()_ function sets _flag_ to _tep_ context. - -The _tep_clear_flag()_ function clears _flag_ from _tep_ context. - -The _tep_test_flag()_ function tests if _flag_ is set to _tep_ context. - -RETURN VALUE ------------- -_tep_test_flag()_ function returns true if _flag_ is set, false otherwise. - -EXAMPLE -------- -[source,c] --- -#include -... -struct tep_handle *tep = tep_alloc(); -... -/* Print timestamps in nanoseconds */ -tep_set_flag(tep, TEP_NSEC_OUTPUT); -... -if (tep_test_flag(tep, TEP_NSEC_OUTPUT)) { - /* print timestamps in nanoseconds */ -} else { - /* print timestamps in microseconds */ -} -... -/* Print timestamps in microseconds */ -tep_clear_flag(tep, TEP_NSEC_OUTPUT); -... --- -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. 
-*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-strerror.txt b/tools/lib/traceevent/Documentation/libtraceevent-strerror.txt deleted file mode 100644 index ee4062a00c9f..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-strerror.txt +++ /dev/null @@ -1,85 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_strerror - Returns a string describing regular errno and tep error number. - -SYNOPSIS --------- -[verse] --- -*#include * - -int *tep_strerror*(struct tep_handle pass:[*]_tep_, enum tep_errno _errnum_, char pass:[*]_buf_, size_t _buflen_); - --- -DESCRIPTION ------------ -The _tep_strerror()_ function converts tep error number into a human -readable string. -The _tep_ argument is trace event parser context. The _errnum_ is a regular -errno, defined in errno.h, or a tep error number. The string, describing this -error number is copied in the _buf_ argument. The _buflen_ argument is -the size of the _buf_. - -It as a thread safe wrapper around strerror_r(). The library function has two -different behaviors - POSIX and GNU specific. The _tep_strerror()_ API always -behaves as the POSIX version - the error string is copied in the user supplied -buffer. - -RETURN VALUE ------------- -The _tep_strerror()_ function returns 0, if a valid _errnum_ is passed and the -string is copied into _buf_. If _errnum_ is not a valid error number, --1 is returned and _buf_ is not modified. - -EXAMPLE -------- -[source,c] --- -#include -... 
-struct tep_handle *tep = tep_alloc(); -... -char buf[32]; -char *pool = calloc(1, 128); -if (tep == NULL) { - tep_strerror(tep, TEP_ERRNO__MEM_ALLOC_FAILED, buf, 32); - printf ("The pool is not initialized, %s", buf); -} -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-tseq.txt b/tools/lib/traceevent/Documentation/libtraceevent-tseq.txt deleted file mode 100644 index 8ac6aa174e12..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-tseq.txt +++ /dev/null @@ -1,158 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -trace_seq_init, trace_seq_destroy, trace_seq_reset, trace_seq_terminate, -trace_seq_putc, trace_seq_puts, trace_seq_printf, trace_seq_vprintf, -trace_seq_do_fprintf, trace_seq_do_printf - -Initialize / destroy a trace sequence. 
- -SYNOPSIS --------- -[verse] --- -*#include * -*#include * - -void *trace_seq_init*(struct trace_seq pass:[*]_s_); -void *trace_seq_destroy*(struct trace_seq pass:[*]_s_); -void *trace_seq_reset*(struct trace_seq pass:[*]_s_); -void *trace_seq_terminate*(struct trace_seq pass:[*]_s_); -int *trace_seq_putc*(struct trace_seq pass:[*]_s_, unsigned char _c_); -int *trace_seq_puts*(struct trace_seq pass:[*]_s_, const char pass:[*]_str_); -int *trace_seq_printf*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, _..._); -int *trace_seq_vprintf*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, va_list _args_); -int *trace_seq_do_printf*(struct trace_seq pass:[*]_s_); -int *trace_seq_do_fprintf*(struct trace_seq pass:[*]_s_, FILE pass:[*]_fp_); --- - -DESCRIPTION ------------ -Trace sequences are used to allow a function to call several other functions -to create a string of data to use. - -The _trace_seq_init()_ function initializes the trace sequence _s_. - -The _trace_seq_destroy()_ function destroys the trace sequence _s_ and frees -all its resources that it had used. - -The _trace_seq_reset()_ function re-initializes the trace sequence _s_. All -characters already written in _s_ will be deleted. - -The _trace_seq_terminate()_ function terminates the trace sequence _s_. It puts -the null character pass:['\0'] at the end of the buffer. - -The _trace_seq_putc()_ function puts a single character _c_ in the trace -sequence _s_. - -The _trace_seq_puts()_ function puts a NULL terminated string _str_ in the -trace sequence _s_. - -The _trace_seq_printf()_ function puts a formated string _fmt _with -variable arguments _..._ in the trace sequence _s_. - -The _trace_seq_vprintf()_ function puts a formated string _fmt _with -list of arguments _args_ in the trace sequence _s_. - -The _trace_seq_do_printf()_ function prints the buffer of trace sequence _s_ to -the standard output stdout. 
- -The _trace_seq_do_fprintf()_ function prints the buffer of trace sequence _s_ -to the given file _fp_. - -RETURN VALUE ------------- -Both _trace_seq_putc()_ and _trace_seq_puts()_ functions return the number of -characters put in the trace sequence, or 0 in case of an error - -Both _trace_seq_printf()_ and _trace_seq_vprintf()_ functions return 0 if the -trace oversizes the buffer's free space, the number of characters printed, or -a negative value in case of an error. - -Both _trace_seq_do_printf()_ and _trace_seq_do_fprintf()_ functions return the -number of printed characters, or -1 in case of an error. - -EXAMPLE -------- -[source,c] --- -#include -#include -... -struct trace_seq seq; -trace_seq_init(&seq); -... -void foo_seq_print(struct trace_seq *tseq, char *format, ...) -{ - va_list ap; - va_start(ap, format); - if (trace_seq_vprintf(tseq, format, ap) <= 0) { - /* Failed to print in the trace sequence */ - } - va_end(ap); -} - -trace_seq_reset(&seq); - -char *str = " MAN page example"; -if (trace_seq_puts(&seq, str) != strlen(str)) { - /* Failed to put str in the trace sequence */ -} -if (trace_seq_putc(&seq, ':') != 1) { - /* Failed to put ':' in the trace sequence */ -} -if (trace_seq_printf(&seq, " trace sequence: %d", 1) <= 0) { - /* Failed to print in the trace sequence */ -} -foo_seq_print( &seq, " %d\n", 2); - -trace_seq_terminate(&seq); -... - -if (trace_seq_do_printf(&seq) < 0 ) { - /* Failed to print the sequence buffer to the standard output */ -} -FILE *fp = fopen("trace.txt", "w"); -if (trace_seq_do_fprintf(&seq, fp) < 0 ) [ - /* Failed to print the sequence buffer to the trace.txt file */ -} - -trace_seq_destroy(&seq); -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*trace-seq.h* - Header file to include in order to have access to trace sequences related APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. 
--- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent.txt b/tools/lib/traceevent/Documentation/libtraceevent.txt deleted file mode 100644 index d530a7ce8fb2..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent.txt +++ /dev/null @@ -1,192 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -libtraceevent - Linux kernel trace event library - -SYNOPSIS --------- -[verse] --- -*#include * - -Management of tep handler data structure and access of its members: - struct tep_handle pass:[*]*tep_alloc*(void); - void *tep_free*(struct tep_handle pass:[*]_tep_); - void *tep_ref*(struct tep_handle pass:[*]_tep_); - void *tep_unref*(struct tep_handle pass:[*]_tep_); - int *tep_get_ref*(struct tep_handle pass:[*]_tep_); - void *tep_set_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flag_); - void *tep_clear_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flag_); - bool *tep_test_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flags_); - int *tep_get_cpus*(struct tep_handle pass:[*]_tep_); - void *tep_set_cpus*(struct tep_handle pass:[*]_tep_, int _cpus_); - int *tep_get_long_size*(strucqt tep_handle pass:[*]_tep_); - void *tep_set_long_size*(struct tep_handle pass:[*]_tep_, int _long_size_); - int *tep_get_page_size*(struct tep_handle pass:[*]_tep_); - void *tep_set_page_size*(struct tep_handle pass:[*]_tep_, int _page_size_); - int *tep_get_header_page_size*(struct tep_handle pass:[*]_tep_); - int *tep_get_header_timestamp_size*(struct tep_handle pass:[*]_tep_); - bool *tep_is_old_format*(struct tep_handle 
pass:[*]_tep_); - int *tep_strerror*(struct tep_handle pass:[*]_tep_, enum tep_errno _errnum_, char pass:[*]_buf_, size_t _buflen_); - -Register / unregister APIs: - int *tep_register_function*(struct tep_handle pass:[*]_tep_, char pass:[*]_name_, unsigned long long _addr_, char pass:[*]_mod_); - int *tep_register_event_handler*(struct tep_handle pass:[*]_tep_, int _id_, const char pass:[*]_sys_name_, const char pass:[*]_event_name_, tep_event_handler_func _func_, void pass:[*]_context_); - int *tep_unregister_event_handler*(struct tep_handle pass:[*]tep, int id, const char pass:[*]sys_name, const char pass:[*]event_name, tep_event_handler_func func, void pass:[*]_context_); - int *tep_register_print_string*(struct tep_handle pass:[*]_tep_, const char pass:[*]_fmt_, unsigned long long _addr_); - int *tep_register_print_function*(struct tep_handle pass:[*]_tep_, tep_func_handler _func_, enum tep_func_arg_type _ret_type_, char pass:[*]_name_, _..._); - int *tep_unregister_print_function*(struct tep_handle pass:[*]_tep_, tep_func_handler _func_, char pass:[*]_name_); - -Plugins management: - struct tep_plugin_list pass:[*]*tep_load_plugins*(struct tep_handle pass:[*]_tep_); - void *tep_unload_plugins*(struct tep_plugin_list pass:[*]_plugin_list_, struct tep_handle pass:[*]_tep_); - char pass:[*]pass:[*]*tep_plugin_list_options*(void); - void *tep_plugin_free_options_list*(char pass:[*]pass:[*]_list_); - int *tep_plugin_add_options*(const char pass:[*]_name_, struct tep_plugin_option pass:[*]_options_); - void *tep_plugin_remove_options*(struct tep_plugin_option pass:[*]_options_); - void *tep_print_plugins*(struct trace_seq pass:[*]_s_, const char pass:[*]_prefix_, const char pass:[*]_suffix_, const struct tep_plugin_list pass:[*]_list_); - -Event related APIs: - struct tep_event pass:[*]*tep_get_event*(struct tep_handle pass:[*]_tep_, int _index_); - struct tep_event pass:[*]*tep_get_first_event*(struct tep_handle pass:[*]_tep_); - int *tep_get_events_count*(struct 
tep_handle pass:[*]_tep_); - struct tep_event pass:[*]pass:[*]*tep_list_events*(struct tep_handle pass:[*]_tep_, enum tep_event_sort_type _sort_type_); - struct tep_event pass:[*]pass:[*]*tep_list_events_copy*(struct tep_handle pass:[*]_tep_, enum tep_event_sort_type _sort_type_); - void *tep_print_event*(struct tep_handle pass:[*]_tep_, struct trace_seq pass:[*]_s_, struct tep_record pass:[*]_record_, const char pass:[*]_fmt_, _..._); - -Event finding: - struct tep_event pass:[*]*tep_find_event*(struct tep_handle pass:[*]_tep_, int _id_); - struct tep_event pass:[*]*tep_find_event_by_name*(struct tep_handle pass:[*]_tep_, const char pass:[*]_sys_, const char pass:[*]_name_); - struct tep_event pass:[*]*tep_find_event_by_record*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_record_); - -Parsing of event files: - int *tep_parse_header_page*(struct tep_handle pass:[*]_tep_, char pass:[*]_buf_, unsigned long _size_, int _long_size_); - enum tep_errno *tep_parse_event*(struct tep_handle pass:[*]_tep_, const char pass:[*]_buf_, unsigned long _size_, const char pass:[*]_sys_); - enum tep_errno *tep_parse_format*(struct tep_handle pass:[*]_tep_, struct tep_event pass:[*]pass:[*]_eventp_, const char pass:[*]_buf_, unsigned long _size_, const char pass:[*]_sys_); - -APIs related to fields from event's format files: - struct tep_format_field pass:[*]pass:[*]*tep_event_common_fields*(struct tep_event pass:[*]_event_); - struct tep_format_field pass:[*]pass:[*]*tep_event_fields*(struct tep_event pass:[*]_event_); - void pass:[*]*tep_get_field_raw*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, int pass:[*]_len_, int _err_); - int *tep_get_field_val*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, unsigned long long pass:[*]_val_, int _err_); - int *tep_get_common_field_val*(struct trace_seq pass:[*]_s_, 
struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, unsigned long long pass:[*]_val_, int _err_); - int *tep_get_any_field_val*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, unsigned long long pass:[*]_val_, int _err_); - int *tep_read_number_field*(struct tep_format_field pass:[*]_field_, const void pass:[*]_data_, unsigned long long pass:[*]_value_); - -Event fields printing: - void *tep_print_field*(struct trace_seq pass:[*]_s_, void pass:[*]_data_, struct tep_format_field pass:[*]_field_); - void *tep_print_fields*(struct trace_seq pass:[*]_s_, void pass:[*]_data_, int _size_, struct tep_event pass:[*]_event_); - int *tep_print_num_field*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, int _err_); - int *tep_print_func_field*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, int _err_); - -Event fields finding: - struct tep_format_field pass:[*]*tep_find_common_field*(struct tep_event pass:[*]_event_, const char pass:[*]_name_); - struct tep_format_field pass:[*]*tep_find_field*(struct tep_event_ormat pass:[*]_event_, const char pass:[*]_name_); - struct tep_format_field pass:[*]*tep_find_any_field*(struct tep_event pass:[*]_event_, const char pass:[*]_name_); - -Functions resolver: - int *tep_set_function_resolver*(struct tep_handle pass:[*]_tep_, tep_func_resolver_t pass:[*]_func_, void pass:[*]_priv_); - void *tep_reset_function_resolver*(struct tep_handle pass:[*]_tep_); - const char pass:[*]*tep_find_function*(struct tep_handle pass:[*]_tep_, unsigned long long _addr_); - unsigned long long *tep_find_function_address*(struct tep_handle pass:[*]_tep_, unsigned long long _addr_); - -Filter management: - struct tep_event_filter 
pass:[*]*tep_filter_alloc*(struct tep_handle pass:[*]_tep_); - enum tep_errno *tep_filter_add_filter_str*(struct tep_event_filter pass:[*]_filter_, const char pass:[*]_filter_str_); - enum tep_errno *tep_filter_match*(struct tep_event_filter pass:[*]_filter_, struct tep_record pass:[*]_record_); - int *tep_filter_strerror*(struct tep_event_filter pass:[*]_filter_, enum tep_errno _err_, char pass:[*]buf, size_t _buflen_); - int *tep_event_filtered*(struct tep_event_filter pass:[*]_filter_, int _event_id_); - void *tep_filter_reset*(struct tep_event_filter pass:[*]_filter_); - void *tep_filter_free*(struct tep_event_filter pass:[*]_filter_); - char pass:[*]*tep_filter_make_string*(struct tep_event_filter pass:[*]_filter_, int _event_id_); - int *tep_filter_remove_event*(struct tep_event_filter pass:[*]_filter_, int _event_id_); - int *tep_filter_copy*(struct tep_event_filter pass:[*]_dest_, struct tep_event_filter pass:[*]_source_); - int *tep_filter_compare*(struct tep_event_filter pass:[*]_filter1_, struct tep_event_filter pass:[*]_filter2_); - -Parsing various data from the records: - int *tep_data_type*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_); - int *tep_data_pid*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_); - int *tep_data_preempt_count*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_); - int *tep_data_flags*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_); - -Command and task related APIs: - const char pass:[*]*tep_data_comm_from_pid*(struct tep_handle pass:[*]_tep_, int _pid_); - struct cmdline pass:[*]*tep_data_pid_from_comm*(struct tep_handle pass:[*]_tep_, const char pass:[*]_comm_, struct cmdline pass:[*]_next_); - int *tep_register_comm*(struct tep_handle pass:[*]_tep_, const char pass:[*]_comm_, int _pid_); - int *tep_override_comm*(struct tep_handle pass:[*]_tep_, const char pass:[*]_comm_, int _pid_); - bool *tep_is_pid_registered*(struct tep_handle pass:[*]_tep_, int 
_pid_); - int *tep_cmdline_pid*(struct tep_handle pass:[*]_tep_, struct cmdline pass:[*]_cmdline_); - -Endian related APIs: - int *tep_is_bigendian*(void); - unsigned long long *tep_read_number*(struct tep_handle pass:[*]_tep_, const void pass:[*]_ptr_, int _size_); - bool *tep_is_file_bigendian*(struct tep_handle pass:[*]_tep_); - void *tep_set_file_bigendian*(struct tep_handle pass:[*]_tep_, enum tep_endian _endian_); - bool *tep_is_local_bigendian*(struct tep_handle pass:[*]_tep_); - void *tep_set_local_bigendian*(struct tep_handle pass:[*]_tep_, enum tep_endian _endian_); - -Trace sequences: -*#include * - void *trace_seq_init*(struct trace_seq pass:[*]_s_); - void *trace_seq_reset*(struct trace_seq pass:[*]_s_); - void *trace_seq_destroy*(struct trace_seq pass:[*]_s_); - int *trace_seq_printf*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, ...); - int *trace_seq_vprintf*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, va_list _args_); - int *trace_seq_puts*(struct trace_seq pass:[*]_s_, const char pass:[*]_str_); - int *trace_seq_putc*(struct trace_seq pass:[*]_s_, unsigned char _c_); - void *trace_seq_terminate*(struct trace_seq pass:[*]_s_); - int *trace_seq_do_fprintf*(struct trace_seq pass:[*]_s_, FILE pass:[*]_fp_); - int *trace_seq_do_printf*(struct trace_seq pass:[*]_s_); --- - -DESCRIPTION ------------ -The libtraceevent(3) library provides APIs to access kernel tracepoint events, -located in the tracefs file system under the events directory. - -ENVIRONMENT ------------ -[verse] --- -TRACEEVENT_PLUGIN_DIR - Additional plugin directory. All shared object files, located in this directory will be loaded as traceevent plugins. --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*trace-seq.h* - Header file to include in order to have access to trace sequences related APIs. 
- Trace sequences are used to allow a function to call several other functions - to create a string of data to use. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/manpage-1.72.xsl b/tools/lib/traceevent/Documentation/manpage-1.72.xsl deleted file mode 100644 index b4d315cb8c47..000000000000 --- a/tools/lib/traceevent/Documentation/manpage-1.72.xsl +++ /dev/null @@ -1,14 +0,0 @@ - - - - - - - - - - diff --git a/tools/lib/traceevent/Documentation/manpage-base.xsl b/tools/lib/traceevent/Documentation/manpage-base.xsl deleted file mode 100644 index a264fa616093..000000000000 --- a/tools/lib/traceevent/Documentation/manpage-base.xsl +++ /dev/null @@ -1,35 +0,0 @@ - - - - - - - - - - - - - - sp - - - - - - - - br - - - diff --git a/tools/lib/traceevent/Documentation/manpage-bold-literal.xsl b/tools/lib/traceevent/Documentation/manpage-bold-literal.xsl deleted file mode 100644 index 608eb5df6281..000000000000 --- a/tools/lib/traceevent/Documentation/manpage-bold-literal.xsl +++ /dev/null @@ -1,17 +0,0 @@ - - - - - - - fB - - - fR - - - diff --git a/tools/lib/traceevent/Documentation/manpage-normal.xsl b/tools/lib/traceevent/Documentation/manpage-normal.xsl deleted file mode 100644 index a48f5b11f3dc..000000000000 --- a/tools/lib/traceevent/Documentation/manpage-normal.xsl +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - -\ -. 
- - diff --git a/tools/lib/traceevent/Documentation/manpage-suppress-sp.xsl b/tools/lib/traceevent/Documentation/manpage-suppress-sp.xsl deleted file mode 100644 index a63c7632a87d..000000000000 --- a/tools/lib/traceevent/Documentation/manpage-suppress-sp.xsl +++ /dev/null @@ -1,21 +0,0 @@ - - - - - - - - - - - - - - - diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile deleted file mode 100644 index 98dfd4badea3..000000000000 --- a/tools/lib/traceevent/Makefile +++ /dev/null @@ -1,300 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# trace-cmd version -EP_VERSION = 1 -EP_PATCHLEVEL = 1 -EP_EXTRAVERSION = 0 - -# file format version -FILE_VERSION = 6 - -MAKEFLAGS += --no-print-directory - - -# Makefiles suck: This macro sets a default value of $(2) for the -# variable named by $(1), unless the variable has been set by -# environment or command line. This is necessary for CC and AR -# because make sets default values, so the simpler ?= approach -# won't work as expected. -define allow-override - $(if $(or $(findstring environment,$(origin $(1))),\ - $(findstring command line,$(origin $(1)))),,\ - $(eval $(1) = $(2))) -endef - -# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix. -$(call allow-override,CC,$(CROSS_COMPILE)gcc) -$(call allow-override,AR,$(CROSS_COMPILE)ar) -$(call allow-override,NM,$(CROSS_COMPILE)nm) -$(call allow-override,PKG_CONFIG,pkg-config) - -EXT = -std=gnu99 -INSTALL = install - -# Use DESTDIR for installing into a different root directory. -# This is useful for building a package. The program will be -# installed in this directory as if it was the root directory. -# Then the build tool can move it later. 
-DESTDIR ?= -DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))' - -LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1) -ifeq ($(LP64), 1) - libdir_relative_temp = lib64 -else - libdir_relative_temp = lib -endif - -libdir_relative ?= $(libdir_relative_temp) -prefix ?= /usr/local -libdir = $(prefix)/$(libdir_relative) -man_dir = $(prefix)/share/man -man_dir_SQ = '$(subst ','\'',$(man_dir))' -pkgconfig_dir ?= $(word 1,$(shell $(PKG_CONFIG) \ - --variable pc_path pkg-config | tr ":" " ")) -includedir_relative = traceevent -includedir = $(prefix)/include/$(includedir_relative) -includedir_SQ = '$(subst ','\'',$(includedir))' - -export man_dir man_dir_SQ INSTALL -export DESTDIR DESTDIR_SQ -export EVENT_PARSE_VERSION - -include ../../scripts/Makefile.include - -# copy a bit from Linux kbuild - -ifeq ("$(origin V)", "command line") - VERBOSE = $(V) -endif -ifndef VERBOSE - VERBOSE = 0 -endif - -ifeq ($(srctree),) -srctree := $(patsubst %/,%,$(dir $(CURDIR))) -srctree := $(patsubst %/,%,$(dir $(srctree))) -srctree := $(patsubst %/,%,$(dir $(srctree))) -#$(info Determined 'srctree' to be $(srctree)) -endif - -export prefix libdir src obj - -# Shell quotes -libdir_SQ = $(subst ','\'',$(libdir)) -libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) - -CONFIG_INCLUDES = -CONFIG_LIBS = -CONFIG_FLAGS = - -VERSION = $(EP_VERSION) -PATCHLEVEL = $(EP_PATCHLEVEL) -EXTRAVERSION = $(EP_EXTRAVERSION) - -OBJ = $@ -N = - -EVENT_PARSE_VERSION = $(EP_VERSION).$(EP_PATCHLEVEL).$(EP_EXTRAVERSION) - -LIB_TARGET = libtraceevent.a libtraceevent.so.$(EVENT_PARSE_VERSION) -LIB_INSTALL = libtraceevent.a libtraceevent.so* -LIB_INSTALL := $(addprefix $(OUTPUT),$(LIB_INSTALL)) - -INCLUDES = -I. 
-I $(srctree)/tools/include $(CONFIG_INCLUDES) - -# Set compile option CFLAGS -ifdef EXTRA_CFLAGS - CFLAGS := $(EXTRA_CFLAGS) -else - CFLAGS := -g -Wall -endif - -# Append required CFLAGS -override CFLAGS += -fPIC -override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ) -override CFLAGS += $(udis86-flags) -D_GNU_SOURCE - -ifeq ($(VERBOSE),1) - Q = -else - Q = @ -endif - -# Disable command line variables (CFLAGS) override from top -# level Makefile (perf), otherwise build Makefile will get -# the same command line setup. -MAKEOVERRIDES= - -export srctree OUTPUT CC LD CFLAGS V -build := -f $(srctree)/tools/build/Makefile.build dir=. obj - -TE_IN := $(OUTPUT)libtraceevent-in.o -LIB_TARGET := $(addprefix $(OUTPUT),$(LIB_TARGET)) - -CMD_TARGETS = $(LIB_TARGET) - -TARGETS = $(CMD_TARGETS) - -all: all_cmd plugins - -all_cmd: $(CMD_TARGETS) - -$(TE_IN): force - $(Q)$(MAKE) $(build)=libtraceevent - -$(OUTPUT)libtraceevent.so.$(EVENT_PARSE_VERSION): $(TE_IN) - $(QUIET_LINK)$(CC) --shared $(LDFLAGS) $^ -Wl,-soname,libtraceevent.so.$(EP_VERSION) -o $@ - @ln -sf $(@F) $(OUTPUT)libtraceevent.so - @ln -sf $(@F) $(OUTPUT)libtraceevent.so.$(EP_VERSION) - -$(OUTPUT)libtraceevent.a: $(TE_IN) - $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^ - -$(OUTPUT)%.so: $(OUTPUT)%-in.o - $(QUIET_LINK)$(CC) $(CFLAGS) -shared $(LDFLAGS) -nostartfiles -o $@ $^ - -define make_version.h - (echo '/* This file is automatically generated. Do not modify. 
*/'; \ - echo \#define VERSION_CODE $(shell \ - expr $(VERSION) \* 256 + $(PATCHLEVEL)); \ - echo '#define EXTRAVERSION ' $(EXTRAVERSION); \ - echo '#define VERSION_STRING "'$(VERSION).$(PATCHLEVEL).$(EXTRAVERSION)'"'; \ - echo '#define FILE_VERSION '$(FILE_VERSION); \ - ) > $1 -endef - -define update_version.h - ($(call make_version.h, $@.tmp); \ - if [ -r $@ ] && cmp -s $@ $@.tmp; then \ - rm -f $@.tmp; \ - else \ - echo ' UPDATE $@'; \ - mv -f $@.tmp $@; \ - fi); -endef - -ep_version.h: force - $(Q)$(N)$(call update_version.h) - -VERSION_FILES = ep_version.h - -define update_dir - (echo $1 > $@.tmp; \ - if [ -r $@ ] && cmp -s $@ $@.tmp; then \ - rm -f $@.tmp; \ - else \ - echo ' UPDATE $@'; \ - mv -f $@.tmp $@; \ - fi); -endef - -tags: force - $(RM) tags - find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \ - --regex-c++='/_PE\(([^,)]*).*/TEP_ERRNO__\1/' - -TAGS: force - $(RM) TAGS - find . -name '*.[ch]' | xargs etags \ - --regex='/_PE(\([^,)]*\).*/TEP_ERRNO__\1/' - -define do_install_mkdir - if [ ! 
-d '$(DESTDIR_SQ)$1' ]; then \ - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \ - fi -endef - -define do_install - $(call do_install_mkdir,$2); \ - $(INSTALL) $(if $3,-m $3,) $1 '$(DESTDIR_SQ)$2' -endef - -PKG_CONFIG_SOURCE_FILE = libtraceevent.pc -PKG_CONFIG_FILE := $(addprefix $(OUTPUT),$(PKG_CONFIG_SOURCE_FILE)) -define do_install_pkgconfig_file - if [ -n "${pkgconfig_dir}" ]; then \ - cp -f ${PKG_CONFIG_SOURCE_FILE}.template ${PKG_CONFIG_FILE}; \ - sed -i "s|INSTALL_PREFIX|${1}|g" ${PKG_CONFIG_FILE}; \ - sed -i "s|LIB_VERSION|${EVENT_PARSE_VERSION}|g" ${PKG_CONFIG_FILE}; \ - sed -i "s|LIB_DIR|${libdir}|g" ${PKG_CONFIG_FILE}; \ - sed -i "s|HEADER_DIR|$(includedir)|g" ${PKG_CONFIG_FILE}; \ - $(call do_install,$(PKG_CONFIG_FILE),$(pkgconfig_dir),644); \ - else \ - (echo Failed to locate pkg-config directory) 1>&2; \ - fi -endef - -install_lib: all_cmd install_plugins install_headers install_pkgconfig - $(call QUIET_INSTALL, $(LIB_TARGET)) \ - $(call do_install_mkdir,$(libdir_SQ)); \ - cp -fpR $(LIB_INSTALL) $(DESTDIR)$(libdir_SQ) - -install_pkgconfig: - $(call QUIET_INSTALL, $(PKG_CONFIG_FILE)) \ - $(call do_install_pkgconfig_file,$(prefix)) - -install_headers: - $(call QUIET_INSTALL, traceevent_headers) \ - $(call do_install,event-parse.h,$(includedir_SQ),644); \ - $(call do_install,event-utils.h,$(includedir_SQ),644); \ - $(call do_install,trace-seq.h,$(includedir_SQ),644); \ - $(call do_install,kbuffer.h,$(includedir_SQ),644); - -install: install_lib - -clean: clean_plugins - $(call QUIET_CLEAN, libtraceevent) \ - $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d .*.cmd; \ - $(RM) TRACEEVENT-CFLAGS tags TAGS; \ - $(RM) $(PKG_CONFIG_FILE) - -PHONY += doc -doc: - $(call descend,Documentation) - -PHONY += doc-clean -doc-clean: - $(call descend,Documentation,clean) - -PHONY += doc-install -doc-install: - $(call descend,Documentation,install) - -PHONY += doc-uninstall -doc-uninstall: - $(call descend,Documentation,uninstall) - -PHONY += help -help: - @echo 'Possible 
targets:' - @echo'' - @echo ' all - default, compile the library and the'\ - 'plugins' - @echo ' plugins - compile the plugins' - @echo ' install - install the library, the plugins,'\ - 'the header and pkgconfig files' - @echo ' clean - clean the library and the plugins object files' - @echo ' doc - compile the documentation files - man'\ - 'and html pages, in the Documentation directory' - @echo ' doc-clean - clean the documentation files' - @echo ' doc-install - install the man pages' - @echo ' doc-uninstall - uninstall the man pages' - @echo'' - -PHONY += plugins -plugins: - $(call descend,plugins) - -PHONY += install_plugins -install_plugins: - $(call descend,plugins,install) - -PHONY += clean_plugins -clean_plugins: - $(call descend,plugins,clean) - -force: - -# Declare the contents of the .PHONY variable as phony. We keep that -# information in a variable so we can use it in if_changed and friends. -.PHONY: $(PHONY) diff --git a/tools/lib/traceevent/event-parse-api.c b/tools/lib/traceevent/event-parse-api.c deleted file mode 100644 index f8361e45d446..000000000000 --- a/tools/lib/traceevent/event-parse-api.c +++ /dev/null @@ -1,333 +0,0 @@ -// SPDX-License-Identifier: LGPL-2.1 -/* - * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt - * - */ - -#include "event-parse.h" -#include "event-parse-local.h" -#include "event-utils.h" - -/** - * tep_get_event - returns the event with the given index - * @tep: a handle to the tep_handle - * @index: index of the requested event, in the range 0 .. nr_events - * - * This returns pointer to the element of the events array with the given index - * If @tep is NULL, or @index is not in the range 0 .. nr_events, NULL is returned. 
- */ -struct tep_event *tep_get_event(struct tep_handle *tep, int index) -{ - if (tep && tep->events && index < tep->nr_events) - return tep->events[index]; - - return NULL; -} - -/** - * tep_get_first_event - returns the first event in the events array - * @tep: a handle to the tep_handle - * - * This returns pointer to the first element of the events array - * If @tep is NULL, NULL is returned. - */ -struct tep_event *tep_get_first_event(struct tep_handle *tep) -{ - return tep_get_event(tep, 0); -} - -/** - * tep_get_events_count - get the number of defined events - * @tep: a handle to the tep_handle - * - * This returns number of elements in event array - * If @tep is NULL, 0 is returned. - */ -int tep_get_events_count(struct tep_handle *tep) -{ - if (tep) - return tep->nr_events; - return 0; -} - -/** - * tep_set_flag - set event parser flag - * @tep: a handle to the tep_handle - * @flag: flag, or combination of flags to be set - * can be any combination from enum tep_flag - * - * This sets a flag or combination of flags from enum tep_flag - */ -void tep_set_flag(struct tep_handle *tep, int flag) -{ - if (tep) - tep->flags |= flag; -} - -/** - * tep_clear_flag - clear event parser flag - * @tep: a handle to the tep_handle - * @flag: flag to be cleared - * - * This clears a tep flag - */ -void tep_clear_flag(struct tep_handle *tep, enum tep_flag flag) -{ - if (tep) - tep->flags &= ~flag; -} - -/** - * tep_test_flag - check the state of event parser flag - * @tep: a handle to the tep_handle - * @flag: flag to be checked - * - * This returns the state of the requested tep flag. - * Returns: true if the flag is set, false otherwise. 
- */ -bool tep_test_flag(struct tep_handle *tep, enum tep_flag flag) -{ - if (tep) - return tep->flags & flag; - return false; -} - -__hidden unsigned short data2host2(struct tep_handle *tep, unsigned short data) -{ - unsigned short swap; - - if (!tep || tep->host_bigendian == tep->file_bigendian) - return data; - - swap = ((data & 0xffULL) << 8) | - ((data & (0xffULL << 8)) >> 8); - - return swap; -} - -__hidden unsigned int data2host4(struct tep_handle *tep, unsigned int data) -{ - unsigned int swap; - - if (!tep || tep->host_bigendian == tep->file_bigendian) - return data; - - swap = ((data & 0xffULL) << 24) | - ((data & (0xffULL << 8)) << 8) | - ((data & (0xffULL << 16)) >> 8) | - ((data & (0xffULL << 24)) >> 24); - - return swap; -} - -__hidden unsigned long long -data2host8(struct tep_handle *tep, unsigned long long data) -{ - unsigned long long swap; - - if (!tep || tep->host_bigendian == tep->file_bigendian) - return data; - - swap = ((data & 0xffULL) << 56) | - ((data & (0xffULL << 8)) << 40) | - ((data & (0xffULL << 16)) << 24) | - ((data & (0xffULL << 24)) << 8) | - ((data & (0xffULL << 32)) >> 8) | - ((data & (0xffULL << 40)) >> 24) | - ((data & (0xffULL << 48)) >> 40) | - ((data & (0xffULL << 56)) >> 56); - - return swap; -} - -/** - * tep_get_header_page_size - get size of the header page - * @tep: a handle to the tep_handle - * - * This returns size of the header page - * If @tep is NULL, 0 is returned. - */ -int tep_get_header_page_size(struct tep_handle *tep) -{ - if (tep) - return tep->header_page_size_size; - return 0; -} - -/** - * tep_get_header_timestamp_size - get size of the timestamp in the header page - * @tep: a handle to the tep_handle - * - * This returns size of the timestamp in the header page - * If @tep is NULL, 0 is returned. 
- */ -int tep_get_header_timestamp_size(struct tep_handle *tep) -{ - if (tep) - return tep->header_page_ts_size; - return 0; -} - -/** - * tep_get_cpus - get the number of CPUs - * @tep: a handle to the tep_handle - * - * This returns the number of CPUs - * If @tep is NULL, 0 is returned. - */ -int tep_get_cpus(struct tep_handle *tep) -{ - if (tep) - return tep->cpus; - return 0; -} - -/** - * tep_set_cpus - set the number of CPUs - * @tep: a handle to the tep_handle - * - * This sets the number of CPUs - */ -void tep_set_cpus(struct tep_handle *tep, int cpus) -{ - if (tep) - tep->cpus = cpus; -} - -/** - * tep_get_long_size - get the size of a long integer on the traced machine - * @tep: a handle to the tep_handle - * - * This returns the size of a long integer on the traced machine - * If @tep is NULL, 0 is returned. - */ -int tep_get_long_size(struct tep_handle *tep) -{ - if (tep) - return tep->long_size; - return 0; -} - -/** - * tep_set_long_size - set the size of a long integer on the traced machine - * @tep: a handle to the tep_handle - * @size: size, in bytes, of a long integer - * - * This sets the size of a long integer on the traced machine - */ -void tep_set_long_size(struct tep_handle *tep, int long_size) -{ - if (tep) - tep->long_size = long_size; -} - -/** - * tep_get_page_size - get the size of a memory page on the traced machine - * @tep: a handle to the tep_handle - * - * This returns the size of a memory page on the traced machine - * If @tep is NULL, 0 is returned. 
- */ -int tep_get_page_size(struct tep_handle *tep) -{ - if (tep) - return tep->page_size; - return 0; -} - -/** - * tep_set_page_size - set the size of a memory page on the traced machine - * @tep: a handle to the tep_handle - * @_page_size: size of a memory page, in bytes - * - * This sets the size of a memory page on the traced machine - */ -void tep_set_page_size(struct tep_handle *tep, int _page_size) -{ - if (tep) - tep->page_size = _page_size; -} - -/** - * tep_is_file_bigendian - return the endian of the file - * @tep: a handle to the tep_handle - * - * This returns true if the file is in big endian order - * If @tep is NULL, false is returned. - */ -bool tep_is_file_bigendian(struct tep_handle *tep) -{ - if (tep) - return (tep->file_bigendian == TEP_BIG_ENDIAN); - return false; -} - -/** - * tep_set_file_bigendian - set if the file is in big endian order - * @tep: a handle to the tep_handle - * @endian: non zero, if the file is in big endian order - * - * This sets if the file is in big endian order - */ -void tep_set_file_bigendian(struct tep_handle *tep, enum tep_endian endian) -{ - if (tep) - tep->file_bigendian = endian; -} - -/** - * tep_is_local_bigendian - return the endian of the saved local machine - * @tep: a handle to the tep_handle - * - * This returns true if the saved local machine in @tep is big endian. - * If @tep is NULL, false is returned. - */ -bool tep_is_local_bigendian(struct tep_handle *tep) -{ - if (tep) - return (tep->host_bigendian == TEP_BIG_ENDIAN); - return 0; -} - -/** - * tep_set_local_bigendian - set the stored local machine endian order - * @tep: a handle to the tep_handle - * @endian: non zero, if the local host has big endian order - * - * This sets the endian order for the local machine. 
- */ -void tep_set_local_bigendian(struct tep_handle *tep, enum tep_endian endian) -{ - if (tep) - tep->host_bigendian = endian; -} - -/** - * tep_is_old_format - get if an old kernel is used - * @tep: a handle to the tep_handle - * - * This returns true, if an old kernel is used to generate the tracing events or - * false if a new kernel is used. Old kernels did not have header page info. - * If @tep is NULL, false is returned. - */ -bool tep_is_old_format(struct tep_handle *tep) -{ - if (tep) - return tep->old_format; - return false; -} - -/** - * tep_set_test_filters - set a flag to test a filter string - * @tep: a handle to the tep_handle - * @test_filters: the new value of the test_filters flag - * - * This sets a flag to test a filter string. If this flag is set, when - * tep_filter_add_filter_str() API as called,it will print the filter string - * instead of adding it. - */ -void tep_set_test_filters(struct tep_handle *tep, int test_filters) -{ - if (tep) - tep->test_filters = test_filters; -} diff --git a/tools/lib/traceevent/event-parse-local.h b/tools/lib/traceevent/event-parse-local.h deleted file mode 100644 index fd4bbcfbb849..000000000000 --- a/tools/lib/traceevent/event-parse-local.h +++ /dev/null @@ -1,123 +0,0 @@ -// SPDX-License-Identifier: LGPL-2.1 -/* - * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt - * - */ - -#ifndef _PARSE_EVENTS_INT_H -#define _PARSE_EVENTS_INT_H - -struct tep_cmdline; -struct cmdline_list; -struct func_map; -struct func_list; -struct event_handler; -struct func_resolver; -struct tep_plugins_dir; - -#define __hidden __attribute__((visibility ("hidden"))) - -struct tep_handle { - int ref_count; - - int header_page_ts_offset; - int header_page_ts_size; - int header_page_size_offset; - int header_page_size_size; - int header_page_data_offset; - int header_page_data_size; - int header_page_overwrite; - - enum tep_endian file_bigendian; - enum tep_endian host_bigendian; - - int old_format; - - int cpus; - int long_size; - 
int page_size; - - struct tep_cmdline *cmdlines; - struct cmdline_list *cmdlist; - int cmdline_count; - - struct func_map *func_map; - struct func_resolver *func_resolver; - struct func_list *funclist; - unsigned int func_count; - - struct printk_map *printk_map; - struct printk_list *printklist; - unsigned int printk_count; - - struct tep_event **events; - int nr_events; - struct tep_event **sort_events; - enum tep_event_sort_type last_type; - - int type_offset; - int type_size; - - int pid_offset; - int pid_size; - - int pc_offset; - int pc_size; - - int flags_offset; - int flags_size; - - int ld_offset; - int ld_size; - - int test_filters; - - int flags; - - struct tep_format_field *bprint_ip_field; - struct tep_format_field *bprint_fmt_field; - struct tep_format_field *bprint_buf_field; - - struct event_handler *handlers; - struct tep_function_handler *func_handlers; - - /* cache */ - struct tep_event *last_event; - - struct tep_plugins_dir *plugins_dir; -}; - -enum tep_print_parse_type { - PRINT_FMT_STRING, - PRINT_FMT_ARG_DIGIT, - PRINT_FMT_ARG_POINTER, - PRINT_FMT_ARG_STRING, -}; - -struct tep_print_parse { - struct tep_print_parse *next; - - char *format; - int ls; - enum tep_print_parse_type type; - struct tep_print_arg *arg; - struct tep_print_arg *len_as_arg; -}; - -void free_tep_event(struct tep_event *event); -void free_tep_format_field(struct tep_format_field *field); -void free_tep_plugin_paths(struct tep_handle *tep); - -unsigned short data2host2(struct tep_handle *tep, unsigned short data); -unsigned int data2host4(struct tep_handle *tep, unsigned int data); -unsigned long long data2host8(struct tep_handle *tep, unsigned long long data); - -/* access to the internal parser */ -int peek_char(void); -void init_input_buf(const char *buf, unsigned long long size); -unsigned long long get_input_buf_ptr(void); -const char *get_input_buf(void); -enum tep_event_type read_token(char **tok); -void free_token(char *tok); - -#endif /* _PARSE_EVENTS_INT_H */ 
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c deleted file mode 100644 index 8e24c4c78c7f..000000000000 --- a/tools/lib/traceevent/event-parse.c +++ /dev/null @@ -1,7624 +0,0 @@ -// SPDX-License-Identifier: LGPL-2.1 -/* - * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt - * - * - * The parts for function graph printing was taken and modified from the - * Linux Kernel that were written by - * - Copyright (C) 2009 Frederic Weisbecker, - * Frederic Weisbecker gave his permission to relicense the code to - * the Lesser General Public License. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include "event-parse.h" - -#include "event-parse-local.h" -#include "event-utils.h" -#include "trace-seq.h" - -static const char *input_buf; -static unsigned long long input_buf_ptr; -static unsigned long long input_buf_siz; - -static int is_flag_field; -static int is_symbolic_field; - -static int show_warning = 1; - -#define do_warning(fmt, ...) \ - do { \ - if (show_warning) \ - warning(fmt, ##__VA_ARGS__); \ - } while (0) - -#define do_warning_event(event, fmt, ...) \ - do { \ - if (!show_warning) \ - continue; \ - \ - if (event) \ - warning("[%s:%s] " fmt, event->system, \ - event->name, ##__VA_ARGS__); \ - else \ - warning(fmt, ##__VA_ARGS__); \ - } while (0) - -/** - * init_input_buf - init buffer for parsing - * @buf: buffer to parse - * @size: the size of the buffer - * - * Initializes the internal buffer that tep_read_token() will parse. 
- */ -__hidden void init_input_buf(const char *buf, unsigned long long size) -{ - input_buf = buf; - input_buf_siz = size; - input_buf_ptr = 0; -} - -__hidden const char *get_input_buf(void) -{ - return input_buf; -} - -__hidden unsigned long long get_input_buf_ptr(void) -{ - return input_buf_ptr; -} - -struct event_handler { - struct event_handler *next; - int id; - const char *sys_name; - const char *event_name; - tep_event_handler_func func; - void *context; -}; - -struct func_params { - struct func_params *next; - enum tep_func_arg_type type; -}; - -struct tep_function_handler { - struct tep_function_handler *next; - enum tep_func_arg_type ret_type; - char *name; - tep_func_handler func; - struct func_params *params; - int nr_args; -}; - -static unsigned long long -process_defined_func(struct trace_seq *s, void *data, int size, - struct tep_event *event, struct tep_print_arg *arg); - -static void free_func_handle(struct tep_function_handler *func); - -void breakpoint(void) -{ - static int x; - x++; -} - -static struct tep_print_arg *alloc_arg(void) -{ - return calloc(1, sizeof(struct tep_print_arg)); -} - -struct tep_cmdline { - char *comm; - int pid; -}; - -static int cmdline_cmp(const void *a, const void *b) -{ - const struct tep_cmdline *ca = a; - const struct tep_cmdline *cb = b; - - if (ca->pid < cb->pid) - return -1; - if (ca->pid > cb->pid) - return 1; - - return 0; -} - -/* Looking for where to place the key */ -static int cmdline_slot_cmp(const void *a, const void *b) -{ - const struct tep_cmdline *ca = a; - const struct tep_cmdline *cb = b; - const struct tep_cmdline *cb1 = cb + 1; - - if (ca->pid < cb->pid) - return -1; - - if (ca->pid > cb->pid) { - if (ca->pid <= cb1->pid) - return 0; - return 1; - } - - return 0; -} - -struct cmdline_list { - struct cmdline_list *next; - char *comm; - int pid; -}; - -static int cmdline_init(struct tep_handle *tep) -{ - struct cmdline_list *cmdlist = tep->cmdlist; - struct cmdline_list *item; - struct tep_cmdline 
*cmdlines; - int i; - - cmdlines = malloc(sizeof(*cmdlines) * tep->cmdline_count); - if (!cmdlines) - return -1; - - i = 0; - while (cmdlist) { - cmdlines[i].pid = cmdlist->pid; - cmdlines[i].comm = cmdlist->comm; - i++; - item = cmdlist; - cmdlist = cmdlist->next; - free(item); - } - - qsort(cmdlines, tep->cmdline_count, sizeof(*cmdlines), cmdline_cmp); - - tep->cmdlines = cmdlines; - tep->cmdlist = NULL; - - return 0; -} - -static const char *find_cmdline(struct tep_handle *tep, int pid) -{ - const struct tep_cmdline *comm; - struct tep_cmdline key; - - if (!pid) - return ""; - - if (!tep->cmdlines && cmdline_init(tep)) - return ""; - - key.pid = pid; - - comm = bsearch(&key, tep->cmdlines, tep->cmdline_count, - sizeof(*tep->cmdlines), cmdline_cmp); - - if (comm) - return comm->comm; - return "<...>"; -} - -/** - * tep_is_pid_registered - return if a pid has a cmdline registered - * @tep: a handle to the trace event parser context - * @pid: The pid to check if it has a cmdline registered with. - * - * Returns true if the pid has a cmdline mapped to it - * false otherwise. - */ -bool tep_is_pid_registered(struct tep_handle *tep, int pid) -{ - const struct tep_cmdline *comm; - struct tep_cmdline key; - - if (!pid) - return true; - - if (!tep->cmdlines && cmdline_init(tep)) - return false; - - key.pid = pid; - - comm = bsearch(&key, tep->cmdlines, tep->cmdline_count, - sizeof(*tep->cmdlines), cmdline_cmp); - - if (comm) - return true; - return false; -} - -/* - * If the command lines have been converted to an array, then - * we must add this pid. This is much slower than when cmdlines - * are added before the array is initialized. 
- */ -static int add_new_comm(struct tep_handle *tep, - const char *comm, int pid, bool override) -{ - struct tep_cmdline *cmdlines = tep->cmdlines; - struct tep_cmdline *cmdline; - struct tep_cmdline key; - char *new_comm; - int cnt; - - if (!pid) - return 0; - - /* avoid duplicates */ - key.pid = pid; - - cmdline = bsearch(&key, tep->cmdlines, tep->cmdline_count, - sizeof(*tep->cmdlines), cmdline_cmp); - if (cmdline) { - if (!override) { - errno = EEXIST; - return -1; - } - new_comm = strdup(comm); - if (!new_comm) { - errno = ENOMEM; - return -1; - } - free(cmdline->comm); - cmdline->comm = new_comm; - - return 0; - } - - cmdlines = realloc(cmdlines, sizeof(*cmdlines) * (tep->cmdline_count + 1)); - if (!cmdlines) { - errno = ENOMEM; - return -1; - } - tep->cmdlines = cmdlines; - - key.comm = strdup(comm); - if (!key.comm) { - errno = ENOMEM; - return -1; - } - - if (!tep->cmdline_count) { - /* no entries yet */ - tep->cmdlines[0] = key; - tep->cmdline_count++; - return 0; - } - - /* Now find where we want to store the new cmdline */ - cmdline = bsearch(&key, tep->cmdlines, tep->cmdline_count - 1, - sizeof(*tep->cmdlines), cmdline_slot_cmp); - - cnt = tep->cmdline_count; - if (cmdline) { - /* cmdline points to the one before the spot we want */ - cmdline++; - cnt -= cmdline - tep->cmdlines; - - } else { - /* The new entry is either before or after the list */ - if (key.pid > tep->cmdlines[tep->cmdline_count - 1].pid) { - tep->cmdlines[tep->cmdline_count++] = key; - return 0; - } - cmdline = &tep->cmdlines[0]; - } - memmove(cmdline + 1, cmdline, (cnt * sizeof(*cmdline))); - *cmdline = key; - - tep->cmdline_count++; - - return 0; -} - -static int _tep_register_comm(struct tep_handle *tep, - const char *comm, int pid, bool override) -{ - struct cmdline_list *item; - - if (tep->cmdlines) - return add_new_comm(tep, comm, pid, override); - - item = malloc(sizeof(*item)); - if (!item) - return -1; - - if (comm) - item->comm = strdup(comm); - else - item->comm = 
strdup("<...>"); - if (!item->comm) { - free(item); - return -1; - } - item->pid = pid; - item->next = tep->cmdlist; - - tep->cmdlist = item; - tep->cmdline_count++; - - return 0; -} - -/** - * tep_register_comm - register a pid / comm mapping - * @tep: a handle to the trace event parser context - * @comm: the command line to register - * @pid: the pid to map the command line to - * - * This adds a mapping to search for command line names with - * a given pid. The comm is duplicated. If a command with the same pid - * already exist, -1 is returned and errno is set to EEXIST - */ -int tep_register_comm(struct tep_handle *tep, const char *comm, int pid) -{ - return _tep_register_comm(tep, comm, pid, false); -} - -/** - * tep_override_comm - register a pid / comm mapping - * @tep: a handle to the trace event parser context - * @comm: the command line to register - * @pid: the pid to map the command line to - * - * This adds a mapping to search for command line names with - * a given pid. The comm is duplicated. If a command with the same pid - * already exist, the command string is udapted with the new one - */ -int tep_override_comm(struct tep_handle *tep, const char *comm, int pid) -{ - if (!tep->cmdlines && cmdline_init(tep)) { - errno = ENOMEM; - return -1; - } - return _tep_register_comm(tep, comm, pid, true); -} - -struct func_map { - unsigned long long addr; - char *func; - char *mod; -}; - -struct func_list { - struct func_list *next; - unsigned long long addr; - char *func; - char *mod; -}; - -static int func_cmp(const void *a, const void *b) -{ - const struct func_map *fa = a; - const struct func_map *fb = b; - - if (fa->addr < fb->addr) - return -1; - if (fa->addr > fb->addr) - return 1; - - return 0; -} - -/* - * We are searching for a record in between, not an exact - * match. 
- */ -static int func_bcmp(const void *a, const void *b) -{ - const struct func_map *fa = a; - const struct func_map *fb = b; - - if ((fa->addr == fb->addr) || - - (fa->addr > fb->addr && - fa->addr < (fb+1)->addr)) - return 0; - - if (fa->addr < fb->addr) - return -1; - - return 1; -} - -static int func_map_init(struct tep_handle *tep) -{ - struct func_list *funclist; - struct func_list *item; - struct func_map *func_map; - int i; - - func_map = malloc(sizeof(*func_map) * (tep->func_count + 1)); - if (!func_map) - return -1; - - funclist = tep->funclist; - - i = 0; - while (funclist) { - func_map[i].func = funclist->func; - func_map[i].addr = funclist->addr; - func_map[i].mod = funclist->mod; - i++; - item = funclist; - funclist = funclist->next; - free(item); - } - - qsort(func_map, tep->func_count, sizeof(*func_map), func_cmp); - - /* - * Add a special record at the end. - */ - func_map[tep->func_count].func = NULL; - func_map[tep->func_count].addr = 0; - func_map[tep->func_count].mod = NULL; - - tep->func_map = func_map; - tep->funclist = NULL; - - return 0; -} - -static struct func_map * -__find_func(struct tep_handle *tep, unsigned long long addr) -{ - struct func_map *func; - struct func_map key; - - if (!tep->func_map) - func_map_init(tep); - - key.addr = addr; - - func = bsearch(&key, tep->func_map, tep->func_count, - sizeof(*tep->func_map), func_bcmp); - - return func; -} - -struct func_resolver { - tep_func_resolver_t *func; - void *priv; - struct func_map map; -}; - -/** - * tep_set_function_resolver - set an alternative function resolver - * @tep: a handle to the trace event parser context - * @resolver: function to be used - * @priv: resolver function private state. - * - * Some tools may have already a way to resolve kernel functions, allow them to - * keep using it instead of duplicating all the entries inside tep->funclist. 
- */ -int tep_set_function_resolver(struct tep_handle *tep, - tep_func_resolver_t *func, void *priv) -{ - struct func_resolver *resolver = malloc(sizeof(*resolver)); - - if (resolver == NULL) - return -1; - - resolver->func = func; - resolver->priv = priv; - - free(tep->func_resolver); - tep->func_resolver = resolver; - - return 0; -} - -/** - * tep_reset_function_resolver - reset alternative function resolver - * @tep: a handle to the trace event parser context - * - * Stop using whatever alternative resolver was set, use the default - * one instead. - */ -void tep_reset_function_resolver(struct tep_handle *tep) -{ - free(tep->func_resolver); - tep->func_resolver = NULL; -} - -static struct func_map * -find_func(struct tep_handle *tep, unsigned long long addr) -{ - struct func_map *map; - - if (!tep->func_resolver) - return __find_func(tep, addr); - - map = &tep->func_resolver->map; - map->mod = NULL; - map->addr = addr; - map->func = tep->func_resolver->func(tep->func_resolver->priv, - &map->addr, &map->mod); - if (map->func == NULL) - return NULL; - - return map; -} - -/** - * tep_find_function - find a function by a given address - * @tep: a handle to the trace event parser context - * @addr: the address to find the function with - * - * Returns a pointer to the function stored that has the given - * address. Note, the address does not have to be exact, it - * will select the function that would contain the address. - */ -const char *tep_find_function(struct tep_handle *tep, unsigned long long addr) -{ - struct func_map *map; - - map = find_func(tep, addr); - if (!map) - return NULL; - - return map->func; -} - -/** - * tep_find_function_address - find a function address by a given address - * @tep: a handle to the trace event parser context - * @addr: the address to find the function with - * - * Returns the address the function starts at. This can be used in - * conjunction with tep_find_function to print both the function - * name and the function offset. 
- */ -unsigned long long -tep_find_function_address(struct tep_handle *tep, unsigned long long addr) -{ - struct func_map *map; - - map = find_func(tep, addr); - if (!map) - return 0; - - return map->addr; -} - -/** - * tep_register_function - register a function with a given address - * @tep: a handle to the trace event parser context - * @function: the function name to register - * @addr: the address the function starts at - * @mod: the kernel module the function may be in (NULL for none) - * - * This registers a function name with an address and module. - * The @func passed in is duplicated. - */ -int tep_register_function(struct tep_handle *tep, char *func, - unsigned long long addr, char *mod) -{ - struct func_list *item = malloc(sizeof(*item)); - - if (!item) - return -1; - - item->next = tep->funclist; - item->func = strdup(func); - if (!item->func) - goto out_free; - - if (mod) { - item->mod = strdup(mod); - if (!item->mod) - goto out_free_func; - } else - item->mod = NULL; - item->addr = addr; - - tep->funclist = item; - tep->func_count++; - - return 0; - -out_free_func: - free(item->func); - item->func = NULL; -out_free: - free(item); - errno = ENOMEM; - return -1; -} - -/** - * tep_print_funcs - print out the stored functions - * @tep: a handle to the trace event parser context - * - * This prints out the stored functions. 
- */ -void tep_print_funcs(struct tep_handle *tep) -{ - int i; - - if (!tep->func_map) - func_map_init(tep); - - for (i = 0; i < (int)tep->func_count; i++) { - printf("%016llx %s", - tep->func_map[i].addr, - tep->func_map[i].func); - if (tep->func_map[i].mod) - printf(" [%s]\n", tep->func_map[i].mod); - else - printf("\n"); - } -} - -struct printk_map { - unsigned long long addr; - char *printk; -}; - -struct printk_list { - struct printk_list *next; - unsigned long long addr; - char *printk; -}; - -static int printk_cmp(const void *a, const void *b) -{ - const struct printk_map *pa = a; - const struct printk_map *pb = b; - - if (pa->addr < pb->addr) - return -1; - if (pa->addr > pb->addr) - return 1; - - return 0; -} - -static int printk_map_init(struct tep_handle *tep) -{ - struct printk_list *printklist; - struct printk_list *item; - struct printk_map *printk_map; - int i; - - printk_map = malloc(sizeof(*printk_map) * (tep->printk_count + 1)); - if (!printk_map) - return -1; - - printklist = tep->printklist; - - i = 0; - while (printklist) { - printk_map[i].printk = printklist->printk; - printk_map[i].addr = printklist->addr; - i++; - item = printklist; - printklist = printklist->next; - free(item); - } - - qsort(printk_map, tep->printk_count, sizeof(*printk_map), printk_cmp); - - tep->printk_map = printk_map; - tep->printklist = NULL; - - return 0; -} - -static struct printk_map * -find_printk(struct tep_handle *tep, unsigned long long addr) -{ - struct printk_map *printk; - struct printk_map key; - - if (!tep->printk_map && printk_map_init(tep)) - return NULL; - - key.addr = addr; - - printk = bsearch(&key, tep->printk_map, tep->printk_count, - sizeof(*tep->printk_map), printk_cmp); - - return printk; -} - -/** - * tep_register_print_string - register a string by its address - * @tep: a handle to the trace event parser context - * @fmt: the string format to register - * @addr: the address the string was located at - * - * This registers a string by the address 
it was stored in the kernel. - * The @fmt passed in is duplicated. - */ -int tep_register_print_string(struct tep_handle *tep, const char *fmt, - unsigned long long addr) -{ - struct printk_list *item = malloc(sizeof(*item)); - char *p; - - if (!item) - return -1; - - item->next = tep->printklist; - item->addr = addr; - - /* Strip off quotes and '\n' from the end */ - if (fmt[0] == '"') - fmt++; - item->printk = strdup(fmt); - if (!item->printk) - goto out_free; - - p = item->printk + strlen(item->printk) - 1; - if (*p == '"') - *p = 0; - - p -= 2; - if (strcmp(p, "\\n") == 0) - *p = 0; - - tep->printklist = item; - tep->printk_count++; - - return 0; - -out_free: - free(item); - errno = ENOMEM; - return -1; -} - -/** - * tep_print_printk - print out the stored strings - * @tep: a handle to the trace event parser context - * - * This prints the string formats that were stored. - */ -void tep_print_printk(struct tep_handle *tep) -{ - int i; - - if (!tep->printk_map) - printk_map_init(tep); - - for (i = 0; i < (int)tep->printk_count; i++) { - printf("%016llx %s\n", - tep->printk_map[i].addr, - tep->printk_map[i].printk); - } -} - -static struct tep_event *alloc_event(void) -{ - return calloc(1, sizeof(struct tep_event)); -} - -static int add_event(struct tep_handle *tep, struct tep_event *event) -{ - int i; - struct tep_event **events = realloc(tep->events, sizeof(event) * - (tep->nr_events + 1)); - if (!events) - return -1; - - tep->events = events; - - for (i = 0; i < tep->nr_events; i++) { - if (tep->events[i]->id > event->id) - break; - } - if (i < tep->nr_events) - memmove(&tep->events[i + 1], - &tep->events[i], - sizeof(event) * (tep->nr_events - i)); - - tep->events[i] = event; - tep->nr_events++; - - event->tep = tep; - - return 0; -} - -static int event_item_type(enum tep_event_type type) -{ - switch (type) { - case TEP_EVENT_ITEM ... TEP_EVENT_SQUOTE: - return 1; - case TEP_EVENT_ERROR ... 
TEP_EVENT_DELIM: - default: - return 0; - } -} - -static void free_flag_sym(struct tep_print_flag_sym *fsym) -{ - struct tep_print_flag_sym *next; - - while (fsym) { - next = fsym->next; - free(fsym->value); - free(fsym->str); - free(fsym); - fsym = next; - } -} - -static void free_arg(struct tep_print_arg *arg) -{ - struct tep_print_arg *farg; - - if (!arg) - return; - - switch (arg->type) { - case TEP_PRINT_ATOM: - free(arg->atom.atom); - break; - case TEP_PRINT_FIELD: - free(arg->field.name); - break; - case TEP_PRINT_FLAGS: - free_arg(arg->flags.field); - free(arg->flags.delim); - free_flag_sym(arg->flags.flags); - break; - case TEP_PRINT_SYMBOL: - free_arg(arg->symbol.field); - free_flag_sym(arg->symbol.symbols); - break; - case TEP_PRINT_HEX: - case TEP_PRINT_HEX_STR: - free_arg(arg->hex.field); - free_arg(arg->hex.size); - break; - case TEP_PRINT_INT_ARRAY: - free_arg(arg->int_array.field); - free_arg(arg->int_array.count); - free_arg(arg->int_array.el_size); - break; - case TEP_PRINT_TYPE: - free(arg->typecast.type); - free_arg(arg->typecast.item); - break; - case TEP_PRINT_STRING: - case TEP_PRINT_BSTRING: - free(arg->string.string); - break; - case TEP_PRINT_BITMASK: - free(arg->bitmask.bitmask); - break; - case TEP_PRINT_DYNAMIC_ARRAY: - case TEP_PRINT_DYNAMIC_ARRAY_LEN: - free(arg->dynarray.index); - break; - case TEP_PRINT_OP: - free(arg->op.op); - free_arg(arg->op.left); - free_arg(arg->op.right); - break; - case TEP_PRINT_FUNC: - while (arg->func.args) { - farg = arg->func.args; - arg->func.args = farg->next; - free_arg(farg); - } - break; - - case TEP_PRINT_NULL: - default: - break; - } - - free(arg); -} - -static enum tep_event_type get_type(int ch) -{ - if (ch == '\n') - return TEP_EVENT_NEWLINE; - if (isspace(ch)) - return TEP_EVENT_SPACE; - if (isalnum(ch) || ch == '_') - return TEP_EVENT_ITEM; - if (ch == '\'') - return TEP_EVENT_SQUOTE; - if (ch == '"') - return TEP_EVENT_DQUOTE; - if (!isprint(ch)) - return TEP_EVENT_NONE; - if (ch == '(' || 
ch == ')' || ch == ',') - return TEP_EVENT_DELIM; - - return TEP_EVENT_OP; -} - -static int __read_char(void) -{ - if (input_buf_ptr >= input_buf_siz) - return -1; - - return input_buf[input_buf_ptr++]; -} - -/** - * peek_char - peek at the next character that will be read - * - * Returns the next character read, or -1 if end of buffer. - */ -__hidden int peek_char(void) -{ - if (input_buf_ptr >= input_buf_siz) - return -1; - - return input_buf[input_buf_ptr]; -} - -static int extend_token(char **tok, char *buf, int size) -{ - char *newtok = realloc(*tok, size); - - if (!newtok) { - free(*tok); - *tok = NULL; - return -1; - } - - if (!*tok) - strcpy(newtok, buf); - else - strcat(newtok, buf); - *tok = newtok; - - return 0; -} - -static enum tep_event_type force_token(const char *str, char **tok); - -static enum tep_event_type __read_token(char **tok) -{ - char buf[BUFSIZ]; - int ch, last_ch, quote_ch, next_ch; - int i = 0; - int tok_size = 0; - enum tep_event_type type; - - *tok = NULL; - - - ch = __read_char(); - if (ch < 0) - return TEP_EVENT_NONE; - - type = get_type(ch); - if (type == TEP_EVENT_NONE) - return type; - - buf[i++] = ch; - - switch (type) { - case TEP_EVENT_NEWLINE: - case TEP_EVENT_DELIM: - if (asprintf(tok, "%c", ch) < 0) - return TEP_EVENT_ERROR; - - return type; - - case TEP_EVENT_OP: - switch (ch) { - case '-': - next_ch = peek_char(); - if (next_ch == '>') { - buf[i++] = __read_char(); - break; - } - /* fall through */ - case '+': - case '|': - case '&': - case '>': - case '<': - last_ch = ch; - ch = peek_char(); - if (ch != last_ch) - goto test_equal; - buf[i++] = __read_char(); - switch (last_ch) { - case '>': - case '<': - goto test_equal; - default: - break; - } - break; - case '!': - case '=': - goto test_equal; - default: /* what should we do instead? 
*/ - break; - } - buf[i] = 0; - *tok = strdup(buf); - return type; - - test_equal: - ch = peek_char(); - if (ch == '=') - buf[i++] = __read_char(); - goto out; - - case TEP_EVENT_DQUOTE: - case TEP_EVENT_SQUOTE: - /* don't keep quotes */ - i--; - quote_ch = ch; - last_ch = 0; - concat: - do { - if (i == (BUFSIZ - 1)) { - buf[i] = 0; - tok_size += BUFSIZ; - - if (extend_token(tok, buf, tok_size) < 0) - return TEP_EVENT_NONE; - i = 0; - } - last_ch = ch; - ch = __read_char(); - buf[i++] = ch; - /* the '\' '\' will cancel itself */ - if (ch == '\\' && last_ch == '\\') - last_ch = 0; - } while (ch != quote_ch || last_ch == '\\'); - /* remove the last quote */ - i--; - - /* - * For strings (double quotes) check the next token. - * If it is another string, concatinate the two. - */ - if (type == TEP_EVENT_DQUOTE) { - unsigned long long save_input_buf_ptr = input_buf_ptr; - - do { - ch = __read_char(); - } while (isspace(ch)); - if (ch == '"') - goto concat; - input_buf_ptr = save_input_buf_ptr; - } - - goto out; - - case TEP_EVENT_ERROR ... TEP_EVENT_SPACE: - case TEP_EVENT_ITEM: - default: - break; - } - - while (get_type(peek_char()) == type) { - if (i == (BUFSIZ - 1)) { - buf[i] = 0; - tok_size += BUFSIZ; - - if (extend_token(tok, buf, tok_size) < 0) - return TEP_EVENT_NONE; - i = 0; - } - ch = __read_char(); - buf[i++] = ch; - } - - out: - buf[i] = 0; - if (extend_token(tok, buf, tok_size + i + 1) < 0) - return TEP_EVENT_NONE; - - if (type == TEP_EVENT_ITEM) { - /* - * Older versions of the kernel has a bug that - * creates invalid symbols and will break the mac80211 - * parsing. This is a work around to that bug. 
- * - * See Linux kernel commit: - * 811cb50baf63461ce0bdb234927046131fc7fa8b - */ - if (strcmp(*tok, "LOCAL_PR_FMT") == 0) { - free(*tok); - *tok = NULL; - return force_token("\"%s\" ", tok); - } else if (strcmp(*tok, "STA_PR_FMT") == 0) { - free(*tok); - *tok = NULL; - return force_token("\" sta:%pM\" ", tok); - } else if (strcmp(*tok, "VIF_PR_FMT") == 0) { - free(*tok); - *tok = NULL; - return force_token("\" vif:%p(%d)\" ", tok); - } - } - - return type; -} - -static enum tep_event_type force_token(const char *str, char **tok) -{ - const char *save_input_buf; - unsigned long long save_input_buf_ptr; - unsigned long long save_input_buf_siz; - enum tep_event_type type; - - /* save off the current input pointers */ - save_input_buf = input_buf; - save_input_buf_ptr = input_buf_ptr; - save_input_buf_siz = input_buf_siz; - - init_input_buf(str, strlen(str)); - - type = __read_token(tok); - - /* reset back to original token */ - input_buf = save_input_buf; - input_buf_ptr = save_input_buf_ptr; - input_buf_siz = save_input_buf_siz; - - return type; -} - -/** - * free_token - free a token returned by tep_read_token - * @token: the token to free - */ -__hidden void free_token(char *tok) -{ - if (tok) - free(tok); -} - -/** - * read_token - access to utilities to use the tep parser - * @tok: The token to return - * - * This will parse tokens from the string given by - * tep_init_data(). - * - * Returns the token type. 
- */ -__hidden enum tep_event_type read_token(char **tok) -{ - enum tep_event_type type; - - for (;;) { - type = __read_token(tok); - if (type != TEP_EVENT_SPACE) - return type; - - free_token(*tok); - } - - /* not reached */ - *tok = NULL; - return TEP_EVENT_NONE; -} - -/* no newline */ -static enum tep_event_type read_token_item(char **tok) -{ - enum tep_event_type type; - - for (;;) { - type = __read_token(tok); - if (type != TEP_EVENT_SPACE && type != TEP_EVENT_NEWLINE) - return type; - free_token(*tok); - *tok = NULL; - } - - /* not reached */ - *tok = NULL; - return TEP_EVENT_NONE; -} - -static int test_type(enum tep_event_type type, enum tep_event_type expect) -{ - if (type != expect) { - do_warning("Error: expected type %d but read %d", - expect, type); - return -1; - } - return 0; -} - -static int test_type_token(enum tep_event_type type, const char *token, - enum tep_event_type expect, const char *expect_tok) -{ - if (type != expect) { - do_warning("Error: expected type %d but read %d", - expect, type); - return -1; - } - - if (strcmp(token, expect_tok) != 0) { - do_warning("Error: expected '%s' but read '%s'", - expect_tok, token); - return -1; - } - return 0; -} - -static int __read_expect_type(enum tep_event_type expect, char **tok, int newline_ok) -{ - enum tep_event_type type; - - if (newline_ok) - type = read_token(tok); - else - type = read_token_item(tok); - return test_type(type, expect); -} - -static int read_expect_type(enum tep_event_type expect, char **tok) -{ - return __read_expect_type(expect, tok, 1); -} - -static int __read_expected(enum tep_event_type expect, const char *str, - int newline_ok) -{ - enum tep_event_type type; - char *token; - int ret; - - if (newline_ok) - type = read_token(&token); - else - type = read_token_item(&token); - - ret = test_type_token(type, token, expect, str); - - free_token(token); - - return ret; -} - -static int read_expected(enum tep_event_type expect, const char *str) -{ - return __read_expected(expect, 
str, 1); -} - -static int read_expected_item(enum tep_event_type expect, const char *str) -{ - return __read_expected(expect, str, 0); -} - -static char *event_read_name(void) -{ - char *token; - - if (read_expected(TEP_EVENT_ITEM, "name") < 0) - return NULL; - - if (read_expected(TEP_EVENT_OP, ":") < 0) - return NULL; - - if (read_expect_type(TEP_EVENT_ITEM, &token) < 0) - goto fail; - - return token; - - fail: - free_token(token); - return NULL; -} - -static int event_read_id(void) -{ - char *token; - int id; - - if (read_expected_item(TEP_EVENT_ITEM, "ID") < 0) - return -1; - - if (read_expected(TEP_EVENT_OP, ":") < 0) - return -1; - - if (read_expect_type(TEP_EVENT_ITEM, &token) < 0) - goto fail; - - id = strtoul(token, NULL, 0); - free_token(token); - return id; - - fail: - free_token(token); - return -1; -} - -static int field_is_string(struct tep_format_field *field) -{ - if ((field->flags & TEP_FIELD_IS_ARRAY) && - (strstr(field->type, "char") || strstr(field->type, "u8") || - strstr(field->type, "s8"))) - return 1; - - return 0; -} - -static int field_is_dynamic(struct tep_format_field *field) -{ - if (strncmp(field->type, "__data_loc", 10) == 0) - return 1; - - return 0; -} - -static int field_is_relative_dynamic(struct tep_format_field *field) -{ - if (strncmp(field->type, "__rel_loc", 9) == 0) - return 1; - - return 0; -} - -static int field_is_long(struct tep_format_field *field) -{ - /* includes long long */ - if (strstr(field->type, "long")) - return 1; - - return 0; -} - -static unsigned int type_size(const char *name) -{ - /* This covers all TEP_FIELD_IS_STRING types. 
*/ - static struct { - const char *type; - unsigned int size; - } table[] = { - { "u8", 1 }, - { "u16", 2 }, - { "u32", 4 }, - { "u64", 8 }, - { "s8", 1 }, - { "s16", 2 }, - { "s32", 4 }, - { "s64", 8 }, - { "char", 1 }, - { }, - }; - int i; - - for (i = 0; table[i].type; i++) { - if (!strcmp(table[i].type, name)) - return table[i].size; - } - - return 0; -} - -static int append(char **buf, const char *delim, const char *str) -{ - char *new_buf; - - new_buf = realloc(*buf, strlen(*buf) + strlen(delim) + strlen(str) + 1); - if (!new_buf) - return -1; - strcat(new_buf, delim); - strcat(new_buf, str); - *buf = new_buf; - return 0; -} - -static int event_read_fields(struct tep_event *event, struct tep_format_field **fields) -{ - struct tep_format_field *field = NULL; - enum tep_event_type type; - char *token; - char *last_token; - char *delim = " "; - int count = 0; - int ret; - - do { - unsigned int size_dynamic = 0; - - type = read_token(&token); - if (type == TEP_EVENT_NEWLINE) { - free_token(token); - return count; - } - - count++; - - if (test_type_token(type, token, TEP_EVENT_ITEM, "field")) - goto fail; - free_token(token); - - type = read_token(&token); - /* - * The ftrace fields may still use the "special" name. - * Just ignore it. - */ - if (event->flags & TEP_EVENT_FL_ISFTRACE && - type == TEP_EVENT_ITEM && strcmp(token, "special") == 0) { - free_token(token); - type = read_token(&token); - } - - if (test_type_token(type, token, TEP_EVENT_OP, ":") < 0) - goto fail; - - free_token(token); - if (read_expect_type(TEP_EVENT_ITEM, &token) < 0) - goto fail; - - last_token = token; - - field = calloc(1, sizeof(*field)); - if (!field) - goto fail; - - field->event = event; - - /* read the rest of the type */ - for (;;) { - type = read_token(&token); - if (type == TEP_EVENT_ITEM || - (type == TEP_EVENT_OP && strcmp(token, "*") == 0) || - /* - * Some of the ftrace fields are broken and have - * an illegal "." in them. 
- */ - (event->flags & TEP_EVENT_FL_ISFTRACE && - type == TEP_EVENT_OP && strcmp(token, ".") == 0)) { - - if (strcmp(token, "*") == 0) - field->flags |= TEP_FIELD_IS_POINTER; - - if (field->type) { - ret = append(&field->type, delim, last_token); - free(last_token); - if (ret < 0) - goto fail; - } else - field->type = last_token; - last_token = token; - delim = " "; - continue; - } - - /* Handle __attribute__((user)) */ - if ((type == TEP_EVENT_DELIM) && - strcmp("__attribute__", last_token) == 0 && - token[0] == '(') { - int depth = 1; - int ret; - - ret = append(&field->type, " ", last_token); - ret |= append(&field->type, "", "("); - if (ret < 0) - goto fail; - - delim = " "; - while ((type = read_token(&token)) != TEP_EVENT_NONE) { - if (type == TEP_EVENT_DELIM) { - if (token[0] == '(') - depth++; - else if (token[0] == ')') - depth--; - if (!depth) - break; - ret = append(&field->type, "", token); - delim = ""; - } else { - ret = append(&field->type, delim, token); - delim = " "; - } - if (ret < 0) - goto fail; - free(last_token); - last_token = token; - } - continue; - } - break; - } - - if (!field->type) { - do_warning_event(event, "%s: no type found", __func__); - goto fail; - } - field->name = field->alias = last_token; - - if (test_type(type, TEP_EVENT_OP)) - goto fail; - - if (strcmp(token, "[") == 0) { - enum tep_event_type last_type = type; - char *brackets = token; - - field->flags |= TEP_FIELD_IS_ARRAY; - - type = read_token(&token); - - if (type == TEP_EVENT_ITEM) - field->arraylen = strtoul(token, NULL, 0); - else - field->arraylen = 0; - - while (strcmp(token, "]") != 0) { - const char *delim; - - if (last_type == TEP_EVENT_ITEM && - type == TEP_EVENT_ITEM) - delim = " "; - else - delim = ""; - - last_type = type; - - ret = append(&brackets, delim, token); - if (ret < 0) { - free(brackets); - goto fail; - } - /* We only care about the last token */ - field->arraylen = strtoul(token, NULL, 0); - free_token(token); - type = read_token(&token); - if 
(type == TEP_EVENT_NONE) { - free(brackets); - do_warning_event(event, "failed to find token"); - goto fail; - } - } - - free_token(token); - - ret = append(&brackets, "", "]"); - if (ret < 0) { - free(brackets); - goto fail; - } - - /* add brackets to type */ - - type = read_token(&token); - /* - * If the next token is not an OP, then it is of - * the format: type [] item; - */ - if (type == TEP_EVENT_ITEM) { - ret = append(&field->type, " ", field->name); - if (ret < 0) { - free(brackets); - goto fail; - } - ret = append(&field->type, "", brackets); - - size_dynamic = type_size(field->name); - free_token(field->name); - field->name = field->alias = token; - type = read_token(&token); - } else { - ret = append(&field->type, "", brackets); - if (ret < 0) { - free(brackets); - goto fail; - } - } - free(brackets); - } - - if (field_is_string(field)) - field->flags |= TEP_FIELD_IS_STRING; - if (field_is_dynamic(field)) - field->flags |= TEP_FIELD_IS_DYNAMIC; - if (field_is_relative_dynamic(field)) - field->flags |= TEP_FIELD_IS_DYNAMIC | TEP_FIELD_IS_RELATIVE; - if (field_is_long(field)) - field->flags |= TEP_FIELD_IS_LONG; - - if (test_type_token(type, token, TEP_EVENT_OP, ";")) - goto fail; - free_token(token); - - if (read_expected(TEP_EVENT_ITEM, "offset") < 0) - goto fail_expect; - - if (read_expected(TEP_EVENT_OP, ":") < 0) - goto fail_expect; - - if (read_expect_type(TEP_EVENT_ITEM, &token)) - goto fail; - field->offset = strtoul(token, NULL, 0); - free_token(token); - - if (read_expected(TEP_EVENT_OP, ";") < 0) - goto fail_expect; - - if (read_expected(TEP_EVENT_ITEM, "size") < 0) - goto fail_expect; - - if (read_expected(TEP_EVENT_OP, ":") < 0) - goto fail_expect; - - if (read_expect_type(TEP_EVENT_ITEM, &token)) - goto fail; - field->size = strtoul(token, NULL, 0); - free_token(token); - - if (read_expected(TEP_EVENT_OP, ";") < 0) - goto fail_expect; - - type = read_token(&token); - if (type != TEP_EVENT_NEWLINE) { - /* newer versions of the kernel have a 
"signed" type */ - if (test_type_token(type, token, TEP_EVENT_ITEM, "signed")) - goto fail; - - free_token(token); - - if (read_expected(TEP_EVENT_OP, ":") < 0) - goto fail_expect; - - if (read_expect_type(TEP_EVENT_ITEM, &token)) - goto fail; - - if (strtoul(token, NULL, 0)) - field->flags |= TEP_FIELD_IS_SIGNED; - - free_token(token); - if (read_expected(TEP_EVENT_OP, ";") < 0) - goto fail_expect; - - if (read_expect_type(TEP_EVENT_NEWLINE, &token)) - goto fail; - } - - free_token(token); - - if (field->flags & TEP_FIELD_IS_ARRAY) { - if (field->arraylen) - field->elementsize = field->size / field->arraylen; - else if (field->flags & TEP_FIELD_IS_DYNAMIC) - field->elementsize = size_dynamic; - else if (field->flags & TEP_FIELD_IS_STRING) - field->elementsize = 1; - else if (field->flags & TEP_FIELD_IS_LONG) - field->elementsize = event->tep ? - event->tep->long_size : - sizeof(long); - } else - field->elementsize = field->size; - - *fields = field; - fields = &field->next; - - } while (1); - - return 0; - -fail: - free_token(token); -fail_expect: - if (field) { - free(field->type); - free(field->name); - free(field); - } - return -1; -} - -static int event_read_format(struct tep_event *event) -{ - char *token; - int ret; - - if (read_expected_item(TEP_EVENT_ITEM, "format") < 0) - return -1; - - if (read_expected(TEP_EVENT_OP, ":") < 0) - return -1; - - if (read_expect_type(TEP_EVENT_NEWLINE, &token)) - goto fail; - free_token(token); - - ret = event_read_fields(event, &event->format.common_fields); - if (ret < 0) - return ret; - event->format.nr_common = ret; - - ret = event_read_fields(event, &event->format.fields); - if (ret < 0) - return ret; - event->format.nr_fields = ret; - - return 0; - - fail: - free_token(token); - return -1; -} - -static enum tep_event_type -process_arg_token(struct tep_event *event, struct tep_print_arg *arg, - char **tok, enum tep_event_type type); - -static enum tep_event_type -process_arg(struct tep_event *event, struct 
tep_print_arg *arg, char **tok) -{ - enum tep_event_type type; - char *token; - - type = read_token(&token); - *tok = token; - - return process_arg_token(event, arg, tok, type); -} - -static enum tep_event_type -process_op(struct tep_event *event, struct tep_print_arg *arg, char **tok); - -/* - * For __print_symbolic() and __print_flags, we need to completely - * evaluate the first argument, which defines what to print next. - */ -static enum tep_event_type -process_field_arg(struct tep_event *event, struct tep_print_arg *arg, char **tok) -{ - enum tep_event_type type; - - type = process_arg(event, arg, tok); - - while (type == TEP_EVENT_OP) { - type = process_op(event, arg, tok); - } - - return type; -} - -static enum tep_event_type -process_cond(struct tep_event *event, struct tep_print_arg *top, char **tok) -{ - struct tep_print_arg *arg, *left, *right; - enum tep_event_type type; - char *token = NULL; - - arg = alloc_arg(); - left = alloc_arg(); - right = alloc_arg(); - - if (!arg || !left || !right) { - do_warning_event(event, "%s: not enough memory!", __func__); - /* arg will be freed at out_free */ - free_arg(left); - free_arg(right); - goto out_free; - } - - arg->type = TEP_PRINT_OP; - arg->op.left = left; - arg->op.right = right; - - *tok = NULL; - type = process_arg(event, left, &token); - - again: - if (type == TEP_EVENT_ERROR) - goto out_free; - - /* Handle other operations in the arguments */ - if (type == TEP_EVENT_OP && strcmp(token, ":") != 0) { - type = process_op(event, left, &token); - goto again; - } - - if (test_type_token(type, token, TEP_EVENT_OP, ":")) - goto out_free; - - arg->op.op = token; - - type = process_arg(event, right, &token); - - top->op.right = arg; - - *tok = token; - return type; - -out_free: - /* Top may point to itself */ - top->op.right = NULL; - free_token(token); - free_arg(arg); - return TEP_EVENT_ERROR; -} - -static enum tep_event_type -process_array(struct tep_event *event, struct tep_print_arg *top, char **tok) -{ - 
struct tep_print_arg *arg; - enum tep_event_type type; - char *token = NULL; - - arg = alloc_arg(); - if (!arg) { - do_warning_event(event, "%s: not enough memory!", __func__); - /* '*tok' is set to top->op.op. No need to free. */ - *tok = NULL; - return TEP_EVENT_ERROR; - } - - *tok = NULL; - type = process_arg(event, arg, &token); - if (test_type_token(type, token, TEP_EVENT_OP, "]")) - goto out_free; - - top->op.right = arg; - - free_token(token); - type = read_token_item(&token); - *tok = token; - - return type; - -out_free: - free_token(token); - free_arg(arg); - return TEP_EVENT_ERROR; -} - -static int get_op_prio(char *op) -{ - if (!op[1]) { - switch (op[0]) { - case '~': - case '!': - return 4; - case '*': - case '/': - case '%': - return 6; - case '+': - case '-': - return 7; - /* '>>' and '<<' are 8 */ - case '<': - case '>': - return 9; - /* '==' and '!=' are 10 */ - case '&': - return 11; - case '^': - return 12; - case '|': - return 13; - case '?': - return 16; - default: - do_warning("unknown op '%c'", op[0]); - return -1; - } - } else { - if (strcmp(op, "++") == 0 || - strcmp(op, "--") == 0) { - return 3; - } else if (strcmp(op, ">>") == 0 || - strcmp(op, "<<") == 0) { - return 8; - } else if (strcmp(op, ">=") == 0 || - strcmp(op, "<=") == 0) { - return 9; - } else if (strcmp(op, "==") == 0 || - strcmp(op, "!=") == 0) { - return 10; - } else if (strcmp(op, "&&") == 0) { - return 14; - } else if (strcmp(op, "||") == 0) { - return 15; - } else { - do_warning("unknown op '%s'", op); - return -1; - } - } -} - -static int set_op_prio(struct tep_print_arg *arg) -{ - - /* single ops are the greatest */ - if (!arg->op.left || arg->op.left->type == TEP_PRINT_NULL) - arg->op.prio = 0; - else - arg->op.prio = get_op_prio(arg->op.op); - - return arg->op.prio; -} - -/* Note, *tok does not get freed, but will most likely be saved */ -static enum tep_event_type -process_op(struct tep_event *event, struct tep_print_arg *arg, char **tok) -{ - struct tep_print_arg 
*left, *right = NULL; - enum tep_event_type type; - char *token; - - /* the op is passed in via tok */ - token = *tok; - - if (arg->type == TEP_PRINT_OP && !arg->op.left) { - /* handle single op */ - if (token[1]) { - do_warning_event(event, "bad op token %s", token); - goto out_free; - } - switch (token[0]) { - case '~': - case '!': - case '+': - case '-': - break; - default: - do_warning_event(event, "bad op token %s", token); - goto out_free; - - } - - /* make an empty left */ - left = alloc_arg(); - if (!left) - goto out_warn_free; - - left->type = TEP_PRINT_NULL; - arg->op.left = left; - - right = alloc_arg(); - if (!right) - goto out_warn_free; - - arg->op.right = right; - - /* do not free the token, it belongs to an op */ - *tok = NULL; - type = process_arg(event, right, tok); - - } else if (strcmp(token, "?") == 0) { - - left = alloc_arg(); - if (!left) - goto out_warn_free; - - /* copy the top arg to the left */ - *left = *arg; - - arg->type = TEP_PRINT_OP; - arg->op.op = token; - arg->op.left = left; - arg->op.prio = 0; - - /* it will set arg->op.right */ - type = process_cond(event, arg, tok); - - } else if (strcmp(token, ">>") == 0 || - strcmp(token, "<<") == 0 || - strcmp(token, "&") == 0 || - strcmp(token, "|") == 0 || - strcmp(token, "&&") == 0 || - strcmp(token, "||") == 0 || - strcmp(token, "-") == 0 || - strcmp(token, "+") == 0 || - strcmp(token, "*") == 0 || - strcmp(token, "^") == 0 || - strcmp(token, "/") == 0 || - strcmp(token, "%") == 0 || - strcmp(token, "<") == 0 || - strcmp(token, ">") == 0 || - strcmp(token, "<=") == 0 || - strcmp(token, ">=") == 0 || - strcmp(token, "==") == 0 || - strcmp(token, "!=") == 0) { - - left = alloc_arg(); - if (!left) - goto out_warn_free; - - /* copy the top arg to the left */ - *left = *arg; - - arg->type = TEP_PRINT_OP; - arg->op.op = token; - arg->op.left = left; - arg->op.right = NULL; - - if (set_op_prio(arg) == -1) { - event->flags |= TEP_EVENT_FL_FAILED; - /* arg->op.op (= token) will be freed at 
out_free */ - arg->op.op = NULL; - goto out_free; - } - - type = read_token_item(&token); - *tok = token; - - /* could just be a type pointer */ - if ((strcmp(arg->op.op, "*") == 0) && - type == TEP_EVENT_DELIM && (strcmp(token, ")") == 0)) { - int ret; - - if (left->type != TEP_PRINT_ATOM) { - do_warning_event(event, "bad pointer type"); - goto out_free; - } - ret = append(&left->atom.atom, " ", "*"); - if (ret < 0) - goto out_warn_free; - - free(arg->op.op); - *arg = *left; - free(left); - - return type; - } - - right = alloc_arg(); - if (!right) - goto out_warn_free; - - type = process_arg_token(event, right, tok, type); - if (type == TEP_EVENT_ERROR) { - free_arg(right); - /* token was freed in process_arg_token() via *tok */ - token = NULL; - goto out_free; - } - - if (right->type == TEP_PRINT_OP && - get_op_prio(arg->op.op) < get_op_prio(right->op.op)) { - struct tep_print_arg tmp; - - /* rotate ops according to the priority */ - arg->op.right = right->op.left; - - tmp = *arg; - *arg = *right; - *right = tmp; - - arg->op.left = right; - } else { - arg->op.right = right; - } - - } else if (strcmp(token, "[") == 0) { - - left = alloc_arg(); - if (!left) - goto out_warn_free; - - *left = *arg; - - arg->type = TEP_PRINT_OP; - arg->op.op = token; - arg->op.left = left; - - arg->op.prio = 0; - - /* it will set arg->op.right */ - type = process_array(event, arg, tok); - - } else { - do_warning_event(event, "unknown op '%s'", token); - event->flags |= TEP_EVENT_FL_FAILED; - /* the arg is now the left side */ - goto out_free; - } - - if (type == TEP_EVENT_OP && strcmp(*tok, ":") != 0) { - int prio; - - /* higher prios need to be closer to the root */ - prio = get_op_prio(*tok); - - if (prio > arg->op.prio) - return process_op(event, arg, tok); - - return process_op(event, right, tok); - } - - return type; - -out_warn_free: - do_warning_event(event, "%s: not enough memory!", __func__); -out_free: - free_token(token); - *tok = NULL; - return TEP_EVENT_ERROR; -} - 
-static enum tep_event_type -process_entry(struct tep_event *event __maybe_unused, struct tep_print_arg *arg, - char **tok) -{ - enum tep_event_type type; - char *field; - char *token; - - if (read_expected(TEP_EVENT_OP, "->") < 0) - goto out_err; - - if (read_expect_type(TEP_EVENT_ITEM, &token) < 0) - goto out_free; - field = token; - - arg->type = TEP_PRINT_FIELD; - arg->field.name = field; - - if (is_flag_field) { - arg->field.field = tep_find_any_field(event, arg->field.name); - arg->field.field->flags |= TEP_FIELD_IS_FLAG; - is_flag_field = 0; - } else if (is_symbolic_field) { - arg->field.field = tep_find_any_field(event, arg->field.name); - arg->field.field->flags |= TEP_FIELD_IS_SYMBOLIC; - is_symbolic_field = 0; - } - - type = read_token(&token); - *tok = token; - - return type; - - out_free: - free_token(token); - out_err: - *tok = NULL; - return TEP_EVENT_ERROR; -} - -static int alloc_and_process_delim(struct tep_event *event, char *next_token, - struct tep_print_arg **print_arg) -{ - struct tep_print_arg *field; - enum tep_event_type type; - char *token; - int ret = 0; - - field = alloc_arg(); - if (!field) { - do_warning_event(event, "%s: not enough memory!", __func__); - errno = ENOMEM; - return -1; - } - - type = process_arg(event, field, &token); - - if (test_type_token(type, token, TEP_EVENT_DELIM, next_token)) { - errno = EINVAL; - ret = -1; - free_arg(field); - goto out_free_token; - } - - *print_arg = field; - -out_free_token: - free_token(token); - - return ret; -} - -static char *arg_eval (struct tep_print_arg *arg); - -static unsigned long long -eval_type_str(unsigned long long val, const char *type, int pointer) -{ - int sign = 0; - char *ref; - int len; - - len = strlen(type); - - if (pointer) { - - if (type[len-1] != '*') { - do_warning("pointer expected with non pointer type"); - return val; - } - - ref = malloc(len); - if (!ref) { - do_warning("%s: not enough memory!", __func__); - return val; - } - memcpy(ref, type, len); - - /* chop 
off the " *" */ - ref[len - 2] = 0; - - val = eval_type_str(val, ref, 0); - free(ref); - return val; - } - - /* check if this is a pointer */ - if (type[len - 1] == '*') - return val; - - /* Try to figure out the arg size*/ - if (strncmp(type, "struct", 6) == 0) - /* all bets off */ - return val; - - if (strcmp(type, "u8") == 0) - return val & 0xff; - - if (strcmp(type, "u16") == 0) - return val & 0xffff; - - if (strcmp(type, "u32") == 0) - return val & 0xffffffff; - - if (strcmp(type, "u64") == 0 || - strcmp(type, "s64") == 0) - return val; - - if (strcmp(type, "s8") == 0) - return (unsigned long long)(char)val & 0xff; - - if (strcmp(type, "s16") == 0) - return (unsigned long long)(short)val & 0xffff; - - if (strcmp(type, "s32") == 0) - return (unsigned long long)(int)val & 0xffffffff; - - if (strncmp(type, "unsigned ", 9) == 0) { - sign = 0; - type += 9; - } - - if (strcmp(type, "char") == 0) { - if (sign) - return (unsigned long long)(char)val & 0xff; - else - return val & 0xff; - } - - if (strcmp(type, "short") == 0) { - if (sign) - return (unsigned long long)(short)val & 0xffff; - else - return val & 0xffff; - } - - if (strcmp(type, "int") == 0) { - if (sign) - return (unsigned long long)(int)val & 0xffffffff; - else - return val & 0xffffffff; - } - - return val; -} - -/* - * Try to figure out the type. 
- */ -static unsigned long long -eval_type(unsigned long long val, struct tep_print_arg *arg, int pointer) -{ - if (arg->type != TEP_PRINT_TYPE) { - do_warning("expected type argument"); - return 0; - } - - return eval_type_str(val, arg->typecast.type, pointer); -} - -static int arg_num_eval(struct tep_print_arg *arg, long long *val) -{ - long long left, right; - int ret = 1; - - switch (arg->type) { - case TEP_PRINT_ATOM: - *val = strtoll(arg->atom.atom, NULL, 0); - break; - case TEP_PRINT_TYPE: - ret = arg_num_eval(arg->typecast.item, val); - if (!ret) - break; - *val = eval_type(*val, arg, 0); - break; - case TEP_PRINT_OP: - switch (arg->op.op[0]) { - case '|': - ret = arg_num_eval(arg->op.left, &left); - if (!ret) - break; - ret = arg_num_eval(arg->op.right, &right); - if (!ret) - break; - if (arg->op.op[1]) - *val = left || right; - else - *val = left | right; - break; - case '&': - ret = arg_num_eval(arg->op.left, &left); - if (!ret) - break; - ret = arg_num_eval(arg->op.right, &right); - if (!ret) - break; - if (arg->op.op[1]) - *val = left && right; - else - *val = left & right; - break; - case '<': - ret = arg_num_eval(arg->op.left, &left); - if (!ret) - break; - ret = arg_num_eval(arg->op.right, &right); - if (!ret) - break; - switch (arg->op.op[1]) { - case 0: - *val = left < right; - break; - case '<': - *val = left << right; - break; - case '=': - *val = left <= right; - break; - default: - do_warning("unknown op '%s'", arg->op.op); - ret = 0; - } - break; - case '>': - ret = arg_num_eval(arg->op.left, &left); - if (!ret) - break; - ret = arg_num_eval(arg->op.right, &right); - if (!ret) - break; - switch (arg->op.op[1]) { - case 0: - *val = left > right; - break; - case '>': - *val = left >> right; - break; - case '=': - *val = left >= right; - break; - default: - do_warning("unknown op '%s'", arg->op.op); - ret = 0; - } - break; - case '=': - ret = arg_num_eval(arg->op.left, &left); - if (!ret) - break; - ret = arg_num_eval(arg->op.right, &right); - 
if (!ret) - break; - - if (arg->op.op[1] != '=') { - do_warning("unknown op '%s'", arg->op.op); - ret = 0; - } else - *val = left == right; - break; - case '!': - ret = arg_num_eval(arg->op.left, &left); - if (!ret) - break; - ret = arg_num_eval(arg->op.right, &right); - if (!ret) - break; - - switch (arg->op.op[1]) { - case '=': - *val = left != right; - break; - default: - do_warning("unknown op '%s'", arg->op.op); - ret = 0; - } - break; - case '-': - /* check for negative */ - if (arg->op.left->type == TEP_PRINT_NULL) - left = 0; - else - ret = arg_num_eval(arg->op.left, &left); - if (!ret) - break; - ret = arg_num_eval(arg->op.right, &right); - if (!ret) - break; - *val = left - right; - break; - case '+': - if (arg->op.left->type == TEP_PRINT_NULL) - left = 0; - else - ret = arg_num_eval(arg->op.left, &left); - if (!ret) - break; - ret = arg_num_eval(arg->op.right, &right); - if (!ret) - break; - *val = left + right; - break; - case '~': - ret = arg_num_eval(arg->op.right, &right); - if (!ret) - break; - *val = ~right; - break; - default: - do_warning("unknown op '%s'", arg->op.op); - ret = 0; - } - break; - - case TEP_PRINT_NULL: - case TEP_PRINT_FIELD ... TEP_PRINT_SYMBOL: - case TEP_PRINT_STRING: - case TEP_PRINT_BSTRING: - case TEP_PRINT_BITMASK: - default: - do_warning("invalid eval type %d", arg->type); - ret = 0; - - } - return ret; -} - -static char *arg_eval (struct tep_print_arg *arg) -{ - long long val; - static char buf[24]; - - switch (arg->type) { - case TEP_PRINT_ATOM: - return arg->atom.atom; - case TEP_PRINT_TYPE: - return arg_eval(arg->typecast.item); - case TEP_PRINT_OP: - if (!arg_num_eval(arg, &val)) - break; - sprintf(buf, "%lld", val); - return buf; - - case TEP_PRINT_NULL: - case TEP_PRINT_FIELD ... 
TEP_PRINT_SYMBOL: - case TEP_PRINT_STRING: - case TEP_PRINT_BSTRING: - case TEP_PRINT_BITMASK: - default: - do_warning("invalid eval type %d", arg->type); - break; - } - - return NULL; -} - -static enum tep_event_type -process_fields(struct tep_event *event, struct tep_print_flag_sym **list, char **tok) -{ - enum tep_event_type type; - struct tep_print_arg *arg = NULL; - struct tep_print_flag_sym *field; - char *token = *tok; - char *value; - - do { - free_token(token); - type = read_token_item(&token); - if (test_type_token(type, token, TEP_EVENT_OP, "{")) - break; - - arg = alloc_arg(); - if (!arg) - goto out_free; - - free_token(token); - type = process_arg(event, arg, &token); - - if (type == TEP_EVENT_OP) - type = process_op(event, arg, &token); - - if (type == TEP_EVENT_ERROR) - goto out_free; - - if (test_type_token(type, token, TEP_EVENT_DELIM, ",")) - goto out_free; - - field = calloc(1, sizeof(*field)); - if (!field) - goto out_free; - - value = arg_eval(arg); - if (value == NULL) - goto out_free_field; - field->value = strdup(value); - if (field->value == NULL) - goto out_free_field; - - free_arg(arg); - arg = alloc_arg(); - if (!arg) - goto out_free; - - free_token(token); - type = process_arg(event, arg, &token); - if (test_type_token(type, token, TEP_EVENT_OP, "}")) - goto out_free_field; - - value = arg_eval(arg); - if (value == NULL) - goto out_free_field; - field->str = strdup(value); - if (field->str == NULL) - goto out_free_field; - free_arg(arg); - arg = NULL; - - *list = field; - list = &field->next; - - free_token(token); - type = read_token_item(&token); - } while (type == TEP_EVENT_DELIM && strcmp(token, ",") == 0); - - *tok = token; - return type; - -out_free_field: - free_flag_sym(field); -out_free: - free_arg(arg); - free_token(token); - *tok = NULL; - - return TEP_EVENT_ERROR; -} - -static enum tep_event_type -process_flags(struct tep_event *event, struct tep_print_arg *arg, char **tok) -{ - struct tep_print_arg *field; - enum 
tep_event_type type; - char *token = NULL; - - memset(arg, 0, sizeof(*arg)); - arg->type = TEP_PRINT_FLAGS; - - field = alloc_arg(); - if (!field) { - do_warning_event(event, "%s: not enough memory!", __func__); - goto out_free; - } - - type = process_field_arg(event, field, &token); - - /* Handle operations in the first argument */ - while (type == TEP_EVENT_OP) - type = process_op(event, field, &token); - - if (test_type_token(type, token, TEP_EVENT_DELIM, ",")) - goto out_free_field; - free_token(token); - - arg->flags.field = field; - - type = read_token_item(&token); - if (event_item_type(type)) { - arg->flags.delim = token; - type = read_token_item(&token); - } - - if (test_type_token(type, token, TEP_EVENT_DELIM, ",")) - goto out_free; - - type = process_fields(event, &arg->flags.flags, &token); - if (test_type_token(type, token, TEP_EVENT_DELIM, ")")) - goto out_free; - - free_token(token); - type = read_token_item(tok); - return type; - -out_free_field: - free_arg(field); -out_free: - free_token(token); - *tok = NULL; - return TEP_EVENT_ERROR; -} - -static enum tep_event_type -process_symbols(struct tep_event *event, struct tep_print_arg *arg, char **tok) -{ - struct tep_print_arg *field; - enum tep_event_type type; - char *token = NULL; - - memset(arg, 0, sizeof(*arg)); - arg->type = TEP_PRINT_SYMBOL; - - field = alloc_arg(); - if (!field) { - do_warning_event(event, "%s: not enough memory!", __func__); - goto out_free; - } - - type = process_field_arg(event, field, &token); - - if (test_type_token(type, token, TEP_EVENT_DELIM, ",")) - goto out_free_field; - - arg->symbol.field = field; - - type = process_fields(event, &arg->symbol.symbols, &token); - if (test_type_token(type, token, TEP_EVENT_DELIM, ")")) - goto out_free; - - free_token(token); - type = read_token_item(tok); - return type; - -out_free_field: - free_arg(field); -out_free: - free_token(token); - *tok = NULL; - return TEP_EVENT_ERROR; -} - -static enum tep_event_type 
-process_hex_common(struct tep_event *event, struct tep_print_arg *arg, - char **tok, enum tep_print_arg_type type) -{ - memset(arg, 0, sizeof(*arg)); - arg->type = type; - - if (alloc_and_process_delim(event, ",", &arg->hex.field)) - goto out; - - if (alloc_and_process_delim(event, ")", &arg->hex.size)) - goto free_field; - - return read_token_item(tok); - -free_field: - free_arg(arg->hex.field); - arg->hex.field = NULL; -out: - *tok = NULL; - return TEP_EVENT_ERROR; -} - -static enum tep_event_type -process_hex(struct tep_event *event, struct tep_print_arg *arg, char **tok) -{ - return process_hex_common(event, arg, tok, TEP_PRINT_HEX); -} - -static enum tep_event_type -process_hex_str(struct tep_event *event, struct tep_print_arg *arg, - char **tok) -{ - return process_hex_common(event, arg, tok, TEP_PRINT_HEX_STR); -} - -static enum tep_event_type -process_int_array(struct tep_event *event, struct tep_print_arg *arg, char **tok) -{ - memset(arg, 0, sizeof(*arg)); - arg->type = TEP_PRINT_INT_ARRAY; - - if (alloc_and_process_delim(event, ",", &arg->int_array.field)) - goto out; - - if (alloc_and_process_delim(event, ",", &arg->int_array.count)) - goto free_field; - - if (alloc_and_process_delim(event, ")", &arg->int_array.el_size)) - goto free_size; - - return read_token_item(tok); - -free_size: - free_arg(arg->int_array.count); - arg->int_array.count = NULL; -free_field: - free_arg(arg->int_array.field); - arg->int_array.field = NULL; -out: - *tok = NULL; - return TEP_EVENT_ERROR; -} - -static enum tep_event_type -process_dynamic_array(struct tep_event *event, struct tep_print_arg *arg, char **tok) -{ - struct tep_format_field *field; - enum tep_event_type type; - char *token; - - memset(arg, 0, sizeof(*arg)); - arg->type = TEP_PRINT_DYNAMIC_ARRAY; - - /* - * The item within the parenthesis is another field that holds - * the index into where the array starts. 
- */ - type = read_token(&token); - *tok = token; - if (type != TEP_EVENT_ITEM) - goto out_free; - - /* Find the field */ - - field = tep_find_field(event, token); - if (!field) - goto out_free; - - arg->dynarray.field = field; - arg->dynarray.index = 0; - - if (read_expected(TEP_EVENT_DELIM, ")") < 0) - goto out_free; - - free_token(token); - type = read_token_item(&token); - *tok = token; - if (type != TEP_EVENT_OP || strcmp(token, "[") != 0) - return type; - - free_token(token); - arg = alloc_arg(); - if (!arg) { - do_warning_event(event, "%s: not enough memory!", __func__); - *tok = NULL; - return TEP_EVENT_ERROR; - } - - type = process_arg(event, arg, &token); - if (type == TEP_EVENT_ERROR) - goto out_free_arg; - - if (!test_type_token(type, token, TEP_EVENT_OP, "]")) - goto out_free_arg; - - free_token(token); - type = read_token_item(tok); - return type; - - out_free_arg: - free_arg(arg); - out_free: - free_token(token); - *tok = NULL; - return TEP_EVENT_ERROR; -} - -static enum tep_event_type -process_dynamic_array_len(struct tep_event *event, struct tep_print_arg *arg, - char **tok) -{ - struct tep_format_field *field; - enum tep_event_type type; - char *token; - - if (read_expect_type(TEP_EVENT_ITEM, &token) < 0) - goto out_free; - - arg->type = TEP_PRINT_DYNAMIC_ARRAY_LEN; - - /* Find the field */ - field = tep_find_field(event, token); - if (!field) - goto out_free; - - arg->dynarray.field = field; - arg->dynarray.index = 0; - - if (read_expected(TEP_EVENT_DELIM, ")") < 0) - goto out_err; - - free_token(token); - type = read_token(&token); - *tok = token; - - return type; - - out_free: - free_token(token); - out_err: - *tok = NULL; - return TEP_EVENT_ERROR; -} - -static enum tep_event_type -process_paren(struct tep_event *event, struct tep_print_arg *arg, char **tok) -{ - struct tep_print_arg *item_arg; - enum tep_event_type type; - char *token; - - type = process_arg(event, arg, &token); - - if (type == TEP_EVENT_ERROR) - goto out_free; - - if (type == 
TEP_EVENT_OP) - type = process_op(event, arg, &token); - - if (type == TEP_EVENT_ERROR) - goto out_free; - - if (test_type_token(type, token, TEP_EVENT_DELIM, ")")) - goto out_free; - - free_token(token); - type = read_token_item(&token); - - /* - * If the next token is an item or another open paren, then - * this was a typecast. - */ - if (event_item_type(type) || - (type == TEP_EVENT_DELIM && strcmp(token, "(") == 0)) { - - /* make this a typecast and contine */ - - /* prevous must be an atom */ - if (arg->type != TEP_PRINT_ATOM) { - do_warning_event(event, "previous needed to be TEP_PRINT_ATOM"); - goto out_free; - } - - item_arg = alloc_arg(); - if (!item_arg) { - do_warning_event(event, "%s: not enough memory!", - __func__); - goto out_free; - } - - arg->type = TEP_PRINT_TYPE; - arg->typecast.type = arg->atom.atom; - arg->typecast.item = item_arg; - type = process_arg_token(event, item_arg, &token, type); - - } - - *tok = token; - return type; - - out_free: - free_token(token); - *tok = NULL; - return TEP_EVENT_ERROR; -} - - -static enum tep_event_type -process_str(struct tep_event *event __maybe_unused, struct tep_print_arg *arg, - char **tok) -{ - enum tep_event_type type; - char *token; - - if (read_expect_type(TEP_EVENT_ITEM, &token) < 0) - goto out_free; - - arg->type = TEP_PRINT_STRING; - arg->string.string = token; - arg->string.field = NULL; - - if (read_expected(TEP_EVENT_DELIM, ")") < 0) - goto out_err; - - type = read_token(&token); - *tok = token; - - return type; - - out_free: - free_token(token); - out_err: - *tok = NULL; - return TEP_EVENT_ERROR; -} - -static enum tep_event_type -process_bitmask(struct tep_event *event __maybe_unused, struct tep_print_arg *arg, - char **tok) -{ - enum tep_event_type type; - char *token; - - if (read_expect_type(TEP_EVENT_ITEM, &token) < 0) - goto out_free; - - arg->type = TEP_PRINT_BITMASK; - arg->bitmask.bitmask = token; - arg->bitmask.field = NULL; - - if (read_expected(TEP_EVENT_DELIM, ")") < 0) - goto 
out_err; - - type = read_token(&token); - *tok = token; - - return type; - - out_free: - free_token(token); - out_err: - *tok = NULL; - return TEP_EVENT_ERROR; -} - -static struct tep_function_handler * -find_func_handler(struct tep_handle *tep, char *func_name) -{ - struct tep_function_handler *func; - - if (!tep) - return NULL; - - for (func = tep->func_handlers; func; func = func->next) { - if (strcmp(func->name, func_name) == 0) - break; - } - - return func; -} - -static void remove_func_handler(struct tep_handle *tep, char *func_name) -{ - struct tep_function_handler *func; - struct tep_function_handler **next; - - next = &tep->func_handlers; - while ((func = *next)) { - if (strcmp(func->name, func_name) == 0) { - *next = func->next; - free_func_handle(func); - break; - } - next = &func->next; - } -} - -static enum tep_event_type -process_func_handler(struct tep_event *event, struct tep_function_handler *func, - struct tep_print_arg *arg, char **tok) -{ - struct tep_print_arg **next_arg; - struct tep_print_arg *farg; - enum tep_event_type type; - char *token; - int i; - - arg->type = TEP_PRINT_FUNC; - arg->func.func = func; - - *tok = NULL; - - next_arg = &(arg->func.args); - for (i = 0; i < func->nr_args; i++) { - farg = alloc_arg(); - if (!farg) { - do_warning_event(event, "%s: not enough memory!", - __func__); - return TEP_EVENT_ERROR; - } - - type = process_arg(event, farg, &token); - if (i < (func->nr_args - 1)) { - if (type != TEP_EVENT_DELIM || strcmp(token, ",") != 0) { - do_warning_event(event, - "Error: function '%s()' expects %d arguments but event %s only uses %d", - func->name, func->nr_args, - event->name, i + 1); - goto err; - } - } else { - if (type != TEP_EVENT_DELIM || strcmp(token, ")") != 0) { - do_warning_event(event, - "Error: function '%s()' only expects %d arguments but event %s has more", - func->name, func->nr_args, event->name); - goto err; - } - } - - *next_arg = farg; - next_arg = &(farg->next); - free_token(token); - } - - type = 
read_token(&token); - *tok = token; - - return type; - -err: - free_arg(farg); - free_token(token); - return TEP_EVENT_ERROR; -} - -static enum tep_event_type -process_builtin_expect(struct tep_event *event, struct tep_print_arg *arg, char **tok) -{ - enum tep_event_type type; - char *token = NULL; - - /* Handle __builtin_expect( cond, #) */ - type = process_arg(event, arg, &token); - - if (type != TEP_EVENT_DELIM || token[0] != ',') - goto out_free; - - free_token(token); - - /* We don't care what the second parameter is of the __builtin_expect() */ - if (read_expect_type(TEP_EVENT_ITEM, &token) < 0) - goto out_free; - - if (read_expected(TEP_EVENT_DELIM, ")") < 0) - goto out_free; - - free_token(token); - type = read_token_item(tok); - return type; - -out_free: - free_token(token); - *tok = NULL; - return TEP_EVENT_ERROR; -} - -static enum tep_event_type -process_function(struct tep_event *event, struct tep_print_arg *arg, - char *token, char **tok) -{ - struct tep_function_handler *func; - - if (strcmp(token, "__print_flags") == 0) { - free_token(token); - is_flag_field = 1; - return process_flags(event, arg, tok); - } - if (strcmp(token, "__print_symbolic") == 0) { - free_token(token); - is_symbolic_field = 1; - return process_symbols(event, arg, tok); - } - if (strcmp(token, "__print_hex") == 0) { - free_token(token); - return process_hex(event, arg, tok); - } - if (strcmp(token, "__print_hex_str") == 0) { - free_token(token); - return process_hex_str(event, arg, tok); - } - if (strcmp(token, "__print_array") == 0) { - free_token(token); - return process_int_array(event, arg, tok); - } - if (strcmp(token, "__get_str") == 0 || - strcmp(token, "__get_rel_str") == 0) { - free_token(token); - return process_str(event, arg, tok); - } - if (strcmp(token, "__get_bitmask") == 0 || - strcmp(token, "__get_rel_bitmask") == 0) { - free_token(token); - return process_bitmask(event, arg, tok); - } - if (strcmp(token, "__get_dynamic_array") == 0 || - strcmp(token, 
"__get_rel_dynamic_array") == 0) { - free_token(token); - return process_dynamic_array(event, arg, tok); - } - if (strcmp(token, "__get_dynamic_array_len") == 0 || - strcmp(token, "__get_rel_dynamic_array_len") == 0) { - free_token(token); - return process_dynamic_array_len(event, arg, tok); - } - if (strcmp(token, "__builtin_expect") == 0) { - free_token(token); - return process_builtin_expect(event, arg, tok); - } - - func = find_func_handler(event->tep, token); - if (func) { - free_token(token); - return process_func_handler(event, func, arg, tok); - } - - do_warning_event(event, "function %s not defined", token); - free_token(token); - return TEP_EVENT_ERROR; -} - -static enum tep_event_type -process_arg_token(struct tep_event *event, struct tep_print_arg *arg, - char **tok, enum tep_event_type type) -{ - char *token; - char *atom; - - token = *tok; - - switch (type) { - case TEP_EVENT_ITEM: - if (strcmp(token, "REC") == 0) { - free_token(token); - type = process_entry(event, arg, &token); - break; - } - atom = token; - /* test the next token */ - type = read_token_item(&token); - - /* - * If the next token is a parenthesis, then this - * is a function. - */ - if (type == TEP_EVENT_DELIM && strcmp(token, "(") == 0) { - free_token(token); - token = NULL; - /* this will free atom. 
*/ - type = process_function(event, arg, atom, &token); - break; - } - /* atoms can be more than one token long */ - while (type == TEP_EVENT_ITEM) { - int ret; - - ret = append(&atom, " ", token); - if (ret < 0) { - free(atom); - *tok = NULL; - free_token(token); - return TEP_EVENT_ERROR; - } - free_token(token); - type = read_token_item(&token); - } - - arg->type = TEP_PRINT_ATOM; - arg->atom.atom = atom; - break; - - case TEP_EVENT_DQUOTE: - case TEP_EVENT_SQUOTE: - arg->type = TEP_PRINT_ATOM; - arg->atom.atom = token; - type = read_token_item(&token); - break; - case TEP_EVENT_DELIM: - if (strcmp(token, "(") == 0) { - free_token(token); - type = process_paren(event, arg, &token); - break; - } - case TEP_EVENT_OP: - /* handle single ops */ - arg->type = TEP_PRINT_OP; - arg->op.op = token; - arg->op.left = NULL; - type = process_op(event, arg, &token); - - /* On error, the op is freed */ - if (type == TEP_EVENT_ERROR) - arg->op.op = NULL; - - /* return error type if errored */ - break; - - case TEP_EVENT_ERROR ... 
TEP_EVENT_NEWLINE: - default: - do_warning_event(event, "unexpected type %d", type); - return TEP_EVENT_ERROR; - } - *tok = token; - - return type; -} - -static int event_read_print_args(struct tep_event *event, struct tep_print_arg **list) -{ - enum tep_event_type type = TEP_EVENT_ERROR; - struct tep_print_arg *arg; - char *token; - int args = 0; - - do { - if (type == TEP_EVENT_NEWLINE) { - type = read_token_item(&token); - continue; - } - - arg = alloc_arg(); - if (!arg) { - do_warning_event(event, "%s: not enough memory!", - __func__); - return -1; - } - - type = process_arg(event, arg, &token); - - if (type == TEP_EVENT_ERROR) { - free_token(token); - free_arg(arg); - return -1; - } - - *list = arg; - args++; - - if (type == TEP_EVENT_OP) { - type = process_op(event, arg, &token); - free_token(token); - if (type == TEP_EVENT_ERROR) { - *list = NULL; - free_arg(arg); - return -1; - } - list = &arg->next; - continue; - } - - if (type == TEP_EVENT_DELIM && strcmp(token, ",") == 0) { - free_token(token); - *list = arg; - list = &arg->next; - continue; - } - break; - } while (type != TEP_EVENT_NONE); - - if (type != TEP_EVENT_NONE && type != TEP_EVENT_ERROR) - free_token(token); - - return args; -} - -static int event_read_print(struct tep_event *event) -{ - enum tep_event_type type; - char *token; - int ret; - - if (read_expected_item(TEP_EVENT_ITEM, "print") < 0) - return -1; - - if (read_expected(TEP_EVENT_ITEM, "fmt") < 0) - return -1; - - if (read_expected(TEP_EVENT_OP, ":") < 0) - return -1; - - if (read_expect_type(TEP_EVENT_DQUOTE, &token) < 0) - goto fail; - - concat: - event->print_fmt.format = token; - event->print_fmt.args = NULL; - - /* ok to have no arg */ - type = read_token_item(&token); - - if (type == TEP_EVENT_NONE) - return 0; - - /* Handle concatenation of print lines */ - if (type == TEP_EVENT_DQUOTE) { - char *cat; - - if (asprintf(&cat, "%s%s", event->print_fmt.format, token) < 0) - goto fail; - free_token(token); - 
free_token(event->print_fmt.format); - event->print_fmt.format = NULL; - token = cat; - goto concat; - } - - if (test_type_token(type, token, TEP_EVENT_DELIM, ",")) - goto fail; - - free_token(token); - - ret = event_read_print_args(event, &event->print_fmt.args); - if (ret < 0) - return -1; - - return ret; - - fail: - free_token(token); - return -1; -} - -/** - * tep_find_common_field - return a common field by event - * @event: handle for the event - * @name: the name of the common field to return - * - * Returns a common field from the event by the given @name. - * This only searches the common fields and not all field. - */ -struct tep_format_field * -tep_find_common_field(struct tep_event *event, const char *name) -{ - struct tep_format_field *format; - - for (format = event->format.common_fields; - format; format = format->next) { - if (strcmp(format->name, name) == 0) - break; - } - - return format; -} - -/** - * tep_find_field - find a non-common field - * @event: handle for the event - * @name: the name of the non-common field - * - * Returns a non-common field by the given @name. - * This does not search common fields. - */ -struct tep_format_field * -tep_find_field(struct tep_event *event, const char *name) -{ - struct tep_format_field *format; - - for (format = event->format.fields; - format; format = format->next) { - if (strcmp(format->name, name) == 0) - break; - } - - return format; -} - -/** - * tep_find_any_field - find any field by name - * @event: handle for the event - * @name: the name of the field - * - * Returns a field by the given @name. - * This searches the common field names first, then - * the non-common ones if a common one was not found. 
- */ -struct tep_format_field * -tep_find_any_field(struct tep_event *event, const char *name) -{ - struct tep_format_field *format; - - format = tep_find_common_field(event, name); - if (format) - return format; - return tep_find_field(event, name); -} - -/** - * tep_read_number - read a number from data - * @tep: a handle to the trace event parser context - * @ptr: the raw data - * @size: the size of the data that holds the number - * - * Returns the number (converted to host) from the - * raw data. - */ -unsigned long long tep_read_number(struct tep_handle *tep, - const void *ptr, int size) -{ - unsigned long long val; - - switch (size) { - case 1: - return *(unsigned char *)ptr; - case 2: - return data2host2(tep, *(unsigned short *)ptr); - case 4: - return data2host4(tep, *(unsigned int *)ptr); - case 8: - memcpy(&val, (ptr), sizeof(unsigned long long)); - return data2host8(tep, val); - default: - /* BUG! */ - return 0; - } -} - -/** - * tep_read_number_field - read a number from data - * @field: a handle to the field - * @data: the raw data to read - * @value: the value to place the number in - * - * Reads raw data according to a field offset and size, - * and translates it into @value. - * - * Returns 0 on success, -1 otherwise. - */ -int tep_read_number_field(struct tep_format_field *field, const void *data, - unsigned long long *value) -{ - if (!field) - return -1; - switch (field->size) { - case 1: - case 2: - case 4: - case 8: - *value = tep_read_number(field->event->tep, - data + field->offset, field->size); - return 0; - default: - return -1; - } -} - -static int get_common_info(struct tep_handle *tep, - const char *type, int *offset, int *size) -{ - struct tep_event *event; - struct tep_format_field *field; - - /* - * All events should have the same common elements. 
- * Pick any event to find where the type is; - */ - if (!tep->events) { - do_warning("no event_list!"); - return -1; - } - - event = tep->events[0]; - field = tep_find_common_field(event, type); - if (!field) - return -1; - - *offset = field->offset; - *size = field->size; - - return 0; -} - -static int __parse_common(struct tep_handle *tep, void *data, - int *size, int *offset, const char *name) -{ - int ret; - - if (!*size) { - ret = get_common_info(tep, name, offset, size); - if (ret < 0) - return ret; - } - return tep_read_number(tep, data + *offset, *size); -} - -static int trace_parse_common_type(struct tep_handle *tep, void *data) -{ - return __parse_common(tep, data, - &tep->type_size, &tep->type_offset, - "common_type"); -} - -static int parse_common_pid(struct tep_handle *tep, void *data) -{ - return __parse_common(tep, data, - &tep->pid_size, &tep->pid_offset, - "common_pid"); -} - -static int parse_common_pc(struct tep_handle *tep, void *data) -{ - return __parse_common(tep, data, - &tep->pc_size, &tep->pc_offset, - "common_preempt_count"); -} - -static int parse_common_flags(struct tep_handle *tep, void *data) -{ - return __parse_common(tep, data, - &tep->flags_size, &tep->flags_offset, - "common_flags"); -} - -static int parse_common_lock_depth(struct tep_handle *tep, void *data) -{ - return __parse_common(tep, data, - &tep->ld_size, &tep->ld_offset, - "common_lock_depth"); -} - -static int parse_common_migrate_disable(struct tep_handle *tep, void *data) -{ - return __parse_common(tep, data, - &tep->ld_size, &tep->ld_offset, - "common_migrate_disable"); -} - -static int events_id_cmp(const void *a, const void *b); - -/** - * tep_find_event - find an event by given id - * @tep: a handle to the trace event parser context - * @id: the id of the event - * - * Returns an event that has a given @id. 
- */ -struct tep_event *tep_find_event(struct tep_handle *tep, int id) -{ - struct tep_event **eventptr; - struct tep_event key; - struct tep_event *pkey = &key; - - /* Check cache first */ - if (tep->last_event && tep->last_event->id == id) - return tep->last_event; - - key.id = id; - - eventptr = bsearch(&pkey, tep->events, tep->nr_events, - sizeof(*tep->events), events_id_cmp); - - if (eventptr) { - tep->last_event = *eventptr; - return *eventptr; - } - - return NULL; -} - -/** - * tep_find_event_by_name - find an event by given name - * @tep: a handle to the trace event parser context - * @sys: the system name to search for - * @name: the name of the event to search for - * - * This returns an event with a given @name and under the system - * @sys. If @sys is NULL the first event with @name is returned. - */ -struct tep_event * -tep_find_event_by_name(struct tep_handle *tep, - const char *sys, const char *name) -{ - struct tep_event *event = NULL; - int i; - - if (tep->last_event && - strcmp(tep->last_event->name, name) == 0 && - (!sys || strcmp(tep->last_event->system, sys) == 0)) - return tep->last_event; - - for (i = 0; i < tep->nr_events; i++) { - event = tep->events[i]; - if (strcmp(event->name, name) == 0) { - if (!sys) - break; - if (strcmp(event->system, sys) == 0) - break; - } - } - if (i == tep->nr_events) - event = NULL; - - tep->last_event = event; - return event; -} - -static unsigned long long -eval_num_arg(void *data, int size, struct tep_event *event, struct tep_print_arg *arg) -{ - struct tep_handle *tep = event->tep; - unsigned long long val = 0; - unsigned long long left, right; - struct tep_print_arg *typearg = NULL; - struct tep_print_arg *larg; - unsigned long offset; - unsigned int field_size; - - switch (arg->type) { - case TEP_PRINT_NULL: - /* ?? 
*/ - return 0; - case TEP_PRINT_ATOM: - return strtoull(arg->atom.atom, NULL, 0); - case TEP_PRINT_FIELD: - if (!arg->field.field) { - arg->field.field = tep_find_any_field(event, arg->field.name); - if (!arg->field.field) - goto out_warning_field; - - } - /* must be a number */ - val = tep_read_number(tep, data + arg->field.field->offset, - arg->field.field->size); - break; - case TEP_PRINT_FLAGS: - case TEP_PRINT_SYMBOL: - case TEP_PRINT_INT_ARRAY: - case TEP_PRINT_HEX: - case TEP_PRINT_HEX_STR: - break; - case TEP_PRINT_TYPE: - val = eval_num_arg(data, size, event, arg->typecast.item); - return eval_type(val, arg, 0); - case TEP_PRINT_STRING: - case TEP_PRINT_BSTRING: - case TEP_PRINT_BITMASK: - return 0; - case TEP_PRINT_FUNC: { - struct trace_seq s; - trace_seq_init(&s); - val = process_defined_func(&s, data, size, event, arg); - trace_seq_destroy(&s); - return val; - } - case TEP_PRINT_OP: - if (strcmp(arg->op.op, "[") == 0) { - /* - * Arrays are special, since we don't want - * to read the arg as is. - */ - right = eval_num_arg(data, size, event, arg->op.right); - - /* handle typecasts */ - larg = arg->op.left; - while (larg->type == TEP_PRINT_TYPE) { - if (!typearg) - typearg = larg; - larg = larg->typecast.item; - } - - /* Default to long size */ - field_size = tep->long_size; - - switch (larg->type) { - case TEP_PRINT_DYNAMIC_ARRAY: - offset = tep_read_number(tep, - data + larg->dynarray.field->offset, - larg->dynarray.field->size); - if (larg->dynarray.field->elementsize) - field_size = larg->dynarray.field->elementsize; - /* - * The actual length of the dynamic array is stored - * in the top half of the field, and the offset - * is in the bottom half of the 32 bit field. 
- */ - offset &= 0xffff; - offset += right; - break; - case TEP_PRINT_FIELD: - if (!larg->field.field) { - larg->field.field = - tep_find_any_field(event, larg->field.name); - if (!larg->field.field) { - arg = larg; - goto out_warning_field; - } - } - field_size = larg->field.field->elementsize; - offset = larg->field.field->offset + - right * larg->field.field->elementsize; - break; - default: - goto default_op; /* oops, all bets off */ - } - val = tep_read_number(tep, - data + offset, field_size); - if (typearg) - val = eval_type(val, typearg, 1); - break; - } else if (strcmp(arg->op.op, "?") == 0) { - left = eval_num_arg(data, size, event, arg->op.left); - arg = arg->op.right; - if (left) - val = eval_num_arg(data, size, event, arg->op.left); - else - val = eval_num_arg(data, size, event, arg->op.right); - break; - } - default_op: - left = eval_num_arg(data, size, event, arg->op.left); - right = eval_num_arg(data, size, event, arg->op.right); - switch (arg->op.op[0]) { - case '!': - switch (arg->op.op[1]) { - case 0: - val = !right; - break; - case '=': - val = left != right; - break; - default: - goto out_warning_op; - } - break; - case '~': - val = ~right; - break; - case '|': - if (arg->op.op[1]) - val = left || right; - else - val = left | right; - break; - case '&': - if (arg->op.op[1]) - val = left && right; - else - val = left & right; - break; - case '<': - switch (arg->op.op[1]) { - case 0: - val = left < right; - break; - case '<': - val = left << right; - break; - case '=': - val = left <= right; - break; - default: - goto out_warning_op; - } - break; - case '>': - switch (arg->op.op[1]) { - case 0: - val = left > right; - break; - case '>': - val = left >> right; - break; - case '=': - val = left >= right; - break; - default: - goto out_warning_op; - } - break; - case '=': - if (arg->op.op[1] != '=') - goto out_warning_op; - - val = left == right; - break; - case '-': - val = left - right; - break; - case '+': - val = left + right; - break; - case 
'/': - val = left / right; - break; - case '%': - val = left % right; - break; - case '*': - val = left * right; - break; - default: - goto out_warning_op; - } - break; - case TEP_PRINT_DYNAMIC_ARRAY_LEN: - offset = tep_read_number(tep, - data + arg->dynarray.field->offset, - arg->dynarray.field->size); - /* - * The total allocated length of the dynamic array is - * stored in the top half of the field, and the offset - * is in the bottom half of the 32 bit field. - */ - val = (unsigned long long)(offset >> 16); - break; - case TEP_PRINT_DYNAMIC_ARRAY: - /* Without [], we pass the address to the dynamic data */ - offset = tep_read_number(tep, - data + arg->dynarray.field->offset, - arg->dynarray.field->size); - /* - * The total allocated length of the dynamic array is - * stored in the top half of the field, and the offset - * is in the bottom half of the 32 bit field. - */ - offset &= 0xffff; - val = (unsigned long long)((unsigned long)data + offset); - break; - default: /* not sure what to do there */ - return 0; - } - return val; - -out_warning_op: - do_warning_event(event, "%s: unknown op '%s'", __func__, arg->op.op); - return 0; - -out_warning_field: - do_warning_event(event, "%s: field %s not found", - __func__, arg->field.name); - return 0; -} - -struct flag { - const char *name; - unsigned long long value; -}; - -static const struct flag flags[] = { - { "HI_SOFTIRQ", 0 }, - { "TIMER_SOFTIRQ", 1 }, - { "NET_TX_SOFTIRQ", 2 }, - { "NET_RX_SOFTIRQ", 3 }, - { "BLOCK_SOFTIRQ", 4 }, - { "IRQ_POLL_SOFTIRQ", 5 }, - { "TASKLET_SOFTIRQ", 6 }, - { "SCHED_SOFTIRQ", 7 }, - { "HRTIMER_SOFTIRQ", 8 }, - { "RCU_SOFTIRQ", 9 }, - - { "HRTIMER_NORESTART", 0 }, - { "HRTIMER_RESTART", 1 }, -}; - -static long long eval_flag(const char *flag) -{ - int i; - - /* - * Some flags in the format files do not get converted. - * If the flag is not numeric, see if it is something that - * we already know about. 
- */ - if (isdigit(flag[0])) - return strtoull(flag, NULL, 0); - - for (i = 0; i < (int)(sizeof(flags)/sizeof(flags[0])); i++) - if (strcmp(flags[i].name, flag) == 0) - return flags[i].value; - - return -1LL; -} - -static void print_str_to_seq(struct trace_seq *s, const char *format, - int len_arg, const char *str) -{ - if (len_arg >= 0) - trace_seq_printf(s, format, len_arg, str); - else - trace_seq_printf(s, format, str); -} - -static void print_bitmask_to_seq(struct tep_handle *tep, - struct trace_seq *s, const char *format, - int len_arg, const void *data, int size) -{ - int nr_bits = size * 8; - int str_size = (nr_bits + 3) / 4; - int len = 0; - char buf[3]; - char *str; - int index; - int i; - - /* - * The kernel likes to put in commas every 32 bits, we - * can do the same. - */ - str_size += (nr_bits - 1) / 32; - - str = malloc(str_size + 1); - if (!str) { - do_warning("%s: not enough memory!", __func__); - return; - } - str[str_size] = 0; - - /* Start out with -2 for the two chars per byte */ - for (i = str_size - 2; i >= 0; i -= 2) { - /* - * data points to a bit mask of size bytes. - * In the kernel, this is an array of long words, thus - * endianness is very important. 
- */ - if (tep->file_bigendian) - index = size - (len + 1); - else - index = len; - - snprintf(buf, 3, "%02x", *((unsigned char *)data + index)); - memcpy(str + i, buf, 2); - len++; - if (!(len & 3) && i > 0) { - i--; - str[i] = ','; - } - } - - if (len_arg >= 0) - trace_seq_printf(s, format, len_arg, str); - else - trace_seq_printf(s, format, str); - - free(str); -} - -static void print_str_arg(struct trace_seq *s, void *data, int size, - struct tep_event *event, const char *format, - int len_arg, struct tep_print_arg *arg) -{ - struct tep_handle *tep = event->tep; - struct tep_print_flag_sym *flag; - struct tep_format_field *field; - struct printk_map *printk; - long long val, fval; - unsigned long long addr; - char *str; - unsigned char *hex; - int print; - int i, len; - - switch (arg->type) { - case TEP_PRINT_NULL: - /* ?? */ - return; - case TEP_PRINT_ATOM: - print_str_to_seq(s, format, len_arg, arg->atom.atom); - return; - case TEP_PRINT_FIELD: - field = arg->field.field; - if (!field) { - field = tep_find_any_field(event, arg->field.name); - if (!field) { - str = arg->field.name; - goto out_warning_field; - } - arg->field.field = field; - } - /* Zero sized fields, mean the rest of the data */ - len = field->size ? : size - field->offset; - - /* - * Some events pass in pointers. If this is not an array - * and the size is the same as long_size, assume that it - * is a pointer. - */ - if (!(field->flags & TEP_FIELD_IS_ARRAY) && - field->size == tep->long_size) { - - /* Handle heterogeneous recording and processing - * architectures - * - * CASE I: - * Traces recorded on 32-bit devices (32-bit - * addressing) and processed on 64-bit devices: - * In this case, only 32 bits should be read. - * - * CASE II: - * Traces recorded on 64 bit devices and processed - * on 32-bit devices: - * In this case, 64 bits must be read. - */ - addr = (tep->long_size == 8) ? 
- *(unsigned long long *)(data + field->offset) : - (unsigned long long)*(unsigned int *)(data + field->offset); - - /* Check if it matches a print format */ - printk = find_printk(tep, addr); - if (printk) - trace_seq_puts(s, printk->printk); - else - trace_seq_printf(s, "%llx", addr); - break; - } - str = malloc(len + 1); - if (!str) { - do_warning_event(event, "%s: not enough memory!", - __func__); - return; - } - memcpy(str, data + field->offset, len); - str[len] = 0; - print_str_to_seq(s, format, len_arg, str); - free(str); - break; - case TEP_PRINT_FLAGS: - val = eval_num_arg(data, size, event, arg->flags.field); - print = 0; - for (flag = arg->flags.flags; flag; flag = flag->next) { - fval = eval_flag(flag->value); - if (!val && fval < 0) { - print_str_to_seq(s, format, len_arg, flag->str); - break; - } - if (fval > 0 && (val & fval) == fval) { - if (print && arg->flags.delim) - trace_seq_puts(s, arg->flags.delim); - print_str_to_seq(s, format, len_arg, flag->str); - print = 1; - val &= ~fval; - } - } - if (val) { - if (print && arg->flags.delim) - trace_seq_puts(s, arg->flags.delim); - trace_seq_printf(s, "0x%llx", val); - } - break; - case TEP_PRINT_SYMBOL: - val = eval_num_arg(data, size, event, arg->symbol.field); - for (flag = arg->symbol.symbols; flag; flag = flag->next) { - fval = eval_flag(flag->value); - if (val == fval) { - print_str_to_seq(s, format, len_arg, flag->str); - break; - } - } - if (!flag) - trace_seq_printf(s, "0x%llx", val); - break; - case TEP_PRINT_HEX: - case TEP_PRINT_HEX_STR: - if (arg->hex.field->type == TEP_PRINT_DYNAMIC_ARRAY) { - unsigned long offset; - offset = tep_read_number(tep, - data + arg->hex.field->dynarray.field->offset, - arg->hex.field->dynarray.field->size); - hex = data + (offset & 0xffff); - } else { - field = arg->hex.field->field.field; - if (!field) { - str = arg->hex.field->field.name; - field = tep_find_any_field(event, str); - if (!field) - goto out_warning_field; - arg->hex.field->field.field = field; - 
} - hex = data + field->offset; - } - len = eval_num_arg(data, size, event, arg->hex.size); - for (i = 0; i < len; i++) { - if (i && arg->type == TEP_PRINT_HEX) - trace_seq_putc(s, ' '); - trace_seq_printf(s, "%02x", hex[i]); - } - break; - - case TEP_PRINT_INT_ARRAY: { - void *num; - int el_size; - - if (arg->int_array.field->type == TEP_PRINT_DYNAMIC_ARRAY) { - unsigned long offset; - struct tep_format_field *field = - arg->int_array.field->dynarray.field; - offset = tep_read_number(tep, - data + field->offset, - field->size); - num = data + (offset & 0xffff); - } else { - field = arg->int_array.field->field.field; - if (!field) { - str = arg->int_array.field->field.name; - field = tep_find_any_field(event, str); - if (!field) - goto out_warning_field; - arg->int_array.field->field.field = field; - } - num = data + field->offset; - } - len = eval_num_arg(data, size, event, arg->int_array.count); - el_size = eval_num_arg(data, size, event, - arg->int_array.el_size); - for (i = 0; i < len; i++) { - if (i) - trace_seq_putc(s, ' '); - - if (el_size == 1) { - trace_seq_printf(s, "%u", *(uint8_t *)num); - } else if (el_size == 2) { - trace_seq_printf(s, "%u", *(uint16_t *)num); - } else if (el_size == 4) { - trace_seq_printf(s, "%u", *(uint32_t *)num); - } else if (el_size == 8) { - trace_seq_printf(s, "%"PRIu64, *(uint64_t *)num); - } else { - trace_seq_printf(s, "BAD SIZE:%d 0x%x", - el_size, *(uint8_t *)num); - el_size = 1; - } - - num += el_size; - } - break; - } - case TEP_PRINT_TYPE: - break; - case TEP_PRINT_STRING: { - int str_offset; - - if (!arg->string.field) - arg->string.field = tep_find_any_field(event, arg->string.string); - if (!arg->string.field) - break; - - str_offset = data2host4(tep, - *(unsigned int *)(data + arg->string.field->offset)); - str_offset &= 0xffff; - if (arg->string.field->flags & TEP_FIELD_IS_RELATIVE) - str_offset += arg->string.field->offset + arg->string.field->size; - print_str_to_seq(s, format, len_arg, ((char *)data) + 
str_offset); - break; - } - case TEP_PRINT_BSTRING: - print_str_to_seq(s, format, len_arg, arg->string.string); - break; - case TEP_PRINT_BITMASK: { - int bitmask_offset; - int bitmask_size; - - if (!arg->bitmask.field) - arg->bitmask.field = tep_find_any_field(event, arg->bitmask.bitmask); - if (!arg->bitmask.field) - break; - bitmask_offset = data2host4(tep, - *(unsigned int *)(data + arg->bitmask.field->offset)); - bitmask_size = bitmask_offset >> 16; - bitmask_offset &= 0xffff; - if (arg->bitmask.field->flags & TEP_FIELD_IS_RELATIVE) - bitmask_offset += arg->bitmask.field->offset + arg->bitmask.field->size; - print_bitmask_to_seq(tep, s, format, len_arg, - data + bitmask_offset, bitmask_size); - break; - } - case TEP_PRINT_OP: - /* - * The only op for string should be ? : - */ - if (arg->op.op[0] != '?') - return; - val = eval_num_arg(data, size, event, arg->op.left); - if (val) - print_str_arg(s, data, size, event, - format, len_arg, arg->op.right->op.left); - else - print_str_arg(s, data, size, event, - format, len_arg, arg->op.right->op.right); - break; - case TEP_PRINT_FUNC: - process_defined_func(s, data, size, event, arg); - break; - default: - /* well... 
*/ - break; - } - - return; - -out_warning_field: - do_warning_event(event, "%s: field %s not found", - __func__, arg->field.name); -} - -static unsigned long long -process_defined_func(struct trace_seq *s, void *data, int size, - struct tep_event *event, struct tep_print_arg *arg) -{ - struct tep_function_handler *func_handle = arg->func.func; - struct func_params *param; - unsigned long long *args; - unsigned long long ret; - struct tep_print_arg *farg; - struct trace_seq str; - struct save_str { - struct save_str *next; - char *str; - } *strings = NULL, *string; - int i; - - if (!func_handle->nr_args) { - ret = (*func_handle->func)(s, NULL); - goto out; - } - - farg = arg->func.args; - param = func_handle->params; - - ret = ULLONG_MAX; - args = malloc(sizeof(*args) * func_handle->nr_args); - if (!args) - goto out; - - for (i = 0; i < func_handle->nr_args; i++) { - switch (param->type) { - case TEP_FUNC_ARG_INT: - case TEP_FUNC_ARG_LONG: - case TEP_FUNC_ARG_PTR: - args[i] = eval_num_arg(data, size, event, farg); - break; - case TEP_FUNC_ARG_STRING: - trace_seq_init(&str); - print_str_arg(&str, data, size, event, "%s", -1, farg); - trace_seq_terminate(&str); - string = malloc(sizeof(*string)); - if (!string) { - do_warning_event(event, "%s(%d): malloc str", - __func__, __LINE__); - goto out_free; - } - string->next = strings; - string->str = strdup(str.buffer); - if (!string->str) { - free(string); - do_warning_event(event, "%s(%d): malloc str", - __func__, __LINE__); - goto out_free; - } - args[i] = (uintptr_t)string->str; - strings = string; - trace_seq_destroy(&str); - break; - default: - /* - * Something went totally wrong, this is not - * an input error, something in this code broke. 
- */ - do_warning_event(event, "Unexpected end of arguments\n"); - goto out_free; - } - farg = farg->next; - param = param->next; - } - - ret = (*func_handle->func)(s, args); -out_free: - free(args); - while (strings) { - string = strings; - strings = string->next; - free(string->str); - free(string); - } - - out: - /* TBD : handle return type here */ - return ret; -} - -static void free_args(struct tep_print_arg *args) -{ - struct tep_print_arg *next; - - while (args) { - next = args->next; - - free_arg(args); - args = next; - } -} - -static struct tep_print_arg *make_bprint_args(char *fmt, void *data, int size, struct tep_event *event) -{ - struct tep_handle *tep = event->tep; - struct tep_format_field *field, *ip_field; - struct tep_print_arg *args, *arg, **next; - unsigned long long ip, val; - char *ptr; - void *bptr; - int vsize = 0; - - field = tep->bprint_buf_field; - ip_field = tep->bprint_ip_field; - - if (!field) { - field = tep_find_field(event, "buf"); - if (!field) { - do_warning_event(event, "can't find buffer field for binary printk"); - return NULL; - } - ip_field = tep_find_field(event, "ip"); - if (!ip_field) { - do_warning_event(event, "can't find ip field for binary printk"); - return NULL; - } - tep->bprint_buf_field = field; - tep->bprint_ip_field = ip_field; - } - - ip = tep_read_number(tep, data + ip_field->offset, ip_field->size); - - /* - * The first arg is the IP pointer. 
- */ - args = alloc_arg(); - if (!args) { - do_warning_event(event, "%s(%d): not enough memory!", - __func__, __LINE__); - return NULL; - } - arg = args; - arg->next = NULL; - next = &arg->next; - - arg->type = TEP_PRINT_ATOM; - - if (asprintf(&arg->atom.atom, "%lld", ip) < 0) - goto out_free; - - /* skip the first "%ps: " */ - for (ptr = fmt + 5, bptr = data + field->offset; - bptr < data + size && *ptr; ptr++) { - int ls = 0; - - if (*ptr == '%') { - process_again: - ptr++; - switch (*ptr) { - case '%': - break; - case 'l': - ls++; - goto process_again; - case 'L': - ls = 2; - goto process_again; - case '0' ... '9': - goto process_again; - case '.': - goto process_again; - case 'z': - case 'Z': - ls = 1; - goto process_again; - case 'p': - ls = 1; - if (isalnum(ptr[1])) { - ptr++; - /* Check for special pointers */ - switch (*ptr) { - case 's': - case 'S': - case 'x': - break; - case 'f': - case 'F': - /* - * Pre-5.5 kernels use %pf and - * %pF for printing symbols - * while kernels since 5.5 use - * %pfw for fwnodes. So check - * %p[fF] isn't followed by 'w'. - */ - if (ptr[1] != 'w') - break; - /* fall through */ - default: - /* - * Older kernels do not process - * dereferenced pointers. - * Only process if the pointer - * value is a printable. - */ - if (isprint(*(char *)bptr)) - goto process_string; - } - } - /* fall through */ - case 'd': - case 'u': - case 'i': - case 'x': - case 'X': - case 'o': - switch (ls) { - case 0: - vsize = 4; - break; - case 1: - vsize = tep->long_size; - break; - case 2: - vsize = 8; - break; - default: - vsize = ls; /* ? 
*/ - break; - } - /* fall through */ - case '*': - if (*ptr == '*') - vsize = 4; - - /* the pointers are always 4 bytes aligned */ - bptr = (void *)(((unsigned long)bptr + 3) & - ~3); - val = tep_read_number(tep, bptr, vsize); - bptr += vsize; - arg = alloc_arg(); - if (!arg) { - do_warning_event(event, "%s(%d): not enough memory!", - __func__, __LINE__); - goto out_free; - } - arg->next = NULL; - arg->type = TEP_PRINT_ATOM; - if (asprintf(&arg->atom.atom, "%lld", val) < 0) { - free(arg); - goto out_free; - } - *next = arg; - next = &arg->next; - /* - * The '*' case means that an arg is used as the length. - * We need to continue to figure out for what. - */ - if (*ptr == '*') - goto process_again; - - break; - case 's': - process_string: - arg = alloc_arg(); - if (!arg) { - do_warning_event(event, "%s(%d): not enough memory!", - __func__, __LINE__); - goto out_free; - } - arg->next = NULL; - arg->type = TEP_PRINT_BSTRING; - arg->string.string = strdup(bptr); - if (!arg->string.string) - goto out_free; - bptr += strlen(bptr) + 1; - *next = arg; - next = &arg->next; - default: - break; - } - } - } - - return args; - -out_free: - free_args(args); - return NULL; -} - -static char * -get_bprint_format(void *data, int size __maybe_unused, - struct tep_event *event) -{ - struct tep_handle *tep = event->tep; - unsigned long long addr; - struct tep_format_field *field; - struct printk_map *printk; - char *format; - - field = tep->bprint_fmt_field; - - if (!field) { - field = tep_find_field(event, "fmt"); - if (!field) { - do_warning_event(event, "can't find format field for binary printk"); - return NULL; - } - tep->bprint_fmt_field = field; - } - - addr = tep_read_number(tep, data + field->offset, field->size); - - printk = find_printk(tep, addr); - if (!printk) { - if (asprintf(&format, "%%ps: (NO FORMAT FOUND at %llx)\n", addr) < 0) - return NULL; - return format; - } - - if (asprintf(&format, "%s: %s", "%ps", printk->printk) < 0) - return NULL; - - return format; -} - 
-static int print_mac_arg(struct trace_seq *s, const char *format, - void *data, int size, struct tep_event *event, - struct tep_print_arg *arg) -{ - const char *fmt = "%.2x:%.2x:%.2x:%.2x:%.2x:%.2x"; - bool reverse = false; - unsigned char *buf; - int ret = 0; - - if (arg->type == TEP_PRINT_FUNC) { - process_defined_func(s, data, size, event, arg); - return 0; - } - - if (arg->type != TEP_PRINT_FIELD) { - trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", - arg->type); - return 0; - } - - if (format[0] == 'm') { - fmt = "%.2x%.2x%.2x%.2x%.2x%.2x"; - } else if (format[0] == 'M' && format[1] == 'F') { - fmt = "%.2x-%.2x-%.2x-%.2x-%.2x-%.2x"; - ret++; - } - if (format[1] == 'R') { - reverse = true; - ret++; - } - - if (!arg->field.field) { - arg->field.field = - tep_find_any_field(event, arg->field.name); - if (!arg->field.field) { - do_warning_event(event, "%s: field %s not found", - __func__, arg->field.name); - return ret; - } - } - if (arg->field.field->size != 6) { - trace_seq_printf(s, "INVALIDMAC"); - return ret; - } - - buf = data + arg->field.field->offset; - if (reverse) - trace_seq_printf(s, fmt, buf[5], buf[4], buf[3], buf[2], buf[1], buf[0]); - else - trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]); - - return ret; -} - -static int parse_ip4_print_args(struct tep_handle *tep, - const char *ptr, bool *reverse) -{ - int ret = 0; - - *reverse = false; - - /* hnbl */ - switch (*ptr) { - case 'h': - if (tep->file_bigendian) - *reverse = false; - else - *reverse = true; - ret++; - break; - case 'l': - *reverse = true; - ret++; - break; - case 'n': - case 'b': - ret++; - /* fall through */ - default: - *reverse = false; - break; - } - - return ret; -} - -static void print_ip4_addr(struct trace_seq *s, char i, bool reverse, unsigned char *buf) -{ - const char *fmt; - - if (i == 'i') - fmt = "%03d.%03d.%03d.%03d"; - else - fmt = "%d.%d.%d.%d"; - - if (reverse) - trace_seq_printf(s, fmt, buf[3], buf[2], buf[1], buf[0]); - else - 
trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3]); - -} - -static inline bool ipv6_addr_v4mapped(const struct in6_addr *a) -{ - return ((unsigned long)(a->s6_addr32[0] | a->s6_addr32[1]) | - (unsigned long)(a->s6_addr32[2] ^ htonl(0x0000ffff))) == 0UL; -} - -static inline bool ipv6_addr_is_isatap(const struct in6_addr *addr) -{ - return (addr->s6_addr32[2] | htonl(0x02000000)) == htonl(0x02005EFE); -} - -static void print_ip6c_addr(struct trace_seq *s, unsigned char *addr) -{ - int i, j, range; - unsigned char zerolength[8]; - int longest = 1; - int colonpos = -1; - uint16_t word; - uint8_t hi, lo; - bool needcolon = false; - bool useIPv4; - struct in6_addr in6; - - memcpy(&in6, addr, sizeof(struct in6_addr)); - - useIPv4 = ipv6_addr_v4mapped(&in6) || ipv6_addr_is_isatap(&in6); - - memset(zerolength, 0, sizeof(zerolength)); - - if (useIPv4) - range = 6; - else - range = 8; - - /* find position of longest 0 run */ - for (i = 0; i < range; i++) { - for (j = i; j < range; j++) { - if (in6.s6_addr16[j] != 0) - break; - zerolength[i]++; - } - } - for (i = 0; i < range; i++) { - if (zerolength[i] > longest) { - longest = zerolength[i]; - colonpos = i; - } - } - if (longest == 1) /* don't compress a single 0 */ - colonpos = -1; - - /* emit address */ - for (i = 0; i < range; i++) { - if (i == colonpos) { - if (needcolon || i == 0) - trace_seq_printf(s, ":"); - trace_seq_printf(s, ":"); - needcolon = false; - i += longest - 1; - continue; - } - if (needcolon) { - trace_seq_printf(s, ":"); - needcolon = false; - } - /* hex u16 without leading 0s */ - word = ntohs(in6.s6_addr16[i]); - hi = word >> 8; - lo = word & 0xff; - if (hi) - trace_seq_printf(s, "%x%02x", hi, lo); - else - trace_seq_printf(s, "%x", lo); - - needcolon = true; - } - - if (useIPv4) { - if (needcolon) - trace_seq_printf(s, ":"); - print_ip4_addr(s, 'I', false, &in6.s6_addr[12]); - } - - return; -} - -static void print_ip6_addr(struct trace_seq *s, char i, unsigned char *buf) -{ - int j; - - for (j = 
0; j < 16; j += 2) { - trace_seq_printf(s, "%02x%02x", buf[j], buf[j+1]); - if (i == 'I' && j < 14) - trace_seq_printf(s, ":"); - } -} - -/* - * %pi4 print an IPv4 address with leading zeros - * %pI4 print an IPv4 address without leading zeros - * %pi6 print an IPv6 address without colons - * %pI6 print an IPv6 address with colons - * %pI6c print an IPv6 address in compressed form with colons - * %pISpc print an IP address based on sockaddr; p adds port. - */ -static int print_ipv4_arg(struct trace_seq *s, const char *ptr, char i, - void *data, int size, struct tep_event *event, - struct tep_print_arg *arg) -{ - bool reverse = false; - unsigned char *buf; - int ret; - - ret = parse_ip4_print_args(event->tep, ptr, &reverse); - - if (arg->type == TEP_PRINT_FUNC) { - process_defined_func(s, data, size, event, arg); - return ret; - } - - if (arg->type != TEP_PRINT_FIELD) { - trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type); - return ret; - } - - if (!arg->field.field) { - arg->field.field = - tep_find_any_field(event, arg->field.name); - if (!arg->field.field) { - do_warning("%s: field %s not found", - __func__, arg->field.name); - return ret; - } - } - - buf = data + arg->field.field->offset; - - if (arg->field.field->size != 4) { - trace_seq_printf(s, "INVALIDIPv4"); - return ret; - } - - print_ip4_addr(s, i, reverse, buf); - return ret; - -} - -static int print_ipv6_arg(struct trace_seq *s, const char *ptr, char i, - void *data, int size, struct tep_event *event, - struct tep_print_arg *arg) -{ - char have_c = 0; - unsigned char *buf; - int rc = 0; - - /* pI6c */ - if (i == 'I' && *ptr == 'c') { - have_c = 1; - ptr++; - rc++; - } - - if (arg->type == TEP_PRINT_FUNC) { - process_defined_func(s, data, size, event, arg); - return rc; - } - - if (arg->type != TEP_PRINT_FIELD) { - trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type); - return rc; - } - - if (!arg->field.field) { - arg->field.field = - tep_find_any_field(event, arg->field.name); - if 
(!arg->field.field) { - do_warning("%s: field %s not found", - __func__, arg->field.name); - return rc; - } - } - - buf = data + arg->field.field->offset; - - if (arg->field.field->size != 16) { - trace_seq_printf(s, "INVALIDIPv6"); - return rc; - } - - if (have_c) - print_ip6c_addr(s, buf); - else - print_ip6_addr(s, i, buf); - - return rc; -} - -static int print_ipsa_arg(struct trace_seq *s, const char *ptr, char i, - void *data, int size, struct tep_event *event, - struct tep_print_arg *arg) -{ - char have_c = 0, have_p = 0; - unsigned char *buf; - struct sockaddr_storage *sa; - bool reverse = false; - int rc = 0; - int ret; - - /* pISpc */ - if (i == 'I') { - if (*ptr == 'p') { - have_p = 1; - ptr++; - rc++; - } - if (*ptr == 'c') { - have_c = 1; - ptr++; - rc++; - } - } - ret = parse_ip4_print_args(event->tep, ptr, &reverse); - ptr += ret; - rc += ret; - - if (arg->type == TEP_PRINT_FUNC) { - process_defined_func(s, data, size, event, arg); - return rc; - } - - if (arg->type != TEP_PRINT_FIELD) { - trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type); - return rc; - } - - if (!arg->field.field) { - arg->field.field = - tep_find_any_field(event, arg->field.name); - if (!arg->field.field) { - do_warning("%s: field %s not found", - __func__, arg->field.name); - return rc; - } - } - - sa = (struct sockaddr_storage *) (data + arg->field.field->offset); - - if (sa->ss_family == AF_INET) { - struct sockaddr_in *sa4 = (struct sockaddr_in *) sa; - - if (arg->field.field->size < sizeof(struct sockaddr_in)) { - trace_seq_printf(s, "INVALIDIPv4"); - return rc; - } - - print_ip4_addr(s, i, reverse, (unsigned char *) &sa4->sin_addr); - if (have_p) - trace_seq_printf(s, ":%d", ntohs(sa4->sin_port)); - - - } else if (sa->ss_family == AF_INET6) { - struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *) sa; - - if (arg->field.field->size < sizeof(struct sockaddr_in6)) { - trace_seq_printf(s, "INVALIDIPv6"); - return rc; - } - - if (have_p) - trace_seq_printf(s, "["); - - buf 
= (unsigned char *) &sa6->sin6_addr; - if (have_c) - print_ip6c_addr(s, buf); - else - print_ip6_addr(s, i, buf); - - if (have_p) - trace_seq_printf(s, "]:%d", ntohs(sa6->sin6_port)); - } - - return rc; -} - -static int print_ip_arg(struct trace_seq *s, const char *ptr, - void *data, int size, struct tep_event *event, - struct tep_print_arg *arg) -{ - char i = *ptr; /* 'i' or 'I' */ - int rc = 1; - - /* IP version */ - ptr++; - - switch (*ptr) { - case '4': - rc += print_ipv4_arg(s, ptr + 1, i, data, size, event, arg); - break; - case '6': - rc += print_ipv6_arg(s, ptr + 1, i, data, size, event, arg); - break; - case 'S': - rc += print_ipsa_arg(s, ptr + 1, i, data, size, event, arg); - break; - default: - return 0; - } - - return rc; -} - -static const int guid_index[16] = {3, 2, 1, 0, 5, 4, 7, 6, 8, 9, 10, 11, 12, 13, 14, 15}; -static const int uuid_index[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; - -static int print_uuid_arg(struct trace_seq *s, const char *ptr, - void *data, int size, struct tep_event *event, - struct tep_print_arg *arg) -{ - const int *index = uuid_index; - char *format = "%02x"; - int ret = 0; - char *buf; - int i; - - switch (*(ptr + 1)) { - case 'L': - format = "%02X"; - /* fall through */ - case 'l': - index = guid_index; - ret++; - break; - case 'B': - format = "%02X"; - /* fall through */ - case 'b': - ret++; - break; - } - - if (arg->type == TEP_PRINT_FUNC) { - process_defined_func(s, data, size, event, arg); - return ret; - } - - if (arg->type != TEP_PRINT_FIELD) { - trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type); - return ret; - } - - if (!arg->field.field) { - arg->field.field = - tep_find_any_field(event, arg->field.name); - if (!arg->field.field) { - do_warning("%s: field %s not found", - __func__, arg->field.name); - return ret; - } - } - - if (arg->field.field->size != 16) { - trace_seq_printf(s, "INVALIDUUID"); - return ret; - } - - buf = data + arg->field.field->offset; - - for (i = 0; i < 16; 
i++) { - trace_seq_printf(s, format, buf[index[i]] & 0xff); - switch (i) { - case 3: - case 5: - case 7: - case 9: - trace_seq_printf(s, "-"); - break; - } - } - - return ret; -} - -static int print_raw_buff_arg(struct trace_seq *s, const char *ptr, - void *data, int size, struct tep_event *event, - struct tep_print_arg *arg, int print_len) -{ - int plen = print_len; - char *delim = " "; - int ret = 0; - char *buf; - int i; - unsigned long offset; - int arr_len; - - switch (*(ptr + 1)) { - case 'C': - delim = ":"; - ret++; - break; - case 'D': - delim = "-"; - ret++; - break; - case 'N': - delim = ""; - ret++; - break; - } - - if (arg->type == TEP_PRINT_FUNC) { - process_defined_func(s, data, size, event, arg); - return ret; - } - - if (arg->type != TEP_PRINT_DYNAMIC_ARRAY) { - trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type); - return ret; - } - - offset = tep_read_number(event->tep, - data + arg->dynarray.field->offset, - arg->dynarray.field->size); - arr_len = (unsigned long long)(offset >> 16); - buf = data + (offset & 0xffff); - - if (arr_len < plen) - plen = arr_len; - - if (plen < 1) - return ret; - - trace_seq_printf(s, "%02x", buf[0] & 0xff); - for (i = 1; i < plen; i++) - trace_seq_printf(s, "%s%02x", delim, buf[i] & 0xff); - - return ret; -} - -static int is_printable_array(char *p, unsigned int len) -{ - unsigned int i; - - for (i = 0; i < len && p[i]; i++) - if (!isprint(p[i]) && !isspace(p[i])) - return 0; - return 1; -} - -void tep_print_field(struct trace_seq *s, void *data, - struct tep_format_field *field) -{ - unsigned long long val; - unsigned int offset, len, i; - struct tep_handle *tep = field->event->tep; - - if (field->flags & TEP_FIELD_IS_ARRAY) { - offset = field->offset; - len = field->size; - if (field->flags & TEP_FIELD_IS_DYNAMIC) { - val = tep_read_number(tep, data + offset, len); - offset = val; - len = offset >> 16; - offset &= 0xffff; - if (field->flags & TEP_FIELD_IS_RELATIVE) - offset += field->offset + field->size; - 
} - if (field->flags & TEP_FIELD_IS_STRING && - is_printable_array(data + offset, len)) { - trace_seq_printf(s, "%s", (char *)data + offset); - } else { - trace_seq_puts(s, "ARRAY["); - for (i = 0; i < len; i++) { - if (i) - trace_seq_puts(s, ", "); - trace_seq_printf(s, "%02x", - *((unsigned char *)data + offset + i)); - } - trace_seq_putc(s, ']'); - field->flags &= ~TEP_FIELD_IS_STRING; - } - } else { - val = tep_read_number(tep, data + field->offset, - field->size); - if (field->flags & TEP_FIELD_IS_POINTER) { - trace_seq_printf(s, "0x%llx", val); - } else if (field->flags & TEP_FIELD_IS_SIGNED) { - switch (field->size) { - case 4: - /* - * If field is long then print it in hex. - * A long usually stores pointers. - */ - if (field->flags & TEP_FIELD_IS_LONG) - trace_seq_printf(s, "0x%x", (int)val); - else - trace_seq_printf(s, "%d", (int)val); - break; - case 2: - trace_seq_printf(s, "%2d", (short)val); - break; - case 1: - trace_seq_printf(s, "%1d", (char)val); - break; - default: - trace_seq_printf(s, "%lld", val); - } - } else { - if (field->flags & TEP_FIELD_IS_LONG) - trace_seq_printf(s, "0x%llx", val); - else - trace_seq_printf(s, "%llu", val); - } - } -} - -void tep_print_fields(struct trace_seq *s, void *data, - int size __maybe_unused, struct tep_event *event) -{ - struct tep_format_field *field; - - field = event->format.fields; - while (field) { - trace_seq_printf(s, " %s=", field->name); - tep_print_field(s, data, field); - field = field->next; - } -} - -static int print_function(struct trace_seq *s, const char *format, - void *data, int size, struct tep_event *event, - struct tep_print_arg *arg) -{ - struct func_map *func; - unsigned long long val; - - val = eval_num_arg(data, size, event, arg); - func = find_func(event->tep, val); - if (func) { - trace_seq_puts(s, func->func); - if (*format == 'F' || *format == 'S') - trace_seq_printf(s, "+0x%llx", val - func->addr); - } else { - if (event->tep->long_size == 4) - trace_seq_printf(s, "0x%lx", 
(long)val); - else - trace_seq_printf(s, "0x%llx", (long long)val); - } - - return 0; -} - -static int print_arg_pointer(struct trace_seq *s, const char *format, int plen, - void *data, int size, - struct tep_event *event, struct tep_print_arg *arg) -{ - unsigned long long val; - int ret = 1; - - if (arg->type == TEP_PRINT_BSTRING) { - trace_seq_puts(s, arg->string.string); - return 0; - } - while (*format) { - if (*format == 'p') { - format++; - break; - } - format++; - } - - switch (*format) { - case 'F': - case 'f': - case 'S': - case 's': - ret += print_function(s, format, data, size, event, arg); - break; - case 'M': - case 'm': - ret += print_mac_arg(s, format, data, size, event, arg); - break; - case 'I': - case 'i': - ret += print_ip_arg(s, format, data, size, event, arg); - break; - case 'U': - ret += print_uuid_arg(s, format, data, size, event, arg); - break; - case 'h': - ret += print_raw_buff_arg(s, format, data, size, event, arg, plen); - break; - default: - ret = 0; - val = eval_num_arg(data, size, event, arg); - trace_seq_printf(s, "%p", (void *)(intptr_t)val); - break; - } - - return ret; - -} - -static int print_arg_number(struct trace_seq *s, const char *format, int plen, - void *data, int size, int ls, - struct tep_event *event, struct tep_print_arg *arg) -{ - unsigned long long val; - - val = eval_num_arg(data, size, event, arg); - - switch (ls) { - case -2: - if (plen >= 0) - trace_seq_printf(s, format, plen, (char)val); - else - trace_seq_printf(s, format, (char)val); - break; - case -1: - if (plen >= 0) - trace_seq_printf(s, format, plen, (short)val); - else - trace_seq_printf(s, format, (short)val); - break; - case 0: - if (plen >= 0) - trace_seq_printf(s, format, plen, (int)val); - else - trace_seq_printf(s, format, (int)val); - break; - case 1: - if (plen >= 0) - trace_seq_printf(s, format, plen, (long)val); - else - trace_seq_printf(s, format, (long)val); - break; - case 2: - if (plen >= 0) - trace_seq_printf(s, format, plen, (long 
long)val); - else - trace_seq_printf(s, format, (long long)val); - break; - default: - do_warning_event(event, "bad count (%d)", ls); - event->flags |= TEP_EVENT_FL_FAILED; - } - return 0; -} - - -static void print_arg_string(struct trace_seq *s, const char *format, int plen, - void *data, int size, - struct tep_event *event, struct tep_print_arg *arg) -{ - struct trace_seq p; - - /* Use helper trace_seq */ - trace_seq_init(&p); - print_str_arg(&p, data, size, event, - format, plen, arg); - trace_seq_terminate(&p); - trace_seq_puts(s, p.buffer); - trace_seq_destroy(&p); -} - -static int parse_arg_format_pointer(const char *format) -{ - int ret = 0; - int index; - int loop; - - switch (*format) { - case 'F': - case 'S': - case 'f': - case 's': - ret++; - break; - case 'M': - case 'm': - /* [mM]R , [mM]F */ - switch (format[1]) { - case 'R': - case 'F': - ret++; - break; - } - ret++; - break; - case 'I': - case 'i': - index = 2; - loop = 1; - switch (format[1]) { - case 'S': - /*[S][pfs]*/ - while (loop) { - switch (format[index]) { - case 'p': - case 'f': - case 's': - ret++; - index++; - break; - default: - loop = 0; - break; - } - } - /* fall through */ - case '4': - /* [4S][hnbl] */ - switch (format[index]) { - case 'h': - case 'n': - case 'l': - case 'b': - ret++; - index++; - break; - } - if (format[1] == '4') { - ret++; - break; - } - /* fall through */ - case '6': - /* [6S]c */ - if (format[index] == 'c') - ret++; - ret++; - break; - } - ret++; - break; - case 'U': - switch (format[1]) { - case 'L': - case 'l': - case 'B': - case 'b': - ret++; - break; - } - ret++; - break; - case 'h': - switch (format[1]) { - case 'C': - case 'D': - case 'N': - ret++; - break; - } - ret++; - break; - default: - break; - } - - return ret; -} - -static void free_parse_args(struct tep_print_parse *arg) -{ - struct tep_print_parse *del; - - while (arg) { - del = arg; - arg = del->next; - free(del->format); - free(del); - } -} - -static int parse_arg_add(struct tep_print_parse 
**parse, char *format, - enum tep_print_parse_type type, - struct tep_print_arg *arg, - struct tep_print_arg *len_as_arg, - int ls) -{ - struct tep_print_parse *parg = NULL; - - parg = calloc(1, sizeof(*parg)); - if (!parg) - goto error; - parg->format = strdup(format); - if (!parg->format) - goto error; - parg->type = type; - parg->arg = arg; - parg->len_as_arg = len_as_arg; - parg->ls = ls; - *parse = parg; - return 0; -error: - if (parg) { - free(parg->format); - free(parg); - } - return -1; -} - -static int parse_arg_format(struct tep_print_parse **parse, - struct tep_event *event, - const char *format, struct tep_print_arg **arg) -{ - struct tep_print_arg *len_arg = NULL; - char print_format[32]; - const char *start = format; - int ret = 0; - int ls = 0; - int res; - int len; - - format++; - ret++; - for (; *format; format++) { - switch (*format) { - case '#': - /* FIXME: need to handle properly */ - break; - case 'h': - ls--; - break; - case 'l': - ls++; - break; - case 'L': - ls = 2; - break; - case '.': - case 'z': - case 'Z': - case '0' ... '9': - case '-': - break; - case '*': - /* The argument is the length. 
*/ - if (!*arg) { - do_warning_event(event, "no argument match"); - event->flags |= TEP_EVENT_FL_FAILED; - goto out_failed; - } - if (len_arg) { - do_warning_event(event, "argument already matched"); - event->flags |= TEP_EVENT_FL_FAILED; - goto out_failed; - } - len_arg = *arg; - *arg = (*arg)->next; - break; - case 'p': - if (!*arg) { - do_warning_event(event, "no argument match"); - event->flags |= TEP_EVENT_FL_FAILED; - goto out_failed; - } - res = parse_arg_format_pointer(format + 1); - if (res > 0) { - format += res; - ret += res; - } - len = ((unsigned long)format + 1) - - (unsigned long)start; - /* should never happen */ - if (len > 31) { - do_warning_event(event, "bad format!"); - event->flags |= TEP_EVENT_FL_FAILED; - len = 31; - } - memcpy(print_format, start, len); - print_format[len] = 0; - - parse_arg_add(parse, print_format, - PRINT_FMT_ARG_POINTER, *arg, len_arg, ls); - *arg = (*arg)->next; - ret++; - return ret; - case 'd': - case 'u': - case 'i': - case 'x': - case 'X': - case 'o': - if (!*arg) { - do_warning_event(event, "no argument match"); - event->flags |= TEP_EVENT_FL_FAILED; - goto out_failed; - } - - len = ((unsigned long)format + 1) - - (unsigned long)start; - - /* should never happen */ - if (len > 30) { - do_warning_event(event, "bad format!"); - event->flags |= TEP_EVENT_FL_FAILED; - len = 31; - } - memcpy(print_format, start, len); - print_format[len] = 0; - - if (event->tep->long_size == 8 && ls == 1 && - sizeof(long) != 8) { - char *p; - - /* make %l into %ll */ - if (ls == 1 && (p = strchr(print_format, 'l'))) - memmove(p+1, p, strlen(p)+1); - ls = 2; - } - if (ls < -2 || ls > 2) { - do_warning_event(event, "bad count (%d)", ls); - event->flags |= TEP_EVENT_FL_FAILED; - } - parse_arg_add(parse, print_format, - PRINT_FMT_ARG_DIGIT, *arg, len_arg, ls); - *arg = (*arg)->next; - ret++; - return ret; - case 's': - if (!*arg) { - do_warning_event(event, "no matching argument"); - event->flags |= TEP_EVENT_FL_FAILED; - goto out_failed; - 
} - - len = ((unsigned long)format + 1) - - (unsigned long)start; - - /* should never happen */ - if (len > 31) { - do_warning_event(event, "bad format!"); - event->flags |= TEP_EVENT_FL_FAILED; - len = 31; - } - - memcpy(print_format, start, len); - print_format[len] = 0; - - parse_arg_add(parse, print_format, - PRINT_FMT_ARG_STRING, *arg, len_arg, 0); - *arg = (*arg)->next; - ret++; - return ret; - default: - snprintf(print_format, 32, ">%c<", *format); - parse_arg_add(parse, print_format, - PRINT_FMT_STRING, NULL, NULL, 0); - ret++; - return ret; - } - ret++; - } - -out_failed: - return ret; - -} - -static int parse_arg_string(struct tep_print_parse **parse, const char *format) -{ - struct trace_seq s; - int ret = 0; - - trace_seq_init(&s); - for (; *format; format++) { - if (*format == '\\') { - format++; - ret++; - switch (*format) { - case 'n': - trace_seq_putc(&s, '\n'); - break; - case 't': - trace_seq_putc(&s, '\t'); - break; - case 'r': - trace_seq_putc(&s, '\r'); - break; - case '\\': - trace_seq_putc(&s, '\\'); - break; - default: - trace_seq_putc(&s, *format); - break; - } - } else if (*format == '%') { - if (*(format + 1) == '%') { - trace_seq_putc(&s, '%'); - format++; - ret++; - } else - break; - } else - trace_seq_putc(&s, *format); - - ret++; - } - trace_seq_terminate(&s); - parse_arg_add(parse, s.buffer, PRINT_FMT_STRING, NULL, NULL, 0); - trace_seq_destroy(&s); - - return ret; -} - -static struct tep_print_parse * -parse_args(struct tep_event *event, const char *format, struct tep_print_arg *arg) -{ - struct tep_print_parse *parse_ret = NULL; - struct tep_print_parse **parse = NULL; - int ret; - int len; - - len = strlen(format); - while (*format) { - if (!parse_ret) - parse = &parse_ret; - if (*format == '%' && *(format + 1) != '%') - ret = parse_arg_format(parse, event, format, &arg); - else - ret = parse_arg_string(parse, format); - if (*parse) - parse = &((*parse)->next); - - len -= ret; - if (len > 0) - format += ret; - else - break; - } - 
return parse_ret; -} - -static void print_event_cache(struct tep_print_parse *parse, struct trace_seq *s, - void *data, int size, struct tep_event *event) -{ - int len_arg; - - while (parse) { - if (parse->len_as_arg) - len_arg = eval_num_arg(data, size, event, parse->len_as_arg); - switch (parse->type) { - case PRINT_FMT_ARG_DIGIT: - print_arg_number(s, parse->format, - parse->len_as_arg ? len_arg : -1, data, - size, parse->ls, event, parse->arg); - break; - case PRINT_FMT_ARG_POINTER: - print_arg_pointer(s, parse->format, - parse->len_as_arg ? len_arg : 1, - data, size, event, parse->arg); - break; - case PRINT_FMT_ARG_STRING: - print_arg_string(s, parse->format, - parse->len_as_arg ? len_arg : -1, - data, size, event, parse->arg); - break; - case PRINT_FMT_STRING: - default: - trace_seq_printf(s, "%s", parse->format); - break; - } - parse = parse->next; - } -} - -static void pretty_print(struct trace_seq *s, void *data, int size, struct tep_event *event) -{ - struct tep_print_parse *parse = event->print_fmt.print_cache; - struct tep_print_arg *args = NULL; - char *bprint_fmt = NULL; - - if (event->flags & TEP_EVENT_FL_FAILED) { - trace_seq_printf(s, "[FAILED TO PARSE]"); - tep_print_fields(s, data, size, event); - return; - } - - if (event->flags & TEP_EVENT_FL_ISBPRINT) { - bprint_fmt = get_bprint_format(data, size, event); - args = make_bprint_args(bprint_fmt, data, size, event); - parse = parse_args(event, bprint_fmt, args); - } - - print_event_cache(parse, s, data, size, event); - - if (event->flags & TEP_EVENT_FL_ISBPRINT) { - free_parse_args(parse); - free_args(args); - free(bprint_fmt); - } -} - -/* - * This parses out the Latency format (interrupts disabled, - * need rescheduling, in hard/soft interrupt, preempt count - * and lock depth) and places it into the trace_seq. 
- */ -static void data_latency_format(struct tep_handle *tep, struct trace_seq *s, - char *format, struct tep_record *record) -{ - static int check_lock_depth = 1; - static int check_migrate_disable = 1; - static int lock_depth_exists; - static int migrate_disable_exists; - unsigned int lat_flags; - struct trace_seq sq; - unsigned int pc; - int lock_depth = 0; - int migrate_disable = 0; - int hardirq; - int softirq; - void *data = record->data; - - trace_seq_init(&sq); - lat_flags = parse_common_flags(tep, data); - pc = parse_common_pc(tep, data); - /* lock_depth may not always exist */ - if (lock_depth_exists) - lock_depth = parse_common_lock_depth(tep, data); - else if (check_lock_depth) { - lock_depth = parse_common_lock_depth(tep, data); - if (lock_depth < 0) - check_lock_depth = 0; - else - lock_depth_exists = 1; - } - - /* migrate_disable may not always exist */ - if (migrate_disable_exists) - migrate_disable = parse_common_migrate_disable(tep, data); - else if (check_migrate_disable) { - migrate_disable = parse_common_migrate_disable(tep, data); - if (migrate_disable < 0) - check_migrate_disable = 0; - else - migrate_disable_exists = 1; - } - - hardirq = lat_flags & TRACE_FLAG_HARDIRQ; - softirq = lat_flags & TRACE_FLAG_SOFTIRQ; - - trace_seq_printf(&sq, "%c%c%c", - (lat_flags & TRACE_FLAG_IRQS_OFF) ? 'd' : - (lat_flags & TRACE_FLAG_IRQS_NOSUPPORT) ? - 'X' : '.', - (lat_flags & TRACE_FLAG_NEED_RESCHED) ? - 'N' : '.', - (hardirq && softirq) ? 'H' : - hardirq ? 'h' : softirq ? 
's' : '.'); - - if (pc) - trace_seq_printf(&sq, "%x", pc); - else - trace_seq_printf(&sq, "."); - - if (migrate_disable_exists) { - if (migrate_disable < 0) - trace_seq_printf(&sq, "."); - else - trace_seq_printf(&sq, "%d", migrate_disable); - } - - if (lock_depth_exists) { - if (lock_depth < 0) - trace_seq_printf(&sq, "."); - else - trace_seq_printf(&sq, "%d", lock_depth); - } - - if (sq.state == TRACE_SEQ__MEM_ALLOC_FAILED) { - s->state = TRACE_SEQ__MEM_ALLOC_FAILED; - return; - } - - trace_seq_terminate(&sq); - trace_seq_puts(s, sq.buffer); - trace_seq_destroy(&sq); - trace_seq_terminate(s); -} - -/** - * tep_data_type - parse out the given event type - * @tep: a handle to the trace event parser context - * @rec: the record to read from - * - * This returns the event id from the @rec. - */ -int tep_data_type(struct tep_handle *tep, struct tep_record *rec) -{ - return trace_parse_common_type(tep, rec->data); -} - -/** - * tep_data_pid - parse the PID from record - * @tep: a handle to the trace event parser context - * @rec: the record to parse - * - * This returns the PID from a record. - */ -int tep_data_pid(struct tep_handle *tep, struct tep_record *rec) -{ - return parse_common_pid(tep, rec->data); -} - -/** - * tep_data_preempt_count - parse the preempt count from the record - * @tep: a handle to the trace event parser context - * @rec: the record to parse - * - * This returns the preempt count from a record. - */ -int tep_data_preempt_count(struct tep_handle *tep, struct tep_record *rec) -{ - return parse_common_pc(tep, rec->data); -} - -/** - * tep_data_flags - parse the latency flags from the record - * @tep: a handle to the trace event parser context - * @rec: the record to parse - * - * This returns the latency flags from a record. - * - * Use trace_flag_type enum for the flags (see event-parse.h). 
- */ -int tep_data_flags(struct tep_handle *tep, struct tep_record *rec) -{ - return parse_common_flags(tep, rec->data); -} - -/** - * tep_data_comm_from_pid - return the command line from PID - * @tep: a handle to the trace event parser context - * @pid: the PID of the task to search for - * - * This returns a pointer to the command line that has the given - * @pid. - */ -const char *tep_data_comm_from_pid(struct tep_handle *tep, int pid) -{ - const char *comm; - - comm = find_cmdline(tep, pid); - return comm; -} - -static struct tep_cmdline * -pid_from_cmdlist(struct tep_handle *tep, const char *comm, struct tep_cmdline *next) -{ - struct cmdline_list *cmdlist = (struct cmdline_list *)next; - - if (cmdlist) - cmdlist = cmdlist->next; - else - cmdlist = tep->cmdlist; - - while (cmdlist && strcmp(cmdlist->comm, comm) != 0) - cmdlist = cmdlist->next; - - return (struct tep_cmdline *)cmdlist; -} - -/** - * tep_data_pid_from_comm - return the pid from a given comm - * @tep: a handle to the trace event parser context - * @comm: the cmdline to find the pid from - * @next: the cmdline structure to find the next comm - * - * This returns the cmdline structure that holds a pid for a given - * comm, or NULL if none found. As there may be more than one pid for - * a given comm, the result of this call can be passed back into - * a recurring call in the @next parameter, and then it will find the - * next pid. - * Also, it does a linear search, so it may be slow. - */ -struct tep_cmdline *tep_data_pid_from_comm(struct tep_handle *tep, const char *comm, - struct tep_cmdline *next) -{ - struct tep_cmdline *cmdline; - - /* - * If the cmdlines have not been converted yet, then use - * the list. 
- */ - if (!tep->cmdlines) - return pid_from_cmdlist(tep, comm, next); - - if (next) { - /* - * The next pointer could have been still from - * a previous call before cmdlines were created - */ - if (next < tep->cmdlines || - next >= tep->cmdlines + tep->cmdline_count) - next = NULL; - else - cmdline = next++; - } - - if (!next) - cmdline = tep->cmdlines; - - while (cmdline < tep->cmdlines + tep->cmdline_count) { - if (strcmp(cmdline->comm, comm) == 0) - return cmdline; - cmdline++; - } - return NULL; -} - -/** - * tep_cmdline_pid - return the pid associated to a given cmdline - * @tep: a handle to the trace event parser context - * @cmdline: The cmdline structure to get the pid from - * - * Returns the pid for a give cmdline. If @cmdline is NULL, then - * -1 is returned. - */ -int tep_cmdline_pid(struct tep_handle *tep, struct tep_cmdline *cmdline) -{ - struct cmdline_list *cmdlist = (struct cmdline_list *)cmdline; - - if (!cmdline) - return -1; - - /* - * If cmdlines have not been created yet, or cmdline is - * not part of the array, then treat it as a cmdlist instead. - */ - if (!tep->cmdlines || - cmdline < tep->cmdlines || - cmdline >= tep->cmdlines + tep->cmdline_count) - return cmdlist->pid; - - return cmdline->pid; -} - -/* - * This parses the raw @data using the given @event information and - * writes the print format into the trace_seq. 
- */ -static void print_event_info(struct trace_seq *s, char *format, bool raw, - struct tep_event *event, struct tep_record *record) -{ - int print_pretty = 1; - - if (raw || (event->flags & TEP_EVENT_FL_PRINTRAW)) - tep_print_fields(s, record->data, record->size, event); - else { - - if (event->handler && !(event->flags & TEP_EVENT_FL_NOHANDLE)) - print_pretty = event->handler(s, record, event, - event->context); - - if (print_pretty) - pretty_print(s, record->data, record->size, event); - } - - trace_seq_terminate(s); -} - -/** - * tep_find_event_by_record - return the event from a given record - * @tep: a handle to the trace event parser context - * @record: The record to get the event from - * - * Returns the associated event for a given record, or NULL if non is - * is found. - */ -struct tep_event * -tep_find_event_by_record(struct tep_handle *tep, struct tep_record *record) -{ - int type; - - if (record->size < 0) { - do_warning("ug! negative record size %d", record->size); - return NULL; - } - - type = trace_parse_common_type(tep, record->data); - - return tep_find_event(tep, type); -} - -/* - * Writes the timestamp of the record into @s. Time divisor and precision can be - * specified as part of printf @format string. Example: - * "%3.1000d" - divide the time by 1000 and print the first 3 digits - * before the dot. 
Thus, the timestamp "123456000" will be printed as - * "123.456" - */ -static void print_event_time(struct tep_handle *tep, struct trace_seq *s, - char *format, struct tep_event *event, - struct tep_record *record) -{ - unsigned long long time; - char *divstr; - int prec = 0, pr; - int div = 0; - int p10 = 1; - - if (isdigit(*(format + 1))) - prec = atoi(format + 1); - divstr = strchr(format, '.'); - if (divstr && isdigit(*(divstr + 1))) - div = atoi(divstr + 1); - time = record->ts; - if (div) { - time += div / 2; - time /= div; - } - pr = prec; - while (pr--) - p10 *= 10; - - if (p10 > 1 && p10 < time) - trace_seq_printf(s, "%5llu.%0*llu", time / p10, prec, time % p10); - else - trace_seq_printf(s, "%12llu", time); -} - -struct print_event_type { - enum { - EVENT_TYPE_INT = 1, - EVENT_TYPE_STRING, - EVENT_TYPE_UNKNOWN, - } type; - char format[32]; -}; - -static void print_string(struct tep_handle *tep, struct trace_seq *s, - struct tep_record *record, struct tep_event *event, - const char *arg, struct print_event_type *type) -{ - const char *comm; - int pid; - - if (strncmp(arg, TEP_PRINT_LATENCY, strlen(TEP_PRINT_LATENCY)) == 0) { - data_latency_format(tep, s, type->format, record); - } else if (strncmp(arg, TEP_PRINT_COMM, strlen(TEP_PRINT_COMM)) == 0) { - pid = parse_common_pid(tep, record->data); - comm = find_cmdline(tep, pid); - trace_seq_printf(s, type->format, comm); - } else if (strncmp(arg, TEP_PRINT_INFO_RAW, strlen(TEP_PRINT_INFO_RAW)) == 0) { - print_event_info(s, type->format, true, event, record); - } else if (strncmp(arg, TEP_PRINT_INFO, strlen(TEP_PRINT_INFO)) == 0) { - print_event_info(s, type->format, false, event, record); - } else if (strncmp(arg, TEP_PRINT_NAME, strlen(TEP_PRINT_NAME)) == 0) { - trace_seq_printf(s, type->format, event->name); - } else { - trace_seq_printf(s, "[UNKNOWN TEP TYPE %s]", arg); - } - -} - -static void print_int(struct tep_handle *tep, struct trace_seq *s, - struct tep_record *record, struct tep_event *event, - int 
arg, struct print_event_type *type) -{ - int param; - - switch (arg) { - case TEP_PRINT_CPU: - param = record->cpu; - break; - case TEP_PRINT_PID: - param = parse_common_pid(tep, record->data); - break; - case TEP_PRINT_TIME: - return print_event_time(tep, s, type->format, event, record); - default: - return; - } - trace_seq_printf(s, type->format, param); -} - -static int tep_print_event_param_type(char *format, - struct print_event_type *type) -{ - char *str = format + 1; - int i = 1; - - type->type = EVENT_TYPE_UNKNOWN; - while (*str) { - switch (*str) { - case 'd': - case 'u': - case 'i': - case 'x': - case 'X': - case 'o': - type->type = EVENT_TYPE_INT; - break; - case 's': - type->type = EVENT_TYPE_STRING; - break; - } - str++; - i++; - if (type->type != EVENT_TYPE_UNKNOWN) - break; - } - memset(type->format, 0, 32); - memcpy(type->format, format, i < 32 ? i : 31); - return i; -} - -/** - * tep_print_event - Write various event information - * @tep: a handle to the trace event parser context - * @s: the trace_seq to write to - * @record: The record to get the event from - * @format: a printf format string. Supported event fileds: - * TEP_PRINT_PID, "%d" - event PID - * TEP_PRINT_CPU, "%d" - event CPU - * TEP_PRINT_COMM, "%s" - event command string - * TEP_PRINT_NAME, "%s" - event name - * TEP_PRINT_LATENCY, "%s" - event latency - * TEP_PRINT_TIME, %d - event time stamp. A divisor and precision - * can be specified as part of this format string: - * "%precision.divisord". Example: - * "%3.1000d" - divide the time by 1000 and print the first - * 3 digits before the dot. Thus, the time stamp - * "123456000" will be printed as "123.456" - * TEP_PRINT_INFO, "%s" - event information. If any width is specified in - * the format string, the event information will be printed - * in raw format. - * Writes the specified event information into @s. - */ -void tep_print_event(struct tep_handle *tep, struct trace_seq *s, - struct tep_record *record, const char *fmt, ...) 
-{ - struct print_event_type type; - char *format = strdup(fmt); - char *current = format; - char *str = format; - int offset; - va_list args; - struct tep_event *event; - - if (!format) - return; - - event = tep_find_event_by_record(tep, record); - va_start(args, fmt); - while (*current) { - current = strchr(str, '%'); - if (!current) { - trace_seq_puts(s, str); - break; - } - memset(&type, 0, sizeof(type)); - offset = tep_print_event_param_type(current, &type); - *current = '\0'; - trace_seq_puts(s, str); - current += offset; - switch (type.type) { - case EVENT_TYPE_STRING: - print_string(tep, s, record, event, - va_arg(args, char*), &type); - break; - case EVENT_TYPE_INT: - print_int(tep, s, record, event, - va_arg(args, int), &type); - break; - case EVENT_TYPE_UNKNOWN: - default: - trace_seq_printf(s, "[UNKNOWN TYPE]"); - break; - } - str = current; - - } - va_end(args); - free(format); -} - -static int events_id_cmp(const void *a, const void *b) -{ - struct tep_event * const * ea = a; - struct tep_event * const * eb = b; - - if ((*ea)->id < (*eb)->id) - return -1; - - if ((*ea)->id > (*eb)->id) - return 1; - - return 0; -} - -static int events_name_cmp(const void *a, const void *b) -{ - struct tep_event * const * ea = a; - struct tep_event * const * eb = b; - int res; - - res = strcmp((*ea)->name, (*eb)->name); - if (res) - return res; - - res = strcmp((*ea)->system, (*eb)->system); - if (res) - return res; - - return events_id_cmp(a, b); -} - -static int events_system_cmp(const void *a, const void *b) -{ - struct tep_event * const * ea = a; - struct tep_event * const * eb = b; - int res; - - res = strcmp((*ea)->system, (*eb)->system); - if (res) - return res; - - res = strcmp((*ea)->name, (*eb)->name); - if (res) - return res; - - return events_id_cmp(a, b); -} - -static struct tep_event **list_events_copy(struct tep_handle *tep) -{ - struct tep_event **events; - - if (!tep) - return NULL; - - events = malloc(sizeof(*events) * (tep->nr_events + 1)); - if 
(!events) - return NULL; - - memcpy(events, tep->events, sizeof(*events) * tep->nr_events); - events[tep->nr_events] = NULL; - return events; -} - -static void list_events_sort(struct tep_event **events, int nr_events, - enum tep_event_sort_type sort_type) -{ - int (*sort)(const void *a, const void *b); - - switch (sort_type) { - case TEP_EVENT_SORT_ID: - sort = events_id_cmp; - break; - case TEP_EVENT_SORT_NAME: - sort = events_name_cmp; - break; - case TEP_EVENT_SORT_SYSTEM: - sort = events_system_cmp; - break; - default: - sort = NULL; - } - - if (sort) - qsort(events, nr_events, sizeof(*events), sort); -} - -/** - * tep_list_events - Get events, sorted by given criteria. - * @tep: a handle to the tep context - * @sort_type: desired sort order of the events in the array - * - * Returns an array of pointers to all events, sorted by the given - * @sort_type criteria. The last element of the array is NULL. The returned - * memory must not be freed, it is managed by the library. - * The function is not thread safe. - */ -struct tep_event **tep_list_events(struct tep_handle *tep, - enum tep_event_sort_type sort_type) -{ - struct tep_event **events; - - if (!tep) - return NULL; - - events = tep->sort_events; - if (events && tep->last_type == sort_type) - return events; - - if (!events) { - events = list_events_copy(tep); - if (!events) - return NULL; - - tep->sort_events = events; - - /* the internal events are sorted by id */ - if (sort_type == TEP_EVENT_SORT_ID) { - tep->last_type = sort_type; - return events; - } - } - - list_events_sort(events, tep->nr_events, sort_type); - tep->last_type = sort_type; - - return events; -} - - -/** - * tep_list_events_copy - Thread safe version of tep_list_events() - * @tep: a handle to the tep context - * @sort_type: desired sort order of the events in the array - * - * Returns an array of pointers to all events, sorted by the given - * @sort_type criteria. The last element of the array is NULL. 
The returned - * array is newly allocated inside the function and must be freed by the caller - */ -struct tep_event **tep_list_events_copy(struct tep_handle *tep, - enum tep_event_sort_type sort_type) -{ - struct tep_event **events; - - if (!tep) - return NULL; - - events = list_events_copy(tep); - if (!events) - return NULL; - - /* the internal events are sorted by id */ - if (sort_type == TEP_EVENT_SORT_ID) - return events; - - list_events_sort(events, tep->nr_events, sort_type); - - return events; -} - -static struct tep_format_field ** -get_event_fields(const char *type, const char *name, - int count, struct tep_format_field *list) -{ - struct tep_format_field **fields; - struct tep_format_field *field; - int i = 0; - - fields = malloc(sizeof(*fields) * (count + 1)); - if (!fields) - return NULL; - - for (field = list; field; field = field->next) { - fields[i++] = field; - if (i == count + 1) { - do_warning("event %s has more %s fields than specified", - name, type); - i--; - break; - } - } - - if (i != count) - do_warning("event %s has less %s fields than specified", - name, type); - - fields[i] = NULL; - - return fields; -} - -/** - * tep_event_common_fields - return a list of common fields for an event - * @event: the event to return the common fields of. - * - * Returns an allocated array of fields. The last item in the array is NULL. - * The array must be freed with free(). - */ -struct tep_format_field **tep_event_common_fields(struct tep_event *event) -{ - return get_event_fields("common", event->name, - event->format.nr_common, - event->format.common_fields); -} - -/** - * tep_event_fields - return a list of event specific fields for an event - * @event: the event to return the fields of. - * - * Returns an allocated array of fields. The last item in the array is NULL. - * The array must be freed with free(). 
- */ -struct tep_format_field **tep_event_fields(struct tep_event *event) -{ - return get_event_fields("event", event->name, - event->format.nr_fields, - event->format.fields); -} - -static void print_fields(struct trace_seq *s, struct tep_print_flag_sym *field) -{ - trace_seq_printf(s, "{ %s, %s }", field->value, field->str); - if (field->next) { - trace_seq_puts(s, ", "); - print_fields(s, field->next); - } -} - -/* for debugging */ -static void print_args(struct tep_print_arg *args) -{ - int print_paren = 1; - struct trace_seq s; - - switch (args->type) { - case TEP_PRINT_NULL: - printf("null"); - break; - case TEP_PRINT_ATOM: - printf("%s", args->atom.atom); - break; - case TEP_PRINT_FIELD: - printf("REC->%s", args->field.name); - break; - case TEP_PRINT_FLAGS: - printf("__print_flags("); - print_args(args->flags.field); - printf(", %s, ", args->flags.delim); - trace_seq_init(&s); - print_fields(&s, args->flags.flags); - trace_seq_do_printf(&s); - trace_seq_destroy(&s); - printf(")"); - break; - case TEP_PRINT_SYMBOL: - printf("__print_symbolic("); - print_args(args->symbol.field); - printf(", "); - trace_seq_init(&s); - print_fields(&s, args->symbol.symbols); - trace_seq_do_printf(&s); - trace_seq_destroy(&s); - printf(")"); - break; - case TEP_PRINT_HEX: - printf("__print_hex("); - print_args(args->hex.field); - printf(", "); - print_args(args->hex.size); - printf(")"); - break; - case TEP_PRINT_HEX_STR: - printf("__print_hex_str("); - print_args(args->hex.field); - printf(", "); - print_args(args->hex.size); - printf(")"); - break; - case TEP_PRINT_INT_ARRAY: - printf("__print_array("); - print_args(args->int_array.field); - printf(", "); - print_args(args->int_array.count); - printf(", "); - print_args(args->int_array.el_size); - printf(")"); - break; - case TEP_PRINT_STRING: - case TEP_PRINT_BSTRING: - printf("__get_str(%s)", args->string.string); - break; - case TEP_PRINT_BITMASK: - printf("__get_bitmask(%s)", args->bitmask.bitmask); - break; - case 
TEP_PRINT_TYPE: - printf("(%s)", args->typecast.type); - print_args(args->typecast.item); - break; - case TEP_PRINT_OP: - if (strcmp(args->op.op, ":") == 0) - print_paren = 0; - if (print_paren) - printf("("); - print_args(args->op.left); - printf(" %s ", args->op.op); - print_args(args->op.right); - if (print_paren) - printf(")"); - break; - default: - /* we should warn... */ - return; - } - if (args->next) { - printf("\n"); - print_args(args->next); - } -} - -static void parse_header_field(const char *field, - int *offset, int *size, int mandatory) -{ - unsigned long long save_input_buf_ptr; - unsigned long long save_input_buf_siz; - char *token; - int type; - - save_input_buf_ptr = input_buf_ptr; - save_input_buf_siz = input_buf_siz; - - if (read_expected(TEP_EVENT_ITEM, "field") < 0) - return; - if (read_expected(TEP_EVENT_OP, ":") < 0) - return; - - /* type */ - if (read_expect_type(TEP_EVENT_ITEM, &token) < 0) - goto fail; - free_token(token); - - /* - * If this is not a mandatory field, then test it first. 
- */ - if (mandatory) { - if (read_expected(TEP_EVENT_ITEM, field) < 0) - return; - } else { - if (read_expect_type(TEP_EVENT_ITEM, &token) < 0) - goto fail; - if (strcmp(token, field) != 0) - goto discard; - free_token(token); - } - - if (read_expected(TEP_EVENT_OP, ";") < 0) - return; - if (read_expected(TEP_EVENT_ITEM, "offset") < 0) - return; - if (read_expected(TEP_EVENT_OP, ":") < 0) - return; - if (read_expect_type(TEP_EVENT_ITEM, &token) < 0) - goto fail; - *offset = atoi(token); - free_token(token); - if (read_expected(TEP_EVENT_OP, ";") < 0) - return; - if (read_expected(TEP_EVENT_ITEM, "size") < 0) - return; - if (read_expected(TEP_EVENT_OP, ":") < 0) - return; - if (read_expect_type(TEP_EVENT_ITEM, &token) < 0) - goto fail; - *size = atoi(token); - free_token(token); - if (read_expected(TEP_EVENT_OP, ";") < 0) - return; - type = read_token(&token); - if (type != TEP_EVENT_NEWLINE) { - /* newer versions of the kernel have a "signed" type */ - if (type != TEP_EVENT_ITEM) - goto fail; - - if (strcmp(token, "signed") != 0) - goto fail; - - free_token(token); - - if (read_expected(TEP_EVENT_OP, ":") < 0) - return; - - if (read_expect_type(TEP_EVENT_ITEM, &token)) - goto fail; - - free_token(token); - if (read_expected(TEP_EVENT_OP, ";") < 0) - return; - - if (read_expect_type(TEP_EVENT_NEWLINE, &token)) - goto fail; - } - fail: - free_token(token); - return; - - discard: - input_buf_ptr = save_input_buf_ptr; - input_buf_siz = save_input_buf_siz; - *offset = 0; - *size = 0; - free_token(token); -} - -/** - * tep_parse_header_page - parse the data stored in the header page - * @tep: a handle to the trace event parser context - * @buf: the buffer storing the header page format string - * @size: the size of @buf - * @long_size: the long size to use if there is no header - * - * This parses the header page format for information on the - * ring buffer used. 
The @buf should be copied from - * - * /sys/kernel/debug/tracing/events/header_page - */ -int tep_parse_header_page(struct tep_handle *tep, char *buf, unsigned long size, - int long_size) -{ - int ignore; - - if (!size) { - /* - * Old kernels did not have header page info. - * Sorry but we just use what we find here in user space. - */ - tep->header_page_ts_size = sizeof(long long); - tep->header_page_size_size = long_size; - tep->header_page_data_offset = sizeof(long long) + long_size; - tep->old_format = 1; - return -1; - } - init_input_buf(buf, size); - - parse_header_field("timestamp", &tep->header_page_ts_offset, - &tep->header_page_ts_size, 1); - parse_header_field("commit", &tep->header_page_size_offset, - &tep->header_page_size_size, 1); - parse_header_field("overwrite", &tep->header_page_overwrite, - &ignore, 0); - parse_header_field("data", &tep->header_page_data_offset, - &tep->header_page_data_size, 1); - - return 0; -} - -static int event_matches(struct tep_event *event, - int id, const char *sys_name, - const char *event_name) -{ - if (id >= 0 && id != event->id) - return 0; - - if (event_name && (strcmp(event_name, event->name) != 0)) - return 0; - - if (sys_name && (strcmp(sys_name, event->system) != 0)) - return 0; - - return 1; -} - -static void free_handler(struct event_handler *handle) -{ - free((void *)handle->sys_name); - free((void *)handle->event_name); - free(handle); -} - -static int find_event_handle(struct tep_handle *tep, struct tep_event *event) -{ - struct event_handler *handle, **next; - - for (next = &tep->handlers; *next; - next = &(*next)->next) { - handle = *next; - if (event_matches(event, handle->id, - handle->sys_name, - handle->event_name)) - break; - } - - if (!(*next)) - return 0; - - pr_stat("overriding event (%d) %s:%s with new print handler", - event->id, event->system, event->name); - - event->handler = handle->func; - event->context = handle->context; - - *next = handle->next; - free_handler(handle); - - return 1; -} - 
-/** - * parse_format - parse the event format - * @buf: the buffer storing the event format string - * @size: the size of @buf - * @sys: the system the event belongs to - * - * This parses the event format and creates an event structure - * to quickly parse raw data for a given event. - * - * These files currently come from: - * - * /sys/kernel/debug/tracing/events/.../.../format - */ -static enum tep_errno parse_format(struct tep_event **eventp, - struct tep_handle *tep, const char *buf, - unsigned long size, const char *sys) -{ - struct tep_event *event; - int ret; - - init_input_buf(buf, size); - - *eventp = event = alloc_event(); - if (!event) - return TEP_ERRNO__MEM_ALLOC_FAILED; - - event->name = event_read_name(); - if (!event->name) { - /* Bad event? */ - ret = TEP_ERRNO__MEM_ALLOC_FAILED; - goto event_alloc_failed; - } - - if (strcmp(sys, "ftrace") == 0) { - event->flags |= TEP_EVENT_FL_ISFTRACE; - - if (strcmp(event->name, "bprint") == 0) - event->flags |= TEP_EVENT_FL_ISBPRINT; - } - - event->id = event_read_id(); - if (event->id < 0) { - ret = TEP_ERRNO__READ_ID_FAILED; - /* - * This isn't an allocation error actually. - * But as the ID is critical, just bail out. - */ - goto event_alloc_failed; - } - - event->system = strdup(sys); - if (!event->system) { - ret = TEP_ERRNO__MEM_ALLOC_FAILED; - goto event_alloc_failed; - } - - /* Add tep to event so that it can be referenced */ - event->tep = tep; - - ret = event_read_format(event); - if (ret < 0) { - ret = TEP_ERRNO__READ_FORMAT_FAILED; - goto event_parse_failed; - } - - /* - * If the event has an override, don't print warnings if the event - * print format fails to parse. 
- */ - if (tep && find_event_handle(tep, event)) - show_warning = 0; - - ret = event_read_print(event); - show_warning = 1; - - if (ret < 0) { - ret = TEP_ERRNO__READ_PRINT_FAILED; - goto event_parse_failed; - } - - if (!ret && (event->flags & TEP_EVENT_FL_ISFTRACE)) { - struct tep_format_field *field; - struct tep_print_arg *arg, **list; - - /* old ftrace had no args */ - list = &event->print_fmt.args; - for (field = event->format.fields; field; field = field->next) { - arg = alloc_arg(); - if (!arg) { - event->flags |= TEP_EVENT_FL_FAILED; - return TEP_ERRNO__OLD_FTRACE_ARG_FAILED; - } - arg->type = TEP_PRINT_FIELD; - arg->field.name = strdup(field->name); - if (!arg->field.name) { - event->flags |= TEP_EVENT_FL_FAILED; - free_arg(arg); - return TEP_ERRNO__OLD_FTRACE_ARG_FAILED; - } - arg->field.field = field; - *list = arg; - list = &arg->next; - } - } - - if (!(event->flags & TEP_EVENT_FL_ISBPRINT)) - event->print_fmt.print_cache = parse_args(event, - event->print_fmt.format, - event->print_fmt.args); - - return 0; - - event_parse_failed: - event->flags |= TEP_EVENT_FL_FAILED; - return ret; - - event_alloc_failed: - free(event->system); - free(event->name); - free(event); - *eventp = NULL; - return ret; -} - -static enum tep_errno -__parse_event(struct tep_handle *tep, - struct tep_event **eventp, - const char *buf, unsigned long size, - const char *sys) -{ - int ret = parse_format(eventp, tep, buf, size, sys); - struct tep_event *event = *eventp; - - if (event == NULL) - return ret; - - if (tep && add_event(tep, event)) { - ret = TEP_ERRNO__MEM_ALLOC_FAILED; - goto event_add_failed; - } - -#define PRINT_ARGS 0 - if (PRINT_ARGS && event->print_fmt.args) - print_args(event->print_fmt.args); - - return 0; - -event_add_failed: - free_tep_event(event); - return ret; -} - -/** - * tep_parse_format - parse the event format - * @tep: a handle to the trace event parser context - * @eventp: returned format - * @buf: the buffer storing the event format string - * @size: 
the size of @buf - * @sys: the system the event belongs to - * - * This parses the event format and creates an event structure - * to quickly parse raw data for a given event. - * - * These files currently come from: - * - * /sys/kernel/debug/tracing/events/.../.../format - */ -enum tep_errno tep_parse_format(struct tep_handle *tep, - struct tep_event **eventp, - const char *buf, - unsigned long size, const char *sys) -{ - return __parse_event(tep, eventp, buf, size, sys); -} - -/** - * tep_parse_event - parse the event format - * @tep: a handle to the trace event parser context - * @buf: the buffer storing the event format string - * @size: the size of @buf - * @sys: the system the event belongs to - * - * This parses the event format and creates an event structure - * to quickly parse raw data for a given event. - * - * These files currently come from: - * - * /sys/kernel/debug/tracing/events/.../.../format - */ -enum tep_errno tep_parse_event(struct tep_handle *tep, const char *buf, - unsigned long size, const char *sys) -{ - struct tep_event *event = NULL; - return __parse_event(tep, &event, buf, size, sys); -} - -int get_field_val(struct trace_seq *s, struct tep_format_field *field, - const char *name, struct tep_record *record, - unsigned long long *val, int err) -{ - if (!field) { - if (err) - trace_seq_printf(s, "", name); - return -1; - } - - if (tep_read_number_field(field, record->data, val)) { - if (err) - trace_seq_printf(s, " %s=INVALID", name); - return -1; - } - - return 0; -} - -/** - * tep_get_field_raw - return the raw pointer into the data field - * @s: The seq to print to on error - * @event: the event that the field is for - * @name: The name of the field - * @record: The record with the field name. - * @len: place to store the field length. - * @err: print default error if failed. - * - * Returns a pointer into record->data of the field and places - * the length of the field in @len. - * - * On failure, it returns NULL. 
- */ -void *tep_get_field_raw(struct trace_seq *s, struct tep_event *event, - const char *name, struct tep_record *record, - int *len, int err) -{ - struct tep_format_field *field; - void *data = record->data; - unsigned offset; - int dummy; - - if (!event) - return NULL; - - field = tep_find_field(event, name); - - if (!field) { - if (err) - trace_seq_printf(s, "", name); - return NULL; - } - - /* Allow @len to be NULL */ - if (!len) - len = &dummy; - - offset = field->offset; - if (field->flags & TEP_FIELD_IS_DYNAMIC) { - offset = tep_read_number(event->tep, - data + offset, field->size); - *len = offset >> 16; - offset &= 0xffff; - if (field->flags & TEP_FIELD_IS_RELATIVE) - offset += field->offset + field->size; - } else - *len = field->size; - - return data + offset; -} - -/** - * tep_get_field_val - find a field and return its value - * @s: The seq to print to on error - * @event: the event that the field is for - * @name: The name of the field - * @record: The record with the field name. - * @val: place to store the value of the field. - * @err: print default error if failed. - * - * Returns 0 on success -1 on field not found. - */ -int tep_get_field_val(struct trace_seq *s, struct tep_event *event, - const char *name, struct tep_record *record, - unsigned long long *val, int err) -{ - struct tep_format_field *field; - - if (!event) - return -1; - - field = tep_find_field(event, name); - - return get_field_val(s, field, name, record, val, err); -} - -/** - * tep_get_common_field_val - find a common field and return its value - * @s: The seq to print to on error - * @event: the event that the field is for - * @name: The name of the field - * @record: The record with the field name. - * @val: place to store the value of the field. - * @err: print default error if failed. - * - * Returns 0 on success -1 on field not found. 
- */ -int tep_get_common_field_val(struct trace_seq *s, struct tep_event *event, - const char *name, struct tep_record *record, - unsigned long long *val, int err) -{ - struct tep_format_field *field; - - if (!event) - return -1; - - field = tep_find_common_field(event, name); - - return get_field_val(s, field, name, record, val, err); -} - -/** - * tep_get_any_field_val - find a any field and return its value - * @s: The seq to print to on error - * @event: the event that the field is for - * @name: The name of the field - * @record: The record with the field name. - * @val: place to store the value of the field. - * @err: print default error if failed. - * - * Returns 0 on success -1 on field not found. - */ -int tep_get_any_field_val(struct trace_seq *s, struct tep_event *event, - const char *name, struct tep_record *record, - unsigned long long *val, int err) -{ - struct tep_format_field *field; - - if (!event) - return -1; - - field = tep_find_any_field(event, name); - - return get_field_val(s, field, name, record, val, err); -} - -/** - * tep_print_num_field - print a field and a format - * @s: The seq to print to - * @fmt: The printf format to print the field with. - * @event: the event that the field is for - * @name: The name of the field - * @record: The record with the field name. - * @err: print default error if failed. - * - * Returns positive value on success, negative in case of an error, - * or 0 if buffer is full. 
- */ -int tep_print_num_field(struct trace_seq *s, const char *fmt, - struct tep_event *event, const char *name, - struct tep_record *record, int err) -{ - struct tep_format_field *field = tep_find_field(event, name); - unsigned long long val; - - if (!field) - goto failed; - - if (tep_read_number_field(field, record->data, &val)) - goto failed; - - return trace_seq_printf(s, fmt, val); - - failed: - if (err) - trace_seq_printf(s, "CAN'T FIND FIELD \"%s\"", name); - return -1; -} - -/** - * tep_print_func_field - print a field and a format for function pointers - * @s: The seq to print to - * @fmt: The printf format to print the field with. - * @event: the event that the field is for - * @name: The name of the field - * @record: The record with the field name. - * @err: print default error if failed. - * - * Returns positive value on success, negative in case of an error, - * or 0 if buffer is full. - */ -int tep_print_func_field(struct trace_seq *s, const char *fmt, - struct tep_event *event, const char *name, - struct tep_record *record, int err) -{ - struct tep_format_field *field = tep_find_field(event, name); - struct tep_handle *tep = event->tep; - unsigned long long val; - struct func_map *func; - char tmp[128]; - - if (!field) - goto failed; - - if (tep_read_number_field(field, record->data, &val)) - goto failed; - - func = find_func(tep, val); - - if (func) - snprintf(tmp, 128, "%s/0x%llx", func->func, func->addr - val); - else - sprintf(tmp, "0x%08llx", val); - - return trace_seq_printf(s, fmt, tmp); - - failed: - if (err) - trace_seq_printf(s, "CAN'T FIND FIELD \"%s\"", name); - return -1; -} - -static void free_func_handle(struct tep_function_handler *func) -{ - struct func_params *params; - - free(func->name); - - while (func->params) { - params = func->params; - func->params = params->next; - free(params); - } - - free(func); -} - -/** - * tep_register_print_function - register a helper function - * @tep: a handle to the trace event parser context - * 
@func: the function to process the helper function - * @ret_type: the return type of the helper function - * @name: the name of the helper function - * @parameters: A list of enum tep_func_arg_type - * - * Some events may have helper functions in the print format arguments. - * This allows a plugin to dynamically create a way to process one - * of these functions. - * - * The @parameters is a variable list of tep_func_arg_type enums that - * must end with TEP_FUNC_ARG_VOID. - */ -int tep_register_print_function(struct tep_handle *tep, - tep_func_handler func, - enum tep_func_arg_type ret_type, - char *name, ...) -{ - struct tep_function_handler *func_handle; - struct func_params **next_param; - struct func_params *param; - enum tep_func_arg_type type; - va_list ap; - int ret; - - func_handle = find_func_handler(tep, name); - if (func_handle) { - /* - * This is most like caused by the users own - * plugins updating the function. This overrides the - * system defaults. - */ - pr_stat("override of function helper '%s'", name); - remove_func_handler(tep, name); - } - - func_handle = calloc(1, sizeof(*func_handle)); - if (!func_handle) { - do_warning("Failed to allocate function handler"); - return TEP_ERRNO__MEM_ALLOC_FAILED; - } - - func_handle->ret_type = ret_type; - func_handle->name = strdup(name); - func_handle->func = func; - if (!func_handle->name) { - do_warning("Failed to allocate function name"); - free(func_handle); - return TEP_ERRNO__MEM_ALLOC_FAILED; - } - - next_param = &(func_handle->params); - va_start(ap, name); - for (;;) { - type = va_arg(ap, enum tep_func_arg_type); - if (type == TEP_FUNC_ARG_VOID) - break; - - if (type >= TEP_FUNC_ARG_MAX_TYPES) { - do_warning("Invalid argument type %d", type); - ret = TEP_ERRNO__INVALID_ARG_TYPE; - goto out_free; - } - - param = malloc(sizeof(*param)); - if (!param) { - do_warning("Failed to allocate function param"); - ret = TEP_ERRNO__MEM_ALLOC_FAILED; - goto out_free; - } - param->type = type; - param->next = 
NULL; - - *next_param = param; - next_param = &(param->next); - - func_handle->nr_args++; - } - va_end(ap); - - func_handle->next = tep->func_handlers; - tep->func_handlers = func_handle; - - return 0; - out_free: - va_end(ap); - free_func_handle(func_handle); - return ret; -} - -/** - * tep_unregister_print_function - unregister a helper function - * @tep: a handle to the trace event parser context - * @func: the function to process the helper function - * @name: the name of the helper function - * - * This function removes existing print handler for function @name. - * - * Returns 0 if the handler was removed successully, -1 otherwise. - */ -int tep_unregister_print_function(struct tep_handle *tep, - tep_func_handler func, char *name) -{ - struct tep_function_handler *func_handle; - - func_handle = find_func_handler(tep, name); - if (func_handle && func_handle->func == func) { - remove_func_handler(tep, name); - return 0; - } - return -1; -} - -static struct tep_event *search_event(struct tep_handle *tep, int id, - const char *sys_name, - const char *event_name) -{ - struct tep_event *event; - - if (id >= 0) { - /* search by id */ - event = tep_find_event(tep, id); - if (!event) - return NULL; - if (event_name && (strcmp(event_name, event->name) != 0)) - return NULL; - if (sys_name && (strcmp(sys_name, event->system) != 0)) - return NULL; - } else { - event = tep_find_event_by_name(tep, sys_name, event_name); - if (!event) - return NULL; - } - return event; -} - -/** - * tep_register_event_handler - register a way to parse an event - * @tep: a handle to the trace event parser context - * @id: the id of the event to register - * @sys_name: the system name the event belongs to - * @event_name: the name of the event - * @func: the function to call to parse the event information - * @context: the data to be passed to @func - * - * This function allows a developer to override the parsing of - * a given event. 
If for some reason the default print format - * is not sufficient, this function will register a function - * for an event to be used to parse the data instead. - * - * If @id is >= 0, then it is used to find the event. - * else @sys_name and @event_name are used. - * - * Returns: - * TEP_REGISTER_SUCCESS_OVERWRITE if an existing handler is overwritten - * TEP_REGISTER_SUCCESS if a new handler is registered successfully - * negative TEP_ERRNO_... in case of an error - * - */ -int tep_register_event_handler(struct tep_handle *tep, int id, - const char *sys_name, const char *event_name, - tep_event_handler_func func, void *context) -{ - struct tep_event *event; - struct event_handler *handle; - - event = search_event(tep, id, sys_name, event_name); - if (event == NULL) - goto not_found; - - pr_stat("overriding event (%d) %s:%s with new print handler", - event->id, event->system, event->name); - - event->handler = func; - event->context = context; - return TEP_REGISTER_SUCCESS_OVERWRITE; - - not_found: - /* Save for later use. 
*/ - handle = calloc(1, sizeof(*handle)); - if (!handle) { - do_warning("Failed to allocate event handler"); - return TEP_ERRNO__MEM_ALLOC_FAILED; - } - - handle->id = id; - if (event_name) - handle->event_name = strdup(event_name); - if (sys_name) - handle->sys_name = strdup(sys_name); - - if ((event_name && !handle->event_name) || - (sys_name && !handle->sys_name)) { - do_warning("Failed to allocate event/sys name"); - free((void *)handle->event_name); - free((void *)handle->sys_name); - free(handle); - return TEP_ERRNO__MEM_ALLOC_FAILED; - } - - handle->func = func; - handle->next = tep->handlers; - tep->handlers = handle; - handle->context = context; - - return TEP_REGISTER_SUCCESS; -} - -static int handle_matches(struct event_handler *handler, int id, - const char *sys_name, const char *event_name, - tep_event_handler_func func, void *context) -{ - if (id >= 0 && id != handler->id) - return 0; - - if (event_name && (strcmp(event_name, handler->event_name) != 0)) - return 0; - - if (sys_name && (strcmp(sys_name, handler->sys_name) != 0)) - return 0; - - if (func != handler->func || context != handler->context) - return 0; - - return 1; -} - -/** - * tep_unregister_event_handler - unregister an existing event handler - * @tep: a handle to the trace event parser context - * @id: the id of the event to unregister - * @sys_name: the system name the handler belongs to - * @event_name: the name of the event handler - * @func: the function to call to parse the event information - * @context: the data to be passed to @func - * - * This function removes existing event handler (parser). - * - * If @id is >= 0, then it is used to find the event. - * else @sys_name and @event_name are used. - * - * Returns 0 if handler was removed successfully, -1 if event was not found. 
- */ -int tep_unregister_event_handler(struct tep_handle *tep, int id, - const char *sys_name, const char *event_name, - tep_event_handler_func func, void *context) -{ - struct tep_event *event; - struct event_handler *handle; - struct event_handler **next; - - event = search_event(tep, id, sys_name, event_name); - if (event == NULL) - goto not_found; - - if (event->handler == func && event->context == context) { - pr_stat("removing override handler for event (%d) %s:%s. Going back to default handler.", - event->id, event->system, event->name); - - event->handler = NULL; - event->context = NULL; - return 0; - } - -not_found: - for (next = &tep->handlers; *next; next = &(*next)->next) { - handle = *next; - if (handle_matches(handle, id, sys_name, event_name, - func, context)) - break; - } - - if (!(*next)) - return -1; - - *next = handle->next; - free_handler(handle); - - return 0; -} - -/** - * tep_alloc - create a tep handle - */ -struct tep_handle *tep_alloc(void) -{ - struct tep_handle *tep = calloc(1, sizeof(*tep)); - - if (tep) { - tep->ref_count = 1; - tep->host_bigendian = tep_is_bigendian(); - } - - return tep; -} - -void tep_ref(struct tep_handle *tep) -{ - tep->ref_count++; -} - -int tep_get_ref(struct tep_handle *tep) -{ - if (tep) - return tep->ref_count; - return 0; -} - -__hidden void free_tep_format_field(struct tep_format_field *field) -{ - free(field->type); - if (field->alias != field->name) - free(field->alias); - free(field->name); - free(field); -} - -static void free_format_fields(struct tep_format_field *field) -{ - struct tep_format_field *next; - - while (field) { - next = field->next; - free_tep_format_field(field); - field = next; - } -} - -static void free_formats(struct tep_format *format) -{ - free_format_fields(format->common_fields); - free_format_fields(format->fields); -} - -__hidden void free_tep_event(struct tep_event *event) -{ - free(event->name); - free(event->system); - - free_formats(&event->format); - - 
free(event->print_fmt.format); - free_args(event->print_fmt.args); - free_parse_args(event->print_fmt.print_cache); - free(event); -} - -/** - * tep_free - free a tep handle - * @tep: the tep handle to free - */ -void tep_free(struct tep_handle *tep) -{ - struct cmdline_list *cmdlist, *cmdnext; - struct func_list *funclist, *funcnext; - struct printk_list *printklist, *printknext; - struct tep_function_handler *func_handler; - struct event_handler *handle; - int i; - - if (!tep) - return; - - cmdlist = tep->cmdlist; - funclist = tep->funclist; - printklist = tep->printklist; - - tep->ref_count--; - if (tep->ref_count) - return; - - if (tep->cmdlines) { - for (i = 0; i < tep->cmdline_count; i++) - free(tep->cmdlines[i].comm); - free(tep->cmdlines); - } - - while (cmdlist) { - cmdnext = cmdlist->next; - free(cmdlist->comm); - free(cmdlist); - cmdlist = cmdnext; - } - - if (tep->func_map) { - for (i = 0; i < (int)tep->func_count; i++) { - free(tep->func_map[i].func); - free(tep->func_map[i].mod); - } - free(tep->func_map); - } - - while (funclist) { - funcnext = funclist->next; - free(funclist->func); - free(funclist->mod); - free(funclist); - funclist = funcnext; - } - - while (tep->func_handlers) { - func_handler = tep->func_handlers; - tep->func_handlers = func_handler->next; - free_func_handle(func_handler); - } - - if (tep->printk_map) { - for (i = 0; i < (int)tep->printk_count; i++) - free(tep->printk_map[i].printk); - free(tep->printk_map); - } - - while (printklist) { - printknext = printklist->next; - free(printklist->printk); - free(printklist); - printklist = printknext; - } - - for (i = 0; i < tep->nr_events; i++) - free_tep_event(tep->events[i]); - - while (tep->handlers) { - handle = tep->handlers; - tep->handlers = handle->next; - free_handler(handle); - } - - free(tep->events); - free(tep->sort_events); - free(tep->func_resolver); - free_tep_plugin_paths(tep); - - free(tep); -} - -void tep_unref(struct tep_handle *tep) -{ - tep_free(tep); -} diff --git 
a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h deleted file mode 100644 index 41d4f9f6a843..000000000000 --- a/tools/lib/traceevent/event-parse.h +++ /dev/null @@ -1,750 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1 */ -/* - * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt - * - */ -#ifndef _PARSE_EVENTS_H -#define _PARSE_EVENTS_H - -#include -#include -#include -#include -#include - -#include "trace-seq.h" - -#ifndef __maybe_unused -#define __maybe_unused __attribute__((unused)) -#endif - -#ifndef DEBUG_RECORD -#define DEBUG_RECORD 0 -#endif - -struct tep_record { - unsigned long long ts; - unsigned long long offset; - long long missed_events; /* buffer dropped events before */ - int record_size; /* size of binary record */ - int size; /* size of data */ - void *data; - int cpu; - int ref_count; - int locked; /* Do not free, even if ref_count is zero */ - void *priv; -#if DEBUG_RECORD - struct tep_record *prev; - struct tep_record *next; - long alloc_addr; -#endif -}; - -/* ----------------------- tep ----------------------- */ - -struct tep_handle; -struct tep_event; - -typedef int (*tep_event_handler_func)(struct trace_seq *s, - struct tep_record *record, - struct tep_event *event, - void *context); - -typedef int (*tep_plugin_load_func)(struct tep_handle *tep); -typedef int (*tep_plugin_unload_func)(struct tep_handle *tep); - -struct tep_plugin_option { - struct tep_plugin_option *next; - void *handle; - char *file; - char *name; - char *plugin_alias; - char *description; - const char *value; - void *priv; - int set; -}; - -/* - * Plugin hooks that can be called: - * - * TEP_PLUGIN_LOADER: (required) - * The function name to initialized the plugin. 
- * - * int TEP_PLUGIN_LOADER(struct tep_handle *tep) - * - * TEP_PLUGIN_UNLOADER: (optional) - * The function called just before unloading - * - * int TEP_PLUGIN_UNLOADER(struct tep_handle *tep) - * - * TEP_PLUGIN_OPTIONS: (optional) - * Plugin options that can be set before loading - * - * struct tep_plugin_option TEP_PLUGIN_OPTIONS[] = { - * { - * .name = "option-name", - * .plugin_alias = "override-file-name", (optional) - * .description = "description of option to show users", - * }, - * { - * .name = NULL, - * }, - * }; - * - * Array must end with .name = NULL; - * - * - * .plugin_alias is used to give a shorter name to access - * the vairable. Useful if a plugin handles more than one event. - * - * If .value is not set, then it is considered a boolean and only - * .set will be processed. If .value is defined, then it is considered - * a string option and .set will be ignored. - * - * TEP_PLUGIN_ALIAS: (optional) - * The name to use for finding options (uses filename if not defined) - */ -#define TEP_PLUGIN_LOADER tep_plugin_loader -#define TEP_PLUGIN_UNLOADER tep_plugin_unloader -#define TEP_PLUGIN_OPTIONS tep_plugin_options -#define TEP_PLUGIN_ALIAS tep_plugin_alias -#define _MAKE_STR(x) #x -#define MAKE_STR(x) _MAKE_STR(x) -#define TEP_PLUGIN_LOADER_NAME MAKE_STR(TEP_PLUGIN_LOADER) -#define TEP_PLUGIN_UNLOADER_NAME MAKE_STR(TEP_PLUGIN_UNLOADER) -#define TEP_PLUGIN_OPTIONS_NAME MAKE_STR(TEP_PLUGIN_OPTIONS) -#define TEP_PLUGIN_ALIAS_NAME MAKE_STR(TEP_PLUGIN_ALIAS) - -enum tep_format_flags { - TEP_FIELD_IS_ARRAY = 1, - TEP_FIELD_IS_POINTER = 2, - TEP_FIELD_IS_SIGNED = 4, - TEP_FIELD_IS_STRING = 8, - TEP_FIELD_IS_DYNAMIC = 16, - TEP_FIELD_IS_LONG = 32, - TEP_FIELD_IS_FLAG = 64, - TEP_FIELD_IS_SYMBOLIC = 128, - TEP_FIELD_IS_RELATIVE = 256, -}; - -struct tep_format_field { - struct tep_format_field *next; - struct tep_event *event; - char *type; - char *name; - char *alias; - int offset; - int size; - unsigned int arraylen; - unsigned int elementsize; - unsigned 
long flags; -}; - -struct tep_format { - int nr_common; - int nr_fields; - struct tep_format_field *common_fields; - struct tep_format_field *fields; -}; - -struct tep_print_arg_atom { - char *atom; -}; - -struct tep_print_arg_string { - char *string; - struct tep_format_field *field; -}; - -struct tep_print_arg_bitmask { - char *bitmask; - struct tep_format_field *field; -}; - -struct tep_print_arg_field { - char *name; - struct tep_format_field *field; -}; - -struct tep_print_flag_sym { - struct tep_print_flag_sym *next; - char *value; - char *str; -}; - -struct tep_print_arg_typecast { - char *type; - struct tep_print_arg *item; -}; - -struct tep_print_arg_flags { - struct tep_print_arg *field; - char *delim; - struct tep_print_flag_sym *flags; -}; - -struct tep_print_arg_symbol { - struct tep_print_arg *field; - struct tep_print_flag_sym *symbols; -}; - -struct tep_print_arg_hex { - struct tep_print_arg *field; - struct tep_print_arg *size; -}; - -struct tep_print_arg_int_array { - struct tep_print_arg *field; - struct tep_print_arg *count; - struct tep_print_arg *el_size; -}; - -struct tep_print_arg_dynarray { - struct tep_format_field *field; - struct tep_print_arg *index; -}; - -struct tep_print_arg; - -struct tep_print_arg_op { - char *op; - int prio; - struct tep_print_arg *left; - struct tep_print_arg *right; -}; - -struct tep_function_handler; - -struct tep_print_arg_func { - struct tep_function_handler *func; - struct tep_print_arg *args; -}; - -enum tep_print_arg_type { - TEP_PRINT_NULL, - TEP_PRINT_ATOM, - TEP_PRINT_FIELD, - TEP_PRINT_FLAGS, - TEP_PRINT_SYMBOL, - TEP_PRINT_HEX, - TEP_PRINT_INT_ARRAY, - TEP_PRINT_TYPE, - TEP_PRINT_STRING, - TEP_PRINT_BSTRING, - TEP_PRINT_DYNAMIC_ARRAY, - TEP_PRINT_OP, - TEP_PRINT_FUNC, - TEP_PRINT_BITMASK, - TEP_PRINT_DYNAMIC_ARRAY_LEN, - TEP_PRINT_HEX_STR, -}; - -struct tep_print_arg { - struct tep_print_arg *next; - enum tep_print_arg_type type; - union { - struct tep_print_arg_atom atom; - struct tep_print_arg_field 
field; - struct tep_print_arg_typecast typecast; - struct tep_print_arg_flags flags; - struct tep_print_arg_symbol symbol; - struct tep_print_arg_hex hex; - struct tep_print_arg_int_array int_array; - struct tep_print_arg_func func; - struct tep_print_arg_string string; - struct tep_print_arg_bitmask bitmask; - struct tep_print_arg_op op; - struct tep_print_arg_dynarray dynarray; - }; -}; - -struct tep_print_parse; - -struct tep_print_fmt { - char *format; - struct tep_print_arg *args; - struct tep_print_parse *print_cache; -}; - -struct tep_event { - struct tep_handle *tep; - char *name; - int id; - int flags; - struct tep_format format; - struct tep_print_fmt print_fmt; - char *system; - tep_event_handler_func handler; - void *context; -}; - -enum { - TEP_EVENT_FL_ISFTRACE = 0x01, - TEP_EVENT_FL_ISPRINT = 0x02, - TEP_EVENT_FL_ISBPRINT = 0x04, - TEP_EVENT_FL_ISFUNCENT = 0x10, - TEP_EVENT_FL_ISFUNCRET = 0x20, - TEP_EVENT_FL_NOHANDLE = 0x40, - TEP_EVENT_FL_PRINTRAW = 0x80, - - TEP_EVENT_FL_FAILED = 0x80000000 -}; - -enum tep_event_sort_type { - TEP_EVENT_SORT_ID, - TEP_EVENT_SORT_NAME, - TEP_EVENT_SORT_SYSTEM, -}; - -enum tep_event_type { - TEP_EVENT_ERROR, - TEP_EVENT_NONE, - TEP_EVENT_SPACE, - TEP_EVENT_NEWLINE, - TEP_EVENT_OP, - TEP_EVENT_DELIM, - TEP_EVENT_ITEM, - TEP_EVENT_DQUOTE, - TEP_EVENT_SQUOTE, -}; - -typedef unsigned long long (*tep_func_handler)(struct trace_seq *s, - unsigned long long *args); - -enum tep_func_arg_type { - TEP_FUNC_ARG_VOID, - TEP_FUNC_ARG_INT, - TEP_FUNC_ARG_LONG, - TEP_FUNC_ARG_STRING, - TEP_FUNC_ARG_PTR, - TEP_FUNC_ARG_MAX_TYPES -}; - -enum tep_flag { - TEP_NSEC_OUTPUT = 1, /* output in NSECS */ - TEP_DISABLE_SYS_PLUGINS = 1 << 1, - TEP_DISABLE_PLUGINS = 1 << 2, -}; - -#define TEP_ERRORS \ - _PE(MEM_ALLOC_FAILED, "failed to allocate memory"), \ - _PE(PARSE_EVENT_FAILED, "failed to parse event"), \ - _PE(READ_ID_FAILED, "failed to read event id"), \ - _PE(READ_FORMAT_FAILED, "failed to read event format"), \ - _PE(READ_PRINT_FAILED, 
"failed to read event print fmt"), \ - _PE(OLD_FTRACE_ARG_FAILED,"failed to allocate field name for ftrace"),\ - _PE(INVALID_ARG_TYPE, "invalid argument type"), \ - _PE(INVALID_EXP_TYPE, "invalid expression type"), \ - _PE(INVALID_OP_TYPE, "invalid operator type"), \ - _PE(INVALID_EVENT_NAME, "invalid event name"), \ - _PE(EVENT_NOT_FOUND, "no event found"), \ - _PE(SYNTAX_ERROR, "syntax error"), \ - _PE(ILLEGAL_RVALUE, "illegal rvalue"), \ - _PE(ILLEGAL_LVALUE, "illegal lvalue for string comparison"), \ - _PE(INVALID_REGEX, "regex did not compute"), \ - _PE(ILLEGAL_STRING_CMP, "illegal comparison for string"), \ - _PE(ILLEGAL_INTEGER_CMP,"illegal comparison for integer"), \ - _PE(REPARENT_NOT_OP, "cannot reparent other than OP"), \ - _PE(REPARENT_FAILED, "failed to reparent filter OP"), \ - _PE(BAD_FILTER_ARG, "bad arg in filter tree"), \ - _PE(UNEXPECTED_TYPE, "unexpected type (not a value)"), \ - _PE(ILLEGAL_TOKEN, "illegal token"), \ - _PE(INVALID_PAREN, "open parenthesis cannot come here"), \ - _PE(UNBALANCED_PAREN, "unbalanced number of parenthesis"), \ - _PE(UNKNOWN_TOKEN, "unknown token"), \ - _PE(FILTER_NOT_FOUND, "no filter found"), \ - _PE(NOT_A_NUMBER, "must have number field"), \ - _PE(NO_FILTER, "no filters exists"), \ - _PE(FILTER_MISS, "record does not match to filter") - -#undef _PE -#define _PE(__code, __str) TEP_ERRNO__ ## __code -enum tep_errno { - TEP_ERRNO__SUCCESS = 0, - TEP_ERRNO__FILTER_MATCH = TEP_ERRNO__SUCCESS, - - /* - * Choose an arbitrary negative big number not to clash with standard - * errno since SUS requires the errno has distinct positive values. - * See 'Issue 6' in the link below. 
- * - * https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html - */ - __TEP_ERRNO__START = -100000, - - TEP_ERRORS, - - __TEP_ERRNO__END, -}; -#undef _PE - -struct tep_plugin_list; - -#define INVALID_PLUGIN_LIST_OPTION ((char **)((unsigned long)-1)) - -enum tep_plugin_load_priority { - TEP_PLUGIN_FIRST, - TEP_PLUGIN_LAST, -}; - -int tep_add_plugin_path(struct tep_handle *tep, char *path, - enum tep_plugin_load_priority prio); -struct tep_plugin_list *tep_load_plugins(struct tep_handle *tep); -void tep_unload_plugins(struct tep_plugin_list *plugin_list, - struct tep_handle *tep); -void tep_load_plugins_hook(struct tep_handle *tep, const char *suffix, - void (*load_plugin)(struct tep_handle *tep, - const char *path, - const char *name, - void *data), - void *data); -char **tep_plugin_list_options(void); -void tep_plugin_free_options_list(char **list); -int tep_plugin_add_options(const char *name, - struct tep_plugin_option *options); -int tep_plugin_add_option(const char *name, const char *val); -void tep_plugin_remove_options(struct tep_plugin_option *options); -void tep_plugin_print_options(struct trace_seq *s); -void tep_print_plugins(struct trace_seq *s, - const char *prefix, const char *suffix, - const struct tep_plugin_list *list); - -/* tep_handle */ -typedef char *(tep_func_resolver_t)(void *priv, - unsigned long long *addrp, char **modp); -void tep_set_flag(struct tep_handle *tep, int flag); -void tep_clear_flag(struct tep_handle *tep, enum tep_flag flag); -bool tep_test_flag(struct tep_handle *tep, enum tep_flag flags); - -static inline int tep_is_bigendian(void) -{ - unsigned char str[] = { 0x1, 0x2, 0x3, 0x4 }; - unsigned int val; - - memcpy(&val, str, 4); - return val == 0x01020304; -} - -/* taken from kernel/trace/trace.h */ -enum trace_flag_type { - TRACE_FLAG_IRQS_OFF = 0x01, - TRACE_FLAG_IRQS_NOSUPPORT = 0x02, - TRACE_FLAG_NEED_RESCHED = 0x04, - TRACE_FLAG_HARDIRQ = 0x08, - TRACE_FLAG_SOFTIRQ = 0x10, -}; - -int 
tep_set_function_resolver(struct tep_handle *tep, - tep_func_resolver_t *func, void *priv); -void tep_reset_function_resolver(struct tep_handle *tep); -int tep_register_comm(struct tep_handle *tep, const char *comm, int pid); -int tep_override_comm(struct tep_handle *tep, const char *comm, int pid); -int tep_register_function(struct tep_handle *tep, char *name, - unsigned long long addr, char *mod); -int tep_register_print_string(struct tep_handle *tep, const char *fmt, - unsigned long long addr); -bool tep_is_pid_registered(struct tep_handle *tep, int pid); - -struct tep_event *tep_get_event(struct tep_handle *tep, int index); - -#define TEP_PRINT_INFO "INFO" -#define TEP_PRINT_INFO_RAW "INFO_RAW" -#define TEP_PRINT_COMM "COMM" -#define TEP_PRINT_LATENCY "LATENCY" -#define TEP_PRINT_NAME "NAME" -#define TEP_PRINT_PID 1U -#define TEP_PRINT_TIME 2U -#define TEP_PRINT_CPU 3U - -void tep_print_event(struct tep_handle *tep, struct trace_seq *s, - struct tep_record *record, const char *fmt, ...) 
- __attribute__ ((format (printf, 4, 5))); - -int tep_parse_header_page(struct tep_handle *tep, char *buf, unsigned long size, - int long_size); - -enum tep_errno tep_parse_event(struct tep_handle *tep, const char *buf, - unsigned long size, const char *sys); -enum tep_errno tep_parse_format(struct tep_handle *tep, - struct tep_event **eventp, - const char *buf, - unsigned long size, const char *sys); - -void *tep_get_field_raw(struct trace_seq *s, struct tep_event *event, - const char *name, struct tep_record *record, - int *len, int err); - -int tep_get_field_val(struct trace_seq *s, struct tep_event *event, - const char *name, struct tep_record *record, - unsigned long long *val, int err); -int tep_get_common_field_val(struct trace_seq *s, struct tep_event *event, - const char *name, struct tep_record *record, - unsigned long long *val, int err); -int tep_get_any_field_val(struct trace_seq *s, struct tep_event *event, - const char *name, struct tep_record *record, - unsigned long long *val, int err); - -int tep_print_num_field(struct trace_seq *s, const char *fmt, - struct tep_event *event, const char *name, - struct tep_record *record, int err); - -int tep_print_func_field(struct trace_seq *s, const char *fmt, - struct tep_event *event, const char *name, - struct tep_record *record, int err); - -enum tep_reg_handler { - TEP_REGISTER_SUCCESS = 0, - TEP_REGISTER_SUCCESS_OVERWRITE, -}; - -int tep_register_event_handler(struct tep_handle *tep, int id, - const char *sys_name, const char *event_name, - tep_event_handler_func func, void *context); -int tep_unregister_event_handler(struct tep_handle *tep, int id, - const char *sys_name, const char *event_name, - tep_event_handler_func func, void *context); -int tep_register_print_function(struct tep_handle *tep, - tep_func_handler func, - enum tep_func_arg_type ret_type, - char *name, ...); -int tep_unregister_print_function(struct tep_handle *tep, - tep_func_handler func, char *name); - -struct tep_format_field 
*tep_find_common_field(struct tep_event *event, const char *name); -struct tep_format_field *tep_find_field(struct tep_event *event, const char *name); -struct tep_format_field *tep_find_any_field(struct tep_event *event, const char *name); - -const char *tep_find_function(struct tep_handle *tep, unsigned long long addr); -unsigned long long -tep_find_function_address(struct tep_handle *tep, unsigned long long addr); -unsigned long long tep_read_number(struct tep_handle *tep, const void *ptr, int size); -int tep_read_number_field(struct tep_format_field *field, const void *data, - unsigned long long *value); - -struct tep_event *tep_get_first_event(struct tep_handle *tep); -int tep_get_events_count(struct tep_handle *tep); -struct tep_event *tep_find_event(struct tep_handle *tep, int id); - -struct tep_event * -tep_find_event_by_name(struct tep_handle *tep, const char *sys, const char *name); -struct tep_event * -tep_find_event_by_record(struct tep_handle *tep, struct tep_record *record); - -int tep_data_type(struct tep_handle *tep, struct tep_record *rec); -int tep_data_pid(struct tep_handle *tep, struct tep_record *rec); -int tep_data_preempt_count(struct tep_handle *tep, struct tep_record *rec); -int tep_data_flags(struct tep_handle *tep, struct tep_record *rec); -const char *tep_data_comm_from_pid(struct tep_handle *tep, int pid); -struct tep_cmdline; -struct tep_cmdline *tep_data_pid_from_comm(struct tep_handle *tep, const char *comm, - struct tep_cmdline *next); -int tep_cmdline_pid(struct tep_handle *tep, struct tep_cmdline *cmdline); - -void tep_print_field(struct trace_seq *s, void *data, - struct tep_format_field *field); -void tep_print_fields(struct trace_seq *s, void *data, - int size __maybe_unused, struct tep_event *event); -int tep_strerror(struct tep_handle *tep, enum tep_errno errnum, - char *buf, size_t buflen); - -struct tep_event **tep_list_events(struct tep_handle *tep, enum tep_event_sort_type); -struct tep_event **tep_list_events_copy(struct 
tep_handle *tep, - enum tep_event_sort_type); -struct tep_format_field **tep_event_common_fields(struct tep_event *event); -struct tep_format_field **tep_event_fields(struct tep_event *event); - -enum tep_endian { - TEP_LITTLE_ENDIAN = 0, - TEP_BIG_ENDIAN -}; -int tep_get_cpus(struct tep_handle *tep); -void tep_set_cpus(struct tep_handle *tep, int cpus); -int tep_get_long_size(struct tep_handle *tep); -void tep_set_long_size(struct tep_handle *tep, int long_size); -int tep_get_page_size(struct tep_handle *tep); -void tep_set_page_size(struct tep_handle *tep, int _page_size); -bool tep_is_file_bigendian(struct tep_handle *tep); -void tep_set_file_bigendian(struct tep_handle *tep, enum tep_endian endian); -bool tep_is_local_bigendian(struct tep_handle *tep); -void tep_set_local_bigendian(struct tep_handle *tep, enum tep_endian endian); -int tep_get_header_page_size(struct tep_handle *tep); -int tep_get_header_timestamp_size(struct tep_handle *tep); -bool tep_is_old_format(struct tep_handle *tep); -void tep_set_test_filters(struct tep_handle *tep, int test_filters); - -struct tep_handle *tep_alloc(void); -void tep_free(struct tep_handle *tep); -void tep_ref(struct tep_handle *tep); -void tep_unref(struct tep_handle *tep); -int tep_get_ref(struct tep_handle *tep); - -/* for debugging */ -void tep_print_funcs(struct tep_handle *tep); -void tep_print_printk(struct tep_handle *tep); - -/* ----------------------- filtering ----------------------- */ - -enum tep_filter_boolean_type { - TEP_FILTER_FALSE, - TEP_FILTER_TRUE, -}; - -enum tep_filter_op_type { - TEP_FILTER_OP_AND = 1, - TEP_FILTER_OP_OR, - TEP_FILTER_OP_NOT, -}; - -enum tep_filter_cmp_type { - TEP_FILTER_CMP_NONE, - TEP_FILTER_CMP_EQ, - TEP_FILTER_CMP_NE, - TEP_FILTER_CMP_GT, - TEP_FILTER_CMP_LT, - TEP_FILTER_CMP_GE, - TEP_FILTER_CMP_LE, - TEP_FILTER_CMP_MATCH, - TEP_FILTER_CMP_NOT_MATCH, - TEP_FILTER_CMP_REGEX, - TEP_FILTER_CMP_NOT_REGEX, -}; - -enum tep_filter_exp_type { - TEP_FILTER_EXP_NONE, - 
TEP_FILTER_EXP_ADD, - TEP_FILTER_EXP_SUB, - TEP_FILTER_EXP_MUL, - TEP_FILTER_EXP_DIV, - TEP_FILTER_EXP_MOD, - TEP_FILTER_EXP_RSHIFT, - TEP_FILTER_EXP_LSHIFT, - TEP_FILTER_EXP_AND, - TEP_FILTER_EXP_OR, - TEP_FILTER_EXP_XOR, - TEP_FILTER_EXP_NOT, -}; - -enum tep_filter_arg_type { - TEP_FILTER_ARG_NONE, - TEP_FILTER_ARG_BOOLEAN, - TEP_FILTER_ARG_VALUE, - TEP_FILTER_ARG_FIELD, - TEP_FILTER_ARG_EXP, - TEP_FILTER_ARG_OP, - TEP_FILTER_ARG_NUM, - TEP_FILTER_ARG_STR, -}; - -enum tep_filter_value_type { - TEP_FILTER_NUMBER, - TEP_FILTER_STRING, - TEP_FILTER_CHAR -}; - -struct tep_filter_arg; - -struct tep_filter_arg_boolean { - enum tep_filter_boolean_type value; -}; - -struct tep_filter_arg_field { - struct tep_format_field *field; -}; - -struct tep_filter_arg_value { - enum tep_filter_value_type type; - union { - char *str; - unsigned long long val; - }; -}; - -struct tep_filter_arg_op { - enum tep_filter_op_type type; - struct tep_filter_arg *left; - struct tep_filter_arg *right; -}; - -struct tep_filter_arg_exp { - enum tep_filter_exp_type type; - struct tep_filter_arg *left; - struct tep_filter_arg *right; -}; - -struct tep_filter_arg_num { - enum tep_filter_cmp_type type; - struct tep_filter_arg *left; - struct tep_filter_arg *right; -}; - -struct tep_filter_arg_str { - enum tep_filter_cmp_type type; - struct tep_format_field *field; - char *val; - char *buffer; - regex_t reg; -}; - -struct tep_filter_arg { - enum tep_filter_arg_type type; - union { - struct tep_filter_arg_boolean boolean; - struct tep_filter_arg_field field; - struct tep_filter_arg_value value; - struct tep_filter_arg_op op; - struct tep_filter_arg_exp exp; - struct tep_filter_arg_num num; - struct tep_filter_arg_str str; - }; -}; - -struct tep_filter_type { - int event_id; - struct tep_event *event; - struct tep_filter_arg *filter; -}; - -#define TEP_FILTER_ERROR_BUFSZ 1024 - -struct tep_event_filter { - struct tep_handle *tep; - int filters; - struct tep_filter_type *event_filters; - char 
error_buffer[TEP_FILTER_ERROR_BUFSZ]; -}; - -struct tep_event_filter *tep_filter_alloc(struct tep_handle *tep); - -/* for backward compatibility */ -#define FILTER_NONE TEP_ERRNO__NO_FILTER -#define FILTER_NOEXIST TEP_ERRNO__FILTER_NOT_FOUND -#define FILTER_MISS TEP_ERRNO__FILTER_MISS -#define FILTER_MATCH TEP_ERRNO__FILTER_MATCH - -enum tep_errno tep_filter_add_filter_str(struct tep_event_filter *filter, - const char *filter_str); - -enum tep_errno tep_filter_match(struct tep_event_filter *filter, - struct tep_record *record); - -int tep_filter_strerror(struct tep_event_filter *filter, enum tep_errno err, - char *buf, size_t buflen); - -int tep_event_filtered(struct tep_event_filter *filter, - int event_id); - -void tep_filter_reset(struct tep_event_filter *filter); - -void tep_filter_free(struct tep_event_filter *filter); - -char *tep_filter_make_string(struct tep_event_filter *filter, int event_id); - -int tep_filter_remove_event(struct tep_event_filter *filter, - int event_id); - -int tep_filter_copy(struct tep_event_filter *dest, struct tep_event_filter *source); - -int tep_filter_compare(struct tep_event_filter *filter1, struct tep_event_filter *filter2); - -#endif /* _PARSE_EVENTS_H */ diff --git a/tools/lib/traceevent/event-plugin.c b/tools/lib/traceevent/event-plugin.c deleted file mode 100644 index e7f93d5fe4fd..000000000000 --- a/tools/lib/traceevent/event-plugin.c +++ /dev/null @@ -1,711 +0,0 @@ -// SPDX-License-Identifier: LGPL-2.1 -/* - * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "event-parse.h" -#include "event-parse-local.h" -#include "event-utils.h" -#include "trace-seq.h" - -#define LOCAL_PLUGIN_DIR ".local/lib/traceevent/plugins/" - -static struct registered_plugin_options { - struct registered_plugin_options *next; - struct tep_plugin_option *options; -} *registered_options; - -static struct trace_plugin_options { - 
struct trace_plugin_options *next; - char *plugin; - char *option; - char *value; -} *trace_plugin_options; - -struct tep_plugin_list { - struct tep_plugin_list *next; - char *name; - void *handle; -}; - -struct tep_plugins_dir { - struct tep_plugins_dir *next; - char *path; - enum tep_plugin_load_priority prio; -}; - -static void lower_case(char *str) -{ - if (!str) - return; - for (; *str; str++) - *str = tolower(*str); -} - -static int update_option_value(struct tep_plugin_option *op, const char *val) -{ - char *op_val; - - if (!val) { - /* toggle, only if option is boolean */ - if (op->value) - /* Warn? */ - return 0; - op->set ^= 1; - return 0; - } - - /* - * If the option has a value then it takes a string - * otherwise the option is a boolean. - */ - if (op->value) { - op->value = val; - return 0; - } - - /* Option is boolean, must be either "1", "0", "true" or "false" */ - - op_val = strdup(val); - if (!op_val) - return -1; - lower_case(op_val); - - if (strcmp(val, "1") == 0 || strcmp(val, "true") == 0) - op->set = 1; - else if (strcmp(val, "0") == 0 || strcmp(val, "false") == 0) - op->set = 0; - free(op_val); - - return 0; -} - -/** - * tep_plugin_list_options - get list of plugin options - * - * Returns an array of char strings that list the currently registered - * plugin options in the format of :